From 172d0a4c759804b07f5592c270ae3c36253274d5 Mon Sep 17 00:00:00 2001
From: Xiaotian Wu
Date: Sun, 6 Mar 2022 12:09:24 +0800
Subject: [PATCH 01/23] Add support for LoongArch

---
 0ad/0ad-fix-build.patch | 18 +
 0ad/0ad-la64.patch | 97 +
 0ad/PKGBUILD | 10 +
 a2ps/PKGBUILD | 2 +
 aardvark-dns/PKGBUILD | 2 +-
 abseil-cpp/PKGBUILD | 7 +-
 abseil-cpp/abseil-cpp-la64.patch | 13 +
 acme-redirect/PKGBUILD | 11 +-
 acpi_call-lts/PKGBUILD | 2 +-
 acpi_call/PKGBUILD | 2 +-
 acpica/PKGBUILD | 10 +-
 acpica/acpica-fix-build.patch | 13 +
 acpica/acpica-unix-la64.patch | 39 +
 aida-x/PKGBUILD | 2 +-
 aiksaurus/PKGBUILD | 9 +-
 aiksaurus/aiksaurus-fix-build.patch | 400 +
 alacritty/PKGBUILD | 2 +-
 alsa-tools/PKGBUILD | 2 +-
 android-tools/PKGBUILD | 11 +-
 android-tools/android-tools-la64.patch | 13 +
 anewer/PKGBUILD | 2 +-
 angle-grinder/PKGBUILD | 2 +-
 apache-orc/PKGBUILD | 6 +-
 apptainer/PKGBUILD | 3 +
 apr-util/PKGBUILD | 2 +-
 apr/PKGBUILD | 14 +-
 apr/apr-1.7-fix-build.patch | 13 +
 arch-rebuild-order/PKGBUILD | 6 +-
 arch-repro-status/PKGBUILD | 2 +-
 archinstall/PKGBUILD | 8 +-
 archinstall/archinstall-la64-2.6.3.patch | 139 +
 arrow/PKGBUILD | 5 +
 arti/PKGBUILD | 2 +-
 aspell-ru/PKGBUILD | 2 +-
 at51/PKGBUILD | 2 +-
 auth-tarball-from-git/PKGBUILD | 2 +-
 autogen/PKGBUILD | 1 +
 autotiling-rs/PKGBUILD | 2 +-
 avisynthplus/PKGBUILD | 10 +-
 avisynthplus/avisynthplus-la64.patch | 19 +
 b3sum/PKGBUILD | 2 +-
 babl/PKGBUILD | 7 +-
 babl/babl-fix-gir-pkgname.patch | 13 +
 bacon/PKGBUILD | 2 +-
 baidupcs-go/PKGBUILD | 2 +
 bandwhich/PKGBUILD | 2 +-
 bcprov/PKGBUILD | 2 +-
 bees/PKGBUILD | 11 +-
 bigloo/PKGBUILD | 8 +-
 bigloo/bigloo-la64.patch | 11 +
 bingrep/PKGBUILD | 2 +-
 binocle/PKGBUILD | 2 +-
 blender/PKGBUILD | 13 +-
 bonnie++/PKGBUILD | 2 +-
 boost/PKGBUILD | 7 +-
 boost/boost-1.79.0-la64.patch | 55 +
 booster/PKGBUILD | 6 +-
 bore/PKGBUILD | 2 +-
 borg/PKGBUILD | 2 +-
 bottom/PKGBUILD | 2 +-
 box2d/PKGBUILD | 11 +-
 box2d/box2d-fix-build.patch | 13 +
 boxxy/PKGBUILD | 2 +-
 breezy/PKGBUILD | 11 +-
 breezy/breezy-fix-install.patch | 15 +
 brltty/PKGBUILD | 1 -
 broadcom-wl/PKGBUILD | 2 +-
 broot/PKGBUILD | 2 +-
 buildkit/0001-add-loongarch64-support.patch | 130 +
 buildkit/PKGBUILD | 11 +-
 bupstash/PKGBUILD | 2 +-
 caddy/PKGBUILD | 3 +
 calf/PKGBUILD | 2 +-
 capnet-assist/PKGBUILD | 8 +
 cargo-audit/PKGBUILD | 2 +-
 cargo-auditable/PKGBUILD | 2 +-
 cargo-bloat/PKGBUILD | 2 +-
 cargo-c/PKGBUILD | 2 +-
 cargo-cyclonedx/PKGBUILD | 2 +-
 cargo-depgraph/PKGBUILD | 2 +-
 cargo-edit/PKGBUILD | 2 +-
 cargo-expand/PKGBUILD | 2 +-
 cargo-geiger/PKGBUILD | 2 +-
 cargo-generate/PKGBUILD | 2 +-
 cargo-insta/PKGBUILD | 2 +-
 cargo-machete/PKGBUILD | 2 +-
 cargo-msrv/PKGBUILD | 2 +-
 cargo-ndk/PKGBUILD | 2 +-
 cargo-outdated/PKGBUILD | 2 +-
 cargo-pgrx/PKGBUILD | 2 +-
 cargo-sort/PKGBUILD | 2 +-
 cargo-spellcheck/PKGBUILD | 2 +-
 cargo-supply-chain/PKGBUILD | 2 +-
 cargo-tarpaulin/PKGBUILD | 2 +-
 cargo-tauri/PKGBUILD | 2 +-
 cargo-udeps/PKGBUILD | 4 +-
 cargo-update/PKGBUILD | 2 +-
 cargo-watch/PKGBUILD | 2 +-
 cargo2junit/PKGBUILD | 2 +-
 cbindgen/PKGBUILD | 2 +-
 cdparanoia/PKGBUILD | 2 +
 cdrtools/PKGBUILD | 11 +-
 cdrtools/cdrtools-la64.patch | 16 +
 chezmoi/PKGBUILD | 5 +
 chmlib/PKGBUILD | 11 +-
 chmlib/chmlib-fix-gcc13.patch | 25 +
 choose/PKGBUILD | 2 +-
 clamav/PKGBUILD | 2 +-
 clang14/0001-add-loong64-support.patch | 16205 ++
 clang14/0002-add-loong64-support.patch | 9243 ++
 clang14/PKGBUILD | 12 +-
 clash/PKGBUILD | 4 +
 cln/PKGBUILD | 11 +-
 cln/cln-la64.patch | 53 +
 clucene/PKGBUILD | 3 +
 cni-plugins/PKGBUILD | 3 +
 cobalt/PKGBUILD | 2 +-
 cocogitto/PKGBUILD | 2 +-
 code/PKGBUILD | 3 +
coin-or-cbc/PKGBUILD | 6 +- committed/PKGBUILD | 2 +- compiler-rt/PKGBUILD | 3 + ...C8F98282B944E3B0D5C2530FC3042E345AD05D.asc | 75 + conky/PKGBUILD | 2 +- containerd/PKGBUILD | 4 + cpputest/PKGBUILD | 2 +- cri-o/PKGBUILD | 1 + criu/2183.patch | 2237 + criu/PKGBUILD | 5 +- cups-pdf/PKGBUILD | 2 +- cups-pk-helper/PKGBUILD | 2 +- cxxbridge/PKGBUILD | 2 +- cypari2/PKGBUILD | 2 +- daktilo/PKGBUILD | 2 +- dbeaver/PKGBUILD | 2 +- dbus-c++/PKGBUILD | 3 + dconf-editor/dconf-editor-fix-meson.patch | 20 + deepin-anything/PKGBUILD | 2 +- deepin-desktop-base/PKGBUILD | 12 +- .../deepin-deskto-base-la64.patch | 27 + deepin-grand-search/62.patch | 126 + deepin-grand-search/PKGBUILD | 4 +- devtools/PKGBUILD | 16 +- devtools/devtools-loong64-1.0.4.patch | 898 + dfrs/PKGBUILD | 19 +- dfrs/dfrs-la64.patch | 13 + dhcp/PKGBUILD | 10 + diesel-cli/PKGBUILD | 2 +- discover/PKGBUILD | 2 +- diskonaut/PKGBUILD | 2 +- diskus/PKGBUILD | 2 +- distcc/PKGBUILD | 5 +- dns-over-https/PKGBUILD | 6 +- docker-machine/PKGBUILD | 11 +- docker-machine/docker-machine-la64.patch | 16 + docker/PKGBUILD | 11 +- docker/moby-la64.patch | 12 + dog/PKGBUILD | 2 +- dra/PKGBUILD | 2 +- dragonfly-reverb/PKGBUILD | 2 +- dtc/PKGBUILD | 2 +- dua-cli/PKGBUILD | 8 +- dump_syms/PKGBUILD | 10 +- duplicity/PKGBUILD | 10 +- dysk/PKGBUILD | 2 +- easyloggingpp/PKGBUILD | 2 +- edk2/60-edk2-loongarch64.json | 31 + edk2/PKGBUILD | 60 +- edk2/edk2-use-env-toolchains.patch | 62 + edk2/relax_edk2_gcc14.diff | 44 + efitools/PKGBUILD | 9 +- efitools/efitools-la64.patch | 13 + electron/PKGBUILD | 6 +- electron25/PKGBUILD | 10 +- emacs/PKGBUILD | 2 +- emacs/emacs-la64.patch | 10 + erdtree/PKGBUILD | 2 +- espeakup/PKGBUILD | 2 +- espflash/PKGBUILD | 2 +- espup/PKGBUILD | 2 +- eva/PKGBUILD | 2 +- evcxr_repl/PKGBUILD | 2 +- farstream/PKGBUILD | 2 +- fbterm/0001-Fix-build-with-gcc-6.patch | 104 + fbterm/PKGBUILD | 60 + fbterm/color_palette.patch | 102 + fbterm/fbconfig.patch | 78 + fbterm/fbterm.patch | 9 + fbterm/fbtermrc | 66 + fbterm/fix_ftbfs_crosscompile.patch | 28 + fbterm/fix_ftbfs_epoll.patch | 15 + fcitx/PKGBUILD | 8 +- fcitx5-chinese-addons/PKGBUILD | 4 +- fcitx5-m17n/PKGBUILD | 2 +- ffcall/PKGBUILD | 12 +- ffcall/libffcall-la64-2.4.patch | 3304 + ffmpeg/PKGBUILD | 8 +- ffmpeg4.4/PKGBUILD | 10 - ffmpegthumbnailer/PKGBUILD | 2 +- fftw/PKGBUILD | 26 +- firecracker/PKGBUILD | 2 +- .../0001-Add-support-for-LoongArch64.patch | 80 + ...2-Enable-VA-API-support-for-AMD-GPUs.patch | 31 + ...rchitectural-limit-on-VA-API-support.patch | 40 + .../0004-Enable-WebRTC-for-LoongArch.patch | 152 + .../0005-Fix-libyuv-build-with-LSX-LASX.patch | 398 + firefox-developer-edition/PKGBUILD | 59 +- .../0001-Add-support-for-LoongArch64.patch | 80 + ...2-Enable-VA-API-support-for-AMD-GPUs.patch | 31 + ...rchitectural-limit-on-VA-API-support.patch | 40 + .../0004-Enable-WebRTC-for-LoongArch.patch | 152 + .../0005-Fix-libyuv-build-with-LSX-LASX.patch | 398 + firefox/PKGBUILD | 68 +- flac/PKGBUILD | 2 +- flashrom/0001-Loongson-3-SPI-tmp.patch | 353 + flashrom/0002-Speed-up.patch | 37 + .../0003-Add-support-for-loongarch64.patch | 220 + flashrom/PKGBUILD | 28 +- flatpak/PKGBUILD | 2 +- fluidd/PKGBUILD | 1 + fmt/PKGBUILD | 2 +- foomatic-db-engine/PKGBUILD | 2 +- foomatic-db/PKGBUILD | 2 +- foot/PKGBUILD | 2 +- fossil/PKGBUILD | 4 + fuse2/PKGBUILD | 14 +- fuse2/fuse-closefrom.patch | 22 + fuse2/fuse-loongarch.patch | 28 + .../0001-add-support-for-loongarch64.patch | 315 + fwupd-efi/PKGBUILD | 13 +- fwupd/PKGBUILD | 21 +- fwupd/fwupd-1.9.5-loong64.patch | 39 + gcc12/PKGBUILD | 24 +- 
gcr/PKGBUILD | 4 - gendesk/PKGBUILD | 5 +- geos/PKGBUILD | 10 +- geos/geos-3.11.1-gcc13.patch | 90 + gfold/PKGBUILD | 2 +- ghc/PKGBUILD | 14 +- git-branchless/PKGBUILD | 2 +- git-bug/PKGBUILD | 2 + git-cliff/PKGBUILD | 2 +- git-delta/PKGBUILD | 2 +- git-grab/PKGBUILD | 2 +- gitlab-exporter/PKGBUILD | 2 +- gitlab-gitaly/PKGBUILD | 7 + gitlab-runner/PKGBUILD | 2 + gitlab-shell/PKGBUILD | 6 +- gitoxide/PKGBUILD | 2 +- gitui/PKGBUILD | 2 +- gloox/PKGBUILD | 11 +- gloox/gloox-fix-build.patch | 188 + glusterfs/PKGBUILD | 3 +- gn/PKGBUILD | 16 +- gn/gn-fix-build.patch | 10 + gnome-control-center/PKGBUILD | 2 +- .../gnome-dictionary-fix-meson.patch | 34 + .../gnome-font-viewer-meson.patch | 25 + gnome-mplayer/PKGBUILD | 2 +- gnome-remote-desktop/PKGBUILD | 2 +- gnome-tetravex/PKGBUILD | 6 +- gnome-tetravex/gnome-tetravex-fix-meson.patch | 22 + gnome-tour/PKGBUILD | 6 + gnu-efi/PKGBUILD | 10 +- gnu-efi/gnu-efi-3.0.17-la64.patch | 89 + gnugo/PKGBUILD | 2 + go-md2man/PKGBUILD | 3 +- go/PKGBUILD | 19 +- godot/PKGBUILD | 4 +- gpg-tui/PKGBUILD | 2 +- gping/PKGBUILD | 2 +- gptfdisk/PKGBUILD | 2 + graphviz/PKGBUILD | 4 +- grcov/PKGBUILD | 2 +- greetd-regreet/PKGBUILD | 2 +- greetd-tuigreet/PKGBUILD | 2 +- greetd/PKGBUILD | 2 +- grex/PKGBUILD | 2 +- grpc/PKGBUILD | 2 +- gssdp/PKGBUILD | 1 + gstreamer/PKGBUILD | 18 +- gtk3/PKGBUILD | 2 + gtk4/gtk-objcopy.patch | 36 + gunicorn/PKGBUILD | 1 + halp/PKGBUILD | 2 +- handlr/PKGBUILD | 2 +- haskell-doctest-parallel/PKGBUILD | 2 +- hck/PKGBUILD | 2 +- hdf5/PKGBUILD | 6 +- heh/PKGBUILD | 2 +- helix/PKGBUILD | 2 +- hexyl/PKGBUILD | 2 +- hidapi/hidapi-fix-build.patch | 12 + himalaya/PKGBUILD | 2 +- hitori/PKGBUILD | 7 +- hitori/hitori-fix-meson.patch | 21 + hotdoc/PKGBUILD | 2 +- hplip/PKGBUILD | 2 +- hspell/PKGBUILD | 12 +- htmlcxx/PKGBUILD | 11 +- htmlcxx/htmlcxx-la64.patch | 26 + htmlq/PKGBUILD | 2 +- httplz/PKGBUILD | 2 +- hub/PKGBUILD | 14 +- hugo/PKGBUILD | 2 +- hypercorn/PKGBUILD | 1 + hyperfine/PKGBUILD | 2 +- i3status-rust/PKGBUILD | 2 +- iempluginsuite/PKGBUILD | 2 +- igrep/PKGBUILD | 2 +- imagemagick/PKGBUILD | 4 +- imlib2/PKGBUILD | 3 +- inkscape/PKGBUILD | 1 + ipmitool/PKGBUILD | 3 +- ipp-usb/PKGBUILD | 3 +- ipxe/PKGBUILD | 52 +- ipxe/arch.ipxe | 45 +- ipxe/general.h | 2 +- ipxe/ipxe-la64.patch | 15 + ispc/PKGBUILD | 7 +- jack2/PKGBUILD | 2 +- java-openjdk/freedesktop-java.desktop | 12 - java-openjdk/freedesktop-jconsole.desktop | 11 - java-openjdk/freedesktop-jshell.desktop | 9 - java-openjdk/install_jdk-openjdk.sh | 50 - java-openjdk/install_jre-openjdk-headless.sh | 48 - java-openjdk/install_jre-openjdk.sh | 35 - java11-openjdk/PKGBUILD | 21 +- java11-openjdk/jdk11-11.0.20.1-la64.patch | 116875 ++++++++++++++ java17-openjdk/PKGBUILD | 22 +- java17-openjdk/jdk17-17.0.9.8-la64.patch | 121820 +++++++++++++++ java8-openjdk/PKGBUILD | 13 +- java8-openjdk/jdk8u382-la64.patch | 116949 ++++++++++++++ jemalloc/PKGBUILD | 15 +- jemalloc/add-loongarch64.patch | 14 + jless/PKGBUILD | 2 +- js102/PKGBUILD | 75 +- js102/js102-loong64-jit.patch | 72 + js115/PKGBUILD | 74 +- js91/mozjs-la64.patch | 98 + js91/spidermonkey-91-add-loongarch.patch | 686 + just/PKGBUILD | 8 +- jwt-cli/PKGBUILD | 2 +- k3b/PKGBUILD | 2 +- ...c-support-for-LoongArch-architecture.patch | 123 + kcov/PKGBUILD | 11 +- kdeplasma-addons/PKGBUILD | 2 +- kernel-headers-musl/PKGBUILD | 19 +- kmon/PKGBUILD | 2 +- kondo/PKGBUILD | 2 +- kooha/PKGBUILD | 2 +- kubie/PKGBUILD | 2 +- ladspa/PKGBUILD | 4 + lapce/PKGBUILD | 2 +- latex2rtf/PKGBUILD | 4 +- ldproxy/PKGBUILD | 2 +- leafpad/PKGBUILD | 2 + 
lgi/PKGBUILD | 2 +- libavif/PKGBUILD | 2 +- libb2/PKGBUILD | 14 +- libb2/libb2-fix-build.patch | 14 + libcdio/PKGBUILD | 2 + libclc/PKGBUILD | 1 - libdaemon/PKGBUILD | 2 +- libdrm/PKGBUILD | 2 +- libdsme/PKGBUILD | 2 +- libetebase/PKGBUILD | 2 +- libfbclient/PKGBUILD | 3 + libfbclient/fbclient-la64-4.0.0.patch | 27 + libfbclient/fbclient-la64.patch | 27 + libfido2/libfido2-no-ssp.patch | 27 + libfilezilla/PKGBUILD | 5 + libgda/PKGBUILD | 24 +- libgda/libgda-la64.patch | 22 + libgexiv2/PKGBUILD | 2 +- libglvnd/PKGBUILD | 1 + libgme/PKGBUILD | 2 +- libgoom2/PKGBUILD | 2 + libgpod/PKGBUILD | 2 +- libimagequant/PKGBUILD | 6 + libinput/PKGBUILD | 2 + libjpeg-turbo/PKGBUILD | 3 +- libjxl/PKGBUILD | 2 +- libksysguard/PKGBUILD | 2 +- libopenraw/PKGBUILD | 7 +- libopenraw/libopenraw-fix-build.patch | 12 + liborcus/PKGBUILD | 11 +- liborcus/liborcus-cstdint.patch | 20 + libotr/PKGBUILD | 7 +- libotr/libotr-fix-build.patch | 12 + libraw/PKGBUILD | 1 + libredefender/PKGBUILD | 2 +- libreoffice-fresh/PKGBUILD | 4 +- libretro-genesis-plus-gx/PKGBUILD | 5 + librustls/PKGBUILD | 2 +- libserialport/PKGBUILD | 2 +- .../0001-add-support-for-loongarch64.patch | 672 + libsmbios/PKGBUILD | 6 +- liburcu/PKGBUILD | 9 +- liburcu/userspace-rcu-loongarch64.patch | 186 + libusbsio/PKGBUILD | 6 +- libvirt/PKGBUILD | 13 +- libvirt/libvirt-loongarch.patch | 407 + libvisual/PKGBUILD | 2 + libvpx/PKGBUILD | 2 + libyuv/0001-fix-build-error.patch | 113 + libyuv/PKGBUILD | 7 +- link-grammar/PKGBUILD | 5 +- linux-hardened/PKGBUILD | 54 +- linux-hardened/config.la64 | 8539 + linux-hardened/remove_shm_align_mask.diff | 67 + linux-tools/PKGBUILD | 25 +- liteide/PKGBUILD | 7 +- liteide/liteide-fix-build.patch | 11 + lld/PKGBUILD | 9 +- lld/lld-la64.patch | 2569 + llvm/PKGBUILD | 10 +- ...timeDyld-MCJIT-Add-LoongArch-support.patch | 328 + llvm/llvm-newreloc-la64.patch | 132 + llvm14/PKGBUILD | 9 +- llvm14/llvm-loong64.patch | 47164 ++++++ lm_sensors/PKGBUILD | 2 +- lsd/PKGBUILD | 2 +- lua-compat53/PKGBUILD | 2 +- lua-system/PKGBUILD | 2 +- lua-term/PKGBUILD | 2 +- luaexpat/PKGBUILD | 2 +- luajit/PKGBUILD | 14 +- luarocks/PKGBUILD | 11 +- luarocks/luarocks-la64.patch | 24 + luasocket/PKGBUILD | 2 +- lucky-commit/PKGBUILD | 2 +- lurk/PKGBUILD | 2 +- lxc/4363.patch | 235 + lxc/PKGBUILD | 9 +- malcontent/PKGBUILD | 1 + mandown/PKGBUILD | 6 + mariadb/PKGBUILD | 8 +- mariadb/mariadb-fix-build.patch | 50 + marisa/PKGBUILD | 2 +- marked-man/PKGBUILD | 2 +- mastodon-twitter-sync/PKGBUILD | 2 +- mate-applets/PKGBUILD | 4 +- mate-terminal/PKGBUILD | 11 +- mate-terminal/theme-colors-false.patch | 11 + materialx/PKGBUILD | 7 +- materialx/materialx-fix-build.patch | 11 + matrix-synapse/PKGBUILD | 4 +- maturin/PKGBUILD | 2 +- maven/PKGBUILD | 4 +- mcfly/PKGBUILD | 2 +- mdbook-linkcheck/PKGBUILD | 10 +- mdbook/PKGBUILD | 2 +- mdcat/PKGBUILD | 2 +- menyoki/PKGBUILD | 2 +- mesa/PKGBUILD | 4 +- meson/PKGBUILD | 12 +- mididings/PKGBUILD | 4 +- mingw-w64-gcc/PKGBUILD | 4 +- mirro-rs/PKGBUILD | 2 +- mkinitcpio-archiso/PKGBUILD | 11 +- .../mkinitcpio-archiso-loong64.patch | 12 + mkosi/PKGBUILD | 8 +- mold/PKGBUILD | 2 +- mpg123/PKGBUILD | 2 +- mplayer/PKGBUILD | 10 +- mplayer/mplayer-la64.patch | 31 + musl/0001-musl-add-loongarch64-support.patch | 1722 + musl/PKGBUILD | 11 +- mutter/PKGBUILD | 2 +- nautilus-sendto/PKGBUILD | 7 +- .../nautilus-sendto-fix-meson.patch | 13 + navi/PKGBUILD | 2 +- ncspot/PKGBUILD | 2 +- neofetch/PKGBUILD | 11 +- neofetch/neofetch-la64.patch | 15 + netavark/PKGBUILD | 2 +- netpbm/PKGBUILD | 3 + netplan/PKGBUILD | 12 +- 
netplan/netplan-disable-pandoc.patch | 27 + newsboat/PKGBUILD | 2 +- nextcloud-app-deck/PKGBUILD | 2 +- nextcloud-app-notify_push/PKGBUILD | 6 +- nginx-mod-ndk-set-misc/PKGBUILD | 2 +- nickel/PKGBUILD | 2 +- ninja/PKGBUILD | 6 +- ...o-not-use-PTHREAD_STACK_MIN-on-glibc.patch | 32 + ntp/ntp-ssp-la.patch | 10 + nushell/PKGBUILD | 2 +- nuspell/PKGBUILD | 2 +- nvidia-cg-toolkit/PKGBUILD | 1 + nvidia-lts/PKGBUILD | 2 +- nvidia/PKGBUILD | 2 +- ocaml/PKGBUILD | 15 +- ocaml/ocaml-5.0.0-la64.patch | 2389 + onefetch/PKGBUILD | 2 +- open-iscsi/PKGBUILD | 3 +- open-iscsi/open-iscsi-fix-build.patch | 25 + open-isns/PKGBUILD | 2 +- openal/PKGBUILD | 2 +- openblas/PKGBUILD | 11 +- openblas/fix-loong.patch | 47 + opencv/PKGBUILD | 7 +- openh264/PKGBUILD | 1 + openimagedenoise/PKGBUILD | 2 +- openjade/PKGBUILD | 12 +- openjade/openjade-nola.patch | 12 + openmp/PKGBUILD | 2 +- openmp/openmp-loong64.patch | 514 + openpgp-ca/PKGBUILD | 2 +- openpgp-card-tools/PKGBUILD | 2 +- opus/PKGBUILD | 2 + ouch/PKGBUILD | 2 +- paccat/PKGBUILD | 2 +- pacman-bintrans/PKGBUILD | 2 +- pacman-contrib/PKGBUILD | 3 + pacman-contrib/pkgbuild-vim-la64.patch | 13 + pari/PKGBUILD | 4 +- pastel/PKGBUILD | 2 +- perl-image-sane/PKGBUILD | 2 +- phonon/PKGBUILD | 2 +- pipe-rename/PKGBUILD | 2 +- pixman/83.patch | 11078 ++ pixman/PKGBUILD | 18 +- pkgfile/PKGBUILD | 13 +- pkgfile/pkgfile-use-loong64.patch | 17 + plasma-desktop/PKGBUILD | 3 +- pngquant/PKGBUILD | 2 +- polkit-qt/PKGBUILD | 2 +- polkit/PKGBUILD | 1 + portmidi/PKGBUILD | 2 + postgresql/PKGBUILD | 4 +- postgresql/add-loongarch-support.patch | 13 + ppsspp/PKGBUILD | 19 + primecount/PKGBUILD | 2 +- procs/PKGBUILD | 2 +- progpick/PKGBUILD | 2 +- prometheus-memcached-exporter/PKGBUILD | 3 + prometheus-mysqld-exporter/PKGBUILD | 3 + prometheus-wireguard-exporter/PKGBUILD | 2 +- protobuf-c/PKGBUILD | 1 + psiconv/PKGBUILD | 4 +- psiconv/psiconv-fix-build.patch | 74 + pueue/PKGBUILD | 2 +- pyalpm/PKGBUILD | 2 +- pycups/PKGBUILD | 2 +- pyflow/PKGBUILD | 2 +- pygobject/PKGBUILD | 2 +- pyqt5/PKGBUILD | 2 +- pyqt6-3d/PKGBUILD | 2 +- pyqt6-charts/PKGBUILD | 2 +- pyqt6-datavisualization/PKGBUILD | 2 +- pyqt6-networkauth/PKGBUILD | 2 +- python-aiohttp/PKGBUILD | 2 +- python-ansiwrap/PKGBUILD | 1 + python-appdirs/PKGBUILD | 4 +- python-apsw/PKGBUILD | 2 +- python-binaryornot/PKGBUILD | 16 +- python-black/PKGBUILD | 1 + python-cachy/PKGBUILD | 1 + python-cryptography/PKGBUILD | 6 +- python-debugpy/PKGBUILD | 2 +- python-et-xmlfile/PKGBUILD | 2 +- python-greenlet/PKGBUILD | 11 +- python-greenlet/python-greenlet-la64.patch | 61 + python-libcst/PKGBUILD | 1 + python-mss/PKGBUILD | 1 + python-nodeenv/PKGBUILD | 13 +- python-nodeenv/nodeenv-loong64.patch | 12 + python-numpy/PKGBUILD | 11 +- python-numpy/add-loongarch-support.patch | 33 + python-parso/PKGBUILD | 2 +- python-poetry/PKGBUILD | 2 +- python-pyelftools/PKGBUILD | 13 +- python-pyelftools/pyelftools-0.29.patch | 452 + python-pylint/PKGBUILD | 1 + python-pyopenssl/PKGBUILD | 2 +- python-pypandoc/PKGBUILD | 2 +- python-rpds-py/PKGBUILD | 2 +- python-simple-term-menu/PKGBUILD | 8 + python-simple-term-menu/cjk-preview.patch | 11 + python-stone/PKGBUILD | 1 + python-urllib3/PKGBUILD | 2 +- python-virtualenv/PKGBUILD | 10 +- python-wstools/PKGBUILD | 2 +- qd/PKGBUILD | 2 +- qemu/PKGBUILD | 15 +- qemu/qemu-4k-pagesize.patch | 58 + qemu/qemu-kvm-la64.patch | 1680 + qt5-base/PKGBUILD | 1 + qt5-doc/PKGBUILD | 7 +- qt5-doc/qt5-base-la64.patch | 12 + qt5-script/PKGBUILD | 7 +- qt5-script/loongarch_ports.patch | 60 + qt5-script/qt5-base-la64.patch | 12 
+ qt5-webengine/PKGBUILD | 4 +- qt5-webengine/qtwebengine-5.15.2-la64.patch | 63993 ++++++++ qt6-quick3dphysics/PKGBUILD | 10 +- qt6-quick3dphysics/qt3d-la64.patch | 31 + qt6-tools/qt6-tools-fix-build.patch | 11 + qtcreator/qtcreator-la64.patch | 179 + quazip/PKGBUILD | 5 +- rathole/PKGBUILD | 2 +- rbw/PKGBUILD | 2 +- reapack/PKGBUILD | 2 +- rebuilderd/PKGBUILD | 2 +- refind/PKGBUILD | 16 +- refind/refind-la64-0.14.0.patch | 729 + repod/0001-add-loong64-support.patch | 33 + repod/PKGBUILD | 14 +- repro-env/PKGBUILD | 2 +- rhit/PKGBUILD | 2 +- rhythmbox/PKGBUILD | 2 +- riff/PKGBUILD | 2 +- ripgrep-all/PKGBUILD | 2 +- riscv64-linux-gnu-glibc/PKGBUILD | 2 +- roc-toolkit/PKGBUILD | 10 +- rosenpass/PKGBUILD | 2 +- rpg-cli/PKGBUILD | 2 +- rpm-tools/PKGBUILD | 7 +- rpm-tools/rpm-add-loongarch.patch | 63 + rq/PKGBUILD | 2 +- ruby-base64/PKGBUILD | 2 +- ruby-bigdecimal/PKGBUILD | 2 +- ruby-cri/PKGBUILD | 2 +- ruby-ffi/PKGBUILD | 2 +- ruby-iconv/PKGBUILD | 2 +- ruby-rake/PKGBUILD | 1 + ruby/PKGBUILD | 18 +- ruff/PKGBUILD | 2 +- runc/PKGBUILD | 17 +- runc/runc-la64.patch | 11 + runst/PKGBUILD | 2 +- rust-bindgen/PKGBUILD | 2 +- rust-script/PKGBUILD | 2 +- rust/PKGBUILD | 69 +- rustscan/PKGBUILD | 2 +- rustypaste-cli/PKGBUILD | 2 +- rustypaste/PKGBUILD | 2 +- sad/PKGBUILD | 2 +- sbsigntools/PKGBUILD | 5 +- sbsigntools/sbsigntools-la64.patch | 24 + scaleway-cli/PKGBUILD | 4 +- sccache/PKGBUILD | 14 +- sdl2_gfx/PKGBUILD | 2 +- seabios/PKGBUILD | 9 +- selene/PKGBUILD | 2 +- sentry-cli/PKGBUILD | 8 +- sequoia-chameleon-gnupg/PKGBUILD | 2 +- sequoia-sop/PKGBUILD | 4 +- sequoia-sq/PKGBUILD | 4 +- sequoia-wot/PKGBUILD | 2 +- sh4d0wup/PKGBUILD | 2 +- shaderc/PKGBUILD | 1 + sharutils/PKGBUILD | 2 + sheldon/PKGBUILD | 10 +- shotgun/PKGBUILD | 4 +- signon-plugin-oauth2/PKGBUILD | 10 +- signon-ui/PKGBUILD | 10 +- signon-ui/signon-ui-loong64-bad-fix.patch | 31 + singularity/PKGBUILD | 2 +- skim/PKGBUILD | 2 +- sn0int/PKGBUILD | 2 +- sniffglue/PKGBUILD | 2 +- sniffnet/PKGBUILD | 2 +- sonic/PKGBUILD | 2 +- spicy-launcher/PKGBUILD | 4 +- spirv-tools/PKGBUILD | 2 +- spotify-launcher/PKGBUILD | 2 +- spotifyd/PKGBUILD | 2 +- spytrap-adb/PKGBUILD | 2 +- sshx/PKGBUILD | 2 +- stalonetray/PKGBUILD | 2 +- stardict/PKGBUILD | 3 + starship/PKGBUILD | 2 +- stochas/PKGBUILD | 4 +- suitesparse/PKGBUILD | 6 + supermin/PKGBUILD | 12 +- sws/PKGBUILD | 2 +- syslog-ng/PKGBUILD | 4 +- systeroid/PKGBUILD | 2 +- taplo-cli/PKGBUILD | 4 +- taskwarrior-tui/PKGBUILD | 2 +- tealdeer/PKGBUILD | 2 +- tectonic/PKGBUILD | 2 +- tere/PKGBUILD | 2 +- texlab/PKGBUILD | 2 +- texlive-bin/PKGBUILD | 7 +- texlive-bin/texlive-bin-la64.patch | 11150 ++ thunderbird/PKGBUILD | 10 +- thunderbird/firefox-115-loong.patch | 619 + thunderbird/mozconfig.cfg | 7 +- tickrs/PKGBUILD | 2 +- toastify/PKGBUILD | 2 +- tokei/PKGBUILD | 8 +- torchvision/PKGBUILD | 4 +- tracker3-miners/PKGBUILD | 4 +- .../tracker-miners-fix-build.patch | 13 + trippy/PKGBUILD | 2 +- typst/PKGBUILD | 2 +- ublock-origin/PKGBUILD | 1 + unrar/PKGBUILD | 1 + unzip/PKGBUILD | 1 + updlockfiles/PKGBUILD | 2 +- upx/0001-just-for-la64-build.patch | 35 + upx/PKGBUILD | 3 + uucp/PKGBUILD | 2 + v2ray-domain-list-community/PKGBUILD | 3 + v2ray/PKGBUILD | 4 + valgrind/PKGBUILD | 18 +- valgrind/valgrind-3.21-la64.patch | 93230 +++++++++++ vaultwarden/PKGBUILD | 2 +- virt-manager/600.patch | 186 + virt-manager/PKGBUILD | 13 +- virtiofsd/PKGBUILD | 2 +- virtualbox-host-modules-arch/PKGBUILD | 2 +- viu/PKGBUILD | 2 +- vivid/PKGBUILD | 2 +- vtk/PKGBUILD | 21 +- vtk/vtk-loong64.patch | 15 + wasm-pack/PKGBUILD | 
2 +- wasmtime/PKGBUILD | 2 +- wayland-protocols/PKGBUILD | 2 +- wayland/PKGBUILD | 2 +- webkit2gtk-4.1/PKGBUILD | 19 +- webkit2gtk-4.1/webkit2-gtk-fix-build.patch | 10 + .../webkit2gtk-fix-cmake-build.patch | 11 + webkit2gtk/PKGBUILD | 21 +- webkit2gtk/webkit2-gtk-fix-build.patch | 10 + webkit2gtk/webkit2gtk-fix-cmake-build.patch | 11 + webkitgtk-6.0/PKGBUILD | 15 +- .../webkit2gtk-fix-cmake-build.patch | 11 + webrtc-audio-processing-1/PKGBUILD | 7 +- .../webrtc-audio-processing-la64.patch | 12 + webrtc-audio-processing/PKGBUILD | 4 +- .../webrtc-audio-processing-la64.patch | 14 + whipper/PKGBUILD | 2 +- wiki-tui/PKGBUILD | 2 +- wldash/PKGBUILD | 4 +- wolf-shaper/PKGBUILD | 2 +- woodpecker/PKGBUILD | 4 + wpewebkit/PKGBUILD | 16 +- wpewebkit/webkit2gtk-fix-cmake-build.patch | 11 + x11vnc/PKGBUILD | 4 +- x264/PKGBUILD | 9 +- x86_64-linux-gnu-binutils/PKGBUILD | 66 + ...24BC1E8FB409FA9F14371813FCEF89DD9E3C4F.asc | 51 + x86_64-linux-gnu-gcc/PKGBUILD | 96 + ...975A70E63C361C73AE69EF6EEB81F8981C74C7.asc | 53 + ...C235A34C46AA3FFB293709A328C3A2C3C45C06.asc | 16 + ...A93CAD751C2AF4F8C7AD516C35B99309B5FA62.asc | 122 + x86_64-linux-gnu-gdb/PKGBUILD | 49 + x86_64-linux-gnu-glibc/PKGBUILD | 126 + ...73542B39962DF7B299931416792B4EA25340F8.asc | 54 + ...7C7372637EC10C57D7AA6579C43DFBF1CF2187.asc | 68 + x86_64-linux-gnu-glibc/reenable_DT_HASH.patch | 28 + x86_64-linux-gnu-glibc/sdt-config.h | 6 + x86_64-linux-gnu-glibc/sdt.h | 430 + x86_64-linux-gnu-linux-api-headers/PKGBUILD | 29 + xdg-desktop-portal-wlr/PKGBUILD | 2 +- xf86-video-loongson/PKGBUILD | 46 + ...h-against-Multimedia-Video-Controlle.patch | 32 + xorg-server/10-modeset.conf | 6 + xorg-server/PKGBUILD | 9 + xsd/0120-g++10.patch | 19 + xsd/xsd-c++17.patch | 48 + xsv/PKGBUILD | 2 +- yaegi/PKGBUILD | 2 +- yazi/PKGBUILD | 2 +- zbus_xmlgen/PKGBUILD | 2 +- zellij/PKGBUILD | 2 +- zenith/PKGBUILD | 2 +- zip/PKGBUILD | 2 + zola/PKGBUILD | 2 +- zoxide/PKGBUILD | 2 +- zram-generator/PKGBUILD | 6 + 776 files changed, 646613 insertions(+), 1231 deletions(-) create mode 100644 0ad/0ad-fix-build.patch create mode 100644 0ad/0ad-la64.patch create mode 100644 abseil-cpp/abseil-cpp-la64.patch create mode 100644 acpica/acpica-fix-build.patch create mode 100644 acpica/acpica-unix-la64.patch create mode 100644 aiksaurus/aiksaurus-fix-build.patch create mode 100644 android-tools/android-tools-la64.patch create mode 100644 apr/apr-1.7-fix-build.patch create mode 100644 archinstall/archinstall-la64-2.6.3.patch create mode 100644 avisynthplus/avisynthplus-la64.patch create mode 100644 babl/babl-fix-gir-pkgname.patch create mode 100644 bigloo/bigloo-la64.patch create mode 100644 boost/boost-1.79.0-la64.patch create mode 100644 box2d/box2d-fix-build.patch create mode 100644 breezy/breezy-fix-install.patch create mode 100644 buildkit/0001-add-loongarch64-support.patch create mode 100644 cdrtools/cdrtools-la64.patch create mode 100644 chmlib/chmlib-fix-gcc13.patch create mode 100644 clang14/0001-add-loong64-support.patch create mode 100644 clang14/0002-add-loong64-support.patch create mode 100644 cln/cln-la64.patch create mode 100644 compiler-rt14/keys/pgp/B6C8F98282B944E3B0D5C2530FC3042E345AD05D.asc create mode 100644 criu/2183.patch create mode 100644 dconf-editor/dconf-editor-fix-meson.patch create mode 100644 deepin-desktop-base/deepin-deskto-base-la64.patch create mode 100644 deepin-grand-search/62.patch create mode 100644 devtools/devtools-loong64-1.0.4.patch create mode 100644 dfrs/dfrs-la64.patch create mode 100644 docker-machine/docker-machine-la64.patch create 
mode 100644 docker/moby-la64.patch create mode 100644 edk2/60-edk2-loongarch64.json create mode 100644 edk2/edk2-use-env-toolchains.patch create mode 100644 edk2/relax_edk2_gcc14.diff create mode 100644 efitools/efitools-la64.patch create mode 100644 emacs/emacs-la64.patch create mode 100644 fbterm/0001-Fix-build-with-gcc-6.patch create mode 100644 fbterm/PKGBUILD create mode 100644 fbterm/color_palette.patch create mode 100644 fbterm/fbconfig.patch create mode 100644 fbterm/fbterm.patch create mode 100644 fbterm/fbtermrc create mode 100644 fbterm/fix_ftbfs_crosscompile.patch create mode 100644 fbterm/fix_ftbfs_epoll.patch create mode 100644 ffcall/libffcall-la64-2.4.patch create mode 100644 firefox-developer-edition/0001-Add-support-for-LoongArch64.patch create mode 100644 firefox-developer-edition/0002-Enable-VA-API-support-for-AMD-GPUs.patch create mode 100644 firefox-developer-edition/0003-Remove-architectural-limit-on-VA-API-support.patch create mode 100644 firefox-developer-edition/0004-Enable-WebRTC-for-LoongArch.patch create mode 100644 firefox-developer-edition/0005-Fix-libyuv-build-with-LSX-LASX.patch create mode 100644 firefox/0001-Add-support-for-LoongArch64.patch create mode 100644 firefox/0002-Enable-VA-API-support-for-AMD-GPUs.patch create mode 100644 firefox/0003-Remove-architectural-limit-on-VA-API-support.patch create mode 100644 firefox/0004-Enable-WebRTC-for-LoongArch.patch create mode 100644 firefox/0005-Fix-libyuv-build-with-LSX-LASX.patch create mode 100644 flashrom/0001-Loongson-3-SPI-tmp.patch create mode 100644 flashrom/0002-Speed-up.patch create mode 100644 flashrom/0003-Add-support-for-loongarch64.patch create mode 100644 fuse2/fuse-closefrom.patch create mode 100644 fuse2/fuse-loongarch.patch create mode 100644 fwupd-efi/0001-add-support-for-loongarch64.patch create mode 100644 fwupd/fwupd-1.9.5-loong64.patch create mode 100644 geos/geos-3.11.1-gcc13.patch create mode 100644 gloox/gloox-fix-build.patch create mode 100644 gn/gn-fix-build.patch create mode 100644 gnome-dictionary/gnome-dictionary-fix-meson.patch create mode 100644 gnome-font-viewer/gnome-font-viewer-meson.patch create mode 100644 gnome-tetravex/gnome-tetravex-fix-meson.patch create mode 100644 gnu-efi/gnu-efi-3.0.17-la64.patch create mode 100644 gtk4/gtk-objcopy.patch create mode 100644 hidapi/hidapi-fix-build.patch create mode 100644 hitori/hitori-fix-meson.patch create mode 100644 htmlcxx/htmlcxx-la64.patch create mode 100644 ipxe/ipxe-la64.patch delete mode 100644 java-openjdk/freedesktop-java.desktop delete mode 100644 java-openjdk/freedesktop-jconsole.desktop delete mode 100644 java-openjdk/freedesktop-jshell.desktop delete mode 100644 java-openjdk/install_jdk-openjdk.sh delete mode 100644 java-openjdk/install_jre-openjdk-headless.sh delete mode 100644 java-openjdk/install_jre-openjdk.sh create mode 100644 java11-openjdk/jdk11-11.0.20.1-la64.patch create mode 100644 java17-openjdk/jdk17-17.0.9.8-la64.patch create mode 100644 java8-openjdk/jdk8u382-la64.patch create mode 100644 jemalloc/add-loongarch64.patch create mode 100644 js102/js102-loong64-jit.patch create mode 100644 js91/mozjs-la64.patch create mode 100644 js91/spidermonkey-91-add-loongarch.patch create mode 100644 kcov/0001-Add-basic-support-for-LoongArch-architecture.patch create mode 100644 libb2/libb2-fix-build.patch create mode 100644 libfbclient/fbclient-la64-4.0.0.patch create mode 100644 libfbclient/fbclient-la64.patch create mode 100644 libfido2/libfido2-no-ssp.patch create mode 100644 libgda/libgda-la64.patch create mode 
100644 libopenraw/libopenraw-fix-build.patch create mode 100644 liborcus/liborcus-cstdint.patch create mode 100644 libotr/libotr-fix-build.patch create mode 100644 libsmbios/0001-add-support-for-loongarch64.patch create mode 100644 liburcu/userspace-rcu-loongarch64.patch create mode 100644 libvirt/libvirt-loongarch.patch create mode 100644 libyuv/0001-fix-build-error.patch create mode 100644 linux-hardened/config.la64 create mode 100644 linux-hardened/remove_shm_align_mask.diff create mode 100644 liteide/liteide-fix-build.patch create mode 100644 lld/lld-la64.patch create mode 100644 llvm/RuntimeDyld-MCJIT-Add-LoongArch-support.patch create mode 100644 llvm/llvm-newreloc-la64.patch create mode 100644 llvm14/llvm-loong64.patch create mode 100644 luarocks/luarocks-la64.patch create mode 100644 lxc/4363.patch create mode 100644 mariadb/mariadb-fix-build.patch create mode 100644 mate-terminal/theme-colors-false.patch create mode 100644 materialx/materialx-fix-build.patch create mode 100644 mkinitcpio-archiso/mkinitcpio-archiso-loong64.patch create mode 100644 mplayer/mplayer-la64.patch create mode 100644 musl/0001-musl-add-loongarch64-support.patch create mode 100644 nautilus-sendto/nautilus-sendto-fix-meson.patch create mode 100644 neofetch/neofetch-la64.patch create mode 100644 netplan/netplan-disable-pandoc.patch create mode 100644 ntp/0001-libntp-Do-not-use-PTHREAD_STACK_MIN-on-glibc.patch create mode 100644 ntp/ntp-ssp-la.patch create mode 100644 ocaml/ocaml-5.0.0-la64.patch create mode 100644 open-iscsi/open-iscsi-fix-build.patch create mode 100644 openblas/fix-loong.patch create mode 100644 openjade/openjade-nola.patch create mode 100644 openmp/openmp-loong64.patch create mode 100644 pacman-contrib/pkgbuild-vim-la64.patch create mode 100644 pixman/83.patch create mode 100644 pkgfile/pkgfile-use-loong64.patch create mode 100644 postgresql/add-loongarch-support.patch create mode 100644 psiconv/psiconv-fix-build.patch create mode 100644 python-greenlet/python-greenlet-la64.patch create mode 100644 python-nodeenv/nodeenv-loong64.patch create mode 100644 python-numpy/add-loongarch-support.patch create mode 100644 python-pyelftools/pyelftools-0.29.patch create mode 100644 python-simple-term-menu/cjk-preview.patch create mode 100644 qemu/qemu-4k-pagesize.patch create mode 100644 qemu/qemu-kvm-la64.patch create mode 100644 qt5-doc/qt5-base-la64.patch create mode 100644 qt5-script/loongarch_ports.patch create mode 100644 qt5-script/qt5-base-la64.patch create mode 100644 qt5-webengine/qtwebengine-5.15.2-la64.patch create mode 100644 qt6-quick3dphysics/qt3d-la64.patch create mode 100644 qt6-tools/qt6-tools-fix-build.patch create mode 100644 qtcreator/qtcreator-la64.patch create mode 100644 refind/refind-la64-0.14.0.patch create mode 100644 repod/0001-add-loong64-support.patch create mode 100644 rpm-tools/rpm-add-loongarch.patch create mode 100644 runc/runc-la64.patch create mode 100644 sbsigntools/sbsigntools-la64.patch create mode 100644 signon-ui/signon-ui-loong64-bad-fix.patch create mode 100644 texlive-bin/texlive-bin-la64.patch create mode 100644 thunderbird/firefox-115-loong.patch create mode 100644 tracker3-miners/tracker-miners-fix-build.patch create mode 100644 upx/0001-just-for-la64-build.patch create mode 100644 valgrind/valgrind-3.21-la64.patch create mode 100644 virt-manager/600.patch create mode 100644 vtk/vtk-loong64.patch create mode 100644 webkit2gtk-4.1/webkit2-gtk-fix-build.patch create mode 100644 webkit2gtk-4.1/webkit2gtk-fix-cmake-build.patch create mode 100644 
webkit2gtk/webkit2-gtk-fix-build.patch create mode 100644 webkit2gtk/webkit2gtk-fix-cmake-build.patch create mode 100644 webkitgtk-6.0/webkit2gtk-fix-cmake-build.patch create mode 100644 webrtc-audio-processing-1/webrtc-audio-processing-la64.patch create mode 100644 webrtc-audio-processing/webrtc-audio-processing-la64.patch create mode 100644 wpewebkit/webkit2gtk-fix-cmake-build.patch create mode 100644 x86_64-linux-gnu-binutils/PKGBUILD create mode 100644 x86_64-linux-gnu-binutils/keys/pgp/3A24BC1E8FB409FA9F14371813FCEF89DD9E3C4F.asc create mode 100644 x86_64-linux-gnu-gcc/PKGBUILD create mode 100644 x86_64-linux-gnu-gcc/keys/pgp/13975A70E63C361C73AE69EF6EEB81F8981C74C7.asc create mode 100644 x86_64-linux-gnu-gcc/keys/pgp/33C235A34C46AA3FFB293709A328C3A2C3C45C06.asc create mode 100644 x86_64-linux-gnu-gcc/keys/pgp/D3A93CAD751C2AF4F8C7AD516C35B99309B5FA62.asc create mode 100644 x86_64-linux-gnu-gdb/PKGBUILD create mode 100644 x86_64-linux-gnu-glibc/PKGBUILD create mode 100644 x86_64-linux-gnu-glibc/keys/pgp/7273542B39962DF7B299931416792B4EA25340F8.asc create mode 100644 x86_64-linux-gnu-glibc/keys/pgp/BC7C7372637EC10C57D7AA6579C43DFBF1CF2187.asc create mode 100644 x86_64-linux-gnu-glibc/reenable_DT_HASH.patch create mode 100644 x86_64-linux-gnu-glibc/sdt-config.h create mode 100644 x86_64-linux-gnu-glibc/sdt.h create mode 100644 x86_64-linux-gnu-linux-api-headers/PKGBUILD create mode 100644 xf86-video-loongson/PKGBUILD create mode 100644 xorg-server/0001-modesetting-match-against-Multimedia-Video-Controlle.patch create mode 100644 xorg-server/10-modeset.conf create mode 100644 xsd/0120-g++10.patch create mode 100644 xsd/xsd-c++17.patch diff --git a/0ad/0ad-fix-build.patch b/0ad/0ad-fix-build.patch new file mode 100644 index 0000000000..d3ffd43165 --- /dev/null +++ b/0ad/0ad-fix-build.patch @@ -0,0 +1,18 @@ +--- a/build.sh 2023-04-20 17:19:57.567640306 +0800 ++++ b/build.sh 2023-04-20 17:20:56.920551226 +0800 +@@ -140,6 +140,7 @@ + ${CONF_OPTS} \ + --enable-debug \ + --disable-optimize \ ++ --disable-new-pass-manager \ + --enable-gczeal + ${MAKE} ${MAKE_OPTS} + cd .. +@@ -151,6 +152,7 @@ + LLVM_OBJDUMP="${LLVM_OBJDUMP}" \ + ${CONF_OPTS} \ ++ --disable-new-pass-manager \ + --enable-optimize + ${MAKE} ${MAKE_OPTS} + cd .. 
+ diff --git a/0ad/0ad-la64.patch b/0ad/0ad-la64.patch new file mode 100644 index 0000000000..2d0d6a3f72 --- /dev/null +++ b/0ad/0ad-la64.patch @@ -0,0 +1,97 @@ +diff --git a/libraries/source/spidermonkey/mozjs-78.6.0/build/gyp_base.mozbuild b/libraries/source/spidermonkey/mozjs-78.6.0/build/gyp_base.mozbuild +index 9344cc4e7..6c3f64524 100644 +--- a/libraries/source/spidermonkey/mozjs-78.6.0/build/gyp_base.mozbuild ++++ b/libraries/source/spidermonkey/mozjs-78.6.0/build/gyp_base.mozbuild +@@ -32,6 +32,7 @@ arches = { + 'x86_64': 'x64', + 'x86': 'ia32', + 'aarch64': 'arm64', ++ 'loongarch64': 'loongarch64', + } + + gyp_vars['host_arch'] = arches.get(CONFIG['HOST_CPU_ARCH'], CONFIG['HOST_CPU_ARCH']) +diff --git a/libraries/source/spidermonkey/mozjs-78.6.0/build/moz.configure/init.configure b/libraries/source/spidermonkey/mozjs-78.6.0/build/moz.configure/init.configure +index 2fdeb5497..3a906aef8 100644 +--- a/libraries/source/spidermonkey/mozjs-78.6.0/build/moz.configure/init.configure ++++ b/libraries/source/spidermonkey/mozjs-78.6.0/build/moz.configure/init.configure +@@ -738,6 +738,9 @@ def split_triplet(triplet, allow_msvc=False): + elif cpu.startswith('aarch64'): + canonical_cpu = 'aarch64' + endianness = 'little' ++ elif cpu.startswith('loongarch64'): ++ canonical_cpu = 'loongarch64' ++ endianness = 'little' + elif cpu == 'sh4': + canonical_cpu = 'sh4' + endianness = 'little' +diff --git a/libraries/source/spidermonkey/mozjs-78.6.0/intl/icu/source/i18n/double-conversion-utils.h b/libraries/source/spidermonkey/mozjs-78.6.0/intl/icu/source/i18n/double-conversion-utils.h +index 8c6a0e16e..afb63176d 100644 +--- a/libraries/source/spidermonkey/mozjs-78.6.0/intl/icu/source/i18n/double-conversion-utils.h ++++ b/libraries/source/spidermonkey/mozjs-78.6.0/intl/icu/source/i18n/double-conversion-utils.h +@@ -123,6 +123,7 @@ int main(int argc, char** argv) { + defined(_POWER) || defined(_ARCH_PPC) || defined(_ARCH_PPC64) || \ + defined(__sparc__) || defined(__sparc) || defined(__s390__) || \ + defined(__SH4__) || defined(__alpha__) || \ ++ defined(__loongarch64) || \ + defined(_MIPS_ARCH_MIPS32R2) || defined(__ARMEB__) ||\ + defined(__AARCH64EL__) || defined(__aarch64__) || defined(__AARCH64EB__) || \ + defined(__riscv) || defined(__e2k__) || \ +diff --git a/libraries/source/spidermonkey/mozjs-78.6.0/js/src/jit/AtomicOperations.h b/libraries/source/spidermonkey/mozjs-78.6.0/js/src/jit/AtomicOperations.h +index 0486cbad1..0a95e11e7 100644 +--- a/libraries/source/spidermonkey/mozjs-78.6.0/js/src/jit/AtomicOperations.h ++++ b/libraries/source/spidermonkey/mozjs-78.6.0/js/src/jit/AtomicOperations.h +@@ -391,7 +391,7 @@ inline bool AtomicOperations::isLockfreeJS(int32_t size) { + #elif defined(__ppc__) || defined(__PPC__) || defined(__sparc__) || \ + defined(__ppc64__) || defined(__PPC64__) || defined(__ppc64le__) || \ + defined(__PPC64LE__) || defined(__alpha__) || defined(__hppa__) || \ +- defined(__sh__) || defined(__s390__) || defined(__s390x__) ++ defined(__loongarch64) || defined(__sh__) || defined(__s390__) || defined(__s390x__) + # include "jit/shared/AtomicOperations-feeling-lucky.h" + #else + # error "No AtomicOperations support provided for this platform" +diff --git a/libraries/source/spidermonkey/mozjs-78.6.0/mfbt/double-conversion/double-conversion/utils.h b/libraries/source/spidermonkey/mozjs-78.6.0/mfbt/double-conversion/double-conversion/utils.h +index f031495b7..55a40ab39 100644 +--- a/libraries/source/spidermonkey/mozjs-78.6.0/mfbt/double-conversion/double-conversion/utils.h ++++ 
b/libraries/source/spidermonkey/mozjs-78.6.0/mfbt/double-conversion/double-conversion/utils.h +@@ -93,6 +93,7 @@ int main(int argc, char** argv) { + #if defined(_M_X64) || defined(__x86_64__) || \ + defined(__ARMEL__) || defined(__avr32__) || defined(_M_ARM) || defined(_M_ARM64) || \ + defined(__hppa__) || defined(__ia64__) || \ ++ defined(__loongarch__) || \ + defined(__mips__) || \ + defined(__powerpc__) || defined(__ppc__) || defined(__ppc64__) || \ + defined(_POWER) || defined(_ARCH_PPC) || defined(_ARCH_PPC64) || \ +diff --git a/libraries/source/spidermonkey/mozjs-78.6.0/mfbt/tests/TestPoisonArea.cpp b/libraries/source/spidermonkey/mozjs-78.6.0/mfbt/tests/TestPoisonArea.cpp +index fbd336471..1de67549e 100644 +--- a/libraries/source/spidermonkey/mozjs-78.6.0/mfbt/tests/TestPoisonArea.cpp ++++ b/libraries/source/spidermonkey/mozjs-78.6.0/mfbt/tests/TestPoisonArea.cpp +@@ -141,6 +141,9 @@ + #elif defined __hppa + # define RETURN_INSTR 0xe840c002 /* bv,n r0(rp) */ + ++#elif defined __loongarch64 ++# define RETURN_INSTR 0x4c000020 /* jirl */ ++ + #elif defined __mips + # define RETURN_INSTR 0x03e00008 /* jr ra */ + +diff --git a/libraries/source/spidermonkey/mozjs-78.6.0/python/mozbuild/mozbuild/configure/constants.py b/libraries/source/spidermonkey/mozjs-78.6.0/python/mozbuild/mozbuild/configure/constants.py +index 7542dcdc6..87e1461be 100644 +--- a/libraries/source/spidermonkey/mozjs-78.6.0/python/mozbuild/mozbuild/configure/constants.py ++++ b/libraries/source/spidermonkey/mozjs-78.6.0/python/mozbuild/mozbuild/configure/constants.py +@@ -45,6 +45,7 @@ CPU_bitness = { + 'arm': 32, + 'hppa': 32, + 'ia64': 64, ++ 'loongarch64': 64, + 'mips32': 32, + 'mips64': 64, + 'ppc': 32, +@@ -83,6 +84,7 @@ CPU_preprocessor_checks = OrderedDict(( + ('ppc', '__powerpc__'), + ('Alpha', '__alpha__'), + ('hppa', '__hppa__'), ++ ('loongarch64', '__loongarch64'), + ('sparc64', '__sparc__ && __arch64__'), + ('sparc', '__sparc__'), + ('mips64', '__mips64'), diff --git a/0ad/PKGBUILD b/0ad/PKGBUILD index 7286e409a0..453c704d8a 100644 --- a/0ad/PKGBUILD +++ b/0ad/PKGBUILD @@ -20,12 +20,16 @@ source=("https://releases.wildfiregames.com/$pkgname-$_pkgver-unix-build.tar.xz" https://github.com/0ad/0ad/commit/839edc3a.patch https://github.com/0ad/0ad/commit/093e1eb2.patch https://github.com/0ad/0ad/commit/d2426312.patch) + 0ad-fix-build.patch + 0ad-la64.patch) sha512sums=('aaf647d5d8454c244015d2a198beeaaebc571a5bc96446f3acff8dbd05f9843029c500bf4162651a5e1fcdb42bd5fb5b4f5c512c78372479fbd8565dd093f272' '3a0f935ab05e1c0d6a926ba02a5ed72afbb94b6910acaad77661b927680f192a06c7614287fad2ff8a54e3e1ee814614c9abfc9497a27e86b9e58ae1f6eebbfb' '748a75420541947e2a215b3a8789a0e137179e4981d0977e1c4b20cd7b86af2d96b9976e04d60ace8d5ee465d542cadc42ee9bceedaaa97d2b320f533e3e3892' '1dfc8a0c6ac29040f72d9bbf6b631a74cbdec444b9078a015345139228666354d9b5059f85b640ce3afc0f590bcbe8afd5e158509a0c95751e1cd69fece46876' 'a7fd1454385f56b7c8cb0fc6ac001761d4419df4aeec570ba846c7df4eb327d25b9ff1a7946cb334315109fa90ca2c1820583619f4e1ec5d53805afa08e10093' '5f32d47f01d845e07b2f919c9b04ac5e50dc9977fa97f981eba4a53677a29d797d0d76bc385ac047dd7c7d24af7d95cd8256d433bd43ce1a6606763c0ea736cb') + '956effc37bbad8ca44a5e82a8750ca82c1c2347152dd684ebc2921953d4fa81ef9291b5bb5de05559b2b4ef79c336b837216892f0bcf806e50aac8c4ea42edde' + 'c01e52a4241736eda82f6002c3627d9c4b5b505109969fc608d95dd71db8681df8f3de6a372bca8fe977bee14f5180f4c27681e40d26b0a06ddc556122886d04') prepare() { cd "$pkgname-$_pkgver" @@ -36,6 +40,12 @@ prepare() { patch -p1 -i ../839edc3a.patch # Fix build with 
fmt 10 patch -p1 -i ../093e1eb2.patch # Fix build with GCC 13 patch -p1 -i ../d2426312.patch # Fix build with libxml2 2.12 + + patch -d libraries/source/spidermonkey -p1 -i $srcdir/0ad-fix-build.patch + cp $srcdir/0ad-la64.patch libraries/source/spidermonkey/0ad-la64.patch + echo "patch -p5 < ../0ad-la64.patch" >> libraries/source/spidermonkey/patch.sh + + echo "cp /usr/share/automake-1.16/config.* build/autoconf/" >> libraries/source/spidermonkey/patch.sh } build() { diff --git a/a2ps/PKGBUILD b/a2ps/PKGBUILD index 05f2552547..cf306591f0 100644 --- a/a2ps/PKGBUILD +++ b/a2ps/PKGBUILD @@ -28,6 +28,8 @@ prepare() { build() { cd ${pkgname}-${pkgver} + CFLAGS=${CFLAGS/-Wformat -Werror=format-security/} + CXXFLAGS=${CXXFLAGS/-Wformat -Werror=format-security/} libtoolize --force --copy autoreconf --force --install -I m4 LIBS+="-lm" ./configure --prefix=/usr --sysconfdir=/etc/a2ps \ diff --git a/aardvark-dns/PKGBUILD b/aardvark-dns/PKGBUILD index 4a219235cf..7e1798efc3 100644 --- a/aardvark-dns/PKGBUILD +++ b/aardvark-dns/PKGBUILD @@ -31,7 +31,7 @@ pkgver() { prepare() { cd $pkgname - cargo fetch --locked --target "$CARCH-unknown-linux-gnu" + cargo fetch --locked --target "`uname -m`-unknown-linux-gnu" } build() { diff --git a/abseil-cpp/PKGBUILD b/abseil-cpp/PKGBUILD index 9600745050..fbf442e730 100644 --- a/abseil-cpp/PKGBUILD +++ b/abseil-cpp/PKGBUILD @@ -11,13 +11,16 @@ license=('Apache') depends=('gcc-libs') makedepends=('cmake' 'gtest') source=("https://github.com/abseil/abseil-cpp/archive/$pkgver/$pkgname-$pkgver.tar.gz" - scoped-mock-log.patch) + scoped-mock-log.patch + abseil-cpp-la64.patch) sha256sums=('987ce98f02eefbaf930d6e38ab16aa05737234d7afbab2d5c4ea7adbe50c28ed' - 'a6cbc612a2b96fcbd52d081e03e8581107ceb4827edb19d96510a31c568e1396') + 'a6cbc612a2b96fcbd52d081e03e8581107ceb4827edb19d96510a31c568e1396' + '8817cf256a94dd9059bccd540a4d6bbe0d9606c600e8543a1b1011226c350b23') prepare() { cd "$srcdir/$pkgname-$pkgver" patch -p1 -i ../scoped-mock-log.patch # Install target needed by protobuf + patch -p1 -i $srcdir/abseil-cpp-la64.patch } build() { diff --git a/abseil-cpp/abseil-cpp-la64.patch b/abseil-cpp/abseil-cpp-la64.patch new file mode 100644 index 0000000000..6b538ea737 --- /dev/null +++ b/abseil-cpp/abseil-cpp-la64.patch @@ -0,0 +1,13 @@ +Index: abseil-cpp-20211102.0/absl/debugging/internal/examine_stack.cc +=================================================================== +--- abseil-cpp-20211102.0.orig/absl/debugging/internal/examine_stack.cc ++++ abseil-cpp-20211102.0/absl/debugging/internal/examine_stack.cc +@@ -57,6 +57,8 @@ void* GetProgramCounter(void* vuc) { + return reinterpret_cast(context->uc_mcontext.gregs[14]); + #elif defined(__ia64__) + return reinterpret_cast(context->uc_mcontext.sc_ip); ++#elif defined(__loongarch64) ++ return reinterpret_cast(context->uc_mcontext.__pc); + #elif defined(__m68k__) + return reinterpret_cast(context->uc_mcontext.gregs[16]); + #elif defined(__mips__) diff --git a/acme-redirect/PKGBUILD b/acme-redirect/PKGBUILD index 46bb1a042e..d41035a003 100644 --- a/acme-redirect/PKGBUILD +++ b/acme-redirect/PKGBUILD @@ -22,13 +22,20 @@ validpgpkeys=("64B13F7117D6E07D661BBCE0FE763A64F5E54FD6") prepare() { cd "${pkgname}-${pkgver}" - cargo fetch --locked --target "$CARCH-unknown-linux-gnu" + mkdir .cargo + cat > .cargo/config.toml < 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 7)) + #define USE_MS_ABI 1 + #endif +@@ -189,11 +189,11 @@ + + #define VOID void + +-#if defined(__ia64__) || defined(__x86_64__) ++#if defined(__ia64__) || defined(__x86_64__) || 
defined(__loongarch64) + + #define ACPI_MACHINE_WIDTH 64 + +-#if defined(__x86_64__) ++#if defined(__x86_64__) || defined(__loongarch64) + + /* for x86_64, EFI_FUNCTION_WRAPPER must be defined */ + +Index: acpica-unix-20210331/source/include/platform/aclinux.h +=================================================================== +--- acpica-unix-20210331.orig/source/include/platform/aclinux.h ++++ acpica-unix-20210331/source/include/platform/aclinux.h +@@ -325,6 +325,7 @@ + #if defined(__ia64__) || (defined(__x86_64__) && !defined(__ILP32__)) ||\ + defined(__aarch64__) || defined(__PPC64__) ||\ + defined(__s390x__) ||\ ++ defined(__loongarch64) ||\ + (defined(__riscv) && (defined(__LP64__) || defined(_LP64))) + #define ACPI_MACHINE_WIDTH 64 + #define COMPILER_DEPENDENT_INT64 long diff --git a/aida-x/PKGBUILD b/aida-x/PKGBUILD index 8e66ad45b7..c10a644983 100644 --- a/aida-x/PKGBUILD +++ b/aida-x/PKGBUILD @@ -172,5 +172,5 @@ package_aida-x-vst3() { ) # mv -v $pkgname/* "$pkgdir" - install -vDm 755 build/bin/$_name.vst3/Contents/$CARCH-linux/*.so -t "$pkgdir/usr/lib/vst3/$_name.vst3/Contents/$CARCH-linux/" + install -vDm 755 build/bin/$_name.vst3/Contents/`uname -m`-linux/*.so -t "$pkgdir/usr/lib/vst3/$_name.vst3/Contents/`uname -m`-linux/" } diff --git a/aiksaurus/PKGBUILD b/aiksaurus/PKGBUILD index 86a5b37543..9a4acc1985 100644 --- a/aiksaurus/PKGBUILD +++ b/aiksaurus/PKGBUILD @@ -11,19 +11,24 @@ arch=('loong64' 'x86_64') depends=('gcc-libs') source=(https://downloads.sourceforge.net/${pkgname}/${pkgname}-${pkgver}.tar.gz aiksaurus-gcc44.patch - format-security.patch) + format-security.patch + aiksaurus-fix-build.patch) sha512sums=('48591850f28f1a8f4b4986df14090ef7bd57cbfbad739cb0013db021f6f5bcb3c592b38e36774735499e27b9e99330504f8d9c6022158e25469cbc81d13f7463' '66db53f7499425eb1ff572df4a674f378ed681edeb48ea5926b21d39c8a399c36740e03de101e23a47e043fcce29f66a03c64dc813dc5beda1043d0057711fa5' - '72ebdc63cbb2c56bd8274f92501cbbae8c66e7d81b11b8fdeda38149da4bf44ab88699d248093eedc9813f6cd60e2a1f360fb39d778b4b7827777b1d003ab1dc') + '72ebdc63cbb2c56bd8274f92501cbbae8c66e7d81b11b8fdeda38149da4bf44ab88699d248093eedc9813f6cd60e2a1f360fb39d778b4b7827777b1d003ab1dc' + 'bcbffca8b6632b708550da7c15c46457f6e9b74e4bd5705c0310a51f0bf37ba215e328a88e7f4d870276414d646aa64c88933ab22dd64d6b69215030958b1a4e') prepare() { cd $pkgname-$pkgver patch -p1 < ../format-security.patch patch -p0 < ../aiksaurus-gcc44.patch + patch -p1 -i $srcdir/aiksaurus-fix-build.patch } build() { cd ${pkgname}-${pkgver} + CFLAGS=${CFLAGS/-Wformat -Werror=format-security/} + CXXFLAGS=${CXXFLAGS/-Wformat -Werror=format-security/} export CXXFLAGS+=' -std=c++14' ./configure --prefix=/usr diff --git a/aiksaurus/aiksaurus-fix-build.patch b/aiksaurus/aiksaurus-fix-build.patch new file mode 100644 index 0000000000..05a717398a --- /dev/null +++ b/aiksaurus/aiksaurus-fix-build.patch @@ -0,0 +1,400 @@ +diff -uNr aiksaurus-1.2.1/base/Aiksaurus.cpp aiksaurus-1.2.1.ok/base/Aiksaurus.cpp +--- aiksaurus-1.2.1/base/Aiksaurus.cpp 2003-08-09 00:18:43.000000000 +0800 ++++ aiksaurus-1.2.1.ok/base/Aiksaurus.cpp 2022-06-23 12:28:39.298196569 +0800 +@@ -98,21 +98,21 @@ + public: + + ThesaurusImpl(const char* mfile, const char* wfile) +- throw(AiksaurusException); ++ noexcept(false); + + ~ThesaurusImpl() throw(); + + const char* word() const throw(); + +- bool find(const char* word) throw(AiksaurusException); +- const char* next(int& id) throw(AiksaurusException); +- const char* similar() throw(AiksaurusException); ++ bool find(const char* word) noexcept(false); ++ 
const char* next(int& id) noexcept(false); ++ const char* similar() noexcept(false); + }; + + + + ThesaurusImpl::ThesaurusImpl(const char* mfile, const char* wfile) +-throw(AiksaurusException) ++noexcept(false) + : d_meanings(mfile), + d_words(wfile), + d_links(NULL), +@@ -152,7 +152,7 @@ + + + bool +-ThesaurusImpl::find(const char* word) throw(AiksaurusException) ++ThesaurusImpl::find(const char* word) noexcept(false) + { + try + { +@@ -197,7 +197,7 @@ + + + +-const char* ThesaurusImpl::next(int& id) throw(AiksaurusException) ++const char* ThesaurusImpl::next(int& id) noexcept(false) + { + if (d_currentStream >= d_meaningStreams.size()) + return ""; +@@ -220,7 +220,7 @@ + } + + +-const char* ThesaurusImpl::similar() throw(AiksaurusException) ++const char* ThesaurusImpl::similar() noexcept(false) + { + if (d_similarID < d_similarStop) + { +diff -uNr aiksaurus-1.2.1/base/MeaningsFile.cpp aiksaurus-1.2.1.ok/base/MeaningsFile.cpp +--- aiksaurus-1.2.1/base/MeaningsFile.cpp 2003-06-15 18:17:56.000000000 +0800 ++++ aiksaurus-1.2.1.ok/base/MeaningsFile.cpp 2022-06-23 12:29:38.830290837 +0800 +@@ -74,7 +74,7 @@ + // MeaningStream::read + // Put the next integer in the stream into x. + // Return true normally, false if EOF. +- bool read(int& x) throw(AiksaurusException); ++ bool read(int& x) noexcept(false); + }; + + +@@ -104,7 +104,7 @@ + // a link. (EOF or problem reading file). + // + inline bool +-MeaningStream::read(int& x) throw(AiksaurusException) ++MeaningStream::read(int& x) noexcept(false) + { + bool ret = true; + +@@ -166,7 +166,7 @@ + // All we need to do is get a handle to the file. + // We'll also check to make sure our meanings file opens ok. + // +-MeaningsFile::MeaningsFile(const char* fname) throw(AiksaurusException) ++MeaningsFile::MeaningsFile(const char* fname) noexcept(false) + { + d_file_ptr = fopen(fname, "rb"); + if (!d_file_ptr) +@@ -195,7 +195,7 @@ + // line, plus one slot for end-of-links (-1). + // + int +-MeaningsFile::_readline(MeaningStream& s, int* buffer) throw(AiksaurusException) ++MeaningsFile::_readline(MeaningStream& s, int* buffer) noexcept(false) + { + int i = 0; + +@@ -225,7 +225,7 @@ + // the function is called. + // + int* +-MeaningsFile::getWords(int id) throw(AiksaurusException) ++MeaningsFile::getWords(int id) noexcept(false) + { + // First we need to create our buffer to return. + // We know that there are at most s_dataMaxLineLength +diff -uNr aiksaurus-1.2.1/base/MeaningsFile.h aiksaurus-1.2.1.ok/base/MeaningsFile.h +--- aiksaurus-1.2.1/base/MeaningsFile.h 2003-06-15 18:17:56.000000000 +0800 ++++ aiksaurus-1.2.1.ok/base/MeaningsFile.h 2022-06-23 12:22:49.261982237 +0800 +@@ -46,16 +46,16 @@ + + // Utility function + int _readline(MeaningStream& s, int* buffer) +- throw(AiksaurusException); ++ noexcept(false); + + public: + + // Creation and Destruction +- MeaningsFile(const char* fname) throw(AiksaurusException); ++ MeaningsFile(const char* fname) noexcept(false); + ~MeaningsFile() throw(); + + // Word Lookup +- int* getWords(int id) throw(AiksaurusException); ++ int* getWords(int id) noexcept(false); + }; + } + +diff -uNr aiksaurus-1.2.1/base/WordsFile.cpp aiksaurus-1.2.1.ok/base/WordsFile.cpp +--- aiksaurus-1.2.1/base/WordsFile.cpp 2003-06-15 18:17:58.000000000 +0800 ++++ aiksaurus-1.2.1.ok/base/WordsFile.cpp 2022-06-23 12:30:29.725543860 +0800 +@@ -112,7 +112,7 @@ + // Attempt to safely initialize the Words file. Might have problems + // with running out of memory or file not found. 
+ // +-WordsFile::WordsFile(const char* fname) throw(AiksaurusException) ++WordsFile::WordsFile(const char* fname) noexcept(false) + { + + try +@@ -171,7 +171,7 @@ + // Returns the index of the word on success, or -1 on + // failure. + // +-bool WordsFile::findWord(const char* str, int& index) throw(AiksaurusException) ++bool WordsFile::findWord(const char* str, int& index) noexcept(false) + { + // Create copy of str, so that we can turn spaces into colons. + // We only need to copy the first s_wordlen + 1 bytes to ensure +@@ -280,7 +280,7 @@ + // loadWord() + // Read a particular word from the Words file. + // +-void WordsFile::loadWord(int id) throw(AiksaurusException) ++void WordsFile::loadWord(int id) noexcept(false) + { + assert(id >= 0); + assert(id < getSize()); +diff -uNr aiksaurus-1.2.1/base/WordsFile.h aiksaurus-1.2.1.ok/base/WordsFile.h +--- aiksaurus-1.2.1/base/WordsFile.h 2003-06-15 18:17:59.000000000 +0800 ++++ aiksaurus-1.2.1.ok/base/WordsFile.h 2022-06-23 12:23:41.711944968 +0800 +@@ -55,12 +55,12 @@ + public: + + // Creation and Destruction +- WordsFile(const char* fname) throw(AiksaurusException); ++ WordsFile(const char* fname) noexcept(false); + ~WordsFile() throw(); + + // Word Lookup +- void loadWord(int id) throw(AiksaurusException); +- bool findWord(const char* str, int& index) throw(AiksaurusException); ++ void loadWord(int id) noexcept(false); ++ bool findWord(const char* str, int& index) noexcept(false); + + // Inspection + int getSize() const throw(); +diff -uNr aiksaurus-1.2.1/base/WordStream.h aiksaurus-1.2.1.ok/base/WordStream.h +--- aiksaurus-1.2.1/base/WordStream.h 2003-06-15 18:17:58.000000000 +0800 ++++ aiksaurus-1.2.1.ok/base/WordStream.h 2022-06-23 12:24:57.220828383 +0800 +@@ -38,7 +38,7 @@ + + public: + +- WordStream(int* words) throw(std::bad_alloc) ++ WordStream(int* words) noexcept(false) + { + for(int i = 0;words[i] != -1;++i) + d_words.push(words[i]); +diff -uNr aiksaurus-1.2.1/gtk/src/AiksaurusGTK.cpp aiksaurus-1.2.1.ok/gtk/src/AiksaurusGTK.cpp +--- aiksaurus-1.2.1/gtk/src/AiksaurusGTK.cpp 2003-05-28 07:46:37.000000000 +0800 ++++ aiksaurus-1.2.1.ok/gtk/src/AiksaurusGTK.cpp 2022-06-23 12:41:19.848044967 +0800 +@@ -54,7 +54,7 @@ + const char* runThesaurus(const char* word) throw(); + void setTitle(const char* title) throw(); + void setReplacebar(bool replacebar) throw(); +- void setInitialMessage(const char* message) throw(std::bad_alloc); ++ void setInitialMessage(const char* message) noexcept(false); + + void eventCancel() throw(); + void eventReplace(const char* replacement) throw(); +@@ -84,7 +84,7 @@ + } + + +- void DialogImpl::setInitialMessage(const char* message) throw(std::bad_alloc) ++ void DialogImpl::setInitialMessage(const char* message) noexcept(false) + { + d_initialMessage = message; + } +diff -uNr aiksaurus-1.2.1/gtk/src/Display.cpp aiksaurus-1.2.1.ok/gtk/src/Display.cpp +--- aiksaurus-1.2.1/gtk/src/Display.cpp 2022-06-23 12:46:37.102548374 +0800 ++++ aiksaurus-1.2.1.ok/gtk/src/Display.cpp 2022-06-23 12:43:10.935559296 +0800 +@@ -73,7 +73,7 @@ + + + void Display::_createMeaning(const string& title, vector& words) +- throw(std::bad_alloc) ++ noexcept(false) + { + Meaning *mean = new Meaning(title, words, *this); + d_meanings.push_back(mean); +@@ -98,7 +98,7 @@ + d_meanings.clear(); + } + +- void Display::_displayResults(const char* word) throw(Exception, std::bad_alloc) ++ void Display::_displayResults(const char* word) noexcept(false) + { + _checkThesaurus(); + +@@ -137,7 +137,7 @@ + + + +- void Display::_checkThesaurus() 
throw(Exception) ++ void Display::_checkThesaurus() noexcept(false) + { + if (d_thesaurus.error()[0]) + { +@@ -153,7 +153,7 @@ + } + + void Display::_displayAlternatives() +- throw(Exception, std::bad_alloc) ++ noexcept(false) + { + _checkThesaurus(); + vector words; +@@ -176,7 +176,7 @@ + gtk_widget_show_all(d_layout); + } + +- void Display::search(const char* word) throw(std::bad_alloc) ++ void Display::search(const char* word) noexcept(false) + { + try + { +@@ -197,7 +197,7 @@ + } + + +- void Display::_handleClick(bool isDoubleClick, const char* text) throw(std::bad_alloc) ++ void Display::_handleClick(bool isDoubleClick, const char* text) noexcept(false) + { + string str(text); // might throw + +diff -uNr aiksaurus-1.2.1/gtk/src/Display.h aiksaurus-1.2.1.ok/gtk/src/Display.h +--- aiksaurus-1.2.1/gtk/src/Display.h 2002-07-11 18:09:37.000000000 +0800 ++++ aiksaurus-1.2.1.ok/gtk/src/Display.h 2022-06-23 12:40:58.468835523 +0800 +@@ -29,17 +29,17 @@ + std::vector d_meanings; + + void _handleSelection(GtkWidget* list) throw(); +- void _handleClick(bool isDoubleClick, const char* text) throw(std::bad_alloc); ++ void _handleClick(bool isDoubleClick, const char* text) noexcept(false); + + void _resetDisplay() throw(); + + void _createMeaning(const std::string& title, std::vector& words) +- throw(std::bad_alloc); ++ noexcept(false); + +- void _displayResults(const char* word) throw(Exception, std::bad_alloc); +- void _displayAlternatives() throw(Exception, std::bad_alloc); ++ void _displayResults(const char* word) noexcept(false); ++ void _displayAlternatives() noexcept(false); + +- void _checkThesaurus() throw(Exception); ++ void _checkThesaurus() noexcept(false); + + static void _initResources() throw(); + +@@ -50,7 +50,7 @@ + const Aiksaurus& getThesaurus() const throw(); + GtkWidget* getDisplay() throw(); + +- void search(const char* word) throw(std::bad_alloc); ++ void search(const char* word) noexcept(false); + void showMessage(const char* message) throw(); + }; + +diff -uNr aiksaurus-1.2.1/gtk/src/Meaning.cpp aiksaurus-1.2.1.ok/gtk/src/Meaning.cpp +--- aiksaurus-1.2.1/gtk/src/Meaning.cpp 2003-05-28 07:56:55.000000000 +0800 ++++ aiksaurus-1.2.1.ok/gtk/src/Meaning.cpp 2022-06-23 12:43:59.713595459 +0800 +@@ -26,7 +26,7 @@ + } + + Meaning::Meaning(const string& title, vector& words, Display& display) +- throw(bad_alloc) ++ noexcept(false) + : d_title(title), d_words(words), d_display(display), d_lists(4) + { + d_masterLayout = gtk_event_box_new(); +@@ -142,7 +142,7 @@ + + gint Meaning::_wordclick + (GtkWidget* list, gint row, gint col, GdkEventButton *e, gpointer data) +- throw(std::bad_alloc) ++ noexcept(false) + { + Meaning *m = static_cast(data); + m->d_display._handleSelection(GTK_WIDGET(list)); +diff -uNr aiksaurus-1.2.1/gtk/src/Meaning.h aiksaurus-1.2.1.ok/gtk/src/Meaning.h +--- aiksaurus-1.2.1/gtk/src/Meaning.h 2003-05-28 07:56:55.000000000 +0800 ++++ aiksaurus-1.2.1.ok/gtk/src/Meaning.h 2022-06-23 12:42:22.403620376 +0800 +@@ -24,12 +24,12 @@ + GtkWidget* d_label; + + static gint _wordclick(GtkWidget* l, gint row, gint col, +- GdkEventButton *e, gpointer data) throw(std::bad_alloc); ++ GdkEventButton *e, gpointer data) noexcept(false); + + public: + + Meaning(const string& title, vector& words, Display& display) +- throw(std::bad_alloc); ++ noexcept(false); + + ~Meaning() throw(); + +diff -uNr aiksaurus-1.2.1/gtk/src/Toolbar.cpp aiksaurus-1.2.1.ok/gtk/src/Toolbar.cpp +--- aiksaurus-1.2.1/gtk/src/Toolbar.cpp 2004-06-12 12:12:57.000000000 +0800 ++++ 
aiksaurus-1.2.1.ok/gtk/src/Toolbar.cpp 2022-06-23 12:45:07.159865371 +0800 +@@ -26,7 +26,7 @@ + namespace AiksaurusGTK_impl + { + +- Toolbar::Toolbar(DialogMediator& mediator, GtkWidget* window) throw(std::bad_alloc) ++ Toolbar::Toolbar(DialogMediator& mediator, GtkWidget* window) noexcept(false) + : d_mediator(mediator), + d_searchbar_words(12), + d_ishistorymove(false), +@@ -96,7 +96,7 @@ + + } + +- void Toolbar::_updateNavigation() throw(std::bad_alloc) ++ void Toolbar::_updateNavigation() noexcept(false) + { + if (d_history.size_back()) + d_backbutton_ptr->enable(); +@@ -115,7 +115,7 @@ + d_forwardbutton_ptr->updateMenuOptions(); + } + +- void Toolbar::search(const char* str) throw(std::bad_alloc) ++ void Toolbar::search(const char* str) noexcept(false) + { + if (!d_ishistorymove) + d_history.search(str); +diff -uNr aiksaurus-1.2.1/gtk/src/Toolbar.h aiksaurus-1.2.1.ok/gtk/src/Toolbar.h +--- aiksaurus-1.2.1/gtk/src/Toolbar.h 2003-05-28 07:46:42.000000000 +0800 ++++ aiksaurus-1.2.1.ok/gtk/src/Toolbar.h 2022-06-23 12:38:42.279404124 +0800 +@@ -56,7 +56,7 @@ + GtkWidget* d_searchbar_ptr; + GtkWidget* d_searchbar_label_ptr; + +- void _updateNavigation() throw(std::bad_alloc); ++ void _updateNavigation() noexcept(false); + + void _setTooltip(GtkWidget* w, const char* str) throw(); + +@@ -75,14 +75,14 @@ + + public: + +- Toolbar(DialogMediator& mediator, GtkWidget* window) throw(std::bad_alloc); ++ Toolbar(DialogMediator& mediator, GtkWidget* window) noexcept(false); + ~Toolbar() throw(); + + GtkWidget* getToolbar() throw(); + const char* getText() const throw(); + void focus() throw(); + +- void search(const char* str) throw(std::bad_alloc); ++ void search(const char* str) noexcept(false); + }; + + } diff --git a/alacritty/PKGBUILD b/alacritty/PKGBUILD index b8adf73210..27dca355eb 100644 --- a/alacritty/PKGBUILD +++ b/alacritty/PKGBUILD @@ -23,7 +23,7 @@ sha256sums=('SKIP') prepare() { cd "$pkgname" - cargo fetch --locked --target "$CARCH-unknown-linux-gnu" + cargo fetch --locked --target "`uname -m`-unknown-linux-gnu" } build(){ diff --git a/alsa-tools/PKGBUILD b/alsa-tools/PKGBUILD index d382cd111b..be16ce2bc2 100644 --- a/alsa-tools/PKGBUILD +++ b/alsa-tools/PKGBUILD @@ -6,7 +6,7 @@ pkgname=alsa-tools pkgver=1.2.5 -pkgrel=2 +pkgrel=4 pkgdesc="Advanced tools for certain sound cards" arch=(loong64 x86_64) url="https://alsa-project.org/" diff --git a/android-tools/PKGBUILD b/android-tools/PKGBUILD index 0418f8116e..63f6c3ac51 100644 --- a/android-tools/PKGBUILD +++ b/android-tools/PKGBUILD @@ -12,8 +12,15 @@ url='http://tools.android.com/' license=(Apache MIT) depends=(libusb protobuf brotli zstd android-udev pcre2) makedepends=(gtest cmake go ninja git) -source=(https://github.com/nmeum/android-tools/releases/download/$_tag/android-tools-$_tag.tar.xz) -sha256sums=('7a22ff9cea81ff4f38f560687858e8f8fb733624412597e3cc1ab0262f8da3a1') +source=(https://github.com/nmeum/android-tools/releases/download/$_tag/android-tools-$_tag.tar.xz + android-tools-la64.patch) +sha256sums=('7a22ff9cea81ff4f38f560687858e8f8fb733624412597e3cc1ab0262f8da3a1' + '1a9c66a0c00eba62ad4a7babd26047f8f48dac4095f2849f75f7648c5f366d01') + +prepare() { + cd android-tools-$_tag + patch -p1 -i ../android-tools-la64.patch +} build() { cd android-tools-$_tag diff --git a/android-tools/android-tools-la64.patch b/android-tools/android-tools-la64.patch new file mode 100644 index 0000000000..e192cd01d8 --- /dev/null +++ b/android-tools/android-tools-la64.patch @@ -0,0 +1,13 @@ +Index: 
android-tools-34.0.1/vendor/boringssl/include/openssl/base.h +=================================================================== +--- android-tools-34.0.1.orig/vendor/boringssl/include/openssl/base.h ++++ android-tools-34.0.1/vendor/boringssl/include/openssl/base.h +@@ -118,6 +118,8 @@ extern "C" { + #define OPENSSL_32_BIT + #elif defined(__myriad2__) + #define OPENSSL_32_BIT ++#elif defined(__loongarch_lp64) ++#define OPENSSL_64_BIT + #else + // Note BoringSSL only supports standard 32-bit and 64-bit two's-complement, + // little-endian architectures. Functions will not produce the correct answer diff --git a/anewer/PKGBUILD b/anewer/PKGBUILD index d95b896baa..674fad0604 100644 --- a/anewer/PKGBUILD +++ b/anewer/PKGBUILD @@ -13,7 +13,7 @@ b2sums=('b6a65f94b5d57ecd7947c75dda57c8c1166c94b2677a6fe25bf1fede8af49e4546429f3 prepare() { cd "${pkgname}-${pkgver}" - cargo fetch --locked --target "$CARCH-unknown-linux-gnu" + cargo fetch --locked --target "`uname -m`-unknown-linux-gnu" } build() { diff --git a/angle-grinder/PKGBUILD b/angle-grinder/PKGBUILD index 818d5ba8ae..6bae5b9643 100644 --- a/angle-grinder/PKGBUILD +++ b/angle-grinder/PKGBUILD @@ -23,7 +23,7 @@ pkgver() { prepare() { cd "$pkgname" - cargo fetch --locked --target "$CARCH-unknown-linux-gnu" + cargo fetch --locked --target "`uname -m`-unknown-linux-gnu" } build() { diff --git a/apache-orc/PKGBUILD b/apache-orc/PKGBUILD index a50af9a7be..b80163550b 100644 --- a/apache-orc/PKGBUILD +++ b/apache-orc/PKGBUILD @@ -11,7 +11,7 @@ url="https://orc.apache.org" license=(Apache) depends=(lz4 protobuf snappy zlib zstd) makedepends=(cmake) -checkdepends=(gtest) +makedepends+=(gtest) source=(https://dlcdn.apache.org/${_pkg}/${_pkg}-${pkgver}/${_pkg}-${pkgver}.tar.gz{,.asc}) sha256sums=('0dca8bbccdb2ee87e59ba964933436beebd02ea78c4134424828a8127fbc4faa' 'SKIP') @@ -40,7 +40,9 @@ build(){ -DORC_PREFER_STATIC_ZLIB=OFF \ -DBUILD_LIBHDFSPP=OFF \ -DBUILD_JAVA=OFF \ - -DINSTALL_VENDORED_LIBS=OFF + -DSTOP_BUILD_ON_WARNING=OFF \ + -DINSTALL_VENDORED_LIBS=OFF \ + -DBUILD_CPP_TESTS=OFF make -C build } diff --git a/apptainer/PKGBUILD b/apptainer/PKGBUILD index 8a7892543d..28bc401586 100644 --- a/apptainer/PKGBUILD +++ b/apptainer/PKGBUILD @@ -72,6 +72,9 @@ build() { # provide version to build script echo "$pkgver" > VERSION + go mod edit -replace=go.etcd.io/bbolt=go.etcd.io/bbolt@v1.3.8 + go mod edit -replace=github.com/cilium/ebpf=github.com/cilium/ebpf@v0.12.3 + go mod tidy # set Go flags export CGO_CPPFLAGS="${CPPFLAGS}" diff --git a/apr-util/PKGBUILD b/apr-util/PKGBUILD index 71a94784df..4f01001b24 100644 --- a/apr-util/PKGBUILD +++ b/apr-util/PKGBUILD @@ -3,7 +3,7 @@ pkgname=apr-util pkgver=1.6.3 -pkgrel=1 +pkgrel=2 pkgdesc="The Apache Portable Runtime" arch=('loong64' 'x86_64') url="https://apr.apache.org/" diff --git a/apr/PKGBUILD b/apr/PKGBUILD index d85b8e5220..c33ea681cc 100644 --- a/apr/PKGBUILD +++ b/apr/PKGBUILD @@ -4,7 +4,7 @@ pkgname=apr pkgver=1.7.4 -pkgrel=1 +pkgrel=3 pkgdesc='The Apache Portable Runtime' arch=('loong64' 'x86_64') url='https://apr.apache.org/' @@ -16,14 +16,16 @@ source=(https://archive.apache.org/dist/apr/apr-$pkgver.tar.bz2{,.asc} fix-apr.pc.patch ship_find_apr.m4.patch omit_extra_libs.patch - dont_override_external_buildflags) + dont_override_external_buildflags + apr-1.7-fix-build.patch) sha256sums=('fc648de983f3a2a6c9e78dea1f180639bd2fad6c06d556d4367a701fe5c35577' 'SKIP' '572efb102d02bb3e85ff08eca6b2ea8ff7936ce5228da7a45c1e639faca36a5c' '12595d331b48be9e44bd843635eb4f0f500bd213e197a551a9d383a28a24641f' 
'315932ef6536fc0644c1efe770ceb3bb675c3c7103a7cbb2f02efd8be03eb752' '3d491d3af8fb5a75db4e085a17e5d8dcbe058bd256ef893ee779dc97fc9f8ad6' - '5ac0bdc532479f6082d29115ac9d3ca24524fd8b97a556568755b88e5a68e3df') + '5ac0bdc532479f6082d29115ac9d3ca24524fd8b97a556568755b88e5a68e3df' + 'e543e08a3517b5a6143c1b6efeb9fffec091953e689d2d4d98526407be8f7d9c') validpgpkeys=('5B5181C2C0AB13E59DA3F7A3EC582EB639FF092C' # Jeff Trawick 'B1B96F45DFBDCCF974019235193F180AB55D9977' # Nick Kew '65B2D44FE74BD5E3DE3AC3F082781DE46D5954FA' # "Eric Covener " @@ -36,10 +38,14 @@ prepare() { patch -Np1 -i ../fix-apr.pc.patch patch -Np1 -i ../omit_extra_libs.patch patch -Np1 -i ../dont_override_external_buildflags - #./buildconf + patch -Np1 -i ../apr-1.7-fix-build.patch + ./buildconf } build() { +# CFLAGS=${CFLAGS/-Wformat -Werror=format-security/} +# CXXFLAGS=${CXXFLAGS/-Wformat -Werror=format-security/} +# unset CFLAGS CXXFLAGS cd apr-$pkgver ./configure --prefix=/usr --includedir=/usr/include/apr-1 \ --with-installbuilddir=/usr/share/apr-1/build \ diff --git a/apr/apr-1.7-fix-build.patch b/apr/apr-1.7-fix-build.patch new file mode 100644 index 0000000000..aaad10e7ae --- /dev/null +++ b/apr/apr-1.7-fix-build.patch @@ -0,0 +1,13 @@ +Index: apr-1.7.0/build/apr_common.m4 +=================================================================== +--- apr-1.7.0.orig/build/apr_common.m4 ++++ apr-1.7.0/build/apr_common.m4 +@@ -501,7 +501,7 @@ AC_DEFUN([APR_TRY_COMPILE_NO_WARNING], + [apr_save_CFLAGS=$CFLAGS + CFLAGS="$CFLAGS $CFLAGS_WARN" + if test "$ac_cv_prog_gcc" = "yes"; then +- CFLAGS="$CFLAGS -Werror" ++ CFLAGS="$CFLAGS" + fi + AC_COMPILE_IFELSE( + [AC_LANG_SOURCE( diff --git a/arch-rebuild-order/PKGBUILD b/arch-rebuild-order/PKGBUILD index 9f1a1b95e0..35ce9a2d50 100644 --- a/arch-rebuild-order/PKGBUILD +++ b/arch-rebuild-order/PKGBUILD @@ -8,7 +8,7 @@ url='https://gitlab.archlinux.org/archlinux/arch-rebuild-order' arch=('loong64' 'x86_64') license=('MIT') depends=('glibc' 'libalpm.so') -makedepends=('cargo' 'mandown' 'git') +makedepends=('rust' 'mandown' 'git') groups=('archlinux-tools') source=(git+https://gitlab.archlinux.org/archlinux/arch-rebuild-order.git#tag=v$pkgver?signed) sha512sums=('SKIP') @@ -16,12 +16,12 @@ validpgpkeys=("E499C79F53C96A54E572FEE1C06086337C50773E") prepare() { cd ${pkgname} - cargo fetch --locked --target "$CARCH-unknown-linux-gnu" + cargo fetch --locked --target "`uname -m`-unknown-linux-gnu" } build() { cd ${pkgname} - cargo build --frozen --release --all-features + cargo build --release --all-features } check() { diff --git a/arch-repro-status/PKGBUILD b/arch-repro-status/PKGBUILD index 6349ef31e6..0aa6cc331d 100644 --- a/arch-repro-status/PKGBUILD +++ b/arch-repro-status/PKGBUILD @@ -16,7 +16,7 @@ sha512sums=('16fa85c6bd1990363f7129d7c7b2229fa682e0032cef176f22f5cdc1dd03fd60894 prepare() { cd "$pkgname-v$pkgver" mkdir completions/ - cargo fetch --locked --target "$CARCH-unknown-linux-gnu" + cargo fetch --locked --target "`uname -m`-unknown-linux-gnu" } build() { diff --git a/archinstall/PKGBUILD b/archinstall/PKGBUILD index b7adbeef6b..1f371acf4c 100644 --- a/archinstall/PKGBUILD +++ b/archinstall/PKGBUILD @@ -42,15 +42,19 @@ provides=(python-archinstall) source=( $pkgname-v$pkgver.tar.gz::$url/archive/refs/tags/v$pkgver.tar.gz $pkgname-v$pkgver.tar.gz.sig::$url/releases/download/v$pkgver/$pkgname-$pkgver.tar.gz.sig + archinstall-la64-2.6.3.patch ) sha512sums=('b88a301ff747f10f5b2e23b8c9217f28b54a5846123580eba06d1960a64e56357ec85414f3c0ce8b641cc2038fd5244608bdbac3ee00bef0a55928a51add0c05' - 'SKIP') 
+ 'SKIP' + 'bba2c25733ab266f2b848f2e569dc6b03a0dd9e844cdb976a16fe0190145c6d7858b6530fd0ac9785579968f43fba0e9037802b62d160b98832076e09f5376df') b2sums=('105b297c649e08edb67e36f1675a65c4b9a930cee154e7bdf9cde8d0a5af25ca8045f31fe2cdcccc076dc278dd9f952fbc03fe57f7436118e623c96cddcc8a4a' - 'SKIP') + 'SKIP' + 'ed1717d1f5649383bc15db231636181a135857da3ec4f51d2e559b7af402c4013ba6e0daa0d5af154ca00f3fea15a88c11e97b8c9060a867345cb361b2b8151b') validpgpkeys=('8AA2213C8464C82D879C8127D4B58E897A929F2E') # torxed@archlinux.org prepare() { cd $pkgname-$pkgver + patch -p1 -i $srcdir/archinstall-la64-2.6.3.patch } build() { diff --git a/archinstall/archinstall-la64-2.6.3.patch b/archinstall/archinstall-la64-2.6.3.patch new file mode 100644 index 0000000000..84b5638eb3 --- /dev/null +++ b/archinstall/archinstall-la64-2.6.3.patch @@ -0,0 +1,139 @@ +diff --git a/archinstall/lib/global_menu.py b/archinstall/lib/global_menu.py +index b38dac0b..ca69a322 100644 +--- a/archinstall/lib/global_menu.py ++++ b/archinstall/lib/global_menu.py +@@ -46,7 +46,9 @@ class GlobalMenu(AbstractMenu): + _('Archinstall language'), + lambda x: self._select_archinstall_language(x), + display_func=lambda x: x.display_name, +- default=self.translation_handler.get_language_by_abbr('en')) ++ default=self.translation_handler.get_language_by_abbr('zh-CN')) ++ self.translation_handler.activate(self.translation_handler.get_language_by_abbr('zh-CN')) ++ + self._menu_options['locale_config'] = \ + Selector( + _('Locales'), +diff --git a/archinstall/lib/hardware.py b/archinstall/lib/hardware.py +index 56d3bc7b..737cbe06 100644 +--- a/archinstall/lib/hardware.py ++++ b/archinstall/lib/hardware.py +@@ -47,6 +47,7 @@ class GfxPackage(Enum): + VulkanRadeon = 'vulkan-radeon' + Xf86VideoAmdgpu = "xf86-video-amdgpu" + Xf86VideoAti = "xf86-video-ati" ++ Xf86VideoLoongson = 'xf86-video-loongson' + Xf86VideoNouveau = 'xf86-video-nouveau' + Xf86VideoVmware = 'xf86-video-vmware' + +@@ -76,6 +77,7 @@ class GfxDriver(Enum): + GfxPackage.Mesa, + GfxPackage.Xf86VideoAmdgpu, + GfxPackage.Xf86VideoAti, ++ GfxPackage.Xf86VideoLoongson, + GfxPackage.Xf86VideoNouveau, + GfxPackage.Xf86VideoVmware, + GfxPackage.LibvaMesaDriver, +@@ -89,6 +91,7 @@ class GfxDriver(Enum): + GfxPackage.Mesa, + GfxPackage.Xf86VideoAmdgpu, + GfxPackage.Xf86VideoAti, ++ GfxPackage.Xf86VideoLoongson, + GfxPackage.LibvaMesaDriver, + GfxPackage.VulkanRadeon + ] +@@ -272,6 +275,7 @@ class SysInfo: + 'snd_gina20', + 'snd_gina24', + 'snd_hda_codec_ca0132', ++ 'snd_hda_loongson', + 'snd_hdsp', + 'snd_indigo', + 'snd_indigodj', +diff --git a/archinstall/lib/installer.py b/archinstall/lib/installer.py +index 585389ed..4e066896 100644 +--- a/archinstall/lib/installer.py ++++ b/archinstall/lib/installer.py +@@ -916,7 +916,7 @@ class Installer: + boot_dir_arg.append(f'--boot-directory={boot_dir}') + + add_options = [ +- '--target=x86_64-efi', ++ '--target=loongarch64-efi', + f'--efi-directory={efi_partition.mountpoint}', + *boot_dir_arg, + '--bootloader-id=GRUB', +diff --git a/archinstall/lib/locale/locale_menu.py b/archinstall/lib/locale/locale_menu.py +index 2e254315..fbcbddd8 100644 +--- a/archinstall/lib/locale/locale_menu.py ++++ b/archinstall/lib/locale/locale_menu.py +@@ -16,7 +16,7 @@ class LocaleConfiguration: + + @staticmethod + def default() -> 'LocaleConfiguration': +- return LocaleConfiguration('us', 'en_US', 'UTF-8') ++ return LocaleConfiguration('us', 'zh_CN', 'UTF-8') + + def json(self) -> Dict[str, str]: + return { +@@ -68,7 +68,7 @@ class LocaleMenu(AbstractSubMenu): + Selector( + 
_('Locale language'), + lambda preset: select_locale_lang(preset), +- default=self._preset.sys_lang, ++ default='zh_CN', + enabled=True) + self._menu_options['sys-encoding'] = \ + Selector( +diff --git a/archinstall/lib/mirrors.py b/archinstall/lib/mirrors.py +index 74cdd0aa..70b3794e 100644 +--- a/archinstall/lib/mirrors.py ++++ b/archinstall/lib/mirrors.py +@@ -323,7 +323,7 @@ def list_mirrors() -> Dict[str, List[str]]: + with pathlib.Path('/etc/pacman.d/mirrorlist').open('r') as fp: + mirrorlist = fp.read() + else: +- url = "https://archlinux.org/mirrorlist/?protocol=https&protocol=http&ip_version=4&ip_version=6&use_mirror_status=on" ++ url = "https://archapi.zhcn.cc/api/v1/mirrorlist/?protocol=https&protocol=http&ip_version=4&ip_version=6&use_mirror_status=on" + try: + mirrorlist = fetch_data_from_url(url) + except ValueError as err: +diff --git a/archinstall/lib/translationhandler.py b/archinstall/lib/translationhandler.py +index 33230562..5caa191f 100644 +--- a/archinstall/lib/translationhandler.py ++++ b/archinstall/lib/translationhandler.py +@@ -24,7 +24,10 @@ class Language: + + @property + def display_name(self) -> str: +- name = self.name_en ++ if self.translated_lang: ++ name = self.translated_lang ++ else: ++ name = self.name_en + return f'{name} ({self.translation_percent}%)' + + def is_match(self, lang_or_translated_lang: str) -> bool: +diff --git a/archinstall/scripts/guided.py b/archinstall/scripts/guided.py +index d7cf16cd..1d746c21 100644 +--- a/archinstall/scripts/guided.py ++++ b/archinstall/scripts/guided.py +@@ -181,6 +181,19 @@ def perform_installation(mountpoint: Path): + if profile_config := archinstall.arguments.get('profile_config', None): + profile_handler.install_profile_config(installation, profile_config) + ++ # Add Chinese input method and fonts ++ if locale_config.sys_lang in ["zh_CN", "zh_TW"]: ++ installation.add_additional_packages(['wqy-bitmapfont', 'wqy-microhei', 'wqy-microhei-lite', 'wqy-zenhei']) ++ # Install Chinese Input Method ++ if profile_config.profile.is_desktop_type_profile(): ++ installation.add_additional_packages(['fcitx5', 'fcitx5-chinese-addons', 'fcitx5-configtool', 'fcitx5-gtk', 'fcitx5-qt']) ++ with open(f"{archinstall.storage['installation_session'].target}/etc/X11/xinit/xinitrc.d/50-input.sh", 'w') as finput: ++ finput.write(f'export XIM=fcitx\n') ++ finput.write(f'export GTK_IM_MODULE=fcitx\n') ++ finput.write(f'export QT_IM_MODULE=fcitx\n') ++ finput.write(f'export XMODIFIERS="@im=fcitx"\n') ++ installation.arch_chroot('chmod +x /etc/X11/xinit/xinitrc.d/50-input.sh') ++ + if timezone := archinstall.arguments.get('timezone', None): + installation.set_timezone(timezone) + diff --git a/arrow/PKGBUILD b/arrow/PKGBUILD index ad226f1b50..bf021063d8 100644 --- a/arrow/PKGBUILD +++ b/arrow/PKGBUILD @@ -33,6 +33,11 @@ prepare() { build(){ CC=clang \ CXX=clang++ \ +# clang didn't support -mlsx + CFLAGS=${CFLAGS/-mlsx /} + CXXFLAGS=${CXXFLAGS/-mlsx /} + CFLAGS=${CFLAGS/-fstack-clash-protection/} + CXXFLAGS=${CXXFLAGS/-fstack-clash-protection/} cmake \ -B build -S apache-${pkgname}-${pkgver}/cpp \ -DCMAKE_INSTALL_PREFIX="/usr" \ diff --git a/arti/PKGBUILD b/arti/PKGBUILD index cc59f298ff..aa67e66e36 100644 --- a/arti/PKGBUILD +++ b/arti/PKGBUILD @@ -23,7 +23,7 @@ b2sums=('f1bec1c26a147372f19a66022dda7a2f8989d40338399f926f84c8ad17d6cd9d92ad565 prepare() { mv "$pkgname-$pkgname-v$pkgver" "$pkgname-$pkgver" cd "$pkgname-$pkgver" - cargo fetch --locked --target "$CARCH-unknown-linux-gnu" + cargo fetch --locked --target "`uname 
-m`-unknown-linux-gnu" } build() { diff --git a/aspell-ru/PKGBUILD b/aspell-ru/PKGBUILD index fa07c6f2dc..c33beac8a2 100644 --- a/aspell-ru/PKGBUILD +++ b/aspell-ru/PKGBUILD @@ -7,7 +7,7 @@ _pkgver=0.99f7-1 pkgver=${_pkgver//-/.} pkgrel=1 pkgdesc="Russian dictionary for aspell" -arch=(loong64' 'x86_64) # We cannot use 'any' see FS#22443 +arch=('loong64' 'x86_64') # We cannot use 'any' see FS#22443 url='http://aspell.net' license=(custom) depends=(aspell) diff --git a/at51/PKGBUILD b/at51/PKGBUILD index 2a8c9b070d..e685079485 100644 --- a/at51/PKGBUILD +++ b/at51/PKGBUILD @@ -21,7 +21,7 @@ b2sums=('3bb3793c2082fa4ce2973bd4c58ff684ebe6afdcf1507d112a0a9c89e8410bda8493d13 prepare() { cd $pkgname-$pkgver - cargo fetch --locked --target "$CARCH-unknown-linux-gnu" + cargo fetch --locked --target "`uname -m`-unknown-linux-gnu" } build() { cd $pkgname-$pkgver diff --git a/auth-tarball-from-git/PKGBUILD b/auth-tarball-from-git/PKGBUILD index d790d5e0b5..a1efaa435f 100644 --- a/auth-tarball-from-git/PKGBUILD +++ b/auth-tarball-from-git/PKGBUILD @@ -19,7 +19,7 @@ validpgpkeys=("64B13F7117D6E07D661BBCE0FE763A64F5E54FD6") prepare() { cd "${pkgname}-${pkgver}" - cargo fetch --locked --target "$CARCH-unknown-linux-gnu" + cargo fetch --locked --target "`uname -m`-unknown-linux-gnu" } build() { diff --git a/autogen/PKGBUILD b/autogen/PKGBUILD index 02a2cc2082..96f869bfb9 100644 --- a/autogen/PKGBUILD +++ b/autogen/PKGBUILD @@ -32,6 +32,7 @@ prepare() { build() { cd "${srcdir}/${pkgname}-${pkgver}" + unset CFLAGS CXXFLAGS ./configure --prefix=/usr sed -i -e 's/ -shared / -Wl,-O1,--as-needed\0/g' libtool make diff --git a/autotiling-rs/PKGBUILD b/autotiling-rs/PKGBUILD index 77f07302be..49f174065a 100644 --- a/autotiling-rs/PKGBUILD +++ b/autotiling-rs/PKGBUILD @@ -15,7 +15,7 @@ b2sums=('3bffa4f9beef917c1ac731507e61ac716164829b1ce038e20708ca8d0511dd9a4382716 prepare() { cd ${pkgname}-${pkgver} - cargo fetch --locked --target "$CARCH-unknown-linux-gnu" + cargo fetch --locked --target "`uname -m`-unknown-linux-gnu" } build() { diff --git a/avisynthplus/PKGBUILD b/avisynthplus/PKGBUILD index 6093ec84b1..e2464a4da6 100644 --- a/avisynthplus/PKGBUILD +++ b/avisynthplus/PKGBUILD @@ -12,9 +12,15 @@ optdepends=('devil: for ImageSeq plugin') makedepends=('cmake' 'devil') provides=('libavisynth.so') source=("https://github.com/AviSynth/AviSynthPlus/archive/v${pkgver}/${pkgname}-${pkgver}.tar.gz" - 'avisynthplus.xml') + 'avisynthplus.xml' + 'avisynthplus-la64.patch') sha256sums=('b847705af6f16fa26664d06e0fea2bda14a7f6aac8249a9c37e4106ecb8fd44c' - 'c4b270a3df7fbe1c153400215169c4ae4cae3b7a8710c843393e3a6ed0fd8a3e') + 'c4b270a3df7fbe1c153400215169c4ae4cae3b7a8710c843393e3a6ed0fd8a3e' + '02a013f60b849eda8fe5edc9a8e451e790d2a7b3ebc2dc034730718be62475d7') + +prepare() { + patch -d "AviSynthPlus-${pkgver}" -p1 -i "$srcdir/avisynthplus-la64.patch" +} build() { cmake -B build -S "AviSynthPlus-${pkgver}" \ diff --git a/avisynthplus/avisynthplus-la64.patch b/avisynthplus/avisynthplus-la64.patch new file mode 100644 index 0000000000..234b2697b8 --- /dev/null +++ b/avisynthplus/avisynthplus-la64.patch @@ -0,0 +1,19 @@ +commit 9609a9565b6cb754ce7787fbd032f7d7f7a151dd +Author: Xiaotian Wu +Date: Sun Jun 5 08:44:22 2022 +0800 + + add support for LoongArch + +diff --git a/avs_core/include/avs/config.h b/avs_core/include/avs/config.h +index bdabf17f..1d0b4eef 100644 +--- a/avs_core/include/avs/config.h ++++ b/avs_core/include/avs/config.h +@@ -59,6 +59,8 @@ + # define PPC32 + #elif defined(__riscv) + # define RISCV ++#elif 
defined(__loongarch__) ++# define LOONGARCH + #elif defined(__sparc_v9__) + # define SPARC + #elif defined(__mips__) diff --git a/b3sum/PKGBUILD b/b3sum/PKGBUILD index 8507d3116a..8404995ca7 100644 --- a/b3sum/PKGBUILD +++ b/b3sum/PKGBUILD @@ -15,7 +15,7 @@ b2sums=('SKIP') prepare() { cd $_name/$pkgname - cargo fetch --locked --target "$CARCH-unknown-linux-gnu" + cargo fetch --locked --target "`uname -m`-unknown-linux-gnu" } build() { diff --git a/babl/PKGBUILD b/babl/PKGBUILD index 0f897b3859..904f15b96f 100644 --- a/babl/PKGBUILD +++ b/babl/PKGBUILD @@ -10,8 +10,10 @@ url='https://gegl.org/babl/' license=('LGPL3') depends=('glibc' 'lcms2') makedepends=('git' 'meson' 'gobject-introspection' 'vala') -source=("git+https://gitlab.gnome.org/GNOME/babl.git#tag=$_tag") -sha256sums=('SKIP') +source=("git+https://gitlab.gnome.org/GNOME/babl.git#tag=$_tag" + babl-fix-gir-pkgname.patch) +sha256sums=('SKIP' + 'bceba2643d5baef3d5add6f715d8bf982cbaf25701e589d489b1a8d819879e6a') pkgver() { cd "${pkgname}" @@ -21,6 +23,7 @@ pkgver() { prepare() { # https://gitlab.gnome.org/GNOME/babl/-/merge_requests/45 sed -i s/Description/description/ "${pkgname}"/meson.build + patch -d babl -p1 -i $srcdir/babl-fix-gir-pkgname.patch } build() { diff --git a/babl/babl-fix-gir-pkgname.patch b/babl/babl-fix-gir-pkgname.patch new file mode 100644 index 0000000000..ea0bb8d0ba --- /dev/null +++ b/babl/babl-fix-gir-pkgname.patch @@ -0,0 +1,13 @@ +Index: babl/babl/meson.build +=================================================================== +--- babl.orig/babl/meson.build ++++ babl/babl/meson.build +@@ -156,7 +156,7 @@ if build_gir + namespace: 'Babl', + nsversion: api_version, + header: 'babl.h', +- export_packages: 'babl-0.1', ++ export_packages: 'babl', + install: true, + ) + diff --git a/bacon/PKGBUILD b/bacon/PKGBUILD index 111624c894..0f9267a520 100644 --- a/bacon/PKGBUILD +++ b/bacon/PKGBUILD @@ -24,7 +24,7 @@ pkgver() { prepare() { cd "$pkgname" - cargo fetch --locked --target "$CARCH-unknown-linux-gnu" + cargo fetch --locked --target "`uname -m`-unknown-linux-gnu" } build() { diff --git a/baidupcs-go/PKGBUILD b/baidupcs-go/PKGBUILD index 7a6803c9bf..07b6881513 100644 --- a/baidupcs-go/PKGBUILD +++ b/baidupcs-go/PKGBUILD @@ -21,6 +21,8 @@ build() { export CGO_LDFLAGS="${LDFLAGS}" export CGO_CFLAGS="${CFLAGS}" export CGO_CPPFLAGS="${CPPFLAGS}" + go mod edit -replace=golang.org/x/sys=github.com/golang/sys@v0.0.0-20220622161953-175b2fd9d664 + go mod tidy go build -o baidupcs-go } diff --git a/bandwhich/PKGBUILD b/bandwhich/PKGBUILD index a2348391d9..76bc317249 100644 --- a/bandwhich/PKGBUILD +++ b/bandwhich/PKGBUILD @@ -19,7 +19,7 @@ b2sums=('faa9bc5620e9e2a7d5ddd8c715934b2eefc6f4f069348fc14d983ac9c7b22e43b1d167c prepare() { cd "${pkgname}-${pkgver}" - cargo fetch --locked --target "$CARCH-unknown-linux-gnu" + cargo fetch --locked --target "`uname -m`-unknown-linux-gnu" } build() { diff --git a/bcprov/PKGBUILD b/bcprov/PKGBUILD index eaa60f4e96..5705f83b95 100644 --- a/bcprov/PKGBUILD +++ b/bcprov/PKGBUILD @@ -8,7 +8,7 @@ arch=('any') url='https://www.bouncycastle.org/java.html' license=('MIT') depends=('java-runtime-headless') -makedepends=('ant' 'strip-nondeterminism') +makedepends=('git' 'ant' 'strip-nondeterminism') source=("$pkgname-$pkgver.tar.gz::https://github.com/bcgit/bc-java/archive/refs/tags/r${pkgver/./rv}.tar.gz") sha512sums=('7d2abab42a7e29159ae063244a4296708d1269e5a8250f0e2f62b095916d509e6e4213f4b32d45f375e1aabea572860d9b81df9ee5efcdff9b9e569864c9f8db') diff --git a/bees/PKGBUILD b/bees/PKGBUILD index 
95b0763398..36588d71d8 100644 --- a/bees/PKGBUILD +++ b/bees/PKGBUILD @@ -11,8 +11,15 @@ url="https://github.com/Zygo/bees" license=('GPL3') depends=('util-linux-libs' 'bash') makedepends=('btrfs-progs' 'systemd') -source=("${pkgname}-${pkgver}.tar.gz"::"https://github.com/Zygo/bees/archive/v${pkgver}.tar.gz") -sha256sums=('d100efbc6084f494400892ef53fa476fd6f201dba3b2fddee11ef90dd9d6111d') +source=("${pkgname}-${pkgver}.tar.gz"::"https://github.com/Zygo/bees/archive/v${pkgver}.tar.gz" + "bees-fix-build.patch::https://github.com/Zygo/bees/commit/d6732c58e29b6f969e8b53c16541d1572a31c485.patch") +sha256sums=('d100efbc6084f494400892ef53fa476fd6f201dba3b2fddee11ef90dd9d6111d' + 'd08111d97ee1b8c1d3b7abcdc25872de965e472f318383e9121917667748d3c7') + +prepare() { + cd "${srcdir}/${pkgname}-${pkgver}" + patch -p1 -i $srcdir/bees-fix-build.patch +} build() { cd "${srcdir}/${pkgname}-${pkgver}" diff --git a/bigloo/PKGBUILD b/bigloo/PKGBUILD index 3830533ecd..fc32cbe94b 100644 --- a/bigloo/PKGBUILD +++ b/bigloo/PKGBUILD @@ -14,13 +14,17 @@ depends=('gmp' 'openssl' 'libunistring' 'libnsl' 'gc' 'libuv') makedepends=('emacs' 'zip' 'sqlite' 'alsa-lib' 'flac' 'avahi' 'libpulse') optdepends=('emacs' 'zip' 'sqlite' 'alsa-lib' 'flac' 'avahi') options=('!makeflags' '!lto') -source=("ftp://ftp-sop.inria.fr/indes/fp/Bigloo/${pkgname}-${_src_ver}.tar.gz") -sha256sums=('d8f04e889936187dc519719b749ad03fe574165a0b6d318e561f1b3bce0d5808') +source=("ftp://ftp-sop.inria.fr/indes/fp/Bigloo/${pkgname}-${_src_ver}.tar.gz" +bigloo-la64.patch) +sha256sums=('d8f04e889936187dc519719b749ad03fe574165a0b6d318e561f1b3bce0d5808' + '7ccb954d6116379c38f0405b3ed4160ebed68a1134225365170c865d2be8920f') elisp_dir=/usr/share/emacs/site-lisp/bigloo prepare() { sed -i 's/$(GCLIB)/c/' "${srcdir}/${pkgname}-${_src_ver}/configure" + cd "${srcdir}/${pkgname}-${_src_ver}" + patch -p1 -i $srcdir/bigloo-la64.patch } build() { diff --git a/bigloo/bigloo-la64.patch b/bigloo/bigloo-la64.patch new file mode 100644 index 0000000000..50b3f9b86a --- /dev/null +++ b/bigloo/bigloo-la64.patch @@ -0,0 +1,11 @@ +Index: bigloo-4.5a-1/libbacktrace/install-libbacktrace +=================================================================== +--- bigloo-4.5a-1.orig/libbacktrace/install-libbacktrace ++++ bigloo-4.5a-1/libbacktrace/install-libbacktrace +@@ -11,4 +11,5 @@ + #*=====================================================================*/ + + tar xfz $LIBBACKTRACESRC || (echo "tar xfz $LIBBACKTRACESRC failed"; exit 1) +- ++for c_s in $(find -type f -name config.sub -o -name configure.sub); do cp -f /usr/share/automake-1.16/config.sub "$c_s"; done ++for c_g in $(find -type f -name config.guess -o -name configure.guess); do cp -f /usr/share/automake-1.16/config.guess "$c_g"; done diff --git a/bingrep/PKGBUILD b/bingrep/PKGBUILD index 3bd3343e2c..7a523edf78 100644 --- a/bingrep/PKGBUILD +++ b/bingrep/PKGBUILD @@ -15,7 +15,7 @@ sha512sums=('9a50aecffdd613f3241d12802ad49dc5d98219c8a99455418dc741eebc0a7c2a261 prepare() { cd "$pkgname-$pkgver" - cargo fetch --locked --target "$CARCH-unknown-linux-gnu" + cargo fetch --locked --target "`uname -m`-unknown-linux-gnu" } build() { diff --git a/binocle/PKGBUILD b/binocle/PKGBUILD index fc9dd6bdc2..257fb62c90 100644 --- a/binocle/PKGBUILD +++ b/binocle/PKGBUILD @@ -15,7 +15,7 @@ sha512sums=('d4d2e225723e72d991eac9dc91c0056c902eeabbe046161447c4a8a4e3200515b5d prepare() { cd "$pkgname-$pkgver" - cargo fetch --locked --target "$CARCH-unknown-linux-gnu" + cargo fetch --locked --target "`uname -m`-unknown-linux-gnu" } build() { 
diff --git a/blender/PKGBUILD b/blender/PKGBUILD index 04e39c99ae..41c4cff075 100644 --- a/blender/PKGBUILD +++ b/blender/PKGBUILD @@ -18,13 +18,12 @@ url="https://www.blender.org" depends=('libpng' 'libtiff' 'openexr' 'python' 'desktop-file-utils' 'python-requests' 'potrace' 'shared-mime-info' 'hicolor-icon-theme' 'xdg-utils' 'glew' 'openjpeg2' 'python-numpy' 'freetype2' 'openal' 'ffmpeg' 'fftw' 'boost-libs' 'opencollada' 'alembic' 'openxr' - 'openimageio' 'libsndfile' 'jack' 'opencolorio' 'openimagedenoise' 'materialx' - 'jemalloc' 'libspnav' 'ptex' 'opensubdiv' 'openvdb' 'sdl2' 'embree' 'libharu' - 'draco' 'openpgl' 'level-zero-loader' 'libxkbcommon' 'libepoxy' 'usd' 'openshadinglanguage' + 'openimageio' 'libsndfile' 'jack' 'opencolorio' + 'jemalloc' 'libspnav' 'ptex' 'opensubdiv' 'openvdb' 'sdl2' 'libharu' + 'draco' 'level-zero-loader' 'libxkbcommon' 'libepoxy' 'openshadinglanguage' 'intel-oneapi-compiler-shared-runtime-libs' 'intel-oneapi-compiler-dpcpp-cpp-runtime-libs') makedepends=('cmake' 'boost' 'mesa' 'git' 'subversion' 'llvm' 'cuda' 'ninja' 'wayland-protocols' 'libxkbcommon' - 'libdecor' 'hip-runtime-amd' 'level-zero-headers' 'intel-oneapi-dpcpp-cpp' - 'intel-oneapi-compiler-shared-runtime' 'intel-compute-runtime') + 'libdecor' 'level-zero-headers') optdepends=('cuda: Cycles renderer CUDA support' 'intel-compute-runtime: Cycles renderer Intel OneAPI support' 'libdecor: wayland support') @@ -117,6 +116,8 @@ build() { -DWITH_PYTHON_INSTALL=OFF \ -DOCLOC_INSTALL_DIR=/usr \ -DUSD_ROOT_DIR=/usr \ + -DWITH_MATERIALX=OFF \ + -DWITH_CYCLES=OFF \ -DSYCL_OFFLINE_COMPILER_PARALLEL_JOBS=8 cmake --build build # For debug: @@ -142,5 +143,5 @@ package() { rm -r "${pkgdir}"/usr/share/blender/4*/python # Move OneAPI AOT lib to proper place - mv "${pkgdir}"/usr/share/blender/lib/libcycles_kernel_oneapi_aot.so "${pkgdir}"/usr/lib/ +# mv "${pkgdir}"/usr/share/blender/lib/libcycles_kernel_oneapi_aot.so "${pkgdir}"/usr/lib/ } diff --git a/bonnie++/PKGBUILD b/bonnie++/PKGBUILD index 2cc4531276..7ff31db6f5 100644 --- a/bonnie++/PKGBUILD +++ b/bonnie++/PKGBUILD @@ -4,7 +4,7 @@ pkgname=bonnie++ pkgver=2.00a -pkgrel=2 +pkgrel=3 pkgdesc="Based on the Bonnie hard drive benchmark by Tim Bray" arch=('loong64' 'x86_64') url="https://www.coker.com.au/bonnie++/" diff --git a/boost/PKGBUILD b/boost/PKGBUILD index 5aae060550..8cc5cad354 100644 --- a/boost/PKGBUILD +++ b/boost/PKGBUILD @@ -21,11 +21,13 @@ makedepends=('icu' 'python' 'python-numpy' 'bzip2' 'zlib' 'openmpi' 'zstd') source=(https://boostorg.jfrog.io/artifactory/main/release/$pkgver/source/$_srcname.tar.bz2 boost-1.81.0-phoenix-multiple-definitions.patch $pkgname-support-fn.contains-f-where-f-is-a-function.patch::https://github.com/boostorg/function/commit/7ca2310b15e3.patch - $pkgname-ublas-c++20-iterator.patch::https://github.com/boostorg/ublas/commit/a31e5cffa85f.patch) + $pkgname-ublas-c++20-iterator.patch::https://github.com/boostorg/ublas/commit/a31e5cffa85f.patch + boost-1.79.0-la64.patch) sha256sums=('6478edfe2f3305127cffe8caf73ea0176c53769f4bf1585be237eb30798c3b8e' '3ebf428ef6be090a7b56a233330375539ac429333b83708e28fe5db049cfecdb' '1b5998ee8fb389dd6df55a3684d29ffa37246bc007e8e6712bf2be6c7f745036' - 'aa38addb40d5f44b4a8472029b475e7e6aef1c460509eb7d8edf03491dc1b5ee') + 'aa38addb40d5f44b4a8472029b475e7e6aef1c460509eb7d8edf03491dc1b5ee' + '0fb9188bf211deff0d48dfb7cef614bbdebcd7dccea6e8c015da5d691eda5d94') prepare() { cd $_srcname @@ -40,6 +42,7 @@ prepare() { # https://github.com/boostorg/ublas/pull/97 patch -Np2 -i 
../$pkgname-ublas-c++20-iterator.patch + patch -Np1 -i $srcdir/boost-1.79.0-la64.patch } build() { diff --git a/boost/boost-1.79.0-la64.patch b/boost/boost-1.79.0-la64.patch new file mode 100644 index 0000000000..2b9602a900 --- /dev/null +++ b/boost/boost-1.79.0-la64.patch @@ -0,0 +1,55 @@ +diff --git a/boostcpp.jam b/boostcpp.jam +index 082536e2a5..7565dae80d 100644 +--- a/boostcpp.jam ++++ b/boostcpp.jam +@@ -634,7 +634,7 @@ rule address-model ( ) + return @boostcpp.deduce-address-model ; + } + +-local deducable-architectures = arm mips1 power riscv s390x sparc x86 combined ; ++local deducable-architectures = arm loongarch mips1 power riscv s390x sparc x86 combined ; + feature.feature deduced-architecture : $(deducable-architectures) : propagated optional composite hidden ; + for a in $(deducable-architectures) + { +@@ -645,9 +645,10 @@ rule deduce-architecture ( properties * ) + { + local result ; + local filtered = [ toolset-properties $(properties) ] ; +- local names = arm mips1 power riscv s390x sparc x86 combined ; ++ local names = arm loongarch mips1 power riscv s390x sparc x86 combined ; + local idx = [ configure.find-builds "default architecture" : $(filtered) + : /boost/architecture//arm ++ : /boost/architecture//loongarch + : /boost/architecture//mips1 + : /boost/architecture//power + : /boost/architecture//riscv +Submodule libs/config 08dced51e9..5c177b2269: +diff --git a/libs/config/checks/architecture/Jamfile.jam b/libs/config/checks/architecture/Jamfile.jam +index 2ba54f9a..e8838b41 100644 +--- a/libs/config/checks/architecture/Jamfile.jam ++++ b/libs/config/checks/architecture/Jamfile.jam +@@ -18,6 +18,7 @@ obj 64 : 64.cpp ; + + obj arm : arm.cpp ; + obj combined : combined.cpp ; ++obj loongarch : loongarch.cpp ; + obj mips : mips.cpp ; + alias mips1 : mips ; # Backwards compatibility + obj power : power.cpp ; +diff --git a/libs/config/checks/architecture/loongarch.cpp b/libs/config/checks/architecture/loongarch.cpp +new file mode 100644 +index 00000000..5be8cb09 +--- /dev/null ++++ b/libs/config/checks/architecture/loongarch.cpp +@@ -0,0 +1,11 @@ ++// loongarch.cpp ++// ++// Copyright (c) 2012 Steven Watanabe ++// ++// Distributed under the Boost Software License Version 1.0. 
(See ++// accompanying file LICENSE_1_0.txt or copy at ++// http://www.boost.org/LICENSE_1_0.txt) ++ ++#if !defined(__loongarch__) ++#error "Not LoongArch" ++#endif diff --git a/booster/PKGBUILD b/booster/PKGBUILD index 610a91a7b1..85e627597e 100644 --- a/booster/PKGBUILD +++ b/booster/PKGBUILD @@ -23,10 +23,14 @@ sha512sums=('66443568c504d563d5a774dd25d47d72ec745cad2b77fea3cbf881b51ed1ecfa093 build() { cd booster-$pkgver + export GOPROXY=https://goproxy.cn + go mod edit -replace=golang.org/x/sys=github.com/golang/sys@v0.0.0-20220622161953-175b2fd9d664 + go mod edit -replace=golang.org/x/net=github.com/golang/net@v0.0.0-20220622184535-263ec571b305 + go mod edit -replace=github.com/u-root/uio=github.com/loongarch64/uio@dev-main + go mod tidy cd generator CGO_CPPFLAGS="${CPPFLAGS}" CGO_CFLAGS="${CFLAGS}" CGO_CXXFLAGS="${CXXFLAGS}" CGO_LDFLAGS="${LDFLAGS}" \ go build -trimpath \ - -buildmode=pie \ -mod=readonly \ -modcacherw \ -ldflags "-linkmode external -extldflags \"${LDFLAGS}\"" diff --git a/bore/PKGBUILD b/bore/PKGBUILD index c661321155..019564a944 100644 --- a/bore/PKGBUILD +++ b/bore/PKGBUILD @@ -14,7 +14,7 @@ sha512sums=('55d783a46e25393fc003d9c90760c141692af88fe88fecfc27c632bdc6d33523096 prepare() { cd "$pkgname-$pkgver" - cargo fetch --locked --target "$CARCH-unknown-linux-gnu" + cargo fetch --locked --target "`uname -m`-unknown-linux-gnu" } build() { diff --git a/borg/PKGBUILD b/borg/PKGBUILD index b7754cd501..e318eec732 100644 --- a/borg/PKGBUILD +++ b/borg/PKGBUILD @@ -30,7 +30,7 @@ build() { } check() { - cd "$_pkgname-$pkgver/build/lib.linux-$CARCH-cpython-"*/ + cd "$_pkgname-$pkgver/build/lib.linux-`uname -m`-cpython-"*/ PYTHONPATH=$PWD PYTHONDONTWRITEBYTECODE=1 pytest -k 'not benchmark' } diff --git a/bottom/PKGBUILD b/bottom/PKGBUILD index 12732a5662..4fc5eed06c 100644 --- a/bottom/PKGBUILD +++ b/bottom/PKGBUILD @@ -17,7 +17,7 @@ b2sums=('201484c33cb9978776fe089a04b0b231cfaf719c9210f678ba3909f50cd2a078295e3fc prepare() { cd "$pkgname-$pkgver" - cargo fetch --locked --target "$CARCH-unknown-linux-gnu" + cargo fetch --locked --target "`uname -m`-unknown-linux-gnu" } build() { diff --git a/box2d/PKGBUILD b/box2d/PKGBUILD index 31a0e8510f..6ebe55f6a8 100644 --- a/box2d/PKGBUILD +++ b/box2d/PKGBUILD @@ -11,8 +11,15 @@ depends=('gcc-libs') makedepends=('cmake' 'doctest' 'doxygen' 'ninja') # We're going to this alternate fork until the patches are upstreamed. 
# See https://github.com/erincatto/box2d/issues/621 -source=("$pkgname-$pkgver.tar.gz::https://github.com/erincatto/Box2D/archive/v${pkgver}.tar.gz") -sha512sums=('d900f925b77906777719c91488bdc5e2df1ad1f4a8ca39a574229f5e57070e3a843bdd7530e817112605fde6d82145c872d8afdfc65b84531a73199098c81162') +source=("$pkgname-$pkgver.tar.gz::https://github.com/erincatto/Box2D/archive/v${pkgver}.tar.gz" + box2d-fix-build.patch) +sha512sums=('d900f925b77906777719c91488bdc5e2df1ad1f4a8ca39a574229f5e57070e3a843bdd7530e817112605fde6d82145c872d8afdfc65b84531a73199098c81162' + '74055d49f0b9f601c2e68576aa3e0ef43c061beb428cda3de847d5b9fb8bf6adb74b69521264f9aea048d268e5104be6bafdcfb0cfb09aec1de1662d263235e9') + +prepare() { + cd $pkgname-$pkgver + patch -p1 -i $srcdir/box2d-fix-build.patch +} prepare() { # Use system doctest diff --git a/box2d/box2d-fix-build.patch b/box2d/box2d-fix-build.patch new file mode 100644 index 0000000000..0f89e2c772 --- /dev/null +++ b/box2d/box2d-fix-build.patch @@ -0,0 +1,13 @@ +Index: box2d-2.4.1/unit-test/doctest.h +=================================================================== +--- box2d-2.4.1.orig/unit-test/doctest.h ++++ box2d-2.4.1/unit-test/doctest.h +@@ -4018,7 +4018,7 @@ namespace { + static bool isSet; + static struct sigaction oldSigActions[DOCTEST_COUNTOF(signalDefs)]; + static stack_t oldSigStack; +- static char altStackMem[4 * SIGSTKSZ]; ++ static char altStackMem[65536]; + + static void handleSignal(int sig) { + const char* name = ""; diff --git a/boxxy/PKGBUILD b/boxxy/PKGBUILD index 8577810451..97e8be02d7 100644 --- a/boxxy/PKGBUILD +++ b/boxxy/PKGBUILD @@ -15,7 +15,7 @@ options=('!lto') prepare() { cd "$pkgname-$pkgver" - cargo fetch --locked --target "$CARCH-unknown-linux-gnu" + cargo fetch --locked --target "`uname -m`-unknown-linux-gnu" } build() { diff --git a/breezy/PKGBUILD b/breezy/PKGBUILD index a893cbe2f7..6ac69bacbb 100644 --- a/breezy/PKGBUILD +++ b/breezy/PKGBUILD @@ -40,14 +40,21 @@ provides=(bzr) conflicts=(bzr) replaces=(bzr) _tag=d206a54af4040025561cc9159fa5559c14a7ef46 -source=(git+https://github.com/breezy-team/breezy.git#tag=${_tag}) -sha256sums=(SKIP) +source=(git+https://github.com/breezy-team/breezy.git#tag=${_tag} +breezy-fix-install.patch) +sha256sums=('SKIP' + '1e778eae61605bd8a99d57ce97da4ac9ca74fe1b86c371fd55805e96aeb347e8') pkgver() { cd breezy git describe --tags | sed 's/brz-//; s/-/./g' } +prepare() { + cd breezy + patch -p1 -i "$srcdir/breezy-fix-install.patch" +} + build() { cd breezy python -m build --wheel --no-isolation diff --git a/breezy/breezy-fix-install.patch b/breezy/breezy-fix-install.patch new file mode 100644 index 0000000000..94918fc9eb --- /dev/null +++ b/breezy/breezy-fix-install.patch @@ -0,0 +1,15 @@ +--- aaa/setup.py 2023-11-03 11:00:28.748584492 +0800 ++++ /tmp/setup.py 2023-11-03 10:56:36.364171431 +0800 +@@ -193,12 +193,6 @@ + + # ad-hoc for easy_install + DATA_FILES = [] +-if ('bdist_egg' not in sys.argv and 'bdist_wheel' not in sys.argv +- and 'editable_wheel' not in sys.argv): +- # generate and install brz.1 only with plain install, not the +- # easy_install one +- build.sub_commands.append(('build_man', lambda _: True)) +- DATA_FILES = [('man/man1', ['brz.1', 'breezy/git/git-remote-bzr.1'])] + + import site + diff --git a/brltty/PKGBUILD b/brltty/PKGBUILD index 8fe68730ab..16960e56fc 100644 --- a/brltty/PKGBUILD +++ b/brltty/PKGBUILD @@ -117,7 +117,6 @@ package_brltty() { 'libx11: for xbrlapi' 'libxfixes: for xbrlapi' 'libxtst: for xbrlapi' - 'ocaml: OCaml support' 'python: Python support' 
'speech-dispatcher: speech-dispatcher driver' 'tcl: tcl support' diff --git a/broadcom-wl/PKGBUILD b/broadcom-wl/PKGBUILD index 3058b83539..84bfb3269c 100644 --- a/broadcom-wl/PKGBUILD +++ b/broadcom-wl/PKGBUILD @@ -25,7 +25,7 @@ package() { _extramodules="/usr/lib/modules/${_kernver}/extramodules" install -Dm644 -t "${pkgdir}${_extramodules}" \ - ${_module}/${pkgver}/${_kernver}/${CARCH}/module/* + ${_module}/${pkgver}/${_kernver}/`uname -m`/module/* # compress kernel modules find "$pkgdir" -name "*.ko" -exec xz {} + diff --git a/broot/PKGBUILD b/broot/PKGBUILD index b56c320fae..4d9a46ea6c 100644 --- a/broot/PKGBUILD +++ b/broot/PKGBUILD @@ -18,7 +18,7 @@ sha256sums=('0b9bf4a0dfa8a9cdcefcf18222dba4025379a8fa19190075835a99a507ae3d73') prepare() { cd $pkgname-$pkgver - cargo fetch --locked --target "$CARCH-unknown-linux-gnu" + cargo fetch --locked --target "`uname -m`-unknown-linux-gnu" } build() { diff --git a/buildkit/0001-add-loongarch64-support.patch b/buildkit/0001-add-loongarch64-support.patch new file mode 100644 index 0000000000..2833a2b6bd --- /dev/null +++ b/buildkit/0001-add-loongarch64-support.patch @@ -0,0 +1,130 @@ +From e94d2e706531af3efd0235f1d8c7c6fdf31ab5eb Mon Sep 17 00:00:00 2001 +From: Xiaotian Wu +Date: Mon, 13 Nov 2023 10:14:01 +0800 +Subject: [PATCH] add loongarch64 support + +--- + util/archutil/Dockerfile | 6 ++++-- + util/archutil/detect.go | 10 ++++++++++ + util/archutil/fixtures/exit.loongarch64.s | 6 ++++++ + util/archutil/loong64_binary.go | 9 +++++++++ + util/archutil/loong64_check.go | 8 ++++++++ + util/archutil/loong64_check_loong64.go | 8 ++++++++ + 6 files changed, 45 insertions(+), 2 deletions(-) + create mode 100644 util/archutil/fixtures/exit.loongarch64.s + create mode 100644 util/archutil/loong64_binary.go + create mode 100644 util/archutil/loong64_check.go + create mode 100644 util/archutil/loong64_check_loong64.go + +diff --git a/util/archutil/Dockerfile b/util/archutil/Dockerfile +index 2b24b230b..df161291b 100644 +--- a/util/archutil/Dockerfile ++++ b/util/archutil/Dockerfile +@@ -8,7 +8,8 @@ RUN apt-get update && apt-get --no-install-recommends install -y \ + binutils-s390x-linux-gnu \ + binutils-powerpc64le-linux-gnu \ + binutils-mips64el-linux-gnuabi64 \ +- binutils-mips64-linux-gnuabi64 ++ binutils-mips64-linux-gnuabi64 \ ++ binutils-loongarch64-linux-gnu + WORKDIR /src + + +@@ -64,9 +65,10 @@ COPY --from=exit-ppc64 /src/exit ppc64 + COPY --from=exit-ppc64le /src/exit ppc64le + COPY --from=exit-mips64le /src/exit mips64le + COPY --from=exit-mips64 /src/exit mips64 ++COPY --from=exit-loong64 /src/exit loong64 + COPY generate.go . 
+ +-RUN go run generate.go amd64 386 arm64 arm riscv64 s390x ppc64 ppc64le mips64le mips64 && ls -l ++RUN go run generate.go amd64 386 arm64 arm riscv64 s390x ppc64 ppc64le mips64le mips64 loong64 && ls -l + + + FROM scratch +diff --git a/util/archutil/detect.go b/util/archutil/detect.go +index 782644127..b36726c92 100644 +--- a/util/archutil/detect.go ++++ b/util/archutil/detect.go +@@ -78,6 +78,11 @@ func SupportedPlatforms(noCache bool) []ocispecs.Platform { + arr = append(arr, linux(p)) + } + } ++ if p := "loong64"; def.Architecture != p { ++ if _, err := loong64Supported(); err == nil { ++ arr = append(arr, linux(p)) ++ } ++ } + if p := "arm"; def.Architecture != p { + if _, err := armSupported(); err == nil { + p := linux("arm") +@@ -144,6 +149,11 @@ func WarnIfUnsupported(pfs []ocispecs.Platform) { + printPlatformWarning(p, err) + } + } ++ if p.Architecture == "loong64" { ++ if _, err := loong64Supported(); err != nil { ++ printPlatformWarning(p, err) ++ } ++ } + if p.Architecture == "arm" { + if _, err := armSupported(); err != nil { + printPlatformWarning(p, err) +diff --git a/util/archutil/fixtures/exit.loongarch64.s b/util/archutil/fixtures/exit.loongarch64.s +new file mode 100644 +index 000000000..478cd622c +--- /dev/null ++++ b/util/archutil/fixtures/exit.loongarch64.s +@@ -0,0 +1,6 @@ ++ .global _start ++ .text ++_start: ++ li.w $a0,0 ++ li.w $a7,93 ++ syscall 0 +diff --git a/util/archutil/loong64_binary.go b/util/archutil/loong64_binary.go +new file mode 100644 +index 000000000..fa85a4553 +--- /dev/null ++++ b/util/archutil/loong64_binary.go +@@ -0,0 +1,9 @@ ++//go:build !loong64 ++// +build !loong64 ++ ++package archutil ++ ++// This file is generated by running make inside the archutil package. ++// Do not edit manually. ++ ++const Binaryloong64 = "\x1f\x8b\x08\x00\x00\x00\x00\x00\x02\xff\xaa\x77\xf5\x71\x63\x62\x64\x64\x80\x01\x26\x06\x08\x6f\x03\x03\x83\x02\x88\x76\x80\x8a\x5f\x80\xd2\xcc\x60\x31\x0b\x06\x26\x06\x07\x06\x66\x06\x26\x06\x90\x1a\x56\x06\x14\xa0\xc0\x88\x44\xef\x81\x0a\xc2\x68\x98\x81\x81\x4f\x4b\x52\xd8\x18\x88\x07\x02\x50\x9a\x85\x41\x94\x81\xbb\xa4\x91\x99\x81\x41\x9b\x81\x41\xaf\x38\xa3\xb8\xa4\xa8\x24\x31\x89\x41\xaf\x24\xb5\xa2\x84\x81\x0a\x80\x9b\x81\x01\xec\x27\x98\xdb\x60\xe1\xb0\x01\xca\xe7\x41\x53\xcf\x88\x85\xcf\x8c\xc5\x5c\x98\xff\x05\x09\xe8\x07\x04\x00\x00\xff\xff\x31\xd2\xf1\xb5\x90\x01\x00\x00" +diff --git a/util/archutil/loong64_check.go b/util/archutil/loong64_check.go +new file mode 100644 +index 000000000..9bc966ce2 +--- /dev/null ++++ b/util/archutil/loong64_check.go +@@ -0,0 +1,8 @@ ++//go:build !loong64 ++// +build !loong64 ++ ++package archutil ++ ++func loong64Supported() (string, error) { ++ return check("loong64", Binaryloong64) ++} +diff --git a/util/archutil/loong64_check_loong64.go b/util/archutil/loong64_check_loong64.go +new file mode 100644 +index 000000000..b801c5938 +--- /dev/null ++++ b/util/archutil/loong64_check_loong64.go +@@ -0,0 +1,8 @@ ++//go:build loong64 ++// +build loong64 ++ ++package archutil ++ ++func loong64Supported() (string, error) { ++ return "", nil ++} +-- +2.42.0 + diff --git a/buildkit/PKGBUILD b/buildkit/PKGBUILD index 5f874d19c7..3a0143ad43 100644 --- a/buildkit/PKGBUILD +++ b/buildkit/PKGBUILD @@ -12,8 +12,10 @@ depends=('runc' 'containerd') makedepends=('git' 'go') options=('!lto') _commit='567a99433ca23402d5e9b9f9124005d2e59b8861' -source=("$pkgname::git+$url.git#commit=$_commit") -b2sums=('SKIP') +source=("$pkgname::git+$url.git#commit=$_commit" +0001-add-loongarch64-support.patch) 
+b2sums=('SKIP' + '633f1e8e9e66c38f3a21a6c3af14721efd6a2cc6c0201e8492d333a86461a5783eec2c144edfe1ef17ee8385fa29588f30e88d26cf49ce8b477480be3e5459be') pkgver() { cd "$pkgname" @@ -28,6 +30,7 @@ prepare() { # fix paths in systemd unit files sed -i 's:/usr/local:/usr:' \ examples/systemd/{system,user}/*.service + patch -p1 -i $srcdir/0001-add-loongarch64-support.patch # create directory for build output mkdir build @@ -46,6 +49,10 @@ build() { export CGO_CXXFLAGS="${CXXFLAGS}" export GOPATH="${srcdir}" + go mod edit -replace=golang.org/x/sys=github.com/golang/sys@v0.13.0 + go mod edit -replace=go.etcd.io/bbolt=go.etcd.io/bbolt@v1.3.7-0.20221114114133-eedea6cb26ef + go mod tidy + local package='github.com/moby/buildkit' go build -v \ diff --git a/bupstash/PKGBUILD b/bupstash/PKGBUILD index 9385d3e396..3f1f5b4201 100644 --- a/bupstash/PKGBUILD +++ b/bupstash/PKGBUILD @@ -25,7 +25,7 @@ prepare() { cd "$pkgname" # download dependencies - cargo fetch --locked --target "$CARCH-unknown-linux-gnu" + cargo fetch --locked --target "`uname -m`-unknown-linux-gnu" } build() { cd "$pkgname" diff --git a/caddy/PKGBUILD b/caddy/PKGBUILD index f8ad1ddd82..fe15df98b9 100644 --- a/caddy/PKGBUILD +++ b/caddy/PKGBUILD @@ -64,6 +64,9 @@ build() { export CGO_CFLAGS="${CFLAGS}" export CGO_CXXFLAGS="${CXXFLAGS}" export GOFLAGS="-buildmode=pie -trimpath -ldflags=-linkmode=external -mod=readonly -modcacherw" + go mod edit -replace=go.etcd.io/bbolt=go.etcd.io/bbolt@master + go mod tidy + go build . for i in zsh bash fish; do diff --git a/calf/PKGBUILD b/calf/PKGBUILD index c9a83c4070..0334824c4f 100644 --- a/calf/PKGBUILD +++ b/calf/PKGBUILD @@ -38,7 +38,7 @@ prepare(){ build() { local configure_options=( --enable-experimental - --enable-sse + --disable-sse --prefix=/usr --with-lv2 ) diff --git a/capnet-assist/PKGBUILD b/capnet-assist/PKGBUILD index b70ae7028b..00cb2c6344 100644 --- a/capnet-assist/PKGBUILD +++ b/capnet-assist/PKGBUILD @@ -13,6 +13,8 @@ depends=( glib2 gtk3 libgranite.so + + libhandy-1.so libsoup networkmanager @@ -33,6 +35,12 @@ pkgver() { git describe --tags } +prepare() { + cd capnet-assist + sed -i '7d' data/meson.build + sed -i '16d' data/meson.build +} + build() { arch-meson capnet-assist build \ -D b_pie=false diff --git a/cargo-audit/PKGBUILD b/cargo-audit/PKGBUILD index ce9e916bee..8b8192409b 100644 --- a/cargo-audit/PKGBUILD +++ b/cargo-audit/PKGBUILD @@ -16,7 +16,7 @@ b2sums=('a3fd2dd5d2382fb5dc4733af86a3a9535154e4f8f846f8c8f013f270bd4ac3932f8070e prepare() { cd rustsec-${pkgname}-v${pkgver}/${pkgname} - cargo fetch --locked --target "$CARCH-unknown-linux-gnu" + cargo fetch --locked --target "`uname -m`-unknown-linux-gnu" } build() { diff --git a/cargo-auditable/PKGBUILD b/cargo-auditable/PKGBUILD index 6f9508be55..7db46c6956 100644 --- a/cargo-auditable/PKGBUILD +++ b/cargo-auditable/PKGBUILD @@ -17,7 +17,7 @@ sha512sums=('191b6ef15436bd3c6a9b4666e80de5a085afe00f8ee3793040fc5e5f78eecc25d45 prepare() { cd "$srcdir/$pkgname-$pkgver" patch -Np1 -i "../$pkgname-$pkgver-cargo-lock.patch" - cargo fetch --locked --target "$CARCH-unknown-linux-gnu" + cargo fetch --locked --target "`uname -m`-unknown-linux-gnu" } build() { diff --git a/cargo-bloat/PKGBUILD b/cargo-bloat/PKGBUILD index 8f750ea676..7630c8e653 100644 --- a/cargo-bloat/PKGBUILD +++ b/cargo-bloat/PKGBUILD @@ -14,7 +14,7 @@ sha256sums=('4f338c1a7f7ee6bcac150f7856ed1f32cf8d9009cfd513ca6c1aac1e6685c35f') prepare() { cd "${pkgname}-${pkgver}" - cargo fetch --locked --target "$CARCH-unknown-linux-gnu" + cargo fetch --locked --target "`uname 
-m`-unknown-linux-gnu" } build() { diff --git a/cargo-c/PKGBUILD b/cargo-c/PKGBUILD index 60223ec3ad..ac384b4564 100644 --- a/cargo-c/PKGBUILD +++ b/cargo-c/PKGBUILD @@ -20,7 +20,7 @@ sha256sums=('a52bb78cf6db00aa1caf06c679cfece27357c84367d8ac167d715e05e5f5a778' prepare() { ln -sf "../${pkgname}-${pkgver}.Cargo.lock" "${pkgname}-${pkgver}/Cargo.lock" - cargo fetch --locked --target "${CARCH}-unknown-linux-gnu" --manifest-path="${pkgname}-${pkgver}/Cargo.toml" + cargo fetch --locked --manifest-path="${pkgname}-${pkgver}/Cargo.toml" } build() { diff --git a/cargo-cyclonedx/PKGBUILD b/cargo-cyclonedx/PKGBUILD index de6036a83e..c51f0130d9 100644 --- a/cargo-cyclonedx/PKGBUILD +++ b/cargo-cyclonedx/PKGBUILD @@ -21,7 +21,7 @@ b2sums=('c1907710867b3c1342cc5c9661a095c5c4f62c52a8284eccba8ff71398d933667924df6 prepare() { cd $_upstream_name-$pkgname-$pkgver export RUSTUP_TOOLCHAIN=stable - cargo fetch --locked --target "$CARCH-unknown-linux-gnu" + cargo fetch --locked --target "`uname -m`-unknown-linux-gnu" } build() { diff --git a/cargo-depgraph/PKGBUILD b/cargo-depgraph/PKGBUILD index 089a877bb1..a7539be79e 100644 --- a/cargo-depgraph/PKGBUILD +++ b/cargo-depgraph/PKGBUILD @@ -17,7 +17,7 @@ sha256sums=('d447316253217e0157af027c50bca10e84eba9f27b4f7c9642bcf38ad36d4766' prepare() { cd "$pkgname-$pkgver" patch -Np1 -i "$srcdir/$pkgname-$pkgver-lockfile.patch" - cargo fetch --locked --target "$CARCH-unknown-linux-gnu" + cargo fetch --locked --target "$(rustc -vV | sed -n 's/host: //p')" } build() { diff --git a/cargo-edit/PKGBUILD b/cargo-edit/PKGBUILD index bf4c5bed03..d095bbdfc7 100644 --- a/cargo-edit/PKGBUILD +++ b/cargo-edit/PKGBUILD @@ -17,7 +17,7 @@ b2sums=('37e91b5eb41fd56e2be382ee77bd6a6c859d1e1d7c99d45c2597e1a24194ea79ad1c563 prepare() { cd "${pkgname}-${pkgver}" sed -i '/\"vendored-libgit2\"/d' Cargo.toml - cargo fetch --locked --target "$CARCH-unknown-linux-gnu" + cargo fetch --locked --target "`uname -m`-unknown-linux-gnu" } build() { diff --git a/cargo-expand/PKGBUILD b/cargo-expand/PKGBUILD index 33eb14c8b5..35bca5f34d 100644 --- a/cargo-expand/PKGBUILD +++ b/cargo-expand/PKGBUILD @@ -16,7 +16,7 @@ options=('!lto') prepare() { cd "$pkgname-$pkgver" - cargo fetch --locked --target "$CARCH-unknown-linux-gnu" + cargo fetch --locked --target "`uname -m`-unknown-linux-gnu" } build() { diff --git a/cargo-geiger/PKGBUILD b/cargo-geiger/PKGBUILD index eb5f8ae45f..39a7bdc300 100644 --- a/cargo-geiger/PKGBUILD +++ b/cargo-geiger/PKGBUILD @@ -16,7 +16,7 @@ options=('!lto') prepare() { mv "$pkgname-$pkgname-v$pkgver" "$pkgname-$pkgver" cd "$pkgname-$pkgver" - cargo fetch --locked --target "$CARCH-unknown-linux-gnu" + cargo fetch --locked --target "`uname -m`-unknown-linux-gnu" } build() { diff --git a/cargo-generate/PKGBUILD b/cargo-generate/PKGBUILD index a1eba51572..5e97546f6a 100644 --- a/cargo-generate/PKGBUILD +++ b/cargo-generate/PKGBUILD @@ -14,7 +14,7 @@ sha256sums=('520e7a98bf82f368e911c14e774f8ef16a4c8ffd785d492c9d518ee563dc3864') prepare() { cd "$pkgname-$pkgver" - cargo fetch --locked --target "$CARCH-unknown-linux-gnu" + cargo fetch --locked --target "`uname -m`-unknown-linux-gnu" } build() { diff --git a/cargo-insta/PKGBUILD b/cargo-insta/PKGBUILD index 1088ebd7c0..5049b5775c 100644 --- a/cargo-insta/PKGBUILD +++ b/cargo-insta/PKGBUILD @@ -17,7 +17,7 @@ sha512sums=('0ee791792e5324f8e53efb645ae862965b973498059cd32e66f379a971d5ffadac2 prepare() { cd "$pkgname-$pkgver" export RUSTUP_TOOLCHAIN=stable - cargo fetch --locked --target "$CARCH-unknown-linux-gnu" + cargo fetch --locked --target 
"`uname -m`-unknown-linux-gnu" } build() { diff --git a/cargo-machete/PKGBUILD b/cargo-machete/PKGBUILD index 608c786fba..88725e4382 100644 --- a/cargo-machete/PKGBUILD +++ b/cargo-machete/PKGBUILD @@ -13,7 +13,7 @@ sha256sums=('a13fab0c5ff64907e6b39dee054e5e9c4278fbe06065ff5bfcb160a5c1d204ea') prepare() { cd "$pkgname-$pkgver" - cargo fetch --locked --target "$CARCH-unknown-linux-gnu" + cargo fetch --locked --target "`uname -m`-unknown-linux-gnu" } build() { diff --git a/cargo-msrv/PKGBUILD b/cargo-msrv/PKGBUILD index 56b9f2cc8b..86de6152b0 100644 --- a/cargo-msrv/PKGBUILD +++ b/cargo-msrv/PKGBUILD @@ -16,7 +16,7 @@ options=('!lto') prepare() { cd "$pkgname-$pkgver" export RUSTUP_TOOLCHAIN=stable - cargo fetch --locked --target "$CARCH-unknown-linux-gnu" + cargo fetch --locked --target "`uname -m`-unknown-linux-gnu" } build() { diff --git a/cargo-ndk/PKGBUILD b/cargo-ndk/PKGBUILD index 758c926256..7d32b53f30 100644 --- a/cargo-ndk/PKGBUILD +++ b/cargo-ndk/PKGBUILD @@ -14,7 +14,7 @@ sha256sums=('7756f00ff040030c64e6590ec6ffe59245165b9c78350462d960e5ff6fe12dcd') prepare() { cd "$pkgname-$pkgver" - cargo fetch --locked --target "$CARCH-unknown-linux-gnu" + cargo fetch --locked --target "`uname -m`-unknown-linux-gnu" } build() { diff --git a/cargo-outdated/PKGBUILD b/cargo-outdated/PKGBUILD index e6dffe7a0b..09a4c8e5e7 100644 --- a/cargo-outdated/PKGBUILD +++ b/cargo-outdated/PKGBUILD @@ -20,7 +20,7 @@ options=('!lto') prepare() { cd "${pkgname}-${pkgver}" - cargo fetch --locked --target "$CARCH-unknown-linux-gnu" + cargo fetch --locked --target "`uname -m`-unknown-linux-gnu" } build() { diff --git a/cargo-pgrx/PKGBUILD b/cargo-pgrx/PKGBUILD index b6f2a3bd13..d7435d1672 100644 --- a/cargo-pgrx/PKGBUILD +++ b/cargo-pgrx/PKGBUILD @@ -26,7 +26,7 @@ prepare() { cd "$pkgname" # download dependencies - cargo fetch --locked --target "$CARCH-unknown-linux-gnu" + cargo fetch --locked --target "`uname -m`-unknown-linux-gnu" } build() { diff --git a/cargo-sort/PKGBUILD b/cargo-sort/PKGBUILD index bbe3b5b55b..4944d3abe3 100644 --- a/cargo-sort/PKGBUILD +++ b/cargo-sort/PKGBUILD @@ -19,7 +19,7 @@ prepare() { cd "$pkgname-$pkgver" # download dependencies - cargo fetch --locked --target "$CARCH-unknown-linux-gnu" + cargo fetch --locked --target "`uname -m`-unknown-linux-gnu" } build() { diff --git a/cargo-spellcheck/PKGBUILD b/cargo-spellcheck/PKGBUILD index 356c499b53..b3eceb53a7 100644 --- a/cargo-spellcheck/PKGBUILD +++ b/cargo-spellcheck/PKGBUILD @@ -16,7 +16,7 @@ options=('!lto') prepare() { cd "$pkgname-$pkgver" - cargo fetch --locked --target "$CARCH-unknown-linux-gnu" + cargo fetch --locked --target "`uname -m`-unknown-linux-gnu" mkdir -p completions } diff --git a/cargo-supply-chain/PKGBUILD b/cargo-supply-chain/PKGBUILD index 8d1806c144..74530895db 100644 --- a/cargo-supply-chain/PKGBUILD +++ b/cargo-supply-chain/PKGBUILD @@ -17,7 +17,7 @@ b2sums=('c196e9b8bd6882c6fa4360f27d623ff91275a0209612a1b74043a9869ead3e21557052d prepare() { cd "$pkgname-$pkgver" - cargo fetch --locked --target "$CARCH-unknown-linux-gnu" + cargo fetch --locked --target "`uname -m`-unknown-linux-gnu" } build() { diff --git a/cargo-tarpaulin/PKGBUILD b/cargo-tarpaulin/PKGBUILD index bd03da7725..2569570aa0 100644 --- a/cargo-tarpaulin/PKGBUILD +++ b/cargo-tarpaulin/PKGBUILD @@ -28,7 +28,7 @@ b2sums=('SKIP') prepare() { cargo fetch \ --locked \ - --target $CARCH-unknown-linux-gnu \ + --target `uname -m`-unknown-linux-gnu \ --manifest-path tarpaulin/Cargo.toml } diff --git a/cargo-tauri/PKGBUILD b/cargo-tauri/PKGBUILD index 
f3bd33b2af..9d5dfc08eb 100644 --- a/cargo-tauri/PKGBUILD +++ b/cargo-tauri/PKGBUILD @@ -16,7 +16,7 @@ options=('!lto') prepare() { mv "$_pkgname-tauri-cli-v$pkgver" "$pkgname-$pkgver" cd "$pkgname-$pkgver/tooling/cli" - cargo fetch --locked --target "$CARCH-unknown-linux-gnu" + cargo fetch --locked --target "`uname -m`-unknown-linux-gnu" mkdir -p completions } diff --git a/cargo-udeps/PKGBUILD b/cargo-udeps/PKGBUILD index 05aa82f5a1..217f1ec32f 100644 --- a/cargo-udeps/PKGBUILD +++ b/cargo-udeps/PKGBUILD @@ -15,14 +15,14 @@ sha256sums=('e5839d74071c44efb44ae33859ff438ff5823c007960889f567b2c2c33cff4d1') prepare() { cd "$pkgname-$pkgver" - cargo fetch --locked --target "$CARCH-unknown-linux-gnu" + cargo fetch --target "`uname -m`-unknown-linux-gnu" } build() { cd "$pkgname-$pkgver" export LIBSSH2_SYS_USE_PKG_CONFIG=1 CFLAGS+=" -ffat-lto-objects" - cargo build --release --frozen + cargo build --release } # Tests require rustup nightly diff --git a/cargo-update/PKGBUILD b/cargo-update/PKGBUILD index 4d11ce49ca..98ddb27abd 100644 --- a/cargo-update/PKGBUILD +++ b/cargo-update/PKGBUILD @@ -19,7 +19,7 @@ options=('!lto') prepare() { cd "$pkgname-$pkgver" cp "$srcdir/Cargo.lock" . - cargo fetch --locked --target "$CARCH-unknown-linux-gnu" + cargo fetch --locked --target "`uname -m`-unknown-linux-gnu" } build() { diff --git a/cargo-watch/PKGBUILD b/cargo-watch/PKGBUILD index 8126017898..5ff224d1a6 100644 --- a/cargo-watch/PKGBUILD +++ b/cargo-watch/PKGBUILD @@ -17,7 +17,7 @@ b2sums=('f6b1a250b0ba4a79d525f7d2038f1a4ddae1495261c38cff1c32f6f73dae500a689dfcd prepare() { cd "${pkgname}-${pkgver}" - cargo fetch --locked --target "$CARCH-unknown-linux-gnu" + cargo fetch --locked --target "`uname -m`-unknown-linux-gnu" } build() { diff --git a/cargo2junit/PKGBUILD b/cargo2junit/PKGBUILD index b483e643a2..6eeacc6413 100644 --- a/cargo2junit/PKGBUILD +++ b/cargo2junit/PKGBUILD @@ -14,7 +14,7 @@ sha256sums=('647c41ce7416421f41dee298a6fb99ad8be7c584c7c16c1a7926720eb9777376') prepare() { cd "$pkgname-$pkgver" - cargo fetch --locked --target "$CARCH-unknown-linux-gnu" + cargo fetch --locked --target "`uname -m`-unknown-linux-gnu" } build() { diff --git a/cbindgen/PKGBUILD b/cbindgen/PKGBUILD index cf1977d861..1ceb6614a7 100644 --- a/cbindgen/PKGBUILD +++ b/cbindgen/PKGBUILD @@ -34,7 +34,7 @@ pkgver() { prepare() { cd cbindgen - cargo fetch --locked --target x86_64-unknown-linux-gnu + cargo fetch --locked --target `uname -m`-unknown-linux-gnu } build() { diff --git a/cdparanoia/PKGBUILD b/cdparanoia/PKGBUILD index e69b576a4b..17143d000c 100644 --- a/cdparanoia/PKGBUILD +++ b/cdparanoia/PKGBUILD @@ -31,6 +31,8 @@ prepare() { build() { cd cdparanoia-III-$pkgver + CFLAGS=${CFLAGS/-Werror=format-security/} + CXXFLAGS=${CXXFLAGS/-Werror=format-security/} ./configure --prefix=/usr --mandir=/usr/share/man make } diff --git a/cdrtools/PKGBUILD b/cdrtools/PKGBUILD index 535b5f58e4..2018940579 100644 --- a/cdrtools/PKGBUILD +++ b/cdrtools/PKGBUILD @@ -12,10 +12,17 @@ provides=('cdrkit') replaces=('cdrkit') options=(!makeflags) backup=('etc/default/cdrecord' 'etc/default/rscsi') -source=(https://downloads.sourceforge.net/cdrtools/cdrtools-$pkgver.tar.bz2) -sha256sums=('aa28438f458ef3f314b79f2029db27679dae1d5ffe1569b6de57742511915e81') +source=(https://downloads.sourceforge.net/cdrtools/cdrtools-$pkgver.tar.bz2 + cdrtools-la64.patch) +sha256sums=('aa28438f458ef3f314b79f2029db27679dae1d5ffe1569b6de57742511915e81' + '2a3925ae1293a84277179497d7f0f17789447f8fa72c90f012ac36d36b3edc30') install=cdrtools.install +prepare() { + cd 
"$srcdir"/cdrtools-${pkgver%%a*} + patch -p1 -i "$srcdir/cdrtools-la64.patch" +} + build() { cd "$srcdir"/cdrtools-${pkgver%%a*} sed -i 's|/opt/schily|/usr|g' DEFAULTS/Defaults.linux diff --git a/cdrtools/cdrtools-la64.patch b/cdrtools/cdrtools-la64.patch new file mode 100644 index 0000000000..db1227b2ac --- /dev/null +++ b/cdrtools/cdrtools-la64.patch @@ -0,0 +1,16 @@ +Index: cdrtools-3.02/RULES/MKLINKS +=================================================================== +--- cdrtools-3.02.orig/RULES/MKLINKS ++++ cdrtools-3.02/RULES/MKLINKS +@@ -262,6 +262,11 @@ $symlink i586-linux-clang64.rul aarch64- + $symlink i586-linux-gcc.rul aarch64-linux-gcc.rul + $symlink i586-linux-gcc32.rul aarch64-linux-gcc32.rul + $symlink i586-linux-gcc64.rul aarch64-linux-gcc64.rul ++$symlink i586-linux-cc.rul loongarch64-linux-cc.rul ++$symlink i586-linux-clang.rul loongarch64-linux-clang.rul ++$symlink i586-linux-clang64.rul loongarch64-linux-clang64.rul ++$symlink i586-linux-gcc.rul loongarch64-linux-gcc.rul ++$symlink i586-linux-gcc64.rul loongarch64-linux-gcc64.rul + $symlink i586-linux-cc.rul sh3-linux-cc.rul + $symlink i586-linux-gcc.rul sh3-linux-gcc.rul + $symlink i586-linux-cc.rul sh4-linux-cc.rul diff --git a/chezmoi/PKGBUILD b/chezmoi/PKGBUILD index ebee5ffa5b..b5dcf9e302 100644 --- a/chezmoi/PKGBUILD +++ b/chezmoi/PKGBUILD @@ -25,6 +25,11 @@ prepare() { build() { cd "$pkgname-$pkgver" + export GOPROXY=https://goproxy.cn + go mod edit -replace=golang.org/x/sys=github.com/golang/sys@v0.0.0-20220622161953-175b2fd9d664 + go mod edit -replace=golang.org/x/net=github.com/golang/net@v0.0.0-20220622184535-263ec571b305 + go mod edit -replace=go.etcd.io/bbolt=go.etcd.io/bbolt@master + go mod tidy export CGO_LDFLAGS="${LDFLAGS}" export CGO_CFLAGS="${CFLAGS}" diff --git a/chmlib/PKGBUILD b/chmlib/PKGBUILD index 617a5cf5cb..df56185a35 100644 --- a/chmlib/PKGBUILD +++ b/chmlib/PKGBUILD @@ -10,8 +10,15 @@ arch=('loong64' 'x86_64') url="http://www.jedrea.com/chmlib/" license=('LGPL') depends=('glibc') -source=("http://www.jedrea.com/chmlib/chmlib-0.40.tar.bz2") -sha256sums=('3449d64b0cf71578b2c7e3ddc048d4af3661f44a83941ea074a7813f3a59ffa3') +source=("http://www.jedrea.com/chmlib/chmlib-0.40.tar.bz2" + chmlib-fix-gcc13.patch) +sha256sums=('3449d64b0cf71578b2c7e3ddc048d4af3661f44a83941ea074a7813f3a59ffa3' + 'b5f792c16b01c9ad0a08e4f2c55134c0bb5d93096f1d5634fd65a0da8ac5ab9e') + +prepare() { + cd "$pkgname-$pkgver" + patch -p1 -i "$srcdir/chmlib-fix-gcc13.patch" +} build() { cd "${srcdir}"/${pkgname}-${pkgver} diff --git a/chmlib/chmlib-fix-gcc13.patch b/chmlib/chmlib-fix-gcc13.patch new file mode 100644 index 0000000000..1c32dc614b --- /dev/null +++ b/chmlib/chmlib-fix-gcc13.patch @@ -0,0 +1,25 @@ +Index: chmlib-0.40/src/chm_http.c +=================================================================== +--- chmlib-0.40.orig/src/chm_http.c ++++ chmlib-0.40/src/chm_http.c +@@ -42,6 +42,7 @@ + /* includes for networking */ + #include + #include ++#include + #include + + /* threading includes */ +Index: chmlib-0.40/src/chm_lib.c +=================================================================== +--- chmlib-0.40.orig/src/chm_lib.c ++++ chmlib-0.40/src/chm_lib.c +@@ -164,7 +164,7 @@ typedef unsigned long long UInt64; + + /* x86-64 */ + /* Note that these may be appropriate for other 64-bit machines. 
*/ +-#elif __x86_64__ || __ia64__ ++#elif __x86_64__ || __ia64__ || __loongarch_lp64 + typedef unsigned char UChar; + typedef short Int16; + typedef unsigned short UInt16; diff --git a/choose/PKGBUILD b/choose/PKGBUILD index 50d6b223d0..ec05a1c13a 100644 --- a/choose/PKGBUILD +++ b/choose/PKGBUILD @@ -25,7 +25,7 @@ pkgver() { prepare() { cd "$pkgname" - cargo fetch --locked --target "$CARCH-unknown-linux-gnu" + cargo fetch --locked --target "`uname -m`-unknown-linux-gnu" } build() { diff --git a/clamav/PKGBUILD b/clamav/PKGBUILD index 15409b5f97..7d0052acef 100644 --- a/clamav/PKGBUILD +++ b/clamav/PKGBUILD @@ -14,7 +14,7 @@ arch=('loong64' 'x86_64') depends=('bzip2' 'libltdl' 'libxml2' 'curl' 'systemd-libs' 'pcre2' 'json-c' 'libmspack' 'ncurses' libsystemd.so libncursesw.so libcurl.so libjson-c.so libbz2.so) makedepends=('libmilter' 'systemd' 'cmake' 'ninja' 'python' 'cargo') -checkdepends=('check') +makedepends+=('check') backup=('etc/clamav/clamd.conf' 'etc/clamav/freshclam.conf' 'etc/clamav/clamav-milter.conf' diff --git a/clang14/0001-add-loong64-support.patch b/clang14/0001-add-loong64-support.patch new file mode 100644 index 0000000000..4c6ae8d62f --- /dev/null +++ b/clang14/0001-add-loong64-support.patch @@ -0,0 +1,16205 @@ +From e5f62e4b6e97e38353668baeac0ef7219992aa63 Mon Sep 17 00:00:00 2001 +From: Xiaotian Wu +Date: Tue, 20 Dec 2022 18:53:42 +0800 +Subject: [PATCH 1/2] add loong64 support + +--- + bindings/python/tests/CMakeLists.txt | 2 +- + include/clang/Basic/BuiltinsLoongArch.def | 1974 ++++++ + include/clang/Basic/DiagnosticDriverKinds.td | 2 + + include/clang/Basic/TargetBuiltins.h | 13 +- + include/clang/Basic/TargetCXXABI.def | 6 + + include/clang/Basic/TargetCXXABI.h | 6 + + include/clang/Driver/Options.td | 21 +- + include/clang/Sema/Sema.h | 3 + + include/clang/module.modulemap | 1 + + lib/AST/ASTContext.cpp | 2 + + lib/Basic/CMakeLists.txt | 1 + + lib/Basic/Targets.cpp | 20 + + lib/Basic/Targets/LoongArch.cpp | 149 + + lib/Basic/Targets/LoongArch.h | 352 ++ + lib/CodeGen/CodeGenFunction.cpp | 36 +- + lib/CodeGen/CodeGenFunction.h | 4 + + lib/CodeGen/CodeGenModule.cpp | 17 +- + lib/CodeGen/CodeGenModule.h | 5 - + lib/CodeGen/ItaniumCXXABI.cpp | 3 + + lib/CodeGen/TargetInfo.cpp | 555 ++ + lib/Driver/CMakeLists.txt | 1 + + lib/Driver/Driver.cpp | 16 + + lib/Driver/SanitizerArgs.cpp | 13 - + lib/Driver/ToolChains/Arch/LoongArch.cpp | 179 + + lib/Driver/ToolChains/Arch/LoongArch.h | 41 + + lib/Driver/ToolChains/Clang.cpp | 55 + + lib/Driver/ToolChains/Clang.h | 4 + + lib/Driver/ToolChains/CommonArgs.cpp | 21 + + lib/Driver/ToolChains/Gnu.cpp | 65 + + lib/Driver/ToolChains/Linux.cpp | 24 +- + lib/Driver/ToolChains/Linux.h | 5 - + lib/Driver/XRayArgs.cpp | 2 + + lib/Headers/CMakeLists.txt | 3 + + lib/Headers/larchintrin.h | 319 + + lib/Headers/lasxintrin.h | 5349 +++++++++++++++++ + lib/Headers/lsxintrin.h | 5165 ++++++++++++++++ + lib/Sema/SemaChecking.cpp | 544 ++ + lib/Sema/SemaTemplateInstantiateDecl.cpp | 5 +- + test/CodeGen/sanitize-coverage-old-pm.c | 4 +- + test/CodeGen/ubsan-function.cpp | 5 +- + test/CodeGenCXX/catch-undef-behavior.cpp | 37 +- + test/CodeGenCXX/ubsan-function-noexcept.cpp | 6 +- + test/Driver/baremetal-sysroot.cpp | 2 +- + test/Driver/baremetal.cpp | 2 +- + test/Driver/fsanitize.c | 9 +- + test/Driver/hexagon-toolchain-linux.c | 4 +- + test/Driver/mips-cs.cpp | 48 +- + test/Driver/stack-protector.c | 4 +- + test/Preprocessor/init.c | 30 + + .../InterpreterExceptionTest.cpp | 5 + + 50 files changed, 15017 insertions(+), 122 deletions(-) + create 
mode 100644 include/clang/Basic/BuiltinsLoongArch.def + create mode 100644 lib/Basic/Targets/LoongArch.cpp + create mode 100644 lib/Basic/Targets/LoongArch.h + create mode 100644 lib/Driver/ToolChains/Arch/LoongArch.cpp + create mode 100644 lib/Driver/ToolChains/Arch/LoongArch.h + create mode 100644 lib/Headers/larchintrin.h + create mode 100644 lib/Headers/lasxintrin.h + create mode 100644 lib/Headers/lsxintrin.h + +diff --git a/bindings/python/tests/CMakeLists.txt b/bindings/python/tests/CMakeLists.txt +index 280da9d0..9d9cb911 100644 +--- a/bindings/python/tests/CMakeLists.txt ++++ b/bindings/python/tests/CMakeLists.txt +@@ -40,7 +40,7 @@ endif() + # addressed. + # SystemZ has broken Python/FFI interface: + # https://reviews.llvm.org/D52840#1265716 +-if(${LLVM_NATIVE_ARCH} MATCHES "^(AArch64|Hexagon|Sparc|SystemZ)$") ++if(${LLVM_NATIVE_ARCH} MATCHES "^(AArch64|Hexagon|LoongArch|Sparc|SystemZ)$") + set(RUN_PYTHON_TESTS FALSE) + endif() + +diff --git a/include/clang/Basic/BuiltinsLoongArch.def b/include/clang/Basic/BuiltinsLoongArch.def +new file mode 100644 +index 00000000..5606e62d +--- /dev/null ++++ b/include/clang/Basic/BuiltinsLoongArch.def +@@ -0,0 +1,1974 @@ ++//===-- BuiltinsLoongArch.def - LoongArch Builtin function database --------*- C++ -*-==// ++// ++// The LLVM Compiler Infrastructure ++// ++// This file is distributed under the University of Illinois Open Source ++// License. See LICENSE.TXT for details. ++// ++//===----------------------------------------------------------------------===// ++// ++// This file defines the LoongArch-specific builtin function database. Users of ++// this file must define the BUILTIN macro to make use of this information. ++// ++//===----------------------------------------------------------------------===// ++ ++// The format of this database matches clang/Basic/Builtins.def. 
++ ++// LoongArch LSX ++ ++BUILTIN(__builtin_lsx_vclo_b, "V16ScV16Sc", "nc") ++BUILTIN(__builtin_lsx_vclo_h, "V8SsV8Ss", "nc") ++BUILTIN(__builtin_lsx_vclo_w, "V4SiV4Si", "nc") ++BUILTIN(__builtin_lsx_vclo_d, "V2SLLiV2SLLi", "nc") ++ ++BUILTIN(__builtin_lsx_vflogb_s, "V4fV4f", "nc") ++BUILTIN(__builtin_lsx_vflogb_d, "V2dV2d", "nc") ++ ++BUILTIN(__builtin_lsx_vpickve2gr_b, "iV16ScIUi", "nc") ++BUILTIN(__builtin_lsx_vpickve2gr_h, "iV8SsIUi", "nc") ++BUILTIN(__builtin_lsx_vpickve2gr_w, "iV4SiIUi", "nc") ++BUILTIN(__builtin_lsx_vpickve2gr_d, "LLiV2SLLiIUi", "nc") ++ ++BUILTIN(__builtin_lsx_vpickve2gr_bu, "iV16UcIUi", "nc") ++BUILTIN(__builtin_lsx_vpickve2gr_hu, "iV8UsIUi", "nc") ++BUILTIN(__builtin_lsx_vpickve2gr_wu, "iV4UiIUi", "nc") ++BUILTIN(__builtin_lsx_vpickve2gr_du, "LLiV2ULLiIUi", "nc") ++ ++BUILTIN(__builtin_lsx_vreplvei_b, "V16cV16cIUi", "nc") ++BUILTIN(__builtin_lsx_vreplvei_h, "V8sV8sIUi", "nc") ++BUILTIN(__builtin_lsx_vreplvei_w, "V4iV4iIUi", "nc") ++BUILTIN(__builtin_lsx_vreplvei_d, "V2LLiV2LLiIUi", "nc") ++ ++BUILTIN(__builtin_lsx_vmskltz_b, "V16cV16c", "nc") ++BUILTIN(__builtin_lsx_vmskltz_h, "V8sV8s", "nc") ++BUILTIN(__builtin_lsx_vmskltz_w, "V4iV4i", "nc") ++BUILTIN(__builtin_lsx_vmskltz_d, "V2LLiV2LLi", "nc") ++ ++BUILTIN(__builtin_lsx_vfmadd_s, "V4fV4fV4fV4f", "nc") ++BUILTIN(__builtin_lsx_vfmadd_d, "V2dV2dV2dV2d", "nc") ++ ++BUILTIN(__builtin_lsx_vfmsub_s, "V4fV4fV4fV4f", "nc") ++BUILTIN(__builtin_lsx_vfmsub_d, "V2dV2dV2dV2d", "nc") ++ ++BUILTIN(__builtin_lsx_vfnmadd_s, "V4fV4fV4fV4f", "nc") ++BUILTIN(__builtin_lsx_vfnmadd_d, "V2dV2dV2dV2d", "nc") ++ ++BUILTIN(__builtin_lsx_vfnmsub_s, "V4fV4fV4fV4f", "nc") ++BUILTIN(__builtin_lsx_vfnmsub_d, "V2dV2dV2dV2d", "nc") ++ ++BUILTIN(__builtin_lsx_vfcmp_caf_s, "V4SiV4fV4f", "nc") ++BUILTIN(__builtin_lsx_vfcmp_caf_d, "V2SLLiV2dV2d", "nc") ++ ++BUILTIN(__builtin_lsx_vfcmp_cor_s, "V4SiV4fV4f", "nc") ++BUILTIN(__builtin_lsx_vfcmp_cor_d, "V2SLLiV2dV2d", "nc") ++ ++BUILTIN(__builtin_lsx_vfcmp_cun_s, "V4SiV4fV4f", "nc") ++BUILTIN(__builtin_lsx_vfcmp_cun_d, "V2SLLiV2dV2d", "nc") ++ ++BUILTIN(__builtin_lsx_vfcmp_cune_s, "V4SiV4fV4f", "nc") ++BUILTIN(__builtin_lsx_vfcmp_cune_d, "V2SLLiV2dV2d", "nc") ++ ++BUILTIN(__builtin_lsx_vfcmp_cueq_s, "V4SiV4fV4f", "nc") ++BUILTIN(__builtin_lsx_vfcmp_cueq_d, "V2SLLiV2dV2d", "nc") ++ ++BUILTIN(__builtin_lsx_vfcmp_ceq_s, "V4SiV4fV4f", "nc") ++BUILTIN(__builtin_lsx_vfcmp_ceq_d, "V2SLLiV2dV2d", "nc") ++ ++BUILTIN(__builtin_lsx_vfcmp_cne_s, "V4SiV4fV4f", "nc") ++BUILTIN(__builtin_lsx_vfcmp_cne_d, "V2SLLiV2dV2d", "nc") ++ ++BUILTIN(__builtin_lsx_vfcmp_clt_s, "V4SiV4fV4f", "nc") ++BUILTIN(__builtin_lsx_vfcmp_clt_d, "V2SLLiV2dV2d", "nc") ++ ++BUILTIN(__builtin_lsx_vfcmp_cult_s, "V4SiV4fV4f", "nc") ++BUILTIN(__builtin_lsx_vfcmp_cult_d, "V2SLLiV2dV2d", "nc") ++ ++BUILTIN(__builtin_lsx_vfcmp_cle_s, "V4SiV4fV4f", "nc") ++BUILTIN(__builtin_lsx_vfcmp_cle_d, "V2SLLiV2dV2d", "nc") ++ ++BUILTIN(__builtin_lsx_vfcmp_cule_s, "V4SiV4fV4f", "nc") ++BUILTIN(__builtin_lsx_vfcmp_cule_d, "V2SLLiV2dV2d", "nc") ++ ++BUILTIN(__builtin_lsx_vfcmp_saf_s, "V4SiV4fV4f", "nc") ++BUILTIN(__builtin_lsx_vfcmp_saf_d, "V2SLLiV2dV2d", "nc") ++ ++BUILTIN(__builtin_lsx_vfcmp_sor_s, "V4SiV4fV4f", "nc") ++BUILTIN(__builtin_lsx_vfcmp_sor_d, "V2SLLiV2dV2d", "nc") ++ ++BUILTIN(__builtin_lsx_vfcmp_sun_s, "V4SiV4fV4f", "nc") ++BUILTIN(__builtin_lsx_vfcmp_sun_d, "V2SLLiV2dV2d", "nc") ++ ++BUILTIN(__builtin_lsx_vfcmp_sune_s, "V4SiV4fV4f", "nc") ++BUILTIN(__builtin_lsx_vfcmp_sune_d, "V2SLLiV2dV2d", "nc") ++ ++BUILTIN(__builtin_lsx_vfcmp_sueq_s, 
"V4SiV4fV4f", "nc") ++BUILTIN(__builtin_lsx_vfcmp_sueq_d, "V2SLLiV2dV2d", "nc") ++ ++BUILTIN(__builtin_lsx_vfcmp_seq_s, "V4SiV4fV4f", "nc") ++BUILTIN(__builtin_lsx_vfcmp_seq_d, "V2SLLiV2dV2d", "nc") ++ ++BUILTIN(__builtin_lsx_vfcmp_sne_s, "V4SiV4fV4f", "nc") ++BUILTIN(__builtin_lsx_vfcmp_sne_d, "V2SLLiV2dV2d", "nc") ++ ++BUILTIN(__builtin_lsx_vfcmp_slt_s, "V4SiV4fV4f", "nc") ++BUILTIN(__builtin_lsx_vfcmp_slt_d, "V2SLLiV2dV2d", "nc") ++ ++BUILTIN(__builtin_lsx_vfcmp_sult_s, "V4SiV4fV4f", "nc") ++BUILTIN(__builtin_lsx_vfcmp_sult_d, "V2SLLiV2dV2d", "nc") ++ ++BUILTIN(__builtin_lsx_vfcmp_sle_s, "V4SiV4fV4f", "nc") ++BUILTIN(__builtin_lsx_vfcmp_sle_d, "V2SLLiV2dV2d", "nc") ++ ++BUILTIN(__builtin_lsx_vfcmp_sule_s, "V4SiV4fV4f", "nc") ++BUILTIN(__builtin_lsx_vfcmp_sule_d, "V2SLLiV2dV2d", "nc") ++ ++BUILTIN(__builtin_lsx_vbitsel_v, "V16UcV16UcV16UcV16Uc", "nc") ++ ++BUILTIN(__builtin_lsx_vshuf_b, "V16UcV16UcV16UcV16Uc", "nc") ++ ++BUILTIN(__builtin_lsx_vldrepl_b, "V16cv*Ii", "nc") ++BUILTIN(__builtin_lsx_vldrepl_h, "V8sv*Ii", "nc") ++BUILTIN(__builtin_lsx_vldrepl_w, "V4iv*Ii", "nc") ++BUILTIN(__builtin_lsx_vldrepl_d, "V2LLiv*Ii", "nc") ++ ++BUILTIN(__builtin_lsx_vstelm_b, "vV16Scv*IiUi", "nc") ++BUILTIN(__builtin_lsx_vstelm_h, "vV8Ssv*IiUi", "nc") ++BUILTIN(__builtin_lsx_vstelm_w, "vV4Siv*IiUi", "nc") ++BUILTIN(__builtin_lsx_vstelm_d, "vV2SLLiv*IiUi", "nc") ++ ++BUILTIN(__builtin_lsx_vldx, "V16Scv*LLi", "nc") ++BUILTIN(__builtin_lsx_vstx, "vV16Scv*LLi", "nc") ++ ++BUILTIN(__builtin_lsx_vaddwev_d_w, "V2LLiV4SiV4Si", "nc") ++BUILTIN(__builtin_lsx_vaddwev_w_h, "V4SiV8sV8s", "nc") ++BUILTIN(__builtin_lsx_vaddwev_h_b, "V8sV16cV16c", "nc") ++BUILTIN(__builtin_lsx_vaddwev_q_d, "V2LLiV2LLiV2LLi", "nc") ++ ++BUILTIN(__builtin_lsx_vsubwev_d_w, "V2LLiV4SiV4Si", "nc") ++BUILTIN(__builtin_lsx_vsubwev_w_h, "V4SiV8sV8s", "nc") ++BUILTIN(__builtin_lsx_vsubwev_h_b, "V8sV16cV16c", "nc") ++BUILTIN(__builtin_lsx_vsubwev_q_d, "V2LLiV2LLiV2LLi", "nc") ++ ++BUILTIN(__builtin_lsx_vaddwod_d_w, "V2LLiV4SiV4Si", "nc") ++BUILTIN(__builtin_lsx_vaddwod_w_h, "V4SiV8sV8s", "nc") ++BUILTIN(__builtin_lsx_vaddwod_h_b, "V8sV16cV16c", "nc") ++BUILTIN(__builtin_lsx_vaddwod_q_d, "V2LLiV2LLiV2LLi", "nc") ++ ++BUILTIN(__builtin_lsx_vsubwod_d_w, "V2LLiV4SiV4Si", "nc") ++BUILTIN(__builtin_lsx_vsubwod_w_h, "V4SiV8sV8s", "nc") ++BUILTIN(__builtin_lsx_vsubwod_h_b, "V8sV16cV16c", "nc") ++BUILTIN(__builtin_lsx_vsubwod_q_d, "V2LLiV2LLiV2LLi", "nc") ++ ++BUILTIN(__builtin_lsx_vaddwev_d_wu, "V2LLiV4UiV4Ui", "nc") ++BUILTIN(__builtin_lsx_vaddwev_w_hu, "V4SiV8UsV8Us", "nc") ++BUILTIN(__builtin_lsx_vaddwev_h_bu, "V8sV16UcV16Uc", "nc") ++BUILTIN(__builtin_lsx_vaddwev_q_du, "V2LLiV2ULLiV2ULLi", "nc") ++ ++BUILTIN(__builtin_lsx_vsubwev_d_wu, "V2LLiV4UiV4Ui", "nc") ++BUILTIN(__builtin_lsx_vsubwev_w_hu, "V4SiV8UsV8Us", "nc") ++BUILTIN(__builtin_lsx_vsubwev_h_bu, "V8sV16UcV16Uc", "nc") ++BUILTIN(__builtin_lsx_vsubwev_q_du, "V2LLiV2ULLiV2ULLi", "nc") ++ ++BUILTIN(__builtin_lsx_vaddwod_d_wu, "V2LLiV4UiV4Ui", "nc") ++BUILTIN(__builtin_lsx_vaddwod_w_hu, "V4SiV8UsV8Us", "nc") ++BUILTIN(__builtin_lsx_vaddwod_h_bu, "V8sV16UcV16Uc", "nc") ++BUILTIN(__builtin_lsx_vaddwod_q_du, "V2LLiV2ULLiV2ULLi", "nc") ++ ++BUILTIN(__builtin_lsx_vsubwod_d_wu, "V2LLiV4UiV4Ui", "nc") ++BUILTIN(__builtin_lsx_vsubwod_w_hu, "V4SiV8UsV8Us", "nc") ++BUILTIN(__builtin_lsx_vsubwod_h_bu, "V8sV16UcV16Uc", "nc") ++BUILTIN(__builtin_lsx_vsubwod_q_du, "V2LLiV2ULLiV2ULLi", "nc") ++ ++BUILTIN(__builtin_lsx_vaddwev_d_wu_w, "V2LLiV4UiV4Si", "nc") ++BUILTIN(__builtin_lsx_vaddwev_w_hu_h, 
"V4SiV8UsV8s", "nc") ++BUILTIN(__builtin_lsx_vaddwev_h_bu_b, "V8sV16UcV16c", "nc") ++BUILTIN(__builtin_lsx_vaddwev_q_du_d, "V2LLiV2ULLiV2LLi", "nc") ++ ++BUILTIN(__builtin_lsx_vaddwod_d_wu_w, "V2LLiV4UiV4Si", "nc") ++BUILTIN(__builtin_lsx_vaddwod_w_hu_h, "V4SiV8UsV8s", "nc") ++BUILTIN(__builtin_lsx_vaddwod_h_bu_b, "V8sV16UcV16c", "nc") ++BUILTIN(__builtin_lsx_vaddwod_q_du_d, "V2LLiV2ULLiV2LLi", "nc") ++ ++BUILTIN(__builtin_lsx_vhaddw_q_d, "V2LLiV2LLiV2LLi", "nc") ++BUILTIN(__builtin_lsx_vhsubw_q_d, "V2LLiV2LLiV2LLi", "nc") ++ ++BUILTIN(__builtin_lsx_vhaddw_qu_du, "V2ULLiV2ULLiV2ULLi", "nc") ++BUILTIN(__builtin_lsx_vhsubw_qu_du, "V2ULLiV2ULLiV2ULLi", "nc") ++ ++BUILTIN(__builtin_lsx_vmuh_b, "V16cV16cV16c", "nc") ++BUILTIN(__builtin_lsx_vmuh_h, "V8sV8sV8s", "nc") ++BUILTIN(__builtin_lsx_vmuh_w, "V4iV4iV4i", "nc") ++BUILTIN(__builtin_lsx_vmuh_d, "V2LLiV2LLiV2LLi", "nc") ++ ++BUILTIN(__builtin_lsx_vmuh_bu, "V16UcV16UcV16Uc", "nc") ++BUILTIN(__builtin_lsx_vmuh_hu, "V8UsV8UsV8Us", "nc") ++BUILTIN(__builtin_lsx_vmuh_wu, "V4UiV4UiV4Ui", "nc") ++BUILTIN(__builtin_lsx_vmuh_du, "V2ULLiV2ULLiV2ULLi", "nc") ++ ++BUILTIN(__builtin_lsx_vmulwev_d_w, "V2LLiV4SiV4Si", "nc") ++BUILTIN(__builtin_lsx_vmulwev_w_h, "V4SiV8sV8s", "nc") ++BUILTIN(__builtin_lsx_vmulwev_h_b, "V8sV16cV16c", "nc") ++BUILTIN(__builtin_lsx_vmulwev_q_d, "V2LLiV2LLiV2LLi", "nc") ++ ++BUILTIN(__builtin_lsx_vmulwod_d_w, "V2LLiV4SiV4Si", "nc") ++BUILTIN(__builtin_lsx_vmulwod_w_h, "V4SiV8sV8s", "nc") ++BUILTIN(__builtin_lsx_vmulwod_h_b, "V8sV16cV16c", "nc") ++BUILTIN(__builtin_lsx_vmulwod_q_d, "V2LLiV2LLiV2LLi", "nc") ++ ++BUILTIN(__builtin_lsx_vmulwev_d_wu, "V2LLiV4UiV4Ui", "nc") ++BUILTIN(__builtin_lsx_vmulwev_w_hu, "V4SiV8UsV8Us", "nc") ++BUILTIN(__builtin_lsx_vmulwev_h_bu, "V8sV16UcV16Uc", "nc") ++BUILTIN(__builtin_lsx_vmulwev_q_du, "V2LLiV2ULLiV2ULLi", "nc") ++ ++BUILTIN(__builtin_lsx_vmulwod_d_wu, "V2LLiV4UiV4Ui", "nc") ++BUILTIN(__builtin_lsx_vmulwod_w_hu, "V4SiV8UsV8Us", "nc") ++BUILTIN(__builtin_lsx_vmulwod_h_bu, "V8sV16UcV16Uc", "nc") ++BUILTIN(__builtin_lsx_vmulwod_q_du, "V2LLiV2ULLiV2ULLi", "nc") ++ ++BUILTIN(__builtin_lsx_vmulwev_d_wu_w, "V2LLiV4UiV4Si", "nc") ++BUILTIN(__builtin_lsx_vmulwev_w_hu_h, "V4SiV8UsV8s", "nc") ++BUILTIN(__builtin_lsx_vmulwev_h_bu_b, "V8sV16UcV16c", "nc") ++BUILTIN(__builtin_lsx_vmulwev_q_du_d, "V2LLiV2ULLiV2LLi", "nc") ++ ++BUILTIN(__builtin_lsx_vmulwod_d_wu_w, "V2LLiV4UiV4Si", "nc") ++BUILTIN(__builtin_lsx_vmulwod_w_hu_h, "V4SiV8UsV8s", "nc") ++BUILTIN(__builtin_lsx_vmulwod_h_bu_b, "V8sV16UcV16c", "nc") ++BUILTIN(__builtin_lsx_vmulwod_q_du_d, "V2LLiV2ULLiV2LLi", "nc") ++ ++BUILTIN(__builtin_lsx_vmaddwev_d_w, "V2LLiV2LLiV4SiV4Si", "nc") ++BUILTIN(__builtin_lsx_vmaddwev_w_h, "V4SiV4SiV8sV8s", "nc") ++BUILTIN(__builtin_lsx_vmaddwev_h_b, "V8sV8sV16cV16c", "nc") ++BUILTIN(__builtin_lsx_vmaddwev_q_d, "V2LLiV2LLiV2LLiV2LLi", "nc") ++ ++BUILTIN(__builtin_lsx_vmaddwod_d_w, "V2LLiV2LLiV4SiV4Si", "nc") ++BUILTIN(__builtin_lsx_vmaddwod_w_h, "V4SiV4SiV8sV8s", "nc") ++BUILTIN(__builtin_lsx_vmaddwod_h_b, "V8sV8sV16cV16c", "nc") ++BUILTIN(__builtin_lsx_vmaddwod_q_d, "V2LLiV2LLiV2LLiV2LLi", "nc") ++ ++BUILTIN(__builtin_lsx_vmaddwev_d_wu, "V2ULLiV2ULLiV4UiV4Ui", "nc") ++BUILTIN(__builtin_lsx_vmaddwev_w_hu, "V4UiV4UiV8UsV8Us", "nc") ++BUILTIN(__builtin_lsx_vmaddwev_h_bu, "V8UsV8UsV16UcV16Uc", "nc") ++BUILTIN(__builtin_lsx_vmaddwev_q_du, "V2ULLiV2ULLiV2ULLiV2ULLi", "nc") ++ ++BUILTIN(__builtin_lsx_vmaddwod_d_wu, "V2ULLiV2ULLiV4UiV4Ui", "nc") ++BUILTIN(__builtin_lsx_vmaddwod_w_hu, "V4UiV4UiV8UsV8Us", "nc") 
++BUILTIN(__builtin_lsx_vmaddwod_h_bu, "V8UsV8UsV16UcV16Uc", "nc") ++BUILTIN(__builtin_lsx_vmaddwod_q_du, "V2ULLiV2ULLiV2ULLiV2ULLi", "nc") ++ ++BUILTIN(__builtin_lsx_vmaddwev_d_wu_w, "V2LLiV2LLiV4UiV4Si", "nc") ++BUILTIN(__builtin_lsx_vmaddwev_w_hu_h, "V4SiV4SiV8UsV8s", "nc") ++BUILTIN(__builtin_lsx_vmaddwev_h_bu_b, "V8sV8sV16UcV16c", "nc") ++BUILTIN(__builtin_lsx_vmaddwev_q_du_d, "V2LLiV2LLiV2ULLiV2LLi", "nc") ++ ++BUILTIN(__builtin_lsx_vmaddwod_d_wu_w, "V2LLiV2LLiV4UiV4Si", "nc") ++BUILTIN(__builtin_lsx_vmaddwod_w_hu_h, "V4SiV4SiV8UsV8s", "nc") ++BUILTIN(__builtin_lsx_vmaddwod_h_bu_b, "V8sV8sV16UcV16c", "nc") ++BUILTIN(__builtin_lsx_vmaddwod_q_du_d, "V2LLiV2LLiV2ULLiV2LLi", "nc") ++ ++BUILTIN(__builtin_lsx_vsrln_b_h, "V16ScV8sV8s", "nc") ++BUILTIN(__builtin_lsx_vsrln_h_w, "V8sV4SiV4Si", "nc") ++BUILTIN(__builtin_lsx_vsrln_w_d, "V4SiV2LLiV2LLi", "nc") ++ ++BUILTIN(__builtin_lsx_vsran_b_h, "V16ScV8sV8s", "nc") ++BUILTIN(__builtin_lsx_vsran_h_w, "V8sV4SiV4Si", "nc") ++BUILTIN(__builtin_lsx_vsran_w_d, "V4SiV2LLiV2LLi", "nc") ++ ++BUILTIN(__builtin_lsx_vsrlrn_b_h, "V16ScV8sV8s", "nc") ++BUILTIN(__builtin_lsx_vsrlrn_h_w, "V8sV4SiV4Si", "nc") ++BUILTIN(__builtin_lsx_vsrlrn_w_d, "V4SiV2LLiV2LLi", "nc") ++ ++BUILTIN(__builtin_lsx_vsrarn_b_h, "V16ScV8sV8s", "nc") ++BUILTIN(__builtin_lsx_vsrarn_h_w, "V8sV4SiV4Si", "nc") ++BUILTIN(__builtin_lsx_vsrarn_w_d, "V4SiV2LLiV2LLi", "nc") ++ ++BUILTIN(__builtin_lsx_vssrln_b_h, "V16ScV8sV8s", "nc") ++BUILTIN(__builtin_lsx_vssrln_h_w, "V8sV4SiV4Si", "nc") ++BUILTIN(__builtin_lsx_vssrln_w_d, "V4SiV2LLiV2LLi", "nc") ++ ++BUILTIN(__builtin_lsx_vssran_b_h, "V16ScV8sV8s", "nc") ++BUILTIN(__builtin_lsx_vssran_h_w, "V8sV4SiV4Si", "nc") ++BUILTIN(__builtin_lsx_vssran_w_d, "V4SiV2LLiV2LLi", "nc") ++ ++BUILTIN(__builtin_lsx_vssrlrn_b_h, "V16ScV8sV8s", "nc") ++BUILTIN(__builtin_lsx_vssrlrn_h_w, "V8sV4SiV4Si", "nc") ++BUILTIN(__builtin_lsx_vssrlrn_w_d, "V4SiV2LLiV2LLi", "nc") ++ ++BUILTIN(__builtin_lsx_vssrarn_b_h, "V16ScV8sV8s", "nc") ++BUILTIN(__builtin_lsx_vssrarn_h_w, "V8sV4SiV4Si", "nc") ++BUILTIN(__builtin_lsx_vssrarn_w_d, "V4SiV2LLiV2LLi", "nc") ++ ++BUILTIN(__builtin_lsx_vssrln_bu_h, "V16UcV8UsV8Us", "nc") ++BUILTIN(__builtin_lsx_vssrln_hu_w, "V8UsV4UiV4Ui", "nc") ++BUILTIN(__builtin_lsx_vssrln_wu_d, "V4UiV2ULLiV2ULLi", "nc") ++ ++BUILTIN(__builtin_lsx_vssran_bu_h, "V16UcV8UsV8Us", "nc") ++BUILTIN(__builtin_lsx_vssran_hu_w, "V8UsV4UiV4Ui", "nc") ++BUILTIN(__builtin_lsx_vssran_wu_d, "V4UiV2ULLiV2ULLi", "nc") ++ ++BUILTIN(__builtin_lsx_vssrlrn_bu_h, "V16UcV8UsV8Us", "nc") ++BUILTIN(__builtin_lsx_vssrlrn_hu_w, "V8UsV4UiV4Ui", "nc") ++BUILTIN(__builtin_lsx_vssrlrn_wu_d, "V4UiV2ULLiV2ULLi", "nc") ++ ++BUILTIN(__builtin_lsx_vssrarn_bu_h, "V16UcV8UsV8Us", "nc") ++BUILTIN(__builtin_lsx_vssrarn_hu_w, "V8UsV4UiV4Ui", "nc") ++BUILTIN(__builtin_lsx_vssrarn_wu_d, "V4UiV2ULLiV2ULLi", "nc") ++ ++BUILTIN(__builtin_lsx_vandn_v, "V16UcV16UcV16Uc", "nc") ++ ++BUILTIN(__builtin_lsx_vorn_v, "V16ScV16ScV16Sc", "nc") ++ ++BUILTIN(__builtin_lsx_vfrstp_b, "V16ScV16ScV16ScV16Sc", "nc") ++BUILTIN(__builtin_lsx_vfrstp_h, "V8SsV8SsV8SsV8Ss", "nc") ++ ++BUILTIN(__builtin_lsx_vadd_q, "V2LLiV2LLiV2LLi", "nc") ++BUILTIN(__builtin_lsx_vsub_q, "V2LLiV2LLiV2LLi", "nc") ++ ++BUILTIN(__builtin_lsx_vsigncov_b, "V16ScV16ScV16Sc", "nc") ++BUILTIN(__builtin_lsx_vsigncov_h, "V8SsV8SsV8Ss", "nc") ++BUILTIN(__builtin_lsx_vsigncov_w, "V4SiV4SiV4Si", "nc") ++BUILTIN(__builtin_lsx_vsigncov_d, "V2SLLiV2SLLiV2SLLi", "nc") ++ ++BUILTIN(__builtin_lsx_vfcvt_h_s, "V8sV4fV4f", "nc") ++BUILTIN(__builtin_lsx_vfcvt_s_d, 
"V4fV2dV2d", "nc") ++ ++BUILTIN(__builtin_lsx_vftint_w_d, "V4SiV2dV2d", "nc") ++BUILTIN(__builtin_lsx_vffint_s_l, "V4fV2LLiV2LLi", "nc") ++ ++BUILTIN(__builtin_lsx_vftintrz_w_d, "V4SiV2dV2d", "nc") ++BUILTIN(__builtin_lsx_vftintrp_w_d, "V4SiV2dV2d", "nc") ++BUILTIN(__builtin_lsx_vftintrm_w_d, "V4SiV2dV2d", "nc") ++BUILTIN(__builtin_lsx_vftintrne_w_d, "V4SiV2dV2d", "nc") ++ ++BUILTIN(__builtin_lsx_vbsrl_v, "V16cV16cIUi", "nc") ++BUILTIN(__builtin_lsx_vbsll_v, "V16cV16cIUi", "nc") ++ ++BUILTIN(__builtin_lsx_vfrstpi_b, "V16cV16cV16cIUi", "nc") ++BUILTIN(__builtin_lsx_vfrstpi_h, "V8sV8sV8sIUi", "nc") ++ ++BUILTIN(__builtin_lsx_vneg_b, "V16cV16c", "nc") ++BUILTIN(__builtin_lsx_vneg_h, "V8sV8s", "nc") ++BUILTIN(__builtin_lsx_vneg_w, "V4iV4i", "nc") ++BUILTIN(__builtin_lsx_vneg_d, "V2LLiV2LLi", "nc") ++ ++BUILTIN(__builtin_lsx_vmskgez_b, "V16cV16c", "nc") ++BUILTIN(__builtin_lsx_vmsknz_b, "V8sV8s", "nc") ++ ++BUILTIN(__builtin_lsx_vfrintrm_s, "V4SiV4f", "nc") ++BUILTIN(__builtin_lsx_vfrintrm_d, "V2LLiV2d", "nc") ++ ++BUILTIN(__builtin_lsx_vfrintrp_s, "V4SiV4f", "nc") ++BUILTIN(__builtin_lsx_vfrintrp_d, "V2LLiV2d", "nc") ++ ++BUILTIN(__builtin_lsx_vfrintrz_s, "V4SiV4f", "nc") ++BUILTIN(__builtin_lsx_vfrintrz_d, "V2LLiV2d", "nc") ++ ++BUILTIN(__builtin_lsx_vfrintrne_s, "V4SiV4f", "nc") ++BUILTIN(__builtin_lsx_vfrintrne_d, "V2LLiV2d", "nc") ++ ++BUILTIN(__builtin_lsx_vffinth_d_w, "V2dV4Si", "nc") ++BUILTIN(__builtin_lsx_vffintl_d_w, "V2dV4Si", "nc") ++ ++BUILTIN(__builtin_lsx_vftintrm_w_s, "V4SiV4f", "nc") ++BUILTIN(__builtin_lsx_vftintrm_l_d, "V2LLiV2d", "nc") ++ ++BUILTIN(__builtin_lsx_vftintrp_w_s, "V4SiV4f", "nc") ++BUILTIN(__builtin_lsx_vftintrp_l_d, "V2LLiV2d", "nc") ++ ++BUILTIN(__builtin_lsx_vftintrz_w_s, "V4SiV4f", "nc") ++BUILTIN(__builtin_lsx_vftintrz_l_d, "V2LLiV2d", "nc") ++ ++BUILTIN(__builtin_lsx_vftintrne_w_s, "V4SiV4f", "nc") ++BUILTIN(__builtin_lsx_vftintrne_l_d, "V2LLiV2d", "nc") ++ ++BUILTIN(__builtin_lsx_vftinth_l_s, "V2LLiV4f", "nc") ++BUILTIN(__builtin_lsx_vftintl_l_s, "V2LLiV4f", "nc") ++ ++BUILTIN(__builtin_lsx_vftintrmh_l_s, "V2LLiV4f", "nc") ++BUILTIN(__builtin_lsx_vftintrml_l_s, "V2LLiV4f", "nc") ++ ++BUILTIN(__builtin_lsx_vftintrph_l_s, "V2LLiV4f", "nc") ++BUILTIN(__builtin_lsx_vftintrpl_l_s, "V2LLiV4f", "nc") ++ ++BUILTIN(__builtin_lsx_vftintrzh_l_s, "V2LLiV4f", "nc") ++BUILTIN(__builtin_lsx_vftintrzl_l_s, "V2LLiV4f", "nc") ++ ++BUILTIN(__builtin_lsx_vftintrneh_l_s, "V2LLiV4f", "nc") ++BUILTIN(__builtin_lsx_vftintrnel_l_s, "V2LLiV4f", "nc") ++ ++BUILTIN(__builtin_lsx_vexth_d_w, "V2LLiV4Si", "nc") ++BUILTIN(__builtin_lsx_vexth_w_h, "V4SiV8s", "nc") ++BUILTIN(__builtin_lsx_vexth_h_b, "V8sV16c", "nc") ++BUILTIN(__builtin_lsx_vexth_q_d, "V2LLiV2LLi", "nc") ++ ++BUILTIN(__builtin_lsx_vexth_du_wu, "V2ULLiV4Ui", "nc") ++BUILTIN(__builtin_lsx_vexth_wu_hu, "V4UiV8Us", "nc") ++BUILTIN(__builtin_lsx_vexth_hu_bu, "V8UsV16Uc", "nc") ++BUILTIN(__builtin_lsx_vexth_qu_du, "V2ULLiV2ULLi", "nc") ++ ++BUILTIN(__builtin_lsx_vsllwil_d_w, "V2LLiV4SiIUi", "nc") ++BUILTIN(__builtin_lsx_vsllwil_w_h, "V4SiV8sIUi", "nc") ++BUILTIN(__builtin_lsx_vsllwil_h_b, "V8sV16cIUi", "nc") ++ ++BUILTIN(__builtin_lsx_vextl_q_d, "V2LLiV2LLi", "nc") ++ ++BUILTIN(__builtin_lsx_vsllwil_du_wu, "V2ULLiV4UiIUi", "nc") ++BUILTIN(__builtin_lsx_vsllwil_wu_hu, "V4UiV8UsIUi", "nc") ++BUILTIN(__builtin_lsx_vsllwil_hu_bu, "V8UsV16UcIUi", "nc") ++ ++BUILTIN(__builtin_lsx_vextl_qu_du, "V2LLiV2ULLi", "nc") ++ ++BUILTIN(__builtin_lsx_vbitclri_b, "V16UcV16UcIUi", "nc") ++BUILTIN(__builtin_lsx_vbitclri_h, "V8UsV8UsIUi", "nc") 
++BUILTIN(__builtin_lsx_vbitclri_w, "V4UiV4UiIUi", "nc") ++BUILTIN(__builtin_lsx_vbitclri_d, "V2ULLiV2ULLiIUi", "nc") ++ ++BUILTIN(__builtin_lsx_vbitseti_b, "V16UcV16UcIUi", "nc") ++BUILTIN(__builtin_lsx_vbitseti_h, "V8UsV8UsIUi", "nc") ++BUILTIN(__builtin_lsx_vbitseti_w, "V4UiV4UiIUi", "nc") ++BUILTIN(__builtin_lsx_vbitseti_d, "V2ULLiV2ULLiIUi", "nc") ++ ++BUILTIN(__builtin_lsx_vbitrevi_b, "V16UcV16UcIUi", "nc") ++BUILTIN(__builtin_lsx_vbitrevi_h, "V8UsV8UsIUi", "nc") ++BUILTIN(__builtin_lsx_vbitrevi_w, "V4UiV4UiIUi", "nc") ++BUILTIN(__builtin_lsx_vbitrevi_d, "V2ULLiV2ULLiIUi", "nc") ++ ++BUILTIN(__builtin_lsx_vssrlrni_b_h, "V16cV16cV16cIUi", "nc") ++BUILTIN(__builtin_lsx_vssrlrni_h_w, "V8sV8sV8sIUi", "nc") ++BUILTIN(__builtin_lsx_vssrlrni_w_d, "V4iV4iV4iIUi", "nc") ++BUILTIN(__builtin_lsx_vssrlrni_d_q, "V2LLiV2LLiV2LLiIUi", "nc") ++ ++BUILTIN(__builtin_lsx_vsrani_b_h, "V16cV16cV16cIUi", "nc") ++BUILTIN(__builtin_lsx_vsrani_h_w, "V8sV8sV8sIUi", "nc") ++BUILTIN(__builtin_lsx_vsrani_w_d, "V4iV4iV4iIUi", "nc") ++BUILTIN(__builtin_lsx_vsrani_d_q, "V2LLiV2LLiV2LLiIUi", "nc") ++ ++BUILTIN(__builtin_lsx_vextrins_b, "V16cV16cV16cIUi", "nc") ++BUILTIN(__builtin_lsx_vextrins_h, "V8sV8sV8sIUi", "nc") ++BUILTIN(__builtin_lsx_vextrins_w, "V4iV4iV4iIUi", "nc") ++BUILTIN(__builtin_lsx_vextrins_d, "V2LLiV2LLiV2LLiIUi", "nc") ++ ++BUILTIN(__builtin_lsx_vbitseli_b, "V16UcV16UcV16UcIUi", "nc") ++ ++BUILTIN(__builtin_lsx_vandi_b, "V16UcV16UcIUi", "nc") ++ ++BUILTIN(__builtin_lsx_vori_b, "V16UcV16UcIUi", "nc") ++ ++BUILTIN(__builtin_lsx_vxori_b, "V16UcV16UcIUi", "nc") ++ ++BUILTIN(__builtin_lsx_vnori_b, "V16UcV16UcIUi", "nc") ++ ++BUILTIN(__builtin_lsx_vldi, "V2LLiIi", "nc") ++ ++BUILTIN(__builtin_lsx_vpermi_w, "V4iV4iV4iIUi", "nc") ++ ++BUILTIN(__builtin_lsx_vsadd_b, "V16ScV16ScV16Sc", "nc") ++BUILTIN(__builtin_lsx_vsadd_h, "V8SsV8SsV8Ss", "nc") ++BUILTIN(__builtin_lsx_vsadd_w, "V4SiV4SiV4Si", "nc") ++BUILTIN(__builtin_lsx_vsadd_d, "V2SLLiV2SLLiV2SLLi", "nc") ++ ++BUILTIN(__builtin_lsx_vssub_b, "V16ScV16ScV16Sc", "nc") ++BUILTIN(__builtin_lsx_vssub_h, "V8SsV8SsV8Ss", "nc") ++BUILTIN(__builtin_lsx_vssub_w, "V4SiV4SiV4Si", "nc") ++BUILTIN(__builtin_lsx_vssub_d, "V2SLLiV2SLLiV2SLLi", "nc") ++ ++BUILTIN(__builtin_lsx_vsadd_bu, "V16UcV16UcV16Uc", "nc") ++BUILTIN(__builtin_lsx_vsadd_hu, "V8UsV8UsV8Us", "nc") ++BUILTIN(__builtin_lsx_vsadd_wu, "V4UiV4UiV4Ui", "nc") ++BUILTIN(__builtin_lsx_vsadd_du, "V2ULLiV2ULLiV2ULLi", "nc") ++ ++BUILTIN(__builtin_lsx_vssub_bu, "V16UcV16UcV16Uc", "nc") ++BUILTIN(__builtin_lsx_vssub_hu, "V8UsV8UsV8Us", "nc") ++BUILTIN(__builtin_lsx_vssub_wu, "V4UiV4UiV4Ui", "nc") ++BUILTIN(__builtin_lsx_vssub_du, "V2ULLiV2ULLiV2ULLi", "nc") ++ ++BUILTIN(__builtin_lsx_vhaddw_h_b, "V8SsV16ScV16Sc", "nc") ++BUILTIN(__builtin_lsx_vhaddw_w_h, "V4SiV8SsV8Ss", "nc") ++BUILTIN(__builtin_lsx_vhaddw_d_w, "V2SLLiV4SiV4Si", "nc") ++ ++BUILTIN(__builtin_lsx_vhsubw_h_b, "V8SsV16ScV16Sc", "nc") ++BUILTIN(__builtin_lsx_vhsubw_w_h, "V4SiV8SsV8Ss", "nc") ++BUILTIN(__builtin_lsx_vhsubw_d_w, "V2SLLiV4SiV4Si", "nc") ++ ++BUILTIN(__builtin_lsx_vhaddw_hu_bu, "V8UsV16UcV16Uc", "nc") ++BUILTIN(__builtin_lsx_vhaddw_wu_hu, "V4UiV8UsV8Us", "nc") ++BUILTIN(__builtin_lsx_vhaddw_du_wu, "V2ULLiV4UiV4Ui", "nc") ++ ++BUILTIN(__builtin_lsx_vhsubw_hu_bu, "V8UsV16UcV16Uc", "nc") ++BUILTIN(__builtin_lsx_vhsubw_wu_hu, "V4UiV8UsV8Us", "nc") ++BUILTIN(__builtin_lsx_vhsubw_du_wu, "V2ULLiV4UiV4Ui", "nc") ++ ++BUILTIN(__builtin_lsx_vadda_b, "V16ScV16ScV16Sc", "nc") ++BUILTIN(__builtin_lsx_vadda_h, "V8SsV8SsV8Ss", "nc") 
++BUILTIN(__builtin_lsx_vadda_w, "V4SiV4SiV4Si", "nc") ++BUILTIN(__builtin_lsx_vadda_d, "V2SLLiV2SLLiV2SLLi", "nc") ++ ++BUILTIN(__builtin_lsx_vabsd_b, "V16ScV16ScV16Sc", "nc") ++BUILTIN(__builtin_lsx_vabsd_h, "V8SsV8SsV8Ss", "nc") ++BUILTIN(__builtin_lsx_vabsd_w, "V4SiV4SiV4Si", "nc") ++BUILTIN(__builtin_lsx_vabsd_d, "V2SLLiV2SLLiV2SLLi", "nc") ++ ++BUILTIN(__builtin_lsx_vabsd_bu, "V16UcV16UcV16Uc", "nc") ++BUILTIN(__builtin_lsx_vabsd_hu, "V8UsV8UsV8Us", "nc") ++BUILTIN(__builtin_lsx_vabsd_wu, "V4UiV4UiV4Ui", "nc") ++BUILTIN(__builtin_lsx_vabsd_du, "V2ULLiV2ULLiV2ULLi", "nc") ++ ++BUILTIN(__builtin_lsx_vavg_b, "V16ScV16ScV16Sc", "nc") ++BUILTIN(__builtin_lsx_vavg_h, "V8SsV8SsV8Ss", "nc") ++BUILTIN(__builtin_lsx_vavg_w, "V4SiV4SiV4Si", "nc") ++BUILTIN(__builtin_lsx_vavg_d, "V2SLLiV2SLLiV2SLLi", "nc") ++ ++BUILTIN(__builtin_lsx_vavg_bu, "V16UcV16UcV16Uc", "nc") ++BUILTIN(__builtin_lsx_vavg_hu, "V8UsV8UsV8Us", "nc") ++BUILTIN(__builtin_lsx_vavg_wu, "V4UiV4UiV4Ui", "nc") ++BUILTIN(__builtin_lsx_vavg_du, "V2ULLiV2ULLiV2ULLi", "nc") ++ ++BUILTIN(__builtin_lsx_vavgr_b, "V16ScV16ScV16Sc", "nc") ++BUILTIN(__builtin_lsx_vavgr_h, "V8SsV8SsV8Ss", "nc") ++BUILTIN(__builtin_lsx_vavgr_w, "V4SiV4SiV4Si", "nc") ++BUILTIN(__builtin_lsx_vavgr_d, "V2SLLiV2SLLiV2SLLi", "nc") ++ ++BUILTIN(__builtin_lsx_vavgr_bu, "V16UcV16UcV16Uc", "nc") ++BUILTIN(__builtin_lsx_vavgr_hu, "V8UsV8UsV8Us", "nc") ++BUILTIN(__builtin_lsx_vavgr_wu, "V4UiV4UiV4Ui", "nc") ++BUILTIN(__builtin_lsx_vavgr_du, "V2ULLiV2ULLiV2ULLi", "nc") ++ ++BUILTIN(__builtin_lsx_vsrlr_b, "V16cV16cV16c", "nc") ++BUILTIN(__builtin_lsx_vsrlr_h, "V8sV8sV8s", "nc") ++BUILTIN(__builtin_lsx_vsrlr_w, "V4iV4iV4i", "nc") ++BUILTIN(__builtin_lsx_vsrlr_d, "V2LLiV2LLiV2LLi", "nc") ++ ++BUILTIN(__builtin_lsx_vsrar_b, "V16cV16cV16c", "nc") ++BUILTIN(__builtin_lsx_vsrar_h, "V8sV8sV8s", "nc") ++BUILTIN(__builtin_lsx_vsrar_w, "V4iV4iV4i", "nc") ++BUILTIN(__builtin_lsx_vsrar_d, "V2LLiV2LLiV2LLi", "nc") ++ ++BUILTIN(__builtin_lsx_vfmax_s, "V4fV4fV4f", "nc") ++BUILTIN(__builtin_lsx_vfmax_d, "V2dV2dV2d", "nc") ++ ++BUILTIN(__builtin_lsx_vfmin_s, "V4fV4fV4f", "nc") ++BUILTIN(__builtin_lsx_vfmin_d, "V2dV2dV2d", "nc") ++ ++BUILTIN(__builtin_lsx_vfmaxa_s, "V4fV4fV4f", "nc") ++BUILTIN(__builtin_lsx_vfmaxa_d, "V2dV2dV2d", "nc") ++ ++BUILTIN(__builtin_lsx_vfmina_s, "V4fV4fV4f", "nc") ++BUILTIN(__builtin_lsx_vfmina_d, "V2dV2dV2d", "nc") ++ ++BUILTIN(__builtin_lsx_vfclass_s, "V4iV4f", "nc") ++BUILTIN(__builtin_lsx_vfclass_d, "V2LLiV2d", "nc") ++ ++BUILTIN(__builtin_lsx_vfrecip_s, "V4fV4f", "nc") ++BUILTIN(__builtin_lsx_vfrecip_d, "V2dV2d", "nc") ++ ++BUILTIN(__builtin_lsx_vfrsqrt_s, "V4fV4f", "nc") ++BUILTIN(__builtin_lsx_vfrsqrt_d, "V2dV2d", "nc") ++ ++BUILTIN(__builtin_lsx_vfcvtl_s_h, "V4fV8s", "nc") ++BUILTIN(__builtin_lsx_vfcvtl_d_s, "V2dV4f", "nc") ++ ++BUILTIN(__builtin_lsx_vfcvth_s_h, "V4fV8s", "nc") ++BUILTIN(__builtin_lsx_vfcvth_d_s, "V2dV4f", "nc") ++ ++BUILTIN(__builtin_lsx_vftint_w_s, "V4SiV4f", "nc") ++BUILTIN(__builtin_lsx_vftint_l_d, "V2SLLiV2d", "nc") ++ ++BUILTIN(__builtin_lsx_vftint_wu_s, "V4UiV4f", "nc") ++BUILTIN(__builtin_lsx_vftint_lu_d, "V2ULLiV2d", "nc") ++ ++BUILTIN(__builtin_lsx_vsrlri_b, "V16cV16cIUi", "nc") ++BUILTIN(__builtin_lsx_vsrlri_h, "V8sV8sIUi", "nc") ++BUILTIN(__builtin_lsx_vsrlri_w, "V4iV4iIUi", "nc") ++BUILTIN(__builtin_lsx_vsrlri_d, "V2LLiV2LLiIUi", "nc") ++ ++BUILTIN(__builtin_lsx_vsrari_b, "V16cV16cIUi", "nc") ++BUILTIN(__builtin_lsx_vsrari_h, "V8sV8sIUi", "nc") ++BUILTIN(__builtin_lsx_vsrari_w, "V4iV4iIUi", "nc") 
++BUILTIN(__builtin_lsx_vsrari_d, "V2LLiV2LLiIUi", "nc") ++ ++BUILTIN(__builtin_lsx_vsat_b, "V16ScV16ScIUi", "nc") ++BUILTIN(__builtin_lsx_vsat_h, "V8SsV8SsIUi", "nc") ++BUILTIN(__builtin_lsx_vsat_w, "V4SiV4SiIUi", "nc") ++BUILTIN(__builtin_lsx_vsat_d, "V2SLLiV2SLLiIUi", "nc") ++ ++BUILTIN(__builtin_lsx_vsat_bu, "V16UcV16UcIUi", "nc") ++BUILTIN(__builtin_lsx_vsat_hu, "V8UsV8UsIUi", "nc") ++BUILTIN(__builtin_lsx_vsat_wu, "V4UiV4UiIUi", "nc") ++BUILTIN(__builtin_lsx_vsat_du, "V2ULLiV2ULLiIUi", "nc") ++ ++BUILTIN(__builtin_lsx_vsrlni_b_h, "V16cV16cV16cIUi", "nc") ++BUILTIN(__builtin_lsx_vsrlni_h_w, "V8sV8sV8sIUi", "nc") ++BUILTIN(__builtin_lsx_vsrlni_w_d, "V4iV4iV4iIUi", "nc") ++BUILTIN(__builtin_lsx_vsrlni_d_q, "V2LLiV2LLiV2LLiIUi", "nc") ++ ++BUILTIN(__builtin_lsx_vssrlni_b_h, "V16cV16cV16cIi", "nc") ++BUILTIN(__builtin_lsx_vssrlni_h_w, "V8sV8sV8sIi", "nc") ++BUILTIN(__builtin_lsx_vssrlni_w_d, "V4iV4iV4iIi", "nc") ++BUILTIN(__builtin_lsx_vssrlni_d_q, "V2LLiV2LLiV2LLiIi", "nc") ++ ++BUILTIN(__builtin_lsx_vssrlrni_bu_h, "V16cV16cV16cIi", "nc") ++BUILTIN(__builtin_lsx_vssrlrni_hu_w, "V8sV8sV8sIi", "nc") ++BUILTIN(__builtin_lsx_vssrlrni_wu_d, "V4iV4iV4iIi", "nc") ++BUILTIN(__builtin_lsx_vssrlrni_du_q, "V2LLiV2LLiV2LLiIi", "nc") ++ ++BUILTIN(__builtin_lsx_vsrarni_b_h, "V16cV16cV16cIi", "nc") ++BUILTIN(__builtin_lsx_vsrarni_h_w, "V8sV8sV8sIi", "nc") ++BUILTIN(__builtin_lsx_vsrarni_w_d, "V4iV4iV4iIi", "nc") ++BUILTIN(__builtin_lsx_vsrarni_d_q, "V2LLiV2LLiV2LLiIi", "nc") ++ ++BUILTIN(__builtin_lsx_vssrani_b_h, "V16cV16cV16cIi", "nc") ++BUILTIN(__builtin_lsx_vssrani_h_w, "V8sV8sV8sIi", "nc") ++BUILTIN(__builtin_lsx_vssrani_w_d, "V4iV4iV4iIi", "nc") ++BUILTIN(__builtin_lsx_vssrani_d_q, "V2LLiV2LLiV2LLiIi", "nc") ++ ++BUILTIN(__builtin_lsx_vssrani_bu_h, "V16cV16cV16cIi", "nc") ++BUILTIN(__builtin_lsx_vssrani_hu_w, "V8sV8sV8sIi", "nc") ++BUILTIN(__builtin_lsx_vssrani_wu_d, "V4iV4iV4iIi", "nc") ++BUILTIN(__builtin_lsx_vssrani_du_q, "V2LLiV2LLiV2LLiIi", "nc") ++ ++BUILTIN(__builtin_lsx_vssrarni_b_h, "V16cV16cV16cIi", "nc") ++BUILTIN(__builtin_lsx_vssrarni_h_w, "V8sV8sV8sIi", "nc") ++BUILTIN(__builtin_lsx_vssrarni_w_d, "V4iV4iV4iIi", "nc") ++BUILTIN(__builtin_lsx_vssrarni_d_q, "V2LLiV2LLiV2LLiIi", "nc") ++ ++BUILTIN(__builtin_lsx_vssrarni_bu_h, "V16cV16cV16cIi", "nc") ++BUILTIN(__builtin_lsx_vssrarni_hu_w, "V8sV8sV8sIi", "nc") ++BUILTIN(__builtin_lsx_vssrarni_wu_d, "V4iV4iV4iIi", "nc") ++BUILTIN(__builtin_lsx_vssrarni_du_q, "V2LLiV2LLiV2LLiIi", "nc") ++ ++BUILTIN(__builtin_lsx_vssrlni_bu_h, "V16cV16cV16cIi", "nc") ++BUILTIN(__builtin_lsx_vssrlni_hu_w, "V8sV8sV8sIi", "nc") ++BUILTIN(__builtin_lsx_vssrlni_wu_d, "V4iV4iV4iIi", "nc") ++BUILTIN(__builtin_lsx_vssrlni_du_q, "V2LLiV2LLiV2LLiIi", "nc") ++ ++BUILTIN(__builtin_lsx_vseq_b, "V16ScV16ScV16Sc", "nc") ++BUILTIN(__builtin_lsx_vseq_h, "V8SsV8SsV8Ss", "nc") ++BUILTIN(__builtin_lsx_vseq_w, "V4SiV4SiV4Si", "nc") ++BUILTIN(__builtin_lsx_vseq_d, "V2SLLiV2SLLiV2SLLi", "nc") ++ ++BUILTIN(__builtin_lsx_vsle_b, "V16ScV16ScV16Sc", "nc") ++BUILTIN(__builtin_lsx_vsle_h, "V8SsV8SsV8Ss", "nc") ++BUILTIN(__builtin_lsx_vsle_w, "V4SiV4SiV4Si", "nc") ++BUILTIN(__builtin_lsx_vsle_d, "V2SLLiV2SLLiV2SLLi", "nc") ++ ++BUILTIN(__builtin_lsx_vsle_bu, "V16ScV16UcV16Uc", "nc") ++BUILTIN(__builtin_lsx_vsle_hu, "V8SsV8UsV8Us", "nc") ++BUILTIN(__builtin_lsx_vsle_wu, "V4SiV4UiV4Ui", "nc") ++BUILTIN(__builtin_lsx_vsle_du, "V2SLLiV2ULLiV2ULLi", "nc") ++ ++BUILTIN(__builtin_lsx_vslt_b, "V16ScV16ScV16Sc", "nc") ++BUILTIN(__builtin_lsx_vslt_h, "V8SsV8SsV8Ss", "nc") 
++BUILTIN(__builtin_lsx_vslt_w, "V4SiV4SiV4Si", "nc") ++BUILTIN(__builtin_lsx_vslt_d, "V2SLLiV2SLLiV2SLLi", "nc") ++ ++BUILTIN(__builtin_lsx_vslt_bu, "V16ScV16UcV16Uc", "nc") ++BUILTIN(__builtin_lsx_vslt_hu, "V8SsV8UsV8Us", "nc") ++BUILTIN(__builtin_lsx_vslt_wu, "V4SiV4UiV4Ui", "nc") ++BUILTIN(__builtin_lsx_vslt_du, "V2SLLiV2ULLiV2ULLi", "nc") ++ ++BUILTIN(__builtin_lsx_vadd_b, "V16cV16cV16c", "nc") ++BUILTIN(__builtin_lsx_vadd_h, "V8sV8sV8s", "nc") ++BUILTIN(__builtin_lsx_vadd_w, "V4iV4iV4i", "nc") ++BUILTIN(__builtin_lsx_vadd_d, "V2LLiV2LLiV2LLi", "nc") ++ ++BUILTIN(__builtin_lsx_vsub_b, "V16cV16cV16c", "nc") ++BUILTIN(__builtin_lsx_vsub_h, "V8sV8sV8s", "nc") ++BUILTIN(__builtin_lsx_vsub_w, "V4iV4iV4i", "nc") ++BUILTIN(__builtin_lsx_vsub_d, "V2LLiV2LLiV2LLi", "nc") ++ ++BUILTIN(__builtin_lsx_vmax_b, "V16ScV16ScV16Sc", "nc") ++BUILTIN(__builtin_lsx_vmax_h, "V8SsV8SsV8Ss", "nc") ++BUILTIN(__builtin_lsx_vmax_w, "V4SiV4SiV4Si", "nc") ++BUILTIN(__builtin_lsx_vmax_d, "V2SLLiV2SLLiV2SLLi", "nc") ++ ++BUILTIN(__builtin_lsx_vmin_b, "V16ScV16ScV16Sc", "nc") ++BUILTIN(__builtin_lsx_vmin_h, "V8SsV8SsV8Ss", "nc") ++BUILTIN(__builtin_lsx_vmin_w, "V4SiV4SiV4Si", "nc") ++BUILTIN(__builtin_lsx_vmin_d, "V2SLLiV2SLLiV2SLLi", "nc") ++ ++BUILTIN(__builtin_lsx_vmax_bu, "V16UcV16UcV16Uc", "nc") ++BUILTIN(__builtin_lsx_vmax_hu, "V8UsV8UsV8Us", "nc") ++BUILTIN(__builtin_lsx_vmax_wu, "V4UiV4UiV4Ui", "nc") ++BUILTIN(__builtin_lsx_vmax_du, "V2ULLiV2ULLiV2ULLi", "nc") ++ ++BUILTIN(__builtin_lsx_vmin_bu, "V16UcV16UcV16Uc", "nc") ++BUILTIN(__builtin_lsx_vmin_hu, "V8UsV8UsV8Us", "nc") ++BUILTIN(__builtin_lsx_vmin_wu, "V4UiV4UiV4Ui", "nc") ++BUILTIN(__builtin_lsx_vmin_du, "V2ULLiV2ULLiV2ULLi", "nc") ++ ++BUILTIN(__builtin_lsx_vmul_b, "V16ScV16ScV16Sc", "nc") ++BUILTIN(__builtin_lsx_vmul_h, "V8SsV8SsV8Ss", "nc") ++BUILTIN(__builtin_lsx_vmul_w, "V4SiV4SiV4Si", "nc") ++BUILTIN(__builtin_lsx_vmul_d, "V2SLLiV2SLLiV2SLLi", "nc") ++ ++BUILTIN(__builtin_lsx_vmadd_b, "V16ScV16ScV16ScV16Sc", "nc") ++BUILTIN(__builtin_lsx_vmadd_h, "V8SsV8SsV8SsV8Ss", "nc") ++BUILTIN(__builtin_lsx_vmadd_w, "V4SiV4SiV4SiV4Si", "nc") ++BUILTIN(__builtin_lsx_vmadd_d, "V2SLLiV2SLLiV2SLLiV2SLLi", "nc") ++ ++BUILTIN(__builtin_lsx_vmsub_b, "V16ScV16ScV16ScV16Sc", "nc") ++BUILTIN(__builtin_lsx_vmsub_h, "V8SsV8SsV8SsV8Ss", "nc") ++BUILTIN(__builtin_lsx_vmsub_w, "V4SiV4SiV4SiV4Si", "nc") ++BUILTIN(__builtin_lsx_vmsub_d, "V2SLLiV2SLLiV2SLLiV2SLLi", "nc") ++ ++BUILTIN(__builtin_lsx_vdiv_b, "V16ScV16ScV16Sc", "nc") ++BUILTIN(__builtin_lsx_vdiv_h, "V8SsV8SsV8Ss", "nc") ++BUILTIN(__builtin_lsx_vdiv_w, "V4SiV4SiV4Si", "nc") ++BUILTIN(__builtin_lsx_vdiv_d, "V2SLLiV2SLLiV2SLLi", "nc") ++ ++BUILTIN(__builtin_lsx_vmod_b, "V16ScV16ScV16Sc", "nc") ++BUILTIN(__builtin_lsx_vmod_h, "V8SsV8SsV8Ss", "nc") ++BUILTIN(__builtin_lsx_vmod_w, "V4SiV4SiV4Si", "nc") ++BUILTIN(__builtin_lsx_vmod_d, "V2SLLiV2SLLiV2SLLi", "nc") ++ ++BUILTIN(__builtin_lsx_vdiv_bu, "V16UcV16UcV16Uc", "nc") ++BUILTIN(__builtin_lsx_vdiv_hu, "V8UsV8UsV8Us", "nc") ++BUILTIN(__builtin_lsx_vdiv_wu, "V4UiV4UiV4Ui", "nc") ++BUILTIN(__builtin_lsx_vdiv_du, "V2ULLiV2ULLiV2ULLi", "nc") ++ ++BUILTIN(__builtin_lsx_vsll_b, "V16cV16cV16c", "nc") ++BUILTIN(__builtin_lsx_vsll_h, "V8sV8sV8s", "nc") ++BUILTIN(__builtin_lsx_vsll_w, "V4iV4iV4i", "nc") ++BUILTIN(__builtin_lsx_vsll_d, "V2LLiV2LLiV2LLi", "nc") ++ ++BUILTIN(__builtin_lsx_vsrl_b, "V16cV16cV16c", "nc") ++BUILTIN(__builtin_lsx_vsrl_h, "V8sV8sV8s", "nc") ++BUILTIN(__builtin_lsx_vsrl_w, "V4iV4iV4i", "nc") ++BUILTIN(__builtin_lsx_vsrl_d, "V2LLiV2LLiV2LLi", "nc") ++ 
++BUILTIN(__builtin_lsx_vbitclr_b, "V16UcV16UcV16Uc", "nc") ++BUILTIN(__builtin_lsx_vbitclr_h, "V8UsV8UsV8Us", "nc") ++BUILTIN(__builtin_lsx_vbitclr_w, "V4UiV4UiV4Ui", "nc") ++BUILTIN(__builtin_lsx_vbitclr_d, "V2ULLiV2ULLiV2ULLi", "nc") ++ ++BUILTIN(__builtin_lsx_vbitset_b, "V16UcV16UcV16Uc", "nc") ++BUILTIN(__builtin_lsx_vbitset_h, "V8UsV8UsV8Us", "nc") ++BUILTIN(__builtin_lsx_vbitset_w, "V4UiV4UiV4Ui", "nc") ++BUILTIN(__builtin_lsx_vbitset_d, "V2ULLiV2ULLiV2ULLi", "nc") ++ ++BUILTIN(__builtin_lsx_vpackev_b, "V16cV16cV16c", "nc") ++BUILTIN(__builtin_lsx_vpackev_h, "V8sV8sV8s", "nc") ++BUILTIN(__builtin_lsx_vpackev_w, "V4iV4iV4i", "nc") ++BUILTIN(__builtin_lsx_vpackev_d, "V2LLiV2LLiV2LLi", "nc") ++ ++BUILTIN(__builtin_lsx_vpackod_b, "V16cV16cV16c", "nc") ++BUILTIN(__builtin_lsx_vpackod_h, "V8sV8sV8s", "nc") ++BUILTIN(__builtin_lsx_vpackod_w, "V4iV4iV4i", "nc") ++BUILTIN(__builtin_lsx_vpackod_d, "V2LLiV2LLiV2LLi", "nc") ++ ++BUILTIN(__builtin_lsx_vilvl_b, "V16cV16cV16c", "nc") ++BUILTIN(__builtin_lsx_vilvl_h, "V8sV8sV8s", "nc") ++BUILTIN(__builtin_lsx_vilvl_w, "V4iV4iV4i", "nc") ++BUILTIN(__builtin_lsx_vilvl_d, "V2LLiV2LLiV2LLi", "nc") ++ ++BUILTIN(__builtin_lsx_vilvh_b, "V16cV16cV16c", "nc") ++BUILTIN(__builtin_lsx_vilvh_h, "V8sV8sV8s", "nc") ++BUILTIN(__builtin_lsx_vilvh_w, "V4iV4iV4i", "nc") ++BUILTIN(__builtin_lsx_vilvh_d, "V2LLiV2LLiV2LLi", "nc") ++ ++BUILTIN(__builtin_lsx_vpickev_b, "V16cV16cV16c", "nc") ++BUILTIN(__builtin_lsx_vpickev_h, "V8sV8sV8s", "nc") ++BUILTIN(__builtin_lsx_vpickev_w, "V4iV4iV4i", "nc") ++BUILTIN(__builtin_lsx_vpickev_d, "V2LLiV2LLiV2LLi", "nc") ++ ++BUILTIN(__builtin_lsx_vand_v, "V16UcV16UcV16Uc", "nc") ++ ++BUILTIN(__builtin_lsx_vor_v, "V16UcV16UcV16Uc", "nc") ++ ++BUILTIN(__builtin_lsx_vbitrev_b, "V16UcV16UcV16Uc", "nc") ++BUILTIN(__builtin_lsx_vbitrev_h, "V8UsV8UsV8Us", "nc") ++BUILTIN(__builtin_lsx_vbitrev_w, "V4UiV4UiV4Ui", "nc") ++BUILTIN(__builtin_lsx_vbitrev_d, "V2ULLiV2ULLiV2ULLi", "nc") ++ ++BUILTIN(__builtin_lsx_vmod_bu, "V16UcV16UcV16Uc", "nc") ++BUILTIN(__builtin_lsx_vmod_hu, "V8UsV8UsV8Us", "nc") ++BUILTIN(__builtin_lsx_vmod_wu, "V4UiV4UiV4Ui", "nc") ++BUILTIN(__builtin_lsx_vmod_du, "V2ULLiV2ULLiV2ULLi", "nc") ++ ++BUILTIN(__builtin_lsx_vpickod_b, "V16cV16cV16c", "nc") ++BUILTIN(__builtin_lsx_vpickod_h, "V8sV8sV8s", "nc") ++BUILTIN(__builtin_lsx_vpickod_w, "V4iV4iV4i", "nc") ++BUILTIN(__builtin_lsx_vpickod_d, "V2LLiV2LLiV2LLi", "nc") ++ ++BUILTIN(__builtin_lsx_vreplve_b, "V16cV16cUi", "nc") ++BUILTIN(__builtin_lsx_vreplve_h, "V8sV8sUi", "nc") ++BUILTIN(__builtin_lsx_vreplve_w, "V4iV4iUi", "nc") ++BUILTIN(__builtin_lsx_vreplve_d, "V2LLiV2LLiUi", "nc") ++ ++BUILTIN(__builtin_lsx_vsra_b, "V16cV16cV16c", "nc") ++BUILTIN(__builtin_lsx_vsra_h, "V8sV8sV8s", "nc") ++BUILTIN(__builtin_lsx_vsra_w, "V4iV4iV4i", "nc") ++BUILTIN(__builtin_lsx_vsra_d, "V2LLiV2LLiV2LLi", "nc") ++ ++BUILTIN(__builtin_lsx_vxor_v, "V16cV16cV16c", "nc") ++ ++BUILTIN(__builtin_lsx_vnor_v, "V16UcV16UcV16Uc", "nc") ++ ++BUILTIN(__builtin_lsx_vfadd_s, "V4fV4fV4f", "nc") ++BUILTIN(__builtin_lsx_vfadd_d, "V2dV2dV2d", "nc") ++ ++BUILTIN(__builtin_lsx_vfsub_s, "V4fV4fV4f", "nc") ++BUILTIN(__builtin_lsx_vfsub_d, "V2dV2dV2d", "nc") ++ ++BUILTIN(__builtin_lsx_vfmul_s, "V4fV4fV4f", "nc") ++BUILTIN(__builtin_lsx_vfmul_d, "V2dV2dV2d", "nc") ++ ++BUILTIN(__builtin_lsx_vshuf_h, "V8sV8sV8sV8s", "nc") ++BUILTIN(__builtin_lsx_vshuf_w, "V4iV4iV4iV4i", "nc") ++BUILTIN(__builtin_lsx_vshuf_d, "V2LLiV2LLiV2LLiV2LLi", "nc") ++ ++BUILTIN(__builtin_lsx_vseqi_b, "V16ScV16ScISi", "nc") 
++BUILTIN(__builtin_lsx_vseqi_h, "V8SsV8SsISi", "nc") ++BUILTIN(__builtin_lsx_vseqi_w, "V4SiV4SiISi", "nc") ++BUILTIN(__builtin_lsx_vseqi_d, "V2SLLiV2SLLiISi", "nc") ++ ++BUILTIN(__builtin_lsx_vslei_b, "V16ScV16ScISi", "nc") ++BUILTIN(__builtin_lsx_vslei_h, "V8SsV8SsISi", "nc") ++BUILTIN(__builtin_lsx_vslei_w, "V4SiV4SiISi", "nc") ++BUILTIN(__builtin_lsx_vslei_d, "V2SLLiV2SLLiISi", "nc") ++ ++BUILTIN(__builtin_lsx_vslei_bu, "V16ScV16UcIUi", "nc") ++BUILTIN(__builtin_lsx_vslei_hu, "V8SsV8UsIUi", "nc") ++BUILTIN(__builtin_lsx_vslei_wu, "V4SiV4UiIUi", "nc") ++BUILTIN(__builtin_lsx_vslei_du, "V2SLLiV2ULLiIUi", "nc") ++ ++BUILTIN(__builtin_lsx_vslti_b, "V16ScV16ScISi", "nc") ++BUILTIN(__builtin_lsx_vslti_h, "V8SsV8SsISi", "nc") ++BUILTIN(__builtin_lsx_vslti_w, "V4SiV4SiISi", "nc") ++BUILTIN(__builtin_lsx_vslti_d, "V2SLLiV2SLLiISi", "nc") ++ ++BUILTIN(__builtin_lsx_vslti_bu, "V16ScV16UcIUi", "nc") ++BUILTIN(__builtin_lsx_vslti_hu, "V8SsV8UsIUi", "nc") ++BUILTIN(__builtin_lsx_vslti_wu, "V4SiV4UiIUi", "nc") ++BUILTIN(__builtin_lsx_vslti_du, "V2SLLiV2ULLiIUi", "nc") ++ ++BUILTIN(__builtin_lsx_vaddi_bu, "V16cV16cIUi", "nc") ++BUILTIN(__builtin_lsx_vaddi_hu, "V8sV8sIUi", "nc") ++BUILTIN(__builtin_lsx_vaddi_wu, "V4iV4iIUi", "nc") ++BUILTIN(__builtin_lsx_vaddi_du, "V2LLiV2LLiIUi", "nc") ++ ++BUILTIN(__builtin_lsx_vsubi_bu, "V16cV16cIUi", "nc") ++BUILTIN(__builtin_lsx_vsubi_hu, "V8sV8sIUi", "nc") ++BUILTIN(__builtin_lsx_vsubi_wu, "V4iV4iIUi", "nc") ++BUILTIN(__builtin_lsx_vsubi_du, "V2LLiV2LLiIUi", "nc") ++ ++BUILTIN(__builtin_lsx_vmaxi_b, "V16ScV16ScIi", "nc") ++BUILTIN(__builtin_lsx_vmaxi_h, "V8SsV8SsIi", "nc") ++BUILTIN(__builtin_lsx_vmaxi_w, "V4SiV4SiIi", "nc") ++BUILTIN(__builtin_lsx_vmaxi_d, "V2SLLiV2SLLiIi", "nc") ++ ++BUILTIN(__builtin_lsx_vmini_b, "V16ScV16ScIi", "nc") ++BUILTIN(__builtin_lsx_vmini_h, "V8SsV8SsIi", "nc") ++BUILTIN(__builtin_lsx_vmini_w, "V4SiV4SiIi", "nc") ++BUILTIN(__builtin_lsx_vmini_d, "V2SLLiV2SLLiIi", "nc") ++ ++BUILTIN(__builtin_lsx_vmaxi_bu, "V16UcV16UcIi", "nc") ++BUILTIN(__builtin_lsx_vmaxi_hu, "V8UsV8UsIi", "nc") ++BUILTIN(__builtin_lsx_vmaxi_wu, "V4UiV4UiIi", "nc") ++BUILTIN(__builtin_lsx_vmaxi_du, "V2ULLiV2ULLiIi", "nc") ++ ++BUILTIN(__builtin_lsx_vmini_bu, "V16UcV16UcIi", "nc") ++BUILTIN(__builtin_lsx_vmini_hu, "V8UsV8UsIi", "nc") ++BUILTIN(__builtin_lsx_vmini_wu, "V4UiV4UiIi", "nc") ++BUILTIN(__builtin_lsx_vmini_du, "V2ULLiV2ULLiIi", "nc") ++ ++BUILTIN(__builtin_lsx_vclz_b, "V16ScV16Sc", "nc") ++BUILTIN(__builtin_lsx_vclz_h, "V8SsV8Ss", "nc") ++BUILTIN(__builtin_lsx_vclz_w, "V4SiV4Si", "nc") ++BUILTIN(__builtin_lsx_vclz_d, "V2SLLiV2SLLi", "nc") ++ ++BUILTIN(__builtin_lsx_vpcnt_b, "V16ScV16Sc", "nc") ++BUILTIN(__builtin_lsx_vpcnt_h, "V8SsV8Ss", "nc") ++BUILTIN(__builtin_lsx_vpcnt_w, "V4SiV4Si", "nc") ++BUILTIN(__builtin_lsx_vpcnt_d, "V2SLLiV2SLLi", "nc") ++ ++BUILTIN(__builtin_lsx_vfsqrt_s, "V4fV4f", "nc") ++BUILTIN(__builtin_lsx_vfsqrt_d, "V2dV2d", "nc") ++ ++BUILTIN(__builtin_lsx_vfrint_s, "V4fV4f", "nc") ++BUILTIN(__builtin_lsx_vfrint_d, "V2dV2d", "nc") ++ ++BUILTIN(__builtin_lsx_vffint_s_w, "V4fV4Si", "nc") ++BUILTIN(__builtin_lsx_vffint_d_l, "V2dV2SLLi", "nc") ++ ++BUILTIN(__builtin_lsx_vffint_s_wu, "V4fV4Ui", "nc") ++BUILTIN(__builtin_lsx_vffint_d_lu, "V2dV2ULLi", "nc") ++ ++BUILTIN(__builtin_lsx_vftintrz_wu_s, "V4UiV4f", "nc") ++BUILTIN(__builtin_lsx_vftintrz_lu_d, "V2ULLiV2d", "nc") ++ ++BUILTIN(__builtin_lsx_vreplgr2vr_b, "V16Sci", "nc") ++BUILTIN(__builtin_lsx_vreplgr2vr_h, "V8Ssi", "nc") ++BUILTIN(__builtin_lsx_vreplgr2vr_w, "V4Sii", "nc") 
++BUILTIN(__builtin_lsx_vreplgr2vr_d, "V2SLLiLLi", "nc") ++ ++BUILTIN(__builtin_lsx_vinsgr2vr_b, "V16ScV16SciIUi", "nc") ++BUILTIN(__builtin_lsx_vinsgr2vr_h, "V8SsV8SsiIUi", "nc") ++BUILTIN(__builtin_lsx_vinsgr2vr_w, "V4SiV4SiiIUi", "nc") ++BUILTIN(__builtin_lsx_vinsgr2vr_d, "V2SLLiV2SLLiLLiIUi", "nc") ++ ++BUILTIN(__builtin_lsx_vfdiv_s, "V4fV4fV4f", "nc") ++BUILTIN(__builtin_lsx_vfdiv_d, "V2dV2dV2d", "nc") ++ ++BUILTIN(__builtin_lsx_vslli_b, "V16cV16cIUi", "nc") ++BUILTIN(__builtin_lsx_vslli_h, "V8sV8sIUi", "nc") ++BUILTIN(__builtin_lsx_vslli_w, "V4iV4iIUi", "nc") ++BUILTIN(__builtin_lsx_vslli_d, "V2LLiV2LLiIUi", "nc") ++ ++BUILTIN(__builtin_lsx_vsrli_b, "V16cV16cIUi", "nc") ++BUILTIN(__builtin_lsx_vsrli_h, "V8sV8sIUi", "nc") ++BUILTIN(__builtin_lsx_vsrli_w, "V4iV4iIUi", "nc") ++BUILTIN(__builtin_lsx_vsrli_d, "V2LLiV2LLiIUi", "nc") ++ ++BUILTIN(__builtin_lsx_vsrai_b, "V16cV16cIUi", "nc") ++BUILTIN(__builtin_lsx_vsrai_h, "V8sV8sIUi", "nc") ++BUILTIN(__builtin_lsx_vsrai_w, "V4iV4iIUi", "nc") ++BUILTIN(__builtin_lsx_vsrai_d, "V2LLiV2LLiIUi", "nc") ++ ++BUILTIN(__builtin_lsx_vshuf4i_b, "V16cV16cIUi", "nc") ++BUILTIN(__builtin_lsx_vshuf4i_h, "V8sV8sIUi", "nc") ++BUILTIN(__builtin_lsx_vshuf4i_w, "V4iV4iIUi", "nc") ++BUILTIN(__builtin_lsx_vshuf4i_d, "V2LLiV2LLiV2LLiIUi", "nc") ++ ++BUILTIN(__builtin_lsx_vrotr_b, "V16cV16cV16c", "nc") ++BUILTIN(__builtin_lsx_vrotr_h, "V8sV8sV8s", "nc") ++BUILTIN(__builtin_lsx_vrotr_w, "V4iV4iV4i", "nc") ++BUILTIN(__builtin_lsx_vrotr_d, "V2LLiV2LLiV2LLi", "nc") ++ ++BUILTIN(__builtin_lsx_vrotri_b, "V16cV16cIi", "nc") ++BUILTIN(__builtin_lsx_vrotri_h, "V8sV8sIi", "nc") ++BUILTIN(__builtin_lsx_vrotri_w, "V4iV4iIi", "nc") ++BUILTIN(__builtin_lsx_vrotri_d, "V2LLiV2LLiIi", "nc") ++ ++BUILTIN(__builtin_lsx_vld, "V16Scv*Ii", "nc") ++ ++BUILTIN(__builtin_lsx_vst, "vV16Scv*Ii", "nc") ++ ++BUILTIN(__builtin_lsx_bz_v, "iV16Uc", "nc") ++ ++BUILTIN(__builtin_lsx_bnz_v, "iV16Uc", "nc") ++ ++BUILTIN(__builtin_lsx_bz_b, "iV16Uc", "nc") ++BUILTIN(__builtin_lsx_bz_h, "iV8Us", "nc") ++BUILTIN(__builtin_lsx_bz_w, "iV4Ui", "nc") ++BUILTIN(__builtin_lsx_bz_d, "iV2ULLi", "nc") ++ ++BUILTIN(__builtin_lsx_bnz_b, "iV16Uc", "nc") ++BUILTIN(__builtin_lsx_bnz_h, "iV8Us", "nc") ++BUILTIN(__builtin_lsx_bnz_w, "iV4Ui", "nc") ++BUILTIN(__builtin_lsx_bnz_d, "iV2ULLi", "nc") ++ ++//LoongArch LASX ++ ++BUILTIN(__builtin_lasx_xvfmadd_s, "V8fV8fV8fV8f", "nc") ++BUILTIN(__builtin_lasx_xvfmadd_d, "V4dV4dV4dV4d", "nc") ++ ++BUILTIN(__builtin_lasx_xvfmsub_s, "V8fV8fV8fV8f", "nc") ++BUILTIN(__builtin_lasx_xvfmsub_d, "V4dV4dV4dV4d", "nc") ++ ++BUILTIN(__builtin_lasx_xvfnmadd_s, "V8fV8fV8fV8f", "nc") ++BUILTIN(__builtin_lasx_xvfnmadd_d, "V4dV4dV4dV4d", "nc") ++ ++BUILTIN(__builtin_lasx_xvfnmsub_s, "V8fV8fV8fV8f", "nc") ++BUILTIN(__builtin_lasx_xvfnmsub_d, "V4dV4dV4dV4d", "nc") ++ ++ ++BUILTIN(__builtin_lasx_xvsll_b, "V32cV32cV32c", "nc") ++BUILTIN(__builtin_lasx_xvsll_h, "V16sV16sV16s", "nc") ++BUILTIN(__builtin_lasx_xvsll_w, "V8iV8iV8i", "nc") ++BUILTIN(__builtin_lasx_xvsll_d, "V4LLiV4LLiV4LLi", "nc") ++ ++BUILTIN(__builtin_lasx_xvslli_b, "V32cV32cIUi", "nc") ++BUILTIN(__builtin_lasx_xvslli_h, "V16sV16sIUi", "nc") ++BUILTIN(__builtin_lasx_xvslli_w, "V8iV8iIUi", "nc") ++BUILTIN(__builtin_lasx_xvslli_d, "V4LLiV4LLiIUi", "nc") ++ ++BUILTIN(__builtin_lasx_xvsra_b, "V32cV32cV32c", "nc") ++BUILTIN(__builtin_lasx_xvsra_h, "V16sV16sV16s", "nc") ++BUILTIN(__builtin_lasx_xvsra_w, "V8iV8iV8i", "nc") ++BUILTIN(__builtin_lasx_xvsra_d, "V4LLiV4LLiV4LLi", "nc") ++ ++BUILTIN(__builtin_lasx_xvsrai_b, "V32cV32cIUi", "nc") 
++BUILTIN(__builtin_lasx_xvsrai_h, "V16sV16sIUi", "nc") ++BUILTIN(__builtin_lasx_xvsrai_w, "V8iV8iIUi", "nc") ++BUILTIN(__builtin_lasx_xvsrai_d, "V4LLiV4LLiIUi", "nc") ++ ++BUILTIN(__builtin_lasx_xvsrar_b, "V32cV32cV32c", "nc") ++BUILTIN(__builtin_lasx_xvsrar_h, "V16sV16sV16s", "nc") ++BUILTIN(__builtin_lasx_xvsrar_w, "V8iV8iV8i", "nc") ++BUILTIN(__builtin_lasx_xvsrar_d, "V4LLiV4LLiV4LLi", "nc") ++ ++BUILTIN(__builtin_lasx_xvsrari_b, "V32cV32cIUi", "nc") ++BUILTIN(__builtin_lasx_xvsrari_h, "V16sV16sIUi", "nc") ++BUILTIN(__builtin_lasx_xvsrari_w, "V8iV8iIUi", "nc") ++BUILTIN(__builtin_lasx_xvsrari_d, "V4LLiV4LLiIUi", "nc") ++ ++BUILTIN(__builtin_lasx_xvsrl_b, "V32cV32cV32c", "nc") ++BUILTIN(__builtin_lasx_xvsrl_h, "V16sV16sV16s", "nc") ++BUILTIN(__builtin_lasx_xvsrl_w, "V8iV8iV8i", "nc") ++BUILTIN(__builtin_lasx_xvsrl_d, "V4LLiV4LLiV4LLi", "nc") ++ ++BUILTIN(__builtin_lasx_xvsrli_b, "V32cV32cIUi", "nc") ++BUILTIN(__builtin_lasx_xvsrli_h, "V16sV16sIUi", "nc") ++BUILTIN(__builtin_lasx_xvsrli_w, "V8iV8iIUi", "nc") ++BUILTIN(__builtin_lasx_xvsrli_d, "V4LLiV4LLiIUi", "nc") ++ ++BUILTIN(__builtin_lasx_xvsrlr_b, "V32cV32cV32c", "nc") ++BUILTIN(__builtin_lasx_xvsrlr_h, "V16sV16sV16s", "nc") ++BUILTIN(__builtin_lasx_xvsrlr_w, "V8iV8iV8i", "nc") ++BUILTIN(__builtin_lasx_xvsrlr_d, "V4LLiV4LLiV4LLi", "nc") ++ ++BUILTIN(__builtin_lasx_xvsrlri_b, "V32cV32cIUi", "nc") ++BUILTIN(__builtin_lasx_xvsrlri_h, "V16sV16sIUi", "nc") ++BUILTIN(__builtin_lasx_xvsrlri_w, "V8iV8iIUi", "nc") ++BUILTIN(__builtin_lasx_xvsrlri_d, "V4LLiV4LLiIUi", "nc") ++ ++BUILTIN(__builtin_lasx_xvbitclr_b, "V32UcV32UcV32Uc", "nc") ++BUILTIN(__builtin_lasx_xvbitclr_h, "V16UsV16UsV16Us", "nc") ++BUILTIN(__builtin_lasx_xvbitclr_w, "V8UiV8UiV8Ui", "nc") ++BUILTIN(__builtin_lasx_xvbitclr_d, "V4ULLiV4ULLiV4ULLi", "nc") ++ ++BUILTIN(__builtin_lasx_xvbitclri_b, "V32UcV32UcIUi", "nc") ++BUILTIN(__builtin_lasx_xvbitclri_h, "V16UsV16UsIUi", "nc") ++BUILTIN(__builtin_lasx_xvbitclri_w, "V8UiV8UiIUi", "nc") ++BUILTIN(__builtin_lasx_xvbitclri_d, "V4ULLiV4ULLiIUi", "nc") ++ ++BUILTIN(__builtin_lasx_xvbitset_b, "V32UcV32UcV32Uc", "nc") ++BUILTIN(__builtin_lasx_xvbitset_h, "V16UsV16UsV16Us", "nc") ++BUILTIN(__builtin_lasx_xvbitset_w, "V8UiV8UiV8Ui", "nc") ++BUILTIN(__builtin_lasx_xvbitset_d, "V4ULLiV4ULLiV4ULLi", "nc") ++ ++BUILTIN(__builtin_lasx_xvbitseti_b, "V32UcV32UcIUi", "nc") ++BUILTIN(__builtin_lasx_xvbitseti_h, "V16UsV16UsIUi", "nc") ++BUILTIN(__builtin_lasx_xvbitseti_w, "V8UiV8UiIUi", "nc") ++BUILTIN(__builtin_lasx_xvbitseti_d, "V4ULLiV4ULLiIUi", "nc") ++ ++BUILTIN(__builtin_lasx_xvbitrev_b, "V32UcV32UcV32Uc", "nc") ++BUILTIN(__builtin_lasx_xvbitrev_h, "V16UsV16UsV16Us", "nc") ++BUILTIN(__builtin_lasx_xvbitrev_w, "V8UiV8UiV8Ui", "nc") ++BUILTIN(__builtin_lasx_xvbitrev_d, "V4ULLiV4ULLiV4ULLi", "nc") ++ ++BUILTIN(__builtin_lasx_xvbitrevi_b, "V32UcV32UcIUi", "nc") ++BUILTIN(__builtin_lasx_xvbitrevi_h, "V16UsV16UsIUi", "nc") ++BUILTIN(__builtin_lasx_xvbitrevi_w, "V8UiV8UiIUi", "nc") ++BUILTIN(__builtin_lasx_xvbitrevi_d, "V4ULLiV4ULLiIUi", "nc") ++ ++BUILTIN(__builtin_lasx_xvadd_b, "V32cV32cV32c", "nc") ++BUILTIN(__builtin_lasx_xvadd_h, "V16sV16sV16s", "nc") ++BUILTIN(__builtin_lasx_xvadd_w, "V8iV8iV8i", "nc") ++BUILTIN(__builtin_lasx_xvadd_d, "V4LLiV4LLiV4LLi", "nc") ++ ++BUILTIN(__builtin_lasx_xvaddi_bu, "V32cV32cIUi", "nc") ++BUILTIN(__builtin_lasx_xvaddi_hu, "V16sV16sIUi", "nc") ++BUILTIN(__builtin_lasx_xvaddi_wu, "V8iV8iIUi", "nc") ++BUILTIN(__builtin_lasx_xvaddi_du, "V4LLiV4LLiIUi", "nc") ++ ++BUILTIN(__builtin_lasx_xvsub_b, "V32cV32cV32c", 
"nc") ++BUILTIN(__builtin_lasx_xvsub_h, "V16sV16sV16s", "nc") ++BUILTIN(__builtin_lasx_xvsub_w, "V8iV8iV8i", "nc") ++BUILTIN(__builtin_lasx_xvsub_d, "V4LLiV4LLiV4LLi", "nc") ++ ++BUILTIN(__builtin_lasx_xvsubi_bu, "V32cV32cIUi", "nc") ++BUILTIN(__builtin_lasx_xvsubi_hu, "V16sV16sIUi", "nc") ++BUILTIN(__builtin_lasx_xvsubi_wu, "V8iV8iIUi", "nc") ++BUILTIN(__builtin_lasx_xvsubi_du, "V4LLiV4LLiIUi", "nc") ++ ++BUILTIN(__builtin_lasx_xvmax_b, "V32ScV32ScV32Sc", "nc") ++BUILTIN(__builtin_lasx_xvmax_h, "V16SsV16SsV16Ss", "nc") ++BUILTIN(__builtin_lasx_xvmax_w, "V8SiV8SiV8Si", "nc") ++BUILTIN(__builtin_lasx_xvmax_d, "V4SLLiV4SLLiV4SLLi", "nc") ++ ++BUILTIN(__builtin_lasx_xvmaxi_b, "V32ScV32ScIi", "nc") ++BUILTIN(__builtin_lasx_xvmaxi_h, "V16SsV16SsIi", "nc") ++BUILTIN(__builtin_lasx_xvmaxi_w, "V8SiV8SiIi", "nc") ++BUILTIN(__builtin_lasx_xvmaxi_d, "V4SLLiV4SLLiIi", "nc") ++ ++BUILTIN(__builtin_lasx_xvmax_bu, "V32UcV32UcV32Uc", "nc") ++BUILTIN(__builtin_lasx_xvmax_hu, "V16UsV16UsV16Us", "nc") ++BUILTIN(__builtin_lasx_xvmax_wu, "V8UiV8UiV8Ui", "nc") ++BUILTIN(__builtin_lasx_xvmax_du, "V4ULLiV4ULLiV4ULLi", "nc") ++ ++BUILTIN(__builtin_lasx_xvmaxi_bu, "V32UcV32UcIi", "nc") ++BUILTIN(__builtin_lasx_xvmaxi_hu, "V16UsV16UsIi", "nc") ++BUILTIN(__builtin_lasx_xvmaxi_wu, "V8UiV8UiIi", "nc") ++BUILTIN(__builtin_lasx_xvmaxi_du, "V4ULLiV4ULLiIi", "nc") ++ ++BUILTIN(__builtin_lasx_xvmin_b, "V32ScV32ScV32Sc", "nc") ++BUILTIN(__builtin_lasx_xvmin_h, "V16SsV16SsV16Ss", "nc") ++BUILTIN(__builtin_lasx_xvmin_w, "V8SiV8SiV8Si", "nc") ++BUILTIN(__builtin_lasx_xvmin_d, "V4SLLiV4SLLiV4SLLi", "nc") ++ ++BUILTIN(__builtin_lasx_xvmini_b, "V32ScV32ScIi", "nc") ++BUILTIN(__builtin_lasx_xvmini_h, "V16SsV16SsIi", "nc") ++BUILTIN(__builtin_lasx_xvmini_w, "V8SiV8SiIi", "nc") ++BUILTIN(__builtin_lasx_xvmini_d, "V4SLLiV4SLLiIi", "nc") ++ ++BUILTIN(__builtin_lasx_xvmin_bu, "V32UcV32UcV32Uc", "nc") ++BUILTIN(__builtin_lasx_xvmin_hu, "V16UsV16UsV16Us", "nc") ++BUILTIN(__builtin_lasx_xvmin_wu, "V8UiV8UiV8Ui", "nc") ++BUILTIN(__builtin_lasx_xvmin_du, "V4ULLiV4ULLiV4ULLi", "nc") ++ ++BUILTIN(__builtin_lasx_xvmini_bu, "V32UcV32UcIi", "nc") ++BUILTIN(__builtin_lasx_xvmini_hu, "V16UsV16UsIi", "nc") ++BUILTIN(__builtin_lasx_xvmini_wu, "V8UiV8UiIi", "nc") ++BUILTIN(__builtin_lasx_xvmini_du, "V4ULLiV4ULLiIi", "nc") ++ ++BUILTIN(__builtin_lasx_xvseq_b, "V32ScV32ScV32Sc", "nc") ++BUILTIN(__builtin_lasx_xvseq_h, "V16SsV16SsV16Ss", "nc") ++BUILTIN(__builtin_lasx_xvseq_w, "V8SiV8SiV8Si", "nc") ++BUILTIN(__builtin_lasx_xvseq_d, "V4SLLiV4SLLiV4SLLi", "nc") ++ ++BUILTIN(__builtin_lasx_xvseqi_b, "V32ScV32ScISi", "nc") ++BUILTIN(__builtin_lasx_xvseqi_h, "V16SsV16SsISi", "nc") ++BUILTIN(__builtin_lasx_xvseqi_w, "V8SiV8SiISi", "nc") ++BUILTIN(__builtin_lasx_xvseqi_d, "V4SLLiV4SLLiISi", "nc") ++ ++BUILTIN(__builtin_lasx_xvslt_b, "V32ScV32ScV32Sc", "nc") ++BUILTIN(__builtin_lasx_xvslt_h, "V16SsV16SsV16Ss", "nc") ++BUILTIN(__builtin_lasx_xvslt_w, "V8SiV8SiV8Si", "nc") ++BUILTIN(__builtin_lasx_xvslt_d, "V4SLLiV4SLLiV4SLLi", "nc") ++ ++BUILTIN(__builtin_lasx_xvslti_b, "V32ScV32ScISi", "nc") ++BUILTIN(__builtin_lasx_xvslti_h, "V16SsV16SsISi", "nc") ++BUILTIN(__builtin_lasx_xvslti_w, "V8SiV8SiISi", "nc") ++BUILTIN(__builtin_lasx_xvslti_d, "V4SLLiV4SLLiISi", "nc") ++ ++BUILTIN(__builtin_lasx_xvslt_bu, "V32ScV32UcV32Uc", "nc") ++BUILTIN(__builtin_lasx_xvslt_hu, "V16SsV16UsV16Us", "nc") ++BUILTIN(__builtin_lasx_xvslt_wu, "V8SiV8UiV8Ui", "nc") ++BUILTIN(__builtin_lasx_xvslt_du, "V4SLLiV4ULLiV4ULLi", "nc") ++ ++BUILTIN(__builtin_lasx_xvslti_bu, "V32ScV32UcIUi", "nc") 
++BUILTIN(__builtin_lasx_xvslti_hu, "V16SsV16UsIUi", "nc") ++BUILTIN(__builtin_lasx_xvslti_wu, "V8SiV8UiIUi", "nc") ++BUILTIN(__builtin_lasx_xvslti_du, "V4SLLiV4ULLiIUi", "nc") ++ ++BUILTIN(__builtin_lasx_xvsle_b, "V32ScV32ScV32Sc", "nc") ++BUILTIN(__builtin_lasx_xvsle_h, "V16SsV16SsV16Ss", "nc") ++BUILTIN(__builtin_lasx_xvsle_w, "V8SiV8SiV8Si", "nc") ++BUILTIN(__builtin_lasx_xvsle_d, "V4SLLiV4SLLiV4SLLi", "nc") ++ ++BUILTIN(__builtin_lasx_xvslei_b, "V32ScV32ScISi", "nc") ++BUILTIN(__builtin_lasx_xvslei_h, "V16SsV16SsISi", "nc") ++BUILTIN(__builtin_lasx_xvslei_w, "V8SiV8SiISi", "nc") ++BUILTIN(__builtin_lasx_xvslei_d, "V4SLLiV4SLLiISi", "nc") ++ ++BUILTIN(__builtin_lasx_xvsle_bu, "V32ScV32UcV32Uc", "nc") ++BUILTIN(__builtin_lasx_xvsle_hu, "V16SsV16UsV16Us", "nc") ++BUILTIN(__builtin_lasx_xvsle_wu, "V8SiV8UiV8Ui", "nc") ++BUILTIN(__builtin_lasx_xvsle_du, "V4SLLiV4ULLiV4ULLi", "nc") ++ ++BUILTIN(__builtin_lasx_xvslei_bu, "V32ScV32UcIUi", "nc") ++BUILTIN(__builtin_lasx_xvslei_hu, "V16SsV16UsIUi", "nc") ++BUILTIN(__builtin_lasx_xvslei_wu, "V8SiV8UiIUi", "nc") ++BUILTIN(__builtin_lasx_xvslei_du, "V4SLLiV4ULLiIUi", "nc") ++ ++BUILTIN(__builtin_lasx_xvsat_b, "V32ScV32ScIUi", "nc") ++BUILTIN(__builtin_lasx_xvsat_h, "V16SsV16SsIUi", "nc") ++BUILTIN(__builtin_lasx_xvsat_w, "V8SiV8SiIUi", "nc") ++BUILTIN(__builtin_lasx_xvsat_d, "V4SLLiV4SLLiIUi", "nc") ++ ++BUILTIN(__builtin_lasx_xvsat_bu, "V32UcV32UcIUi", "nc") ++BUILTIN(__builtin_lasx_xvsat_hu, "V16UsV16UsIUi", "nc") ++BUILTIN(__builtin_lasx_xvsat_wu, "V8UiV8UiIUi", "nc") ++BUILTIN(__builtin_lasx_xvsat_du, "V4ULLiV4ULLiIUi", "nc") ++ ++BUILTIN(__builtin_lasx_xvadda_b, "V32ScV32ScV32Sc", "nc") ++BUILTIN(__builtin_lasx_xvadda_h, "V16SsV16SsV16Ss", "nc") ++BUILTIN(__builtin_lasx_xvadda_w, "V8SiV8SiV8Si", "nc") ++BUILTIN(__builtin_lasx_xvadda_d, "V4SLLiV4SLLiV4SLLi", "nc") ++ ++BUILTIN(__builtin_lasx_xvsadd_b, "V32ScV32ScV32Sc", "nc") ++BUILTIN(__builtin_lasx_xvsadd_h, "V16SsV16SsV16Ss", "nc") ++BUILTIN(__builtin_lasx_xvsadd_w, "V8SiV8SiV8Si", "nc") ++BUILTIN(__builtin_lasx_xvsadd_d, "V4SLLiV4SLLiV4SLLi", "nc") ++ ++BUILTIN(__builtin_lasx_xvsadd_bu, "V32UcV32UcV32Uc", "nc") ++BUILTIN(__builtin_lasx_xvsadd_hu, "V16UsV16UsV16Us", "nc") ++BUILTIN(__builtin_lasx_xvsadd_wu, "V8UiV8UiV8Ui", "nc") ++BUILTIN(__builtin_lasx_xvsadd_du, "V4ULLiV4ULLiV4ULLi", "nc") ++ ++BUILTIN(__builtin_lasx_xvavg_b, "V32ScV32ScV32Sc", "nc") ++BUILTIN(__builtin_lasx_xvavg_h, "V16SsV16SsV16Ss", "nc") ++BUILTIN(__builtin_lasx_xvavg_w, "V8SiV8SiV8Si", "nc") ++BUILTIN(__builtin_lasx_xvavg_d, "V4SLLiV4SLLiV4SLLi", "nc") ++ ++BUILTIN(__builtin_lasx_xvavg_bu, "V32UcV32UcV32Uc", "nc") ++BUILTIN(__builtin_lasx_xvavg_hu, "V16UsV16UsV16Us", "nc") ++BUILTIN(__builtin_lasx_xvavg_wu, "V8UiV8UiV8Ui", "nc") ++BUILTIN(__builtin_lasx_xvavg_du, "V4ULLiV4ULLiV4ULLi", "nc") ++ ++BUILTIN(__builtin_lasx_xvavgr_b, "V32ScV32ScV32Sc", "nc") ++BUILTIN(__builtin_lasx_xvavgr_h, "V16SsV16SsV16Ss", "nc") ++BUILTIN(__builtin_lasx_xvavgr_w, "V8SiV8SiV8Si", "nc") ++BUILTIN(__builtin_lasx_xvavgr_d, "V4SLLiV4SLLiV4SLLi", "nc") ++ ++BUILTIN(__builtin_lasx_xvavgr_bu, "V32UcV32UcV32Uc", "nc") ++BUILTIN(__builtin_lasx_xvavgr_hu, "V16UsV16UsV16Us", "nc") ++BUILTIN(__builtin_lasx_xvavgr_wu, "V8UiV8UiV8Ui", "nc") ++BUILTIN(__builtin_lasx_xvavgr_du, "V4ULLiV4ULLiV4ULLi", "nc") ++ ++BUILTIN(__builtin_lasx_xvssub_b, "V32ScV32ScV32Sc", "nc") ++BUILTIN(__builtin_lasx_xvssub_h, "V16SsV16SsV16Ss", "nc") ++BUILTIN(__builtin_lasx_xvssub_w, "V8SiV8SiV8Si", "nc") ++BUILTIN(__builtin_lasx_xvssub_d, "V4SLLiV4SLLiV4SLLi", "nc") ++ 
++BUILTIN(__builtin_lasx_xvssub_bu, "V32UcV32UcV32Uc", "nc") ++BUILTIN(__builtin_lasx_xvssub_hu, "V16UsV16UsV16Us", "nc") ++BUILTIN(__builtin_lasx_xvssub_wu, "V8UiV8UiV8Ui", "nc") ++BUILTIN(__builtin_lasx_xvssub_du, "V4ULLiV4ULLiV4ULLi", "nc") ++ ++BUILTIN(__builtin_lasx_xvabsd_b, "V32ScV32ScV32Sc", "nc") ++BUILTIN(__builtin_lasx_xvabsd_h, "V16SsV16SsV16Ss", "nc") ++BUILTIN(__builtin_lasx_xvabsd_w, "V8SiV8SiV8Si", "nc") ++BUILTIN(__builtin_lasx_xvabsd_d, "V4SLLiV4SLLiV4SLLi", "nc") ++ ++BUILTIN(__builtin_lasx_xvabsd_bu, "V32UcV32UcV32Uc", "nc") ++BUILTIN(__builtin_lasx_xvabsd_hu, "V16UsV16UsV16Us", "nc") ++BUILTIN(__builtin_lasx_xvabsd_wu, "V8UiV8UiV8Ui", "nc") ++BUILTIN(__builtin_lasx_xvabsd_du, "V4ULLiV4ULLiV4ULLi", "nc") ++ ++BUILTIN(__builtin_lasx_xvmul_b, "V32ScV32ScV32Sc", "nc") ++BUILTIN(__builtin_lasx_xvmul_h, "V16SsV16SsV16Ss", "nc") ++BUILTIN(__builtin_lasx_xvmul_w, "V8SiV8SiV8Si", "nc") ++BUILTIN(__builtin_lasx_xvmul_d, "V4SLLiV4SLLiV4SLLi", "nc") ++ ++BUILTIN(__builtin_lasx_xvmadd_b, "V32ScV32ScV32ScV32Sc", "nc") ++BUILTIN(__builtin_lasx_xvmadd_h, "V16SsV16SsV16SsV16Ss", "nc") ++BUILTIN(__builtin_lasx_xvmadd_w, "V8SiV8SiV8SiV8Si", "nc") ++BUILTIN(__builtin_lasx_xvmadd_d, "V4SLLiV4SLLiV4SLLiV4SLLi", "nc") ++ ++BUILTIN(__builtin_lasx_xvmsub_b, "V32ScV32ScV32ScV32Sc", "nc") ++BUILTIN(__builtin_lasx_xvmsub_h, "V16SsV16SsV16SsV16Ss", "nc") ++BUILTIN(__builtin_lasx_xvmsub_w, "V8SiV8SiV8SiV8Si", "nc") ++BUILTIN(__builtin_lasx_xvmsub_d, "V4SLLiV4SLLiV4SLLiV4SLLi", "nc") ++ ++BUILTIN(__builtin_lasx_xvdiv_b, "V32ScV32ScV32Sc", "nc") ++BUILTIN(__builtin_lasx_xvdiv_h, "V16SsV16SsV16Ss", "nc") ++BUILTIN(__builtin_lasx_xvdiv_w, "V8SiV8SiV8Si", "nc") ++BUILTIN(__builtin_lasx_xvdiv_d, "V4SLLiV4SLLiV4SLLi", "nc") ++ ++BUILTIN(__builtin_lasx_xvdiv_bu, "V32UcV32UcV32Uc", "nc") ++BUILTIN(__builtin_lasx_xvdiv_hu, "V16UsV16UsV16Us", "nc") ++BUILTIN(__builtin_lasx_xvdiv_wu, "V8UiV8UiV8Ui", "nc") ++BUILTIN(__builtin_lasx_xvdiv_du, "V4ULLiV4ULLiV4ULLi", "nc") ++ ++BUILTIN(__builtin_lasx_xvhaddw_h_b, "V16SsV32ScV32Sc", "nc") ++BUILTIN(__builtin_lasx_xvhaddw_w_h, "V8SiV16SsV16Ss", "nc") ++BUILTIN(__builtin_lasx_xvhaddw_d_w, "V4SLLiV8SiV8Si", "nc") ++ ++BUILTIN(__builtin_lasx_xvhaddw_hu_bu, "V16UsV32UcV32Uc", "nc") ++BUILTIN(__builtin_lasx_xvhaddw_wu_hu, "V8UiV16UsV16Us", "nc") ++BUILTIN(__builtin_lasx_xvhaddw_du_wu, "V4ULLiV8UiV8Ui", "nc") ++ ++BUILTIN(__builtin_lasx_xvhsubw_h_b, "V16SsV32ScV32Sc", "nc") ++BUILTIN(__builtin_lasx_xvhsubw_w_h, "V8SiV16SsV16Ss", "nc") ++BUILTIN(__builtin_lasx_xvhsubw_d_w, "V4SLLiV8SiV8Si", "nc") ++ ++BUILTIN(__builtin_lasx_xvhsubw_hu_bu, "V16UsV32UcV32Uc", "nc") ++BUILTIN(__builtin_lasx_xvhsubw_wu_hu, "V8UiV16UsV16Us", "nc") ++BUILTIN(__builtin_lasx_xvhsubw_du_wu, "V4ULLiV8UiV8Ui", "nc") ++ ++BUILTIN(__builtin_lasx_xvmod_b, "V32ScV32ScV32Sc", "nc") ++BUILTIN(__builtin_lasx_xvmod_h, "V16SsV16SsV16Ss", "nc") ++BUILTIN(__builtin_lasx_xvmod_w, "V8SiV8SiV8Si", "nc") ++BUILTIN(__builtin_lasx_xvmod_d, "V4SLLiV4SLLiV4SLLi", "nc") ++ ++BUILTIN(__builtin_lasx_xvmod_bu, "V32UcV32UcV32Uc", "nc") ++BUILTIN(__builtin_lasx_xvmod_hu, "V16UsV16UsV16Us", "nc") ++BUILTIN(__builtin_lasx_xvmod_wu, "V8UiV8UiV8Ui", "nc") ++BUILTIN(__builtin_lasx_xvmod_du, "V4ULLiV4ULLiV4ULLi", "nc") ++ ++BUILTIN(__builtin_lasx_xvrepl128vei_b, "V32cV32cIUi", "nc") ++BUILTIN(__builtin_lasx_xvrepl128vei_h, "V16sV16sIUi", "nc") ++BUILTIN(__builtin_lasx_xvrepl128vei_w, "V8iV8iIUi", "nc") ++BUILTIN(__builtin_lasx_xvrepl128vei_d, "V4LLiV4LLiIUi", "nc") ++ ++BUILTIN(__builtin_lasx_xvpickev_b, "V32cV32cV32c", "nc") 
++BUILTIN(__builtin_lasx_xvpickev_h, "V16sV16sV16s", "nc") ++BUILTIN(__builtin_lasx_xvpickev_w, "V8iV8iV8i", "nc") ++BUILTIN(__builtin_lasx_xvpickev_d, "V4LLiV4LLiV4LLi", "nc") ++ ++BUILTIN(__builtin_lasx_xvpickod_b, "V32cV32cV32c", "nc") ++BUILTIN(__builtin_lasx_xvpickod_h, "V16sV16sV16s", "nc") ++BUILTIN(__builtin_lasx_xvpickod_w, "V8iV8iV8i", "nc") ++BUILTIN(__builtin_lasx_xvpickod_d, "V4LLiV4LLiV4LLi", "nc") ++ ++BUILTIN(__builtin_lasx_xvilvh_b, "V32cV32cV32c", "nc") ++BUILTIN(__builtin_lasx_xvilvh_h, "V16sV16sV16s", "nc") ++BUILTIN(__builtin_lasx_xvilvh_w, "V8iV8iV8i", "nc") ++BUILTIN(__builtin_lasx_xvilvh_d, "V4LLiV4LLiV4LLi", "nc") ++ ++BUILTIN(__builtin_lasx_xvilvl_b, "V32cV32cV32c", "nc") ++BUILTIN(__builtin_lasx_xvilvl_h, "V16sV16sV16s", "nc") ++BUILTIN(__builtin_lasx_xvilvl_w, "V8iV8iV8i", "nc") ++BUILTIN(__builtin_lasx_xvilvl_d, "V4LLiV4LLiV4LLi", "nc") ++ ++BUILTIN(__builtin_lasx_xvpackev_b, "V32cV32cV32c", "nc") ++BUILTIN(__builtin_lasx_xvpackev_h, "V16sV16sV16s", "nc") ++BUILTIN(__builtin_lasx_xvpackev_w, "V8iV8iV8i", "nc") ++BUILTIN(__builtin_lasx_xvpackev_d, "V4LLiV4LLiV4LLi", "nc") ++ ++BUILTIN(__builtin_lasx_xvpackod_b, "V32cV32cV32c", "nc") ++BUILTIN(__builtin_lasx_xvpackod_h, "V16sV16sV16s", "nc") ++BUILTIN(__builtin_lasx_xvpackod_w, "V8iV8iV8i", "nc") ++BUILTIN(__builtin_lasx_xvpackod_d, "V4LLiV4LLiV4LLi", "nc") ++ ++BUILTIN(__builtin_lasx_xvshuf_b, "V32UcV32UcV32UcV32Uc", "nc") ++BUILTIN(__builtin_lasx_xvshuf_h, "V16sV16sV16sV16s", "nc") ++BUILTIN(__builtin_lasx_xvshuf_w, "V8iV8iV8iV8i", "nc") ++BUILTIN(__builtin_lasx_xvshuf_d, "V4LLiV4LLiV4LLiV4LLi", "nc") ++ ++BUILTIN(__builtin_lasx_xvand_v, "V32UcV32UcV32Uc", "nc") ++ ++BUILTIN(__builtin_lasx_xvandi_b, "V32UcV32UcIUi", "nc") ++ ++BUILTIN(__builtin_lasx_xvor_v, "V32UcV32UcV32Uc", "nc") ++ ++BUILTIN(__builtin_lasx_xvori_b, "V32UcV32UcIUi", "nc") ++ ++BUILTIN(__builtin_lasx_xvnor_v, "V32UcV32UcV32Uc", "nc") ++ ++BUILTIN(__builtin_lasx_xvnori_b, "V32UcV32UcIUi", "nc") ++ ++BUILTIN(__builtin_lasx_xvxor_v, "V32cV32cV32c", "nc") ++ ++BUILTIN(__builtin_lasx_xvxori_b, "V32UcV32UcIUi", "nc") ++ ++BUILTIN(__builtin_lasx_xvbitsel_v, "V32UcV32UcV32UcV32Uc", "nc") ++ ++BUILTIN(__builtin_lasx_xvbitseli_b, "V32UcV32UcV32UcIUi", "nc") ++ ++BUILTIN(__builtin_lasx_xvshuf4i_b, "V32cV32cIUi", "nc") ++BUILTIN(__builtin_lasx_xvshuf4i_h, "V16sV16sIUi", "nc") ++BUILTIN(__builtin_lasx_xvshuf4i_w, "V8iV8iIUi", "nc") ++BUILTIN(__builtin_lasx_xvshuf4i_d, "V4LLiV4LLiV4LLiIUi", "nc") ++ ++BUILTIN(__builtin_lasx_xvreplgr2vr_b, "V32Sci", "nc") ++BUILTIN(__builtin_lasx_xvreplgr2vr_h, "V16Ssi", "nc") ++BUILTIN(__builtin_lasx_xvreplgr2vr_w, "V8Sii", "nc") ++BUILTIN(__builtin_lasx_xvreplgr2vr_d, "V4SLLiLLi", "nc") ++ ++BUILTIN(__builtin_lasx_xvpcnt_b, "V32ScV32Sc", "nc") ++BUILTIN(__builtin_lasx_xvpcnt_h, "V16SsV16Ss", "nc") ++BUILTIN(__builtin_lasx_xvpcnt_w, "V8SiV8Si", "nc") ++BUILTIN(__builtin_lasx_xvpcnt_d, "V4SLLiV4SLLi", "nc") ++ ++BUILTIN(__builtin_lasx_xvclo_b, "V32ScV32Sc", "nc") ++BUILTIN(__builtin_lasx_xvclo_h, "V16SsV16Ss", "nc") ++BUILTIN(__builtin_lasx_xvclo_w, "V8SiV8Si", "nc") ++BUILTIN(__builtin_lasx_xvclo_d, "V4SLLiV4SLLi", "nc") ++ ++BUILTIN(__builtin_lasx_xvclz_b, "V32ScV32Sc", "nc") ++BUILTIN(__builtin_lasx_xvclz_h, "V16SsV16Ss", "nc") ++BUILTIN(__builtin_lasx_xvclz_w, "V8SiV8Si", "nc") ++BUILTIN(__builtin_lasx_xvclz_d, "V4SLLiV4SLLi", "nc") ++ ++BUILTIN(__builtin_lasx_xvfcmp_caf_s, "V8SiV8fV8f", "nc") ++BUILTIN(__builtin_lasx_xvfcmp_caf_d, "V4SLLiV4dV4d", "nc") ++ ++BUILTIN(__builtin_lasx_xvfcmp_cor_s, "V8SiV8fV8f", "nc") 
++BUILTIN(__builtin_lasx_xvfcmp_cor_d, "V4SLLiV4dV4d", "nc") ++ ++BUILTIN(__builtin_lasx_xvfcmp_cun_s, "V8SiV8fV8f", "nc") ++BUILTIN(__builtin_lasx_xvfcmp_cun_d, "V4SLLiV4dV4d", "nc") ++ ++BUILTIN(__builtin_lasx_xvfcmp_cune_s, "V8SiV8fV8f", "nc") ++BUILTIN(__builtin_lasx_xvfcmp_cune_d, "V4SLLiV4dV4d", "nc") ++ ++BUILTIN(__builtin_lasx_xvfcmp_cueq_s, "V8SiV8fV8f", "nc") ++BUILTIN(__builtin_lasx_xvfcmp_cueq_d, "V4SLLiV4dV4d", "nc") ++ ++BUILTIN(__builtin_lasx_xvfcmp_ceq_s, "V8SiV8fV8f", "nc") ++BUILTIN(__builtin_lasx_xvfcmp_ceq_d, "V4SLLiV4dV4d", "nc") ++ ++BUILTIN(__builtin_lasx_xvfcmp_cne_s, "V8SiV8fV8f", "nc") ++BUILTIN(__builtin_lasx_xvfcmp_cne_d, "V4SLLiV4dV4d", "nc") ++ ++BUILTIN(__builtin_lasx_xvfcmp_clt_s, "V8SiV8fV8f", "nc") ++BUILTIN(__builtin_lasx_xvfcmp_clt_d, "V4SLLiV4dV4d", "nc") ++ ++BUILTIN(__builtin_lasx_xvfcmp_cult_s, "V8SiV8fV8f", "nc") ++BUILTIN(__builtin_lasx_xvfcmp_cult_d, "V4SLLiV4dV4d", "nc") ++ ++BUILTIN(__builtin_lasx_xvfcmp_cle_s, "V8SiV8fV8f", "nc") ++BUILTIN(__builtin_lasx_xvfcmp_cle_d, "V4SLLiV4dV4d", "nc") ++ ++BUILTIN(__builtin_lasx_xvfcmp_cule_s, "V8SiV8fV8f", "nc") ++BUILTIN(__builtin_lasx_xvfcmp_cule_d, "V4SLLiV4dV4d", "nc") ++ ++BUILTIN(__builtin_lasx_xvfcmp_saf_s, "V8SiV8fV8f", "nc") ++BUILTIN(__builtin_lasx_xvfcmp_saf_d, "V4SLLiV4dV4d", "nc") ++ ++BUILTIN(__builtin_lasx_xvfcmp_sor_s, "V8SiV8fV8f", "nc") ++BUILTIN(__builtin_lasx_xvfcmp_sor_d, "V4SLLiV4dV4d", "nc") ++ ++BUILTIN(__builtin_lasx_xvfcmp_sun_s, "V8SiV8fV8f", "nc") ++BUILTIN(__builtin_lasx_xvfcmp_sun_d, "V4SLLiV4dV4d", "nc") ++ ++BUILTIN(__builtin_lasx_xvfcmp_sune_s, "V8SiV8fV8f", "nc") ++BUILTIN(__builtin_lasx_xvfcmp_sune_d, "V4SLLiV4dV4d", "nc") ++ ++BUILTIN(__builtin_lasx_xvfcmp_sueq_s, "V8SiV8fV8f", "nc") ++BUILTIN(__builtin_lasx_xvfcmp_sueq_d, "V4SLLiV4dV4d", "nc") ++ ++BUILTIN(__builtin_lasx_xvfcmp_seq_s, "V8SiV8fV8f", "nc") ++BUILTIN(__builtin_lasx_xvfcmp_seq_d, "V4SLLiV4dV4d", "nc") ++ ++BUILTIN(__builtin_lasx_xvfcmp_sne_s, "V8SiV8fV8f", "nc") ++BUILTIN(__builtin_lasx_xvfcmp_sne_d, "V4SLLiV4dV4d", "nc") ++ ++BUILTIN(__builtin_lasx_xvfcmp_slt_s, "V8SiV8fV8f", "nc") ++BUILTIN(__builtin_lasx_xvfcmp_slt_d, "V4SLLiV4dV4d", "nc") ++ ++BUILTIN(__builtin_lasx_xvfcmp_sult_s, "V8SiV8fV8f", "nc") ++BUILTIN(__builtin_lasx_xvfcmp_sult_d, "V4SLLiV4dV4d", "nc") ++ ++BUILTIN(__builtin_lasx_xvfcmp_sle_s, "V8SiV8fV8f", "nc") ++BUILTIN(__builtin_lasx_xvfcmp_sle_d, "V4SLLiV4dV4d", "nc") ++ ++BUILTIN(__builtin_lasx_xvfcmp_sule_s, "V8SiV8fV8f", "nc") ++BUILTIN(__builtin_lasx_xvfcmp_sule_d, "V4SLLiV4dV4d", "nc") ++ ++BUILTIN(__builtin_lasx_xvfadd_s, "V8fV8fV8f", "nc") ++BUILTIN(__builtin_lasx_xvfadd_d, "V4dV4dV4d", "nc") ++ ++BUILTIN(__builtin_lasx_xvfsub_s, "V8fV8fV8f", "nc") ++BUILTIN(__builtin_lasx_xvfsub_d, "V4dV4dV4d", "nc") ++ ++BUILTIN(__builtin_lasx_xvfmul_s, "V8fV8fV8f", "nc") ++BUILTIN(__builtin_lasx_xvfmul_d, "V4dV4dV4d", "nc") ++ ++BUILTIN(__builtin_lasx_xvfdiv_s, "V8fV8fV8f", "nc") ++BUILTIN(__builtin_lasx_xvfdiv_d, "V4dV4dV4d", "nc") ++ ++BUILTIN(__builtin_lasx_xvfcvt_h_s, "V16sV8fV8f", "nc") ++BUILTIN(__builtin_lasx_xvfcvt_s_d, "V8fV4dV4d", "nc") ++ ++BUILTIN(__builtin_lasx_xvfmin_s, "V8fV8fV8f", "nc") ++BUILTIN(__builtin_lasx_xvfmin_d, "V4dV4dV4d", "nc") ++ ++BUILTIN(__builtin_lasx_xvfmina_s, "V8fV8fV8f", "nc") ++BUILTIN(__builtin_lasx_xvfmina_d, "V4dV4dV4d", "nc") ++ ++BUILTIN(__builtin_lasx_xvfmax_s, "V8fV8fV8f", "nc") ++BUILTIN(__builtin_lasx_xvfmax_d, "V4dV4dV4d", "nc") ++ ++BUILTIN(__builtin_lasx_xvfmaxa_s, "V8fV8fV8f", "nc") ++BUILTIN(__builtin_lasx_xvfmaxa_d, "V4dV4dV4d", "nc") ++ 
++BUILTIN(__builtin_lasx_xvfclass_s, "V8iV8f", "nc") ++BUILTIN(__builtin_lasx_xvfclass_d, "V4LLiV4d", "nc") ++ ++BUILTIN(__builtin_lasx_xvfsqrt_s, "V8fV8f", "nc") ++BUILTIN(__builtin_lasx_xvfsqrt_d, "V4dV4d", "nc") ++ ++BUILTIN(__builtin_lasx_xvfrecip_s, "V8fV8f", "nc") ++BUILTIN(__builtin_lasx_xvfrecip_d, "V4dV4d", "nc") ++ ++BUILTIN(__builtin_lasx_xvfrint_s, "V8fV8f", "nc") ++BUILTIN(__builtin_lasx_xvfrint_d, "V4dV4d", "nc") ++ ++BUILTIN(__builtin_lasx_xvfrsqrt_s, "V8fV8f", "nc") ++BUILTIN(__builtin_lasx_xvfrsqrt_d, "V4dV4d", "nc") ++ ++BUILTIN(__builtin_lasx_xvflogb_s, "V8fV8f", "nc") ++BUILTIN(__builtin_lasx_xvflogb_d, "V4dV4d", "nc") ++ ++BUILTIN(__builtin_lasx_xvfcvth_s_h, "V8fV16s", "nc") ++BUILTIN(__builtin_lasx_xvfcvth_d_s, "V4dV8f", "nc") ++ ++BUILTIN(__builtin_lasx_xvfcvtl_s_h, "V8fV16s", "nc") ++BUILTIN(__builtin_lasx_xvfcvtl_d_s, "V4dV8f", "nc") ++ ++BUILTIN(__builtin_lasx_xvftint_w_s, "V8SiV8f", "nc") ++BUILTIN(__builtin_lasx_xvftint_l_d, "V4SLLiV4d", "nc") ++ ++BUILTIN(__builtin_lasx_xvftint_wu_s, "V8UiV8f", "nc") ++BUILTIN(__builtin_lasx_xvftint_lu_d, "V4ULLiV4d", "nc") ++ ++BUILTIN(__builtin_lasx_xvftintrz_w_s, "V8SiV8f", "nc") ++BUILTIN(__builtin_lasx_xvftintrz_l_d, "V4LLiV4d", "nc") ++ ++BUILTIN(__builtin_lasx_xvftintrz_wu_s, "V8UiV8f", "nc") ++BUILTIN(__builtin_lasx_xvftintrz_lu_d, "V4ULLiV4d", "nc") ++ ++BUILTIN(__builtin_lasx_xvffint_s_w, "V8fV8Si", "nc") ++BUILTIN(__builtin_lasx_xvffint_d_l, "V4dV4SLLi", "nc") ++ ++BUILTIN(__builtin_lasx_xvffint_s_wu, "V8fV8Ui", "nc") ++BUILTIN(__builtin_lasx_xvffint_d_lu, "V4dV4ULLi", "nc") ++ ++BUILTIN(__builtin_lasx_xvreplve_b, "V32cV32cUi", "nc") ++BUILTIN(__builtin_lasx_xvreplve_h, "V16sV16sUi", "nc") ++BUILTIN(__builtin_lasx_xvreplve_w, "V8iV8iUi", "nc") ++BUILTIN(__builtin_lasx_xvreplve_d, "V4LLiV4LLiUi", "nc") ++ ++BUILTIN(__builtin_lasx_xvpermi_w, "V8iV8iV8iIUi", "nc") ++ ++BUILTIN(__builtin_lasx_xvandn_v, "V32UcV32UcV32Uc", "nc") ++ ++BUILTIN(__builtin_lasx_xvneg_b, "V32cV32c", "nc") ++BUILTIN(__builtin_lasx_xvneg_h, "V16sV16s", "nc") ++BUILTIN(__builtin_lasx_xvneg_w, "V8iV8i", "nc") ++BUILTIN(__builtin_lasx_xvneg_d, "V4LLiV4LLi", "nc") ++ ++BUILTIN(__builtin_lasx_xvmuh_b, "V32cV32cV32c", "nc") ++BUILTIN(__builtin_lasx_xvmuh_h, "V16sV16sV16s", "nc") ++BUILTIN(__builtin_lasx_xvmuh_w, "V8iV8iV8i", "nc") ++BUILTIN(__builtin_lasx_xvmuh_d, "V4LLiV4LLiV4LLi", "nc") ++ ++BUILTIN(__builtin_lasx_xvmuh_bu, "V32UcV32UcV32Uc", "nc") ++BUILTIN(__builtin_lasx_xvmuh_hu, "V16UsV16UsV16Us", "nc") ++BUILTIN(__builtin_lasx_xvmuh_wu, "V8UiV8UiV8Ui", "nc") ++BUILTIN(__builtin_lasx_xvmuh_du, "V4ULLiV4ULLiV4ULLi", "nc") ++ ++BUILTIN(__builtin_lasx_xvsllwil_d_w, "V4LLiV8SiIUi", "nc") ++BUILTIN(__builtin_lasx_xvsllwil_w_h, "V8SiV16sIUi", "nc") ++BUILTIN(__builtin_lasx_xvsllwil_h_b, "V16sV32cIUi", "nc") ++ ++BUILTIN(__builtin_lasx_xvsllwil_du_wu, "V4ULLiV8UiIUi", "nc") ++BUILTIN(__builtin_lasx_xvsllwil_wu_hu, "V8UiV16UsIUi", "nc") ++BUILTIN(__builtin_lasx_xvsllwil_hu_bu, "V16UsV32UcIUi", "nc") ++ ++BUILTIN(__builtin_lasx_xvsran_b_h, "V32ScV16sV16s", "nc") ++BUILTIN(__builtin_lasx_xvsran_h_w, "V16sV8SiV8Si", "nc") ++BUILTIN(__builtin_lasx_xvsran_w_d, "V8SiV4LLiV4LLi", "nc") ++ ++BUILTIN(__builtin_lasx_xvssran_b_h, "V32ScV16sV16s", "nc") ++BUILTIN(__builtin_lasx_xvssran_h_w, "V16sV8SiV8Si", "nc") ++BUILTIN(__builtin_lasx_xvssran_w_d, "V8SiV4LLiV4LLi", "nc") ++ ++BUILTIN(__builtin_lasx_xvssran_bu_h, "V32UcV16UsV16Us", "nc") ++BUILTIN(__builtin_lasx_xvssran_hu_w, "V16UsV8UiV8Ui", "nc") ++BUILTIN(__builtin_lasx_xvssran_wu_d, "V8UiV4ULLiV4ULLi", "nc") ++ 
++BUILTIN(__builtin_lasx_xvsrarn_b_h, "V32ScV16sV16s", "nc") ++BUILTIN(__builtin_lasx_xvsrarn_h_w, "V16sV8SiV8Si", "nc") ++BUILTIN(__builtin_lasx_xvsrarn_w_d, "V8SiV4LLiV4LLi", "nc") ++ ++BUILTIN(__builtin_lasx_xvssrarn_b_h, "V32ScV16sV16s", "nc") ++BUILTIN(__builtin_lasx_xvssrarn_h_w, "V16sV8SiV8Si", "nc") ++BUILTIN(__builtin_lasx_xvssrarn_w_d, "V8SiV4LLiV4LLi", "nc") ++ ++BUILTIN(__builtin_lasx_xvssrarn_bu_h, "V32UcV16UsV16Us", "nc") ++BUILTIN(__builtin_lasx_xvssrarn_hu_w, "V16UsV8UiV8Ui", "nc") ++BUILTIN(__builtin_lasx_xvssrarn_wu_d, "V8UiV4ULLiV4ULLi", "nc") ++ ++BUILTIN(__builtin_lasx_xvsrln_b_h, "V32ScV16sV16s", "nc") ++BUILTIN(__builtin_lasx_xvsrln_h_w, "V16sV8SiV8Si", "nc") ++BUILTIN(__builtin_lasx_xvsrln_w_d, "V8SiV4LLiV4LLi", "nc") ++ ++BUILTIN(__builtin_lasx_xvssrln_bu_h, "V32UcV16UsV16Us", "nc") ++BUILTIN(__builtin_lasx_xvssrln_hu_w, "V16UsV8UiV8Ui", "nc") ++BUILTIN(__builtin_lasx_xvssrln_wu_d, "V8UiV4ULLiV4ULLi", "nc") ++ ++BUILTIN(__builtin_lasx_xvsrlrn_b_h, "V32ScV16sV16s", "nc") ++BUILTIN(__builtin_lasx_xvsrlrn_h_w, "V16sV8SiV8Si", "nc") ++BUILTIN(__builtin_lasx_xvsrlrn_w_d, "V8SiV4LLiV4LLi", "nc") ++ ++BUILTIN(__builtin_lasx_xvssrlrn_bu_h, "V32UcV16UsV16Us", "nc") ++BUILTIN(__builtin_lasx_xvssrlrn_hu_w, "V16UsV8UiV8Ui", "nc") ++BUILTIN(__builtin_lasx_xvssrlrn_wu_d, "V8UiV4ULLiV4ULLi", "nc") ++ ++BUILTIN(__builtin_lasx_xvfrstpi_b, "V32cV32cV32cIUi", "nc") ++BUILTIN(__builtin_lasx_xvfrstpi_h, "V16sV16sV16sIUi", "nc") ++ ++BUILTIN(__builtin_lasx_xvfrstp_b, "V32ScV32ScV32ScV32Sc", "nc") ++BUILTIN(__builtin_lasx_xvfrstp_h, "V16SsV16SsV16SsV16Ss", "nc") ++ ++BUILTIN(__builtin_lasx_xvbsrl_v, "V32cV32cIUi", "nc") ++BUILTIN(__builtin_lasx_xvbsll_v, "V32cV32cIUi", "nc") ++ ++BUILTIN(__builtin_lasx_xvextrins_b, "V32cV32cV32cIUi", "nc") ++BUILTIN(__builtin_lasx_xvextrins_h, "V16sV16sV16sIUi", "nc") ++BUILTIN(__builtin_lasx_xvextrins_w, "V8iV8iV8iIUi", "nc") ++BUILTIN(__builtin_lasx_xvextrins_d, "V4LLiV4LLiV4LLiIUi", "nc") ++ ++BUILTIN(__builtin_lasx_xvmskltz_b, "V32cV32c", "nc") ++BUILTIN(__builtin_lasx_xvmskltz_h, "V16sV16s", "nc") ++BUILTIN(__builtin_lasx_xvmskltz_w, "V8iV8i", "nc") ++BUILTIN(__builtin_lasx_xvmskltz_d, "V4LLiV4LLi", "nc") ++ ++BUILTIN(__builtin_lasx_xvsigncov_b, "V32ScV32ScV32Sc", "nc") ++BUILTIN(__builtin_lasx_xvsigncov_h, "V16SsV16SsV16Ss", "nc") ++BUILTIN(__builtin_lasx_xvsigncov_w, "V8SiV8SiV8Si", "nc") ++BUILTIN(__builtin_lasx_xvsigncov_d, "V4SLLiV4SLLiV4SLLi", "nc") ++ ++BUILTIN(__builtin_lasx_xvftintrne_w_s, "V8SiV8f", "nc") ++BUILTIN(__builtin_lasx_xvftintrne_l_d, "V4LLiV4d", "nc") ++ ++BUILTIN(__builtin_lasx_xvftintrp_w_s, "V8SiV8f", "nc") ++BUILTIN(__builtin_lasx_xvftintrp_l_d, "V4LLiV4d", "nc") ++ ++BUILTIN(__builtin_lasx_xvftintrm_w_s, "V8SiV8f", "nc") ++BUILTIN(__builtin_lasx_xvftintrm_l_d, "V4LLiV4d", "nc") ++ ++BUILTIN(__builtin_lasx_xvftint_w_d, "V8SiV4dV4d", "nc") ++BUILTIN(__builtin_lasx_xvffint_s_l, "V8fV4LLiV4LLi", "nc") ++ ++BUILTIN(__builtin_lasx_xvftintrz_w_d, "V8SiV4dV4d", "nc") ++BUILTIN(__builtin_lasx_xvftintrp_w_d, "V8SiV4dV4d", "nc") ++BUILTIN(__builtin_lasx_xvftintrm_w_d, "V8SiV4dV4d", "nc") ++BUILTIN(__builtin_lasx_xvftintrne_w_d, "V8SiV4dV4d", "nc") ++ ++BUILTIN(__builtin_lasx_xvftinth_l_s, "V4LLiV8f", "nc") ++BUILTIN(__builtin_lasx_xvftintl_l_s, "V4LLiV8f", "nc") ++ ++BUILTIN(__builtin_lasx_xvffinth_d_w, "V4dV8Si", "nc") ++BUILTIN(__builtin_lasx_xvffintl_d_w, "V4dV8Si", "nc") ++ ++BUILTIN(__builtin_lasx_xvftintrzh_l_s, "V4LLiV8f", "nc") ++BUILTIN(__builtin_lasx_xvftintrzl_l_s, "V4LLiV8f", "nc") ++ 
++BUILTIN(__builtin_lasx_xvftintrph_l_s, "V4LLiV8f", "nc") ++BUILTIN(__builtin_lasx_xvftintrpl_l_s, "V4LLiV8f", "nc") ++ ++BUILTIN(__builtin_lasx_xvftintrmh_l_s, "V4LLiV8f", "nc") ++BUILTIN(__builtin_lasx_xvftintrml_l_s, "V4LLiV8f", "nc") ++ ++BUILTIN(__builtin_lasx_xvftintrneh_l_s, "V4LLiV8f", "nc") ++BUILTIN(__builtin_lasx_xvftintrnel_l_s, "V4LLiV8f", "nc") ++ ++BUILTIN(__builtin_lasx_xvfrintrne_s, "V8SiV8f", "nc") ++BUILTIN(__builtin_lasx_xvfrintrne_d, "V4LLiV4d", "nc") ++ ++BUILTIN(__builtin_lasx_xvfrintrz_s, "V8SiV8f", "nc") ++BUILTIN(__builtin_lasx_xvfrintrz_d, "V4LLiV4d", "nc") ++ ++BUILTIN(__builtin_lasx_xvfrintrp_s, "V8SiV8f", "nc") ++BUILTIN(__builtin_lasx_xvfrintrp_d, "V4LLiV4d", "nc") ++ ++BUILTIN(__builtin_lasx_xvfrintrm_s, "V8SiV8f", "nc") ++BUILTIN(__builtin_lasx_xvfrintrm_d, "V4LLiV4d", "nc") ++ ++BUILTIN(__builtin_lasx_xvld, "V32Scv*Ii", "nc") ++ ++BUILTIN(__builtin_lasx_xvst, "vV32Scv*Ii", "nc") ++ ++BUILTIN(__builtin_lasx_xvstelm_b, "vV32Scv*IiUi", "nc") ++BUILTIN(__builtin_lasx_xvstelm_h, "vV16Ssv*IiUi", "nc") ++BUILTIN(__builtin_lasx_xvstelm_w, "vV8Siv*IiUi", "nc") ++BUILTIN(__builtin_lasx_xvstelm_d, "vV4SLLiv*IiUi", "nc") ++ ++BUILTIN(__builtin_lasx_xvinsve0_w, "V8iV8iV8iIUi", "nc") ++BUILTIN(__builtin_lasx_xvinsve0_d, "V4LLiV4LLiV4LLiIUi", "nc") ++ ++BUILTIN(__builtin_lasx_xvpickve_w, "V8iV8iIUi", "nc") ++BUILTIN(__builtin_lasx_xvpickve_d, "V4LLiV4LLiIUi", "nc") ++ ++BUILTIN(__builtin_lasx_xvssrlrn_b_h, "V32ScV16sV16s", "nc") ++BUILTIN(__builtin_lasx_xvssrlrn_h_w, "V16sV8SiV8Si", "nc") ++BUILTIN(__builtin_lasx_xvssrlrn_w_d, "V8SiV4LLiV4LLi", "nc") ++ ++BUILTIN(__builtin_lasx_xvssrln_b_h, "V32ScV16sV16s", "nc") ++BUILTIN(__builtin_lasx_xvssrln_h_w, "V16sV8SiV8Si", "nc") ++BUILTIN(__builtin_lasx_xvssrln_w_d, "V8SiV4LLiV4LLi", "nc") ++ ++BUILTIN(__builtin_lasx_xvorn_v, "V32ScV32ScV32Sc", "nc") ++ ++BUILTIN(__builtin_lasx_xvldi, "V4LLiIi", "nc") ++ ++BUILTIN(__builtin_lasx_xvldx, "V32Scv*LLi", "nc") ++BUILTIN(__builtin_lasx_xvstx, "vV32Scv*LLi", "nc") ++ ++BUILTIN(__builtin_lasx_xvinsgr2vr_w, "V8SiV8SiiIUi", "nc") ++BUILTIN(__builtin_lasx_xvinsgr2vr_d, "V4SLLiV4SLLiLLiIUi", "nc") ++ ++BUILTIN(__builtin_lasx_xvreplve0_b, "V32ScV32Sc", "nc") ++BUILTIN(__builtin_lasx_xvreplve0_h, "V16SsV16Ss", "nc") ++BUILTIN(__builtin_lasx_xvreplve0_w, "V8SiV8Si", "nc") ++BUILTIN(__builtin_lasx_xvreplve0_d, "V4SLLiV4SLLi", "nc") ++BUILTIN(__builtin_lasx_xvreplve0_q, "V32ScV32Sc", "nc") ++ ++BUILTIN(__builtin_lasx_vext2xv_d_w, "V4LLiV8Si", "nc") ++BUILTIN(__builtin_lasx_vext2xv_w_h, "V8SiV16s", "nc") ++BUILTIN(__builtin_lasx_vext2xv_h_b, "V16sV32c", "nc") ++ ++BUILTIN(__builtin_lasx_vext2xv_d_h, "V4LLiV16s", "nc") ++BUILTIN(__builtin_lasx_vext2xv_w_b, "V8SiV32c", "nc") ++BUILTIN(__builtin_lasx_vext2xv_d_b, "V4LLiV32c", "nc") ++ ++BUILTIN(__builtin_lasx_vext2xv_du_wu, "V4LLiV8Si", "nc") ++BUILTIN(__builtin_lasx_vext2xv_wu_hu, "V8SiV16s", "nc") ++BUILTIN(__builtin_lasx_vext2xv_hu_bu, "V16sV32c", "nc") ++ ++BUILTIN(__builtin_lasx_vext2xv_du_hu, "V4LLiV16s", "nc") ++BUILTIN(__builtin_lasx_vext2xv_wu_bu, "V8SiV32c", "nc") ++BUILTIN(__builtin_lasx_vext2xv_du_bu, "V4LLiV32c", "nc") ++ ++BUILTIN(__builtin_lasx_xvpermi_q, "V32cV32cV32cIUi", "nc") ++BUILTIN(__builtin_lasx_xvpermi_d, "V4LLiV4LLiIUi", "nc") ++ ++BUILTIN(__builtin_lasx_xvperm_w, "V8iV8iV8i", "nc") ++ ++BUILTIN(__builtin_lasx_xvldrepl_b, "V32cv*Ii", "nc") ++BUILTIN(__builtin_lasx_xvldrepl_h, "V16sv*Ii", "nc") ++BUILTIN(__builtin_lasx_xvldrepl_w, "V8iv*Ii", "nc") ++BUILTIN(__builtin_lasx_xvldrepl_d, "V4LLiv*Ii", "nc") ++ 
++BUILTIN(__builtin_lasx_xvpickve2gr_w, "iV8SiIUi", "nc") ++BUILTIN(__builtin_lasx_xvpickve2gr_d, "LLiV4SLLiIUi", "nc") ++ ++BUILTIN(__builtin_lasx_xvpickve2gr_wu, "iV8UiIUi", "nc") ++BUILTIN(__builtin_lasx_xvpickve2gr_du, "LLiV4ULLiIUi", "nc") ++ ++BUILTIN(__builtin_lasx_xvaddwev_d_w, "V4LLiV8SiV8Si", "nc") ++BUILTIN(__builtin_lasx_xvaddwev_w_h, "V8SiV16sV16s", "nc") ++BUILTIN(__builtin_lasx_xvaddwev_h_b, "V16sV32cV32c", "nc") ++BUILTIN(__builtin_lasx_xvaddwev_q_d, "V4LLiV4LLiV4LLi", "nc") ++ ++BUILTIN(__builtin_lasx_xvaddwev_d_wu, "V4LLiV8UiV8Ui", "nc") ++BUILTIN(__builtin_lasx_xvaddwev_w_hu, "V8SiV16UsV16Us", "nc") ++BUILTIN(__builtin_lasx_xvaddwev_h_bu, "V16sV32UcV32Uc", "nc") ++BUILTIN(__builtin_lasx_xvaddwev_q_du, "V4LLiV4ULLiV4ULLi", "nc") ++ ++BUILTIN(__builtin_lasx_xvsubwev_d_w, "V4LLiV8SiV8Si", "nc") ++BUILTIN(__builtin_lasx_xvsubwev_w_h, "V8SiV16sV16s", "nc") ++BUILTIN(__builtin_lasx_xvsubwev_h_b, "V16sV32cV32c", "nc") ++BUILTIN(__builtin_lasx_xvsubwev_q_d, "V4LLiV4LLiV4LLi", "nc") ++ ++BUILTIN(__builtin_lasx_xvsubwev_d_wu, "V4LLiV8UiV8Ui", "nc") ++BUILTIN(__builtin_lasx_xvsubwev_w_hu, "V8SiV16UsV16Us", "nc") ++BUILTIN(__builtin_lasx_xvsubwev_h_bu, "V16sV32UcV32Uc", "nc") ++BUILTIN(__builtin_lasx_xvsubwev_q_du, "V4LLiV4ULLiV4ULLi", "nc") ++ ++BUILTIN(__builtin_lasx_xvmulwev_d_w, "V4LLiV8SiV8Si", "nc") ++BUILTIN(__builtin_lasx_xvmulwev_w_h, "V8SiV16sV16s", "nc") ++BUILTIN(__builtin_lasx_xvmulwev_h_b, "V16sV32cV32c", "nc") ++BUILTIN(__builtin_lasx_xvmulwev_q_d, "V4LLiV4LLiV4LLi", "nc") ++ ++BUILTIN(__builtin_lasx_xvmulwev_d_wu, "V4LLiV8UiV8Ui", "nc") ++BUILTIN(__builtin_lasx_xvmulwev_w_hu, "V8SiV16UsV16Us", "nc") ++BUILTIN(__builtin_lasx_xvmulwev_h_bu, "V16sV32UcV32Uc", "nc") ++BUILTIN(__builtin_lasx_xvmulwev_q_du, "V4LLiV4ULLiV4ULLi", "nc") ++ ++BUILTIN(__builtin_lasx_xvaddwod_d_w, "V4LLiV8SiV8Si", "nc") ++BUILTIN(__builtin_lasx_xvaddwod_w_h, "V8SiV16sV16s", "nc") ++BUILTIN(__builtin_lasx_xvaddwod_h_b, "V16sV32cV32c", "nc") ++BUILTIN(__builtin_lasx_xvaddwod_q_d, "V4LLiV4LLiV4LLi", "nc") ++ ++BUILTIN(__builtin_lasx_xvaddwod_d_wu, "V4LLiV8UiV8Ui", "nc") ++BUILTIN(__builtin_lasx_xvaddwod_w_hu, "V8SiV16UsV16Us", "nc") ++BUILTIN(__builtin_lasx_xvaddwod_h_bu, "V16sV32UcV32Uc", "nc") ++BUILTIN(__builtin_lasx_xvaddwod_q_du, "V4LLiV4ULLiV4ULLi", "nc") ++ ++BUILTIN(__builtin_lasx_xvsubwod_d_w, "V4LLiV8SiV8Si", "nc") ++BUILTIN(__builtin_lasx_xvsubwod_w_h, "V8SiV16sV16s", "nc") ++BUILTIN(__builtin_lasx_xvsubwod_h_b, "V16sV32cV32c", "nc") ++BUILTIN(__builtin_lasx_xvsubwod_q_d, "V4LLiV4LLiV4LLi", "nc") ++ ++BUILTIN(__builtin_lasx_xvsubwod_d_wu, "V4LLiV8UiV8Ui", "nc") ++BUILTIN(__builtin_lasx_xvsubwod_w_hu, "V8SiV16UsV16Us", "nc") ++BUILTIN(__builtin_lasx_xvsubwod_h_bu, "V16sV32UcV32Uc", "nc") ++BUILTIN(__builtin_lasx_xvsubwod_q_du, "V4LLiV4ULLiV4ULLi", "nc") ++ ++BUILTIN(__builtin_lasx_xvmulwod_d_w, "V4LLiV8SiV8Si", "nc") ++BUILTIN(__builtin_lasx_xvmulwod_w_h, "V8SiV16sV16s", "nc") ++BUILTIN(__builtin_lasx_xvmulwod_h_b, "V16sV32cV32c", "nc") ++BUILTIN(__builtin_lasx_xvmulwod_q_d, "V4LLiV4LLiV4LLi", "nc") ++ ++BUILTIN(__builtin_lasx_xvmulwod_d_wu, "V4LLiV8UiV8Ui", "nc") ++BUILTIN(__builtin_lasx_xvmulwod_w_hu, "V8SiV16UsV16Us", "nc") ++BUILTIN(__builtin_lasx_xvmulwod_h_bu, "V16sV32UcV32Uc", "nc") ++BUILTIN(__builtin_lasx_xvmulwod_q_du, "V4LLiV4ULLiV4ULLi", "nc") ++ ++BUILTIN(__builtin_lasx_xvaddwev_d_wu_w, "V4LLiV8UiV8Si", "nc") ++BUILTIN(__builtin_lasx_xvaddwev_w_hu_h, "V8SiV16UsV16s", "nc") ++BUILTIN(__builtin_lasx_xvaddwev_h_bu_b, "V16sV32UcV32c", "nc") 
++BUILTIN(__builtin_lasx_xvaddwev_q_du_d, "V4LLiV4ULLiV4LLi", "nc") ++ ++BUILTIN(__builtin_lasx_xvmulwev_d_wu_w, "V4LLiV8UiV8Si", "nc") ++BUILTIN(__builtin_lasx_xvmulwev_w_hu_h, "V8SiV16UsV16s", "nc") ++BUILTIN(__builtin_lasx_xvmulwev_h_bu_b, "V16sV32UcV32c", "nc") ++BUILTIN(__builtin_lasx_xvmulwev_q_du_d, "V4LLiV4ULLiV4LLi", "nc") ++ ++BUILTIN(__builtin_lasx_xvaddwod_d_wu_w, "V4LLiV8UiV8Si", "nc") ++BUILTIN(__builtin_lasx_xvaddwod_w_hu_h, "V8SiV16UsV16s", "nc") ++BUILTIN(__builtin_lasx_xvaddwod_h_bu_b, "V16sV32UcV32c", "nc") ++BUILTIN(__builtin_lasx_xvaddwod_q_du_d, "V4LLiV4ULLiV4LLi", "nc") ++ ++BUILTIN(__builtin_lasx_xvmulwod_d_wu_w, "V4LLiV8UiV8Si", "nc") ++BUILTIN(__builtin_lasx_xvmulwod_w_hu_h, "V8SiV16UsV16s", "nc") ++BUILTIN(__builtin_lasx_xvmulwod_h_bu_b, "V16sV32UcV32c", "nc") ++BUILTIN(__builtin_lasx_xvmulwod_q_du_d, "V4LLiV4ULLiV4LLi", "nc") ++ ++BUILTIN(__builtin_lasx_xvhaddw_q_d, "V4LLiV4LLiV4LLi", "nc") ++BUILTIN(__builtin_lasx_xvhsubw_q_d, "V4LLiV4LLiV4LLi", "nc") ++ ++BUILTIN(__builtin_lasx_xvhaddw_qu_du, "V4ULLiV4ULLiV4ULLi", "nc") ++BUILTIN(__builtin_lasx_xvhsubw_qu_du, "V4ULLiV4ULLiV4ULLi", "nc") ++ ++BUILTIN(__builtin_lasx_xvmaddwev_d_w, "V4LLiV4LLiV8SiV8Si", "nc") ++BUILTIN(__builtin_lasx_xvmaddwev_w_h, "V8SiV8SiV16sV16s", "nc") ++BUILTIN(__builtin_lasx_xvmaddwev_h_b, "V16sV16sV32cV32c", "nc") ++BUILTIN(__builtin_lasx_xvmaddwev_q_d, "V4LLiV4LLiV4LLiV4LLi", "nc") ++ ++BUILTIN(__builtin_lasx_xvmaddwev_d_wu, "V4ULLiV4ULLiV8UiV8Ui", "nc") ++BUILTIN(__builtin_lasx_xvmaddwev_w_hu, "V8UiV8UiV16UsV16Us", "nc") ++BUILTIN(__builtin_lasx_xvmaddwev_h_bu, "V16UsV16UsV32UcV32Uc", "nc") ++BUILTIN(__builtin_lasx_xvmaddwev_q_du, "V4ULLiV4ULLiV4ULLiV4ULLi", "nc") ++ ++BUILTIN(__builtin_lasx_xvmaddwod_d_w, "V4LLiV4LLiV8SiV8Si", "nc") ++BUILTIN(__builtin_lasx_xvmaddwod_w_h, "V8SiV8SiV16sV16s", "nc") ++BUILTIN(__builtin_lasx_xvmaddwod_h_b, "V16sV16sV32cV32c", "nc") ++BUILTIN(__builtin_lasx_xvmaddwod_q_d, "V4LLiV4LLiV4LLiV4LLi", "nc") ++ ++BUILTIN(__builtin_lasx_xvmaddwod_d_wu, "V4ULLiV4ULLiV8UiV8Ui", "nc") ++BUILTIN(__builtin_lasx_xvmaddwod_w_hu, "V8UiV8UiV16UsV16Us", "nc") ++BUILTIN(__builtin_lasx_xvmaddwod_h_bu, "V16UsV16UsV32UcV32Uc", "nc") ++BUILTIN(__builtin_lasx_xvmaddwod_q_du, "V4ULLiV4ULLiV4ULLiV4ULLi", "nc") ++ ++BUILTIN(__builtin_lasx_xvmaddwev_d_wu_w, "V4LLiV4LLiV8UiV8Si", "nc") ++BUILTIN(__builtin_lasx_xvmaddwev_w_hu_h, "V8SiV8SiV16UsV16s", "nc") ++BUILTIN(__builtin_lasx_xvmaddwev_h_bu_b, "V16sV16sV32UcV32c", "nc") ++BUILTIN(__builtin_lasx_xvmaddwev_q_du_d, "V4LLiV4LLiV4ULLiV4LLi", "nc") ++ ++BUILTIN(__builtin_lasx_xvmaddwod_d_wu_w, "V4LLiV4LLiV8UiV8Si", "nc") ++BUILTIN(__builtin_lasx_xvmaddwod_w_hu_h, "V8SiV8SiV16UsV16s", "nc") ++BUILTIN(__builtin_lasx_xvmaddwod_h_bu_b, "V16sV16sV32UcV32c", "nc") ++BUILTIN(__builtin_lasx_xvmaddwod_q_du_d, "V4LLiV4LLiV4ULLiV4LLi", "nc") ++ ++BUILTIN(__builtin_lasx_xvrotr_b, "V32cV32cV32c", "nc") ++BUILTIN(__builtin_lasx_xvrotr_h, "V16sV16sV16s", "nc") ++BUILTIN(__builtin_lasx_xvrotr_w, "V8iV8iV8i", "nc") ++BUILTIN(__builtin_lasx_xvrotr_d, "V4LLiV4LLiV4LLi", "nc") ++ ++BUILTIN(__builtin_lasx_xvadd_q, "V4LLiV4LLiV4LLi", "nc") ++BUILTIN(__builtin_lasx_xvsub_q, "V4LLiV4LLiV4LLi", "nc") ++ ++BUILTIN(__builtin_lasx_xvmskgez_b, "V32cV32c", "nc") ++BUILTIN(__builtin_lasx_xvmsknz_b, "V16sV16s", "nc") ++ ++BUILTIN(__builtin_lasx_xvexth_d_w, "V4LLiV8Si", "nc") ++BUILTIN(__builtin_lasx_xvexth_w_h, "V8SiV16s", "nc") ++BUILTIN(__builtin_lasx_xvexth_h_b, "V16sV32c", "nc") ++BUILTIN(__builtin_lasx_xvexth_q_d, "V4LLiV4LLi", "nc") ++ 
++BUILTIN(__builtin_lasx_xvexth_du_wu, "V4ULLiV8Ui", "nc") ++BUILTIN(__builtin_lasx_xvexth_wu_hu, "V8UiV16Us", "nc") ++BUILTIN(__builtin_lasx_xvexth_hu_bu, "V16UsV32Uc", "nc") ++BUILTIN(__builtin_lasx_xvexth_qu_du, "V4ULLiV4ULLi", "nc") ++ ++BUILTIN(__builtin_lasx_xvrotri_b, "V32cV32cIi", "nc") ++BUILTIN(__builtin_lasx_xvrotri_h, "V16sV16sIi", "nc") ++BUILTIN(__builtin_lasx_xvrotri_w, "V8iV8iIi", "nc") ++BUILTIN(__builtin_lasx_xvrotri_d, "V4LLiV4LLiIi", "nc") ++ ++BUILTIN(__builtin_lasx_xvsrlni_b_h, "V32cV32cV32cIUi", "nc") ++BUILTIN(__builtin_lasx_xvsrlni_h_w, "V16sV16sV16sIUi", "nc") ++BUILTIN(__builtin_lasx_xvsrlni_w_d, "V8iV8iV8iIUi", "nc") ++BUILTIN(__builtin_lasx_xvsrlni_d_q, "V4LLiV4LLiV4LLiIUi", "nc") ++ ++BUILTIN(__builtin_lasx_xvsrlrni_b_h, "V32cV32cV32cIUi", "nc") ++BUILTIN(__builtin_lasx_xvsrlrni_h_w, "V16sV16sV16sIUi", "nc") ++BUILTIN(__builtin_lasx_xvsrlrni_w_d, "V8iV8iV8iIUi", "nc") ++BUILTIN(__builtin_lasx_xvsrlrni_d_q, "V4LLiV4LLiV4LLiIUi", "nc") ++ ++BUILTIN(__builtin_lasx_xvssrlni_b_h, "V32cV32cV32cIi", "nc") ++BUILTIN(__builtin_lasx_xvssrlni_h_w, "V16sV16sV16sIi", "nc") ++BUILTIN(__builtin_lasx_xvssrlni_w_d, "V8iV8iV8iIi", "nc") ++BUILTIN(__builtin_lasx_xvssrlni_d_q, "V4LLiV4LLiV4LLiIi", "nc") ++ ++BUILTIN(__builtin_lasx_xvssrlni_bu_h, "V32cV32cV32cIi", "nc") ++BUILTIN(__builtin_lasx_xvssrlni_hu_w, "V16sV16sV16sIi", "nc") ++BUILTIN(__builtin_lasx_xvssrlni_wu_d, "V8iV8iV8iIi", "nc") ++BUILTIN(__builtin_lasx_xvssrlni_du_q, "V4LLiV4LLiV4LLiIi", "nc") ++ ++BUILTIN(__builtin_lasx_xvssrlrni_b_h, "V32cV32cV32cIUi", "nc") ++BUILTIN(__builtin_lasx_xvssrlrni_h_w, "V16sV16sV16sIUi", "nc") ++BUILTIN(__builtin_lasx_xvssrlrni_w_d, "V8iV8iV8iIUi", "nc") ++BUILTIN(__builtin_lasx_xvssrlrni_d_q, "V4LLiV4LLiV4LLiIUi", "nc") ++ ++BUILTIN(__builtin_lasx_xvssrlrni_bu_h, "V32cV32cV32cIi", "nc") ++BUILTIN(__builtin_lasx_xvssrlrni_hu_w, "V16sV16sV16sIi", "nc") ++BUILTIN(__builtin_lasx_xvssrlrni_wu_d, "V8iV8iV8iIi", "nc") ++BUILTIN(__builtin_lasx_xvssrlrni_du_q, "V4LLiV4LLiV4LLiIi", "nc") ++ ++BUILTIN(__builtin_lasx_xvsrani_b_h, "V32cV32cV32cIUi", "nc") ++BUILTIN(__builtin_lasx_xvsrani_h_w, "V16sV16sV16sIUi", "nc") ++BUILTIN(__builtin_lasx_xvsrani_w_d, "V8iV8iV8iIUi", "nc") ++BUILTIN(__builtin_lasx_xvsrani_d_q, "V4LLiV4LLiV4LLiIUi", "nc") ++ ++BUILTIN(__builtin_lasx_xvsrarni_b_h, "V32cV32cV32cIi", "nc") ++BUILTIN(__builtin_lasx_xvsrarni_h_w, "V16sV16sV16sIi", "nc") ++BUILTIN(__builtin_lasx_xvsrarni_w_d, "V8iV8iV8iIi", "nc") ++BUILTIN(__builtin_lasx_xvsrarni_d_q, "V4LLiV4LLiV4LLiIi", "nc") ++ ++BUILTIN(__builtin_lasx_xvssrani_b_h, "V32cV32cV32cIi", "nc") ++BUILTIN(__builtin_lasx_xvssrani_h_w, "V16sV16sV16sIi", "nc") ++BUILTIN(__builtin_lasx_xvssrani_w_d, "V8iV8iV8iIi", "nc") ++BUILTIN(__builtin_lasx_xvssrani_d_q, "V4LLiV4LLiV4LLiIi", "nc") ++ ++BUILTIN(__builtin_lasx_xvssrani_bu_h, "V32cV32cV32cIi", "nc") ++BUILTIN(__builtin_lasx_xvssrani_hu_w, "V16sV16sV16sIi", "nc") ++BUILTIN(__builtin_lasx_xvssrani_wu_d, "V8iV8iV8iIi", "nc") ++BUILTIN(__builtin_lasx_xvssrani_du_q, "V4LLiV4LLiV4LLiIi", "nc") ++ ++BUILTIN(__builtin_lasx_xvssrarni_b_h, "V32cV32cV32cIi", "nc") ++BUILTIN(__builtin_lasx_xvssrarni_h_w, "V16sV16sV16sIi", "nc") ++BUILTIN(__builtin_lasx_xvssrarni_w_d, "V8iV8iV8iIi", "nc") ++BUILTIN(__builtin_lasx_xvssrarni_d_q, "V4LLiV4LLiV4LLiIi", "nc") ++ ++BUILTIN(__builtin_lasx_xvssrarni_bu_h, "V32cV32cV32cIi", "nc") ++BUILTIN(__builtin_lasx_xvssrarni_hu_w, "V16sV16sV16sIi", "nc") ++BUILTIN(__builtin_lasx_xvssrarni_wu_d, "V8iV8iV8iIi", "nc") ++BUILTIN(__builtin_lasx_xvssrarni_du_q, 
"V4LLiV4LLiV4LLiIi", "nc") ++ ++BUILTIN(__builtin_lasx_xbz_v, "iV32Uc", "nc") ++ ++BUILTIN(__builtin_lasx_xbnz_v, "iV32Uc", "nc") ++ ++BUILTIN(__builtin_lasx_xbz_b, "iV32Uc", "nc") ++BUILTIN(__builtin_lasx_xbz_h, "iV16Us", "nc") ++BUILTIN(__builtin_lasx_xbz_w, "iV8Ui", "nc") ++BUILTIN(__builtin_lasx_xbz_d, "iV4ULLi", "nc") ++ ++BUILTIN(__builtin_lasx_xbnz_b, "iV32Uc", "nc") ++BUILTIN(__builtin_lasx_xbnz_h, "iV16Us", "nc") ++BUILTIN(__builtin_lasx_xbnz_w, "iV8Ui", "nc") ++BUILTIN(__builtin_lasx_xbnz_d, "iV4ULLi", "nc") ++ ++BUILTIN(__builtin_lasx_xvextl_q_d, "V4LLiV4LLi", "nc") ++BUILTIN(__builtin_lasx_xvextl_qu_du, "V4LLiV4ULLi", "nc") ++ ++ ++// LoongArch BASE ++ ++BUILTIN(__builtin_loongarch_cpucfg, "UiUi", "nc") ++BUILTIN(__builtin_loongarch_csrrd, "UiIUi", "nc") ++BUILTIN(__builtin_loongarch_dcsrrd, "ULiIULi", "nc") ++BUILTIN(__builtin_loongarch_csrwr, "UiUiIUi", "nc") ++BUILTIN(__builtin_loongarch_dcsrwr, "ULiULiIULi", "nc") ++BUILTIN(__builtin_loongarch_csrxchg, "UiUiUiIUi", "nc") ++BUILTIN(__builtin_loongarch_dcsrxchg, "ULiULiULiIULi", "nc") ++BUILTIN(__builtin_loongarch_iocsrrd_b, "UiUi", "nc") ++BUILTIN(__builtin_loongarch_iocsrrd_h, "UiUi", "nc") ++BUILTIN(__builtin_loongarch_iocsrrd_w, "UiUi", "nc") ++BUILTIN(__builtin_loongarch_iocsrrd_d, "ULiUi", "nc") ++BUILTIN(__builtin_loongarch_iocsrwr_b, "vUiUi", "nc") ++BUILTIN(__builtin_loongarch_iocsrwr_h, "vUiUi", "nc") ++BUILTIN(__builtin_loongarch_iocsrwr_w, "vUiUi", "nc") ++BUILTIN(__builtin_loongarch_iocsrwr_d, "vULiUi", "nc") ++BUILTIN(__builtin_loongarch_cacop, "viUii", "nc") ++BUILTIN(__builtin_loongarch_dcacop, "viULiLi", "nc") ++BUILTIN(__builtin_loongarch_crc_w_b_w, "iii", "nc") ++BUILTIN(__builtin_loongarch_crc_w_h_w, "iii", "nc") ++BUILTIN(__builtin_loongarch_crc_w_w_w, "iii", "nc") ++BUILTIN(__builtin_loongarch_crc_w_d_w, "iLii", "nc") ++BUILTIN(__builtin_loongarch_crcc_w_b_w, "iii", "nc") ++BUILTIN(__builtin_loongarch_crcc_w_h_w, "iii", "nc") ++BUILTIN(__builtin_loongarch_crcc_w_w_w, "iii", "nc") ++BUILTIN(__builtin_loongarch_crcc_w_d_w, "iLii", "nc") ++BUILTIN(__builtin_loongarch_tlbclr, "v", "nc") ++BUILTIN(__builtin_loongarch_tlbflush, "v", "nc") ++BUILTIN(__builtin_loongarch_tlbfill, "v", "nc") ++BUILTIN(__builtin_loongarch_tlbrd, "v", "nc") ++BUILTIN(__builtin_loongarch_tlbwr, "v", "nc") ++BUILTIN(__builtin_loongarch_tlbsrch, "v", "nc") ++BUILTIN(__builtin_loongarch_syscall, "vIULi", "nc") ++BUILTIN(__builtin_loongarch_break, "vIULi", "nc") ++BUILTIN(__builtin_loongarch_asrtle_d, "vLiLi", "nc") ++BUILTIN(__builtin_loongarch_asrtgt_d, "vLiLi", "nc") ++BUILTIN(__builtin_loongarch_dbar, "vIULi", "nc") ++BUILTIN(__builtin_loongarch_ibar, "vIULi", "nc") ++#undef BUILTIN +diff --git a/include/clang/Basic/DiagnosticDriverKinds.td b/include/clang/Basic/DiagnosticDriverKinds.td +index 3efedbe0..abcdec74 100644 +--- a/include/clang/Basic/DiagnosticDriverKinds.td ++++ b/include/clang/Basic/DiagnosticDriverKinds.td +@@ -193,6 +193,8 @@ def err_drv_force_crash : Error< + "failing because %select{environment variable 'FORCE_CLANG_DIAGNOSTICS_CRASH' is set|'-gen-reproducer' is used}0">; + def err_drv_invalid_mfloat_abi : Error< + "invalid float ABI '%0'">; ++def err_drv_invalid_loongarch_mfpu : Error< ++ "invalid loongarch FPU value '%0'. 
Please specify FPU = 64,32 or none">; + def err_drv_invalid_mtp : Error< + "invalid thread pointer reading mode '%0'">; + def err_drv_missing_arg_mtp : Error< +diff --git a/include/clang/Basic/TargetBuiltins.h b/include/clang/Basic/TargetBuiltins.h +index d4ea8e98..5f3851af 100644 +--- a/include/clang/Basic/TargetBuiltins.h ++++ b/include/clang/Basic/TargetBuiltins.h +@@ -145,6 +145,16 @@ namespace clang { + }; + } // namespace RISCV + ++ /// LoongArch builtins ++ namespace LoongArch { ++ enum { ++ LastTIBuiltin = clang::Builtin::FirstTSBuiltin-1, ++#define BUILTIN(ID, TYPE, ATTRS) BI##ID, ++#include "clang/Basic/BuiltinsLoongArch.def" ++ LastTSBuiltin ++ }; ++ } // namespace LoongArch ++ + /// Flags to identify the types for overloaded Neon builtins. + /// + /// These must be kept in sync with the flags in utils/TableGen/NeonEmitter.h. +@@ -336,7 +346,8 @@ namespace clang { + PPC::LastTSBuiltin, NVPTX::LastTSBuiltin, AMDGPU::LastTSBuiltin, + X86::LastTSBuiltin, VE::LastTSBuiltin, RISCV::LastTSBuiltin, + Hexagon::LastTSBuiltin, Mips::LastTSBuiltin, XCore::LastTSBuiltin, +- SystemZ::LastTSBuiltin, WebAssembly::LastTSBuiltin}); ++ SystemZ::LastTSBuiltin, WebAssembly::LastTSBuiltin, ++ LoongArch::LastTSBuiltin}); + + } // end namespace clang. + +diff --git a/include/clang/Basic/TargetCXXABI.def b/include/clang/Basic/TargetCXXABI.def +index 9501cca7..8ea4bece 100644 +--- a/include/clang/Basic/TargetCXXABI.def ++++ b/include/clang/Basic/TargetCXXABI.def +@@ -88,6 +88,12 @@ ITANIUM_CXXABI(GenericAArch64, "aarch64") + /// - representation of member function pointers adjusted as in ARM. + ITANIUM_CXXABI(GenericMIPS, "mips") + ++/// The generic LoongArch ABI is a modified version of the Itanium ABI. ++/// ++/// At the moment, only change from the generic ABI in this case is: ++/// - representation of member function pointers adjusted as in ARM. ++ITANIUM_CXXABI(GenericLoongArch, "loongarch") ++ + /// The WebAssembly ABI is a modified version of the Itanium ABI. 
+ /// + /// The changes from the Itanium ABI are: +diff --git a/include/clang/Basic/TargetCXXABI.h b/include/clang/Basic/TargetCXXABI.h +index e727f85e..507cf580 100644 +--- a/include/clang/Basic/TargetCXXABI.h ++++ b/include/clang/Basic/TargetCXXABI.h +@@ -102,6 +102,9 @@ public: + case GenericAArch64: + return T.isAArch64(); + ++ case GenericLoongArch: ++ return T.isLoongArch(); ++ + case GenericMIPS: + return T.isMIPS(); + +@@ -166,6 +169,7 @@ public: + case Fuchsia: + case GenericARM: + case GenericAArch64: ++ case GenericLoongArch: + case GenericMIPS: + // TODO: ARM-style pointers to member functions put the discriminator in + // the this adjustment, so they don't require functions to have any +@@ -250,6 +254,7 @@ public: + case GenericItanium: + case iOS: // old iOS compilers did not follow this rule + case Microsoft: ++ case GenericLoongArch: + case GenericMIPS: + case XL: + return true; +@@ -288,6 +293,7 @@ public: + case GenericAArch64: + case GenericARM: + case iOS: ++ case GenericLoongArch: + case GenericMIPS: + case XL: + return UseTailPaddingUnlessPOD03; +diff --git a/include/clang/Driver/Options.td b/include/clang/Driver/Options.td +index e0d21584..bbca2ae5 100644 +--- a/include/clang/Driver/Options.td ++++ b/include/clang/Driver/Options.td +@@ -176,6 +176,8 @@ def m_x86_Features_Group : OptionGroup<"">, + Group, Flags<[CoreOption]>, DocName<"X86">; + def m_riscv_Features_Group : OptionGroup<"">, + Group, DocName<"RISCV">; ++def m_loongarch_Features_Group : OptionGroup<"">, ++ Group, DocName<"LoongArch">; + + def m_libc_Group : OptionGroup<"">, Group, + Flags<[HelpHidden]>; +@@ -3315,12 +3317,15 @@ def mcmodel_EQ_medany : Flag<["-"], "mcmodel=medany">, Group, Group, + HelpText<"Enable use of experimental RISC-V extensions.">; + +-def munaligned_access : Flag<["-"], "munaligned-access">, Group, +- HelpText<"Allow memory accesses to be unaligned (AArch32/AArch64 only)">; +-def mno_unaligned_access : Flag<["-"], "mno-unaligned-access">, Group, +- HelpText<"Force all memory accesses to be aligned (AArch32/AArch64 only)">; ++def munaligned_access : Flag<["-"], "munaligned-access">, Group, ++ HelpText<"Allow memory accesses to be unaligned">; ++def mno_unaligned_access : Flag<["-"], "mno-unaligned-access">, Group, ++ HelpText<"Force all memory accesses to be aligned">; + def mstrict_align : Flag<["-"], "mstrict-align">, Alias, Flags<[CC1Option,HelpHidden]>, + HelpText<"Force all memory accesses to be aligned (same as mno-unaligned-access)">; ++def mno_strict_align : Flag<["-"], "mno-strict-align">, Group, ++ Flags<[CC1Option,HelpHidden]>, Alias, ++ HelpText<"Allow memory accesses to be unaligned (LoongArch only, same as munaligned-access)">; + def mno_thumb : Flag<["-"], "mno-thumb">, Group; + def mrestrict_it: Flag<["-"], "mrestrict-it">, Group, + HelpText<"Disallow generation of deprecated IT blocks for ARMv8. 
It is on by default for ARMv8 Thumb mode.">; +@@ -3616,6 +3621,14 @@ def mstack_protector_guard_reg_EQ : Joined<["-"], "mstack-protector-guard-reg="> + def mfentry : Flag<["-"], "mfentry">, HelpText<"Insert calls to fentry at function entry (x86/SystemZ only)">, + Flags<[CC1Option]>, Group, + MarshallingInfoFlag>; ++def mlsx : Flag<["-"], "mlsx">, Group, ++ HelpText<"Use LARCH Loongson LSX instructions.">; ++def mno_lsx : Flag<["-"], "mno-lsx">, Group, ++ HelpText<"Disable LARCH Loongson LSX instructions.">; ++def mlasx : Flag<["-"], "mlasx">, Group, ++ HelpText<"Enable LARCH Loongson LASX instructions.">; ++def mno_lasx : Flag<["-"], "mno-lasx">, Group, ++ HelpText<"Disable LARCH Loongson LASX instructions.">; + def mnop_mcount : Flag<["-"], "mnop-mcount">, HelpText<"Generate mcount/__fentry__ calls as nops. To activate they need to be patched in.">, + Flags<[CC1Option]>, Group, + MarshallingInfoFlag>; +diff --git a/include/clang/Sema/Sema.h b/include/clang/Sema/Sema.h +index 4b609f4b..c6ee1053 100644 +--- a/include/clang/Sema/Sema.h ++++ b/include/clang/Sema/Sema.h +@@ -12749,6 +12749,9 @@ private: + bool CheckRISCVLMUL(CallExpr *TheCall, unsigned ArgNum); + bool CheckRISCVBuiltinFunctionCall(const TargetInfo &TI, unsigned BuiltinID, + CallExpr *TheCall); ++ bool CheckLoongArchBuiltinFunctionCall(const TargetInfo &TI, ++ unsigned BuiltinID, ++ CallExpr *TheCall); + + bool SemaBuiltinVAStart(unsigned BuiltinID, CallExpr *TheCall); + bool SemaBuiltinVAStartARMMicrosoft(CallExpr *Call); +diff --git a/include/clang/module.modulemap b/include/clang/module.modulemap +index 2b73cd54..efc6aa21 100644 +--- a/include/clang/module.modulemap ++++ b/include/clang/module.modulemap +@@ -42,6 +42,7 @@ module Clang_Basic { + textual header "Basic/BuiltinsHexagon.def" + textual header "Basic/BuiltinsHexagonDep.def" + textual header "Basic/BuiltinsHexagonMapCustomDep.def" ++ textual header "Basic/BuiltinsLoongArch.def" + textual header "Basic/BuiltinsMips.def" + textual header "Basic/BuiltinsNEON.def" + textual header "Basic/BuiltinsNVPTX.def" +diff --git a/lib/AST/ASTContext.cpp b/lib/AST/ASTContext.cpp +index e4b3827b..e56cd4ce 100644 +--- a/lib/AST/ASTContext.cpp ++++ b/lib/AST/ASTContext.cpp +@@ -901,6 +901,7 @@ CXXABI *ASTContext::createCXXABI(const TargetInfo &T) { + case TargetCXXABI::iOS: + case TargetCXXABI::WatchOS: + case TargetCXXABI::GenericAArch64: ++ case TargetCXXABI::GenericLoongArch: + case TargetCXXABI::GenericMIPS: + case TargetCXXABI::GenericItanium: + case TargetCXXABI::WebAssembly: +@@ -11651,6 +11652,7 @@ MangleContext *ASTContext::createMangleContext(const TargetInfo *T) { + case TargetCXXABI::GenericAArch64: + case TargetCXXABI::GenericItanium: + case TargetCXXABI::GenericARM: ++ case TargetCXXABI::GenericLoongArch: + case TargetCXXABI::GenericMIPS: + case TargetCXXABI::iOS: + case TargetCXXABI::WebAssembly: +diff --git a/lib/Basic/CMakeLists.txt b/lib/Basic/CMakeLists.txt +index 40de9433..ac6bc570 100644 +--- a/lib/Basic/CMakeLists.txt ++++ b/lib/Basic/CMakeLists.txt +@@ -78,6 +78,7 @@ add_clang_library(clangBasic + Targets/Hexagon.cpp + Targets/Lanai.cpp + Targets/Le64.cpp ++ Targets/LoongArch.cpp + Targets/M68k.cpp + Targets/MSP430.cpp + Targets/Mips.cpp +diff --git a/lib/Basic/Targets.cpp b/lib/Basic/Targets.cpp +index 994a491c..35f577f4 100644 +--- a/lib/Basic/Targets.cpp ++++ b/lib/Basic/Targets.cpp +@@ -22,6 +22,7 @@ + #include "Targets/Hexagon.h" + #include "Targets/Lanai.h" + #include "Targets/Le64.h" ++#include "Targets/LoongArch.h" + #include "Targets/M68k.h" + #include 
"Targets/MSP430.h" + #include "Targets/Mips.h" +@@ -325,6 +326,25 @@ TargetInfo *AllocateTarget(const llvm::Triple &Triple, + case llvm::Triple::le64: + return new Le64TargetInfo(Triple, Opts); + ++#if 0 ++ //TODO: support it in future ++ case llvm::Triple::loongarch32: ++ switch (os) { ++ case llvm::Triple::Linux: ++ return new LinuxTargetInfo(Triple, Opts); ++ default: ++ return new LoongArchTargetInfo(Triple, Opts); ++ } ++#endif ++ ++ case llvm::Triple::loongarch64: ++ switch (os) { ++ case llvm::Triple::Linux: ++ return new LinuxTargetInfo(Triple, Opts); ++ default: ++ return new LoongArchTargetInfo(Triple, Opts); ++ } ++ + case llvm::Triple::ppc: + if (Triple.isOSDarwin()) + return new DarwinPPC32TargetInfo(Triple, Opts); +diff --git a/lib/Basic/Targets/LoongArch.cpp b/lib/Basic/Targets/LoongArch.cpp +new file mode 100644 +index 00000000..f94d9f09 +--- /dev/null ++++ b/lib/Basic/Targets/LoongArch.cpp +@@ -0,0 +1,149 @@ ++//===--- LoongArch.cpp - Implement LoongArch target feature support -----------------===// ++// ++// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. ++// See https://llvm.org/LICENSE.txt for license information. ++// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception ++// ++//===----------------------------------------------------------------------===// ++// ++// This file implements LoongArch TargetInfo objects. ++// ++//===----------------------------------------------------------------------===// ++ ++#include "LoongArch.h" ++#include "Targets.h" ++#include "clang/Basic/Diagnostic.h" ++#include "clang/Basic/MacroBuilder.h" ++#include "clang/Basic/TargetBuiltins.h" ++#include "llvm/ADT/StringSwitch.h" ++ ++using namespace clang; ++using namespace clang::targets; ++ ++const Builtin::Info LoongArchTargetInfo::BuiltinInfo[] = { ++#define BUILTIN(ID, TYPE, ATTRS) \ ++ {#ID, TYPE, ATTRS, nullptr, ALL_LANGUAGES, nullptr}, ++#define LIBBUILTIN(ID, TYPE, ATTRS, HEADER) \ ++ {#ID, TYPE, ATTRS, HEADER, ALL_LANGUAGES, nullptr}, ++#include "clang/Basic/BuiltinsLoongArch.def" ++}; ++ ++bool LoongArchTargetInfo::processorSupportsGPR64() const { ++ return llvm::StringSwitch(CPU) ++ .Cases("la464", "generic-la64", true) ++ .Default(false); ++ return false; ++} ++ ++static constexpr llvm::StringLiteral ValidCPUNames[] = { ++ {"la464"}, {"generic-la64"}, {"generic-la32"}}; ++ ++bool LoongArchTargetInfo::isValidCPUName(StringRef Name) const { ++ return llvm::find(ValidCPUNames, Name) != std::end(ValidCPUNames); ++} ++ ++void LoongArchTargetInfo::fillValidCPUList( ++ SmallVectorImpl &Values) const { ++ Values.append(std::begin(ValidCPUNames), std::end(ValidCPUNames)); ++} ++ ++void LoongArchTargetInfo::getTargetDefines(const LangOptions &Opts, ++ MacroBuilder &Builder) const { ++ Builder.defineMacro("__loongarch__"); ++ ++ if (ABI == "lp64d" || ABI == "lp64s" || ABI == "lp64f") { ++ Builder.defineMacro("__loongarch_lp64"); ++ Builder.defineMacro("__loongarch64"); ++ Builder.defineMacro("_ABILP64", "3"); ++ Builder.defineMacro("_LOONGARCH_SIM", "_ABILP64"); ++ } else ++ llvm_unreachable("Invalid ABI."); ++ ++ Builder.defineMacro("__REGISTER_PREFIX__", ""); ++ ++ if (HasLSX) ++ Builder.defineMacro("__loongarch_sx", Twine(1)); ++ ++ if (HasLASX) ++ Builder.defineMacro("__loongarch_asx", Twine(1)); ++ ++ Builder.defineMacro("_LOONGARCH_SZPTR", Twine(getPointerWidth(0))); ++ Builder.defineMacro("_LOONGARCH_SZINT", Twine(getIntWidth())); ++ Builder.defineMacro("_LOONGARCH_SZLONG", Twine(getLongWidth())); ++ ++ Builder.defineMacro("_LOONGARCH_TUNE", "\"" + 
CPU + "\""); ++ Builder.defineMacro("_LOONGARCH_TUNE_" + StringRef(CPU).upper()); ++ ++ Builder.defineMacro("_LOONGARCH_ARCH", "\"" + getTriple().getArchName() + "\""); ++ Builder.defineMacro("_LOONGARCH_ARCH_" + StringRef(getTriple().getArchName()).upper()); ++ ++ Builder.defineMacro("__GCC_HAVE_SYNC_COMPARE_AND_SWAP_1"); ++ Builder.defineMacro("__GCC_HAVE_SYNC_COMPARE_AND_SWAP_2"); ++ Builder.defineMacro("__GCC_HAVE_SYNC_COMPARE_AND_SWAP_4"); ++ ++ // 32-bit loongarch processors don't have the necessary ll.d/sc.d instructions ++ // found in 64-bit processors. ++ if (ABI == "lp64d" || ABI == "lp64s" || ABI == "lp64f") ++ Builder.defineMacro("__GCC_HAVE_SYNC_COMPARE_AND_SWAP_8"); ++ ++ // Bit-width of general purpose registers. ++ Builder.defineMacro("__loongarch_grlen", Twine(getRegisterWidth())); ++ ++ // Bit-width of floating-point registers. The possible values for ++ // this macro are 0, 32 and 64. 0 if there is no FPU. ++ if (HasBasicD || HasBasicF) ++ Builder.defineMacro("__loongarch_frlen", HasBasicD ? "64" : "32"); ++ else ++ Builder.defineMacro("__loongarch_frlen", "0"); ++ ++ // FIXME: Defined if floating-point/extended ABI type is single or double. ++ if (ABI == "lp64d" || ABI == "lp64f") ++ Builder.defineMacro("__loongarch_hard_float"); ++ ++ // FIXME: Defined if floating-point/extended ABI type is double. ++ if (ABI == "lp64d") ++ Builder.defineMacro("__loongarch_double_float"); ++ ++ // FIXME: Defined if floating-point/extended ABI type is single. ++ if (ABI == "lp64f") ++ Builder.defineMacro("__loongarch_single_float"); ++ ++ // FIXME: Defined if floating-point/extended ABI type is soft. ++ if (ABI == "lp64s") ++ Builder.defineMacro("__loongarch_soft_float"); ++} ++ ++bool LoongArchTargetInfo::hasFeature(StringRef Feature) const { ++ return llvm::StringSwitch(Feature) ++ .Case("lsx", HasLSX) ++ .Case("lasx", HasLASX) ++ .Case("d", HasBasicD) ++ .Case("f", HasBasicF) ++ .Default(false); ++} ++ ++ArrayRef LoongArchTargetInfo::getTargetBuiltins() const { ++ return llvm::makeArrayRef(BuiltinInfo, clang::LoongArch::LastTSBuiltin - ++ Builtin::FirstTSBuiltin); ++} ++ ++bool LoongArchTargetInfo::validateTarget(DiagnosticsEngine &Diags) const { ++ // 64-bit ABI's require 64-bit CPU's. ++ if (!processorSupportsGPR64() && ++ (ABI == "lp64d" || ABI == "lp64s" || ABI == "lp64f")) { ++ Diags.Report(diag::err_target_unsupported_abi) << ABI << CPU; ++ return false; ++ } ++ ++ // FIXME: It's valid to use lp64d/lp64s/lp64f on a loongarch32 triple ++ // but the backend can't handle this yet. It's better to fail here than on the ++ // backend assertion. ++ if (getTriple().isLoongArch32() && ++ (ABI == "lp64d" || ABI == "lp64s" || ABI == "lp64f")) { ++ Diags.Report(diag::err_target_unsupported_abi_for_triple) ++ << ABI << getTriple().str(); ++ return false; ++ } ++ ++ return true; ++} +diff --git a/lib/Basic/Targets/LoongArch.h b/lib/Basic/Targets/LoongArch.h +new file mode 100644 +index 00000000..6e854fd7 +--- /dev/null ++++ b/lib/Basic/Targets/LoongArch.h +@@ -0,0 +1,352 @@ ++//===--- LoongArch.h - Declare LoongArch target feature support -----------*- C++ -*-===// ++// ++// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. ++// See https://llvm.org/LICENSE.txt for license information. ++// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception ++// ++//===----------------------------------------------------------------------===// ++// ++// This file declares LoongArch TargetInfo objects. 
++// ++//===----------------------------------------------------------------------===// ++ ++#ifndef LLVM_CLANG_LIB_BASIC_TARGETS_LOONGARCH_H ++#define LLVM_CLANG_LIB_BASIC_TARGETS_LOONGARCH_H ++ ++#include "clang/Basic/TargetInfo.h" ++#include "clang/Basic/TargetOptions.h" ++#include "llvm/ADT/Triple.h" ++#include "llvm/Support/Compiler.h" ++ ++namespace clang { ++namespace targets { ++ ++class LLVM_LIBRARY_VISIBILITY LoongArchTargetInfo : public TargetInfo { ++ void setDataLayout() { ++ StringRef Layout; ++ ++ if (ABI == "ilp32d" || ABI == "ilp32f" || ABI == "ilp32s") ++ // TODO ++ llvm_unreachable("Unimplemented ABI"); ++ else if (ABI == "lp64d" || ABI == "lp64s" || ABI == "lp64f") ++ Layout = "m:e-i8:8:32-i16:16:32-i64:64-n32:64-S128"; ++ else ++ llvm_unreachable("Invalid ABI"); ++ ++ resetDataLayout(("e-" + Layout).str()); ++ } ++ ++ static const Builtin::Info BuiltinInfo[]; ++ std::string CPU; ++ bool HasLSX; ++ bool HasLASX; ++ bool HasBasicF; ++ bool HasBasicD; ++ ++protected: ++ std::string ABI; ++ ++public: ++ LoongArchTargetInfo(const llvm::Triple &Triple, const TargetOptions &) ++ : TargetInfo(Triple), HasLSX(false), HasLASX(false), HasBasicF(false), ++ HasBasicD(false) { ++ TheCXXABI.set(TargetCXXABI::GenericLoongArch); ++ ++ if (Triple.isLoongArch32()) ++ // TODO ++ llvm_unreachable("Unimplemented triple"); ++ else ++ setABI("lp64d"); ++ ++ // Currently, CPU only supports 'la464' in LA. ++ if ( ABI == "lp64d") ++ CPU = "la464"; ++ } ++ ++ bool processorSupportsGPR64() const; ++ ++ StringRef getABI() const override { return ABI; } ++ ++ bool setABI(const std::string &Name) override { ++ if (Name == "ilp32d" || Name == "ilp32f" || Name == "ilp32s") { ++ // TODO ++ llvm_unreachable("Unimplemented ABI"); ++ } ++ ++ if (Name == "lp64d" || Name == "lp64s" || Name == "lp64f") { ++ setLP64ABITypes(); ++ ABI = Name; ++ return true; ++ } ++ return false; ++ } ++ ++ void setLP64ABITypes() { ++ LongDoubleWidth = LongDoubleAlign = 128; ++ LongDoubleFormat = &llvm::APFloat::IEEEquad(); ++ MaxAtomicPromoteWidth = MaxAtomicInlineWidth = 64; ++ SuitableAlign = 128; ++ Int64Type = SignedLong; ++ IntMaxType = Int64Type; ++ LongWidth = LongAlign = 64; ++ PointerWidth = PointerAlign = 64; ++ PtrDiffType = SignedLong; ++ SizeType = UnsignedLong; ++ } ++ ++ bool isValidCPUName(StringRef Name) const override; ++ void fillValidCPUList(SmallVectorImpl &Values) const override; ++ ++ bool setCPU(const std::string &Name) override { ++ CPU = Name; ++ return isValidCPUName(Name); ++ } ++ ++ const std::string &getCPU() const { return CPU; } ++ bool ++ initFeatureMap(llvm::StringMap &Features, DiagnosticsEngine &Diags, ++ StringRef CPU, ++ const std::vector &FeaturesVec) const override { ++#if 0 ++ if (CPU.empty()) ++ CPU = getCPU(); ++ Features[CPU] = true; ++#else ++// if (CPU == "la464") ++// Features["loongarch64"] = true; ++ ++//FIXME: we need this? 
++// if (CPU == "la464") ++// Features["64bit"] = true; ++#endif ++ return TargetInfo::initFeatureMap(Features, Diags, CPU, FeaturesVec); ++ } ++ ++ void getTargetDefines(const LangOptions &Opts, ++ MacroBuilder &Builder) const override; ++ ++ ArrayRef getTargetBuiltins() const override; ++ ++ bool hasFeature(StringRef Feature) const override; ++ ++ bool hasBitIntType() const override { return true; } ++ ++ BuiltinVaListKind getBuiltinVaListKind() const override { ++ return TargetInfo::VoidPtrBuiltinVaList; ++ } ++ ++ ArrayRef getGCCRegNames() const override { ++ static const char *const GCCRegNames[] = { ++ // CPU register names ++ // Must match second column of GCCRegAliases ++ "$r0", "$r1", "$r2", "$r3", "$r4", "$r5", "$r6", "$r7", "$r8", "$r9", ++ "$r10", "$r11", "$r12", "$r13", "$r14", "$r15", "$r16", "$r17", "$r18", ++ "$r19", "$r20", "$r21", "$r22", "$r23", "$r24", "$r25", "$r26", "$r27", ++ "$r28", "$r29", "$r30", "$r31", ++ // Floating point register names ++ "$f0", "$f1", "$f2", "$f3", "$f4", "$f5", "$f6", "$f7", "$f8", "$f9", ++ "$f10", "$f11", "$f12", "$f13", "$f14", "$f15", "$f16", "$f17", "$f18", ++ "$f19", "$f20", "$f21", "$f22", "$f23", "$f24", "$f25", "$f26", "$f27", ++ "$f28", "$f29", "$f30", "$f31", ++ // condition register names ++ "$fcc0", "$fcc1", "$fcc2", "$fcc3", "$fcc4", "$fcc5", "$fcc6", "$fcc7", ++ // LSX register names ++ "$vr0", "$vr1", "$vr2", "$vr3", "$vr4", "$vr5", "$vr6", "$vr7", "$vr8", ++ "$vr9", "$vr10", "$vr11", "$vr12", "$vr13", "$vr14", "$vr15", "$vr16", ++ "$vr17", "$vr18", "$vr19", "$vr20", "$vr21", "$vr22", "$vr23", "$vr24", ++ "$vr25", "$vr26", "$vr27", "$vr28", "$vr29", "$vr30", "$vr31", ++ // LASX register names ++ "$xr0", "$xr1", "$xr2", "$xr3", "$xr4", "$xr5", "$xr6", "$xr7", "$xr8", ++ "$xr9", "$xr10", "$xr11", "$xr12", "$xr13", "$xr14", "$xr15", "$xr16", ++ "$xr17", "$xr18", "$xr19", "$xr20", "$xr21", "$xr22", "$xr23", "$xr24", ++ "$xr25", "$xr26", "$xr27", "$xr28", "$xr29", "$xr30", "$xr31" ++ ++ }; ++ return llvm::makeArrayRef(GCCRegNames); ++ } ++ ++ bool validateAsmConstraint(const char *&Name, ++ TargetInfo::ConstraintInfo &Info) const override { ++ switch (*Name) { ++ default: ++ return false; ++ case 'r': // CPU registers. ++ case 'f': // floating-point registers. ++ Info.setAllowsRegister(); ++ return true; ++ case 'l': // Signed 16-bit constant ++ case 'I': // Signed 12-bit constant ++ case 'K': // Unsigned 12-bit constant ++ case 'J': // Integer 0 ++ case 'G': // Floating-point 0 ++ return true; ++ case 'm': // Memory address with 12-bit offset ++ case 'R': // An address that can be used in a non-macro load or store ++ Info.setAllowsMemory(); ++ return true; ++ case 'Z': ++ if (Name[1] == 'C' // Memory address with 16-bit and 4 bytes aligned offset ++ || Name[1] == 'B' ) { // Memory address with 0 offset ++ Info.setAllowsMemory(); ++ Name++; // Skip over 'Z'. ++ return true; ++ } ++ return false; ++ } ++ } ++ ++ std::string convertConstraint(const char *&Constraint) const override { ++ std::string R; ++ switch (*Constraint) { ++ case 'Z': // Two-character constraint; add "^" hint for later parsing. 
++ if (Constraint[1] == 'C' || Constraint[1] == 'B') { ++ R = std::string("^") + std::string(Constraint, 2); ++ Constraint++; ++ return R; ++ } ++ break; ++ } ++ return TargetInfo::convertConstraint(Constraint); ++ } ++ ++ const char *getClobbers() const override { ++#if 0 ++ // In GCC, $1 is not widely used in generated code (it's used only in a few ++ // specific situations), so there is no real need for users to add it to ++ // the clobbers list if they want to use it in their inline assembly code. ++ // ++ // In LLVM, $1 is treated as a normal GPR and is always allocatable during ++ // code generation, so using it in inline assembly without adding it to the ++ // clobbers list can cause conflicts between the inline assembly code and ++ // the surrounding generated code. ++ // ++ // Another problem is that LLVM is allowed to choose $1 for inline assembly ++ // operands, which will conflict with the ".set at" assembler option (which ++ // we use only for inline assembly, in order to maintain compatibility with ++ // GCC) and will also conflict with the user's usage of $1. ++ // ++ // The easiest way to avoid these conflicts and keep $1 as an allocatable ++ // register for generated code is to automatically clobber $1 for all inline ++ // assembly code. ++ // ++ // FIXME: We should automatically clobber $1 only for inline assembly code ++ // which actually uses it. This would allow LLVM to use $1 for inline ++ // assembly operands if the user's assembly code doesn't use it. ++ return "~{$1}"; ++#endif ++ return ""; ++ } ++ ++ bool handleTargetFeatures(std::vector &Features, ++ DiagnosticsEngine &Diags) override { ++ HasBasicF = false; ++ HasBasicD = false; ++ ++ for (const auto &Feature : Features) { ++ if (Feature == "+lsx") ++ HasLSX = true; ++ else if (Feature == "+lasx") { ++ HasLASX = true; ++ HasLSX = true; ++ } else if (Feature == "+f") ++ HasBasicF = true; ++ else if (Feature == "+d") ++ HasBasicD = true; ++ } ++ ++ setDataLayout(); ++ ++ return true; ++ } ++ ++ int getEHDataRegisterNumber(unsigned RegNo) const override { ++ if (RegNo == 0) ++ return 4; ++ if (RegNo == 1) ++ return 5; ++ return -1; ++ } ++ ++ bool isCLZForZeroUndef() const override { return false; } ++ ++ ArrayRef getGCCRegAliases() const override { ++ static const TargetInfo::GCCRegAlias GCCRegAliases[] = { ++ {{"zero", "$zero", "r0", "$0"}, "$r0"}, ++ {{"ra", "$ra", "r1", "$1"}, "$r1"}, ++ {{"tp", "$tp", "r2", "$2"}, "$r2"}, ++ {{"sp", "$sp", "r3", "$3"}, "$r3"}, ++ {{"a0", "$a0", "r4", "$4", "v0"}, "$r4"}, ++ {{"a1", "$a1", "r5", "$5", "v1"}, "$r5"}, ++ {{"a2", "$a2", "r6", "$6"}, "$r6"}, ++ {{"a3", "$a3", "r7", "$7"}, "$r7"}, ++ {{"a4", "$a4", "r8", "$8"}, "$r8"}, ++ {{"a5", "$a5", "r9", "$9"}, "$r9"}, ++ {{"a6", "$a6", "r10", "$10"}, "$r10"}, ++ {{"a7", "$a7", "r11", "$11"}, "$r11"}, ++ {{"t0", "$t0", "r12", "$12"}, "$r12"}, ++ {{"t1", "$t1", "r13", "$13"}, "$r13"}, ++ {{"t2", "$t2", "r14", "$14"}, "$r14"}, ++ {{"t3", "$t3", "r15", "$15"}, "$r15"}, ++ {{"t4", "$t4", "r16", "$16"}, "$r16"}, ++ {{"t5", "$t5", "r17", "$17"}, "$r17"}, ++ {{"t6", "$t6", "r18", "$18"}, "$r18"}, ++ {{"t7", "$t7", "r19", "$19"}, "$r19"}, ++ {{"t8", "$t8", "r20", "$20"}, "$r20"}, ++ //{{"x", "$x", "r21", "$21"}, "$r21"}, ++ {{"fp", "$fp", "r22", "$22"}, "$r22"}, ++ {{"s0", "$s0", "r23", "$23"}, "$r23"}, ++ {{"s1", "$s1", "r24", "$24"}, "$r24"}, ++ {{"s2", "$s2", "r25", "$25"}, "$r25"}, ++ {{"s3", "$s3", "r26", "$26"}, "$r26"}, ++ {{"s4", "$s4", "r27", "$27"}, "$r27"}, ++ {{"s5", "$s5", "r28", "$28"}, "$r28"}, ++ {{"s6", "$s6", "r29", 
"$29"}, "$r29"}, ++ {{"s7", "$s7", "r30", "$30"}, "$r30"}, ++ {{"s8", "$s8", "r31", "$31"}, "$r31"}, ++ {{"fa0", "$fa0", "f0"}, "$f0"}, ++ {{"fa1", "$fa1", "f1"}, "$f1"}, ++ {{"fa2", "$fa2", "f2"}, "$f2"}, ++ {{"fa3", "$fa3", "f3"}, "$f3"}, ++ {{"fa4", "$fa4", "f4"}, "$f4"}, ++ {{"fa5", "$fa5", "f5"}, "$f5"}, ++ {{"fa6", "$fa6", "f6"}, "$f6"}, ++ {{"fa7", "$fa7", "f7"}, "$f7"}, ++ {{"ft0", "$ft0", "f8"}, "$f8"}, ++ {{"ft1", "$ft1", "f9"}, "$f9"}, ++ {{"ft2", "$ft2", "f10"}, "$f10"}, ++ {{"ft3", "$ft3", "f11"}, "$f11"}, ++ {{"ft4", "$ft4", "f12"}, "$f12"}, ++ {{"ft5", "$ft5", "f13"}, "$f13"}, ++ {{"ft6", "$ft6", "f14"}, "$f14"}, ++ {{"ft7", "$ft7", "f15"}, "$f15"}, ++ {{"ft8", "$ft8", "f16"}, "$f16"}, ++ {{"ft9", "$ft9", "f17"}, "$f17"}, ++ {{"ft10", "$ft10", "f18"}, "$f18"}, ++ {{"ft11", "$ft11", "f19"}, "$f19"}, ++ {{"ft12", "$ft12", "f20"}, "$f20"}, ++ {{"ft13", "$ft13", "f21"}, "$f21"}, ++ {{"ft14", "$ft14", "f22"}, "$f22"}, ++ {{"ft15", "$ft15", "f23"}, "$f23"}, ++ {{"fs0", "$fs0", "f24"}, "$f24"}, ++ {{"fs1", "$fs1", "f25"}, "$f25"}, ++ {{"fs2", "$fs2", "f26"}, "$f26"}, ++ {{"fs3", "$fs3", "f27"}, "$f27"}, ++ {{"fs4", "$fs4", "f28"}, "$f28"}, ++ {{"fs5", "$fs5", "f29"}, "$f29"}, ++ {{"fs6", "$fs6", "f30"}, "$f30"}, ++ {{"fs7", "$fs7", "f31"}, "$f31"}, ++ }; ++ return llvm::makeArrayRef(GCCRegAliases); ++ } ++ ++ bool hasInt128Type() const override { ++ return (ABI == "lp64d" || ABI == "lp64s" || ABI == "lp64f") || ++ getTargetOpts().ForceEnableInt128; ++ } ++ ++ bool validateTarget(DiagnosticsEngine &Diags) const override; ++}; ++} // namespace targets ++} // namespace clang ++ ++#endif // LLVM_CLANG_LIB_BASIC_TARGETS_LOONGARCH_H +diff --git a/lib/CodeGen/CodeGenFunction.cpp b/lib/CodeGen/CodeGenFunction.cpp +index d7393526..50e16389 100644 +--- a/lib/CodeGen/CodeGenFunction.cpp ++++ b/lib/CodeGen/CodeGenFunction.cpp +@@ -560,6 +560,29 @@ bool CodeGenFunction::AlwaysEmitXRayTypedEvents() const { + XRayInstrKind::Typed); + } + ++llvm::Constant * ++CodeGenFunction::EncodeAddrForUseInPrologue(llvm::Function *F, ++ llvm::Constant *Addr) { ++ // Addresses stored in prologue data can't require run-time fixups and must ++ // be PC-relative. Run-time fixups are undesirable because they necessitate ++ // writable text segments, which are unsafe. And absolute addresses are ++ // undesirable because they break PIE mode. ++ ++ // Add a layer of indirection through a private global. Taking its address ++ // won't result in a run-time fixup, even if Addr has linkonce_odr linkage. ++ auto *GV = new llvm::GlobalVariable(CGM.getModule(), Addr->getType(), ++ /*isConstant=*/true, ++ llvm::GlobalValue::PrivateLinkage, Addr); ++ ++ // Create a PC-relative address. ++ auto *GOTAsInt = llvm::ConstantExpr::getPtrToInt(GV, IntPtrTy); ++ auto *FuncAsInt = llvm::ConstantExpr::getPtrToInt(F, IntPtrTy); ++ auto *PCRelAsInt = llvm::ConstantExpr::getSub(GOTAsInt, FuncAsInt); ++ return (IntPtrTy == Int32Ty) ++ ? 
PCRelAsInt ++ : llvm::ConstantExpr::getTrunc(PCRelAsInt, Int32Ty); ++} ++ + llvm::Value * + CodeGenFunction::DecodeAddrUsedInPrologue(llvm::Value *F, + llvm::Value *EncodedAddr) { +@@ -903,13 +926,12 @@ void CodeGenFunction::StartFunction(GlobalDecl GD, QualType RetTy, + FD->getType(), EST_None); + llvm::Constant *FTRTTIConst = + CGM.GetAddrOfRTTIDescriptor(ProtoTy, /*ForEH=*/true); +- llvm::GlobalVariable *FTRTTIProxy = +- CGM.GetOrCreateRTTIProxyGlobalVariable(FTRTTIConst); +- llvm::LLVMContext &Ctx = Fn->getContext(); +- llvm::MDBuilder MDB(Ctx); +- Fn->setMetadata(llvm::LLVMContext::MD_func_sanitize, +- MDB.createRTTIPointerPrologue(PrologueSig, FTRTTIProxy)); +- CGM.addCompilerUsedGlobal(FTRTTIProxy); ++ llvm::Constant *FTRTTIConstEncoded = ++ EncodeAddrForUseInPrologue(Fn, FTRTTIConst); ++ llvm::Constant *PrologueStructElems[] = {PrologueSig, FTRTTIConstEncoded}; ++ llvm::Constant *PrologueStructConst = ++ llvm::ConstantStruct::getAnon(PrologueStructElems, /*Packed=*/true); ++ Fn->setPrologueData(PrologueStructConst); + } + } + +diff --git a/lib/CodeGen/CodeGenFunction.h b/lib/CodeGen/CodeGenFunction.h +index 046b249b..df99cd9a 100644 +--- a/lib/CodeGen/CodeGenFunction.h ++++ b/lib/CodeGen/CodeGenFunction.h +@@ -2351,6 +2351,10 @@ public: + /// XRay typed event handling calls. + bool AlwaysEmitXRayTypedEvents() const; + ++ /// Encode an address into a form suitable for use in a function prologue. ++ llvm::Constant *EncodeAddrForUseInPrologue(llvm::Function *F, ++ llvm::Constant *Addr); ++ + /// Decode an address used in a function prologue, encoded by \c + /// EncodeAddrForUseInPrologue. + llvm::Value *DecodeAddrUsedInPrologue(llvm::Value *F, +diff --git a/lib/CodeGen/CodeGenModule.cpp b/lib/CodeGen/CodeGenModule.cpp +index 58eef1b0..6c95dd61 100644 +--- a/lib/CodeGen/CodeGenModule.cpp ++++ b/lib/CodeGen/CodeGenModule.cpp +@@ -82,6 +82,7 @@ static CGCXXABI *createCXXABI(CodeGenModule &CGM) { + case TargetCXXABI::GenericARM: + case TargetCXXABI::iOS: + case TargetCXXABI::WatchOS: ++ case TargetCXXABI::GenericLoongArch: + case TargetCXXABI::GenericMIPS: + case TargetCXXABI::GenericItanium: + case TargetCXXABI::WebAssembly: +@@ -1826,22 +1827,6 @@ CodeGenModule::getMostBaseClasses(const CXXRecordDecl *RD) { + return MostBases.takeVector(); + } + +-llvm::GlobalVariable * +-CodeGenModule::GetOrCreateRTTIProxyGlobalVariable(llvm::Constant *Addr) { +- auto It = RTTIProxyMap.find(Addr); +- if (It != RTTIProxyMap.end()) +- return It->second; +- +- auto *FTRTTIProxy = new llvm::GlobalVariable( +- TheModule, Addr->getType(), +- /*isConstant=*/true, llvm::GlobalValue::PrivateLinkage, Addr, +- "__llvm_rtti_proxy"); +- FTRTTIProxy->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global); +- +- RTTIProxyMap[Addr] = FTRTTIProxy; +- return FTRTTIProxy; +-} +- + void CodeGenModule::SetLLVMFunctionAttributesForDefinition(const Decl *D, + llvm::Function *F) { + llvm::AttrBuilder B(F->getContext()); +diff --git a/lib/CodeGen/CodeGenModule.h b/lib/CodeGen/CodeGenModule.h +index 3a9d542e..a8a63c8d 100644 +--- a/lib/CodeGen/CodeGenModule.h ++++ b/lib/CodeGen/CodeGenModule.h +@@ -561,8 +561,6 @@ private: + MetadataTypeMap VirtualMetadataIdMap; + MetadataTypeMap GeneralizedMetadataIdMap; + +- llvm::DenseMap RTTIProxyMap; +- + public: + CodeGenModule(ASTContext &C, const HeaderSearchOptions &headersearchopts, + const PreprocessorOptions &ppopts, +@@ -1413,9 +1411,6 @@ public: + std::vector + getMostBaseClasses(const CXXRecordDecl *RD); + +- llvm::GlobalVariable * +- 
GetOrCreateRTTIProxyGlobalVariable(llvm::Constant *Addr); +- + /// Get the declaration of std::terminate for the platform. + llvm::FunctionCallee getTerminateFn(); + +diff --git a/lib/CodeGen/ItaniumCXXABI.cpp b/lib/CodeGen/ItaniumCXXABI.cpp +index 2979d92c..5ef50a16 100644 +--- a/lib/CodeGen/ItaniumCXXABI.cpp ++++ b/lib/CodeGen/ItaniumCXXABI.cpp +@@ -533,6 +533,9 @@ CodeGen::CGCXXABI *CodeGen::CreateItaniumCXXABI(CodeGenModule &CGM) { + return new ItaniumCXXABI(CGM, /*UseARMMethodPtrABI=*/true, + /*UseARMGuardVarABI=*/true); + ++ case TargetCXXABI::GenericLoongArch: ++ return new ItaniumCXXABI(CGM, /*UseARMMethodPtrABI=*/true); ++ + case TargetCXXABI::GenericMIPS: + return new ItaniumCXXABI(CGM, /*UseARMMethodPtrABI=*/true); + +diff --git a/lib/CodeGen/TargetInfo.cpp b/lib/CodeGen/TargetInfo.cpp +index d83bc9e5..2ce2573a 100644 +--- a/lib/CodeGen/TargetInfo.cpp ++++ b/lib/CodeGen/TargetInfo.cpp +@@ -11242,6 +11242,557 @@ public: + }; + } // namespace + ++//===----------------------------------------------------------------------===// ++// LoongArch ABI Implementation ++//===----------------------------------------------------------------------===// ++ ++namespace { ++class LoongArchABIInfo : public DefaultABIInfo { ++private: ++ // Size of the integer ('r') registers in bits. ++ unsigned GRLen; ++ // Size of the floating point ('f') registers in bits. Note that the target ++ // ISA might have a wider FRLen than the selected ABI. ++ unsigned FRLen; ++ static const int NumArgGPRs = 8; ++ static const int NumArgFPRs = 8; ++ bool detectFPCCEligibleStructHelper(QualType Ty, CharUnits CurOff, ++ llvm::Type *&Field1Ty, ++ CharUnits &Field1Off, ++ llvm::Type *&Field2Ty, ++ CharUnits &Field2Off) const; ++ ++public: ++ LoongArchABIInfo(CodeGen::CodeGenTypes &CGT, unsigned GRLen, unsigned FRLen) ++ : DefaultABIInfo(CGT), GRLen(GRLen), FRLen(FRLen) {} ++ ++ // DefaultABIInfo's classifyReturnType and classifyArgumentType are ++ // non-virtual, but computeInfo is virtual, so we overload it. ++ void computeInfo(CGFunctionInfo &FI) const override; ++ ++ ABIArgInfo classifyArgumentType(QualType Ty, bool IsFixed, int &ArgGPRsLeft, ++ int &ArgFPRsLeft) const; ++ ABIArgInfo classifyReturnType(QualType RetTy) const; ++ ++ uint64_t MinABIStackAlignInBytes = 8; ++ uint64_t StackAlignInBytes = 16; ++ llvm::Type* HandleAggregates(QualType Ty, uint64_t TySize) const; ++ llvm::Type* getPaddingType(uint64_t Align, uint64_t Offset) const; ++ void CoerceToIntArgs(uint64_t TySize, ++ SmallVectorImpl &ArgList) const; ++ ++ Address EmitVAArg(CodeGenFunction &CGF, Address VAListAddr, ++ QualType Ty) const override; ++ ++ ABIArgInfo extendType(QualType Ty) const; ++ ++ bool detectFPCCEligibleStruct(QualType Ty, llvm::Type *&Field1Ty, ++ CharUnits &Field1Off, llvm::Type *&Field2Ty, ++ CharUnits &Field2Off, int &NeededArgGPRs, ++ int &NeededArgFPRs) const; ++ ABIArgInfo coerceAndExpandFPCCEligibleStruct(llvm::Type *Field1Ty, ++ CharUnits Field1Off, ++ llvm::Type *Field2Ty, ++ CharUnits Field2Off) const; ++}; ++} // end anonymous namespace ++ ++void LoongArchABIInfo::computeInfo(CGFunctionInfo &FI) const { ++ QualType RetTy = FI.getReturnType(); ++ if (!getCXXABI().classifyReturnType(FI)) ++ FI.getReturnInfo() = classifyReturnType(RetTy); ++ ++ // IsRetIndirect is true if classifyArgumentType indicated the value should ++ // be passed indirect or if the type size is greater than 2*grlen. 
++ bool IsRetIndirect = FI.getReturnInfo().getKind() == ABIArgInfo::Indirect || ++ getContext().getTypeSize(RetTy) > (2 * GRLen); ++ ++ // We must track the number of GPRs used in order to conform to the LoongArch ++ // ABI, as integer scalars passed in registers should have signext/zeroext ++ // when promoted, but are anyext if passed on the stack. As GPR usage is ++ // different for variadic arguments, we must also track whether we are ++ // examining a vararg or not. ++ int ArgGPRsLeft = IsRetIndirect ? NumArgGPRs - 1 : NumArgGPRs; ++ int ArgFPRsLeft = FRLen ? NumArgFPRs : 0; ++ int NumFixedArgs = FI.getNumRequiredArgs(); ++ ++ int ArgNum = 0; ++ for (auto &ArgInfo : FI.arguments()) { ++ bool IsFixed = ArgNum < NumFixedArgs; ++ ArgInfo.info = ++ classifyArgumentType(ArgInfo.type, IsFixed, ArgGPRsLeft, ArgFPRsLeft); ++ ArgNum++; ++ } ++} ++ ++// Returns true if the struct is a potential candidate for the floating point ++// calling convention. If this function returns true, the caller is ++// responsible for checking that if there is only a single field then that ++// field is a float. ++bool LoongArchABIInfo::detectFPCCEligibleStructHelper(QualType Ty, CharUnits CurOff, ++ llvm::Type *&Field1Ty, ++ CharUnits &Field1Off, ++ llvm::Type *&Field2Ty, ++ CharUnits &Field2Off) const { ++ bool IsInt = Ty->isIntegralOrEnumerationType(); ++ bool IsFloat = Ty->isRealFloatingType(); ++ ++ if (IsInt || IsFloat) { ++ uint64_t Size = getContext().getTypeSize(Ty); ++ if (IsInt && Size > GRLen) ++ return false; ++ // Can't be eligible if larger than the FP registers. Half precision isn't ++ // currently supported on LoongArch and the ABI hasn't been confirmed, so ++ // default to the integer ABI in that case. ++ if (IsFloat && (Size > FRLen || Size < 32)) ++ return false; ++ // Can't be eligible if an integer type was already found (int+int pairs ++ // are not eligible). ++ if (IsInt && Field1Ty && Field1Ty->isIntegerTy()) ++ return false; ++ if (!Field1Ty) { ++ Field1Ty = CGT.ConvertType(Ty); ++ Field1Off = CurOff; ++ return true; ++ } ++ if (!Field2Ty) { ++ Field2Ty = CGT.ConvertType(Ty); ++ Field2Off = CurOff; ++ return true; ++ } ++ return false; ++ } ++ ++ if (auto CTy = Ty->getAs()) { ++ if (Field1Ty) ++ return false; ++ QualType EltTy = CTy->getElementType(); ++ if (getContext().getTypeSize(EltTy) > FRLen) ++ return false; ++ Field1Ty = CGT.ConvertType(EltTy); ++ Field1Off = CurOff; ++ assert(CurOff.isZero() && "Unexpected offset for first field"); ++ Field2Ty = Field1Ty; ++ Field2Off = Field1Off + getContext().getTypeSizeInChars(EltTy); ++ return true; ++ } ++ ++ if (const ConstantArrayType *ATy = getContext().getAsConstantArrayType(Ty)) { ++ uint64_t ArraySize = ATy->getSize().getZExtValue(); ++ QualType EltTy = ATy->getElementType(); ++ CharUnits EltSize = getContext().getTypeSizeInChars(EltTy); ++ for (uint64_t i = 0; i < ArraySize; ++i) { ++ bool Ret = detectFPCCEligibleStructHelper(EltTy, CurOff, Field1Ty, ++ Field1Off, Field2Ty, Field2Off); ++ if (!Ret) ++ return false; ++ CurOff += EltSize; ++ } ++ return true; ++ } ++ ++ if (const auto *RTy = Ty->getAs()) { ++ // Structures with either a non-trivial destructor or a non-trivial ++ // copy constructor are not eligible for the FP calling convention. ++ if (getRecordArgABI(Ty, CGT.getCXXABI())) ++ return false; ++ if (isEmptyRecord(getContext(), Ty, true)) ++ return true; ++ const RecordDecl *RD = RTy->getDecl(); ++ // Unions aren't eligible unless they're empty (which is caught above). 
++ if (RD->isUnion()) ++ return false; ++ const ASTRecordLayout &Layout = getContext().getASTRecordLayout(RD); ++ // If this is a C++ record, check the bases first. ++ if (const CXXRecordDecl *CXXRD = dyn_cast(RD)) { ++ for (const CXXBaseSpecifier &B : CXXRD->bases()) { ++ const auto *BDecl = ++ cast(B.getType()->castAs()->getDecl()); ++ CharUnits BaseOff = Layout.getBaseClassOffset(BDecl); ++ bool Ret = detectFPCCEligibleStructHelper(B.getType(), CurOff + BaseOff, ++ Field1Ty, Field1Off, Field2Ty, ++ Field2Off); ++ if (!Ret) ++ return false; ++ } ++ } ++ int ZeroWidthBitFieldCount = 0; ++ for (const FieldDecl *FD : RD->fields()) { ++ uint64_t FieldOffInBits = Layout.getFieldOffset(FD->getFieldIndex()); ++ QualType QTy = FD->getType(); ++ if (FD->isBitField()) { ++ unsigned BitWidth = FD->getBitWidthValue(getContext()); ++ // Allow a bitfield with a type greater than GRLen as long as the ++ // bitwidth is GRLen or less. ++ if (getContext().getTypeSize(QTy) > GRLen && BitWidth <= GRLen) ++ QTy = getContext().getIntTypeForBitwidth(GRLen, false); ++ if (BitWidth == 0) { ++ ZeroWidthBitFieldCount++; ++ continue; ++ } ++ } ++ ++ bool Ret = detectFPCCEligibleStructHelper( ++ QTy, CurOff + getContext().toCharUnitsFromBits(FieldOffInBits), ++ Field1Ty, Field1Off, Field2Ty, Field2Off); ++ if (!Ret) ++ return false; ++ ++ // As a quirk of the ABI, zero-width bitfields aren't ignored for fp+fp ++ // or int+fp structs, but are ignored for a struct with an fp field and ++ // any number of zero-width bitfields. ++ if (Field2Ty && ZeroWidthBitFieldCount > 0) ++ return false; ++ } ++ return Field1Ty != nullptr; ++ } ++ ++ return false; ++} ++ ++// Determine if a struct is eligible for passing according to the floating ++// point calling convention (i.e., when flattened it contains a single fp ++// value, fp+fp, or int+fp of appropriate size). If so, NeededArgFPRs and ++// NeededArgGPRs are incremented appropriately. ++bool LoongArchABIInfo::detectFPCCEligibleStruct(QualType Ty, llvm::Type *&Field1Ty, ++ CharUnits &Field1Off, ++ llvm::Type *&Field2Ty, ++ CharUnits &Field2Off, ++ int &NeededArgGPRs, ++ int &NeededArgFPRs) const { ++ Field1Ty = nullptr; ++ Field2Ty = nullptr; ++ NeededArgGPRs = 0; ++ NeededArgFPRs = 0; ++ bool IsCandidate = detectFPCCEligibleStructHelper( ++ Ty, CharUnits::Zero(), Field1Ty, Field1Off, Field2Ty, Field2Off); ++ // Not really a candidate if we have a single int but no float. ++ if (Field1Ty && !Field2Ty && !Field1Ty->isFloatingPointTy()) ++ return IsCandidate = false; ++ if (!IsCandidate) ++ return false; ++ if (Field1Ty && Field1Ty->isFloatingPointTy()) ++ NeededArgFPRs++; ++ else if (Field1Ty) ++ NeededArgGPRs++; ++ if (Field2Ty && Field2Ty->isFloatingPointTy()) ++ NeededArgFPRs++; ++ else if (Field2Ty) ++ NeededArgGPRs++; ++ return IsCandidate; ++} ++ ++// Call getCoerceAndExpand for the two-element flattened struct described by ++// Field1Ty, Field1Off, Field2Ty, Field2Off. This method will create an ++// appropriate coerceToType and unpaddedCoerceToType. 
++ABIArgInfo LoongArchABIInfo::coerceAndExpandFPCCEligibleStruct( ++ llvm::Type *Field1Ty, CharUnits Field1Off, llvm::Type *Field2Ty, ++ CharUnits Field2Off) const { ++ SmallVector CoerceElts; ++ SmallVector UnpaddedCoerceElts; ++ if (!Field1Off.isZero()) ++ CoerceElts.push_back(llvm::ArrayType::get( ++ llvm::Type::getInt8Ty(getVMContext()), Field1Off.getQuantity())); ++ ++ CoerceElts.push_back(Field1Ty); ++ UnpaddedCoerceElts.push_back(Field1Ty); ++ ++ if (!Field2Ty) { ++ return ABIArgInfo::getCoerceAndExpand( ++ llvm::StructType::get(getVMContext(), CoerceElts, !Field1Off.isZero()), ++ UnpaddedCoerceElts[0]); ++ } ++ ++ CharUnits Field2Align = ++ CharUnits::fromQuantity(getDataLayout().getABITypeAlignment(Field2Ty)); ++ CharUnits Field1Size = ++ CharUnits::fromQuantity(getDataLayout().getTypeStoreSize(Field1Ty)); ++ CharUnits Field2OffNoPadNoPack = Field1Size.alignTo(Field2Align); ++ ++ CharUnits Padding = CharUnits::Zero(); ++ if (Field2Off > Field2OffNoPadNoPack) ++ Padding = Field2Off - Field2OffNoPadNoPack; ++ else if (Field2Off != Field2Align && Field2Off > Field1Size) ++ Padding = Field2Off - Field1Size; ++ ++ bool IsPacked = !Field2Off.isMultipleOf(Field2Align); ++ ++ if (!Padding.isZero()) ++ CoerceElts.push_back(llvm::ArrayType::get( ++ llvm::Type::getInt8Ty(getVMContext()), Padding.getQuantity())); ++ ++ CoerceElts.push_back(Field2Ty); ++ UnpaddedCoerceElts.push_back(Field2Ty); ++ ++ auto CoerceToType = ++ llvm::StructType::get(getVMContext(), CoerceElts, IsPacked); ++ auto UnpaddedCoerceToType = ++ llvm::StructType::get(getVMContext(), UnpaddedCoerceElts, IsPacked); ++ ++ return ABIArgInfo::getCoerceAndExpand(CoerceToType, UnpaddedCoerceToType); ++} ++ ++void LoongArchABIInfo::CoerceToIntArgs( ++ uint64_t TySize, SmallVectorImpl &ArgList) const { ++ llvm::IntegerType *IntTy = ++ llvm::IntegerType::get(getVMContext(), MinABIStackAlignInBytes * 8); ++ ++ // Add (TySize / MinABIStackAlignInBytes) args of IntTy. ++ for (unsigned N = TySize / (MinABIStackAlignInBytes * 8); N; --N) ++ ArgList.push_back(IntTy); ++ ++ // If necessary, add one more integer type to ArgList. ++ unsigned R = TySize % (MinABIStackAlignInBytes * 8); ++ ++ if (R) ++ ArgList.push_back(llvm::IntegerType::get(getVMContext(), R)); ++} ++ ++llvm::Type* LoongArchABIInfo::HandleAggregates(QualType Ty, uint64_t TySize) const { ++ SmallVector ArgList, IntArgList; ++ ++ if (Ty->isComplexType()) ++ return CGT.ConvertType(Ty); ++ ++ const RecordType *RT = Ty->getAs(); ++ ++ // Unions/vectors are passed in integer registers. ++ if (!RT || !RT->isStructureOrClassType()) { ++ CoerceToIntArgs(TySize, ArgList); ++ return llvm::StructType::get(getVMContext(), ArgList); ++ } ++ ++ const RecordDecl *RD = RT->getDecl(); ++ const ASTRecordLayout &Layout = getContext().getASTRecordLayout(RD); ++ assert(!(TySize % 8) && "Size of structure must be multiple of 8."); ++ ++ uint64_t LastOffset = 0; ++ unsigned idx = 0; ++ llvm::IntegerType *I64 = llvm::IntegerType::get(getVMContext(), 64); ++ ++ // Iterate over fields in the struct/class and check if there are any aligned ++ // double fields. ++ for (RecordDecl::field_iterator i = RD->field_begin(), e = RD->field_end(); ++ i != e; ++i, ++idx) { ++ const QualType Ty = i->getType(); ++ const BuiltinType *BT = Ty->getAs(); ++ ++ if (!BT || BT->getKind() != BuiltinType::Double) ++ continue; ++ ++ uint64_t Offset = Layout.getFieldOffset(idx); ++ if (Offset % 64) // Ignore doubles that are not aligned. ++ continue; ++ ++ // Add ((Offset - LastOffset) / 64) args of type i64. 
++ for (unsigned j = (Offset - LastOffset) / 64; j > 0; --j) ++ ArgList.push_back(I64); ++ ++ // Add double type. ++ ArgList.push_back(llvm::Type::getDoubleTy(getVMContext())); ++ LastOffset = Offset + 64; ++ } ++ ++ CoerceToIntArgs(TySize - LastOffset, IntArgList); ++ ArgList.append(IntArgList.begin(), IntArgList.end()); ++ ++ return llvm::StructType::get(getVMContext(), ArgList); ++} ++ ++llvm::Type * LoongArchABIInfo::getPaddingType(uint64_t OrigOffset, ++ uint64_t Offset) const { ++ if (OrigOffset + MinABIStackAlignInBytes > Offset) ++ return nullptr; ++ ++ return llvm::IntegerType::get(getVMContext(), (Offset - OrigOffset) * 8); ++} ++ ++ABIArgInfo LoongArchABIInfo::classifyArgumentType(QualType Ty, bool IsFixed, ++ int &ArgGPRsLeft, ++ int &ArgFPRsLeft) const { ++ assert(ArgGPRsLeft <= NumArgGPRs && "Arg GPR tracking underflow"); ++ Ty = useFirstFieldIfTransparentUnion(Ty); ++ ++ // Structures with either a non-trivial destructor or a non-trivial ++ // copy constructor are always passed indirectly. ++ if (CGCXXABI::RecordArgABI RAA = getRecordArgABI(Ty, getCXXABI())) { ++ if (ArgGPRsLeft) ++ ArgGPRsLeft -= 1; ++ return getNaturalAlignIndirect(Ty, /*ByVal=*/RAA == ++ CGCXXABI::RAA_DirectInMemory); ++ } ++ ++ // Ignore empty structs/unions. ++ if (isEmptyRecord(getContext(), Ty, true)) ++ return ABIArgInfo::getIgnore(); ++ ++ uint64_t Size = getContext().getTypeSize(Ty); ++ ++ // Pass floating point values via FPRs if possible. ++ if (IsFixed && Ty->isFloatingType() && FRLen >= Size && ArgFPRsLeft) { ++ ArgFPRsLeft--; ++ return ABIArgInfo::getDirect(); ++ } ++ ++ // Complex types for the hard float ABI must be passed direct rather than ++ // using CoerceAndExpand. ++ if (IsFixed && Ty->isComplexType() && FRLen && ArgFPRsLeft >= 2) { ++ QualType EltTy = Ty->getAs()->getElementType(); ++ if (getContext().getTypeSize(EltTy) <= FRLen) { ++ ArgFPRsLeft -= 2; ++ return ABIArgInfo::getDirect(); ++ } ++ } ++ ++ if (Ty->isVectorType() && (((getContext().getTypeSize(Ty) == 128) && ++ (getTarget().hasFeature("lsx"))) || ++ ((getContext().getTypeSize(Ty) == 256) && ++ getTarget().hasFeature("lasx")))) ++ return ABIArgInfo::getDirect(); ++ ++ if (IsFixed && FRLen && Ty->isStructureOrClassType()) { ++ llvm::Type *Field1Ty = nullptr; ++ llvm::Type *Field2Ty = nullptr; ++ CharUnits Field1Off = CharUnits::Zero(); ++ CharUnits Field2Off = CharUnits::Zero(); ++ int NeededArgGPRs; ++ int NeededArgFPRs; ++ bool IsCandidate = ++ detectFPCCEligibleStruct(Ty, Field1Ty, Field1Off, Field2Ty, Field2Off, ++ NeededArgGPRs, NeededArgFPRs); ++ if (IsCandidate && NeededArgGPRs <= ArgGPRsLeft && ++ NeededArgFPRs <= ArgFPRsLeft) { ++ ArgGPRsLeft -= NeededArgGPRs; ++ ArgFPRsLeft -= NeededArgFPRs; ++ return coerceAndExpandFPCCEligibleStruct(Field1Ty, Field1Off, Field2Ty, ++ Field2Off); ++ } ++ } else if (Ty->isStructureOrClassType() && Size == 128 && ++ isAggregateTypeForABI(Ty)) { ++ uint64_t Offset = 8; ++ uint64_t OrigOffset = Offset; ++ uint64_t TySize = getContext().getTypeSize(Ty); ++ uint64_t Align = getContext().getTypeAlign(Ty) / 8; ++ ++ Align = std::min(std::max(Align, (uint64_t)MinABIStackAlignInBytes), ++ (uint64_t)StackAlignInBytes); ++ unsigned CurrOffset = llvm::alignTo(Offset, Align); ++ Offset = CurrOffset + llvm::alignTo(TySize, Align * 8) / 8; ++ ++ ABIArgInfo ArgInfo = ++ ABIArgInfo::getDirect(HandleAggregates(Ty, TySize), 0, ++ getPaddingType(OrigOffset, CurrOffset)); ++ ArgInfo.setInReg(true); ++ return ArgInfo; ++ } ++ ++ uint64_t NeededAlign = getContext().getTypeAlign(Ty); ++ // Determine the 
number of GPRs needed to pass the current argument ++ // according to the ABI. 2*GRLen-aligned varargs are passed in "aligned" ++ // register pairs, so may consume 3 registers. ++ int NeededArgGPRs = 1; ++ if (!IsFixed && NeededAlign == 2 * GRLen) ++ NeededArgGPRs = 2 + (ArgGPRsLeft % 2); ++ else if (Size > GRLen && Size <= 2 * GRLen) ++ NeededArgGPRs = 2; ++ ++ if (NeededArgGPRs > ArgGPRsLeft) { ++ NeededArgGPRs = ArgGPRsLeft; ++ } ++ ++ ArgGPRsLeft -= NeededArgGPRs; ++ ++ if (!isAggregateTypeForABI(Ty) && !Ty->isVectorType()) { ++ // Treat an enum type as its underlying type. ++ if (const EnumType *EnumTy = Ty->getAs()) ++ Ty = EnumTy->getDecl()->getIntegerType(); ++ ++ // All integral types are promoted to GRLen width, unless passed on the ++ // stack. ++ if (Size < GRLen && Ty->isIntegralOrEnumerationType()) { ++ return extendType(Ty); ++ } ++ ++ return ABIArgInfo::getDirect(); ++ } ++ ++ // Aggregates which are <= 2*GRLen will be passed in registers if possible, ++ // so coerce to integers. ++ if (Size <= 2 * GRLen) { ++ unsigned Alignment = getContext().getTypeAlign(Ty); ++ ++ // Use a single GRLen int if possible, 2*GRLen if 2*GRLen alignment is ++ // required, and a 2-element GRLen array if only GRLen alignment is required. ++ if (Size <= GRLen) { ++ return ABIArgInfo::getDirect( ++ llvm::IntegerType::get(getVMContext(), GRLen)); ++ } else if (Alignment == 2 * GRLen) { ++ return ABIArgInfo::getDirect( ++ llvm::IntegerType::get(getVMContext(), 2 * GRLen)); ++ } else { ++ return ABIArgInfo::getDirect(llvm::ArrayType::get( ++ llvm::IntegerType::get(getVMContext(), GRLen), 2)); ++ } ++ } ++ return getNaturalAlignIndirect(Ty, /*ByVal=*/false); ++} ++ ++ABIArgInfo LoongArchABIInfo::classifyReturnType(QualType RetTy) const { ++ if (RetTy->isVoidType()) ++ return ABIArgInfo::getIgnore(); ++ ++ int ArgGPRsLeft = 2; ++ int ArgFPRsLeft = FRLen ? 2 : 0; ++ ++ // The rules for return and argument types are the same, so defer to ++ // classifyArgumentType. ++ return classifyArgumentType(RetTy, /*IsFixed=*/true, ArgGPRsLeft, ++ ArgFPRsLeft); ++} ++ ++Address LoongArchABIInfo::EmitVAArg(CodeGenFunction &CGF, Address VAListAddr, ++ QualType Ty) const { ++ CharUnits SlotSize = CharUnits::fromQuantity(GRLen / 8); ++ ++ // Empty records are ignored for parameter passing purposes. ++ if (isEmptyRecord(getContext(), Ty, true)) { ++ Address Addr(CGF.Builder.CreateLoad(VAListAddr), SlotSize); ++ Addr = CGF.Builder.CreateElementBitCast(Addr, CGF.ConvertTypeForMem(Ty)); ++ return Addr; ++ } ++ ++ auto TInfo = getContext().getTypeInfoInChars(Ty); ++ ++ // Arguments bigger than 2*GRlen bytes are passed indirectly. ++ bool IsIndirect = TInfo.Width > 2 * SlotSize; ++ ++ return emitVoidPtrVAArg(CGF, VAListAddr, Ty, IsIndirect, TInfo, ++ SlotSize, /*AllowHigherAlign=*/true); ++} ++ ++ABIArgInfo LoongArchABIInfo::extendType(QualType Ty) const { ++ int TySize = getContext().getTypeSize(Ty); ++ // LP64 ABI requires unsigned 32 bit integers to be sign extended. 
++ if (GRLen == 64 && Ty->isUnsignedIntegerOrEnumerationType() && TySize == 32) ++ return ABIArgInfo::getSignExtend(Ty); ++ return ABIArgInfo::getExtend(Ty); ++} ++ ++namespace { ++class LoongArchTargetCodeGenInfo : public TargetCodeGenInfo { ++public: ++ LoongArchTargetCodeGenInfo(CodeGen::CodeGenTypes &CGT, unsigned GRLen, ++ unsigned FRLen) ++ : TargetCodeGenInfo(std::make_unique( ++ CGT, GRLen, FRLen)) {} ++ ++ void setTargetAttributes(const Decl *D, llvm::GlobalValue *GV, ++ CodeGen::CodeGenModule &CGM) const override { ++ return; ++ } ++}; ++} // namespace ++ + //===----------------------------------------------------------------------===// + // VE ABI Implementation. + // +@@ -11320,6 +11871,7 @@ const TargetCodeGenInfo &CodeGenModule::getTargetCodeGenInfo() { + + case llvm::Triple::le32: + return SetCGInfo(new PNaClTargetCodeGenInfo(Types)); ++ + case llvm::Triple::m68k: + return SetCGInfo(new M68kTargetCodeGenInfo(Types)); + case llvm::Triple::mips: +@@ -11437,6 +11989,9 @@ const TargetCodeGenInfo &CodeGenModule::getTargetCodeGenInfo() { + case llvm::Triple::msp430: + return SetCGInfo(new MSP430TargetCodeGenInfo(Types)); + ++ case llvm::Triple::loongarch64: ++ return SetCGInfo(new LoongArchTargetCodeGenInfo(Types, 64, 64)); ++ + case llvm::Triple::riscv32: + case llvm::Triple::riscv64: { + StringRef ABIStr = getTarget().getABI(); +diff --git a/lib/Driver/CMakeLists.txt b/lib/Driver/CMakeLists.txt +index 78e8fd18..90454cb1 100644 +--- a/lib/Driver/CMakeLists.txt ++++ b/lib/Driver/CMakeLists.txt +@@ -26,6 +26,7 @@ add_clang_library(clangDriver + ToolChain.cpp + ToolChains/Arch/AArch64.cpp + ToolChains/Arch/ARM.cpp ++ ToolChains/Arch/LoongArch.cpp + ToolChains/Arch/M68k.cpp + ToolChains/Arch/Mips.cpp + ToolChains/Arch/PPC.cpp +diff --git a/lib/Driver/Driver.cpp b/lib/Driver/Driver.cpp +index 3bfddeef..0e13c31c 100644 +--- a/lib/Driver/Driver.cpp ++++ b/lib/Driver/Driver.cpp +@@ -617,6 +617,22 @@ static llvm::Triple computeTargetTriple(const Driver &D, + Target.setVendorName("intel"); + } + ++ // If target is LoongArch adjust the target triple ++ // accordingly to provided ABI name. ++ A = Args.getLastArg(options::OPT_mabi_EQ); ++ if (A && Target.isLoongArch()) { ++ StringRef ABIName = A->getValue(); ++ if (ABIName == "ilp32d" || ABIName == "ilp32f" || ABIName == "ilp32s") { ++ // TODO ++ llvm_unreachable("Unimplemented ABI"); ++ } else if (ABIName == "lp64d") { ++ Target = Target.get64BitArchVariant(); ++ if (Target.getEnvironment() == llvm::Triple::GNU || ++ Target.getEnvironment() == llvm::Triple::GNUABILPX32) ++ Target.setEnvironment(llvm::Triple::GNUABI64); ++ } ++ } ++ + // If target is MIPS adjust the target triple + // accordingly to provided ABI name. + A = Args.getLastArg(options::OPT_mabi_EQ); +diff --git a/lib/Driver/SanitizerArgs.cpp b/lib/Driver/SanitizerArgs.cpp +index 96cef9eb..403fac76 100644 +--- a/lib/Driver/SanitizerArgs.cpp ++++ b/lib/Driver/SanitizerArgs.cpp +@@ -367,19 +367,6 @@ SanitizerArgs::SanitizerArgs(const ToolChain &TC, + Add &= ~NotAllowedWithMinimalRuntime; + } + +- if (llvm::opt::Arg *A = Args.getLastArg(options::OPT_mcmodel_EQ)) { +- StringRef CM = A->getValue(); +- if (CM != "small" && +- (Add & SanitizerKind::Function & ~DiagnosedKinds)) { +- if (DiagnoseErrors) +- D.Diag(diag::err_drv_argument_only_allowed_with) +- << "-fsanitize=function" +- << "-mcmodel=small"; +- Add &= ~SanitizerKind::Function; +- DiagnosedKinds |= SanitizerKind::Function; +- } +- } +- + // FIXME: Make CFI on member function calls compatible with cross-DSO CFI. 
+ // There are currently two problems: + // - Virtual function call checks need to pass a pointer to the function +diff --git a/lib/Driver/ToolChains/Arch/LoongArch.cpp b/lib/Driver/ToolChains/Arch/LoongArch.cpp +new file mode 100644 +index 00000000..0dcec221 +--- /dev/null ++++ b/lib/Driver/ToolChains/Arch/LoongArch.cpp +@@ -0,0 +1,179 @@ ++//===--- LoongArch.cpp - Tools Implementations -----------------------*- C++ -*-===// ++// ++// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. ++// See https://llvm.org/LICENSE.txt for license information. ++// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception ++// ++//===----------------------------------------------------------------------===// ++ ++#include "LoongArch.h" ++#include "ToolChains/CommonArgs.h" ++#include "clang/Driver/Driver.h" ++#include "clang/Driver/DriverDiagnostic.h" ++#include "clang/Driver/Options.h" ++#include "llvm/ADT/StringSwitch.h" ++#include "llvm/Option/ArgList.h" ++ ++using namespace clang::driver; ++using namespace clang::driver::tools; ++using namespace clang; ++using namespace llvm::opt; ++ ++// Get CPU and ABI names. They are not independent ++// so we have to calculate them together. ++void loongarch::getLoongArchCPUAndABI(const ArgList &Args, const llvm::Triple &Triple, ++ StringRef &CPUName, StringRef &ABIName) { ++ const char *DefLoongArch32CPU = "generic-la32"; ++ const char *DefLoongArch64CPU = "la464"; ++ ++ if (Arg *A = Args.getLastArg(clang::driver::options::OPT_march_EQ, ++ options::OPT_mcpu_EQ)) ++ CPUName = A->getValue(); ++ ++ if (Arg *A = Args.getLastArg(options::OPT_mabi_EQ)) ++ ABIName = A->getValue(); ++ ++ // Setup default CPU and ABI names. ++ if (CPUName.empty() && ABIName.empty()) { ++ switch (Triple.getArch()) { ++ default: ++ llvm_unreachable("Unexpected triple arch name"); ++ case llvm::Triple::loongarch32: ++ CPUName = DefLoongArch32CPU; ++ break; ++ case llvm::Triple::loongarch64: ++ CPUName = DefLoongArch64CPU; ++ break; ++ } ++ } ++ ++ if (ABIName.empty() && (Triple.getEnvironment() == llvm::Triple::GNUABILPX32)) ++ ABIName = "lpx32"; ++ ++ if (ABIName.empty()) { ++ ABIName = llvm::StringSwitch(CPUName) ++ .Case("generic-la32", "ilp32d") ++ .Cases("la464", "generic-la64", "lp64d") ++ .Default(Triple.isLoongArch32() ? "ilp32d" : "lp64d"); ++ } ++ ++ if (CPUName.empty()) { ++ // Deduce CPU name from ABI name. ++ CPUName = llvm::StringSwitch(ABIName) ++ .Cases("lp64d", "lp64f", "lp64s", DefLoongArch64CPU) ++ .Default(""); ++ } ++ ++ if (Arg *A = Args.getLastArg(options::OPT_msingle_float, ++ options::OPT_mdouble_float, ++ options::OPT_msoft_float)) { ++ if (A->getOption().matches(options::OPT_msingle_float)) ++ ABIName = "lp64f"; ++ else if (A->getOption().matches(options::OPT_mdouble_float)) ++ ABIName = "lp64d"; ++ else ++ ABIName = "lp64s"; ++ } ++ ++ // FIXME: Warn on inconsistent use of -march and -mabi. 
++} ++ ++std::string loongarch::getLoongArchABILibSuffix(const ArgList &Args, ++ const llvm::Triple &Triple) { ++ StringRef CPUName, ABIName; ++ tools::loongarch::getLoongArchCPUAndABI(Args, Triple, CPUName, ABIName); ++ return llvm::StringSwitch(ABIName) ++ .Cases("ilp32d", "ilp32f", "ilp32s", "32") ++ .Cases("lp64d", "lp64f", "lp64s", "64"); ++} ++ ++void loongarch::getLoongArchTargetFeatures(const Driver &D, const llvm::Triple &Triple, ++ const ArgList &Args, ++ std::vector &Features) { ++ StringRef CPUName; ++ StringRef ABIName; ++ StringRef FPUValue; ++ getLoongArchCPUAndABI(Args, Triple, CPUName, ABIName); ++ ++ bool NonPIC = false; ++ ++ Arg *LastPICArg = Args.getLastArg(options::OPT_fPIC, options::OPT_fno_PIC, ++ options::OPT_fpic, options::OPT_fno_pic, ++ options::OPT_fPIE, options::OPT_fno_PIE, ++ options::OPT_fpie, options::OPT_fno_pie); ++ if (LastPICArg) { ++ Option O = LastPICArg->getOption(); ++ NonPIC = ++ (O.matches(options::OPT_fno_PIC) || O.matches(options::OPT_fno_pic) || ++ O.matches(options::OPT_fno_PIE) || O.matches(options::OPT_fno_pie)); ++ } ++ ++ if (NonPIC) { ++ NonPIC = false; ++ } ++ ++ AddTargetFeature(Args, Features, options::OPT_mlsx, options::OPT_mno_lsx, ++ "lsx"); ++ AddTargetFeature(Args, Features, options::OPT_mlasx, options::OPT_mno_lasx, ++ "lasx"); ++ ++ AddTargetFeature(Args, Features, options::OPT_munaligned_access, ++ options::OPT_mno_unaligned_access, "unaligned-access"); ++ if (Arg *A = Args.getLastArg(options::OPT_mfpu_EQ)) ++ FPUValue = A->getValue(); ++ ++ if (Arg *A = Args.getLastArg(options::OPT_msingle_float, ++ options::OPT_mdouble_float, ++ options::OPT_msoft_float)) { ++ if (A->getOption().matches(options::OPT_msingle_float)) ++ FPUValue = "32"; ++ else if (A->getOption().matches(options::OPT_mdouble_float)) ++ FPUValue = "64"; ++ else ++ FPUValue = "none"; ++ } ++ ++ // Setup feature. ++ if (FPUValue.empty()) ++ Features.push_back("+d"); ++ else { ++ if (FPUValue == "64") ++ Features.push_back("+d"); ++ else if (FPUValue == "32") ++ Features.push_back("+f"); ++ else if (FPUValue == "none") { ++ Features.push_back("-f"); ++ Features.push_back("-d"); ++ } else ++ D.Diag(clang::diag::err_drv_invalid_loongarch_mfpu) ++ << FPUValue; ++ } ++ ++ // lp64f ABI and -mfpu=none are incompatible. ++ if (hasLoongArchAbiArg(Args, "lp64f") && hasLoongArchFpuArg(Args, "none")) { ++ D.Diag(clang::diag::err_opt_not_valid_with_opt) << "lp64f" ++ << "-mfpu=none"; ++ } ++ ++ // Also lp64d ABI is only compatible with -mfpu=64. 
++ if ((hasLoongArchAbiArg(Args, "lp64d") || ABIName == "lp64d") && ++ (hasLoongArchFpuArg(Args, "none") || hasLoongArchFpuArg(Args, "32"))) { ++ D.Diag(clang::diag::err_opt_not_valid_without_opt) << "lp64d" ++ << "-mfpu=64"; ++ } ++} ++ ++bool loongarch::hasLoongArchAbiArg(const ArgList &Args, const char *Value) { ++ Arg *A = Args.getLastArg(options::OPT_mabi_EQ); ++ return A && (A->getValue() == StringRef(Value)); ++} ++ ++bool loongarch::isUCLibc(const ArgList &Args) { ++ Arg *A = Args.getLastArg(options::OPT_m_libc_Group); ++ return A && A->getOption().matches(options::OPT_muclibc); ++} ++ ++bool loongarch::hasLoongArchFpuArg(const ArgList &Args, const char *Value) { ++ Arg *A = Args.getLastArg(options::OPT_mfpu_EQ); ++ return A && (A->getValue() == StringRef(Value)); ++} +diff --git a/lib/Driver/ToolChains/Arch/LoongArch.h b/lib/Driver/ToolChains/Arch/LoongArch.h +new file mode 100644 +index 00000000..5c581ff6 +--- /dev/null ++++ b/lib/Driver/ToolChains/Arch/LoongArch.h +@@ -0,0 +1,41 @@ ++//===--- LoongArch.h - LoongArch-specific Tool Helpers ----------------------*- C++ -*-===// ++// ++// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. ++// See https://llvm.org/LICENSE.txt for license information. ++// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception ++// ++//===----------------------------------------------------------------------===// ++ ++#ifndef LLVM_CLANG_LIB_DRIVER_TOOLCHAINS_ARCH_LOONGARCH_H ++#define LLVM_CLANG_LIB_DRIVER_TOOLCHAINS_ARCH_LOONGARCH_H ++ ++#include "clang/Driver/Driver.h" ++#include "llvm/ADT/StringRef.h" ++#include "llvm/ADT/Triple.h" ++#include "llvm/Option/Option.h" ++#include <string> ++#include <vector> ++ ++namespace clang { ++namespace driver { ++namespace tools { ++ ++namespace loongarch { ++void getLoongArchCPUAndABI(const llvm::opt::ArgList &Args, ++ const llvm::Triple &Triple, StringRef &CPUName, ++ StringRef &ABIName); ++void getLoongArchTargetFeatures(const Driver &D, const llvm::Triple &Triple, ++ const llvm::opt::ArgList &Args, ++ std::vector<StringRef> &Features); ++std::string getLoongArchABILibSuffix(const llvm::opt::ArgList &Args, ++ const llvm::Triple &Triple); ++bool hasLoongArchAbiArg(const llvm::opt::ArgList &Args, const char *Value); ++bool hasLoongArchFpuArg(const llvm::opt::ArgList &Args, const char *Value); ++bool isUCLibc(const llvm::opt::ArgList &Args); ++ ++} // end namespace loongarch ++} // end namespace tools ++} // end namespace driver ++} // end namespace clang ++ ++#endif // LLVM_CLANG_LIB_DRIVER_TOOLCHAINS_ARCH_LOONGARCH_H +diff --git a/lib/Driver/ToolChains/Clang.cpp b/lib/Driver/ToolChains/Clang.cpp +index f2f18e90..fdfde359 100644 +--- a/lib/Driver/ToolChains/Clang.cpp ++++ b/lib/Driver/ToolChains/Clang.cpp +@@ -10,6 +10,7 @@ + #include "AMDGPU.h" + #include "Arch/AArch64.h" + #include "Arch/ARM.h" ++#include "Arch/LoongArch.h" + #include "Arch/M68k.h" + #include "Arch/Mips.h" + #include "Arch/PPC.h" +@@ -331,6 +332,11 @@ static void getTargetFeatures(const Driver &D, const llvm::Triple &Triple, + arm::getARMTargetFeatures(D, Triple, Args, CmdArgs, Features, ForAS); + break; + ++ case llvm::Triple::loongarch32: ++ case llvm::Triple::loongarch64: ++ loongarch::getLoongArchTargetFeatures(D, Triple, Args, Features); ++ break; ++ + case llvm::Triple::ppc: + case llvm::Triple::ppcle: + case llvm::Triple::ppc64: +@@ -535,6 +541,8 @@ static bool useFramePointerForTargetByDefault(const ArgList &Args, + // XCore never wants frame pointers, regardless of OS. + // WebAssembly never wants frame pointers.
+ return false; ++ case llvm::Triple::loongarch64: ++ case llvm::Triple::loongarch32: + case llvm::Triple::ppc: + case llvm::Triple::ppcle: + case llvm::Triple::ppc64: +@@ -1751,6 +1759,11 @@ void Clang::RenderTargetOptions(const llvm::Triple &EffectiveTriple, + CmdArgs.push_back("-fallow-half-arguments-and-returns"); + break; + ++ case llvm::Triple::loongarch32: ++ case llvm::Triple::loongarch64: ++ AddLoongArchTargetArgs(Args, CmdArgs); ++ break; ++ + case llvm::Triple::mips: + case llvm::Triple::mipsel: + case llvm::Triple::mips64: +@@ -1895,6 +1908,32 @@ void Clang::AddAArch64TargetArgs(const ArgList &Args, + } + } + ++void Clang::AddLoongArchTargetArgs(const ArgList &Args, ++ ArgStringList &CmdArgs) const { ++ const Driver &D = getToolChain().getDriver(); ++ StringRef CPUName; ++ StringRef ABIName; ++ const llvm::Triple &Triple = getToolChain().getTriple(); ++ loongarch::getLoongArchCPUAndABI(Args, Triple, CPUName, ABIName); ++ ++ CmdArgs.push_back("-target-abi"); ++ CmdArgs.push_back(ABIName.data()); ++ ++ if (Arg *A = Args.getLastArg(options::OPT_mcheck_zero_division, ++ options::OPT_mno_check_zero_division)) { ++ if (A->getOption().matches(options::OPT_mno_check_zero_division)) { ++ CmdArgs.push_back("-mllvm"); ++ CmdArgs.push_back("-mnocheck-zero-division"); ++ } ++ } ++ ++ llvm::Reloc::Model RelocationModel; ++ unsigned PICLevel; ++ bool IsPIE; ++ std::tie(RelocationModel, PICLevel, IsPIE) = ++ ParsePICArgs(getToolChain(), Args); ++} ++ + void Clang::AddMIPSTargetArgs(const ArgList &Args, + ArgStringList &CmdArgs) const { + const Driver &D = getToolChain().getDriver(); +@@ -7627,6 +7666,17 @@ const char *Clang::getDependencyFileName(const ArgList &Args, + + // Begin ClangAs + ++void ClangAs::AddLoongArchTargetArgs(const ArgList &Args, ++ ArgStringList &CmdArgs) const { ++ StringRef CPUName; ++ StringRef ABIName; ++ const llvm::Triple &Triple = getToolChain().getTriple(); ++ loongarch::getLoongArchCPUAndABI(Args, Triple, CPUName, ABIName); ++ ++ CmdArgs.push_back("-target-abi"); ++ CmdArgs.push_back(ABIName.data()); ++} ++ + void ClangAs::AddMIPSTargetArgs(const ArgList &Args, + ArgStringList &CmdArgs) const { + StringRef CPUName; +@@ -7816,6 +7866,11 @@ void ClangAs::ConstructJob(Compilation &C, const JobAction &JA, + default: + break; + ++ case llvm::Triple::loongarch32: ++ case llvm::Triple::loongarch64: ++ AddLoongArchTargetArgs(Args, CmdArgs); ++ break; ++ + case llvm::Triple::mips: + case llvm::Triple::mipsel: + case llvm::Triple::mips64: +diff --git a/lib/Driver/ToolChains/Clang.h b/lib/Driver/ToolChains/Clang.h +index 79407c98..ba59f751 100644 +--- a/lib/Driver/ToolChains/Clang.h ++++ b/lib/Driver/ToolChains/Clang.h +@@ -57,6 +57,8 @@ private: + bool KernelOrKext) const; + void AddARM64TargetArgs(const llvm::opt::ArgList &Args, + llvm::opt::ArgStringList &CmdArgs) const; ++ void AddLoongArchTargetArgs(const llvm::opt::ArgList &Args, ++ llvm::opt::ArgStringList &CmdArgs) const; + void AddMIPSTargetArgs(const llvm::opt::ArgList &Args, + llvm::opt::ArgStringList &CmdArgs) const; + void AddPPCTargetArgs(const llvm::opt::ArgList &Args, +@@ -123,6 +125,8 @@ class LLVM_LIBRARY_VISIBILITY ClangAs : public Tool { + public: + ClangAs(const ToolChain &TC) + : Tool("clang::as", "clang integrated assembler", TC) {} ++ void AddLoongArchTargetArgs(const llvm::opt::ArgList &Args, ++ llvm::opt::ArgStringList &CmdArgs) const; + void AddMIPSTargetArgs(const llvm::opt::ArgList &Args, + llvm::opt::ArgStringList &CmdArgs) const; + void AddX86TargetArgs(const llvm::opt::ArgList &Args, +diff 
--git a/lib/Driver/ToolChains/CommonArgs.cpp b/lib/Driver/ToolChains/CommonArgs.cpp +index 8f9244ca..a8bcf851 100644 +--- a/lib/Driver/ToolChains/CommonArgs.cpp ++++ b/lib/Driver/ToolChains/CommonArgs.cpp +@@ -9,6 +9,7 @@ + #include "CommonArgs.h" + #include "Arch/AArch64.h" + #include "Arch/ARM.h" ++#include "Arch/LoongArch.h" + #include "Arch/M68k.h" + #include "Arch/Mips.h" + #include "Arch/PPC.h" +@@ -383,6 +384,14 @@ std::string tools::getCPUName(const Driver &D, const ArgList &Args, + return A->getValue(); + return ""; + ++ case llvm::Triple::loongarch32: ++ case llvm::Triple::loongarch64: { ++ StringRef CPUName; ++ StringRef ABIName; ++ loongarch::getLoongArchCPUAndABI(Args, T, CPUName, ABIName); ++ return std::string(CPUName); ++ } ++ + case llvm::Triple::m68k: + return m68k::getM68kTargetCPU(Args); + +@@ -1321,6 +1330,18 @@ tools::ParsePICArgs(const ToolChain &ToolChain, const ArgList &Args) { + if ((ROPI || RWPI) && (PIC || PIE)) + ToolChain.getDriver().Diag(diag::err_drv_ropi_rwpi_incompatible_with_pic); + ++ if (Triple.isLoongArch()) { ++ StringRef CPUName; ++ StringRef ABIName; ++ loongarch::getLoongArchCPUAndABI(Args, Triple, CPUName, ABIName); ++ // When targeting the LP64D ABI, PIC is the default. ++ if (ABIName == "lp64d") ++ PIC = true; ++ // Unlike other architectures, LoongArch, even with -fPIC/-mxgot/multigot, ++ // does not use PIC level 2 for historical reasons. ++ IsPICLevelTwo = false; ++ } ++ + if (Triple.isMIPS()) { + StringRef CPUName; + StringRef ABIName; +diff --git a/lib/Driver/ToolChains/Gnu.cpp b/lib/Driver/ToolChains/Gnu.cpp +index 7a9570a6..c0aa8fef 100644 +--- a/lib/Driver/ToolChains/Gnu.cpp ++++ b/lib/Driver/ToolChains/Gnu.cpp +@@ -8,6 +8,7 @@ + + #include "Gnu.h" + #include "Arch/ARM.h" ++#include "Arch/LoongArch.h" + #include "Arch/Mips.h" + #include "Arch/PPC.h" + #include "Arch/RISCV.h" +@@ -254,6 +255,10 @@ static const char *getLDMOption(const llvm::Triple &T, const ArgList &Args) { + case llvm::Triple::armeb: + case llvm::Triple::thumbeb: + return isArmBigEndian(T, Args) ? "armelfb_linux_eabi" : "armelf_linux_eabi"; ++ case llvm::Triple::loongarch32: ++ return "elf32loongarch"; ++ case llvm::Triple::loongarch64: ++ return "elf64loongarch"; + case llvm::Triple::m68k: + return "m68kelf"; + case llvm::Triple::ppc: +@@ -822,6 +827,55 @@ void tools::gnutools::Assembler::ConstructJob(Compilation &C, + + break; + } ++ case llvm::Triple::loongarch32: ++ case llvm::Triple::loongarch64: { ++ StringRef CPUName; ++ StringRef ABIName; ++ loongarch::getLoongArchCPUAndABI(Args, getToolChain().getTriple(), CPUName, ABIName); ++ ++ //FIXME: Currently gnu as doesn't support -march ++ //CmdArgs.push_back("-march=loongarch"); ++ //CmdArgs.push_back(CPUName.data()); ++ ++ CmdArgs.push_back("-mabi=lp64d"); ++ ++ // -mno-shared should be emitted unless -fpic, -fpie, -fPIC, -fPIE, ++ // or -mshared (not implemented) is in effect. ++ if (RelocationModel == llvm::Reloc::Static) ++ CmdArgs.push_back("-mno-shared"); ++ ++ break; ++ ++ // Add the last -mfp32/-mfp64. ++ if (Arg *A = Args.getLastArg(options::OPT_mfp32, ++ options::OPT_mfp64)) { ++ A->claim(); ++ A->render(Args, CmdArgs); ++ } ++ ++ if (Arg *A = Args.getLastArg(options::OPT_mlsx, options::OPT_mno_lsx)) { ++ // Do not use AddLastArg because not all versions of LoongArch assembler ++ // support -mlsx / -mno-lsx options. 
++ if (A->getOption().matches(options::OPT_mlsx)) ++ CmdArgs.push_back(Args.MakeArgString("-mlsx")); ++ } ++ ++ if (Arg *A = Args.getLastArg(options::OPT_mlasx, options::OPT_mno_lasx)) { ++ // Do not use AddLastArg because not all versions of LoongArch assembler ++ // support -mlasx / -mno-lasx options. ++ if (A->getOption().matches(options::OPT_mlasx)) ++ CmdArgs.push_back(Args.MakeArgString("-mlasx")); ++ } ++ ++ Args.AddLastArg(CmdArgs, options::OPT_mhard_float, ++ options::OPT_msoft_float); ++ ++ Args.AddLastArg(CmdArgs, options::OPT_mdouble_float, ++ options::OPT_msingle_float); ++ ++ AddAssemblerKPIC(getToolChain(), Args, CmdArgs); ++ break; ++ } + case llvm::Triple::mips: + case llvm::Triple::mipsel: + case llvm::Triple::mips64: +@@ -2185,6 +2239,10 @@ void Generic_GCC::GCCInstallationDetector::AddDefaultGCCPrefixes( + "s390x-linux-gnu", "s390x-unknown-linux-gnu", "s390x-ibm-linux-gnu", + "s390x-suse-linux", "s390x-redhat-linux"}; + ++ static const char *const LoongArch64LibDirs[] = {"/lib64", "/lib"}; ++ static const char *const LoongArch64Triples[] = { ++ "loongarch64-linux-gnu", "loongarch64-unknown-linux-gnu", ++ "loongarch64-loongson-linux-gnu"}; + + using std::begin; + using std::end; +@@ -2353,6 +2411,10 @@ void Generic_GCC::GCCInstallationDetector::AddDefaultGCCPrefixes( + BiarchTripleAliases.append(begin(X32Triples), end(X32Triples)); + } + break; ++ case llvm::Triple::loongarch64: ++ LibDirs.append(begin(LoongArch64LibDirs), end(LoongArch64LibDirs)); ++ TripleAliases.append(begin(LoongArch64Triples), end(LoongArch64Triples)); ++ break; + case llvm::Triple::m68k: + LibDirs.append(begin(M68kLibDirs), end(M68kLibDirs)); + TripleAliases.append(begin(M68kTriples), end(M68kTriples)); +@@ -2708,6 +2770,7 @@ bool Generic_GCC::isPICDefault() const { + switch (getArch()) { + case llvm::Triple::x86_64: + return getTriple().isOSWindows(); ++ case llvm::Triple::loongarch64: + case llvm::Triple::mips64: + case llvm::Triple::mips64el: + return true; +@@ -2750,6 +2813,8 @@ bool Generic_GCC::IsIntegratedAssemblerDefault() const { + case llvm::Triple::mips64el: + case llvm::Triple::msp430: + case llvm::Triple::m68k: ++ case llvm::Triple::loongarch32: ++ case llvm::Triple::loongarch64: + return true; + case llvm::Triple::sparc: + case llvm::Triple::sparcel: +diff --git a/lib/Driver/ToolChains/Linux.cpp b/lib/Driver/ToolChains/Linux.cpp +index 83cb4115..d001dcd0 100644 +--- a/lib/Driver/ToolChains/Linux.cpp ++++ b/lib/Driver/ToolChains/Linux.cpp +@@ -8,6 +8,7 @@ + + #include "Linux.h" + #include "Arch/ARM.h" ++#include "Arch/LoongArch.h" + #include "Arch/Mips.h" + #include "Arch/PPC.h" + #include "Arch/RISCV.h" +@@ -85,6 +86,11 @@ std::string Linux::getMultiarchTriple(const Driver &D, + case llvm::Triple::aarch64_be: + return "aarch64_be-linux-gnu"; + ++ case llvm::Triple::loongarch32: ++ return "loongarch32-linux-gnu"; ++ case llvm::Triple::loongarch64: ++ return "loongarch64-linux-gnu"; ++ + case llvm::Triple::m68k: + return "m68k-linux-gnu"; + +@@ -452,6 +458,14 @@ std::string Linux::getDynamicLinker(const ArgList &Args) const { + Loader = HF ? 
"ld-linux-armhf.so.3" : "ld-linux.so.3"; + break; + } ++ case llvm::Triple::loongarch32: ++ case llvm::Triple::loongarch64: { ++ StringRef CPUName, ABIName; ++ tools::loongarch::getLoongArchCPUAndABI(Args, Triple, CPUName, ABIName); ++ LibDir = "lib" + tools::loongarch::getLoongArchABILibSuffix(Args, Triple); ++ Loader = ("ld-linux-loongarch-" + ABIName + ".so.1").str(); ++ break; ++ } + case llvm::Triple::m68k: + LibDir = "lib"; + Loader = "ld.so.1"; +@@ -702,6 +716,7 @@ SanitizerMask Linux::getSupportedSanitizers() const { + const bool IsRISCV64 = getTriple().getArch() == llvm::Triple::riscv64; + const bool IsSystemZ = getTriple().getArch() == llvm::Triple::systemz; + const bool IsHexagon = getTriple().getArch() == llvm::Triple::hexagon; ++ const bool IsLoongArch64 = getTriple().getArch() == llvm::Triple::loongarch64; + SanitizerMask Res = ToolChain::getSupportedSanitizers(); + Res |= SanitizerKind::Address; + Res |= SanitizerKind::PointerCompare; +@@ -712,19 +727,20 @@ SanitizerMask Linux::getSupportedSanitizers() const { + Res |= SanitizerKind::Memory; + Res |= SanitizerKind::Vptr; + Res |= SanitizerKind::SafeStack; +- if (IsX86_64 || IsMIPS64 || IsAArch64) ++ if (IsX86_64 || IsMIPS64 || IsAArch64 || IsLoongArch64) + Res |= SanitizerKind::DataFlow; + if (IsX86_64 || IsMIPS64 || IsAArch64 || IsX86 || IsArmArch || IsPowerPC64 || +- IsRISCV64 || IsSystemZ || IsHexagon) ++ IsRISCV64 || IsSystemZ || IsHexagon || IsLoongArch64) + Res |= SanitizerKind::Leak; +- if (IsX86_64 || IsMIPS64 || IsAArch64 || IsPowerPC64 || IsSystemZ) ++ if (IsX86_64 || IsMIPS64 || IsAArch64 || IsPowerPC64 || IsSystemZ || ++ IsLoongArch64) + Res |= SanitizerKind::Thread; + if (IsX86_64) + Res |= SanitizerKind::KernelMemory; + if (IsX86 || IsX86_64) + Res |= SanitizerKind::Function; + if (IsX86_64 || IsMIPS64 || IsAArch64 || IsX86 || IsMIPS || IsArmArch || +- IsPowerPC64 || IsHexagon) ++ IsPowerPC64 || IsHexagon || IsLoongArch64) + Res |= SanitizerKind::Scudo; + if (IsX86_64 || IsAArch64) { + Res |= SanitizerKind::HWAddress; +diff --git a/lib/Driver/ToolChains/Linux.h b/lib/Driver/ToolChains/Linux.h +index 3c4546cb..a5648d79 100644 +--- a/lib/Driver/ToolChains/Linux.h ++++ b/lib/Driver/ToolChains/Linux.h +@@ -10,7 +10,6 @@ + #define LLVM_CLANG_LIB_DRIVER_TOOLCHAINS_LINUX_H + + #include "Gnu.h" +-#include "clang/Basic/LangOptions.h" + #include "clang/Driver/ToolChain.h" + + namespace clang { +@@ -47,10 +46,6 @@ public: + IsAArch64OutlineAtomicsDefault(const llvm::opt::ArgList &Args) const override; + bool isPIEDefault(const llvm::opt::ArgList &Args) const override; + bool IsMathErrnoDefault() const override; +- LangOptions::StackProtectorMode +- GetDefaultStackProtectorLevel(bool KernelOrKext) const override { +- return LangOptions::SSPStrong; +- } + SanitizerMask getSupportedSanitizers() const override; + void addProfileRTLibs(const llvm::opt::ArgList &Args, + llvm::opt::ArgStringList &CmdArgs) const override; +diff --git a/lib/Driver/XRayArgs.cpp b/lib/Driver/XRayArgs.cpp +index 63b57517..4e3ae3f2 100644 +--- a/lib/Driver/XRayArgs.cpp ++++ b/lib/Driver/XRayArgs.cpp +@@ -42,6 +42,8 @@ XRayArgs::XRayArgs(const ToolChain &TC, const ArgList &Args) { + case llvm::Triple::aarch64: + case llvm::Triple::hexagon: + case llvm::Triple::ppc64le: ++ case llvm::Triple::loongarch32: ++ case llvm::Triple::loongarch64: + case llvm::Triple::mips: + case llvm::Triple::mipsel: + case llvm::Triple::mips64: +diff --git a/lib/Headers/CMakeLists.txt b/lib/Headers/CMakeLists.txt +index 07898898..5ae7dbab 100644 +--- 
a/lib/Headers/CMakeLists.txt ++++ b/lib/Headers/CMakeLists.txt +@@ -85,6 +85,7 @@ set(files + invpcidintrin.h + iso646.h + keylockerintrin.h ++ larchintrin.h + limits.h + lwpintrin.h + lzcntintrin.h +@@ -94,6 +95,8 @@ set(files + module.modulemap + movdirintrin.h + msa.h ++ lsxintrin.h ++ lasxintrin.h + mwaitxintrin.h + nmmintrin.h + opencl-c.h +diff --git a/lib/Headers/larchintrin.h b/lib/Headers/larchintrin.h +new file mode 100644 +index 00000000..7e99f19a +--- /dev/null ++++ b/lib/Headers/larchintrin.h +@@ -0,0 +1,319 @@ ++//===----------- larchintrin.h - LoongArch BASE intrinsics ------------------===// ++// ++// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. ++// See https://llvm.org/LICENSE.txt for license information. ++// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception ++// ++//===----------------------------------------------------------------------===// ++// ++// This file contains the LoongArch Base intrinsics ++// ++//===----------------------------------------------------------------------===// ++#ifndef __LOONGARCH_BASE_H ++#define __LOONGARCH_BASE_H ++ ++#ifdef __cplusplus ++extern "C" { ++#endif ++ ++typedef struct drdtime{ ++ unsigned long dvalue; ++ unsigned long dtimeid; ++} __drdtime_t; ++ ++typedef struct rdtime{ ++ unsigned int value; ++ unsigned int timeid; ++} __rdtime_t; ++ ++/* Assembly instruction format: rd, csr_num */ ++/* Data types in instruction templates: unsigned int, uimm14_32 */ ++#define __csrrd(/*uimm14_32*/ _1) ((unsigned int)__builtin_loongarch_csrrd(_1)) ++ ++/* Assembly instruction format: rd, csr_num */ ++/* Data types in instruction templates: unsigned int, uimm14_32 */ ++#define __csrwr(/*unsigned int*/ _1, /*uimm14_32*/ _2) ((unsigned int)__builtin_loongarch_csrwr((unsigned int)(_1), (_2))) ++ ++/* Assembly instruction format: rd, rj, csr_num */ ++/* Data types in instruction templates: unsigned int, unsigned int, uimm14_32 */ ++#define __csrxchg(/*unsigned int*/ _1, /*unsigned int*/ _2, /*uimm14_32*/ _3) ((unsigned int)__builtin_loongarch_csrxchg((unsigned int)(_1), (unsigned int)(_2), (_3))) ++ ++/* Assembly instruction format: rd, csr_num */ ++/* Data types in instruction templates: unsigned long int, uimm14 */ ++#define __dcsrrd(/*uimm14*/ _1) ((unsigned long int)__builtin_loongarch_dcsrrd(_1)) ++ ++/* Assembly instruction format: rd, csr_num */ ++/* Data types in instruction templates: unsigned long int, uimm14 */ ++#define __dcsrwr(/*unsigned long int*/ _1, /*uimm14*/ _2) ((unsigned long int)__builtin_loongarch_dcsrwr((unsigned long int)(_1), (_2))) ++ ++/* Assembly instruction format: rd, rj, csr_num */ ++/* Data types in instruction templates: unsigned long int, unsigned long int, uimm14 */ ++#define __dcsrxchg(/*unsigned long int*/ _1, /*unsigned long int*/ _2, /*uimm14*/ _3) ((unsigned long int)__builtin_loongarch_dcsrxchg((unsigned long int)(_1), (unsigned long int)(_2), (_3))) ++ ++/* Assembly instruction format: rd, rj */ ++/* Data types in instruction templates: unsigned char, unsigned int */ ++extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) ++unsigned char __iocsrrd_b(unsigned int _1) ++{ ++ return (unsigned char)__builtin_loongarch_iocsrrd_b((unsigned int)_1); ++} ++ ++/* Assembly instruction format: rd, rj */ ++/* Data types in instruction templates: unsigned short, unsigned int */ ++extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) ++unsigned short __iocsrrd_h(unsigned int _1) ++{ ++ return (unsigned 
short)__builtin_loongarch_iocsrrd_h((unsigned int)_1); ++} ++ ++/* Assembly instruction format: rd, rj */ ++/* Data types in instruction templates: unsigned int, unsigned int */ ++extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) ++unsigned int __iocsrrd_w(unsigned int _1) ++{ ++ return (unsigned int)__builtin_loongarch_iocsrrd_w((unsigned int)_1); ++} ++ ++/* Assembly instruction format: rd, rj */ ++/* Data types in instruction templates: unsigned long int, unsigned int */ ++extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) ++unsigned long int __iocsrrd_d(unsigned int _1) ++{ ++ return (unsigned long int)__builtin_loongarch_iocsrrd_d((unsigned int)_1); ++} ++ ++/* Assembly instruction format: rd, rj */ ++/* Data types in instruction templates: unsigned char, unsigned int */ ++extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) ++void __iocsrwr_b(unsigned char _1, unsigned int _2) ++{ ++ return (void)__builtin_loongarch_iocsrwr_b((unsigned char)_1, (unsigned int)_2); ++} ++ ++/* Assembly instruction format: rd, rj */ ++/* Data types in instruction templates: unsigned short, unsigned int */ ++extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) ++void __iocsrwr_h(unsigned short _1, unsigned int _2) ++{ ++ return (void)__builtin_loongarch_iocsrwr_h((unsigned short)_1, (unsigned int)_2); ++} ++ ++/* Assembly instruction format: rd, rj */ ++/* Data types in instruction templates: unsigned int, unsigned int */ ++extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) ++void __iocsrwr_w(unsigned int _1, unsigned int _2) ++{ ++ return (void)__builtin_loongarch_iocsrwr_w((unsigned int)_1, (unsigned int)_2); ++} ++ ++/* Assembly instruction format: rd, rj */ ++/* Data types in instruction templates: unsigned long int, unsigned int */ ++extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) ++void __iocsrwr_d(unsigned long int _1, unsigned int _2) ++{ ++ return (void)__builtin_loongarch_iocsrwr_d((unsigned long int)_1, (unsigned int)_2); ++} ++ ++/* Assembly instruction format: op, rj, si12 */ ++/* Data types in instruction templates: uimm5, unsigned int, simm12 */ ++#define __cacop(/*uimm5*/ _1, /*unsigned int*/ _2, /*simm12*/ _3) ((void)__builtin_loongarch_cacop((_1), (unsigned int)(_2), (_3))) ++ ++/* Assembly instruction format: op, rj, si12 */ ++/* Data types in instruction templates: uimm5, unsigned long int, simm12 */ ++#define __dcacop(/*uimm5*/ _1, /*unsigned long int*/ _2, /*simm12*/ _3) ((void)__builtin_loongarch_dcacop((_1), (unsigned long int)(_2), (_3))) ++ ++#define __rdtime_d __builtin_loongarch_rdtime_d ++#define __rdtimel_w __builtin_loongarch_rdtimel_w ++#define __rdtimeh_w __builtin_loongarch_rdtimeh_w ++ ++extern __inline __drdtime_t __attribute__((__gnu_inline__, __always_inline__, __artificial__)) ++__builtin_loongarch_rdtime_d (void) ++{ ++ __drdtime_t drdtime; ++ __asm__ volatile ( ++ "rdtime.d\t%[val],%[tid]\n\t" ++ : [val]"=&r"(drdtime.dvalue),[tid]"=&r"(drdtime.dtimeid) ++ : ++ ); ++ return drdtime; ++} ++ ++extern __inline __rdtime_t __attribute__((__gnu_inline__, __always_inline__, __artificial__)) ++__builtin_loongarch_rdtimeh_w (void) ++{ ++ __rdtime_t rdtime; ++ __asm__ volatile ( ++ "rdtimeh.w\t%[val],%[tid]\n\t" ++ : [val]"=&r"(rdtime.value),[tid]"=&r"(rdtime.timeid) ++ : ++ ); ++ return rdtime; ++} ++ ++extern __inline __rdtime_t __attribute__((__gnu_inline__, __always_inline__, 
__artificial__)) ++__builtin_loongarch_rdtimel_w (void) ++{ ++ __rdtime_t rdtime; ++ __asm__ volatile ( ++ "rdtimel.w\t%[val],%[tid]\n\t" ++ : [val]"=&r"(rdtime.value),[tid]"=&r"(rdtime.timeid) ++ : ++ ); ++ return rdtime; ++} ++ ++/* Assembly instruction format: rd, rj, rk */ ++/* Data types in instruction templates: int, char, int */ ++extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) ++int __crc_w_b_w(char _1, int _2) ++{ ++ return (int)__builtin_loongarch_crc_w_b_w((char)_1, (int)_2); ++} ++ ++/* Assembly instruction format: rd, rj, rk */ ++/* Data types in instruction templates: int, short, int */ ++extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) ++int __crc_w_h_w(short _1, int _2) ++{ ++ return (int)__builtin_loongarch_crc_w_h_w((short)_1, (int)_2); ++} ++ ++/* Assembly instruction format: rd, rj, rk */ ++/* Data types in instruction templates: int, int, int */ ++extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) ++int __crc_w_w_w(int _1, int _2) ++{ ++ return (int)__builtin_loongarch_crc_w_w_w((int)_1, (int)_2); ++} ++ ++/* Assembly instruction format: rd, rj, rk */ ++/* Data types in instruction templates: int, long int, int */ ++extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) ++int __crc_w_d_w(long int _1, int _2) ++{ ++ return (int)__builtin_loongarch_crc_w_d_w((long int)_1, (int)_2); ++} ++ ++/* Assembly instruction format: rd, rj, rk */ ++/* Data types in instruction templates: int, char, int */ ++extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) ++int __crcc_w_b_w(char _1, int _2) ++{ ++ return (int)__builtin_loongarch_crcc_w_b_w((char)_1, (int)_2); ++} ++ ++/* Assembly instruction format: rd, rj, rk */ ++/* Data types in instruction templates: int, short, int */ ++extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) ++int __crcc_w_h_w(short _1, int _2) ++{ ++ return (int)__builtin_loongarch_crcc_w_h_w((short)_1, (int)_2); ++} ++ ++/* Assembly instruction format: rd, rj, rk */ ++/* Data types in instruction templates: int, int, int */ ++extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) ++int __crcc_w_w_w(int _1, int _2) ++{ ++ return (int)__builtin_loongarch_crcc_w_w_w((int)_1, (int)_2); ++} ++ ++/* Assembly instruction format: rd, rj, rk */ ++/* Data types in instruction templates: int, long int, int */ ++extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) ++int __crcc_w_d_w(long int _1, int _2) ++{ ++ return (int)__builtin_loongarch_crcc_w_d_w((long int)_1, (int)_2); ++} ++ ++extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) ++void __tlbclr() ++{ ++ return (void)__builtin_loongarch_tlbclr(); ++} ++ ++extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) ++void __tlbflush() ++{ ++ return (void)__builtin_loongarch_tlbflush(); ++} ++ ++extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) ++void __tlbfill() ++{ ++ return (void)__builtin_loongarch_tlbfill(); ++} ++ ++extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) ++void __tlbrd() ++{ ++ return (void)__builtin_loongarch_tlbrd(); ++} ++ ++extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) ++void __tlbwr() ++{ ++ return (void)__builtin_loongarch_tlbwr(); ++} ++ ++extern __inline __attribute__((__gnu_inline__, 
__always_inline__, __artificial__)) ++void __tlbsrch() ++{ ++ return (void)__builtin_loongarch_tlbsrch(); ++} ++ ++/* Assembly instruction format: code */ ++/* Data types in instruction templates: uimm15 */ ++#define __syscall(/*uimm15*/ _1) ((void)__builtin_loongarch_syscall(_1)) ++ ++/* Assembly instruction format: code */ ++/* Data types in instruction templates: uimm15 */ ++#define __break(/*uimm15*/ _1) ((void)__builtin_loongarch_break(_1)) ++ ++/* Assembly instruction format: hint */ ++/* Data types in instruction templates: uimm15 */ ++#define __dbar(/*uimm15*/ _1) ((void)__builtin_loongarch_dbar(_1)) ++ ++/* Assembly instruction format: hint */ ++/* Data types in instruction templates: uimm15 */ ++#define __ibar(/*uimm15*/ _1) ((void)__builtin_loongarch_ibar(_1)) ++ ++/* Assembly instruction format: rj, rk */ ++/* Data types in instruction templates: long int, long int */ ++extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) ++void __asrtle_d(long int _1, long int _2) ++{ ++ return (void)__builtin_loongarch_asrtle_d((long int)_1, (long int)_2); ++} ++ ++/* Assembly instruction format: rj, rk */ ++/* Data types in instruction templates: long int, long int */ ++extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) ++void __asrtgt_d(long int _1, long int _2) ++{ ++ return (void)__builtin_loongarch_asrtgt_d((long int)_1, (long int)_2); ++} ++ ++#define __movfcsr2gr(uimm5) \ ++({ \ ++ unsigned int rd; \ ++ __asm__ volatile ( \ ++ "movfcsr2gr %0, $fcsr" #uimm5 \ ++ : "=&r"(rd) \ ++ : \ ++ ); rd; \ ++}) ++ ++#define __movgr2fcsr(uimm5, rj) \ ++{ \ ++ __asm__ volatile ( \ ++ "movgr2fcsr $fcsr" #uimm5 ", %0" \ ++ : \ ++ : "r" (rj) \ ++ ); \ ++} ++ ++#ifdef __cplusplus ++} ++#endif ++#endif /* __LOONGARCH_BASE_H */ +diff --git a/lib/Headers/lasxintrin.h b/lib/Headers/lasxintrin.h +new file mode 100644 +index 00000000..48a0a176 +--- /dev/null ++++ b/lib/Headers/lasxintrin.h +@@ -0,0 +1,5349 @@ ++//===----------- lasxintrin.h - LoongArch LASX intrinsics ++//------------------===// ++// ++// The LLVM Compiler Infrastructure ++// ++// This file is distributed under the University of Illinois Open Source ++// License. See LICENSE.TXT for details. ++// ++//===----------------------------------------------------------------------===// ++// ++// This file contains the LoongArch LASX intrinsics. 
++// ++//===----------------------------------------------------------------------===// ++ ++#ifndef _GCC_LOONGSON_ASXINTRIN_H ++#define _GCC_LOONGSON_ASXINTRIN_H 1 ++ ++#if defined(__loongarch_asx) ++ ++typedef signed char v32i8 __attribute__((vector_size(32), aligned(32))); ++typedef signed char v32i8_b __attribute__((vector_size(32), aligned(1))); ++typedef unsigned char v32u8 __attribute__((vector_size(32), aligned(32))); ++typedef unsigned char v32u8_b __attribute__((vector_size(32), aligned(1))); ++typedef short v16i16 __attribute__((vector_size(32), aligned(32))); ++typedef short v16i16_h __attribute__((vector_size(32), aligned(2))); ++typedef unsigned short v16u16 __attribute__((vector_size(32), aligned(32))); ++typedef unsigned short v16u16_h __attribute__((vector_size(32), aligned(2))); ++typedef int v8i32 __attribute__((vector_size(32), aligned(32))); ++typedef int v8i32_w __attribute__((vector_size(32), aligned(4))); ++typedef unsigned int v8u32 __attribute__((vector_size(32), aligned(32))); ++typedef unsigned int v8u32_w __attribute__((vector_size(32), aligned(4))); ++typedef long long v4i64 __attribute__((vector_size(32), aligned(32))); ++typedef long long v4i64_d __attribute__((vector_size(32), aligned(8))); ++typedef unsigned long long v4u64 __attribute__((vector_size(32), aligned(32))); ++typedef unsigned long long v4u64_d __attribute__((vector_size(32), aligned(8))); ++typedef float v8f32 __attribute__((vector_size(32), aligned(32))); ++typedef float v8f32_w __attribute__((vector_size(32), aligned(4))); ++typedef double v4f64 __attribute__((vector_size(32), aligned(32))); ++typedef double v4f64_d __attribute__((vector_size(32), aligned(8))); ++ ++typedef double v4f64 __attribute__((vector_size(32), aligned(32))); ++typedef double v4f64_d __attribute__((vector_size(32), aligned(8))); ++ ++typedef float __m256 __attribute__((__vector_size__(32), __may_alias__)); ++typedef long long __m256i __attribute__((__vector_size__(32), __may_alias__)); ++typedef double __m256d __attribute__((__vector_size__(32), __may_alias__)); ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: V32QI, V32QI, V32QI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvsll_b(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvsll_b((v32i8)_1, (v32i8)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: V16HI, V16HI, V16HI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvsll_h(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvsll_h((v16i16)_1, (v16i16)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: V8SI, V8SI, V8SI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvsll_w(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvsll_w((v8i32)_1, (v8i32)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: V4DI, V4DI, V4DI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvsll_d(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvsll_d((v4i64)_1, (v4i64)_2); ++} ++ ++/* Assembly instruction format: xd, xj, ui3 */ ++/* Data types in instruction templates: V32QI, V32QI, UQI */ ++#define __lasx_xvslli_b(/*__m256i*/ _1, /*ui3*/ _2) \ ++ 
((__m256i)__builtin_lasx_xvslli_b((v32i8)(_1), (_2))) ++ ++/* Assembly instruction format: xd, xj, ui4 */ ++/* Data types in instruction templates: V16HI, V16HI, UQI */ ++#define __lasx_xvslli_h(/*__m256i*/ _1, /*ui4*/ _2) \ ++ ((__m256i)__builtin_lasx_xvslli_h((v16i16)(_1), (_2))) ++ ++/* Assembly instruction format: xd, xj, ui5 */ ++/* Data types in instruction templates: V8SI, V8SI, UQI */ ++#define __lasx_xvslli_w(/*__m256i*/ _1, /*ui5*/ _2) \ ++ ((__m256i)__builtin_lasx_xvslli_w((v8i32)(_1), (_2))) ++ ++/* Assembly instruction format: xd, xj, ui6 */ ++/* Data types in instruction templates: V4DI, V4DI, UQI */ ++#define __lasx_xvslli_d(/*__m256i*/ _1, /*ui6*/ _2) \ ++ ((__m256i)__builtin_lasx_xvslli_d((v4i64)(_1), (_2))) ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: V32QI, V32QI, V32QI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvsra_b(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvsra_b((v32i8)_1, (v32i8)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: V16HI, V16HI, V16HI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvsra_h(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvsra_h((v16i16)_1, (v16i16)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: V8SI, V8SI, V8SI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvsra_w(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvsra_w((v8i32)_1, (v8i32)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: V4DI, V4DI, V4DI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvsra_d(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvsra_d((v4i64)_1, (v4i64)_2); ++} ++ ++/* Assembly instruction format: xd, xj, ui3 */ ++/* Data types in instruction templates: V32QI, V32QI, UQI */ ++#define __lasx_xvsrai_b(/*__m256i*/ _1, /*ui3*/ _2) \ ++ ((__m256i)__builtin_lasx_xvsrai_b((v32i8)(_1), (_2))) ++ ++/* Assembly instruction format: xd, xj, ui4 */ ++/* Data types in instruction templates: V16HI, V16HI, UQI */ ++#define __lasx_xvsrai_h(/*__m256i*/ _1, /*ui4*/ _2) \ ++ ((__m256i)__builtin_lasx_xvsrai_h((v16i16)(_1), (_2))) ++ ++/* Assembly instruction format: xd, xj, ui5 */ ++/* Data types in instruction templates: V8SI, V8SI, UQI */ ++#define __lasx_xvsrai_w(/*__m256i*/ _1, /*ui5*/ _2) \ ++ ((__m256i)__builtin_lasx_xvsrai_w((v8i32)(_1), (_2))) ++ ++/* Assembly instruction format: xd, xj, ui6 */ ++/* Data types in instruction templates: V4DI, V4DI, UQI */ ++#define __lasx_xvsrai_d(/*__m256i*/ _1, /*ui6*/ _2) \ ++ ((__m256i)__builtin_lasx_xvsrai_d((v4i64)(_1), (_2))) ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: V32QI, V32QI, V32QI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvsrar_b(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvsrar_b((v32i8)_1, (v32i8)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: V16HI, V16HI, V16HI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvsrar_h(__m256i _1, __m256i _2) { ++ return 
(__m256i)__builtin_lasx_xvsrar_h((v16i16)_1, (v16i16)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: V8SI, V8SI, V8SI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvsrar_w(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvsrar_w((v8i32)_1, (v8i32)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: V4DI, V4DI, V4DI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvsrar_d(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvsrar_d((v4i64)_1, (v4i64)_2); ++} ++ ++/* Assembly instruction format: xd, xj, ui3 */ ++/* Data types in instruction templates: V32QI, V32QI, UQI */ ++#define __lasx_xvsrari_b(/*__m256i*/ _1, /*ui3*/ _2) \ ++ ((__m256i)__builtin_lasx_xvsrari_b((v32i8)(_1), (_2))) ++ ++/* Assembly instruction format: xd, xj, ui4 */ ++/* Data types in instruction templates: V16HI, V16HI, UQI */ ++#define __lasx_xvsrari_h(/*__m256i*/ _1, /*ui4*/ _2) \ ++ ((__m256i)__builtin_lasx_xvsrari_h((v16i16)(_1), (_2))) ++ ++/* Assembly instruction format: xd, xj, ui5 */ ++/* Data types in instruction templates: V8SI, V8SI, UQI */ ++#define __lasx_xvsrari_w(/*__m256i*/ _1, /*ui5*/ _2) \ ++ ((__m256i)__builtin_lasx_xvsrari_w((v8i32)(_1), (_2))) ++ ++/* Assembly instruction format: xd, xj, ui6 */ ++/* Data types in instruction templates: V4DI, V4DI, UQI */ ++#define __lasx_xvsrari_d(/*__m256i*/ _1, /*ui6*/ _2) \ ++ ((__m256i)__builtin_lasx_xvsrari_d((v4i64)(_1), (_2))) ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: V32QI, V32QI, V32QI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvsrl_b(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvsrl_b((v32i8)_1, (v32i8)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: V16HI, V16HI, V16HI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvsrl_h(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvsrl_h((v16i16)_1, (v16i16)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: V8SI, V8SI, V8SI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvsrl_w(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvsrl_w((v8i32)_1, (v8i32)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: V4DI, V4DI, V4DI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvsrl_d(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvsrl_d((v4i64)_1, (v4i64)_2); ++} ++ ++/* Assembly instruction format: xd, xj, ui3 */ ++/* Data types in instruction templates: V32QI, V32QI, UQI */ ++#define __lasx_xvsrli_b(/*__m256i*/ _1, /*ui3*/ _2) \ ++ ((__m256i)__builtin_lasx_xvsrli_b((v32i8)(_1), (_2))) ++ ++/* Assembly instruction format: xd, xj, ui4 */ ++/* Data types in instruction templates: V16HI, V16HI, UQI */ ++#define __lasx_xvsrli_h(/*__m256i*/ _1, /*ui4*/ _2) \ ++ ((__m256i)__builtin_lasx_xvsrli_h((v16i16)(_1), (_2))) ++ ++/* Assembly instruction format: xd, xj, ui5 */ ++/* Data types in instruction templates: V8SI, V8SI, UQI */ ++#define __lasx_xvsrli_w(/*__m256i*/ _1, /*ui5*/ _2) \ ++ 
((__m256i)__builtin_lasx_xvsrli_w((v8i32)(_1), (_2))) ++ ++/* Assembly instruction format: xd, xj, ui6 */ ++/* Data types in instruction templates: V4DI, V4DI, UQI */ ++#define __lasx_xvsrli_d(/*__m256i*/ _1, /*ui6*/ _2) \ ++ ((__m256i)__builtin_lasx_xvsrli_d((v4i64)(_1), (_2))) ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: V32QI, V32QI, V32QI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvsrlr_b(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvsrlr_b((v32i8)_1, (v32i8)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: V16HI, V16HI, V16HI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvsrlr_h(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvsrlr_h((v16i16)_1, (v16i16)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: V8SI, V8SI, V8SI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvsrlr_w(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvsrlr_w((v8i32)_1, (v8i32)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: V4DI, V4DI, V4DI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvsrlr_d(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvsrlr_d((v4i64)_1, (v4i64)_2); ++} ++ ++/* Assembly instruction format: xd, xj, ui3 */ ++/* Data types in instruction templates: V32QI, V32QI, UQI */ ++#define __lasx_xvsrlri_b(/*__m256i*/ _1, /*ui3*/ _2) \ ++ ((__m256i)__builtin_lasx_xvsrlri_b((v32i8)(_1), (_2))) ++ ++/* Assembly instruction format: xd, xj, ui4 */ ++/* Data types in instruction templates: V16HI, V16HI, UQI */ ++#define __lasx_xvsrlri_h(/*__m256i*/ _1, /*ui4*/ _2) \ ++ ((__m256i)__builtin_lasx_xvsrlri_h((v16i16)(_1), (_2))) ++ ++/* Assembly instruction format: xd, xj, ui5 */ ++/* Data types in instruction templates: V8SI, V8SI, UQI */ ++#define __lasx_xvsrlri_w(/*__m256i*/ _1, /*ui5*/ _2) \ ++ ((__m256i)__builtin_lasx_xvsrlri_w((v8i32)(_1), (_2))) ++ ++/* Assembly instruction format: xd, xj, ui6 */ ++/* Data types in instruction templates: V4DI, V4DI, UQI */ ++#define __lasx_xvsrlri_d(/*__m256i*/ _1, /*ui6*/ _2) \ ++ ((__m256i)__builtin_lasx_xvsrlri_d((v4i64)(_1), (_2))) ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: UV32QI, UV32QI, UV32QI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvbitclr_b(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvbitclr_b((v32u8)_1, (v32u8)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: UV16HI, UV16HI, UV16HI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvbitclr_h(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvbitclr_h((v16u16)_1, (v16u16)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: UV8SI, UV8SI, UV8SI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvbitclr_w(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvbitclr_w((v8u32)_1, (v8u32)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk */ 
++/* Data types in instruction templates: UV4DI, UV4DI, UV4DI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvbitclr_d(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvbitclr_d((v4u64)_1, (v4u64)_2); ++} ++ ++/* Assembly instruction format: xd, xj, ui3 */ ++/* Data types in instruction templates: UV32QI, UV32QI, UQI */ ++#define __lasx_xvbitclri_b(/*__m256i*/ _1, /*ui3*/ _2) \ ++ ((__m256i)__builtin_lasx_xvbitclri_b((v32u8)(_1), (_2))) ++ ++/* Assembly instruction format: xd, xj, ui4 */ ++/* Data types in instruction templates: UV16HI, UV16HI, UQI */ ++#define __lasx_xvbitclri_h(/*__m256i*/ _1, /*ui4*/ _2) \ ++ ((__m256i)__builtin_lasx_xvbitclri_h((v16u16)(_1), (_2))) ++ ++/* Assembly instruction format: xd, xj, ui5 */ ++/* Data types in instruction templates: UV8SI, UV8SI, UQI */ ++#define __lasx_xvbitclri_w(/*__m256i*/ _1, /*ui5*/ _2) \ ++ ((__m256i)__builtin_lasx_xvbitclri_w((v8u32)(_1), (_2))) ++ ++/* Assembly instruction format: xd, xj, ui6 */ ++/* Data types in instruction templates: UV4DI, UV4DI, UQI */ ++#define __lasx_xvbitclri_d(/*__m256i*/ _1, /*ui6*/ _2) \ ++ ((__m256i)__builtin_lasx_xvbitclri_d((v4u64)(_1), (_2))) ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: UV32QI, UV32QI, UV32QI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvbitset_b(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvbitset_b((v32u8)_1, (v32u8)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: UV16HI, UV16HI, UV16HI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvbitset_h(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvbitset_h((v16u16)_1, (v16u16)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: UV8SI, UV8SI, UV8SI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvbitset_w(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvbitset_w((v8u32)_1, (v8u32)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: UV4DI, UV4DI, UV4DI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvbitset_d(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvbitset_d((v4u64)_1, (v4u64)_2); ++} ++ ++/* Assembly instruction format: xd, xj, ui3 */ ++/* Data types in instruction templates: UV32QI, UV32QI, UQI */ ++#define __lasx_xvbitseti_b(/*__m256i*/ _1, /*ui3*/ _2) \ ++ ((__m256i)__builtin_lasx_xvbitseti_b((v32u8)(_1), (_2))) ++ ++/* Assembly instruction format: xd, xj, ui4 */ ++/* Data types in instruction templates: UV16HI, UV16HI, UQI */ ++#define __lasx_xvbitseti_h(/*__m256i*/ _1, /*ui4*/ _2) \ ++ ((__m256i)__builtin_lasx_xvbitseti_h((v16u16)(_1), (_2))) ++ ++/* Assembly instruction format: xd, xj, ui5 */ ++/* Data types in instruction templates: UV8SI, UV8SI, UQI */ ++#define __lasx_xvbitseti_w(/*__m256i*/ _1, /*ui5*/ _2) \ ++ ((__m256i)__builtin_lasx_xvbitseti_w((v8u32)(_1), (_2))) ++ ++/* Assembly instruction format: xd, xj, ui6 */ ++/* Data types in instruction templates: UV4DI, UV4DI, UQI */ ++#define __lasx_xvbitseti_d(/*__m256i*/ _1, /*ui6*/ _2) \ ++ ((__m256i)__builtin_lasx_xvbitseti_d((v4u64)(_1), (_2))) ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data 
types in instruction templates: UV32QI, UV32QI, UV32QI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvbitrev_b(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvbitrev_b((v32u8)_1, (v32u8)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: UV16HI, UV16HI, UV16HI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvbitrev_h(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvbitrev_h((v16u16)_1, (v16u16)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: UV8SI, UV8SI, UV8SI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvbitrev_w(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvbitrev_w((v8u32)_1, (v8u32)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: UV4DI, UV4DI, UV4DI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvbitrev_d(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvbitrev_d((v4u64)_1, (v4u64)_2); ++} ++ ++/* Assembly instruction format: xd, xj, ui3 */ ++/* Data types in instruction templates: UV32QI, UV32QI, UQI */ ++#define __lasx_xvbitrevi_b(/*__m256i*/ _1, /*ui3*/ _2) \ ++ ((__m256i)__builtin_lasx_xvbitrevi_b((v32u8)(_1), (_2))) ++ ++/* Assembly instruction format: xd, xj, ui4 */ ++/* Data types in instruction templates: UV16HI, UV16HI, UQI */ ++#define __lasx_xvbitrevi_h(/*__m256i*/ _1, /*ui4*/ _2) \ ++ ((__m256i)__builtin_lasx_xvbitrevi_h((v16u16)(_1), (_2))) ++ ++/* Assembly instruction format: xd, xj, ui5 */ ++/* Data types in instruction templates: UV8SI, UV8SI, UQI */ ++#define __lasx_xvbitrevi_w(/*__m256i*/ _1, /*ui5*/ _2) \ ++ ((__m256i)__builtin_lasx_xvbitrevi_w((v8u32)(_1), (_2))) ++ ++/* Assembly instruction format: xd, xj, ui6 */ ++/* Data types in instruction templates: UV4DI, UV4DI, UQI */ ++#define __lasx_xvbitrevi_d(/*__m256i*/ _1, /*ui6*/ _2) \ ++ ((__m256i)__builtin_lasx_xvbitrevi_d((v4u64)(_1), (_2))) ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: V32QI, V32QI, V32QI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvadd_b(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvadd_b((v32i8)_1, (v32i8)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: V16HI, V16HI, V16HI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvadd_h(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvadd_h((v16i16)_1, (v16i16)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: V8SI, V8SI, V8SI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvadd_w(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvadd_w((v8i32)_1, (v8i32)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: V4DI, V4DI, V4DI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvadd_d(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvadd_d((v4i64)_1, (v4i64)_2); ++} ++ ++/* Assembly instruction format: xd, xj, 
ui5 */ ++/* Data types in instruction templates: V32QI, V32QI, UQI */ ++#define __lasx_xvaddi_bu(/*__m256i*/ _1, /*ui5*/ _2) \ ++ ((__m256i)__builtin_lasx_xvaddi_bu((v32i8)(_1), (_2))) ++ ++/* Assembly instruction format: xd, xj, ui5 */ ++/* Data types in instruction templates: V16HI, V16HI, UQI */ ++#define __lasx_xvaddi_hu(/*__m256i*/ _1, /*ui5*/ _2) \ ++ ((__m256i)__builtin_lasx_xvaddi_hu((v16i16)(_1), (_2))) ++ ++/* Assembly instruction format: xd, xj, ui5 */ ++/* Data types in instruction templates: V8SI, V8SI, UQI */ ++#define __lasx_xvaddi_wu(/*__m256i*/ _1, /*ui5*/ _2) \ ++ ((__m256i)__builtin_lasx_xvaddi_wu((v8i32)(_1), (_2))) ++ ++/* Assembly instruction format: xd, xj, ui5 */ ++/* Data types in instruction templates: V4DI, V4DI, UQI */ ++#define __lasx_xvaddi_du(/*__m256i*/ _1, /*ui5*/ _2) \ ++ ((__m256i)__builtin_lasx_xvaddi_du((v4i64)(_1), (_2))) ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: V32QI, V32QI, V32QI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvsub_b(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvsub_b((v32i8)_1, (v32i8)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: V16HI, V16HI, V16HI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvsub_h(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvsub_h((v16i16)_1, (v16i16)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: V8SI, V8SI, V8SI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvsub_w(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvsub_w((v8i32)_1, (v8i32)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: V4DI, V4DI, V4DI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvsub_d(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvsub_d((v4i64)_1, (v4i64)_2); ++} ++ ++/* Assembly instruction format: xd, xj, ui5 */ ++/* Data types in instruction templates: V32QI, V32QI, UQI */ ++#define __lasx_xvsubi_bu(/*__m256i*/ _1, /*ui5*/ _2) \ ++ ((__m256i)__builtin_lasx_xvsubi_bu((v32i8)(_1), (_2))) ++ ++/* Assembly instruction format: xd, xj, ui5 */ ++/* Data types in instruction templates: V16HI, V16HI, UQI */ ++#define __lasx_xvsubi_hu(/*__m256i*/ _1, /*ui5*/ _2) \ ++ ((__m256i)__builtin_lasx_xvsubi_hu((v16i16)(_1), (_2))) ++ ++/* Assembly instruction format: xd, xj, ui5 */ ++/* Data types in instruction templates: V8SI, V8SI, UQI */ ++#define __lasx_xvsubi_wu(/*__m256i*/ _1, /*ui5*/ _2) \ ++ ((__m256i)__builtin_lasx_xvsubi_wu((v8i32)(_1), (_2))) ++ ++/* Assembly instruction format: xd, xj, ui5 */ ++/* Data types in instruction templates: V4DI, V4DI, UQI */ ++#define __lasx_xvsubi_du(/*__m256i*/ _1, /*ui5*/ _2) \ ++ ((__m256i)__builtin_lasx_xvsubi_du((v4i64)(_1), (_2))) ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: V32QI, V32QI, V32QI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvmax_b(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvmax_b((v32i8)_1, (v32i8)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: V16HI, V16HI, V16HI */ ++extern __inline ++ 
__attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvmax_h(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvmax_h((v16i16)_1, (v16i16)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: V8SI, V8SI, V8SI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvmax_w(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvmax_w((v8i32)_1, (v8i32)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: V4DI, V4DI, V4DI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvmax_d(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvmax_d((v4i64)_1, (v4i64)_2); ++} ++ ++/* Assembly instruction format: xd, xj, si5 */ ++/* Data types in instruction templates: V32QI, V32QI, QI */ ++#define __lasx_xvmaxi_b(/*__m256i*/ _1, /*si5*/ _2) \ ++ ((__m256i)__builtin_lasx_xvmaxi_b((v32i8)(_1), (_2))) ++ ++/* Assembly instruction format: xd, xj, si5 */ ++/* Data types in instruction templates: V16HI, V16HI, QI */ ++#define __lasx_xvmaxi_h(/*__m256i*/ _1, /*si5*/ _2) \ ++ ((__m256i)__builtin_lasx_xvmaxi_h((v16i16)(_1), (_2))) ++ ++/* Assembly instruction format: xd, xj, si5 */ ++/* Data types in instruction templates: V8SI, V8SI, QI */ ++#define __lasx_xvmaxi_w(/*__m256i*/ _1, /*si5*/ _2) \ ++ ((__m256i)__builtin_lasx_xvmaxi_w((v8i32)(_1), (_2))) ++ ++/* Assembly instruction format: xd, xj, si5 */ ++/* Data types in instruction templates: V4DI, V4DI, QI */ ++#define __lasx_xvmaxi_d(/*__m256i*/ _1, /*si5*/ _2) \ ++ ((__m256i)__builtin_lasx_xvmaxi_d((v4i64)(_1), (_2))) ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: UV32QI, UV32QI, UV32QI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvmax_bu(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvmax_bu((v32u8)_1, (v32u8)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: UV16HI, UV16HI, UV16HI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvmax_hu(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvmax_hu((v16u16)_1, (v16u16)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: UV8SI, UV8SI, UV8SI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvmax_wu(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvmax_wu((v8u32)_1, (v8u32)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: UV4DI, UV4DI, UV4DI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvmax_du(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvmax_du((v4u64)_1, (v4u64)_2); ++} ++ ++/* Assembly instruction format: xd, xj, ui5 */ ++/* Data types in instruction templates: UV32QI, UV32QI, UQI */ ++#define __lasx_xvmaxi_bu(/*__m256i*/ _1, /*ui5*/ _2) \ ++ ((__m256i)__builtin_lasx_xvmaxi_bu((v32u8)(_1), (_2))) ++ ++/* Assembly instruction format: xd, xj, ui5 */ ++/* Data types in instruction templates: UV16HI, UV16HI, UQI */ ++#define __lasx_xvmaxi_hu(/*__m256i*/ _1, /*ui5*/ _2) \ ++ ((__m256i)__builtin_lasx_xvmaxi_hu((v16u16)(_1), (_2))) ++ ++/* Assembly instruction 
format: xd, xj, ui5 */ ++/* Data types in instruction templates: UV8SI, UV8SI, UQI */ ++#define __lasx_xvmaxi_wu(/*__m256i*/ _1, /*ui5*/ _2) \ ++ ((__m256i)__builtin_lasx_xvmaxi_wu((v8u32)(_1), (_2))) ++ ++/* Assembly instruction format: xd, xj, ui5 */ ++/* Data types in instruction templates: UV4DI, UV4DI, UQI */ ++#define __lasx_xvmaxi_du(/*__m256i*/ _1, /*ui5*/ _2) \ ++ ((__m256i)__builtin_lasx_xvmaxi_du((v4u64)(_1), (_2))) ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: V32QI, V32QI, V32QI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvmin_b(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvmin_b((v32i8)_1, (v32i8)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: V16HI, V16HI, V16HI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvmin_h(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvmin_h((v16i16)_1, (v16i16)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: V8SI, V8SI, V8SI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvmin_w(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvmin_w((v8i32)_1, (v8i32)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: V4DI, V4DI, V4DI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvmin_d(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvmin_d((v4i64)_1, (v4i64)_2); ++} ++ ++/* Assembly instruction format: xd, xj, si5 */ ++/* Data types in instruction templates: V32QI, V32QI, QI */ ++#define __lasx_xvmini_b(/*__m256i*/ _1, /*si5*/ _2) \ ++ ((__m256i)__builtin_lasx_xvmini_b((v32i8)(_1), (_2))) ++ ++/* Assembly instruction format: xd, xj, si5 */ ++/* Data types in instruction templates: V16HI, V16HI, QI */ ++#define __lasx_xvmini_h(/*__m256i*/ _1, /*si5*/ _2) \ ++ ((__m256i)__builtin_lasx_xvmini_h((v16i16)(_1), (_2))) ++ ++/* Assembly instruction format: xd, xj, si5 */ ++/* Data types in instruction templates: V8SI, V8SI, QI */ ++#define __lasx_xvmini_w(/*__m256i*/ _1, /*si5*/ _2) \ ++ ((__m256i)__builtin_lasx_xvmini_w((v8i32)(_1), (_2))) ++ ++/* Assembly instruction format: xd, xj, si5 */ ++/* Data types in instruction templates: V4DI, V4DI, QI */ ++#define __lasx_xvmini_d(/*__m256i*/ _1, /*si5*/ _2) \ ++ ((__m256i)__builtin_lasx_xvmini_d((v4i64)(_1), (_2))) ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: UV32QI, UV32QI, UV32QI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvmin_bu(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvmin_bu((v32u8)_1, (v32u8)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: UV16HI, UV16HI, UV16HI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvmin_hu(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvmin_hu((v16u16)_1, (v16u16)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: UV8SI, UV8SI, UV8SI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvmin_wu(__m256i _1, __m256i _2) { ++ 
return (__m256i)__builtin_lasx_xvmin_wu((v8u32)_1, (v8u32)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: UV4DI, UV4DI, UV4DI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvmin_du(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvmin_du((v4u64)_1, (v4u64)_2); ++} ++ ++/* Assembly instruction format: xd, xj, ui5 */ ++/* Data types in instruction templates: UV32QI, UV32QI, UQI */ ++#define __lasx_xvmini_bu(/*__m256i*/ _1, /*ui5*/ _2) \ ++ ((__m256i)__builtin_lasx_xvmini_bu((v32u8)(_1), (_2))) ++ ++/* Assembly instruction format: xd, xj, ui5 */ ++/* Data types in instruction templates: UV16HI, UV16HI, UQI */ ++#define __lasx_xvmini_hu(/*__m256i*/ _1, /*ui5*/ _2) \ ++ ((__m256i)__builtin_lasx_xvmini_hu((v16u16)(_1), (_2))) ++ ++/* Assembly instruction format: xd, xj, ui5 */ ++/* Data types in instruction templates: UV8SI, UV8SI, UQI */ ++#define __lasx_xvmini_wu(/*__m256i*/ _1, /*ui5*/ _2) \ ++ ((__m256i)__builtin_lasx_xvmini_wu((v8u32)(_1), (_2))) ++ ++/* Assembly instruction format: xd, xj, ui5 */ ++/* Data types in instruction templates: UV4DI, UV4DI, UQI */ ++#define __lasx_xvmini_du(/*__m256i*/ _1, /*ui5*/ _2) \ ++ ((__m256i)__builtin_lasx_xvmini_du((v4u64)(_1), (_2))) ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: V32QI, V32QI, V32QI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvseq_b(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvseq_b((v32i8)_1, (v32i8)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: V16HI, V16HI, V16HI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvseq_h(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvseq_h((v16i16)_1, (v16i16)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: V8SI, V8SI, V8SI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvseq_w(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvseq_w((v8i32)_1, (v8i32)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: V4DI, V4DI, V4DI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvseq_d(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvseq_d((v4i64)_1, (v4i64)_2); ++} ++ ++/* Assembly instruction format: xd, xj, si5 */ ++/* Data types in instruction templates: V32QI, V32QI, QI */ ++#define __lasx_xvseqi_b(/*__m256i*/ _1, /*si5*/ _2) \ ++ ((__m256i)__builtin_lasx_xvseqi_b((v32i8)(_1), (_2))) ++ ++/* Assembly instruction format: xd, xj, si5 */ ++/* Data types in instruction templates: V16HI, V16HI, QI */ ++#define __lasx_xvseqi_h(/*__m256i*/ _1, /*si5*/ _2) \ ++ ((__m256i)__builtin_lasx_xvseqi_h((v16i16)(_1), (_2))) ++ ++/* Assembly instruction format: xd, xj, si5 */ ++/* Data types in instruction templates: V8SI, V8SI, QI */ ++#define __lasx_xvseqi_w(/*__m256i*/ _1, /*si5*/ _2) \ ++ ((__m256i)__builtin_lasx_xvseqi_w((v8i32)(_1), (_2))) ++ ++/* Assembly instruction format: xd, xj, si5 */ ++/* Data types in instruction templates: V4DI, V4DI, QI */ ++#define __lasx_xvseqi_d(/*__m256i*/ _1, /*si5*/ _2) \ ++ ((__m256i)__builtin_lasx_xvseqi_d((v4i64)(_1), (_2))) ++ ++/* Assembly instruction 
format: xd, xj, xk */ ++/* Data types in instruction templates: V32QI, V32QI, V32QI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvslt_b(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvslt_b((v32i8)_1, (v32i8)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: V16HI, V16HI, V16HI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvslt_h(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvslt_h((v16i16)_1, (v16i16)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: V8SI, V8SI, V8SI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvslt_w(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvslt_w((v8i32)_1, (v8i32)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: V4DI, V4DI, V4DI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvslt_d(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvslt_d((v4i64)_1, (v4i64)_2); ++} ++ ++/* Assembly instruction format: xd, xj, si5 */ ++/* Data types in instruction templates: V32QI, V32QI, QI */ ++#define __lasx_xvslti_b(/*__m256i*/ _1, /*si5*/ _2) \ ++ ((__m256i)__builtin_lasx_xvslti_b((v32i8)(_1), (_2))) ++ ++/* Assembly instruction format: xd, xj, si5 */ ++/* Data types in instruction templates: V16HI, V16HI, QI */ ++#define __lasx_xvslti_h(/*__m256i*/ _1, /*si5*/ _2) \ ++ ((__m256i)__builtin_lasx_xvslti_h((v16i16)(_1), (_2))) ++ ++/* Assembly instruction format: xd, xj, si5 */ ++/* Data types in instruction templates: V8SI, V8SI, QI */ ++#define __lasx_xvslti_w(/*__m256i*/ _1, /*si5*/ _2) \ ++ ((__m256i)__builtin_lasx_xvslti_w((v8i32)(_1), (_2))) ++ ++/* Assembly instruction format: xd, xj, si5 */ ++/* Data types in instruction templates: V4DI, V4DI, QI */ ++#define __lasx_xvslti_d(/*__m256i*/ _1, /*si5*/ _2) \ ++ ((__m256i)__builtin_lasx_xvslti_d((v4i64)(_1), (_2))) ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: V32QI, UV32QI, UV32QI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvslt_bu(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvslt_bu((v32u8)_1, (v32u8)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: V16HI, UV16HI, UV16HI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvslt_hu(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvslt_hu((v16u16)_1, (v16u16)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: V8SI, UV8SI, UV8SI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvslt_wu(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvslt_wu((v8u32)_1, (v8u32)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: V4DI, UV4DI, UV4DI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvslt_du(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvslt_du((v4u64)_1, (v4u64)_2); ++} ++ ++/* Assembly instruction format: xd, xj, ui5 */ ++/* Data types in 
instruction templates: V32QI, UV32QI, UQI */ ++#define __lasx_xvslti_bu(/*__m256i*/ _1, /*ui5*/ _2) \ ++ ((__m256i)__builtin_lasx_xvslti_bu((v32u8)(_1), (_2))) ++ ++/* Assembly instruction format: xd, xj, ui5 */ ++/* Data types in instruction templates: V16HI, UV16HI, UQI */ ++#define __lasx_xvslti_hu(/*__m256i*/ _1, /*ui5*/ _2) \ ++ ((__m256i)__builtin_lasx_xvslti_hu((v16u16)(_1), (_2))) ++ ++/* Assembly instruction format: xd, xj, ui5 */ ++/* Data types in instruction templates: V8SI, UV8SI, UQI */ ++#define __lasx_xvslti_wu(/*__m256i*/ _1, /*ui5*/ _2) \ ++ ((__m256i)__builtin_lasx_xvslti_wu((v8u32)(_1), (_2))) ++ ++/* Assembly instruction format: xd, xj, ui5 */ ++/* Data types in instruction templates: V4DI, UV4DI, UQI */ ++#define __lasx_xvslti_du(/*__m256i*/ _1, /*ui5*/ _2) \ ++ ((__m256i)__builtin_lasx_xvslti_du((v4u64)(_1), (_2))) ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: V32QI, V32QI, V32QI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvsle_b(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvsle_b((v32i8)_1, (v32i8)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: V16HI, V16HI, V16HI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvsle_h(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvsle_h((v16i16)_1, (v16i16)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: V8SI, V8SI, V8SI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvsle_w(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvsle_w((v8i32)_1, (v8i32)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: V4DI, V4DI, V4DI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvsle_d(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvsle_d((v4i64)_1, (v4i64)_2); ++} ++ ++/* Assembly instruction format: xd, xj, si5 */ ++/* Data types in instruction templates: V32QI, V32QI, QI */ ++#define __lasx_xvslei_b(/*__m256i*/ _1, /*si5*/ _2) \ ++ ((__m256i)__builtin_lasx_xvslei_b((v32i8)(_1), (_2))) ++ ++/* Assembly instruction format: xd, xj, si5 */ ++/* Data types in instruction templates: V16HI, V16HI, QI */ ++#define __lasx_xvslei_h(/*__m256i*/ _1, /*si5*/ _2) \ ++ ((__m256i)__builtin_lasx_xvslei_h((v16i16)(_1), (_2))) ++ ++/* Assembly instruction format: xd, xj, si5 */ ++/* Data types in instruction templates: V8SI, V8SI, QI */ ++#define __lasx_xvslei_w(/*__m256i*/ _1, /*si5*/ _2) \ ++ ((__m256i)__builtin_lasx_xvslei_w((v8i32)(_1), (_2))) ++ ++/* Assembly instruction format: xd, xj, si5 */ ++/* Data types in instruction templates: V4DI, V4DI, QI */ ++#define __lasx_xvslei_d(/*__m256i*/ _1, /*si5*/ _2) \ ++ ((__m256i)__builtin_lasx_xvslei_d((v4i64)(_1), (_2))) ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: V32QI, UV32QI, UV32QI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvsle_bu(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvsle_bu((v32u8)_1, (v32u8)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: V16HI, UV16HI, UV16HI */ ++extern __inline ++ __attribute__((__gnu_inline__, 
__always_inline__, __artificial__)) __m256i ++ __lasx_xvsle_hu(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvsle_hu((v16u16)_1, (v16u16)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: V8SI, UV8SI, UV8SI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvsle_wu(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvsle_wu((v8u32)_1, (v8u32)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: V4DI, UV4DI, UV4DI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvsle_du(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvsle_du((v4u64)_1, (v4u64)_2); ++} ++ ++/* Assembly instruction format: xd, xj, ui5 */ ++/* Data types in instruction templates: V32QI, UV32QI, UQI */ ++#define __lasx_xvslei_bu(/*__m256i*/ _1, /*ui5*/ _2) \ ++ ((__m256i)__builtin_lasx_xvslei_bu((v32u8)(_1), (_2))) ++ ++/* Assembly instruction format: xd, xj, ui5 */ ++/* Data types in instruction templates: V16HI, UV16HI, UQI */ ++#define __lasx_xvslei_hu(/*__m256i*/ _1, /*ui5*/ _2) \ ++ ((__m256i)__builtin_lasx_xvslei_hu((v16u16)(_1), (_2))) ++ ++/* Assembly instruction format: xd, xj, ui5 */ ++/* Data types in instruction templates: V8SI, UV8SI, UQI */ ++#define __lasx_xvslei_wu(/*__m256i*/ _1, /*ui5*/ _2) \ ++ ((__m256i)__builtin_lasx_xvslei_wu((v8u32)(_1), (_2))) ++ ++/* Assembly instruction format: xd, xj, ui5 */ ++/* Data types in instruction templates: V4DI, UV4DI, UQI */ ++#define __lasx_xvslei_du(/*__m256i*/ _1, /*ui5*/ _2) \ ++ ((__m256i)__builtin_lasx_xvslei_du((v4u64)(_1), (_2))) ++ ++/* Assembly instruction format: xd, xj, ui3 */ ++/* Data types in instruction templates: V32QI, V32QI, UQI */ ++#define __lasx_xvsat_b(/*__m256i*/ _1, /*ui3*/ _2) \ ++ ((__m256i)__builtin_lasx_xvsat_b((v32i8)(_1), (_2))) ++ ++/* Assembly instruction format: xd, xj, ui4 */ ++/* Data types in instruction templates: V16HI, V16HI, UQI */ ++#define __lasx_xvsat_h(/*__m256i*/ _1, /*ui4*/ _2) \ ++ ((__m256i)__builtin_lasx_xvsat_h((v16i16)(_1), (_2))) ++ ++/* Assembly instruction format: xd, xj, ui5 */ ++/* Data types in instruction templates: V8SI, V8SI, UQI */ ++#define __lasx_xvsat_w(/*__m256i*/ _1, /*ui5*/ _2) \ ++ ((__m256i)__builtin_lasx_xvsat_w((v8i32)(_1), (_2))) ++ ++/* Assembly instruction format: xd, xj, ui6 */ ++/* Data types in instruction templates: V4DI, V4DI, UQI */ ++#define __lasx_xvsat_d(/*__m256i*/ _1, /*ui6*/ _2) \ ++ ((__m256i)__builtin_lasx_xvsat_d((v4i64)(_1), (_2))) ++ ++/* Assembly instruction format: xd, xj, ui3 */ ++/* Data types in instruction templates: UV32QI, UV32QI, UQI */ ++#define __lasx_xvsat_bu(/*__m256i*/ _1, /*ui3*/ _2) \ ++ ((__m256i)__builtin_lasx_xvsat_bu((v32u8)(_1), (_2))) ++ ++/* Assembly instruction format: xd, xj, ui4 */ ++/* Data types in instruction templates: UV16HI, UV16HI, UQI */ ++#define __lasx_xvsat_hu(/*__m256i*/ _1, /*ui4*/ _2) \ ++ ((__m256i)__builtin_lasx_xvsat_hu((v16u16)(_1), (_2))) ++ ++/* Assembly instruction format: xd, xj, ui5 */ ++/* Data types in instruction templates: UV8SI, UV8SI, UQI */ ++#define __lasx_xvsat_wu(/*__m256i*/ _1, /*ui5*/ _2) \ ++ ((__m256i)__builtin_lasx_xvsat_wu((v8u32)(_1), (_2))) ++ ++/* Assembly instruction format: xd, xj, ui6 */ ++/* Data types in instruction templates: UV4DI, UV4DI, UQI */ ++#define __lasx_xvsat_du(/*__m256i*/ _1, /*ui6*/ _2) \ ++ ((__m256i)__builtin_lasx_xvsat_du((v4u64)(_1), (_2))) ++ 
++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: V32QI, V32QI, V32QI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvadda_b(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvadda_b((v32i8)_1, (v32i8)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: V16HI, V16HI, V16HI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvadda_h(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvadda_h((v16i16)_1, (v16i16)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: V8SI, V8SI, V8SI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvadda_w(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvadda_w((v8i32)_1, (v8i32)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: V4DI, V4DI, V4DI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvadda_d(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvadda_d((v4i64)_1, (v4i64)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: V32QI, V32QI, V32QI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvsadd_b(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvsadd_b((v32i8)_1, (v32i8)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: V16HI, V16HI, V16HI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvsadd_h(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvsadd_h((v16i16)_1, (v16i16)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: V8SI, V8SI, V8SI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvsadd_w(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvsadd_w((v8i32)_1, (v8i32)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: V4DI, V4DI, V4DI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvsadd_d(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvsadd_d((v4i64)_1, (v4i64)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: UV32QI, UV32QI, UV32QI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvsadd_bu(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvsadd_bu((v32u8)_1, (v32u8)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: UV16HI, UV16HI, UV16HI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvsadd_hu(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvsadd_hu((v16u16)_1, (v16u16)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: UV8SI, UV8SI, UV8SI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvsadd_wu(__m256i _1, __m256i _2) { ++ return 
(__m256i)__builtin_lasx_xvsadd_wu((v8u32)_1, (v8u32)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: UV4DI, UV4DI, UV4DI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvsadd_du(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvsadd_du((v4u64)_1, (v4u64)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: V32QI, V32QI, V32QI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvavg_b(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvavg_b((v32i8)_1, (v32i8)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: V16HI, V16HI, V16HI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvavg_h(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvavg_h((v16i16)_1, (v16i16)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: V8SI, V8SI, V8SI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvavg_w(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvavg_w((v8i32)_1, (v8i32)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: V4DI, V4DI, V4DI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvavg_d(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvavg_d((v4i64)_1, (v4i64)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: UV32QI, UV32QI, UV32QI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvavg_bu(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvavg_bu((v32u8)_1, (v32u8)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: UV16HI, UV16HI, UV16HI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvavg_hu(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvavg_hu((v16u16)_1, (v16u16)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: UV8SI, UV8SI, UV8SI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvavg_wu(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvavg_wu((v8u32)_1, (v8u32)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: UV4DI, UV4DI, UV4DI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvavg_du(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvavg_du((v4u64)_1, (v4u64)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: V32QI, V32QI, V32QI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvavgr_b(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvavgr_b((v32i8)_1, (v32i8)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: V16HI, V16HI, V16HI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ 
__lasx_xvavgr_h(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvavgr_h((v16i16)_1, (v16i16)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: V8SI, V8SI, V8SI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvavgr_w(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvavgr_w((v8i32)_1, (v8i32)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: V4DI, V4DI, V4DI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvavgr_d(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvavgr_d((v4i64)_1, (v4i64)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: UV32QI, UV32QI, UV32QI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvavgr_bu(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvavgr_bu((v32u8)_1, (v32u8)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: UV16HI, UV16HI, UV16HI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvavgr_hu(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvavgr_hu((v16u16)_1, (v16u16)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: UV8SI, UV8SI, UV8SI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvavgr_wu(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvavgr_wu((v8u32)_1, (v8u32)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: UV4DI, UV4DI, UV4DI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvavgr_du(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvavgr_du((v4u64)_1, (v4u64)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: V32QI, V32QI, V32QI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvssub_b(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvssub_b((v32i8)_1, (v32i8)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: V16HI, V16HI, V16HI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvssub_h(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvssub_h((v16i16)_1, (v16i16)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: V8SI, V8SI, V8SI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvssub_w(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvssub_w((v8i32)_1, (v8i32)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: V4DI, V4DI, V4DI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvssub_d(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvssub_d((v4i64)_1, (v4i64)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: UV32QI, UV32QI, UV32QI */ ++extern __inline ++ 
__attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvssub_bu(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvssub_bu((v32u8)_1, (v32u8)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: UV16HI, UV16HI, UV16HI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvssub_hu(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvssub_hu((v16u16)_1, (v16u16)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: UV8SI, UV8SI, UV8SI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvssub_wu(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvssub_wu((v8u32)_1, (v8u32)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: UV4DI, UV4DI, UV4DI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvssub_du(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvssub_du((v4u64)_1, (v4u64)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: V32QI, V32QI, V32QI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvabsd_b(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvabsd_b((v32i8)_1, (v32i8)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: V16HI, V16HI, V16HI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvabsd_h(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvabsd_h((v16i16)_1, (v16i16)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: V8SI, V8SI, V8SI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvabsd_w(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvabsd_w((v8i32)_1, (v8i32)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: V4DI, V4DI, V4DI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvabsd_d(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvabsd_d((v4i64)_1, (v4i64)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: UV32QI, UV32QI, UV32QI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvabsd_bu(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvabsd_bu((v32u8)_1, (v32u8)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: UV16HI, UV16HI, UV16HI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvabsd_hu(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvabsd_hu((v16u16)_1, (v16u16)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: UV8SI, UV8SI, UV8SI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvabsd_wu(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvabsd_wu((v8u32)_1, (v8u32)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in 
instruction templates: UV4DI, UV4DI, UV4DI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvabsd_du(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvabsd_du((v4u64)_1, (v4u64)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: V32QI, V32QI, V32QI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvmul_b(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvmul_b((v32i8)_1, (v32i8)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: V16HI, V16HI, V16HI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvmul_h(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvmul_h((v16i16)_1, (v16i16)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: V8SI, V8SI, V8SI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvmul_w(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvmul_w((v8i32)_1, (v8i32)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: V4DI, V4DI, V4DI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvmul_d(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvmul_d((v4i64)_1, (v4i64)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: V32QI, V32QI, V32QI, V32QI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvmadd_b(__m256i _1, __m256i _2, __m256i _3) { ++ return (__m256i)__builtin_lasx_xvmadd_b((v32i8)_1, (v32i8)_2, (v32i8)_3); ++} ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: V16HI, V16HI, V16HI, V16HI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvmadd_h(__m256i _1, __m256i _2, __m256i _3) { ++ return (__m256i)__builtin_lasx_xvmadd_h((v16i16)_1, (v16i16)_2, (v16i16)_3); ++} ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: V8SI, V8SI, V8SI, V8SI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvmadd_w(__m256i _1, __m256i _2, __m256i _3) { ++ return (__m256i)__builtin_lasx_xvmadd_w((v8i32)_1, (v8i32)_2, (v8i32)_3); ++} ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: V4DI, V4DI, V4DI, V4DI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvmadd_d(__m256i _1, __m256i _2, __m256i _3) { ++ return (__m256i)__builtin_lasx_xvmadd_d((v4i64)_1, (v4i64)_2, (v4i64)_3); ++} ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: V32QI, V32QI, V32QI, V32QI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvmsub_b(__m256i _1, __m256i _2, __m256i _3) { ++ return (__m256i)__builtin_lasx_xvmsub_b((v32i8)_1, (v32i8)_2, (v32i8)_3); ++} ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: V16HI, V16HI, V16HI, V16HI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i 
++ __lasx_xvmsub_h(__m256i _1, __m256i _2, __m256i _3) { ++ return (__m256i)__builtin_lasx_xvmsub_h((v16i16)_1, (v16i16)_2, (v16i16)_3); ++} ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: V8SI, V8SI, V8SI, V8SI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvmsub_w(__m256i _1, __m256i _2, __m256i _3) { ++ return (__m256i)__builtin_lasx_xvmsub_w((v8i32)_1, (v8i32)_2, (v8i32)_3); ++} ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: V4DI, V4DI, V4DI, V4DI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvmsub_d(__m256i _1, __m256i _2, __m256i _3) { ++ return (__m256i)__builtin_lasx_xvmsub_d((v4i64)_1, (v4i64)_2, (v4i64)_3); ++} ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: V32QI, V32QI, V32QI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvdiv_b(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvdiv_b((v32i8)_1, (v32i8)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: V16HI, V16HI, V16HI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvdiv_h(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvdiv_h((v16i16)_1, (v16i16)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: V8SI, V8SI, V8SI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvdiv_w(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvdiv_w((v8i32)_1, (v8i32)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: V4DI, V4DI, V4DI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvdiv_d(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvdiv_d((v4i64)_1, (v4i64)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: UV32QI, UV32QI, UV32QI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvdiv_bu(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvdiv_bu((v32u8)_1, (v32u8)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: UV16HI, UV16HI, UV16HI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvdiv_hu(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvdiv_hu((v16u16)_1, (v16u16)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: UV8SI, UV8SI, UV8SI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvdiv_wu(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvdiv_wu((v8u32)_1, (v8u32)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: UV4DI, UV4DI, UV4DI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvdiv_du(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvdiv_du((v4u64)_1, (v4u64)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: 
V16HI, V32QI, V32QI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvhaddw_h_b(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvhaddw_h_b((v32i8)_1, (v32i8)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: V8SI, V16HI, V16HI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvhaddw_w_h(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvhaddw_w_h((v16i16)_1, (v16i16)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: V4DI, V8SI, V8SI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvhaddw_d_w(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvhaddw_d_w((v8i32)_1, (v8i32)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: UV16HI, UV32QI, UV32QI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvhaddw_hu_bu(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvhaddw_hu_bu((v32u8)_1, (v32u8)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: UV8SI, UV16HI, UV16HI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvhaddw_wu_hu(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvhaddw_wu_hu((v16u16)_1, (v16u16)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: UV4DI, UV8SI, UV8SI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvhaddw_du_wu(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvhaddw_du_wu((v8u32)_1, (v8u32)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: V16HI, V32QI, V32QI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvhsubw_h_b(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvhsubw_h_b((v32i8)_1, (v32i8)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: V8SI, V16HI, V16HI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvhsubw_w_h(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvhsubw_w_h((v16i16)_1, (v16i16)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: V4DI, V8SI, V8SI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvhsubw_d_w(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvhsubw_d_w((v8i32)_1, (v8i32)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: V16HI, UV32QI, UV32QI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvhsubw_hu_bu(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvhsubw_hu_bu((v32u8)_1, (v32u8)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: V8SI, UV16HI, UV16HI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvhsubw_wu_hu(__m256i _1, __m256i _2) { ++ return 
(__m256i)__builtin_lasx_xvhsubw_wu_hu((v16u16)_1, (v16u16)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: V4DI, UV8SI, UV8SI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvhsubw_du_wu(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvhsubw_du_wu((v8u32)_1, (v8u32)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: V32QI, V32QI, V32QI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvmod_b(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvmod_b((v32i8)_1, (v32i8)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: V16HI, V16HI, V16HI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvmod_h(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvmod_h((v16i16)_1, (v16i16)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: V8SI, V8SI, V8SI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvmod_w(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvmod_w((v8i32)_1, (v8i32)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: V4DI, V4DI, V4DI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvmod_d(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvmod_d((v4i64)_1, (v4i64)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: UV32QI, UV32QI, UV32QI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvmod_bu(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvmod_bu((v32u8)_1, (v32u8)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: UV16HI, UV16HI, UV16HI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvmod_hu(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvmod_hu((v16u16)_1, (v16u16)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: UV8SI, UV8SI, UV8SI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvmod_wu(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvmod_wu((v8u32)_1, (v8u32)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: UV4DI, UV4DI, UV4DI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvmod_du(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvmod_du((v4u64)_1, (v4u64)_2); ++} ++ ++/* Assembly instruction format: xd, xj, ui4 */ ++/* Data types in instruction templates: V32QI, V32QI, UQI */ ++#define __lasx_xvrepl128vei_b(/*__m256i*/ _1, /*ui4*/ _2) \ ++ ((__m256i)__builtin_lasx_xvrepl128vei_b((v32i8)(_1), (_2))) ++ ++/* Assembly instruction format: xd, xj, ui3 */ ++/* Data types in instruction templates: V16HI, V16HI, UQI */ ++#define __lasx_xvrepl128vei_h(/*__m256i*/ _1, /*ui3*/ _2) \ ++ ((__m256i)__builtin_lasx_xvrepl128vei_h((v16i16)(_1), (_2))) ++ ++/* Assembly instruction format: xd, xj, ui2 */ ++/* 
Data types in instruction templates: V8SI, V8SI, UQI */ ++#define __lasx_xvrepl128vei_w(/*__m256i*/ _1, /*ui2*/ _2) \ ++ ((__m256i)__builtin_lasx_xvrepl128vei_w((v8i32)(_1), (_2))) ++ ++/* Assembly instruction format: xd, xj, ui1 */ ++/* Data types in instruction templates: V4DI, V4DI, UQI */ ++#define __lasx_xvrepl128vei_d(/*__m256i*/ _1, /*ui1*/ _2) \ ++ ((__m256i)__builtin_lasx_xvrepl128vei_d((v4i64)(_1), (_2))) ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: V32QI, V32QI, V32QI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvpickev_b(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvpickev_b((v32i8)_1, (v32i8)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: V16HI, V16HI, V16HI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvpickev_h(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvpickev_h((v16i16)_1, (v16i16)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: V8SI, V8SI, V8SI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvpickev_w(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvpickev_w((v8i32)_1, (v8i32)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: V4DI, V4DI, V4DI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvpickev_d(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvpickev_d((v4i64)_1, (v4i64)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: V32QI, V32QI, V32QI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvpickod_b(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvpickod_b((v32i8)_1, (v32i8)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: V16HI, V16HI, V16HI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvpickod_h(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvpickod_h((v16i16)_1, (v16i16)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: V8SI, V8SI, V8SI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvpickod_w(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvpickod_w((v8i32)_1, (v8i32)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: V4DI, V4DI, V4DI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvpickod_d(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvpickod_d((v4i64)_1, (v4i64)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: V32QI, V32QI, V32QI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvilvh_b(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvilvh_b((v32i8)_1, (v32i8)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: V16HI, V16HI, V16HI */ ++extern __inline ++ __attribute__((__gnu_inline__, 
__always_inline__, __artificial__)) __m256i ++ __lasx_xvilvh_h(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvilvh_h((v16i16)_1, (v16i16)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: V8SI, V8SI, V8SI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvilvh_w(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvilvh_w((v8i32)_1, (v8i32)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: V4DI, V4DI, V4DI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvilvh_d(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvilvh_d((v4i64)_1, (v4i64)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: V32QI, V32QI, V32QI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvilvl_b(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvilvl_b((v32i8)_1, (v32i8)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: V16HI, V16HI, V16HI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvilvl_h(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvilvl_h((v16i16)_1, (v16i16)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: V8SI, V8SI, V8SI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvilvl_w(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvilvl_w((v8i32)_1, (v8i32)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: V4DI, V4DI, V4DI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvilvl_d(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvilvl_d((v4i64)_1, (v4i64)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: V32QI, V32QI, V32QI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvpackev_b(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvpackev_b((v32i8)_1, (v32i8)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: V16HI, V16HI, V16HI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvpackev_h(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvpackev_h((v16i16)_1, (v16i16)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: V8SI, V8SI, V8SI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvpackev_w(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvpackev_w((v8i32)_1, (v8i32)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: V4DI, V4DI, V4DI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvpackev_d(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvpackev_d((v4i64)_1, (v4i64)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: V32QI, V32QI, V32QI */ 
++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvpackod_b(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvpackod_b((v32i8)_1, (v32i8)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: V16HI, V16HI, V16HI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvpackod_h(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvpackod_h((v16i16)_1, (v16i16)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: V8SI, V8SI, V8SI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvpackod_w(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvpackod_w((v8i32)_1, (v8i32)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: V4DI, V4DI, V4DI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvpackod_d(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvpackod_d((v4i64)_1, (v4i64)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk, xa */ ++/* Data types in instruction templates: V32QI, V32QI, V32QI, V32QI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvshuf_b(__m256i _1, __m256i _2, __m256i _3) { ++ return (__m256i)__builtin_lasx_xvshuf_b((v32i8)_1, (v32i8)_2, (v32i8)_3); ++} ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: V16HI, V16HI, V16HI, V16HI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvshuf_h(__m256i _1, __m256i _2, __m256i _3) { ++ return (__m256i)__builtin_lasx_xvshuf_h((v16i16)_1, (v16i16)_2, (v16i16)_3); ++} ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: V8SI, V8SI, V8SI, V8SI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvshuf_w(__m256i _1, __m256i _2, __m256i _3) { ++ return (__m256i)__builtin_lasx_xvshuf_w((v8i32)_1, (v8i32)_2, (v8i32)_3); ++} ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: V4DI, V4DI, V4DI, V4DI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvshuf_d(__m256i _1, __m256i _2, __m256i _3) { ++ return (__m256i)__builtin_lasx_xvshuf_d((v4i64)_1, (v4i64)_2, (v4i64)_3); ++} ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: UV32QI, UV32QI, UV32QI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvand_v(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvand_v((v32u8)_1, (v32u8)_2); ++} ++ ++/* Assembly instruction format: xd, xj, ui8 */ ++/* Data types in instruction templates: UV32QI, UV32QI, UQI */ ++#define __lasx_xvandi_b(/*__m256i*/ _1, /*ui8*/ _2) \ ++ ((__m256i)__builtin_lasx_xvandi_b((v32u8)(_1), (_2))) ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: UV32QI, UV32QI, UV32QI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvor_v(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvor_v((v32u8)_1, (v32u8)_2); ++} ++ ++/* Assembly instruction format: 
xd, xj, ui8 */ ++/* Data types in instruction templates: UV32QI, UV32QI, UQI */ ++#define __lasx_xvori_b(/*__m256i*/ _1, /*ui8*/ _2) \ ++ ((__m256i)__builtin_lasx_xvori_b((v32u8)(_1), (_2))) ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: UV32QI, UV32QI, UV32QI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvnor_v(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvnor_v((v32u8)_1, (v32u8)_2); ++} ++ ++/* Assembly instruction format: xd, xj, ui8 */ ++/* Data types in instruction templates: UV32QI, UV32QI, UQI */ ++#define __lasx_xvnori_b(/*__m256i*/ _1, /*ui8*/ _2) \ ++ ((__m256i)__builtin_lasx_xvnori_b((v32u8)(_1), (_2))) ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: UV32QI, UV32QI, UV32QI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvxor_v(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvxor_v((v32u8)_1, (v32u8)_2); ++} ++ ++/* Assembly instruction format: xd, xj, ui8 */ ++/* Data types in instruction templates: UV32QI, UV32QI, UQI */ ++#define __lasx_xvxori_b(/*__m256i*/ _1, /*ui8*/ _2) \ ++ ((__m256i)__builtin_lasx_xvxori_b((v32u8)(_1), (_2))) ++ ++/* Assembly instruction format: xd, xj, xk, xa */ ++/* Data types in instruction templates: UV32QI, UV32QI, UV32QI, UV32QI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvbitsel_v(__m256i _1, __m256i _2, __m256i _3) { ++ return (__m256i)__builtin_lasx_xvbitsel_v((v32u8)_1, (v32u8)_2, (v32u8)_3); ++} ++ ++/* Assembly instruction format: xd, xj, ui8 */ ++/* Data types in instruction templates: UV32QI, UV32QI, UV32QI, UQI */ ++#define __lasx_xvbitseli_b(/*__m256i*/ _1, /*__m256i*/ _2, /*ui8*/ _3) \ ++ ((__m256i)__builtin_lasx_xvbitseli_b((v32u8)(_1), (v32u8)(_2), (_3))) ++ ++/* Assembly instruction format: xd, xj, ui8 */ ++/* Data types in instruction templates: V32QI, V32QI, USI */ ++#define __lasx_xvshuf4i_b(/*__m256i*/ _1, /*ui8*/ _2) \ ++ ((__m256i)__builtin_lasx_xvshuf4i_b((v32i8)(_1), (_2))) ++ ++/* Assembly instruction format: xd, xj, ui8 */ ++/* Data types in instruction templates: V16HI, V16HI, USI */ ++#define __lasx_xvshuf4i_h(/*__m256i*/ _1, /*ui8*/ _2) \ ++ ((__m256i)__builtin_lasx_xvshuf4i_h((v16i16)(_1), (_2))) ++ ++/* Assembly instruction format: xd, xj, ui8 */ ++/* Data types in instruction templates: V8SI, V8SI, USI */ ++#define __lasx_xvshuf4i_w(/*__m256i*/ _1, /*ui8*/ _2) \ ++ ((__m256i)__builtin_lasx_xvshuf4i_w((v8i32)(_1), (_2))) ++ ++/* Assembly instruction format: xd, rj */ ++/* Data types in instruction templates: V32QI, SI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvreplgr2vr_b(int _1) { ++ return (__m256i)__builtin_lasx_xvreplgr2vr_b((int)_1); ++} ++ ++/* Assembly instruction format: xd, rj */ ++/* Data types in instruction templates: V16HI, SI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvreplgr2vr_h(int _1) { ++ return (__m256i)__builtin_lasx_xvreplgr2vr_h((int)_1); ++} ++ ++/* Assembly instruction format: xd, rj */ ++/* Data types in instruction templates: V8SI, SI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvreplgr2vr_w(int _1) { ++ return (__m256i)__builtin_lasx_xvreplgr2vr_w((int)_1); ++} ++ ++/* Assembly instruction 
format: xd, rj */ ++/* Data types in instruction templates: V4DI, DI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvreplgr2vr_d(long int _1) { ++ return (__m256i)__builtin_lasx_xvreplgr2vr_d((long int)_1); ++} ++ ++/* Assembly instruction format: xd, xj */ ++/* Data types in instruction templates: V32QI, V32QI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvpcnt_b(__m256i _1) { ++ return (__m256i)__builtin_lasx_xvpcnt_b((v32i8)_1); ++} ++ ++/* Assembly instruction format: xd, xj */ ++/* Data types in instruction templates: V16HI, V16HI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvpcnt_h(__m256i _1) { ++ return (__m256i)__builtin_lasx_xvpcnt_h((v16i16)_1); ++} ++ ++/* Assembly instruction format: xd, xj */ ++/* Data types in instruction templates: V8SI, V8SI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvpcnt_w(__m256i _1) { ++ return (__m256i)__builtin_lasx_xvpcnt_w((v8i32)_1); ++} ++ ++/* Assembly instruction format: xd, xj */ ++/* Data types in instruction templates: V4DI, V4DI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvpcnt_d(__m256i _1) { ++ return (__m256i)__builtin_lasx_xvpcnt_d((v4i64)_1); ++} ++ ++/* Assembly instruction format: xd, xj */ ++/* Data types in instruction templates: V32QI, V32QI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvclo_b(__m256i _1) { ++ return (__m256i)__builtin_lasx_xvclo_b((v32i8)_1); ++} ++ ++/* Assembly instruction format: xd, xj */ ++/* Data types in instruction templates: V16HI, V16HI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvclo_h(__m256i _1) { ++ return (__m256i)__builtin_lasx_xvclo_h((v16i16)_1); ++} ++ ++/* Assembly instruction format: xd, xj */ ++/* Data types in instruction templates: V8SI, V8SI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvclo_w(__m256i _1) { ++ return (__m256i)__builtin_lasx_xvclo_w((v8i32)_1); ++} ++ ++/* Assembly instruction format: xd, xj */ ++/* Data types in instruction templates: V4DI, V4DI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvclo_d(__m256i _1) { ++ return (__m256i)__builtin_lasx_xvclo_d((v4i64)_1); ++} ++ ++/* Assembly instruction format: xd, xj */ ++/* Data types in instruction templates: V32QI, V32QI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvclz_b(__m256i _1) { ++ return (__m256i)__builtin_lasx_xvclz_b((v32i8)_1); ++} ++ ++/* Assembly instruction format: xd, xj */ ++/* Data types in instruction templates: V16HI, V16HI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvclz_h(__m256i _1) { ++ return (__m256i)__builtin_lasx_xvclz_h((v16i16)_1); ++} ++ ++/* Assembly instruction format: xd, xj */ ++/* Data types in instruction templates: V8SI, V8SI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvclz_w(__m256i _1) { ++ return (__m256i)__builtin_lasx_xvclz_w((v8i32)_1); ++} ++ ++/* Assembly instruction format: xd, xj */ ++/* Data types in instruction 
templates: V4DI, V4DI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvclz_d(__m256i _1) { ++ return (__m256i)__builtin_lasx_xvclz_d((v4i64)_1); ++} ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: V8SI, V8SF, V8SF */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvfcmp_caf_s(__m256 _1, __m256 _2) { ++ return (__m256i)__builtin_lasx_xvfcmp_caf_s((v8f32)_1, (v8f32)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: V4DI, V4DF, V4DF */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvfcmp_caf_d(__m256d _1, __m256d _2) { ++ return (__m256i)__builtin_lasx_xvfcmp_caf_d((v4f64)_1, (v4f64)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: V8SI, V8SF, V8SF */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvfcmp_cor_s(__m256 _1, __m256 _2) { ++ return (__m256i)__builtin_lasx_xvfcmp_cor_s((v8f32)_1, (v8f32)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: V4DI, V4DF, V4DF */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvfcmp_cor_d(__m256d _1, __m256d _2) { ++ return (__m256i)__builtin_lasx_xvfcmp_cor_d((v4f64)_1, (v4f64)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: V8SI, V8SF, V8SF */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvfcmp_cun_s(__m256 _1, __m256 _2) { ++ return (__m256i)__builtin_lasx_xvfcmp_cun_s((v8f32)_1, (v8f32)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: V4DI, V4DF, V4DF */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvfcmp_cun_d(__m256d _1, __m256d _2) { ++ return (__m256i)__builtin_lasx_xvfcmp_cun_d((v4f64)_1, (v4f64)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: V8SI, V8SF, V8SF */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvfcmp_cune_s(__m256 _1, __m256 _2) { ++ return (__m256i)__builtin_lasx_xvfcmp_cune_s((v8f32)_1, (v8f32)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: V4DI, V4DF, V4DF */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvfcmp_cune_d(__m256d _1, __m256d _2) { ++ return (__m256i)__builtin_lasx_xvfcmp_cune_d((v4f64)_1, (v4f64)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: V8SI, V8SF, V8SF */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvfcmp_cueq_s(__m256 _1, __m256 _2) { ++ return (__m256i)__builtin_lasx_xvfcmp_cueq_s((v8f32)_1, (v8f32)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: V4DI, V4DF, V4DF */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvfcmp_cueq_d(__m256d _1, __m256d _2) { ++ return (__m256i)__builtin_lasx_xvfcmp_cueq_d((v4f64)_1, (v4f64)_2); ++} ++ ++/* Assembly 
instruction format: xd, xj, xk */ ++/* Data types in instruction templates: V8SI, V8SF, V8SF */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvfcmp_ceq_s(__m256 _1, __m256 _2) { ++ return (__m256i)__builtin_lasx_xvfcmp_ceq_s((v8f32)_1, (v8f32)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: V4DI, V4DF, V4DF */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvfcmp_ceq_d(__m256d _1, __m256d _2) { ++ return (__m256i)__builtin_lasx_xvfcmp_ceq_d((v4f64)_1, (v4f64)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: V8SI, V8SF, V8SF */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvfcmp_cne_s(__m256 _1, __m256 _2) { ++ return (__m256i)__builtin_lasx_xvfcmp_cne_s((v8f32)_1, (v8f32)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: V4DI, V4DF, V4DF */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvfcmp_cne_d(__m256d _1, __m256d _2) { ++ return (__m256i)__builtin_lasx_xvfcmp_cne_d((v4f64)_1, (v4f64)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: V8SI, V8SF, V8SF */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvfcmp_clt_s(__m256 _1, __m256 _2) { ++ return (__m256i)__builtin_lasx_xvfcmp_clt_s((v8f32)_1, (v8f32)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: V4DI, V4DF, V4DF */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvfcmp_clt_d(__m256d _1, __m256d _2) { ++ return (__m256i)__builtin_lasx_xvfcmp_clt_d((v4f64)_1, (v4f64)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: V8SI, V8SF, V8SF */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvfcmp_cult_s(__m256 _1, __m256 _2) { ++ return (__m256i)__builtin_lasx_xvfcmp_cult_s((v8f32)_1, (v8f32)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: V4DI, V4DF, V4DF */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvfcmp_cult_d(__m256d _1, __m256d _2) { ++ return (__m256i)__builtin_lasx_xvfcmp_cult_d((v4f64)_1, (v4f64)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: V8SI, V8SF, V8SF */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvfcmp_cle_s(__m256 _1, __m256 _2) { ++ return (__m256i)__builtin_lasx_xvfcmp_cle_s((v8f32)_1, (v8f32)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: V4DI, V4DF, V4DF */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvfcmp_cle_d(__m256d _1, __m256d _2) { ++ return (__m256i)__builtin_lasx_xvfcmp_cle_d((v4f64)_1, (v4f64)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: V8SI, V8SF, V8SF */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvfcmp_cule_s(__m256 _1, __m256 _2) { 
++ return (__m256i)__builtin_lasx_xvfcmp_cule_s((v8f32)_1, (v8f32)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: V4DI, V4DF, V4DF */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvfcmp_cule_d(__m256d _1, __m256d _2) { ++ return (__m256i)__builtin_lasx_xvfcmp_cule_d((v4f64)_1, (v4f64)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: V8SI, V8SF, V8SF */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvfcmp_saf_s(__m256 _1, __m256 _2) { ++ return (__m256i)__builtin_lasx_xvfcmp_saf_s((v8f32)_1, (v8f32)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: V4DI, V4DF, V4DF */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvfcmp_saf_d(__m256d _1, __m256d _2) { ++ return (__m256i)__builtin_lasx_xvfcmp_saf_d((v4f64)_1, (v4f64)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: V8SI, V8SF, V8SF */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvfcmp_sor_s(__m256 _1, __m256 _2) { ++ return (__m256i)__builtin_lasx_xvfcmp_sor_s((v8f32)_1, (v8f32)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: V4DI, V4DF, V4DF */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvfcmp_sor_d(__m256d _1, __m256d _2) { ++ return (__m256i)__builtin_lasx_xvfcmp_sor_d((v4f64)_1, (v4f64)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: V8SI, V8SF, V8SF */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvfcmp_sun_s(__m256 _1, __m256 _2) { ++ return (__m256i)__builtin_lasx_xvfcmp_sun_s((v8f32)_1, (v8f32)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: V4DI, V4DF, V4DF */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvfcmp_sun_d(__m256d _1, __m256d _2) { ++ return (__m256i)__builtin_lasx_xvfcmp_sun_d((v4f64)_1, (v4f64)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: V8SI, V8SF, V8SF */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvfcmp_sune_s(__m256 _1, __m256 _2) { ++ return (__m256i)__builtin_lasx_xvfcmp_sune_s((v8f32)_1, (v8f32)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: V4DI, V4DF, V4DF */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvfcmp_sune_d(__m256d _1, __m256d _2) { ++ return (__m256i)__builtin_lasx_xvfcmp_sune_d((v4f64)_1, (v4f64)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: V8SI, V8SF, V8SF */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvfcmp_sueq_s(__m256 _1, __m256 _2) { ++ return (__m256i)__builtin_lasx_xvfcmp_sueq_s((v8f32)_1, (v8f32)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: V4DI, V4DF, V4DF */ ++extern __inline ++ 
__attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvfcmp_sueq_d(__m256d _1, __m256d _2) { ++ return (__m256i)__builtin_lasx_xvfcmp_sueq_d((v4f64)_1, (v4f64)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: V8SI, V8SF, V8SF */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvfcmp_seq_s(__m256 _1, __m256 _2) { ++ return (__m256i)__builtin_lasx_xvfcmp_seq_s((v8f32)_1, (v8f32)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: V4DI, V4DF, V4DF */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvfcmp_seq_d(__m256d _1, __m256d _2) { ++ return (__m256i)__builtin_lasx_xvfcmp_seq_d((v4f64)_1, (v4f64)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: V8SI, V8SF, V8SF */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvfcmp_sne_s(__m256 _1, __m256 _2) { ++ return (__m256i)__builtin_lasx_xvfcmp_sne_s((v8f32)_1, (v8f32)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: V4DI, V4DF, V4DF */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvfcmp_sne_d(__m256d _1, __m256d _2) { ++ return (__m256i)__builtin_lasx_xvfcmp_sne_d((v4f64)_1, (v4f64)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: V8SI, V8SF, V8SF */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvfcmp_slt_s(__m256 _1, __m256 _2) { ++ return (__m256i)__builtin_lasx_xvfcmp_slt_s((v8f32)_1, (v8f32)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: V4DI, V4DF, V4DF */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvfcmp_slt_d(__m256d _1, __m256d _2) { ++ return (__m256i)__builtin_lasx_xvfcmp_slt_d((v4f64)_1, (v4f64)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: V8SI, V8SF, V8SF */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvfcmp_sult_s(__m256 _1, __m256 _2) { ++ return (__m256i)__builtin_lasx_xvfcmp_sult_s((v8f32)_1, (v8f32)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: V4DI, V4DF, V4DF */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvfcmp_sult_d(__m256d _1, __m256d _2) { ++ return (__m256i)__builtin_lasx_xvfcmp_sult_d((v4f64)_1, (v4f64)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: V8SI, V8SF, V8SF */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvfcmp_sle_s(__m256 _1, __m256 _2) { ++ return (__m256i)__builtin_lasx_xvfcmp_sle_s((v8f32)_1, (v8f32)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: V4DI, V4DF, V4DF */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvfcmp_sle_d(__m256d _1, __m256d _2) { ++ return (__m256i)__builtin_lasx_xvfcmp_sle_d((v4f64)_1, (v4f64)_2); ++} ++ ++/* Assembly instruction format: xd, 
xj, xk */ ++/* Data types in instruction templates: V8SI, V8SF, V8SF */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvfcmp_sule_s(__m256 _1, __m256 _2) { ++ return (__m256i)__builtin_lasx_xvfcmp_sule_s((v8f32)_1, (v8f32)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: V4DI, V4DF, V4DF */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvfcmp_sule_d(__m256d _1, __m256d _2) { ++ return (__m256i)__builtin_lasx_xvfcmp_sule_d((v4f64)_1, (v4f64)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: V8SF, V8SF, V8SF */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256 ++ __lasx_xvfadd_s(__m256 _1, __m256 _2) { ++ return (__m256)__builtin_lasx_xvfadd_s((v8f32)_1, (v8f32)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: V4DF, V4DF, V4DF */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256d ++ __lasx_xvfadd_d(__m256d _1, __m256d _2) { ++ return (__m256d)__builtin_lasx_xvfadd_d((v4f64)_1, (v4f64)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: V8SF, V8SF, V8SF */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256 ++ __lasx_xvfsub_s(__m256 _1, __m256 _2) { ++ return (__m256)__builtin_lasx_xvfsub_s((v8f32)_1, (v8f32)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: V4DF, V4DF, V4DF */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256d ++ __lasx_xvfsub_d(__m256d _1, __m256d _2) { ++ return (__m256d)__builtin_lasx_xvfsub_d((v4f64)_1, (v4f64)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: V8SF, V8SF, V8SF */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256 ++ __lasx_xvfmul_s(__m256 _1, __m256 _2) { ++ return (__m256)__builtin_lasx_xvfmul_s((v8f32)_1, (v8f32)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: V4DF, V4DF, V4DF */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256d ++ __lasx_xvfmul_d(__m256d _1, __m256d _2) { ++ return (__m256d)__builtin_lasx_xvfmul_d((v4f64)_1, (v4f64)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: V8SF, V8SF, V8SF */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256 ++ __lasx_xvfdiv_s(__m256 _1, __m256 _2) { ++ return (__m256)__builtin_lasx_xvfdiv_s((v8f32)_1, (v8f32)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: V4DF, V4DF, V4DF */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256d ++ __lasx_xvfdiv_d(__m256d _1, __m256d _2) { ++ return (__m256d)__builtin_lasx_xvfdiv_d((v4f64)_1, (v4f64)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: V16HI, V8SF, V8SF */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvfcvt_h_s(__m256 _1, __m256 _2) { ++ return (__m256i)__builtin_lasx_xvfcvt_h_s((v8f32)_1, (v8f32)_2); ++} ++ ++/* Assembly 
instruction format: xd, xj, xk */ ++/* Data types in instruction templates: V8SF, V4DF, V4DF */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256 ++ __lasx_xvfcvt_s_d(__m256d _1, __m256d _2) { ++ return (__m256)__builtin_lasx_xvfcvt_s_d((v4f64)_1, (v4f64)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: V8SF, V8SF, V8SF */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256 ++ __lasx_xvfmin_s(__m256 _1, __m256 _2) { ++ return (__m256)__builtin_lasx_xvfmin_s((v8f32)_1, (v8f32)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: V4DF, V4DF, V4DF */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256d ++ __lasx_xvfmin_d(__m256d _1, __m256d _2) { ++ return (__m256d)__builtin_lasx_xvfmin_d((v4f64)_1, (v4f64)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: V8SF, V8SF, V8SF */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256 ++ __lasx_xvfmina_s(__m256 _1, __m256 _2) { ++ return (__m256)__builtin_lasx_xvfmina_s((v8f32)_1, (v8f32)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: V4DF, V4DF, V4DF */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256d ++ __lasx_xvfmina_d(__m256d _1, __m256d _2) { ++ return (__m256d)__builtin_lasx_xvfmina_d((v4f64)_1, (v4f64)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: V8SF, V8SF, V8SF */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256 ++ __lasx_xvfmax_s(__m256 _1, __m256 _2) { ++ return (__m256)__builtin_lasx_xvfmax_s((v8f32)_1, (v8f32)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: V4DF, V4DF, V4DF */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256d ++ __lasx_xvfmax_d(__m256d _1, __m256d _2) { ++ return (__m256d)__builtin_lasx_xvfmax_d((v4f64)_1, (v4f64)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: V8SF, V8SF, V8SF */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256 ++ __lasx_xvfmaxa_s(__m256 _1, __m256 _2) { ++ return (__m256)__builtin_lasx_xvfmaxa_s((v8f32)_1, (v8f32)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: V4DF, V4DF, V4DF */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256d ++ __lasx_xvfmaxa_d(__m256d _1, __m256d _2) { ++ return (__m256d)__builtin_lasx_xvfmaxa_d((v4f64)_1, (v4f64)_2); ++} ++ ++/* Assembly instruction format: xd, xj */ ++/* Data types in instruction templates: V8SI, V8SF */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvfclass_s(__m256 _1) { ++ return (__m256i)__builtin_lasx_xvfclass_s((v8f32)_1); ++} ++ ++/* Assembly instruction format: xd, xj */ ++/* Data types in instruction templates: V4DI, V4DF */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvfclass_d(__m256d _1) { ++ return (__m256i)__builtin_lasx_xvfclass_d((v4f64)_1); ++} ++ ++/* Assembly instruction format: xd, xj */ ++/* Data types in 
instruction templates: V8SF, V8SF */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256 ++ __lasx_xvfsqrt_s(__m256 _1) { ++ return (__m256)__builtin_lasx_xvfsqrt_s((v8f32)_1); ++} ++ ++/* Assembly instruction format: xd, xj */ ++/* Data types in instruction templates: V4DF, V4DF */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256d ++ __lasx_xvfsqrt_d(__m256d _1) { ++ return (__m256d)__builtin_lasx_xvfsqrt_d((v4f64)_1); ++} ++ ++/* Assembly instruction format: xd, xj */ ++/* Data types in instruction templates: V8SF, V8SF */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256 ++ __lasx_xvfrecip_s(__m256 _1) { ++ return (__m256)__builtin_lasx_xvfrecip_s((v8f32)_1); ++} ++ ++/* Assembly instruction format: xd, xj */ ++/* Data types in instruction templates: V4DF, V4DF */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256d ++ __lasx_xvfrecip_d(__m256d _1) { ++ return (__m256d)__builtin_lasx_xvfrecip_d((v4f64)_1); ++} ++ ++/* Assembly instruction format: xd, xj */ ++/* Data types in instruction templates: V8SF, V8SF */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256 ++ __lasx_xvfrint_s(__m256 _1) { ++ return (__m256)__builtin_lasx_xvfrint_s((v8f32)_1); ++} ++ ++/* Assembly instruction format: xd, xj */ ++/* Data types in instruction templates: V4DF, V4DF */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256d ++ __lasx_xvfrint_d(__m256d _1) { ++ return (__m256d)__builtin_lasx_xvfrint_d((v4f64)_1); ++} ++ ++/* Assembly instruction format: xd, xj */ ++/* Data types in instruction templates: V8SF, V8SF */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256 ++ __lasx_xvfrsqrt_s(__m256 _1) { ++ return (__m256)__builtin_lasx_xvfrsqrt_s((v8f32)_1); ++} ++ ++/* Assembly instruction format: xd, xj */ ++/* Data types in instruction templates: V4DF, V4DF */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256d ++ __lasx_xvfrsqrt_d(__m256d _1) { ++ return (__m256d)__builtin_lasx_xvfrsqrt_d((v4f64)_1); ++} ++ ++/* Assembly instruction format: xd, xj */ ++/* Data types in instruction templates: V8SF, V8SF */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256 ++ __lasx_xvflogb_s(__m256 _1) { ++ return (__m256)__builtin_lasx_xvflogb_s((v8f32)_1); ++} ++ ++/* Assembly instruction format: xd, xj */ ++/* Data types in instruction templates: V4DF, V4DF */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256d ++ __lasx_xvflogb_d(__m256d _1) { ++ return (__m256d)__builtin_lasx_xvflogb_d((v4f64)_1); ++} ++ ++/* Assembly instruction format: xd, xj */ ++/* Data types in instruction templates: V8SF, V16HI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256 ++ __lasx_xvfcvth_s_h(__m256i _1) { ++ return (__m256)__builtin_lasx_xvfcvth_s_h((v16i16)_1); ++} ++ ++/* Assembly instruction format: xd, xj */ ++/* Data types in instruction templates: V4DF, V8SF */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256d ++ __lasx_xvfcvth_d_s(__m256 _1) { ++ return (__m256d)__builtin_lasx_xvfcvth_d_s((v8f32)_1); ++} ++ ++/* Assembly instruction format: xd, xj */ ++/* Data types in instruction templates: V8SF, V16HI */ 
++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256 ++ __lasx_xvfcvtl_s_h(__m256i _1) { ++ return (__m256)__builtin_lasx_xvfcvtl_s_h((v16i16)_1); ++} ++ ++/* Assembly instruction format: xd, xj */ ++/* Data types in instruction templates: V4DF, V8SF */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256d ++ __lasx_xvfcvtl_d_s(__m256 _1) { ++ return (__m256d)__builtin_lasx_xvfcvtl_d_s((v8f32)_1); ++} ++ ++/* Assembly instruction format: xd, xj */ ++/* Data types in instruction templates: V8SI, V8SF */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvftint_w_s(__m256 _1) { ++ return (__m256i)__builtin_lasx_xvftint_w_s((v8f32)_1); ++} ++ ++/* Assembly instruction format: xd, xj */ ++/* Data types in instruction templates: V4DI, V4DF */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvftint_l_d(__m256d _1) { ++ return (__m256i)__builtin_lasx_xvftint_l_d((v4f64)_1); ++} ++ ++/* Assembly instruction format: xd, xj */ ++/* Data types in instruction templates: UV8SI, V8SF */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvftint_wu_s(__m256 _1) { ++ return (__m256i)__builtin_lasx_xvftint_wu_s((v8f32)_1); ++} ++ ++/* Assembly instruction format: xd, xj */ ++/* Data types in instruction templates: UV4DI, V4DF */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvftint_lu_d(__m256d _1) { ++ return (__m256i)__builtin_lasx_xvftint_lu_d((v4f64)_1); ++} ++ ++/* Assembly instruction format: xd, xj */ ++/* Data types in instruction templates: V8SI, V8SF */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvftintrz_w_s(__m256 _1) { ++ return (__m256i)__builtin_lasx_xvftintrz_w_s((v8f32)_1); ++} ++ ++/* Assembly instruction format: xd, xj */ ++/* Data types in instruction templates: V4DI, V4DF */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvftintrz_l_d(__m256d _1) { ++ return (__m256i)__builtin_lasx_xvftintrz_l_d((v4f64)_1); ++} ++ ++/* Assembly instruction format: xd, xj */ ++/* Data types in instruction templates: UV8SI, V8SF */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvftintrz_wu_s(__m256 _1) { ++ return (__m256i)__builtin_lasx_xvftintrz_wu_s((v8f32)_1); ++} ++ ++/* Assembly instruction format: xd, xj */ ++/* Data types in instruction templates: UV4DI, V4DF */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvftintrz_lu_d(__m256d _1) { ++ return (__m256i)__builtin_lasx_xvftintrz_lu_d((v4f64)_1); ++} ++ ++/* Assembly instruction format: xd, xj */ ++/* Data types in instruction templates: V8SF, V8SI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256 ++ __lasx_xvffint_s_w(__m256i _1) { ++ return (__m256)__builtin_lasx_xvffint_s_w((v8i32)_1); ++} ++ ++/* Assembly instruction format: xd, xj */ ++/* Data types in instruction templates: V4DF, V4DI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256d ++ __lasx_xvffint_d_l(__m256i _1) { ++ return (__m256d)__builtin_lasx_xvffint_d_l((v4i64)_1); ++} ++ ++/* Assembly instruction format: xd, xj */ ++/* Data types in 
instruction templates: V8SF, UV8SI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256 ++ __lasx_xvffint_s_wu(__m256i _1) { ++ return (__m256)__builtin_lasx_xvffint_s_wu((v8u32)_1); ++} ++ ++/* Assembly instruction format: xd, xj */ ++/* Data types in instruction templates: V4DF, UV4DI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256d ++ __lasx_xvffint_d_lu(__m256i _1) { ++ return (__m256d)__builtin_lasx_xvffint_d_lu((v4u64)_1); ++} ++ ++/* Assembly instruction format: xd, xj, rk */ ++/* Data types in instruction templates: V32QI, V32QI, SI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvreplve_b(__m256i _1, int _2) { ++ return (__m256i)__builtin_lasx_xvreplve_b((v32i8)_1, (int)_2); ++} ++ ++/* Assembly instruction format: xd, xj, rk */ ++/* Data types in instruction templates: V16HI, V16HI, SI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvreplve_h(__m256i _1, int _2) { ++ return (__m256i)__builtin_lasx_xvreplve_h((v16i16)_1, (int)_2); ++} ++ ++/* Assembly instruction format: xd, xj, rk */ ++/* Data types in instruction templates: V8SI, V8SI, SI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvreplve_w(__m256i _1, int _2) { ++ return (__m256i)__builtin_lasx_xvreplve_w((v8i32)_1, (int)_2); ++} ++ ++/* Assembly instruction format: xd, xj, rk */ ++/* Data types in instruction templates: V4DI, V4DI, SI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvreplve_d(__m256i _1, int _2) { ++ return (__m256i)__builtin_lasx_xvreplve_d((v4i64)_1, (int)_2); ++} ++ ++/* Assembly instruction format: xd, xj, ui8 */ ++/* Data types in instruction templates: V8SI, V8SI, V8SI, USI */ ++#define __lasx_xvpermi_w(/*__m256i*/ _1, /*__m256i*/ _2, /*ui8*/ _3) \ ++ ((__m256i)__builtin_lasx_xvpermi_w((v8i32)(_1), (v8i32)(_2), (_3))) ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: UV32QI, UV32QI, UV32QI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvandn_v(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvandn_v((v32u8)_1, (v32u8)_2); ++} ++ ++/* Assembly instruction format: xd, xj */ ++/* Data types in instruction templates: V32QI, V32QI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvneg_b(__m256i _1) { ++ return (__m256i)__builtin_lasx_xvneg_b((v32i8)_1); ++} ++ ++/* Assembly instruction format: xd, xj */ ++/* Data types in instruction templates: V16HI, V16HI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvneg_h(__m256i _1) { ++ return (__m256i)__builtin_lasx_xvneg_h((v16i16)_1); ++} ++ ++/* Assembly instruction format: xd, xj */ ++/* Data types in instruction templates: V8SI, V8SI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvneg_w(__m256i _1) { ++ return (__m256i)__builtin_lasx_xvneg_w((v8i32)_1); ++} ++ ++/* Assembly instruction format: xd, xj */ ++/* Data types in instruction templates: V4DI, V4DI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvneg_d(__m256i _1) { ++ return 
(__m256i)__builtin_lasx_xvneg_d((v4i64)_1); ++} ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: V32QI, V32QI, V32QI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvmuh_b(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvmuh_b((v32i8)_1, (v32i8)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: V16HI, V16HI, V16HI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvmuh_h(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvmuh_h((v16i16)_1, (v16i16)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: V8SI, V8SI, V8SI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvmuh_w(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvmuh_w((v8i32)_1, (v8i32)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: V4DI, V4DI, V4DI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvmuh_d(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvmuh_d((v4i64)_1, (v4i64)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: UV32QI, UV32QI, UV32QI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvmuh_bu(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvmuh_bu((v32u8)_1, (v32u8)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: UV16HI, UV16HI, UV16HI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvmuh_hu(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvmuh_hu((v16u16)_1, (v16u16)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: UV8SI, UV8SI, UV8SI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvmuh_wu(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvmuh_wu((v8u32)_1, (v8u32)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: UV4DI, UV4DI, UV4DI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvmuh_du(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvmuh_du((v4u64)_1, (v4u64)_2); ++} ++ ++/* Assembly instruction format: xd, xj, ui3 */ ++/* Data types in instruction templates: V16HI, V32QI, UQI */ ++#define __lasx_xvsllwil_h_b(/*__m256i*/ _1, /*ui3*/ _2) \ ++ ((__m256i)__builtin_lasx_xvsllwil_h_b((v32i8)(_1), (_2))) ++ ++/* Assembly instruction format: xd, xj, ui4 */ ++/* Data types in instruction templates: V8SI, V16HI, UQI */ ++#define __lasx_xvsllwil_w_h(/*__m256i*/ _1, /*ui4*/ _2) \ ++ ((__m256i)__builtin_lasx_xvsllwil_w_h((v16i16)(_1), (_2))) ++ ++/* Assembly instruction format: xd, xj, ui5 */ ++/* Data types in instruction templates: V4DI, V8SI, UQI */ ++#define __lasx_xvsllwil_d_w(/*__m256i*/ _1, /*ui5*/ _2) \ ++ ((__m256i)__builtin_lasx_xvsllwil_d_w((v8i32)(_1), (_2))) ++ ++/* Assembly instruction format: xd, xj, ui3 */ ++/* Data types in instruction templates: UV16HI, UV32QI, UQI */ ++#define __lasx_xvsllwil_hu_bu(/*__m256i*/ _1, /*ui3*/ _2) \ ++ 
((__m256i)__builtin_lasx_xvsllwil_hu_bu((v32u8)(_1), (_2))) ++ ++/* Assembly instruction format: xd, xj, ui4 */ ++/* Data types in instruction templates: UV8SI, UV16HI, UQI */ ++#define __lasx_xvsllwil_wu_hu(/*__m256i*/ _1, /*ui4*/ _2) \ ++ ((__m256i)__builtin_lasx_xvsllwil_wu_hu((v16u16)(_1), (_2))) ++ ++/* Assembly instruction format: xd, xj, ui5 */ ++/* Data types in instruction templates: UV4DI, UV8SI, UQI */ ++#define __lasx_xvsllwil_du_wu(/*__m256i*/ _1, /*ui5*/ _2) \ ++ ((__m256i)__builtin_lasx_xvsllwil_du_wu((v8u32)(_1), (_2))) ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: V32QI, V16HI, V16HI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvsran_b_h(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvsran_b_h((v16i16)_1, (v16i16)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: V16HI, V8SI, V8SI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvsran_h_w(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvsran_h_w((v8i32)_1, (v8i32)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: V8SI, V4DI, V4DI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvsran_w_d(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvsran_w_d((v4i64)_1, (v4i64)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: V32QI, V16HI, V16HI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvssran_b_h(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvssran_b_h((v16i16)_1, (v16i16)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: V16HI, V8SI, V8SI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvssran_h_w(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvssran_h_w((v8i32)_1, (v8i32)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: V8SI, V4DI, V4DI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvssran_w_d(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvssran_w_d((v4i64)_1, (v4i64)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: UV32QI, UV16HI, UV16HI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvssran_bu_h(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvssran_bu_h((v16u16)_1, (v16u16)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: UV16HI, UV8SI, UV8SI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvssran_hu_w(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvssran_hu_w((v8u32)_1, (v8u32)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: UV8SI, UV4DI, UV4DI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvssran_wu_d(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvssran_wu_d((v4u64)_1, (v4u64)_2); ++} ++ ++/* 
Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: V32QI, V16HI, V16HI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvsrarn_b_h(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvsrarn_b_h((v16i16)_1, (v16i16)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: V16HI, V8SI, V8SI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvsrarn_h_w(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvsrarn_h_w((v8i32)_1, (v8i32)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: V8SI, V4DI, V4DI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvsrarn_w_d(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvsrarn_w_d((v4i64)_1, (v4i64)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: V32QI, V16HI, V16HI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvssrarn_b_h(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvssrarn_b_h((v16i16)_1, (v16i16)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: V16HI, V8SI, V8SI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvssrarn_h_w(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvssrarn_h_w((v8i32)_1, (v8i32)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: V8SI, V4DI, V4DI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvssrarn_w_d(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvssrarn_w_d((v4i64)_1, (v4i64)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: UV32QI, UV16HI, UV16HI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvssrarn_bu_h(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvssrarn_bu_h((v16u16)_1, (v16u16)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: UV16HI, UV8SI, UV8SI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvssrarn_hu_w(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvssrarn_hu_w((v8u32)_1, (v8u32)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: UV8SI, UV4DI, UV4DI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvssrarn_wu_d(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvssrarn_wu_d((v4u64)_1, (v4u64)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: V32QI, V16HI, V16HI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvsrln_b_h(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvsrln_b_h((v16i16)_1, (v16i16)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: V16HI, V8SI, V8SI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ 
__lasx_xvsrln_h_w(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvsrln_h_w((v8i32)_1, (v8i32)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: V8SI, V4DI, V4DI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvsrln_w_d(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvsrln_w_d((v4i64)_1, (v4i64)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: UV32QI, UV16HI, UV16HI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvssrln_bu_h(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvssrln_bu_h((v16u16)_1, (v16u16)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: UV16HI, UV8SI, UV8SI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvssrln_hu_w(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvssrln_hu_w((v8u32)_1, (v8u32)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: UV8SI, UV4DI, UV4DI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvssrln_wu_d(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvssrln_wu_d((v4u64)_1, (v4u64)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: V32QI, V16HI, V16HI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvsrlrn_b_h(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvsrlrn_b_h((v16i16)_1, (v16i16)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: V16HI, V8SI, V8SI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvsrlrn_h_w(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvsrlrn_h_w((v8i32)_1, (v8i32)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: V8SI, V4DI, V4DI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvsrlrn_w_d(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvsrlrn_w_d((v4i64)_1, (v4i64)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: UV32QI, UV16HI, UV16HI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvssrlrn_bu_h(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvssrlrn_bu_h((v16u16)_1, (v16u16)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: UV16HI, UV8SI, UV8SI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvssrlrn_hu_w(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvssrlrn_hu_w((v8u32)_1, (v8u32)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: UV8SI, UV4DI, UV4DI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvssrlrn_wu_d(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvssrlrn_wu_d((v4u64)_1, (v4u64)_2); ++} ++ ++/* Assembly instruction format: xd, xj, ui5 */ ++/* Data types in instruction 
templates: V32QI, V32QI, V32QI, UQI */ ++#define __lasx_xvfrstpi_b(/*__m256i*/ _1, /*__m256i*/ _2, /*ui5*/ _3) \ ++ ((__m256i)__builtin_lasx_xvfrstpi_b((v32i8)(_1), (v32i8)(_2), (_3))) ++ ++/* Assembly instruction format: xd, xj, ui5 */ ++/* Data types in instruction templates: V16HI, V16HI, V16HI, UQI */ ++#define __lasx_xvfrstpi_h(/*__m256i*/ _1, /*__m256i*/ _2, /*ui5*/ _3) \ ++ ((__m256i)__builtin_lasx_xvfrstpi_h((v16i16)(_1), (v16i16)(_2), (_3))) ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: V32QI, V32QI, V32QI, V32QI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvfrstp_b(__m256i _1, __m256i _2, __m256i _3) { ++ return (__m256i)__builtin_lasx_xvfrstp_b((v32i8)_1, (v32i8)_2, (v32i8)_3); ++} ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: V16HI, V16HI, V16HI, V16HI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvfrstp_h(__m256i _1, __m256i _2, __m256i _3) { ++ return (__m256i)__builtin_lasx_xvfrstp_h((v16i16)_1, (v16i16)_2, (v16i16)_3); ++} ++ ++/* Assembly instruction format: xd, xj, ui8 */ ++/* Data types in instruction templates: V4DI, V4DI, V4DI, USI */ ++#define __lasx_xvshuf4i_d(/*__m256i*/ _1, /*__m256i*/ _2, /*ui8*/ _3) \ ++ ((__m256i)__builtin_lasx_xvshuf4i_d((v4i64)(_1), (v4i64)(_2), (_3))) ++ ++/* Assembly instruction format: xd, xj, ui5 */ ++/* Data types in instruction templates: V32QI, V32QI, UQI */ ++#define __lasx_xvbsrl_v(/*__m256i*/ _1, /*ui5*/ _2) \ ++ ((__m256i)__builtin_lasx_xvbsrl_v((v32i8)(_1), (_2))) ++ ++/* Assembly instruction format: xd, xj, ui5 */ ++/* Data types in instruction templates: V32QI, V32QI, UQI */ ++#define __lasx_xvbsll_v(/*__m256i*/ _1, /*ui5*/ _2) \ ++ ((__m256i)__builtin_lasx_xvbsll_v((v32i8)(_1), (_2))) ++ ++/* Assembly instruction format: xd, xj, ui8 */ ++/* Data types in instruction templates: V32QI, V32QI, V32QI, UQI */ ++#define __lasx_xvextrins_b(/*__m256i*/ _1, /*__m256i*/ _2, /*ui8*/ _3) \ ++ ((__m256i)__builtin_lasx_xvextrins_b((v32i8)(_1), (v32i8)(_2), (_3))) ++ ++/* Assembly instruction format: xd, xj, ui8 */ ++/* Data types in instruction templates: V16HI, V16HI, V16HI, UQI */ ++#define __lasx_xvextrins_h(/*__m256i*/ _1, /*__m256i*/ _2, /*ui8*/ _3) \ ++ ((__m256i)__builtin_lasx_xvextrins_h((v16i16)(_1), (v16i16)(_2), (_3))) ++ ++/* Assembly instruction format: xd, xj, ui8 */ ++/* Data types in instruction templates: V8SI, V8SI, V8SI, UQI */ ++#define __lasx_xvextrins_w(/*__m256i*/ _1, /*__m256i*/ _2, /*ui8*/ _3) \ ++ ((__m256i)__builtin_lasx_xvextrins_w((v8i32)(_1), (v8i32)(_2), (_3))) ++ ++/* Assembly instruction format: xd, xj, ui8 */ ++/* Data types in instruction templates: V4DI, V4DI, V4DI, UQI */ ++#define __lasx_xvextrins_d(/*__m256i*/ _1, /*__m256i*/ _2, /*ui8*/ _3) \ ++ ((__m256i)__builtin_lasx_xvextrins_d((v4i64)(_1), (v4i64)(_2), (_3))) ++ ++/* Assembly instruction format: xd, xj */ ++/* Data types in instruction templates: V32QI, V32QI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvmskltz_b(__m256i _1) { ++ return (__m256i)__builtin_lasx_xvmskltz_b((v32i8)_1); ++} ++ ++/* Assembly instruction format: xd, xj */ ++/* Data types in instruction templates: V16HI, V16HI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvmskltz_h(__m256i _1) { ++ return 
(__m256i)__builtin_lasx_xvmskltz_h((v16i16)_1); ++} ++ ++/* Assembly instruction format: xd, xj */ ++/* Data types in instruction templates: V8SI, V8SI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvmskltz_w(__m256i _1) { ++ return (__m256i)__builtin_lasx_xvmskltz_w((v8i32)_1); ++} ++ ++/* Assembly instruction format: xd, xj */ ++/* Data types in instruction templates: V4DI, V4DI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvmskltz_d(__m256i _1) { ++ return (__m256i)__builtin_lasx_xvmskltz_d((v4i64)_1); ++} ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: V32QI, V32QI, V32QI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvsigncov_b(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvsigncov_b((v32i8)_1, (v32i8)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: V16HI, V16HI, V16HI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvsigncov_h(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvsigncov_h((v16i16)_1, (v16i16)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: V8SI, V8SI, V8SI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvsigncov_w(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvsigncov_w((v8i32)_1, (v8i32)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: V4DI, V4DI, V4DI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvsigncov_d(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvsigncov_d((v4i64)_1, (v4i64)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk, xa */ ++/* Data types in instruction templates: V8SF, V8SF, V8SF, V8SF */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256 ++ __lasx_xvfmadd_s(__m256 _1, __m256 _2, __m256 _3) { ++ return (__m256)__builtin_lasx_xvfmadd_s((v8f32)_1, (v8f32)_2, (v8f32)_3); ++} ++ ++/* Assembly instruction format: xd, xj, xk, xa */ ++/* Data types in instruction templates: V4DF, V4DF, V4DF, V4DF */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256d ++ __lasx_xvfmadd_d(__m256d _1, __m256d _2, __m256d _3) { ++ return (__m256d)__builtin_lasx_xvfmadd_d((v4f64)_1, (v4f64)_2, (v4f64)_3); ++} ++ ++/* Assembly instruction format: xd, xj, xk, xa */ ++/* Data types in instruction templates: V8SF, V8SF, V8SF, V8SF */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256 ++ __lasx_xvfmsub_s(__m256 _1, __m256 _2, __m256 _3) { ++ return (__m256)__builtin_lasx_xvfmsub_s((v8f32)_1, (v8f32)_2, (v8f32)_3); ++} ++ ++/* Assembly instruction format: xd, xj, xk, xa */ ++/* Data types in instruction templates: V4DF, V4DF, V4DF, V4DF */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256d ++ __lasx_xvfmsub_d(__m256d _1, __m256d _2, __m256d _3) { ++ return (__m256d)__builtin_lasx_xvfmsub_d((v4f64)_1, (v4f64)_2, (v4f64)_3); ++} ++ ++/* Assembly instruction format: xd, xj, xk, xa */ ++/* Data types in instruction templates: V8SF, V8SF, V8SF, V8SF */ ++extern __inline ++ 
__attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256 ++ __lasx_xvfnmadd_s(__m256 _1, __m256 _2, __m256 _3) { ++ return (__m256)__builtin_lasx_xvfnmadd_s((v8f32)_1, (v8f32)_2, (v8f32)_3); ++} ++ ++/* Assembly instruction format: xd, xj, xk, xa */ ++/* Data types in instruction templates: V4DF, V4DF, V4DF, V4DF */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256d ++ __lasx_xvfnmadd_d(__m256d _1, __m256d _2, __m256d _3) { ++ return (__m256d)__builtin_lasx_xvfnmadd_d((v4f64)_1, (v4f64)_2, (v4f64)_3); ++} ++ ++/* Assembly instruction format: xd, xj, xk, xa */ ++/* Data types in instruction templates: V8SF, V8SF, V8SF, V8SF */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256 ++ __lasx_xvfnmsub_s(__m256 _1, __m256 _2, __m256 _3) { ++ return (__m256)__builtin_lasx_xvfnmsub_s((v8f32)_1, (v8f32)_2, (v8f32)_3); ++} ++ ++/* Assembly instruction format: xd, xj, xk, xa */ ++/* Data types in instruction templates: V4DF, V4DF, V4DF, V4DF */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256d ++ __lasx_xvfnmsub_d(__m256d _1, __m256d _2, __m256d _3) { ++ return (__m256d)__builtin_lasx_xvfnmsub_d((v4f64)_1, (v4f64)_2, (v4f64)_3); ++} ++ ++/* Assembly instruction format: xd, xj */ ++/* Data types in instruction templates: V8SI, V8SF */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvftintrne_w_s(__m256 _1) { ++ return (__m256i)__builtin_lasx_xvftintrne_w_s((v8f32)_1); ++} ++ ++/* Assembly instruction format: xd, xj */ ++/* Data types in instruction templates: V4DI, V4DF */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvftintrne_l_d(__m256d _1) { ++ return (__m256i)__builtin_lasx_xvftintrne_l_d((v4f64)_1); ++} ++ ++/* Assembly instruction format: xd, xj */ ++/* Data types in instruction templates: V8SI, V8SF */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvftintrp_w_s(__m256 _1) { ++ return (__m256i)__builtin_lasx_xvftintrp_w_s((v8f32)_1); ++} ++ ++/* Assembly instruction format: xd, xj */ ++/* Data types in instruction templates: V4DI, V4DF */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvftintrp_l_d(__m256d _1) { ++ return (__m256i)__builtin_lasx_xvftintrp_l_d((v4f64)_1); ++} ++ ++/* Assembly instruction format: xd, xj */ ++/* Data types in instruction templates: V8SI, V8SF */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvftintrm_w_s(__m256 _1) { ++ return (__m256i)__builtin_lasx_xvftintrm_w_s((v8f32)_1); ++} ++ ++/* Assembly instruction format: xd, xj */ ++/* Data types in instruction templates: V4DI, V4DF */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvftintrm_l_d(__m256d _1) { ++ return (__m256i)__builtin_lasx_xvftintrm_l_d((v4f64)_1); ++} ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: V8SI, V4DF, V4DF */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvftint_w_d(__m256d _1, __m256d _2) { ++ return (__m256i)__builtin_lasx_xvftint_w_d((v4f64)_1, (v4f64)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: V8SF, V4DI, V4DI */ ++extern 
__inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256 ++ __lasx_xvffint_s_l(__m256i _1, __m256i _2) { ++ return (__m256)__builtin_lasx_xvffint_s_l((v4i64)_1, (v4i64)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: V8SI, V4DF, V4DF */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvftintrz_w_d(__m256d _1, __m256d _2) { ++ return (__m256i)__builtin_lasx_xvftintrz_w_d((v4f64)_1, (v4f64)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: V8SI, V4DF, V4DF */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvftintrp_w_d(__m256d _1, __m256d _2) { ++ return (__m256i)__builtin_lasx_xvftintrp_w_d((v4f64)_1, (v4f64)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: V8SI, V4DF, V4DF */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvftintrm_w_d(__m256d _1, __m256d _2) { ++ return (__m256i)__builtin_lasx_xvftintrm_w_d((v4f64)_1, (v4f64)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: V8SI, V4DF, V4DF */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvftintrne_w_d(__m256d _1, __m256d _2) { ++ return (__m256i)__builtin_lasx_xvftintrne_w_d((v4f64)_1, (v4f64)_2); ++} ++ ++/* Assembly instruction format: xd, xj */ ++/* Data types in instruction templates: V4DI, V8SF */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvftinth_l_s(__m256 _1) { ++ return (__m256i)__builtin_lasx_xvftinth_l_s((v8f32)_1); ++} ++ ++/* Assembly instruction format: xd, xj */ ++/* Data types in instruction templates: V4DI, V8SF */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvftintl_l_s(__m256 _1) { ++ return (__m256i)__builtin_lasx_xvftintl_l_s((v8f32)_1); ++} ++ ++/* Assembly instruction format: xd, xj */ ++/* Data types in instruction templates: V4DF, V8SI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256d ++ __lasx_xvffinth_d_w(__m256i _1) { ++ return (__m256d)__builtin_lasx_xvffinth_d_w((v8i32)_1); ++} ++ ++/* Assembly instruction format: xd, xj */ ++/* Data types in instruction templates: V4DF, V8SI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256d ++ __lasx_xvffintl_d_w(__m256i _1) { ++ return (__m256d)__builtin_lasx_xvffintl_d_w((v8i32)_1); ++} ++ ++/* Assembly instruction format: xd, xj */ ++/* Data types in instruction templates: V4DI, V8SF */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvftintrzh_l_s(__m256 _1) { ++ return (__m256i)__builtin_lasx_xvftintrzh_l_s((v8f32)_1); ++} ++ ++/* Assembly instruction format: xd, xj */ ++/* Data types in instruction templates: V4DI, V8SF */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvftintrzl_l_s(__m256 _1) { ++ return (__m256i)__builtin_lasx_xvftintrzl_l_s((v8f32)_1); ++} ++ ++/* Assembly instruction format: xd, xj */ ++/* Data types in instruction templates: V4DI, V8SF */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ 
__lasx_xvftintrph_l_s(__m256 _1) { ++ return (__m256i)__builtin_lasx_xvftintrph_l_s((v8f32)_1); ++} ++ ++/* Assembly instruction format: xd, xj */ ++/* Data types in instruction templates: V4DI, V8SF */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvftintrpl_l_s(__m256 _1) { ++ return (__m256i)__builtin_lasx_xvftintrpl_l_s((v8f32)_1); ++} ++ ++/* Assembly instruction format: xd, xj */ ++/* Data types in instruction templates: V4DI, V8SF */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvftintrmh_l_s(__m256 _1) { ++ return (__m256i)__builtin_lasx_xvftintrmh_l_s((v8f32)_1); ++} ++ ++/* Assembly instruction format: xd, xj */ ++/* Data types in instruction templates: V4DI, V8SF */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvftintrml_l_s(__m256 _1) { ++ return (__m256i)__builtin_lasx_xvftintrml_l_s((v8f32)_1); ++} ++ ++/* Assembly instruction format: xd, xj */ ++/* Data types in instruction templates: V4DI, V8SF */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvftintrneh_l_s(__m256 _1) { ++ return (__m256i)__builtin_lasx_xvftintrneh_l_s((v8f32)_1); ++} ++ ++/* Assembly instruction format: xd, xj */ ++/* Data types in instruction templates: V4DI, V8SF */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvftintrnel_l_s(__m256 _1) { ++ return (__m256i)__builtin_lasx_xvftintrnel_l_s((v8f32)_1); ++} ++ ++/* Assembly instruction format: xd, xj */ ++/* Data types in instruction templates: V8SI, V8SF */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvfrintrne_s(__m256 _1) { ++ return (__m256i)__builtin_lasx_xvfrintrne_s((v8f32)_1); ++} ++ ++/* Assembly instruction format: xd, xj */ ++/* Data types in instruction templates: V4DI, V4DF */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvfrintrne_d(__m256d _1) { ++ return (__m256i)__builtin_lasx_xvfrintrne_d((v4f64)_1); ++} ++ ++/* Assembly instruction format: xd, xj */ ++/* Data types in instruction templates: V8SI, V8SF */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvfrintrz_s(__m256 _1) { ++ return (__m256i)__builtin_lasx_xvfrintrz_s((v8f32)_1); ++} ++ ++/* Assembly instruction format: xd, xj */ ++/* Data types in instruction templates: V4DI, V4DF */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvfrintrz_d(__m256d _1) { ++ return (__m256i)__builtin_lasx_xvfrintrz_d((v4f64)_1); ++} ++ ++/* Assembly instruction format: xd, xj */ ++/* Data types in instruction templates: V8SI, V8SF */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvfrintrp_s(__m256 _1) { ++ return (__m256i)__builtin_lasx_xvfrintrp_s((v8f32)_1); ++} ++ ++/* Assembly instruction format: xd, xj */ ++/* Data types in instruction templates: V4DI, V4DF */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvfrintrp_d(__m256d _1) { ++ return (__m256i)__builtin_lasx_xvfrintrp_d((v4f64)_1); ++} ++ ++/* Assembly instruction format: xd, xj */ ++/* Data types in instruction templates: V8SI, V8SF */ ++extern __inline ++ __attribute__((__gnu_inline__, 
__always_inline__, __artificial__)) __m256i ++ __lasx_xvfrintrm_s(__m256 _1) { ++ return (__m256i)__builtin_lasx_xvfrintrm_s((v8f32)_1); ++} ++ ++/* Assembly instruction format: xd, xj */ ++/* Data types in instruction templates: V4DI, V4DF */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvfrintrm_d(__m256d _1) { ++ return (__m256i)__builtin_lasx_xvfrintrm_d((v4f64)_1); ++} ++ ++/* Assembly instruction format: xd, rj, si12 */ ++/* Data types in instruction templates: V32QI, CVPOINTER, SI */ ++#define __lasx_xvld(/*void **/ _1, /*si12*/ _2) \ ++ ((__m256i)__builtin_lasx_xvld((void *)(_1), (_2))) ++ ++/* Assembly instruction format: xd, rj, si12 */ ++/* Data types in instruction templates: VOID, V32QI, CVPOINTER, SI */ ++#define __lasx_xvst(/*__m256i*/ _1, /*void **/ _2, /*si12*/ _3) \ ++ ((void)__builtin_lasx_xvst((v32i8)(_1), (void *)(_2), (_3))) ++ ++/* Assembly instruction format: xd, rj, si8, idx */ ++/* Data types in instruction templates: VOID, V32QI, CVPOINTER, SI, UQI */ ++#define __lasx_xvstelm_b(/*__m256i*/ _1, /*void **/ _2, /*si8*/ _3, \ ++ /*idx*/ _4) \ ++ ((void)__builtin_lasx_xvstelm_b((v32i8)(_1), (void *)(_2), (_3), (_4))) ++ ++/* Assembly instruction format: xd, rj, si8, idx */ ++/* Data types in instruction templates: VOID, V16HI, CVPOINTER, SI, UQI */ ++#define __lasx_xvstelm_h(/*__m256i*/ _1, /*void **/ _2, /*si8*/ _3, \ ++ /*idx*/ _4) \ ++ ((void)__builtin_lasx_xvstelm_h((v16i16)(_1), (void *)(_2), (_3), (_4))) ++ ++/* Assembly instruction format: xd, rj, si8, idx */ ++/* Data types in instruction templates: VOID, V8SI, CVPOINTER, SI, UQI */ ++#define __lasx_xvstelm_w(/*__m256i*/ _1, /*void **/ _2, /*si8*/ _3, \ ++ /*idx*/ _4) \ ++ ((void)__builtin_lasx_xvstelm_w((v8i32)(_1), (void *)(_2), (_3), (_4))) ++ ++/* Assembly instruction format: xd, rj, si8, idx */ ++/* Data types in instruction templates: VOID, V4DI, CVPOINTER, SI, UQI */ ++#define __lasx_xvstelm_d(/*__m256i*/ _1, /*void **/ _2, /*si8*/ _3, \ ++ /*idx*/ _4) \ ++ ((void)__builtin_lasx_xvstelm_d((v4i64)(_1), (void *)(_2), (_3), (_4))) ++ ++/* Assembly instruction format: xd, xj, ui3 */ ++/* Data types in instruction templates: V8SI, V8SI, V8SI, UQI */ ++#define __lasx_xvinsve0_w(/*__m256i*/ _1, /*__m256i*/ _2, /*ui3*/ _3) \ ++ ((__m256i)__builtin_lasx_xvinsve0_w((v8i32)(_1), (v8i32)(_2), (_3))) ++ ++/* Assembly instruction format: xd, xj, ui2 */ ++/* Data types in instruction templates: V4DI, V4DI, V4DI, UQI */ ++#define __lasx_xvinsve0_d(/*__m256i*/ _1, /*__m256i*/ _2, /*ui2*/ _3) \ ++ ((__m256i)__builtin_lasx_xvinsve0_d((v4i64)(_1), (v4i64)(_2), (_3))) ++ ++/* Assembly instruction format: xd, xj, ui3 */ ++/* Data types in instruction templates: V8SI, V8SI, UQI */ ++#define __lasx_xvpickve_w(/*__m256i*/ _1, /*ui3*/ _2) \ ++ ((__m256i)__builtin_lasx_xvpickve_w((v8i32)(_1), (_2))) ++ ++/* Assembly instruction format: xd, xj, ui2 */ ++/* Data types in instruction templates: V4DI, V4DI, UQI */ ++#define __lasx_xvpickve_d(/*__m256i*/ _1, /*ui2*/ _2) \ ++ ((__m256i)__builtin_lasx_xvpickve_d((v4i64)(_1), (_2))) ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: V32QI, V16HI, V16HI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvssrlrn_b_h(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvssrlrn_b_h((v16i16)_1, (v16i16)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: V16HI, V8SI, V8SI */ 
++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvssrlrn_h_w(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvssrlrn_h_w((v8i32)_1, (v8i32)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: V8SI, V4DI, V4DI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvssrlrn_w_d(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvssrlrn_w_d((v4i64)_1, (v4i64)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: V32QI, V16HI, V16HI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvssrln_b_h(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvssrln_b_h((v16i16)_1, (v16i16)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: V16HI, V8SI, V8SI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvssrln_h_w(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvssrln_h_w((v8i32)_1, (v8i32)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: V8SI, V4DI, V4DI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvssrln_w_d(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvssrln_w_d((v4i64)_1, (v4i64)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: V32QI, V32QI, V32QI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvorn_v(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvorn_v((v32i8)_1, (v32i8)_2); ++} ++ ++/* Assembly instruction format: xd, i13 */ ++/* Data types in instruction templates: V4DI, HI */ ++#define __lasx_xvldi(/*i13*/ _1) ((__m256i)__builtin_lasx_xvldi((_1))) ++ ++/* Assembly instruction format: xd, rj, rk */ ++/* Data types in instruction templates: V32QI, CVPOINTER, DI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvldx(void *_1, long int _2) { ++ return (__m256i)__builtin_lasx_xvldx((void *)_1, (long int)_2); ++} ++ ++/* Assembly instruction format: xd, rj, rk */ ++/* Data types in instruction templates: VOID, V32QI, CVPOINTER, DI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) void ++ __lasx_xvstx(__m256i _1, void *_2, long int _3) { ++ return (void)__builtin_lasx_xvstx((v32i8)_1, (void *)_2, (long int)_3); ++} ++ ++/* Assembly instruction format: xd, xj. */ ++/* Data types in instruction templates: UV4DI, UV4DI. 
*/ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvextl_qu_du(__m256i _1) { ++ return (__m256i)__builtin_lasx_xvextl_qu_du((v4u64)_1); ++} ++ ++/* Assembly instruction format: xd, rj, ui3 */ ++/* Data types in instruction templates: V8SI, V8SI, SI, UQI */ ++#define __lasx_xvinsgr2vr_w(/*__m256i*/ _1, /*int*/ _2, /*ui3*/ _3) \ ++ ((__m256i)__builtin_lasx_xvinsgr2vr_w((v8i32)(_1), (int)(_2), (_3))) ++ ++/* Assembly instruction format: xd, rj, ui2 */ ++/* Data types in instruction templates: V4DI, V4DI, DI, UQI */ ++#define __lasx_xvinsgr2vr_d(/*__m256i*/ _1, /*long int*/ _2, /*ui2*/ _3) \ ++ ((__m256i)__builtin_lasx_xvinsgr2vr_d((v4i64)(_1), (long int)(_2), (_3))) ++ ++/* Assembly instruction format: xd, xj */ ++/* Data types in instruction templates: V32QI, V32QI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvreplve0_b(__m256i _1) { ++ return (__m256i)__builtin_lasx_xvreplve0_b((v32i8)_1); ++} ++ ++/* Assembly instruction format: xd, xj */ ++/* Data types in instruction templates: V16HI, V16HI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvreplve0_h(__m256i _1) { ++ return (__m256i)__builtin_lasx_xvreplve0_h((v16i16)_1); ++} ++ ++/* Assembly instruction format: xd, xj */ ++/* Data types in instruction templates: V8SI, V8SI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvreplve0_w(__m256i _1) { ++ return (__m256i)__builtin_lasx_xvreplve0_w((v8i32)_1); ++} ++ ++/* Assembly instruction format: xd, xj */ ++/* Data types in instruction templates: V4DI, V4DI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvreplve0_d(__m256i _1) { ++ return (__m256i)__builtin_lasx_xvreplve0_d((v4i64)_1); ++} ++ ++/* Assembly instruction format: xd, xj */ ++/* Data types in instruction templates: V32QI, V32QI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvreplve0_q(__m256i _1) { ++ return (__m256i)__builtin_lasx_xvreplve0_q((v32i8)_1); ++} ++ ++/* Assembly instruction format: xd, xj */ ++/* Data types in instruction templates: V16HI, V32QI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_vext2xv_h_b(__m256i _1) { ++ return (__m256i)__builtin_lasx_vext2xv_h_b((v32i8)_1); ++} ++ ++/* Assembly instruction format: xd, xj */ ++/* Data types in instruction templates: V8SI, V16HI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_vext2xv_w_h(__m256i _1) { ++ return (__m256i)__builtin_lasx_vext2xv_w_h((v16i16)_1); ++} ++ ++/* Assembly instruction format: xd, xj */ ++/* Data types in instruction templates: V4DI, V8SI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_vext2xv_d_w(__m256i _1) { ++ return (__m256i)__builtin_lasx_vext2xv_d_w((v8i32)_1); ++} ++ ++/* Assembly instruction format: xd, xj */ ++/* Data types in instruction templates: V8SI, V32QI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_vext2xv_w_b(__m256i _1) { ++ return (__m256i)__builtin_lasx_vext2xv_w_b((v32i8)_1); ++} ++ ++/* Assembly instruction format: xd, xj */ ++/* Data types in instruction templates: V4DI, V16HI */ ++extern __inline ++ 
__attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_vext2xv_d_h(__m256i _1) { ++ return (__m256i)__builtin_lasx_vext2xv_d_h((v16i16)_1); ++} ++ ++/* Assembly instruction format: xd, xj */ ++/* Data types in instruction templates: V4DI, V32QI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_vext2xv_d_b(__m256i _1) { ++ return (__m256i)__builtin_lasx_vext2xv_d_b((v32i8)_1); ++} ++ ++/* Assembly instruction format: xd, xj */ ++/* Data types in instruction templates: V16HI, V32QI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_vext2xv_hu_bu(__m256i _1) { ++ return (__m256i)__builtin_lasx_vext2xv_hu_bu((v32i8)_1); ++} ++ ++/* Assembly instruction format: xd, xj */ ++/* Data types in instruction templates: V8SI, V16HI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_vext2xv_wu_hu(__m256i _1) { ++ return (__m256i)__builtin_lasx_vext2xv_wu_hu((v16i16)_1); ++} ++ ++/* Assembly instruction format: xd, xj */ ++/* Data types in instruction templates: V4DI, V8SI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_vext2xv_du_wu(__m256i _1) { ++ return (__m256i)__builtin_lasx_vext2xv_du_wu((v8i32)_1); ++} ++ ++/* Assembly instruction format: xd, xj */ ++/* Data types in instruction templates: V8SI, V32QI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_vext2xv_wu_bu(__m256i _1) { ++ return (__m256i)__builtin_lasx_vext2xv_wu_bu((v32i8)_1); ++} ++ ++/* Assembly instruction format: xd, xj */ ++/* Data types in instruction templates: V4DI, V16HI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_vext2xv_du_hu(__m256i _1) { ++ return (__m256i)__builtin_lasx_vext2xv_du_hu((v16i16)_1); ++} ++ ++/* Assembly instruction format: xd, xj */ ++/* Data types in instruction templates: V4DI, V32QI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_vext2xv_du_bu(__m256i _1) { ++ return (__m256i)__builtin_lasx_vext2xv_du_bu((v32i8)_1); ++} ++ ++/* Assembly instruction format: xd, xj, ui8 */ ++/* Data types in instruction templates: V32QI, V32QI, V32QI, USI */ ++#define __lasx_xvpermi_q(/*__m256i*/ _1, /*__m256i*/ _2, /*ui8*/ _3) \ ++ ((__m256i)__builtin_lasx_xvpermi_q((v32i8)(_1), (v32i8)(_2), (_3))) ++ ++/* Assembly instruction format: xd, xj, ui8 */ ++/* Data types in instruction templates: V4DI, V4DI, USI */ ++#define __lasx_xvpermi_d(/*__m256i*/ _1, /*ui8*/ _2) \ ++ ((__m256i)__builtin_lasx_xvpermi_d((v4i64)(_1), (_2))) ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: V8SI, V8SI, V8SI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvperm_w(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvperm_w((v8i32)_1, (v8i32)_2); ++} ++ ++/* Assembly instruction format: xd, rj, si12 */ ++/* Data types in instruction templates: V32QI, CVPOINTER, SI */ ++#define __lasx_xvldrepl_b(/*void **/ _1, /*si12*/ _2) \ ++ ((__m256i)__builtin_lasx_xvldrepl_b((void *)(_1), (_2))) ++ ++/* Assembly instruction format: xd, rj, si11 */ ++/* Data types in instruction templates: V16HI, CVPOINTER, SI */ ++#define __lasx_xvldrepl_h(/*void **/ _1, /*si11*/ _2) \ ++ 
((__m256i)__builtin_lasx_xvldrepl_h((void *)(_1), (_2))) ++ ++/* Assembly instruction format: xd, rj, si10 */ ++/* Data types in instruction templates: V8SI, CVPOINTER, SI */ ++#define __lasx_xvldrepl_w(/*void **/ _1, /*si10*/ _2) \ ++ ((__m256i)__builtin_lasx_xvldrepl_w((void *)(_1), (_2))) ++ ++/* Assembly instruction format: xd, rj, si9 */ ++/* Data types in instruction templates: V4DI, CVPOINTER, SI */ ++#define __lasx_xvldrepl_d(/*void **/ _1, /*si9*/ _2) \ ++ ((__m256i)__builtin_lasx_xvldrepl_d((void *)(_1), (_2))) ++ ++/* Assembly instruction format: rd, xj, ui3 */ ++/* Data types in instruction templates: SI, V8SI, UQI */ ++#define __lasx_xvpickve2gr_w(/*__m256i*/ _1, /*ui3*/ _2) \ ++ ((int)__builtin_lasx_xvpickve2gr_w((v8i32)(_1), (_2))) ++ ++/* Assembly instruction format: rd, xj, ui3 */ ++/* Data types in instruction templates: USI, V8SI, UQI */ ++#define __lasx_xvpickve2gr_wu(/*__m256i*/ _1, /*ui3*/ _2) \ ++ ((unsigned int)__builtin_lasx_xvpickve2gr_wu((v8i32)(_1), (_2))) ++ ++/* Assembly instruction format: rd, xj, ui2 */ ++/* Data types in instruction templates: DI, V4DI, UQI */ ++#define __lasx_xvpickve2gr_d(/*__m256i*/ _1, /*ui2*/ _2) \ ++ ((long int)__builtin_lasx_xvpickve2gr_d((v4i64)(_1), (_2))) ++ ++/* Assembly instruction format: rd, xj, ui2 */ ++/* Data types in instruction templates: UDI, V4DI, UQI */ ++#define __lasx_xvpickve2gr_du(/*__m256i*/ _1, /*ui2*/ _2) \ ++ ((unsigned long int)__builtin_lasx_xvpickve2gr_du((v4i64)(_1), (_2))) ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: V4DI, V4DI, V4DI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvaddwev_q_d(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvaddwev_q_d((v4i64)_1, (v4i64)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: V4DI, V8SI, V8SI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvaddwev_d_w(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvaddwev_d_w((v8i32)_1, (v8i32)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: V8SI, V16HI, V16HI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvaddwev_w_h(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvaddwev_w_h((v16i16)_1, (v16i16)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: V16HI, V32QI, V32QI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvaddwev_h_b(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvaddwev_h_b((v32i8)_1, (v32i8)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: V4DI, UV4DI, UV4DI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvaddwev_q_du(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvaddwev_q_du((v4u64)_1, (v4u64)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: V4DI, UV8SI, UV8SI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvaddwev_d_wu(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvaddwev_d_wu((v8u32)_1, (v8u32)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types 
in instruction templates: V8SI, UV16HI, UV16HI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvaddwev_w_hu(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvaddwev_w_hu((v16u16)_1, (v16u16)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: V16HI, UV32QI, UV32QI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvaddwev_h_bu(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvaddwev_h_bu((v32u8)_1, (v32u8)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: V4DI, V4DI, V4DI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvsubwev_q_d(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvsubwev_q_d((v4i64)_1, (v4i64)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: V4DI, V8SI, V8SI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvsubwev_d_w(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvsubwev_d_w((v8i32)_1, (v8i32)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: V8SI, V16HI, V16HI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvsubwev_w_h(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvsubwev_w_h((v16i16)_1, (v16i16)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: V16HI, V32QI, V32QI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvsubwev_h_b(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvsubwev_h_b((v32i8)_1, (v32i8)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: V4DI, UV4DI, UV4DI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvsubwev_q_du(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvsubwev_q_du((v4u64)_1, (v4u64)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: V4DI, UV8SI, UV8SI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvsubwev_d_wu(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvsubwev_d_wu((v8u32)_1, (v8u32)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: V8SI, UV16HI, UV16HI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvsubwev_w_hu(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvsubwev_w_hu((v16u16)_1, (v16u16)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: V16HI, UV32QI, UV32QI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvsubwev_h_bu(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvsubwev_h_bu((v32u8)_1, (v32u8)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: V4DI, V4DI, V4DI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvmulwev_q_d(__m256i _1, __m256i _2) 
{ ++ return (__m256i)__builtin_lasx_xvmulwev_q_d((v4i64)_1, (v4i64)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: V4DI, V8SI, V8SI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvmulwev_d_w(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvmulwev_d_w((v8i32)_1, (v8i32)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: V8SI, V16HI, V16HI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvmulwev_w_h(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvmulwev_w_h((v16i16)_1, (v16i16)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: V16HI, V32QI, V32QI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvmulwev_h_b(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvmulwev_h_b((v32i8)_1, (v32i8)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: V4DI, UV4DI, UV4DI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvmulwev_q_du(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvmulwev_q_du((v4u64)_1, (v4u64)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: V4DI, UV8SI, UV8SI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvmulwev_d_wu(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvmulwev_d_wu((v8u32)_1, (v8u32)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: V8SI, UV16HI, UV16HI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvmulwev_w_hu(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvmulwev_w_hu((v16u16)_1, (v16u16)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: V16HI, UV32QI, UV32QI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvmulwev_h_bu(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvmulwev_h_bu((v32u8)_1, (v32u8)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: V4DI, V4DI, V4DI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvaddwod_q_d(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvaddwod_q_d((v4i64)_1, (v4i64)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: V4DI, V8SI, V8SI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvaddwod_d_w(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvaddwod_d_w((v8i32)_1, (v8i32)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: V8SI, V16HI, V16HI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvaddwod_w_h(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvaddwod_w_h((v16i16)_1, (v16i16)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: V16HI, V32QI, V32QI */ ++extern 
__inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvaddwod_h_b(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvaddwod_h_b((v32i8)_1, (v32i8)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: V4DI, UV4DI, UV4DI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvaddwod_q_du(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvaddwod_q_du((v4u64)_1, (v4u64)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: V4DI, UV8SI, UV8SI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvaddwod_d_wu(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvaddwod_d_wu((v8u32)_1, (v8u32)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: V8SI, UV16HI, UV16HI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvaddwod_w_hu(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvaddwod_w_hu((v16u16)_1, (v16u16)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: V16HI, UV32QI, UV32QI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvaddwod_h_bu(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvaddwod_h_bu((v32u8)_1, (v32u8)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: V4DI, V4DI, V4DI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvsubwod_q_d(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvsubwod_q_d((v4i64)_1, (v4i64)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: V4DI, V8SI, V8SI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvsubwod_d_w(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvsubwod_d_w((v8i32)_1, (v8i32)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: V8SI, V16HI, V16HI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvsubwod_w_h(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvsubwod_w_h((v16i16)_1, (v16i16)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: V16HI, V32QI, V32QI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvsubwod_h_b(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvsubwod_h_b((v32i8)_1, (v32i8)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: V4DI, UV4DI, UV4DI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvsubwod_q_du(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvsubwod_q_du((v4u64)_1, (v4u64)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: V4DI, UV8SI, UV8SI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvsubwod_d_wu(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvsubwod_d_wu((v8u32)_1, 
(v8u32)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: V8SI, UV16HI, UV16HI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvsubwod_w_hu(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvsubwod_w_hu((v16u16)_1, (v16u16)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: V16HI, UV32QI, UV32QI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvsubwod_h_bu(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvsubwod_h_bu((v32u8)_1, (v32u8)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: V4DI, V4DI, V4DI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvmulwod_q_d(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvmulwod_q_d((v4i64)_1, (v4i64)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: V4DI, V8SI, V8SI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvmulwod_d_w(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvmulwod_d_w((v8i32)_1, (v8i32)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: V8SI, V16HI, V16HI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvmulwod_w_h(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvmulwod_w_h((v16i16)_1, (v16i16)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: V16HI, V32QI, V32QI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvmulwod_h_b(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvmulwod_h_b((v32i8)_1, (v32i8)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: V4DI, UV4DI, UV4DI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvmulwod_q_du(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvmulwod_q_du((v4u64)_1, (v4u64)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: V4DI, UV8SI, UV8SI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvmulwod_d_wu(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvmulwod_d_wu((v8u32)_1, (v8u32)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: V8SI, UV16HI, UV16HI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvmulwod_w_hu(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvmulwod_w_hu((v16u16)_1, (v16u16)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: V16HI, UV32QI, UV32QI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvmulwod_h_bu(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvmulwod_h_bu((v32u8)_1, (v32u8)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: V4DI, UV8SI, V8SI */ ++extern __inline ++ __attribute__((__gnu_inline__, 
__always_inline__, __artificial__)) __m256i ++ __lasx_xvaddwev_d_wu_w(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvaddwev_d_wu_w((v8u32)_1, (v8i32)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: V8SI, UV16HI, V16HI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvaddwev_w_hu_h(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvaddwev_w_hu_h((v16u16)_1, (v16i16)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: V16HI, UV32QI, V32QI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvaddwev_h_bu_b(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvaddwev_h_bu_b((v32u8)_1, (v32i8)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: V4DI, UV8SI, V8SI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvmulwev_d_wu_w(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvmulwev_d_wu_w((v8u32)_1, (v8i32)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: V8SI, UV16HI, V16HI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvmulwev_w_hu_h(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvmulwev_w_hu_h((v16u16)_1, (v16i16)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: V16HI, UV32QI, V32QI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvmulwev_h_bu_b(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvmulwev_h_bu_b((v32u8)_1, (v32i8)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: V4DI, UV8SI, V8SI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvaddwod_d_wu_w(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvaddwod_d_wu_w((v8u32)_1, (v8i32)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: V8SI, UV16HI, V16HI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvaddwod_w_hu_h(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvaddwod_w_hu_h((v16u16)_1, (v16i16)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: V16HI, UV32QI, V32QI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvaddwod_h_bu_b(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvaddwod_h_bu_b((v32u8)_1, (v32i8)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: V4DI, UV8SI, V8SI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvmulwod_d_wu_w(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvmulwod_d_wu_w((v8u32)_1, (v8i32)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: V8SI, UV16HI, V16HI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvmulwod_w_hu_h(__m256i _1, __m256i _2) { ++ return 
(__m256i)__builtin_lasx_xvmulwod_w_hu_h((v16u16)_1, (v16i16)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: V16HI, UV32QI, V32QI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvmulwod_h_bu_b(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvmulwod_h_bu_b((v32u8)_1, (v32i8)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: V4DI, V4DI, V4DI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvhaddw_q_d(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvhaddw_q_d((v4i64)_1, (v4i64)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: UV4DI, UV4DI, UV4DI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvhaddw_qu_du(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvhaddw_qu_du((v4u64)_1, (v4u64)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: V4DI, V4DI, V4DI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvhsubw_q_d(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvhsubw_q_d((v4i64)_1, (v4i64)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: UV4DI, UV4DI, UV4DI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvhsubw_qu_du(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvhsubw_qu_du((v4u64)_1, (v4u64)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: V4DI, V4DI, V4DI, V4DI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvmaddwev_q_d(__m256i _1, __m256i _2, __m256i _3) { ++ return (__m256i)__builtin_lasx_xvmaddwev_q_d((v4i64)_1, (v4i64)_2, (v4i64)_3); ++} ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: V4DI, V4DI, V8SI, V8SI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvmaddwev_d_w(__m256i _1, __m256i _2, __m256i _3) { ++ return (__m256i)__builtin_lasx_xvmaddwev_d_w((v4i64)_1, (v8i32)_2, (v8i32)_3); ++} ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: V8SI, V8SI, V16HI, V16HI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvmaddwev_w_h(__m256i _1, __m256i _2, __m256i _3) { ++ return (__m256i)__builtin_lasx_xvmaddwev_w_h((v8i32)_1, (v16i16)_2, ++ (v16i16)_3); ++} ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: V16HI, V16HI, V32QI, V32QI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvmaddwev_h_b(__m256i _1, __m256i _2, __m256i _3) { ++ return (__m256i)__builtin_lasx_xvmaddwev_h_b((v16i16)_1, (v32i8)_2, ++ (v32i8)_3); ++} ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: UV4DI, UV4DI, UV4DI, UV4DI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvmaddwev_q_du(__m256i _1, __m256i _2, __m256i _3) { ++ return (__m256i)__builtin_lasx_xvmaddwev_q_du((v4u64)_1, 
(v4u64)_2, ++ (v4u64)_3); ++} ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: UV4DI, UV4DI, UV8SI, UV8SI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvmaddwev_d_wu(__m256i _1, __m256i _2, __m256i _3) { ++ return (__m256i)__builtin_lasx_xvmaddwev_d_wu((v4u64)_1, (v8u32)_2, ++ (v8u32)_3); ++} ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: UV8SI, UV8SI, UV16HI, UV16HI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvmaddwev_w_hu(__m256i _1, __m256i _2, __m256i _3) { ++ return (__m256i)__builtin_lasx_xvmaddwev_w_hu((v8u32)_1, (v16u16)_2, ++ (v16u16)_3); ++} ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: UV16HI, UV16HI, UV32QI, UV32QI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvmaddwev_h_bu(__m256i _1, __m256i _2, __m256i _3) { ++ return (__m256i)__builtin_lasx_xvmaddwev_h_bu((v16u16)_1, (v32u8)_2, ++ (v32u8)_3); ++} ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: V4DI, V4DI, V4DI, V4DI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvmaddwod_q_d(__m256i _1, __m256i _2, __m256i _3) { ++ return (__m256i)__builtin_lasx_xvmaddwod_q_d((v4i64)_1, (v4i64)_2, (v4i64)_3); ++} ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: V4DI, V4DI, V8SI, V8SI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvmaddwod_d_w(__m256i _1, __m256i _2, __m256i _3) { ++ return (__m256i)__builtin_lasx_xvmaddwod_d_w((v4i64)_1, (v8i32)_2, (v8i32)_3); ++} ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: V8SI, V8SI, V16HI, V16HI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvmaddwod_w_h(__m256i _1, __m256i _2, __m256i _3) { ++ return (__m256i)__builtin_lasx_xvmaddwod_w_h((v8i32)_1, (v16i16)_2, ++ (v16i16)_3); ++} ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: V16HI, V16HI, V32QI, V32QI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvmaddwod_h_b(__m256i _1, __m256i _2, __m256i _3) { ++ return (__m256i)__builtin_lasx_xvmaddwod_h_b((v16i16)_1, (v32i8)_2, ++ (v32i8)_3); ++} ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: UV4DI, UV4DI, UV4DI, UV4DI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvmaddwod_q_du(__m256i _1, __m256i _2, __m256i _3) { ++ return (__m256i)__builtin_lasx_xvmaddwod_q_du((v4u64)_1, (v4u64)_2, ++ (v4u64)_3); ++} ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: UV4DI, UV4DI, UV8SI, UV8SI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvmaddwod_d_wu(__m256i _1, __m256i _2, __m256i _3) { ++ return (__m256i)__builtin_lasx_xvmaddwod_d_wu((v4u64)_1, (v8u32)_2, ++ (v8u32)_3); ++} ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: UV8SI, UV8SI, UV16HI, UV16HI */ ++extern __inline ++ __attribute__((__gnu_inline__, 
__always_inline__, __artificial__)) __m256i ++ __lasx_xvmaddwod_w_hu(__m256i _1, __m256i _2, __m256i _3) { ++ return (__m256i)__builtin_lasx_xvmaddwod_w_hu((v8u32)_1, (v16u16)_2, ++ (v16u16)_3); ++} ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: UV16HI, UV16HI, UV32QI, UV32QI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvmaddwod_h_bu(__m256i _1, __m256i _2, __m256i _3) { ++ return (__m256i)__builtin_lasx_xvmaddwod_h_bu((v16u16)_1, (v32u8)_2, ++ (v32u8)_3); ++} ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: V4DI, V4DI, UV4DI, V4DI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvmaddwev_q_du_d(__m256i _1, __m256i _2, __m256i _3) { ++ return (__m256i)__builtin_lasx_xvmaddwev_q_du_d((v4i64)_1, (v4u64)_2, ++ (v4i64)_3); ++} ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: V4DI, V4DI, UV8SI, V8SI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvmaddwev_d_wu_w(__m256i _1, __m256i _2, __m256i _3) { ++ return (__m256i)__builtin_lasx_xvmaddwev_d_wu_w((v4i64)_1, (v8u32)_2, ++ (v8i32)_3); ++} ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: V8SI, V8SI, UV16HI, V16HI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvmaddwev_w_hu_h(__m256i _1, __m256i _2, __m256i _3) { ++ return (__m256i)__builtin_lasx_xvmaddwev_w_hu_h((v8i32)_1, (v16u16)_2, ++ (v16i16)_3); ++} ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: V16HI, V16HI, UV32QI, V32QI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvmaddwev_h_bu_b(__m256i _1, __m256i _2, __m256i _3) { ++ return (__m256i)__builtin_lasx_xvmaddwev_h_bu_b((v16i16)_1, (v32u8)_2, ++ (v32i8)_3); ++} ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: V4DI, V4DI, UV4DI, V4DI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvmaddwod_q_du_d(__m256i _1, __m256i _2, __m256i _3) { ++ return (__m256i)__builtin_lasx_xvmaddwod_q_du_d((v4i64)_1, (v4u64)_2, ++ (v4i64)_3); ++} ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: V4DI, V4DI, UV8SI, V8SI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvmaddwod_d_wu_w(__m256i _1, __m256i _2, __m256i _3) { ++ return (__m256i)__builtin_lasx_xvmaddwod_d_wu_w((v4i64)_1, (v8u32)_2, ++ (v8i32)_3); ++} ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: V8SI, V8SI, UV16HI, V16HI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvmaddwod_w_hu_h(__m256i _1, __m256i _2, __m256i _3) { ++ return (__m256i)__builtin_lasx_xvmaddwod_w_hu_h((v8i32)_1, (v16u16)_2, ++ (v16i16)_3); ++} ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: V16HI, V16HI, UV32QI, V32QI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvmaddwod_h_bu_b(__m256i _1, __m256i _2, __m256i _3) { ++ return (__m256i)__builtin_lasx_xvmaddwod_h_bu_b((v16i16)_1, 
(v32u8)_2, ++ (v32i8)_3); ++} ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: V32QI, V32QI, V32QI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvrotr_b(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvrotr_b((v32i8)_1, (v32i8)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: V16HI, V16HI, V16HI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvrotr_h(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvrotr_h((v16i16)_1, (v16i16)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: V8SI, V8SI, V8SI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvrotr_w(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvrotr_w((v8i32)_1, (v8i32)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: V4DI, V4DI, V4DI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvrotr_d(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvrotr_d((v4i64)_1, (v4i64)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: V4DI, V4DI, V4DI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvadd_q(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvadd_q((v4i64)_1, (v4i64)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: V4DI, V4DI, V4DI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvsub_q(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvsub_q((v4i64)_1, (v4i64)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: V4DI, UV4DI, V4DI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvaddwev_q_du_d(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvaddwev_q_du_d((v4u64)_1, (v4i64)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: V4DI, UV4DI, V4DI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvaddwod_q_du_d(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvaddwod_q_du_d((v4u64)_1, (v4i64)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: V4DI, UV4DI, V4DI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvmulwev_q_du_d(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvmulwev_q_du_d((v4u64)_1, (v4i64)_2); ++} ++ ++/* Assembly instruction format: xd, xj, xk */ ++/* Data types in instruction templates: V4DI, UV4DI, V4DI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvmulwod_q_du_d(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvmulwod_q_du_d((v4u64)_1, (v4i64)_2); ++} ++ ++/* Assembly instruction format: xd, xj */ ++/* Data types in instruction templates: V32QI, V32QI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ 
__lasx_xvmskgez_b(__m256i _1) { ++ return (__m256i)__builtin_lasx_xvmskgez_b((v32i8)_1); ++} ++ ++/* Assembly instruction format: xd, xj */ ++/* Data types in instruction templates: V32QI, V32QI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvmsknz_b(__m256i _1) { ++ return (__m256i)__builtin_lasx_xvmsknz_b((v32i8)_1); ++} ++ ++/* Assembly instruction format: xd, xj */ ++/* Data types in instruction templates: V16HI, V32QI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvexth_h_b(__m256i _1) { ++ return (__m256i)__builtin_lasx_xvexth_h_b((v32i8)_1); ++} ++ ++/* Assembly instruction format: xd, xj */ ++/* Data types in instruction templates: V8SI, V16HI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvexth_w_h(__m256i _1) { ++ return (__m256i)__builtin_lasx_xvexth_w_h((v16i16)_1); ++} ++ ++/* Assembly instruction format: xd, xj */ ++/* Data types in instruction templates: V4DI, V8SI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvexth_d_w(__m256i _1) { ++ return (__m256i)__builtin_lasx_xvexth_d_w((v8i32)_1); ++} ++ ++/* Assembly instruction format: xd, xj */ ++/* Data types in instruction templates: V4DI, V4DI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvexth_q_d(__m256i _1) { ++ return (__m256i)__builtin_lasx_xvexth_q_d((v4i64)_1); ++} ++ ++/* Assembly instruction format: xd, xj. */ ++/* Data types in instruction templates: UV16HI, UV32QI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvexth_hu_bu(__m256i _1) { ++ return (__m256i)__builtin_lasx_xvexth_hu_bu((v32u8)_1); ++} ++ ++/* Assembly instruction format: xd, xj. */ ++/* Data types in instruction templates: UV8SI, UV16HI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvexth_wu_hu(__m256i _1) { ++ return (__m256i)__builtin_lasx_xvexth_wu_hu((v16u16)_1); ++} ++ ++/* Assembly instruction format: xd, xj. */ ++/* Data types in instruction templates: UV4DI, UV8SI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvexth_du_wu(__m256i _1) { ++ return (__m256i)__builtin_lasx_xvexth_du_wu((v8u32)_1); ++} ++ ++/* Assembly instruction format: xd, xj. */ ++/* Data types in instruction templates: UV4DI, UV4DI. 
*/ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvexth_qu_du(__m256i _1) { ++ return (__m256i)__builtin_lasx_xvexth_qu_du((v4u64)_1); ++} ++ ++/* Assembly instruction format: xd, xj, ui3 */ ++/* Data types in instruction templates: V32QI, V32QI, UQI */ ++#define __lasx_xvrotri_b(/*__m256i*/ _1, /*ui3*/ _2) \ ++ ((__m256i)__builtin_lasx_xvrotri_b((v32i8)(_1), (_2))) ++ ++/* Assembly instruction format: xd, xj, ui4 */ ++/* Data types in instruction templates: V16HI, V16HI, UQI */ ++#define __lasx_xvrotri_h(/*__m256i*/ _1, /*ui4*/ _2) \ ++ ((__m256i)__builtin_lasx_xvrotri_h((v16i16)(_1), (_2))) ++ ++/* Assembly instruction format: xd, xj, ui5 */ ++/* Data types in instruction templates: V8SI, V8SI, UQI */ ++#define __lasx_xvrotri_w(/*__m256i*/ _1, /*ui5*/ _2) \ ++ ((__m256i)__builtin_lasx_xvrotri_w((v8i32)(_1), (_2))) ++ ++/* Assembly instruction format: xd, xj, ui6 */ ++/* Data types in instruction templates: V4DI, V4DI, UQI */ ++#define __lasx_xvrotri_d(/*__m256i*/ _1, /*ui6*/ _2) \ ++ ((__m256i)__builtin_lasx_xvrotri_d((v4i64)(_1), (_2))) ++ ++/* Assembly instruction format: xd, xj. */ ++/* Data types in instruction templates: V4DI, V4DI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvextl_q_d(__m256i _1) { ++ return (__m256i)__builtin_lasx_xvextl_q_d((v4i64)_1); ++} ++ ++/* Assembly instruction format: xd, xj, ui4 */ ++/* Data types in instruction templates: V32QI, V32QI, V32QI, USI */ ++#define __lasx_xvsrlni_b_h(/*__m256i*/ _1, /*__m256i*/ _2, /*ui4*/ _3) \ ++ ((__m256i)__builtin_lasx_xvsrlni_b_h((v32i8)(_1), (v32i8)(_2), (_3))) ++ ++/* Assembly instruction format: xd, xj, ui5 */ ++/* Data types in instruction templates: V16HI, V16HI, V16HI, USI */ ++#define __lasx_xvsrlni_h_w(/*__m256i*/ _1, /*__m256i*/ _2, /*ui5*/ _3) \ ++ ((__m256i)__builtin_lasx_xvsrlni_h_w((v16i16)(_1), (v16i16)(_2), (_3))) ++ ++/* Assembly instruction format: xd, xj, ui6 */ ++/* Data types in instruction templates: V8SI, V8SI, V8SI, USI */ ++#define __lasx_xvsrlni_w_d(/*__m256i*/ _1, /*__m256i*/ _2, /*ui6*/ _3) \ ++ ((__m256i)__builtin_lasx_xvsrlni_w_d((v8i32)(_1), (v8i32)(_2), (_3))) ++ ++/* Assembly instruction format: xd, xj, ui7 */ ++/* Data types in instruction templates: V4DI, V4DI, V4DI, USI */ ++#define __lasx_xvsrlni_d_q(/*__m256i*/ _1, /*__m256i*/ _2, /*ui7*/ _3) \ ++ ((__m256i)__builtin_lasx_xvsrlni_d_q((v4i64)(_1), (v4i64)(_2), (_3))) ++ ++/* Assembly instruction format: xd, xj, ui4 */ ++/* Data types in instruction templates: V32QI, V32QI, V32QI, USI */ ++#define __lasx_xvsrlrni_b_h(/*__m256i*/ _1, /*__m256i*/ _2, /*ui4*/ _3) \ ++ ((__m256i)__builtin_lasx_xvsrlrni_b_h((v32i8)(_1), (v32i8)(_2), (_3))) ++ ++/* Assembly instruction format: xd, xj, ui5 */ ++/* Data types in instruction templates: V16HI, V16HI, V16HI, USI */ ++#define __lasx_xvsrlrni_h_w(/*__m256i*/ _1, /*__m256i*/ _2, /*ui5*/ _3) \ ++ ((__m256i)__builtin_lasx_xvsrlrni_h_w((v16i16)(_1), (v16i16)(_2), (_3))) ++ ++/* Assembly instruction format: xd, xj, ui6 */ ++/* Data types in instruction templates: V8SI, V8SI, V8SI, USI */ ++#define __lasx_xvsrlrni_w_d(/*__m256i*/ _1, /*__m256i*/ _2, /*ui6*/ _3) \ ++ ((__m256i)__builtin_lasx_xvsrlrni_w_d((v8i32)(_1), (v8i32)(_2), (_3))) ++ ++/* Assembly instruction format: xd, xj, ui7 */ ++/* Data types in instruction templates: V4DI, V4DI, V4DI, USI */ ++#define __lasx_xvsrlrni_d_q(/*__m256i*/ _1, /*__m256i*/ _2, /*ui7*/ _3) \ ++ ((__m256i)__builtin_lasx_xvsrlrni_d_q((v4i64)(_1), 
(v4i64)(_2), (_3))) ++ ++/* Assembly instruction format: xd, xj, ui4 */ ++/* Data types in instruction templates: V32QI, V32QI, V32QI, USI */ ++#define __lasx_xvssrlni_b_h(/*__m256i*/ _1, /*__m256i*/ _2, /*ui4*/ _3) \ ++ ((__m256i)__builtin_lasx_xvssrlni_b_h((v32i8)(_1), (v32i8)(_2), (_3))) ++ ++/* Assembly instruction format: xd, xj, ui5 */ ++/* Data types in instruction templates: V16HI, V16HI, V16HI, USI */ ++#define __lasx_xvssrlni_h_w(/*__m256i*/ _1, /*__m256i*/ _2, /*ui5*/ _3) \ ++ ((__m256i)__builtin_lasx_xvssrlni_h_w((v16i16)(_1), (v16i16)(_2), (_3))) ++ ++/* Assembly instruction format: xd, xj, ui6 */ ++/* Data types in instruction templates: V8SI, V8SI, V8SI, USI */ ++#define __lasx_xvssrlni_w_d(/*__m256i*/ _1, /*__m256i*/ _2, /*ui6*/ _3) \ ++ ((__m256i)__builtin_lasx_xvssrlni_w_d((v8i32)(_1), (v8i32)(_2), (_3))) ++ ++/* Assembly instruction format: xd, xj, ui7 */ ++/* Data types in instruction templates: V4DI, V4DI, V4DI, USI */ ++#define __lasx_xvssrlni_d_q(/*__m256i*/ _1, /*__m256i*/ _2, /*ui7*/ _3) \ ++ ((__m256i)__builtin_lasx_xvssrlni_d_q((v4i64)(_1), (v4i64)(_2), (_3))) ++ ++/* Assembly instruction format: xd, xj, ui4 */ ++/* Data types in instruction templates: UV32QI, UV32QI, V32QI, USI */ ++#define __lasx_xvssrlni_bu_h(/*__m256i*/ _1, /*__m256i*/ _2, /*ui4*/ _3) \ ++ ((__m256i)__builtin_lasx_xvssrlni_bu_h((v32u8)(_1), (v32i8)(_2), (_3))) ++ ++/* Assembly instruction format: xd, xj, ui5 */ ++/* Data types in instruction templates: UV16HI, UV16HI, V16HI, USI */ ++#define __lasx_xvssrlni_hu_w(/*__m256i*/ _1, /*__m256i*/ _2, /*ui5*/ _3) \ ++ ((__m256i)__builtin_lasx_xvssrlni_hu_w((v16u16)(_1), (v16i16)(_2), (_3))) ++ ++/* Assembly instruction format: xd, xj, ui6 */ ++/* Data types in instruction templates: UV8SI, UV8SI, V8SI, USI */ ++#define __lasx_xvssrlni_wu_d(/*__m256i*/ _1, /*__m256i*/ _2, /*ui6*/ _3) \ ++ ((__m256i)__builtin_lasx_xvssrlni_wu_d((v8u32)(_1), (v8i32)(_2), (_3))) ++ ++/* Assembly instruction format: xd, xj, ui7 */ ++/* Data types in instruction templates: UV4DI, UV4DI, V4DI, USI */ ++#define __lasx_xvssrlni_du_q(/*__m256i*/ _1, /*__m256i*/ _2, /*ui7*/ _3) \ ++ ((__m256i)__builtin_lasx_xvssrlni_du_q((v4u64)(_1), (v4i64)(_2), (_3))) ++ ++/* Assembly instruction format: xd, xj, ui4 */ ++/* Data types in instruction templates: V32QI, V32QI, V32QI, USI */ ++#define __lasx_xvssrlrni_b_h(/*__m256i*/ _1, /*__m256i*/ _2, /*ui4*/ _3) \ ++ ((__m256i)__builtin_lasx_xvssrlrni_b_h((v32i8)(_1), (v32i8)(_2), (_3))) ++ ++/* Assembly instruction format: xd, xj, ui5 */ ++/* Data types in instruction templates: V16HI, V16HI, V16HI, USI */ ++#define __lasx_xvssrlrni_h_w(/*__m256i*/ _1, /*__m256i*/ _2, /*ui5*/ _3) \ ++ ((__m256i)__builtin_lasx_xvssrlrni_h_w((v16i16)(_1), (v16i16)(_2), (_3))) ++ ++/* Assembly instruction format: xd, xj, ui6 */ ++/* Data types in instruction templates: V8SI, V8SI, V8SI, USI */ ++#define __lasx_xvssrlrni_w_d(/*__m256i*/ _1, /*__m256i*/ _2, /*ui6*/ _3) \ ++ ((__m256i)__builtin_lasx_xvssrlrni_w_d((v8i32)(_1), (v8i32)(_2), (_3))) ++ ++/* Assembly instruction format: xd, xj, ui7 */ ++/* Data types in instruction templates: V4DI, V4DI, V4DI, USI */ ++#define __lasx_xvssrlrni_d_q(/*__m256i*/ _1, /*__m256i*/ _2, /*ui7*/ _3) \ ++ ((__m256i)__builtin_lasx_xvssrlrni_d_q((v4i64)(_1), (v4i64)(_2), (_3))) ++ ++/* Assembly instruction format: xd, xj, ui4 */ ++/* Data types in instruction templates: UV32QI, UV32QI, V32QI, USI */ ++#define __lasx_xvssrlrni_bu_h(/*__m256i*/ _1, /*__m256i*/ _2, /*ui4*/ _3) \ ++ ((__m256i)__builtin_lasx_xvssrlrni_bu_h((v32u8)(_1), 
(v32i8)(_2), (_3))) ++ ++/* Assembly instruction format: xd, xj, ui5 */ ++/* Data types in instruction templates: UV16HI, UV16HI, V16HI, USI */ ++#define __lasx_xvssrlrni_hu_w(/*__m256i*/ _1, /*__m256i*/ _2, /*ui5*/ _3) \ ++ ((__m256i)__builtin_lasx_xvssrlrni_hu_w((v16u16)(_1), (v16i16)(_2), (_3))) ++ ++/* Assembly instruction format: xd, xj, ui6 */ ++/* Data types in instruction templates: UV8SI, UV8SI, V8SI, USI */ ++#define __lasx_xvssrlrni_wu_d(/*__m256i*/ _1, /*__m256i*/ _2, /*ui6*/ _3) \ ++ ((__m256i)__builtin_lasx_xvssrlrni_wu_d((v8u32)(_1), (v8i32)(_2), (_3))) ++ ++/* Assembly instruction format: xd, xj, ui7 */ ++/* Data types in instruction templates: UV4DI, UV4DI, V4DI, USI */ ++#define __lasx_xvssrlrni_du_q(/*__m256i*/ _1, /*__m256i*/ _2, /*ui7*/ _3) \ ++ ((__m256i)__builtin_lasx_xvssrlrni_du_q((v4u64)(_1), (v4i64)(_2), (_3))) ++ ++/* Assembly instruction format: xd, xj, ui4 */ ++/* Data types in instruction templates: V32QI, V32QI, V32QI, USI */ ++#define __lasx_xvsrani_b_h(/*__m256i*/ _1, /*__m256i*/ _2, /*ui4*/ _3) \ ++ ((__m256i)__builtin_lasx_xvsrani_b_h((v32i8)(_1), (v32i8)(_2), (_3))) ++ ++/* Assembly instruction format: xd, xj, ui5 */ ++/* Data types in instruction templates: V16HI, V16HI, V16HI, USI */ ++#define __lasx_xvsrani_h_w(/*__m256i*/ _1, /*__m256i*/ _2, /*ui5*/ _3) \ ++ ((__m256i)__builtin_lasx_xvsrani_h_w((v16i16)(_1), (v16i16)(_2), (_3))) ++ ++/* Assembly instruction format: xd, xj, ui6 */ ++/* Data types in instruction templates: V8SI, V8SI, V8SI, USI */ ++#define __lasx_xvsrani_w_d(/*__m256i*/ _1, /*__m256i*/ _2, /*ui6*/ _3) \ ++ ((__m256i)__builtin_lasx_xvsrani_w_d((v8i32)(_1), (v8i32)(_2), (_3))) ++ ++/* Assembly instruction format: xd, xj, ui7 */ ++/* Data types in instruction templates: V4DI, V4DI, V4DI, USI */ ++#define __lasx_xvsrani_d_q(/*__m256i*/ _1, /*__m256i*/ _2, /*ui7*/ _3) \ ++ ((__m256i)__builtin_lasx_xvsrani_d_q((v4i64)(_1), (v4i64)(_2), (_3))) ++ ++/* Assembly instruction format: xd, xj, ui4 */ ++/* Data types in instruction templates: V32QI, V32QI, V32QI, USI */ ++#define __lasx_xvsrarni_b_h(/*__m256i*/ _1, /*__m256i*/ _2, /*ui4*/ _3) \ ++ ((__m256i)__builtin_lasx_xvsrarni_b_h((v32i8)(_1), (v32i8)(_2), (_3))) ++ ++/* Assembly instruction format: xd, xj, ui5 */ ++/* Data types in instruction templates: V16HI, V16HI, V16HI, USI */ ++#define __lasx_xvsrarni_h_w(/*__m256i*/ _1, /*__m256i*/ _2, /*ui5*/ _3) \ ++ ((__m256i)__builtin_lasx_xvsrarni_h_w((v16i16)(_1), (v16i16)(_2), (_3))) ++ ++/* Assembly instruction format: xd, xj, ui6 */ ++/* Data types in instruction templates: V8SI, V8SI, V8SI, USI */ ++#define __lasx_xvsrarni_w_d(/*__m256i*/ _1, /*__m256i*/ _2, /*ui6*/ _3) \ ++ ((__m256i)__builtin_lasx_xvsrarni_w_d((v8i32)(_1), (v8i32)(_2), (_3))) ++ ++/* Assembly instruction format: xd, xj, ui7 */ ++/* Data types in instruction templates: V4DI, V4DI, V4DI, USI */ ++#define __lasx_xvsrarni_d_q(/*__m256i*/ _1, /*__m256i*/ _2, /*ui7*/ _3) \ ++ ((__m256i)__builtin_lasx_xvsrarni_d_q((v4i64)(_1), (v4i64)(_2), (_3))) ++ ++/* Assembly instruction format: xd, xj, ui4 */ ++/* Data types in instruction templates: V32QI, V32QI, V32QI, USI */ ++#define __lasx_xvssrani_b_h(/*__m256i*/ _1, /*__m256i*/ _2, /*ui4*/ _3) \ ++ ((__m256i)__builtin_lasx_xvssrani_b_h((v32i8)(_1), (v32i8)(_2), (_3))) ++ ++/* Assembly instruction format: xd, xj, ui5 */ ++/* Data types in instruction templates: V16HI, V16HI, V16HI, USI */ ++#define __lasx_xvssrani_h_w(/*__m256i*/ _1, /*__m256i*/ _2, /*ui5*/ _3) \ ++ ((__m256i)__builtin_lasx_xvssrani_h_w((v16i16)(_1), (v16i16)(_2), 
(_3))) ++ ++/* Assembly instruction format: xd, xj, ui6 */ ++/* Data types in instruction templates: V8SI, V8SI, V8SI, USI */ ++#define __lasx_xvssrani_w_d(/*__m256i*/ _1, /*__m256i*/ _2, /*ui6*/ _3) \ ++ ((__m256i)__builtin_lasx_xvssrani_w_d((v8i32)(_1), (v8i32)(_2), (_3))) ++ ++/* Assembly instruction format: xd, xj, ui7 */ ++/* Data types in instruction templates: V4DI, V4DI, V4DI, USI */ ++#define __lasx_xvssrani_d_q(/*__m256i*/ _1, /*__m256i*/ _2, /*ui7*/ _3) \ ++ ((__m256i)__builtin_lasx_xvssrani_d_q((v4i64)(_1), (v4i64)(_2), (_3))) ++ ++/* Assembly instruction format: xd, xj, ui4 */ ++/* Data types in instruction templates: UV32QI, UV32QI, V32QI, USI */ ++#define __lasx_xvssrani_bu_h(/*__m256i*/ _1, /*__m256i*/ _2, /*ui4*/ _3) \ ++ ((__m256i)__builtin_lasx_xvssrani_bu_h((v32u8)(_1), (v32i8)(_2), (_3))) ++ ++/* Assembly instruction format: xd, xj, ui5 */ ++/* Data types in instruction templates: UV16HI, UV16HI, V16HI, USI */ ++#define __lasx_xvssrani_hu_w(/*__m256i*/ _1, /*__m256i*/ _2, /*ui5*/ _3) \ ++ ((__m256i)__builtin_lasx_xvssrani_hu_w((v16u16)(_1), (v16i16)(_2), (_3))) ++ ++/* Assembly instruction format: xd, xj, ui6 */ ++/* Data types in instruction templates: UV8SI, UV8SI, V8SI, USI */ ++#define __lasx_xvssrani_wu_d(/*__m256i*/ _1, /*__m256i*/ _2, /*ui6*/ _3) \ ++ ((__m256i)__builtin_lasx_xvssrani_wu_d((v8u32)(_1), (v8i32)(_2), (_3))) ++ ++/* Assembly instruction format: xd, xj, ui7 */ ++/* Data types in instruction templates: UV4DI, UV4DI, V4DI, USI */ ++#define __lasx_xvssrani_du_q(/*__m256i*/ _1, /*__m256i*/ _2, /*ui7*/ _3) \ ++ ((__m256i)__builtin_lasx_xvssrani_du_q((v4u64)(_1), (v4i64)(_2), (_3))) ++ ++/* Assembly instruction format: xd, xj, ui4 */ ++/* Data types in instruction templates: V32QI, V32QI, V32QI, USI */ ++#define __lasx_xvssrarni_b_h(/*__m256i*/ _1, /*__m256i*/ _2, /*ui4*/ _3) \ ++ ((__m256i)__builtin_lasx_xvssrarni_b_h((v32i8)(_1), (v32i8)(_2), (_3))) ++ ++/* Assembly instruction format: xd, xj, ui5 */ ++/* Data types in instruction templates: V16HI, V16HI, V16HI, USI */ ++#define __lasx_xvssrarni_h_w(/*__m256i*/ _1, /*__m256i*/ _2, /*ui5*/ _3) \ ++ ((__m256i)__builtin_lasx_xvssrarni_h_w((v16i16)(_1), (v16i16)(_2), (_3))) ++ ++/* Assembly instruction format: xd, xj, ui6 */ ++/* Data types in instruction templates: V8SI, V8SI, V8SI, USI */ ++#define __lasx_xvssrarni_w_d(/*__m256i*/ _1, /*__m256i*/ _2, /*ui6*/ _3) \ ++ ((__m256i)__builtin_lasx_xvssrarni_w_d((v8i32)(_1), (v8i32)(_2), (_3))) ++ ++/* Assembly instruction format: xd, xj, ui7 */ ++/* Data types in instruction templates: V4DI, V4DI, V4DI, USI */ ++#define __lasx_xvssrarni_d_q(/*__m256i*/ _1, /*__m256i*/ _2, /*ui7*/ _3) \ ++ ((__m256i)__builtin_lasx_xvssrarni_d_q((v4i64)(_1), (v4i64)(_2), (_3))) ++ ++/* Assembly instruction format: xd, xj, ui4 */ ++/* Data types in instruction templates: UV32QI, UV32QI, V32QI, USI */ ++#define __lasx_xvssrarni_bu_h(/*__m256i*/ _1, /*__m256i*/ _2, /*ui4*/ _3) \ ++ ((__m256i)__builtin_lasx_xvssrarni_bu_h((v32u8)(_1), (v32i8)(_2), (_3))) ++ ++/* Assembly instruction format: xd, xj, ui5 */ ++/* Data types in instruction templates: UV16HI, UV16HI, V16HI, USI */ ++#define __lasx_xvssrarni_hu_w(/*__m256i*/ _1, /*__m256i*/ _2, /*ui5*/ _3) \ ++ ((__m256i)__builtin_lasx_xvssrarni_hu_w((v16u16)(_1), (v16i16)(_2), (_3))) ++ ++/* Assembly instruction format: xd, xj, ui6 */ ++/* Data types in instruction templates: UV8SI, UV8SI, V8SI, USI */ ++#define __lasx_xvssrarni_wu_d(/*__m256i*/ _1, /*__m256i*/ _2, /*ui6*/ _3) \ ++ ((__m256i)__builtin_lasx_xvssrarni_wu_d((v8u32)(_1), 
(v8i32)(_2), (_3))) ++ ++/* Assembly instruction format: xd, xj, ui7 */ ++/* Data types in instruction templates: UV4DI, UV4DI, V4DI, USI */ ++#define __lasx_xvssrarni_du_q(/*__m256i*/ _1, /*__m256i*/ _2, /*ui7*/ _3) \ ++ ((__m256i)__builtin_lasx_xvssrarni_du_q((v4u64)(_1), (v4i64)(_2), (_3))) ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) int ++ __lasx_xbnz_v(__m256i _1) { ++ return __builtin_lasx_xbnz_v((v32u8)_1); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) int ++ __lasx_xbz_v(__m256i _1) { ++ return __builtin_lasx_xbz_v((v32u8)_1); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) int ++ __lasx_xbnz_b(__m256i _1) { ++ return __builtin_lasx_xbnz_b((v32u8)_1); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) int ++ __lasx_xbnz_h(__m256i _1) { ++ return __builtin_lasx_xbnz_h((v16u16)_1); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) int ++ __lasx_xbnz_w(__m256i _1) { ++ return __builtin_lasx_xbnz_w((v8u32)_1); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) int ++ __lasx_xbnz_d(__m256i _1) { ++ return __builtin_lasx_xbnz_d((v4u64)_1); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) int ++ __lasx_xbz_b(__m256i _1) { ++ return __builtin_lasx_xbz_b((v32u8)_1); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) int ++ __lasx_xbz_h(__m256i _1) { ++ return __builtin_lasx_xbz_h((v16u16)_1); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) int ++ __lasx_xbz_w(__m256i _1) { ++ return __builtin_lasx_xbz_w((v8u32)_1); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) int ++ __lasx_xbz_d(__m256i _1) { ++ return __builtin_lasx_xbz_d((v4u64)_1); ++} ++ ++#if 0 ++/* Assembly instruction format: vd, i10 */ ++/* Data types in instruction templates: V32QI, i10 */ ++#define __lasx_xvrepli_b(/*i10*/ _1) ((__m256i)__builtin_lasx_xvrepli_b(_1)) ++ ++/* Assembly instruction format: vd, i10 */ ++/* Data types in instruction templates: V16HI, i10 */ ++#define __lasx_xvrepli_h(/*i10*/ _1) ((__m256i)__builtin_lasx_xvrepli_h(_1)) ++ ++/* Assembly instruction format: vd, i10 */ ++/* Data types in instruction templates: V8SI, i10 */ ++#define __lasx_xvrepli_w(/*i10*/ _1) ((__m256i)__builtin_lasx_xvrepli_w(_1)) ++ ++/* Assembly instruction format: vd, i10 */ ++/* Data types in instruction templates: V4DI, i10 */ ++#define __lasx_xvrepli_d(/*i10*/ _1) ((__m256i)__builtin_lasx_xvrepli_d(_1)) ++#endif ++ ++#endif /* defined(__loongarch_asx) */ ++#endif /* _GCC_LOONGSON_ASXINTRIN_H */ +diff --git a/lib/Headers/lsxintrin.h b/lib/Headers/lsxintrin.h +new file mode 100644 +index 00000000..bd5f15a0 +--- /dev/null ++++ b/lib/Headers/lsxintrin.h +@@ -0,0 +1,5165 @@ ++//===----------- lsxintrin.h - LoongArch LSX intrinsics ------------------===// ++// ++// The LLVM Compiler Infrastructure ++// ++// This file is distributed under the University of Illinois Open Source ++// License. See LICENSE.TXT for details. ++// ++//===----------------------------------------------------------------------===// ++// ++// This file contains the LoongArch LSX intrinsics.
++// ++//===----------------------------------------------------------------------===// ++ ++#ifndef _GCC_LOONGSON_SXINTRIN_H ++#define _GCC_LOONGSON_SXINTRIN_H 1 ++ ++#if defined(__loongarch_sx) ++typedef signed char v16i8 __attribute__((vector_size(16), aligned(16))); ++typedef signed char v16i8_b __attribute__((vector_size(16), aligned(1))); ++typedef unsigned char v16u8 __attribute__((vector_size(16), aligned(16))); ++typedef unsigned char v16u8_b __attribute__((vector_size(16), aligned(1))); ++typedef short v8i16 __attribute__((vector_size(16), aligned(16))); ++typedef short v8i16_h __attribute__((vector_size(16), aligned(2))); ++typedef unsigned short v8u16 __attribute__((vector_size(16), aligned(16))); ++typedef unsigned short v8u16_h __attribute__((vector_size(16), aligned(2))); ++typedef int v4i32 __attribute__((vector_size(16), aligned(16))); ++typedef int v4i32_w __attribute__((vector_size(16), aligned(4))); ++typedef unsigned int v4u32 __attribute__((vector_size(16), aligned(16))); ++typedef unsigned int v4u32_w __attribute__((vector_size(16), aligned(4))); ++typedef long long v2i64 __attribute__((vector_size(16), aligned(16))); ++typedef long long v2i64_d __attribute__((vector_size(16), aligned(8))); ++typedef unsigned long long v2u64 __attribute__((vector_size(16), aligned(16))); ++typedef unsigned long long v2u64_d __attribute__((vector_size(16), aligned(8))); ++typedef float v4f32 __attribute__((vector_size(16), aligned(16))); ++typedef float v4f32_w __attribute__((vector_size(16), aligned(4))); ++typedef double v2f64 __attribute__((vector_size(16), aligned(16))); ++typedef double v2f64_d __attribute__((vector_size(16), aligned(8))); ++ ++typedef long long __m128i __attribute__((__vector_size__(16), __may_alias__)); ++typedef float __m128 __attribute__((__vector_size__(16), __may_alias__)); ++typedef double __m128d __attribute__((__vector_size__(16), __may_alias__)); ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: V16QI, V16QI, V16QI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vsll_b(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vsll_b((v16i8)_1, (v16i8)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: V8HI, V8HI, V8HI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vsll_h(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vsll_h((v8i16)_1, (v8i16)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: V4SI, V4SI, V4SI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vsll_w(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vsll_w((v4i32)_1, (v4i32)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: V2DI, V2DI, V2DI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vsll_d(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vsll_d((v2i64)_1, (v2i64)_2); ++} ++ ++/* Assembly instruction format: vd, vj, ui3 */ ++/* Data types in instruction templates: V16QI, V16QI, UQI */ ++#define __lsx_vslli_b(/*__m128i*/ _1, /*ui3*/ _2) \ ++ ((__m128i)__builtin_lsx_vslli_b((v16i8)(_1), (_2))) ++ ++/* Assembly instruction format: vd, vj, ui4 */ ++/* Data types in instruction templates: V8HI, V8HI, UQI */ ++#define 
__lsx_vslli_h(/*__m128i*/ _1, /*ui4*/ _2) \ ++ ((__m128i)__builtin_lsx_vslli_h((v8i16)(_1), (_2))) ++ ++/* Assembly instruction format: vd, vj, ui5 */ ++/* Data types in instruction templates: V4SI, V4SI, UQI */ ++#define __lsx_vslli_w(/*__m128i*/ _1, /*ui5*/ _2) \ ++ ((__m128i)__builtin_lsx_vslli_w((v4i32)(_1), (_2))) ++ ++/* Assembly instruction format: vd, vj, ui6 */ ++/* Data types in instruction templates: V2DI, V2DI, UQI */ ++#define __lsx_vslli_d(/*__m128i*/ _1, /*ui6*/ _2) \ ++ ((__m128i)__builtin_lsx_vslli_d((v2i64)(_1), (_2))) ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: V16QI, V16QI, V16QI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vsra_b(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vsra_b((v16i8)_1, (v16i8)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: V8HI, V8HI, V8HI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vsra_h(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vsra_h((v8i16)_1, (v8i16)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: V4SI, V4SI, V4SI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vsra_w(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vsra_w((v4i32)_1, (v4i32)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: V2DI, V2DI, V2DI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vsra_d(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vsra_d((v2i64)_1, (v2i64)_2); ++} ++ ++/* Assembly instruction format: vd, vj, ui3 */ ++/* Data types in instruction templates: V16QI, V16QI, UQI */ ++#define __lsx_vsrai_b(/*__m128i*/ _1, /*ui3*/ _2) \ ++ ((__m128i)__builtin_lsx_vsrai_b((v16i8)(_1), (_2))) ++ ++/* Assembly instruction format: vd, vj, ui4 */ ++/* Data types in instruction templates: V8HI, V8HI, UQI */ ++#define __lsx_vsrai_h(/*__m128i*/ _1, /*ui4*/ _2) \ ++ ((__m128i)__builtin_lsx_vsrai_h((v8i16)(_1), (_2))) ++ ++/* Assembly instruction format: vd, vj, ui5 */ ++/* Data types in instruction templates: V4SI, V4SI, UQI */ ++#define __lsx_vsrai_w(/*__m128i*/ _1, /*ui5*/ _2) \ ++ ((__m128i)__builtin_lsx_vsrai_w((v4i32)(_1), (_2))) ++ ++/* Assembly instruction format: vd, vj, ui6 */ ++/* Data types in instruction templates: V2DI, V2DI, UQI */ ++#define __lsx_vsrai_d(/*__m128i*/ _1, /*ui6*/ _2) \ ++ ((__m128i)__builtin_lsx_vsrai_d((v2i64)(_1), (_2))) ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: V16QI, V16QI, V16QI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vsrar_b(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vsrar_b((v16i8)_1, (v16i8)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: V8HI, V8HI, V8HI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vsrar_h(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vsrar_h((v8i16)_1, (v8i16)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: V4SI, V4SI, V4SI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 
__m128i ++ __lsx_vsrar_w(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vsrar_w((v4i32)_1, (v4i32)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: V2DI, V2DI, V2DI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vsrar_d(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vsrar_d((v2i64)_1, (v2i64)_2); ++} ++ ++/* Assembly instruction format: vd, vj, ui3 */ ++/* Data types in instruction templates: V16QI, V16QI, UQI */ ++#define __lsx_vsrari_b(/*__m128i*/ _1, /*ui3*/ _2) \ ++ ((__m128i)__builtin_lsx_vsrari_b((v16i8)(_1), (_2))) ++ ++/* Assembly instruction format: vd, vj, ui4 */ ++/* Data types in instruction templates: V8HI, V8HI, UQI */ ++#define __lsx_vsrari_h(/*__m128i*/ _1, /*ui4*/ _2) \ ++ ((__m128i)__builtin_lsx_vsrari_h((v8i16)(_1), (_2))) ++ ++/* Assembly instruction format: vd, vj, ui5 */ ++/* Data types in instruction templates: V4SI, V4SI, UQI */ ++#define __lsx_vsrari_w(/*__m128i*/ _1, /*ui5*/ _2) \ ++ ((__m128i)__builtin_lsx_vsrari_w((v4i32)(_1), (_2))) ++ ++/* Assembly instruction format: vd, vj, ui6 */ ++/* Data types in instruction templates: V2DI, V2DI, UQI */ ++#define __lsx_vsrari_d(/*__m128i*/ _1, /*ui6*/ _2) \ ++ ((__m128i)__builtin_lsx_vsrari_d((v2i64)(_1), (_2))) ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: V16QI, V16QI, V16QI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vsrl_b(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vsrl_b((v16i8)_1, (v16i8)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: V8HI, V8HI, V8HI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vsrl_h(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vsrl_h((v8i16)_1, (v8i16)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: V4SI, V4SI, V4SI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vsrl_w(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vsrl_w((v4i32)_1, (v4i32)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: V2DI, V2DI, V2DI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vsrl_d(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vsrl_d((v2i64)_1, (v2i64)_2); ++} ++ ++/* Assembly instruction format: vd, vj, ui3 */ ++/* Data types in instruction templates: V16QI, V16QI, UQI */ ++#define __lsx_vsrli_b(/*__m128i*/ _1, /*ui3*/ _2) \ ++ ((__m128i)__builtin_lsx_vsrli_b((v16i8)(_1), (_2))) ++ ++/* Assembly instruction format: vd, vj, ui4 */ ++/* Data types in instruction templates: V8HI, V8HI, UQI */ ++#define __lsx_vsrli_h(/*__m128i*/ _1, /*ui4*/ _2) \ ++ ((__m128i)__builtin_lsx_vsrli_h((v8i16)(_1), (_2))) ++ ++/* Assembly instruction format: vd, vj, ui5 */ ++/* Data types in instruction templates: V4SI, V4SI, UQI */ ++#define __lsx_vsrli_w(/*__m128i*/ _1, /*ui5*/ _2) \ ++ ((__m128i)__builtin_lsx_vsrli_w((v4i32)(_1), (_2))) ++ ++/* Assembly instruction format: vd, vj, ui6 */ ++/* Data types in instruction templates: V2DI, V2DI, UQI */ ++#define __lsx_vsrli_d(/*__m128i*/ _1, /*ui6*/ _2) \ ++ ((__m128i)__builtin_lsx_vsrli_d((v2i64)(_1), (_2))) ++ ++/* Assembly instruction format: vd, vj, vk */ 
++/* Data types in instruction templates: V16QI, V16QI, V16QI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vsrlr_b(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vsrlr_b((v16i8)_1, (v16i8)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: V8HI, V8HI, V8HI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vsrlr_h(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vsrlr_h((v8i16)_1, (v8i16)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: V4SI, V4SI, V4SI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vsrlr_w(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vsrlr_w((v4i32)_1, (v4i32)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: V2DI, V2DI, V2DI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vsrlr_d(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vsrlr_d((v2i64)_1, (v2i64)_2); ++} ++ ++/* Assembly instruction format: vd, vj, ui3 */ ++/* Data types in instruction templates: V16QI, V16QI, UQI */ ++#define __lsx_vsrlri_b(/*__m128i*/ _1, /*ui3*/ _2) \ ++ ((__m128i)__builtin_lsx_vsrlri_b((v16i8)(_1), (_2))) ++ ++/* Assembly instruction format: vd, vj, ui4 */ ++/* Data types in instruction templates: V8HI, V8HI, UQI */ ++#define __lsx_vsrlri_h(/*__m128i*/ _1, /*ui4*/ _2) \ ++ ((__m128i)__builtin_lsx_vsrlri_h((v8i16)(_1), (_2))) ++ ++/* Assembly instruction format: vd, vj, ui5 */ ++/* Data types in instruction templates: V4SI, V4SI, UQI */ ++#define __lsx_vsrlri_w(/*__m128i*/ _1, /*ui5*/ _2) \ ++ ((__m128i)__builtin_lsx_vsrlri_w((v4i32)(_1), (_2))) ++ ++/* Assembly instruction format: vd, vj, ui6 */ ++/* Data types in instruction templates: V2DI, V2DI, UQI */ ++#define __lsx_vsrlri_d(/*__m128i*/ _1, /*ui6*/ _2) \ ++ ((__m128i)__builtin_lsx_vsrlri_d((v2i64)(_1), (_2))) ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: UV16QI, UV16QI, UV16QI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vbitclr_b(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vbitclr_b((v16u8)_1, (v16u8)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: UV8HI, UV8HI, UV8HI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vbitclr_h(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vbitclr_h((v8u16)_1, (v8u16)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: UV4SI, UV4SI, UV4SI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vbitclr_w(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vbitclr_w((v4u32)_1, (v4u32)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: UV2DI, UV2DI, UV2DI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vbitclr_d(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vbitclr_d((v2u64)_1, (v2u64)_2); ++} ++ ++/* Assembly instruction format: vd, vj, ui3 */ ++/* Data types in instruction templates: UV16QI, UV16QI, UQI 
*/ ++#define __lsx_vbitclri_b(/*__m128i*/ _1, /*ui3*/ _2) \ ++ ((__m128i)__builtin_lsx_vbitclri_b((v16u8)(_1), (_2))) ++ ++/* Assembly instruction format: vd, vj, ui4 */ ++/* Data types in instruction templates: UV8HI, UV8HI, UQI */ ++#define __lsx_vbitclri_h(/*__m128i*/ _1, /*ui4*/ _2) \ ++ ((__m128i)__builtin_lsx_vbitclri_h((v8u16)(_1), (_2))) ++ ++/* Assembly instruction format: vd, vj, ui5 */ ++/* Data types in instruction templates: UV4SI, UV4SI, UQI */ ++#define __lsx_vbitclri_w(/*__m128i*/ _1, /*ui5*/ _2) \ ++ ((__m128i)__builtin_lsx_vbitclri_w((v4u32)(_1), (_2))) ++ ++/* Assembly instruction format: vd, vj, ui6 */ ++/* Data types in instruction templates: UV2DI, UV2DI, UQI */ ++#define __lsx_vbitclri_d(/*__m128i*/ _1, /*ui6*/ _2) \ ++ ((__m128i)__builtin_lsx_vbitclri_d((v2u64)(_1), (_2))) ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: UV16QI, UV16QI, UV16QI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vbitset_b(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vbitset_b((v16u8)_1, (v16u8)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: UV8HI, UV8HI, UV8HI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vbitset_h(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vbitset_h((v8u16)_1, (v8u16)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: UV4SI, UV4SI, UV4SI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vbitset_w(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vbitset_w((v4u32)_1, (v4u32)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: UV2DI, UV2DI, UV2DI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vbitset_d(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vbitset_d((v2u64)_1, (v2u64)_2); ++} ++ ++/* Assembly instruction format: vd, vj, ui3 */ ++/* Data types in instruction templates: UV16QI, UV16QI, UQI */ ++#define __lsx_vbitseti_b(/*__m128i*/ _1, /*ui3*/ _2) \ ++ ((__m128i)__builtin_lsx_vbitseti_b((v16u8)(_1), (_2))) ++ ++/* Assembly instruction format: vd, vj, ui4 */ ++/* Data types in instruction templates: UV8HI, UV8HI, UQI */ ++#define __lsx_vbitseti_h(/*__m128i*/ _1, /*ui4*/ _2) \ ++ ((__m128i)__builtin_lsx_vbitseti_h((v8u16)(_1), (_2))) ++ ++/* Assembly instruction format: vd, vj, ui5 */ ++/* Data types in instruction templates: UV4SI, UV4SI, UQI */ ++#define __lsx_vbitseti_w(/*__m128i*/ _1, /*ui5*/ _2) \ ++ ((__m128i)__builtin_lsx_vbitseti_w((v4u32)(_1), (_2))) ++ ++/* Assembly instruction format: vd, vj, ui6 */ ++/* Data types in instruction templates: UV2DI, UV2DI, UQI */ ++#define __lsx_vbitseti_d(/*__m128i*/ _1, /*ui6*/ _2) \ ++ ((__m128i)__builtin_lsx_vbitseti_d((v2u64)(_1), (_2))) ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: UV16QI, UV16QI, UV16QI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vbitrev_b(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vbitrev_b((v16u8)_1, (v16u8)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: UV8HI, UV8HI, UV8HI */ ++extern __inline ++ __attribute__((__gnu_inline__, 
__always_inline__, __artificial__)) __m128i ++ __lsx_vbitrev_h(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vbitrev_h((v8u16)_1, (v8u16)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: UV4SI, UV4SI, UV4SI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vbitrev_w(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vbitrev_w((v4u32)_1, (v4u32)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: UV2DI, UV2DI, UV2DI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vbitrev_d(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vbitrev_d((v2u64)_1, (v2u64)_2); ++} ++ ++/* Assembly instruction format: vd, vj, ui3 */ ++/* Data types in instruction templates: UV16QI, UV16QI, UQI */ ++#define __lsx_vbitrevi_b(/*__m128i*/ _1, /*ui3*/ _2) \ ++ ((__m128i)__builtin_lsx_vbitrevi_b((v16u8)(_1), (_2))) ++ ++/* Assembly instruction format: vd, vj, ui4 */ ++/* Data types in instruction templates: UV8HI, UV8HI, UQI */ ++#define __lsx_vbitrevi_h(/*__m128i*/ _1, /*ui4*/ _2) \ ++ ((__m128i)__builtin_lsx_vbitrevi_h((v8u16)(_1), (_2))) ++ ++/* Assembly instruction format: vd, vj, ui5 */ ++/* Data types in instruction templates: UV4SI, UV4SI, UQI */ ++#define __lsx_vbitrevi_w(/*__m128i*/ _1, /*ui5*/ _2) \ ++ ((__m128i)__builtin_lsx_vbitrevi_w((v4u32)(_1), (_2))) ++ ++/* Assembly instruction format: vd, vj, ui6 */ ++/* Data types in instruction templates: UV2DI, UV2DI, UQI */ ++#define __lsx_vbitrevi_d(/*__m128i*/ _1, /*ui6*/ _2) \ ++ ((__m128i)__builtin_lsx_vbitrevi_d((v2u64)(_1), (_2))) ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: V16QI, V16QI, V16QI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vadd_b(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vadd_b((v16i8)_1, (v16i8)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: V8HI, V8HI, V8HI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vadd_h(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vadd_h((v8i16)_1, (v8i16)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: V4SI, V4SI, V4SI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vadd_w(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vadd_w((v4i32)_1, (v4i32)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: V2DI, V2DI, V2DI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vadd_d(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vadd_d((v2i64)_1, (v2i64)_2); ++} ++ ++/* Assembly instruction format: vd, vj, ui5 */ ++/* Data types in instruction templates: V16QI, V16QI, UQI */ ++#define __lsx_vaddi_bu(/*__m128i*/ _1, /*ui5*/ _2) \ ++ ((__m128i)__builtin_lsx_vaddi_bu((v16i8)(_1), (_2))) ++ ++/* Assembly instruction format: vd, vj, ui5 */ ++/* Data types in instruction templates: V8HI, V8HI, UQI */ ++#define __lsx_vaddi_hu(/*__m128i*/ _1, /*ui5*/ _2) \ ++ ((__m128i)__builtin_lsx_vaddi_hu((v8i16)(_1), (_2))) ++ ++/* Assembly instruction format: vd, vj, ui5 */ ++/* Data types in instruction templates: 
V4SI, V4SI, UQI */ ++#define __lsx_vaddi_wu(/*__m128i*/ _1, /*ui5*/ _2) \ ++ ((__m128i)__builtin_lsx_vaddi_wu((v4i32)(_1), (_2))) ++ ++/* Assembly instruction format: vd, vj, ui5 */ ++/* Data types in instruction templates: V2DI, V2DI, UQI */ ++#define __lsx_vaddi_du(/*__m128i*/ _1, /*ui5*/ _2) \ ++ ((__m128i)__builtin_lsx_vaddi_du((v2i64)(_1), (_2))) ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: V16QI, V16QI, V16QI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vsub_b(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vsub_b((v16i8)_1, (v16i8)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: V8HI, V8HI, V8HI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vsub_h(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vsub_h((v8i16)_1, (v8i16)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: V4SI, V4SI, V4SI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vsub_w(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vsub_w((v4i32)_1, (v4i32)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: V2DI, V2DI, V2DI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vsub_d(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vsub_d((v2i64)_1, (v2i64)_2); ++} ++ ++/* Assembly instruction format: vd, vj, ui5 */ ++/* Data types in instruction templates: V16QI, V16QI, UQI */ ++#define __lsx_vsubi_bu(/*__m128i*/ _1, /*ui5*/ _2) \ ++ ((__m128i)__builtin_lsx_vsubi_bu((v16i8)(_1), (_2))) ++ ++/* Assembly instruction format: vd, vj, ui5 */ ++/* Data types in instruction templates: V8HI, V8HI, UQI */ ++#define __lsx_vsubi_hu(/*__m128i*/ _1, /*ui5*/ _2) \ ++ ((__m128i)__builtin_lsx_vsubi_hu((v8i16)(_1), (_2))) ++ ++/* Assembly instruction format: vd, vj, ui5 */ ++/* Data types in instruction templates: V4SI, V4SI, UQI */ ++#define __lsx_vsubi_wu(/*__m128i*/ _1, /*ui5*/ _2) \ ++ ((__m128i)__builtin_lsx_vsubi_wu((v4i32)(_1), (_2))) ++ ++/* Assembly instruction format: vd, vj, ui5 */ ++/* Data types in instruction templates: V2DI, V2DI, UQI */ ++#define __lsx_vsubi_du(/*__m128i*/ _1, /*ui5*/ _2) \ ++ ((__m128i)__builtin_lsx_vsubi_du((v2i64)(_1), (_2))) ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: V16QI, V16QI, V16QI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vmax_b(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vmax_b((v16i8)_1, (v16i8)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: V8HI, V8HI, V8HI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vmax_h(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vmax_h((v8i16)_1, (v8i16)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: V4SI, V4SI, V4SI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vmax_w(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vmax_w((v4i32)_1, (v4i32)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in 
instruction templates: V2DI, V2DI, V2DI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vmax_d(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vmax_d((v2i64)_1, (v2i64)_2); ++} ++ ++/* Assembly instruction format: vd, vj, si5 */ ++/* Data types in instruction templates: V16QI, V16QI, QI */ ++#define __lsx_vmaxi_b(/*__m128i*/ _1, /*si5*/ _2) \ ++ ((__m128i)__builtin_lsx_vmaxi_b((v16i8)(_1), (_2))) ++ ++/* Assembly instruction format: vd, vj, si5 */ ++/* Data types in instruction templates: V8HI, V8HI, QI */ ++#define __lsx_vmaxi_h(/*__m128i*/ _1, /*si5*/ _2) \ ++ ((__m128i)__builtin_lsx_vmaxi_h((v8i16)(_1), (_2))) ++ ++/* Assembly instruction format: vd, vj, si5 */ ++/* Data types in instruction templates: V4SI, V4SI, QI */ ++#define __lsx_vmaxi_w(/*__m128i*/ _1, /*si5*/ _2) \ ++ ((__m128i)__builtin_lsx_vmaxi_w((v4i32)(_1), (_2))) ++ ++/* Assembly instruction format: vd, vj, si5 */ ++/* Data types in instruction templates: V2DI, V2DI, QI */ ++#define __lsx_vmaxi_d(/*__m128i*/ _1, /*si5*/ _2) \ ++ ((__m128i)__builtin_lsx_vmaxi_d((v2i64)(_1), (_2))) ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: UV16QI, UV16QI, UV16QI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vmax_bu(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vmax_bu((v16u8)_1, (v16u8)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: UV8HI, UV8HI, UV8HI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vmax_hu(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vmax_hu((v8u16)_1, (v8u16)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: UV4SI, UV4SI, UV4SI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vmax_wu(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vmax_wu((v4u32)_1, (v4u32)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: UV2DI, UV2DI, UV2DI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vmax_du(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vmax_du((v2u64)_1, (v2u64)_2); ++} ++ ++/* Assembly instruction format: vd, vj, ui5 */ ++/* Data types in instruction templates: UV16QI, UV16QI, UQI */ ++#define __lsx_vmaxi_bu(/*__m128i*/ _1, /*ui5*/ _2) \ ++ ((__m128i)__builtin_lsx_vmaxi_bu((v16u8)(_1), (_2))) ++ ++/* Assembly instruction format: vd, vj, ui5 */ ++/* Data types in instruction templates: UV8HI, UV8HI, UQI */ ++#define __lsx_vmaxi_hu(/*__m128i*/ _1, /*ui5*/ _2) \ ++ ((__m128i)__builtin_lsx_vmaxi_hu((v8u16)(_1), (_2))) ++ ++/* Assembly instruction format: vd, vj, ui5 */ ++/* Data types in instruction templates: UV4SI, UV4SI, UQI */ ++#define __lsx_vmaxi_wu(/*__m128i*/ _1, /*ui5*/ _2) \ ++ ((__m128i)__builtin_lsx_vmaxi_wu((v4u32)(_1), (_2))) ++ ++/* Assembly instruction format: vd, vj, ui5 */ ++/* Data types in instruction templates: UV2DI, UV2DI, UQI */ ++#define __lsx_vmaxi_du(/*__m128i*/ _1, /*ui5*/ _2) \ ++ ((__m128i)__builtin_lsx_vmaxi_du((v2u64)(_1), (_2))) ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: V16QI, V16QI, V16QI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ 
__lsx_vmin_b(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vmin_b((v16i8)_1, (v16i8)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: V8HI, V8HI, V8HI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vmin_h(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vmin_h((v8i16)_1, (v8i16)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: V4SI, V4SI, V4SI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vmin_w(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vmin_w((v4i32)_1, (v4i32)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: V2DI, V2DI, V2DI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vmin_d(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vmin_d((v2i64)_1, (v2i64)_2); ++} ++ ++/* Assembly instruction format: vd, vj, si5 */ ++/* Data types in instruction templates: V16QI, V16QI, QI */ ++#define __lsx_vmini_b(/*__m128i*/ _1, /*si5*/ _2) \ ++ ((__m128i)__builtin_lsx_vmini_b((v16i8)(_1), (_2))) ++ ++/* Assembly instruction format: vd, vj, si5 */ ++/* Data types in instruction templates: V8HI, V8HI, QI */ ++#define __lsx_vmini_h(/*__m128i*/ _1, /*si5*/ _2) \ ++ ((__m128i)__builtin_lsx_vmini_h((v8i16)(_1), (_2))) ++ ++/* Assembly instruction format: vd, vj, si5 */ ++/* Data types in instruction templates: V4SI, V4SI, QI */ ++#define __lsx_vmini_w(/*__m128i*/ _1, /*si5*/ _2) \ ++ ((__m128i)__builtin_lsx_vmini_w((v4i32)(_1), (_2))) ++ ++/* Assembly instruction format: vd, vj, si5 */ ++/* Data types in instruction templates: V2DI, V2DI, QI */ ++#define __lsx_vmini_d(/*__m128i*/ _1, /*si5*/ _2) \ ++ ((__m128i)__builtin_lsx_vmini_d((v2i64)(_1), (_2))) ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: UV16QI, UV16QI, UV16QI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vmin_bu(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vmin_bu((v16u8)_1, (v16u8)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: UV8HI, UV8HI, UV8HI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vmin_hu(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vmin_hu((v8u16)_1, (v8u16)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: UV4SI, UV4SI, UV4SI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vmin_wu(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vmin_wu((v4u32)_1, (v4u32)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: UV2DI, UV2DI, UV2DI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vmin_du(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vmin_du((v2u64)_1, (v2u64)_2); ++} ++ ++/* Assembly instruction format: vd, vj, ui5 */ ++/* Data types in instruction templates: UV16QI, UV16QI, UQI */ ++#define __lsx_vmini_bu(/*__m128i*/ _1, /*ui5*/ _2) \ ++ ((__m128i)__builtin_lsx_vmini_bu((v16u8)(_1), (_2))) ++ ++/* Assembly instruction format: vd, vj, ui5 */ ++/* Data types in instruction 
templates: UV8HI, UV8HI, UQI */ ++#define __lsx_vmini_hu(/*__m128i*/ _1, /*ui5*/ _2) \ ++ ((__m128i)__builtin_lsx_vmini_hu((v8u16)(_1), (_2))) ++ ++/* Assembly instruction format: vd, vj, ui5 */ ++/* Data types in instruction templates: UV4SI, UV4SI, UQI */ ++#define __lsx_vmini_wu(/*__m128i*/ _1, /*ui5*/ _2) \ ++ ((__m128i)__builtin_lsx_vmini_wu((v4u32)(_1), (_2))) ++ ++/* Assembly instruction format: vd, vj, ui5 */ ++/* Data types in instruction templates: UV2DI, UV2DI, UQI */ ++#define __lsx_vmini_du(/*__m128i*/ _1, /*ui5*/ _2) \ ++ ((__m128i)__builtin_lsx_vmini_du((v2u64)(_1), (_2))) ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: V16QI, V16QI, V16QI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vseq_b(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vseq_b((v16i8)_1, (v16i8)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: V8HI, V8HI, V8HI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vseq_h(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vseq_h((v8i16)_1, (v8i16)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: V4SI, V4SI, V4SI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vseq_w(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vseq_w((v4i32)_1, (v4i32)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: V2DI, V2DI, V2DI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vseq_d(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vseq_d((v2i64)_1, (v2i64)_2); ++} ++ ++/* Assembly instruction format: vd, vj, si5 */ ++/* Data types in instruction templates: V16QI, V16QI, QI */ ++#define __lsx_vseqi_b(/*__m128i*/ _1, /*si5*/ _2) \ ++ ((__m128i)__builtin_lsx_vseqi_b((v16i8)(_1), (_2))) ++ ++/* Assembly instruction format: vd, vj, si5 */ ++/* Data types in instruction templates: V8HI, V8HI, QI */ ++#define __lsx_vseqi_h(/*__m128i*/ _1, /*si5*/ _2) \ ++ ((__m128i)__builtin_lsx_vseqi_h((v8i16)(_1), (_2))) ++ ++/* Assembly instruction format: vd, vj, si5 */ ++/* Data types in instruction templates: V4SI, V4SI, QI */ ++#define __lsx_vseqi_w(/*__m128i*/ _1, /*si5*/ _2) \ ++ ((__m128i)__builtin_lsx_vseqi_w((v4i32)(_1), (_2))) ++ ++/* Assembly instruction format: vd, vj, si5 */ ++/* Data types in instruction templates: V2DI, V2DI, QI */ ++#define __lsx_vseqi_d(/*__m128i*/ _1, /*si5*/ _2) \ ++ ((__m128i)__builtin_lsx_vseqi_d((v2i64)(_1), (_2))) ++ ++/* Assembly instruction format: vd, vj, si5 */ ++/* Data types in instruction templates: V16QI, V16QI, QI */ ++#define __lsx_vslti_b(/*__m128i*/ _1, /*si5*/ _2) \ ++ ((__m128i)__builtin_lsx_vslti_b((v16i8)(_1), (_2))) ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: V16QI, V16QI, V16QI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vslt_b(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vslt_b((v16i8)_1, (v16i8)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: V8HI, V8HI, V8HI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vslt_h(__m128i _1, __m128i _2) { ++ 
return (__m128i)__builtin_lsx_vslt_h((v8i16)_1, (v8i16)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: V4SI, V4SI, V4SI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vslt_w(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vslt_w((v4i32)_1, (v4i32)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: V2DI, V2DI, V2DI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vslt_d(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vslt_d((v2i64)_1, (v2i64)_2); ++} ++ ++/* Assembly instruction format: vd, vj, si5 */ ++/* Data types in instruction templates: V8HI, V8HI, QI */ ++#define __lsx_vslti_h(/*__m128i*/ _1, /*si5*/ _2) \ ++ ((__m128i)__builtin_lsx_vslti_h((v8i16)(_1), (_2))) ++ ++/* Assembly instruction format: vd, vj, si5 */ ++/* Data types in instruction templates: V4SI, V4SI, QI */ ++#define __lsx_vslti_w(/*__m128i*/ _1, /*si5*/ _2) \ ++ ((__m128i)__builtin_lsx_vslti_w((v4i32)(_1), (_2))) ++ ++/* Assembly instruction format: vd, vj, si5 */ ++/* Data types in instruction templates: V2DI, V2DI, QI */ ++#define __lsx_vslti_d(/*__m128i*/ _1, /*si5*/ _2) \ ++ ((__m128i)__builtin_lsx_vslti_d((v2i64)(_1), (_2))) ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: V16QI, UV16QI, UV16QI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vslt_bu(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vslt_bu((v16u8)_1, (v16u8)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: V8HI, UV8HI, UV8HI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vslt_hu(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vslt_hu((v8u16)_1, (v8u16)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: V4SI, UV4SI, UV4SI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vslt_wu(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vslt_wu((v4u32)_1, (v4u32)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: V2DI, UV2DI, UV2DI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vslt_du(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vslt_du((v2u64)_1, (v2u64)_2); ++} ++ ++/* Assembly instruction format: vd, vj, ui5 */ ++/* Data types in instruction templates: V16QI, UV16QI, UQI */ ++#define __lsx_vslti_bu(/*__m128i*/ _1, /*ui5*/ _2) \ ++ ((__m128i)__builtin_lsx_vslti_bu((v16u8)(_1), (_2))) ++ ++/* Assembly instruction format: vd, vj, ui5 */ ++/* Data types in instruction templates: V8HI, UV8HI, UQI */ ++#define __lsx_vslti_hu(/*__m128i*/ _1, /*ui5*/ _2) \ ++ ((__m128i)__builtin_lsx_vslti_hu((v8u16)(_1), (_2))) ++ ++/* Assembly instruction format: vd, vj, ui5 */ ++/* Data types in instruction templates: V4SI, UV4SI, UQI */ ++#define __lsx_vslti_wu(/*__m128i*/ _1, /*ui5*/ _2) \ ++ ((__m128i)__builtin_lsx_vslti_wu((v4u32)(_1), (_2))) ++ ++/* Assembly instruction format: vd, vj, ui5 */ ++/* Data types in instruction templates: V2DI, UV2DI, UQI */ ++#define __lsx_vslti_du(/*__m128i*/ _1, /*ui5*/ _2) \ ++ ((__m128i)__builtin_lsx_vslti_du((v2u64)(_1), 
(_2))) ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: V16QI, V16QI, V16QI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vsle_b(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vsle_b((v16i8)_1, (v16i8)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: V8HI, V8HI, V8HI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vsle_h(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vsle_h((v8i16)_1, (v8i16)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: V4SI, V4SI, V4SI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vsle_w(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vsle_w((v4i32)_1, (v4i32)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: V2DI, V2DI, V2DI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vsle_d(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vsle_d((v2i64)_1, (v2i64)_2); ++} ++ ++/* Assembly instruction format: vd, vj, si5 */ ++/* Data types in instruction templates: V16QI, V16QI, QI */ ++#define __lsx_vslei_b(/*__m128i*/ _1, /*si5*/ _2) \ ++ ((__m128i)__builtin_lsx_vslei_b((v16i8)(_1), (_2))) ++ ++/* Assembly instruction format: vd, vj, si5 */ ++/* Data types in instruction templates: V8HI, V8HI, QI */ ++#define __lsx_vslei_h(/*__m128i*/ _1, /*si5*/ _2) \ ++ ((__m128i)__builtin_lsx_vslei_h((v8i16)(_1), (_2))) ++ ++/* Assembly instruction format: vd, vj, si5 */ ++/* Data types in instruction templates: V4SI, V4SI, QI */ ++#define __lsx_vslei_w(/*__m128i*/ _1, /*si5*/ _2) \ ++ ((__m128i)__builtin_lsx_vslei_w((v4i32)(_1), (_2))) ++ ++/* Assembly instruction format: vd, vj, si5 */ ++/* Data types in instruction templates: V2DI, V2DI, QI */ ++#define __lsx_vslei_d(/*__m128i*/ _1, /*si5*/ _2) \ ++ ((__m128i)__builtin_lsx_vslei_d((v2i64)(_1), (_2))) ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: V16QI, UV16QI, UV16QI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vsle_bu(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vsle_bu((v16u8)_1, (v16u8)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: V8HI, UV8HI, UV8HI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vsle_hu(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vsle_hu((v8u16)_1, (v8u16)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: V4SI, UV4SI, UV4SI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vsle_wu(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vsle_wu((v4u32)_1, (v4u32)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: V2DI, UV2DI, UV2DI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vsle_du(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vsle_du((v2u64)_1, (v2u64)_2); ++} ++ ++/* Assembly instruction format: vd, vj, ui5 */ ++/* Data types in instruction templates: 
V16QI, UV16QI, UQI */ ++#define __lsx_vslei_bu(/*__m128i*/ _1, /*ui5*/ _2) \ ++ ((__m128i)__builtin_lsx_vslei_bu((v16u8)(_1), (_2))) ++ ++/* Assembly instruction format: vd, vj, ui5 */ ++/* Data types in instruction templates: V8HI, UV8HI, UQI */ ++#define __lsx_vslei_hu(/*__m128i*/ _1, /*ui5*/ _2) \ ++ ((__m128i)__builtin_lsx_vslei_hu((v8u16)(_1), (_2))) ++ ++/* Assembly instruction format: vd, vj, ui5 */ ++/* Data types in instruction templates: V4SI, UV4SI, UQI */ ++#define __lsx_vslei_wu(/*__m128i*/ _1, /*ui5*/ _2) \ ++ ((__m128i)__builtin_lsx_vslei_wu((v4u32)(_1), (_2))) ++ ++/* Assembly instruction format: vd, vj, ui5 */ ++/* Data types in instruction templates: V2DI, UV2DI, UQI */ ++#define __lsx_vslei_du(/*__m128i*/ _1, /*ui5*/ _2) \ ++ ((__m128i)__builtin_lsx_vslei_du((v2u64)(_1), (_2))) ++ ++/* Assembly instruction format: vd, vj, ui3 */ ++/* Data types in instruction templates: V16QI, V16QI, UQI */ ++#define __lsx_vsat_b(/*__m128i*/ _1, /*ui3*/ _2) \ ++ ((__m128i)__builtin_lsx_vsat_b((v16i8)(_1), (_2))) ++ ++/* Assembly instruction format: vd, vj, ui4 */ ++/* Data types in instruction templates: V8HI, V8HI, UQI */ ++#define __lsx_vsat_h(/*__m128i*/ _1, /*ui4*/ _2) \ ++ ((__m128i)__builtin_lsx_vsat_h((v8i16)(_1), (_2))) ++ ++/* Assembly instruction format: vd, vj, ui5 */ ++/* Data types in instruction templates: V4SI, V4SI, UQI */ ++#define __lsx_vsat_w(/*__m128i*/ _1, /*ui5*/ _2) \ ++ ((__m128i)__builtin_lsx_vsat_w((v4i32)(_1), (_2))) ++ ++/* Assembly instruction format: vd, vj, ui6 */ ++/* Data types in instruction templates: V2DI, V2DI, UQI */ ++#define __lsx_vsat_d(/*__m128i*/ _1, /*ui6*/ _2) \ ++ ((__m128i)__builtin_lsx_vsat_d((v2i64)(_1), (_2))) ++ ++/* Assembly instruction format: vd, vj, ui3 */ ++/* Data types in instruction templates: UV16QI, UV16QI, UQI */ ++#define __lsx_vsat_bu(/*__m128i*/ _1, /*ui3*/ _2) \ ++ ((__m128i)__builtin_lsx_vsat_bu((v16u8)(_1), (_2))) ++ ++/* Assembly instruction format: vd, vj, ui4 */ ++/* Data types in instruction templates: UV8HI, UV8HI, UQI */ ++#define __lsx_vsat_hu(/*__m128i*/ _1, /*ui4*/ _2) \ ++ ((__m128i)__builtin_lsx_vsat_hu((v8u16)(_1), (_2))) ++ ++/* Assembly instruction format: vd, vj, ui5 */ ++/* Data types in instruction templates: UV4SI, UV4SI, UQI */ ++#define __lsx_vsat_wu(/*__m128i*/ _1, /*ui5*/ _2) \ ++ ((__m128i)__builtin_lsx_vsat_wu((v4u32)(_1), (_2))) ++ ++/* Assembly instruction format: vd, vj, ui6 */ ++/* Data types in instruction templates: UV2DI, UV2DI, UQI */ ++#define __lsx_vsat_du(/*__m128i*/ _1, /*ui6*/ _2) \ ++ ((__m128i)__builtin_lsx_vsat_du((v2u64)(_1), (_2))) ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: V16QI, V16QI, V16QI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vadda_b(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vadda_b((v16i8)_1, (v16i8)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: V8HI, V8HI, V8HI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vadda_h(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vadda_h((v8i16)_1, (v8i16)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: V4SI, V4SI, V4SI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vadda_w(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vadda_w((v4i32)_1, (v4i32)_2); ++} ++ 
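Purely as an illustration (not part of the header itself): the element-wise min/max intrinsics declared above are the usual way to clamp packed values. A minimal sketch, assuming a LoongArch64 compiler with LSX enabled (-mlsx); clamp_i8 is a hypothetical helper name, not something this patch defines:

  #include <lsxintrin.h>

  /* Clamp every signed byte of v into [lo, hi] using the LSX
     element-wise max/min intrinsics from this header. */
  static inline __m128i clamp_i8(__m128i v, __m128i lo, __m128i hi) {
    v = __lsx_vmax_b(v, lo);  /* raise elements that fall below lo */
    v = __lsx_vmin_b(v, hi);  /* cap elements that exceed hi      */
    return v;
  }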
++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: V2DI, V2DI, V2DI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vadda_d(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vadda_d((v2i64)_1, (v2i64)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: V16QI, V16QI, V16QI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vsadd_b(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vsadd_b((v16i8)_1, (v16i8)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: V8HI, V8HI, V8HI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vsadd_h(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vsadd_h((v8i16)_1, (v8i16)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: V4SI, V4SI, V4SI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vsadd_w(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vsadd_w((v4i32)_1, (v4i32)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: V2DI, V2DI, V2DI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vsadd_d(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vsadd_d((v2i64)_1, (v2i64)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: UV16QI, UV16QI, UV16QI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vsadd_bu(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vsadd_bu((v16u8)_1, (v16u8)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: UV8HI, UV8HI, UV8HI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vsadd_hu(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vsadd_hu((v8u16)_1, (v8u16)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: UV4SI, UV4SI, UV4SI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vsadd_wu(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vsadd_wu((v4u32)_1, (v4u32)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: UV2DI, UV2DI, UV2DI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vsadd_du(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vsadd_du((v2u64)_1, (v2u64)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: V16QI, V16QI, V16QI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vavg_b(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vavg_b((v16i8)_1, (v16i8)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: V8HI, V8HI, V8HI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vavg_h(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vavg_h((v8i16)_1, (v8i16)_2); ++} ++ ++/* 
Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: V4SI, V4SI, V4SI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vavg_w(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vavg_w((v4i32)_1, (v4i32)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: V2DI, V2DI, V2DI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vavg_d(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vavg_d((v2i64)_1, (v2i64)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: UV16QI, UV16QI, UV16QI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vavg_bu(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vavg_bu((v16u8)_1, (v16u8)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: UV8HI, UV8HI, UV8HI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vavg_hu(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vavg_hu((v8u16)_1, (v8u16)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: UV4SI, UV4SI, UV4SI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vavg_wu(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vavg_wu((v4u32)_1, (v4u32)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: UV2DI, UV2DI, UV2DI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vavg_du(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vavg_du((v2u64)_1, (v2u64)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: V16QI, V16QI, V16QI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vavgr_b(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vavgr_b((v16i8)_1, (v16i8)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: V8HI, V8HI, V8HI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vavgr_h(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vavgr_h((v8i16)_1, (v8i16)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: V4SI, V4SI, V4SI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vavgr_w(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vavgr_w((v4i32)_1, (v4i32)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: V2DI, V2DI, V2DI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vavgr_d(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vavgr_d((v2i64)_1, (v2i64)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: UV16QI, UV16QI, UV16QI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vavgr_bu(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vavgr_bu((v16u8)_1, (v16u8)_2); ++} ++ ++/* Assembly 
instruction format: vd, vj, vk */ ++/* Data types in instruction templates: UV8HI, UV8HI, UV8HI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vavgr_hu(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vavgr_hu((v8u16)_1, (v8u16)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: UV4SI, UV4SI, UV4SI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vavgr_wu(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vavgr_wu((v4u32)_1, (v4u32)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: UV2DI, UV2DI, UV2DI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vavgr_du(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vavgr_du((v2u64)_1, (v2u64)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: V16QI, V16QI, V16QI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vssub_b(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vssub_b((v16i8)_1, (v16i8)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: V8HI, V8HI, V8HI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vssub_h(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vssub_h((v8i16)_1, (v8i16)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: V4SI, V4SI, V4SI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vssub_w(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vssub_w((v4i32)_1, (v4i32)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: V2DI, V2DI, V2DI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vssub_d(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vssub_d((v2i64)_1, (v2i64)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: UV16QI, UV16QI, UV16QI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vssub_bu(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vssub_bu((v16u8)_1, (v16u8)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: UV8HI, UV8HI, UV8HI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vssub_hu(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vssub_hu((v8u16)_1, (v8u16)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: UV4SI, UV4SI, UV4SI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vssub_wu(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vssub_wu((v4u32)_1, (v4u32)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: UV2DI, UV2DI, UV2DI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vssub_du(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vssub_du((v2u64)_1, (v2u64)_2); ++} ++ ++/* 
Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: V16QI, V16QI, V16QI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vabsd_b(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vabsd_b((v16i8)_1, (v16i8)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: V8HI, V8HI, V8HI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vabsd_h(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vabsd_h((v8i16)_1, (v8i16)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: V4SI, V4SI, V4SI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vabsd_w(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vabsd_w((v4i32)_1, (v4i32)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: V2DI, V2DI, V2DI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vabsd_d(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vabsd_d((v2i64)_1, (v2i64)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: UV16QI, UV16QI, UV16QI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vabsd_bu(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vabsd_bu((v16u8)_1, (v16u8)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: UV8HI, UV8HI, UV8HI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vabsd_hu(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vabsd_hu((v8u16)_1, (v8u16)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: UV4SI, UV4SI, UV4SI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vabsd_wu(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vabsd_wu((v4u32)_1, (v4u32)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: UV2DI, UV2DI, UV2DI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vabsd_du(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vabsd_du((v2u64)_1, (v2u64)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: V16QI, V16QI, V16QI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vmul_b(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vmul_b((v16i8)_1, (v16i8)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: V8HI, V8HI, V8HI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vmul_h(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vmul_h((v8i16)_1, (v8i16)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: V4SI, V4SI, V4SI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vmul_w(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vmul_w((v4i32)_1, (v4i32)_2); ++} ++ ++/* Assembly 
instruction format: vd, vj, vk */ ++/* Data types in instruction templates: V2DI, V2DI, V2DI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vmul_d(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vmul_d((v2i64)_1, (v2i64)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: V16QI, V16QI, V16QI, V16QI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vmadd_b(__m128i _1, __m128i _2, __m128i _3) { ++ return (__m128i)__builtin_lsx_vmadd_b((v16i8)_1, (v16i8)_2, (v16i8)_3); ++} ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: V8HI, V8HI, V8HI, V8HI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vmadd_h(__m128i _1, __m128i _2, __m128i _3) { ++ return (__m128i)__builtin_lsx_vmadd_h((v8i16)_1, (v8i16)_2, (v8i16)_3); ++} ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: V4SI, V4SI, V4SI, V4SI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vmadd_w(__m128i _1, __m128i _2, __m128i _3) { ++ return (__m128i)__builtin_lsx_vmadd_w((v4i32)_1, (v4i32)_2, (v4i32)_3); ++} ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: V2DI, V2DI, V2DI, V2DI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vmadd_d(__m128i _1, __m128i _2, __m128i _3) { ++ return (__m128i)__builtin_lsx_vmadd_d((v2i64)_1, (v2i64)_2, (v2i64)_3); ++} ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: V16QI, V16QI, V16QI, V16QI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vmsub_b(__m128i _1, __m128i _2, __m128i _3) { ++ return (__m128i)__builtin_lsx_vmsub_b((v16i8)_1, (v16i8)_2, (v16i8)_3); ++} ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: V8HI, V8HI, V8HI, V8HI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vmsub_h(__m128i _1, __m128i _2, __m128i _3) { ++ return (__m128i)__builtin_lsx_vmsub_h((v8i16)_1, (v8i16)_2, (v8i16)_3); ++} ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: V4SI, V4SI, V4SI, V4SI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vmsub_w(__m128i _1, __m128i _2, __m128i _3) { ++ return (__m128i)__builtin_lsx_vmsub_w((v4i32)_1, (v4i32)_2, (v4i32)_3); ++} ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: V2DI, V2DI, V2DI, V2DI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vmsub_d(__m128i _1, __m128i _2, __m128i _3) { ++ return (__m128i)__builtin_lsx_vmsub_d((v2i64)_1, (v2i64)_2, (v2i64)_3); ++} ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: V16QI, V16QI, V16QI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vdiv_b(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vdiv_b((v16i8)_1, (v16i8)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: V8HI, V8HI, V8HI */ ++extern __inline 
++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vdiv_h(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vdiv_h((v8i16)_1, (v8i16)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: V4SI, V4SI, V4SI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vdiv_w(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vdiv_w((v4i32)_1, (v4i32)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: V2DI, V2DI, V2DI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vdiv_d(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vdiv_d((v2i64)_1, (v2i64)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: UV16QI, UV16QI, UV16QI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vdiv_bu(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vdiv_bu((v16u8)_1, (v16u8)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: UV8HI, UV8HI, UV8HI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vdiv_hu(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vdiv_hu((v8u16)_1, (v8u16)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: UV4SI, UV4SI, UV4SI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vdiv_wu(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vdiv_wu((v4u32)_1, (v4u32)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: UV2DI, UV2DI, UV2DI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vdiv_du(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vdiv_du((v2u64)_1, (v2u64)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: V8HI, V16QI, V16QI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vhaddw_h_b(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vhaddw_h_b((v16i8)_1, (v16i8)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: V4SI, V8HI, V8HI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vhaddw_w_h(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vhaddw_w_h((v8i16)_1, (v8i16)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: V2DI, V4SI, V4SI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vhaddw_d_w(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vhaddw_d_w((v4i32)_1, (v4i32)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: UV8HI, UV16QI, UV16QI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vhaddw_hu_bu(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vhaddw_hu_bu((v16u8)_1, (v16u8)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: UV4SI, UV8HI, UV8HI */ 
++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vhaddw_wu_hu(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vhaddw_wu_hu((v8u16)_1, (v8u16)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: UV2DI, UV4SI, UV4SI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vhaddw_du_wu(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vhaddw_du_wu((v4u32)_1, (v4u32)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: V8HI, V16QI, V16QI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vhsubw_h_b(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vhsubw_h_b((v16i8)_1, (v16i8)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: V4SI, V8HI, V8HI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vhsubw_w_h(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vhsubw_w_h((v8i16)_1, (v8i16)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: V2DI, V4SI, V4SI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vhsubw_d_w(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vhsubw_d_w((v4i32)_1, (v4i32)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: V8HI, UV16QI, UV16QI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vhsubw_hu_bu(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vhsubw_hu_bu((v16u8)_1, (v16u8)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: V4SI, UV8HI, UV8HI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vhsubw_wu_hu(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vhsubw_wu_hu((v8u16)_1, (v8u16)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: V2DI, UV4SI, UV4SI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vhsubw_du_wu(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vhsubw_du_wu((v4u32)_1, (v4u32)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: V16QI, V16QI, V16QI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vmod_b(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vmod_b((v16i8)_1, (v16i8)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: V8HI, V8HI, V8HI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vmod_h(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vmod_h((v8i16)_1, (v8i16)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: V4SI, V4SI, V4SI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vmod_w(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vmod_w((v4i32)_1, (v4i32)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in 
instruction templates: V2DI, V2DI, V2DI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vmod_d(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vmod_d((v2i64)_1, (v2i64)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: UV16QI, UV16QI, UV16QI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vmod_bu(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vmod_bu((v16u8)_1, (v16u8)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: UV8HI, UV8HI, UV8HI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vmod_hu(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vmod_hu((v8u16)_1, (v8u16)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: UV4SI, UV4SI, UV4SI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vmod_wu(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vmod_wu((v4u32)_1, (v4u32)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: UV2DI, UV2DI, UV2DI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vmod_du(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vmod_du((v2u64)_1, (v2u64)_2); ++} ++ ++/* Assembly instruction format: vd, vj, rk */ ++/* Data types in instruction templates: V16QI, V16QI, SI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vreplve_b(__m128i _1, int _2) { ++ return (__m128i)__builtin_lsx_vreplve_b((v16i8)_1, (int)_2); ++} ++ ++/* Assembly instruction format: vd, vj, rk */ ++/* Data types in instruction templates: V8HI, V8HI, SI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vreplve_h(__m128i _1, int _2) { ++ return (__m128i)__builtin_lsx_vreplve_h((v8i16)_1, (int)_2); ++} ++ ++/* Assembly instruction format: vd, vj, rk */ ++/* Data types in instruction templates: V4SI, V4SI, SI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vreplve_w(__m128i _1, int _2) { ++ return (__m128i)__builtin_lsx_vreplve_w((v4i32)_1, (int)_2); ++} ++ ++/* Assembly instruction format: vd, vj, rk */ ++/* Data types in instruction templates: V2DI, V2DI, SI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vreplve_d(__m128i _1, int _2) { ++ return (__m128i)__builtin_lsx_vreplve_d((v2i64)_1, (int)_2); ++} ++ ++/* Assembly instruction format: vd, vj, ui4 */ ++/* Data types in instruction templates: V16QI, V16QI, UQI */ ++#define __lsx_vreplvei_b(/*__m128i*/ _1, /*ui4*/ _2) \ ++ ((__m128i)__builtin_lsx_vreplvei_b((v16i8)(_1), (_2))) ++ ++/* Assembly instruction format: vd, vj, ui3 */ ++/* Data types in instruction templates: V8HI, V8HI, UQI */ ++#define __lsx_vreplvei_h(/*__m128i*/ _1, /*ui3*/ _2) \ ++ ((__m128i)__builtin_lsx_vreplvei_h((v8i16)(_1), (_2))) ++ ++/* Assembly instruction format: vd, vj, ui2 */ ++/* Data types in instruction templates: V4SI, V4SI, UQI */ ++#define __lsx_vreplvei_w(/*__m128i*/ _1, /*ui2*/ _2) \ ++ ((__m128i)__builtin_lsx_vreplvei_w((v4i32)(_1), (_2))) ++ ++/* Assembly instruction format: vd, vj, ui1 */ ++/* 
Data types in instruction templates: V2DI, V2DI, UQI */ ++#define __lsx_vreplvei_d(/*__m128i*/ _1, /*ui1*/ _2) \ ++ ((__m128i)__builtin_lsx_vreplvei_d((v2i64)(_1), (_2))) ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: V16QI, V16QI, V16QI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vpickev_b(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vpickev_b((v16i8)_1, (v16i8)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: V8HI, V8HI, V8HI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vpickev_h(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vpickev_h((v8i16)_1, (v8i16)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: V4SI, V4SI, V4SI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vpickev_w(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vpickev_w((v4i32)_1, (v4i32)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: V2DI, V2DI, V2DI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vpickev_d(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vpickev_d((v2i64)_1, (v2i64)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: V16QI, V16QI, V16QI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vpickod_b(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vpickod_b((v16i8)_1, (v16i8)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: V8HI, V8HI, V8HI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vpickod_h(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vpickod_h((v8i16)_1, (v8i16)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: V4SI, V4SI, V4SI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vpickod_w(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vpickod_w((v4i32)_1, (v4i32)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: V2DI, V2DI, V2DI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vpickod_d(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vpickod_d((v2i64)_1, (v2i64)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: V16QI, V16QI, V16QI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vilvh_b(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vilvh_b((v16i8)_1, (v16i8)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: V8HI, V8HI, V8HI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vilvh_h(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vilvh_h((v8i16)_1, (v8i16)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: V4SI, V4SI, V4SI */ ++extern __inline ++ 
__attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vilvh_w(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vilvh_w((v4i32)_1, (v4i32)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: V2DI, V2DI, V2DI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vilvh_d(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vilvh_d((v2i64)_1, (v2i64)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: V16QI, V16QI, V16QI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vilvl_b(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vilvl_b((v16i8)_1, (v16i8)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: V8HI, V8HI, V8HI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vilvl_h(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vilvl_h((v8i16)_1, (v8i16)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: V4SI, V4SI, V4SI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vilvl_w(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vilvl_w((v4i32)_1, (v4i32)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: V2DI, V2DI, V2DI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vilvl_d(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vilvl_d((v2i64)_1, (v2i64)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: V16QI, V16QI, V16QI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vpackev_b(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vpackev_b((v16i8)_1, (v16i8)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: V8HI, V8HI, V8HI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vpackev_h(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vpackev_h((v8i16)_1, (v8i16)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: V4SI, V4SI, V4SI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vpackev_w(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vpackev_w((v4i32)_1, (v4i32)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: V2DI, V2DI, V2DI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vpackev_d(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vpackev_d((v2i64)_1, (v2i64)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: V16QI, V16QI, V16QI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vpackod_b(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vpackod_b((v16i8)_1, (v16i8)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: V8HI, V8HI, V8HI */ ++extern __inline ++ 
__attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vpackod_h(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vpackod_h((v8i16)_1, (v8i16)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: V4SI, V4SI, V4SI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vpackod_w(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vpackod_w((v4i32)_1, (v4i32)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: V2DI, V2DI, V2DI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vpackod_d(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vpackod_d((v2i64)_1, (v2i64)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: V8HI, V8HI, V8HI, V8HI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vshuf_h(__m128i _1, __m128i _2, __m128i _3) { ++ return (__m128i)__builtin_lsx_vshuf_h((v8i16)_1, (v8i16)_2, (v8i16)_3); ++} ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: V4SI, V4SI, V4SI, V4SI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vshuf_w(__m128i _1, __m128i _2, __m128i _3) { ++ return (__m128i)__builtin_lsx_vshuf_w((v4i32)_1, (v4i32)_2, (v4i32)_3); ++} ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: V2DI, V2DI, V2DI, V2DI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vshuf_d(__m128i _1, __m128i _2, __m128i _3) { ++ return (__m128i)__builtin_lsx_vshuf_d((v2i64)_1, (v2i64)_2, (v2i64)_3); ++} ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: UV16QI, UV16QI, UV16QI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vand_v(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vand_v((v16u8)_1, (v16u8)_2); ++} ++ ++/* Assembly instruction format: vd, vj, ui8 */ ++/* Data types in instruction templates: UV16QI, UV16QI, UQI */ ++#define __lsx_vandi_b(/*__m128i*/ _1, /*ui8*/ _2) \ ++ ((__m128i)__builtin_lsx_vandi_b((v16u8)(_1), (_2))) ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: UV16QI, UV16QI, UV16QI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vor_v(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vor_v((v16u8)_1, (v16u8)_2); ++} ++ ++/* Assembly instruction format: vd, vj, ui8 */ ++/* Data types in instruction templates: UV16QI, UV16QI, UQI */ ++#define __lsx_vori_b(/*__m128i*/ _1, /*ui8*/ _2) \ ++ ((__m128i)__builtin_lsx_vori_b((v16u8)(_1), (_2))) ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: UV16QI, UV16QI, UV16QI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vnor_v(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vnor_v((v16u8)_1, (v16u8)_2); ++} ++ ++/* Assembly instruction format: vd, vj, ui8 */ ++/* Data types in instruction templates: UV16QI, UV16QI, UQI */ ++#define __lsx_vnori_b(/*__m128i*/ _1, /*ui8*/ _2) \ ++ ((__m128i)__builtin_lsx_vnori_b((v16u8)(_1), (_2))) ++ ++/* Assembly 
instruction format: vd, vj, vk */ ++/* Data types in instruction templates: UV16QI, UV16QI, UV16QI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vxor_v(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vxor_v((v16u8)_1, (v16u8)_2); ++} ++ ++/* Assembly instruction format: vd, vj, ui8 */ ++/* Data types in instruction templates: UV16QI, UV16QI, UQI */ ++#define __lsx_vxori_b(/*__m128i*/ _1, /*ui8*/ _2) \ ++ ((__m128i)__builtin_lsx_vxori_b((v16u8)(_1), (_2))) ++ ++/* Assembly instruction format: vd, vj, vk, va */ ++/* Data types in instruction templates: UV16QI, UV16QI, UV16QI, UV16QI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vbitsel_v(__m128i _1, __m128i _2, __m128i _3) { ++ return (__m128i)__builtin_lsx_vbitsel_v((v16u8)_1, (v16u8)_2, (v16u8)_3); ++} ++ ++/* Assembly instruction format: vd, vj, ui8 */ ++/* Data types in instruction templates: UV16QI, UV16QI, UV16QI, UQI */ ++#define __lsx_vbitseli_b(/*__m128i*/ _1, /*__m128i*/ _2, /*ui8*/ _3) \ ++ ((__m128i)__builtin_lsx_vbitseli_b((v16u8)(_1), (v16u8)(_2), (_3))) ++ ++/* Assembly instruction format: vd, vj, ui8 */ ++/* Data types in instruction templates: V16QI, V16QI, UQI */ ++#define __lsx_vshuf4i_b(/*__m128i*/ _1, /*ui8*/ _2) \ ++ ((__m128i)__builtin_lsx_vshuf4i_b((v16i8)(_1), (_2))) ++ ++/* Assembly instruction format: vd, vj, ui8 */ ++/* Data types in instruction templates: V8HI, V8HI, UQI */ ++#define __lsx_vshuf4i_h(/*__m128i*/ _1, /*ui8*/ _2) \ ++ ((__m128i)__builtin_lsx_vshuf4i_h((v8i16)(_1), (_2))) ++ ++/* Assembly instruction format: vd, vj, ui8 */ ++/* Data types in instruction templates: V4SI, V4SI, UQI */ ++#define __lsx_vshuf4i_w(/*__m128i*/ _1, /*ui8*/ _2) \ ++ ((__m128i)__builtin_lsx_vshuf4i_w((v4i32)(_1), (_2))) ++ ++/* Assembly instruction format: vd, rj */ ++/* Data types in instruction templates: V16QI, SI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vreplgr2vr_b(int _1) { ++ return (__m128i)__builtin_lsx_vreplgr2vr_b((int)_1); ++} ++ ++/* Assembly instruction format: vd, rj */ ++/* Data types in instruction templates: V8HI, SI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vreplgr2vr_h(int _1) { ++ return (__m128i)__builtin_lsx_vreplgr2vr_h((int)_1); ++} ++ ++/* Assembly instruction format: vd, rj */ ++/* Data types in instruction templates: V4SI, SI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vreplgr2vr_w(int _1) { ++ return (__m128i)__builtin_lsx_vreplgr2vr_w((int)_1); ++} ++ ++/* Assembly instruction format: vd, rj */ ++/* Data types in instruction templates: V2DI, DI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vreplgr2vr_d(long int _1) { ++ return (__m128i)__builtin_lsx_vreplgr2vr_d((long int)_1); ++} ++ ++/* Assembly instruction format: vd, vj */ ++/* Data types in instruction templates: V16QI, V16QI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vpcnt_b(__m128i _1) { ++ return (__m128i)__builtin_lsx_vpcnt_b((v16i8)_1); ++} ++ ++/* Assembly instruction format: vd, vj */ ++/* Data types in instruction templates: V8HI, V8HI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vpcnt_h(__m128i _1) { ++ return 
(__m128i)__builtin_lsx_vpcnt_h((v8i16)_1); ++} ++ ++/* Assembly instruction format: vd, vj */ ++/* Data types in instruction templates: V4SI, V4SI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vpcnt_w(__m128i _1) { ++ return (__m128i)__builtin_lsx_vpcnt_w((v4i32)_1); ++} ++ ++/* Assembly instruction format: vd, vj */ ++/* Data types in instruction templates: V2DI, V2DI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vpcnt_d(__m128i _1) { ++ return (__m128i)__builtin_lsx_vpcnt_d((v2i64)_1); ++} ++ ++/* Assembly instruction format: vd, vj */ ++/* Data types in instruction templates: V16QI, V16QI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vclo_b(__m128i _1) { ++ return (__m128i)__builtin_lsx_vclo_b((v16i8)_1); ++} ++ ++/* Assembly instruction format: vd, vj */ ++/* Data types in instruction templates: V8HI, V8HI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vclo_h(__m128i _1) { ++ return (__m128i)__builtin_lsx_vclo_h((v8i16)_1); ++} ++ ++/* Assembly instruction format: vd, vj */ ++/* Data types in instruction templates: V4SI, V4SI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vclo_w(__m128i _1) { ++ return (__m128i)__builtin_lsx_vclo_w((v4i32)_1); ++} ++ ++/* Assembly instruction format: vd, vj */ ++/* Data types in instruction templates: V2DI, V2DI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vclo_d(__m128i _1) { ++ return (__m128i)__builtin_lsx_vclo_d((v2i64)_1); ++} ++ ++/* Assembly instruction format: vd, vj */ ++/* Data types in instruction templates: V16QI, V16QI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vclz_b(__m128i _1) { ++ return (__m128i)__builtin_lsx_vclz_b((v16i8)_1); ++} ++ ++/* Assembly instruction format: vd, vj */ ++/* Data types in instruction templates: V8HI, V8HI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vclz_h(__m128i _1) { ++ return (__m128i)__builtin_lsx_vclz_h((v8i16)_1); ++} ++ ++/* Assembly instruction format: vd, vj */ ++/* Data types in instruction templates: V4SI, V4SI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vclz_w(__m128i _1) { ++ return (__m128i)__builtin_lsx_vclz_w((v4i32)_1); ++} ++ ++/* Assembly instruction format: vd, vj */ ++/* Data types in instruction templates: V2DI, V2DI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vclz_d(__m128i _1) { ++ return (__m128i)__builtin_lsx_vclz_d((v2i64)_1); ++} ++ ++/* Assembly instruction format: rd, vj, ui4 */ ++/* Data types in instruction templates: SI, V16QI, UQI */ ++#define __lsx_vpickve2gr_b(/*__m128i*/ _1, /*ui4*/ _2) \ ++ ((int)__builtin_lsx_vpickve2gr_b((v16i8)(_1), (_2))) ++ ++/* Assembly instruction format: rd, vj, ui3 */ ++/* Data types in instruction templates: SI, V8HI, UQI */ ++#define __lsx_vpickve2gr_h(/*__m128i*/ _1, /*ui3*/ _2) \ ++ ((int)__builtin_lsx_vpickve2gr_h((v8i16)(_1), (_2))) ++ ++/* Assembly instruction format: rd, vj, ui2 */ ++/* Data types in instruction templates: SI, V4SI, UQI */ ++#define __lsx_vpickve2gr_w(/*__m128i*/ _1, /*ui2*/ _2) \ ++ 
((int)__builtin_lsx_vpickve2gr_w((v4i32)(_1), (_2))) ++ ++/* Assembly instruction format: rd, vj, ui1 */ ++/* Data types in instruction templates: DI, V2DI, UQI */ ++#define __lsx_vpickve2gr_d(/*__m128i*/ _1, /*ui1*/ _2) \ ++ ((long int)__builtin_lsx_vpickve2gr_d((v2i64)(_1), (_2))) ++ ++/* Assembly instruction format: rd, vj, ui4 */ ++/* Data types in instruction templates: USI, V16QI, UQI */ ++#define __lsx_vpickve2gr_bu(/*__m128i*/ _1, /*ui4*/ _2) \ ++ ((unsigned int)__builtin_lsx_vpickve2gr_bu((v16i8)(_1), (_2))) ++ ++/* Assembly instruction format: rd, vj, ui3 */ ++/* Data types in instruction templates: USI, V8HI, UQI */ ++#define __lsx_vpickve2gr_hu(/*__m128i*/ _1, /*ui3*/ _2) \ ++ ((unsigned int)__builtin_lsx_vpickve2gr_hu((v8i16)(_1), (_2))) ++ ++/* Assembly instruction format: rd, vj, ui2 */ ++/* Data types in instruction templates: USI, V4SI, UQI */ ++#define __lsx_vpickve2gr_wu(/*__m128i*/ _1, /*ui2*/ _2) \ ++ ((unsigned int)__builtin_lsx_vpickve2gr_wu((v4i32)(_1), (_2))) ++ ++/* Assembly instruction format: rd, vj, ui1 */ ++/* Data types in instruction templates: UDI, V2DI, UQI */ ++#define __lsx_vpickve2gr_du(/*__m128i*/ _1, /*ui1*/ _2) \ ++ ((unsigned long int)__builtin_lsx_vpickve2gr_du((v2i64)(_1), (_2))) ++ ++/* Assembly instruction format: vd, rj, ui4 */ ++/* Data types in instruction templates: V16QI, V16QI, SI, UQI */ ++#define __lsx_vinsgr2vr_b(/*__m128i*/ _1, /*int*/ _2, /*ui4*/ _3) \ ++ ((__m128i)__builtin_lsx_vinsgr2vr_b((v16i8)(_1), (int)(_2), (_3))) ++ ++/* Assembly instruction format: vd, rj, ui3 */ ++/* Data types in instruction templates: V8HI, V8HI, SI, UQI */ ++#define __lsx_vinsgr2vr_h(/*__m128i*/ _1, /*int*/ _2, /*ui3*/ _3) \ ++ ((__m128i)__builtin_lsx_vinsgr2vr_h((v8i16)(_1), (int)(_2), (_3))) ++ ++/* Assembly instruction format: vd, rj, ui2 */ ++/* Data types in instruction templates: V4SI, V4SI, SI, UQI */ ++#define __lsx_vinsgr2vr_w(/*__m128i*/ _1, /*int*/ _2, /*ui2*/ _3) \ ++ ((__m128i)__builtin_lsx_vinsgr2vr_w((v4i32)(_1), (int)(_2), (_3))) ++ ++/* Assembly instruction format: vd, rj, ui1 */ ++/* Data types in instruction templates: V2DI, V2DI, SI, UQI */ ++#define __lsx_vinsgr2vr_d(/*__m128i*/ _1, /*long int*/ _2, /*ui1*/ _3) \ ++ ((__m128i)__builtin_lsx_vinsgr2vr_d((v2i64)(_1), (long int)(_2), (_3))) ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: V4SI, V4SF, V4SF */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vfcmp_caf_s(__m128 _1, __m128 _2) { ++ return (__m128i)__builtin_lsx_vfcmp_caf_s((v4f32)_1, (v4f32)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: V2DI, V2DF, V2DF */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vfcmp_caf_d(__m128d _1, __m128d _2) { ++ return (__m128i)__builtin_lsx_vfcmp_caf_d((v2f64)_1, (v2f64)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: V4SI, V4SF, V4SF */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vfcmp_cor_s(__m128 _1, __m128 _2) { ++ return (__m128i)__builtin_lsx_vfcmp_cor_s((v4f32)_1, (v4f32)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: V2DI, V2DF, V2DF */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vfcmp_cor_d(__m128d _1, __m128d _2) { ++ return 
(__m128i)__builtin_lsx_vfcmp_cor_d((v2f64)_1, (v2f64)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: V4SI, V4SF, V4SF */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vfcmp_cun_s(__m128 _1, __m128 _2) { ++ return (__m128i)__builtin_lsx_vfcmp_cun_s((v4f32)_1, (v4f32)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: V2DI, V2DF, V2DF */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vfcmp_cun_d(__m128d _1, __m128d _2) { ++ return (__m128i)__builtin_lsx_vfcmp_cun_d((v2f64)_1, (v2f64)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: V4SI, V4SF, V4SF */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vfcmp_cune_s(__m128 _1, __m128 _2) { ++ return (__m128i)__builtin_lsx_vfcmp_cune_s((v4f32)_1, (v4f32)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: V2DI, V2DF, V2DF */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vfcmp_cune_d(__m128d _1, __m128d _2) { ++ return (__m128i)__builtin_lsx_vfcmp_cune_d((v2f64)_1, (v2f64)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: V4SI, V4SF, V4SF */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vfcmp_cueq_s(__m128 _1, __m128 _2) { ++ return (__m128i)__builtin_lsx_vfcmp_cueq_s((v4f32)_1, (v4f32)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: V2DI, V2DF, V2DF */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vfcmp_cueq_d(__m128d _1, __m128d _2) { ++ return (__m128i)__builtin_lsx_vfcmp_cueq_d((v2f64)_1, (v2f64)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: V4SI, V4SF, V4SF */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vfcmp_ceq_s(__m128 _1, __m128 _2) { ++ return (__m128i)__builtin_lsx_vfcmp_ceq_s((v4f32)_1, (v4f32)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: V2DI, V2DF, V2DF */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vfcmp_ceq_d(__m128d _1, __m128d _2) { ++ return (__m128i)__builtin_lsx_vfcmp_ceq_d((v2f64)_1, (v2f64)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: V4SI, V4SF, V4SF */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vfcmp_cne_s(__m128 _1, __m128 _2) { ++ return (__m128i)__builtin_lsx_vfcmp_cne_s((v4f32)_1, (v4f32)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: V2DI, V2DF, V2DF */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vfcmp_cne_d(__m128d _1, __m128d _2) { ++ return (__m128i)__builtin_lsx_vfcmp_cne_d((v2f64)_1, (v2f64)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: V4SI, V4SF, V4SF */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ 
__lsx_vfcmp_clt_s(__m128 _1, __m128 _2) { ++ return (__m128i)__builtin_lsx_vfcmp_clt_s((v4f32)_1, (v4f32)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: V2DI, V2DF, V2DF */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vfcmp_clt_d(__m128d _1, __m128d _2) { ++ return (__m128i)__builtin_lsx_vfcmp_clt_d((v2f64)_1, (v2f64)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: V4SI, V4SF, V4SF */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vfcmp_cult_s(__m128 _1, __m128 _2) { ++ return (__m128i)__builtin_lsx_vfcmp_cult_s((v4f32)_1, (v4f32)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: V2DI, V2DF, V2DF */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vfcmp_cult_d(__m128d _1, __m128d _2) { ++ return (__m128i)__builtin_lsx_vfcmp_cult_d((v2f64)_1, (v2f64)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: V4SI, V4SF, V4SF */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vfcmp_cle_s(__m128 _1, __m128 _2) { ++ return (__m128i)__builtin_lsx_vfcmp_cle_s((v4f32)_1, (v4f32)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: V2DI, V2DF, V2DF */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vfcmp_cle_d(__m128d _1, __m128d _2) { ++ return (__m128i)__builtin_lsx_vfcmp_cle_d((v2f64)_1, (v2f64)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: V4SI, V4SF, V4SF */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vfcmp_cule_s(__m128 _1, __m128 _2) { ++ return (__m128i)__builtin_lsx_vfcmp_cule_s((v4f32)_1, (v4f32)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: V2DI, V2DF, V2DF */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vfcmp_cule_d(__m128d _1, __m128d _2) { ++ return (__m128i)__builtin_lsx_vfcmp_cule_d((v2f64)_1, (v2f64)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: V4SI, V4SF, V4SF */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vfcmp_saf_s(__m128 _1, __m128 _2) { ++ return (__m128i)__builtin_lsx_vfcmp_saf_s((v4f32)_1, (v4f32)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: V2DI, V2DF, V2DF */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vfcmp_saf_d(__m128d _1, __m128d _2) { ++ return (__m128i)__builtin_lsx_vfcmp_saf_d((v2f64)_1, (v2f64)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: V4SI, V4SF, V4SF */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vfcmp_sor_s(__m128 _1, __m128 _2) { ++ return (__m128i)__builtin_lsx_vfcmp_sor_s((v4f32)_1, (v4f32)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: V2DI, V2DF, V2DF */ ++extern __inline ++ 
__attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vfcmp_sor_d(__m128d _1, __m128d _2) { ++ return (__m128i)__builtin_lsx_vfcmp_sor_d((v2f64)_1, (v2f64)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: V4SI, V4SF, V4SF */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vfcmp_sun_s(__m128 _1, __m128 _2) { ++ return (__m128i)__builtin_lsx_vfcmp_sun_s((v4f32)_1, (v4f32)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: V2DI, V2DF, V2DF */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vfcmp_sun_d(__m128d _1, __m128d _2) { ++ return (__m128i)__builtin_lsx_vfcmp_sun_d((v2f64)_1, (v2f64)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: V4SI, V4SF, V4SF */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vfcmp_sune_s(__m128 _1, __m128 _2) { ++ return (__m128i)__builtin_lsx_vfcmp_sune_s((v4f32)_1, (v4f32)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: V2DI, V2DF, V2DF */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vfcmp_sune_d(__m128d _1, __m128d _2) { ++ return (__m128i)__builtin_lsx_vfcmp_sune_d((v2f64)_1, (v2f64)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: V4SI, V4SF, V4SF */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vfcmp_sueq_s(__m128 _1, __m128 _2) { ++ return (__m128i)__builtin_lsx_vfcmp_sueq_s((v4f32)_1, (v4f32)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: V2DI, V2DF, V2DF */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vfcmp_sueq_d(__m128d _1, __m128d _2) { ++ return (__m128i)__builtin_lsx_vfcmp_sueq_d((v2f64)_1, (v2f64)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: V4SI, V4SF, V4SF */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vfcmp_seq_s(__m128 _1, __m128 _2) { ++ return (__m128i)__builtin_lsx_vfcmp_seq_s((v4f32)_1, (v4f32)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: V2DI, V2DF, V2DF */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vfcmp_seq_d(__m128d _1, __m128d _2) { ++ return (__m128i)__builtin_lsx_vfcmp_seq_d((v2f64)_1, (v2f64)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: V4SI, V4SF, V4SF */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vfcmp_sne_s(__m128 _1, __m128 _2) { ++ return (__m128i)__builtin_lsx_vfcmp_sne_s((v4f32)_1, (v4f32)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: V2DI, V2DF, V2DF */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vfcmp_sne_d(__m128d _1, __m128d _2) { ++ return (__m128i)__builtin_lsx_vfcmp_sne_d((v2f64)_1, (v2f64)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction 
templates: V4SI, V4SF, V4SF */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vfcmp_slt_s(__m128 _1, __m128 _2) { ++ return (__m128i)__builtin_lsx_vfcmp_slt_s((v4f32)_1, (v4f32)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: V2DI, V2DF, V2DF */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vfcmp_slt_d(__m128d _1, __m128d _2) { ++ return (__m128i)__builtin_lsx_vfcmp_slt_d((v2f64)_1, (v2f64)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: V4SI, V4SF, V4SF */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vfcmp_sult_s(__m128 _1, __m128 _2) { ++ return (__m128i)__builtin_lsx_vfcmp_sult_s((v4f32)_1, (v4f32)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: V2DI, V2DF, V2DF */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vfcmp_sult_d(__m128d _1, __m128d _2) { ++ return (__m128i)__builtin_lsx_vfcmp_sult_d((v2f64)_1, (v2f64)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: V4SI, V4SF, V4SF */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vfcmp_sle_s(__m128 _1, __m128 _2) { ++ return (__m128i)__builtin_lsx_vfcmp_sle_s((v4f32)_1, (v4f32)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: V2DI, V2DF, V2DF */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vfcmp_sle_d(__m128d _1, __m128d _2) { ++ return (__m128i)__builtin_lsx_vfcmp_sle_d((v2f64)_1, (v2f64)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: V4SI, V4SF, V4SF */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vfcmp_sule_s(__m128 _1, __m128 _2) { ++ return (__m128i)__builtin_lsx_vfcmp_sule_s((v4f32)_1, (v4f32)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: V2DI, V2DF, V2DF */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vfcmp_sule_d(__m128d _1, __m128d _2) { ++ return (__m128i)__builtin_lsx_vfcmp_sule_d((v2f64)_1, (v2f64)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: V4SF, V4SF, V4SF */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128 ++ __lsx_vfadd_s(__m128 _1, __m128 _2) { ++ return (__m128)__builtin_lsx_vfadd_s((v4f32)_1, (v4f32)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: V2DF, V2DF, V2DF */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128d ++ __lsx_vfadd_d(__m128d _1, __m128d _2) { ++ return (__m128d)__builtin_lsx_vfadd_d((v2f64)_1, (v2f64)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: V4SF, V4SF, V4SF */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128 ++ __lsx_vfsub_s(__m128 _1, __m128 _2) { ++ return (__m128)__builtin_lsx_vfsub_s((v4f32)_1, (v4f32)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data 
types in instruction templates: V2DF, V2DF, V2DF */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128d ++ __lsx_vfsub_d(__m128d _1, __m128d _2) { ++ return (__m128d)__builtin_lsx_vfsub_d((v2f64)_1, (v2f64)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: V4SF, V4SF, V4SF */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128 ++ __lsx_vfmul_s(__m128 _1, __m128 _2) { ++ return (__m128)__builtin_lsx_vfmul_s((v4f32)_1, (v4f32)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: V2DF, V2DF, V2DF */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128d ++ __lsx_vfmul_d(__m128d _1, __m128d _2) { ++ return (__m128d)__builtin_lsx_vfmul_d((v2f64)_1, (v2f64)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: V4SF, V4SF, V4SF */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128 ++ __lsx_vfdiv_s(__m128 _1, __m128 _2) { ++ return (__m128)__builtin_lsx_vfdiv_s((v4f32)_1, (v4f32)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: V2DF, V2DF, V2DF */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128d ++ __lsx_vfdiv_d(__m128d _1, __m128d _2) { ++ return (__m128d)__builtin_lsx_vfdiv_d((v2f64)_1, (v2f64)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: V8HI, V4SF, V4SF */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vfcvt_h_s(__m128 _1, __m128 _2) { ++ return (__m128i)__builtin_lsx_vfcvt_h_s((v4f32)_1, (v4f32)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: V4SF, V2DF, V2DF */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128 ++ __lsx_vfcvt_s_d(__m128d _1, __m128d _2) { ++ return (__m128)__builtin_lsx_vfcvt_s_d((v2f64)_1, (v2f64)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: V4SF, V4SF, V4SF */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128 ++ __lsx_vfmin_s(__m128 _1, __m128 _2) { ++ return (__m128)__builtin_lsx_vfmin_s((v4f32)_1, (v4f32)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: V2DF, V2DF, V2DF */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128d ++ __lsx_vfmin_d(__m128d _1, __m128d _2) { ++ return (__m128d)__builtin_lsx_vfmin_d((v2f64)_1, (v2f64)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: V4SF, V4SF, V4SF */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128 ++ __lsx_vfmina_s(__m128 _1, __m128 _2) { ++ return (__m128)__builtin_lsx_vfmina_s((v4f32)_1, (v4f32)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: V2DF, V2DF, V2DF */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128d ++ __lsx_vfmina_d(__m128d _1, __m128d _2) { ++ return (__m128d)__builtin_lsx_vfmina_d((v2f64)_1, (v2f64)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: V4SF, 
V4SF, V4SF */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128 ++ __lsx_vfmax_s(__m128 _1, __m128 _2) { ++ return (__m128)__builtin_lsx_vfmax_s((v4f32)_1, (v4f32)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: V2DF, V2DF, V2DF */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128d ++ __lsx_vfmax_d(__m128d _1, __m128d _2) { ++ return (__m128d)__builtin_lsx_vfmax_d((v2f64)_1, (v2f64)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: V4SF, V4SF, V4SF */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128 ++ __lsx_vfmaxa_s(__m128 _1, __m128 _2) { ++ return (__m128)__builtin_lsx_vfmaxa_s((v4f32)_1, (v4f32)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: V2DF, V2DF, V2DF */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128d ++ __lsx_vfmaxa_d(__m128d _1, __m128d _2) { ++ return (__m128d)__builtin_lsx_vfmaxa_d((v2f64)_1, (v2f64)_2); ++} ++ ++/* Assembly instruction format: vd, vj */ ++/* Data types in instruction templates: V4SI, V4SF */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vfclass_s(__m128 _1) { ++ return (__m128i)__builtin_lsx_vfclass_s((v4f32)_1); ++} ++ ++/* Assembly instruction format: vd, vj */ ++/* Data types in instruction templates: V2DI, V2DF */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vfclass_d(__m128d _1) { ++ return (__m128i)__builtin_lsx_vfclass_d((v2f64)_1); ++} ++ ++/* Assembly instruction format: vd, vj */ ++/* Data types in instruction templates: V4SF, V4SF */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128 ++ __lsx_vfsqrt_s(__m128 _1) { ++ return (__m128)__builtin_lsx_vfsqrt_s((v4f32)_1); ++} ++ ++/* Assembly instruction format: vd, vj */ ++/* Data types in instruction templates: V2DF, V2DF */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128d ++ __lsx_vfsqrt_d(__m128d _1) { ++ return (__m128d)__builtin_lsx_vfsqrt_d((v2f64)_1); ++} ++ ++/* Assembly instruction format: vd, vj */ ++/* Data types in instruction templates: V4SF, V4SF */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128 ++ __lsx_vfrecip_s(__m128 _1) { ++ return (__m128)__builtin_lsx_vfrecip_s((v4f32)_1); ++} ++ ++/* Assembly instruction format: vd, vj */ ++/* Data types in instruction templates: V2DF, V2DF */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128d ++ __lsx_vfrecip_d(__m128d _1) { ++ return (__m128d)__builtin_lsx_vfrecip_d((v2f64)_1); ++} ++ ++/* Assembly instruction format: vd, vj */ ++/* Data types in instruction templates: V4SF, V4SF */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128 ++ __lsx_vfrint_s(__m128 _1) { ++ return (__m128)__builtin_lsx_vfrint_s((v4f32)_1); ++} ++ ++/* Assembly instruction format: vd, vj */ ++/* Data types in instruction templates: V2DF, V2DF */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128d ++ __lsx_vfrint_d(__m128d _1) { ++ return (__m128d)__builtin_lsx_vfrint_d((v2f64)_1); ++} ++ ++/* Assembly instruction format: vd, vj */ ++/* Data types in 
instruction templates: V4SF, V4SF */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128 ++ __lsx_vfrsqrt_s(__m128 _1) { ++ return (__m128)__builtin_lsx_vfrsqrt_s((v4f32)_1); ++} ++ ++/* Assembly instruction format: vd, vj */ ++/* Data types in instruction templates: V2DF, V2DF */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128d ++ __lsx_vfrsqrt_d(__m128d _1) { ++ return (__m128d)__builtin_lsx_vfrsqrt_d((v2f64)_1); ++} ++ ++/* Assembly instruction format: vd, vj */ ++/* Data types in instruction templates: V4SF, V4SF */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128 ++ __lsx_vflogb_s(__m128 _1) { ++ return (__m128)__builtin_lsx_vflogb_s((v4f32)_1); ++} ++ ++/* Assembly instruction format: vd, vj */ ++/* Data types in instruction templates: V2DF, V2DF */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128d ++ __lsx_vflogb_d(__m128d _1) { ++ return (__m128d)__builtin_lsx_vflogb_d((v2f64)_1); ++} ++ ++/* Assembly instruction format: vd, vj */ ++/* Data types in instruction templates: V4SF, V8HI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128 ++ __lsx_vfcvth_s_h(__m128i _1) { ++ return (__m128)__builtin_lsx_vfcvth_s_h((v8i16)_1); ++} ++ ++/* Assembly instruction format: vd, vj */ ++/* Data types in instruction templates: V2DF, V4SF */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128d ++ __lsx_vfcvth_d_s(__m128 _1) { ++ return (__m128d)__builtin_lsx_vfcvth_d_s((v4f32)_1); ++} ++ ++/* Assembly instruction format: vd, vj */ ++/* Data types in instruction templates: V4SF, V8HI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128 ++ __lsx_vfcvtl_s_h(__m128i _1) { ++ return (__m128)__builtin_lsx_vfcvtl_s_h((v8i16)_1); ++} ++ ++/* Assembly instruction format: vd, vj */ ++/* Data types in instruction templates: V2DF, V4SF */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128d ++ __lsx_vfcvtl_d_s(__m128 _1) { ++ return (__m128d)__builtin_lsx_vfcvtl_d_s((v4f32)_1); ++} ++ ++/* Assembly instruction format: vd, vj */ ++/* Data types in instruction templates: V4SI, V4SF */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vftint_w_s(__m128 _1) { ++ return (__m128i)__builtin_lsx_vftint_w_s((v4f32)_1); ++} ++ ++/* Assembly instruction format: vd, vj */ ++/* Data types in instruction templates: V2DI, V2DF */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vftint_l_d(__m128d _1) { ++ return (__m128i)__builtin_lsx_vftint_l_d((v2f64)_1); ++} ++ ++/* Assembly instruction format: vd, vj */ ++/* Data types in instruction templates: UV4SI, V4SF */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vftint_wu_s(__m128 _1) { ++ return (__m128i)__builtin_lsx_vftint_wu_s((v4f32)_1); ++} ++ ++/* Assembly instruction format: vd, vj */ ++/* Data types in instruction templates: UV2DI, V2DF */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vftint_lu_d(__m128d _1) { ++ return (__m128i)__builtin_lsx_vftint_lu_d((v2f64)_1); ++} ++ ++/* Assembly instruction format: vd, vj */ ++/* Data types in instruction templates: V4SI, V4SF */ ++extern __inline ++ 
__attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vftintrz_w_s(__m128 _1) { ++ return (__m128i)__builtin_lsx_vftintrz_w_s((v4f32)_1); ++} ++ ++/* Assembly instruction format: vd, vj */ ++/* Data types in instruction templates: V2DI, V2DF */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vftintrz_l_d(__m128d _1) { ++ return (__m128i)__builtin_lsx_vftintrz_l_d((v2f64)_1); ++} ++ ++/* Assembly instruction format: vd, vj */ ++/* Data types in instruction templates: UV4SI, V4SF */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vftintrz_wu_s(__m128 _1) { ++ return (__m128i)__builtin_lsx_vftintrz_wu_s((v4f32)_1); ++} ++ ++/* Assembly instruction format: vd, vj */ ++/* Data types in instruction templates: UV2DI, V2DF */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vftintrz_lu_d(__m128d _1) { ++ return (__m128i)__builtin_lsx_vftintrz_lu_d((v2f64)_1); ++} ++ ++/* Assembly instruction format: vd, vj */ ++/* Data types in instruction templates: V4SF, V4SI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128 ++ __lsx_vffint_s_w(__m128i _1) { ++ return (__m128)__builtin_lsx_vffint_s_w((v4i32)_1); ++} ++ ++/* Assembly instruction format: vd, vj */ ++/* Data types in instruction templates: V2DF, V2DI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128d ++ __lsx_vffint_d_l(__m128i _1) { ++ return (__m128d)__builtin_lsx_vffint_d_l((v2i64)_1); ++} ++ ++/* Assembly instruction format: vd, vj */ ++/* Data types in instruction templates: V4SF, UV4SI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128 ++ __lsx_vffint_s_wu(__m128i _1) { ++ return (__m128)__builtin_lsx_vffint_s_wu((v4u32)_1); ++} ++ ++/* Assembly instruction format: vd, vj */ ++/* Data types in instruction templates: V2DF, UV2DI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128d ++ __lsx_vffint_d_lu(__m128i _1) { ++ return (__m128d)__builtin_lsx_vffint_d_lu((v2u64)_1); ++} ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: UV16QI, UV16QI, UV16QI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vandn_v(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vandn_v((v16u8)_1, (v16u8)_2); ++} ++ ++/* Assembly instruction format: vd, vj */ ++/* Data types in instruction templates: V16QI, V16QI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vneg_b(__m128i _1) { ++ return (__m128i)__builtin_lsx_vneg_b((v16i8)_1); ++} ++ ++/* Assembly instruction format: vd, vj */ ++/* Data types in instruction templates: V8HI, V8HI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vneg_h(__m128i _1) { ++ return (__m128i)__builtin_lsx_vneg_h((v8i16)_1); ++} ++ ++/* Assembly instruction format: vd, vj */ ++/* Data types in instruction templates: V4SI, V4SI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vneg_w(__m128i _1) { ++ return (__m128i)__builtin_lsx_vneg_w((v4i32)_1); ++} ++ ++/* Assembly instruction format: vd, vj */ ++/* Data types in instruction templates: V2DI, V2DI */ ++extern __inline ++ 
__attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vneg_d(__m128i _1) { ++ return (__m128i)__builtin_lsx_vneg_d((v2i64)_1); ++} ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: V16QI, V16QI, V16QI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vmuh_b(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vmuh_b((v16i8)_1, (v16i8)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: V8HI, V8HI, V8HI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vmuh_h(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vmuh_h((v8i16)_1, (v8i16)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: V4SI, V4SI, V4SI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vmuh_w(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vmuh_w((v4i32)_1, (v4i32)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: V2DI, V2DI, V2DI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vmuh_d(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vmuh_d((v2i64)_1, (v2i64)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: UV16QI, UV16QI, UV16QI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vmuh_bu(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vmuh_bu((v16u8)_1, (v16u8)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: UV8HI, UV8HI, UV8HI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vmuh_hu(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vmuh_hu((v8u16)_1, (v8u16)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: UV4SI, UV4SI, UV4SI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vmuh_wu(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vmuh_wu((v4u32)_1, (v4u32)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: UV2DI, UV2DI, UV2DI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vmuh_du(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vmuh_du((v2u64)_1, (v2u64)_2); ++} ++ ++/* Assembly instruction format: vd, vj, ui3 */ ++/* Data types in instruction templates: V8HI, V16QI, UQI */ ++#define __lsx_vsllwil_h_b(/*__m128i*/ _1, /*ui3*/ _2) \ ++ ((__m128i)__builtin_lsx_vsllwil_h_b((v16i8)(_1), (_2))) ++ ++/* Assembly instruction format: vd, vj, ui4 */ ++/* Data types in instruction templates: V4SI, V8HI, UQI */ ++#define __lsx_vsllwil_w_h(/*__m128i*/ _1, /*ui4*/ _2) \ ++ ((__m128i)__builtin_lsx_vsllwil_w_h((v8i16)(_1), (_2))) ++ ++/* Assembly instruction format: vd, vj, ui5 */ ++/* Data types in instruction templates: V2DI, V4SI, UQI */ ++#define __lsx_vsllwil_d_w(/*__m128i*/ _1, /*ui5*/ _2) \ ++ ((__m128i)__builtin_lsx_vsllwil_d_w((v4i32)(_1), (_2))) ++ ++/* Assembly instruction format: vd, vj, ui3 */ ++/* Data types in instruction templates: UV8HI, UV16QI, UQI */ ++#define 
__lsx_vsllwil_hu_bu(/*__m128i*/ _1, /*ui3*/ _2) \ ++ ((__m128i)__builtin_lsx_vsllwil_hu_bu((v16u8)(_1), (_2))) ++ ++/* Assembly instruction format: vd, vj, ui4 */ ++/* Data types in instruction templates: UV4SI, UV8HI, UQI */ ++#define __lsx_vsllwil_wu_hu(/*__m128i*/ _1, /*ui4*/ _2) \ ++ ((__m128i)__builtin_lsx_vsllwil_wu_hu((v8u16)(_1), (_2))) ++ ++/* Assembly instruction format: vd, vj, ui5 */ ++/* Data types in instruction templates: UV2DI, UV4SI, UQI */ ++#define __lsx_vsllwil_du_wu(/*__m128i*/ _1, /*ui5*/ _2) \ ++ ((__m128i)__builtin_lsx_vsllwil_du_wu((v4u32)(_1), (_2))) ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: V16QI, V8HI, V8HI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vsran_b_h(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vsran_b_h((v8i16)_1, (v8i16)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: V8HI, V4SI, V4SI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vsran_h_w(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vsran_h_w((v4i32)_1, (v4i32)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: V4SI, V2DI, V2DI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vsran_w_d(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vsran_w_d((v2i64)_1, (v2i64)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: V16QI, V8HI, V8HI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vssran_b_h(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vssran_b_h((v8i16)_1, (v8i16)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: V8HI, V4SI, V4SI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vssran_h_w(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vssran_h_w((v4i32)_1, (v4i32)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: V4SI, V2DI, V2DI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vssran_w_d(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vssran_w_d((v2i64)_1, (v2i64)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: UV16QI, UV8HI, UV8HI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vssran_bu_h(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vssran_bu_h((v8u16)_1, (v8u16)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: UV8HI, UV4SI, UV4SI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vssran_hu_w(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vssran_hu_w((v4u32)_1, (v4u32)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: UV4SI, UV2DI, UV2DI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vssran_wu_d(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vssran_wu_d((v2u64)_1, (v2u64)_2); ++} ++ ++/* Assembly 
instruction format: vd, vj, vk */ ++/* Data types in instruction templates: V16QI, V8HI, V8HI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vsrarn_b_h(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vsrarn_b_h((v8i16)_1, (v8i16)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: V8HI, V4SI, V4SI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vsrarn_h_w(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vsrarn_h_w((v4i32)_1, (v4i32)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: V4SI, V2DI, V2DI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vsrarn_w_d(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vsrarn_w_d((v2i64)_1, (v2i64)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: V16QI, V8HI, V8HI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vssrarn_b_h(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vssrarn_b_h((v8i16)_1, (v8i16)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: V8HI, V4SI, V4SI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vssrarn_h_w(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vssrarn_h_w((v4i32)_1, (v4i32)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: V4SI, V2DI, V2DI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vssrarn_w_d(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vssrarn_w_d((v2i64)_1, (v2i64)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: UV16QI, UV8HI, UV8HI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vssrarn_bu_h(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vssrarn_bu_h((v8u16)_1, (v8u16)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: UV8HI, UV4SI, UV4SI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vssrarn_hu_w(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vssrarn_hu_w((v4u32)_1, (v4u32)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: UV4SI, UV2DI, UV2DI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vssrarn_wu_d(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vssrarn_wu_d((v2u64)_1, (v2u64)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: V16QI, V8HI, V8HI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vsrln_b_h(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vsrln_b_h((v8i16)_1, (v8i16)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: V8HI, V4SI, V4SI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vsrln_h_w(__m128i _1, __m128i _2) { ++ return 
(__m128i)__builtin_lsx_vsrln_h_w((v4i32)_1, (v4i32)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: V4SI, V2DI, V2DI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vsrln_w_d(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vsrln_w_d((v2i64)_1, (v2i64)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: UV16QI, UV8HI, UV8HI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vssrln_bu_h(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vssrln_bu_h((v8u16)_1, (v8u16)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: UV8HI, UV4SI, UV4SI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vssrln_hu_w(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vssrln_hu_w((v4u32)_1, (v4u32)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: UV4SI, UV2DI, UV2DI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vssrln_wu_d(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vssrln_wu_d((v2u64)_1, (v2u64)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: V16QI, V8HI, V8HI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vsrlrn_b_h(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vsrlrn_b_h((v8i16)_1, (v8i16)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: V8HI, V4SI, V4SI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vsrlrn_h_w(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vsrlrn_h_w((v4i32)_1, (v4i32)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: V4SI, V2DI, V2DI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vsrlrn_w_d(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vsrlrn_w_d((v2i64)_1, (v2i64)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: UV16QI, UV8HI, UV8HI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vssrlrn_bu_h(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vssrlrn_bu_h((v8u16)_1, (v8u16)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: UV8HI, UV4SI, UV4SI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vssrlrn_hu_w(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vssrlrn_hu_w((v4u32)_1, (v4u32)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: UV4SI, UV2DI, UV2DI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vssrlrn_wu_d(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vssrlrn_wu_d((v2u64)_1, (v2u64)_2); ++} ++ ++/* Assembly instruction format: vd, vj, ui5 */ ++/* Data types in instruction templates: V16QI, V16QI, V16QI, UQI */ ++#define __lsx_vfrstpi_b(/*__m128i*/ _1, /*__m128i*/ _2, /*ui5*/ _3) \ ++ 
((__m128i)__builtin_lsx_vfrstpi_b((v16i8)(_1), (v16i8)(_2), (_3))) ++ ++/* Assembly instruction format: vd, vj, ui5 */ ++/* Data types in instruction templates: V8HI, V8HI, V8HI, UQI */ ++#define __lsx_vfrstpi_h(/*__m128i*/ _1, /*__m128i*/ _2, /*ui5*/ _3) \ ++ ((__m128i)__builtin_lsx_vfrstpi_h((v8i16)(_1), (v8i16)(_2), (_3))) ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: V16QI, V16QI, V16QI, V16QI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vfrstp_b(__m128i _1, __m128i _2, __m128i _3) { ++ return (__m128i)__builtin_lsx_vfrstp_b((v16i8)_1, (v16i8)_2, (v16i8)_3); ++} ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: V8HI, V8HI, V8HI, V8HI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vfrstp_h(__m128i _1, __m128i _2, __m128i _3) { ++ return (__m128i)__builtin_lsx_vfrstp_h((v8i16)_1, (v8i16)_2, (v8i16)_3); ++} ++ ++/* Assembly instruction format: vd, vj, ui8 */ ++/* Data types in instruction templates: V2DI, V2DI, V2DI, USI */ ++#define __lsx_vshuf4i_d(/*__m128i*/ _1, /*__m128i*/ _2, /*ui8*/ _3) \ ++ ((__m128i)__builtin_lsx_vshuf4i_d((v2i64)(_1), (v2i64)(_2), (_3))) ++ ++/* Assembly instruction format: vd, vj, ui5 */ ++/* Data types in instruction templates: V16QI, V16QI, UQI */ ++#define __lsx_vbsrl_v(/*__m128i*/ _1, /*ui5*/ _2) \ ++ ((__m128i)__builtin_lsx_vbsrl_v((v16i8)(_1), (_2))) ++ ++/* Assembly instruction format: vd, vj, ui5 */ ++/* Data types in instruction templates: V16QI, V16QI, UQI */ ++#define __lsx_vbsll_v(/*__m128i*/ _1, /*ui5*/ _2) \ ++ ((__m128i)__builtin_lsx_vbsll_v((v16i8)(_1), (_2))) ++ ++/* Assembly instruction format: vd, vj, ui8 */ ++/* Data types in instruction templates: V16QI, V16QI, V16QI, UQI */ ++#define __lsx_vextrins_b(/*__m128i*/ _1, /*__m128i*/ _2, /*ui8*/ _3) \ ++ ((__m128i)__builtin_lsx_vextrins_b((v16i8)(_1), (v16i8)(_2), (_3))) ++ ++/* Assembly instruction format: vd, vj, ui8 */ ++/* Data types in instruction templates: V8HI, V8HI, V8HI, UQI */ ++#define __lsx_vextrins_h(/*__m128i*/ _1, /*__m128i*/ _2, /*ui8*/ _3) \ ++ ((__m128i)__builtin_lsx_vextrins_h((v8i16)(_1), (v8i16)(_2), (_3))) ++ ++/* Assembly instruction format: vd, vj, ui8 */ ++/* Data types in instruction templates: V4SI, V4SI, V4SI, UQI */ ++#define __lsx_vextrins_w(/*__m128i*/ _1, /*__m128i*/ _2, /*ui8*/ _3) \ ++ ((__m128i)__builtin_lsx_vextrins_w((v4i32)(_1), (v4i32)(_2), (_3))) ++ ++/* Assembly instruction format: vd, vj, ui8 */ ++/* Data types in instruction templates: V2DI, V2DI, V2DI, UQI */ ++#define __lsx_vextrins_d(/*__m128i*/ _1, /*__m128i*/ _2, /*ui8*/ _3) \ ++ ((__m128i)__builtin_lsx_vextrins_d((v2i64)(_1), (v2i64)(_2), (_3))) ++ ++/* Assembly instruction format: vd, vj */ ++/* Data types in instruction templates: V16QI, V16QI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vmskltz_b(__m128i _1) { ++ return (__m128i)__builtin_lsx_vmskltz_b((v16i8)_1); ++} ++ ++/* Assembly instruction format: vd, vj */ ++/* Data types in instruction templates: V8HI, V8HI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vmskltz_h(__m128i _1) { ++ return (__m128i)__builtin_lsx_vmskltz_h((v8i16)_1); ++} ++ ++/* Assembly instruction format: vd, vj */ ++/* Data types in instruction templates: V4SI, V4SI */ ++extern __inline ++ __attribute__((__gnu_inline__, 
__always_inline__, __artificial__)) __m128i ++ __lsx_vmskltz_w(__m128i _1) { ++ return (__m128i)__builtin_lsx_vmskltz_w((v4i32)_1); ++} ++ ++/* Assembly instruction format: vd, vj */ ++/* Data types in instruction templates: V2DI, V2DI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vmskltz_d(__m128i _1) { ++ return (__m128i)__builtin_lsx_vmskltz_d((v2i64)_1); ++} ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: V16QI, V16QI, V16QI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vsigncov_b(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vsigncov_b((v16i8)_1, (v16i8)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: V8HI, V8HI, V8HI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vsigncov_h(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vsigncov_h((v8i16)_1, (v8i16)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: V4SI, V4SI, V4SI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vsigncov_w(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vsigncov_w((v4i32)_1, (v4i32)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: V2DI, V2DI, V2DI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vsigncov_d(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vsigncov_d((v2i64)_1, (v2i64)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk, va */ ++/* Data types in instruction templates: V4SF, V4SF, V4SF, V4SF */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128 ++ __lsx_vfmadd_s(__m128 _1, __m128 _2, __m128 _3) { ++ return (__m128)__builtin_lsx_vfmadd_s((v4f32)_1, (v4f32)_2, (v4f32)_3); ++} ++ ++/* Assembly instruction format: vd, vj, vk, va */ ++/* Data types in instruction templates: V2DF, V2DF, V2DF, V2DF */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128d ++ __lsx_vfmadd_d(__m128d _1, __m128d _2, __m128d _3) { ++ return (__m128d)__builtin_lsx_vfmadd_d((v2f64)_1, (v2f64)_2, (v2f64)_3); ++} ++ ++/* Assembly instruction format: vd, vj, vk, va */ ++/* Data types in instruction templates: V4SF, V4SF, V4SF, V4SF */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128 ++ __lsx_vfmsub_s(__m128 _1, __m128 _2, __m128 _3) { ++ return (__m128)__builtin_lsx_vfmsub_s((v4f32)_1, (v4f32)_2, (v4f32)_3); ++} ++ ++/* Assembly instruction format: vd, vj, vk, va */ ++/* Data types in instruction templates: V2DF, V2DF, V2DF, V2DF */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128d ++ __lsx_vfmsub_d(__m128d _1, __m128d _2, __m128d _3) { ++ return (__m128d)__builtin_lsx_vfmsub_d((v2f64)_1, (v2f64)_2, (v2f64)_3); ++} ++ ++/* Assembly instruction format: vd, vj, vk, va */ ++/* Data types in instruction templates: V4SF, V4SF, V4SF, V4SF */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128 ++ __lsx_vfnmadd_s(__m128 _1, __m128 _2, __m128 _3) { ++ return (__m128)__builtin_lsx_vfnmadd_s((v4f32)_1, (v4f32)_2, (v4f32)_3); ++} ++ ++/* Assembly instruction format: vd, vj, vk, va */ 
++/* Data types in instruction templates: V2DF, V2DF, V2DF, V2DF */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128d ++ __lsx_vfnmadd_d(__m128d _1, __m128d _2, __m128d _3) { ++ return (__m128d)__builtin_lsx_vfnmadd_d((v2f64)_1, (v2f64)_2, (v2f64)_3); ++} ++ ++/* Assembly instruction format: vd, vj, vk, va */ ++/* Data types in instruction templates: V4SF, V4SF, V4SF, V4SF */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128 ++ __lsx_vfnmsub_s(__m128 _1, __m128 _2, __m128 _3) { ++ return (__m128)__builtin_lsx_vfnmsub_s((v4f32)_1, (v4f32)_2, (v4f32)_3); ++} ++ ++/* Assembly instruction format: vd, vj, vk, va */ ++/* Data types in instruction templates: V2DF, V2DF, V2DF, V2DF */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128d ++ __lsx_vfnmsub_d(__m128d _1, __m128d _2, __m128d _3) { ++ return (__m128d)__builtin_lsx_vfnmsub_d((v2f64)_1, (v2f64)_2, (v2f64)_3); ++} ++ ++/* Assembly instruction format: vd, vj */ ++/* Data types in instruction templates: V4SI, V4SF */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vftintrne_w_s(__m128 _1) { ++ return (__m128i)__builtin_lsx_vftintrne_w_s((v4f32)_1); ++} ++ ++/* Assembly instruction format: vd, vj */ ++/* Data types in instruction templates: V2DI, V2DF */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vftintrne_l_d(__m128d _1) { ++ return (__m128i)__builtin_lsx_vftintrne_l_d((v2f64)_1); ++} ++ ++/* Assembly instruction format: vd, vj */ ++/* Data types in instruction templates: V4SI, V4SF */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vftintrp_w_s(__m128 _1) { ++ return (__m128i)__builtin_lsx_vftintrp_w_s((v4f32)_1); ++} ++ ++/* Assembly instruction format: vd, vj */ ++/* Data types in instruction templates: V2DI, V2DF */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vftintrp_l_d(__m128d _1) { ++ return (__m128i)__builtin_lsx_vftintrp_l_d((v2f64)_1); ++} ++ ++/* Assembly instruction format: vd, vj */ ++/* Data types in instruction templates: V4SI, V4SF */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vftintrm_w_s(__m128 _1) { ++ return (__m128i)__builtin_lsx_vftintrm_w_s((v4f32)_1); ++} ++ ++/* Assembly instruction format: vd, vj */ ++/* Data types in instruction templates: V2DI, V2DF */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vftintrm_l_d(__m128d _1) { ++ return (__m128i)__builtin_lsx_vftintrm_l_d((v2f64)_1); ++} ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: V4SI, V2DF, V2DF */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vftint_w_d(__m128d _1, __m128d _2) { ++ return (__m128i)__builtin_lsx_vftint_w_d((v2f64)_1, (v2f64)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: V4SF, V2DI, V2DI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128 ++ __lsx_vffint_s_l(__m128i _1, __m128i _2) { ++ return (__m128)__builtin_lsx_vffint_s_l((v2i64)_1, (v2i64)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: V4SI, V2DF, V2DF 
*/ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vftintrz_w_d(__m128d _1, __m128d _2) { ++ return (__m128i)__builtin_lsx_vftintrz_w_d((v2f64)_1, (v2f64)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: V4SI, V2DF, V2DF */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vftintrp_w_d(__m128d _1, __m128d _2) { ++ return (__m128i)__builtin_lsx_vftintrp_w_d((v2f64)_1, (v2f64)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: V4SI, V2DF, V2DF */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vftintrm_w_d(__m128d _1, __m128d _2) { ++ return (__m128i)__builtin_lsx_vftintrm_w_d((v2f64)_1, (v2f64)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: V4SI, V2DF, V2DF */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vftintrne_w_d(__m128d _1, __m128d _2) { ++ return (__m128i)__builtin_lsx_vftintrne_w_d((v2f64)_1, (v2f64)_2); ++} ++ ++/* Assembly instruction format: vd, vj */ ++/* Data types in instruction templates: V2DI, V4SF */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vftintl_l_s(__m128 _1) { ++ return (__m128i)__builtin_lsx_vftintl_l_s((v4f32)_1); ++} ++ ++/* Assembly instruction format: vd, vj */ ++/* Data types in instruction templates: V2DI, V4SF */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vftinth_l_s(__m128 _1) { ++ return (__m128i)__builtin_lsx_vftinth_l_s((v4f32)_1); ++} ++ ++/* Assembly instruction format: vd, vj */ ++/* Data types in instruction templates: V2DF, V4SI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128d ++ __lsx_vffinth_d_w(__m128i _1) { ++ return (__m128d)__builtin_lsx_vffinth_d_w((v4i32)_1); ++} ++ ++/* Assembly instruction format: vd, vj */ ++/* Data types in instruction templates: V2DF, V4SI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128d ++ __lsx_vffintl_d_w(__m128i _1) { ++ return (__m128d)__builtin_lsx_vffintl_d_w((v4i32)_1); ++} ++ ++/* Assembly instruction format: vd, vj */ ++/* Data types in instruction templates: V2DI, V4SF */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vftintrzl_l_s(__m128 _1) { ++ return (__m128i)__builtin_lsx_vftintrzl_l_s((v4f32)_1); ++} ++ ++/* Assembly instruction format: vd, vj */ ++/* Data types in instruction templates: V2DI, V4SF */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vftintrzh_l_s(__m128 _1) { ++ return (__m128i)__builtin_lsx_vftintrzh_l_s((v4f32)_1); ++} ++ ++/* Assembly instruction format: vd, vj */ ++/* Data types in instruction templates: V2DI, V4SF */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vftintrpl_l_s(__m128 _1) { ++ return (__m128i)__builtin_lsx_vftintrpl_l_s((v4f32)_1); ++} ++ ++/* Assembly instruction format: vd, vj */ ++/* Data types in instruction templates: V2DI, V4SF */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vftintrph_l_s(__m128 _1) { ++ return 
(__m128i)__builtin_lsx_vftintrph_l_s((v4f32)_1); ++} ++ ++/* Assembly instruction format: vd, vj */ ++/* Data types in instruction templates: V2DI, V4SF */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vftintrml_l_s(__m128 _1) { ++ return (__m128i)__builtin_lsx_vftintrml_l_s((v4f32)_1); ++} ++ ++/* Assembly instruction format: vd, vj */ ++/* Data types in instruction templates: V2DI, V4SF */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vftintrmh_l_s(__m128 _1) { ++ return (__m128i)__builtin_lsx_vftintrmh_l_s((v4f32)_1); ++} ++ ++/* Assembly instruction format: vd, vj */ ++/* Data types in instruction templates: V2DI, V4SF */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vftintrnel_l_s(__m128 _1) { ++ return (__m128i)__builtin_lsx_vftintrnel_l_s((v4f32)_1); ++} ++ ++/* Assembly instruction format: vd, vj */ ++/* Data types in instruction templates: V2DI, V4SF */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vftintrneh_l_s(__m128 _1) { ++ return (__m128i)__builtin_lsx_vftintrneh_l_s((v4f32)_1); ++} ++ ++/* Assembly instruction format: vd, vj */ ++/* Data types in instruction templates: V4SI, V4SF */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vfrintrne_s(__m128 _1) { ++ return (__m128i)__builtin_lsx_vfrintrne_s((v4f32)_1); ++} ++ ++/* Assembly instruction format: vd, vj */ ++/* Data types in instruction templates: V2DI, V2DF */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vfrintrne_d(__m128d _1) { ++ return (__m128i)__builtin_lsx_vfrintrne_d((v2f64)_1); ++} ++ ++/* Assembly instruction format: vd, vj */ ++/* Data types in instruction templates: V4SI, V4SF */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vfrintrz_s(__m128 _1) { ++ return (__m128i)__builtin_lsx_vfrintrz_s((v4f32)_1); ++} ++ ++/* Assembly instruction format: vd, vj */ ++/* Data types in instruction templates: V2DI, V2DF */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vfrintrz_d(__m128d _1) { ++ return (__m128i)__builtin_lsx_vfrintrz_d((v2f64)_1); ++} ++ ++/* Assembly instruction format: vd, vj */ ++/* Data types in instruction templates: V4SI, V4SF */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vfrintrp_s(__m128 _1) { ++ return (__m128i)__builtin_lsx_vfrintrp_s((v4f32)_1); ++} ++ ++/* Assembly instruction format: vd, vj */ ++/* Data types in instruction templates: V2DI, V2DF */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vfrintrp_d(__m128d _1) { ++ return (__m128i)__builtin_lsx_vfrintrp_d((v2f64)_1); ++} ++ ++/* Assembly instruction format: vd, vj */ ++/* Data types in instruction templates: V4SI, V4SF */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vfrintrm_s(__m128 _1) { ++ return (__m128i)__builtin_lsx_vfrintrm_s((v4f32)_1); ++} ++ ++/* Assembly instruction format: vd, vj */ ++/* Data types in instruction templates: V2DI, V2DF */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vfrintrm_d(__m128d _1) { ++ return 
(__m128i)__builtin_lsx_vfrintrm_d((v2f64)_1); ++} ++ ++/* Assembly instruction format: vd, rj, si8, idx */ ++/* Data types in instruction templates: VOID, V16QI, CVPOINTER, SI, UQI */ ++#define __lsx_vstelm_b(/*__m128i*/ _1, /*void **/ _2, /*si8*/ _3, /*idx*/ _4) \ ++ ((void)__builtin_lsx_vstelm_b((v16i8)(_1), (void *)(_2), (_3), (_4))) ++ ++/* Assembly instruction format: vd, rj, si8, idx */ ++/* Data types in instruction templates: VOID, V8HI, CVPOINTER, SI, UQI */ ++#define __lsx_vstelm_h(/*__m128i*/ _1, /*void **/ _2, /*si8*/ _3, /*idx*/ _4) \ ++ ((void)__builtin_lsx_vstelm_h((v8i16)(_1), (void *)(_2), (_3), (_4))) ++ ++/* Assembly instruction format: vd, rj, si8, idx */ ++/* Data types in instruction templates: VOID, V4SI, CVPOINTER, SI, UQI */ ++#define __lsx_vstelm_w(/*__m128i*/ _1, /*void **/ _2, /*si8*/ _3, /*idx*/ _4) \ ++ ((void)__builtin_lsx_vstelm_w((v4i32)(_1), (void *)(_2), (_3), (_4))) ++ ++/* Assembly instruction format: vd, rj, si8, idx */ ++/* Data types in instruction templates: VOID, V2DI, CVPOINTER, SI, UQI */ ++#define __lsx_vstelm_d(/*__m128i*/ _1, /*void **/ _2, /*si8*/ _3, /*idx*/ _4) \ ++ ((void)__builtin_lsx_vstelm_d((v2i64)(_1), (void *)(_2), (_3), (_4))) ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: V2DI, V4SI, V4SI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vaddwev_d_w(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vaddwev_d_w((v4i32)_1, (v4i32)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: V4SI, V8HI, V8HI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vaddwev_w_h(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vaddwev_w_h((v8i16)_1, (v8i16)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: V8HI, V16QI, V16QI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vaddwev_h_b(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vaddwev_h_b((v16i8)_1, (v16i8)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: V2DI, V4SI, V4SI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vaddwod_d_w(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vaddwod_d_w((v4i32)_1, (v4i32)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: V4SI, V8HI, V8HI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vaddwod_w_h(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vaddwod_w_h((v8i16)_1, (v8i16)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: V8HI, V16QI, V16QI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vaddwod_h_b(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vaddwod_h_b((v16i8)_1, (v16i8)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: V2DI, UV4SI, UV4SI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vaddwev_d_wu(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vaddwev_d_wu((v4u32)_1, (v4u32)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk 
*/ ++/* Data types in instruction templates: V4SI, UV8HI, UV8HI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vaddwev_w_hu(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vaddwev_w_hu((v8u16)_1, (v8u16)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: V8HI, UV16QI, UV16QI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vaddwev_h_bu(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vaddwev_h_bu((v16u8)_1, (v16u8)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: V2DI, UV4SI, UV4SI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vaddwod_d_wu(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vaddwod_d_wu((v4u32)_1, (v4u32)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: V4SI, UV8HI, UV8HI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vaddwod_w_hu(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vaddwod_w_hu((v8u16)_1, (v8u16)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: V8HI, UV16QI, UV16QI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vaddwod_h_bu(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vaddwod_h_bu((v16u8)_1, (v16u8)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: V2DI, UV4SI, V4SI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vaddwev_d_wu_w(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vaddwev_d_wu_w((v4u32)_1, (v4i32)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: V4SI, UV8HI, V8HI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vaddwev_w_hu_h(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vaddwev_w_hu_h((v8u16)_1, (v8i16)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: V8HI, UV16QI, V16QI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vaddwev_h_bu_b(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vaddwev_h_bu_b((v16u8)_1, (v16i8)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: V2DI, UV4SI, V4SI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vaddwod_d_wu_w(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vaddwod_d_wu_w((v4u32)_1, (v4i32)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: V4SI, UV8HI, V8HI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vaddwod_w_hu_h(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vaddwod_w_hu_h((v8u16)_1, (v8i16)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: V8HI, UV16QI, V16QI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vaddwod_h_bu_b(__m128i _1, __m128i _2) { 
++ return (__m128i)__builtin_lsx_vaddwod_h_bu_b((v16u8)_1, (v16i8)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: V2DI, V4SI, V4SI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vsubwev_d_w(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vsubwev_d_w((v4i32)_1, (v4i32)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: V4SI, V8HI, V8HI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vsubwev_w_h(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vsubwev_w_h((v8i16)_1, (v8i16)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: V8HI, V16QI, V16QI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vsubwev_h_b(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vsubwev_h_b((v16i8)_1, (v16i8)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: V2DI, V4SI, V4SI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vsubwod_d_w(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vsubwod_d_w((v4i32)_1, (v4i32)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: V4SI, V8HI, V8HI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vsubwod_w_h(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vsubwod_w_h((v8i16)_1, (v8i16)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: V8HI, V16QI, V16QI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vsubwod_h_b(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vsubwod_h_b((v16i8)_1, (v16i8)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: V2DI, UV4SI, UV4SI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vsubwev_d_wu(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vsubwev_d_wu((v4u32)_1, (v4u32)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: V4SI, UV8HI, UV8HI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vsubwev_w_hu(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vsubwev_w_hu((v8u16)_1, (v8u16)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: V8HI, UV16QI, UV16QI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vsubwev_h_bu(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vsubwev_h_bu((v16u8)_1, (v16u8)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: V2DI, UV4SI, UV4SI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vsubwod_d_wu(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vsubwod_d_wu((v4u32)_1, (v4u32)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: V4SI, UV8HI, UV8HI */ ++extern __inline ++ __attribute__((__gnu_inline__, 
__always_inline__, __artificial__)) __m128i ++ __lsx_vsubwod_w_hu(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vsubwod_w_hu((v8u16)_1, (v8u16)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: V8HI, UV16QI, UV16QI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vsubwod_h_bu(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vsubwod_h_bu((v16u8)_1, (v16u8)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: V2DI, V2DI, V2DI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vaddwev_q_d(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vaddwev_q_d((v2i64)_1, (v2i64)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: V2DI, V2DI, V2DI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vaddwod_q_d(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vaddwod_q_d((v2i64)_1, (v2i64)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: V2DI, UV2DI, UV2DI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vaddwev_q_du(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vaddwev_q_du((v2u64)_1, (v2u64)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: V2DI, UV2DI, UV2DI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vaddwod_q_du(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vaddwod_q_du((v2u64)_1, (v2u64)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: V2DI, V2DI, V2DI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vsubwev_q_d(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vsubwev_q_d((v2i64)_1, (v2i64)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: V2DI, V2DI, V2DI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vsubwod_q_d(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vsubwod_q_d((v2i64)_1, (v2i64)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: V2DI, UV2DI, UV2DI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vsubwev_q_du(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vsubwev_q_du((v2u64)_1, (v2u64)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: V2DI, UV2DI, UV2DI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vsubwod_q_du(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vsubwod_q_du((v2u64)_1, (v2u64)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: V2DI, UV2DI, V2DI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vaddwev_q_du_d(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vaddwev_q_du_d((v2u64)_1, (v2i64)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in 
instruction templates: V2DI, UV2DI, V2DI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vaddwod_q_du_d(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vaddwod_q_du_d((v2u64)_1, (v2i64)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: V2DI, V4SI, V4SI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vmulwev_d_w(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vmulwev_d_w((v4i32)_1, (v4i32)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: V4SI, V8HI, V8HI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vmulwev_w_h(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vmulwev_w_h((v8i16)_1, (v8i16)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: V8HI, V16QI, V16QI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vmulwev_h_b(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vmulwev_h_b((v16i8)_1, (v16i8)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: V2DI, V4SI, V4SI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vmulwod_d_w(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vmulwod_d_w((v4i32)_1, (v4i32)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: V4SI, V8HI, V8HI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vmulwod_w_h(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vmulwod_w_h((v8i16)_1, (v8i16)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: V8HI, V16QI, V16QI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vmulwod_h_b(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vmulwod_h_b((v16i8)_1, (v16i8)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: V2DI, UV4SI, UV4SI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vmulwev_d_wu(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vmulwev_d_wu((v4u32)_1, (v4u32)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: V4SI, UV8HI, UV8HI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vmulwev_w_hu(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vmulwev_w_hu((v8u16)_1, (v8u16)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: V8HI, UV16QI, UV16QI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vmulwev_h_bu(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vmulwev_h_bu((v16u8)_1, (v16u8)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: V2DI, UV4SI, UV4SI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vmulwod_d_wu(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vmulwod_d_wu((v4u32)_1, 
(v4u32)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: V4SI, UV8HI, UV8HI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vmulwod_w_hu(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vmulwod_w_hu((v8u16)_1, (v8u16)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: V8HI, UV16QI, UV16QI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vmulwod_h_bu(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vmulwod_h_bu((v16u8)_1, (v16u8)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: V2DI, UV4SI, V4SI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vmulwev_d_wu_w(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vmulwev_d_wu_w((v4u32)_1, (v4i32)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: V4SI, UV8HI, V8HI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vmulwev_w_hu_h(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vmulwev_w_hu_h((v8u16)_1, (v8i16)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: V8HI, UV16QI, V16QI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vmulwev_h_bu_b(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vmulwev_h_bu_b((v16u8)_1, (v16i8)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: V2DI, UV4SI, V4SI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vmulwod_d_wu_w(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vmulwod_d_wu_w((v4u32)_1, (v4i32)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: V4SI, UV8HI, V8HI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vmulwod_w_hu_h(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vmulwod_w_hu_h((v8u16)_1, (v8i16)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: V8HI, UV16QI, V16QI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vmulwod_h_bu_b(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vmulwod_h_bu_b((v16u8)_1, (v16i8)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: V2DI, V2DI, V2DI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vmulwev_q_d(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vmulwev_q_d((v2i64)_1, (v2i64)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: V2DI, V2DI, V2DI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vmulwod_q_d(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vmulwod_q_d((v2i64)_1, (v2i64)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: V2DI, UV2DI, UV2DI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 
__m128i ++ __lsx_vmulwev_q_du(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vmulwev_q_du((v2u64)_1, (v2u64)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: V2DI, UV2DI, UV2DI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vmulwod_q_du(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vmulwod_q_du((v2u64)_1, (v2u64)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: V2DI, UV2DI, V2DI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vmulwev_q_du_d(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vmulwev_q_du_d((v2u64)_1, (v2i64)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: V2DI, UV2DI, V2DI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vmulwod_q_du_d(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vmulwod_q_du_d((v2u64)_1, (v2i64)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: V2DI, V2DI, V2DI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vhaddw_q_d(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vhaddw_q_d((v2i64)_1, (v2i64)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: UV2DI, UV2DI, UV2DI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vhaddw_qu_du(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vhaddw_qu_du((v2u64)_1, (v2u64)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: V2DI, V2DI, V2DI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vhsubw_q_d(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vhsubw_q_d((v2i64)_1, (v2i64)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: UV2DI, UV2DI, UV2DI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vhsubw_qu_du(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vhsubw_qu_du((v2u64)_1, (v2u64)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: V2DI, V2DI, V4SI, V4SI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vmaddwev_d_w(__m128i _1, __m128i _2, __m128i _3) { ++ return (__m128i)__builtin_lsx_vmaddwev_d_w((v2i64)_1, (v4i32)_2, (v4i32)_3); ++} ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: V4SI, V4SI, V8HI, V8HI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vmaddwev_w_h(__m128i _1, __m128i _2, __m128i _3) { ++ return (__m128i)__builtin_lsx_vmaddwev_w_h((v4i32)_1, (v8i16)_2, (v8i16)_3); ++} ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: V8HI, V8HI, V16QI, V16QI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vmaddwev_h_b(__m128i _1, __m128i _2, __m128i _3) { ++ return (__m128i)__builtin_lsx_vmaddwev_h_b((v8i16)_1, (v16i8)_2, (v16i8)_3); ++} ++ ++/* Assembly 
instruction format: vd, vj, vk */ ++/* Data types in instruction templates: UV2DI, UV2DI, UV4SI, UV4SI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vmaddwev_d_wu(__m128i _1, __m128i _2, __m128i _3) { ++ return (__m128i)__builtin_lsx_vmaddwev_d_wu((v2u64)_1, (v4u32)_2, (v4u32)_3); ++} ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: UV4SI, UV4SI, UV8HI, UV8HI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vmaddwev_w_hu(__m128i _1, __m128i _2, __m128i _3) { ++ return (__m128i)__builtin_lsx_vmaddwev_w_hu((v4u32)_1, (v8u16)_2, (v8u16)_3); ++} ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: UV8HI, UV8HI, UV16QI, UV16QI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vmaddwev_h_bu(__m128i _1, __m128i _2, __m128i _3) { ++ return (__m128i)__builtin_lsx_vmaddwev_h_bu((v8u16)_1, (v16u8)_2, (v16u8)_3); ++} ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: V2DI, V2DI, V4SI, V4SI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vmaddwod_d_w(__m128i _1, __m128i _2, __m128i _3) { ++ return (__m128i)__builtin_lsx_vmaddwod_d_w((v2i64)_1, (v4i32)_2, (v4i32)_3); ++} ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: V4SI, V4SI, V8HI, V8HI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vmaddwod_w_h(__m128i _1, __m128i _2, __m128i _3) { ++ return (__m128i)__builtin_lsx_vmaddwod_w_h((v4i32)_1, (v8i16)_2, (v8i16)_3); ++} ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: V8HI, V8HI, V16QI, V16QI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vmaddwod_h_b(__m128i _1, __m128i _2, __m128i _3) { ++ return (__m128i)__builtin_lsx_vmaddwod_h_b((v8i16)_1, (v16i8)_2, (v16i8)_3); ++} ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: UV2DI, UV2DI, UV4SI, UV4SI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vmaddwod_d_wu(__m128i _1, __m128i _2, __m128i _3) { ++ return (__m128i)__builtin_lsx_vmaddwod_d_wu((v2u64)_1, (v4u32)_2, (v4u32)_3); ++} ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: UV4SI, UV4SI, UV8HI, UV8HI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vmaddwod_w_hu(__m128i _1, __m128i _2, __m128i _3) { ++ return (__m128i)__builtin_lsx_vmaddwod_w_hu((v4u32)_1, (v8u16)_2, (v8u16)_3); ++} ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: UV8HI, UV8HI, UV16QI, UV16QI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vmaddwod_h_bu(__m128i _1, __m128i _2, __m128i _3) { ++ return (__m128i)__builtin_lsx_vmaddwod_h_bu((v8u16)_1, (v16u8)_2, (v16u8)_3); ++} ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: V2DI, V2DI, UV4SI, V4SI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vmaddwev_d_wu_w(__m128i _1, __m128i _2, __m128i _3) { ++ return 
(__m128i)__builtin_lsx_vmaddwev_d_wu_w((v2i64)_1, (v4u32)_2, ++ (v4i32)_3); ++} ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: V4SI, V4SI, UV8HI, V8HI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vmaddwev_w_hu_h(__m128i _1, __m128i _2, __m128i _3) { ++ return (__m128i)__builtin_lsx_vmaddwev_w_hu_h((v4i32)_1, (v8u16)_2, ++ (v8i16)_3); ++} ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: V8HI, V8HI, UV16QI, V16QI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vmaddwev_h_bu_b(__m128i _1, __m128i _2, __m128i _3) { ++ return (__m128i)__builtin_lsx_vmaddwev_h_bu_b((v8i16)_1, (v16u8)_2, ++ (v16i8)_3); ++} ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: V2DI, V2DI, UV4SI, V4SI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vmaddwod_d_wu_w(__m128i _1, __m128i _2, __m128i _3) { ++ return (__m128i)__builtin_lsx_vmaddwod_d_wu_w((v2i64)_1, (v4u32)_2, ++ (v4i32)_3); ++} ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: V4SI, V4SI, UV8HI, V8HI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vmaddwod_w_hu_h(__m128i _1, __m128i _2, __m128i _3) { ++ return (__m128i)__builtin_lsx_vmaddwod_w_hu_h((v4i32)_1, (v8u16)_2, ++ (v8i16)_3); ++} ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: V8HI, V8HI, UV16QI, V16QI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vmaddwod_h_bu_b(__m128i _1, __m128i _2, __m128i _3) { ++ return (__m128i)__builtin_lsx_vmaddwod_h_bu_b((v8i16)_1, (v16u8)_2, ++ (v16i8)_3); ++} ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: V2DI, V2DI, V2DI, V2DI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vmaddwev_q_d(__m128i _1, __m128i _2, __m128i _3) { ++ return (__m128i)__builtin_lsx_vmaddwev_q_d((v2i64)_1, (v2i64)_2, (v2i64)_3); ++} ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: V2DI, V2DI, V2DI, V2DI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vmaddwod_q_d(__m128i _1, __m128i _2, __m128i _3) { ++ return (__m128i)__builtin_lsx_vmaddwod_q_d((v2i64)_1, (v2i64)_2, (v2i64)_3); ++} ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: UV2DI, UV2DI, UV2DI, UV2DI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vmaddwev_q_du(__m128i _1, __m128i _2, __m128i _3) { ++ return (__m128i)__builtin_lsx_vmaddwev_q_du((v2u64)_1, (v2u64)_2, (v2u64)_3); ++} ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: UV2DI, UV2DI, UV2DI, UV2DI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vmaddwod_q_du(__m128i _1, __m128i _2, __m128i _3) { ++ return (__m128i)__builtin_lsx_vmaddwod_q_du((v2u64)_1, (v2u64)_2, (v2u64)_3); ++} ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: V2DI, V2DI, UV2DI, V2DI */ ++extern __inline ++ __attribute__((__gnu_inline__, 
__always_inline__, __artificial__)) __m128i ++ __lsx_vmaddwev_q_du_d(__m128i _1, __m128i _2, __m128i _3) { ++ return (__m128i)__builtin_lsx_vmaddwev_q_du_d((v2i64)_1, (v2u64)_2, ++ (v2i64)_3); ++} ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: V2DI, V2DI, UV2DI, V2DI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vmaddwod_q_du_d(__m128i _1, __m128i _2, __m128i _3) { ++ return (__m128i)__builtin_lsx_vmaddwod_q_du_d((v2i64)_1, (v2u64)_2, ++ (v2i64)_3); ++} ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: V16QI, V16QI, V16QI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vrotr_b(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vrotr_b((v16i8)_1, (v16i8)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: V8HI, V8HI, V8HI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vrotr_h(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vrotr_h((v8i16)_1, (v8i16)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: V4SI, V4SI, V4SI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vrotr_w(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vrotr_w((v4i32)_1, (v4i32)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: V2DI, V2DI, V2DI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vrotr_d(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vrotr_d((v2i64)_1, (v2i64)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: V2DI, V2DI, V2DI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vadd_q(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vadd_q((v2i64)_1, (v2i64)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: V2DI, V2DI, V2DI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vsub_q(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vsub_q((v2i64)_1, (v2i64)_2); ++} ++ ++/* Assembly instruction format: vd, rj, si12 */ ++/* Data types in instruction templates: V16QI, CVPOINTER, SI */ ++#define __lsx_vldrepl_b(/*void **/ _1, /*si12*/ _2) \ ++ ((__m128i)__builtin_lsx_vldrepl_b((void *)(_1), (_2))) ++ ++/* Assembly instruction format: vd, rj, si11 */ ++/* Data types in instruction templates: V8HI, CVPOINTER, SI */ ++#define __lsx_vldrepl_h(/*void **/ _1, /*si11*/ _2) \ ++ ((__m128i)__builtin_lsx_vldrepl_h((void *)(_1), (_2))) ++ ++/* Assembly instruction format: vd, rj, si10 */ ++/* Data types in instruction templates: V4SI, CVPOINTER, SI */ ++#define __lsx_vldrepl_w(/*void **/ _1, /*si10*/ _2) \ ++ ((__m128i)__builtin_lsx_vldrepl_w((void *)(_1), (_2))) ++ ++/* Assembly instruction format: vd, rj, si9 */ ++/* Data types in instruction templates: V2DI, CVPOINTER, SI */ ++#define __lsx_vldrepl_d(/*void **/ _1, /*si9*/ _2) \ ++ ((__m128i)__builtin_lsx_vldrepl_d((void *)(_1), (_2))) ++ ++/* Assembly instruction format: vd, vj */ ++/* Data types in instruction templates: V16QI, V16QI */ ++extern __inline ++ 
__attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vmskgez_b(__m128i _1) { ++ return (__m128i)__builtin_lsx_vmskgez_b((v16i8)_1); ++} ++ ++/* Assembly instruction format: vd, vj */ ++/* Data types in instruction templates: V16QI, V16QI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vmsknz_b(__m128i _1) { ++ return (__m128i)__builtin_lsx_vmsknz_b((v16i8)_1); ++} ++ ++/* Assembly instruction format: vd, vj */ ++/* Data types in instruction templates: V8HI, V16QI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vexth_h_b(__m128i _1) { ++ return (__m128i)__builtin_lsx_vexth_h_b((v16i8)_1); ++} ++ ++/* Assembly instruction format: vd, vj */ ++/* Data types in instruction templates: V4SI, V8HI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vexth_w_h(__m128i _1) { ++ return (__m128i)__builtin_lsx_vexth_w_h((v8i16)_1); ++} ++ ++/* Assembly instruction format: vd, vj */ ++/* Data types in instruction templates: V2DI, V4SI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vexth_d_w(__m128i _1) { ++ return (__m128i)__builtin_lsx_vexth_d_w((v4i32)_1); ++} ++ ++/* Assembly instruction format: vd, vj */ ++/* Data types in instruction templates: V2DI, V2DI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vexth_q_d(__m128i _1) { ++ return (__m128i)__builtin_lsx_vexth_q_d((v2i64)_1); ++} ++ ++/* Assembly instruction format: vd, vj. */ ++/* Data types in instruction templates: UV8HI, UV16QI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vexth_hu_bu(__m128i _1) { ++ return (__m128i)__builtin_lsx_vexth_hu_bu((v16u8)_1); ++} ++ ++/* Assembly instruction format: vd, vj. */ ++/* Data types in instruction templates: UV4SI, UV8HI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vexth_wu_hu(__m128i _1) { ++ return (__m128i)__builtin_lsx_vexth_wu_hu((v8u16)_1); ++} ++ ++/* Assembly instruction format: vd, vj. */ ++/* Data types in instruction templates: UV2DI, UV4SI. */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vexth_du_wu(__m128i _1) { ++ return (__m128i)__builtin_lsx_vexth_du_wu((v4u32)_1); ++} ++ ++/* Assembly instruction format: vd, vj. */ ++/* Data types in instruction templates: UV2DI, UV2DI. 
*/ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vexth_qu_du(__m128i _1) { ++ return (__m128i)__builtin_lsx_vexth_qu_du((v2u64)_1); ++} ++ ++/* Assembly instruction format: vd, vj, ui3 */ ++/* Data types in instruction templates: V16QI, V16QI, UQI */ ++#define __lsx_vrotri_b(/*__m128i*/ _1, /*ui3*/ _2) \ ++ ((__m128i)__builtin_lsx_vrotri_b((v16i8)(_1), (_2))) ++ ++/* Assembly instruction format: vd, vj, ui4 */ ++/* Data types in instruction templates: V8HI, V8HI, UQI */ ++#define __lsx_vrotri_h(/*__m128i*/ _1, /*ui4*/ _2) \ ++ ((__m128i)__builtin_lsx_vrotri_h((v8i16)(_1), (_2))) ++ ++/* Assembly instruction format: vd, vj, ui5 */ ++/* Data types in instruction templates: V4SI, V4SI, UQI */ ++#define __lsx_vrotri_w(/*__m128i*/ _1, /*ui5*/ _2) \ ++ ((__m128i)__builtin_lsx_vrotri_w((v4i32)(_1), (_2))) ++ ++/* Assembly instruction format: vd, vj, ui6 */ ++/* Data types in instruction templates: V2DI, V2DI, UQI */ ++#define __lsx_vrotri_d(/*__m128i*/ _1, /*ui6*/ _2) \ ++ ((__m128i)__builtin_lsx_vrotri_d((v2i64)(_1), (_2))) ++ ++/* Assembly instruction format: vd, vj */ ++/* Data types in instruction templates: V2DI, V2DI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vextl_q_d(__m128i _1) { ++ return (__m128i)__builtin_lsx_vextl_q_d((v2i64)_1); ++} ++ ++/* Assembly instruction format: vd, vj, ui4 */ ++/* Data types in instruction templates: V16QI, V16QI, V16QI, USI */ ++#define __lsx_vsrlni_b_h(/*__m128i*/ _1, /*__m128i*/ _2, /*ui4*/ _3) \ ++ ((__m128i)__builtin_lsx_vsrlni_b_h((v16i8)(_1), (v16i8)(_2), (_3))) ++ ++/* Assembly instruction format: vd, vj, ui5 */ ++/* Data types in instruction templates: V8HI, V8HI, V8HI, USI */ ++#define __lsx_vsrlni_h_w(/*__m128i*/ _1, /*__m128i*/ _2, /*ui5*/ _3) \ ++ ((__m128i)__builtin_lsx_vsrlni_h_w((v8i16)(_1), (v8i16)(_2), (_3))) ++ ++/* Assembly instruction format: vd, vj, ui6 */ ++/* Data types in instruction templates: V4SI, V4SI, V4SI, USI */ ++#define __lsx_vsrlni_w_d(/*__m128i*/ _1, /*__m128i*/ _2, /*ui6*/ _3) \ ++ ((__m128i)__builtin_lsx_vsrlni_w_d((v4i32)(_1), (v4i32)(_2), (_3))) ++ ++/* Assembly instruction format: vd, vj, ui7 */ ++/* Data types in instruction templates: V2DI, V2DI, V2DI, USI */ ++#define __lsx_vsrlni_d_q(/*__m128i*/ _1, /*__m128i*/ _2, /*ui7*/ _3) \ ++ ((__m128i)__builtin_lsx_vsrlni_d_q((v2i64)(_1), (v2i64)(_2), (_3))) ++ ++/* Assembly instruction format: vd, vj, ui4 */ ++/* Data types in instruction templates: V16QI, V16QI, V16QI, USI */ ++#define __lsx_vssrlni_b_h(/*__m128i*/ _1, /*__m128i*/ _2, /*ui4*/ _3) \ ++ ((__m128i)__builtin_lsx_vssrlni_b_h((v16i8)(_1), (v16i8)(_2), (_3))) ++ ++/* Assembly instruction format: vd, vj, ui5 */ ++/* Data types in instruction templates: V8HI, V8HI, V8HI, USI */ ++#define __lsx_vssrlni_h_w(/*__m128i*/ _1, /*__m128i*/ _2, /*ui5*/ _3) \ ++ ((__m128i)__builtin_lsx_vssrlni_h_w((v8i16)(_1), (v8i16)(_2), (_3))) ++ ++/* Assembly instruction format: vd, vj, ui6 */ ++/* Data types in instruction templates: V4SI, V4SI, V4SI, USI */ ++#define __lsx_vssrlni_w_d(/*__m128i*/ _1, /*__m128i*/ _2, /*ui6*/ _3) \ ++ ((__m128i)__builtin_lsx_vssrlni_w_d((v4i32)(_1), (v4i32)(_2), (_3))) ++ ++/* Assembly instruction format: vd, vj, ui7 */ ++/* Data types in instruction templates: V2DI, V2DI, V2DI, USI */ ++#define __lsx_vssrlni_d_q(/*__m128i*/ _1, /*__m128i*/ _2, /*ui7*/ _3) \ ++ ((__m128i)__builtin_lsx_vssrlni_d_q((v2i64)(_1), (v2i64)(_2), (_3))) ++ ++/* Assembly instruction format: vd, vj, ui4 */ 
++/* Data types in instruction templates: UV16QI, UV16QI, V16QI, USI */ ++#define __lsx_vssrlni_bu_h(/*__m128i*/ _1, /*__m128i*/ _2, /*ui4*/ _3) \ ++ ((__m128i)__builtin_lsx_vssrlni_bu_h((v16u8)(_1), (v16i8)(_2), (_3))) ++ ++/* Assembly instruction format: vd, vj, ui5 */ ++/* Data types in instruction templates: UV8HI, UV8HI, V8HI, USI */ ++#define __lsx_vssrlni_hu_w(/*__m128i*/ _1, /*__m128i*/ _2, /*ui5*/ _3) \ ++ ((__m128i)__builtin_lsx_vssrlni_hu_w((v8u16)(_1), (v8i16)(_2), (_3))) ++ ++/* Assembly instruction format: vd, vj, ui6 */ ++/* Data types in instruction templates: UV4SI, UV4SI, V4SI, USI */ ++#define __lsx_vssrlni_wu_d(/*__m128i*/ _1, /*__m128i*/ _2, /*ui6*/ _3) \ ++ ((__m128i)__builtin_lsx_vssrlni_wu_d((v4u32)(_1), (v4i32)(_2), (_3))) ++ ++/* Assembly instruction format: vd, vj, ui7 */ ++/* Data types in instruction templates: UV2DI, UV2DI, V2DI, USI */ ++#define __lsx_vssrlni_du_q(/*__m128i*/ _1, /*__m128i*/ _2, /*ui7*/ _3) \ ++ ((__m128i)__builtin_lsx_vssrlni_du_q((v2u64)(_1), (v2i64)(_2), (_3))) ++ ++/* Assembly instruction format: vd, vj, ui4 */ ++/* Data types in instruction templates: V16QI, V16QI, V16QI, USI */ ++#define __lsx_vssrlrni_b_h(/*__m128i*/ _1, /*__m128i*/ _2, /*ui4*/ _3) \ ++ ((__m128i)__builtin_lsx_vssrlrni_b_h((v16i8)(_1), (v16i8)(_2), (_3))) ++ ++/* Assembly instruction format: vd, vj, ui5 */ ++/* Data types in instruction templates: V8HI, V8HI, V8HI, USI */ ++#define __lsx_vssrlrni_h_w(/*__m128i*/ _1, /*__m128i*/ _2, /*ui5*/ _3) \ ++ ((__m128i)__builtin_lsx_vssrlrni_h_w((v8i16)(_1), (v8i16)(_2), (_3))) ++ ++/* Assembly instruction format: vd, vj, ui6 */ ++/* Data types in instruction templates: V4SI, V4SI, V4SI, USI */ ++#define __lsx_vssrlrni_w_d(/*__m128i*/ _1, /*__m128i*/ _2, /*ui6*/ _3) \ ++ ((__m128i)__builtin_lsx_vssrlrni_w_d((v4i32)(_1), (v4i32)(_2), (_3))) ++ ++/* Assembly instruction format: vd, vj, ui7 */ ++/* Data types in instruction templates: V2DI, V2DI, V2DI, USI */ ++#define __lsx_vssrlrni_d_q(/*__m128i*/ _1, /*__m128i*/ _2, /*ui7*/ _3) \ ++ ((__m128i)__builtin_lsx_vssrlrni_d_q((v2i64)(_1), (v2i64)(_2), (_3))) ++ ++/* Assembly instruction format: vd, vj, ui4 */ ++/* Data types in instruction templates: UV16QI, UV16QI, V16QI, USI */ ++#define __lsx_vssrlrni_bu_h(/*__m128i*/ _1, /*__m128i*/ _2, /*ui4*/ _3) \ ++ ((__m128i)__builtin_lsx_vssrlrni_bu_h((v16u8)(_1), (v16i8)(_2), (_3))) ++ ++/* Assembly instruction format: vd, vj, ui5 */ ++/* Data types in instruction templates: UV8HI, UV8HI, V8HI, USI */ ++#define __lsx_vssrlrni_hu_w(/*__m128i*/ _1, /*__m128i*/ _2, /*ui5*/ _3) \ ++ ((__m128i)__builtin_lsx_vssrlrni_hu_w((v8u16)(_1), (v8i16)(_2), (_3))) ++ ++/* Assembly instruction format: vd, vj, ui6 */ ++/* Data types in instruction templates: UV4SI, UV4SI, V4SI, USI */ ++#define __lsx_vssrlrni_wu_d(/*__m128i*/ _1, /*__m128i*/ _2, /*ui6*/ _3) \ ++ ((__m128i)__builtin_lsx_vssrlrni_wu_d((v4u32)(_1), (v4i32)(_2), (_3))) ++ ++/* Assembly instruction format: vd, vj, ui7 */ ++/* Data types in instruction templates: UV2DI, UV2DI, V2DI, USI */ ++#define __lsx_vssrlrni_du_q(/*__m128i*/ _1, /*__m128i*/ _2, /*ui7*/ _3) \ ++ ((__m128i)__builtin_lsx_vssrlrni_du_q((v2u64)(_1), (v2i64)(_2), (_3))) ++ ++/* Assembly instruction format: vd, vj, ui4 */ ++/* Data types in instruction templates: V16QI, V16QI, V16QI, USI */ ++#define __lsx_vsrani_b_h(/*__m128i*/ _1, /*__m128i*/ _2, /*ui4*/ _3) \ ++ ((__m128i)__builtin_lsx_vsrani_b_h((v16i8)(_1), (v16i8)(_2), (_3))) ++ ++/* Assembly instruction format: vd, vj, ui5 */ ++/* Data types in instruction templates: V8HI, 
V8HI, V8HI, USI */ ++#define __lsx_vsrani_h_w(/*__m128i*/ _1, /*__m128i*/ _2, /*ui5*/ _3) \ ++ ((__m128i)__builtin_lsx_vsrani_h_w((v8i16)(_1), (v8i16)(_2), (_3))) ++ ++/* Assembly instruction format: vd, vj, ui6 */ ++/* Data types in instruction templates: V4SI, V4SI, V4SI, USI */ ++#define __lsx_vsrani_w_d(/*__m128i*/ _1, /*__m128i*/ _2, /*ui6*/ _3) \ ++ ((__m128i)__builtin_lsx_vsrani_w_d((v4i32)(_1), (v4i32)(_2), (_3))) ++ ++/* Assembly instruction format: vd, vj, ui7 */ ++/* Data types in instruction templates: V2DI, V2DI, V2DI, USI */ ++#define __lsx_vsrani_d_q(/*__m128i*/ _1, /*__m128i*/ _2, /*ui7*/ _3) \ ++ ((__m128i)__builtin_lsx_vsrani_d_q((v2i64)(_1), (v2i64)(_2), (_3))) ++ ++/* Assembly instruction format: vd, vj, ui4 */ ++/* Data types in instruction templates: V16QI, V16QI, V16QI, USI */ ++#define __lsx_vsrarni_b_h(/*__m128i*/ _1, /*__m128i*/ _2, /*ui4*/ _3) \ ++ ((__m128i)__builtin_lsx_vsrarni_b_h((v16i8)(_1), (v16i8)(_2), (_3))) ++ ++/* Assembly instruction format: vd, vj, ui5 */ ++/* Data types in instruction templates: V8HI, V8HI, V8HI, USI */ ++#define __lsx_vsrarni_h_w(/*__m128i*/ _1, /*__m128i*/ _2, /*ui5*/ _3) \ ++ ((__m128i)__builtin_lsx_vsrarni_h_w((v8i16)(_1), (v8i16)(_2), (_3))) ++ ++/* Assembly instruction format: vd, vj, ui6 */ ++/* Data types in instruction templates: V4SI, V4SI, V4SI, USI */ ++#define __lsx_vsrarni_w_d(/*__m128i*/ _1, /*__m128i*/ _2, /*ui6*/ _3) \ ++ ((__m128i)__builtin_lsx_vsrarni_w_d((v4i32)(_1), (v4i32)(_2), (_3))) ++ ++/* Assembly instruction format: vd, vj, ui7 */ ++/* Data types in instruction templates: V2DI, V2DI, V2DI, USI */ ++#define __lsx_vsrarni_d_q(/*__m128i*/ _1, /*__m128i*/ _2, /*ui7*/ _3) \ ++ ((__m128i)__builtin_lsx_vsrarni_d_q((v2i64)(_1), (v2i64)(_2), (_3))) ++ ++/* Assembly instruction format: vd, vj, ui4 */ ++/* Data types in instruction templates: V16QI, V16QI, V16QI, USI */ ++#define __lsx_vssrani_b_h(/*__m128i*/ _1, /*__m128i*/ _2, /*ui4*/ _3) \ ++ ((__m128i)__builtin_lsx_vssrani_b_h((v16i8)(_1), (v16i8)(_2), (_3))) ++ ++/* Assembly instruction format: vd, vj, ui5 */ ++/* Data types in instruction templates: V8HI, V8HI, V8HI, USI */ ++#define __lsx_vssrani_h_w(/*__m128i*/ _1, /*__m128i*/ _2, /*ui5*/ _3) \ ++ ((__m128i)__builtin_lsx_vssrani_h_w((v8i16)(_1), (v8i16)(_2), (_3))) ++ ++/* Assembly instruction format: vd, vj, ui6 */ ++/* Data types in instruction templates: V4SI, V4SI, V4SI, USI */ ++#define __lsx_vssrani_w_d(/*__m128i*/ _1, /*__m128i*/ _2, /*ui6*/ _3) \ ++ ((__m128i)__builtin_lsx_vssrani_w_d((v4i32)(_1), (v4i32)(_2), (_3))) ++ ++/* Assembly instruction format: vd, vj, ui7 */ ++/* Data types in instruction templates: V2DI, V2DI, V2DI, USI */ ++#define __lsx_vssrani_d_q(/*__m128i*/ _1, /*__m128i*/ _2, /*ui7*/ _3) \ ++ ((__m128i)__builtin_lsx_vssrani_d_q((v2i64)(_1), (v2i64)(_2), (_3))) ++ ++/* Assembly instruction format: vd, vj, ui4 */ ++/* Data types in instruction templates: UV16QI, UV16QI, V16QI, USI */ ++#define __lsx_vssrani_bu_h(/*__m128i*/ _1, /*__m128i*/ _2, /*ui4*/ _3) \ ++ ((__m128i)__builtin_lsx_vssrani_bu_h((v16u8)(_1), (v16i8)(_2), (_3))) ++ ++/* Assembly instruction format: vd, vj, ui5 */ ++/* Data types in instruction templates: UV8HI, UV8HI, V8HI, USI */ ++#define __lsx_vssrani_hu_w(/*__m128i*/ _1, /*__m128i*/ _2, /*ui5*/ _3) \ ++ ((__m128i)__builtin_lsx_vssrani_hu_w((v8u16)(_1), (v8i16)(_2), (_3))) ++ ++/* Assembly instruction format: vd, vj, ui6 */ ++/* Data types in instruction templates: UV4SI, UV4SI, V4SI, USI */ ++#define __lsx_vssrani_wu_d(/*__m128i*/ _1, /*__m128i*/ _2, /*ui6*/ _3) \ ++ 
((__m128i)__builtin_lsx_vssrani_wu_d((v4u32)(_1), (v4i32)(_2), (_3))) ++ ++/* Assembly instruction format: vd, vj, ui7 */ ++/* Data types in instruction templates: UV2DI, UV2DI, V2DI, USI */ ++#define __lsx_vssrani_du_q(/*__m128i*/ _1, /*__m128i*/ _2, /*ui7*/ _3) \ ++ ((__m128i)__builtin_lsx_vssrani_du_q((v2u64)(_1), (v2i64)(_2), (_3))) ++ ++/* Assembly instruction format: vd, vj, ui4 */ ++/* Data types in instruction templates: V16QI, V16QI, V16QI, USI */ ++#define __lsx_vssrarni_b_h(/*__m128i*/ _1, /*__m128i*/ _2, /*ui4*/ _3) \ ++ ((__m128i)__builtin_lsx_vssrarni_b_h((v16i8)(_1), (v16i8)(_2), (_3))) ++ ++/* Assembly instruction format: vd, vj, ui5 */ ++/* Data types in instruction templates: V8HI, V8HI, V8HI, USI */ ++#define __lsx_vssrarni_h_w(/*__m128i*/ _1, /*__m128i*/ _2, /*ui5*/ _3) \ ++ ((__m128i)__builtin_lsx_vssrarni_h_w((v8i16)(_1), (v8i16)(_2), (_3))) ++ ++/* Assembly instruction format: vd, vj, ui6 */ ++/* Data types in instruction templates: V4SI, V4SI, V4SI, USI */ ++#define __lsx_vssrarni_w_d(/*__m128i*/ _1, /*__m128i*/ _2, /*ui6*/ _3) \ ++ ((__m128i)__builtin_lsx_vssrarni_w_d((v4i32)(_1), (v4i32)(_2), (_3))) ++ ++/* Assembly instruction format: vd, vj, ui7 */ ++/* Data types in instruction templates: V2DI, V2DI, V2DI, USI */ ++#define __lsx_vssrarni_d_q(/*__m128i*/ _1, /*__m128i*/ _2, /*ui7*/ _3) \ ++ ((__m128i)__builtin_lsx_vssrarni_d_q((v2i64)(_1), (v2i64)(_2), (_3))) ++ ++/* Assembly instruction format: vd, vj, ui4 */ ++/* Data types in instruction templates: UV16QI, UV16QI, V16QI, USI */ ++#define __lsx_vssrarni_bu_h(/*__m128i*/ _1, /*__m128i*/ _2, /*ui4*/ _3) \ ++ ((__m128i)__builtin_lsx_vssrarni_bu_h((v16u8)(_1), (v16i8)(_2), (_3))) ++ ++/* Assembly instruction format: vd, vj, ui5 */ ++/* Data types in instruction templates: UV8HI, UV8HI, V8HI, USI */ ++#define __lsx_vssrarni_hu_w(/*__m128i*/ _1, /*__m128i*/ _2, /*ui5*/ _3) \ ++ ((__m128i)__builtin_lsx_vssrarni_hu_w((v8u16)(_1), (v8i16)(_2), (_3))) ++ ++/* Assembly instruction format: vd, vj, ui6 */ ++/* Data types in instruction templates: UV4SI, UV4SI, V4SI, USI */ ++#define __lsx_vssrarni_wu_d(/*__m128i*/ _1, /*__m128i*/ _2, /*ui6*/ _3) \ ++ ((__m128i)__builtin_lsx_vssrarni_wu_d((v4u32)(_1), (v4i32)(_2), (_3))) ++ ++/* Assembly instruction format: vd, vj, ui7 */ ++/* Data types in instruction templates: UV2DI, UV2DI, V2DI, USI */ ++#define __lsx_vssrarni_du_q(/*__m128i*/ _1, /*__m128i*/ _2, /*ui7*/ _3) \ ++ ((__m128i)__builtin_lsx_vssrarni_du_q((v2u64)(_1), (v2i64)(_2), (_3))) ++ ++/* Assembly instruction format: vd, vj, ui8 */ ++/* Data types in instruction templates: V4SI, V4SI, V4SI, USI */ ++#define __lsx_vpermi_w(/*__m128i*/ _1, /*__m128i*/ _2, /*ui8*/ _3) \ ++ ((__m128i)__builtin_lsx_vpermi_w((v4i32)(_1), (v4i32)(_2), (_3))) ++ ++/* Assembly instruction format: vd, rj, si12 */ ++/* Data types in instruction templates: V16QI, CVPOINTER, SI */ ++#define __lsx_vld(/*void **/ _1, /*si12*/ _2) \ ++ ((__m128i)__builtin_lsx_vld((void *)(_1), (_2))) ++ ++/* Assembly instruction format: vd, rj, si12 */ ++/* Data types in instruction templates: VOID, V16QI, CVPOINTER, SI */ ++#define __lsx_vst(/*__m128i*/ _1, /*void **/ _2, /*si12*/ _3) \ ++ ((void)__builtin_lsx_vst((v16i8)(_1), (void *)(_2), (_3))) ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: V16QI, V8HI, V8HI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vssrlrn_b_h(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vssrlrn_b_h((v8i16)_1, (v8i16)_2); 
++} ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: V8HI, V4SI, V4SI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vssrlrn_h_w(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vssrlrn_h_w((v4i32)_1, (v4i32)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: V4SI, V2DI, V2DI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vssrlrn_w_d(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vssrlrn_w_d((v2i64)_1, (v2i64)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: V16QI, V8HI, V8HI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vssrln_b_h(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vssrln_b_h((v8i16)_1, (v8i16)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: V8HI, V4SI, V4SI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vssrln_h_w(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vssrln_h_w((v4i32)_1, (v4i32)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: V4SI, V2DI, V2DI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vssrln_w_d(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vssrln_w_d((v2i64)_1, (v2i64)_2); ++} ++ ++/* Assembly instruction format: vd, vj, vk */ ++/* Data types in instruction templates: V16QI, V16QI, V16QI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vorn_v(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vorn_v((v16i8)_1, (v16i8)_2); ++} ++ ++/* Assembly instruction format: vd, i13 */ ++/* Data types in instruction templates: V2DI, HI */ ++#define __lsx_vldi(/*i13*/ _1) ((__m128i)__builtin_lsx_vldi((_1))) ++ ++/* Assembly instruction format: vd, vj, vk, va */ ++/* Data types in instruction templates: V16QI, V16QI, V16QI, V16QI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vshuf_b(__m128i _1, __m128i _2, __m128i _3) { ++ return (__m128i)__builtin_lsx_vshuf_b((v16i8)_1, (v16i8)_2, (v16i8)_3); ++} ++ ++/* Assembly instruction format: vd, rj, rk */ ++/* Data types in instruction templates: V16QI, CVPOINTER, DI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vldx(void *_1, long int _2) { ++ return (__m128i)__builtin_lsx_vldx((void *)_1, (long int)_2); ++} ++ ++/* Assembly instruction format: vd, rj, rk */ ++/* Data types in instruction templates: VOID, V16QI, CVPOINTER, DI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) void ++ __lsx_vstx(__m128i _1, void *_2, long int _3) { ++ return (void)__builtin_lsx_vstx((v16i8)_1, (void *)_2, (long int)_3); ++} ++ ++/* Assembly instruction format: vd, vj */ ++/* Data types in instruction templates: UV2DI, UV2DI */ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vextl_qu_du(__m128i _1) { ++ return (__m128i)__builtin_lsx_vextl_qu_du((v2u64)_1); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) int ++ 
__lsx_bnz_v(__m128i _1) { ++ return __builtin_lsx_bnz_v((v16u8)_1); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) int ++ __lsx_bz_v(__m128i _1) { ++ return __builtin_lsx_bz_v((v16u8)_1); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) int ++ __lsx_bnz_b(__m128i _1) { ++ return __builtin_lsx_bnz_b((v16u8)_1); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) int ++ __lsx_bnz_h(__m128i _1) { ++ return __builtin_lsx_bnz_h((v8u16)_1); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) int ++ __lsx_bnz_w(__m128i _1) { ++ return __builtin_lsx_bnz_w((v4u32)_1); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) int ++ __lsx_bnz_d(__m128i _1) { ++ return __builtin_lsx_bnz_d((v2u64)_1); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) int ++ __lsx_bz_b(__m128i _1) { ++ return __builtin_lsx_bz_b((v16u8)_1); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) int ++ __lsx_bz_h(__m128i _1) { ++ return __builtin_lsx_bz_h((v8u16)_1); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) int ++ __lsx_bz_w(__m128i _1) { ++ return __builtin_lsx_bz_w((v4u32)_1); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) int ++ __lsx_bz_d(__m128i _1) { ++ return __builtin_lsx_bz_d((v2u64)_1); ++} ++ ++#if 0 ++/* Assembly instruction format: vd, i10 */ ++/* Data types in instruction templates: V16QI, i10 */ ++#define __lsx_vrepli_b(/*i10*/ _1) ((__m128i)__builtin_lsx_vrepli_b(_1) ++ ++/* Assembly instruction format: vd, i10 */ ++/* Data types in instruction templates: V8HI, i10 */ ++#define __lsx_vrepli_h(/*i10*/ _1) ((__m128i)__builtin_lsx_vrepli_h(_1) ++ ++/* Assembly instruction format: vd, i10 */ ++/* Data types in instruction templates: V4SI, i10 */ ++#define __lsx_vrepli_w(/*i10*/ _1) ((__m128i)__builtin_lsx_vrepli_w(_1) ++ ++/* Assembly instruction format: vd, i10 */ ++/* Data types in instruction templates: V2DI, i10 */ ++#define __lsx_vrepli_d(/*i10*/ _1) ((__m128i)__builtin_lsx_vrepli_d(_1) ++ ++#endif ++ ++#endif /* defined(__loongarch_sx) */ ++#endif /* _GCC_LOONGSON_SXINTRIN_H */ +diff --git a/lib/Sema/SemaChecking.cpp b/lib/Sema/SemaChecking.cpp +index 69dcc3aa..d5721b52 100644 +--- a/lib/Sema/SemaChecking.cpp ++++ b/lib/Sema/SemaChecking.cpp +@@ -1658,6 +1658,9 @@ bool Sema::CheckTSBuiltinFunctionCall(const TargetInfo &TI, unsigned BuiltinID, + case llvm::Triple::riscv32: + case llvm::Triple::riscv64: + return CheckRISCVBuiltinFunctionCall(TI, BuiltinID, TheCall); ++ case llvm::Triple::loongarch32: ++ case llvm::Triple::loongarch64: ++ return CheckLoongArchBuiltinFunctionCall(TI, BuiltinID, TheCall); + } + } + +@@ -4032,6 +4035,547 @@ bool Sema::CheckRISCVBuiltinFunctionCall(const TargetInfo &TI, + return false; + } + ++// CheckLoongArchBuiltinFunctionCall - Checks the constant value passed to the ++// intrinsic is correct. ++// ++// FIXME: The size tests here should instead be tablegen'd along with the ++// definitions from include/clang/Basic/BuiltinsLoongArch.def. ++// FIXME: GCC is strict on signedness for some of these intrinsics, we should ++// be too. 
++bool Sema::CheckLoongArchBuiltinFunctionCall(const TargetInfo &TI, ++ unsigned BuiltinID, ++ CallExpr *TheCall) { ++ unsigned i = 0, l = 0, u = 0, m = 0; ++ switch (BuiltinID) { ++ default: return false; ++ // LSX/LASX intrinsics. ++ // These intrinsics take an unsigned 3 bit immediate. ++ case LoongArch::BI__builtin_lsx_vbitclri_b: ++ case LoongArch::BI__builtin_lasx_xvbitclri_b: ++ case LoongArch::BI__builtin_lsx_vbitrevi_b: ++ case LoongArch::BI__builtin_lasx_xvbitrevi_b: ++ case LoongArch::BI__builtin_lsx_vbitseti_b: ++ case LoongArch::BI__builtin_lasx_xvbitseti_b: ++ case LoongArch::BI__builtin_lsx_vsat_b: ++ case LoongArch::BI__builtin_lsx_vsat_bu: ++ case LoongArch::BI__builtin_lasx_xvsat_b: ++ case LoongArch::BI__builtin_lasx_xvsat_bu: ++ case LoongArch::BI__builtin_lsx_vslli_b: ++ case LoongArch::BI__builtin_lasx_xvslli_b: ++ case LoongArch::BI__builtin_lsx_vsrai_b: ++ case LoongArch::BI__builtin_lasx_xvsrai_b: ++ case LoongArch::BI__builtin_lsx_vsrari_b: ++ case LoongArch::BI__builtin_lasx_xvsrari_b: ++ case LoongArch::BI__builtin_lsx_vsrli_b: ++ case LoongArch::BI__builtin_lasx_xvsrli_b: ++ case LoongArch::BI__builtin_lsx_vsllwil_h_b: ++ case LoongArch::BI__builtin_lsx_vsllwil_hu_bu: ++ case LoongArch::BI__builtin_lasx_xvsllwil_h_b: ++ case LoongArch::BI__builtin_lasx_xvsllwil_hu_bu: ++ case LoongArch::BI__builtin_lsx_vrotri_b: ++ case LoongArch::BI__builtin_lasx_xvrotri_b: ++ case LoongArch::BI__builtin_lasx_xvsrlri_b: ++ case LoongArch::BI__builtin_lsx_vsrlri_b: ++ i = 1; ++ l = 0; ++ u = 7; ++ break; ++ // These intrinsics take an unsigned 4 bit immediate. ++ case LoongArch::BI__builtin_lsx_vbitclri_h: ++ case LoongArch::BI__builtin_lasx_xvbitclri_h: ++ case LoongArch::BI__builtin_lsx_vbitrevi_h: ++ case LoongArch::BI__builtin_lasx_xvbitrevi_h: ++ case LoongArch::BI__builtin_lsx_vbitseti_h: ++ case LoongArch::BI__builtin_lasx_xvbitseti_h: ++ case LoongArch::BI__builtin_lsx_vsat_h: ++ case LoongArch::BI__builtin_lsx_vsat_hu: ++ case LoongArch::BI__builtin_lasx_xvsat_h: ++ case LoongArch::BI__builtin_lasx_xvsat_hu: ++ case LoongArch::BI__builtin_lsx_vslli_h: ++ case LoongArch::BI__builtin_lasx_xvslli_h: ++ case LoongArch::BI__builtin_lsx_vsrai_h: ++ case LoongArch::BI__builtin_lasx_xvsrai_h: ++ case LoongArch::BI__builtin_lsx_vsrari_h: ++ case LoongArch::BI__builtin_lasx_xvsrari_h: ++ case LoongArch::BI__builtin_lsx_vsrli_h: ++ case LoongArch::BI__builtin_lasx_xvsrli_h: ++ case LoongArch::BI__builtin_lsx_vsllwil_w_h: ++ case LoongArch::BI__builtin_lsx_vsllwil_wu_hu: ++ case LoongArch::BI__builtin_lasx_xvsllwil_w_h: ++ case LoongArch::BI__builtin_lasx_xvsllwil_wu_hu: ++ case LoongArch::BI__builtin_lsx_vrotri_h: ++ case LoongArch::BI__builtin_lasx_xvrotri_h: ++ case LoongArch::BI__builtin_lasx_xvsrlri_h: ++ case LoongArch::BI__builtin_lsx_vsrlri_h: ++ i = 1; ++ l = 0; ++ u = 15; ++ break; ++ case LoongArch::BI__builtin_lsx_vssrarni_b_h: ++ case LoongArch::BI__builtin_lsx_vssrarni_bu_h: ++ case LoongArch::BI__builtin_lasx_xvssrarni_b_h: ++ case LoongArch::BI__builtin_lasx_xvssrarni_bu_h: ++ case LoongArch::BI__builtin_lsx_vssrani_b_h: ++ case LoongArch::BI__builtin_lsx_vssrani_bu_h: ++ case LoongArch::BI__builtin_lasx_xvssrani_b_h: ++ case LoongArch::BI__builtin_lasx_xvssrani_bu_h: ++ case LoongArch::BI__builtin_lsx_vsrarni_b_h: ++ case LoongArch::BI__builtin_lasx_xvsrarni_b_h: ++ case LoongArch::BI__builtin_lsx_vsrlni_b_h: ++ case LoongArch::BI__builtin_lasx_xvsrlni_b_h: ++ case LoongArch::BI__builtin_lasx_xvsrlrni_b_h: ++ case LoongArch::BI__builtin_lsx_vssrlni_b_h: ++ case 
LoongArch::BI__builtin_lsx_vssrlni_bu_h: ++ case LoongArch::BI__builtin_lasx_xvssrlni_b_h: ++ case LoongArch::BI__builtin_lasx_xvssrlni_bu_h: ++ case LoongArch::BI__builtin_lsx_vssrlrni_b_h: ++ case LoongArch::BI__builtin_lsx_vssrlrni_bu_h: ++ case LoongArch::BI__builtin_lasx_xvssrlrni_b_h: ++ case LoongArch::BI__builtin_lasx_xvssrlrni_bu_h: ++ case LoongArch::BI__builtin_lsx_vsrani_b_h: ++ case LoongArch::BI__builtin_lasx_xvsrani_b_h: ++ i = 2; ++ l = 0; ++ u = 15; ++ break; ++ // These intrinsics take an unsigned 5 bit immediate. ++ // The first block of intrinsics actually have an unsigned 5 bit field, ++ // not a df/n field. ++ case LoongArch::BI__builtin_lsx_vslei_bu: ++ case LoongArch::BI__builtin_lsx_vslei_hu: ++ case LoongArch::BI__builtin_lsx_vslei_wu: ++ case LoongArch::BI__builtin_lsx_vslei_du: ++ case LoongArch::BI__builtin_lasx_xvslei_bu: ++ case LoongArch::BI__builtin_lasx_xvslei_hu: ++ case LoongArch::BI__builtin_lasx_xvslei_wu: ++ case LoongArch::BI__builtin_lasx_xvslei_du: ++ case LoongArch::BI__builtin_lsx_vslti_bu: ++ case LoongArch::BI__builtin_lsx_vslti_hu: ++ case LoongArch::BI__builtin_lsx_vslti_wu: ++ case LoongArch::BI__builtin_lsx_vslti_du: ++ case LoongArch::BI__builtin_lasx_xvslti_bu: ++ case LoongArch::BI__builtin_lasx_xvslti_hu: ++ case LoongArch::BI__builtin_lasx_xvslti_wu: ++ case LoongArch::BI__builtin_lasx_xvslti_du: ++ case LoongArch::BI__builtin_lsx_vmaxi_bu: ++ case LoongArch::BI__builtin_lsx_vmaxi_hu: ++ case LoongArch::BI__builtin_lsx_vmaxi_wu: ++ case LoongArch::BI__builtin_lsx_vmaxi_du: ++ case LoongArch::BI__builtin_lasx_xvmaxi_bu: ++ case LoongArch::BI__builtin_lasx_xvmaxi_hu: ++ case LoongArch::BI__builtin_lasx_xvmaxi_wu: ++ case LoongArch::BI__builtin_lasx_xvmaxi_du: ++ case LoongArch::BI__builtin_lsx_vmini_bu: ++ case LoongArch::BI__builtin_lsx_vmini_hu: ++ case LoongArch::BI__builtin_lsx_vmini_wu: ++ case LoongArch::BI__builtin_lsx_vmini_du: ++ case LoongArch::BI__builtin_lasx_xvmini_bu: ++ case LoongArch::BI__builtin_lasx_xvmini_hu: ++ case LoongArch::BI__builtin_lasx_xvmini_wu: ++ case LoongArch::BI__builtin_lasx_xvmini_du: ++ case LoongArch::BI__builtin_lsx_vaddi_bu: ++ case LoongArch::BI__builtin_lsx_vaddi_hu: ++ case LoongArch::BI__builtin_lsx_vaddi_wu: ++ case LoongArch::BI__builtin_lsx_vaddi_du: ++ case LoongArch::BI__builtin_lasx_xvaddi_bu: ++ case LoongArch::BI__builtin_lasx_xvaddi_hu: ++ case LoongArch::BI__builtin_lasx_xvaddi_wu: ++ case LoongArch::BI__builtin_lasx_xvaddi_du: ++ case LoongArch::BI__builtin_lsx_vbitclri_w: ++ case LoongArch::BI__builtin_lasx_xvbitclri_w: ++ case LoongArch::BI__builtin_lsx_vbitrevi_w: ++ case LoongArch::BI__builtin_lasx_xvbitrevi_w: ++ case LoongArch::BI__builtin_lsx_vbitseti_w: ++ case LoongArch::BI__builtin_lasx_xvbitseti_w: ++ case LoongArch::BI__builtin_lsx_vsat_w: ++ case LoongArch::BI__builtin_lsx_vsat_wu: ++ case LoongArch::BI__builtin_lasx_xvsat_w: ++ case LoongArch::BI__builtin_lasx_xvsat_wu: ++ case LoongArch::BI__builtin_lsx_vslli_w: ++ case LoongArch::BI__builtin_lasx_xvslli_w: ++ case LoongArch::BI__builtin_lsx_vsrai_w: ++ case LoongArch::BI__builtin_lasx_xvsrai_w: ++ case LoongArch::BI__builtin_lsx_vsrari_w: ++ case LoongArch::BI__builtin_lasx_xvsrari_w: ++ case LoongArch::BI__builtin_lsx_vsrli_w: ++ case LoongArch::BI__builtin_lasx_xvsrli_w: ++ case LoongArch::BI__builtin_lsx_vsllwil_d_w: ++ case LoongArch::BI__builtin_lsx_vsllwil_du_wu: ++ case LoongArch::BI__builtin_lasx_xvsllwil_d_w: ++ case LoongArch::BI__builtin_lasx_xvsllwil_du_wu: ++ case LoongArch::BI__builtin_lsx_vsrlri_w: ++ 
case LoongArch::BI__builtin_lasx_xvsrlri_w: ++ case LoongArch::BI__builtin_lsx_vrotri_w: ++ case LoongArch::BI__builtin_lasx_xvrotri_w: ++ case LoongArch::BI__builtin_lsx_vsubi_bu: ++ case LoongArch::BI__builtin_lsx_vsubi_hu: ++ case LoongArch::BI__builtin_lasx_xvsubi_bu: ++ case LoongArch::BI__builtin_lasx_xvsubi_hu: ++ case LoongArch::BI__builtin_lasx_xvsubi_wu: ++ case LoongArch::BI__builtin_lasx_xvsubi_du: ++ case LoongArch::BI__builtin_lsx_vbsrl_v: ++ case LoongArch::BI__builtin_lsx_vbsll_v: ++ case LoongArch::BI__builtin_lasx_xvbsrl_v: ++ case LoongArch::BI__builtin_lasx_xvbsll_v: ++ case LoongArch::BI__builtin_lsx_vsubi_wu: ++ case LoongArch::BI__builtin_lsx_vsubi_du: ++ i = 1; ++ l = 0; ++ u = 31; ++ break; ++ case LoongArch::BI__builtin_lsx_vssrarni_h_w: ++ case LoongArch::BI__builtin_lsx_vssrarni_hu_w: ++ case LoongArch::BI__builtin_lasx_xvssrarni_h_w: ++ case LoongArch::BI__builtin_lasx_xvssrarni_hu_w: ++ case LoongArch::BI__builtin_lsx_vssrani_h_w: ++ case LoongArch::BI__builtin_lsx_vssrani_hu_w: ++ case LoongArch::BI__builtin_lasx_xvssrani_h_w: ++ case LoongArch::BI__builtin_lasx_xvssrani_hu_w: ++ case LoongArch::BI__builtin_lsx_vsrarni_h_w: ++ case LoongArch::BI__builtin_lasx_xvsrarni_h_w: ++ case LoongArch::BI__builtin_lsx_vsrani_h_w: ++ case LoongArch::BI__builtin_lasx_xvsrani_h_w: ++ case LoongArch::BI__builtin_lsx_vfrstpi_b: ++ case LoongArch::BI__builtin_lsx_vfrstpi_h: ++ case LoongArch::BI__builtin_lasx_xvfrstpi_b: ++ case LoongArch::BI__builtin_lasx_xvfrstpi_h: ++ case LoongArch::BI__builtin_lsx_vsrlni_h_w: ++ case LoongArch::BI__builtin_lasx_xvsrlni_h_w: ++ case LoongArch::BI__builtin_lasx_xvsrlrni_h_w: ++ case LoongArch::BI__builtin_lsx_vssrlni_h_w: ++ case LoongArch::BI__builtin_lsx_vssrlni_hu_w: ++ case LoongArch::BI__builtin_lasx_xvssrlni_h_w: ++ case LoongArch::BI__builtin_lasx_xvssrlni_hu_w: ++ case LoongArch::BI__builtin_lsx_vssrlrni_h_w: ++ case LoongArch::BI__builtin_lsx_vssrlrni_hu_w: ++ case LoongArch::BI__builtin_lasx_xvssrlrni_h_w: ++ case LoongArch::BI__builtin_lasx_xvssrlrni_hu_w: ++ i = 2; ++ l = 0; ++ u = 31; ++ break; ++ case LoongArch::BI__builtin_lasx_xvstelm_b: ++ return SemaBuiltinConstantArgRange(TheCall, 2, -128, 127) || ++ SemaBuiltinConstantArgRange(TheCall, 3, 0, 31); ++ // These intrinsics take an unsigned 6 bit immediate. 
++ case LoongArch::BI__builtin_lsx_vbitclri_d: ++ case LoongArch::BI__builtin_lasx_xvbitclri_d: ++ case LoongArch::BI__builtin_lsx_vbitrevi_d: ++ case LoongArch::BI__builtin_lasx_xvbitrevi_d: ++ case LoongArch::BI__builtin_lsx_vbitseti_d: ++ case LoongArch::BI__builtin_lasx_xvbitseti_d: ++ case LoongArch::BI__builtin_lsx_vsat_d: ++ case LoongArch::BI__builtin_lsx_vsat_du: ++ case LoongArch::BI__builtin_lasx_xvsat_d: ++ case LoongArch::BI__builtin_lasx_xvsat_du: ++ case LoongArch::BI__builtin_lsx_vslli_d: ++ case LoongArch::BI__builtin_lasx_xvslli_d: ++ case LoongArch::BI__builtin_lsx_vsrai_d: ++ case LoongArch::BI__builtin_lasx_xvsrai_d: ++ case LoongArch::BI__builtin_lsx_vsrli_d: ++ case LoongArch::BI__builtin_lasx_xvsrli_d: ++ case LoongArch::BI__builtin_lsx_vsrari_d: ++ case LoongArch::BI__builtin_lasx_xvsrari_d: ++ case LoongArch::BI__builtin_lsx_vrotri_d: ++ case LoongArch::BI__builtin_lasx_xvrotri_d: ++ case LoongArch::BI__builtin_lasx_xvsrlri_d: ++ case LoongArch::BI__builtin_lsx_vsrlri_d: ++ i = 1; ++ l = 0; ++ u = 63; ++ break; ++ case LoongArch::BI__builtin_lsx_vssrarni_w_d: ++ case LoongArch::BI__builtin_lsx_vssrarni_wu_d: ++ case LoongArch::BI__builtin_lasx_xvssrarni_w_d: ++ case LoongArch::BI__builtin_lasx_xvssrarni_wu_d: ++ case LoongArch::BI__builtin_lsx_vssrani_w_d: ++ case LoongArch::BI__builtin_lsx_vssrani_wu_d: ++ case LoongArch::BI__builtin_lasx_xvssrani_w_d: ++ case LoongArch::BI__builtin_lasx_xvssrani_wu_d: ++ case LoongArch::BI__builtin_lsx_vsrarni_w_d: ++ case LoongArch::BI__builtin_lasx_xvsrarni_w_d: ++ case LoongArch::BI__builtin_lsx_vsrlni_w_d: ++ case LoongArch::BI__builtin_lasx_xvsrlni_w_d: ++ case LoongArch::BI__builtin_lasx_xvsrlrni_w_d: ++ case LoongArch::BI__builtin_lsx_vssrlni_w_d: ++ case LoongArch::BI__builtin_lsx_vssrlni_wu_d: ++ case LoongArch::BI__builtin_lasx_xvssrlni_w_d: ++ case LoongArch::BI__builtin_lasx_xvssrlni_wu_d: ++ case LoongArch::BI__builtin_lsx_vssrlrni_w_d: ++ case LoongArch::BI__builtin_lsx_vssrlrni_wu_d: ++ case LoongArch::BI__builtin_lasx_xvssrlrni_w_d: ++ case LoongArch::BI__builtin_lasx_xvssrlrni_wu_d: ++ case LoongArch::BI__builtin_lsx_vsrani_w_d: ++ case LoongArch::BI__builtin_lasx_xvsrani_w_d: ++ i = 2; ++ l = 0; ++ u = 63; ++ break; ++ // These intrinsics take an unsigned 7 bit immediate. ++ case LoongArch::BI__builtin_lsx_vssrarni_d_q: ++ case LoongArch::BI__builtin_lsx_vssrarni_du_q: ++ case LoongArch::BI__builtin_lasx_xvssrarni_d_q: ++ case LoongArch::BI__builtin_lasx_xvssrarni_du_q: ++ case LoongArch::BI__builtin_lsx_vssrani_d_q: ++ case LoongArch::BI__builtin_lsx_vssrani_du_q: ++ case LoongArch::BI__builtin_lasx_xvssrani_d_q: ++ case LoongArch::BI__builtin_lasx_xvssrani_du_q: ++ case LoongArch::BI__builtin_lsx_vsrarni_d_q: ++ case LoongArch::BI__builtin_lasx_xvsrarni_d_q: ++ case LoongArch::BI__builtin_lsx_vssrlni_d_q: ++ case LoongArch::BI__builtin_lsx_vssrlni_du_q: ++ case LoongArch::BI__builtin_lasx_xvssrlni_d_q: ++ case LoongArch::BI__builtin_lasx_xvssrlni_du_q: ++ case LoongArch::BI__builtin_lsx_vssrlrni_d_q: ++ case LoongArch::BI__builtin_lsx_vssrlrni_du_q: ++ case LoongArch::BI__builtin_lasx_xvssrlrni_d_q: ++ case LoongArch::BI__builtin_lasx_xvssrlrni_du_q: ++ case LoongArch::BI__builtin_lsx_vsrani_d_q: ++ case LoongArch::BI__builtin_lasx_xvsrani_d_q: ++ case LoongArch::BI__builtin_lasx_xvsrlni_d_q: ++ case LoongArch::BI__builtin_lasx_xvsrlrni_d_q: ++ case LoongArch::BI__builtin_lsx_vsrlni_d_q: ++ i = 2; ++ l = 0; ++ u = 127; ++ break; ++ // These intrinsics take a signed 5 bit immediate. 
++ case LoongArch::BI__builtin_lsx_vseqi_b: ++ case LoongArch::BI__builtin_lsx_vseqi_h: ++ case LoongArch::BI__builtin_lsx_vseqi_w: ++ case LoongArch::BI__builtin_lsx_vseqi_d: ++ case LoongArch::BI__builtin_lasx_xvseqi_b: ++ case LoongArch::BI__builtin_lasx_xvseqi_h: ++ case LoongArch::BI__builtin_lasx_xvseqi_w: ++ case LoongArch::BI__builtin_lasx_xvseqi_d: ++ case LoongArch::BI__builtin_lsx_vslti_b: ++ case LoongArch::BI__builtin_lsx_vslti_h: ++ case LoongArch::BI__builtin_lsx_vslti_w: ++ case LoongArch::BI__builtin_lsx_vslti_d: ++ case LoongArch::BI__builtin_lasx_xvslti_b: ++ case LoongArch::BI__builtin_lasx_xvslti_h: ++ case LoongArch::BI__builtin_lasx_xvslti_w: ++ case LoongArch::BI__builtin_lasx_xvslti_d: ++ case LoongArch::BI__builtin_lsx_vslei_b: ++ case LoongArch::BI__builtin_lsx_vslei_h: ++ case LoongArch::BI__builtin_lsx_vslei_w: ++ case LoongArch::BI__builtin_lsx_vslei_d: ++ case LoongArch::BI__builtin_lasx_xvslei_b: ++ case LoongArch::BI__builtin_lasx_xvslei_h: ++ case LoongArch::BI__builtin_lasx_xvslei_w: ++ case LoongArch::BI__builtin_lasx_xvslei_d: ++ case LoongArch::BI__builtin_lsx_vmaxi_b: ++ case LoongArch::BI__builtin_lsx_vmaxi_h: ++ case LoongArch::BI__builtin_lsx_vmaxi_w: ++ case LoongArch::BI__builtin_lsx_vmaxi_d: ++ case LoongArch::BI__builtin_lasx_xvmaxi_b: ++ case LoongArch::BI__builtin_lasx_xvmaxi_h: ++ case LoongArch::BI__builtin_lasx_xvmaxi_w: ++ case LoongArch::BI__builtin_lasx_xvmaxi_d: ++ case LoongArch::BI__builtin_lsx_vmini_b: ++ case LoongArch::BI__builtin_lsx_vmini_h: ++ case LoongArch::BI__builtin_lsx_vmini_w: ++ case LoongArch::BI__builtin_lasx_xvmini_b: ++ case LoongArch::BI__builtin_lasx_xvmini_h: ++ case LoongArch::BI__builtin_lasx_xvmini_w: ++ case LoongArch::BI__builtin_lasx_xvmini_d: ++ case LoongArch::BI__builtin_lsx_vmini_d: ++ i = 1; ++ l = -16; ++ u = 15; ++ break; ++ // These intrinsics take a signed 9 bit immediate. ++ case LoongArch::BI__builtin_lasx_xvldrepl_d: ++ case LoongArch::BI__builtin_lsx_vldrepl_d: ++ i = 1; ++ l = -256; ++ u = 255; ++ break; ++ // These intrinsics take an unsigned 8 bit immediate. 
++ case LoongArch::BI__builtin_lsx_vandi_b: ++ case LoongArch::BI__builtin_lasx_xvandi_b: ++ case LoongArch::BI__builtin_lsx_vnori_b: ++ case LoongArch::BI__builtin_lasx_xvnori_b: ++ case LoongArch::BI__builtin_lsx_vori_b: ++ case LoongArch::BI__builtin_lasx_xvori_b: ++ case LoongArch::BI__builtin_lsx_vshuf4i_b: ++ case LoongArch::BI__builtin_lsx_vshuf4i_h: ++ case LoongArch::BI__builtin_lsx_vshuf4i_w: ++ case LoongArch::BI__builtin_lasx_xvshuf4i_b: ++ case LoongArch::BI__builtin_lasx_xvshuf4i_h: ++ case LoongArch::BI__builtin_lasx_xvshuf4i_w: ++ case LoongArch::BI__builtin_lasx_xvxori_b: ++ case LoongArch::BI__builtin_lasx_xvpermi_d: ++ case LoongArch::BI__builtin_lsx_vxori_b: ++ i = 1; ++ l = 0; ++ u = 255; ++ break; ++ case LoongArch::BI__builtin_lsx_vbitseli_b: ++ case LoongArch::BI__builtin_lasx_xvbitseli_b: ++ case LoongArch::BI__builtin_lsx_vshuf4i_d: ++ case LoongArch::BI__builtin_lasx_xvshuf4i_d: ++ case LoongArch::BI__builtin_lsx_vextrins_b: ++ case LoongArch::BI__builtin_lsx_vextrins_h: ++ case LoongArch::BI__builtin_lsx_vextrins_w: ++ case LoongArch::BI__builtin_lsx_vextrins_d: ++ case LoongArch::BI__builtin_lasx_xvextrins_b: ++ case LoongArch::BI__builtin_lasx_xvextrins_h: ++ case LoongArch::BI__builtin_lasx_xvextrins_w: ++ case LoongArch::BI__builtin_lasx_xvextrins_d: ++ case LoongArch::BI__builtin_lasx_xvpermi_q: ++ case LoongArch::BI__builtin_lsx_vpermi_w: ++ case LoongArch::BI__builtin_lasx_xvpermi_w: ++ i = 2; ++ l = 0; ++ u = 255; ++ break; ++ // df/n format ++ // These intrinsics take an unsigned 4 bit immediate. ++ case LoongArch::BI__builtin_lsx_vpickve2gr_b: ++ case LoongArch::BI__builtin_lsx_vpickve2gr_bu: ++ case LoongArch::BI__builtin_lasx_xvrepl128vei_b: ++ case LoongArch::BI__builtin_lsx_vreplvei_b: ++ i = 1; ++ l = 0; ++ u = 15; ++ break; ++ case LoongArch::BI__builtin_lsx_vinsgr2vr_b: ++ i = 2; ++ l = 0; ++ u = 15; ++ break; ++ case LoongArch::BI__builtin_lasx_xvstelm_h: ++ case LoongArch::BI__builtin_lsx_vstelm_b: ++ return SemaBuiltinConstantArgRange(TheCall, 2, -128, 127) || ++ SemaBuiltinConstantArgRange(TheCall, 3, 0, 15); ++ // These intrinsics take an unsigned 3 bit immediate. ++ case LoongArch::BI__builtin_lsx_vpickve2gr_h: ++ case LoongArch::BI__builtin_lsx_vpickve2gr_hu: ++ case LoongArch::BI__builtin_lasx_xvrepl128vei_h: ++ case LoongArch::BI__builtin_lasx_xvpickve2gr_w: ++ case LoongArch::BI__builtin_lasx_xvpickve2gr_wu: ++ case LoongArch::BI__builtin_lasx_xvpickve_w: ++ case LoongArch::BI__builtin_lsx_vreplvei_h: ++ i = 1; ++ l = 0; ++ u = 7; ++ break; ++ case LoongArch::BI__builtin_lsx_vinsgr2vr_h: ++ case LoongArch::BI__builtin_lasx_xvinsgr2vr_w: ++ case LoongArch::BI__builtin_lasx_xvinsve0_w: ++ i = 2; ++ l = 0; ++ u = 7; ++ break; ++ case LoongArch::BI__builtin_lasx_xvstelm_w: ++ case LoongArch::BI__builtin_lsx_vstelm_h: ++ return SemaBuiltinConstantArgRange(TheCall, 2, -128, 127) || ++ SemaBuiltinConstantArgRange(TheCall, 3, 0, 7); ++ // These intrinsics take an unsigned 2 bit immediate. 
++ case LoongArch::BI__builtin_lsx_vpickve2gr_w: ++ case LoongArch::BI__builtin_lsx_vpickve2gr_wu: ++ case LoongArch::BI__builtin_lasx_xvrepl128vei_w: ++ case LoongArch::BI__builtin_lasx_xvpickve2gr_d: ++ case LoongArch::BI__builtin_lasx_xvpickve2gr_du: ++ case LoongArch::BI__builtin_lasx_xvpickve_d: ++ case LoongArch::BI__builtin_lsx_vreplvei_w: ++ i = 1; ++ l = 0; ++ u = 3; ++ break; ++ case LoongArch::BI__builtin_lsx_vinsgr2vr_w: ++ case LoongArch::BI__builtin_lasx_xvinsve0_d: ++ case LoongArch::BI__builtin_lasx_xvinsgr2vr_d: ++ i = 2; ++ l = 0; ++ u = 3; ++ break; ++ case LoongArch::BI__builtin_lasx_xvstelm_d: ++ case LoongArch::BI__builtin_lsx_vstelm_w: ++ return SemaBuiltinConstantArgRange(TheCall, 2, -128, 127) || ++ SemaBuiltinConstantArgRange(TheCall, 3, 0, 3); ++ // These intrinsics take an unsigned 1 bit immediate. ++ case LoongArch::BI__builtin_lsx_vpickve2gr_d: ++ case LoongArch::BI__builtin_lsx_vpickve2gr_du: ++ case LoongArch::BI__builtin_lasx_xvrepl128vei_d: ++ case LoongArch::BI__builtin_lsx_vreplvei_d: ++ i = 1; ++ l = 0; ++ u = 1; ++ break; ++ case LoongArch::BI__builtin_lsx_vinsgr2vr_d: ++ i = 2; ++ l = 0; ++ u = 1; ++ break; ++ case LoongArch::BI__builtin_lsx_vstelm_d: ++ return SemaBuiltinConstantArgRange(TheCall, 2, -128, 127) || ++ SemaBuiltinConstantArgRange(TheCall, 3, 0, 1); ++ // Memory offsets and immediate loads. ++ // These intrinsics take a signed 10 bit immediate. ++ case LoongArch::BI__builtin_lasx_xvldrepl_w: ++ case LoongArch::BI__builtin_lsx_vldrepl_w: ++ i = 1; ++ l = -512; ++ u = 511; ++ break; ++ case LoongArch::BI__builtin_lasx_xvldrepl_h: ++ case LoongArch::BI__builtin_lsx_vldrepl_h: ++ i = 1; ++ l = -1024; ++ u = 1023; ++ break; ++ case LoongArch::BI__builtin_lasx_xvldrepl_b: ++ case LoongArch::BI__builtin_lsx_vldrepl_b: ++ i = 1; ++ l = -2048; ++ u = 2047; ++ break; ++ case LoongArch::BI__builtin_lasx_xvld: ++ case LoongArch::BI__builtin_lsx_vld: ++ i = 1; ++ l = -2048; ++ u = 2047; ++ break; ++ case LoongArch::BI__builtin_lsx_vst: ++ case LoongArch::BI__builtin_lasx_xvst: ++ i = 2; ++ l = -2048; ++ u = 2047; ++ break; ++ case LoongArch::BI__builtin_lasx_xvldi: ++ case LoongArch::BI__builtin_lsx_vldi: ++ i = 0; ++ l = -4096; ++ u = 4095; ++ break; ++ // These intrinsics take an unsigned 5 bit immediate and a signed 12 bit immediate. ++ case LoongArch::BI__builtin_loongarch_cacop: ++ case LoongArch::BI__builtin_loongarch_dcacop: ++ return SemaBuiltinConstantArgRange(TheCall, 0, 0, 31) || ++ SemaBuiltinConstantArgRange(TheCall, 2, -2048, 2047); ++ // These intrinsics take an unsigned 14 bit immediate. ++ case LoongArch::BI__builtin_loongarch_csrrd: ++ case LoongArch::BI__builtin_loongarch_dcsrrd: i = 0; l = 0; u = 16383; break; ++ case LoongArch::BI__builtin_loongarch_csrwr: ++ case LoongArch::BI__builtin_loongarch_dcsrwr: i = 1; l = 0; u = 16383; break; ++ case LoongArch::BI__builtin_loongarch_csrxchg: ++ case LoongArch::BI__builtin_loongarch_dcsrxchg: i = 2; l = 0; u = 16383; break; ++ // These intrinsics take an unsigned 15 bit immediate. 
++ case LoongArch::BI__builtin_loongarch_dbar: ++ case LoongArch::BI__builtin_loongarch_ibar: ++ case LoongArch::BI__builtin_loongarch_syscall: ++ case LoongArch::BI__builtin_loongarch_break: i = 0; l = 0; u = 32767; break; ++ } ++ ++ if (!m) ++ return SemaBuiltinConstantArgRange(TheCall, i, l, u); ++ ++ return SemaBuiltinConstantArgRange(TheCall, i, l, u) || ++ SemaBuiltinConstantArgMultiple(TheCall, i, m); ++} ++ + bool Sema::CheckSystemZBuiltinFunctionCall(unsigned BuiltinID, + CallExpr *TheCall) { + if (BuiltinID == SystemZ::BI__builtin_tabort) { +diff --git a/lib/Sema/SemaTemplateInstantiateDecl.cpp b/lib/Sema/SemaTemplateInstantiateDecl.cpp +index 29378282..467372c7 100644 +--- a/lib/Sema/SemaTemplateInstantiateDecl.cpp ++++ b/lib/Sema/SemaTemplateInstantiateDecl.cpp +@@ -4826,8 +4826,7 @@ void Sema::InstantiateFunctionDefinition(SourceLocation PointOfInstantiation, + /*Complain*/DefinitionRequired)) { + if (DefinitionRequired) + Function->setInvalidDecl(); +- else if (TSK == TSK_ExplicitInstantiationDefinition || +- (Function->isConstexpr() && !Recursive)) { ++ else if (TSK == TSK_ExplicitInstantiationDefinition) { + // Try again at the end of the translation unit (at which point a + // definition will be required). + assert(!Recursive); +@@ -4842,7 +4841,7 @@ void Sema::InstantiateFunctionDefinition(SourceLocation PointOfInstantiation, + Diag(PatternDecl->getLocation(), diag::note_forward_template_decl); + if (getLangOpts().CPlusPlus11) + Diag(PointOfInstantiation, diag::note_inst_declaration_hint) +- << Function; ++ << Function; + } + } + +diff --git a/test/CodeGen/sanitize-coverage-old-pm.c b/test/CodeGen/sanitize-coverage-old-pm.c +index 9b4f8991..18123a53 100644 +--- a/test/CodeGen/sanitize-coverage-old-pm.c ++++ b/test/CodeGen/sanitize-coverage-old-pm.c +@@ -7,8 +7,8 @@ + // + // Host armv7 is currently unsupported: https://bugs.llvm.org/show_bug.cgi?id=46117 + // UNSUPPORTED: armv7, armv7l, thumbv7, armv8l +-// The same issue also occurs on a riscv32 host. +-// XFAIL: riscv32 ++// The same issue also occurs on riscv32 and loongarch64 hosts. 
++// XFAIL: riscv32, loongarch64 + + int x[10]; + +diff --git a/test/CodeGen/ubsan-function.cpp b/test/CodeGen/ubsan-function.cpp +index 8a16dfdf..2466d8a2 100644 +--- a/test/CodeGen/ubsan-function.cpp ++++ b/test/CodeGen/ubsan-function.cpp +@@ -1,7 +1,6 @@ + // RUN: %clang_cc1 -triple x86_64-linux-gnu -emit-llvm -o - %s -fsanitize=function -fno-sanitize-recover=all | FileCheck %s + +-// CHECK: @[[PROXY:.*]] = private unnamed_addr constant i8* bitcast ({ i8*, i8* }* @_ZTIFvvE to i8*) +-// CHECK: define{{.*}} void @_Z3funv() #0 !func_sanitize ![[FUNCSAN:.*]] { ++// CHECK-LABEL: define{{.*}} void @_Z3funv() #0 prologue <{ i32, i32 }> <{ i32 846595819, i32 trunc (i64 sub (i64 ptrtoint (i8** @0 to i64), i64 ptrtoint (void ()* @_Z3funv to i64)) to i32) }> { + void fun() {} + + // CHECK-LABEL: define{{.*}} void @_Z6callerPFvvE(void ()* noundef %f) +@@ -21,5 +20,3 @@ void fun() {} + // CHECK: [[LABEL3]]: + // CHECK: br label %[[LABEL4]], !nosanitize + void caller(void (*f)()) { f(); } +- +-// CHECK: ![[FUNCSAN]] = !{i32 846595819, i8** @[[PROXY]]} +diff --git a/test/CodeGenCXX/catch-undef-behavior.cpp b/test/CodeGenCXX/catch-undef-behavior.cpp +index ade29797..d6b094cb 100644 +--- a/test/CodeGenCXX/catch-undef-behavior.cpp ++++ b/test/CodeGenCXX/catch-undef-behavior.cpp +@@ -1,8 +1,8 @@ +-// RUN: %clang_cc1 -disable-noundef-analysis -std=c++11 -fsanitize=signed-integer-overflow,integer-divide-by-zero,float-divide-by-zero,shift-base,shift-exponent,unreachable,return,vla-bound,alignment,null,vptr,object-size,float-cast-overflow,bool,enum,array-bounds,function -fsanitize-recover=signed-integer-overflow,integer-divide-by-zero,float-divide-by-zero,shift-base,shift-exponent,vla-bound,alignment,null,vptr,object-size,float-cast-overflow,bool,enum,array-bounds,function -emit-llvm %s -o - -triple x86_64-linux-gnu | opt -instnamer -S | FileCheck %s --check-prefixes=CHECK,CHECK-FUNCSAN ++// RUN: %clang_cc1 -disable-noundef-analysis -std=c++11 -fsanitize=signed-integer-overflow,integer-divide-by-zero,float-divide-by-zero,shift-base,shift-exponent,unreachable,return,vla-bound,alignment,null,vptr,object-size,float-cast-overflow,bool,enum,array-bounds,function -fsanitize-recover=signed-integer-overflow,integer-divide-by-zero,float-divide-by-zero,shift-base,shift-exponent,vla-bound,alignment,null,vptr,object-size,float-cast-overflow,bool,enum,array-bounds,function -emit-llvm %s -o - -triple x86_64-linux-gnu | opt -instnamer -S | FileCheck %s + // RUN: %clang_cc1 -disable-noundef-analysis -std=c++11 -fsanitize=vptr,address -fsanitize-recover=vptr,address -emit-llvm %s -o - -triple x86_64-linux-gnu | FileCheck %s --check-prefix=CHECK-ASAN + // RUN: %clang_cc1 -disable-noundef-analysis -std=c++11 -fsanitize=vptr -fsanitize-recover=vptr -emit-llvm %s -o - -triple x86_64-linux-gnu | FileCheck %s --check-prefix=DOWNCAST-NULL +-// RUN: %clang_cc1 -disable-noundef-analysis -std=c++11 -fsanitize=function -emit-llvm %s -o - -triple x86_64-linux-gnux32 | FileCheck %s --check-prefix=CHECK-FUNCSAN +-// RUN: %clang_cc1 -disable-noundef-analysis -std=c++11 -fsanitize=function -emit-llvm %s -o - -triple i386-linux-gnu | FileCheck %s --check-prefix=CHECK-FUNCSAN ++// RUN: %clang_cc1 -disable-noundef-analysis -std=c++11 -fsanitize=function -emit-llvm %s -o - -triple x86_64-linux-gnux32 | FileCheck %s --check-prefix=CHECK-X32 ++// RUN: %clang_cc1 -disable-noundef-analysis -std=c++11 -fsanitize=function -emit-llvm %s -o - -triple i386-linux-gnu | FileCheck %s --check-prefix=CHECK-X86 + + struct S { + double d; +@@ -16,7 +16,9 @@ 
struct S { + // Check that type mismatch handler is not modified by ASan. + // CHECK-ASAN: private unnamed_addr global { { [{{.*}} x i8]*, i32, i32 }, { i16, i16, [4 x i8] }*, i8*, i8 } { {{.*}}, { i16, i16, [4 x i8] }* [[TYPE_DESCR]], {{.*}} } + +-// CHECK-FUNCSAN: [[PROXY:@.+]] = private unnamed_addr constant i8* bitcast ({ i8*, i8* }* @_ZTIFvPFviEE to i8*) ++// CHECK: [[IndirectRTTI_ZTIFvPFviEE:@.+]] = private constant i8* bitcast ({ i8*, i8* }* @_ZTIFvPFviEE to i8*) ++// CHECK-X86: [[IndirectRTTI_ZTIFvPFviEE:@.+]] = private constant i8* bitcast ({ i8*, i8* }* @_ZTIFvPFviEE to i8*) ++// CHECK-X32: [[IndirectRTTI_ZTIFvPFviEE:@.+]] = private constant i8* bitcast ({ i8*, i8* }* @_ZTIFvPFviEE to i8*) + + struct T : S {}; + +@@ -397,7 +399,10 @@ void downcast_reference(B &b) { + // CHECK-NEXT: br i1 [[AND]] + } + +-// CHECK-FUNCSAN: @_Z22indirect_function_callPFviE({{.*}} !func_sanitize ![[FUNCSAN:.*]] { ++// ++// CHECK-LABEL: @_Z22indirect_function_callPFviE({{.*}} prologue <{ i32, i32 }> <{ i32 846595819, i32 trunc (i64 sub (i64 ptrtoint (i8** {{.*}} to i64), i64 ptrtoint (void (void (i32)*)* @_Z22indirect_function_callPFviE to i64)) to i32) }> ++// CHECK-X32: @_Z22indirect_function_callPFviE({{.*}} prologue <{ i32, i32 }> <{ i32 846595819, i32 sub (i32 ptrtoint (i8** [[IndirectRTTI_ZTIFvPFviEE]] to i32), i32 ptrtoint (void (void (i32)*)* @_Z22indirect_function_callPFviE to i32)) }> ++// CHECK-X86: @_Z22indirect_function_callPFviE({{.*}} prologue <{ i32, i32 }> <{ i32 846595819, i32 sub (i32 ptrtoint (i8** [[IndirectRTTI_ZTIFvPFviEE]] to i32), i32 ptrtoint (void (void (i32)*)* @_Z22indirect_function_callPFviE to i32)) }> + void indirect_function_call(void (*p)(int)) { + // CHECK: [[PTR:%.+]] = bitcast void (i32)* {{.*}} to <{ i32, i32 }>* + +@@ -478,34 +483,34 @@ void force_irgen() { + } + + // CHECK-LABEL: define{{.*}} void @_ZN29FunctionSanitizerVirtualCalls1B1fEv +-// CHECK-NOT: !func_sanitize ++// CHECK-NOT: prologue + // + // CHECK-LABEL: define{{.*}} void @_ZTv0_n24_N29FunctionSanitizerVirtualCalls1B1fEv +-// CHECK-NOT: !func_sanitize ++// CHECK-NOT: prologue + // + // CHECK-LABEL: define{{.*}} void @_ZN29FunctionSanitizerVirtualCalls11force_irgenEv() +-// CHECK: !func_sanitize ++// CHECK: prologue + // + // CHECK-LABEL: define linkonce_odr void @_ZN29FunctionSanitizerVirtualCalls1AC1Ev +-// CHECK-NOT: !func_sanitize ++// CHECK-NOT: prologue + // + // CHECK-LABEL: define linkonce_odr void @_ZN29FunctionSanitizerVirtualCalls1A1gEv +-// CHECK-NOT: !func_sanitize ++// CHECK-NOT: prologue + // + // CHECK-LABEL: define linkonce_odr void @_ZN29FunctionSanitizerVirtualCalls1A1hEv +-// CHECK-NOT: !func_sanitize ++// CHECK-NOT: prologue + // + // CHECK-LABEL: define linkonce_odr void @_ZN29FunctionSanitizerVirtualCalls1BC1Ev +-// CHECK-NOT: !func_sanitize ++// CHECK-NOT: prologue + // + // CHECK-LABEL: define linkonce_odr void @_ZN29FunctionSanitizerVirtualCalls1B1bEv +-// CHECK-NOT: !func_sanitize ++// CHECK-NOT: prologue + // + // CHECK-LABEL: define linkonce_odr void @_ZN29FunctionSanitizerVirtualCalls1B1gEv +-// CHECK-NOT: !func_sanitize ++// CHECK-NOT: prologue + // + // CHECK-LABEL: define linkonce_odr void @_ZN29FunctionSanitizerVirtualCalls1B1qEv +-// CHECK: !func_sanitize ++// CHECK: prologue + + } + +@@ -749,5 +754,3 @@ void ThisAlign::this_align_lambda_2() { + } + + // CHECK: attributes [[NR_NUW]] = { noreturn nounwind } +- +-// CHECK-FUNCSAN: ![[FUNCSAN]] = !{i32 846595819, i8** [[PROXY]]} +diff --git a/test/CodeGenCXX/ubsan-function-noexcept.cpp 
b/test/CodeGenCXX/ubsan-function-noexcept.cpp +index 9d5eb1ed..3c0c0e8b 100644 +--- a/test/CodeGenCXX/ubsan-function-noexcept.cpp ++++ b/test/CodeGenCXX/ubsan-function-noexcept.cpp +@@ -2,8 +2,8 @@ + + // Check that typeinfo recorded in function prolog doesn't have "Do" noexcept + // qualifier in its mangled name. +-// CHECK: [[PROXY:@.*]] = private unnamed_addr constant i8* bitcast ({ i8*, i8* }* @_ZTIFvvE to i8*) +-// CHECK: define{{.*}} void @_Z1fv() #{{.*}} !func_sanitize ![[FUNCSAN:.*]] { ++// CHECK: @[[RTTI:[0-9]+]] = private constant i8* bitcast ({ i8*, i8* }* @_ZTIFvvE to i8*) ++// CHECK: define{{.*}} void @_Z1fv() #{{.*}} prologue <{ i32, i32 }> <{ i32 {{.*}}, i32 trunc (i64 sub (i64 ptrtoint (i8** @[[RTTI]] to i64), i64 ptrtoint (void ()* @_Z1fv to i64)) to i32) }> + void f() noexcept {} + + // CHECK: define{{.*}} void @_Z1gPDoFvvE +@@ -13,5 +13,3 @@ void g(void (*p)() noexcept) { + // CHECK: icmp eq i8* %{{.*}}, bitcast ({ i8*, i8* }* @_ZTIFvvE to i8*), !nosanitize + p(); + } +- +-// CHECK: ![[FUNCSAN]] = !{i32 846595819, i8** [[PROXY]]} +diff --git a/test/Driver/baremetal-sysroot.cpp b/test/Driver/baremetal-sysroot.cpp +index ae174e01..fc660207 100644 +--- a/test/Driver/baremetal-sysroot.cpp ++++ b/test/Driver/baremetal-sysroot.cpp +@@ -10,7 +10,7 @@ + // RUN: ln -s %clang %T/baremetal_default_sysroot/bin/clang + + // RUN: %T/baremetal_default_sysroot/bin/clang -no-canonical-prefixes %s -### -o %t.o 2>&1 \ +-// RUN: -target armv6m-none-eabi \ ++// RUN: -target armv6m-none-eabi --sysroot= \ + // RUN: | FileCheck --check-prefix=CHECK-V6M-C %s + // CHECK-V6M-C: "{{.*}}clang{{.*}}" "-cc1" "-triple" "thumbv6m-none-unknown-eabi" + // CHECK-V6M-C-SAME: "-internal-isystem" "{{.*}}/baremetal_default_sysroot{{[/\\]+}}bin{{[/\\]+}}..{{[/\\]+}}lib{{[/\\]+}}clang-runtimes{{[/\\]+}}armv6m-none-eabi{{[/\\]+}}include{{[/\\]+}}c++{{[/\\]+}}v1" +diff --git a/test/Driver/baremetal.cpp b/test/Driver/baremetal.cpp +index 7c11fe67..56eb5b70 100644 +--- a/test/Driver/baremetal.cpp ++++ b/test/Driver/baremetal.cpp +@@ -105,7 +105,7 @@ + // CHECK-SYSROOT-INC-NOT: "-internal-isystem" "include" + + // RUN: %clang -no-canonical-prefixes %s -### -o %t.o 2>&1 \ +-// RUN: -target aarch64-none-elf \ ++// RUN: -target aarch64-none-elf --sysroot= \ + // RUN: | FileCheck --check-prefix=CHECK-AARCH64-NO-HOST-INC %s + // Verify that the bare metal driver does not include any host system paths: + // CHECK-AARCH64-NO-HOST-INC: InstalledDir: [[INSTALLEDDIR:.+]] +diff --git a/test/Driver/fsanitize.c b/test/Driver/fsanitize.c +index a98fc2ee..17fce198 100644 +--- a/test/Driver/fsanitize.c ++++ b/test/Driver/fsanitize.c +@@ -666,12 +666,12 @@ + // RUN: %clang -fno-sanitize=safe-stack -### %s 2>&1 | FileCheck %s -check-prefix=NOSP + // NOSP-NOT: "-fsanitize=safe-stack" + +-// RUN: %clang -target x86_64-linux-gnu -fsanitize=safe-stack -### %s 2>&1 | FileCheck %s -check-prefix=SP ++// RUN: %clang -target x86_64-linux-gnu -fsanitize=safe-stack -### %s 2>&1 | FileCheck %s -check-prefix=NO-SP + // RUN: %clang -target x86_64-linux-gnu -fsanitize=address,safe-stack -### %s 2>&1 | FileCheck %s -check-prefix=SP-ASAN + // RUN: %clang -target x86_64-linux-gnu -fstack-protector -fsanitize=safe-stack -### %s 2>&1 | FileCheck %s -check-prefix=SP + // RUN: %clang -target x86_64-linux-gnu -fsanitize=safe-stack -fstack-protector-all -### %s 2>&1 | FileCheck %s -check-prefix=SP +-// RUN: %clang -target arm-linux-androideabi -fsanitize=safe-stack -### %s 2>&1 | FileCheck %s -check-prefix=SP +-// RUN: %clang -target aarch64-linux-android 
-fsanitize=safe-stack -### %s 2>&1 | FileCheck %s -check-prefix=SP ++// RUN: %clang -target arm-linux-androideabi -fsanitize=safe-stack -### %s 2>&1 | FileCheck %s -check-prefix=NO-SP ++// RUN: %clang -target aarch64-linux-android -fsanitize=safe-stack -### %s 2>&1 | FileCheck %s -check-prefix=NO-SP + // RUN: %clang -target i386-contiki-unknown -fsanitize=safe-stack -### %s 2>&1 | FileCheck %s -check-prefix=NO-SP + // NO-SP-NOT: stack-protector + // NO-SP: "-fsanitize=safe-stack" +@@ -915,6 +915,3 @@ + + // RUN: %clang -fsanitize=undefined,float-divide-by-zero %s -### 2>&1 | FileCheck %s --check-prefix=CHECK-DIVBYZERO-UBSAN + // CHECK-DIVBYZERO-UBSAN: "-fsanitize={{.*}},float-divide-by-zero,{{.*}}" +- +-// RUN: %clang -target x86_64-linux-gnu -fsanitize=undefined,function -mcmodel=large %s -### 2>&1 | FileCheck %s --check-prefix=CHECK-UBSAN-FUNCTION-CODE-MODEL +-// CHECK-UBSAN-FUNCTION-CODE-MODEL: error: invalid argument '-fsanitize=function' only allowed with '-mcmodel=small' +diff --git a/test/Driver/hexagon-toolchain-linux.c b/test/Driver/hexagon-toolchain-linux.c +index da595903..1ef0561f 100644 +--- a/test/Driver/hexagon-toolchain-linux.c ++++ b/test/Driver/hexagon-toolchain-linux.c +@@ -100,7 +100,7 @@ + // ----------------------------------------------------------------------------- + // internal-isystem for linux with and without musl + // ----------------------------------------------------------------------------- +-// RUN: %clang -### -target hexagon-unknown-linux-musl \ ++// RUN: %clang -### -target hexagon-unknown-linux-musl --sysroot= \ + // RUN: -ccc-install-dir %S/Inputs/hexagon_tree/Tools/bin \ + // RUN: -resource-dir=%S/Inputs/resource_dir \ + // RUN: %s 2>&1 \ +@@ -110,7 +110,7 @@ + // CHECK008-SAME: {{^}} "-internal-isystem" "[[RESOURCE]]/include" + // CHECK008-SAME: {{^}} "-internal-externc-isystem" "[[INSTALLED_DIR]]/../target/hexagon/include" + +-// RUN: %clang -### -target hexagon-unknown-linux \ ++// RUN: %clang -### -target hexagon-unknown-linux --sysroot= \ + // RUN: -ccc-install-dir %S/Inputs/hexagon_tree/Tools/bin \ + // RUN: -resource-dir=%S/Inputs/resource_dir \ + // RUN: %s 2>&1 \ +diff --git a/test/Driver/mips-cs.cpp b/test/Driver/mips-cs.cpp +index 39f87d8f..6ef4c5d4 100644 +--- a/test/Driver/mips-cs.cpp ++++ b/test/Driver/mips-cs.cpp +@@ -4,7 +4,7 @@ + // + // = Big-endian, hard float + // RUN: %clang -no-canonical-prefixes %s -### -o %t.o 2>&1 \ +-// RUN: --target=mips-linux-gnu -no-pie \ ++// RUN: --target=mips-linux-gnu \ + // RUN: -stdlib=libstdc++ --gcc-toolchain=%S/Inputs/mips_cs_tree \ + // RUN: | FileCheck --check-prefix=CHECK-BE-HF-32 %s + // CHECK-BE-HF-32: "-internal-isystem" +@@ -32,7 +32,7 @@ + // + // = Big-endian, hard float, uclibc + // RUN: %clang -no-canonical-prefixes %s -### -o %t.o 2>&1 \ +-// RUN: --target=mips-linux-gnu -muclibc -no-pie \ ++// RUN: --target=mips-linux-gnu -muclibc \ + // RUN: -stdlib=libstdc++ --gcc-toolchain=%S/Inputs/mips_cs_tree \ + // RUN: | FileCheck --check-prefix=CHECK-BE-UC-HF-32 %s + // CHECK-BE-UC-HF-32: "-internal-isystem" +@@ -61,7 +61,7 @@ + // + // = Big-endian, hard float, mips16 + // RUN: %clang -no-canonical-prefixes %s -### -o %t.o 2>&1 \ +-// RUN: --target=mips-linux-gnu -mips16 -no-pie \ ++// RUN: --target=mips-linux-gnu -mips16 \ + // RUN: -stdlib=libstdc++ --gcc-toolchain=%S/Inputs/mips_cs_tree \ + // RUN: | FileCheck --check-prefix=CHECK-BE-HF-16 %s + // CHECK-BE-HF-16: "-internal-isystem" +@@ -90,7 +90,7 @@ + // + // = Big-endian, hard float, mmicromips + // RUN: %clang -no-canonical-prefixes 
%s -### -o %t.o 2>&1 \ +-// RUN: --target=mips-linux-gnu -mmicromips -no-pie \ ++// RUN: --target=mips-linux-gnu -mmicromips \ + // RUN: -stdlib=libstdc++ --gcc-toolchain=%S/Inputs/mips_cs_tree \ + // RUN: | FileCheck --check-prefix=CHECK-BE-HF-MICRO %s + // CHECK-BE-HF-MICRO: "-internal-isystem" +@@ -119,7 +119,7 @@ + // + // = Big-endian, hard float, nan2008 + // RUN: %clang -no-canonical-prefixes %s -### -o %t.o 2>&1 \ +-// RUN: --target=mips-linux-gnu -mnan=2008 -no-pie \ ++// RUN: --target=mips-linux-gnu -mnan=2008 \ + // RUN: -stdlib=libstdc++ --gcc-toolchain=%S/Inputs/mips_cs_tree \ + // RUN: | FileCheck --check-prefix=CHECK-BE-HF-NAN %s + // CHECK-BE-HF-NAN: "-internal-isystem" +@@ -148,7 +148,7 @@ + // + // = Big-endian, hard float, uclibc, nan2008 + // RUN: %clang -no-canonical-prefixes %s -### -o %t.o 2>&1 \ +-// RUN: --target=mips-linux-gnu -muclibc -mnan=2008 -no-pie \ ++// RUN: --target=mips-linux-gnu -muclibc -mnan=2008 \ + // RUN: -stdlib=libstdc++ --gcc-toolchain=%S/Inputs/mips_cs_tree \ + // RUN: | FileCheck --check-prefix=CHECK-BE-UC-HF-NAN %s + // CHECK-BE-UC-HF-NAN: "-internal-isystem" +@@ -177,7 +177,7 @@ + // + // = Big-endian, soft float + // RUN: %clang -no-canonical-prefixes %s -### -o %t.o 2>&1 \ +-// RUN: --target=mips-linux-gnu -msoft-float -no-pie \ ++// RUN: --target=mips-linux-gnu -msoft-float \ + // RUN: -stdlib=libstdc++ --gcc-toolchain=%S/Inputs/mips_cs_tree \ + // RUN: | FileCheck --check-prefix=CHECK-BE-SF-32 %s + // CHECK-BE-SF-32: "-internal-isystem" +@@ -206,7 +206,7 @@ + // + // = Big-endian, soft float, uclibc + // RUN: %clang -no-canonical-prefixes %s -### -o %t.o 2>&1 \ +-// RUN: --target=mips-linux-gnu -muclibc -msoft-float -no-pie \ ++// RUN: --target=mips-linux-gnu -muclibc -msoft-float \ + // RUN: -stdlib=libstdc++ --gcc-toolchain=%S/Inputs/mips_cs_tree \ + // RUN: | FileCheck --check-prefix=CHECK-BE-UC-SF-32 %s + // CHECK-BE-UC-SF-32: "-internal-isystem" +@@ -235,7 +235,7 @@ + // + // = Big-endian, soft float, mips16 + // RUN: %clang -no-canonical-prefixes %s -### -o %t.o 2>&1 \ +-// RUN: --target=mips-linux-gnu -msoft-float -mips16 -no-pie \ ++// RUN: --target=mips-linux-gnu -msoft-float -mips16 \ + // RUN: -stdlib=libstdc++ --gcc-toolchain=%S/Inputs/mips_cs_tree \ + // RUN: | FileCheck --check-prefix=CHECK-BE-SF-16 %s + // CHECK-BE-SF-16: "-internal-isystem" +@@ -264,7 +264,7 @@ + // + // = Big-endian, soft float, micromips + // RUN: %clang -no-canonical-prefixes %s -### -o %t.o 2>&1 \ +-// RUN: --target=mips-linux-gnu -msoft-float -mmicromips -no-pie \ ++// RUN: --target=mips-linux-gnu -msoft-float -mmicromips \ + // RUN: -stdlib=libstdc++ --gcc-toolchain=%S/Inputs/mips_cs_tree \ + // RUN: | FileCheck --check-prefix=CHECK-BE-SF-MICRO %s + // CHECK-BE-SF-MICRO: "-internal-isystem" +@@ -293,7 +293,7 @@ + // + // = Big-endian, hard float, 64-bit + // RUN: %clang -no-canonical-prefixes %s -### -o %t.o 2>&1 \ +-// RUN: --target=mips64-linux-gnu -no-pie \ ++// RUN: --target=mips64-linux-gnu \ + // RUN: -stdlib=libstdc++ --gcc-toolchain=%S/Inputs/mips_cs_tree \ + // RUN: | FileCheck --check-prefix=CHECK-BE-HF-64 %s + // CHECK-BE-HF-64: "-internal-isystem" +@@ -322,7 +322,7 @@ + // + // = Big-endian, soft float, 64-bit + // RUN: %clang -no-canonical-prefixes %s -### -o %t.o 2>&1 \ +-// RUN: --target=mips64-linux-gnu -msoft-float -no-pie \ ++// RUN: --target=mips64-linux-gnu -msoft-float \ + // RUN: -stdlib=libstdc++ --gcc-toolchain=%S/Inputs/mips_cs_tree \ + // RUN: | FileCheck --check-prefix=CHECK-BE-SF-64 %s + // CHECK-BE-SF-64: 
"-internal-isystem" +@@ -351,7 +351,7 @@ + // + // = Little-endian, hard float + // RUN: %clang -no-canonical-prefixes %s -### -o %t.o 2>&1 \ +-// RUN: --target=mipsel-linux-gnu -mhard-float -no-pie \ ++// RUN: --target=mipsel-linux-gnu -mhard-float \ + // RUN: -stdlib=libstdc++ --gcc-toolchain=%S/Inputs/mips_cs_tree \ + // RUN: | FileCheck --check-prefix=CHECK-EL-HF-32 %s + // CHECK-EL-HF-32: "-internal-isystem" +@@ -380,7 +380,7 @@ + // + // = Little-endian, hard float, uclibc + // RUN: %clang -no-canonical-prefixes %s -### -o %t.o 2>&1 \ +-// RUN: --target=mipsel-linux-gnu -mhard-float -muclibc -no-pie \ ++// RUN: --target=mipsel-linux-gnu -mhard-float -muclibc \ + // RUN: -stdlib=libstdc++ --gcc-toolchain=%S/Inputs/mips_cs_tree \ + // RUN: | FileCheck --check-prefix=CHECK-EL-UC-HF-32 %s + // CHECK-EL-UC-HF-32: "-internal-isystem" +@@ -409,7 +409,7 @@ + // + // = Little-endian, hard float, mips16 + // RUN: %clang -no-canonical-prefixes %s -### -o %t.o 2>&1 \ +-// RUN: --target=mipsel-linux-gnu -mips16 -no-pie \ ++// RUN: --target=mipsel-linux-gnu -mips16 \ + // RUN: -stdlib=libstdc++ --gcc-toolchain=%S/Inputs/mips_cs_tree \ + // RUN: | FileCheck --check-prefix=CHECK-EL-HF-16 %s + // CHECK-EL-HF-16: "-internal-isystem" +@@ -438,7 +438,7 @@ + // + // = Little-endian, hard float, micromips + // RUN: %clang -no-canonical-prefixes %s -### -o %t.o 2>&1 \ +-// RUN: --target=mipsel-linux-gnu -mmicromips -no-pie \ ++// RUN: --target=mipsel-linux-gnu -mmicromips \ + // RUN: -stdlib=libstdc++ --gcc-toolchain=%S/Inputs/mips_cs_tree \ + // RUN: | FileCheck --check-prefix=CHECK-EL-HF-MICRO %s + // CHECK-EL-HF-MICRO: "-internal-isystem" +@@ -467,7 +467,7 @@ + // + // = Little-endian, hard float, nan2008 + // RUN: %clang -no-canonical-prefixes %s -### -o %t.o 2>&1 \ +-// RUN: --target=mipsel-linux-gnu -mnan=2008 -no-pie \ ++// RUN: --target=mipsel-linux-gnu -mnan=2008 \ + // RUN: -stdlib=libstdc++ --gcc-toolchain=%S/Inputs/mips_cs_tree \ + // RUN: | FileCheck --check-prefix=CHECK-EL-HF-NAN %s + // CHECK-EL-HF-NAN: "-internal-isystem" +@@ -496,7 +496,7 @@ + // + // = Little-endian, hard float, uclibc, nan2008 + // RUN: %clang -no-canonical-prefixes %s -### -o %t.o 2>&1 \ +-// RUN: --target=mipsel-linux-gnu -muclibc -mnan=2008 -no-pie \ ++// RUN: --target=mipsel-linux-gnu -muclibc -mnan=2008 \ + // RUN: -stdlib=libstdc++ --gcc-toolchain=%S/Inputs/mips_cs_tree \ + // RUN: | FileCheck --check-prefix=CHECK-EL-UC-HF-NAN %s + // CHECK-EL-UC-HF-NAN: "-internal-isystem" +@@ -525,7 +525,7 @@ + // + // = Little-endian, soft float + // RUN: %clang -no-canonical-prefixes %s -### -o %t.o 2>&1 \ +-// RUN: --target=mipsel-linux-gnu -mfloat-abi=soft -no-pie \ ++// RUN: --target=mipsel-linux-gnu -mfloat-abi=soft \ + // RUN: -stdlib=libstdc++ --gcc-toolchain=%S/Inputs/mips_cs_tree \ + // RUN: | FileCheck --check-prefix=CHECK-EL-SF-32 %s + // CHECK-EL-SF-32: "-internal-isystem" +@@ -554,7 +554,7 @@ + // + // = Little-endian, soft float, uclibc + // RUN: %clang -no-canonical-prefixes %s -### -o %t.o 2>&1 \ +-// RUN: --target=mipsel-linux-gnu -mfloat-abi=soft -muclibc -no-pie \ ++// RUN: --target=mipsel-linux-gnu -mfloat-abi=soft -muclibc \ + // RUN: -stdlib=libstdc++ --gcc-toolchain=%S/Inputs/mips_cs_tree \ + // RUN: | FileCheck --check-prefix=CHECK-EL-UC-SF-32 %s + // CHECK-EL-UC-SF-32: "-internal-isystem" +@@ -583,7 +583,7 @@ + // + // = Little-endian, soft float, mips16 + // RUN: %clang -no-canonical-prefixes %s -### -o %t.o 2>&1 \ +-// RUN: --target=mipsel-linux-gnu -mips16 -msoft-float -no-pie \ ++// RUN: 
--target=mipsel-linux-gnu -mips16 -msoft-float \ + // RUN: -stdlib=libstdc++ --gcc-toolchain=%S/Inputs/mips_cs_tree \ + // RUN: | FileCheck --check-prefix=CHECK-EL-SF-16 %s + // CHECK-EL-SF-16: "-internal-isystem" +@@ -612,7 +612,7 @@ + // + // = Little-endian, soft float, micromips + // RUN: %clang -no-canonical-prefixes %s -### -o %t.o 2>&1 \ +-// RUN: --target=mipsel-linux-gnu -mmicromips -msoft-float -no-pie \ ++// RUN: --target=mipsel-linux-gnu -mmicromips -msoft-float \ + // RUN: -stdlib=libstdc++ --gcc-toolchain=%S/Inputs/mips_cs_tree \ + // RUN: | FileCheck --check-prefix=CHECK-EL-SF-MICRO %s + // CHECK-EL-SF-MICRO: "-internal-isystem" +@@ -641,7 +641,7 @@ + // + // = Little-endian, hard float, 64-bit + // RUN: %clang -no-canonical-prefixes %s -### -o %t.o 2>&1 \ +-// RUN: --target=mips64el-linux-gnu -no-pie \ ++// RUN: --target=mips64el-linux-gnu \ + // RUN: -stdlib=libstdc++ --gcc-toolchain=%S/Inputs/mips_cs_tree \ + // RUN: | FileCheck --check-prefix=CHECK-EL-HF-64 %s + // CHECK-EL-HF-64: "-internal-isystem" +@@ -670,7 +670,7 @@ + // + // = Little-endian, soft float, 64-bit + // RUN: %clang -no-canonical-prefixes %s -### -o %t.o 2>&1 \ +-// RUN: --target=mips64el-linux-gnu -msoft-float -no-pie \ ++// RUN: --target=mips64el-linux-gnu -msoft-float \ + // RUN: -stdlib=libstdc++ --gcc-toolchain=%S/Inputs/mips_cs_tree \ + // RUN: | FileCheck --check-prefix=CHECK-EL-SF-64 %s + // CHECK-EL-SF-64: "-internal-isystem" +diff --git a/test/Driver/stack-protector.c b/test/Driver/stack-protector.c +index dfffe0d6..a3e40b50 100644 +--- a/test/Driver/stack-protector.c ++++ b/test/Driver/stack-protector.c +@@ -3,11 +3,11 @@ + // NOSSP-NOT: "-stack-protector-buffer-size" + + // RUN: %clang -target i386-unknown-linux -fstack-protector -### %s 2>&1 | FileCheck %s -check-prefix=SSP +-// SSP: "-stack-protector" "2" ++// SSP: "-stack-protector" "1" + // SSP-NOT: "-stack-protector-buffer-size" + + // RUN: %clang -target i386-unknown-linux -fstack-protector --param ssp-buffer-size=16 -### %s 2>&1 | FileCheck %s -check-prefix=SSP-BUF +-// SSP-BUF: "-stack-protector" "2" ++// SSP-BUF: "-stack-protector" "1" + // SSP-BUF: "-stack-protector-buffer-size" "16" + + // RUN: %clang -target i386-pc-openbsd -### %s 2>&1 | FileCheck %s -check-prefix=OPENBSD +diff --git a/test/Preprocessor/init.c b/test/Preprocessor/init.c +index 46cfcd6d..c83c82d7 100644 +--- a/test/Preprocessor/init.c ++++ b/test/Preprocessor/init.c +@@ -2603,3 +2603,33 @@ + // RISCV64-LINUX: #define __unix__ 1 + // RISCV64-LINUX: #define linux 1 + // RISCV64-LINUX: #define unix 1 ++ ++// RUN: %clang_cc1 -x c -E -dM -ffreestanding -fgnuc-version=4.2.1 -triple=loongarch64 -target-feature +d /dev/null \ ++// RUN: | FileCheck -match-full-lines -check-prefixes=LOONGARCH64,LOONGARCH64-HASBASICD %s ++// RUN: %clang_cc1 -x c -E -dM -ffreestanding -fgnuc-version=4.2.1 -triple=loongarch64 -target-feature +f /dev/null \ ++// RUN: | FileCheck -match-full-lines -check-prefixes=LOONGARCH64,LOONGARCH64-HASBASICF %s ++// RUN: %clang_cc1 -x c -E -dM -ffreestanding -fgnuc-version=4.2.1 -triple=loongarch64 -target-feature -d -target-feature -f /dev/null \ ++// RUN: | FileCheck -match-full-lines -check-prefixes=LOONGARCH64,LOONGARCH64-SOFT %s ++// RUN: %clang_cc1 -x c -E -dM -ffreestanding -fgnuc-version=4.2.1 -triple=loongarch64 -target-abi lp64s /dev/null \ ++// RUN: | FileCheck -match-full-lines -check-prefixes=LOONGARCH64,LOONGARCH64-LP64S %s ++// RUN: %clang_cc1 -x c -E -dM -ffreestanding -fgnuc-version=4.2.1 -triple=loongarch64 -target-abi lp64f /dev/null \ 
++// RUN: | FileCheck -match-full-lines -check-prefixes=LOONGARCH64,LOONGARCH64-LP64F,LOONGARCH64-HARD %s ++// RUN: %clang_cc1 -x c -E -dM -ffreestanding -fgnuc-version=4.2.1 -triple=loongarch64 -target-abi lp64d /dev/null \ ++// RUN: | FileCheck -match-full-lines -check-prefixes=LOONGARCH64,LOONGARCH64-LP64D,LOONGARCH64-HARD %s ++// RUN: %clang_cc1 -x c -E -dM -ffreestanding -fgnuc-version=4.2.1 -triple=loongarch64 /dev/null \ ++// RUN: | FileCheck -match-full-lines -check-prefix=LOONGARCH64 %s ++// LOONGARCH64: #define _LOONGARCH_ARCH "loongarch64" ++// LOONGARCH64: #define _LOONGARCH_SZINT 32 ++// LOONGARCH64: #define _LOONGARCH_SZLONG 64 ++// LOONGARCH64: #define _LOONGARCH_SZPTR 64 ++// LOONGARCH64: #define _LOONGARCH_TUNE "la464" ++// LOONGARCH64: #define __loongarch__ 1 ++// LOONGARCH64-LP64D: #define __loongarch_double_float 1 ++// LOONGARCH64-HASBASICD: #define __loongarch_frlen 64 ++// LOONGARCH64-HASBASICF: #define __loongarch_frlen 32 ++// LOONGARCH64-SOFT: #define __loongarch_frlen 0 ++// LOONGARCH64: #define __loongarch_grlen 64 ++// LOONGARCH64-HARD: #define __loongarch_hard_float 1 ++// LOONGARCH64: #define __loongarch_lp64 1 ++// LOONGARCH64-LP64F: #define __loongarch_single_float 1 ++// LOONGARCH64-LP64S: #define __loongarch_soft_float 1 +diff --git a/unittests/Interpreter/ExceptionTests/InterpreterExceptionTest.cpp b/unittests/Interpreter/ExceptionTests/InterpreterExceptionTest.cpp +index 75928d91..3350ee3f 100644 +--- a/unittests/Interpreter/ExceptionTests/InterpreterExceptionTest.cpp ++++ b/unittests/Interpreter/ExceptionTests/InterpreterExceptionTest.cpp +@@ -104,6 +104,11 @@ extern "C" int throw_exception() { + if (Triple.isPPC()) + return; + ++ // FIXME: LoongArch64 fails due to `Symbols not found: ++ // [DW.ref.__gxx_personality_v0]` ++ if (Triple.isLoongArch64()) ++ return; ++ + // FIXME: ARM fails due to `Not implemented relocation type!` + if (Triple.isARM()) + return; +-- +2.38.1 + diff --git a/clang14/0002-add-loong64-support.patch b/clang14/0002-add-loong64-support.patch new file mode 100644 index 0000000000..298561e608 --- /dev/null +++ b/clang14/0002-add-loong64-support.patch @@ -0,0 +1,9243 @@ +From 7f62cae72a49ab95af602ef5103ba0aeee68b604 Mon Sep 17 00:00:00 2001 +From: Xiaotian Wu +Date: Tue, 20 Dec 2022 18:54:24 +0800 +Subject: [PATCH 2/2] add loong64 support + +--- + test/CodeGen/LoongArch/abi-lp64d.c | 474 +++ + .../LoongArch/inlineasm-float-double-in-gpr.c | 49 + + test/CodeGen/builtins-loongarch-base.c | 417 ++ + test/CodeGen/builtins-loongarch-lasx-error.c | 266 ++ + test/CodeGen/builtins-loongarch-lasx.c | 3761 +++++++++++++++++ + test/CodeGen/builtins-loongarch-lsx-error.c | 250 ++ + test/CodeGen/builtins-loongarch-lsx.c | 3630 ++++++++++++++++ + test/CodeGen/loongarch-inline-asm-modifiers.c | 50 + + test/CodeGen/loongarch-inline-asm.c | 31 + + .../LoongArch/abi-lp64d-struct-inherit.cpp | 95 + + test/Driver/loongarch-abi-fpu.c | 26 + + test/Driver/loongarch-alignment-feature.c | 8 + + test/Driver/loongarch-double-single-soft.c | 12 + + test/Driver/loongarch-mabi.c | 22 + + test/Driver/loongarch-mfpu.c | 21 + + 15 files changed, 9112 insertions(+) + create mode 100644 test/CodeGen/LoongArch/abi-lp64d.c + create mode 100644 test/CodeGen/LoongArch/inlineasm-float-double-in-gpr.c + create mode 100644 test/CodeGen/builtins-loongarch-base.c + create mode 100644 test/CodeGen/builtins-loongarch-lasx-error.c + create mode 100644 test/CodeGen/builtins-loongarch-lasx.c + create mode 100644 test/CodeGen/builtins-loongarch-lsx-error.c + create mode 100644 
test/CodeGen/builtins-loongarch-lsx.c + create mode 100644 test/CodeGen/loongarch-inline-asm-modifiers.c + create mode 100644 test/CodeGen/loongarch-inline-asm.c + create mode 100644 test/CodeGenCXX/LoongArch/abi-lp64d-struct-inherit.cpp + create mode 100644 test/Driver/loongarch-abi-fpu.c + create mode 100644 test/Driver/loongarch-alignment-feature.c + create mode 100644 test/Driver/loongarch-double-single-soft.c + create mode 100644 test/Driver/loongarch-mabi.c + create mode 100644 test/Driver/loongarch-mfpu.c + +diff --git a/test/CodeGen/LoongArch/abi-lp64d.c b/test/CodeGen/LoongArch/abi-lp64d.c +new file mode 100644 +index 00000000..80435701 +--- /dev/null ++++ b/test/CodeGen/LoongArch/abi-lp64d.c +@@ -0,0 +1,474 @@ ++// RUN: %clang_cc1 -triple loongarch64 -target-feature +f -target-feature +d -target-abi lp64d \ ++// RUN: -emit-llvm %s -o - | FileCheck %s ++ ++/// This test checks the calling convention of the lp64d ABI. ++ ++#include ++#include ++ ++/// Part 0: C Data Types and Alignment. ++ ++/// `char` datatype is signed by default. ++/// In most cases, the unsigned integer data types are zero-extended when stored ++/// in general-purpose register, and the signed integer data types are ++/// sign-extended. However, in the LP64D ABI, unsigned 32-bit types, such as ++/// unsigned int, are stored in general-purpose registers as proper sign ++/// extensions of their 32-bit values. ++ ++// CHECK-LABEL: define{{.*}} zeroext i1 @check_bool() ++_Bool check_bool() { return 0; } ++ ++// CHECK-LABEL: define{{.*}} signext i8 @check_char() ++char check_char() { return 0; } ++ ++// CHECK-LABEL: define{{.*}} signext i16 @check_short() ++short check_short() { return 0; } ++ ++// CHECK-LABEL: define{{.*}} signext i32 @check_int() ++int check_int() { return 0; } ++ ++// CHECK-LABEL: define{{.*}} i64 @check_long() ++long check_long() { return 0; } ++ ++// CHECK-LABEL: define{{.*}} i64 @check_longlong() ++long long check_longlong() { return 0; } ++ ++// CHECK-LABEL: define{{.*}} zeroext i8 @check_uchar() ++unsigned char check_uchar() { return 0; } ++ ++// CHECK-LABEL: define{{.*}} zeroext i16 @check_ushort() ++unsigned short check_ushort() { return 0; } ++ ++// CHECK-LABEL: define{{.*}} signext i32 @check_uint() ++unsigned int check_uint() { return 0; } ++ ++// CHECK-LABEL: define{{.*}} i64 @check_ulong() ++unsigned long check_ulong() { return 0; } ++ ++// CHECK-LABEL: define{{.*}} i64 @check_ulonglong() ++unsigned long long check_ulonglong() { return 0; } ++ ++// CHECK-LABEL: define{{.*}} float @check_float() ++float check_float() { return 0; } ++ ++// CHECK-LABEL: define{{.*}} double @check_double() ++double check_double() { return 0; } ++ ++// CHECK-LABEL: define{{.*}} fp128 @check_longdouble() ++long double check_longdouble() { return 0; } ++ ++/// Part 1: Scalar arguments and return value. ++ ++/// The lp64d abi says: ++/// 1. 1 < WOA <= GRLEN ++/// a. Argument is passed in a single argument register, or on the stack by ++/// value if none is available. ++/// i. If the argument is floating-point type, the argument is passed in FAR. if ++/// no FAR is available, it’s passed in GAR. If no GAR is available, it’s ++/// passed on the stack. When passed in registers or on the stack, ++/// floating-point types narrower than GRLEN bits are widened to GRLEN bits, ++/// with the upper bits undefined. ++/// ii. If the argument is integer or pointer type, the argument is passed in ++/// GAR. If no GAR is available, it’s passed on the stack. 
When passed in ++/// registers or on the stack, the unsigned integer scalars narrower than GRLEN ++/// bits are zero-extended to GRLEN bits, and the signed integer scalars are ++/// sign-extended. ++/// 2. GRLEN < WOA ≤ 2 × GRLEN ++/// a. The argument is passed in a pair of GAR, with the low-order GRLEN bits in ++/// the lower-numbered register and the high-order GRLEN bits in the ++/// higher-numbered register. If exactly one register is available, the ++/// low-order GRLEN bits are passed in the register and the high-order GRLEN ++/// bits are passed on the stack. If no GAR is available, it’s passed on the ++/// stack. ++ ++/// Note that most of these conventions are handled at the llvm side, so here we ++/// only check the correctness of argument (or return value)'s sign/zero ++/// extension attribute. ++ ++// CHECK-LABEL: define{{.*}} signext i32 @f_scalar(i1{{.*}} zeroext %a, i8{{.*}} signext %b, i8{{.*}} zeroext %c, i16{{.*}} signext %d, i16{{.*}} zeroext %e, i32{{.*}} signext %f, i32{{.*}} signext %g, i64{{.*}} %h, i1{{.*}} zeroext %i, i8{{.*}} signext %j, i8{{.*}} zeroext %k, i16{{.*}} signext %l, i16{{.*}} zeroext %m, i32{{.*}} signext %n, i32{{.*}} signext %o, i64{{.*}} %p) ++int f_scalar(_Bool a, int8_t b, uint8_t c, int16_t d, uint16_t e, int32_t f, ++ uint32_t g, int64_t h, /* begin of stack passing -> */ _Bool i, ++ int8_t j, uint8_t k, int16_t l, uint16_t m, int32_t n, ++ uint32_t o, int64_t p) { ++ return 0; ++} ++ ++/// Part 2: Structure arguments and return value. ++ ++/// The lp64d abi says: ++/// Empty structures are ignored by C compilers which support them as a ++/// non-standard extension(same as union arguments and return values). Bits ++/// unused due to padding, and bits past the end of a structure whose size in ++/// bits is not divisible by GRLEN, are undefined. And the layout of the ++/// structure on the stack is consistent with that in memory. ++ ++/// Check empty structs are ignored. ++ ++struct empty_s {}; ++ ++// CHECK-LABEL: define{{.*}} void @f_empty_s() ++struct empty_s f_empty_s(struct empty_s x) { ++ return x; ++} ++ ++/// 1. 0 < WOA ≤ GRLEN ++/// a. The structure has only fixed-point members. If there is an available GAR, ++/// the structure is passed through the GAR by value passing; If no GAR is ++/// available, it’s passed on the stack. ++ ++struct i16x4_s { ++ int16_t a, b, c, d; ++}; ++ ++// CHECK-LABEL: define{{.*}} i64 @f_i16x4_s(i64 %x.coerce) ++struct i16x4_s f_i16x4_s(struct i16x4_s x) { ++ return x; ++} ++ ++/// b. The structure has only floating-point members: ++/// i. One floating-point member. The argument is passed in a FAR; If no FAR is ++/// available, the value is passed in a GAR; if no GAR is available, the value ++/// is passed on the stack. ++ ++struct f32x1_s { ++ float a; ++}; ++ ++struct f64x1_s { ++ double a; ++}; ++ ++// CHECK-LABEL: define{{.*}} float @f_f32x1_s(float %0) ++struct f32x1_s f_f32x1_s(struct f32x1_s x) { ++ return x; ++} ++ ++// CHECK-LABEL: define{{.*}} double @f_f64x1_s(double %0) ++struct f64x1_s f_f64x1_s(struct f64x1_s x) { ++ return x; ++} ++ ++/// ii. Two floating-point members. The argument is passed in a pair of ++/// available FAR, with the low-order float member bits in the lower-numbered ++/// FAR and the high-order float member bits in the higher-numbered FAR. If the ++/// number of available FAR is less than 2, it’s passed in a GAR, and passed on ++/// the stack if no GAR is available. 
++ ++struct f32x2_s { ++ float a, b; ++}; ++ ++// CHECK-LABEL: define{{.*}} { float, float } @f_f32x2_s(float %0, float %1) ++struct f32x2_s f_f32x2_s(struct f32x2_s x) { ++ return x; ++} ++ ++/// c. The structure has both fixed-point and floating-point members, i.e. the ++/// structure has one float member and... ++/// i. Multiple fixed-point members. If there are available GAR, the structure ++/// is passed in a GAR, and passed on the stack if no GAR is available. ++ ++struct f32x1_i16x2_s { ++ float a; ++ int16_t b, c; ++}; ++ ++// CHECK-LABEL: define{{.*}} i64 @f_f32x1_i16x2_s(i64 %x.coerce) ++struct f32x1_i16x2_s f_f32x1_i16x2_s(struct f32x1_i16x2_s x) { ++ return x; ++} ++ ++/// ii. Only one fixed-point member. If one FAR and one GAR are available, the ++/// floating-point member of the structure is passed in the FAR, and the integer ++/// member of the structure is passed in the GAR; If no floating-point register ++/// but one GAR is available, it’s passed in GAR; If no GAR is available, it’s ++/// passed on the stack. ++ ++struct f32x1_i32x1_s { ++ float a; ++ int32_t b; ++}; ++ ++// CHECK-LABEL: define{{.*}} { float, i32 } @f_f32x1_i32x1_s(float %0, i32 %1) ++struct f32x1_i32x1_s f_f32x1_i32x1_s(struct f32x1_i32x1_s x) { ++ return x; ++} ++ ++/// 2. GRLEN < WOA ≤ 2 × GRLEN ++/// a. Only fixed-point members. ++/// i. The argument is passed in a pair of available GAR, with the low-order ++/// bits in the lower-numbered GAR and the high-order bits in the ++/// higher-numbered GAR. If only one GAR is available, the low-order bits are in ++/// the GAR and the high-order bits are on the stack, and passed on the stack if ++/// no GAR is available. ++ ++struct i64x2_s { ++ int64_t a, b; ++}; ++ ++// CHECK-LABEL: define{{.*}} [2 x i64] @f_i64x2_s([2 x i64] %x.coerce) ++struct i64x2_s f_i64x2_s(struct i64x2_s x) { ++ return x; ++} ++ ++/// b. Only floating-point members. ++/// i. The structure has one long double member or one double member and two ++/// adjacent float members or 3-4 float members. The argument is passed in a ++/// pair of available GAR, with the low-order bits in the lower-numbered GAR and ++/// the high-order bits in the higher-numbered GAR. If only one GAR is ++/// available, the low-order bits are in the GAR and the high-order bits are on ++/// the stack, and passed on the stack if no GAR is available. ++ ++struct f128x1_s { ++ long double a; ++}; ++ ++// CHECK-LABEL: define{{.*}} i128 @f_f128x1_s(i128 %x.coerce) ++struct f128x1_s f_f128x1_s(struct f128x1_s x) { ++ return x; ++} ++ ++struct f64x1_f32x2_s { ++ double a; ++ float b, c; ++}; ++ ++// CHECK-LABEL: define{{.*}} [2 x i64] @f_f64x1_f32x2_s([2 x i64] %x.coerce) ++struct f64x1_f32x2_s f_f64x1_f32x2_s(struct f64x1_f32x2_s x) { ++ return x; ++} ++ ++struct f32x3_s { ++ float a, b, c; ++}; ++ ++// CHECK-LABEL: define{{.*}} [2 x i64] @f_f32x3_s([2 x i64] %x.coerce) ++struct f32x3_s f_f32x3_s(struct f32x3_s x) { ++ return x; ++} ++ ++struct f32x4_s { ++ float a, b, c, d; ++}; ++ ++// CHECK-LABEL: define{{.*}} [2 x i64] @f_f32x4_s([2 x i64] %x.coerce) ++struct f32x4_s f_f32x4_s(struct f32x4_s x) { ++ return x; ++} ++ ++/// ii. The structure with two double members is passed in a pair of available ++/// FARs. If no a pair of available FARs, it’s passed in GARs. A structure with ++/// one double member and one float member is same. 
++ ++struct f64x2_s { ++ double a, b; ++}; ++ ++// CHECK-LABEL: define{{.*}} { double, double } @f_f64x2_s(double %0, double %1) ++struct f64x2_s f_f64x2_s(struct f64x2_s x) { ++ return x; ++} ++ ++/// c. Both fixed-point and floating-point members. ++/// i. The structure has one double member and only one fixed-point member. ++/// A. If one FAR and one GAR are available, the floating-point member of the ++/// structure is passed in the FAR, and the integer member of the structure is ++/// passed in the GAR; If no floating-point registers but two GARs are ++/// available, it’s passed in the two GARs; If only one GAR is available, the ++/// low-order bits are in the GAR and the high-order bits are on the stack; And ++/// it’s passed on the stack if no GAR is available. ++ ++struct f64x1_i64x1_s { ++ double a; ++ int64_t b; ++}; ++ ++// CHECK-LABEL: define{{.*}} { double, i64 } @f_f64x1_i64x1_s(double %0, i64 %1) ++struct f64x1_i64x1_s f_f64x1_i64x1_s(struct f64x1_i64x1_s x) { ++ return x; ++} ++ ++/// ii. Others ++/// A. The argument is passed in a pair of available GAR, with the low-order ++/// bits in the lower-numbered GAR and the high-order bits in the ++/// higher-numbered GAR. If only one GAR is available, the low-order bits are in ++/// the GAR and the high-order bits are on the stack, and passed on the stack if ++/// no GAR is available. ++ ++struct f64x1_i32x2_s { ++ double a; ++ int32_t b, c; ++}; ++ ++// CHECK-LABEL: define{{.*}} [2 x i64] @f_f64x1_i32x2_s([2 x i64] %x.coerce) ++struct f64x1_i32x2_s f_f64x1_i32x2_s(struct f64x1_i32x2_s x) { ++ return x; ++} ++ ++struct f32x2_i32x2_s { ++ float a, b; ++ int32_t c, d; ++}; ++ ++// CHECK-LABEL: define{{.*}} [2 x i64] @f_f32x2_i32x2_s([2 x i64] %x.coerce) ++struct f32x2_i32x2_s f_f32x2_i32x2_s(struct f32x2_i32x2_s x) { ++ return x; ++} ++ ++/// 3. WOA > 2 × GRLEN ++/// a. It’s passed by reference and are replaced in the argument list with the ++/// address. If there is an available GAR, the reference is passed in the GAR, ++/// and passed on the stack if no GAR is available. ++ ++struct i64x4_s { ++ int64_t a, b, c, d; ++}; ++ ++// CHECK-LABEL: define{{.*}} void @f_i64x4_s(%struct.i64x4_s*{{.*}} sret(%struct.i64x4_s){{.*}} %agg.result, %struct.i64x4_s*{{.*}} %x) ++struct i64x4_s f_i64x4_s(struct i64x4_s x) { ++ return x; ++} ++ ++struct f64x4_s { ++ double a, b, c, d; ++}; ++ ++// CHECK-LABEL: define{{.*}} void @f_f64x4_s(%struct.f64x4_s*{{.*}} sret(%struct.f64x4_s){{.*}} %agg.result, %struct.f64x4_s*{{.*}} %x) ++struct f64x4_s f_f64x4_s(struct f64x4_s x) { ++ return x; ++} ++ ++/// Part 3: Union arguments and return value. ++ ++/// Check empty unions are ignored. ++ ++union empty_u {}; ++ ++// CHECK-LABEL: define{{.*}} void @f_empty_u() ++union empty_u f_empty_u(union empty_u x) { ++ return x; ++} ++ ++/// Union is passed in GAR or stack. ++/// 1. 0 < WOA ≤ GRLEN ++/// a. The argument is passed in a GAR, or on the stack by value if no GAR is ++/// available. ++ ++union i32_f32_u { ++ int32_t a; ++ float b; ++}; ++ ++// CHECK-LABEL: define{{.*}} i64 @f_i32_f32_u(i64 %x.coerce) ++union i32_f32_u f_i32_f32_u(union i32_f32_u x) { ++ return x; ++} ++ ++union i64_f64_u { ++ int64_t a; ++ double b; ++}; ++ ++// CHECK-LABEL: define{{.*}} i64 @f_i64_f64_u(i64 %x.coerce) ++union i64_f64_u f_i64_f64_u(union i64_f64_u x) { ++ return x; ++} ++ ++/// 2. GRLEN < WOA ≤ 2 × GRLEN ++/// a. 
The argument is passed in a pair of available GAR, with the low-order ++/// bits in the lower-numbered GAR and the high-order bits in the ++/// higher-numbered GAR. If only one GAR is available, the low-order bits are in ++/// the GAR and the high-order bits are on the stack. The arguments are passed ++/// on the stack when no GAR is available. ++ ++union i128_f128_u { ++ __int128_t a; ++ long double b; ++}; ++ ++// CHECK-LABEL: define{{.*}} i128 @f_i128_f128_u(i128 %x.coerce) ++union i128_f128_u f_i128_f128_u(union i128_f128_u x) { ++ return x; ++} ++ ++/// 3. WOA > 2 × GRLEN ++/// a. It’s passed by reference and are replaced in the argument list with the ++/// address. If there is an available GAR, the reference is passed in the GAR, ++/// and passed on the stack if no GAR is available. ++ ++union i64_arr3_u { ++ int64_t a[3]; ++}; ++ ++// CHECK-LABEL: define{{.*}} void @f_i64_arr3_u(%union.i64_arr3_u*{{.*}} sret(%union.i64_arr3_u){{.*}} %agg.result, %union.i64_arr3_u*{{.*}} %x) ++union i64_arr3_u f_i64_arr3_u(union i64_arr3_u x) { ++ return x; ++} ++ ++/// Part 4: Complex number arguments and return value. ++ ++/// A complex floating-point number, or a structure containing just one complex ++/// floating-point number, is passed as though it were a structure containing ++/// two floating-point reals. ++ ++// CHECK-LABEL: define{{.*}} { float, float } @f_floatcomplex(float{{.*}} %x.coerce0, float{{.*}} %x.coerce1) ++float __complex__ f_floatcomplex(float __complex__ x) { return x; } ++ ++// CHECK-LABEL: define{{.*}} { double, double } @f_doublecomplex(double{{.*}} %x.coerce0, double{{.*}} %x.coerce1) ++double __complex__ f_doublecomplex(double __complex__ x) { return x; } ++ ++struct floatcomplex_s { ++ float __complex__ c; ++}; ++// CHECK-LABEL: define{{.*}} { float, float } @f_floatcomplex_s(float %0, float %1) ++struct floatcomplex_s f_floatcomplex_s(struct floatcomplex_s x) { ++ return x; ++} ++ ++struct doublecomplex_s { ++ double __complex__ c; ++}; ++// CHECK-LABEL: define{{.*}} { double, double } @f_doublecomplex_s(double %0, double %1) ++struct doublecomplex_s f_doublecomplex_s(struct doublecomplex_s x) { ++ return x; ++} ++ ++/// Part 5: Variadic arguments. ++ ++/// Variadic arguments are passed in GARs in the same manner as named arguments. ++ ++int f_va_callee(int, ...); ++ ++// CHECK-LABEL: define{{.*}} void @f_va_caller() ++// CHECK: call signext i32 (i32, ...) @f_va_callee(i32{{.*}} signext 1, i32{{.*}} signext 2, i64{{.*}} 3, double{{.*}} 4.000000e+00, double{{.*}} 5.000000e+00, i64 {{.*}}, i64 {{.*}}, i64 {{.*}}) ++void f_va_caller(void) { ++ f_va_callee(1, 2, 3LL, 4.0f, 5.0, (struct i16x4_s){6, 7, 8, 9}, ++ (struct i64x2_s){10, 11}); ++} ++ ++// CHECK-LABE: define signext i32 @f_va_int(i8* %fmt, ...) 
++// CHECK: entry: ++// CHECK: %fmt.addr = alloca i8*, align 8 ++// CHECK: %va = alloca i8*, align 8 ++// CHECK: %v = alloca i32, align 4 ++// CHECK: store i8* %fmt, i8** %fmt.addr, align 8 ++// CHECK: %va1 = bitcast i8** %va to i8* ++// CHECK: call void @llvm.va_start(i8* %va1) ++// CHECK: %argp.cur = load i8*, i8** %va, align 8 ++// CHECK: %argp.next = getelementptr inbounds i8, i8* %argp.cur, i64 8 ++// CHECK: store i8* %argp.next, i8** %va, align 8 ++// CHECK: %0 = bitcast i8* %argp.cur to i32* ++// CHECK: %1 = load i32, i32* %0, align 8 ++// CHECK: store i32 %1, i32* %v, align 4 ++// CHECK: %va2 = bitcast i8** %va to i8* ++// CHECK: call void @llvm.va_end(i8* %va2) ++// CHECK: %2 = load i32, i32* %v, align 4 ++// CHECK: ret i32 %2 ++// CHECK: } ++int f_va_int(char *fmt, ...) { ++ __builtin_va_list va; ++ __builtin_va_start(va, fmt); ++ int v = __builtin_va_arg(va, int); ++ __builtin_va_end(va); ++ return v; ++} +diff --git a/test/CodeGen/LoongArch/inlineasm-float-double-in-gpr.c b/test/CodeGen/LoongArch/inlineasm-float-double-in-gpr.c +new file mode 100644 +index 00000000..bc9c616b +--- /dev/null ++++ b/test/CodeGen/LoongArch/inlineasm-float-double-in-gpr.c +@@ -0,0 +1,49 @@ ++// RUN: %clang_cc1 -triple loongarch64 -O2 -emit-llvm %s -o - \ ++// RUN: | FileCheck %s ++ ++float f; ++double d; ++ ++// CHECK-LABEL: @reg_float( ++// CHECK: [[FLT_ARG:%.*]] = load float, float* @f ++// CHECK: call void asm sideeffect "", "r"(float [[FLT_ARG]]) ++// CHECK: ret void ++void reg_float() { ++ float a = f; ++ asm volatile("" ++ : ++ : "r"(a)); ++} ++ ++// CHECK-LABEL: @r4_float( ++// CHECK: [[FLT_ARG:%.*]] = load float, float* @f ++// CHECK: call void asm sideeffect "", "{$r4}"(float [[FLT_ARG]]) ++// CHECK: ret void ++void r4_float() { ++ register float a asm("$r4") = f; ++ asm volatile("" ++ : ++ : "r"(a)); ++} ++ ++// CHECK-LABEL: @reg_double( ++// CHECK: [[DBL_ARG:%.*]] = load double, double* @d ++// CHECK: call void asm sideeffect "", "r"(double [[DBL_ARG]]) ++// CHECK: ret void ++void reg_double() { ++ double a = d; ++ asm volatile("" ++ : ++ : "r"(a)); ++} ++ ++// CHECK-LABEL: @r4_double( ++// CHECK: [[DBL_ARG:%.*]] = load double, double* @d ++// CHECK: call void asm sideeffect "", "{$r4}"(double [[DBL_ARG]]) ++// CHECK: ret void ++void r4_double() { ++ register double a asm("$r4") = d; ++ asm volatile("" ++ : ++ : "r"(a)); ++} +diff --git a/test/CodeGen/builtins-loongarch-base.c b/test/CodeGen/builtins-loongarch-base.c +new file mode 100644 +index 00000000..d7221359 +--- /dev/null ++++ b/test/CodeGen/builtins-loongarch-base.c +@@ -0,0 +1,417 @@ ++// REQUIRES: loongarch-registered-target ++// RUN: %clang_cc1 -triple loongarch64-linux-gnu -emit-llvm %s -o - | FileCheck %s ++ ++#include ++ ++typedef char i8; ++typedef unsigned char u8; ++typedef short i16; ++typedef unsigned short u16; ++typedef int i32; ++typedef unsigned int u32; ++ ++#if __LONG_MAX__ == __LONG_LONG_MAX__ ++typedef long int i64; ++typedef unsigned long int u64; ++#else ++typedef long long i64; ++typedef unsigned long long u64; ++#endif ++ ++__drdtime_t drdtime; ++__rdtime_t rdtime; ++ ++void cpucfg(){ ++ ++ u32 u32_r, u32_a; ++ // __cpucfg ++ // rd, rj ++ // unsigned int, unsigned int ++ u32_r= __builtin_loongarch_cpucfg(u32_a); // CHECK: call i32 @llvm.loongarch.cpucfg ++ ++} ++ ++void csrrd(){ ++ ++ u32 u32_r; ++ // __csrrd ++ // rd, csr_num ++ // unsigned int, uimm14_32 ++ u32_r=__builtin_loongarch_csrrd(1); // CHECK: call i32 @llvm.loongarch.csrrd ++ ++} ++ ++void dcsrrd(){ ++ ++ u64 u64_r; ++ // __dcsrrd ++ // rd, 
csr_num ++ // unsigned long int, uimm14 ++ u64_r=__builtin_loongarch_dcsrrd(1); // CHECK: call i64 @llvm.loongarch.dcsrrd ++ ++} ++ ++void csrwr(){ ++ ++ u32 u32_r, u32_a; ++ // __csrwr ++ // rd, csr_num ++ // unsigned int, uimm14_32 ++ u32_r=__builtin_loongarch_csrwr(u32_a, 1); // CHECK: call i32 @llvm.loongarch.csrwr ++ ++} ++ ++void dcsrwr(){ ++ ++ u64 u64_r, u64_a; ++ // __dcsrwr ++ // rd, csr_num ++ // unsigned long int, uimm14 ++ u64_r=__builtin_loongarch_dcsrwr(u64_a, 1); // CHECK: call i64 @llvm.loongarch.dcsrwr ++ ++} ++ ++void csrxchg(){ ++ ++ u32 u32_r, u32_a, u32_b; ++ // __csrxchg ++ // rd, rj, csr_num ++ // unsigned int, unsigned int, uimm14_32 ++ u32_r=__builtin_loongarch_csrxchg(u32_a, u32_b, 1); // CHECK: call i32 @llvm.loongarch.csrxchg ++ ++} ++ ++void dcsrxchg(){ ++ ++ u64 u64_r, u64_a, u64_b; ++ // __dcsrxchg ++ // rd, rj, csr_num ++ // unsigned long int, unsigned long int, uimm14 ++ u64_r=__builtin_loongarch_dcsrxchg(u64_a, u64_b, 1); // CHECK: call i64 @llvm.loongarch.dcsrxchg ++ ++} ++ ++void iocsrrd_b(){ ++ ++ u32 u32_a; ++ u8 u8_r; ++ // __iocsrrd_b ++ // rd, rj ++ // unsigned char, unsigned int ++ u8_r=__builtin_loongarch_iocsrrd_b(u32_a); // CHECK: call i32 @llvm.loongarch.iocsrrd.b ++ ++} ++ ++void iocsrrd_h(){ ++ ++ u32 u32_a; ++ u16 u16_r; ++ // __iocsrrd_h ++ // rd, rj ++ // unsigned short, unsigned int ++ u16_r=__builtin_loongarch_iocsrrd_h(u32_a); // CHECK: call i32 @llvm.loongarch.iocsrrd.h ++ ++} ++ ++void iocsrrd_w(){ ++ ++ u32 u32_r, u32_a; ++ // __iocsrrd_w ++ // rd, rj ++ // unsigned int, unsigned int ++ u32_r=__builtin_loongarch_iocsrrd_w(u32_a); // CHECK: call i32 @llvm.loongarch.iocsrrd.w ++ ++} ++ ++void iocsrrd_d(){ ++ ++ u32 u32_a; ++ u64 u64_r; ++ // __iocsrrd_d ++ // rd, rj ++ // unsigned long int, unsigned int ++ u64_r=__builtin_loongarch_iocsrrd_d(u32_a); // CHECK: call i64 @llvm.loongarch.iocsrrd.d ++ ++} ++ ++void iocsrwr_b(){ ++ ++ u32 u32_a; ++ u8 u8_a; ++ // __iocsrwr_b ++ // rd, rj ++ // unsigned char, unsigned int ++ __builtin_loongarch_iocsrwr_b(u8_a, u32_a); // CHECK: void @llvm.loongarch.iocsrwr.b ++ ++} ++ ++void iocsrwr_h(){ ++ ++ u32 u32_a; ++ u16 u16_a; ++ // __iocsrwr_h ++ // rd, rj ++ // unsigned short, unsigned int ++ __builtin_loongarch_iocsrwr_h(u16_a, u32_a); // CHECK: void @llvm.loongarch.iocsrwr.h ++ ++} ++ ++void iocsrwr_w(){ ++ ++ u32 u32_a, u32_b; ++ // __iocsrwr_w ++ // rd, rj ++ // unsigned int, unsigned int ++ __builtin_loongarch_iocsrwr_w(u32_a, u32_b); // CHECK: void @llvm.loongarch.iocsrwr.w ++ ++} ++ ++void iocsrwr_d(){ ++ ++ u32 u32_a; ++ u64 u64_a; ++ // __iocsrwr_d ++ // rd, rj ++ // unsigned long int, unsigned int ++ __builtin_loongarch_iocsrwr_d(u64_a, u32_a); // CHECK: void @llvm.loongarch.iocsrwr.d ++ ++} ++ ++void cacop(){ ++ ++ i32 i32_a; ++ // __cacop ++ // op, rj, si12 ++ // uimm5, unsigned int, simm12 ++ __builtin_loongarch_cacop(1, i32_a, 2); // CHECK: void @llvm.loongarch.cacop ++ ++} ++ ++void dcacop(){ ++ ++ i64 i64_a; ++ // __dcacop ++ // op, rj, si12 ++ // uimm5, unsigned long int, simm12 ++ __builtin_loongarch_dcacop(1, i64_a, 2); // CHECK: void @llvm.loongarch.dcacop ++ ++} ++ ++void rdtime_d(){ ++ ++ drdtime= __builtin_loongarch_rdtime_d(); // CHECK: call { i64, i64 } asm sideeffect "rdtime.d\09$0,$1\0A\09", "=&r,=&r"() ++ ++} ++ ++void rdtimeh_w(){ ++ ++ rdtime= __builtin_loongarch_rdtimeh_w(); // CHECK: call { i32, i32 } asm sideeffect "rdtimeh.w\09$0,$1\0A\09", "=&r,=&r"() ++ ++} ++ ++void rdtimel_w(){ ++ ++ rdtime= __builtin_loongarch_rdtimel_w(); // CHECK: call { i32, i32 } asm 
sideeffect "rdtimel.w\09$0,$1\0A\09", "=&r,=&r"() ++ ++} ++ ++void crc_w_b_w(){ ++ ++ i32 i32_r, i32_a; ++ i8 i8_a; ++ // __crc_w_b_w ++ // rd, rj, rk ++ // int, char, int ++ i32_r=__builtin_loongarch_crc_w_b_w(i8_a, i32_a); // CHECK: call i32 @llvm.loongarch.crc.w.b.w ++ ++} ++ ++void crc_w_h_w(){ ++ ++ i32 i32_r, i32_a; ++ i16 i16_a; ++ // __crc_w_h_w ++ // rd, rj, rk ++ // int, short, int ++ i32_r=__builtin_loongarch_crc_w_h_w(i16_a, i32_a); // CHECK: call i32 @llvm.loongarch.crc.w.h.w ++ ++} ++ ++void crc_w_w_w(){ ++ ++ i32 i32_r, i32_a, i32_b; ++ // __crc_w_w_w ++ // rd, rj, rk ++ // int, int, int ++ i32_r=__builtin_loongarch_crc_w_w_w(i32_a, i32_b); // CHECK: call i32 @llvm.loongarch.crc.w.w.w ++ ++} ++ ++void crc_w_d_w(){ ++ ++ i32 i32_r, i32_a; ++ i64 i64_a; ++ // __crc_w_d_w ++ // rd, rj, rk ++ // int, long int, int ++ i32_r=__builtin_loongarch_crc_w_d_w(i64_a, i32_a); // CHECK: call i32 @llvm.loongarch.crc.w.d.w ++ ++} ++ ++void crcc_w_b_w(){ ++ ++ i32 i32_r, i32_a; ++ i8 i8_a; ++ // __crcc_w_b_w ++ // rd, rj, rk ++ // int, char, int ++ i32_r=__builtin_loongarch_crcc_w_b_w(i8_a, i32_a); // CHECK: call i32 @llvm.loongarch.crcc.w.b.w ++ ++} ++ ++void crcc_w_h_w(){ ++ ++ i32 i32_r, i32_a; ++ i16 i16_a; ++ // __crcc_w_h_w ++ // rd, rj, rk ++ // int, short, int ++ i32_r=__builtin_loongarch_crcc_w_h_w(i16_a, i32_a); // CHECK: call i32 @llvm.loongarch.crcc.w.h.w ++ ++} ++ ++void crcc_w_w_w(){ ++ ++ i32 i32_r, i32_a, i32_b; ++ // __crcc_w_w_w ++ // rd, rj, rk ++ // int, int, int ++ i32_r=__builtin_loongarch_crcc_w_w_w(i32_a, i32_b); // CHECK: call i32 @llvm.loongarch.crcc.w.w.w ++ ++} ++ ++void crcc_w_d_w(){ ++ ++ i32 i32_r, i32_a; ++ i64 i64_a; ++ // __crcc_w_d_w ++ // rd, rj, rk ++ // int, long int, int ++ i32_r=__builtin_loongarch_crcc_w_d_w(i64_a, i32_a); // CHECK: call i32 @llvm.loongarch.crcc.w.d.w ++ ++} ++ ++void tlbclr(){ ++ ++ // __tlbclr ++ __builtin_loongarch_tlbclr(); // CHECK: call void @llvm.loongarch.tlbclr ++ ++} ++ ++void tlbflush(){ ++ ++ // __tlbflush ++ __builtin_loongarch_tlbflush(); // CHECK: call void @llvm.loongarch.tlbflush ++ ++} ++ ++void tlbfill(){ ++ ++ // __tlbfill ++ __builtin_loongarch_tlbfill(); // CHECK: call void @llvm.loongarch.tlbfill ++ ++} ++ ++void tlbrd(){ ++ ++ // __tlbrd ++ __builtin_loongarch_tlbrd(); // CHECK: call void @llvm.loongarch.tlbrd ++ ++} ++ ++void tlbwr(){ ++ ++ // __tlbwr ++ __builtin_loongarch_tlbwr(); // CHECK: call void @llvm.loongarch.tlbwr ++ ++} ++ ++void tlbsrch(){ ++ ++ // __tlbsrch ++ __builtin_loongarch_tlbsrch(); // CHECK: call void @llvm.loongarch.tlbsrch ++ ++} ++ ++void syscall(){ ++ ++ // __syscall ++ // Code ++ // uimm15 ++ __builtin_loongarch_syscall(1); // CHECK: call void @llvm.loongarch.syscall ++ ++} ++ ++void break_builtin(){ ++ ++ // __break ++ // Code ++ // uimm15 ++ __builtin_loongarch_break(1); // CHECK: call void @llvm.loongarch.break ++ ++} ++ ++void asrtle_d(){ ++ ++ i64 i64_a, i64_b; ++ // __asrtle_d ++ // rj, rk ++ // long int, long int ++ __builtin_loongarch_asrtle_d(i64_a, i64_b); // CHECK: call void @llvm.loongarch.asrtle.d ++ ++} ++ ++void asrtgt_d(){ ++ ++ i64 i64_a, i64_b; ++ // __asrtgt_d ++ // rj, rk ++ // long int, long int ++ __builtin_loongarch_asrtgt_d(i64_a, i64_b); // CHECK: call void @llvm.loongarch.asrtgt.d ++ ++} ++ ++void dbar(){ ++ ++ // __dbar ++ // hint ++ // uimm15 ++ __builtin_loongarch_dbar(0); // CHECK: call void @llvm.loongarch.dbar ++ ++} ++ ++void ibar(){ ++ ++ // __ibar ++ // hint ++ // uimm15 ++ __builtin_loongarch_ibar(0); // CHECK: call void @llvm.loongarch.ibar ++ ++} 
++ ++void movfcsr2gr(){ ++ ++ u32 u32_r; ++ // __movfcsr2gr ++ u32_r=__movfcsr2gr(0); // CHECK: call i32 asm sideeffect "movfcsr2gr $0, $$fcsr0", "=&r"() ++ ++} ++ ++ ++void movgr2fcsr() { ++ ++ u32 u32_a; ++ // __movgr2fcsr ++ __movgr2fcsr(0, u32_a); // CHECK: call void asm sideeffect "movgr2fcsr $$fcsr0, $0", "r"(i32 %0) ++ ++} +diff --git a/test/CodeGen/builtins-loongarch-lasx-error.c b/test/CodeGen/builtins-loongarch-lasx-error.c +new file mode 100644 +index 00000000..99f2687e +--- /dev/null ++++ b/test/CodeGen/builtins-loongarch-lasx-error.c +@@ -0,0 +1,266 @@ ++// REQUIRES: loongarch-registered-target ++// RUN: %clang_cc1 -triple loongarch64-unknown-linux-gnu -fsyntax-only %s \ ++// RUN: -target-feature +lasx \ ++// RUN: -verify -o - 2>&1 ++ ++#include ++ ++void test() { ++ v32i8 v32i8_a = (v32i8){0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, ++ 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31}; ++ v32i8 v32i8_b = (v32i8){1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, ++ 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32}; ++ v32i8 v32i8_c = (v32i8){2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, ++ 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33}; ++ v32i8 v32i8_r; ++ ++ v16i16 v16i16_a = (v16i16){0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15}; ++ v16i16 v16i16_b = (v16i16){1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16}; ++ v16i16 v16i16_c = (v16i16){2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17}; ++ v16i16 v16i16_r; ++ ++ v8i32 v8i32_a = (v8i32){0, 1, 2, 3, 4, 5, 6, 7}; ++ v8i32 v8i32_b = (v8i32){1, 2, 3, 4, 5, 6, 7, 8}; ++ v8i32 v8i32_c = (v8i32){2, 3, 4, 5, 6, 7, 8, 9}; ++ v8i32 v8i32_r; ++ ++ v4i64 v4i64_a = (v4i64){0, 1, 2, 3}; ++ v4i64 v4i64_b = (v4i64){1, 2, 3, 4}; ++ v4i64 v4i64_c = (v4i64){2, 3, 4, 5}; ++ v4i64 v4i64_r; ++ ++ v32u8 v32u8_a = (v32u8){0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, ++ 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31}; ++ v32u8 v32u8_b = (v32u8){1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, ++ 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32}; ++ v32u8 v32u8_c = (v32u8){2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, ++ 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33}; ++ v32u8 v32u8_r; ++ ++ v16u16 v16u16_a = (v16u16){0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15}; ++ v16u16 v16u16_b = (v16u16){1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16}; ++ v16u16 v16u16_c = (v16u16){2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17}; ++ v16u16 v16u16_r; ++ ++ v8u32 v8u32_a = (v8u32){0, 1, 2, 3, 4, 5, 6, 7}; ++ v8u32 v8u32_b = (v8u32){1, 2, 3, 4, 5, 6, 7, 8}; ++ v8u32 v8u32_c = (v8u32){2, 3, 4, 5, 6, 7, 8, 9}; ++ v8u32 v8u32_r; ++ ++ v4u64 v4u64_a = (v4u64){0, 1, 2, 3}; ++ v4u64 v4u64_b = (v4u64){1, 2, 3, 4}; ++ v4u64 v4u64_c = (v4u64){2, 3, 4, 5}; ++ v4u64 v4u64_r; ++ ++ v8f32 v8f32_a = (v8f32){0.5, 1, 2, 3, 4, 5, 6, 7}; ++ v8f32 v8f32_b = (v8f32){1.5, 2, 3, 4, 5, 6, 7, 8}; ++ v8f32 v8f32_c = (v8f32){2.5, 3, 4, 5, 6, 7, 8, 9}; ++ v8f32 v8f32_r; ++ v4f64 v4f64_a = (v4f64){0.5, 1, 2, 3}; ++ v4f64 v4f64_b = (v4f64){1.5, 2, 3, 4}; ++ v4f64 v4f64_c = (v4f64){2.5, 3, 4, 5}; ++ v4f64 v4f64_r; ++ ++ int i32_r; ++ int i32_a = 1; ++ int i32_b = 2; ++ unsigned int u32_r; ++ unsigned int u32_a = 1; ++ unsigned int u32_b = 2; ++ long long i64_r; ++ long long i64_a = 1; ++ long long i64_b = 2; ++ long long i64_c = 3; ++ unsigned long long u64_r; ++ unsigned long long u64_a = 1; ++ unsigned long 
long u64_b = 2; ++ unsigned long long u64_c = 3; ++ ++ v32i8_r = __lasx_xvslli_b(v32i8_a, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}} ++ v16i16_r = __lasx_xvslli_h(v16i16_a, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} ++ v8i32_r = __lasx_xvslli_w(v8i32_a, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ v4i64_r = __lasx_xvslli_d(v4i64_a, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} ++ v32i8_r = __lasx_xvsrai_b(v32i8_a, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}} ++ v16i16_r = __lasx_xvsrai_h(v16i16_a, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} ++ v8i32_r = __lasx_xvsrai_w(v8i32_a, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ v4i64_r = __lasx_xvsrai_d(v4i64_a, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} ++ v32i8_r = __lasx_xvsrari_b(v32i8_a, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}} ++ v16i16_r = __lasx_xvsrari_h(v16i16_a, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} ++ v8i32_r = __lasx_xvsrari_w(v8i32_a, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ v4i64_r = __lasx_xvsrari_d(v4i64_a, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} ++ v32i8_r = __lasx_xvsrli_b(v32i8_a, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}} ++ v16i16_r = __lasx_xvsrli_h(v16i16_a, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} ++ v8i32_r = __lasx_xvsrli_w(v8i32_a, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ v4i64_r = __lasx_xvsrli_d(v4i64_a, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} ++ v32i8_r = __lasx_xvsrlri_b(v32i8_a, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}} ++ v16i16_r = __lasx_xvsrlri_h(v16i16_a, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} ++ v8i32_r = __lasx_xvsrlri_w(v8i32_a, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ v4i64_r = __lasx_xvsrlri_d(v4i64_a, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} ++ v32u8_r = __lasx_xvbitclri_b(v32u8_a, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}} ++ v16u16_r = __lasx_xvbitclri_h(v16u16_a, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} ++ v8u32_r = __lasx_xvbitclri_w(v8u32_a, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ v4u64_r = __lasx_xvbitclri_d(v4u64_a, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} ++ v32u8_r = __lasx_xvbitseti_b(v32u8_a, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}} ++ v16u16_r = __lasx_xvbitseti_h(v16u16_a, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} ++ v8u32_r = __lasx_xvbitseti_w(v8u32_a, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ v4u64_r = __lasx_xvbitseti_d(v4u64_a, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} ++ v32u8_r = __lasx_xvbitrevi_b(v32u8_a, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}} ++ v16u16_r = __lasx_xvbitrevi_h(v16u16_a, 16); // 
expected-error {{argument value 16 is outside the valid range [0, 15]}} ++ v8u32_r = __lasx_xvbitrevi_w(v8u32_a, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ v4u64_r = __lasx_xvbitrevi_d(v4u64_a, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} ++ v32i8_r = __lasx_xvaddi_bu(v32i8_a, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ v16i16_r = __lasx_xvaddi_hu(v16i16_a, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ v8i32_r = __lasx_xvaddi_wu(v8i32_a, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ v4i64_r = __lasx_xvaddi_du(v4i64_a, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ v32i8_r = __lasx_xvsubi_bu(v32i8_a, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ v16i16_r = __lasx_xvsubi_hu(v16i16_a, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ v8i32_r = __lasx_xvsubi_wu(v8i32_a, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ v4i64_r = __lasx_xvsubi_du(v4i64_a, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ v32i8_r = __lasx_xvmaxi_b(v32i8_a, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}} ++ v16i16_r = __lasx_xvmaxi_h(v16i16_a, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}} ++ v8i32_r = __lasx_xvmaxi_w(v8i32_a, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}} ++ v4i64_r = __lasx_xvmaxi_d(v4i64_a, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}} ++ v32u8_r = __lasx_xvmaxi_bu(v32u8_a, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ v16u16_r = __lasx_xvmaxi_hu(v16u16_a, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ v8u32_r = __lasx_xvmaxi_wu(v8u32_a, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ v4u64_r = __lasx_xvmaxi_du(v4u64_a, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ v32i8_r = __lasx_xvmini_b(v32i8_a, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}} ++ v16i16_r = __lasx_xvmini_h(v16i16_a, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}} ++ v8i32_r = __lasx_xvmini_w(v8i32_a, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}} ++ v4i64_r = __lasx_xvmini_d(v4i64_a, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}} ++ v32u8_r = __lasx_xvmini_bu(v32u8_a, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ v16u16_r = __lasx_xvmini_hu(v16u16_a, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ v8u32_r = __lasx_xvmini_wu(v8u32_a, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ v4u64_r = __lasx_xvmini_du(v4u64_a, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ v32i8_r = __lasx_xvseqi_b(v32i8_a, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}} ++ v16i16_r = __lasx_xvseqi_h(v16i16_a, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}} ++ v8i32_r = __lasx_xvseqi_w(v8i32_a, -17); // expected-error {{argument value -17 is outside the 
valid range [-16, 15]}} ++ v4i64_r = __lasx_xvseqi_d(v4i64_a, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}} ++ v32i8_r = __lasx_xvslti_b(v32i8_a, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}} ++ v16i16_r = __lasx_xvslti_h(v16i16_a, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}} ++ v8i32_r = __lasx_xvslti_w(v8i32_a, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}} ++ v4i64_r = __lasx_xvslti_d(v4i64_a, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}} ++ v32i8_r = __lasx_xvslti_bu(v32u8_a, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ v16i16_r = __lasx_xvslti_hu(v16u16_a, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ v8i32_r = __lasx_xvslti_wu(v8u32_a, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ v4i64_r = __lasx_xvslti_du(v4u64_a, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ v32i8_r = __lasx_xvslei_b(v32i8_a, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}} ++ v16i16_r = __lasx_xvslei_h(v16i16_a, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}} ++ v8i32_r = __lasx_xvslei_w(v8i32_a, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}} ++ v4i64_r = __lasx_xvslei_d(v4i64_a, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}} ++ v32i8_r = __lasx_xvslei_bu(v32u8_a, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ v16i16_r = __lasx_xvslei_hu(v16u16_a, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ v8i32_r = __lasx_xvslei_wu(v8u32_a, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ v4i64_r = __lasx_xvslei_du(v4u64_a, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ v32i8_r = __lasx_xvsat_b(v32i8_a, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}} ++ v16i16_r = __lasx_xvsat_h(v16i16_a, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} ++ v8i32_r = __lasx_xvsat_w(v8i32_a, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ v4i64_r = __lasx_xvsat_d(v4i64_a, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} ++ v32u8_r = __lasx_xvsat_bu(v32u8_a, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}} ++ v16u16_r = __lasx_xvsat_hu(v16u16_a, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} ++ v8u32_r = __lasx_xvsat_wu(v8u32_a, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ v4u64_r = __lasx_xvsat_du(v4u64_a, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} ++ v32i8_r = __lasx_xvrepl128vei_b(v32i8_a, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} ++ v16i16_r = __lasx_xvrepl128vei_h(v16i16_a, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}} ++ v8i32_r = __lasx_xvrepl128vei_w(v8i32_a, 4); // expected-error {{argument value 4 is outside the valid range [0, 3]}} ++ v4i64_r = __lasx_xvrepl128vei_d(v4i64_a, 2); // expected-error {{argument value 2 is outside the valid range [0, 1]}} ++ v32u8_r = __lasx_xvandi_b(v32u8_a, 256); 
// expected-error {{argument value 256 is outside the valid range [0, 255]}} ++ v32u8_r = __lasx_xvori_b(v32u8_a, 256); // expected-error {{argument value 256 is outside the valid range [0, 255]}} ++ v32u8_r = __lasx_xvnori_b(v32u8_a, 256); // expected-error {{argument value 256 is outside the valid range [0, 255]}} ++ v32u8_r = __lasx_xvxori_b(v32u8_a, 256); // expected-error {{argument value 256 is outside the valid range [0, 255]}} ++ v32u8_r = __lasx_xvbitseli_b(v32u8_a, v32u8_b, 256); // expected-error {{argument value 256 is outside the valid range [0, 255]}} ++ v32i8_r = __lasx_xvshuf4i_b(v32i8_a, 256); // expected-error {{argument value 256 is outside the valid range [0, 255]}} ++ v16i16_r = __lasx_xvshuf4i_h(v16i16_a, 256); // expected-error {{argument value 256 is outside the valid range [0, 255]}} ++ v8i32_r = __lasx_xvshuf4i_w(v8i32_a, 256); // expected-error {{argument value 256 is outside the valid range [0, 255]}} ++ v8i32_r = __lasx_xvpermi_w(v8i32_a, v8i32_b, 256); // expected-error {{argument value 256 is outside the valid range [0, 255]}} ++ v16i16_r = __lasx_xvsllwil_h_b(v32i8_a, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}} ++ v8i32_r = __lasx_xvsllwil_w_h(v16i16_a, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} ++ v4i64_r = __lasx_xvsllwil_d_w(v8i32_a, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ v16u16_r = __lasx_xvsllwil_hu_bu(v32u8_a, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}} ++ v8u32_r = __lasx_xvsllwil_wu_hu(v16u16_a, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} ++ v4u64_r = __lasx_xvsllwil_du_wu(v8u32_a, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ v32i8_r = __lasx_xvfrstpi_b(v32i8_a, v32i8_b, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ v16i16_r = __lasx_xvfrstpi_h(v16i16_a, v16i16_b, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ v4i64_r = __lasx_xvshuf4i_d(v4i64_a, v4i64_b, 256); // expected-error {{argument value 256 is outside the valid range [0, 255]}} ++ v32i8_r = __lasx_xvbsrl_v(v32i8_a, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ v32i8_r = __lasx_xvbsll_v(v32i8_a, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ v32i8_r = __lasx_xvextrins_b(v32i8_a, v32i8_b, 256); // expected-error {{argument value 256 is outside the valid range [0, 255]}} ++ v16i16_r = __lasx_xvextrins_h(v16i16_a, v16i16_b, 256); // expected-error {{argument value 256 is outside the valid range [0, 255]}} ++ v8i32_r = __lasx_xvextrins_w(v8i32_a, v8i32_b, 256); // expected-error {{argument value 256 is outside the valid range [0, 255]}} ++ v4i64_r = __lasx_xvextrins_d(v4i64_a, v4i64_b, 256); // expected-error {{argument value 256 is outside the valid range [0, 255]}} ++ v32i8_r = __lasx_xvld(&v32i8_a, -2049); // expected-error {{argument value -2049 is outside the valid range [-2048, 2047]}} ++ __lasx_xvst(v32i8_a, &v32i8_b, -2049); // expected-error {{argument value -2049 is outside the valid range [-2048, 2047]}} ++ __lasx_xvstelm_b(v32i8_a, &v32i8_b, 0, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ __lasx_xvstelm_h(v16i16_a, &v16i16_b, 0, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} ++ __lasx_xvstelm_w(v8i32_a, &v8i32_b, 0, 8); // expected-error {{argument value 
8 is outside the valid range [0, 7]}} ++ __lasx_xvstelm_d(v4i64_a, &v4i64_b, 0, 4); // expected-error {{argument value 4 is outside the valid range [0, 3]}} ++ v8i32_r = __lasx_xvinsve0_w(v8i32_a, v8i32_b, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}} ++ v4i64_r = __lasx_xvinsve0_d(v4i64_a, v4i64_b, 4); // expected-error {{argument value 4 is outside the valid range [0, 3]}} ++ v8i32_r = __lasx_xvpickve_w(v8i32_b, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}} ++ v4i64_r = __lasx_xvpickve_d(v4i64_b, 4); // expected-error {{argument value 4 is outside the valid range [0, 3]}} ++ v4i64_r = __lasx_xvldi(-4097); // expected-error {{argument value -4097 is outside the valid range [-4096, 4095]}} ++ v8i32_r = __lasx_xvinsgr2vr_w(v8i32_a, i32_b, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}} ++ v4i64_r = __lasx_xvinsgr2vr_d(v4i64_a, i64_b, 4); // expected-error {{argument value 4 is outside the valid range [0, 3]}} ++ v32i8_r = __lasx_xvpermi_q(v32i8_a, v32i8_b, 256); // expected-error {{argument value 256 is outside the valid range [0, 255]}} ++ v4i64_r = __lasx_xvpermi_d(v4i64_a, 256); // expected-error {{argument value 256 is outside the valid range [0, 255]}} ++ v32i8_r = __lasx_xvldrepl_b(&v32i8_a, -2049); // expected-error {{argument value -2049 is outside the valid range [-2048, 2047]}} ++ v16i16_r = __lasx_xvldrepl_h(&v16i16_a, -1025); // expected-error {{argument value -1025 is outside the valid range [-1024, 1023]}} ++ v8i32_r = __lasx_xvldrepl_w(&v8i32_a, -513); // expected-error {{argument value -513 is outside the valid range [-512, 511]}} ++ v4i64_r = __lasx_xvldrepl_d(&v4i64_a, -257); // expected-error {{argument value -257 is outside the valid range [-256, 255]}} ++ i32_r = __lasx_xvpickve2gr_w(v8i32_a, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}} ++ u32_r = __lasx_xvpickve2gr_wu(v8i32_a, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}} ++ i64_r = __lasx_xvpickve2gr_d(v4i64_a, 4); // expected-error {{argument value 4 is outside the valid range [0, 3]}} ++ u64_r = __lasx_xvpickve2gr_du(v4i64_a, 4); // expected-error {{argument value 4 is outside the valid range [0, 3]}} ++ v32i8_r = __lasx_xvrotri_b(v32i8_a, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}} ++ v16i16_r = __lasx_xvrotri_h(v16i16_a, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} ++ v8i32_r = __lasx_xvrotri_w(v8i32_a, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ v4i64_r = __lasx_xvrotri_d(v4i64_a, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} ++ v32i8_r = __lasx_xvsrlni_b_h(v32i8_a, v32i8_b, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} ++ v16i16_r = __lasx_xvsrlni_h_w(v16i16_a, v16i16_b, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ v8i32_r = __lasx_xvsrlni_w_d(v8i32_a, v8i32_b, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} ++ v4i64_r = __lasx_xvsrlni_d_q(v4i64_a, v4i64_b, 128); // expected-error {{argument value 128 is outside the valid range [0, 127]}} ++ v32i8_r = __lasx_xvsrlrni_b_h(v32i8_a, v32i8_b, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} ++ v16i16_r = __lasx_xvsrlrni_h_w(v16i16_a, v16i16_b, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ v8i32_r = 
__lasx_xvsrlrni_w_d(v8i32_a, v8i32_b, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} ++ v4i64_r = __lasx_xvsrlrni_d_q(v4i64_a, v4i64_b, 128); // expected-error {{argument value 128 is outside the valid range [0, 127]}} ++ v32i8_r = __lasx_xvssrlni_b_h(v32i8_a, v32i8_b, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} ++ v16i16_r = __lasx_xvssrlni_h_w(v16i16_a, v16i16_b, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ v8i32_r = __lasx_xvssrlni_w_d(v8i32_a, v8i32_b, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} ++ v4i64_r = __lasx_xvssrlni_d_q(v4i64_a, v4i64_b, 128); // expected-error {{argument value 128 is outside the valid range [0, 127]}} ++ v32u8_r = __lasx_xvssrlni_bu_h(v32u8_a, v32i8_b, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} ++ v16u16_r = __lasx_xvssrlni_hu_w(v16u16_a, v16i16_b, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ v8u32_r = __lasx_xvssrlni_wu_d(v8u32_a, v8i32_b, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} ++ v4u64_r = __lasx_xvssrlni_du_q(v4u64_a, v4i64_b, 128); // expected-error {{argument value 128 is outside the valid range [0, 127]}} ++ v32i8_r = __lasx_xvssrlrni_b_h(v32i8_a, v32i8_b, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} ++ v16i16_r = __lasx_xvssrlrni_h_w(v16i16_a, v16i16_b, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ v8i32_r = __lasx_xvssrlrni_w_d(v8i32_a, v8i32_b, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} ++ v4i64_r = __lasx_xvssrlrni_d_q(v4i64_a, v4i64_b, 128); // expected-error {{argument value 128 is outside the valid range [0, 127]}} ++ v32u8_r = __lasx_xvssrlrni_bu_h(v32u8_a, v32i8_b, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} ++ v16u16_r = __lasx_xvssrlrni_hu_w(v16u16_a, v16i16_b, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ v8u32_r = __lasx_xvssrlrni_wu_d(v8u32_a, v8i32_b, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} ++ v4u64_r = __lasx_xvssrlrni_du_q(v4u64_a, v4i64_b, 128); // expected-error {{argument value 128 is outside the valid range [0, 127]}} ++ v32i8_r = __lasx_xvsrani_b_h(v32i8_a, v32i8_b, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} ++ v16i16_r = __lasx_xvsrani_h_w(v16i16_a, v16i16_b, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ v8i32_r = __lasx_xvsrani_w_d(v8i32_a, v8i32_b, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} ++ v4i64_r = __lasx_xvsrani_d_q(v4i64_a, v4i64_b, 128); // expected-error {{argument value 128 is outside the valid range [0, 127]}} ++ v32i8_r = __lasx_xvsrarni_b_h(v32i8_a, v32i8_b, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} ++ v16i16_r = __lasx_xvsrarni_h_w(v16i16_a, v16i16_b, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ v8i32_r = __lasx_xvsrarni_w_d(v8i32_a, v8i32_b, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} ++ v4i64_r = __lasx_xvsrarni_d_q(v4i64_a, v4i64_b, 128); // expected-error {{argument value 128 is outside the valid range [0, 127]}} ++ v32i8_r = __lasx_xvssrani_b_h(v32i8_a, v32i8_b, 16); // expected-error {{argument value 16 is outside the 
valid range [0, 15]}} ++ v16i16_r = __lasx_xvssrani_h_w(v16i16_a, v16i16_b, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ v8i32_r = __lasx_xvssrani_w_d(v8i32_a, v8i32_b, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} ++ v4i64_r = __lasx_xvssrani_d_q(v4i64_a, v4i64_b, 128); // expected-error {{argument value 128 is outside the valid range [0, 127]}} ++ v32u8_r = __lasx_xvssrani_bu_h(v32u8_a, v32i8_b, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} ++ v16u16_r = __lasx_xvssrani_hu_w(v16u16_a, v16i16_b, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ v8u32_r = __lasx_xvssrani_wu_d(v8u32_a, v8i32_b, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} ++ v4u64_r = __lasx_xvssrani_du_q(v4u64_a, v4i64_b, 128); // expected-error {{argument value 128 is outside the valid range [0, 127]}} ++ v32i8_r = __lasx_xvssrarni_b_h(v32i8_a, v32i8_b, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} ++ v16i16_r = __lasx_xvssrarni_h_w(v16i16_a, v16i16_b, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ v8i32_r = __lasx_xvssrarni_w_d(v8i32_a, v8i32_b, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} ++ v4i64_r = __lasx_xvssrarni_d_q(v4i64_a, v4i64_b, 128); // expected-error {{argument value 128 is outside the valid range [0, 127]}} ++ v32u8_r = __lasx_xvssrarni_bu_h(v32u8_a, v32i8_b, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} ++ v16u16_r = __lasx_xvssrarni_hu_w(v16u16_a, v16i16_b, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ v8u32_r = __lasx_xvssrarni_wu_d(v8u32_a, v8i32_b, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} ++ v4u64_r = __lasx_xvssrarni_du_q(v4u64_a, v4i64_b, 128); // expected-error {{argument value 128 is outside the valid range [0, 127]}} ++} +diff --git a/test/CodeGen/builtins-loongarch-lasx.c b/test/CodeGen/builtins-loongarch-lasx.c +new file mode 100644 +index 00000000..b9ec3a3c +--- /dev/null ++++ b/test/CodeGen/builtins-loongarch-lasx.c +@@ -0,0 +1,3761 @@ ++// REQUIRES: loongarch-registered-target ++// RUN: %clang_cc1 -triple loongarch64-unknown-linux-gnu -emit-llvm %s \ ++// RUN: -target-feature +lasx \ ++// RUN: -target-feature +d \ ++// RUN: -o - | FileCheck %s ++ ++#include ++ ++#define ui1_b 1 ++#define ui2 1 ++#define ui2_b ui2 ++#define ui3 4 ++#define ui3_b ui3 ++#define ui4 7 ++#define ui4_b ui4 ++#define ui5 25 ++#define ui5_b ui5 ++#define ui6 44 ++#define ui6_b ui6 ++#define ui7 100 ++#define ui7_b ui7 ++#define ui8 127 //200 ++#define ui8_b ui8 ++#define si5_b -4 ++#define si8 -100 ++#define si9 0 ++#define si10 0 ++#define si11 0 ++#define si12 0 ++#define i10 500 ++#define i13 4000 ++#define mode 0 ++#define idx1 1 ++#define idx2 2 ++#define idx3 4 ++#define idx4 8 ++ ++void test(void) { ++ v32i8 v32i8_a = (v32i8){0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, ++ 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31}; ++ v32i8 v32i8_b = (v32i8){1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, ++ 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32}; ++ v32i8 v32i8_c = (v32i8){2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, ++ 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33}; ++ v32i8 v32i8_r; ++ ++ v16i16 v16i16_a = (v16i16){0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 
12, 13, 14, 15}; ++ v16i16 v16i16_b = (v16i16){1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16}; ++ v16i16 v16i16_c = (v16i16){2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17}; ++ v16i16 v16i16_r; ++ ++ v8i32 v8i32_a = (v8i32){0, 1, 2, 3, 4, 5, 6, 7}; ++ v8i32 v8i32_b = (v8i32){1, 2, 3, 4, 5, 6, 7, 8}; ++ v8i32 v8i32_c = (v8i32){2, 3, 4, 5, 6, 7, 8, 9}; ++ v8i32 v8i32_r; ++ ++ v4i64 v4i64_a = (v4i64){0, 1, 2, 3}; ++ v4i64 v4i64_b = (v4i64){1, 2, 3, 4}; ++ v4i64 v4i64_c = (v4i64){2, 3, 4, 5}; ++ v4i64 v4i64_r; ++ ++ v32u8 v32u8_a = (v32u8){0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, ++ 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31}; ++ v32u8 v32u8_b = (v32u8){1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, ++ 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32}; ++ v32u8 v32u8_c = (v32u8){2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, ++ 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33}; ++ v32u8 v32u8_r; ++ ++ v16u16 v16u16_a = (v16u16){0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15}; ++ v16u16 v16u16_b = (v16u16){1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16}; ++ v16u16 v16u16_c = (v16u16){2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17}; ++ v16u16 v16u16_r; ++ ++ v8u32 v8u32_a = (v8u32){0, 1, 2, 3, 4, 5, 6, 7}; ++ v8u32 v8u32_b = (v8u32){1, 2, 3, 4, 5, 6, 7, 8}; ++ v8u32 v8u32_c = (v8u32){2, 3, 4, 5, 6, 7, 8, 9}; ++ v8u32 v8u32_r; ++ ++ v4u64 v4u64_a = (v4u64){0, 1, 2, 3}; ++ v4u64 v4u64_b = (v4u64){1, 2, 3, 4}; ++ v4u64 v4u64_c = (v4u64){2, 3, 4, 5}; ++ v4u64 v4u64_r; ++ ++ v8f32 v8f32_a = (v8f32){0.5, 1, 2, 3, 4, 5, 6, 7}; ++ v8f32 v8f32_b = (v8f32){1.5, 2, 3, 4, 5, 6, 7, 8}; ++ v8f32 v8f32_c = (v8f32){2.5, 3, 4, 5, 6, 7, 8, 9}; ++ v8f32 v8f32_r; ++ v4f64 v4f64_a = (v4f64){0.5, 1, 2, 3}; ++ v4f64 v4f64_b = (v4f64){1.5, 2, 3, 4}; ++ v4f64 v4f64_c = (v4f64){2.5, 3, 4, 5}; ++ v4f64 v4f64_r; ++ ++ int i32_r; ++ int i32_a = 1; ++ int i32_b = 2; ++ unsigned int u32_r; ++ unsigned int u32_a = 1; ++ unsigned int u32_b = 2; ++ long long i64_r; ++ long long i64_a = 1; ++ long long i64_b = 2; ++ long long i64_c = 3; ++ long int i64_d = 0; ++ unsigned long long u64_r; ++ unsigned long long u64_a = 1; ++ unsigned long long u64_b = 2; ++ unsigned long long u64_c = 3; ++ ++ // __lasx_xvsll_b ++ // xd, xj, xk ++ // V32QI, V32QI, V32QI ++ v32i8_r = __lasx_xvsll_b(v32i8_a, v32i8_b); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvsll.b( ++ ++ // __lasx_xvsll_h ++ // xd, xj, xk ++ // V16HI, V16HI, V16HI ++ v16i16_r = __lasx_xvsll_h(v16i16_a, v16i16_b); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvsll.h( ++ ++ // __lasx_xvsll_w ++ // xd, xj, xk ++ // V8SI, V8SI, V8SI ++ v8i32_r = __lasx_xvsll_w(v8i32_a, v8i32_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvsll.w( ++ ++ // __lasx_xvsll_d ++ // xd, xj, xk ++ // V4DI, V4DI, V4DI ++ v4i64_r = __lasx_xvsll_d(v4i64_a, v4i64_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvsll.d( ++ ++ // __lasx_xvslli_b ++ // xd, xj, ui3 ++ // V32QI, V32QI, UQI ++ v32i8_r = __lasx_xvslli_b(v32i8_a, ui3_b); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvslli.b( ++ ++ // __lasx_xvslli_h ++ // xd, xj, ui4 ++ // V16HI, V16HI, UQI ++ v16i16_r = __lasx_xvslli_h(v16i16_a, ui4_b); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvslli.h( ++ ++ // __lasx_xvslli_w ++ // xd, xj, ui5 ++ // V8SI, V8SI, UQI ++ v8i32_r = __lasx_xvslli_w(v8i32_a, ui5_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvslli.w( ++ ++ // __lasx_xvslli_d ++ // xd, xj, ui6 ++ // V4DI, V4DI, UQI ++ 
v4i64_r = __lasx_xvslli_d(v4i64_a, ui6_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvslli.d( ++ ++ // __lasx_xvsra_b ++ // xd, xj, xk ++ // V32QI, V32QI, V32QI ++ v32i8_r = __lasx_xvsra_b(v32i8_a, v32i8_b); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvsra.b( ++ ++ // __lasx_xvsra_h ++ // xd, xj, xk ++ // V16HI, V16HI, V16HI ++ v16i16_r = __lasx_xvsra_h(v16i16_a, v16i16_b); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvsra.h( ++ ++ // __lasx_xvsra_w ++ // xd, xj, xk ++ // V8SI, V8SI, V8SI ++ v8i32_r = __lasx_xvsra_w(v8i32_a, v8i32_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvsra.w( ++ ++ // __lasx_xvsra_d ++ // xd, xj, xk ++ // V4DI, V4DI, V4DI ++ v4i64_r = __lasx_xvsra_d(v4i64_a, v4i64_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvsra.d( ++ ++ // __lasx_xvsrai_b ++ // xd, xj, ui3 ++ // V32QI, V32QI, UQI ++ v32i8_r = __lasx_xvsrai_b(v32i8_a, ui3_b); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvsrai.b( ++ ++ // __lasx_xvsrai_h ++ // xd, xj, ui4 ++ // V16HI, V16HI, UQI ++ v16i16_r = __lasx_xvsrai_h(v16i16_a, ui4_b); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvsrai.h( ++ ++ // __lasx_xvsrai_w ++ // xd, xj, ui5 ++ // V8SI, V8SI, UQI ++ v8i32_r = __lasx_xvsrai_w(v8i32_a, ui5_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvsrai.w( ++ ++ // __lasx_xvsrai_d ++ // xd, xj, ui6 ++ // V4DI, V4DI, UQI ++ v4i64_r = __lasx_xvsrai_d(v4i64_a, ui6_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvsrai.d( ++ ++ // __lasx_xvsrar_b ++ // xd, xj, xk ++ // V32QI, V32QI, V32QI ++ v32i8_r = __lasx_xvsrar_b(v32i8_a, v32i8_b); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvsrar.b( ++ ++ // __lasx_xvsrar_h ++ // xd, xj, xk ++ // V16HI, V16HI, V16HI ++ v16i16_r = __lasx_xvsrar_h(v16i16_a, v16i16_b); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvsrar.h( ++ ++ // __lasx_xvsrar_w ++ // xd, xj, xk ++ // V8SI, V8SI, V8SI ++ v8i32_r = __lasx_xvsrar_w(v8i32_a, v8i32_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvsrar.w( ++ ++ // __lasx_xvsrar_d ++ // xd, xj, xk ++ // V4DI, V4DI, V4DI ++ v4i64_r = __lasx_xvsrar_d(v4i64_a, v4i64_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvsrar.d( ++ ++ // __lasx_xvsrari_b ++ // xd, xj, ui3 ++ // V32QI, V32QI, UQI ++ v32i8_r = __lasx_xvsrari_b(v32i8_a, ui3_b); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvsrari.b( ++ ++ // __lasx_xvsrari_h ++ // xd, xj, ui4 ++ // V16HI, V16HI, UQI ++ v16i16_r = __lasx_xvsrari_h(v16i16_a, ui4_b); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvsrari.h( ++ ++ // __lasx_xvsrari_w ++ // xd, xj, ui5 ++ // V8SI, V8SI, UQI ++ v8i32_r = __lasx_xvsrari_w(v8i32_a, ui5_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvsrari.w( ++ ++ // __lasx_xvsrari_d ++ // xd, xj, ui6 ++ // V4DI, V4DI, UQI ++ v4i64_r = __lasx_xvsrari_d(v4i64_a, ui6_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvsrari.d( ++ ++ // __lasx_xvsrl_b ++ // xd, xj, xk ++ // V32QI, V32QI, V32QI ++ v32i8_r = __lasx_xvsrl_b(v32i8_a, v32i8_b); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvsrl.b( ++ ++ // __lasx_xvsrl_h ++ // xd, xj, xk ++ // V16HI, V16HI, V16HI ++ v16i16_r = __lasx_xvsrl_h(v16i16_a, v16i16_b); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvsrl.h( ++ ++ // __lasx_xvsrl_w ++ // xd, xj, xk ++ // V8SI, V8SI, V8SI ++ v8i32_r = __lasx_xvsrl_w(v8i32_a, v8i32_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvsrl.w( ++ ++ // __lasx_xvsrl_d ++ // xd, xj, xk ++ // V4DI, V4DI, V4DI ++ v4i64_r = __lasx_xvsrl_d(v4i64_a, v4i64_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvsrl.d( ++ ++ // __lasx_xvsrli_b ++ // xd, xj, ui3 ++ // V32QI, 
V32QI, UQI ++ v32i8_r = __lasx_xvsrli_b(v32i8_a, ui3_b); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvsrli.b( ++ ++ // __lasx_xvsrli_h ++ // xd, xj, ui4 ++ // V16HI, V16HI, UQI ++ v16i16_r = __lasx_xvsrli_h(v16i16_a, ui4_b); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvsrli.h( ++ ++ // __lasx_xvsrli_w ++ // xd, xj, ui5 ++ // V8SI, V8SI, UQI ++ v8i32_r = __lasx_xvsrli_w(v8i32_a, ui5_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvsrli.w( ++ ++ // __lasx_xvsrli_d ++ // xd, xj, ui6 ++ // V4DI, V4DI, UQI ++ v4i64_r = __lasx_xvsrli_d(v4i64_a, ui6_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvsrli.d( ++ ++ // __lasx_xvsrlr_b ++ // xd, xj, xk ++ // V32QI, V32QI, V32QI ++ v32i8_r = __lasx_xvsrlr_b(v32i8_a, v32i8_b); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvsrlr.b( ++ ++ // __lasx_xvsrlr_h ++ // xd, xj, xk ++ // V16HI, V16HI, V16HI ++ v16i16_r = __lasx_xvsrlr_h(v16i16_a, v16i16_b); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvsrlr.h( ++ ++ // __lasx_xvsrlr_w ++ // xd, xj, xk ++ // V8SI, V8SI, V8SI ++ v8i32_r = __lasx_xvsrlr_w(v8i32_a, v8i32_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvsrlr.w( ++ ++ // __lasx_xvsrlr_d ++ // xd, xj, xk ++ // V4DI, V4DI, V4DI ++ v4i64_r = __lasx_xvsrlr_d(v4i64_a, v4i64_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvsrlr.d( ++ ++ // __lasx_xvsrlri_b ++ // xd, xj, ui3 ++ // V32QI, V32QI, UQI ++ v32i8_r = __lasx_xvsrlri_b(v32i8_a, ui3_b); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvsrlri.b( ++ ++ // __lasx_xvsrlri_h ++ // xd, xj, ui4 ++ // V16HI, V16HI, UQI ++ v16i16_r = __lasx_xvsrlri_h(v16i16_a, ui4_b); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvsrlri.h( ++ ++ // __lasx_xvsrlri_w ++ // xd, xj, ui5 ++ // V8SI, V8SI, UQI ++ v8i32_r = __lasx_xvsrlri_w(v8i32_a, ui5_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvsrlri.w( ++ ++ // __lasx_xvsrlri_d ++ // xd, xj, ui6 ++ // V4DI, V4DI, UQI ++ v4i64_r = __lasx_xvsrlri_d(v4i64_a, ui6_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvsrlri.d( ++ ++ // __lasx_xvbitclr_b ++ // xd, xj, xk ++ // UV32QI, UV32QI, UV32QI ++ v32u8_r = __lasx_xvbitclr_b(v32u8_a, v32u8_b); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvbitclr.b( ++ ++ // __lasx_xvbitclr_h ++ // xd, xj, xk ++ // UV16HI, UV16HI, UV16HI ++ v16u16_r = __lasx_xvbitclr_h(v16u16_a, v16u16_b); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvbitclr.h( ++ ++ // __lasx_xvbitclr_w ++ // xd, xj, xk ++ // UV8SI, UV8SI, UV8SI ++ v8u32_r = __lasx_xvbitclr_w(v8u32_a, v8u32_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvbitclr.w( ++ ++ // __lasx_xvbitclr_d ++ // xd, xj, xk ++ // UV4DI, UV4DI, UV4DI ++ v4u64_r = __lasx_xvbitclr_d(v4u64_a, v4u64_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvbitclr.d( ++ ++ // __lasx_xvbitclri_b ++ // xd, xj, ui3 ++ // UV32QI, UV32QI, UQI ++ v32u8_r = __lasx_xvbitclri_b(v32u8_a, ui3_b); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvbitclri.b( ++ ++ // __lasx_xvbitclri_h ++ // xd, xj, ui4 ++ // UV16HI, UV16HI, UQI ++ v16u16_r = __lasx_xvbitclri_h(v16u16_a, ui4_b); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvbitclri.h( ++ ++ // __lasx_xvbitclri_w ++ // xd, xj, ui5 ++ // UV8SI, UV8SI, UQI ++ v8u32_r = __lasx_xvbitclri_w(v8u32_a, ui5_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvbitclri.w( ++ ++ // __lasx_xvbitclri_d ++ // xd, xj, ui6 ++ // UV4DI, UV4DI, UQI ++ v4u64_r = __lasx_xvbitclri_d(v4u64_a, ui6_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvbitclri.d( ++ ++ // __lasx_xvbitset_b ++ // xd, xj, xk ++ // UV32QI, UV32QI, UV32QI ++ v32u8_r = __lasx_xvbitset_b(v32u8_a, 
v32u8_b); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvbitset.b( ++ ++ // __lasx_xvbitset_h ++ // xd, xj, xk ++ // UV16HI, UV16HI, UV16HI ++ v16u16_r = __lasx_xvbitset_h(v16u16_a, v16u16_b); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvbitset.h( ++ ++ // __lasx_xvbitset_w ++ // xd, xj, xk ++ // UV8SI, UV8SI, UV8SI ++ v8u32_r = __lasx_xvbitset_w(v8u32_a, v8u32_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvbitset.w( ++ ++ // __lasx_xvbitset_d ++ // xd, xj, xk ++ // UV4DI, UV4DI, UV4DI ++ v4u64_r = __lasx_xvbitset_d(v4u64_a, v4u64_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvbitset.d( ++ ++ // __lasx_xvbitseti_b ++ // xd, xj, ui3 ++ // UV32QI, UV32QI, UQI ++ v32u8_r = __lasx_xvbitseti_b(v32u8_a, ui3_b); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvbitseti.b( ++ ++ // __lasx_xvbitseti_h ++ // xd, xj, ui4 ++ // UV16HI, UV16HI, UQI ++ v16u16_r = __lasx_xvbitseti_h(v16u16_a, ui4_b); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvbitseti.h( ++ ++ // __lasx_xvbitseti_w ++ // xd, xj, ui5 ++ // UV8SI, UV8SI, UQI ++ v8u32_r = __lasx_xvbitseti_w(v8u32_a, ui5_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvbitseti.w( ++ ++ // __lasx_xvbitseti_d ++ // xd, xj, ui6 ++ // UV4DI, UV4DI, UQI ++ v4u64_r = __lasx_xvbitseti_d(v4u64_a, ui6_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvbitseti.d( ++ ++ // __lasx_xvbitrev_b ++ // xd, xj, xk ++ // UV32QI, UV32QI, UV32QI ++ v32u8_r = __lasx_xvbitrev_b(v32u8_a, v32u8_b); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvbitrev.b( ++ ++ // __lasx_xvbitrev_h ++ // xd, xj, xk ++ // UV16HI, UV16HI, UV16HI ++ v16u16_r = __lasx_xvbitrev_h(v16u16_a, v16u16_b); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvbitrev.h( ++ ++ // __lasx_xvbitrev_w ++ // xd, xj, xk ++ // UV8SI, UV8SI, UV8SI ++ v8u32_r = __lasx_xvbitrev_w(v8u32_a, v8u32_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvbitrev.w( ++ ++ // __lasx_xvbitrev_d ++ // xd, xj, xk ++ // UV4DI, UV4DI, UV4DI ++ v4u64_r = __lasx_xvbitrev_d(v4u64_a, v4u64_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvbitrev.d( ++ ++ // __lasx_xvbitrevi_b ++ // xd, xj, ui3 ++ // UV32QI, UV32QI, UQI ++ v32u8_r = __lasx_xvbitrevi_b(v32u8_a, ui3_b); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvbitrevi.b( ++ ++ // __lasx_xvbitrevi_h ++ // xd, xj, ui4 ++ // UV16HI, UV16HI, UQI ++ v16u16_r = __lasx_xvbitrevi_h(v16u16_a, ui4_b); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvbitrevi.h( ++ ++ // __lasx_xvbitrevi_w ++ // xd, xj, ui5 ++ // UV8SI, UV8SI, UQI ++ v8u32_r = __lasx_xvbitrevi_w(v8u32_a, ui5_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvbitrevi.w( ++ ++ // __lasx_xvbitrevi_d ++ // xd, xj, ui6 ++ // UV4DI, UV4DI, UQI ++ v4u64_r = __lasx_xvbitrevi_d(v4u64_a, ui6_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvbitrevi.d( ++ ++ // __lasx_xvadd_b ++ // xd, xj, xk ++ // V32QI, V32QI, V32QI ++ v32i8_r = __lasx_xvadd_b(v32i8_a, v32i8_b); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvadd.b( ++ ++ // __lasx_xvadd_h ++ // xd, xj, xk ++ // V16HI, V16HI, V16HI ++ v16i16_r = __lasx_xvadd_h(v16i16_a, v16i16_b); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvadd.h( ++ ++ // __lasx_xvadd_w ++ // xd, xj, xk ++ // V8SI, V8SI, V8SI ++ v8i32_r = __lasx_xvadd_w(v8i32_a, v8i32_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvadd.w( ++ ++ // __lasx_xvadd_d ++ // xd, xj, xk ++ // V4DI, V4DI, V4DI ++ v4i64_r = __lasx_xvadd_d(v4i64_a, v4i64_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvadd.d( ++ ++ // __lasx_xvaddi_bu ++ // xd, xj, ui5 ++ // V32QI, V32QI, UQI ++ v32i8_r = __lasx_xvaddi_bu(v32i8_a, 
ui5_b); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvaddi.bu( ++ ++ // __lasx_xvaddi_hu ++ // xd, xj, ui5 ++ // V16HI, V16HI, UQI ++ v16i16_r = __lasx_xvaddi_hu(v16i16_a, ui5_b); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvaddi.hu( ++ ++ // __lasx_xvaddi_wu ++ // xd, xj, ui5 ++ // V8SI, V8SI, UQI ++ v8i32_r = __lasx_xvaddi_wu(v8i32_a, ui5_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvaddi.wu( ++ ++ // __lasx_xvaddi_du ++ // xd, xj, ui5 ++ // V4DI, V4DI, UQI ++ v4i64_r = __lasx_xvaddi_du(v4i64_a, ui5_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvaddi.du( ++ ++ // __lasx_xvsub_b ++ // xd, xj, xk ++ // V32QI, V32QI, V32QI ++ v32i8_r = __lasx_xvsub_b(v32i8_a, v32i8_b); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvsub.b( ++ ++ // __lasx_xvsub_h ++ // xd, xj, xk ++ // V16HI, V16HI, V16HI ++ v16i16_r = __lasx_xvsub_h(v16i16_a, v16i16_b); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvsub.h( ++ ++ // __lasx_xvsub_w ++ // xd, xj, xk ++ // V8SI, V8SI, V8SI ++ v8i32_r = __lasx_xvsub_w(v8i32_a, v8i32_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvsub.w( ++ ++ // __lasx_xvsub_d ++ // xd, xj, xk ++ // V4DI, V4DI, V4DI ++ v4i64_r = __lasx_xvsub_d(v4i64_a, v4i64_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvsub.d( ++ ++ // __lasx_xvsubi_bu ++ // xd, xj, ui5 ++ // V32QI, V32QI, UQI ++ v32i8_r = __lasx_xvsubi_bu(v32i8_a, ui5_b); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvsubi.bu( ++ ++ // __lasx_xvsubi_hu ++ // xd, xj, ui5 ++ // V16HI, V16HI, UQI ++ v16i16_r = __lasx_xvsubi_hu(v16i16_a, ui5_b); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvsubi.hu( ++ ++ // __lasx_xvsubi_wu ++ // xd, xj, ui5 ++ // V8SI, V8SI, UQI ++ v8i32_r = __lasx_xvsubi_wu(v8i32_a, ui5_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvsubi.wu( ++ ++ // __lasx_xvsubi_du ++ // xd, xj, ui5 ++ // V4DI, V4DI, UQI ++ v4i64_r = __lasx_xvsubi_du(v4i64_a, ui5_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvsubi.du( ++ ++ // __lasx_xvmax_b ++ // xd, xj, xk ++ // V32QI, V32QI, V32QI ++ v32i8_r = __lasx_xvmax_b(v32i8_a, v32i8_b); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvmax.b( ++ ++ // __lasx_xvmax_h ++ // xd, xj, xk ++ // V16HI, V16HI, V16HI ++ v16i16_r = __lasx_xvmax_h(v16i16_a, v16i16_b); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvmax.h( ++ ++ // __lasx_xvmax_w ++ // xd, xj, xk ++ // V8SI, V8SI, V8SI ++ v8i32_r = __lasx_xvmax_w(v8i32_a, v8i32_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvmax.w( ++ ++ // __lasx_xvmax_d ++ // xd, xj, xk ++ // V4DI, V4DI, V4DI ++ v4i64_r = __lasx_xvmax_d(v4i64_a, v4i64_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvmax.d( ++ ++ // __lasx_xvmaxi_b ++ // xd, xj, si5 ++ // V32QI, V32QI, QI ++ v32i8_r = __lasx_xvmaxi_b(v32i8_a, si5_b); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvmaxi.b( ++ ++ // __lasx_xvmaxi_h ++ // xd, xj, si5 ++ // V16HI, V16HI, QI ++ v16i16_r = __lasx_xvmaxi_h(v16i16_a, si5_b); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvmaxi.h( ++ ++ // __lasx_xvmaxi_w ++ // xd, xj, si5 ++ // V8SI, V8SI, QI ++ v8i32_r = __lasx_xvmaxi_w(v8i32_a, si5_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvmaxi.w( ++ ++ // __lasx_xvmaxi_d ++ // xd, xj, si5 ++ // V4DI, V4DI, QI ++ v4i64_r = __lasx_xvmaxi_d(v4i64_a, si5_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvmaxi.d( ++ ++ // __lasx_xvmax_bu ++ // xd, xj, xk ++ // UV32QI, UV32QI, UV32QI ++ v32u8_r = __lasx_xvmax_bu(v32u8_a, v32u8_b); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvmax.bu( ++ ++ // __lasx_xvmax_hu ++ // xd, xj, xk ++ // UV16HI, UV16HI, UV16HI ++ v16u16_r = 
__lasx_xvmax_hu(v16u16_a, v16u16_b); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvmax.hu( ++ ++ // __lasx_xvmax_wu ++ // xd, xj, xk ++ // UV8SI, UV8SI, UV8SI ++ v8u32_r = __lasx_xvmax_wu(v8u32_a, v8u32_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvmax.wu( ++ ++ // __lasx_xvmax_du ++ // xd, xj, xk ++ // UV4DI, UV4DI, UV4DI ++ v4u64_r = __lasx_xvmax_du(v4u64_a, v4u64_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvmax.du( ++ ++ // __lasx_xvmaxi_bu ++ // xd, xj, ui5 ++ // UV32QI, UV32QI, UQI ++ v32u8_r = __lasx_xvmaxi_bu(v32u8_a, ui5_b); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvmaxi.bu( ++ ++ // __lasx_xvmaxi_hu ++ // xd, xj, ui5 ++ // UV16HI, UV16HI, UQI ++ v16u16_r = __lasx_xvmaxi_hu(v16u16_a, ui5_b); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvmaxi.hu( ++ ++ // __lasx_xvmaxi_wu ++ // xd, xj, ui5 ++ // UV8SI, UV8SI, UQI ++ v8u32_r = __lasx_xvmaxi_wu(v8u32_a, ui5_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvmaxi.wu( ++ ++ // __lasx_xvmaxi_du ++ // xd, xj, ui5 ++ // UV4DI, UV4DI, UQI ++ v4u64_r = __lasx_xvmaxi_du(v4u64_a, ui5_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvmaxi.du( ++ ++ // __lasx_xvmin_b ++ // xd, xj, xk ++ // V32QI, V32QI, V32QI ++ v32i8_r = __lasx_xvmin_b(v32i8_a, v32i8_b); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvmin.b( ++ ++ // __lasx_xvmin_h ++ // xd, xj, xk ++ // V16HI, V16HI, V16HI ++ v16i16_r = __lasx_xvmin_h(v16i16_a, v16i16_b); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvmin.h( ++ ++ // __lasx_xvmin_w ++ // xd, xj, xk ++ // V8SI, V8SI, V8SI ++ v8i32_r = __lasx_xvmin_w(v8i32_a, v8i32_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvmin.w( ++ ++ // __lasx_xvmin_d ++ // xd, xj, xk ++ // V4DI, V4DI, V4DI ++ v4i64_r = __lasx_xvmin_d(v4i64_a, v4i64_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvmin.d( ++ ++ // __lasx_xvmini_b ++ // xd, xj, si5 ++ // V32QI, V32QI, QI ++ v32i8_r = __lasx_xvmini_b(v32i8_a, si5_b); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvmini.b( ++ ++ // __lasx_xvmini_h ++ // xd, xj, si5 ++ // V16HI, V16HI, QI ++ v16i16_r = __lasx_xvmini_h(v16i16_a, si5_b); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvmini.h( ++ ++ // __lasx_xvmini_w ++ // xd, xj, si5 ++ // V8SI, V8SI, QI ++ v8i32_r = __lasx_xvmini_w(v8i32_a, si5_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvmini.w( ++ ++ // __lasx_xvmini_d ++ // xd, xj, si5 ++ // V4DI, V4DI, QI ++ v4i64_r = __lasx_xvmini_d(v4i64_a, si5_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvmini.d( ++ ++ // __lasx_xvmin_bu ++ // xd, xj, xk ++ // UV32QI, UV32QI, UV32QI ++ v32u8_r = __lasx_xvmin_bu(v32u8_a, v32u8_b); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvmin.bu( ++ ++ // __lasx_xvmin_hu ++ // xd, xj, xk ++ // UV16HI, UV16HI, UV16HI ++ v16u16_r = __lasx_xvmin_hu(v16u16_a, v16u16_b); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvmin.hu( ++ ++ // __lasx_xvmin_wu ++ // xd, xj, xk ++ // UV8SI, UV8SI, UV8SI ++ v8u32_r = __lasx_xvmin_wu(v8u32_a, v8u32_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvmin.wu( ++ ++ // __lasx_xvmin_du ++ // xd, xj, xk ++ // UV4DI, UV4DI, UV4DI ++ v4u64_r = __lasx_xvmin_du(v4u64_a, v4u64_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvmin.du( ++ ++ // __lasx_xvmini_bu ++ // xd, xj, ui5 ++ // UV32QI, UV32QI, UQI ++ v32u8_r = __lasx_xvmini_bu(v32u8_a, ui5_b); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvmini.bu( ++ ++ // __lasx_xvmini_hu ++ // xd, xj, ui5 ++ // UV16HI, UV16HI, UQI ++ v16u16_r = __lasx_xvmini_hu(v16u16_a, ui5_b); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvmini.hu( ++ ++ // __lasx_xvmini_wu 
++ // xd, xj, ui5 ++ // UV8SI, UV8SI, UQI ++ v8u32_r = __lasx_xvmini_wu(v8u32_a, ui5_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvmini.wu( ++ ++ // __lasx_xvmini_du ++ // xd, xj, ui5 ++ // UV4DI, UV4DI, UQI ++ v4u64_r = __lasx_xvmini_du(v4u64_a, ui5_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvmini.du( ++ ++ // __lasx_xvseq_b ++ // xd, xj, xk ++ // V32QI, V32QI, V32QI ++ v32i8_r = __lasx_xvseq_b(v32i8_a, v32i8_b); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvseq.b( ++ ++ // __lasx_xvseq_h ++ // xd, xj, xk ++ // V16HI, V16HI, V16HI ++ v16i16_r = __lasx_xvseq_h(v16i16_a, v16i16_b); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvseq.h( ++ ++ // __lasx_xvseq_w ++ // xd, xj, xk ++ // V8SI, V8SI, V8SI ++ v8i32_r = __lasx_xvseq_w(v8i32_a, v8i32_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvseq.w( ++ ++ // __lasx_xvseq_d ++ // xd, xj, xk ++ // V4DI, V4DI, V4DI ++ v4i64_r = __lasx_xvseq_d(v4i64_a, v4i64_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvseq.d( ++ ++ // __lasx_xvseqi_b ++ // xd, xj, si5 ++ // V32QI, V32QI, QI ++ v32i8_r = __lasx_xvseqi_b(v32i8_a, si5_b); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvseqi.b( ++ ++ // __lasx_xvseqi_h ++ // xd, xj, si5 ++ // V16HI, V16HI, QI ++ v16i16_r = __lasx_xvseqi_h(v16i16_a, si5_b); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvseqi.h( ++ ++ // __lasx_xvseqi_w ++ // xd, xj, si5 ++ // V8SI, V8SI, QI ++ v8i32_r = __lasx_xvseqi_w(v8i32_a, si5_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvseqi.w( ++ ++ // __lasx_xvseqi_d ++ // xd, xj, si5 ++ // V4DI, V4DI, QI ++ v4i64_r = __lasx_xvseqi_d(v4i64_a, si5_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvseqi.d( ++ ++ // __lasx_xvslt_b ++ // xd, xj, xk ++ // V32QI, V32QI, V32QI ++ v32i8_r = __lasx_xvslt_b(v32i8_a, v32i8_b); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvslt.b( ++ ++ // __lasx_xvslt_h ++ // xd, xj, xk ++ // V16HI, V16HI, V16HI ++ v16i16_r = __lasx_xvslt_h(v16i16_a, v16i16_b); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvslt.h( ++ ++ // __lasx_xvslt_w ++ // xd, xj, xk ++ // V8SI, V8SI, V8SI ++ v8i32_r = __lasx_xvslt_w(v8i32_a, v8i32_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvslt.w( ++ ++ // __lasx_xvslt_d ++ // xd, xj, xk ++ // V4DI, V4DI, V4DI ++ v4i64_r = __lasx_xvslt_d(v4i64_a, v4i64_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvslt.d( ++ ++ // __lasx_xvslti_b ++ // xd, xj, si5 ++ // V32QI, V32QI, QI ++ v32i8_r = __lasx_xvslti_b(v32i8_a, si5_b); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvslti.b( ++ ++ // __lasx_xvslti_h ++ // xd, xj, si5 ++ // V16HI, V16HI, QI ++ v16i16_r = __lasx_xvslti_h(v16i16_a, si5_b); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvslti.h( ++ ++ // __lasx_xvslti_w ++ // xd, xj, si5 ++ // V8SI, V8SI, QI ++ v8i32_r = __lasx_xvslti_w(v8i32_a, si5_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvslti.w( ++ ++ // __lasx_xvslti_d ++ // xd, xj, si5 ++ // V4DI, V4DI, QI ++ v4i64_r = __lasx_xvslti_d(v4i64_a, si5_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvslti.d( ++ ++ // __lasx_xvslt_bu ++ // xd, xj, xk ++ // V32QI, UV32QI, UV32QI ++ v32i8_r = __lasx_xvslt_bu(v32u8_a, v32u8_b); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvslt.bu( ++ ++ // __lasx_xvslt_hu ++ // xd, xj, xk ++ // V16HI, UV16HI, UV16HI ++ v16i16_r = __lasx_xvslt_hu(v16u16_a, v16u16_b); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvslt.hu( ++ ++ // __lasx_xvslt_wu ++ // xd, xj, xk ++ // V8SI, UV8SI, UV8SI ++ v8i32_r = __lasx_xvslt_wu(v8u32_a, v8u32_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvslt.wu( ++ ++ // 
__lasx_xvslt_du ++ // xd, xj, xk ++ // V4DI, UV4DI, UV4DI ++ v4i64_r = __lasx_xvslt_du(v4u64_a, v4u64_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvslt.du( ++ ++ // __lasx_xvslti_bu ++ // xd, xj, ui5 ++ // V32QI, UV32QI, UQI ++ v32i8_r = __lasx_xvslti_bu(v32u8_a, ui5_b); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvslti.bu( ++ ++ // __lasx_xvslti_hu ++ // xd, xj, ui5 ++ // V16HI, UV16HI, UQI ++ v16i16_r = __lasx_xvslti_hu(v16u16_a, ui5_b); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvslti.hu( ++ ++ // __lasx_xvslti_wu ++ // xd, xj, ui5 ++ // V8SI, UV8SI, UQI ++ v8i32_r = __lasx_xvslti_wu(v8u32_a, ui5_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvslti.wu( ++ ++ // __lasx_xvslti_du ++ // xd, xj, ui5 ++ // V4DI, UV4DI, UQI ++ v4i64_r = __lasx_xvslti_du(v4u64_a, ui5_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvslti.du( ++ ++ // __lasx_xvsle_b ++ // xd, xj, xk ++ // V32QI, V32QI, V32QI ++ v32i8_r = __lasx_xvsle_b(v32i8_a, v32i8_b); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvsle.b( ++ ++ // __lasx_xvsle_h ++ // xd, xj, xk ++ // V16HI, V16HI, V16HI ++ v16i16_r = __lasx_xvsle_h(v16i16_a, v16i16_b); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvsle.h( ++ ++ // __lasx_xvsle_w ++ // xd, xj, xk ++ // V8SI, V8SI, V8SI ++ v8i32_r = __lasx_xvsle_w(v8i32_a, v8i32_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvsle.w( ++ ++ // __lasx_xvsle_d ++ // xd, xj, xk ++ // V4DI, V4DI, V4DI ++ v4i64_r = __lasx_xvsle_d(v4i64_a, v4i64_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvsle.d( ++ ++ // __lasx_xvslei_b ++ // xd, xj, si5 ++ // V32QI, V32QI, QI ++ v32i8_r = __lasx_xvslei_b(v32i8_a, si5_b); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvslei.b( ++ ++ // __lasx_xvslei_h ++ // xd, xj, si5 ++ // V16HI, V16HI, QI ++ v16i16_r = __lasx_xvslei_h(v16i16_a, si5_b); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvslei.h( ++ ++ // __lasx_xvslei_w ++ // xd, xj, si5 ++ // V8SI, V8SI, QI ++ v8i32_r = __lasx_xvslei_w(v8i32_a, si5_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvslei.w( ++ ++ // __lasx_xvslei_d ++ // xd, xj, si5 ++ // V4DI, V4DI, QI ++ v4i64_r = __lasx_xvslei_d(v4i64_a, si5_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvslei.d( ++ ++ // __lasx_xvsle_bu ++ // xd, xj, xk ++ // V32QI, UV32QI, UV32QI ++ v32i8_r = __lasx_xvsle_bu(v32u8_a, v32u8_b); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvsle.bu( ++ ++ // __lasx_xvsle_hu ++ // xd, xj, xk ++ // V16HI, UV16HI, UV16HI ++ v16i16_r = __lasx_xvsle_hu(v16u16_a, v16u16_b); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvsle.hu( ++ ++ // __lasx_xvsle_wu ++ // xd, xj, xk ++ // V8SI, UV8SI, UV8SI ++ v8i32_r = __lasx_xvsle_wu(v8u32_a, v8u32_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvsle.wu( ++ ++ // __lasx_xvsle_du ++ // xd, xj, xk ++ // V4DI, UV4DI, UV4DI ++ v4i64_r = __lasx_xvsle_du(v4u64_a, v4u64_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvsle.du( ++ ++ // __lasx_xvslei_bu ++ // xd, xj, ui5 ++ // V32QI, UV32QI, UQI ++ v32i8_r = __lasx_xvslei_bu(v32u8_a, ui5_b); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvslei.bu( ++ ++ // __lasx_xvslei_hu ++ // xd, xj, ui5 ++ // V16HI, UV16HI, UQI ++ v16i16_r = __lasx_xvslei_hu(v16u16_a, ui5_b); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvslei.hu( ++ ++ // __lasx_xvslei_wu ++ // xd, xj, ui5 ++ // V8SI, UV8SI, UQI ++ v8i32_r = __lasx_xvslei_wu(v8u32_a, ui5_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvslei.wu( ++ ++ // __lasx_xvslei_du ++ // xd, xj, ui5 ++ // V4DI, UV4DI, UQI ++ v4i64_r = __lasx_xvslei_du(v4u64_a, ui5_b); // CHECK: call <4 x i64> 
@llvm.loongarch.lasx.xvslei.du( ++ ++ // __lasx_xvsat_b ++ // xd, xj, ui3 ++ // V32QI, V32QI, UQI ++ v32i8_r = __lasx_xvsat_b(v32i8_a, ui3_b); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvsat.b( ++ ++ // __lasx_xvsat_h ++ // xd, xj, ui4 ++ // V16HI, V16HI, UQI ++ v16i16_r = __lasx_xvsat_h(v16i16_a, ui4_b); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvsat.h( ++ ++ // __lasx_xvsat_w ++ // xd, xj, ui5 ++ // V8SI, V8SI, UQI ++ v8i32_r = __lasx_xvsat_w(v8i32_a, ui5_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvsat.w( ++ ++ // __lasx_xvsat_d ++ // xd, xj, ui6 ++ // V4DI, V4DI, UQI ++ v4i64_r = __lasx_xvsat_d(v4i64_a, ui6_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvsat.d( ++ ++ // __lasx_xvsat_bu ++ // xd, xj, ui3 ++ // UV32QI, UV32QI, UQI ++ v32u8_r = __lasx_xvsat_bu(v32u8_a, ui3_b); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvsat.bu( ++ ++ // __lasx_xvsat_hu ++ // xd, xj, ui4 ++ // UV16HI, UV16HI, UQI ++ v16u16_r = __lasx_xvsat_hu(v16u16_a, ui4_b); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvsat.hu( ++ ++ // __lasx_xvsat_wu ++ // xd, xj, ui5 ++ // UV8SI, UV8SI, UQI ++ v8u32_r = __lasx_xvsat_wu(v8u32_a, ui5_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvsat.wu( ++ ++ // __lasx_xvsat_du ++ // xd, xj, ui6 ++ // UV4DI, UV4DI, UQI ++ v4u64_r = __lasx_xvsat_du(v4u64_a, ui6_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvsat.du( ++ ++ // __lasx_xvadda_b ++ // xd, xj, xk ++ // V32QI, V32QI, V32QI ++ v32i8_r = __lasx_xvadda_b(v32i8_a, v32i8_b); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvadda.b( ++ ++ // __lasx_xvadda_h ++ // xd, xj, xk ++ // V16HI, V16HI, V16HI ++ v16i16_r = __lasx_xvadda_h(v16i16_a, v16i16_b); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvadda.h( ++ ++ // __lasx_xvadda_w ++ // xd, xj, xk ++ // V8SI, V8SI, V8SI ++ v8i32_r = __lasx_xvadda_w(v8i32_a, v8i32_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvadda.w( ++ ++ // __lasx_xvadda_d ++ // xd, xj, xk ++ // V4DI, V4DI, V4DI ++ v4i64_r = __lasx_xvadda_d(v4i64_a, v4i64_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvadda.d( ++ ++ // __lasx_xvsadd_b ++ // xd, xj, xk ++ // V32QI, V32QI, V32QI ++ v32i8_r = __lasx_xvsadd_b(v32i8_a, v32i8_b); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvsadd.b( ++ ++ // __lasx_xvsadd_h ++ // xd, xj, xk ++ // V16HI, V16HI, V16HI ++ v16i16_r = __lasx_xvsadd_h(v16i16_a, v16i16_b); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvsadd.h( ++ ++ // __lasx_xvsadd_w ++ // xd, xj, xk ++ // V8SI, V8SI, V8SI ++ v8i32_r = __lasx_xvsadd_w(v8i32_a, v8i32_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvsadd.w( ++ ++ // __lasx_xvsadd_d ++ // xd, xj, xk ++ // V4DI, V4DI, V4DI ++ v4i64_r = __lasx_xvsadd_d(v4i64_a, v4i64_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvsadd.d( ++ ++ // __lasx_xvsadd_bu ++ // xd, xj, xk ++ // UV32QI, UV32QI, UV32QI ++ v32u8_r = __lasx_xvsadd_bu(v32u8_a, v32u8_b); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvsadd.bu( ++ ++ // __lasx_xvsadd_hu ++ // xd, xj, xk ++ // UV16HI, UV16HI, UV16HI ++ v16u16_r = __lasx_xvsadd_hu(v16u16_a, v16u16_b); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvsadd.hu( ++ ++ // __lasx_xvsadd_wu ++ // xd, xj, xk ++ // UV8SI, UV8SI, UV8SI ++ v8u32_r = __lasx_xvsadd_wu(v8u32_a, v8u32_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvsadd.wu( ++ ++ // __lasx_xvsadd_du ++ // xd, xj, xk ++ // UV4DI, UV4DI, UV4DI ++ v4u64_r = __lasx_xvsadd_du(v4u64_a, v4u64_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvsadd.du( ++ ++ // __lasx_xvavg_b ++ // xd, xj, xk ++ // V32QI, V32QI, V32QI ++ v32i8_r = 
__lasx_xvavg_b(v32i8_a, v32i8_b); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvavg.b( ++ ++ // __lasx_xvavg_h ++ // xd, xj, xk ++ // V16HI, V16HI, V16HI ++ v16i16_r = __lasx_xvavg_h(v16i16_a, v16i16_b); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvavg.h( ++ ++ // __lasx_xvavg_w ++ // xd, xj, xk ++ // V8SI, V8SI, V8SI ++ v8i32_r = __lasx_xvavg_w(v8i32_a, v8i32_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvavg.w( ++ ++ // __lasx_xvavg_d ++ // xd, xj, xk ++ // V4DI, V4DI, V4DI ++ v4i64_r = __lasx_xvavg_d(v4i64_a, v4i64_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvavg.d( ++ ++ // __lasx_xvavg_bu ++ // xd, xj, xk ++ // UV32QI, UV32QI, UV32QI ++ v32u8_r = __lasx_xvavg_bu(v32u8_a, v32u8_b); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvavg.bu( ++ ++ // __lasx_xvavg_hu ++ // xd, xj, xk ++ // UV16HI, UV16HI, UV16HI ++ v16u16_r = __lasx_xvavg_hu(v16u16_a, v16u16_b); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvavg.hu( ++ ++ // __lasx_xvavg_wu ++ // xd, xj, xk ++ // UV8SI, UV8SI, UV8SI ++ v8u32_r = __lasx_xvavg_wu(v8u32_a, v8u32_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvavg.wu( ++ ++ // __lasx_xvavg_du ++ // xd, xj, xk ++ // UV4DI, UV4DI, UV4DI ++ v4u64_r = __lasx_xvavg_du(v4u64_a, v4u64_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvavg.du( ++ ++ // __lasx_xvavgr_b ++ // xd, xj, xk ++ // V32QI, V32QI, V32QI ++ v32i8_r = __lasx_xvavgr_b(v32i8_a, v32i8_b); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvavgr.b( ++ ++ // __lasx_xvavgr_h ++ // xd, xj, xk ++ // V16HI, V16HI, V16HI ++ v16i16_r = __lasx_xvavgr_h(v16i16_a, v16i16_b); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvavgr.h( ++ ++ // __lasx_xvavgr_w ++ // xd, xj, xk ++ // V8SI, V8SI, V8SI ++ v8i32_r = __lasx_xvavgr_w(v8i32_a, v8i32_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvavgr.w( ++ ++ // __lasx_xvavgr_d ++ // xd, xj, xk ++ // V4DI, V4DI, V4DI ++ v4i64_r = __lasx_xvavgr_d(v4i64_a, v4i64_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvavgr.d( ++ ++ // __lasx_xvavgr_bu ++ // xd, xj, xk ++ // UV32QI, UV32QI, UV32QI ++ v32u8_r = __lasx_xvavgr_bu(v32u8_a, v32u8_b); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvavgr.bu( ++ ++ // __lasx_xvavgr_hu ++ // xd, xj, xk ++ // UV16HI, UV16HI, UV16HI ++ v16u16_r = __lasx_xvavgr_hu(v16u16_a, v16u16_b); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvavgr.hu( ++ ++ // __lasx_xvavgr_wu ++ // xd, xj, xk ++ // UV8SI, UV8SI, UV8SI ++ v8u32_r = __lasx_xvavgr_wu(v8u32_a, v8u32_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvavgr.wu( ++ ++ // __lasx_xvavgr_du ++ // xd, xj, xk ++ // UV4DI, UV4DI, UV4DI ++ v4u64_r = __lasx_xvavgr_du(v4u64_a, v4u64_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvavgr.du( ++ ++ // __lasx_xvssub_b ++ // xd, xj, xk ++ // V32QI, V32QI, V32QI ++ v32i8_r = __lasx_xvssub_b(v32i8_a, v32i8_b); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvssub.b( ++ ++ // __lasx_xvssub_h ++ // xd, xj, xk ++ // V16HI, V16HI, V16HI ++ v16i16_r = __lasx_xvssub_h(v16i16_a, v16i16_b); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvssub.h( ++ ++ // __lasx_xvssub_w ++ // xd, xj, xk ++ // V8SI, V8SI, V8SI ++ v8i32_r = __lasx_xvssub_w(v8i32_a, v8i32_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvssub.w( ++ ++ // __lasx_xvssub_d ++ // xd, xj, xk ++ // V4DI, V4DI, V4DI ++ v4i64_r = __lasx_xvssub_d(v4i64_a, v4i64_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvssub.d( ++ ++ // __lasx_xvssub_bu ++ // xd, xj, xk ++ // UV32QI, UV32QI, UV32QI ++ v32u8_r = __lasx_xvssub_bu(v32u8_a, v32u8_b); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvssub.bu( ++ 
++ // __lasx_xvssub_hu ++ // xd, xj, xk ++ // UV16HI, UV16HI, UV16HI ++ v16u16_r = __lasx_xvssub_hu(v16u16_a, v16u16_b); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvssub.hu( ++ ++ // __lasx_xvssub_wu ++ // xd, xj, xk ++ // UV8SI, UV8SI, UV8SI ++ v8u32_r = __lasx_xvssub_wu(v8u32_a, v8u32_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvssub.wu( ++ ++ // __lasx_xvssub_du ++ // xd, xj, xk ++ // UV4DI, UV4DI, UV4DI ++ v4u64_r = __lasx_xvssub_du(v4u64_a, v4u64_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvssub.du( ++ ++ // __lasx_xvabsd_b ++ // xd, xj, xk ++ // V32QI, V32QI, V32QI ++ v32i8_r = __lasx_xvabsd_b(v32i8_a, v32i8_b); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvabsd.b( ++ ++ // __lasx_xvabsd_h ++ // xd, xj, xk ++ // V16HI, V16HI, V16HI ++ v16i16_r = __lasx_xvabsd_h(v16i16_a, v16i16_b); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvabsd.h( ++ ++ // __lasx_xvabsd_w ++ // xd, xj, xk ++ // V8SI, V8SI, V8SI ++ v8i32_r = __lasx_xvabsd_w(v8i32_a, v8i32_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvabsd.w( ++ ++ // __lasx_xvabsd_d ++ // xd, xj, xk ++ // V4DI, V4DI, V4DI ++ v4i64_r = __lasx_xvabsd_d(v4i64_a, v4i64_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvabsd.d( ++ ++ // __lasx_xvabsd_bu ++ // xd, xj, xk ++ // UV32QI, UV32QI, UV32QI ++ v32u8_r = __lasx_xvabsd_bu(v32u8_a, v32u8_b); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvabsd.bu( ++ ++ // __lasx_xvabsd_hu ++ // xd, xj, xk ++ // UV16HI, UV16HI, UV16HI ++ v16u16_r = __lasx_xvabsd_hu(v16u16_a, v16u16_b); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvabsd.hu( ++ ++ // __lasx_xvabsd_wu ++ // xd, xj, xk ++ // UV8SI, UV8SI, UV8SI ++ v8u32_r = __lasx_xvabsd_wu(v8u32_a, v8u32_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvabsd.wu( ++ ++ // __lasx_xvabsd_du ++ // xd, xj, xk ++ // UV4DI, UV4DI, UV4DI ++ v4u64_r = __lasx_xvabsd_du(v4u64_a, v4u64_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvabsd.du( ++ ++ // __lasx_xvmul_b ++ // xd, xj, xk ++ // V32QI, V32QI, V32QI ++ v32i8_r = __lasx_xvmul_b(v32i8_a, v32i8_b); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvmul.b( ++ ++ // __lasx_xvmul_h ++ // xd, xj, xk ++ // V16HI, V16HI, V16HI ++ v16i16_r = __lasx_xvmul_h(v16i16_a, v16i16_b); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvmul.h( ++ ++ // __lasx_xvmul_w ++ // xd, xj, xk ++ // V8SI, V8SI, V8SI ++ v8i32_r = __lasx_xvmul_w(v8i32_a, v8i32_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvmul.w( ++ ++ // __lasx_xvmul_d ++ // xd, xj, xk ++ // V4DI, V4DI, V4DI ++ v4i64_r = __lasx_xvmul_d(v4i64_a, v4i64_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvmul.d( ++ ++ // __lasx_xvmadd_b ++ // xd, xj, xk ++ // V32QI, V32QI, V32QI, V32QI ++ v32i8_r = __lasx_xvmadd_b(v32i8_a, v32i8_b, v32i8_c); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvmadd.b( ++ ++ // __lasx_xvmadd_h ++ // xd, xj, xk ++ // V16HI, V16HI, V16HI, V16HI ++ v16i16_r = __lasx_xvmadd_h(v16i16_a, v16i16_b, v16i16_c); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvmadd.h( ++ ++ // __lasx_xvmadd_w ++ // xd, xj, xk ++ // V8SI, V8SI, V8SI, V8SI ++ v8i32_r = __lasx_xvmadd_w(v8i32_a, v8i32_b, v8i32_c); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvmadd.w( ++ ++ // __lasx_xvmadd_d ++ // xd, xj, xk ++ // V4DI, V4DI, V4DI, V4DI ++ v4i64_r = __lasx_xvmadd_d(v4i64_a, v4i64_b, v4i64_c); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvmadd.d( ++ ++ // __lasx_xvmsub_b ++ // xd, xj, xk ++ // V32QI, V32QI, V32QI, V32QI ++ v32i8_r = __lasx_xvmsub_b(v32i8_a, v32i8_b, v32i8_c); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvmsub.b( ++ ++ // 
__lasx_xvmsub_h ++ // xd, xj, xk ++ // V16HI, V16HI, V16HI, V16HI ++ v16i16_r = __lasx_xvmsub_h(v16i16_a, v16i16_b, v16i16_c); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvmsub.h( ++ ++ // __lasx_xvmsub_w ++ // xd, xj, xk ++ // V8SI, V8SI, V8SI, V8SI ++ v8i32_r = __lasx_xvmsub_w(v8i32_a, v8i32_b, v8i32_c); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvmsub.w( ++ ++ // __lasx_xvmsub_d ++ // xd, xj, xk ++ // V4DI, V4DI, V4DI, V4DI ++ v4i64_r = __lasx_xvmsub_d(v4i64_a, v4i64_b, v4i64_c); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvmsub.d( ++ ++ // __lasx_xvdiv_b ++ // xd, xj, xk ++ // V32QI, V32QI, V32QI ++ v32i8_r = __lasx_xvdiv_b(v32i8_a, v32i8_b); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvdiv.b( ++ ++ // __lasx_xvdiv_h ++ // xd, xj, xk ++ // V16HI, V16HI, V16HI ++ v16i16_r = __lasx_xvdiv_h(v16i16_a, v16i16_b); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvdiv.h( ++ ++ // __lasx_xvdiv_w ++ // xd, xj, xk ++ // V8SI, V8SI, V8SI ++ v8i32_r = __lasx_xvdiv_w(v8i32_a, v8i32_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvdiv.w( ++ ++ // __lasx_xvdiv_d ++ // xd, xj, xk ++ // V4DI, V4DI, V4DI ++ v4i64_r = __lasx_xvdiv_d(v4i64_a, v4i64_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvdiv.d( ++ ++ // __lasx_xvdiv_bu ++ // xd, xj, xk ++ // UV32QI, UV32QI, UV32QI ++ v32u8_r = __lasx_xvdiv_bu(v32u8_a, v32u8_b); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvdiv.bu( ++ ++ // __lasx_xvdiv_hu ++ // xd, xj, xk ++ // UV16HI, UV16HI, UV16HI ++ v16u16_r = __lasx_xvdiv_hu(v16u16_a, v16u16_b); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvdiv.hu( ++ ++ // __lasx_xvdiv_wu ++ // xd, xj, xk ++ // UV8SI, UV8SI, UV8SI ++ v8u32_r = __lasx_xvdiv_wu(v8u32_a, v8u32_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvdiv.wu( ++ ++ // __lasx_xvdiv_du ++ // xd, xj, xk ++ // UV4DI, UV4DI, UV4DI ++ v4u64_r = __lasx_xvdiv_du(v4u64_a, v4u64_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvdiv.du( ++ ++ // __lasx_xvhaddw_h_b ++ // xd, xj, xk ++ // V16HI, V32QI, V32QI ++ v16i16_r = __lasx_xvhaddw_h_b(v32i8_a, v32i8_b); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvhaddw.h.b( ++ ++ // __lasx_xvhaddw_w_h ++ // xd, xj, xk ++ // V8SI, V16HI, V16HI ++ v8i32_r = __lasx_xvhaddw_w_h(v16i16_a, v16i16_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvhaddw.w.h( ++ ++ // __lasx_xvhaddw_d_w ++ // xd, xj, xk ++ // V4DI, V8SI, V8SI ++ v4i64_r = __lasx_xvhaddw_d_w(v8i32_a, v8i32_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvhaddw.d.w( ++ ++ // __lasx_xvhaddw_hu_bu ++ // xd, xj, xk ++ // UV16HI, UV32QI, UV32QI ++ v16u16_r = __lasx_xvhaddw_hu_bu(v32u8_a, v32u8_b); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvhaddw.hu.bu( ++ ++ // __lasx_xvhaddw_wu_hu ++ // xd, xj, xk ++ // UV8SI, UV16HI, UV16HI ++ v8u32_r = __lasx_xvhaddw_wu_hu(v16u16_a, v16u16_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvhaddw.wu.hu( ++ ++ // __lasx_xvhaddw_du_wu ++ // xd, xj, xk ++ // UV4DI, UV8SI, UV8SI ++ v4u64_r = __lasx_xvhaddw_du_wu(v8u32_a, v8u32_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvhaddw.du.wu( ++ ++ // __lasx_xvhsubw_h_b ++ // xd, xj, xk ++ // V16HI, V32QI, V32QI ++ v16i16_r = __lasx_xvhsubw_h_b(v32i8_a, v32i8_b); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvhsubw.h.b( ++ ++ // __lasx_xvhsubw_w_h ++ // xd, xj, xk ++ // V8SI, V16HI, V16HI ++ v8i32_r = __lasx_xvhsubw_w_h(v16i16_a, v16i16_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvhsubw.w.h( ++ ++ // __lasx_xvhsubw_d_w ++ // xd, xj, xk ++ // V4DI, V8SI, V8SI ++ v4i64_r = __lasx_xvhsubw_d_w(v8i32_a, v8i32_b); // CHECK: call <4 x i64> 
@llvm.loongarch.lasx.xvhsubw.d.w( ++ ++ // __lasx_xvhsubw_hu_bu ++ // xd, xj, xk ++ // V16HI, UV32QI, UV32QI ++ v16i16_r = __lasx_xvhsubw_hu_bu(v32u8_a, v32u8_b); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvhsubw.hu.bu( ++ ++ // __lasx_xvhsubw_wu_hu ++ // xd, xj, xk ++ // V8SI, UV16HI, UV16HI ++ v8i32_r = __lasx_xvhsubw_wu_hu(v16u16_a, v16u16_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvhsubw.wu.hu( ++ ++ // __lasx_xvhsubw_du_wu ++ // xd, xj, xk ++ // V4DI, UV8SI, UV8SI ++ v4i64_r = __lasx_xvhsubw_du_wu(v8u32_a, v8u32_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvhsubw.du.wu( ++ ++ // __lasx_xvmod_b ++ // xd, xj, xk ++ // V32QI, V32QI, V32QI ++ v32i8_r = __lasx_xvmod_b(v32i8_a, v32i8_b); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvmod.b( ++ ++ // __lasx_xvmod_h ++ // xd, xj, xk ++ // V16HI, V16HI, V16HI ++ v16i16_r = __lasx_xvmod_h(v16i16_a, v16i16_b); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvmod.h( ++ ++ // __lasx_xvmod_w ++ // xd, xj, xk ++ // V8SI, V8SI, V8SI ++ v8i32_r = __lasx_xvmod_w(v8i32_a, v8i32_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvmod.w( ++ ++ // __lasx_xvmod_d ++ // xd, xj, xk ++ // V4DI, V4DI, V4DI ++ v4i64_r = __lasx_xvmod_d(v4i64_a, v4i64_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvmod.d( ++ ++ // __lasx_xvmod_bu ++ // xd, xj, xk ++ // UV32QI, UV32QI, UV32QI ++ v32u8_r = __lasx_xvmod_bu(v32u8_a, v32u8_b); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvmod.bu( ++ ++ // __lasx_xvmod_hu ++ // xd, xj, xk ++ // UV16HI, UV16HI, UV16HI ++ v16u16_r = __lasx_xvmod_hu(v16u16_a, v16u16_b); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvmod.hu( ++ ++ // __lasx_xvmod_wu ++ // xd, xj, xk ++ // UV8SI, UV8SI, UV8SI ++ v8u32_r = __lasx_xvmod_wu(v8u32_a, v8u32_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvmod.wu( ++ ++ // __lasx_xvmod_du ++ // xd, xj, xk ++ // UV4DI, UV4DI, UV4DI ++ v4u64_r = __lasx_xvmod_du(v4u64_a, v4u64_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvmod.du( ++ ++ // __lasx_xvrepl128vei_b ++ // xd, xj, ui4 ++ // V32QI, V32QI, UQI ++ v32i8_r = __lasx_xvrepl128vei_b(v32i8_a, ui4_b); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvrepl128vei.b( ++ ++ // __lasx_xvrepl128vei_h ++ // xd, xj, ui3 ++ // V16HI, V16HI, UQI ++ v16i16_r = __lasx_xvrepl128vei_h(v16i16_a, ui3_b); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvrepl128vei.h( ++ ++ // __lasx_xvrepl128vei_w ++ // xd, xj, ui2 ++ // V8SI, V8SI, UQI ++ v8i32_r = __lasx_xvrepl128vei_w(v8i32_a, ui2_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvrepl128vei.w( ++ ++ // __lasx_xvrepl128vei_d ++ // xd, xj, ui1 ++ // V4DI, V4DI, UQI ++ v4i64_r = __lasx_xvrepl128vei_d(v4i64_a, ui1_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvrepl128vei.d( ++ ++ // __lasx_xvpickev_b ++ // xd, xj, xk ++ // V32QI, V32QI, V32QI ++ v32i8_r = __lasx_xvpickev_b(v32i8_a, v32i8_b); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvpickev.b( ++ ++ // __lasx_xvpickev_h ++ // xd, xj, xk ++ // V16HI, V16HI, V16HI ++ v16i16_r = __lasx_xvpickev_h(v16i16_a, v16i16_b); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvpickev.h( ++ ++ // __lasx_xvpickev_w ++ // xd, xj, xk ++ // V8SI, V8SI, V8SI ++ v8i32_r = __lasx_xvpickev_w(v8i32_a, v8i32_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvpickev.w( ++ ++ // __lasx_xvpickev_d ++ // xd, xj, xk ++ // V4DI, V4DI, V4DI ++ v4i64_r = __lasx_xvpickev_d(v4i64_a, v4i64_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvpickev.d( ++ ++ // __lasx_xvpickod_b ++ // xd, xj, xk ++ // V32QI, V32QI, V32QI ++ v32i8_r = __lasx_xvpickod_b(v32i8_a, v32i8_b); // 
CHECK: call <32 x i8> @llvm.loongarch.lasx.xvpickod.b( ++ ++ // __lasx_xvpickod_h ++ // xd, xj, xk ++ // V16HI, V16HI, V16HI ++ v16i16_r = __lasx_xvpickod_h(v16i16_a, v16i16_b); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvpickod.h( ++ ++ // __lasx_xvpickod_w ++ // xd, xj, xk ++ // V8SI, V8SI, V8SI ++ v8i32_r = __lasx_xvpickod_w(v8i32_a, v8i32_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvpickod.w( ++ ++ // __lasx_xvpickod_d ++ // xd, xj, xk ++ // V4DI, V4DI, V4DI ++ v4i64_r = __lasx_xvpickod_d(v4i64_a, v4i64_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvpickod.d( ++ ++ // __lasx_xvilvh_b ++ // xd, xj, xk ++ // V32QI, V32QI, V32QI ++ v32i8_r = __lasx_xvilvh_b(v32i8_a, v32i8_b); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvilvh.b( ++ ++ // __lasx_xvilvh_h ++ // xd, xj, xk ++ // V16HI, V16HI, V16HI ++ v16i16_r = __lasx_xvilvh_h(v16i16_a, v16i16_b); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvilvh.h( ++ ++ // __lasx_xvilvh_w ++ // xd, xj, xk ++ // V8SI, V8SI, V8SI ++ v8i32_r = __lasx_xvilvh_w(v8i32_a, v8i32_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvilvh.w( ++ ++ // __lasx_xvilvh_d ++ // xd, xj, xk ++ // V4DI, V4DI, V4DI ++ v4i64_r = __lasx_xvilvh_d(v4i64_a, v4i64_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvilvh.d( ++ ++ // __lasx_xvilvl_b ++ // xd, xj, xk ++ // V32QI, V32QI, V32QI ++ v32i8_r = __lasx_xvilvl_b(v32i8_a, v32i8_b); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvilvl.b( ++ ++ // __lasx_xvilvl_h ++ // xd, xj, xk ++ // V16HI, V16HI, V16HI ++ v16i16_r = __lasx_xvilvl_h(v16i16_a, v16i16_b); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvilvl.h( ++ ++ // __lasx_xvilvl_w ++ // xd, xj, xk ++ // V8SI, V8SI, V8SI ++ v8i32_r = __lasx_xvilvl_w(v8i32_a, v8i32_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvilvl.w( ++ ++ // __lasx_xvilvl_d ++ // xd, xj, xk ++ // V4DI, V4DI, V4DI ++ v4i64_r = __lasx_xvilvl_d(v4i64_a, v4i64_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvilvl.d( ++ ++ // __lasx_xvpackev_b ++ // xd, xj, xk ++ // V32QI, V32QI, V32QI ++ v32i8_r = __lasx_xvpackev_b(v32i8_a, v32i8_b); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvpackev.b( ++ ++ // __lasx_xvpackev_h ++ // xd, xj, xk ++ // V16HI, V16HI, V16HI ++ v16i16_r = __lasx_xvpackev_h(v16i16_a, v16i16_b); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvpackev.h( ++ ++ // __lasx_xvpackev_w ++ // xd, xj, xk ++ // V8SI, V8SI, V8SI ++ v8i32_r = __lasx_xvpackev_w(v8i32_a, v8i32_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvpackev.w( ++ ++ // __lasx_xvpackev_d ++ // xd, xj, xk ++ // V4DI, V4DI, V4DI ++ v4i64_r = __lasx_xvpackev_d(v4i64_a, v4i64_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvpackev.d( ++ ++ // __lasx_xvpackod_b ++ // xd, xj, xk ++ // V32QI, V32QI, V32QI ++ v32i8_r = __lasx_xvpackod_b(v32i8_a, v32i8_b); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvpackod.b( ++ ++ // __lasx_xvpackod_h ++ // xd, xj, xk ++ // V16HI, V16HI, V16HI ++ v16i16_r = __lasx_xvpackod_h(v16i16_a, v16i16_b); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvpackod.h( ++ ++ // __lasx_xvpackod_w ++ // xd, xj, xk ++ // V8SI, V8SI, V8SI ++ v8i32_r = __lasx_xvpackod_w(v8i32_a, v8i32_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvpackod.w( ++ ++ // __lasx_xvpackod_d ++ // xd, xj, xk ++ // V4DI, V4DI, V4DI ++ v4i64_r = __lasx_xvpackod_d(v4i64_a, v4i64_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvpackod.d( ++ ++ // __lasx_xvshuf_b ++ // xd, xj, xk, xa ++ // V32QI, V32QI, V32QI, V32QI ++ v32i8_r = __lasx_xvshuf_b(v32i8_a, v32i8_b, v32i8_c); // CHECK: call <32 x i8> 
@llvm.loongarch.lasx.xvshuf.b( ++ ++ // __lasx_xvshuf_h ++ // xd, xj, xk ++ // V16HI, V16HI, V16HI, V16HI ++ v16i16_r = __lasx_xvshuf_h(v16i16_a, v16i16_b, v16i16_c); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvshuf.h( ++ ++ // __lasx_xvshuf_w ++ // xd, xj, xk ++ // V8SI, V8SI, V8SI, V8SI ++ v8i32_r = __lasx_xvshuf_w(v8i32_a, v8i32_b, v8i32_c); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvshuf.w( ++ ++ // __lasx_xvshuf_d ++ // xd, xj, xk ++ // V4DI, V4DI, V4DI, V4DI ++ v4i64_r = __lasx_xvshuf_d(v4i64_a, v4i64_b, v4i64_c); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvshuf.d( ++ ++ // __lasx_xvand_v ++ // xd, xj, xk ++ // UV32QI, UV32QI, UV32QI ++ v32u8_r = __lasx_xvand_v(v32u8_a, v32u8_b); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvand.v( ++ ++ // __lasx_xvandi_b ++ // xd, xj, ui8 ++ // UV32QI, UV32QI, UQI ++ v32u8_r = __lasx_xvandi_b(v32u8_a, ui8_b); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvandi.b( ++ ++ // __lasx_xvor_v ++ // xd, xj, xk ++ // UV32QI, UV32QI, UV32QI ++ v32u8_r = __lasx_xvor_v(v32u8_a, v32u8_b); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvor.v( ++ ++ // __lasx_xvori_b ++ // xd, xj, ui8 ++ // UV32QI, UV32QI, UQI ++ v32u8_r = __lasx_xvori_b(v32u8_a, ui8_b); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvori.b( ++ ++ // __lasx_xvnor_v ++ // xd, xj, xk ++ // UV32QI, UV32QI, UV32QI ++ v32u8_r = __lasx_xvnor_v(v32u8_a, v32u8_b); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvnor.v( ++ ++ // __lasx_xvnori_b ++ // xd, xj, ui8 ++ // UV32QI, UV32QI, UQI ++ v32u8_r = __lasx_xvnori_b(v32u8_a, ui8_b); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvnori.b( ++ ++ // __lasx_xvxor_v ++ // xd, xj, xk ++ // UV32QI, UV32QI, UV32QI ++ v32u8_r = __lasx_xvxor_v(v32u8_a, v32u8_b); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvxor.v( ++ ++ // __lasx_xvxori_b ++ // xd, xj, ui8 ++ // UV32QI, UV32QI, UQI ++ v32u8_r = __lasx_xvxori_b(v32u8_a, ui8_b); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvxori.b( ++ ++ // __lasx_xvbitsel_v ++ // xd, xj, xk, xa ++ // UV32QI, UV32QI, UV32QI, UV32QI ++ v32u8_r = __lasx_xvbitsel_v(v32u8_a, v32u8_b, v32u8_c); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvbitsel.v( ++ ++ // __lasx_xvbitseli_b ++ // xd, xj, ui8 ++ // UV32QI, UV32QI, UV32QI, UQI ++ v32u8_r = __lasx_xvbitseli_b(v32u8_a, v32u8_b, ui8); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvbitseli.b( ++ ++ // __lasx_xvshuf4i_b ++ // xd, xj, ui8 ++ // V32QI, V32QI, USI ++ v32i8_r = __lasx_xvshuf4i_b(v32i8_a, ui8_b); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvshuf4i.b( ++ ++ // __lasx_xvshuf4i_h ++ // xd, xj, ui8 ++ // V16HI, V16HI, USI ++ v16i16_r = __lasx_xvshuf4i_h(v16i16_a, ui8_b); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvshuf4i.h( ++ ++ // __lasx_xvshuf4i_w ++ // xd, xj, ui8 ++ // V8SI, V8SI, USI ++ v8i32_r = __lasx_xvshuf4i_w(v8i32_a, ui8_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvshuf4i.w( ++ ++ // __lasx_xvreplgr2vr_b ++ // xd, rj ++ // V32QI, SI ++ v32i8_r = __lasx_xvreplgr2vr_b(i32_a); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvreplgr2vr.b( ++ ++ // __lasx_xvreplgr2vr_h ++ // xd, rj ++ // V16HI, SI ++ v16i16_r = __lasx_xvreplgr2vr_h(i32_a); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvreplgr2vr.h( ++ ++ // __lasx_xvreplgr2vr_w ++ // xd, rj ++ // V8SI, SI ++ v8i32_r = __lasx_xvreplgr2vr_w(i32_a); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvreplgr2vr.w( ++ ++ // __lasx_xvreplgr2vr_d ++ // xd, rj ++ // V4DI, DI ++ v4i64_r = __lasx_xvreplgr2vr_d(i64_a); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvreplgr2vr.d( ++ ++ // __lasx_xvpcnt_b 
++ // xd, xj ++ // V32QI, V32QI ++ v32i8_r = __lasx_xvpcnt_b(v32i8_a); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvpcnt.b( ++ ++ // __lasx_xvpcnt_h ++ // xd, xj ++ // V16HI, V16HI ++ v16i16_r = __lasx_xvpcnt_h(v16i16_a); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvpcnt.h( ++ ++ // __lasx_xvpcnt_w ++ // xd, xj ++ // V8SI, V8SI ++ v8i32_r = __lasx_xvpcnt_w(v8i32_a); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvpcnt.w( ++ ++ // __lasx_xvpcnt_d ++ // xd, xj ++ // V4DI, V4DI ++ v4i64_r = __lasx_xvpcnt_d(v4i64_a); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvpcnt.d( ++ ++ // __lasx_xvclo_b ++ // xd, xj ++ // V32QI, V32QI ++ v32i8_r = __lasx_xvclo_b(v32i8_a); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvclo.b( ++ ++ // __lasx_xvclo_h ++ // xd, xj ++ // V16HI, V16HI ++ v16i16_r = __lasx_xvclo_h(v16i16_a); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvclo.h( ++ ++ // __lasx_xvclo_w ++ // xd, xj ++ // V8SI, V8SI ++ v8i32_r = __lasx_xvclo_w(v8i32_a); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvclo.w( ++ ++ // __lasx_xvclo_d ++ // xd, xj ++ // V4DI, V4DI ++ v4i64_r = __lasx_xvclo_d(v4i64_a); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvclo.d( ++ ++ // __lasx_xvclz_b ++ // xd, xj ++ // V32QI, V32QI ++ v32i8_r = __lasx_xvclz_b(v32i8_a); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvclz.b( ++ ++ // __lasx_xvclz_h ++ // xd, xj ++ // V16HI, V16HI ++ v16i16_r = __lasx_xvclz_h(v16i16_a); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvclz.h( ++ ++ // __lasx_xvclz_w ++ // xd, xj ++ // V8SI, V8SI ++ v8i32_r = __lasx_xvclz_w(v8i32_a); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvclz.w( ++ ++ // __lasx_xvclz_d ++ // xd, xj ++ // V4DI, V4DI ++ v4i64_r = __lasx_xvclz_d(v4i64_a); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvclz.d( ++ ++ // __lasx_xvfcmp_caf_s ++ // xd, xj, xk ++ // V8SI, V8SF, V8SF ++ v8i32_r = __lasx_xvfcmp_caf_s(v8f32_a, v8f32_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvfcmp.caf.s( ++ ++ // __lasx_xvfcmp_caf_d ++ // xd, xj, xk ++ // V4DI, V4DF, V4DF ++ v4i64_r = __lasx_xvfcmp_caf_d(v4f64_a, v4f64_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvfcmp.caf.d( ++ ++ // __lasx_xvfcmp_cor_s ++ // xd, xj, xk ++ // V8SI, V8SF, V8SF ++ v8i32_r = __lasx_xvfcmp_cor_s(v8f32_a, v8f32_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvfcmp.cor.s( ++ ++ // __lasx_xvfcmp_cor_d ++ // xd, xj, xk ++ // V4DI, V4DF, V4DF ++ v4i64_r = __lasx_xvfcmp_cor_d(v4f64_a, v4f64_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvfcmp.cor.d( ++ ++ // __lasx_xvfcmp_cun_s ++ // xd, xj, xk ++ // V8SI, V8SF, V8SF ++ v8i32_r = __lasx_xvfcmp_cun_s(v8f32_a, v8f32_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvfcmp.cun.s( ++ ++ // __lasx_xvfcmp_cun_d ++ // xd, xj, xk ++ // V4DI, V4DF, V4DF ++ v4i64_r = __lasx_xvfcmp_cun_d(v4f64_a, v4f64_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvfcmp.cun.d( ++ ++ // __lasx_xvfcmp_cune_s ++ // xd, xj, xk ++ // V8SI, V8SF, V8SF ++ v8i32_r = __lasx_xvfcmp_cune_s(v8f32_a, v8f32_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvfcmp.cune.s( ++ ++ // __lasx_xvfcmp_cune_d ++ // xd, xj, xk ++ // V4DI, V4DF, V4DF ++ v4i64_r = __lasx_xvfcmp_cune_d(v4f64_a, v4f64_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvfcmp.cune.d( ++ ++ // __lasx_xvfcmp_cueq_s ++ // xd, xj, xk ++ // V8SI, V8SF, V8SF ++ v8i32_r = __lasx_xvfcmp_cueq_s(v8f32_a, v8f32_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvfcmp.cueq.s( ++ ++ // __lasx_xvfcmp_cueq_d ++ // xd, xj, xk ++ // V4DI, V4DF, V4DF ++ v4i64_r = __lasx_xvfcmp_cueq_d(v4f64_a, v4f64_b); // CHECK: call <4 x 
i64> @llvm.loongarch.lasx.xvfcmp.cueq.d( ++ ++ // __lasx_xvfcmp_ceq_s ++ // xd, xj, xk ++ // V8SI, V8SF, V8SF ++ v8i32_r = __lasx_xvfcmp_ceq_s(v8f32_a, v8f32_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvfcmp.ceq.s( ++ ++ // __lasx_xvfcmp_ceq_d ++ // xd, xj, xk ++ // V4DI, V4DF, V4DF ++ v4i64_r = __lasx_xvfcmp_ceq_d(v4f64_a, v4f64_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvfcmp.ceq.d( ++ ++ // __lasx_xvfcmp_cne_s ++ // xd, xj, xk ++ // V8SI, V8SF, V8SF ++ v8i32_r = __lasx_xvfcmp_cne_s(v8f32_a, v8f32_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvfcmp.cne.s( ++ ++ // __lasx_xvfcmp_cne_d ++ // xd, xj, xk ++ // V4DI, V4DF, V4DF ++ v4i64_r = __lasx_xvfcmp_cne_d(v4f64_a, v4f64_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvfcmp.cne.d( ++ ++ // __lasx_xvfcmp_clt_s ++ // xd, xj, xk ++ // V8SI, V8SF, V8SF ++ v8i32_r = __lasx_xvfcmp_clt_s(v8f32_a, v8f32_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvfcmp.clt.s( ++ ++ // __lasx_xvfcmp_clt_d ++ // xd, xj, xk ++ // V4DI, V4DF, V4DF ++ v4i64_r = __lasx_xvfcmp_clt_d(v4f64_a, v4f64_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvfcmp.clt.d( ++ ++ // __lasx_xvfcmp_cult_s ++ // xd, xj, xk ++ // V8SI, V8SF, V8SF ++ v8i32_r = __lasx_xvfcmp_cult_s(v8f32_a, v8f32_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvfcmp.cult.s( ++ ++ // __lasx_xvfcmp_cult_d ++ // xd, xj, xk ++ // V4DI, V4DF, V4DF ++ v4i64_r = __lasx_xvfcmp_cult_d(v4f64_a, v4f64_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvfcmp.cult.d( ++ ++ // __lasx_xvfcmp_cle_s ++ // xd, xj, xk ++ // V8SI, V8SF, V8SF ++ v8i32_r = __lasx_xvfcmp_cle_s(v8f32_a, v8f32_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvfcmp.cle.s( ++ ++ // __lasx_xvfcmp_cle_d ++ // xd, xj, xk ++ // V4DI, V4DF, V4DF ++ v4i64_r = __lasx_xvfcmp_cle_d(v4f64_a, v4f64_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvfcmp.cle.d( ++ ++ // __lasx_xvfcmp_cule_s ++ // xd, xj, xk ++ // V8SI, V8SF, V8SF ++ v8i32_r = __lasx_xvfcmp_cule_s(v8f32_a, v8f32_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvfcmp.cule.s( ++ ++ // __lasx_xvfcmp_cule_d ++ // xd, xj, xk ++ // V4DI, V4DF, V4DF ++ v4i64_r = __lasx_xvfcmp_cule_d(v4f64_a, v4f64_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvfcmp.cule.d( ++ ++ // __lasx_xvfcmp_saf_s ++ // xd, xj, xk ++ // V8SI, V8SF, V8SF ++ v8i32_r = __lasx_xvfcmp_saf_s(v8f32_a, v8f32_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvfcmp.saf.s( ++ ++ // __lasx_xvfcmp_saf_d ++ // xd, xj, xk ++ // V4DI, V4DF, V4DF ++ v4i64_r = __lasx_xvfcmp_saf_d(v4f64_a, v4f64_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvfcmp.saf.d( ++ ++ // __lasx_xvfcmp_sor_s ++ // xd, xj, xk ++ // V8SI, V8SF, V8SF ++ v8i32_r = __lasx_xvfcmp_sor_s(v8f32_a, v8f32_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvfcmp.sor.s( ++ ++ // __lasx_xvfcmp_sor_d ++ // xd, xj, xk ++ // V4DI, V4DF, V4DF ++ v4i64_r = __lasx_xvfcmp_sor_d(v4f64_a, v4f64_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvfcmp.sor.d( ++ ++ // __lasx_xvfcmp_sun_s ++ // xd, xj, xk ++ // V8SI, V8SF, V8SF ++ v8i32_r = __lasx_xvfcmp_sun_s(v8f32_a, v8f32_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvfcmp.sun.s( ++ ++ // __lasx_xvfcmp_sun_d ++ // xd, xj, xk ++ // V4DI, V4DF, V4DF ++ v4i64_r = __lasx_xvfcmp_sun_d(v4f64_a, v4f64_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvfcmp.sun.d( ++ ++ // __lasx_xvfcmp_sune_s ++ // xd, xj, xk ++ // V8SI, V8SF, V8SF ++ v8i32_r = __lasx_xvfcmp_sune_s(v8f32_a, v8f32_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvfcmp.sune.s( ++ ++ // __lasx_xvfcmp_sune_d ++ // xd, xj, xk ++ // V4DI, 
V4DF, V4DF ++ v4i64_r = __lasx_xvfcmp_sune_d(v4f64_a, v4f64_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvfcmp.sune.d( ++ ++ // __lasx_xvfcmp_sueq_s ++ // xd, xj, xk ++ // V8SI, V8SF, V8SF ++ v8i32_r = __lasx_xvfcmp_sueq_s(v8f32_a, v8f32_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvfcmp.sueq.s( ++ ++ // __lasx_xvfcmp_sueq_d ++ // xd, xj, xk ++ // V4DI, V4DF, V4DF ++ v4i64_r = __lasx_xvfcmp_sueq_d(v4f64_a, v4f64_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvfcmp.sueq.d( ++ ++ // __lasx_xvfcmp_seq_s ++ // xd, xj, xk ++ // V8SI, V8SF, V8SF ++ v8i32_r = __lasx_xvfcmp_seq_s(v8f32_a, v8f32_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvfcmp.seq.s( ++ ++ // __lasx_xvfcmp_seq_d ++ // xd, xj, xk ++ // V4DI, V4DF, V4DF ++ v4i64_r = __lasx_xvfcmp_seq_d(v4f64_a, v4f64_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvfcmp.seq.d( ++ ++ // __lasx_xvfcmp_sne_s ++ // xd, xj, xk ++ // V8SI, V8SF, V8SF ++ v8i32_r = __lasx_xvfcmp_sne_s(v8f32_a, v8f32_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvfcmp.sne.s( ++ ++ // __lasx_xvfcmp_sne_d ++ // xd, xj, xk ++ // V4DI, V4DF, V4DF ++ v4i64_r = __lasx_xvfcmp_sne_d(v4f64_a, v4f64_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvfcmp.sne.d( ++ ++ // __lasx_xvfcmp_slt_s ++ // xd, xj, xk ++ // V8SI, V8SF, V8SF ++ v8i32_r = __lasx_xvfcmp_slt_s(v8f32_a, v8f32_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvfcmp.slt.s( ++ ++ // __lasx_xvfcmp_slt_d ++ // xd, xj, xk ++ // V4DI, V4DF, V4DF ++ v4i64_r = __lasx_xvfcmp_slt_d(v4f64_a, v4f64_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvfcmp.slt.d( ++ ++ // __lasx_xvfcmp_sult_s ++ // xd, xj, xk ++ // V8SI, V8SF, V8SF ++ v8i32_r = __lasx_xvfcmp_sult_s(v8f32_a, v8f32_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvfcmp.sult.s( ++ ++ // __lasx_xvfcmp_sult_d ++ // xd, xj, xk ++ // V4DI, V4DF, V4DF ++ v4i64_r = __lasx_xvfcmp_sult_d(v4f64_a, v4f64_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvfcmp.sult.d( ++ ++ // __lasx_xvfcmp_sle_s ++ // xd, xj, xk ++ // V8SI, V8SF, V8SF ++ v8i32_r = __lasx_xvfcmp_sle_s(v8f32_a, v8f32_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvfcmp.sle.s( ++ ++ // __lasx_xvfcmp_sle_d ++ // xd, xj, xk ++ // V4DI, V4DF, V4DF ++ v4i64_r = __lasx_xvfcmp_sle_d(v4f64_a, v4f64_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvfcmp.sle.d( ++ ++ // __lasx_xvfcmp_sule_s ++ // xd, xj, xk ++ // V8SI, V8SF, V8SF ++ v8i32_r = __lasx_xvfcmp_sule_s(v8f32_a, v8f32_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvfcmp.sule.s( ++ ++ // __lasx_xvfcmp_sule_d ++ // xd, xj, xk ++ // V4DI, V4DF, V4DF ++ v4i64_r = __lasx_xvfcmp_sule_d(v4f64_a, v4f64_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvfcmp.sule.d( ++ ++ // __lasx_xvfadd_s ++ // xd, xj, xk ++ // V8SF, V8SF, V8SF ++ v8f32_r = __lasx_xvfadd_s(v8f32_a, v8f32_b); // CHECK: call <8 x float> @llvm.loongarch.lasx.xvfadd.s( ++ ++ // __lasx_xvfadd_d ++ // xd, xj, xk ++ // V4DF, V4DF, V4DF ++ v4f64_r = __lasx_xvfadd_d(v4f64_a, v4f64_b); // CHECK: call <4 x double> @llvm.loongarch.lasx.xvfadd.d( ++ ++ // __lasx_xvfsub_s ++ // xd, xj, xk ++ // V8SF, V8SF, V8SF ++ v8f32_r = __lasx_xvfsub_s(v8f32_a, v8f32_b); // CHECK: call <8 x float> @llvm.loongarch.lasx.xvfsub.s( ++ ++ // __lasx_xvfsub_d ++ // xd, xj, xk ++ // V4DF, V4DF, V4DF ++ v4f64_r = __lasx_xvfsub_d(v4f64_a, v4f64_b); // CHECK: call <4 x double> @llvm.loongarch.lasx.xvfsub.d( ++ ++ // __lasx_xvfmul_s ++ // xd, xj, xk ++ // V8SF, V8SF, V8SF ++ v8f32_r = __lasx_xvfmul_s(v8f32_a, v8f32_b); // CHECK: call <8 x float> @llvm.loongarch.lasx.xvfmul.s( ++ ++ // 
__lasx_xvfmul_d ++ // xd, xj, xk ++ // V4DF, V4DF, V4DF ++ v4f64_r = __lasx_xvfmul_d(v4f64_a, v4f64_b); // CHECK: call <4 x double> @llvm.loongarch.lasx.xvfmul.d( ++ ++ // __lasx_xvfdiv_s ++ // xd, xj, xk ++ // V8SF, V8SF, V8SF ++ v8f32_r = __lasx_xvfdiv_s(v8f32_a, v8f32_b); // CHECK: call <8 x float> @llvm.loongarch.lasx.xvfdiv.s( ++ ++ // __lasx_xvfdiv_d ++ // xd, xj, xk ++ // V4DF, V4DF, V4DF ++ v4f64_r = __lasx_xvfdiv_d(v4f64_a, v4f64_b); // CHECK: call <4 x double> @llvm.loongarch.lasx.xvfdiv.d( ++ ++ // __lasx_xvfcvt_h_s ++ // xd, xj, xk ++ // V16HI, V8SF, V8SF ++ v16i16_r = __lasx_xvfcvt_h_s(v8f32_a, v8f32_b); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvfcvt.h.s( ++ ++ // __lasx_xvfcvt_s_d ++ // xd, xj, xk ++ // V8SF, V4DF, V4DF ++ v8f32_r = __lasx_xvfcvt_s_d(v4f64_a, v4f64_b); // CHECK: call <8 x float> @llvm.loongarch.lasx.xvfcvt.s.d( ++ ++ // __lasx_xvfmin_s ++ // xd, xj, xk ++ // V8SF, V8SF, V8SF ++ v8f32_r = __lasx_xvfmin_s(v8f32_a, v8f32_b); // CHECK: call <8 x float> @llvm.loongarch.lasx.xvfmin.s( ++ ++ // __lasx_xvfmin_d ++ // xd, xj, xk ++ // V4DF, V4DF, V4DF ++ v4f64_r = __lasx_xvfmin_d(v4f64_a, v4f64_b); // CHECK: call <4 x double> @llvm.loongarch.lasx.xvfmin.d( ++ ++ // __lasx_xvfmina_s ++ // xd, xj, xk ++ // V8SF, V8SF, V8SF ++ v8f32_r = __lasx_xvfmina_s(v8f32_a, v8f32_b); // CHECK: call <8 x float> @llvm.loongarch.lasx.xvfmina.s( ++ ++ // __lasx_xvfmina_d ++ // xd, xj, xk ++ // V4DF, V4DF, V4DF ++ v4f64_r = __lasx_xvfmina_d(v4f64_a, v4f64_b); // CHECK: call <4 x double> @llvm.loongarch.lasx.xvfmina.d( ++ ++ // __lasx_xvfmax_s ++ // xd, xj, xk ++ // V8SF, V8SF, V8SF ++ v8f32_r = __lasx_xvfmax_s(v8f32_a, v8f32_b); // CHECK: call <8 x float> @llvm.loongarch.lasx.xvfmax.s( ++ ++ // __lasx_xvfmax_d ++ // xd, xj, xk ++ // V4DF, V4DF, V4DF ++ v4f64_r = __lasx_xvfmax_d(v4f64_a, v4f64_b); // CHECK: call <4 x double> @llvm.loongarch.lasx.xvfmax.d( ++ ++ // __lasx_xvfmaxa_s ++ // xd, xj, xk ++ // V8SF, V8SF, V8SF ++ v8f32_r = __lasx_xvfmaxa_s(v8f32_a, v8f32_b); // CHECK: call <8 x float> @llvm.loongarch.lasx.xvfmaxa.s( ++ ++ // __lasx_xvfmaxa_d ++ // xd, xj, xk ++ // V4DF, V4DF, V4DF ++ v4f64_r = __lasx_xvfmaxa_d(v4f64_a, v4f64_b); // CHECK: call <4 x double> @llvm.loongarch.lasx.xvfmaxa.d( ++ ++ // __lasx_xvfclass_s ++ // xd, xj ++ // V8SI, V8SF ++ v8i32_r = __lasx_xvfclass_s(v8f32_a); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvfclass.s( ++ ++ // __lasx_xvfclass_d ++ // xd, xj ++ // V4DI, V4DF ++ v4i64_r = __lasx_xvfclass_d(v4f64_a); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvfclass.d( ++ ++ // __lasx_xvfsqrt_s ++ // xd, xj ++ // V8SF, V8SF ++ v8f32_r = __lasx_xvfsqrt_s(v8f32_a); // CHECK: call <8 x float> @llvm.loongarch.lasx.xvfsqrt.s( ++ ++ // __lasx_xvfsqrt_d ++ // xd, xj ++ // V4DF, V4DF ++ v4f64_r = __lasx_xvfsqrt_d(v4f64_a); // CHECK: call <4 x double> @llvm.loongarch.lasx.xvfsqrt.d( ++ ++ // __lasx_xvfrecip_s ++ // xd, xj ++ // V8SF, V8SF ++ v8f32_r = __lasx_xvfrecip_s(v8f32_a); // CHECK: call <8 x float> @llvm.loongarch.lasx.xvfrecip.s( ++ ++ // __lasx_xvfrecip_d ++ // xd, xj ++ // V4DF, V4DF ++ v4f64_r = __lasx_xvfrecip_d(v4f64_a); // CHECK: call <4 x double> @llvm.loongarch.lasx.xvfrecip.d( ++ ++ // __lasx_xvfrint_s ++ // xd, xj ++ // V8SF, V8SF ++ v8f32_r = __lasx_xvfrint_s(v8f32_a); // CHECK: call <8 x float> @llvm.loongarch.lasx.xvfrint.s( ++ ++ // __lasx_xvfrint_d ++ // xd, xj ++ // V4DF, V4DF ++ v4f64_r = __lasx_xvfrint_d(v4f64_a); // CHECK: call <4 x double> @llvm.loongarch.lasx.xvfrint.d( ++ ++ // __lasx_xvfrsqrt_s ++ // xd, xj ++ // V8SF, V8SF 
++ v8f32_r = __lasx_xvfrsqrt_s(v8f32_a); // CHECK: call <8 x float> @llvm.loongarch.lasx.xvfrsqrt.s( ++ ++ // __lasx_xvfrsqrt_d ++ // xd, xj ++ // V4DF, V4DF ++ v4f64_r = __lasx_xvfrsqrt_d(v4f64_a); // CHECK: call <4 x double> @llvm.loongarch.lasx.xvfrsqrt.d( ++ ++ // __lasx_xvflogb_s ++ // xd, xj ++ // V8SF, V8SF ++ v8f32_r = __lasx_xvflogb_s(v8f32_a); // CHECK: call <8 x float> @llvm.loongarch.lasx.xvflogb.s( ++ ++ // __lasx_xvflogb_d ++ // xd, xj ++ // V4DF, V4DF ++ v4f64_r = __lasx_xvflogb_d(v4f64_a); // CHECK: call <4 x double> @llvm.loongarch.lasx.xvflogb.d( ++ ++ // __lasx_xvfcvth_s_h ++ // xd, xj ++ // V8SF, V16HI ++ v8f32_r = __lasx_xvfcvth_s_h(v16i16_a); // CHECK: call <8 x float> @llvm.loongarch.lasx.xvfcvth.s.h( ++ ++ // __lasx_xvfcvth_d_s ++ // xd, xj ++ // V4DF, V8SF ++ v4f64_r = __lasx_xvfcvth_d_s(v8f32_a); // CHECK: call <4 x double> @llvm.loongarch.lasx.xvfcvth.d.s( ++ ++ // __lasx_xvfcvtl_s_h ++ // xd, xj ++ // V8SF, V16HI ++ v8f32_r = __lasx_xvfcvtl_s_h(v16i16_a); // CHECK: call <8 x float> @llvm.loongarch.lasx.xvfcvtl.s.h( ++ ++ // __lasx_xvfcvtl_d_s ++ // xd, xj ++ // V4DF, V8SF ++ v4f64_r = __lasx_xvfcvtl_d_s(v8f32_a); // CHECK: call <4 x double> @llvm.loongarch.lasx.xvfcvtl.d.s( ++ ++ // __lasx_xvftint_w_s ++ // xd, xj ++ // V8SI, V8SF ++ v8i32_r = __lasx_xvftint_w_s(v8f32_a); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvftint.w.s( ++ ++ // __lasx_xvftint_l_d ++ // xd, xj ++ // V4DI, V4DF ++ v4i64_r = __lasx_xvftint_l_d(v4f64_a); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvftint.l.d( ++ ++ // __lasx_xvftint_wu_s ++ // xd, xj ++ // UV8SI, V8SF ++ v8u32_r = __lasx_xvftint_wu_s(v8f32_a); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvftint.wu.s( ++ ++ // __lasx_xvftint_lu_d ++ // xd, xj ++ // UV4DI, V4DF ++ v4u64_r = __lasx_xvftint_lu_d(v4f64_a); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvftint.lu.d( ++ ++ // __lasx_xvftintrz_w_s ++ // xd, xj ++ // V8SI, V8SF ++ v8i32_r = __lasx_xvftintrz_w_s(v8f32_a); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvftintrz.w.s( ++ ++ // __lasx_xvftintrz_l_d ++ // xd, xj ++ // V4DI, V4DF ++ v4i64_r = __lasx_xvftintrz_l_d(v4f64_a); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvftintrz.l.d( ++ ++ // __lasx_xvftintrz_wu_s ++ // xd, xj ++ // UV8SI, V8SF ++ v8u32_r = __lasx_xvftintrz_wu_s(v8f32_a); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvftintrz.wu.s( ++ ++ // __lasx_xvftintrz_lu_d ++ // xd, xj ++ // UV4DI, V4DF ++ v4u64_r = __lasx_xvftintrz_lu_d(v4f64_a); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvftintrz.lu.d( ++ ++ // __lasx_xvffint_s_w ++ // xd, xj ++ // V8SF, V8SI ++ v8f32_r = __lasx_xvffint_s_w(v8i32_a); // CHECK: call <8 x float> @llvm.loongarch.lasx.xvffint.s.w( ++ ++ // __lasx_xvffint_d_l ++ // xd, xj ++ // V4DF, V4DI ++ v4f64_r = __lasx_xvffint_d_l(v4i64_a); // CHECK: call <4 x double> @llvm.loongarch.lasx.xvffint.d.l( ++ ++ // __lasx_xvffint_s_wu ++ // xd, xj ++ // V8SF, UV8SI ++ v8f32_r = __lasx_xvffint_s_wu(v8u32_a); // CHECK: call <8 x float> @llvm.loongarch.lasx.xvffint.s.wu( ++ ++ // __lasx_xvffint_d_lu ++ // xd, xj ++ // V4DF, UV4DI ++ v4f64_r = __lasx_xvffint_d_lu(v4u64_a); // CHECK: call <4 x double> @llvm.loongarch.lasx.xvffint.d.lu( ++ ++ // __lasx_xvreplve_b ++ // xd, xj, rk ++ // V32QI, V32QI, SI ++ v32i8_r = __lasx_xvreplve_b(v32i8_a, i32_b); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvreplve.b( ++ ++ // __lasx_xvreplve_h ++ // xd, xj, rk ++ // V16HI, V16HI, SI ++ v16i16_r = __lasx_xvreplve_h(v16i16_a, i32_b); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvreplve.h( ++ ++ // 
__lasx_xvreplve_w ++ // xd, xj, rk ++ // V8SI, V8SI, SI ++ v8i32_r = __lasx_xvreplve_w(v8i32_a, i32_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvreplve.w( ++ ++ // __lasx_xvreplve_d ++ // xd, xj, rk ++ // V4DI, V4DI, SI ++ v4i64_r = __lasx_xvreplve_d(v4i64_a, i32_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvreplve.d( ++ ++ // __lasx_xvpermi_w ++ // xd, xj, ui8 ++ // V8SI, V8SI, V8SI, USI ++ v8i32_r = __lasx_xvpermi_w(v8i32_a, v8i32_b, ui8); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvpermi.w( ++ ++ // __lasx_xvandn_v ++ // xd, xj, xk ++ // UV32QI, UV32QI, UV32QI ++ v32u8_r = __lasx_xvandn_v(v32u8_a, v32u8_b); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvandn.v( ++ ++ // __lasx_xvneg_b ++ // xd, xj ++ // V32QI, V32QI ++ v32i8_r = __lasx_xvneg_b(v32i8_a); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvneg.b( ++ ++ // __lasx_xvneg_h ++ // xd, xj ++ // V16HI, V16HI ++ v16i16_r = __lasx_xvneg_h(v16i16_a); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvneg.h( ++ ++ // __lasx_xvneg_w ++ // xd, xj ++ // V8SI, V8SI ++ v8i32_r = __lasx_xvneg_w(v8i32_a); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvneg.w( ++ ++ // __lasx_xvneg_d ++ // xd, xj ++ // V4DI, V4DI ++ v4i64_r = __lasx_xvneg_d(v4i64_a); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvneg.d( ++ ++ // __lasx_xvmuh_b ++ // xd, xj, xk ++ // V32QI, V32QI, V32QI ++ v32i8_r = __lasx_xvmuh_b(v32i8_a, v32i8_b); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvmuh.b( ++ ++ // __lasx_xvmuh_h ++ // xd, xj, xk ++ // V16HI, V16HI, V16HI ++ v16i16_r = __lasx_xvmuh_h(v16i16_a, v16i16_b); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvmuh.h( ++ ++ // __lasx_xvmuh_w ++ // xd, xj, xk ++ // V8SI, V8SI, V8SI ++ v8i32_r = __lasx_xvmuh_w(v8i32_a, v8i32_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvmuh.w( ++ ++ // __lasx_xvmuh_d ++ // xd, xj, xk ++ // V4DI, V4DI, V4DI ++ v4i64_r = __lasx_xvmuh_d(v4i64_a, v4i64_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvmuh.d( ++ ++ // __lasx_xvmuh_bu ++ // xd, xj, xk ++ // UV32QI, UV32QI, UV32QI ++ v32u8_r = __lasx_xvmuh_bu(v32u8_a, v32u8_b); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvmuh.bu( ++ ++ // __lasx_xvmuh_hu ++ // xd, xj, xk ++ // UV16HI, UV16HI, UV16HI ++ v16u16_r = __lasx_xvmuh_hu(v16u16_a, v16u16_b); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvmuh.hu( ++ ++ // __lasx_xvmuh_wu ++ // xd, xj, xk ++ // UV8SI, UV8SI, UV8SI ++ v8u32_r = __lasx_xvmuh_wu(v8u32_a, v8u32_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvmuh.wu( ++ ++ // __lasx_xvmuh_du ++ // xd, xj, xk ++ // UV4DI, UV4DI, UV4DI ++ v4u64_r = __lasx_xvmuh_du(v4u64_a, v4u64_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvmuh.du( ++ ++ // __lasx_xvsllwil_h_b ++ // xd, xj, ui3 ++ // V16HI, V32QI, UQI ++ v16i16_r = __lasx_xvsllwil_h_b(v32i8_a, ui3_b); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvsllwil.h.b( ++ ++ // __lasx_xvsllwil_w_h ++ // xd, xj, ui4 ++ // V8SI, V16HI, UQI ++ v8i32_r = __lasx_xvsllwil_w_h(v16i16_a, ui4_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvsllwil.w.h( ++ ++ // __lasx_xvsllwil_d_w ++ // xd, xj, ui5 ++ // V4DI, V8SI, UQI ++ v4i64_r = __lasx_xvsllwil_d_w(v8i32_a, ui5_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvsllwil.d.w( ++ ++ // __lasx_xvsllwil_hu_bu ++ // xd, xj, ui3 ++ // UV16HI, UV32QI, UQI ++ v16u16_r = __lasx_xvsllwil_hu_bu(v32u8_a, ui3_b); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvsllwil.hu.bu( ++ ++ // __lasx_xvsllwil_wu_hu ++ // xd, xj, ui4 ++ // UV8SI, UV16HI, UQI ++ v8u32_r = __lasx_xvsllwil_wu_hu(v16u16_a, ui4_b); // CHECK: call <8 x i32> 
@llvm.loongarch.lasx.xvsllwil.wu.hu( ++ ++ // __lasx_xvsllwil_du_wu ++ // xd, xj, ui5 ++ // UV4DI, UV8SI, UQI ++ v4u64_r = __lasx_xvsllwil_du_wu(v8u32_a, ui5_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvsllwil.du.wu( ++ ++ // __lasx_xvsran_b_h ++ // xd, xj, xk ++ // V32QI, V16HI, V16HI ++ v32i8_r = __lasx_xvsran_b_h(v16i16_a, v16i16_b); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvsran.b.h( ++ ++ // __lasx_xvsran_h_w ++ // xd, xj, xk ++ // V16HI, V8SI, V8SI ++ v16i16_r = __lasx_xvsran_h_w(v8i32_a, v8i32_b); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvsran.h.w( ++ ++ // __lasx_xvsran_w_d ++ // xd, xj, xk ++ // V8SI, V4DI, V4DI ++ v8i32_r = __lasx_xvsran_w_d(v4i64_a, v4i64_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvsran.w.d( ++ ++ // __lasx_xvssran_b_h ++ // xd, xj, xk ++ // V32QI, V16HI, V16HI ++ v32i8_r = __lasx_xvssran_b_h(v16i16_a, v16i16_b); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvssran.b.h( ++ ++ // __lasx_xvssran_h_w ++ // xd, xj, xk ++ // V16HI, V8SI, V8SI ++ v16i16_r = __lasx_xvssran_h_w(v8i32_a, v8i32_b); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvssran.h.w( ++ ++ // __lasx_xvssran_w_d ++ // xd, xj, xk ++ // V8SI, V4DI, V4DI ++ v8i32_r = __lasx_xvssran_w_d(v4i64_a, v4i64_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvssran.w.d( ++ ++ // __lasx_xvssran_bu_h ++ // xd, xj, xk ++ // UV32QI, UV16HI, UV16HI ++ v32u8_r = __lasx_xvssran_bu_h(v16u16_a, v16u16_b); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvssran.bu.h( ++ ++ // __lasx_xvssran_hu_w ++ // xd, xj, xk ++ // UV16HI, UV8SI, UV8SI ++ v16u16_r = __lasx_xvssran_hu_w(v8u32_a, v8u32_b); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvssran.hu.w( ++ ++ // __lasx_xvssran_wu_d ++ // xd, xj, xk ++ // UV8SI, UV4DI, UV4DI ++ v8u32_r = __lasx_xvssran_wu_d(v4u64_a, v4u64_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvssran.wu.d( ++ ++ // __lasx_xvsrarn_b_h ++ // xd, xj, xk ++ // V32QI, V16HI, V16HI ++ v32i8_r = __lasx_xvsrarn_b_h(v16i16_a, v16i16_b); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvsrarn.b.h( ++ ++ // __lasx_xvsrarn_h_w ++ // xd, xj, xk ++ // V16HI, V8SI, V8SI ++ v16i16_r = __lasx_xvsrarn_h_w(v8i32_a, v8i32_b); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvsrarn.h.w( ++ ++ // __lasx_xvsrarn_w_d ++ // xd, xj, xk ++ // V8SI, V4DI, V4DI ++ v8i32_r = __lasx_xvsrarn_w_d(v4i64_a, v4i64_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvsrarn.w.d( ++ ++ // __lasx_xvssrarn_b_h ++ // xd, xj, xk ++ // V32QI, V16HI, V16HI ++ v32i8_r = __lasx_xvssrarn_b_h(v16i16_a, v16i16_b); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvssrarn.b.h( ++ ++ // __lasx_xvssrarn_h_w ++ // xd, xj, xk ++ // V16HI, V8SI, V8SI ++ v16i16_r = __lasx_xvssrarn_h_w(v8i32_a, v8i32_b); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvssrarn.h.w( ++ ++ // __lasx_xvssrarn_w_d ++ // xd, xj, xk ++ // V8SI, V4DI, V4DI ++ v8i32_r = __lasx_xvssrarn_w_d(v4i64_a, v4i64_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvssrarn.w.d( ++ ++ // __lasx_xvssrarn_bu_h ++ // xd, xj, xk ++ // UV32QI, UV16HI, UV16HI ++ v32u8_r = __lasx_xvssrarn_bu_h(v16u16_a, v16u16_b); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvssrarn.bu.h( ++ ++ // __lasx_xvssrarn_hu_w ++ // xd, xj, xk ++ // UV16HI, UV8SI, UV8SI ++ v16u16_r = __lasx_xvssrarn_hu_w(v8u32_a, v8u32_b); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvssrarn.hu.w( ++ ++ // __lasx_xvssrarn_wu_d ++ // xd, xj, xk ++ // UV8SI, UV4DI, UV4DI ++ v8u32_r = __lasx_xvssrarn_wu_d(v4u64_a, v4u64_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvssrarn.wu.d( ++ ++ // __lasx_xvsrln_b_h ++ // 
xd, xj, xk ++ // V32QI, V16HI, V16HI ++ v32i8_r = __lasx_xvsrln_b_h(v16i16_a, v16i16_b); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvsrln.b.h( ++ ++ // __lasx_xvsrln_h_w ++ // xd, xj, xk ++ // V16HI, V8SI, V8SI ++ v16i16_r = __lasx_xvsrln_h_w(v8i32_a, v8i32_b); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvsrln.h.w( ++ ++ // __lasx_xvsrln_w_d ++ // xd, xj, xk ++ // V8SI, V4DI, V4DI ++ v8i32_r = __lasx_xvsrln_w_d(v4i64_a, v4i64_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvsrln.w.d( ++ ++ // __lasx_xvssrln_bu_h ++ // xd, xj, xk ++ // UV32QI, UV16HI, UV16HI ++ v32u8_r = __lasx_xvssrln_bu_h(v16u16_a, v16u16_b); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvssrln.bu.h( ++ ++ // __lasx_xvssrln_hu_w ++ // xd, xj, xk ++ // UV16HI, UV8SI, UV8SI ++ v16u16_r = __lasx_xvssrln_hu_w(v8u32_a, v8u32_b); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvssrln.hu.w( ++ ++ // __lasx_xvssrln_wu_d ++ // xd, xj, xk ++ // UV8SI, UV4DI, UV4DI ++ v8u32_r = __lasx_xvssrln_wu_d(v4u64_a, v4u64_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvssrln.wu.d( ++ ++ // __lasx_xvsrlrn_b_h ++ // xd, xj, xk ++ // V32QI, V16HI, V16HI ++ v32i8_r = __lasx_xvsrlrn_b_h(v16i16_a, v16i16_b); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvsrlrn.b.h( ++ ++ // __lasx_xvsrlrn_h_w ++ // xd, xj, xk ++ // V16HI, V8SI, V8SI ++ v16i16_r = __lasx_xvsrlrn_h_w(v8i32_a, v8i32_b); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvsrlrn.h.w( ++ ++ // __lasx_xvsrlrn_w_d ++ // xd, xj, xk ++ // V8SI, V4DI, V4DI ++ v8i32_r = __lasx_xvsrlrn_w_d(v4i64_a, v4i64_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvsrlrn.w.d( ++ ++ // __lasx_xvssrlrn_bu_h ++ // xd, xj, xk ++ // UV32QI, UV16HI, UV16HI ++ v32u8_r = __lasx_xvssrlrn_bu_h(v16u16_a, v16u16_b); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvssrlrn.bu.h( ++ ++ // __lasx_xvssrlrn_hu_w ++ // xd, xj, xk ++ // UV16HI, UV8SI, UV8SI ++ v16u16_r = __lasx_xvssrlrn_hu_w(v8u32_a, v8u32_b); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvssrlrn.hu.w( ++ ++ // __lasx_xvssrlrn_wu_d ++ // xd, xj, xk ++ // UV8SI, UV4DI, UV4DI ++ v8u32_r = __lasx_xvssrlrn_wu_d(v4u64_a, v4u64_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvssrlrn.wu.d( ++ ++ // __lasx_xvfrstpi_b ++ // xd, xj, ui5 ++ // V32QI, V32QI, V32QI, UQI ++ v32i8_r = __lasx_xvfrstpi_b(v32i8_a, v32i8_b, ui5); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvfrstpi.b( ++ ++ // __lasx_xvfrstpi_h ++ // xd, xj, ui5 ++ // V16HI, V16HI, V16HI, UQI ++ v16i16_r = __lasx_xvfrstpi_h(v16i16_a, v16i16_b, ui5); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvfrstpi.h( ++ ++ // __lasx_xvfrstp_b ++ // xd, xj, xk ++ // V32QI, V32QI, V32QI, V32QI ++ v32i8_r = __lasx_xvfrstp_b(v32i8_a, v32i8_b, v32i8_c); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvfrstp.b( ++ ++ // __lasx_xvfrstp_h ++ // xd, xj, xk ++ // V16HI, V16HI, V16HI, V16HI ++ v16i16_r = __lasx_xvfrstp_h(v16i16_a, v16i16_b, v16i16_c); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvfrstp.h( ++ ++ // __lasx_xvshuf4i_d ++ // xd, xj, ui8 ++ // V4DI, V4DI, V4DI, USI ++ v4i64_r = __lasx_xvshuf4i_d(v4i64_a, v4i64_b, ui8); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvshuf4i.d( ++ ++ // __lasx_xvbsrl_v ++ // xd, xj, ui5 ++ // V32QI, V32QI, UQI ++ v32i8_r = __lasx_xvbsrl_v(v32i8_a, ui5_b); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvbsrl.v( ++ ++ // __lasx_xvbsll_v ++ // xd, xj, ui5 ++ // V32QI, V32QI, UQI ++ v32i8_r = __lasx_xvbsll_v(v32i8_a, ui5_b); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvbsll.v( ++ ++ // __lasx_xvextrins_b ++ // xd, xj, ui8 ++ // V32QI, V32QI, V32QI, UQI ++ v32i8_r = 
__lasx_xvextrins_b(v32i8_a, v32i8_b, ui8); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvextrins.b( ++ ++ // __lasx_xvextrins_h ++ // xd, xj, ui8 ++ // V16HI, V16HI, V16HI, UQI ++ v16i16_r = __lasx_xvextrins_h(v16i16_a, v16i16_b, ui8); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvextrins.h( ++ ++ // __lasx_xvextrins_w ++ // xd, xj, ui8 ++ // V8SI, V8SI, V8SI, UQI ++ v8i32_r = __lasx_xvextrins_w(v8i32_a, v8i32_b, ui8); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvextrins.w( ++ ++ // __lasx_xvextrins_d ++ // xd, xj, ui8 ++ // V4DI, V4DI, V4DI, UQI ++ v4i64_r = __lasx_xvextrins_d(v4i64_a, v4i64_b, ui8); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvextrins.d( ++ ++ // __lasx_xvmskltz_b ++ // xd, xj ++ // V32QI, V32QI ++ v32i8_r = __lasx_xvmskltz_b(v32i8_a); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvmskltz.b( ++ ++ // __lasx_xvmskltz_h ++ // xd, xj ++ // V16HI, V16HI ++ v16i16_r = __lasx_xvmskltz_h(v16i16_a); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvmskltz.h( ++ ++ // __lasx_xvmskltz_w ++ // xd, xj ++ // V8SI, V8SI ++ v8i32_r = __lasx_xvmskltz_w(v8i32_a); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvmskltz.w( ++ ++ // __lasx_xvmskltz_d ++ // xd, xj ++ // V4DI, V4DI ++ v4i64_r = __lasx_xvmskltz_d(v4i64_a); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvmskltz.d( ++ ++ // __lasx_xvsigncov_b ++ // xd, xj, xk ++ // V32QI, V32QI, V32QI ++ v32i8_r = __lasx_xvsigncov_b(v32i8_a, v32i8_b); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvsigncov.b( ++ ++ // __lasx_xvsigncov_h ++ // xd, xj, xk ++ // V16HI, V16HI, V16HI ++ v16i16_r = __lasx_xvsigncov_h(v16i16_a, v16i16_b); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvsigncov.h( ++ ++ // __lasx_xvsigncov_w ++ // xd, xj, xk ++ // V8SI, V8SI, V8SI ++ v8i32_r = __lasx_xvsigncov_w(v8i32_a, v8i32_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvsigncov.w( ++ ++ // __lasx_xvsigncov_d ++ // xd, xj, xk ++ // V4DI, V4DI, V4DI ++ v4i64_r = __lasx_xvsigncov_d(v4i64_a, v4i64_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvsigncov.d( ++ ++ // __lasx_xvfmadd_s ++ // xd, xj, xk, xa ++ // V8SF, V8SF, V8SF, V8SF ++ v8f32_r = __lasx_xvfmadd_s(v8f32_a, v8f32_b, v8f32_c); // CHECK: call <8 x float> @llvm.loongarch.lasx.xvfmadd.s( ++ ++ // __lasx_xvfmadd_d ++ // xd, xj, xk, xa ++ // V4DF, V4DF, V4DF, V4DF ++ v4f64_r = __lasx_xvfmadd_d(v4f64_a, v4f64_b, v4f64_c); // CHECK: call <4 x double> @llvm.loongarch.lasx.xvfmadd.d( ++ ++ // __lasx_xvfmsub_s ++ // xd, xj, xk, xa ++ // V8SF, V8SF, V8SF, V8SF ++ v8f32_r = __lasx_xvfmsub_s(v8f32_a, v8f32_b, v8f32_c); // CHECK: call <8 x float> @llvm.loongarch.lasx.xvfmsub.s( ++ ++ // __lasx_xvfmsub_d ++ // xd, xj, xk, xa ++ // V4DF, V4DF, V4DF, V4DF ++ v4f64_r = __lasx_xvfmsub_d(v4f64_a, v4f64_b, v4f64_c); // CHECK: call <4 x double> @llvm.loongarch.lasx.xvfmsub.d( ++ ++ // __lasx_xvfnmadd_s ++ // xd, xj, xk, xa ++ // V8SF, V8SF, V8SF, V8SF ++ v8f32_r = __lasx_xvfnmadd_s(v8f32_a, v8f32_b, v8f32_c); // CHECK: call <8 x float> @llvm.loongarch.lasx.xvfnmadd.s( ++ ++ // __lasx_xvfnmadd_d ++ // xd, xj, xk, xa ++ // V4DF, V4DF, V4DF, V4DF ++ v4f64_r = __lasx_xvfnmadd_d(v4f64_a, v4f64_b, v4f64_c); // CHECK: call <4 x double> @llvm.loongarch.lasx.xvfnmadd.d( ++ ++ // __lasx_xvfnmsub_s ++ // xd, xj, xk, xa ++ // V8SF, V8SF, V8SF, V8SF ++ v8f32_r = __lasx_xvfnmsub_s(v8f32_a, v8f32_b, v8f32_c); // CHECK: call <8 x float> @llvm.loongarch.lasx.xvfnmsub.s( ++ ++ // __lasx_xvfnmsub_d ++ // xd, xj, xk, xa ++ // V4DF, V4DF, V4DF, V4DF ++ v4f64_r = __lasx_xvfnmsub_d(v4f64_a, v4f64_b, v4f64_c); // CHECK: call <4 x 
double> @llvm.loongarch.lasx.xvfnmsub.d( ++ ++ // __lasx_xvftintrne_w_s ++ // xd, xj ++ // V8SI, V8SF ++ v8i32_r = __lasx_xvftintrne_w_s(v8f32_a); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvftintrne.w.s( ++ ++ // __lasx_xvftintrne_l_d ++ // xd, xj ++ // V4DI, V4DF ++ v4i64_r = __lasx_xvftintrne_l_d(v4f64_a); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvftintrne.l.d( ++ ++ // __lasx_xvftintrp_w_s ++ // xd, xj ++ // V8SI, V8SF ++ v8i32_r = __lasx_xvftintrp_w_s(v8f32_a); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvftintrp.w.s( ++ ++ // __lasx_xvftintrp_l_d ++ // xd, xj ++ // V4DI, V4DF ++ v4i64_r = __lasx_xvftintrp_l_d(v4f64_a); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvftintrp.l.d( ++ ++ // __lasx_xvftintrm_w_s ++ // xd, xj ++ // V8SI, V8SF ++ v8i32_r = __lasx_xvftintrm_w_s(v8f32_a); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvftintrm.w.s( ++ ++ // __lasx_xvftintrm_l_d ++ // xd, xj ++ // V4DI, V4DF ++ v4i64_r = __lasx_xvftintrm_l_d(v4f64_a); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvftintrm.l.d( ++ ++ // __lasx_xvftint_w_d ++ // xd, xj, xk ++ // V8SI, V4DF, V4DF ++ v8i32_r = __lasx_xvftint_w_d(v4f64_a, v4f64_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvftint.w.d( ++ ++ // __lasx_xvffint_s_l ++ // xd, xj, xk ++ // V8SF, V4DI, V4DI ++ v8f32_r = __lasx_xvffint_s_l(v4i64_a, v4i64_b); // CHECK: call <8 x float> @llvm.loongarch.lasx.xvffint.s.l( ++ ++ // __lasx_xvftintrz_w_d ++ // xd, xj, xk ++ // V8SI, V4DF, V4DF ++ v8i32_r = __lasx_xvftintrz_w_d(v4f64_a, v4f64_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvftintrz.w.d( ++ ++ // __lasx_xvftintrp_w_d ++ // xd, xj, xk ++ // V8SI, V4DF, V4DF ++ v8i32_r = __lasx_xvftintrp_w_d(v4f64_a, v4f64_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvftintrp.w.d( ++ ++ // __lasx_xvftintrm_w_d ++ // xd, xj, xk ++ // V8SI, V4DF, V4DF ++ v8i32_r = __lasx_xvftintrm_w_d(v4f64_a, v4f64_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvftintrm.w.d( ++ ++ // __lasx_xvftintrne_w_d ++ // xd, xj, xk ++ // V8SI, V4DF, V4DF ++ v8i32_r = __lasx_xvftintrne_w_d(v4f64_a, v4f64_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvftintrne.w.d( ++ ++ // __lasx_xvftinth_l_s ++ // xd, xj ++ // V4DI, V8SF ++ v4i64_r = __lasx_xvftinth_l_s(v8f32_a); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvftinth.l.s( ++ ++ // __lasx_xvftintl_l_s ++ // xd, xj ++ // V4DI, V8SF ++ v4i64_r = __lasx_xvftintl_l_s(v8f32_a); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvftintl.l.s( ++ ++ // __lasx_xvffinth_d_w ++ // xd, xj ++ // V4DF, V8SI ++ v4f64_r = __lasx_xvffinth_d_w(v8i32_a); // CHECK: call <4 x double> @llvm.loongarch.lasx.xvffinth.d.w( ++ ++ // __lasx_xvffintl_d_w ++ // xd, xj ++ // V4DF, V8SI ++ v4f64_r = __lasx_xvffintl_d_w(v8i32_a); // CHECK: call <4 x double> @llvm.loongarch.lasx.xvffintl.d.w( ++ ++ // __lasx_xvftintrzh_l_s ++ // xd, xj ++ // V4DI, V8SF ++ v4i64_r = __lasx_xvftintrzh_l_s(v8f32_a); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvftintrzh.l.s( ++ ++ // __lasx_xvftintrzl_l_s ++ // xd, xj ++ // V4DI, V8SF ++ v4i64_r = __lasx_xvftintrzl_l_s(v8f32_a); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvftintrzl.l.s( ++ ++ // __lasx_xvftintrph_l_s ++ // xd, xj ++ // V4DI, V8SF ++ v4i64_r = __lasx_xvftintrph_l_s(v8f32_a); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvftintrph.l.s( ++ ++ // __lasx_xvftintrpl_l_s ++ // xd, xj ++ // V4DI, V8SF ++ v4i64_r = __lasx_xvftintrpl_l_s(v8f32_a); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvftintrpl.l.s( ++ ++ // __lasx_xvftintrmh_l_s ++ // xd, xj ++ // V4DI, V8SF ++ v4i64_r = 
__lasx_xvftintrmh_l_s(v8f32_a); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvftintrmh.l.s( ++ ++ // __lasx_xvftintrml_l_s ++ // xd, xj ++ // V4DI, V8SF ++ v4i64_r = __lasx_xvftintrml_l_s(v8f32_a); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvftintrml.l.s( ++ ++ // __lasx_xvftintrneh_l_s ++ // xd, xj ++ // V4DI, V8SF ++ v4i64_r = __lasx_xvftintrneh_l_s(v8f32_a); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvftintrneh.l.s( ++ ++ // __lasx_xvftintrnel_l_s ++ // xd, xj ++ // V4DI, V8SF ++ v4i64_r = __lasx_xvftintrnel_l_s(v8f32_a); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvftintrnel.l.s( ++ ++ // __lasx_xvfrintrne_s ++ // xd, xj ++ // V8SI, V8SF ++ v8i32_r = __lasx_xvfrintrne_s(v8f32_a); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvfrintrne.s( ++ ++ // __lasx_xvfrintrne_d ++ // xd, xj ++ // V4DI, V4DF ++ v4i64_r = __lasx_xvfrintrne_d(v4f64_a); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvfrintrne.d( ++ ++ // __lasx_xvfrintrz_s ++ // xd, xj ++ // V8SI, V8SF ++ v8i32_r = __lasx_xvfrintrz_s(v8f32_a); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvfrintrz.s( ++ ++ // __lasx_xvfrintrz_d ++ // xd, xj ++ // V4DI, V4DF ++ v4i64_r = __lasx_xvfrintrz_d(v4f64_a); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvfrintrz.d( ++ ++ // __lasx_xvfrintrp_s ++ // xd, xj ++ // V8SI, V8SF ++ v8i32_r = __lasx_xvfrintrp_s(v8f32_a); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvfrintrp.s( ++ ++ // __lasx_xvfrintrp_d ++ // xd, xj ++ // V4DI, V4DF ++ v4i64_r = __lasx_xvfrintrp_d(v4f64_a); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvfrintrp.d( ++ ++ // __lasx_xvfrintrm_s ++ // xd, xj ++ // V8SI, V8SF ++ v8i32_r = __lasx_xvfrintrm_s(v8f32_a); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvfrintrm.s( ++ ++ // __lasx_xvfrintrm_d ++ // xd, xj ++ // V4DI, V4DF ++ v4i64_r = __lasx_xvfrintrm_d(v4f64_a); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvfrintrm.d( ++ ++ // __lasx_xvld ++ // xd, rj, si12 ++ // V32QI, CVPOINTER, SI ++ v32i8_r = __lasx_xvld(&v32i8_a, si12); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvld( ++ ++ // __lasx_xvst ++ // xd, rj, si12 ++ // VOID, V32QI, CVPOINTER, SI ++ __lasx_xvst(v32i8_a, &v32i8_b, si12); // CHECK: call void @llvm.loongarch.lasx.xvst( ++ ++ // __lasx_xvstelm_b ++ // xd, rj, si8, idx ++ // VOID, V32QI, CVPOINTER, SI, UQI ++ __lasx_xvstelm_b(v32i8_a, &v32i8_b, 0, idx4); // CHECK: call void @llvm.loongarch.lasx.xvstelm.b( ++ ++ // __lasx_xvstelm_h ++ // xd, rj, si8, idx ++ // VOID, V16HI, CVPOINTER, SI, UQI ++ __lasx_xvstelm_h(v16i16_a, &v16i16_b, 0, idx3); // CHECK: call void @llvm.loongarch.lasx.xvstelm.h( ++ ++ // __lasx_xvstelm_w ++ // xd, rj, si8, idx ++ // VOID, V8SI, CVPOINTER, SI, UQI ++ __lasx_xvstelm_w(v8i32_a, &v8i32_b, 0, idx2); // CHECK: call void @llvm.loongarch.lasx.xvstelm.w( ++ ++ // __lasx_xvstelm_d ++ // xd, rj, si8, idx ++ // VOID, V4DI, CVPOINTER, SI, UQI ++ __lasx_xvstelm_d(v4i64_a, &v4i64_b, 0, idx1); // CHECK: call void @llvm.loongarch.lasx.xvstelm.d( ++ ++ // __lasx_xvinsve0_w ++ // xd, xj, ui3 ++ // V8SI, V8SI, V8SI, UQI ++ v8i32_r = __lasx_xvinsve0_w(v8i32_a, v8i32_b, 2); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvinsve0.w( ++ ++ // __lasx_xvinsve0_d ++ // xd, xj, ui2 ++ // V4DI, V4DI, V4DI, UQI ++ v4i64_r = __lasx_xvinsve0_d(v4i64_a, v4i64_b, ui2); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvinsve0.d( ++ ++ // __lasx_xvpickve_w ++ // xd, xj, ui3 ++ // V8SI, V8SI, UQI ++ v8i32_r = __lasx_xvpickve_w(v8i32_b, 2); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvpickve.w( ++ ++ // __lasx_xvpickve_d ++ // xd, xj, ui2 ++ // V4DI, 
V4DI, UQI ++ v4i64_r = __lasx_xvpickve_d(v4i64_b, ui2); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvpickve.d( ++ ++ // __lasx_xvssrlrn_b_h ++ // xd, xj, xk ++ // V32QI, V16HI, V16HI ++ v32i8_r = __lasx_xvssrlrn_b_h(v16i16_a, v16i16_b); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvssrlrn.b.h( ++ ++ // __lasx_xvssrlrn_h_w ++ // xd, xj, xk ++ // V16HI, V8SI, V8SI ++ v16i16_r = __lasx_xvssrlrn_h_w(v8i32_a, v8i32_b); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvssrlrn.h.w( ++ ++ // __lasx_xvssrlrn_w_d ++ // xd, xj, xk ++ // V8SI, V4DI, V4DI ++ v8i32_r = __lasx_xvssrlrn_w_d(v4i64_a, v4i64_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvssrlrn.w.d( ++ ++ // __lasx_xvssrln_b_h ++ // xd, xj, xk ++ // V32QI, V16HI, V16HI ++ v32i8_r = __lasx_xvssrln_b_h(v16i16_a, v16i16_b); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvssrln.b.h( ++ ++ // __lasx_xvssrln_h_w ++ // xd, xj, xk ++ // V16HI, V8SI, V8SI ++ v16i16_r = __lasx_xvssrln_h_w(v8i32_a, v8i32_b); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvssrln.h.w( ++ ++ // __lasx_xvssrln_w_d ++ // xd, xj, xk ++ // V8SI, V4DI, V4DI ++ v8i32_r = __lasx_xvssrln_w_d(v4i64_a, v4i64_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvssrln.w.d( ++ ++ // __lasx_xvorn_v ++ // xd, xj, xk ++ // V32QI, V32QI, V32QI ++ v32i8_r = __lasx_xvorn_v(v32i8_a, v32i8_b); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvorn.v( ++ ++ // __lasx_xvldi ++ // xd, i13 ++ // V4DI, HI ++ v4i64_r = __lasx_xvldi(i13); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvldi( ++ ++ // __lasx_xvldx ++ // xd, rj, rk ++ // V32QI, CVPOINTER, DI ++ v32i8_r = __lasx_xvldx(&v32i8_a, i64_d); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvldx( ++ ++ // __lasx_xvstx ++ // xd, rj, rk ++ // VOID, V32QI, CVPOINTER, DI ++ __lasx_xvstx(v32i8_a, &v32i8_b, i64_d); // CHECK: call void @llvm.loongarch.lasx.xvstx( ++ ++ // __lasx_xvinsgr2vr_w ++ // xd, rj, ui3 ++ // V8SI, V8SI, SI, UQI ++ v8i32_r = __lasx_xvinsgr2vr_w(v8i32_a, i32_b, ui3); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvinsgr2vr.w( ++ ++ // __lasx_xvinsgr2vr_d ++ // xd, rj, ui2 ++ // V4DI, V4DI, DI, UQI ++ v4i64_r = __lasx_xvinsgr2vr_d(v4i64_a, i64_b, ui2); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvinsgr2vr.d( ++ ++ // __lasx_xvreplve0_b ++ // xd, xj ++ // V32QI, V32QI ++ v32i8_r = __lasx_xvreplve0_b(v32i8_a); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvreplve0.b( ++ ++ // __lasx_xvreplve0_h ++ // xd, xj ++ // V16HI, V16HI ++ v16i16_r = __lasx_xvreplve0_h(v16i16_a); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvreplve0.h( ++ ++ // __lasx_xvreplve0_w ++ // xd, xj ++ // V8SI, V8SI ++ v8i32_r = __lasx_xvreplve0_w(v8i32_a); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvreplve0.w( ++ ++ // __lasx_xvreplve0_d ++ // xd, xj ++ // V4DI, V4DI ++ v4i64_r = __lasx_xvreplve0_d(v4i64_a); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvreplve0.d( ++ ++ // __lasx_xvreplve0_q ++ // xd, xj ++ // V32QI, V32QI ++ v32i8_r = __lasx_xvreplve0_q(v32i8_a); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvreplve0.q( ++ ++ // __lasx_vext2xv_h_b ++ // xd, xj ++ // V16HI, V32QI ++ v16i16_r = __lasx_vext2xv_h_b(v32i8_a); // CHECK: call <16 x i16> @llvm.loongarch.lasx.vext2xv.h.b( ++ ++ // __lasx_vext2xv_w_h ++ // xd, xj ++ // V8SI, V16HI ++ v8i32_r = __lasx_vext2xv_w_h(v16i16_a); // CHECK: call <8 x i32> @llvm.loongarch.lasx.vext2xv.w.h( ++ ++ // __lasx_vext2xv_d_w ++ // xd, xj ++ // V4DI, V8SI ++ v4i64_r = __lasx_vext2xv_d_w(v8i32_a); // CHECK: call <4 x i64> @llvm.loongarch.lasx.vext2xv.d.w( ++ ++ // __lasx_vext2xv_w_b ++ // xd, xj ++ // V8SI, 
V32QI ++ v8i32_r = __lasx_vext2xv_w_b(v32i8_a); // CHECK: call <8 x i32> @llvm.loongarch.lasx.vext2xv.w.b( ++ ++ //gcc build fail ++ // __lasx_vext2xv_d_h ++ // xd, xj ++ // V4DI, V16HI ++ v4i64_r = __lasx_vext2xv_d_h(v16i16_a); // CHECK: call <4 x i64> @llvm.loongarch.lasx.vext2xv.d.h( ++ ++ // __lasx_vext2xv_d_b ++ // xd, xj ++ // V4DI, V32QI ++ v4i64_r = __lasx_vext2xv_d_b(v32i8_a); // CHECK: call <4 x i64> @llvm.loongarch.lasx.vext2xv.d.b( ++ ++ // __lasx_vext2xv_hu_bu ++ // xd, xj ++ // V16HI, V32QI ++ v16i16_r = __lasx_vext2xv_hu_bu(v32i8_a); // CHECK: call <16 x i16> @llvm.loongarch.lasx.vext2xv.hu.bu( ++ ++ // __lasx_vext2xv_wu_hu ++ // xd, xj ++ // V8SI, V16HI ++ v8i32_r = __lasx_vext2xv_wu_hu(v16i16_a); // CHECK: call <8 x i32> @llvm.loongarch.lasx.vext2xv.wu.hu( ++ ++ // __lasx_vext2xv_du_wu ++ // xd, xj ++ // V4DI, V8SI ++ v4i64_r = __lasx_vext2xv_du_wu(v8i32_a); // CHECK: call <4 x i64> @llvm.loongarch.lasx.vext2xv.du.wu( ++ ++ // __lasx_vext2xv_wu_bu ++ // xd, xj ++ // V8SI, V32QI ++ v8i32_r = __lasx_vext2xv_wu_bu(v32i8_a); // CHECK: call <8 x i32> @llvm.loongarch.lasx.vext2xv.wu.bu( ++ ++ //gcc build fail ++ // __lasx_vext2xv_du_hu ++ // xd, xj ++ // V4DI, V16HI ++ v4i64_r = __lasx_vext2xv_du_hu(v16i16_a); // CHECK: call <4 x i64> @llvm.loongarch.lasx.vext2xv.du.hu( ++ ++ // __lasx_vext2xv_du_bu ++ // xd, xj ++ // V4DI, V32QI ++ v4i64_r = __lasx_vext2xv_du_bu(v32i8_a); // CHECK: call <4 x i64> @llvm.loongarch.lasx.vext2xv.du.bu( ++ ++ // __lasx_xvpermi_q ++ // xd, xj, ui8 ++ // V32QI, V32QI, V32QI, USI ++ v32i8_r = __lasx_xvpermi_q(v32i8_a, v32i8_b, ui8); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvpermi.q( ++ ++ // __lasx_xvpermi_d ++ // xd, xj, ui8 ++ // V4DI, V4DI, USI ++ v4i64_r = __lasx_xvpermi_d(v4i64_a, ui8); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvpermi.d( ++ ++ // __lasx_xvperm_w ++ // xd, xj, xk ++ // V8SI, V8SI, V8SI ++ v8i32_r = __lasx_xvperm_w(v8i32_a, v8i32_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvperm.w( ++ ++ // __lasx_xvldrepl_b ++ // xd, rj, si12 ++ // V32QI, CVPOINTER, SI ++ v32i8_r = __lasx_xvldrepl_b(&v32i8_a, si12); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvldrepl.b( ++ ++ // __lasx_xvldrepl_h ++ // xd, rj, si11 ++ // V16HI, CVPOINTER, SI ++ v16i16_r = __lasx_xvldrepl_h(&v16i16_a, si11); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvldrepl.h( ++ ++ // __lasx_xvldrepl_w ++ // xd, rj, si10 ++ // V8SI, CVPOINTER, SI ++ v8i32_r = __lasx_xvldrepl_w(&v8i32_a, si10); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvldrepl.w( ++ ++ // __lasx_xvldrepl_d ++ // xd, rj, si9 ++ // V4DI, CVPOINTER, SI ++ v4i64_r = __lasx_xvldrepl_d(&v4i64_a, si9); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvldrepl.d( ++ ++ // __lasx_xvpickve2gr_w ++ // rd, xj, ui3 ++ // SI, V8SI, UQI ++ i32_r = __lasx_xvpickve2gr_w(v8i32_a, ui3_b); // CHECK: call i32 @llvm.loongarch.lasx.xvpickve2gr.w( ++ ++ // __lasx_xvpickve2gr_wu ++ // rd, xj, ui3 ++ // USI, V8SI, UQI ++ u32_r = __lasx_xvpickve2gr_wu(v8i32_a, ui3_b); // CHECK: call i32 @llvm.loongarch.lasx.xvpickve2gr.wu( ++ ++ // __lasx_xvpickve2gr_d ++ // rd, xj, ui2 ++ // DI, V4DI, UQI ++ i64_r = __lasx_xvpickve2gr_d(v4i64_a, ui2_b); // CHECK: call i64 @llvm.loongarch.lasx.xvpickve2gr.d( ++ ++ // __lasx_xvpickve2gr_du ++ // rd, xj, ui2 ++ // UDI, V4DI, UQI ++ u64_r = __lasx_xvpickve2gr_du(v4i64_a, ui2_b); // CHECK: call i64 @llvm.loongarch.lasx.xvpickve2gr.du( ++ ++ // __lasx_xvaddwev_q_d ++ // xd, xj, xk ++ // V4DI, V4DI, V4DI ++ v4i64_r = __lasx_xvaddwev_q_d(v4i64_a, v4i64_b); // CHECK: call <4 x i64> 
@llvm.loongarch.lasx.xvaddwev.q.d( ++ ++ // __lasx_xvaddwev_d_w ++ // xd, xj, xk ++ // V4DI, V8SI, V8SI ++ v4i64_r = __lasx_xvaddwev_d_w(v8i32_a, v8i32_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvaddwev.d.w( ++ ++ // __lasx_xvaddwev_w_h ++ // xd, xj, xk ++ // V8SI, V16HI, V16HI ++ v8i32_r = __lasx_xvaddwev_w_h(v16i16_a, v16i16_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvaddwev.w.h( ++ ++ // __lasx_xvaddwev_h_b ++ // xd, xj, xk ++ // V16HI, V32QI, V32QI ++ v16i16_r = __lasx_xvaddwev_h_b(v32i8_a, v32i8_b); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvaddwev.h.b( ++ ++ // __lasx_xvaddwev_q_du ++ // xd, xj, xk ++ // V4DI, UV4DI, UV4DI ++ v4i64_r = __lasx_xvaddwev_q_du(v4u64_a, v4u64_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvaddwev.q.du( ++ ++ // __lasx_xvaddwev_d_wu ++ // xd, xj, xk ++ // V4DI, UV8SI, UV8SI ++ v4i64_r = __lasx_xvaddwev_d_wu(v8u32_a, v8u32_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvaddwev.d.wu( ++ ++ // __lasx_xvaddwev_w_hu ++ // xd, xj, xk ++ // V8SI, UV16HI, UV16HI ++ v8i32_r = __lasx_xvaddwev_w_hu(v16u16_a, v16u16_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvaddwev.w.hu( ++ ++ // __lasx_xvaddwev_h_bu ++ // xd, xj, xk ++ // V16HI, UV32QI, UV32QI ++ v16i16_r = __lasx_xvaddwev_h_bu(v32u8_a, v32u8_b); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvaddwev.h.bu( ++ ++ // __lasx_xvsubwev_q_d ++ // xd, xj, xk ++ // V4DI, V4DI, V4DI ++ v4i64_r = __lasx_xvsubwev_q_d(v4i64_a, v4i64_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvsubwev.q.d( ++ ++ // __lasx_xvsubwev_d_w ++ // xd, xj, xk ++ // V4DI, V8SI, V8SI ++ v4i64_r = __lasx_xvsubwev_d_w(v8i32_a, v8i32_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvsubwev.d.w( ++ ++ // __lasx_xvsubwev_w_h ++ // xd, xj, xk ++ // V8SI, V16HI, V16HI ++ v8i32_r = __lasx_xvsubwev_w_h(v16i16_a, v16i16_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvsubwev.w.h( ++ ++ // __lasx_xvsubwev_h_b ++ // xd, xj, xk ++ // V16HI, V32QI, V32QI ++ v16i16_r = __lasx_xvsubwev_h_b(v32i8_a, v32i8_b); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvsubwev.h.b( ++ ++ // __lasx_xvsubwev_q_du ++ // xd, xj, xk ++ // V4DI, UV4DI, UV4DI ++ v4i64_r = __lasx_xvsubwev_q_du(v4u64_a, v4u64_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvsubwev.q.du( ++ ++ // __lasx_xvsubwev_d_wu ++ // xd, xj, xk ++ // V4DI, UV8SI, UV8SI ++ v4i64_r = __lasx_xvsubwev_d_wu(v8u32_a, v8u32_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvsubwev.d.wu( ++ ++ // __lasx_xvsubwev_w_hu ++ // xd, xj, xk ++ // V8SI, UV16HI, UV16HI ++ v8i32_r = __lasx_xvsubwev_w_hu(v16u16_a, v16u16_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvsubwev.w.hu( ++ ++ // __lasx_xvsubwev_h_bu ++ // xd, xj, xk ++ // V16HI, UV32QI, UV32QI ++ v16i16_r = __lasx_xvsubwev_h_bu(v32u8_a, v32u8_b); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvsubwev.h.bu( ++ ++ // __lasx_xvmulwev_q_d ++ // xd, xj, xk ++ // V4DI, V4DI, V4DI ++ v4i64_r = __lasx_xvmulwev_q_d(v4i64_a, v4i64_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvmulwev.q.d( ++ ++ // __lasx_xvmulwev_d_w ++ // xd, xj, xk ++ // V4DI, V8SI, V8SI ++ v4i64_r = __lasx_xvmulwev_d_w(v8i32_a, v8i32_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvmulwev.d.w( ++ ++ // __lasx_xvmulwev_w_h ++ // xd, xj, xk ++ // V8SI, V16HI, V16HI ++ v8i32_r = __lasx_xvmulwev_w_h(v16i16_a, v16i16_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvmulwev.w.h( ++ ++ // __lasx_xvmulwev_h_b ++ // xd, xj, xk ++ // V16HI, V32QI, V32QI ++ v16i16_r = __lasx_xvmulwev_h_b(v32i8_a, v32i8_b); // CHECK: call <16 x i16> 
@llvm.loongarch.lasx.xvmulwev.h.b( ++ ++ // __lasx_xvmulwev_q_du ++ // xd, xj, xk ++ // V4DI, UV4DI, UV4DI ++ v4i64_r = __lasx_xvmulwev_q_du(v4u64_a, v4u64_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvmulwev.q.du( ++ ++ // __lasx_xvmulwev_d_wu ++ // xd, xj, xk ++ // V4DI, UV8SI, UV8SI ++ v4i64_r = __lasx_xvmulwev_d_wu(v8u32_a, v8u32_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvmulwev.d.wu( ++ ++ // __lasx_xvmulwev_w_hu ++ // xd, xj, xk ++ // V8SI, UV16HI, UV16HI ++ v8i32_r = __lasx_xvmulwev_w_hu(v16u16_a, v16u16_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvmulwev.w.hu( ++ ++ // __lasx_xvmulwev_h_bu ++ // xd, xj, xk ++ // V16HI, UV32QI, UV32QI ++ v16i16_r = __lasx_xvmulwev_h_bu(v32u8_a, v32u8_b); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvmulwev.h.bu( ++ ++ // __lasx_xvaddwod_q_d ++ // xd, xj, xk ++ // V4DI, V4DI, V4DI ++ v4i64_r = __lasx_xvaddwod_q_d(v4i64_a, v4i64_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvaddwod.q.d( ++ ++ // __lasx_xvaddwod_d_w ++ // xd, xj, xk ++ // V4DI, V8SI, V8SI ++ v4i64_r = __lasx_xvaddwod_d_w(v8i32_a, v8i32_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvaddwod.d.w( ++ ++ // __lasx_xvaddwod_w_h ++ // xd, xj, xk ++ // V8SI, V16HI, V16HI ++ v8i32_r = __lasx_xvaddwod_w_h(v16i16_a, v16i16_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvaddwod.w.h( ++ ++ // __lasx_xvaddwod_h_b ++ // xd, xj, xk ++ // V16HI, V32QI, V32QI ++ v16i16_r = __lasx_xvaddwod_h_b(v32i8_a, v32i8_b); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvaddwod.h.b( ++ ++ // __lasx_xvaddwod_q_du ++ // xd, xj, xk ++ // V4DI, UV4DI, UV4DI ++ v4i64_r = __lasx_xvaddwod_q_du(v4u64_a, v4u64_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvaddwod.q.du( ++ ++ // __lasx_xvaddwod_d_wu ++ // xd, xj, xk ++ // V4DI, UV8SI, UV8SI ++ v4i64_r = __lasx_xvaddwod_d_wu(v8u32_a, v8u32_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvaddwod.d.wu( ++ ++ // __lasx_xvaddwod_w_hu ++ // xd, xj, xk ++ // V8SI, UV16HI, UV16HI ++ v8i32_r = __lasx_xvaddwod_w_hu(v16u16_a, v16u16_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvaddwod.w.hu( ++ ++ // __lasx_xvaddwod_h_bu ++ // xd, xj, xk ++ // V16HI, UV32QI, UV32QI ++ v16i16_r = __lasx_xvaddwod_h_bu(v32u8_a, v32u8_b); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvaddwod.h.bu( ++ ++ // __lasx_xvsubwod_q_d ++ // xd, xj, xk ++ // V4DI, V4DI, V4DI ++ v4i64_r = __lasx_xvsubwod_q_d(v4i64_a, v4i64_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvsubwod.q.d( ++ ++ // __lasx_xvsubwod_d_w ++ // xd, xj, xk ++ // V4DI, V8SI, V8SI ++ v4i64_r = __lasx_xvsubwod_d_w(v8i32_a, v8i32_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvsubwod.d.w( ++ ++ // __lasx_xvsubwod_w_h ++ // xd, xj, xk ++ // V8SI, V16HI, V16HI ++ v8i32_r = __lasx_xvsubwod_w_h(v16i16_a, v16i16_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvsubwod.w.h( ++ ++ // __lasx_xvsubwod_h_b ++ // xd, xj, xk ++ // V16HI, V32QI, V32QI ++ v16i16_r = __lasx_xvsubwod_h_b(v32i8_a, v32i8_b); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvsubwod.h.b( ++ ++ // __lasx_xvsubwod_q_du ++ // xd, xj, xk ++ // V4DI, UV4DI, UV4DI ++ v4i64_r = __lasx_xvsubwod_q_du(v4u64_a, v4u64_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvsubwod.q.du( ++ ++ // __lasx_xvsubwod_d_wu ++ // xd, xj, xk ++ // V4DI, UV8SI, UV8SI ++ v4i64_r = __lasx_xvsubwod_d_wu(v8u32_a, v8u32_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvsubwod.d.wu( ++ ++ // __lasx_xvsubwod_w_hu ++ // xd, xj, xk ++ // V8SI, UV16HI, UV16HI ++ v8i32_r = __lasx_xvsubwod_w_hu(v16u16_a, v16u16_b); // CHECK: call <8 x i32> 
@llvm.loongarch.lasx.xvsubwod.w.hu( ++ ++ // __lasx_xvsubwod_h_bu ++ // xd, xj, xk ++ // V16HI, UV32QI, UV32QI ++ v16i16_r = __lasx_xvsubwod_h_bu(v32u8_a, v32u8_b); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvsubwod.h.bu( ++ ++ // __lasx_xvmulwod_q_d ++ // xd, xj, xk ++ // V4DI, V4DI, V4DI ++ v4i64_r = __lasx_xvmulwod_q_d(v4i64_a, v4i64_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvmulwod.q.d( ++ ++ // __lasx_xvmulwod_d_w ++ // xd, xj, xk ++ // V4DI, V8SI, V8SI ++ v4i64_r = __lasx_xvmulwod_d_w(v8i32_a, v8i32_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvmulwod.d.w( ++ ++ // __lasx_xvmulwod_w_h ++ // xd, xj, xk ++ // V8SI, V16HI, V16HI ++ v8i32_r = __lasx_xvmulwod_w_h(v16i16_a, v16i16_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvmulwod.w.h( ++ ++ // __lasx_xvmulwod_h_b ++ // xd, xj, xk ++ // V16HI, V32QI, V32QI ++ v16i16_r = __lasx_xvmulwod_h_b(v32i8_a, v32i8_b); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvmulwod.h.b( ++ ++ // __lasx_xvmulwod_q_du ++ // xd, xj, xk ++ // V4DI, UV4DI, UV4DI ++ v4i64_r = __lasx_xvmulwod_q_du(v4u64_a, v4u64_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvmulwod.q.du( ++ ++ // __lasx_xvmulwod_d_wu ++ // xd, xj, xk ++ // V4DI, UV8SI, UV8SI ++ v4i64_r = __lasx_xvmulwod_d_wu(v8u32_a, v8u32_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvmulwod.d.wu( ++ ++ // __lasx_xvmulwod_w_hu ++ // xd, xj, xk ++ // V8SI, UV16HI, UV16HI ++ v8i32_r = __lasx_xvmulwod_w_hu(v16u16_a, v16u16_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvmulwod.w.hu( ++ ++ // __lasx_xvmulwod_h_bu ++ // xd, xj, xk ++ // V16HI, UV32QI, UV32QI ++ v16i16_r = __lasx_xvmulwod_h_bu(v32u8_a, v32u8_b); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvmulwod.h.bu( ++ ++ // __lasx_xvaddwev_d_wu_w ++ // xd, xj, xk ++ // V4DI, UV8SI, V8SI ++ v4i64_r = __lasx_xvaddwev_d_wu_w(v8u32_a, v8i32_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvaddwev.d.wu.w( ++ ++ // __lasx_xvaddwev_w_hu_h ++ // xd, xj, xk ++ // V8SI, UV16HI, V16HI ++ v8i32_r = __lasx_xvaddwev_w_hu_h(v16u16_a, v16i16_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvaddwev.w.hu.h( ++ ++ // __lasx_xvaddwev_h_bu_b ++ // xd, xj, xk ++ // V16HI, UV32QI, V32QI ++ v16i16_r = __lasx_xvaddwev_h_bu_b(v32u8_a, v32i8_b); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvaddwev.h.bu.b( ++ ++ // __lasx_xvmulwev_d_wu_w ++ // xd, xj, xk ++ // V4DI, UV8SI, V8SI ++ v4i64_r = __lasx_xvmulwev_d_wu_w(v8u32_a, v8i32_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvmulwev.d.wu.w( ++ ++ // __lasx_xvmulwev_w_hu_h ++ // xd, xj, xk ++ // V8SI, UV16HI, V16HI ++ v8i32_r = __lasx_xvmulwev_w_hu_h(v16u16_a, v16i16_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvmulwev.w.hu.h( ++ ++ // __lasx_xvmulwev_h_bu_b ++ // xd, xj, xk ++ // V16HI, UV32QI, V32QI ++ v16i16_r = __lasx_xvmulwev_h_bu_b(v32u8_a, v32i8_b); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvmulwev.h.bu.b( ++ ++ // __lasx_xvaddwod_d_wu_w ++ // xd, xj, xk ++ // V4DI, UV8SI, V8SI ++ v4i64_r = __lasx_xvaddwod_d_wu_w(v8u32_a, v8i32_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvaddwod.d.wu.w( ++ ++ // __lasx_xvaddwod_w_hu_h ++ // xd, xj, xk ++ // V8SI, UV16HI, V16HI ++ v8i32_r = __lasx_xvaddwod_w_hu_h(v16u16_a, v16i16_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvaddwod.w.hu.h( ++ ++ // __lasx_xvaddwod_h_bu_b ++ // xd, xj, xk ++ // V16HI, UV32QI, V32QI ++ v16i16_r = __lasx_xvaddwod_h_bu_b(v32u8_a, v32i8_b); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvaddwod.h.bu.b( ++ ++ // __lasx_xvmulwod_d_wu_w ++ // xd, xj, xk ++ // V4DI, UV8SI, V8SI ++ v4i64_r = 
__lasx_xvmulwod_d_wu_w(v8u32_a, v8i32_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvmulwod.d.wu.w( ++ ++ // __lasx_xvmulwod_w_hu_h ++ // xd, xj, xk ++ // V8SI, UV16HI, V16HI ++ v8i32_r = __lasx_xvmulwod_w_hu_h(v16u16_a, v16i16_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvmulwod.w.hu.h( ++ ++ // __lasx_xvmulwod_h_bu_b ++ // xd, xj, xk ++ // V16HI, UV32QI, V32QI ++ v16i16_r = __lasx_xvmulwod_h_bu_b(v32u8_a, v32i8_b); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvmulwod.h.bu.b( ++ ++ // __lasx_xvhaddw_q_d ++ // xd, xj, xk ++ // V4DI, V4DI, V4DI ++ v4i64_r = __lasx_xvhaddw_q_d(v4i64_a, v4i64_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvhaddw.q.d( ++ ++ // __lasx_xvhaddw_qu_du ++ // xd, xj, xk ++ // UV4DI, UV4DI, UV4DI ++ v4u64_r = __lasx_xvhaddw_qu_du(v4u64_a, v4u64_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvhaddw.qu.du( ++ ++ // __lasx_xvhsubw_q_d ++ // xd, xj, xk ++ // V4DI, V4DI, V4DI ++ v4i64_r = __lasx_xvhsubw_q_d(v4i64_a, v4i64_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvhsubw.q.d( ++ ++ // __lasx_xvhsubw_qu_du ++ // xd, xj, xk ++ // UV4DI, UV4DI, UV4DI ++ v4u64_r = __lasx_xvhsubw_qu_du(v4u64_a, v4u64_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvhsubw.qu.du( ++ ++ // __lasx_xvmaddwev_q_d ++ // xd, xj, xk ++ // V4DI, V4DI, V4DI, V4DI ++ v4i64_r = __lasx_xvmaddwev_q_d(v4i64_a, v4i64_b, v4i64_c); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvmaddwev.q.d( ++ ++ // __lasx_xvmaddwev_d_w ++ // xd, xj, xk ++ // V4DI, V4DI, V8SI, V8SI ++ v4i64_r = __lasx_xvmaddwev_d_w(v4i64_a, v8i32_b, v8i32_c); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvmaddwev.d.w( ++ ++ // __lasx_xvmaddwev_w_h ++ // xd, xj, xk ++ // V8SI, V8SI, V16HI, V16HI ++ v8i32_r = __lasx_xvmaddwev_w_h(v8i32_a, v16i16_b, v16i16_c); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvmaddwev.w.h( ++ ++ // __lasx_xvmaddwev_h_b ++ // xd, xj, xk ++ // V16HI, V16HI, V32QI, V32QI ++ v16i16_r = __lasx_xvmaddwev_h_b(v16i16_a, v32i8_b, v32i8_c); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvmaddwev.h.b( ++ ++ // __lasx_xvmaddwev_q_du ++ // xd, xj, xk ++ // UV4DI, UV4DI, UV4DI, UV4DI ++ v4u64_r = __lasx_xvmaddwev_q_du(v4u64_a, v4u64_b, v4u64_c); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvmaddwev.q.du( ++ ++ // __lasx_xvmaddwev_d_wu ++ // xd, xj, xk ++ // UV4DI, UV4DI, UV8SI, UV8SI ++ v4u64_r = __lasx_xvmaddwev_d_wu(v4u64_a, v8u32_b, v8u32_c); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvmaddwev.d.wu( ++ ++ // __lasx_xvmaddwev_w_hu ++ // xd, xj, xk ++ // UV8SI, UV8SI, UV16HI, UV16HI ++ v8u32_r = __lasx_xvmaddwev_w_hu(v8u32_a, v16u16_b, v16u16_c); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvmaddwev.w.hu( ++ ++ // __lasx_xvmaddwev_h_bu ++ // xd, xj, xk ++ // UV16HI, UV16HI, UV32QI, UV32QI ++ v16u16_r = __lasx_xvmaddwev_h_bu(v16u16_a, v32u8_b, v32u8_c); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvmaddwev.h.bu( ++ ++ // __lasx_xvmaddwod_q_d ++ // xd, xj, xk ++ // V4DI, V4DI, V4DI, V4DI ++ v4i64_r = __lasx_xvmaddwod_q_d(v4i64_a, v4i64_b, v4i64_c); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvmaddwod.q.d( ++ ++ // __lasx_xvmaddwod_d_w ++ // xd, xj, xk ++ // V4DI, V4DI, V8SI, V8SI ++ v4i64_r = __lasx_xvmaddwod_d_w(v4i64_a, v8i32_b, v8i32_c); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvmaddwod.d.w( ++ ++ // __lasx_xvmaddwod_w_h ++ // xd, xj, xk ++ // V8SI, V8SI, V16HI, V16HI ++ v8i32_r = __lasx_xvmaddwod_w_h(v8i32_a, v16i16_b, v16i16_c); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvmaddwod.w.h( ++ ++ // __lasx_xvmaddwod_h_b ++ // xd, xj, xk ++ // V16HI, V16HI, V32QI, V32QI ++ 
v16i16_r = __lasx_xvmaddwod_h_b(v16i16_a, v32i8_b, v32i8_c); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvmaddwod.h.b( ++ ++ // __lasx_xvmaddwod_q_du ++ // xd, xj, xk ++ // UV4DI, UV4DI, UV4DI, UV4DI ++ v4u64_r = __lasx_xvmaddwod_q_du(v4u64_a, v4u64_b, v4u64_c); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvmaddwod.q.du( ++ ++ // __lasx_xvmaddwod_d_wu ++ // xd, xj, xk ++ // UV4DI, UV4DI, UV8SI, UV8SI ++ v4u64_r = __lasx_xvmaddwod_d_wu(v4u64_a, v8u32_b, v8u32_c); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvmaddwod.d.wu( ++ ++ // __lasx_xvmaddwod_w_hu ++ // xd, xj, xk ++ // UV8SI, UV8SI, UV16HI, UV16HI ++ v8u32_r = __lasx_xvmaddwod_w_hu(v8u32_a, v16u16_b, v16u16_c); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvmaddwod.w.hu( ++ ++ // __lasx_xvmaddwod_h_bu ++ // xd, xj, xk ++ // UV16HI, UV16HI, UV32QI, UV32QI ++ v16u16_r = __lasx_xvmaddwod_h_bu(v16u16_a, v32u8_b, v32u8_c); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvmaddwod.h.bu( ++ ++ // __lasx_xvmaddwev_q_du_d ++ // xd, xj, xk ++ // V4DI, V4DI, UV4DI, V4DI ++ v4i64_r = __lasx_xvmaddwev_q_du_d(v4i64_a, v4u64_b, v4i64_c); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvmaddwev.q.du.d( ++ ++ // __lasx_xvmaddwev_d_wu_w ++ // xd, xj, xk ++ // V4DI, V4DI, UV8SI, V8SI ++ v4i64_r = __lasx_xvmaddwev_d_wu_w(v4i64_a, v8u32_b, v8i32_c); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvmaddwev.d.wu.w( ++ ++ // __lasx_xvmaddwev_w_hu_h ++ // xd, xj, xk ++ // V8SI, V8SI, UV16HI, V16HI ++ v8i32_r = __lasx_xvmaddwev_w_hu_h(v8i32_a, v16u16_b, v16i16_c); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvmaddwev.w.hu.h( ++ ++ // __lasx_xvmaddwev_h_bu_b ++ // xd, xj, xk ++ // V16HI, V16HI, UV32QI, V32QI ++ v16i16_r = __lasx_xvmaddwev_h_bu_b(v16i16_a, v32u8_b, v32i8_c); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvmaddwev.h.bu.b( ++ ++ // __lasx_xvmaddwod_q_du_d ++ // xd, xj, xk ++ // V4DI, V4DI, UV4DI, V4DI ++ v4i64_r = __lasx_xvmaddwod_q_du_d(v4i64_a, v4u64_b, v4i64_c); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvmaddwod.q.du.d( ++ ++ // __lasx_xvmaddwod_d_wu_w ++ // xd, xj, xk ++ // V4DI, V4DI, UV8SI, V8SI ++ v4i64_r = __lasx_xvmaddwod_d_wu_w(v4i64_a, v8u32_b, v8i32_c); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvmaddwod.d.wu.w( ++ ++ // __lasx_xvmaddwod_w_hu_h ++ // xd, xj, xk ++ // V8SI, V8SI, UV16HI, V16HI ++ v8i32_r = __lasx_xvmaddwod_w_hu_h(v8i32_a, v16u16_b, v16i16_c); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvmaddwod.w.hu.h( ++ ++ // __lasx_xvmaddwod_h_bu_b ++ // xd, xj, xk ++ // V16HI, V16HI, UV32QI, V32QI ++ v16i16_r = __lasx_xvmaddwod_h_bu_b(v16i16_a, v32u8_b, v32i8_c); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvmaddwod.h.bu.b( ++ ++ // __lasx_xvrotr_b ++ // xd, xj, xk ++ // V32QI, V32QI, V32QI ++ v32i8_r = __lasx_xvrotr_b(v32i8_a, v32i8_b); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvrotr.b( ++ ++ // __lasx_xvrotr_h ++ // xd, xj, xk ++ // V16HI, V16HI, V16HI ++ v16i16_r = __lasx_xvrotr_h(v16i16_a, v16i16_b); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvrotr.h( ++ ++ // __lasx_xvrotr_w ++ // xd, xj, xk ++ // V8SI, V8SI, V8SI ++ v8i32_r = __lasx_xvrotr_w(v8i32_a, v8i32_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvrotr.w( ++ ++ // __lasx_xvrotr_d ++ // xd, xj, xk ++ // V4DI, V4DI, V4DI ++ v4i64_r = __lasx_xvrotr_d(v4i64_a, v4i64_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvrotr.d( ++ ++ // __lasx_xvadd_q ++ // xd, xj, xk ++ // V4DI, V4DI, V4DI ++ v4i64_r = __lasx_xvadd_q(v4i64_a, v4i64_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvadd.q( ++ ++ // __lasx_xvsub_q ++ // xd, xj, xk ++ // 
V4DI, V4DI, V4DI ++ v4i64_r = __lasx_xvsub_q(v4i64_a, v4i64_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvsub.q( ++ ++ // __lasx_xvaddwev_q_du_d ++ // xd, xj, xk ++ // V4DI, UV4DI, V4DI ++ v4i64_r = __lasx_xvaddwev_q_du_d(v4u64_a, v4i64_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvaddwev.q.du.d( ++ ++ // __lasx_xvaddwod_q_du_d ++ // xd, xj, xk ++ // V4DI, UV4DI, V4DI ++ v4i64_r = __lasx_xvaddwod_q_du_d(v4u64_a, v4i64_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvaddwod.q.du.d( ++ ++ // __lasx_xvmulwev_q_du_d ++ // xd, xj, xk ++ // V4DI, UV4DI, V4DI ++ v4i64_r = __lasx_xvmulwev_q_du_d(v4u64_a, v4i64_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvmulwev.q.du.d( ++ ++ // __lasx_xvmulwod_q_du_d ++ // xd, xj, xk ++ // V4DI, UV4DI, V4DI ++ v4i64_r = __lasx_xvmulwod_q_du_d(v4u64_a, v4i64_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvmulwod.q.du.d( ++ ++ // __lasx_xvmskgez_b ++ // xd, xj ++ // V32QI, V32QI ++ v32i8_r = __lasx_xvmskgez_b(v32i8_a); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvmskgez.b( ++ ++ // __lasx_xvmsknz_b ++ // xd, xj ++ // V32QI, V32QI ++ v32i8_r = __lasx_xvmsknz_b(v32i8_a); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvmsknz.b( ++ ++ // __lasx_xvexth_h_b ++ // xd, xj ++ // V16HI, V32QI ++ v16i16_r = __lasx_xvexth_h_b(v32i8_a); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvexth.h.b( ++ ++ // __lasx_xvexth_w_h ++ // xd, xj ++ // V8SI, V16HI ++ v8i32_r = __lasx_xvexth_w_h(v16i16_a); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvexth.w.h( ++ ++ // __lasx_xvexth_d_w ++ // xd, xj ++ // V4DI, V8SI ++ v4i64_r = __lasx_xvexth_d_w(v8i32_a); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvexth.d.w( ++ ++ // __lasx_xvexth_q_d ++ // xd, xj ++ // V4DI, V4DI ++ v4i64_r = __lasx_xvexth_q_d(v4i64_a); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvexth.q.d( ++ ++ // __lasx_xvexth_hu_bu ++ // xd, xj ++ // UV16HI, UV32QI ++ v16u16_r = __lasx_xvexth_hu_bu(v32u8_a); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvexth.hu.bu( ++ ++ // __lasx_xvexth_wu_hu ++ // xd, xj ++ // UV8SI, UV16HI ++ v8u32_r = __lasx_xvexth_wu_hu(v16u16_a); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvexth.wu.hu( ++ ++ // __lasx_xvexth_du_wu ++ // xd, xj ++ // UV4DI, UV8SI ++ v4u64_r = __lasx_xvexth_du_wu(v8u32_a); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvexth.du.wu( ++ ++ // __lasx_xvexth_qu_du ++ // xd, xj ++ // UV4DI, UV4DI ++ v4u64_r = __lasx_xvexth_qu_du(v4u64_a); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvexth.qu.du( ++ ++ // __lasx_xvextl_q_d ++ // xd, xj ++ // V4DI, V4DI ++ v4i64_r = __lasx_xvextl_q_d(v4i64_a); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvextl.q.d( ++ ++ // __lasx_xvextl_qu_du ++ // xd, xj ++ // UV4DI, UV4DI ++ v4u64_r = __lasx_xvextl_qu_du(v4u64_a); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvextl.qu.du( ++ ++ // __lasx_xvrotri_b ++ // xd, xj, ui3 ++ // V32QI, V32QI, UQI ++ v32i8_r = __lasx_xvrotri_b(v32i8_a, ui3_b); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvrotri.b( ++ ++ // __lasx_xvrotri_h ++ // xd, xj, ui4 ++ // V16HI, V16HI, UQI ++ v16i16_r = __lasx_xvrotri_h(v16i16_a, ui4_b); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvrotri.h( ++ ++ // __lasx_xvrotri_w ++ // xd, xj, ui5 ++ // V8SI, V8SI, UQI ++ v8i32_r = __lasx_xvrotri_w(v8i32_a, ui5_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvrotri.w( ++ ++ // __lasx_xvrotri_d ++ // xd, xj, ui6 ++ // V4DI, V4DI, UQI ++ v4i64_r = __lasx_xvrotri_d(v4i64_a, ui6_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvrotri.d( ++ ++ // __lasx_xvsrlni_b_h ++ // xd, xj, ui4 ++ // V32QI, 
V32QI, V32QI, USI ++ v32i8_r = __lasx_xvsrlni_b_h(v32i8_a, v32i8_b, ui4); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvsrlni.b.h( ++ ++ // __lasx_xvsrlni_h_w ++ // xd, xj, ui5 ++ // V16HI, V16HI, V16HI, USI ++ v16i16_r = __lasx_xvsrlni_h_w(v16i16_a, v16i16_b, ui5); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvsrlni.h.w( ++ ++ // __lasx_xvsrlni_w_d ++ // xd, xj, ui6 ++ // V8SI, V8SI, V8SI, USI ++ v8i32_r = __lasx_xvsrlni_w_d(v8i32_a, v8i32_b, ui6); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvsrlni.w.d( ++ ++ // __lasx_xvsrlni_d_q ++ // xd, xj, ui7 ++ // V4DI, V4DI, V4DI, USI ++ v4i64_r = __lasx_xvsrlni_d_q(v4i64_a, v4i64_b, ui7); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvsrlni.d.q( ++ ++ // __lasx_xvsrlrni_b_h ++ // xd, xj, ui4 ++ // V32QI, V32QI, V32QI, USI ++ v32i8_r = __lasx_xvsrlrni_b_h(v32i8_a, v32i8_b, ui4); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvsrlrni.b.h( ++ ++ // __lasx_xvsrlrni_h_w ++ // xd, xj, ui5 ++ // V16HI, V16HI, V16HI, USI ++ v16i16_r = __lasx_xvsrlrni_h_w(v16i16_a, v16i16_b, ui5); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvsrlrni.h.w( ++ ++ // __lasx_xvsrlrni_w_d ++ // xd, xj, ui6 ++ // V8SI, V8SI, V8SI, USI ++ v8i32_r = __lasx_xvsrlrni_w_d(v8i32_a, v8i32_b, ui6); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvsrlrni.w.d( ++ ++ // __lasx_xvsrlrni_d_q ++ // xd, xj, ui7 ++ // V4DI, V4DI, V4DI, USI ++ v4i64_r = __lasx_xvsrlrni_d_q(v4i64_a, v4i64_b, ui7); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvsrlrni.d.q( ++ ++ // __lasx_xvssrlni_b_h ++ // xd, xj, ui4 ++ // V32QI, V32QI, V32QI, USI ++ v32i8_r = __lasx_xvssrlni_b_h(v32i8_a, v32i8_b, ui4); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvssrlni.b.h( ++ ++ // __lasx_xvssrlni_h_w ++ // xd, xj, ui5 ++ // V16HI, V16HI, V16HI, USI ++ v16i16_r = __lasx_xvssrlni_h_w(v16i16_a, v16i16_b, ui5); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvssrlni.h.w( ++ ++ // __lasx_xvssrlni_w_d ++ // xd, xj, ui6 ++ // V8SI, V8SI, V8SI, USI ++ v8i32_r = __lasx_xvssrlni_w_d(v8i32_a, v8i32_b, ui6); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvssrlni.w.d( ++ ++ // __lasx_xvssrlni_d_q ++ // xd, xj, ui7 ++ // V4DI, V4DI, V4DI, USI ++ v4i64_r = __lasx_xvssrlni_d_q(v4i64_a, v4i64_b, ui7); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvssrlni.d.q( ++ ++ // __lasx_xvssrlni_bu_h ++ // xd, xj, ui4 ++ // UV32QI, UV32QI, V32QI, USI ++ v32u8_r = __lasx_xvssrlni_bu_h(v32u8_a, v32i8_b, ui4); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvssrlni.bu.h( ++ ++ // __lasx_xvssrlni_hu_w ++ // xd, xj, ui5 ++ // UV16HI, UV16HI, V16HI, USI ++ v16u16_r = __lasx_xvssrlni_hu_w(v16u16_a, v16i16_b, ui5); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvssrlni.hu.w( ++ ++ // __lasx_xvssrlni_wu_d ++ // xd, xj, ui6 ++ // UV8SI, UV8SI, V8SI, USI ++ v8u32_r = __lasx_xvssrlni_wu_d(v8u32_a, v8i32_b, ui6); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvssrlni.wu.d( ++ ++ // __lasx_xvssrlni_du_q ++ // xd, xj, ui7 ++ // UV4DI, UV4DI, V4DI, USI ++ v4u64_r = __lasx_xvssrlni_du_q(v4u64_a, v4i64_b, ui7); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvssrlni.du.q( ++ ++ // __lasx_xvssrlrni_b_h ++ // xd, xj, ui4 ++ // V32QI, V32QI, V32QI, USI ++ v32i8_r = __lasx_xvssrlrni_b_h(v32i8_a, v32i8_b, ui4); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvssrlrni.b.h( ++ ++ // __lasx_xvssrlrni_h_w ++ // xd, xj, ui5 ++ // V16HI, V16HI, V16HI, USI ++ v16i16_r = __lasx_xvssrlrni_h_w(v16i16_a, v16i16_b, ui5); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvssrlrni.h.w( ++ ++ // __lasx_xvssrlrni_w_d ++ // xd, xj, ui6 ++ // V8SI, V8SI, V8SI, USI ++ v8i32_r = 
__lasx_xvssrlrni_w_d(v8i32_a, v8i32_b, ui6); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvssrlrni.w.d( ++ ++ // __lasx_xvssrlrni_d_q ++ // xd, xj, ui7 ++ // V4DI, V4DI, V4DI, USI ++ v4i64_r = __lasx_xvssrlrni_d_q(v4i64_a, v4i64_b, ui7); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvssrlrni.d.q( ++ ++ // __lasx_xvssrlrni_bu_h ++ // xd, xj, ui4 ++ // UV32QI, UV32QI, V32QI, USI ++ v32u8_r = __lasx_xvssrlrni_bu_h(v32u8_a, v32i8_b, ui4); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvssrlrni.bu.h( ++ ++ // __lasx_xvssrlrni_hu_w ++ // xd, xj, ui5 ++ // UV16HI, UV16HI, V16HI, USI ++ v16u16_r = __lasx_xvssrlrni_hu_w(v16u16_a, v16i16_b, ui5); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvssrlrni.hu.w( ++ ++ // __lasx_xvssrlrni_wu_d ++ // xd, xj, ui6 ++ // UV8SI, UV8SI, V8SI, USI ++ v8u32_r = __lasx_xvssrlrni_wu_d(v8u32_a, v8i32_b, ui6); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvssrlrni.wu.d( ++ ++ // __lasx_xvssrlrni_du_q ++ // xd, xj, ui7 ++ // UV4DI, UV4DI, V4DI, USI ++ v4u64_r = __lasx_xvssrlrni_du_q(v4u64_a, v4i64_b, ui7); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvssrlrni.du.q( ++ ++ // __lasx_xvsrani_b_h ++ // xd, xj, ui4 ++ // V32QI, V32QI, V32QI, USI ++ v32i8_r = __lasx_xvsrani_b_h(v32i8_a, v32i8_b, ui4); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvsrani.b.h( ++ ++ // __lasx_xvsrani_h_w ++ // xd, xj, ui5 ++ // V16HI, V16HI, V16HI, USI ++ v16i16_r = __lasx_xvsrani_h_w(v16i16_a, v16i16_b, ui5); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvsrani.h.w( ++ ++ // __lasx_xvsrani_w_d ++ // xd, xj, ui6 ++ // V8SI, V8SI, V8SI, USI ++ v8i32_r = __lasx_xvsrani_w_d(v8i32_a, v8i32_b, ui6); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvsrani.w.d( ++ ++ // __lasx_xvsrani_d_q ++ // xd, xj, ui7 ++ // V4DI, V4DI, V4DI, USI ++ v4i64_r = __lasx_xvsrani_d_q(v4i64_a, v4i64_b, ui7); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvsrani.d.q( ++ ++ // __lasx_xvsrarni_b_h ++ // xd, xj, ui4 ++ // V32QI, V32QI, V32QI, USI ++ v32i8_r = __lasx_xvsrarni_b_h(v32i8_a, v32i8_b, ui4); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvsrarni.b.h( ++ ++ // __lasx_xvsrarni_h_w ++ // xd, xj, ui5 ++ // V16HI, V16HI, V16HI, USI ++ v16i16_r = __lasx_xvsrarni_h_w(v16i16_a, v16i16_b, ui5); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvsrarni.h.w( ++ ++ // __lasx_xvsrarni_w_d ++ // xd, xj, ui6 ++ // V8SI, V8SI, V8SI, USI ++ v8i32_r = __lasx_xvsrarni_w_d(v8i32_a, v8i32_b, ui6); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvsrarni.w.d( ++ ++ // __lasx_xvsrarni_d_q ++ // xd, xj, ui7 ++ // V4DI, V4DI, V4DI, USI ++ v4i64_r = __lasx_xvsrarni_d_q(v4i64_a, v4i64_b, ui7); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvsrarni.d.q( ++ ++ // __lasx_xvssrani_b_h ++ // xd, xj, ui4 ++ // V32QI, V32QI, V32QI, USI ++ v32i8_r = __lasx_xvssrani_b_h(v32i8_a, v32i8_b, ui4); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvssrani.b.h( ++ ++ // __lasx_xvssrani_h_w ++ // xd, xj, ui5 ++ // V16HI, V16HI, V16HI, USI ++ v16i16_r = __lasx_xvssrani_h_w(v16i16_a, v16i16_b, ui5); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvssrani.h.w( ++ ++ // __lasx_xvssrani_w_d ++ // xd, xj, ui6 ++ // V8SI, V8SI, V8SI, USI ++ v8i32_r = __lasx_xvssrani_w_d(v8i32_a, v8i32_b, ui6); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvssrani.w.d( ++ ++ // __lasx_xvssrani_d_q ++ // xd, xj, ui7 ++ // V4DI, V4DI, V4DI, USI ++ v4i64_r = __lasx_xvssrani_d_q(v4i64_a, v4i64_b, ui7); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvssrani.d.q( ++ ++ // __lasx_xvssrani_bu_h ++ // xd, xj, ui4 ++ // UV32QI, UV32QI, V32QI, USI ++ v32u8_r = 
__lasx_xvssrani_bu_h(v32u8_a, v32i8_b, ui4); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvssrani.bu.h( ++ ++ // __lasx_xvssrani_hu_w ++ // xd, xj, ui5 ++ // UV16HI, UV16HI, V16HI, USI ++ v16u16_r = __lasx_xvssrani_hu_w(v16u16_a, v16i16_b, ui5); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvssrani.hu.w( ++ ++ // __lasx_xvssrani_wu_d ++ // xd, xj, ui6 ++ // UV8SI, UV8SI, V8SI, USI ++ v8u32_r = __lasx_xvssrani_wu_d(v8u32_a, v8i32_b, ui6); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvssrani.wu.d( ++ ++ // __lasx_xvssrani_du_q ++ // xd, xj, ui7 ++ // UV4DI, UV4DI, V4DI, USI ++ v4u64_r = __lasx_xvssrani_du_q(v4u64_a, v4i64_b, ui7); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvssrani.du.q( ++ ++ // __lasx_xvssrarni_b_h ++ // xd, xj, ui4 ++ // V32QI, V32QI, V32QI, USI ++ v32i8_r = __lasx_xvssrarni_b_h(v32i8_a, v32i8_b, ui4); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvssrarni.b.h( ++ ++ // __lasx_xvssrarni_h_w ++ // xd, xj, ui5 ++ // V16HI, V16HI, V16HI, USI ++ v16i16_r = __lasx_xvssrarni_h_w(v16i16_a, v16i16_b, ui5); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvssrarni.h.w( ++ ++ // __lasx_xvssrarni_w_d ++ // xd, xj, ui6 ++ // V8SI, V8SI, V8SI, USI ++ v8i32_r = __lasx_xvssrarni_w_d(v8i32_a, v8i32_b, ui6); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvssrarni.w.d( ++ ++ // __lasx_xvssrarni_d_q ++ // xd, xj, ui7 ++ // V4DI, V4DI, V4DI, USI ++ v4i64_r = __lasx_xvssrarni_d_q(v4i64_a, v4i64_b, ui7); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvssrarni.d.q( ++ ++ // __lasx_xvssrarni_bu_h ++ // xd, xj, ui4 ++ // UV32QI, UV32QI, V32QI, USI ++ v32u8_r = __lasx_xvssrarni_bu_h(v32u8_a, v32i8_b, ui4); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvssrarni.bu.h( ++ ++ // __lasx_xvssrarni_hu_w ++ // xd, xj, ui5 ++ // UV16HI, UV16HI, V16HI, USI ++ v16u16_r = __lasx_xvssrarni_hu_w(v16u16_a, v16i16_b, ui5); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvssrarni.hu.w( ++ ++ // __lasx_xvssrarni_wu_d ++ // xd, xj, ui6 ++ // UV8SI, UV8SI, V8SI, USI ++ v8u32_r = __lasx_xvssrarni_wu_d(v8u32_a, v8i32_b, ui6); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvssrarni.wu.d( ++ ++ // __lasx_xvssrarni_du_q ++ // xd, xj, ui7 ++ // UV4DI, UV4DI, V4DI, USI ++ v4u64_r = __lasx_xvssrarni_du_q(v4u64_a, v4i64_b, ui7); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvssrarni.du.q( ++ ++ // __lasx_xbnz_v ++ // rd, xj ++ // SI, UV32QI ++ i32_r = __lasx_xbnz_v(v32u8_a); // CHECK: call i32 @llvm.loongarch.lasx.xbnz.v( ++ ++ // __lasx_xbz_v ++ // rd, xj ++ // SI, UV32QI ++ i32_r = __lasx_xbz_v(v32u8_a); // CHECK: call i32 @llvm.loongarch.lasx.xbz.v( ++ ++ // __lasx_xbnz_b ++ // rd, xj ++ // SI, UV32QI ++ i32_r = __lasx_xbnz_b(v32u8_a); // CHECK: call i32 @llvm.loongarch.lasx.xbnz.b( ++ ++ // __lasx_xbnz_h ++ // rd, xj ++ // SI, UV16HI ++ i32_r = __lasx_xbnz_h(v16u16_a); // CHECK: call i32 @llvm.loongarch.lasx.xbnz.h( ++ ++ // __lasx_xbnz_w ++ // rd, xj ++ // SI, UV8SI ++ i32_r = __lasx_xbnz_w(v8u32_a); // CHECK: call i32 @llvm.loongarch.lasx.xbnz.w( ++ ++ // __lasx_xbnz_d ++ // rd, xj ++ // SI, UV4DI ++ i32_r = __lasx_xbnz_d(v4u64_a); // CHECK: call i32 @llvm.loongarch.lasx.xbnz.d( ++ ++ // __lasx_xbz_b ++ // rd, xj ++ // SI, UV32QI ++ i32_r = __lasx_xbz_b(v32u8_a); // CHECK: call i32 @llvm.loongarch.lasx.xbz.b( ++ ++ // __lasx_xbz_h ++ // rd, xj ++ // SI, UV16HI ++ i32_r = __lasx_xbz_h(v16u16_a); // CHECK: call i32 @llvm.loongarch.lasx.xbz.h( ++ ++ // __lasx_xbz_w ++ // rd, xj ++ // SI, UV8SI ++ i32_r = __lasx_xbz_w(v8u32_a); // CHECK: call i32 @llvm.loongarch.lasx.xbz.w( ++ ++ // __lasx_xbz_d ++ // rd, xj ++ 
// SI, UV4DI ++ i32_r = __lasx_xbz_d(v4u64_a); // CHECK: call i32 @llvm.loongarch.lasx.xbz.d( ++} +diff --git a/test/CodeGen/builtins-loongarch-lsx-error.c b/test/CodeGen/builtins-loongarch-lsx-error.c +new file mode 100644 +index 00000000..f566a736 +--- /dev/null ++++ b/test/CodeGen/builtins-loongarch-lsx-error.c +@@ -0,0 +1,250 @@ ++// REQUIRES: loongarch-registered-target ++// RUN: %clang_cc1 -triple loongarch64-unknown-linux-gnu -fsyntax-only %s \ ++// RUN: -target-feature +lsx \ ++// RUN: -verify -o - 2>&1 ++ ++#include <lsxintrin.h> ++ ++void test() { ++ v16i8 v16i8_a = (v16i8){0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15}; ++ v16i8 v16i8_b = (v16i8){1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16}; ++ v16i8 v16i8_c = (v16i8){2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17}; ++ v16i8 v16i8_r; ++ v8i16 v8i16_a = (v8i16){0, 1, 2, 3, 4, 5, 6, 7}; ++ v8i16 v8i16_b = (v8i16){1, 2, 3, 4, 5, 6, 7, 8}; ++ v8i16 v8i16_c = (v8i16){2, 3, 4, 5, 6, 7, 8, 9}; ++ v8i16 v8i16_r; ++ v4i32 v4i32_a = (v4i32){0, 1, 2, 3}; ++ v4i32 v4i32_b = (v4i32){1, 2, 3, 4}; ++ v4i32 v4i32_c = (v4i32){2, 3, 4, 5}; ++ v4i32 v4i32_r; ++ v2i64 v2i64_a = (v2i64){0, 1}; ++ v2i64 v2i64_b = (v2i64){1, 2}; ++ v2i64 v2i64_c = (v2i64){2, 3}; ++ v2i64 v2i64_r; ++ ++ v16u8 v16u8_a = (v16u8){0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15}; ++ v16u8 v16u8_b = (v16u8){1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16}; ++ v16u8 v16u8_c = (v16u8){2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17}; ++ v16u8 v16u8_r; ++ v8u16 v8u16_a = (v8u16){0, 1, 2, 3, 4, 5, 6, 7}; ++ v8u16 v8u16_b = (v8u16){1, 2, 3, 4, 5, 6, 7, 8}; ++ v8u16 v8u16_c = (v8u16){2, 3, 4, 5, 6, 7, 8, 9}; ++ v8u16 v8u16_r; ++ v4u32 v4u32_a = (v4u32){0, 1, 2, 3}; ++ v4u32 v4u32_b = (v4u32){1, 2, 3, 4}; ++ v4u32 v4u32_c = (v4u32){2, 3, 4, 5}; ++ v4u32 v4u32_r; ++ v2u64 v2u64_a = (v2u64){0, 1}; ++ v2u64 v2u64_b = (v2u64){1, 2}; ++ v2u64 v2u64_c = (v2u64){2, 3}; ++ v2u64 v2u64_r; ++ ++ v4f32 v4f32_a = (v4f32){0.5, 1, 2, 3}; ++ v4f32 v4f32_b = (v4f32){1.5, 2, 3, 4}; ++ v4f32 v4f32_c = (v4f32){2.5, 3, 4, 5}; ++ v4f32 v4f32_r; ++ v2f64 v2f64_a = (v2f64){0.5, 1}; ++ v2f64 v2f64_b = (v2f64){1.5, 2}; ++ v2f64 v2f64_c = (v2f64){2.5, 3}; ++ v2f64 v2f64_r; ++ ++ int i32_r; ++ int i32_a = 1; ++ int i32_b = 2; ++ unsigned int u32_r; ++ unsigned int u32_a = 1; ++ unsigned int u32_b = 2; ++ long long i64_r; ++ long long i64_a = 1; ++ long long i64_b = 2; ++ long long i64_c = 3; ++ unsigned long long u64_r; ++ unsigned long long u64_a = 1; ++ unsigned long long u64_b = 2; ++ unsigned long long u64_c = 3; ++ ++ v16i8_r = __lsx_vslli_b(v16i8_a, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}} ++ v8i16_r = __lsx_vslli_h(v8i16_a, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} ++ v4i32_r = __lsx_vslli_w(v4i32_a, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ v2i64_r = __lsx_vslli_d(v2i64_a, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} ++ v16i8_r = __lsx_vsrai_b(v16i8_a, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}} ++ v8i16_r = __lsx_vsrai_h(v8i16_a, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} ++ v4i32_r = __lsx_vsrai_w(v4i32_a, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ v2i64_r = __lsx_vsrai_d(v2i64_a, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} ++ v16i8_r = __lsx_vsrari_b(v16i8_a, 8); //
expected-error {{argument value 8 is outside the valid range [0, 7]}} ++ v8i16_r = __lsx_vsrari_h(v8i16_a, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} ++ v4i32_r = __lsx_vsrari_w(v4i32_a, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ v2i64_r = __lsx_vsrari_d(v2i64_a, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} ++ v16i8_r = __lsx_vsrli_b(v16i8_a, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}} ++ v8i16_r = __lsx_vsrli_h(v8i16_a, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} ++ v4i32_r = __lsx_vsrli_w(v4i32_a, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ v2i64_r = __lsx_vsrli_d(v2i64_a, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} ++ v16i8_r = __lsx_vsrlri_b(v16i8_a, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}} ++ v8i16_r = __lsx_vsrlri_h(v8i16_a, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} ++ v4i32_r = __lsx_vsrlri_w(v4i32_a, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ v2i64_r = __lsx_vsrlri_d(v2i64_a, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} ++ v16u8_r = __lsx_vbitclri_b(v16u8_a, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}} ++ v8u16_r = __lsx_vbitclri_h(v8u16_a, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} ++ v4u32_r = __lsx_vbitclri_w(v4u32_a, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ v2u64_r = __lsx_vbitclri_d(v2u64_a, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} ++ v16u8_r = __lsx_vbitseti_b(v16u8_a, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}} ++ v8u16_r = __lsx_vbitseti_h(v8u16_a, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} ++ v4u32_r = __lsx_vbitseti_w(v4u32_a, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ v2u64_r = __lsx_vbitseti_d(v2u64_a, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} ++ v16u8_r = __lsx_vbitrevi_b(v16u8_a, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}} ++ v8u16_r = __lsx_vbitrevi_h(v8u16_a, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} ++ v4u32_r = __lsx_vbitrevi_w(v4u32_a, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ v2u64_r = __lsx_vbitrevi_d(v2u64_a, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} ++ v16i8_r = __lsx_vaddi_bu(v16i8_a, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ v8i16_r = __lsx_vaddi_hu(v8i16_a, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ v4i32_r = __lsx_vaddi_wu(v4i32_a, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ v2i64_r = __lsx_vaddi_du(v2i64_a, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ v16i8_r = __lsx_vsubi_bu(v16i8_a, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ v8i16_r = __lsx_vsubi_hu(v8i16_a, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ v4i32_r = __lsx_vsubi_wu(v4i32_a, 32); // expected-error {{argument value 32 is 
outside the valid range [0, 31]}} ++ v2i64_r = __lsx_vsubi_du(v2i64_a, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ v16i8_r = __lsx_vmaxi_b(v16i8_a, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}} ++ v8i16_r = __lsx_vmaxi_h(v8i16_a, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}} ++ v4i32_r = __lsx_vmaxi_w(v4i32_a, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}} ++ v2i64_r = __lsx_vmaxi_d(v2i64_a, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}} ++ v16u8_r = __lsx_vmaxi_bu(v16u8_a, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ v8u16_r = __lsx_vmaxi_hu(v8u16_a, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ v4u32_r = __lsx_vmaxi_wu(v4u32_a, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ v2u64_r = __lsx_vmaxi_du(v2u64_a, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ v16i8_r = __lsx_vmini_b(v16i8_a, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}} ++ v8i16_r = __lsx_vmini_h(v8i16_a, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}} ++ v4i32_r = __lsx_vmini_w(v4i32_a, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}} ++ v2i64_r = __lsx_vmini_d(v2i64_a, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}} ++ v16u8_r = __lsx_vmini_bu(v16u8_a, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ v8u16_r = __lsx_vmini_hu(v8u16_a, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ v4u32_r = __lsx_vmini_wu(v4u32_a, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ v2u64_r = __lsx_vmini_du(v2u64_a, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ v16i8_r = __lsx_vseqi_b(v16i8_a, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}} ++ v8i16_r = __lsx_vseqi_h(v8i16_a, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}} ++ v4i32_r = __lsx_vseqi_w(v4i32_a, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}} ++ v2i64_r = __lsx_vseqi_d(v2i64_a, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}} ++ v16i8_r = __lsx_vslti_b(v16i8_a, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}} ++ v8i16_r = __lsx_vslti_h(v8i16_a, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}} ++ v4i32_r = __lsx_vslti_w(v4i32_a, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}} ++ v2i64_r = __lsx_vslti_d(v2i64_a, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}} ++ v16i8_r = __lsx_vslti_bu(v16u8_a, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ v8i16_r = __lsx_vslti_hu(v8u16_a, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ v4i32_r = __lsx_vslti_wu(v4u32_a, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ v2i64_r = __lsx_vslti_du(v2u64_a, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ v16i8_r = __lsx_vslei_b(v16i8_a, -17); // expected-error {{argument value 
-17 is outside the valid range [-16, 15]}} ++ v8i16_r = __lsx_vslei_h(v8i16_a, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}} ++ v4i32_r = __lsx_vslei_w(v4i32_a, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}} ++ v2i64_r = __lsx_vslei_d(v2i64_a, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}} ++ v16i8_r = __lsx_vslei_bu(v16u8_a, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ v8i16_r = __lsx_vslei_hu(v8u16_a, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ v4i32_r = __lsx_vslei_wu(v4u32_a, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ v2i64_r = __lsx_vslei_du(v2u64_a, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ v16i8_r = __lsx_vsat_b(v16i8_a, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}} ++ v8i16_r = __lsx_vsat_h(v8i16_a, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} ++ v4i32_r = __lsx_vsat_w(v4i32_a, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ v2i64_r = __lsx_vsat_d(v2i64_a, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} ++ v16u8_r = __lsx_vsat_bu(v16u8_a, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}} ++ v8u16_r = __lsx_vsat_hu(v8u16_a, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} ++ v4u32_r = __lsx_vsat_wu(v4u32_a, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ v2u64_r = __lsx_vsat_du(v2u64_a, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} ++ v16i8_r = __lsx_vreplvei_b(v16i8_a, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} ++ v8i16_r = __lsx_vreplvei_h(v8i16_a, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}} ++ v4i32_r = __lsx_vreplvei_w(v4i32_a, 4); // expected-error {{argument value 4 is outside the valid range [0, 3]}} ++ v2i64_r = __lsx_vreplvei_d(v2i64_a, 2); // expected-error {{argument value 2 is outside the valid range [0, 1]}} ++ v16u8_r = __lsx_vandi_b(v16u8_a, 256); // expected-error {{argument value 256 is outside the valid range [0, 255]}} ++ v16u8_r = __lsx_vori_b(v16u8_a, 256); // expected-error {{argument value 256 is outside the valid range [0, 255]}} ++ v16u8_r = __lsx_vnori_b(v16u8_a, 256); // expected-error {{argument value 256 is outside the valid range [0, 255]}} ++ v16u8_r = __lsx_vxori_b(v16u8_a, 256); // expected-error {{argument value 256 is outside the valid range [0, 255]}} ++ v16u8_r = __lsx_vbitseli_b(v16u8_a, v16u8_b, 256); // expected-error {{argument value 256 is outside the valid range [0, 255]}} ++ v16i8_r = __lsx_vshuf4i_b(v16i8_a, 256); // expected-error {{argument value 256 is outside the valid range [0, 255]}} ++ v8i16_r = __lsx_vshuf4i_h(v8i16_a, 256); // expected-error {{argument value 256 is outside the valid range [0, 255]}} ++ v4i32_r = __lsx_vshuf4i_w(v4i32_a, 256); // expected-error {{argument value 256 is outside the valid range [0, 255]}} ++ i32_r = __lsx_vpickve2gr_b(v16i8_a, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} ++ i32_r = __lsx_vpickve2gr_h(v8i16_a, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}} ++ i32_r = __lsx_vpickve2gr_w(v4i32_a, 4); // expected-error {{argument value 4 is outside the 
valid range [0, 3]}} ++ i64_r = __lsx_vpickve2gr_d(v2i64_a, 2); // expected-error {{argument value 2 is outside the valid range [0, 1]}} ++ u32_r = __lsx_vpickve2gr_bu(v16i8_a, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} ++ u32_r = __lsx_vpickve2gr_hu(v8i16_a, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}} ++ u32_r = __lsx_vpickve2gr_wu(v4i32_a, 4); // expected-error {{argument value 4 is outside the valid range [0, 3]}} ++ u64_r = __lsx_vpickve2gr_du(v2i64_a, 2); // expected-error {{argument value 2 is outside the valid range [0, 1]}} ++ v16i8_r = __lsx_vinsgr2vr_b(v16i8_a, i32_b, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} ++ v8i16_r = __lsx_vinsgr2vr_h(v8i16_a, i32_b, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}} ++ v4i32_r = __lsx_vinsgr2vr_w(v4i32_a, i32_b, 4); // expected-error {{argument value 4 is outside the valid range [0, 3]}} ++ v2i64_r = __lsx_vinsgr2vr_d(v2i64_a, i32_b, 2); // expected-error {{argument value 2 is outside the valid range [0, 1]}} ++ v8i16_r = __lsx_vsllwil_h_b(v16i8_a, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}} ++ v4i32_r = __lsx_vsllwil_w_h(v8i16_a, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} ++ v2i64_r = __lsx_vsllwil_d_w(v4i32_a, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ v8u16_r = __lsx_vsllwil_hu_bu(v16u8_a, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}} ++ v4u32_r = __lsx_vsllwil_wu_hu(v8u16_a, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} ++ v2u64_r = __lsx_vsllwil_du_wu(v4u32_a, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ v16i8_r = __lsx_vfrstpi_b(v16i8_a, v16i8_b, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ v8i16_r = __lsx_vfrstpi_h(v8i16_a, v8i16_b, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ v2i64_r = __lsx_vshuf4i_d(v2i64_a, v2i64_b, 256); // expected-error {{argument value 256 is outside the valid range [0, 255]}} ++ v16i8_r = __lsx_vbsrl_v(v16i8_a, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ v16i8_r = __lsx_vbsll_v(v16i8_a, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ v16i8_r = __lsx_vextrins_b(v16i8_a, v16i8_b, 256); // expected-error {{argument value 256 is outside the valid range [0, 255]}} ++ v8i16_r = __lsx_vextrins_h(v8i16_a, v8i16_b, 256); // expected-error {{argument value 256 is outside the valid range [0, 255]}} ++ v4i32_r = __lsx_vextrins_w(v4i32_a, v4i32_b, 256); // expected-error {{argument value 256 is outside the valid range [0, 255]}} ++ v2i64_r = __lsx_vextrins_d(v2i64_a, v2i64_b, 256); // expected-error {{argument value 256 is outside the valid range [0, 255]}} ++ __lsx_vstelm_b(v16i8_a, &v16i8_b, 0, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} ++ __lsx_vstelm_h(v8i16_a, &v8i16_b, 0, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}} ++ __lsx_vstelm_w(v4i32_a, &v4i32_b, 0, 4); // expected-error {{argument value 4 is outside the valid range [0, 3]}} ++ __lsx_vstelm_d(v2i64_a, &v2i64_b, 0, 2); // expected-error {{argument value 2 is outside the valid range [0, 1]}} ++ v16i8_r = __lsx_vldrepl_b(&v16i8_a, -2049); // expected-error {{argument value -2049 is outside the valid range 
[-2048, 2047]}} ++ v8i16_r = __lsx_vldrepl_h(&v8i16_a, -1025); // expected-error {{argument value -1025 is outside the valid range [-1024, 1023]}} ++ v4i32_r = __lsx_vldrepl_w(&v4i32_a, -513); // expected-error {{argument value -513 is outside the valid range [-512, 511]}} ++ v2i64_r = __lsx_vldrepl_d(&v2i64_a, -257); // expected-error {{argument value -257 is outside the valid range [-256, 255]}} ++ v16i8_r = __lsx_vrotri_b(v16i8_a, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}} ++ v8i16_r = __lsx_vrotri_h(v8i16_a, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} ++ v4i32_r = __lsx_vrotri_w(v4i32_a, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ v2i64_r = __lsx_vrotri_d(v2i64_a, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} ++ v16i8_r = __lsx_vsrlni_b_h(v16i8_a, v16i8_b, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} ++ v8i16_r = __lsx_vsrlni_h_w(v8i16_a, v8i16_b, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ v4i32_r = __lsx_vsrlni_w_d(v4i32_a, v4i32_b, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} ++ v2i64_r = __lsx_vsrlni_d_q(v2i64_a, v2i64_b, 128); // expected-error {{argument value 128 is outside the valid range [0, 127]}} ++ v16i8_r = __lsx_vssrlni_b_h(v16i8_a, v16i8_b, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} ++ v8i16_r = __lsx_vssrlni_h_w(v8i16_a, v8i16_b, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ v4i32_r = __lsx_vssrlni_w_d(v4i32_a, v4i32_b, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} ++ v2i64_r = __lsx_vssrlni_d_q(v2i64_a, v2i64_b, 128); // expected-error {{argument value 128 is outside the valid range [0, 127]}} ++ v16u8_r = __lsx_vssrlni_bu_h(v16u8_a, v16i8_b, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} ++ v8u16_r = __lsx_vssrlni_hu_w(v8u16_a, v8i16_b, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ v4u32_r = __lsx_vssrlni_wu_d(v4u32_a, v4i32_b, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} ++ v2u64_r = __lsx_vssrlni_du_q(v2u64_a, v2i64_b, 128); // expected-error {{argument value 128 is outside the valid range [0, 127]}} ++ v16i8_r = __lsx_vssrlrni_b_h(v16i8_a, v16i8_b, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} ++ v8i16_r = __lsx_vssrlrni_h_w(v8i16_a, v8i16_b, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ v4i32_r = __lsx_vssrlrni_w_d(v4i32_a, v4i32_b, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} ++ v2i64_r = __lsx_vssrlrni_d_q(v2i64_a, v2i64_b, 128); // expected-error {{argument value 128 is outside the valid range [0, 127]}} ++ v16u8_r = __lsx_vssrlrni_bu_h(v16u8_a, v16i8_b, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} ++ v8u16_r = __lsx_vssrlrni_hu_w(v8u16_a, v8i16_b, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ v4u32_r = __lsx_vssrlrni_wu_d(v4u32_a, v4i32_b, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} ++ v2u64_r = __lsx_vssrlrni_du_q(v2u64_a, v2i64_b, 128); // expected-error {{argument value 128 is outside the valid range [0, 127]}} ++ v16i8_r = __lsx_vsrani_b_h(v16i8_a, v16i8_b, 16); // expected-error {{argument 
value 16 is outside the valid range [0, 15]}} ++ v8i16_r = __lsx_vsrani_h_w(v8i16_a, v8i16_b, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ v4i32_r = __lsx_vsrani_w_d(v4i32_a, v4i32_b, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} ++ v2i64_r = __lsx_vsrani_d_q(v2i64_a, v2i64_b, 128); // expected-error {{argument value 128 is outside the valid range [0, 127]}} ++ v16i8_r = __lsx_vsrarni_b_h(v16i8_a, v16i8_b, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} ++ v8i16_r = __lsx_vsrarni_h_w(v8i16_a, v8i16_b, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ v4i32_r = __lsx_vsrarni_w_d(v4i32_a, v4i32_b, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} ++ v2i64_r = __lsx_vsrarni_d_q(v2i64_a, v2i64_b, 128); // expected-error {{argument value 128 is outside the valid range [0, 127]}} ++ v16i8_r = __lsx_vssrani_b_h(v16i8_a, v16i8_b, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} ++ v8i16_r = __lsx_vssrani_h_w(v8i16_a, v8i16_b, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ v4i32_r = __lsx_vssrani_w_d(v4i32_a, v4i32_b, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} ++ v2i64_r = __lsx_vssrani_d_q(v2i64_a, v2i64_b, 128); // expected-error {{argument value 128 is outside the valid range [0, 127]}} ++ v16u8_r = __lsx_vssrani_bu_h(v16u8_a, v16i8_b, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} ++ v8u16_r = __lsx_vssrani_hu_w(v8u16_a, v8i16_b, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ v4u32_r = __lsx_vssrani_wu_d(v4u32_a, v4i32_b, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} ++ v2u64_r = __lsx_vssrani_du_q(v2u64_a, v2i64_b, 128); // expected-error {{argument value 128 is outside the valid range [0, 127]}} ++ v16i8_r = __lsx_vssrarni_b_h(v16i8_a, v16i8_b, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} ++ v8i16_r = __lsx_vssrarni_h_w(v8i16_a, v8i16_b, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ v4i32_r = __lsx_vssrarni_w_d(v4i32_a, v4i32_b, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} ++ v2i64_r = __lsx_vssrarni_d_q(v2i64_a, v2i64_b, 128); // expected-error {{argument value 128 is outside the valid range [0, 127]}} ++ v16u8_r = __lsx_vssrarni_bu_h(v16u8_a, v16i8_b, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} ++ v8u16_r = __lsx_vssrarni_hu_w(v8u16_a, v8i16_b, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} ++ v4u32_r = __lsx_vssrarni_wu_d(v4u32_a, v4i32_b, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} ++ v2u64_r = __lsx_vssrarni_du_q(v2u64_a, v2i64_b, 128); // expected-error {{argument value 128 is outside the valid range [0, 127]}} ++ v4i32_r = __lsx_vpermi_w(v4i32_a, v4i32_b, 256); // expected-error {{argument value 256 is outside the valid range [0, 255]}} ++ v16i8_r = __lsx_vld(&v16i8_a, -2049); // expected-error {{argument value -2049 is outside the valid range [-2048, 2047]}} ++ __lsx_vst(v16i8_a, &v16i8_b, -2049); // expected-error {{argument value -2049 is outside the valid range [-2048, 2047]}} ++ v2i64_r = __lsx_vldi(-4097); // expected-error {{argument value -4097 is outside the valid range [-4096, 4095]}} ++} +diff --git 
a/test/CodeGen/builtins-loongarch-lsx.c b/test/CodeGen/builtins-loongarch-lsx.c +new file mode 100644 +index 00000000..0cfc2105 +--- /dev/null ++++ b/test/CodeGen/builtins-loongarch-lsx.c +@@ -0,0 +1,3630 @@ ++// REQUIRES: loongarch-registered-target ++// RUN: %clang_cc1 -triple loongarch64-unknown-linux-gnu -emit-llvm %s \ ++// RUN: -target-feature +lsx \ ++// RUN: -target-feature +d \ ++// RUN: -o - | FileCheck %s ++ ++#include <lsxintrin.h> ++ ++#define ui1 0 ++#define ui2 1 ++#define ui3 4 ++#define ui4 7 ++#define ui5 25 ++#define ui6 44 ++#define ui7 100 ++#define ui8 127 //200 ++#define si5 -4 ++#define si8 -100 ++#define si9 0 ++#define si10 0 ++#define si11 0 ++#define si12 0 ++#define i10 500 ++#define i13 4000 ++#define mode 11 ++#define idx1 1 ++#define idx2 2 ++#define idx3 4 ++#define idx4 8 ++ ++void test(void) { ++ v16i8 v16i8_a = (v16i8){0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15}; ++ v16i8 v16i8_b = (v16i8){1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16}; ++ v16i8 v16i8_c = (v16i8){2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17}; ++ v16i8 v16i8_r; ++ v8i16 v8i16_a = (v8i16){0, 1, 2, 3, 4, 5, 6, 7}; ++ v8i16 v8i16_b = (v8i16){1, 2, 3, 4, 5, 6, 7, 8}; ++ v8i16 v8i16_c = (v8i16){2, 3, 4, 5, 6, 7, 8, 9}; ++ v8i16 v8i16_r; ++ v4i32 v4i32_a = (v4i32){0, 1, 2, 3}; ++ v4i32 v4i32_b = (v4i32){1, 2, 3, 4}; ++ v4i32 v4i32_c = (v4i32){2, 3, 4, 5}; ++ v4i32 v4i32_r; ++ v2i64 v2i64_a = (v2i64){0, 1}; ++ v2i64 v2i64_b = (v2i64){1, 2}; ++ v2i64 v2i64_c = (v2i64){2, 3}; ++ v2i64 v2i64_r; ++ ++ v16u8 v16u8_a = (v16u8){0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15}; ++ v16u8 v16u8_b = (v16u8){1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16}; ++ v16u8 v16u8_c = (v16u8){2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17}; ++ v16u8 v16u8_r; ++ v8u16 v8u16_a = (v8u16){0, 1, 2, 3, 4, 5, 6, 7}; ++ v8u16 v8u16_b = (v8u16){1, 2, 3, 4, 5, 6, 7, 8}; ++ v8u16 v8u16_c = (v8u16){2, 3, 4, 5, 6, 7, 8, 9}; ++ v8u16 v8u16_r; ++ v4u32 v4u32_a = (v4u32){0, 1, 2, 3}; ++ v4u32 v4u32_b = (v4u32){1, 2, 3, 4}; ++ v4u32 v4u32_c = (v4u32){2, 3, 4, 5}; ++ v4u32 v4u32_r; ++ v2u64 v2u64_a = (v2u64){0, 1}; ++ v2u64 v2u64_b = (v2u64){1, 2}; ++ v2u64 v2u64_c = (v2u64){2, 3}; ++ v2u64 v2u64_r; ++ ++ v4f32 v4f32_a = (v4f32){0.5, 1, 2, 3}; ++ v4f32 v4f32_b = (v4f32){1.5, 2, 3, 4}; ++ v4f32 v4f32_c = (v4f32){2.5, 3, 4, 5}; ++ v4f32 v4f32_r; ++ v2f64 v2f64_a = (v2f64){0.5, 1}; ++ v2f64 v2f64_b = (v2f64){1.5, 2}; ++ v2f64 v2f64_c = (v2f64){2.5, 3}; ++ v2f64 v2f64_r; ++ ++ int i32_r; ++ int i32_a = 1; ++ int i32_b = 2; ++ unsigned int u32_r; ++ unsigned int u32_a = 1; ++ unsigned int u32_b = 2; ++ long long i64_r; ++ long long i64_a = 1; ++ long long i64_b = 2; ++ long long i64_c = 3; ++ long int i64_d = 0; ++ unsigned long long u64_r; ++ unsigned long long u64_a = 1; ++ unsigned long long u64_b = 2; ++ unsigned long long u64_c = 3; ++ ++ // __lsx_vsll_b ++ // vd, vj, vk ++ // V16QI, V16QI, V16QI ++ v16i8_r = __lsx_vsll_b(v16i8_a, v16i8_b); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vsll.b( ++ ++ // __lsx_vsll_h ++ // vd, vj, vk ++ // V8HI, V8HI, V8HI ++ v8i16_r = __lsx_vsll_h(v8i16_a, v8i16_b); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vsll.h( ++ ++ // __lsx_vsll_w ++ // vd, vj, vk ++ // V4SI, V4SI, V4SI ++ v4i32_r = __lsx_vsll_w(v4i32_a, v4i32_b); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vsll.w( ++ ++ // __lsx_vsll_d ++ // vd, vj, vk ++ // V2DI, V2DI, V2DI ++ v2i64_r = __lsx_vsll_d(v2i64_a, v2i64_b); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vsll.d( ++ ++ // __lsx_vslli_b ++ // vd, vj, 
ui3 ++ // V16QI, V16QI, UQI ++ v16i8_r = __lsx_vslli_b(v16i8_a, ui3); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vslli.b( ++ ++ // __lsx_vslli_h ++ // vd, vj, ui4 ++ // V8HI, V8HI, UQI ++ v8i16_r = __lsx_vslli_h(v8i16_a, ui4); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vslli.h( ++ ++ // __lsx_vslli_w ++ // vd, vj, ui5 ++ // V4SI, V4SI, UQI ++ v4i32_r = __lsx_vslli_w(v4i32_a, ui5); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vslli.w( ++ ++ // __lsx_vslli_d ++ // vd, vj, ui6 ++ // V2DI, V2DI, UQI ++ v2i64_r = __lsx_vslli_d(v2i64_a, ui6); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vslli.d( ++ ++ // __lsx_vsra_b ++ // vd, vj, vk ++ // V16QI, V16QI, V16QI ++ v16i8_r = __lsx_vsra_b(v16i8_a, v16i8_b); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vsra.b( ++ ++ // __lsx_vsra_h ++ // vd, vj, vk ++ // V8HI, V8HI, V8HI ++ v8i16_r = __lsx_vsra_h(v8i16_a, v8i16_b); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vsra.h( ++ ++ // __lsx_vsra_w ++ // vd, vj, vk ++ // V4SI, V4SI, V4SI ++ v4i32_r = __lsx_vsra_w(v4i32_a, v4i32_b); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vsra.w( ++ ++ // __lsx_vsra_d ++ // vd, vj, vk ++ // V2DI, V2DI, V2DI ++ v2i64_r = __lsx_vsra_d(v2i64_a, v2i64_b); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vsra.d( ++ ++ // __lsx_vsrai_b ++ // vd, vj, ui3 ++ // V16QI, V16QI, UQI ++ v16i8_r = __lsx_vsrai_b(v16i8_a, ui3); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vsrai.b( ++ ++ // __lsx_vsrai_h ++ // vd, vj, ui4 ++ // V8HI, V8HI, UQI ++ v8i16_r = __lsx_vsrai_h(v8i16_a, ui4); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vsrai.h( ++ ++ // __lsx_vsrai_w ++ // vd, vj, ui5 ++ // V4SI, V4SI, UQI ++ v4i32_r = __lsx_vsrai_w(v4i32_a, ui5); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vsrai.w( ++ ++ // __lsx_vsrai_d ++ // vd, vj, ui6 ++ // V2DI, V2DI, UQI ++ v2i64_r = __lsx_vsrai_d(v2i64_a, ui6); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vsrai.d( ++ ++ // __lsx_vsrar_b ++ // vd, vj, vk ++ // V16QI, V16QI, V16QI ++ v16i8_r = __lsx_vsrar_b(v16i8_a, v16i8_b); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vsrar.b( ++ ++ // __lsx_vsrar_h ++ // vd, vj, vk ++ // V8HI, V8HI, V8HI ++ v8i16_r = __lsx_vsrar_h(v8i16_a, v8i16_b); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vsrar.h( ++ ++ // __lsx_vsrar_w ++ // vd, vj, vk ++ // V4SI, V4SI, V4SI ++ v4i32_r = __lsx_vsrar_w(v4i32_a, v4i32_b); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vsrar.w( ++ ++ // __lsx_vsrar_d ++ // vd, vj, vk ++ // V2DI, V2DI, V2DI ++ v2i64_r = __lsx_vsrar_d(v2i64_a, v2i64_b); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vsrar.d( ++ ++ // __lsx_vsrari_b ++ // vd, vj, ui3 ++ // V16QI, V16QI, UQI ++ v16i8_r = __lsx_vsrari_b(v16i8_a, ui3); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vsrari.b( ++ ++ // __lsx_vsrari_h ++ // vd, vj, ui4 ++ // V8HI, V8HI, UQI ++ v8i16_r = __lsx_vsrari_h(v8i16_a, ui4); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vsrari.h( ++ ++ // __lsx_vsrari_w ++ // vd, vj, ui5 ++ // V4SI, V4SI, UQI ++ v4i32_r = __lsx_vsrari_w(v4i32_a, ui5); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vsrari.w( ++ ++ // __lsx_vsrari_d ++ // vd, vj, ui6 ++ // V2DI, V2DI, UQI ++ v2i64_r = __lsx_vsrari_d(v2i64_a, ui6); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vsrari.d( ++ ++ // __lsx_vsrl_b ++ // vd, vj, vk ++ // V16QI, V16QI, V16QI ++ v16i8_r = __lsx_vsrl_b(v16i8_a, v16i8_b); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vsrl.b( ++ ++ // __lsx_vsrl_h ++ // vd, vj, vk ++ // V8HI, V8HI, V8HI ++ v8i16_r = __lsx_vsrl_h(v8i16_a, v8i16_b); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vsrl.h( ++ ++ // __lsx_vsrl_w ++ // vd, vj, vk ++ // 
V4SI, V4SI, V4SI ++ v4i32_r = __lsx_vsrl_w(v4i32_a, v4i32_b); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vsrl.w( ++ ++ // __lsx_vsrl_d ++ // vd, vj, vk ++ // V2DI, V2DI, V2DI ++ v2i64_r = __lsx_vsrl_d(v2i64_a, v2i64_b); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vsrl.d( ++ ++ // __lsx_vsrli_b ++ // vd, vj, ui3 ++ // V16QI, V16QI, UQI ++ v16i8_r = __lsx_vsrli_b(v16i8_a, ui3); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vsrli.b( ++ ++ // __lsx_vsrli_h ++ // vd, vj, ui4 ++ // V8HI, V8HI, UQI ++ v8i16_r = __lsx_vsrli_h(v8i16_a, ui4); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vsrli.h( ++ ++ // __lsx_vsrli_w ++ // vd, vj, ui5 ++ // V4SI, V4SI, UQI ++ v4i32_r = __lsx_vsrli_w(v4i32_a, ui5); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vsrli.w( ++ ++ // __lsx_vsrli_d ++ // vd, vj, ui6 ++ // V2DI, V2DI, UQI ++ v2i64_r = __lsx_vsrli_d(v2i64_a, ui6); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vsrli.d( ++ ++ // __lsx_vsrlr_b ++ // vd, vj, vk ++ // V16QI, V16QI, V16QI ++ v16i8_r = __lsx_vsrlr_b(v16i8_a, v16i8_b); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vsrlr.b( ++ ++ // __lsx_vsrlr_h ++ // vd, vj, vk ++ // V8HI, V8HI, V8HI ++ v8i16_r = __lsx_vsrlr_h(v8i16_a, v8i16_b); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vsrlr.h( ++ ++ // __lsx_vsrlr_w ++ // vd, vj, vk ++ // V4SI, V4SI, V4SI ++ v4i32_r = __lsx_vsrlr_w(v4i32_a, v4i32_b); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vsrlr.w( ++ ++ // __lsx_vsrlr_d ++ // vd, vj, vk ++ // V2DI, V2DI, V2DI ++ v2i64_r = __lsx_vsrlr_d(v2i64_a, v2i64_b); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vsrlr.d( ++ ++ // __lsx_vsrlri_b ++ // vd, vj, ui3 ++ // V16QI, V16QI, UQI ++ v16i8_r = __lsx_vsrlri_b(v16i8_a, ui3); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vsrlri.b( ++ ++ // __lsx_vsrlri_h ++ // vd, vj, ui4 ++ // V8HI, V8HI, UQI ++ v8i16_r = __lsx_vsrlri_h(v8i16_a, ui4); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vsrlri.h( ++ ++ // __lsx_vsrlri_w ++ // vd, vj, ui5 ++ // V4SI, V4SI, UQI ++ v4i32_r = __lsx_vsrlri_w(v4i32_a, ui5); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vsrlri.w( ++ ++ // __lsx_vsrlri_d ++ // vd, vj, ui6 ++ // V2DI, V2DI, UQI ++ v2i64_r = __lsx_vsrlri_d(v2i64_a, ui6); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vsrlri.d( ++ ++ // __lsx_vbitclr_b ++ // vd, vj, vk ++ // UV16QI, UV16QI, UV16QI ++ v16u8_r = __lsx_vbitclr_b(v16u8_a, v16u8_b); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vbitclr.b( ++ ++ // __lsx_vbitclr_h ++ // vd, vj, vk ++ // UV8HI, UV8HI, UV8HI ++ v8u16_r = __lsx_vbitclr_h(v8u16_a, v8u16_b); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vbitclr.h( ++ ++ // __lsx_vbitclr_w ++ // vd, vj, vk ++ // UV4SI, UV4SI, UV4SI ++ v4u32_r = __lsx_vbitclr_w(v4u32_a, v4u32_b); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vbitclr.w( ++ ++ // __lsx_vbitclr_d ++ // vd, vj, vk ++ // UV2DI, UV2DI, UV2DI ++ v2u64_r = __lsx_vbitclr_d(v2u64_a, v2u64_b); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vbitclr.d( ++ ++ // __lsx_vbitclri_b ++ // vd, vj, ui3 ++ // UV16QI, UV16QI, UQI ++ v16u8_r = __lsx_vbitclri_b(v16u8_a, ui3); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vbitclri.b( ++ ++ // __lsx_vbitclri_h ++ // vd, vj, ui4 ++ // UV8HI, UV8HI, UQI ++ v8u16_r = __lsx_vbitclri_h(v8u16_a, ui4); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vbitclri.h( ++ ++ // __lsx_vbitclri_w ++ // vd, vj, ui5 ++ // UV4SI, UV4SI, UQI ++ v4u32_r = __lsx_vbitclri_w(v4u32_a, ui5); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vbitclri.w( ++ ++ // __lsx_vbitclri_d ++ // vd, vj, ui6 ++ // UV2DI, UV2DI, UQI ++ v2u64_r = __lsx_vbitclri_d(v2u64_a, ui6); // CHECK: call <2 x 
i64> @llvm.loongarch.lsx.vbitclri.d( ++ ++ // __lsx_vbitset_b ++ // vd, vj, vk ++ // UV16QI, UV16QI, UV16QI ++ v16u8_r = __lsx_vbitset_b(v16u8_a, v16u8_b); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vbitset.b( ++ ++ // __lsx_vbitset_h ++ // vd, vj, vk ++ // UV8HI, UV8HI, UV8HI ++ v8u16_r = __lsx_vbitset_h(v8u16_a, v8u16_b); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vbitset.h( ++ ++ // __lsx_vbitset_w ++ // vd, vj, vk ++ // UV4SI, UV4SI, UV4SI ++ v4u32_r = __lsx_vbitset_w(v4u32_a, v4u32_b); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vbitset.w( ++ ++ // __lsx_vbitset_d ++ // vd, vj, vk ++ // UV2DI, UV2DI, UV2DI ++ v2u64_r = __lsx_vbitset_d(v2u64_a, v2u64_b); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vbitset.d( ++ ++ // __lsx_vbitseti_b ++ // vd, vj, ui3 ++ // UV16QI, UV16QI, UQI ++ v16u8_r = __lsx_vbitseti_b(v16u8_a, ui3); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vbitseti.b( ++ ++ // __lsx_vbitseti_h ++ // vd, vj, ui4 ++ // UV8HI, UV8HI, UQI ++ v8u16_r = __lsx_vbitseti_h(v8u16_a, ui4); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vbitseti.h( ++ ++ // __lsx_vbitseti_w ++ // vd, vj, ui5 ++ // UV4SI, UV4SI, UQI ++ v4u32_r = __lsx_vbitseti_w(v4u32_a, ui5); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vbitseti.w( ++ ++ // __lsx_vbitseti_d ++ // vd, vj, ui6 ++ // UV2DI, UV2DI, UQI ++ v2u64_r = __lsx_vbitseti_d(v2u64_a, ui6); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vbitseti.d( ++ ++ // __lsx_vbitrev_b ++ // vd, vj, vk ++ // UV16QI, UV16QI, UV16QI ++ v16u8_r = __lsx_vbitrev_b(v16u8_a, v16u8_b); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vbitrev.b( ++ ++ // __lsx_vbitrev_h ++ // vd, vj, vk ++ // UV8HI, UV8HI, UV8HI ++ v8u16_r = __lsx_vbitrev_h(v8u16_a, v8u16_b); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vbitrev.h( ++ ++ // __lsx_vbitrev_w ++ // vd, vj, vk ++ // UV4SI, UV4SI, UV4SI ++ v4u32_r = __lsx_vbitrev_w(v4u32_a, v4u32_b); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vbitrev.w( ++ ++ // __lsx_vbitrev_d ++ // vd, vj, vk ++ // UV2DI, UV2DI, UV2DI ++ v2u64_r = __lsx_vbitrev_d(v2u64_a, v2u64_b); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vbitrev.d( ++ ++ // __lsx_vbitrevi_b ++ // vd, vj, ui3 ++ // UV16QI, UV16QI, UQI ++ v16u8_r = __lsx_vbitrevi_b(v16u8_a, ui3); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vbitrevi.b( ++ ++ // __lsx_vbitrevi_h ++ // vd, vj, ui4 ++ // UV8HI, UV8HI, UQI ++ v8u16_r = __lsx_vbitrevi_h(v8u16_a, ui4); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vbitrevi.h( ++ ++ // __lsx_vbitrevi_w ++ // vd, vj, ui5 ++ // UV4SI, UV4SI, UQI ++ v4u32_r = __lsx_vbitrevi_w(v4u32_a, ui5); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vbitrevi.w( ++ ++ // __lsx_vbitrevi_d ++ // vd, vj, ui6 ++ // UV2DI, UV2DI, UQI ++ v2u64_r = __lsx_vbitrevi_d(v2u64_a, ui6); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vbitrevi.d( ++ ++ // __lsx_vadd_b ++ // vd, vj, vk ++ // V16QI, V16QI, V16QI ++ v16i8_r = __lsx_vadd_b(v16i8_a, v16i8_b); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vadd.b( ++ ++ // __lsx_vadd_h ++ // vd, vj, vk ++ // V8HI, V8HI, V8HI ++ v8i16_r = __lsx_vadd_h(v8i16_a, v8i16_b); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vadd.h( ++ ++ // __lsx_vadd_w ++ // vd, vj, vk ++ // V4SI, V4SI, V4SI ++ v4i32_r = __lsx_vadd_w(v4i32_a, v4i32_b); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vadd.w( ++ ++ // __lsx_vadd_d ++ // vd, vj, vk ++ // V2DI, V2DI, V2DI ++ v2i64_r = __lsx_vadd_d(v2i64_a, v2i64_b); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vadd.d( ++ ++ // __lsx_vaddi_bu ++ // vd, vj, ui5 ++ // V16QI, V16QI, UQI ++ v16i8_r = __lsx_vaddi_bu(v16i8_a, ui5); // CHECK: call <16 
x i8> @llvm.loongarch.lsx.vaddi.bu( ++ ++ // __lsx_vaddi_hu ++ // vd, vj, ui5 ++ // V8HI, V8HI, UQI ++ v8i16_r = __lsx_vaddi_hu(v8i16_a, ui5); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vaddi.hu( ++ ++ // __lsx_vaddi_wu ++ // vd, vj, ui5 ++ // V4SI, V4SI, UQI ++ v4i32_r = __lsx_vaddi_wu(v4i32_a, ui5); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vaddi.wu( ++ ++ // __lsx_vaddi_du ++ // vd, vj, ui5 ++ // V2DI, V2DI, UQI ++ v2i64_r = __lsx_vaddi_du(v2i64_a, ui5); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vaddi.du( ++ ++ // __lsx_vsub_b ++ // vd, vj, vk ++ // V16QI, V16QI, V16QI ++ v16i8_r = __lsx_vsub_b(v16i8_a, v16i8_b); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vsub.b( ++ ++ // __lsx_vsub_h ++ // vd, vj, vk ++ // V8HI, V8HI, V8HI ++ v8i16_r = __lsx_vsub_h(v8i16_a, v8i16_b); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vsub.h( ++ ++ // __lsx_vsub_w ++ // vd, vj, vk ++ // V4SI, V4SI, V4SI ++ v4i32_r = __lsx_vsub_w(v4i32_a, v4i32_b); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vsub.w( ++ ++ // __lsx_vsub_d ++ // vd, vj, vk ++ // V2DI, V2DI, V2DI ++ v2i64_r = __lsx_vsub_d(v2i64_a, v2i64_b); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vsub.d( ++ ++ // __lsx_vsubi_bu ++ // vd, vj, ui5 ++ // V16QI, V16QI, UQI ++ v16i8_r = __lsx_vsubi_bu(v16i8_a, ui5); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vsubi.bu( ++ ++ // __lsx_vsubi_hu ++ // vd, vj, ui5 ++ // V8HI, V8HI, UQI ++ v8i16_r = __lsx_vsubi_hu(v8i16_a, ui5); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vsubi.hu( ++ ++ // __lsx_vsubi_wu ++ // vd, vj, ui5 ++ // V4SI, V4SI, UQI ++ v4i32_r = __lsx_vsubi_wu(v4i32_a, ui5); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vsubi.wu( ++ ++ // __lsx_vsubi_du ++ // vd, vj, ui5 ++ // V2DI, V2DI, UQI ++ v2i64_r = __lsx_vsubi_du(v2i64_a, ui5); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vsubi.du( ++ ++ // __lsx_vmax_b ++ // vd, vj, vk ++ // V16QI, V16QI, V16QI ++ v16i8_r = __lsx_vmax_b(v16i8_a, v16i8_b); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vmax.b( ++ ++ // __lsx_vmax_h ++ // vd, vj, vk ++ // V8HI, V8HI, V8HI ++ v8i16_r = __lsx_vmax_h(v8i16_a, v8i16_b); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vmax.h( ++ ++ // __lsx_vmax_w ++ // vd, vj, vk ++ // V4SI, V4SI, V4SI ++ v4i32_r = __lsx_vmax_w(v4i32_a, v4i32_b); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vmax.w( ++ ++ // __lsx_vmax_d ++ // vd, vj, vk ++ // V2DI, V2DI, V2DI ++ v2i64_r = __lsx_vmax_d(v2i64_a, v2i64_b); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vmax.d( ++ ++ // __lsx_vmaxi_b ++ // vd, vj, si5 ++ // V16QI, V16QI, QI ++ v16i8_r = __lsx_vmaxi_b(v16i8_a, si5); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vmaxi.b( ++ ++ // __lsx_vmaxi_h ++ // vd, vj, si5 ++ // V8HI, V8HI, QI ++ v8i16_r = __lsx_vmaxi_h(v8i16_a, si5); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vmaxi.h( ++ ++ // __lsx_vmaxi_w ++ // vd, vj, si5 ++ // V4SI, V4SI, QI ++ v4i32_r = __lsx_vmaxi_w(v4i32_a, si5); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vmaxi.w( ++ ++ // __lsx_vmaxi_d ++ // vd, vj, si5 ++ // V2DI, V2DI, QI ++ v2i64_r = __lsx_vmaxi_d(v2i64_a, si5); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vmaxi.d( ++ ++ // __lsx_vmax_bu ++ // vd, vj, vk ++ // UV16QI, UV16QI, UV16QI ++ v16u8_r = __lsx_vmax_bu(v16u8_a, v16u8_b); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vmax.bu( ++ ++ // __lsx_vmax_hu ++ // vd, vj, vk ++ // UV8HI, UV8HI, UV8HI ++ v8u16_r = __lsx_vmax_hu(v8u16_a, v8u16_b); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vmax.hu( ++ ++ // __lsx_vmax_wu ++ // vd, vj, vk ++ // UV4SI, UV4SI, UV4SI ++ v4u32_r = __lsx_vmax_wu(v4u32_a, v4u32_b); // CHECK: call 
<4 x i32> @llvm.loongarch.lsx.vmax.wu( ++ ++ // __lsx_vmax_du ++ // vd, vj, vk ++ // UV2DI, UV2DI, UV2DI ++ v2u64_r = __lsx_vmax_du(v2u64_a, v2u64_b); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vmax.du( ++ ++ // __lsx_vmaxi_bu ++ // vd, vj, ui5 ++ // UV16QI, UV16QI, UQI ++ v16u8_r = __lsx_vmaxi_bu(v16u8_a, ui5); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vmaxi.bu( ++ ++ // __lsx_vmaxi_hu ++ // vd, vj, ui5 ++ // UV8HI, UV8HI, UQI ++ v8u16_r = __lsx_vmaxi_hu(v8u16_a, ui5); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vmaxi.hu( ++ ++ // __lsx_vmaxi_wu ++ // vd, vj, ui5 ++ // UV4SI, UV4SI, UQI ++ v4u32_r = __lsx_vmaxi_wu(v4u32_a, ui5); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vmaxi.wu( ++ ++ // __lsx_vmaxi_du ++ // vd, vj, ui5 ++ // UV2DI, UV2DI, UQI ++ v2u64_r = __lsx_vmaxi_du(v2u64_a, ui5); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vmaxi.du( ++ ++ // __lsx_vmin_b ++ // vd, vj, vk ++ // V16QI, V16QI, V16QI ++ v16i8_r = __lsx_vmin_b(v16i8_a, v16i8_b); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vmin.b( ++ ++ // __lsx_vmin_h ++ // vd, vj, vk ++ // V8HI, V8HI, V8HI ++ v8i16_r = __lsx_vmin_h(v8i16_a, v8i16_b); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vmin.h( ++ ++ // __lsx_vmin_w ++ // vd, vj, vk ++ // V4SI, V4SI, V4SI ++ v4i32_r = __lsx_vmin_w(v4i32_a, v4i32_b); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vmin.w( ++ ++ // __lsx_vmin_d ++ // vd, vj, vk ++ // V2DI, V2DI, V2DI ++ v2i64_r = __lsx_vmin_d(v2i64_a, v2i64_b); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vmin.d( ++ ++ // __lsx_vmini_b ++ // vd, vj, si5 ++ // V16QI, V16QI, QI ++ v16i8_r = __lsx_vmini_b(v16i8_a, si5); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vmini.b( ++ ++ // __lsx_vmini_h ++ // vd, vj, si5 ++ // V8HI, V8HI, QI ++ v8i16_r = __lsx_vmini_h(v8i16_a, si5); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vmini.h( ++ ++ // __lsx_vmini_w ++ // vd, vj, si5 ++ // V4SI, V4SI, QI ++ v4i32_r = __lsx_vmini_w(v4i32_a, si5); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vmini.w( ++ ++ // __lsx_vmini_d ++ // vd, vj, si5 ++ // V2DI, V2DI, QI ++ v2i64_r = __lsx_vmini_d(v2i64_a, si5); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vmini.d( ++ ++ // __lsx_vmin_bu ++ // vd, vj, vk ++ // UV16QI, UV16QI, UV16QI ++ v16u8_r = __lsx_vmin_bu(v16u8_a, v16u8_b); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vmin.bu( ++ ++ // __lsx_vmin_hu ++ // vd, vj, vk ++ // UV8HI, UV8HI, UV8HI ++ v8u16_r = __lsx_vmin_hu(v8u16_a, v8u16_b); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vmin.hu( ++ ++ // __lsx_vmin_wu ++ // vd, vj, vk ++ // UV4SI, UV4SI, UV4SI ++ v4u32_r = __lsx_vmin_wu(v4u32_a, v4u32_b); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vmin.wu( ++ ++ // __lsx_vmin_du ++ // vd, vj, vk ++ // UV2DI, UV2DI, UV2DI ++ v2u64_r = __lsx_vmin_du(v2u64_a, v2u64_b); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vmin.du( ++ ++ // __lsx_vmini_bu ++ // vd, vj, ui5 ++ // UV16QI, UV16QI, UQI ++ v16u8_r = __lsx_vmini_bu(v16u8_a, ui5); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vmini.bu( ++ ++ // __lsx_vmini_hu ++ // vd, vj, ui5 ++ // UV8HI, UV8HI, UQI ++ v8u16_r = __lsx_vmini_hu(v8u16_a, ui5); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vmini.hu( ++ ++ // __lsx_vmini_wu ++ // vd, vj, ui5 ++ // UV4SI, UV4SI, UQI ++ v4u32_r = __lsx_vmini_wu(v4u32_a, ui5); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vmini.wu( ++ ++ // __lsx_vmini_du ++ // vd, vj, ui5 ++ // UV2DI, UV2DI, UQI ++ v2u64_r = __lsx_vmini_du(v2u64_a, ui5); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vmini.du( ++ ++ // __lsx_vseq_b ++ // vd, vj, vk ++ // V16QI, V16QI, V16QI ++ v16i8_r = 
__lsx_vseq_b(v16i8_a, v16i8_b); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vseq.b( ++ ++ // __lsx_vseq_h ++ // vd, vj, vk ++ // V8HI, V8HI, V8HI ++ v8i16_r = __lsx_vseq_h(v8i16_a, v8i16_b); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vseq.h( ++ ++ // __lsx_vseq_w ++ // vd, vj, vk ++ // V4SI, V4SI, V4SI ++ v4i32_r = __lsx_vseq_w(v4i32_a, v4i32_b); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vseq.w( ++ ++ // __lsx_vseq_d ++ // vd, vj, vk ++ // V2DI, V2DI, V2DI ++ v2i64_r = __lsx_vseq_d(v2i64_a, v2i64_b); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vseq.d( ++ ++ // __lsx_vseqi_b ++ // vd, vj, si5 ++ // V16QI, V16QI, QI ++ v16i8_r = __lsx_vseqi_b(v16i8_a, si5); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vseqi.b( ++ ++ // __lsx_vseqi_h ++ // vd, vj, si5 ++ // V8HI, V8HI, QI ++ v8i16_r = __lsx_vseqi_h(v8i16_a, si5); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vseqi.h( ++ ++ // __lsx_vseqi_w ++ // vd, vj, si5 ++ // V4SI, V4SI, QI ++ v4i32_r = __lsx_vseqi_w(v4i32_a, si5); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vseqi.w( ++ ++ // __lsx_vseqi_d ++ // vd, vj, si5 ++ // V2DI, V2DI, QI ++ v2i64_r = __lsx_vseqi_d(v2i64_a, si5); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vseqi.d( ++ ++ // __lsx_vslti_b ++ // vd, vj, si5 ++ // V16QI, V16QI, QI ++ v16i8_r = __lsx_vslti_b(v16i8_a, si5); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vslti.b( ++ ++ // __lsx_vslt_b ++ // vd, vj, vk ++ // V16QI, V16QI, V16QI ++ v16i8_r = __lsx_vslt_b(v16i8_a, v16i8_b); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vslt.b( ++ ++ // __lsx_vslt_h ++ // vd, vj, vk ++ // V8HI, V8HI, V8HI ++ v8i16_r = __lsx_vslt_h(v8i16_a, v8i16_b); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vslt.h( ++ ++ // __lsx_vslt_w ++ // vd, vj, vk ++ // V4SI, V4SI, V4SI ++ v4i32_r = __lsx_vslt_w(v4i32_a, v4i32_b); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vslt.w( ++ ++ // __lsx_vslt_d ++ // vd, vj, vk ++ // V2DI, V2DI, V2DI ++ v2i64_r = __lsx_vslt_d(v2i64_a, v2i64_b); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vslt.d( ++ ++ // __lsx_vslti_h ++ // vd, vj, si5 ++ // V8HI, V8HI, QI ++ v8i16_r = __lsx_vslti_h(v8i16_a, si5); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vslti.h( ++ ++ // __lsx_vslti_w ++ // vd, vj, si5 ++ // V4SI, V4SI, QI ++ v4i32_r = __lsx_vslti_w(v4i32_a, si5); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vslti.w( ++ ++ // __lsx_vslti_d ++ // vd, vj, si5 ++ // V2DI, V2DI, QI ++ v2i64_r = __lsx_vslti_d(v2i64_a, si5); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vslti.d( ++ ++ // __lsx_vslt_bu ++ // vd, vj, vk ++ // V16QI, UV16QI, UV16QI ++ v16i8_r = __lsx_vslt_bu(v16u8_a, v16u8_b); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vslt.bu( ++ ++ // __lsx_vslt_hu ++ // vd, vj, vk ++ // V8HI, UV8HI, UV8HI ++ v8i16_r = __lsx_vslt_hu(v8u16_a, v8u16_b); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vslt.hu( ++ ++ // __lsx_vslt_wu ++ // vd, vj, vk ++ // V4SI, UV4SI, UV4SI ++ v4i32_r = __lsx_vslt_wu(v4u32_a, v4u32_b); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vslt.wu( ++ ++ // __lsx_vslt_du ++ // vd, vj, vk ++ // V2DI, UV2DI, UV2DI ++ v2i64_r = __lsx_vslt_du(v2u64_a, v2u64_b); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vslt.du( ++ ++ // __lsx_vslti_bu ++ // vd, vj, ui5 ++ // V16QI, UV16QI, UQI ++ v16i8_r = __lsx_vslti_bu(v16u8_a, ui5); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vslti.bu( ++ ++ // __lsx_vslti_hu ++ // vd, vj, ui5 ++ // V8HI, UV8HI, UQI ++ v8i16_r = __lsx_vslti_hu(v8u16_a, ui5); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vslti.hu( ++ ++ // __lsx_vslti_wu ++ // vd, vj, ui5 ++ // V4SI, UV4SI, UQI ++ v4i32_r = 
__lsx_vslti_wu(v4u32_a, ui5); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vslti.wu( ++ ++ // __lsx_vslti_du ++ // vd, vj, ui5 ++ // V2DI, UV2DI, UQI ++ v2i64_r = __lsx_vslti_du(v2u64_a, ui5); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vslti.du( ++ ++ // __lsx_vsle_b ++ // vd, vj, vk ++ // V16QI, V16QI, V16QI ++ v16i8_r = __lsx_vsle_b(v16i8_a, v16i8_b); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vsle.b( ++ ++ // __lsx_vsle_h ++ // vd, vj, vk ++ // V8HI, V8HI, V8HI ++ v8i16_r = __lsx_vsle_h(v8i16_a, v8i16_b); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vsle.h( ++ ++ // __lsx_vsle_w ++ // vd, vj, vk ++ // V4SI, V4SI, V4SI ++ v4i32_r = __lsx_vsle_w(v4i32_a, v4i32_b); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vsle.w( ++ ++ // __lsx_vsle_d ++ // vd, vj, vk ++ // V2DI, V2DI, V2DI ++ v2i64_r = __lsx_vsle_d(v2i64_a, v2i64_b); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vsle.d( ++ ++ // __lsx_vslei_b ++ // vd, vj, si5 ++ // V16QI, V16QI, QI ++ v16i8_r = __lsx_vslei_b(v16i8_a, si5); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vslei.b( ++ ++ // __lsx_vslei_h ++ // vd, vj, si5 ++ // V8HI, V8HI, QI ++ v8i16_r = __lsx_vslei_h(v8i16_a, si5); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vslei.h( ++ ++ // __lsx_vslei_w ++ // vd, vj, si5 ++ // V4SI, V4SI, QI ++ v4i32_r = __lsx_vslei_w(v4i32_a, si5); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vslei.w( ++ ++ // __lsx_vslei_d ++ // vd, vj, si5 ++ // V2DI, V2DI, QI ++ v2i64_r = __lsx_vslei_d(v2i64_a, si5); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vslei.d( ++ ++ // __lsx_vsle_bu ++ // vd, vj, vk ++ // V16QI, UV16QI, UV16QI ++ v16i8_r = __lsx_vsle_bu(v16u8_a, v16u8_b); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vsle.bu( ++ ++ // __lsx_vsle_hu ++ // vd, vj, vk ++ // V8HI, UV8HI, UV8HI ++ v8i16_r = __lsx_vsle_hu(v8u16_a, v8u16_b); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vsle.hu( ++ ++ // __lsx_vsle_wu ++ // vd, vj, vk ++ // V4SI, UV4SI, UV4SI ++ v4i32_r = __lsx_vsle_wu(v4u32_a, v4u32_b); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vsle.wu( ++ ++ // __lsx_vsle_du ++ // vd, vj, vk ++ // V2DI, UV2DI, UV2DI ++ v2i64_r = __lsx_vsle_du(v2u64_a, v2u64_b); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vsle.du( ++ ++ // __lsx_vslei_bu ++ // vd, vj, ui5 ++ // V16QI, UV16QI, UQI ++ v16i8_r = __lsx_vslei_bu(v16u8_a, ui5); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vslei.bu( ++ ++ // __lsx_vslei_hu ++ // vd, vj, ui5 ++ // V8HI, UV8HI, UQI ++ v8i16_r = __lsx_vslei_hu(v8u16_a, ui5); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vslei.hu( ++ ++ // __lsx_vslei_wu ++ // vd, vj, ui5 ++ // V4SI, UV4SI, UQI ++ v4i32_r = __lsx_vslei_wu(v4u32_a, ui5); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vslei.wu( ++ ++ // __lsx_vslei_du ++ // vd, vj, ui5 ++ // V2DI, UV2DI, UQI ++ v2i64_r = __lsx_vslei_du(v2u64_a, ui5); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vslei.du( ++ ++ // __lsx_vsat_b ++ // vd, vj, ui3 ++ // V16QI, V16QI, UQI ++ v16i8_r = __lsx_vsat_b(v16i8_a, ui3); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vsat.b( ++ ++ // __lsx_vsat_h ++ // vd, vj, ui4 ++ // V8HI, V8HI, UQI ++ v8i16_r = __lsx_vsat_h(v8i16_a, ui4); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vsat.h( ++ ++ // __lsx_vsat_w ++ // vd, vj, ui5 ++ // V4SI, V4SI, UQI ++ v4i32_r = __lsx_vsat_w(v4i32_a, ui5); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vsat.w( ++ ++ // __lsx_vsat_d ++ // vd, vj, ui6 ++ // V2DI, V2DI, UQI ++ v2i64_r = __lsx_vsat_d(v2i64_a, ui6); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vsat.d( ++ ++ // __lsx_vsat_bu ++ // vd, vj, ui3 ++ // UV16QI, UV16QI, UQI ++ v16u8_r = 
__lsx_vsat_bu(v16u8_a, ui3); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vsat.bu( ++ ++ // __lsx_vsat_hu ++ // vd, vj, ui4 ++ // UV8HI, UV8HI, UQI ++ v8u16_r = __lsx_vsat_hu(v8u16_a, ui4); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vsat.hu( ++ ++ // __lsx_vsat_wu ++ // vd, vj, ui5 ++ // UV4SI, UV4SI, UQI ++ v4u32_r = __lsx_vsat_wu(v4u32_a, ui5); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vsat.wu( ++ ++ // __lsx_vsat_du ++ // vd, vj, ui6 ++ // UV2DI, UV2DI, UQI ++ v2u64_r = __lsx_vsat_du(v2u64_a, ui6); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vsat.du( ++ ++ // __lsx_vadda_b ++ // vd, vj, vk ++ // V16QI, V16QI, V16QI ++ v16i8_r = __lsx_vadda_b(v16i8_a, v16i8_b); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vadda.b( ++ ++ // __lsx_vadda_h ++ // vd, vj, vk ++ // V8HI, V8HI, V8HI ++ v8i16_r = __lsx_vadda_h(v8i16_a, v8i16_b); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vadda.h( ++ ++ // __lsx_vadda_w ++ // vd, vj, vk ++ // V4SI, V4SI, V4SI ++ v4i32_r = __lsx_vadda_w(v4i32_a, v4i32_b); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vadda.w( ++ ++ // __lsx_vadda_d ++ // vd, vj, vk ++ // V2DI, V2DI, V2DI ++ v2i64_r = __lsx_vadda_d(v2i64_a, v2i64_b); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vadda.d( ++ ++ // __lsx_vsadd_b ++ // vd, vj, vk ++ // V16QI, V16QI, V16QI ++ v16i8_r = __lsx_vsadd_b(v16i8_a, v16i8_b); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vsadd.b( ++ ++ // __lsx_vsadd_h ++ // vd, vj, vk ++ // V8HI, V8HI, V8HI ++ v8i16_r = __lsx_vsadd_h(v8i16_a, v8i16_b); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vsadd.h( ++ ++ // __lsx_vsadd_w ++ // vd, vj, vk ++ // V4SI, V4SI, V4SI ++ v4i32_r = __lsx_vsadd_w(v4i32_a, v4i32_b); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vsadd.w( ++ ++ // __lsx_vsadd_d ++ // vd, vj, vk ++ // V2DI, V2DI, V2DI ++ v2i64_r = __lsx_vsadd_d(v2i64_a, v2i64_b); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vsadd.d( ++ ++ // __lsx_vsadd_bu ++ // vd, vj, vk ++ // UV16QI, UV16QI, UV16QI ++ v16u8_r = __lsx_vsadd_bu(v16u8_a, v16u8_b); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vsadd.bu( ++ ++ // __lsx_vsadd_hu ++ // vd, vj, vk ++ // UV8HI, UV8HI, UV8HI ++ v8u16_r = __lsx_vsadd_hu(v8u16_a, v8u16_b); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vsadd.hu( ++ ++ // __lsx_vsadd_wu ++ // vd, vj, vk ++ // UV4SI, UV4SI, UV4SI ++ v4u32_r = __lsx_vsadd_wu(v4u32_a, v4u32_b); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vsadd.wu( ++ ++ // __lsx_vsadd_du ++ // vd, vj, vk ++ // UV2DI, UV2DI, UV2DI ++ v2u64_r = __lsx_vsadd_du(v2u64_a, v2u64_b); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vsadd.du( ++ ++ // __lsx_vavg_b ++ // vd, vj, vk ++ // V16QI, V16QI, V16QI ++ v16i8_r = __lsx_vavg_b(v16i8_a, v16i8_b); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vavg.b( ++ ++ // __lsx_vavg_h ++ // vd, vj, vk ++ // V8HI, V8HI, V8HI ++ v8i16_r = __lsx_vavg_h(v8i16_a, v8i16_b); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vavg.h( ++ ++ // __lsx_vavg_w ++ // vd, vj, vk ++ // V4SI, V4SI, V4SI ++ v4i32_r = __lsx_vavg_w(v4i32_a, v4i32_b); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vavg.w( ++ ++ // __lsx_vavg_d ++ // vd, vj, vk ++ // V2DI, V2DI, V2DI ++ v2i64_r = __lsx_vavg_d(v2i64_a, v2i64_b); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vavg.d( ++ ++ // __lsx_vavg_bu ++ // vd, vj, vk ++ // UV16QI, UV16QI, UV16QI ++ v16u8_r = __lsx_vavg_bu(v16u8_a, v16u8_b); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vavg.bu( ++ ++ // __lsx_vavg_hu ++ // vd, vj, vk ++ // UV8HI, UV8HI, UV8HI ++ v8u16_r = __lsx_vavg_hu(v8u16_a, v8u16_b); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vavg.hu( ++ ++ // 
__lsx_vavg_wu ++ // vd, vj, vk ++ // UV4SI, UV4SI, UV4SI ++ v4u32_r = __lsx_vavg_wu(v4u32_a, v4u32_b); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vavg.wu( ++ ++ // __lsx_vavg_du ++ // vd, vj, vk ++ // UV2DI, UV2DI, UV2DI ++ v2u64_r = __lsx_vavg_du(v2u64_a, v2u64_b); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vavg.du( ++ ++ // __lsx_vavgr_b ++ // vd, vj, vk ++ // V16QI, V16QI, V16QI ++ v16i8_r = __lsx_vavgr_b(v16i8_a, v16i8_b); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vavgr.b( ++ ++ // __lsx_vavgr_h ++ // vd, vj, vk ++ // V8HI, V8HI, V8HI ++ v8i16_r = __lsx_vavgr_h(v8i16_a, v8i16_b); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vavgr.h( ++ ++ // __lsx_vavgr_w ++ // vd, vj, vk ++ // V4SI, V4SI, V4SI ++ v4i32_r = __lsx_vavgr_w(v4i32_a, v4i32_b); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vavgr.w( ++ ++ // __lsx_vavgr_d ++ // vd, vj, vk ++ // V2DI, V2DI, V2DI ++ v2i64_r = __lsx_vavgr_d(v2i64_a, v2i64_b); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vavgr.d( ++ ++ // __lsx_vavgr_bu ++ // vd, vj, vk ++ // UV16QI, UV16QI, UV16QI ++ v16u8_r = __lsx_vavgr_bu(v16u8_a, v16u8_b); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vavgr.bu( ++ ++ // __lsx_vavgr_hu ++ // vd, vj, vk ++ // UV8HI, UV8HI, UV8HI ++ v8u16_r = __lsx_vavgr_hu(v8u16_a, v8u16_b); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vavgr.hu( ++ ++ // __lsx_vavgr_wu ++ // vd, vj, vk ++ // UV4SI, UV4SI, UV4SI ++ v4u32_r = __lsx_vavgr_wu(v4u32_a, v4u32_b); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vavgr.wu( ++ ++ // __lsx_vavgr_du ++ // vd, vj, vk ++ // UV2DI, UV2DI, UV2DI ++ v2u64_r = __lsx_vavgr_du(v2u64_a, v2u64_b); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vavgr.du( ++ ++ // __lsx_vssub_b ++ // vd, vj, vk ++ // V16QI, V16QI, V16QI ++ v16i8_r = __lsx_vssub_b(v16i8_a, v16i8_b); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vssub.b( ++ ++ // __lsx_vssub_h ++ // vd, vj, vk ++ // V8HI, V8HI, V8HI ++ v8i16_r = __lsx_vssub_h(v8i16_a, v8i16_b); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vssub.h( ++ ++ // __lsx_vssub_w ++ // vd, vj, vk ++ // V4SI, V4SI, V4SI ++ v4i32_r = __lsx_vssub_w(v4i32_a, v4i32_b); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vssub.w( ++ ++ // __lsx_vssub_d ++ // vd, vj, vk ++ // V2DI, V2DI, V2DI ++ v2i64_r = __lsx_vssub_d(v2i64_a, v2i64_b); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vssub.d( ++ ++ // __lsx_vssub_bu ++ // vd, vj, vk ++ // UV16QI, UV16QI, UV16QI ++ v16u8_r = __lsx_vssub_bu(v16u8_a, v16u8_b); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vssub.bu( ++ ++ // __lsx_vssub_hu ++ // vd, vj, vk ++ // UV8HI, UV8HI, UV8HI ++ v8u16_r = __lsx_vssub_hu(v8u16_a, v8u16_b); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vssub.hu( ++ ++ // __lsx_vssub_wu ++ // vd, vj, vk ++ // UV4SI, UV4SI, UV4SI ++ v4u32_r = __lsx_vssub_wu(v4u32_a, v4u32_b); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vssub.wu( ++ ++ // __lsx_vssub_du ++ // vd, vj, vk ++ // UV2DI, UV2DI, UV2DI ++ v2u64_r = __lsx_vssub_du(v2u64_a, v2u64_b); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vssub.du( ++ ++ // __lsx_vabsd_b ++ // vd, vj, vk ++ // V16QI, V16QI, V16QI ++ v16i8_r = __lsx_vabsd_b(v16i8_a, v16i8_b); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vabsd.b( ++ ++ // __lsx_vabsd_h ++ // vd, vj, vk ++ // V8HI, V8HI, V8HI ++ v8i16_r = __lsx_vabsd_h(v8i16_a, v8i16_b); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vabsd.h( ++ ++ // __lsx_vabsd_w ++ // vd, vj, vk ++ // V4SI, V4SI, V4SI ++ v4i32_r = __lsx_vabsd_w(v4i32_a, v4i32_b); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vabsd.w( ++ ++ // __lsx_vabsd_d ++ // vd, vj, vk ++ // V2DI, V2DI, V2DI ++ 
v2i64_r = __lsx_vabsd_d(v2i64_a, v2i64_b); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vabsd.d( ++ ++ // __lsx_vabsd_bu ++ // vd, vj, vk ++ // UV16QI, UV16QI, UV16QI ++ v16u8_r = __lsx_vabsd_bu(v16u8_a, v16u8_b); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vabsd.bu( ++ ++ // __lsx_vabsd_hu ++ // vd, vj, vk ++ // UV8HI, UV8HI, UV8HI ++ v8u16_r = __lsx_vabsd_hu(v8u16_a, v8u16_b); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vabsd.hu( ++ ++ // __lsx_vabsd_wu ++ // vd, vj, vk ++ // UV4SI, UV4SI, UV4SI ++ v4u32_r = __lsx_vabsd_wu(v4u32_a, v4u32_b); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vabsd.wu( ++ ++ // __lsx_vabsd_du ++ // vd, vj, vk ++ // UV2DI, UV2DI, UV2DI ++ v2u64_r = __lsx_vabsd_du(v2u64_a, v2u64_b); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vabsd.du( ++ ++ // __lsx_vmul_b ++ // vd, vj, vk ++ // V16QI, V16QI, V16QI ++ v16i8_r = __lsx_vmul_b(v16i8_a, v16i8_b); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vmul.b( ++ ++ // __lsx_vmul_h ++ // vd, vj, vk ++ // V8HI, V8HI, V8HI ++ v8i16_r = __lsx_vmul_h(v8i16_a, v8i16_b); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vmul.h( ++ ++ // __lsx_vmul_w ++ // vd, vj, vk ++ // V4SI, V4SI, V4SI ++ v4i32_r = __lsx_vmul_w(v4i32_a, v4i32_b); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vmul.w( ++ ++ // __lsx_vmul_d ++ // vd, vj, vk ++ // V2DI, V2DI, V2DI ++ v2i64_r = __lsx_vmul_d(v2i64_a, v2i64_b); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vmul.d( ++ ++ // __lsx_vmadd_b ++ // vd, vj, vk ++ // V16QI, V16QI, V16QI, V16QI ++ v16i8_r = __lsx_vmadd_b(v16i8_a, v16i8_b, v16i8_c); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vmadd.b( ++ ++ // __lsx_vmadd_h ++ // vd, vj, vk ++ // V8HI, V8HI, V8HI, V8HI ++ v8i16_r = __lsx_vmadd_h(v8i16_a, v8i16_b, v8i16_c); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vmadd.h( ++ ++ // __lsx_vmadd_w ++ // vd, vj, vk ++ // V4SI, V4SI, V4SI, V4SI ++ v4i32_r = __lsx_vmadd_w(v4i32_a, v4i32_b, v4i32_c); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vmadd.w( ++ ++ // __lsx_vmadd_d ++ // vd, vj, vk ++ // V2DI, V2DI, V2DI, V2DI ++ v2i64_r = __lsx_vmadd_d(v2i64_a, v2i64_b, v2i64_c); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vmadd.d( ++ ++ // __lsx_vmsub_b ++ // vd, vj, vk ++ // V16QI, V16QI, V16QI, V16QI ++ v16i8_r = __lsx_vmsub_b(v16i8_a, v16i8_b, v16i8_c); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vmsub.b( ++ ++ // __lsx_vmsub_h ++ // vd, vj, vk ++ // V8HI, V8HI, V8HI, V8HI ++ v8i16_r = __lsx_vmsub_h(v8i16_a, v8i16_b, v8i16_c); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vmsub.h( ++ ++ // __lsx_vmsub_w ++ // vd, vj, vk ++ // V4SI, V4SI, V4SI, V4SI ++ v4i32_r = __lsx_vmsub_w(v4i32_a, v4i32_b, v4i32_c); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vmsub.w( ++ ++ // __lsx_vmsub_d ++ // vd, vj, vk ++ // V2DI, V2DI, V2DI, V2DI ++ v2i64_r = __lsx_vmsub_d(v2i64_a, v2i64_b, v2i64_c); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vmsub.d( ++ ++ // __lsx_vdiv_b ++ // vd, vj, vk ++ // V16QI, V16QI, V16QI ++ v16i8_r = __lsx_vdiv_b(v16i8_a, v16i8_b); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vdiv.b( ++ ++ // __lsx_vdiv_h ++ // vd, vj, vk ++ // V8HI, V8HI, V8HI ++ v8i16_r = __lsx_vdiv_h(v8i16_a, v8i16_b); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vdiv.h( ++ ++ // __lsx_vdiv_w ++ // vd, vj, vk ++ // V4SI, V4SI, V4SI ++ v4i32_r = __lsx_vdiv_w(v4i32_a, v4i32_b); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vdiv.w( ++ ++ // __lsx_vdiv_d ++ // vd, vj, vk ++ // V2DI, V2DI, V2DI ++ v2i64_r = __lsx_vdiv_d(v2i64_a, v2i64_b); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vdiv.d( ++ ++ // __lsx_vdiv_bu ++ // vd, vj, vk ++ // UV16QI, 
UV16QI, UV16QI ++ v16u8_r = __lsx_vdiv_bu(v16u8_a, v16u8_b); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vdiv.bu( ++ ++ // __lsx_vdiv_hu ++ // vd, vj, vk ++ // UV8HI, UV8HI, UV8HI ++ v8u16_r = __lsx_vdiv_hu(v8u16_a, v8u16_b); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vdiv.hu( ++ ++ // __lsx_vdiv_wu ++ // vd, vj, vk ++ // UV4SI, UV4SI, UV4SI ++ v4u32_r = __lsx_vdiv_wu(v4u32_a, v4u32_b); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vdiv.wu( ++ ++ // __lsx_vdiv_du ++ // vd, vj, vk ++ // UV2DI, UV2DI, UV2DI ++ v2u64_r = __lsx_vdiv_du(v2u64_a, v2u64_b); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vdiv.du( ++ ++ // __lsx_vhaddw_h_b ++ // vd, vj, vk ++ // V8HI, V16QI, V16QI ++ v8i16_r = __lsx_vhaddw_h_b(v16i8_a, v16i8_b); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vhaddw.h.b( ++ ++ // __lsx_vhaddw_w_h ++ // vd, vj, vk ++ // V4SI, V8HI, V8HI ++ v4i32_r = __lsx_vhaddw_w_h(v8i16_a, v8i16_b); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vhaddw.w.h( ++ ++ // __lsx_vhaddw_d_w ++ // vd, vj, vk ++ // V2DI, V4SI, V4SI ++ v2i64_r = __lsx_vhaddw_d_w(v4i32_a, v4i32_b); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vhaddw.d.w( ++ ++ // __lsx_vhaddw_hu_bu ++ // vd, vj, vk ++ // UV8HI, UV16QI, UV16QI ++ v8u16_r = __lsx_vhaddw_hu_bu(v16u8_a, v16u8_b); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vhaddw.hu.bu( ++ ++ // __lsx_vhaddw_wu_hu ++ // vd, vj, vk ++ // UV4SI, UV8HI, UV8HI ++ v4u32_r = __lsx_vhaddw_wu_hu(v8u16_a, v8u16_b); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vhaddw.wu.hu( ++ ++ // __lsx_vhaddw_du_wu ++ // vd, vj, vk ++ // UV2DI, UV4SI, UV4SI ++ v2u64_r = __lsx_vhaddw_du_wu(v4u32_a, v4u32_b); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vhaddw.du.wu( ++ ++ // __lsx_vhsubw_h_b ++ // vd, vj, vk ++ // V8HI, V16QI, V16QI ++ v8i16_r = __lsx_vhsubw_h_b(v16i8_a, v16i8_b); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vhsubw.h.b( ++ ++ // __lsx_vhsubw_w_h ++ // vd, vj, vk ++ // V4SI, V8HI, V8HI ++ v4i32_r = __lsx_vhsubw_w_h(v8i16_a, v8i16_b); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vhsubw.w.h( ++ ++ // __lsx_vhsubw_d_w ++ // vd, vj, vk ++ // V2DI, V4SI, V4SI ++ v2i64_r = __lsx_vhsubw_d_w(v4i32_a, v4i32_b); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vhsubw.d.w( ++ ++ // __lsx_vhsubw_hu_bu ++ // vd, vj, vk ++ // V8HI, UV16QI, UV16QI ++ v8i16_r = __lsx_vhsubw_hu_bu(v16u8_a, v16u8_b); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vhsubw.hu.bu( ++ ++ // __lsx_vhsubw_wu_hu ++ // vd, vj, vk ++ // V4SI, UV8HI, UV8HI ++ v4i32_r = __lsx_vhsubw_wu_hu(v8u16_a, v8u16_b); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vhsubw.wu.hu( ++ ++ // __lsx_vhsubw_du_wu ++ // vd, vj, vk ++ // V2DI, UV4SI, UV4SI ++ v2i64_r = __lsx_vhsubw_du_wu(v4u32_a, v4u32_b); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vhsubw.du.wu( ++ ++ // __lsx_vmod_b ++ // vd, vj, vk ++ // V16QI, V16QI, V16QI ++ v16i8_r = __lsx_vmod_b(v16i8_a, v16i8_b); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vmod.b( ++ ++ // __lsx_vmod_h ++ // vd, vj, vk ++ // V8HI, V8HI, V8HI ++ v8i16_r = __lsx_vmod_h(v8i16_a, v8i16_b); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vmod.h( ++ ++ // __lsx_vmod_w ++ // vd, vj, vk ++ // V4SI, V4SI, V4SI ++ v4i32_r = __lsx_vmod_w(v4i32_a, v4i32_b); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vmod.w( ++ ++ // __lsx_vmod_d ++ // vd, vj, vk ++ // V2DI, V2DI, V2DI ++ v2i64_r = __lsx_vmod_d(v2i64_a, v2i64_b); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vmod.d( ++ ++ // __lsx_vmod_bu ++ // vd, vj, vk ++ // UV16QI, UV16QI, UV16QI ++ v16u8_r = __lsx_vmod_bu(v16u8_a, v16u8_b); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vmod.bu( ++ ++ 
// __lsx_vmod_hu ++ // vd, vj, vk ++ // UV8HI, UV8HI, UV8HI ++ v8u16_r = __lsx_vmod_hu(v8u16_a, v8u16_b); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vmod.hu( ++ ++ // __lsx_vmod_wu ++ // vd, vj, vk ++ // UV4SI, UV4SI, UV4SI ++ v4u32_r = __lsx_vmod_wu(v4u32_a, v4u32_b); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vmod.wu( ++ ++ // __lsx_vmod_du ++ // vd, vj, vk ++ // UV2DI, UV2DI, UV2DI ++ v2u64_r = __lsx_vmod_du(v2u64_a, v2u64_b); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vmod.du( ++ ++ // __lsx_vreplve_b ++ // vd, vj, rk ++ // V16QI, V16QI, SI ++ v16i8_r = __lsx_vreplve_b(v16i8_a, i32_b); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vreplve.b( ++ ++ // __lsx_vreplve_h ++ // vd, vj, rk ++ // V8HI, V8HI, SI ++ v8i16_r = __lsx_vreplve_h(v8i16_a, i32_b); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vreplve.h( ++ ++ // __lsx_vreplve_w ++ // vd, vj, rk ++ // V4SI, V4SI, SI ++ v4i32_r = __lsx_vreplve_w(v4i32_a, i32_b); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vreplve.w( ++ ++ // __lsx_vreplve_d ++ // vd, vj, rk ++ // V2DI, V2DI, SI ++ v2i64_r = __lsx_vreplve_d(v2i64_a, i32_b); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vreplve.d( ++ ++ // __lsx_vreplvei_b ++ // vd, vj, ui4 ++ // V16QI, V16QI, UQI ++ v16i8_r = __lsx_vreplvei_b(v16i8_a, ui4); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vreplvei.b( ++ ++ // __lsx_vreplvei_h ++ // vd, vj, ui3 ++ // V8HI, V8HI, UQI ++ v8i16_r = __lsx_vreplvei_h(v8i16_a, ui3); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vreplvei.h( ++ ++ // __lsx_vreplvei_w ++ // vd, vj, ui2 ++ // V4SI, V4SI, UQI ++ v4i32_r = __lsx_vreplvei_w(v4i32_a, ui2); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vreplvei.w( ++ ++ // __lsx_vreplvei_d ++ // vd, vj, ui1 ++ // V2DI, V2DI, UQI ++ v2i64_r = __lsx_vreplvei_d(v2i64_a, ui1); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vreplvei.d( ++ ++ // __lsx_vpickev_b ++ // vd, vj, vk ++ // V16QI, V16QI, V16QI ++ v16i8_r = __lsx_vpickev_b(v16i8_a, v16i8_b); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vpickev.b( ++ ++ // __lsx_vpickev_h ++ // vd, vj, vk ++ // V8HI, V8HI, V8HI ++ v8i16_r = __lsx_vpickev_h(v8i16_a, v8i16_b); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vpickev.h( ++ ++ // __lsx_vpickev_w ++ // vd, vj, vk ++ // V4SI, V4SI, V4SI ++ v4i32_r = __lsx_vpickev_w(v4i32_a, v4i32_b); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vpickev.w( ++ ++ // __lsx_vpickev_d ++ // vd, vj, vk ++ // V2DI, V2DI, V2DI ++ v2i64_r = __lsx_vpickev_d(v2i64_a, v2i64_b); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vpickev.d( ++ ++ // __lsx_vpickod_b ++ // vd, vj, vk ++ // V16QI, V16QI, V16QI ++ v16i8_r = __lsx_vpickod_b(v16i8_a, v16i8_b); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vpickod.b( ++ ++ // __lsx_vpickod_h ++ // vd, vj, vk ++ // V8HI, V8HI, V8HI ++ v8i16_r = __lsx_vpickod_h(v8i16_a, v8i16_b); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vpickod.h( ++ ++ // __lsx_vpickod_w ++ // vd, vj, vk ++ // V4SI, V4SI, V4SI ++ v4i32_r = __lsx_vpickod_w(v4i32_a, v4i32_b); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vpickod.w( ++ ++ // __lsx_vpickod_d ++ // vd, vj, vk ++ // V2DI, V2DI, V2DI ++ v2i64_r = __lsx_vpickod_d(v2i64_a, v2i64_b); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vpickod.d( ++ ++ // __lsx_vilvh_b ++ // vd, vj, vk ++ // V16QI, V16QI, V16QI ++ v16i8_r = __lsx_vilvh_b(v16i8_a, v16i8_b); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vilvh.b( ++ ++ // __lsx_vilvh_h ++ // vd, vj, vk ++ // V8HI, V8HI, V8HI ++ v8i16_r = __lsx_vilvh_h(v8i16_a, v8i16_b); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vilvh.h( ++ ++ // __lsx_vilvh_w ++ // vd, vj, vk 
++ // V4SI, V4SI, V4SI ++ v4i32_r = __lsx_vilvh_w(v4i32_a, v4i32_b); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vilvh.w( ++ ++ // __lsx_vilvh_d ++ // vd, vj, vk ++ // V2DI, V2DI, V2DI ++ v2i64_r = __lsx_vilvh_d(v2i64_a, v2i64_b); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vilvh.d( ++ ++ // __lsx_vilvl_b ++ // vd, vj, vk ++ // V16QI, V16QI, V16QI ++ v16i8_r = __lsx_vilvl_b(v16i8_a, v16i8_b); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vilvl.b( ++ ++ // __lsx_vilvl_h ++ // vd, vj, vk ++ // V8HI, V8HI, V8HI ++ v8i16_r = __lsx_vilvl_h(v8i16_a, v8i16_b); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vilvl.h( ++ ++ // __lsx_vilvl_w ++ // vd, vj, vk ++ // V4SI, V4SI, V4SI ++ v4i32_r = __lsx_vilvl_w(v4i32_a, v4i32_b); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vilvl.w( ++ ++ // __lsx_vilvl_d ++ // vd, vj, vk ++ // V2DI, V2DI, V2DI ++ v2i64_r = __lsx_vilvl_d(v2i64_a, v2i64_b); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vilvl.d( ++ ++ // __lsx_vpackev_b ++ // vd, vj, vk ++ // V16QI, V16QI, V16QI ++ v16i8_r = __lsx_vpackev_b(v16i8_a, v16i8_b); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vpackev.b( ++ ++ // __lsx_vpackev_h ++ // vd, vj, vk ++ // V8HI, V8HI, V8HI ++ v8i16_r = __lsx_vpackev_h(v8i16_a, v8i16_b); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vpackev.h( ++ ++ // __lsx_vpackev_w ++ // vd, vj, vk ++ // V4SI, V4SI, V4SI ++ v4i32_r = __lsx_vpackev_w(v4i32_a, v4i32_b); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vpackev.w( ++ ++ // __lsx_vpackev_d ++ // vd, vj, vk ++ // V2DI, V2DI, V2DI ++ v2i64_r = __lsx_vpackev_d(v2i64_a, v2i64_b); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vpackev.d( ++ ++ // __lsx_vpackod_b ++ // vd, vj, vk ++ // V16QI, V16QI, V16QI ++ v16i8_r = __lsx_vpackod_b(v16i8_a, v16i8_b); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vpackod.b( ++ ++ // __lsx_vpackod_h ++ // vd, vj, vk ++ // V8HI, V8HI, V8HI ++ v8i16_r = __lsx_vpackod_h(v8i16_a, v8i16_b); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vpackod.h( ++ ++ // __lsx_vpackod_w ++ // vd, vj, vk ++ // V4SI, V4SI, V4SI ++ v4i32_r = __lsx_vpackod_w(v4i32_a, v4i32_b); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vpackod.w( ++ ++ // __lsx_vpackod_d ++ // vd, vj, vk ++ // V2DI, V2DI, V2DI ++ v2i64_r = __lsx_vpackod_d(v2i64_a, v2i64_b); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vpackod.d( ++ ++ // __lsx_vshuf_h ++ // vd, vj, vk ++ // V8HI, V8HI, V8HI, V8HI ++ v8i16_r = __lsx_vshuf_h(v8i16_a, v8i16_b, v8i16_c); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vshuf.h( ++ ++ // __lsx_vshuf_w ++ // vd, vj, vk ++ // V4SI, V4SI, V4SI, V4SI ++ v4i32_r = __lsx_vshuf_w(v4i32_a, v4i32_b, v4i32_c); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vshuf.w( ++ ++ // __lsx_vshuf_d ++ // vd, vj, vk ++ // V2DI, V2DI, V2DI, V2DI ++ v2i64_r = __lsx_vshuf_d(v2i64_a, v2i64_b, v2i64_c); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vshuf.d( ++ ++ // __lsx_vand_v ++ // vd, vj, vk ++ // UV16QI, UV16QI, UV16QI ++ v16u8_r = __lsx_vand_v(v16u8_a, v16u8_b); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vand.v( ++ ++ // __lsx_vandi_b ++ // vd, vj, ui8 ++ // UV16QI, UV16QI, UQI ++ v16u8_r = __lsx_vandi_b(v16u8_a, ui8); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vandi.b( ++ ++ // __lsx_vor_v ++ // vd, vj, vk ++ // UV16QI, UV16QI, UV16QI ++ v16u8_r = __lsx_vor_v(v16u8_a, v16u8_b); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vor.v( ++ ++ // __lsx_vori_b ++ // vd, vj, ui8 ++ // UV16QI, UV16QI, UQI ++ v16u8_r = __lsx_vori_b(v16u8_a, ui8); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vori.b( ++ ++ // __lsx_vnor_v ++ // vd, vj, vk ++ // UV16QI, UV16QI, UV16QI 
++ v16u8_r = __lsx_vnor_v(v16u8_a, v16u8_b); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vnor.v( ++ ++ // __lsx_vnori_b ++ // vd, vj, ui8 ++ // UV16QI, UV16QI, UQI ++ v16u8_r = __lsx_vnori_b(v16u8_a, ui8); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vnori.b( ++ ++ // __lsx_vxor_v ++ // vd, vj, vk ++ // UV16QI, UV16QI, UV16QI ++ v16u8_r = __lsx_vxor_v(v16u8_a, v16u8_b); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vxor.v( ++ ++ // __lsx_vxori_b ++ // vd, vj, ui8 ++ // UV16QI, UV16QI, UQI ++ v16u8_r = __lsx_vxori_b(v16u8_a, ui8); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vxori.b( ++ ++ // __lsx_vbitsel_v ++ // vd, vj, vk, va ++ // UV16QI, UV16QI, UV16QI, UV16QI ++ v16u8_r = __lsx_vbitsel_v(v16u8_a, v16u8_b, v16u8_c); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vbitsel.v( ++ ++ // __lsx_vbitseli_b ++ // vd, vj, ui8 ++ // UV16QI, UV16QI, UV16QI, UQI ++ v16u8_r = __lsx_vbitseli_b(v16u8_a, v16u8_b, ui8); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vbitseli.b( ++ ++ // __lsx_vshuf4i_b ++ // vd, vj, ui8 ++ // V16QI, V16QI, UQI ++ v16i8_r = __lsx_vshuf4i_b(v16i8_a, ui8); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vshuf4i.b( ++ ++ // __lsx_vshuf4i_h ++ // vd, vj, ui8 ++ // V8HI, V8HI, UQI ++ v8i16_r = __lsx_vshuf4i_h(v8i16_a, ui8); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vshuf4i.h( ++ ++ // __lsx_vshuf4i_w ++ // vd, vj, ui8 ++ // V4SI, V4SI, UQI ++ v4i32_r = __lsx_vshuf4i_w(v4i32_a, ui8); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vshuf4i.w( ++ ++ // __lsx_vreplgr2vr_b ++ // vd, rj ++ // V16QI, SI ++ v16i8_r = __lsx_vreplgr2vr_b(i32_a); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vreplgr2vr.b( ++ ++ // __lsx_vreplgr2vr_h ++ // vd, rj ++ // V8HI, SI ++ v8i16_r = __lsx_vreplgr2vr_h(i32_a); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vreplgr2vr.h( ++ ++ // __lsx_vreplgr2vr_w ++ // vd, rj ++ // V4SI, SI ++ v4i32_r = __lsx_vreplgr2vr_w(i32_a); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vreplgr2vr.w( ++ ++ // __lsx_vreplgr2vr_d ++ // vd, rj ++ // V2DI, DI ++ v2i64_r = __lsx_vreplgr2vr_d(i64_a); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vreplgr2vr.d( ++ ++ // __lsx_vpcnt_b ++ // vd, vj ++ // V16QI, V16QI ++ v16i8_r = __lsx_vpcnt_b(v16i8_a); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vpcnt.b( ++ ++ // __lsx_vpcnt_h ++ // vd, vj ++ // V8HI, V8HI ++ v8i16_r = __lsx_vpcnt_h(v8i16_a); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vpcnt.h( ++ ++ // __lsx_vpcnt_w ++ // vd, vj ++ // V4SI, V4SI ++ v4i32_r = __lsx_vpcnt_w(v4i32_a); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vpcnt.w( ++ ++ // __lsx_vpcnt_d ++ // vd, vj ++ // V2DI, V2DI ++ v2i64_r = __lsx_vpcnt_d(v2i64_a); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vpcnt.d( ++ ++ // __lsx_vclo_b ++ // vd, vj ++ // V16QI, V16QI ++ v16i8_r = __lsx_vclo_b(v16i8_a); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vclo.b( ++ ++ // __lsx_vclo_h ++ // vd, vj ++ // V8HI, V8HI ++ v8i16_r = __lsx_vclo_h(v8i16_a); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vclo.h( ++ ++ // __lsx_vclo_w ++ // vd, vj ++ // V4SI, V4SI ++ v4i32_r = __lsx_vclo_w(v4i32_a); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vclo.w( ++ ++ // __lsx_vclo_d ++ // vd, vj ++ // V2DI, V2DI ++ v2i64_r = __lsx_vclo_d(v2i64_a); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vclo.d( ++ ++ // __lsx_vclz_b ++ // vd, vj ++ // V16QI, V16QI ++ v16i8_r = __lsx_vclz_b(v16i8_a); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vclz.b( ++ ++ // __lsx_vclz_h ++ // vd, vj ++ // V8HI, V8HI ++ v8i16_r = __lsx_vclz_h(v8i16_a); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vclz.h( ++ ++ // __lsx_vclz_w ++ // vd, 
vj ++ // V4SI, V4SI ++ v4i32_r = __lsx_vclz_w(v4i32_a); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vclz.w( ++ ++ // __lsx_vclz_d ++ // vd, vj ++ // V2DI, V2DI ++ v2i64_r = __lsx_vclz_d(v2i64_a); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vclz.d( ++ ++ // __lsx_vpickve2gr_b ++ // rd, vj, ui4 ++ // SI, V16QI, UQI ++ i32_r = __lsx_vpickve2gr_b(v16i8_a, ui4); // CHECK: call i32 @llvm.loongarch.lsx.vpickve2gr.b( ++ ++ // __lsx_vpickve2gr_h ++ // rd, vj, ui3 ++ // SI, V8HI, UQI ++ i32_r = __lsx_vpickve2gr_h(v8i16_a, ui3); // CHECK: call i32 @llvm.loongarch.lsx.vpickve2gr.h( ++ ++ // __lsx_vpickve2gr_w ++ // rd, vj, ui2 ++ // SI, V4SI, UQI ++ i32_r = __lsx_vpickve2gr_w(v4i32_a, ui2); // CHECK: call i32 @llvm.loongarch.lsx.vpickve2gr.w( ++ ++ // __lsx_vpickve2gr_d ++ // rd, vj, ui1 ++ // DI, V2DI, UQI ++ i64_r = __lsx_vpickve2gr_d(v2i64_a, ui1); // CHECK: call i64 @llvm.loongarch.lsx.vpickve2gr.d( ++ ++ // __lsx_vpickve2gr_bu ++ // rd, vj, ui4 ++ // USI, V16QI, UQI ++ u32_r = __lsx_vpickve2gr_bu(v16i8_a, ui4); // CHECK: call i32 @llvm.loongarch.lsx.vpickve2gr.bu( ++ ++ // __lsx_vpickve2gr_hu ++ // rd, vj, ui3 ++ // USI, V8HI, UQI ++ u32_r = __lsx_vpickve2gr_hu(v8i16_a, ui3); // CHECK: call i32 @llvm.loongarch.lsx.vpickve2gr.hu( ++ ++ // __lsx_vpickve2gr_wu ++ // rd, vj, ui2 ++ // USI, V4SI, UQI ++ u32_r = __lsx_vpickve2gr_wu(v4i32_a, ui2); // CHECK: call i32 @llvm.loongarch.lsx.vpickve2gr.wu( ++ ++ // __lsx_vpickve2gr_du ++ // rd, vj, ui1 ++ // UDI, V2DI, UQI ++ u64_r = __lsx_vpickve2gr_du(v2i64_a, ui1); // CHECK: call i64 @llvm.loongarch.lsx.vpickve2gr.du( ++ ++ // __lsx_vinsgr2vr_b ++ // vd, rj, ui4 ++ // V16QI, V16QI, SI, UQI ++ v16i8_r = __lsx_vinsgr2vr_b(v16i8_a, i32_b, ui4); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vinsgr2vr.b( ++ ++ // __lsx_vinsgr2vr_h ++ // vd, rj, ui3 ++ // V8HI, V8HI, SI, UQI ++ v8i16_r = __lsx_vinsgr2vr_h(v8i16_a, i32_b, ui3); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vinsgr2vr.h( ++ ++ // __lsx_vinsgr2vr_w ++ // vd, rj, ui2 ++ // V4SI, V4SI, SI, UQI ++ v4i32_r = __lsx_vinsgr2vr_w(v4i32_a, i32_b, ui2); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vinsgr2vr.w( ++ ++ // __lsx_vinsgr2vr_d ++ // vd, rj, ui1 ++ // V2DI, V2DI, SI, UQI ++ v2i64_r = __lsx_vinsgr2vr_d(v2i64_a, i32_b, ui1); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vinsgr2vr.d( ++ ++ // __lsx_vfcmp_caf_s ++ // vd, vj, vk ++ // V4SI, V4SF, V4SF ++ v4i32_r = __lsx_vfcmp_caf_s(v4f32_a, v4f32_b); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vfcmp.caf.s( ++ ++ // __lsx_vfcmp_caf_d ++ // vd, vj, vk ++ // V2DI, V2DF, V2DF ++ v2i64_r = __lsx_vfcmp_caf_d(v2f64_a, v2f64_b); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vfcmp.caf.d( ++ ++ // __lsx_vfcmp_cor_s ++ // vd, vj, vk ++ // V4SI, V4SF, V4SF ++ v4i32_r = __lsx_vfcmp_cor_s(v4f32_a, v4f32_b); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vfcmp.cor.s( ++ ++ // __lsx_vfcmp_cor_d ++ // vd, vj, vk ++ // V2DI, V2DF, V2DF ++ v2i64_r = __lsx_vfcmp_cor_d(v2f64_a, v2f64_b); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vfcmp.cor.d( ++ ++ // __lsx_vfcmp_cun_s ++ // vd, vj, vk ++ // V4SI, V4SF, V4SF ++ v4i32_r = __lsx_vfcmp_cun_s(v4f32_a, v4f32_b); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vfcmp.cun.s( ++ ++ // __lsx_vfcmp_cun_d ++ // vd, vj, vk ++ // V2DI, V2DF, V2DF ++ v2i64_r = __lsx_vfcmp_cun_d(v2f64_a, v2f64_b); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vfcmp.cun.d( ++ ++ // __lsx_vfcmp_cune_s ++ // vd, vj, vk ++ // V4SI, V4SF, V4SF ++ v4i32_r = __lsx_vfcmp_cune_s(v4f32_a, v4f32_b); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vfcmp.cune.s( ++ ++ // 
__lsx_vfcmp_cune_d ++ // vd, vj, vk ++ // V2DI, V2DF, V2DF ++ v2i64_r = __lsx_vfcmp_cune_d(v2f64_a, v2f64_b); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vfcmp.cune.d( ++ ++ // __lsx_vfcmp_cueq_s ++ // vd, vj, vk ++ // V4SI, V4SF, V4SF ++ v4i32_r = __lsx_vfcmp_cueq_s(v4f32_a, v4f32_b); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vfcmp.cueq.s( ++ ++ // __lsx_vfcmp_cueq_d ++ // vd, vj, vk ++ // V2DI, V2DF, V2DF ++ v2i64_r = __lsx_vfcmp_cueq_d(v2f64_a, v2f64_b); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vfcmp.cueq.d( ++ ++ // __lsx_vfcmp_ceq_s ++ // vd, vj, vk ++ // V4SI, V4SF, V4SF ++ v4i32_r = __lsx_vfcmp_ceq_s(v4f32_a, v4f32_b); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vfcmp.ceq.s( ++ ++ // __lsx_vfcmp_ceq_d ++ // vd, vj, vk ++ // V2DI, V2DF, V2DF ++ v2i64_r = __lsx_vfcmp_ceq_d(v2f64_a, v2f64_b); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vfcmp.ceq.d( ++ ++ // __lsx_vfcmp_cne_s ++ // vd, vj, vk ++ // V4SI, V4SF, V4SF ++ v4i32_r = __lsx_vfcmp_cne_s(v4f32_a, v4f32_b); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vfcmp.cne.s( ++ ++ // __lsx_vfcmp_cne_d ++ // vd, vj, vk ++ // V2DI, V2DF, V2DF ++ v2i64_r = __lsx_vfcmp_cne_d(v2f64_a, v2f64_b); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vfcmp.cne.d( ++ ++ // __lsx_vfcmp_clt_s ++ // vd, vj, vk ++ // V4SI, V4SF, V4SF ++ v4i32_r = __lsx_vfcmp_clt_s(v4f32_a, v4f32_b); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vfcmp.clt.s( ++ ++ // __lsx_vfcmp_clt_d ++ // vd, vj, vk ++ // V2DI, V2DF, V2DF ++ v2i64_r = __lsx_vfcmp_clt_d(v2f64_a, v2f64_b); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vfcmp.clt.d( ++ ++ // __lsx_vfcmp_cult_s ++ // vd, vj, vk ++ // V4SI, V4SF, V4SF ++ v4i32_r = __lsx_vfcmp_cult_s(v4f32_a, v4f32_b); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vfcmp.cult.s( ++ ++ // __lsx_vfcmp_cult_d ++ // vd, vj, vk ++ // V2DI, V2DF, V2DF ++ v2i64_r = __lsx_vfcmp_cult_d(v2f64_a, v2f64_b); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vfcmp.cult.d( ++ ++ // __lsx_vfcmp_cle_s ++ // vd, vj, vk ++ // V4SI, V4SF, V4SF ++ v4i32_r = __lsx_vfcmp_cle_s(v4f32_a, v4f32_b); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vfcmp.cle.s( ++ ++ // __lsx_vfcmp_cle_d ++ // vd, vj, vk ++ // V2DI, V2DF, V2DF ++ v2i64_r = __lsx_vfcmp_cle_d(v2f64_a, v2f64_b); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vfcmp.cle.d( ++ ++ // __lsx_vfcmp_cule_s ++ // vd, vj, vk ++ // V4SI, V4SF, V4SF ++ v4i32_r = __lsx_vfcmp_cule_s(v4f32_a, v4f32_b); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vfcmp.cule.s( ++ ++ // __lsx_vfcmp_cule_d ++ // vd, vj, vk ++ // V2DI, V2DF, V2DF ++ v2i64_r = __lsx_vfcmp_cule_d(v2f64_a, v2f64_b); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vfcmp.cule.d( ++ ++ // __lsx_vfcmp_saf_s ++ // vd, vj, vk ++ // V4SI, V4SF, V4SF ++ v4i32_r = __lsx_vfcmp_saf_s(v4f32_a, v4f32_b); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vfcmp.saf.s( ++ ++ // __lsx_vfcmp_saf_d ++ // vd, vj, vk ++ // V2DI, V2DF, V2DF ++ v2i64_r = __lsx_vfcmp_saf_d(v2f64_a, v2f64_b); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vfcmp.saf.d( ++ ++ // __lsx_vfcmp_sor_s ++ // vd, vj, vk ++ // V4SI, V4SF, V4SF ++ v4i32_r = __lsx_vfcmp_sor_s(v4f32_a, v4f32_b); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vfcmp.sor.s( ++ ++ // __lsx_vfcmp_sor_d ++ // vd, vj, vk ++ // V2DI, V2DF, V2DF ++ v2i64_r = __lsx_vfcmp_sor_d(v2f64_a, v2f64_b); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vfcmp.sor.d( ++ ++ // __lsx_vfcmp_sun_s ++ // vd, vj, vk ++ // V4SI, V4SF, V4SF ++ v4i32_r = __lsx_vfcmp_sun_s(v4f32_a, v4f32_b); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vfcmp.sun.s( ++ ++ // __lsx_vfcmp_sun_d ++ // vd, vj, 
vk ++ // V2DI, V2DF, V2DF ++ v2i64_r = __lsx_vfcmp_sun_d(v2f64_a, v2f64_b); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vfcmp.sun.d( ++ ++ // __lsx_vfcmp_sune_s ++ // vd, vj, vk ++ // V4SI, V4SF, V4SF ++ v4i32_r = __lsx_vfcmp_sune_s(v4f32_a, v4f32_b); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vfcmp.sune.s( ++ ++ // __lsx_vfcmp_sune_d ++ // vd, vj, vk ++ // V2DI, V2DF, V2DF ++ v2i64_r = __lsx_vfcmp_sune_d(v2f64_a, v2f64_b); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vfcmp.sune.d( ++ ++ // __lsx_vfcmp_sueq_s ++ // vd, vj, vk ++ // V4SI, V4SF, V4SF ++ v4i32_r = __lsx_vfcmp_sueq_s(v4f32_a, v4f32_b); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vfcmp.sueq.s( ++ ++ // __lsx_vfcmp_sueq_d ++ // vd, vj, vk ++ // V2DI, V2DF, V2DF ++ v2i64_r = __lsx_vfcmp_sueq_d(v2f64_a, v2f64_b); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vfcmp.sueq.d( ++ ++ // __lsx_vfcmp_seq_s ++ // vd, vj, vk ++ // V4SI, V4SF, V4SF ++ v4i32_r = __lsx_vfcmp_seq_s(v4f32_a, v4f32_b); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vfcmp.seq.s( ++ ++ // __lsx_vfcmp_seq_d ++ // vd, vj, vk ++ // V2DI, V2DF, V2DF ++ v2i64_r = __lsx_vfcmp_seq_d(v2f64_a, v2f64_b); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vfcmp.seq.d( ++ ++ // __lsx_vfcmp_sne_s ++ // vd, vj, vk ++ // V4SI, V4SF, V4SF ++ v4i32_r = __lsx_vfcmp_sne_s(v4f32_a, v4f32_b); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vfcmp.sne.s( ++ ++ // __lsx_vfcmp_sne_d ++ // vd, vj, vk ++ // V2DI, V2DF, V2DF ++ v2i64_r = __lsx_vfcmp_sne_d(v2f64_a, v2f64_b); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vfcmp.sne.d( ++ ++ // __lsx_vfcmp_slt_s ++ // vd, vj, vk ++ // V4SI, V4SF, V4SF ++ v4i32_r = __lsx_vfcmp_slt_s(v4f32_a, v4f32_b); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vfcmp.slt.s( ++ ++ // __lsx_vfcmp_slt_d ++ // vd, vj, vk ++ // V2DI, V2DF, V2DF ++ v2i64_r = __lsx_vfcmp_slt_d(v2f64_a, v2f64_b); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vfcmp.slt.d( ++ ++ // __lsx_vfcmp_sult_s ++ // vd, vj, vk ++ // V4SI, V4SF, V4SF ++ v4i32_r = __lsx_vfcmp_sult_s(v4f32_a, v4f32_b); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vfcmp.sult.s( ++ ++ // __lsx_vfcmp_sult_d ++ // vd, vj, vk ++ // V2DI, V2DF, V2DF ++ v2i64_r = __lsx_vfcmp_sult_d(v2f64_a, v2f64_b); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vfcmp.sult.d( ++ ++ // __lsx_vfcmp_sle_s ++ // vd, vj, vk ++ // V4SI, V4SF, V4SF ++ v4i32_r = __lsx_vfcmp_sle_s(v4f32_a, v4f32_b); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vfcmp.sle.s( ++ ++ // __lsx_vfcmp_sle_d ++ // vd, vj, vk ++ // V2DI, V2DF, V2DF ++ v2i64_r = __lsx_vfcmp_sle_d(v2f64_a, v2f64_b); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vfcmp.sle.d( ++ ++ // __lsx_vfcmp_sule_s ++ // vd, vj, vk ++ // V4SI, V4SF, V4SF ++ v4i32_r = __lsx_vfcmp_sule_s(v4f32_a, v4f32_b); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vfcmp.sule.s( ++ ++ // __lsx_vfcmp_sule_d ++ // vd, vj, vk ++ // V2DI, V2DF, V2DF ++ v2i64_r = __lsx_vfcmp_sule_d(v2f64_a, v2f64_b); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vfcmp.sule.d( ++ ++ // __lsx_vfadd_s ++ // vd, vj, vk ++ // V4SF, V4SF, V4SF ++ v4f32_r = __lsx_vfadd_s(v4f32_a, v4f32_b); // CHECK: call <4 x float> @llvm.loongarch.lsx.vfadd.s( ++ // __lsx_vfadd_d ++ // vd, vj, vk ++ // V2DF, V2DF, V2DF ++ v2f64_r = __lsx_vfadd_d(v2f64_a, v2f64_b); // CHECK: call <2 x double> @llvm.loongarch.lsx.vfadd.d( ++ ++ // __lsx_vfsub_s ++ // vd, vj, vk ++ // V4SF, V4SF, V4SF ++ v4f32_r = __lsx_vfsub_s(v4f32_a, v4f32_b); // CHECK: call <4 x float> @llvm.loongarch.lsx.vfsub.s( ++ ++ // __lsx_vfsub_d ++ // vd, vj, vk ++ // V2DF, V2DF, V2DF ++ v2f64_r = __lsx_vfsub_d(v2f64_a, 
v2f64_b); // CHECK: call <2 x double> @llvm.loongarch.lsx.vfsub.d( ++ ++ // __lsx_vfmul_s ++ // vd, vj, vk ++ // V4SF, V4SF, V4SF ++ v4f32_r = __lsx_vfmul_s(v4f32_a, v4f32_b); // CHECK: call <4 x float> @llvm.loongarch.lsx.vfmul.s( ++ ++ // __lsx_vfmul_d ++ // vd, vj, vk ++ // V2DF, V2DF, V2DF ++ v2f64_r = __lsx_vfmul_d(v2f64_a, v2f64_b); // CHECK: call <2 x double> @llvm.loongarch.lsx.vfmul.d( ++ ++ // __lsx_vfdiv_s ++ // vd, vj, vk ++ // V4SF, V4SF, V4SF ++ v4f32_r = __lsx_vfdiv_s(v4f32_a, v4f32_b); // CHECK: call <4 x float> @llvm.loongarch.lsx.vfdiv.s( ++ ++ // __lsx_vfdiv_d ++ // vd, vj, vk ++ // V2DF, V2DF, V2DF ++ v2f64_r = __lsx_vfdiv_d(v2f64_a, v2f64_b); // CHECK: call <2 x double> @llvm.loongarch.lsx.vfdiv.d( ++ ++ // __lsx_vfcvt_h_s ++ // vd, vj, vk ++ // V8HI, V4SF, V4SF ++ v8i16_r = __lsx_vfcvt_h_s(v4f32_a, v4f32_b); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vfcvt.h.s( ++ ++ // __lsx_vfcvt_s_d ++ // vd, vj, vk ++ // V4SF, V2DF, V2DF ++ v4f32_r = __lsx_vfcvt_s_d(v2f64_a, v2f64_b); // CHECK: call <4 x float> @llvm.loongarch.lsx.vfcvt.s.d( ++ ++ // __lsx_vfmin_s ++ // vd, vj, vk ++ // V4SF, V4SF, V4SF ++ v4f32_r = __lsx_vfmin_s(v4f32_a, v4f32_b); // CHECK: call <4 x float> @llvm.loongarch.lsx.vfmin.s( ++ ++ // __lsx_vfmin_d ++ // vd, vj, vk ++ // V2DF, V2DF, V2DF ++ v2f64_r = __lsx_vfmin_d(v2f64_a, v2f64_b); // CHECK: call <2 x double> @llvm.loongarch.lsx.vfmin.d( ++ ++ // __lsx_vfmina_s ++ // vd, vj, vk ++ // V4SF, V4SF, V4SF ++ v4f32_r = __lsx_vfmina_s(v4f32_a, v4f32_b); // CHECK: call <4 x float> @llvm.loongarch.lsx.vfmina.s( ++ ++ // __lsx_vfmina_d ++ // vd, vj, vk ++ // V2DF, V2DF, V2DF ++ v2f64_r = __lsx_vfmina_d(v2f64_a, v2f64_b); // CHECK: call <2 x double> @llvm.loongarch.lsx.vfmina.d( ++ ++ // __lsx_vfmax_s ++ // vd, vj, vk ++ // V4SF, V4SF, V4SF ++ v4f32_r = __lsx_vfmax_s(v4f32_a, v4f32_b); // CHECK: call <4 x float> @llvm.loongarch.lsx.vfmax.s( ++ ++ // __lsx_vfmax_d ++ // vd, vj, vk ++ // V2DF, V2DF, V2DF ++ v2f64_r = __lsx_vfmax_d(v2f64_a, v2f64_b); // CHECK: call <2 x double> @llvm.loongarch.lsx.vfmax.d( ++ ++ // __lsx_vfmaxa_s ++ // vd, vj, vk ++ // V4SF, V4SF, V4SF ++ v4f32_r = __lsx_vfmaxa_s(v4f32_a, v4f32_b); // CHECK: call <4 x float> @llvm.loongarch.lsx.vfmaxa.s( ++ ++ // __lsx_vfmaxa_d ++ // vd, vj, vk ++ // V2DF, V2DF, V2DF ++ v2f64_r = __lsx_vfmaxa_d(v2f64_a, v2f64_b); // CHECK: call <2 x double> @llvm.loongarch.lsx.vfmaxa.d( ++ ++ // __lsx_vfclass_s ++ // vd, vj ++ // V4SI, V4SF ++ v4i32_r = __lsx_vfclass_s(v4f32_a); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vfclass.s( ++ ++ // __lsx_vfclass_d ++ // vd, vj ++ // V2DI, V2DF ++ v2i64_r = __lsx_vfclass_d(v2f64_a); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vfclass.d( ++ ++ // __lsx_vfsqrt_s ++ // vd, vj ++ // V4SF, V4SF ++ v4f32_r = __lsx_vfsqrt_s(v4f32_a); // CHECK: call <4 x float> @llvm.loongarch.lsx.vfsqrt.s( ++ ++ // __lsx_vfsqrt_d ++ // vd, vj ++ // V2DF, V2DF ++ v2f64_r = __lsx_vfsqrt_d(v2f64_a); // CHECK: call <2 x double> @llvm.loongarch.lsx.vfsqrt.d( ++ ++ // __lsx_vfrecip_s ++ // vd, vj ++ // V4SF, V4SF ++ v4f32_r = __lsx_vfrecip_s(v4f32_a); // CHECK: call <4 x float> @llvm.loongarch.lsx.vfrecip.s( ++ ++ // __lsx_vfrecip_d ++ // vd, vj ++ // V2DF, V2DF ++ v2f64_r = __lsx_vfrecip_d(v2f64_a); // CHECK: call <2 x double> @llvm.loongarch.lsx.vfrecip.d( ++ ++ // __lsx_vfrint_s ++ // vd, vj ++ // V4SF, V4SF ++ v4f32_r = __lsx_vfrint_s(v4f32_a); // CHECK: call <4 x float> @llvm.loongarch.lsx.vfrint.s( ++ ++ // __lsx_vfrint_d ++ // vd, vj ++ // V2DF, V2DF ++ v2f64_r = __lsx_vfrint_d(v2f64_a); // 
CHECK: call <2 x double> @llvm.loongarch.lsx.vfrint.d( ++ ++ // __lsx_vfrsqrt_s ++ // vd, vj ++ // V4SF, V4SF ++ v4f32_r = __lsx_vfrsqrt_s(v4f32_a); // CHECK: call <4 x float> @llvm.loongarch.lsx.vfrsqrt.s( ++ ++ // __lsx_vfrsqrt_d ++ // vd, vj ++ // V2DF, V2DF ++ v2f64_r = __lsx_vfrsqrt_d(v2f64_a); // CHECK: call <2 x double> @llvm.loongarch.lsx.vfrsqrt.d( ++ ++ // __lsx_vflogb_s ++ // vd, vj ++ // V4SF, V4SF ++ v4f32_r = __lsx_vflogb_s(v4f32_a); // CHECK: call <4 x float> @llvm.loongarch.lsx.vflogb.s( ++ ++ // __lsx_vflogb_d ++ // vd, vj ++ // V2DF, V2DF ++ v2f64_r = __lsx_vflogb_d(v2f64_a); // CHECK: call <2 x double> @llvm.loongarch.lsx.vflogb.d( ++ ++ // __lsx_vfcvth_s_h ++ // vd, vj ++ // V4SF, V8HI ++ v4f32_r = __lsx_vfcvth_s_h(v8i16_a); // CHECK: call <4 x float> @llvm.loongarch.lsx.vfcvth.s.h( ++ ++ // __lsx_vfcvth_d_s ++ // vd, vj ++ // V2DF, V4SF ++ v2f64_r = __lsx_vfcvth_d_s(v4f32_a); // CHECK: call <2 x double> @llvm.loongarch.lsx.vfcvth.d.s( ++ ++ //gcc build fail ++ ++ // __lsx_vfcvtl_s_h ++ // vd, vj ++ // V4SF, V8HI ++ v4f32_r = __lsx_vfcvtl_s_h(v8i16_a); // CHECK: call <4 x float> @llvm.loongarch.lsx.vfcvtl.s.h( ++ ++ // __lsx_vfcvtl_d_s ++ // vd, vj ++ // V2DF, V4SF ++ v2f64_r = __lsx_vfcvtl_d_s(v4f32_a); // CHECK: call <2 x double> @llvm.loongarch.lsx.vfcvtl.d.s( ++ ++ // __lsx_vftint_w_s ++ // vd, vj ++ // V4SI, V4SF ++ v4i32_r = __lsx_vftint_w_s(v4f32_a); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vftint.w.s( ++ ++ // __lsx_vftint_l_d ++ // vd, vj ++ // V2DI, V2DF ++ v2i64_r = __lsx_vftint_l_d(v2f64_a); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vftint.l.d( ++ ++ // __lsx_vftint_wu_s ++ // vd, vj ++ // UV4SI, V4SF ++ v4u32_r = __lsx_vftint_wu_s(v4f32_a); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vftint.wu.s( ++ ++ // __lsx_vftint_lu_d ++ // vd, vj ++ // UV2DI, V2DF ++ v2u64_r = __lsx_vftint_lu_d(v2f64_a); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vftint.lu.d( ++ ++ // __lsx_vftintrz_w_s ++ // vd, vj ++ // V4SI, V4SF ++ v4i32_r = __lsx_vftintrz_w_s(v4f32_a); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vftintrz.w.s( ++ ++ // __lsx_vftintrz_l_d ++ // vd, vj ++ // V2DI, V2DF ++ v2i64_r = __lsx_vftintrz_l_d(v2f64_a); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vftintrz.l.d( ++ ++ // __lsx_vftintrz_wu_s ++ // vd, vj ++ // UV4SI, V4SF ++ v4u32_r = __lsx_vftintrz_wu_s(v4f32_a); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vftintrz.wu.s( ++ ++ // __lsx_vftintrz_lu_d ++ // vd, vj ++ // UV2DI, V2DF ++ v2u64_r = __lsx_vftintrz_lu_d(v2f64_a); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vftintrz.lu.d( ++ ++ // __lsx_vffint_s_w ++ // vd, vj ++ // V4SF, V4SI ++ v4f32_r = __lsx_vffint_s_w(v4i32_a); // CHECK: call <4 x float> @llvm.loongarch.lsx.vffint.s.w( ++ ++ // __lsx_vffint_d_l ++ // vd, vj ++ // V2DF, V2DI ++ v2f64_r = __lsx_vffint_d_l(v2i64_a); // CHECK: call <2 x double> @llvm.loongarch.lsx.vffint.d.l( ++ ++ // __lsx_vffint_s_wu ++ // vd, vj ++ // V4SF, UV4SI ++ v4f32_r = __lsx_vffint_s_wu(v4u32_a); // CHECK: call <4 x float> @llvm.loongarch.lsx.vffint.s.wu( ++ ++ // __lsx_vffint_d_lu ++ // vd, vj ++ // V2DF, UV2DI ++ v2f64_r = __lsx_vffint_d_lu(v2u64_a); // CHECK: call <2 x double> @llvm.loongarch.lsx.vffint.d.lu( ++ ++ // __lsx_vandn_v ++ // vd, vj, vk ++ // UV16QI, UV16QI, UV16QI ++ v16u8_r = __lsx_vandn_v(v16u8_a, v16u8_b); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vandn.v( ++ ++ // __lsx_vneg_b ++ // vd, vj ++ // V16QI, V16QI ++ v16i8_r = __lsx_vneg_b(v16i8_a); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vneg.b( ++ ++ // __lsx_vneg_h ++ // vd, vj ++ // 
V8HI, V8HI ++ v8i16_r = __lsx_vneg_h(v8i16_a); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vneg.h( ++ ++ // __lsx_vneg_w ++ // vd, vj ++ // V4SI, V4SI ++ v4i32_r = __lsx_vneg_w(v4i32_a); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vneg.w( ++ ++ // __lsx_vneg_d ++ // vd, vj ++ // V2DI, V2DI ++ v2i64_r = __lsx_vneg_d(v2i64_a); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vneg.d( ++ ++ // __lsx_vmuh_b ++ // vd, vj, vk ++ // V16QI, V16QI, V16QI ++ v16i8_r = __lsx_vmuh_b(v16i8_a, v16i8_b); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vmuh.b( ++ ++ // __lsx_vmuh_h ++ // vd, vj, vk ++ // V8HI, V8HI, V8HI ++ v8i16_r = __lsx_vmuh_h(v8i16_a, v8i16_b); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vmuh.h( ++ ++ // __lsx_vmuh_w ++ // vd, vj, vk ++ // V4SI, V4SI, V4SI ++ v4i32_r = __lsx_vmuh_w(v4i32_a, v4i32_b); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vmuh.w( ++ ++ // __lsx_vmuh_d ++ // vd, vj, vk ++ // V2DI, V2DI, V2DI ++ v2i64_r = __lsx_vmuh_d(v2i64_a, v2i64_b); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vmuh.d( ++ ++ // __lsx_vmuh_bu ++ // vd, vj, vk ++ // UV16QI, UV16QI, UV16QI ++ v16u8_r = __lsx_vmuh_bu(v16u8_a, v16u8_b); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vmuh.bu( ++ ++ // __lsx_vmuh_hu ++ // vd, vj, vk ++ // UV8HI, UV8HI, UV8HI ++ v8u16_r = __lsx_vmuh_hu(v8u16_a, v8u16_b); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vmuh.hu( ++ ++ // __lsx_vmuh_wu ++ // vd, vj, vk ++ // UV4SI, UV4SI, UV4SI ++ v4u32_r = __lsx_vmuh_wu(v4u32_a, v4u32_b); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vmuh.wu( ++ ++ // __lsx_vmuh_du ++ // vd, vj, vk ++ // UV2DI, UV2DI, UV2DI ++ v2u64_r = __lsx_vmuh_du(v2u64_a, v2u64_b); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vmuh.du( ++ ++ // __lsx_vsllwil_h_b ++ // vd, vj, ui3 ++ // V8HI, V16QI, UQI ++ v8i16_r = __lsx_vsllwil_h_b(v16i8_a, ui3); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vsllwil.h.b( ++ ++ // __lsx_vsllwil_w_h ++ // vd, vj, ui4 ++ // V4SI, V8HI, UQI ++ v4i32_r = __lsx_vsllwil_w_h(v8i16_a, ui4); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vsllwil.w.h( ++ ++ // __lsx_vsllwil_d_w ++ // vd, vj, ui5 ++ // V2DI, V4SI, UQI ++ v2i64_r = __lsx_vsllwil_d_w(v4i32_a, ui5); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vsllwil.d.w( ++ ++ // __lsx_vsllwil_hu_bu ++ // vd, vj, ui3 ++ // UV8HI, UV16QI, UQI ++ v8u16_r = __lsx_vsllwil_hu_bu(v16u8_a, ui3); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vsllwil.hu.bu( ++ ++ // __lsx_vsllwil_wu_hu ++ // vd, vj, ui4 ++ // UV4SI, UV8HI, UQI ++ v4u32_r = __lsx_vsllwil_wu_hu(v8u16_a, ui4); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vsllwil.wu.hu( ++ ++ // __lsx_vsllwil_du_wu ++ // vd, vj, ui5 ++ // UV2DI, UV4SI, UQI ++ v2u64_r = __lsx_vsllwil_du_wu(v4u32_a, ui5); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vsllwil.du.wu( ++ ++ // __lsx_vsran_b_h ++ // vd, vj, vk ++ // V16QI, V8HI, V8HI ++ v16i8_r = __lsx_vsran_b_h(v8i16_a, v8i16_b); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vsran.b.h( ++ ++ // __lsx_vsran_h_w ++ // vd, vj, vk ++ // V8HI, V4SI, V4SI ++ v8i16_r = __lsx_vsran_h_w(v4i32_a, v4i32_b); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vsran.h.w( ++ ++ // __lsx_vsran_w_d ++ // vd, vj, vk ++ // V4SI, V2DI, V2DI ++ v4i32_r = __lsx_vsran_w_d(v2i64_a, v2i64_b); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vsran.w.d( ++ ++ // __lsx_vssran_b_h ++ // vd, vj, vk ++ // V16QI, V8HI, V8HI ++ v16i8_r = __lsx_vssran_b_h(v8i16_a, v8i16_b); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vssran.b.h( ++ ++ // __lsx_vssran_h_w ++ // vd, vj, vk ++ // V8HI, V4SI, V4SI ++ v8i16_r = __lsx_vssran_h_w(v4i32_a, v4i32_b); // CHECK: call 
<8 x i16> @llvm.loongarch.lsx.vssran.h.w( ++ ++ // __lsx_vssran_w_d ++ // vd, vj, vk ++ // V4SI, V2DI, V2DI ++ v4i32_r = __lsx_vssran_w_d(v2i64_a, v2i64_b); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vssran.w.d( ++ ++ // __lsx_vssran_bu_h ++ // vd, vj, vk ++ // UV16QI, UV8HI, UV8HI ++ v16u8_r = __lsx_vssran_bu_h(v8u16_a, v8u16_b); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vssran.bu.h( ++ ++ // __lsx_vssran_hu_w ++ // vd, vj, vk ++ // UV8HI, UV4SI, UV4SI ++ v8u16_r = __lsx_vssran_hu_w(v4u32_a, v4u32_b); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vssran.hu.w( ++ ++ // __lsx_vssran_wu_d ++ // vd, vj, vk ++ // UV4SI, UV2DI, UV2DI ++ v4u32_r = __lsx_vssran_wu_d(v2u64_a, v2u64_b); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vssran.wu.d( ++ ++ // __lsx_vsrarn_b_h ++ // vd, vj, vk ++ // V16QI, V8HI, V8HI ++ v16i8_r = __lsx_vsrarn_b_h(v8i16_a, v8i16_b); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vsrarn.b.h( ++ ++ // __lsx_vsrarn_h_w ++ // vd, vj, vk ++ // V8HI, V4SI, V4SI ++ v8i16_r = __lsx_vsrarn_h_w(v4i32_a, v4i32_b); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vsrarn.h.w( ++ ++ // __lsx_vsrarn_w_d ++ // vd, vj, vk ++ // V4SI, V2DI, V2DI ++ v4i32_r = __lsx_vsrarn_w_d(v2i64_a, v2i64_b); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vsrarn.w.d( ++ ++ // __lsx_vssrarn_b_h ++ // vd, vj, vk ++ // V16QI, V8HI, V8HI ++ v16i8_r = __lsx_vssrarn_b_h(v8i16_a, v8i16_b); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vssrarn.b.h( ++ ++ // __lsx_vssrarn_h_w ++ // vd, vj, vk ++ // V8HI, V4SI, V4SI ++ v8i16_r = __lsx_vssrarn_h_w(v4i32_a, v4i32_b); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vssrarn.h.w( ++ ++ // __lsx_vssrarn_w_d ++ // vd, vj, vk ++ // V4SI, V2DI, V2DI ++ v4i32_r = __lsx_vssrarn_w_d(v2i64_a, v2i64_b); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vssrarn.w.d( ++ ++ // __lsx_vssrarn_bu_h ++ // vd, vj, vk ++ // UV16QI, UV8HI, UV8HI ++ v16u8_r = __lsx_vssrarn_bu_h(v8u16_a, v8u16_b); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vssrarn.bu.h( ++ ++ // __lsx_vssrarn_hu_w ++ // vd, vj, vk ++ // UV8HI, UV4SI, UV4SI ++ v8u16_r = __lsx_vssrarn_hu_w(v4u32_a, v4u32_b); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vssrarn.hu.w( ++ ++ // __lsx_vssrarn_wu_d ++ // vd, vj, vk ++ // UV4SI, UV2DI, UV2DI ++ v4u32_r = __lsx_vssrarn_wu_d(v2u64_a, v2u64_b); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vssrarn.wu.d( ++ ++ // __lsx_vsrln_b_h ++ // vd, vj, vk ++ // V16QI, V8HI, V8HI ++ v16i8_r = __lsx_vsrln_b_h(v8i16_a, v8i16_b); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vsrln.b.h( ++ ++ // __lsx_vsrln_h_w ++ // vd, vj, vk ++ // V8HI, V4SI, V4SI ++ v8i16_r = __lsx_vsrln_h_w(v4i32_a, v4i32_b); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vsrln.h.w( ++ ++ // __lsx_vsrln_w_d ++ // vd, vj, vk ++ // V4SI, V2DI, V2DI ++ v4i32_r = __lsx_vsrln_w_d(v2i64_a, v2i64_b); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vsrln.w.d( ++ ++ // __lsx_vssrln_bu_h ++ // vd, vj, vk ++ // UV16QI, UV8HI, UV8HI ++ v16u8_r = __lsx_vssrln_bu_h(v8u16_a, v8u16_b); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vssrln.bu.h( ++ ++ // __lsx_vssrln_hu_w ++ // vd, vj, vk ++ // UV8HI, UV4SI, UV4SI ++ v8u16_r = __lsx_vssrln_hu_w(v4u32_a, v4u32_b); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vssrln.hu.w( ++ ++ // __lsx_vssrln_wu_d ++ // vd, vj, vk ++ // UV4SI, UV2DI, UV2DI ++ v4u32_r = __lsx_vssrln_wu_d(v2u64_a, v2u64_b); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vssrln.wu.d( ++ ++ // __lsx_vsrlrn_b_h ++ // vd, vj, vk ++ // V16QI, V8HI, V8HI ++ v16i8_r = __lsx_vsrlrn_b_h(v8i16_a, v8i16_b); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vsrlrn.b.h( ++ 
++ // __lsx_vsrlrn_h_w ++ // vd, vj, vk ++ // V8HI, V4SI, V4SI ++ v8i16_r = __lsx_vsrlrn_h_w(v4i32_a, v4i32_b); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vsrlrn.h.w( ++ ++ // __lsx_vsrlrn_w_d ++ // vd, vj, vk ++ // V4SI, V2DI, V2DI ++ v4i32_r = __lsx_vsrlrn_w_d(v2i64_a, v2i64_b); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vsrlrn.w.d( ++ ++ // __lsx_vssrlrn_bu_h ++ // vd, vj, vk ++ // UV16QI, UV8HI, UV8HI ++ v16u8_r = __lsx_vssrlrn_bu_h(v8u16_a, v8u16_b); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vssrlrn.bu.h( ++ ++ // __lsx_vssrlrn_hu_w ++ // vd, vj, vk ++ // UV8HI, UV4SI, UV4SI ++ v8u16_r = __lsx_vssrlrn_hu_w(v4u32_a, v4u32_b); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vssrlrn.hu.w( ++ ++ // __lsx_vssrlrn_wu_d ++ // vd, vj, vk ++ // UV4SI, UV2DI, UV2DI ++ v4u32_r = __lsx_vssrlrn_wu_d(v2u64_a, v2u64_b); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vssrlrn.wu.d( ++ ++ // __lsx_vfrstpi_b ++ // vd, vj, ui5 ++ // V16QI, V16QI, V16QI, UQI ++ v16i8_r = __lsx_vfrstpi_b(v16i8_a, v16i8_b, ui5); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vfrstpi.b( ++ ++ // __lsx_vfrstpi_h ++ // vd, vj, ui5 ++ // V8HI, V8HI, V8HI, UQI ++ v8i16_r = __lsx_vfrstpi_h(v8i16_a, v8i16_b, ui5); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vfrstpi.h( ++ ++ // __lsx_vfrstp_b ++ // vd, vj, vk ++ // V16QI, V16QI, V16QI, V16QI ++ v16i8_r = __lsx_vfrstp_b(v16i8_a, v16i8_b, v16i8_c); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vfrstp.b( ++ ++ // __lsx_vfrstp_h ++ // vd, vj, vk ++ // V8HI, V8HI, V8HI, V8HI ++ v8i16_r = __lsx_vfrstp_h(v8i16_a, v8i16_b, v8i16_c); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vfrstp.h( ++ ++ // __lsx_vshuf4i_d ++ // vd, vj, ui8 ++ // V2DI, V2DI, V2DI, USI ++ v2i64_r = __lsx_vshuf4i_d(v2i64_a, v2i64_b, ui8); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vshuf4i.d( ++ ++ // __lsx_vbsrl_v ++ // vd, vj, ui5 ++ // V16QI, V16QI, UQI ++ v16i8_r = __lsx_vbsrl_v(v16i8_a, ui5); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vbsrl.v( ++ ++ // __lsx_vbsll_v ++ // vd, vj, ui5 ++ // V16QI, V16QI, UQI ++ v16i8_r = __lsx_vbsll_v(v16i8_a, ui5); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vbsll.v( ++ ++ // __lsx_vextrins_b ++ // vd, vj, ui8 ++ // V16QI, V16QI, V16QI, UQI ++ v16i8_r = __lsx_vextrins_b(v16i8_a, v16i8_b, ui8); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vextrins.b( ++ ++ // __lsx_vextrins_h ++ // vd, vj, ui8 ++ // V8HI, V8HI, V8HI, UQI ++ v8i16_r = __lsx_vextrins_h(v8i16_a, v8i16_b, ui8); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vextrins.h( ++ ++ // __lsx_vextrins_w ++ // vd, vj, ui8 ++ // V4SI, V4SI, V4SI, UQI ++ v4i32_r = __lsx_vextrins_w(v4i32_a, v4i32_b, ui8); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vextrins.w( ++ ++ // __lsx_vextrins_d ++ // vd, vj, ui8 ++ // V2DI, V2DI, V2DI, UQI ++ v2i64_r = __lsx_vextrins_d(v2i64_a, v2i64_b, ui8); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vextrins.d( ++ ++ // __lsx_vmskltz_b ++ // vd, vj ++ // V16QI, V16QI ++ v16i8_r = __lsx_vmskltz_b(v16i8_a); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vmskltz.b( ++ ++ // __lsx_vmskltz_h ++ // vd, vj ++ // V8HI, V8HI ++ v8i16_r = __lsx_vmskltz_h(v8i16_a); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vmskltz.h( ++ ++ // __lsx_vmskltz_w ++ // vd, vj ++ // V4SI, V4SI ++ v4i32_r = __lsx_vmskltz_w(v4i32_a); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vmskltz.w( ++ ++ // __lsx_vmskltz_d ++ // vd, vj ++ // V2DI, V2DI ++ v2i64_r = __lsx_vmskltz_d(v2i64_a); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vmskltz.d( ++ ++ // __lsx_vsigncov_b ++ // vd, vj, vk ++ // V16QI, V16QI, V16QI ++ v16i8_r = 
__lsx_vsigncov_b(v16i8_a, v16i8_b); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vsigncov.b( ++ ++ // __lsx_vsigncov_h ++ // vd, vj, vk ++ // V8HI, V8HI, V8HI ++ v8i16_r = __lsx_vsigncov_h(v8i16_a, v8i16_b); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vsigncov.h( ++ ++ // __lsx_vsigncov_w ++ // vd, vj, vk ++ // V4SI, V4SI, V4SI ++ v4i32_r = __lsx_vsigncov_w(v4i32_a, v4i32_b); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vsigncov.w( ++ ++ // __lsx_vsigncov_d ++ // vd, vj, vk ++ // V2DI, V2DI, V2DI ++ v2i64_r = __lsx_vsigncov_d(v2i64_a, v2i64_b); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vsigncov.d( ++ ++ // __lsx_vfmadd_s ++ // vd, vj, vk, va ++ // V4SF, V4SF, V4SF, V4SF ++ v4f32_r = __lsx_vfmadd_s(v4f32_a, v4f32_b, v4f32_c); // CHECK: call <4 x float> @llvm.loongarch.lsx.vfmadd.s( ++ ++ // __lsx_vfmadd_d ++ // vd, vj, vk, va ++ // V2DF, V2DF, V2DF, V2DF ++ v2f64_r = __lsx_vfmadd_d(v2f64_a, v2f64_b, v2f64_c); // CHECK: call <2 x double> @llvm.loongarch.lsx.vfmadd.d( ++ ++ // __lsx_vfmsub_s ++ // vd, vj, vk, va ++ // V4SF, V4SF, V4SF, V4SF ++ v4f32_r = __lsx_vfmsub_s(v4f32_a, v4f32_b, v4f32_c); // CHECK: call <4 x float> @llvm.loongarch.lsx.vfmsub.s( ++ ++ // __lsx_vfmsub_d ++ // vd, vj, vk, va ++ // V2DF, V2DF, V2DF, V2DF ++ v2f64_r = __lsx_vfmsub_d(v2f64_a, v2f64_b, v2f64_c); // CHECK: call <2 x double> @llvm.loongarch.lsx.vfmsub.d( ++ ++ // __lsx_vfnmadd_s ++ // vd, vj, vk, va ++ // V4SF, V4SF, V4SF, V4SF ++ v4f32_r = __lsx_vfnmadd_s(v4f32_a, v4f32_b, v4f32_c); // CHECK: call <4 x float> @llvm.loongarch.lsx.vfnmadd.s( ++ ++ // __lsx_vfnmadd_d ++ // vd, vj, vk, va ++ // V2DF, V2DF, V2DF, V2DF ++ v2f64_r = __lsx_vfnmadd_d(v2f64_a, v2f64_b, v2f64_c); // CHECK: call <2 x double> @llvm.loongarch.lsx.vfnmadd.d( ++ ++ // __lsx_vfnmsub_s ++ // vd, vj, vk, va ++ // V4SF, V4SF, V4SF, V4SF ++ v4f32_r = __lsx_vfnmsub_s(v4f32_a, v4f32_b, v4f32_c); // CHECK: call <4 x float> @llvm.loongarch.lsx.vfnmsub.s( ++ ++ // __lsx_vfnmsub_d ++ // vd, vj, vk, va ++ // V2DF, V2DF, V2DF, V2DF ++ v2f64_r = __lsx_vfnmsub_d(v2f64_a, v2f64_b, v2f64_c); // CHECK: call <2 x double> @llvm.loongarch.lsx.vfnmsub.d( ++ ++ // __lsx_vftintrne_w_s ++ // vd, vj ++ // V4SI, V4SF ++ v4i32_r = __lsx_vftintrne_w_s(v4f32_a); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vftintrne.w.s( ++ ++ // __lsx_vftintrne_l_d ++ // vd, vj ++ // V2DI, V2DF ++ v2i64_r = __lsx_vftintrne_l_d(v2f64_a); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vftintrne.l.d( ++ ++ // __lsx_vftintrp_w_s ++ // vd, vj ++ // V4SI, V4SF ++ v4i32_r = __lsx_vftintrp_w_s(v4f32_a); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vftintrp.w.s( ++ ++ // __lsx_vftintrp_l_d ++ // vd, vj ++ // V2DI, V2DF ++ v2i64_r = __lsx_vftintrp_l_d(v2f64_a); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vftintrp.l.d( ++ ++ // __lsx_vftintrm_w_s ++ // vd, vj ++ // V4SI, V4SF ++ v4i32_r = __lsx_vftintrm_w_s(v4f32_a); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vftintrm.w.s( ++ ++ // __lsx_vftintrm_l_d ++ // vd, vj ++ // V2DI, V2DF ++ v2i64_r = __lsx_vftintrm_l_d(v2f64_a); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vftintrm.l.d( ++ ++ // __lsx_vftint_w_d ++ // vd, vj, vk ++ // V4SI, V2DF, V2DF ++ v4i32_r = __lsx_vftint_w_d(v2f64_a, v2f64_b); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vftint.w.d( ++ ++ // __lsx_vffint_s_l ++ // vd, vj, vk ++ // V4SF, V2DI, V2DI ++ v4f32_r = __lsx_vffint_s_l(v2i64_a, v2i64_b); // CHECK: call <4 x float> @llvm.loongarch.lsx.vffint.s.l( ++ ++ // __lsx_vftintrz_w_d ++ // vd, vj, vk ++ // V4SI, V2DF, V2DF ++ v4i32_r = __lsx_vftintrz_w_d(v2f64_a, v2f64_b); // 
CHECK: call <4 x i32> @llvm.loongarch.lsx.vftintrz.w.d( ++ ++ // __lsx_vftintrp_w_d ++ // vd, vj, vk ++ // V4SI, V2DF, V2DF ++ v4i32_r = __lsx_vftintrp_w_d(v2f64_a, v2f64_b); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vftintrp.w.d( ++ ++ // __lsx_vftintrm_w_d ++ // vd, vj, vk ++ // V4SI, V2DF, V2DF ++ v4i32_r = __lsx_vftintrm_w_d(v2f64_a, v2f64_b); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vftintrm.w.d( ++ ++ // __lsx_vftintrne_w_d ++ // vd, vj, vk ++ // V4SI, V2DF, V2DF ++ v4i32_r = __lsx_vftintrne_w_d(v2f64_a, v2f64_b); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vftintrne.w.d( ++ ++ // __lsx_vftintl_l_s ++ // vd, vj ++ // V2DI, V4SF ++ v2i64_r = __lsx_vftintl_l_s(v4f32_a); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vftintl.l.s( ++ ++ // __lsx_vftinth_l_s ++ // vd, vj ++ // V2DI, V4SF ++ v2i64_r = __lsx_vftinth_l_s(v4f32_a); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vftinth.l.s( ++ ++ // __lsx_vffinth_d_w ++ // vd, vj ++ // V2DF, V4SI ++ v2f64_r = __lsx_vffinth_d_w(v4i32_a); // CHECK: call <2 x double> @llvm.loongarch.lsx.vffinth.d.w( ++ ++ // __lsx_vffintl_d_w ++ // vd, vj ++ // V2DF, V4SI ++ v2f64_r = __lsx_vffintl_d_w(v4i32_a); // CHECK: call <2 x double> @llvm.loongarch.lsx.vffintl.d.w( ++ ++ // __lsx_vftintrzl_l_s ++ // vd, vj ++ // V2DI, V4SF ++ v2i64_r = __lsx_vftintrzl_l_s(v4f32_a); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vftintrzl.l.s( ++ ++ // __lsx_vftintrzh_l_s ++ // vd, vj ++ // V2DI, V4SF ++ v2i64_r = __lsx_vftintrzh_l_s(v4f32_a); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vftintrzh.l.s( ++ ++ // __lsx_vftintrpl_l_s ++ // vd, vj ++ // V2DI, V4SF ++ v2i64_r = __lsx_vftintrpl_l_s(v4f32_a); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vftintrpl.l.s( ++ ++ // __lsx_vftintrph_l_s ++ // vd, vj ++ // V2DI, V4SF ++ v2i64_r = __lsx_vftintrph_l_s(v4f32_a); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vftintrph.l.s( ++ ++ // __lsx_vftintrml_l_s ++ // vd, vj ++ // V2DI, V4SF ++ v2i64_r = __lsx_vftintrml_l_s(v4f32_a); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vftintrml.l.s( ++ ++ // __lsx_vftintrmh_l_s ++ // vd, vj ++ // V2DI, V4SF ++ v2i64_r = __lsx_vftintrmh_l_s(v4f32_a); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vftintrmh.l.s( ++ ++ // __lsx_vftintrnel_l_s ++ // vd, vj ++ // V2DI, V4SF ++ v2i64_r = __lsx_vftintrnel_l_s(v4f32_a); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vftintrnel.l.s( ++ ++ // __lsx_vftintrneh_l_s ++ // vd, vj ++ // V2DI, V4SF ++ v2i64_r = __lsx_vftintrneh_l_s(v4f32_a); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vftintrneh.l.s( ++ ++ // __lsx_vfrintrne_s ++ // vd, vj ++ // V4SI, V4SF ++ v4i32_r = __lsx_vfrintrne_s(v4f32_a); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vfrintrne.s( ++ ++ // __lsx_vfrintrne_d ++ // vd, vj ++ // V2DI, V2DF ++ v2i64_r = __lsx_vfrintrne_d(v2f64_a); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vfrintrne.d( ++ ++ // __lsx_vfrintrz_s ++ // vd, vj ++ // V4SI, V4SF ++ v4i32_r = __lsx_vfrintrz_s(v4f32_a); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vfrintrz.s( ++ ++ // __lsx_vfrintrz_d ++ // vd, vj ++ // V2DI, V2DF ++ v2i64_r = __lsx_vfrintrz_d(v2f64_a); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vfrintrz.d( ++ ++ // __lsx_vfrintrp_s ++ // vd, vj ++ // V4SI, V4SF ++ v4i32_r = __lsx_vfrintrp_s(v4f32_a); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vfrintrp.s( ++ ++ // __lsx_vfrintrp_d ++ // vd, vj ++ // V2DI, V2DF ++ v2i64_r = __lsx_vfrintrp_d(v2f64_a); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vfrintrp.d( ++ ++ // __lsx_vfrintrm_s ++ // vd, vj ++ // V4SI, V4SF ++ v4i32_r = __lsx_vfrintrm_s(v4f32_a); // CHECK: 
call <4 x i32> @llvm.loongarch.lsx.vfrintrm.s( ++ ++ // __lsx_vfrintrm_d ++ // vd, vj ++ // V2DI, V2DF ++ v2i64_r = __lsx_vfrintrm_d(v2f64_a); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vfrintrm.d( ++ ++ // __lsx_vstelm_b ++ // vd, rj, si8, idx ++ // VOID, V16QI, CVPOINTER, SI, UQI ++ __lsx_vstelm_b(v16i8_a, &v16i8_b, 0, idx4); // CHECK: call void @llvm.loongarch.lsx.vstelm.b( ++ // __lsx_vstelm_h ++ // vd, rj, si8, idx ++ // VOID, V8HI, CVPOINTER, SI, UQI ++ __lsx_vstelm_h(v8i16_a, &v8i16_b, 0, idx3); // CHECK: call void @llvm.loongarch.lsx.vstelm.h( ++ ++ // __lsx_vstelm_w ++ // vd, rj, si8, idx ++ // VOID, V4SI, CVPOINTER, SI, UQI ++ __lsx_vstelm_w(v4i32_a, &v4i32_b, 0, idx2); // CHECK: call void @llvm.loongarch.lsx.vstelm.w( ++ ++ // __lsx_vstelm_d ++ // vd, rj, si8, idx ++ // VOID, V2DI, CVPOINTER, SI, UQI ++ __lsx_vstelm_d(v2i64_a, &v2i64_b, 0, idx1); // CHECK: call void @llvm.loongarch.lsx.vstelm.d( ++ ++ // __lsx_vaddwev_d_w ++ // vd, vj, vk ++ // V2DI, V4SI, V4SI ++ v2i64_r = __lsx_vaddwev_d_w(v4i32_a, v4i32_b); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vaddwev.d.w( ++ ++ // __lsx_vaddwev_w_h ++ // vd, vj, vk ++ // V4SI, V8HI, V8HI ++ v4i32_r = __lsx_vaddwev_w_h(v8i16_a, v8i16_b); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vaddwev.w.h( ++ ++ // __lsx_vaddwev_h_b ++ // vd, vj, vk ++ // V8HI, V16QI, V16QI ++ v8i16_r = __lsx_vaddwev_h_b(v16i8_a, v16i8_b); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vaddwev.h.b( ++ ++ // __lsx_vaddwod_d_w ++ // vd, vj, vk ++ // V2DI, V4SI, V4SI ++ v2i64_r = __lsx_vaddwod_d_w(v4i32_a, v4i32_b); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vaddwod.d.w( ++ ++ // __lsx_vaddwod_w_h ++ // vd, vj, vk ++ // V4SI, V8HI, V8HI ++ v4i32_r = __lsx_vaddwod_w_h(v8i16_a, v8i16_b); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vaddwod.w.h( ++ ++ // __lsx_vaddwod_h_b ++ // vd, vj, vk ++ // V8HI, V16QI, V16QI ++ v8i16_r = __lsx_vaddwod_h_b(v16i8_a, v16i8_b); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vaddwod.h.b( ++ ++ // __lsx_vaddwev_d_wu ++ // vd, vj, vk ++ // V2DI, UV4SI, UV4SI ++ v2i64_r = __lsx_vaddwev_d_wu(v4u32_a, v4u32_b); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vaddwev.d.wu( ++ ++ // __lsx_vaddwev_w_hu ++ // vd, vj, vk ++ // V4SI, UV8HI, UV8HI ++ v4i32_r = __lsx_vaddwev_w_hu(v8u16_a, v8u16_b); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vaddwev.w.hu( ++ ++ // __lsx_vaddwev_h_bu ++ // vd, vj, vk ++ // V8HI, UV16QI, UV16QI ++ v8i16_r = __lsx_vaddwev_h_bu(v16u8_a, v16u8_b); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vaddwev.h.bu( ++ ++ // __lsx_vaddwod_d_wu ++ // vd, vj, vk ++ // V2DI, UV4SI, UV4SI ++ v2i64_r = __lsx_vaddwod_d_wu(v4u32_a, v4u32_b); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vaddwod.d.wu( ++ ++ // __lsx_vaddwod_w_hu ++ // vd, vj, vk ++ // V4SI, UV8HI, UV8HI ++ v4i32_r = __lsx_vaddwod_w_hu(v8u16_a, v8u16_b); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vaddwod.w.hu( ++ ++ // __lsx_vaddwod_h_bu ++ // vd, vj, vk ++ // V8HI, UV16QI, UV16QI ++ v8i16_r = __lsx_vaddwod_h_bu(v16u8_a, v16u8_b); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vaddwod.h.bu( ++ ++ // __lsx_vaddwev_d_wu_w ++ // vd, vj, vk ++ // V2DI, UV4SI, V4SI ++ v2i64_r = __lsx_vaddwev_d_wu_w(v4u32_a, v4i32_b); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vaddwev.d.wu.w( ++ ++ // __lsx_vaddwev_w_hu_h ++ // vd, vj, vk ++ // V4SI, UV8HI, V8HI ++ v4i32_r = __lsx_vaddwev_w_hu_h(v8u16_a, v8i16_b); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vaddwev.w.hu.h( ++ ++ // __lsx_vaddwev_h_bu_b ++ // vd, vj, vk ++ // V8HI, UV16QI, V16QI ++ v8i16_r = __lsx_vaddwev_h_bu_b(v16u8_a, 
v16i8_b); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vaddwev.h.bu.b( ++ ++ // __lsx_vaddwod_d_wu_w ++ // vd, vj, vk ++ // V2DI, UV4SI, V4SI ++ v2i64_r = __lsx_vaddwod_d_wu_w(v4u32_a, v4i32_b); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vaddwod.d.wu.w( ++ ++ // __lsx_vaddwod_w_hu_h ++ // vd, vj, vk ++ // V4SI, UV8HI, V8HI ++ v4i32_r = __lsx_vaddwod_w_hu_h(v8u16_a, v8i16_b); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vaddwod.w.hu.h( ++ ++ // __lsx_vaddwod_h_bu_b ++ // vd, vj, vk ++ // V8HI, UV16QI, V16QI ++ v8i16_r = __lsx_vaddwod_h_bu_b(v16u8_a, v16i8_b); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vaddwod.h.bu.b( ++ ++ // __lsx_vsubwev_d_w ++ // vd, vj, vk ++ // V2DI, V4SI, V4SI ++ v2i64_r = __lsx_vsubwev_d_w(v4i32_a, v4i32_b); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vsubwev.d.w( ++ ++ // __lsx_vsubwev_w_h ++ // vd, vj, vk ++ // V4SI, V8HI, V8HI ++ v4i32_r = __lsx_vsubwev_w_h(v8i16_a, v8i16_b); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vsubwev.w.h( ++ ++ // __lsx_vsubwev_h_b ++ // vd, vj, vk ++ // V8HI, V16QI, V16QI ++ v8i16_r = __lsx_vsubwev_h_b(v16i8_a, v16i8_b); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vsubwev.h.b( ++ ++ // __lsx_vsubwod_d_w ++ // vd, vj, vk ++ // V2DI, V4SI, V4SI ++ v2i64_r = __lsx_vsubwod_d_w(v4i32_a, v4i32_b); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vsubwod.d.w( ++ ++ // __lsx_vsubwod_w_h ++ // vd, vj, vk ++ // V4SI, V8HI, V8HI ++ v4i32_r = __lsx_vsubwod_w_h(v8i16_a, v8i16_b); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vsubwod.w.h( ++ ++ // __lsx_vsubwod_h_b ++ // vd, vj, vk ++ // V8HI, V16QI, V16QI ++ v8i16_r = __lsx_vsubwod_h_b(v16i8_a, v16i8_b); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vsubwod.h.b( ++ ++ // __lsx_vsubwev_d_wu ++ // vd, vj, vk ++ // V2DI, UV4SI, UV4SI ++ v2i64_r = __lsx_vsubwev_d_wu(v4u32_a, v4u32_b); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vsubwev.d.wu( ++ ++ // __lsx_vsubwev_w_hu ++ // vd, vj, vk ++ // V4SI, UV8HI, UV8HI ++ v4i32_r = __lsx_vsubwev_w_hu(v8u16_a, v8u16_b); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vsubwev.w.hu( ++ ++ // __lsx_vsubwev_h_bu ++ // vd, vj, vk ++ // V8HI, UV16QI, UV16QI ++ v8i16_r = __lsx_vsubwev_h_bu(v16u8_a, v16u8_b); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vsubwev.h.bu( ++ ++ // __lsx_vsubwod_d_wu ++ // vd, vj, vk ++ // V2DI, UV4SI, UV4SI ++ v2i64_r = __lsx_vsubwod_d_wu(v4u32_a, v4u32_b); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vsubwod.d.wu( ++ ++ // __lsx_vsubwod_w_hu ++ // vd, vj, vk ++ // V4SI, UV8HI, UV8HI ++ v4i32_r = __lsx_vsubwod_w_hu(v8u16_a, v8u16_b); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vsubwod.w.hu( ++ ++ // __lsx_vsubwod_h_bu ++ // vd, vj, vk ++ // V8HI, UV16QI, UV16QI ++ v8i16_r = __lsx_vsubwod_h_bu(v16u8_a, v16u8_b); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vsubwod.h.bu( ++ ++ // __lsx_vaddwev_q_d ++ // vd, vj, vk ++ // V2DI, V2DI, V2DI ++ v2i64_r = __lsx_vaddwev_q_d(v2i64_a, v2i64_b); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vaddwev.q.d( ++ ++ // __lsx_vaddwod_q_d ++ // vd, vj, vk ++ // V2DI, V2DI, V2DI ++ v2i64_r = __lsx_vaddwod_q_d(v2i64_a, v2i64_b); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vaddwod.q.d( ++ ++ // __lsx_vaddwev_q_du ++ // vd, vj, vk ++ // V2DI, UV2DI, UV2DI ++ v2i64_r = __lsx_vaddwev_q_du(v2u64_a, v2u64_b); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vaddwev.q.du( ++ ++ // __lsx_vaddwod_q_du ++ // vd, vj, vk ++ // V2DI, UV2DI, UV2DI ++ v2i64_r = __lsx_vaddwod_q_du(v2u64_a, v2u64_b); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vaddwod.q.du( ++ ++ // __lsx_vsubwev_q_d ++ // vd, vj, vk ++ // V2DI, V2DI, V2DI ++ v2i64_r 
= __lsx_vsubwev_q_d(v2i64_a, v2i64_b); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vsubwev.q.d( ++ ++ // __lsx_vsubwod_q_d ++ // vd, vj, vk ++ // V2DI, V2DI, V2DI ++ v2i64_r = __lsx_vsubwod_q_d(v2i64_a, v2i64_b); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vsubwod.q.d( ++ ++ // __lsx_vsubwev_q_du ++ // vd, vj, vk ++ // V2DI, UV2DI, UV2DI ++ v2i64_r = __lsx_vsubwev_q_du(v2u64_a, v2u64_b); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vsubwev.q.du( ++ ++ // __lsx_vsubwod_q_du ++ // vd, vj, vk ++ // V2DI, UV2DI, UV2DI ++ v2i64_r = __lsx_vsubwod_q_du(v2u64_a, v2u64_b); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vsubwod.q.du( ++ ++ // __lsx_vaddwev_q_du_d ++ // vd, vj, vk ++ // V2DI, UV2DI, V2DI ++ v2i64_r = __lsx_vaddwev_q_du_d(v2u64_a, v2i64_b); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vaddwev.q.du.d( ++ ++ // __lsx_vaddwod_q_du_d ++ // vd, vj, vk ++ // V2DI, UV2DI, V2DI ++ v2i64_r = __lsx_vaddwod_q_du_d(v2u64_a, v2i64_b); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vaddwod.q.du.d( ++ ++ // __lsx_vmulwev_d_w ++ // vd, vj, vk ++ // V2DI, V4SI, V4SI ++ v2i64_r = __lsx_vmulwev_d_w(v4i32_a, v4i32_b); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vmulwev.d.w( ++ ++ // __lsx_vmulwev_w_h ++ // vd, vj, vk ++ // V4SI, V8HI, V8HI ++ v4i32_r = __lsx_vmulwev_w_h(v8i16_a, v8i16_b); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vmulwev.w.h( ++ ++ // __lsx_vmulwev_h_b ++ // vd, vj, vk ++ // V8HI, V16QI, V16QI ++ v8i16_r = __lsx_vmulwev_h_b(v16i8_a, v16i8_b); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vmulwev.h.b( ++ ++ // __lsx_vmulwod_d_w ++ // vd, vj, vk ++ // V2DI, V4SI, V4SI ++ v2i64_r = __lsx_vmulwod_d_w(v4i32_a, v4i32_b); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vmulwod.d.w( ++ ++ // __lsx_vmulwod_w_h ++ // vd, vj, vk ++ // V4SI, V8HI, V8HI ++ v4i32_r = __lsx_vmulwod_w_h(v8i16_a, v8i16_b); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vmulwod.w.h( ++ ++ // __lsx_vmulwod_h_b ++ // vd, vj, vk ++ // V8HI, V16QI, V16QI ++ v8i16_r = __lsx_vmulwod_h_b(v16i8_a, v16i8_b); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vmulwod.h.b( ++ ++ // __lsx_vmulwev_d_wu ++ // vd, vj, vk ++ // V2DI, UV4SI, UV4SI ++ v2i64_r = __lsx_vmulwev_d_wu(v4u32_a, v4u32_b); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vmulwev.d.wu( ++ ++ // __lsx_vmulwev_w_hu ++ // vd, vj, vk ++ // V4SI, UV8HI, UV8HI ++ v4i32_r = __lsx_vmulwev_w_hu(v8u16_a, v8u16_b); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vmulwev.w.hu( ++ ++ // __lsx_vmulwev_h_bu ++ // vd, vj, vk ++ // V8HI, UV16QI, UV16QI ++ v8i16_r = __lsx_vmulwev_h_bu(v16u8_a, v16u8_b); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vmulwev.h.bu( ++ ++ // __lsx_vmulwod_d_wu ++ // vd, vj, vk ++ // V2DI, UV4SI, UV4SI ++ v2i64_r = __lsx_vmulwod_d_wu(v4u32_a, v4u32_b); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vmulwod.d.wu( ++ ++ // __lsx_vmulwod_w_hu ++ // vd, vj, vk ++ // V4SI, UV8HI, UV8HI ++ v4i32_r = __lsx_vmulwod_w_hu(v8u16_a, v8u16_b); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vmulwod.w.hu( ++ ++ // __lsx_vmulwod_h_bu ++ // vd, vj, vk ++ // V8HI, UV16QI, UV16QI ++ v8i16_r = __lsx_vmulwod_h_bu(v16u8_a, v16u8_b); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vmulwod.h.bu( ++ ++ // __lsx_vmulwev_d_wu_w ++ // vd, vj, vk ++ // V2DI, UV4SI, V4SI ++ v2i64_r = __lsx_vmulwev_d_wu_w(v4u32_a, v4i32_b); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vmulwev.d.wu.w( ++ ++ // __lsx_vmulwev_w_hu_h ++ // vd, vj, vk ++ // V4SI, UV8HI, V8HI ++ v4i32_r = __lsx_vmulwev_w_hu_h(v8u16_a, v8i16_b); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vmulwev.w.hu.h( ++ ++ // __lsx_vmulwev_h_bu_b ++ // vd, vj, 
vk ++ // V8HI, UV16QI, V16QI ++ v8i16_r = __lsx_vmulwev_h_bu_b(v16u8_a, v16i8_b); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vmulwev.h.bu.b( ++ ++ // __lsx_vmulwod_d_wu_w ++ // vd, vj, vk ++ // V2DI, UV4SI, V4SI ++ v2i64_r = __lsx_vmulwod_d_wu_w(v4u32_a, v4i32_b); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vmulwod.d.wu.w( ++ ++ // __lsx_vmulwod_w_hu_h ++ // vd, vj, vk ++ // V4SI, UV8HI, V8HI ++ v4i32_r = __lsx_vmulwod_w_hu_h(v8u16_a, v8i16_b); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vmulwod.w.hu.h( ++ ++ // __lsx_vmulwod_h_bu_b ++ // vd, vj, vk ++ // V8HI, UV16QI, V16QI ++ v8i16_r = __lsx_vmulwod_h_bu_b(v16u8_a, v16i8_b); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vmulwod.h.bu.b( ++ ++ // __lsx_vmulwev_q_d ++ // vd, vj, vk ++ // V2DI, V2DI, V2DI ++ v2i64_r = __lsx_vmulwev_q_d(v2i64_a, v2i64_b); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vmulwev.q.d( ++ ++ // __lsx_vmulwod_q_d ++ // vd, vj, vk ++ // V2DI, V2DI, V2DI ++ v2i64_r = __lsx_vmulwod_q_d(v2i64_a, v2i64_b); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vmulwod.q.d( ++ ++ // __lsx_vmulwev_q_du ++ // vd, vj, vk ++ // V2DI, UV2DI, UV2DI ++ v2i64_r = __lsx_vmulwev_q_du(v2u64_a, v2u64_b); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vmulwev.q.du( ++ ++ // __lsx_vmulwod_q_du ++ // vd, vj, vk ++ // V2DI, UV2DI, UV2DI ++ v2i64_r = __lsx_vmulwod_q_du(v2u64_a, v2u64_b); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vmulwod.q.du( ++ ++ // __lsx_vmulwev_q_du_d ++ // vd, vj, vk ++ // V2DI, UV2DI, V2DI ++ v2i64_r = __lsx_vmulwev_q_du_d(v2u64_a, v2i64_b); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vmulwev.q.du.d( ++ ++ // __lsx_vmulwod_q_du_d ++ // vd, vj, vk ++ // V2DI, UV2DI, V2DI ++ v2i64_r = __lsx_vmulwod_q_du_d(v2u64_a, v2i64_b); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vmulwod.q.du.d( ++ ++ // __lsx_vhaddw_q_d ++ // vd, vj, vk ++ // V2DI, V2DI, V2DI ++ v2i64_r = __lsx_vhaddw_q_d(v2i64_a, v2i64_b); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vhaddw.q.d( ++ ++ // __lsx_vhaddw_qu_du ++ // vd, vj, vk ++ // UV2DI, UV2DI, UV2DI ++ v2u64_r = __lsx_vhaddw_qu_du(v2u64_a, v2u64_b); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vhaddw.qu.du( ++ ++ // __lsx_vhsubw_q_d ++ // vd, vj, vk ++ // V2DI, V2DI, V2DI ++ v2i64_r = __lsx_vhsubw_q_d(v2i64_a, v2i64_b); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vhsubw.q.d( ++ ++ // __lsx_vhsubw_qu_du ++ // vd, vj, vk ++ // UV2DI, UV2DI, UV2DI ++ v2u64_r = __lsx_vhsubw_qu_du(v2u64_a, v2u64_b); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vhsubw.qu.du( ++ ++ // __lsx_vmaddwev_d_w ++ // vd, vj, vk ++ // V2DI, V2DI, V4SI, V4SI ++ v2i64_r = __lsx_vmaddwev_d_w(v2i64_a, v4i32_b, v4i32_c); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vmaddwev.d.w( ++ ++ // __lsx_vmaddwev_w_h ++ // vd, vj, vk ++ // V4SI, V4SI, V8HI, V8HI ++ v4i32_r = __lsx_vmaddwev_w_h(v4i32_a, v8i16_b, v8i16_c); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vmaddwev.w.h( ++ ++ // __lsx_vmaddwev_h_b ++ // vd, vj, vk ++ // V8HI, V8HI, V16QI, V16QI ++ v8i16_r = __lsx_vmaddwev_h_b(v8i16_a, v16i8_b, v16i8_c); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vmaddwev.h.b( ++ ++ // __lsx_vmaddwev_d_wu ++ // vd, vj, vk ++ // UV2DI, UV2DI, UV4SI, UV4SI ++ v2u64_r = __lsx_vmaddwev_d_wu(v2u64_a, v4u32_b, v4u32_c); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vmaddwev.d.wu( ++ ++ // __lsx_vmaddwev_w_hu ++ // vd, vj, vk ++ // UV4SI, UV4SI, UV8HI, UV8HI ++ v4u32_r = __lsx_vmaddwev_w_hu(v4u32_a, v8u16_b, v8u16_c); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vmaddwev.w.hu( ++ ++ // __lsx_vmaddwev_h_bu ++ // vd, vj, vk ++ // UV8HI, UV8HI, UV16QI, UV16QI ++ 
v8u16_r = __lsx_vmaddwev_h_bu(v8u16_a, v16u8_b, v16u8_c); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vmaddwev.h.bu( ++ ++ // __lsx_vmaddwod_d_w ++ // vd, vj, vk ++ // V2DI, V2DI, V4SI, V4SI ++ v2i64_r = __lsx_vmaddwod_d_w(v2i64_a, v4i32_b, v4i32_c); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vmaddwod.d.w( ++ ++ // __lsx_vmaddwod_w_h ++ // vd, vj, vk ++ // V4SI, V4SI, V8HI, V8HI ++ v4i32_r = __lsx_vmaddwod_w_h(v4i32_a, v8i16_b, v8i16_c); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vmaddwod.w.h( ++ ++ // __lsx_vmaddwod_h_b ++ // vd, vj, vk ++ // V8HI, V8HI, V16QI, V16QI ++ v8i16_r = __lsx_vmaddwod_h_b(v8i16_a, v16i8_b, v16i8_c); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vmaddwod.h.b( ++ ++ // __lsx_vmaddwod_d_wu ++ // vd, vj, vk ++ // UV2DI, UV2DI, UV4SI, UV4SI ++ v2u64_r = __lsx_vmaddwod_d_wu(v2u64_a, v4u32_b, v4u32_c); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vmaddwod.d.wu( ++ ++ // __lsx_vmaddwod_w_hu ++ // vd, vj, vk ++ // UV4SI, UV4SI, UV8HI, UV8HI ++ v4u32_r = __lsx_vmaddwod_w_hu(v4u32_a, v8u16_b, v8u16_c); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vmaddwod.w.hu( ++ ++ // __lsx_vmaddwod_h_bu ++ // vd, vj, vk ++ // UV8HI, UV8HI, UV16QI, UV16QI ++ v8u16_r = __lsx_vmaddwod_h_bu(v8u16_a, v16u8_b, v16u8_c); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vmaddwod.h.bu( ++ ++ // __lsx_vmaddwev_d_wu_w ++ // vd, vj, vk ++ // V2DI, V2DI, UV4SI, V4SI ++ v2i64_r = __lsx_vmaddwev_d_wu_w(v2i64_a, v4u32_b, v4i32_c); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vmaddwev.d.wu.w( ++ ++ // __lsx_vmaddwev_w_hu_h ++ // vd, vj, vk ++ // V4SI, V4SI, UV8HI, V8HI ++ v4i32_r = __lsx_vmaddwev_w_hu_h(v4i32_a, v8u16_b, v8i16_c); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vmaddwev.w.hu.h( ++ ++ // __lsx_vmaddwev_h_bu_b ++ // vd, vj, vk ++ // V8HI, V8HI, UV16QI, V16QI ++ v8i16_r = __lsx_vmaddwev_h_bu_b(v8i16_a, v16u8_b, v16i8_c); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vmaddwev.h.bu.b( ++ ++ // __lsx_vmaddwod_d_wu_w ++ // vd, vj, vk ++ // V2DI, V2DI, UV4SI, V4SI ++ v2i64_r = __lsx_vmaddwod_d_wu_w(v2i64_a, v4u32_b, v4i32_c); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vmaddwod.d.wu.w( ++ ++ // __lsx_vmaddwod_w_hu_h ++ // vd, vj, vk ++ // V4SI, V4SI, UV8HI, V8HI ++ v4i32_r = __lsx_vmaddwod_w_hu_h(v4i32_a, v8u16_b, v8i16_c); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vmaddwod.w.hu.h( ++ ++ // __lsx_vmaddwod_h_bu_b ++ // vd, vj, vk ++ // V8HI, V8HI, UV16QI, V16QI ++ v8i16_r = __lsx_vmaddwod_h_bu_b(v8i16_a, v16u8_b, v16i8_c); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vmaddwod.h.bu.b( ++ ++ // __lsx_vmaddwev_q_d ++ // vd, vj, vk ++ // V2DI, V2DI, V2DI, V2DI ++ v2i64_r = __lsx_vmaddwev_q_d(v2i64_a, v2i64_b, v2i64_c); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vmaddwev.q.d( ++ ++ // __lsx_vmaddwod_q_d ++ // vd, vj, vk ++ // V2DI, V2DI, V2DI, V2DI ++ v2i64_r = __lsx_vmaddwod_q_d(v2i64_a, v2i64_b, v2i64_c); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vmaddwod.q.d( ++ ++ // __lsx_vmaddwev_q_du ++ // vd, vj, vk ++ // UV2DI, UV2DI, UV2DI, UV2DI ++ v2u64_r = __lsx_vmaddwev_q_du(v2u64_a, v2u64_b, v2u64_c); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vmaddwev.q.du( ++ ++ // __lsx_vmaddwod_q_du ++ // vd, vj, vk ++ // UV2DI, UV2DI, UV2DI, UV2DI ++ v2u64_r = __lsx_vmaddwod_q_du(v2u64_a, v2u64_b, v2u64_c); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vmaddwod.q.du( ++ ++ // __lsx_vmaddwev_q_du_d ++ // vd, vj, vk ++ // V2DI, V2DI, UV2DI, V2DI ++ v2i64_r = __lsx_vmaddwev_q_du_d(v2i64_a, v2u64_b, v2i64_c); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vmaddwev.q.du.d( ++ ++ // __lsx_vmaddwod_q_du_d ++ // vd, 
vj, vk ++ // V2DI, V2DI, UV2DI, V2DI ++ v2i64_r = __lsx_vmaddwod_q_du_d(v2i64_a, v2u64_b, v2i64_c); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vmaddwod.q.du.d( ++ ++ // __lsx_vrotr_b ++ // vd, vj, vk ++ // V16QI, V16QI, V16QI ++ v16i8_r = __lsx_vrotr_b(v16i8_a, v16i8_b); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vrotr.b( ++ ++ // __lsx_vrotr_h ++ // vd, vj, vk ++ // V8HI, V8HI, V8HI ++ v8i16_r = __lsx_vrotr_h(v8i16_a, v8i16_b); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vrotr.h( ++ ++ // __lsx_vrotr_w ++ // vd, vj, vk ++ // V4SI, V4SI, V4SI ++ v4i32_r = __lsx_vrotr_w(v4i32_a, v4i32_b); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vrotr.w( ++ ++ // __lsx_vrotr_d ++ // vd, vj, vk ++ // V2DI, V2DI, V2DI ++ v2i64_r = __lsx_vrotr_d(v2i64_a, v2i64_b); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vrotr.d( ++ ++ // __lsx_vadd_q ++ // vd, vj, vk ++ // V2DI, V2DI, V2DI ++ v2i64_r = __lsx_vadd_q(v2i64_a, v2i64_b); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vadd.q( ++ ++ // __lsx_vsub_q ++ // vd, vj, vk ++ // V2DI, V2DI, V2DI ++ v2i64_r = __lsx_vsub_q(v2i64_a, v2i64_b); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vsub.q( ++ ++ // __lsx_vldrepl_b ++ // vd, rj, si12 ++ // V16QI, CVPOINTER, SI ++ v16i8_r = __lsx_vldrepl_b(&v16i8_a, si12); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vldrepl.b( ++ ++ // __lsx_vldrepl_h ++ // vd, rj, si11 ++ // V8HI, CVPOINTER, SI ++ v8i16_r = __lsx_vldrepl_h(&v8i16_a, si11); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vldrepl.h( ++ ++ // __lsx_vldrepl_w ++ // vd, rj, si10 ++ // V4SI, CVPOINTER, SI ++ v4i32_r = __lsx_vldrepl_w(&v4i32_a, si10); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vldrepl.w( ++ ++ // __lsx_vldrepl_d ++ // vd, rj, si9 ++ // V2DI, CVPOINTER, SI ++ v2i64_r = __lsx_vldrepl_d(&v2i64_a, si9); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vldrepl.d( ++ ++ // __lsx_vmskgez_b ++ // vd, vj ++ // V16QI, V16QI ++ v16i8_r = __lsx_vmskgez_b(v16i8_a); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vmskgez.b( ++ ++ // __lsx_vmsknz_b ++ // vd, vj ++ // V16QI, V16QI ++ v16i8_r = __lsx_vmsknz_b(v16i8_a); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vmsknz.b( ++ ++ // __lsx_vexth_h_b ++ // vd, vj ++ // V8HI, V16QI ++ v8i16_r = __lsx_vexth_h_b(v16i8_a); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vexth.h.b( ++ ++ // __lsx_vexth_w_h ++ // vd, vj ++ // V4SI, V8HI ++ v4i32_r = __lsx_vexth_w_h(v8i16_a); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vexth.w.h( ++ ++ // __lsx_vexth_d_w ++ // vd, vj ++ // V2DI, V4SI ++ v2i64_r = __lsx_vexth_d_w(v4i32_a); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vexth.d.w( ++ ++ // __lsx_vexth_q_d ++ // vd, vj ++ // V2DI, V2DI ++ v2i64_r = __lsx_vexth_q_d(v2i64_a); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vexth.q.d( ++ ++ // __lsx_vexth_hu_bu ++ // vd, vj ++ // UV8HI, UV16QI ++ v8u16_r = __lsx_vexth_hu_bu(v16u8_a); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vexth.hu.bu( ++ ++ // __lsx_vexth_wu_hu ++ // vd, vj ++ // UV4SI, UV8HI ++ v4u32_r = __lsx_vexth_wu_hu(v8u16_a); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vexth.wu.hu( ++ ++ // __lsx_vexth_du_wu ++ // vd, vj ++ // UV2DI, UV4SI ++ v2u64_r = __lsx_vexth_du_wu(v4u32_a); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vexth.du.wu( ++ ++ // __lsx_vexth_qu_du ++ // vd, vj ++ // UV2DI, UV2DI ++ v2u64_r = __lsx_vexth_qu_du(v2u64_a); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vexth.qu.du( ++ ++ // __lsx_vrotri_b ++ // vd, vj, ui3 ++ // V16QI, V16QI, UQI ++ v16i8_r = __lsx_vrotri_b(v16i8_a, ui3); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vrotri.b( ++ ++ // __lsx_vrotri_h ++ // 
vd, vj, ui4 ++ // V8HI, V8HI, UQI ++ v8i16_r = __lsx_vrotri_h(v8i16_a, ui4); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vrotri.h( ++ ++ // __lsx_vrotri_w ++ // vd, vj, ui5 ++ // V4SI, V4SI, UQI ++ v4i32_r = __lsx_vrotri_w(v4i32_a, ui5); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vrotri.w( ++ ++ // __lsx_vrotri_d ++ // vd, vj, ui6 ++ // V2DI, V2DI, UQI ++ v2i64_r = __lsx_vrotri_d(v2i64_a, ui6); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vrotri.d( ++ ++ // __lsx_vextl_q_d ++ // vd, vj ++ // V2DI, V2DI ++ v2i64_r = __lsx_vextl_q_d(v2i64_a); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vextl.q.d( ++ ++ // __lsx_vsrlni_b_h ++ // vd, vj, ui4 ++ // V16QI, V16QI, V16QI, USI ++ v16i8_r = __lsx_vsrlni_b_h(v16i8_a, v16i8_b, ui4); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vsrlni.b.h( ++ ++ // __lsx_vsrlni_h_w ++ // vd, vj, ui5 ++ // V8HI, V8HI, V8HI, USI ++ v8i16_r = __lsx_vsrlni_h_w(v8i16_a, v8i16_b, ui5); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vsrlni.h.w( ++ ++ // __lsx_vsrlni_w_d ++ // vd, vj, ui6 ++ // V4SI, V4SI, V4SI, USI ++ v4i32_r = __lsx_vsrlni_w_d(v4i32_a, v4i32_b, ui6); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vsrlni.w.d( ++ ++ // __lsx_vsrlni_d_q ++ // vd, vj, ui7 ++ // V2DI, V2DI, V2DI, USI ++ v2i64_r = __lsx_vsrlni_d_q(v2i64_a, v2i64_b, ui7); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vsrlni.d.q( ++ ++ // __lsx_vssrlni_b_h ++ // vd, vj, ui4 ++ // V16QI, V16QI, V16QI, USI ++ v16i8_r = __lsx_vssrlni_b_h(v16i8_a, v16i8_b, ui4); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vssrlni.b.h( ++ ++ // __lsx_vssrlni_h_w ++ // vd, vj, ui5 ++ // V8HI, V8HI, V8HI, USI ++ v8i16_r = __lsx_vssrlni_h_w(v8i16_a, v8i16_b, ui5); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vssrlni.h.w( ++ ++ // __lsx_vssrlni_w_d ++ // vd, vj, ui6 ++ // V4SI, V4SI, V4SI, USI ++ v4i32_r = __lsx_vssrlni_w_d(v4i32_a, v4i32_b, ui6); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vssrlni.w.d( ++ ++ // __lsx_vssrlni_d_q ++ // vd, vj, ui7 ++ // V2DI, V2DI, V2DI, USI ++ v2i64_r = __lsx_vssrlni_d_q(v2i64_a, v2i64_b, ui7); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vssrlni.d.q( ++ ++ // __lsx_vssrlni_bu_h ++ // vd, vj, ui4 ++ // UV16QI, UV16QI, V16QI, USI ++ v16u8_r = __lsx_vssrlni_bu_h(v16u8_a, v16i8_b, ui4); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vssrlni.bu.h( ++ ++ // __lsx_vssrlni_hu_w ++ // vd, vj, ui5 ++ // UV8HI, UV8HI, V8HI, USI ++ v8u16_r = __lsx_vssrlni_hu_w(v8u16_a, v8i16_b, ui5); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vssrlni.hu.w( ++ ++ // __lsx_vssrlni_wu_d ++ // vd, vj, ui6 ++ // UV4SI, UV4SI, V4SI, USI ++ v4u32_r = __lsx_vssrlni_wu_d(v4u32_a, v4i32_b, ui6); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vssrlni.wu.d( ++ ++ // __lsx_vssrlni_du_q ++ // vd, vj, ui7 ++ // UV2DI, UV2DI, V2DI, USI ++ v2u64_r = __lsx_vssrlni_du_q(v2u64_a, v2i64_b, ui7); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vssrlni.du.q( ++ ++ // __lsx_vssrlrni_b_h ++ // vd, vj, ui4 ++ // V16QI, V16QI, V16QI, USI ++ v16i8_r = __lsx_vssrlrni_b_h(v16i8_a, v16i8_b, ui4); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vssrlrni.b.h( ++ ++ // __lsx_vssrlrni_h_w ++ // vd, vj, ui5 ++ // V8HI, V8HI, V8HI, USI ++ v8i16_r = __lsx_vssrlrni_h_w(v8i16_a, v8i16_b, ui5); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vssrlrni.h.w( ++ ++ // __lsx_vssrlrni_w_d ++ // vd, vj, ui6 ++ // V4SI, V4SI, V4SI, USI ++ v4i32_r = __lsx_vssrlrni_w_d(v4i32_a, v4i32_b, ui6); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vssrlrni.w.d( ++ ++ // __lsx_vssrlrni_d_q ++ // vd, vj, ui7 ++ // V2DI, V2DI, V2DI, USI ++ v2i64_r = __lsx_vssrlrni_d_q(v2i64_a, v2i64_b, ui7); 
// CHECK: call <2 x i64> @llvm.loongarch.lsx.vssrlrni.d.q( ++ ++ // __lsx_vssrlrni_bu_h ++ // vd, vj, ui4 ++ // UV16QI, UV16QI, V16QI, USI ++ v16u8_r = __lsx_vssrlrni_bu_h(v16u8_a, v16i8_b, ui4); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vssrlrni.bu.h( ++ ++ // __lsx_vssrlrni_hu_w ++ // vd, vj, ui5 ++ // UV8HI, UV8HI, V8HI, USI ++ v8u16_r = __lsx_vssrlrni_hu_w(v8u16_a, v8i16_b, ui5); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vssrlrni.hu.w( ++ ++ // __lsx_vssrlrni_wu_d ++ // vd, vj, ui6 ++ // UV4SI, UV4SI, V4SI, USI ++ v4u32_r = __lsx_vssrlrni_wu_d(v4u32_a, v4i32_b, ui6); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vssrlrni.wu.d( ++ ++ // __lsx_vssrlrni_du_q ++ // vd, vj, ui7 ++ // UV2DI, UV2DI, V2DI, USI ++ v2u64_r = __lsx_vssrlrni_du_q(v2u64_a, v2i64_b, ui7); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vssrlrni.du.q( ++ ++ // __lsx_vsrani_b_h ++ // vd, vj, ui4 ++ // V16QI, V16QI, V16QI, USI ++ v16i8_r = __lsx_vsrani_b_h(v16i8_a, v16i8_b, ui4); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vsrani.b.h( ++ ++ // __lsx_vsrani_h_w ++ // vd, vj, ui5 ++ // V8HI, V8HI, V8HI, USI ++ v8i16_r = __lsx_vsrani_h_w(v8i16_a, v8i16_b, ui5); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vsrani.h.w( ++ ++ // __lsx_vsrani_w_d ++ // vd, vj, ui6 ++ // V4SI, V4SI, V4SI, USI ++ v4i32_r = __lsx_vsrani_w_d(v4i32_a, v4i32_b, ui6); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vsrani.w.d( ++ ++ // __lsx_vsrani_d_q ++ // vd, vj, ui7 ++ // V2DI, V2DI, V2DI, USI ++ v2i64_r = __lsx_vsrani_d_q(v2i64_a, v2i64_b, ui7); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vsrani.d.q( ++ ++ // __lsx_vsrarni_b_h ++ // vd, vj, ui4 ++ // V16QI, V16QI, V16QI, USI ++ v16i8_r = __lsx_vsrarni_b_h(v16i8_a, v16i8_b, ui4); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vsrarni.b.h( ++ ++ // __lsx_vsrarni_h_w ++ // vd, vj, ui5 ++ // V8HI, V8HI, V8HI, USI ++ v8i16_r = __lsx_vsrarni_h_w(v8i16_a, v8i16_b, ui5); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vsrarni.h.w( ++ ++ // __lsx_vsrarni_w_d ++ // vd, vj, ui6 ++ // V4SI, V4SI, V4SI, USI ++ v4i32_r = __lsx_vsrarni_w_d(v4i32_a, v4i32_b, ui6); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vsrarni.w.d( ++ ++ // __lsx_vsrarni_d_q ++ // vd, vj, ui7 ++ // V2DI, V2DI, V2DI, USI ++ v2i64_r = __lsx_vsrarni_d_q(v2i64_a, v2i64_b, ui7); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vsrarni.d.q( ++ ++ // __lsx_vssrani_b_h ++ // vd, vj, ui4 ++ // V16QI, V16QI, V16QI, USI ++ v16i8_r = __lsx_vssrani_b_h(v16i8_a, v16i8_b, ui4); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vssrani.b.h( ++ ++ // __lsx_vssrani_h_w ++ // vd, vj, ui5 ++ // V8HI, V8HI, V8HI, USI ++ v8i16_r = __lsx_vssrani_h_w(v8i16_a, v8i16_b, ui5); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vssrani.h.w( ++ ++ // __lsx_vssrani_w_d ++ // vd, vj, ui6 ++ // V4SI, V4SI, V4SI, USI ++ v4i32_r = __lsx_vssrani_w_d(v4i32_a, v4i32_b, ui6); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vssrani.w.d( ++ ++ // __lsx_vssrani_d_q ++ // vd, vj, ui7 ++ // V2DI, V2DI, V2DI, USI ++ v2i64_r = __lsx_vssrani_d_q(v2i64_a, v2i64_b, ui7); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vssrani.d.q( ++ ++ // __lsx_vssrani_bu_h ++ // vd, vj, ui4 ++ // UV16QI, UV16QI, V16QI, USI ++ v16u8_r = __lsx_vssrani_bu_h(v16u8_a, v16i8_b, ui4); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vssrani.bu.h( ++ ++ // __lsx_vssrani_hu_w ++ // vd, vj, ui5 ++ // UV8HI, UV8HI, V8HI, USI ++ v8u16_r = __lsx_vssrani_hu_w(v8u16_a, v8i16_b, ui5); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vssrani.hu.w( ++ ++ // __lsx_vssrani_wu_d ++ // vd, vj, ui6 ++ // UV4SI, UV4SI, V4SI, USI ++ v4u32_r = 
__lsx_vssrani_wu_d(v4u32_a, v4i32_b, ui6); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vssrani.wu.d( ++ ++ // __lsx_vssrani_du_q ++ // vd, vj, ui7 ++ // UV2DI, UV2DI, V2DI, USI ++ v2u64_r = __lsx_vssrani_du_q(v2u64_a, v2i64_b, ui7); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vssrani.du.q( ++ ++ // __lsx_vssrarni_b_h ++ // vd, vj, ui4 ++ // V16QI, V16QI, V16QI, USI ++ v16i8_r = __lsx_vssrarni_b_h(v16i8_a, v16i8_b, ui4); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vssrarni.b.h( ++ ++ // __lsx_vssrarni_h_w ++ // vd, vj, ui5 ++ // V8HI, V8HI, V8HI, USI ++ v8i16_r = __lsx_vssrarni_h_w(v8i16_a, v8i16_b, ui5); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vssrarni.h.w( ++ ++ // __lsx_vssrarni_w_d ++ // vd, vj, ui6 ++ // V4SI, V4SI, V4SI, USI ++ v4i32_r = __lsx_vssrarni_w_d(v4i32_a, v4i32_b, ui6); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vssrarni.w.d( ++ ++ // __lsx_vssrarni_d_q ++ // vd, vj, ui7 ++ // V2DI, V2DI, V2DI, USI ++ v2i64_r = __lsx_vssrarni_d_q(v2i64_a, v2i64_b, ui7); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vssrarni.d.q( ++ ++ // __lsx_vssrarni_bu_h ++ // vd, vj, ui4 ++ // UV16QI, UV16QI, V16QI, USI ++ v16u8_r = __lsx_vssrarni_bu_h(v16u8_a, v16i8_b, ui4); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vssrarni.bu.h( ++ ++ // __lsx_vssrarni_hu_w ++ // vd, vj, ui5 ++ // UV8HI, UV8HI, V8HI, USI ++ v8u16_r = __lsx_vssrarni_hu_w(v8u16_a, v8i16_b, ui5); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vssrarni.hu.w( ++ ++ // __lsx_vssrarni_wu_d ++ // vd, vj, ui6 ++ // UV4SI, UV4SI, V4SI, USI ++ v4u32_r = __lsx_vssrarni_wu_d(v4u32_a, v4i32_b, ui6); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vssrarni.wu.d( ++ ++ // __lsx_vssrarni_du_q ++ // vd, vj, ui7 ++ // UV2DI, UV2DI, V2DI, USI ++ v2u64_r = __lsx_vssrarni_du_q(v2u64_a, v2i64_b, ui7); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vssrarni.du.q( ++ ++ // __lsx_vpermi_w ++ // vd, vj, ui8 ++ // V4SI, V4SI, V4SI, USI ++ v4i32_r = __lsx_vpermi_w(v4i32_a, v4i32_b, ui8); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vpermi.w( ++ ++ // __lsx_vld ++ // vd, rj, si12 ++ // V16QI, CVPOINTER, SI ++ v16i8_r = __lsx_vld(&v16i8_a, si12); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vld( ++ ++ // __lsx_vst ++ // vd, rj, si12 ++ // VOID, V16QI, CVPOINTER, SI ++ __lsx_vst(v16i8_a, &v16i8_b, 0); // CHECK: call void @llvm.loongarch.lsx.vst( ++ ++ // __lsx_vssrlrn_b_h ++ // vd, vj, vk ++ // V16QI, V8HI, V8HI ++ v16i8_r = __lsx_vssrlrn_b_h(v8i16_a, v8i16_b); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vssrlrn.b.h( ++ ++ // __lsx_vssrlrn_h_w ++ // vd, vj, vk ++ // V8HI, V4SI, V4SI ++ v8i16_r = __lsx_vssrlrn_h_w(v4i32_a, v4i32_b); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vssrlrn.h.w( ++ ++ // __lsx_vssrlrn_w_d ++ // vd, vj, vk ++ // V4SI, V2DI, V2DI ++ v4i32_r = __lsx_vssrlrn_w_d(v2i64_a, v2i64_b); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vssrlrn.w.d( ++ ++ // __lsx_vssrln_b_h ++ // vd, vj, vk ++ // V16QI, V8HI, V8HI ++ v16i8_r = __lsx_vssrln_b_h(v8i16_a, v8i16_b); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vssrln.b.h( ++ ++ // __lsx_vssrln_h_w ++ // vd, vj, vk ++ // V8HI, V4SI, V4SI ++ v8i16_r = __lsx_vssrln_h_w(v4i32_a, v4i32_b); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vssrln.h.w( ++ ++ // __lsx_vssrln_w_d ++ // vd, vj, vk ++ // V4SI, V2DI, V2DI ++ v4i32_r = __lsx_vssrln_w_d(v2i64_a, v2i64_b); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vssrln.w.d( ++ ++ // __lsx_vorn_v ++ // vd, vj, vk ++ // V16QI, V16QI, V16QI ++ v16i8_r = __lsx_vorn_v(v16i8_a, v16i8_b); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vorn.v( ++ ++ // __lsx_vldi ++ // vd, i13 
++ // V2DI, HI ++ v2i64_r = __lsx_vldi(i13); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vldi( ++ ++ // __lsx_vshuf_b ++ // vd, vj, vk, va ++ // V16QI, V16QI, V16QI, V16QI ++ v16i8_r = __lsx_vshuf_b(v16i8_a, v16i8_b, v16i8_c); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vshuf.b( ++ ++ // __lsx_vldx ++ // vd, rj, rk ++ // V16QI, CVPOINTER, DI ++ v16i8_r = __lsx_vldx(&v16i8_a, i64_d); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vldx( ++ ++ // __lsx_vstx ++ // vd, rj, rk ++ // VOID, V16QI, CVPOINTER, DI ++ __lsx_vstx(v16i8_a, &v16i8_b, i64_d); // CHECK: call void @llvm.loongarch.lsx.vstx( ++ ++ // __lsx_vextl_qu_du ++ // vd, vj ++ // UV2DI, UV2DI ++ v2u64_r = __lsx_vextl_qu_du(v2u64_a); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vextl.qu.du( ++ ++ // __lsx_bnz_v ++ // rd, vj ++ // SI, UV16QI ++ i32_r = __lsx_bnz_v(v16u8_a); // CHECK: call i32 @llvm.loongarch.lsx.bnz.v( ++ ++ // __lsx_bz_v ++ // rd, vj ++ // SI, UV16QI ++ i32_r = __lsx_bz_v(v16u8_a); // CHECK: call i32 @llvm.loongarch.lsx.bz.v( ++ ++ // __lsx_bnz_b ++ // rd, vj ++ // SI, UV16QI ++ i32_r = __lsx_bnz_b(v16u8_a); // CHECK: call i32 @llvm.loongarch.lsx.bnz.b( ++ ++ // __lsx_bnz_h ++ // rd, vj ++ // SI, UV8HI ++ i32_r = __lsx_bnz_h(v8u16_a); // CHECK: call i32 @llvm.loongarch.lsx.bnz.h( ++ ++ // __lsx_bnz_w ++ // rd, vj ++ // SI, UV4SI ++ i32_r = __lsx_bnz_w(v4u32_a); // CHECK: call i32 @llvm.loongarch.lsx.bnz.w( ++ ++ // __lsx_bnz_d ++ // rd, vj ++ // SI, UV2DI ++ i32_r = __lsx_bnz_d(v2u64_a); // CHECK: call i32 @llvm.loongarch.lsx.bnz.d( ++ ++ // __lsx_bz_b ++ // rd, vj ++ // SI, UV16QI ++ i32_r = __lsx_bz_b(v16u8_a); // CHECK: call i32 @llvm.loongarch.lsx.bz.b( ++ ++ // __lsx_bz_h ++ // rd, vj ++ // SI, UV8HI ++ i32_r = __lsx_bz_h(v8u16_a); // CHECK: call i32 @llvm.loongarch.lsx.bz.h( ++ ++ // __lsx_bz_w ++ // rd, vj ++ // SI, UV4SI ++ i32_r = __lsx_bz_w(v4u32_a); // CHECK: call i32 @llvm.loongarch.lsx.bz.w( ++ ++ // __lsx_bz_d ++ // rd, vj ++ // SI, UV2DI ++ i32_r = __lsx_bz_d(v2u64_a); // CHECK: call i32 @llvm.loongarch.lsx.bz.d( ++} +diff --git a/test/CodeGen/loongarch-inline-asm-modifiers.c b/test/CodeGen/loongarch-inline-asm-modifiers.c +new file mode 100644 +index 00000000..08822e64 +--- /dev/null ++++ b/test/CodeGen/loongarch-inline-asm-modifiers.c +@@ -0,0 +1,50 @@ ++// RUN: %clang -target loongarch64-unknown-linux-gnu -S -o - -emit-llvm %s \ ++// RUN: | FileCheck %s ++ ++// This checks that the frontend will accept inline asm operand modifiers ++ ++int printf(const char*, ...); ++ ++typedef long long v2i64 __attribute__ ((vector_size(16), aligned(16))); ++typedef long long v4i64 __attribute__ ((vector_size(32), aligned(32))); ++ ++// CHECK: %{{[0-9]+}} = call i32 asm "ld.w $0,$1;\0A", "=r,*m"(i32* elementtype(i32) getelementptr inbounds ([8 x i32], [8 x i32]* @b, i64 {{[0-9]+}}, i64 {{[0-9]+}})) #2, ++// CHECK: %{{[0-9]+}} = call i32 asm "ld.w $0,${1:D};\0A", "=r,*m"(i32* elementtype(i32) getelementptr inbounds ([8 x i32], [8 x i32]* @b, i64 {{[0-9]+}}, i64 {{[0-9]+}})) #2, ++// CHECK: %{{[0-9]+}} = call <2 x i64> asm "vldi ${0:w},1", "=f" ++// CHECK: %{{[0-9]+}} = call <4 x i64> asm "xldi ${0:u},1", "=f" ++int b[8] = {0,1,2,3,4,5,6,7}; ++int main() ++{ ++ int i; ++ v2i64 v2i64_r; ++ v4i64 v4i64_r; ++ ++ // The first word. 
Notice, no 'D' ++ {asm ( ++ "ld.w %0,%1;\n" ++ : "=r" (i) ++ : "m" (*(b+4)));} ++ ++ printf("%d\n",i); ++ ++ // The second word ++ {asm ( ++ "ld.w %0,%D1;\n" ++ : "=r" (i) ++ : "m" (*(b+4)) ++ );} ++ ++ // LSX registers ++ { asm("vldi %w0,1" ++ : "=f"(v2i64_r)); } ++ ++ printf("%d\n", i); ++ ++ // LASX registers ++ { asm("xldi %u0,1" ++ : "=f"(v4i64_r)); } ++ ++ printf("%d\n",i); ++ ++ return 1; ++} +diff --git a/test/CodeGen/loongarch-inline-asm.c b/test/CodeGen/loongarch-inline-asm.c +new file mode 100644 +index 00000000..dadb7e3f +--- /dev/null ++++ b/test/CodeGen/loongarch-inline-asm.c +@@ -0,0 +1,31 @@ ++// REQUIRES: loongarch-registered-target ++// RUN: %clang_cc1 -triple loongarch64-linux-gnu -emit-llvm -o - %s | FileCheck %s ++ ++int data; ++ ++void m () { ++ asm("ld.w $r1, %0" :: "m"(data)); ++ // CHECK: call void asm sideeffect "ld.w $$r1, $0", "*m"(i32* elementtype(i32) @data) ++} ++ ++void ZC () { ++ asm("ll.w $r1, %0" :: "ZC"(data)); ++ // CHECK: call void asm sideeffect "ll.w $$r1, $0", "*^ZC"(i32* elementtype(i32) @data) ++} ++ ++void ZB () { ++ asm("amadd_db.w $zero, $r1, %0" :: "ZB"(data)); ++ // CHECK: call void asm sideeffect "amadd_db.w $$zero, $$r1, $0", "*^ZB"(i32* elementtype(i32) @data) ++} ++ ++void R () { ++ asm("ld.w $r1, %0" :: "R"(data)); ++ // CHECK: call void asm sideeffect "ld.w $$r1, $0", "*R"(i32* elementtype(i32) @data) ++} ++ ++int *p; ++void preld () { ++ asm("preld 0, %0, 2" :: "r"(p)); ++ // CHECK: %0 = load i32*, i32** @p, align 8 ++ // CHECK: call void asm sideeffect "preld 0, $0, 2", "r"(i32* %0) ++} +diff --git a/test/CodeGenCXX/LoongArch/abi-lp64d-struct-inherit.cpp b/test/CodeGenCXX/LoongArch/abi-lp64d-struct-inherit.cpp +new file mode 100644 +index 00000000..cbe6469d +--- /dev/null ++++ b/test/CodeGenCXX/LoongArch/abi-lp64d-struct-inherit.cpp +@@ -0,0 +1,95 @@ ++// RUN: %clang_cc1 -triple loongarch64 -target-feature +f -target-feature +d -target-abi lp64d \ ++// RUN: -emit-llvm %s -o - | FileCheck %s ++ ++#include ++ ++/// Ensure that fields inherited from a parent struct are treated in the same ++/// way as fields directly in the child for the purposes of LoongArch ABI rules. 
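The inheritance cases that follow are easier to read if one keeps in mind the flattened aggregate each child reduces to. Below is a minimal C sketch, not part of the patch, of the shape the int32-parent / float-child case collapses to under lp64d; the lowering noted in the comment is an assumption that simply mirrors the CHECK line used further down for child2_float_s (one GPR plus one FPR):

  /* flattened analogue of the inherited case: one GPR + one FPR */
  #include <stdint.h>

  struct int32_float_pair {
      int32_t i;   /* field inherited from the parent struct */
      float   f;   /* field declared directly in the child */
  };

  /* expected IR signature, per the CHECK below: { i32, float } (i32, float) */
  struct int32_float_pair pass_pair(struct int32_float_pair a) { return a; }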
++ ++struct parent1_int32_s { ++ int32_t i1; ++}; ++ ++struct child1_int32_s : parent1_int32_s { ++ int32_t i2; ++}; ++ ++// CHECK-LABEL: define{{.*}} i64 @_Z30int32_int32_struct_inheritance14child1_int32_s(i64 %a.coerce) ++struct child1_int32_s int32_int32_struct_inheritance(struct child1_int32_s a) { ++ return a; ++} ++ ++struct parent2_int32_s { ++ int32_t i1; ++}; ++ ++struct child2_float_s : parent2_int32_s { ++ float f1; ++}; ++ ++// CHECK-LABEL: define{{.*}} { i32, float } @_Z30int32_float_struct_inheritance14child2_float_s(i32 %0, float %1) ++struct child2_float_s int32_float_struct_inheritance(struct child2_float_s a) { ++ return a; ++} ++ ++struct parent3_float_s { ++ float f1; ++}; ++ ++struct child3_int64_s : parent3_float_s { ++ int64_t i1; ++}; ++ ++// CHECK-LABEL: define{{.*}} { float, i64 } @_Z30float_int64_struct_inheritance14child3_int64_s(float %0, i64 %1) ++struct child3_int64_s float_int64_struct_inheritance(struct child3_int64_s a) { ++ return a; ++} ++ ++struct parent4_double_s { ++ double d1; ++}; ++ ++struct child4_double_s : parent4_double_s { ++ double d1; ++}; ++ ++// CHECK-LABEL: define{{.*}} { double, double } @_Z32double_double_struct_inheritance15child4_double_s(double %0, double %1) ++struct child4_double_s double_double_struct_inheritance(struct child4_double_s a) { ++ return a; ++} ++ ++/// When virtual inheritance is used, the resulting struct isn't eligible for ++/// passing in registers. ++ ++struct parent5_virtual_s { ++ int32_t i1; ++}; ++ ++struct child5_virtual_s : virtual parent5_virtual_s { ++ float f1; ++}; ++ ++// CHECK-LABEL: define{{.*}} void @_ZN16child5_virtual_sC1EOS_(%struct.child5_virtual_s*{{.*}} %this, %struct.child5_virtual_s*{{.*}} dereferenceable(12) %0) ++struct child5_virtual_s int32_float_virtual_struct_inheritance(struct child5_virtual_s a) { ++ return a; ++} ++ ++/// Check for correct lowering in the presence of diamoned inheritance. ++ ++struct parent6_float_s { ++ float f1; ++}; ++ ++struct child6a_s : parent6_float_s { ++}; ++ ++struct child6b_s : parent6_float_s { ++}; ++ ++struct grandchild_6_s : child6a_s, child6b_s { ++}; ++ ++// CHECK-LABEL: define{{.*}} { float, float } @_Z38float_float_diamond_struct_inheritance14grandchild_6_s(float %0, float %1) ++struct grandchild_6_s float_float_diamond_struct_inheritance(struct grandchild_6_s a) { ++ return a; ++} +diff --git a/test/Driver/loongarch-abi-fpu.c b/test/Driver/loongarch-abi-fpu.c +new file mode 100644 +index 00000000..180d440c +--- /dev/null ++++ b/test/Driver/loongarch-abi-fpu.c +@@ -0,0 +1,26 @@ ++/// Check passing -mabi= and -mfpu= options to the backend. 
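The RUN lines that follow encode which -mabi/-mfpu pairs the driver accepts: lp64s builds with any FPU setting, lp64f needs a hardware FPU (32- or 64-bit), and lp64d is only valid together with -mfpu=64. A compiled program can confirm which combination was selected by inspecting the predefined macros; this is a sketch only, and the macro names (taken from the LoongArch toolchain conventions) are an assumption about this particular clang port:

  #include <stdio.h>

  int main(void) {
  #if defined(__loongarch_double_float)
      puts("lp64d: 64-bit FPU, doubles passed in FP registers");
  #elif defined(__loongarch_single_float)
      puts("lp64f: 32-bit FPU, only floats passed in FP registers");
  #else
      puts("lp64s: soft-float parameter passing");
  #endif
      return 0;
  }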
++ ++// RUN: %clang -target loongarch64 %s -mabi=lp64s -mfpu=none -### 2>&1 \ ++// RUN: | FileCheck --check-prefix=FEATURE-NF-ND %s ++// RUN: %clang -target loongarch64 %s -mabi=lp64s -mfpu=32 -### 2>&1 \ ++// RUN: | FileCheck --check-prefix=FEATURE-F %s ++// RUN: %clang -target loongarch64 %s -mabi=lp64s -mfpu=64 -### 2>&1 \ ++// RUN: | FileCheck --check-prefix=FEATURE-D %s ++// RUN: %clang -target loongarch64 %s -mabi=lp64f -mfpu=none -### 2>&1 \ ++// RUN: | FileCheck --check-prefix=ERRLP64F-WITH-FPUNONE %s ++// RUN: %clang -target loongarch64 %s -mabi=lp64f -mfpu=32 -### 2>&1 \ ++// RUN: | FileCheck --check-prefix=FEATURE-F %s ++// RUN: %clang -target loongarch64 %s -mabi=lp64f -mfpu=64 -### 2>&1 \ ++// RUN: | FileCheck --check-prefix=FEATURE-D %s ++// RUN: %clang -target loongarch64 %s -mabi=lp64d -mfpu=none -### 2>&1 \ ++// RUN: | FileCheck --check-prefix=ERRLP64D-ONLY-FPU64 %s ++// RUN: %clang -target loongarch64 %s -mabi=lp64d -mfpu=32 -### 2>&1 \ ++// RUN: | FileCheck --check-prefix=ERRLP64D-ONLY-FPU64 %s ++// RUN: %clang -target loongarch64 %s -mabi=lp64d -mfpu=64 -### 2>&1 \ ++// RUN: | FileCheck --check-prefix=FEATURE-D %s ++ ++// FEATURE-D: "-target-feature" "+d" ++// FEATURE-F: "-target-feature" "+f" ++// FEATURE-NF-ND: "-target-feature" "-f" "-target-feature" "-d" ++// ERRLP64D-ONLY-FPU64: error: option 'lp64d' cannot be specified without '-mfpu=64' ++// ERRLP64F-WITH-FPUNONE: error: option 'lp64f' cannot be specified with '-mfpu=none' +diff --git a/test/Driver/loongarch-alignment-feature.c b/test/Driver/loongarch-alignment-feature.c +new file mode 100644 +index 00000000..2270ff53 +--- /dev/null ++++ b/test/Driver/loongarch-alignment-feature.c +@@ -0,0 +1,8 @@ ++// RUN: %clang -target loongarch64-unknown-linux-gnu -mno-strict-align -### %s 2> %t ++// RUN: FileCheck --check-prefix=CHECK-UNALIGNED < %t %s ++ ++// RUN: %clang -target loongarch64-unknown-linux-gnu -mstrict-align -### %s 2> %t ++// RUN: FileCheck --check-prefix=CHECK-ALIGNED < %t %s ++ ++// CHECK-UNALIGNED: "-target-feature" "+unaligned-access" ++// CHECK-ALIGNED: "-target-feature" "-unaligned-access" +diff --git a/test/Driver/loongarch-double-single-soft.c b/test/Driver/loongarch-double-single-soft.c +new file mode 100644 +index 00000000..4b25f876 +--- /dev/null ++++ b/test/Driver/loongarch-double-single-soft.c +@@ -0,0 +1,12 @@ ++// Check passing -m*-float options to the backend. ++ ++// RUN: %clang -target loongarch64 %s -mdouble-float -### 2>&1 \ ++// RUN: | FileCheck --check-prefix=CHECK-DOUBLE %s ++// RUN: %clang -target loongarch64 %s -msingle-float -### 2>&1 \ ++// RUN: | FileCheck --check-prefix=CHECK-SINGLE %s ++// RUN: %clang -target loongarch64 %s -msoft-float -### 2>&1 \ ++// RUN: | FileCheck --check-prefix=CHECK-SOFT %s ++ ++// CHECK-DOUBLE: "-target-feature" "+d" "-target-abi" "lp64d" ++// CHECK-SINGLE: "-target-feature" "+f" "-target-abi" "lp64f" ++// CHECK-SOFT: "-target-feature" "-f" "-target-feature" "-d" "-target-abi" "lp64s" +diff --git a/test/Driver/loongarch-mabi.c b/test/Driver/loongarch-mabi.c +new file mode 100644 +index 00000000..88a90408 +--- /dev/null ++++ b/test/Driver/loongarch-mabi.c +@@ -0,0 +1,22 @@ ++// Check passing -mabi= options to the backend. 
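The check lines that follow pin the three ABI names down to the -target-abi value handed to cc1. The practical difference is easiest to see on a function that returns a double; the sketch below is not taken from the test, and the register behaviour in the comments is stated from the LoongArch psABI rather than verified against this toolchain:

  /* build once with -mabi=lp64d and once with -mabi=lp64s to compare */
  double half(double x) {
      /* lp64d: x arrives and the result returns in $fa0 */
      /* lp64s: the same bits travel in the integer register $a0 instead */
      return x * 0.5;
  }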
++ ++// check default ABI for loongarch64 ++// RUN: %clang -target loongarch64 %s -### 2>&1 \ ++// RUN: | FileCheck --check-prefix=CHECK-LP64D %s ++// check -mabi=lp64d option for loongarch64 ++// RUN: %clang -target loongarch64 %s -mabi=lp64d -### 2>&1 \ ++// RUN: | FileCheck --check-prefix=CHECK-LP64D %s ++// check -mabi=lp64f option for loongarch64 ++// RUN: %clang -target loongarch64 %s -mabi=lp64f -### 2>&1 \ ++// RUN: | FileCheck --check-prefix=CHECK-LP64F %s ++// check -mabi=lp64s option for loongarch64 ++// RUN: %clang -target loongarch64 %s -mabi=lp64s -### 2>&1 \ ++// RUN: | FileCheck --check-prefix=CHECK-LP64S %s ++// check invalid -mabi=x option for loongarch64 ++// RUN: not %clang -target loongarch64 %s -mabi=x 2>&1 \ ++// RUN: | FileCheck --check-prefix=CHECK-X %s ++ ++// CHECK-LP64D: "-target-abi" "lp64d" ++// CHECK-LP64F: "-target-abi" "lp64f" ++// CHECK-LP64S: "-target-abi" "lp64s" ++// CHECK-X: error: unknown target ABI 'x' +diff --git a/test/Driver/loongarch-mfpu.c b/test/Driver/loongarch-mfpu.c +new file mode 100644 +index 00000000..0cf05fd3 +--- /dev/null ++++ b/test/Driver/loongarch-mfpu.c +@@ -0,0 +1,21 @@ ++// Check passing -mfpu= options to the backend. ++ ++// check default feature for loongarch64 ++// RUN: %clang -target loongarch64 %s -### 2>&1 \ ++// RUN: | FileCheck --check-prefix=FEATURE-D %s ++// check -mfpu=64 option for loongarch64 ++// RUN: %clang -target loongarch64 %s -mfpu=64 -### 2>&1 \ ++// RUN: | FileCheck --check-prefix=FEATURE-D %s ++// check -mfpu=32 option for loongarch64 ++// RUN: %clang -target loongarch64 %s -mfpu=32 -### 2>&1 \ ++// RUN: | FileCheck --check-prefix=ERRLP64D-ONLY-FPU64 %s ++// check -mfpu=none option for loongarch64 ++// RUN: %clang -target loongarch64 %s -mfpu=none -### 2>&1 \ ++// RUN: | FileCheck --check-prefix=ERRLP64D-ONLY-FPU64 %s ++// check -mfpu=x option for loongarch64 ++// RUN: %clang -target loongarch64 %s -mfpu=x -### 2>&1 \ ++// RUN: | FileCheck --check-prefix=INVALID-FPU %s ++ ++// FEATURE-D: "-target-feature" "+d" ++// INVALID-FPU: error: invalid loongarch FPU value 'x'. 
Please specify FPU = 64,32 or none ++// ERRLP64D-ONLY-FPU64: error: option 'lp64d' cannot be specified without '-mfpu=64' +-- +2.38.1 + diff --git a/clang14/PKGBUILD b/clang14/PKGBUILD index 0b76be2b3b..13097ce52e 100644 --- a/clang14/PKGBUILD +++ b/clang14/PKGBUILD @@ -19,14 +19,18 @@ source=($_source_base/clang-$pkgver.src.tar.xz{,.sig} $_source_base/llvm-$pkgver.src.tar.xz{,.sig} enforce-instantiation-of-constexpr-template-functions.patch clang-coroutines-ubsan.patch - enable-fstack-protector-strong-by-default.patch) + enable-fstack-protector-strong-by-default.patch + 0001-add-loong64-support.patch + 0002-add-loong64-support.patch) sha256sums=('2b5847b6a63118b9efe5c85548363c81ffe096b66c3b3675e953e26342ae4031' 'SKIP' '050922ecaaca5781fdf6631ea92bc715183f202f9d2f15147226f023414f619a' 'SKIP' 'eb2916131ae63b3bd1689f6a27dc0c2fadad73a5c5f2c828062c8a2c547e4a0d' '2c25ddf0ba6be01949842873fef4d285456321aaccd4ba95db61b69a4c580106' - '7a9ce949579a3b02d4b91b6835c4fb45adc5f743007572fb0e28e6433e48f3a5') + '7a9ce949579a3b02d4b91b6835c4fb45adc5f743007572fb0e28e6433e48f3a5' + 'ac0284b611d21a01327b66ad1ab9dfb83cb883b7f99b7960418477b7d2c04dfc' + '192ecde3154897e13b6d23de9aefca3022ad43a1339a1c4bfa27832e6166886c') validpgpkeys=('474E22316ABF4785A88C6E8EA2C794A986419D8A') # Tom Stellard # Utilizing LLVM_DISTRIBUTION_COMPONENTS to avoid @@ -62,6 +66,10 @@ prepare() { # https://github.com/llvm/llvm-project/issues/49689 patch -Np2 -i ../clang-coroutines-ubsan.patch + + # https://github.com/llvm/llvm-project/issues/54116 + patch -Np1 -i ../0001-add-loong64-support.patch + patch -Np1 -i ../0002-add-loong64-support.patch } build() { diff --git a/clash/PKGBUILD b/clash/PKGBUILD index 4b0af71517..b359732c89 100644 --- a/clash/PKGBUILD +++ b/clash/PKGBUILD @@ -25,8 +25,12 @@ build() { export CGO_CXXFLAGS="${CXXFLAGS}" export CGO_LDFLAGS="${LDFLAGS}" export GOFLAGS="-buildmode=pie -trimpath -mod=readonly -modcacherw" + export GOPROXY=https://goproxy.cn cd "${pkgname}-${pkgver}" + go mod edit -replace=go.etcd.io/bbolt=go.etcd.io/bbolt@v1.3.7-0.20221114114133-eedea6cb26ef + go mod edit -replace=github.com/u-root/uio=github.com/loongarch64/uio@dev-main + go mod tidy go build -trimpath -ldflags "-X github.com/Dreamacro/clash/constant.Version=${pkgver}" -mod=readonly } diff --git a/cln/PKGBUILD b/cln/PKGBUILD index 4645f6bb9a..11252173a3 100644 --- a/cln/PKGBUILD +++ b/cln/PKGBUILD @@ -10,8 +10,15 @@ url="https://www.ginac.de/CLN/" license=('GPL') depends=('gmp') makedepends=('texlive-latex') -source=(https://www.ginac.de/CLN/${pkgname}-${pkgver}.tar.bz2) -sha256sums=('f492530e8879bda529009b6033e1923c8f4aae843149fc28c667c20b094d984a') +source=(https://www.ginac.de/CLN/${pkgname}-${pkgver}.tar.bz2 + cln-la64.patch) +sha256sums=('f492530e8879bda529009b6033e1923c8f4aae843149fc28c667c20b094d984a' + '8cea3f49ab301dff766450bfdbff3ac4e5bf64cb60307f73681267aa151bb6c5') + +prepare() { + cd "$pkgname-$pkgver" + patch -p1 -i "$srcdir/cln-la64.patch" +} build() { cd ${pkgname}-${pkgver} diff --git a/cln/cln-la64.patch b/cln/cln-la64.patch new file mode 100644 index 0000000000..df654d5ba3 --- /dev/null +++ b/cln/cln-la64.patch @@ -0,0 +1,53 @@ +Index: cln-1.3.6/include/cln/object.h +=================================================================== +--- cln-1.3.6.orig/include/cln/object.h ++++ cln-1.3.6/include/cln/object.h +@@ -25,7 +25,7 @@ namespace cln { + #if defined(__i386__) || (defined(__mips__) && !defined(__LP64__)) || (defined(__sparc__) && !defined(__arch64__)) || defined(__hppa__) || defined(__arm__) || defined(__rs6000__) 
|| defined(__m88k__) || defined(__convex__) || (defined(__s390__) && !defined(__s390x__)) || defined(__sh__) || (defined(__x86_64__) && defined(__ILP32__)) + #define cl_word_alignment 4 + #endif +-#if defined(__alpha__) || defined(__ia64__) || defined(__mips64__) || defined(__powerpc64__) || (defined(__sparc__) && defined(__arch64__)) || (defined(__x86_64__) && !defined(__ILP32__)) || defined(__s390x__) || defined(__aarch64__) || (defined(__riscv) && __riscv_xlen == 64) || defined(__e2k__) ++#if defined(__alpha__) || defined(__ia64__) || defined(__mips64__) || defined(__powerpc64__) || (defined(__sparc__) && defined(__arch64__)) || (defined(__x86_64__) && !defined(__ILP32__)) || defined(__s390x__) || defined(__aarch64__) || (defined(__riscv) && __riscv_xlen == 64) || defined(__e2k__) || defined(__loongarch64) + #define cl_word_alignment 8 + #endif + #if !defined(cl_word_alignment) +Index: cln-1.3.6/include/cln/types.h +=================================================================== +--- cln-1.3.6.orig/include/cln/types.h ++++ cln-1.3.6/include/cln/types.h +@@ -51,7 +51,7 @@ + #undef HAVE_LONGLONG + #endif + #endif +- #if defined(HAVE_LONGLONG) && (defined(__alpha__) || defined(__ia64__) || defined(__mips64__) || defined(__powerpc64__) || defined(__s390x__) || (defined(__sparc__) && defined(__arch64__)) || (defined(__x86_64__) || defined(_M_AMD64)) || defined(__aarch64__) || (defined(__riscv) && __riscv_xlen == 64)) || defined(__e2k__) ++ #if defined(HAVE_LONGLONG) && (defined(__alpha__) || defined(__ia64__) || defined(__mips64__) || defined(__powerpc64__) || defined(__s390x__) || (defined(__sparc__) && defined(__arch64__)) || (defined(__x86_64__) || defined(_M_AMD64)) || defined(__aarch64__) || (defined(__riscv) && __riscv_xlen == 64)) || defined(__e2k__) || defined(__loongarch64) + // 64 bit registers in hardware + #define HAVE_FAST_LONGLONG + #endif +@@ -79,7 +79,7 @@ + + // Integer type used for counters. + // Constraint: sizeof(uintC) >= sizeof(uintL) +- #if (defined(HAVE_FAST_LONGLONG) && (defined(__alpha__) || defined(__ia64__) || defined(__powerpc64__) || defined(__s390x__) || (defined(__sparc__) && defined(__arch64__)) || defined(__x86_64__) || defined(__aarch64__) || defined(__mips64__) || (defined(__riscv) && __riscv_xlen == 64) || defined(__e2k__))) ++ #if (defined(HAVE_FAST_LONGLONG) && (defined(__alpha__) || defined(__ia64__) || defined(__powerpc64__) || defined(__s390x__) || (defined(__sparc__) && defined(__arch64__)) || defined(__x86_64__) || defined(__aarch64__) || defined(__mips64__) || (defined(__riscv) && __riscv_xlen == 64) || defined(__e2k__)) || defined(__loongarch64)) + #define intCsize long_bitsize + typedef long sintC; + typedef unsigned long uintC; +@@ -91,7 +91,7 @@ + + // Integer type used for lfloat exponents. 
+ // Constraint: sizeof(uintE) >= sizeof(uintC) +- #if (defined(HAVE_LONGLONG) && (defined(__alpha__) || defined(__ia64__) || defined(__powerpc64__) || defined(__s390x__) || (defined(__sparc__) && defined(__arch64__)) || defined(__x86_64__) || defined(__i386__) || defined(__mips__) || defined(__rs6000__) || defined(__aarch64__) || (defined(__riscv) && __riscv_xlen == 64) || defined(__e2k__))) ++ #if (defined(HAVE_LONGLONG) && (defined(__alpha__) || defined(__ia64__) || defined(__powerpc64__) || defined(__s390x__) || (defined(__sparc__) && defined(__arch64__)) || defined(__x86_64__) || defined(__i386__) || defined(__mips__) || defined(__rs6000__) || defined(__aarch64__) || (defined(__riscv) && __riscv_xlen == 64) || defined(__e2k__) || defined(__loongarch64))) + #define intEsize 64 + typedef sint64 sintE; + typedef uint64 uintE; +@@ -132,7 +132,7 @@ + typedef int sintD; + typedef unsigned int uintD; + #else // we are not using GMP, so just guess something reasonable +- #if (defined(HAVE_FAST_LONGLONG) && (defined(__alpha__) || defined(__ia64__) || defined(__powerpc64__) || (defined(__sparc__) && defined(__arch64__)) || defined(__s390x__) || defined(__x86_64__) || defined(__aarch64__) || defined(__mips64__) || (defined(__riscv) && __riscv_xlen == 64) || defined(__e2k__))) ++ #if (defined(HAVE_FAST_LONGLONG) && (defined(__alpha__) || defined(__ia64__) || defined(__powerpc64__) || (defined(__sparc__) && defined(__arch64__)) || defined(__s390x__) || defined(__x86_64__) || defined(__aarch64__) || defined(__mips64__) || (defined(__riscv) && __riscv_xlen == 64) || defined(__e2k__) || defined(__loongarch64))) + #define intDsize 64 + typedef sint64 sintD; + typedef uint64 uintD; diff --git a/clucene/PKGBUILD b/clucene/PKGBUILD index 828ce314f1..84602d628c 100644 --- a/clucene/PKGBUILD +++ b/clucene/PKGBUILD @@ -13,6 +13,7 @@ license=('APACHE' 'LGPL') depends=('gcc-libs>=4.7.1-5' 'zlib') makedepends=('cmake' 'boost>=1.54.0') source=(https://downloads.sourceforge.net/$pkgname/$pkgname-core-$pkgver.tar.gz + https://gitweb.gentoo.org/repo/gentoo.git/plain/dev-cpp/clucene/files/clucene-2.3.3.4-gmtime.patch # Fedora patches clucene-core-2.3.3.4-pkgconfig.patch clucene-core-2.3.3.4-install_contribs_lib.patch @@ -26,6 +27,7 @@ source=(https://downloads.sourceforge.net/$pkgname/$pkgname-core-$pkgver.tar.gz clucene-narrowing-conversions.patch clucene-multimap-put.patch) sha512sums=('1c9da9077edcebd46563bd9e47d330518e0b30061016650a759cfe051e9748fdad8932a472b1cca53a6adafed5f41656527271fc5f55ddfcefb558f0d83286b4' + '1c23c08edf5512c29a061e4a8515dfa209151af83e46680842f0aeee1269d529a14fd4a89aab7c25312fd7dbb9daf80912a265d21fcf08ac892a467be4a59a60' 'b357cb5a1c5d66219f3168724a71af3ebf5c45c752a612d3a69c170b739acc065dc17c261c3a730298ea6c637fe820637a100f73ab03d931734f80bb598fbf55' '0aa92635949089196e4e7c579e78761e8751987ef5036320a161a4aaa67da3c63756398c903419c76ea0fbdc8a949e871fcb65be98179a9853a24a5a4cacfde3' 'f606481b3bae44487a05e81da1e19dfa0bc5db8b10832d5b84c4e269fecb99ad010b90c5132e618c300f32b8c5bf28cfd0038c4ca2ddb4870c5a3f5113a18e64' @@ -51,6 +53,7 @@ prepare() { patch -Np0 -i "${srcdir}"/clucene-debug.patch patch -Np0 -i "${srcdir}"/clucene-narrowing-conversions.patch patch -Np0 -i "${srcdir}"/clucene-multimap-put.patch + patch -Np1 -i "${srcdir}"/clucene-2.3.3.4-gmtime.patch # FS#77036 / https://sourceforge.net/p/clucene/bugs/235/ patch -Np1 -i ../0001-Fix-missing-include-time.h.patch diff --git a/cni-plugins/PKGBUILD b/cni-plugins/PKGBUILD index f1ac376996..0458b04cdc 100644 --- a/cni-plugins/PKGBUILD +++ 
b/cni-plugins/PKGBUILD @@ -34,6 +34,9 @@ build() { export CGO_LDFLAGS="${LDFLAGS}" export GOPATH="${srcdir}" export GOFLAGS="-buildmode=pie -mod=readonly -modcacherw" + go mod edit -replace=golang.org/x/sys=github.com/golang/sys@v0.0.0-20220622161953-175b2fd9d664 + go mod edit -replace=golang.org/x/net=github.com/golang/net@v0.0.0-20220622184535-263ec571b305 + go mod tidy # custom go build calls, since build_linux.sh is not flexible enough for plugin in plugins/meta/* plugins/main/* plugins/ipam/*; do diff --git a/cobalt/PKGBUILD b/cobalt/PKGBUILD index 1f69e2ccd0..e1d3be7991 100644 --- a/cobalt/PKGBUILD +++ b/cobalt/PKGBUILD @@ -16,7 +16,7 @@ b2sums=('d99f09b5764339acdadae5142e429070c5634494538c0aa8a72e6f48114fbba0a12873c build() { cd ${pkgname}.rs-${pkgver} - cargo build --release --locked --features 'syntax-highlight sass' + cargo build --release --features 'syntax-highlight sass' } check() { diff --git a/cocogitto/PKGBUILD b/cocogitto/PKGBUILD index ecfdfd1f64..c12465f811 100644 --- a/cocogitto/PKGBUILD +++ b/cocogitto/PKGBUILD @@ -20,7 +20,7 @@ sha256sums=('2a0e332b7028ffcfeb113c734b4bf506c34362730e371b03a3e4a71142099330') prepare() { cd "$_archive" - cargo fetch --locked --target "$CARCH-unknown-linux-gnu" +# cargo fetch --locked --target "`uname -m`-unknown-linux-gnu" mkdir {completions,man} } diff --git a/code/PKGBUILD b/code/PKGBUILD index 21bccfc973..a1b509bcbe 100644 --- a/code/PKGBUILD +++ b/code/PKGBUILD @@ -44,6 +44,9 @@ case "$CARCH" in armv7h) _vscode_arch=arm ;; + loong64) + _vscode_arch=loong64 + ;; *) # Needed for mksrcinfo _vscode_arch=DUMMY diff --git a/coin-or-cbc/PKGBUILD b/coin-or-cbc/PKGBUILD index c7ed6f3600..9f39caa2be 100644 --- a/coin-or-cbc/PKGBUILD +++ b/coin-or-cbc/PKGBUILD @@ -22,10 +22,10 @@ source=($pkgname-$pkgver.tar.gz::https://github.com/coin-or/Cbc/archive/refs/tag sha256sums=('1fb591dd88336fdaf096b8e42e46111e41671a5eb85d4ee36e45baff1678bd33') build() { - cd Cbc-releases-$pkgver + cd Cbc-releases-$pkgver/Cbc ./configure --prefix=/usr \ --enable-cbc-parallel \ - --with-nauty-lib=/usr/lib/libnauty.a --with-nauty-incdir=/usr/include/nauty + --with-cbc-lib=/usr/lib/libnauty.a --with-cbc-incdir=/usr/include/nauty sed -i -e 's/ -shared / -Wl,-O1,--as-needed\0/g' libtool make } @@ -36,7 +36,7 @@ check() { } package() { - cd Cbc-releases-$pkgver + cd Cbc-releases-$pkgver/Cbc make DESTDIR="$pkgdir" install # Remove nauty from linker flags in pc file, it is statically compiled diff --git a/committed/PKGBUILD b/committed/PKGBUILD index 22657da4fb..8cb1aaa9bc 100644 --- a/committed/PKGBUILD +++ b/committed/PKGBUILD @@ -15,7 +15,7 @@ sha256sums=('bb89632260499ae9dbbf1b2cd43dc9d43337e75259f84f762f821b1eb358849b') prepare() { cd "$pkgname-$pkgver" - cargo fetch --locked --target "$CARCH-unknown-linux-gnu" + cargo fetch --locked --target "`uname -m`-unknown-linux-gnu" } build() { diff --git a/compiler-rt/PKGBUILD b/compiler-rt/PKGBUILD index 2040d9a63b..d9d6d302c3 100644 --- a/compiler-rt/PKGBUILD +++ b/compiler-rt/PKGBUILD @@ -36,6 +36,9 @@ prepare() { build() { cd compiler-rt-$pkgver.src/build +# clang didn't support -mlsx + CFLAGS=${CFLAGS/-mlsx /} + CXXFLAGS=${CFLAGS/-mlsx /} local cmake_args=( -G Ninja diff --git a/compiler-rt14/keys/pgp/B6C8F98282B944E3B0D5C2530FC3042E345AD05D.asc b/compiler-rt14/keys/pgp/B6C8F98282B944E3B0D5C2530FC3042E345AD05D.asc new file mode 100644 index 0000000000..7ef29ec73c --- /dev/null +++ b/compiler-rt14/keys/pgp/B6C8F98282B944E3B0D5C2530FC3042E345AD05D.asc @@ -0,0 +1,75 @@ +-----BEGIN PGP PUBLIC KEY BLOCK----- + 
+mQINBFS+1SABEACnmkESkY7eZq0GhDjbkWpKmURGk9+ycsfAhA44NqUvf4tk1GPM +5SkJ/fYedYZJaDVhIp98fHgucD0O+vjOzghtgwtITusYjiPHPFBd/MN+MQqSEAP+ +LUa/kjHLjgyXxKhFUIDGVaDWL5tKOA7/AQKl1TyJ8lz89NHQoUHFsF/hu10+qhJe +V65d32MXFehIUSvegh8DrPuExrliSiORO4HOhuc6151dWA4YBWVg4rX5kfKrGMMT +pTWnSSZtgoRhkKW2Ey8cmZUqPuUJIfWyeNVu1e4SFtAivLvu/Ymz2WBJcNA1ZlTr +RCOR5SIRgZ453pQnI/Bzna2nnJ/TV1gGJIGRahj/ini0cs2x1CILfS/YJQ3rWGGo +OxwG0BVmPk0cmLVtyTq8gUPwxcPUd6WcBKhot3TDMlrffZACnQwQjlVjk5S1dEEz +atUfpEuNitU9WOM4jr/gjv36ZNCOWm95YwLhsuci/NddBN8HXhyvs+zYTVZEXa2W +l/FqOdQsQqZBcJjjWckGKhESdd7934+cesGD3O8KaeSGxww7slJrS0+6QJ8oBoAB +P/WCn/y2AiY2syEKp3wYIGJyAbsm542zMZ4nc7pYfSu49mcyhQQICmqN5QvOyYUx +OSqwbAOUNtlOyeRLZNIKoXtTqWDEu5aEiDROTw6Rkq+dIcxPNgOLdeQ3HwARAQAB +tCFIYW5zIFdlbm5ib3JnIDxoYW5zQGNocm9taXVtLm9yZz6JAlQEEwEKAD4WIQS2 +yPmCgrlE47DVwlMPwwQuNFrQXQUCXKW+LwIbAwUJDwUmjQULCQgHAgYVCgkICwIE +FgIDAQIeAQIXgAAKCRAPwwQuNFrQXXw+EACc4n7pYF89qmi6k4u1H5PLPcRVw4Ch +zY293N5JT8dM7c5Q0opPcgSS625SzAzEA8I3kRakFMsYZmJ7NFeFwIV7iJnaolft +iGCinbnB6bF8NnaEUOU0Pl4ByAuPiZqq8t5ORWUnZX/iRtOFEmCyRWHJPxCPFcJG +XCmQHTwnucePFdvNoIHN8vbkrHU32SUQ3iL4aEH92Y2s4D3WoNMW7g3b7srRynO1 +pzrT+bhihrl1MAnR6FiS4lSjw7VaEon1PJyaxs6OYO2x/fEz+uUnNPYZGhHQDTQ8 +DUyXNlXQ1mOOTMAwxg5JmqWfA2y1pmgJGpKe92t6vpVe9E90GBS9oCvSFXzItNg+ +p+9ogNDxMWnT48fygCqDVpk/PLdlyuNAQfuvtcZb8h5y1bzcwwBGHWb9McG12Z/K +JpcWvSQe/eZ9uHcyj2+b7SQHIJL9eaBsyhgvv573PK62Rc8fze+HtwZMWMvw5Fsc ++q5pJ8JS8y3s/EZYJ8URQ00QWOL6DDN1ik0vjxZ6zf+dpK1/3jToSrTnsY5TxXAM +gxeoFVhAtccnoAYY2zp2Dp7JonGNqXrE8rjMe67QBWzVUADgWMlCvFZ4W7ZGcj9y +2XgA4DbOgJVsx3xAGA6FuEIV0UDwDo4WweWnD4Jo+KVC3nWGW8AjNQb9EAn33WlI +K/mivl/oxH2rx7kCDQRUvtUgARAA7EHGtB6wKGOsKoqNjk+dKxJil5vh+ui5ysLz +3wAXDYOA39nP5bvC1JNu3P8ZFwK6uPNm83ujasK42TSPT6zWyBlmbYF2V2VpsvL5 +QX+RJbWtvmqF9dwYa5u7jw4x21J+iT2U5zRDUvgc2UYTiVQGRnOYjtiSp+X4HCub +2umLniDi5r08iKIcgCYyhkhxu04bUpoOvoKhdGT/eDZmIZTCGreMUauiIGwoRqnY +UnVuHk0mTYSDylXt8w4XuFRAoFms060g+7yEDlYSCS7dTdViNFIjdIOLpBecMv7E +fFqOJakq0XcmNmHzL8IJMPw/I/fhiN9m4WaR2yR7lx3HofRXZQKIfjnedyAVV1AN +eRjif7QxPOHLbG7QhVWcHFgNg2GL7cyNMcl30LjEyL237ki4S8MA+GB9mMOlBqQQ +/PqFWaCPSaUoiBGKUFEr3+Q7GTL260GkaTeMQkau7+Eo2WgU2ymhi1jrMBMCvwRw +6CgIVATSciS1yDfAX344ISdXbz9rtdnBRnsaX+p84e12vfvjCjyR3xHdXx3Yb2rn +DT+4JX001DR8ZZkM8Ohi3rCc8vqBm/+ckzyhlj67SsLbhbBJxkieJqvILgkcNqwC +GvZLYK2AK8GCyUrp/eAPXoofE9kwGlfvdPM5giEwQ/+9eBUltQPp1iG35T1zg6EQ +MmjCfR0AEQEAAYkCPAQYAQIAJgIbDBYhBLbI+YKCuUTjsNXCUw/DBC40WtBdBQJa +XfpLBQkPBSarAAoJEA/DBC40WtBdPX8P/1ilEM2BomXdhUO1Vmh5DCHsFDpQtlN5 +cU+iBiQXaPdVaDyz1SYCziyD/hr70otJqe1eNf4kWxG/SVB7kav9WXxVDgsoRcF+ +IaZKK+Mhnt6il13dg/bDoblPdIDh3YJB+yDiuck+dciPMo2JI6LfrzJue318vRja +vZqotOY/pjuKywNQ74nVNbVcebfj0k9HQeXhxO42dabgm5fabYIkRzlcGUMCFr2l +RWz4nkLYPRQUWTJ47N4k/DLrHkClYebzifwCOFBKm7WpErEpd3B6Lq2RBZYwe6L5 +OBJj/MKSYP3+hjXkSLlq8nhaAhtMslShkyLvSuI+ZTxOGOnMDtL42TSDusw+r5eX +XCGMpT+7S52WysgmPOSHp+2opSYiRvFhOmOGcS6M2sSvmbZLpnrHfL0TlBqAExF3 +FGF+T4dvIAJw/+n2tc7OXgzb3UOgp4AAfvQYeeIbHI2z2sCgyv+EPldb9avPd1wo +xzaznnkToxkgsTZmKiVxGf5tg4w9m1aVvH3y3y6ox/j2BjgUZAFkDA+CUyvHuaub +sdMiJdqFOFAY4mDqLMkMAPlHBIQaUBwvbxPwoC4zoIsuSGUF9DCIqxQE2eH2vzBX +eUH6lXQaEv7eLTvuBNh9kFHAvOMV2Gb3FQoRpnqs3UFf2XOLHh5I0rmeWfSNSrXr +sfYgf//ax/x3uQINBFylxXABEAC2Qt89UYDndAxNoCIJktuSBWh9BxC1JPPQtmLd +XTsG5vd2h63rBN64ZYTGuW2AQxGV24ngP8rv5F1QzSPY0UgOt25r7pS3+1MZbv+d +sZTtN4LWTXRdIVU+wcqKX1FZCGDSuGs5EpyElnKHxxGh7Wi0KFZMN64t83WPrbzq +aiKrpp9/QHMUtrNqPgUBNKvH8k5g/AGa21+fF1kRsUtmsZbre4IK9bakIjmAfNMA +ZA/YnJy0Ou06HcFWzkfTRLMrQHINUzOzNOhhXuYx3h4qSrvcJnqoGMJ9pZkOfrEJ +VPQexYq3hvL1jwMLdFKDozViUx520/7K8frusf+Df0RlucEVF4QjAV4RAuHBtrzP 
+LkH/0v6U3u1rX+5VMK8otud43cXcNet/cZ97jRm2rPzviRgYI9EljjD9vGPCIzmo +aJYs+eNJRIJGPqzVV+AELiH9Bc9jCad8XeECBsTCVNx+kEijKclQWr+3y610SXNY +JRKzlPBlMrqJ0U+/vNo59TUgZlwC8KdbiWtxEQ3JYFT7rHVH9cQeAlLXAE0yIfZK ++ss2HpIXgBvJ4nNyNBcFzoqF/iKBcH6yYRILNSGLEKOBnX3/XpAlvnOB1gcTSOQY +frNoXHpA7yzpGh1MeypdCeOqOicZZRF/xX1KR6YDC5YDOFM2paydDNS1ql0Wp0VW +WcIp1wARAQABiQI8BBgBCgAmFiEEtsj5goK5ROOw1cJTD8MELjRa0F0FAlylxXAC +GwwFCQlmAYAACgkQD8MELjRa0F3Quw/+MVB3lHyIORyth4q9KsTUUXBW11UtjKqq +SML0nMuNiqHefNd9P1+zVougyF002TfjkSnOpOoH2Uub3iCX0Cfyigo0rcjBXAvO +j9N9g8eL1xBenTdxYiiHvvIm0BadikfsdoqQebv3ONFda7eoQl689LqMKZ9ZEOxi +w7xQKcIPiNEt2WvBVv4mpEFx1pDbLZ/bUgbR3t7v/t6ijAVdIOjQvW/WPemyRTcB +7iJd68H6Uou/Ofy5EPUH4c/heyCw+eUUFnC9msDIvwtTbkz0Aaa7awbpoegFMz2L +LmSRMLybFn5lQTRR7TizzUvrprOx+UalbUASJS+TONZmVltz0eVVeJ3IHylUM/24 +cBh2wXqR63osDCZZkXVxbN9AtyoezEVvg8+XhDLyXeh+o05A/lRjMA33BkwyoKzi +5nZb7iaVYWlKM8Zs6PrB8zq9ErDGcka7gikvUuJ2KLKjJqj19/6Z90oCtJQa9ifi +glN+ER3y4hLHFmKI6ns+GNf0FwpgwD7WD9XBQR9uxBPCrVjXXv4IT9rBidzXT8rK +iXYX9tHBHn2wAk28uJOtdDNcsOdOEqfdmIVfBXNv2df6r8ewEzpNd2MpEOZRW8mc +cn+5dkF+W2mGn8Vky04ewU2+Bo9rApv3zJ76s0Skt2c8axKKtLhHY/H5HPiLNC29 +Qk8uiuyeUfE= +=H/uX +-----END PGP PUBLIC KEY BLOCK----- diff --git a/conky/PKGBUILD b/conky/PKGBUILD index dc0efa4643..a65bf54a63 100644 --- a/conky/PKGBUILD +++ b/conky/PKGBUILD @@ -79,7 +79,7 @@ build() { -D CMAKE_CXX_FLAGS="$CXXFLAGS -ffat-lto-objects" \ -D MAINTAINER_MODE=OFF \ -D BUILD_TESTS=ON \ - -D BUILD_DOCS=ON \ + -D BUILD_DOCS=OFF \ -D BUILD_EXTRAS=ON \ -D BUILD_WLAN=ON \ -D BUILD_XDBE=ON \ diff --git a/containerd/PKGBUILD b/containerd/PKGBUILD index e166cedcce..8791747158 100644 --- a/containerd/PKGBUILD +++ b/containerd/PKGBUILD @@ -26,6 +26,10 @@ prepare() { build() { cd "${pkgname}" export GOFLAGS="-trimpath -mod=readonly -modcacherw" + export GOPROXY=https://goproxy.cn + go mod edit -replace=go.etcd.io/bbolt=go.etcd.io/bbolt@v1.3.7-0.20221114114133-eedea6cb26ef + go mod edit -replace=github.com/cilium/ebpf=github.com/cilium/ebpf@v0.10.1-0.20230613102335-1140a754d780 + go mod tidy make VERSION=v$pkgver GO_BUILD_FLAGS="-trimpath -mod=readonly -modcacherw" GO_GCFLAGS="" EXTRA_LDFLAGS="-buildid=" make VERSION=v$pkgver man } diff --git a/cpputest/PKGBUILD b/cpputest/PKGBUILD index ccfe005856..581088f76a 100644 --- a/cpputest/PKGBUILD +++ b/cpputest/PKGBUILD @@ -2,7 +2,7 @@ pkgname=cpputest pkgver=4.0 -pkgrel=4 +pkgrel=5 pkgdesc="Unit testing and mocking framework for C/C++" arch=(loong64 x86_64) url="https://github.com/cpputest/cpputest" diff --git a/cri-o/PKGBUILD b/cri-o/PKGBUILD index 328e3193fb..91eb208557 100644 --- a/cri-o/PKGBUILD +++ b/cri-o/PKGBUILD @@ -31,6 +31,7 @@ makedepends=( libassuan libseccomp ostree + git ) optdepends=( 'apparmor: for apparmor integration' diff --git a/criu/2183.patch b/criu/2183.patch new file mode 100644 index 0000000000..998e5ae3a6 --- /dev/null +++ b/criu/2183.patch @@ -0,0 +1,2237 @@ +From 50bae581a3e5e996bc36e656631a29be9f12ad9a Mon Sep 17 00:00:00 2001 +From: znley +Date: Mon, 12 Jun 2023 11:23:38 +0800 +Subject: [PATCH 1/6] include: add common header files for loongarch64 + +Signed-off-by: znley +--- + include/common/arch/loongarch64/asm/atomic.h | 62 +++++++++++++++++++ + include/common/arch/loongarch64/asm/bitops.h | 24 +++++++ + .../common/arch/loongarch64/asm/bitsperlong.h | 6 ++ + include/common/arch/loongarch64/asm/linkage.h | 19 ++++++ + include/common/arch/loongarch64/asm/page.h | 39 ++++++++++++ + 5 files changed, 150 insertions(+) + create mode 100644 include/common/arch/loongarch64/asm/atomic.h + create mode 100644 
include/common/arch/loongarch64/asm/bitops.h + create mode 100644 include/common/arch/loongarch64/asm/bitsperlong.h + create mode 100644 include/common/arch/loongarch64/asm/linkage.h + create mode 100644 include/common/arch/loongarch64/asm/page.h + +diff --git a/include/common/arch/loongarch64/asm/atomic.h b/include/common/arch/loongarch64/asm/atomic.h +new file mode 100644 +index 0000000000..9017254397 +--- /dev/null ++++ b/include/common/arch/loongarch64/asm/atomic.h +@@ -0,0 +1,62 @@ ++#ifndef __CR_ATOMIC_H__ ++#define __CR_ATOMIC_H__ ++ ++#include ++#include "common/compiler.h" ++ ++typedef struct { ++ int counter; ++} atomic_t; ++ ++static inline int atomic_read(const atomic_t *v) ++{ ++ return (*(volatile int *)&(v)->counter); ++} ++ ++static inline void atomic_set(atomic_t *v, int i) ++{ ++ v->counter = i; ++} ++ ++static inline int __atomic_add(int i, atomic_t *v) ++{ ++ int result; ++ asm volatile("amadd_db.w %1, %2, %0" : "+ZB"(v->counter), "=&r"(result) : "r"(i) : "memory"); ++ return result + i; ++} ++ ++static inline void atomic_add(int i, atomic_t *v) ++{ ++ __atomic_add(i, v); ++} ++ ++static inline int atomic_add_return(int i, atomic_t *v) ++{ ++ return __atomic_add(i, v); ++} ++ ++#define atomic_sub(i, v) atomic_add(-(int)i, v) ++#define atomic_sub_return(i, v) atomic_add_return(-(int)i, v) ++#define atomic_inc(v) atomic_add(1, v) ++#define atomic_inc_return(v) atomic_add_return(1, v) ++#define atomic_dec(v) atomic_sub(1, v) ++#define atomic_dec_return(v) atomic_sub_return(1, v) ++ ++static inline int atomic_cmpxchg(atomic_t *ptr, int old, int new) ++{ ++ int ret; ++ asm volatile("1: \n" ++ " ll.w %0, %1 \n" ++ " bne %0, %2, 2f \n" ++ " or $t0, %3, $zero \n" ++ " sc.w $t0, %1 \n" ++ " beqz $t0, 1b \n" ++ "2: \n" ++ " dbar 0 \n" ++ : "=&r"(ret), "+ZB"(ptr->counter) ++ : "r"(old), "r"(new) ++ : "t0", "memory"); ++ return ret; ++} ++ ++#endif /* __CR_ATOMIC_H__ */ +diff --git a/include/common/arch/loongarch64/asm/bitops.h b/include/common/arch/loongarch64/asm/bitops.h +new file mode 100644 +index 0000000000..170e4f7369 +--- /dev/null ++++ b/include/common/arch/loongarch64/asm/bitops.h +@@ -0,0 +1,24 @@ ++#ifndef _LINUX_BITOPS_H ++#define _LINUX_BITOPS_H ++#include "common/asm-generic/bitops.h" ++ ++/** ++ * test_and_set_bit - Set a bit and return its old value ++ * @nr: Bit to set ++ * @addr: Address to count from ++ * ++ * This operation is atomic and cannot be reordered. ++ * It also implies a memory barrier. 
++ */ ++ ++#define BIT_MASK(nr) (1UL << ((nr) % BITS_PER_LONG)) ++#define BIT_WORD(nr) ((1UL << ((nr) / BITS_PER_LONG)) - 1) ++static inline int test_and_set_bit(unsigned long nr, volatile unsigned long *addr) ++{ ++ unsigned long res, mask; ++ mask = BIT_MASK(nr); ++ asm volatile("amor_db.d %0, %2, %1" : "=&r"(res), "+ZB"(addr[BIT_WORD(nr)]) : "r"(mask) : "memory"); ++ return (res & mask) != 0; ++} ++ ++#endif +diff --git a/include/common/arch/loongarch64/asm/bitsperlong.h b/include/common/arch/loongarch64/asm/bitsperlong.h +new file mode 100644 +index 0000000000..13d06a384e +--- /dev/null ++++ b/include/common/arch/loongarch64/asm/bitsperlong.h +@@ -0,0 +1,6 @@ ++#ifndef __CR_BITSPERLONG_H__ ++#define __CR_BITSPERLONG_H__ ++ ++#define BITS_PER_LONG _LOONGARCH_SZLONG ++ ++#endif /* __CR_BITSPERLONG_H__ */ +diff --git a/include/common/arch/loongarch64/asm/linkage.h b/include/common/arch/loongarch64/asm/linkage.h +new file mode 100644 +index 0000000000..448acc29fc +--- /dev/null ++++ b/include/common/arch/loongarch64/asm/linkage.h +@@ -0,0 +1,19 @@ ++#ifndef __CR_LINKAGE_H__ ++#define __CR_LINKAGE_H__ ++ ++#define __ALIGN .align 2 ++#define __ALIGN_STR ".align 2" ++ ++#define GLOBAL(name) \ ++ .globl name; \ ++name: ++ ++#define ENTRY(name) \ ++ .globl name; \ ++ __ALIGN; \ ++ .type name, @function; \ ++name: ++ ++#define END(sym) .size sym, .- sym ++ ++#endif /* __CR_LINKAGE_H__ */ +diff --git a/include/common/arch/loongarch64/asm/page.h b/include/common/arch/loongarch64/asm/page.h +new file mode 100644 +index 0000000000..25bdbc1412 +--- /dev/null ++++ b/include/common/arch/loongarch64/asm/page.h +@@ -0,0 +1,39 @@ ++#ifndef __CR_ASM_PAGE_H__ ++#define __CR_ASM_PAGE_H__ ++ ++#define ARCH_HAS_LONG_PAGES ++ ++#ifndef CR_NOGLIBC ++#include /* ffsl() */ ++#include /* _SC_PAGESIZE */ ++ ++static unsigned __page_size; ++static unsigned __page_shift; ++ ++static inline unsigned page_size(void) ++{ ++ if (!__page_size) ++ __page_size = sysconf(_SC_PAGESIZE); ++ return __page_size; ++} ++ ++static inline unsigned page_shift(void) ++{ ++ if (!__page_shift) ++ __page_shift = (ffsl(page_size()) - 1); ++ return __page_shift; ++} ++ ++#define PAGE_SIZE page_size() ++#define PAGE_SHIFT page_shift() ++#define PAGE_MASK (~(PAGE_SIZE - 1)) ++ ++#define PAGE_PFN(addr) ((addr) / PAGE_SIZE) ++#else /* CR_NOGLIBC */ ++ ++extern unsigned page_size(void); ++#define PAGE_SIZE page_size() ++ ++#endif /* CR_NOGLIBC */ ++ ++#endif /* __CR_ASM_PAGE_H__ */ + +From 0d63f58663fa22eb9fecd1cc778a49c49ddccfc9 Mon Sep 17 00:00:00 2001 +From: znley +Date: Mon, 12 Jun 2023 09:35:40 +0000 +Subject: [PATCH 2/6] compel: add loongarch64 support + +Signed-off-by: znley +--- + Makefile | 10 +- + compel/Makefile | 2 +- + .../plugins/include/asm/prologue.h | 35 +++ + .../plugins/include/asm/syscall-types.h | 30 +++ + .../loongarch64/plugins/include/features.h | 4 + + .../loongarch64/plugins/std/parasite-head.S | 9 + + .../plugins/std/syscalls/Makefile.syscalls | 117 ++++++++++ + .../syscalls/syscall-common-loongarch-64.S | 44 ++++ + .../plugins/std/syscalls/syscall_64.tbl | 121 +++++++++++ + .../loongarch64/scripts/compel-pack.lds.S | 32 +++ + compel/arch/loongarch64/src/lib/cpu.c | 41 ++++ + .../loongarch64/src/lib/handle-elf-host.c | 22 ++ + compel/arch/loongarch64/src/lib/handle-elf.c | 22 ++ + .../loongarch64/src/lib/include/handle-elf.h | 8 + + .../loongarch64/src/lib/include/syscall.h | 8 + + .../src/lib/include/uapi/asm/breakpoints.h | 6 + + .../src/lib/include/uapi/asm/cpu.h | 6 + + .../src/lib/include/uapi/asm/fpu.h | 4 + + 
.../src/lib/include/uapi/asm/infect-types.h | 67 ++++++ + .../src/lib/include/uapi/asm/sigframe.h | 86 ++++++++ + compel/arch/loongarch64/src/lib/infect.c | 204 ++++++++++++++++++ + compel/src/main.c | 3 + + scripts/nmk/scripts/include.mk | 3 +- + 23 files changed, 881 insertions(+), 3 deletions(-) + create mode 100644 compel/arch/loongarch64/plugins/include/asm/prologue.h + create mode 100644 compel/arch/loongarch64/plugins/include/asm/syscall-types.h + create mode 100644 compel/arch/loongarch64/plugins/include/features.h + create mode 100644 compel/arch/loongarch64/plugins/std/parasite-head.S + create mode 100644 compel/arch/loongarch64/plugins/std/syscalls/Makefile.syscalls + create mode 100644 compel/arch/loongarch64/plugins/std/syscalls/syscall-common-loongarch-64.S + create mode 100644 compel/arch/loongarch64/plugins/std/syscalls/syscall_64.tbl + create mode 100644 compel/arch/loongarch64/scripts/compel-pack.lds.S + create mode 100644 compel/arch/loongarch64/src/lib/cpu.c + create mode 100644 compel/arch/loongarch64/src/lib/handle-elf-host.c + create mode 100644 compel/arch/loongarch64/src/lib/handle-elf.c + create mode 100644 compel/arch/loongarch64/src/lib/include/handle-elf.h + create mode 100644 compel/arch/loongarch64/src/lib/include/syscall.h + create mode 100644 compel/arch/loongarch64/src/lib/include/uapi/asm/breakpoints.h + create mode 100644 compel/arch/loongarch64/src/lib/include/uapi/asm/cpu.h + create mode 100644 compel/arch/loongarch64/src/lib/include/uapi/asm/fpu.h + create mode 100644 compel/arch/loongarch64/src/lib/include/uapi/asm/infect-types.h + create mode 100644 compel/arch/loongarch64/src/lib/include/uapi/asm/sigframe.h + create mode 100644 compel/arch/loongarch64/src/lib/infect.c + +diff --git a/Makefile b/Makefile +index a5c6c5bccf..9a297d2d83 100644 +--- a/Makefile ++++ b/Makefile +@@ -19,7 +19,7 @@ endif + + # + # Supported Architectures +-ifneq ($(filter-out x86 arm aarch64 ppc64 s390 mips,$(ARCH)),) ++ifneq ($(filter-out x86 arm aarch64 ppc64 s390 mips loongarch64,$(ARCH)),) + $(error "The architecture $(ARCH) isn't supported") + endif + +@@ -80,6 +80,10 @@ ifeq ($(ARCH),mips) + DEFINES := -DCONFIG_MIPS + endif + ++ifeq ($(ARCH),loongarch64) ++ DEFINES := -DCONFIG_LOONGARCH64 ++endif ++ + # + # CFLAGS_PIE: + # +@@ -122,6 +126,10 @@ ifeq ($(ARCH),mips) + WARNINGS := -rdynamic + endif + ++ifeq ($(ARCH),loongarch64) ++WARNINGS := -Wno-implicit-function-declaration ++endif ++ + ifneq ($(GCOV),) + LDFLAGS += -lgcov + CFLAGS += $(CFLAGS-GCOV) +diff --git a/compel/Makefile b/compel/Makefile +index b79aee6871..78ec4826af 100644 +--- a/compel/Makefile ++++ b/compel/Makefile +@@ -33,7 +33,7 @@ lib-y += arch/$(ARCH)/src/lib/thread_area.o + endif + + # handle_elf() has no support of ELF relocations on ARM (yet?) 
+-ifneq ($(filter arm aarch64,$(ARCH)),) ++ifneq ($(filter arm aarch64 loongarch64,$(ARCH)),) + CFLAGS += -DNO_RELOCS + HOSTCFLAGS += -DNO_RELOCS + endif +diff --git a/compel/arch/loongarch64/plugins/include/asm/prologue.h b/compel/arch/loongarch64/plugins/include/asm/prologue.h +new file mode 100644 +index 0000000000..c19ce54d7a +--- /dev/null ++++ b/compel/arch/loongarch64/plugins/include/asm/prologue.h +@@ -0,0 +1,35 @@ ++#ifndef __ASM_PROLOGUE_H__ ++#define __ASM_PROLOGUE_H__ ++ ++#ifndef __ASSEMBLY__ ++ ++#include ++#include ++#include ++ ++#include ++ ++#define sys_recv(sockfd, ubuf, size, flags) sys_recvfrom(sockfd, ubuf, size, flags, NULL, NULL) ++ ++typedef struct prologue_init_args { ++ struct sockaddr_un ctl_sock_addr; ++ unsigned int ctl_sock_addr_len; ++ ++ unsigned int arg_s; ++ void *arg_p; ++ ++ void *sigframe; ++} prologue_init_args_t; ++ ++#endif /* __ASSEMBLY__ */ ++ ++/* ++ * Reserve enough space for sigframe. ++ * ++ * FIXME It is rather should be taken from sigframe header. ++ */ ++#define PROLOGUE_SGFRAME_SIZE 4096 ++ ++#define PROLOGUE_INIT_ARGS_SIZE 1024 ++ ++#endif /* __ASM_PROLOGUE_H__ */ +diff --git a/compel/arch/loongarch64/plugins/include/asm/syscall-types.h b/compel/arch/loongarch64/plugins/include/asm/syscall-types.h +new file mode 100644 +index 0000000000..b883bd8bed +--- /dev/null ++++ b/compel/arch/loongarch64/plugins/include/asm/syscall-types.h +@@ -0,0 +1,30 @@ ++#ifndef COMPEL_ARCH_SYSCALL_TYPES_H__ ++#define COMPEL_ARCH_SYSCALL_TYPES_H__ ++ ++#include ++/* Types for sigaction, sigprocmask syscalls */ ++typedef void rt_signalfn_t(int, siginfo_t *, void *); ++typedef rt_signalfn_t *rt_sighandler_t; ++ ++typedef void rt_restorefn_t(void); ++typedef rt_restorefn_t *rt_sigrestore_t; ++ ++/* refer to arch/loongarch/include/uapi/asm/signal.h */ ++#define _KNSIG 64 ++#define _NSIG_BPW BITS_PER_LONG ++#define _KNSIG_WORDS (_KNSIG / _NSIG_BPW) ++ ++typedef struct { ++ uint64_t sig[_KNSIG_WORDS]; ++} k_rtsigset_t; ++ ++typedef struct { ++ rt_sighandler_t rt_sa_handler; ++ unsigned long rt_sa_flags; ++ rt_sigrestore_t rt_sa_restorer; ++ k_rtsigset_t rt_sa_mask; ++} rt_sigaction_t; ++ ++#define SA_RESTORER 0x04000000 ++ ++#endif /* COMPEL_ARCH_SYSCALL_TYPES_H__ */ +diff --git a/compel/arch/loongarch64/plugins/include/features.h b/compel/arch/loongarch64/plugins/include/features.h +new file mode 100644 +index 0000000000..b4a3cded2b +--- /dev/null ++++ b/compel/arch/loongarch64/plugins/include/features.h +@@ -0,0 +1,4 @@ ++#ifndef __COMPEL_ARCH_FEATURES_H ++#define __COMPEL_ARCH_FEATURES_H ++ ++#endif /* __COMPEL_ARCH_FEATURES_H */ +diff --git a/compel/arch/loongarch64/plugins/std/parasite-head.S b/compel/arch/loongarch64/plugins/std/parasite-head.S +new file mode 100644 +index 0000000000..3a960490eb +--- /dev/null ++++ b/compel/arch/loongarch64/plugins/std/parasite-head.S +@@ -0,0 +1,9 @@ ++ ++#include "common/asm/linkage.h" ++ ++ .section .head.text, "ax" ++ENTRY(__export_parasite_head_start) ++ bl parasite_service; ++ break 0; ++END(__export_parasite_head_start) ++ +diff --git a/compel/arch/loongarch64/plugins/std/syscalls/Makefile.syscalls b/compel/arch/loongarch64/plugins/std/syscalls/Makefile.syscalls +new file mode 100644 +index 0000000000..0d08f34e1d +--- /dev/null ++++ b/compel/arch/loongarch64/plugins/std/syscalls/Makefile.syscalls +@@ -0,0 +1,117 @@ ++std-lib-y += ./$(PLUGIN_ARCH_DIR)/std/syscalls-64.o ++sys-proto-types := $(obj)/include/uapi/std/syscall-types.h ++sys-proto-generic := $(obj)/include/uapi/std/syscall.h ++sys-codes-generic := 
$(obj)/include/uapi/std/syscall-codes.h ++sys-codes = $(obj)/include/uapi/std/syscall-codes-$(1).h ++sys-proto = $(obj)/include/uapi/std/syscall-$(1).h ++sys-def = $(PLUGIN_ARCH_DIR)/std/syscalls/syscall_$(1).tbl ++sys-asm = $(PLUGIN_ARCH_DIR)/std/syscalls-$(1).S ++sys-asm-common-name = std/syscalls/syscall-common-loongarch-$(1).S ++sys-asm-common = $(PLUGIN_ARCH_DIR)/$(sys-asm-common-name) ++sys-asm-types := $(obj)/include/uapi/std/asm/syscall-types.h ++sys-exec-tbl = $(PLUGIN_ARCH_DIR)/std/sys-exec-tbl-$(1).c ++ ++sys-bits := 64 ++ ++AV := $$$$ ++ ++define gen-rule-sys-codes ++$(sys-codes): $(sys-def) $(sys-proto-types) ++ $(call msg-gen, $$@) ++ $(Q) echo "/* Autogenerated, don't edit */" > $$@ ++ $(Q) echo "#ifndef ASM_SYSCALL_CODES_H_$(1)__" >> $$@ ++ $(Q) echo "#define ASM_SYSCALL_CODES_H_$(1)__" >> $$@ ++ $(Q) cat $$< | awk '/^__NR/{SYSN=$(AV)1; \ ++ sub("^__NR", "SYS", SYSN); \ ++ print "\n#ifndef ", $(AV)1; \ ++ print "#define", $(AV)1, $(AV)2; \ ++ print "#endif"; \ ++ print "\n#ifndef ", SYSN; \ ++ print "#define ", SYSN, $(AV)1; \ ++ print "#endif";}' >> $$@ ++ $(Q) echo "#endif /* ASM_SYSCALL_CODES_H_$(1)__ */" >> $$@ ++endef ++ ++define gen-rule-sys-proto ++$(sys-proto): $(sys-def) $(sys-proto-types) ++ $(call msg-gen, $$@) ++ $(Q) echo "/* Autogenerated, don't edit */" > $$@ ++ $(Q) echo "#ifndef ASM_SYSCALL_PROTO_H_$(1)__" >> $$@ ++ $(Q) echo "#define ASM_SYSCALL_PROTO_H_$(1)__" >> $$@ ++ $(Q) echo '#include ' >> $$@ ++ $(Q) echo '#include ' >> $$@ ++ifeq ($(1),32) ++ $(Q) echo '#include "asm/syscall32.h"' >> $$@ ++endif ++ $(Q) cat $$< | awk '/^__NR/{print "extern long", $(AV)3, \ ++ substr($(AV)0, index($(AV)0,$(AV)4)), ";"}' >> $$@ ++ $(Q) echo "#endif /* ASM_SYSCALL_PROTO_H_$(1)__ */" >> $$@ ++endef ++ ++define gen-rule-sys-asm ++$(sys-asm): $(sys-def) $(sys-asm-common) $(sys-codes) $(sys-proto) $(sys-proto-types) ++ $(call msg-gen, $$@) ++ $(Q) echo "/* Autogenerated, don't edit */" > $$@ ++ $(Q) echo '#include ' >> $$@ ++ $(Q) echo '#include "$(sys-asm-common-name)"' >> $$@ ++ $(Q) cat $$< | awk '/^__NR/{print "SYSCALL(", $(AV)3, ",", $(AV)2, ")"}' >> $$@ ++endef ++ ++define gen-rule-sys-exec-tbl ++$(sys-exec-tbl): $(sys-def) $(sys-codes) $(sys-proto) $(sys-proto-generic) $(sys-proto-types) ++ $(call msg-gen, $$@) ++ $(Q) echo "/* Autogenerated, don't edit */" > $$@ ++ $(Q) cat $$< | awk '/^__NR/{print \ ++ "SYSCALL(", substr($(AV)3, 5), ",", $(AV)2, ")"}' >> $$@ ++endef ++ ++$(sys-codes-generic): $(sys-proto-types) ++ $(call msg-gen, $@) ++ $(Q) echo "/* Autogenerated, don't edit */" > $@ ++ $(Q) echo "#ifndef __ASM_CR_SYSCALL_CODES_H__" >> $@ ++ $(Q) echo "#define __ASM_CR_SYSCALL_CODES_H__" >> $@ ++ $(Q) echo '#include ' >> $@ ++ $(Q) cat $< | awk '/^__NR/{NR32=$$1; \ ++ sub("^__NR", "__NR32", NR32); \ ++ print "\n#ifndef ", NR32; \ ++ print "#define ", NR32, $$2; \ ++ print "#endif";}' >> $@ ++ $(Q) echo "#endif /* __ASM_CR_SYSCALL_CODES_H__ */" >> $@ ++mrproper-y += $(sys-codes-generic) ++ ++$(sys-proto-generic): $(strip $(call map,sys-proto,$(sys-bits))) $(sys-proto-types) ++ $(call msg-gen, $@) ++ $(Q) echo "/* Autogenerated, don't edit */" > $@ ++ $(Q) echo "#ifndef __ASM_CR_SYSCALL_PROTO_H__" >> $@ ++ $(Q) echo "#define __ASM_CR_SYSCALL_PROTO_H__" >> $@ ++ $(Q) echo "" >> $@ ++ $(Q) echo '#include ' >> $@ ++ $(Q) echo "" >> $@ ++ $(Q) echo "#endif /* __ASM_CR_SYSCALL_PROTO_H__ */" >> $@ ++mrproper-y += $(sys-proto-generic) ++ ++define gen-rule-sys-exec-tbl ++$(sys-exec-tbl): $(sys-def) $(sys-codes) $(sys-proto) $(sys-proto-generic) ++ $(call msg-gen, $$@) ++ 
$(Q) echo "/* Autogenerated, don't edit */" > $$@ ++ $(Q) cat $$< | awk '/^__NR/{print \ ++ "SYSCALL(", substr($(AV)3, 5), ",", $(AV)2, ")"}' >> $$@ ++endef ++ ++$(eval $(call map,gen-rule-sys-codes,$(sys-bits))) ++$(eval $(call map,gen-rule-sys-proto,$(sys-bits))) ++$(eval $(call map,gen-rule-sys-asm,$(sys-bits))) ++$(eval $(call map,gen-rule-sys-exec-tbl,$(sys-bits))) ++ ++$(sys-asm-types): $(PLUGIN_ARCH_DIR)/include/asm/syscall-types.h ++ $(call msg-gen, $@) ++ $(Q) ln -s ../../../../../../$(PLUGIN_ARCH_DIR)/include/asm/syscall-types.h $(sys-asm-types) ++ ++std-headers-deps += $(call sys-codes,$(sys-bits)) ++std-headers-deps += $(call sys-proto,$(sys-bits)) ++std-headers-deps += $(call sys-asm,$(sys-bits)) ++std-headers-deps += $(call sys-exec-tbl,$(sys-bits)) ++std-headers-deps += $(sys-codes-generic) ++std-headers-deps += $(sys-proto-generic) ++std-headers-deps += $(sys-asm-types) ++mrproper-y += $(std-headers-deps) +diff --git a/compel/arch/loongarch64/plugins/std/syscalls/syscall-common-loongarch-64.S b/compel/arch/loongarch64/plugins/std/syscalls/syscall-common-loongarch-64.S +new file mode 100644 +index 0000000000..fff8944669 +--- /dev/null ++++ b/compel/arch/loongarch64/plugins/std/syscalls/syscall-common-loongarch-64.S +@@ -0,0 +1,44 @@ ++#include "common/asm/linkage.h" ++ ++#define SYSCALL(name, opcode) \ ++ENTRY(name); \ ++ addi.d $a7, $zero, opcode; \ ++ syscall 0; \ ++ jirl $r0, $r1, 0; \ ++END(name) ++ ++#ifndef AT_FDCWD ++#define AT_FDCWD -100 ++#endif ++ ++#ifndef AT_REMOVEDIR ++#define AT_REMOVEDIR 0x200 ++#endif ++ ++ENTRY(sys_open) ++ or $a3, $zero, $a2 ++ or $a2, $zero, $a1 ++ or $a1, $zero, $a0 ++ addi.d $a0, $zero, AT_FDCWD ++ b sys_openat ++END(sys_open) ++ ++ENTRY(sys_mkdir) ++ or $a3, $zero, $a2 ++ or $a2, $zero, $a1 ++ or $a1, $zero, $a0 ++ addi.d $a0, $zero, AT_FDCWD ++ b sys_mkdirat ++END(sys_mkdir) ++ ++ENTRY(sys_rmdir) ++ addi.d $a2, $zero, AT_REMOVEDIR ++ or $a1, $zero, $a0 ++ addi.d $a0, $zero, AT_FDCWD ++ b sys_unlinkat ++END(sys_rmdir) ++ ++ENTRY(__cr_restore_rt) ++ addi.d $a7, $zero, __NR_rt_sigreturn ++ syscall 0 ++END(__cr_restore_rt) +diff --git a/compel/arch/loongarch64/plugins/std/syscalls/syscall_64.tbl b/compel/arch/loongarch64/plugins/std/syscalls/syscall_64.tbl +new file mode 100644 +index 0000000000..b37a22674e +--- /dev/null ++++ b/compel/arch/loongarch64/plugins/std/syscalls/syscall_64.tbl +@@ -0,0 +1,121 @@ ++# ++# System calls table, please make sure the table consist only the syscalls ++# really used somewhere in project. ++# from kernel/linux-3.10.84/arch/mips/include/uapi/asm/unistd.h Linux 64-bit syscalls are in the range from 5000 to 5999. 
++# ++# __NR_name code name arguments ++# ------------------------------------------------------------------------------------------------------------------------------------------------------------- ++__NR_io_setup 0 sys_io_setup (unsigned nr_events, aio_context_t *ctx) ++__NR_io_submit 2 sys_io_submit (aio_context_t ctx, long nr, struct iocb **iocbpp) ++__NR_io_getevents 4 sys_io_getevents (aio_context_t ctx, long min_nr, long nr, struct io_event *evs, struct timespec *tmo) ++__NR_fcntl 25 sys_fcntl (int fd, int type, long arg) ++__NR_ioctl 29 sys_ioctl (unsigned int fd, unsigned int cmd, unsigned long arg) ++__NR_flock 32 sys_flock (int fd, unsigned long cmd) ++__NR_mkdirat 34 sys_mkdirat (int dfd, const char *pathname, int flag) ++__NR_unlinkat 35 sys_unlinkat (int dfd, const char *pathname, int flag) ++__NR_umount2 39 sys_umount2 (char *name, int flags) ++__NR_mount 40 sys_mount (char *dev_nmae, char *dir_name, char *type, unsigned long flags, void *data) ++__NR_fallocate 47 sys_fallocate (int fd, int mode, loff_t offset, loff_t len) ++__NR_close 57 sys_close (int fd) ++__NR_openat 56 sys_openat (int dfd, const char *filename, int flags, int mode) ++__NR_lseek 62 sys_lseek (int fd, unsigned long offset, unsigned long origin) ++__NR_read 63 sys_read (int fd, void *buf, unsigned long count) ++__NR_write 64 sys_write (int fd, const void *buf, unsigned long count) ++__NR_pread64 67 sys_pread (unsigned int fd, char *buf, size_t count, loff_t pos) ++__NR_preadv 69 sys_preadv_raw (int fd, struct iovec *iov, unsigned long nr, unsigned long pos_l, unsigned long pos_h) ++__NR_ppoll 73 sys_ppoll (struct pollfd *fds, unsigned int nfds, const struct timespec *tmo, const sigset_t *sigmask, size_t sigsetsize) ++__NR_signalfd4 74 sys_signalfd (int fd, k_rtsigset_t *mask, size_t sizemask, int flags) ++__NR_vmsplice 75 sys_vmsplice (int fd, const struct iovec *iov, unsigned long nr_segs, unsigned int flags) ++__NR_readlinkat 78 sys_readlinkat (int fd, const char *path, char *buf, int bufsize) ++__NR_timerfd_settime 86 sys_timerfd_settime (int ufd, int flags, const struct itimerspec *utmr, struct itimerspec *otmr) ++__NR_capget 90 sys_capget (struct cap_header *h, struct cap_data *d) ++__NR_capset 91 sys_capset (struct cap_header *h, struct cap_data *d) ++__NR_personality 92 sys_personality (unsigned int personality) ++__NR_exit 93 sys_exit (unsigned long error_code) ++__NR_exit_group 94 sys_exit_group (int error_code) ++__NR_waitid 95 sys_waitid (int which, pid_t pid, struct siginfo *infop, int options, struct rusage *ru) ++__NR_set_tid_address 96 sys_set_tid_address (int *tid_addr) ++__NR_futex 98 sys_futex (uint32_t *uaddr, int op, uint32_t val, struct timespec *utime, uint32_t *uaddr2, uint32_t val3) ++__NR_set_robust_list 99 sys_set_robust_list (struct robust_list_head *head, size_t len) ++__NR_get_robust_list 100 sys_get_robust_list (int pid, struct robust_list_head **head_ptr, size_t *len_ptr) ++__NR_nanosleep 101 sys_nanosleep (struct timespec *req, struct timespec *rem) ++__NR_getitimer 102 sys_getitimer (int which, const struct itimerval *val) ++__NR_setitimer 103 sys_setitimer (int which, const struct itimerval *val, struct itimerval *old) ++__NR_sys_timer_create 107 sys_timer_create (clockid_t which_clock, struct sigevent *timer_event_spec, kernel_timer_t *created_timer_id) ++__NR_sys_timer_gettime 108 sys_timer_gettime (int timer_id, const struct itimerspec *setting) ++__NR_sys_timer_getoverrun 109 sys_timer_getoverrun (int timer_id) ++__NR_sys_timer_settime 110 sys_timer_settime 
(kernel_timer_t timer_id, int flags, const struct itimerspec *new_setting, struct itimerspec *old_setting) ++__NR_sys_timer_delete 111 sys_timer_delete (kernel_timer_t timer_id) ++__NR_clock_gettime 113 sys_clock_gettime (const clockid_t which_clock, const struct timespec *tp) ++__NR_sched_setscheduler 119 sys_sched_setscheduler (int pid, int policy, struct sched_param *p) ++__NR_restart_syscall 128 sys_restart_syscall (void) ++__NR_kill 129 sys_kill (long pid, int sig) ++__NR_sigaltstack 132 sys_sigaltstack (const void *uss, void *uoss) ++__NR_rt_sigaction 134 sys_sigaction (int signum, const rt_sigaction_t *act, rt_sigaction_t *oldact, size_t sigsetsize) ++__NR_rt_sigprocmask 135 sys_sigprocmask (int how, k_rtsigset_t *set, k_rtsigset_t *old, size_t sigsetsize) ++__NR_rt_sigqueueinfo 138 sys_rt_sigqueueinfo (pid_t pid, int sig, siginfo_t *info) ++__NR_rt_sigreturn 139 sys_rt_sigreturn (void) ++__NR_setpriority 140 sys_setpriority (int which, int who, int nice) ++__NR_setresuid 147 sys_setresuid (int uid, int euid, int suid) ++__NR_getresuid 148 sys_getresuid (int *uid, int *euid, int *suid) ++__NR_setresgid 149 sys_setresgid (int gid, int egid, int sgid) ++__NR_getresgid 150 sys_getresgid (int *gid, int *egid, int *sgid) ++__NR_getpgid 155 sys_getpgid (pid_t pid) ++__NR_setfsuid 151 sys_setfsuid (int fsuid) ++__NR_setfsgid 152 sys_setfsgid (int fsgid) ++__NR_getsid 156 sys_getsid (void) ++__NR_getgroups 158 sys_getgroups (int gsize, unsigned int *groups) ++__NR_setgroups 159 sys_setgroups (int gsize, unsigned int *groups) ++__NR_setrlimit 164 sys_setrlimit (int resource, struct krlimit *rlim) ++__NR_umask 166 sys_umask (int mask) ++__NR_prctl 167 sys_prctl (int option, unsigned long arg2, unsigned long arg3, unsigned long arg4, unsigned long arg5) ++__NR_gettimeofday 169 sys_gettimeofday (struct timeval *tv, struct timezone *tz) ++__NR_getpid 172 sys_getpid (void) ++__NR_ptrace 177 sys_ptrace (long request, pid_t pid, void *addr, void *data) ++__NR_gettid 178 sys_gettid (void) ++__NR_shmat 196 sys_shmat (int shmid, void *shmaddr, int shmflag) ++__NR_socket 198 sys_socket (int domain, int type, int protocol) ++__NR_bind 200 sys_bind (int sockfd, const struct sockaddr *addr, int addrlen) ++__NR_connect 203 sys_connect (int sockfd, struct sockaddr *addr, int addrlen) ++__NR_sendto 206 sys_sendto (int sockfd, void *buff, size_t len, unsigned int flags, struct sockaddr *addr, int addr_len) ++__NR_recvfrom 207 sys_recvfrom (int sockfd, void *ubuf, size_t size, unsigned int flags, struct sockaddr *addr, int *addr_len) ++__NR_setsockopt 208 sys_setsockopt (int sockfd, int level, int optname, const void *optval, socklen_t optlen) ++__NR_getsockopt 209 sys_getsockopt (int sockfd, int level, int optname, const void *optval, socklen_t *optlen) ++__NR_shutdown 210 sys_shutdown (int sockfd, int how) ++__NR_sendmsg 211 sys_sendmsg (int sockfd, const struct msghdr *msg, int flags) ++__NR_recvmsg 212 sys_recvmsg (int sockfd, struct msghdr *msg, int flags) ++__NR_brk 214 sys_brk (void *addr) ++__NR_munmap 215 sys_munmap (void *addr, unsigned long len) ++__NR_mremap 216 sys_mremap (unsigned long addr, unsigned long old_len, unsigned long new_len, unsigned long flags, unsigned long new_addr) ++__NR_clone 220 sys_clone (unsigned long flags, void *child_stack, void *parent_tid, unsigned long newtls, void *child_tid) ++__NR_mmap 222 sys_mmap (void *addr, unsigned long len, unsigned long prot, unsigned long flags, unsigned long fd, unsigned long offset) ++__NR_mprotect 226 sys_mprotect (const void *addr, 
unsigned long len, unsigned long prot) ++__NR_mincore 232 sys_mincore (void *addr, unsigned long size, unsigned char *vec) ++__NR_madvise 233 sys_madvise (unsigned long start, size_t len, int behavior) ++__NR_rt_tgsigqueueinfo 240 sys_rt_tgsigqueueinfo (pid_t tgid, pid_t pid, int sig, siginfo_t *info) ++__NR_wait4 260 sys_wait4 (int pid, int *status, int options, struct rusage *ru) ++__NR_fanotify_init 262 sys_fanotify_init (unsigned int flags, unsigned int event_f_flags) ++__NR_fanotify_mark 263 sys_fanotify_mark (int fanotify_fd, unsigned int flags, uint64_t mask, int dfd, const char *pathname) ++__NR_open_by_handle_at 265 sys_open_by_handle_at (int mountdirfd, struct file_handle *handle, int flags) ++__NR_setns 268 sys_setns (int fd, int nstype) ++__NR_kcmp 272 sys_kcmp (pid_t pid1, pid_t pid2, int type, unsigned long idx1, unsigned long idx2) ++__NR_seccomp 277 sys_seccomp (unsigned int op, unsigned int flags, const char *uargs) ++__NR_memfd_create 279 sys_memfd_create (const char *name, unsigned int flags) ++__NR_userfaultfd 282 sys_userfaultfd (int flags) ++__NR_rseq 293 sys_rseq (void *rseq, uint32_t rseq_len, int flags, uint32_t sig) ++__NR_open_tree 428 sys_open_tree (int dirfd, const char *pathname, unsigned int flags) ++__NR_move_mount 429 sys_move_mount (int from_dfd, const char *from_pathname, int to_dfd, const char *to_pathname, int flags) ++__NR_fsopen 430 sys_fsopen (char *fsname, unsigned int flags) ++__NR_fsconfig 431 sys_fsconfig (int fd, unsigned int cmd, const char *key, const char *value, int aux) ++__NR_fsmount 432 sys_fsmount (int fd, unsigned int flags, unsigned int attr_flags) ++__NR_pidfd_open 434 sys_pidfd_open (pid_t pid, unsigned int flags) ++__NR_clone3 435 sys_clone3 (struct clone_args *uargs, size_t size) ++__NR_openat2 437 sys_openat2 (int dirfd, char *pathname, struct open_how *how, size_t size) ++__NR_pidfd_getfd 438 sys_pidfd_getfd (int pidfd, int targetfd, unsigned int flags) ++#__NR_dup2 ! sys_dup2 (int oldfd, int newfd) ++#__NR_rmdir ! sys_rmdir (const char *name) ++#__NR_unlink ! sys_unlink (char *pathname) ++#__NR_cacheflush ! sys_cacheflush (char *addr, int nbytes, int cache) ++#__NR_set_thread_area ! sys_set_thread_area (unsigned long *addr) ++#__NR_mkdir ! sys_mkdir (const char *name, int mode) ++#__NR_open ! sys_open (const char *filename, unsigned long flags, unsigned long mode) +diff --git a/compel/arch/loongarch64/scripts/compel-pack.lds.S b/compel/arch/loongarch64/scripts/compel-pack.lds.S +new file mode 100644 +index 0000000000..cfb7a2fb35 +--- /dev/null ++++ b/compel/arch/loongarch64/scripts/compel-pack.lds.S +@@ -0,0 +1,32 @@ ++OUTPUT_ARCH(loongarch) ++EXTERN(__export_parasite_head_start) ++ ++SECTIONS ++{ ++ .crblob 0x0 : { ++ *(.head.text) ++ ASSERT(DEFINED(__export_parasite_head_start), ++ "Symbol __export_parasite_head_start is missing"); ++ *(.text*) ++ . = ALIGN(32); ++ *(.data*) ++ . = ALIGN(32); ++ *(.rodata*) ++ . = ALIGN(32); ++ *(.bss*) ++ . = ALIGN(32); ++ *(.got*) ++ . = ALIGN(32); ++ *(.toc*) ++ . 
= ALIGN(32); ++ } =0x00000000, ++ ++ /DISCARD/ : { ++ *(.debug*) ++ *(.comment*) ++ *(.note*) ++ *(.group*) ++ *(.eh_frame*) ++ *(*) ++ } ++} +diff --git a/compel/arch/loongarch64/src/lib/cpu.c b/compel/arch/loongarch64/src/lib/cpu.c +new file mode 100644 +index 0000000000..172b90e275 +--- /dev/null ++++ b/compel/arch/loongarch64/src/lib/cpu.c +@@ -0,0 +1,41 @@ ++#include ++#include ++ ++#include "compel-cpu.h" ++#include "common/bitops.h" ++#include "common/compiler.h" ++#include "log.h" ++ ++#undef LOG_PREFIX ++#define LOG_PREFIX "cpu: " ++ ++static compel_cpuinfo_t rt_info; ++static bool rt_info_done = false; ++ ++void compel_set_cpu_cap(compel_cpuinfo_t *c, unsigned int feature) ++{ ++} ++ ++void compel_clear_cpu_cap(compel_cpuinfo_t *c, unsigned int feature) ++{ ++} ++ ++int compel_test_cpu_cap(compel_cpuinfo_t *c, unsigned int feature) ++{ ++ return 0; ++} ++ ++int compel_cpuid(compel_cpuinfo_t *c) ++{ ++ return 0; ++} ++ ++bool compel_cpu_has_feature(unsigned int feature) ++{ ++ if (!rt_info_done) { ++ compel_cpuid(&rt_info); ++ rt_info_done = true; ++ } ++ ++ return compel_test_cpu_cap(&rt_info, feature); ++} +diff --git a/compel/arch/loongarch64/src/lib/handle-elf-host.c b/compel/arch/loongarch64/src/lib/handle-elf-host.c +new file mode 100644 +index 0000000000..a605a5a452 +--- /dev/null ++++ b/compel/arch/loongarch64/src/lib/handle-elf-host.c +@@ -0,0 +1,22 @@ ++#include ++#include ++ ++#include "handle-elf.h" ++#include "piegen.h" ++#include "log.h" ++ ++static const unsigned char __maybe_unused elf_ident_64_le[EI_NIDENT] = { ++ 0x7f, 0x45, 0x4c, 0x46, 0x02, 0x01, 0x01, 0x00, /* clang-format */ ++ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, ++}; ++ ++extern int __handle_elf(void *mem, size_t size); ++ ++int handle_binary(void *mem, size_t size) ++{ ++ if (memcmp(mem, elf_ident_64_le, sizeof(elf_ident_64_le)) == 0) ++ return __handle_elf(mem, size); ++ ++ pr_err("Unsupported Elf format detected\n"); ++ return -EINVAL; ++} +diff --git a/compel/arch/loongarch64/src/lib/handle-elf.c b/compel/arch/loongarch64/src/lib/handle-elf.c +new file mode 100644 +index 0000000000..a605a5a452 +--- /dev/null ++++ b/compel/arch/loongarch64/src/lib/handle-elf.c +@@ -0,0 +1,22 @@ ++#include ++#include ++ ++#include "handle-elf.h" ++#include "piegen.h" ++#include "log.h" ++ ++static const unsigned char __maybe_unused elf_ident_64_le[EI_NIDENT] = { ++ 0x7f, 0x45, 0x4c, 0x46, 0x02, 0x01, 0x01, 0x00, /* clang-format */ ++ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, ++}; ++ ++extern int __handle_elf(void *mem, size_t size); ++ ++int handle_binary(void *mem, size_t size) ++{ ++ if (memcmp(mem, elf_ident_64_le, sizeof(elf_ident_64_le)) == 0) ++ return __handle_elf(mem, size); ++ ++ pr_err("Unsupported Elf format detected\n"); ++ return -EINVAL; ++} +diff --git a/compel/arch/loongarch64/src/lib/include/handle-elf.h b/compel/arch/loongarch64/src/lib/include/handle-elf.h +new file mode 100644 +index 0000000000..b0a66ef879 +--- /dev/null ++++ b/compel/arch/loongarch64/src/lib/include/handle-elf.h +@@ -0,0 +1,8 @@ ++#ifndef COMPEL_HANDLE_ELF_H__ ++#define COMPEL_HANDLE_ELF_H__ ++ ++#include "elf64-types.h" ++ ++#define arch_is_machine_supported(e_machine) (e_machine == EM_LOONGARCH) ++ ++#endif /* COMPEL_HANDLE_ELF_H__ */ +diff --git a/compel/arch/loongarch64/src/lib/include/syscall.h b/compel/arch/loongarch64/src/lib/include/syscall.h +new file mode 100644 +index 0000000000..ac3e2799ac +--- /dev/null ++++ b/compel/arch/loongarch64/src/lib/include/syscall.h +@@ -0,0 +1,8 @@ ++#ifndef 
__COMPEL_SYSCALL_H__ ++#define __COMPEL_SYSCALL_H__ ++ ++#ifndef SIGSTKFLT ++#define SIGSTKFLT 16 ++#endif ++ ++#endif +diff --git a/compel/arch/loongarch64/src/lib/include/uapi/asm/breakpoints.h b/compel/arch/loongarch64/src/lib/include/uapi/asm/breakpoints.h +new file mode 100644 +index 0000000000..21eb1309f2 +--- /dev/null ++++ b/compel/arch/loongarch64/src/lib/include/uapi/asm/breakpoints.h +@@ -0,0 +1,6 @@ ++#ifndef __COMPEL_BREAKPOINTS_H__ ++#define __COMPEL_BREAKPOINTS_H__ ++#define ARCH_SI_TRAP TRAP_BRKPT ++extern int ptrace_set_breakpoint(pid_t pid, void *addr); ++extern int ptrace_flush_breakpoints(pid_t pid); ++#endif +diff --git a/compel/arch/loongarch64/src/lib/include/uapi/asm/cpu.h b/compel/arch/loongarch64/src/lib/include/uapi/asm/cpu.h +new file mode 100644 +index 0000000000..e568df789c +--- /dev/null ++++ b/compel/arch/loongarch64/src/lib/include/uapi/asm/cpu.h +@@ -0,0 +1,6 @@ ++#ifndef __CR_ASM_CPU_H__ ++#define __CR_ASM_CPU_H__ ++ ++typedef struct { ++} compel_cpuinfo_t; ++#endif /* __CR_ASM_CPU_H__ */ +diff --git a/compel/arch/loongarch64/src/lib/include/uapi/asm/fpu.h b/compel/arch/loongarch64/src/lib/include/uapi/asm/fpu.h +new file mode 100644 +index 0000000000..7f476d541a +--- /dev/null ++++ b/compel/arch/loongarch64/src/lib/include/uapi/asm/fpu.h +@@ -0,0 +1,4 @@ ++#ifndef __CR_ASM_FPU_H__ ++#define __CR_ASM_FPU_H__ ++ ++#endif /* __CR_ASM_FPU_H__ */ +diff --git a/compel/arch/loongarch64/src/lib/include/uapi/asm/infect-types.h b/compel/arch/loongarch64/src/lib/include/uapi/asm/infect-types.h +new file mode 100644 +index 0000000000..0b047a5b08 +--- /dev/null ++++ b/compel/arch/loongarch64/src/lib/include/uapi/asm/infect-types.h +@@ -0,0 +1,67 @@ ++#ifndef UAPI_COMPEL_ASM_TYPES_H__ ++#define UAPI_COMPEL_ASM_TYPES_H__ ++ ++#include ++ ++#define SIGMAX 64 ++#define SIGMAX_OLD 31 ++ ++/* ++ * From the Linux kernel header arch/loongarch/include/uapi/asm/ptrace.h ++ * ++ * A thread LoongArch CPU context ++ * ++ * struct user_fp_state { ++ * uint64_t fpr[32]; ++ * uint64_t fcc; ++ * uint32_t fcsr; ++ * }; ++ * ++ * struct user_pt_regs { ++ * unsigned long regs[32]; ++ * unsigned long csr_era; ++ * unsigned long csr_badv; ++ * unsigned long reserved[11]; ++ * }; ++ */ ++ ++struct user_gp_regs { ++ uint64_t regs[32]; ++ uint64_t orig_a0; ++ uint64_t pc; ++ uint64_t csr_badv; ++ uint64_t reserved[10]; ++} __attribute__((aligned(8))); ++ ++struct user_fp_regs { ++ uint64_t regs[32]; ++ uint64_t fcc; ++ uint32_t fcsr; ++}; ++ ++typedef struct user_gp_regs user_regs_struct_t; ++typedef struct user_fp_regs user_fpregs_struct_t; ++ ++#define user_regs_native(regs) true ++ ++#define __compel_arch_fetch_thread_area(tid, th) 0 ++#define compel_arch_fetch_thread_area(tctl) 0 ++#define compel_arch_get_tls_task(ctl, tls) ++#define compel_arch_get_tls_thread(tctl, tls) ++ ++#define REG_RES(r) ((uint64_t)(r).regs[4]) ++#define REG_IP(r) ((uint64_t)(r).pc) ++#define REG_SP(r) ((uint64_t)(r).regs[3]) ++#define REG_SYSCALL_NR(r) ((uint64_t)(r).regs[11]) ++#define SET_REG_IP(r, val) ((r).pc = (val)) ++ ++#define GPR_NUM 32 ++#define FPR_NUM 32 ++ ++#define __NR(syscall, compat) \ ++ ({ \ ++ (void)compat; \ ++ __NR_##syscall; \ ++ }) ++ ++#endif /* UAPI_COMPEL_ASM_TYPES_H__ */ +diff --git a/compel/arch/loongarch64/src/lib/include/uapi/asm/sigframe.h b/compel/arch/loongarch64/src/lib/include/uapi/asm/sigframe.h +new file mode 100644 +index 0000000000..fcb545a1d2 +--- /dev/null ++++ b/compel/arch/loongarch64/src/lib/include/uapi/asm/sigframe.h +@@ -0,0 +1,86 @@ ++#ifndef 
UAPI_COMPEL_ASM_SIGFRAME_H__ ++#define UAPI_COMPEL_ASM_SIGFRAME_H__ ++ ++#include ++#include ++#include ++ ++#include ++#include ++ ++#include ++ ++#define rt_sigcontext sigcontext ++/* sigcontext defined in usr/include/uapi/asm/sigcontext.h*/ ++#include ++typedef __u32 u32; ++ ++typedef struct sigcontext_t { ++ __u64 pc; ++ __u64 regs[32]; ++ __u32 flags; ++ __u64 extcontext[0] __attribute__((__aligned__(16))); ++} sigcontext_t; ++ ++typedef struct context_info_t { ++ __u32 magic; ++ __u32 size; ++ __u64 padding; ++} context_info_t; ++ ++#define FPU_CTX_MAGIC 0x46505501 ++#define FPU_CTX_ALIGN 8 ++typedef struct fpu_context_t { ++ __u64 regs[32]; ++ __u64 fcc; ++ __u64 fcsr; ++} fpu_context_t; ++ ++typedef struct ucontext { ++ unsigned long uc_flags; ++ struct ucontext *uc_link; ++ stack_t uc_stack; ++ sigset_t uc_sigmask; ++ __u8 __unused[1024 / 8 - sizeof(sigset_t)]; ++ sigcontext_t uc_mcontext; ++} ucontext; ++ ++/* Copy from the kernel source arch/loongarch/kernel/signal.c */ ++struct rt_sigframe { ++ rt_siginfo_t rs_info; ++ ucontext rs_uc; ++}; ++ ++#define RT_SIGFRAME_UC(rt_sigframe) (&(rt_sigframe->rs_uc)) ++#define RT_SIGFRAME_SIGMASK(rt_sigframe) ((k_rtsigset_t *)&RT_SIGFRAME_UC(rt_sigframe)->uc_sigmask) ++#define RT_SIGFRAME_SIGCTX(rt_sigframe) (&(RT_SIGFRAME_UC(rt_sigframe)->uc_mcontext)) ++#define RT_SIGFRAME_REGIP(rt_sigframe) ((long unsigned int)(RT_SIGFRAME_SIGCTX(rt_sigframe)->pc)) ++#define RT_SIGFRAME_HAS_FPU(rt_sigframe) (1) ++ ++#define RT_SIGFRAME_FPU(rt_sigframe) \ ++ ({ \ ++ context_info_t *ctx = (context_info_t *)RT_SIGFRAME_SIGCTX(rt_sigframe)->extcontext; \ ++ ctx->magic = FPU_CTX_MAGIC; \ ++ ctx->size = sizeof(context_info_t) + sizeof(fpu_context_t); \ ++ (fpu_context_t *)((char *)ctx + sizeof(context_info_t)); \ ++ }) ++ ++#define RT_SIGFRAME_OFFSET(rt_sigframe) 0 ++ ++/* clang-format off */ ++#define ARCH_RT_SIGRETURN(new_sp, rt_sigframe) \ ++ asm volatile( \ ++ "addi.d $sp, %0, 0 \n" \ ++ "addi.d $a7, $zero, "__stringify(__NR_rt_sigreturn)" \n" \ ++ "syscall 0" \ ++ : \ ++ :"r"(new_sp) \ ++ : "$a7", "memory") ++/* clang-format on */ ++ ++int sigreturn_prep_fpu_frame(struct rt_sigframe *sigframe, struct rt_sigframe *rsigframe); ++ ++#define rt_sigframe_erase_sigset(sigframe) memset(RT_SIGFRAME_SIGMASK(sigframe), 0, sizeof(k_rtsigset_t)) ++#define rt_sigframe_copy_sigset(sigframe, from) memcpy(RT_SIGFRAME_SIGMASK(sigframe), from, sizeof(k_rtsigset_t)) ++ ++#endif /* UAPI_COMPEL_ASM_SIGFRAME_H__ */ +diff --git a/compel/arch/loongarch64/src/lib/infect.c b/compel/arch/loongarch64/src/lib/infect.c +new file mode 100644 +index 0000000000..8e3c19aff2 +--- /dev/null ++++ b/compel/arch/loongarch64/src/lib/infect.c +@@ -0,0 +1,204 @@ ++#include ++#include ++#include ++#include ++#include ++ ++#include ++#include ++#include "errno.h" ++#include ++#include ++#include "common/err.h" ++#include "common/page.h" ++#include "asm/infect-types.h" ++#include "ptrace.h" ++#include "infect.h" ++#include "infect-priv.h" ++#include "log.h" ++#include "common/bug.h" ++ ++/* ++ * Injected syscall instruction ++ * loongarch64 is Little Endian ++ */ ++const char code_syscall[] = { ++ 0x00, 0x00, 0x2b, 0x00, /* syscall */ ++ 0x00, 0x00, 0x2a, 0x00 /* break */ ++}; ++ ++int sigreturn_prep_regs_plain(struct rt_sigframe *sigframe, user_regs_struct_t *regs, user_fpregs_struct_t *fpregs) ++{ ++ sigcontext_t *sc; ++ fpu_context_t *fpu; ++ ++ sc = RT_SIGFRAME_SIGCTX(sigframe); ++ memcpy(sc->regs, regs->regs, sizeof(regs->regs)); ++ sc->pc = regs->pc; ++ ++ fpu = RT_SIGFRAME_FPU(sigframe); ++ 
memcpy(fpu->regs, fpregs->regs, sizeof(fpregs->regs)); ++ fpu->fcc = fpregs->fcc; ++ fpu->fcsr = fpregs->fcsr; ++ return 0; ++} ++ ++int sigreturn_prep_fpu_frame_plain(struct rt_sigframe *sigframe, struct rt_sigframe *rsigframe) ++{ ++ return 0; ++} ++ ++int compel_get_task_regs(pid_t pid, user_regs_struct_t *regs, user_fpregs_struct_t *ext_regs, save_regs_t save, ++ void *arg, __maybe_unused unsigned long flags) ++{ ++ user_fpregs_struct_t tmp, *fpregs = ext_regs ? ext_regs : &tmp; ++ struct iovec iov; ++ int ret; ++ ++ pr_info("Dumping GP/FPU registers for %d\n", pid); ++ ++ iov.iov_base = regs; ++ iov.iov_len = sizeof(user_regs_struct_t); ++ if ((ret = ptrace(PTRACE_GETREGSET, pid, NT_PRSTATUS, &iov))) { ++ pr_perror("Failed to obtain CPU registers for %d", pid); ++ goto err; ++ } ++ ++ /* ++ * Refer to Linux kernel arch/loongarch/kernel/signal.c ++ */ ++ if (regs->regs[0]) { ++ switch (regs->regs[4]) { ++ case -ERESTARTNOHAND: ++ case -ERESTARTSYS: ++ case -ERESTARTNOINTR: ++ regs->regs[4] = regs->orig_a0; ++ regs->pc -= 4; ++ break; ++ case -ERESTART_RESTARTBLOCK: ++ regs->regs[4] = regs->orig_a0; ++ regs->regs[11] = __NR_restart_syscall; ++ regs->pc -= 4; ++ break; ++ } ++ regs->regs[0] = 0; /* Don't deal with this again. */ ++ } ++ ++ iov.iov_base = fpregs; ++ iov.iov_len = sizeof(user_fpregs_struct_t); ++ if ((ret = ptrace(PTRACE_GETREGSET, pid, NT_PRFPREG, &iov))) { ++ pr_perror("Failed to obtain FPU registers for %d", pid); ++ goto err; ++ } ++ ++ ret = save(arg, regs, fpregs); ++err: ++ return 0; ++} ++ ++int compel_set_task_ext_regs(pid_t pid, user_fpregs_struct_t *ext_regs) ++{ ++ struct iovec iov; ++ ++ pr_info("Restoring GP/FPU registers for %d\n", pid); ++ ++ iov.iov_base = ext_regs; ++ iov.iov_len = sizeof(*ext_regs); ++ if (ptrace(PTRACE_SETREGSET, pid, NT_PRFPREG, &iov)) { ++ pr_perror("Failed to set FPU registers for %d", pid); ++ return -1; ++ } ++ return 0; ++} ++ ++/* ++ * Registers $4 ~ $11 represents arguments a0 ~ a7, especially a7 is ++ * used as syscall number. ++ */ ++int compel_syscall(struct parasite_ctl *ctl, int nr, long *ret, unsigned long arg1, unsigned long arg2, ++ unsigned long arg3, unsigned long arg4, unsigned long arg5, unsigned long arg6) ++{ ++ int err; ++ user_regs_struct_t regs = ctl->orig.regs; ++ ++ regs.regs[11] = (unsigned long)nr; ++ regs.regs[4] = arg1; ++ regs.regs[5] = arg2; ++ regs.regs[6] = arg3; ++ regs.regs[7] = arg4; ++ regs.regs[8] = arg5; ++ regs.regs[9] = arg6; ++ err = compel_execute_syscall(ctl, ®s, code_syscall); ++ ++ *ret = regs.regs[4]; ++ ++ return err; ++} ++ ++void *remote_mmap(struct parasite_ctl *ctl, void *addr, size_t length, int prot, int flags, int fd, off_t offset) ++{ ++ long map; ++ int err; ++ ++ err = compel_syscall(ctl, __NR_mmap, &map, (unsigned long)addr, length, prot, flags, fd, offset >> PAGE_SHIFT); ++ ++ if (err < 0 || IS_ERR_VALUE(map)) { ++ pr_err("remote mmap() failed: %s\n", strerror(-map)); ++ return NULL; ++ } ++ ++ return (void *)map; ++} ++ ++/* ++ * regs must be inited when calling this function from original context ++ */ ++void parasite_setup_regs(unsigned long new_ip, void *stack, user_regs_struct_t *regs) ++{ ++ regs->pc = new_ip; ++ if (stack) ++ regs->regs[4] = (unsigned long)stack; ++} ++ ++bool arch_can_dump_task(struct parasite_ctl *ctl) ++{ ++ return true; ++} ++ ++int arch_fetch_sas(struct parasite_ctl *ctl, struct rt_sigframe *s) ++{ ++ long ret; ++ int err; ++ ++ err = compel_syscall(ctl, __NR_sigaltstack, &ret, 0, (unsigned long)&s->rs_uc.uc_stack, 0, 0, 0, 0); ++ return err ? 
err : ret; ++} ++ ++/* ++ * TODO: add feature ++ */ ++int ptrace_set_breakpoint(pid_t pid, void *addr) ++{ ++ return 0; ++} ++ ++int ptrace_flush_breakpoints(pid_t pid) ++{ ++ return 0; ++} ++ ++/* ++ * Refer to Linux kernel arch/loongarch/include/asm/processor.h ++ */ ++#define TASK_SIZE32 (1UL) << 31 ++#define TASK_SIZE64_MIN (1UL) << 40 ++#define TASK_SIZE64_MAX (1UL) << 48 ++ ++unsigned long compel_task_size(void) ++{ ++ unsigned long task_size; ++ for (task_size = TASK_SIZE64_MIN; task_size < TASK_SIZE64_MAX; task_size <<= 1) ++ if (munmap((void *)task_size, page_size())) ++ break; ++ return task_size; ++} +diff --git a/compel/src/main.c b/compel/src/main.c +index ef05a46d01..bc16c0ab41 100644 +--- a/compel/src/main.c ++++ b/compel/src/main.c +@@ -57,6 +57,9 @@ static const flags_t flags = { + #elif defined CONFIG_MIPS + .arch = "mips", + .cflags = COMPEL_CFLAGS_PIE, ++#elif defined CONFIG_LOONGARCH64 ++ .arch = "loongarch64", ++ .cflags = COMPEL_CFLAGS_PIE, + #else + #error "CONFIG_ not defined, or unsupported ARCH" + #endif +diff --git a/scripts/nmk/scripts/include.mk b/scripts/nmk/scripts/include.mk +index c1c1e94af4..55c5be307f 100644 +--- a/scripts/nmk/scripts/include.mk ++++ b/scripts/nmk/scripts/include.mk +@@ -20,7 +20,8 @@ ARCH ?= $(shell echo $(SUBARCH) | sed \ + -e s/ppc64.*/ppc64/ \ + -e s/mips.*/mips/ \ + -e s/sh[234].*/sh/ \ +- -e s/aarch64.*/aarch64/) ++ -e s/aarch64.*/aarch64/ \ ++ -e s/loongarch64.*/loongarch64/) + + export SUBARCH ARCH + + +From 91c0f7a6d8bef0d8d5836d86430864b2036e140f Mon Sep 17 00:00:00 2001 +From: znley +Date: Mon, 12 Jun 2023 15:09:22 +0800 +Subject: [PATCH 3/6] images: add loongarch64 core image + +Signed-off-by: znley +--- + images/Makefile | 1 + + images/core-loongarch64.proto | 23 +++++++++++++++++++++++ + images/core.proto | 3 +++ + 3 files changed, 27 insertions(+) + create mode 100755 images/core-loongarch64.proto + +diff --git a/images/Makefile b/images/Makefile +index 004e22ec3f..ca85b1a213 100644 +--- a/images/Makefile ++++ b/images/Makefile +@@ -2,6 +2,7 @@ proto-obj-y += stats.o + proto-obj-y += core.o + proto-obj-y += core-x86.o + proto-obj-y += core-mips.o ++proto-obj-y += core-loongarch64.o + proto-obj-y += core-arm.o + proto-obj-y += core-aarch64.o + proto-obj-y += core-ppc64.o +diff --git a/images/core-loongarch64.proto b/images/core-loongarch64.proto +new file mode 100755 +index 0000000000..8258f006ea +--- /dev/null ++++ b/images/core-loongarch64.proto +@@ -0,0 +1,23 @@ ++// SPDX-License-Identifier: MIT ++ ++syntax = "proto2"; ++ ++import "opts.proto"; ++ ++message user_loongarch64_gpregs_entry { ++ repeated uint64 regs = 1; ++ required uint64 pc = 2; ++} ++ ++message user_loongarch64_fpregs_entry { ++ repeated uint64 regs = 1; ++ required uint64 fcc = 2; ++ required uint32 fcsr = 3; ++} ++ ++message thread_info_loongarch64 { ++ required uint64 clear_tid_addr = 1[(criu).hex = true]; ++ required uint64 tls = 2; ++ required user_loongarch64_gpregs_entry gpregs = 3[(criu).hex = true]; ++ required user_loongarch64_fpregs_entry fpregs = 4[(criu).hex = true]; ++} +diff --git a/images/core.proto b/images/core.proto +index eddd1dc555..1882fe8e42 100644 +--- a/images/core.proto ++++ b/images/core.proto +@@ -8,6 +8,7 @@ import "core-aarch64.proto"; + import "core-ppc64.proto"; + import "core-s390.proto"; + import "core-mips.proto"; ++import "core-loongarch64.proto"; + + import "rlimit.proto"; + import "timer.proto"; +@@ -122,6 +123,7 @@ message core_entry { + PPC64 = 4; + S390 = 5; + MIPS = 6; ++ LOONGARCH64 = 7; + } + + required march 
mtype = 1; +@@ -131,6 +133,7 @@ message core_entry { + optional thread_info_ppc64 ti_ppc64 = 9; + optional thread_info_s390 ti_s390 = 10; + optional thread_info_mips ti_mips = 11; ++ optional thread_info_loongarch64 ti_loongarch64 = 12; + + optional task_core_entry tc = 3; + optional task_kobj_ids_entry ids = 4; + +From c3de76052f24ad5aa88d0093d1c033735f57f163 Mon Sep 17 00:00:00 2001 +From: znley +Date: Mon, 12 Jun 2023 15:15:30 +0800 +Subject: [PATCH 4/6] criu: add loongarch64 support to parasite and restorer + +Signed-off-by: znley +--- + criu/arch/loongarch64/Makefile | 14 +++ + criu/arch/loongarch64/cpu.c | 31 +++++ + criu/arch/loongarch64/crtools.c | 115 ++++++++++++++++++ + criu/arch/loongarch64/include/asm/dump.h | 15 +++ + criu/arch/loongarch64/include/asm/int.h | 6 + + criu/arch/loongarch64/include/asm/kerndat.h | 7 ++ + .../include/asm/parasite-syscall.h | 6 + + criu/arch/loongarch64/include/asm/parasite.h | 11 ++ + criu/arch/loongarch64/include/asm/restore.h | 33 +++++ + criu/arch/loongarch64/include/asm/restorer.h | 97 +++++++++++++++ + .../loongarch64/include/asm/thread_pointer.h | 27 ++++ + criu/arch/loongarch64/include/asm/types.h | 39 ++++++ + criu/arch/loongarch64/include/asm/vdso.h | 27 ++++ + criu/arch/loongarch64/restorer.c | 14 +++ + criu/arch/loongarch64/sigframe.c | 12 ++ + criu/arch/loongarch64/vdso-pie.c | 48 ++++++++ + 16 files changed, 502 insertions(+) + create mode 100644 criu/arch/loongarch64/Makefile + create mode 100644 criu/arch/loongarch64/cpu.c + create mode 100644 criu/arch/loongarch64/crtools.c + create mode 100644 criu/arch/loongarch64/include/asm/dump.h + create mode 100644 criu/arch/loongarch64/include/asm/int.h + create mode 100644 criu/arch/loongarch64/include/asm/kerndat.h + create mode 100644 criu/arch/loongarch64/include/asm/parasite-syscall.h + create mode 100644 criu/arch/loongarch64/include/asm/parasite.h + create mode 100644 criu/arch/loongarch64/include/asm/restore.h + create mode 100644 criu/arch/loongarch64/include/asm/restorer.h + create mode 100644 criu/arch/loongarch64/include/asm/thread_pointer.h + create mode 100644 criu/arch/loongarch64/include/asm/types.h + create mode 100644 criu/arch/loongarch64/include/asm/vdso.h + create mode 100644 criu/arch/loongarch64/restorer.c + create mode 100644 criu/arch/loongarch64/sigframe.c + create mode 100644 criu/arch/loongarch64/vdso-pie.c + +diff --git a/criu/arch/loongarch64/Makefile b/criu/arch/loongarch64/Makefile +new file mode 100644 +index 0000000000..4bd99eb7eb +--- /dev/null ++++ b/criu/arch/loongarch64/Makefile +@@ -0,0 +1,14 @@ ++builtin-name := crtools.built-in.o ++ ++ccflags-y += -iquote $(obj)/include ++ccflags-y += -iquote criu/include -iquote include ++ccflags-y += $(COMPEL_UAPI_INCLUDES) ++ ++asflags-y += -Wstrict-prototypes ++asflags-y += -D__ASSEMBLY__ -nostdlib -fomit-frame-pointer ++asflags-y += -iquote $(obj)/include ++ldflags-y += -r -z noexecstack ++ ++obj-y += cpu.o ++obj-y += crtools.o ++obj-y += sigframe.o +diff --git a/criu/arch/loongarch64/cpu.c b/criu/arch/loongarch64/cpu.c +new file mode 100644 +index 0000000000..5559c4288f +--- /dev/null ++++ b/criu/arch/loongarch64/cpu.c +@@ -0,0 +1,31 @@ ++#undef LOG_PREFIX ++#define LOG_PREFIX "cpu: " ++ ++int cpu_init(void) ++{ ++ return 0; ++} ++ ++int cpu_dump_cpuinfo(void) ++{ ++ return 0; ++} ++ ++int cpu_validate_cpuinfo(void) ++{ ++ return 0; ++} ++ ++int cpuinfo_dump(void) ++{ ++ if (cpu_init()) ++ return -1; ++ if (cpu_dump_cpuinfo()) ++ return -1; ++ return 0; ++} ++ ++int cpuinfo_check(void) ++{ ++ return 0; ++} 
+diff --git a/criu/arch/loongarch64/crtools.c b/criu/arch/loongarch64/crtools.c +new file mode 100644 +index 0000000000..eeb0731ca6 +--- /dev/null ++++ b/criu/arch/loongarch64/crtools.c +@@ -0,0 +1,115 @@ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++ ++#include "types.h" ++#include "log.h" ++#include "asm/restorer.h" ++#include "asm/parasite-syscall.h" ++#include ++#include "asm/dump.h" ++#include "cr_options.h" ++#include "common/compiler.h" ++#include "restorer.h" ++#include "parasite-syscall.h" ++#include "util.h" ++#include "cpu.h" ++#include ++#include "kerndat.h" ++ ++#include "protobuf.h" ++#include "images/core.pb-c.h" ++#include "images/creds.pb-c.h" ++ ++#define assign_reg(dst, src, e) (dst)->e = (__typeof__(dst->e))(src)->e ++ ++int save_task_regs(void *x, user_regs_struct_t *regs, user_fpregs_struct_t *fpregs) ++{ ++ int i; ++ CoreEntry *core = x; ++ UserLoongarch64GpregsEntry *gprs = core->ti_loongarch64->gpregs; ++ UserLoongarch64FpregsEntry *fprs = core->ti_loongarch64->fpregs; ++ for (i = 0; i < GPR_NUM; i++) ++ assign_reg(gprs, regs, regs[i]); ++ assign_reg(gprs, regs, pc); ++ ++ for (i = 0; i < FPR_NUM; i++) ++ assign_reg(fpregs, fpregs, regs[i]); ++ assign_reg(fprs, fpregs, fcc); ++ assign_reg(fprs, fpregs, fcsr); ++ return 0; ++} ++ ++int arch_alloc_thread_info(CoreEntry *core) ++{ ++ ThreadInfoLoongarch64 *ti_loongarch64; ++ UserLoongarch64GpregsEntry *gpregs; ++ UserLoongarch64FpregsEntry *fpregs; ++ ++ ti_loongarch64 = xmalloc(sizeof(*ti_loongarch64)); ++ thread_info_loongarch64__init(ti_loongarch64); ++ core->ti_loongarch64 = ti_loongarch64; ++ ++ gpregs = xmalloc(sizeof(*gpregs)); ++ if (!gpregs) ++ goto err; ++ user_loongarch64_gpregs_entry__init(gpregs); ++ gpregs->n_regs = GPR_NUM; ++ gpregs->regs = xmalloc(GPR_NUM * sizeof(uint64_t)); ++ if (!gpregs->regs) ++ goto err; ++ ti_loongarch64->gpregs = gpregs; ++ ++ fpregs = xmalloc(sizeof(*fpregs)); ++ if (!fpregs) ++ goto err; ++ user_loongarch64_fpregs_entry__init(fpregs); ++ fpregs->n_regs = FPR_NUM; ++ fpregs->regs = xmalloc(FPR_NUM * sizeof(uint64_t)); ++ if (!fpregs->regs) ++ goto err; ++ ti_loongarch64->fpregs = fpregs; ++ ++ return 0; ++err: ++ return -1; ++} ++ ++void arch_free_thread_info(CoreEntry *core) ++{ ++ if (CORE_THREAD_ARCH_INFO(core)) { ++ if (CORE_THREAD_ARCH_INFO(core)->fpregs) { ++ xfree(CORE_THREAD_ARCH_INFO(core)->fpregs->regs); ++ xfree(CORE_THREAD_ARCH_INFO(core)->fpregs); ++ } ++ xfree(CORE_THREAD_ARCH_INFO(core)->gpregs->regs); ++ xfree(CORE_THREAD_ARCH_INFO(core)->gpregs); ++ xfree(CORE_THREAD_ARCH_INFO(core)); ++ CORE_THREAD_ARCH_INFO(core) = NULL; ++ } ++} ++ ++int restore_fpu(struct rt_sigframe *sigframe, CoreEntry *core) ++{ ++ fpu_context_t *fpu = RT_SIGFRAME_FPU(sigframe); ++ UserLoongarch64FpregsEntry *fpregs = core->ti_loongarch64->fpregs; ++ ++ memcpy(fpu->regs, fpregs->regs, sizeof(fpu->regs)); ++ fpu->fcc = fpregs->fcc; ++ fpu->fcsr = fpregs->fcsr; ++ return 0; ++} ++ ++int restore_gpregs(struct rt_sigframe *sigframe, UserRegsEntry *r) ++{ ++ sigcontext_t *sc = RT_SIGFRAME_SIGCTX(sigframe); ++ memcpy(sc->regs, r->regs, sizeof(sc->regs)); ++ sc->pc = r->pc; ++ return 0; ++} +diff --git a/criu/arch/loongarch64/include/asm/dump.h b/criu/arch/loongarch64/include/asm/dump.h +new file mode 100644 +index 0000000000..04347155c3 +--- /dev/null ++++ b/criu/arch/loongarch64/include/asm/dump.h +@@ -0,0 +1,15 @@ ++#ifndef __CR_ASM_DUMP_H__ ++#define __CR_ASM_DUMP_H__ ++ ++extern int save_task_regs(void *, user_regs_struct_t *, 
user_fpregs_struct_t *); ++extern int arch_alloc_thread_info(CoreEntry *core); ++extern void arch_free_thread_info(CoreEntry *core); ++ ++static inline void core_put_tls(CoreEntry *core, tls_t tls) ++{ ++ core->ti_loongarch64->tls = tls; ++} ++ ++#define get_task_futex_robust_list_compat(pid, info) -1 ++ ++#endif +diff --git a/criu/arch/loongarch64/include/asm/int.h b/criu/arch/loongarch64/include/asm/int.h +new file mode 100644 +index 0000000000..642804e9b4 +--- /dev/null ++++ b/criu/arch/loongarch64/include/asm/int.h +@@ -0,0 +1,6 @@ ++#ifndef __CR_ASM_INT_H__ ++#define __CR_ASM_INT_H__ ++ ++#include "asm-generic/int.h" ++ ++#endif /* __CR_ASM_INT_H__ */ +diff --git a/criu/arch/loongarch64/include/asm/kerndat.h b/criu/arch/loongarch64/include/asm/kerndat.h +new file mode 100644 +index 0000000000..bb70cf6cf5 +--- /dev/null ++++ b/criu/arch/loongarch64/include/asm/kerndat.h +@@ -0,0 +1,7 @@ ++#ifndef __CR_ASM_KERNDAT_H__ ++#define __CR_ASM_KERNDAT_H__ ++ ++#define kdat_compatible_cr() 0 ++#define kdat_can_map_vdso() 0 ++ ++#endif /* __CR_ASM_KERNDAT_H__ */ +diff --git a/criu/arch/loongarch64/include/asm/parasite-syscall.h b/criu/arch/loongarch64/include/asm/parasite-syscall.h +new file mode 100644 +index 0000000000..6008c37923 +--- /dev/null ++++ b/criu/arch/loongarch64/include/asm/parasite-syscall.h +@@ -0,0 +1,6 @@ ++#ifndef __CR_ASM_PARASITE_SYSCALL_H__ ++#define __CR_ASM_PARASITE_SYSCALL_H__ ++ ++struct parasite_ctl; ++ ++#endif +diff --git a/criu/arch/loongarch64/include/asm/parasite.h b/criu/arch/loongarch64/include/asm/parasite.h +new file mode 100644 +index 0000000000..b64cb3185c +--- /dev/null ++++ b/criu/arch/loongarch64/include/asm/parasite.h +@@ -0,0 +1,11 @@ ++#ifndef __ASM_PARASITE_H__ ++#define __ASM_PARASITE_H__ ++ ++static inline void arch_get_tls(tls_t *ptls) ++{ ++ tls_t tls; ++ asm volatile("or %0, $zero, $tp" : "=r"(tls)); ++ *ptls = tls; ++} ++ ++#endif +diff --git a/criu/arch/loongarch64/include/asm/restore.h b/criu/arch/loongarch64/include/asm/restore.h +new file mode 100644 +index 0000000000..d956231c81 +--- /dev/null ++++ b/criu/arch/loongarch64/include/asm/restore.h +@@ -0,0 +1,33 @@ ++#ifndef __CR_ASM_RESTORE_H__ ++#define __CR_ASM_RESTORE_H__ ++ ++#include "asm/restorer.h" ++#include "images/core.pb-c.h" ++ ++/* clang-format off */ ++#define JUMP_TO_RESTORER_BLOB(new_sp, restore_task_exec_start, task_args) \ ++({ \ ++ uint64_t save_sp; \ ++ asm volatile("or %0, $zero, $sp" : "=r"(save_sp) : :"memory"); \ ++ asm volatile( \ ++ "or $a0, $zero, %2 \n" \ ++ "or $sp, $zero, %0 \n" \ ++ "jirl $ra, %1, 0 \n" \ ++ : \ ++ : "r"(new_sp & ~15), \ ++ "r"(restore_task_exec_start), \ ++ "r"(task_args) \ ++ : "$a0", "memory"); \ ++ asm volatile("or $sp, $zero, %0" : : "r"(save_sp) : "memory"); \ ++}) ++ ++/* clang-format on */ ++ ++static inline void core_get_tls(CoreEntry *pcore, tls_t *ptls) ++{ ++ *ptls = pcore->ti_loongarch64->tls; ++} ++ ++int restore_fpu(struct rt_sigframe *sigframe, CoreEntry *core); ++ ++#endif +diff --git a/criu/arch/loongarch64/include/asm/restorer.h b/criu/arch/loongarch64/include/asm/restorer.h +new file mode 100644 +index 0000000000..7a0d35c5b5 +--- /dev/null ++++ b/criu/arch/loongarch64/include/asm/restorer.h +@@ -0,0 +1,97 @@ ++#ifndef __CR_ASM_RESTORER_H__ ++#define __CR_ASM_RESTORER_H__ ++ ++#include "asm/types.h" ++#include ++#include "images/core.pb-c.h" ++#include ++#include ++ ++/* clang-format off */ ++#define RUN_CLONE_RESTORE_FN(ret, clone_flags, new_sp, parent_tid, \ ++ thread_args, clone_restore_fn) \ ++ asm volatile( \ ++ 
"clone_emul: \n" \ ++ "ld.d $a1, %2 \n" \ ++ "addi.d $a1, $a1, -16 \n" \ ++ "st.d %5, $a1, 0 \n" \ ++ "st.d %6, $a1, 8 \n" \ ++ "or $a0, $zero, %1 \n" \ ++ "or $a2, $zero, %3 \n" \ ++ "or $a3, $zero, %4 \n" \ ++ "ori $a7, $zero, "__stringify(__NR_clone)" \n" \ ++ "syscall 0 \n" \ ++ \ ++ "beqz $a0, thread_run \n" \ ++ \ ++ "or %0, $zero, $a0 \n" \ ++ "b clone_end \n" \ ++ \ ++ "thread_run: \n" \ ++ "ld.d $a1, $sp, 0 \n" \ ++ "ld.d $a0, $sp, 8 \n" \ ++ "jirl $ra, $a1, 0 \n" \ ++ \ ++ "clone_end: \n" \ ++ : "=r"(ret) \ ++ : "r"(clone_flags), \ ++ "ZB"(new_sp), \ ++ "r"(&parent_tid), \ ++ "r"(&thread_args[i].pid), \ ++ "r"(&clone_restore_fn), \ ++ "r"(&thread_args[i]) \ ++ : "$a0", "$a1", "$a2", "$a3", "$a7", "memory") ++ ++#define RUN_CLONE3_RESTORE_FN(ret, clone_args, size, args, \ ++ clone_restore_fn) \ ++ asm volatile( \ ++ "clone3_emul: \n" \ ++ "or $a0, $zero, %1 \n" \ ++ "or $a1, $zero, %2 \n" \ ++ "or $a2, $zero, %3 \n" \ ++ "or $a3, $zero, %4 \n" \ ++ "ori $a7, $zero, "__stringify(__NR_clone3)" \n" \ ++ "syscall 0 \n" \ ++ \ ++ "beqz $a0, clone3_thread_run \n" \ ++ \ ++ "or %0, $zero, $a0 \n" \ ++ "b clone3_end \n" \ ++ \ ++ "clone3_thread_run: \n" \ ++ "or $a0, $zero, $a3 \n" \ ++ "jirl $ra, $a2, 0 \n" \ ++ "clone3_end: \n" \ ++ : "=r"(ret) \ ++ : "r"(&clone_args), \ ++ "r"(size), \ ++ "r"(clone_restore_fn), \ ++ "r"(args) \ ++ : "$a0", "$a1", "$a2", "$a3", "$a7", "memory") ++/* clang-format on */ ++ ++static inline void restore_tls(tls_t *ptls) ++{ ++ asm volatile("or $tp, $zero, %0" : : "r"(*ptls)); ++} ++static inline int arch_compat_rt_sigaction(void *stack, int sig, void *act) ++{ ++ return -1; ++} ++static inline int set_compat_robust_list(uint32_t head_ptr, uint32_t len) ++{ ++ return -1; ++} ++static inline void *alloc_compat_syscall_stack(void) ++{ ++ return NULL; ++} ++static inline void free_compat_syscall_stack(void *stack32) ++{ ++} ++int restore_gpregs(struct rt_sigframe *f, UserLoongarch64GpregsEntry *r); ++int restore_nonsigframe_gpregs(UserLoongarch64GpregsEntry *r); ++ ++#define arch_map_vdso(map, compat) -1 ++ ++#endif +diff --git a/criu/arch/loongarch64/include/asm/thread_pointer.h b/criu/arch/loongarch64/include/asm/thread_pointer.h +new file mode 100644 +index 0000000000..f7e07066a5 +--- /dev/null ++++ b/criu/arch/loongarch64/include/asm/thread_pointer.h +@@ -0,0 +1,27 @@ ++/* __thread_pointer definition. Generic version. ++ Copyright (C) 2021 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library. If not, see ++ . 
*/ ++ ++#ifndef _SYS_THREAD_POINTER_H ++#define _SYS_THREAD_POINTER_H ++ ++static inline void *__criu_thread_pointer(void) ++{ ++ return __builtin_thread_pointer(); ++} ++ ++#endif /* _SYS_THREAD_POINTER_H */ +diff --git a/criu/arch/loongarch64/include/asm/types.h b/criu/arch/loongarch64/include/asm/types.h +new file mode 100644 +index 0000000000..72bca2022b +--- /dev/null ++++ b/criu/arch/loongarch64/include/asm/types.h +@@ -0,0 +1,39 @@ ++#ifndef __CR_ASM_TYPES_H__ ++#define __CR_ASM_TYPES_H__ ++ ++#include ++#include ++ ++#include "page.h" ++#include "bitops.h" ++#include "asm/int.h" ++#include "images/core.pb-c.h" ++ ++#include ++ ++#define core_is_compat(core) false ++ ++#define CORE_ENTRY__MARCH CORE_ENTRY__MARCH__LOONGARCH64 ++ ++#define CORE_THREAD_ARCH_INFO(core) core->ti_loongarch64 ++ ++#define TI_SP(core) ((core)->ti_loongarch64->gpregs->regs[4]) ++ ++#define TI_IP(core) ((core)->ti_loongarch64->gpregs->pc) ++ ++typedef UserLoongarch64GpregsEntry UserRegsEntry; ++ ++static inline uint64_t encode_pointer(void *p) ++{ ++ return (uint64_t)p; ++} ++static inline void *decode_pointer(uint64_t v) ++{ ++ return (void *)v; ++} ++ ++#define AT_VECTOR_SIZE 44 ++typedef uint64_t auxv_t; ++typedef uint64_t tls_t; ++ ++#endif /* __CR_ASM_TYPES_H__ */ +diff --git a/criu/arch/loongarch64/include/asm/vdso.h b/criu/arch/loongarch64/include/asm/vdso.h +new file mode 100644 +index 0000000000..64631dee09 +--- /dev/null ++++ b/criu/arch/loongarch64/include/asm/vdso.h +@@ -0,0 +1,27 @@ ++#ifndef __CR_ASM_VDSO_H__ ++#define __CR_ASM_VDSO_H__ ++ ++#include "asm/int.h" ++#include "asm-generic/vdso.h" ++ ++/* This definition is used in pie/util-vdso.c to initialize the vdso symbol ++ * name string table 'vdso_symbols' ++ */ ++ ++/* ++ * This is a minimal amount of symbols ++ * we should support at the moment. 
++ */ ++#define VDSO_SYMBOL_MAX 5 ++#define VDSO_SYMBOL_GTOD 3 ++ ++#define ARCH_VDSO_SYMBOLS_LIST \ ++ const char *aarch_vdso_symbol1 = "__vdso_getcpu"; \ ++ const char *aarch_vdso_symbol2 = "__vdso_clock_getres"; \ ++ const char *aarch_vdso_symbol3 = "__vdso_clock_gettime"; \ ++ const char *aarch_vdso_symbol4 = "__vdso_gettimeofday"; \ ++ const char *aarch_vdso_symbol5 = "__vdso_rt_sigreturn"; ++ ++#define ARCH_VDSO_SYMBOLS \ ++ aarch_vdso_symbol1, aarch_vdso_symbol2, aarch_vdso_symbol3, aarch_vdso_symbol4, aarch_vdso_symbol5 ++#endif +diff --git a/criu/arch/loongarch64/restorer.c b/criu/arch/loongarch64/restorer.c +new file mode 100644 +index 0000000000..730318ac14 +--- /dev/null ++++ b/criu/arch/loongarch64/restorer.c +@@ -0,0 +1,14 @@ ++#include ++ ++#include "restorer.h" ++#include "asm/restorer.h" ++#include ++ ++#include ++#include "log.h" ++#include "cpu.h" ++ ++int restore_nonsigframe_gpregs(UserLoongarch64GpregsEntry *r) ++{ ++ return 0; ++} +diff --git a/criu/arch/loongarch64/sigframe.c b/criu/arch/loongarch64/sigframe.c +new file mode 100644 +index 0000000000..18983ff138 +--- /dev/null ++++ b/criu/arch/loongarch64/sigframe.c +@@ -0,0 +1,12 @@ ++#include ++#include ++ ++#include "asm/sigframe.h" ++#include "asm/types.h" ++ ++#include "log.h" ++#include ++int sigreturn_prep_fpu_frame(struct rt_sigframe *sigframe, struct rt_sigframe *rsigframe) ++{ ++ return 0; ++} +diff --git a/criu/arch/loongarch64/vdso-pie.c b/criu/arch/loongarch64/vdso-pie.c +new file mode 100644 +index 0000000000..7a75d2741d +--- /dev/null ++++ b/criu/arch/loongarch64/vdso-pie.c +@@ -0,0 +1,48 @@ ++#include ++#include "asm/types.h" ++ ++#include ++#include ++#include "parasite-vdso.h" ++#include "log.h" ++#include "common/bug.h" ++ ++#ifdef LOG_PREFIX ++#undef LOG_PREFIX ++#endif ++#define LOG_PREFIX "vdso: " ++static void insert_trampoline(uintptr_t from, uintptr_t to) ++{ ++ struct { ++ uint32_t pcaddi; ++ uint32_t ldptr; ++ uint32_t jirl; ++ uint32_t guards; ++ uint64_t imm64; ++ } __packed jmp = { ++ .pcaddi = 0x18000095, /* pcaddi $x, 4 */ ++ .ldptr = 0x260002b5, /* ldptr.d $x, $x, 0 */ ++ .jirl = 0x4c0002a0, /* jirl $zero, $x, 0 */ ++ .guards = 0x002a0000, /* break 0 */ ++ .imm64 = to, ++ }; ++ memcpy((void *)from, &jmp, sizeof(jmp)); ++} ++ ++int vdso_redirect_calls(unsigned long base_to, unsigned long base_from, struct vdso_symtable *sto, ++ struct vdso_symtable *sfrom, bool compat_vdso) ++{ ++ unsigned int i; ++ unsigned long from, to; ++ for (i = 0; i < ARRAY_SIZE(sto->symbols); i++) { ++ if (vdso_symbol_empty(&sfrom->symbols[i])) ++ continue; ++ pr_debug("br: %lx/%lx -> %lx/%lx (index %d)\n", base_from, sfrom->symbols[i].offset, base_to, ++ sto->symbols[i].offset, i); ++ ++ from = base_from + sfrom->symbols[i].offset; ++ to = base_to + sto->symbols[i].offset; ++ insert_trampoline(from, to); ++ } ++ return 0; ++} + +From 7a4a4fbb9a0055112ce7ebc005ef56a317d0b64b Mon Sep 17 00:00:00 2001 +From: znley +Date: Mon, 12 Jun 2023 15:26:35 +0800 +Subject: [PATCH 5/6] zdtm: add loongarch64 support + +Signed-off-by: znley +--- + .../lib/arch/loongarch64/include/asm/atomic.h | 49 +++++++++++++++++++ + test/zdtm/lib/test.c | 2 +- + 2 files changed, 50 insertions(+), 1 deletion(-) + create mode 100644 test/zdtm/lib/arch/loongarch64/include/asm/atomic.h + +diff --git a/test/zdtm/lib/arch/loongarch64/include/asm/atomic.h b/test/zdtm/lib/arch/loongarch64/include/asm/atomic.h +new file mode 100644 +index 0000000000..1803aaeb44 +--- /dev/null ++++ b/test/zdtm/lib/arch/loongarch64/include/asm/atomic.h +@@ -0,0 +1,49 
@@ ++#ifndef __CR_ATOMIC_H__ ++#define __CR_ATOMIC_H__ ++ ++typedef uint32_t atomic_t; ++ ++#define atomic_get(v) (*(volatile int *)v) ++#define atomic_set(v, i) (*(v) = (i)) ++ ++static inline int __atomic_add(int i, atomic_t *v) ++{ ++ int result; ++ asm volatile("amadd_db.w %1, %2, %0" : "+ZB"(*v), "=&r"(result) : "r"(i) : "memory"); ++ return result + i; ++} ++ ++static inline void atomic_add(int i, atomic_t *v) ++{ ++ __atomic_add(i, v); ++} ++ ++static inline int atomic_add_return(int i, atomic_t *v) ++{ ++ return __atomic_add(i, v); ++} ++ ++#define atomic_sub(i, v) atomic_add(-(int)i, v) ++#define atomic_sub_return(i, v) atomic_add_return(-(int)i, v) ++#define atomic_inc(v) atomic_add_return(1, v) ++#define atomic_dec(v) atomic_sub_return(1, v) ++#define atomic_dec_return(v) atomic_sub_return(1, v) ++ ++static inline int atomic_cmpxchg(atomic_t *ptr, int old, int new) ++{ ++ int ret; ++ asm volatile("1: \n" ++ " ll.w %0, %1 \n" ++ " bne %0, %2, 2f \n" ++ " or $t0, %3, $zero \n" ++ " sc.w $t0, %1 \n" ++ " beqz $t0, 1b \n" ++ "2: \n" ++ " dbar 0 \n" ++ : "=&r"(ret), "+ZB"(*ptr) ++ : "r"(old), "r"(new) ++ : "t0", "memory"); ++ return ret; ++} ++ ++#endif /* __CR_ATOMIC_H__ */ +diff --git a/test/zdtm/lib/test.c b/test/zdtm/lib/test.c +index 6291ea4a7b..a5ba38b2dd 100644 +--- a/test/zdtm/lib/test.c ++++ b/test/zdtm/lib/test.c +@@ -406,7 +406,7 @@ pid_t sys_clone_unified(unsigned long flags, void *child_stack, void *parent_tid + { + #ifdef __x86_64__ + return (pid_t)syscall(__NR_clone, flags, child_stack, parent_tid, child_tid, newtls); +-#elif (__i386__ || __arm__ || __aarch64__ || __powerpc64__ || __mips__) ++#elif (__i386__ || __arm__ || __aarch64__ || __powerpc64__ || __mips__ || __loongarch64) + return (pid_t)syscall(__NR_clone, flags, child_stack, parent_tid, newtls, child_tid); + #elif __s390x__ + return (pid_t)syscall(__NR_clone, child_stack, flags, parent_tid, child_tid, newtls); + +From 53f1b58307ef74c26cfc8cb2d2f69a1ab40cbc3b Mon Sep 17 00:00:00 2001 +From: znley +Date: Tue, 11 Jul 2023 15:20:00 +0800 +Subject: [PATCH 6/6] ci: add workflow for loongarch64 + +Signed-off-by: znley +--- + .github/workflows/loongarch64-qemu-test.yml | 15 +++++ + scripts/ci/Makefile | 5 ++ + scripts/ci/loongarch64-qemu-test.sh | 69 +++++++++++++++++++++ + 3 files changed, 89 insertions(+) + create mode 100644 .github/workflows/loongarch64-qemu-test.yml + create mode 100755 scripts/ci/loongarch64-qemu-test.sh + +diff --git a/.github/workflows/loongarch64-qemu-test.yml b/.github/workflows/loongarch64-qemu-test.yml +new file mode 100644 +index 0000000000..ba22fa25ff +--- /dev/null ++++ b/.github/workflows/loongarch64-qemu-test.yml +@@ -0,0 +1,15 @@ ++name: LoongArch64 Qemu Test ++ ++on: [push, pull_request] ++ ++# Cancel any preceding run on the pull request. 
++concurrency: ++ group: loongarch64-qemu-test-${{ github.event.pull_request.number || github.ref }} ++ cancel-in-progress: ${{ github.ref != 'refs/heads/criu-dev' }} ++ ++jobs: ++ build: ++ runs-on: ubuntu-22.04 ++ steps: ++ - uses: actions/checkout@v2 ++ - run: sudo make -C scripts/ci loongarch64-qemu-test +diff --git a/scripts/ci/Makefile b/scripts/ci/Makefile +index 5c45791034..ce844a17ce 100644 +--- a/scripts/ci/Makefile ++++ b/scripts/ci/Makefile +@@ -110,5 +110,10 @@ check-commit: + + .PHONY: check-commit + ++loongarch64-qemu-test: ++ ./loongarch64-qemu-test.sh ++ ++.PHONY: loongarch64-qemu-test ++ + %: + $(MAKE) -C ../build $@$(target-suffix) +diff --git a/scripts/ci/loongarch64-qemu-test.sh b/scripts/ci/loongarch64-qemu-test.sh +new file mode 100755 +index 0000000000..52e587619c +--- /dev/null ++++ b/scripts/ci/loongarch64-qemu-test.sh +@@ -0,0 +1,69 @@ ++#!/bin/bash ++ ++set -o nounset ++set -o errexit ++set -x ++ ++./apt-install \ ++ apt-transport-https \ ++ ca-certificates \ ++ curl \ ++ software-properties-common \ ++ sshpass \ ++ openssh-client ++ ++curl -fsSL https://download.docker.com/linux/ubuntu/gpg | sudo apt-key add - ++ ++add-apt-repository \ ++ "deb [arch=amd64] https://download.docker.com/linux/ubuntu \ ++ $(lsb_release -cs) \ ++ stable test" ++ ++./apt-install docker-ce ++ ++# shellcheck source=/dev/null ++. /etc/lsb-release ++ ++# docker checkpoint and restore is an experimental feature ++echo '{ "experimental": true }' > /etc/docker/daemon.json ++service docker restart ++ ++docker info ++ ++# run a loongarch64 vm ++ ++PORT='2222' ++USER='root' ++PASSWORD='loongarch64' ++NAME='vm' ++ ++docker run \ ++ -d \ ++ --net host \ ++ --name $NAME \ ++ merore/archlinux-loongarch64 ++ ++run() { ++ if [ -z "$1" ]; then ++ echo "Command cannot be empty." ++ exit 1 ++ fi ++ sshpass -p $PASSWORD ssh -o StrictHostKeyChecking=no -p $PORT $USER@127.0.0.1 "$1" ++} ++ ++# wait vm to start ++while (! run "uname -a") ++do ++ echo "Wait vm to start..." ++ sleep 1 ++done ++echo "The loongarch64 vm is started!" 
++ ++# Tar criu and send to vm ++tar -cf criu.tar ../../../criu ++sshpass -p $PASSWORD scp -o StrictHostKeyChecking=no -P $PORT criu.tar $USER@127.0.0.1:/root ++ ++# build and test ++run 'cd /root; tar -xf criu.tar' ++run 'cd /root/criu; make -j4' ++run "cd /root/criu; ./test/zdtm.py run -t zdtm/static/maps02 -t zdtm/static/maps05 -t zdtm/static/maps06 -t zdtm/static/maps10 -t zdtm/static/maps_file_prot -t zdtm/static/memfd00 -t zdtm/transition/fork -t zdtm/transition/fork2 -t zdtm/transition/shmem -f h" diff --git a/criu/PKGBUILD b/criu/PKGBUILD index 06d3ec0591..86a3e31ecc 100644 --- a/criu/PKGBUILD +++ b/criu/PKGBUILD @@ -37,11 +37,13 @@ source=( 'no-python-pip.patch' 'no-recompile-on-install.patch' 'no-amdgpu-manpage.patch' + 2183.patch ) b2sums=('SKIP' 'd83da0ce0222c1aea1fc0c97bbf8a40f3cd5a6b5d55ee973b64f97bd9769df265b148e89cee8ee6564f065adc00552b511904f322555ac659b735933d42a9a64' 'e4b7c4831fa513d602c73e377847705240a6a42ee1986effd10a589784bd0ad818032ff8283c1f9fd17cb7ddf3204e4a932796a1df816afc30a0e594c92b50f6' - '9c713724e8f6b062f7a09e34555d31e5aa0315db6308b7527835484eaad8dbf5deac5c66521bf5a819462d5f38c64f6602ba421f7bbb73180a3b05189816c8f6') + '9c713724e8f6b062f7a09e34555d31e5aa0315db6308b7527835484eaad8dbf5deac5c66521bf5a819462d5f38c64f6602ba421f7bbb73180a3b05189816c8f6' + 'b0d762bb3cc93608fe32f96d3c018ada1e8708196031cb2f80ec6350af1d96d121476288083e1053805dcdeaf9fa7e636e08142a5ea83ecaa393341cffc9c664') pkgver() { cd "$pkgname" @@ -60,6 +62,7 @@ prepare() { # do not install amdgpu_plugin manpage patch -p1 -i "$srcdir/no-amdgpu-manpage.patch" + patch -p1 -i "$srcdir/2183.patch" } build() { diff --git a/cups-pdf/PKGBUILD b/cups-pdf/PKGBUILD index bb17900797..52f1ed1f19 100644 --- a/cups-pdf/PKGBUILD +++ b/cups-pdf/PKGBUILD @@ -3,7 +3,7 @@ pkgname=cups-pdf pkgver=3.0.1 -pkgrel=7 +pkgrel=8 pkgdesc="PDF printer for cups" arch=(loong64 x86_64) depends=('cups' 'ghostscript') diff --git a/cups-pk-helper/PKGBUILD b/cups-pk-helper/PKGBUILD index cec13e4588..89ead6f475 100644 --- a/cups-pk-helper/PKGBUILD +++ b/cups-pk-helper/PKGBUILD @@ -2,7 +2,7 @@ pkgname=cups-pk-helper pkgver=0.2.7 -pkgrel=1 +pkgrel=2 pkgdesc="A helper that makes system-config-printer use PolicyKit" arch=(loong64 x86_64) url="https://www.freedesktop.org/software/cups-pk-helper/releases/" diff --git a/cxxbridge/PKGBUILD b/cxxbridge/PKGBUILD index c1d223977b..dd2191dc75 100644 --- a/cxxbridge/PKGBUILD +++ b/cxxbridge/PKGBUILD @@ -19,7 +19,7 @@ b2sums=('91f01cb775413d89f5b8f4ae2adc49b91da48f47448c78787404274b7ba47e5a2891a44 prepare() { cd "cxx-${pkgver}" cp ../Cargo.lock . 
- cargo fetch --locked --target "$CARCH-unknown-linux-gnu" + cargo fetch --locked --target "`uname -m`-unknown-linux-gnu" } updlockfiles() { diff --git a/cypari2/PKGBUILD b/cypari2/PKGBUILD index cd77cd2e8a..a6e0e3622f 100644 --- a/cypari2/PKGBUILD +++ b/cypari2/PKGBUILD @@ -3,7 +3,7 @@ pkgbase=cypari2 pkgname=python-cypari2 pkgver=2.1.4 -pkgrel=1 +pkgrel=2 pkgdesc='Cython bindings for PARI' arch=(loong64 x86_64) url='https://www.sagemath.org' diff --git a/daktilo/PKGBUILD b/daktilo/PKGBUILD index f0e523cffe..fcb6b2898c 100644 --- a/daktilo/PKGBUILD +++ b/daktilo/PKGBUILD @@ -14,7 +14,7 @@ sha512sums=('059318cba86996f08540167c77ac5711aa470083ab92415dab3a37b82be3d224264 prepare() { cd "$pkgname-$pkgver" - cargo fetch --locked --target "$CARCH-unknown-linux-gnu" + cargo fetch --locked --target "`uname -m`-unknown-linux-gnu" mkdir completions/ mkdir man/ } diff --git a/dbeaver/PKGBUILD b/dbeaver/PKGBUILD index 7046b15c84..4f3b0ef85b 100644 --- a/dbeaver/PKGBUILD +++ b/dbeaver/PKGBUILD @@ -64,7 +64,7 @@ package() { done # Move into the target directory - cd "target/products/org.jkiss.dbeaver.core.product/linux/gtk/${CARCH}" + cd "target/products/org.jkiss.dbeaver.core.product/linux/gtk/`uname -m`" # Initially install everything into /usr/lib/dbeaver install -m 755 -d "${pkgdir}/usr/lib" diff --git a/dbus-c++/PKGBUILD b/dbus-c++/PKGBUILD index 8d288ca8cb..85c2487206 100644 --- a/dbus-c++/PKGBUILD +++ b/dbus-c++/PKGBUILD @@ -63,9 +63,12 @@ build() { --disable-static --enable-glib --prefix=/usr + --enable-glib --disable-examples --disable-tests ) cd $_pkgname-$pkgver + # -lpthread needed + export LDFLAGS+=" -lpthread" ./configure "${configure_options[@]}" # prevent excessive overlinking due to libtool sed -i -e 's/ -shared / -Wl,-O1,--as-needed\0/g' libtool diff --git a/dconf-editor/dconf-editor-fix-meson.patch b/dconf-editor/dconf-editor-fix-meson.patch new file mode 100644 index 0000000000..d3320604a5 --- /dev/null +++ b/dconf-editor/dconf-editor-fix-meson.patch @@ -0,0 +1,20 @@ +diff --git a/editor/meson.build b/editor/meson.build +index 4c89ab99..0eb3bb1a 100644 +--- a/editor/meson.build ++++ b/editor/meson.build +@@ -1,7 +1,6 @@ + desktop = dconf_editor_namespace + '.desktop' + + i18n.merge_file ( +- desktop, + type: 'desktop', + input: desktop + '.in', + output: desktop, +@@ -26,7 +25,6 @@ configure_file( + appdata = dconf_editor_namespace + '.appdata.xml' + + i18n.merge_file( +- appdata, + input: appdata + '.in', + output: appdata, + po_dir: po_dir, diff --git a/deepin-anything/PKGBUILD b/deepin-anything/PKGBUILD index 7bb034f561..492a91a707 100644 --- a/deepin-anything/PKGBUILD +++ b/deepin-anything/PKGBUILD @@ -9,7 +9,7 @@ pkgdesc="Deepin Anything file search tool" arch=('loong64' 'x86_64') url="https://github.com/linuxdeepin/deepin-anything" license=('GPL3') -makedepends=('dtkcore' 'udisks2-qt5') +makedepends=('dtkcore' 'udisks2-qt5' 'pcre') source=("https://github.com/linuxdeepin/deepin-anything/archive/$pkgver/$pkgname-$pkgver.tar.gz" deepin-anything-server.sysusers) sha512sums=('73cc90f394b53a8d8edc17b258b63789cddd3094008de8403b24cdb0ed94e677cb9eedac297b73e8142204612a4ae5119450ebb08be4ea1c94abf0d375f8b51d' diff --git a/deepin-desktop-base/PKGBUILD b/deepin-desktop-base/PKGBUILD index 8b233e54c7..ab28810655 100644 --- a/deepin-desktop-base/PKGBUILD +++ b/deepin-desktop-base/PKGBUILD @@ -12,9 +12,16 @@ url="https://github.com/linuxdeepin/deepin-desktop-base" license=('GPL3') groups=('deepin') 
source=("https://github.com/linuxdeepin/deepin-desktop-base/archive/$pkgver/$pkgname-$pkgver.tar.gz" - distribution.info) + distribution.info + deepin-deskto-base-la64.patch) sha512sums=('3828007c10836a63d62244f5490bd0f8b66763b2b5a2ff43919c1e53e5d0d7a39973b44cc4ec1f17598f5f52f1db349d8617a44fa5a2d1875789b7b616f62dc0' - '17b17c3174a052ae93e57cfffaf551cac05a56da62b2f4829f5e8f4d0bd9dbe47e043ffcbd2c6299a771f7f8b8f2fffc727f2b5b754ab1cffbbf72fa3f54d035') + '17b17c3174a052ae93e57cfffaf551cac05a56da62b2f4829f5e8f4d0bd9dbe47e043ffcbd2c6299a771f7f8b8f2fffc727f2b5b754ab1cffbbf72fa3f54d035' + 'f79f5377f13e76ea812c822d9d6c41aa0faac16de1a0ddaa95b05673022fc5fe8f332f3bba6a99f6dfff4ffac6aa32d4bdd2ec44104fe1503de25cefa2709efe') + +prepare() { + cd "$pkgname-$pkgver" + patch -p1 -i "$srcdir/deepin-deskto-base-la64.patch" +} build() { cd $pkgname-$pkgver @@ -37,6 +44,7 @@ package() { ln -s ../usr/lib/deepin/desktop-version "$pkgdir"/etc/deepin-version # Install os-version and rename to uos-version + cp files/os-version-amd files/os-version install -Dm644 files/os-version "$pkgdir"/etc/uos-version # Remove apt-specific templates diff --git a/deepin-desktop-base/deepin-deskto-base-la64.patch b/deepin-desktop-base/deepin-deskto-base-la64.patch new file mode 100644 index 0000000000..362257c379 --- /dev/null +++ b/deepin-desktop-base/deepin-deskto-base-la64.patch @@ -0,0 +1,27 @@ +Index: deepin-desktop-base-2021.06.16/Makefile +=================================================================== +--- deepin-desktop-base-2021.06.16.orig/Makefile ++++ deepin-desktop-base-2021.06.16/Makefile +@@ -23,6 +23,8 @@ build: + sed -e "s|@@VERSION@@|$(VERSION)|g" -e "s|@@RELEASE@@|$(RELEASE)|g" files/os-version-loongson > files/os-version + else ifeq (${ARCH_BUILD}, mips64) + sed -e "s|@@VERSION@@|$(VERSION)|g" -e "s|@@RELEASE@@|$(RELEASE)|g" files/desktop-version-loongson.in > files/desktop-version ++ else ifeq (${ARCH_BUILD}, loongarch64) ++ sed -e "s|@@VERSION@@|$(VERSION)|g" -e "s|@@RELEASE@@|$(RELEASE)|g" files/desktop-version-loongarch64.in > files/desktop-version + else ifeq (${ARCH_BUILD}, sw_64) + sed -e "s|@@VERSION@@|$(VERSION)|g" -e "s|@@RELEASE@@|$(RELEASE)|g" files/desktop-version-sw.in > files/desktop-version + sed -e "s|@@VERSION@@|$(VERSION)|g" -e "s|@@RELEASE@@|$(RELEASE)|g" files/os-version-sw > files/os-version +Index: deepin-desktop-base-2021.06.16/files/desktop-version-loongarch64.in +=================================================================== +--- /dev/null ++++ deepin-desktop-base-2021.06.16/files/desktop-version-loongarch64.in +@@ -0,0 +1,9 @@ ++[Release] ++Version=@@VERSION@@ ++Type=Desktop ++Type[zh_CN]=社区版 ++Edition=Y2020E0001 ++Copyright=Y2020CR001 ++[Addition] ++Milestone=@@RELEASE@@ ++Buildid=build1 diff --git a/deepin-grand-search/62.patch b/deepin-grand-search/62.patch new file mode 100644 index 0000000000..7c83b5a7be --- /dev/null +++ b/deepin-grand-search/62.patch @@ -0,0 +1,126 @@ +From 66dd5c6c79922ee5366d9bf09e8a9879f7231306 Mon Sep 17 00:00:00 2001 +From: xzl +Date: Thu, 7 Sep 2023 16:05:54 +0800 +Subject: [PATCH] fix: fix build error in v23 + +Log: +--- + src/grand-search-daemon/main.cpp | 1 + + .../exhibition/matchresult/listview/grandsearchlistdelegate.cpp | 1 + + .../gui/exhibition/matchresult/listview/grandsearchlistview.cpp | 1 + + src/grand-search/gui/exhibition/preview/generalpreviewplugin.cpp | 1 + + src/grand-search/gui/searchconfig/bestmatchwidget.cpp | 1 + + .../gui/searchconfig/blacklistview/blacklistview.cpp | 1 + + 
src/grand-search/gui/searchconfig/switchwidget/switchwidget.cpp | 1 + + src/libgrand-search-daemon/main.cpp | 1 + + src/preview-plugin/audio-preview/audioview.cpp | 1 + + 9 files changed, 9 insertions(+) + +diff --git a/src/grand-search-daemon/main.cpp b/src/grand-search-daemon/main.cpp +index a2aada02..87bab961 100644 +--- a/src/grand-search-daemon/main.cpp ++++ b/src/grand-search-daemon/main.cpp +@@ -10,6 +10,7 @@ + #include + + #include ++#include + + #include + #include +diff --git a/src/grand-search/gui/exhibition/matchresult/listview/grandsearchlistdelegate.cpp b/src/grand-search/gui/exhibition/matchresult/listview/grandsearchlistdelegate.cpp +index 26cb863b..b71278c9 100755 +--- a/src/grand-search/gui/exhibition/matchresult/listview/grandsearchlistdelegate.cpp ++++ b/src/grand-search/gui/exhibition/matchresult/listview/grandsearchlistdelegate.cpp +@@ -28,6 +28,7 @@ + #define TailMaxWidth 150 // 拖尾信息最大显示宽度 + + DWIDGET_USE_NAMESPACE ++DGUI_USE_NAMESPACE + using namespace GrandSearch; + + GrandSearchListDelegate::GrandSearchListDelegate(QAbstractItemView *parent) +diff --git a/src/grand-search/gui/exhibition/matchresult/listview/grandsearchlistview.cpp b/src/grand-search/gui/exhibition/matchresult/listview/grandsearchlistview.cpp +index f0133d41..d665d6b5 100755 +--- a/src/grand-search/gui/exhibition/matchresult/listview/grandsearchlistview.cpp ++++ b/src/grand-search/gui/exhibition/matchresult/listview/grandsearchlistview.cpp +@@ -20,6 +20,7 @@ + + using namespace GrandSearch; + DCORE_USE_NAMESPACE ++DGUI_USE_NAMESPACE + DWIDGET_USE_NAMESPACE + + #define ICON_SIZE 24 +diff --git a/src/grand-search/gui/exhibition/preview/generalpreviewplugin.cpp b/src/grand-search/gui/exhibition/preview/generalpreviewplugin.cpp +index 2d6acdc8..16e3a19e 100644 +--- a/src/grand-search/gui/exhibition/preview/generalpreviewplugin.cpp ++++ b/src/grand-search/gui/exhibition/preview/generalpreviewplugin.cpp +@@ -25,6 +25,7 @@ + using namespace GrandSearch; + + DWIDGET_USE_NAMESPACE ++DGUI_USE_NAMESPACE + + NameLabel::NameLabel(const QString &text, QWidget *parent, Qt::WindowFlags f): + QLabel(text, parent, f) +diff --git a/src/grand-search/gui/searchconfig/bestmatchwidget.cpp b/src/grand-search/gui/searchconfig/bestmatchwidget.cpp +index 85e9d7cb..37606ebb 100644 +--- a/src/grand-search/gui/searchconfig/bestmatchwidget.cpp ++++ b/src/grand-search/gui/searchconfig/bestmatchwidget.cpp +@@ -14,6 +14,7 @@ + #include + + DWIDGET_USE_NAMESPACE ++DGUI_USE_NAMESPACE + using namespace GrandSearch; + + BestMatchWidget::BestMatchWidget(QWidget *parent) +diff --git a/src/grand-search/gui/searchconfig/blacklistview/blacklistview.cpp b/src/grand-search/gui/searchconfig/blacklistview/blacklistview.cpp +index 1473e4c8..eb634305 100644 +--- a/src/grand-search/gui/searchconfig/blacklistview/blacklistview.cpp ++++ b/src/grand-search/gui/searchconfig/blacklistview/blacklistview.cpp +@@ -26,6 +26,7 @@ DCORE_USE_NAMESPACE + #define InitCount 7 // 初始显示数量 + + DWIDGET_USE_NAMESPACE ++DGUI_USE_NAMESPACE + using namespace GrandSearch; + + BlackListView::BlackListView(QWidget *parent) +diff --git a/src/grand-search/gui/searchconfig/switchwidget/switchwidget.cpp b/src/grand-search/gui/searchconfig/switchwidget/switchwidget.cpp +index 1449f095..ba0efbfa 100644 +--- a/src/grand-search/gui/searchconfig/switchwidget/switchwidget.cpp ++++ b/src/grand-search/gui/searchconfig/switchwidget/switchwidget.cpp +@@ -12,6 +12,7 @@ + #define ICONLABELSIZE 36 + + DWIDGET_USE_NAMESPACE ++DGUI_USE_NAMESPACE + using namespace GrandSearch; + + 
SwitchWidget::SwitchWidget(const QString &title, QWidget *parent) +diff --git a/src/libgrand-search-daemon/main.cpp b/src/libgrand-search-daemon/main.cpp +index 1dba137b..53aab763 100644 +--- a/src/libgrand-search-daemon/main.cpp ++++ b/src/libgrand-search-daemon/main.cpp +@@ -13,6 +13,7 @@ + #include + #include + #include ++#include + + GRANDSEARCH_USE_NAMESPACE + DCORE_USE_NAMESPACE +diff --git a/src/preview-plugin/audio-preview/audioview.cpp b/src/preview-plugin/audio-preview/audioview.cpp +index d50fde46..2e7d8cd7 100644 +--- a/src/preview-plugin/audio-preview/audioview.cpp ++++ b/src/preview-plugin/audio-preview/audioview.cpp +@@ -19,6 +19,7 @@ + #define MARGIN_SIZE 15 + + DWIDGET_USE_NAMESPACE ++DGUI_USE_NAMESPACE + GRANDSEARCH_USE_NAMESPACE + using namespace GrandSearch::audio_preview; + diff --git a/deepin-grand-search/PKGBUILD b/deepin-grand-search/PKGBUILD index 469780e349..86f4e48c7d 100644 --- a/deepin-grand-search/PKGBUILD +++ b/deepin-grand-search/PKGBUILD @@ -13,7 +13,8 @@ depends=(deepin-anything deepin-application-manager taglib ffmpeg icu deepin-pdf makedepends=(cmake deepin-dock ninja qt5-tools) groups=(deepin-extra) source=("$pkgname-$pkgver.tar.gz::https://github.com/linuxdeepin/dde-grand-search/archive/$pkgver.tar.gz" - taglib-2.patch) + taglib-2.patch + 62.patch) sha512sums=('7d2bd203b9c0dfef57a0667690252a9b3b3f3b5b2e30f44f6706de0d98885908f21f982fd19257812b92a0564e4e7888f8a6789bee2aa5ac2c573a2cadf0b838' '8364cd5aa0350a7d109be7ce10035c6c4e3fd6686205bc880017b1fc93a10cff6e78a8f66daeb25427c416a6dc075482136146c9d8278aee6de71653673d59a0') @@ -22,6 +23,7 @@ prepare() { # https://github.com/linuxdeepin/dde-grand-search/pull/65 sed -i 's/-fPIE -pie//g' src/*/CMakeLists.txt + patch -p1 -i $srcdir/62.patch patch -p1 -i ../taglib-2.patch } diff --git a/devtools/PKGBUILD b/devtools/PKGBUILD index 9ec5c0b21a..1587c5f80a 100644 --- a/devtools/PKGBUILD +++ b/devtools/PKGBUILD @@ -34,11 +34,12 @@ depends=( ) makedepends=( asciidoc - shellcheck +# shellcheck ) optdepends=('btrfs-progs: btrfs support') replaces=(devtools-git-poc) -source=(https://gitlab.archlinux.org/archlinux/devtools/-/releases/v${pkgver}/downloads/devtools-${pkgver}.tar.gz{,.sig}) +source=(https://gitlab.archlinux.org/archlinux/devtools/-/releases/v${pkgver}/downloads/devtools-${pkgver}.tar.gz{,.sig} + devtools-loong64-1.0.4.patch) validpgpkeys=( '4AA4767BBC9C4B1D18AE28B77F2D434B9741E8AC' # Pierre Schmitz '86CFFCA918CF3AF47147588051E8B148A9999C34' # Evangelos Foutras @@ -49,9 +50,16 @@ validpgpkeys=( 'E240B57E2C4630BA768E2F26FC1B547C8D8172C8' # Levente Polyak ) sha256sums=('dded4b47f669751cf86367284c9adabbec92321f5fb0fc684d4fcc0e039c6719' - 'SKIP') + 'SKIP' + '1ad1e4de88d9f7e1ae2de9b7ab4a494634e39075ee04bb497241cc5e154d1a00') b2sums=('bfc3727fe70dbae1333d491a48342955230072830ab9cb7a308992eaba7127202ed6a3489398f86540ebc213b2d530d92ae7485fb02241a67f7d71c496088123' - 'SKIP') + 'SKIP' + '3cde8f2e4ca9e69b7d9a7159391896511dbd5b2f1f9f575c49c790ce330b8749eba6c7c1f69eebe33d632a245feacddd8fb84e7f79efbf5bab7d9ea55bf7d15f') + +prepare() { + cd ${pkgname}-${pkgver} + patch -p1 -i $srcdir/devtools-loong64-1.0.4.patch +} build() { cd ${pkgname}-${pkgver} diff --git a/devtools/devtools-loong64-1.0.4.patch b/devtools/devtools-loong64-1.0.4.patch new file mode 100644 index 0000000000..36f76978f9 --- /dev/null +++ b/devtools/devtools-loong64-1.0.4.patch @@ -0,0 +1,898 @@ +diff --git a/Makefile b/Makefile +index bfe80a8..20c3152 100644 +--- a/Makefile ++++ b/Makefile +@@ -25,28 +25,41 @@ COMMITPKG_LINKS = \ + extrapkg \ + 
extra-testingpkg \ + extra-stagingpkg \ ++ laurpkg \ ++ laur-testingpkg \ ++ laur-stagingpkg \ + multilibpkg \ + multilib-testingpkg \ + multilib-stagingpkg \ + kde-unstablepkg \ +- gnome-unstablepkg ++ gnome-unstablepkg \ ++ wine-appspkg + + ARCHBUILD_LINKS = \ ++ core-testing-loong64-build \ + core-testing-x86_64-build \ + core-testing-x86_64_v3-build \ ++ core-staging-loong64-build \ + core-staging-x86_64-build \ + core-staging-x86_64_v3-build \ ++ extra-loong64-build \ + extra-x86_64-build \ + extra-x86_64_v3-build \ ++ extra-testing-loong64-build \ + extra-testing-x86_64-build \ + extra-testing-x86_64_v3-build \ ++ extra-staging-loong64-build \ + extra-staging-x86_64-build \ + extra-staging-x86_64_v3-build \ ++ laur-loong64-build \ ++ laur-testing-loong64-build \ ++ laur-staging-loong64-build \ + multilib-build \ + multilib-testing-build \ + multilib-staging-build \ +- kde-unstable-x86_64-build \ +- gnome-unstable-x86_64-build ++ kde-unstable-loong64-build kde-unstable-x86_64-build \ ++ gnome-unstable-loong64-build gnome-unstable-x86_64-build \ ++ wine-apps-loong64-build + + COMPLETIONS = $(addprefix $(BUILDDIR)/,$(patsubst %.in,%,$(wildcard contrib/completion/*/*))) + +@@ -159,7 +172,7 @@ dist: + git archive --format=tar --prefix=devtools-$(V)/ v$(V) | gzip > devtools-$(V).tar.gz + gpg --detach-sign --use-agent devtools-$(V).tar.gz + +-check: $(BINPROGS_SRC) $(LIBRARY_SRC) contrib/completion/bash/devtools.in config/makepkg/x86_64.conf contrib/makepkg/PKGBUILD.proto ++check: $(BINPROGS_SRC) $(LIBRARY_SRC) contrib/completion/bash/devtools.in config/makepkg/loong64.conf config/makepkg/x86_64.conf contrib/makepkg/PKGBUILD.proto + shellcheck $^ + + .PHONY: all binprogs library completion conf man clean install uninstall tag dist upload check +diff --git a/config/makepkg/loong64.conf b/config/makepkg/loong64.conf +new file mode 100644 +index 0000000..c1b2987 +--- /dev/null ++++ b/config/makepkg/loong64.conf +@@ -0,0 +1,162 @@ ++#!/hint/bash ++# shellcheck disable=2034 ++ ++# ++# /etc/makepkg.conf ++# ++ ++######################################################################### ++# SOURCE ACQUISITION ++######################################################################### ++# ++#-- The download utilities that makepkg should use to acquire sources ++# Format: 'protocol::agent' ++DLAGENTS=('file::/usr/bin/curl -qgC - -o %o %u' ++ 'ftp::/usr/bin/curl -qgfC - --ftp-pasv --retry 3 --retry-delay 3 -o %o %u' ++ 'http::/usr/bin/curl -qgb "" -fLC - --retry 3 --retry-delay 3 -o %o %u' ++ 'https::/usr/bin/curl -qgb "" -fLC - --retry 3 --retry-delay 3 -o %o %u' ++ 'rsync::/usr/bin/rsync --no-motd -z %u %o' ++ 'scp::/usr/bin/scp -C %u %o') ++ ++# Other common tools: ++# /usr/bin/snarf ++# /usr/bin/lftpget -c ++# /usr/bin/wget ++ ++#-- The package required by makepkg to download VCS sources ++# Format: 'protocol::package' ++VCSCLIENTS=('bzr::bzr' ++ 'fossil::fossil' ++ 'git::git' ++ 'hg::mercurial' ++ 'svn::subversion') ++ ++######################################################################### ++# ARCHITECTURE, COMPILE FLAGS ++######################################################################### ++# ++CARCH="loong64" ++CHOST="loongarch64-unknown-linux-gnu" ++ ++#-- Compiler and Linker Flags ++#CPPFLAGS="" ++CFLAGS="-mabi=lp64d -march=loongarch64 -mlsx -O2 -pipe -fno-plt -fexceptions \ ++ -Wp,-D_FORTIFY_SOURCE=2 -Wformat -Werror=format-security \ ++ -fstack-clash-protection" ++CXXFLAGS="$CFLAGS -Wp,-D_GLIBCXX_ASSERTIONS" ++LDFLAGS="-Wl,-O1,--sort-common,--as-needed,-z,relro,-z,now" 
++LTOFLAGS="-flto=auto" ++RUSTFLAGS="" ++#-- Make Flags: change this for DistCC/SMP systems ++#MAKEFLAGS="-j2" ++#-- Debugging flags ++DEBUG_CFLAGS="-g" ++DEBUG_CXXFLAGS="$DEBUG_CFLAGS" ++DEBUG_RUSTFLAGS="-C debuginfo=2" ++ ++######################################################################### ++# BUILD ENVIRONMENT ++######################################################################### ++# ++# Makepkg defaults: BUILDENV=(!distcc !color !ccache check !sign) ++# A negated environment option will do the opposite of the comments below. ++# ++#-- distcc: Use the Distributed C/C++/ObjC compiler ++#-- color: Colorize output messages ++#-- ccache: Use ccache to cache compilation ++#-- check: Run the check() function if present in the PKGBUILD ++#-- sign: Generate PGP signature file ++# ++BUILDENV=(!distcc color !ccache check !sign) ++# ++#-- If using DistCC, your MAKEFLAGS will also need modification. In addition, ++#-- specify a space-delimited list of hosts running in the DistCC cluster. ++#DISTCC_HOSTS="" ++# ++#-- Specify a directory for package building. ++#BUILDDIR=/tmp/makepkg ++ ++######################################################################### ++# GLOBAL PACKAGE OPTIONS ++# These are default values for the options=() settings ++######################################################################### ++# ++# Makepkg defaults: OPTIONS=(!strip docs libtool staticlibs emptydirs !zipman !purge !debug !lto) ++# A negated option will do the opposite of the comments below. ++# ++#-- strip: Strip symbols from binaries/libraries ++#-- docs: Save doc directories specified by DOC_DIRS ++#-- libtool: Leave libtool (.la) files in packages ++#-- staticlibs: Leave static library (.a) files in packages ++#-- emptydirs: Leave empty directories in packages ++#-- zipman: Compress manual (man and info) pages in MAN_DIRS with gzip ++#-- purge: Remove files specified by PURGE_TARGETS ++#-- debug: Add debugging flags as specified in DEBUG_* variables ++#-- lto: Add compile flags for building with link time optimization ++# ++OPTIONS=(strip docs !libtool !staticlibs emptydirs zipman purge debug lto) ++ ++#-- File integrity checks to use. Valid: md5, sha1, sha224, sha256, sha384, sha512, b2 ++INTEGRITY_CHECK=(sha256) ++#-- Options to be used when stripping binaries. See `man strip' for details. ++STRIP_BINARIES="--strip-all" ++#-- Options to be used when stripping shared libraries. See `man strip' for details. ++STRIP_SHARED="--strip-unneeded" ++#-- Options to be used when stripping static libraries. See `man strip' for details. 
++STRIP_STATIC="--strip-debug" ++#-- Manual (man and info) directories to compress (if zipman is specified) ++MAN_DIRS=({usr{,/local}{,/share},opt/*}/{man,info}) ++#-- Doc directories to remove (if !docs is specified) ++DOC_DIRS=(usr/{,local/}{,share/}{doc,gtk-doc} opt/*/{doc,gtk-doc}) ++#-- Files to be removed from all packages (if purge is specified) ++PURGE_TARGETS=(usr/{,share}/info/dir .packlist *.pod) ++#-- Directory to store source code in for debug packages ++DBGSRCDIR="/usr/src/debug" ++ ++######################################################################### ++# PACKAGE OUTPUT ++######################################################################### ++# ++# Default: put built package and cached source in build directory ++# ++#-- Destination: specify a fixed directory where all packages will be placed ++#PKGDEST=/home/packages ++#-- Source cache: specify a fixed directory where source files will be cached ++#SRCDEST=/home/sources ++#-- Source packages: specify a fixed directory where all src packages will be placed ++#SRCPKGDEST=/home/srcpackages ++#-- Log files: specify a fixed directory where all log files will be placed ++#LOGDEST=/home/makepkglogs ++#-- Packager: name/email of the person or organization building packages ++#PACKAGER="John Doe " ++#-- Specify a key to use for package signing ++#GPGKEY="" ++ ++######################################################################### ++# COMPRESSION DEFAULTS ++######################################################################### ++# ++COMPRESSGZ=(gzip -c -f -n) ++COMPRESSBZ2=(bzip2 -c -f) ++COMPRESSXZ=(xz -c -z -) ++COMPRESSZST=(zstd -c -T0 --ultra -20 -) ++COMPRESSLRZ=(lrzip -q) ++COMPRESSLZO=(lzop -q) ++COMPRESSZ=(compress -c -f) ++COMPRESSLZ4=(lz4 -q) ++COMPRESSLZ=(lzip -c -f) ++ ++######################################################################### ++# EXTENSION DEFAULTS ++######################################################################### ++# ++PKGEXT='.pkg.tar.zst' ++SRCEXT='.src.tar.gz' ++ ++######################################################################### ++# OTHER ++######################################################################### ++# ++#-- Command used to run pacman as root, instead of trying sudo and su ++#PACMAN_AUTH=() ++# vim: set ft=sh ts=2 sw=2 et: +diff --git a/config/pacman/laur-staging.conf b/config/pacman/laur-staging.conf +new file mode 100644 +index 0000000..1726921 +--- /dev/null ++++ b/config/pacman/laur-staging.conf +@@ -0,0 +1,104 @@ ++# ++# /etc/pacman.conf ++# ++# See the pacman.conf(5) manpage for option and repository directives ++ ++# ++# GENERAL OPTIONS ++# ++[options] ++# The following paths are commented out with their default values listed. ++# If you wish to use different paths, uncomment and update the paths. 
++#RootDir = / ++#DBPath = /var/lib/pacman/ ++#CacheDir = /var/cache/pacman/pkg/ ++#LogFile = /var/log/pacman.log ++#GPGDir = /etc/pacman.d/gnupg/ ++#HookDir = /etc/pacman.d/hooks/ ++HoldPkg = pacman glibc ++#XferCommand = /usr/bin/curl -L -C - -f -o %o %u ++#XferCommand = /usr/bin/wget --passive-ftp -c -O %o %u ++#CleanMethod = KeepInstalled ++Architecture = auto ++ ++# Pacman won't upgrade packages listed in IgnorePkg and members of IgnoreGroup ++#IgnorePkg = ++#IgnoreGroup = ++ ++#NoUpgrade = ++#NoExtract = ++ ++# Misc options ++#UseSyslog ++#Color ++NoProgressBar ++# We cannot check disk space from within a chroot environment ++#CheckSpace ++VerbosePkgLists ++ParallelDownloads = 5 ++ ++# By default, pacman accepts packages signed by keys that its local keyring ++# trusts (see pacman-key and its man page), as well as unsigned packages. ++SigLevel = Required DatabaseOptional ++LocalFileSigLevel = Optional ++#RemoteFileSigLevel = Required ++ ++# NOTE: You must run `pacman-key --init` before first using pacman; the local ++# keyring can then be populated with the keys of all official Arch Linux ++# packagers with `pacman-key --populate archlinux`. ++ ++# ++# REPOSITORIES ++# - can be defined here or included from another file ++# - pacman will search repositories in the order defined here ++# - local/custom mirrors can be added here or in separate files ++# - repositories listed first will take precedence when packages ++# have identical names, regardless of version number ++# - URLs will have $repo replaced by the name of the current repo ++# - URLs will have $arch replaced by the name of the architecture ++# ++# Repository entries are of the format: ++# [repo-name] ++# Server = ServerName ++# Include = IncludePath ++# ++# The header [repo-name] is crucial - it must be present and ++# uncommented to enable the repo. ++# ++ ++# The testing repositories are disabled by default. To enable, uncomment the ++# repo name header and Include lines. You can add preferred servers immediately ++# after the header, and they will be used before the default mirrors. ++ ++[core-staging] ++Include = /etc/pacman.d/mirrorlist ++ ++[core-testing] ++Include = /etc/pacman.d/mirrorlist ++ ++[core] ++Include = /etc/pacman.d/mirrorlist ++ ++[extra-staging] ++Include = /etc/pacman.d/mirrorlist ++ ++[extra-testing] ++Include = /etc/pacman.d/mirrorlist ++ ++[extra] ++Include = /etc/pacman.d/mirrorlist ++ ++[laur-staging] ++Include = /etc/pacman.d/mirrorlist ++ ++[laur-testing] ++Include = /etc/pacman.d/mirrorlist ++ ++[laur] ++Include = /etc/pacman.d/mirrorlist ++ ++# An example of a custom package repository. See the pacman manpage for ++# tips on creating your own repositories. ++#[custom] ++#SigLevel = Optional TrustAll ++#Server = file:///home/custompkgs +diff --git a/config/pacman/laur-testing.conf b/config/pacman/laur-testing.conf +new file mode 100644 +index 0000000..bbe7791 +--- /dev/null ++++ b/config/pacman/laur-testing.conf +@@ -0,0 +1,95 @@ ++# ++# /etc/pacman.conf ++# ++# See the pacman.conf(5) manpage for option and repository directives ++ ++# ++# GENERAL OPTIONS ++# ++[options] ++# The following paths are commented out with their default values listed. ++# If you wish to use different paths, uncomment and update the paths. 
++#RootDir = / ++#DBPath = /var/lib/pacman/ ++#CacheDir = /var/cache/pacman/pkg/ ++#LogFile = /var/log/pacman.log ++#GPGDir = /etc/pacman.d/gnupg/ ++#HookDir = /etc/pacman.d/hooks/ ++HoldPkg = pacman glibc ++#XferCommand = /usr/bin/curl -L -C - -f -o %o %u ++#XferCommand = /usr/bin/wget --passive-ftp -c -O %o %u ++#CleanMethod = KeepInstalled ++Architecture = auto ++ ++# Pacman won't upgrade packages listed in IgnorePkg and members of IgnoreGroup ++#IgnorePkg = ++#IgnoreGroup = ++ ++#NoUpgrade = ++#NoExtract = ++ ++# Misc options ++#UseSyslog ++#Color ++NoProgressBar ++# We cannot check disk space from within a chroot environment ++#CheckSpace ++VerbosePkgLists ++ParallelDownloads = 5 ++ ++# By default, pacman accepts packages signed by keys that its local keyring ++# trusts (see pacman-key and its man page), as well as unsigned packages. ++SigLevel = Required DatabaseOptional ++LocalFileSigLevel = Optional ++#RemoteFileSigLevel = Required ++ ++# NOTE: You must run `pacman-key --init` before first using pacman; the local ++# keyring can then be populated with the keys of all official Arch Linux ++# packagers with `pacman-key --populate archlinux`. ++ ++# ++# REPOSITORIES ++# - can be defined here or included from another file ++# - pacman will search repositories in the order defined here ++# - local/custom mirrors can be added here or in separate files ++# - repositories listed first will take precedence when packages ++# have identical names, regardless of version number ++# - URLs will have $repo replaced by the name of the current repo ++# - URLs will have $arch replaced by the name of the architecture ++# ++# Repository entries are of the format: ++# [repo-name] ++# Server = ServerName ++# Include = IncludePath ++# ++# The header [repo-name] is crucial - it must be present and ++# uncommented to enable the repo. ++# ++ ++# The testing repositories are disabled by default. To enable, uncomment the ++# repo name header and Include lines. You can add preferred servers immediately ++# after the header, and they will be used before the default mirrors. ++ ++[core-testing] ++Include = /etc/pacman.d/mirrorlist ++ ++[core] ++Include = /etc/pacman.d/mirrorlist ++ ++[extra-testing] ++Include = /etc/pacman.d/mirrorlist ++ ++[extra] ++Include = /etc/pacman.d/mirrorlist ++ ++[laur-testing] ++Include = /etc/pacman.d/mirrorlist ++ ++[laur] ++Include = /etc/pacman.d/mirrorlist ++ ++# An example of a custom package repository. See the pacman manpage for ++# tips on creating your own repositories. ++#[custom] ++#SigLevel = Optional TrustAll ++#Server = file:///home/custompkgs +diff --git a/config/pacman/laur.conf b/config/pacman/laur.conf +new file mode 100644 +index 0000000..6c50d86 +--- /dev/null ++++ b/config/pacman/laur.conf +@@ -0,0 +1,92 @@ ++# ++# /etc/pacman.conf ++# ++# See the pacman.conf(5) manpage for option and repository directives ++ ++# ++# GENERAL OPTIONS ++# ++[options] ++# The following paths are commented out with their default values listed. ++# If you wish to use different paths, uncomment and update the paths. 
++#RootDir = / ++#DBPath = /var/lib/pacman/ ++#CacheDir = /var/cache/pacman/pkg/ ++#LogFile = /var/log/pacman.log ++#GPGDir = /etc/pacman.d/gnupg/ ++#HookDir = /etc/pacman.d/hooks/ ++HoldPkg = pacman glibc ++#XferCommand = /usr/bin/curl -L -C - -f -o %o %u ++#XferCommand = /usr/bin/wget --passive-ftp -c -O %o %u ++#CleanMethod = KeepInstalled ++Architecture = auto ++ ++# Pacman won't upgrade packages listed in IgnorePkg and members of IgnoreGroup ++#IgnorePkg = ++#IgnoreGroup = ++ ++#NoUpgrade = ++#NoExtract = ++ ++# Misc options ++#UseSyslog ++#Color ++NoProgressBar ++# We cannot check disk space from within a chroot environment ++#CheckSpace ++VerbosePkgLists ++ParallelDownloads = 5 ++ ++# By default, pacman accepts packages signed by keys that its local keyring ++# trusts (see pacman-key and its man page), as well as unsigned packages. ++SigLevel = Required DatabaseOptional ++LocalFileSigLevel = Optional ++#RemoteFileSigLevel = Required ++ ++# NOTE: You must run `pacman-key --init` before first using pacman; the local ++# keyring can then be populated with the keys of all official Arch Linux ++# packagers with `pacman-key --populate archlinux`. ++ ++# ++# REPOSITORIES ++# - can be defined here or included from another file ++# - pacman will search repositories in the order defined here ++# - local/custom mirrors can be added here or in separate files ++# - repositories listed first will take precedence when packages ++# have identical names, regardless of version number ++# - URLs will have $repo replaced by the name of the current repo ++# - URLs will have $arch replaced by the name of the architecture ++# ++# Repository entries are of the format: ++# [repo-name] ++# Server = ServerName ++# Include = IncludePath ++# ++# The header [repo-name] is crucial - it must be present and ++# uncommented to enable the repo. ++# ++ ++# The testing repositories are disabled by default. To enable, uncomment the ++# repo name header and Include lines. You can add preferred servers immediately ++# after the header, and they will be used before the default mirrors. ++ ++#[core-testing] ++#Include = /etc/pacman.d/mirrorlist ++ ++[core] ++Include = /etc/pacman.d/mirrorlist ++ ++#[extra-testing] ++#Include = /etc/pacman.d/mirrorlist ++ ++[extra] ++Include = /etc/pacman.d/mirrorlist ++ ++[laur] ++Include = /etc/pacman.d/mirrorlist ++ ++# An example of a custom package repository. See the pacman manpage for ++# tips on creating your own repositories. ++#[custom] ++#SigLevel = Optional TrustAll ++#Server = file:///home/custompkgs +diff --git a/config/pacman/multilib.conf b/config/pacman/multilib.conf +index e09a9d5..496d84b 100644 +--- a/config/pacman/multilib.conf ++++ b/config/pacman/multilib.conf +@@ -88,7 +88,7 @@ Include = /etc/pacman.d/mirrorlist + #[multilib-testing] + #Include = /etc/pacman.d/mirrorlist + +-[multilib] ++[laur] + Include = /etc/pacman.d/mirrorlist + + # An example of a custom package repository. 
See the pacman manpage for +diff --git a/config/setarch-aliases.d/loong64 b/config/setarch-aliases.d/loong64 +new file mode 100644 +index 0000000..a4d6d47 +--- /dev/null ++++ b/config/setarch-aliases.d/loong64 +@@ -0,0 +1 @@ ++loongarch64 +diff --git a/src/archbuild.in b/src/archbuild.in +index 2f3faf9..c977a24 100644 +--- a/src/archbuild.in ++++ b/src/archbuild.in +@@ -15,7 +15,7 @@ makechrootpkg_args=(-c -n -C) + cmd="${0##*/}" + if [[ "${cmd%%-*}" == 'multilib' ]]; then + repo="${cmd%-build}" +- arch='x86_64' ++ arch='loong64' + base_packages+=(multilib-devel) + else + tag="${cmd%-build}" +diff --git a/src/archrelease.in b/src/archrelease.in +index 818b0ca..d1e277b 100644 +--- a/src/archrelease.in ++++ b/src/archrelease.in +@@ -45,50 +45,50 @@ if [[ ! -f PKGBUILD ]]; then + die 'archrelease: PKGBUILD not found' + fi + +-# shellcheck source=contrib/makepkg/PKGBUILD.proto +-. ./PKGBUILD +-pkgbase=${pkgbase:-$pkgname} +-pkgver=$(get_full_version "$pkgbase") +-gittag=$(get_tag_from_pkgver "$pkgver") +- +-# Check if releasing from a branch +-if ! branchname=$(git symbolic-ref --short HEAD); then +- die 'not on any branch' +-fi +-if [[ "${branchname}" != main ]]; then +- die 'must be run from the main branch' +-fi +- +-# Check if remote origin is setup properly +-if ! giturl=$(git remote get-url origin) || [[ ${giturl} != *${GIT_PACKAGING_URL_SSH}* ]]; then +- die "remote origin is not configured, run 'pkgctl repo configure'" +-fi +-if ! git ls-remote origin >/dev/null; then +- die "configured remote origin may not exist, run 'pkgctl repo create ${pkgbase}' to create it" +-fi +- +-msg 'Fetching remote changes' +-git fetch --prune --prune-tags origin || die 'failed to fetch remote changes' +- +-# Check if local branch is up to date and contains the latest origin commit +-if remoteref=$(git rev-parse "origin/${branchname}" 2>/dev/null); then +- if [[ $(git branch "${branchname}" --contains "${remoteref}" --format '%(refname:short)') != "${branchname}" ]]; then +- die "local branch is out of date, run 'git pull --rebase'" +- fi +-fi +- +-# If the tag exists we check if it's properly signed and that it +-# matches the working directory PKGBUILD. +-if git tag --verify "$gittag" &> /dev/null; then +- cwd_checksum=$(sha256sum PKGBUILD|cut -d' ' -f1) +- tag_checksum=$(git show "${gittag}:PKGBUILD" | sha256sum |cut -d' ' -f1) +- if [[ "$cwd_checksum" != "$tag_checksum" ]]; then +- die "tagged PKGBUILD is not the same as the working dir PKGBUILD" +- fi +- git push --tags --set-upstream origin main || abort +- exit 0 +-fi +- +-msg "Releasing package" +-git tag --sign --message="Package release ${pkgver}" "$gittag" || abort +-git push --tags --set-upstream origin main || abort ++## shellcheck source=contrib/makepkg/PKGBUILD.proto ++#. ./PKGBUILD ++#pkgbase=${pkgbase:-$pkgname} ++#pkgver=$(get_full_version "$pkgbase") ++#gittag=$(get_tag_from_pkgver "$pkgver") ++# ++## Check if releasing from a branch ++#if ! branchname=$(git symbolic-ref --short HEAD); then ++# die 'not on any branch' ++#fi ++#if [[ "${branchname}" != main ]]; then ++# die 'must be run from the main branch' ++#fi ++# ++## Check if remote origin is setup properly ++#if ! giturl=$(git remote get-url origin) || [[ ${giturl} != *${GIT_PACKAGING_URL_SSH}* ]]; then ++# die "remote origin is not configured, run 'pkgctl repo configure'" ++#fi ++#if ! 
git ls-remote origin >/dev/null; then ++# die "configured remote origin may not exist, run 'pkgctl repo create ${pkgbase}' to create it" ++#fi ++# ++#msg 'Fetching remote changes' ++#git fetch --prune --prune-tags origin || die 'failed to fetch remote changes' ++# ++## Check if local branch is up to date and contains the latest origin commit ++#if remoteref=$(git rev-parse "origin/${branchname}" 2>/dev/null); then ++# if [[ $(git branch "${branchname}" --contains "${remoteref}" --format '%(refname:short)') != "${branchname}" ]]; then ++# die "local branch is out of date, run 'git pull --rebase'" ++# fi ++#fi ++# ++## If the tag exists we check if it's properly signed and that it ++## matches the working directory PKGBUILD. ++#if git tag --verify "$gittag" &> /dev/null; then ++# cwd_checksum=$(sha256sum PKGBUILD|cut -d' ' -f1) ++# tag_checksum=$(git show "${gittag}:PKGBUILD" | sha256sum |cut -d' ' -f1) ++# if [[ "$cwd_checksum" != "$tag_checksum" ]]; then ++# die "tagged PKGBUILD is not the same as the working dir PKGBUILD" ++# fi ++# git push --tags --set-upstream origin main || abort ++# exit 0 ++#fi ++# ++#msg "Releasing package" ++#git tag --sign --message="Package release ${pkgver}" "$gittag" || abort ++#git push --tags --set-upstream origin main || abort +diff --git a/src/commitpkg.in b/src/commitpkg.in +index f979d61..0b1226f 100644 +--- a/src/commitpkg.in ++++ b/src/commitpkg.in +@@ -128,17 +128,17 @@ for key in "${validpgpkeys[@]}"; do + done + + # assert that they really are controlled by git +-if (( ${#needsversioning[*]} )); then +- for file in "${needsversioning[@]}"; do +- # skip none existing files +- if [[ ! -f "${file}" ]]; then +- continue +- fi +- if ! git ls-files --error-unmatch "$file"; then +- die "%s is not under version control" "$file" +- fi +- done +-fi ++#if (( ${#needsversioning[*]} )); then ++# for file in "${needsversioning[@]}"; do ++# # skip none existing files ++# if [[ ! -f "${file}" ]]; then ++# continue ++# fi ++# if ! git ls-files --error-unmatch "$file"; then ++# die "%s is not under version control" "$file" ++# fi ++# done ++#fi + + + server=${PACKAGING_REPO_RELEASE_HOST} +@@ -176,51 +176,51 @@ for _arch in "${arch[@]}"; do + done + + # check for PKGBUILD standards +-check_pkgbuild_validity ++#check_pkgbuild_validity + + # auto generate .SRCINFO +-stat_busy 'Generating .SRCINFO' +-write_srcinfo_content > .SRCINFO +-git add --force .SRCINFO +-stat_done +- +-if [[ -n $(git status --porcelain --untracked-files=no) ]]; then +- stat_busy 'Staging files' +- for f in $(git ls-files --modified); do +- git add "$f" +- done +- for f in $(git ls-files --deleted); do +- git rm "$f" +- done +- stat_done +- +- msgtemplate="upgpkg: $(get_full_version)" +- if [[ -n $1 ]]; then +- stat_busy 'Committing changes' +- git commit -q -m "${msgtemplate}: ${1}" || die +- stat_done +- else +- [[ -z ${WORKDIR:-} ]] && setup_workdir +- msgfile=$(mktemp --tmpdir="${WORKDIR}" commitpkg.XXXXXXXXXX) +- echo "$msgtemplate" > "$msgfile" +- if [[ -n $GIT_EDITOR ]]; then +- $GIT_EDITOR "$msgfile" || die +- elif [[ -n $VISUAL ]]; then +- $VISUAL "$msgfile" || die +- elif [[ -n $EDITOR ]]; then +- $EDITOR "$msgfile" || die +- elif giteditor=$(git config --get core.editor); then +- $giteditor "$msgfile" || die +- else +- die "No usable editor found (tried \$GIT_EDITOR, \$VISUAL, \$EDITOR, git config [core.editor])." 
+- fi +- [[ -s $msgfile ]] || die +- stat_busy 'Committing changes' +- git commit -v -q -F "$msgfile" || die +- unlink "$msgfile" +- stat_done +- fi +-fi ++#stat_busy 'Generating .SRCINFO' ++#write_srcinfo_content > .SRCINFO ++#git add --force .SRCINFO ++#stat_done ++ ++#if [[ -n $(git status --porcelain --untracked-files=no) ]]; then ++# stat_busy 'Staging files' ++# for f in $(git ls-files --modified); do ++# git add "$f" ++# done ++# for f in $(git ls-files --deleted); do ++# git rm "$f" ++# done ++# stat_done ++# ++# msgtemplate="upgpkg: $(get_full_version)" ++# if [[ -n $1 ]]; then ++# stat_busy 'Committing changes' ++# git commit -q -m "${msgtemplate}: ${1}" || die ++# stat_done ++# else ++# [[ -z ${WORKDIR:-} ]] && setup_workdir ++# msgfile=$(mktemp --tmpdir="${WORKDIR}" commitpkg.XXXXXXXXXX) ++# echo "$msgtemplate" > "$msgfile" ++# if [[ -n $GIT_EDITOR ]]; then ++# $GIT_EDITOR "$msgfile" || die ++# elif [[ -n $VISUAL ]]; then ++# $VISUAL "$msgfile" || die ++# elif [[ -n $EDITOR ]]; then ++# $EDITOR "$msgfile" || die ++# elif giteditor=$(git config --get core.editor); then ++# $giteditor "$msgfile" || die ++# else ++# die "No usable editor found (tried \$GIT_EDITOR, \$VISUAL, \$EDITOR, git config [core.editor])." ++# fi ++# [[ -s $msgfile ]] || die ++# stat_busy 'Committing changes' ++# git commit -v -q -F "$msgfile" || die ++# unlink "$msgfile" ++# stat_done ++# fi ++#fi + + declare -a uploads + declare -a commit_arches +diff --git a/src/lib/build/build.sh b/src/lib/build/build.sh +index a5a272d..ec7af1a 100644 +--- a/src/lib/build/build.sh ++++ b/src/lib/build/build.sh +@@ -319,6 +319,9 @@ pkgctl_build() { + BUILD_ARCH=("${_arch[0]}") + else + for loop_arch in "${arch[@]}"; do ++ if in_array "${loop_arch}" "x86_64"; then ++ continue ++ fi + if in_array "${loop_arch}" "${_arch[@]}"; then + BUILD_ARCH+=("$loop_arch") + else +diff --git a/src/lib/valid-repos.sh b/src/lib/valid-repos.sh +index 14f90ce..21a03ec 100644 +--- a/src/lib/valid-repos.sh ++++ b/src/lib/valid-repos.sh +@@ -7,16 +7,20 @@ + _repos=( + core core-staging core-testing + extra extra-staging extra-testing ++ laur laur-staging laur-testing + multilib multilib-staging multilib-testing + gnome-unstable + kde-unstable ++ wine-apps + ) + + # shellcheck disable=2034 + _build_repos=( + core-staging core-testing + extra extra-staging extra-testing ++ laur laur-staging laur-testing + multilib multilib-staging multilib-testing + gnome-unstable + kde-unstable ++ wine-apps + ) +diff --git a/src/lib/valid-tags.sh b/src/lib/valid-tags.sh +index ca8d7d7..c2397e2 100644 +--- a/src/lib/valid-tags.sh ++++ b/src/lib/valid-tags.sh +@@ -5,21 +5,26 @@ + + # shellcheck disable=2034 + _arch=( ++ loong64 + x86_64 + any + ) + + # shellcheck disable=2034 + _tags=( +- core-x86_64 core-any +- core-staging-x86_64 core-staging-any +- core-testing-x86_64 core-testing-any +- extra-x86_64 extra-any +- extra-staging-x86_64 extra-staging-any +- extra-testing-x86_64 extra-testing-any ++ core-loong64 core-x86_64 core-any ++ core-staging-loong64 core-staging-x86_64 core-staging-any ++ core-testing-loong64 core-testing-x86_64 core-testing-any ++ extra-loong64 extra-x86_64 extra-any ++ extra-staging-loong64 extra-staging-x86_64 extra-staging-any ++ extra-testing-loong64 extra-testing-x86_64 extra-testing-any ++ laur-loong64 ++ laur-testing-loong64 ++ laur-staging-loong64 + multilib-x86_64 + multilib-testing-x86_64 + multilib-staging-x86_64 +- kde-unstable-x86_64 kde-unstable-any +- gnome-unstable-x86_64 gnome-unstable-any ++ kde-unstable-loong64 
kde-unstable-x86_64 kde-unstable-any ++ gnome-unstable-loong64 gnome-unstable-x86_64 gnome-unstable-any ++ wine-apps-loong64 wine-apps-any + ) +diff --git a/src/makechrootpkg.in b/src/makechrootpkg.in +index 2cfd849..c1b00af 100644 +--- a/src/makechrootpkg.in ++++ b/src/makechrootpkg.in +@@ -14,7 +14,7 @@ source /usr/share/makepkg/util/config.sh + + shopt -s nullglob + +-default_makepkg_args=(--syncdeps --noconfirm --log --holdver --skipinteg) ++default_makepkg_args=(--syncdeps --noconfirm --log --holdver --skipinteg --skippgpcheck) + makepkg_args=("${default_makepkg_args[@]}") + verifysource_args=() + chrootdir= +@@ -241,7 +241,7 @@ download_sources() { + # Ensure sources are downloaded + sudo -u "$makepkg_user" --preserve-env=GNUPGHOME,SSH_AUTH_SOCK \ + env SRCDEST="$SRCDEST" BUILDDIR="$WORKDIR" \ +- makepkg --config="$copydir/etc/makepkg.conf" --verifysource -o "${verifysource_args[@]}" || ++ makepkg --config="$copydir/etc/makepkg.conf" --skippgpcheck --verifysource -o "${verifysource_args[@]}" || + die "Could not download sources." + } + diff --git a/dfrs/PKGBUILD b/dfrs/PKGBUILD index ab19daf01f..ec452fbbc6 100644 --- a/dfrs/PKGBUILD +++ b/dfrs/PKGBUILD @@ -10,18 +10,31 @@ license=('MIT') depends=('glibc' 'gcc-libs') makedepends=('cargo' 'scdoc') source=(${url}/archive/${pkgver}/${pkgname}-${pkgver}.tar.gz - ${url}/releases/download/${pkgver}/${pkgname}-${pkgver}.tar.gz.sig) + ${url}/releases/download/${pkgver}/${pkgname}-${pkgver}.tar.gz.sig + dfrs-la64.patch) options=('!makeflags') sha512sums=('8b8f67ff919e4f1012fe415b564574fe9b11caf01da90a025ca8b8c7707e3f90e865e30cfbeb24db7b80829d082ba7accce44e1897a67e39d129d94c171f21fe' - 'SKIP') + 'SKIP' + '6464137f5f049a9d5b5c196bd459932726f572dffd4115a1b855753f739e87c5c994c24637920c5f173dd63af250827e0be99cdd6916608555f377cf0dac7500') b2sums=('c051c1d712811d2b9c5273ec4b89bc54f8bc10a07a18a99c9908f7c58025b99e74c0d06ce9d5ec6029eda4250befbd52d3e269ec2c6fb1a160b06b4f3b019c8d' - 'SKIP') + 'SKIP' + '373b428a11de3ed16a5d6992e48ebf4b92a370e669a3e17457e8ff33dd0215a8ba1efcb3e99975f7f24ac7030e9780ab5a3495ddcdf95c6a4a12c5fb0a6ba220') validpgpkeys=( 'E240B57E2C4630BA768E2F26FC1B547C8D8172C8' # Levente Polyak ) +prepare() { + cd "$pkgname-$pkgver" + patch -p1 -i "$srcdir/dfrs-la64.patch" +} build() { cd ${pkgname}-${pkgver} + find -name Cargo.lock -exec rm -f {} \; + mkdir -p .cargo + cat > .cargo/config.toml </dev/null } +prepare() { + cd moby + patch -p1 -i $srcdir/moby-la64.patch +} + build() { ### check my mistakes on commit version echo 'Checking commit mismatch' diff --git a/docker/moby-la64.patch b/docker/moby-la64.patch new file mode 100644 index 0000000000..f0a027976f --- /dev/null +++ b/docker/moby-la64.patch @@ -0,0 +1,12 @@ +Index: moby/vendor/github.com/cilium/ebpf/internal/endian_le.go +=================================================================== +--- moby.orig/vendor/github.com/cilium/ebpf/internal/endian_le.go ++++ moby/vendor/github.com/cilium/ebpf/internal/endian_le.go +@@ -1,5 +1,5 @@ +-//go:build 386 || amd64 || amd64p32 || arm || arm64 || mipsle || mips64le || mips64p32le || ppc64le || riscv64 +-// +build 386 amd64 amd64p32 arm arm64 mipsle mips64le mips64p32le ppc64le riscv64 ++//go:build 386 || amd64 || amd64p32 || arm || arm64 || loong64 || mipsle || mips64le || mips64p32le || ppc64le || riscv64 ++// +build 386 amd64 amd64p32 arm arm64 loong64 mipsle mips64le mips64p32le ppc64le riscv64 + + package internal + diff --git a/dog/PKGBUILD b/dog/PKGBUILD index 2b7624ebf5..b4a6c2ac6b 100644 --- a/dog/PKGBUILD +++ b/dog/PKGBUILD @@ 
-21,7 +21,7 @@ b2sums=('7ea52027e73deb6db6cb67b89063cddd507246256b3ca93479a634ff6696ab6961f004d prepare() { cd ${pkgname}-${pkgver} patch -Np1 -i ../dog-openssl-crates-update.patch - cargo fetch --locked --target "$CARCH-unknown-linux-gnu" + cargo fetch --locked --target "`uname -m`-unknown-linux-gnu" } build() { diff --git a/dra/PKGBUILD b/dra/PKGBUILD index 91c2937d08..a6c49033a4 100644 --- a/dra/PKGBUILD +++ b/dra/PKGBUILD @@ -15,7 +15,7 @@ options=('!lto') prepare() { cd "$pkgname-$pkgver" - cargo fetch --locked --target "$CARCH-unknown-linux-gnu" + cargo fetch --locked --target "`uname -m`-unknown-linux-gnu" mkdir -p completions } diff --git a/dragonfly-reverb/PKGBUILD b/dragonfly-reverb/PKGBUILD index c6b54c263a..01772110de 100644 --- a/dragonfly-reverb/PKGBUILD +++ b/dragonfly-reverb/PKGBUILD @@ -179,6 +179,6 @@ package_dragonfly-reverb-vst3() { cd $pkgbase-$pkgver for name in "${_names[@]}"; do - install -vDm 755 bin/$name.vst3/Contents/$CARCH-linux/$name.so -t "$pkgdir/usr/lib/vst3/" + install -vDm 755 bin/$name.vst3/Contents/`uname -m`-linux/$name.so -t "$pkgdir/usr/lib/vst3/" done } diff --git a/dtc/PKGBUILD b/dtc/PKGBUILD index a193bf1d6d..75a2ab13ce 100644 --- a/dtc/PKGBUILD +++ b/dtc/PKGBUILD @@ -10,7 +10,7 @@ arch=(loong64 x86_64) license=(GPL2) depends=(bash glibc libyaml python) makedepends=(meson swig python-setuptools-scm) -checkdepends=(valgrind) +makedepends+=(valgrind) source=(https://www.kernel.org/pub/software/utils/dtc/dtc-$pkgver.tar.xz fix_test.patch::https://github.com/dgibson/dtc/commit/32174a66efa4ad19fc6a2a6422e4af2ae4f055cb.patch fix_meson_version.patch::https://github.com/dgibson/dtc/commit/64a907f08b9bedd89833c1eee674148cff2343c6.patch diff --git a/dua-cli/PKGBUILD b/dua-cli/PKGBUILD index 1565f1b601..36235b2dae 100644 --- a/dua-cli/PKGBUILD +++ b/dua-cli/PKGBUILD @@ -21,7 +21,13 @@ prepare() { build() { cd "$srcdir/$pkgname-$pkgver" - cargo build --release --locked + find -name Cargo.lock -exec rm -f {} \; + mkdir -p .cargo + cat > .cargo/config.toml < .cargo/config.toml < 7953AC1FBC3DC8B3B292393ED5E9E43F7DF9EE8C # Richard Levitte A21FAB74B0088AA361152586B8EF1A6BA9DA2D5C # Tomáš Mráz ) -_arch_list=(ARM AARCH64 IA32 X64) +_arch_list=(ARM AARCH64 LOONGARCH64 IA32 X64) _build_type=RELEASE _build_plugin=GCC5 @@ -129,6 +142,8 @@ pkgver() { prepare() { # patch to be able to use brotli 1.0.9 patch -Np1 -d $pkgbase -i ../$pkgbase-202202-brotli.patch + patch -Np1 -d $pkgbase -i ../edk2-use-env-toolchains.patch + patch -Np1 -d $pkgbase -i ../relax_edk2_gcc14.diff cd $pkgbase @@ -157,6 +172,10 @@ prepare() { # copy seabios's CSM binary into place, so that it can be included in the binaries: cp -v /usr/share/qemu/bios-csm.bin OvmfPkg/Csm/Csm16/Csm16.bin + ln -sf $srcdir/edk2-platforms/Drivers . + ln -sf $srcdir/edk2-platforms/Features . + ln -sf $srcdir/edk2-platforms/Platform . + ln -sf $srcdir/edk2-platforms/Silicon . 
} # TODO: check TPM_ENABLE/TPM2_ENABLE @@ -199,6 +218,8 @@ build() { ARCH=AARCH64 make -C BaseTools echo "Building base tools (ARM)" ARCH=ARM make -C BaseTools + echo "Building base tools (LOONGARCH64)" + ARCH=LOONGARCH64 make -C BaseTools echo "Building base tools" make -C BaseTools # expose build tooling in PATH @@ -366,6 +387,24 @@ build() { dd if=Build/ArmVirtQemu-$_arch/${_build_type}_$_build_plugin/FV/QEMU_EFI.fd of=Build/ArmVirtQemu-$_arch/${_build_type}_$_build_plugin/FV/QEMU_CODE.fd conv=notrunc dd if=/dev/zero of=Build/ArmVirtQemu-$_arch/${_build_type}_$_build_plugin/FV/QEMU_VARS.fd bs=1M count=64 ;; + LOONGARCH64) + echo "Building ovmf ($_arch) with secure boot" + local _build_options=( + -p Platform/Loongson/LoongArchQemuPkg/Loongson.dsc + -a "${_arch}" + "${_common_args[@]}" + "${_efi_args[@]}" + -D NETWORK_HTTP_BOOT_ENABLE + -D NETWORK_TLS_ENABLE + -D SECURE_BOOT_ENABLE + -D TPM_ENABLE + -D TPM_CONFIG_ENABLE + ) + BaseTools/BinWrappers/PosixLike/build "${_build_options[@]}" + dd if=/dev/zero of=Build/LoongArchQemu/${_build_type}_${_build_plugin}/FV/QEMU_CODE.fd bs=1M count=4 + dd if=Build/LoongArchQemu/${_build_type}_${_build_plugin}/FV/QEMU_EFI.fd of=Build/LoongArchQemu/${_build_type}_${_build_plugin}/FV/QEMU_CODE.fd conv=notrunc + dd if=/dev/zero of=Build/LoongArchQemu/${_build_type}_${_build_plugin}/FV/QEMU_VARS.fd bs=1M count=16 + ;; esac done } @@ -415,6 +454,21 @@ package_edk2-arm() { install -vDm 644 License.txt -t "$pkgdir/usr/share/licenses/$pkgname/" } +package_edk2-loongarch64() { + local _arch=LOONGARCH64 + + pkgdesc="Firmware for Virtual Machines (loongarch64)" + url="https://github.com/tianocore/tianocore.github.io/wiki/LoongArchQemuPkg" + + cd $pkgbase + install -vDm 644 Build/LoongArchQemu/${_build_type}_${_build_plugin}/FV/*.fd -t "$pkgdir/usr/share/$pkgbase/${_arch,,}/" + # install qemu descriptors in accordance with qemu: + # https://git.qemu.org/?p=qemu.git;a=tree;f=pc-bios/descriptors + install -vDm 644 ../*$pkgname*.json -t "$pkgdir/usr/share/qemu/firmware/" + # license + install -vDm 644 License.txt -t "$pkgdir/usr/share/licenses/$pkgname/" +} + package_edk2-shell() { local _arch # minimal UEFI shell, as defined in ShellPkg/Application/Shell/ShellPkg.inf diff --git a/edk2/edk2-use-env-toolchains.patch b/edk2/edk2-use-env-toolchains.patch new file mode 100644 index 0000000000..b7832443dc --- /dev/null +++ b/edk2/edk2-use-env-toolchains.patch @@ -0,0 +1,62 @@ +Index: edk2-edk2-stable202211/BaseTools/Conf/tools_def.template +=================================================================== +--- a/BaseTools/Conf/tools_def.template ++++ b/BaseTools/Conf/tools_def.template +@@ -2294,17 +2294,17 @@ RELEASE_GCC49_AARCH64_DLINK_XIPFLAGS = - + ################## + # GCC5 IA32 definitions + ################## +-*_GCC5_IA32_OBJCOPY_PATH = DEF(GCC5_IA32_PREFIX)objcopy +-*_GCC5_IA32_CC_PATH = DEF(GCC5_IA32_PREFIX)gcc +-*_GCC5_IA32_SLINK_PATH = DEF(GCC5_IA32_PREFIX)gcc-ar +-*_GCC5_IA32_DLINK_PATH = DEF(GCC5_IA32_PREFIX)gcc +-*_GCC5_IA32_ASLDLINK_PATH = DEF(GCC5_IA32_PREFIX)gcc +-*_GCC5_IA32_ASM_PATH = DEF(GCC5_IA32_PREFIX)gcc +-*_GCC5_IA32_PP_PATH = DEF(GCC5_IA32_PREFIX)gcc +-*_GCC5_IA32_VFRPP_PATH = DEF(GCC5_IA32_PREFIX)gcc +-*_GCC5_IA32_ASLCC_PATH = DEF(GCC5_IA32_PREFIX)gcc +-*_GCC5_IA32_ASLPP_PATH = DEF(GCC5_IA32_PREFIX)gcc +-*_GCC5_IA32_RC_PATH = DEF(GCC5_IA32_PREFIX)objcopy ++*_GCC5_IA32_OBJCOPY_PATH = ENV(GCC5_IA32_PREFIX)objcopy ++*_GCC5_IA32_CC_PATH = ENV(GCC5_IA32_PREFIX)gcc ++*_GCC5_IA32_SLINK_PATH = ENV(GCC5_IA32_PREFIX)gcc-ar ++*_GCC5_IA32_DLINK_PATH = 
ENV(GCC5_IA32_PREFIX)gcc ++*_GCC5_IA32_ASLDLINK_PATH = ENV(GCC5_IA32_PREFIX)gcc ++*_GCC5_IA32_ASM_PATH = ENV(GCC5_IA32_PREFIX)gcc ++*_GCC5_IA32_PP_PATH = ENV(GCC5_IA32_PREFIX)gcc ++*_GCC5_IA32_VFRPP_PATH = ENV(GCC5_IA32_PREFIX)gcc ++*_GCC5_IA32_ASLCC_PATH = ENV(GCC5_IA32_PREFIX)gcc ++*_GCC5_IA32_ASLPP_PATH = ENV(GCC5_IA32_PREFIX)gcc ++*_GCC5_IA32_RC_PATH = ENV(GCC5_IA32_PREFIX)objcopy + + *_GCC5_IA32_ASLCC_FLAGS = DEF(GCC5_ASLCC_FLAGS) -m32 + *_GCC5_IA32_ASLDLINK_FLAGS = DEF(GCC5_IA32_X64_ASLDLINK_FLAGS) -Wl,-m,elf_i386 -no-pie +@@ -2326,17 +2326,17 @@ RELEASE_GCC5_IA32_DLINK_FLAGS = DEF(G + ################## + # GCC5 X64 definitions + ################## +-*_GCC5_X64_OBJCOPY_PATH = DEF(GCC5_X64_PREFIX)objcopy +-*_GCC5_X64_CC_PATH = DEF(GCC5_X64_PREFIX)gcc +-*_GCC5_X64_SLINK_PATH = DEF(GCC5_X64_PREFIX)gcc-ar +-*_GCC5_X64_DLINK_PATH = DEF(GCC5_X64_PREFIX)gcc +-*_GCC5_X64_ASLDLINK_PATH = DEF(GCC5_X64_PREFIX)gcc +-*_GCC5_X64_ASM_PATH = DEF(GCC5_X64_PREFIX)gcc +-*_GCC5_X64_PP_PATH = DEF(GCC5_X64_PREFIX)gcc +-*_GCC5_X64_VFRPP_PATH = DEF(GCC5_X64_PREFIX)gcc +-*_GCC5_X64_ASLCC_PATH = DEF(GCC5_X64_PREFIX)gcc +-*_GCC5_X64_ASLPP_PATH = DEF(GCC5_X64_PREFIX)gcc +-*_GCC5_X64_RC_PATH = DEF(GCC5_X64_PREFIX)objcopy ++*_GCC5_X64_OBJCOPY_PATH = ENV(GCC5_X64_PREFIX)objcopy ++*_GCC5_X64_CC_PATH = ENV(GCC5_X64_PREFIX)gcc ++*_GCC5_X64_SLINK_PATH = ENV(GCC5_X64_PREFIX)gcc-ar ++*_GCC5_X64_DLINK_PATH = ENV(GCC5_X64_PREFIX)gcc ++*_GCC5_X64_ASLDLINK_PATH = ENV(GCC5_X64_PREFIX)gcc ++*_GCC5_X64_ASM_PATH = ENV(GCC5_X64_PREFIX)gcc ++*_GCC5_X64_PP_PATH = ENV(GCC5_X64_PREFIX)gcc ++*_GCC5_X64_VFRPP_PATH = ENV(GCC5_X64_PREFIX)gcc ++*_GCC5_X64_ASLCC_PATH = ENV(GCC5_X64_PREFIX)gcc ++*_GCC5_X64_ASLPP_PATH = ENV(GCC5_X64_PREFIX)gcc ++*_GCC5_X64_RC_PATH = ENV(GCC5_X64_PREFIX)objcopy + + *_GCC5_X64_ASLCC_FLAGS = DEF(GCC5_ASLCC_FLAGS) -m64 + *_GCC5_X64_ASLDLINK_FLAGS = DEF(GCC5_IA32_X64_ASLDLINK_FLAGS) -Wl,-m,elf_x86_64 diff --git a/edk2/relax_edk2_gcc14.diff b/edk2/relax_edk2_gcc14.diff new file mode 100644 index 0000000000..35901ff55b --- /dev/null +++ b/edk2/relax_edk2_gcc14.diff @@ -0,0 +1,44 @@ +diff --git a/BaseTools/Source/C/GenFw/Elf64Convert.c b/BaseTools/Source/C/GenFw/Elf64Convert.c +index d53ecb1767..8018d68db1 100644 +--- a/BaseTools/Source/C/GenFw/Elf64Convert.c ++++ b/BaseTools/Source/C/GenFw/Elf64Convert.c +@@ -1778,7 +1778,11 @@ WriteSections64 ( + case R_LARCH_TLS_LD64_HI20: + case R_LARCH_TLS_GD_PC_HI20: + case R_LARCH_TLS_GD64_HI20: ++ case R_LARCH_32_PCREL: + case R_LARCH_RELAX: ++ case R_LARCH_DELETE: ++ case R_LARCH_ALIGN: ++ case R_LARCH_PCREL20_S2: + // + // These types are not used or do not require fixup. + // +@@ -2185,7 +2189,11 @@ WriteRelocations64 ( + case R_LARCH_TLS_LD64_HI20: + case R_LARCH_TLS_GD_PC_HI20: + case R_LARCH_TLS_GD64_HI20: ++ case R_LARCH_32_PCREL: + case R_LARCH_RELAX: ++ case R_LARCH_DELETE: ++ case R_LARCH_ALIGN: ++ case R_LARCH_PCREL20_S2: + // + // These types are not used or do not require fixup in PE format files. 
+ // +diff --git a/BaseTools/Source/C/GenFw/elf_common.h b/BaseTools/Source/C/GenFw/elf_common.h +index ccd32804b0..d3a5303953 100644 +--- a/BaseTools/Source/C/GenFw/elf_common.h ++++ b/BaseTools/Source/C/GenFw/elf_common.h +@@ -1144,5 +1144,10 @@ typedef struct { + #define R_LARCH_TLS_LD64_HI20 96 + #define R_LARCH_TLS_GD_PC_HI20 97 + #define R_LARCH_TLS_GD64_HI20 98 +-#define R_LARCH_RELAX 99 ++#define R_LARCH_32_PCREL 99 ++#define R_LARCH_RELAX 100 ++#define R_LARCH_DELETE 101 ++#define R_LARCH_ALIGN 102 ++#define R_LARCH_PCREL20_S2 103 ++ + #endif /* !_SYS_ELF_COMMON_H_ */ diff --git a/efitools/PKGBUILD b/efitools/PKGBUILD index fd81afc3e2..f4744f4c73 100644 --- a/efitools/PKGBUILD +++ b/efitools/PKGBUILD @@ -13,19 +13,22 @@ license=(GPL2 LGPL2.1) makedepends=(git gnu-efi-libs help2man perl-file-slurp sbsigntools) depends=(glibc openssl) source=("git+https://git.kernel.org/pub/scm/linux/kernel/git/jejb/$pkgname.git#tag=v${pkgver}?signed" - "${pkgname}-1.9.2-console_warning_typo.patch") + "${pkgname}-1.9.2-console_warning_typo.patch" + efitools-la64.patch) sha512sums=('SKIP' - '9e609eb4fb2a7116166626d15470d66e2eb66a25867618d4065d48636304f88549a71c5e827ac92750183f0fabaa3b84beea3dffa905031a2867939bfae955e7') + '9e609eb4fb2a7116166626d15470d66e2eb66a25867618d4065d48636304f88549a71c5e827ac92750183f0fabaa3b84beea3dffa905031a2867939bfae955e7' + '23f8751e4cfe3369d3ec161fef908ebab91833de2e36982cc708f3fcfbaa654facc0cefca7b6bd909a918ae056640913a7d648b011cf261bae7024f0153eac55') validpgpkeys=('D5606E73C8B46271BEAD9ADF814AE47C214854D6') # James Bottomley prepare() { cd "${pkgname}" patch -Np1 -i "../${pkgname}-1.9.2-console_warning_typo.patch" + patch -Np1 -i "../efitools-la64.patch" } build() { # fix PreLoader.efi building on x86_64 #49314 - export ARCH="${CARCH}" + export ARCH="`uname -m`" # build with one job because the Makefile does not support parallel jobs ;_; # https://bugs.archlinux.org/task/73600 make -j1 -C "${pkgname}" diff --git a/efitools/efitools-la64.patch b/efitools/efitools-la64.patch new file mode 100644 index 0000000000..7ecf4efe8d --- /dev/null +++ b/efitools/efitools-la64.patch @@ -0,0 +1,13 @@ +Index: efitools/Make.rules +=================================================================== +--- efitools.orig/Make.rules ++++ efitools/Make.rules +@@ -10,6 +10,8 @@ else ifeq ($(ARCH),aarch64) + ARCH3264 = + else ifeq ($(ARCH),arm) + ARCH3264 = ++else ifeq ($(ARCH),loongarch64) ++ARCH3264 = + else + $(error unknown architecture $(ARCH)) + endif diff --git a/electron/PKGBUILD b/electron/PKGBUILD index d9c9a136b7..a68f59fc39 100644 --- a/electron/PKGBUILD +++ b/electron/PKGBUILD @@ -9,10 +9,10 @@ pkgdesc='Meta package providing the latest available stable Electron build' arch=(any) url='https://electronjs.org' license=(MIT) -depends=("electron$pkgver") +depends=("electron$pkgver-bin") package() { mkdir -p "$pkgdir/usr/bin" "$pkgdir/usr/lib" - ln -sf "${depends[0]}" "$pkgdir/usr/bin/$pkgname" - ln -sf "${depends[0]}" "$pkgdir/usr/lib/$pkgname" + ln -sf "${depends[0]%-bin}" "$pkgdir/usr/bin/$pkgname" + ln -sf "${depends[0]%-bin}" "$pkgdir/usr/lib/$pkgname" } diff --git a/electron25/PKGBUILD b/electron25/PKGBUILD index 2d6ddceae3..4565753c8d 100644 --- a/electron25/PKGBUILD +++ b/electron25/PKGBUILD @@ -62,7 +62,10 @@ source=("git+https://github.com/electron/electron.git#tag=v$pkgver" std-vector-non-const.patch use-system-libraries-in-node.patch libxml2-2.12.patch - icu-74.patch) + icu-74.patch + electron-la64.patch + ) +# shellcheck disable=SC2034 sha256sums=('SKIP' 'SKIP' 'SKIP' @@ 
-77,6 +80,10 @@ sha256sums=('SKIP' 'ff588a8a4fd2f79eb8a4f11cf1aa151298ffb895be566c57cc355d47f161f53f' 'bfae9e773edfd0ddbc617777fdd4c0609cba2b048be7afe40f97768e4eb6117e' '547e092f6a20ebd15e486b31111145bc94b8709ec230da89c591963001378845') + '621ed210d75d0e846192c1571bb30db988721224a41572c27769c0288d361c11' + '1b782b0f6d4f645e4e0daa8a4852d63f0c972aa0473319216ff04613a0592a69' + 'ba4dd0a25a4fc3267ed19ccb39f28b28176ca3f97f53a4e9f5e9215280040ea0' + '671fd958b429414a66c209c8b91b6876a77bf4ed38244044ba14703de3f02a66') # Possible replacements are listed in build/linux/unbundle/replace_gn_files.py # Keys are the names in the above script; values are the dependencies in Arch @@ -145,6 +152,7 @@ EOF pushd src/electron patch -Np1 -i ../../std-vector-non-const.patch + patch -Np1 -i ../../electron-la64.patch popd echo "Running hooks..." diff --git a/emacs/PKGBUILD b/emacs/PKGBUILD index e6adc0db04..7ff18f7181 100644 --- a/emacs/PKGBUILD +++ b/emacs/PKGBUILD @@ -118,7 +118,7 @@ package_emacs() { package_emacs-nativecomp() { pkgdesc='The extensible, customizable, self-documenting real-time display editor with native compilation enabled' - depends+=(libgccjit) +#depends+=(libgccjit) provides=(emacs) conflicts=(emacs) diff --git a/emacs/emacs-la64.patch b/emacs/emacs-la64.patch new file mode 100644 index 0000000000..f55e617b8f --- /dev/null +++ b/emacs/emacs-la64.patch @@ -0,0 +1,10 @@ +--- emacs-27.2.orig/src/sysdep.c 2022-04-26 17:50:07.714027425 +0800 ++++ emacs-27.2/src/sysdep.c 2022-04-26 17:50:01.898411164 +0800 +@@ -1818,6 +1818,7 @@ + + /* Alternate stack used by SIGSEGV handler below. */ + ++#define SIGSTKSZ 16384 + static unsigned char sigsegv_stack[SIGSTKSZ]; + + diff --git a/erdtree/PKGBUILD b/erdtree/PKGBUILD index 704884c1f9..b7a30ac892 100644 --- a/erdtree/PKGBUILD +++ b/erdtree/PKGBUILD @@ -23,7 +23,7 @@ prepare() { cd "$pkgname" # download dependencies - cargo fetch --locked --target "$CARCH-unknown-linux-gnu" + cargo fetch --locked --target "`uname -m`-unknown-linux-gnu" } build() { diff --git a/espeakup/PKGBUILD b/espeakup/PKGBUILD index 9eb2e05c1b..c89958ad50 100644 --- a/espeakup/PKGBUILD +++ b/espeakup/PKGBUILD @@ -4,7 +4,7 @@ pkgname=espeakup pkgver=0.90 -pkgrel=2 +pkgrel=3 pkgdesc="A light weight connector for espeak-ng and speakup" arch=(loong64 x86_64) url="https://github.com/linux-speakup/espeakup" diff --git a/espflash/PKGBUILD b/espflash/PKGBUILD index a20342f8f1..b53139d338 100644 --- a/espflash/PKGBUILD +++ b/espflash/PKGBUILD @@ -17,7 +17,7 @@ b2sums=('f4361c5c8f7d31d10cf22c67723847b1597c6ca307c67aa76e9b1620e9f3bb0a18b9f03 prepare() { cd ${pkgbase}-${pkgver} - cargo fetch --locked --target "$CARCH-unknown-linux-gnu" + cargo fetch --locked --target "`uname -m`-unknown-linux-gnu" } build() { diff --git a/espup/PKGBUILD b/espup/PKGBUILD index 671e862f42..6d9d169bc5 100644 --- a/espup/PKGBUILD +++ b/espup/PKGBUILD @@ -16,7 +16,7 @@ b2sums=('ecf2b10a72f664db49e584901ccacbff78e00a7b33a9ab9ae146d09ebb754d9dcd5e5ad prepare() { cd "${pkgname}-${pkgver}" - cargo fetch --locked --target "$CARCH-unknown-linux-gnu" + cargo fetch --locked --target "`uname -m`-unknown-linux-gnu" } build() { diff --git a/eva/PKGBUILD b/eva/PKGBUILD index 810f8af0bf..dc67d93440 100644 --- a/eva/PKGBUILD +++ b/eva/PKGBUILD @@ -17,7 +17,7 @@ sha256sums=('d6a6eb8e0d46de1fea9bd00c361bd7955fcd7cc8f3310b786aad48c1dce7b3f7') prepare() { cd "$_archive" - cargo fetch --locked --target "$CARCH-unknown-linux-gnu" + cargo fetch --locked --target "`uname -m`-unknown-linux-gnu" } build() { diff --git a/evcxr_repl/PKGBUILD 
b/evcxr_repl/PKGBUILD index 2646b2d384..f2db39e12f 100644 --- a/evcxr_repl/PKGBUILD +++ b/evcxr_repl/PKGBUILD @@ -16,7 +16,7 @@ options=('!lto') prepare() { cd "$_pkgname-$pkgver" - cargo fetch --locked --target "$CARCH-unknown-linux-gnu" + cargo fetch --locked --target "`uname -m`-unknown-linux-gnu" } build() { diff --git a/farstream/PKGBUILD b/farstream/PKGBUILD index bea546c842..25714268f3 100644 --- a/farstream/PKGBUILD +++ b/farstream/PKGBUILD @@ -2,7 +2,7 @@ pkgname=farstream pkgver=0.2.9 -pkgrel=3 +pkgrel=4 pkgdesc="Farstream (formerly Farsight) - Audio/Video Communications Framework" arch=('loong64' 'x86_64') url="https://www.freedesktop.org/wiki/Software/Farstream" diff --git a/fbterm/0001-Fix-build-with-gcc-6.patch b/fbterm/0001-Fix-build-with-gcc-6.patch new file mode 100644 index 0000000000..ad5dd65c48 --- /dev/null +++ b/fbterm/0001-Fix-build-with-gcc-6.patch @@ -0,0 +1,104 @@ +From 69917d25c6f718572433262d86691bf24e72e4c8 Mon Sep 17 00:00:00 2001 +From: Nobuhiro Iwamatsu +Date: Wed, 13 Jul 2016 12:02:10 +0900 +Subject: [PATCH] Fix build with gcc-6 + +Signed-off-by: Nobuhiro Iwamatsu +--- + src/lib/vterm.cpp | 4 ++-- + src/lib/vterm_states.cpp | 18 +++++++++--------- + 2 files changed, 11 insertions(+), 11 deletions(-) + +diff --git a/src/lib/vterm.cpp b/src/lib/vterm.cpp +index 3a5dcc7..f79f44c 100644 +--- a/src/lib/vterm.cpp ++++ b/src/lib/vterm.cpp +@@ -68,13 +68,13 @@ u8 VTerm::control_map[MAX_CONTROL_CODE], VTerm::escape_map[NR_STATES][MAX_ESCAPE + + void VTerm::init_state() + { +- for (u8 i = 1; control_sequences[i].code != (u16)-1; i++) { ++ for (u8 i = 1; control_sequences[i].code != (u16)0xFFFF; i++) { + control_map[control_sequences[i].code] = i; + } + + u8 state = ESnormal; + for (u8 i = 1; ; i++) { +- if (escape_sequences[i].code == (u16)-1) { ++ if (escape_sequences[i].code == (u16)0xFFFF) { + state++; + if (state == NR_STATES) break; + } else { +diff --git a/src/lib/vterm_states.cpp b/src/lib/vterm_states.cpp +index 49e7588..6aaa8b3 100644 +--- a/src/lib/vterm_states.cpp ++++ b/src/lib/vterm_states.cpp +@@ -39,14 +39,14 @@ const VTerm::Sequence VTerm::control_sequences[] = { + { 0x1B, 0, ESesc }, + { 0x7F, 0, ESkeep }, + { 0x9B, 0, ESsquare }, +- { -1} ++ { 0xFFFF} + }; + + const VTerm::Sequence VTerm::escape_sequences[] = { + { 0, 0, ESnormal }, + + // ESnormal +- { -1 }, ++ { 0xFFFF }, + + // ESesc + { '[', &VTerm::clear_param, ESsquare }, +@@ -65,7 +65,7 @@ const VTerm::Sequence VTerm::escape_sequences[] = { + { '8', &VTerm::restore_cursor, ESnormal }, + { '>', &VTerm::keypad_numeric, ESnormal }, + { '=', &VTerm::keypad_application, ESnormal }, +- { -1 }, ++ { 0xFFFF }, + + // ESsquare + { '[', 0, ESfunckey }, +@@ -104,7 +104,7 @@ const VTerm::Sequence VTerm::escape_sequences[] = { + { '`', &VTerm::cursor_position_col, ESnormal }, + { ']', &VTerm::linux_specific, ESnormal }, + { '}', &VTerm::fbterm_specific, ESnormal }, +- { -1 }, ++ { 0xFFFF }, + + // ESnonstd + { '0' | ADDSAME(9), &VTerm::set_palette, ESkeep }, +@@ -112,25 +112,25 @@ const VTerm::Sequence VTerm::escape_sequences[] = { + { 'a' | ADDSAME(5), &VTerm::set_palette, ESkeep }, + { 'P', &VTerm::begin_set_palette, ESkeep }, + { 'R', &VTerm::reset_palette, ESnormal }, +- { -1 }, ++ { 0xFFFF }, + + // ESpercent + { '@', &VTerm::clear_utf8, ESnormal }, + { 'G', &VTerm::set_utf8, ESnormal }, + { '8', &VTerm::set_utf8, ESnormal }, +- { -1 }, ++ { 0xFFFF }, + + // EScharset + { '0', &VTerm::set_charset, ESnormal }, + { 'B', &VTerm::set_charset, ESnormal }, + { 'U', &VTerm::set_charset, ESnormal }, + { 'K', 
&VTerm::set_charset, ESnormal }, +- { -1 }, ++ { 0xFFFF }, + + // EShash + { '8', &VTerm::screen_align, ESnormal }, +- { -1 }, ++ { 0xFFFF }, + + // ESfunckey +- { -1 }, ++ { 0xFFFF }, + }; +-- +2.8.1 + diff --git a/fbterm/PKGBUILD b/fbterm/PKGBUILD new file mode 100644 index 0000000000..73ca72ded3 --- /dev/null +++ b/fbterm/PKGBUILD @@ -0,0 +1,60 @@ +# Maintainer: ivanp7 + +pkgname=fbterm +_gitname=fbterm +_majorver=1.7 +pkgver=1.7_5 +pkgrel=5 +pkgdesc='Framebuffer terminal emulator' +arch=('x86_64' 'armv7h' 'loong64') +url='https://salsa.debian.org/debian/fbterm' +license=('GPL2') +depends=(freetype2 fontconfig ncurses) +makedepends=(autoconf patch) +provides=(fbterm) +conflicts=(fbterm) +source=("http://deb.debian.org/debian/pool/main/f/fbterm/fbterm_1.7.orig.tar.gz" + '0001-Fix-build-with-gcc-6.patch' 'fix_ftbfs_crosscompile.patch' 'fix_ftbfs_epoll.patch' + 'fbconfig.patch' 'color_palette.patch' 'fbterm.patch' + 'fbtermrc') +sha256sums=('b98d487e84618503887e3996162354c482e24884bad8bf2219b6776372f306ad' + '8054410ab97da3df03406543c6a471acf3323b9e5712da6455d7c49cad7489ce' + '73f0c87aaa5a74631c167fb765c0340dc28626b00d0a3cd065cebf71acc585f7' + '2b5daa2664adf1efb3e478c2f97376c055b1698422524d262fbae2e7a530a323' + '0d1781e2654d32d5dfd1cbf17680b49aefbb124b7164ca1d70fcf4468563be7c' + 'ad865628f2f6d67c82a5d29b1ec68af37293b9df5f4a6e8fad6b356d08ab368b' + '5cd1c14c640679a40f8a9d9781c2a5af5db7543c2296cda99f2886aa40468735' + 'ccd21f8b66631067393cb74e222aca1935c449be569b95c1008d6c7c76b7d4b6') + +prepare() { + cd $srcdir/$_gitname-$_majorver + + patch -p1 < "$srcdir/fbconfig.patch" + patch -p1 < "$srcdir/color_palette.patch" + patch -p1 < "$srcdir/fbterm.patch" + patch -p1 < "$srcdir/0001-Fix-build-with-gcc-6.patch" + patch -p1 < "$srcdir/fix_ftbfs_crosscompile.patch" + patch -p1 < "$srcdir/fix_ftbfs_epoll.patch" + + autoreconf -fvi + ./configure --prefix=/usr +} + +build() { + cd $srcdir/$_gitname-$_majorver + make + mkdir -p tic + TERMINFO=$srcdir/$_gitname-$_majorver/tic tic terminfo/fbterm +} + +package() { + cd $srcdir/$_gitname-$_majorver + make DESTDIR="$pkgdir/" install + + mkdir -p "$pkgdir/usr/share/terminfo" + cp -r tic/f "$pkgdir/usr/share/terminfo/" + + mkdir -p "$pkgdir/etc/fbterm" + cp $srcdir/fbtermrc "$pkgdir/etc/fbterm/fbtermrc.example" +} + diff --git a/fbterm/color_palette.patch b/fbterm/color_palette.patch new file mode 100644 index 0000000000..efc7eefe7e --- /dev/null +++ b/fbterm/color_palette.patch @@ -0,0 +1,102 @@ +--- a/src/screen_render.cpp ++++ b/src/screen_render.cpp +@@ -78,7 +78,7 @@ void Screen::initFillDraw() + + u32 color = 0; + Config::instance()->getOption("color-background", color); +- if (color > 7) color = 0; ++ if (color >= NR_COLORS) color = 0; + bgcolor = color; + + u32 size = mBytesPerLine * ((mRotateType == Rotate0 || mRotateType == Rotate180) ? 
mHeight : mWidth); +--- a/src/fbshell.h 2010-10-18 11:20:11.000000000 +0300 ++++ b/src/fbshell.h 2021-11-27 23:48:54.286721768 +0300 +@@ -58,6 +58,7 @@ class FbShell : public Shell { + void changeMode(ModeType type, u16 val); + void reportCursor(); + void reportMode(); ++ void configColors(); + + struct Cursor { + Cursor() { +--- a/src/fbshell.cpp 2010-10-18 11:20:11.000000000 +0300 ++++ b/src/fbshell.cpp 2021-11-27 23:48:54.286721768 +0300 +@@ -39,7 +39,7 @@ + #define screen (Screen::instance()) + #define manager (FbShellManager::instance()) + +-static const Color defaultPalette[NR_COLORS] = { ++static Color defaultPalette[NR_COLORS] = { + {0x00, 0x00, 0x00}, /* 0 */ + {0xaa, 0x00, 0x00}, /* 1 */ + {0x00, 0xaa, 0x00}, /* 2 */ +@@ -322,11 +322,11 @@ u8 VTerm::init_default_color(bool foreground) + if (foreground) { + color = 7; + Config::instance()->getOption("color-foreground", color); +- if (color > 7) color = 7; ++ if (color >= NR_COLORS) color = 7; + } else { + color = 0; + Config::instance()->getOption("color-background", color); +- if (color > 7) color = 0; ++ if (color >= NR_COLORS) color = 0; + } + + return color; +@@ -552,6 +552,8 @@ static s32 tty0_fd = -1; + + void FbShell::switchVt(bool enter, FbShell *peer) + { ++ configColors(); ++ + if (tty0_fd == -1) tty0_fd = open("/dev/tty0", O_RDWR); + if (tty0_fd != -1) { + seteuid(0); +@@ -771,3 +773,34 @@ bool FbShell::childProcessExited(s32 pid) + + return false; + } ++ ++void FbShell::configColors(){ ++ s8 varColor[32], color[7], rgb[3]; ++ u32 i,j,k,x; ++ for(k=0;k<NR_COLORS;k++){ ++ sprintf(varColor, "color-%d", k); ++ Config::instance()->getOption(varColor, color, sizeof(color)); ++ for(i=0;i<3;i++){ ++ rgb[i]=0; ++ for(j=0;j<2;j++){ ++ x=i*2+j; ++ if(('0' <= color[x]) && (color[x] <= '9')) ++ rgb[i]|=(color[x]-48); ++ else if(('A' <= color[x]) && (color[x] <= 'F')) ++ rgb[i]|=(color[x]-55); ++ else if(('a' <= color[x]) && (color[x] <= 'f')) ++ rgb[i]|=(color[x]-87); ++ else ++ goto NoTouch; ++ if(!j) ++ rgb[i]<<=4; ++ } ++ if(i==2){ ++ defaultPalette[k].red=rgb[0]; ++ defaultPalette[k].green=rgb[1]; ++ defaultPalette[k].blue=rgb[2]; ++ } ++ } ++NoTouch:; ++ } ++} +--- a/doc/fbterm.1.in 2010-10-18 11:20:11.000000000 +0300 ++++ b/doc/fbterm.1.in 2021-11-27 23:11:43.270223092 +0300 +@@ -176,6 +176,9 @@ + + A new terminfo database entry named "fbterm" was added to use these private sequences, all program based on terminfo should work with it. + By default, FbTerm sets environment variable "TERM" to value "linux", user need run "TERM=fbterm /path/to/program" to enable 256 color mode. ++ ++The palette colors may be changed in the configuration using options "\fIcolor-num\fR=RRGGBB", where "num" is a color number from 0 to 255. ++ + .SH "INPUT METHOD" + Instead of adding input method directly in FbTerm, a client-server based input method framework is designed to do + this work. FbTerm acts as a client, standalone IM program as a server, and they run in separated processes.
diff --git a/fbterm/fbconfig.patch b/fbterm/fbconfig.patch new file mode 100644 index 0000000000..b5f861a631 --- /dev/null +++ b/fbterm/fbconfig.patch @@ -0,0 +1,78 @@ +--- a/src/fbconfig.cpp 2010-10-18 11:20:11.000000000 +0300 ++++ b/src/fbconfig.cpp 2021-11-27 23:48:54.286721768 +0300 +@@ -28,6 +28,8 @@ + #include "config.h" + #include "fbconfig.h" + ++#define CONFIG_DIR_NAME "fbterm" ++#define CONFIG_FILE_NAME "fbtermrc" + #define MAX_CONFIG_FILE_SIZE 10240 + + DEFINE_INSTANCE_DEFAULT(Config) +@@ -38,18 +40,34 @@ + mConfigBuf = 0; + mConfigEntrys = 0; + +- const s8 *home = getenv("HOME"); +- if (!home) { +- if (getuid()) return; +- home = "/root"; +- } +- +- s8 name[64]; +- snprintf(name, sizeof(name), "%s/%s", home, ".fbtermrc"); ++ s8 name[256]; + +- checkConfigFile(name); ++ const s8 *home = getenv("XDG_CONFIG_HOME"); ++ const s8 *format_d = NULL, *format_f = NULL, *format_df = NULL; ++ if (!home || !home[0]) ++ { ++ home = getenv("HOME"); ++ if (!home || !home[0]) ++ return; ++ ++ format_d = "%s/.config/" CONFIG_DIR_NAME "/"; ++ format_f = "%s/.config/" CONFIG_FILE_NAME; ++ format_df = "%s/.config/" CONFIG_DIR_NAME "/" CONFIG_FILE_NAME; ++ } ++ else ++ { ++ format_d = "%s/" CONFIG_DIR_NAME "/"; ++ format_f = "%s/" CONFIG_FILE_NAME; ++ format_df = "%s/" CONFIG_DIR_NAME "/" CONFIG_FILE_NAME; ++ } + ++ snprintf(name, sizeof(name), format_d, home); + struct stat cstat; ++ if (stat(name, &cstat) == -1) ++ snprintf(name, sizeof(name), format_f, home); ++ else ++ snprintf(name, sizeof(name), format_df, home); ++ + if (stat(name, &cstat) == -1) return; + if (cstat.st_size > MAX_CONFIG_FILE_SIZE) return; + +--- a/doc/fbterm.1.in 2010-10-18 11:20:11.000000000 +0300 ++++ b/doc/fbterm.1.in 2021-11-27 23:11:43.270223092 +0300 +@@ -22,8 +22,9 @@ + file. If that is not set, /bin/sh will be used. You should use the \fI--\fR argument to separate FbTerm's options from + the arguments supplied to the \fIcommand\fR. + +-FbTerm first uses option value specified in command line arguments, then in the configure file \fI$HOME/.fbtermrc\fR. +-If that file doesn't exist, FbTerm will create it with default options on startup. ++FbTerm first uses option value specified in command line arguments, then in the configure file \fI$XDG_CONFIG_HOME/fbtermrc\fR or ++\fI$XDG_CONFIG_HOME/fbterm/fbtermrc\fR (if directory \fI$XDG_CONFIG_HOME/fbterm/\fR exists). ++If \fIXDG_CONFIG_HOME\fR is unset or empty, FbTerm uses \fI$HOME/.config\fR for it instead. + .TP + \fB-h, --help\fR + display the help and exit +@@ -77,7 +78,7 @@ + display available VESA video modes + + .TP +-see comments in \fI$HOME/.fbtermrc\fR for details of these options. ++see comments in the configure file for details of these options. 
+ .SH "SHORTCUT SUMMARY" + keyboard: + CTRL_ALT_E: exit from FbTerm diff --git a/fbterm/fbterm.patch b/fbterm/fbterm.patch new file mode 100644 index 0000000000..6a9656ff34 --- /dev/null +++ b/fbterm/fbterm.patch @@ -0,0 +1,9 @@ +--- a/terminfo/fbterm 2010-10-18 11:20:11.000000000 +0300 ++++ b/terminfo/fbterm 2021-05-19 03:05:56.885773502 +0300 +@@ -1,5 +1,5 @@ + # Reconstructed via infocmp from file: /lib/terminfo/l/linux +-fbterm|framebuffer based terminal emulator, ++fbterm-256color|framebuffer based terminal emulator, + am, bce, ccc, eo, mir, msgr, xenl, xon, + colors#256, it#8, ncv#18, pairs#32767, + acsc=+\020\,\021-\030.^Y0\333`\004a\261f\370g\361h\260i\316j\331k\277l\332m\300n\305o~p\304q\304r\304s_t\303u\264v\301w\302x\263y\363z\362{\343|\330}\234~\376, diff --git a/fbterm/fbtermrc b/fbterm/fbtermrc new file mode 100644 index 0000000000..f2ef3201a9 --- /dev/null +++ b/fbterm/fbtermrc @@ -0,0 +1,66 @@ +# Configuration for FbTerm + +# Lines starting with '#' are ignored. +# Note that end-of-line comments are NOT supported, comments must be on a line of their own. + + +# font family names/pixelsize used by fbterm, multiple font family names must be seperated by ',' +# and using a fixed width font as the first is strongly recommended +font-names=xos4 Terminus +font-size=12 + +# force font width (and/or height), usually for non-fixed width fonts +# legal value format: n (fw_new = n), +n (fw_new = fw_old + n), -n (fw_new = fw_old - n) +#font-width= +#font-height= + +# terminal palette consists of 256 colors (0-255) +# 0 = black, 1 = red, 2 = green, 3 = brown, 4 = blue, 5 = magenta, 6 = cyan, 7 = white +# 8-15 are brighter versions of 0-7 +# 16-231 is 6x6x6 color cube +# 232-255 is grayscale +color-0=000000 +color-1=AA0000 +color-2=00AA00 +color-3=AA5500 +color-4=0000AA +color-5=AA00AA +color-6=00AAAA +color-7=AAAAAA +color-8=555555 +color-9=FF5555 +color-10=55FF55 +color-11=FFFF55 +color-12=5555FF +color-13=FF55FF +color-14=55FFFF +color-15=FFFFFF + +# default foreground/background colors (chosen from palette) +color-foreground=7 +color-background=0 + +# max scroll-back history lines of every window, value must be [0 - 65535], 0 means disable it +history-lines=0 + +# up to 5 additional text encodings, multiple encodings must be seperated by ',' +# run 'iconv --list' to get available encodings. 
+text-encodings= + +# cursor shape: 0 = underline, 1 = block +# cursor flash interval in milliseconds, 0 means disable flashing +cursor-shape=0 +cursor-interval=500 + +# additional ascii chars considered as part of a word while auto-selecting text, except ' ', 0-9, a-z, A-Z +word-chars=._- + +# change the clockwise orientation angle of screen display +# available values: 0 = 0 degree, 1 = 90 degrees, 2 = 180 degrees, 3 = 270 degrees +screen-rotate=0 + +# specify the favorite input method program to run +input-method= + +# treat ambiguous width characters as wide +#ambiguous-wide=yes diff --git a/fbterm/fix_ftbfs_crosscompile.patch b/fbterm/fix_ftbfs_crosscompile.patch new file mode 100644 index 0000000000..0381834d20 --- /dev/null +++ b/fbterm/fix_ftbfs_crosscompile.patch @@ -0,0 +1,28 @@ +Description: fbterm FTCBFS: falls back to broken select code +Forwarded: not yet +Bug-Debian: https://bugs.debian.org/909679 +Author: Helmut Grohne + +--- fbterm-1.7.orig/configure.ac ++++ fbterm-1.7/configure.ac +@@ -83,16 +83,16 @@ + fi + fi + +-if test x"$EPOLL" = xauto -a x"$cross_compiling" = xno; then +- AC_RUN_IFELSE( ++if test x"$EPOLL" = xauto; then ++ AC_COMPILE_IFELSE( + AC_LANG_PROGRAM([[#include <sys/epoll.h>]], + [[if (epoll_create(10) >= 0) return 0; return 1;]]), + [EPOLL=yes] + ) + fi + +-if test x"$SIGNALFD" = xauto -a x"$cross_compiling" = xno; then +- AC_RUN_IFELSE( ++if test x"$SIGNALFD" = xauto; then ++ AC_COMPILE_IFELSE( + AC_LANG_PROGRAM([[#include <sys/signalfd.h>]], + [[sigset_t mask; if (signalfd(-1, &mask, 0) >= 0) return 0; return 1;]]), + [SIGNALFD=yes] diff --git a/fbterm/fix_ftbfs_epoll.patch b/fbterm/fix_ftbfs_epoll.patch new file mode 100644 index 0000000000..0a0bff2cc1 --- /dev/null +++ b/fbterm/fix_ftbfs_epoll.patch @@ -0,0 +1,15 @@ +Description: fbterms FTBFS for architectures without epoll support +Forwarded: not yet +Bug-Debian: https://bugs.debian.org/909680 +Author: Helmut Grohne + +--- fbterm-1.7.orig/src/fbio.cpp ++++ fbterm-1.7/src/fbio.cpp +@@ -30,6 +30,7 @@ + #define NR_EPOLL_FDS 10 + s32 epollFd; + #else ++#include <sys/select.h> + static fd_set fds; + static u32 maxfd = 0; + #endif diff --git a/fcitx/PKGBUILD b/fcitx/PKGBUILD index 32fa376003..dadea2c1cb 100644 --- a/fcitx/PKGBUILD +++ b/fcitx/PKGBUILD @@ -29,13 +29,13 @@ sha512sums=('d871df84ebb3514b6474000b693246c8e5b198121e9a5a0ca18d478e54cd4144f8d validpgpkeys=('2CC8A0609AD2A479C65B6D5C8E8B898CBF2412F9') # Weng Xuetian prepare() { - mkdir build + mkdir -p _build sed -e 's|enchant/enchant.h|enchant-2/enchant.h|' -i $pkgname-$pkgver/cmake/FindEnchant.cmake } build() { - cd build + cd _build cmake ../$pkgname-$pkgver \ -DCMAKE_INSTALL_PREFIX=/usr \ @@ -53,11 +53,11 @@ build() { } check() { - cd build + cd _build make test } package() { - cd build + cd _build make DESTDIR="$pkgdir" install } diff --git a/fcitx5-chinese-addons/PKGBUILD b/fcitx5-chinese-addons/PKGBUILD index 116c9506af..01eb6db0d7 100644 --- a/fcitx5-chinese-addons/PKGBUILD +++ b/fcitx5-chinese-addons/PKGBUILD @@ -9,7 +9,7 @@ arch=('loong64' 'x86_64') url="https://github.com/fcitx/fcitx5-chinese-addons" license=('GPL') conflicts=('fcitx') -depends=('curl' 'fcitx5-qt' 'libime' 'opencc' 'qt5-webengine') +depends=('curl' 'fcitx5-qt' 'libime' 'opencc' 'qt5-webkit') #'qt5-webengine') makedepends=('boost' 'extra-cmake-modules' 'fcitx5-lua' 'fmt' 'ninja') optdepends=('fcitx5-lua: Lua and imeapi support from pinyin') source=("https://download.fcitx-im.org/fcitx5/$pkgname/$pkgname-${pkgver}_dict.tar.xz"{,.sig}) @@ -20,7 +20,7 @@ validpgpkeys=('2CC8A0609AD2A479C65B6D5C8E8B898CBF2412F9') # Weng
Xuetian ++ ++ loongarch: Fix gcc installation. ++ * cross-tools/cross.conf (loongarch64): Change gcc version to match what ++ it actually reports. ++ ++2022-02-12 Bruno Haible ++ ++ Fix cross-tools build. ++ * cross-tools/cross-build.sh (func_build_gcc): Set LD_LIBRARY_PATH, so ++ that libisl.so.15 gets found during installation. ++ ++2022-02-12 Bruno Haible ++ ++ loongarch: Allow for continuous integration. ++ * cross-tools/cross-build.sh (func_build_gcc): Download fork tarball ++ from alpha.gnu.org. Adjust installation completeness test. ++ * cross-tools/cross.conf (loongarch64): Use binutils 2.38. ++ ++2022-01-16 Bruno Haible ++ ++ loongarch: Add support for loongarch64 ABI. ++ * cross-tools/cross-build.sh (func_build_binutils): Add support for ++ newer binutils snapshots. ++ (func_build_gcc): Add support for GCC 10 and newer. ++ * cross-tools/cross.conf: Add configuration for loongarch64 cross tools. ++ * porting-tools/abis/call-used-registers.txt: Add info about ++ loongarch64. ++ * porting-tools/abis/reg-struct-return.txt: Likewise. ++ * porting-tools/abis/stack-frame.txt: Likewise. Some more tweaks. ++ * porting-tools/execstack/voidfunc.c: Add command for loongarch64. ++ * porting-tools/execstack/voidfunc-loongarch64.o: New generated file. ++ * porting-tools/execstack/main.c (voidfunc): Define also for loongarch. ++ * porting-tools/execstack/README: Add info about loongarch64. ++ * ffcall-abi.h: Add support for loongarch64-lp64. ++ * common/asm-loongarch.sh: New file. ++ * Makefile.in (SOURCE_FILES): Add it. ++ * avcall/avcall.h (__AV_STRUCT_RETURN, __AV_REGISTER_STRUCT_RETURN): Add ++ code for __loongarch64__. ++ * avcall/avcall-alist.h (__av_alist): Likewise. ++ * avcall/avcall-internal.h: Add code for __loongarch64__, especially ++ __av_start1, __av_reg_struct_return, __av_start_struct4, __av_word, ++ __av_long, __av_ulong, __av_ptr, __av_longlong, __av_ulonglong, ++ _av_float, _av_double, __av_struct. ++ * avcall/avcall-loongarch64.c: New file, based on ++ avcall/avcall-riscv64.c. ++ * avcall/Makefile.devel (avcall-loongarch64-linux.s, ++ avcall-loongarch64-macro.S): New targets. ++ * avcall/Makefile.in (avcall-loongarch64.lo, avcall-loongarch64.s): New ++ targets. ++ (clean): Remove avcall-loongarch64.s. ++ (SOURCE_FILES): Add avcall-loongarch64.c, avcall-loongarch64-linux.s, ++ avcall-loongarch64-macro.S. ++ * vacall/vacall.h (__VA_STRUCT_RETURN, __VA_REGISTER_STRUCT_RETURN): Add ++ code for __loongarch64__. ++ * vacall/vacall-internal.h: Add code for __loongarch64__, especially ++ __va_alist, __va_reg_struct_return, __va_start_struct2, ++ __va_arg_leftadjusted, __va_arg_adjusted, _va_arg_longlong, ++ _va_arg_ulonglong, __va_align_double, _va_arg_float, _va_arg_double, ++ __va_arg_struct, _va_return_longlong. ++ * vacall/vacall-loongarch64.c: New file, based on ++ vacall/vacall-riscv64.c. ++ * vacall/Makefile.devel (vacall-loongarch64-linux.s, ++ vacall-loongarch64-macro.S): New targets. ++ * vacall/Makefile.in (vacall-loongarch64.@OBJEXT@, ++ vacall-loongarch64.s): New targets. ++ (clean): Remove vacall-loongarch64.s. ++ (SOURCE_FILES): Add vacall-loongarch64.c, vacall-loongarch64-linux.s, ++ vacall-loongarch64-macro.S. ++ * callback/vacall_r/vacall_r.h (__VA_STRUCT_RETURN, ++ __VA_REGISTER_STRUCT_RETURN): Add code for __loongarch64__. ++ * callback/vacall_r/Makefile.devel (vacall-loongarch64-linux.s, ++ vacall-loongarch64-macro.S): New targets. ++ * callback/vacall_r/Makefile.in (vacall-loongarch64.lo, ++ vacall-loongarch64.s): New targets. 
++ (clean): Remove vacall-loongarch64.s. ++ (SOURCE_FILES): Add vacall-loongarch64-linux.s, ++ vacall-loongarch64-macro.S. ++ * trampoline/Makefile.devel (proto-loongarch64.s, tramp-loongarch64.o): ++ New targets. ++ * trampoline/proto-loongarch64.s: New generated file. ++ * trampoline/tramp-loongarch64.s: New file. ++ * trampoline/tramp-loongarch64.o: New generated file. ++ * trampoline/trampoline.c: Implement for __loongarch64__. ++ * callback/trampoline_r/Makefile.devel (proto-loongarch64.s, ++ tramp-loongarch64.o): New targets. ++ * callback/trampoline_r/proto64.c: Add code for __loongarch64__. ++ * callback/trampoline_r/proto-loongarch64.s: New generated file. ++ * callback/trampoline_r/tramp-loongarch64.s: New file. ++ * callback/trampoline_r/tramp-loongarch64.o: New generated file. ++ * callback/trampoline_r/trampoline.c: Implement for __loongarch64__. ++ * callback/trampoline_r/test1.c: Add support for __loongarch64__. ++ * PLATFORMS, */PLATFORMS: List the 64-bit LoongArch ABI. ++ * NEWS: Mention the new port. ++ ++2022-01-16 Bruno Haible ++ ++ Simplify. ++ * callback/vacall_r/Makefile.in (vacall-alpha.s, vacall-powerpc.s, ++ vacall-s390.lo, vacall-s390x.s, vacall-riscv32-ilp32d.s, ++ vacall-riscv64-lp64d.s): Don't use -I options during preprocessing. ++ ++2021-06-26 Bruno Haible ++ ++ x86_64: Create a read-only .eh_frame section on all platforms. ++ Reported by Thomas Klausner at ++ . ++ * common/asm-x86_64.h (EH_FRAME_SECTION): Use flags "a" (instead of ++ "aw") on all platforms. ++ ++2021-06-13 Bruno Haible ++ ++ maint: Don't require an internet connection for running autogen.sh. ++ * libtool-patches: New directory. ++ * Makefile.maint (libtool-imported-files): Don't call wget. Take the ++ patches from libtool-patches/ instead. ++ + 2021-06-13 Bruno Haible + + Prepare for 2.4 release. +diff --git a/Makefile.in b/Makefile.in +index 05a83dd..2a74ecc 100644 +--- a/Makefile.in ++++ b/Makefile.in +@@ -151,6 +151,7 @@ SOURCE_FILES = \ + common/asm-hppa.sh common/asm-hppa.h \ + common/asm-hppa64.sh common/asm-hppa64.h \ + common/asm-i386.sh common/asm-i386.h \ ++ common/asm-loongarch.sh \ + common/asm-m68k.sh common/asm-m68k.h \ + common/asm-mips.sh common/asm-mips.h \ + common/asm-powerpc.sh \ +diff --git a/NEWS b/NEWS +index 5911682..af1dc48 100644 +--- a/NEWS ++++ b/NEWS +@@ -1,3 +1,9 @@ ++New in 2.5: ++ ++* Added support for the following platforms: ++ (Previously, a build on these platforms failed.) ++ - loongarch64: Linux with lp64d ABI. ++ + New in 2.4: + + * Added support for the following platforms: +diff --git a/PLATFORMS b/PLATFORMS +index b7dc0c8..ca5a53c 100644 +--- a/PLATFORMS ++++ b/PLATFORMS +@@ -49,4 +49,5 @@ Supported CPUs: (Put the GNU config.guess values here.) 
+ s390x s390x-ibm-linux (gcc) + riscv32 riscv32-unknown-linux (gcc -mabi=ilp32d) + riscv64 riscv64-unknown-linux (gcc -mabi=lp64d) ++ loongarch64 loongarch64-unknown-linux (gcc) + +diff --git a/avcall/Makefile.devel b/avcall/Makefile.devel +index db392b6..2933060 100644 +--- a/avcall/Makefile.devel ++++ b/avcall/Makefile.devel +@@ -25,7 +25,8 @@ precompiled : \ + avcall-ia64-macro.S \ + avcall-x86_64-macro.S avcall-x86_64-x32-linux.s avcall-x86_64-windows-macro.S \ + avcall-s390-macro.S avcall-s390x-macro.S \ +- avcall-riscv32-ilp32d-macro.S avcall-riscv64-lp64d-macro.S ++ avcall-riscv32-ilp32d-macro.S avcall-riscv64-lp64d-macro.S \ ++ avcall-loongarch64-macro.S + + + avcall-i386-linux.s : avcall-i386.c avcall-internal.h avcall.h avcall-alist.h $(THISFILE) +@@ -242,6 +243,13 @@ avcall-riscv64-lp64d-macro.S : avcall-riscv64-lp64d-linux.s ../common/asm-riscv. + (../common/asm-riscv.sh < avcall-riscv64-lp64d-linux.s ; cat ../common/noexecstack.h) > avcall-riscv64-lp64d-macro.S + + ++avcall-loongarch64-linux.s : avcall-loongarch64.c avcall-internal.h avcall.h avcall-alist.h $(THISFILE) ++ $(CROSS_TOOL) loongarch64-linux gcc $(GCCFLAGS) -D__loongarch64__ -S avcall-loongarch64.c -o avcall-loongarch64-linux.s ++ ++avcall-loongarch64-macro.S : avcall-loongarch64-linux.s ../common/asm-loongarch.sh ../common/noexecstack.h $(THISFILE) ++ (../common/asm-loongarch.sh < avcall-loongarch64-linux.s ; cat ../common/noexecstack.h) > avcall-loongarch64-macro.S ++ ++ + # --------------- Rules for debugging test failures --------------- + + tests : tests-i386.s tests-m68k.s tests-mips.s tests-sparc.s tests-alpha.s tests-hppa.s tests-arm.s tests-powerpc.s tests-ia64.s tests-x86_64.s +diff --git a/avcall/Makefile.in b/avcall/Makefile.in +index 466023d..b78ba78 100644 +--- a/avcall/Makefile.in ++++ b/avcall/Makefile.in +@@ -258,6 +258,12 @@ avcall-riscv64-lp64d.lo : avcall-riscv64-lp64d.s + avcall-riscv64-lp64d.s : $(srcdir)/avcall-riscv64-lp64d-macro.S + $(CPP) $(ASPFLAGS) $(srcdir)/avcall-riscv64-lp64d-macro.S | grep -v '^ *#line' | grep -v '^#' | sed -e 's,% ,%,g' -e 's,//.*$$,,' > avcall-riscv64-lp64d.s + ++avcall-loongarch64.lo : avcall-loongarch64.s ++ $(LIBTOOL_COMPILE) $(CC) @GCC_X_NONE@ -c avcall-loongarch64.s ++ ++avcall-loongarch64.s : $(srcdir)/avcall-loongarch64-macro.S ++ $(CPP) $(ASPFLAGS) $(srcdir)/avcall-loongarch64-macro.S | grep -v '^ *#line' | grep -v '^#' | sed -e 's,% ,%,g' -e 's,//.*$$,,' > avcall-loongarch64.s ++ + avcall-libapi.lo : $(srcdir)/avcall-libapi.c ../config.h $(srcdir)/avcall-internal.h $(srcdir)/avcall.h $(srcdir)/avcall-alist.h + $(LIBTOOL_COMPILE) $(CC) $(INCLUDES) $(CPPFLAGS) $(CFLAGS) @DISABLE_TYPE_BASED_ALIASING@ -c $(srcdir)/avcall-libapi.c + +@@ -353,7 +359,7 @@ mostlyclean : clean + + clean : force + $(RM) *.@OBJEXT@ *.lo *.a libavcall.* core +- $(RM) avcall-i386.s avcall-sparc.s avcall-sparc64.s avcall-m68k.s avcall-mips.s avcall-mipsn32.s avcall-mips64.s avcall-alpha.s avcall-hppa.s avcall-hppa64.s avcall-arm.s avcall-armhf.s avcall-arm64.s avcall-powerpc.s avcall-powerpc64.s avcall-powerpc64-elfv2.s avcall-ia64.s avcall-x86_64.s avcall-x86_64.asm avcall-x86_64-x32.s avcall-s390.s avcall-s390x.s avcall-riscv32-ilp32d.s avcall-riscv64-lp64d.s ++ $(RM) avcall-i386.s avcall-sparc.s avcall-sparc64.s avcall-m68k.s avcall-mips.s avcall-mipsn32.s avcall-mips64.s avcall-alpha.s avcall-hppa.s avcall-hppa64.s avcall-arm.s avcall-armhf.s avcall-arm64.s avcall-powerpc.s avcall-powerpc64.s avcall-powerpc64-elfv2.s avcall-ia64.s avcall-x86_64.s avcall-x86_64.asm 
avcall-x86_64-x32.s avcall-s390.s avcall-s390x.s avcall-riscv32-ilp32d.s avcall-riscv64-lp64d.s avcall-loongarch64.s + $(RM) -r .libs _libs + $(RM) minitests.@OBJEXT@ minitests.s minitests minitests.out + $(RM) minitests-c++.@OBJEXT@ minitests-c++ minitests-c++.out +@@ -381,6 +387,7 @@ SOURCE_FILES = \ + avcall-hppa64.c avcall-hppa64-linux.s avcall-hppa64-macro.S \ + avcall-i386.c avcall-i386-linux.s avcall-i386-macro.S \ + avcall-ia64.c avcall-ia64-linux.s avcall-ia64-macro.S \ ++ avcall-loongarch64.c avcall-loongarch64-linux.s avcall-loongarch64-macro.S \ + avcall-m68k.c avcall-m68k-linux.s avcall-m68k-sun.s avcall-m68k.mit.S avcall-m68k.motorola.S \ + avcall-mips.c avcall-mipseb-linux.s avcall-mipsel-linux.s avcall-mipseb-macro.S avcall-mipsel-macro.S \ + avcall-mipsn32.c avcall-mipsn32eb-linux.s avcall-mipsn32el-linux.s avcall-mipsn32eb-macro.S avcall-mipsn32el-macro.S \ +diff --git a/avcall/PLATFORMS b/avcall/PLATFORMS +index a823a51..5fcb26a 100644 +--- a/avcall/PLATFORMS ++++ b/avcall/PLATFORMS +@@ -78,4 +78,5 @@ Supported CPUs: (Put the GNU config.guess values here.) + s390x s390x-ibm-linux (gcc) emulated Linux + riscv32 riscv32-unknown-linux (gcc -mabi=ilp32d) emulated Linux + riscv64 riscv64-unknown-linux (gcc -mabi=lp64d) emulated Linux ++ loongarch64 loongarch64-unknown-linux (gcc) Loongson Linux + +diff --git a/avcall/avcall-alist.h b/avcall/avcall-alist.h +index 797f730..0574055 100644 +--- a/avcall/avcall-alist.h ++++ b/avcall/avcall-alist.h +@@ -1,6 +1,6 @@ + /* + * Copyright 1993-1995 Bill Triggs +- * Copyright 1995-2021 Bruno Haible ++ * Copyright 1995-2022 Bruno Haible + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by +@@ -197,7 +197,7 @@ typedef struct + float fargs[__AV_FARG_NUM]; + double dargs[__AV_FARG_NUM]; + #endif +-#if defined(__riscv32__) || defined(__riscv64__) ++#if defined(__riscv32__) || defined(__riscv64__) || defined(__loongarch64__) + #define __AV_FARG_NUM 8 + /* store the floating-point arguments in an extra array */ + unsigned int fanum; /* number of fargs[] words that are occupied so far */ +diff --git a/avcall/avcall-internal.h b/avcall/avcall-internal.h +index c878134..72de32e 100644 +--- a/avcall/avcall-internal.h ++++ b/avcall/avcall-internal.h +@@ -1,6 +1,6 @@ + /* + * Copyright 1993-1995 Bill Triggs +- * Copyright 1995-2021 Bruno Haible ++ * Copyright 1995-2022 Bruno Haible + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by +@@ -111,7 +111,7 @@ typedef int __av_alist_verify[2*(__AV_ALIST_SIZE_BOUND - (int)sizeof(__av_alist) + (LIST).farg_mask = 0, \ + (LIST).darg_mask = 0, + #endif +-#if defined(__riscv32__) || defined(__riscv64__) ++#if defined(__riscv32__) || defined(__riscv64__) || defined(__loongarch64__) + #define __av_start1(LIST,LIST_ARGS_END) \ + (LIST).aptr = &(LIST).args[0], \ + (LIST).fanum = 0, \ +@@ -262,7 +262,7 @@ typedef int __av_alist_verify[2*(__AV_ALIST_SIZE_BOUND - (int)sizeof(__av_alist) + #define __av_start_struct3(LIST) \ + ((LIST).flags |= __AV_REGISTER_STRUCT_RETURN, 0) + #endif +-#if defined(__hppa64__) || defined(__arm64__) || (defined(__powerpc64__) && defined(__powerpc64_elfv2__)) || defined(__x86_64_sysv__) || defined(__riscv64__) ++#if defined(__hppa64__) || defined(__arm64__) || (defined(__powerpc64__) && defined(__powerpc64_elfv2__)) || defined(__x86_64_sysv__) || defined(__riscv64__) || defined(__loongarch64__) + 
#define __av_reg_struct_return(LIST,TYPE_SIZE,TYPE_SPLITTABLE) \ + ((TYPE_SIZE) <= 16) + /* Turn on __AV_REGISTER_STRUCT_RETURN if __AV_SMALL_STRUCT_RETURN was set +@@ -287,7 +287,7 @@ typedef int __av_alist_verify[2*(__AV_ALIST_SIZE_BOUND - (int)sizeof(__av_alist) + #endif + /* Return structure pointer is passed as first arg. + */ +-#if defined(__i386__) || defined(__alpha__) || (defined(__arm__) && !defined(__armhf__)) || defined(__powerpc_aix__) || defined(__powerpc64__) || defined(__riscv32__) || defined(__riscv64__) ++#if defined(__i386__) || defined(__alpha__) || (defined(__arm__) && !defined(__armhf__)) || defined(__powerpc_aix__) || defined(__powerpc64__) || defined(__riscv32__) || defined(__riscv64__) || defined(__loongarch64__) + #define __av_start_struct4(LIST,TYPE_SIZE) \ + (*(LIST).aptr++ = (__avword)((LIST).raddr), 0) + #endif +@@ -330,7 +330,7 @@ typedef int __av_alist_verify[2*(__AV_ALIST_SIZE_BOUND - (int)sizeof(__av_alist) + * scalar argument types + */ + +-#if defined(__i386__) || defined(__m68k__) || (defined(__sparc__) && !defined(__sparc64__)) || defined(__alpha__) || defined(__hppa64__) || (defined(__arm__) && !defined(__armhf__)) || defined(__arm64__) || defined(__powerpc__) || defined(__powerpc64__) || defined(__ia64__) || defined(__x86_64_sysv__) || defined(__s390__) || defined(__s390x__) || defined(__riscv32__) || defined(__riscv64__) ++#if defined(__i386__) || defined(__m68k__) || (defined(__sparc__) && !defined(__sparc64__)) || defined(__alpha__) || defined(__hppa64__) || (defined(__arm__) && !defined(__armhf__)) || defined(__arm64__) || defined(__powerpc__) || defined(__powerpc64__) || defined(__ia64__) || defined(__x86_64_sysv__) || defined(__s390__) || defined(__s390x__) || defined(__riscv32__) || defined(__riscv64__) || defined(__loongarch64__) + /* Floats and all integer types are passed as words, + * doubles as two words (on 32-bit platforms) or one word (on 64-bit platforms). + */ +@@ -465,7 +465,7 @@ typedef int __av_alist_verify[2*(__AV_ALIST_SIZE_BOUND - (int)sizeof(__av_alist) + #define __av_ptr(LIST,VAL) __av_word(LIST,VAL) + #endif + +-#if defined(__mips64__) || defined(__sparc64__) || defined(__alpha__) || defined(__hppa64__) || defined(__arm64__) || defined(__powerpc64__) || defined(__ia64__) || (defined(__x86_64__) && !defined(__x86_64_x32__) && !defined(__AV_LLP64)) || defined(__s390x__) || defined(__riscv64__) ++#if defined(__mips64__) || defined(__sparc64__) || defined(__alpha__) || defined(__hppa64__) || defined(__arm64__) || defined(__powerpc64__) || defined(__ia64__) || (defined(__x86_64__) && !defined(__x86_64_x32__) && !defined(__AV_LLP64)) || defined(__s390x__) || defined(__riscv64__) || defined(__loongarch64__) + /* ‘long long’ and ‘long’ are identical. */ + #define __av_longlong __av_long + #define __av_ulonglong __av_ulong +@@ -833,7 +833,7 @@ typedef int __av_alist_verify[2*(__AV_ALIST_SIZE_BOUND - (int)sizeof(__av_alist) + + #endif + +-#if defined(__arm64__) || defined(__riscv64__) ++#if defined(__arm64__) || defined(__riscv64__) || defined(__loongarch64__) + + /* Up to __AV_FARG_NUM float or double args can be passed in float registers. 
+ The remaining float or double args are passed in the general-purpose +@@ -1535,7 +1535,7 @@ extern void avcall_structcpy (void* dest, const void* src, unsigned long size, u + (LIST).aptr[-1] = (__avword)(LIST).eptr, \ + 0)))) + #endif +-#if defined(__riscv32__) || defined(__riscv64__) ++#if defined(__riscv32__) || defined(__riscv64__) || defined(__loongarch64__) + /* Structures <= 16 bytes are passed as embedded copies on the arg stack. + * Big structures are passed as pointers to caller-made local copies. + */ +diff --git a/avcall/avcall-loongarch64-linux.s b/avcall/avcall-loongarch64-linux.s +new file mode 100644 +index 0000000..2d3a0d2 +--- /dev/null ++++ b/avcall/avcall-loongarch64-linux.s +@@ -0,0 +1,344 @@ ++ .file "avcall-loongarch64.c" ++ .text ++ .align 2 ++ .globl avcall_call ++ .type avcall_call, @function ++avcall_call: ++.LFB0 = . ++ .cfi_startproc ++ addi.d $sp,$sp,-48 ++ .cfi_def_cfa_offset 48 ++ st.d $fp,$sp,32 ++ st.d $s0,$sp,24 ++ st.d $s1,$sp,16 ++ st.d $s2,$sp,8 ++ stptr.d $s3,$sp,0 ++ st.d $ra,$sp,40 ++ .cfi_offset 22, -16 ++ .cfi_offset 23, -24 ++ .cfi_offset 24, -32 ++ .cfi_offset 25, -40 ++ .cfi_offset 26, -48 ++ .cfi_offset 1, -8 ++ addi.d $fp,$sp,48 ++ .cfi_def_cfa 22, 0 ++ ld.d $s1,$a0,48 ++ ld.d $t0,$a0,40 ++ addi.w $t1,$r0,8 # 0x8 ++ ldptr.w $s3,$a0,64 ++ sub.d $t0,$t0,$s1 ++ srai.d $t0,$t0,3 ++ slli.w $s2,$t0,0 ++ or $s0,$a0,$r0 ++ addi.d $sp,$sp,-2048 ++ ble $s2,$t1,.L72 ++ addi.w $a2,$t0,-9 ++ or $t2,$sp,$r0 ++ bstrpick.d $a2,$a2,31,0 ++ alsl.d $a2,$a2,$zero,3 ++ addi.d $a2,$a2,8 ++ addi.d $a1,$s1,64 ++ or $a0,$t2,$r0 ++ bl %plt(memcpy) ++ ldptr.d $a0,$s1,0 ++.L5: ++ addi.w $t0,$r0,2 # 0x2 ++ ld.d $a1,$s1,8 ++ beq $s2,$t0,.L6 ++ addi.w $t0,$r0,3 # 0x3 ++ ld.d $a2,$s1,16 ++ beq $s2,$t0,.L6 ++ addi.w $t0,$r0,4 # 0x4 ++ ld.d $a3,$s1,24 ++ beq $s2,$t0,.L6 ++ addi.w $t0,$r0,5 # 0x5 ++ ld.d $a4,$s1,32 ++ beq $s2,$t0,.L6 ++ addi.w $t0,$r0,6 # 0x6 ++ ld.d $a5,$s1,40 ++ beq $s2,$t0,.L6 ++ addi.w $t0,$r0,7 # 0x7 ++ ld.d $a6,$s1,48 ++ beq $s2,$t0,.L6 ++ ld.d $a7,$s1,56 ++.L6: ++ beqz $s3,.L8 ++ ldptr.w $t0,$s0,72 ++ andi $t1,$t0,1 ++ beqz $t1,.L9 ++ fld.d $f0,$s0,112 ++.L10: ++ addi.w $t1,$r0,1 # 0x1 ++ beq $s3,$t1,.L8 ++ andi $t1,$t0,2 ++ bnez $t1,.L73 ++ ldptr.w $t1,$s0,68 ++ andi $t1,$t1,2 ++ beqz $t1,.L13 ++ fld.s $f1,$s0,80 ++.L13: ++ addi.w $t1,$r0,2 # 0x2 ++ beq $s3,$t1,.L8 ++ andi $t1,$t0,4 ++ beqz $t1,.L14 ++ fld.d $f2,$s0,128 ++.L15: ++ addi.w $t1,$r0,3 # 0x3 ++ beq $s3,$t1,.L8 ++ andi $t1,$t0,8 ++ beqz $t1,.L16 ++ fld.d $f3,$s0,136 ++.L17: ++ addi.w $t1,$r0,4 # 0x4 ++ beq $s3,$t1,.L8 ++ andi $t1,$t0,16 ++ bnez $t1,.L74 ++ ldptr.w $t1,$s0,68 ++ andi $t1,$t1,16 ++ beqz $t1,.L19 ++ fld.s $f4,$s0,92 ++.L19: ++ addi.w $t1,$r0,5 # 0x5 ++ beq $s3,$t1,.L8 ++ andi $t1,$t0,32 ++ beqz $t1,.L20 ++ fld.d $f5,$s0,152 ++.L21: ++ addi.w $t1,$r0,6 # 0x6 ++ beq $s3,$t1,.L8 ++ andi $t1,$t0,64 ++ beqz $t1,.L22 ++ fld.d $f6,$s0,160 ++.L23: ++ addi.w $t1,$r0,7 # 0x7 ++ beq $s3,$t1,.L8 ++ andi $t0,$t0,128 ++ beqz $t0,.L24 ++ fld.d $f7,$s0,168 ++.L8: ++ ldptr.w $t0,$s0,24 ++ addi.w $t1,$r0,13 # 0xd ++ ld.d $t2,$s0,8 ++ beq $t0,$t1,.L75 ++ addi.w $t1,$r0,14 # 0xe ++ beq $t0,$t1,.L76 ++ jirl $ra,$t2,0 ++ ldptr.w $t0,$s0,24 ++ addi.w $t1,$r0,1 # 0x1 ++ beq $t0,$t1,.L26 ++ addi.w $t1,$r0,16 # 0x10 ++ bgtu $t0,$t1,.L26 ++ la.local $t1,.L29 ++ slli.d $t0,$t0,3 ++ ldx.d $t0,$t1,$t0 ++ add.d $t1,$t1,$t0 ++ jr $t1 ++ .section .rodata ++ .align 3 ++ .align 2 ++.L29: ++ .dword .L26-.L29 ++ .dword .L26-.L29 ++ .dword .L37-.L29 ++ .dword .L37-.L29 ++ .dword .L37-.L29 ++ .dword .L35-.L29 ++ 
.dword .L35-.L29 ++ .dword .L33-.L29 ++ .dword .L33-.L29 ++ .dword .L30-.L29 ++ .dword .L30-.L29 ++ .dword .L30-.L29 ++ .dword .L30-.L29 ++ .dword .L26-.L29 ++ .dword .L26-.L29 ++ .dword .L30-.L29 ++ .dword .L28-.L29 ++ .text ++.L9: ++ ldptr.w $t1,$s0,68 ++ andi $t1,$t1,1 ++ beqz $t1,.L10 ++ fld.s $f0,$s0,76 ++ b .L10 ++.L76: ++ ld.d $s0,$s0,16 ++ jirl $ra,$t2,0 ++ fst.d $f0,$s0,0 ++.L26: ++ addi.d $sp,$fp,-48 ++ .cfi_remember_state ++ .cfi_def_cfa 3, 48 ++ ld.d $ra,$sp,40 ++ .cfi_restore 1 ++ ld.d $fp,$sp,32 ++ .cfi_restore 22 ++ ld.d $s0,$sp,24 ++ .cfi_restore 23 ++ ld.d $s1,$sp,16 ++ .cfi_restore 24 ++ ld.d $s2,$sp,8 ++ .cfi_restore 25 ++ ldptr.d $s3,$sp,0 ++ .cfi_restore 26 ++ or $a0,$zero,$r0 ++ addi.d $sp,$sp,48 ++ .cfi_def_cfa_offset 0 ++ jr $ra ++.L72: ++ .cfi_restore_state ++ ble $s2,$r0,.L6 ++ addi.w $t0,$r0,1 # 0x1 ++ ldptr.d $a0,$s1,0 ++ bne $s2,$t0,.L5 ++ b .L6 ++.L75: ++ ld.d $s0,$s0,16 ++ jirl $ra,$t2,0 ++ fst.s $f0,$s0,0 ++ b .L26 ++.L73: ++ fld.d $f1,$s0,120 ++ b .L13 ++.L30: ++ ld.d $t0,$s0,16 ++ stptr.d $a0,$t0,0 ++ b .L26 ++.L37: ++ ld.d $t0,$s0,16 ++ st.b $a0,$t0,0 ++ b .L26 ++.L14: ++ ldptr.w $t1,$s0,68 ++ andi $t1,$t1,4 ++ beqz $t1,.L15 ++ fld.s $f2,$s0,84 ++ b .L15 ++.L16: ++ ldptr.w $t1,$s0,68 ++ andi $t1,$t1,8 ++ beqz $t1,.L17 ++ fld.s $f3,$s0,88 ++ b .L17 ++.L74: ++ fld.d $f4,$s0,144 ++ b .L19 ++.L35: ++ ld.d $t0,$s0,16 ++ st.h $a0,$t0,0 ++ b .L26 ++.L33: ++ ld.d $t0,$s0,16 ++ stptr.w $a0,$t0,0 ++ b .L26 ++.L28: ++ ldptr.w $t0,$s0,0 ++ andi $t0,$t0,512 ++ beqz $t0,.L26 ++ ld.d $s1,$s0,32 ++ addi.w $t0,$r0,15 # 0xf ++ addi.d $t1,$s1,-1 ++ bgtu $t1,$t0,.L26 ++ ld.d $t0,$s0,16 ++ addi.w $t4,$r0,-8 # 0xfffffffffffffff8 ++ addi.w $t1,$r0,1 # 0x1 ++ and $t4,$t0,$t4 ++ ldptr.d $t5,$t4,0 ++ andi $t0,$t0,7 ++ slli.w $t8,$t0,3 ++ add.d $s2,$s1,$t0 ++ sll.d $t7,$a0,$t8 ++ addi.w $s0,$r0,8 # 0x8 ++ sll.d $t1,$t1,$t8 ++ xor $t7,$t7,$t5 ++ slli.w $t6,$s2,3 ++ bgtu $s1,$s0,.L40 ++ bgtu $s2,$s0,.L41 ++ addi.w $t6,$t6,-1 ++ addi.w $t0,$r0,2 # 0x2 ++ sll.d $t0,$t0,$t6 ++ sub.d $t0,$t0,$t1 ++ and $t0,$t0,$t7 ++ xor $t0,$t0,$t5 ++ stptr.d $t0,$t4,0 ++ b .L26 ++.L20: ++ ldptr.w $t1,$s0,68 ++ andi $t1,$t1,32 ++ beqz $t1,.L21 ++ fld.s $f5,$s0,96 ++ b .L21 ++.L22: ++ ldptr.w $t1,$s0,68 ++ andi $t1,$t1,64 ++ beqz $t1,.L23 ++ fld.s $f6,$s0,100 ++ b .L23 ++.L24: ++ ldptr.w $t0,$s0,68 ++ andi $t0,$t0,128 ++ beqz $t0,.L8 ++ fld.s $f7,$s0,104 ++ b .L8 ++.L40: ++ sub.d $t1,$zero,$t1 ++ and $t1,$t1,$t7 ++ xor $t1,$t1,$t5 ++ stptr.d $t1,$t4,0 ++ addi.w $t1,$r0,16 # 0x10 ++ sub.d $t0,$s0,$t0 ++ sll.d $t8,$a1,$t8 ++ bgtu $s2,$t1,.L42 ++ slli.w $t0,$t0,2 ++ ld.d $t3,$t4,8 ++ sra.d $t2,$a0,$t0 ++ sra.d $t0,$t2,$t0 ++ addi.w $t6,$t6,-65 ++ addi.w $t1,$r0,2 # 0x2 ++ or $t0,$t0,$t8 ++ sll.d $t1,$t1,$t6 ++ xor $t0,$t0,$t3 ++ addi.d $t1,$t1,-1 ++ and $t0,$t0,$t1 ++ xor $t0,$t0,$t3 ++ st.d $t0,$t4,8 ++ b .L26 ++.L41: ++ ld.d $t3,$t4,8 ++ sub.d $t0,$s0,$t0 ++ slli.w $t0,$t0,3 ++ addi.w $t6,$t6,-65 ++ addi.w $t2,$r0,2 # 0x2 ++ sra.d $t0,$a0,$t0 ++ sll.d $t2,$t2,$t6 ++ sub.d $t1,$zero,$t1 ++ xor $t0,$t0,$t3 ++ addi.d $t2,$t2,-1 ++ and $t1,$t1,$t7 ++ and $t0,$t0,$t2 ++ xor $t1,$t1,$t5 ++ xor $t0,$t0,$t3 ++ stptr.d $t1,$t4,0 ++ st.d $t0,$t4,8 ++ b .L26 ++.L42: ++ ld.d $t7,$t4,16 ++ slli.w $t0,$t0,3 ++ addi.w $t1,$t6,-129 ++ addi.w $t5,$r0,2 # 0x2 ++ sll.d $t5,$t5,$t1 ++ sra.d $t1,$a1,$t0 ++ addi.d $t3,$t5,-1 ++ xor $t1,$t1,$t7 ++ sra.d $t0,$a0,$t0 ++ and $t1,$t3,$t1 ++ or $t0,$t0,$t8 ++ xor $t1,$t1,$t7 ++ st.d $t0,$t4,8 ++ st.d $t1,$t4,16 ++ b .L26 ++ .cfi_endproc ++.LFE0: ++ .size avcall_call, .-avcall_call 
++ .ident "GCC: (GNU) 12.0.1 20220317 (experimental)" ++ .section .note.GNU-stack,"",@progbits +diff --git a/avcall/avcall-loongarch64-macro.S b/avcall/avcall-loongarch64-macro.S +new file mode 100644 +index 0000000..ab5dc24 +--- /dev/null ++++ b/avcall/avcall-loongarch64-macro.S +@@ -0,0 +1,345 @@ ++ .file "avcall-loongarch64.c" ++ .text ++ .align 2 ++ .globl avcall_call ++ .type avcall_call, @function ++avcall_call: ++.LFB0 = . ++ .cfi_startproc ++ addi.d $sp,$sp,-48 ++ .cfi_def_cfa_offset 48 ++ st.d $fp,$sp,32 ++ st.d $s0,$sp,24 ++ st.d $s1,$sp,16 ++ st.d $s2,$sp,8 ++ stptr.d $s3,$sp,0 ++ st.d $ra,$sp,40 ++ .cfi_offset 22, -16 ++ .cfi_offset 23, -24 ++ .cfi_offset 24, -32 ++ .cfi_offset 25, -40 ++ .cfi_offset 26, -48 ++ .cfi_offset 1, -8 ++ addi.d $fp,$sp,48 ++ .cfi_def_cfa 22, 0 ++ ld.d $s1,$a0,48 ++ ld.d $t0,$a0,40 ++ addi.w $t1,$r0,8 # 0x8 ++ ldptr.w $s3,$a0,64 ++ sub.d $t0,$t0,$s1 ++ srai.d $t0,$t0,3 ++ slli.w $s2,$t0,0 ++ or $s0,$a0,$r0 ++ addi.d $sp,$sp,-2048 ++ ble $s2,$t1,.L72 ++ addi.w $a2,$t0,-9 ++ or $t2,$sp,$r0 ++ bstrpick.d $a2,$a2,31,0 ++ alsl.d $a2,$a2,$zero,3 ++ addi.d $a2,$a2,8 ++ addi.d $a1,$s1,64 ++ or $a0,$t2,$r0 ++ bl %plt(memcpy) ++ ldptr.d $a0,$s1,0 ++.L5: ++ addi.w $t0,$r0,2 # 0x2 ++ ld.d $a1,$s1,8 ++ beq $s2,$t0,.L6 ++ addi.w $t0,$r0,3 # 0x3 ++ ld.d $a2,$s1,16 ++ beq $s2,$t0,.L6 ++ addi.w $t0,$r0,4 # 0x4 ++ ld.d $a3,$s1,24 ++ beq $s2,$t0,.L6 ++ addi.w $t0,$r0,5 # 0x5 ++ ld.d $a4,$s1,32 ++ beq $s2,$t0,.L6 ++ addi.w $t0,$r0,6 # 0x6 ++ ld.d $a5,$s1,40 ++ beq $s2,$t0,.L6 ++ addi.w $t0,$r0,7 # 0x7 ++ ld.d $a6,$s1,48 ++ beq $s2,$t0,.L6 ++ ld.d $a7,$s1,56 ++.L6: ++ beqz $s3,.L8 ++ ldptr.w $t0,$s0,72 ++ andi $t1,$t0,1 ++ beqz $t1,.L9 ++ fld.d $f0,$s0,112 ++.L10: ++ addi.w $t1,$r0,1 # 0x1 ++ beq $s3,$t1,.L8 ++ andi $t1,$t0,2 ++ bnez $t1,.L73 ++ ldptr.w $t1,$s0,68 ++ andi $t1,$t1,2 ++ beqz $t1,.L13 ++ fld.s $f1,$s0,80 ++.L13: ++ addi.w $t1,$r0,2 # 0x2 ++ beq $s3,$t1,.L8 ++ andi $t1,$t0,4 ++ beqz $t1,.L14 ++ fld.d $f2,$s0,128 ++.L15: ++ addi.w $t1,$r0,3 # 0x3 ++ beq $s3,$t1,.L8 ++ andi $t1,$t0,8 ++ beqz $t1,.L16 ++ fld.d $f3,$s0,136 ++.L17: ++ addi.w $t1,$r0,4 # 0x4 ++ beq $s3,$t1,.L8 ++ andi $t1,$t0,16 ++ bnez $t1,.L74 ++ ldptr.w $t1,$s0,68 ++ andi $t1,$t1,16 ++ beqz $t1,.L19 ++ fld.s $f4,$s0,92 ++.L19: ++ addi.w $t1,$r0,5 # 0x5 ++ beq $s3,$t1,.L8 ++ andi $t1,$t0,32 ++ beqz $t1,.L20 ++ fld.d $f5,$s0,152 ++.L21: ++ addi.w $t1,$r0,6 # 0x6 ++ beq $s3,$t1,.L8 ++ andi $t1,$t0,64 ++ beqz $t1,.L22 ++ fld.d $f6,$s0,160 ++.L23: ++ addi.w $t1,$r0,7 # 0x7 ++ beq $s3,$t1,.L8 ++ andi $t0,$t0,128 ++ beqz $t0,.L24 ++ fld.d $f7,$s0,168 ++.L8: ++ ldptr.w $t0,$s0,24 ++ addi.w $t1,$r0,13 # 0xd ++ ld.d $t2,$s0,8 ++ beq $t0,$t1,.L75 ++ addi.w $t1,$r0,14 # 0xe ++ beq $t0,$t1,.L76 ++ jirl $ra,$t2,0 ++ ldptr.w $t0,$s0,24 ++ addi.w $t1,$r0,1 # 0x1 ++ beq $t0,$t1,.L26 ++ addi.w $t1,$r0,16 # 0x10 ++ bgtu $t0,$t1,.L26 ++ la.local $t1,.L29 ++ slli.d $t0,$t0,3 ++ ldx.d $t0,$t1,$t0 ++ add.d $t1,$t1,$t0 ++ jr $t1 ++ .section .rodata ++ .align 3 ++ .align 2 ++.L29: ++ .dword .L26-.L29 ++ .dword .L26-.L29 ++ .dword .L37-.L29 ++ .dword .L37-.L29 ++ .dword .L37-.L29 ++ .dword .L35-.L29 ++ .dword .L35-.L29 ++ .dword .L33-.L29 ++ .dword .L33-.L29 ++ .dword .L30-.L29 ++ .dword .L30-.L29 ++ .dword .L30-.L29 ++ .dword .L30-.L29 ++ .dword .L26-.L29 ++ .dword .L26-.L29 ++ .dword .L30-.L29 ++ .dword .L28-.L29 ++ .text ++.L9: ++ ldptr.w $t1,$s0,68 ++ andi $t1,$t1,1 ++ beqz $t1,.L10 ++ fld.s $f0,$s0,76 ++ b .L10 ++.L76: ++ ld.d $s0,$s0,16 ++ jirl $ra,$t2,0 ++ fst.d $f0,$s0,0 ++.L26: ++ addi.d $sp,$fp,-48 ++ 
.cfi_remember_state ++ .cfi_def_cfa 3, 48 ++ ld.d $ra,$sp,40 ++ .cfi_restore 1 ++ ld.d $fp,$sp,32 ++ .cfi_restore 22 ++ ld.d $s0,$sp,24 ++ .cfi_restore 23 ++ ld.d $s1,$sp,16 ++ .cfi_restore 24 ++ ld.d $s2,$sp,8 ++ .cfi_restore 25 ++ ldptr.d $s3,$sp,0 ++ .cfi_restore 26 ++ or $a0,$zero,$r0 ++ addi.d $sp,$sp,48 ++ .cfi_def_cfa_offset 0 ++ jr $ra ++.L72: ++ .cfi_restore_state ++ ble $s2,$r0,.L6 ++ addi.w $t0,$r0,1 # 0x1 ++ ldptr.d $a0,$s1,0 ++ bne $s2,$t0,.L5 ++ b .L6 ++.L75: ++ ld.d $s0,$s0,16 ++ jirl $ra,$t2,0 ++ fst.s $f0,$s0,0 ++ b .L26 ++.L73: ++ fld.d $f1,$s0,120 ++ b .L13 ++.L30: ++ ld.d $t0,$s0,16 ++ stptr.d $a0,$t0,0 ++ b .L26 ++.L37: ++ ld.d $t0,$s0,16 ++ st.b $a0,$t0,0 ++ b .L26 ++.L14: ++ ldptr.w $t1,$s0,68 ++ andi $t1,$t1,4 ++ beqz $t1,.L15 ++ fld.s $f2,$s0,84 ++ b .L15 ++.L16: ++ ldptr.w $t1,$s0,68 ++ andi $t1,$t1,8 ++ beqz $t1,.L17 ++ fld.s $f3,$s0,88 ++ b .L17 ++.L74: ++ fld.d $f4,$s0,144 ++ b .L19 ++.L35: ++ ld.d $t0,$s0,16 ++ st.h $a0,$t0,0 ++ b .L26 ++.L33: ++ ld.d $t0,$s0,16 ++ stptr.w $a0,$t0,0 ++ b .L26 ++.L28: ++ ldptr.w $t0,$s0,0 ++ andi $t0,$t0,512 ++ beqz $t0,.L26 ++ ld.d $s1,$s0,32 ++ addi.w $t0,$r0,15 # 0xf ++ addi.d $t1,$s1,-1 ++ bgtu $t1,$t0,.L26 ++ ld.d $t0,$s0,16 ++ addi.w $t4,$r0,-8 # 0xfffffffffffffff8 ++ addi.w $t1,$r0,1 # 0x1 ++ and $t4,$t0,$t4 ++ ldptr.d $t5,$t4,0 ++ andi $t0,$t0,7 ++ slli.w $t8,$t0,3 ++ add.d $s2,$s1,$t0 ++ sll.d $t7,$a0,$t8 ++ addi.w $s0,$r0,8 # 0x8 ++ sll.d $t1,$t1,$t8 ++ xor $t7,$t7,$t5 ++ slli.w $t6,$s2,3 ++ bgtu $s1,$s0,.L40 ++ bgtu $s2,$s0,.L41 ++ addi.w $t6,$t6,-1 ++ addi.w $t0,$r0,2 # 0x2 ++ sll.d $t0,$t0,$t6 ++ sub.d $t0,$t0,$t1 ++ and $t0,$t0,$t7 ++ xor $t0,$t0,$t5 ++ stptr.d $t0,$t4,0 ++ b .L26 ++.L20: ++ ldptr.w $t1,$s0,68 ++ andi $t1,$t1,32 ++ beqz $t1,.L21 ++ fld.s $f5,$s0,96 ++ b .L21 ++.L22: ++ ldptr.w $t1,$s0,68 ++ andi $t1,$t1,64 ++ beqz $t1,.L23 ++ fld.s $f6,$s0,100 ++ b .L23 ++.L24: ++ ldptr.w $t0,$s0,68 ++ andi $t0,$t0,128 ++ beqz $t0,.L8 ++ fld.s $f7,$s0,104 ++ b .L8 ++.L40: ++ sub.d $t1,$zero,$t1 ++ and $t1,$t1,$t7 ++ xor $t1,$t1,$t5 ++ stptr.d $t1,$t4,0 ++ addi.w $t1,$r0,16 # 0x10 ++ sub.d $t0,$s0,$t0 ++ sll.d $t8,$a1,$t8 ++ bgtu $s2,$t1,.L42 ++ slli.w $t0,$t0,2 ++ ld.d $t3,$t4,8 ++ sra.d $t2,$a0,$t0 ++ sra.d $t0,$t2,$t0 ++ addi.w $t6,$t6,-65 ++ addi.w $t1,$r0,2 # 0x2 ++ or $t0,$t0,$t8 ++ sll.d $t1,$t1,$t6 ++ xor $t0,$t0,$t3 ++ addi.d $t1,$t1,-1 ++ and $t0,$t0,$t1 ++ xor $t0,$t0,$t3 ++ st.d $t0,$t4,8 ++ b .L26 ++.L41: ++ ld.d $t3,$t4,8 ++ sub.d $t0,$s0,$t0 ++ slli.w $t0,$t0,3 ++ addi.w $t6,$t6,-65 ++ addi.w $t2,$r0,2 # 0x2 ++ sra.d $t0,$a0,$t0 ++ sll.d $t2,$t2,$t6 ++ sub.d $t1,$zero,$t1 ++ xor $t0,$t0,$t3 ++ addi.d $t2,$t2,-1 ++ and $t1,$t1,$t7 ++ and $t0,$t0,$t2 ++ xor $t1,$t1,$t5 ++ xor $t0,$t0,$t3 ++ stptr.d $t1,$t4,0 ++ st.d $t0,$t4,8 ++ b .L26 ++.L42: ++ ld.d $t7,$t4,16 ++ slli.w $t0,$t0,3 ++ addi.w $t1,$t6,-129 ++ addi.w $t5,$r0,2 # 0x2 ++ sll.d $t5,$t5,$t1 ++ sra.d $t1,$a1,$t0 ++ addi.d $t3,$t5,-1 ++ xor $t1,$t1,$t7 ++ sra.d $t0,$a0,$t0 ++ and $t1,$t3,$t1 ++ or $t0,$t0,$t8 ++ xor $t1,$t1,$t7 ++ st.d $t0,$t4,8 ++ st.d $t1,$t4,16 ++ b .L26 ++ .cfi_endproc ++.LFE0: ++ .size avcall_call, .-avcall_call ++#if defined __linux__ || defined __FreeBSD__ || defined __FreeBSD_kernel__ || defined __DragonFly__ ++ .section .note.GNU-stack,"",@progbits ++#endif +diff --git a/avcall/avcall-loongarch64.c b/avcall/avcall-loongarch64.c +new file mode 100644 +index 0000000..0e33033 +--- /dev/null ++++ b/avcall/avcall-loongarch64.c +@@ -0,0 +1,358 @@ ++/** ++ Copyright 1993 Bill Triggs ++ Copyright 1995-2022 Bruno Haible ++ ++ 
This program is free software: you can redistribute it and/or modify ++ it under the terms of the GNU General Public License as published by ++ the Free Software Foundation; either version 2 of the License, or ++ (at your option) any later version. ++ ++ This program is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ GNU General Public License for more details. ++ ++ You should have received a copy of the GNU General Public License ++ along with this program. If not, see . ++**/ ++/*---------------------------------------------------------------------- ++ Foreign function interface for LoongArch 64-bit CPU with LP64 ABI. ++ ++ This calls a C function with an argument list built up using macros ++ defined in avcall.h. ++ ++ LoongArch64 argument passing conventions: ++ ++ Up to 8 words are passed in integer registers (r4, ..., r11); remaining ++ words are passed on the stack. ++ Up to 8 float/double arguments are passed in floating point registers ++ (f0, ..., f7), further float/double arguments are passed in integer ++ registers, remaining float/double arguments are passed on the stack. ++ Arguments passed on the stack have 8-bytes alignment. ++ Structure args <= 16 bytes are passed as up to two words in registers ++ (floating-point fields of structures with at most two fields in ++ floating-point registers, other fields in integer registers). ++ Structure args larger than 16 bytes are passed as pointers to caller-made ++ local copies. ++ ++ Integers are returned in r4, r5. ++ Float/double values are returned in f0, f1. ++ Structures <= 16 bytes are returned in registers (floating-point fields ++ of structures with at most two fields in floating-point registers, other ++ fields in integer registers). ++ To return a structure larger than 16 bytes, the caller allocates the ++ space and passes a pointer to it as first argument (i.e. in r4). 
++ ----------------------------------------------------------------------*/ ++#include "avcall-internal.h" ++ ++#define RETURN(TYPE,VAL) (*(TYPE*)l->raddr = (TYPE)(VAL)) ++ ++register __avrword iarg1 __asm__("a0"); ++register __avrword iarg2 __asm__("a1"); ++register __avrword iarg3 __asm__("a2"); ++register __avrword iarg4 __asm__("a3"); ++register __avrword iarg5 __asm__("a4"); ++register __avrword iarg6 __asm__("a5"); ++register __avrword iarg7 __asm__("a6"); ++register __avrword iarg8 __asm__("a7"); ++ ++register float farg1 __asm__("f0"); ++register float farg2 __asm__("f1"); ++register float farg3 __asm__("f2"); ++register float farg4 __asm__("f3"); ++register float farg5 __asm__("f4"); ++register float farg6 __asm__("f5"); ++register float farg7 __asm__("f6"); ++register float farg8 __asm__("f7"); ++ ++register double darg1 __asm__("f0"); ++register double darg2 __asm__("f1"); ++register double darg3 __asm__("f2"); ++register double darg4 __asm__("f3"); ++register double darg5 __asm__("f4"); ++register double darg6 __asm__("f5"); ++register double darg7 __asm__("f6"); ++register double darg8 __asm__("f7"); ++ ++int ++avcall_call(av_alist* list) ++{ ++ register __avrword iretreg __asm__("a0"); ++ register __avrword iret2reg __asm__("a1"); ++ register double dret __asm__("f0"); ++ ++ __av_alist* l = &AV_LIST_INNER(list); ++ ++ __avword* argframe = __builtin_alloca(__AV_ALIST_WORDS * sizeof(__avword)); /* make room for argument list */ ++ int arglen = l->aptr - l->args; ++ unsigned int fanum = l->fanum; ++ ++ { ++ int i; ++ for (i = 8; i < arglen; i++) /* push function args onto stack */ ++ argframe[i-8] = l->args[i]; ++ } ++ ++ /* Put up to 8 integer args into registers. */ ++ if (arglen >= 1) { ++ iarg1 = l->args[0]; ++ if (arglen >= 2) { ++ iarg2 = l->args[1]; ++ if (arglen >= 3) { ++ iarg3 = l->args[2]; ++ if (arglen >= 4) { ++ iarg4 = l->args[3]; ++ if (arglen >= 5) { ++ iarg5 = l->args[4]; ++ if (arglen >= 6) { ++ iarg6 = l->args[5]; ++ if (arglen >= 7) { ++ iarg7 = l->args[6]; ++ if (arglen >= 8) { ++ iarg8 = l->args[7]; ++ } ++ } ++ } ++ } ++ } ++ } ++ } ++ } ++ ++ /* Put upto 8 floating-point args into registers. */ ++ if (fanum >= 1) { ++ if (l->darg_mask & (1 << 0)) darg1 = l->dargs[0]; ++ else if (l->farg_mask & (1 << 0)) farg1 = l->fargs[0]; ++ if (fanum >= 2) { ++ if (l->darg_mask & (1 << 1)) darg2 = l->dargs[1]; ++ else if (l->farg_mask & (1 << 1)) farg2 = l->fargs[1]; ++ if (fanum >= 3) { ++ if (l->darg_mask & (1 << 2)) darg3 = l->dargs[2]; ++ else if (l->farg_mask & (1 << 2)) farg3 = l->fargs[2]; ++ if (fanum >= 4) { ++ if (l->darg_mask & (1 << 3)) darg4 = l->dargs[3]; ++ else if (l->farg_mask & (1 << 3)) farg4 = l->fargs[3]; ++ if (fanum >= 5) { ++ if (l->darg_mask & (1 << 4)) darg5 = l->dargs[4]; ++ else if (l->farg_mask & (1 << 4)) farg5 = l->fargs[4]; ++ if (fanum >= 6) { ++ if (l->darg_mask & (1 << 5)) darg6 = l->dargs[5]; ++ else if (l->farg_mask & (1 << 5)) farg6 = l->fargs[5]; ++ if (fanum >= 7) { ++ if (l->darg_mask & (1 << 6)) darg7 = l->dargs[6]; ++ else if (l->farg_mask & (1 << 6)) farg7 = l->fargs[6]; ++ if (fanum >= 8) { ++ if (l->darg_mask & (1 << 7)) darg8 = l->dargs[7]; ++ else if (l->farg_mask & (1 << 7)) farg8 = l->fargs[7]; ++ } ++ } ++ } ++ } ++ } ++ } ++ } ++ } ++ ++ /* Call function. 
*/ ++ if (l->rtype == __AVfloat) { ++ *(float*)l->raddr = (*(float(*)())l->func)(); ++ } else ++ if (l->rtype == __AVdouble) { ++ *(double*)l->raddr = (*(double(*)())l->func)(); ++ } else { ++ __avrword iret, iret2; ++ ++ iret = (*l->func)(); ++ iret2 = iret2reg; ++ ++ /* save return value */ ++ if (l->rtype == __AVvoid) { ++ } else ++ if (l->rtype == __AVchar) { ++ RETURN(char, iret); ++ } else ++ if (l->rtype == __AVschar) { ++ RETURN(signed char, iret); ++ } else ++ if (l->rtype == __AVuchar) { ++ RETURN(unsigned char, iret); ++ } else ++ if (l->rtype == __AVshort) { ++ RETURN(short, iret); ++ } else ++ if (l->rtype == __AVushort) { ++ RETURN(unsigned short, iret); ++ } else ++ if (l->rtype == __AVint) { ++ RETURN(int, iret); ++ } else ++ if (l->rtype == __AVuint) { ++ RETURN(unsigned int, iret); ++ } else ++ if (l->rtype == __AVlong || l->rtype == __AVlonglong) { ++ RETURN(long, iret); ++ } else ++ if (l->rtype == __AVulong || l->rtype == __AVulonglong) { ++ RETURN(unsigned long, iret); ++ } else ++ /* see above ++ if (l->rtype == __AVfloat) { ++ } else ++ if (l->rtype == __AVdouble) { ++ } else ++ */ ++ if (l->rtype == __AVvoidp) { ++ RETURN(void*, iret); ++ } else ++ if (l->rtype == __AVstruct) { ++ if (l->flags & __AV_REGISTER_STRUCT_RETURN) { ++ /* Return structs of size <= 16 in registers. */ ++ if (l->rsize > 0 && l->rsize <= 16) { ++ void* raddr = l->raddr; ++ #if 0 /* Unoptimized */ ++ if (l->rsize == 1) { ++ ((unsigned char *)raddr)[0] = (unsigned char)(iret); ++ } else ++ if (l->rsize == 2) { ++ ((unsigned char *)raddr)[0] = (unsigned char)(iret); ++ ((unsigned char *)raddr)[1] = (unsigned char)(iret>>8); ++ } else ++ if (l->rsize == 3) { ++ ((unsigned char *)raddr)[0] = (unsigned char)(iret); ++ ((unsigned char *)raddr)[1] = (unsigned char)(iret>>8); ++ ((unsigned char *)raddr)[2] = (unsigned char)(iret>>16); ++ } else ++ if (l->rsize == 4) { ++ ((unsigned char *)raddr)[0] = (unsigned char)(iret); ++ ((unsigned char *)raddr)[1] = (unsigned char)(iret>>8); ++ ((unsigned char *)raddr)[2] = (unsigned char)(iret>>16); ++ ((unsigned char *)raddr)[3] = (unsigned char)(iret>>24); ++ } else ++ if (l->rsize == 5) { ++ ((unsigned char *)raddr)[0] = (unsigned char)(iret); ++ ((unsigned char *)raddr)[1] = (unsigned char)(iret>>8); ++ ((unsigned char *)raddr)[2] = (unsigned char)(iret>>16); ++ ((unsigned char *)raddr)[3] = (unsigned char)(iret>>24); ++ ((unsigned char *)raddr)[4] = (unsigned char)(iret>>32); ++ } else ++ if (l->rsize == 6) { ++ ((unsigned char *)raddr)[0] = (unsigned char)(iret); ++ ((unsigned char *)raddr)[1] = (unsigned char)(iret>>8); ++ ((unsigned char *)raddr)[2] = (unsigned char)(iret>>16); ++ ((unsigned char *)raddr)[3] = (unsigned char)(iret>>24); ++ ((unsigned char *)raddr)[4] = (unsigned char)(iret>>32); ++ ((unsigned char *)raddr)[5] = (unsigned char)(iret>>40); ++ } else ++ if (l->rsize == 7) { ++ ((unsigned char *)raddr)[0] = (unsigned char)(iret); ++ ((unsigned char *)raddr)[1] = (unsigned char)(iret>>8); ++ ((unsigned char *)raddr)[2] = (unsigned char)(iret>>16); ++ ((unsigned char *)raddr)[3] = (unsigned char)(iret>>24); ++ ((unsigned char *)raddr)[4] = (unsigned char)(iret>>32); ++ ((unsigned char *)raddr)[5] = (unsigned char)(iret>>40); ++ ((unsigned char *)raddr)[6] = (unsigned char)(iret>>48); ++ } else ++ if (l->rsize >= 8 && l->rsize <= 16) { ++ ((unsigned char *)raddr)[0] = (unsigned char)(iret); ++ ((unsigned char *)raddr)[1] = (unsigned char)(iret>>8); ++ ((unsigned char *)raddr)[2] = (unsigned char)(iret>>16); ++ ((unsigned char *)raddr)[3] = 
(unsigned char)(iret>>24); ++ ((unsigned char *)raddr)[4] = (unsigned char)(iret>>32); ++ ((unsigned char *)raddr)[5] = (unsigned char)(iret>>40); ++ ((unsigned char *)raddr)[6] = (unsigned char)(iret>>48); ++ ((unsigned char *)raddr)[7] = (unsigned char)(iret>>56); ++ if (l->rsize == 8) { ++ } else ++ if (l->rsize == 9) { ++ ((unsigned char *)raddr)[8+0] = (unsigned char)(iret2); ++ } else ++ if (l->rsize == 10) { ++ ((unsigned char *)raddr)[8+0] = (unsigned char)(iret2); ++ ((unsigned char *)raddr)[8+1] = (unsigned char)(iret2>>8); ++ } else ++ if (l->rsize == 11) { ++ ((unsigned char *)raddr)[8+0] = (unsigned char)(iret2); ++ ((unsigned char *)raddr)[8+1] = (unsigned char)(iret2>>8); ++ ((unsigned char *)raddr)[8+2] = (unsigned char)(iret2>>16); ++ } else ++ if (l->rsize == 12) { ++ ((unsigned char *)raddr)[8+0] = (unsigned char)(iret2); ++ ((unsigned char *)raddr)[8+1] = (unsigned char)(iret2>>8); ++ ((unsigned char *)raddr)[8+2] = (unsigned char)(iret2>>16); ++ ((unsigned char *)raddr)[8+3] = (unsigned char)(iret2>>24); ++ } else ++ if (l->rsize == 13) { ++ ((unsigned char *)raddr)[8+0] = (unsigned char)(iret2); ++ ((unsigned char *)raddr)[8+1] = (unsigned char)(iret2>>8); ++ ((unsigned char *)raddr)[8+2] = (unsigned char)(iret2>>16); ++ ((unsigned char *)raddr)[8+3] = (unsigned char)(iret2>>24); ++ ((unsigned char *)raddr)[8+4] = (unsigned char)(iret2>>32); ++ } else ++ if (l->rsize == 14) { ++ ((unsigned char *)raddr)[8+0] = (unsigned char)(iret2); ++ ((unsigned char *)raddr)[8+1] = (unsigned char)(iret2>>8); ++ ((unsigned char *)raddr)[8+2] = (unsigned char)(iret2>>16); ++ ((unsigned char *)raddr)[8+3] = (unsigned char)(iret2>>24); ++ ((unsigned char *)raddr)[8+4] = (unsigned char)(iret2>>32); ++ ((unsigned char *)raddr)[8+5] = (unsigned char)(iret2>>40); ++ } else ++ if (l->rsize == 15) { ++ ((unsigned char *)raddr)[8+0] = (unsigned char)(iret2); ++ ((unsigned char *)raddr)[8+1] = (unsigned char)(iret2>>8); ++ ((unsigned char *)raddr)[8+2] = (unsigned char)(iret2>>16); ++ ((unsigned char *)raddr)[8+3] = (unsigned char)(iret2>>24); ++ ((unsigned char *)raddr)[8+4] = (unsigned char)(iret2>>32); ++ ((unsigned char *)raddr)[8+5] = (unsigned char)(iret2>>40); ++ ((unsigned char *)raddr)[8+6] = (unsigned char)(iret2>>48); ++ } else ++ if (l->rsize == 16) { ++ ((unsigned char *)raddr)[8+0] = (unsigned char)(iret2); ++ ((unsigned char *)raddr)[8+1] = (unsigned char)(iret2>>8); ++ ((unsigned char *)raddr)[8+2] = (unsigned char)(iret2>>16); ++ ((unsigned char *)raddr)[8+3] = (unsigned char)(iret2>>24); ++ ((unsigned char *)raddr)[8+4] = (unsigned char)(iret2>>32); ++ ((unsigned char *)raddr)[8+5] = (unsigned char)(iret2>>40); ++ ((unsigned char *)raddr)[8+6] = (unsigned char)(iret2>>48); ++ ((unsigned char *)raddr)[8+7] = (unsigned char)(iret2>>56); ++ } ++ } ++ #else /* Optimized: fewer conditional jumps, fewer memory accesses */ ++ uintptr_t count = l->rsize; /* > 0, ≤ 2*sizeof(__avrword) */ ++ __avrword* wordaddr = (__avrword*)((uintptr_t)raddr & ~(uintptr_t)(sizeof(__avrword)-1)); ++ uintptr_t start_offset = (uintptr_t)raddr & (uintptr_t)(sizeof(__avrword)-1); /* ≥ 0, < sizeof(__avrword) */ ++ uintptr_t end_offset = start_offset + count; /* > 0, < 3*sizeof(__avrword) */ ++ if (count <= sizeof(__avrword)) { ++ /* Use iret. 
*/ ++ if (end_offset <= sizeof(__avrword)) { ++ /* 0 < end_offset ≤ sizeof(__avrword) */ ++ __avrword mask0 = ((__avrword)2 << (end_offset*8-1)) - ((__avrword)1 << (start_offset*8)); ++ wordaddr[0] ^= (wordaddr[0] ^ (iret << (start_offset*8))) & mask0; ++ } else { ++ /* sizeof(__avrword) < end_offset < 2*sizeof(__avrword), start_offset > 0 */ ++ __avrword mask0 = - ((__avrword)1 << (start_offset*8)); ++ __avrword mask1 = ((__avrword)2 << (end_offset*8-sizeof(__avrword)*8-1)) - 1; ++ wordaddr[0] ^= (wordaddr[0] ^ (iret << (start_offset*8))) & mask0; ++ wordaddr[1] ^= (wordaddr[1] ^ (iret >> (sizeof(__avrword)*8-start_offset*8))) & mask1; ++ } ++ } else { ++ /* Use iret, iret2. */ ++ __avrword mask0 = - ((__avrword)1 << (start_offset*8)); ++ wordaddr[0] ^= (wordaddr[0] ^ (iret << (start_offset*8))) & mask0; ++ if (end_offset <= 2*sizeof(__avrword)) { ++ /* sizeof(__avrword) < end_offset ≤ 2*sizeof(__avrword) */ ++ __avrword mask1 = ((__avrword)2 << (end_offset*8-sizeof(__avrword)*8-1)) - 1; ++ wordaddr[1] ^= (wordaddr[1] ^ ((iret >> (sizeof(__avrword)*4-start_offset*4) >> (sizeof(__avrword)*4-start_offset*4)) | (iret2 << (start_offset*8)))) & mask1; ++ } else { ++ /* 2*sizeof(__avrword) < end_offset < 3*sizeof(__avrword), start_offset > 0 */ ++ __avrword mask2 = ((__avrword)2 << (end_offset*8-2*sizeof(__avrword)*8-1)) - 1; ++ wordaddr[1] = (iret >> (sizeof(__avrword)*8-start_offset*8)) | (iret2 << (start_offset*8)); ++ wordaddr[2] ^= (wordaddr[2] ^ (iret2 >> (sizeof(__avrword)*8-start_offset*8))) & mask2; ++ } ++ } ++ #endif ++ } ++ } ++ } ++ } ++ return 0; ++} +diff --git a/avcall/avcall.h b/avcall/avcall.h +index 3d10db4..66c9c19 100644 +--- a/avcall/avcall.h ++++ b/avcall/avcall.h +@@ -1,6 +1,6 @@ + /* + * Copyright 1993-1995 Bill Triggs +- * Copyright 1995-2021 Bruno Haible ++ * Copyright 1995-2022 Bruno Haible + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by +@@ -148,7 +148,7 @@ enum __AV_alist_flags + #if defined(__sparc__) && !defined(__sparc64__) && defined(__sun) && (defined(__SUNPRO_C) || defined(__SUNPRO_CC)) /* SUNWspro cc or CC */ + __AV_SUNPROCC_STRUCT_RETURN, + #else +-#if (defined(__i386__) && (defined(_WIN32) || defined(__CYGWIN__) || (defined(__MACH__) && defined(__APPLE__)) || defined(__FreeBSD__) || defined(__DragonFly__) || defined(__OpenBSD__))) || defined(__m68k__) || defined(__mipsn32__) || defined(__mips64__) || defined(__sparc64__) || defined(__hppa__) || defined(__hppa64__) || defined(__arm__) || defined(__armhf__) || defined(__arm64__) || defined(__powerpc64_elfv2__) || defined(__ia64__) || defined(__x86_64__) || defined(__riscv32__) || defined(__riscv64__) ++#if (defined(__i386__) && (defined(_WIN32) || defined(__CYGWIN__) || (defined(__MACH__) && defined(__APPLE__)) || defined(__FreeBSD__) || defined(__DragonFly__) || defined(__OpenBSD__))) || defined(__m68k__) || defined(__mipsn32__) || defined(__mips64__) || defined(__sparc64__) || defined(__hppa__) || defined(__hppa64__) || defined(__arm__) || defined(__armhf__) || defined(__arm64__) || defined(__powerpc64_elfv2__) || defined(__ia64__) || defined(__x86_64__) || defined(__riscv32__) || defined(__riscv64__) || defined(__loongarch64__) + __AV_SMALL_STRUCT_RETURN | + #endif + #if defined(__GNUC__) && !((defined(__mipsn32__) || defined(__mips64__)) && ((__GNUC__ == 3 && __GNUC_MINOR__ >= 4) || (__GNUC__ > 3))) +@@ -265,7 +265,7 @@ enum __AV_alist_flags + #endif + + /* These are for internal use only */ +-#if 
defined(__i386__) || defined(__m68k__) || defined(__mipsn32__) || defined(__mips64__) || defined(__sparc64__) || defined(__alpha__) || defined(__hppa64__) || defined(__arm__) || defined(__armhf__) || defined(__arm64__) || defined(__powerpc__) || defined(__powerpc64__) || defined(__ia64__) || defined(__x86_64__) || (defined(__s390__) && !defined(__s390x__)) || defined(__riscv64__) ++#if defined(__i386__) || defined(__m68k__) || defined(__mipsn32__) || defined(__mips64__) || defined(__sparc64__) || defined(__alpha__) || defined(__hppa64__) || defined(__arm__) || defined(__armhf__) || defined(__arm64__) || defined(__powerpc__) || defined(__powerpc64__) || defined(__ia64__) || defined(__x86_64__) || (defined(__s390__) && !defined(__s390x__)) || defined(__riscv64__) || defined(__loongarch64__) + __AV_REGISTER_STRUCT_RETURN = 1<<9, + #endif + +diff --git a/callback/PLATFORMS b/callback/PLATFORMS +index df4c6e2..aded5a6 100644 +--- a/callback/PLATFORMS ++++ b/callback/PLATFORMS +@@ -40,4 +40,5 @@ Supported CPUs: (Put the GNU config.guess values here.) + s390x s390x-ibm-linux (gcc) + riscv32 riscv32-unknown-linux (gcc -mabi=ilp32d) + riscv64 riscv64-unknown-linux (gcc -mabi=lp64d) ++ loongarch64 loongarch64-unknown-linux (gcc) + +diff --git a/callback/trampoline_r/Makefile.devel b/callback/trampoline_r/Makefile.devel +index 7abb351..a5c7e83 100644 +--- a/callback/trampoline_r/Makefile.devel ++++ b/callback/trampoline_r/Makefile.devel +@@ -79,7 +79,7 @@ cache-powerpc64-elfv2-macro.S : cache-powerpc64-elfv2-linux.s ../../common/asm-p + OLDGCCFLAGS = -O2 -fomit-frame-pointer + ASPREFIX = /usr1/gnu/lib + +-proto-precompiled : proto-i386.s proto-m68k.s proto-mips.s proto-mipsn32.s proto-mips64.s proto-sparc.s proto-sparc64.s proto-alpha.s proto-hppa.s proto-hppa64.s proto-arm.s proto-arm64.s proto-powerpc-aix.s proto-powerpc-sysv4.s proto-powerpc-macos.s proto-powerpc64-aix.s proto-powerpc64-elfv2.s proto-ia64.c proto-x86_64.c proto-x86_64-x32.s proto-s390.s proto-s390x.s proto-riscv32.s proto-riscv64.s ++proto-precompiled : proto-i386.s proto-m68k.s proto-mips.s proto-mipsn32.s proto-mips64.s proto-sparc.s proto-sparc64.s proto-alpha.s proto-hppa.s proto-hppa64.s proto-arm.s proto-arm64.s proto-powerpc-aix.s proto-powerpc-sysv4.s proto-powerpc-macos.s proto-powerpc64-aix.s proto-powerpc64-elfv2.s proto-ia64.c proto-x86_64.c proto-x86_64-x32.s proto-s390.s proto-s390x.s proto-riscv32.s proto-riscv64.s proto-loongarch64.s + + proto-i386.s : proto.c + $(GCC) -V 2.7.2 -b i486-linuxaout $(OLDGCCFLAGS) -D__i386__ -S proto.c -o $@ +@@ -153,6 +153,9 @@ proto-riscv32.s : proto.c + proto-riscv64.s : proto64.c + $(CROSS_TOOL) riscv64-linux gcc-7.3.0 $(OLDGCCFLAGS) -D__riscv64__ -S proto64.c -o $@ + ++proto-loongarch64.s : proto64.c ++ $(CROSS_TOOL) loongarch64-linux gcc $(OLDGCCFLAGS) -D__loongarch64__ -S proto64.c -o $@ ++ + tramp-i386.o : tramp-i386.s + $(ASPREFIX)/i486-linux/bin/as tramp-i386.s -o $@ + +@@ -215,3 +218,6 @@ tramp-riscv32.o : tramp-riscv32.s + + tramp-riscv64.o : tramp-riscv64.s + $(CROSS_TOOL) riscv64-linux as tramp-riscv64.s -o $@ ++ ++tramp-loongarch64.o : tramp-loongarch64.s ++ $(CROSS_TOOL) loongarch64-linux as tramp-loongarch64.s -o $@ +diff --git a/callback/trampoline_r/proto-loongarch64.s b/callback/trampoline_r/proto-loongarch64.s +new file mode 100644 +index 0000000..decf82e +--- /dev/null ++++ b/callback/trampoline_r/proto-loongarch64.s +@@ -0,0 +1,29 @@ ++ .file "proto64.c" ++ .text ++ .align 2 ++ .globl tramp ++ .type tramp, @function ++tramp: ++ lu12i.w $r20,1130504192>>12 # 
0x43622000 ++ lu12i.w $r12,-559878144>>12 # 0xffffffffdea0f000 ++ ori $r20,$r20,341 ++ ori $r12,$r12,4011 ++ lu32i.d $r20,0x5471100000000>>32 ++ lu32i.d $r12,0xfffebec000000000>>32 ++ lu52i.d $r20,$r20,0x7350000000000000>>52 ++ lu52i.d $r12,$r12,0xbab0000000000000>>52 ++ jr $r12 ++ .size tramp, .-tramp ++ .section .text.unlikely,"ax",@progbits ++ .align 2 ++ .globl jump ++ .type jump, @function ++jump: ++ lu12i.w $r12,-559878144>>12 # 0xffffffffdea0f000 ++ ori $r12,$r12,4011 ++ lu32i.d $r12,0xfffebec000000000>>32 ++ lu52i.d $r12,$r12,0xbab0000000000000>>52 ++ jr $r12 ++ .size jump, .-jump ++ .ident "GCC: (GNU) 12.0.0 20211224 (experimental)" ++ .section .note.GNU-stack,"",@progbits +diff --git a/callback/trampoline_r/test1.c b/callback/trampoline_r/test1.c +index 4b2f9f8..9105c8f 100644 +--- a/callback/trampoline_r/test1.c ++++ b/callback/trampoline_r/test1.c +@@ -1,7 +1,7 @@ + /* Trampoline test */ + + /* +- * Copyright 1995-2021 Bruno Haible ++ * Copyright 1995-2022 Bruno Haible + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by +@@ -83,6 +83,9 @@ register void* env __asm__("r0"); + #endif + #if defined(__riscv32__) || defined(__riscv64__) + register void* env __asm__("t2"); ++#endif ++#ifdef __loongarch64__ ++register void* env __asm__("r20"); + #endif + + return x + (int)(long)((void**)env)[1] + (int)(long)((void**)env)[0] + MAGIC3; +diff --git a/callback/trampoline_r/tramp-loongarch64.s b/callback/trampoline_r/tramp-loongarch64.s +new file mode 100644 +index 0000000..9cd0a93 +--- /dev/null ++++ b/callback/trampoline_r/tramp-loongarch64.s +@@ -0,0 +1,36 @@ ++/* Trampoline for LoongArch CPU in 64-bit mode */ ++ ++/* ++ * Copyright 1996-2022 Bruno Haible ++ * ++ * This program is free software: you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License as published by ++ * the Free Software Foundation; either version 2 of the License, or ++ * (at your option) any later version. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program. If not, see . ++ */ ++ ++/* Available registers: $r12 ... $r20. */ ++ ++ .text ++ .align 3 ++ .globl tramp ++ .type tramp, @function ++tramp: ++ pcaddu12i $r12, 0 ++ /* Now our own address (=tramp) is in $r12. 
*/ ++ ld.d $r20, $r12, 16 /* $LC0-tramp */ ++ ld.d $r12, $r12, 24 /* $LC1-tramp */ ++ jr $r12 ++$LC0: ++ .dword 0x7355471143622155 ++$LC1: ++ .dword 0xbabebec0dea0ffab ++ .size tramp, .-tramp +diff --git a/callback/trampoline_r/trampoline.c b/callback/trampoline_r/trampoline.c +index 5d4f8c2..21751e3 100644 +--- a/callback/trampoline_r/trampoline.c ++++ b/callback/trampoline_r/trampoline.c +@@ -1,7 +1,7 @@ + /* Trampoline construction */ + + /* +- * Copyright 1995-2021 Bruno Haible ++ * Copyright 1995-2022 Bruno Haible + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by +@@ -326,6 +326,10 @@ static int open_noinherit (const char *filename, int flags, int mode) + #define TRAMP_LENGTH 32 + #define TRAMP_ALIGN 8 + #endif ++#ifdef __loongarch64__ ++#define TRAMP_LENGTH 32 ++#define TRAMP_ALIGN 8 ++#endif + + #ifndef TRAMP_BIAS + #define TRAMP_BIAS 0 +@@ -1257,6 +1261,32 @@ __TR_function alloc_trampoline_r (__TR_function address, void* data0, void* data + (*(unsigned long *) (function +24)) + #define tramp_data(function) \ + (*(unsigned long *) (function +16)) ++#endif ++#ifdef __loongarch64__ ++ /* function: ++ * pcaddu12i $r12,0 1C00000C ++ * ld.d $r20,$r12,16 28C04194 ++ * ld.d $r12,$r12,24 28C0618C ++ * jirl $r0,$r12,0 4C000180 ++ * .dword ++ * .dword
++ */ ++ *(unsigned int *) (function + 0) = 0x1C00000C; ++ *(unsigned int *) (function + 4) = 0x28C04194; ++ *(unsigned int *) (function + 8) = 0x28C0618C; ++ *(unsigned int *) (function +12) = 0x4C000180; ++ *(unsigned long *) (function +16) = (unsigned long) data; ++ *(unsigned long *) (function +24) = (unsigned long) address; ++#define TRAMP_CODE_LENGTH 16 ++#define is_tramp(function) \ ++ *(unsigned int *) (function + 0) == 0x1C00000C && \ ++ *(unsigned int *) (function + 4) == 0x28C04194 && \ ++ *(unsigned int *) (function + 8) == 0x28C0618C && \ ++ *(unsigned int *) (function +12) == 0x4C000180 ++#define tramp_address(function) \ ++ *(unsigned long *) (function +24) ++#define tramp_data(function) \ ++ *(unsigned long *) (function +16) + #endif + /* + * data: +@@ -1408,6 +1438,10 @@ __TR_function alloc_trampoline_r (__TR_function address, void* data0, void* data + __asm__ __volatile__ ("fence.i"); + #endif + #endif ++#if defined(__loongarch64__) ++ /* Use the GCC built-in. It expands to 'ibar 0'. */ ++ __clear_cache((void*)function_x,(void*)(function_x+TRAMP_CODE_LENGTH)); ++#endif + #endif + #endif + +diff --git a/callback/vacall_r/Makefile.devel b/callback/vacall_r/Makefile.devel +index 1a2e62e..5eb626c 100644 +--- a/callback/vacall_r/Makefile.devel ++++ b/callback/vacall_r/Makefile.devel +@@ -25,7 +25,8 @@ precompiled : \ + vacall-ia64-macro.S \ + vacall-x86_64-macro.S vacall-x86_64-x32-linux.s vacall-x86_64-windows-macro.S \ + vacall-s390-macro.S vacall-s390x-macro.S \ +- vacall-riscv32-ilp32d-macro.S vacall-riscv64-lp64d-macro.S ++ vacall-riscv32-ilp32d-macro.S vacall-riscv64-lp64d-macro.S \ ++ vacall-loongarch64-macro.S + + + vacall-i386-linux.s : ../../vacall/vacall-i386.c ../../vacall/vacall-internal.h vacall_r.h $(THISFILE) +@@ -233,3 +234,10 @@ vacall-riscv64-lp64d-linux.s : ../../vacall/vacall-riscv64.c ../../vacall/vacall + + vacall-riscv64-lp64d-macro.S : vacall-riscv64-lp64d-linux.s ../../common/asm-riscv.sh ../../common/noexecstack.h $(THISFILE) + (../../common/asm-riscv.sh < vacall-riscv64-lp64d-linux.s ; cat ../../common/noexecstack.h) > vacall-riscv64-lp64d-macro.S ++ ++ ++vacall-loongarch64-linux.s : ../../vacall/vacall-loongarch64.c ../../vacall/vacall-internal.h vacall_r.h $(THISFILE) ++ $(CROSS_TOOL) loongarch64-linux gcc $(GCCFLAGS) -D__loongarch64__ -S ../../vacall/vacall-loongarch64.c -I../../vacall -I. 
-o vacall-loongarch64-linux.s ++ ++vacall-loongarch64-macro.S : vacall-loongarch64-linux.s ../../common/asm-loongarch.sh ../../common/noexecstack.h $(THISFILE) ++ (../../common/asm-loongarch.sh < vacall-loongarch64-linux.s ; cat ../../common/noexecstack.h) > vacall-loongarch64-macro.S +diff --git a/callback/vacall_r/Makefile.in b/callback/vacall_r/Makefile.in +index 012f4d9..8cf11b3 100644 +--- a/callback/vacall_r/Makefile.in ++++ b/callback/vacall_r/Makefile.in +@@ -242,6 +242,12 @@ vacall-riscv64-lp64d.lo : vacall-riscv64-lp64d.s + vacall-riscv64-lp64d.s : $(srcdir)/vacall-riscv64-lp64d-macro.S + $(CPP) $(ASPFLAGS) -I$(srcdir) $(srcdir)/vacall-riscv64-lp64d-macro.S | grep -v '^ *#line' | grep -v '^#' | sed -e 's,% ,%,g' -e 's,//.*$$,,' > vacall-riscv64-lp64d.s + ++vacall-loongarch64.lo : vacall-loongarch64.s ++ $(LIBTOOL_COMPILE) $(CC) @GCC_X_NONE@ -c vacall-loongarch64.s ++ ++vacall-loongarch64.s : $(srcdir)/vacall-loongarch64-macro.S ++ $(CPP) $(ASPFLAGS) $(srcdir)/vacall-loongarch64-macro.S | grep -v '^ *#line' | grep -v '^#' | sed -e 's,% ,%,g' -e 's,//.*$$,,' > vacall-loongarch64.s ++ + vacall-libapi.lo : $(srcdir)/vacall-libapi.c $(srcdir)/../../vacall/vacall-internal.h $(srcdir)/vacall_r.h ../../config.h + $(LIBTOOL_COMPILE) $(CC) $(INCLUDES_WITH_GNULIB) $(CPPFLAGS) $(CFLAGS) @DISABLE_TYPE_BASED_ALIASING@ -DREENTRANT -c $(srcdir)/vacall-libapi.c + +@@ -295,7 +301,7 @@ mostlyclean : clean + + clean : force + $(RM) *.@OBJEXT@ *.lo *.a libvacall.* core +- $(RM) vacall-i386.s vacall-sparc.s vacall-sparc64.s vacall-m68k.s vacall-mips.s vacall-mipsn32.s vacall-mips64.s vacall-alpha.s vacall-hppa.s vacall-hppa64.s vacall-arm.s vacall-armhf.s vacall-arm64.s vacall-powerpc.s vacall-powerpc64.s vacall-powerpc64-elfv2.s vacall-ia64.s vacall-x86_64.s vacall-x86_64.asm vacall-x86_64-x32.s vacall-s390.s vacall-s390x.s vacall-riscv32-ilp32d.s vacall-riscv64-lp64d.s ++ $(RM) vacall-i386.s vacall-sparc.s vacall-sparc64.s vacall-m68k.s vacall-mips.s vacall-mipsn32.s vacall-mips64.s vacall-alpha.s vacall-hppa.s vacall-hppa64.s vacall-arm.s vacall-armhf.s vacall-arm64.s vacall-powerpc.s vacall-powerpc64.s vacall-powerpc64-elfv2.s vacall-ia64.s vacall-x86_64.s vacall-x86_64.asm vacall-x86_64-x32.s vacall-s390.s vacall-s390x.s vacall-riscv32-ilp32d.s vacall-riscv64-lp64d.s vacall-loongarch64.s + $(RM) -r .libs _libs + + distclean : clean +@@ -321,6 +327,7 @@ SOURCE_FILES = \ + vacall-hppa64-linux.s vacall-hppa64-macro.S \ + vacall-i386-linux.s vacall-i386-macro.S \ + vacall-ia64-linux.s vacall-ia64-macro.S \ ++ vacall-loongarch64-linux.s vacall-loongarch64-macro.S \ + vacall-m68k-linux.s vacall-m68k-sun.s vacall-m68k.mit.S vacall-m68k.motorola.S \ + vacall-mipseb-linux.s vacall-mipsel-linux.s vacall-mipseb-macro.S vacall-mipsel-macro.S \ + vacall-mipsn32eb-linux.s vacall-mipsn32el-linux.s vacall-mipsn32eb-macro.S vacall-mipsn32el-macro.S \ +diff --git a/callback/vacall_r/vacall-loongarch64-linux.s b/callback/vacall_r/vacall-loongarch64-linux.s +new file mode 100644 +index 0000000..7277364 +--- /dev/null ++++ b/callback/vacall_r/vacall-loongarch64-linux.s +@@ -0,0 +1,217 @@ ++ .file "vacall-loongarch64.c" ++ .text ++ .align 2 ++ .type callback_receiver, @function ++callback_receiver: ++.LFB0 = . 
++ .cfi_startproc ++ addi.d $sp,$sp,-288 ++ .cfi_def_cfa_offset 288 ++ st.d $ra,$sp,264 ++ st.d $fp,$sp,256 ++ st.d $s0,$sp,248 ++ .cfi_offset 1, -24 ++ .cfi_offset 22, -32 ++ .cfi_offset 23, -40 ++ addi.d $fp,$sp,272 ++ .cfi_def_cfa 22, 16 ++ addi.d $t1,$fp,16 ++ ldptr.d $t0,$t8,0 ++ st.d $a7,$fp,8 ++ st.d $a1,$fp,-192 ++ st.d $a2,$fp,-184 ++ st.d $a3,$fp,-176 ++ st.d $a4,$fp,-168 ++ st.d $a5,$fp,-160 ++ st.d $a6,$fp,-152 ++ st.d $a7,$fp,-144 ++ fst.s $f0,$fp,-132 ++ fst.s $f1,$fp,-128 ++ fst.s $f2,$fp,-124 ++ fst.s $f3,$fp,-120 ++ fst.s $f4,$fp,-116 ++ fst.s $f5,$fp,-112 ++ fst.s $f6,$fp,-108 ++ fst.s $f7,$fp,-104 ++ fst.d $f0,$fp,-96 ++ fst.d $f1,$fp,-88 ++ fst.d $f2,$fp,-80 ++ fst.d $f3,$fp,-72 ++ fst.d $f4,$fp,-64 ++ fst.d $f5,$fp,-56 ++ fst.d $f6,$fp,-48 ++ fst.d $f7,$fp,-40 ++ st.d $t1,$fp,-240 ++ st.d $a0,$fp,-200 ++ st.w $zero,$fp,-264 ++ st.d $zero,$fp,-232 ++ ld.d $a0,$t8,8 ++ st.w $zero,$fp,-224 ++ st.w $zero,$fp,-208 ++ st.w $zero,$fp,-136 ++ addi.d $a1,$fp,-264 ++ jirl $ra,$t0,0 ++ ldptr.w $t0,$fp,-224 ++ beqz $t0,.L1 ++ addi.w $t1,$r0,15 # 0xf ++ bgtu $t0,$t1,.L1 ++ la.local $t1,.L4 ++ slli.d $t0,$t0,3 ++ ldx.d $t0,$t1,$t0 ++ add.d $t1,$t1,$t0 ++ jr $t1 ++ .section .rodata ++ .align 3 ++ .align 2 ++.L4: ++ .dword .L1-.L4 ++ .dword .L15-.L4 ++ .dword .L15-.L4 ++ .dword .L14-.L4 ++ .dword .L13-.L4 ++ .dword .L12-.L4 ++ .dword .L11-.L4 ++ .dword .L10-.L4 ++ .dword .L5-.L4 ++ .dword .L5-.L4 ++ .dword .L5-.L4 ++ .dword .L5-.L4 ++ .dword .L7-.L4 ++ .dword .L6-.L4 ++ .dword .L5-.L4 ++ .dword .L3-.L4 ++ .text ++.L5: ++ ld.d $a0,$fp,-256 ++.L1: ++ ld.d $ra,$sp,264 ++ .cfi_remember_state ++ .cfi_restore 1 ++ ld.d $fp,$sp,256 ++ .cfi_restore 22 ++ ld.d $s0,$sp,248 ++ .cfi_restore 23 ++ addi.d $sp,$sp,288 ++ .cfi_def_cfa 3, 0 ++ jr $ra ++.L15: ++ .cfi_restore_state ++ ld.b $a0,$fp,-256 ++ b .L1 ++.L10: ++ ld.wu $a0,$fp,-256 ++ b .L1 ++.L14: ++ ld.bu $a0,$fp,-256 ++ b .L1 ++.L13: ++ ld.h $a0,$fp,-256 ++ b .L1 ++.L12: ++ ld.hu $a0,$fp,-256 ++ b .L1 ++.L11: ++ ldptr.w $a0,$fp,-256 ++ b .L1 ++.L6: ++ fld.d $f0,$fp,-256 ++ b .L1 ++.L7: ++ fld.s $f0,$fp,-256 ++ b .L1 ++.L3: ++ ldptr.w $t0,$fp,-264 ++ andi $t0,$t0,1024 ++ beqz $t0,.L1 ++ ld.d $t0,$fp,-216 ++ addi.w $t1,$r0,15 # 0xf ++ addi.d $t2,$t0,-1 ++ bgtu $t2,$t1,.L1 ++ ld.d $t1,$fp,-232 ++ addi.w $t3,$r0,-8 # 0xfffffffffffffff8 ++ addi.w $s0,$r0,8 # 0x8 ++ andi $t2,$t1,7 ++ add.d $t4,$t0,$t2 ++ and $t1,$t1,$t3 ++ ldptr.d $t5,$t1,0 ++ slli.w $t3,$t4,3 ++ slli.w $t6,$t2,3 ++ bgtu $t0,$s0,.L17 ++ bgtu $t4,$s0,.L18 ++ addi.w $t3,$t3,-1 ++ addi.w $a0,$r0,2 # 0x2 ++ sll.d $a0,$a0,$t3 ++ addi.d $a0,$a0,-1 ++ and $a0,$a0,$t5 ++ sra.d $a0,$a0,$t6 ++ b .L1 ++.L17: ++ addi.w $t0,$r0,16 # 0x10 ++ ld.d $t7,$t1,8 ++ sra.d $t5,$t5,$t6 ++ sub.d $s0,$s0,$t2 ++ bgtu $t4,$t0,.L19 ++ addi.w $t3,$t3,-65 ++ addi.w $a1,$r0,2 # 0x2 ++ sll.d $a1,$a1,$t3 ++ addi.d $a1,$a1,-1 ++ and $a1,$a1,$t7 ++ slli.w $s0,$s0,2 ++ sll.d $a0,$a1,$s0 ++ sll.d $a0,$a0,$s0 ++ or $a0,$a0,$t5 ++ sra.d $a1,$a1,$t6 ++ b .L1 ++.L18: ++ ld.d $t4,$t1,8 ++ addi.w $t3,$t3,-65 ++ addi.w $t0,$r0,2 # 0x2 ++ sll.d $t0,$t0,$t3 ++ addi.d $t0,$t0,-1 ++ sub.d $t1,$s0,$t2 ++ and $t0,$t0,$t4 ++ slli.w $t1,$t1,3 ++ sll.d $t0,$t0,$t1 ++ sra.d $t5,$t5,$t6 ++ or $a0,$t0,$t5 ++ b .L1 ++.L19: ++ ld.d $t2,$t1,16 ++ addi.w $t3,$t3,-129 ++ addi.w $t0,$r0,2 # 0x2 ++ sll.d $t0,$t0,$t3 ++ addi.d $t0,$t0,-1 ++ slli.w $t1,$s0,3 ++ and $t0,$t0,$t2 ++ sll.d $a0,$t7,$t1 ++ sll.d $t0,$t0,$t1 ++ sra.d $t7,$t7,$t6 ++ or $a0,$a0,$t5 ++ or $a1,$t0,$t7 ++ b .L1 ++ .cfi_endproc ++.LFE0: ++ .size callback_receiver, 
.-callback_receiver ++ .align 2 ++ .globl callback_get_receiver ++ .type callback_get_receiver, @function ++callback_get_receiver: ++.LFB1 = . ++ .cfi_startproc ++ addi.d $sp,$sp,-16 ++ .cfi_def_cfa_offset 16 ++ st.d $fp,$sp,8 ++ .cfi_offset 22, -8 ++ addi.d $fp,$sp,16 ++ .cfi_def_cfa 22, 0 ++ ld.d $fp,$sp,8 ++ .cfi_restore 22 ++ la.local $a0,callback_receiver ++ addi.d $sp,$sp,16 ++ .cfi_def_cfa_register 3 ++ jr $ra ++ .cfi_endproc ++.LFE1: ++ .size callback_get_receiver, .-callback_get_receiver ++ .ident "GCC: (GNU) 12.0.1 20220317 (experimental)" ++ .section .note.GNU-stack,"",@progbits +diff --git a/callback/vacall_r/vacall-loongarch64-macro.S b/callback/vacall_r/vacall-loongarch64-macro.S +new file mode 100644 +index 0000000..3b3b3ad +--- /dev/null ++++ b/callback/vacall_r/vacall-loongarch64-macro.S +@@ -0,0 +1,218 @@ ++ .file "vacall-loongarch64.c" ++ .text ++ .align 2 ++ .type callback_receiver, @function ++callback_receiver: ++.LFB0 = . ++ .cfi_startproc ++ addi.d $sp,$sp,-288 ++ .cfi_def_cfa_offset 288 ++ st.d $ra,$sp,264 ++ st.d $fp,$sp,256 ++ st.d $s0,$sp,248 ++ .cfi_offset 1, -24 ++ .cfi_offset 22, -32 ++ .cfi_offset 23, -40 ++ addi.d $fp,$sp,272 ++ .cfi_def_cfa 22, 16 ++ addi.d $t1,$fp,16 ++ ldptr.d $t0,$t8,0 ++ st.d $a7,$fp,8 ++ st.d $a1,$fp,-192 ++ st.d $a2,$fp,-184 ++ st.d $a3,$fp,-176 ++ st.d $a4,$fp,-168 ++ st.d $a5,$fp,-160 ++ st.d $a6,$fp,-152 ++ st.d $a7,$fp,-144 ++ fst.s $f0,$fp,-132 ++ fst.s $f1,$fp,-128 ++ fst.s $f2,$fp,-124 ++ fst.s $f3,$fp,-120 ++ fst.s $f4,$fp,-116 ++ fst.s $f5,$fp,-112 ++ fst.s $f6,$fp,-108 ++ fst.s $f7,$fp,-104 ++ fst.d $f0,$fp,-96 ++ fst.d $f1,$fp,-88 ++ fst.d $f2,$fp,-80 ++ fst.d $f3,$fp,-72 ++ fst.d $f4,$fp,-64 ++ fst.d $f5,$fp,-56 ++ fst.d $f6,$fp,-48 ++ fst.d $f7,$fp,-40 ++ st.d $t1,$fp,-240 ++ st.d $a0,$fp,-200 ++ st.w $zero,$fp,-264 ++ st.d $zero,$fp,-232 ++ ld.d $a0,$t8,8 ++ st.w $zero,$fp,-224 ++ st.w $zero,$fp,-208 ++ st.w $zero,$fp,-136 ++ addi.d $a1,$fp,-264 ++ jirl $ra,$t0,0 ++ ldptr.w $t0,$fp,-224 ++ beqz $t0,.L1 ++ addi.w $t1,$r0,15 # 0xf ++ bgtu $t0,$t1,.L1 ++ la.local $t1,.L4 ++ slli.d $t0,$t0,3 ++ ldx.d $t0,$t1,$t0 ++ add.d $t1,$t1,$t0 ++ jr $t1 ++ .section .rodata ++ .align 3 ++ .align 2 ++.L4: ++ .dword .L1-.L4 ++ .dword .L15-.L4 ++ .dword .L15-.L4 ++ .dword .L14-.L4 ++ .dword .L13-.L4 ++ .dword .L12-.L4 ++ .dword .L11-.L4 ++ .dword .L10-.L4 ++ .dword .L5-.L4 ++ .dword .L5-.L4 ++ .dword .L5-.L4 ++ .dword .L5-.L4 ++ .dword .L7-.L4 ++ .dword .L6-.L4 ++ .dword .L5-.L4 ++ .dword .L3-.L4 ++ .text ++.L5: ++ ld.d $a0,$fp,-256 ++.L1: ++ ld.d $ra,$sp,264 ++ .cfi_remember_state ++ .cfi_restore 1 ++ ld.d $fp,$sp,256 ++ .cfi_restore 22 ++ ld.d $s0,$sp,248 ++ .cfi_restore 23 ++ addi.d $sp,$sp,288 ++ .cfi_def_cfa 3, 0 ++ jr $ra ++.L15: ++ .cfi_restore_state ++ ld.b $a0,$fp,-256 ++ b .L1 ++.L10: ++ ld.wu $a0,$fp,-256 ++ b .L1 ++.L14: ++ ld.bu $a0,$fp,-256 ++ b .L1 ++.L13: ++ ld.h $a0,$fp,-256 ++ b .L1 ++.L12: ++ ld.hu $a0,$fp,-256 ++ b .L1 ++.L11: ++ ldptr.w $a0,$fp,-256 ++ b .L1 ++.L6: ++ fld.d $f0,$fp,-256 ++ b .L1 ++.L7: ++ fld.s $f0,$fp,-256 ++ b .L1 ++.L3: ++ ldptr.w $t0,$fp,-264 ++ andi $t0,$t0,1024 ++ beqz $t0,.L1 ++ ld.d $t0,$fp,-216 ++ addi.w $t1,$r0,15 # 0xf ++ addi.d $t2,$t0,-1 ++ bgtu $t2,$t1,.L1 ++ ld.d $t1,$fp,-232 ++ addi.w $t3,$r0,-8 # 0xfffffffffffffff8 ++ addi.w $s0,$r0,8 # 0x8 ++ andi $t2,$t1,7 ++ add.d $t4,$t0,$t2 ++ and $t1,$t1,$t3 ++ ldptr.d $t5,$t1,0 ++ slli.w $t3,$t4,3 ++ slli.w $t6,$t2,3 ++ bgtu $t0,$s0,.L17 ++ bgtu $t4,$s0,.L18 ++ addi.w $t3,$t3,-1 ++ addi.w $a0,$r0,2 # 0x2 ++ sll.d $a0,$a0,$t3 ++ addi.d 
$a0,$a0,-1 ++ and $a0,$a0,$t5 ++ sra.d $a0,$a0,$t6 ++ b .L1 ++.L17: ++ addi.w $t0,$r0,16 # 0x10 ++ ld.d $t7,$t1,8 ++ sra.d $t5,$t5,$t6 ++ sub.d $s0,$s0,$t2 ++ bgtu $t4,$t0,.L19 ++ addi.w $t3,$t3,-65 ++ addi.w $a1,$r0,2 # 0x2 ++ sll.d $a1,$a1,$t3 ++ addi.d $a1,$a1,-1 ++ and $a1,$a1,$t7 ++ slli.w $s0,$s0,2 ++ sll.d $a0,$a1,$s0 ++ sll.d $a0,$a0,$s0 ++ or $a0,$a0,$t5 ++ sra.d $a1,$a1,$t6 ++ b .L1 ++.L18: ++ ld.d $t4,$t1,8 ++ addi.w $t3,$t3,-65 ++ addi.w $t0,$r0,2 # 0x2 ++ sll.d $t0,$t0,$t3 ++ addi.d $t0,$t0,-1 ++ sub.d $t1,$s0,$t2 ++ and $t0,$t0,$t4 ++ slli.w $t1,$t1,3 ++ sll.d $t0,$t0,$t1 ++ sra.d $t5,$t5,$t6 ++ or $a0,$t0,$t5 ++ b .L1 ++.L19: ++ ld.d $t2,$t1,16 ++ addi.w $t3,$t3,-129 ++ addi.w $t0,$r0,2 # 0x2 ++ sll.d $t0,$t0,$t3 ++ addi.d $t0,$t0,-1 ++ slli.w $t1,$s0,3 ++ and $t0,$t0,$t2 ++ sll.d $a0,$t7,$t1 ++ sll.d $t0,$t0,$t1 ++ sra.d $t7,$t7,$t6 ++ or $a0,$a0,$t5 ++ or $a1,$t0,$t7 ++ b .L1 ++ .cfi_endproc ++.LFE0: ++ .size callback_receiver, .-callback_receiver ++ .align 2 ++ .globl callback_get_receiver ++ .type callback_get_receiver, @function ++callback_get_receiver: ++.LFB1 = . ++ .cfi_startproc ++ addi.d $sp,$sp,-16 ++ .cfi_def_cfa_offset 16 ++ st.d $fp,$sp,8 ++ .cfi_offset 22, -8 ++ addi.d $fp,$sp,16 ++ .cfi_def_cfa 22, 0 ++ ld.d $fp,$sp,8 ++ .cfi_restore 22 ++ la.local $a0,callback_receiver ++ addi.d $sp,$sp,16 ++ .cfi_def_cfa_register 3 ++ jr $ra ++ .cfi_endproc ++.LFE1: ++ .size callback_get_receiver, .-callback_get_receiver ++#if defined __linux__ || defined __FreeBSD__ || defined __FreeBSD_kernel__ || defined __DragonFly__ ++ .section .note.GNU-stack,"",@progbits ++#endif +diff --git a/callback/vacall_r/vacall_r.h b/callback/vacall_r/vacall_r.h +index 08b0f09..1bd3638 100644 +--- a/callback/vacall_r/vacall_r.h ++++ b/callback/vacall_r/vacall_r.h +@@ -1,5 +1,5 @@ + /* +- * Copyright 1995-2021 Bruno Haible ++ * Copyright 1995-2022 Bruno Haible + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by +@@ -175,7 +175,7 @@ enum __VA_alist_flags + #if defined(__sparc__) && !defined(__sparc64__) && defined(__sun) && (defined(__SUNPRO_C) || defined(__SUNPRO_CC)) /* SUNWspro cc or CC */ + __VA_SUNPROCC_STRUCT_RETURN, + #else +-#if (defined(__i386__) && (defined(_WIN32) || defined(__CYGWIN__) || (defined(__MACH__) && defined(__APPLE__)) || defined(__FreeBSD__) || defined(__DragonFly__) || defined(__OpenBSD__))) || defined(__m68k__) || defined(__mipsn32__) || defined(__mips64__) || defined(__sparc64__) || defined(__hppa__) || defined(__hppa64__) || defined(__arm__) || defined(__armhf__) || defined(__arm64__) || defined(__powerpc64_elfv2__) || defined(__ia64__) || defined(__x86_64__) || defined(__riscv32__) || defined(__riscv64__) ++#if (defined(__i386__) && (defined(_WIN32) || defined(__CYGWIN__) || (defined(__MACH__) && defined(__APPLE__)) || defined(__FreeBSD__) || defined(__DragonFly__) || defined(__OpenBSD__))) || defined(__m68k__) || defined(__mipsn32__) || defined(__mips64__) || defined(__sparc64__) || defined(__hppa__) || defined(__hppa64__) || defined(__arm__) || defined(__armhf__) || defined(__arm64__) || defined(__powerpc64_elfv2__) || defined(__ia64__) || defined(__x86_64__) || defined(__riscv32__) || defined(__riscv64__) || defined(__loongarch64__) + __VA_SMALL_STRUCT_RETURN | + #endif + #if defined(__GNUC__) && !((defined(__mipsn32__) || defined(__mips64__)) && ((__GNUC__ == 3 && __GNUC_MINOR__ >= 4) || (__GNUC__ > 3))) +@@ -292,7 +292,7 @@ enum __VA_alist_flags + #endif + + /* 
These are for internal use only */ +-#if defined(__i386__) || defined(__m68k__) || defined(__mipsn32__) || defined(__mips64__) || defined(__sparc64__) || defined(__alpha__) || defined(__hppa64__) || defined(__arm__) || defined(__armhf__) || defined(__arm64__) || defined(__powerpc__) || defined(__powerpc64__) || defined(__ia64__) || defined(__x86_64__) || (defined(__s390__) && !defined(__s390x__)) || defined(__riscv64__) ++#if defined(__i386__) || defined(__m68k__) || defined(__mipsn32__) || defined(__mips64__) || defined(__sparc64__) || defined(__alpha__) || defined(__hppa64__) || defined(__arm__) || defined(__armhf__) || defined(__arm64__) || defined(__powerpc__) || defined(__powerpc64__) || defined(__ia64__) || defined(__x86_64__) || (defined(__s390__) && !defined(__s390x__)) || defined(__riscv64__) || defined(__loongarch64__) + __VA_REGISTER_STRUCT_RETURN = 1<<10, + #endif + #if defined(__mipsn32__) || defined(__mips64__) +diff --git a/common/asm-loongarch.sh b/common/asm-loongarch.sh +new file mode 100755 +index 0000000..ccab8c1 +--- /dev/null ++++ b/common/asm-loongarch.sh +@@ -0,0 +1,44 @@ ++#!/bin/sh ++# Translate the assembler syntax of LoongArch assembler programs ++# Usage: asm-loongarch.sh < loongarchlinux-asm-file > portable-asm-file ++# The portable-asm-file has to be ++# 1. preprocessed, ++# 2. grep -v '^ *#line' | grep -v '^#' ++# 3. sed -e 's,% ,%,g' -e 's,//.*$,,' ++ ++# Copyright (C) 2017-2022 Bruno Haible ++# ++# This program is free software: you can redistribute it and/or modify ++# it under the terms of the GNU General Public License as published by ++# the Free Software Foundation; either version 2 of the License, or ++# (at your option) any later version. ++# ++# This program is distributed in the hope that it will be useful, ++# but WITHOUT ANY WARRANTY; without even the implied warranty of ++# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++# GNU General Public License for more details. ++# ++# You should have received a copy of the GNU General Public License ++# along with this program. If not, see . 
++ ++tmpscript1=sed$$tmp1 ++tmpscript2=sed$$tmp2 ++tmpremove='rm -f $tmpscript1 $tmpscript2' ++trap "$tmpremove" 1 2 15 ++ ++cat > $tmpscript1 << \EOF ++# ----------- Remove gcc self-identification ++/gcc2_compiled/d ++/gnu_compiled_c/d ++/\.ident/d ++EOF ++ ++cat > $tmpscript2 << \EOF ++# ----------- Introduce macro syntax for assembler pseudo-ops ++/\.section\([ ]\+\).*GNU-stack/d ++EOF ++ ++sed -f $tmpscript1 | \ ++sed -f $tmpscript2 ++ ++eval "$tmpremove" +diff --git a/ffcall-abi.h b/ffcall-abi.h +index 117fd65..4de0366 100644 +--- a/ffcall-abi.h ++++ b/ffcall-abi.h +@@ -1,5 +1,5 @@ + /* +- * Copyright 2017-2019 Bruno Haible ++ * Copyright 2017-2022 Bruno Haible + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by +@@ -249,3 +249,9 @@ + #define __riscv64__ 1 + #endif + #endif ++ ++#ifndef __loongarch64__ ++#if defined(__loongarch64) && defined(__LP64__) ++#define __loongarch64__ 1 ++#endif ++#endif +diff --git a/trampoline/Makefile.devel b/trampoline/Makefile.devel +index ec9e0ae..bc3c6ff 100644 +--- a/trampoline/Makefile.devel ++++ b/trampoline/Makefile.devel +@@ -79,7 +79,7 @@ cache-powerpc64-elfv2-macro.S : cache-powerpc64-elfv2-linux.s ../common/asm-powe + OLDGCCFLAGS = -O2 -fomit-frame-pointer + ASPREFIX = /usr1/gnu/lib + +-proto-precompiled : proto-i386.s proto-m68k.s proto-mips.s proto-mipsn32.s proto-mips64.s proto-sparc.s proto-sparc64.s proto-alpha.s proto-hppa.s proto-hppa64.s proto-arm.s proto-arm64.s proto-powerpc-aix.s proto-powerpc-sysv4.s proto-powerpc-macos.s proto-powerpc64-aix.s proto-powerpc64-elfv2.s proto-ia64.s proto-x86_64.s proto-x86_64-x32.s proto-s390.s proto-s390x.s proto-riscv32.s proto-riscv64.s ++proto-precompiled : proto-i386.s proto-m68k.s proto-mips.s proto-mipsn32.s proto-mips64.s proto-sparc.s proto-sparc64.s proto-alpha.s proto-hppa.s proto-hppa64.s proto-arm.s proto-arm64.s proto-powerpc-aix.s proto-powerpc-sysv4.s proto-powerpc-macos.s proto-powerpc64-aix.s proto-powerpc64-elfv2.s proto-ia64.s proto-x86_64.s proto-x86_64-x32.s proto-s390.s proto-s390x.s proto-riscv32.s proto-riscv64.s proto-loongarch64.s + + proto-i386.s : proto.c + $(GCC) -V 2.7.2 -b i486-linuxaout $(OLDGCCFLAGS) -D__i386__ -S proto.c -o $@ +@@ -153,6 +153,9 @@ proto-riscv32.s : proto.c + proto-riscv64.s : proto64.c + $(CROSS_TOOL) riscv64-linux gcc-7.3.0 $(OLDGCCFLAGS) -D__riscv64__ -S proto64.c -o $@ + ++proto-loongarch64.s : proto64.c ++ $(CROSS_TOOL) loongarch64-linux gcc $(OLDGCCFLAGS) -D__loongarch64__ -S proto64.c -o $@ ++ + tramp-i386.o : tramp-i386.s + $(ASPREFIX)/i486-linux/bin/as tramp-i386.s -o $@ + +@@ -215,3 +218,6 @@ tramp-riscv32.o : tramp-riscv32.s + + tramp-riscv64.o : tramp-riscv64.s + $(CROSS_TOOL) riscv64-linux as tramp-riscv64.s -o $@ ++ ++tramp-loongarch64.o : tramp-loongarch64.s ++ $(CROSS_TOOL) loongarch64-linux as tramp-loongarch64.s -o $@ +diff --git a/trampoline/PLATFORMS b/trampoline/PLATFORMS +index ef73e74..eeaf877 100644 +--- a/trampoline/PLATFORMS ++++ b/trampoline/PLATFORMS +@@ -37,4 +37,5 @@ Supported CPUs: (Put the GNU config.guess values here.) 
+ s390x s390x-ibm-linux + riscv32 riscv32-unknown-linux + riscv64 riscv64-unknown-linux ++ loongarch64 loongarch64-unknown-linux + +diff --git a/trampoline/proto-loongarch64.s b/trampoline/proto-loongarch64.s +new file mode 100644 +index 0000000..2446569 +--- /dev/null ++++ b/trampoline/proto-loongarch64.s +@@ -0,0 +1,44 @@ ++ .file "proto64.c" ++ .text ++ .align 2 ++ .globl tramp ++ .type tramp, @function ++tramp: ++ lu12i.w $r12,324501504>>12 # 0x13578000 ++ ori $r12,$r12,1893 ++ lu32i.d $r12,0x4567800000000>>32 ++ lu52i.d $r12,$r12,0x1230000000000000>>52 ++ addi.w $r15,$r0,33 # 0x21 ++ st.b $r15,$r12,1 ++ addi.w $r15,$r0,98 # 0x62 ++ lu12i.w $r13,-559878144>>12 # 0xffffffffdea0f000 ++ st.b $r15,$r12,2 ++ addi.w $r15,$r0,67 # 0x43 ++ addi.w $r14,$r0,85 # 0x55 ++ ori $r13,$r13,4011 ++ st.b $r15,$r12,3 ++ addi.w $r15,$r0,17 # 0x11 ++ lu32i.d $r13,0xfffebec000000000>>32 ++ st.b $r14,$r12,0 ++ st.b $r15,$r12,4 ++ st.b $r14,$r12,6 ++ addi.w $r15,$r0,71 # 0x47 ++ addi.w $r14,$r0,115 # 0x73 ++ st.b $r15,$r12,5 ++ st.b $r14,$r12,7 ++ lu52i.d $r13,$r13,0xbab0000000000000>>52 ++ jr $r13 ++ .size tramp, .-tramp ++ .section .text.unlikely,"ax",@progbits ++ .align 2 ++ .globl jump ++ .type jump, @function ++jump: ++ lu12i.w $r12,-559878144>>12 # 0xffffffffdea0f000 ++ ori $r12,$r12,4011 ++ lu32i.d $r12,0xfffebec000000000>>32 ++ lu52i.d $r12,$r12,0xbab0000000000000>>52 ++ jr $r12 ++ .size jump, .-jump ++ .ident "GCC: (GNU) 12.0.0 20211224 (experimental)" ++ .section .note.GNU-stack,"",@progbits +diff --git a/trampoline/tramp-loongarch64.s b/trampoline/tramp-loongarch64.s +new file mode 100644 +index 0000000..52d887f +--- /dev/null ++++ b/trampoline/tramp-loongarch64.s +@@ -0,0 +1,40 @@ ++/* Trampoline for LoongArch CPU in 64-bit mode */ ++ ++/* ++ * Copyright 1996-2022 Bruno Haible ++ * ++ * This program is free software: you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License as published by ++ * the Free Software Foundation; either version 2 of the License, or ++ * (at your option) any later version. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program. If not, see . ++ */ ++ ++/* Available registers: $r12 ... $r20. */ ++ ++ .text ++ .align 3 ++ .globl tramp ++ .type tramp, @function ++tramp: ++ pcaddu12i $r12, 0 ++ /* Now our own address (=tramp) is in $r12. 
*/ ++ ld.d $r13, $r12, 24 /* $LC0-tramp */ ++ ld.d $r14, $r12, 32 /* $LC1-tramp */ ++ st.d $r14, $r13, 0 ++ ld.d $r12, $r12, 40 /* $LC2-tramp */ ++ jr $r12 ++$LC0: ++ .dword 0x1234567813578765 ++$LC1: ++ .dword 0x7355471143622155 ++$LC2: ++ .dword 0xbabebec0dea0ffab ++ .size tramp, .-tramp +diff --git a/trampoline/trampoline.c b/trampoline/trampoline.c +index 9b79e0d..fdcbcd4 100644 +--- a/trampoline/trampoline.c ++++ b/trampoline/trampoline.c +@@ -1,7 +1,7 @@ + /* Trampoline construction */ + + /* +- * Copyright 1995-2021 Bruno Haible ++ * Copyright 1995-2022 Bruno Haible + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by +@@ -330,6 +330,10 @@ static int open_noinherit (const char *filename, int flags, int mode) + #define TRAMP_LENGTH 48 + #define TRAMP_ALIGN 8 + #endif ++#if defined(__loongarch64__) ++#define TRAMP_LENGTH 48 ++#define TRAMP_ALIGN 8 ++#endif + + #ifndef TRAMP_BIAS + #define TRAMP_BIAS 0 +@@ -1514,6 +1518,42 @@ trampoline_function_t alloc_trampoline (trampoline_function_t address, void** va + #define tramp_data(function) \ + (*(unsigned long *) (function +24)) + #endif ++#ifdef __loongarch64__ ++ /* function: ++ * pcaddu12i $r12,0 1C00000C ++ * ld.d $r13,$r12,24 28C0618D ++ * ld.d $r14,$r12,32 28C0818E ++ * st.d $r14,$r13,0 29C001AE ++ * ld.d $r12,$r12,40 28C0A18C ++ * jirl $r0,$r12,0 4C000180 ++ * .dword ++ * .dword ++ * .dword
++ */ ++ *(unsigned int *) (function + 0) = 0x1C00000C; ++ *(unsigned int *) (function + 4) = 0x28C0618D; ++ *(unsigned int *) (function + 8) = 0x28C0818E; ++ *(unsigned int *) (function +12) = 0x29C001AE; ++ *(unsigned int *) (function +16) = 0x28C0A18C; ++ *(unsigned int *) (function +20) = 0x4C000180; ++ *(unsigned long *) (function +24) = (unsigned long) variable; ++ *(unsigned long *) (function +32) = (unsigned long) data; ++ *(unsigned long *) (function +40) = (unsigned long) address; ++#define TRAMP_CODE_LENGTH 24 ++#define is_tramp(function) \ ++ *(unsigned int *) (function + 0) == 0x1C00000C && \ ++ *(unsigned int *) (function + 4) == 0x28C0618D && \ ++ *(unsigned int *) (function + 8) == 0x28C0818E && \ ++ *(unsigned int *) (function +12) == 0x29C001AE && \ ++ *(unsigned int *) (function +16) == 0x28C0A18C && \ ++ *(unsigned int *) (function +20) == 0x4C000180 ++#define tramp_address(function) \ ++ *(unsigned long *) (function +40) ++#define tramp_variable(function) \ ++ *(unsigned long *) (function +24) ++#define tramp_data(function) \ ++ *(unsigned long *) (function +32) ++#endif + + /* 3. Set memory protection to "executable" */ + +@@ -1657,6 +1697,10 @@ trampoline_function_t alloc_trampoline (trampoline_function_t address, void** va + __asm__ __volatile__ ("fence.i"); + #endif + #endif ++#if defined(__loongarch64__) ++ /* Use the GCC built-in. It expands to 'ibar 0'. */ ++ __clear_cache((void*)function_x,(void*)(function_x+TRAMP_CODE_LENGTH)); ++#endif + #endif + #endif + +diff --git a/vacall/Makefile.devel b/vacall/Makefile.devel +index 1c78798..7e41aad 100644 +--- a/vacall/Makefile.devel ++++ b/vacall/Makefile.devel +@@ -25,7 +25,8 @@ precompiled : \ + vacall-ia64-macro.S \ + vacall-x86_64-macro.S vacall-x86_64-x32-linux.s vacall-x86_64-windows-macro.S \ + vacall-s390-macro.S vacall-s390x-macro.S \ +- vacall-riscv32-ilp32d-macro.S vacall-riscv64-lp64d-macro.S ++ vacall-riscv32-ilp32d-macro.S vacall-riscv64-lp64d-macro.S \ ++ vacall-loongarch64-macro.S + + + vacall-i386-linux.s : vacall-i386.c vacall-internal.h vacall.h $(THISFILE) +@@ -278,6 +279,13 @@ vacall-riscv64-lp64d-macro.S : vacall-riscv64-lp64d-linux.s ../common/asm-riscv. 
+ (../common/asm-riscv.sh < vacall-riscv64-lp64d-linux.s ; cat ../common/noexecstack.h) > vacall-riscv64-lp64d-macro.S + + ++vacall-loongarch64-linux.s : vacall-loongarch64.c vacall-internal.h vacall.h $(THISFILE) ++ $(CROSS_TOOL) loongarch64-linux gcc $(GCCFLAGS) -D__loongarch64__ -S vacall-loongarch64.c -o vacall-loongarch64-linux.s ++ ++vacall-loongarch64-macro.S : vacall-loongarch64-linux.s ../common/asm-loongarch.sh ../common/noexecstack.h $(THISFILE) ++ (../common/asm-loongarch.sh < vacall-loongarch64-linux.s ; cat ../common/noexecstack.h) > vacall-loongarch64-macro.S ++ ++ + # --------------- Rules for debugging test failures --------------- + + tests : tests-i386.s tests-m68k.s tests-mips.s tests-sparc.s tests-alpha.s tests-hppa.s tests-arm.s tests-powerpc.s tests-powerpc64.s tests-ia64.s tests-x86_64.s +diff --git a/vacall/Makefile.in b/vacall/Makefile.in +index ec31846..0101d7d 100644 +--- a/vacall/Makefile.in ++++ b/vacall/Makefile.in +@@ -236,6 +236,12 @@ vacall-riscv64-lp64d.@OBJEXT@ : vacall-riscv64-lp64d.s + vacall-riscv64-lp64d.s : $(srcdir)/vacall-riscv64-lp64d-macro.S + $(CPP) $(ASPFLAGS) $(srcdir)/vacall-riscv64-lp64d-macro.S | grep -v '^ *#line' | grep -v '^#' | sed -e 's,% ,%,g' -e 's,//.*$$,,' > vacall-riscv64-lp64d.s + ++vacall-loongarch64.@OBJEXT@ : vacall-loongarch64.s ++ $(CC) @GCC_X_NONE@ -c vacall-loongarch64.s ++ ++vacall-loongarch64.s : $(srcdir)/vacall-loongarch64-macro.S ++ $(CPP) $(ASPFLAGS) $(srcdir)/vacall-loongarch64-macro.S | grep -v '^ *#line' | grep -v '^#' | sed -e 's,% ,%,g' -e 's,//.*$$,,' > vacall-loongarch64.s ++ + vacall-libapi.@OBJEXT@ : $(srcdir)/vacall-libapi.c $(srcdir)/vacall-internal.h $(srcdir)/vacall.h ../config.h + $(CC) $(INCLUDES_WITH_GNULIB) $(CPPFLAGS) $(CFLAGS) @DISABLE_TYPE_BASED_ALIASING@ -c $(srcdir)/vacall-libapi.c + +@@ -319,7 +325,7 @@ mostlyclean : clean + + clean : force + $(RM) *.@OBJEXT@ *.a core +- $(RM) vacall-i386.s vacall-sparc.s vacall-sparc64.s vacall-m68k.s vacall-mips.s vacall-mipsn32.s vacall-mips64.s vacall-alpha.s vacall-hppa.s vacall-hppa64.s vacall-arm.s vacall-armhf.s vacall-arm64.s vacall-powerpc.s vacall-powerpc64.s vacall-powerpc64-elfv2.s vacall-ia64.s vacall-x86_64.s vacall-x86_64.asm vacall-x86_64-x32.s vacall-s390.s vacall-s390x.s vacall-riscv32-ilp32d.s vacall-riscv64-lp64d.s ++ $(RM) vacall-i386.s vacall-sparc.s vacall-sparc64.s vacall-m68k.s vacall-mips.s vacall-mipsn32.s vacall-mips64.s vacall-alpha.s vacall-hppa.s vacall-hppa64.s vacall-arm.s vacall-armhf.s vacall-arm64.s vacall-powerpc.s vacall-powerpc64.s vacall-powerpc64-elfv2.s vacall-ia64.s vacall-x86_64.s vacall-x86_64.asm vacall-x86_64-x32.s vacall-s390.s vacall-s390x.s vacall-riscv32-ilp32d.s vacall-riscv64-lp64d.s vacall-loongarch64.s + $(RM) minitests.@OBJEXT@ minitests.s minitests minitests.out + $(RM) minitests-c++.@OBJEXT@ minitests-c++ minitests-c++.out + $(RM) tests.@OBJEXT@ tests.s tests tests.out +@@ -346,6 +352,7 @@ SOURCE_FILES = \ + vacall-hppa64.c vacall-hppa64-linux.s vacall-hppa64-macro.S \ + vacall-i386.c vacall-i386-linux.s vacall-i386-linux-pic.s vacall-i386-macro.S \ + vacall-ia64.c vacall-ia64-linux.s vacall-ia64-macro.S \ ++ vacall-loongarch64.c vacall-loongarch64-linux.s vacall-loongarch64-macro.S \ + vacall-m68k.c vacall-m68k-linux.s vacall-m68k-sun.s vacall-m68k.mit.S vacall-m68k.motorola.S \ + vacall-mips.c vacall-mipseb-linux.s vacall-mipsel-linux.s vacall-mipseb-macro.S vacall-mipsel-macro.S \ + vacall-mipsn32.c vacall-mipsn32eb-linux.s vacall-mipsn32el-linux.s vacall-mipsn32eb-macro.S vacall-mipsn32el-macro.S \ 
+diff --git a/vacall/PLATFORMS b/vacall/PLATFORMS +index 01efe52..683e233 100644 +--- a/vacall/PLATFORMS ++++ b/vacall/PLATFORMS +@@ -48,4 +48,5 @@ Supported CPUs: (Put the GNU config.guess values here.) + s390x s390x-ibm-linux (gcc) + riscv32 riscv32-unknown-linux (gcc -mabi=ilp32d) + riscv64 riscv64-unknown-linux (gcc -mabi=lp64d) ++ loongarch64 loongarch64-unknown-linux (gcc) + +diff --git a/vacall/vacall-internal.h b/vacall/vacall-internal.h +index c196348..f56b238 100644 +--- a/vacall/vacall-internal.h ++++ b/vacall/vacall-internal.h +@@ -1,5 +1,5 @@ + /* +- * Copyright 1995-2021 Bruno Haible ++ * Copyright 1995-2022 Bruno Haible + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by +@@ -92,7 +92,7 @@ typedef struct vacall_alist + unsigned int _uint; + long _long; + unsigned long _ulong; +-#if !(defined(__mips64__) || defined(__sparc64__) || defined(__alpha__) || defined(__hppa64__) || defined(__arm64__) || defined(__powerpc64__) || defined(__ia64__) || defined(__riscv64__)) ++#if !(defined(__mips64__) || defined(__sparc64__) || defined(__alpha__) || defined(__hppa64__) || defined(__arm64__) || defined(__powerpc64__) || defined(__ia64__) || defined(__riscv64__) || defined(__loongarch64__)) + long long _longlong; + unsigned long long _ulonglong; + #endif +@@ -215,7 +215,7 @@ typedef struct vacall_alist + float farg[__VA_FARG_NUM]; + double darg[__VA_FARG_NUM]; + #endif +-#if defined(__riscv32__) || defined(__riscv64__) ++#if defined(__riscv32__) || defined(__riscv64__) || defined(__loongarch64__) + #define __VA_IARG_NUM 8 + unsigned int ianum; + __varword iarg[__VA_IARG_NUM]; +@@ -371,7 +371,7 @@ typedef struct vacall_alist + ((LIST)->flags |= __VA_REGISTER_STRUCT_RETURN, \ + 0) + #endif +-#if defined(__hppa64__) || defined(__arm64__) || (defined(__powerpc64__) && defined(__powerpc64_elfv2__)) || defined(__x86_64_sysv__) || defined(__riscv64__) ++#if defined(__hppa64__) || defined(__arm64__) || (defined(__powerpc64__) && defined(__powerpc64_elfv2__)) || defined(__x86_64_sysv__) || defined(__riscv64__) || defined(__loongarch64__) + #define __va_reg_struct_return(LIST,TYPE_SIZE,TYPE_SPLITTABLE) \ + ((TYPE_SIZE) <= 16) + /* Turn on __VA_REGISTER_STRUCT_RETURN if __VA_SMALL_STRUCT_RETURN was set +@@ -419,7 +419,7 @@ typedef struct vacall_alist + 0 \ + ) + #endif +-#if defined(__powerpc_sysv4__) || defined(__x86_64_sysv__) || defined(__s390__) || defined(__s390x__) || defined(__riscv32__) || defined(__riscv64__) ++#if defined(__powerpc_sysv4__) || defined(__x86_64_sysv__) || defined(__s390__) || defined(__s390x__) || defined(__riscv32__) || defined(__riscv64__) || defined(__loongarch64__) + /* Return structure pointer is passed as first arg. */ + #define __va_start_struct2(LIST) \ + ((LIST)->raddr = (void*)((LIST)->iarg[(LIST)->ianum++]), \ +@@ -440,7 +440,7 @@ typedef struct vacall_alist + /* Padding of non-struct arguments. 
*/ + #define __va_argsize(TYPE_SIZE) \ + (((TYPE_SIZE) + sizeof(__vaword)-1) & -(intptr_t)sizeof(__vaword)) +-#if defined(__i386__) || defined(__m68k__) || (defined(__mips__) && !defined(__mipsn32__) && !defined(__mips64__)) || (defined(__sparc__) && !defined(__sparc64__)) || defined(__alpha__) || (defined(__arm__) && !defined(__armhf__)) || defined(__arm64__) || defined(__powerpc_aix__) || defined(__powerpc64__) || defined(__ia64__) || defined(__x86_64_sysv__) || defined(__s390__) || defined(__s390x__) || defined(__riscv32__) || defined(__riscv64__) ++#if defined(__i386__) || defined(__m68k__) || (defined(__mips__) && !defined(__mipsn32__) && !defined(__mips64__)) || (defined(__sparc__) && !defined(__sparc64__)) || defined(__alpha__) || (defined(__arm__) && !defined(__armhf__)) || defined(__arm64__) || defined(__powerpc_aix__) || defined(__powerpc64__) || defined(__ia64__) || defined(__x86_64_sysv__) || defined(__s390__) || defined(__s390x__) || defined(__riscv32__) || defined(__riscv64__) || defined(__loongarch64__) + /* args grow up */ + /* small structures < 1 word are adjusted depending on compiler */ + #define __va_arg_leftadjusted(LIST,TYPE_SIZE,TYPE_ALIGN) \ +@@ -625,7 +625,7 @@ typedef struct vacall_alist + (void*)__va_arg_rightadjusted(LIST,TYPE_SIZE,TYPE_ALIGN) \ + ) ) + #endif +-#if defined(__riscv32__) || defined(__riscv64__) ++#if defined(__riscv32__) || defined(__riscv64__) || defined(__loongarch64__) + /* the first __VA_IARG_NUM argument words are passed in registers */ + #define __va_arg_adjusted(LIST,TYPE_SIZE,TYPE_ALIGN) \ + ((LIST)->ianum + ((TYPE_SIZE) + sizeof(__varword)-1) / sizeof(__varword) <= __VA_IARG_NUM \ +@@ -655,7 +655,7 @@ typedef struct vacall_alist + #define _va_arg_long(LIST) __va_arg(LIST,long) + #define _va_arg_ulong(LIST) __va_arg(LIST,unsigned long) + +-#if defined(__mips64__) || defined(__sparc64__) || defined(__alpha__) || defined(__hppa64__) || defined(__arm64__) || defined(__powerpc64__) || defined(__ia64__) || (defined(__x86_64__) && !defined(__x86_64_x32__) && !defined(__VA_LLP64)) || defined(__s390x__) || defined(__riscv64__) ++#if defined(__mips64__) || defined(__sparc64__) || defined(__alpha__) || defined(__hppa64__) || defined(__arm64__) || defined(__powerpc64__) || defined(__ia64__) || (defined(__x86_64__) && !defined(__x86_64_x32__) && !defined(__VA_LLP64)) || defined(__s390x__) || defined(__riscv64__) || defined(__loongarch64__) + /* ‘long long’ and ‘long’ are identical. */ + #define _va_arg_longlong _va_arg_long + #define _va_arg_ulonglong _va_arg_ulong +@@ -719,7 +719,7 @@ typedef struct vacall_alist + + /* Floating point arguments. 
*/ + +-#if defined(__i386__) || defined(__m68k__) || defined(__mipsn32__) || defined(__mips64__) || defined(__sparc__) || defined(__sparc64__) || defined(__alpha__) || defined(__hppa64__) || defined(__arm64__) || defined(__powerpc__) || defined(__powerpc64__) || defined(__ia64__) || defined(__x86_64__) || defined(__s390__) || defined(__s390x__) || defined(__riscv32__) || defined(__riscv64__) ++#if defined(__i386__) || defined(__m68k__) || defined(__mipsn32__) || defined(__mips64__) || defined(__sparc__) || defined(__sparc64__) || defined(__alpha__) || defined(__hppa64__) || defined(__arm64__) || defined(__powerpc__) || defined(__powerpc64__) || defined(__ia64__) || defined(__x86_64__) || defined(__s390__) || defined(__s390x__) || defined(__riscv32__) || defined(__riscv64__) || defined(__loongarch64__) + #define __va_align_double(LIST) + #endif + #if defined(__mips__) && !defined(__mipsn32__) && !defined(__mips64__) || defined(__arm__) || defined(__armhf__) +@@ -1005,7 +1005,7 @@ typedef struct vacall_alist + *(double*)((LIST)->aptr - sizeof(double)) \ + ) ) + #endif +-#if defined(__riscv32__) || defined(__riscv64__) ++#if defined(__riscv32__) || defined(__riscv64__) || defined(__loongarch64__) + /* The first __VA_FARG_NUM floating-point args have been stored elsewhere. */ + #define _va_arg_float(LIST) \ + ((LIST)->fanum < __VA_FARG_NUM \ +@@ -1115,7 +1115,7 @@ typedef struct vacall_alist + (void*)__va_arg_leftadjusted(LIST,TYPE_SIZE,TYPE_ALIGN) \ + ) + #endif +-#if defined(__arm64__) || defined(__riscv32__) || defined(__riscv64__) ++#if defined(__arm64__) || defined(__riscv32__) || defined(__riscv64__) || defined(__loongarch64__) + /* Small structures are passed in registers or on the stack. */ + /* Big structures are passed as pointers to caller-made local copies. */ + #define __va_arg_struct(LIST,TYPE_SIZE,TYPE_ALIGN) \ +@@ -1181,7 +1181,7 @@ typedef struct vacall_alist + (__va_return(LIST,__VAlong), (LIST)->tmp._long = (VAL)) + #define _va_return_ulong(LIST,VAL) \ + (__va_return(LIST,__VAulong), (LIST)->tmp._ulong = (VAL)) +-#if defined(__mips64__) || defined(__sparc64__) || defined(__alpha__) || defined(__hppa64__) || defined(__arm64__) || defined(__powerpc64__) || defined(__ia64__) || (defined(__x86_64__) && !defined(__x86_64_x32__) && !defined(__VA_LLP64)) || defined(__riscv64__) ++#if defined(__mips64__) || defined(__sparc64__) || defined(__alpha__) || defined(__hppa64__) || defined(__arm64__) || defined(__powerpc64__) || defined(__ia64__) || (defined(__x86_64__) && !defined(__x86_64_x32__) && !defined(__VA_LLP64)) || defined(__riscv64__) || defined(__loongarch64__) + #define _va_return_longlong(LIST,VAL) \ + (__va_return(LIST,__VAlonglong), (LIST)->tmp._long = (VAL)) + #define _va_return_ulonglong(LIST,VAL) \ +diff --git a/vacall/vacall-loongarch64-linux.s b/vacall/vacall-loongarch64-linux.s +new file mode 100644 +index 0000000..f430424 +--- /dev/null ++++ b/vacall/vacall-loongarch64-linux.s +@@ -0,0 +1,192 @@ ++ .file "vacall-loongarch64.c" ++ .text ++ .align 2 ++ .globl vacall_receiver ++ .type vacall_receiver, @function ++vacall_receiver: ++.LFB0 = . 
++ .cfi_startproc ++ addi.d $sp,$sp,-272 ++ .cfi_def_cfa_offset 272 ++ st.d $ra,$sp,248 ++ st.d $fp,$sp,240 ++ la.global $t0,vacall_function ++ .cfi_offset 1, -24 ++ .cfi_offset 22, -32 ++ addi.d $fp,$sp,256 ++ .cfi_def_cfa 22, 16 ++ addi.d $t1,$fp,16 ++ ldptr.d $t0,$t0,0 ++ st.d $a0,$fp,-184 ++ st.d $a7,$fp,8 ++ st.d $a1,$fp,-176 ++ st.d $a2,$fp,-168 ++ st.d $a3,$fp,-160 ++ st.d $a4,$fp,-152 ++ st.d $a5,$fp,-144 ++ st.d $a6,$fp,-136 ++ st.d $a7,$fp,-128 ++ fst.s $f0,$fp,-116 ++ fst.s $f1,$fp,-112 ++ fst.s $f2,$fp,-108 ++ fst.s $f3,$fp,-104 ++ fst.s $f4,$fp,-100 ++ fst.s $f5,$fp,-96 ++ fst.s $f6,$fp,-92 ++ fst.s $f7,$fp,-88 ++ fst.d $f0,$fp,-80 ++ fst.d $f1,$fp,-72 ++ fst.d $f2,$fp,-64 ++ fst.d $f3,$fp,-56 ++ fst.d $f4,$fp,-48 ++ fst.d $f5,$fp,-40 ++ fst.d $f6,$fp,-32 ++ fst.d $f7,$fp,-24 ++ st.d $t1,$fp,-224 ++ st.w $zero,$fp,-248 ++ st.d $zero,$fp,-216 ++ st.w $zero,$fp,-208 ++ addi.d $a0,$fp,-248 ++ st.w $zero,$fp,-192 ++ st.w $zero,$fp,-120 ++ jirl $ra,$t0,0 ++ ldptr.w $t0,$fp,-208 ++ beqz $t0,.L1 ++ addi.w $t1,$r0,15 # 0xf ++ bgtu $t0,$t1,.L1 ++ slli.d $t0,$t0,3 ++ la.local $t1,.L4 ++ ldx.d $t0,$t1,$t0 ++ jr $t0 ++ .section .rodata ++ .align 3 ++ .align 2 ++.L4: ++ .dword .L1 ++ .dword .L15 ++ .dword .L15 ++ .dword .L14 ++ .dword .L13 ++ .dword .L12 ++ .dword .L11 ++ .dword .L10 ++ .dword .L5 ++ .dword .L5 ++ .dword .L5 ++ .dword .L5 ++ .dword .L7 ++ .dword .L6 ++ .dword .L5 ++ .dword .L3 ++ .text ++.L5: ++ ld.d $a0,$fp,-240 ++.L1: ++ ld.d $ra,$sp,248 ++ .cfi_remember_state ++ .cfi_restore 1 ++ ld.d $fp,$sp,240 ++ .cfi_restore 22 ++ addi.d $sp,$sp,272 ++ .cfi_def_cfa 3, 0 ++ jr $ra ++.L15: ++ .cfi_restore_state ++ ld.b $a0,$fp,-240 ++ b .L1 ++.L10: ++ ld.wu $a0,$fp,-240 ++ b .L1 ++.L14: ++ ld.bu $a0,$fp,-240 ++ b .L1 ++.L13: ++ ld.h $a0,$fp,-240 ++ b .L1 ++.L12: ++ ld.hu $a0,$fp,-240 ++ b .L1 ++.L11: ++ ldptr.w $a0,$fp,-240 ++ b .L1 ++.L6: ++ fld.d $f0,$fp,-240 ++ b .L1 ++.L7: ++ fld.s $f0,$fp,-240 ++ b .L1 ++.L3: ++ ldptr.w $t0,$fp,-248 ++ andi $t0,$t0,1024 ++ beqz $t0,.L1 ++ ld.d $t1,$fp,-200 ++ addi.w $t0,$r0,15 # 0xf ++ addi.d $t2,$t1,-1 ++ bgtu $t2,$t0,.L1 ++ ld.d $t2,$fp,-216 ++ addi.w $t3,$r0,-8 # 0xfffffffffffffff8 ++ addi.w $t0,$r0,8 # 0x8 ++ andi $t4,$t2,7 ++ add.d $t5,$t1,$t4 ++ and $t2,$t2,$t3 ++ ldptr.d $t7,$t2,0 ++ slli.w $t3,$t5,3 ++ slli.w $t6,$t4,3 ++ bgtu $t1,$t0,.L17 ++ bgtu $t5,$t0,.L18 ++ addi.w $t3,$t3,-1 ++ addi.w $a0,$r0,2 # 0x2 ++ sll.d $a0,$a0,$t3 ++ addi.d $a0,$a0,-1 ++ and $a0,$a0,$t7 ++ sra.d $a0,$a0,$t6 ++ b .L1 ++.L17: ++ addi.w $t1,$r0,16 # 0x10 ++ ld.d $t8,$t2,8 ++ sra.d $t7,$t7,$t6 ++ sub.d $t0,$t0,$t4 ++ bgtu $t5,$t1,.L19 ++ addi.w $t3,$t3,-65 ++ addi.w $a1,$r0,2 # 0x2 ++ sll.d $a1,$a1,$t3 ++ addi.d $a1,$a1,-1 ++ and $a1,$a1,$t8 ++ slli.w $t0,$t0,2 ++ sll.d $a0,$a1,$t0 ++ sll.d $a0,$a0,$t0 ++ or $a0,$a0,$t7 ++ sra.d $a1,$a1,$t6 ++ b .L1 ++.L18: ++ ld.d $t2,$t2,8 ++ addi.w $t3,$t3,-65 ++ addi.w $t1,$r0,2 # 0x2 ++ sll.d $t1,$t1,$t3 ++ addi.d $t1,$t1,-1 ++ sub.d $t0,$t0,$t4 ++ and $t1,$t1,$t2 ++ slli.w $t0,$t0,3 ++ sll.d $t0,$t1,$t0 ++ sra.d $t7,$t7,$t6 ++ or $a0,$t0,$t7 ++ b .L1 ++.L19: ++ ld.d $t2,$t2,16 ++ addi.w $t3,$t3,-129 ++ addi.w $t1,$r0,2 # 0x2 ++ sll.d $t1,$t1,$t3 ++ addi.d $t1,$t1,-1 ++ slli.w $t0,$t0,3 ++ and $t1,$t1,$t2 ++ sll.d $a0,$t8,$t0 ++ sll.d $t0,$t1,$t0 ++ sra.d $t8,$t8,$t6 ++ or $a0,$a0,$t7 ++ or $a1,$t0,$t8 ++ b .L1 ++ .cfi_endproc ++.LFE0: ++ .size vacall_receiver, .-vacall_receiver ++ .ident "GCC: (GNU) 12.0.1 20220317 (experimental)" ++ .section .note.GNU-stack,"",@progbits +diff --git a/vacall/vacall-loongarch64-macro.S 
b/vacall/vacall-loongarch64-macro.S +new file mode 100644 +index 0000000..0699ab6 +--- /dev/null ++++ b/vacall/vacall-loongarch64-macro.S +@@ -0,0 +1,193 @@ ++ .file "vacall-loongarch64.c" ++ .text ++ .align 2 ++ .globl vacall_receiver ++ .type vacall_receiver, @function ++vacall_receiver: ++.LFB0 = . ++ .cfi_startproc ++ addi.d $sp,$sp,-272 ++ .cfi_def_cfa_offset 272 ++ st.d $ra,$sp,248 ++ st.d $fp,$sp,240 ++ la.global $t0,vacall_function ++ .cfi_offset 1, -24 ++ .cfi_offset 22, -32 ++ addi.d $fp,$sp,256 ++ .cfi_def_cfa 22, 16 ++ addi.d $t1,$fp,16 ++ ldptr.d $t0,$t0,0 ++ st.d $a0,$fp,-184 ++ st.d $a7,$fp,8 ++ st.d $a1,$fp,-176 ++ st.d $a2,$fp,-168 ++ st.d $a3,$fp,-160 ++ st.d $a4,$fp,-152 ++ st.d $a5,$fp,-144 ++ st.d $a6,$fp,-136 ++ st.d $a7,$fp,-128 ++ fst.s $f0,$fp,-116 ++ fst.s $f1,$fp,-112 ++ fst.s $f2,$fp,-108 ++ fst.s $f3,$fp,-104 ++ fst.s $f4,$fp,-100 ++ fst.s $f5,$fp,-96 ++ fst.s $f6,$fp,-92 ++ fst.s $f7,$fp,-88 ++ fst.d $f0,$fp,-80 ++ fst.d $f1,$fp,-72 ++ fst.d $f2,$fp,-64 ++ fst.d $f3,$fp,-56 ++ fst.d $f4,$fp,-48 ++ fst.d $f5,$fp,-40 ++ fst.d $f6,$fp,-32 ++ fst.d $f7,$fp,-24 ++ st.d $t1,$fp,-224 ++ st.w $zero,$fp,-248 ++ st.d $zero,$fp,-216 ++ st.w $zero,$fp,-208 ++ addi.d $a0,$fp,-248 ++ st.w $zero,$fp,-192 ++ st.w $zero,$fp,-120 ++ jirl $ra,$t0,0 ++ ldptr.w $t0,$fp,-208 ++ beqz $t0,.L1 ++ addi.w $t1,$r0,15 # 0xf ++ bgtu $t0,$t1,.L1 ++ slli.d $t0,$t0,3 ++ la.local $t1,.L4 ++ ldx.d $t0,$t1,$t0 ++ jr $t0 ++ .section .rodata ++ .align 3 ++ .align 2 ++.L4: ++ .dword .L1 ++ .dword .L15 ++ .dword .L15 ++ .dword .L14 ++ .dword .L13 ++ .dword .L12 ++ .dword .L11 ++ .dword .L10 ++ .dword .L5 ++ .dword .L5 ++ .dword .L5 ++ .dword .L5 ++ .dword .L7 ++ .dword .L6 ++ .dword .L5 ++ .dword .L3 ++ .text ++.L5: ++ ld.d $a0,$fp,-240 ++.L1: ++ ld.d $ra,$sp,248 ++ .cfi_remember_state ++ .cfi_restore 1 ++ ld.d $fp,$sp,240 ++ .cfi_restore 22 ++ addi.d $sp,$sp,272 ++ .cfi_def_cfa 3, 0 ++ jr $ra ++.L15: ++ .cfi_restore_state ++ ld.b $a0,$fp,-240 ++ b .L1 ++.L10: ++ ld.wu $a0,$fp,-240 ++ b .L1 ++.L14: ++ ld.bu $a0,$fp,-240 ++ b .L1 ++.L13: ++ ld.h $a0,$fp,-240 ++ b .L1 ++.L12: ++ ld.hu $a0,$fp,-240 ++ b .L1 ++.L11: ++ ldptr.w $a0,$fp,-240 ++ b .L1 ++.L6: ++ fld.d $f0,$fp,-240 ++ b .L1 ++.L7: ++ fld.s $f0,$fp,-240 ++ b .L1 ++.L3: ++ ldptr.w $t0,$fp,-248 ++ andi $t0,$t0,1024 ++ beqz $t0,.L1 ++ ld.d $t1,$fp,-200 ++ addi.w $t0,$r0,15 # 0xf ++ addi.d $t2,$t1,-1 ++ bgtu $t2,$t0,.L1 ++ ld.d $t2,$fp,-216 ++ addi.w $t3,$r0,-8 # 0xfffffffffffffff8 ++ addi.w $t0,$r0,8 # 0x8 ++ andi $t4,$t2,7 ++ add.d $t5,$t1,$t4 ++ and $t2,$t2,$t3 ++ ldptr.d $t7,$t2,0 ++ slli.w $t3,$t5,3 ++ slli.w $t6,$t4,3 ++ bgtu $t1,$t0,.L17 ++ bgtu $t5,$t0,.L18 ++ addi.w $t3,$t3,-1 ++ addi.w $a0,$r0,2 # 0x2 ++ sll.d $a0,$a0,$t3 ++ addi.d $a0,$a0,-1 ++ and $a0,$a0,$t7 ++ sra.d $a0,$a0,$t6 ++ b .L1 ++.L17: ++ addi.w $t1,$r0,16 # 0x10 ++ ld.d $t8,$t2,8 ++ sra.d $t7,$t7,$t6 ++ sub.d $t0,$t0,$t4 ++ bgtu $t5,$t1,.L19 ++ addi.w $t3,$t3,-65 ++ addi.w $a1,$r0,2 # 0x2 ++ sll.d $a1,$a1,$t3 ++ addi.d $a1,$a1,-1 ++ and $a1,$a1,$t8 ++ slli.w $t0,$t0,2 ++ sll.d $a0,$a1,$t0 ++ sll.d $a0,$a0,$t0 ++ or $a0,$a0,$t7 ++ sra.d $a1,$a1,$t6 ++ b .L1 ++.L18: ++ ld.d $t2,$t2,8 ++ addi.w $t3,$t3,-65 ++ addi.w $t1,$r0,2 # 0x2 ++ sll.d $t1,$t1,$t3 ++ addi.d $t1,$t1,-1 ++ sub.d $t0,$t0,$t4 ++ and $t1,$t1,$t2 ++ slli.w $t0,$t0,3 ++ sll.d $t0,$t1,$t0 ++ sra.d $t7,$t7,$t6 ++ or $a0,$t0,$t7 ++ b .L1 ++.L19: ++ ld.d $t2,$t2,16 ++ addi.w $t3,$t3,-129 ++ addi.w $t1,$r0,2 # 0x2 ++ sll.d $t1,$t1,$t3 ++ addi.d $t1,$t1,-1 ++ slli.w $t0,$t0,3 ++ and $t1,$t1,$t2 ++ sll.d $a0,$t8,$t0 
++ sll.d $t0,$t1,$t0 ++ sra.d $t8,$t8,$t6 ++ or $a0,$a0,$t7 ++ or $a1,$t0,$t8 ++ b .L1 ++ .cfi_endproc ++.LFE0: ++ .size vacall_receiver, .-vacall_receiver ++#if defined __linux__ || defined __FreeBSD__ || defined __FreeBSD_kernel__ || defined __DragonFly__ ++ .section .note.GNU-stack,"",@progbits ++#endif +diff --git a/vacall/vacall-loongarch64.c b/vacall/vacall-loongarch64.c +new file mode 100644 +index 0000000..d5cfa7f +--- /dev/null ++++ b/vacall/vacall-loongarch64.c +@@ -0,0 +1,238 @@ ++/* vacall function for LoongArch 64-bit CPU */ ++ ++/* ++ * Copyright 1995-2022 Bruno Haible ++ * ++ * This program is free software: you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License as published by ++ * the Free Software Foundation; either version 2 of the License, or ++ * (at your option) any later version. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program. If not, see . ++ */ ++ ++#include "vacall-internal.h" ++ ++#ifdef REENTRANT ++#define vacall_receiver callback_receiver ++register struct { void (*vacall_function) (void*,va_alist); void* arg; } ++ * env __asm__("t8"); ++#endif ++ ++register __varword iarg1 __asm__("a0"); ++register __varword iarg2 __asm__("a1"); ++register __varword iarg3 __asm__("a2"); ++register __varword iarg4 __asm__("a3"); ++register __varword iarg5 __asm__("a4"); ++register __varword iarg6 __asm__("a5"); ++register __varword iarg7 __asm__("a6"); ++register __varword iarg8 __asm__("a7"); ++ ++register float farg1 __asm__("f0"); ++register float farg2 __asm__("f1"); ++register float farg3 __asm__("f2"); ++register float farg4 __asm__("f3"); ++register float farg5 __asm__("f4"); ++register float farg6 __asm__("f5"); ++register float farg7 __asm__("f6"); ++register float farg8 __asm__("f7"); ++ ++register double darg1 __asm__("f0"); ++register double darg2 __asm__("f1"); ++register double darg3 __asm__("f2"); ++register double darg4 __asm__("f3"); ++register double darg5 __asm__("f4"); ++register double darg6 __asm__("f5"); ++register double darg7 __asm__("f6"); ++register double darg8 __asm__("f7"); ++ ++register __varword iret __asm__("a0"); ++register __varword iret2 __asm__("a1"); ++register float fret __asm__("f0"); ++register double dret __asm__("f0"); ++ ++/* The ABI requires that the first 8 general-purpose argument words are ++ being passed in registers, even if these words belong to structs that are ++ at most 2 words large. No room is allocated for these register words on ++ the stack by the caller, but the callee allocates room for them - at the ++ right place in the stack frame, that is, above the retaddr - if and only ++ if they are part of a struct that extends to the stack and the address of ++ this struct is taken. */ ++struct gpargsequence { ++ __vaword word8; /* a7 */ ++ __vaword firststackword; ++}; ++ ++#ifdef REENTRANT ++static ++#endif ++void /* the return type is variable, not void! */ ++vacall_receiver (__vaword word1, __vaword word2, __vaword word3, __vaword word4, ++ __vaword word5, __vaword word6, __vaword word7, ++ struct gpargsequence gpargs) ++{ ++ __va_alist list; ++ /* Move the arguments passed in registers to temp storage. 
*/ ++ list.iarg[0] = iarg1; ++ list.iarg[1] = iarg2; ++ list.iarg[2] = iarg3; ++ list.iarg[3] = iarg4; ++ list.iarg[4] = iarg5; ++ list.iarg[5] = iarg6; ++ list.iarg[6] = iarg7; ++ list.iarg[7] = iarg8; /* = gpargs.word8 */ ++ list.farg[0] = farg1; ++ list.farg[1] = farg2; ++ list.farg[2] = farg3; ++ list.farg[3] = farg4; ++ list.farg[4] = farg5; ++ list.farg[5] = farg6; ++ list.farg[6] = farg7; ++ list.farg[7] = farg8; ++ list.darg[0] = darg1; ++ list.darg[1] = darg2; ++ list.darg[2] = darg3; ++ list.darg[3] = darg4; ++ list.darg[4] = darg5; ++ list.darg[5] = darg6; ++ list.darg[6] = darg7; ++ list.darg[7] = darg8; ++ /* Prepare the va_alist. */ ++ list.flags = 0; ++ list.aptr = (long)&gpargs + sizeof(__vaword); ++ list.raddr = (void*)0; ++ list.rtype = __VAvoid; ++ list.ianum = 0; ++ list.fanum = 0; ++ /* Call vacall_function. The macros do all the rest. */ ++#ifndef REENTRANT ++ (*vacall_function) (&list); ++#else /* REENTRANT */ ++ (*env->vacall_function) (env->arg,&list); ++#endif ++ /* Put return value into proper register. */ ++ if (list.rtype == __VAvoid) { ++ } else ++ if (list.rtype == __VAchar) { ++ iret = list.tmp._char; ++ } else ++ if (list.rtype == __VAschar) { ++ iret = list.tmp._schar; ++ } else ++ if (list.rtype == __VAuchar) { ++ iret = list.tmp._uchar; ++ } else ++ if (list.rtype == __VAshort) { ++ iret = list.tmp._short; ++ } else ++ if (list.rtype == __VAushort) { ++ iret = list.tmp._ushort; ++ } else ++ if (list.rtype == __VAint) { ++ iret = list.tmp._int; ++ } else ++ if (list.rtype == __VAuint) { ++ iret = list.tmp._uint; ++ } else ++ if (list.rtype == __VAlong || list.rtype == __VAlonglong) { ++ iret = list.tmp._long; ++ } else ++ if (list.rtype == __VAulong || list.rtype == __VAulonglong) { ++ iret = list.tmp._ulong; ++ } else ++ if (list.rtype == __VAfloat) { ++ fret = list.tmp._float; ++ } else ++ if (list.rtype == __VAdouble) { ++ dret = list.tmp._double; ++ } else ++ if (list.rtype == __VAvoidp) { ++ iret = (long)list.tmp._ptr; ++ } else ++ if (list.rtype == __VAstruct) { ++ /* normal struct return convention */ ++ if (list.flags & __VA_REGISTER_STRUCT_RETURN) { ++ /* Return structs of size <= 16 in registers. 
*/ ++ if (list.rsize > 0 && list.rsize <= 16) { ++ #if 0 /* Unoptimized */ ++ iret = (__varword)((unsigned char *) list.raddr)[0]; ++ if (list.rsize >= 2) ++ iret |= (__varword)((unsigned char *) list.raddr)[1] << 8; ++ if (list.rsize >= 3) ++ iret |= (__varword)((unsigned char *) list.raddr)[2] << 16; ++ if (list.rsize >= 4) ++ iret |= (__varword)((unsigned char *) list.raddr)[3] << 24; ++ if (list.rsize >= 5) ++ iret |= (__varword)((unsigned char *) list.raddr)[4] << 32; ++ if (list.rsize >= 6) ++ iret |= (__varword)((unsigned char *) list.raddr)[5] << 40; ++ if (list.rsize >= 7) ++ iret |= (__varword)((unsigned char *) list.raddr)[6] << 48; ++ if (list.rsize >= 8) ++ iret |= (__varword)((unsigned char *) list.raddr)[7] << 56; ++ if (list.rsize >= 9) { ++ iret2 = (__varword)((unsigned char *) list.raddr)[8]; ++ if (list.rsize >= 10) ++ iret2 |= (__varword)((unsigned char *) list.raddr)[9] << 8; ++ if (list.rsize >= 11) ++ iret2 |= (__varword)((unsigned char *) list.raddr)[10] << 16; ++ if (list.rsize >= 12) ++ iret2 |= (__varword)((unsigned char *) list.raddr)[11] << 24; ++ if (list.rsize >= 13) ++ iret2 |= (__varword)((unsigned char *) list.raddr)[12] << 32; ++ if (list.rsize >= 14) ++ iret2 |= (__varword)((unsigned char *) list.raddr)[13] << 40; ++ if (list.rsize >= 15) ++ iret2 |= (__varword)((unsigned char *) list.raddr)[14] << 48; ++ if (list.rsize >= 16) ++ iret2 |= (__varword)((unsigned char *) list.raddr)[15] << 56; ++ } ++ #else /* Optimized: fewer conditional jumps, fewer memory accesses */ ++ uintptr_t count = list.rsize; /* > 0, ≤ 2*sizeof(__varword) */ ++ __varword* wordaddr = (__varword*)((uintptr_t)list.raddr & ~(uintptr_t)(sizeof(__varword)-1)); ++ uintptr_t start_offset = (uintptr_t)list.raddr & (uintptr_t)(sizeof(__varword)-1); /* ≥ 0, < sizeof(__varword) */ ++ uintptr_t end_offset = start_offset + count; /* > 0, < 3*sizeof(__varword) */ ++ if (count <= sizeof(__varword)) { ++ /* Assign iret. */ ++ if (end_offset <= sizeof(__varword)) { ++ /* 0 < end_offset ≤ sizeof(__varword) */ ++ __varword mask0 = ((__varword)2 << (end_offset*8-1)) - 1; ++ iret = (wordaddr[0] & mask0) >> (start_offset*8); ++ } else { ++ /* sizeof(__varword) < end_offset < 2*sizeof(__varword), start_offset > 0 */ ++ __varword mask1 = ((__varword)2 << (end_offset*8-sizeof(__varword)*8-1)) - 1; ++ iret = (wordaddr[0] >> (start_offset*8)) | ((wordaddr[1] & mask1) << (sizeof(__varword)*8-start_offset*8)); ++ } ++ } else { ++ /* Assign iret, iret2. 
*/ ++ if (end_offset <= 2*sizeof(__varword)) { ++ /* sizeof(__varword) < end_offset ≤ 2*sizeof(__varword) */ ++ __varword mask1 = ((__varword)2 << (end_offset*8-sizeof(__varword)*8-1)) - 1; ++ iret = (wordaddr[0] >> (start_offset*8)) | ((wordaddr[1] & mask1) << (sizeof(__varword)*4-start_offset*4) << (sizeof(__varword)*4-start_offset*4)); ++ iret2 = (wordaddr[1] & mask1) >> (start_offset*8); ++ } else { ++ /* 2*sizeof(__varword) < end_offset < 3*sizeof(__varword), start_offset > 0 */ ++ __varword mask2 = ((__varword)2 << (end_offset*8-2*sizeof(__varword)*8-1)) - 1; ++ iret = (wordaddr[0] >> (start_offset*8)) | (wordaddr[1] << (sizeof(__varword)*8-start_offset*8)); ++ iret2 = (wordaddr[1] >> (start_offset*8)) | ((wordaddr[2] & mask2) << (sizeof(__varword)*8-start_offset*8)); ++ } ++ } ++ #endif ++ } ++ } ++ } ++} ++ ++#ifdef REENTRANT ++__vacall_r_t ++callback_get_receiver (void) ++{ ++ return (__vacall_r_t)(void*)&callback_receiver; ++} ++#endif +diff --git a/vacall/vacall.h b/vacall/vacall.h +index 9d14d4a..a467df7 100644 +--- a/vacall/vacall.h ++++ b/vacall/vacall.h +@@ -1,5 +1,5 @@ + /* +- * Copyright 1995-2021 Bruno Haible ++ * Copyright 1995-2022 Bruno Haible + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by +@@ -136,7 +136,7 @@ enum __VA_alist_flags + #if defined(__sparc__) && !defined(__sparc64__) && defined(__sun) && (defined(__SUNPRO_C) || defined(__SUNPRO_CC)) /* SUNWspro cc or CC */ + __VA_SUNPROCC_STRUCT_RETURN, + #else +-#if (defined(__i386__) && (defined(_WIN32) || defined(__CYGWIN__) || (defined(__MACH__) && defined(__APPLE__)) || defined(__FreeBSD__) || defined(__DragonFly__) || defined(__OpenBSD__))) || defined(__m68k__) || defined(__mipsn32__) || defined(__mips64__) || defined(__sparc64__) || defined(__hppa__) || defined(__hppa64__) || defined(__arm__) || defined(__armhf__) || defined(__arm64__) || defined(__powerpc64_elfv2__) || defined(__ia64__) || defined(__x86_64__) || defined(__riscv32__) || defined(__riscv64__) ++#if (defined(__i386__) && (defined(_WIN32) || defined(__CYGWIN__) || (defined(__MACH__) && defined(__APPLE__)) || defined(__FreeBSD__) || defined(__DragonFly__) || defined(__OpenBSD__))) || defined(__m68k__) || defined(__mipsn32__) || defined(__mips64__) || defined(__sparc64__) || defined(__hppa__) || defined(__hppa64__) || defined(__arm__) || defined(__armhf__) || defined(__arm64__) || defined(__powerpc64_elfv2__) || defined(__ia64__) || defined(__x86_64__) || defined(__riscv32__) || defined(__riscv64__) || defined(__loongarch64__) + __VA_SMALL_STRUCT_RETURN | + #endif + #if defined(__GNUC__) && !((defined(__mipsn32__) || defined(__mips64__)) && ((__GNUC__ == 3 && __GNUC_MINOR__ >= 4) || (__GNUC__ > 3))) +@@ -253,7 +253,7 @@ enum __VA_alist_flags + #endif + + /* These are for internal use only */ +-#if defined(__i386__) || defined(__m68k__) || defined(__mipsn32__) || defined(__mips64__) || defined(__sparc64__) || defined(__alpha__) || defined(__hppa64__) || defined(__arm__) || defined(__armhf__) || defined(__arm64__) || defined(__powerpc__) || defined(__powerpc64__) || defined(__ia64__) || defined(__x86_64__) || (defined(__s390__) && !defined(__s390x__)) || defined(__riscv64__) ++#if defined(__i386__) || defined(__m68k__) || defined(__mipsn32__) || defined(__mips64__) || defined(__sparc64__) || defined(__alpha__) || defined(__hppa64__) || defined(__arm__) || defined(__armhf__) || defined(__arm64__) || defined(__powerpc__) || defined(__powerpc64__) || 
defined(__ia64__) || defined(__x86_64__) || (defined(__s390__) && !defined(__s390x__)) || defined(__riscv64__) || defined(__loongarch64__) + __VA_REGISTER_STRUCT_RETURN = 1<<10, + #endif + #if defined(__mipsn32__) || defined(__mips64__) diff --git a/ffmpeg/PKGBUILD b/ffmpeg/PKGBUILD index 7f9ecaf9f6..924225d2c9 100644 --- a/ffmpeg/PKGBUILD +++ b/ffmpeg/PKGBUILD @@ -156,7 +156,7 @@ build() { --disable-stripping \ --enable-amf \ --enable-avisynth \ - --enable-cuda-llvm \ + --disable-cuda-llvm \ --enable-lto \ --enable-fontconfig \ --enable-frei0r \ @@ -209,12 +209,14 @@ build() { --enable-libxml2 \ --enable-libxvid \ --enable-libzimg \ - --enable-nvdec \ - --enable-nvenc \ + --disable-nvdec \ + --disable-nvenc \ --enable-opencl \ --enable-opengl \ --enable-shared \ --enable-version3 \ + --disable-doc \ + --disable-lsx \ --enable-vulkan make make tools/qt-faststart diff --git a/ffmpeg4.4/PKGBUILD b/ffmpeg4.4/PKGBUILD index 1386743253..9cf705846f 100644 --- a/ffmpeg4.4/PKGBUILD +++ b/ffmpeg4.4/PKGBUILD @@ -29,7 +29,6 @@ depends=( libdrm libfreetype.so libiec61883 - libmfx libmodplug libpulse librav1e.so @@ -98,11 +97,6 @@ source=(git+https://git.ffmpeg.org/ffmpeg.git#tag=${_tag} b2sums=('SKIP' 'b656a17dd3996c6871d322ba1fcf25410ed580d9600348cda087d705660601d06070492300d31c12d54b0e9914cb92bb9d997e51462c0577e1a90539bf0b76ee') -pkgver() { - cd ffmpeg - git describe --tags | sed 's/^n//' -} - prepare() { cd ffmpeg git cherry-pick -n 988f2e9eb063db7c1a678729f58aab6eba59a55b # fix nvenc on older gpus @@ -128,7 +122,6 @@ build() { --disable-stripping \ --enable-amf \ --enable-avisynth \ - --enable-cuda-llvm \ --enable-lto \ --enable-fontconfig \ --enable-gmp \ @@ -145,7 +138,6 @@ build() { --enable-libgsm \ --enable-libiec61883 \ --enable-libjack \ - --enable-libmfx \ --enable-libmodplug \ --enable-libmp3lame \ --enable-libopencore_amrnb \ @@ -172,8 +164,6 @@ build() { --enable-libxml2 \ --enable-libxvid \ --enable-libzimg \ - --enable-nvdec \ - --enable-nvenc \ --enable-shared \ --enable-version3 diff --git a/ffmpegthumbnailer/PKGBUILD b/ffmpegthumbnailer/PKGBUILD index 22277a7eca..522125ac66 100644 --- a/ffmpegthumbnailer/PKGBUILD +++ b/ffmpegthumbnailer/PKGBUILD @@ -4,7 +4,7 @@ pkgname=ffmpegthumbnailer pkgver=2.2.2 -pkgrel=5 +pkgrel=6 pkgdesc="Lightweight video thumbnailer that can be used by file managers" url="https://github.com/dirkvdb/ffmpegthumbnailer" license=('GPL2') diff --git a/fftw/PKGBUILD b/fftw/PKGBUILD index e56eabad26..ed871e0296 100644 --- a/fftw/PKGBUILD +++ b/fftw/PKGBUILD @@ -21,9 +21,9 @@ makedepends=( gcc-fortran ) provides=( - libfftw3q_threads.so - libfftw3q_omp.so - libfftw3q.so +# libfftw3q_threads.so +# libfftw3q_omp.so +# libfftw3q.so libfftw3l_threads.so libfftw3l_omp.so libfftw3l_mpi.so @@ -65,20 +65,20 @@ build() { --enable-openmp ) local _configure_single=( - --enable-sse - --enable-avx + #--enable-sse + #--enable-avx --enable-single ) local _configure_double=( - --enable-sse2 - --enable-avx + #--enable-sse2 + #--enable-avx ) local _configure_long_double=( --enable-long-double ) local _configure_quad=( --disable-mpi - --enable-quad-precision + #--enable-quad-precision ) local _cmake_options=( -B build @@ -90,10 +90,10 @@ build() { -D ENABLE_FLOAT=ON -D ENABLE_LONG_DOUBLE=ON -D ENABLE_QUAD_PRECISION=ON - -D ENABLE_SSE=ON - -D ENABLE_SSE2=ON - -D ENABLE_AVX=ON - -D ENABLE_AVX2=ON + -D ENABLE_SSE=OFF + -D ENABLE_SSE2=OFF + -D ENABLE_AVX=OFF + -D ENABLE_AVX2=OFF ) # create missing FFTW3LibraryDepends.cmake @@ -104,7 +104,7 @@ build() { export F77='gfortran' # use 
upstream default CFLAGS while keeping our -march/-mtune - CFLAGS+=" -O3 -fomit-frame-pointer -malign-double -fstrict-aliasing -ffast-math" + CFLAGS+=" -O3 -fomit-frame-pointer -fstrict-aliasing -ffast-math" for _name in "${_build_types[@]}"; do ( diff --git a/firecracker/PKGBUILD b/firecracker/PKGBUILD index 2468e92adb..c68ccefbd3 100644 --- a/firecracker/PKGBUILD +++ b/firecracker/PKGBUILD @@ -14,7 +14,7 @@ _commit='8a43b32e9a885443a87b5bfe70808a3d68936289' source=("$pkgname::git+https://github.com/firecracker-microvm/firecracker.git#commit=$_commit") b2sums=('SKIP') -_cargo_target="$CARCH-unknown-linux-gnu" +_cargo_target="`uname -m`-unknown-linux-gnu" pkgver() { cd "$pkgbase" diff --git a/firefox-developer-edition/0001-Add-support-for-LoongArch64.patch b/firefox-developer-edition/0001-Add-support-for-LoongArch64.patch new file mode 100644 index 0000000000..b8a33207c9 --- /dev/null +++ b/firefox-developer-edition/0001-Add-support-for-LoongArch64.patch @@ -0,0 +1,80 @@ +From 0c4dfaca7c7a38244034a6d872c0c7aeec0d4819 Mon Sep 17 00:00:00 2001 +From: Jiangjin Wang +Date: Sun, 22 Oct 2023 22:13:17 -0700 +Subject: [PATCH 1/5] Add support for LoongArch64 + +Adapted from LoongArchLinux. Rebased to Firefox 118.0.2. + +Co-Authored-By: loongson +Co-Authored-By: WANG Xuerui +--- + third_party/libwebrtc/build/build_config.h | 4 ++++ + third_party/rust/nix/.cargo-checksum.json | 2 +- + third_party/rust/nix/src/sys/ioctl/linux.rs | 1 + + toolkit/components/telemetry/pingsender/pingsender.cpp | 1 + + toolkit/moz.configure | 2 +- + 5 files changed, 8 insertions(+), 2 deletions(-) + +diff --git a/third_party/libwebrtc/build/build_config.h b/third_party/libwebrtc/build/build_config.h +index c39ae9da50f99..28191de02654b 100644 +--- a/third_party/libwebrtc/build/build_config.h ++++ b/third_party/libwebrtc/build/build_config.h +@@ -210,6 +210,10 @@ + #define ARCH_CPU_SPARC 1 + #define ARCH_CPU_32_BITS 1 + #define ARCH_CPU_BIG_ENDIAN 1 ++#elif defined(__loongarch_lp64) ++#define ARCH_CPU_LOONGARCH64 1 ++#define ARCH_CPU_64_BITS 1 ++#define ARCH_CPU_LITTLE_ENDIAN 1 + #else + #error Please add support for your architecture in build/build_config.h + #endif +diff --git a/third_party/rust/nix/.cargo-checksum.json b/third_party/rust/nix/.cargo-checksum.json +index f4c932b88926b..b7b9c9f3c9a89 100644 +--- a/third_party/rust/nix/.cargo-checksum.json ++++ b/third_party/rust/nix/.cargo-checksum.json +@@ -1 +1 @@ 
+-{"files":{"CHANGELOG.md":"8ee4e556e53d1b39400a48675d3ecff0bf27e419accab7ca3be76ab934289548","Cargo.toml":"2e6eff9170182f107188b8bc9802efd044ef47178afc7f138950ecff1c1ceb96","LICENSE":"66e3ee1fa7f909ad3c612d556f2a0cdabcd809ad6e66f3b0605015ac64841b70","README.md":"1ed9a0e26ae6e575b3262ae734dd02889455593b761ee62403ea5a64104f3c9c","src/dir.rs":"0280a2dc480bd913f24ed84fbe26569fa2e8eefa660e5ad7c21e05fc34c14d16","src/env.rs":"028bc5e20139ebba418a655a2978a53335dc7680bf1de43d2c8333dd72cfa5c4","src/errno.rs":"e55d075858e349d9afea9ce0480f7fb7ba4dccccf0694fd7b3280b918836203c","src/fcntl.rs":"ea8f43d8fec0b6c3b7d903333e4c1ce85611684a4afd561c55cfe4b61a979e94","src/features.rs":"5b4a0831e5f4b79a6f0e42ed052fd66c875da18959750be51e41fb59ac19feed","src/ifaddrs.rs":"377865eb48040d28c392a1aec0221320108e3392ea285d23405ae2cfa5c54b20","src/kmod.rs":"c818ced08d55ae36fdf82fa914ba856b688e37234d574d3faa37128211d512fb","src/lib.rs":"a62fac2ba7111157c5b64251f67f8a189f04bd587d5c80703454a596ea7ae5d9","src/macros.rs":"e23d7d8be22ef0bf9febaaf2739585453103607c0139bd3995a324e4a16d011e","src/mount/bsd.rs":"4cf35606a63d7ca41caac3b38f01e2b70c63e71978c0529f19fc79182629dbe0","src/mount/linux.rs":"6e5d61788dedf1ca4416c6c6a3a9c6c747f9352c26d863f4a1d4142e288584d6","src/mount/mod.rs":"ba9f60eb831224ab73bdd87e00e15d13b9ce9efb70b18bf8f3fe60406d522b3e","src/mqueue.rs":"ed0a189036b2437b5f7f7f1312fa545540b06ca72171b451d8bce42cc3627534","src/net/if_.rs":"b32a8a1f952de60d95e549779a5c673fd72aa665e86bfdfc8ec6badf3016b9b1","src/net/mod.rs":"577f70170e53d4a6de1abb70bf8f1031ec3e65c0e63ef5fcf05c907125e7ac17","src/poll.rs":"2fc1d144fb40db51811c6357b520ab7993529702d8f0d8060c903118ff4f7259","src/pty.rs":"27b4f76c23acf02542674017067fee74cdcac907338458700a1aa4d6f6a62e27","src/sched.rs":"403aa5ebed81910263d42a94717612b737550bf053227b7d90f1c8949188d919","src/sys/aio.rs":"ae091de8540c97da374a39e7d154c1b3ce50f41e6fc20a45c6b06eb838e74366","src/sys/epoll.rs":"28e22debf474d1b047e8044a00b354c25dab2fa125960f9f2f14cc34289fd5c9","src/sys/event.rs":"dbd8e84bccb813839295b0a336485783ef19548d2317931f0ceb5ee62f839a40","src/sys/eventfd.rs":"c8db8f5874726fdad289ad5e2603a7d71a1ae5a899dcde3a35d3edff8b498b7e","src/sys/inotify.rs":"5b4da774313afa9c28c3f92f9d07dce9bf4c8d044fd6a16f19480e79a19e808b","src/sys/ioctl/bsd.rs":"bbd02e30b0a78c1cb22777d9b00cfcbba9c68505cffc06118ac68474cf6fea39","src/sys/ioctl/linux.rs":"028181834d119b834bf399f2b8a6176cc57e75144693f28f32059d087d8c8018","src/sys/ioctl/mod.rs":"89b20579476b2e0254e0ecb1b41830cccd7027a22cbdb816a9d4ec3924842ac1","src/sys/memfd.rs":"f58d7fbe67c4b994832d72f5fbd59c136c8f1ae88ea8b0bc1c099db2d847ee6c","src/sys/mman.rs":"17df1bc34ba92bdd6bad1e11e4ef139998117f6c468c8f560421858f3cc899a5","src/sys/mod.rs":"baabf649f758ad4acce849ec1795dd4e4f9c6539e677bad5fa777300a4871dcb","src/sys/personality.rs":"aa89760c023bfec3fca5d8636f9eac9d337f5547933793ce6df7a0de97ae6ee1","src/sys/pthread.rs":"258cdf7ff0b61a4afa6d228109e4cb4fb88d859bb8dfe6c959d95130fb010906","src/sys/ptrace/bsd.rs":"4c590d8f023ff52f396f8b6f2150c08e5c9486d3088d9c173db33a70d616b800","src/sys/ptrace/linux.rs":"c82db3fb18aa97755f9ccb440a957cd46d664968a94045830c5d74d2d53bc19f","src/sys/ptrace/mod.rs":"e9e5d970097f5eafffba900959d4fdbf233bff9ed7f599fc9896bb44d86a57a4","src/sys/quota.rs":"02e698a25f0986fb43aa88689f3d3d8b9edc6ae48496ad02f7214fccaa493e00","src/sys/reboot.rs":"eacdf57694a6629fb05787e16450446102a62818274495f2ad4e445807d09221","src/sys/resource.rs":"d498d0c00fd30e35e1269a8902cb812014d813f63ec95364f8f59f1912ba5657","src/sys/select.rs":"65c39b129d3cc85b8ca026ff26dcf80c56398
24f43715881c3c1bbb6bf0c8a60","src/sys/sendfile.rs":"7a62099f9771fecff49b9c11210341e3c1a4acf22f8dfb96d395e29421648676","src/sys/signal.rs":"c3e13a2edea54d190a4b051f62efc97953c00b5051a9fda0e39e3bc732a31939","src/sys/signalfd.rs":"583524434fd37143be3db37fa6f6cbd339f7946416f05b58a95e246947e5cc9d","src/sys/socket/addr.rs":"84df895052f59ec84774b189ffb285d2a37a9703af6c8310ae5040cca1a2583e","src/sys/socket/mod.rs":"6deb55438cad3606385303f036b0efd842dfd759fba93611911f5a4f2613c9dc","src/sys/socket/sockopt.rs":"ed1f920364bfe88bbe6eaeeefb27a63bfcdd7d67604aca2f03e22f2b502df55a","src/sys/stat.rs":"337dea8d55d6177dc85b3235b40b8a3e81af7f4a6e2806a0b2f730bec5424350","src/sys/statfs.rs":"17103659a85279bac046c69cb3b22bf2c11c2492cffb0edfa4c3b233d161a2f2","src/sys/statvfs.rs":"f81e3900ef90d62e7eceaf1b6ff8dcfd965466714c033eb4717687f692171f48","src/sys/sysinfo.rs":"b4519b1ca091c9dbe94d2a6fd6304944bf3df5626973d2c6884022559706f0d9","src/sys/termios.rs":"7923f9846a8122096b6b1cd240d3618b876ce500a751ac434954d172e2e85745","src/sys/time.rs":"9026033b60a5ccc95b70424aef043c8c748722e2ea8c7c86366ecd4585b651a0","src/sys/timer.rs":"8c10f0e7cfac857ad00460be30bc68b957909cc9296e70718d3b5d4a0babafde","src/sys/timerfd.rs":"ef7c48aefdcfac13316eeddbef5da04cf12e9f574b8d9f43402c02b6b8db86b3","src/sys/uio.rs":"e1d59ccbee9d46c65d3aa8c36aa3a3222539beea0d20163a8b707d08fca14e09","src/sys/utsname.rs":"0cdda0cc111caaa0e4ebe2d4588bdc825d878e5bcb7a9136073b15f87a20e11f","src/sys/wait.rs":"cc70d2d9b880ff6c48577a479c209af6127067bc013a90ee22538e4dfad7d2b4","src/time.rs":"d4e0872361a57810837f5bd790cbca3a2b9db1ac4694a3c52d1564ad3532d3be","src/ucontext.rs":"b8f2e04757a9c2bc38c3b1e259d3a013da8a730fe9bfbe5487637395681b43d3","src/unistd.rs":"e19be456124731c5b93aef92ed72a7c4c9092e28db0649814ba3fcc1f0d620fa","test/common/mod.rs":"1d7e28e3635754664cd056f3a1079232ff5c118df619e1d0551a9972eb0b3cd6","test/sys/mod.rs":"87b2891d83067ff21f72b8ff7fde3019dc45b6877282ac278b6da151de45c7a7","test/sys/test_aio.rs":"4dac9f716f852f1f438f78d6e64bf041e6fd316bf15dcb27afffaf0894bdefa6","test/sys/test_aio_drop.rs":"614070155fa16a979b7341d001639c5ce24a1d6f632c3abce45a5a6d49c4039b","test/sys/test_epoll.rs":"ffe95e36c79e37426ef8e8ca3b137b7f35ea0333ce666a20a4b7878db17680e9","test/sys/test_inotify.rs":"a141b9a995892547b51ceeb6761a70a6b86d37e8f38d13ea2c497b81b4b0f49f","test/sys/test_ioctl.rs":"00ccc5afb665e533a0a4b6d6a6be438bcaea19fce335390feef4e91d17b3036c","test/sys/test_mman.rs":"2b4161964c9204b74659028b0f89a88f4e3bcc9886137a3039737cd91d2698cb","test/sys/test_pthread.rs":"ace36a2f5587f1874854281b4fd84e4e4d892a1e3c5cc38ced57975739522ad6","test/sys/test_ptrace.rs":"0385eebc8b1b8c72f655b745769decd9143ad83018198375982da0896310456b","test/sys/test_select.rs":"54cea1c34ad28d5770a613c1c3cbc3b1064b22037ec2b9d3fcd422d3be9e60a7","test/sys/test_signal.rs":"acc9941227bd3e2afad323613c2b8c83902ed0486d3745fd72704f395924f1e4","test/sys/test_signalfd.rs":"0e1060143e2612c490bc3d0168d0bbb042ef55e3f1d91d2578b9e42e4310a14d","test/sys/test_socket.rs":"d2df1001f9a0b2dac0b88051a67c3868bb216e72e4da4eecd11c4448b9fa4b40","test/sys/test_sockopt.rs":"4465f22f718442f3f7b502e052dad02b93cebfa3b71fa55ff4f25fb02534acab","test/sys/test_stat.rs":"6630a28217fd708bb84cd4f7e7101836b74f2420f9888923fdab664ccc331c1d","test/sys/test_sysinfo.rs":"ffd49bc96375914a2c4a4a59730cae8072f85771e2c4a80d3403df38d967e272","test/sys/test_termios.rs":"e5bcef10c84bd7583d600d5601835bcb3cfc88781cb283ab0185bbef5faf4327","test/sys/test_timerfd.rs":"cfed3abf58118611d08f6985251a7739cff67108e11214222a1d2394a3a026ce","test/sys/test_uio.rs":"326
56bd0a5699e4d019aa928edf104637937179782914a82d50d37226e84c421","test/sys/test_wait.rs":"6fd59fffeeb09ff620c359baefd062ba777598982b6cb001ccc07b6bc7605493","test/test.rs":"11f40b0718ddd1a150cb9e703d56d0b2a9462306505a2245ddf273a2011f48b5","test/test_clearenv.rs":"45ca548035b3c20ec87314715feaba2be973709a635d85b8cde46fd1d9f1ecd4","test/test_dir.rs":"ae3c11c58cb06da6557aa2a839c6653c54cd7724283fffe9df5a5d3feabdd89a","test/test_fcntl.rs":"71dcb87f7b04d78fc62937ba46cb7f0f1f2dbb330b63a996ea2e8ec9056b98a9","test/test_kmod/hello_mod/Makefile":"0219f7bce0603f97d997fb377ca071966c90333ecc665e78a54dfeb97a9c811b","test/test_kmod/hello_mod/hello.c":"bcac6b19c5bd807e1f3878c15e426acc85785a8ade9840c3bb4d068635c9188c","test/test_kmod/mod.rs":"b4ae25841c2f06f32de9f1acd8230eeccd7095721302ebe78ad454e4e4f9c783","test/test_mount.rs":"6dd242b6e23c9c39e1a75612bbea62573898818ab374c3c032c2cdb97033554d","test/test_mq.rs":"136071f24131aac0e65d5f29ac18e3806641dfae1164813f5570c0e3a6f70553","test/test_net.rs":"f2912327ebb2a3d37e6cff02a5ac3106cf889cc5c74404db4ef0034059ba26f1","test/test_nix_path.rs":"01ba4719c80b6fe911b091a7c05124b64eeece964e09c058ef8f9805daca546b","test/test_nmount.rs":"d6c112547bb80968170b5497cda4b6cbf69dabec6f51d494bd52298995ceff18","test/test_poll.rs":"3e0b8f0397ba080785c61a3bfc3d637bc87f324bc4e52b5f1bf3ca0d32dbc9fe","test/test_pty.rs":"b26238a0783746cb31880e11eebc1913149be999ce75fbc2d6677bdd1e2731b2","test/test_ptymaster_drop.rs":"ae63c815f5028ddc67d194e86559483018ab1816316bdb917f40cee9364fd8a5","test/test_resource.rs":"40aef790ab745cec31a4b333d2ca406b462aa9bdf4a6d3756371e498b8d51e9a","test/test_sched.rs":"c4579bd376fab8816e63b07fa9ace31dc08e63ebb7c855a2c450698090d1d1e8","test/test_sendfile.rs":"bb41b4f3621b518e397d3a5b5ad3c5dcef3fe506afe516eab7572fbab92b77e3","test/test_stat.rs":"c407ca47a5258750076d041afad2f6add4c3563be36628bde1c5b314f5d0765d","test/test_time.rs":"f7a21b1e279e60e84909d5dadda97ded66d3326b131fe317badf9af0a1b50335","test/test_timer.rs":"3ae20d364f075d2811f3ff94eda9886682cc21d8807656007d2464fe36d1e361","test/test_unistd.rs":"20a00be4fbe26302ea5fe50ce25b99265dc763db138663d6aa1d7ac729a1d292"},"package":"bfdda3d196821d6af13126e40375cdf7da646a96114af134d5f417a9a1dc8e1a"} +\ No newline at end of file 
++{"files":{"CHANGELOG.md":"8ee4e556e53d1b39400a48675d3ecff0bf27e419accab7ca3be76ab934289548","Cargo.toml":"2e6eff9170182f107188b8bc9802efd044ef47178afc7f138950ecff1c1ceb96","LICENSE":"66e3ee1fa7f909ad3c612d556f2a0cdabcd809ad6e66f3b0605015ac64841b70","README.md":"1ed9a0e26ae6e575b3262ae734dd02889455593b761ee62403ea5a64104f3c9c","src/dir.rs":"0280a2dc480bd913f24ed84fbe26569fa2e8eefa660e5ad7c21e05fc34c14d16","src/env.rs":"028bc5e20139ebba418a655a2978a53335dc7680bf1de43d2c8333dd72cfa5c4","src/errno.rs":"e55d075858e349d9afea9ce0480f7fb7ba4dccccf0694fd7b3280b918836203c","src/fcntl.rs":"ea8f43d8fec0b6c3b7d903333e4c1ce85611684a4afd561c55cfe4b61a979e94","src/features.rs":"5b4a0831e5f4b79a6f0e42ed052fd66c875da18959750be51e41fb59ac19feed","src/ifaddrs.rs":"377865eb48040d28c392a1aec0221320108e3392ea285d23405ae2cfa5c54b20","src/kmod.rs":"c818ced08d55ae36fdf82fa914ba856b688e37234d574d3faa37128211d512fb","src/lib.rs":"a62fac2ba7111157c5b64251f67f8a189f04bd587d5c80703454a596ea7ae5d9","src/macros.rs":"e23d7d8be22ef0bf9febaaf2739585453103607c0139bd3995a324e4a16d011e","src/mount/bsd.rs":"4cf35606a63d7ca41caac3b38f01e2b70c63e71978c0529f19fc79182629dbe0","src/mount/linux.rs":"6e5d61788dedf1ca4416c6c6a3a9c6c747f9352c26d863f4a1d4142e288584d6","src/mount/mod.rs":"ba9f60eb831224ab73bdd87e00e15d13b9ce9efb70b18bf8f3fe60406d522b3e","src/mqueue.rs":"ed0a189036b2437b5f7f7f1312fa545540b06ca72171b451d8bce42cc3627534","src/net/if_.rs":"b32a8a1f952de60d95e549779a5c673fd72aa665e86bfdfc8ec6badf3016b9b1","src/net/mod.rs":"577f70170e53d4a6de1abb70bf8f1031ec3e65c0e63ef5fcf05c907125e7ac17","src/poll.rs":"2fc1d144fb40db51811c6357b520ab7993529702d8f0d8060c903118ff4f7259","src/pty.rs":"27b4f76c23acf02542674017067fee74cdcac907338458700a1aa4d6f6a62e27","src/sched.rs":"403aa5ebed81910263d42a94717612b737550bf053227b7d90f1c8949188d919","src/sys/aio.rs":"ae091de8540c97da374a39e7d154c1b3ce50f41e6fc20a45c6b06eb838e74366","src/sys/epoll.rs":"28e22debf474d1b047e8044a00b354c25dab2fa125960f9f2f14cc34289fd5c9","src/sys/event.rs":"dbd8e84bccb813839295b0a336485783ef19548d2317931f0ceb5ee62f839a40","src/sys/eventfd.rs":"c8db8f5874726fdad289ad5e2603a7d71a1ae5a899dcde3a35d3edff8b498b7e","src/sys/inotify.rs":"5b4da774313afa9c28c3f92f9d07dce9bf4c8d044fd6a16f19480e79a19e808b","src/sys/ioctl/bsd.rs":"bbd02e30b0a78c1cb22777d9b00cfcbba9c68505cffc06118ac68474cf6fea39","src/sys/ioctl/linux.rs":"54bad026ee637b73b95dad8135b6db61cae855670fd9323e7bf21acaff0827f4","src/sys/ioctl/mod.rs":"89b20579476b2e0254e0ecb1b41830cccd7027a22cbdb816a9d4ec3924842ac1","src/sys/memfd.rs":"f58d7fbe67c4b994832d72f5fbd59c136c8f1ae88ea8b0bc1c099db2d847ee6c","src/sys/mman.rs":"17df1bc34ba92bdd6bad1e11e4ef139998117f6c468c8f560421858f3cc899a5","src/sys/mod.rs":"baabf649f758ad4acce849ec1795dd4e4f9c6539e677bad5fa777300a4871dcb","src/sys/personality.rs":"aa89760c023bfec3fca5d8636f9eac9d337f5547933793ce6df7a0de97ae6ee1","src/sys/pthread.rs":"258cdf7ff0b61a4afa6d228109e4cb4fb88d859bb8dfe6c959d95130fb010906","src/sys/ptrace/bsd.rs":"4c590d8f023ff52f396f8b6f2150c08e5c9486d3088d9c173db33a70d616b800","src/sys/ptrace/linux.rs":"c82db3fb18aa97755f9ccb440a957cd46d664968a94045830c5d74d2d53bc19f","src/sys/ptrace/mod.rs":"e9e5d970097f5eafffba900959d4fdbf233bff9ed7f599fc9896bb44d86a57a4","src/sys/quota.rs":"02e698a25f0986fb43aa88689f3d3d8b9edc6ae48496ad02f7214fccaa493e00","src/sys/reboot.rs":"eacdf57694a6629fb05787e16450446102a62818274495f2ad4e445807d09221","src/sys/resource.rs":"d498d0c00fd30e35e1269a8902cb812014d813f63ec95364f8f59f1912ba5657","src/sys/select.rs":"65c39b129d3cc85b8ca026ff26dcf80c56398
24f43715881c3c1bbb6bf0c8a60","src/sys/sendfile.rs":"7a62099f9771fecff49b9c11210341e3c1a4acf22f8dfb96d395e29421648676","src/sys/signal.rs":"c3e13a2edea54d190a4b051f62efc97953c00b5051a9fda0e39e3bc732a31939","src/sys/signalfd.rs":"583524434fd37143be3db37fa6f6cbd339f7946416f05b58a95e246947e5cc9d","src/sys/socket/addr.rs":"84df895052f59ec84774b189ffb285d2a37a9703af6c8310ae5040cca1a2583e","src/sys/socket/mod.rs":"6deb55438cad3606385303f036b0efd842dfd759fba93611911f5a4f2613c9dc","src/sys/socket/sockopt.rs":"ed1f920364bfe88bbe6eaeeefb27a63bfcdd7d67604aca2f03e22f2b502df55a","src/sys/stat.rs":"337dea8d55d6177dc85b3235b40b8a3e81af7f4a6e2806a0b2f730bec5424350","src/sys/statfs.rs":"17103659a85279bac046c69cb3b22bf2c11c2492cffb0edfa4c3b233d161a2f2","src/sys/statvfs.rs":"f81e3900ef90d62e7eceaf1b6ff8dcfd965466714c033eb4717687f692171f48","src/sys/sysinfo.rs":"b4519b1ca091c9dbe94d2a6fd6304944bf3df5626973d2c6884022559706f0d9","src/sys/termios.rs":"7923f9846a8122096b6b1cd240d3618b876ce500a751ac434954d172e2e85745","src/sys/time.rs":"9026033b60a5ccc95b70424aef043c8c748722e2ea8c7c86366ecd4585b651a0","src/sys/timer.rs":"8c10f0e7cfac857ad00460be30bc68b957909cc9296e70718d3b5d4a0babafde","src/sys/timerfd.rs":"ef7c48aefdcfac13316eeddbef5da04cf12e9f574b8d9f43402c02b6b8db86b3","src/sys/uio.rs":"e1d59ccbee9d46c65d3aa8c36aa3a3222539beea0d20163a8b707d08fca14e09","src/sys/utsname.rs":"0cdda0cc111caaa0e4ebe2d4588bdc825d878e5bcb7a9136073b15f87a20e11f","src/sys/wait.rs":"cc70d2d9b880ff6c48577a479c209af6127067bc013a90ee22538e4dfad7d2b4","src/time.rs":"d4e0872361a57810837f5bd790cbca3a2b9db1ac4694a3c52d1564ad3532d3be","src/ucontext.rs":"b8f2e04757a9c2bc38c3b1e259d3a013da8a730fe9bfbe5487637395681b43d3","src/unistd.rs":"e19be456124731c5b93aef92ed72a7c4c9092e28db0649814ba3fcc1f0d620fa","test/common/mod.rs":"1d7e28e3635754664cd056f3a1079232ff5c118df619e1d0551a9972eb0b3cd6","test/sys/mod.rs":"87b2891d83067ff21f72b8ff7fde3019dc45b6877282ac278b6da151de45c7a7","test/sys/test_aio.rs":"4dac9f716f852f1f438f78d6e64bf041e6fd316bf15dcb27afffaf0894bdefa6","test/sys/test_aio_drop.rs":"614070155fa16a979b7341d001639c5ce24a1d6f632c3abce45a5a6d49c4039b","test/sys/test_epoll.rs":"ffe95e36c79e37426ef8e8ca3b137b7f35ea0333ce666a20a4b7878db17680e9","test/sys/test_inotify.rs":"a141b9a995892547b51ceeb6761a70a6b86d37e8f38d13ea2c497b81b4b0f49f","test/sys/test_ioctl.rs":"00ccc5afb665e533a0a4b6d6a6be438bcaea19fce335390feef4e91d17b3036c","test/sys/test_mman.rs":"2b4161964c9204b74659028b0f89a88f4e3bcc9886137a3039737cd91d2698cb","test/sys/test_pthread.rs":"ace36a2f5587f1874854281b4fd84e4e4d892a1e3c5cc38ced57975739522ad6","test/sys/test_ptrace.rs":"0385eebc8b1b8c72f655b745769decd9143ad83018198375982da0896310456b","test/sys/test_select.rs":"54cea1c34ad28d5770a613c1c3cbc3b1064b22037ec2b9d3fcd422d3be9e60a7","test/sys/test_signal.rs":"acc9941227bd3e2afad323613c2b8c83902ed0486d3745fd72704f395924f1e4","test/sys/test_signalfd.rs":"0e1060143e2612c490bc3d0168d0bbb042ef55e3f1d91d2578b9e42e4310a14d","test/sys/test_socket.rs":"d2df1001f9a0b2dac0b88051a67c3868bb216e72e4da4eecd11c4448b9fa4b40","test/sys/test_sockopt.rs":"4465f22f718442f3f7b502e052dad02b93cebfa3b71fa55ff4f25fb02534acab","test/sys/test_stat.rs":"6630a28217fd708bb84cd4f7e7101836b74f2420f9888923fdab664ccc331c1d","test/sys/test_sysinfo.rs":"ffd49bc96375914a2c4a4a59730cae8072f85771e2c4a80d3403df38d967e272","test/sys/test_termios.rs":"e5bcef10c84bd7583d600d5601835bcb3cfc88781cb283ab0185bbef5faf4327","test/sys/test_timerfd.rs":"cfed3abf58118611d08f6985251a7739cff67108e11214222a1d2394a3a026ce","test/sys/test_uio.rs":"326
56bd0a5699e4d019aa928edf104637937179782914a82d50d37226e84c421","test/sys/test_wait.rs":"6fd59fffeeb09ff620c359baefd062ba777598982b6cb001ccc07b6bc7605493","test/test.rs":"11f40b0718ddd1a150cb9e703d56d0b2a9462306505a2245ddf273a2011f48b5","test/test_clearenv.rs":"45ca548035b3c20ec87314715feaba2be973709a635d85b8cde46fd1d9f1ecd4","test/test_dir.rs":"ae3c11c58cb06da6557aa2a839c6653c54cd7724283fffe9df5a5d3feabdd89a","test/test_fcntl.rs":"71dcb87f7b04d78fc62937ba46cb7f0f1f2dbb330b63a996ea2e8ec9056b98a9","test/test_kmod/hello_mod/Makefile":"0219f7bce0603f97d997fb377ca071966c90333ecc665e78a54dfeb97a9c811b","test/test_kmod/hello_mod/hello.c":"bcac6b19c5bd807e1f3878c15e426acc85785a8ade9840c3bb4d068635c9188c","test/test_kmod/mod.rs":"b4ae25841c2f06f32de9f1acd8230eeccd7095721302ebe78ad454e4e4f9c783","test/test_mount.rs":"6dd242b6e23c9c39e1a75612bbea62573898818ab374c3c032c2cdb97033554d","test/test_mq.rs":"136071f24131aac0e65d5f29ac18e3806641dfae1164813f5570c0e3a6f70553","test/test_net.rs":"f2912327ebb2a3d37e6cff02a5ac3106cf889cc5c74404db4ef0034059ba26f1","test/test_nix_path.rs":"01ba4719c80b6fe911b091a7c05124b64eeece964e09c058ef8f9805daca546b","test/test_nmount.rs":"d6c112547bb80968170b5497cda4b6cbf69dabec6f51d494bd52298995ceff18","test/test_poll.rs":"3e0b8f0397ba080785c61a3bfc3d637bc87f324bc4e52b5f1bf3ca0d32dbc9fe","test/test_pty.rs":"b26238a0783746cb31880e11eebc1913149be999ce75fbc2d6677bdd1e2731b2","test/test_ptymaster_drop.rs":"ae63c815f5028ddc67d194e86559483018ab1816316bdb917f40cee9364fd8a5","test/test_resource.rs":"40aef790ab745cec31a4b333d2ca406b462aa9bdf4a6d3756371e498b8d51e9a","test/test_sched.rs":"c4579bd376fab8816e63b07fa9ace31dc08e63ebb7c855a2c450698090d1d1e8","test/test_sendfile.rs":"bb41b4f3621b518e397d3a5b5ad3c5dcef3fe506afe516eab7572fbab92b77e3","test/test_stat.rs":"c407ca47a5258750076d041afad2f6add4c3563be36628bde1c5b314f5d0765d","test/test_time.rs":"f7a21b1e279e60e84909d5dadda97ded66d3326b131fe317badf9af0a1b50335","test/test_timer.rs":"3ae20d364f075d2811f3ff94eda9886682cc21d8807656007d2464fe36d1e361","test/test_unistd.rs":"20a00be4fbe26302ea5fe50ce25b99265dc763db138663d6aa1d7ac729a1d292"},"package":"bfdda3d196821d6af13126e40375cdf7da646a96114af134d5f417a9a1dc8e1a"} +diff --git a/third_party/rust/nix/src/sys/ioctl/linux.rs b/third_party/rust/nix/src/sys/ioctl/linux.rs +index 0c0a2090538f8..214d9e8c60281 100644 +--- a/third_party/rust/nix/src/sys/ioctl/linux.rs ++++ b/third_party/rust/nix/src/sys/ioctl/linux.rs +@@ -41,6 +41,7 @@ mod consts { + target_arch = "s390x", + target_arch = "x86_64", + target_arch = "aarch64", ++ target_arch = "loongarch64", + target_arch = "riscv32", + target_arch = "riscv64" + ))] +diff --git a/toolkit/components/telemetry/pingsender/pingsender.cpp b/toolkit/components/telemetry/pingsender/pingsender.cpp +index 30f2907c720e1..e6645227a2949 100644 +--- a/toolkit/components/telemetry/pingsender/pingsender.cpp ++++ b/toolkit/components/telemetry/pingsender/pingsender.cpp +@@ -10,6 +10,7 @@ + #include + #include + #include ++#include + #include + + #include +diff --git a/toolkit/moz.configure b/toolkit/moz.configure +index 8b462ecde463f..a4aa84cc7c45e 100644 +--- a/toolkit/moz.configure ++++ b/toolkit/moz.configure +@@ -2432,7 +2432,7 @@ with only_when(compile_environment | artifact_builds): + use_nasm = False + elif target.cpu == "x86_64": + flags = ["-D__x86_64__", "-DPIC", "-DELF", "-Pconfig_unix64.asm"] +- elif target.cpu in ("x86", "arm", "aarch64"): ++ elif target.cpu in ("x86", "arm", "aarch64", "loongarch64"): + flac_only = True + else: + enable = False +-- 
+2.43.0 + diff --git a/firefox-developer-edition/0002-Enable-VA-API-support-for-AMD-GPUs.patch b/firefox-developer-edition/0002-Enable-VA-API-support-for-AMD-GPUs.patch new file mode 100644 index 0000000000..143927ffe1 --- /dev/null +++ b/firefox-developer-edition/0002-Enable-VA-API-support-for-AMD-GPUs.patch @@ -0,0 +1,31 @@ +From 9f3a0a22ba3c6ad1f14e90cfc2774b87215d7135 Mon Sep 17 00:00:00 2001 +From: Jiangjin Wang +Date: Tue, 14 Nov 2023 18:14:20 -0800 +Subject: [PATCH 2/5] Enable VA-API support for AMD GPUs + +--- + widget/gtk/GfxInfo.cpp | 8 -------- + 1 file changed, 8 deletions(-) + +diff --git a/widget/gtk/GfxInfo.cpp b/widget/gtk/GfxInfo.cpp +index b34e85baa28e5..8c95ce0d4274b 100644 +--- a/widget/gtk/GfxInfo.cpp ++++ b/widget/gtk/GfxInfo.cpp +@@ -1112,14 +1112,6 @@ const nsTArray& GfxInfo::GetGfxDriverInfo() { + nsIGfxInfo::FEATURE_BLOCKED_DEVICE, DRIVER_LESS_THAN, V(23, 1, 1, 0), + "FEATURE_HARDWARE_VIDEO_DECODING_AMD_DISABLE", "Mesa 23.1.1.0"); + +- // Disable on Release/late Beta on AMD +-#if !defined(EARLY_BETA_OR_EARLIER) +- APPEND_TO_DRIVER_BLOCKLIST(OperatingSystem::Linux, DeviceFamily::AtiAll, +- nsIGfxInfo::FEATURE_HARDWARE_VIDEO_DECODING, +- nsIGfxInfo::FEATURE_BLOCKED_DEVICE, +- DRIVER_COMPARISON_IGNORED, V(0, 0, 0, 0), +- "FEATURE_HARDWARE_VIDEO_DECODING_DISABLE", ""); +-#endif + //////////////////////////////////// + // FEATURE_HW_DECODED_VIDEO_ZERO_COPY - ALLOWLIST + APPEND_TO_DRIVER_BLOCKLIST2(OperatingSystem::Linux, DeviceFamily::All, +-- +2.43.0 + diff --git a/firefox-developer-edition/0003-Remove-architectural-limit-on-VA-API-support.patch b/firefox-developer-edition/0003-Remove-architectural-limit-on-VA-API-support.patch new file mode 100644 index 0000000000..aa45fa3e87 --- /dev/null +++ b/firefox-developer-edition/0003-Remove-architectural-limit-on-VA-API-support.patch @@ -0,0 +1,40 @@ +From b25c3742c98c87de9621eac8b672f9381e15c088 Mon Sep 17 00:00:00 2001 +From: Jiangjin Wang +Date: Tue, 14 Nov 2023 18:16:46 -0800 +Subject: [PATCH 3/5] Remove architectural limit on VA-API support + +--- + toolkit/moz.configure | 9 ++------- + 1 file changed, 2 insertions(+), 7 deletions(-) + +diff --git a/toolkit/moz.configure b/toolkit/moz.configure +index a4aa84cc7c45e..67fc08237bba4 100644 +--- a/toolkit/moz.configure ++++ b/toolkit/moz.configure +@@ -537,11 +537,8 @@ set_define("MOZ_WAYLAND", depends_if(wayland_headers)(lambda _: True)) + + # Hardware-accelerated video decode with VAAPI and V4L2 on Linux + # ============================================================== +-@depends(target, toolkit_gtk) +-def vaapi(target, toolkit_gtk): +- # VAAPI is mostly used on x86(-64) but is sometimes used on ARM/ARM64 SOCs. 
+- if target.cpu in ("arm", "aarch64", "x86", "x86_64") and toolkit_gtk: +- return True ++set_config("MOZ_ENABLE_VAAPI", True, when=toolkit_gtk) ++set_define("MOZ_ENABLE_VAAPI", True, when=toolkit_gtk) + + + @depends(target, toolkit_gtk) +@@ -552,9 +549,7 @@ def v4l2(target, toolkit_gtk): + return True + + +-set_config("MOZ_ENABLE_VAAPI", True, when=vaapi) + set_config("MOZ_ENABLE_V4L2", True, when=v4l2) +-set_define("MOZ_ENABLE_VAAPI", True, when=vaapi) + set_define("MOZ_ENABLE_V4L2", True, when=v4l2) + + +-- +2.43.0 + diff --git a/firefox-developer-edition/0004-Enable-WebRTC-for-LoongArch.patch b/firefox-developer-edition/0004-Enable-WebRTC-for-LoongArch.patch new file mode 100644 index 0000000000..3cd2fcf4f4 --- /dev/null +++ b/firefox-developer-edition/0004-Enable-WebRTC-for-LoongArch.patch @@ -0,0 +1,152 @@ +From 476458e2e0cafaa5fe5fbc6a99750dd920e7ba67 Mon Sep 17 00:00:00 2001 +From: Jiangjin Wang +Date: Tue, 21 Nov 2023 17:17:16 -0800 +Subject: [PATCH 4/5] Enable WebRTC for LoongArch + +--- + .../common_audio/common_audio_c_gn/moz.build | 8 ++++++ + .../spl_sqrt_floor_gn/moz.build | 6 ++++ + .../aecm/aecm_core_gn/moz.build | 6 ++++ + .../desktop_capture_gn/moz.build | 28 +++++++++++++++++++ + .../desktop_capture/primitives_gn/moz.build | 4 +++ + third_party/libwebrtc/moz.build | 7 +++++ + toolkit/moz.configure | 1 + + 7 files changed, 60 insertions(+) + +diff --git a/third_party/libwebrtc/common_audio/common_audio_c_gn/moz.build b/third_party/libwebrtc/common_audio/common_audio_c_gn/moz.build +index 60ee6cfc164be..1e69b2881ca90 100644 +--- a/third_party/libwebrtc/common_audio/common_audio_c_gn/moz.build ++++ b/third_party/libwebrtc/common_audio/common_audio_c_gn/moz.build +@@ -255,6 +255,14 @@ if CONFIG["CPU_ARCH"] == "riscv64": + "/third_party/libwebrtc/common_audio/signal_processing/filter_ar_fast_q12.c" + ] + ++if CONFIG["CPU_ARCH"] == "loongarch64": ++ ++ UNIFIED_SOURCES += [ ++ "/third_party/libwebrtc/common_audio/signal_processing/complex_bit_reverse.c", ++ "/third_party/libwebrtc/common_audio/signal_processing/complex_fft.c", ++ "/third_party/libwebrtc/common_audio/signal_processing/filter_ar_fast_q12.c" ++ ] ++ + if CONFIG["CPU_ARCH"] == "x86": + + DEFINES["WEBRTC_ENABLE_AVX2"] = True +diff --git a/third_party/libwebrtc/common_audio/third_party/spl_sqrt_floor/spl_sqrt_floor_gn/moz.build b/third_party/libwebrtc/common_audio/third_party/spl_sqrt_floor/spl_sqrt_floor_gn/moz.build +index d2d0287623b54..36ad6222b3dea 100644 +--- a/third_party/libwebrtc/common_audio/third_party/spl_sqrt_floor/spl_sqrt_floor_gn/moz.build ++++ b/third_party/libwebrtc/common_audio/third_party/spl_sqrt_floor/spl_sqrt_floor_gn/moz.build +@@ -174,6 +174,12 @@ if CONFIG["CPU_ARCH"] == "riscv64": + "/third_party/libwebrtc/common_audio/third_party/spl_sqrt_floor/spl_sqrt_floor.c" + ] + ++if CONFIG["CPU_ARCH"] == "loongarch64": ++ ++ UNIFIED_SOURCES += [ ++ "/third_party/libwebrtc/common_audio/third_party/spl_sqrt_floor/spl_sqrt_floor.c" ++ ] ++ + if CONFIG["CPU_ARCH"] == "x86": + + DEFINES["WEBRTC_ENABLE_AVX2"] = True +diff --git a/third_party/libwebrtc/modules/audio_processing/aecm/aecm_core_gn/moz.build b/third_party/libwebrtc/modules/audio_processing/aecm/aecm_core_gn/moz.build +index 9874037197896..147e12653cbe2 100644 +--- a/third_party/libwebrtc/modules/audio_processing/aecm/aecm_core_gn/moz.build ++++ b/third_party/libwebrtc/modules/audio_processing/aecm/aecm_core_gn/moz.build +@@ -206,6 +206,12 @@ if CONFIG["CPU_ARCH"] == "riscv64": + 
"/third_party/libwebrtc/modules/audio_processing/aecm/aecm_core_c.cc" + ] + ++if CONFIG["CPU_ARCH"] == "loongarch64": ++ ++ SOURCES += [ ++ "/third_party/libwebrtc/modules/audio_processing/aecm/aecm_core_c.cc" ++ ] ++ + if CONFIG["CPU_ARCH"] == "x86": + + DEFINES["WEBRTC_ENABLE_AVX2"] = True +diff --git a/third_party/libwebrtc/modules/desktop_capture/desktop_capture_gn/moz.build b/third_party/libwebrtc/modules/desktop_capture/desktop_capture_gn/moz.build +index b0a5d1522da86..0efac49ac5dc3 100644 +--- a/third_party/libwebrtc/modules/desktop_capture/desktop_capture_gn/moz.build ++++ b/third_party/libwebrtc/modules/desktop_capture/desktop_capture_gn/moz.build +@@ -390,6 +390,34 @@ if CONFIG["CPU_ARCH"] == "riscv64": + "/third_party/libwebrtc/modules/desktop_capture/linux/x11/x_window_property.cc" + ] + ++if CONFIG["CPU_ARCH"] == "loongarch64": ++ ++ DEFINES["USE_X11"] = "1" ++ DEFINES["WEBRTC_USE_X11"] = True ++ ++ OS_LIBS += [ ++ "X11", ++ "Xcomposite", ++ "Xdamage", ++ "Xext", ++ "Xfixes", ++ "Xrandr", ++ "Xrender" ++ ] ++ ++ UNIFIED_SOURCES += [ ++ "/third_party/libwebrtc/modules/desktop_capture/linux/x11/mouse_cursor_monitor_x11.cc", ++ "/third_party/libwebrtc/modules/desktop_capture/linux/x11/screen_capturer_x11.cc", ++ "/third_party/libwebrtc/modules/desktop_capture/linux/x11/shared_x_display.cc", ++ "/third_party/libwebrtc/modules/desktop_capture/linux/x11/window_capturer_x11.cc", ++ "/third_party/libwebrtc/modules/desktop_capture/linux/x11/window_finder_x11.cc", ++ "/third_party/libwebrtc/modules/desktop_capture/linux/x11/window_list_utils.cc", ++ "/third_party/libwebrtc/modules/desktop_capture/linux/x11/x_atom_cache.cc", ++ "/third_party/libwebrtc/modules/desktop_capture/linux/x11/x_error_trap.cc", ++ "/third_party/libwebrtc/modules/desktop_capture/linux/x11/x_server_pixel_buffer.cc", ++ "/third_party/libwebrtc/modules/desktop_capture/linux/x11/x_window_property.cc" ++ ] ++ + if CONFIG["CPU_ARCH"] == "x86": + + DEFINES["WEBRTC_ENABLE_AVX2"] = True +diff --git a/third_party/libwebrtc/modules/desktop_capture/primitives_gn/moz.build b/third_party/libwebrtc/modules/desktop_capture/primitives_gn/moz.build +index 8edb2c2344870..e6cf9f56540f7 100644 +--- a/third_party/libwebrtc/modules/desktop_capture/primitives_gn/moz.build ++++ b/third_party/libwebrtc/modules/desktop_capture/primitives_gn/moz.build +@@ -148,6 +148,10 @@ if CONFIG["CPU_ARCH"] == "riscv64": + + DEFINES["USE_X11"] = "1" + ++if CONFIG["CPU_ARCH"] == "loongarch64": ++ ++ DEFINES["USE_X11"] = "1" ++ + if CONFIG["CPU_ARCH"] == "x86": + + DEFINES["WEBRTC_ENABLE_AVX2"] = True +diff --git a/third_party/libwebrtc/moz.build b/third_party/libwebrtc/moz.build +index f528cb1108180..88fd9792acdf1 100644 +--- a/third_party/libwebrtc/moz.build ++++ b/third_party/libwebrtc/moz.build +@@ -692,3 +692,10 @@ if CONFIG["CPU_ARCH"] == "riscv64" and CONFIG["MOZ_X11"] == "1" and CONFIG["OS_T + "/third_party/libwebrtc/modules/desktop_capture/desktop_capture_gn", + "/third_party/libwebrtc/modules/desktop_capture/primitives_gn" + ] ++ ++if CONFIG["CPU_ARCH"] == "loongarch64" and CONFIG["MOZ_X11"] == "1" and CONFIG["OS_TARGET"] == "Linux": ++ ++ DIRS += [ ++ "/third_party/libwebrtc/modules/desktop_capture/desktop_capture_gn", ++ "/third_party/libwebrtc/modules/desktop_capture/primitives_gn" ++ ] +diff --git a/toolkit/moz.configure b/toolkit/moz.configure +index 67fc08237bba4..f7252539c7eaa 100644 +--- a/toolkit/moz.configure ++++ b/toolkit/moz.configure +@@ -1328,6 +1328,7 @@ def webrtc_default(target): + "ppc", + "ppc64", + "riscv64", ++ 
"loongarch64", + ) + + return os_match and cpu_match and target.endianness == "little" +-- +2.43.0 + diff --git a/firefox-developer-edition/0005-Fix-libyuv-build-with-LSX-LASX.patch b/firefox-developer-edition/0005-Fix-libyuv-build-with-LSX-LASX.patch new file mode 100644 index 0000000000..f69d1ab983 --- /dev/null +++ b/firefox-developer-edition/0005-Fix-libyuv-build-with-LSX-LASX.patch @@ -0,0 +1,398 @@ +From 7a3c2cbce2b6cf951c94850596dac20b5c3a98dc Mon Sep 17 00:00:00 2001 +From: WANG Xuerui +Date: Sun, 31 Dec 2023 13:16:33 +0800 +Subject: [PATCH 5/5] Fix libyuv build with LSX & LASX + +This is not of upstream quality, and will not be upstreamed as-is. +This is only meant as a quick-and-dirty build fix for LoongArch early +adopters. +--- + media/libyuv/libyuv/BUILD.gn | 37 +++++++++++++++++++++ + media/libyuv/libyuv/libyuv.gni | 2 ++ + media/libyuv/libyuv/libyuv.gypi | 5 +++ + media/libyuv/libyuv/source/row_lasx.cc | 46 ++++++++++++++++---------- + media/libyuv/libyuv/source/row_lsx.cc | 30 +++++++++++------ + 5 files changed, 92 insertions(+), 28 deletions(-) + +diff --git a/media/libyuv/libyuv/BUILD.gn b/media/libyuv/libyuv/BUILD.gn +index a72ff06558000..7d70848be9f1a 100644 +--- a/media/libyuv/libyuv/BUILD.gn ++++ b/media/libyuv/libyuv/BUILD.gn +@@ -69,6 +69,14 @@ group("libyuv") { + deps += [ ":libyuv_msa" ] + } + ++ if (libyuv_use_lsx) { ++ deps += [ ":libyuv_lsx" ] ++ } ++ ++ if (libyuv_use_lasx) { ++ deps += [ ":libyuv_lasx" ] ++ } ++ + if (!is_ios && !libyuv_disable_jpeg) { + # Make sure that clients of libyuv link with libjpeg. This can't go in + # libyuv_internal because in Windows x64 builds that will generate a clang +@@ -90,6 +98,7 @@ static_library("libyuv_internal") { + "include/libyuv/convert_from.h", + "include/libyuv/convert_from_argb.h", + "include/libyuv/cpu_id.h", ++ "include/libyuv/loongson_intrinsics.h", + "include/libyuv/mjpeg_decoder.h", + "include/libyuv/planar_functions.h", + "include/libyuv/rotate.h", +@@ -229,6 +238,34 @@ if (libyuv_use_msa) { + } + } + ++if (libyuv_use_lsx) { ++ static_library("libyuv_lsx") { ++ sources = [ ++ # LSX Source Files ++ "source/rotate_lsx.cc", ++ "source/row_lsx.cc", ++ "source/scale_lsx.cc", ++ ] ++ ++ deps = [ ":libyuv_internal" ] ++ ++ public_configs = [ ":libyuv_config" ] ++ } ++} ++ ++if (libyuv_use_lasx) { ++ static_library("libyuv_lasx") { ++ sources = [ ++ # LASX Source Files ++ "source/row_lasx.cc", ++ ] ++ ++ deps = [ ":libyuv_internal" ] ++ ++ public_configs = [ ":libyuv_config" ] ++ } ++} ++ + if (libyuv_include_tests) { + config("libyuv_unittest_warnings_config") { + if (!is_win) { +diff --git a/media/libyuv/libyuv/libyuv.gni b/media/libyuv/libyuv/libyuv.gni +index 852f08ca9d61f..ecad693508811 100644 +--- a/media/libyuv/libyuv/libyuv.gni ++++ b/media/libyuv/libyuv/libyuv.gni +@@ -20,4 +20,6 @@ declare_args() { + (current_cpu == "mips64el" || current_cpu == "mipsel") && mips_use_msa + libyuv_use_mmi = + (current_cpu == "mips64el" || current_cpu == "mipsel") && mips_use_mmi ++ libyuv_use_lsx = current_cpu == "loong64" || current_cpu == "loongarch64" ++ libyuv_use_lasx = current_cpu == "loong64" || current_cpu == "loongarch64" + } +diff --git a/media/libyuv/libyuv/libyuv.gypi b/media/libyuv/libyuv/libyuv.gypi +index 48936aa7b0239..9c19abf9c34c9 100644 +--- a/media/libyuv/libyuv/libyuv.gypi ++++ b/media/libyuv/libyuv/libyuv.gypi +@@ -18,6 +18,7 @@ + 'include/libyuv/convert_from.h', + 'include/libyuv/convert_from_argb.h', + 'include/libyuv/cpu_id.h', ++ 'include/libyuv/loongson_intrinsics.h', + 
'include/libyuv/macros_msa.h', + 'include/libyuv/mjpeg_decoder.h', + 'include/libyuv/planar_functions.h', +@@ -57,6 +58,7 @@ + 'source/rotate_argb.cc', + 'source/rotate_common.cc', + 'source/rotate_gcc.cc', ++ 'source/rotate_lsx.cc', + 'source/rotate_msa.cc', + 'source/rotate_neon.cc', + 'source/rotate_neon64.cc', +@@ -64,6 +66,8 @@ + 'source/row_any.cc', + 'source/row_common.cc', + 'source/row_gcc.cc', ++ 'source/row_lasx.cc', ++ 'source/row_lsx.cc', + 'source/row_msa.cc', + 'source/row_neon.cc', + 'source/row_neon64.cc', +@@ -73,6 +77,7 @@ + 'source/scale_argb.cc', + 'source/scale_common.cc', + 'source/scale_gcc.cc', ++ 'source/scale_lsx.cc', + 'source/scale_msa.cc', + 'source/scale_neon.cc', + 'source/scale_neon64.cc', +diff --git a/media/libyuv/libyuv/source/row_lasx.cc b/media/libyuv/libyuv/source/row_lasx.cc +index 29ac9254d9924..8c325483b116a 100644 +--- a/media/libyuv/libyuv/source/row_lasx.cc ++++ b/media/libyuv/libyuv/source/row_lasx.cc +@@ -543,8 +543,8 @@ void I422ToARGB4444Row_LASX(const uint8_t* src_y, + __m256i vec_yb, vec_yg, vec_ub, vec_vr, vec_ug, vec_vg; + __m256i vec_ubvr, vec_ugvg; + __m256i const_0x80 = __lasx_xvldi(0x80); +- __m256i alpha = {0xF000F000F000F000, 0xF000F000F000F000, 0xF000F000F000F000, +- 0xF000F000F000F000}; ++ __m256i alpha = {static_cast(0xF000F000F000F000), static_cast(0xF000F000F000F000), static_cast(0xF000F000F000F000), ++ static_cast(0xF000F000F000F000)}; + __m256i mask = {0x00F000F000F000F0, 0x00F000F000F000F0, 0x00F000F000F000F0, + 0x00F000F000F000F0}; + +@@ -595,8 +595,8 @@ void I422ToARGB1555Row_LASX(const uint8_t* src_y, + __m256i vec_yb, vec_yg, vec_ub, vec_vr, vec_ug, vec_vg; + __m256i vec_ubvr, vec_ugvg; + __m256i const_0x80 = __lasx_xvldi(0x80); +- __m256i alpha = {0x8000800080008000, 0x8000800080008000, 0x8000800080008000, +- 0x8000800080008000}; ++ __m256i alpha = {static_cast(0x8000800080008000), static_cast(0x8000800080008000), static_cast(0x8000800080008000), ++ static_cast(0x8000800080008000)}; + + YUVTORGB_SETUP(yuvconstants, vec_ub, vec_vr, vec_ug, vec_vg, vec_yg, vec_yb); + vec_ubvr = __lasx_xvilvl_h(vec_ub, vec_vr); +@@ -799,8 +799,8 @@ void ARGBToUVRow_LASX(const uint8_t* src_argb0, + 0x0009000900090009, 0x0009000900090009}; + __m256i control = {0x0000000400000000, 0x0000000500000001, 0x0000000600000002, + 0x0000000700000003}; +- __m256i const_0x8080 = {0x8080808080808080, 0x8080808080808080, +- 0x8080808080808080, 0x8080808080808080}; ++ __m256i const_0x8080 = {static_cast(0x8080808080808080), static_cast(0x8080808080808080), ++ static_cast(0x8080808080808080), static_cast(0x8080808080808080)}; + + for (x = 0; x < len; x++) { + DUP4_ARG2(__lasx_xvld, src_argb0, 0, src_argb0, 32, src_argb0, 64, +@@ -1037,8 +1037,8 @@ void ARGBToUV444Row_LASX(const uint8_t* src_argb, + __m256i const_38 = __lasx_xvldi(38); + __m256i const_94 = __lasx_xvldi(94); + __m256i const_18 = __lasx_xvldi(18); +- __m256i const_0x8080 = {0x8080808080808080, 0x8080808080808080, +- 0x8080808080808080, 0x8080808080808080}; ++ __m256i const_0x8080 = {static_cast(0x8080808080808080), static_cast(0x8080808080808080), ++ static_cast(0x8080808080808080), static_cast(0x8080808080808080)}; + __m256i control = {0x0000000400000000, 0x0000000500000001, 0x0000000600000002, + 0x0000000700000003}; + for (x = 0; x < len; x++) { +@@ -1609,8 +1609,8 @@ void ARGB1555ToUVRow_LASX(const uint8_t* src_argb1555, + __m256i const_38 = __lasx_xvldi(0x413); + __m256i const_94 = __lasx_xvldi(0x42F); + __m256i const_18 = __lasx_xvldi(0x409); +- __m256i const_8080 = {0x8080808080808080, 
0x8080808080808080, +- 0x8080808080808080, 0x8080808080808080}; ++ __m256i const_8080 = {static_cast(0x8080808080808080), static_cast(0x8080808080808080), ++ static_cast(0x8080808080808080), static_cast(0x8080808080808080)}; + + for (x = 0; x < len; x++) { + DUP4_ARG2(__lasx_xvld, src_argb1555, 0, src_argb1555, 32, next_argb1555, 0, +@@ -1726,8 +1726,8 @@ void RGB565ToUVRow_LASX(const uint8_t* src_rgb565, + __m256i const_38 = __lasx_xvldi(0x413); + __m256i const_94 = __lasx_xvldi(0x42F); + __m256i const_18 = __lasx_xvldi(0x409); +- __m256i const_8080 = {0x8080808080808080, 0x8080808080808080, +- 0x8080808080808080, 0x8080808080808080}; ++ __m256i const_8080 = {static_cast(0x8080808080808080), static_cast(0x8080808080808080), ++ static_cast(0x8080808080808080), static_cast(0x8080808080808080)}; + + for (x = 0; x < len; x++) { + DUP4_ARG2(__lasx_xvld, src_rgb565, 0, src_rgb565, 32, next_rgb565, 0, +@@ -1793,8 +1793,8 @@ void RGB24ToUVRow_LASX(const uint8_t* src_rgb24, + __m256i const_38 = __lasx_xvldi(0x413); + __m256i const_94 = __lasx_xvldi(0x42F); + __m256i const_18 = __lasx_xvldi(0x409); +- __m256i const_8080 = {0x8080808080808080, 0x8080808080808080, +- 0x8080808080808080, 0x8080808080808080}; ++ __m256i const_8080 = {static_cast(0x8080808080808080), static_cast(0x8080808080808080), ++ static_cast(0x8080808080808080), static_cast(0x8080808080808080)}; + __m256i shuff0_b = {0x15120F0C09060300, 0x00000000001E1B18, + 0x15120F0C09060300, 0x00000000001E1B18}; + __m256i shuff1_b = {0x0706050403020100, 0x1D1A1714110A0908, +@@ -1856,8 +1856,8 @@ void RAWToUVRow_LASX(const uint8_t* src_raw, + __m256i const_38 = __lasx_xvldi(0x413); + __m256i const_94 = __lasx_xvldi(0x42F); + __m256i const_18 = __lasx_xvldi(0x409); +- __m256i const_8080 = {0x8080808080808080, 0x8080808080808080, +- 0x8080808080808080, 0x8080808080808080}; ++ __m256i const_8080 = {static_cast(0x8080808080808080), static_cast(0x8080808080808080), ++ static_cast(0x8080808080808080), static_cast(0x8080808080808080)}; + __m256i shuff0_r = {0x15120F0C09060300, 0x00000000001E1B18, + 0x15120F0C09060300, 0x00000000001E1B18}; + __m256i shuff1_r = {0x0706050403020100, 0x1D1A1714110A0908, +@@ -2000,11 +2000,13 @@ void NV21ToARGBRow_LASX(const uint8_t* src_y, + } + } + ++#ifndef RgbConstants + struct RgbConstants { + uint8_t kRGBToY[4]; + uint16_t kAddY; + uint16_t pad; + }; ++#define RgbConstants RgbConstants + + // RGB to JPeg coefficients + // B * 0.1140 coefficient = 29 +@@ -2030,6 +2032,7 @@ static const struct RgbConstants kRgb24I601Constants = {{25, 129, 66, 0}, + static const struct RgbConstants kRawI601Constants = {{66, 129, 25, 0}, + 0x1080, + 0}; ++#endif // RgbConstaints + + // ARGB expects first 3 values to contain RGB and 4th value is ignored. 
+ static void ARGBToYMatrixRow_LASX(const uint8_t* src_argb, +@@ -2242,8 +2245,8 @@ void ARGBToUVJRow_LASX(const uint8_t* src_argb, + __m256i const_21 = __lasx_xvldi(0x415); + __m256i const_53 = __lasx_xvldi(0x435); + __m256i const_10 = __lasx_xvldi(0x40A); +- __m256i const_8080 = {0x8080808080808080, 0x8080808080808080, +- 0x8080808080808080, 0x8080808080808080}; ++ __m256i const_8080 = {static_cast(0x8080808080808080), static_cast(0x8080808080808080), ++ static_cast(0x8080808080808080), static_cast(0x8080808080808080)}; + __m256i shuff = {0x1614060412100200, 0x1E1C0E0C1A180A08, 0x1715070513110301, + 0x1F1D0F0D1B190B09}; + +@@ -2296,6 +2299,13 @@ void ARGBToUVJRow_LASX(const uint8_t* src_argb, + } + } + ++// undef for unified sources build ++#undef YUVTORGB_SETUP ++#undef YUVTORGB ++#undef I444TORGB ++#undef STOREARGB ++#undef RGBTOUV ++ + #ifdef __cplusplus + } // extern "C" + } // namespace libyuv +diff --git a/media/libyuv/libyuv/source/row_lsx.cc b/media/libyuv/libyuv/source/row_lsx.cc +index 9c1e16f22e02d..91221ff03ca29 100644 +--- a/media/libyuv/libyuv/source/row_lsx.cc ++++ b/media/libyuv/libyuv/source/row_lsx.cc +@@ -407,7 +407,7 @@ void ARGB1555ToUVRow_LSX(const uint8_t* src_argb1555, + __m128i const_38 = __lsx_vldi(0x413); + __m128i const_94 = __lsx_vldi(0x42F); + __m128i const_18 = __lsx_vldi(0x409); +- __m128i const_8080 = {0x8080808080808080, 0x8080808080808080}; ++ __m128i const_8080 = {static_cast(0x8080808080808080), static_cast(0x8080808080808080)}; + + for (x = 0; x < len; x++) { + DUP4_ARG2(__lsx_vld, src_argb1555, 0, src_argb1555, 16, next_argb1555, 0, +@@ -516,7 +516,7 @@ void RGB565ToUVRow_LSX(const uint8_t* src_rgb565, + __m128i const_38 = __lsx_vldi(0x413); + __m128i const_94 = __lsx_vldi(0x42F); + __m128i const_18 = __lsx_vldi(0x409); +- __m128i const_8080 = {0x8080808080808080, 0x8080808080808080}; ++ __m128i const_8080 = {static_cast(0x8080808080808080), static_cast(0x8080808080808080)}; + + for (x = 0; x < len; x++) { + DUP4_ARG2(__lsx_vld, src_rgb565, 0, src_rgb565, 16, next_rgb565, 0, +@@ -577,7 +577,7 @@ void RGB24ToUVRow_LSX(const uint8_t* src_rgb24, + __m128i const_38 = __lsx_vldi(0x413); + __m128i const_94 = __lsx_vldi(0x42F); + __m128i const_18 = __lsx_vldi(0x409); +- __m128i const_8080 = {0x8080808080808080, 0x8080808080808080}; ++ __m128i const_8080 = {static_cast(0x8080808080808080), static_cast(0x8080808080808080)}; + __m128i shuff0_b = {0x15120F0C09060300, 0x00000000001E1B18}; + __m128i shuff1_b = {0x0706050403020100, 0x1D1A1714110A0908}; + __m128i shuff0_g = {0x1613100D0A070401, 0x00000000001F1C19}; +@@ -630,7 +630,7 @@ void RAWToUVRow_LSX(const uint8_t* src_raw, + __m128i const_38 = __lsx_vldi(0x413); + __m128i const_94 = __lsx_vldi(0x42F); + __m128i const_18 = __lsx_vldi(0x409); +- __m128i const_8080 = {0x8080808080808080, 0x8080808080808080}; ++ __m128i const_8080 = {static_cast(0x8080808080808080), static_cast(0x8080808080808080)}; + __m128i shuff0_r = {0x15120F0C09060300, 0x00000000001E1B18}; + __m128i shuff1_r = {0x0706050403020100, 0x1D1A1714110A0908}; + __m128i shuff0_g = {0x1613100D0A070401, 0x00000000001F1C19}; +@@ -865,7 +865,7 @@ void BGRAToUVRow_LSX(const uint8_t* src_bgra, + __m128i const_38 = __lsx_vldi(0x413); + __m128i const_94 = __lsx_vldi(0x42F); + __m128i const_18 = __lsx_vldi(0x409); +- __m128i const_8080 = {0x8080808080808080, 0x8080808080808080}; ++ __m128i const_8080 = {static_cast(0x8080808080808080), static_cast(0x8080808080808080)}; + + for (x = 0; x < len; x++) { + DUP4_ARG2(__lsx_vld, src_bgra, 0, src_bgra, 16, 
src_bgra, 32, src_bgra, 48, +@@ -913,7 +913,7 @@ void ABGRToUVRow_LSX(const uint8_t* src_abgr, + __m128i const_38 = __lsx_vldi(0x413); + __m128i const_94 = __lsx_vldi(0x42F); + __m128i const_18 = __lsx_vldi(0x409); +- __m128i const_8080 = {0x8080808080808080, 0x8080808080808080}; ++ __m128i const_8080 = {static_cast(0x8080808080808080), static_cast(0x8080808080808080)}; + + for (x = 0; x < len; x++) { + DUP4_ARG2(__lsx_vld, src_abgr, 0, src_abgr, 16, src_abgr, 32, src_abgr, 48, +@@ -961,7 +961,7 @@ void RGBAToUVRow_LSX(const uint8_t* src_rgba, + __m128i const_38 = __lsx_vldi(0x413); + __m128i const_94 = __lsx_vldi(0x42F); + __m128i const_18 = __lsx_vldi(0x409); +- __m128i const_8080 = {0x8080808080808080, 0x8080808080808080}; ++ __m128i const_8080 = {static_cast(0x8080808080808080), static_cast(0x8080808080808080)}; + + for (x = 0; x < len; x++) { + DUP4_ARG2(__lsx_vld, src_rgba, 0, src_rgba, 16, src_rgba, 32, src_rgba, 48, +@@ -1010,7 +1010,7 @@ void ARGBToUVJRow_LSX(const uint8_t* src_argb, + __m128i const_21 = __lsx_vldi(0x415); + __m128i const_53 = __lsx_vldi(0x435); + __m128i const_10 = __lsx_vldi(0x40A); +- __m128i const_8080 = {0x8080808080808080, 0x8080808080808080}; ++ __m128i const_8080 = {static_cast(0x8080808080808080), static_cast(0x8080808080808080)}; + + for (x = 0; x < len; x++) { + DUP4_ARG2(__lsx_vld, src_argb, 0, src_argb, 16, src_argb, 32, src_argb, 48, +@@ -1388,7 +1388,7 @@ void ARGBBlendRow_LSX(const uint8_t* src_argb, + __m128i const_256 = __lsx_vldi(0x500); + __m128i zero = __lsx_vldi(0); + __m128i alpha = __lsx_vldi(0xFF); +- __m128i control = {0xFF000000FF000000, 0xFF000000FF000000}; ++ __m128i control = {static_cast(0xFF000000FF000000), static_cast(0xFF000000FF000000)}; + + for (x = 0; x < len; x++) { + DUP4_ARG2(__lsx_vld, src_argb, 0, src_argb, 16, src_argb1, 0, src_argb1, 16, +@@ -1434,7 +1434,7 @@ void ARGBQuantizeRow_LSX(uint8_t* dst_argb, + __m128i vec_offset = __lsx_vreplgr2vr_b(interval_offset); + __m128i vec_scale = __lsx_vreplgr2vr_w(scale); + __m128i zero = __lsx_vldi(0); +- __m128i control = {0xFF000000FF000000, 0xFF000000FF000000}; ++ __m128i control = {static_cast(0xFF000000FF000000), static_cast(0xFF000000FF000000)}; + + for (x = 0; x < len; x++) { + DUP4_ARG2(__lsx_vld, dst_argb, 0, dst_argb, 16, dst_argb, 32, dst_argb, 48, +@@ -1643,11 +1643,13 @@ void HalfFloatRow_LSX(const uint16_t* src, + } + } + ++#ifndef RgbConstants + struct RgbConstants { + uint8_t kRGBToY[4]; + uint16_t kAddY; + uint16_t pad; + }; ++#define RgbConstants RgbConstants + + // RGB to JPeg coefficients + // B * 0.1140 coefficient = 29 +@@ -1673,6 +1675,7 @@ static const struct RgbConstants kRgb24I601Constants = {{25, 129, 66, 0}, + static const struct RgbConstants kRawI601Constants = {{66, 129, 25, 0}, + 0x1080, + 0}; ++#endif // RgbConstaints + + // ARGB expects first 3 values to contain RGB and 4th value is ignored. 
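The #ifndef/#define guard around struct RgbConstants and the trailing #undef block (both labelled for the unified sources build) deal with Firefox-style unified compilation, where several row_*.cc files are concatenated into one translation unit: without the guard the second file's identical struct definition would be a redefinition error, and without the #undefs the same helper macros defined again in the next concatenated source would be flagged as redefinitions. A simplified, self-contained illustration of the idiom (the two-file layout, the one-member struct, and the function names are hypothetical):

// What one unified translation unit effectively looks like.
// ---- contents of the first row_*.cc ----
#ifndef RgbConstants
struct RgbConstants { int kAddY; };   // defined only on first appearance
#define RgbConstants RgbConstants     // mark the definition as seen
#endif
#define RGBTOUV(x) ((x) + 1)
int first_user() { return RGBTOUV(RgbConstants{16}.kAddY); }
#undef RGBTOUV                        // keep the macro from leaking onward
// ---- contents of the second row_*.cc, appended by the unified build ----
#ifndef RgbConstants
struct RgbConstants { int kAddY; };   // skipped: the guard is already set
#define RgbConstants RgbConstants
#endif
#define RGBTOUV(x) ((x) + 2)
int second_user() { return RGBTOUV(RgbConstants{16}.kAddY); }
#undef RGBTOUV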
+ static void ARGBToYMatrixRow_LSX(const uint8_t* src_argb, +@@ -1853,6 +1856,13 @@ void RAWToYRow_LSX(const uint8_t* src_raw, uint8_t* dst_y, int width) { + RGBToYMatrixRow_LSX(src_raw, dst_y, width, &kRawI601Constants); + } + ++// undef for unified sources build ++#undef YUVTORGB_SETUP ++#undef YUVTORGB ++#undef I444TORGB ++#undef STOREARGB ++#undef RGBTOUV ++ + #ifdef __cplusplus + } // extern "C" + } // namespace libyuv +-- +2.43.0 + diff --git a/firefox-developer-edition/PKGBUILD b/firefox-developer-edition/PKGBUILD index 55cbe586ae..8873766e38 100644 --- a/firefox-developer-edition/PKGBUILD +++ b/firefox-developer-edition/PKGBUILD @@ -60,6 +60,11 @@ source=( $pkgname.desktop identity-icons-brand.svg firefox-install-dir.patch + 0001-Add-support-for-LoongArch64.patch + 0002-Enable-VA-API-support-for-AMD-GPUs.patch + 0003-Remove-architectural-limit-on-VA-API-support.patch + 0004-Enable-WebRTC-for-LoongArch.patch + 0005-Fix-libyuv-build-with-LSX-LASX.patch ) validpgpkeys=( # Mozilla Software Releases @@ -75,7 +80,12 @@ b2sums=('4eeb4ea242b9187abafb8e580f2038747bc2962230fa598a4de0f25f999ab378d92fc61 'SKIP' 'd2d14042a03ffcc5ed9212fca9cc167e8bfb2ba3f0d61a89441e033484cb914424d0f2544e0f1bc58992fee9cae03a73679352ee0fac9777fa5633ddc8d76e7d' '63a8dd9d8910f9efb353bed452d8b4b2a2da435857ccee083fc0c557f8c4c1339ca593b463db320f70387a1b63f1a79e709e9d12c69520993e26d85a3d742e34' - 'eb61793257458b20bc7ab5598240bc1901666d7fb7e971941af99ac706d387859642d8fba3130fa31789546b9123c7500edbe6373701ce9fc7d65aef3974c90c') + 'eb61793257458b20bc7ab5598240bc1901666d7fb7e971941af99ac706d387859642d8fba3130fa31789546b9123c7500edbe6373701ce9fc7d65aef3974c90c' + 'cab0bf0922520866aa9fddf9142512a0ff30437ab779dc4e266b278aea363d4077db5edad11ac30190ee69cc321ec6a7ab3eea8003982faeac991389417af7e9' + '19dc9f0e2aa13be99f7226dbf1e80eee67bbce3ac3ed8256894158565d62324589bc075df402bc43f5d597cc2de60bed6d68b58e20e9caa1f34776f680fe45b5' + 'bb2658edb90dc022df36d89206789ef30222ff1b26376b61b2340d421738fe240063bb7113c9deb828f00e8e297bda8b87e2da9d26796aa7fd4d48bef1aa0719' + '65e9739926174b3eecb8e01cae9805861f6a7c2cadf9faef7cda92acbef3a569a31b2e8c0f4f8ab726416a60e601e7a2a4eb1fc6cb74dfd2cb4ab1572c1fdd97' + 'e2fc795f224f34d14fab2655235a7e31b5fdbe84937ded697f6b1ccd1751bfb3a05ae6b46846201e201487d02a987322f3fc36cdd3208d4904c844caf3f2d628') # Google API keys (see http://www.chromium.org/developers/how-tos/api-keys) # Note: These are for Arch Linux use ONLY. 
For your own distribution, please @@ -95,6 +105,11 @@ prepare() { # Change install dir from 'firefox' to 'firefox-developer-edition' patch -Np1 -i ../firefox-install-dir.patch + patch -Np1 -i ../0001-Add-support-for-LoongArch64.patch + patch -Np1 -i ../0002-Enable-VA-API-support-for-AMD-GPUs.patch + patch -Np1 -i ../0003-Remove-architectural-limit-on-VA-API-support.patch + patch -Np1 -i ../0004-Enable-WebRTC-for-LoongArch.patch + patch -Np1 -i ../0005-Fix-libyuv-build-with-LSX-LASX.patch echo -n "$_google_api_key" >google-api-key echo -n "$_mozilla_api_key" >mozilla-api-key @@ -108,11 +123,11 @@ ac_add_options --enable-release ac_add_options --enable-hardening ac_add_options --enable-optimize ac_add_options --enable-rust-simd -ac_add_options --enable-linker=lld +#ac_add_options --enable-linker=lld ac_add_options --disable-install-strip -ac_add_options --disable-elf-hack +#ac_add_options --disable-elf-hack ac_add_options --disable-bootstrap -ac_add_options --with-wasi-sysroot=/usr/share/wasi-sysroot +ac_add_options --without-wasm-sandboxed-libraries # Branding ac_add_options --with-branding=browser/branding/aurora @@ -136,7 +151,7 @@ ac_add_options --with-system-nss # Features ac_add_options --enable-alsa ac_add_options --enable-jack -ac_add_options --enable-crashreporter +ac_add_options --disable-crashreporter ac_add_options --disable-updater ac_add_options --disable-tests END @@ -149,6 +164,11 @@ build() { export MOZBUILD_STATE_PATH="$srcdir/mozbuild" export MOZ_BUILD_DATE="$(date -u${SOURCE_DATE_EPOCH:+d @$SOURCE_DATE_EPOCH} +%Y%m%d%H%M%S)" export MOZ_NOSPAM=1 +# clang didn't support -mlsx + CFLAGS=${CFLAGS/-mlsx /} + CXXFLAGS=${CXXFLAGS/-mlsx /} + CFLAGS=${CFLAGS/-fstack-clash-protection/} + CXXFLAGS=${CXXFLAGS/-fstack-clash-protection/} # malloc_usable_size is used in various parts of the codebase CFLAGS="${CFLAGS/_FORTIFY_SOURCE=3/_FORTIFY_SOURCE=2}" @@ -159,35 +179,14 @@ build() { # Do 3-tier PGO echo "Building instrumented browser..." - cat >.mozconfig ../mozconfig - <.mozconfig ../mozconfig - <.mozconfig ../mozconfig - < +Date: Sun, 22 Oct 2023 22:13:17 -0700 +Subject: [PATCH 1/5] Add support for LoongArch64 + +Adapted from LoongArchLinux. Rebased to Firefox 118.0.2. 
+ +Co-Authored-By: loongson +Co-Authored-By: WANG Xuerui +--- + third_party/libwebrtc/build/build_config.h | 4 ++++ + third_party/rust/nix/.cargo-checksum.json | 2 +- + third_party/rust/nix/src/sys/ioctl/linux.rs | 1 + + toolkit/components/telemetry/pingsender/pingsender.cpp | 1 + + toolkit/moz.configure | 2 +- + 5 files changed, 8 insertions(+), 2 deletions(-) + +diff --git a/third_party/libwebrtc/build/build_config.h b/third_party/libwebrtc/build/build_config.h +index c39ae9da50f99..28191de02654b 100644 +--- a/third_party/libwebrtc/build/build_config.h ++++ b/third_party/libwebrtc/build/build_config.h +@@ -210,6 +210,10 @@ + #define ARCH_CPU_SPARC 1 + #define ARCH_CPU_32_BITS 1 + #define ARCH_CPU_BIG_ENDIAN 1 ++#elif defined(__loongarch_lp64) ++#define ARCH_CPU_LOONGARCH64 1 ++#define ARCH_CPU_64_BITS 1 ++#define ARCH_CPU_LITTLE_ENDIAN 1 + #else + #error Please add support for your architecture in build/build_config.h + #endif +diff --git a/third_party/rust/nix/.cargo-checksum.json b/third_party/rust/nix/.cargo-checksum.json +index f4c932b88926b..b7b9c9f3c9a89 100644 +--- a/third_party/rust/nix/.cargo-checksum.json ++++ b/third_party/rust/nix/.cargo-checksum.json +@@ -1 +1 @@ +-{"files":{"CHANGELOG.md":"8ee4e556e53d1b39400a48675d3ecff0bf27e419accab7ca3be76ab934289548","Cargo.toml":"2e6eff9170182f107188b8bc9802efd044ef47178afc7f138950ecff1c1ceb96","LICENSE":"66e3ee1fa7f909ad3c612d556f2a0cdabcd809ad6e66f3b0605015ac64841b70","README.md":"1ed9a0e26ae6e575b3262ae734dd02889455593b761ee62403ea5a64104f3c9c","src/dir.rs":"0280a2dc480bd913f24ed84fbe26569fa2e8eefa660e5ad7c21e05fc34c14d16","src/env.rs":"028bc5e20139ebba418a655a2978a53335dc7680bf1de43d2c8333dd72cfa5c4","src/errno.rs":"e55d075858e349d9afea9ce0480f7fb7ba4dccccf0694fd7b3280b918836203c","src/fcntl.rs":"ea8f43d8fec0b6c3b7d903333e4c1ce85611684a4afd561c55cfe4b61a979e94","src/features.rs":"5b4a0831e5f4b79a6f0e42ed052fd66c875da18959750be51e41fb59ac19feed","src/ifaddrs.rs":"377865eb48040d28c392a1aec0221320108e3392ea285d23405ae2cfa5c54b20","src/kmod.rs":"c818ced08d55ae36fdf82fa914ba856b688e37234d574d3faa37128211d512fb","src/lib.rs":"a62fac2ba7111157c5b64251f67f8a189f04bd587d5c80703454a596ea7ae5d9","src/macros.rs":"e23d7d8be22ef0bf9febaaf2739585453103607c0139bd3995a324e4a16d011e","src/mount/bsd.rs":"4cf35606a63d7ca41caac3b38f01e2b70c63e71978c0529f19fc79182629dbe0","src/mount/linux.rs":"6e5d61788dedf1ca4416c6c6a3a9c6c747f9352c26d863f4a1d4142e288584d6","src/mount/mod.rs":"ba9f60eb831224ab73bdd87e00e15d13b9ce9efb70b18bf8f3fe60406d522b3e","src/mqueue.rs":"ed0a189036b2437b5f7f7f1312fa545540b06ca72171b451d8bce42cc3627534","src/net/if_.rs":"b32a8a1f952de60d95e549779a5c673fd72aa665e86bfdfc8ec6badf3016b9b1","src/net/mod.rs":"577f70170e53d4a6de1abb70bf8f1031ec3e65c0e63ef5fcf05c907125e7ac17","src/poll.rs":"2fc1d144fb40db51811c6357b520ab7993529702d8f0d8060c903118ff4f7259","src/pty.rs":"27b4f76c23acf02542674017067fee74cdcac907338458700a1aa4d6f6a62e27","src/sched.rs":"403aa5ebed81910263d42a94717612b737550bf053227b7d90f1c8949188d919","src/sys/aio.rs":"ae091de8540c97da374a39e7d154c1b3ce50f41e6fc20a45c6b06eb838e74366","src/sys/epoll.rs":"28e22debf474d1b047e8044a00b354c25dab2fa125960f9f2f14cc34289fd5c9","src/sys/event.rs":"dbd8e84bccb813839295b0a336485783ef19548d2317931f0ceb5ee62f839a40","src/sys/eventfd.rs":"c8db8f5874726fdad289ad5e2603a7d71a1ae5a899dcde3a35d3edff8b498b7e","src/sys/inotify.rs":"5b4da774313afa9c28c3f92f9d07dce9bf4c8d044fd6a16f19480e79a19e808b","src/sys/ioctl/bsd.rs":"bbd02e30b0a78c1cb22777d9b00cfcbba9c68505cffc06118ac68474cf6fea39","src/sys/ioctl/
linux.rs":"028181834d119b834bf399f2b8a6176cc57e75144693f28f32059d087d8c8018","src/sys/ioctl/mod.rs":"89b20579476b2e0254e0ecb1b41830cccd7027a22cbdb816a9d4ec3924842ac1","src/sys/memfd.rs":"f58d7fbe67c4b994832d72f5fbd59c136c8f1ae88ea8b0bc1c099db2d847ee6c","src/sys/mman.rs":"17df1bc34ba92bdd6bad1e11e4ef139998117f6c468c8f560421858f3cc899a5","src/sys/mod.rs":"baabf649f758ad4acce849ec1795dd4e4f9c6539e677bad5fa777300a4871dcb","src/sys/personality.rs":"aa89760c023bfec3fca5d8636f9eac9d337f5547933793ce6df7a0de97ae6ee1","src/sys/pthread.rs":"258cdf7ff0b61a4afa6d228109e4cb4fb88d859bb8dfe6c959d95130fb010906","src/sys/ptrace/bsd.rs":"4c590d8f023ff52f396f8b6f2150c08e5c9486d3088d9c173db33a70d616b800","src/sys/ptrace/linux.rs":"c82db3fb18aa97755f9ccb440a957cd46d664968a94045830c5d74d2d53bc19f","src/sys/ptrace/mod.rs":"e9e5d970097f5eafffba900959d4fdbf233bff9ed7f599fc9896bb44d86a57a4","src/sys/quota.rs":"02e698a25f0986fb43aa88689f3d3d8b9edc6ae48496ad02f7214fccaa493e00","src/sys/reboot.rs":"eacdf57694a6629fb05787e16450446102a62818274495f2ad4e445807d09221","src/sys/resource.rs":"d498d0c00fd30e35e1269a8902cb812014d813f63ec95364f8f59f1912ba5657","src/sys/select.rs":"65c39b129d3cc85b8ca026ff26dcf80c5639824f43715881c3c1bbb6bf0c8a60","src/sys/sendfile.rs":"7a62099f9771fecff49b9c11210341e3c1a4acf22f8dfb96d395e29421648676","src/sys/signal.rs":"c3e13a2edea54d190a4b051f62efc97953c00b5051a9fda0e39e3bc732a31939","src/sys/signalfd.rs":"583524434fd37143be3db37fa6f6cbd339f7946416f05b58a95e246947e5cc9d","src/sys/socket/addr.rs":"84df895052f59ec84774b189ffb285d2a37a9703af6c8310ae5040cca1a2583e","src/sys/socket/mod.rs":"6deb55438cad3606385303f036b0efd842dfd759fba93611911f5a4f2613c9dc","src/sys/socket/sockopt.rs":"ed1f920364bfe88bbe6eaeeefb27a63bfcdd7d67604aca2f03e22f2b502df55a","src/sys/stat.rs":"337dea8d55d6177dc85b3235b40b8a3e81af7f4a6e2806a0b2f730bec5424350","src/sys/statfs.rs":"17103659a85279bac046c69cb3b22bf2c11c2492cffb0edfa4c3b233d161a2f2","src/sys/statvfs.rs":"f81e3900ef90d62e7eceaf1b6ff8dcfd965466714c033eb4717687f692171f48","src/sys/sysinfo.rs":"b4519b1ca091c9dbe94d2a6fd6304944bf3df5626973d2c6884022559706f0d9","src/sys/termios.rs":"7923f9846a8122096b6b1cd240d3618b876ce500a751ac434954d172e2e85745","src/sys/time.rs":"9026033b60a5ccc95b70424aef043c8c748722e2ea8c7c86366ecd4585b651a0","src/sys/timer.rs":"8c10f0e7cfac857ad00460be30bc68b957909cc9296e70718d3b5d4a0babafde","src/sys/timerfd.rs":"ef7c48aefdcfac13316eeddbef5da04cf12e9f574b8d9f43402c02b6b8db86b3","src/sys/uio.rs":"e1d59ccbee9d46c65d3aa8c36aa3a3222539beea0d20163a8b707d08fca14e09","src/sys/utsname.rs":"0cdda0cc111caaa0e4ebe2d4588bdc825d878e5bcb7a9136073b15f87a20e11f","src/sys/wait.rs":"cc70d2d9b880ff6c48577a479c209af6127067bc013a90ee22538e4dfad7d2b4","src/time.rs":"d4e0872361a57810837f5bd790cbca3a2b9db1ac4694a3c52d1564ad3532d3be","src/ucontext.rs":"b8f2e04757a9c2bc38c3b1e259d3a013da8a730fe9bfbe5487637395681b43d3","src/unistd.rs":"e19be456124731c5b93aef92ed72a7c4c9092e28db0649814ba3fcc1f0d620fa","test/common/mod.rs":"1d7e28e3635754664cd056f3a1079232ff5c118df619e1d0551a9972eb0b3cd6","test/sys/mod.rs":"87b2891d83067ff21f72b8ff7fde3019dc45b6877282ac278b6da151de45c7a7","test/sys/test_aio.rs":"4dac9f716f852f1f438f78d6e64bf041e6fd316bf15dcb27afffaf0894bdefa6","test/sys/test_aio_drop.rs":"614070155fa16a979b7341d001639c5ce24a1d6f632c3abce45a5a6d49c4039b","test/sys/test_epoll.rs":"ffe95e36c79e37426ef8e8ca3b137b7f35ea0333ce666a20a4b7878db17680e9","test/sys/test_inotify.rs":"a141b9a995892547b51ceeb6761a70a6b86d37e8f38d13ea2c497b81b4b0f49f","test/sys/test_ioctl.rs":"00ccc5afb665e53
3a0a4b6d6a6be438bcaea19fce335390feef4e91d17b3036c","test/sys/test_mman.rs":"2b4161964c9204b74659028b0f89a88f4e3bcc9886137a3039737cd91d2698cb","test/sys/test_pthread.rs":"ace36a2f5587f1874854281b4fd84e4e4d892a1e3c5cc38ced57975739522ad6","test/sys/test_ptrace.rs":"0385eebc8b1b8c72f655b745769decd9143ad83018198375982da0896310456b","test/sys/test_select.rs":"54cea1c34ad28d5770a613c1c3cbc3b1064b22037ec2b9d3fcd422d3be9e60a7","test/sys/test_signal.rs":"acc9941227bd3e2afad323613c2b8c83902ed0486d3745fd72704f395924f1e4","test/sys/test_signalfd.rs":"0e1060143e2612c490bc3d0168d0bbb042ef55e3f1d91d2578b9e42e4310a14d","test/sys/test_socket.rs":"d2df1001f9a0b2dac0b88051a67c3868bb216e72e4da4eecd11c4448b9fa4b40","test/sys/test_sockopt.rs":"4465f22f718442f3f7b502e052dad02b93cebfa3b71fa55ff4f25fb02534acab","test/sys/test_stat.rs":"6630a28217fd708bb84cd4f7e7101836b74f2420f9888923fdab664ccc331c1d","test/sys/test_sysinfo.rs":"ffd49bc96375914a2c4a4a59730cae8072f85771e2c4a80d3403df38d967e272","test/sys/test_termios.rs":"e5bcef10c84bd7583d600d5601835bcb3cfc88781cb283ab0185bbef5faf4327","test/sys/test_timerfd.rs":"cfed3abf58118611d08f6985251a7739cff67108e11214222a1d2394a3a026ce","test/sys/test_uio.rs":"32656bd0a5699e4d019aa928edf104637937179782914a82d50d37226e84c421","test/sys/test_wait.rs":"6fd59fffeeb09ff620c359baefd062ba777598982b6cb001ccc07b6bc7605493","test/test.rs":"11f40b0718ddd1a150cb9e703d56d0b2a9462306505a2245ddf273a2011f48b5","test/test_clearenv.rs":"45ca548035b3c20ec87314715feaba2be973709a635d85b8cde46fd1d9f1ecd4","test/test_dir.rs":"ae3c11c58cb06da6557aa2a839c6653c54cd7724283fffe9df5a5d3feabdd89a","test/test_fcntl.rs":"71dcb87f7b04d78fc62937ba46cb7f0f1f2dbb330b63a996ea2e8ec9056b98a9","test/test_kmod/hello_mod/Makefile":"0219f7bce0603f97d997fb377ca071966c90333ecc665e78a54dfeb97a9c811b","test/test_kmod/hello_mod/hello.c":"bcac6b19c5bd807e1f3878c15e426acc85785a8ade9840c3bb4d068635c9188c","test/test_kmod/mod.rs":"b4ae25841c2f06f32de9f1acd8230eeccd7095721302ebe78ad454e4e4f9c783","test/test_mount.rs":"6dd242b6e23c9c39e1a75612bbea62573898818ab374c3c032c2cdb97033554d","test/test_mq.rs":"136071f24131aac0e65d5f29ac18e3806641dfae1164813f5570c0e3a6f70553","test/test_net.rs":"f2912327ebb2a3d37e6cff02a5ac3106cf889cc5c74404db4ef0034059ba26f1","test/test_nix_path.rs":"01ba4719c80b6fe911b091a7c05124b64eeece964e09c058ef8f9805daca546b","test/test_nmount.rs":"d6c112547bb80968170b5497cda4b6cbf69dabec6f51d494bd52298995ceff18","test/test_poll.rs":"3e0b8f0397ba080785c61a3bfc3d637bc87f324bc4e52b5f1bf3ca0d32dbc9fe","test/test_pty.rs":"b26238a0783746cb31880e11eebc1913149be999ce75fbc2d6677bdd1e2731b2","test/test_ptymaster_drop.rs":"ae63c815f5028ddc67d194e86559483018ab1816316bdb917f40cee9364fd8a5","test/test_resource.rs":"40aef790ab745cec31a4b333d2ca406b462aa9bdf4a6d3756371e498b8d51e9a","test/test_sched.rs":"c4579bd376fab8816e63b07fa9ace31dc08e63ebb7c855a2c450698090d1d1e8","test/test_sendfile.rs":"bb41b4f3621b518e397d3a5b5ad3c5dcef3fe506afe516eab7572fbab92b77e3","test/test_stat.rs":"c407ca47a5258750076d041afad2f6add4c3563be36628bde1c5b314f5d0765d","test/test_time.rs":"f7a21b1e279e60e84909d5dadda97ded66d3326b131fe317badf9af0a1b50335","test/test_timer.rs":"3ae20d364f075d2811f3ff94eda9886682cc21d8807656007d2464fe36d1e361","test/test_unistd.rs":"20a00be4fbe26302ea5fe50ce25b99265dc763db138663d6aa1d7ac729a1d292"},"package":"bfdda3d196821d6af13126e40375cdf7da646a96114af134d5f417a9a1dc8e1a"} +\ No newline at end of file 
++{"files":{"CHANGELOG.md":"8ee4e556e53d1b39400a48675d3ecff0bf27e419accab7ca3be76ab934289548","Cargo.toml":"2e6eff9170182f107188b8bc9802efd044ef47178afc7f138950ecff1c1ceb96","LICENSE":"66e3ee1fa7f909ad3c612d556f2a0cdabcd809ad6e66f3b0605015ac64841b70","README.md":"1ed9a0e26ae6e575b3262ae734dd02889455593b761ee62403ea5a64104f3c9c","src/dir.rs":"0280a2dc480bd913f24ed84fbe26569fa2e8eefa660e5ad7c21e05fc34c14d16","src/env.rs":"028bc5e20139ebba418a655a2978a53335dc7680bf1de43d2c8333dd72cfa5c4","src/errno.rs":"e55d075858e349d9afea9ce0480f7fb7ba4dccccf0694fd7b3280b918836203c","src/fcntl.rs":"ea8f43d8fec0b6c3b7d903333e4c1ce85611684a4afd561c55cfe4b61a979e94","src/features.rs":"5b4a0831e5f4b79a6f0e42ed052fd66c875da18959750be51e41fb59ac19feed","src/ifaddrs.rs":"377865eb48040d28c392a1aec0221320108e3392ea285d23405ae2cfa5c54b20","src/kmod.rs":"c818ced08d55ae36fdf82fa914ba856b688e37234d574d3faa37128211d512fb","src/lib.rs":"a62fac2ba7111157c5b64251f67f8a189f04bd587d5c80703454a596ea7ae5d9","src/macros.rs":"e23d7d8be22ef0bf9febaaf2739585453103607c0139bd3995a324e4a16d011e","src/mount/bsd.rs":"4cf35606a63d7ca41caac3b38f01e2b70c63e71978c0529f19fc79182629dbe0","src/mount/linux.rs":"6e5d61788dedf1ca4416c6c6a3a9c6c747f9352c26d863f4a1d4142e288584d6","src/mount/mod.rs":"ba9f60eb831224ab73bdd87e00e15d13b9ce9efb70b18bf8f3fe60406d522b3e","src/mqueue.rs":"ed0a189036b2437b5f7f7f1312fa545540b06ca72171b451d8bce42cc3627534","src/net/if_.rs":"b32a8a1f952de60d95e549779a5c673fd72aa665e86bfdfc8ec6badf3016b9b1","src/net/mod.rs":"577f70170e53d4a6de1abb70bf8f1031ec3e65c0e63ef5fcf05c907125e7ac17","src/poll.rs":"2fc1d144fb40db51811c6357b520ab7993529702d8f0d8060c903118ff4f7259","src/pty.rs":"27b4f76c23acf02542674017067fee74cdcac907338458700a1aa4d6f6a62e27","src/sched.rs":"403aa5ebed81910263d42a94717612b737550bf053227b7d90f1c8949188d919","src/sys/aio.rs":"ae091de8540c97da374a39e7d154c1b3ce50f41e6fc20a45c6b06eb838e74366","src/sys/epoll.rs":"28e22debf474d1b047e8044a00b354c25dab2fa125960f9f2f14cc34289fd5c9","src/sys/event.rs":"dbd8e84bccb813839295b0a336485783ef19548d2317931f0ceb5ee62f839a40","src/sys/eventfd.rs":"c8db8f5874726fdad289ad5e2603a7d71a1ae5a899dcde3a35d3edff8b498b7e","src/sys/inotify.rs":"5b4da774313afa9c28c3f92f9d07dce9bf4c8d044fd6a16f19480e79a19e808b","src/sys/ioctl/bsd.rs":"bbd02e30b0a78c1cb22777d9b00cfcbba9c68505cffc06118ac68474cf6fea39","src/sys/ioctl/linux.rs":"54bad026ee637b73b95dad8135b6db61cae855670fd9323e7bf21acaff0827f4","src/sys/ioctl/mod.rs":"89b20579476b2e0254e0ecb1b41830cccd7027a22cbdb816a9d4ec3924842ac1","src/sys/memfd.rs":"f58d7fbe67c4b994832d72f5fbd59c136c8f1ae88ea8b0bc1c099db2d847ee6c","src/sys/mman.rs":"17df1bc34ba92bdd6bad1e11e4ef139998117f6c468c8f560421858f3cc899a5","src/sys/mod.rs":"baabf649f758ad4acce849ec1795dd4e4f9c6539e677bad5fa777300a4871dcb","src/sys/personality.rs":"aa89760c023bfec3fca5d8636f9eac9d337f5547933793ce6df7a0de97ae6ee1","src/sys/pthread.rs":"258cdf7ff0b61a4afa6d228109e4cb4fb88d859bb8dfe6c959d95130fb010906","src/sys/ptrace/bsd.rs":"4c590d8f023ff52f396f8b6f2150c08e5c9486d3088d9c173db33a70d616b800","src/sys/ptrace/linux.rs":"c82db3fb18aa97755f9ccb440a957cd46d664968a94045830c5d74d2d53bc19f","src/sys/ptrace/mod.rs":"e9e5d970097f5eafffba900959d4fdbf233bff9ed7f599fc9896bb44d86a57a4","src/sys/quota.rs":"02e698a25f0986fb43aa88689f3d3d8b9edc6ae48496ad02f7214fccaa493e00","src/sys/reboot.rs":"eacdf57694a6629fb05787e16450446102a62818274495f2ad4e445807d09221","src/sys/resource.rs":"d498d0c00fd30e35e1269a8902cb812014d813f63ec95364f8f59f1912ba5657","src/sys/select.rs":"65c39b129d3cc85b8ca026ff26dcf80c56398
24f43715881c3c1bbb6bf0c8a60","src/sys/sendfile.rs":"7a62099f9771fecff49b9c11210341e3c1a4acf22f8dfb96d395e29421648676","src/sys/signal.rs":"c3e13a2edea54d190a4b051f62efc97953c00b5051a9fda0e39e3bc732a31939","src/sys/signalfd.rs":"583524434fd37143be3db37fa6f6cbd339f7946416f05b58a95e246947e5cc9d","src/sys/socket/addr.rs":"84df895052f59ec84774b189ffb285d2a37a9703af6c8310ae5040cca1a2583e","src/sys/socket/mod.rs":"6deb55438cad3606385303f036b0efd842dfd759fba93611911f5a4f2613c9dc","src/sys/socket/sockopt.rs":"ed1f920364bfe88bbe6eaeeefb27a63bfcdd7d67604aca2f03e22f2b502df55a","src/sys/stat.rs":"337dea8d55d6177dc85b3235b40b8a3e81af7f4a6e2806a0b2f730bec5424350","src/sys/statfs.rs":"17103659a85279bac046c69cb3b22bf2c11c2492cffb0edfa4c3b233d161a2f2","src/sys/statvfs.rs":"f81e3900ef90d62e7eceaf1b6ff8dcfd965466714c033eb4717687f692171f48","src/sys/sysinfo.rs":"b4519b1ca091c9dbe94d2a6fd6304944bf3df5626973d2c6884022559706f0d9","src/sys/termios.rs":"7923f9846a8122096b6b1cd240d3618b876ce500a751ac434954d172e2e85745","src/sys/time.rs":"9026033b60a5ccc95b70424aef043c8c748722e2ea8c7c86366ecd4585b651a0","src/sys/timer.rs":"8c10f0e7cfac857ad00460be30bc68b957909cc9296e70718d3b5d4a0babafde","src/sys/timerfd.rs":"ef7c48aefdcfac13316eeddbef5da04cf12e9f574b8d9f43402c02b6b8db86b3","src/sys/uio.rs":"e1d59ccbee9d46c65d3aa8c36aa3a3222539beea0d20163a8b707d08fca14e09","src/sys/utsname.rs":"0cdda0cc111caaa0e4ebe2d4588bdc825d878e5bcb7a9136073b15f87a20e11f","src/sys/wait.rs":"cc70d2d9b880ff6c48577a479c209af6127067bc013a90ee22538e4dfad7d2b4","src/time.rs":"d4e0872361a57810837f5bd790cbca3a2b9db1ac4694a3c52d1564ad3532d3be","src/ucontext.rs":"b8f2e04757a9c2bc38c3b1e259d3a013da8a730fe9bfbe5487637395681b43d3","src/unistd.rs":"e19be456124731c5b93aef92ed72a7c4c9092e28db0649814ba3fcc1f0d620fa","test/common/mod.rs":"1d7e28e3635754664cd056f3a1079232ff5c118df619e1d0551a9972eb0b3cd6","test/sys/mod.rs":"87b2891d83067ff21f72b8ff7fde3019dc45b6877282ac278b6da151de45c7a7","test/sys/test_aio.rs":"4dac9f716f852f1f438f78d6e64bf041e6fd316bf15dcb27afffaf0894bdefa6","test/sys/test_aio_drop.rs":"614070155fa16a979b7341d001639c5ce24a1d6f632c3abce45a5a6d49c4039b","test/sys/test_epoll.rs":"ffe95e36c79e37426ef8e8ca3b137b7f35ea0333ce666a20a4b7878db17680e9","test/sys/test_inotify.rs":"a141b9a995892547b51ceeb6761a70a6b86d37e8f38d13ea2c497b81b4b0f49f","test/sys/test_ioctl.rs":"00ccc5afb665e533a0a4b6d6a6be438bcaea19fce335390feef4e91d17b3036c","test/sys/test_mman.rs":"2b4161964c9204b74659028b0f89a88f4e3bcc9886137a3039737cd91d2698cb","test/sys/test_pthread.rs":"ace36a2f5587f1874854281b4fd84e4e4d892a1e3c5cc38ced57975739522ad6","test/sys/test_ptrace.rs":"0385eebc8b1b8c72f655b745769decd9143ad83018198375982da0896310456b","test/sys/test_select.rs":"54cea1c34ad28d5770a613c1c3cbc3b1064b22037ec2b9d3fcd422d3be9e60a7","test/sys/test_signal.rs":"acc9941227bd3e2afad323613c2b8c83902ed0486d3745fd72704f395924f1e4","test/sys/test_signalfd.rs":"0e1060143e2612c490bc3d0168d0bbb042ef55e3f1d91d2578b9e42e4310a14d","test/sys/test_socket.rs":"d2df1001f9a0b2dac0b88051a67c3868bb216e72e4da4eecd11c4448b9fa4b40","test/sys/test_sockopt.rs":"4465f22f718442f3f7b502e052dad02b93cebfa3b71fa55ff4f25fb02534acab","test/sys/test_stat.rs":"6630a28217fd708bb84cd4f7e7101836b74f2420f9888923fdab664ccc331c1d","test/sys/test_sysinfo.rs":"ffd49bc96375914a2c4a4a59730cae8072f85771e2c4a80d3403df38d967e272","test/sys/test_termios.rs":"e5bcef10c84bd7583d600d5601835bcb3cfc88781cb283ab0185bbef5faf4327","test/sys/test_timerfd.rs":"cfed3abf58118611d08f6985251a7739cff67108e11214222a1d2394a3a026ce","test/sys/test_uio.rs":"326
56bd0a5699e4d019aa928edf104637937179782914a82d50d37226e84c421","test/sys/test_wait.rs":"6fd59fffeeb09ff620c359baefd062ba777598982b6cb001ccc07b6bc7605493","test/test.rs":"11f40b0718ddd1a150cb9e703d56d0b2a9462306505a2245ddf273a2011f48b5","test/test_clearenv.rs":"45ca548035b3c20ec87314715feaba2be973709a635d85b8cde46fd1d9f1ecd4","test/test_dir.rs":"ae3c11c58cb06da6557aa2a839c6653c54cd7724283fffe9df5a5d3feabdd89a","test/test_fcntl.rs":"71dcb87f7b04d78fc62937ba46cb7f0f1f2dbb330b63a996ea2e8ec9056b98a9","test/test_kmod/hello_mod/Makefile":"0219f7bce0603f97d997fb377ca071966c90333ecc665e78a54dfeb97a9c811b","test/test_kmod/hello_mod/hello.c":"bcac6b19c5bd807e1f3878c15e426acc85785a8ade9840c3bb4d068635c9188c","test/test_kmod/mod.rs":"b4ae25841c2f06f32de9f1acd8230eeccd7095721302ebe78ad454e4e4f9c783","test/test_mount.rs":"6dd242b6e23c9c39e1a75612bbea62573898818ab374c3c032c2cdb97033554d","test/test_mq.rs":"136071f24131aac0e65d5f29ac18e3806641dfae1164813f5570c0e3a6f70553","test/test_net.rs":"f2912327ebb2a3d37e6cff02a5ac3106cf889cc5c74404db4ef0034059ba26f1","test/test_nix_path.rs":"01ba4719c80b6fe911b091a7c05124b64eeece964e09c058ef8f9805daca546b","test/test_nmount.rs":"d6c112547bb80968170b5497cda4b6cbf69dabec6f51d494bd52298995ceff18","test/test_poll.rs":"3e0b8f0397ba080785c61a3bfc3d637bc87f324bc4e52b5f1bf3ca0d32dbc9fe","test/test_pty.rs":"b26238a0783746cb31880e11eebc1913149be999ce75fbc2d6677bdd1e2731b2","test/test_ptymaster_drop.rs":"ae63c815f5028ddc67d194e86559483018ab1816316bdb917f40cee9364fd8a5","test/test_resource.rs":"40aef790ab745cec31a4b333d2ca406b462aa9bdf4a6d3756371e498b8d51e9a","test/test_sched.rs":"c4579bd376fab8816e63b07fa9ace31dc08e63ebb7c855a2c450698090d1d1e8","test/test_sendfile.rs":"bb41b4f3621b518e397d3a5b5ad3c5dcef3fe506afe516eab7572fbab92b77e3","test/test_stat.rs":"c407ca47a5258750076d041afad2f6add4c3563be36628bde1c5b314f5d0765d","test/test_time.rs":"f7a21b1e279e60e84909d5dadda97ded66d3326b131fe317badf9af0a1b50335","test/test_timer.rs":"3ae20d364f075d2811f3ff94eda9886682cc21d8807656007d2464fe36d1e361","test/test_unistd.rs":"20a00be4fbe26302ea5fe50ce25b99265dc763db138663d6aa1d7ac729a1d292"},"package":"bfdda3d196821d6af13126e40375cdf7da646a96114af134d5f417a9a1dc8e1a"} +diff --git a/third_party/rust/nix/src/sys/ioctl/linux.rs b/third_party/rust/nix/src/sys/ioctl/linux.rs +index 0c0a2090538f8..214d9e8c60281 100644 +--- a/third_party/rust/nix/src/sys/ioctl/linux.rs ++++ b/third_party/rust/nix/src/sys/ioctl/linux.rs +@@ -41,6 +41,7 @@ mod consts { + target_arch = "s390x", + target_arch = "x86_64", + target_arch = "aarch64", ++ target_arch = "loongarch64", + target_arch = "riscv32", + target_arch = "riscv64" + ))] +diff --git a/toolkit/components/telemetry/pingsender/pingsender.cpp b/toolkit/components/telemetry/pingsender/pingsender.cpp +index 30f2907c720e1..e6645227a2949 100644 +--- a/toolkit/components/telemetry/pingsender/pingsender.cpp ++++ b/toolkit/components/telemetry/pingsender/pingsender.cpp +@@ -10,6 +10,7 @@ + #include + #include + #include ++#include + #include + + #include +diff --git a/toolkit/moz.configure b/toolkit/moz.configure +index 8b462ecde463f..a4aa84cc7c45e 100644 +--- a/toolkit/moz.configure ++++ b/toolkit/moz.configure +@@ -2432,7 +2432,7 @@ with only_when(compile_environment | artifact_builds): + use_nasm = False + elif target.cpu == "x86_64": + flags = ["-D__x86_64__", "-DPIC", "-DELF", "-Pconfig_unix64.asm"] +- elif target.cpu in ("x86", "arm", "aarch64"): ++ elif target.cpu in ("x86", "arm", "aarch64", "loongarch64"): + flac_only = True + else: + enable = False +-- 
+2.43.0 + diff --git a/firefox/0002-Enable-VA-API-support-for-AMD-GPUs.patch b/firefox/0002-Enable-VA-API-support-for-AMD-GPUs.patch new file mode 100644 index 0000000000..143927ffe1 --- /dev/null +++ b/firefox/0002-Enable-VA-API-support-for-AMD-GPUs.patch @@ -0,0 +1,31 @@ +From 9f3a0a22ba3c6ad1f14e90cfc2774b87215d7135 Mon Sep 17 00:00:00 2001 +From: Jiangjin Wang +Date: Tue, 14 Nov 2023 18:14:20 -0800 +Subject: [PATCH 2/5] Enable VA-API support for AMD GPUs + +--- + widget/gtk/GfxInfo.cpp | 8 -------- + 1 file changed, 8 deletions(-) + +diff --git a/widget/gtk/GfxInfo.cpp b/widget/gtk/GfxInfo.cpp +index b34e85baa28e5..8c95ce0d4274b 100644 +--- a/widget/gtk/GfxInfo.cpp ++++ b/widget/gtk/GfxInfo.cpp +@@ -1112,14 +1112,6 @@ const nsTArray& GfxInfo::GetGfxDriverInfo() { + nsIGfxInfo::FEATURE_BLOCKED_DEVICE, DRIVER_LESS_THAN, V(23, 1, 1, 0), + "FEATURE_HARDWARE_VIDEO_DECODING_AMD_DISABLE", "Mesa 23.1.1.0"); + +- // Disable on Release/late Beta on AMD +-#if !defined(EARLY_BETA_OR_EARLIER) +- APPEND_TO_DRIVER_BLOCKLIST(OperatingSystem::Linux, DeviceFamily::AtiAll, +- nsIGfxInfo::FEATURE_HARDWARE_VIDEO_DECODING, +- nsIGfxInfo::FEATURE_BLOCKED_DEVICE, +- DRIVER_COMPARISON_IGNORED, V(0, 0, 0, 0), +- "FEATURE_HARDWARE_VIDEO_DECODING_DISABLE", ""); +-#endif + //////////////////////////////////// + // FEATURE_HW_DECODED_VIDEO_ZERO_COPY - ALLOWLIST + APPEND_TO_DRIVER_BLOCKLIST2(OperatingSystem::Linux, DeviceFamily::All, +-- +2.43.0 + diff --git a/firefox/0003-Remove-architectural-limit-on-VA-API-support.patch b/firefox/0003-Remove-architectural-limit-on-VA-API-support.patch new file mode 100644 index 0000000000..aa45fa3e87 --- /dev/null +++ b/firefox/0003-Remove-architectural-limit-on-VA-API-support.patch @@ -0,0 +1,40 @@ +From b25c3742c98c87de9621eac8b672f9381e15c088 Mon Sep 17 00:00:00 2001 +From: Jiangjin Wang +Date: Tue, 14 Nov 2023 18:16:46 -0800 +Subject: [PATCH 3/5] Remove architectural limit on VA-API support + +--- + toolkit/moz.configure | 9 ++------- + 1 file changed, 2 insertions(+), 7 deletions(-) + +diff --git a/toolkit/moz.configure b/toolkit/moz.configure +index a4aa84cc7c45e..67fc08237bba4 100644 +--- a/toolkit/moz.configure ++++ b/toolkit/moz.configure +@@ -537,11 +537,8 @@ set_define("MOZ_WAYLAND", depends_if(wayland_headers)(lambda _: True)) + + # Hardware-accelerated video decode with VAAPI and V4L2 on Linux + # ============================================================== +-@depends(target, toolkit_gtk) +-def vaapi(target, toolkit_gtk): +- # VAAPI is mostly used on x86(-64) but is sometimes used on ARM/ARM64 SOCs. 
+- if target.cpu in ("arm", "aarch64", "x86", "x86_64") and toolkit_gtk: +- return True ++set_config("MOZ_ENABLE_VAAPI", True, when=toolkit_gtk) ++set_define("MOZ_ENABLE_VAAPI", True, when=toolkit_gtk) + + + @depends(target, toolkit_gtk) +@@ -552,9 +549,7 @@ def v4l2(target, toolkit_gtk): + return True + + +-set_config("MOZ_ENABLE_VAAPI", True, when=vaapi) + set_config("MOZ_ENABLE_V4L2", True, when=v4l2) +-set_define("MOZ_ENABLE_VAAPI", True, when=vaapi) + set_define("MOZ_ENABLE_V4L2", True, when=v4l2) + + +-- +2.43.0 + diff --git a/firefox/0004-Enable-WebRTC-for-LoongArch.patch b/firefox/0004-Enable-WebRTC-for-LoongArch.patch new file mode 100644 index 0000000000..3cd2fcf4f4 --- /dev/null +++ b/firefox/0004-Enable-WebRTC-for-LoongArch.patch @@ -0,0 +1,152 @@ +From 476458e2e0cafaa5fe5fbc6a99750dd920e7ba67 Mon Sep 17 00:00:00 2001 +From: Jiangjin Wang +Date: Tue, 21 Nov 2023 17:17:16 -0800 +Subject: [PATCH 4/5] Enable WebRTC for LoongArch + +--- + .../common_audio/common_audio_c_gn/moz.build | 8 ++++++ + .../spl_sqrt_floor_gn/moz.build | 6 ++++ + .../aecm/aecm_core_gn/moz.build | 6 ++++ + .../desktop_capture_gn/moz.build | 28 +++++++++++++++++++ + .../desktop_capture/primitives_gn/moz.build | 4 +++ + third_party/libwebrtc/moz.build | 7 +++++ + toolkit/moz.configure | 1 + + 7 files changed, 60 insertions(+) + +diff --git a/third_party/libwebrtc/common_audio/common_audio_c_gn/moz.build b/third_party/libwebrtc/common_audio/common_audio_c_gn/moz.build +index 60ee6cfc164be..1e69b2881ca90 100644 +--- a/third_party/libwebrtc/common_audio/common_audio_c_gn/moz.build ++++ b/third_party/libwebrtc/common_audio/common_audio_c_gn/moz.build +@@ -255,6 +255,14 @@ if CONFIG["CPU_ARCH"] == "riscv64": + "/third_party/libwebrtc/common_audio/signal_processing/filter_ar_fast_q12.c" + ] + ++if CONFIG["CPU_ARCH"] == "loongarch64": ++ ++ UNIFIED_SOURCES += [ ++ "/third_party/libwebrtc/common_audio/signal_processing/complex_bit_reverse.c", ++ "/third_party/libwebrtc/common_audio/signal_processing/complex_fft.c", ++ "/third_party/libwebrtc/common_audio/signal_processing/filter_ar_fast_q12.c" ++ ] ++ + if CONFIG["CPU_ARCH"] == "x86": + + DEFINES["WEBRTC_ENABLE_AVX2"] = True +diff --git a/third_party/libwebrtc/common_audio/third_party/spl_sqrt_floor/spl_sqrt_floor_gn/moz.build b/third_party/libwebrtc/common_audio/third_party/spl_sqrt_floor/spl_sqrt_floor_gn/moz.build +index d2d0287623b54..36ad6222b3dea 100644 +--- a/third_party/libwebrtc/common_audio/third_party/spl_sqrt_floor/spl_sqrt_floor_gn/moz.build ++++ b/third_party/libwebrtc/common_audio/third_party/spl_sqrt_floor/spl_sqrt_floor_gn/moz.build +@@ -174,6 +174,12 @@ if CONFIG["CPU_ARCH"] == "riscv64": + "/third_party/libwebrtc/common_audio/third_party/spl_sqrt_floor/spl_sqrt_floor.c" + ] + ++if CONFIG["CPU_ARCH"] == "loongarch64": ++ ++ UNIFIED_SOURCES += [ ++ "/third_party/libwebrtc/common_audio/third_party/spl_sqrt_floor/spl_sqrt_floor.c" ++ ] ++ + if CONFIG["CPU_ARCH"] == "x86": + + DEFINES["WEBRTC_ENABLE_AVX2"] = True +diff --git a/third_party/libwebrtc/modules/audio_processing/aecm/aecm_core_gn/moz.build b/third_party/libwebrtc/modules/audio_processing/aecm/aecm_core_gn/moz.build +index 9874037197896..147e12653cbe2 100644 +--- a/third_party/libwebrtc/modules/audio_processing/aecm/aecm_core_gn/moz.build ++++ b/third_party/libwebrtc/modules/audio_processing/aecm/aecm_core_gn/moz.build +@@ -206,6 +206,12 @@ if CONFIG["CPU_ARCH"] == "riscv64": + "/third_party/libwebrtc/modules/audio_processing/aecm/aecm_core_c.cc" + ] + ++if CONFIG["CPU_ARCH"] == "loongarch64": 
++ ++ SOURCES += [ ++ "/third_party/libwebrtc/modules/audio_processing/aecm/aecm_core_c.cc" ++ ] ++ + if CONFIG["CPU_ARCH"] == "x86": + + DEFINES["WEBRTC_ENABLE_AVX2"] = True +diff --git a/third_party/libwebrtc/modules/desktop_capture/desktop_capture_gn/moz.build b/third_party/libwebrtc/modules/desktop_capture/desktop_capture_gn/moz.build +index b0a5d1522da86..0efac49ac5dc3 100644 +--- a/third_party/libwebrtc/modules/desktop_capture/desktop_capture_gn/moz.build ++++ b/third_party/libwebrtc/modules/desktop_capture/desktop_capture_gn/moz.build +@@ -390,6 +390,34 @@ if CONFIG["CPU_ARCH"] == "riscv64": + "/third_party/libwebrtc/modules/desktop_capture/linux/x11/x_window_property.cc" + ] + ++if CONFIG["CPU_ARCH"] == "loongarch64": ++ ++ DEFINES["USE_X11"] = "1" ++ DEFINES["WEBRTC_USE_X11"] = True ++ ++ OS_LIBS += [ ++ "X11", ++ "Xcomposite", ++ "Xdamage", ++ "Xext", ++ "Xfixes", ++ "Xrandr", ++ "Xrender" ++ ] ++ ++ UNIFIED_SOURCES += [ ++ "/third_party/libwebrtc/modules/desktop_capture/linux/x11/mouse_cursor_monitor_x11.cc", ++ "/third_party/libwebrtc/modules/desktop_capture/linux/x11/screen_capturer_x11.cc", ++ "/third_party/libwebrtc/modules/desktop_capture/linux/x11/shared_x_display.cc", ++ "/third_party/libwebrtc/modules/desktop_capture/linux/x11/window_capturer_x11.cc", ++ "/third_party/libwebrtc/modules/desktop_capture/linux/x11/window_finder_x11.cc", ++ "/third_party/libwebrtc/modules/desktop_capture/linux/x11/window_list_utils.cc", ++ "/third_party/libwebrtc/modules/desktop_capture/linux/x11/x_atom_cache.cc", ++ "/third_party/libwebrtc/modules/desktop_capture/linux/x11/x_error_trap.cc", ++ "/third_party/libwebrtc/modules/desktop_capture/linux/x11/x_server_pixel_buffer.cc", ++ "/third_party/libwebrtc/modules/desktop_capture/linux/x11/x_window_property.cc" ++ ] ++ + if CONFIG["CPU_ARCH"] == "x86": + + DEFINES["WEBRTC_ENABLE_AVX2"] = True +diff --git a/third_party/libwebrtc/modules/desktop_capture/primitives_gn/moz.build b/third_party/libwebrtc/modules/desktop_capture/primitives_gn/moz.build +index 8edb2c2344870..e6cf9f56540f7 100644 +--- a/third_party/libwebrtc/modules/desktop_capture/primitives_gn/moz.build ++++ b/third_party/libwebrtc/modules/desktop_capture/primitives_gn/moz.build +@@ -148,6 +148,10 @@ if CONFIG["CPU_ARCH"] == "riscv64": + + DEFINES["USE_X11"] = "1" + ++if CONFIG["CPU_ARCH"] == "loongarch64": ++ ++ DEFINES["USE_X11"] = "1" ++ + if CONFIG["CPU_ARCH"] == "x86": + + DEFINES["WEBRTC_ENABLE_AVX2"] = True +diff --git a/third_party/libwebrtc/moz.build b/third_party/libwebrtc/moz.build +index f528cb1108180..88fd9792acdf1 100644 +--- a/third_party/libwebrtc/moz.build ++++ b/third_party/libwebrtc/moz.build +@@ -692,3 +692,10 @@ if CONFIG["CPU_ARCH"] == "riscv64" and CONFIG["MOZ_X11"] == "1" and CONFIG["OS_T + "/third_party/libwebrtc/modules/desktop_capture/desktop_capture_gn", + "/third_party/libwebrtc/modules/desktop_capture/primitives_gn" + ] ++ ++if CONFIG["CPU_ARCH"] == "loongarch64" and CONFIG["MOZ_X11"] == "1" and CONFIG["OS_TARGET"] == "Linux": ++ ++ DIRS += [ ++ "/third_party/libwebrtc/modules/desktop_capture/desktop_capture_gn", ++ "/third_party/libwebrtc/modules/desktop_capture/primitives_gn" ++ ] +diff --git a/toolkit/moz.configure b/toolkit/moz.configure +index 67fc08237bba4..f7252539c7eaa 100644 +--- a/toolkit/moz.configure ++++ b/toolkit/moz.configure +@@ -1328,6 +1328,7 @@ def webrtc_default(target): + "ppc", + "ppc64", + "riscv64", ++ "loongarch64", + ) + + return os_match and cpu_match and target.endianness == "little" +-- +2.43.0 + diff --git 
a/firefox/0005-Fix-libyuv-build-with-LSX-LASX.patch b/firefox/0005-Fix-libyuv-build-with-LSX-LASX.patch new file mode 100644 index 0000000000..f69d1ab983 --- /dev/null +++ b/firefox/0005-Fix-libyuv-build-with-LSX-LASX.patch @@ -0,0 +1,398 @@ +From 7a3c2cbce2b6cf951c94850596dac20b5c3a98dc Mon Sep 17 00:00:00 2001 +From: WANG Xuerui +Date: Sun, 31 Dec 2023 13:16:33 +0800 +Subject: [PATCH 5/5] Fix libyuv build with LSX & LASX + +This is not of upstream quality, and will not be upstreamed as-is. +This is only meant as a quick-and-dirty build fix for LoongArch early +adopters. +--- + media/libyuv/libyuv/BUILD.gn | 37 +++++++++++++++++++++ + media/libyuv/libyuv/libyuv.gni | 2 ++ + media/libyuv/libyuv/libyuv.gypi | 5 +++ + media/libyuv/libyuv/source/row_lasx.cc | 46 ++++++++++++++++---------- + media/libyuv/libyuv/source/row_lsx.cc | 30 +++++++++++------ + 5 files changed, 92 insertions(+), 28 deletions(-) + +diff --git a/media/libyuv/libyuv/BUILD.gn b/media/libyuv/libyuv/BUILD.gn +index a72ff06558000..7d70848be9f1a 100644 +--- a/media/libyuv/libyuv/BUILD.gn ++++ b/media/libyuv/libyuv/BUILD.gn +@@ -69,6 +69,14 @@ group("libyuv") { + deps += [ ":libyuv_msa" ] + } + ++ if (libyuv_use_lsx) { ++ deps += [ ":libyuv_lsx" ] ++ } ++ ++ if (libyuv_use_lasx) { ++ deps += [ ":libyuv_lasx" ] ++ } ++ + if (!is_ios && !libyuv_disable_jpeg) { + # Make sure that clients of libyuv link with libjpeg. This can't go in + # libyuv_internal because in Windows x64 builds that will generate a clang +@@ -90,6 +98,7 @@ static_library("libyuv_internal") { + "include/libyuv/convert_from.h", + "include/libyuv/convert_from_argb.h", + "include/libyuv/cpu_id.h", ++ "include/libyuv/loongson_intrinsics.h", + "include/libyuv/mjpeg_decoder.h", + "include/libyuv/planar_functions.h", + "include/libyuv/rotate.h", +@@ -229,6 +238,34 @@ if (libyuv_use_msa) { + } + } + ++if (libyuv_use_lsx) { ++ static_library("libyuv_lsx") { ++ sources = [ ++ # LSX Source Files ++ "source/rotate_lsx.cc", ++ "source/row_lsx.cc", ++ "source/scale_lsx.cc", ++ ] ++ ++ deps = [ ":libyuv_internal" ] ++ ++ public_configs = [ ":libyuv_config" ] ++ } ++} ++ ++if (libyuv_use_lasx) { ++ static_library("libyuv_lasx") { ++ sources = [ ++ # LASX Source Files ++ "source/row_lasx.cc", ++ ] ++ ++ deps = [ ":libyuv_internal" ] ++ ++ public_configs = [ ":libyuv_config" ] ++ } ++} ++ + if (libyuv_include_tests) { + config("libyuv_unittest_warnings_config") { + if (!is_win) { +diff --git a/media/libyuv/libyuv/libyuv.gni b/media/libyuv/libyuv/libyuv.gni +index 852f08ca9d61f..ecad693508811 100644 +--- a/media/libyuv/libyuv/libyuv.gni ++++ b/media/libyuv/libyuv/libyuv.gni +@@ -20,4 +20,6 @@ declare_args() { + (current_cpu == "mips64el" || current_cpu == "mipsel") && mips_use_msa + libyuv_use_mmi = + (current_cpu == "mips64el" || current_cpu == "mipsel") && mips_use_mmi ++ libyuv_use_lsx = current_cpu == "loong64" || current_cpu == "loongarch64" ++ libyuv_use_lasx = current_cpu == "loong64" || current_cpu == "loongarch64" + } +diff --git a/media/libyuv/libyuv/libyuv.gypi b/media/libyuv/libyuv/libyuv.gypi +index 48936aa7b0239..9c19abf9c34c9 100644 +--- a/media/libyuv/libyuv/libyuv.gypi ++++ b/media/libyuv/libyuv/libyuv.gypi +@@ -18,6 +18,7 @@ + 'include/libyuv/convert_from.h', + 'include/libyuv/convert_from_argb.h', + 'include/libyuv/cpu_id.h', ++ 'include/libyuv/loongson_intrinsics.h', + 'include/libyuv/macros_msa.h', + 'include/libyuv/mjpeg_decoder.h', + 'include/libyuv/planar_functions.h', +@@ -57,6 +58,7 @@ + 'source/rotate_argb.cc', + 'source/rotate_common.cc', + 
'source/rotate_gcc.cc', ++ 'source/rotate_lsx.cc', + 'source/rotate_msa.cc', + 'source/rotate_neon.cc', + 'source/rotate_neon64.cc', +@@ -64,6 +66,8 @@ + 'source/row_any.cc', + 'source/row_common.cc', + 'source/row_gcc.cc', ++ 'source/row_lasx.cc', ++ 'source/row_lsx.cc', + 'source/row_msa.cc', + 'source/row_neon.cc', + 'source/row_neon64.cc', +@@ -73,6 +77,7 @@ + 'source/scale_argb.cc', + 'source/scale_common.cc', + 'source/scale_gcc.cc', ++ 'source/scale_lsx.cc', + 'source/scale_msa.cc', + 'source/scale_neon.cc', + 'source/scale_neon64.cc', +diff --git a/media/libyuv/libyuv/source/row_lasx.cc b/media/libyuv/libyuv/source/row_lasx.cc +index 29ac9254d9924..8c325483b116a 100644 +--- a/media/libyuv/libyuv/source/row_lasx.cc ++++ b/media/libyuv/libyuv/source/row_lasx.cc +@@ -543,8 +543,8 @@ void I422ToARGB4444Row_LASX(const uint8_t* src_y, + __m256i vec_yb, vec_yg, vec_ub, vec_vr, vec_ug, vec_vg; + __m256i vec_ubvr, vec_ugvg; + __m256i const_0x80 = __lasx_xvldi(0x80); +- __m256i alpha = {0xF000F000F000F000, 0xF000F000F000F000, 0xF000F000F000F000, +- 0xF000F000F000F000}; ++ __m256i alpha = {static_cast(0xF000F000F000F000), static_cast(0xF000F000F000F000), static_cast(0xF000F000F000F000), ++ static_cast(0xF000F000F000F000)}; + __m256i mask = {0x00F000F000F000F0, 0x00F000F000F000F0, 0x00F000F000F000F0, + 0x00F000F000F000F0}; + +@@ -595,8 +595,8 @@ void I422ToARGB1555Row_LASX(const uint8_t* src_y, + __m256i vec_yb, vec_yg, vec_ub, vec_vr, vec_ug, vec_vg; + __m256i vec_ubvr, vec_ugvg; + __m256i const_0x80 = __lasx_xvldi(0x80); +- __m256i alpha = {0x8000800080008000, 0x8000800080008000, 0x8000800080008000, +- 0x8000800080008000}; ++ __m256i alpha = {static_cast(0x8000800080008000), static_cast(0x8000800080008000), static_cast(0x8000800080008000), ++ static_cast(0x8000800080008000)}; + + YUVTORGB_SETUP(yuvconstants, vec_ub, vec_vr, vec_ug, vec_vg, vec_yg, vec_yb); + vec_ubvr = __lasx_xvilvl_h(vec_ub, vec_vr); +@@ -799,8 +799,8 @@ void ARGBToUVRow_LASX(const uint8_t* src_argb0, + 0x0009000900090009, 0x0009000900090009}; + __m256i control = {0x0000000400000000, 0x0000000500000001, 0x0000000600000002, + 0x0000000700000003}; +- __m256i const_0x8080 = {0x8080808080808080, 0x8080808080808080, +- 0x8080808080808080, 0x8080808080808080}; ++ __m256i const_0x8080 = {static_cast(0x8080808080808080), static_cast(0x8080808080808080), ++ static_cast(0x8080808080808080), static_cast(0x8080808080808080)}; + + for (x = 0; x < len; x++) { + DUP4_ARG2(__lasx_xvld, src_argb0, 0, src_argb0, 32, src_argb0, 64, +@@ -1037,8 +1037,8 @@ void ARGBToUV444Row_LASX(const uint8_t* src_argb, + __m256i const_38 = __lasx_xvldi(38); + __m256i const_94 = __lasx_xvldi(94); + __m256i const_18 = __lasx_xvldi(18); +- __m256i const_0x8080 = {0x8080808080808080, 0x8080808080808080, +- 0x8080808080808080, 0x8080808080808080}; ++ __m256i const_0x8080 = {static_cast(0x8080808080808080), static_cast(0x8080808080808080), ++ static_cast(0x8080808080808080), static_cast(0x8080808080808080)}; + __m256i control = {0x0000000400000000, 0x0000000500000001, 0x0000000600000002, + 0x0000000700000003}; + for (x = 0; x < len; x++) { +@@ -1609,8 +1609,8 @@ void ARGB1555ToUVRow_LASX(const uint8_t* src_argb1555, + __m256i const_38 = __lasx_xvldi(0x413); + __m256i const_94 = __lasx_xvldi(0x42F); + __m256i const_18 = __lasx_xvldi(0x409); +- __m256i const_8080 = {0x8080808080808080, 0x8080808080808080, +- 0x8080808080808080, 0x8080808080808080}; ++ __m256i const_8080 = {static_cast(0x8080808080808080), static_cast(0x8080808080808080), ++ 
static_cast(0x8080808080808080), static_cast(0x8080808080808080)}; + + for (x = 0; x < len; x++) { + DUP4_ARG2(__lasx_xvld, src_argb1555, 0, src_argb1555, 32, next_argb1555, 0, +@@ -1726,8 +1726,8 @@ void RGB565ToUVRow_LASX(const uint8_t* src_rgb565, + __m256i const_38 = __lasx_xvldi(0x413); + __m256i const_94 = __lasx_xvldi(0x42F); + __m256i const_18 = __lasx_xvldi(0x409); +- __m256i const_8080 = {0x8080808080808080, 0x8080808080808080, +- 0x8080808080808080, 0x8080808080808080}; ++ __m256i const_8080 = {static_cast(0x8080808080808080), static_cast(0x8080808080808080), ++ static_cast(0x8080808080808080), static_cast(0x8080808080808080)}; + + for (x = 0; x < len; x++) { + DUP4_ARG2(__lasx_xvld, src_rgb565, 0, src_rgb565, 32, next_rgb565, 0, +@@ -1793,8 +1793,8 @@ void RGB24ToUVRow_LASX(const uint8_t* src_rgb24, + __m256i const_38 = __lasx_xvldi(0x413); + __m256i const_94 = __lasx_xvldi(0x42F); + __m256i const_18 = __lasx_xvldi(0x409); +- __m256i const_8080 = {0x8080808080808080, 0x8080808080808080, +- 0x8080808080808080, 0x8080808080808080}; ++ __m256i const_8080 = {static_cast(0x8080808080808080), static_cast(0x8080808080808080), ++ static_cast(0x8080808080808080), static_cast(0x8080808080808080)}; + __m256i shuff0_b = {0x15120F0C09060300, 0x00000000001E1B18, + 0x15120F0C09060300, 0x00000000001E1B18}; + __m256i shuff1_b = {0x0706050403020100, 0x1D1A1714110A0908, +@@ -1856,8 +1856,8 @@ void RAWToUVRow_LASX(const uint8_t* src_raw, + __m256i const_38 = __lasx_xvldi(0x413); + __m256i const_94 = __lasx_xvldi(0x42F); + __m256i const_18 = __lasx_xvldi(0x409); +- __m256i const_8080 = {0x8080808080808080, 0x8080808080808080, +- 0x8080808080808080, 0x8080808080808080}; ++ __m256i const_8080 = {static_cast(0x8080808080808080), static_cast(0x8080808080808080), ++ static_cast(0x8080808080808080), static_cast(0x8080808080808080)}; + __m256i shuff0_r = {0x15120F0C09060300, 0x00000000001E1B18, + 0x15120F0C09060300, 0x00000000001E1B18}; + __m256i shuff1_r = {0x0706050403020100, 0x1D1A1714110A0908, +@@ -2000,11 +2000,13 @@ void NV21ToARGBRow_LASX(const uint8_t* src_y, + } + } + ++#ifndef RgbConstants + struct RgbConstants { + uint8_t kRGBToY[4]; + uint16_t kAddY; + uint16_t pad; + }; ++#define RgbConstants RgbConstants + + // RGB to JPeg coefficients + // B * 0.1140 coefficient = 29 +@@ -2030,6 +2032,7 @@ static const struct RgbConstants kRgb24I601Constants = {{25, 129, 66, 0}, + static const struct RgbConstants kRawI601Constants = {{66, 129, 25, 0}, + 0x1080, + 0}; ++#endif // RgbConstaints + + // ARGB expects first 3 values to contain RGB and 4th value is ignored. 
+ static void ARGBToYMatrixRow_LASX(const uint8_t* src_argb, +@@ -2242,8 +2245,8 @@ void ARGBToUVJRow_LASX(const uint8_t* src_argb, + __m256i const_21 = __lasx_xvldi(0x415); + __m256i const_53 = __lasx_xvldi(0x435); + __m256i const_10 = __lasx_xvldi(0x40A); +- __m256i const_8080 = {0x8080808080808080, 0x8080808080808080, +- 0x8080808080808080, 0x8080808080808080}; ++ __m256i const_8080 = {static_cast(0x8080808080808080), static_cast(0x8080808080808080), ++ static_cast(0x8080808080808080), static_cast(0x8080808080808080)}; + __m256i shuff = {0x1614060412100200, 0x1E1C0E0C1A180A08, 0x1715070513110301, + 0x1F1D0F0D1B190B09}; + +@@ -2296,6 +2299,13 @@ void ARGBToUVJRow_LASX(const uint8_t* src_argb, + } + } + ++// undef for unified sources build ++#undef YUVTORGB_SETUP ++#undef YUVTORGB ++#undef I444TORGB ++#undef STOREARGB ++#undef RGBTOUV ++ + #ifdef __cplusplus + } // extern "C" + } // namespace libyuv +diff --git a/media/libyuv/libyuv/source/row_lsx.cc b/media/libyuv/libyuv/source/row_lsx.cc +index 9c1e16f22e02d..91221ff03ca29 100644 +--- a/media/libyuv/libyuv/source/row_lsx.cc ++++ b/media/libyuv/libyuv/source/row_lsx.cc +@@ -407,7 +407,7 @@ void ARGB1555ToUVRow_LSX(const uint8_t* src_argb1555, + __m128i const_38 = __lsx_vldi(0x413); + __m128i const_94 = __lsx_vldi(0x42F); + __m128i const_18 = __lsx_vldi(0x409); +- __m128i const_8080 = {0x8080808080808080, 0x8080808080808080}; ++ __m128i const_8080 = {static_cast(0x8080808080808080), static_cast(0x8080808080808080)}; + + for (x = 0; x < len; x++) { + DUP4_ARG2(__lsx_vld, src_argb1555, 0, src_argb1555, 16, next_argb1555, 0, +@@ -516,7 +516,7 @@ void RGB565ToUVRow_LSX(const uint8_t* src_rgb565, + __m128i const_38 = __lsx_vldi(0x413); + __m128i const_94 = __lsx_vldi(0x42F); + __m128i const_18 = __lsx_vldi(0x409); +- __m128i const_8080 = {0x8080808080808080, 0x8080808080808080}; ++ __m128i const_8080 = {static_cast(0x8080808080808080), static_cast(0x8080808080808080)}; + + for (x = 0; x < len; x++) { + DUP4_ARG2(__lsx_vld, src_rgb565, 0, src_rgb565, 16, next_rgb565, 0, +@@ -577,7 +577,7 @@ void RGB24ToUVRow_LSX(const uint8_t* src_rgb24, + __m128i const_38 = __lsx_vldi(0x413); + __m128i const_94 = __lsx_vldi(0x42F); + __m128i const_18 = __lsx_vldi(0x409); +- __m128i const_8080 = {0x8080808080808080, 0x8080808080808080}; ++ __m128i const_8080 = {static_cast(0x8080808080808080), static_cast(0x8080808080808080)}; + __m128i shuff0_b = {0x15120F0C09060300, 0x00000000001E1B18}; + __m128i shuff1_b = {0x0706050403020100, 0x1D1A1714110A0908}; + __m128i shuff0_g = {0x1613100D0A070401, 0x00000000001F1C19}; +@@ -630,7 +630,7 @@ void RAWToUVRow_LSX(const uint8_t* src_raw, + __m128i const_38 = __lsx_vldi(0x413); + __m128i const_94 = __lsx_vldi(0x42F); + __m128i const_18 = __lsx_vldi(0x409); +- __m128i const_8080 = {0x8080808080808080, 0x8080808080808080}; ++ __m128i const_8080 = {static_cast(0x8080808080808080), static_cast(0x8080808080808080)}; + __m128i shuff0_r = {0x15120F0C09060300, 0x00000000001E1B18}; + __m128i shuff1_r = {0x0706050403020100, 0x1D1A1714110A0908}; + __m128i shuff0_g = {0x1613100D0A070401, 0x00000000001F1C19}; +@@ -865,7 +865,7 @@ void BGRAToUVRow_LSX(const uint8_t* src_bgra, + __m128i const_38 = __lsx_vldi(0x413); + __m128i const_94 = __lsx_vldi(0x42F); + __m128i const_18 = __lsx_vldi(0x409); +- __m128i const_8080 = {0x8080808080808080, 0x8080808080808080}; ++ __m128i const_8080 = {static_cast(0x8080808080808080), static_cast(0x8080808080808080)}; + + for (x = 0; x < len; x++) { + DUP4_ARG2(__lsx_vld, src_bgra, 0, src_bgra, 16, 
src_bgra, 32, src_bgra, 48, +@@ -913,7 +913,7 @@ void ABGRToUVRow_LSX(const uint8_t* src_abgr, + __m128i const_38 = __lsx_vldi(0x413); + __m128i const_94 = __lsx_vldi(0x42F); + __m128i const_18 = __lsx_vldi(0x409); +- __m128i const_8080 = {0x8080808080808080, 0x8080808080808080}; ++ __m128i const_8080 = {static_cast(0x8080808080808080), static_cast(0x8080808080808080)}; + + for (x = 0; x < len; x++) { + DUP4_ARG2(__lsx_vld, src_abgr, 0, src_abgr, 16, src_abgr, 32, src_abgr, 48, +@@ -961,7 +961,7 @@ void RGBAToUVRow_LSX(const uint8_t* src_rgba, + __m128i const_38 = __lsx_vldi(0x413); + __m128i const_94 = __lsx_vldi(0x42F); + __m128i const_18 = __lsx_vldi(0x409); +- __m128i const_8080 = {0x8080808080808080, 0x8080808080808080}; ++ __m128i const_8080 = {static_cast(0x8080808080808080), static_cast(0x8080808080808080)}; + + for (x = 0; x < len; x++) { + DUP4_ARG2(__lsx_vld, src_rgba, 0, src_rgba, 16, src_rgba, 32, src_rgba, 48, +@@ -1010,7 +1010,7 @@ void ARGBToUVJRow_LSX(const uint8_t* src_argb, + __m128i const_21 = __lsx_vldi(0x415); + __m128i const_53 = __lsx_vldi(0x435); + __m128i const_10 = __lsx_vldi(0x40A); +- __m128i const_8080 = {0x8080808080808080, 0x8080808080808080}; ++ __m128i const_8080 = {static_cast(0x8080808080808080), static_cast(0x8080808080808080)}; + + for (x = 0; x < len; x++) { + DUP4_ARG2(__lsx_vld, src_argb, 0, src_argb, 16, src_argb, 32, src_argb, 48, +@@ -1388,7 +1388,7 @@ void ARGBBlendRow_LSX(const uint8_t* src_argb, + __m128i const_256 = __lsx_vldi(0x500); + __m128i zero = __lsx_vldi(0); + __m128i alpha = __lsx_vldi(0xFF); +- __m128i control = {0xFF000000FF000000, 0xFF000000FF000000}; ++ __m128i control = {static_cast(0xFF000000FF000000), static_cast(0xFF000000FF000000)}; + + for (x = 0; x < len; x++) { + DUP4_ARG2(__lsx_vld, src_argb, 0, src_argb, 16, src_argb1, 0, src_argb1, 16, +@@ -1434,7 +1434,7 @@ void ARGBQuantizeRow_LSX(uint8_t* dst_argb, + __m128i vec_offset = __lsx_vreplgr2vr_b(interval_offset); + __m128i vec_scale = __lsx_vreplgr2vr_w(scale); + __m128i zero = __lsx_vldi(0); +- __m128i control = {0xFF000000FF000000, 0xFF000000FF000000}; ++ __m128i control = {static_cast(0xFF000000FF000000), static_cast(0xFF000000FF000000)}; + + for (x = 0; x < len; x++) { + DUP4_ARG2(__lsx_vld, dst_argb, 0, dst_argb, 16, dst_argb, 32, dst_argb, 48, +@@ -1643,11 +1643,13 @@ void HalfFloatRow_LSX(const uint16_t* src, + } + } + ++#ifndef RgbConstants + struct RgbConstants { + uint8_t kRGBToY[4]; + uint16_t kAddY; + uint16_t pad; + }; ++#define RgbConstants RgbConstants + + // RGB to JPeg coefficients + // B * 0.1140 coefficient = 29 +@@ -1673,6 +1675,7 @@ static const struct RgbConstants kRgb24I601Constants = {{25, 129, 66, 0}, + static const struct RgbConstants kRawI601Constants = {{66, 129, 25, 0}, + 0x1080, + 0}; ++#endif // RgbConstaints + + // ARGB expects first 3 values to contain RGB and 4th value is ignored. 
+ static void ARGBToYMatrixRow_LSX(const uint8_t* src_argb, +@@ -1853,6 +1856,13 @@ void RAWToYRow_LSX(const uint8_t* src_raw, uint8_t* dst_y, int width) { + RGBToYMatrixRow_LSX(src_raw, dst_y, width, &kRawI601Constants); + } + ++// undef for unified sources build ++#undef YUVTORGB_SETUP ++#undef YUVTORGB ++#undef I444TORGB ++#undef STOREARGB ++#undef RGBTOUV ++ + #ifdef __cplusplus + } // extern "C" + } // namespace libyuv +-- +2.43.0 + diff --git a/firefox/PKGBUILD b/firefox/PKGBUILD index eec806bf5f..cb2ecff444 100644 --- a/firefox/PKGBUILD +++ b/firefox/PKGBUILD @@ -31,7 +31,6 @@ makedepends=( imake inetutils jack - lld llvm mesa nasm @@ -64,6 +63,11 @@ source=( https://archive.mozilla.org/pub/firefox/releases/$pkgver/source/firefox-$pkgver.source.tar.xz{,.asc} $pkgname.desktop identity-icons-brand.svg + 0001-Add-support-for-LoongArch64.patch + 0002-Enable-VA-API-support-for-AMD-GPUs.patch + 0003-Remove-architectural-limit-on-VA-API-support.patch + 0004-Enable-WebRTC-for-LoongArch.patch + 0005-Fix-libyuv-build-with-LSX-LASX.patch ) validpgpkeys=( # Mozilla Software Releases @@ -73,11 +77,21 @@ validpgpkeys=( sha256sums=('b84815a90e147965e4c0b50599c85b1022ab0fce42105e5ef45c630dcca5dec3' 'SKIP' '1f241fdc619f92a914c75aece7c7c717401d7467c9a306458e106b05f34e5044' - 'a9b8b4a0a1f4a7b4af77d5fc70c2686d624038909263c795ecc81e0aec7711e9') + 'a9b8b4a0a1f4a7b4af77d5fc70c2686d624038909263c795ecc81e0aec7711e9' + '94ccc1e5efe217a8491bea4f3d80c962ccda2ebb3203f67e4d995190d29b0544' + 'ab6eb723cb2b70831cf4e66d6e315e0842f77467812c67d5de2365fc5117c320' + '779cafabc2c738dc26a1f945695802f038af916d6b86ede9493b1cceca7e7428' + '322d0fb02661018d819f5db218b94f8f680b0e5bc6a3648db35de465431590cc' + 'f7b56a3bd993b0a0c05f305c40fabcc4af62c68d43097c0731db9525ab6156bd') b2sums=('7252cd58fef9f5fcb504c8c9f885567109c05e6ec92157459cc384edc6935adb206e3be0b805aeaa37dbd72656c3243db1291b745dd0f705f37a61319a4dc820' 'SKIP' 'd07557840097dd48a60c51cc5111950781e1c6ce255557693bd11306c7a9258b2a82548329762148f117b2295145f9e66e0483a18e2fe09c5afcffed2e4b8628' - '63a8dd9d8910f9efb353bed452d8b4b2a2da435857ccee083fc0c557f8c4c1339ca593b463db320f70387a1b63f1a79e709e9d12c69520993e26d85a3d742e34') + '63a8dd9d8910f9efb353bed452d8b4b2a2da435857ccee083fc0c557f8c4c1339ca593b463db320f70387a1b63f1a79e709e9d12c69520993e26d85a3d742e34' + 'cab0bf0922520866aa9fddf9142512a0ff30437ab779dc4e266b278aea363d4077db5edad11ac30190ee69cc321ec6a7ab3eea8003982faeac991389417af7e9' + '19dc9f0e2aa13be99f7226dbf1e80eee67bbce3ac3ed8256894158565d62324589bc075df402bc43f5d597cc2de60bed6d68b58e20e9caa1f34776f680fe45b5' + 'bb2658edb90dc022df36d89206789ef30222ff1b26376b61b2340d421738fe240063bb7113c9deb828f00e8e297bda8b87e2da9d26796aa7fd4d48bef1aa0719' + '65e9739926174b3eecb8e01cae9805861f6a7c2cadf9faef7cda92acbef3a569a31b2e8c0f4f8ab726416a60e601e7a2a4eb1fc6cb74dfd2cb4ab1572c1fdd97' + 'e2fc795f224f34d14fab2655235a7e31b5fdbe84937ded697f6b1ccd1751bfb3a05ae6b46846201e201487d02a987322f3fc36cdd3208d4904c844caf3f2d628') # Google API keys (see http://www.chromium.org/developers/how-tos/api-keys) # Note: These are for Arch Linux use ONLY. 
For your own distribution, please @@ -95,6 +109,13 @@ prepare() { mkdir mozbuild cd firefox-$pkgver +# patch -Np1 -i ../firefox-118-loong.patch + patch -Np1 -i ../0001-Add-support-for-LoongArch64.patch + patch -Np1 -i ../0002-Enable-VA-API-support-for-AMD-GPUs.patch + patch -Np1 -i ../0003-Remove-architectural-limit-on-VA-API-support.patch + patch -Np1 -i ../0004-Enable-WebRTC-for-LoongArch.patch + patch -Np1 -i ../0005-Fix-libyuv-build-with-LSX-LASX.patch + echo -n "$_google_api_key" >google-api-key echo -n "$_mozilla_api_key" >mozilla-api-key @@ -107,11 +128,11 @@ ac_add_options --enable-release ac_add_options --enable-hardening ac_add_options --enable-optimize ac_add_options --enable-rust-simd -ac_add_options --enable-linker=lld +#ac_add_options --enable-linker=lld ac_add_options --disable-install-strip ac_add_options --disable-elf-hack ac_add_options --disable-bootstrap -ac_add_options --with-wasi-sysroot=/usr/share/wasi-sysroot +ac_add_options --without-wasm-sandboxed-libraries # Branding ac_add_options --enable-official-branding @@ -134,19 +155,25 @@ ac_add_options --with-system-nss # Features ac_add_options --enable-alsa ac_add_options --enable-jack -ac_add_options --enable-crashreporter +ac_add_options --disable-crashreporter ac_add_options --disable-updater ac_add_options --disable-tests END } build() { + set -x cd firefox-$pkgver export MACH_BUILD_PYTHON_NATIVE_PACKAGE_SOURCE=pip export MOZBUILD_STATE_PATH="$srcdir/mozbuild" export MOZ_BUILD_DATE="$(date -u${SOURCE_DATE_EPOCH:+d @$SOURCE_DATE_EPOCH} +%Y%m%d%H%M%S)" export MOZ_NOSPAM=1 +# clang didn't support -mlsx + CFLAGS=${CFLAGS/-mlsx /} + CXXFLAGS=${CXXFLAGS/-mlsx /} + CFLAGS=${CFLAGS/-fstack-clash-protection/} + CXXFLAGS=${CXXFLAGS/-fstack-clash-protection/} # malloc_usable_size is used in various parts of the codebase CFLAGS="${CFLAGS/_FORTIFY_SOURCE=3/_FORTIFY_SOURCE=2}" @@ -157,34 +184,7 @@ build() { # Do 3-tier PGO echo "Building instrumented browser..." - cat >.mozconfig ../mozconfig - <.mozconfig ../mozconfig - < +Date: Wed, 29 Jul 2020 20:41:19 +0800 +Subject: [PATCH 1/3] Loongson-3 SPI tmp + +Change-Id: I85d05d0f06f0a0deff27a9f8ac74ef91c3923efb +Signed-off-by: Jiaxun Yang +--- + Makefile | 8 ++ + loongson3_spi.c | 288 ++++++++++++++++++++++++++++++++++++++++++++++++ + programmer.h | 5 + + 3 files changed, 301 insertions(+) + create mode 100644 loongson3_spi.c + +diff --git a/Makefile b/Makefile +index 7242b09..4793433 100644 +--- a/Makefile ++++ b/Makefile +@@ -696,6 +696,9 @@ CONFIG_DIGILENT_SPI ?= yes + # Disable J-Link for now. + CONFIG_JLINK_SPI ?= no + ++# Always enable Loongson-3 SPI ++CONFIG_LOONGSON3_SPI ?= yes ++ + # Disable wiki printing by default. It is only useful if you have wiki access. + CONFIG_PRINT_WIKI ?= no + +@@ -1038,6 +1041,11 @@ ifneq ($(NEED_SERIAL), ) + LIB_OBJS += serial.o custom_baud.o + endif + ++ifeq ($(CONFIG_LOONGSON3_SPI), yes) ++FEATURE_CFLAGS += -D'CONFIG_LOONGSON3_SPI=1' ++PROGRAMMER_OBJS += loongson3_spi.o ++endif ++ + ifneq ($(NEED_POSIX_SOCKETS), ) + ifeq ($(TARGET_OS), SunOS) + LIBS += -lsocket -lnsl +diff --git a/loongson3_spi.c b/loongson3_spi.c +new file mode 100644 +index 0000000..be619af +--- /dev/null ++++ b/loongson3_spi.c +@@ -0,0 +1,288 @@ ++/* ++ * This file is part of the flashrom project. ++ * ++ * Copyright (C) 2020 Jiaxun Yang ++ * ++ * This program is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License as published by ++ * the Free Software Foundation; version 2 of the License. 
++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ */ ++ ++#if CONFIG_LOONGSON3_SPI == 1 ++ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include "flash.h" ++#include "chipdrivers.h" ++#include "programmer.h" ++#include "spi.h" ++ ++#define LOONGSON64C_SPI_BASE 0x1fe00220 ++#define LOONGSON64G_SPI_BASE 0x1fe001f0 ++#define LOONGSON3_SPI_REG_SIZE 0x10 ++ ++#define SPICTRL_SPCR 0x0 ++#define SPCR_MSTR (1 << 4) ++#define SPCR_SPE (1 << 6) ++ ++#define SPICTRL_SPSR 0x1 ++#define SPSR_RFEMPTY (1 << 0) ++#define SPSR_RFFULL (1 << 1) ++#define SPSR_WFEMPTY (1 << 2) ++#define SPSR_WFFULL (1 << 3) ++#define SPSR_WCOL (1 << 6) ++ ++#define SPICTRL_FIFO 0x2 ++ ++#define SPICTRL_SFCP 0x4 ++#define SFCP_MEMEN (1 << 0) ++ ++#define SPICTRL_SOFTCS 0x5 ++/* Firmware flash is always connected to CS0 */ ++#define SOFTCS_ASSERT ((0 << 4) | (1 << 0)) ++#define SOFTCS_DESSERT ((1 << 4) | (1 << 0)) ++ ++static uint8_t *spictrl_base; ++ ++static int loongson3_spi_shutdown(void *data); ++static int loongson3_spi_send_command(const struct flashctx *flash, unsigned int writecnt, ++ unsigned int readcnt, ++ const uint8_t *writearr, ++ uint8_t *readarr); ++ ++static const struct spi_master spi_master_loongson3 = { ++ .max_data_read = MAX_DATA_READ_UNLIMITED, ++ .max_data_write = MAX_DATA_WRITE_UNLIMITED, ++ .command = loongson3_spi_send_command, ++ .multicommand = default_spi_send_multicommand, ++ .read = default_spi_read, ++ .write_256 = default_spi_write_256, ++ .write_aai = default_spi_write_aai, ++}; ++ ++static int cpu_is_loongson64c(char *cpu) ++{ ++ if (strcmp(cpu, "3b1500") == 0) ++ return 1; ++ ++ if (strcmp(cpu, "3a2000") == 0) ++ return 1; ++ ++ if (strcmp(cpu, "3b2000") == 0) ++ return 1; ++ ++ if (strcmp(cpu, "3a3000") == 0) ++ return 1; ++ ++ if (strcmp(cpu, "3b3000") == 0) ++ return 1; ++ ++ return 0; ++} ++ ++static int cpu_is_loongson64g(char *cpu) ++{ ++ if (strcmp(cpu, "3a4000") == 0) ++ return 1; ++ ++ if (strcmp(cpu, "3b4000") == 0) ++ return 1; ++ ++ return 0; ++} ++ ++int loongson3_spi_init(void) ++{ ++ uint8_t reg; ++ char *cpu; ++ ++ /* Use -cpu parameter as different kernels have different cpuinfo ++ * and it is almost impossible for us to determine all of them. 
++ */ ++ cpu = extract_programmer_param("cpu"); ++ ++ if (!cpu) { ++ free(cpu); ++ msg_perr("No -cpu specified\n"); ++ return 1; ++ } ++ ++ if (cpu_is_loongson64c(cpu)) { ++ spictrl_base = rphysmap("Loongson64C SPICTRL", LOONGSON64C_SPI_BASE, ++ LOONGSON3_SPI_REG_SIZE); ++ msg_pwarn("64c\n"); ++ } else if (cpu_is_loongson64g(cpu)) { ++ spictrl_base = rphysmap("Loongson64G SPICTRL", LOONGSON64G_SPI_BASE, ++ LOONGSON3_SPI_REG_SIZE); ++ msg_pwarn("64g\n"); ++ } else { ++ free(cpu); ++ msg_perr("Invalid -cpu specified\n"); ++ return 1; ++ } ++ ++ free(cpu); ++ ++ if (!spictrl_base) { ++ msg_perr("Failed to map base\n"); ++ return 1; ++ } ++ ++ reg = mmio_readb(spictrl_base + SPICTRL_SFCP); ++ if (!(reg & SFCP_MEMEN)) ++ msg_pwarn("Read engine is not enabled, SPI is not system firmware?\n"); ++ ++ if (register_shutdown(loongson3_spi_shutdown, NULL)) ++ return 1; ++ ++ /* Dessert CS */ ++ mmio_writeb(SOFTCS_DESSERT, spictrl_base + SPICTRL_SOFTCS); ++ ++ /* Enable SPI Controller */ ++ reg = mmio_readb(spictrl_base + SPICTRL_SPCR); ++ reg |= SPCR_MSTR | SPCR_SPE; ++ mmio_writeb(reg, spictrl_base + SPICTRL_SPCR); ++ ++ /* Disable read engine for software control */ ++ reg = mmio_readb(spictrl_base + SPICTRL_SFCP); ++ reg &= ~SFCP_MEMEN; ++ mmio_writeb(reg, spictrl_base + SPICTRL_SFCP); ++ ++ ++ /* Sometimes Read FIFO is not empty at boot time */ ++ while (!(mmio_readb(spictrl_base + SPICTRL_SPSR) & SPSR_RFEMPTY)) ++ mmio_readb(spictrl_base + SPICTRL_FIFO); ++ ++ register_spi_master(&spi_master_loongson3); ++ return 0; ++} ++ ++static int loongson3_spi_shutdown(void *data) ++{ ++ if (!spictrl_base) { ++ uint8_t reg; ++ ++ /* Disable soft CS */ ++ mmio_writeb(0x0, spictrl_base + SPICTRL_SOFTCS); ++ ++ /* Enable read engine again */ ++ reg = mmio_readb(spictrl_base + SPICTRL_SFCP); ++ reg |= SFCP_MEMEN; ++ mmio_writeb(reg, spictrl_base + SPICTRL_SFCP); ++ } ++ ++ return 0; ++} ++ ++static int loongson3_spi_send_command(const struct flashctx *flash, unsigned int writecnt, ++ unsigned int readcnt, ++ const uint8_t *writearr, ++ uint8_t *readarr) ++{ ++ unsigned int i; ++ ++ msg_pwarn("writecnt: %d, readcnt: %d\n", writecnt, readcnt); ++ ++ mmio_writeb(SOFTCS_ASSERT, spictrl_base + SPICTRL_SOFTCS); ++ ++ for (i = 0; i < writecnt; i++) { ++ mmio_writeb(writearr[i], spictrl_base + SPICTRL_FIFO); ++ ++ /* Wait until Read FIFO not empty */ ++ while (mmio_readb(spictrl_base + SPICTRL_SPSR) & SPSR_RFEMPTY); ++ ++ mmio_readb(spictrl_base + SPICTRL_FIFO); ++ } ++ ++ for (i = 0; i < readcnt; i++) { ++ mmio_writeb(writearr[i], spictrl_base + SPICTRL_FIFO); ++ ++ /* Wait until Read FIFO not empty */ ++ while (mmio_readb(spictrl_base + SPICTRL_SPSR) & SPSR_RFEMPTY); ++ ++ readarr[i] = mmio_readb(spictrl_base + SPICTRL_FIFO); ++ } ++ ++ mmio_writeb(SOFTCS_DESSERT, spictrl_base + SPICTRL_SOFTCS); ++ ++ return 0; ++} ++ ++#if 0 ++#define FIFO_DETPTH 4 ++ ++static int loongson3_spi_send_command(const struct flashctx *flash, unsigned int writecnt, ++ unsigned int readcnt, ++ const uint8_t *writearr, ++ uint8_t *readarr) ++{ ++ unsigned int i, j, cur_depth; ++ ++ msg_pwarn("writecnt: %d, readcnt: %d\n", writecnt, readcnt); ++ ++ mmio_writeb(SOFTCS_ASSERT, spictrl_base + SPICTRL_SOFTCS); ++ ++ cur_depth = 0; ++ for (i = 0; i < writecnt; i++) { ++ mmio_writeb(writearr[i], spictrl_base + SPICTRL_FIFO); ++ ++ if ((mmio_readb(spictrl_base + SPICTRL_SPSR) & SPSR_WFFULL) || ++ (writecnt - i == 1)) { ++ ++ msg_pwarn("CMD W FULL %d\n", cur_depth); ++ /* Wait until WF empty */ ++// while (!(mmio_readb(spictrl_base + SPICTRL_SPSR) & 
SPSR_WFEMPTY)); ++ /* Wait until RF is not empty */ ++ ++ for (j = 0; j < cur_depth + 1; j++) { ++ while (mmio_readb(spictrl_base + SPICTRL_SPSR) & SPSR_RFEMPTY); ++ ++ mmio_readb(spictrl_base + SPICTRL_FIFO); ++ } ++ cur_depth = 0; ++ } ++ cur_depth++; ++ } ++ ++ cur_depth = 0; ++ for (i = 0; i < readcnt; i++) { ++ mmio_writeb(0x0, spictrl_base + SPICTRL_FIFO); ++ ++ if ((mmio_readb(spictrl_base + SPICTRL_SPSR) & SPSR_WFFULL) || ++ (readcnt - i == 1)) { ++ msg_pwarn("CMD R FULL %d\n", cur_depth); ++ /* Wait until WF empty */ ++// while (!(mmio_readb(spictrl_base + SPICTRL_SPSR) & SPSR_WFEMPTY)); ++ /* Wait until RF is not empty */ ++ ++ for (j = 0; j < cur_depth + 1; j++) { ++ while (mmio_readb(spictrl_base + SPICTRL_SPSR) & SPSR_RFEMPTY); ++ ++ readarr[i - cur_depth + j] = mmio_readb(spictrl_base + SPICTRL_FIFO); ++ } ++ cur_depth = 0; ++ } ++ cur_depth++; ++ } ++ ++ mmio_writeb(SOFTCS_DESSERT, spictrl_base + SPICTRL_SOFTCS); ++ ++ return 0; ++} ++#endif ++ ++#endif // CONFIG_LOONGSON3_SPI == 1 +diff --git a/programmer.h b/programmer.h +index 3cf53b9..ccdf44c 100644 +--- a/programmer.h ++++ b/programmer.h +@@ -573,6 +573,11 @@ int jlink_spi_init(void); + int ni845x_spi_init(void); + #endif + ++/* loongson3_spi.c */ ++#if CONFIG_LOONGSON3_SPI == 1 ++int loongson3_spi_init(void); ++#endif ++ + /* flashrom.c */ + struct decode_sizes { + uint32_t parallel; +-- +2.35.1 + diff --git a/flashrom/0002-Speed-up.patch b/flashrom/0002-Speed-up.patch new file mode 100644 index 0000000000..ce6929d2b0 --- /dev/null +++ b/flashrom/0002-Speed-up.patch @@ -0,0 +1,37 @@ +From 34ab39614b4dd46c5c97dcc6222b206450964a1d Mon Sep 17 00:00:00 2001 +From: Jiaxun Yang +Date: Thu, 30 Jul 2020 20:55:01 +0800 +Subject: [PATCH 2/3] Speed up???? + +Change-Id: I3294d55eab38632b1bfcaeed9008c2eedce19e45 +Signed-off-by: Jiaxun Yang +--- + loongson3_spi.c | 4 +++- + 1 file changed, 3 insertions(+), 1 deletion(-) + +diff --git a/loongson3_spi.c b/loongson3_spi.c +index be619af..54fc814 100644 +--- a/loongson3_spi.c ++++ b/loongson3_spi.c +@@ -155,7 +155,9 @@ int loongson3_spi_init(void) + /* Enable SPI Controller */ + reg = mmio_readb(spictrl_base + SPICTRL_SPCR); + reg |= SPCR_MSTR | SPCR_SPE; ++ reg &= ~0x2; + mmio_writeb(reg, spictrl_base + SPICTRL_SPCR); ++ mmio_writeb(0x0, spictrl_base + 0x3); + + /* Disable read engine for software control */ + reg = mmio_readb(spictrl_base + SPICTRL_SFCP); +@@ -209,7 +211,7 @@ static int loongson3_spi_send_command(const struct flashctx *flash, unsigned int + } + + for (i = 0; i < readcnt; i++) { +- mmio_writeb(writearr[i], spictrl_base + SPICTRL_FIFO); ++ mmio_writeb(0, spictrl_base + SPICTRL_FIFO); + + /* Wait until Read FIFO not empty */ + while (mmio_readb(spictrl_base + SPICTRL_SPSR) & SPSR_RFEMPTY); +-- +2.35.1 + diff --git a/flashrom/0003-Add-support-for-loongarch64.patch b/flashrom/0003-Add-support-for-loongarch64.patch new file mode 100644 index 0000000000..2a46bcd97f --- /dev/null +++ b/flashrom/0003-Add-support-for-loongarch64.patch @@ -0,0 +1,220 @@ +From 43c4fa9cd8cc408f4df4d8ce02c817f70be5ae97 Mon Sep 17 00:00:00 2001 +From: Xiaotian Wu +Date: Wed, 11 May 2022 21:32:59 +0800 +Subject: [PATCH 3/3] Add support for loongarch64 + +Change-Id: I58f40f69487d515fd4c95ec5dd57fd370c9e55ec +Signed-off-by: Xiaotian Wu +--- + Makefile | 14 +++++++------- + flashrom.c | 12 ++++++++++++ + hwaccess.h | 4 ++++ + loongson3_spi.c | 15 +++++++++++---- + meson.build | 6 ++++++ + meson_options.txt | 1 + + platform.h | 5 ++++- + programmer.h | 3 +++ + 8 files changed, 48 insertions(+), 12 
deletions(-) + +diff --git a/Makefile b/Makefile +index 4793433..ef611be 100644 +--- a/Makefile ++++ b/Makefile +@@ -106,7 +106,7 @@ endif + # IMPORTANT: The following line must be placed before TARGET_OS is ever used + # (of course), but should come after any lines setting CC because the line + # below uses CC itself. +-override TARGET_OS := $(strip $(call debug_shell,$(CC) $(CPPFLAGS) -E os.h 2>/dev/null | grep -v '^\#' | grep '"' | cut -f 2 -d'"')) ++override TARGET_OS := $(strip $(call debug_shell,$(CC) $(CPPFLAGS) -E os.h 2>/dev/null | grep -v '^\#' | tail -n 1 | grep '"' | cut -f 2 -d'"')) + + ifeq ($(TARGET_OS), Darwin) + override CPPFLAGS += -I/opt/local/include -I/usr/local/include +@@ -421,7 +421,7 @@ endif + # IMPORTANT: The following line must be placed before ARCH is ever used + # (of course), but should come after any lines setting CC because the line + # below uses CC itself. +-override ARCH := $(strip $(call debug_shell,$(CC) $(CPPFLAGS) -E archtest.c 2>/dev/null | grep -v '^\#' | grep '"' | cut -f 2 -d'"')) ++override ARCH := $(strip $(call debug_shell,$(CC) $(CPPFLAGS) -E archtest.c 2>/dev/null | grep -v '^\#' | tail -n 1 | grep '"' | cut -f 2 -d'"')) + override ENDIAN := $(strip $(call debug_shell,$(CC) $(CPPFLAGS) -E endiantest.c 2>/dev/null | grep -v '^\#')) + + # Disable the internal programmer on unsupported architectures (everything but x86 and mipsel) +@@ -476,7 +476,7 @@ endif + # Disable all drivers needing raw access (memory, PCI, port I/O) on + # architectures with unknown raw access properties. + # Right now those architectures are alpha hppa m68k sh s390 +-ifneq ($(ARCH),$(filter $(ARCH),x86 mips ppc arm sparc arc)) ++ifneq ($(ARCH),$(filter $(ARCH),x86 mips ppc arm sparc arc loongarch)) + ifeq ($(CONFIG_RAYER_SPI), yes) + UNSUPPORTED_FEATURES += CONFIG_RAYER_SPI=yes + else +@@ -1037,15 +1037,15 @@ LIBS += -lni845x + PROGRAMMER_OBJS += ni845x_spi.o + endif + +-ifneq ($(NEED_SERIAL), ) +-LIB_OBJS += serial.o custom_baud.o +-endif +- + ifeq ($(CONFIG_LOONGSON3_SPI), yes) + FEATURE_CFLAGS += -D'CONFIG_LOONGSON3_SPI=1' + PROGRAMMER_OBJS += loongson3_spi.o + endif + ++ifneq ($(NEED_SERIAL), ) ++LIB_OBJS += serial.o custom_baud.o ++endif ++ + ifneq ($(NEED_POSIX_SOCKETS), ) + ifeq ($(TARGET_OS), SunOS) + LIBS += -lsocket -lnsl +diff --git a/flashrom.c b/flashrom.c +index e540027..115a394 100644 +--- a/flashrom.c ++++ b/flashrom.c +@@ -473,6 +473,18 @@ const struct programmer_entry programmer_table[] = { + }, + #endif + ++#if CONFIG_LOONGSON3_SPI == 1 ++ { ++ .name = "loongson3_spi", ++ .type = OTHER, ++ .devs.note = "Loongson-3 SPI BIOS Flash\n", ++ .init = loongson3_spi_init, ++ .map_flash_region = fallback_map, ++ .unmap_flash_region = fallback_unmap, ++ .delay = internal_delay, ++ }, ++#endif ++ + {0}, /* This entry corresponds to PROGRAMMER_INVALID. */ + }; + +diff --git a/hwaccess.h b/hwaccess.h +index 5602c15..e3104e8 100644 +--- a/hwaccess.h ++++ b/hwaccess.h +@@ -279,6 +279,10 @@ int libpayload_wrmsr(int addr, msr_t msr); + + /* PCI port I/O is not yet implemented on PowerPC. */ + ++#elif IS_LOONGARCH ++ ++/* PCI port I/O is not yet implemented on LoongArch. */ ++ + #elif IS_MIPS + + /* PCI port I/O is not yet implemented on MIPS. 
*/ +diff --git a/loongson3_spi.c b/loongson3_spi.c +index 54fc814..f29fc4a 100644 +--- a/loongson3_spi.c ++++ b/loongson3_spi.c +@@ -59,11 +59,18 @@ + static uint8_t *spictrl_base; + + static int loongson3_spi_shutdown(void *data); +-static int loongson3_spi_send_command(const struct flashctx *flash, unsigned int writecnt, ++static int loongson3_spi_send_command(struct flashctx *flash, unsigned int writecnt, + unsigned int readcnt, +- const uint8_t *writearr, ++ const unsigned char *writearr, + uint8_t *readarr); + ++ ++//loongson3_spi.c:70:27: error: initialization of ' ++//int (*)(struct flashrom_flashctx *, unsigned int, unsigned int, const unsigned char *, unsigned char *)' from incompatible pointer type ' ++//int (*)(const struct flashrom_flashctx *, unsigned int, unsigned int, const uint8_t *, uint8_t *)' {aka ' ++//int (*)(const struct flashrom_flashctx *, unsigned int, unsigned int, const unsigned char *, unsigned char *)'} [-Werror=incompatible-pointer-types] ++// ++ + static const struct spi_master spi_master_loongson3 = { + .max_data_read = MAX_DATA_READ_UNLIMITED, + .max_data_write = MAX_DATA_WRITE_UNLIMITED, +@@ -190,9 +197,9 @@ static int loongson3_spi_shutdown(void *data) + return 0; + } + +-static int loongson3_spi_send_command(const struct flashctx *flash, unsigned int writecnt, ++static int loongson3_spi_send_command(struct flashctx *flash, unsigned int writecnt, + unsigned int readcnt, +- const uint8_t *writearr, ++ const unsigned char *writearr, + uint8_t *readarr) + { + unsigned int i; +diff --git a/meson.build b/meson.build +index 375089c..8e58d36 100644 +--- a/meson.build ++++ b/meson.build +@@ -46,6 +46,7 @@ config_internal = get_option('config_internal') + config_it8212 = get_option('config_it8212') + config_linux_mtd = get_option('config_linux_mtd') + config_linux_spi = get_option('config_linux_spi') ++config_loongson3_spi = get_option('config_loongson3_spi') + config_mstarddc_spi = get_option('config_mstarddc_spi') + config_nic3com = get_option('config_nic3com') + config_nicintel_eeprom = get_option('config_nicintel_eeprom') +@@ -205,6 +206,11 @@ if config_linux_spi + srcs += 'linux_spi.c' + cargs += '-DCONFIG_LINUX_SPI=1' + endif ++if config_loongson3_spi ++ srcs += 'loongson3_spi.c' ++ cargs += '-DCONFIG_LOONGSON3_SPI=1' ++ need_raw_access = true ++endif + if config_mstarddc_spi + srcs += 'mstarddc_spi.c' + cargs += '-DCONFIG_MSTARDDC_SPI=1' +diff --git a/meson_options.txt b/meson_options.txt +index ea87311..d71ac5f 100644 +--- a/meson_options.txt ++++ b/meson_options.txt +@@ -18,6 +18,7 @@ option('config_internal_dmi', type : 'boolean', value : true, description : 'Use + option('config_it8212', type : 'boolean', value : true, description : 'ITE IT8212F PATA') + option('config_linux_mtd', type : 'boolean', value : true, description : 'Linux MTD interfaces') + option('config_linux_spi', type : 'boolean', value : true, description : 'Linux spidev interfaces') ++option('config_loongson3_spi', type : 'boolean', value : true, description : 'Loongson3 interfaces') + option('config_mstarddc_spi', type : 'boolean', value : false, description : 'MSTAR DDC support') + option('config_nic3com', type : 'boolean', value : true, description : '3Com NICs') + option('config_nicintel_eeprom', type : 'boolean', value : true, description : 'EEPROM on Intel NICs') +diff --git a/platform.h b/platform.h +index 751957c..9b2f91f 100644 +--- a/platform.h ++++ b/platform.h +@@ -62,6 +62,9 @@ + #elif defined (__hppa__) || defined (__hppa) + #define __FLASHROM_ARCH__ "hppa" + #define 
IS_HPPA 1 ++#elif defined (__loongarch__) ++ #define __FLASHROM_ARCH__ "loongarch" ++ #define IS_LOONGARCH 1 + #elif defined (__m68k__) + #define __FLASHROM_ARCH__ "m68k" + #define IS_M68K 1 +@@ -79,7 +82,7 @@ + #define IS_ARC 1 + #endif + +-#if !(IS_X86 || IS_MIPS || IS_PPC || IS_ARM || IS_SPARC || IS_ALPHA || IS_HPPA || IS_M68K || IS_RISCV || IS_SH || IS_S390 || IS_ARC) ++#if !(IS_X86 || IS_MIPS || IS_PPC || IS_ARM || IS_SPARC || IS_ALPHA || IS_HPPA || IS_LOONGARCH || IS_M68K || IS_RISCV || IS_SH || IS_S390 || IS_ARC) + #error Unknown architecture + #endif + +diff --git a/programmer.h b/programmer.h +index ccdf44c..2d8f631 100644 +--- a/programmer.h ++++ b/programmer.h +@@ -126,6 +126,9 @@ enum programmer { + #endif + #if CONFIG_STLINKV3_SPI == 1 + PROGRAMMER_STLINKV3_SPI, ++#endif ++#if CONFIG_LOONGSON3_SPI == 1 ++ PROGRAMMER_LOONGSON3_SPI, + #endif + PROGRAMMER_INVALID /* This must always be the last entry. */ + }; +-- +2.35.1 + diff --git a/flashrom/PKGBUILD b/flashrom/PKGBUILD index c6112c1b09..9710b39b34 100644 --- a/flashrom/PKGBUILD +++ b/flashrom/PKGBUILD @@ -14,14 +14,33 @@ license=(GPL) depends=(libftdi pciutils libusb) makedepends=(meson) optdepends=('dmidecode: for SMBIOS/DMI table decoder support') -source=("https://download.flashrom.org/releases/${pkgname}-v${pkgver}.tar.bz2"{,.asc}) +source=("https://download.flashrom.org/releases/${pkgname}-v${pkgver}.tar.bz2"{,.asc} +0001-Loongson-3-SPI-tmp.patch +0002-Speed-up.patch +0003-Add-support-for-loongarch64.patch +) + sha256sums=('e1f8d95881f5a4365dfe58776ce821dfcee0f138f75d0f44f8a3cd032d9ea42b' 'SKIP') validpgpkeys=(58A4868B25C7CFD662FB0132A3EB95B8D9780F68) # David Hendricks (packaging key) + +prepare() { + cd ${pkgname}-v${pkgver} + patch -p1 -i "$srcdir/0001-Loongson-3-SPI-tmp.patch" + patch -p1 -i "$srcdir/0002-Speed-up.patch" + patch -p1 -i "$srcdir/0003-Add-support-for-loongarch64.patch" +} + build() { cd ${pkgname}-v${pkgver} - arch-meson ../build + arch-meson ../build \ + -Dconfig_loongson3_spi=true \ + -Dconfig_nic3com=false \ + -Dconfig_satamv=false \ + -Dconfig_satasii=false \ + -Dconfig_nicrealtek=false \ + -Dconfig_rayer_spi=false ninja -v -C ../build # Workaround for meson not installing manpage make @@ -34,3 +53,8 @@ package() { make PREFIX=temp/ install cp -r temp/share "${pkgdir}"/usr/ } +sha256sums=('e1f8d95881f5a4365dfe58776ce821dfcee0f138f75d0f44f8a3cd032d9ea42b' + 'SKIP' + 'daa1baf186747b34d6e36c3497295b9c29978632d8b958e3a2d08fd11fd035b5' + 'e8c2801791ca22f391a6820e83bd1bedf2be3aeb97057b12d3b718d467c4f115' + '5e1700d8d90353b721f39f4dc030131277083e3e15b9d0265bbc6473604b228f') diff --git a/flatpak/PKGBUILD b/flatpak/PKGBUILD index bde0a6e075..14bc8afad7 100644 --- a/flatpak/PKGBUILD +++ b/flatpak/PKGBUILD @@ -47,7 +47,7 @@ makedepends=( wayland-protocols xmlto ) -checkdepends=( +makedepends+=( socat valgrind ) diff --git a/fluidd/PKGBUILD b/fluidd/PKGBUILD index e79ae50fa8..962506821f 100644 --- a/fluidd/PKGBUILD +++ b/fluidd/PKGBUILD @@ -26,6 +26,7 @@ pkgver() { build() { cd fluidd +# npm config set registry https://registry.loongnix.cn:5873/ npm install --frozen-lockfile npm run build } diff --git a/fmt/PKGBUILD b/fmt/PKGBUILD index 9b6953d853..91c74ac17b 100644 --- a/fmt/PKGBUILD +++ b/fmt/PKGBUILD @@ -51,7 +51,7 @@ build() { -DCMAKE_INSTALL_LIBDIR=/usr/lib \ -DBUILD_SHARED_LIBS=ON cmake --build build - cmake --build build --target doc +#cmake --build build --target doc } check() { diff --git a/foomatic-db-engine/PKGBUILD b/foomatic-db-engine/PKGBUILD index 0f7a74f2c6..da6f43bf21 100644 --- 
a/foomatic-db-engine/PKGBUILD +++ b/foomatic-db-engine/PKGBUILD @@ -2,7 +2,7 @@ pkgname=foomatic-db-engine arch=('loong64' 'x86_64') -pkgver=20220521 +pkgver=20220929 pkgrel=1 epoch=4 url="http://www.linuxprinting.org/foomatic.html" diff --git a/foomatic-db/PKGBUILD b/foomatic-db/PKGBUILD index 10560be89e..e43d2fcc42 100644 --- a/foomatic-db/PKGBUILD +++ b/foomatic-db/PKGBUILD @@ -3,7 +3,7 @@ pkgbase="foomatic-db" pkgname=('foomatic-db' 'foomatic-db-ppds' 'foomatic-db-nonfree' 'foomatic-db-nonfree-ppds') arch=('any') -pkgver=20230903 +pkgver=20230910 pkgrel=1 epoch=3 diff --git a/foot/PKGBUILD b/foot/PKGBUILD index bf5e93b125..8499c6c56e 100644 --- a/foot/PKGBUILD +++ b/foot/PKGBUILD @@ -27,7 +27,7 @@ makedepends=(fcft optdepends=('libutempter: utmp logging') backup=(etc/xdg/foot/foot.ini) source=("$pkgname-$pkgver.tar.gz::$url/archive/$pkgver.tar.gz") -sha256sums=('0e02af376e5f4a96eeb90470b7ad2e79a1d660db2a7d1aa772be43c7db00e475') +sha256sums=('8060ec28cbf6e2e3d408665330da4bc48fd094d4f1265d7c58dc75c767463c29') build() { cd "$pkgbase" diff --git a/fossil/PKGBUILD b/fossil/PKGBUILD index 0edbdf890d..bbfe331262 100644 --- a/fossil/PKGBUILD +++ b/fossil/PKGBUILD @@ -24,6 +24,10 @@ sha256sums=('f885e17998dc1eece1688a75e516663462fe72a7f4f132def4132055777c7ff8' build() { cd fossil-src-$pkgver + if [ -f autosetup/autosetup-config.guess ]; then + cp /usr/share/automake-1.16/config.guess autosetup/autosetup-config.guess + cp /usr/share/automake-1.16/config.sub autosetup/autosetup-config.sub + fi ./configure --prefix=/usr --json --disable-internal-sqlite --with-tcl=/usr --with-tcl-private-stubs=1 # headers and translate targets are problematic with parallel jobs #make -j1 bld bld/headers diff --git a/fuse2/PKGBUILD b/fuse2/PKGBUILD index 130d7ff4ee..f7e8c64fbf 100644 --- a/fuse2/PKGBUILD +++ b/fuse2/PKGBUILD @@ -15,11 +15,21 @@ conflicts=('fuse') depends=('glibc' 'fuse-common') makedepends=('pkg-config') options=(!emptydirs) -source=(https://github.com/libfuse/libfuse/releases/download/fuse-$pkgver/fuse-$pkgver.tar.gz{,.asc}) +source=(https://github.com/libfuse/libfuse/releases/download/fuse-$pkgver/fuse-$pkgver.tar.gz{,.asc} + fuse-closefrom.patch + fuse-loongarch.patch) sha1sums=('943ba651b14bc4a3c6fd959ed4b8c04f4a59032d' - 'SKIP') + 'SKIP' + 'aef8e65806d4b73672463378b5cdb694dcd1a051' + 'b7791ec80f9d8b21f327cd506adb512868174893') validpgpkeys=(ED31791B2C5C1613AF388B8AD113FCAC3C4E599F) # Nikolaus Rath +prepare() { + cd fuse-$pkgver + patch -p1 -i "$srcdir/fuse-closefrom.patch" + patch -p1 -i "$srcdir/fuse-loongarch.patch" +} + build() { cd fuse-$pkgver diff --git a/fuse2/fuse-closefrom.patch b/fuse2/fuse-closefrom.patch new file mode 100644 index 0000000000..184dcb42de --- /dev/null +++ b/fuse2/fuse-closefrom.patch @@ -0,0 +1,22 @@ +Index: fuse-2.9.9/util/ulockmgr_server.c +=================================================================== +--- fuse-2.9.9.orig/util/ulockmgr_server.c ++++ fuse-2.9.9/util/ulockmgr_server.c +@@ -124,7 +124,7 @@ static int receive_message(int sock, voi + return res; + } + +-static int closefrom(int minfd) ++static int closefromfd(int minfd) + { + DIR *dir = opendir("/proc/self/fd"); + if (dir) { +@@ -384,7 +384,7 @@ int main(int argc, char *argv[]) + dup2(nullfd, 1); + } + close(3); +- closefrom(5); ++ closefromfd(5); + while (1) { + char c; + int sock; diff --git a/fuse2/fuse-loongarch.patch b/fuse2/fuse-loongarch.patch new file mode 100644 index 0000000000..94a05cb1d8 --- /dev/null +++ b/fuse2/fuse-loongarch.patch @@ -0,0 +1,28 @@ +Index: 
fuse-2.9.9/lib/fuse_loop_mt.c +=================================================================== +--- fuse-2.9.9.orig/lib/fuse_loop_mt.c ++++ fuse-2.9.9/lib/fuse_loop_mt.c +@@ -6,11 +6,6 @@ + See the file COPYING.LIB. + */ + +-#include "fuse_lowlevel.h" +-#include "fuse_misc.h" +-#include "fuse_kernel.h" +-#include "fuse_i.h" +- + #include + #include + #include +@@ -20,6 +15,11 @@ + #include + #include + ++#include "fuse_lowlevel.h" ++#include "fuse_misc.h" ++#include "fuse_kernel.h" ++#include "fuse_i.h" ++ + /* Environment var controlling the thread stack size */ + #define ENVNAME_THREAD_STACK "FUSE_THREAD_STACK" + diff --git a/fwupd-efi/0001-add-support-for-loongarch64.patch b/fwupd-efi/0001-add-support-for-loongarch64.patch new file mode 100644 index 0000000000..e66a3e4028 --- /dev/null +++ b/fwupd-efi/0001-add-support-for-loongarch64.patch @@ -0,0 +1,315 @@ +diff --git a/efi/crt0/crt0-efi-loongarch64.S b/efi/crt0/crt0-efi-loongarch64.S +new file mode 100644 +index 0000000..416a316 +--- /dev/null ++++ b/efi/crt0/crt0-efi-loongarch64.S +@@ -0,0 +1,172 @@ ++/* ++ * crt0-efi-loongarch64.S - PE/COFF header for LoongArch64 EFI applications ++ * ++ * Copyright (C) 2014 Linaro Ltd. ++ * ++ * Redistribution and use in source and binary forms, with or without ++ * modification, are permitted provided that the following conditions ++ * are met: ++ * 1. Redistributions of source code must retain the above copyright ++ * notice and this list of conditions, without modification. ++ * 2. The name of the author may not be used to endorse or promote products ++ * derived from this software without specific prior written permission. ++ * ++ * Alternatively, this software may be distributed under the terms of the ++ * GNU General Public License as published by the Free Software Foundation; ++ * either version 2 of the License, or (at your option) any later version. ++ */ ++ ++ .section .text.head ++ ++ /* ++ * Magic "MZ" signature for PE/COFF ++ */ ++ .globl ImageBase ++ImageBase: ++ .ascii "MZ" ++ .skip 58 // 'MZ' + pad + offset == 64 ++ .long pe_header - ImageBase // Offset to the PE header. ++pe_header: ++ .ascii "PE" ++ .short 0 ++coff_header: ++ .short 0x6264 // LoongArch64 ++ .short 4 // nr_sections ++ .long 0 // TimeDateStamp ++ .long 0 // PointerToSymbolTable ++ .long 1 // NumberOfSymbols ++ .short section_table - optional_header // SizeOfOptionalHeader ++ .short 0x206 // Characteristics. 
++ // IMAGE_FILE_DEBUG_STRIPPED | ++ // IMAGE_FILE_EXECUTABLE_IMAGE | ++ // IMAGE_FILE_LINE_NUMS_STRIPPED ++optional_header: ++ .short 0x20b // PE32+ format ++ .byte 0x02 // MajorLinkerVersion ++ .byte 0x14 // MinorLinkerVersion ++ .long _text_size // SizeOfCode ++ .long _alldata_size // SizeOfInitializedData ++ .long 0 // SizeOfUninitializedData ++ .long _start - ImageBase // AddressOfEntryPoint ++ .long _start - ImageBase // BaseOfCode ++ ++extra_header_fields: ++ .quad 0 // ImageBase ++ .long 0x1000 // SectionAlignment ++ .long 0x200 // FileAlignment ++ .short 0 // MajorOperatingSystemVersion ++ .short 0 // MinorOperatingSystemVersion ++ .short 0 // MajorImageVersion ++ .short 0 // MinorImageVersion ++ .short 0 // MajorSubsystemVersion ++ .short 0 // MinorSubsystemVersion ++ .long 0 // Win32VersionValue ++ ++ .long _erodata - ImageBase // SizeOfImage ++ ++ // Everything before the kernel image is considered part of the header ++ .long _start - ImageBase // SizeOfHeaders ++ .long 0 // CheckSum ++ .short EFI_SUBSYSTEM // Subsystem ++ .short 0 // DllCharacteristics ++ .quad 0 // SizeOfStackReserve ++ .quad 0 // SizeOfStackCommit ++ .quad 0 // SizeOfHeapReserve ++ .quad 0 // SizeOfHeapCommit ++ .long 0 // LoaderFlags ++ .long 0x6 // NumberOfRvaAndSizes ++ ++ .quad 0 // ExportTable ++ .quad 0 // ImportTable ++ .quad 0 // ResourceTable ++ .quad 0 // ExceptionTable ++ .quad 0 // CertificationTable ++ .quad 0 // BaseRelocationTable ++ ++ // Section table ++section_table: ++ .ascii ".text\0\0\0" ++ .long _evtext - _start // VirtualSize ++ .long _start - ImageBase // VirtualAddress ++ .long _etext - _start // SizeOfRawData ++ .long _start - ImageBase // PointerToRawData ++ ++ .long 0 // PointerToRelocations (0 for executables) ++ .long 0 // PointerToLineNumbers (0 for executables) ++ .short 0 // NumberOfRelocations (0 for executables) ++ .short 0 // NumberOfLineNumbers (0 for executables) ++ /* ++ * EFI_IMAGE_SCN_MEM_READ | EFI_IMAGE_SCN_MEM_EXECUTE | EFI_IMAGE_SCN_CNT_CODE ++ */ ++ .long 0x60000020 // Characteristics (section flags) ++ ++ .ascii ".data\0\0\0" ++ .long _data_vsize // VirtualSize ++ .long _data - ImageBase // VirtualAddress ++ .long _data_size // SizeOfRawData ++ .long _data - ImageBase // PointerToRawData ++ ++ .long 0 // PointerToRelocations (0 for executables) ++ .long 0 // PointerToLineNumbers (0 for executables) ++ .short 0 // NumberOfRelocations (0 for executables) ++ .short 0 // NumberOfLineNumbers (0 for executables) ++ /* ++ * EFI_IMAGE_SCN_MEM_WRITE | EFI_IMAGE_SCN_MEM_READ | EFI_IMAGE_SCN_CNT_INITIALIZED_DATA ++ */ ++ .long 0xc0000040 // Characteristics (section flags) ++ ++ .ascii ".sbat\0\0\0" ++ .long _sbat_vsize // VirtualSize ++ .long _sbat - ImageBase // VirtualAddress ++ .long _sbat_size // SizeOfRawData ++ .long _sbat - ImageBase // PointerToRawData ++ ++ .long 0 // PointerToRelocations (0 for executables) ++ .long 0 // PointerToLineNumbers (0 for executables) ++ .short 0 // NumberOfRelocations (0 for executables) ++ .short 0 // NumberOfLineNumbers (0 for executables) ++ /* ++ * EFI_IMAGE_SCN_MEM_READ | EFI_IMAGE_SCN_ALIGN_8BYTES | EFI_IMAGE_SCN_CNT_INITIALIZED_DATA ++ */ ++ .long 0x40400040 // Characteristics (section flags) ++ ++ .ascii ".rodata\0" ++ .long _rodata_vsize // VirtualSize ++ .long _rodata - ImageBase // VirtualAddress ++ .long _rodata_size // SizeOfRawData ++ .long _rodata - ImageBase // PointerToRawData ++ ++ .long 0 // PointerToRelocations (0 for executables) ++ .long 0 // PointerToLineNumbers (0 for executables) ++ .short 0 // 
NumberOfRelocations (0 for executables) ++ .short 0 // NumberOfLineNumbers (0 for executables) ++ /* ++ * EFI_IMAGE_SCN_MEM_READ | EFI_IMAGE_SCN_ALIGN_8BYTES | EFI_IMAGE_SCN_CNT_INITIALIZED_DATA ++ */ ++ .long 0x40400040 // Characteristics (section flags) ++ ++ .align 12 ++ ++ .globl _start ++ .type _start, @function ++_start: ++ addi.d $sp, $sp, -24 ++ st.d $ra, $sp, 0 ++ st.d $a0, $sp, 8 ++ st.d $a1, $sp, 16 ++ ++ move $a2, $a0 // a2: ImageHandle ++ move $a3, $a1 // a3: SystemTable ++ la.local $a0, ImageBase // a0: ImageBase ++ la.local $a1, _DYNAMIC // a1: DynamicSection ++ bl _relocate ++ bnez $a0, 0f ++ ++ ld.d $a0, $sp, 8 ++ ld.d $a1, $sp, 16 ++ bl efi_main ++ ++0: ld.d $ra, $sp, 0 ++ addi.d $sp, $sp, 24 ++ jr $ra ++ .end _start +diff --git a/efi/lds/elf_loongarch64_efi.lds b/efi/lds/elf_loongarch64_efi.lds +new file mode 100644 +index 0000000..899e352 +--- /dev/null ++++ b/efi/lds/elf_loongarch64_efi.lds +@@ -0,0 +1,103 @@ ++OUTPUT_FORMAT("elf64-loongarch", "elf64-loongarch", "elf64-loongarch") ++OUTPUT_ARCH(loongarch) ++ENTRY(_start) ++SECTIONS ++{ ++ .text 0x0 : { ++ _text = .; ++ *(.text.head) ++ *(.text) ++ *(.text.*) ++ *(.gnu.linkonce.t.*) ++ _evtext = .; ++ . = ALIGN(4096); ++ } ++ _etext = .; ++ _text_size = . - _text; ++ _text_vsize = _evtext - _text; ++ ++ . = ALIGN(4096); ++ .data : ++ { ++ _data = .; ++ *(.sdata) ++ *(.data) ++ *(.data1) ++ *(.data.*) ++ *(.got.plt) ++ *(.got) ++ ++ *(.dynamic) ++ ++ /* the EFI loader doesn't seem to like a .bss section, so we stick ++ it all into .data: */ ++ . = ALIGN(16); ++ _bss = .; ++ *(.sbss) ++ *(.scommon) ++ *(.dynbss) ++ *(.bss) ++ *(COMMON) ++ _evdata = .; ++ . = ALIGN(4096); ++ _bss_end = .; ++ } ++ _edata = .; ++ _data_vsize = _evdata - _data; ++ _data_size = . - _data; ++ ++ /* ++ * Note that _sbat must be the beginning of the data, and _esbat must be the ++ * end and must be before any section padding. The sbat self-check uses ++ * _esbat to find the bounds of the data, and if the padding is included, the ++ * CSV parser (correctly) rejects the data as having NUL values in one of the ++ * required columns. ++ */ ++ . = ALIGN(4096); ++ .sbat : ++ { ++ _sbat = .; ++ *(.sbat) ++ *(.sbat.*) ++ _esbat = .; ++ . = ALIGN(4096); ++ _epsbat = .; ++ } ++ _sbat_size = _epsbat - _sbat; ++ _sbat_vsize = _esbat - _sbat; ++ ++ . = ALIGN(4096); ++ .rodata : ++ { ++ _rodata = .; ++ *(.rela.dyn) ++ *(.rela.plt) ++ *(.rela.got) ++ *(.rela.data) ++ *(.rela.data*) ++ ++ *(.rodata*) ++ *(.srodata) ++ *(.dynsym) ++ *(.dynstr) ++ . = ALIGN(16); ++ *(.note.gnu.build-id) ++ . = ALIGN(4096); ++ *(.vendor_cert) ++ *(.data.ident) ++ _evrodata = .; ++ . = ALIGN(4096); ++ } ++ _erodata = .; ++ _rodata_size = . - _rodata; ++ _rodata_vsize = _evrodata - _rodata; ++ _alldata_size = . 
- _data; ++ ++ /DISCARD/ : ++ { ++ *(.rel.reloc) ++ *(.eh_frame) ++ *(.note.GNU-stack) ++ } ++ .comment 0 : { *(.comment) } ++} +diff --git a/efi/meson.build b/efi/meson.build +index 2bba4f5..e65b0b1 100644 +--- a/efi/meson.build ++++ b/efi/meson.build +@@ -77,6 +77,9 @@ endif + if host_cpu == 'arm' or (host_cpu == 'aarch64' and (objcopy_version.version_compare ('< 2.38') or coff_header_in_crt0)) + objcopy_manualsymbols = true + generate_binary_extra = ['--objcopy-manualsymbols'] ++elif host_cpu == 'loongarch64' and (objcopy_version.version_compare ('< 2.41') or coff_header_in_crt0) ++ objcopy_manualsymbols = true ++ generate_binary_extra = ['--objcopy-manualsymbols'] + else + objcopy_manualsymbols = false + generate_binary_extra = [] +diff --git a/meson.build b/meson.build +index 1526fd7..9467e23 100644 +--- a/meson.build ++++ b/meson.build +@@ -33,6 +33,9 @@ elif host_cpu == 'arm' + elif host_cpu == 'aarch64' + EFI_MACHINE_TYPE_NAME = 'aa64' + gnu_efi_arch = 'aarch64' ++elif host_cpu == 'loongarch64' ++ EFI_MACHINE_TYPE_NAME = 'loongarch64' ++ gnu_efi_arch = 'loongarch64' + else + error('Unknown host_cpu ' + host_cpu) + endif diff --git a/fwupd-efi/PKGBUILD b/fwupd-efi/PKGBUILD index 0357c788b2..0e55c5a104 100644 --- a/fwupd-efi/PKGBUILD +++ b/fwupd-efi/PKGBUILD @@ -9,18 +9,25 @@ arch=('loong64' 'x86_64') url='https://github.com/fwupd/fwupd-efi' license=('LGPL') makedepends=('meson' 'gnu-efi' 'python-pefile') -source=("https://people.freedesktop.org/~hughsient/releases/${pkgname}-${pkgver}.tar.xz"{,.asc}) +source=("https://people.freedesktop.org/~hughsient/releases/${pkgname}-${pkgver}.tar.xz"{,.asc} + 0001-add-support-for-loongarch64.patch) sha256sums=('27ddbc0a4e3209543eab49e69f1a014eb9ed95f93ff51f966f31082d3b7bffbc' - 'SKIP') + 'SKIP' + 'be38120643c6afc770733d70fb3191abb63b3f71986140ec219fae174395492d') validpgpkeys=('163EB50119225DB3DF8F49EA17ACBA8DFA970E17') # Richard Hughes +prepare() { + cd "$pkgname-$pkgver" + patch -p1 -i "$srcdir/0001-add-support-for-loongarch64.patch" +} + build() { arch-meson ${pkgname}-${pkgver} build \ -D efi_sbat_distro_id='arch' \ -D efi_sbat_distro_summary='Arch Linux' \ -D efi_sbat_distro_pkgname=${pkgname} \ -D efi_sbat_distro_version=${pkgver} \ - -D efi_sbat_distro_url="https://archlinux.org/packages/community/x86_64/${pkgname}/" + -D efi_sbat_distro_url="https://archlinux.org/packages/community/loongarch64/${pkgname}/" ninja -C build } diff --git a/fwupd/PKGBUILD b/fwupd/PKGBUILD index c0bf2102a6..7b4cc82bf7 100644 --- a/fwupd/PKGBUILD +++ b/fwupd/PKGBUILD @@ -44,7 +44,6 @@ makedepends=( meson noto-fonts noto-fonts-cjk - pandoc python-cairo python-gobject python-pillow @@ -55,22 +54,30 @@ checkdepends=(umockdev) source=( "https://github.com/fwupd/fwupd/releases/download/${pkgver}/${pkgname}-${pkgver}.tar.xz"{,.asc} fwupd.sysusers + fwupd-1.9.5-loong64.patch ) sha512sums=('d0b2db6fb9dc231022b8109e0c62b682173a89755e759972f6e4b33fa9a8c31426b3bef3fab9b931ef591adcc867962e8251632223e5460122ea499df636b214' 'SKIP' '637203080b55eda74a659f58c853a9a723a2dad5da70915b2b0e036c6145a649468ebec700cc83975d9cb5378b9dced8b3a3b26bdbcc75ddc774837355e75deb') b2sums=('f5679c128cd4c4278c47fb098b0d38d7c4d0a8a0f4f0421b72b1cca934622ecb9d1974f7b67f49f9c9c2f805664d3c16ff6861c0572c95a863b4ebd7858e0a78' 'SKIP' - 'e65ca7da22a20a40882cfc1fe4479643f9a38c90a4f2c3e71e6e5e3de1d6db212a0f17d600097619fe3cdb0a9b860422f8b0b9a9d45441518e51a7eb12a918bb') + 'e65ca7da22a20a40882cfc1fe4479643f9a38c90a4f2c3e71e6e5e3de1d6db212a0f17d600097619fe3cdb0a9b860422f8b0b9a9d45441518e51a7eb12a918bb' + 
'c1da6c2cee029024cb94a2a94559499f635a7c36db8b03324c8f506a4390c6171017d8e7c1095ed972d7dd404b53b236f8950cdbf92e308fb9585bd2d48994ac') validpgpkeys=(163EB50119225DB3DF8F49EA17ACBA8DFA970E17) # Richard Hughes +prepare() { + cd ${pkgname}-${pkgver} + patch -p1 -i "$srcdir/fwupd-1.9.5-loong64.patch" +} + build() { arch-meson ${pkgname}-${pkgver} build \ -D b_lto=false \ - -D docs=enabled \ + -D docs=disabled \ -D plugin_amdgpu=disabled \ + -D plugin_msr=disabled \ -D launchd=disabled \ - -D plugin_intel_spi=true \ + -D plugin_intel_spi=false \ -D supported_build=enabled \ -D efi_binary=false \ -D systemd_unit_user=fwupd @@ -113,10 +120,10 @@ package_fwupd() { mv "${pkgdir}"/usr/bin/{,fwupd-}dbxtool mv "${pkgdir}"/usr/share/man/man1/{,fwupd-}dbxtool.1 # Remove msr module-load config as it is built-in - rm "${pkgdir}"/usr/lib/modules-load.d/fwupd-msr.conf - rmdir "${pkgdir}"/usr/lib/modules-load.d +# rm "${pkgdir}"/usr/lib/modules-load.d/fwupd-msr.conf +# rmdir "${pkgdir}"/usr/lib/modules-load.d - _pick docs "${pkgdir}"/usr/share/doc/{,fwupd/}{libfwupdplugin,libfwupd} +# _pick docs "${pkgdir}"/usr/share/doc/{,fwupd/}{libfwupdplugin,libfwupd} } package_fwupd-docs() { diff --git a/fwupd/fwupd-1.9.5-loong64.patch b/fwupd/fwupd-1.9.5-loong64.patch new file mode 100644 index 0000000000..bc2547463e --- /dev/null +++ b/fwupd/fwupd-1.9.5-loong64.patch @@ -0,0 +1,39 @@ +From 6e776b7e9c2bb393d3c7fceda3caffcd76ab8146 Mon Sep 17 00:00:00 2001 +From: Xiaotian Wu +Date: Thu, 12 May 2022 19:08:38 +0800 +Subject: [PATCH] Add support for loongarch64 + +--- + meson.build | 2 ++ + plugins/uefi-capsule/fu-uefi-common.c | 2 ++ + 2 files changed, 4 insertions(+) + +diff --git a/meson.build b/meson.build +index b3feada65..ce656fdc9 100644 +--- a/meson.build ++++ b/meson.build +@@ -431,6 +431,8 @@ if build_standalone + EFI_MACHINE_TYPE_NAME = 'arm' + elif host_cpu == 'aarch64' + EFI_MACHINE_TYPE_NAME = 'aa64' ++ elif host_cpu == 'loongarch64' ++ EFI_MACHINE_TYPE_NAME = 'loongarch64' + else + EFI_MACHINE_TYPE_NAME = '' + endif +diff --git a/plugins/uefi-capsule/fu-uefi-common.c b/plugins/uefi-capsule/fu-uefi-common.c +index b1da5100a..138109b17 100644 +--- a/plugins/uefi-capsule/fu-uefi-common.c ++++ b/plugins/uefi-capsule/fu-uefi-common.c +@@ -22,6 +22,8 @@ fu_uefi_bootmgr_get_suffix(GError **error) + {64, "x64"}, + #elif defined(__aarch64__) + {64, "aa64"}, ++#elif defined(__loongarch64) ++ {64, "loongarch64"}, + #endif + #if defined(__x86_64__) || defined(__i386__) || defined(__i686__) + {32, "ia32"}, +-- +2.42.0 + diff --git a/gcc12/PKGBUILD b/gcc12/PKGBUILD index b5708c4f4a..ae16e7ecd7 100644 --- a/gcc12/PKGBUILD +++ b/gcc12/PKGBUILD @@ -20,8 +20,8 @@ url='https://gcc.gnu.org' makedepends=( binutils doxygen - gcc-ada - gcc-d +# gcc-ada +# gcc-d git libisl libmpc @@ -102,6 +102,8 @@ build() { # TODO: properly deal with the build issues resulting from this CFLAGS=${CFLAGS/-Werror=format-security/} CXXFLAGS=${CXXFLAGS/-Werror=format-security/} + CFLAGS=${CFLAGS/-mlsx /} + CXXFLAGS=${CXXFLAGS/-mlsx /} "$srcdir/gcc/configure" \ --enable-languages=c,c++,fortran \ @@ -134,15 +136,14 @@ package_gcc12-libs() { cd gcc-build make -C $CHOST/libgcc DESTDIR="$pkgdir" install-shared - mv "${pkgdir}/${_libdir}"/../lib/* "${pkgdir}/${_libdir}" - rmdir "${pkgdir}/${_libdir}"/../lib +#mv "${pkgdir}/${_libdir}"/../lib/* "${pkgdir}/${_libdir}" +# rmdir "${pkgdir}/${_libdir}"/../lib rm -f "$pkgdir/$_libdir/libgcc_eh.a" for lib in libasan.so \ libatomic.so \ libgfortran.so \ libgomp.so \ - libitm.so \ liblsan.so \ libquadmath.so \ 
libstdc++.so \ @@ -178,6 +179,7 @@ package_gcc12() { make -C $CHOST/libgcc DESTDIR="$pkgdir" install rm -f "$pkgdir"/usr/lib/libgcc_s.so* + rm -f "$pkgdir"/usr/lib/gcc/loongarch64-unknown-linux-gnu/12.3.0/libgcc_s.so* make -C $CHOST/libstdc++-v3/src DESTDIR="$pkgdir" install make -C $CHOST/libstdc++-v3/include DESTDIR="$pkgdir" install @@ -194,12 +196,12 @@ package_gcc12() { "$pkgdir/${_libdir}/bfd-plugins/" make -C $CHOST/libgomp DESTDIR="$pkgdir" install-nodist_{libsubinclude,toolexeclib}HEADERS - make -C $CHOST/libitm DESTDIR="$pkgdir" install-nodist_toolexeclibHEADERS +# make -C $CHOST/libitm DESTDIR="$pkgdir" install-nodist_toolexeclibHEADERS make -C $CHOST/libquadmath DESTDIR="$pkgdir" install-nodist_libsubincludeHEADERS - make -C $CHOST/libsanitizer DESTDIR="$pkgdir" install-nodist_{saninclude,toolexeclib}HEADERS - make -C $CHOST/libsanitizer/asan DESTDIR="$pkgdir" install-nodist_toolexeclibHEADERS - make -C $CHOST/libsanitizer/tsan DESTDIR="$pkgdir" install-nodist_toolexeclibHEADERS - make -C $CHOST/libsanitizer/lsan DESTDIR="$pkgdir" install-nodist_toolexeclibHEADERS +# make -C $CHOST/libsanitizer DESTDIR="$pkgdir" install-nodist_{saninclude,toolexeclib}HEADERS +# make -C $CHOST/libsanitizer/asan DESTDIR="$pkgdir" install-nodist_toolexeclibHEADERS +# make -C $CHOST/libsanitizer/tsan DESTDIR="$pkgdir" install-nodist_toolexeclibHEADERS +# make -C $CHOST/libsanitizer/lsan DESTDIR="$pkgdir" install-nodist_toolexeclibHEADERS make -C libcpp DESTDIR="$pkgdir" install make -C gcc DESTDIR="$pkgdir" install-po @@ -210,7 +212,7 @@ package_gcc12() { # create cc-rs compatible symlinks # https://github.com/rust-lang/cc-rs/blob/1.0.73/src/lib.rs#L2578-L2581 for binary in {c++,g++,gcc,gcc-ar,gcc-nm,gcc-ranlib}; do - ln -s /usr/bin/${binary} "${pkgdir}"/usr/bin/x86_64-linux-gnu-${binary}-12 + ln -s /usr/bin/${binary} "${pkgdir}"/usr/bin/loongarch64-linux-gnu-${binary}-12 done # POSIX conformance launcher scripts for c89 and c99 diff --git a/gcr/PKGBUILD b/gcr/PKGBUILD index 6377c55974..879cc53381 100644 --- a/gcr/PKGBUILD +++ b/gcr/PKGBUILD @@ -36,10 +36,6 @@ pkgver() { git describe --tags | sed 's/[^-]*-g/r&/;s/-/+/g' } -prepare() { - cd gcr -} - build() { local meson_options=( # ssh-agent moved to gcr-4 diff --git a/gendesk/PKGBUILD b/gendesk/PKGBUILD index 388b15514b..e0c8b701d1 100644 --- a/gendesk/PKGBUILD +++ b/gendesk/PKGBUILD @@ -17,8 +17,11 @@ b2sums=('07ba72b713bd240e9035ced7555e02bd544ba402a4cf1e8417804ea3d53d7c43cbd47da options=('!lto') build() { + export GOPROXY=https://goproxy.cn cd $pkgname-$pkgver - go build -v -mod=vendor -trimpath -buildmode=pie -ldflags="-s -w -extldflags $LDFLAGS" + go mod edit -replace=golang.org/x/sys=github.com/golang/sys@master + go mod tidy + go build -v -trimpath -ldflags="-s -w -extldflags $LDFLAGS" } package() { diff --git a/geos/PKGBUILD b/geos/PKGBUILD index 2af3bbf659..1b916b5cf7 100644 --- a/geos/PKGBUILD +++ b/geos/PKGBUILD @@ -15,8 +15,14 @@ depends=(gcc-libs bash) makedepends=(cmake) options=(!emptydirs) changelog=$pkgname.changelog -source=(https://download.osgeo.org/$pkgname/$pkgname-$pkgver.tar.bz2) -sha256sums=('d96db96011259178a35555a0f6d6e75a739e52a495a6b2aa5efb3d75390fbc39') +source=(https://download.osgeo.org/$pkgname/$pkgname-$pkgver.tar.bz2 + $pkgname-$pkgver-gcc13.patch) +sha256sums=('d96db96011259178a35555a0f6d6e75a739e52a495a6b2aa5efb3d75390fbc39' + '61b348c1177814073e4c2926ee0a0787b7eb3c1acd7b6e77095f8e6868a95b1b') + +prepare(){ + patch -d $pkgname-$pkgver -Np1 -i $srcdir/$pkgname-$pkgver-gcc13.patch +} build() { cmake -B build -S 
$pkgname-$pkgver \ diff --git a/geos/geos-3.11.1-gcc13.patch b/geos/geos-3.11.1-gcc13.patch new file mode 100644 index 0000000000..721a5f2ff1 --- /dev/null +++ b/geos/geos-3.11.1-gcc13.patch @@ -0,0 +1,90 @@ +--- geos-3.11.1/include/geos/shape/fractal/HilbertEncoder.h 2023-03-28 19:32:13.476662911 +0800 ++++ geos-3.11.1/include/geos/shape/fractal/HilbertEncoder.h 2023-03-28 19:34:15.021935510 +0800 +@@ -18,6 +18,7 @@ + #include + #include + #include ++#include + + // Forward declarations + namespace geos { +--- geos-3.11.1/tests/unit/capi/GEOSMakeValidTest.cpp 2023-03-28 19:32:13.992668278 +0800 ++++ geos-3.11.1/tests/unit/capi/GEOSMakeValidTest.cpp 2023-03-28 19:35:04.686459884 +0800 +@@ -9,6 +9,7 @@ + #include + #include + #include ++#include + + #include "capi_test_utils.h" + +--- geos-3.11.1/include/geos/geomgraph/Label.h 2022-11-14 03:24:40.000000000 +0800 ++++ geos-3.11.1/include/geos/geomgraph/Label.h 2023-03-31 09:45:20.832282317 +0800 +@@ -26,6 +26,7 @@ + + #include // for operator<< + #include ++#include + + namespace geos { + namespace geomgraph { // geos.geomgraph +--- geos-3.11.1/include/geos/geomgraph/TopologyLocation.h 2022-11-14 03:24:40.000000000 +0800 ++++ geos-3.11.1/include/geos/geomgraph/TopologyLocation.h 2023-03-31 09:47:17.450824619 +0800 +@@ -27,6 +27,7 @@ + #include + #include + #include ++#include + + #ifdef _MSC_VER + #pragma warning(push) +--- geos-3.11.1/include/geos/geomgraph/Depth.h 2022-11-14 03:24:40.000000000 +0800 ++++ geos-3.11.1/include/geos/geomgraph/Depth.h 2023-03-31 09:52:33.839867426 +0800 +@@ -24,6 +24,7 @@ + #include + #include + #include ++#include + + // Forward declarations + namespace geos { +--- geos-3.11.1/include/geos/io/WKTWriter.h 2022-11-14 03:24:40.000000000 +0800 ++++ geos-3.11.1/include/geos/io/WKTWriter.h 2023-03-31 09:57:54.364575348 +0800 +@@ -24,6 +24,7 @@ + + #include + #include ++#include + + #ifdef _MSC_VER + #pragma warning(push) +--- geos-3.11.1/include/geos/operation/overlayng/OverlayLabel.h 2022-11-14 03:24:40.000000000 +0800 ++++ geos-3.11.1/include/geos/operation/overlayng/OverlayLabel.h 2023-03-31 10:01:01.399614523 +0800 +@@ -17,6 +17,7 @@ + #include + #include + #include ++#include + + using geos::geom::Location; + using geos::geom::Position; +--- geos-3.11.1/include/geos/shape/fractal/HilbertCode.h 2022-11-14 03:24:40.000000000 +0800 ++++ geos-3.11.1/include/geos/shape/fractal/HilbertCode.h 2023-03-31 10:09:16.067844595 +0800 +@@ -17,6 +17,7 @@ + + #include + #include ++#include + + // Forward declarations + namespace geos { +--- geos-3.11.1/include/geos/shape/fractal/MortonCode.h 2022-11-14 03:24:40.000000000 +0800 ++++ geos-3.11.1/include/geos/shape/fractal/MortonCode.h 2023-03-31 10:25:30.274038075 +0800 +@@ -17,6 +17,7 @@ + + #include + #include ++#include + + // Forward declarations + namespace geos { diff --git a/gfold/PKGBUILD b/gfold/PKGBUILD index f8c5de5e3e..1e872fbbda 100644 --- a/gfold/PKGBUILD +++ b/gfold/PKGBUILD @@ -16,7 +16,7 @@ sha512sums=('SKIP') prepare() { cd "$pkgname-$pkgver" - cargo fetch --locked --target "$CARCH-unknown-linux-gnu" + cargo fetch --locked --target "$(rustc -vV | sed -n 's/host: //p')" } build() { diff --git a/ghc/PKGBUILD b/ghc/PKGBUILD index 35fc100809..947b739efb 100644 --- a/ghc/PKGBUILD +++ b/ghc/PKGBUILD @@ -11,8 +11,8 @@ shopt -s extglob pkgbase=ghc pkgname=(ghc-libs ghc ghc-static) -pkgver=9.0.2 -pkgrel=3 +pkgver=9.4.7 +pkgrel=1 pkgdesc='The Glasgow Haskell Compiler' arch=('loong64' 'x86_64') url='https://www.haskell.org/ghc/' @@ -23,7 +23,7 @@ 
source=("https://downloads.haskell.org/~ghc/$pkgver/$pkgbase-${pkgver}-src.tar.x ghc-sphinx-6.patch::https://gitlab.haskell.org/ghc/ghc/-/commit/00dc51060881df81258ba3b3bdf447294618a4de.patch ghc-rebuild-doc-index.hook ghc-register.hook ghc-unregister.hook) noextract=("$pkgbase-${pkgver}-src.tar.xz") -sha512sums=('32994c7d2b8f47bae604cd825bfcf9c788d79ce26d1d5f58bd73a7093e11ae6c3c17b31dc0c9e454dbf67ca169b942f92213c388d615768cae86055bf6094dee' +sha512sums=('0fa2b864e90e6b76fa5a12b7ab417c0b945653707cdbc4942fbba58f93baee43b6160bb23aa0aa1282907022956275fa5469875059a743f181987d1b067f30e2' '7a79a5e9591b1ddd78fa349526a85cf9fee64db80639dcf1f3d6edef422fd4454222aedf5581e21489a20d748656265a40e7645004e4d5220280f6214c568e64' 'd69e5222d1169c4224a2b69a13e57fdd574cb1b5932b15f4bc6c7d269a9658dd87acb1be81f52fbcf3cb64f96978b9943d10cee2c21bff0565aaa93a5d35fcae' '5f659651d8e562a4dcaae0f821d272d6e9c648b645b1d6ab1af61e4dd690dc5a4b9c6846753b7f935963f001bb1ae1f40cd77731b71ef5a8dbc079a360aa3f8f' @@ -36,19 +36,19 @@ prepare() { LANG=en_US.UTF-8 bsdtar xf $pkgbase-${pkgver}-src.tar.xz cd ghc-$pkgver - patch -p1 -i ../ghc-sphinx-6.patch + #patch -p1 -i ../ghc-sphinx-6.patch # Suppress warnings for newer LLVM. LlvmMaxVersion is non-inclusive and currently GHC # doesn't work with LLVM 15: https://gitlab.haskell.org/ghc/ghc/-/merge_requests/8999 - sed -i 's/LlvmMaxVersion=13/LlvmMaxVersion=15/' configure.ac + #sed -i 's/LlvmMaxVersion=13/LlvmMaxVersion=15/' configure.ac # Temporary hack to use LLVM 14 for bootstrapping during LLVM 15 rebuild. - export PATH="/usr/lib/llvm14/bin/:$PATH" + #export PATH="/usr/lib/llvm14/bin/:$PATH" cp mk/build.mk{.sample,} sed -i '1iBuildFlavour = perf-llvm' mk/build.mk - ./boot + ./boot.source } build() { diff --git a/git-branchless/PKGBUILD b/git-branchless/PKGBUILD index 78b81fad7c..a9ccbd044d 100644 --- a/git-branchless/PKGBUILD +++ b/git-branchless/PKGBUILD @@ -17,7 +17,7 @@ sha256sums=('f9e13d9a3de960b32fb684a59492defd812bb0785df48facc964478f675f0355') prepare() { cd "$_archive" - cargo fetch --locked --target "$CARCH-unknown-linux-gnu" + cargo fetch --locked --target "`uname -m`-unknown-linux-gnu" } build() { diff --git a/git-bug/PKGBUILD b/git-bug/PKGBUILD index 8a7ceb7023..fd94a093bd 100644 --- a/git-bug/PKGBUILD +++ b/git-bug/PKGBUILD @@ -40,6 +40,8 @@ build() { export CGO_CPPFLAGS="${CPPFLAGS}" export CGO_CFLAGS="${CFLAGS}" export CGO_CXXFLAGS="${CXXFLAGS}" + go mod edit -replace=go.etcd.io/bbolt=go.etcd.io/bbolt@v1.3.8 + go mod tidy local GIT_COMMIT="$(git rev-list -1 HEAD)" local GIT_LAST_TAG="$(git describe --abbrev=0 --tags)" diff --git a/git-cliff/PKGBUILD b/git-cliff/PKGBUILD index 87b6df3d68..222d87d2b1 100644 --- a/git-cliff/PKGBUILD +++ b/git-cliff/PKGBUILD @@ -17,7 +17,7 @@ prepare() { cd "$pkgname-$pkgver" mkdir completions/ mkdir man/ - cargo fetch --locked --target "$CARCH-unknown-linux-gnu" + cargo fetch --locked --target "`uname -m`-unknown-linux-gnu" } build() { diff --git a/git-delta/PKGBUILD b/git-delta/PKGBUILD index d58fe0408a..ac4e9c05fb 100644 --- a/git-delta/PKGBUILD +++ b/git-delta/PKGBUILD @@ -27,7 +27,7 @@ prepare() { cd "$_pkgname-$pkgver" # download dependencies - cargo fetch --locked --target "$CARCH-unknown-linux-gnu" + cargo fetch --locked --target "`uname -m`-unknown-linux-gnu" } build() { diff --git a/git-grab/PKGBUILD b/git-grab/PKGBUILD index 1ad7303ec9..368edd3b2c 100644 --- a/git-grab/PKGBUILD +++ b/git-grab/PKGBUILD @@ -25,7 +25,7 @@ pkgver() { prepare() { cd "$pkgname" - cargo fetch --locked --target "$CARCH-unknown-linux-gnu" + cargo fetch 
--locked --target "`uname -m`-unknown-linux-gnu" } build() { diff --git a/gitlab-exporter/PKGBUILD b/gitlab-exporter/PKGBUILD index 312ab48710..39564be273 100644 --- a/gitlab-exporter/PKGBUILD +++ b/gitlab-exporter/PKGBUILD @@ -20,7 +20,7 @@ validpgpkeys=( prepare() { cd "${pkgname}-${pkgver}" - cargo fetch --locked --target "$CARCH-unknown-linux-gnu" + cargo fetch --locked --target "`uname -m`-unknown-linux-gnu" } build() { diff --git a/gitlab-gitaly/PKGBUILD b/gitlab-gitaly/PKGBUILD index 5a1a76a61e..e6b430c942 100644 --- a/gitlab-gitaly/PKGBUILD +++ b/gitlab-gitaly/PKGBUILD @@ -36,6 +36,13 @@ prepare() { build() { cd "$_archive" + pushd ruby + gem-2.7 sources -r https://rubygems.org/ + gem-2.7 sources -a https://rubygems.loongnix.cn + gem-2.7 sources -c +# bundle-2.7 config force_ruby_platform true # build from sources as some prebuilt gems are not available for newer ruby +# bundle-2.7 install --path vendor/bundle + popd make V=1 BUILD_TAGS="tracer_static tracer_static_jaeger" } diff --git a/gitlab-runner/PKGBUILD b/gitlab-runner/PKGBUILD index b65a65977a..e4e46adc50 100644 --- a/gitlab-runner/PKGBUILD +++ b/gitlab-runner/PKGBUILD @@ -75,6 +75,8 @@ build() { export GOFLAGS="-buildmode=pie -trimpath -ldflags=-linkmode=external -mod=readonly -modcacherw" cd gitlab-runner + go mod edit -replace=github.com/cilium/ebpf=github.com/cilium/ebpf@v0.12.3 + go mod tidy go build -o gitlab-runner . } diff --git a/gitlab-shell/PKGBUILD b/gitlab-shell/PKGBUILD index ca2f7ba55f..2926376ca8 100644 --- a/gitlab-shell/PKGBUILD +++ b/gitlab-shell/PKGBUILD @@ -43,6 +43,7 @@ prepare() { patch -p1 < ../configs.patch # At this point config file should not contain any references to '/home/git' +git clone https://github.com/golang/sys.git } build() { @@ -52,7 +53,10 @@ build() { export CGO_CFLAGS="${CFLAGS}" export CGO_CXXFLAGS="${CXXFLAGS}" export CGO_LDFLAGS="${LDFLAGS}" - export GOFLAGS="-buildmode=pie -trimpath -mod=readonly -modcacherw" + export GOFLAGS="-trimpath -mod=readonly -modcacherw" +export GOPATH="$srcdir/build:/usr/share/gocode" +go mod tidy +go mod edit -replace=golang.org/x/sys@v0.0.0-20210412220455-f1c623a9e750=$srcdir/sys make build } diff --git a/gitoxide/PKGBUILD b/gitoxide/PKGBUILD index e88f716373..05602e4886 100644 --- a/gitoxide/PKGBUILD +++ b/gitoxide/PKGBUILD @@ -15,7 +15,7 @@ b2sums=('319b4838b1555b308cac1f945ff648f29f485df387a10aac48d78d1b1bccb7585a328e3 prepare() { cd "${pkgname}-${pkgver}" - cargo fetch --locked --target "$CARCH-unknown-linux-gnu" + cargo fetch --locked --target "`uname -m`-unknown-linux-gnu" } build() { diff --git a/gitui/PKGBUILD b/gitui/PKGBUILD index db6b058db6..a6f647616a 100644 --- a/gitui/PKGBUILD +++ b/gitui/PKGBUILD @@ -19,7 +19,7 @@ b2sums=('a861679de253f179e40bdae65d161f4407778edeebad1bd5b358601d4e813fe04fe8dca prepare() { cd "${pkgname}-${pkgver}" - cargo fetch --locked --target "$CARCH-unknown-linux-gnu" + cargo fetch --locked --target "`uname -m`-unknown-linux-gnu" } build() { diff --git a/gloox/PKGBUILD b/gloox/PKGBUILD index 53518345fd..1a76551a6c 100644 --- a/gloox/PKGBUILD +++ b/gloox/PKGBUILD @@ -9,8 +9,15 @@ arch=(loong64 x86_64) url="https://camaya.net/gloox" license=("GPL") depends=('libidn' 'gnutls') -source=(https://camaya.net/download/gloox-$pkgver.tar.bz2) -sha256sums=('0b8b7371439bc58d9e51384b616c964b18b7b41b87af1b7855104380eda86ffb') +source=(https://camaya.net/download/gloox-$pkgver.tar.bz2 + gloox-fix-build.patch) +sha256sums=('0b8b7371439bc58d9e51384b616c964b18b7b41b87af1b7855104380eda86ffb' + 
'4476111313494a51f6faca57c246ad7179c4eb4c40c7c47231e8a47f32309212') + +prepare() { + cd "$srcdir"/gloox-$pkgver + patch -p1 -i $srcdir/gloox-fix-build.patch +} build() { cd "$srcdir"/gloox-$pkgver diff --git a/gloox/gloox-fix-build.patch b/gloox/gloox-fix-build.patch new file mode 100644 index 0000000000..1ff6f07873 --- /dev/null +++ b/gloox/gloox-fix-build.patch @@ -0,0 +1,188 @@ +--- a/src/tests/tag/tag_perf.cpp ++++ b/src/tests/tag/tag_perf.cpp +@@ -20,6 +20,7 @@ using namespace gloox; + #include + #include + #include // [s]print[f] ++#include + + #include + +--- a/src/tests/zlib/zlib_perf.cpp ++++ b/src/tests/zlib/zlib_perf.cpp +@@ -24,6 +24,7 @@ using namespace gloox; + #include + #include + #include // [s]print[f] ++#include + + #ifdef HAVE_ZLIB + +Index: gloox-1.0.24/src/examples/adhoc_example.cpp +=================================================================== +--- gloox-1.0.24.orig/src/examples/adhoc_example.cpp ++++ gloox-1.0.24/src/examples/adhoc_example.cpp +@@ -25,6 +25,7 @@ using namespace gloox; + #include + + #include // [s]print[f] ++#include + + + class AdhocTest : public ConnectionListener, AdhocCommandProvider, LogHandler +Index: gloox-1.0.24/src/examples/annotations_example.cpp +=================================================================== +--- gloox-1.0.24.orig/src/examples/annotations_example.cpp ++++ gloox-1.0.24/src/examples/annotations_example.cpp +@@ -22,6 +22,7 @@ using namespace gloox; + #include + + #include // [s]print[f] ++#include + + class AnnotationsTest : public AnnotationsHandler, ConnectionListener + { +Index: gloox-1.0.24/src/examples/bookmarkstorage_example.cpp +=================================================================== +--- gloox-1.0.24.orig/src/examples/bookmarkstorage_example.cpp ++++ gloox-1.0.24/src/examples/bookmarkstorage_example.cpp +@@ -22,6 +22,7 @@ using namespace gloox; + #include + + #include // [s]print[f] ++#include + + class BookmarkStorageTest : public BookmarkHandler, ConnectionListener + { +Index: gloox-1.0.24/src/examples/component_example.cpp +=================================================================== +--- gloox-1.0.24.orig/src/examples/component_example.cpp ++++ gloox-1.0.24/src/examples/component_example.cpp +@@ -22,6 +22,7 @@ using namespace gloox; + #include + + #include // [s]print[f] ++#include + + class ComponentTest : public DiscoHandler, ConnectionListener, LogHandler + { +Index: gloox-1.0.24/src/examples/disco_example.cpp +=================================================================== +--- gloox-1.0.24.orig/src/examples/disco_example.cpp ++++ gloox-1.0.24/src/examples/disco_example.cpp +@@ -24,6 +24,7 @@ using namespace gloox; + #include + + #include // [s]print[f] ++#include + + class DiscoTest : public DiscoHandler, ConnectionListener, LogHandler + { +Index: gloox-1.0.24/src/examples/e2ee_client.cpp +=================================================================== +--- gloox-1.0.24.orig/src/examples/e2ee_client.cpp ++++ gloox-1.0.24/src/examples/e2ee_client.cpp +@@ -29,6 +29,7 @@ using namespace gloox; + #include + + #include // [s]print[f] ++#include + + #ifdef WIN32 + #include +Index: gloox-1.0.24/src/examples/e2ee_server.cpp +=================================================================== +--- gloox-1.0.24.orig/src/examples/e2ee_server.cpp ++++ gloox-1.0.24/src/examples/e2ee_server.cpp +@@ -29,6 +29,7 @@ using namespace gloox; + #include + + #include // [s]print[f] ++#include + + #ifdef WIN32 + #include +Index: gloox-1.0.24/src/examples/privacylist_example.cpp 
+=================================================================== +--- gloox-1.0.24.orig/src/examples/privacylist_example.cpp ++++ gloox-1.0.24/src/examples/privacylist_example.cpp +@@ -22,6 +22,7 @@ using namespace gloox; + #include + + #include // [s]print[f] ++#include + + class PLTest : public PrivacyListHandler, ConnectionListener + { +Index: gloox-1.0.24/src/examples/privatexml_example.cpp +=================================================================== +--- gloox-1.0.24.orig/src/examples/privatexml_example.cpp ++++ gloox-1.0.24/src/examples/privatexml_example.cpp +@@ -22,6 +22,7 @@ using namespace gloox; + #include + + #include // [s]print[f] ++#include + + class PrivateXMLTest : public PrivateXMLHandler, ConnectionListener + { +Index: gloox-1.0.24/src/examples/register_example.cpp +=================================================================== +--- gloox-1.0.24.orig/src/examples/register_example.cpp ++++ gloox-1.0.24/src/examples/register_example.cpp +@@ -22,6 +22,7 @@ using namespace gloox; + #include + + #include // [s]print[f] ++#include + + class RegTest : public RegistrationHandler, ConnectionListener, LogHandler + { +Index: gloox-1.0.24/src/examples/reset_example.cpp +=================================================================== +--- gloox-1.0.24.orig/src/examples/reset_example.cpp ++++ gloox-1.0.24/src/examples/reset_example.cpp +@@ -23,6 +23,7 @@ using namespace gloox; + #include + + #include // [s]print[f] ++#include + + class RosterTest : public ConnectionListener, LogHandler + { +Index: gloox-1.0.24/src/examples/roster_example.cpp +=================================================================== +--- gloox-1.0.24.orig/src/examples/roster_example.cpp ++++ gloox-1.0.24/src/examples/roster_example.cpp +@@ -27,6 +27,7 @@ using namespace gloox; + #include + + #include // [s]print[f] ++#include + + class RosterTest : public RosterListener, ConnectionListener, LogHandler, MessageHandler + { +Index: gloox-1.0.24/src/examples/vcard_example.cpp +=================================================================== +--- gloox-1.0.24.orig/src/examples/vcard_example.cpp ++++ gloox-1.0.24/src/examples/vcard_example.cpp +@@ -26,6 +26,7 @@ using namespace gloox; + #include + + #include // [s]print[f] ++#include + + class VCardTest : public ConnectionListener, LogHandler, VCardHandler + { +Index: gloox-1.0.24/src/examples/ft_recv.cpp +=================================================================== +--- gloox-1.0.24.orig/src/examples/ft_recv.cpp ++++ gloox-1.0.24/src/examples/ft_recv.cpp +@@ -27,6 +27,7 @@ using namespace gloox; + #include + + #include // [s]print[f] ++#include + + #if defined( WIN32 ) || defined( _WIN32 ) + # include diff --git a/glusterfs/PKGBUILD b/glusterfs/PKGBUILD index 4659147377..0b7c407a52 100644 --- a/glusterfs/PKGBUILD +++ b/glusterfs/PKGBUILD @@ -20,7 +20,7 @@ backup=('etc/glusterfs/glusterd.vol' 'etc/glusterfs/glusterd.vol' 'etc/glusterfs/glusterfs-georep-logrotate' 'etc/glusterfs/glusterfs-logrotate') -depends=(fuse python libxml2 libaio liburcu attr rpcbind liburing gperftools) +depends=(fuse python libxml2 libaio liburcu attr rpcbind liburing) makedepends=(rpcsvc-proto) optdepends=('glib2: qemu-block' 'python-prettytable: gluster-georep-sshkey') @@ -46,6 +46,7 @@ build() { --libexecdir=/usr/lib/$pkgname \ --with-systemddir=/usr/lib/systemd/system \ --with-tmpfilesdir=/usr/lib/tmpfiles.d \ + --without-tcmalloc \ --enable-gnfs \ LEXLIB= make diff --git a/gn/PKGBUILD b/gn/PKGBUILD index 670622197b..8cdaa36980 100644 --- a/gn/PKGBUILD 
+++ b/gn/PKGBUILD @@ -10,16 +10,28 @@ url="https://gn.googlesource.com/gn/" license=('BSD') depends=('gcc-libs') makedepends=('clang' 'ninja' 'python' 'git') -source=(git+https://gn.googlesource.com/gn#commit=$_commit) -sha256sums=('SKIP') +source=(git+https://gn.googlesource.com/gn#commit=$_commit +gn-fix-build.patch) +sha256sums=('SKIP' + '072db93d2c4a1486a5a70aa7a17e562e3478bbff5c2d5b219729ddda630bb31e') pkgver() { cd $pkgname echo 0.$(git rev-list --count initial-commit..).$(git rev-parse --short=8 HEAD) } +prepare() { + cd $pkgname + patch -p1 -i $srcdir/gn-fix-build.patch +} + build() { cd $pkgname +# clang didn't support -mlsx + CFLAGS=${CFLAGS/-mlsx /} + CXXFLAGS=${CXXFLAGS/-mlsx /} + CFLAGS=${CFLAGS/-fstack-clash-protection/} + CXXFLAGS=${CXXFLAGS/-fstack-clash-protection/} ./build/gen.py ninja -C out } diff --git a/gn/gn-fix-build.patch b/gn/gn-fix-build.patch new file mode 100644 index 0000000000..59d9ff1515 --- /dev/null +++ b/gn/gn-fix-build.patch @@ -0,0 +1,10 @@ +--- gn/src/base/containers/span.h 2023-03-10 20:15:49.702581313 +0800 ++++ gn/src/base/containers/span.h 2023-03-10 20:17:02.498893761 +0800 +@@ -13,6 +13,7 @@ + #include + #include + #include ++#include + + #include "base/logging.h" + #include "base/stl_util.h" diff --git a/gnome-control-center/PKGBUILD b/gnome-control-center/PKGBUILD index f8763cd05e..841d43594b 100644 --- a/gnome-control-center/PKGBUILD +++ b/gnome-control-center/PKGBUILD @@ -66,7 +66,7 @@ makedepends=( modemmanager python ) -checkdepends=( +makedepends+=( python-dbusmock python-gobject xorg-server-xvfb diff --git a/gnome-dictionary/gnome-dictionary-fix-meson.patch b/gnome-dictionary/gnome-dictionary-fix-meson.patch new file mode 100644 index 0000000000..7b36ac92c5 --- /dev/null +++ b/gnome-dictionary/gnome-dictionary-fix-meson.patch @@ -0,0 +1,34 @@ +diff --git a/data/appdata/meson.build b/data/appdata/meson.build +index 1dc4ed7..d37e144 100644 +--- a/data/appdata/meson.build ++++ b/data/appdata/meson.build +@@ -1,6 +1,6 @@ + appdata_conf = configuration_data() + appdata_conf.set('application_id', application_id) +-i18n.merge_file('appdata', ++i18n.merge_file( + input: configure_file( + input: 'org.gnome.Dictionary.appdata.xml.in.in', + output: 'org.gnome.Dictionary.appdata.xml.in', +diff --git a/data/meson.build b/data/meson.build +index 660e6b8..fb0aefd 100644 +--- a/data/meson.build ++++ b/data/meson.build +@@ -3,7 +3,7 @@ subdir('appdata') + desktop_conf = configuration_data() + desktop_conf.set('icon', application_id) + desktop_conf.set('application_id', application_id) +-i18n.merge_file('desktop', ++i18n.merge_file( + input: configure_file( + input: 'org.gnome.Dictionary.desktop.in.in', + output: 'org.gnome.Dictionary.desktop.in', +@@ -45,7 +45,7 @@ sources = [ + ] + + foreach s: sources +- i18n.merge_file('sources', ++ i18n.merge_file( + input: '@0@.in'.format(s), + output: s, + install: true, diff --git a/gnome-font-viewer/gnome-font-viewer-meson.patch b/gnome-font-viewer/gnome-font-viewer-meson.patch new file mode 100644 index 0000000000..c530794bd2 --- /dev/null +++ b/gnome-font-viewer/gnome-font-viewer-meson.patch @@ -0,0 +1,25 @@ +diff --git a/data/meson.build b/data/meson.build +index bfc9caa..8e42134 100644 +--- a/data/meson.build ++++ b/data/meson.build +@@ -1,6 +1,6 @@ + appdatadir = join_paths(datadir, 'metainfo') + appdata_file = 'org.gnome.font-viewer.appdata.xml' +-merged_appdata = i18n.merge_file(appdata_file, ++merged_appdata = i18n.merge_file( + input: appdata_file + '.in', + output: appdata_file, + po_dir: '../po', +diff 
--git a/src/meson.build b/src/meson.build +index f863d45..826c59b 100644 +--- a/src/meson.build ++++ b/src/meson.build +@@ -39,7 +39,7 @@ desktop_file = 'org.gnome.font-viewer.desktop' + desktop_conf = configuration_data() + desktop_conf.set('VERSION', meson.project_version()) + desktop_conf.set('APPLICATION_ID', application_id) +-i18n.merge_file(desktop_file, ++i18n.merge_file( + input: configure_file(input: desktop_file + '.in.in', + output: desktop_file + '.in', + configuration: desktop_conf), diff --git a/gnome-mplayer/PKGBUILD b/gnome-mplayer/PKGBUILD index a5d275734e..b6b0817a32 100644 --- a/gnome-mplayer/PKGBUILD +++ b/gnome-mplayer/PKGBUILD @@ -5,7 +5,7 @@ pkgname=gnome-mplayer pkgver=1.0.9 -pkgrel=8 +pkgrel=10 pkgdesc='GTK/Gnome interface around MPlayer' arch=('loong64' 'x86_64') url='https://sites.google.com/site/kdekorte2/gnomemplayer' diff --git a/gnome-remote-desktop/PKGBUILD b/gnome-remote-desktop/PKGBUILD index 623a252411..82ddffafdc 100644 --- a/gnome-remote-desktop/PKGBUILD +++ b/gnome-remote-desktop/PKGBUILD @@ -33,7 +33,7 @@ makedepends=( git meson ) -checkdepends=( +makedepends+=( dbus-broker libegl mutter diff --git a/gnome-tetravex/PKGBUILD b/gnome-tetravex/PKGBUILD index 98b0332dd9..76669b62ab 100644 --- a/gnome-tetravex/PKGBUILD +++ b/gnome-tetravex/PKGBUILD @@ -12,8 +12,9 @@ depends=(gtk3) makedepends=(meson gobject-introspection vala yelp-tools appstream-glib git) groups=(gnome-extra) _commit=76c564d4cd5aaaf3e2eea89d538358070b019753 # tags/3.38.2^0 -source=("git+https://gitlab.gnome.org/GNOME/gnome-tetravex.git#commit=$_commit") -sha256sums=('SKIP') +source=("git+https://gitlab.gnome.org/GNOME/gnome-tetravex.git#commit=$_commit" + gnome-tetravex-fix-meson.patch) +sha256sums=('SKIP' 'SKIP') pkgver() { cd $pkgname @@ -22,6 +23,7 @@ pkgver() { prepare() { cd $pkgname + patch -p1 -i $srcdir/gnome-tetravex-fix-meson.patch } build() { diff --git a/gnome-tetravex/gnome-tetravex-fix-meson.patch b/gnome-tetravex/gnome-tetravex-fix-meson.patch new file mode 100644 index 0000000000..8c2cdc2567 --- /dev/null +++ b/gnome-tetravex/gnome-tetravex-fix-meson.patch @@ -0,0 +1,22 @@ +diff --git a/data/meson.build b/data/meson.build +index bb77248..b881e8a 100644 +--- a/data/meson.build ++++ b/data/meson.build +@@ -34,7 +34,7 @@ endif + + # Desktop file + if get_option('build_gui') +- desktop_file = i18n.merge_file ('desktop-file', ++ desktop_file = i18n.merge_file ( + input: project_id + '.desktop.in', + output: project_id + '.desktop', + install: true, +@@ -56,7 +56,7 @@ endif + + # AppData file + if get_option('build_gui') +- appdata_file = i18n.merge_file ('appdata-file', ++ appdata_file = i18n.merge_file ( + input: project_id + '.appdata.xml.in', + output: project_id + '.appdata.xml', + install: true, diff --git a/gnome-tour/PKGBUILD b/gnome-tour/PKGBUILD index afbf907c17..bbd9aff284 100644 --- a/gnome-tour/PKGBUILD +++ b/gnome-tour/PKGBUILD @@ -23,6 +23,12 @@ export CARGO_PROFILE_RELEASE_DEBUG=2 build() { arch-meson $pkgname-$pkgver build + find -name Cargo.lock -exec rm -f {} \; + mkdir -p .cargo + cat > .cargo/config.toml < ++ ++typedef wchar_t CHAR16; ++#define WCHAR CHAR16 + + typedef uint64_t UINT64; + typedef int64_t INT64; +@@ -54,12 +55,13 @@ typedef int32_t INT32; + + typedef uint16_t UINT16; + typedef int16_t INT16; ++ + typedef uint8_t UINT8; ++typedef char CHAR8; + typedef int8_t INT8; +-typedef __WCHAR_TYPE__ WCHAR; + + #undef VOID +-#define VOID void ++typedef void VOID; + + typedef int64_t INTN; + typedef uint64_t UINTN; diff --git a/gnugo/PKGBUILD 
b/gnugo/PKGBUILD index 826d0ec9a7..9a3de24406 100644 --- a/gnugo/PKGBUILD +++ b/gnugo/PKGBUILD @@ -19,6 +19,8 @@ sha256sums=('da68d7a65f44dcf6ce6e4e630b6f6dd9897249d34425920bfdd4e07ff1866a72' build() { cd "${srcdir}/${pkgname}-${pkgver}" CFLAGS+=' -fcommon' # https://wiki.gentoo.org/wiki/Gcc_10_porting_notes/fno_common + CFLAGS=${CFLAGS/-Werror=format-security/} + CXXFLAGS=${CXXFLAGS/-Werror=format-security/} ./configure --prefix=/usr make } diff --git a/go-md2man/PKGBUILD b/go-md2man/PKGBUILD index d6d9b0b575..02f63e3f04 100644 --- a/go-md2man/PKGBUILD +++ b/go-md2man/PKGBUILD @@ -14,8 +14,9 @@ sha256sums=('7ca3a04bb4ab83387538235decc42a535097a05d2fb9f2266d0c47b33119501f') build() { cd "$pkgname-$pkgver" - export GOFLAGS="-buildmode=pie -mod=vendor -trimpath" + export GOFLAGS="-mod=vendor -trimpath" export CGO_LDFLAGS="$LDFLAGS" + export GOPROXY=https://goproxy.cn go build -o go-md2man . ./go-md2man -in=go-md2man.1.md -out=go-md2man.1 } diff --git a/go/PKGBUILD b/go/PKGBUILD index cb5b9cd8bb..522a7fdf2c 100644 --- a/go/PKGBUILD +++ b/go/PKGBUILD @@ -24,14 +24,23 @@ makedepends=(git go) replaces=(go-pie) provides=(go-pie) options=(!strip staticlibs) -source=(https://go.dev/dl/go${pkgver}.src.tar.gz{,.asc}) +source=(https://go.dev/dl/go${pkgver}.src.tar.gz{,.asc} +http://public.loongarch.dev/sources/go/go1.21p.tar.gz/9a6b12102fddd56e29e41a67d0494166/go1.21p.tar.gz) validpgpkeys=('EB4C1BFD4F042F6DDDCCEC917721F63BD38B4796') sha256sums=('124926a62e45f78daabbaedb9c011d97633186a33c238ffc1e25320c02046248' - 'SKIP') + 'SKIP' + 'a0ec920455ec49777d4bba8ce64f80b6c93458e62112c7178fd7ac34ecbfb506') + +prepare() { + cd "$pkgname" + for i in $srcdir/go1.21p/*.patch; + do + patch -p1 -i $i + done +} build() { - export GOARCH=amd64 - export GOAMD64=v1 # make sure we're building for the right x86-64 version + export GOARCH=loong64 export GOROOT_FINAL=/usr/lib/go export GOROOT_BOOTSTRAP=/usr/lib/go @@ -50,7 +59,7 @@ package() { cd "$pkgname" install -d "$pkgdir/usr/bin" "$pkgdir/usr/lib/go" "$pkgdir/usr/share/doc/go" \ - "$pkgdir/usr/lib/go/pkg/linux_amd64_"{dynlink,race} + "$pkgdir/usr/lib/go/pkg/linux_loong64_"{dynlink,race} cp -a bin pkg src lib misc api test "$pkgdir/usr/lib/go" # We can't strip all binaries and libraries, diff --git a/godot/PKGBUILD b/godot/PKGBUILD index a89c7a8a38..2ee1b537cc 100644 --- a/godot/PKGBUILD +++ b/godot/PKGBUILD @@ -38,7 +38,7 @@ build() { CFLAGS="$CFLAGS -fPIC -Wl,-z,relro,-z,now -w -I/usr/include/mbedtls2" \ CXXFLAGS="$CXXFLAGS -fPIC -Wl,-z,relro,-z,now -w -I/usr/include/mbedtls2" \ LINKFLAGS="$LDFLAGS -L/usr/lib/mbedtls2" \ - arch=$CARCH \ + arch=`uname -m` \ builtin_embree=no \ builtin_enet=yes \ builtin_freetype=no \ @@ -82,5 +82,5 @@ package() { install -Dm644 misc/dist/linux/godot.6 "$pkgdir/usr/share/man/man6/godot.6" install -Dm644 misc/dist/linux/org.godotengine.Godot.xml \ "$pkgdir/usr/share/mime/packages/org.godotengine.Godot.xml" - install -Dm755 bin/godot.linuxbsd.editor.$CARCH "$pkgdir/usr/bin/godot" + install -Dm755 bin/godot.linuxbsd.editor.`uname -m` "$pkgdir/usr/bin/godot" } diff --git a/gpg-tui/PKGBUILD b/gpg-tui/PKGBUILD index 3d20767b81..7e85a02580 100644 --- a/gpg-tui/PKGBUILD +++ b/gpg-tui/PKGBUILD @@ -21,7 +21,7 @@ sha512sums=('819481ed5f52c8092a8c711e642653955573250183a436278beb396d069c9734a1b prepare() { cd "$pkgname-$pkgver" mkdir completions/ - cargo fetch --locked --target "$CARCH-unknown-linux-gnu" + cargo fetch --locked --target "`uname -m`-unknown-linux-gnu" } build() { diff --git a/gping/PKGBUILD b/gping/PKGBUILD index 
87650e62ae..eab8b9daa6 100644 --- a/gping/PKGBUILD +++ b/gping/PKGBUILD @@ -16,7 +16,7 @@ options=('!lto') prepare() { mv "$pkgname-$pkgname-v$pkgver" "$pkgname-$pkgver" cd "$pkgname-$pkgver" - cargo fetch --locked --target "$CARCH-unknown-linux-gnu" + cargo fetch --locked --target "`uname -m`-unknown-linux-gnu" } build() { diff --git a/gptfdisk/PKGBUILD b/gptfdisk/PKGBUILD index 289ce803cf..4b2ee74c68 100644 --- a/gptfdisk/PKGBUILD +++ b/gptfdisk/PKGBUILD @@ -37,6 +37,8 @@ prepare() { build() { cd "$srcdir/$pkgname-$pkgver" + CFLAGS=${CFLAGS/-Wformat -Werror=format-security/} + CXXFLAGS=${CXXFLAGS/-Wformat -Werror=format-security/} make } diff --git a/graphviz/PKGBUILD b/graphviz/PKGBUILD index f9a96d33aa..eba587dbe4 100644 --- a/graphviz/PKGBUILD +++ b/graphviz/PKGBUILD @@ -11,8 +11,8 @@ url='https://www.graphviz.org/' license=('EPL') arch=('loong64' 'x86_64') depends=('libltdl' 'gd' 'librsvg' 'ghostscript' 'pango' 'gts' 'gsfonts') -makedepends=('swig' 'mono' 'guile' 'lua' 'perl' 'python' 'r' 'tk' 'qt6-base' 'gtk2') -optdepends=('mono: sharp bindings' +makedepends=('swig' 'guile' 'lua' 'perl' 'python' 'r' 'tk' 'qt6-base' 'gtk2') +optdepends=(#'mono: sharp bindings' 'guile: guile bindings' 'lua: lua bindings' 'perl: perl bindings' diff --git a/grcov/PKGBUILD b/grcov/PKGBUILD index 94b8b841f5..389e6b9920 100644 --- a/grcov/PKGBUILD +++ b/grcov/PKGBUILD @@ -15,7 +15,7 @@ sha256sums=('d8ea0fb293dc5431b502e8ffbd7c9a62336d9e878df9b78a8aed57098fbfb2d8') prepare() { cd "$pkgname-$pkgver" - cargo fetch --locked --target "$CARCH-unknown-linux-gnu" + cargo fetch --locked --target "`uname -m`-unknown-linux-gnu" } build() { diff --git a/greetd-regreet/PKGBUILD b/greetd-regreet/PKGBUILD index 6951b104e7..7cc8c12676 100644 --- a/greetd-regreet/PKGBUILD +++ b/greetd-regreet/PKGBUILD @@ -24,7 +24,7 @@ sha256sums=('a658c91cdf242dfea814f0bfd0c4d877bd39e3af498d36e5024061e3d07ea76b' prepare() { cd "$_archive" - cargo fetch --locked --target "$CARCH-unknown-linux-gnu" + cargo fetch --locked --target "`uname -m`-unknown-linux-gnu" } build() { diff --git a/greetd-tuigreet/PKGBUILD b/greetd-tuigreet/PKGBUILD index 25220b4dfb..f2b7941688 100644 --- a/greetd-tuigreet/PKGBUILD +++ b/greetd-tuigreet/PKGBUILD @@ -23,7 +23,7 @@ sha256sums=('ed371ebe288a3e5782f01681c6c4ed4786b470184af286fa0e7b8898e47c154e' prepare() { cd "$_archive" - cargo fetch --locked --target "$CARCH-unknown-linux-gnu" + cargo fetch --locked --target "`uname -m`-unknown-linux-gnu" } build() { diff --git a/greetd/PKGBUILD b/greetd/PKGBUILD index c361292f81..b24c34290a 100644 --- a/greetd/PKGBUILD +++ b/greetd/PKGBUILD @@ -30,7 +30,7 @@ sha256sums=('a0cec141dea7fd7838b60a52237692d0fd5a0169cf748b8f8379d8409a3768eb' prepare() { cd "$_archive" - cargo fetch --locked --target "$CARCH-unknown-linux-gnu" + cargo fetch --locked --target "`uname -m`-unknown-linux-gnu" } build() { diff --git a/grex/PKGBUILD b/grex/PKGBUILD index 8abce5fb2c..5a73db61d1 100644 --- a/grex/PKGBUILD +++ b/grex/PKGBUILD @@ -15,7 +15,7 @@ sha512sums=('3715160417703a28447012abc70ea39548c4a3aaddebbfc6a3a6dc54dfe8f6856ff prepare() { cd "$pkgname-$pkgver" - cargo fetch --locked --target "$CARCH-unknown-linux-gnu" + cargo fetch --locked --target "`uname -m`-unknown-linux-gnu" } build() { diff --git a/grpc/PKGBUILD b/grpc/PKGBUILD index 8134770353..6e87b1ba0e 100644 --- a/grpc/PKGBUILD +++ b/grpc/PKGBUILD @@ -109,7 +109,7 @@ build() { check() { cd "$srcdir/$pkgbase-$pkgver" local _pyver=$(python -c "import sys; print('{0}{1}'.format(*sys.version_info[:2]))") - 
PYTHONPATH="pyb/lib.linux-$CARCH-cpython-$_pyver" python -c 'import grpc' + PYTHONPATH="pyb/lib.linux-`uname -m`-cpython-$_pyver" python -c 'import grpc' } package_grpc() { diff --git a/gssdp/PKGBUILD b/gssdp/PKGBUILD index d2b1a9fba0..4086995a92 100644 --- a/gssdp/PKGBUILD +++ b/gssdp/PKGBUILD @@ -41,6 +41,7 @@ prepare() { build() { local meson_options=( -D gtk_doc=true + -D manpages=false ) arch-meson gssdp build "${meson_options[@]}" diff --git a/gstreamer/PKGBUILD b/gstreamer/PKGBUILD index 8b2e68de3e..d34e0d2787 100644 --- a/gstreamer/PKGBUILD +++ b/gstreamer/PKGBUILD @@ -10,11 +10,11 @@ pkgname=( gst-plugins-good gst-plugins-bad gst-plugin-gtk - gst-plugin-msdk - gst-plugin-opencv +# gst-plugin-msdk +# gst-plugin-opencv gst-plugin-qml6 gst-plugin-qmlgl - gst-plugin-qsv +# gst-plugin-qsv gst-plugin-va gst-plugin-wpe gst-plugins-ugly @@ -55,7 +55,7 @@ makedepends=( libavtp libbs2b libdca libde265 libdvdnav libfdk-aac libfreeaptx libgme libkate libldac liblrdf libltc libmicrodns libmodplug libmpcdec libnice libopenmpt libsrtp libva libxkbcommon-x11 libxml2 lilv lv2 mjpegtools neon - openal opencv qrencode rtmpdump sbc shaderc soundtouch spandsp svt-hevc + openal opencv qrencode rtmpdump sbc shaderc soundtouch spandsp vulkan-headers vulkan-icd-loader vulkan-validation-layers webrtc-audio-processing wildmidi wpewebkit zbar zvbi zxing-cpp @@ -71,7 +71,7 @@ makedepends=( # gst-python python-gobject ) -checkdepends=(xorg-server-xvfb) +makedepends+=(xorg-server-xvfb) source=( "git+https://gitlab.freedesktop.org/gstreamer/gstreamer.git?signed#tag=$pkgver" "https://gstreamer.freedesktop.org/src/gstreamer-docs/gstreamer-docs-$pkgver.tar.xz"{,.asc} @@ -142,6 +142,10 @@ build() { -D gst-plugins-bad:wic=disabled -D gst-plugins-bad:win32ipc=disabled -D gst-plugins-ugly:sidplay=disabled + -D gst-plugins-bad:opencv=disabled + -D gst-plugins-bad:msdk=disabled + -D gst-plugins-bad:qsv=disabled + -D gst-plugins-bad:svthevcenc=disabled -D gst-editing-services:validate=disabled ) @@ -481,7 +485,7 @@ package_gst-plugins-bad() { libfreeaptx libgme libkate libldac liblrdf libltc libmicrodns libmodplug libmpcdec libopenmpt librsvg libsndfile libsrtp libwebp libxml2 lilv mjpegtools neon nettle openal openexr openjpeg2 openssl opus pango qrencode - rtmpdump sbc soundtouch spandsp srt svt-hevc webrtc-audio-processing + rtmpdump sbc soundtouch spandsp srt webrtc-audio-processing wildmidi x265 zbar zvbi zxing-cpp ) @@ -536,7 +540,7 @@ package_gst-plugins-bad() { usr/lib/gstreamer-1.0/libgstspandsp.so usr/lib/gstreamer-1.0/libgstsrt.so usr/lib/gstreamer-1.0/libgstsrtp.so - usr/lib/gstreamer-1.0/libgstsvthevcenc.so +#usr/lib/gstreamer-1.0/libgstsvthevcenc.so usr/lib/gstreamer-1.0/libgstteletext.so usr/lib/gstreamer-1.0/libgsttimecode.so usr/lib/gstreamer-1.0/libgstttmlsubs.so diff --git a/gtk3/PKGBUILD b/gtk3/PKGBUILD index e5fd4e0be0..16983f4574 100644 --- a/gtk3/PKGBUILD +++ b/gtk3/PKGBUILD @@ -77,6 +77,8 @@ prepare() { } build() { + CFLAGS=${CFLAGS/-Wformat -Werror=format-security/} + CXXFLAGS=${CXXFLAGS/-Wformat -Werror=format-security/} local meson_options=( -D broadway_backend=true -D cloudproviders=true diff --git a/gtk4/gtk-objcopy.patch b/gtk4/gtk-objcopy.patch new file mode 100644 index 0000000000..47dac886af --- /dev/null +++ b/gtk4/gtk-objcopy.patch @@ -0,0 +1,36 @@ +Index: gtk/gtk/meson.build +=================================================================== +--- gtk.orig/gtk/meson.build ++++ gtk/gtk/meson.build +@@ -943,6 +943,7 @@ if not meson.is_cross_build() and build_ + command : [objcopy, + 
'--strip-all', + '--add-symbol','_gtk_resource_data=.data:0', ++ '--alt-elf-eflags=0x03', + '@INPUT@', + '@OUTPUT@']) + +Index: gtk/demos/gtk-demo/meson.build +=================================================================== +--- gtk.orig/demos/gtk-demo/meson.build ++++ gtk/demos/gtk-demo/meson.build +@@ -211,6 +211,7 @@ if build_machine.system() == 'linux' and + command : [objcopy, + '--strip-all', + '--add-symbol','_g_binary_gtkdemo_resource_data=.data:0', ++ '--alt-elf-eflags=0x03', + '@INPUT@', + '@OUTPUT@']) + +Index: gtk/demos/widget-factory/meson.build +=================================================================== +--- gtk.orig/demos/widget-factory/meson.build ++++ gtk/demos/widget-factory/meson.build +@@ -59,6 +59,7 @@ if build_machine.system() == 'linux' and + command : [objcopy, + '--strip-all', + '--add-symbol','_g_binary_widgetfactory_resource_data=.data:0', ++ '--alt-elf-eflags=0x03', + '@INPUT@', + '@OUTPUT@']) + diff --git a/gunicorn/PKGBUILD b/gunicorn/PKGBUILD index 1ca66967e1..8d11f725de 100644 --- a/gunicorn/PKGBUILD +++ b/gunicorn/PKGBUILD @@ -2,6 +2,7 @@ # Contributor: Jeremy "Ichimonji10" Audet # Contributor: pumpkin # Contributor: Vsevolod Balashov +export CHECKFUNC=1 pkgname=gunicorn pkgver=20.1.0 diff --git a/halp/PKGBUILD b/halp/PKGBUILD index 4850b12514..a867393b85 100644 --- a/halp/PKGBUILD +++ b/halp/PKGBUILD @@ -15,7 +15,7 @@ options=('!lto') prepare() { cd "$pkgname-$pkgver" - cargo fetch --locked --target "$CARCH-unknown-linux-gnu" + cargo fetch --locked --target "`uname -m`-unknown-linux-gnu" mkdir completions/ mkdir man/ } diff --git a/handlr/PKGBUILD b/handlr/PKGBUILD index 8afab788a7..d54bae1f1d 100644 --- a/handlr/PKGBUILD +++ b/handlr/PKGBUILD @@ -18,7 +18,7 @@ sha512sums=('55779ad0c01e065678e1a57f338272f1d38057658fe6b7c54f7bc35595575aafe13 prepare() { cd "$pkgname-$pkgver" patch -Np1 -i "../$pkgname-bash-completion.patch" - cargo fetch --locked --target "$CARCH-unknown-linux-gnu" + cargo fetch --locked --target "`uname -m`-unknown-linux-gnu" } build() { diff --git a/haskell-doctest-parallel/PKGBUILD b/haskell-doctest-parallel/PKGBUILD index 132094df4f..9b49fe9644 100644 --- a/haskell-doctest-parallel/PKGBUILD +++ b/haskell-doctest-parallel/PKGBUILD @@ -38,7 +38,7 @@ check() { # == An ugly hack to generate .ghc.environment without cabal-install == # doctest-parallel relies on this cabal-install feature to configure GHCi. 
# https://github.com/martijnbastiaan/doctest-parallel/issues/22 - _ghc_env_filename=.ghc.environment.$CARCH-linux-$(expac %v ghc | cut -d - -f 1) + _ghc_env_filename=.ghc.environment.`uname -m`-linux-$(expac %v ghc | cut -d - -f 1) echo -e "package-db dist/package.conf.inplace" > $_ghc_env_filename ls dist/package.conf.inplace/*.conf | sed 's|.*/\(.*\).conf$|package-id \1|' >> $_ghc_env_filename ls /usr/lib/ghc-9.0.2/package.conf.d/*.conf | sed 's|.*/\(.*\).conf$|package-id \1|' >> $_ghc_env_filename diff --git a/hck/PKGBUILD b/hck/PKGBUILD index 67244389e5..d163eb08d3 100644 --- a/hck/PKGBUILD +++ b/hck/PKGBUILD @@ -25,7 +25,7 @@ prepare() { cd "$pkgname" # download dependencies - cargo fetch --locked --target "$CARCH-unknown-linux-gnu" + cargo fetch --locked --target "`uname -m`-unknown-linux-gnu" } build() { diff --git a/hdf5/PKGBUILD b/hdf5/PKGBUILD index 51435e3952..6a69dc74e0 100644 --- a/hdf5/PKGBUILD +++ b/hdf5/PKGBUILD @@ -12,7 +12,7 @@ arch=(loong64 x86_64) url="https://www.hdfgroup.org/hdf5" license=(custom) depends=(zlib libaec bash) -makedepends=(cmake time gcc-fortran java-environment) +makedepends=(cmake time gcc-fortran) # java-environment) replaces=(hdf5-java) provides=(hdf5-java) source=(https://support.hdfgroup.org/ftp/HDF5/releases/${pkgname}-${pkgver:0:4}/${pkgname}-${pkgver/_/-}/src/${pkgname}-${pkgver/_/-}.tar.bz2) @@ -35,7 +35,7 @@ build() { -DHDF5_BUILD_HL_LIB=ON \ -DHDF5_BUILD_CPP_LIB=ON \ -DHDF5_BUILD_FORTRAN=ON \ - -DHDF5_BUILD_JAVA=ON \ + -DHDF5_BUILD_JAVA=OFF \ -DHDF5_ENABLE_Z_LIB_SUPPORT=ON \ -DHDF5_ENABLE_SZIP_SUPPORT=ON \ -DHDF5_ENABLE_SZIP_ENCODING=ON \ @@ -52,7 +52,7 @@ build() { --enable-hl \ --enable-cxx \ --enable-fortran \ - --enable-java \ + --disable-java \ --with-pic \ --with-zlib \ --with-szlib diff --git a/heh/PKGBUILD b/heh/PKGBUILD index c9e296946e..fb2c6088d3 100644 --- a/heh/PKGBUILD +++ b/heh/PKGBUILD @@ -14,7 +14,7 @@ sha256sums=('c44fc2ef6845080f9a022884dc864d5144636a3a9a7f4bdc8e1793a09d939704') prepare() { cd "$pkgname-$pkgver" - cargo fetch --locked --target "$CARCH-unknown-linux-gnu" + cargo fetch --locked --target "`uname -m`-unknown-linux-gnu" } build() { diff --git a/helix/PKGBUILD b/helix/PKGBUILD index 32d3473ec8..e1c2b4b647 100644 --- a/helix/PKGBUILD +++ b/helix/PKGBUILD @@ -42,7 +42,7 @@ prepare() { # NOTE: we are renaming hx to helix so there is no conflict with hex (providing hx) sed -i "s|hx|helix|g" contrib/completion/hx.* sed -i 's|hx|helix|g' contrib/Helix.desktop - cargo fetch --locked --target "$CARCH-unknown-linux-gnu" + cargo fetch --locked --target "`uname -m`-unknown-linux-gnu" } build() { diff --git a/hexyl/PKGBUILD b/hexyl/PKGBUILD index caeddf1953..5a98ee97fe 100644 --- a/hexyl/PKGBUILD +++ b/hexyl/PKGBUILD @@ -17,7 +17,7 @@ b2sums=('1c2ccbb21c7aad1d2c1daca7ed99009ec2e2a02a96dd8a73d6ba11d00291f0e81afdd79 prepare() { cd ${pkgname}-${pkgver} - cargo fetch --locked --target "$CARCH-unknown-linux-gnu" + cargo fetch --locked --target "`uname -m`-unknown-linux-gnu" } build() { diff --git a/hidapi/hidapi-fix-build.patch b/hidapi/hidapi-fix-build.patch new file mode 100644 index 0000000000..86af3961bc --- /dev/null +++ b/hidapi/hidapi-fix-build.patch @@ -0,0 +1,12 @@ +Index: hidapi-hidapi-0.10.1/configure.ac +=================================================================== +--- hidapi-hidapi-0.10.1.orig/configure.ac ++++ hidapi-hidapi-0.10.1/configure.ac +@@ -13,7 +13,6 @@ LTLDFLAGS="-version-info ${lt_current}:$ + + AC_CONFIG_MACRO_DIR([m4]) + AM_INIT_AUTOMAKE([foreign -Wall -Werror]) +-AC_CONFIG_MACRO_DIR([m4]) + + 
m4_ifdef([AM_PROG_AR], [AM_PROG_AR]) + LT_INIT diff --git a/himalaya/PKGBUILD b/himalaya/PKGBUILD index f3fbb00e0b..6e5185ba2a 100644 --- a/himalaya/PKGBUILD +++ b/himalaya/PKGBUILD @@ -17,7 +17,7 @@ _features='notmuch-backend,pgp-gpg' prepare() { cd "$pkgname-$pkgver" - cargo fetch --locked --target "$CARCH-unknown-linux-gnu" + cargo fetch --locked --target "`uname -m`-unknown-linux-gnu" mkdir -p {completions,man} } diff --git a/hitori/PKGBUILD b/hitori/PKGBUILD index be22d3a528..62fa0e1c72 100644 --- a/hitori/PKGBUILD +++ b/hitori/PKGBUILD @@ -17,8 +17,10 @@ makedepends=( ) groups=(gnome-extra) _commit=53db1397d6a711862023d7a3070e785235a98c87 # tags/44.0^0 -source=("git+https://gitlab.gnome.org/GNOME/hitori.git#commit=$_commit") -sha256sums=('SKIP') +source=("git+https://gitlab.gnome.org/GNOME/hitori.git#commit=$_commit" + hitori-fix-meson.patch) +sha256sums=('SKIP' + '20b67731b04efadbc3f5b03958a25eddacc17c05c842943a4cd99fe37fd0d503') pkgver() { cd hitori @@ -27,6 +29,7 @@ pkgver() { prepare() { cd hitori + patch -p1 -i $srcdir/hitori-fix-meson.patch } build() { diff --git a/hitori/hitori-fix-meson.patch b/hitori/hitori-fix-meson.patch new file mode 100644 index 0000000000..126106e2cd --- /dev/null +++ b/hitori/hitori-fix-meson.patch @@ -0,0 +1,21 @@ +diff --git a/data/meson.build b/data/meson.build +index 97b8e68..c66a233 100644 +--- a/data/meson.build ++++ b/data/meson.build +@@ -1,6 +1,6 @@ + subdir('icons') + +-desktop_file = i18n.merge_file('desktop-file', ++desktop_file = i18n.merge_file( + type: 'desktop', + input: '@0@.desktop.in'.format(application_id), + output: '@0@.desktop'.format(application_id), +@@ -20,7 +20,7 @@ if desktop_file_validate.found() + ) + endif + +-appdata_file = i18n.merge_file('appdata-file', ++appdata_file = i18n.merge_file( + input: '@0@.appdata.xml.in'.format(application_id), + output: '@0@.appdata.xml'.format(application_id), + po_dir: join_paths(meson.source_root(), 'po'), diff --git a/hotdoc/PKGBUILD b/hotdoc/PKGBUILD index fd0e3f3a6d..c6b46f6119 100644 --- a/hotdoc/PKGBUILD +++ b/hotdoc/PKGBUILD @@ -34,7 +34,7 @@ build() { } check() { - cd ${pkgname}-${pkgver}/build/lib.linux-$CARCH-cpython-* + cd ${pkgname}-${pkgver}/build/lib.linux-`uname -m`-cpython-* python -m unittest } diff --git a/hplip/PKGBUILD b/hplip/PKGBUILD index de495dd3d5..b813b8019b 100644 --- a/hplip/PKGBUILD +++ b/hplip/PKGBUILD @@ -81,7 +81,7 @@ build() { ./configure --prefix=/usr \ --enable-qt5 \ --disable-qt4 \ - --enable-hpcups-install \ + --disable-hpcups-install \ --enable-cups-drv-install \ --disable-imageProcessor-build \ --enable-pp-build #--help diff --git a/hspell/PKGBUILD b/hspell/PKGBUILD index 8124786ffc..3bee1f35f7 100644 --- a/hspell/PKGBUILD +++ b/hspell/PKGBUILD @@ -54,10 +54,10 @@ package_hunspell-he() { popd # Install webengine dictionaries - install -d "$pkgdir"/usr/share/qt{,6}/qtwebengine_dictionaries/ - for _file in "$pkgdir"/usr/share/hunspell/*.dic; do - _filename=$(basename $_file) - /usr/lib/qt6/qwebengine_convert_dict $_file "$pkgdir"/usr/share/qt6/qtwebengine_dictionaries/${_filename/\.dic/\.bdic} - ln -rs "$pkgdir"/usr/share/qt6/qtwebengine_dictionaries/${_filename/\.dic/\.bdic} "$pkgdir"/usr/share/qt/qtwebengine_dictionaries/ - done +# install -d "$pkgdir"/usr/share/qt{,6}/qtwebengine_dictionaries/ +# for _file in "$pkgdir"/usr/share/hunspell/*.dic; do +# _filename=$(basename $_file) +# /usr/lib/qt6/qwebengine_convert_dict $_file "$pkgdir"/usr/share/qt6/qtwebengine_dictionaries/${_filename/\.dic/\.bdic} +# ln -rs 
"$pkgdir"/usr/share/qt6/qtwebengine_dictionaries/${_filename/\.dic/\.bdic} "$pkgdir"/usr/share/qt/qtwebengine_dictionaries/ +# done } diff --git a/htmlcxx/PKGBUILD b/htmlcxx/PKGBUILD index 71072d9c69..17f79e6ee8 100644 --- a/htmlcxx/PKGBUILD +++ b/htmlcxx/PKGBUILD @@ -11,8 +11,15 @@ url="http://gcc-libs.sourceforge.net/" license=('LGPL') depends=('glibc') provides=('htmlcxx' 'libhtmlcxx' 'libcss_parser') -source=("https://sourceforge.net/projects/$pkgname/files/v$pkgver/$pkgname-$pkgver.tar.gz") -sha256sums=('5d38f938cf4df9a298a5346af27195fffabfef9f460fc2a02233cbcfa8fc75c8') +source=("https://sourceforge.net/projects/$pkgname/files/v$pkgver/$pkgname-$pkgver.tar.gz" + htmlcxx-la64.patch) +sha256sums=('5d38f938cf4df9a298a5346af27195fffabfef9f460fc2a02233cbcfa8fc75c8' + '79994572157ce98aec93b6e2c3a3c3e93e3f8c848a12ca69cdce90399dcb9a5b') + +prepare() { + cd $pkgname-$pkgver + patch -p1 -i $srcdir/htmlcxx-la64.patch +} build() { cd $pkgname-$pkgver diff --git a/htmlcxx/htmlcxx-la64.patch b/htmlcxx/htmlcxx-la64.patch new file mode 100644 index 0000000000..354479f5ea --- /dev/null +++ b/htmlcxx/htmlcxx-la64.patch @@ -0,0 +1,26 @@ +Index: htmlcxx-0.87/html/CharsetConverter.h +=================================================================== +--- htmlcxx-0.87.orig/html/CharsetConverter.h ++++ htmlcxx-0.87/html/CharsetConverter.h +@@ -17,7 +17,7 @@ namespace htmlcxx + : std::runtime_error(arg) {} + }; + +- CharsetConverter(const std::string &from, const std::string &to) throw (Exception); ++ CharsetConverter(const std::string &from, const std::string &to) noexcept(false); + ~CharsetConverter(); + + std::string convert(const std::string &input); +Index: htmlcxx-0.87/html/CharsetConverter.cc +=================================================================== +--- htmlcxx-0.87.orig/html/CharsetConverter.cc ++++ htmlcxx-0.87/html/CharsetConverter.cc +@@ -7,7 +7,7 @@ + using namespace std; + using namespace htmlcxx; + +-CharsetConverter::CharsetConverter(const string &from, const string &to) throw (Exception) ++CharsetConverter::CharsetConverter(const string &from, const string &to) noexcept(false) + { + mIconvDescriptor = iconv_open(to.c_str(), from.c_str()); + if (mIconvDescriptor == (iconv_t)(-1)) diff --git a/htmlq/PKGBUILD b/htmlq/PKGBUILD index f4b0106a64..3ffb62a0e1 100644 --- a/htmlq/PKGBUILD +++ b/htmlq/PKGBUILD @@ -23,7 +23,7 @@ pkgver() { prepare() { cd "$pkgname" - cargo fetch --locked --target "$CARCH-unknown-linux-gnu" + cargo fetch --locked --target "`uname -m`-unknown-linux-gnu" } build() { diff --git a/httplz/PKGBUILD b/httplz/PKGBUILD index 05f8bc6407..21c8297f08 100644 --- a/httplz/PKGBUILD +++ b/httplz/PKGBUILD @@ -21,7 +21,7 @@ prepare() { cp Cargo.lock "${_pkgname}-${pkgver}" # fetch dependencies cd "${_pkgname}-${pkgver}" - cargo fetch --locked --target="${CARCH}-unknown-linux-gnu" + cargo fetch --locked --target="`uname -m`-unknown-linux-gnu" # rename man page mv "${_pkgname}.md" "${pkgname}.md" mkdir man diff --git a/hub/PKGBUILD b/hub/PKGBUILD index 7248466c06..84259f0d5b 100644 --- a/hub/PKGBUILD +++ b/hub/PKGBUILD @@ -18,13 +18,16 @@ makedepends=('go') source=("hub-$pkgver.tar.gz::https://github.com/github/hub/archive/v$pkgver.tar.gz" "fix-tests.patch::https://github.com/github/hub/commit/f1170f982c414ec53ebf35ad3b250226ec18a952.patch") sha256sums=('e19e0fdfd1c69c401e1c24dd2d4ecf3fd9044aa4bd3f8d6fd942ed1b2b2ad21a' - 'a7d5ab7ea437353b818f4934987455ae9b0d883722015761fac4fe48fd3b5256') + 'fb0213f5bec7a64d9d0f02e7fbe6acb99fdfc3fee5ce0bdc5a57900c36d68f31') 
b2sums=('b0ff7bcebca7f4b515acf412b756da8512dad7c89f1976cc749c68d70303ff7172b6817e92c839c1864b55ac137e67cf1c99fa07e09b01fd76ad3d40be8d2163' - '2fe3848bae8c26d56f8a9fb0fa81902931f70b7ddf7ef31ed0f141c7b1e631561616b38f904a712d5de4d258b4c29bb0758f22763017959849c2f183eb0062d2') + 'fd54c571e0f41a42318c83341f1abd8cf7d0649a33f0a3399b9b5f6f72d1d37b0980bc4f0631b0f4031cdee95217b00693447006b1fc266d66311a552a85c79a') prepare() { cd "${pkgname}-${pkgver}" patch -Np1 < "$srcdir/fix-tests.patch" + cd $srcdir + [ -d sys ] || git clone https://github.com/golang/sys.git + [ -d net ] || git clone https://github.com/golang/net.git } build() { @@ -34,7 +37,12 @@ build() { export CGO_CFLAGS="${CFLAGS}" export CGO_CXXFLAGS="${CXXFLAGS}" export CGO_LDFLAGS="${LDFLAGS}" - export GOFLAGS="-buildmode=pie -trimpath -mod=readonly -modcacherw" + export GOFLAGS="-trimpath -mod=readonly -modcacherw" + + rm -rf vendor/golang.org/x/sys/ + rm -rf vendor/golang.org/x/net/ + cp -r $srcdir/sys vendor/golang.org/x/sys/ + cp -r $srcdir/net vendor/golang.org/x/net/ make make man-pages diff --git a/hugo/PKGBUILD b/hugo/PKGBUILD index f2c5181106..e5281757ea 100644 --- a/hugo/PKGBUILD +++ b/hugo/PKGBUILD @@ -24,7 +24,7 @@ build() { export CGO_CFLAGS="${CFLAGS}" export CGO_CXXFLAGS="${CXXFLAGS}" export CGO_LDFLAGS="${LDFLAGS}" - export GOFLAGS="-buildmode=pie -trimpath -mod=readonly -modcacherw" + export GOFLAGS="-trimpath -mod=readonly -modcacherw" go build -tags extended ./hugo gen man diff --git a/hypercorn/PKGBUILD b/hypercorn/PKGBUILD index a8ed27bf8e..6701bb54ca 100644 --- a/hypercorn/PKGBUILD +++ b/hypercorn/PKGBUILD @@ -1,4 +1,5 @@ # Maintainer: Maxime Gauduin +export CHECKFUNC=1 pkgname=hypercorn pkgver=0.16.0 diff --git a/hyperfine/PKGBUILD b/hyperfine/PKGBUILD index 9847fb7b32..009b9ce4df 100644 --- a/hyperfine/PKGBUILD +++ b/hyperfine/PKGBUILD @@ -21,7 +21,7 @@ sha256sums=('fea7b92922117ed04b9c84bb9998026264346768804f66baa40743c5528bed6b') prepare() { cd "$srcdir/$pkgname-$pkgver" - cargo fetch --locked --target "$CARCH-unknown-linux-gnu" + cargo fetch --locked --target "`uname -m`-unknown-linux-gnu" } build() { diff --git a/i3status-rust/PKGBUILD b/i3status-rust/PKGBUILD index d634547f1c..42561cd2a4 100644 --- a/i3status-rust/PKGBUILD +++ b/i3status-rust/PKGBUILD @@ -25,7 +25,7 @@ b2sums=('94b79a356151284eec41262606c9824f928de0e4a167a468fa23bbc917dbd83097e298d prepare() { cd ${pkgname}-${pkgver} - cargo fetch --locked --target "$CARCH-unknown-linux-gnu" + cargo fetch --locked --target "`uname -m`-unknown-linux-gnu" } diff --git a/iempluginsuite/PKGBUILD b/iempluginsuite/PKGBUILD index bb30f3d146..78b3f6d60a 100644 --- a/iempluginsuite/PKGBUILD +++ b/iempluginsuite/PKGBUILD @@ -209,6 +209,6 @@ package_iempluginsuite-vst3() { ) for name in "${_names[@]}"; do - install -vDm 755 build/$name/${name}_artefacts/None/VST3/$name.vst3/Contents/$CARCH-linux/$name.so -t "$pkgdir/usr/lib/vst3/$name.vst3/Contents/$CARCH-linux/" + install -vDm 755 build/$name/${name}_artefacts/None/VST3/$name.vst3/Contents/`uname -m`-linux/$name.so -t "$pkgdir/usr/lib/vst3/$name.vst3/Contents/`uname -m`-linux/" done } diff --git a/igrep/PKGBUILD b/igrep/PKGBUILD index ad59a1d03b..5f9d78c033 100644 --- a/igrep/PKGBUILD +++ b/igrep/PKGBUILD @@ -15,7 +15,7 @@ options=('!lto') prepare() { cd "$pkgname-$pkgver" - cargo fetch --locked --target "$CARCH-unknown-linux-gnu" + cargo fetch --locked --target "`uname -m`-unknown-linux-gnu" } build() { diff --git a/imagemagick/PKGBUILD b/imagemagick/PKGBUILD index 7a38438586..b03e93833a 100644 --- 
a/imagemagick/PKGBUILD +++ b/imagemagick/PKGBUILD @@ -68,7 +68,7 @@ makedepends=(chrpath checkdepends=(gsfonts ttf-dejavu) replaces=(imagemagick-doc) -source=(https://imagemagick.org/archive/$_tarname.tar.xz{,.asc} +source=(https://imagemagick.org/archive/releases/$_tarname.tar.xz{,.asc} arch-fonts.diff) sha256sums=('ca23eb8f980cccbc9ea1e1490edd0fb33699fd2283790378049a505809a2f7d0' 'SKIP' @@ -102,7 +102,7 @@ build() { --enable-opencl \ --without-gslib \ --with-djvu \ - --with-fftw \ + --without-fftw \ --with-jxl \ --with-lqr \ --with-modules \ diff --git a/imlib2/PKGBUILD b/imlib2/PKGBUILD index 83127fb671..2f52a13b07 100644 --- a/imlib2/PKGBUILD +++ b/imlib2/PKGBUILD @@ -31,8 +31,7 @@ build() { ./configure \ --prefix=/usr \ --sysconfdir=/etc/imlib2 \ - --x-libraries=/usr/lib \ - --enable-amd64 + --x-libraries=/usr/lib make } diff --git a/inkscape/PKGBUILD b/inkscape/PKGBUILD index d1672155c0..9e2bdcc5b8 100644 --- a/inkscape/PKGBUILD +++ b/inkscape/PKGBUILD @@ -11,6 +11,7 @@ url='https://inkscape.org/' license=('GPL' 'LGPL') arch=('loong64' 'x86_64') makedepends=('cmake' 'boost' 'git') +#makedepends=('cmake' 'boost' 'git' 'libsoup') depends=( 'atkmm' diff --git a/ipmitool/PKGBUILD b/ipmitool/PKGBUILD index a965fd02ca..5e972e99d6 100644 --- a/ipmitool/PKGBUILD +++ b/ipmitool/PKGBUILD @@ -4,7 +4,7 @@ pkgname=ipmitool pkgver=1.8.19 -pkgrel=2 +pkgrel=3 pkgdesc="Command-line interface to IPMI-enabled devices" arch=('loong64' 'x86_64') url="https://github.com/ipmitool/ipmitool" @@ -31,4 +31,5 @@ package(){ # Install license install -Dm644 COPYING "${pkgdir}/usr/share/licenses/${pkgname}/LICENSE" + curl https://www.iana.org/assignments/enterprise-numbers.txt > $pkgdir/usr/share/misc/enterprise-numbers } diff --git a/ipp-usb/PKGBUILD b/ipp-usb/PKGBUILD index e2b35b0a69..823dac9123 100644 --- a/ipp-usb/PKGBUILD +++ b/ipp-usb/PKGBUILD @@ -31,7 +31,8 @@ build() { export CGO_CFLAGS="${CFLAGS}" export CGO_CXXFLAGS="${CXXFLAGS}" export CGO_LDFLAGS="${LDFLAGS}" - export GOFLAGS="-buildmode=pie -trimpath -mod=readonly -modcacherw" + export GOPROXY=https://goproxy.cn + export GOFLAGS="-trimpath -mod=readonly -modcacherw" #go build -o "$pkgname" . 
make diff --git a/ipxe/PKGBUILD b/ipxe/PKGBUILD index fe05b8d277..6e01850c06 100644 --- a/ipxe/PKGBUILD +++ b/ipxe/PKGBUILD @@ -2,12 +2,13 @@ pkgname=ipxe pkgver=1.21.1 +_commit=c1834f323f4f6b9b46cd5895b1457a117381363f pkgrel=5 pkgdesc="Network bootloader" arch=(loong64 x86_64) url="https://ipxe.org" license=(GPL2 custom:UBDL) -makedepends=(cdrtools) +makedepends=(git cdrtools) optdepends=( 'bash: for run_ipxe' 'qemu-desktop: for run_ipxe' @@ -18,8 +19,7 @@ optdepends=( # the code signing setup is described in Arch Linux's releng repository: # https://gitlab.archlinux.org/archlinux/releng/-/blob/master/README.rst#code-signing source=( - $pkgname-$pkgver.tar.gz::https://github.com/$pkgname/$pkgname/archive/refs/tags/v$pkgver.tar.gz - $pkgname-1.21.1-fragmented_handshake.patch::https://github.com/ipxe/ipxe/pull/116/commits/ca9f5fc5645c60c00c3ca232d2a492aa1eb29c58.patch + git+https://github.com/$pkgname/$pkgname.git#commit=${_commit} arch.ipxe isrgrootx1.pem lets-encrypt-r3.pem @@ -27,42 +27,42 @@ source=( remote.ipxe general.h run_$pkgname + ipxe-la64.patch ) -sha512sums=('47400975110ed4ab95835aa1b7c8d5a6917c19c5713c6ab88bc0741a3adcd62245a9c4251d1f46fffc45289c6b18bf893f86dbc3b67d3189c41b7f198367ecaa' - '7b021b5720ddf71d3162d2d326a05e4d883562d91effce92a8c90368e69424ccf581d2d3bf6c5e1517e3b6cc5e4ab5edfdcd41c36368488b6d357d2fd00f63b0' - 'ec41e20333ce91b555d4f6a64f211323315a183466d8437404dc548287b96cc8aa4d2953bb5a496677f77e73b7b99752dc973688ade0ccab842fabb8f6127f47' +sha512sums=('SKIP' + 'baa5de9f2714d626041455f7d6764b3ed7a8d6a375bd7721312a5be3ccab93764e1f72d349d404196badf1e751435cc3f7f61800fd643e2035f9616be1770a00' 'b819e7965412dbeecc6417b1e57356d9b10e8e2feb0db1165947e7e1b7d882de226afb8457475f5107393a981d902c7f405500cadb6f61bd2acbca5d8c7cc1f4' '7ff2a6b6501b30806e19446d569db0348c0457c15a9c86f186d957607278ee3cbeedd8307e1ff6dc5c0740545192eada7c0f84cdeb8ff39e6b85bd3fc400a914' 'e3a8c74dcf95cb4b77ed379d2185ef56b6ab2f4c7bdaf5a68876d21aca4d7961b0d8090da7132c6f1797bdca24014dfea032129ee207282797b91e31b6dc4d48' '9162f528cd0080b9231785795f08d3229c52ce3c18ca5a6efcfbea5028e103a294ddef79a0f28ab64b8d0cdcb9e6cdd7fee797766ad2c3d1dbc3891ddeb4b553' - '080b5b7f1a02d6e3a4691e0e65f12a554ede2a783284357f4ef940eb506fec7ec477dc3060c67cf31999af99eba26b0bfa1495cb2a5baa5af4c133bdca2152af' - '4f026baf7d30ef33b660530001b3bcf8189a7d1a11603ccb126957d07070283907c8207dad912ff4c735b8a0376c8a5383fef2235ac3b71ef519d7201c079b93') -b2sums=('03871b5f89c6228a9082bb89c7b102d85e5f3afcd5fe0d93762e220fe162c9c3037a9918f30251fd103835d949335f99109a12559f560a5b686e65a7c24c6501' - '2c1ef1e1ffd1716e29e046ae4bc69e8b98f9116c1cb3d6e2e10a9119256194ea4fd510a9d4bf79b96504fb95f6bef0b2edce9b257d8d360224dfe1ce6029025c' - '13f73fbd49867a087cbb036562f067ee30e3a3718402363fd6c6d318bb819dde5728510c1459d7bb5906bec37469b2046a2ad148175b6ea4fb58ce68ee614d91' + '2522b1a76a466aa0d396d4616de38929ca3198e218f763545220a14f66127618bce2d46179999fd697e1d0f0a585ca1e58347b3a7dd5795331c395e1e5972788' + '4f026baf7d30ef33b660530001b3bcf8189a7d1a11603ccb126957d07070283907c8207dad912ff4c735b8a0376c8a5383fef2235ac3b71ef519d7201c079b93' + '4a74676c26f286811852566a7edf0b3399fd8165550848dd004c7b28bb9bf06990f4f232623ae5e2ba2edcd27b88d422225c0687d883551405e905aad7ce98a8') +b2sums=('SKIP' + '294a510a4ca0d80fcaa2b67f9083ca91ae17270f73bee35728a6c42519599f5d60896d4e279a794a8a0237de3e1a751356d670fb722b6507057303c0f1efec7f' '6d02d871afa45caaa2b22ea2ed48217012aeeb61c50b28e82cc0750344719bdb9ef4b0100abc524b12ec6cb2b1c0084f4d24ce480af87b52aa39d4d3714467ca' 
'44fc45af926d8c0a563b81640764a4ced266f857c72113839dcd5d441c030bb6f78576b04fcbd8b17f645ed4e2701a4634e55755f13210fa880f442ad6fbb5b1' 'a61f76a2ecbf344bb26e064146e4c6821ee195c7b7579cbf8c61d60ded3c3946d53329a8c2e795435ef5498bec97042472f186c13b4e0dc274da34d047f8f326' 'f38eec3584967f9a8d4f9f2cc39803de9fa21fd1406efe802c3422f6de30c79e4cd679e775a886f778a40aacb81b9c4120d7205178284cacf69fa7d43557a906' - 'a69a2dabf23b931aa062d20936510eda6bc9d6a61cded4b5e5960958b2a06642d527bb788b3fae9961dbf5d2ac18c63a6df69db52668cf904b75bd7366117b9b' - '9c7a8eb0f9aafdc336d7eac984b6f1fcbb875d1589fb4b67f45393054f66e916c1157e1bb4e8d02af68e6438dff68a812e57bbf685a0b477634891e49c1c3284') + 'cdcb27a945397e7a8ef5214a31b69c4120ce1608359e0b11bbdb191169d4e10404953dc42bf9351f75beec0d8c6727bc0d053978026e50f145ef0881dae91bc1' + '9c7a8eb0f9aafdc336d7eac984b6f1fcbb875d1589fb4b67f45393054f66e916c1157e1bb4e8d02af68e6438dff68a812e57bbf685a0b477634891e49c1c3284' + 'a044ef24fe2de06ce371f6f8c6b9eeb736d41057f1190c5eb93fd9d91374631bd68502112492d72cf020520ecb94bf3077f163fcf3c005ad286fea979591cf8b') prepare() { # fix issues with fragmented handshakes (e.g. fullchain.pem when using a letsencrypt certificate): # https://github.com/ipxe/ipxe/issues/407 - patch -Np1 -d $pkgname-$pkgver -i ../$pkgname-1.21.1-fragmented_handshake.patch +#patch -Np1 -d $pkgname-$pkgver -i ../$pkgname-1.21.1-fragmented_handshake.patch + patch -Np1 -d $pkgname -i ../ipxe-la64.patch + ln -s $pkgname $pkgname-$pkgver # symlink header with custom configuration into place - ln -sv ../../../../general.h $pkgname-$pkgver/src/config/local/ + ln -sv ../../../../general.h $pkgname/src/config/local/ } build() { local _file _certs="" local _options=( NO_WERROR=1 - bin/ipxe.lkrn - bin/ipxe.pxe - bin-i386-efi/ipxe.efi - bin-x86_64-efi/ipxe.efi + bin-loong64-efi/ipxe.efi -C src ) @@ -80,19 +80,13 @@ build() { make EMBED="$srcdir/arch.ipxe" CERT="$_certs" TRUST="$_certs" "${_options[@]}" # move binaries out of the way - mv -v src/bin/ipxe{,-arch}.lkrn - mv -v src/bin/ipxe{,-arch}.pxe - mv -v src/bin-i386-efi/ipxe{,-arch}.efi - mv -v src/bin-x86_64-efi/ipxe{,-arch}.efi + mv -v src/bin-loong64-efi/ipxe{,-arch}.efi # build remote images make EMBED="$srcdir/remote.ipxe" "${_options[@]}" # move binaries out of the way - mv -v src/bin/ipxe{,-remote}.lkrn - mv -v src/bin/ipxe{,-remote}.pxe - mv -v src/bin-i386-efi/ipxe{,-remote}.efi - mv -v src/bin-x86_64-efi/ipxe{,-remote}.efi + mv -v src/bin-loong64-efi/ipxe{,-remote}.efi # build default images make "${_options[@]}" @@ -103,8 +97,8 @@ package() { local _arch cd $pkgname-$pkgver - install -vDm 644 src/bin/ipxe{,-arch,-remote}.{lkrn,pxe} -t "$pkgdir/usr/share/$pkgname/" - for _arch in i386 x86_64; do +#install -vDm 644 src/bin/ipxe{,-arch,-remote}.{lkrn,pxe} -t "$pkgdir/usr/share/$pkgname/" + for _arch in loong64; do install -vDm 644 src/bin-$_arch-efi/ipxe{,-arch,-remote}.efi -t "$pkgdir/usr/share/$pkgname/$_arch/" done install -vDm 644 COPYING.UBDL -t "$pkgdir/usr/share/licenses/$pkgname/" diff --git a/ipxe/arch.ipxe b/ipxe/arch.ipxe index 929ed800d8..788d8fee0a 100644 --- a/ipxe/arch.ipxe +++ b/ipxe/arch.ipxe @@ -1,4 +1,43 @@ #!ipxe -ifconf -ntp pool.ntp.org -chain https://ipxe.archlinux.org/releng/netboot/archlinux.ipxe || shell +# + +:netconfig +dhcp || goto ipxeshell + +:prompt +prompt --key 0x02 --timeout 3000 Press Ctrl-B for the iPXE command line... && goto ipxeshell || + +# We call the default.ipxe script to show and confirm the ipxe client has enough feature. 
+set next-server-port 80 +chain http://${next-server}:${next-server-port}/default.ipxe || + +echo +echo Failed to boot to default menu. +echo Try to reload iPXE from server. +echo + +:netboot +chain http://58.49.29.194/loongarch/archlinux/netboot/archlinux.ipxe && set server http://58.49.29.194/loongarch/archlinux/ && goto exit || + +:reload +chain http://${gateway}/ipxe.efi && set server http://${gateway} && goto netconfig || + +:ipxeshell +echo +echo mac...............: ${mac} +echo ip................: ${ip} +echo netmask...........: ${netmask} +echo gateway...........: ${gateway} +echo dns...............: ${dns} +echo domain............: ${domain} +echo dhcp-server.......: ${dhcp-server} +echo filename..........: ${filename} +echo next-server.......: ${next-server} +echo hostname..........: ${hostname} +echo uuid..............: ${uuid} +echo serial............: ${serial} +echo +shell + +:exit +exit diff --git a/ipxe/general.h b/ipxe/general.h index 03c4458784..f03b49b8d4 100644 --- a/ipxe/general.h +++ b/ipxe/general.h @@ -4,7 +4,7 @@ #undef CRYPTO_80211_WPA /* WPA Personal, authenticating with passphrase */ // enable additional options -#define NET_PROTO_IPV6 /* IPv6 protocol */ +//#define NET_PROTO_IPV6 /* IPv6 protocol */ #define DOWNLOAD_PROTO_HTTPS /* Secure Hypertext Transfer Protocol */ #define DOWNLOAD_PROTO_NFS /* Network File System Protocol */ #define IMAGE_TRUST_CMD /* Image trust management commands */ diff --git a/ipxe/ipxe-la64.patch b/ipxe/ipxe-la64.patch new file mode 100644 index 0000000000..ca92c0d582 --- /dev/null +++ b/ipxe/ipxe-la64.patch @@ -0,0 +1,15 @@ +diff --git a/src/arch/loong64/Makefile b/src/arch/loong64/Makefile +index fd0bf137..ce2a7505 100644 +--- a/src/arch/loong64/Makefile ++++ b/src/arch/loong64/Makefile +@@ -13,6 +13,10 @@ ifeq ($(CCTYPE),gcc) + MNER_TEST = $(CC) -mno-explicit-relocs -x c -c /dev/null -o /dev/null >/dev/null 2>&1 + MNER_FLAGS := $(shell $(MNER_TEST) && $(ECHO) '-mno-explicit-relocs') + WORKAROUND_CFLAGS += $(MNER_FLAGS) ++ ++MNRX_TEST = $(CC) -mno-relax -x c -c /dev/null -o /dev/null >/dev/null 2>&1 ++MNRX_FLAGS := $(shell $(MNRX_TEST) && $(ECHO) '-mno-relax' || $(ECHO) '-Wa,-mno-relax' ) ++WORKAROUND_CFLAGS += $(MNRX_FLAGS) + endif + + # EFI requires -fshort-wchar, and nothing else currently uses wchar_t diff --git a/ispc/PKGBUILD b/ispc/PKGBUILD index dcd0ddddb1..cef96870bd 100644 --- a/ispc/PKGBUILD +++ b/ispc/PKGBUILD @@ -11,7 +11,7 @@ arch=(loong64 x86_64) url="https://ispc.github.io/" license=(BSD) depends=(clang gcc-libs glibc llvm-libs spirv-llvm-translator onetbb) -makedepends=(cmake git level-zero-headers level-zero-loader lib32-glibc llvm openmp python vc-intrinsics) +makedepends=(cmake git level-zero-headers level-zero-loader llvm openmp python vc-intrinsics) checkdepends=(intel-compute-runtime) optdepends=( 'intel-compute-runtime: GPU support' @@ -55,6 +55,11 @@ build() { -S $pkgname -W no-dev ) +# clang didn't support -mlsx + CFLAGS=${CFLAGS/-mlsx /} + CXXFLAGS=${CXXFLAGS/-mlsx /} + CFLAGS=${CFLAGS/-fstack-clash-protection/} + CXXFLAGS=${CXXFLAGS/-fstack-clash-protection/} cmake "${cmake_options[@]}" cmake --build build --verbose diff --git a/jack2/PKGBUILD b/jack2/PKGBUILD index 2d7d4be97c..ffd838bfe5 100644 --- a/jack2/PKGBUILD +++ b/jack2/PKGBUILD @@ -9,7 +9,7 @@ pkgname=(jack2 jack2-dbus jack2-docs) pkgdesc="The JACK low-latency audio server" pkgver=1.9.22 _commit=80149e552b56d6d57d754dc04d119b8170d27313 # refs/tags/v1.9.22 -pkgrel=1 +pkgrel=4 arch=(loong64 x86_64) url="https://github.com/jackaudio/jack2" 
license=(GPL2) diff --git a/java-openjdk/freedesktop-java.desktop b/java-openjdk/freedesktop-java.desktop deleted file mode 100644 index 78a4a552c9..0000000000 --- a/java-openjdk/freedesktop-java.desktop +++ /dev/null @@ -1,12 +0,0 @@ -[Desktop Entry] -Name=OpenJDK Java 21 Runtime -Name[fi]=OpenJDK Java 21 - ajonaikainen ympäristö -Comment=OpenJDK Java 21 Runtime -Comment[fi]=OpenJDK Java 21 - ajonaikainen ympäristö -Keywords=java;runtime -Exec=/usr/lib/jvm/java-21-openjdk/bin/java -jar -Terminal=false -Type=Application -Icon=java21-openjdk -MimeType=application/x-java-archive;application/java-archive;application/x-jar; -NoDisplay=true diff --git a/java-openjdk/freedesktop-jconsole.desktop b/java-openjdk/freedesktop-jconsole.desktop deleted file mode 100644 index dc76c0f041..0000000000 --- a/java-openjdk/freedesktop-jconsole.desktop +++ /dev/null @@ -1,11 +0,0 @@ -[Desktop Entry] -Name=OpenJDK Java 21 Console -Name[fi]=OpenJDK Java 21 - konsoli -Comment=OpenJDK Java 21 Monitoring & Management Console -Comment[fi]=OpenJDK Java 21 - valvonta- ja hallintakonsoli -Keywords=java;console;monitoring -Exec=/usr/lib/jvm/java-21-openjdk/bin/jconsole -Terminal=false -Type=Application -Icon=java21-openjdk -Categories=Application;System; diff --git a/java-openjdk/freedesktop-jshell.desktop b/java-openjdk/freedesktop-jshell.desktop deleted file mode 100644 index bce0aa6157..0000000000 --- a/java-openjdk/freedesktop-jshell.desktop +++ /dev/null @@ -1,9 +0,0 @@ -[Desktop Entry] -Name=OpenJDK Java 21 Shell -Comment=OpenJDK Java 21 Shell -Keywords=java;shell -Exec=/usr/lib/jvm/java-21-openjdk/bin/jshell -Terminal=true -Type=Application -Icon=java21-openjdk -Categories=Application;System; diff --git a/java-openjdk/install_jdk-openjdk.sh b/java-openjdk/install_jdk-openjdk.sh deleted file mode 100644 index e97f91fd81..0000000000 --- a/java-openjdk/install_jdk-openjdk.sh +++ /dev/null @@ -1,50 +0,0 @@ -THIS_JDK='java-21-openjdk' - -fix_default() { - if [ ! -x /usr/bin/java ]; then - /usr/bin/archlinux-java unset - echo "" - else - /usr/bin/archlinux-java get - fi -} - -post_install() { - default=$(fix_default) - case ${default} in - "") - /usr/bin/archlinux-java set ${THIS_JDK} - ;; - ${THIS_JDK}) - # Nothing - ;; - *) - echo "Default Java environment is already set to '${default}'" - echo "See 'archlinux-java help' to change it" - ;; - esac - - if [ ! -f /etc/ssl/certs/java/cacerts ]; then - /usr/bin/update-ca-trust - fi -} - -post_upgrade() { - default=$(fix_default) - if [ -z "${default}" ]; then - /usr/bin/archlinux-java set ${THIS_JDK} - fi - - if [ ! -f /etc/ssl/certs/java/cacerts ]; then - /usr/bin/update-ca-trust - fi -} - -pre_remove() { - if [ "x$(fix_default)" = "x${THIS_JDK}" ]; then - # Check JRE is still available - if [ -x /usr/lib/jvm/${THIS_JDK}/bin/java ]; then - /usr/bin/archlinux-java unset - fi - fi -} diff --git a/java-openjdk/install_jre-openjdk-headless.sh b/java-openjdk/install_jre-openjdk-headless.sh deleted file mode 100644 index 72c3bb17d9..0000000000 --- a/java-openjdk/install_jre-openjdk-headless.sh +++ /dev/null @@ -1,48 +0,0 @@ -THIS_JRE='java-21-openjdk' - -fix_default() { - if [ ! -x /usr/bin/java ]; then - /usr/bin/archlinux-java unset - echo "" - else - /usr/bin/archlinux-java get - fi -} - -post_install() { - default=$(fix_default) - case ${default} in - "") - /usr/bin/archlinux-java set ${THIS_JRE} - ;; - ${THIS_JRE}) - # Nothing - ;; - *) - echo "Default Java environment is already set to '${default}'" - echo "See 'archlinux-java help' to change it" - ;; - esac - - if [ ! 
-f /etc/ssl/certs/java/cacerts ]; then - /usr/bin/update-ca-trust - fi -} - -post_upgrade() { - if [ -z "$(fix_default)" ]; then - /usr/bin/archlinux-java set ${THIS_JRE} - fi - - if [ ! -f /etc/ssl/certs/java/cacerts ]; then - /usr/bin/update-ca-trust - fi -} - -pre_remove() { - default=$(fix_default) - if [ "x${default}" = "x${THIS_JRE}" ]; then - /usr/bin/archlinux-java unset - echo "No Java environment is set as default anymore" - fi -} diff --git a/java-openjdk/install_jre-openjdk.sh b/java-openjdk/install_jre-openjdk.sh deleted file mode 100644 index 9ffb19ee06..0000000000 --- a/java-openjdk/install_jre-openjdk.sh +++ /dev/null @@ -1,35 +0,0 @@ -THIS_JRE='java-21-openjdk' - -fix_default() { - if [ ! -x /usr/bin/java ]; then - /usr/bin/archlinux-java unset - echo "" - else - /usr/bin/archlinux-java get - fi -} - -post_install() { - default=$(fix_default) - case ${default} in - "") - /usr/bin/archlinux-java set ${THIS_JRE} - ;; - ${THIS_JRE}) - # Nothing - ;; - *) - echo "Default Java environment is already set to '${default}'" - echo "See 'archlinux-java help' to change it" - ;; - esac - - echo "when you use a non-reparenting window manager," - echo "set _JAVA_AWT_WM_NONREPARENTING=1 in /etc/profile.d/jre.sh" -} - -post_upgrade() { - if [ -z "$(fix_default)" ]; then - /usr/bin/archlinux-java set ${THIS_JRE} - fi -} diff --git a/java11-openjdk/PKGBUILD b/java11-openjdk/PKGBUILD index e0ea8e7527..bbebf73c8b 100644 --- a/java11-openjdk/PKGBUILD +++ b/java11-openjdk/PKGBUILD @@ -24,23 +24,26 @@ _git_tag=jdk-${_majorver}.${_minorver}.${_securityver}+${_updatever} arch=('loong64' 'x86_64') url='https://openjdk.java.net/' license=('custom') -makedepends=('java-environment>=10' 'java-environment<12' 'cpio' 'unzip' 'zip' 'libelf' 'libcups' 'libx11' - 'libxrender' 'libxtst' 'libxt' 'libxext' 'libxrandr' 'alsa-lib' 'pandoc' +makedepends=('jdk11-openjdk' 'cpio' 'unzip' 'zip' 'libelf' 'libcups' 'libx11' + 'libxrender' 'libxtst' 'libxt' 'libxext' 'libxrandr' 'alsa-lib' 'graphviz' 'freetype2' 'libjpeg-turbo' 'giflib' 'libpng' 'lcms2' 'libnet' 'bash' 'harfbuzz' 'glibc' 'gcc-libs') options=(!lto) source=(https://github.com/openjdk/jdk${_majorver}u/archive/${_git_tag}.tar.gz freedesktop-java.desktop freedesktop-jconsole.desktop - freedesktop-jshell.desktop) + freedesktop-jshell.desktop + jdk11-11.0.20.1-la64.patch) sha256sums=('c24c8708244e78c4418ff8680ae2122b1b7ff9bc4d0bf3187d3579ba84c1b29d' '575587ad58dfa9908f046d307b9afc7b0b2eb20a1eb454f8fdbbd539ea7b3d01' '2f57b7c7dd671eabe9fa10c4f1283573e99d7f7c36eccd82c95b705979a2e8cb' - 'f271618a8c2a892b554caf26857af41efdf0d8bcb95d57ce7ba535d6979e96da') + 'f271618a8c2a892b554caf26857af41efdf0d8bcb95d57ce7ba535d6979e96da' + 'c55c5c1a8fbc5721f4c1ebdfba6101c4283b1ce69c8b828d54f49bc5c6e8ccb7') case "${CARCH}" in x86_64) _JARCH='x86_64';; i686) _JARCH='x86';; + loong64) _JARCH='loongarch64';; esac _jvmdir=/usr/lib/jvm/java-${_majorver}-openjdk @@ -52,6 +55,12 @@ _nonheadless=(lib/libawt_xawt.so lib/libjsound.so lib/libsplashscreen.so) +prepare() { + cd ${_jdkdir} + + patch -Np1 -i "${srcdir}"/jdk11-11.0.20.1-la64.patch +} + build() { cd ${_jdkdir} @@ -105,7 +114,7 @@ build() { ${NUM_PROC_OPT} #--disable-javac-server - make images legacy-jre-image docs + make images legacy-jre-image #docs # https://bugs.openjdk.java.net/browse/JDK-8173610 find "../${_imgdir}" -iname '*.so' -exec chmod +x {} \; @@ -275,7 +284,7 @@ package_openjdk11-doc() { provides=("openjdk${_majorver}-doc=${pkgver}-${pkgrel}") install -dm 755 "${pkgdir}/usr/share/doc" - cp -r ${_imgdir}/docs 
"${pkgdir}/usr/share/doc/${pkgbase}" +# cp -r ${_imgdir}/docs "${pkgdir}/usr/share/doc/${pkgbase}" install -dm 755 "${pkgdir}/usr/share/licenses" ln -s ${pkgbase} "${pkgdir}/usr/share/licenses/${pkgname}" diff --git a/java11-openjdk/jdk11-11.0.20.1-la64.patch b/java11-openjdk/jdk11-11.0.20.1-la64.patch new file mode 100644 index 0000000000..6a77e6b63b --- /dev/null +++ b/java11-openjdk/jdk11-11.0.20.1-la64.patch @@ -0,0 +1,116875 @@ +diff --git a/make/CompileJavaModules.gmk b/make/CompileJavaModules.gmk +index 46fb9b4219..c6d8b24fc4 100644 +--- a/make/CompileJavaModules.gmk ++++ b/make/CompileJavaModules.gmk +@@ -430,6 +430,7 @@ jdk.internal.vm.ci_ADD_JAVAC_FLAGS += -parameters -Xlint:-exports -XDstringConca + + jdk.internal.vm.compiler_ADD_JAVAC_FLAGS += -parameters -XDstringConcat=inline \ + --add-exports jdk.internal.vm.ci/jdk.vm.ci.aarch64=jdk.internal.vm.compiler \ ++ --add-exports jdk.internal.vm.ci/jdk.vm.ci.loongarch64=jdk.internal.vm.compiler \ + --add-exports jdk.internal.vm.ci/jdk.vm.ci.amd64=jdk.internal.vm.compiler \ + --add-exports jdk.internal.vm.ci/jdk.vm.ci.code=jdk.internal.vm.compiler \ + --add-exports jdk.internal.vm.ci/jdk.vm.ci.code.site=jdk.internal.vm.compiler \ +@@ -437,6 +438,7 @@ jdk.internal.vm.compiler_ADD_JAVAC_FLAGS += -parameters -XDstringConcat=inline \ + --add-exports jdk.internal.vm.ci/jdk.vm.ci.common=jdk.internal.vm.compiler \ + --add-exports jdk.internal.vm.ci/jdk.vm.ci.hotspot=jdk.internal.vm.compiler \ + --add-exports jdk.internal.vm.ci/jdk.vm.ci.hotspot.aarch64=jdk.internal.vm.compiler \ ++ --add-exports jdk.internal.vm.ci/jdk.vm.ci.hotspot.loongarch64=jdk.internal.vm.compiler \ + --add-exports jdk.internal.vm.ci/jdk.vm.ci.hotspot.amd64=jdk.internal.vm.compiler \ + --add-exports jdk.internal.vm.ci/jdk.vm.ci.hotspot.sparc=jdk.internal.vm.compiler \ + --add-exports jdk.internal.vm.ci/jdk.vm.ci.meta=jdk.internal.vm.compiler \ +@@ -456,6 +458,7 @@ jdk.internal.vm.compiler_EXCLUDES += \ + org.graalvm.compiler.api.directives.test \ + org.graalvm.compiler.api.test \ + org.graalvm.compiler.asm.aarch64.test \ ++ org.graalvm.compiler.asm.loongarch64.test \ + org.graalvm.compiler.asm.amd64.test \ + org.graalvm.compiler.asm.sparc.test \ + org.graalvm.compiler.asm.test \ +diff --git a/make/autoconf/hotspot.m4 b/make/autoconf/hotspot.m4 +index a3e1e00b2c..22f479120b 100644 +--- a/make/autoconf/hotspot.m4 ++++ b/make/autoconf/hotspot.m4 +@@ -34,6 +34,12 @@ DEPRECATED_JVM_FEATURES="trace" + # All valid JVM variants + VALID_JVM_VARIANTS="server client minimal core zero custom" + ++# ++# This file has been modified by Loongson Technology in 2021. These ++# modifications are Copyright (c) 2020, 2021, Loongson Technology, and are made ++# available on the same license terms set forth above. ++# ++ + ############################################################################### + # Check if the specified JVM variant should be built. To be used in shell if + # constructs, like this: +@@ -337,6 +343,26 @@ AC_DEFUN_ONCE([HOTSPOT_SETUP_JVM_FEATURES], + HOTSPOT_TARGET_CPU_ARCH=arm + fi + ++ # Override hotspot cpu definitions for MIPS and LOONGARCH platforms ++ if test "x$OPENJDK_TARGET_CPU" = xmips64el && test "x$HOTSPOT_TARGET_CPU" != xzero; then ++ HOTSPOT_TARGET_CPU=mips_64 ++ HOTSPOT_TARGET_CPU_ARCH=mips ++ elif test "x$OPENJDK_TARGET_CPU" = xloongarch64 && test "x$HOTSPOT_TARGET_CPU" != xzero; then ++ HOTSPOT_TARGET_CPU=loongarch_64 ++ HOTSPOT_TARGET_CPU_ARCH=loongarch ++ fi ++ ++ # Disable compiler1 on linux-mips and linux-loongarch ++ if ! 
(HOTSPOT_CHECK_JVM_FEATURE(compiler1)); then ++ AC_MSG_CHECKING([if compiler1 should be built, $JVM_FEATURES]) ++ if test "x$OPENJDK_TARGET_OS" = "xlinux" && test "x$HOTSPOT_TARGET_CPU_ARCH" = "xmips"; then ++ DISABLED_JVM_FEATURES="$DISABLED_JVM_FEATURES compiler1" ++ AC_MSG_RESULT([no, platform not supported]) ++ else ++ AC_MSG_RESULT([yes]) ++ fi ++ fi ++ + # Verify that dependencies are met for explicitly set features. + if HOTSPOT_CHECK_JVM_FEATURE(jvmti) && ! HOTSPOT_CHECK_JVM_FEATURE(services); then + AC_MSG_ERROR([Specified JVM feature 'jvmti' requires feature 'services']) +@@ -421,10 +447,11 @@ AC_DEFUN_ONCE([HOTSPOT_SETUP_JVM_FEATURES], + JVM_FEATURES_jvmci="" + INCLUDE_JVMCI="false" + else +- # Only enable jvmci on x86_64, sparcv9 and aarch64 ++ # Only enable jvmci on x86_64, sparcv9, aarch64 and loongarch64 + if test "x$OPENJDK_TARGET_CPU" = "xx86_64" || \ + test "x$OPENJDK_TARGET_CPU" = "xsparcv9" || \ +- test "x$OPENJDK_TARGET_CPU" = "xaarch64" ; then ++ test "x$OPENJDK_TARGET_CPU" = "xaarch64" || \ ++ test "x$OPENJDK_TARGET_CPU" = "xloongarch64" ; then + AC_MSG_RESULT([yes]) + JVM_FEATURES_jvmci="jvmci" + INCLUDE_JVMCI="true" +diff --git a/make/autoconf/platform.m4 b/make/autoconf/platform.m4 +index 26a58eb2ee..061952ee45 100644 +--- a/make/autoconf/platform.m4 ++++ b/make/autoconf/platform.m4 +@@ -23,6 +23,12 @@ + # questions. + # + ++# ++# This file has been modified by Loongson Technology in 2021. These ++# modifications are Copyright (c) 2018, 2021, Loongson Technology, and are made ++# available on the same license terms set forth above. ++# ++ + # Support macro for PLATFORM_EXTRACT_TARGET_AND_BUILD. + # Converts autoconf style CPU name to OpenJDK style, into + # VAR_CPU, VAR_CPU_ARCH, VAR_CPU_BITS and VAR_CPU_ENDIAN. +@@ -554,6 +560,12 @@ AC_DEFUN([PLATFORM_SETUP_LEGACY_VARS_HELPER], + HOTSPOT_$1_CPU_DEFINE=PPC64 + elif test "x$OPENJDK_$1_CPU" = xppc64le; then + HOTSPOT_$1_CPU_DEFINE=PPC64 ++ elif test "x$OPENJDK_$1_CPU" = xmips64; then ++ HOTSPOT_$1_CPU_DEFINE=MIPS64 ++ elif test "x$OPENJDK_$1_CPU" = xmips64el; then ++ HOTSPOT_$1_CPU_DEFINE=MIPS64 ++ elif test "x$OPENJDK_$1_CPU" = xloongarch64; then ++ HOTSPOT_$1_CPU_DEFINE=LOONGARCH64 + + # The cpu defines below are for zero, we don't support them directly. 
+ elif test "x$OPENJDK_$1_CPU" = xsparc; then +diff --git a/src/hotspot/cpu/aarch64/c1_LIRAssembler_aarch64.cpp b/src/hotspot/cpu/aarch64/c1_LIRAssembler_aarch64.cpp +index fdd2c0ca3d..318191233a 100644 +--- a/src/hotspot/cpu/aarch64/c1_LIRAssembler_aarch64.cpp ++++ b/src/hotspot/cpu/aarch64/c1_LIRAssembler_aarch64.cpp +@@ -1123,7 +1123,9 @@ void LIR_Assembler::emit_opBranch(LIR_OpBranch* op) { + } + } + +- ++void LIR_Assembler::emit_opCmpBranch(LIR_OpCmpBranch* op) { ++ ShouldNotReachHere(); ++} + + void LIR_Assembler::emit_opConvert(LIR_OpConvert* op) { + LIR_Opr src = op->in_opr(); +@@ -1663,6 +1665,10 @@ void LIR_Assembler::cmove(LIR_Condition condition, LIR_Opr opr1, LIR_Opr opr2, L + __ csel(result->as_register(), opr1->as_register(), opr2->as_register(), acond); + } + ++void LIR_Assembler::cmp_cmove(LIR_Condition condition, LIR_Opr left, LIR_Opr right, LIR_Opr src1, LIR_Opr src2, LIR_Opr result, BasicType type) { ++ ShouldNotReachHere(); ++} ++ + void LIR_Assembler::arith_op(LIR_Code code, LIR_Opr left, LIR_Opr right, LIR_Opr dest, CodeEmitInfo* info, bool pop_fpu_stack) { + assert(info == NULL, "should never be used, idiv/irem and ldiv/lrem not handled by this method"); + +diff --git a/src/hotspot/cpu/aarch64/c1_LIRGenerator_aarch64.cpp b/src/hotspot/cpu/aarch64/c1_LIRGenerator_aarch64.cpp +index cebc1e410d..816226c068 100644 +--- a/src/hotspot/cpu/aarch64/c1_LIRGenerator_aarch64.cpp ++++ b/src/hotspot/cpu/aarch64/c1_LIRGenerator_aarch64.cpp +@@ -260,18 +260,29 @@ void LIRGenerator::increment_counter(LIR_Address* addr, int step) { + __ store(reg, addr); + } + +-void LIRGenerator::cmp_mem_int(LIR_Condition condition, LIR_Opr base, int disp, int c, CodeEmitInfo* info) { ++template <class T> ++void LIRGenerator::cmp_mem_int_branch(LIR_Condition condition, LIR_Opr base, int disp, int c, T tgt, CodeEmitInfo* info) { + LIR_Opr reg = new_register(T_INT); + __ load(generate_address(base, disp, T_INT), reg, info); +- __ cmp(condition, reg, LIR_OprFact::intConst(c)); ++ __ cmp_branch(condition, reg, LIR_OprFact::intConst(c), T_INT, tgt); + } + +-void LIRGenerator::cmp_reg_mem(LIR_Condition condition, LIR_Opr reg, LIR_Opr base, int disp, BasicType type, CodeEmitInfo* info) { ++// Explicit instantiation for all supported types. ++template void LIRGenerator::cmp_mem_int_branch(LIR_Condition, LIR_Opr, int, int, Label*, CodeEmitInfo*); ++template void LIRGenerator::cmp_mem_int_branch(LIR_Condition, LIR_Opr, int, int, BlockBegin*, CodeEmitInfo*); ++template void LIRGenerator::cmp_mem_int_branch(LIR_Condition, LIR_Opr, int, int, CodeStub*, CodeEmitInfo*); ++ ++template <class T> ++void LIRGenerator::cmp_reg_mem_branch(LIR_Condition condition, LIR_Opr reg, LIR_Opr base, int disp, BasicType type, T tgt, CodeEmitInfo* info) { + LIR_Opr reg1 = new_register(T_INT); + __ load(generate_address(base, disp, type), reg1, info); +- __ cmp(condition, reg, reg1); ++ __ cmp_branch(condition, reg, reg1, type, tgt); + } + ++// Explicit instantiation for all supported types.
++template void LIRGenerator::cmp_reg_mem_branch(LIR_Condition, LIR_Opr, LIR_Opr, int, BasicType, Label*, CodeEmitInfo*); ++template void LIRGenerator::cmp_reg_mem_branch(LIR_Condition, LIR_Opr, LIR_Opr, int, BasicType, BlockBegin*, CodeEmitInfo*); ++template void LIRGenerator::cmp_reg_mem_branch(LIR_Condition, LIR_Opr, LIR_Opr, int, BasicType, CodeStub*, CodeEmitInfo*); + + bool LIRGenerator::strength_reduce_multiply(LIR_Opr left, jint c, LIR_Opr result, LIR_Opr tmp) { + +diff --git a/src/hotspot/cpu/aarch64/c1_LIR_aarch64.cpp b/src/hotspot/cpu/aarch64/c1_LIR_aarch64.cpp +index ce75dc552a..74c4b7e556 100644 +--- a/src/hotspot/cpu/aarch64/c1_LIR_aarch64.cpp ++++ b/src/hotspot/cpu/aarch64/c1_LIR_aarch64.cpp +@@ -52,3 +52,24 @@ void LIR_Address::verify() const { + "wrong type for addresses"); + } + #endif // PRODUCT ++ ++template <class T> ++void LIR_List::cmp_branch(LIR_Condition condition, LIR_Opr left, LIR_Opr right, BasicType type, T tgt, CodeEmitInfo* info) { ++ cmp(condition, left, right, info); ++ branch(condition, type, tgt); ++} ++ ++// Explicit instantiation for all supported types. ++template void LIR_List::cmp_branch(LIR_Condition, LIR_Opr, LIR_Opr, BasicType type, Label*, CodeEmitInfo*); ++template void LIR_List::cmp_branch(LIR_Condition, LIR_Opr, LIR_Opr, BasicType type, BlockBegin*, CodeEmitInfo*); ++template void LIR_List::cmp_branch(LIR_Condition, LIR_Opr, LIR_Opr, BasicType type, CodeStub*, CodeEmitInfo*); ++ ++void LIR_List::cmp_branch(LIR_Condition condition, LIR_Opr left, LIR_Opr right, BasicType type, BlockBegin* block, BlockBegin* unordered) { ++ cmp(condition, left, right); ++ branch(condition, type, block, unordered); ++} ++ ++void LIR_List::cmp_cmove(LIR_Condition condition, LIR_Opr left, LIR_Opr right, LIR_Opr src1, LIR_Opr src2, LIR_Opr dst, BasicType type) { ++ cmp(condition, left, right); ++ cmove(condition, src1, src2, dst, type); ++} +diff --git a/src/hotspot/cpu/arm/c1_LIRAssembler_arm.cpp b/src/hotspot/cpu/arm/c1_LIRAssembler_arm.cpp +index f0a7229aa1..29db21f975 100644 +--- a/src/hotspot/cpu/arm/c1_LIRAssembler_arm.cpp ++++ b/src/hotspot/cpu/arm/c1_LIRAssembler_arm.cpp +@@ -1150,6 +1150,9 @@ void LIR_Assembler::emit_opBranch(LIR_OpBranch* op) { + __ b(*(op->label()), acond); + } + ++void LIR_Assembler::emit_opCmpBranch(LIR_OpCmpBranch* op) { ++ ShouldNotReachHere(); ++} + + void LIR_Assembler::emit_opConvert(LIR_OpConvert* op) { + LIR_Opr src = op->in_opr(); +@@ -3082,6 +3085,10 @@ void LIR_Assembler::emit_arraycopy(LIR_OpArrayCopy* op) { + __ bind(*stub->continuation()); + } + ++void LIR_Assembler::cmp_cmove(LIR_Condition condition, LIR_Opr left, LIR_Opr right, LIR_Opr src1, LIR_Opr src2, LIR_Opr result, BasicType type) { ++ ShouldNotReachHere(); ++} ++ + #ifdef ASSERT + // emit run-time assertion + void LIR_Assembler::emit_assert(LIR_OpAssert* op) { +diff --git a/src/hotspot/cpu/arm/c1_LIRGenerator_arm.cpp b/src/hotspot/cpu/arm/c1_LIRGenerator_arm.cpp +index b05fc876f2..b3c1afe69a 100644 +--- a/src/hotspot/cpu/arm/c1_LIRGenerator_arm.cpp ++++ b/src/hotspot/cpu/arm/c1_LIRGenerator_arm.cpp +@@ -423,18 +423,27 @@ void LIRGenerator::increment_counter(LIR_Address* addr, int step) { + __ move(temp, addr); + } + +- +-void LIRGenerator::cmp_mem_int(LIR_Condition condition, LIR_Opr base, int disp, int c, CodeEmitInfo* info) { ++template <class T> ++void LIRGenerator::cmp_mem_int_branch(LIR_Condition condition, LIR_Opr base, int disp, int c, T tgt, CodeEmitInfo* info) { + __ load(new LIR_Address(base, disp, T_INT), FrameMap::LR_opr, info); +- __ cmp(condition, FrameMap::LR_opr, c); ++
__ cmp_branch(condition, FrameMap::LR_opr, c, T_INT, tgt); + } + ++// Explicit instantiation for all supported types. ++template void LIRGenerator::cmp_mem_int_branch(LIR_Condition, LIR_Opr, int, int, Label*, CodeEmitInfo*); ++template void LIRGenerator::cmp_mem_int_branch(LIR_Condition, LIR_Opr, int, int, BlockBegin*, CodeEmitInfo*); ++template void LIRGenerator::cmp_mem_int_branch(LIR_Condition, LIR_Opr, int, int, CodeStub*, CodeEmitInfo*); + +-void LIRGenerator::cmp_reg_mem(LIR_Condition condition, LIR_Opr reg, LIR_Opr base, int disp, BasicType type, CodeEmitInfo* info) { ++template <class T> ++void LIRGenerator::cmp_reg_mem_branch(LIR_Condition condition, LIR_Opr reg, LIR_Opr base, int disp, BasicType type, T tgt, CodeEmitInfo* info) { + __ load(new LIR_Address(base, disp, type), FrameMap::LR_opr, info); +- __ cmp(condition, reg, FrameMap::LR_opr); ++ __ cmp_branch(condition, reg, FrameMap::LR_opr, type, tgt); + } + ++// Explicit instantiation for all supported types. ++template void LIRGenerator::cmp_reg_mem_branch(LIR_Condition, LIR_Opr, LIR_Opr, int, BasicType, Label*, CodeEmitInfo*); ++template void LIRGenerator::cmp_reg_mem_branch(LIR_Condition, LIR_Opr, LIR_Opr, int, BasicType, BlockBegin*, CodeEmitInfo*); ++template void LIRGenerator::cmp_reg_mem_branch(LIR_Condition, LIR_Opr, LIR_Opr, int, BasicType, CodeStub*, CodeEmitInfo*); + + bool LIRGenerator::strength_reduce_multiply(LIR_Opr left, jint c, LIR_Opr result, LIR_Opr tmp) { + assert(left != result, "should be different registers"); +diff --git a/src/hotspot/cpu/arm/c1_LIR_arm.cpp b/src/hotspot/cpu/arm/c1_LIR_arm.cpp +index 806da32020..5305fe371e 100644 +--- a/src/hotspot/cpu/arm/c1_LIR_arm.cpp ++++ b/src/hotspot/cpu/arm/c1_LIR_arm.cpp +@@ -84,3 +84,24 @@ void LIR_Address::verify() const { + #endif // AARCH64 + } + #endif // PRODUCT ++ ++template <class T> ++void LIR_List::cmp_branch(LIR_Condition condition, LIR_Opr left, LIR_Opr right, BasicType type, T tgt, CodeEmitInfo* info) { ++ cmp(condition, left, right, info); ++ branch(condition, type, tgt); ++} ++ ++// Explicit instantiation for all supported types. ++template void LIR_List::cmp_branch(LIR_Condition, LIR_Opr, LIR_Opr, BasicType type, Label*, CodeEmitInfo*); ++template void LIR_List::cmp_branch(LIR_Condition, LIR_Opr, LIR_Opr, BasicType type, BlockBegin*, CodeEmitInfo*); ++template void LIR_List::cmp_branch(LIR_Condition, LIR_Opr, LIR_Opr, BasicType type, CodeStub*, CodeEmitInfo*); ++ ++void LIR_List::cmp_branch(LIR_Condition condition, LIR_Opr left, LIR_Opr right, BasicType type, BlockBegin* block, BlockBegin* unordered) { ++ cmp(condition, left, right); ++ branch(condition, type, block, unordered); ++} ++ ++void LIR_List::cmp_cmove(LIR_Condition condition, LIR_Opr left, LIR_Opr right, LIR_Opr src1, LIR_Opr src2, LIR_Opr dst, BasicType type) { ++ cmp(condition, left, right); ++ cmove(condition, src1, src2, dst, type); ++} +diff --git a/src/hotspot/cpu/loongarch/abstractInterpreter_loongarch.cpp b/src/hotspot/cpu/loongarch/abstractInterpreter_loongarch.cpp +new file mode 100644 +index 0000000000..0412b99537 +--- /dev/null ++++ b/src/hotspot/cpu/loongarch/abstractInterpreter_loongarch.cpp +@@ -0,0 +1,132 @@ ++/* ++ * Copyright (c) 2003, 2013, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2021, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#include "precompiled.hpp" ++#include "ci/ciMethod.hpp" ++#include "interpreter/interpreter.hpp" ++#include "runtime/frame.inline.hpp" ++ ++// asm based interpreter deoptimization helpers ++int AbstractInterpreter::size_activation(int max_stack, ++ int temps, ++ int extra_args, ++ int monitors, ++ int callee_params, ++ int callee_locals, ++ bool is_top_frame) { ++ // Note: This calculation must exactly parallel the frame setup ++ // in AbstractInterpreterGenerator::generate_method_entry. ++ ++ // fixed size of an interpreter frame: ++ int overhead = frame::java_frame_sender_sp_offset - ++ frame::interpreter_frame_initial_sp_offset; ++ // Our locals were accounted for by the caller (or last_frame_adjust ++ // on the transistion) Since the callee parameters already account ++ // for the callee's params we only need to account for the extra ++ // locals. ++ int size = overhead + ++ (callee_locals - callee_params)*Interpreter::stackElementWords + ++ monitors * frame::interpreter_frame_monitor_size() + ++ temps* Interpreter::stackElementWords + extra_args; ++ ++ return size; ++} ++ ++// How much stack a method activation needs in words. ++int AbstractInterpreter::size_top_interpreter_activation(Method* method) { ++ ++ const int entry_size = frame::interpreter_frame_monitor_size(); ++ ++ // total overhead size: entry_size + (saved ebp thru expr stack bottom). ++ // be sure to change this if you add/subtract anything to/from the overhead area ++ const int overhead_size = -(frame::interpreter_frame_initial_sp_offset) + entry_size; ++ ++ const int stub_code = 6; // see generate_call_stub ++ // return overhead_size + method->max_locals() + method->max_stack() + stub_code; ++ const int method_stack = (method->max_locals() + method->max_stack()) * ++ Interpreter::stackElementWords; ++ return overhead_size + method_stack + stub_code; ++} ++ ++void AbstractInterpreter::layout_activation(Method* method, ++ int tempcount, ++ int popframe_extra_args, ++ int moncount, ++ int caller_actual_parameters, ++ int callee_param_count, ++ int callee_locals, ++ frame* caller, ++ frame* interpreter_frame, ++ bool is_top_frame, ++ bool is_bottom_frame) { ++ // Note: This calculation must exactly parallel the frame setup ++ // in AbstractInterpreterGenerator::generate_method_entry. ++ // If interpreter_frame!=NULL, set up the method, locals, and monitors. ++ // The frame interpreter_frame, if not NULL, is guaranteed to be the ++ // right size, as determined by a previous call to this method. 
++ // It is also guaranteed to be walkable even though it is in a skeletal state ++ ++ // fixed size of an interpreter frame: ++ ++ int max_locals = method->max_locals() * Interpreter::stackElementWords; ++ int extra_locals = (method->max_locals() - method->size_of_parameters()) * Interpreter::stackElementWords; ++ ++#ifdef ASSERT ++ assert(caller->sp() == interpreter_frame->sender_sp(), "Frame not properly walkable(2)"); ++#endif ++ ++ interpreter_frame->interpreter_frame_set_method(method); ++ // NOTE the difference in using sender_sp and interpreter_frame_sender_sp ++ // interpreter_frame_sender_sp is the original sp of the caller (the unextended_sp) ++ // and sender_sp is fp+8 ++ intptr_t* locals = interpreter_frame->sender_sp() + max_locals - 1; ++ ++#ifdef ASSERT ++ if (caller->is_interpreted_frame()) { ++ assert(locals < caller->fp() + frame::interpreter_frame_initial_sp_offset, "bad placement"); ++ } ++#endif ++ ++ interpreter_frame->interpreter_frame_set_locals(locals); ++ BasicObjectLock* montop = interpreter_frame->interpreter_frame_monitor_begin(); ++ BasicObjectLock* monbot = montop - moncount; ++ interpreter_frame->interpreter_frame_set_monitor_end(montop - moncount); ++ ++ //set last sp; ++ intptr_t* esp = (intptr_t*) monbot - tempcount*Interpreter::stackElementWords - ++ popframe_extra_args; ++ interpreter_frame->interpreter_frame_set_last_sp(esp); ++ // All frames but the initial interpreter frame we fill in have a ++ // value for sender_sp that allows walking the stack but isn't ++ // truly correct. Correct the value here. ++ // ++ if (extra_locals != 0 && ++ interpreter_frame->sender_sp() == interpreter_frame->interpreter_frame_sender_sp() ) { ++ interpreter_frame->set_interpreter_frame_sender_sp(caller->sp() + extra_locals); ++ } ++ *interpreter_frame->interpreter_frame_cache_addr() = method->constants()->cache(); ++ *interpreter_frame->interpreter_frame_mirror_addr() = method->method_holder()->java_mirror(); ++} ++ +diff --git a/src/hotspot/cpu/loongarch/assembler_loongarch.cpp b/src/hotspot/cpu/loongarch/assembler_loongarch.cpp +new file mode 100644 +index 0000000000..e6e62cccad +--- /dev/null ++++ b/src/hotspot/cpu/loongarch/assembler_loongarch.cpp +@@ -0,0 +1,849 @@ ++/* ++ * Copyright (c) 1997, 2014, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2021, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ * ++ */ ++ ++#include "precompiled.hpp" ++#include "asm/assembler.hpp" ++#include "asm/assembler.inline.hpp" ++#include "gc/shared/cardTableBarrierSet.hpp" ++#include "gc/shared/collectedHeap.inline.hpp" ++#include "interpreter/interpreter.hpp" ++#include "memory/resourceArea.hpp" ++#include "prims/methodHandles.hpp" ++#include "runtime/biasedLocking.hpp" ++#include "runtime/objectMonitor.hpp" ++#include "runtime/os.hpp" ++#include "runtime/sharedRuntime.hpp" ++#include "runtime/stubRoutines.hpp" ++#include "utilities/macros.hpp" ++ ++#ifdef PRODUCT ++#define BLOCK_COMMENT(str) /* nothing */ ++#define STOP(error) stop(error) ++#else ++#define BLOCK_COMMENT(str) block_comment(str) ++#define STOP(error) block_comment(error); stop(error) ++#endif ++ ++#define BIND(label) bind(label); BLOCK_COMMENT(#label ":") ++// Implementation of AddressLiteral ++ ++AddressLiteral::AddressLiteral(address target, relocInfo::relocType rtype) { ++ _is_lval = false; ++ _target = target; ++ _rspec = rspec_from_rtype(rtype, target); ++} ++ ++// Implementation of Address ++ ++Address Address::make_array(ArrayAddress adr) { ++ AddressLiteral base = adr.base(); ++ Address index = adr.index(); ++ assert(index._disp == 0, "must not have disp"); // maybe it can? ++ Address array(index._base, index._index, index._scale, (intptr_t) base.target()); ++ array._rspec = base._rspec; ++ return array; ++} ++ ++// exceedingly dangerous constructor ++Address::Address(address loc, RelocationHolder spec) { ++ _base = noreg; ++ _index = noreg; ++ _scale = no_scale; ++ _disp = (intptr_t) loc; ++ _rspec = spec; ++} ++ ++ ++int Assembler::is_int_mask(int x) { ++ int xx = x; ++ int count = 0; ++ ++ while (x != 0) { ++ x &= (x - 1); ++ count++; ++ } ++ ++ if ((1<> 12)); ++ if (split_low12(disp)) ++ ori(AT, AT, split_low12(disp)); ++ ++ if (scale == 0) { ++ add_d(AT, AT, index); ++ } else { ++ alsl_d(AT, index, AT, scale - 1); ++ } ++ ldx_b(dst, base, AT); ++ } ++ } else { ++ if (is_simm(disp, 12)) { ++ ld_b(dst, base, disp); ++ } else { ++ lu12i_w(AT, split_low20(disp >> 12)); ++ if (split_low12(disp)) ++ ori(AT, AT, split_low12(disp)); ++ ldx_b(dst, base, AT); ++ } ++ } ++} ++ ++void Assembler::ld_bu(Register rd, Address src) { ++ Register dst = rd; ++ Register base = src.base(); ++ Register index = src.index(); ++ ++ int scale = src.scale(); ++ int disp = src.disp(); ++ ++ if (index != noreg) { ++ if (is_simm(disp, 12)) { ++ if (scale == 0) { ++ if (disp == 0) { ++ ldx_bu(dst, base, index); ++ } else { ++ add_d(AT, base, index); ++ ld_bu(dst, AT, disp); ++ } ++ } else { ++ alsl_d(AT, index, base, scale - 1); ++ ld_bu(dst, AT, disp); ++ } ++ } else { ++ lu12i_w(AT, split_low20(disp >> 12)); ++ if (split_low12(disp)) ++ ori(AT, AT, split_low12(disp)); ++ ++ if (scale == 0) { ++ add_d(AT, AT, index); ++ } else { ++ alsl_d(AT, index, AT, scale - 1); ++ } ++ ldx_bu(dst, base, AT); ++ } ++ } else { ++ if (is_simm(disp, 12)) { ++ ld_bu(dst, base, disp); ++ } else { ++ lu12i_w(AT, split_low20(disp >> 12)); ++ if (split_low12(disp)) ++ ori(AT, AT, split_low12(disp)); ++ ldx_bu(dst, base, AT); ++ } ++ } ++} ++ ++void Assembler::ld_d(Register rd, Address src){ ++ Register dst = rd; ++ Register base = src.base(); ++ Register index = src.index(); ++ ++ int scale = src.scale(); ++ int disp = src.disp(); ++ ++ if (index != noreg) { ++ if (is_simm(disp, 12)) { ++ if (scale == 0) { ++ if (disp == 0) { ++ ldx_d(dst, base, index); ++ } else { ++ add_d(AT, base, index); ++ ld_d(dst, AT, disp); ++ } ++ } else { ++ alsl_d(AT, index, base, scale - 1); ++ 
ld_d(dst, AT, disp); ++ } ++ } else if (is_simm(disp, 16) && !(disp & 3)) { ++ if (scale == 0) { ++ add_d(AT, base, index); ++ } else { ++ alsl_d(AT, index, base, scale - 1); ++ } ++ ldptr_d(dst, AT, disp); ++ } else { ++ lu12i_w(AT, split_low20(disp >> 12)); ++ if (split_low12(disp)) ++ ori(AT, AT, split_low12(disp)); ++ ++ if (scale == 0) { ++ add_d(AT, AT, index); ++ } else { ++ alsl_d(AT, index, AT, scale - 1); ++ } ++ ldx_d(dst, base, AT); ++ } ++ } else { ++ if (is_simm(disp, 12)) { ++ ld_d(dst, base, disp); ++ } else if (is_simm(disp, 16) && !(disp & 3)) { ++ ldptr_d(dst, base, disp); ++ } else { ++ lu12i_w(AT, split_low20(disp >> 12)); ++ if (split_low12(disp)) ++ ori(AT, AT, split_low12(disp)); ++ ldx_d(dst, base, AT); ++ } ++ } ++} ++ ++void Assembler::ld_h(Register rd, Address src){ ++ Register dst = rd; ++ Register base = src.base(); ++ Register index = src.index(); ++ ++ int scale = src.scale(); ++ int disp = src.disp(); ++ ++ if (index != noreg) { ++ if (is_simm(disp, 12)) { ++ if (scale == 0) { ++ if (disp == 0) { ++ ldx_h(dst, base, index); ++ } else { ++ add_d(AT, base, index); ++ ld_h(dst, AT, disp); ++ } ++ } else { ++ alsl_d(AT, index, base, scale - 1); ++ ld_h(dst, AT, disp); ++ } ++ } else { ++ lu12i_w(AT, split_low20(disp >> 12)); ++ if (split_low12(disp)) ++ ori(AT, AT, split_low12(disp)); ++ ++ if (scale == 0) { ++ add_d(AT, AT, index); ++ } else { ++ alsl_d(AT, index, AT, scale - 1); ++ } ++ ldx_h(dst, base, AT); ++ } ++ } else { ++ if (is_simm(disp, 12)) { ++ ld_h(dst, base, disp); ++ } else { ++ lu12i_w(AT, split_low20(disp >> 12)); ++ if (split_low12(disp)) ++ ori(AT, AT, split_low12(disp)); ++ ldx_h(dst, base, AT); ++ } ++ } ++} ++ ++void Assembler::ld_hu(Register rd, Address src){ ++ Register dst = rd; ++ Register base = src.base(); ++ Register index = src.index(); ++ ++ int scale = src.scale(); ++ int disp = src.disp(); ++ ++ if (index != noreg) { ++ if (is_simm(disp, 12)) { ++ if (scale == 0) { ++ if (disp == 0) { ++ ldx_hu(dst, base, index); ++ } else { ++ add_d(AT, base, index); ++ ld_hu(dst, AT, disp); ++ } ++ } else { ++ alsl_d(AT, index, base, scale - 1); ++ ld_hu(dst, AT, disp); ++ } ++ } else { ++ lu12i_w(AT, split_low20(disp >> 12)); ++ if (split_low12(disp)) ++ ori(AT, AT, split_low12(disp)); ++ ++ if (scale == 0) { ++ add_d(AT, AT, index); ++ } else { ++ alsl_d(AT, index, AT, scale - 1); ++ } ++ ldx_hu(dst, base, AT); ++ } ++ } else { ++ if (is_simm(disp, 12)) { ++ ld_hu(dst, base, disp); ++ } else { ++ lu12i_w(AT, split_low20(disp >> 12)); ++ if (split_low12(disp)) ++ ori(AT, AT, split_low12(disp)); ++ ldx_hu(dst, base, AT); ++ } ++ } ++} ++ ++void Assembler::ll_w(Register rd, Address src){ ++ assert(src.index() == NOREG, "index is unimplemented"); ++ ll_w(rd, src.base(), src.disp()); ++} ++ ++void Assembler::ll_d(Register rd, Address src){ ++ assert(src.index() == NOREG, "index is unimplemented"); ++ ll_d(rd, src.base(), src.disp()); ++} ++ ++void Assembler::ld_w(Register rd, Address src){ ++ Register dst = rd; ++ Register base = src.base(); ++ Register index = src.index(); ++ ++ int scale = src.scale(); ++ int disp = src.disp(); ++ ++ if (index != noreg) { ++ if (is_simm(disp, 12)) { ++ if (scale == 0) { ++ if (disp == 0) { ++ ldx_w(dst, base, index); ++ } else { ++ add_d(AT, base, index); ++ ld_w(dst, AT, disp); ++ } ++ } else { ++ alsl_d(AT, index, base, scale - 1); ++ ld_w(dst, AT, disp); ++ } ++ } else if (is_simm(disp, 16) && !(disp & 3)) { ++ if (scale == 0) { ++ add_d(AT, base, index); ++ } else { ++ alsl_d(AT, index, base, scale - 1); 
++ } ++ ldptr_w(dst, AT, disp); ++ } else { ++ lu12i_w(AT, split_low20(disp >> 12)); ++ if (split_low12(disp)) ++ ori(AT, AT, split_low12(disp)); ++ ++ if (scale == 0) { ++ add_d(AT, AT, index); ++ } else { ++ alsl_d(AT, index, AT, scale - 1); ++ } ++ ldx_w(dst, base, AT); ++ } ++ } else { ++ if (is_simm(disp, 12)) { ++ ld_w(dst, base, disp); ++ } else if (is_simm(disp, 16) && !(disp & 3)) { ++ ldptr_w(dst, base, disp); ++ } else { ++ lu12i_w(AT, split_low20(disp >> 12)); ++ if (split_low12(disp)) ++ ori(AT, AT, split_low12(disp)); ++ ldx_w(dst, base, AT); ++ } ++ } ++} ++ ++void Assembler::ld_wu(Register rd, Address src){ ++ Register dst = rd; ++ Register base = src.base(); ++ Register index = src.index(); ++ ++ int scale = src.scale(); ++ int disp = src.disp(); ++ ++ if (index != noreg) { ++ if (is_simm(disp, 12)) { ++ if (scale == 0) { ++ if (disp == 0) { ++ ldx_wu(dst, base, index); ++ } else { ++ add_d(AT, base, index); ++ ld_wu(dst, AT, disp); ++ } ++ } else { ++ alsl_d(AT, index, base, scale - 1); ++ ld_wu(dst, AT, disp); ++ } ++ } else { ++ lu12i_w(AT, split_low20(disp >> 12)); ++ if (split_low12(disp)) ++ ori(AT, AT, split_low12(disp)); ++ ++ if (scale == 0) { ++ add_d(AT, AT, index); ++ } else { ++ alsl_d(AT, index, AT, scale - 1); ++ } ++ ldx_wu(dst, base, AT); ++ } ++ } else { ++ if (is_simm(disp, 12)) { ++ ld_wu(dst, base, disp); ++ } else { ++ lu12i_w(AT, split_low20(disp >> 12)); ++ if (split_low12(disp)) ++ ori(AT, AT, split_low12(disp)); ++ ldx_wu(dst, base, AT); ++ } ++ } ++} ++ ++void Assembler::st_b(Register rd, Address dst) { ++ Register src = rd; ++ Register base = dst.base(); ++ Register index = dst.index(); ++ ++ int scale = dst.scale(); ++ int disp = dst.disp(); ++ ++ if (index != noreg) { ++ assert_different_registers(src, AT); ++ if (is_simm(disp, 12)) { ++ if (scale == 0) { ++ if (disp == 0) { ++ stx_b(src, base, index); ++ } else { ++ add_d(AT, base, index); ++ st_b(src, AT, disp); ++ } ++ } else { ++ alsl_d(AT, index, base, scale - 1); ++ st_b(src, AT, disp); ++ } ++ } else { ++ lu12i_w(AT, split_low20(disp >> 12)); ++ if (split_low12(disp)) ++ ori(AT, AT, split_low12(disp)); ++ ++ if (scale == 0) { ++ add_d(AT, AT, index); ++ } else { ++ alsl_d(AT, index, AT, scale - 1); ++ } ++ stx_b(src, base, AT); ++ } ++ } else { ++ if (is_simm(disp, 12)) { ++ st_b(src, base, disp); ++ } else { ++ assert_different_registers(src, AT); ++ lu12i_w(AT, split_low20(disp >> 12)); ++ if (split_low12(disp)) ++ ori(AT, AT, split_low12(disp)); ++ stx_b(src, base, AT); ++ } ++ } ++} ++ ++void Assembler::sc_w(Register rd, Address dst) { ++ assert(dst.index() == NOREG, "index is unimplemented"); ++ sc_w(rd, dst.base(), dst.disp()); ++} ++ ++void Assembler::sc_d(Register rd, Address dst) { ++ assert(dst.index() == NOREG, "index is unimplemented"); ++ sc_d(rd, dst.base(), dst.disp()); ++} ++ ++void Assembler::st_d(Register rd, Address dst) { ++ Register src = rd; ++ Register base = dst.base(); ++ Register index = dst.index(); ++ ++ int scale = dst.scale(); ++ int disp = dst.disp(); ++ ++ if (index != noreg) { ++ assert_different_registers(src, AT); ++ if (is_simm(disp, 12)) { ++ if (scale == 0) { ++ if (disp == 0) { ++ stx_d(src, base, index); ++ } else { ++ add_d(AT, base, index); ++ st_d(src, AT, disp); ++ } ++ } else { ++ alsl_d(AT, index, base, scale - 1); ++ st_d(src, AT, disp); ++ } ++ } else if (is_simm(disp, 16) && !(disp & 3)) { ++ if (scale == 0) { ++ add_d(AT, base, index); ++ } else { ++ alsl_d(AT, index, base, scale - 1); ++ } ++ stptr_d(src, AT, disp); ++ } else { ++ 
lu12i_w(AT, split_low20(disp >> 12)); ++ if (split_low12(disp)) ++ ori(AT, AT, split_low12(disp)); ++ ++ if (scale == 0) { ++ add_d(AT, AT, index); ++ } else { ++ alsl_d(AT, index, AT, scale - 1); ++ } ++ stx_d(src, base, AT); ++ } ++ } else { ++ if (is_simm(disp, 12)) { ++ st_d(src, base, disp); ++ } else if (is_simm(disp, 16) && !(disp & 3)) { ++ stptr_d(src, base, disp); ++ } else { ++ assert_different_registers(src, AT); ++ lu12i_w(AT, split_low20(disp >> 12)); ++ if (split_low12(disp)) ++ ori(AT, AT, split_low12(disp)); ++ stx_d(src, base, AT); ++ } ++ } ++} ++ ++void Assembler::st_h(Register rd, Address dst) { ++ Register src = rd; ++ Register base = dst.base(); ++ Register index = dst.index(); ++ ++ int scale = dst.scale(); ++ int disp = dst.disp(); ++ ++ if (index != noreg) { ++ assert_different_registers(src, AT); ++ if (is_simm(disp, 12)) { ++ if (scale == 0) { ++ if (disp == 0) { ++ stx_h(src, base, index); ++ } else { ++ add_d(AT, base, index); ++ st_h(src, AT, disp); ++ } ++ } else { ++ alsl_d(AT, index, base, scale - 1); ++ st_h(src, AT, disp); ++ } ++ } else { ++ lu12i_w(AT, split_low20(disp >> 12)); ++ if (split_low12(disp)) ++ ori(AT, AT, split_low12(disp)); ++ ++ if (scale == 0) { ++ add_d(AT, AT, index); ++ } else { ++ alsl_d(AT, index, AT, scale - 1); ++ } ++ stx_h(src, base, AT); ++ } ++ } else { ++ if (is_simm(disp, 12)) { ++ st_h(src, base, disp); ++ } else { ++ assert_different_registers(src, AT); ++ lu12i_w(AT, split_low20(disp >> 12)); ++ if (split_low12(disp)) ++ ori(AT, AT, split_low12(disp)); ++ stx_h(src, base, AT); ++ } ++ } ++} ++ ++void Assembler::st_w(Register rd, Address dst) { ++ Register src = rd; ++ Register base = dst.base(); ++ Register index = dst.index(); ++ ++ int scale = dst.scale(); ++ int disp = dst.disp(); ++ ++ if (index != noreg) { ++ assert_different_registers(src, AT); ++ if (is_simm(disp, 12)) { ++ if (scale == 0) { ++ if (disp == 0) { ++ stx_w(src, base, index); ++ } else { ++ add_d(AT, base, index); ++ st_w(src, AT, disp); ++ } ++ } else { ++ alsl_d(AT, index, base, scale - 1); ++ st_w(src, AT, disp); ++ } ++ } else if (is_simm(disp, 16) && !(disp & 3)) { ++ if (scale == 0) { ++ add_d(AT, base, index); ++ } else { ++ alsl_d(AT, index, base, scale - 1); ++ } ++ stptr_w(src, AT, disp); ++ } else { ++ lu12i_w(AT, split_low20(disp >> 12)); ++ if (split_low12(disp)) ++ ori(AT, AT, split_low12(disp)); ++ ++ if (scale == 0) { ++ add_d(AT, AT, index); ++ } else { ++ alsl_d(AT, index, AT, scale - 1); ++ } ++ stx_w(src, base, AT); ++ } ++ } else { ++ if (is_simm(disp, 12)) { ++ st_w(src, base, disp); ++ } else if (is_simm(disp, 16) && !(disp & 3)) { ++ stptr_w(src, base, disp); ++ } else { ++ assert_different_registers(src, AT); ++ lu12i_w(AT, split_low20(disp >> 12)); ++ if (split_low12(disp)) ++ ori(AT, AT, split_low12(disp)); ++ stx_w(src, base, AT); ++ } ++ } ++} ++ ++void Assembler::fld_s(FloatRegister fd, Address src) { ++ Register base = src.base(); ++ Register index = src.index(); ++ ++ int scale = src.scale(); ++ int disp = src.disp(); ++ ++ if (index != noreg) { ++ if (is_simm(disp, 12)) { ++ if (scale == 0) { ++ if (disp == 0) { ++ fldx_s(fd, base, index); ++ } else { ++ add_d(AT, base, index); ++ fld_s(fd, AT, disp); ++ } ++ } else { ++ alsl_d(AT, index, base, scale - 1); ++ fld_s(fd, AT, disp); ++ } ++ } else { ++ lu12i_w(AT, split_low20(disp >> 12)); ++ if (split_low12(disp)) ++ ori(AT, AT, split_low12(disp)); ++ ++ if (scale == 0) { ++ add_d(AT, AT, index); ++ } else { ++ alsl_d(AT, index, AT, scale - 1); ++ } ++ fldx_s(fd, base, 
AT); ++ } ++ } else { ++ if (is_simm(disp, 12)) { ++ fld_s(fd, base, disp); ++ } else { ++ lu12i_w(AT, split_low20(disp >> 12)); ++ if (split_low12(disp)) ++ ori(AT, AT, split_low12(disp)); ++ fldx_s(fd, base, AT); ++ } ++ } ++} ++ ++void Assembler::fld_d(FloatRegister fd, Address src) { ++ Register base = src.base(); ++ Register index = src.index(); ++ ++ int scale = src.scale(); ++ int disp = src.disp(); ++ ++ if (index != noreg) { ++ if (is_simm(disp, 12)) { ++ if (scale == 0) { ++ if (disp == 0) { ++ fldx_d(fd, base, index); ++ } else { ++ add_d(AT, base, index); ++ fld_d(fd, AT, disp); ++ } ++ } else { ++ alsl_d(AT, index, base, scale - 1); ++ fld_d(fd, AT, disp); ++ } ++ } else { ++ lu12i_w(AT, split_low20(disp >> 12)); ++ if (split_low12(disp)) ++ ori(AT, AT, split_low12(disp)); ++ ++ if (scale == 0) { ++ add_d(AT, AT, index); ++ } else { ++ alsl_d(AT, index, AT, scale - 1); ++ } ++ fldx_d(fd, base, AT); ++ } ++ } else { ++ if (is_simm(disp, 12)) { ++ fld_d(fd, base, disp); ++ } else { ++ lu12i_w(AT, split_low20(disp >> 12)); ++ if (split_low12(disp)) ++ ori(AT, AT, split_low12(disp)); ++ fldx_d(fd, base, AT); ++ } ++ } ++} ++ ++void Assembler::fst_s(FloatRegister fd, Address dst) { ++ Register base = dst.base(); ++ Register index = dst.index(); ++ ++ int scale = dst.scale(); ++ int disp = dst.disp(); ++ ++ if (index != noreg) { ++ if (is_simm(disp, 12)) { ++ if (scale == 0) { ++ if (disp == 0) { ++ fstx_s(fd, base, index); ++ } else { ++ add_d(AT, base, index); ++ fst_s(fd, AT, disp); ++ } ++ } else { ++ alsl_d(AT, index, base, scale - 1); ++ fst_s(fd, AT, disp); ++ } ++ } else { ++ lu12i_w(AT, split_low20(disp >> 12)); ++ if (split_low12(disp)) ++ ori(AT, AT, split_low12(disp)); ++ ++ if (scale == 0) { ++ add_d(AT, AT, index); ++ } else { ++ alsl_d(AT, index, AT, scale - 1); ++ } ++ fstx_s(fd, base, AT); ++ } ++ } else { ++ if (is_simm(disp, 12)) { ++ fst_s(fd, base, disp); ++ } else { ++ lu12i_w(AT, split_low20(disp >> 12)); ++ if (split_low12(disp)) ++ ori(AT, AT, split_low12(disp)); ++ fstx_s(fd, base, AT); ++ } ++ } ++} ++ ++void Assembler::fst_d(FloatRegister fd, Address dst) { ++ Register base = dst.base(); ++ Register index = dst.index(); ++ ++ int scale = dst.scale(); ++ int disp = dst.disp(); ++ ++ if (index != noreg) { ++ if (is_simm(disp, 12)) { ++ if (scale == 0) { ++ if (disp == 0) { ++ fstx_d(fd, base, index); ++ } else { ++ add_d(AT, base, index); ++ fst_d(fd, AT, disp); ++ } ++ } else { ++ alsl_d(AT, index, base, scale - 1); ++ fst_d(fd, AT, disp); ++ } ++ } else { ++ lu12i_w(AT, split_low20(disp >> 12)); ++ if (split_low12(disp)) ++ ori(AT, AT, split_low12(disp)); ++ ++ if (scale == 0) { ++ add_d(AT, AT, index); ++ } else { ++ alsl_d(AT, index, AT, scale - 1); ++ } ++ fstx_d(fd, base, AT); ++ } ++ } else { ++ if (is_simm(disp, 12)) { ++ fst_d(fd, base, disp); ++ } else { ++ lu12i_w(AT, split_low20(disp >> 12)); ++ if (split_low12(disp)) ++ ori(AT, AT, split_low12(disp)); ++ fstx_d(fd, base, AT); ++ } ++ } ++} +diff --git a/src/hotspot/cpu/loongarch/assembler_loongarch.hpp b/src/hotspot/cpu/loongarch/assembler_loongarch.hpp +new file mode 100644 +index 0000000000..179da7bd0e +--- /dev/null ++++ b/src/hotspot/cpu/loongarch/assembler_loongarch.hpp +@@ -0,0 +1,2827 @@ ++/* ++ * Copyright (c) 1997, 2013, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2021, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#ifndef CPU_LOONGARCH_ASSEMBLER_LOONGARCH_HPP ++#define CPU_LOONGARCH_ASSEMBLER_LOONGARCH_HPP ++ ++#include "asm/register.hpp" ++#include "runtime/vm_version.hpp" ++ ++class BiasedLockingCounters; ++ ++ ++// Note: A register location is represented via a Register, not ++// via an address for efficiency & simplicity reasons. ++ ++class ArrayAddress; ++ ++class Address { ++ public: ++ enum ScaleFactor { ++ no_scale = -1, ++ times_1 = 0, ++ times_2 = 1, ++ times_4 = 2, ++ times_8 = 3, ++ times_ptr = times_8 ++ }; ++ static ScaleFactor times(int size) { ++ assert(size >= 1 && size <= 8 && is_power_of_2(size), "bad scale size"); ++ if (size == 8) return times_8; ++ if (size == 4) return times_4; ++ if (size == 2) return times_2; ++ return times_1; ++ } ++ ++ private: ++ Register _base; ++ Register _index; ++ ScaleFactor _scale; ++ int _disp; ++ RelocationHolder _rspec; ++ ++ // Easily misused constructors make them private ++ Address(address loc, RelocationHolder spec); ++ Address(int disp, address loc, relocInfo::relocType rtype); ++ Address(int disp, address loc, RelocationHolder spec); ++ ++ public: ++ ++ // creation ++ Address() ++ : _base(noreg), ++ _index(noreg), ++ _scale(no_scale), ++ _disp(0) { ++ } ++ ++ // No default displacement otherwise Register can be implicitly ++ // converted to 0(Register) which is quite a different animal. ++ ++ Address(Register base, int disp = 0) ++ : _base(base), ++ _index(noreg), ++ _scale(no_scale), ++ _disp(disp) { ++ assert_different_registers(_base, AT); ++ } ++ ++ Address(Register base, Register index, ScaleFactor scale, int disp = 0) ++ : _base (base), ++ _index(index), ++ _scale(scale), ++ _disp (disp) { ++ assert(!index->is_valid() == (scale == Address::no_scale), "inconsistent address"); ++ assert_different_registers(_base, _index, AT); ++ } ++ ++ // The following two overloads are used in connection with the ++ // ByteSize type (see sizes.hpp). They simplify the use of ++ // ByteSize'd arguments in assembly code. Note that their equivalent ++ // for the optimized build are the member functions with int disp ++ // argument since ByteSize is mapped to an int type in that case. ++ // ++ // Note: DO NOT introduce similar overloaded functions for WordSize ++ // arguments as in the optimized mode, both ByteSize and WordSize ++ // are mapped to the same type and thus the compiler cannot make a ++ // distinction anymore (=> compiler errors). 
++ ++#ifdef ASSERT ++ Address(Register base, ByteSize disp) ++ : _base(base), ++ _index(noreg), ++ _scale(no_scale), ++ _disp(in_bytes(disp)) { ++ assert_different_registers(_base, AT); ++ } ++ ++ Address(Register base, Register index, ScaleFactor scale, ByteSize disp) ++ : _base(base), ++ _index(index), ++ _scale(scale), ++ _disp(in_bytes(disp)) { ++ assert(!index->is_valid() == (scale == Address::no_scale), "inconsistent address"); ++ assert_different_registers(_base, _index, AT); ++ } ++#endif // ASSERT ++ ++ // accessors ++ bool uses(Register reg) const { return _base == reg || _index == reg; } ++ Register base() const { return _base; } ++ Register index() const { return _index; } ++ ScaleFactor scale() const { return _scale; } ++ int disp() const { return _disp; } ++ ++ static Address make_array(ArrayAddress); ++ ++ friend class Assembler; ++ friend class MacroAssembler; ++ friend class LIR_Assembler; // base/index/scale/disp ++}; ++ ++// Calling convention ++class Argument { ++ public: ++ enum { ++ n_register_parameters = 8, // 8 integer registers used to pass parameters ++ n_float_register_parameters = 8 // 8 float registers used to pass parameters ++ }; ++}; ++ ++// ++// AddressLiteral has been split out from Address because operands of this type ++// need to be treated specially on 32bit vs. 64bit platforms. By splitting it out ++// the few instructions that need to deal with address literals are unique and the ++// MacroAssembler does not have to implement every instruction in the Assembler ++// in order to search for address literals that may need special handling depending ++// on the instruction and the platform. As small step on the way to merging i486/amd64 ++// directories. ++// ++class AddressLiteral { ++ friend class ArrayAddress; ++ RelocationHolder _rspec; ++ // Typically we use AddressLiterals we want to use their rval ++ // However in some situations we want the lval (effect address) of the item. ++ // We provide a special factory for making those lvals. ++ bool _is_lval; ++ ++ // If the target is far we'll need to load the ea of this to ++ // a register to reach it. Otherwise if near we can do rip ++ // relative addressing. 
++ ++ address _target; ++ ++ protected: ++ // creation ++ AddressLiteral() ++ : _is_lval(false), ++ _target(NULL) ++ {} ++ ++ public: ++ ++ ++ AddressLiteral(address target, relocInfo::relocType rtype); ++ ++ AddressLiteral(address target, RelocationHolder const& rspec) ++ : _rspec(rspec), ++ _is_lval(false), ++ _target(target) ++ {} ++ ++ AddressLiteral addr() { ++ AddressLiteral ret = *this; ++ ret._is_lval = true; ++ return ret; ++ } ++ ++ ++ private: ++ ++ address target() { return _target; } ++ bool is_lval() { return _is_lval; } ++ ++ relocInfo::relocType reloc() const { return _rspec.type(); } ++ const RelocationHolder& rspec() const { return _rspec; } ++ ++ friend class Assembler; ++ friend class MacroAssembler; ++ friend class Address; ++ friend class LIR_Assembler; ++ RelocationHolder rspec_from_rtype(relocInfo::relocType rtype, address addr) { ++ switch (rtype) { ++ case relocInfo::external_word_type: ++ return external_word_Relocation::spec(addr); ++ case relocInfo::internal_word_type: ++ return internal_word_Relocation::spec(addr); ++ case relocInfo::opt_virtual_call_type: ++ return opt_virtual_call_Relocation::spec(); ++ case relocInfo::static_call_type: ++ return static_call_Relocation::spec(); ++ case relocInfo::runtime_call_type: ++ return runtime_call_Relocation::spec(); ++ case relocInfo::poll_type: ++ case relocInfo::poll_return_type: ++ return Relocation::spec_simple(rtype); ++ case relocInfo::none: ++ case relocInfo::oop_type: ++ // Oops are a special case. Normally they would be their own section ++ // but in cases like icBuffer they are literals in the code stream that ++ // we don't have a section for. We use none so that we get a literal address ++ // which is always patchable. ++ return RelocationHolder(); ++ default: ++ ShouldNotReachHere(); ++ return RelocationHolder(); ++ } ++ } ++ ++}; ++ ++// Convience classes ++class RuntimeAddress: public AddressLiteral { ++ ++ public: ++ ++ RuntimeAddress(address target) : AddressLiteral(target, relocInfo::runtime_call_type) {} ++ ++}; ++ ++class OopAddress: public AddressLiteral { ++ ++ public: ++ ++ OopAddress(address target) : AddressLiteral(target, relocInfo::oop_type){} ++ ++}; ++ ++class ExternalAddress: public AddressLiteral { ++ ++ public: ++ ++ ExternalAddress(address target) : AddressLiteral(target, relocInfo::external_word_type){} ++ ++}; ++ ++class InternalAddress: public AddressLiteral { ++ ++ public: ++ ++ InternalAddress(address target) : AddressLiteral(target, relocInfo::internal_word_type) {} ++ ++}; ++ ++// x86 can do array addressing as a single operation since disp can be an absolute ++// address amd64 can't. We create a class that expresses the concept but does extra ++// magic on amd64 to get the final result ++ ++class ArrayAddress { ++ private: ++ ++ AddressLiteral _base; ++ Address _index; ++ ++ public: ++ ++ ArrayAddress() {}; ++ ArrayAddress(AddressLiteral base, Address index): _base(base), _index(index) {}; ++ AddressLiteral base() { return _base; } ++ Address index() { return _index; } ++ ++}; ++ ++// The LoongArch Assembler: Pure assembler doing NO optimizations on the instruction ++// level ; i.e., what you write is what you get. The Assembler is generating code into ++// a CodeBuffer. 
++ ++class Assembler : public AbstractAssembler { ++ friend class AbstractAssembler; // for the non-virtual hack ++ friend class LIR_Assembler; // as_Address() ++ friend class StubGenerator; ++ ++ public: ++ // 22-bit opcode, highest 22 bits: bits[31...10] ++ enum ops22 { ++ clo_w_op = 0b0000000000000000000100, ++ clz_w_op = 0b0000000000000000000101, ++ cto_w_op = 0b0000000000000000000110, ++ ctz_w_op = 0b0000000000000000000111, ++ clo_d_op = 0b0000000000000000001000, ++ clz_d_op = 0b0000000000000000001001, ++ cto_d_op = 0b0000000000000000001010, ++ ctz_d_op = 0b0000000000000000001011, ++ revb_2h_op = 0b0000000000000000001100, ++ revb_4h_op = 0b0000000000000000001101, ++ revb_2w_op = 0b0000000000000000001110, ++ revb_d_op = 0b0000000000000000001111, ++ revh_2w_op = 0b0000000000000000010000, ++ revh_d_op = 0b0000000000000000010001, ++ bitrev_4b_op = 0b0000000000000000010010, ++ bitrev_8b_op = 0b0000000000000000010011, ++ bitrev_w_op = 0b0000000000000000010100, ++ bitrev_d_op = 0b0000000000000000010101, ++ ext_w_h_op = 0b0000000000000000010110, ++ ext_w_b_op = 0b0000000000000000010111, ++ rdtimel_w_op = 0b0000000000000000011000, ++ rdtimeh_w_op = 0b0000000000000000011001, ++ rdtime_d_op = 0b0000000000000000011010, ++ cpucfg_op = 0b0000000000000000011011, ++ fabs_s_op = 0b0000000100010100000001, ++ fabs_d_op = 0b0000000100010100000010, ++ fneg_s_op = 0b0000000100010100000101, ++ fneg_d_op = 0b0000000100010100000110, ++ flogb_s_op = 0b0000000100010100001001, ++ flogb_d_op = 0b0000000100010100001010, ++ fclass_s_op = 0b0000000100010100001101, ++ fclass_d_op = 0b0000000100010100001110, ++ fsqrt_s_op = 0b0000000100010100010001, ++ fsqrt_d_op = 0b0000000100010100010010, ++ frecip_s_op = 0b0000000100010100010101, ++ frecip_d_op = 0b0000000100010100010110, ++ frsqrt_s_op = 0b0000000100010100011001, ++ frsqrt_d_op = 0b0000000100010100011010, ++ fmov_s_op = 0b0000000100010100100101, ++ fmov_d_op = 0b0000000100010100100110, ++ movgr2fr_w_op = 0b0000000100010100101001, ++ movgr2fr_d_op = 0b0000000100010100101010, ++ movgr2frh_w_op = 0b0000000100010100101011, ++ movfr2gr_s_op = 0b0000000100010100101101, ++ movfr2gr_d_op = 0b0000000100010100101110, ++ movfrh2gr_s_op = 0b0000000100010100101111, ++ movgr2fcsr_op = 0b0000000100010100110000, ++ movfcsr2gr_op = 0b0000000100010100110010, ++ movfr2cf_op = 0b0000000100010100110100, ++ movcf2fr_op = 0b0000000100010100110101, ++ movgr2cf_op = 0b0000000100010100110110, ++ movcf2gr_op = 0b0000000100010100110111, ++ fcvt_s_d_op = 0b0000000100011001000110, ++ fcvt_d_s_op = 0b0000000100011001001001, ++ ftintrm_w_s_op = 0b0000000100011010000001, ++ ftintrm_w_d_op = 0b0000000100011010000010, ++ ftintrm_l_s_op = 0b0000000100011010001001, ++ ftintrm_l_d_op = 0b0000000100011010001010, ++ ftintrp_w_s_op = 0b0000000100011010010001, ++ ftintrp_w_d_op = 0b0000000100011010010010, ++ ftintrp_l_s_op = 0b0000000100011010011001, ++ ftintrp_l_d_op = 0b0000000100011010011010, ++ ftintrz_w_s_op = 0b0000000100011010100001, ++ ftintrz_w_d_op = 0b0000000100011010100010, ++ ftintrz_l_s_op = 0b0000000100011010101001, ++ ftintrz_l_d_op = 0b0000000100011010101010, ++ ftintrne_w_s_op = 0b0000000100011010110001, ++ ftintrne_w_d_op = 0b0000000100011010110010, ++ ftintrne_l_s_op = 0b0000000100011010111001, ++ ftintrne_l_d_op = 0b0000000100011010111010, ++ ftint_w_s_op = 0b0000000100011011000001, ++ ftint_w_d_op = 0b0000000100011011000010, ++ ftint_l_s_op = 0b0000000100011011001001, ++ ftint_l_d_op = 0b0000000100011011001010, ++ ffint_s_w_op = 0b0000000100011101000100, ++ ffint_s_l_op = 
0b0000000100011101000110, ++ ffint_d_w_op = 0b0000000100011101001000, ++ ffint_d_l_op = 0b0000000100011101001010, ++ frint_s_op = 0b0000000100011110010001, ++ frint_d_op = 0b0000000100011110010010, ++ iocsrrd_b_op = 0b0000011001001000000000, ++ iocsrrd_h_op = 0b0000011001001000000001, ++ iocsrrd_w_op = 0b0000011001001000000010, ++ iocsrrd_d_op = 0b0000011001001000000011, ++ iocsrwr_b_op = 0b0000011001001000000100, ++ iocsrwr_h_op = 0b0000011001001000000101, ++ iocsrwr_w_op = 0b0000011001001000000110, ++ iocsrwr_d_op = 0b0000011001001000000111, ++ vpcnt_b_op = 0b0111001010011100001000, ++ vpcnt_h_op = 0b0111001010011100001001, ++ vpcnt_w_op = 0b0111001010011100001010, ++ vpcnt_d_op = 0b0111001010011100001011, ++ vneg_b_op = 0b0111001010011100001100, ++ vneg_h_op = 0b0111001010011100001101, ++ vneg_w_op = 0b0111001010011100001110, ++ vneg_d_op = 0b0111001010011100001111, ++ vfclass_s_op = 0b0111001010011100110101, ++ vfclass_d_op = 0b0111001010011100110110, ++ vfsqrt_s_op = 0b0111001010011100111001, ++ vfsqrt_d_op = 0b0111001010011100111010, ++ vfrint_s_op = 0b0111001010011101001101, ++ vfrint_d_op = 0b0111001010011101001110, ++ vfrintrm_s_op = 0b0111001010011101010001, ++ vfrintrm_d_op = 0b0111001010011101010010, ++ vfrintrp_s_op = 0b0111001010011101010101, ++ vfrintrp_d_op = 0b0111001010011101010110, ++ vfrintrz_s_op = 0b0111001010011101011001, ++ vfrintrz_d_op = 0b0111001010011101011010, ++ vfrintrne_s_op = 0b0111001010011101011101, ++ vfrintrne_d_op = 0b0111001010011101011110, ++ vfcvtl_s_h_op = 0b0111001010011101111010, ++ vfcvth_s_h_op = 0b0111001010011101111011, ++ vfcvtl_d_s_op = 0b0111001010011101111100, ++ vfcvth_d_s_op = 0b0111001010011101111101, ++ vffint_s_w_op = 0b0111001010011110000000, ++ vffint_s_wu_op = 0b0111001010011110000001, ++ vffint_d_l_op = 0b0111001010011110000010, ++ vffint_d_lu_op = 0b0111001010011110000011, ++ vffintl_d_w_op = 0b0111001010011110000100, ++ vffinth_d_w_op = 0b0111001010011110000101, ++ vftint_w_s_op = 0b0111001010011110001100, ++ vftint_l_d_op = 0b0111001010011110001101, ++ vftintrm_w_s_op = 0b0111001010011110001110, ++ vftintrm_l_d_op = 0b0111001010011110001111, ++ vftintrp_w_s_op = 0b0111001010011110010000, ++ vftintrp_l_d_op = 0b0111001010011110010001, ++ vftintrz_w_s_op = 0b0111001010011110010010, ++ vftintrz_l_d_op = 0b0111001010011110010011, ++ vftintrne_w_s_op = 0b0111001010011110010100, ++ vftintrne_l_d_op = 0b0111001010011110010101, ++ vftint_wu_s = 0b0111001010011110010110, ++ vftint_lu_d = 0b0111001010011110010111, ++ vftintrz_wu_f = 0b0111001010011110011100, ++ vftintrz_lu_d = 0b0111001010011110011101, ++ vftintl_l_s_op = 0b0111001010011110100000, ++ vftinth_l_s_op = 0b0111001010011110100001, ++ vftintrml_l_s_op = 0b0111001010011110100010, ++ vftintrmh_l_s_op = 0b0111001010011110100011, ++ vftintrpl_l_s_op = 0b0111001010011110100100, ++ vftintrph_l_s_op = 0b0111001010011110100101, ++ vftintrzl_l_s_op = 0b0111001010011110100110, ++ vftintrzh_l_s_op = 0b0111001010011110100111, ++ vftintrnel_l_s_op = 0b0111001010011110101000, ++ vftintrneh_l_s_op = 0b0111001010011110101001, ++ vreplgr2vr_b_op = 0b0111001010011111000000, ++ vreplgr2vr_h_op = 0b0111001010011111000001, ++ vreplgr2vr_w_op = 0b0111001010011111000010, ++ vreplgr2vr_d_op = 0b0111001010011111000011, ++ xvpcnt_b_op = 0b0111011010011100001000, ++ xvpcnt_h_op = 0b0111011010011100001001, ++ xvpcnt_w_op = 0b0111011010011100001010, ++ xvpcnt_d_op = 0b0111011010011100001011, ++ xvneg_b_op = 0b0111011010011100001100, ++ xvneg_h_op = 0b0111011010011100001101, ++ xvneg_w_op = 
0b0111011010011100001110, ++ xvneg_d_op = 0b0111011010011100001111, ++ xvfclass_s_op = 0b0111011010011100110101, ++ xvfclass_d_op = 0b0111011010011100110110, ++ xvfsqrt_s_op = 0b0111011010011100111001, ++ xvfsqrt_d_op = 0b0111011010011100111010, ++ xvfrint_s_op = 0b0111011010011101001101, ++ xvfrint_d_op = 0b0111011010011101001110, ++ xvfrintrm_s_op = 0b0111011010011101010001, ++ xvfrintrm_d_op = 0b0111011010011101010010, ++ xvfrintrp_s_op = 0b0111011010011101010101, ++ xvfrintrp_d_op = 0b0111011010011101010110, ++ xvfrintrz_s_op = 0b0111011010011101011001, ++ xvfrintrz_d_op = 0b0111011010011101011010, ++ xvfrintrne_s_op = 0b0111011010011101011101, ++ xvfrintrne_d_op = 0b0111011010011101011110, ++ xvfcvtl_s_h_op = 0b0111011010011101111010, ++ xvfcvth_s_h_op = 0b0111011010011101111011, ++ xvfcvtl_d_s_op = 0b0111011010011101111100, ++ xvfcvth_d_s_op = 0b0111011010011101111101, ++ xvffint_s_w_op = 0b0111011010011110000000, ++ xvffint_s_wu_op = 0b0111011010011110000001, ++ xvffint_d_l_op = 0b0111011010011110000010, ++ xvffint_d_lu_op = 0b0111011010011110000011, ++ xvffintl_d_w_op = 0b0111011010011110000100, ++ xvffinth_d_w_op = 0b0111011010011110000101, ++ xvftint_w_s_op = 0b0111011010011110001100, ++ xvftint_l_d_op = 0b0111011010011110001101, ++ xvftintrm_w_s_op = 0b0111011010011110001110, ++ xvftintrm_l_d_op = 0b0111011010011110001111, ++ xvftintrp_w_s_op = 0b0111011010011110010000, ++ xvftintrp_l_d_op = 0b0111011010011110010001, ++ xvftintrz_w_s_op = 0b0111011010011110010010, ++ xvftintrz_l_d_op = 0b0111011010011110010011, ++ xvftintrne_w_s_op = 0b0111011010011110010100, ++ xvftintrne_l_d_op = 0b0111011010011110010101, ++ xvftint_wu_s = 0b0111011010011110010110, ++ xvftint_lu_d = 0b0111011010011110010111, ++ xvftintrz_wu_f = 0b0111011010011110011100, ++ xvftintrz_lu_d = 0b0111011010011110011101, ++ xvftintl_l_s_op = 0b0111011010011110100000, ++ xvftinth_l_s_op = 0b0111011010011110100001, ++ xvftintrml_l_s_op = 0b0111011010011110100010, ++ xvftintrmh_l_s_op = 0b0111011010011110100011, ++ xvftintrpl_l_s_op = 0b0111011010011110100100, ++ xvftintrph_l_s_op = 0b0111011010011110100101, ++ xvftintrzl_l_s_op = 0b0111011010011110100110, ++ xvftintrzh_l_s_op = 0b0111011010011110100111, ++ xvftintrnel_l_s_op = 0b0111011010011110101000, ++ xvftintrneh_l_s_op = 0b0111011010011110101001, ++ xvreplgr2vr_b_op = 0b0111011010011111000000, ++ xvreplgr2vr_h_op = 0b0111011010011111000001, ++ xvreplgr2vr_w_op = 0b0111011010011111000010, ++ xvreplgr2vr_d_op = 0b0111011010011111000011, ++ vext2xv_h_b_op = 0b0111011010011111000100, ++ vext2xv_w_b_op = 0b0111011010011111000101, ++ vext2xv_d_b_op = 0b0111011010011111000110, ++ vext2xv_w_h_op = 0b0111011010011111000111, ++ vext2xv_d_h_op = 0b0111011010011111001000, ++ vext2xv_d_w_op = 0b0111011010011111001001, ++ vext2xv_hu_bu_op = 0b0111011010011111001010, ++ vext2xv_wu_bu_op = 0b0111011010011111001011, ++ vext2xv_du_bu_op = 0b0111011010011111001100, ++ vext2xv_wu_hu_op = 0b0111011010011111001101, ++ vext2xv_du_hu_op = 0b0111011010011111001110, ++ vext2xv_du_wu_op = 0b0111011010011111001111, ++ xvreplve0_b_op = 0b0111011100000111000000, ++ xvreplve0_h_op = 0b0111011100000111100000, ++ xvreplve0_w_op = 0b0111011100000111110000, ++ xvreplve0_d_op = 0b0111011100000111111000, ++ xvreplve0_q_op = 0b0111011100000111111100, ++ ++ unknow_ops22 = 0b1111111111111111111111 ++ }; ++ ++ // 21-bit opcode, highest 21 bits: bits[31...11] ++ enum ops21 { ++ vinsgr2vr_d_op = 0b011100101110101111110, ++ vpickve2gr_d_op = 0b011100101110111111110, ++ vpickve2gr_du_op = 
0b011100101111001111110, ++ vreplvei_d_op = 0b011100101111011111110, ++ ++ unknow_ops21 = 0b111111111111111111111 ++ }; ++ ++ // 20-bit opcode, highest 20 bits: bits[31...12] ++ enum ops20 { ++ vinsgr2vr_w_op = 0b01110010111010111110, ++ vpickve2gr_w_op = 0b01110010111011111110, ++ vpickve2gr_wu_op = 0b01110010111100111110, ++ vreplvei_w_op = 0b01110010111101111110, ++ xvinsgr2vr_d_op = 0b01110110111010111110, ++ xvpickve2gr_d_op = 0b01110110111011111110, ++ xvpickve2gr_du_op = 0b01110110111100111110, ++ xvinsve0_d_op = 0b01110110111111111110, ++ xvpickve_d_op = 0b01110111000000111110, ++ ++ unknow_ops20 = 0b11111111111111111111 ++ }; ++ ++ // 19-bit opcode, highest 19 bits: bits[31...13] ++ enum ops19 { ++ vrotri_b_op = 0b0111001010100000001, ++ vinsgr2vr_h_op = 0b0111001011101011110, ++ vpickve2gr_h_op = 0b0111001011101111110, ++ vpickve2gr_hu_op = 0b0111001011110011110, ++ vreplvei_h_op = 0b0111001011110111110, ++ vbitclri_b_op = 0b0111001100010000001, ++ vbitseti_b_op = 0b0111001100010100001, ++ vbitrevi_b_op = 0b0111001100011000001, ++ vslli_b_op = 0b0111001100101100001, ++ vsrli_b_op = 0b0111001100110000001, ++ vsrai_b_op = 0b0111001100110100001, ++ xvrotri_b_op = 0b0111011010100000001, ++ xvinsgr2vr_w_op = 0b0111011011101011110, ++ xvpickve2gr_w_op = 0b0111011011101111110, ++ xvpickve2gr_wu_op = 0b0111011011110011110, ++ xvinsve0_w_op = 0b0111011011111111110, ++ xvpickve_w_op = 0b0111011100000011110, ++ xvbitclri_b_op = 0b0111011100010000001, ++ xvbitseti_b_op = 0b0111011100010100001, ++ xvbitrevi_b_op = 0b0111011100011000001, ++ xvslli_b_op = 0b0111011100101100001, ++ xvsrli_b_op = 0b0111011100110000001, ++ xvsrai_b_op = 0b0111011100110100001, ++ ++ unknow_ops19 = 0b1111111111111111111 ++ }; ++ ++ // 18-bit opcode, highest 18 bits: bits[31...14] ++ enum ops18 { ++ vrotri_h_op = 0b011100101010000001, ++ vinsgr2vr_b_op = 0b011100101110101110, ++ vpickve2gr_b_op = 0b011100101110111110, ++ vpickve2gr_bu_op = 0b011100101111001110, ++ vreplvei_b_op = 0b011100101111011110, ++ vbitclri_h_op = 0b011100110001000001, ++ vbitseti_h_op = 0b011100110001010001, ++ vbitrevi_h_op = 0b011100110001100001, ++ vslli_h_op = 0b011100110010110001, ++ vsrli_h_op = 0b011100110011000001, ++ vsrai_h_op = 0b011100110011010001, ++ vsrlni_b_h_op = 0b011100110100000001, ++ xvrotri_h_op = 0b011101101010000001, ++ xvbitclri_h_op = 0b011101110001000001, ++ xvbitseti_h_op = 0b011101110001010001, ++ xvbitrevi_h_op = 0b011101110001100001, ++ xvslli_h_op = 0b011101110010110001, ++ xvsrli_h_op = 0b011101110011000001, ++ xvsrai_h_op = 0b011101110011010001, ++ ++ unknow_ops18 = 0b111111111111111111 ++ }; ++ ++ // 17-bit opcode, highest 17 bits: bits[31...15] ++ enum ops17 { ++ asrtle_d_op = 0b00000000000000010, ++ asrtgt_d_op = 0b00000000000000011, ++ add_w_op = 0b00000000000100000, ++ add_d_op = 0b00000000000100001, ++ sub_w_op = 0b00000000000100010, ++ sub_d_op = 0b00000000000100011, ++ slt_op = 0b00000000000100100, ++ sltu_op = 0b00000000000100101, ++ maskeqz_op = 0b00000000000100110, ++ masknez_op = 0b00000000000100111, ++ nor_op = 0b00000000000101000, ++ and_op = 0b00000000000101001, ++ or_op = 0b00000000000101010, ++ xor_op = 0b00000000000101011, ++ orn_op = 0b00000000000101100, ++ andn_op = 0b00000000000101101, ++ sll_w_op = 0b00000000000101110, ++ srl_w_op = 0b00000000000101111, ++ sra_w_op = 0b00000000000110000, ++ sll_d_op = 0b00000000000110001, ++ srl_d_op = 0b00000000000110010, ++ sra_d_op = 0b00000000000110011, ++ rotr_w_op = 0b00000000000110110, ++ rotr_d_op = 0b00000000000110111, ++ mul_w_op = 
0b00000000000111000, ++ mulh_w_op = 0b00000000000111001, ++ mulh_wu_op = 0b00000000000111010, ++ mul_d_op = 0b00000000000111011, ++ mulh_d_op = 0b00000000000111100, ++ mulh_du_op = 0b00000000000111101, ++ mulw_d_w_op = 0b00000000000111110, ++ mulw_d_wu_op = 0b00000000000111111, ++ div_w_op = 0b00000000001000000, ++ mod_w_op = 0b00000000001000001, ++ div_wu_op = 0b00000000001000010, ++ mod_wu_op = 0b00000000001000011, ++ div_d_op = 0b00000000001000100, ++ mod_d_op = 0b00000000001000101, ++ div_du_op = 0b00000000001000110, ++ mod_du_op = 0b00000000001000111, ++ crc_w_b_w_op = 0b00000000001001000, ++ crc_w_h_w_op = 0b00000000001001001, ++ crc_w_w_w_op = 0b00000000001001010, ++ crc_w_d_w_op = 0b00000000001001011, ++ crcc_w_b_w_op = 0b00000000001001100, ++ crcc_w_h_w_op = 0b00000000001001101, ++ crcc_w_w_w_op = 0b00000000001001110, ++ crcc_w_d_w_op = 0b00000000001001111, ++ break_op = 0b00000000001010100, ++ fadd_s_op = 0b00000001000000001, ++ fadd_d_op = 0b00000001000000010, ++ fsub_s_op = 0b00000001000000101, ++ fsub_d_op = 0b00000001000000110, ++ fmul_s_op = 0b00000001000001001, ++ fmul_d_op = 0b00000001000001010, ++ fdiv_s_op = 0b00000001000001101, ++ fdiv_d_op = 0b00000001000001110, ++ fmax_s_op = 0b00000001000010001, ++ fmax_d_op = 0b00000001000010010, ++ fmin_s_op = 0b00000001000010101, ++ fmin_d_op = 0b00000001000010110, ++ fmaxa_s_op = 0b00000001000011001, ++ fmaxa_d_op = 0b00000001000011010, ++ fmina_s_op = 0b00000001000011101, ++ fmina_d_op = 0b00000001000011110, ++ fscaleb_s_op = 0b00000001000100001, ++ fscaleb_d_op = 0b00000001000100010, ++ fcopysign_s_op = 0b00000001000100101, ++ fcopysign_d_op = 0b00000001000100110, ++ ldx_b_op = 0b00111000000000000, ++ ldx_h_op = 0b00111000000001000, ++ ldx_w_op = 0b00111000000010000, ++ ldx_d_op = 0b00111000000011000, ++ stx_b_op = 0b00111000000100000, ++ stx_h_op = 0b00111000000101000, ++ stx_w_op = 0b00111000000110000, ++ stx_d_op = 0b00111000000111000, ++ ldx_bu_op = 0b00111000001000000, ++ ldx_hu_op = 0b00111000001001000, ++ ldx_wu_op = 0b00111000001010000, ++ fldx_s_op = 0b00111000001100000, ++ fldx_d_op = 0b00111000001101000, ++ fstx_s_op = 0b00111000001110000, ++ fstx_d_op = 0b00111000001111000, ++ vldx_op = 0b00111000010000000, ++ vstx_op = 0b00111000010001000, ++ xvldx_op = 0b00111000010010000, ++ xvstx_op = 0b00111000010011000, ++ amswap_w_op = 0b00111000011000000, ++ amswap_d_op = 0b00111000011000001, ++ amadd_w_op = 0b00111000011000010, ++ amadd_d_op = 0b00111000011000011, ++ amand_w_op = 0b00111000011000100, ++ amand_d_op = 0b00111000011000101, ++ amor_w_op = 0b00111000011000110, ++ amor_d_op = 0b00111000011000111, ++ amxor_w_op = 0b00111000011001000, ++ amxor_d_op = 0b00111000011001001, ++ ammax_w_op = 0b00111000011001010, ++ ammax_d_op = 0b00111000011001011, ++ ammin_w_op = 0b00111000011001100, ++ ammin_d_op = 0b00111000011001101, ++ ammax_wu_op = 0b00111000011001110, ++ ammax_du_op = 0b00111000011001111, ++ ammin_wu_op = 0b00111000011010000, ++ ammin_du_op = 0b00111000011010001, ++ amswap_db_w_op = 0b00111000011010010, ++ amswap_db_d_op = 0b00111000011010011, ++ amadd_db_w_op = 0b00111000011010100, ++ amadd_db_d_op = 0b00111000011010101, ++ amand_db_w_op = 0b00111000011010110, ++ amand_db_d_op = 0b00111000011010111, ++ amor_db_w_op = 0b00111000011011000, ++ amor_db_d_op = 0b00111000011011001, ++ amxor_db_w_op = 0b00111000011011010, ++ amxor_db_d_op = 0b00111000011011011, ++ ammax_db_w_op = 0b00111000011011100, ++ ammax_db_d_op = 0b00111000011011101, ++ ammin_db_w_op = 0b00111000011011110, ++ ammin_db_d_op = 0b00111000011011111, 
++ ammax_db_wu_op = 0b00111000011100000, ++ ammax_db_du_op = 0b00111000011100001, ++ ammin_db_wu_op = 0b00111000011100010, ++ ammin_db_du_op = 0b00111000011100011, ++ dbar_op = 0b00111000011100100, ++ ibar_op = 0b00111000011100101, ++ fldgt_s_op = 0b00111000011101000, ++ fldgt_d_op = 0b00111000011101001, ++ fldle_s_op = 0b00111000011101010, ++ fldle_d_op = 0b00111000011101011, ++ fstgt_s_op = 0b00111000011101100, ++ fstgt_d_op = 0b00111000011101101, ++ fstle_s_op = 0b00111000011101110, ++ fstle_d_op = 0b00111000011101111, ++ ldgt_b_op = 0b00111000011110000, ++ ldgt_h_op = 0b00111000011110001, ++ ldgt_w_op = 0b00111000011110010, ++ ldgt_d_op = 0b00111000011110011, ++ ldle_b_op = 0b00111000011110100, ++ ldle_h_op = 0b00111000011110101, ++ ldle_w_op = 0b00111000011110110, ++ ldle_d_op = 0b00111000011110111, ++ stgt_b_op = 0b00111000011111000, ++ stgt_h_op = 0b00111000011111001, ++ stgt_w_op = 0b00111000011111010, ++ stgt_d_op = 0b00111000011111011, ++ stle_b_op = 0b00111000011111100, ++ stle_h_op = 0b00111000011111101, ++ stle_w_op = 0b00111000011111110, ++ stle_d_op = 0b00111000011111111, ++ vseq_b_op = 0b01110000000000000, ++ vseq_h_op = 0b01110000000000001, ++ vseq_w_op = 0b01110000000000010, ++ vseq_d_op = 0b01110000000000011, ++ vsle_b_op = 0b01110000000000100, ++ vsle_h_op = 0b01110000000000101, ++ vsle_w_op = 0b01110000000000110, ++ vsle_d_op = 0b01110000000000111, ++ vsle_bu_op = 0b01110000000001000, ++ vsle_hu_op = 0b01110000000001001, ++ vsle_wu_op = 0b01110000000001010, ++ vsle_du_op = 0b01110000000001011, ++ vslt_b_op = 0b01110000000001100, ++ vslt_h_op = 0b01110000000001101, ++ vslt_w_op = 0b01110000000001110, ++ vslt_d_op = 0b01110000000001111, ++ vslt_bu_op = 0b01110000000010000, ++ vslt_hu_op = 0b01110000000010001, ++ vslt_wu_op = 0b01110000000010010, ++ vslt_du_op = 0b01110000000010011, ++ vadd_b_op = 0b01110000000010100, ++ vadd_h_op = 0b01110000000010101, ++ vadd_w_op = 0b01110000000010110, ++ vadd_d_op = 0b01110000000010111, ++ vsub_b_op = 0b01110000000011000, ++ vsub_h_op = 0b01110000000011001, ++ vsub_w_op = 0b01110000000011010, ++ vsub_d_op = 0b01110000000011011, ++ vabsd_b_op = 0b01110000011000000, ++ vabsd_h_op = 0b01110000011000001, ++ vabsd_w_op = 0b01110000011000010, ++ vabsd_d_op = 0b01110000011000011, ++ vmax_b_op = 0b01110000011100000, ++ vmax_h_op = 0b01110000011100001, ++ vmax_w_op = 0b01110000011100010, ++ vmax_d_op = 0b01110000011100011, ++ vmin_b_op = 0b01110000011100100, ++ vmin_h_op = 0b01110000011100101, ++ vmin_w_op = 0b01110000011100110, ++ vmin_d_op = 0b01110000011100111, ++ vmul_b_op = 0b01110000100001000, ++ vmul_h_op = 0b01110000100001001, ++ vmul_w_op = 0b01110000100001010, ++ vmul_d_op = 0b01110000100001011, ++ vmuh_b_op = 0b01110000100001100, ++ vmuh_h_op = 0b01110000100001101, ++ vmuh_w_op = 0b01110000100001110, ++ vmuh_d_op = 0b01110000100001111, ++ vmuh_bu_op = 0b01110000100010000, ++ vmuh_hu_op = 0b01110000100010001, ++ vmuh_wu_op = 0b01110000100010010, ++ vmuh_du_op = 0b01110000100010011, ++ vmulwev_h_b_op = 0b01110000100100000, ++ vmulwev_w_h_op = 0b01110000100100001, ++ vmulwev_d_w_op = 0b01110000100100010, ++ vmulwev_q_d_op = 0b01110000100100011, ++ vmulwod_h_b_op = 0b01110000100100100, ++ vmulwod_w_h_op = 0b01110000100100101, ++ vmulwod_d_w_op = 0b01110000100100110, ++ vmulwod_q_d_op = 0b01110000100100111, ++ vmadd_b_op = 0b01110000101010000, ++ vmadd_h_op = 0b01110000101010001, ++ vmadd_w_op = 0b01110000101010010, ++ vmadd_d_op = 0b01110000101010011, ++ vmsub_b_op = 0b01110000101010100, ++ vmsub_h_op = 0b01110000101010101, ++ 
vmsub_w_op = 0b01110000101010110, ++ vmsub_d_op = 0b01110000101010111, ++ vsll_b_op = 0b01110000111010000, ++ vsll_h_op = 0b01110000111010001, ++ vsll_w_op = 0b01110000111010010, ++ vsll_d_op = 0b01110000111010011, ++ vsrl_b_op = 0b01110000111010100, ++ vsrl_h_op = 0b01110000111010101, ++ vsrl_w_op = 0b01110000111010110, ++ vsrl_d_op = 0b01110000111010111, ++ vsra_b_op = 0b01110000111011000, ++ vsra_h_op = 0b01110000111011001, ++ vsra_w_op = 0b01110000111011010, ++ vsra_d_op = 0b01110000111011011, ++ vrotr_b_op = 0b01110000111011100, ++ vrotr_h_op = 0b01110000111011101, ++ vrotr_w_op = 0b01110000111011110, ++ vrotr_d_op = 0b01110000111011111, ++ vbitclr_b_op = 0b01110001000011000, ++ vbitclr_h_op = 0b01110001000011001, ++ vbitclr_w_op = 0b01110001000011010, ++ vbitclr_d_op = 0b01110001000011011, ++ vbitset_b_op = 0b01110001000011100, ++ vbitset_h_op = 0b01110001000011101, ++ vbitset_w_op = 0b01110001000011110, ++ vbitset_d_op = 0b01110001000011111, ++ vbitrev_b_op = 0b01110001000100000, ++ vbitrev_h_op = 0b01110001000100001, ++ vbitrev_w_op = 0b01110001000100010, ++ vbitrev_d_op = 0b01110001000100011, ++ vand_v_op = 0b01110001001001100, ++ vor_v_op = 0b01110001001001101, ++ vxor_v_op = 0b01110001001001110, ++ vnor_v_op = 0b01110001001001111, ++ vandn_v_op = 0b01110001001010000, ++ vorn_v_op = 0b01110001001010001, ++ vadd_q_op = 0b01110001001011010, ++ vsub_q_op = 0b01110001001011011, ++ vfadd_s_op = 0b01110001001100001, ++ vfadd_d_op = 0b01110001001100010, ++ vfsub_s_op = 0b01110001001100101, ++ vfsub_d_op = 0b01110001001100110, ++ vfmul_s_op = 0b01110001001110001, ++ vfmul_d_op = 0b01110001001110010, ++ vfdiv_s_op = 0b01110001001110101, ++ vfdiv_d_op = 0b01110001001110110, ++ vfmax_s_op = 0b01110001001111001, ++ vfmax_d_op = 0b01110001001111010, ++ vfmin_s_op = 0b01110001001111101, ++ vfmin_d_op = 0b01110001001111110, ++ vfcvt_h_s_op = 0b01110001010001100, ++ vfcvt_s_d_op = 0b01110001010001101, ++ vffint_s_l_op = 0b01110001010010000, ++ vftint_w_d_op = 0b01110001010010011, ++ vftintrm_w_d_op = 0b01110001010010100, ++ vftintrp_w_d_op = 0b01110001010010101, ++ vftintrz_w_d_op = 0b01110001010010110, ++ vftintrne_w_d_op = 0b01110001010010111, ++ vshuf_h_op = 0b01110001011110101, ++ vshuf_w_op = 0b01110001011110110, ++ vshuf_d_op = 0b01110001011110111, ++ vslti_bu_op = 0b01110010100010000, ++ vslti_hu_op = 0b01110010100010001, ++ vslti_wu_op = 0b01110010100010010, ++ vslti_du_op = 0b01110010100010011, ++ vaddi_bu_op = 0b01110010100010100, ++ vaddi_hu_op = 0b01110010100010101, ++ vaddi_wu_op = 0b01110010100010110, ++ vaddi_du_op = 0b01110010100010111, ++ vsubi_bu_op = 0b01110010100011000, ++ vsubi_hu_op = 0b01110010100011001, ++ vsubi_wu_op = 0b01110010100011010, ++ vsubi_du_op = 0b01110010100011011, ++ vrotri_w_op = 0b01110010101000001, ++ vbitclri_w_op = 0b01110011000100001, ++ vbitseti_w_op = 0b01110011000101001, ++ vbitrevi_w_op = 0b01110011000110001, ++ vslli_w_op = 0b01110011001011001, ++ vsrli_w_op = 0b01110011001100001, ++ vsrai_w_op = 0b01110011001101001, ++ vsrlni_h_w_op = 0b01110011010000001, ++ xvseq_b_op = 0b01110100000000000, ++ xvseq_h_op = 0b01110100000000001, ++ xvseq_w_op = 0b01110100000000010, ++ xvseq_d_op = 0b01110100000000011, ++ xvsle_b_op = 0b01110100000000100, ++ xvsle_h_op = 0b01110100000000101, ++ xvsle_w_op = 0b01110100000000110, ++ xvsle_d_op = 0b01110100000000111, ++ xvsle_bu_op = 0b01110100000001000, ++ xvsle_hu_op = 0b01110100000001001, ++ xvsle_wu_op = 0b01110100000001010, ++ xvsle_du_op = 0b01110100000001011, ++ xvslt_b_op = 0b01110100000001100, ++ xvslt_h_op = 
0b01110100000001101, ++ xvslt_w_op = 0b01110100000001110, ++ xvslt_d_op = 0b01110100000001111, ++ xvslt_bu_op = 0b01110100000010000, ++ xvslt_hu_op = 0b01110100000010001, ++ xvslt_wu_op = 0b01110100000010010, ++ xvslt_du_op = 0b01110100000010011, ++ xvadd_b_op = 0b01110100000010100, ++ xvadd_h_op = 0b01110100000010101, ++ xvadd_w_op = 0b01110100000010110, ++ xvadd_d_op = 0b01110100000010111, ++ xvsub_b_op = 0b01110100000011000, ++ xvsub_h_op = 0b01110100000011001, ++ xvsub_w_op = 0b01110100000011010, ++ xvsub_d_op = 0b01110100000011011, ++ xvabsd_b_op = 0b01110100011000000, ++ xvabsd_h_op = 0b01110100011000001, ++ xvabsd_w_op = 0b01110100011000010, ++ xvabsd_d_op = 0b01110100011000011, ++ xvmax_b_op = 0b01110100011100000, ++ xvmax_h_op = 0b01110100011100001, ++ xvmax_w_op = 0b01110100011100010, ++ xvmax_d_op = 0b01110100011100011, ++ xvmin_b_op = 0b01110100011100100, ++ xvmin_h_op = 0b01110100011100101, ++ xvmin_w_op = 0b01110100011100110, ++ xvmin_d_op = 0b01110100011100111, ++ xvmul_b_op = 0b01110100100001000, ++ xvmul_h_op = 0b01110100100001001, ++ xvmul_w_op = 0b01110100100001010, ++ xvmul_d_op = 0b01110100100001011, ++ xvmuh_b_op = 0b01110100100001100, ++ xvmuh_h_op = 0b01110100100001101, ++ xvmuh_w_op = 0b01110100100001110, ++ xvmuh_d_op = 0b01110100100001111, ++ xvmuh_bu_op = 0b01110100100010000, ++ xvmuh_hu_op = 0b01110100100010001, ++ xvmuh_wu_op = 0b01110100100010010, ++ xvmuh_du_op = 0b01110100100010011, ++ xvmulwev_h_b_op = 0b01110100100100000, ++ xvmulwev_w_h_op = 0b01110100100100001, ++ xvmulwev_d_w_op = 0b01110100100100010, ++ xvmulwev_q_d_op = 0b01110100100100011, ++ xvmulwod_h_b_op = 0b01110100100100100, ++ xvmulwod_w_h_op = 0b01110100100100101, ++ xvmulwod_d_w_op = 0b01110100100100110, ++ xvmulwod_q_d_op = 0b01110100100100111, ++ xvmadd_b_op = 0b01110100101010000, ++ xvmadd_h_op = 0b01110100101010001, ++ xvmadd_w_op = 0b01110100101010010, ++ xvmadd_d_op = 0b01110100101010011, ++ xvmsub_b_op = 0b01110100101010100, ++ xvmsub_h_op = 0b01110100101010101, ++ xvmsub_w_op = 0b01110100101010110, ++ xvmsub_d_op = 0b01110100101010111, ++ xvsll_b_op = 0b01110100111010000, ++ xvsll_h_op = 0b01110100111010001, ++ xvsll_w_op = 0b01110100111010010, ++ xvsll_d_op = 0b01110100111010011, ++ xvsrl_b_op = 0b01110100111010100, ++ xvsrl_h_op = 0b01110100111010101, ++ xvsrl_w_op = 0b01110100111010110, ++ xvsrl_d_op = 0b01110100111010111, ++ xvsra_b_op = 0b01110100111011000, ++ xvsra_h_op = 0b01110100111011001, ++ xvsra_w_op = 0b01110100111011010, ++ xvsra_d_op = 0b01110100111011011, ++ xvrotr_b_op = 0b01110100111011100, ++ xvrotr_h_op = 0b01110100111011101, ++ xvrotr_w_op = 0b01110100111011110, ++ xvrotr_d_op = 0b01110100111011111, ++ xvbitclr_b_op = 0b01110101000011000, ++ xvbitclr_h_op = 0b01110101000011001, ++ xvbitclr_w_op = 0b01110101000011010, ++ xvbitclr_d_op = 0b01110101000011011, ++ xvbitset_b_op = 0b01110101000011100, ++ xvbitset_h_op = 0b01110101000011101, ++ xvbitset_w_op = 0b01110101000011110, ++ xvbitset_d_op = 0b01110101000011111, ++ xvbitrev_b_op = 0b01110101000100000, ++ xvbitrev_h_op = 0b01110101000100001, ++ xvbitrev_w_op = 0b01110101000100010, ++ xvbitrev_d_op = 0b01110101000100011, ++ xvand_v_op = 0b01110101001001100, ++ xvor_v_op = 0b01110101001001101, ++ xvxor_v_op = 0b01110101001001110, ++ xvnor_v_op = 0b01110101001001111, ++ xvandn_v_op = 0b01110101001010000, ++ xvorn_v_op = 0b01110101001010001, ++ xvadd_q_op = 0b01110101001011010, ++ xvsub_q_op = 0b01110101001011011, ++ xvfadd_s_op = 0b01110101001100001, ++ xvfadd_d_op = 0b01110101001100010, ++ xvfsub_s_op = 
0b01110101001100101, ++ xvfsub_d_op = 0b01110101001100110, ++ xvfmul_s_op = 0b01110101001110001, ++ xvfmul_d_op = 0b01110101001110010, ++ xvfdiv_s_op = 0b01110101001110101, ++ xvfdiv_d_op = 0b01110101001110110, ++ xvfmax_s_op = 0b01110101001111001, ++ xvfmax_d_op = 0b01110101001111010, ++ xvfmin_s_op = 0b01110101001111101, ++ xvfmin_d_op = 0b01110101001111110, ++ xvfcvt_h_s_op = 0b01110101010001100, ++ xvfcvt_s_d_op = 0b01110101010001101, ++ xvffint_s_l_op = 0b01110101010010000, ++ xvftint_w_d_op = 0b01110101010010011, ++ xvftintrm_w_d_op = 0b01110101010010100, ++ xvftintrp_w_d_op = 0b01110101010010101, ++ xvftintrz_w_d_op = 0b01110101010010110, ++ xvftintrne_w_d_op = 0b01110101010010111, ++ xvshuf_h_op = 0b01110101011110101, ++ xvshuf_w_op = 0b01110101011110110, ++ xvshuf_d_op = 0b01110101011110111, ++ xvperm_w_op = 0b01110101011111010, ++ xvslti_bu_op = 0b01110110100010000, ++ xvslti_hu_op = 0b01110110100010001, ++ xvslti_wu_op = 0b01110110100010010, ++ xvslti_du_op = 0b01110110100010011, ++ xvaddi_bu_op = 0b01110110100010100, ++ xvaddi_hu_op = 0b01110110100010101, ++ xvaddi_wu_op = 0b01110110100010110, ++ xvaddi_du_op = 0b01110110100010111, ++ xvsubi_bu_op = 0b01110110100011000, ++ xvsubi_hu_op = 0b01110110100011001, ++ xvsubi_wu_op = 0b01110110100011010, ++ xvsubi_du_op = 0b01110110100011011, ++ xvrotri_w_op = 0b01110110101000001, ++ xvbitclri_w_op = 0b01110111000100001, ++ xvbitseti_w_op = 0b01110111000101001, ++ xvbitrevi_w_op = 0b01110111000110001, ++ xvslli_w_op = 0b01110111001011001, ++ xvsrli_w_op = 0b01110111001100001, ++ xvsrai_w_op = 0b01110111001101001, ++ ++ unknow_ops17 = 0b11111111111111111 ++ }; ++ ++ // 16-bit opcode, highest 16 bits: bits[31...16] ++ enum ops16 { ++ vrotri_d_op = 0b0111001010100001, ++ vbitclri_d_op = 0b0111001100010001, ++ vbitseti_d_op = 0b0111001100010101, ++ vbitrevi_d_op = 0b0111001100011001, ++ vslli_d_op = 0b0111001100101101, ++ vsrli_d_op = 0b0111001100110001, ++ vsrai_d_op = 0b0111001100110101, ++ vsrlni_w_d_op = 0b0111001101000001, ++ xvrotri_d_op = 0b0111011010100001, ++ xvbitclri_d_op = 0b0111011100010001, ++ xvbitseti_d_op = 0b0111011100010101, ++ xvbitrevi_d_op = 0b0111011100011001, ++ xvslli_d_op = 0b0111011100101101, ++ xvsrli_d_op = 0b0111011100110001, ++ xvsrai_d_op = 0b0111011100110101, ++ ++ unknow_ops16 = 0b1111111111111111 ++ }; ++ ++ // 15-bit opcode, highest 15 bits: bits[31...17] ++ enum ops15 { ++ vsrlni_d_q_op = 0b011100110100001, ++ ++ unknow_ops15 = 0b111111111111111 ++ }; ++ ++ // 14-bit opcode, highest 14 bits: bits[31...18] ++ enum ops14 { ++ alsl_w_op = 0b00000000000001, ++ bytepick_w_op = 0b00000000000010, ++ bytepick_d_op = 0b00000000000011, ++ alsl_d_op = 0b00000000001011, ++ slli_op = 0b00000000010000, ++ srli_op = 0b00000000010001, ++ srai_op = 0b00000000010010, ++ rotri_op = 0b00000000010011, ++ lddir_op = 0b00000110010000, ++ ldpte_op = 0b00000110010001, ++ vshuf4i_b_op = 0b01110011100100, ++ vshuf4i_h_op = 0b01110011100101, ++ vshuf4i_w_op = 0b01110011100110, ++ vshuf4i_d_op = 0b01110011100111, ++ vandi_b_op = 0b01110011110100, ++ vori_b_op = 0b01110011110101, ++ vxori_b_op = 0b01110011110110, ++ vnori_b_op = 0b01110011110111, ++ vldi_op = 0b01110011111000, ++ vpermi_w_op = 0b01110011111001, ++ xvshuf4i_b_op = 0b01110111100100, ++ xvshuf4i_h_op = 0b01110111100101, ++ xvshuf4i_w_op = 0b01110111100110, ++ xvshuf4i_d_op = 0b01110111100111, ++ xvandi_b_op = 0b01110111110100, ++ xvori_b_op = 0b01110111110101, ++ xvxori_b_op = 0b01110111110110, ++ xvnori_b_op = 0b01110111110111, ++ xvldi_op = 0b01110111111000, ++ 
xvpermi_w_op = 0b01110111111001, ++ xvpermi_d_op = 0b01110111111010, ++ xvpermi_q_op = 0b01110111111011, ++ ++ unknow_ops14 = 0b11111111111111 ++ }; ++ ++ // 12-bit opcode, highest 12 bits: bits[31...20] ++ enum ops12 { ++ fmadd_s_op = 0b000010000001, ++ fmadd_d_op = 0b000010000010, ++ fmsub_s_op = 0b000010000101, ++ fmsub_d_op = 0b000010000110, ++ fnmadd_s_op = 0b000010001001, ++ fnmadd_d_op = 0b000010001010, ++ fnmsub_s_op = 0b000010001101, ++ fnmsub_d_op = 0b000010001110, ++ vfmadd_s_op = 0b000010010001, ++ vfmadd_d_op = 0b000010010010, ++ vfmsub_s_op = 0b000010010101, ++ vfmsub_d_op = 0b000010010110, ++ vfnmadd_s_op = 0b000010011001, ++ vfnmadd_d_op = 0b000010011010, ++ vfnmsub_s_op = 0b000010011101, ++ vfnmsub_d_op = 0b000010011110, ++ xvfmadd_s_op = 0b000010100001, ++ xvfmadd_d_op = 0b000010100010, ++ xvfmsub_s_op = 0b000010100101, ++ xvfmsub_d_op = 0b000010100110, ++ xvfnmadd_s_op = 0b000010101001, ++ xvfnmadd_d_op = 0b000010101010, ++ xvfnmsub_s_op = 0b000010101101, ++ xvfnmsub_d_op = 0b000010101110, ++ fcmp_cond_s_op = 0b000011000001, ++ fcmp_cond_d_op = 0b000011000010, ++ vfcmp_cond_s_op = 0b000011000101, ++ vfcmp_cond_d_op = 0b000011000110, ++ xvfcmp_cond_s_op = 0b000011001001, ++ xvfcmp_cond_d_op = 0b000011001010, ++ fsel_op = 0b000011010000, ++ vbitsel_v_op = 0b000011010001, ++ xvbitsel_v_op = 0b000011010010, ++ vshuf_b_op = 0b000011010101, ++ xvshuf_b_op = 0b000011010110, ++ ++ unknow_ops12 = 0b111111111111 ++ }; ++ ++ // 10-bit opcode, highest 10 bits: bits[31...22] ++ enum ops10 { ++ bstr_w_op = 0b0000000001, ++ bstrins_d_op = 0b0000000010, ++ bstrpick_d_op = 0b0000000011, ++ slti_op = 0b0000001000, ++ sltui_op = 0b0000001001, ++ addi_w_op = 0b0000001010, ++ addi_d_op = 0b0000001011, ++ lu52i_d_op = 0b0000001100, ++ andi_op = 0b0000001101, ++ ori_op = 0b0000001110, ++ xori_op = 0b0000001111, ++ ld_b_op = 0b0010100000, ++ ld_h_op = 0b0010100001, ++ ld_w_op = 0b0010100010, ++ ld_d_op = 0b0010100011, ++ st_b_op = 0b0010100100, ++ st_h_op = 0b0010100101, ++ st_w_op = 0b0010100110, ++ st_d_op = 0b0010100111, ++ ld_bu_op = 0b0010101000, ++ ld_hu_op = 0b0010101001, ++ ld_wu_op = 0b0010101010, ++ preld_op = 0b0010101011, ++ fld_s_op = 0b0010101100, ++ fst_s_op = 0b0010101101, ++ fld_d_op = 0b0010101110, ++ fst_d_op = 0b0010101111, ++ vld_op = 0b0010110000, ++ vst_op = 0b0010110001, ++ xvld_op = 0b0010110010, ++ xvst_op = 0b0010110011, ++ ldl_w_op = 0b0010111000, ++ ldr_w_op = 0b0010111001, ++ ++ unknow_ops10 = 0b1111111111 ++ }; ++ ++ // 8-bit opcode, highest 8 bits: bits[31...22] ++ enum ops8 { ++ ll_w_op = 0b00100000, ++ sc_w_op = 0b00100001, ++ ll_d_op = 0b00100010, ++ sc_d_op = 0b00100011, ++ ldptr_w_op = 0b00100100, ++ stptr_w_op = 0b00100101, ++ ldptr_d_op = 0b00100110, ++ stptr_d_op = 0b00100111, ++ ++ unknow_ops8 = 0b11111111 ++ }; ++ ++ // 7-bit opcode, highest 7 bits: bits[31...25] ++ enum ops7 { ++ lu12i_w_op = 0b0001010, ++ lu32i_d_op = 0b0001011, ++ pcaddi_op = 0b0001100, ++ pcalau12i_op = 0b0001101, ++ pcaddu12i_op = 0b0001110, ++ pcaddu18i_op = 0b0001111, ++ ++ unknow_ops7 = 0b1111111 ++ }; ++ ++ // 6-bit opcode, highest 6 bits: bits[31...25] ++ enum ops6 { ++ addu16i_d_op = 0b000100, ++ beqz_op = 0b010000, ++ bnez_op = 0b010001, ++ bccondz_op = 0b010010, ++ jirl_op = 0b010011, ++ b_op = 0b010100, ++ bl_op = 0b010101, ++ beq_op = 0b010110, ++ bne_op = 0b010111, ++ blt_op = 0b011000, ++ bge_op = 0b011001, ++ bltu_op = 0b011010, ++ bgeu_op = 0b011011, ++ ++ unknow_ops6 = 0b111111 ++ }; ++ ++ enum fcmp_cond { ++ fcmp_caf = 0x00, ++ fcmp_cun = 0x08, ++ fcmp_ceq = 0x04, 
++ fcmp_cueq = 0x0c, ++ fcmp_clt = 0x02, ++ fcmp_cult = 0x0a, ++ fcmp_cle = 0x06, ++ fcmp_cule = 0x0e, ++ fcmp_cne = 0x10, ++ fcmp_cor = 0x14, ++ fcmp_cune = 0x18, ++ fcmp_saf = 0x01, ++ fcmp_sun = 0x09, ++ fcmp_seq = 0x05, ++ fcmp_sueq = 0x0d, ++ fcmp_slt = 0x03, ++ fcmp_sult = 0x0b, ++ fcmp_sle = 0x07, ++ fcmp_sule = 0x0f, ++ fcmp_sne = 0x11, ++ fcmp_sor = 0x15, ++ fcmp_sune = 0x19 ++ }; ++ ++ enum Condition { ++ zero , ++ notZero , ++ equal , ++ notEqual , ++ less , ++ lessEqual , ++ greater , ++ greaterEqual , ++ below , ++ belowEqual , ++ above , ++ aboveEqual ++ }; ++ ++ static const int LogInstructionSize = 2; ++ static const int InstructionSize = 1 << LogInstructionSize; ++ ++ enum WhichOperand { ++ // input to locate_operand, and format code for relocations ++ imm_operand = 0, // embedded 32-bit|64-bit immediate operand ++ disp32_operand = 1, // embedded 32-bit displacement or address ++ call32_operand = 2, // embedded 32-bit self-relative displacement ++ narrow_oop_operand = 3, // embedded 32-bit immediate narrow oop ++ _WhichOperand_limit = 4 ++ }; ++ ++ static int low (int x, int l) { return bitfield(x, 0, l); } ++ static int low16(int x) { return low(x, 16); } ++ static int low26(int x) { return low(x, 26); } ++ ++ static int high (int x, int l) { return bitfield(x, 32-l, l); } ++ static int high16(int x) { return high(x, 16); } ++ static int high6 (int x) { return high(x, 6); } ++ ++ ++ static ALWAYSINLINE void patch(address a, int length, uint32_t val) { ++ guarantee(val < (1ULL << length), "Field too big for insn"); ++ guarantee(length > 0, "length > 0"); ++ unsigned target = *(unsigned *)a; ++ target = (target >> length) << length; ++ target |= val; ++ *(unsigned *)a = target; ++ } ++ ++ protected: ++ // help methods for instruction ejection ++ ++ // 2R-type ++ // 31 10 9 5 4 0 ++ // | opcode | rj | rd | ++ static inline int insn_RR (int op, int rj, int rd) { return (op<<10) | (rj<<5) | rd; } ++ ++ // 3R-type ++ // 31 15 14 10 9 5 4 0 ++ // | opcode | rk | rj | rd | ++ static inline int insn_RRR (int op, int rk, int rj, int rd) { return (op<<15) | (rk<<10) | (rj<<5) | rd; } ++ ++ // 4R-type ++ // 31 20 19 15 14 10 9 5 4 0 ++ // | opcode | ra | rk | rj | rd | ++ static inline int insn_RRRR (int op, int ra, int rk, int rj, int rd) { return (op<<20) | (ra << 15) | (rk<<10) | (rj<<5) | rd; } ++ ++ // 2RI1-type ++ // 31 11 10 9 5 4 0 ++ // | opcode | I1 | vj | rd | ++ static inline int insn_I1RR (int op, int ui1, int vj, int rd) { assert(is_uimm(ui1, 1), "not a unsigned 1-bit int"); return (op<<11) | (low(ui1, 1)<<10) | (vj<<5) | rd; } ++ ++ // 2RI2-type ++ // 31 12 11 10 9 5 4 0 ++ // | opcode | I2 | vj | rd | ++ static inline int insn_I2RR (int op, int ui2, int vj, int rd) { assert(is_uimm(ui2, 2), "not a unsigned 2-bit int"); return (op<<12) | (low(ui2, 2)<<10) | (vj<<5) | rd; } ++ ++ // 2RI3-type ++ // 31 13 12 10 9 5 4 0 ++ // | opcode | I3 | vj | vd | ++ static inline int insn_I3RR (int op, int ui3, int vj, int vd) { assert(is_uimm(ui3, 3), "not a unsigned 3-bit int"); return (op<<13) | (low(ui3, 3)<<10) | (vj<<5) | vd; } ++ ++ // 2RI4-type ++ // 31 14 13 10 9 5 4 0 ++ // | opcode | I4 | vj | vd | ++ static inline int insn_I4RR (int op, int ui4, int vj, int vd) { assert(is_uimm(ui4, 4), "not a unsigned 4-bit int"); return (op<<14) | (low(ui4, 4)<<10) | (vj<<5) | vd; } ++ ++ // 2RI5-type ++ // 31 15 14 10 9 5 4 0 ++ // | opcode | I5 | vj | vd | ++ static inline int insn_I5RR (int op, int ui5, int vj, int vd) { assert(is_uimm(ui5, 5), "not a unsigned 5-bit int"); return 
(op<<15) | (low(ui5, 5)<<10) | (vj<<5) | vd; } ++ ++ // 2RI6-type ++ // 31 16 15 10 9 5 4 0 ++ // | opcode | I6 | vj | vd | ++ static inline int insn_I6RR (int op, int ui6, int vj, int vd) { assert(is_uimm(ui6, 6), "not a unsigned 6-bit int"); return (op<<16) | (low(ui6, 6)<<10) | (vj<<5) | vd; } ++ ++ // 2RI7-type ++ // 31 17 16 10 9 5 4 0 ++ // | opcode | I7 | vj | vd | ++ static inline int insn_I7RR (int op, int ui7, int vj, int vd) { assert(is_uimm(ui7, 7), "not a unsigned 7-bit int"); return (op<<17) | (low(ui7, 6)<<10) | (vj<<5) | vd; } ++ ++ // 2RI8-type ++ // 31 18 17 10 9 5 4 0 ++ // | opcode | I8 | rj | rd | ++ static inline int insn_I8RR (int op, int imm8, int rj, int rd) { /*assert(is_simm(imm8, 8), "not a signed 8-bit int");*/ return (op<<18) | (low(imm8, 8)<<10) | (rj<<5) | rd; } ++ ++ // 2RI12-type ++ // 31 22 21 10 9 5 4 0 ++ // | opcode | I12 | rj | rd | ++ static inline int insn_I12RR(int op, int imm12, int rj, int rd) { /* assert(is_simm(imm12, 12), "not a signed 12-bit int");*/ return (op<<22) | (low(imm12, 12)<<10) | (rj<<5) | rd; } ++ ++ ++ // 2RI14-type ++ // 31 24 23 10 9 5 4 0 ++ // | opcode | I14 | rj | rd | ++ static inline int insn_I14RR(int op, int imm14, int rj, int rd) { assert(is_simm(imm14, 14), "not a signed 14-bit int"); return (op<<24) | (low(imm14, 14)<<10) | (rj<<5) | rd; } ++ ++ // 2RI16-type ++ // 31 26 25 10 9 5 4 0 ++ // | opcode | I16 | rj | rd | ++ static inline int insn_I16RR(int op, int imm16, int rj, int rd) { assert(is_simm16(imm16), "not a signed 16-bit int"); return (op<<26) | (low16(imm16)<<10) | (rj<<5) | rd; } ++ ++ // 1RI13-type (?) ++ // 31 18 17 5 4 0 ++ // | opcode | I13 | vd | ++ static inline int insn_I13R (int op, int imm13, int vd) { assert(is_simm(imm13, 13), "not a signed 13-bit int"); return (op<<18) | (low(imm13, 13)<<5) | vd; } ++ ++ // 1RI20-type (?) ++ // 31 25 24 5 4 0 ++ // | opcode | I20 | rd | ++ static inline int insn_I20R (int op, int imm20, int rd) { assert(is_simm(imm20, 20), "not a signed 20-bit int"); return (op<<25) | (low(imm20, 20)<<5) | rd; } ++ ++ // 1RI21-type ++ // 31 26 25 10 9 5 4 0 ++ // | opcode | I21[15:0] | rj |I21[20:16]| ++ static inline int insn_IRI(int op, int imm21, int rj) { assert(is_simm(imm21, 21), "not a signed 21-bit int"); return (op << 26) | (low16(imm21) << 10) | (rj << 5) | low(imm21 >> 16, 5); } ++ ++ // I26-type ++ // 31 26 25 10 9 0 ++ // | opcode | I26[15:0] | I26[25:16] | ++ static inline int insn_I26(int op, int imm26) { assert(is_simm(imm26, 26), "not a signed 26-bit int"); return (op << 26) | (low16(imm26) << 10) | low(imm26 >> 16, 10); } ++ ++ // imm15 ++ // 31 15 14 0 ++ // | opcode | I15 | ++ static inline int insn_I15 (int op, int imm15) { assert(is_uimm(imm15, 15), "not a unsigned 15-bit int"); return (op<<15) | low(imm15, 15); } ++ ++ ++ // get the offset field of beq, bne, blt[u], bge[u] instruction ++ int offset16(address entry) { ++ assert(is_simm16((entry - pc()) / 4), "change this code"); ++ if (!is_simm16((entry - pc()) / 4)) { ++ tty->print_cr("!!! is_simm16: %lx", (entry - pc()) / 4); ++ } ++ return (entry - pc()) / 4; ++ } ++ ++ // get the offset field of beqz, bnez instruction ++ int offset21(address entry) { ++ assert(is_simm((int)(entry - pc()) / 4, 21), "change this code"); ++ if (!is_simm((int)(entry - pc()) / 4, 21)) { ++ tty->print_cr("!!! 
is_simm21: %lx", (entry - pc()) / 4); ++ } ++ return (entry - pc()) / 4; ++ } ++ ++ // get the offset field of b instruction ++ int offset26(address entry) { ++ assert(is_simm((int)(entry - pc()) / 4, 26), "change this code"); ++ if (!is_simm((int)(entry - pc()) / 4, 26)) { ++ tty->print_cr("!!! is_simm26: %lx", (entry - pc()) / 4); ++ } ++ return (entry - pc()) / 4; ++ } ++ ++public: ++ using AbstractAssembler::offset; ++ ++ //sign expand with the sign bit is h ++ static int expand(int x, int h) { return -(x & (1<> 16; ++ } ++ ++ static int split_high16(int x) { ++ return ( (x >> 16) + ((x & 0x8000) != 0) ) & 0xffff; ++ } ++ ++ static int split_low20(int x) { ++ return (x & 0xfffff); ++ } ++ ++ // Convert 20-bit x to a sign-extended 20-bit integer ++ static int simm20(int x) { ++ assert(x == (x & 0xFFFFF), "must be 20-bit only"); ++ return (x << 12) >> 12; ++ } ++ ++ static int split_low12(int x) { ++ return (x & 0xfff); ++ } ++ ++ static inline void split_simm32(jlong si32, jint& si12, jint& si20) { ++ si12 = ((jint)(si32 & 0xfff) << 20) >> 20; ++ si32 += (si32 & 0x800) << 1; ++ si20 = si32 >> 12; ++ } ++ ++ static inline void split_simm38(jlong si38, jint& si18, jint& si20) { ++ si18 = ((jint)(si38 & 0x3ffff) << 14) >> 14; ++ si38 += (si38 & 0x20000) << 1; ++ si20 = si38 >> 18; ++ } ++ ++ // Convert 12-bit x to a sign-extended 12-bit integer ++ static int simm12(int x) { ++ assert(x == (x & 0xFFF), "must be 12-bit only"); ++ return (x << 20) >> 20; ++ } ++ ++ // Convert 26-bit x to a sign-extended 26-bit integer ++ static int simm26(int x) { ++ assert(x == (x & 0x3FFFFFF), "must be 26-bit only"); ++ return (x << 6) >> 6; ++ } ++ ++ static intptr_t merge(intptr_t x0, intptr_t x12) { ++ //lu12i, ori ++ return (((x12 << 12) | x0) << 32) >> 32; ++ } ++ ++ static intptr_t merge(intptr_t x0, intptr_t x12, intptr_t x32) { ++ //lu32i, lu12i, ori ++ return (((x32 << 32) | (x12 << 12) | x0) << 12) >> 12; ++ } ++ ++ static intptr_t merge(intptr_t x0, intptr_t x12, intptr_t x32, intptr_t x52) { ++ //lu52i, lu32i, lu12i, ori ++ return (x52 << 52) | (x32 << 32) | (x12 << 12) | x0; ++ } ++ ++ // Test if x is within signed immediate range for nbits. 
++ static bool is_simm (int x, unsigned int nbits) { ++ assert(0 < nbits && nbits < 32, "out of bounds"); ++ const int min = -( ((int)1) << nbits-1 ); ++ const int maxplus1 = ( ((int)1) << nbits-1 ); ++ return min <= x && x < maxplus1; ++ } ++ ++ static bool is_simm(jlong x, unsigned int nbits) { ++ assert(0 < nbits && nbits < 64, "out of bounds"); ++ const jlong min = -( ((jlong)1) << nbits-1 ); ++ const jlong maxplus1 = ( ((jlong)1) << nbits-1 ); ++ return min <= x && x < maxplus1; ++ } ++ ++ static bool is_simm16(int x) { return is_simm(x, 16); } ++ static bool is_simm16(long x) { return is_simm((jlong)x, (unsigned int)16); } ++ ++ // Test if x is within unsigned immediate range for nbits ++ static bool is_uimm(int x, unsigned int nbits) { ++ assert(0 < nbits && nbits < 32, "out of bounds"); ++ const int maxplus1 = ( ((int)1) << nbits ); ++ return 0 <= x && x < maxplus1; ++ } ++ ++ static bool is_uimm(jlong x, unsigned int nbits) { ++ assert(0 < nbits && nbits < 64, "out of bounds"); ++ const jlong maxplus1 = ( ((jlong)1) << nbits ); ++ return 0 <= x && x < maxplus1; ++ } ++ ++public: ++ ++ void flush() { ++ AbstractAssembler::flush(); ++ } ++ ++ inline void emit_int32(int x) { ++ AbstractAssembler::emit_int32(x); ++ } ++ ++ inline void emit_data(int x) { emit_int32(x); } ++ inline void emit_data(int x, relocInfo::relocType rtype) { ++ relocate(rtype); ++ emit_int32(x); ++ } ++ ++ inline void emit_data(int x, RelocationHolder const& rspec) { ++ relocate(rspec); ++ emit_int32(x); ++ } ++ ++ ++ // Generic instructions ++ // Does 32bit or 64bit as needed for the platform. In some sense these ++ // belong in macro assembler but there is no need for both varieties to exist ++ ++ void clo_w (Register rd, Register rj) { emit_int32(insn_RR(clo_w_op, (int)rj->encoding(), (int)rd->encoding())); } ++ void clz_w (Register rd, Register rj) { emit_int32(insn_RR(clz_w_op, (int)rj->encoding(), (int)rd->encoding())); } ++ void cto_w (Register rd, Register rj) { emit_int32(insn_RR(cto_w_op, (int)rj->encoding(), (int)rd->encoding())); } ++ void ctz_w (Register rd, Register rj) { emit_int32(insn_RR(ctz_w_op, (int)rj->encoding(), (int)rd->encoding())); } ++ void clo_d (Register rd, Register rj) { emit_int32(insn_RR(clo_d_op, (int)rj->encoding(), (int)rd->encoding())); } ++ void clz_d (Register rd, Register rj) { emit_int32(insn_RR(clz_d_op, (int)rj->encoding(), (int)rd->encoding())); } ++ void cto_d (Register rd, Register rj) { emit_int32(insn_RR(cto_d_op, (int)rj->encoding(), (int)rd->encoding())); } ++ void ctz_d (Register rd, Register rj) { emit_int32(insn_RR(ctz_d_op, (int)rj->encoding(), (int)rd->encoding())); } ++ ++ void revb_2h(Register rd, Register rj) { emit_int32(insn_RR(revb_2h_op, (int)rj->encoding(), (int)rd->encoding())); } ++ void revb_4h(Register rd, Register rj) { emit_int32(insn_RR(revb_4h_op, (int)rj->encoding(), (int)rd->encoding())); } ++ void revb_2w(Register rd, Register rj) { emit_int32(insn_RR(revb_2w_op, (int)rj->encoding(), (int)rd->encoding())); } ++ void revb_d (Register rd, Register rj) { emit_int32(insn_RR( revb_d_op, (int)rj->encoding(), (int)rd->encoding())); } ++ void revh_2w(Register rd, Register rj) { emit_int32(insn_RR(revh_2w_op, (int)rj->encoding(), (int)rd->encoding())); } ++ void revh_d (Register rd, Register rj) { emit_int32(insn_RR( revh_d_op, (int)rj->encoding(), (int)rd->encoding())); } ++ ++ void bitrev_4b(Register rd, Register rj) { emit_int32(insn_RR(bitrev_4b_op, (int)rj->encoding(), (int)rd->encoding())); } ++ void bitrev_8b(Register rd, Register rj) { 
emit_int32(insn_RR(bitrev_8b_op, (int)rj->encoding(), (int)rd->encoding())); } ++ void bitrev_w (Register rd, Register rj) { emit_int32(insn_RR(bitrev_w_op, (int)rj->encoding(), (int)rd->encoding())); } ++ void bitrev_d (Register rd, Register rj) { emit_int32(insn_RR(bitrev_d_op, (int)rj->encoding(), (int)rd->encoding())); } ++ ++ void ext_w_h(Register rd, Register rj) { emit_int32(insn_RR(ext_w_h_op, (int)rj->encoding(), (int)rd->encoding())); } ++ void ext_w_b(Register rd, Register rj) { emit_int32(insn_RR(ext_w_b_op, (int)rj->encoding(), (int)rd->encoding())); } ++ ++ void rdtimel_w(Register rd, Register rj) { emit_int32(insn_RR(rdtimel_w_op, (int)rj->encoding(), (int)rd->encoding())); } ++ void rdtimeh_w(Register rd, Register rj) { emit_int32(insn_RR(rdtimeh_w_op, (int)rj->encoding(), (int)rd->encoding())); } ++ void rdtime_d(Register rd, Register rj) { emit_int32(insn_RR(rdtime_d_op, (int)rj->encoding(), (int)rd->encoding())); } ++ ++ void cpucfg(Register rd, Register rj) { emit_int32(insn_RR(cpucfg_op, (int)rj->encoding(), (int)rd->encoding())); } ++ ++ void asrtle_d (Register rj, Register rk) { emit_int32(insn_RRR(asrtle_d_op , (int)rk->encoding(), (int)rj->encoding(), 0)); } ++ void asrtgt_d (Register rj, Register rk) { emit_int32(insn_RRR(asrtgt_d_op , (int)rk->encoding(), (int)rj->encoding(), 0)); } ++ ++ void alsl_w(Register rd, Register rj, Register rk, int sa2) { assert(is_uimm(sa2, 2), "not a unsigned 2-bit int"); emit_int32(insn_I8RR(alsl_w_op, ( (0 << 7) | (sa2 << 5) | (int)rk->encoding() ), (int)rj->encoding(), (int)rd->encoding())); } ++ void alsl_wu(Register rd, Register rj, Register rk, int sa2) { assert(is_uimm(sa2, 2), "not a unsigned 2-bit int"); emit_int32(insn_I8RR(alsl_w_op, ( (1 << 7) | (sa2 << 5) | (int)rk->encoding() ), (int)rj->encoding(), (int)rd->encoding())); } ++ void bytepick_w(Register rd, Register rj, Register rk, int sa2) { assert(is_uimm(sa2, 2), "not a unsigned 2-bit int"); emit_int32(insn_I8RR(bytepick_w_op, ( (0 << 7) | (sa2 << 5) | (int)rk->encoding() ), (int)rj->encoding(), (int)rd->encoding())); } ++ void bytepick_d(Register rd, Register rj, Register rk, int sa3) { assert(is_uimm(sa3, 3), "not a unsigned 3-bit int"); emit_int32(insn_I8RR(bytepick_d_op, ( (sa3 << 5) | (int)rk->encoding() ), (int)rj->encoding(), (int)rd->encoding())); } ++ ++ void add_w (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(add_w_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } ++ void add_d (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(add_d_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } ++ void sub_w (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(sub_w_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } ++ void sub_d (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(sub_d_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } ++ void slt (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(slt_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } ++ void sltu (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(sltu_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } ++ ++ void maskeqz (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(maskeqz_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } ++ void masknez (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(masknez_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } ++ 
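// Illustrative sketch (not from the patch itself): how the insn_* helpers above
// compose a 32-bit instruction word. Assuming the 3R-type layout documented
// earlier (opcode in bits[31:15], rk in [14:10], rj in [9:5], rd in [4:0]) and
// taking hypothetical register encodings rd=4, rj=5, rk=6 purely for illustration,
// an add_d would be packed roughly as:
//
//   insn_RRR(add_d_op, /*rk=*/6, /*rj=*/5, /*rd=*/4)
//     == (0b00000000000100001 << 15) | (6 << 10) | (5 << 5) | 4
//
// i.e. the 17-bit add_d_op opcode fills the high bits and the three 5-bit
// register numbers fill the low 15 bits; emit_int32() then writes that word
// into the code buffer. The other insn_* variants differ only in how many
// immediate/register fields share the low bits below the opcode.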
++ void nor (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(nor_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } ++ void AND (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(and_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } ++ void OR (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(or_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } ++ void XOR (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(xor_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } ++ void orn (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(orn_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } ++ void andn(Register rd, Register rj, Register rk) { emit_int32(insn_RRR(andn_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } ++ ++ void sll_w (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(sll_w_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } ++ void srl_w (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(srl_w_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } ++ void sra_w (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(sra_w_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } ++ void sll_d (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(sll_d_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } ++ void srl_d (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(srl_d_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } ++ void sra_d (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(sra_d_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } ++ ++ void rotr_w (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(rotr_w_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } ++ void rotr_d (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(rotr_d_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } ++ ++ void mul_w (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(mul_w_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } ++ void mulh_w (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(mulh_w_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } ++ void mulh_wu (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(mulh_wu_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } ++ void mul_d (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(mul_d_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } ++ void mulh_d (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(mulh_d_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } ++ void mulh_du (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(mulh_du_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } ++ void mulw_d_w (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(mulw_d_w_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } ++ void mulw_d_wu (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(mulw_d_wu_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } ++ ++ void div_w (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(div_w_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } ++ void mod_w 
(Register rd, Register rj, Register rk) { emit_int32(insn_RRR(mod_w_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } ++ void div_wu(Register rd, Register rj, Register rk) { emit_int32(insn_RRR(div_wu_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } ++ void mod_wu(Register rd, Register rj, Register rk) { emit_int32(insn_RRR(mod_wu_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } ++ void div_d (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(div_d_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } ++ void mod_d (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(mod_d_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } ++ void div_du(Register rd, Register rj, Register rk) { emit_int32(insn_RRR(div_du_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } ++ void mod_du(Register rd, Register rj, Register rk) { emit_int32(insn_RRR(mod_du_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } ++ ++ void crc_w_b_w (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(crc_w_b_w_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } ++ void crc_w_h_w (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(crc_w_h_w_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } ++ void crc_w_w_w (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(crc_w_w_w_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } ++ void crc_w_d_w (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(crc_w_d_w_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } ++ void crcc_w_b_w (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(crcc_w_b_w_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } ++ void crcc_w_h_w (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(crcc_w_h_w_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } ++ void crcc_w_w_w (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(crcc_w_w_w_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } ++ void crcc_w_d_w (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(crcc_w_d_w_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } ++ ++ void brk(int code) { assert(is_uimm(code, 15), "not a unsigned 15-bit int"); emit_int32(insn_I15(break_op, code)); } ++ ++ void alsl_d(Register rd, Register rj, Register rk, int sa2) { assert(is_uimm(sa2, 2), "not a unsigned 2-bit int"); emit_int32(insn_I8RR(alsl_d_op, ( (sa2 << 5) | (int)rk->encoding() ), (int)rj->encoding(), (int)rd->encoding())); } ++ ++ void slli_w(Register rd, Register rj, int ui5) { assert(is_uimm(ui5, 5), "not a unsigned 5-bit int"); emit_int32(insn_I8RR(slli_op, ( (0b001 << 5) | ui5 ), (int)rj->encoding(), (int)rd->encoding())); } ++ void slli_d(Register rd, Register rj, int ui6) { assert(is_uimm(ui6, 6), "not a unsigned 6-bit int"); emit_int32(insn_I8RR(slli_op, ( (0b01 << 6) | ui6 ), (int)rj->encoding(), (int)rd->encoding())); } ++ void srli_w(Register rd, Register rj, int ui5) { assert(is_uimm(ui5, 5), "not a unsigned 5-bit int"); emit_int32(insn_I8RR(srli_op, ( (0b001 << 5) | ui5 ), (int)rj->encoding(), (int)rd->encoding())); } ++ void srli_d(Register rd, Register rj, int ui6) { assert(is_uimm(ui6, 6), "not a unsigned 6-bit int"); emit_int32(insn_I8RR(srli_op, ( (0b01 << 6) | ui6 ), (int)rj->encoding(), (int)rd->encoding())); } ++ void 
srai_w(Register rd, Register rj, int ui5) { assert(is_uimm(ui5, 5), "not a unsigned 5-bit int"); emit_int32(insn_I8RR(srai_op, ( (0b001 << 5) | ui5 ), (int)rj->encoding(), (int)rd->encoding())); } ++ void srai_d(Register rd, Register rj, int ui6) { assert(is_uimm(ui6, 6), "not a unsigned 6-bit int"); emit_int32(insn_I8RR(srai_op, ( (0b01 << 6) | ui6 ), (int)rj->encoding(), (int)rd->encoding())); } ++ void rotri_w(Register rd, Register rj, int ui5) { assert(is_uimm(ui5, 5), "not a unsigned 5-bit int"); emit_int32(insn_I8RR(rotri_op, ( (0b001 << 5) | ui5 ), (int)rj->encoding(), (int)rd->encoding())); } ++ void rotri_d(Register rd, Register rj, int ui6) { assert(is_uimm(ui6, 6), "not a unsigned 6-bit int"); emit_int32(insn_I8RR(rotri_op, ( (0b01 << 6) | ui6 ), (int)rj->encoding(), (int)rd->encoding())); } ++ ++ void bstrins_w (Register rd, Register rj, int msbw, int lsbw) { assert(is_uimm(msbw, 5) && is_uimm(lsbw, 5), "not a unsigned 5-bit int"); emit_int32(insn_I12RR(bstr_w_op, ( (1<<11) | (low(msbw, 5)<<6) | (0<<5) | low(lsbw, 5) ), (int)rj->encoding(), (int)rd->encoding())); } ++ void bstrpick_w (Register rd, Register rj, int msbw, int lsbw) { assert(is_uimm(msbw, 5) && is_uimm(lsbw, 5), "not a unsigned 5-bit int"); emit_int32(insn_I12RR(bstr_w_op, ( (1<<11) | (low(msbw, 5)<<6) | (1<<5) | low(lsbw, 5) ), (int)rj->encoding(), (int)rd->encoding())); } ++ void bstrins_d (Register rd, Register rj, int msbd, int lsbd) { assert(is_uimm(msbd, 6) && is_uimm(lsbd, 6), "not a unsigned 6-bit int"); emit_int32(insn_I12RR(bstrins_d_op, ( (low(msbd, 6)<<6) | low(lsbd, 6) ), (int)rj->encoding(), (int)rd->encoding())); } ++ void bstrpick_d (Register rd, Register rj, int msbd, int lsbd) { assert(is_uimm(msbd, 6) && is_uimm(lsbd, 6), "not a unsigned 6-bit int"); emit_int32(insn_I12RR(bstrpick_d_op, ( (low(msbd, 6)<<6) | low(lsbd, 6) ), (int)rj->encoding(), (int)rd->encoding())); } ++ ++ void fadd_s (FloatRegister fd, FloatRegister fj, FloatRegister fk) { emit_int32(insn_RRR(fadd_s_op, (int)fk->encoding(), (int)fj->encoding(), (int)fd->encoding())); } ++ void fadd_d (FloatRegister fd, FloatRegister fj, FloatRegister fk) { emit_int32(insn_RRR(fadd_d_op, (int)fk->encoding(), (int)fj->encoding(), (int)fd->encoding())); } ++ void fsub_s (FloatRegister fd, FloatRegister fj, FloatRegister fk) { emit_int32(insn_RRR(fsub_s_op, (int)fk->encoding(), (int)fj->encoding(), (int)fd->encoding())); } ++ void fsub_d (FloatRegister fd, FloatRegister fj, FloatRegister fk) { emit_int32(insn_RRR(fsub_d_op, (int)fk->encoding(), (int)fj->encoding(), (int)fd->encoding())); } ++ void fmul_s (FloatRegister fd, FloatRegister fj, FloatRegister fk) { emit_int32(insn_RRR(fmul_s_op, (int)fk->encoding(), (int)fj->encoding(), (int)fd->encoding())); } ++ void fmul_d (FloatRegister fd, FloatRegister fj, FloatRegister fk) { emit_int32(insn_RRR(fmul_d_op, (int)fk->encoding(), (int)fj->encoding(), (int)fd->encoding())); } ++ void fdiv_s (FloatRegister fd, FloatRegister fj, FloatRegister fk) { emit_int32(insn_RRR(fdiv_s_op, (int)fk->encoding(), (int)fj->encoding(), (int)fd->encoding())); } ++ void fdiv_d (FloatRegister fd, FloatRegister fj, FloatRegister fk) { emit_int32(insn_RRR(fdiv_d_op, (int)fk->encoding(), (int)fj->encoding(), (int)fd->encoding())); } ++ void fmax_s (FloatRegister fd, FloatRegister fj, FloatRegister fk) { emit_int32(insn_RRR(fmax_s_op, (int)fk->encoding(), (int)fj->encoding(), (int)fd->encoding())); } ++ void fmax_d (FloatRegister fd, FloatRegister fj, FloatRegister fk) { emit_int32(insn_RRR(fmax_d_op, (int)fk->encoding(), 
(int)fj->encoding(), (int)fd->encoding())); } ++ void fmin_s (FloatRegister fd, FloatRegister fj, FloatRegister fk) { emit_int32(insn_RRR(fmin_s_op, (int)fk->encoding(), (int)fj->encoding(), (int)fd->encoding())); } ++ void fmin_d (FloatRegister fd, FloatRegister fj, FloatRegister fk) { emit_int32(insn_RRR(fmin_d_op, (int)fk->encoding(), (int)fj->encoding(), (int)fd->encoding())); } ++ void fmaxa_s (FloatRegister fd, FloatRegister fj, FloatRegister fk) { emit_int32(insn_RRR(fmaxa_s_op, (int)fk->encoding(), (int)fj->encoding(), (int)fd->encoding())); } ++ void fmaxa_d (FloatRegister fd, FloatRegister fj, FloatRegister fk) { emit_int32(insn_RRR(fmaxa_d_op, (int)fk->encoding(), (int)fj->encoding(), (int)fd->encoding())); } ++ void fmina_s (FloatRegister fd, FloatRegister fj, FloatRegister fk) { emit_int32(insn_RRR(fmina_s_op, (int)fk->encoding(), (int)fj->encoding(), (int)fd->encoding())); } ++ void fmina_d (FloatRegister fd, FloatRegister fj, FloatRegister fk) { emit_int32(insn_RRR(fmina_d_op, (int)fk->encoding(), (int)fj->encoding(), (int)fd->encoding())); } ++ ++ void fscaleb_s (FloatRegister fd, FloatRegister fj, FloatRegister fk) { emit_int32(insn_RRR(fscaleb_s_op, (int)fk->encoding(), (int)fj->encoding(), (int)fd->encoding())); } ++ void fscaleb_d (FloatRegister fd, FloatRegister fj, FloatRegister fk) { emit_int32(insn_RRR(fscaleb_d_op, (int)fk->encoding(), (int)fj->encoding(), (int)fd->encoding())); } ++ void fcopysign_s (FloatRegister fd, FloatRegister fj, FloatRegister fk) { emit_int32(insn_RRR(fcopysign_s_op, (int)fk->encoding(), (int)fj->encoding(), (int)fd->encoding())); } ++ void fcopysign_d (FloatRegister fd, FloatRegister fj, FloatRegister fk) { emit_int32(insn_RRR(fcopysign_d_op, (int)fk->encoding(), (int)fj->encoding(), (int)fd->encoding())); } ++ ++ void fabs_s(FloatRegister fd, FloatRegister fj) { emit_int32(insn_RR(fabs_s_op, (int)fj->encoding(), (int)fd->encoding())); } ++ void fabs_d(FloatRegister fd, FloatRegister fj) { emit_int32(insn_RR(fabs_d_op, (int)fj->encoding(), (int)fd->encoding())); } ++ void fneg_s(FloatRegister fd, FloatRegister fj) { emit_int32(insn_RR(fneg_s_op, (int)fj->encoding(), (int)fd->encoding())); } ++ void fneg_d(FloatRegister fd, FloatRegister fj) { emit_int32(insn_RR(fneg_d_op, (int)fj->encoding(), (int)fd->encoding())); } ++ void flogb_s(FloatRegister fd, FloatRegister fj) { emit_int32(insn_RR(flogb_s_op, (int)fj->encoding(), (int)fd->encoding())); } ++ void flogb_d(FloatRegister fd, FloatRegister fj) { emit_int32(insn_RR(flogb_d_op, (int)fj->encoding(), (int)fd->encoding())); } ++ void fclass_s(FloatRegister fd, FloatRegister fj) { emit_int32(insn_RR(fclass_s_op, (int)fj->encoding(), (int)fd->encoding())); } ++ void fclass_d(FloatRegister fd, FloatRegister fj) { emit_int32(insn_RR(fclass_d_op, (int)fj->encoding(), (int)fd->encoding())); } ++ void fsqrt_s(FloatRegister fd, FloatRegister fj) { emit_int32(insn_RR(fsqrt_s_op, (int)fj->encoding(), (int)fd->encoding())); } ++ void fsqrt_d(FloatRegister fd, FloatRegister fj) { emit_int32(insn_RR(fsqrt_d_op, (int)fj->encoding(), (int)fd->encoding())); } ++ void frecip_s(FloatRegister fd, FloatRegister fj) { emit_int32(insn_RR(frecip_s_op, (int)fj->encoding(), (int)fd->encoding())); } ++ void frecip_d(FloatRegister fd, FloatRegister fj) { emit_int32(insn_RR(frecip_d_op, (int)fj->encoding(), (int)fd->encoding())); } ++ void frsqrt_s(FloatRegister fd, FloatRegister fj) { emit_int32(insn_RR(frsqrt_s_op, (int)fj->encoding(), (int)fd->encoding())); } ++ void frsqrt_d(FloatRegister fd, FloatRegister fj) { 
emit_int32(insn_RR(frsqrt_d_op, (int)fj->encoding(), (int)fd->encoding())); } ++ void fmov_s(FloatRegister fd, FloatRegister fj) { emit_int32(insn_RR(fmov_s_op, (int)fj->encoding(), (int)fd->encoding())); } ++ void fmov_d(FloatRegister fd, FloatRegister fj) { emit_int32(insn_RR(fmov_d_op, (int)fj->encoding(), (int)fd->encoding())); } ++ ++ void movgr2fr_w (FloatRegister fd, Register rj) { emit_int32(insn_RR(movgr2fr_w_op, (int)rj->encoding(), (int)fd->encoding())); } ++ void movgr2fr_d (FloatRegister fd, Register rj) { emit_int32(insn_RR(movgr2fr_d_op, (int)rj->encoding(), (int)fd->encoding())); } ++ void movgr2frh_w(FloatRegister fd, Register rj) { emit_int32(insn_RR(movgr2frh_w_op, (int)rj->encoding(), (int)fd->encoding())); } ++ void movfr2gr_s (Register rd, FloatRegister fj) { emit_int32(insn_RR(movfr2gr_s_op, (int)fj->encoding(), (int)rd->encoding())); } ++ void movfr2gr_d (Register rd, FloatRegister fj) { emit_int32(insn_RR(movfr2gr_d_op, (int)fj->encoding(), (int)rd->encoding())); } ++ void movfrh2gr_s(Register rd, FloatRegister fj) { emit_int32(insn_RR(movfrh2gr_s_op, (int)fj->encoding(), (int)rd->encoding())); } ++ void movgr2fcsr (int fcsr, Register rj) { assert(is_uimm(fcsr, 2), "not a unsigned 2-bit init: fcsr0-fcsr3"); emit_int32(insn_RR(movgr2fcsr_op, (int)rj->encoding(), fcsr)); } ++ void movfcsr2gr (Register rd, int fcsr) { assert(is_uimm(fcsr, 2), "not a unsigned 2-bit init: fcsr0-fcsr3"); emit_int32(insn_RR(movfcsr2gr_op, fcsr, (int)rd->encoding())); } ++ void movfr2cf (ConditionalFlagRegister cd, FloatRegister fj) { emit_int32(insn_RR(movfr2cf_op, (int)fj->encoding(), (int)cd->encoding())); } ++ void movcf2fr (FloatRegister fd, ConditionalFlagRegister cj) { emit_int32(insn_RR(movcf2fr_op, (int)cj->encoding(), (int)fd->encoding())); } ++ void movgr2cf (ConditionalFlagRegister cd, Register rj) { emit_int32(insn_RR(movgr2cf_op, (int)rj->encoding(), (int)cd->encoding())); } ++ void movcf2gr (Register rd, ConditionalFlagRegister cj) { emit_int32(insn_RR(movcf2gr_op, (int)cj->encoding(), (int)rd->encoding())); } ++ ++ void fcvt_s_d(FloatRegister fd, FloatRegister fj) { emit_int32(insn_RR(fcvt_s_d_op, (int)fj->encoding(), (int)fd->encoding())); } ++ void fcvt_d_s(FloatRegister fd, FloatRegister fj) { emit_int32(insn_RR(fcvt_d_s_op, (int)fj->encoding(), (int)fd->encoding())); } ++ ++ void ftintrm_w_s(FloatRegister fd, FloatRegister fj) { emit_int32(insn_RR(ftintrm_w_s_op, (int)fj->encoding(), (int)fd->encoding())); } ++ void ftintrm_w_d(FloatRegister fd, FloatRegister fj) { emit_int32(insn_RR(ftintrm_w_d_op, (int)fj->encoding(), (int)fd->encoding())); } ++ void ftintrm_l_s(FloatRegister fd, FloatRegister fj) { emit_int32(insn_RR(ftintrm_l_s_op, (int)fj->encoding(), (int)fd->encoding())); } ++ void ftintrm_l_d(FloatRegister fd, FloatRegister fj) { emit_int32(insn_RR(ftintrm_l_d_op, (int)fj->encoding(), (int)fd->encoding())); } ++ void ftintrp_w_s(FloatRegister fd, FloatRegister fj) { emit_int32(insn_RR(ftintrp_w_s_op, (int)fj->encoding(), (int)fd->encoding())); } ++ void ftintrp_w_d(FloatRegister fd, FloatRegister fj) { emit_int32(insn_RR(ftintrp_w_d_op, (int)fj->encoding(), (int)fd->encoding())); } ++ void ftintrp_l_s(FloatRegister fd, FloatRegister fj) { emit_int32(insn_RR(ftintrp_l_s_op, (int)fj->encoding(), (int)fd->encoding())); } ++ void ftintrp_l_d(FloatRegister fd, FloatRegister fj) { emit_int32(insn_RR(ftintrp_l_d_op, (int)fj->encoding(), (int)fd->encoding())); } ++ void ftintrz_w_s(FloatRegister fd, FloatRegister fj) { emit_int32(insn_RR(ftintrz_w_s_op, 
(int)fj->encoding(), (int)fd->encoding())); } ++ void ftintrz_w_d(FloatRegister fd, FloatRegister fj) { emit_int32(insn_RR(ftintrz_w_d_op, (int)fj->encoding(), (int)fd->encoding())); } ++ void ftintrz_l_s(FloatRegister fd, FloatRegister fj) { emit_int32(insn_RR(ftintrz_l_s_op, (int)fj->encoding(), (int)fd->encoding())); } ++ void ftintrz_l_d(FloatRegister fd, FloatRegister fj) { emit_int32(insn_RR(ftintrz_l_d_op, (int)fj->encoding(), (int)fd->encoding())); } ++ void ftintrne_w_s(FloatRegister fd, FloatRegister fj) { emit_int32(insn_RR(ftintrne_w_s_op, (int)fj->encoding(), (int)fd->encoding())); } ++ void ftintrne_w_d(FloatRegister fd, FloatRegister fj) { emit_int32(insn_RR(ftintrne_w_d_op, (int)fj->encoding(), (int)fd->encoding())); } ++ void ftintrne_l_s(FloatRegister fd, FloatRegister fj) { emit_int32(insn_RR(ftintrne_l_s_op, (int)fj->encoding(), (int)fd->encoding())); } ++ void ftintrne_l_d(FloatRegister fd, FloatRegister fj) { emit_int32(insn_RR(ftintrne_l_d_op, (int)fj->encoding(), (int)fd->encoding())); } ++ void ftint_w_s(FloatRegister fd, FloatRegister fj) { emit_int32(insn_RR(ftint_w_s_op, (int)fj->encoding(), (int)fd->encoding())); } ++ void ftint_w_d(FloatRegister fd, FloatRegister fj) { emit_int32(insn_RR(ftint_w_d_op, (int)fj->encoding(), (int)fd->encoding())); } ++ void ftint_l_s(FloatRegister fd, FloatRegister fj) { emit_int32(insn_RR(ftint_l_s_op, (int)fj->encoding(), (int)fd->encoding())); } ++ void ftint_l_d(FloatRegister fd, FloatRegister fj) { emit_int32(insn_RR(ftint_l_d_op, (int)fj->encoding(), (int)fd->encoding())); } ++ void ffint_s_w(FloatRegister fd, FloatRegister fj) { emit_int32(insn_RR(ffint_s_w_op, (int)fj->encoding(), (int)fd->encoding())); } ++ void ffint_s_l(FloatRegister fd, FloatRegister fj) { emit_int32(insn_RR(ffint_s_l_op, (int)fj->encoding(), (int)fd->encoding())); } ++ void ffint_d_w(FloatRegister fd, FloatRegister fj) { emit_int32(insn_RR(ffint_d_w_op, (int)fj->encoding(), (int)fd->encoding())); } ++ void ffint_d_l(FloatRegister fd, FloatRegister fj) { emit_int32(insn_RR(ffint_d_l_op, (int)fj->encoding(), (int)fd->encoding())); } ++ void frint_s(FloatRegister fd, FloatRegister fj) { emit_int32(insn_RR(frint_s_op, (int)fj->encoding(), (int)fd->encoding())); } ++ void frint_d(FloatRegister fd, FloatRegister fj) { emit_int32(insn_RR(frint_d_op, (int)fj->encoding(), (int)fd->encoding())); } ++ ++ void slti (Register rd, Register rj, int si12) { assert(is_simm(si12, 12), "not a signed 12-bit int"); emit_int32(insn_I12RR(slti_op, si12, (int)rj->encoding(), (int)rd->encoding())); } ++ void sltui (Register rd, Register rj, int si12) { assert(is_simm(si12, 12), "not a signed 12-bit int"); emit_int32(insn_I12RR(sltui_op, si12, (int)rj->encoding(), (int)rd->encoding())); } ++ void addi_w(Register rd, Register rj, int si12) { assert(is_simm(si12, 12), "not a signed 12-bit int"); emit_int32(insn_I12RR(addi_w_op, si12, (int)rj->encoding(), (int)rd->encoding())); } ++ void addi_d(Register rd, Register rj, int si12) { assert(is_simm(si12, 12), "not a signed 12-bit int"); emit_int32(insn_I12RR(addi_d_op, si12, (int)rj->encoding(), (int)rd->encoding())); } ++ void lu52i_d(Register rd, Register rj, int si12) { /*assert(is_simm(si12, 12), "not a signed 12-bit int");*/ emit_int32(insn_I12RR(lu52i_d_op, simm12(si12), (int)rj->encoding(), (int)rd->encoding())); } ++ void andi (Register rd, Register rj, int ui12) { assert(is_uimm(ui12, 12), "not a unsigned 12-bit int"); emit_int32(insn_I12RR(andi_op, ui12, (int)rj->encoding(), (int)rd->encoding())); } ++ void ori (Register 
rd, Register rj, int ui12) { assert(is_uimm(ui12, 12), "not a unsigned 12-bit int"); emit_int32(insn_I12RR(ori_op, ui12, (int)rj->encoding(), (int)rd->encoding())); } ++ void xori (Register rd, Register rj, int ui12) { assert(is_uimm(ui12, 12), "not a unsigned 12-bit int"); emit_int32(insn_I12RR(xori_op, ui12, (int)rj->encoding(), (int)rd->encoding())); } ++ ++ void fmadd_s (FloatRegister fd, FloatRegister fj, FloatRegister fk, FloatRegister fa) { emit_int32(insn_RRRR(fmadd_s_op , (int)fa->encoding(), (int)fk->encoding(), (int)fj->encoding(), (int)fd->encoding())); } ++ void fmadd_d (FloatRegister fd, FloatRegister fj, FloatRegister fk, FloatRegister fa) { emit_int32(insn_RRRR(fmadd_d_op , (int)fa->encoding(), (int)fk->encoding(), (int)fj->encoding(), (int)fd->encoding())); } ++ void fmsub_s (FloatRegister fd, FloatRegister fj, FloatRegister fk, FloatRegister fa) { emit_int32(insn_RRRR(fmsub_s_op , (int)fa->encoding(), (int)fk->encoding(), (int)fj->encoding(), (int)fd->encoding())); } ++ void fmsub_d (FloatRegister fd, FloatRegister fj, FloatRegister fk, FloatRegister fa) { emit_int32(insn_RRRR(fmsub_d_op , (int)fa->encoding(), (int)fk->encoding(), (int)fj->encoding(), (int)fd->encoding())); } ++ void fnmadd_s (FloatRegister fd, FloatRegister fj, FloatRegister fk, FloatRegister fa) { emit_int32(insn_RRRR(fnmadd_s_op , (int)fa->encoding(), (int)fk->encoding(), (int)fj->encoding(), (int)fd->encoding())); } ++ void fnmadd_d (FloatRegister fd, FloatRegister fj, FloatRegister fk, FloatRegister fa) { emit_int32(insn_RRRR(fnmadd_d_op , (int)fa->encoding(), (int)fk->encoding(), (int)fj->encoding(), (int)fd->encoding())); } ++ void fnmsub_s (FloatRegister fd, FloatRegister fj, FloatRegister fk, FloatRegister fa) { emit_int32(insn_RRRR(fnmsub_s_op , (int)fa->encoding(), (int)fk->encoding(), (int)fj->encoding(), (int)fd->encoding())); } ++ void fnmsub_d (FloatRegister fd, FloatRegister fj, FloatRegister fk, FloatRegister fa) { emit_int32(insn_RRRR(fnmsub_d_op , (int)fa->encoding(), (int)fk->encoding(), (int)fj->encoding(), (int)fd->encoding())); } ++ ++ void fcmp_caf_s (ConditionalFlagRegister cd, FloatRegister fj, FloatRegister fk) { emit_int32(insn_RRRR(fcmp_cond_s_op, fcmp_caf, (int)fk->encoding(), (int)fj->encoding(), (int)cd->encoding())); } ++ void fcmp_cun_s (ConditionalFlagRegister cd, FloatRegister fj, FloatRegister fk) { emit_int32(insn_RRRR(fcmp_cond_s_op, fcmp_cun , (int)fk->encoding(), (int)fj->encoding(), (int)cd->encoding())); } ++ void fcmp_ceq_s (ConditionalFlagRegister cd, FloatRegister fj, FloatRegister fk) { emit_int32(insn_RRRR(fcmp_cond_s_op, fcmp_ceq , (int)fk->encoding(), (int)fj->encoding(), (int)cd->encoding())); } ++ void fcmp_cueq_s (ConditionalFlagRegister cd, FloatRegister fj, FloatRegister fk) { emit_int32(insn_RRRR(fcmp_cond_s_op, fcmp_cueq, (int)fk->encoding(), (int)fj->encoding(), (int)cd->encoding())); } ++ void fcmp_clt_s (ConditionalFlagRegister cd, FloatRegister fj, FloatRegister fk) { emit_int32(insn_RRRR(fcmp_cond_s_op, fcmp_clt , (int)fk->encoding(), (int)fj->encoding(), (int)cd->encoding())); } ++ void fcmp_cult_s (ConditionalFlagRegister cd, FloatRegister fj, FloatRegister fk) { emit_int32(insn_RRRR(fcmp_cond_s_op, fcmp_cult, (int)fk->encoding(), (int)fj->encoding(), (int)cd->encoding())); } ++ void fcmp_cle_s (ConditionalFlagRegister cd, FloatRegister fj, FloatRegister fk) { emit_int32(insn_RRRR(fcmp_cond_s_op, fcmp_cle , (int)fk->encoding(), (int)fj->encoding(), (int)cd->encoding())); } ++ void fcmp_cule_s (ConditionalFlagRegister cd, FloatRegister fj, 
FloatRegister fk) { emit_int32(insn_RRRR(fcmp_cond_s_op, fcmp_cule, (int)fk->encoding(), (int)fj->encoding(), (int)cd->encoding())); } ++ void fcmp_cne_s (ConditionalFlagRegister cd, FloatRegister fj, FloatRegister fk) { emit_int32(insn_RRRR(fcmp_cond_s_op, fcmp_cne , (int)fk->encoding(), (int)fj->encoding(), (int)cd->encoding())); } ++ void fcmp_cor_s (ConditionalFlagRegister cd, FloatRegister fj, FloatRegister fk) { emit_int32(insn_RRRR(fcmp_cond_s_op, fcmp_cor , (int)fk->encoding(), (int)fj->encoding(), (int)cd->encoding())); } ++ void fcmp_cune_s (ConditionalFlagRegister cd, FloatRegister fj, FloatRegister fk) { emit_int32(insn_RRRR(fcmp_cond_s_op, fcmp_cune, (int)fk->encoding(), (int)fj->encoding(), (int)cd->encoding())); } ++ void fcmp_saf_s (ConditionalFlagRegister cd, FloatRegister fj, FloatRegister fk) { emit_int32(insn_RRRR(fcmp_cond_s_op, fcmp_saf , (int)fk->encoding(), (int)fj->encoding(), (int)cd->encoding())); } ++ void fcmp_sun_s (ConditionalFlagRegister cd, FloatRegister fj, FloatRegister fk) { emit_int32(insn_RRRR(fcmp_cond_s_op, fcmp_sun , (int)fk->encoding(), (int)fj->encoding(), (int)cd->encoding())); } ++ void fcmp_seq_s (ConditionalFlagRegister cd, FloatRegister fj, FloatRegister fk) { emit_int32(insn_RRRR(fcmp_cond_s_op, fcmp_seq , (int)fk->encoding(), (int)fj->encoding(), (int)cd->encoding())); } ++ void fcmp_sueq_s (ConditionalFlagRegister cd, FloatRegister fj, FloatRegister fk) { emit_int32(insn_RRRR(fcmp_cond_s_op, fcmp_sueq, (int)fk->encoding(), (int)fj->encoding(), (int)cd->encoding())); } ++ void fcmp_slt_s (ConditionalFlagRegister cd, FloatRegister fj, FloatRegister fk) { emit_int32(insn_RRRR(fcmp_cond_s_op, fcmp_slt , (int)fk->encoding(), (int)fj->encoding(), (int)cd->encoding())); } ++ void fcmp_sult_s (ConditionalFlagRegister cd, FloatRegister fj, FloatRegister fk) { emit_int32(insn_RRRR(fcmp_cond_s_op, fcmp_sult, (int)fk->encoding(), (int)fj->encoding(), (int)cd->encoding())); } ++ void fcmp_sle_s (ConditionalFlagRegister cd, FloatRegister fj, FloatRegister fk) { emit_int32(insn_RRRR(fcmp_cond_s_op, fcmp_sle , (int)fk->encoding(), (int)fj->encoding(), (int)cd->encoding())); } ++ void fcmp_sule_s (ConditionalFlagRegister cd, FloatRegister fj, FloatRegister fk) { emit_int32(insn_RRRR(fcmp_cond_s_op, fcmp_sule, (int)fk->encoding(), (int)fj->encoding(), (int)cd->encoding())); } ++ void fcmp_sne_s (ConditionalFlagRegister cd, FloatRegister fj, FloatRegister fk) { emit_int32(insn_RRRR(fcmp_cond_s_op, fcmp_sne , (int)fk->encoding(), (int)fj->encoding(), (int)cd->encoding())); } ++ void fcmp_sor_s (ConditionalFlagRegister cd, FloatRegister fj, FloatRegister fk) { emit_int32(insn_RRRR(fcmp_cond_s_op, fcmp_sor , (int)fk->encoding(), (int)fj->encoding(), (int)cd->encoding())); } ++ void fcmp_sune_s (ConditionalFlagRegister cd, FloatRegister fj, FloatRegister fk) { emit_int32(insn_RRRR(fcmp_cond_s_op, fcmp_sune, (int)fk->encoding(), (int)fj->encoding(), (int)cd->encoding())); } ++ ++ void fcmp_caf_d (ConditionalFlagRegister cd, FloatRegister fj, FloatRegister fk) { emit_int32(insn_RRRR(fcmp_cond_d_op, fcmp_caf, (int)fk->encoding(), (int)fj->encoding(), (int)cd->encoding())); } ++ void fcmp_cun_d (ConditionalFlagRegister cd, FloatRegister fj, FloatRegister fk) { emit_int32(insn_RRRR(fcmp_cond_d_op, fcmp_cun , (int)fk->encoding(), (int)fj->encoding(), (int)cd->encoding())); } ++ void fcmp_ceq_d (ConditionalFlagRegister cd, FloatRegister fj, FloatRegister fk) { emit_int32(insn_RRRR(fcmp_cond_d_op, fcmp_ceq , (int)fk->encoding(), (int)fj->encoding(), 
(int)cd->encoding())); } ++ void fcmp_cueq_d (ConditionalFlagRegister cd, FloatRegister fj, FloatRegister fk) { emit_int32(insn_RRRR(fcmp_cond_d_op, fcmp_cueq, (int)fk->encoding(), (int)fj->encoding(), (int)cd->encoding())); } ++ void fcmp_clt_d (ConditionalFlagRegister cd, FloatRegister fj, FloatRegister fk) { emit_int32(insn_RRRR(fcmp_cond_d_op, fcmp_clt , (int)fk->encoding(), (int)fj->encoding(), (int)cd->encoding())); } ++ void fcmp_cult_d (ConditionalFlagRegister cd, FloatRegister fj, FloatRegister fk) { emit_int32(insn_RRRR(fcmp_cond_d_op, fcmp_cult, (int)fk->encoding(), (int)fj->encoding(), (int)cd->encoding())); } ++ void fcmp_cle_d (ConditionalFlagRegister cd, FloatRegister fj, FloatRegister fk) { emit_int32(insn_RRRR(fcmp_cond_d_op, fcmp_cle , (int)fk->encoding(), (int)fj->encoding(), (int)cd->encoding())); } ++ void fcmp_cule_d (ConditionalFlagRegister cd, FloatRegister fj, FloatRegister fk) { emit_int32(insn_RRRR(fcmp_cond_d_op, fcmp_cule, (int)fk->encoding(), (int)fj->encoding(), (int)cd->encoding())); } ++ void fcmp_cne_d (ConditionalFlagRegister cd, FloatRegister fj, FloatRegister fk) { emit_int32(insn_RRRR(fcmp_cond_d_op, fcmp_cne , (int)fk->encoding(), (int)fj->encoding(), (int)cd->encoding())); } ++ void fcmp_cor_d (ConditionalFlagRegister cd, FloatRegister fj, FloatRegister fk) { emit_int32(insn_RRRR(fcmp_cond_d_op, fcmp_cor , (int)fk->encoding(), (int)fj->encoding(), (int)cd->encoding())); } ++ void fcmp_cune_d (ConditionalFlagRegister cd, FloatRegister fj, FloatRegister fk) { emit_int32(insn_RRRR(fcmp_cond_d_op, fcmp_cune, (int)fk->encoding(), (int)fj->encoding(), (int)cd->encoding())); } ++ void fcmp_saf_d (ConditionalFlagRegister cd, FloatRegister fj, FloatRegister fk) { emit_int32(insn_RRRR(fcmp_cond_d_op, fcmp_saf , (int)fk->encoding(), (int)fj->encoding(), (int)cd->encoding())); } ++ void fcmp_sun_d (ConditionalFlagRegister cd, FloatRegister fj, FloatRegister fk) { emit_int32(insn_RRRR(fcmp_cond_d_op, fcmp_sun , (int)fk->encoding(), (int)fj->encoding(), (int)cd->encoding())); } ++ void fcmp_seq_d (ConditionalFlagRegister cd, FloatRegister fj, FloatRegister fk) { emit_int32(insn_RRRR(fcmp_cond_d_op, fcmp_seq , (int)fk->encoding(), (int)fj->encoding(), (int)cd->encoding())); } ++ void fcmp_sueq_d (ConditionalFlagRegister cd, FloatRegister fj, FloatRegister fk) { emit_int32(insn_RRRR(fcmp_cond_d_op, fcmp_sueq, (int)fk->encoding(), (int)fj->encoding(), (int)cd->encoding())); } ++ void fcmp_slt_d (ConditionalFlagRegister cd, FloatRegister fj, FloatRegister fk) { emit_int32(insn_RRRR(fcmp_cond_d_op, fcmp_slt , (int)fk->encoding(), (int)fj->encoding(), (int)cd->encoding())); } ++ void fcmp_sult_d (ConditionalFlagRegister cd, FloatRegister fj, FloatRegister fk) { emit_int32(insn_RRRR(fcmp_cond_d_op, fcmp_sult, (int)fk->encoding(), (int)fj->encoding(), (int)cd->encoding())); } ++ void fcmp_sle_d (ConditionalFlagRegister cd, FloatRegister fj, FloatRegister fk) { emit_int32(insn_RRRR(fcmp_cond_d_op, fcmp_sle , (int)fk->encoding(), (int)fj->encoding(), (int)cd->encoding())); } ++ void fcmp_sule_d (ConditionalFlagRegister cd, FloatRegister fj, FloatRegister fk) { emit_int32(insn_RRRR(fcmp_cond_d_op, fcmp_sule, (int)fk->encoding(), (int)fj->encoding(), (int)cd->encoding())); } ++ void fcmp_sne_d (ConditionalFlagRegister cd, FloatRegister fj, FloatRegister fk) { emit_int32(insn_RRRR(fcmp_cond_d_op, fcmp_sne , (int)fk->encoding(), (int)fj->encoding(), (int)cd->encoding())); } ++ void fcmp_sor_d (ConditionalFlagRegister cd, FloatRegister fj, FloatRegister fk) { 
emit_int32(insn_RRRR(fcmp_cond_d_op, fcmp_sor , (int)fk->encoding(), (int)fj->encoding(), (int)cd->encoding())); } ++ void fcmp_sune_d (ConditionalFlagRegister cd, FloatRegister fj, FloatRegister fk) { emit_int32(insn_RRRR(fcmp_cond_d_op, fcmp_sune, (int)fk->encoding(), (int)fj->encoding(), (int)cd->encoding())); } ++ ++ void fsel (FloatRegister fd, FloatRegister fj, FloatRegister fk, ConditionalFlagRegister ca) { emit_int32(insn_RRRR(fsel_op, (int)ca->encoding(), (int)fk->encoding(), (int)fj->encoding(), (int)fd->encoding())); } ++ ++ void addu16i_d(Register rj, Register rd, int si16) { assert(is_simm(si16, 16), "not a signed 16-bit int"); emit_int32(insn_I16RR(addu16i_d_op, si16, (int)rj->encoding(), (int)rd->encoding())); } ++ ++ void lu12i_w(Register rj, int si20) { /*assert(is_simm(si20, 20), "not a signed 20-bit int");*/ emit_int32(insn_I20R(lu12i_w_op, simm20(si20), (int)rj->encoding())); } ++ void lu32i_d(Register rj, int si20) { /*assert(is_simm(si20, 20), "not a signed 20-bit int");*/ emit_int32(insn_I20R(lu32i_d_op, simm20(si20), (int)rj->encoding())); } ++ void pcaddi(Register rj, int si20) { assert(is_simm(si20, 20), "not a signed 20-bit int"); emit_int32(insn_I20R(pcaddi_op, si20, (int)rj->encoding())); } ++ void pcalau12i(Register rj, int si20) { assert(is_simm(si20, 20), "not a signed 20-bit int"); emit_int32(insn_I20R(pcalau12i_op, si20, (int)rj->encoding())); } ++ void pcaddu12i(Register rj, int si20) { assert(is_simm(si20, 20), "not a signed 20-bit int"); emit_int32(insn_I20R(pcaddu12i_op, si20, (int)rj->encoding())); } ++ void pcaddu18i(Register rj, int si20) { assert(is_simm(si20, 20), "not a signed 20-bit int"); emit_int32(insn_I20R(pcaddu18i_op, si20, (int)rj->encoding())); } ++ ++ void ll_w (Register rd, Register rj, int si16) { assert(is_simm(si16, 16) && ((si16 & 0x3) == 0), "not a signed 16-bit int"); emit_int32(insn_I14RR(ll_w_op, si16>>2, (int)rj->encoding(), (int)rd->encoding())); } ++ void sc_w (Register rd, Register rj, int si16) { assert(is_simm(si16, 16) && ((si16 & 0x3) == 0), "not a signed 16-bit int"); emit_int32(insn_I14RR(sc_w_op, si16>>2, (int)rj->encoding(), (int)rd->encoding())); } ++ void ll_d (Register rd, Register rj, int si16) { assert(is_simm(si16, 16) && ((si16 & 0x3) == 0), "not a signed 16-bit int"); emit_int32(insn_I14RR(ll_d_op, si16>>2, (int)rj->encoding(), (int)rd->encoding())); } ++ void sc_d (Register rd, Register rj, int si16) { assert(is_simm(si16, 16) && ((si16 & 0x3) == 0), "not a signed 16-bit int"); emit_int32(insn_I14RR(sc_d_op, si16>>2, (int)rj->encoding(), (int)rd->encoding())); } ++ void ldptr_w (Register rd, Register rj, int si16) { assert(is_simm(si16, 16) && ((si16 & 0x3) == 0), "not a signed 16-bit int"); emit_int32(insn_I14RR(ldptr_w_op, si16>>2, (int)rj->encoding(), (int)rd->encoding())); } ++ void stptr_w (Register rd, Register rj, int si16) { assert(is_simm(si16, 16) && ((si16 & 0x3) == 0), "not a signed 16-bit int"); emit_int32(insn_I14RR(stptr_w_op, si16>>2, (int)rj->encoding(), (int)rd->encoding())); } ++ void ldptr_d (Register rd, Register rj, int si16) { assert(is_simm(si16, 16) && ((si16 & 0x3) == 0), "not a signed 16-bit int"); emit_int32(insn_I14RR(ldptr_d_op, si16>>2, (int)rj->encoding(), (int)rd->encoding())); } ++ void stptr_d (Register rd, Register rj, int si16) { assert(is_simm(si16, 16) && ((si16 & 0x3) == 0), "not a signed 16-bit int"); emit_int32(insn_I14RR(stptr_d_op, si16>>2, (int)rj->encoding(), (int)rd->encoding())); } ++ ++ void ld_b (Register rd, Register rj, int si12) { assert(is_simm(si12, 
12), "not a signed 12-bit int"); emit_int32(insn_I12RR(ld_b_op, si12, (int)rj->encoding(), (int)rd->encoding())); } ++ void ld_h (Register rd, Register rj, int si12) { assert(is_simm(si12, 12), "not a signed 12-bit int"); emit_int32(insn_I12RR(ld_h_op, si12, (int)rj->encoding(), (int)rd->encoding())); } ++ void ld_w (Register rd, Register rj, int si12) { assert(is_simm(si12, 12), "not a signed 12-bit int"); emit_int32(insn_I12RR(ld_w_op, si12, (int)rj->encoding(), (int)rd->encoding())); } ++ void ld_d (Register rd, Register rj, int si12) { assert(is_simm(si12, 12), "not a signed 12-bit int"); emit_int32(insn_I12RR(ld_d_op, si12, (int)rj->encoding(), (int)rd->encoding())); } ++ void st_b (Register rd, Register rj, int si12) { assert(is_simm(si12, 12), "not a signed 12-bit int"); emit_int32(insn_I12RR(st_b_op, si12, (int)rj->encoding(), (int)rd->encoding())); } ++ void st_h (Register rd, Register rj, int si12) { assert(is_simm(si12, 12), "not a signed 12-bit int"); emit_int32(insn_I12RR(st_h_op, si12, (int)rj->encoding(), (int)rd->encoding())); } ++ void st_w (Register rd, Register rj, int si12) { assert(is_simm(si12, 12), "not a signed 12-bit int"); emit_int32(insn_I12RR(st_w_op, si12, (int)rj->encoding(), (int)rd->encoding())); } ++ void st_d (Register rd, Register rj, int si12) { assert(is_simm(si12, 12), "not a signed 12-bit int"); emit_int32(insn_I12RR(st_d_op, si12, (int)rj->encoding(), (int)rd->encoding())); } ++ void ld_bu (Register rd, Register rj, int si12) { assert(is_simm(si12, 12), "not a signed 12-bit int"); emit_int32(insn_I12RR(ld_bu_op, si12, (int)rj->encoding(), (int)rd->encoding())); } ++ void ld_hu (Register rd, Register rj, int si12) { assert(is_simm(si12, 12), "not a signed 12-bit int"); emit_int32(insn_I12RR(ld_hu_op, si12, (int)rj->encoding(), (int)rd->encoding())); } ++ void ld_wu (Register rd, Register rj, int si12) { assert(is_simm(si12, 12), "not a signed 12-bit int"); emit_int32(insn_I12RR(ld_wu_op, si12, (int)rj->encoding(), (int)rd->encoding())); } ++ void preld (int hint, Register rj, int si12) { assert(is_uimm(hint, 5), "not a unsigned 5-bit int"); assert(is_simm(si12, 12), "not a signed 12-bit int"); emit_int32(insn_I12RR(preld_op, si12, (int)rj->encoding(), hint)); } ++ void fld_s (FloatRegister fd, Register rj, int si12) { assert(is_simm(si12, 12), "not a signed 12-bit int"); emit_int32(insn_I12RR(fld_s_op, si12, (int)rj->encoding(), (int)fd->encoding())); } ++ void fst_s (FloatRegister fd, Register rj, int si12) { assert(is_simm(si12, 12), "not a signed 12-bit int"); emit_int32(insn_I12RR(fst_s_op, si12, (int)rj->encoding(), (int)fd->encoding())); } ++ void fld_d (FloatRegister fd, Register rj, int si12) { assert(is_simm(si12, 12), "not a signed 12-bit int"); emit_int32(insn_I12RR(fld_d_op, si12, (int)rj->encoding(), (int)fd->encoding())); } ++ void fst_d (FloatRegister fd, Register rj, int si12) { assert(is_simm(si12, 12), "not a signed 12-bit int"); emit_int32(insn_I12RR(fst_d_op, si12, (int)rj->encoding(), (int)fd->encoding())); } ++ void ldl_w (Register rd, Register rj, int si12) { assert(is_simm(si12, 12), "not a signed 12-bit int"); emit_int32(insn_I12RR(ldl_w_op, si12, (int)rj->encoding(), (int)rd->encoding())); } ++ void ldr_w (Register rd, Register rj, int si12) { assert(is_simm(si12, 12), "not a signed 12-bit int"); emit_int32(insn_I12RR(ldr_w_op, si12, (int)rj->encoding(), (int)rd->encoding())); } ++ ++ void ldx_b (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(ldx_b_op, (int)rk->encoding(), (int)rj->encoding(), 
(int)rd->encoding())); } ++ void ldx_h (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(ldx_h_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } ++ void ldx_w (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(ldx_w_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } ++ void ldx_d (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(ldx_d_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } ++ void stx_b (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(stx_b_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } ++ void stx_h (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(stx_h_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } ++ void stx_w (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(stx_w_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } ++ void stx_d (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(stx_d_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } ++ void ldx_bu (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(ldx_bu_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } ++ void ldx_hu (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(ldx_hu_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } ++ void ldx_wu (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(ldx_wu_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } ++ void fldx_s (FloatRegister fd, Register rj, Register rk) { emit_int32(insn_RRR(fldx_s_op, (int)rk->encoding(), (int)rj->encoding(), (int)fd->encoding())); } ++ void fldx_d (FloatRegister fd, Register rj, Register rk) { emit_int32(insn_RRR(fldx_d_op, (int)rk->encoding(), (int)rj->encoding(), (int)fd->encoding())); } ++ void fstx_s (FloatRegister fd, Register rj, Register rk) { emit_int32(insn_RRR(fstx_s_op, (int)rk->encoding(), (int)rj->encoding(), (int)fd->encoding())); } ++ void fstx_d (FloatRegister fd, Register rj, Register rk) { emit_int32(insn_RRR(fstx_d_op, (int)rk->encoding(), (int)rj->encoding(), (int)fd->encoding())); } ++ ++ void ld_b (Register rd, Address src); ++ void ld_bu (Register rd, Address src); ++ void ld_d (Register rd, Address src); ++ void ld_h (Register rd, Address src); ++ void ld_hu (Register rd, Address src); ++ void ll_w (Register rd, Address src); ++ void ll_d (Register rd, Address src); ++ void ld_wu (Register rd, Address src); ++ void ld_w (Register rd, Address src); ++ void st_b (Register rd, Address dst); ++ void st_d (Register rd, Address dst); ++ void st_w (Register rd, Address dst); ++ void sc_w (Register rd, Address dst); ++ void sc_d (Register rd, Address dst); ++ void st_h (Register rd, Address dst); ++ void fld_s (FloatRegister fd, Address src); ++ void fld_d (FloatRegister fd, Address src); ++ void fst_s (FloatRegister fd, Address dst); ++ void fst_d (FloatRegister fd, Address dst); ++ ++ void amswap_w (Register rd, Register rk, Register rj) { assert_different_registers(rd, rj); assert_different_registers(rd, rk); emit_int32(insn_RRR(amswap_w_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } ++ void amswap_d (Register rd, Register rk, Register rj) { assert_different_registers(rd, rj); assert_different_registers(rd, rk); emit_int32(insn_RRR(amswap_d_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } ++ void amadd_w (Register rd, Register rk, Register rj) { 
assert_different_registers(rd, rj); assert_different_registers(rd, rk); emit_int32(insn_RRR(amadd_w_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } ++ void amadd_d (Register rd, Register rk, Register rj) { assert_different_registers(rd, rj); assert_different_registers(rd, rk); emit_int32(insn_RRR(amadd_d_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } ++ void amand_w (Register rd, Register rk, Register rj) { assert_different_registers(rd, rj); assert_different_registers(rd, rk); emit_int32(insn_RRR(amand_w_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } ++ void amand_d (Register rd, Register rk, Register rj) { assert_different_registers(rd, rj); assert_different_registers(rd, rk); emit_int32(insn_RRR(amand_d_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } ++ void amor_w (Register rd, Register rk, Register rj) { assert_different_registers(rd, rj); assert_different_registers(rd, rk); emit_int32(insn_RRR(amor_w_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } ++ void amor_d (Register rd, Register rk, Register rj) { assert_different_registers(rd, rj); assert_different_registers(rd, rk); emit_int32(insn_RRR(amor_d_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } ++ void amxor_w (Register rd, Register rk, Register rj) { assert_different_registers(rd, rj); assert_different_registers(rd, rk); emit_int32(insn_RRR(amxor_w_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } ++ void amxor_d (Register rd, Register rk, Register rj) { assert_different_registers(rd, rj); assert_different_registers(rd, rk); emit_int32(insn_RRR(amxor_d_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } ++ void ammax_w (Register rd, Register rk, Register rj) { assert_different_registers(rd, rj); assert_different_registers(rd, rk); emit_int32(insn_RRR(ammax_w_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } ++ void ammax_d (Register rd, Register rk, Register rj) { assert_different_registers(rd, rj); assert_different_registers(rd, rk); emit_int32(insn_RRR(ammax_d_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } ++ void ammin_w (Register rd, Register rk, Register rj) { assert_different_registers(rd, rj); assert_different_registers(rd, rk); emit_int32(insn_RRR(ammin_w_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } ++ void ammin_d (Register rd, Register rk, Register rj) { assert_different_registers(rd, rj); assert_different_registers(rd, rk); emit_int32(insn_RRR(ammin_d_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } ++ void ammax_wu (Register rd, Register rk, Register rj) { assert_different_registers(rd, rj); assert_different_registers(rd, rk); emit_int32(insn_RRR(ammax_wu_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } ++ void ammax_du (Register rd, Register rk, Register rj) { assert_different_registers(rd, rj); assert_different_registers(rd, rk); emit_int32(insn_RRR(ammax_du_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } ++ void ammin_wu (Register rd, Register rk, Register rj) { assert_different_registers(rd, rj); assert_different_registers(rd, rk); emit_int32(insn_RRR(ammin_wu_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } ++ void ammin_du (Register rd, Register rk, Register rj) { assert_different_registers(rd, rj); assert_different_registers(rd, rk); emit_int32(insn_RRR(ammin_du_op, 
(int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } ++ void amswap_db_w(Register rd, Register rk, Register rj) { assert_different_registers(rd, rj); assert_different_registers(rd, rk); emit_int32(insn_RRR(amswap_db_w_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } ++ void amswap_db_d(Register rd, Register rk, Register rj) { assert_different_registers(rd, rj); assert_different_registers(rd, rk); emit_int32(insn_RRR(amswap_db_d_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } ++ void amadd_db_w (Register rd, Register rk, Register rj) { assert_different_registers(rd, rj); assert_different_registers(rd, rk); emit_int32(insn_RRR(amadd_db_w_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } ++ void amadd_db_d (Register rd, Register rk, Register rj) { assert_different_registers(rd, rj); assert_different_registers(rd, rk); emit_int32(insn_RRR(amadd_db_d_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } ++ void amand_db_w (Register rd, Register rk, Register rj) { assert_different_registers(rd, rj); assert_different_registers(rd, rk); emit_int32(insn_RRR(amand_db_w_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } ++ void amand_db_d (Register rd, Register rk, Register rj) { assert_different_registers(rd, rj); assert_different_registers(rd, rk); emit_int32(insn_RRR(amand_db_d_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } ++ void amor_db_w (Register rd, Register rk, Register rj) { assert_different_registers(rd, rj); assert_different_registers(rd, rk); emit_int32(insn_RRR(amor_db_w_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } ++ void amor_db_d (Register rd, Register rk, Register rj) { assert_different_registers(rd, rj); assert_different_registers(rd, rk); emit_int32(insn_RRR(amor_db_d_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } ++ void amxor_db_w (Register rd, Register rk, Register rj) { assert_different_registers(rd, rj); assert_different_registers(rd, rk); emit_int32(insn_RRR(amxor_db_w_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } ++ void amxor_db_d (Register rd, Register rk, Register rj) { assert_different_registers(rd, rj); assert_different_registers(rd, rk); emit_int32(insn_RRR(amxor_db_d_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } ++ void ammax_db_w (Register rd, Register rk, Register rj) { assert_different_registers(rd, rj); assert_different_registers(rd, rk); emit_int32(insn_RRR(ammax_db_w_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } ++ void ammax_db_d (Register rd, Register rk, Register rj) { assert_different_registers(rd, rj); assert_different_registers(rd, rk); emit_int32(insn_RRR(ammax_db_d_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } ++ void ammin_db_w (Register rd, Register rk, Register rj) { assert_different_registers(rd, rj); assert_different_registers(rd, rk); emit_int32(insn_RRR(ammin_db_w_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } ++ void ammin_db_d (Register rd, Register rk, Register rj) { assert_different_registers(rd, rj); assert_different_registers(rd, rk); emit_int32(insn_RRR(ammin_db_d_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } ++ void ammax_db_wu(Register rd, Register rk, Register rj) { assert_different_registers(rd, rj); assert_different_registers(rd, rk); emit_int32(insn_RRR(ammax_db_wu_op, (int)rk->encoding(), 
(int)rj->encoding(), (int)rd->encoding())); } ++ void ammax_db_du(Register rd, Register rk, Register rj) { assert_different_registers(rd, rj); assert_different_registers(rd, rk); emit_int32(insn_RRR(ammax_db_du_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } ++ void ammin_db_wu(Register rd, Register rk, Register rj) { assert_different_registers(rd, rj); assert_different_registers(rd, rk); emit_int32(insn_RRR(ammin_db_wu_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } ++ void ammin_db_du(Register rd, Register rk, Register rj) { assert_different_registers(rd, rj); assert_different_registers(rd, rk); emit_int32(insn_RRR(ammin_db_du_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } ++ ++ void dbar(int hint) { ++ assert(is_uimm(hint, 15), "not a unsigned 15-bit int"); ++ ++ if (os::is_ActiveCoresMP()) ++ andi(R0, R0, 0); ++ else ++ emit_int32(insn_I15(dbar_op, hint)); ++ } ++ void ibar(int hint) { assert(is_uimm(hint, 15), "not a unsigned 15-bit int"); emit_int32(insn_I15(ibar_op, hint)); } ++ ++ void fldgt_s (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(fldgt_s_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } ++ void fldgt_d (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(fldgt_d_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } ++ void fldle_s (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(fldle_s_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } ++ void fldle_d (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(fldle_d_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } ++ void fstgt_s (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(fstgt_s_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } ++ void fstgt_d (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(fstgt_d_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } ++ void fstle_s (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(fstle_s_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } ++ void fstle_d (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(fstle_d_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } ++ ++ void ldgt_b (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(ldgt_b_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } ++ void ldgt_h (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(ldgt_h_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } ++ void ldgt_w (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(ldgt_w_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } ++ void ldgt_d (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(ldgt_d_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } ++ void ldle_b (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(ldle_b_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } ++ void ldle_h (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(ldle_h_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } ++ void ldle_w (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(ldle_w_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } ++ void ldle_d (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(ldle_d_op, (int)rk->encoding(), (int)rj->encoding(), 
(int)rd->encoding())); } ++ void stgt_b (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(stgt_b_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } ++ void stgt_h (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(stgt_h_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } ++ void stgt_w (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(stgt_w_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } ++ void stgt_d (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(stgt_d_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } ++ void stle_b (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(stle_b_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } ++ void stle_h (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(stle_h_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } ++ void stle_w (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(stle_w_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } ++ void stle_d (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(stle_d_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } ++ ++ void beqz(Register rj, int offs) { assert(is_simm(offs, 21), "not a signed 21-bit int"); emit_int32(insn_IRI(beqz_op, offs, (int)rj->encoding())); } ++ void bnez(Register rj, int offs) { assert(is_simm(offs, 21), "not a signed 21-bit int"); emit_int32(insn_IRI(bnez_op, offs, (int)rj->encoding())); } ++ void bceqz(ConditionalFlagRegister cj, int offs) { assert(is_simm(offs, 21), "not a signed 21-bit int"); emit_int32(insn_IRI(bccondz_op, offs, ( (0b00<<3) | (int)cj->encoding()))); } ++ void bcnez(ConditionalFlagRegister cj, int offs) { assert(is_simm(offs, 21), "not a signed 21-bit int"); emit_int32(insn_IRI(bccondz_op, offs, ( (0b01<<3) | (int)cj->encoding()))); } ++ ++ void jirl(Register rd, Register rj, int offs) { assert(is_simm(offs, 18) && ((offs & 3) == 0), "not a signed 18-bit int"); emit_int32(insn_I16RR(jirl_op, offs >> 2, (int)rj->encoding(), (int)rd->encoding())); } ++ ++ void b(int offs) { assert(is_simm(offs, 26), "not a signed 26-bit int"); emit_int32(insn_I26(b_op, offs)); } ++ void bl(int offs) { assert(is_simm(offs, 26), "not a signed 26-bit int"); emit_int32(insn_I26(bl_op, offs)); } ++ ++ ++ void beq(Register rj, Register rd, int offs) { assert(is_simm(offs, 16), "not a signed 16-bit int"); emit_int32(insn_I16RR(beq_op, offs, (int)rj->encoding(), (int)rd->encoding())); } ++ void bne(Register rj, Register rd, int offs) { assert(is_simm(offs, 16), "not a signed 16-bit int"); emit_int32(insn_I16RR(bne_op, offs, (int)rj->encoding(), (int)rd->encoding())); } ++ void blt(Register rj, Register rd, int offs) { assert(is_simm(offs, 16), "not a signed 16-bit int"); emit_int32(insn_I16RR(blt_op, offs, (int)rj->encoding(), (int)rd->encoding())); } ++ void bge(Register rj, Register rd, int offs) { assert(is_simm(offs, 16), "not a signed 16-bit int"); emit_int32(insn_I16RR(bge_op, offs, (int)rj->encoding(), (int)rd->encoding())); } ++ void bltu(Register rj, Register rd, int offs) { assert(is_simm(offs, 16), "not a signed 16-bit int"); emit_int32(insn_I16RR(bltu_op, offs, (int)rj->encoding(), (int)rd->encoding())); } ++ void bgeu(Register rj, Register rd, int offs) { assert(is_simm(offs, 16), "not a signed 16-bit int"); emit_int32(insn_I16RR(bgeu_op, offs, (int)rj->encoding(), (int)rd->encoding())); } ++ ++ void beq (Register rj, Register 
rd, address entry) { beq (rj, rd, offset16(entry)); } ++ void bne (Register rj, Register rd, address entry) { bne (rj, rd, offset16(entry)); } ++ void blt (Register rj, Register rd, address entry) { blt (rj, rd, offset16(entry)); } ++ void bge (Register rj, Register rd, address entry) { bge (rj, rd, offset16(entry)); } ++ void bltu (Register rj, Register rd, address entry) { bltu (rj, rd, offset16(entry)); } ++ void bgeu (Register rj, Register rd, address entry) { bgeu (rj, rd, offset16(entry)); } ++ void beqz (Register rj, address entry) { beqz (rj, offset21(entry)); } ++ void bnez (Register rj, address entry) { bnez (rj, offset21(entry)); } ++ void b(address entry) { b(offset26(entry)); } ++ void bl(address entry) { bl(offset26(entry)); } ++ void bceqz(ConditionalFlagRegister cj, address entry) { bceqz(cj, offset21(entry)); } ++ void bcnez(ConditionalFlagRegister cj, address entry) { bcnez(cj, offset21(entry)); } ++ ++ void beq (Register rj, Register rd, Label& L) { beq (rj, rd, target(L)); } ++ void bne (Register rj, Register rd, Label& L) { bne (rj, rd, target(L)); } ++ void blt (Register rj, Register rd, Label& L) { blt (rj, rd, target(L)); } ++ void bge (Register rj, Register rd, Label& L) { bge (rj, rd, target(L)); } ++ void bltu (Register rj, Register rd, Label& L) { bltu (rj, rd, target(L)); } ++ void bgeu (Register rj, Register rd, Label& L) { bgeu (rj, rd, target(L)); } ++ void beqz (Register rj, Label& L) { beqz (rj, target(L)); } ++ void bnez (Register rj, Label& L) { bnez (rj, target(L)); } ++ void b(Label& L) { b(target(L)); } ++ void bl(Label& L) { bl(target(L)); } ++ void bceqz(ConditionalFlagRegister cj, Label& L) { bceqz(cj, target(L)); } ++ void bcnez(ConditionalFlagRegister cj, Label& L) { bcnez(cj, target(L)); } ++ ++ typedef enum { ++ // hint[4] ++ Completion = 0, ++ Ordering = (1 << 4), ++ ++ // The bitwise-not of the below constants is corresponding to the hint. This is convenient for OR operation. 
++ // hint[3:2] and hint[1:0] ++ LoadLoad = ((1 << 3) | (1 << 1)), ++ LoadStore = ((1 << 3) | (1 << 0)), ++ StoreLoad = ((1 << 2) | (1 << 1)), ++ StoreStore = ((1 << 2) | (1 << 0)), ++ AnyAny = ((3 << 2) | (3 << 0)), ++ } Membar_mask_bits; ++ ++ // Serializes memory and blows flags ++ void membar(Membar_mask_bits hint) { ++ assert((hint & (3 << 0)) != 0, "membar mask unsupported!"); ++ assert((hint & (3 << 2)) != 0, "membar mask unsupported!"); ++ dbar(Ordering | (~hint & 0xf)); ++ } ++ ++ // LSX and LASX ++#define ASSERT_LSX assert(UseLSX, ""); ++#define ASSERT_LASX assert(UseLASX, ""); ++ ++ void vadd_b(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vadd_b_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vadd_h(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vadd_h_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vadd_w(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vadd_w_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vadd_d(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vadd_d_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vadd_q(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vadd_q_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void xvadd_b(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvadd_b_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvadd_h(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvadd_h_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvadd_w(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvadd_w_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvadd_d(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvadd_d_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvadd_q(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvadd_q_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void vsub_b(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vsub_b_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vsub_h(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vsub_h_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vsub_w(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vsub_w_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vsub_d(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vsub_d_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vsub_q(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vsub_q_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void xvsub_b(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvsub_b_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvsub_h(FloatRegister xd, 
FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvsub_h_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvsub_w(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvsub_w_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvsub_d(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvsub_d_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvsub_q(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvsub_q_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void vaddi_bu(FloatRegister vd, FloatRegister vj, int ui5) { ASSERT_LSX emit_int32(insn_I5RR( vaddi_bu_op, ui5, (int)vj->encoding(), (int)vd->encoding())); } ++ void vaddi_hu(FloatRegister vd, FloatRegister vj, int ui5) { ASSERT_LSX emit_int32(insn_I5RR( vaddi_hu_op, ui5, (int)vj->encoding(), (int)vd->encoding())); } ++ void vaddi_wu(FloatRegister vd, FloatRegister vj, int ui5) { ASSERT_LSX emit_int32(insn_I5RR( vaddi_wu_op, ui5, (int)vj->encoding(), (int)vd->encoding())); } ++ void vaddi_du(FloatRegister vd, FloatRegister vj, int ui5) { ASSERT_LSX emit_int32(insn_I5RR( vaddi_du_op, ui5, (int)vj->encoding(), (int)vd->encoding())); } ++ void xvaddi_bu(FloatRegister xd, FloatRegister xj, int ui5) { ASSERT_LASX emit_int32(insn_I5RR(xvaddi_bu_op, ui5, (int)xj->encoding(), (int)xd->encoding())); } ++ void xvaddi_hu(FloatRegister xd, FloatRegister xj, int ui5) { ASSERT_LASX emit_int32(insn_I5RR(xvaddi_hu_op, ui5, (int)xj->encoding(), (int)xd->encoding())); } ++ void xvaddi_wu(FloatRegister xd, FloatRegister xj, int ui5) { ASSERT_LASX emit_int32(insn_I5RR(xvaddi_wu_op, ui5, (int)xj->encoding(), (int)xd->encoding())); } ++ void xvaddi_du(FloatRegister xd, FloatRegister xj, int ui5) { ASSERT_LASX emit_int32(insn_I5RR(xvaddi_du_op, ui5, (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void vsubi_bu(FloatRegister vd, FloatRegister vj, int ui5) { ASSERT_LSX emit_int32(insn_I5RR( vsubi_bu_op, ui5, (int)vj->encoding(), (int)vd->encoding())); } ++ void vsubi_hu(FloatRegister vd, FloatRegister vj, int ui5) { ASSERT_LSX emit_int32(insn_I5RR( vsubi_hu_op, ui5, (int)vj->encoding(), (int)vd->encoding())); } ++ void vsubi_wu(FloatRegister vd, FloatRegister vj, int ui5) { ASSERT_LSX emit_int32(insn_I5RR( vsubi_wu_op, ui5, (int)vj->encoding(), (int)vd->encoding())); } ++ void vsubi_du(FloatRegister vd, FloatRegister vj, int ui5) { ASSERT_LSX emit_int32(insn_I5RR( vsubi_du_op, ui5, (int)vj->encoding(), (int)vd->encoding())); } ++ void xvsubi_bu(FloatRegister xd, FloatRegister xj, int ui5) { ASSERT_LASX emit_int32(insn_I5RR(xvsubi_bu_op, ui5, (int)xj->encoding(), (int)xd->encoding())); } ++ void xvsubi_hu(FloatRegister xd, FloatRegister xj, int ui5) { ASSERT_LASX emit_int32(insn_I5RR(xvsubi_hu_op, ui5, (int)xj->encoding(), (int)xd->encoding())); } ++ void xvsubi_wu(FloatRegister xd, FloatRegister xj, int ui5) { ASSERT_LASX emit_int32(insn_I5RR(xvsubi_wu_op, ui5, (int)xj->encoding(), (int)xd->encoding())); } ++ void xvsubi_du(FloatRegister xd, FloatRegister xj, int ui5) { ASSERT_LASX emit_int32(insn_I5RR(xvsubi_du_op, ui5, (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void vneg_b(FloatRegister vd, FloatRegister vj) { ASSERT_LSX emit_int32(insn_RR( vneg_b_op, (int)vj->encoding(), (int)vd->encoding())); } ++ void vneg_h(FloatRegister vd, FloatRegister vj) { ASSERT_LSX emit_int32(insn_RR( vneg_h_op, 
(int)vj->encoding(), (int)vd->encoding())); } ++ void vneg_w(FloatRegister vd, FloatRegister vj) { ASSERT_LSX emit_int32(insn_RR( vneg_w_op, (int)vj->encoding(), (int)vd->encoding())); } ++ void vneg_d(FloatRegister vd, FloatRegister vj) { ASSERT_LSX emit_int32(insn_RR( vneg_d_op, (int)vj->encoding(), (int)vd->encoding())); } ++ void xvneg_b(FloatRegister xd, FloatRegister xj) { ASSERT_LASX emit_int32(insn_RR(xvneg_b_op, (int)xj->encoding(), (int)xd->encoding())); } ++ void xvneg_h(FloatRegister xd, FloatRegister xj) { ASSERT_LASX emit_int32(insn_RR(xvneg_h_op, (int)xj->encoding(), (int)xd->encoding())); } ++ void xvneg_w(FloatRegister xd, FloatRegister xj) { ASSERT_LASX emit_int32(insn_RR(xvneg_w_op, (int)xj->encoding(), (int)xd->encoding())); } ++ void xvneg_d(FloatRegister xd, FloatRegister xj) { ASSERT_LASX emit_int32(insn_RR(xvneg_d_op, (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void vabsd_b(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vabsd_b_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vabsd_h(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vabsd_h_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vabsd_w(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vabsd_w_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vabsd_d(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vabsd_d_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void xvabsd_b(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvabsd_b_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvabsd_h(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvabsd_h_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvabsd_w(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvabsd_w_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvabsd_d(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvabsd_d_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void vmax_b(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vmax_b_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vmax_h(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vmax_h_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vmax_w(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vmax_w_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vmax_d(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vmax_d_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void xvmax_b(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvmax_b_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvmax_h(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvmax_h_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvmax_w(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX 
emit_int32(insn_RRR(xvmax_w_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvmax_d(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvmax_d_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void vmin_b(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vmin_b_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vmin_h(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vmin_h_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vmin_w(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vmin_w_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vmin_d(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vmin_d_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void xvmin_b(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvmin_b_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvmin_h(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvmin_h_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvmin_w(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvmin_w_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvmin_d(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvmin_d_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void vmul_b(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vmul_b_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vmul_h(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vmul_h_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vmul_w(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vmul_w_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vmul_d(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vmul_d_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void xvmul_b(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvmul_b_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvmul_h(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvmul_h_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvmul_w(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvmul_w_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvmul_d(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvmul_d_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void vmuh_b(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vmuh_b_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vmuh_h(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vmuh_h_op, (int)vk->encoding(), (int)vj->encoding(), 
(int)vd->encoding())); } ++ void vmuh_w(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vmuh_w_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vmuh_d(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vmuh_d_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void xvmuh_b(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvmuh_b_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvmuh_h(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvmuh_h_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvmuh_w(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvmuh_w_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvmuh_d(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvmuh_d_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void vmuh_bu(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vmuh_bu_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vmuh_hu(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vmuh_hu_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vmuh_wu(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vmuh_wu_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vmuh_du(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vmuh_du_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void xvmuh_bu(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvmuh_bu_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvmuh_hu(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvmuh_hu_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvmuh_wu(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvmuh_wu_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvmuh_du(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvmuh_du_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void vmulwev_h_b(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vmulwev_h_b_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vmulwev_w_h(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vmulwev_w_h_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vmulwev_d_w(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vmulwev_d_w_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vmulwev_q_d(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vmulwev_q_d_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void xvmulwev_h_b(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvmulwev_h_b_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } 
++ void xvmulwev_w_h(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvmulwev_w_h_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvmulwev_d_w(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvmulwev_d_w_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvmulwev_q_d(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvmulwev_q_d_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void vmulwod_h_b(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vmulwod_h_b_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vmulwod_w_h(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vmulwod_w_h_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vmulwod_d_w(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vmulwod_d_w_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vmulwod_q_d(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vmulwod_q_d_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void xvmulwod_h_b(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvmulwod_h_b_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvmulwod_w_h(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvmulwod_w_h_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvmulwod_d_w(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvmulwod_d_w_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvmulwod_q_d(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvmulwod_q_d_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void vmadd_b(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vmadd_b_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vmadd_h(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vmadd_h_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vmadd_w(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vmadd_w_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vmadd_d(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vmadd_d_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void xvmadd_b(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvmadd_b_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvmadd_h(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvmadd_h_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvmadd_w(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvmadd_w_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvmadd_d(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvmadd_d_op, (int)xk->encoding(), 
(int)xj->encoding(), (int)xd->encoding())); } ++ ++ void vmsub_b(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vmsub_b_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vmsub_h(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vmsub_h_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vmsub_w(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vmsub_w_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vmsub_d(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vmsub_d_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void xvmsub_b(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvmsub_b_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvmsub_h(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvmsub_h_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvmsub_w(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvmsub_w_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvmsub_d(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvmsub_d_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void vext2xv_h_b(FloatRegister xd, FloatRegister xj) { ASSERT_LASX emit_int32(insn_RR(vext2xv_h_b_op, (int)xj->encoding(), (int)xd->encoding())); } ++ void vext2xv_w_b(FloatRegister xd, FloatRegister xj) { ASSERT_LASX emit_int32(insn_RR(vext2xv_w_b_op, (int)xj->encoding(), (int)xd->encoding())); } ++ void vext2xv_d_b(FloatRegister xd, FloatRegister xj) { ASSERT_LASX emit_int32(insn_RR(vext2xv_d_b_op, (int)xj->encoding(), (int)xd->encoding())); } ++ void vext2xv_w_h(FloatRegister xd, FloatRegister xj) { ASSERT_LASX emit_int32(insn_RR(vext2xv_w_h_op, (int)xj->encoding(), (int)xd->encoding())); } ++ void vext2xv_d_h(FloatRegister xd, FloatRegister xj) { ASSERT_LASX emit_int32(insn_RR(vext2xv_d_h_op, (int)xj->encoding(), (int)xd->encoding())); } ++ void vext2xv_d_w(FloatRegister xd, FloatRegister xj) { ASSERT_LASX emit_int32(insn_RR(vext2xv_d_w_op, (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void vext2xv_hu_bu(FloatRegister xd, FloatRegister xj) { ASSERT_LASX emit_int32(insn_RR(vext2xv_hu_bu_op, (int)xj->encoding(), (int)xd->encoding())); } ++ void vext2xv_wu_bu(FloatRegister xd, FloatRegister xj) { ASSERT_LASX emit_int32(insn_RR(vext2xv_wu_bu_op, (int)xj->encoding(), (int)xd->encoding())); } ++ void vext2xv_du_bu(FloatRegister xd, FloatRegister xj) { ASSERT_LASX emit_int32(insn_RR(vext2xv_du_bu_op, (int)xj->encoding(), (int)xd->encoding())); } ++ void vext2xv_wu_hu(FloatRegister xd, FloatRegister xj) { ASSERT_LASX emit_int32(insn_RR(vext2xv_wu_hu_op, (int)xj->encoding(), (int)xd->encoding())); } ++ void vext2xv_du_hu(FloatRegister xd, FloatRegister xj) { ASSERT_LASX emit_int32(insn_RR(vext2xv_du_hu_op, (int)xj->encoding(), (int)xd->encoding())); } ++ void vext2xv_du_wu(FloatRegister xd, FloatRegister xj) { ASSERT_LASX emit_int32(insn_RR(vext2xv_du_wu_op, (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void vldi(FloatRegister vd, int i13) { ASSERT_LSX emit_int32(insn_I13R( vldi_op, i13, (int)vd->encoding())); } ++ void xvldi(FloatRegister xd, int i13) { ASSERT_LASX 
emit_int32(insn_I13R(xvldi_op, i13, (int)xd->encoding())); } ++ ++ void vand_v(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vand_v_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void xvand_v(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvand_v_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void vor_v(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vor_v_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void xvor_v(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvor_v_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void vxor_v(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vxor_v_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void xvxor_v(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvxor_v_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void vnor_v(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vnor_v_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void xvnor_v(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvnor_v_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void vandn_v(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vandn_v_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void xvandn_v(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvandn_v_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void vorn_v(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vorn_v_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void xvorn_v(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvorn_v_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void vandi_b(FloatRegister vd, FloatRegister vj, int ui8) { ASSERT_LSX assert(is_uimm(ui8, 8), "not a unsigned 8-bit int"); emit_int32(insn_I8RR( vandi_b_op, ui8, (int)vj->encoding(), (int)vd->encoding())); } ++ void xvandi_b(FloatRegister xd, FloatRegister xj, int ui8) { ASSERT_LASX assert(is_uimm(ui8, 8), "not a unsigned 8-bit int"); emit_int32(insn_I8RR(xvandi_b_op, ui8, (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void vori_b(FloatRegister vd, FloatRegister vj, int ui8) { ASSERT_LSX assert(is_uimm(ui8, 8), "not a unsigned 8-bit int"); emit_int32(insn_I8RR( vori_b_op, ui8, (int)vj->encoding(), (int)vd->encoding())); } ++ void xvori_b(FloatRegister xd, FloatRegister xj, int ui8) { ASSERT_LASX assert(is_uimm(ui8, 8), "not a unsigned 8-bit int"); emit_int32(insn_I8RR(xvori_b_op, ui8, (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void vxori_b(FloatRegister vd, FloatRegister vj, int ui8) { ASSERT_LSX assert(is_uimm(ui8, 8), "not a unsigned 8-bit int"); emit_int32(insn_I8RR( vxori_b_op, ui8, (int)vj->encoding(), (int)vd->encoding())); } ++ void xvxori_b(FloatRegister xd, FloatRegister xj, int ui8) { ASSERT_LASX assert(is_uimm(ui8, 8), "not a unsigned 8-bit int"); emit_int32(insn_I8RR(xvxori_b_op, ui8, (int)xj->encoding(), (int)xd->encoding())); } ++ ++ 
void vnori_b(FloatRegister vd, FloatRegister vj, int ui8) { ASSERT_LSX assert(is_uimm(ui8, 8), "not a unsigned 8-bit int"); emit_int32(insn_I8RR( vnori_b_op, ui8, (int)vj->encoding(), (int)vd->encoding())); } ++ void xvnori_b(FloatRegister xd, FloatRegister xj, int ui8) { ASSERT_LASX assert(is_uimm(ui8, 8), "not a unsigned 8-bit int"); emit_int32(insn_I8RR(xvnori_b_op, ui8, (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void vsll_b(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vsll_b_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vsll_h(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vsll_h_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vsll_w(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vsll_w_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vsll_d(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vsll_d_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void xvsll_b(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvsll_b_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvsll_h(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvsll_h_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvsll_w(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvsll_w_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvsll_d(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvsll_d_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void vslli_b(FloatRegister vd, FloatRegister vj, int ui3) { ASSERT_LSX emit_int32(insn_I3RR( vslli_b_op, ui3, (int)vj->encoding(), (int)vd->encoding())); } ++ void vslli_h(FloatRegister vd, FloatRegister vj, int ui4) { ASSERT_LSX emit_int32(insn_I4RR( vslli_h_op, ui4, (int)vj->encoding(), (int)vd->encoding())); } ++ void vslli_w(FloatRegister vd, FloatRegister vj, int ui5) { ASSERT_LSX emit_int32(insn_I5RR( vslli_w_op, ui5, (int)vj->encoding(), (int)vd->encoding())); } ++ void vslli_d(FloatRegister vd, FloatRegister vj, int ui6) { ASSERT_LSX emit_int32(insn_I6RR( vslli_d_op, ui6, (int)vj->encoding(), (int)vd->encoding())); } ++ void xvslli_b(FloatRegister xd, FloatRegister xj, int ui3) { ASSERT_LASX emit_int32(insn_I3RR(xvslli_b_op, ui3, (int)xj->encoding(), (int)xd->encoding())); } ++ void xvslli_h(FloatRegister xd, FloatRegister xj, int ui4) { ASSERT_LASX emit_int32(insn_I4RR(xvslli_h_op, ui4, (int)xj->encoding(), (int)xd->encoding())); } ++ void xvslli_w(FloatRegister xd, FloatRegister xj, int ui5) { ASSERT_LASX emit_int32(insn_I5RR(xvslli_w_op, ui5, (int)xj->encoding(), (int)xd->encoding())); } ++ void xvslli_d(FloatRegister xd, FloatRegister xj, int ui6) { ASSERT_LASX emit_int32(insn_I6RR(xvslli_d_op, ui6, (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void vsrl_b(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vsrl_b_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vsrl_h(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vsrl_h_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void 
vsrl_w(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vsrl_w_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vsrl_d(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vsrl_d_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void xvsrl_b(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvsrl_b_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvsrl_h(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvsrl_h_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvsrl_w(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvsrl_w_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvsrl_d(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvsrl_d_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void vsrli_b(FloatRegister vd, FloatRegister vj, int ui3) { ASSERT_LSX emit_int32(insn_I3RR( vsrli_b_op, ui3, (int)vj->encoding(), (int)vd->encoding())); } ++ void vsrli_h(FloatRegister vd, FloatRegister vj, int ui4) { ASSERT_LSX emit_int32(insn_I4RR( vsrli_h_op, ui4, (int)vj->encoding(), (int)vd->encoding())); } ++ void vsrli_w(FloatRegister vd, FloatRegister vj, int ui5) { ASSERT_LSX emit_int32(insn_I5RR( vsrli_w_op, ui5, (int)vj->encoding(), (int)vd->encoding())); } ++ void vsrli_d(FloatRegister vd, FloatRegister vj, int ui6) { ASSERT_LSX emit_int32(insn_I6RR( vsrli_d_op, ui6, (int)vj->encoding(), (int)vd->encoding())); } ++ void xvsrli_b(FloatRegister xd, FloatRegister xj, int ui3) { ASSERT_LASX emit_int32(insn_I3RR(xvsrli_b_op, ui3, (int)xj->encoding(), (int)xd->encoding())); } ++ void xvsrli_h(FloatRegister xd, FloatRegister xj, int ui4) { ASSERT_LASX emit_int32(insn_I4RR(xvsrli_h_op, ui4, (int)xj->encoding(), (int)xd->encoding())); } ++ void xvsrli_w(FloatRegister xd, FloatRegister xj, int ui5) { ASSERT_LASX emit_int32(insn_I5RR(xvsrli_w_op, ui5, (int)xj->encoding(), (int)xd->encoding())); } ++ void xvsrli_d(FloatRegister xd, FloatRegister xj, int ui6) { ASSERT_LASX emit_int32(insn_I6RR(xvsrli_d_op, ui6, (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void vsra_b(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vsra_b_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vsra_h(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vsra_h_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vsra_w(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vsra_w_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vsra_d(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vsra_d_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void xvsra_b(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvsra_b_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvsra_h(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvsra_h_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvsra_w(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX 
emit_int32(insn_RRR(xvsra_w_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvsra_d(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvsra_d_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void vsrai_b(FloatRegister vd, FloatRegister vj, int ui3) { ASSERT_LSX emit_int32(insn_I3RR( vsrai_b_op, ui3, (int)vj->encoding(), (int)vd->encoding())); } ++ void vsrai_h(FloatRegister vd, FloatRegister vj, int ui4) { ASSERT_LSX emit_int32(insn_I4RR( vsrai_h_op, ui4, (int)vj->encoding(), (int)vd->encoding())); } ++ void vsrai_w(FloatRegister vd, FloatRegister vj, int ui5) { ASSERT_LSX emit_int32(insn_I5RR( vsrai_w_op, ui5, (int)vj->encoding(), (int)vd->encoding())); } ++ void vsrai_d(FloatRegister vd, FloatRegister vj, int ui6) { ASSERT_LSX emit_int32(insn_I6RR( vsrai_d_op, ui6, (int)vj->encoding(), (int)vd->encoding())); } ++ void xvsrai_b(FloatRegister xd, FloatRegister xj, int ui3) { ASSERT_LASX emit_int32(insn_I3RR(xvsrai_b_op, ui3, (int)xj->encoding(), (int)xd->encoding())); } ++ void xvsrai_h(FloatRegister xd, FloatRegister xj, int ui4) { ASSERT_LASX emit_int32(insn_I4RR(xvsrai_h_op, ui4, (int)xj->encoding(), (int)xd->encoding())); } ++ void xvsrai_w(FloatRegister xd, FloatRegister xj, int ui5) { ASSERT_LASX emit_int32(insn_I5RR(xvsrai_w_op, ui5, (int)xj->encoding(), (int)xd->encoding())); } ++ void xvsrai_d(FloatRegister xd, FloatRegister xj, int ui6) { ASSERT_LASX emit_int32(insn_I6RR(xvsrai_d_op, ui6, (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void vrotr_b(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vrotr_b_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vrotr_h(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vrotr_h_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vrotr_w(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vrotr_w_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vrotr_d(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vrotr_d_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void xvrotr_b(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvrotr_b_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvrotr_h(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvrotr_h_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvrotr_w(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvrotr_w_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvrotr_d(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvrotr_d_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void vrotri_b(FloatRegister vd, FloatRegister vj, int ui3) { ASSERT_LSX emit_int32(insn_I3RR( vrotri_b_op, ui3, (int)vj->encoding(), (int)vd->encoding())); } ++ void vrotri_h(FloatRegister vd, FloatRegister vj, int ui4) { ASSERT_LSX emit_int32(insn_I4RR( vrotri_h_op, ui4, (int)vj->encoding(), (int)vd->encoding())); } ++ void vrotri_w(FloatRegister vd, FloatRegister vj, int ui5) { ASSERT_LSX emit_int32(insn_I5RR( vrotri_w_op, ui5, (int)vj->encoding(), (int)vd->encoding())); } ++ void 
vrotri_d(FloatRegister vd, FloatRegister vj, int ui6) { ASSERT_LSX emit_int32(insn_I6RR( vrotri_d_op, ui6, (int)vj->encoding(), (int)vd->encoding())); } ++ void xvrotri_b(FloatRegister xd, FloatRegister xj, int ui3) { ASSERT_LASX emit_int32(insn_I3RR(xvrotri_b_op, ui3, (int)xj->encoding(), (int)xd->encoding())); } ++ void xvrotri_h(FloatRegister xd, FloatRegister xj, int ui4) { ASSERT_LASX emit_int32(insn_I4RR(xvrotri_h_op, ui4, (int)xj->encoding(), (int)xd->encoding())); } ++ void xvrotri_w(FloatRegister xd, FloatRegister xj, int ui5) { ASSERT_LASX emit_int32(insn_I5RR(xvrotri_w_op, ui5, (int)xj->encoding(), (int)xd->encoding())); } ++ void xvrotri_d(FloatRegister xd, FloatRegister xj, int ui6) { ASSERT_LASX emit_int32(insn_I6RR(xvrotri_d_op, ui6, (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void vsrlni_b_h(FloatRegister vd, FloatRegister vj, int ui4) { ASSERT_LSX emit_int32(insn_I4RR( vsrlni_b_h_op, ui4, (int)vj->encoding(), (int)vd->encoding())); } ++ void vsrlni_h_w(FloatRegister vd, FloatRegister vj, int ui5) { ASSERT_LSX emit_int32(insn_I5RR( vsrlni_h_w_op, ui5, (int)vj->encoding(), (int)vd->encoding())); } ++ void vsrlni_w_d(FloatRegister vd, FloatRegister vj, int ui6) { ASSERT_LSX emit_int32(insn_I6RR( vsrlni_w_d_op, ui6, (int)vj->encoding(), (int)vd->encoding())); } ++ void vsrlni_d_q(FloatRegister vd, FloatRegister vj, int ui7) { ASSERT_LSX emit_int32(insn_I7RR( vsrlni_d_q_op, ui7, (int)vj->encoding(), (int)vd->encoding())); } ++ ++ void vpcnt_b(FloatRegister vd, FloatRegister vj) { ASSERT_LSX emit_int32(insn_RR( vpcnt_b_op, (int)vj->encoding(), (int)vd->encoding())); } ++ void vpcnt_h(FloatRegister vd, FloatRegister vj) { ASSERT_LSX emit_int32(insn_RR( vpcnt_h_op, (int)vj->encoding(), (int)vd->encoding())); } ++ void vpcnt_w(FloatRegister vd, FloatRegister vj) { ASSERT_LSX emit_int32(insn_RR( vpcnt_w_op, (int)vj->encoding(), (int)vd->encoding())); } ++ void vpcnt_d(FloatRegister vd, FloatRegister vj) { ASSERT_LSX emit_int32(insn_RR( vpcnt_d_op, (int)vj->encoding(), (int)vd->encoding())); } ++ void xvpcnt_b(FloatRegister xd, FloatRegister xj) { ASSERT_LASX emit_int32(insn_RR(xvpcnt_b_op, (int)xj->encoding(), (int)xd->encoding())); } ++ void xvpcnt_h(FloatRegister xd, FloatRegister xj) { ASSERT_LASX emit_int32(insn_RR(xvpcnt_h_op, (int)xj->encoding(), (int)xd->encoding())); } ++ void xvpcnt_w(FloatRegister xd, FloatRegister xj) { ASSERT_LASX emit_int32(insn_RR(xvpcnt_w_op, (int)xj->encoding(), (int)xd->encoding())); } ++ void xvpcnt_d(FloatRegister xd, FloatRegister xj) { ASSERT_LASX emit_int32(insn_RR(xvpcnt_d_op, (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void vbitclr_b(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vbitclr_b_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vbitclr_h(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vbitclr_h_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vbitclr_w(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vbitclr_w_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vbitclr_d(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vbitclr_d_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void xvbitclr_b(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvbitclr_b_op, (int)xk->encoding(), (int)xj->encoding(), 
(int)xd->encoding())); } ++ void xvbitclr_h(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvbitclr_h_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvbitclr_w(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvbitclr_w_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvbitclr_d(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvbitclr_d_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void vbitclri_b(FloatRegister vd, FloatRegister vj, int ui3) { ASSERT_LSX emit_int32(insn_I3RR( vbitclri_b_op, ui3, (int)vj->encoding(), (int)vd->encoding())); } ++ void vbitclri_h(FloatRegister vd, FloatRegister vj, int ui4) { ASSERT_LSX emit_int32(insn_I4RR( vbitclri_h_op, ui4, (int)vj->encoding(), (int)vd->encoding())); } ++ void vbitclri_w(FloatRegister vd, FloatRegister vj, int ui5) { ASSERT_LSX emit_int32(insn_I5RR( vbitclri_w_op, ui5, (int)vj->encoding(), (int)vd->encoding())); } ++ void vbitclri_d(FloatRegister vd, FloatRegister vj, int ui6) { ASSERT_LSX emit_int32(insn_I6RR( vbitclri_d_op, ui6, (int)vj->encoding(), (int)vd->encoding())); } ++ void xvbitclri_b(FloatRegister xd, FloatRegister xj, int ui3) { ASSERT_LASX emit_int32(insn_I3RR(xvbitclri_b_op, ui3, (int)xj->encoding(), (int)xd->encoding())); } ++ void xvbitclri_h(FloatRegister xd, FloatRegister xj, int ui4) { ASSERT_LASX emit_int32(insn_I4RR(xvbitclri_h_op, ui4, (int)xj->encoding(), (int)xd->encoding())); } ++ void xvbitclri_w(FloatRegister xd, FloatRegister xj, int ui5) { ASSERT_LASX emit_int32(insn_I5RR(xvbitclri_w_op, ui5, (int)xj->encoding(), (int)xd->encoding())); } ++ void xvbitclri_d(FloatRegister xd, FloatRegister xj, int ui6) { ASSERT_LASX emit_int32(insn_I6RR(xvbitclri_d_op, ui6, (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void vbitset_b(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vbitset_b_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vbitset_h(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vbitset_h_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vbitset_w(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vbitset_w_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vbitset_d(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vbitset_d_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void xvbitset_b(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvbitset_b_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvbitset_h(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvbitset_h_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvbitset_w(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvbitset_w_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvbitset_d(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvbitset_d_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void vbitseti_b(FloatRegister vd, FloatRegister vj, int ui3) { ASSERT_LSX emit_int32(insn_I3RR( vbitseti_b_op, ui3, 
(int)vj->encoding(), (int)vd->encoding())); } ++ void vbitseti_h(FloatRegister vd, FloatRegister vj, int ui4) { ASSERT_LSX emit_int32(insn_I4RR( vbitseti_h_op, ui4, (int)vj->encoding(), (int)vd->encoding())); } ++ void vbitseti_w(FloatRegister vd, FloatRegister vj, int ui5) { ASSERT_LSX emit_int32(insn_I5RR( vbitseti_w_op, ui5, (int)vj->encoding(), (int)vd->encoding())); } ++ void vbitseti_d(FloatRegister vd, FloatRegister vj, int ui6) { ASSERT_LSX emit_int32(insn_I6RR( vbitseti_d_op, ui6, (int)vj->encoding(), (int)vd->encoding())); } ++ void xvbitseti_b(FloatRegister xd, FloatRegister xj, int ui3) { ASSERT_LASX emit_int32(insn_I3RR(xvbitseti_b_op, ui3, (int)xj->encoding(), (int)xd->encoding())); } ++ void xvbitseti_h(FloatRegister xd, FloatRegister xj, int ui4) { ASSERT_LASX emit_int32(insn_I4RR(xvbitseti_h_op, ui4, (int)xj->encoding(), (int)xd->encoding())); } ++ void xvbitseti_w(FloatRegister xd, FloatRegister xj, int ui5) { ASSERT_LASX emit_int32(insn_I5RR(xvbitseti_w_op, ui5, (int)xj->encoding(), (int)xd->encoding())); } ++ void xvbitseti_d(FloatRegister xd, FloatRegister xj, int ui6) { ASSERT_LASX emit_int32(insn_I6RR(xvbitseti_d_op, ui6, (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void vbitrev_b(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vbitrev_b_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vbitrev_h(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vbitrev_h_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vbitrev_w(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vbitrev_w_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vbitrev_d(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vbitrev_d_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void xvbitrev_b(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvbitrev_b_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvbitrev_h(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvbitrev_h_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvbitrev_w(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvbitrev_w_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvbitrev_d(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvbitrev_d_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void vbitrevi_b(FloatRegister vd, FloatRegister vj, int ui3) { ASSERT_LSX emit_int32(insn_I3RR( vbitrevi_b_op, ui3, (int)vj->encoding(), (int)vd->encoding())); } ++ void vbitrevi_h(FloatRegister vd, FloatRegister vj, int ui4) { ASSERT_LSX emit_int32(insn_I4RR( vbitrevi_h_op, ui4, (int)vj->encoding(), (int)vd->encoding())); } ++ void vbitrevi_w(FloatRegister vd, FloatRegister vj, int ui5) { ASSERT_LSX emit_int32(insn_I5RR( vbitrevi_w_op, ui5, (int)vj->encoding(), (int)vd->encoding())); } ++ void vbitrevi_d(FloatRegister vd, FloatRegister vj, int ui6) { ASSERT_LSX emit_int32(insn_I6RR( vbitrevi_d_op, ui6, (int)vj->encoding(), (int)vd->encoding())); } ++ void xvbitrevi_b(FloatRegister xd, FloatRegister xj, int ui3) { ASSERT_LASX emit_int32(insn_I3RR(xvbitrevi_b_op, ui3, (int)xj->encoding(), (int)xd->encoding())); } ++ void 
xvbitrevi_h(FloatRegister xd, FloatRegister xj, int ui4) { ASSERT_LASX emit_int32(insn_I4RR(xvbitrevi_h_op, ui4, (int)xj->encoding(), (int)xd->encoding())); } ++ void xvbitrevi_w(FloatRegister xd, FloatRegister xj, int ui5) { ASSERT_LASX emit_int32(insn_I5RR(xvbitrevi_w_op, ui5, (int)xj->encoding(), (int)xd->encoding())); } ++ void xvbitrevi_d(FloatRegister xd, FloatRegister xj, int ui6) { ASSERT_LASX emit_int32(insn_I6RR(xvbitrevi_d_op, ui6, (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void vfadd_s(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vfadd_s_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vfadd_d(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vfadd_d_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void xvfadd_s(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvfadd_s_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvfadd_d(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvfadd_d_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void vfsub_s(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vfsub_s_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vfsub_d(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vfsub_d_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void xvfsub_s(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvfsub_s_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvfsub_d(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvfsub_d_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void vfmul_s(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vfmul_s_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vfmul_d(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vfmul_d_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void xvfmul_s(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvfmul_s_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvfmul_d(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvfmul_d_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void vfdiv_s(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vfdiv_s_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vfdiv_d(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vfdiv_d_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void xvfdiv_s(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvfdiv_s_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvfdiv_d(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvfdiv_d_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void vfmadd_s(FloatRegister vd, FloatRegister vj, FloatRegister vk, FloatRegister va) { ASSERT_LSX 
emit_int32(insn_RRRR( vfmadd_s_op, (int)va->encoding(), (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vfmadd_d(FloatRegister vd, FloatRegister vj, FloatRegister vk, FloatRegister va) { ASSERT_LSX emit_int32(insn_RRRR( vfmadd_d_op, (int)va->encoding(), (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void xvfmadd_s(FloatRegister xd, FloatRegister xj, FloatRegister xk, FloatRegister xa) { ASSERT_LASX emit_int32(insn_RRRR(xvfmadd_s_op, (int)xa->encoding(), (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvfmadd_d(FloatRegister xd, FloatRegister xj, FloatRegister xk, FloatRegister xa) { ASSERT_LASX emit_int32(insn_RRRR(xvfmadd_d_op, (int)xa->encoding(), (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void vfmsub_s(FloatRegister vd, FloatRegister vj, FloatRegister vk, FloatRegister va) { ASSERT_LSX emit_int32(insn_RRRR( vfmsub_s_op, (int)va->encoding(), (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vfmsub_d(FloatRegister vd, FloatRegister vj, FloatRegister vk, FloatRegister va) { ASSERT_LSX emit_int32(insn_RRRR( vfmsub_d_op, (int)va->encoding(), (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void xvfmsub_s(FloatRegister xd, FloatRegister xj, FloatRegister xk, FloatRegister xa) { ASSERT_LASX emit_int32(insn_RRRR(xvfmsub_s_op, (int)xa->encoding(), (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvfmsub_d(FloatRegister xd, FloatRegister xj, FloatRegister xk, FloatRegister xa) { ASSERT_LASX emit_int32(insn_RRRR(xvfmsub_d_op, (int)xa->encoding(), (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void vfnmadd_s(FloatRegister vd, FloatRegister vj, FloatRegister vk, FloatRegister va) { ASSERT_LSX emit_int32(insn_RRRR( vfnmadd_s_op, (int)va->encoding(), (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vfnmadd_d(FloatRegister vd, FloatRegister vj, FloatRegister vk, FloatRegister va) { ASSERT_LSX emit_int32(insn_RRRR( vfnmadd_d_op, (int)va->encoding(), (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void xvfnmadd_s(FloatRegister xd, FloatRegister xj, FloatRegister xk, FloatRegister xa) { ASSERT_LASX emit_int32(insn_RRRR(xvfnmadd_s_op, (int)xa->encoding(), (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvfnmadd_d(FloatRegister xd, FloatRegister xj, FloatRegister xk, FloatRegister xa) { ASSERT_LASX emit_int32(insn_RRRR(xvfnmadd_d_op, (int)xa->encoding(), (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void vfnmsub_s(FloatRegister vd, FloatRegister vj, FloatRegister vk, FloatRegister va) { ASSERT_LSX emit_int32(insn_RRRR( vfnmsub_s_op, (int)va->encoding(), (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vfnmsub_d(FloatRegister vd, FloatRegister vj, FloatRegister vk, FloatRegister va) { ASSERT_LSX emit_int32(insn_RRRR( vfnmsub_d_op, (int)va->encoding(), (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void xvfnmsub_s(FloatRegister xd, FloatRegister xj, FloatRegister xk, FloatRegister xa) { ASSERT_LASX emit_int32(insn_RRRR(xvfnmsub_s_op, (int)xa->encoding(), (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvfnmsub_d(FloatRegister xd, FloatRegister xj, FloatRegister xk, FloatRegister xa) { ASSERT_LASX emit_int32(insn_RRRR(xvfnmsub_d_op, (int)xa->encoding(), (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void 
vfmax_s(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vfmax_s_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vfmax_d(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vfmax_d_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void xvfmax_s(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvfmax_s_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvfmax_d(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvfmax_d_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void vfmin_s(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vfmin_s_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vfmin_d(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vfmin_d_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void xvfmin_s(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvfmin_s_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvfmin_d(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvfmin_d_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void vfclass_s(FloatRegister vd, FloatRegister vj) { ASSERT_LSX emit_int32(insn_RR( vfclass_s_op, (int)vj->encoding(), (int)vd->encoding())); } ++ void vfclass_d(FloatRegister vd, FloatRegister vj) { ASSERT_LSX emit_int32(insn_RR( vfclass_d_op, (int)vj->encoding(), (int)vd->encoding())); } ++ void xvfclass_s(FloatRegister xd, FloatRegister xj) { ASSERT_LASX emit_int32(insn_RR(xvfclass_s_op, (int)xj->encoding(), (int)xd->encoding())); } ++ void xvfclass_d(FloatRegister xd, FloatRegister xj) { ASSERT_LASX emit_int32(insn_RR(xvfclass_d_op, (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void vfsqrt_s(FloatRegister vd, FloatRegister vj) { ASSERT_LSX emit_int32(insn_RR( vfsqrt_s_op, (int)vj->encoding(), (int)vd->encoding())); } ++ void vfsqrt_d(FloatRegister vd, FloatRegister vj) { ASSERT_LSX emit_int32(insn_RR( vfsqrt_d_op, (int)vj->encoding(), (int)vd->encoding())); } ++ void xvfsqrt_s(FloatRegister xd, FloatRegister xj) { ASSERT_LASX emit_int32(insn_RR(xvfsqrt_s_op, (int)xj->encoding(), (int)xd->encoding())); } ++ void xvfsqrt_d(FloatRegister xd, FloatRegister xj) { ASSERT_LASX emit_int32(insn_RR(xvfsqrt_d_op, (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void vfcvtl_s_h(FloatRegister vd, FloatRegister rj) { ASSERT_LSX emit_int32(insn_RR( vfcvtl_s_h_op, (int)rj->encoding(), (int)vd->encoding())); } ++ void vfcvtl_d_s(FloatRegister vd, FloatRegister rj) { ASSERT_LSX emit_int32(insn_RR( vfcvtl_d_s_op, (int)rj->encoding(), (int)vd->encoding())); } ++ void xvfcvtl_s_h(FloatRegister xd, FloatRegister xj) { ASSERT_LASX emit_int32(insn_RR(xvfcvtl_s_h_op, (int)xj->encoding(), (int)xd->encoding())); } ++ void xvfcvtl_d_s(FloatRegister xd, FloatRegister xj) { ASSERT_LASX emit_int32(insn_RR(xvfcvtl_d_s_op, (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void vfcvth_s_h(FloatRegister vd, FloatRegister rj) { ASSERT_LSX emit_int32(insn_RR( vfcvth_s_h_op, (int)rj->encoding(), (int)vd->encoding())); } ++ void vfcvth_d_s(FloatRegister vd, FloatRegister rj) { ASSERT_LSX emit_int32(insn_RR( vfcvth_d_s_op, (int)rj->encoding(), (int)vd->encoding())); } 
++ void xvfcvth_s_h(FloatRegister xd, FloatRegister xj) { ASSERT_LASX emit_int32(insn_RR(xvfcvth_s_h_op, (int)xj->encoding(), (int)xd->encoding())); } ++ void xvfcvth_d_s(FloatRegister xd, FloatRegister xj) { ASSERT_LASX emit_int32(insn_RR(xvfcvth_d_s_op, (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void vfcvt_h_s(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vfcvt_h_s_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vfcvt_s_d(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vfcvt_s_d_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void xvfcvt_h_s(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvfcvt_h_s_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvfcvt_s_d(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvfcvt_s_d_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void vfrintrne_s(FloatRegister vd, FloatRegister vj) { ASSERT_LSX emit_int32(insn_RR( vfrintrne_s_op, (int)vj->encoding(), (int)vd->encoding())); } ++ void vfrintrne_d(FloatRegister vd, FloatRegister vj) { ASSERT_LSX emit_int32(insn_RR( vfrintrne_d_op, (int)vj->encoding(), (int)vd->encoding())); } ++ void xvfrintrne_s(FloatRegister xd, FloatRegister xj) { ASSERT_LASX emit_int32(insn_RR(xvfrintrne_s_op, (int)xj->encoding(), (int)xd->encoding())); } ++ void xvfrintrne_d(FloatRegister xd, FloatRegister xj) { ASSERT_LASX emit_int32(insn_RR(xvfrintrne_d_op, (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void vfrintrz_s(FloatRegister vd, FloatRegister vj) { ASSERT_LSX emit_int32(insn_RR( vfrintrz_s_op, (int)vj->encoding(), (int)vd->encoding())); } ++ void vfrintrz_d(FloatRegister vd, FloatRegister vj) { ASSERT_LSX emit_int32(insn_RR( vfrintrz_d_op, (int)vj->encoding(), (int)vd->encoding())); } ++ void xvfrintrz_s(FloatRegister xd, FloatRegister xj) { ASSERT_LASX emit_int32(insn_RR(xvfrintrz_s_op, (int)xj->encoding(), (int)xd->encoding())); } ++ void xvfrintrz_d(FloatRegister xd, FloatRegister xj) { ASSERT_LASX emit_int32(insn_RR(xvfrintrz_d_op, (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void vfrintrp_s(FloatRegister vd, FloatRegister vj) { ASSERT_LSX emit_int32(insn_RR( vfrintrp_s_op, (int)vj->encoding(), (int)vd->encoding())); } ++ void vfrintrp_d(FloatRegister vd, FloatRegister vj) { ASSERT_LSX emit_int32(insn_RR( vfrintrp_d_op, (int)vj->encoding(), (int)vd->encoding())); } ++ void xvfrintrp_s(FloatRegister xd, FloatRegister xj) { ASSERT_LASX emit_int32(insn_RR(xvfrintrp_s_op, (int)xj->encoding(), (int)xd->encoding())); } ++ void xvfrintrp_d(FloatRegister xd, FloatRegister xj) { ASSERT_LASX emit_int32(insn_RR(xvfrintrp_d_op, (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void vfrintrm_s(FloatRegister vd, FloatRegister vj) { ASSERT_LSX emit_int32(insn_RR( vfrintrm_s_op, (int)vj->encoding(), (int)vd->encoding())); } ++ void vfrintrm_d(FloatRegister vd, FloatRegister vj) { ASSERT_LSX emit_int32(insn_RR( vfrintrm_d_op, (int)vj->encoding(), (int)vd->encoding())); } ++ void xvfrintrm_s(FloatRegister xd, FloatRegister xj) { ASSERT_LASX emit_int32(insn_RR(xvfrintrm_s_op, (int)xj->encoding(), (int)xd->encoding())); } ++ void xvfrintrm_d(FloatRegister xd, FloatRegister xj) { ASSERT_LASX emit_int32(insn_RR(xvfrintrm_d_op, (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void vfrint_s(FloatRegister vd, FloatRegister vj) { ASSERT_LSX 
emit_int32(insn_RR( vfrint_s_op, (int)vj->encoding(), (int)vd->encoding())); } ++ void vfrint_d(FloatRegister vd, FloatRegister vj) { ASSERT_LSX emit_int32(insn_RR( vfrint_d_op, (int)vj->encoding(), (int)vd->encoding())); } ++ void xvfrint_s(FloatRegister xd, FloatRegister xj) { ASSERT_LASX emit_int32(insn_RR(xvfrint_s_op, (int)xj->encoding(), (int)xd->encoding())); } ++ void xvfrint_d(FloatRegister xd, FloatRegister xj) { ASSERT_LASX emit_int32(insn_RR(xvfrint_d_op, (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void vftintrne_w_s(FloatRegister vd, FloatRegister rj) { ASSERT_LSX emit_int32(insn_RR( vftintrne_w_s_op, (int)rj->encoding(), (int)vd->encoding())); } ++ void vftintrne_l_d(FloatRegister vd, FloatRegister rj) { ASSERT_LSX emit_int32(insn_RR( vftintrne_l_d_op, (int)rj->encoding(), (int)vd->encoding())); } ++ void xvftintrne_w_s(FloatRegister xd, FloatRegister xj) { ASSERT_LASX emit_int32(insn_RR(xvftintrne_w_s_op, (int)xj->encoding(), (int)xd->encoding())); } ++ void xvftintrne_l_d(FloatRegister xd, FloatRegister xj) { ASSERT_LASX emit_int32(insn_RR(xvftintrne_l_d_op, (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void vftintrz_w_s(FloatRegister vd, FloatRegister rj) { ASSERT_LSX emit_int32(insn_RR( vftintrz_w_s_op, (int)rj->encoding(), (int)vd->encoding())); } ++ void vftintrz_l_d(FloatRegister vd, FloatRegister rj) { ASSERT_LSX emit_int32(insn_RR( vftintrz_l_d_op, (int)rj->encoding(), (int)vd->encoding())); } ++ void xvftintrz_w_s(FloatRegister xd, FloatRegister xj) { ASSERT_LASX emit_int32(insn_RR(xvftintrz_w_s_op, (int)xj->encoding(), (int)xd->encoding())); } ++ void xvftintrz_l_d(FloatRegister xd, FloatRegister xj) { ASSERT_LASX emit_int32(insn_RR(xvftintrz_l_d_op, (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void vftintrp_w_s(FloatRegister vd, FloatRegister rj) { ASSERT_LSX emit_int32(insn_RR( vftintrp_w_s_op, (int)rj->encoding(), (int)vd->encoding())); } ++ void vftintrp_l_d(FloatRegister vd, FloatRegister rj) { ASSERT_LSX emit_int32(insn_RR( vftintrp_l_d_op, (int)rj->encoding(), (int)vd->encoding())); } ++ void xvftintrp_w_s(FloatRegister xd, FloatRegister xj) { ASSERT_LASX emit_int32(insn_RR(xvftintrp_w_s_op, (int)xj->encoding(), (int)xd->encoding())); } ++ void xvftintrp_l_d(FloatRegister xd, FloatRegister xj) { ASSERT_LASX emit_int32(insn_RR(xvftintrp_l_d_op, (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void vftintrm_w_s(FloatRegister vd, FloatRegister rj) { ASSERT_LSX emit_int32(insn_RR( vftintrm_w_s_op, (int)rj->encoding(), (int)vd->encoding())); } ++ void vftintrm_l_d(FloatRegister vd, FloatRegister rj) { ASSERT_LSX emit_int32(insn_RR( vftintrm_l_d_op, (int)rj->encoding(), (int)vd->encoding())); } ++ void xvftintrm_w_s(FloatRegister xd, FloatRegister xj) { ASSERT_LASX emit_int32(insn_RR(xvftintrm_w_s_op, (int)xj->encoding(), (int)xd->encoding())); } ++ void xvftintrm_l_d(FloatRegister xd, FloatRegister xj) { ASSERT_LASX emit_int32(insn_RR(xvftintrm_l_d_op, (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void vftint_w_s(FloatRegister vd, FloatRegister rj) { ASSERT_LSX emit_int32(insn_RR( vftint_w_s_op, (int)rj->encoding(), (int)vd->encoding())); } ++ void vftint_l_d(FloatRegister vd, FloatRegister rj) { ASSERT_LSX emit_int32(insn_RR( vftint_l_d_op, (int)rj->encoding(), (int)vd->encoding())); } ++ void xvftint_w_s(FloatRegister xd, FloatRegister xj) { ASSERT_LASX emit_int32(insn_RR(xvftint_w_s_op, (int)xj->encoding(), (int)xd->encoding())); } ++ void xvftint_l_d(FloatRegister xd, FloatRegister xj) { ASSERT_LASX 
emit_int32(insn_RR(xvftint_l_d_op, (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void vftintrne_w_d(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vftintrne_w_d_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void xvftintrne_w_d(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvftintrne_w_d_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void vftintrz_w_d(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vftintrz_w_d_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void xvftintrz_w_d(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvftintrz_w_d_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void vftintrp_w_d(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vftintrp_w_d_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void xvftintrp_w_d(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvftintrp_w_d_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void vftintrm_w_d(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vftintrm_w_d_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void xvftintrm_w_d(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvftintrm_w_d_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void vftint_w_d(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vftint_w_d_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void xvftint_w_d(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvftint_w_d_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void vftintrnel_l_s(FloatRegister vd, FloatRegister rj) { ASSERT_LSX emit_int32(insn_RR( vftintrnel_l_s_op, (int)rj->encoding(), (int)vd->encoding())); } ++ void xvftintrnel_l_s(FloatRegister xd, FloatRegister xj) { ASSERT_LASX emit_int32(insn_RR(xvftintrnel_l_s_op, (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void vftintrneh_l_s(FloatRegister vd, FloatRegister rj) { ASSERT_LSX emit_int32(insn_RR( vftintrneh_l_s_op, (int)rj->encoding(), (int)vd->encoding())); } ++ void xvftintrneh_l_s(FloatRegister xd, FloatRegister xj) { ASSERT_LASX emit_int32(insn_RR(xvftintrneh_l_s_op, (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void vftintrzl_l_s(FloatRegister vd, FloatRegister rj) { ASSERT_LSX emit_int32(insn_RR( vftintrzl_l_s_op, (int)rj->encoding(), (int)vd->encoding())); } ++ void xvftintrzl_l_s(FloatRegister xd, FloatRegister xj) { ASSERT_LASX emit_int32(insn_RR(xvftintrzl_l_s_op, (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void vftintrzh_l_s(FloatRegister vd, FloatRegister rj) { ASSERT_LSX emit_int32(insn_RR( vftintrzh_l_s_op, (int)rj->encoding(), (int)vd->encoding())); } ++ void xvftintrzh_l_s(FloatRegister xd, FloatRegister xj) { ASSERT_LASX emit_int32(insn_RR(xvftintrzh_l_s_op, (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void vftintrpl_l_s(FloatRegister vd, FloatRegister rj) { ASSERT_LSX emit_int32(insn_RR( vftintrpl_l_s_op, (int)rj->encoding(), (int)vd->encoding())); } ++ void xvftintrpl_l_s(FloatRegister xd, FloatRegister xj) { ASSERT_LASX 
emit_int32(insn_RR(xvftintrpl_l_s_op, (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void vftintrph_l_s(FloatRegister vd, FloatRegister rj) { ASSERT_LSX emit_int32(insn_RR( vftintrph_l_s_op, (int)rj->encoding(), (int)vd->encoding())); } ++ void xvftintrph_l_s(FloatRegister xd, FloatRegister xj) { ASSERT_LASX emit_int32(insn_RR(xvftintrph_l_s_op, (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void vftintrml_l_s(FloatRegister vd, FloatRegister rj) { ASSERT_LSX emit_int32(insn_RR( vftintrml_l_s_op, (int)rj->encoding(), (int)vd->encoding())); } ++ void xvftintrml_l_s(FloatRegister xd, FloatRegister xj) { ASSERT_LASX emit_int32(insn_RR(xvftintrml_l_s_op, (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void vftintrmh_l_s(FloatRegister vd, FloatRegister rj) { ASSERT_LSX emit_int32(insn_RR( vftintrmh_l_s_op, (int)rj->encoding(), (int)vd->encoding())); } ++ void xvftintrmh_l_s(FloatRegister xd, FloatRegister xj) { ASSERT_LASX emit_int32(insn_RR(xvftintrmh_l_s_op, (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void vftintl_l_s(FloatRegister vd, FloatRegister rj) { ASSERT_LSX emit_int32(insn_RR( vftintl_l_s_op, (int)rj->encoding(), (int)vd->encoding())); } ++ void xvftintl_l_s(FloatRegister xd, FloatRegister xj) { ASSERT_LASX emit_int32(insn_RR(xvftintl_l_s_op, (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void vftinth_l_s(FloatRegister vd, FloatRegister rj) { ASSERT_LSX emit_int32(insn_RR( vftinth_l_s_op, (int)rj->encoding(), (int)vd->encoding())); } ++ void xvftinth_l_s(FloatRegister xd, FloatRegister xj) { ASSERT_LASX emit_int32(insn_RR(xvftinth_l_s_op, (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void vffint_s_w(FloatRegister vd, FloatRegister rj) { ASSERT_LSX emit_int32(insn_RR( vffint_s_w_op, (int)rj->encoding(), (int)vd->encoding())); } ++ void vffint_d_l(FloatRegister vd, FloatRegister rj) { ASSERT_LSX emit_int32(insn_RR( vffint_d_l_op, (int)rj->encoding(), (int)vd->encoding())); } ++ void xvffint_s_w(FloatRegister xd, FloatRegister xj) { ASSERT_LASX emit_int32(insn_RR(xvffint_s_w_op, (int)xj->encoding(), (int)xd->encoding())); } ++ void xvffint_d_l(FloatRegister xd, FloatRegister xj) { ASSERT_LASX emit_int32(insn_RR(xvffint_d_l_op, (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void vffint_s_l(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vffint_s_l_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void xvffint_s_l(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvffint_s_l_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void vffintl_d_w(FloatRegister vd, FloatRegister rj) { ASSERT_LSX emit_int32(insn_RR( vffintl_d_w_op, (int)rj->encoding(), (int)vd->encoding())); } ++ void xvffintl_d_w(FloatRegister xd, FloatRegister xj) { ASSERT_LASX emit_int32(insn_RR(xvffintl_d_w_op, (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void vffinth_d_w(FloatRegister vd, FloatRegister rj) { ASSERT_LSX emit_int32(insn_RR( vffinth_d_w_op, (int)rj->encoding(), (int)vd->encoding())); } ++ void xvffinth_d_w(FloatRegister xd, FloatRegister xj) { ASSERT_LASX emit_int32(insn_RR(xvffinth_d_w_op, (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void vseq_b(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vseq_b_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vseq_h(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vseq_h_op, 
(int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vseq_w(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vseq_w_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vseq_d(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vseq_d_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void xvseq_b(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvseq_b_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvseq_h(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvseq_h_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvseq_w(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvseq_w_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvseq_d(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvseq_d_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void vsle_b(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vsle_b_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vsle_h(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vsle_h_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vsle_w(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vsle_w_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vsle_d(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vsle_d_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void xvsle_b(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvsle_b_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvsle_h(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvsle_h_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvsle_w(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvsle_w_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvsle_d(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvsle_d_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void vsle_bu(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vsle_bu_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vsle_hu(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vsle_hu_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vsle_wu(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vsle_wu_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vsle_du(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vsle_du_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void xvsle_bu(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvsle_bu_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void 
xvsle_hu(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvsle_hu_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvsle_wu(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvsle_wu_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvsle_du(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvsle_du_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void vslt_b(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vslt_b_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vslt_h(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vslt_h_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vslt_w(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vslt_w_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vslt_d(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vslt_d_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void xvslt_b(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvslt_b_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvslt_h(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvslt_h_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvslt_w(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvslt_w_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvslt_d(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvslt_d_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void vslt_bu(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vslt_bu_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vslt_hu(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vslt_hu_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vslt_wu(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vslt_wu_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vslt_du(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vslt_du_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void xvslt_bu(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvslt_bu_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvslt_hu(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvslt_hu_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvslt_wu(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvslt_wu_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvslt_du(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvslt_du_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void vslti_bu(FloatRegister vd, FloatRegister vj, int ui5) { 
ASSERT_LSX emit_int32(insn_I5RR( vslti_bu_op, ui5, (int)vj->encoding(), (int)vd->encoding())); } ++ void vslti_hu(FloatRegister vd, FloatRegister vj, int ui5) { ASSERT_LSX emit_int32(insn_I5RR( vslti_hu_op, ui5, (int)vj->encoding(), (int)vd->encoding())); } ++ void vslti_wu(FloatRegister vd, FloatRegister vj, int ui5) { ASSERT_LSX emit_int32(insn_I5RR( vslti_wu_op, ui5, (int)vj->encoding(), (int)vd->encoding())); } ++ void vslti_du(FloatRegister vd, FloatRegister vj, int ui5) { ASSERT_LSX emit_int32(insn_I5RR( vslti_du_op, ui5, (int)vj->encoding(), (int)vd->encoding())); } ++ void xvslti_bu(FloatRegister xd, FloatRegister xj, int ui5) { ASSERT_LASX emit_int32(insn_I5RR(xvslti_bu_op, ui5, (int)xj->encoding(), (int)xd->encoding())); } ++ void xvslti_hu(FloatRegister xd, FloatRegister xj, int ui5) { ASSERT_LASX emit_int32(insn_I5RR(xvslti_hu_op, ui5, (int)xj->encoding(), (int)xd->encoding())); } ++ void xvslti_wu(FloatRegister xd, FloatRegister xj, int ui5) { ASSERT_LASX emit_int32(insn_I5RR(xvslti_wu_op, ui5, (int)xj->encoding(), (int)xd->encoding())); } ++ void xvslti_du(FloatRegister xd, FloatRegister xj, int ui5) { ASSERT_LASX emit_int32(insn_I5RR(xvslti_du_op, ui5, (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void vfcmp_caf_s (FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRRR( vfcmp_cond_s_op, fcmp_caf , (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vfcmp_cun_s (FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRRR( vfcmp_cond_s_op, fcmp_cun , (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vfcmp_ceq_s (FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRRR( vfcmp_cond_s_op, fcmp_ceq , (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vfcmp_cueq_s (FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRRR( vfcmp_cond_s_op, fcmp_cueq, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vfcmp_clt_s (FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRRR( vfcmp_cond_s_op, fcmp_clt , (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vfcmp_cult_s (FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRRR( vfcmp_cond_s_op, fcmp_cult, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vfcmp_cle_s (FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRRR( vfcmp_cond_s_op, fcmp_cle , (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vfcmp_cule_s (FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRRR( vfcmp_cond_s_op, fcmp_cule, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vfcmp_cne_s (FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRRR( vfcmp_cond_s_op, fcmp_cne , (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vfcmp_cor_s (FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRRR( vfcmp_cond_s_op, fcmp_cor , (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vfcmp_cune_s (FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRRR( vfcmp_cond_s_op, fcmp_cune, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vfcmp_saf_s (FloatRegister vd, FloatRegister vj, 
FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRRR( vfcmp_cond_s_op, fcmp_saf , (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vfcmp_sun_s (FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRRR( vfcmp_cond_s_op, fcmp_sun , (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vfcmp_seq_s (FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRRR( vfcmp_cond_s_op, fcmp_seq , (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vfcmp_sueq_s (FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRRR( vfcmp_cond_s_op, fcmp_sueq, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vfcmp_slt_s (FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRRR( vfcmp_cond_s_op, fcmp_slt , (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vfcmp_sult_s (FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRRR( vfcmp_cond_s_op, fcmp_sult, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vfcmp_sle_s (FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRRR( vfcmp_cond_s_op, fcmp_sle , (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vfcmp_sule_s (FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRRR( vfcmp_cond_s_op, fcmp_sule, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vfcmp_sne_s (FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRRR( vfcmp_cond_s_op, fcmp_sne , (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vfcmp_sor_s (FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRRR( vfcmp_cond_s_op, fcmp_sor , (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vfcmp_sune_s (FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRRR( vfcmp_cond_s_op, fcmp_sune, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ ++ void vfcmp_caf_d (FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRRR( vfcmp_cond_d_op, fcmp_caf , (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vfcmp_cun_d (FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRRR( vfcmp_cond_d_op, fcmp_cun , (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vfcmp_ceq_d (FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRRR( vfcmp_cond_d_op, fcmp_ceq , (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vfcmp_cueq_d (FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRRR( vfcmp_cond_d_op, fcmp_cueq, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vfcmp_clt_d (FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRRR( vfcmp_cond_d_op, fcmp_clt , (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vfcmp_cult_d (FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRRR( vfcmp_cond_d_op, fcmp_cult, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vfcmp_cle_d (FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRRR( 
vfcmp_cond_d_op, fcmp_cle , (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vfcmp_cule_d (FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRRR( vfcmp_cond_d_op, fcmp_cule, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vfcmp_cne_d (FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRRR( vfcmp_cond_d_op, fcmp_cne , (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vfcmp_cor_d (FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRRR( vfcmp_cond_d_op, fcmp_cor , (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vfcmp_cune_d (FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRRR( vfcmp_cond_d_op, fcmp_cune, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vfcmp_saf_d (FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRRR( vfcmp_cond_d_op, fcmp_saf , (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vfcmp_sun_d (FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRRR( vfcmp_cond_d_op, fcmp_sun , (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vfcmp_seq_d (FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRRR( vfcmp_cond_d_op, fcmp_seq , (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vfcmp_sueq_d (FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRRR( vfcmp_cond_d_op, fcmp_sueq, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vfcmp_slt_d (FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRRR( vfcmp_cond_d_op, fcmp_slt , (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vfcmp_sult_d (FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRRR( vfcmp_cond_d_op, fcmp_sult, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vfcmp_sle_d (FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRRR( vfcmp_cond_d_op, fcmp_sle , (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vfcmp_sule_d (FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRRR( vfcmp_cond_d_op, fcmp_sule, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vfcmp_sne_d (FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRRR( vfcmp_cond_d_op, fcmp_sne , (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vfcmp_sor_d (FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRRR( vfcmp_cond_d_op, fcmp_sor , (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vfcmp_sune_d (FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRRR( vfcmp_cond_d_op, fcmp_sune, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ ++ void xvfcmp_caf_s (FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRRR(xvfcmp_cond_s_op, fcmp_caf , (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvfcmp_cun_s (FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRRR(xvfcmp_cond_s_op, fcmp_cun , (int)xk->encoding(), 
(int)xj->encoding(), (int)xd->encoding())); } ++ void xvfcmp_ceq_s (FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRRR(xvfcmp_cond_s_op, fcmp_ceq , (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvfcmp_cueq_s (FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRRR(xvfcmp_cond_s_op, fcmp_cueq, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvfcmp_clt_s (FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRRR(xvfcmp_cond_s_op, fcmp_clt , (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvfcmp_cult_s (FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRRR(xvfcmp_cond_s_op, fcmp_cult, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvfcmp_cle_s (FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRRR(xvfcmp_cond_s_op, fcmp_cle , (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvfcmp_cule_s (FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRRR(xvfcmp_cond_s_op, fcmp_cule, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvfcmp_cne_s (FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRRR(xvfcmp_cond_s_op, fcmp_cne , (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvfcmp_cor_s (FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRRR(xvfcmp_cond_s_op, fcmp_cor , (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvfcmp_cune_s (FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRRR(xvfcmp_cond_s_op, fcmp_cune, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvfcmp_saf_s (FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRRR(xvfcmp_cond_s_op, fcmp_saf , (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvfcmp_sun_s (FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRRR(xvfcmp_cond_s_op, fcmp_sun , (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvfcmp_seq_s (FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRRR(xvfcmp_cond_s_op, fcmp_seq , (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvfcmp_sueq_s (FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRRR(xvfcmp_cond_s_op, fcmp_sueq, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvfcmp_slt_s (FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRRR(xvfcmp_cond_s_op, fcmp_slt , (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvfcmp_sult_s (FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRRR(xvfcmp_cond_s_op, fcmp_sult, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvfcmp_sle_s (FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRRR(xvfcmp_cond_s_op, fcmp_sle , (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvfcmp_sule_s (FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRRR(xvfcmp_cond_s_op, fcmp_sule, (int)xk->encoding(), (int)xj->encoding(), 
(int)xd->encoding())); } ++ void xvfcmp_sne_s (FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRRR(xvfcmp_cond_s_op, fcmp_sne , (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvfcmp_sor_s (FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRRR(xvfcmp_cond_s_op, fcmp_sor , (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvfcmp_sune_s (FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRRR(xvfcmp_cond_s_op, fcmp_sune, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void xvfcmp_caf_d (FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRRR(xvfcmp_cond_d_op, fcmp_caf , (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvfcmp_cun_d (FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRRR(xvfcmp_cond_d_op, fcmp_cun , (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvfcmp_ceq_d (FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRRR(xvfcmp_cond_d_op, fcmp_ceq , (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvfcmp_cueq_d (FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRRR(xvfcmp_cond_d_op, fcmp_cueq, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvfcmp_clt_d (FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRRR(xvfcmp_cond_d_op, fcmp_clt , (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvfcmp_cult_d (FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRRR(xvfcmp_cond_d_op, fcmp_cult, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvfcmp_cle_d (FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRRR(xvfcmp_cond_d_op, fcmp_cle , (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvfcmp_cule_d (FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRRR(xvfcmp_cond_d_op, fcmp_cule, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvfcmp_cne_d (FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRRR(xvfcmp_cond_d_op, fcmp_cne , (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvfcmp_cor_d (FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRRR(xvfcmp_cond_d_op, fcmp_cor , (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvfcmp_cune_d (FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRRR(xvfcmp_cond_d_op, fcmp_cune, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvfcmp_saf_d (FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRRR(xvfcmp_cond_d_op, fcmp_saf , (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvfcmp_sun_d (FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRRR(xvfcmp_cond_d_op, fcmp_sun , (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvfcmp_seq_d (FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRRR(xvfcmp_cond_d_op, fcmp_seq , (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ 
void xvfcmp_sueq_d (FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRRR(xvfcmp_cond_d_op, fcmp_sueq, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvfcmp_slt_d (FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRRR(xvfcmp_cond_d_op, fcmp_slt , (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvfcmp_sult_d (FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRRR(xvfcmp_cond_d_op, fcmp_sult, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvfcmp_sle_d (FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRRR(xvfcmp_cond_d_op, fcmp_sle , (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvfcmp_sule_d (FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRRR(xvfcmp_cond_d_op, fcmp_sule, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvfcmp_sne_d (FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRRR(xvfcmp_cond_d_op, fcmp_sne , (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvfcmp_sor_d (FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRRR(xvfcmp_cond_d_op, fcmp_sor , (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvfcmp_sune_d (FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRRR(xvfcmp_cond_d_op, fcmp_sune, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void vbitsel_v(FloatRegister vd, FloatRegister vj, FloatRegister vk, FloatRegister va) { ASSERT_LSX emit_int32(insn_RRRR( vbitsel_v_op, (int)va->encoding(), (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void xvbitsel_v(FloatRegister xd, FloatRegister xj, FloatRegister xk, FloatRegister xa) { ASSERT_LASX emit_int32(insn_RRRR(xvbitsel_v_op, (int)xa->encoding(), (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void vinsgr2vr_b(FloatRegister vd, Register rj, int ui4) { ASSERT_LSX emit_int32(insn_I4RR( vinsgr2vr_b_op, ui4, (int)rj->encoding(), (int)vd->encoding())); } ++ void vinsgr2vr_h(FloatRegister vd, Register rj, int ui3) { ASSERT_LSX emit_int32(insn_I3RR( vinsgr2vr_h_op, ui3, (int)rj->encoding(), (int)vd->encoding())); } ++ void vinsgr2vr_w(FloatRegister vd, Register rj, int ui2) { ASSERT_LSX emit_int32(insn_I2RR( vinsgr2vr_w_op, ui2, (int)rj->encoding(), (int)vd->encoding())); } ++ void vinsgr2vr_d(FloatRegister vd, Register rj, int ui1) { ASSERT_LSX emit_int32(insn_I1RR( vinsgr2vr_d_op, ui1, (int)rj->encoding(), (int)vd->encoding())); } ++ ++ void xvinsgr2vr_w(FloatRegister xd, Register rj, int ui3) { ASSERT_LASX emit_int32(insn_I3RR(xvinsgr2vr_w_op, ui3, (int)rj->encoding(), (int)xd->encoding())); } ++ void xvinsgr2vr_d(FloatRegister xd, Register rj, int ui2) { ASSERT_LASX emit_int32(insn_I2RR(xvinsgr2vr_d_op, ui2, (int)rj->encoding(), (int)xd->encoding())); } ++ ++ void vpickve2gr_b(Register rd, FloatRegister vj, int ui4) { ASSERT_LSX emit_int32(insn_I4RR( vpickve2gr_b_op, ui4, (int)vj->encoding(), (int)rd->encoding())); } ++ void vpickve2gr_h(Register rd, FloatRegister vj, int ui3) { ASSERT_LSX emit_int32(insn_I3RR( vpickve2gr_h_op, ui3, (int)vj->encoding(), (int)rd->encoding())); } ++ void vpickve2gr_w(Register rd, FloatRegister vj, int ui2) { ASSERT_LSX emit_int32(insn_I2RR( vpickve2gr_w_op, ui2, 
(int)vj->encoding(), (int)rd->encoding())); } ++ void vpickve2gr_d(Register rd, FloatRegister vj, int ui1) { ASSERT_LSX emit_int32(insn_I1RR( vpickve2gr_d_op, ui1, (int)vj->encoding(), (int)rd->encoding())); } ++ ++ void vpickve2gr_bu(Register rd, FloatRegister vj, int ui4) { ASSERT_LSX emit_int32(insn_I4RR( vpickve2gr_bu_op, ui4, (int)vj->encoding(), (int)rd->encoding())); } ++ void vpickve2gr_hu(Register rd, FloatRegister vj, int ui3) { ASSERT_LSX emit_int32(insn_I3RR( vpickve2gr_hu_op, ui3, (int)vj->encoding(), (int)rd->encoding())); } ++ void vpickve2gr_wu(Register rd, FloatRegister vj, int ui2) { ASSERT_LSX emit_int32(insn_I2RR( vpickve2gr_wu_op, ui2, (int)vj->encoding(), (int)rd->encoding())); } ++ void vpickve2gr_du(Register rd, FloatRegister vj, int ui1) { ASSERT_LSX emit_int32(insn_I1RR( vpickve2gr_du_op, ui1, (int)vj->encoding(), (int)rd->encoding())); } ++ ++ void xvpickve2gr_w(Register rd, FloatRegister xj, int ui3) { ASSERT_LASX emit_int32(insn_I3RR(xvpickve2gr_w_op, ui3, (int)xj->encoding(), (int)rd->encoding())); } ++ void xvpickve2gr_d(Register rd, FloatRegister xj, int ui2) { ASSERT_LASX emit_int32(insn_I2RR(xvpickve2gr_d_op, ui2, (int)xj->encoding(), (int)rd->encoding())); } ++ ++ void xvpickve2gr_wu(Register rd, FloatRegister xj, int ui3) { ASSERT_LASX emit_int32(insn_I3RR(xvpickve2gr_wu_op, ui3, (int)xj->encoding(), (int)rd->encoding())); } ++ void xvpickve2gr_du(Register rd, FloatRegister xj, int ui2) { ASSERT_LASX emit_int32(insn_I2RR(xvpickve2gr_du_op, ui2, (int)xj->encoding(), (int)rd->encoding())); } ++ ++ void vreplgr2vr_b(FloatRegister vd, Register rj) { ASSERT_LSX emit_int32(insn_RR( vreplgr2vr_b_op, (int)rj->encoding(), (int)vd->encoding())); } ++ void vreplgr2vr_h(FloatRegister vd, Register rj) { ASSERT_LSX emit_int32(insn_RR( vreplgr2vr_h_op, (int)rj->encoding(), (int)vd->encoding())); } ++ void vreplgr2vr_w(FloatRegister vd, Register rj) { ASSERT_LSX emit_int32(insn_RR( vreplgr2vr_w_op, (int)rj->encoding(), (int)vd->encoding())); } ++ void vreplgr2vr_d(FloatRegister vd, Register rj) { ASSERT_LSX emit_int32(insn_RR( vreplgr2vr_d_op, (int)rj->encoding(), (int)vd->encoding())); } ++ void xvreplgr2vr_b(FloatRegister xd, Register rj) { ASSERT_LASX emit_int32(insn_RR(xvreplgr2vr_b_op, (int)rj->encoding(), (int)xd->encoding())); } ++ void xvreplgr2vr_h(FloatRegister xd, Register rj) { ASSERT_LASX emit_int32(insn_RR(xvreplgr2vr_h_op, (int)rj->encoding(), (int)xd->encoding())); } ++ void xvreplgr2vr_w(FloatRegister xd, Register rj) { ASSERT_LASX emit_int32(insn_RR(xvreplgr2vr_w_op, (int)rj->encoding(), (int)xd->encoding())); } ++ void xvreplgr2vr_d(FloatRegister xd, Register rj) { ASSERT_LASX emit_int32(insn_RR(xvreplgr2vr_d_op, (int)rj->encoding(), (int)xd->encoding())); } ++ ++ void vreplvei_b(FloatRegister vd, FloatRegister vj, int ui4) { ASSERT_LSX emit_int32(insn_I4RR(vreplvei_b_op, ui4, (int)vj->encoding(), (int)vd->encoding())); } ++ void vreplvei_h(FloatRegister vd, FloatRegister vj, int ui3) { ASSERT_LSX emit_int32(insn_I3RR(vreplvei_h_op, ui3, (int)vj->encoding(), (int)vd->encoding())); } ++ void vreplvei_w(FloatRegister vd, FloatRegister vj, int ui2) { ASSERT_LSX emit_int32(insn_I2RR(vreplvei_w_op, ui2, (int)vj->encoding(), (int)vd->encoding())); } ++ void vreplvei_d(FloatRegister vd, FloatRegister vj, int ui1) { ASSERT_LSX emit_int32(insn_I1RR(vreplvei_d_op, ui1, (int)vj->encoding(), (int)vd->encoding())); } ++ ++ void xvreplve0_b(FloatRegister xd, FloatRegister xj) { ASSERT_LASX emit_int32(insn_RR(xvreplve0_b_op, (int)xj->encoding(), 
(int)xd->encoding())); } ++ void xvreplve0_h(FloatRegister xd, FloatRegister xj) { ASSERT_LASX emit_int32(insn_RR(xvreplve0_h_op, (int)xj->encoding(), (int)xd->encoding())); } ++ void xvreplve0_w(FloatRegister xd, FloatRegister xj) { ASSERT_LASX emit_int32(insn_RR(xvreplve0_w_op, (int)xj->encoding(), (int)xd->encoding())); } ++ void xvreplve0_d(FloatRegister xd, FloatRegister xj) { ASSERT_LASX emit_int32(insn_RR(xvreplve0_d_op, (int)xj->encoding(), (int)xd->encoding())); } ++ void xvreplve0_q(FloatRegister xd, FloatRegister xj) { ASSERT_LASX emit_int32(insn_RR(xvreplve0_q_op, (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void xvinsve0_w(FloatRegister xd, FloatRegister xj, int ui3) { ASSERT_LASX emit_int32(insn_I3RR(xvinsve0_w_op, ui3, (int)xj->encoding(), (int)xd->encoding())); } ++ void xvinsve0_d(FloatRegister xd, FloatRegister xj, int ui2) { ASSERT_LASX emit_int32(insn_I2RR(xvinsve0_d_op, ui2, (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void xvpickve_w(FloatRegister xd, FloatRegister xj, int ui3) { ASSERT_LASX emit_int32(insn_I3RR(xvpickve_w_op, ui3, (int)xj->encoding(), (int)xd->encoding())); } ++ void xvpickve_d(FloatRegister xd, FloatRegister xj, int ui2) { ASSERT_LASX emit_int32(insn_I2RR(xvpickve_d_op, ui2, (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void vshuf_b(FloatRegister vd, FloatRegister vj, FloatRegister vk, FloatRegister va) { ASSERT_LSX emit_int32(insn_RRRR( vshuf_b_op, (int)va->encoding(), (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void xvshuf_b(FloatRegister xd, FloatRegister xj, FloatRegister xk, FloatRegister xa) { ASSERT_LASX emit_int32(insn_RRRR(xvshuf_b_op, (int)xa->encoding(), (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void vshuf_h(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vshuf_h_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vshuf_w(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vshuf_w_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vshuf_d(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vshuf_d_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ ++ void xvshuf_h(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvshuf_h_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvshuf_w(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvshuf_w_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvshuf_d(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvshuf_d_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void xvperm_w(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvperm_w_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void vshuf4i_b(FloatRegister vd, FloatRegister vj, int ui8) { ASSERT_LSX assert(is_uimm(ui8, 8), "not a unsigned 8-bit int"); emit_int32(insn_I8RR( vshuf4i_b_op, ui8, (int)vj->encoding(), (int)vd->encoding())); } ++ void vshuf4i_h(FloatRegister vd, FloatRegister vj, int ui8) { ASSERT_LSX assert(is_uimm(ui8, 8), "not a unsigned 8-bit int"); emit_int32(insn_I8RR( vshuf4i_h_op, ui8, (int)vj->encoding(), (int)vd->encoding())); } ++ void vshuf4i_w(FloatRegister vd, FloatRegister vj, int ui8) { ASSERT_LSX 
assert(is_uimm(ui8, 8), "not a unsigned 8-bit int"); emit_int32(insn_I8RR( vshuf4i_w_op, ui8, (int)vj->encoding(), (int)vd->encoding())); } ++ void xvshuf4i_b(FloatRegister xd, FloatRegister xj, int ui8) { ASSERT_LASX assert(is_uimm(ui8, 8), "not a unsigned 8-bit int"); emit_int32(insn_I8RR(xvshuf4i_b_op, ui8, (int)xj->encoding(), (int)xd->encoding())); } ++ void xvshuf4i_h(FloatRegister xd, FloatRegister xj, int ui8) { ASSERT_LASX assert(is_uimm(ui8, 8), "not a unsigned 8-bit int"); emit_int32(insn_I8RR(xvshuf4i_h_op, ui8, (int)xj->encoding(), (int)xd->encoding())); } ++ void xvshuf4i_w(FloatRegister xd, FloatRegister xj, int ui8) { ASSERT_LASX assert(is_uimm(ui8, 8), "not a unsigned 8-bit int"); emit_int32(insn_I8RR(xvshuf4i_w_op, ui8, (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void vshuf4i_d(FloatRegister vd, FloatRegister vj, int ui8) { ASSERT_LSX assert(is_uimm(ui8, 8), "not a unsigned 8-bit int"); emit_int32(insn_I8RR( vshuf4i_d_op, ui8, (int)vj->encoding(), (int)vd->encoding())); } ++ void xvshuf4i_d(FloatRegister xd, FloatRegister xj, int ui8) { ASSERT_LASX assert(is_uimm(ui8, 8), "not a unsigned 8-bit int"); emit_int32(insn_I8RR(xvshuf4i_d_op, ui8, (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void vpermi_w(FloatRegister vd, FloatRegister vj, int ui8) { ASSERT_LSX assert(is_uimm(ui8, 8), "not a unsigned 8-bit int"); emit_int32(insn_I8RR( vpermi_w_op, ui8, (int)vj->encoding(), (int)vd->encoding())); } ++ void xvpermi_w(FloatRegister xd, FloatRegister xj, int ui8) { ASSERT_LASX assert(is_uimm(ui8, 8), "not a unsigned 8-bit int"); emit_int32(insn_I8RR(xvpermi_w_op, ui8, (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void xvpermi_d(FloatRegister xd, FloatRegister xj, int ui8) { ASSERT_LASX assert(is_uimm(ui8, 8), "not a unsigned 8-bit int"); emit_int32(insn_I8RR(xvpermi_d_op, ui8, (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void xvpermi_q(FloatRegister xd, FloatRegister xj, int ui8) { ASSERT_LASX assert(is_uimm(ui8, 8), "not a unsigned 8-bit int"); emit_int32(insn_I8RR(xvpermi_q_op, ui8, (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void vld(FloatRegister vd, Register rj, int si12) { ASSERT_LSX assert(is_simm(si12, 12), "not a signed 12-bit int"); emit_int32(insn_I12RR( vld_op, si12, (int)rj->encoding(), (int)vd->encoding()));} ++ void xvld(FloatRegister xd, Register rj, int si12) { ASSERT_LASX assert(is_simm(si12, 12), "not a signed 12-bit int"); emit_int32(insn_I12RR(xvld_op, si12, (int)rj->encoding(), (int)xd->encoding()));} ++ ++ void vst(FloatRegister vd, Register rj, int si12) { ASSERT_LSX assert(is_simm(si12, 12), "not a signed 12-bit int"); emit_int32(insn_I12RR( vst_op, si12, (int)rj->encoding(), (int)vd->encoding()));} ++ void xvst(FloatRegister xd, Register rj, int si12) { ASSERT_LASX assert(is_simm(si12, 12), "not a signed 12-bit int"); emit_int32(insn_I12RR(xvst_op, si12, (int)rj->encoding(), (int)xd->encoding()));} ++ ++ void vldx(FloatRegister vd, Register rj, Register rk) { ASSERT_LSX emit_int32(insn_RRR( vldx_op, (int)rk->encoding(), (int)rj->encoding(), (int)vd->encoding())); } ++ void xvldx(FloatRegister xd, Register rj, Register rk) { ASSERT_LASX emit_int32(insn_RRR(xvldx_op, (int)rk->encoding(), (int)rj->encoding(), (int)xd->encoding())); } ++ ++ void vstx(FloatRegister vd, Register rj, Register rk) { ASSERT_LSX emit_int32(insn_RRR( vstx_op, (int)rk->encoding(), (int)rj->encoding(), (int)vd->encoding())); } ++ void xvstx(FloatRegister xd, Register rj, Register rk) { ASSERT_LASX emit_int32(insn_RRR(xvstx_op, (int)rk->encoding(), 
(int)rj->encoding(), (int)xd->encoding())); } ++ ++#undef ASSERT_LSX ++#undef ASSERT_LASX ++ ++public: ++ // Creation ++ Assembler(CodeBuffer* code) : AbstractAssembler(code) {} ++ ++ // Decoding ++ static address locate_operand(address inst, WhichOperand which); ++ static address locate_next_instruction(address inst); ++}; ++ ++#endif // CPU_LOONGARCH_ASSEMBLER_LOONGARCH_HPP +diff --git a/src/hotspot/cpu/loongarch/assembler_loongarch.inline.hpp b/src/hotspot/cpu/loongarch/assembler_loongarch.inline.hpp +new file mode 100644 +index 0000000000..9ca0cd4504 +--- /dev/null ++++ b/src/hotspot/cpu/loongarch/assembler_loongarch.inline.hpp +@@ -0,0 +1,33 @@ ++/* ++ * Copyright (c) 1997, 2010, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#ifndef CPU_LOONGARCH_ASSEMBLER_LOONGARCH_INLINE_HPP ++#define CPU_LOONGARCH_ASSEMBLER_LOONGARCH_INLINE_HPP ++ ++#include "asm/assembler.inline.hpp" ++#include "asm/codeBuffer.hpp" ++#include "code/codeCache.hpp" ++ ++#endif // CPU_LOONGARCH_ASSEMBLER_LOONGARCH_INLINE_HPP +diff --git a/src/hotspot/cpu/loongarch/bytes_loongarch.hpp b/src/hotspot/cpu/loongarch/bytes_loongarch.hpp +new file mode 100644 +index 0000000000..c15344eb39 +--- /dev/null ++++ b/src/hotspot/cpu/loongarch/bytes_loongarch.hpp +@@ -0,0 +1,73 @@ ++/* ++ * Copyright (c) 1997, 2010, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2021, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ * ++ */ ++ ++#ifndef CPU_LOONGARCH_BYTES_LOONGARCH_HPP ++#define CPU_LOONGARCH_BYTES_LOONGARCH_HPP ++ ++#include "memory/allocation.hpp" ++ ++class Bytes: AllStatic { ++ public: ++ // Returns true if the byte ordering used by Java is different from the native byte ordering ++ // of the underlying machine. For example, this is true for Intel x86, but false for Solaris ++ // on Sparc. ++ // we use LoongArch, so return true ++ static inline bool is_Java_byte_ordering_different(){ return true; } ++ ++ ++ // Efficient reading and writing of unaligned unsigned data in platform-specific byte ordering ++ // (no special code is needed since LoongArch CPUs can access unaligned data) ++ static inline u2 get_native_u2(address p) { return *(u2*)p; } ++ static inline u4 get_native_u4(address p) { return *(u4*)p; } ++ static inline u8 get_native_u8(address p) { return *(u8*)p; } ++ ++ static inline void put_native_u2(address p, u2 x) { *(u2*)p = x; } ++ static inline void put_native_u4(address p, u4 x) { *(u4*)p = x; } ++ static inline void put_native_u8(address p, u8 x) { *(u8*)p = x; } ++ ++ ++ // Efficient reading and writing of unaligned unsigned data in Java ++ // byte ordering (i.e. big-endian ordering). Byte-order reversal is ++ // needed since LoongArch64 CPUs use little-endian format. ++ static inline u2 get_Java_u2(address p) { return swap_u2(get_native_u2(p)); } ++ static inline u4 get_Java_u4(address p) { return swap_u4(get_native_u4(p)); } ++ static inline u8 get_Java_u8(address p) { return swap_u8(get_native_u8(p)); } ++ ++ static inline void put_Java_u2(address p, u2 x) { put_native_u2(p, swap_u2(x)); } ++ static inline void put_Java_u4(address p, u4 x) { put_native_u4(p, swap_u4(x)); } ++ static inline void put_Java_u8(address p, u8 x) { put_native_u8(p, swap_u8(x)); } ++ ++ ++ // Efficient swapping of byte ordering ++ static inline u2 swap_u2(u2 x); // compiler-dependent implementation ++ static inline u4 swap_u4(u4 x); // compiler-dependent implementation ++ static inline u8 swap_u8(u8 x); ++}; ++ ++ ++// The following header contains the implementations of swap_u2, swap_u4, and swap_u8[_base] ++#include OS_CPU_HEADER_INLINE(bytes) ++ ++#endif // CPU_LOONGARCH_BYTES_LOONGARCH_HPP +diff --git a/src/hotspot/cpu/loongarch/c1_CodeStubs_loongarch_64.cpp b/src/hotspot/cpu/loongarch/c1_CodeStubs_loongarch_64.cpp +new file mode 100644 +index 0000000000..c0eeb63962 +--- /dev/null ++++ b/src/hotspot/cpu/loongarch/c1_CodeStubs_loongarch_64.cpp +@@ -0,0 +1,344 @@ ++/* ++ * Copyright (c) 1999, 2021, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2021, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 
++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#include "precompiled.hpp" ++#include "asm/macroAssembler.inline.hpp" ++#include "c1/c1_CodeStubs.hpp" ++#include "c1/c1_FrameMap.hpp" ++#include "c1/c1_LIRAssembler.hpp" ++#include "c1/c1_MacroAssembler.hpp" ++#include "c1/c1_Runtime1.hpp" ++#include "classfile/javaClasses.hpp" ++#include "nativeInst_loongarch.hpp" ++#include "runtime/sharedRuntime.hpp" ++#include "vmreg_loongarch.inline.hpp" ++ ++#define __ ce->masm()-> ++ ++void CounterOverflowStub::emit_code(LIR_Assembler* ce) { ++ __ bind(_entry); ++ Metadata *m = _method->as_constant_ptr()->as_metadata(); ++ __ mov_metadata(SCR2, m); ++ ce->store_parameter(SCR2, 1); ++ ce->store_parameter(_bci, 0); ++ __ call(Runtime1::entry_for(Runtime1::counter_overflow_id), relocInfo::runtime_call_type); ++ ce->add_call_info_here(_info); ++ ce->verify_oop_map(_info); ++ __ b(_continuation); ++} ++ ++RangeCheckStub::RangeCheckStub(CodeEmitInfo* info, LIR_Opr index, LIR_Opr array) ++ : _index(index), _array(array), _throw_index_out_of_bounds_exception(false) { ++ assert(info != NULL, "must have info"); ++ _info = new CodeEmitInfo(info); ++} ++ ++RangeCheckStub::RangeCheckStub(CodeEmitInfo* info, LIR_Opr index) ++ : _index(index), _array(NULL), _throw_index_out_of_bounds_exception(true) { ++ assert(info != NULL, "must have info"); ++ _info = new CodeEmitInfo(info); ++} ++ ++void RangeCheckStub::emit_code(LIR_Assembler* ce) { ++ __ bind(_entry); ++ if (_info->deoptimize_on_exception()) { ++ address a = Runtime1::entry_for(Runtime1::predicate_failed_trap_id); ++ __ call(a, relocInfo::runtime_call_type); ++ ce->add_call_info_here(_info); ++ ce->verify_oop_map(_info); ++ debug_only(__ should_not_reach_here()); ++ return; ++ } ++ ++ if (_index->is_cpu_register()) { ++ __ move(SCR1, _index->as_register()); ++ } else { ++ __ li(SCR1, _index->as_jint()); ++ } ++ Runtime1::StubID stub_id; ++ if (_throw_index_out_of_bounds_exception) { ++ stub_id = Runtime1::throw_index_exception_id; ++ } else { ++ assert(_array != NULL, "sanity"); ++ __ move(SCR2, _array->as_pointer_register()); ++ stub_id = Runtime1::throw_range_check_failed_id; ++ } ++ __ call(Runtime1::entry_for(stub_id), relocInfo::runtime_call_type); ++ ce->add_call_info_here(_info); ++ ce->verify_oop_map(_info); ++ debug_only(__ should_not_reach_here()); ++} ++ ++PredicateFailedStub::PredicateFailedStub(CodeEmitInfo* info) { ++ _info = new CodeEmitInfo(info); ++} ++ ++void PredicateFailedStub::emit_code(LIR_Assembler* ce) { ++ __ bind(_entry); ++ address a = Runtime1::entry_for(Runtime1::predicate_failed_trap_id); ++ __ call(a, relocInfo::runtime_call_type); ++ ce->add_call_info_here(_info); ++ ce->verify_oop_map(_info); ++ debug_only(__ should_not_reach_here()); ++} ++ ++void DivByZeroStub::emit_code(LIR_Assembler* ce) { ++ if (_offset != -1) { ++ ce->compilation()->implicit_exception_table()->append(_offset, __ offset()); ++ } ++ __ bind(_entry); ++ __ call(Runtime1::entry_for(Runtime1::throw_div0_exception_id), relocInfo::runtime_call_type); ++ ce->add_call_info_here(_info); ++ ce->verify_oop_map(_info); ++#ifdef ASSERT ++ __ should_not_reach_here(); ++#endif ++} ++ ++// Implementation of NewInstanceStub ++ ++NewInstanceStub::NewInstanceStub(LIR_Opr klass_reg, LIR_Opr result, ciInstanceKlass* klass, ++ CodeEmitInfo* info, Runtime1::StubID stub_id) { ++ _result = result; ++ _klass = klass; ++ _klass_reg = 
klass_reg; ++ _info = new CodeEmitInfo(info); ++ assert(stub_id == Runtime1::new_instance_id || ++ stub_id == Runtime1::fast_new_instance_id || ++ stub_id == Runtime1::fast_new_instance_init_check_id, ++ "need new_instance id"); ++ _stub_id = stub_id; ++} ++ ++void NewInstanceStub::emit_code(LIR_Assembler* ce) { ++ assert(__ rsp_offset() == 0, "frame size should be fixed"); ++ __ bind(_entry); ++ __ move(A3, _klass_reg->as_register()); ++ __ call(Runtime1::entry_for(_stub_id), relocInfo::runtime_call_type); ++ ce->add_call_info_here(_info); ++ ce->verify_oop_map(_info); ++ assert(_result->as_register() == A0, "result must in A0"); ++ __ b(_continuation); ++} ++ ++// Implementation of NewTypeArrayStub ++ ++NewTypeArrayStub::NewTypeArrayStub(LIR_Opr klass_reg, LIR_Opr length, LIR_Opr result, ++ CodeEmitInfo* info) { ++ _klass_reg = klass_reg; ++ _length = length; ++ _result = result; ++ _info = new CodeEmitInfo(info); ++} ++ ++void NewTypeArrayStub::emit_code(LIR_Assembler* ce) { ++ assert(__ rsp_offset() == 0, "frame size should be fixed"); ++ __ bind(_entry); ++ assert(_length->as_register() == S0, "length must in S0,"); ++ assert(_klass_reg->as_register() == A3, "klass_reg must in A3"); ++ __ call(Runtime1::entry_for(Runtime1::new_type_array_id), relocInfo::runtime_call_type); ++ ce->add_call_info_here(_info); ++ ce->verify_oop_map(_info); ++ assert(_result->as_register() == A0, "result must in A0"); ++ __ b(_continuation); ++} ++ ++// Implementation of NewObjectArrayStub ++ ++NewObjectArrayStub::NewObjectArrayStub(LIR_Opr klass_reg, LIR_Opr length, LIR_Opr result, ++ CodeEmitInfo* info) { ++ _klass_reg = klass_reg; ++ _result = result; ++ _length = length; ++ _info = new CodeEmitInfo(info); ++} ++ ++void NewObjectArrayStub::emit_code(LIR_Assembler* ce) { ++ assert(__ rsp_offset() == 0, "frame size should be fixed"); ++ __ bind(_entry); ++ assert(_length->as_register() == S0, "length must in S0,"); ++ assert(_klass_reg->as_register() == A3, "klass_reg must in A3"); ++ __ call(Runtime1::entry_for(Runtime1::new_object_array_id), relocInfo::runtime_call_type); ++ ce->add_call_info_here(_info); ++ ce->verify_oop_map(_info); ++ assert(_result->as_register() == A0, "result must in A0"); ++ __ b(_continuation); ++} ++ ++// Implementation of MonitorAccessStubs ++ ++MonitorEnterStub::MonitorEnterStub(LIR_Opr obj_reg, LIR_Opr lock_reg, CodeEmitInfo* info) ++ : MonitorAccessStub(obj_reg, lock_reg) { ++ _info = new CodeEmitInfo(info); ++} ++ ++void MonitorEnterStub::emit_code(LIR_Assembler* ce) { ++ assert(__ rsp_offset() == 0, "frame size should be fixed"); ++ __ bind(_entry); ++ ce->store_parameter(_obj_reg->as_register(), 1); ++ ce->store_parameter(_lock_reg->as_register(), 0); ++ Runtime1::StubID enter_id; ++ if (ce->compilation()->has_fpu_code()) { ++ enter_id = Runtime1::monitorenter_id; ++ } else { ++ enter_id = Runtime1::monitorenter_nofpu_id; ++ } ++ __ call(Runtime1::entry_for(enter_id), relocInfo::runtime_call_type); ++ ce->add_call_info_here(_info); ++ ce->verify_oop_map(_info); ++ __ b(_continuation); ++} ++ ++void MonitorExitStub::emit_code(LIR_Assembler* ce) { ++ __ bind(_entry); ++ if (_compute_lock) { ++ // lock_reg was destroyed by fast unlocking attempt => recompute it ++ ce->monitor_address(_monitor_ix, _lock_reg); ++ } ++ ce->store_parameter(_lock_reg->as_register(), 0); ++ // note: non-blocking leaf routine => no call info needed ++ Runtime1::StubID exit_id; ++ if (ce->compilation()->has_fpu_code()) { ++ exit_id = Runtime1::monitorexit_id; ++ } else { ++ exit_id = 
Runtime1::monitorexit_nofpu_id; ++ } ++ __ lipc(RA, _continuation); ++ __ jmp(Runtime1::entry_for(exit_id), relocInfo::runtime_call_type); ++} ++ ++// Implementation of patching: ++// - Copy the code at given offset to an inlined buffer (first the bytes, then the number of bytes) ++// - Replace original code with a call to the stub ++// At Runtime: ++// - call to stub, jump to runtime ++// - in runtime: preserve all registers (rspecially objects, i.e., source and destination object) ++// - in runtime: after initializing class, restore original code, reexecute instruction ++ ++int PatchingStub::_patch_info_offset = -NativeGeneralJump::instruction_size; ++ ++void PatchingStub::align_patch_site(MacroAssembler* masm) { ++} ++ ++void PatchingStub::emit_code(LIR_Assembler* ce) { ++ assert(false, "LoongArch64 should not use C1 runtime patching"); ++} ++ ++void DeoptimizeStub::emit_code(LIR_Assembler* ce) { ++ __ bind(_entry); ++ ce->store_parameter(_trap_request, 0); ++ __ call(Runtime1::entry_for(Runtime1::deoptimize_id), relocInfo::runtime_call_type); ++ ce->add_call_info_here(_info); ++ DEBUG_ONLY(__ should_not_reach_here()); ++} ++ ++void ImplicitNullCheckStub::emit_code(LIR_Assembler* ce) { ++ address a; ++ if (_info->deoptimize_on_exception()) { ++ // Deoptimize, do not throw the exception, because it is probably wrong to do it here. ++ a = Runtime1::entry_for(Runtime1::predicate_failed_trap_id); ++ } else { ++ a = Runtime1::entry_for(Runtime1::throw_null_pointer_exception_id); ++ } ++ ++ ce->compilation()->implicit_exception_table()->append(_offset, __ offset()); ++ __ bind(_entry); ++ __ call(a, relocInfo::runtime_call_type); ++ ce->add_call_info_here(_info); ++ ce->verify_oop_map(_info); ++ debug_only(__ should_not_reach_here()); ++} ++ ++void SimpleExceptionStub::emit_code(LIR_Assembler* ce) { ++ assert(__ rsp_offset() == 0, "frame size should be fixed"); ++ ++ __ bind(_entry); ++ // pass the object in a scratch register because all other registers ++ // must be preserved ++ if (_obj->is_cpu_register()) { ++ __ move(SCR1, _obj->as_register()); ++ } ++ __ call(Runtime1::entry_for(_stub), relocInfo::runtime_call_type); ++ ce->add_call_info_here(_info); ++ debug_only(__ should_not_reach_here()); ++} ++ ++void ArrayCopyStub::emit_code(LIR_Assembler* ce) { ++ //---------------slow case: call to native----------------- ++ __ bind(_entry); ++ // Figure out where the args should go ++ // This should really convert the IntrinsicID to the Method* and signature ++ // but I don't know how to do that. 
++ // ++ VMRegPair args[5]; ++ BasicType signature[5] = { T_OBJECT, T_INT, T_OBJECT, T_INT, T_INT}; ++ SharedRuntime::java_calling_convention(signature, args, 5, true); ++ ++ // push parameters ++ // (src, src_pos, dest, destPos, length) ++ Register r[5]; ++ r[0] = src()->as_register(); ++ r[1] = src_pos()->as_register(); ++ r[2] = dst()->as_register(); ++ r[3] = dst_pos()->as_register(); ++ r[4] = length()->as_register(); ++ ++ // next registers will get stored on the stack ++ for (int i = 0; i < 5 ; i++ ) { ++ VMReg r_1 = args[i].first(); ++ if (r_1->is_stack()) { ++ int st_off = r_1->reg2stack() * wordSize; ++ __ stptr_d (r[i], SP, st_off); ++ } else { ++ assert(r[i] == args[i].first()->as_Register(), "Wrong register for arg "); ++ } ++ } ++ ++ ce->align_call(lir_static_call); ++ ++ ce->emit_static_call_stub(); ++ if (ce->compilation()->bailed_out()) { ++ return; // CodeCache is full ++ } ++ AddressLiteral resolve(SharedRuntime::get_resolve_static_call_stub(), ++ relocInfo::static_call_type); ++ address call = __ trampoline_call(resolve); ++ if (call == NULL) { ++ ce->bailout("trampoline stub overflow"); ++ return; ++ } ++ ce->add_call_info_here(info()); ++ ++#ifndef PRODUCT ++ if (PrintC1Statistics) { ++ __ li(SCR2, (address)&Runtime1::_arraycopy_slowcase_cnt); ++ __ increment(Address(SCR2)); ++ } ++#endif ++ ++ __ b(_continuation); ++} ++ ++#undef __ +diff --git a/src/hotspot/cpu/loongarch/c1_Defs_loongarch.hpp b/src/hotspot/cpu/loongarch/c1_Defs_loongarch.hpp +new file mode 100644 +index 0000000000..1140e44431 +--- /dev/null ++++ b/src/hotspot/cpu/loongarch/c1_Defs_loongarch.hpp +@@ -0,0 +1,79 @@ ++/* ++ * Copyright (c) 2000, 2021, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2021, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ * ++ */ ++ ++#ifndef CPU_LOONGARCH_C1_DEFS_LOONGARCH_HPP ++#define CPU_LOONGARCH_C1_DEFS_LOONGARCH_HPP ++ ++// native word offsets from memory address (little endian) ++enum { ++ pd_lo_word_offset_in_bytes = 0, ++ pd_hi_word_offset_in_bytes = BytesPerWord ++}; ++ ++// explicit rounding operations are required to implement the strictFP mode ++enum { ++ pd_strict_fp_requires_explicit_rounding = false ++}; ++ ++// FIXME: There are no callee-saved ++ ++// registers ++enum { ++ pd_nof_cpu_regs_frame_map = RegisterImpl::number_of_registers, // number of registers used during code emission ++ pd_nof_fpu_regs_frame_map = FloatRegisterImpl::number_of_registers, // number of registers used during code emission ++ ++ pd_nof_caller_save_cpu_regs_frame_map = 15, // number of registers killed by calls ++ pd_nof_caller_save_fpu_regs_frame_map = 32, // number of registers killed by calls ++ ++ pd_first_callee_saved_reg = pd_nof_caller_save_cpu_regs_frame_map, ++ pd_last_callee_saved_reg = 21, ++ ++ pd_last_allocatable_cpu_reg = pd_nof_caller_save_cpu_regs_frame_map - 1, ++ ++ pd_nof_cpu_regs_reg_alloc = pd_nof_caller_save_cpu_regs_frame_map, // number of registers that are visible to register allocator ++ pd_nof_fpu_regs_reg_alloc = 32, // number of registers that are visible to register allocator ++ ++ pd_nof_cpu_regs_linearscan = 32, // number of registers visible to linear scan ++ pd_nof_fpu_regs_linearscan = pd_nof_fpu_regs_frame_map, // number of registers visible to linear scan ++ pd_nof_xmm_regs_linearscan = 0, // don't have vector registers ++ pd_first_cpu_reg = 0, ++ pd_last_cpu_reg = pd_nof_cpu_regs_reg_alloc - 1, ++ pd_first_byte_reg = 0, ++ pd_last_byte_reg = pd_nof_cpu_regs_reg_alloc - 1, ++ pd_first_fpu_reg = pd_nof_cpu_regs_frame_map, ++ pd_last_fpu_reg = pd_first_fpu_reg + 31, ++ ++ pd_first_callee_saved_fpu_reg = 24 + pd_first_fpu_reg, ++ pd_last_callee_saved_fpu_reg = 31 + pd_first_fpu_reg, ++}; ++ ++// Encoding of float value in debug info. This is true on x86 where ++// floats are extended to doubles when stored in the stack, false for ++// LoongArch64 where floats and doubles are stored in their native form. ++enum { ++ pd_float_saved_as_double = false ++}; ++ ++#endif // CPU_LOONGARCH_C1_DEFS_LOONGARCH_HPP +diff --git a/src/hotspot/cpu/loongarch/c1_FpuStackSim_loongarch.hpp b/src/hotspot/cpu/loongarch/c1_FpuStackSim_loongarch.hpp +new file mode 100644 +index 0000000000..bd8578c72a +--- /dev/null ++++ b/src/hotspot/cpu/loongarch/c1_FpuStackSim_loongarch.hpp +@@ -0,0 +1,32 @@ ++/* ++ * Copyright (c) 2005, 2019, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2021, 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 
++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#ifndef CPU_LOONGARCH_C1_FPUSTACKSIM_LOONGARCH_HPP ++#define CPU_LOONGARCH_C1_FPUSTACKSIM_LOONGARCH_HPP ++ ++// No FPU stack on LoongArch ++class FpuStackSim; ++ ++#endif // CPU_LOONGARCH_C1_FPUSTACKSIM_LOONGARCH_HPP +diff --git a/src/hotspot/cpu/loongarch/c1_FpuStackSim_loongarch_64.cpp b/src/hotspot/cpu/loongarch/c1_FpuStackSim_loongarch_64.cpp +new file mode 100644 +index 0000000000..1a89c437a8 +--- /dev/null ++++ b/src/hotspot/cpu/loongarch/c1_FpuStackSim_loongarch_64.cpp +@@ -0,0 +1,31 @@ ++/* ++ * Copyright (c) 2005, 2017, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2021, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++//-------------------------------------------------------- ++// FpuStackSim ++//-------------------------------------------------------- ++ ++// No FPU stack on LoongArch64 ++#include "precompiled.hpp" +diff --git a/src/hotspot/cpu/loongarch/c1_FrameMap_loongarch.hpp b/src/hotspot/cpu/loongarch/c1_FrameMap_loongarch.hpp +new file mode 100644 +index 0000000000..4f0cf05361 +--- /dev/null ++++ b/src/hotspot/cpu/loongarch/c1_FrameMap_loongarch.hpp +@@ -0,0 +1,143 @@ ++/* ++ * Copyright (c) 1999, 2019, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2021, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ * ++ */ ++ ++#ifndef CPU_LOONGARCH_C1_FRAMEMAP_LOONGARCH_HPP ++#define CPU_LOONGARCH_C1_FRAMEMAP_LOONGARCH_HPP ++ ++// On LoongArch64 the frame looks as follows: ++// ++// +-----------------------------+---------+----------------------------------------+----------------+----------- ++// | size_arguments-nof_reg_args | 2 words | size_locals-size_arguments+numreg_args | _size_monitors | spilling . ++// +-----------------------------+---------+----------------------------------------+----------------+----------- ++ ++ public: ++ static const int pd_c_runtime_reserved_arg_size; ++ ++ enum { ++ first_available_sp_in_frame = 0, ++ frame_pad_in_bytes = 16, ++ nof_reg_args = 8 ++ }; ++ ++ public: ++ static LIR_Opr receiver_opr; ++ ++ static LIR_Opr r0_opr; ++ static LIR_Opr ra_opr; ++ static LIR_Opr tp_opr; ++ static LIR_Opr sp_opr; ++ static LIR_Opr a0_opr; ++ static LIR_Opr a1_opr; ++ static LIR_Opr a2_opr; ++ static LIR_Opr a3_opr; ++ static LIR_Opr a4_opr; ++ static LIR_Opr a5_opr; ++ static LIR_Opr a6_opr; ++ static LIR_Opr a7_opr; ++ static LIR_Opr t0_opr; ++ static LIR_Opr t1_opr; ++ static LIR_Opr t2_opr; ++ static LIR_Opr t3_opr; ++ static LIR_Opr t4_opr; ++ static LIR_Opr t5_opr; ++ static LIR_Opr t6_opr; ++ static LIR_Opr t7_opr; ++ static LIR_Opr t8_opr; ++ static LIR_Opr rx_opr; ++ static LIR_Opr fp_opr; ++ static LIR_Opr s0_opr; ++ static LIR_Opr s1_opr; ++ static LIR_Opr s2_opr; ++ static LIR_Opr s3_opr; ++ static LIR_Opr s4_opr; ++ static LIR_Opr s5_opr; ++ static LIR_Opr s6_opr; ++ static LIR_Opr s7_opr; ++ static LIR_Opr s8_opr; ++ ++ static LIR_Opr ra_oop_opr; ++ static LIR_Opr a0_oop_opr; ++ static LIR_Opr a1_oop_opr; ++ static LIR_Opr a2_oop_opr; ++ static LIR_Opr a3_oop_opr; ++ static LIR_Opr a4_oop_opr; ++ static LIR_Opr a5_oop_opr; ++ static LIR_Opr a6_oop_opr; ++ static LIR_Opr a7_oop_opr; ++ static LIR_Opr t0_oop_opr; ++ static LIR_Opr t1_oop_opr; ++ static LIR_Opr t2_oop_opr; ++ static LIR_Opr t3_oop_opr; ++ static LIR_Opr t4_oop_opr; ++ static LIR_Opr t5_oop_opr; ++ static LIR_Opr t6_oop_opr; ++ static LIR_Opr t7_oop_opr; ++ static LIR_Opr t8_oop_opr; ++ static LIR_Opr fp_oop_opr; ++ static LIR_Opr s0_oop_opr; ++ static LIR_Opr s1_oop_opr; ++ static LIR_Opr s2_oop_opr; ++ static LIR_Opr s3_oop_opr; ++ static LIR_Opr s4_oop_opr; ++ static LIR_Opr s5_oop_opr; ++ static LIR_Opr s6_oop_opr; ++ static LIR_Opr s7_oop_opr; ++ static LIR_Opr s8_oop_opr; ++ ++ static LIR_Opr scr1_opr; ++ static LIR_Opr scr2_opr; ++ static LIR_Opr scr1_long_opr; ++ static LIR_Opr scr2_long_opr; ++ ++ static LIR_Opr a0_metadata_opr; ++ static LIR_Opr a1_metadata_opr; ++ static LIR_Opr a2_metadata_opr; ++ static LIR_Opr a3_metadata_opr; ++ static LIR_Opr a4_metadata_opr; ++ static LIR_Opr a5_metadata_opr; ++ ++ static LIR_Opr long0_opr; ++ static LIR_Opr long1_opr; ++ static LIR_Opr fpu0_float_opr; ++ static LIR_Opr fpu0_double_opr; ++ ++ static LIR_Opr as_long_opr(Register r) { ++ return LIR_OprFact::double_cpu(cpu_reg2rnr(r), cpu_reg2rnr(r)); ++ } ++ static LIR_Opr as_pointer_opr(Register r) { ++ return LIR_OprFact::double_cpu(cpu_reg2rnr(r), cpu_reg2rnr(r)); ++ } ++ ++ // VMReg name for spilled physical FPU stack slot n ++ static VMReg fpu_regname (int n); ++ ++ static bool is_caller_save_register(LIR_Opr opr) { return true; } ++ static bool is_caller_save_register(Register r) { return true; } ++ ++ static int nof_caller_save_cpu_regs() { return pd_nof_caller_save_cpu_regs_frame_map; } ++ static int last_cpu_reg() { return pd_last_cpu_reg; } ++ static int last_byte_reg() { return 
pd_last_byte_reg; } ++ ++#endif // CPU_LOONGARCH_C1_FRAMEMAP_LOONGARCH_HPP +diff --git a/src/hotspot/cpu/loongarch/c1_FrameMap_loongarch_64.cpp b/src/hotspot/cpu/loongarch/c1_FrameMap_loongarch_64.cpp +new file mode 100644 +index 0000000000..3b60899071 +--- /dev/null ++++ b/src/hotspot/cpu/loongarch/c1_FrameMap_loongarch_64.cpp +@@ -0,0 +1,354 @@ ++/* ++ * Copyright (c) 1999, 2019, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2021, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#include "precompiled.hpp" ++#include "c1/c1_FrameMap.hpp" ++#include "c1/c1_LIR.hpp" ++#include "runtime/sharedRuntime.hpp" ++#include "vmreg_loongarch.inline.hpp" ++ ++#define T0 RT0 ++#define T1 RT1 ++#define T2 RT2 ++#define T3 RT3 ++#define T4 RT4 ++#define T5 RT5 ++#define T6 RT6 ++#define T7 RT7 ++#define T8 RT8 ++ ++LIR_Opr FrameMap::map_to_opr(BasicType type, VMRegPair* reg, bool) { ++ LIR_Opr opr = LIR_OprFact::illegalOpr; ++ VMReg r_1 = reg->first(); ++ VMReg r_2 = reg->second(); ++ if (r_1->is_stack()) { ++ // Convert stack slot to an SP offset ++ // The calling convention does not count the SharedRuntime::out_preserve_stack_slots() value ++ // so we must add it in here. 
++ int st_off = (r_1->reg2stack() + SharedRuntime::out_preserve_stack_slots()) * VMRegImpl::stack_slot_size; ++ opr = LIR_OprFact::address(new LIR_Address(sp_opr, st_off, type)); ++ } else if (r_1->is_Register()) { ++ Register reg = r_1->as_Register(); ++ if (r_2->is_Register() && (type == T_LONG || type == T_DOUBLE)) { ++ Register reg2 = r_2->as_Register(); ++ assert(reg2 == reg, "must be same register"); ++ opr = as_long_opr(reg); ++ } else if (is_reference_type(type)) { ++ opr = as_oop_opr(reg); ++ } else if (type == T_METADATA) { ++ opr = as_metadata_opr(reg); ++ } else if (type == T_ADDRESS) { ++ opr = as_address_opr(reg); ++ } else { ++ opr = as_opr(reg); ++ } ++ } else if (r_1->is_FloatRegister()) { ++ assert(type == T_DOUBLE || type == T_FLOAT, "wrong type"); ++ int num = r_1->as_FloatRegister()->encoding(); ++ if (type == T_FLOAT) { ++ opr = LIR_OprFact::single_fpu(num); ++ } else { ++ opr = LIR_OprFact::double_fpu(num); ++ } ++ } else { ++ ShouldNotReachHere(); ++ } ++ return opr; ++} ++ ++LIR_Opr FrameMap::r0_opr; ++LIR_Opr FrameMap::ra_opr; ++LIR_Opr FrameMap::tp_opr; ++LIR_Opr FrameMap::sp_opr; ++LIR_Opr FrameMap::a0_opr; ++LIR_Opr FrameMap::a1_opr; ++LIR_Opr FrameMap::a2_opr; ++LIR_Opr FrameMap::a3_opr; ++LIR_Opr FrameMap::a4_opr; ++LIR_Opr FrameMap::a5_opr; ++LIR_Opr FrameMap::a6_opr; ++LIR_Opr FrameMap::a7_opr; ++LIR_Opr FrameMap::t0_opr; ++LIR_Opr FrameMap::t1_opr; ++LIR_Opr FrameMap::t2_opr; ++LIR_Opr FrameMap::t3_opr; ++LIR_Opr FrameMap::t4_opr; ++LIR_Opr FrameMap::t5_opr; ++LIR_Opr FrameMap::t6_opr; ++LIR_Opr FrameMap::t7_opr; ++LIR_Opr FrameMap::t8_opr; ++LIR_Opr FrameMap::rx_opr; ++LIR_Opr FrameMap::fp_opr; ++LIR_Opr FrameMap::s0_opr; ++LIR_Opr FrameMap::s1_opr; ++LIR_Opr FrameMap::s2_opr; ++LIR_Opr FrameMap::s3_opr; ++LIR_Opr FrameMap::s4_opr; ++LIR_Opr FrameMap::s5_opr; ++LIR_Opr FrameMap::s6_opr; ++LIR_Opr FrameMap::s7_opr; ++LIR_Opr FrameMap::s8_opr; ++ ++LIR_Opr FrameMap::receiver_opr; ++ ++LIR_Opr FrameMap::ra_oop_opr; ++LIR_Opr FrameMap::a0_oop_opr; ++LIR_Opr FrameMap::a1_oop_opr; ++LIR_Opr FrameMap::a2_oop_opr; ++LIR_Opr FrameMap::a3_oop_opr; ++LIR_Opr FrameMap::a4_oop_opr; ++LIR_Opr FrameMap::a5_oop_opr; ++LIR_Opr FrameMap::a6_oop_opr; ++LIR_Opr FrameMap::a7_oop_opr; ++LIR_Opr FrameMap::t0_oop_opr; ++LIR_Opr FrameMap::t1_oop_opr; ++LIR_Opr FrameMap::t2_oop_opr; ++LIR_Opr FrameMap::t3_oop_opr; ++LIR_Opr FrameMap::t4_oop_opr; ++LIR_Opr FrameMap::t5_oop_opr; ++LIR_Opr FrameMap::t6_oop_opr; ++LIR_Opr FrameMap::t7_oop_opr; ++LIR_Opr FrameMap::t8_oop_opr; ++LIR_Opr FrameMap::fp_oop_opr; ++LIR_Opr FrameMap::s0_oop_opr; ++LIR_Opr FrameMap::s1_oop_opr; ++LIR_Opr FrameMap::s2_oop_opr; ++LIR_Opr FrameMap::s3_oop_opr; ++LIR_Opr FrameMap::s4_oop_opr; ++LIR_Opr FrameMap::s5_oop_opr; ++LIR_Opr FrameMap::s6_oop_opr; ++LIR_Opr FrameMap::s7_oop_opr; ++LIR_Opr FrameMap::s8_oop_opr; ++ ++LIR_Opr FrameMap::scr1_opr; ++LIR_Opr FrameMap::scr2_opr; ++LIR_Opr FrameMap::scr1_long_opr; ++LIR_Opr FrameMap::scr2_long_opr; ++ ++LIR_Opr FrameMap::a0_metadata_opr; ++LIR_Opr FrameMap::a1_metadata_opr; ++LIR_Opr FrameMap::a2_metadata_opr; ++LIR_Opr FrameMap::a3_metadata_opr; ++LIR_Opr FrameMap::a4_metadata_opr; ++LIR_Opr FrameMap::a5_metadata_opr; ++ ++LIR_Opr FrameMap::long0_opr; ++LIR_Opr FrameMap::long1_opr; ++LIR_Opr FrameMap::fpu0_float_opr; ++LIR_Opr FrameMap::fpu0_double_opr; ++ ++LIR_Opr FrameMap::_caller_save_cpu_regs[] = { 0 }; ++LIR_Opr FrameMap::_caller_save_fpu_regs[] = { 0 }; ++ ++//-------------------------------------------------------- ++// FrameMap 
++//-------------------------------------------------------- ++ ++void FrameMap::initialize() { ++ assert(!_init_done, "once"); ++ int i = 0; ++ ++ // caller save register ++ map_register(i, A0); a0_opr = LIR_OprFact::single_cpu(i); i++; ++ map_register(i, A1); a1_opr = LIR_OprFact::single_cpu(i); i++; ++ map_register(i, A2); a2_opr = LIR_OprFact::single_cpu(i); i++; ++ map_register(i, A3); a3_opr = LIR_OprFact::single_cpu(i); i++; ++ map_register(i, A4); a4_opr = LIR_OprFact::single_cpu(i); i++; ++ map_register(i, A5); a5_opr = LIR_OprFact::single_cpu(i); i++; ++ map_register(i, A6); a6_opr = LIR_OprFact::single_cpu(i); i++; ++ map_register(i, A7); a7_opr = LIR_OprFact::single_cpu(i); i++; ++ map_register(i, T0); t0_opr = LIR_OprFact::single_cpu(i); i++; ++ map_register(i, T1); t1_opr = LIR_OprFact::single_cpu(i); i++; ++ map_register(i, T2); t2_opr = LIR_OprFact::single_cpu(i); i++; ++ map_register(i, T3); t3_opr = LIR_OprFact::single_cpu(i); i++; ++ map_register(i, T5); t5_opr = LIR_OprFact::single_cpu(i); i++; ++ map_register(i, T6); t6_opr = LIR_OprFact::single_cpu(i); i++; ++ map_register(i, T8); t8_opr = LIR_OprFact::single_cpu(i); i++; ++ ++ // callee save register ++ map_register(i, S0); s0_opr = LIR_OprFact::single_cpu(i); i++; ++ map_register(i, S1); s1_opr = LIR_OprFact::single_cpu(i); i++; ++ map_register(i, S2); s2_opr = LIR_OprFact::single_cpu(i); i++; ++ map_register(i, S3); s3_opr = LIR_OprFact::single_cpu(i); i++; ++ map_register(i, S4); s4_opr = LIR_OprFact::single_cpu(i); i++; ++ map_register(i, S7); s7_opr = LIR_OprFact::single_cpu(i); i++; ++ map_register(i, S8); s8_opr = LIR_OprFact::single_cpu(i); i++; ++ ++ // special register ++ map_register(i, S5); s5_opr = LIR_OprFact::single_cpu(i); i++; // heapbase ++ map_register(i, S6); s6_opr = LIR_OprFact::single_cpu(i); i++; // thread ++ map_register(i, TP); tp_opr = LIR_OprFact::single_cpu(i); i++; // tp ++ map_register(i, FP); fp_opr = LIR_OprFact::single_cpu(i); i++; // fp ++ map_register(i, RA); ra_opr = LIR_OprFact::single_cpu(i); i++; // ra ++ map_register(i, SP); sp_opr = LIR_OprFact::single_cpu(i); i++; // sp ++ ++ // tmp register ++ map_register(i, T7); t7_opr = LIR_OprFact::single_cpu(i); i++; // scr1 ++ map_register(i, T4); t4_opr = LIR_OprFact::single_cpu(i); i++; // scr2 ++ ++ scr1_opr = t7_opr; ++ scr2_opr = t4_opr; ++ scr1_long_opr = LIR_OprFact::double_cpu(t7_opr->cpu_regnr(), t7_opr->cpu_regnr()); ++ scr2_long_opr = LIR_OprFact::double_cpu(t4_opr->cpu_regnr(), t4_opr->cpu_regnr()); ++ ++ long0_opr = LIR_OprFact::double_cpu(a0_opr->cpu_regnr(), a0_opr->cpu_regnr()); ++ long1_opr = LIR_OprFact::double_cpu(a1_opr->cpu_regnr(), a1_opr->cpu_regnr()); ++ ++ fpu0_float_opr = LIR_OprFact::single_fpu(0); ++ fpu0_double_opr = LIR_OprFact::double_fpu(0); ++ ++ // scr1, scr2 not included ++ _caller_save_cpu_regs[0] = a0_opr; ++ _caller_save_cpu_regs[1] = a1_opr; ++ _caller_save_cpu_regs[2] = a2_opr; ++ _caller_save_cpu_regs[3] = a3_opr; ++ _caller_save_cpu_regs[4] = a4_opr; ++ _caller_save_cpu_regs[5] = a5_opr; ++ _caller_save_cpu_regs[6] = a6_opr; ++ _caller_save_cpu_regs[7] = a7_opr; ++ _caller_save_cpu_regs[8] = t0_opr; ++ _caller_save_cpu_regs[9] = t1_opr; ++ _caller_save_cpu_regs[10] = t2_opr; ++ _caller_save_cpu_regs[11] = t3_opr; ++ _caller_save_cpu_regs[12] = t5_opr; ++ _caller_save_cpu_regs[13] = t6_opr; ++ _caller_save_cpu_regs[14] = t8_opr; ++ ++ for (int i = 0; i < 8; i++) { ++ _caller_save_fpu_regs[i] = LIR_OprFact::single_fpu(i); ++ } ++ ++ _init_done = true; ++ ++ ra_oop_opr = as_oop_opr(RA); ++ 
a0_oop_opr = as_oop_opr(A0); ++ a1_oop_opr = as_oop_opr(A1); ++ a2_oop_opr = as_oop_opr(A2); ++ a3_oop_opr = as_oop_opr(A3); ++ a4_oop_opr = as_oop_opr(A4); ++ a5_oop_opr = as_oop_opr(A5); ++ a6_oop_opr = as_oop_opr(A6); ++ a7_oop_opr = as_oop_opr(A7); ++ t0_oop_opr = as_oop_opr(T0); ++ t1_oop_opr = as_oop_opr(T1); ++ t2_oop_opr = as_oop_opr(T2); ++ t3_oop_opr = as_oop_opr(T3); ++ t4_oop_opr = as_oop_opr(T4); ++ t5_oop_opr = as_oop_opr(T5); ++ t6_oop_opr = as_oop_opr(T6); ++ t7_oop_opr = as_oop_opr(T7); ++ t8_oop_opr = as_oop_opr(T8); ++ fp_oop_opr = as_oop_opr(FP); ++ s0_oop_opr = as_oop_opr(S0); ++ s1_oop_opr = as_oop_opr(S1); ++ s2_oop_opr = as_oop_opr(S2); ++ s3_oop_opr = as_oop_opr(S3); ++ s4_oop_opr = as_oop_opr(S4); ++ s5_oop_opr = as_oop_opr(S5); ++ s6_oop_opr = as_oop_opr(S6); ++ s7_oop_opr = as_oop_opr(S7); ++ s8_oop_opr = as_oop_opr(S8); ++ ++ a0_metadata_opr = as_metadata_opr(A0); ++ a1_metadata_opr = as_metadata_opr(A1); ++ a2_metadata_opr = as_metadata_opr(A2); ++ a3_metadata_opr = as_metadata_opr(A3); ++ a4_metadata_opr = as_metadata_opr(A4); ++ a5_metadata_opr = as_metadata_opr(A5); ++ ++ sp_opr = as_pointer_opr(SP); ++ fp_opr = as_pointer_opr(FP); ++ ++ VMRegPair regs; ++ BasicType sig_bt = T_OBJECT; ++ SharedRuntime::java_calling_convention(&sig_bt, ®s, 1, true); ++ receiver_opr = as_oop_opr(regs.first()->as_Register()); ++ ++ for (int i = 0; i < nof_caller_save_fpu_regs; i++) { ++ _caller_save_fpu_regs[i] = LIR_OprFact::single_fpu(i); ++ } ++} ++ ++Address FrameMap::make_new_address(ByteSize sp_offset) const { ++ // for sp, based address use this: ++ // return Address(sp, in_bytes(sp_offset) - (framesize() - 2) * 4); ++ return Address(SP, in_bytes(sp_offset)); ++} ++ ++// ----------------mapping----------------------- ++// all mapping is based on fp addressing, except for simple leaf methods where we access ++// the locals sp based (and no frame is built) ++ ++// Frame for simple leaf methods (quick entries) ++// ++// +----------+ ++// | ret addr | <- TOS ++// +----------+ ++// | args | ++// | ...... | ++ ++// Frame for standard methods ++// ++// | .........| <- TOS ++// | locals | ++// +----------+ ++// | old fp, | <- RFP ++// +----------+ ++// | ret addr | ++// +----------+ ++// | args | ++// | .........| ++ ++// For OopMaps, map a local variable or spill index to an VMRegImpl name. ++// This is the offset from sp() in the frame of the slot for the index, ++// skewed by VMRegImpl::stack0 to indicate a stack location (vs.a register.) ++// ++// framesize + ++// stack0 stack0 0 <- VMReg ++// | | | ++// ...........|..............|.............| ++// 0 1 2 3 x x 4 5 6 ... | <- local indices ++// ^ ^ sp() ( x x indicate link ++// | | and return addr) ++// arguments non-argument locals ++ ++VMReg FrameMap::fpu_regname(int n) { ++ // Return the OptoReg name for the fpu stack slot "n" ++ // A spilled fpu stack slot comprises to two single-word OptoReg's. 
++ return as_FloatRegister(n)->as_VMReg(); ++} ++ ++LIR_Opr FrameMap::stack_pointer() { ++ return FrameMap::sp_opr; ++} ++ ++// JSR 292 ++LIR_Opr FrameMap::method_handle_invoke_SP_save_opr() { ++ return LIR_OprFact::illegalOpr; // Not needed on LoongArch64 ++} ++ ++bool FrameMap::validate_frame() { ++ return true; ++} +diff --git a/src/hotspot/cpu/loongarch/c1_LIRAssembler_loongarch.hpp b/src/hotspot/cpu/loongarch/c1_LIRAssembler_loongarch.hpp +new file mode 100644 +index 0000000000..40d9408f1f +--- /dev/null ++++ b/src/hotspot/cpu/loongarch/c1_LIRAssembler_loongarch.hpp +@@ -0,0 +1,83 @@ ++/* ++ * Copyright (c) 2000, 2021, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2021, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#ifndef CPU_LOONGARCH_C1_LIRASSEMBLER_LOONGARCH_HPP ++#define CPU_LOONGARCH_C1_LIRASSEMBLER_LOONGARCH_HPP ++ ++// ArrayCopyStub needs access to bailout ++friend class ArrayCopyStub; ++ ++ private: ++ int array_element_size(BasicType type) const; ++ ++ void arith_fpu_implementation(LIR_Code code, int left_index, int right_index, ++ int dest_index, bool pop_fpu_stack); ++ ++ // helper functions which checks for overflow and sets bailout if it ++ // occurs. Always returns a valid embeddable pointer but in the ++ // bailout case the pointer won't be to unique storage. ++ address float_constant(float f); ++ address double_constant(double d); ++ ++ address int_constant(jlong n); ++ ++ bool is_literal_address(LIR_Address* addr); ++ ++ // Ensure we have a valid Address (base+offset) to a stack-slot. 
++ Address stack_slot_address(int index, uint shift, int adjust = 0); ++ ++ // Record the type of the receiver in ReceiverTypeData ++ void type_profile_helper(Register mdo, ciMethodData *md, ciProfileData *data, ++ Register recv, Label* update_done); ++ void add_debug_info_for_branch(address adr, CodeEmitInfo* info); ++ ++ void casw(Register addr, Register newval, Register cmpval, bool sign); ++ void casl(Register addr, Register newval, Register cmpval); ++ ++ void poll_for_safepoint(relocInfo::relocType rtype, CodeEmitInfo* info = NULL); ++ ++ static const int max_tableswitches = 20; ++ struct tableswitch switches[max_tableswitches]; ++ int tableswitch_count; ++ ++ void init() { tableswitch_count = 0; } ++ ++ void deoptimize_trap(CodeEmitInfo *info); ++ ++ enum { ++ // call stub: CompiledStaticCall::to_interp_stub_size() + ++ // CompiledStaticCall::to_trampoline_stub_size() ++ _call_stub_size = 13 * NativeInstruction::nop_instruction_size, ++ _call_aot_stub_size = 0, ++ _exception_handler_size = DEBUG_ONLY(1*K) NOT_DEBUG(175), ++ _deopt_handler_size = 7 * NativeInstruction::nop_instruction_size ++ }; ++ ++public: ++ void store_parameter(Register r, int offset_from_sp_in_words); ++ void store_parameter(jint c, int offset_from_sp_in_words); ++ void store_parameter(jobject c, int offset_from_sp_in_words); ++ ++#endif // CPU_LOONGARCH_C1_LIRASSEMBLER_LOONGARCH_HPP +diff --git a/src/hotspot/cpu/loongarch/c1_LIRAssembler_loongarch_64.cpp b/src/hotspot/cpu/loongarch/c1_LIRAssembler_loongarch_64.cpp +new file mode 100644 +index 0000000000..c989e25c3a +--- /dev/null ++++ b/src/hotspot/cpu/loongarch/c1_LIRAssembler_loongarch_64.cpp +@@ -0,0 +1,3387 @@ ++/* ++ * Copyright (c) 2000, 2021, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2021, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ * ++ */ ++ ++#include "precompiled.hpp" ++#include "asm/macroAssembler.inline.hpp" ++#include "asm/assembler.hpp" ++#include "c1/c1_CodeStubs.hpp" ++#include "c1/c1_Compilation.hpp" ++#include "c1/c1_LIRAssembler.hpp" ++#include "c1/c1_MacroAssembler.hpp" ++#include "c1/c1_Runtime1.hpp" ++#include "c1/c1_ValueStack.hpp" ++#include "ci/ciArrayKlass.hpp" ++#include "ci/ciInstance.hpp" ++#include "code/compiledIC.hpp" ++#include "gc/shared/collectedHeap.hpp" ++#include "gc/shared/gc_globals.hpp" ++#include "nativeInst_loongarch.hpp" ++#include "oops/objArrayKlass.hpp" ++#include "runtime/frame.inline.hpp" ++#include "runtime/sharedRuntime.hpp" ++#include "runtime/stubRoutines.hpp" ++#include "vmreg_loongarch.inline.hpp" ++ ++#define T0 RT0 ++#define T1 RT1 ++#define T2 RT2 ++#define T3 RT3 ++#define T5 RT5 ++#define T6 RT6 ++#define T7 RT7 ++#define T8 RT8 ++ ++#ifndef PRODUCT ++#define COMMENT(x) do { __ block_comment(x); } while (0) ++#else ++#define COMMENT(x) ++#endif ++ ++NEEDS_CLEANUP // remove this definitions? ++ ++#define __ _masm-> ++ ++static void select_different_registers(Register preserve, Register extra, ++ Register &tmp1, Register &tmp2) { ++ if (tmp1 == preserve) { ++ assert_different_registers(tmp1, tmp2, extra); ++ tmp1 = extra; ++ } else if (tmp2 == preserve) { ++ assert_different_registers(tmp1, tmp2, extra); ++ tmp2 = extra; ++ } ++ assert_different_registers(preserve, tmp1, tmp2); ++} ++ ++static void select_different_registers(Register preserve, Register extra, ++ Register &tmp1, Register &tmp2, ++ Register &tmp3) { ++ if (tmp1 == preserve) { ++ assert_different_registers(tmp1, tmp2, tmp3, extra); ++ tmp1 = extra; ++ } else if (tmp2 == preserve) { ++ assert_different_registers(tmp1, tmp2, tmp3, extra); ++ tmp2 = extra; ++ } else if (tmp3 == preserve) { ++ assert_different_registers(tmp1, tmp2, tmp3, extra); ++ tmp3 = extra; ++ } ++ assert_different_registers(preserve, tmp1, tmp2, tmp3); ++} ++ ++bool LIR_Assembler::is_small_constant(LIR_Opr opr) { Unimplemented(); return false; } ++ ++LIR_Opr LIR_Assembler::receiverOpr() { ++ return FrameMap::receiver_opr; ++} ++ ++LIR_Opr LIR_Assembler::osrBufferPointer() { ++ return FrameMap::as_pointer_opr(receiverOpr()->as_register()); ++} ++ ++//--------------fpu register translations----------------------- ++ ++address LIR_Assembler::float_constant(float f) { ++ address const_addr = __ float_constant(f); ++ if (const_addr == NULL) { ++ bailout("const section overflow"); ++ return __ code()->consts()->start(); ++ } else { ++ return const_addr; ++ } ++} ++ ++address LIR_Assembler::double_constant(double d) { ++ address const_addr = __ double_constant(d); ++ if (const_addr == NULL) { ++ bailout("const section overflow"); ++ return __ code()->consts()->start(); ++ } else { ++ return const_addr; ++ } ++} ++ ++void LIR_Assembler::vtable_call(LIR_OpJavaCall* op) { ++ ShouldNotReachHere(); ++} ++ ++void LIR_Assembler::set_24bit_FPU() { Unimplemented(); } ++ ++void LIR_Assembler::reset_FPU() { Unimplemented(); } ++ ++void LIR_Assembler::fpop() { Unimplemented(); } ++ ++void LIR_Assembler::fxch(int i) { Unimplemented(); } ++ ++void LIR_Assembler::fld(int i) { Unimplemented(); } ++ ++void LIR_Assembler::ffree(int i) { Unimplemented(); } ++ ++void LIR_Assembler::breakpoint() { Unimplemented(); } ++ ++void LIR_Assembler::push(LIR_Opr opr) { Unimplemented(); } ++ ++void LIR_Assembler::pop(LIR_Opr opr) { Unimplemented(); } ++ ++bool LIR_Assembler::is_literal_address(LIR_Address* addr) { Unimplemented(); return false; } ++ ++static Register 
as_reg(LIR_Opr op) { ++ return op->is_double_cpu() ? op->as_register_lo() : op->as_register(); ++} ++ ++static jlong as_long(LIR_Opr data) { ++ jlong result; ++ switch (data->type()) { ++ case T_INT: ++ result = (data->as_jint()); ++ break; ++ case T_LONG: ++ result = (data->as_jlong()); ++ break; ++ default: ++ ShouldNotReachHere(); ++ result = 0; // unreachable ++ } ++ return result; ++} ++ ++Address LIR_Assembler::as_Address(LIR_Address* addr) { ++ Register base = addr->base()->as_pointer_register(); ++ LIR_Opr opr = addr->index(); ++ if (opr->is_cpu_register()) { ++ Register index; ++ if (opr->is_single_cpu()) ++ index = opr->as_register(); ++ else ++ index = opr->as_register_lo(); ++ assert(addr->disp() == 0, "must be"); ++ return Address(base, index, Address::ScaleFactor(addr->scale())); ++ } else { ++ assert(addr->scale() == 0, "must be"); ++ return Address(base, addr->disp()); ++ } ++ return Address(); ++} ++ ++Address LIR_Assembler::as_Address_hi(LIR_Address* addr) { ++ ShouldNotReachHere(); ++ return Address(); ++} ++ ++Address LIR_Assembler::as_Address_lo(LIR_Address* addr) { ++ return as_Address(addr); // Ouch ++ // FIXME: This needs to be much more clever. See x86. ++} ++ ++// Ensure a valid Address (base + offset) to a stack-slot. If stack access is ++// not encodable as a base + (immediate) offset, generate an explicit address ++// calculation to hold the address in a temporary register. ++Address LIR_Assembler::stack_slot_address(int index, uint size, int adjust) { ++ precond(size == 4 || size == 8); ++ Address addr = frame_map()->address_for_slot(index, adjust); ++ precond(addr.index() == noreg); ++ precond(addr.base() == SP); ++ precond(addr.disp() > 0); ++ uint mask = size - 1; ++ assert((addr.disp() & mask) == 0, "scaled offsets only"); ++ return addr; ++} ++ ++void LIR_Assembler::osr_entry() { ++ offsets()->set_value(CodeOffsets::OSR_Entry, code_offset()); ++ BlockBegin* osr_entry = compilation()->hir()->osr_entry(); ++ ValueStack* entry_state = osr_entry->state(); ++ int number_of_locks = entry_state->locks_size(); ++ ++ // we jump here if osr happens with the interpreter ++ // state set up to continue at the beginning of the ++ // loop that triggered osr - in particular, we have ++ // the following registers setup: ++ // ++ // A2: osr buffer ++ // ++ ++ // build frame ++ ciMethod* m = compilation()->method(); ++ __ build_frame(initial_frame_size_in_bytes(), bang_size_in_bytes()); ++ ++ // OSR buffer is ++ // ++ // locals[nlocals-1..0] ++ // monitors[0..number_of_locks] ++ // ++ // locals is a direct copy of the interpreter frame so in the osr buffer ++ // so first slot in the local array is the last local from the interpreter ++ // and last slot is local[0] (receiver) from the interpreter ++ // ++ // Similarly with locks. The first lock slot in the osr buffer is the nth lock ++ // from the interpreter frame, the nth lock slot in the osr buffer is 0th lock ++ // in the interpreter frame (the method lock if a sync method) ++ ++ // Initialize monitors in the compiled activation. ++ // A2: pointer to osr buffer ++ // ++ // All other registers are dead at this point and the locals will be ++ // copied into place by code emitted in the IR. 
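++ // Copy each monitor (BasicLock word followed by the object oop) from the OSR
++ // buffer into the corresponding monitor slot of the compiled activation.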
++ ++ Register OSR_buf = osrBufferPointer()->as_pointer_register(); ++ { ++ assert(frame::interpreter_frame_monitor_size() == BasicObjectLock::size(), "adjust code below"); ++ int monitor_offset = BytesPerWord * method()->max_locals() + (2 * BytesPerWord) * (number_of_locks - 1); ++ // SharedRuntime::OSR_migration_begin() packs BasicObjectLocks in ++ // the OSR buffer using 2 word entries: first the lock and then ++ // the oop. ++ for (int i = 0; i < number_of_locks; i++) { ++ int slot_offset = monitor_offset - ((i * 2) * BytesPerWord); ++#ifdef ASSERT ++ // verify the interpreter's monitor has a non-null object ++ { ++ Label L; ++ __ ld_ptr(SCR1, Address(OSR_buf, slot_offset + 1 * BytesPerWord)); ++ __ bnez(SCR1, L); ++ __ stop("locked object is NULL"); ++ __ bind(L); ++ } ++#endif ++ __ ld_ptr(S0, Address(OSR_buf, slot_offset + 0)); ++ __ st_ptr(S0, frame_map()->address_for_monitor_lock(i)); ++ __ ld_ptr(S0, Address(OSR_buf, slot_offset + 1*BytesPerWord)); ++ __ st_ptr(S0, frame_map()->address_for_monitor_object(i)); ++ } ++ } ++} ++ ++// inline cache check; done before the frame is built. ++int LIR_Assembler::check_icache() { ++ Register receiver = FrameMap::receiver_opr->as_register(); ++ Register ic_klass = IC_Klass; ++ int start_offset = __ offset(); ++ Label dont; ++ ++ __ verify_oop(receiver); ++ ++ // explicit NULL check not needed since load from [klass_offset] causes a trap ++ // check against inline cache ++ assert(!MacroAssembler::needs_explicit_null_check(oopDesc::klass_offset_in_bytes()), ++ "must add explicit null check"); ++ ++ __ load_klass(SCR2, receiver); ++ __ beq(SCR2, ic_klass, dont); ++ ++ // if icache check fails, then jump to runtime routine ++ // Note: RECEIVER must still contain the receiver! ++ __ jmp(SharedRuntime::get_ic_miss_stub(), relocInfo::runtime_call_type); ++ ++ // We align the verified entry point unless the method body ++ // (including its inline cache check) will fit in a single 64-byte ++ // icache line. ++ if (!method()->is_accessor() || __ offset() - start_offset > 4 * 4) { ++ // force alignment after the cache check. ++ __ align(CodeEntryAlignment); ++ } ++ ++ __ bind(dont); ++ return start_offset; ++} ++ ++void LIR_Assembler::jobject2reg(jobject o, Register reg) { ++ if (o == NULL) { ++ __ move(reg, R0); ++ } else { ++ int oop_index = __ oop_recorder()->find_index(o); ++ RelocationHolder rspec = oop_Relocation::spec(oop_index); ++ __ relocate(rspec); ++ __ patchable_li52(reg, (long)o); ++ } ++} ++ ++void LIR_Assembler::deoptimize_trap(CodeEmitInfo *info) { ++ address target = NULL; ++ ++ switch (patching_id(info)) { ++ case PatchingStub::access_field_id: ++ target = Runtime1::entry_for(Runtime1::access_field_patching_id); ++ break; ++ case PatchingStub::load_klass_id: ++ target = Runtime1::entry_for(Runtime1::load_klass_patching_id); ++ break; ++ case PatchingStub::load_mirror_id: ++ target = Runtime1::entry_for(Runtime1::load_mirror_patching_id); ++ break; ++ case PatchingStub::load_appendix_id: ++ target = Runtime1::entry_for(Runtime1::load_appendix_patching_id); ++ break; ++ default: ShouldNotReachHere(); ++ } ++ ++ __ call(target, relocInfo::runtime_call_type); ++ add_call_info_here(info); ++} ++ ++void LIR_Assembler::jobject2reg_with_patching(Register reg, CodeEmitInfo *info) { ++ deoptimize_trap(info); ++} ++ ++// This specifies the rsp decrement needed to build the frame ++int LIR_Assembler::initial_frame_size_in_bytes() const { ++ // if rounding, must let FrameMap know! 
++ return in_bytes(frame_map()->framesize_in_bytes()); ++} ++ ++int LIR_Assembler::emit_exception_handler() { ++ // if the last instruction is a call (typically to do a throw which ++ // is coming at the end after block reordering) the return address ++ // must still point into the code area in order to avoid assertion ++ // failures when searching for the corresponding bci => add a nop ++ // (was bug 5/14/1999 - gri) ++ __ nop(); ++ ++ // generate code for exception handler ++ address handler_base = __ start_a_stub(exception_handler_size()); ++ if (handler_base == NULL) { ++ // not enough space left for the handler ++ bailout("exception handler overflow"); ++ return -1; ++ } ++ ++ int offset = code_offset(); ++ ++ // the exception oop and pc are in A0, and A1 ++ // no other registers need to be preserved, so invalidate them ++ __ invalidate_registers(false, true, true, true, true, true); ++ ++ // check that there is really an exception ++ __ verify_not_null_oop(A0); ++ ++ // search an exception handler (A0: exception oop, A1: throwing pc) ++ __ call(Runtime1::entry_for(Runtime1::handle_exception_from_callee_id), relocInfo::runtime_call_type); ++ __ should_not_reach_here(); ++ guarantee(code_offset() - offset <= exception_handler_size(), "overflow"); ++ __ end_a_stub(); ++ ++ return offset; ++} ++ ++// Emit the code to remove the frame from the stack in the exception unwind path. ++int LIR_Assembler::emit_unwind_handler() { ++#ifndef PRODUCT ++ if (CommentedAssembly) { ++ _masm->block_comment("Unwind handler"); ++ } ++#endif ++ ++ int offset = code_offset(); ++ ++ // Fetch the exception from TLS and clear out exception related thread state ++ __ ld_ptr(A0, Address(TREG, JavaThread::exception_oop_offset())); ++ __ st_ptr(R0, Address(TREG, JavaThread::exception_oop_offset())); ++ __ st_ptr(R0, Address(TREG, JavaThread::exception_pc_offset())); ++ ++ __ bind(_unwind_handler_entry); ++ __ verify_not_null_oop(V0); ++ if (method()->is_synchronized() || compilation()->env()->dtrace_method_probes()) { ++ __ move(S0, V0); // Preserve the exception ++ } ++ ++ // Perform needed unlocking ++ MonitorExitStub* stub = NULL; ++ if (method()->is_synchronized()) { ++ monitor_address(0, FrameMap::a0_opr); ++ stub = new MonitorExitStub(FrameMap::a0_opr, true, 0); ++ __ unlock_object(A5, A4, A0, *stub->entry()); ++ __ bind(*stub->continuation()); ++ } ++ ++ if (compilation()->env()->dtrace_method_probes()) { ++ __ mov_metadata(A1, method()->constant_encoding()); ++ __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::dtrace_method_exit), TREG, A1); ++ } ++ ++ if (method()->is_synchronized() || compilation()->env()->dtrace_method_probes()) { ++ __ move(A0, S0); // Restore the exception ++ } ++ ++ // remove the activation and dispatch to the unwind handler ++ __ block_comment("remove_frame and dispatch to the unwind handler"); ++ __ remove_frame(initial_frame_size_in_bytes()); ++ __ jmp(Runtime1::entry_for(Runtime1::unwind_exception_id), relocInfo::runtime_call_type); ++ ++ // Emit the slow path assembly ++ if (stub != NULL) { ++ stub->emit_code(this); ++ } ++ ++ return offset; ++} ++ ++int LIR_Assembler::emit_deopt_handler() { ++ // if the last instruction is a call (typically to do a throw which ++ // is coming at the end after block reordering) the return address ++ // must still point into the code area in order to avoid assertion ++ // failures when searching for the corresponding bci => add a nop ++ // (was bug 5/14/1999 - gri) ++ __ nop(); ++ ++ // generate code for exception handler ++ address 
handler_base = __ start_a_stub(deopt_handler_size()); ++ if (handler_base == NULL) { ++ // not enough space left for the handler ++ bailout("deopt handler overflow"); ++ return -1; ++ } ++ ++ int offset = code_offset(); ++ ++ __ call(SharedRuntime::deopt_blob()->unpack(), relocInfo::runtime_call_type); ++ guarantee(code_offset() - offset <= deopt_handler_size(), "overflow"); ++ __ end_a_stub(); ++ ++ return offset; ++} ++ ++void LIR_Assembler::add_debug_info_for_branch(address adr, CodeEmitInfo* info) { ++ _masm->code_section()->relocate(adr, relocInfo::poll_type); ++ int pc_offset = code_offset(); ++ flush_debug_info(pc_offset); ++ info->record_debug_info(compilation()->debug_info_recorder(), pc_offset); ++ if (info->exception_handlers() != NULL) { ++ compilation()->add_exception_handlers_for_pco(pc_offset, info->exception_handlers()); ++ } ++} ++ ++void LIR_Assembler::return_op(LIR_Opr result) { ++ assert(result->is_illegal() || !result->is_single_cpu() || result->as_register() == V0, ++ "word returns are in V0,"); ++ ++ // Pop the stack before the safepoint code ++ __ remove_frame(initial_frame_size_in_bytes()); ++ ++ if (StackReservedPages > 0 && compilation()->has_reserved_stack_access()) { ++ __ reserved_stack_check(); ++ } ++ ++ if (SafepointMechanism::uses_thread_local_poll()) { ++ __ ld_ptr(SCR2, Address(TREG, JavaThread::polling_page_offset())); ++ } else { ++ __ li(SCR2, os::get_polling_page()); ++ } ++ __ relocate(relocInfo::poll_return_type); ++ __ ld_w(SCR1, SCR2, 0); ++ __ jr(RA); ++} ++ ++int LIR_Assembler::safepoint_poll(LIR_Opr tmp, CodeEmitInfo* info) { ++ guarantee(info != NULL, "Shouldn't be NULL"); ++ if (SafepointMechanism::uses_thread_local_poll()) { ++ __ ld_ptr(SCR2, Address(TREG, JavaThread::polling_page_offset())); ++ } else { ++ __ li(SCR2, os::get_polling_page()); ++ } ++ add_debug_info_for_branch(info); // This isn't just debug info: it's the oop map ++ __ relocate(relocInfo::poll_type); ++ __ ld_w(SCR1, SCR2, 0); ++ return __ offset(); ++} ++ ++void LIR_Assembler::move_regs(Register from_reg, Register to_reg) { ++ __ move(to_reg, from_reg); ++} ++ ++void LIR_Assembler::swap_reg(Register a, Register b) { Unimplemented(); } ++ ++void LIR_Assembler::const2reg(LIR_Opr src, LIR_Opr dest, LIR_PatchCode patch_code, CodeEmitInfo* info) { ++ assert(src->is_constant(), "should not call otherwise"); ++ assert(dest->is_register(), "should not call otherwise"); ++ LIR_Const* c = src->as_constant_ptr(); ++ ++ switch (c->type()) { ++ case T_INT: ++ assert(patch_code == lir_patch_none, "no patching handled here"); ++ __ li(dest->as_register(), c->as_jint()); ++ break; ++ case T_ADDRESS: ++ assert(patch_code == lir_patch_none, "no patching handled here"); ++ __ li(dest->as_register(), c->as_jint()); ++ break; ++ case T_LONG: ++ assert(patch_code == lir_patch_none, "no patching handled here"); ++ __ li(dest->as_register_lo(), (intptr_t)c->as_jlong()); ++ break; ++ case T_OBJECT: ++ if (patch_code == lir_patch_none) { ++ jobject2reg(c->as_jobject(), dest->as_register()); ++ } else { ++ jobject2reg_with_patching(dest->as_register(), info); ++ } ++ break; ++ case T_METADATA: ++ if (patch_code != lir_patch_none) { ++ klass2reg_with_patching(dest->as_register(), info); ++ } else { ++ __ mov_metadata(dest->as_register(), c->as_metadata()); ++ } ++ break; ++ case T_FLOAT: ++ __ lea(SCR1, InternalAddress(float_constant(c->as_jfloat()))); ++ __ fld_s(dest->as_float_reg(), SCR1, 0); ++ break; ++ case T_DOUBLE: ++ __ lea(SCR1, InternalAddress(double_constant(c->as_jdouble()))); ++ __ 
fld_d(dest->as_double_reg(), SCR1, 0); ++ break; ++ default: ++ ShouldNotReachHere(); ++ } ++} ++ ++void LIR_Assembler::const2stack(LIR_Opr src, LIR_Opr dest) { ++ LIR_Const* c = src->as_constant_ptr(); ++ switch (c->type()) { ++ case T_OBJECT: ++ if (!c->as_jobject()) ++ __ st_ptr(R0, frame_map()->address_for_slot(dest->single_stack_ix())); ++ else { ++ const2reg(src, FrameMap::scr1_opr, lir_patch_none, NULL); ++ reg2stack(FrameMap::scr1_opr, dest, c->type(), false); ++ } ++ break; ++ case T_ADDRESS: ++ const2reg(src, FrameMap::scr1_opr, lir_patch_none, NULL); ++ reg2stack(FrameMap::scr1_opr, dest, c->type(), false); ++ case T_INT: ++ case T_FLOAT: ++ if (c->as_jint_bits() == 0) ++ __ st_w(R0, frame_map()->address_for_slot(dest->single_stack_ix())); ++ else { ++ __ li(SCR2, c->as_jint_bits()); ++ __ st_w(SCR2, frame_map()->address_for_slot(dest->single_stack_ix())); ++ } ++ break; ++ case T_LONG: ++ case T_DOUBLE: ++ if (c->as_jlong_bits() == 0) ++ __ st_ptr(R0, frame_map()->address_for_slot(dest->double_stack_ix(), ++ lo_word_offset_in_bytes)); ++ else { ++ __ li(SCR2, (intptr_t)c->as_jlong_bits()); ++ __ st_ptr(SCR2, frame_map()->address_for_slot(dest->double_stack_ix(), ++ lo_word_offset_in_bytes)); ++ } ++ break; ++ default: ++ ShouldNotReachHere(); ++ } ++} ++ ++void LIR_Assembler::const2mem(LIR_Opr src, LIR_Opr dest, BasicType type, ++ CodeEmitInfo* info, bool wide) { ++ assert(src->is_constant(), "should not call otherwise"); ++ LIR_Const* c = src->as_constant_ptr(); ++ LIR_Address* to_addr = dest->as_address_ptr(); ++ ++ void (Assembler::* insn)(Register Rt, Address adr); ++ ++ switch (type) { ++ case T_ADDRESS: ++ assert(c->as_jint() == 0, "should be"); ++ insn = &Assembler::st_d; ++ break; ++ case T_LONG: ++ assert(c->as_jlong() == 0, "should be"); ++ insn = &Assembler::st_d; ++ break; ++ case T_INT: ++ assert(c->as_jint() == 0, "should be"); ++ insn = &Assembler::st_w; ++ break; ++ case T_OBJECT: ++ case T_ARRAY: ++ assert(c->as_jobject() == 0, "should be"); ++ if (UseCompressedOops && !wide) { ++ insn = &Assembler::st_w; ++ } else { ++ insn = &Assembler::st_d; ++ } ++ break; ++ case T_CHAR: ++ case T_SHORT: ++ assert(c->as_jint() == 0, "should be"); ++ insn = &Assembler::st_h; ++ break; ++ case T_BOOLEAN: ++ case T_BYTE: ++ assert(c->as_jint() == 0, "should be"); ++ insn = &Assembler::st_b; ++ break; ++ default: ++ ShouldNotReachHere(); ++ insn = &Assembler::st_d; // unreachable ++ } ++ ++ if (info) add_debug_info_for_null_check_here(info); ++ (_masm->*insn)(R0, as_Address(to_addr)); ++} ++ ++void LIR_Assembler::reg2reg(LIR_Opr src, LIR_Opr dest) { ++ assert(src->is_register(), "should not call otherwise"); ++ assert(dest->is_register(), "should not call otherwise"); ++ ++ // move between cpu-registers ++ if (dest->is_single_cpu()) { ++ if (src->type() == T_LONG) { ++ // Can do LONG -> OBJECT ++ move_regs(src->as_register_lo(), dest->as_register()); ++ return; ++ } ++ assert(src->is_single_cpu(), "must match"); ++ if (src->type() == T_OBJECT) { ++ __ verify_oop(src->as_register()); ++ } ++ move_regs(src->as_register(), dest->as_register()); ++ } else if (dest->is_double_cpu()) { ++ if (is_reference_type(src->type())) { ++ // Surprising to me but we can see move of a long to t_object ++ __ verify_oop(src->as_register()); ++ move_regs(src->as_register(), dest->as_register_lo()); ++ return; ++ } ++ assert(src->is_double_cpu(), "must match"); ++ Register f_lo = src->as_register_lo(); ++ Register f_hi = src->as_register_hi(); ++ Register t_lo = dest->as_register_lo(); ++ Register 
t_hi = dest->as_register_hi(); ++ assert(f_hi == f_lo, "must be same"); ++ assert(t_hi == t_lo, "must be same"); ++ move_regs(f_lo, t_lo); ++ } else if (dest->is_single_fpu()) { ++ __ fmov_s(dest->as_float_reg(), src->as_float_reg()); ++ } else if (dest->is_double_fpu()) { ++ __ fmov_d(dest->as_double_reg(), src->as_double_reg()); ++ } else { ++ ShouldNotReachHere(); ++ } ++} ++ ++void LIR_Assembler::reg2stack(LIR_Opr src, LIR_Opr dest, BasicType type, bool pop_fpu_stack) { ++ precond(src->is_register() && dest->is_stack()); ++ ++ uint const c_sz32 = sizeof(uint32_t); ++ uint const c_sz64 = sizeof(uint64_t); ++ ++ if (src->is_single_cpu()) { ++ int index = dest->single_stack_ix(); ++ if (is_reference_type(type)) { ++ __ st_ptr(src->as_register(), stack_slot_address(index, c_sz64)); ++ __ verify_oop(src->as_register()); ++ } else if (type == T_METADATA || type == T_DOUBLE || type == T_ADDRESS) { ++ __ st_ptr(src->as_register(), stack_slot_address(index, c_sz64)); ++ } else { ++ __ st_w(src->as_register(), stack_slot_address(index, c_sz32)); ++ } ++ } else if (src->is_double_cpu()) { ++ int index = dest->double_stack_ix(); ++ Address dest_addr_LO = stack_slot_address(index, c_sz64, lo_word_offset_in_bytes); ++ __ st_ptr(src->as_register_lo(), dest_addr_LO); ++ } else if (src->is_single_fpu()) { ++ int index = dest->single_stack_ix(); ++ __ fst_s(src->as_float_reg(), stack_slot_address(index, c_sz32)); ++ } else if (src->is_double_fpu()) { ++ int index = dest->double_stack_ix(); ++ __ fst_d(src->as_double_reg(), stack_slot_address(index, c_sz64)); ++ } else { ++ ShouldNotReachHere(); ++ } ++} ++ ++void LIR_Assembler::reg2mem(LIR_Opr src, LIR_Opr dest, BasicType type, LIR_PatchCode patch_code, ++ CodeEmitInfo* info, bool pop_fpu_stack, bool wide, bool /* unaligned */) { ++ LIR_Address* to_addr = dest->as_address_ptr(); ++ PatchingStub* patch = NULL; ++ Register compressed_src = SCR2; ++ ++ if (patch_code != lir_patch_none) { ++ deoptimize_trap(info); ++ return; ++ } ++ ++ if (is_reference_type(type)) { ++ __ verify_oop(src->as_register()); ++ ++ if (UseCompressedOops && !wide) { ++ __ encode_heap_oop(compressed_src, src->as_register()); ++ } else { ++ compressed_src = src->as_register(); ++ } ++ } ++ ++ int null_check_here = code_offset(); ++ switch (type) { ++ case T_FLOAT: ++ __ fst_s(src->as_float_reg(), as_Address(to_addr)); ++ break; ++ case T_DOUBLE: ++ __ fst_d(src->as_double_reg(), as_Address(to_addr)); ++ break; ++ case T_ARRAY: // fall through ++ case T_OBJECT: // fall through ++ if (UseCompressedOops && !wide) { ++ __ st_w(compressed_src, as_Address(to_addr)); ++ } else { ++ __ st_ptr(compressed_src, as_Address(to_addr)); ++ } ++ break; ++ case T_METADATA: ++ // We get here to store a method pointer to the stack to pass to ++ // a dtrace runtime call. This can't work on 64 bit with ++ // compressed klass ptrs: T_METADATA can be a compressed klass ++ // ptr or a 64 bit method pointer. 
++ ShouldNotReachHere(); ++ __ st_ptr(src->as_register(), as_Address(to_addr)); ++ break; ++ case T_ADDRESS: ++ __ st_ptr(src->as_register(), as_Address(to_addr)); ++ break; ++ case T_INT: ++ __ st_w(src->as_register(), as_Address(to_addr)); ++ break; ++ case T_LONG: ++ __ st_ptr(src->as_register_lo(), as_Address_lo(to_addr)); ++ break; ++ case T_BYTE: // fall through ++ case T_BOOLEAN: ++ __ st_b(src->as_register(), as_Address(to_addr)); ++ break; ++ case T_CHAR: // fall through ++ case T_SHORT: ++ __ st_h(src->as_register(), as_Address(to_addr)); ++ break; ++ default: ++ ShouldNotReachHere(); ++ } ++ if (info != NULL) { ++ add_debug_info_for_null_check(null_check_here, info); ++ } ++} ++ ++void LIR_Assembler::stack2reg(LIR_Opr src, LIR_Opr dest, BasicType type) { ++ precond(src->is_stack() && dest->is_register()); ++ ++ uint const c_sz32 = sizeof(uint32_t); ++ uint const c_sz64 = sizeof(uint64_t); ++ ++ if (dest->is_single_cpu()) { ++ int index = src->single_stack_ix(); ++ if (is_reference_type(type)) { ++ __ ld_ptr(dest->as_register(), stack_slot_address(index, c_sz64)); ++ __ verify_oop(dest->as_register()); ++ } else if (type == T_METADATA || type == T_ADDRESS) { ++ __ ld_ptr(dest->as_register(), stack_slot_address(index, c_sz64)); ++ } else { ++ __ ld_w(dest->as_register(), stack_slot_address(index, c_sz32)); ++ } ++ } else if (dest->is_double_cpu()) { ++ int index = src->double_stack_ix(); ++ Address src_addr_LO = stack_slot_address(index, c_sz64, lo_word_offset_in_bytes); ++ __ ld_ptr(dest->as_register_lo(), src_addr_LO); ++ } else if (dest->is_single_fpu()) { ++ int index = src->single_stack_ix(); ++ __ fld_s(dest->as_float_reg(), stack_slot_address(index, c_sz32)); ++ } else if (dest->is_double_fpu()) { ++ int index = src->double_stack_ix(); ++ __ fld_d(dest->as_double_reg(), stack_slot_address(index, c_sz64)); ++ } else { ++ ShouldNotReachHere(); ++ } ++} ++ ++void LIR_Assembler::klass2reg_with_patching(Register reg, CodeEmitInfo* info) { ++ address target = NULL; ++ ++ switch (patching_id(info)) { ++ case PatchingStub::access_field_id: ++ target = Runtime1::entry_for(Runtime1::access_field_patching_id); ++ break; ++ case PatchingStub::load_klass_id: ++ target = Runtime1::entry_for(Runtime1::load_klass_patching_id); ++ break; ++ case PatchingStub::load_mirror_id: ++ target = Runtime1::entry_for(Runtime1::load_mirror_patching_id); ++ break; ++ case PatchingStub::load_appendix_id: ++ target = Runtime1::entry_for(Runtime1::load_appendix_patching_id); ++ break; ++ default: ShouldNotReachHere(); ++ } ++ ++ __ call(target, relocInfo::runtime_call_type); ++ add_call_info_here(info); ++} ++ ++void LIR_Assembler::stack2stack(LIR_Opr src, LIR_Opr dest, BasicType type) { ++ LIR_Opr temp; ++ ++ if (type == T_LONG || type == T_DOUBLE) ++ temp = FrameMap::scr1_long_opr; ++ else ++ temp = FrameMap::scr1_opr; ++ ++ stack2reg(src, temp, src->type()); ++ reg2stack(temp, dest, dest->type(), false); ++} ++ ++void LIR_Assembler::mem2reg(LIR_Opr src, LIR_Opr dest, BasicType type, LIR_PatchCode patch_code, ++ CodeEmitInfo* info, bool wide, bool /* unaligned */) { ++ LIR_Address* addr = src->as_address_ptr(); ++ LIR_Address* from_addr = src->as_address_ptr(); ++ ++ if (addr->base()->type() == T_OBJECT) { ++ __ verify_oop(addr->base()->as_pointer_register()); ++ } ++ ++ if (patch_code != lir_patch_none) { ++ deoptimize_trap(info); ++ return; ++ } ++ ++ if (info != NULL) { ++ add_debug_info_for_null_check_here(info); ++ } ++ int null_check_here = code_offset(); ++ switch (type) { ++ case T_FLOAT: ++ __ 
fld_s(dest->as_float_reg(), as_Address(from_addr)); ++ break; ++ case T_DOUBLE: ++ __ fld_d(dest->as_double_reg(), as_Address(from_addr)); ++ break; ++ case T_ARRAY: // fall through ++ case T_OBJECT: // fall through ++ if (UseCompressedOops && !wide) { ++ __ ld_wu(dest->as_register(), as_Address(from_addr)); ++ } else { ++ __ ld_ptr(dest->as_register(), as_Address(from_addr)); ++ } ++ break; ++ case T_METADATA: ++ // We get here to store a method pointer to the stack to pass to ++ // a dtrace runtime call. This can't work on 64 bit with ++ // compressed klass ptrs: T_METADATA can be a compressed klass ++ // ptr or a 64 bit method pointer. ++ ShouldNotReachHere(); ++ __ ld_ptr(dest->as_register(), as_Address(from_addr)); ++ break; ++ case T_ADDRESS: ++ // FIXME: OMG this is a horrible kludge. Any offset from an ++ // address that matches klass_offset_in_bytes() will be loaded ++ // as a word, not a long. ++ if (UseCompressedClassPointers && addr->disp() == oopDesc::klass_offset_in_bytes()) { ++ __ ld_wu(dest->as_register(), as_Address(from_addr)); ++ } else { ++ __ ld_ptr(dest->as_register(), as_Address(from_addr)); ++ } ++ break; ++ case T_INT: ++ __ ld_w(dest->as_register(), as_Address(from_addr)); ++ break; ++ case T_LONG: ++ __ ld_ptr(dest->as_register_lo(), as_Address_lo(from_addr)); ++ break; ++ case T_BYTE: ++ __ ld_b(dest->as_register(), as_Address(from_addr)); ++ break; ++ case T_BOOLEAN: ++ __ ld_bu(dest->as_register(), as_Address(from_addr)); ++ break; ++ case T_CHAR: ++ __ ld_hu(dest->as_register(), as_Address(from_addr)); ++ break; ++ case T_SHORT: ++ __ ld_h(dest->as_register(), as_Address(from_addr)); ++ break; ++ default: ++ ShouldNotReachHere(); ++ } ++ ++ if (is_reference_type(type)) { ++ if (UseCompressedOops && !wide) { ++ __ decode_heap_oop(dest->as_register()); ++ } ++ ++ if (!UseZGC) { ++ // Load barrier has not yet been applied, so ZGC can't verify the oop here ++ __ verify_oop(dest->as_register()); ++ } ++ } else if (type == T_ADDRESS && addr->disp() == oopDesc::klass_offset_in_bytes()) { ++ if (UseCompressedClassPointers) { ++ __ decode_klass_not_null(dest->as_register()); ++ } ++ } ++} ++ ++int LIR_Assembler::array_element_size(BasicType type) const { ++ int elem_size = type2aelembytes(type); ++ return exact_log2(elem_size); ++} ++ ++void LIR_Assembler::emit_op3(LIR_Op3* op) { ++ switch (op->code()) { ++ case lir_idiv: ++ case lir_irem: ++ arithmetic_idiv(op->code(), op->in_opr1(), op->in_opr2(), op->in_opr3(), ++ op->result_opr(), op->info()); ++ break; ++ case lir_fmad: ++ __ fmadd_d(op->result_opr()->as_double_reg(), op->in_opr1()->as_double_reg(), ++ op->in_opr2()->as_double_reg(), op->in_opr3()->as_double_reg()); ++ break; ++ case lir_fmaf: ++ __ fmadd_s(op->result_opr()->as_float_reg(), op->in_opr1()->as_float_reg(), ++ op->in_opr2()->as_float_reg(), op->in_opr3()->as_float_reg()); ++ break; ++ default: ++ ShouldNotReachHere(); ++ break; ++ } ++} ++ ++void LIR_Assembler::emit_opBranch(LIR_OpBranch* op) { ++#ifdef ASSERT ++ assert(op->block() == NULL || op->block()->label() == op->label(), "wrong label"); ++ if (op->block() != NULL) _branch_target_blocks.append(op->block()); ++ assert(op->cond() == lir_cond_always, "must be"); ++#endif ++ ++ if (op->info() != NULL) ++ add_debug_info_for_branch(op->info()); ++ ++ __ b_far(*(op->label())); ++} ++ ++void LIR_Assembler::emit_opCmpBranch(LIR_OpCmpBranch* op) { ++#ifdef ASSERT ++ assert(op->block() == NULL || op->block()->label() == op->label(), "wrong label"); ++ if (op->block() != NULL) 
_branch_target_blocks.append(op->block()); ++ if (op->ublock() != NULL) _branch_target_blocks.append(op->ublock()); ++#endif ++ ++ if (op->info() != NULL) { ++ assert(op->in_opr1()->is_address() || op->in_opr2()->is_address(), ++ "shouldn't be codeemitinfo for non-address operands"); ++ add_debug_info_for_null_check_here(op->info()); // exception possible ++ } ++ ++ Label& L = *(op->label()); ++ Assembler::Condition acond; ++ LIR_Opr opr1 = op->in_opr1(); ++ LIR_Opr opr2 = op->in_opr2(); ++ assert(op->condition() != lir_cond_always, "must be"); ++ ++ if (op->code() == lir_cmp_float_branch) { ++ bool is_unordered = (op->ublock() == op->block()); ++ if (opr1->is_single_fpu()) { ++ FloatRegister reg1 = opr1->as_float_reg(); ++ assert(opr2->is_single_fpu(), "expect single float register"); ++ FloatRegister reg2 = opr2->as_float_reg(); ++ switch(op->condition()) { ++ case lir_cond_equal: ++ if (is_unordered) ++ __ fcmp_cueq_s(FCC0, reg1, reg2); ++ else ++ __ fcmp_ceq_s(FCC0, reg1, reg2); ++ break; ++ case lir_cond_notEqual: ++ if (is_unordered) ++ __ fcmp_cune_s(FCC0, reg1, reg2); ++ else ++ __ fcmp_cne_s(FCC0, reg1, reg2); ++ break; ++ case lir_cond_less: ++ if (is_unordered) ++ __ fcmp_cult_s(FCC0, reg1, reg2); ++ else ++ __ fcmp_clt_s(FCC0, reg1, reg2); ++ break; ++ case lir_cond_lessEqual: ++ if (is_unordered) ++ __ fcmp_cule_s(FCC0, reg1, reg2); ++ else ++ __ fcmp_cle_s(FCC0, reg1, reg2); ++ break; ++ case lir_cond_greaterEqual: ++ if (is_unordered) ++ __ fcmp_cule_s(FCC0, reg2, reg1); ++ else ++ __ fcmp_cle_s(FCC0, reg2, reg1); ++ break; ++ case lir_cond_greater: ++ if (is_unordered) ++ __ fcmp_cult_s(FCC0, reg2, reg1); ++ else ++ __ fcmp_clt_s(FCC0, reg2, reg1); ++ break; ++ default: ++ ShouldNotReachHere(); ++ } ++ } else if (opr1->is_double_fpu()) { ++ FloatRegister reg1 = opr1->as_double_reg(); ++ assert(opr2->is_double_fpu(), "expect double float register"); ++ FloatRegister reg2 = opr2->as_double_reg(); ++ switch(op->condition()) { ++ case lir_cond_equal: ++ if (is_unordered) ++ __ fcmp_cueq_d(FCC0, reg1, reg2); ++ else ++ __ fcmp_ceq_d(FCC0, reg1, reg2); ++ break; ++ case lir_cond_notEqual: ++ if (is_unordered) ++ __ fcmp_cune_d(FCC0, reg1, reg2); ++ else ++ __ fcmp_cne_d(FCC0, reg1, reg2); ++ break; ++ case lir_cond_less: ++ if (is_unordered) ++ __ fcmp_cult_d(FCC0, reg1, reg2); ++ else ++ __ fcmp_clt_d(FCC0, reg1, reg2); ++ break; ++ case lir_cond_lessEqual: ++ if (is_unordered) ++ __ fcmp_cule_d(FCC0, reg1, reg2); ++ else ++ __ fcmp_cle_d(FCC0, reg1, reg2); ++ break; ++ case lir_cond_greaterEqual: ++ if (is_unordered) ++ __ fcmp_cule_d(FCC0, reg2, reg1); ++ else ++ __ fcmp_cle_d(FCC0, reg2, reg1); ++ break; ++ case lir_cond_greater: ++ if (is_unordered) ++ __ fcmp_cult_d(FCC0, reg2, reg1); ++ else ++ __ fcmp_clt_d(FCC0, reg2, reg1); ++ break; ++ default: ++ ShouldNotReachHere(); ++ } ++ } else { ++ ShouldNotReachHere(); ++ } ++ __ bcnez(FCC0, L); ++ } else { ++ if (opr1->is_constant() && opr2->is_single_cpu()) { ++ // tableswitch ++ Unimplemented(); ++ } else if (opr1->is_single_cpu() || opr1->is_double_cpu()) { ++ Register reg1 = as_reg(opr1); ++ Register reg2 = noreg; ++ jlong imm2 = 0; ++ if (opr2->is_single_cpu()) { ++ // cpu register - cpu register ++ reg2 = opr2->as_register(); ++ } else if (opr2->is_double_cpu()) { ++ // cpu register - cpu register ++ reg2 = opr2->as_register_lo(); ++ } else if (opr2->is_constant()) { ++ switch(opr2->type()) { ++ case T_INT: ++ case T_ADDRESS: ++ imm2 = opr2->as_constant_ptr()->as_jint(); ++ break; ++ case T_LONG: ++ imm2 = 
opr2->as_constant_ptr()->as_jlong(); ++ break; ++ case T_METADATA: ++ imm2 = (intptr_t)opr2->as_constant_ptr()->as_metadata(); ++ break; ++ case T_OBJECT: ++ case T_ARRAY: ++ if (opr2->as_constant_ptr()->as_jobject() != NULL) { ++ reg2 = SCR1; ++ jobject2reg(opr2->as_constant_ptr()->as_jobject(), reg2); ++ } else { ++ reg2 = R0; ++ } ++ break; ++ default: ++ ShouldNotReachHere(); ++ break; ++ } ++ } else { ++ ShouldNotReachHere(); ++ } ++ if (reg2 == noreg) { ++ if (imm2 == 0) { ++ reg2 = R0; ++ } else { ++ reg2 = SCR1; ++ __ li(reg2, imm2); ++ } ++ } ++ switch (op->condition()) { ++ case lir_cond_equal: ++ __ beq_far(reg1, reg2, L); break; ++ case lir_cond_notEqual: ++ __ bne_far(reg1, reg2, L); break; ++ case lir_cond_less: ++ __ blt_far(reg1, reg2, L, true); break; ++ case lir_cond_lessEqual: ++ __ bge_far(reg2, reg1, L, true); break; ++ case lir_cond_greaterEqual: ++ __ bge_far(reg1, reg2, L, true); break; ++ case lir_cond_greater: ++ __ blt_far(reg2, reg1, L, true); break; ++ case lir_cond_belowEqual: ++ __ bge_far(reg2, reg1, L, false); break; ++ case lir_cond_aboveEqual: ++ __ bge_far(reg1, reg2, L, false); break; ++ default: ++ ShouldNotReachHere(); ++ } ++ } ++ } ++} ++ ++void LIR_Assembler::emit_opConvert(LIR_OpConvert* op) { ++ LIR_Opr src = op->in_opr(); ++ LIR_Opr dest = op->result_opr(); ++ LIR_Opr tmp = op->tmp(); ++ ++ switch (op->bytecode()) { ++ case Bytecodes::_i2f: ++ __ movgr2fr_w(dest->as_float_reg(), src->as_register()); ++ __ ffint_s_w(dest->as_float_reg(), dest->as_float_reg()); ++ break; ++ case Bytecodes::_i2d: ++ __ movgr2fr_w(dest->as_double_reg(), src->as_register()); ++ __ ffint_d_w(dest->as_double_reg(), dest->as_double_reg()); ++ break; ++ case Bytecodes::_l2d: ++ __ movgr2fr_d(dest->as_double_reg(), src->as_register_lo()); ++ __ ffint_d_l(dest->as_double_reg(), dest->as_double_reg()); ++ break; ++ case Bytecodes::_l2f: ++ __ movgr2fr_d(dest->as_float_reg(), src->as_register_lo()); ++ __ ffint_s_l(dest->as_float_reg(), dest->as_float_reg()); ++ break; ++ case Bytecodes::_f2d: ++ __ fcvt_d_s(dest->as_double_reg(), src->as_float_reg()); ++ break; ++ case Bytecodes::_d2f: ++ __ fcvt_s_d(dest->as_float_reg(), src->as_double_reg()); ++ break; ++ case Bytecodes::_i2c: ++ __ bstrpick_w(dest->as_register(), src->as_register(), 15, 0); ++ break; ++ case Bytecodes::_i2l: ++ _masm->block_comment("FIXME: This could be a no-op"); ++ __ slli_w(dest->as_register_lo(), src->as_register(), 0); ++ break; ++ case Bytecodes::_i2s: ++ __ ext_w_h(dest->as_register(), src->as_register()); ++ break; ++ case Bytecodes::_i2b: ++ __ ext_w_b(dest->as_register(), src->as_register()); ++ break; ++ case Bytecodes::_l2i: ++ __ slli_w(dest->as_register(), src->as_register_lo(), 0); ++ break; ++ case Bytecodes::_d2l: ++ __ ftintrz_l_d(tmp->as_double_reg(), src->as_double_reg()); ++ __ movfr2gr_d(dest->as_register_lo(), tmp->as_double_reg()); ++ break; ++ case Bytecodes::_f2i: ++ __ ftintrz_w_s(tmp->as_float_reg(), src->as_float_reg()); ++ __ movfr2gr_s(dest->as_register(), tmp->as_float_reg()); ++ break; ++ case Bytecodes::_f2l: ++ __ ftintrz_l_s(tmp->as_float_reg(), src->as_float_reg()); ++ __ movfr2gr_d(dest->as_register_lo(), tmp->as_float_reg()); ++ break; ++ case Bytecodes::_d2i: ++ __ ftintrz_w_d(tmp->as_double_reg(), src->as_double_reg()); ++ __ movfr2gr_s(dest->as_register(), tmp->as_double_reg()); ++ break; ++ default: ShouldNotReachHere(); ++ } ++} ++ ++void LIR_Assembler::emit_alloc_obj(LIR_OpAllocObj* op) { ++ if (op->init_check()) { ++ __ ld_bu(SCR1, 
Address(op->klass()->as_register(), InstanceKlass::init_state_offset())); ++ __ li(SCR2, InstanceKlass::fully_initialized); ++ add_debug_info_for_null_check_here(op->stub()->info()); ++ __ bne_far(SCR1, SCR2, *op->stub()->entry()); ++ } ++ __ allocate_object(op->obj()->as_register(), op->tmp1()->as_register(), ++ op->tmp2()->as_register(), op->header_size(), ++ op->object_size(), op->klass()->as_register(), ++ *op->stub()->entry()); ++ __ bind(*op->stub()->continuation()); ++} ++ ++void LIR_Assembler::emit_alloc_array(LIR_OpAllocArray* op) { ++ Register len = op->len()->as_register(); ++ if (UseSlowPath || ++ (!UseFastNewObjectArray && is_reference_type(op->type())) || ++ (!UseFastNewTypeArray && !is_reference_type(op->type()))) { ++ __ b(*op->stub()->entry()); ++ } else { ++ Register tmp1 = op->tmp1()->as_register(); ++ Register tmp2 = op->tmp2()->as_register(); ++ Register tmp3 = op->tmp3()->as_register(); ++ if (len == tmp1) { ++ tmp1 = tmp3; ++ } else if (len == tmp2) { ++ tmp2 = tmp3; ++ } else if (len == tmp3) { ++ // everything is ok ++ } else { ++ __ move(tmp3, len); ++ } ++ __ allocate_array(op->obj()->as_register(), len, tmp1, tmp2, ++ arrayOopDesc::header_size(op->type()), ++ array_element_size(op->type()), ++ op->klass()->as_register(), ++ *op->stub()->entry()); ++ } ++ __ bind(*op->stub()->continuation()); ++} ++ ++void LIR_Assembler::type_profile_helper(Register mdo, ciMethodData *md, ciProfileData *data, ++ Register recv, Label* update_done) { ++ for (uint i = 0; i < ReceiverTypeData::row_limit(); i++) { ++ Label next_test; ++ // See if the receiver is receiver[n]. ++ __ lea(SCR2, Address(mdo, md->byte_offset_of_slot(data, ReceiverTypeData::receiver_offset(i)))); ++ __ ld_ptr(SCR1, Address(SCR2)); ++ __ bne(recv, SCR1, next_test); ++ Address data_addr(mdo, md->byte_offset_of_slot(data, ReceiverTypeData::receiver_count_offset(i))); ++ __ ld_ptr(SCR2, data_addr); ++ __ addi_d(SCR2, SCR2, DataLayout::counter_increment); ++ __ st_ptr(SCR2, data_addr); ++ __ b(*update_done); ++ __ bind(next_test); ++ } ++ ++ // Didn't find receiver; find next empty slot and fill it in ++ for (uint i = 0; i < ReceiverTypeData::row_limit(); i++) { ++ Label next_test; ++ __ lea(SCR2, Address(mdo, md->byte_offset_of_slot(data, ReceiverTypeData::receiver_offset(i)))); ++ Address recv_addr(SCR2); ++ __ ld_ptr(SCR1, recv_addr); ++ __ bnez(SCR1, next_test); ++ __ st_ptr(recv, recv_addr); ++ __ li(SCR1, DataLayout::counter_increment); ++ __ lea(SCR2, Address(mdo, md->byte_offset_of_slot(data, ReceiverTypeData::receiver_count_offset(i)))); ++ __ st_ptr(SCR1, Address(SCR2)); ++ __ b(*update_done); ++ __ bind(next_test); ++ } ++} ++ ++void LIR_Assembler::emit_typecheck_helper(LIR_OpTypeCheck *op, Label* success, ++ Label* failure, Label* obj_is_null) { ++ // we always need a stub for the failure case. 
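++ // Note: tmp1/tmp2 may alias the object register; they are redirected to the
++ // result register below so obj stays intact across the klass loads and the
++ // slow-path runtime call.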
++ CodeStub* stub = op->stub(); ++ Register obj = op->object()->as_register(); ++ Register k_RInfo = op->tmp1()->as_register(); ++ Register klass_RInfo = op->tmp2()->as_register(); ++ Register dst = op->result_opr()->as_register(); ++ ciKlass* k = op->klass(); ++ Register Rtmp1 = noreg; ++ ++ // check if it needs to be profiled ++ ciMethodData* md; ++ ciProfileData* data; ++ ++ const bool should_profile = op->should_profile(); ++ ++ if (should_profile) { ++ ciMethod* method = op->profiled_method(); ++ assert(method != NULL, "Should have method"); ++ int bci = op->profiled_bci(); ++ md = method->method_data_or_null(); ++ assert(md != NULL, "Sanity"); ++ data = md->bci_to_data(bci); ++ assert(data != NULL, "need data for type check"); ++ assert(data->is_ReceiverTypeData(), "need ReceiverTypeData for type check"); ++ } ++ ++ Label profile_cast_success, profile_cast_failure; ++ Label *success_target = should_profile ? &profile_cast_success : success; ++ Label *failure_target = should_profile ? &profile_cast_failure : failure; ++ ++ if (obj == k_RInfo) { ++ k_RInfo = dst; ++ } else if (obj == klass_RInfo) { ++ klass_RInfo = dst; ++ } ++ if (k->is_loaded() && !UseCompressedClassPointers) { ++ select_different_registers(obj, dst, k_RInfo, klass_RInfo); ++ } else { ++ Rtmp1 = op->tmp3()->as_register(); ++ select_different_registers(obj, dst, k_RInfo, klass_RInfo, Rtmp1); ++ } ++ ++ assert_different_registers(obj, k_RInfo, klass_RInfo); ++ ++ if (should_profile) { ++ Label not_null; ++ __ bnez(obj, not_null); ++ // Object is null; update MDO and exit ++ Register mdo = klass_RInfo; ++ __ mov_metadata(mdo, md->constant_encoding()); ++ Address data_addr = Address(mdo, md->byte_offset_of_slot(data, DataLayout::flags_offset())); ++ __ ld_bu(SCR2, data_addr); ++ __ ori(SCR2, SCR2, BitData::null_seen_byte_constant()); ++ __ st_b(SCR2, data_addr); ++ __ b(*obj_is_null); ++ __ bind(not_null); ++ } else { ++ __ beqz(obj, *obj_is_null); ++ } ++ ++ if (!k->is_loaded()) { ++ klass2reg_with_patching(k_RInfo, op->info_for_patch()); ++ } else { ++ __ mov_metadata(k_RInfo, k->constant_encoding()); ++ } ++ __ verify_oop(obj); ++ ++ if (op->fast_check()) { ++ // get object class ++ // not a safepoint as obj null check happens earlier ++ __ load_klass(SCR2, obj); ++ __ bne_far(SCR2, k_RInfo, *failure_target); ++ // successful cast, fall through to profile or jump ++ } else { ++ // get object class ++ // not a safepoint as obj null check happens earlier ++ __ load_klass(klass_RInfo, obj); ++ if (k->is_loaded()) { ++ // See if we get an immediate positive hit ++ __ ld_ptr(SCR1, Address(klass_RInfo, int64_t(k->super_check_offset()))); ++ if ((juint)in_bytes(Klass::secondary_super_cache_offset()) != k->super_check_offset()) { ++ __ bne_far(k_RInfo, SCR1, *failure_target); ++ // successful cast, fall through to profile or jump ++ } else { ++ // See if we get an immediate positive hit ++ __ beq_far(k_RInfo, SCR1, *success_target); ++ // check for self ++ __ beq_far(klass_RInfo, k_RInfo, *success_target); ++ ++ __ addi_d(SP, SP, -2 * wordSize); ++ __ st_ptr(k_RInfo, Address(SP, 0 * wordSize)); ++ __ st_ptr(klass_RInfo, Address(SP, 1 * wordSize)); ++ __ call(Runtime1::entry_for(Runtime1::slow_subtype_check_id), relocInfo::runtime_call_type); ++ __ ld_ptr(klass_RInfo, Address(SP, 0 * wordSize)); ++ __ addi_d(SP, SP, 2 * wordSize); ++ // result is a boolean ++ __ beqz(klass_RInfo, *failure_target); ++ // successful cast, fall through to profile or jump ++ } ++ } else { ++ // perform the fast part of the checking logic ++ __ 
check_klass_subtype_fast_path(klass_RInfo, k_RInfo, Rtmp1, success_target, failure_target, NULL); ++ // call out-of-line instance of __ check_klass_subtype_slow_path(...): ++ __ addi_d(SP, SP, -2 * wordSize); ++ __ st_ptr(k_RInfo, Address(SP, 0 * wordSize)); ++ __ st_ptr(klass_RInfo, Address(SP, 1 * wordSize)); ++ __ call(Runtime1::entry_for(Runtime1::slow_subtype_check_id), relocInfo::runtime_call_type); ++ __ ld_ptr(k_RInfo, Address(SP, 0 * wordSize)); ++ __ ld_ptr(klass_RInfo, Address(SP, 1 * wordSize)); ++ __ addi_d(SP, SP, 2 * wordSize); ++ // result is a boolean ++ __ beqz(k_RInfo, *failure_target); ++ // successful cast, fall through to profile or jump ++ } ++ } ++ if (should_profile) { ++ Register mdo = klass_RInfo, recv = k_RInfo; ++ __ bind(profile_cast_success); ++ __ mov_metadata(mdo, md->constant_encoding()); ++ __ load_klass(recv, obj); ++ Label update_done; ++ type_profile_helper(mdo, md, data, recv, success); ++ __ b(*success); ++ ++ __ bind(profile_cast_failure); ++ __ mov_metadata(mdo, md->constant_encoding()); ++ Address counter_addr = Address(mdo, md->byte_offset_of_slot(data, CounterData::count_offset())); ++ __ ld_ptr(SCR2, counter_addr); ++ __ addi_d(SCR2, SCR2, -DataLayout::counter_increment); ++ __ st_ptr(SCR2, counter_addr); ++ __ b(*failure); ++ } ++ __ b(*success); ++} ++ ++void LIR_Assembler::emit_opTypeCheck(LIR_OpTypeCheck* op) { ++ const bool should_profile = op->should_profile(); ++ ++ LIR_Code code = op->code(); ++ if (code == lir_store_check) { ++ Register value = op->object()->as_register(); ++ Register array = op->array()->as_register(); ++ Register k_RInfo = op->tmp1()->as_register(); ++ Register klass_RInfo = op->tmp2()->as_register(); ++ Register Rtmp1 = op->tmp3()->as_register(); ++ CodeStub* stub = op->stub(); ++ ++ // check if it needs to be profiled ++ ciMethodData* md; ++ ciProfileData* data; ++ ++ if (should_profile) { ++ ciMethod* method = op->profiled_method(); ++ assert(method != NULL, "Should have method"); ++ int bci = op->profiled_bci(); ++ md = method->method_data_or_null(); ++ assert(md != NULL, "Sanity"); ++ data = md->bci_to_data(bci); ++ assert(data != NULL, "need data for type check"); ++ assert(data->is_ReceiverTypeData(), "need ReceiverTypeData for type check"); ++ } ++ Label profile_cast_success, profile_cast_failure, done; ++ Label *success_target = should_profile ? &profile_cast_success : &done; ++ Label *failure_target = should_profile ? 
&profile_cast_failure : stub->entry(); ++ ++ if (should_profile) { ++ Label not_null; ++ __ bnez(value, not_null); ++ // Object is null; update MDO and exit ++ Register mdo = klass_RInfo; ++ __ mov_metadata(mdo, md->constant_encoding()); ++ Address data_addr = Address(mdo, md->byte_offset_of_slot(data, DataLayout::flags_offset())); ++ __ ld_bu(SCR2, data_addr); ++ __ ori(SCR2, SCR2, BitData::null_seen_byte_constant()); ++ __ st_b(SCR2, data_addr); ++ __ b(done); ++ __ bind(not_null); ++ } else { ++ __ beqz(value, done); ++ } ++ ++ add_debug_info_for_null_check_here(op->info_for_exception()); ++ __ load_klass(k_RInfo, array); ++ __ load_klass(klass_RInfo, value); ++ ++ // get instance klass (it's already uncompressed) ++ __ ld_ptr(k_RInfo, Address(k_RInfo, ObjArrayKlass::element_klass_offset())); ++ // perform the fast part of the checking logic ++ __ check_klass_subtype_fast_path(klass_RInfo, k_RInfo, Rtmp1, success_target, failure_target, NULL); ++ // call out-of-line instance of __ check_klass_subtype_slow_path(...): ++ __ addi_d(SP, SP, -2 * wordSize); ++ __ st_ptr(k_RInfo, Address(SP, 0 * wordSize)); ++ __ st_ptr(klass_RInfo, Address(SP, 1 * wordSize)); ++ __ call(Runtime1::entry_for(Runtime1::slow_subtype_check_id), relocInfo::runtime_call_type); ++ __ ld_ptr(k_RInfo, Address(SP, 0 * wordSize)); ++ __ ld_ptr(klass_RInfo, Address(SP, 1 * wordSize)); ++ __ addi_d(SP, SP, 2 * wordSize); ++ // result is a boolean ++ __ beqz(k_RInfo, *failure_target); ++ // fall through to the success case ++ ++ if (should_profile) { ++ Register mdo = klass_RInfo, recv = k_RInfo; ++ __ bind(profile_cast_success); ++ __ mov_metadata(mdo, md->constant_encoding()); ++ __ load_klass(recv, value); ++ Label update_done; ++ type_profile_helper(mdo, md, data, recv, &done); ++ __ b(done); ++ ++ __ bind(profile_cast_failure); ++ __ mov_metadata(mdo, md->constant_encoding()); ++ Address counter_addr(mdo, md->byte_offset_of_slot(data, CounterData::count_offset())); ++ __ lea(SCR2, counter_addr); ++ __ ld_ptr(SCR1, Address(SCR2)); ++ __ addi_d(SCR1, SCR1, -DataLayout::counter_increment); ++ __ st_ptr(SCR1, Address(SCR2)); ++ __ b(*stub->entry()); ++ } ++ ++ __ bind(done); ++ } else if (code == lir_checkcast) { ++ Register obj = op->object()->as_register(); ++ Register dst = op->result_opr()->as_register(); ++ Label success; ++ emit_typecheck_helper(op, &success, op->stub()->entry(), &success); ++ __ bind(success); ++ if (dst != obj) { ++ __ move(dst, obj); ++ } ++ } else if (code == lir_instanceof) { ++ Register obj = op->object()->as_register(); ++ Register dst = op->result_opr()->as_register(); ++ Label success, failure, done; ++ emit_typecheck_helper(op, &success, &failure, &failure); ++ __ bind(failure); ++ __ move(dst, R0); ++ __ b(done); ++ __ bind(success); ++ __ li(dst, 1); ++ __ bind(done); ++ } else { ++ ShouldNotReachHere(); ++ } ++} ++ ++void LIR_Assembler::casw(Register addr, Register newval, Register cmpval, bool sign) { ++ __ cmpxchg32(Address(addr, 0), cmpval, newval, SCR1, sign, ++ /* retold */ false, /* barrier */ true); ++} ++ ++void LIR_Assembler::casl(Register addr, Register newval, Register cmpval) { ++ __ cmpxchg(Address(addr, 0), cmpval, newval, SCR1, ++ /* retold */ false, /* barrier */ true); ++} ++ ++void LIR_Assembler::emit_compare_and_swap(LIR_OpCompareAndSwap* op) { ++ assert(VM_Version::supports_cx8(), "wrong machine"); ++ Register addr; ++ if (op->addr()->is_register()) { ++ addr = as_reg(op->addr()); ++ } else { ++ assert(op->addr()->is_address(), "what else?"); ++ LIR_Address* addr_ptr 
= op->addr()->as_address_ptr(); ++ assert(addr_ptr->disp() == 0, "need 0 disp"); ++ assert(addr_ptr->index() == LIR_OprDesc::illegalOpr(), "need 0 index"); ++ addr = as_reg(addr_ptr->base()); ++ } ++ Register newval = as_reg(op->new_value()); ++ Register cmpval = as_reg(op->cmp_value()); ++ ++ if (op->code() == lir_cas_obj) { ++ if (UseCompressedOops) { ++ Register t1 = op->tmp1()->as_register(); ++ assert(op->tmp1()->is_valid(), "must be"); ++ __ encode_heap_oop(t1, cmpval); ++ cmpval = t1; ++ __ encode_heap_oop(SCR2, newval); ++ newval = SCR2; ++ casw(addr, newval, cmpval, false); ++ } else { ++ casl(addr, newval, cmpval); ++ } ++ } else if (op->code() == lir_cas_int) { ++ casw(addr, newval, cmpval, true); ++ } else { ++ casl(addr, newval, cmpval); ++ } ++} ++ ++void LIR_Assembler::cmove(LIR_Condition condition, LIR_Opr opr1, LIR_Opr opr2, ++ LIR_Opr result, BasicType type) { ++ Unimplemented(); ++} ++ ++void LIR_Assembler::cmp_cmove(LIR_Condition condition, LIR_Opr left, LIR_Opr right, ++ LIR_Opr src1, LIR_Opr src2, LIR_Opr result, BasicType type) { ++ assert(result->is_single_cpu() || result->is_double_cpu(), "expect single register for result"); ++ assert(left->is_single_cpu() || left->is_double_cpu(), "must be"); ++ Register regd = (result->type() == T_LONG) ? result->as_register_lo() : result->as_register(); ++ Register regl = as_reg(left); ++ Register regr = noreg; ++ Register reg1 = noreg; ++ Register reg2 = noreg; ++ jlong immr = 0; ++ ++ // comparison operands ++ if (right->is_single_cpu()) { ++ // cpu register - cpu register ++ regr = right->as_register(); ++ } else if (right->is_double_cpu()) { ++ // cpu register - cpu register ++ regr = right->as_register_lo(); ++ } else if (right->is_constant()) { ++ switch(right->type()) { ++ case T_INT: ++ case T_ADDRESS: ++ immr = right->as_constant_ptr()->as_jint(); ++ break; ++ case T_LONG: ++ immr = right->as_constant_ptr()->as_jlong(); ++ break; ++ case T_METADATA: ++ immr = (intptr_t)right->as_constant_ptr()->as_metadata(); ++ break; ++ case T_OBJECT: ++ case T_ARRAY: ++ if (right->as_constant_ptr()->as_jobject() != NULL) { ++ regr = SCR1; ++ jobject2reg(right->as_constant_ptr()->as_jobject(), regr); ++ } else { ++ immr = 0; ++ } ++ break; ++ default: ++ ShouldNotReachHere(); ++ break; ++ } ++ } else { ++ ShouldNotReachHere(); ++ } ++ ++ if (regr == noreg) { ++ switch (condition) { ++ case lir_cond_equal: ++ case lir_cond_notEqual: ++ if (!Assembler::is_simm(-immr, 12)) { ++ regr = SCR1; ++ __ li(regr, immr); ++ } ++ break; ++ default: ++ if (!Assembler::is_simm(immr, 12)) { ++ regr = SCR1; ++ __ li(regr, immr); ++ } ++ } ++ } ++ ++ // special cases ++ if (src1->is_constant() && src2->is_constant()) { ++ jlong val1 = 0, val2 = 0; ++ if (src1->type() == T_INT && src2->type() == T_INT) { ++ val1 = src1->as_jint(); ++ val2 = src2->as_jint(); ++ } else if (src1->type() == T_LONG && src2->type() == T_LONG) { ++ val1 = src1->as_jlong(); ++ val2 = src2->as_jlong(); ++ } ++ if (val1 == 0 && val2 == 1) { ++ if (regr == noreg) { ++ switch (condition) { ++ case lir_cond_equal: ++ if (immr == 0) { ++ __ sltu(regd, R0, regl); ++ } else { ++ __ addi_d(SCR1, regl, -immr); ++ __ li(regd, 1); ++ __ maskeqz(regd, regd, SCR1); ++ } ++ break; ++ case lir_cond_notEqual: ++ if (immr == 0) { ++ __ sltu(regd, R0, regl); ++ __ xori(regd, regd, 1); ++ } else { ++ __ addi_d(SCR1, regl, -immr); ++ __ li(regd, 1); ++ __ masknez(regd, regd, SCR1); ++ } ++ break; ++ case lir_cond_less: ++ __ slti(regd, regl, immr); ++ __ xori(regd, regd, 1); ++ break; ++ case 
lir_cond_lessEqual: ++ if (immr == 0) { ++ __ slt(regd, R0, regl); ++ } else { ++ __ li(SCR1, immr); ++ __ slt(regd, SCR1, regl); ++ } ++ break; ++ case lir_cond_greater: ++ if (immr == 0) { ++ __ slt(regd, R0, regl); ++ } else { ++ __ li(SCR1, immr); ++ __ slt(regd, SCR1, regl); ++ } ++ __ xori(regd, regd, 1); ++ break; ++ case lir_cond_greaterEqual: ++ __ slti(regd, regl, immr); ++ break; ++ case lir_cond_belowEqual: ++ if (immr == 0) { ++ __ sltu(regd, R0, regl); ++ } else { ++ __ li(SCR1, immr); ++ __ sltu(regd, SCR1, regl); ++ } ++ break; ++ case lir_cond_aboveEqual: ++ __ sltui(regd, regl, immr); ++ break; ++ default: ++ ShouldNotReachHere(); ++ } ++ } else { ++ switch (condition) { ++ case lir_cond_equal: ++ __ sub_d(SCR1, regl, regr); ++ __ li(regd, 1); ++ __ maskeqz(regd, regd, SCR1); ++ break; ++ case lir_cond_notEqual: ++ __ sub_d(SCR1, regl, regr); ++ __ li(regd, 1); ++ __ masknez(regd, regd, SCR1); ++ break; ++ case lir_cond_less: ++ __ slt(regd, regl, regr); ++ __ xori(regd, regd, 1); ++ break; ++ case lir_cond_lessEqual: ++ __ slt(regd, regr, regl); ++ break; ++ case lir_cond_greater: ++ __ slt(regd, regr, regl); ++ __ xori(regd, regd, 1); ++ break; ++ case lir_cond_greaterEqual: ++ __ slt(regd, regl, regr); ++ break; ++ case lir_cond_belowEqual: ++ __ sltu(regd, regr, regl); ++ break; ++ case lir_cond_aboveEqual: ++ __ sltu(regd, regl, regr); ++ break; ++ default: ++ ShouldNotReachHere(); ++ } ++ } ++ return; ++ } else if (val1 == 1 && val2 == 0) { ++ if (regr == noreg) { ++ switch (condition) { ++ case lir_cond_equal: ++ if (immr == 0) { ++ __ sltu(regd, R0, regl); ++ __ xori(regd, regd, 1); ++ } else { ++ __ addi_d(SCR1, regl, -immr); ++ __ li(regd, 1); ++ __ masknez(regd, regd, SCR1); ++ } ++ break; ++ case lir_cond_notEqual: ++ if (immr == 0) { ++ __ sltu(regd, R0, regl); ++ } else { ++ __ addi_d(SCR1, regl, -immr); ++ __ li(regd, 1); ++ __ maskeqz(regd, regd, SCR1); ++ } ++ break; ++ case lir_cond_less: ++ __ slti(regd, regl, immr); ++ break; ++ case lir_cond_lessEqual: ++ if (immr == 0) { ++ __ slt(regd, R0, regl); ++ } else { ++ __ li(SCR1, immr); ++ __ slt(regd, SCR1, regl); ++ } ++ __ xori(regd, regd, 1); ++ break; ++ case lir_cond_greater: ++ if (immr == 0) { ++ __ slt(regd, R0, regl); ++ } else { ++ __ li(SCR1, immr); ++ __ slt(regd, SCR1, regl); ++ } ++ break; ++ case lir_cond_greaterEqual: ++ __ slti(regd, regl, immr); ++ __ xori(regd, regd, 1); ++ break; ++ case lir_cond_belowEqual: ++ if (immr == 0) { ++ __ sltu(regd, R0, regl); ++ } else { ++ __ li(SCR1, immr); ++ __ sltu(regd, SCR1, regl); ++ } ++ __ xori(regd, regd, 1); ++ break; ++ case lir_cond_aboveEqual: ++ __ sltui(regd, regl, immr); ++ __ xori(regd, regd, 1); ++ break; ++ default: ++ ShouldNotReachHere(); ++ } ++ } else { ++ switch (condition) { ++ case lir_cond_equal: ++ __ sub_d(SCR1, regl, regr); ++ __ li(regd, 1); ++ __ masknez(regd, regd, SCR1); ++ break; ++ case lir_cond_notEqual: ++ __ sub_d(SCR1, regl, regr); ++ __ li(regd, 1); ++ __ maskeqz(regd, regd, SCR1); ++ break; ++ case lir_cond_less: ++ __ slt(regd, regl, regr); ++ break; ++ case lir_cond_lessEqual: ++ __ slt(regd, regr, regl); ++ __ xori(regd, regd, 1); ++ break; ++ case lir_cond_greater: ++ __ slt(regd, regr, regl); ++ break; ++ case lir_cond_greaterEqual: ++ __ slt(regd, regl, regr); ++ __ xori(regd, regd, 1); ++ break; ++ case lir_cond_belowEqual: ++ __ sltu(regd, regr, regl); ++ __ xori(regd, regd, 1); ++ break; ++ case lir_cond_aboveEqual: ++ __ sltu(regd, regl, regr); ++ __ xori(regd, regd, 1); ++ break; ++ default: ++ 
ShouldNotReachHere(); ++ } ++ } ++ return; ++ } ++ } ++ ++ // cmp ++ if (regr == noreg) { ++ switch (condition) { ++ case lir_cond_equal: ++ __ addi_d(SCR2, regl, -immr); ++ break; ++ case lir_cond_notEqual: ++ __ addi_d(SCR2, regl, -immr); ++ break; ++ case lir_cond_less: ++ __ slti(SCR2, regl, immr); ++ break; ++ case lir_cond_lessEqual: ++ __ li(SCR1, immr); ++ __ slt(SCR2, SCR1, regl); ++ break; ++ case lir_cond_greater: ++ __ li(SCR1, immr); ++ __ slt(SCR2, SCR1, regl); ++ break; ++ case lir_cond_greaterEqual: ++ __ slti(SCR2, regl, immr); ++ break; ++ case lir_cond_belowEqual: ++ __ li(SCR1, immr); ++ __ sltu(SCR2, SCR1, regl); ++ break; ++ case lir_cond_aboveEqual: ++ __ sltui(SCR2, regl, immr); ++ break; ++ default: ++ ShouldNotReachHere(); ++ } ++ } else { ++ switch (condition) { ++ case lir_cond_equal: ++ __ sub_d(SCR2, regl, regr); ++ break; ++ case lir_cond_notEqual: ++ __ sub_d(SCR2, regl, regr); ++ break; ++ case lir_cond_less: ++ __ slt(SCR2, regl, regr); ++ break; ++ case lir_cond_lessEqual: ++ __ slt(SCR2, regr, regl); ++ break; ++ case lir_cond_greater: ++ __ slt(SCR2, regr, regl); ++ break; ++ case lir_cond_greaterEqual: ++ __ slt(SCR2, regl, regr); ++ break; ++ case lir_cond_belowEqual: ++ __ sltu(SCR2, regr, regl); ++ break; ++ case lir_cond_aboveEqual: ++ __ sltu(SCR2, regl, regr); ++ break; ++ default: ++ ShouldNotReachHere(); ++ } ++ } ++ ++ // value operands ++ if (src1->is_stack()) { ++ stack2reg(src1, result, result->type()); ++ reg1 = regd; ++ } else if (src1->is_constant()) { ++ const2reg(src1, result, lir_patch_none, NULL); ++ reg1 = regd; ++ } else { ++ reg1 = (src1->type() == T_LONG) ? src1->as_register_lo() : src1->as_register(); ++ } ++ ++ if (src2->is_stack()) { ++ stack2reg(src2, FrameMap::scr1_opr, result->type()); ++ reg2 = SCR1; ++ } else if (src2->is_constant()) { ++ LIR_Opr tmp = src2->type() == T_LONG ? FrameMap::scr1_long_opr : FrameMap::scr1_opr; ++ const2reg(src2, tmp, lir_patch_none, NULL); ++ reg2 = SCR1; ++ } else { ++ reg2 = (src2->type() == T_LONG) ? 
src2->as_register_lo() : src2->as_register(); ++ } ++ ++ // cmove ++ switch (condition) { ++ case lir_cond_equal: ++ __ masknez(regd, reg1, SCR2); ++ __ maskeqz(SCR2, reg2, SCR2); ++ break; ++ case lir_cond_notEqual: ++ __ maskeqz(regd, reg1, SCR2); ++ __ masknez(SCR2, reg2, SCR2); ++ break; ++ case lir_cond_less: ++ __ maskeqz(regd, reg1, SCR2); ++ __ masknez(SCR2, reg2, SCR2); ++ break; ++ case lir_cond_lessEqual: ++ __ masknez(regd, reg1, SCR2); ++ __ maskeqz(SCR2, reg2, SCR2); ++ break; ++ case lir_cond_greater: ++ __ maskeqz(regd, reg1, SCR2); ++ __ masknez(SCR2, reg2, SCR2); ++ break; ++ case lir_cond_greaterEqual: ++ __ masknez(regd, reg1, SCR2); ++ __ maskeqz(SCR2, reg2, SCR2); ++ break; ++ case lir_cond_belowEqual: ++ __ masknez(regd, reg1, SCR2); ++ __ maskeqz(SCR2, reg2, SCR2); ++ break; ++ case lir_cond_aboveEqual: ++ __ masknez(regd, reg1, SCR2); ++ __ maskeqz(SCR2, reg2, SCR2); ++ break; ++ default: ++ ShouldNotReachHere(); ++ } ++ ++ __ OR(regd, regd, SCR2); ++} ++ ++void LIR_Assembler::arith_op(LIR_Code code, LIR_Opr left, LIR_Opr right, LIR_Opr dest, ++ CodeEmitInfo* info, bool pop_fpu_stack) { ++ assert(info == NULL, "should never be used, idiv/irem and ldiv/lrem not handled by this method"); ++ ++ if (left->is_single_cpu()) { ++ Register lreg = left->as_register(); ++ Register dreg = as_reg(dest); ++ ++ if (right->is_single_cpu()) { ++ // cpu register - cpu register ++ assert(left->type() == T_INT && right->type() == T_INT && dest->type() == T_INT, "should be"); ++ Register rreg = right->as_register(); ++ switch (code) { ++ case lir_add: __ add_w (dest->as_register(), lreg, rreg); break; ++ case lir_sub: __ sub_w (dest->as_register(), lreg, rreg); break; ++ case lir_mul: __ mul_w (dest->as_register(), lreg, rreg); break; ++ default: ShouldNotReachHere(); ++ } ++ } else if (right->is_double_cpu()) { ++ Register rreg = right->as_register_lo(); ++ // single_cpu + double_cpu: can happen with obj+long ++ assert(code == lir_add || code == lir_sub, "mismatched arithmetic op"); ++ switch (code) { ++ case lir_add: __ add_d(dreg, lreg, rreg); break; ++ case lir_sub: __ sub_d(dreg, lreg, rreg); break; ++ default: ShouldNotReachHere(); ++ } ++ } else if (right->is_constant()) { ++ // cpu register - constant ++ jlong c; ++ ++ // FIXME: This is fugly: we really need to factor all this logic. 
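++ // Only add/sub reach this constant path; the immediate is widened to jlong and
++ // later applied with addi_w or addi_d according to the left operand's type.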
++ switch(right->type()) { ++ case T_LONG: ++ c = right->as_constant_ptr()->as_jlong(); ++ break; ++ case T_INT: ++ case T_ADDRESS: ++ c = right->as_constant_ptr()->as_jint(); ++ break; ++ default: ++ ShouldNotReachHere(); ++ c = 0; // unreachable ++ break; ++ } ++ ++ assert(code == lir_add || code == lir_sub, "mismatched arithmetic op"); ++ if (c == 0 && dreg == lreg) { ++ COMMENT("effective nop elided"); ++ return; ++ } ++ ++ switch(left->type()) { ++ case T_INT: ++ switch (code) { ++ case lir_add: __ addi_w(dreg, lreg, c); break; ++ case lir_sub: __ addi_w(dreg, lreg, -c); break; ++ default: ShouldNotReachHere(); ++ } ++ break; ++ case T_OBJECT: ++ case T_ADDRESS: ++ switch (code) { ++ case lir_add: __ addi_d(dreg, lreg, c); break; ++ case lir_sub: __ addi_d(dreg, lreg, -c); break; ++ default: ShouldNotReachHere(); ++ } ++ break; ++ default: ++ ShouldNotReachHere(); ++ } ++ } else { ++ ShouldNotReachHere(); ++ } ++ } else if (left->is_double_cpu()) { ++ Register lreg_lo = left->as_register_lo(); ++ ++ if (right->is_double_cpu()) { ++ // cpu register - cpu register ++ Register rreg_lo = right->as_register_lo(); ++ switch (code) { ++ case lir_add: __ add_d(dest->as_register_lo(), lreg_lo, rreg_lo); break; ++ case lir_sub: __ sub_d(dest->as_register_lo(), lreg_lo, rreg_lo); break; ++ case lir_mul: __ mul_d(dest->as_register_lo(), lreg_lo, rreg_lo); break; ++ case lir_div: __ div_d(dest->as_register_lo(), lreg_lo, rreg_lo); break; ++ case lir_rem: __ mod_d(dest->as_register_lo(), lreg_lo, rreg_lo); break; ++ default: ShouldNotReachHere(); ++ } ++ ++ } else if (right->is_constant()) { ++ jlong c = right->as_constant_ptr()->as_jlong(); ++ Register dreg = as_reg(dest); ++ switch (code) { ++ case lir_add: ++ case lir_sub: ++ if (c == 0 && dreg == lreg_lo) { ++ COMMENT("effective nop elided"); ++ return; ++ } ++ code == lir_add ? 
__ addi_d(dreg, lreg_lo, c) : __ addi_d(dreg, lreg_lo, -c); ++ break; ++ case lir_div: ++ assert(c > 0 && is_power_of_2_long(c), "divisor must be power-of-2 constant"); ++ if (c == 1) { ++ // move lreg_lo to dreg if divisor is 1 ++ __ move(dreg, lreg_lo); ++ } else { ++ unsigned int shift = exact_log2_long(c); ++ // use scr1 as intermediate result register ++ __ srai_d(SCR1, lreg_lo, 63); ++ __ srli_d(SCR1, SCR1, 64 - shift); ++ __ add_d(SCR1, lreg_lo, SCR1); ++ __ srai_d(dreg, SCR1, shift); ++ } ++ break; ++ case lir_rem: ++ assert(c > 0 && is_power_of_2_long(c), "divisor must be power-of-2 constant"); ++ if (c == 1) { ++ // move 0 to dreg if divisor is 1 ++ __ move(dreg, R0); ++ } else { ++ // use scr1/2 as intermediate result register ++ __ sub_d(SCR1, R0, lreg_lo); ++ __ slt(SCR2, SCR1, R0); ++ __ andi(dreg, lreg_lo, c - 1); ++ __ andi(SCR1, SCR1, c - 1); ++ __ sub_d(SCR1, R0, SCR1); ++ __ maskeqz(dreg, dreg, SCR2); ++ __ masknez(SCR1, SCR1, SCR2); ++ __ OR(dreg, dreg, SCR1); ++ } ++ break; ++ default: ++ ShouldNotReachHere(); ++ } ++ } else { ++ ShouldNotReachHere(); ++ } ++ } else if (left->is_single_fpu()) { ++ assert(right->is_single_fpu(), "right hand side of float arithmetics needs to be float register"); ++ switch (code) { ++ case lir_add: __ fadd_s (dest->as_float_reg(), left->as_float_reg(), right->as_float_reg()); break; ++ case lir_sub: __ fsub_s (dest->as_float_reg(), left->as_float_reg(), right->as_float_reg()); break; ++ case lir_mul: __ fmul_s (dest->as_float_reg(), left->as_float_reg(), right->as_float_reg()); break; ++ case lir_div: __ fdiv_s (dest->as_float_reg(), left->as_float_reg(), right->as_float_reg()); break; ++ default: ShouldNotReachHere(); ++ } ++ } else if (left->is_double_fpu()) { ++ if (right->is_double_fpu()) { ++ // fpu register - fpu register ++ switch (code) { ++ case lir_add: __ fadd_d (dest->as_double_reg(), left->as_double_reg(), right->as_double_reg()); break; ++ case lir_sub: __ fsub_d (dest->as_double_reg(), left->as_double_reg(), right->as_double_reg()); break; ++ case lir_mul_strictfp: // fall through ++ case lir_mul: __ fmul_d (dest->as_double_reg(), left->as_double_reg(), right->as_double_reg()); break; ++ case lir_div_strictfp: // fall through ++ case lir_div: __ fdiv_d (dest->as_double_reg(), left->as_double_reg(), right->as_double_reg()); break; ++ default: ShouldNotReachHere(); ++ } ++ } else { ++ if (right->is_constant()) { ++ ShouldNotReachHere(); ++ } ++ ShouldNotReachHere(); ++ } ++ } else if (left->is_single_stack() || left->is_address()) { ++ assert(left == dest, "left and dest must be equal"); ++ ShouldNotReachHere(); ++ } else { ++ ShouldNotReachHere(); ++ } ++} ++ ++void LIR_Assembler::arith_fpu_implementation(LIR_Code code, int left_index, int right_index, ++ int dest_index, bool pop_fpu_stack) { ++ Unimplemented(); ++} ++ ++void LIR_Assembler::intrinsic_op(LIR_Code code, LIR_Opr value, LIR_Opr unused, LIR_Opr dest, LIR_Op* op) { ++ switch(code) { ++ case lir_abs : __ fabs_d(dest->as_double_reg(), value->as_double_reg()); break; ++ case lir_sqrt: __ fsqrt_d(dest->as_double_reg(), value->as_double_reg()); break; ++ default : ShouldNotReachHere(); ++ } ++} ++ ++void LIR_Assembler::logic_op(LIR_Code code, LIR_Opr left, LIR_Opr right, LIR_Opr dst) { ++ assert(left->is_single_cpu() || left->is_double_cpu(), "expect single or double register"); ++ Register Rleft = left->is_single_cpu() ? 
left->as_register() : left->as_register_lo(); ++ ++ if (dst->is_single_cpu()) { ++ Register Rdst = dst->as_register(); ++ if (right->is_constant()) { ++ switch (code) { ++ case lir_logic_and: ++ if (Assembler::is_uimm(right->as_jint(), 12)) { ++ __ andi(Rdst, Rleft, right->as_jint()); ++ } else { ++ __ li(AT, right->as_jint()); ++ __ AND(Rdst, Rleft, AT); ++ } ++ break; ++ case lir_logic_or: __ ori(Rdst, Rleft, right->as_jint()); break; ++ case lir_logic_xor: __ xori(Rdst, Rleft, right->as_jint()); break; ++ default: ShouldNotReachHere(); break; ++ } ++ } else { ++ Register Rright = right->is_single_cpu() ? right->as_register() : right->as_register_lo(); ++ switch (code) { ++ case lir_logic_and: __ AND(Rdst, Rleft, Rright); break; ++ case lir_logic_or: __ OR(Rdst, Rleft, Rright); break; ++ case lir_logic_xor: __ XOR(Rdst, Rleft, Rright); break; ++ default: ShouldNotReachHere(); break; ++ } ++ } ++ } else { ++ Register Rdst = dst->as_register_lo(); ++ if (right->is_constant()) { ++ switch (code) { ++ case lir_logic_and: ++ if (Assembler::is_uimm(right->as_jlong(), 12)) { ++ __ andi(Rdst, Rleft, right->as_jlong()); ++ } else { ++ // We can guarantee that transform from HIR LogicOp is in range of ++ // uimm(12), but the common code directly generates LIR LogicAnd, ++ // and the right-operand is mask with all ones in the high bits. ++ __ li(AT, right->as_jlong()); ++ __ AND(Rdst, Rleft, AT); ++ } ++ break; ++ case lir_logic_or: __ ori(Rdst, Rleft, right->as_jlong()); break; ++ case lir_logic_xor: __ xori(Rdst, Rleft, right->as_jlong()); break; ++ default: ShouldNotReachHere(); break; ++ } ++ } else { ++ Register Rright = right->is_single_cpu() ? right->as_register() : right->as_register_lo(); ++ switch (code) { ++ case lir_logic_and: __ AND(Rdst, Rleft, Rright); break; ++ case lir_logic_or: __ OR(Rdst, Rleft, Rright); break; ++ case lir_logic_xor: __ XOR(Rdst, Rleft, Rright); break; ++ default: ShouldNotReachHere(); break; ++ } ++ } ++ } ++} ++ ++void LIR_Assembler::arithmetic_idiv(LIR_Code code, LIR_Opr left, LIR_Opr right, ++ LIR_Opr illegal, LIR_Opr result, CodeEmitInfo* info) { ++ // opcode check ++ assert((code == lir_idiv) || (code == lir_irem), "opcode must be idiv or irem"); ++ bool is_irem = (code == lir_irem); ++ ++ // operand check ++ assert(left->is_single_cpu(), "left must be register"); ++ assert(right->is_single_cpu() || right->is_constant(), "right must be register or constant"); ++ assert(result->is_single_cpu(), "result must be register"); ++ Register lreg = left->as_register(); ++ Register dreg = result->as_register(); ++ ++ // power-of-2 constant check and codegen ++ if (right->is_constant()) { ++ int c = right->as_constant_ptr()->as_jint(); ++ assert(c > 0 && is_power_of_2(c), "divisor must be power-of-2 constant"); ++ if (is_irem) { ++ if (c == 1) { ++ // move 0 to dreg if divisor is 1 ++ __ move(dreg, R0); ++ } else { ++ // use scr1/2 as intermediate result register ++ __ sub_w(SCR1, R0, lreg); ++ __ slt(SCR2, SCR1, R0); ++ __ andi(dreg, lreg, c - 1); ++ __ andi(SCR1, SCR1, c - 1); ++ __ sub_w(SCR1, R0, SCR1); ++ __ maskeqz(dreg, dreg, SCR2); ++ __ masknez(SCR1, SCR1, SCR2); ++ __ OR(dreg, dreg, SCR1); ++ } ++ } else { ++ if (c == 1) { ++ // move lreg to dreg if divisor is 1 ++ __ move(dreg, lreg); ++ } else { ++ unsigned int shift = exact_log2(c); ++ // use scr1 as intermediate result register ++ __ srai_w(SCR1, lreg, 31); ++ __ srli_w(SCR1, SCR1, 32 - shift); ++ __ add_w(SCR1, lreg, SCR1); ++ __ srai_w(dreg, SCR1, shift); ++ } ++ } ++ } else { ++ Register rreg = 
right->as_register(); ++ if (is_irem) ++ __ mod_w(dreg, lreg, rreg); ++ else ++ __ div_w(dreg, lreg, rreg); ++ } ++} ++ ++void LIR_Assembler::comp_op(LIR_Condition condition, LIR_Opr opr1, LIR_Opr opr2, LIR_Op2* op) { ++ Unimplemented(); ++} ++ ++void LIR_Assembler::comp_fl2i(LIR_Code code, LIR_Opr left, LIR_Opr right, LIR_Opr dst, LIR_Op2* op){ ++ if (code == lir_cmp_fd2i || code == lir_ucmp_fd2i) { ++ bool is_unordered_less = (code == lir_ucmp_fd2i); ++ if (left->is_single_fpu()) { ++ if (is_unordered_less) { ++ __ fcmp_clt_s(FCC0, right->as_float_reg(), left->as_float_reg()); ++ __ fcmp_cult_s(FCC1, left->as_float_reg(), right->as_float_reg()); ++ } else { ++ __ fcmp_cult_s(FCC0, right->as_float_reg(), left->as_float_reg()); ++ __ fcmp_clt_s(FCC1, left->as_float_reg(), right->as_float_reg()); ++ } ++ } else if (left->is_double_fpu()) { ++ if (is_unordered_less) { ++ __ fcmp_clt_d(FCC0, right->as_double_reg(), left->as_double_reg()); ++ __ fcmp_cult_d(FCC1, left->as_double_reg(), right->as_double_reg()); ++ } else { ++ __ fcmp_cult_d(FCC0, right->as_double_reg(), left->as_double_reg()); ++ __ fcmp_clt_d(FCC1, left->as_double_reg(), right->as_double_reg()); ++ } ++ } else { ++ ShouldNotReachHere(); ++ } ++ __ movcf2gr(dst->as_register(), FCC0); ++ __ movcf2gr(SCR1, FCC1); ++ __ sub_d(dst->as_register(), dst->as_register(), SCR1); ++ } else if (code == lir_cmp_l2i) { ++ __ slt(SCR1, left->as_register_lo(), right->as_register_lo()); ++ __ slt(dst->as_register(), right->as_register_lo(), left->as_register_lo()); ++ __ sub_d(dst->as_register(), dst->as_register(), SCR1); ++ } else { ++ ShouldNotReachHere(); ++ } ++} ++ ++void LIR_Assembler::align_call(LIR_Code code) {} ++ ++void LIR_Assembler::call(LIR_OpJavaCall* op, relocInfo::relocType rtype) { ++ address call = __ trampoline_call(AddressLiteral(op->addr(), rtype)); ++ if (call == NULL) { ++ bailout("trampoline stub overflow"); ++ return; ++ } ++ add_call_info(code_offset(), op->info()); ++} ++ ++void LIR_Assembler::ic_call(LIR_OpJavaCall* op) { ++ address call = __ ic_call(op->addr()); ++ if (call == NULL) { ++ bailout("trampoline stub overflow"); ++ return; ++ } ++ add_call_info(code_offset(), op->info()); ++} ++ ++void LIR_Assembler::emit_static_call_stub() { ++ address call_pc = __ pc(); ++ address stub = __ start_a_stub(call_stub_size()); ++ if (stub == NULL) { ++ bailout("static call stub overflow"); ++ return; ++ } ++ ++ int start = __ offset(); ++ ++ __ relocate(static_stub_Relocation::spec(call_pc)); ++ ++ // Code stream for loading method may be changed. ++ __ ibar(0); ++ ++ // Rmethod contains Method*, it should be relocated for GC ++ // static stub relocation also tags the Method* in the code-stream. ++ __ mov_metadata(Rmethod, NULL); ++ // This is recognized as unresolved by relocs/nativeInst/ic code ++ __ patchable_jump(__ pc()); ++ ++ assert(__ offset() - start + CompiledStaticCall::to_trampoline_stub_size() <= call_stub_size(), ++ "stub too big"); ++ __ end_a_stub(); ++} ++ ++void LIR_Assembler::throw_op(LIR_Opr exceptionPC, LIR_Opr exceptionOop, CodeEmitInfo* info) { ++ assert(exceptionOop->as_register() == A0, "must match"); ++ assert(exceptionPC->as_register() == A1, "must match"); ++ ++ // exception object is not added to oop map by LinearScan ++ // (LinearScan assumes that no oops are in fixed registers) ++ info->add_register_oop(exceptionOop); ++ Runtime1::StubID unwind_id; ++ ++ // get current pc information ++ // pc is only needed if the method has an exception handler, the unwind code does not need it. 
++ if (compilation()->debug_info_recorder()->last_pc_offset() == __ offset()) { ++ // As no instructions have been generated yet for this LIR node it's ++ // possible that an oop map already exists for the current offset. ++ // In that case insert an dummy NOP here to ensure all oop map PCs ++ // are unique. See JDK-8237483. ++ __ nop(); ++ } ++ Label L; ++ int pc_for_athrow_offset = __ offset(); ++ __ bind(L); ++ __ lipc(exceptionPC->as_register(), L); ++ add_call_info(pc_for_athrow_offset, info); // for exception handler ++ ++ __ verify_not_null_oop(A0); ++ // search an exception handler (A0: exception oop, A1: throwing pc) ++ if (compilation()->has_fpu_code()) { ++ unwind_id = Runtime1::handle_exception_id; ++ } else { ++ unwind_id = Runtime1::handle_exception_nofpu_id; ++ } ++ __ call(Runtime1::entry_for(unwind_id), relocInfo::runtime_call_type); ++ ++ // FIXME: enough room for two byte trap ???? ++ __ nop(); ++} ++ ++void LIR_Assembler::unwind_op(LIR_Opr exceptionOop) { ++ assert(exceptionOop->as_register() == A0, "must match"); ++ __ b(_unwind_handler_entry); ++} ++ ++void LIR_Assembler::shift_op(LIR_Code code, LIR_Opr left, LIR_Opr count, LIR_Opr dest, LIR_Opr tmp) { ++ Register lreg = left->is_single_cpu() ? left->as_register() : left->as_register_lo(); ++ Register dreg = dest->is_single_cpu() ? dest->as_register() : dest->as_register_lo(); ++ ++ switch (left->type()) { ++ case T_INT: { ++ switch (code) { ++ case lir_shl: __ sll_w(dreg, lreg, count->as_register()); break; ++ case lir_shr: __ sra_w(dreg, lreg, count->as_register()); break; ++ case lir_ushr: __ srl_w(dreg, lreg, count->as_register()); break; ++ default: ShouldNotReachHere(); break; ++ } ++ break; ++ case T_LONG: ++ case T_ADDRESS: ++ case T_OBJECT: ++ switch (code) { ++ case lir_shl: __ sll_d(dreg, lreg, count->as_register()); break; ++ case lir_shr: __ sra_d(dreg, lreg, count->as_register()); break; ++ case lir_ushr: __ srl_d(dreg, lreg, count->as_register()); break; ++ default: ShouldNotReachHere(); break; ++ } ++ break; ++ default: ++ ShouldNotReachHere(); ++ break; ++ } ++ } ++} ++ ++void LIR_Assembler::shift_op(LIR_Code code, LIR_Opr left, jint count, LIR_Opr dest) { ++ Register dreg = dest->is_single_cpu() ? dest->as_register() : dest->as_register_lo(); ++ Register lreg = left->is_single_cpu() ? 
left->as_register() : left->as_register_lo(); ++ ++ switch (left->type()) { ++ case T_INT: { ++ switch (code) { ++ case lir_shl: __ slli_w(dreg, lreg, count); break; ++ case lir_shr: __ srai_w(dreg, lreg, count); break; ++ case lir_ushr: __ srli_w(dreg, lreg, count); break; ++ default: ShouldNotReachHere(); break; ++ } ++ break; ++ case T_LONG: ++ case T_ADDRESS: ++ case T_OBJECT: ++ switch (code) { ++ case lir_shl: __ slli_d(dreg, lreg, count); break; ++ case lir_shr: __ srai_d(dreg, lreg, count); break; ++ case lir_ushr: __ srli_d(dreg, lreg, count); break; ++ default: ShouldNotReachHere(); break; ++ } ++ break; ++ default: ++ ShouldNotReachHere(); ++ break; ++ } ++ } ++} ++ ++void LIR_Assembler::store_parameter(Register r, int offset_from_sp_in_words) { ++ assert(offset_from_sp_in_words >= 0, "invalid offset from sp"); ++ int offset_from_sp_in_bytes = offset_from_sp_in_words * BytesPerWord; ++ assert(offset_from_sp_in_bytes < frame_map()->reserved_argument_area_size(), "invalid offset"); ++ __ st_ptr(r, Address(SP, offset_from_sp_in_bytes)); ++} ++ ++void LIR_Assembler::store_parameter(jint c, int offset_from_sp_in_words) { ++ assert(offset_from_sp_in_words >= 0, "invalid offset from sp"); ++ int offset_from_sp_in_bytes = offset_from_sp_in_words * BytesPerWord; ++ assert(offset_from_sp_in_bytes < frame_map()->reserved_argument_area_size(), "invalid offset"); ++ __ li(SCR2, c); ++ __ st_ptr(SCR2, Address(SP, offset_from_sp_in_bytes)); ++} ++ ++void LIR_Assembler::store_parameter(jobject o, int offset_from_sp_in_words) { ++ ShouldNotReachHere(); ++} ++ ++// This code replaces a call to arraycopy; no exception may ++// be thrown in this code, they must be thrown in the System.arraycopy ++// activation frame; we could save some checks if this would not be the case ++void LIR_Assembler::emit_arraycopy(LIR_OpArrayCopy* op) { ++ Register j_rarg0 = T0; ++ Register j_rarg1 = A0; ++ Register j_rarg2 = A1; ++ Register j_rarg3 = A2; ++ Register j_rarg4 = A3; ++ ++ ciArrayKlass* default_type = op->expected_type(); ++ Register src = op->src()->as_register(); ++ Register dst = op->dst()->as_register(); ++ Register src_pos = op->src_pos()->as_register(); ++ Register dst_pos = op->dst_pos()->as_register(); ++ Register length = op->length()->as_register(); ++ Register tmp = op->tmp()->as_register(); ++ ++ CodeStub* stub = op->stub(); ++ int flags = op->flags(); ++ BasicType basic_type = default_type != NULL ? 
default_type->element_type()->basic_type() : T_ILLEGAL; ++ if (is_reference_type(basic_type)) ++ basic_type = T_OBJECT; ++ ++ // if we don't know anything, just go through the generic arraycopy ++ if (default_type == NULL) { ++ Label done; ++ assert(src == T0 && src_pos == A0, "mismatch in calling convention"); ++ ++ // Save the arguments in case the generic arraycopy fails and we ++ // have to fall back to the JNI stub ++ __ st_ptr(dst, Address(SP, 0 * BytesPerWord)); ++ __ st_ptr(dst_pos, Address(SP, 1 * BytesPerWord)); ++ __ st_ptr(length, Address(SP, 2 * BytesPerWord)); ++ __ st_ptr(src_pos, Address(SP, 3 * BytesPerWord)); ++ __ st_ptr(src, Address(SP, 4 * BytesPerWord)); ++ ++ address copyfunc_addr = StubRoutines::generic_arraycopy(); ++ assert(copyfunc_addr != NULL, "generic arraycopy stub required"); ++ ++ // The arguments are in java calling convention so we shift them ++ // to C convention ++ assert_different_registers(A4, j_rarg0, j_rarg1, j_rarg2, j_rarg3); ++ __ move(A4, j_rarg4); ++ assert_different_registers(A3, j_rarg0, j_rarg1, j_rarg2); ++ __ move(A3, j_rarg3); ++ assert_different_registers(A2, j_rarg0, j_rarg1); ++ __ move(A2, j_rarg2); ++ assert_different_registers(A1, j_rarg0); ++ __ move(A1, j_rarg1); ++ __ move(A0, j_rarg0); ++#ifndef PRODUCT ++ if (PrintC1Statistics) { ++ __ li(SCR2, (address)&Runtime1::_generic_arraycopystub_cnt); ++ __ increment(SCR2, 1); ++ } ++#endif ++ __ call(copyfunc_addr, relocInfo::runtime_call_type); ++ ++ __ beqz(A0, *stub->continuation()); ++ __ move(tmp, A0); ++ ++ // Reload values from the stack so they are where the stub ++ // expects them. ++ __ ld_ptr(dst, Address(SP, 0 * BytesPerWord)); ++ __ ld_ptr(dst_pos, Address(SP, 1 * BytesPerWord)); ++ __ ld_ptr(length, Address(SP, 2 * BytesPerWord)); ++ __ ld_ptr(src_pos, Address(SP, 3 * BytesPerWord)); ++ __ ld_ptr(src, Address(SP, 4 * BytesPerWord)); ++ ++ // tmp is -1^K where K == partial copied count ++ __ nor(SCR1, tmp, R0); ++ // adjust length down and src/end pos up by partial copied count ++ __ sub_w(length, length, SCR1); ++ __ add_w(src_pos, src_pos, SCR1); ++ __ add_w(dst_pos, dst_pos, SCR1); ++ __ b(*stub->entry()); ++ ++ __ bind(*stub->continuation()); ++ return; ++ } ++ ++ assert(default_type != NULL && default_type->is_array_klass() && default_type->is_loaded(), ++ "must be true at this point"); ++ ++ int elem_size = type2aelembytes(basic_type); ++ Address::ScaleFactor scale = Address::times(elem_size); ++ ++ Address src_length_addr = Address(src, arrayOopDesc::length_offset_in_bytes()); ++ Address dst_length_addr = Address(dst, arrayOopDesc::length_offset_in_bytes()); ++ Address src_klass_addr = Address(src, oopDesc::klass_offset_in_bytes()); ++ Address dst_klass_addr = Address(dst, oopDesc::klass_offset_in_bytes()); ++ ++ // test for NULL ++ if (flags & LIR_OpArrayCopy::src_null_check) { ++ __ beqz(src, *stub->entry()); ++ } ++ if (flags & LIR_OpArrayCopy::dst_null_check) { ++ __ beqz(dst, *stub->entry()); ++ } ++ ++ // If the compiler was not able to prove that exact type of the source or the destination ++ // of the arraycopy is an array type, check at runtime if the source or the destination is ++ // an instance type. 
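++ // Klass::layout_helper() is negative for array klasses, so a signed compare
++ // against _lh_neutral_value sends non-array operands to the slow-path stub.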
++ if (flags & LIR_OpArrayCopy::type_check) { ++ if (!(flags & LIR_OpArrayCopy::LIR_OpArrayCopy::dst_objarray)) { ++ __ load_klass(tmp, dst); ++ __ ld_w(SCR1, Address(tmp, in_bytes(Klass::layout_helper_offset()))); ++ __ li(SCR2, (jlong) Klass::_lh_neutral_value); ++ __ bge_far(SCR1, SCR2, *stub->entry(), true); ++ } ++ ++ if (!(flags & LIR_OpArrayCopy::LIR_OpArrayCopy::src_objarray)) { ++ __ load_klass(tmp, src); ++ __ ld_w(SCR1, Address(tmp, in_bytes(Klass::layout_helper_offset()))); ++ __ li(SCR2, (jlong) Klass::_lh_neutral_value); ++ __ bge_far(SCR1, SCR2, *stub->entry(), true); ++ } ++ } ++ ++ // check if negative ++ if (flags & LIR_OpArrayCopy::src_pos_positive_check) { ++ __ blt_far(src_pos, R0, *stub->entry(), true); ++ } ++ if (flags & LIR_OpArrayCopy::dst_pos_positive_check) { ++ __ blt_far(dst_pos, R0, *stub->entry(), true); ++ } ++ ++ if (flags & LIR_OpArrayCopy::length_positive_check) { ++ __ blt_far(length, R0, *stub->entry(), true); ++ } ++ ++ if (flags & LIR_OpArrayCopy::src_range_check) { ++ __ add_w(tmp, src_pos, length); ++ __ ld_wu(SCR1, src_length_addr); ++ __ blt_far(SCR1, tmp, *stub->entry(), false); ++ } ++ if (flags & LIR_OpArrayCopy::dst_range_check) { ++ __ add_w(tmp, dst_pos, length); ++ __ ld_wu(SCR1, dst_length_addr); ++ __ blt_far(SCR1, tmp, *stub->entry(), false); ++ } ++ ++ if (flags & LIR_OpArrayCopy::type_check) { ++ // We don't know the array types are compatible ++ if (basic_type != T_OBJECT) { ++ // Simple test for basic type arrays ++ if (UseCompressedClassPointers) { ++ __ ld_wu(tmp, src_klass_addr); ++ __ ld_wu(SCR1, dst_klass_addr); ++ } else { ++ __ ld_ptr(tmp, src_klass_addr); ++ __ ld_ptr(SCR1, dst_klass_addr); ++ } ++ __ bne_far(tmp, SCR1, *stub->entry()); ++ } else { ++ // For object arrays, if src is a sub class of dst then we can ++ // safely do the copy. ++ Label cont, slow; ++ ++ __ addi_d(SP, SP, -2 * wordSize); ++ __ st_ptr(dst, Address(SP, 0 * wordSize)); ++ __ st_ptr(src, Address(SP, 1 * wordSize)); ++ ++ __ load_klass(src, src); ++ __ load_klass(dst, dst); ++ ++ __ check_klass_subtype_fast_path(src, dst, tmp, &cont, &slow, NULL); ++ ++ __ addi_d(SP, SP, -2 * wordSize); ++ __ st_ptr(dst, Address(SP, 0 * wordSize)); ++ __ st_ptr(src, Address(SP, 1 * wordSize)); ++ __ call(Runtime1::entry_for(Runtime1::slow_subtype_check_id), relocInfo::runtime_call_type); ++ __ ld_ptr(dst, Address(SP, 0 * wordSize)); ++ __ ld_ptr(src, Address(SP, 1 * wordSize)); ++ __ addi_d(SP, SP, 2 * wordSize); ++ ++ __ bnez(dst, cont); ++ ++ __ bind(slow); ++ __ ld_ptr(dst, Address(SP, 0 * wordSize)); ++ __ ld_ptr(src, Address(SP, 1 * wordSize)); ++ __ addi_d(SP, SP, 2 * wordSize); ++ ++ address copyfunc_addr = StubRoutines::checkcast_arraycopy(); ++ if (copyfunc_addr != NULL) { // use stub if available ++ // src is not a sub class of dst so we have to do a ++ // per-element check. ++ ++ int mask = LIR_OpArrayCopy::src_objarray|LIR_OpArrayCopy::dst_objarray; ++ if ((flags & mask) != mask) { ++ // Check that at least both of them object arrays. 
++ assert(flags & mask, "one of the two should be known to be an object array"); ++ ++ if (!(flags & LIR_OpArrayCopy::src_objarray)) { ++ __ load_klass(tmp, src); ++ } else if (!(flags & LIR_OpArrayCopy::dst_objarray)) { ++ __ load_klass(tmp, dst); ++ } ++ int lh_offset = in_bytes(Klass::layout_helper_offset()); ++ Address klass_lh_addr(tmp, lh_offset); ++ jint objArray_lh = Klass::array_layout_helper(T_OBJECT); ++ __ ld_w(SCR1, klass_lh_addr); ++ __ li(SCR2, objArray_lh); ++ __ XOR(SCR1, SCR1, SCR2); ++ __ bnez(SCR1, *stub->entry()); ++ } ++ ++ // Spill because stubs can use any register they like and it's ++ // easier to restore just those that we care about. ++ __ st_ptr(dst, Address(SP, 0 * BytesPerWord)); ++ __ st_ptr(dst_pos, Address(SP, 1 * BytesPerWord)); ++ __ st_ptr(length, Address(SP, 2 * BytesPerWord)); ++ __ st_ptr(src_pos, Address(SP, 3 * BytesPerWord)); ++ __ st_ptr(src, Address(SP, 4 * BytesPerWord)); ++ ++ __ lea(A0, Address(src, src_pos, scale)); ++ __ addi_d(A0, A0, arrayOopDesc::base_offset_in_bytes(basic_type)); ++ assert_different_registers(A0, dst, dst_pos, length); ++ __ load_klass(A4, dst); ++ assert_different_registers(A4, dst, dst_pos, length); ++ __ lea(A1, Address(dst, dst_pos, scale)); ++ __ addi_d(A1, A1, arrayOopDesc::base_offset_in_bytes(basic_type)); ++ assert_different_registers(A1, length); ++ __ bstrpick_d(A2, length, 31, 0); ++ __ ld_ptr(A4, Address(A4, ObjArrayKlass::element_klass_offset())); ++ __ ld_w(A3, Address(A4, Klass::super_check_offset_offset())); ++ __ call(copyfunc_addr, relocInfo::runtime_call_type); ++ ++#ifndef PRODUCT ++ if (PrintC1Statistics) { ++ Label failed; ++ __ bnez(A0, failed); ++ __ li(SCR2, (address)&Runtime1::_arraycopy_checkcast_cnt); ++ __ increment(SCR2, 1); ++ __ bind(failed); ++ } ++#endif ++ ++ __ beqz(A0, *stub->continuation()); ++ ++#ifndef PRODUCT ++ if (PrintC1Statistics) { ++ __ li(SCR2, (address)&Runtime1::_arraycopy_checkcast_attempt_cnt); ++ __ increment(SCR2, 1); ++ } ++#endif ++ assert_different_registers(dst, dst_pos, length, src_pos, src, tmp, SCR1); ++ __ move(tmp, A0); ++ ++ // Restore previously spilled arguments ++ __ ld_ptr(dst, Address(SP, 0 * BytesPerWord)); ++ __ ld_ptr(dst_pos, Address(SP, 1 * BytesPerWord)); ++ __ ld_ptr(length, Address(SP, 2 * BytesPerWord)); ++ __ ld_ptr(src_pos, Address(SP, 3 * BytesPerWord)); ++ __ ld_ptr(src, Address(SP, 4 * BytesPerWord)); ++ ++ // return value is -1^K where K is partial copied count ++ __ nor(SCR1, tmp, R0); ++ // adjust length down and src/end pos up by partial copied count ++ __ sub_w(length, length, SCR1); ++ __ add_w(src_pos, src_pos, SCR1); ++ __ add_w(dst_pos, dst_pos, SCR1); ++ } ++ ++ __ b(*stub->entry()); ++ ++ __ bind(cont); ++ __ ld_ptr(dst, Address(SP, 0 * wordSize)); ++ __ ld_ptr(src, Address(SP, 1 * wordSize)); ++ __ addi_d(SP, SP, 2 * wordSize); ++ } ++ } ++ ++#ifdef ASSERT ++ if (basic_type != T_OBJECT || !(flags & LIR_OpArrayCopy::type_check)) { ++ // Sanity check the known type with the incoming class. For the ++ // primitive case the types must match exactly with src.klass and ++ // dst.klass each exactly matching the default type. For the ++ // object array case, if no type check is needed then either the ++ // dst type is exactly the expected type and the src type is a ++ // subtype which we can't check or src is the same array as dst ++ // but not necessarily exactly of type default_type. 
++ Label known_ok, halt; ++ __ mov_metadata(tmp, default_type->constant_encoding()); ++ if (UseCompressedClassPointers) { ++ __ encode_klass_not_null(tmp); ++ } ++ ++ if (basic_type != T_OBJECT) { ++ ++ if (UseCompressedClassPointers) { ++ __ ld_wu(SCR1, dst_klass_addr); ++ } else { ++ __ ld_ptr(SCR1, dst_klass_addr); ++ } ++ __ bne(tmp, SCR1, halt); ++ if (UseCompressedClassPointers) { ++ __ ld_wu(SCR1, src_klass_addr); ++ } else { ++ __ ld_ptr(SCR1, src_klass_addr); ++ } ++ __ beq(tmp, SCR1, known_ok); ++ } else { ++ if (UseCompressedClassPointers) { ++ __ ld_wu(SCR1, dst_klass_addr); ++ } else { ++ __ ld_ptr(SCR1, dst_klass_addr); ++ } ++ __ beq(tmp, SCR1, known_ok); ++ __ beq(src, dst, known_ok); ++ } ++ __ bind(halt); ++ __ stop("incorrect type information in arraycopy"); ++ __ bind(known_ok); ++ } ++#endif ++ ++#ifndef PRODUCT ++ if (PrintC1Statistics) { ++ __ li(SCR2, Runtime1::arraycopy_count_address(basic_type)); ++ __ increment(SCR2, 1); ++ } ++#endif ++ ++ __ lea(A0, Address(src, src_pos, scale)); ++ __ addi_d(A0, A0, arrayOopDesc::base_offset_in_bytes(basic_type)); ++ assert_different_registers(A0, dst, dst_pos, length); ++ __ lea(A1, Address(dst, dst_pos, scale)); ++ __ addi_d(A1, A1, arrayOopDesc::base_offset_in_bytes(basic_type)); ++ assert_different_registers(A1, length); ++ __ bstrpick_d(A2, length, 31, 0); ++ ++ bool disjoint = (flags & LIR_OpArrayCopy::overlapping) == 0; ++ bool aligned = (flags & LIR_OpArrayCopy::unaligned) == 0; ++ const char *name; ++ address entry = StubRoutines::select_arraycopy_function(basic_type, aligned, disjoint, name, false); ++ ++ CodeBlob *cb = CodeCache::find_blob(entry); ++ if (cb) { ++ __ call(entry, relocInfo::runtime_call_type); ++ } else { ++ __ call_VM_leaf(entry, 3); ++ } ++ ++ __ bind(*stub->continuation()); ++} ++ ++void LIR_Assembler::emit_lock(LIR_OpLock* op) { ++ Register obj = op->obj_opr()->as_register(); // may not be an oop ++ Register hdr = op->hdr_opr()->as_register(); ++ Register lock = op->lock_opr()->as_register(); ++ if (!UseFastLocking) { ++ __ b(*op->stub()->entry()); ++ } else if (op->code() == lir_lock) { ++ Register scratch = noreg; ++ if (UseBiasedLocking) { ++ scratch = op->scratch_opr()->as_register(); ++ } ++ assert(BasicLock::displaced_header_offset_in_bytes() == 0, ++ "lock_reg must point to the displaced header"); ++ // add debug info for NullPointerException only if one is possible ++ int null_check_offset = __ lock_object(hdr, obj, lock, scratch, *op->stub()->entry()); ++ if (op->info() != NULL) { ++ add_debug_info_for_null_check(null_check_offset, op->info()); ++ } ++ // done ++ } else if (op->code() == lir_unlock) { ++ assert(BasicLock::displaced_header_offset_in_bytes() == 0, ++ "lock_reg must point to the displaced header"); ++ __ unlock_object(hdr, obj, lock, *op->stub()->entry()); ++ } else { ++ Unimplemented(); ++ } ++ __ bind(*op->stub()->continuation()); ++} ++ ++void LIR_Assembler::emit_profile_call(LIR_OpProfileCall* op) { ++ ciMethod* method = op->profiled_method(); ++ ciMethod* callee = op->profiled_callee(); ++ int bci = op->profiled_bci(); ++ ++ // Update counter for all call types ++ ciMethodData* md = method->method_data_or_null(); ++ assert(md != NULL, "Sanity"); ++ ciProfileData* data = md->bci_to_data(bci); ++ assert(data != NULL && data->is_CounterData(), "need CounterData for calls"); ++ assert(op->mdo()->is_single_cpu(), "mdo must be allocated"); ++ Register mdo = op->mdo()->as_register(); ++ __ mov_metadata(mdo, md->constant_encoding()); ++ Address counter_addr(mdo, 
md->byte_offset_of_slot(data, CounterData::count_offset())); ++ // Perform additional virtual call profiling for invokevirtual and ++ // invokeinterface bytecodes ++ if (op->should_profile_receiver_type()) { ++ assert(op->recv()->is_single_cpu(), "recv must be allocated"); ++ Register recv = op->recv()->as_register(); ++ assert_different_registers(mdo, recv); ++ assert(data->is_VirtualCallData(), "need VirtualCallData for virtual calls"); ++ ciKlass* known_klass = op->known_holder(); ++ if (C1OptimizeVirtualCallProfiling && known_klass != NULL) { ++ // We know the type that will be seen at this call site; we can ++ // statically update the MethodData* rather than needing to do ++ // dynamic tests on the receiver type ++ ++ // NOTE: we should probably put a lock around this search to ++ // avoid collisions by concurrent compilations ++ ciVirtualCallData* vc_data = (ciVirtualCallData*) data; ++ uint i; ++ for (i = 0; i < VirtualCallData::row_limit(); i++) { ++ ciKlass* receiver = vc_data->receiver(i); ++ if (known_klass->equals(receiver)) { ++ Address data_addr(mdo, md->byte_offset_of_slot(data, VirtualCallData::receiver_count_offset(i))); ++ __ ld_ptr(SCR2, data_addr); ++ __ addi_d(SCR2, SCR2, DataLayout::counter_increment); ++ __ st_ptr(SCR2, data_addr); ++ return; ++ } ++ } ++ ++ // Receiver type not found in profile data; select an empty slot ++ ++ // Note that this is less efficient than it should be because it ++ // always does a write to the receiver part of the ++ // VirtualCallData rather than just the first time ++ for (i = 0; i < VirtualCallData::row_limit(); i++) { ++ ciKlass* receiver = vc_data->receiver(i); ++ if (receiver == NULL) { ++ Address recv_addr(mdo, md->byte_offset_of_slot(data, VirtualCallData::receiver_offset(i))); ++ __ mov_metadata(SCR2, known_klass->constant_encoding()); ++ __ lea(SCR1, recv_addr); ++ __ st_ptr(SCR2, SCR1, 0); ++ Address data_addr(mdo, md->byte_offset_of_slot(data, VirtualCallData::receiver_count_offset(i))); ++ __ ld_ptr(SCR2, data_addr); ++ __ addi_d(SCR2, SCR1, DataLayout::counter_increment); ++ __ st_ptr(SCR2, data_addr); ++ return; ++ } ++ } ++ } else { ++ __ load_klass(recv, recv); ++ Label update_done; ++ type_profile_helper(mdo, md, data, recv, &update_done); ++ // Receiver did not match any saved receiver and there is no empty row for it. ++ // Increment total counter to indicate polymorphic case. 
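For context on the receiver-profiling loops above: the MDO keeps a small fixed table of (receiver klass, count) rows per virtual call site. A matching row gets its counter bumped, an empty row is claimed for a new receiver, and when the table is full only the total counter advances, flagging the polymorphic case. A rough model of that policy, with stand-in types instead of the real MDO layout (the row limit of 2 is illustrative):

    #include <array>
    #include <cstdint>
    #include <cstdio>

    using Klass = const void*;                 // stand-in for a Metadata pointer

    struct ReceiverRow { Klass receiver = nullptr; uint64_t count = 0; };

    constexpr int row_limit = 2;               // illustrative stand-in for VirtualCallData::row_limit()

    struct VirtualCallProfile {
      std::array<ReceiverRow, row_limit> rows;
      uint64_t total = 0;                      // "counter" slot, bumped for the polymorphic case

      void record(Klass k) {
        for (auto& r : rows)                   // 1) known receiver: just bump its row
          if (r.receiver == k) { r.count++; return; }
        for (auto& r : rows)                   // 2) first empty row: claim it for this receiver
          if (r.receiver == nullptr) { r.receiver = k; r.count++; return; }
        total++;                               // 3) table full: fall back to the total counter
      }
    };

    int main() {
      int a, b, c;
      VirtualCallProfile p;
      p.record(&a); p.record(&a); p.record(&b); p.record(&c);
      std::printf("row0=%llu row1=%llu total=%llu\n",
                  (unsigned long long)p.rows[0].count,
                  (unsigned long long)p.rows[1].count,
                  (unsigned long long)p.total);
    }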
++ __ ld_ptr(SCR2, counter_addr); ++ __ addi_d(SCR2, SCR2, DataLayout::counter_increment); ++ __ st_ptr(SCR2, counter_addr); ++ ++ __ bind(update_done); ++ } ++ } else { ++ // Static call ++ __ ld_ptr(SCR2, counter_addr); ++ __ addi_d(SCR2, SCR2, DataLayout::counter_increment); ++ __ st_ptr(SCR2, counter_addr); ++ } ++} ++ ++void LIR_Assembler::emit_delay(LIR_OpDelay*) { ++ Unimplemented(); ++} ++ ++void LIR_Assembler::monitor_address(int monitor_no, LIR_Opr dst) { ++ __ lea(dst->as_register(), frame_map()->address_for_monitor_lock(monitor_no)); ++} ++ ++void LIR_Assembler::emit_updatecrc32(LIR_OpUpdateCRC32* op) { ++ assert(op->crc()->is_single_cpu(), "crc must be register"); ++ assert(op->val()->is_single_cpu(), "byte value must be register"); ++ assert(op->result_opr()->is_single_cpu(), "result must be register"); ++ Register crc = op->crc()->as_register(); ++ Register val = op->val()->as_register(); ++ Register res = op->result_opr()->as_register(); ++ ++ assert_different_registers(val, crc, res); ++ __ li(res, StubRoutines::crc_table_addr()); ++ __ nor(crc, crc, R0); // ~crc ++ __ update_byte_crc32(crc, val, res); ++ __ nor(res, crc, R0); // ~crc ++} ++ ++void LIR_Assembler::emit_profile_type(LIR_OpProfileType* op) { ++ COMMENT("emit_profile_type {"); ++ Register obj = op->obj()->as_register(); ++ Register tmp = op->tmp()->as_pointer_register(); ++ Address mdo_addr = as_Address(op->mdp()->as_address_ptr()); ++ ciKlass* exact_klass = op->exact_klass(); ++ intptr_t current_klass = op->current_klass(); ++ bool not_null = op->not_null(); ++ bool no_conflict = op->no_conflict(); ++ ++ Label update, next, none; ++ ++ bool do_null = !not_null; ++ bool exact_klass_set = exact_klass != NULL && ciTypeEntries::valid_ciklass(current_klass) == exact_klass; ++ bool do_update = !TypeEntries::is_type_unknown(current_klass) && !exact_klass_set; ++ ++ assert(do_null || do_update, "why are we here?"); ++ assert(!TypeEntries::was_null_seen(current_klass) || do_update, "why are we here?"); ++ assert(mdo_addr.base() != SCR1, "wrong register"); ++ ++ __ verify_oop(obj); ++ ++ if (tmp != obj) { ++ __ move(tmp, obj); ++ } ++ if (do_null) { ++ __ bnez(tmp, update); ++ if (!TypeEntries::was_null_seen(current_klass)) { ++ __ ld_ptr(SCR2, mdo_addr); ++ __ ori(SCR2, SCR2, TypeEntries::null_seen); ++ __ st_ptr(SCR2, mdo_addr); ++ } ++ if (do_update) { ++#ifndef ASSERT ++ __ b(next); ++ } ++#else ++ __ b(next); ++ } ++ } else { ++ __ bnez(tmp, update); ++ __ stop("unexpected null obj"); ++#endif ++ } ++ ++ __ bind(update); ++ ++ if (do_update) { ++#ifdef ASSERT ++ if (exact_klass != NULL) { ++ Label ok; ++ __ load_klass(tmp, tmp); ++ __ mov_metadata(SCR1, exact_klass->constant_encoding()); ++ __ XOR(SCR1, tmp, SCR1); ++ __ beqz(SCR1, ok); ++ __ stop("exact klass and actual klass differ"); ++ __ bind(ok); ++ } ++#endif ++ if (!no_conflict) { ++ if (exact_klass == NULL || TypeEntries::is_type_none(current_klass)) { ++ if (exact_klass != NULL) { ++ __ mov_metadata(tmp, exact_klass->constant_encoding()); ++ } else { ++ __ load_klass(tmp, tmp); ++ } ++ ++ __ ld_ptr(SCR2, mdo_addr); ++ __ XOR(tmp, tmp, SCR2); ++ assert(TypeEntries::type_klass_mask == -4, "must be"); ++ __ bstrpick_d(SCR1, tmp, 63, 2); ++ // klass seen before, nothing to do. The unknown bit may have been ++ // set already but no need to check. ++ __ beqz(SCR1, next); ++ ++ __ andi(SCR1, tmp, TypeEntries::type_unknown); ++ __ bnez(SCR1, next); // already unknown. Nothing to do anymore. 
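The profile slot manipulated here is a single machine word: a Klass pointer whose two low bits double as flags (the asserts pin the masks at -2 and -4, i.e. ~1 and ~3). "Same klass, ignoring flags" is computed by XOR-ing and discarding bits 1:0, which is what the bstrpick_d over bits 63..2 followed by beqz implements. A compact sketch of that encoding; the concrete flag values (bit 0 for null_seen, bit 1 for type_unknown) are assumptions for illustration:

    #include <cassert>
    #include <cstdint>

    // Assumed flag layout (not copied from HotSpot): bit 0 and bit 1 of the slot.
    constexpr intptr_t null_seen       = 1;
    constexpr intptr_t type_unknown    = 2;
    constexpr intptr_t type_klass_mask = ~intptr_t(3);   // == -4, matches the assert above
    constexpr intptr_t type_mask       = ~intptr_t(1);   // == -2, matches the assert above

    // The generated sequence: XOR the candidate klass with the slot, then test
    // whether anything survives above bit 1 (bstrpick_d tmp, 63, 2 + beqz).
    inline bool same_klass(intptr_t slot, intptr_t klass) {
      return ((slot ^ klass) & type_klass_mask) == 0;
    }

    int main() {
      intptr_t klass = 0x1000;                    // pretend Klass* (aligned, low bits clear)
      intptr_t slot  = klass | null_seen;         // klass recorded, plus "saw a null" flag
      assert(same_klass(slot, klass));            // flags don't disturb the comparison
      assert(!(slot & type_unknown));             // not yet marked unknown/polymorphic
      slot |= type_unknown;                       // conflict path: give up on a precise type
      assert(!same_klass(slot, 0x2000));
      assert((slot & type_mask) != 0);
    }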
++ ++ if (TypeEntries::is_type_none(current_klass)) { ++ __ beqz(SCR2, none); ++ __ li(SCR1, (u1)TypeEntries::null_seen); ++ __ beq(SCR2, SCR1, none); ++ // There is a chance that the checks above (re-reading profiling ++ // data from memory) fail if another thread has just set the ++ // profiling to this obj's klass ++ membar_acquire(); ++ __ ld_ptr(SCR2, mdo_addr); ++ __ XOR(tmp, tmp, SCR2); ++ assert(TypeEntries::type_klass_mask == -4, "must be"); ++ __ bstrpick_d(SCR1, tmp, 63, 2); ++ __ beqz(SCR1, next); ++ } ++ } else { ++ assert(ciTypeEntries::valid_ciklass(current_klass) != NULL && ++ ciTypeEntries::valid_ciklass(current_klass) != exact_klass, "conflict only"); ++ ++ __ ld_ptr(tmp, mdo_addr); ++ __ andi(SCR2, tmp, TypeEntries::type_unknown); ++ __ bnez(SCR2, next); // already unknown. Nothing to do anymore. ++ } ++ ++ // different than before. Cannot keep accurate profile. ++ __ ld_ptr(SCR2, mdo_addr); ++ __ ori(SCR2, SCR2, TypeEntries::type_unknown); ++ __ st_ptr(SCR2, mdo_addr); ++ ++ if (TypeEntries::is_type_none(current_klass)) { ++ __ b(next); ++ ++ __ bind(none); ++ // first time here. Set profile type. ++ __ st_ptr(tmp, mdo_addr); ++ } ++ } else { ++ // There's a single possible klass at this profile point ++ assert(exact_klass != NULL, "should be"); ++ if (TypeEntries::is_type_none(current_klass)) { ++ __ mov_metadata(tmp, exact_klass->constant_encoding()); ++ __ ld_ptr(SCR2, mdo_addr); ++ __ XOR(tmp, tmp, SCR2); ++ assert(TypeEntries::type_klass_mask == -4, "must be"); ++ __ bstrpick_d(SCR1, tmp, 63, 2); ++ __ beqz(SCR1, next); ++#ifdef ASSERT ++ { ++ Label ok; ++ __ ld_ptr(SCR1, mdo_addr); ++ __ beqz(SCR1, ok); ++ __ li(SCR2, (u1)TypeEntries::null_seen); ++ __ beq(SCR1, SCR2, ok); ++ // may have been set by another thread ++ membar_acquire(); ++ __ mov_metadata(SCR1, exact_klass->constant_encoding()); ++ __ ld_ptr(SCR2, mdo_addr); ++ __ XOR(SCR2, SCR1, SCR2); ++ assert(TypeEntries::type_mask == -2, "must be"); ++ __ bstrpick_d(SCR2, SCR2, 63, 1); ++ __ beqz(SCR2, ok); ++ ++ __ stop("unexpected profiling mismatch"); ++ __ bind(ok); ++ } ++#endif ++ // first time here. Set profile type. ++ __ st_ptr(tmp, mdo_addr); ++ } else { ++ assert(ciTypeEntries::valid_ciklass(current_klass) != NULL && ++ ciTypeEntries::valid_ciklass(current_klass) != exact_klass, "inconsistent"); ++ ++ __ ld_ptr(tmp, mdo_addr); ++ __ andi(SCR1, tmp, TypeEntries::type_unknown); ++ __ bnez(SCR1, next); // already unknown. Nothing to do anymore. ++ ++ __ ori(tmp, tmp, TypeEntries::type_unknown); ++ __ st_ptr(tmp, mdo_addr); ++ // FIXME: Write barrier needed here? 
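Because several threads can race on the same profile slot, an apparent mismatch is not trusted immediately: after an acquire barrier the slot is re-read, and only a mismatch that survives the second read degrades the entry to "unknown" (that is the membar_acquire plus reload sequence above). Roughly, with std::atomic and the same assumed flag layout as the previous sketch:

    #include <atomic>
    #include <cstdint>

    constexpr intptr_t low_flags = 3;   // assumed flag bits, as in the sketch above

    // Returns true if the slot should be downgraded to "type unknown".
    // A mismatch is only believed after re-reading the slot with acquire,
    // in case another thread has just installed exactly this klass.
    bool conflicting_update(std::atomic<intptr_t>& slot, intptr_t klass) {
      intptr_t seen = slot.load(std::memory_order_relaxed);
      if (((seen ^ klass) & ~low_flags) == 0) return false;   // already this klass
      seen = slot.load(std::memory_order_acquire);            // membar_acquire + re-read
      return ((seen ^ klass) & ~low_flags) != 0;              // still different: real conflict
    }

    int main() {
      std::atomic<intptr_t> slot{0x1000};
      return conflicting_update(slot, 0x1000) ? 1 : 0;        // same klass: no conflict
    }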
++ } ++ } ++ ++ __ bind(next); ++ } ++ COMMENT("} emit_profile_type"); ++} ++ ++void LIR_Assembler::align_backward_branch_target() {} ++ ++void LIR_Assembler::negate(LIR_Opr left, LIR_Opr dest, LIR_Opr tmp) { ++ // tmp must be unused ++ assert(tmp->is_illegal(), "wasting a register if tmp is allocated"); ++ ++ if (left->is_single_cpu()) { ++ assert(dest->is_single_cpu(), "expect single result reg"); ++ __ sub_w(dest->as_register(), R0, left->as_register()); ++ } else if (left->is_double_cpu()) { ++ assert(dest->is_double_cpu(), "expect double result reg"); ++ __ sub_d(dest->as_register_lo(), R0, left->as_register_lo()); ++ } else if (left->is_single_fpu()) { ++ assert(dest->is_single_fpu(), "expect single float result reg"); ++ __ fneg_s(dest->as_float_reg(), left->as_float_reg()); ++ } else { ++ assert(left->is_double_fpu(), "expect double float operand reg"); ++ assert(dest->is_double_fpu(), "expect double float result reg"); ++ __ fneg_d(dest->as_double_reg(), left->as_double_reg()); ++ } ++} ++ ++void LIR_Assembler::leal(LIR_Opr addr, LIR_Opr dest, LIR_PatchCode patch_code, ++ CodeEmitInfo* info) { ++ if (patch_code != lir_patch_none) { ++ deoptimize_trap(info); ++ return; ++ } ++ ++ __ lea(dest->as_register_lo(), as_Address(addr->as_address_ptr())); ++} ++ ++void LIR_Assembler::rt_call(LIR_Opr result, address dest, const LIR_OprList* args, ++ LIR_Opr tmp, CodeEmitInfo* info) { ++ assert(!tmp->is_valid(), "don't need temporary"); ++ __ call(dest, relocInfo::runtime_call_type); ++ if (info != NULL) { ++ add_call_info_here(info); ++ } ++} ++ ++void LIR_Assembler::volatile_move_op(LIR_Opr src, LIR_Opr dest, BasicType type, ++ CodeEmitInfo* info) { ++ if (dest->is_address() || src->is_address()) { ++ move_op(src, dest, type, lir_patch_none, info, ++ /*pop_fpu_stack*/false, /*unaligned*/false, /*wide*/false); ++ } else { ++ ShouldNotReachHere(); ++ } ++} ++ ++#ifdef ASSERT ++// emit run-time assertion ++void LIR_Assembler::emit_assert(LIR_OpAssert* op) { ++ assert(op->code() == lir_assert, "must be"); ++ Label ok; ++ ++ if (op->in_opr1()->is_valid()) { ++ assert(op->in_opr2()->is_valid(), "both operands must be valid"); ++ assert(op->in_opr1()->is_cpu_register() || op->in_opr2()->is_cpu_register(), "must be"); ++ Register reg1 = as_reg(op->in_opr1()); ++ Register reg2 = as_reg(op->in_opr2()); ++ switch (op->condition()) { ++ case lir_cond_equal: __ beq(reg1, reg2, ok); break; ++ case lir_cond_notEqual: __ bne(reg1, reg2, ok); break; ++ case lir_cond_less: __ blt(reg1, reg2, ok); break; ++ case lir_cond_lessEqual: __ bge(reg2, reg1, ok); break; ++ case lir_cond_greaterEqual: __ bge(reg1, reg2, ok); break; ++ case lir_cond_greater: __ blt(reg2, reg1, ok); break; ++ case lir_cond_belowEqual: __ bgeu(reg2, reg1, ok); break; ++ case lir_cond_aboveEqual: __ bgeu(reg1, reg2, ok); break; ++ default: ShouldNotReachHere(); ++ } ++ } else { ++ assert(op->in_opr2()->is_illegal(), "both operands must be illegal"); ++ assert(op->condition() == lir_cond_always, "no other conditions allowed"); ++ } ++ if (op->halt()) { ++ const char* str = __ code_string(op->msg()); ++ __ stop(str); ++ } else { ++ breakpoint(); ++ } ++ __ bind(ok); ++} ++#endif ++ ++#ifndef PRODUCT ++#define COMMENT(x) do { __ block_comment(x); } while (0) ++#else ++#define COMMENT(x) ++#endif ++ ++void LIR_Assembler::membar() { ++ COMMENT("membar"); ++ __ membar(Assembler::AnyAny); ++} ++ ++void LIR_Assembler::membar_acquire() { ++ __ membar(Assembler::Membar_mask_bits(Assembler::LoadLoad | Assembler::LoadStore)); ++} ++ ++void 
LIR_Assembler::membar_release() { ++ __ membar(Assembler::Membar_mask_bits(Assembler::LoadStore|Assembler::StoreStore)); ++} ++ ++void LIR_Assembler::membar_loadload() { ++ __ membar(Assembler::LoadLoad); ++} ++ ++void LIR_Assembler::membar_storestore() { ++ __ membar(MacroAssembler::StoreStore); ++} ++ ++void LIR_Assembler::membar_loadstore() { ++ __ membar(MacroAssembler::LoadStore); ++} ++ ++void LIR_Assembler::membar_storeload() { ++ __ membar(MacroAssembler::StoreLoad); ++} ++ ++void LIR_Assembler::on_spin_wait() { ++ Unimplemented(); ++} ++ ++void LIR_Assembler::get_thread(LIR_Opr result_reg) { ++ __ move(result_reg->as_register(), TREG); ++} ++ ++void LIR_Assembler::peephole(LIR_List *lir) { ++} ++ ++void LIR_Assembler::atomic_op(LIR_Code code, LIR_Opr src, LIR_Opr data, ++ LIR_Opr dest, LIR_Opr tmp_op) { ++ Address addr = as_Address(src->as_address_ptr()); ++ BasicType type = src->type(); ++ Register dst = as_reg(dest); ++ Register tmp = as_reg(tmp_op); ++ bool is_oop = is_reference_type(type); ++ ++ if (Assembler::is_simm(addr.disp(), 12)) { ++ __ addi_d(tmp, addr.base(), addr.disp()); ++ } else { ++ __ li(tmp, addr.disp()); ++ __ add_d(tmp, addr.base(), tmp); ++ } ++ if (addr.index() != noreg) { ++ if (addr.scale() > Address::times_1) ++ __ alsl_d(tmp, addr.index(), tmp, addr.scale() - 1); ++ else ++ __ add_d(tmp, tmp, addr.index()); ++ } ++ ++ switch(type) { ++ case T_INT: ++ break; ++ case T_LONG: ++ break; ++ case T_OBJECT: ++ case T_ARRAY: ++ if (UseCompressedOops) { ++ // unsigned int ++ } else { ++ // long ++ } ++ break; ++ default: ++ ShouldNotReachHere(); ++ } ++ ++ if (code == lir_xadd) { ++ Register inc = noreg; ++ if (data->is_constant()) { ++ inc = SCR1; ++ __ li(inc, as_long(data)); ++ } else { ++ inc = as_reg(data); ++ } ++ switch(type) { ++ case T_INT: ++ __ amadd_db_w(dst, inc, tmp); ++ break; ++ case T_LONG: ++ __ amadd_db_d(dst, inc, tmp); ++ break; ++ case T_OBJECT: ++ case T_ARRAY: ++ if (UseCompressedOops) { ++ __ amadd_db_w(dst, inc, tmp); ++ __ lu32i_d(dst, 0); ++ } else { ++ __ amadd_db_d(dst, inc, tmp); ++ } ++ break; ++ default: ++ ShouldNotReachHere(); ++ } ++ } else if (code == lir_xchg) { ++ Register obj = as_reg(data); ++ if (is_oop && UseCompressedOops) { ++ __ encode_heap_oop(SCR2, obj); ++ obj = SCR2; ++ } ++ switch(type) { ++ case T_INT: ++ __ amswap_db_w(dst, obj, tmp); ++ break; ++ case T_LONG: ++ __ amswap_db_d(dst, obj, tmp); ++ break; ++ case T_OBJECT: ++ case T_ARRAY: ++ if (UseCompressedOops) { ++ __ amswap_db_w(dst, obj, tmp); ++ __ lu32i_d(dst, 0); ++ } else { ++ __ amswap_db_d(dst, obj, tmp); ++ } ++ break; ++ default: ++ ShouldNotReachHere(); ++ } ++ if (is_oop && UseCompressedOops) { ++ __ decode_heap_oop(dst); ++ } ++ } else { ++ ShouldNotReachHere(); ++ } ++} ++ ++#undef __ +diff --git a/src/hotspot/cpu/loongarch/c1_LIRGenerator_loongarch_64.cpp b/src/hotspot/cpu/loongarch/c1_LIRGenerator_loongarch_64.cpp +new file mode 100644 +index 0000000000..72a80f37c4 +--- /dev/null ++++ b/src/hotspot/cpu/loongarch/c1_LIRGenerator_loongarch_64.cpp +@@ -0,0 +1,1396 @@ ++/* ++ * Copyright (c) 2005, 2021, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2021, 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. 
++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#include "precompiled.hpp" ++#include "asm/macroAssembler.inline.hpp" ++#include "c1/c1_Compilation.hpp" ++#include "c1/c1_FrameMap.hpp" ++#include "c1/c1_Instruction.hpp" ++#include "c1/c1_LIRAssembler.hpp" ++#include "c1/c1_LIRGenerator.hpp" ++#include "c1/c1_Runtime1.hpp" ++#include "c1/c1_ValueStack.hpp" ++#include "ci/ciArray.hpp" ++#include "ci/ciObjArrayKlass.hpp" ++#include "ci/ciTypeArrayKlass.hpp" ++#include "runtime/sharedRuntime.hpp" ++#include "runtime/stubRoutines.hpp" ++#include "vmreg_loongarch.inline.hpp" ++ ++#ifdef ASSERT ++#define __ gen()->lir(__FILE__, __LINE__)-> ++#else ++#define __ gen()->lir()-> ++#endif ++ ++// Item will be loaded into a byte register; Intel only ++void LIRItem::load_byte_item() { ++ load_item(); ++} ++ ++void LIRItem::load_nonconstant() { ++ LIR_Opr r = value()->operand(); ++ if (r->is_constant()) { ++ _result = r; ++ } else { ++ load_item(); ++ } ++} ++ ++//-------------------------------------------------------------- ++// LIRGenerator ++//-------------------------------------------------------------- ++ ++LIR_Opr LIRGenerator::exceptionOopOpr() { return FrameMap::a0_oop_opr; } ++LIR_Opr LIRGenerator::exceptionPcOpr() { return FrameMap::a1_opr; } ++LIR_Opr LIRGenerator::divInOpr() { Unimplemented(); return LIR_OprFact::illegalOpr; } ++LIR_Opr LIRGenerator::divOutOpr() { Unimplemented(); return LIR_OprFact::illegalOpr; } ++LIR_Opr LIRGenerator::remOutOpr() { Unimplemented(); return LIR_OprFact::illegalOpr; } ++LIR_Opr LIRGenerator::shiftCountOpr() { Unimplemented(); return LIR_OprFact::illegalOpr; } ++LIR_Opr LIRGenerator::syncLockOpr() { return new_register(T_INT); } ++LIR_Opr LIRGenerator::syncTempOpr() { return FrameMap::a0_opr; } ++LIR_Opr LIRGenerator::getThreadTemp() { return LIR_OprFact::illegalOpr; } ++ ++LIR_Opr LIRGenerator::result_register_for(ValueType* type, bool callee) { ++ LIR_Opr opr; ++ switch (type->tag()) { ++ case intTag: opr = FrameMap::a0_opr; break; ++ case objectTag: opr = FrameMap::a0_oop_opr; break; ++ case longTag: opr = FrameMap::long0_opr; break; ++ case floatTag: opr = FrameMap::fpu0_float_opr; break; ++ case doubleTag: opr = FrameMap::fpu0_double_opr; break; ++ case addressTag: ++ default: ShouldNotReachHere(); return LIR_OprFact::illegalOpr; ++ } ++ ++ assert(opr->type_field() == as_OprType(as_BasicType(type)), "type mismatch"); ++ return opr; ++} ++ ++LIR_Opr LIRGenerator::rlock_byte(BasicType type) { ++ LIR_Opr reg = new_register(T_INT); ++ set_vreg_flag(reg, LIRGenerator::byte_reg); ++ return reg; ++} ++ ++//--------- loading items into registers -------------------------------- ++ ++bool LIRGenerator::can_store_as_constant(Value v, BasicType type) const { ++ if (v->type()->as_IntConstant() != NULL) { ++ return v->type()->as_IntConstant()->value() == 0L; ++ } else if 
(v->type()->as_LongConstant() != NULL) { ++ return v->type()->as_LongConstant()->value() == 0L; ++ } else if (v->type()->as_ObjectConstant() != NULL) { ++ return v->type()->as_ObjectConstant()->value()->is_null_object(); ++ } else { ++ return false; ++ } ++} ++ ++bool LIRGenerator::can_inline_as_constant(Value v) const { ++ // FIXME: Just a guess ++ if (v->type()->as_IntConstant() != NULL) { ++ return Assembler::is_simm(v->type()->as_IntConstant()->value(), 12); ++ } else if (v->type()->as_LongConstant() != NULL) { ++ return v->type()->as_LongConstant()->value() == 0L; ++ } else if (v->type()->as_ObjectConstant() != NULL) { ++ return v->type()->as_ObjectConstant()->value()->is_null_object(); ++ } else { ++ return false; ++ } ++} ++ ++bool LIRGenerator::can_inline_as_constant(LIR_Const* c) const { return false; } ++ ++LIR_Opr LIRGenerator::safepoint_poll_register() { ++ return LIR_OprFact::illegalOpr; ++} ++ ++LIR_Address* LIRGenerator::generate_address(LIR_Opr base, LIR_Opr index, ++ int shift, int disp, BasicType type) { ++ assert(base->is_register(), "must be"); ++ intx large_disp = disp; ++ ++ // accumulate fixed displacements ++ if (index->is_constant()) { ++ LIR_Const *constant = index->as_constant_ptr(); ++ if (constant->type() == T_INT) { ++ large_disp += index->as_jint() << shift; ++ } else { ++ assert(constant->type() == T_LONG, "should be"); ++ jlong c = index->as_jlong() << shift; ++ if ((jlong)((jint)c) == c) { ++ large_disp += c; ++ index = LIR_OprFact::illegalOpr; ++ } else { ++ LIR_Opr tmp = new_register(T_LONG); ++ __ move(index, tmp); ++ index = tmp; ++ // apply shift and displacement below ++ } ++ } ++ } ++ ++ if (index->is_register()) { ++ // apply the shift and accumulate the displacement ++ if (shift > 0) { ++ LIR_Opr tmp = new_pointer_register(); ++ __ shift_left(index, shift, tmp); ++ index = tmp; ++ } ++ if (large_disp != 0) { ++ LIR_Opr tmp = new_pointer_register(); ++ if (Assembler::is_simm(large_disp, 12)) { ++ __ add(index, LIR_OprFact::intptrConst(large_disp), tmp); ++ index = tmp; ++ } else { ++ __ move(LIR_OprFact::intptrConst(large_disp), tmp); ++ __ add(tmp, index, tmp); ++ index = tmp; ++ } ++ large_disp = 0; ++ } ++ } else if (large_disp != 0 && !Assembler::is_simm(large_disp, 12)) { ++ // index is illegal so replace it with the displacement loaded into a register ++ index = new_pointer_register(); ++ __ move(LIR_OprFact::intptrConst(large_disp), index); ++ large_disp = 0; ++ } ++ ++ // at this point we either have base + index or base + displacement ++ if (large_disp == 0 && index->is_register()) { ++ return new LIR_Address(base, index, type); ++ } else { ++ assert(Assembler::is_simm(large_disp, 12), "must be"); ++ return new LIR_Address(base, large_disp, type); ++ } ++} ++ ++LIR_Address* LIRGenerator::emit_array_address(LIR_Opr array_opr, LIR_Opr index_opr, BasicType type) { ++ int offset_in_bytes = arrayOopDesc::base_offset_in_bytes(type); ++ int elem_size = type2aelembytes(type); ++ int shift = exact_log2(elem_size); ++ ++ LIR_Address* addr; ++ if (index_opr->is_constant()) { ++ addr = new LIR_Address(array_opr, offset_in_bytes + (intx)(index_opr->as_jint()) * elem_size, type); ++ } else { ++ if (offset_in_bytes) { ++ LIR_Opr tmp = new_pointer_register(); ++ __ add(array_opr, LIR_OprFact::intConst(offset_in_bytes), tmp); ++ array_opr = tmp; ++ offset_in_bytes = 0; ++ } ++ addr = new LIR_Address(array_opr, index_opr, LIR_Address::scale(type), offset_in_bytes, type); ++ } ++ return addr; ++} ++ ++LIR_Opr LIRGenerator::load_immediate(int x, BasicType 
type) { ++ LIR_Opr r; ++ if (type == T_LONG) { ++ r = LIR_OprFact::longConst(x); ++ if (!Assembler::is_simm(x, 12)) { ++ LIR_Opr tmp = new_register(type); ++ __ move(r, tmp); ++ return tmp; ++ } ++ } else if (type == T_INT) { ++ r = LIR_OprFact::intConst(x); ++ if (!Assembler::is_simm(x, 12)) { ++ // This is all rather nasty. We don't know whether our constant ++ // is required for a logical or an arithmetic operation, wo we ++ // don't know what the range of valid values is!! ++ LIR_Opr tmp = new_register(type); ++ __ move(r, tmp); ++ return tmp; ++ } ++ } else { ++ ShouldNotReachHere(); ++ r = NULL; // unreachable ++ } ++ return r; ++} ++ ++void LIRGenerator::increment_counter(address counter, BasicType type, int step) { ++ LIR_Opr pointer = new_pointer_register(); ++ __ move(LIR_OprFact::intptrConst(counter), pointer); ++ LIR_Address* addr = new LIR_Address(pointer, type); ++ increment_counter(addr, step); ++} ++ ++void LIRGenerator::increment_counter(LIR_Address* addr, int step) { ++ LIR_Opr imm = NULL; ++ switch(addr->type()) { ++ case T_INT: ++ imm = LIR_OprFact::intConst(step); ++ break; ++ case T_LONG: ++ imm = LIR_OprFact::longConst(step); ++ break; ++ default: ++ ShouldNotReachHere(); ++ } ++ LIR_Opr reg = new_register(addr->type()); ++ __ load(addr, reg); ++ __ add(reg, imm, reg); ++ __ store(reg, addr); ++} ++ ++template ++void LIRGenerator::cmp_mem_int_branch(LIR_Condition condition, LIR_Opr base, ++ int disp, int c, T tgt, CodeEmitInfo* info) { ++ LIR_Opr reg = new_register(T_INT); ++ __ load(generate_address(base, disp, T_INT), reg, info); ++ __ cmp_branch(condition, reg, LIR_OprFact::intConst(c), T_INT, tgt); ++} ++ ++// Explicit instantiation for all supported types. ++template void LIRGenerator::cmp_mem_int_branch(LIR_Condition, LIR_Opr, int, int, Label*, CodeEmitInfo*); ++template void LIRGenerator::cmp_mem_int_branch(LIR_Condition, LIR_Opr, int, int, BlockBegin*, CodeEmitInfo*); ++template void LIRGenerator::cmp_mem_int_branch(LIR_Condition, LIR_Opr, int, int, CodeStub*, CodeEmitInfo*); ++ ++template ++void LIRGenerator::cmp_reg_mem_branch(LIR_Condition condition, LIR_Opr reg, LIR_Opr base, ++ int disp, BasicType type, T tgt, CodeEmitInfo* info) { ++ LIR_Opr reg1 = new_register(T_INT); ++ __ load(generate_address(base, disp, type), reg1, info); ++ __ cmp_branch(condition, reg, reg1, type, tgt); ++} ++ ++// Explicit instantiation for all supported types. 
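The Assembler::is_simm(x, 12) / is_uimm(x, 12) tests scattered through this generator reflect LoongArch's 12-bit immediate fields: addi.w/addi.d take a signed 12-bit operand and andi/ori an unsigned one, so only constants in that range can be folded into a single instruction; anything larger is materialised into a register first. A self-contained version of those range checks (written from the architecture's immediate widths, not copied from HotSpot):

    #include <cassert>
    #include <cstdint>

    // Signed immediate fits in `bits` bits (two's complement).
    constexpr bool is_simm(int64_t x, unsigned bits) {
      return x >= -(int64_t(1) << (bits - 1)) && x < (int64_t(1) << (bits - 1));
    }

    // Unsigned immediate fits in `bits` bits.
    constexpr bool is_uimm(int64_t x, unsigned bits) {
      return x >= 0 && x < (int64_t(1) << bits);
    }

    int main() {
      static_assert(is_simm(2047, 12) && !is_simm(2048, 12),   "si12 upper bound");
      static_assert(is_simm(-2048, 12) && !is_simm(-2049, 12), "si12 lower bound");
      static_assert(is_uimm(4095, 12) && !is_uimm(4096, 12),   "ui12 upper bound");
      assert(is_simm(0, 12));
    }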
++template void LIRGenerator::cmp_reg_mem_branch(LIR_Condition, LIR_Opr, LIR_Opr, int, BasicType, Label*, CodeEmitInfo*); ++template void LIRGenerator::cmp_reg_mem_branch(LIR_Condition, LIR_Opr, LIR_Opr, int, BasicType, BlockBegin*, CodeEmitInfo*); ++template void LIRGenerator::cmp_reg_mem_branch(LIR_Condition, LIR_Opr, LIR_Opr, int, BasicType, CodeStub*, CodeEmitInfo*); ++ ++bool LIRGenerator::strength_reduce_multiply(LIR_Opr left, jint c, LIR_Opr result, LIR_Opr tmp) { ++ if (is_power_of_2(c - 1)) { ++ __ shift_left(left, exact_log2(c - 1), tmp); ++ __ add(tmp, left, result); ++ return true; ++ } else if (is_power_of_2(c + 1)) { ++ __ shift_left(left, exact_log2(c + 1), tmp); ++ __ sub(tmp, left, result); ++ return true; ++ } else { ++ return false; ++ } ++} ++ ++void LIRGenerator::store_stack_parameter (LIR_Opr item, ByteSize offset_from_sp) { ++ BasicType type = item->type(); ++ __ store(item, new LIR_Address(FrameMap::sp_opr, in_bytes(offset_from_sp), type)); ++} ++ ++void LIRGenerator::array_store_check(LIR_Opr value, LIR_Opr array, CodeEmitInfo* store_check_info, ++ ciMethod* profiled_method, int profiled_bci) { ++ LIR_Opr tmp1 = new_register(objectType); ++ LIR_Opr tmp2 = new_register(objectType); ++ LIR_Opr tmp3 = new_register(objectType); ++ __ store_check(value, array, tmp1, tmp2, tmp3, store_check_info, profiled_method, profiled_bci); ++} ++ ++//---------------------------------------------------------------------- ++// visitor functions ++//---------------------------------------------------------------------- ++ ++void LIRGenerator::do_MonitorEnter(MonitorEnter* x) { ++ assert(x->is_pinned(),""); ++ LIRItem obj(x->obj(), this); ++ obj.load_item(); ++ ++ set_no_result(x); ++ ++ // "lock" stores the address of the monitor stack slot, so this is not an oop ++ LIR_Opr lock = new_register(T_INT); ++ // Need a scratch register for biased locking ++ LIR_Opr scratch = LIR_OprFact::illegalOpr; ++ if (UseBiasedLocking) { ++ scratch = new_register(T_INT); ++ } ++ ++ CodeEmitInfo* info_for_exception = NULL; ++ if (x->needs_null_check()) { ++ info_for_exception = state_for(x); ++ } ++ // this CodeEmitInfo must not have the xhandlers because here the ++ // object is already locked (xhandlers expect object to be unlocked) ++ CodeEmitInfo* info = state_for(x, x->state(), true); ++ monitor_enter(obj.result(), lock, syncTempOpr(), scratch, ++ x->monitor_no(), info_for_exception, info); ++} ++ ++void LIRGenerator::do_MonitorExit(MonitorExit* x) { ++ assert(x->is_pinned(),""); ++ ++ LIRItem obj(x->obj(), this); ++ obj.dont_load_item(); ++ ++ LIR_Opr lock = new_register(T_INT); ++ LIR_Opr obj_temp = new_register(T_INT); ++ set_no_result(x); ++ monitor_exit(obj_temp, lock, syncTempOpr(), LIR_OprFact::illegalOpr, x->monitor_no()); ++} ++ ++void LIRGenerator::do_NegateOp(NegateOp* x) { ++ LIRItem from(x->x(), this); ++ from.load_item(); ++ LIR_Opr result = rlock_result(x); ++ __ negate (from.result(), result); ++} ++ ++// for _fadd, _fmul, _fsub, _fdiv, _frem ++// _dadd, _dmul, _dsub, _ddiv, _drem ++void LIRGenerator::do_ArithmeticOp_FPU(ArithmeticOp* x) { ++ if (x->op() == Bytecodes::_frem || x->op() == Bytecodes::_drem) { ++ // float remainder is implemented as a direct call into the runtime ++ LIRItem right(x->x(), this); ++ LIRItem left(x->y(), this); ++ ++ BasicTypeList signature(2); ++ if (x->op() == Bytecodes::_frem) { ++ signature.append(T_FLOAT); ++ signature.append(T_FLOAT); ++ } else { ++ signature.append(T_DOUBLE); ++ signature.append(T_DOUBLE); ++ } ++ CallingConvention* cc = 
frame_map()->c_calling_convention(&signature); ++ ++ const LIR_Opr result_reg = result_register_for(x->type()); ++ left.load_item_force(cc->at(1)); ++ right.load_item(); ++ ++ __ move(right.result(), cc->at(0)); ++ ++ address entry; ++ if (x->op() == Bytecodes::_frem) { ++ entry = CAST_FROM_FN_PTR(address, SharedRuntime::frem); ++ } else { ++ entry = CAST_FROM_FN_PTR(address, SharedRuntime::drem); ++ } ++ ++ LIR_Opr result = rlock_result(x); ++ __ call_runtime_leaf(entry, getThreadTemp(), result_reg, cc->args()); ++ __ move(result_reg, result); ++ return; ++ } ++ ++ LIRItem left(x->x(), this); ++ LIRItem right(x->y(), this); ++ LIRItem* left_arg = &left; ++ LIRItem* right_arg = &right; ++ ++ // Always load right hand side. ++ right.load_item(); ++ ++ if (!left.is_register()) ++ left.load_item(); ++ ++ LIR_Opr reg = rlock(x); ++ ++ arithmetic_op_fpu(x->op(), reg, left.result(), right.result(), x->is_strictfp()); ++ ++ set_result(x, round_item(reg)); ++} ++ ++// for _ladd, _lmul, _lsub, _ldiv, _lrem ++void LIRGenerator::do_ArithmeticOp_Long(ArithmeticOp* x) { ++ // missing test if instr is commutative and if we should swap ++ LIRItem left(x->x(), this); ++ LIRItem right(x->y(), this); ++ ++ if (x->op() == Bytecodes::_ldiv || x->op() == Bytecodes::_lrem) { ++ left.load_item(); ++ bool need_zero_check = true; ++ if (right.is_constant()) { ++ jlong c = right.get_jlong_constant(); ++ // no need to do div-by-zero check if the divisor is a non-zero constant ++ if (c != 0) need_zero_check = false; ++ // do not load right if the divisor is a power-of-2 constant ++ if (c > 0 && is_power_of_2(c) && Assembler::is_uimm(c - 1, 12)) { ++ right.dont_load_item(); ++ } else { ++ right.load_item(); ++ } ++ } else { ++ right.load_item(); ++ } ++ if (need_zero_check) { ++ CodeEmitInfo* info = state_for(x); ++ CodeStub* stub = new DivByZeroStub(info); ++ __ cmp_branch(lir_cond_equal, right.result(), LIR_OprFact::longConst(0), T_LONG, stub); ++ } ++ ++ rlock_result(x); ++ switch (x->op()) { ++ case Bytecodes::_lrem: ++ __ rem (left.result(), right.result(), x->operand()); ++ break; ++ case Bytecodes::_ldiv: ++ __ div (left.result(), right.result(), x->operand()); ++ break; ++ default: ++ ShouldNotReachHere(); ++ break; ++ } ++ } else { ++ assert(x->op() == Bytecodes::_lmul || x->op() == Bytecodes::_ladd || x->op() == Bytecodes::_lsub, ++ "expect lmul, ladd or lsub"); ++ // add, sub, mul ++ left.load_item(); ++ if (!right.is_register()) { ++ if (x->op() == Bytecodes::_lmul || !right.is_constant() || ++ (x->op() == Bytecodes::_ladd && !Assembler::is_simm(right.get_jlong_constant(), 12)) || ++ (x->op() == Bytecodes::_lsub && !Assembler::is_simm(-right.get_jlong_constant(), 12))) { ++ right.load_item(); ++ } else { // add, sub ++ assert(x->op() == Bytecodes::_ladd || x->op() == Bytecodes::_lsub, "expect ladd or lsub"); ++ // don't load constants to save register ++ right.load_nonconstant(); ++ } ++ } ++ rlock_result(x); ++ arithmetic_op_long(x->op(), x->operand(), left.result(), right.result(), NULL); ++ } ++} ++ ++// for: _iadd, _imul, _isub, _idiv, _irem ++void LIRGenerator::do_ArithmeticOp_Int(ArithmeticOp* x) { ++ // Test if instr is commutative and if we should swap ++ LIRItem left(x->x(), this); ++ LIRItem right(x->y(), this); ++ LIRItem* left_arg = &left; ++ LIRItem* right_arg = &right; ++ if (x->is_commutative() && left.is_stack() && right.is_register()) { ++ // swap them if left is real stack (or cached) and right is real register(not cached) ++ left_arg = &right; ++ right_arg = &left; ++ } ++ ++ 
left_arg->load_item(); ++ ++ // do not need to load right, as we can handle stack and constants ++ if (x->op() == Bytecodes::_idiv || x->op() == Bytecodes::_irem) { ++ rlock_result(x); ++ bool need_zero_check = true; ++ if (right.is_constant()) { ++ jint c = right.get_jint_constant(); ++ // no need to do div-by-zero check if the divisor is a non-zero constant ++ if (c != 0) need_zero_check = false; ++ // do not load right if the divisor is a power-of-2 constant ++ if (c > 0 && is_power_of_2(c) && Assembler::is_uimm(c - 1, 12)) { ++ right_arg->dont_load_item(); ++ } else { ++ right_arg->load_item(); ++ } ++ } else { ++ right_arg->load_item(); ++ } ++ if (need_zero_check) { ++ CodeEmitInfo* info = state_for(x); ++ CodeStub* stub = new DivByZeroStub(info); ++ __ cmp_branch(lir_cond_equal, right_arg->result(), LIR_OprFact::longConst(0), T_INT, stub); ++ } ++ ++ LIR_Opr ill = LIR_OprFact::illegalOpr; ++ if (x->op() == Bytecodes::_irem) { ++ __ irem(left_arg->result(), right_arg->result(), x->operand(), ill, NULL); ++ } else if (x->op() == Bytecodes::_idiv) { ++ __ idiv(left_arg->result(), right_arg->result(), x->operand(), ill, NULL); ++ } ++ } else if (x->op() == Bytecodes::_iadd || x->op() == Bytecodes::_isub) { ++ if (right.is_constant() && ++ ((x->op() == Bytecodes::_iadd && Assembler::is_simm(right.get_jint_constant(), 12)) || ++ (x->op() == Bytecodes::_isub && Assembler::is_simm(-right.get_jint_constant(), 12)))) { ++ right.load_nonconstant(); ++ } else { ++ right.load_item(); ++ } ++ rlock_result(x); ++ arithmetic_op_int(x->op(), x->operand(), left_arg->result(), right_arg->result(), LIR_OprFact::illegalOpr); ++ } else { ++ assert (x->op() == Bytecodes::_imul, "expect imul"); ++ if (right.is_constant()) { ++ jint c = right.get_jint_constant(); ++ if (c > 0 && c < max_jint && (is_power_of_2(c) || is_power_of_2(c - 1) || is_power_of_2(c + 1))) { ++ right_arg->dont_load_item(); ++ } else { ++ // Cannot use constant op. 
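The _imul handling keeps a constant multiplier out of a register whenever the multiply can be strength-reduced: c a power of two becomes a plain shift, and c-1 or c+1 a power of two becomes shift-plus-add or shift-minus-subtract, matching strength_reduce_multiply earlier in this file. A quick standalone check of those identities:

    #include <cassert>
    #include <cstdint>
    #include <initializer_list>

    constexpr bool is_power_of_2(int64_t v) { return v > 0 && (v & (v - 1)) == 0; }

    constexpr int exact_log2(int64_t v) {
      int n = 0;
      while (v > 1) { v >>= 1; ++n; }
      return n;
    }

    // x * c rewritten without a multiply, for the two shapes the generator accepts.
    constexpr int64_t mul_by_shift_add(int64_t x, int64_t c) {
      if (is_power_of_2(c - 1)) return (x << exact_log2(c - 1)) + x;  // c = 2^k + 1
      if (is_power_of_2(c + 1)) return (x << exact_log2(c + 1)) - x;  // c = 2^k - 1
      return x * c;                                                   // not reducible here
    }

    int main() {
      for (int64_t x : {-7, 0, 3, 123}) {
        assert(mul_by_shift_add(x, 9)  == x * 9);    // 9  = 8 + 1
        assert(mul_by_shift_add(x, 7)  == x * 7);    // 7  = 8 - 1
        assert(mul_by_shift_add(x, 33) == x * 33);   // 33 = 32 + 1
      }
    }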
++ right_arg->load_item(); ++ } ++ } else { ++ right.load_item(); ++ } ++ rlock_result(x); ++ arithmetic_op_int(x->op(), x->operand(), left_arg->result(), right_arg->result(), new_register(T_INT)); ++ } ++} ++ ++void LIRGenerator::do_ArithmeticOp(ArithmeticOp* x) { ++ // when an operand with use count 1 is the left operand, then it is ++ // likely that no move for 2-operand-LIR-form is necessary ++ if (x->is_commutative() && x->y()->as_Constant() == NULL && x->x()->use_count() > x->y()->use_count()) { ++ x->swap_operands(); ++ } ++ ++ ValueTag tag = x->type()->tag(); ++ assert(x->x()->type()->tag() == tag && x->y()->type()->tag() == tag, "wrong parameters"); ++ switch (tag) { ++ case floatTag: ++ case doubleTag: do_ArithmeticOp_FPU(x); return; ++ case longTag: do_ArithmeticOp_Long(x); return; ++ case intTag: do_ArithmeticOp_Int(x); return; ++ default: ShouldNotReachHere(); return; ++ } ++} ++ ++// _ishl, _lshl, _ishr, _lshr, _iushr, _lushr ++void LIRGenerator::do_ShiftOp(ShiftOp* x) { ++ LIRItem left(x->x(), this); ++ LIRItem right(x->y(), this); ++ ++ left.load_item(); ++ ++ rlock_result(x); ++ if (right.is_constant()) { ++ right.dont_load_item(); ++ int c; ++ switch (x->op()) { ++ case Bytecodes::_ishl: ++ c = right.get_jint_constant() & 0x1f; ++ __ shift_left(left.result(), c, x->operand()); ++ break; ++ case Bytecodes::_ishr: ++ c = right.get_jint_constant() & 0x1f; ++ __ shift_right(left.result(), c, x->operand()); ++ break; ++ case Bytecodes::_iushr: ++ c = right.get_jint_constant() & 0x1f; ++ __ unsigned_shift_right(left.result(), c, x->operand()); ++ break; ++ case Bytecodes::_lshl: ++ c = right.get_jint_constant() & 0x3f; ++ __ shift_left(left.result(), c, x->operand()); ++ break; ++ case Bytecodes::_lshr: ++ c = right.get_jint_constant() & 0x3f; ++ __ shift_right(left.result(), c, x->operand()); ++ break; ++ case Bytecodes::_lushr: ++ c = right.get_jint_constant() & 0x3f; ++ __ unsigned_shift_right(left.result(), c, x->operand()); ++ break; ++ default: ++ ShouldNotReachHere(); ++ } ++ } else { ++ right.load_item(); ++ LIR_Opr tmp = new_register(T_INT); ++ switch (x->op()) { ++ case Bytecodes::_ishl: ++ __ logical_and(right.result(), LIR_OprFact::intConst(0x1f), tmp); ++ __ shift_left(left.result(), tmp, x->operand(), tmp); ++ break; ++ case Bytecodes::_ishr: ++ __ logical_and(right.result(), LIR_OprFact::intConst(0x1f), tmp); ++ __ shift_right(left.result(), tmp, x->operand(), tmp); ++ break; ++ case Bytecodes::_iushr: ++ __ logical_and(right.result(), LIR_OprFact::intConst(0x1f), tmp); ++ __ unsigned_shift_right(left.result(), tmp, x->operand(), tmp); ++ break; ++ case Bytecodes::_lshl: ++ __ logical_and(right.result(), LIR_OprFact::intConst(0x3f), tmp); ++ __ shift_left(left.result(), tmp, x->operand(), tmp); ++ break; ++ case Bytecodes::_lshr: ++ __ logical_and(right.result(), LIR_OprFact::intConst(0x3f), tmp); ++ __ shift_right(left.result(), tmp, x->operand(), tmp); ++ break; ++ case Bytecodes::_lushr: ++ __ logical_and(right.result(), LIR_OprFact::intConst(0x3f), tmp); ++ __ unsigned_shift_right(left.result(), tmp, x->operand(), tmp); ++ break; ++ default: ++ ShouldNotReachHere(); ++ } ++ } ++} ++ ++// _iand, _land, _ior, _lor, _ixor, _lxor ++void LIRGenerator::do_LogicOp(LogicOp* x) { ++ LIRItem left(x->x(), this); ++ LIRItem right(x->y(), this); ++ ++ left.load_item(); ++ ++ rlock_result(x); ++ if (right.is_constant() ++ && ((right.type()->tag() == intTag ++ && Assembler::is_uimm(right.get_jint_constant(), 12)) ++ || (right.type()->tag() == longTag ++ && 
Assembler::is_uimm(right.get_jlong_constant(), 12)))) { ++ right.dont_load_item(); ++ } else { ++ right.load_item(); ++ } ++ switch (x->op()) { ++ case Bytecodes::_iand: ++ case Bytecodes::_land: ++ __ logical_and(left.result(), right.result(), x->operand()); break; ++ case Bytecodes::_ior: ++ case Bytecodes::_lor: ++ __ logical_or (left.result(), right.result(), x->operand()); break; ++ case Bytecodes::_ixor: ++ case Bytecodes::_lxor: ++ __ logical_xor(left.result(), right.result(), x->operand()); break; ++ default: Unimplemented(); ++ } ++} ++ ++// _lcmp, _fcmpl, _fcmpg, _dcmpl, _dcmpg ++void LIRGenerator::do_CompareOp(CompareOp* x) { ++ LIRItem left(x->x(), this); ++ LIRItem right(x->y(), this); ++ ValueTag tag = x->x()->type()->tag(); ++ if (tag == longTag) { ++ left.set_destroys_register(); ++ } ++ left.load_item(); ++ right.load_item(); ++ LIR_Opr reg = rlock_result(x); ++ ++ if (x->x()->type()->is_float_kind()) { ++ Bytecodes::Code code = x->op(); ++ __ fcmp2int(left.result(), right.result(), reg, ++ (code == Bytecodes::_fcmpl || code == Bytecodes::_dcmpl)); ++ } else if (x->x()->type()->tag() == longTag) { ++ __ lcmp2int(left.result(), right.result(), reg); ++ } else { ++ Unimplemented(); ++ } ++} ++ ++LIR_Opr LIRGenerator::atomic_cmpxchg(BasicType type, LIR_Opr addr, ++ LIRItem& cmp_value, LIRItem& new_value) { ++ LIR_Opr ill = LIR_OprFact::illegalOpr; // for convenience ++ new_value.load_item(); ++ cmp_value.load_item(); ++ LIR_Opr result = new_register(T_INT); ++ if (is_reference_type(type)) { ++ __ cas_obj(addr, cmp_value.result(), new_value.result(), ++ new_register(T_INT), new_register(T_INT), result); ++ } else if (type == T_INT) { ++ __ cas_int(addr->as_address_ptr()->base(), cmp_value.result(), ++ new_value.result(), ill, ill); ++ } else if (type == T_LONG) { ++ __ cas_long(addr->as_address_ptr()->base(), cmp_value.result(), ++ new_value.result(), ill, ill); ++ } else { ++ ShouldNotReachHere(); ++ Unimplemented(); ++ } ++ __ move(FrameMap::scr1_opr, result); ++ return result; ++} ++ ++LIR_Opr LIRGenerator::atomic_xchg(BasicType type, LIR_Opr addr, LIRItem& value) { ++ bool is_oop = is_reference_type(type); ++ LIR_Opr result = new_register(type); ++ value.load_item(); ++ assert(type == T_INT || is_oop || type == T_LONG , "unexpected type"); ++ LIR_Opr tmp = new_register(T_INT); ++ __ xchg(addr, value.result(), result, tmp); ++ return result; ++} ++ ++LIR_Opr LIRGenerator::atomic_add(BasicType type, LIR_Opr addr, LIRItem& value) { ++ LIR_Opr result = new_register(type); ++ value.load_item(); ++ assert(type == T_INT || type == T_LONG , "unexpected type"); ++ LIR_Opr tmp = new_register(T_INT); ++ __ xadd(addr, value.result(), result, tmp); ++ return result; ++} ++ ++void LIRGenerator::do_MathIntrinsic(Intrinsic* x) { ++ assert(x->number_of_arguments() == 1 || (x->number_of_arguments() == 2 && x->id() == vmIntrinsics::_dpow), ++ "wrong type"); ++ if (x->id() == vmIntrinsics::_dexp || x->id() == vmIntrinsics::_dlog || ++ x->id() == vmIntrinsics::_dpow || x->id() == vmIntrinsics::_dcos || ++ x->id() == vmIntrinsics::_dsin || x->id() == vmIntrinsics::_dtan || ++ x->id() == vmIntrinsics::_dlog10) { ++ do_LibmIntrinsic(x); ++ return; ++ } ++ switch (x->id()) { ++ case vmIntrinsics::_dabs: ++ case vmIntrinsics::_dsqrt: { ++ assert(x->number_of_arguments() == 1, "wrong type"); ++ LIRItem value(x->argument_at(0), this); ++ value.load_item(); ++ LIR_Opr dst = rlock_result(x); ++ ++ switch (x->id()) { ++ case vmIntrinsics::_dsqrt: ++ __ sqrt(value.result(), dst, LIR_OprFact::illegalOpr); 
++ break; ++ case vmIntrinsics::_dabs: ++ __ abs(value.result(), dst, LIR_OprFact::illegalOpr); ++ break; ++ default: ++ ShouldNotReachHere(); ++ } ++ break; ++ } ++ default: ++ ShouldNotReachHere(); ++ } ++} ++ ++void LIRGenerator::do_LibmIntrinsic(Intrinsic* x) { ++ LIRItem value(x->argument_at(0), this); ++ value.set_destroys_register(); ++ ++ LIR_Opr calc_result = rlock_result(x); ++ LIR_Opr result_reg = result_register_for(x->type()); ++ ++ CallingConvention* cc = NULL; ++ ++ if (x->id() == vmIntrinsics::_dpow) { ++ LIRItem value1(x->argument_at(1), this); ++ ++ value1.set_destroys_register(); ++ ++ BasicTypeList signature(2); ++ signature.append(T_DOUBLE); ++ signature.append(T_DOUBLE); ++ cc = frame_map()->c_calling_convention(&signature); ++ value.load_item_force(cc->at(0)); ++ value1.load_item_force(cc->at(1)); ++ } else { ++ BasicTypeList signature(1); ++ signature.append(T_DOUBLE); ++ cc = frame_map()->c_calling_convention(&signature); ++ value.load_item_force(cc->at(0)); ++ } ++ ++ switch (x->id()) { ++ case vmIntrinsics::_dexp: ++ if (StubRoutines::dexp() != NULL) { ++ __ call_runtime_leaf(StubRoutines::dexp(), getThreadTemp(), result_reg, cc->args()); ++ } else { ++ __ call_runtime_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::dexp), getThreadTemp(), result_reg, cc->args()); ++ } ++ break; ++ case vmIntrinsics::_dlog: ++ if (StubRoutines::dlog() != NULL) { ++ __ call_runtime_leaf(StubRoutines::dlog(), getThreadTemp(), result_reg, cc->args()); ++ } else { ++ __ call_runtime_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::dlog), getThreadTemp(), result_reg, cc->args()); ++ } ++ break; ++ case vmIntrinsics::_dlog10: ++ if (StubRoutines::dlog10() != NULL) { ++ __ call_runtime_leaf(StubRoutines::dlog10(), getThreadTemp(), result_reg, cc->args()); ++ } else { ++ __ call_runtime_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::dlog10), getThreadTemp(), result_reg, cc->args()); ++ } ++ break; ++ case vmIntrinsics::_dpow: ++ if (StubRoutines::dpow() != NULL) { ++ __ call_runtime_leaf(StubRoutines::dpow(), getThreadTemp(), result_reg, cc->args()); ++ } else { ++ __ call_runtime_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::dpow), getThreadTemp(), result_reg, cc->args()); ++ } ++ break; ++ case vmIntrinsics::_dsin: ++ if (StubRoutines::dsin() != NULL) { ++ __ call_runtime_leaf(StubRoutines::dsin(), getThreadTemp(), result_reg, cc->args()); ++ } else { ++ __ call_runtime_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::dsin), getThreadTemp(), result_reg, cc->args()); ++ } ++ break; ++ case vmIntrinsics::_dcos: ++ if (StubRoutines::dcos() != NULL) { ++ __ call_runtime_leaf(StubRoutines::dcos(), getThreadTemp(), result_reg, cc->args()); ++ } else { ++ __ call_runtime_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::dcos), getThreadTemp(), result_reg, cc->args()); ++ } ++ break; ++ case vmIntrinsics::_dtan: ++ if (StubRoutines::dtan() != NULL) { ++ __ call_runtime_leaf(StubRoutines::dtan(), getThreadTemp(), result_reg, cc->args()); ++ } else { ++ __ call_runtime_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::dtan), getThreadTemp(), result_reg, cc->args()); ++ } ++ break; ++ default: ShouldNotReachHere(); ++ } ++ __ move(result_reg, calc_result); ++} ++ ++void LIRGenerator::do_ArrayCopy(Intrinsic* x) { ++ Register j_rarg0 = RT0; ++ Register j_rarg1 = A0; ++ Register j_rarg2 = A1; ++ Register j_rarg3 = A2; ++ Register j_rarg4 = A3; ++ Register j_rarg5 = A4; ++ ++ assert(x->number_of_arguments() == 5, "wrong type"); ++ ++ // Make all state_for calls early since they can emit code ++ CodeEmitInfo* info = 
state_for(x, x->state()); ++ ++ LIRItem src(x->argument_at(0), this); ++ LIRItem src_pos(x->argument_at(1), this); ++ LIRItem dst(x->argument_at(2), this); ++ LIRItem dst_pos(x->argument_at(3), this); ++ LIRItem length(x->argument_at(4), this); ++ ++ // operands for arraycopy must use fixed registers, otherwise ++ // LinearScan will fail allocation (because arraycopy always needs a ++ // call) ++ ++ // The java calling convention will give us enough registers ++ // so that on the stub side the args will be perfect already. ++ // On the other slow/special case side we call C and the arg ++ // positions are not similar enough to pick one as the best. ++ // Also because the java calling convention is a "shifted" version ++ // of the C convention we can process the java args trivially into C ++ // args without worry of overwriting during the xfer ++ ++ src.load_item_force (FrameMap::as_oop_opr(j_rarg0)); ++ src_pos.load_item_force (FrameMap::as_opr(j_rarg1)); ++ dst.load_item_force (FrameMap::as_oop_opr(j_rarg2)); ++ dst_pos.load_item_force (FrameMap::as_opr(j_rarg3)); ++ length.load_item_force (FrameMap::as_opr(j_rarg4)); ++ ++ LIR_Opr tmp = FrameMap::as_opr(j_rarg5); ++ ++ set_no_result(x); ++ ++ int flags; ++ ciArrayKlass* expected_type; ++ arraycopy_helper(x, &flags, &expected_type); ++ ++ __ arraycopy(src.result(), src_pos.result(), dst.result(), dst_pos.result(), ++ length.result(), tmp, expected_type, flags, info); // does add_safepoint ++} ++ ++void LIRGenerator::do_update_CRC32(Intrinsic* x) { ++ assert(UseCRC32Intrinsics, "why are we here?"); ++ // Make all state_for calls early since they can emit code ++ LIR_Opr result = rlock_result(x); ++ int flags = 0; ++ switch (x->id()) { ++ case vmIntrinsics::_updateCRC32: { ++ LIRItem crc(x->argument_at(0), this); ++ LIRItem val(x->argument_at(1), this); ++ // val is destroyed by update_crc32 ++ val.set_destroys_register(); ++ crc.load_item(); ++ val.load_item(); ++ __ update_crc32(crc.result(), val.result(), result); ++ break; ++ } ++ case vmIntrinsics::_updateBytesCRC32: ++ case vmIntrinsics::_updateByteBufferCRC32: { ++ bool is_updateBytes = (x->id() == vmIntrinsics::_updateBytesCRC32); ++ ++ LIRItem crc(x->argument_at(0), this); ++ LIRItem buf(x->argument_at(1), this); ++ LIRItem off(x->argument_at(2), this); ++ LIRItem len(x->argument_at(3), this); ++ buf.load_item(); ++ off.load_nonconstant(); ++ ++ LIR_Opr index = off.result(); ++ int offset = is_updateBytes ? 
arrayOopDesc::base_offset_in_bytes(T_BYTE) : 0; ++ if(off.result()->is_constant()) { ++ index = LIR_OprFact::illegalOpr; ++ offset += off.result()->as_jint(); ++ } ++ LIR_Opr base_op = buf.result(); ++ ++ if (index->is_valid()) { ++ LIR_Opr tmp = new_register(T_LONG); ++ __ convert(Bytecodes::_i2l, index, tmp); ++ index = tmp; ++ } ++ ++ if (offset) { ++ LIR_Opr tmp = new_pointer_register(); ++ __ add(base_op, LIR_OprFact::intConst(offset), tmp); ++ base_op = tmp; ++ offset = 0; ++ } ++ ++ LIR_Address* a = new LIR_Address(base_op, index, offset, T_BYTE); ++ BasicTypeList signature(3); ++ signature.append(T_INT); ++ signature.append(T_ADDRESS); ++ signature.append(T_INT); ++ CallingConvention* cc = frame_map()->c_calling_convention(&signature); ++ const LIR_Opr result_reg = result_register_for(x->type()); ++ ++ LIR_Opr addr = new_pointer_register(); ++ __ leal(LIR_OprFact::address(a), addr); ++ ++ crc.load_item_force(cc->at(0)); ++ __ move(addr, cc->at(1)); ++ len.load_item_force(cc->at(2)); ++ ++ __ call_runtime_leaf(StubRoutines::updateBytesCRC32(), getThreadTemp(), result_reg, cc->args()); ++ __ move(result_reg, result); ++ ++ break; ++ } ++ default: { ++ ShouldNotReachHere(); ++ } ++ } ++} ++ ++void LIRGenerator::do_update_CRC32C(Intrinsic* x) { ++ assert(UseCRC32CIntrinsics, "why are we here?"); ++ // Make all state_for calls early since they can emit code ++ LIR_Opr result = rlock_result(x); ++ int flags = 0; ++ switch (x->id()) { ++ case vmIntrinsics::_updateBytesCRC32C: ++ case vmIntrinsics::_updateDirectByteBufferCRC32C: { ++ bool is_updateBytes = (x->id() == vmIntrinsics::_updateBytesCRC32C); ++ int offset = is_updateBytes ? arrayOopDesc::base_offset_in_bytes(T_BYTE) : 0; ++ ++ LIRItem crc(x->argument_at(0), this); ++ LIRItem buf(x->argument_at(1), this); ++ LIRItem off(x->argument_at(2), this); ++ LIRItem end(x->argument_at(3), this); ++ ++ buf.load_item(); ++ off.load_nonconstant(); ++ end.load_nonconstant(); ++ ++ // len = end - off ++ LIR_Opr len = end.result(); ++ LIR_Opr tmpA = new_register(T_INT); ++ LIR_Opr tmpB = new_register(T_INT); ++ __ move(end.result(), tmpA); ++ __ move(off.result(), tmpB); ++ __ sub(tmpA, tmpB, tmpA); ++ len = tmpA; ++ ++ LIR_Opr index = off.result(); ++ if(off.result()->is_constant()) { ++ index = LIR_OprFact::illegalOpr; ++ offset += off.result()->as_jint(); ++ } ++ LIR_Opr base_op = buf.result(); ++ ++ if (index->is_valid()) { ++ LIR_Opr tmp = new_register(T_LONG); ++ __ convert(Bytecodes::_i2l, index, tmp); ++ index = tmp; ++ } ++ ++ if (offset) { ++ LIR_Opr tmp = new_pointer_register(); ++ __ add(base_op, LIR_OprFact::intConst(offset), tmp); ++ base_op = tmp; ++ offset = 0; ++ } ++ ++ LIR_Address* a = new LIR_Address(base_op, index, offset, T_BYTE); ++ BasicTypeList signature(3); ++ signature.append(T_INT); ++ signature.append(T_ADDRESS); ++ signature.append(T_INT); ++ CallingConvention* cc = frame_map()->c_calling_convention(&signature); ++ const LIR_Opr result_reg = result_register_for(x->type()); ++ ++ LIR_Opr addr = new_pointer_register(); ++ __ leal(LIR_OprFact::address(a), addr); ++ ++ crc.load_item_force(cc->at(0)); ++ __ move(addr, cc->at(1)); ++ __ move(len, cc->at(2)); ++ ++ __ call_runtime_leaf(StubRoutines::updateBytesCRC32C(), getThreadTemp(), result_reg, cc->args()); ++ __ move(result_reg, result); ++ ++ break; ++ } ++ default: { ++ ShouldNotReachHere(); ++ } ++ } ++} ++ ++void LIRGenerator::do_FmaIntrinsic(Intrinsic* x) { ++ assert(x->number_of_arguments() == 3, "wrong type"); ++ assert(UseFMA, "Needs FMA instructions 
support."); ++ LIRItem value(x->argument_at(0), this); ++ LIRItem value1(x->argument_at(1), this); ++ LIRItem value2(x->argument_at(2), this); ++ ++ value.load_item(); ++ value1.load_item(); ++ value2.load_item(); ++ ++ LIR_Opr calc_input = value.result(); ++ LIR_Opr calc_input1 = value1.result(); ++ LIR_Opr calc_input2 = value2.result(); ++ LIR_Opr calc_result = rlock_result(x); ++ ++ switch (x->id()) { ++ case vmIntrinsics::_fmaD: ++ __ fmad(calc_input, calc_input1, calc_input2, calc_result); ++ break; ++ case vmIntrinsics::_fmaF: ++ __ fmaf(calc_input, calc_input1, calc_input2, calc_result); ++ break; ++ default: ++ ShouldNotReachHere(); ++ } ++} ++ ++void LIRGenerator::do_vectorizedMismatch(Intrinsic* x) { ++ fatal("vectorizedMismatch intrinsic is not implemented on this platform"); ++} ++ ++// _i2l, _i2f, _i2d, _l2i, _l2f, _l2d, _f2i, _f2l, _f2d, _d2i, _d2l, _d2f ++// _i2b, _i2c, _i2s ++void LIRGenerator::do_Convert(Convert* x) { ++ LIRItem value(x->value(), this); ++ value.load_item(); ++ LIR_Opr input = value.result(); ++ LIR_Opr result = rlock(x); ++ ++ // arguments of lir_convert ++ LIR_Opr conv_input = input; ++ LIR_Opr conv_result = result; ++ ++ switch (x->op()) { ++ case Bytecodes::_f2i: ++ case Bytecodes::_f2l: ++ __ convert(x->op(), conv_input, conv_result, NULL, new_register(T_FLOAT)); ++ break; ++ case Bytecodes::_d2i: ++ case Bytecodes::_d2l: ++ __ convert(x->op(), conv_input, conv_result, NULL, new_register(T_DOUBLE)); ++ break; ++ default: ++ __ convert(x->op(), conv_input, conv_result); ++ break; ++ } ++ ++ assert(result->is_virtual(), "result must be virtual register"); ++ set_result(x, result); ++} ++ ++void LIRGenerator::do_NewInstance(NewInstance* x) { ++#ifndef PRODUCT ++ if (PrintNotLoaded && !x->klass()->is_loaded()) { ++ tty->print_cr(" ###class not loaded at new bci %d", x->printable_bci()); ++ } ++#endif ++ CodeEmitInfo* info = state_for(x, x->state()); ++ LIR_Opr reg = result_register_for(x->type()); ++ new_instance(reg, x->klass(), x->is_unresolved(), ++ FrameMap::t0_oop_opr, ++ FrameMap::t1_oop_opr, ++ FrameMap::a4_oop_opr, ++ LIR_OprFact::illegalOpr, ++ FrameMap::a3_metadata_opr, info); ++ LIR_Opr result = rlock_result(x); ++ __ move(reg, result); ++} ++ ++void LIRGenerator::do_NewTypeArray(NewTypeArray* x) { ++ CodeEmitInfo* info = state_for(x, x->state()); ++ ++ LIRItem length(x->length(), this); ++ length.load_item_force(FrameMap::s0_opr); ++ ++ LIR_Opr reg = result_register_for(x->type()); ++ LIR_Opr tmp1 = FrameMap::t0_oop_opr; ++ LIR_Opr tmp2 = FrameMap::t1_oop_opr; ++ LIR_Opr tmp3 = FrameMap::a5_oop_opr; ++ LIR_Opr tmp4 = reg; ++ LIR_Opr klass_reg = FrameMap::a3_metadata_opr; ++ LIR_Opr len = length.result(); ++ BasicType elem_type = x->elt_type(); ++ ++ __ metadata2reg(ciTypeArrayKlass::make(elem_type)->constant_encoding(), klass_reg); ++ ++ CodeStub* slow_path = new NewTypeArrayStub(klass_reg, len, reg, info); ++ __ allocate_array(reg, len, tmp1, tmp2, tmp3, tmp4, elem_type, klass_reg, slow_path); ++ ++ LIR_Opr result = rlock_result(x); ++ __ move(reg, result); ++} ++ ++void LIRGenerator::do_NewObjectArray(NewObjectArray* x) { ++ LIRItem length(x->length(), this); ++ // in case of patching (i.e., object class is not yet loaded), we need to reexecute the instruction ++ // and therefore provide the state before the parameters have been consumed ++ CodeEmitInfo* patching_info = NULL; ++ if (!x->klass()->is_loaded() || PatchALot) { ++ patching_info = state_for(x, x->state_before()); ++ } ++ ++ CodeEmitInfo* info = state_for(x, x->state()); ++ ++ 
LIR_Opr reg = result_register_for(x->type()); ++ LIR_Opr tmp1 = FrameMap::t0_oop_opr; ++ LIR_Opr tmp2 = FrameMap::t1_oop_opr; ++ LIR_Opr tmp3 = FrameMap::a5_oop_opr; ++ LIR_Opr tmp4 = reg; ++ LIR_Opr klass_reg = FrameMap::a3_metadata_opr; ++ ++ length.load_item_force(FrameMap::s0_opr); ++ LIR_Opr len = length.result(); ++ ++ CodeStub* slow_path = new NewObjectArrayStub(klass_reg, len, reg, info); ++ ciKlass* obj = (ciKlass*) ciObjArrayKlass::make(x->klass()); ++ if (obj == ciEnv::unloaded_ciobjarrayklass()) { ++ BAILOUT("encountered unloaded_ciobjarrayklass due to out of memory error"); ++ } ++ klass2reg_with_patching(klass_reg, obj, patching_info); ++ __ allocate_array(reg, len, tmp1, tmp2, tmp3, tmp4, T_OBJECT, klass_reg, slow_path); ++ ++ LIR_Opr result = rlock_result(x); ++ __ move(reg, result); ++} ++ ++void LIRGenerator::do_NewMultiArray(NewMultiArray* x) { ++ Values* dims = x->dims(); ++ int i = dims->length(); ++ LIRItemList* items = new LIRItemList(i, i, NULL); ++ while (i-- > 0) { ++ LIRItem* size = new LIRItem(dims->at(i), this); ++ items->at_put(i, size); ++ } ++ ++ // Evaluate state_for early since it may emit code. ++ CodeEmitInfo* patching_info = NULL; ++ if (!x->klass()->is_loaded() || PatchALot) { ++ patching_info = state_for(x, x->state_before()); ++ ++ // Cannot re-use same xhandlers for multiple CodeEmitInfos, so ++ // clone all handlers (NOTE: Usually this is handled transparently ++ // by the CodeEmitInfo cloning logic in CodeStub constructors but ++ // is done explicitly here because a stub isn't being used). ++ x->set_exception_handlers(new XHandlers(x->exception_handlers())); ++ } ++ CodeEmitInfo* info = state_for(x, x->state()); ++ ++ i = dims->length(); ++ while (i-- > 0) { ++ LIRItem* size = items->at(i); ++ size->load_item(); ++ ++ store_stack_parameter(size->result(), in_ByteSize(i*4)); ++ } ++ ++ LIR_Opr klass_reg = FrameMap::a0_metadata_opr; ++ klass2reg_with_patching(klass_reg, x->klass(), patching_info); ++ ++ LIR_Opr rank = FrameMap::s0_opr; ++ __ move(LIR_OprFact::intConst(x->rank()), rank); ++ LIR_Opr varargs = FrameMap::a2_opr; ++ __ move(FrameMap::sp_opr, varargs); ++ LIR_OprList* args = new LIR_OprList(3); ++ args->append(klass_reg); ++ args->append(rank); ++ args->append(varargs); ++ LIR_Opr reg = result_register_for(x->type()); ++ __ call_runtime(Runtime1::entry_for(Runtime1::new_multi_array_id), ++ LIR_OprFact::illegalOpr, ++ reg, args, info); ++ ++ LIR_Opr result = rlock_result(x); ++ __ move(reg, result); ++} ++ ++void LIRGenerator::do_BlockBegin(BlockBegin* x) { ++ // nothing to do for now ++} ++ ++void LIRGenerator::do_CheckCast(CheckCast* x) { ++ LIRItem obj(x->obj(), this); ++ ++ CodeEmitInfo* patching_info = NULL; ++ if (!x->klass()->is_loaded() || ++ (PatchALot && !x->is_incompatible_class_change_check() && ++ !x->is_invokespecial_receiver_check())) { ++ // must do this before locking the destination register as an oop register, ++ // and before the obj is loaded (the latter is for deoptimization) ++ patching_info = state_for(x, x->state_before()); ++ } ++ obj.load_item(); ++ ++ // info for exceptions ++ CodeEmitInfo* info_for_exception = ++ (x->needs_exception_state() ? 
state_for(x) : ++ state_for(x, x->state_before(), true /*ignore_xhandler*/)); ++ ++ CodeStub* stub; ++ if (x->is_incompatible_class_change_check()) { ++ assert(patching_info == NULL, "can't patch this"); ++ stub = new SimpleExceptionStub(Runtime1::throw_incompatible_class_change_error_id, ++ LIR_OprFact::illegalOpr, info_for_exception); ++ } else if (x->is_invokespecial_receiver_check()) { ++ assert(patching_info == NULL, "can't patch this"); ++ stub = new DeoptimizeStub(info_for_exception, ++ Deoptimization::Reason_class_check, ++ Deoptimization::Action_none); ++ } else { ++ stub = new SimpleExceptionStub(Runtime1::throw_class_cast_exception_id, ++ obj.result(), info_for_exception); ++ } ++ LIR_Opr reg = rlock_result(x); ++ LIR_Opr tmp3 = LIR_OprFact::illegalOpr; ++ if (!x->klass()->is_loaded() || UseCompressedClassPointers) { ++ tmp3 = new_register(objectType); ++ } ++ __ checkcast(reg, obj.result(), x->klass(), ++ new_register(objectType), new_register(objectType), tmp3, ++ x->direct_compare(), info_for_exception, patching_info, stub, ++ x->profiled_method(), x->profiled_bci()); ++} ++ ++void LIRGenerator::do_InstanceOf(InstanceOf* x) { ++ LIRItem obj(x->obj(), this); ++ ++ // result and test object may not be in same register ++ LIR_Opr reg = rlock_result(x); ++ CodeEmitInfo* patching_info = NULL; ++ if ((!x->klass()->is_loaded() || PatchALot)) { ++ // must do this before locking the destination register as an oop register ++ patching_info = state_for(x, x->state_before()); ++ } ++ obj.load_item(); ++ LIR_Opr tmp3 = LIR_OprFact::illegalOpr; ++ if (!x->klass()->is_loaded() || UseCompressedClassPointers) { ++ tmp3 = new_register(objectType); ++ } ++ __ instanceof(reg, obj.result(), x->klass(), ++ new_register(objectType), new_register(objectType), tmp3, ++ x->direct_compare(), patching_info, x->profiled_method(), x->profiled_bci()); ++} ++ ++void LIRGenerator::do_If(If* x) { ++ assert(x->number_of_sux() == 2, "inconsistency"); ++ ValueTag tag = x->x()->type()->tag(); ++ bool is_safepoint = x->is_safepoint(); ++ ++ If::Condition cond = x->cond(); ++ ++ LIRItem xitem(x->x(), this); ++ LIRItem yitem(x->y(), this); ++ LIRItem* xin = &xitem; ++ LIRItem* yin = &yitem; ++ ++ if (tag == longTag) { ++ // for longs, only conditions "eql", "neq", "lss", "geq" are valid; ++ // mirror for other conditions ++ if (cond == If::gtr || cond == If::leq) { ++ cond = Instruction::mirror(cond); ++ xin = &yitem; ++ yin = &xitem; ++ } ++ xin->set_destroys_register(); ++ } ++ xin->load_item(); ++ ++ if (tag == longTag) { ++ if (yin->is_constant() && yin->get_jlong_constant() == 0) { ++ yin->dont_load_item(); ++ } else { ++ yin->load_item(); ++ } ++ } else if (tag == intTag) { ++ if (yin->is_constant() && yin->get_jint_constant() == 0) { ++ yin->dont_load_item(); ++ } else { ++ yin->load_item(); ++ } ++ } else { ++ yin->load_item(); ++ } ++ ++ set_no_result(x); ++ ++ LIR_Opr left = xin->result(); ++ LIR_Opr right = yin->result(); ++ ++ // add safepoint before generating condition code so it can be recomputed ++ if (x->is_safepoint()) { ++ // increment backedge counter if needed ++ increment_backedge_counter_conditionally(lir_cond(cond), left, right, state_for(x, x->state_before()), ++ x->tsux()->bci(), x->fsux()->bci(), x->profiled_bci()); ++ __ safepoint(LIR_OprFact::illegalOpr, state_for(x, x->state_before())); ++ } ++ ++ // Generate branch profiling. Profiling code doesn't kill flags. 
++ profile_branch(x, cond, left, right); ++ move_to_phi(x->state()); ++ if (x->x()->type()->is_float_kind()) { ++ __ cmp_branch(lir_cond(cond), left, right, right->type(), x->tsux(), x->usux()); ++ } else { ++ __ cmp_branch(lir_cond(cond), left, right, right->type(), x->tsux()); ++ } ++ assert(x->default_sux() == x->fsux(), "wrong destination above"); ++ __ jump(x->default_sux()); ++} ++ ++LIR_Opr LIRGenerator::getThreadPointer() { ++ return FrameMap::as_pointer_opr(TREG); ++} ++ ++void LIRGenerator::trace_block_entry(BlockBegin* block) { Unimplemented(); } ++ ++void LIRGenerator::volatile_field_store(LIR_Opr value, LIR_Address* address, ++ CodeEmitInfo* info) { ++ __ volatile_store_mem_reg(value, address, info); ++} ++ ++void LIRGenerator::volatile_field_load(LIR_Address* address, LIR_Opr result, ++ CodeEmitInfo* info) { ++ // 8179954: We need to make sure that the code generated for ++ // volatile accesses forms a sequentially-consistent set of ++ // operations when combined with STLR and LDAR. Without a leading ++ // membar it's possible for a simple Dekker test to fail if loads ++ // use LD;DMB but stores use STLR. This can happen if C2 compiles ++ // the stores in one method and C1 compiles the loads in another. ++ if (!UseBarriersForVolatile) { ++ __ membar(); ++ } ++ __ volatile_load_mem_reg(address, result, info); ++} +diff --git a/src/hotspot/cpu/loongarch/c1_LIR_loongarch_64.cpp b/src/hotspot/cpu/loongarch/c1_LIR_loongarch_64.cpp +new file mode 100644 +index 0000000000..6bb15fbf1d +--- /dev/null ++++ b/src/hotspot/cpu/loongarch/c1_LIR_loongarch_64.cpp +@@ -0,0 +1,75 @@ ++/* ++ * Copyright (c) 2016, 2021, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2021, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#include "precompiled.hpp" ++#include "asm/register.hpp" ++#include "c1/c1_LIR.hpp" ++ ++FloatRegister LIR_OprDesc::as_float_reg() const { ++ return as_FloatRegister(fpu_regnr()); ++} ++ ++FloatRegister LIR_OprDesc::as_double_reg() const { ++ return as_FloatRegister(fpu_regnrLo()); ++} ++ ++// Reg2 unused. 
++LIR_Opr LIR_OprFact::double_fpu(int reg1, int reg2) { ++ assert(as_FloatRegister(reg2) == fnoreg, "Not used on this platform"); ++ return (LIR_Opr)(intptr_t)((reg1 << LIR_OprDesc::reg1_shift) | ++ (reg1 << LIR_OprDesc::reg2_shift) | ++ LIR_OprDesc::double_type | ++ LIR_OprDesc::fpu_register | ++ LIR_OprDesc::double_size); ++} ++ ++#ifndef PRODUCT ++void LIR_Address::verify() const { ++ assert(base()->is_cpu_register(), "wrong base operand"); ++ assert(index()->is_illegal() || index()->is_double_cpu() || ++ index()->is_single_cpu(), "wrong index operand"); ++ assert(base()->type() == T_ADDRESS || base()->type() == T_OBJECT || ++ base()->type() == T_LONG || base()->type() == T_METADATA, ++ "wrong type for addresses"); ++} ++#endif // PRODUCT ++ ++template ++void LIR_List::cmp_branch(LIR_Condition condition, LIR_Opr left, LIR_Opr right, BasicType type, T tgt, CodeEmitInfo* info) { ++ append(new LIR_OpCmpBranch(condition, left, right, tgt, info)); ++} ++ ++// Explicit instantiation for all supported types. ++template void LIR_List::cmp_branch(LIR_Condition, LIR_Opr, LIR_Opr, BasicType type, Label*, CodeEmitInfo*); ++template void LIR_List::cmp_branch(LIR_Condition, LIR_Opr, LIR_Opr, BasicType type, BlockBegin*, CodeEmitInfo*); ++template void LIR_List::cmp_branch(LIR_Condition, LIR_Opr, LIR_Opr, BasicType type, CodeStub*, CodeEmitInfo*); ++ ++void LIR_List::cmp_branch(LIR_Condition condition, LIR_Opr left, LIR_Opr right, BasicType type, BlockBegin* block, BlockBegin* unordered) { ++ append(new LIR_OpCmpBranch(condition, left, right, block, unordered)); ++} ++ ++void LIR_List::cmp_cmove(LIR_Condition condition, LIR_Opr left, LIR_Opr right, LIR_Opr src1, LIR_Opr src2, LIR_Opr dst, BasicType type) { ++ append(new LIR_Op4(lir_cmp_cmove, condition, left, right, src1, src2, dst, type)); ++} +diff --git a/src/hotspot/cpu/loongarch/c1_LinearScan_loongarch.hpp b/src/hotspot/cpu/loongarch/c1_LinearScan_loongarch.hpp +new file mode 100644 +index 0000000000..f15dacafeb +--- /dev/null ++++ b/src/hotspot/cpu/loongarch/c1_LinearScan_loongarch.hpp +@@ -0,0 +1,70 @@ ++/* ++ * Copyright (c) 2005, 2021, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2021, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ * ++ */ ++ ++#ifndef CPU_LOONGARCH_C1_LINEARSCAN_LOONGARCH_HPP ++#define CPU_LOONGARCH_C1_LINEARSCAN_LOONGARCH_HPP ++ ++inline bool LinearScan::is_processed_reg_num(int reg_num) { ++ return reg_num <= FrameMap::last_cpu_reg() || reg_num >= pd_nof_cpu_regs_frame_map; ++} ++ ++inline int LinearScan::num_physical_regs(BasicType type) { ++ return 1; ++} ++ ++inline bool LinearScan::requires_adjacent_regs(BasicType type) { ++ return false; ++} ++ ++inline bool LinearScan::is_caller_save(int assigned_reg) { ++ assert(assigned_reg >= 0 && assigned_reg < nof_regs, "should call this only for registers"); ++ if (assigned_reg < pd_first_callee_saved_reg) ++ return true; ++ if (assigned_reg > pd_last_callee_saved_reg && assigned_reg < pd_first_callee_saved_fpu_reg) ++ return true; ++ if (assigned_reg > pd_last_callee_saved_fpu_reg && assigned_reg < pd_last_fpu_reg) ++ return true; ++ return false; ++} ++ ++inline void LinearScan::pd_add_temps(LIR_Op* op) {} ++ ++// Implementation of LinearScanWalker ++inline bool LinearScanWalker::pd_init_regs_for_alloc(Interval* cur) { ++ if (allocator()->gen()->is_vreg_flag_set(cur->reg_num(), LIRGenerator::callee_saved)) { ++ assert(cur->type() != T_FLOAT && cur->type() != T_DOUBLE, "cpu regs only"); ++ _first_reg = pd_first_callee_saved_reg; ++ _last_reg = pd_last_callee_saved_reg; ++ return true; ++ } else if (cur->type() == T_INT || cur->type() == T_LONG || cur->type() == T_OBJECT || ++ cur->type() == T_ADDRESS || cur->type() == T_METADATA) { ++ _first_reg = pd_first_cpu_reg; ++ _last_reg = pd_last_allocatable_cpu_reg; ++ return true; ++ } ++ return false; ++} ++ ++#endif // CPU_LOONGARCH_C1_LINEARSCAN_LOONGARCH_HPP +diff --git a/src/hotspot/cpu/loongarch/c1_LinearScan_loongarch_64.cpp b/src/hotspot/cpu/loongarch/c1_LinearScan_loongarch_64.cpp +new file mode 100644 +index 0000000000..219b2e3671 +--- /dev/null ++++ b/src/hotspot/cpu/loongarch/c1_LinearScan_loongarch_64.cpp +@@ -0,0 +1,33 @@ ++/* ++ * Copyright (c) 2005, 2011, Oracle and/or its affiliates. All rights reserved. All rights reserved. ++ * Copyright (c) 2021, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ * ++ */ ++ ++#include "precompiled.hpp" ++#include "c1/c1_Instruction.hpp" ++#include "c1/c1_LinearScan.hpp" ++#include "utilities/bitMap.inline.hpp" ++ ++void LinearScan::allocate_fpu_stack() { ++ // No FPU stack on LoongArch64 ++} +diff --git a/src/hotspot/cpu/loongarch/c1_MacroAssembler_loongarch.hpp b/src/hotspot/cpu/loongarch/c1_MacroAssembler_loongarch.hpp +new file mode 100644 +index 0000000000..38ff4c5836 +--- /dev/null ++++ b/src/hotspot/cpu/loongarch/c1_MacroAssembler_loongarch.hpp +@@ -0,0 +1,112 @@ ++/* ++ * Copyright (c) 1999, 2021, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2021, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#ifndef CPU_LOONGARCH_C1_MACROASSEMBLER_LOONGARCH_HPP ++#define CPU_LOONGARCH_C1_MACROASSEMBLER_LOONGARCH_HPP ++ ++using MacroAssembler::build_frame; ++using MacroAssembler::null_check; ++ ++// C1_MacroAssembler contains high-level macros for C1 ++ ++ private: ++ int _rsp_offset; // track rsp changes ++ // initialization ++ void pd_init() { _rsp_offset = 0; } ++ ++ public: ++ void try_allocate( ++ Register obj, // result: pointer to object after successful allocation ++ Register var_size_in_bytes, // object size in bytes if unknown at compile time; invalid otherwise ++ int con_size_in_bytes, // object size in bytes if known at compile time ++ Register t1, // temp register ++ Register t2, // temp register ++ Label& slow_case // continuation point if fast allocation fails ++ ); ++ ++ void initialize_header(Register obj, Register klass, Register len, Register t1, Register t2); ++ void initialize_body(Register obj, Register len_in_bytes, int hdr_size_in_bytes, Register t1, Register t2); ++ ++ // locking ++ // hdr : must be A0, contents destroyed ++ // obj : must point to the object to lock, contents preserved ++ // disp_hdr: must point to the displaced header location, contents preserved ++ // scratch : scratch register, contents destroyed ++ // returns code offset at which to add null check debug information ++ int lock_object (Register swap, Register obj, Register disp_hdr, Register scratch, Label& slow_case); ++ ++ // unlocking ++ // hdr : contents destroyed ++ // obj : must point to the object to lock, contents preserved ++ // disp_hdr: must be A0 & must point to the displaced header location, contents destroyed ++ void unlock_object(Register swap, Register obj, Register lock, Label& slow_case); ++ ++ void initialize_object( ++ Register obj, // result: pointer to object after successful allocation ++ Register klass, // object 
klass ++ Register var_size_in_bytes, // object size in bytes if unknown at compile time; invalid otherwise ++ int con_size_in_bytes, // object size in bytes if known at compile time ++ Register t1, // temp register ++ Register t2, // temp register ++ bool is_tlab_allocated // the object was allocated in a TLAB; relevant for the implementation of ZeroTLAB ++ ); ++ ++ // allocation of fixed-size objects ++ // (can also be used to allocate fixed-size arrays, by setting ++ // hdr_size correctly and storing the array length afterwards) ++ // obj : will contain pointer to allocated object ++ // t1, t2 : scratch registers - contents destroyed ++ // header_size: size of object header in words ++ // object_size: total size of object in words ++ // slow_case : exit to slow case implementation if fast allocation fails ++ void allocate_object(Register obj, Register t1, Register t2, int header_size, ++ int object_size, Register klass, Label& slow_case); ++ ++ enum { ++ max_array_allocation_length = 0x00FFFFFF ++ }; ++ ++ // allocation of arrays ++ // obj : will contain pointer to allocated object ++ // len : array length in number of elements ++ // t : scratch register - contents destroyed ++ // header_size: size of object header in words ++ // f : element scale factor ++ // slow_case : exit to slow case implementation if fast allocation fails ++ void allocate_array(Register obj, Register len, Register t, Register t2, int header_size, ++ int f, Register klass, Label& slow_case); ++ ++ int rsp_offset() const { return _rsp_offset; } ++ void set_rsp_offset(int n) { _rsp_offset = n; } ++ ++ void invalidate_registers(bool inv_a0, bool inv_s0, bool inv_a2, bool inv_a3, ++ bool inv_a4, bool inv_a5) PRODUCT_RETURN; ++ ++ // This platform only uses signal-based null checks. The Label is not needed. ++ void null_check(Register r, Label *Lnull = NULL) { MacroAssembler::null_check(r); } ++ ++ void load_parameter(int offset_in_words, Register reg); ++ ++#endif // CPU_LOONGARCH_C1_MACROASSEMBLER_LOONGARCH_HPP +diff --git a/src/hotspot/cpu/loongarch/c1_MacroAssembler_loongarch_64.cpp b/src/hotspot/cpu/loongarch/c1_MacroAssembler_loongarch_64.cpp +new file mode 100644 +index 0000000000..17ff93a595 +--- /dev/null ++++ b/src/hotspot/cpu/loongarch/c1_MacroAssembler_loongarch_64.cpp +@@ -0,0 +1,344 @@ ++/* ++ * Copyright (c) 1999, 2021, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2021, 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ * ++ */ ++ ++#include "precompiled.hpp" ++#include "c1/c1_MacroAssembler.hpp" ++#include "c1/c1_Runtime1.hpp" ++#include "gc/shared/barrierSetAssembler.hpp" ++#include "gc/shared/collectedHeap.hpp" ++#include "interpreter/interpreter.hpp" ++#include "oops/arrayOop.hpp" ++#include "oops/markOop.hpp" ++#include "runtime/basicLock.hpp" ++#include "runtime/biasedLocking.hpp" ++#include "runtime/os.hpp" ++#include "runtime/sharedRuntime.hpp" ++#include "runtime/stubRoutines.hpp" ++ ++#define T0 RT0 ++#define T1 RT1 ++ ++int C1_MacroAssembler::lock_object(Register hdr, Register obj, Register disp_hdr, Register scratch, Label& slow_case) { ++ const int aligned_mask = BytesPerWord -1; ++ const int hdr_offset = oopDesc::mark_offset_in_bytes(); ++ assert(hdr != obj && hdr != disp_hdr && obj != disp_hdr, "registers must be different"); ++ int null_check_offset = -1; ++ Label done; ++ ++ verify_oop(obj); ++ ++ // save object being locked into the BasicObjectLock ++ st_ptr(obj, Address(disp_hdr, BasicObjectLock::obj_offset_in_bytes())); ++ ++ if (UseBiasedLocking) { ++ assert(scratch != noreg, "should have scratch register at this point"); ++ null_check_offset = biased_locking_enter(disp_hdr, obj, hdr, scratch, false, done, &slow_case); ++ } else { ++ null_check_offset = offset(); ++ } ++ ++ // Load object header ++ ld_ptr(hdr, Address(obj, hdr_offset)); ++ // and mark it as unlocked ++ ori(hdr, hdr, markOopDesc::unlocked_value); ++ // save unlocked object header into the displaced header location on the stack ++ st_ptr(hdr, Address(disp_hdr, 0)); ++ // test if object header is still the same (i.e. unlocked), and if so, store the ++ // displaced header address in the object header - if it is not the same, get the ++ // object header instead ++ lea(SCR2, Address(obj, hdr_offset)); ++ cmpxchg(Address(SCR2, 0), hdr, disp_hdr, SCR1, true, false, done); ++ // if the object header was the same, we're done ++ // if the object header was not the same, it is now in the hdr register ++ // => test if it is a stack pointer into the same stack (recursive locking), i.e.: ++ // ++ // 1) (hdr & aligned_mask) == 0 ++ // 2) sp <= hdr ++ // 3) hdr <= sp + page_size ++ // ++ // these 3 tests can be done by evaluating the following expression: ++ // ++ // (hdr - sp) & (aligned_mask - page_size) ++ // ++ // assuming both the stack pointer and page_size have their least ++ // significant 2 bits cleared and page_size is a power of 2 ++ sub_d(hdr, hdr, SP); ++ li(SCR1, aligned_mask - os::vm_page_size()); ++ andr(hdr, hdr, SCR1); ++ // for recursive locking, the result is zero => save it in the displaced header ++ // location (NULL in the displaced hdr location indicates recursive locking) ++ st_ptr(hdr, Address(disp_hdr, 0)); ++ // otherwise we don't care about the result and handle locking via runtime call ++ bnez(hdr, slow_case); ++ // done ++ bind(done); ++ if (PrintBiasedLockingStatistics) { ++ atomic_inc32((address)BiasedLocking::fast_path_entry_count_addr(), 1, SCR1, SCR2); ++ } ++ return null_check_offset; ++} ++ ++void C1_MacroAssembler::unlock_object(Register hdr, Register obj, Register disp_hdr, Label& slow_case) { ++ const int aligned_mask = BytesPerWord -1; ++ const int hdr_offset = oopDesc::mark_offset_in_bytes(); ++ assert(hdr != obj && hdr != disp_hdr && obj != disp_hdr, "registers must be different"); ++ Label done; ++ ++ if (UseBiasedLocking) { ++ // load object ++ ld_ptr(obj, Address(disp_hdr, BasicObjectLock::obj_offset_in_bytes())); ++ biased_locking_exit(obj, hdr, done); ++ } ++ ++ // load displaced 
header ++ ld_ptr(hdr, Address(disp_hdr, 0)); ++ // if the loaded hdr is NULL we had recursive locking ++ // if we had recursive locking, we are done ++ beqz(hdr, done); ++ if (!UseBiasedLocking) { ++ // load object ++ ld_ptr(obj, Address(disp_hdr, BasicObjectLock::obj_offset_in_bytes())); ++ } ++ verify_oop(obj); ++ // test if object header is pointing to the displaced header, and if so, restore ++ // the displaced header in the object - if the object header is not pointing to ++ // the displaced header, get the object header instead ++ // if the object header was not pointing to the displaced header, ++ // we do unlocking via runtime call ++ if (hdr_offset) { ++ lea(SCR1, Address(obj, hdr_offset)); ++ cmpxchg(Address(SCR1, 0), disp_hdr, hdr, SCR2, false, false, done, &slow_case); ++ } else { ++ cmpxchg(Address(obj, 0), disp_hdr, hdr, SCR2, false, false, done, &slow_case); ++ } ++ // done ++ bind(done); ++} ++ ++// Defines obj, preserves var_size_in_bytes ++void C1_MacroAssembler::try_allocate(Register obj, Register var_size_in_bytes, ++ int con_size_in_bytes, Register t1, Register t2, ++ Label& slow_case) { ++ if (UseTLAB) { ++ tlab_allocate(obj, var_size_in_bytes, con_size_in_bytes, t1, t2, slow_case); ++ } else { ++ eden_allocate(obj, var_size_in_bytes, con_size_in_bytes, t1, slow_case); ++ } ++} ++ ++void C1_MacroAssembler::initialize_header(Register obj, Register klass, Register len, ++ Register t1, Register t2) { ++ assert_different_registers(obj, klass, len); ++ if (UseBiasedLocking && !len->is_valid()) { ++ assert_different_registers(obj, klass, len, t1, t2); ++ ld_ptr(t1, Address(klass, Klass::prototype_header_offset())); ++ } else { ++ // This assumes that all prototype bits fit in an int32_t ++ li(t1, (int32_t)(intptr_t)markOopDesc::prototype()); ++ } ++ st_ptr(t1, Address(obj, oopDesc::mark_offset_in_bytes())); ++ ++ if (UseCompressedClassPointers) { // Take care not to kill klass ++ encode_klass_not_null(t1, klass); ++ st_w(t1, Address(obj, oopDesc::klass_offset_in_bytes())); ++ } else { ++ st_ptr(klass, Address(obj, oopDesc::klass_offset_in_bytes())); ++ } ++ ++ if (len->is_valid()) { ++ st_w(len, Address(obj, arrayOopDesc::length_offset_in_bytes())); ++ } else if (UseCompressedClassPointers) { ++ store_klass_gap(obj, R0); ++ } ++} ++ ++// preserves obj, destroys len_in_bytes ++// ++// Scratch registers: t1 = T0, t2 = T1 ++// ++void C1_MacroAssembler::initialize_body(Register obj, Register len_in_bytes, ++ int hdr_size_in_bytes, Register t1, Register t2) { ++ assert(hdr_size_in_bytes >= 0, "header size must be positive or 0"); ++ assert(t1 == T0 && t2 == T1, "must be"); ++ Label done; ++ ++ // len_in_bytes is positive and ptr sized ++ addi_d(len_in_bytes, len_in_bytes, -hdr_size_in_bytes); ++ beqz(len_in_bytes, done); ++ ++ // zero_words() takes ptr in t1 and count in bytes in t2 ++ lea(t1, Address(obj, hdr_size_in_bytes)); ++ addi_d(t2, len_in_bytes, -BytesPerWord); ++ ++ Label loop; ++ bind(loop); ++ stx_d(R0, t1, t2); ++ addi_d(t2, t2, -BytesPerWord); ++ bge(t2, R0, loop); ++ ++ bind(done); ++} ++ ++void C1_MacroAssembler::allocate_object(Register obj, Register t1, Register t2, int header_size, ++ int object_size, Register klass, Label& slow_case) { ++ assert_different_registers(obj, t1, t2); ++ assert(header_size >= 0 && object_size >= header_size, "illegal sizes"); ++ ++ try_allocate(obj, noreg, object_size * BytesPerWord, t1, t2, slow_case); ++ ++ initialize_object(obj, klass, noreg, object_size * HeapWordSize, t1, t2, UseTLAB); ++} ++ ++// Scratch registers: t1 = T0, 
t2 = T1 ++void C1_MacroAssembler::initialize_object(Register obj, Register klass, Register var_size_in_bytes, ++ int con_size_in_bytes, Register t1, Register t2, ++ bool is_tlab_allocated) { ++ assert((con_size_in_bytes & MinObjAlignmentInBytesMask) == 0, ++ "con_size_in_bytes is not multiple of alignment"); ++ const int hdr_size_in_bytes = instanceOopDesc::header_size() * HeapWordSize; ++ ++ initialize_header(obj, klass, noreg, t1, t2); ++ ++ if (!(UseTLAB && ZeroTLAB && is_tlab_allocated)) { ++ // clear rest of allocated space ++ const Register index = t2; ++ if (var_size_in_bytes != noreg) { ++ move(index, var_size_in_bytes); ++ initialize_body(obj, index, hdr_size_in_bytes, t1, t2); ++ } else if (con_size_in_bytes > hdr_size_in_bytes) { ++ con_size_in_bytes -= hdr_size_in_bytes; ++ lea(t1, Address(obj, hdr_size_in_bytes)); ++ Label loop; ++ li(SCR1, con_size_in_bytes - BytesPerWord); ++ bind(loop); ++ stx_d(R0, t1, SCR1); ++ addi_d(SCR1, SCR1, -BytesPerWord); ++ bge(SCR1, R0, loop); ++ } ++ } ++ ++ membar(StoreStore); ++ ++ if (CURRENT_ENV->dtrace_alloc_probes()) { ++ assert(obj == A0, "must be"); ++ call(Runtime1::entry_for(Runtime1::dtrace_object_alloc_id), relocInfo::runtime_call_type); ++ } ++ ++ verify_oop(obj); ++} ++ ++void C1_MacroAssembler::allocate_array(Register obj, Register len, Register t1, Register t2, ++ int header_size, int f, Register klass, Label& slow_case) { ++ assert_different_registers(obj, len, t1, t2, klass); ++ ++ // determine alignment mask ++ assert(!(BytesPerWord & 1), "must be a multiple of 2 for masking code to work"); ++ ++ // check for negative or excessive length ++ li(SCR1, (int32_t)max_array_allocation_length); ++ bge_far(len, SCR1, slow_case, false); ++ ++ const Register arr_size = t2; // okay to be the same ++ // align object end ++ li(arr_size, (int32_t)header_size * BytesPerWord + MinObjAlignmentInBytesMask); ++ slli_w(SCR1, len, f); ++ add_d(arr_size, arr_size, SCR1); ++ bstrins_d(arr_size, R0, exact_log2(MinObjAlignmentInBytesMask + 1) - 1, 0); ++ ++ try_allocate(obj, arr_size, 0, t1, t2, slow_case); ++ ++ initialize_header(obj, klass, len, t1, t2); ++ ++ // clear rest of allocated space ++ initialize_body(obj, arr_size, header_size * BytesPerWord, t1, t2); ++ ++ membar(StoreStore); ++ ++ if (CURRENT_ENV->dtrace_alloc_probes()) { ++ assert(obj == A0, "must be"); ++ call(Runtime1::entry_for(Runtime1::dtrace_object_alloc_id), relocInfo::runtime_call_type); ++ } ++ ++ verify_oop(obj); ++} ++ ++void C1_MacroAssembler::build_frame(int framesize, int bang_size_in_bytes) { ++ assert(bang_size_in_bytes >= framesize, "stack bang size incorrect"); ++ // Make sure there is enough stack space for this method's activation. ++ // Note that we do this before creating a frame. ++ generate_stack_overflow_check(bang_size_in_bytes); ++ MacroAssembler::build_frame(framesize); ++} ++ ++void C1_MacroAssembler::remove_frame(int framesize) { ++ MacroAssembler::remove_frame(framesize); ++} ++ ++void C1_MacroAssembler::verified_entry() { ++ // If we have to make this method not-entrant we'll overwrite its ++ // first instruction with a jump. For this action to be legal we ++ // must ensure that this first instruction is a b, bl, nop, break. ++ // Make it a NOP. ++ nop(); ++} ++ ++void C1_MacroAssembler::load_parameter(int offset_in_words, Register reg) { ++ // rbp, + 0: link ++ // + 1: return address ++ // + 2: argument with offset 0 ++ // + 3: argument with offset 1 ++ // + 4: ... 
++ ++ ld_ptr(reg, Address(FP, (offset_in_words + 2) * BytesPerWord)); ++} ++ ++#ifndef PRODUCT ++void C1_MacroAssembler::verify_stack_oop(int stack_offset) { ++ if (!VerifyOops) return; ++ verify_oop_addr(Address(SP, stack_offset), "oop"); ++} ++ ++void C1_MacroAssembler::verify_not_null_oop(Register r) { ++ if (!VerifyOops) return; ++ Label not_null; ++ bnez(r, not_null); ++ stop("non-null oop required"); ++ bind(not_null); ++ verify_oop(r); ++} ++ ++void C1_MacroAssembler::invalidate_registers(bool inv_a0, bool inv_s0, bool inv_a2, ++ bool inv_a3, bool inv_a4, bool inv_a5) { ++#ifdef ASSERT ++ static int nn; ++ if (inv_a0) li(A0, 0xDEAD); ++ if (inv_s0) li(S0, 0xDEAD); ++ if (inv_a2) li(A2, nn++); ++ if (inv_a3) li(A3, 0xDEAD); ++ if (inv_a4) li(A4, 0xDEAD); ++ if (inv_a5) li(A5, 0xDEAD); ++#endif ++} ++#endif // ifndef PRODUCT +diff --git a/src/hotspot/cpu/loongarch/c1_Runtime1_loongarch_64.cpp b/src/hotspot/cpu/loongarch/c1_Runtime1_loongarch_64.cpp +new file mode 100644 +index 0000000000..aaa708f71e +--- /dev/null ++++ b/src/hotspot/cpu/loongarch/c1_Runtime1_loongarch_64.cpp +@@ -0,0 +1,1138 @@ ++/* ++ * Copyright (c) 1999, 2021, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2021, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ * ++ */ ++ ++#include "precompiled.hpp" ++#include "asm/assembler.hpp" ++#include "c1/c1_CodeStubs.hpp" ++#include "c1/c1_Defs.hpp" ++#include "c1/c1_MacroAssembler.hpp" ++#include "c1/c1_Runtime1.hpp" ++#include "compiler/disassembler.hpp" ++#include "compiler/oopMap.hpp" ++#include "gc/shared/cardTable.hpp" ++#include "gc/shared/cardTableBarrierSet.hpp" ++#include "gc/shared/collectedHeap.hpp" ++#include "interpreter/interpreter.hpp" ++#include "memory/universe.hpp" ++#include "nativeInst_loongarch.hpp" ++#include "oops/compiledICHolder.hpp" ++#include "oops/oop.inline.hpp" ++#include "prims/jvmtiExport.hpp" ++#include "register_loongarch.hpp" ++#include "runtime/sharedRuntime.hpp" ++#include "runtime/signature.hpp" ++#include "runtime/stubRoutines.hpp" ++#include "runtime/vframe.hpp" ++#include "runtime/vframeArray.hpp" ++#include "vmreg_loongarch.inline.hpp" ++ ++#define T0 RT0 ++#define T1 RT1 ++#define T2 RT2 ++#define T3 RT3 ++#define T5 RT5 ++#define T6 RT6 ++#define T8 RT8 ++ ++// Implementation of StubAssembler ++ ++int StubAssembler::call_RT(Register oop_result1, Register metadata_result, address entry, int args_size) { ++ // setup registers ++ assert(!(oop_result1->is_valid() || metadata_result->is_valid()) || oop_result1 != metadata_result, ++ "registers must be different"); ++ assert(oop_result1 != TREG && metadata_result != TREG, "registers must be different"); ++ assert(args_size >= 0, "illegal args_size"); ++ bool align_stack = false; ++ ++ move(A0, TREG); ++ set_num_rt_args(0); // Nothing on stack ++ ++ Label retaddr; ++ set_last_Java_frame(SP, FP, retaddr); ++ ++ // do the call ++ call(entry, relocInfo::runtime_call_type); ++ bind(retaddr); ++ int call_offset = offset(); ++ // verify callee-saved register ++#ifdef ASSERT ++ { Label L; ++ get_thread(SCR1); ++ beq(TREG, SCR1, L); ++ stop("StubAssembler::call_RT: TREG not callee saved?"); ++ bind(L); ++ } ++#endif ++ reset_last_Java_frame(true); ++ ++ // check for pending exceptions ++ { Label L; ++ // check for pending exceptions (java_thread is set upon return) ++ ld_ptr(SCR1, Address(TREG, in_bytes(Thread::pending_exception_offset()))); ++ beqz(SCR1, L); ++ // exception pending => remove activation and forward to exception handler ++ // make sure that the vm_results are cleared ++ if (oop_result1->is_valid()) { ++ st_ptr(R0, Address(TREG, JavaThread::vm_result_offset())); ++ } ++ if (metadata_result->is_valid()) { ++ st_ptr(R0, Address(TREG, JavaThread::vm_result_2_offset())); ++ } ++ if (frame_size() == no_frame_size) { ++ leave(); ++ jmp(StubRoutines::forward_exception_entry(), relocInfo::runtime_call_type); ++ } else if (_stub_id == Runtime1::forward_exception_id) { ++ should_not_reach_here(); ++ } else { ++ jmp(Runtime1::entry_for(Runtime1::forward_exception_id), relocInfo::runtime_call_type); ++ } ++ bind(L); ++ } ++ // get oop results if there are any and reset the values in the thread ++ if (oop_result1->is_valid()) { ++ get_vm_result(oop_result1, TREG); ++ } ++ if (metadata_result->is_valid()) { ++ get_vm_result_2(metadata_result, TREG); ++ } ++ return call_offset; ++} ++ ++int StubAssembler::call_RT(Register oop_result1, Register metadata_result, ++ address entry, Register arg1) { ++ move(A1, arg1); ++ return call_RT(oop_result1, metadata_result, entry, 1); ++} ++ ++int StubAssembler::call_RT(Register oop_result1, Register metadata_result, ++ address entry, Register arg1, Register arg2) { ++ if (A1 == arg2) { ++ if (A2 == arg1) { ++ move(SCR1, arg1); ++ move(arg1, arg2); ++ move(arg2, SCR1); ++ } else { ++ 
move(A2, arg2); ++ move(A1, arg1); ++ } ++ } else { ++ move(A1, arg1); ++ move(A2, arg2); ++ } ++ return call_RT(oop_result1, metadata_result, entry, 2); ++} ++ ++int StubAssembler::call_RT(Register oop_result1, Register metadata_result, ++ address entry, Register arg1, Register arg2, Register arg3) { ++ // if there is any conflict use the stack ++ if (arg1 == A2 || arg1 == A3 || ++ arg2 == A1 || arg2 == A3 || ++ arg3 == A1 || arg3 == A2) { ++ addi_d(SP, SP, -4 * wordSize); ++ st_ptr(arg1, Address(SP, 0 * wordSize)); ++ st_ptr(arg2, Address(SP, 1 * wordSize)); ++ st_ptr(arg3, Address(SP, 2 * wordSize)); ++ ld_ptr(arg1, Address(SP, 0 * wordSize)); ++ ld_ptr(arg2, Address(SP, 1 * wordSize)); ++ ld_ptr(arg3, Address(SP, 2 * wordSize)); ++ addi_d(SP, SP, 4 * wordSize); ++ } else { ++ move(A1, arg1); ++ move(A2, arg2); ++ move(A3, arg3); ++ } ++ return call_RT(oop_result1, metadata_result, entry, 3); ++} ++ ++enum return_state_t { ++ does_not_return, requires_return ++}; ++ ++// Implementation of StubFrame ++ ++class StubFrame: public StackObj { ++ private: ++ StubAssembler* _sasm; ++ bool _return_state; ++ ++ public: ++ StubFrame(StubAssembler* sasm, const char* name, bool must_gc_arguments, ++ return_state_t return_state=requires_return); ++ void load_argument(int offset_in_words, Register reg); ++ ++ ~StubFrame(); ++};; ++ ++void StubAssembler::prologue(const char* name, bool must_gc_arguments) { ++ set_info(name, must_gc_arguments); ++ enter(); ++} ++ ++void StubAssembler::epilogue() { ++ leave(); ++ jr(RA); ++} ++ ++#define __ _sasm-> ++ ++StubFrame::StubFrame(StubAssembler* sasm, const char* name, bool must_gc_arguments, ++ return_state_t return_state) { ++ _sasm = sasm; ++ _return_state = return_state; ++ __ prologue(name, must_gc_arguments); ++} ++ ++// load parameters that were stored with LIR_Assembler::store_parameter ++// Note: offsets for store_parameter and load_argument must match ++void StubFrame::load_argument(int offset_in_words, Register reg) { ++ __ load_parameter(offset_in_words, reg); ++} ++ ++StubFrame::~StubFrame() { ++ if (_return_state == requires_return) { ++ __ epilogue(); ++ } else { ++ __ should_not_reach_here(); ++ } ++} ++ ++#undef __ ++ ++// Implementation of Runtime1 ++ ++#define __ sasm-> ++ ++const int float_regs_as_doubles_size_in_slots = pd_nof_fpu_regs_frame_map * 2; ++ ++// Stack layout for saving/restoring all the registers needed during a runtime ++// call (this includes deoptimization) ++// Note: note that users of this frame may well have arguments to some runtime ++// while these values are on the stack. These positions neglect those arguments ++// but the code in save_live_registers will take the argument count into ++// account. ++// ++ ++enum reg_save_layout { ++ reg_save_frame_size = 32 /* float */ + 30 /* integer, except zr, tp */ ++}; ++ ++// Save off registers which might be killed by calls into the runtime. ++// Tries to smart of about FP registers. In particular we separate ++// saving and describing the FPU registers for deoptimization since we ++// have to save the FPU registers twice if we describe them. The ++// deopt blob is the only thing which needs to describe FPU registers. ++// In all other cases it should be sufficient to simply save their ++// current value. 
++ ++static int cpu_reg_save_offsets[FrameMap::nof_cpu_regs]; ++static int fpu_reg_save_offsets[FrameMap::nof_fpu_regs]; ++static int reg_save_size_in_words; ++static int frame_size_in_bytes = -1; ++ ++static OopMap* generate_oop_map(StubAssembler* sasm, bool save_fpu_registers) { ++ int frame_size_in_bytes = reg_save_frame_size * BytesPerWord; ++ sasm->set_frame_size(frame_size_in_bytes / BytesPerWord); ++ int frame_size_in_slots = frame_size_in_bytes / VMRegImpl::stack_slot_size; ++ OopMap* oop_map = new OopMap(frame_size_in_slots, 0); ++ ++ for (int i = A0->encoding(); i <= T8->encoding(); i++) { ++ Register r = as_Register(i); ++ if (i != SCR1->encoding() && i != SCR2->encoding()) { ++ int sp_offset = cpu_reg_save_offsets[i]; ++ oop_map->set_callee_saved(VMRegImpl::stack2reg(sp_offset), r->as_VMReg()); ++ } ++ } ++ ++ if (save_fpu_registers) { ++ for (int i = 0; i < FrameMap::nof_fpu_regs; i++) { ++ FloatRegister r = as_FloatRegister(i); ++ int sp_offset = fpu_reg_save_offsets[i]; ++ oop_map->set_callee_saved(VMRegImpl::stack2reg(sp_offset), r->as_VMReg()); ++ } ++ } ++ ++ return oop_map; ++} ++ ++static OopMap* save_live_registers(StubAssembler* sasm, ++ bool save_fpu_registers = true) { ++ __ block_comment("save_live_registers"); ++ ++ // integer registers except zr & ra & tp & sp ++ __ addi_d(SP, SP, -(32 - 4 + 32) * wordSize); ++ ++ for (int i = 4; i < 32; i++) ++ __ st_ptr(as_Register(i), Address(SP, (32 + i - 4) * wordSize)); ++ ++ if (save_fpu_registers) { ++ for (int i = 0; i < 32; i++) ++ __ fst_d(as_FloatRegister(i), Address(SP, i * wordSize)); ++ } ++ ++ return generate_oop_map(sasm, save_fpu_registers); ++} ++ ++static void restore_live_registers(StubAssembler* sasm, bool restore_fpu_registers = true) { ++ if (restore_fpu_registers) { ++ for (int i = 0; i < 32; i ++) ++ __ fld_d(as_FloatRegister(i), Address(SP, i * wordSize)); ++ } ++ ++ for (int i = 4; i < 32; i++) ++ __ ld_ptr(as_Register(i), Address(SP, (32 + i - 4) * wordSize)); ++ ++ __ addi_d(SP, SP, (32 - 4 + 32) * wordSize); ++} ++ ++static void restore_live_registers_except_a0(StubAssembler* sasm, bool restore_fpu_registers = true) { ++ if (restore_fpu_registers) { ++ for (int i = 0; i < 32; i ++) ++ __ fld_d(as_FloatRegister(i), Address(SP, i * wordSize)); ++ } ++ ++ for (int i = 5; i < 32; i++) ++ __ ld_ptr(as_Register(i), Address(SP, (32 + i - 4) * wordSize)); ++ ++ __ addi_d(SP, SP, (32 - 4 + 32) * wordSize); ++} ++ ++void Runtime1::initialize_pd() { ++ int sp_offset = 0; ++ int i; ++ ++ // all float registers are saved explicitly ++ assert(FrameMap::nof_fpu_regs == 32, "double registers not handled here"); ++ for (i = 0; i < FrameMap::nof_fpu_regs; i++) { ++ fpu_reg_save_offsets[i] = sp_offset; ++ sp_offset += 2; // SP offsets are in halfwords ++ } ++ ++ for (i = 4; i < FrameMap::nof_cpu_regs; i++) { ++ Register r = as_Register(i); ++ cpu_reg_save_offsets[i] = sp_offset; ++ sp_offset += 2; // SP offsets are in halfwords ++ } ++} ++ ++// target: the entry point of the method that creates and posts the exception oop ++// has_argument: true if the exception needs arguments (passed in SCR1 and SCR2) ++ ++OopMapSet* Runtime1::generate_exception_throw(StubAssembler* sasm, address target, ++ bool has_argument) { ++ // make a frame and preserve the caller's caller-save registers ++ OopMap* oop_map = save_live_registers(sasm); ++ int call_offset; ++ if (!has_argument) { ++ call_offset = __ call_RT(noreg, noreg, target); ++ } else { ++ __ move(A1, SCR1); ++ __ move(A2, SCR2); ++ call_offset = __ call_RT(noreg, noreg, 
target); ++ } ++ OopMapSet* oop_maps = new OopMapSet(); ++ oop_maps->add_gc_map(call_offset, oop_map); ++ return oop_maps; ++} ++ ++OopMapSet* Runtime1::generate_handle_exception(StubID id, StubAssembler *sasm) { ++ __ block_comment("generate_handle_exception"); ++ ++ // incoming parameters ++ const Register exception_oop = A0; ++ const Register exception_pc = A1; ++ // other registers used in this stub ++ ++ // Save registers, if required. ++ OopMapSet* oop_maps = new OopMapSet(); ++ OopMap* oop_map = NULL; ++ switch (id) { ++ case forward_exception_id: ++ // We're handling an exception in the context of a compiled frame. ++ // The registers have been saved in the standard places. Perform ++ // an exception lookup in the caller and dispatch to the handler ++ // if found. Otherwise unwind and dispatch to the callers ++ // exception handler. ++ oop_map = generate_oop_map(sasm, 1 /*thread*/); ++ ++ // load and clear pending exception oop into A0 ++ __ ld_ptr(exception_oop, Address(TREG, Thread::pending_exception_offset())); ++ __ st_ptr(R0, Address(TREG, Thread::pending_exception_offset())); ++ ++ // load issuing PC (the return address for this stub) into A1 ++ __ ld_ptr(exception_pc, Address(FP, 1 * BytesPerWord)); ++ ++ // make sure that the vm_results are cleared (may be unnecessary) ++ __ st_ptr(R0, Address(TREG, JavaThread::vm_result_offset())); ++ __ st_ptr(R0, Address(TREG, JavaThread::vm_result_2_offset())); ++ break; ++ case handle_exception_nofpu_id: ++ case handle_exception_id: ++ // At this point all registers MAY be live. ++ oop_map = save_live_registers(sasm, id != handle_exception_nofpu_id); ++ break; ++ case handle_exception_from_callee_id: { ++ // At this point all registers except exception oop (A0) and ++ // exception pc (RA) are dead. ++ const int frame_size = 2 /*fp, return address*/; ++ oop_map = new OopMap(frame_size * VMRegImpl::slots_per_word, 0); ++ sasm->set_frame_size(frame_size); ++ break; ++ } ++ default: ShouldNotReachHere(); ++ } ++ ++ // verify that only A0 and A1 are valid at this time ++ __ invalidate_registers(false, true, true, true, true, true); ++ // verify that A0 contains a valid exception ++ __ verify_not_null_oop(exception_oop); ++ ++#ifdef ASSERT ++ // check that fields in JavaThread for exception oop and issuing pc are ++ // empty before writing to them ++ Label oop_empty; ++ __ ld_ptr(SCR1, Address(TREG, JavaThread::exception_oop_offset())); ++ __ beqz(SCR1, oop_empty); ++ __ stop("exception oop already set"); ++ __ bind(oop_empty); ++ ++ Label pc_empty; ++ __ ld_ptr(SCR1, Address(TREG, JavaThread::exception_pc_offset())); ++ __ beqz(SCR1, pc_empty); ++ __ stop("exception pc already set"); ++ __ bind(pc_empty); ++#endif ++ ++ // save exception oop and issuing pc into JavaThread ++ // (exception handler will load it from here) ++ __ st_ptr(exception_oop, Address(TREG, JavaThread::exception_oop_offset())); ++ __ st_ptr(exception_pc, Address(TREG, JavaThread::exception_pc_offset())); ++ ++ // patch throwing pc into return address (has bci & oop map) ++ __ st_ptr(exception_pc, Address(FP, 1 * BytesPerWord)); ++ ++ // compute the exception handler. ++ // the exception oop and the throwing pc are read from the fields in JavaThread ++ int call_offset = __ call_RT(noreg, noreg, CAST_FROM_FN_PTR(address, exception_handler_for_pc)); ++ oop_maps->add_gc_map(call_offset, oop_map); ++ ++ // A0: handler address ++ // will be the deopt blob if nmethod was deoptimized while we looked up ++ // handler regardless of whether handler existed in the nmethod. 
++ ++ // only A0 is valid at this time, all other registers have been destroyed by the runtime call ++ __ invalidate_registers(false, true, true, true, true, true); ++ ++ // patch the return address, this stub will directly return to the exception handler ++ __ st_ptr(A0, Address(FP, 1 * BytesPerWord)); ++ ++ switch (id) { ++ case forward_exception_id: ++ case handle_exception_nofpu_id: ++ case handle_exception_id: ++ // Restore the registers that were saved at the beginning. ++ restore_live_registers(sasm, id != handle_exception_nofpu_id); ++ break; ++ case handle_exception_from_callee_id: ++ break; ++ default: ShouldNotReachHere(); ++ } ++ ++ return oop_maps; ++} ++ ++void Runtime1::generate_unwind_exception(StubAssembler *sasm) { ++ // incoming parameters ++ const Register exception_oop = A0; ++ // callee-saved copy of exception_oop during runtime call ++ const Register exception_oop_callee_saved = S0; ++ // other registers used in this stub ++ const Register exception_pc = A1; ++ const Register handler_addr = A3; ++ ++ // verify that only A0, is valid at this time ++ __ invalidate_registers(false, true, true, true, true, true); ++ ++#ifdef ASSERT ++ // check that fields in JavaThread for exception oop and issuing pc are empty ++ Label oop_empty; ++ __ ld_ptr(SCR1, Address(TREG, JavaThread::exception_oop_offset())); ++ __ beqz(SCR1, oop_empty); ++ __ stop("exception oop must be empty"); ++ __ bind(oop_empty); ++ ++ Label pc_empty; ++ __ ld_ptr(SCR1, Address(TREG, JavaThread::exception_pc_offset())); ++ __ beqz(SCR1, pc_empty); ++ __ stop("exception pc must be empty"); ++ __ bind(pc_empty); ++#endif ++ ++ // Save our return address because ++ // exception_handler_for_return_address will destroy it. We also ++ // save exception_oop ++ __ addi_d(SP, SP, -2 * wordSize); ++ __ st_ptr(RA, Address(SP, 0 * wordSize)); ++ __ st_ptr(exception_oop, Address(SP, 1 * wordSize)); ++ ++ // search the exception handler address of the caller (using the return address) ++ __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::exception_handler_for_return_address), TREG, RA); ++ // V0: exception handler address of the caller ++ ++ // Only V0 is valid at this time; all other registers have been ++ // destroyed by the call. ++ __ invalidate_registers(false, true, true, true, false, true); ++ ++ // move result of call into correct register ++ __ move(handler_addr, A0); ++ ++ // get throwing pc (= return address). ++ // RA has been destroyed by the call ++ __ ld_ptr(RA, Address(SP, 0 * wordSize)); ++ __ ld_ptr(exception_oop, Address(SP, 1 * wordSize)); ++ __ addi_d(SP, SP, 2 * wordSize); ++ __ move(A1, RA); ++ ++ __ verify_not_null_oop(exception_oop); ++ ++ // continue at exception handler (return address removed) ++ // note: do *not* remove arguments when unwinding the ++ // activation since the caller assumes having ++ // all arguments on the stack when entering the ++ // runtime to determine the exception handler ++ // (GC happens at call site with arguments!) ++ // A0: exception oop ++ // A1: throwing pc ++ // A3: exception handler ++ __ jr(handler_addr); ++} ++ ++OopMapSet* Runtime1::generate_patching(StubAssembler* sasm, address target) { ++ // use the maximum number of runtime-arguments here because it is difficult to ++ // distinguish each RT-Call. ++ // Note: This number affects also the RT-Call in generate_handle_exception because ++ // the oop-map is shared for all calls. 
++ DeoptimizationBlob* deopt_blob = SharedRuntime::deopt_blob(); ++ assert(deopt_blob != NULL, "deoptimization blob must have been created"); ++ ++ OopMap* oop_map = save_live_registers(sasm); ++ ++ __ move(A0, TREG); ++ Label retaddr; ++ __ set_last_Java_frame(SP, FP, retaddr); ++ // do the call ++ __ call(target, relocInfo::runtime_call_type); ++ __ bind(retaddr); ++ OopMapSet* oop_maps = new OopMapSet(); ++ oop_maps->add_gc_map(__ offset(), oop_map); ++ // verify callee-saved register ++#ifdef ASSERT ++ { Label L; ++ __ get_thread(SCR1); ++ __ beq(TREG, SCR1, L); ++ __ stop("StubAssembler::call_RT: rthread not callee saved?"); ++ __ bind(L); ++ } ++#endif ++ ++ __ reset_last_Java_frame(true); ++ ++#ifdef ASSERT ++ // check that fields in JavaThread for exception oop and issuing pc are empty ++ Label oop_empty; ++ __ ld_ptr(SCR1, Address(TREG, Thread::pending_exception_offset())); ++ __ beqz(SCR1, oop_empty); ++ __ stop("exception oop must be empty"); ++ __ bind(oop_empty); ++ ++ Label pc_empty; ++ __ ld_ptr(SCR1, Address(TREG, JavaThread::exception_pc_offset())); ++ __ beqz(SCR1, pc_empty); ++ __ stop("exception pc must be empty"); ++ __ bind(pc_empty); ++#endif ++ ++ // Runtime will return true if the nmethod has been deoptimized, this is the ++ // expected scenario and anything else is an error. Note that we maintain a ++ // check on the result purely as a defensive measure. ++ Label no_deopt; ++ __ beqz(A0, no_deopt); // Have we deoptimized? ++ ++ // Perform a re-execute. The proper return address is already on the stack, ++ // we just need to restore registers, pop all of our frame but the return ++ // address and jump to the deopt blob. ++ restore_live_registers(sasm); ++ __ leave(); ++ __ jmp(deopt_blob->unpack_with_reexecution(), relocInfo::runtime_call_type); ++ ++ __ bind(no_deopt); ++ __ stop("deopt not performed"); ++ ++ return oop_maps; ++} ++ ++OopMapSet* Runtime1::generate_code_for(StubID id, StubAssembler* sasm) { ++ // for better readability ++ const bool must_gc_arguments = true; ++ const bool dont_gc_arguments = false; ++ ++ // default value; overwritten for some optimized stubs that are called ++ // from methods that do not use the fpu ++ bool save_fpu_registers = true; ++ ++ // stub code & info for the different stubs ++ OopMapSet* oop_maps = NULL; ++ OopMap* oop_map = NULL; ++ switch (id) { ++ { ++ case forward_exception_id: ++ { ++ oop_maps = generate_handle_exception(id, sasm); ++ __ leave(); ++ __ jr(RA); ++ } ++ break; ++ ++ case throw_div0_exception_id: ++ { ++ StubFrame f(sasm, "throw_div0_exception", dont_gc_arguments, does_not_return); ++ oop_maps = generate_exception_throw(sasm, CAST_FROM_FN_PTR(address, throw_div0_exception), false); ++ } ++ break; ++ ++ case throw_null_pointer_exception_id: ++ { ++ StubFrame f(sasm, "throw_null_pointer_exception", dont_gc_arguments, does_not_return); ++ oop_maps = generate_exception_throw(sasm, CAST_FROM_FN_PTR(address, throw_null_pointer_exception), false); ++ } ++ break; ++ ++ case new_instance_id: ++ case fast_new_instance_id: ++ case fast_new_instance_init_check_id: ++ { ++ Register klass = A3; // Incoming ++ Register obj = A0; // Result ++ ++ if (id == new_instance_id) { ++ __ set_info("new_instance", dont_gc_arguments); ++ } else if (id == fast_new_instance_id) { ++ __ set_info("fast new_instance", dont_gc_arguments); ++ } else { ++ assert(id == fast_new_instance_init_check_id, "bad StubID"); ++ __ set_info("fast new_instance init check", dont_gc_arguments); ++ } ++ ++ // If TLAB is disabled, see if there is support 
for inlining contiguous ++ // allocations. ++ // Otherwise, just go to the slow path. ++ if ((id == fast_new_instance_id || id == fast_new_instance_init_check_id) && ++ !UseTLAB && Universe::heap()->supports_inline_contig_alloc()) { ++ Label slow_path; ++ Register obj_size = S0; ++ Register t1 = T0; ++ Register t2 = T1; ++ assert_different_registers(klass, obj, obj_size, t1, t2); ++ ++ __ addi_d(SP, SP, -2 * wordSize); ++ __ st_ptr(S0, Address(SP, 0)); ++ ++ if (id == fast_new_instance_init_check_id) { ++ // make sure the klass is initialized ++ __ ld_bu(SCR1, Address(klass, InstanceKlass::init_state_offset())); ++ __ li(SCR2, InstanceKlass::fully_initialized); ++ __ bne_far(SCR1, SCR2, slow_path); ++ } ++ ++#ifdef ASSERT ++ // assert object can be fast path allocated ++ { ++ Label ok, not_ok; ++ __ ld_w(obj_size, Address(klass, Klass::layout_helper_offset())); ++ __ bge(R0, obj_size, not_ok); // make sure it's an instance (LH > 0) ++ __ andi(SCR1, obj_size, Klass::_lh_instance_slow_path_bit); ++ __ beqz(SCR1, ok); ++ __ bind(not_ok); ++ __ stop("assert(can be fast path allocated)"); ++ __ should_not_reach_here(); ++ __ bind(ok); ++ } ++#endif // ASSERT ++ ++ // get the instance size (size is postive so movl is fine for 64bit) ++ __ ld_w(obj_size, Address(klass, Klass::layout_helper_offset())); ++ ++ __ eden_allocate(obj, obj_size, 0, t1, slow_path); ++ ++ __ initialize_object(obj, klass, obj_size, 0, t1, t2, /* is_tlab_allocated */ false); ++ __ verify_oop(obj); ++ __ ld_ptr(S0, Address(SP, 0)); ++ __ addi_d(SP, SP, 2 * wordSize); ++ __ jr(RA); ++ ++ __ bind(slow_path); ++ __ ld_ptr(S0, Address(SP, 0)); ++ __ addi_d(SP, SP, 2 * wordSize); ++ } ++ ++ __ enter(); ++ OopMap* map = save_live_registers(sasm); ++ int call_offset = __ call_RT(obj, noreg, CAST_FROM_FN_PTR(address, new_instance), klass); ++ oop_maps = new OopMapSet(); ++ oop_maps->add_gc_map(call_offset, map); ++ restore_live_registers_except_a0(sasm); ++ __ verify_oop(obj); ++ __ leave(); ++ __ jr(RA); ++ ++ // A0,: new instance ++ } ++ ++ break; ++ ++ case counter_overflow_id: ++ { ++ Register bci = A0, method = A1; ++ __ enter(); ++ OopMap* map = save_live_registers(sasm); ++ // Retrieve bci ++ __ ld_w(bci, Address(FP, 2 * BytesPerWord)); ++ // And a pointer to the Method* ++ __ ld_d(method, Address(FP, 3 * BytesPerWord)); ++ int call_offset = __ call_RT(noreg, noreg, CAST_FROM_FN_PTR(address, counter_overflow), bci, method); ++ oop_maps = new OopMapSet(); ++ oop_maps->add_gc_map(call_offset, map); ++ restore_live_registers(sasm); ++ __ leave(); ++ __ jr(RA); ++ } ++ break; ++ ++ case new_type_array_id: ++ case new_object_array_id: ++ { ++ Register length = S0; // Incoming ++ Register klass = A3; // Incoming ++ Register obj = A0; // Result ++ ++ if (id == new_type_array_id) { ++ __ set_info("new_type_array", dont_gc_arguments); ++ } else { ++ __ set_info("new_object_array", dont_gc_arguments); ++ } ++ ++#ifdef ASSERT ++ // assert object type is really an array of the proper kind ++ { ++ Label ok; ++ Register t0 = obj; ++ __ ld_w(t0, Address(klass, Klass::layout_helper_offset())); ++ __ srai_w(t0, t0, Klass::_lh_array_tag_shift); ++ int tag = ((id == new_type_array_id) ++ ? Klass::_lh_array_tag_type_value ++ : Klass::_lh_array_tag_obj_value); ++ __ li(SCR1, tag); ++ __ beq(t0, SCR1, ok); ++ __ stop("assert(is an array klass)"); ++ __ should_not_reach_here(); ++ __ bind(ok); ++ } ++#endif // ASSERT ++ ++ // If TLAB is disabled, see if there is support for inlining contiguous ++ // allocations. 
++ // Otherwise, just go to the slow path. ++ if (!UseTLAB && Universe::heap()->supports_inline_contig_alloc()) { ++ Register arr_size = A5; ++ Register t1 = T0; ++ Register t2 = T1; ++ Label slow_path; ++ assert_different_registers(length, klass, obj, arr_size, t1, t2); ++ ++ // check that array length is small enough for fast path. ++ __ li(SCR1, C1_MacroAssembler::max_array_allocation_length); ++ __ blt_far(SCR1, length, slow_path, false); ++ ++ // get the allocation size: round_up(hdr + length << (layout_helper & 0x1F)) ++ // since size is positive ldrw does right thing on 64bit ++ __ ld_w(t1, Address(klass, Klass::layout_helper_offset())); ++ // since size is positive movw does right thing on 64bit ++ __ move(arr_size, length); ++ __ sll_w(arr_size, length, t1); ++ __ bstrpick_d(t1, t1, Klass::_lh_header_size_shift + ++ exact_log2(Klass::_lh_header_size_mask + 1) - 1, ++ Klass::_lh_header_size_shift); ++ __ add_d(arr_size, arr_size, t1); ++ __ addi_d(arr_size, arr_size, MinObjAlignmentInBytesMask); // align up ++ __ bstrins_d(arr_size, R0, exact_log2(MinObjAlignmentInBytesMask + 1) - 1, 0); ++ ++ __ eden_allocate(obj, arr_size, 0, t1, slow_path); // preserves arr_size ++ ++ __ initialize_header(obj, klass, length, t1, t2); ++ __ ld_bu(t1, Address(klass, in_bytes(Klass::layout_helper_offset()) + (Klass::_lh_header_size_shift / BitsPerByte))); ++ assert(Klass::_lh_header_size_shift % BitsPerByte == 0, "bytewise"); ++ assert(Klass::_lh_header_size_mask <= 0xFF, "bytewise"); ++ __ andi(t1, t1, Klass::_lh_header_size_mask); ++ __ sub_d(arr_size, arr_size, t1); // body length ++ __ add_d(t1, t1, obj); // body start ++ __ initialize_body(t1, arr_size, 0, t1, t2); ++ __ membar(Assembler::StoreStore); ++ __ verify_oop(obj); ++ ++ __ jr(RA); ++ ++ __ bind(slow_path); ++ } ++ ++ __ enter(); ++ OopMap* map = save_live_registers(sasm); ++ int call_offset; ++ if (id == new_type_array_id) { ++ call_offset = __ call_RT(obj, noreg, CAST_FROM_FN_PTR(address, new_type_array), klass, length); ++ } else { ++ call_offset = __ call_RT(obj, noreg, CAST_FROM_FN_PTR(address, new_object_array), klass, length); ++ } ++ ++ oop_maps = new OopMapSet(); ++ oop_maps->add_gc_map(call_offset, map); ++ restore_live_registers_except_a0(sasm); ++ ++ __ verify_oop(obj); ++ __ leave(); ++ __ jr(RA); ++ ++ // A0: new array ++ } ++ break; ++ ++ case new_multi_array_id: ++ { ++ StubFrame f(sasm, "new_multi_array", dont_gc_arguments); ++ // A0,: klass ++ // S0,: rank ++ // A2: address of 1st dimension ++ OopMap* map = save_live_registers(sasm); ++ __ move(A1, A0); ++ __ move(A3, A2); ++ __ move(A2, S0); ++ int call_offset = __ call_RT(A0, noreg, CAST_FROM_FN_PTR(address, new_multi_array), A1, A2, A3); ++ ++ oop_maps = new OopMapSet(); ++ oop_maps->add_gc_map(call_offset, map); ++ restore_live_registers_except_a0(sasm); ++ ++ // A0,: new multi array ++ __ verify_oop(A0); ++ } ++ break; ++ ++ case register_finalizer_id: ++ { ++ __ set_info("register_finalizer", dont_gc_arguments); ++ ++ // This is called via call_runtime so the arguments ++ // will be place in C abi locations ++ ++ __ verify_oop(A0); ++ ++ // load the klass and check the has finalizer flag ++ Label register_finalizer; ++ Register t = A5; ++ __ load_klass(t, A0); ++ __ ld_w(t, Address(t, Klass::access_flags_offset())); ++ __ li(SCR1, JVM_ACC_HAS_FINALIZER); ++ __ andr(SCR1, t, SCR1); ++ __ bnez(SCR1, register_finalizer); ++ __ jr(RA); ++ ++ __ bind(register_finalizer); ++ __ enter(); ++ OopMap* oop_map = save_live_registers(sasm); ++ int call_offset = __ 
call_RT(noreg, noreg, CAST_FROM_FN_PTR(address, SharedRuntime::register_finalizer), A0); ++ oop_maps = new OopMapSet(); ++ oop_maps->add_gc_map(call_offset, oop_map); ++ ++ // Now restore all the live registers ++ restore_live_registers(sasm); ++ ++ __ leave(); ++ __ jr(RA); ++ } ++ break; ++ ++ case throw_class_cast_exception_id: ++ { ++ StubFrame f(sasm, "throw_class_cast_exception", dont_gc_arguments, does_not_return); ++ oop_maps = generate_exception_throw(sasm, CAST_FROM_FN_PTR(address, throw_class_cast_exception), true); ++ } ++ break; ++ ++ case throw_incompatible_class_change_error_id: ++ { ++ StubFrame f(sasm, "throw_incompatible_class_cast_exception", dont_gc_arguments, does_not_return); ++ oop_maps = generate_exception_throw(sasm, CAST_FROM_FN_PTR(address, throw_incompatible_class_change_error), false); ++ } ++ break; ++ ++ case slow_subtype_check_id: ++ { ++ // Typical calling sequence: ++ // __ push(klass_RInfo); // object klass or other subclass ++ // __ push(sup_k_RInfo); // array element klass or other superclass ++ // __ bl(slow_subtype_check); ++ // Note that the subclass is pushed first, and is therefore deepest. ++ enum layout { ++ a0_off, a0_off_hi, ++ a2_off, a2_off_hi, ++ a4_off, a4_off_hi, ++ a5_off, a5_off_hi, ++ sup_k_off, sup_k_off_hi, ++ klass_off, klass_off_hi, ++ framesize, ++ result_off = sup_k_off ++ }; ++ ++ __ set_info("slow_subtype_check", dont_gc_arguments); ++ __ addi_d(SP, SP, -4 * wordSize); ++ __ st_ptr(A0, Address(SP, a0_off * VMRegImpl::stack_slot_size)); ++ __ st_ptr(A2, Address(SP, a2_off * VMRegImpl::stack_slot_size)); ++ __ st_ptr(A4, Address(SP, a4_off * VMRegImpl::stack_slot_size)); ++ __ st_ptr(A5, Address(SP, a5_off * VMRegImpl::stack_slot_size)); ++ ++ // This is called by pushing args and not with C abi ++ __ ld_ptr(A4, Address(SP, klass_off * VMRegImpl::stack_slot_size)); // subclass ++ __ ld_ptr(A0, Address(SP, sup_k_off * VMRegImpl::stack_slot_size)); // superclass ++ ++ Label miss; ++ __ check_klass_subtype_slow_path(A4, A0, A2, A5, NULL, &miss); ++ ++ // fallthrough on success: ++ __ li(SCR1, 1); ++ __ st_ptr(SCR1, Address(SP, result_off * VMRegImpl::stack_slot_size)); // result ++ __ ld_ptr(A0, Address(SP, a0_off * VMRegImpl::stack_slot_size)); ++ __ ld_ptr(A2, Address(SP, a2_off * VMRegImpl::stack_slot_size)); ++ __ ld_ptr(A4, Address(SP, a4_off * VMRegImpl::stack_slot_size)); ++ __ ld_ptr(A5, Address(SP, a5_off * VMRegImpl::stack_slot_size)); ++ __ addi_d(SP, SP, 4 * wordSize); ++ __ jr(RA); ++ ++ __ bind(miss); ++ __ st_ptr(R0, Address(SP, result_off * VMRegImpl::stack_slot_size)); // result ++ __ ld_ptr(A0, Address(SP, a0_off * VMRegImpl::stack_slot_size)); ++ __ ld_ptr(A2, Address(SP, a2_off * VMRegImpl::stack_slot_size)); ++ __ ld_ptr(A4, Address(SP, a4_off * VMRegImpl::stack_slot_size)); ++ __ ld_ptr(A5, Address(SP, a5_off * VMRegImpl::stack_slot_size)); ++ __ addi_d(SP, SP, 4 * wordSize); ++ __ jr(RA); ++ } ++ break; ++ ++ case monitorenter_nofpu_id: ++ save_fpu_registers = false; ++ // fall through ++ case monitorenter_id: ++ { ++ StubFrame f(sasm, "monitorenter", dont_gc_arguments); ++ OopMap* map = save_live_registers(sasm, save_fpu_registers); ++ ++ // Called with store_parameter and not C abi ++ ++ f.load_argument(1, A0); // A0,: object ++ f.load_argument(0, A1); // A1,: lock address ++ ++ int call_offset = __ call_RT(noreg, noreg, CAST_FROM_FN_PTR(address, monitorenter), A0, A1); ++ ++ oop_maps = new OopMapSet(); ++ oop_maps->add_gc_map(call_offset, map); ++ restore_live_registers(sasm, save_fpu_registers); ++ } ++ 
break; ++ ++ case monitorexit_nofpu_id: ++ save_fpu_registers = false; ++ // fall through ++ case monitorexit_id: ++ { ++ StubFrame f(sasm, "monitorexit", dont_gc_arguments); ++ OopMap* map = save_live_registers(sasm, save_fpu_registers); ++ ++ // Called with store_parameter and not C abi ++ ++ f.load_argument(0, A0); // A0,: lock address ++ ++ // note: really a leaf routine but must setup last java sp ++ // => use call_RT for now (speed can be improved by ++ // doing last java sp setup manually) ++ int call_offset = __ call_RT(noreg, noreg, CAST_FROM_FN_PTR(address, monitorexit), A0); ++ ++ oop_maps = new OopMapSet(); ++ oop_maps->add_gc_map(call_offset, map); ++ restore_live_registers(sasm, save_fpu_registers); ++ } ++ break; ++ ++ case deoptimize_id: ++ { ++ StubFrame f(sasm, "deoptimize", dont_gc_arguments, does_not_return); ++ OopMap* oop_map = save_live_registers(sasm); ++ f.load_argument(0, A1); ++ int call_offset = __ call_RT(noreg, noreg, CAST_FROM_FN_PTR(address, deoptimize), A1); ++ ++ oop_maps = new OopMapSet(); ++ oop_maps->add_gc_map(call_offset, oop_map); ++ restore_live_registers(sasm); ++ DeoptimizationBlob* deopt_blob = SharedRuntime::deopt_blob(); ++ assert(deopt_blob != NULL, "deoptimization blob must have been created"); ++ __ leave(); ++ __ jmp(deopt_blob->unpack_with_reexecution(), relocInfo::runtime_call_type); ++ } ++ break; ++ ++ case throw_range_check_failed_id: ++ { ++ StubFrame f(sasm, "range_check_failed", dont_gc_arguments, does_not_return); ++ oop_maps = generate_exception_throw(sasm, CAST_FROM_FN_PTR(address, throw_range_check_exception), true); ++ } ++ break; ++ ++ case unwind_exception_id: ++ { ++ __ set_info("unwind_exception", dont_gc_arguments); ++ // note: no stubframe since we are about to leave the current ++ // activation and we are calling a leaf VM function only. 
++ generate_unwind_exception(sasm); ++ } ++ break; ++ ++ case access_field_patching_id: ++ { ++ StubFrame f(sasm, "access_field_patching", dont_gc_arguments, does_not_return); ++ // we should set up register map ++ oop_maps = generate_patching(sasm, CAST_FROM_FN_PTR(address, access_field_patching)); ++ } ++ break; ++ ++ case load_klass_patching_id: ++ { ++ StubFrame f(sasm, "load_klass_patching", dont_gc_arguments, does_not_return); ++ // we should set up register map ++ oop_maps = generate_patching(sasm, CAST_FROM_FN_PTR(address, move_klass_patching)); ++ } ++ break; ++ ++ case load_mirror_patching_id: ++ { ++ StubFrame f(sasm, "load_mirror_patching", dont_gc_arguments, does_not_return); ++ // we should set up register map ++ oop_maps = generate_patching(sasm, CAST_FROM_FN_PTR(address, move_mirror_patching)); ++ } ++ break; ++ ++ case load_appendix_patching_id: ++ { ++ StubFrame f(sasm, "load_appendix_patching", dont_gc_arguments, does_not_return); ++ // we should set up register map ++ oop_maps = generate_patching(sasm, CAST_FROM_FN_PTR(address, move_appendix_patching)); ++ } ++ break; ++ ++ case handle_exception_nofpu_id: ++ case handle_exception_id: ++ { ++ StubFrame f(sasm, "handle_exception", dont_gc_arguments); ++ oop_maps = generate_handle_exception(id, sasm); ++ } ++ break; ++ ++ case handle_exception_from_callee_id: ++ { ++ StubFrame f(sasm, "handle_exception_from_callee", dont_gc_arguments); ++ oop_maps = generate_handle_exception(id, sasm); ++ } ++ break; ++ ++ case throw_index_exception_id: ++ { ++ StubFrame f(sasm, "index_range_check_failed", dont_gc_arguments, does_not_return); ++ oop_maps = generate_exception_throw(sasm, CAST_FROM_FN_PTR(address, throw_index_exception), true); ++ } ++ break; ++ ++ case throw_array_store_exception_id: ++ { ++ StubFrame f(sasm, "throw_array_store_exception", dont_gc_arguments, does_not_return); ++ // tos + 0: link ++ // + 1: return address ++ oop_maps = generate_exception_throw(sasm, CAST_FROM_FN_PTR(address, throw_array_store_exception), true); ++ } ++ break; ++ ++ case predicate_failed_trap_id: ++ { ++ StubFrame f(sasm, "predicate_failed_trap", dont_gc_arguments, does_not_return); ++ ++ OopMap* map = save_live_registers(sasm); ++ ++ int call_offset = __ call_RT(noreg, noreg, CAST_FROM_FN_PTR(address, predicate_failed_trap)); ++ oop_maps = new OopMapSet(); ++ oop_maps->add_gc_map(call_offset, map); ++ restore_live_registers(sasm); ++ __ leave(); ++ DeoptimizationBlob* deopt_blob = SharedRuntime::deopt_blob(); ++ assert(deopt_blob != NULL, "deoptimization blob must have been created"); ++ ++ __ jmp(deopt_blob->unpack_with_reexecution(), relocInfo::runtime_call_type); ++ } ++ break; ++ ++ case dtrace_object_alloc_id: ++ { ++ // A0: object ++ StubFrame f(sasm, "dtrace_object_alloc", dont_gc_arguments); ++ save_live_registers(sasm); ++ ++ __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::dtrace_object_alloc), A0); ++ ++ restore_live_registers(sasm); ++ } ++ break; ++ ++ default: ++ { ++ StubFrame f(sasm, "unimplemented entry", dont_gc_arguments, does_not_return); ++ __ li(A0, (int)id); ++ __ call_RT(noreg, noreg, CAST_FROM_FN_PTR(address, unimplemented_entry), A0); ++ } ++ break; ++ } ++ } ++ return oop_maps; ++} ++ ++#undef __ ++ ++const char *Runtime1::pd_name_for_address(address entry) { ++ Unimplemented(); ++ return 0; ++} +diff --git a/src/hotspot/cpu/loongarch/c1_globals_loongarch.hpp b/src/hotspot/cpu/loongarch/c1_globals_loongarch.hpp +new file mode 100644 +index 0000000000..164016e123 +--- /dev/null ++++ 
b/src/hotspot/cpu/loongarch/c1_globals_loongarch.hpp +@@ -0,0 +1,71 @@ ++/* ++ * Copyright (c) 2000, 2021, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2021, 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#ifndef CPU_LOONGARCH_C1_GLOBALS_LOONGARCH_HPP ++#define CPU_LOONGARCH_C1_GLOBALS_LOONGARCH_HPP ++ ++#include "utilities/globalDefinitions.hpp" ++#include "utilities/macros.hpp" ++ ++// Sets the default values for platform dependent flags used by the client compiler. ++// (see c1_globals.hpp) ++ ++#ifndef COMPILER2 ++define_pd_global(bool, BackgroundCompilation, true ); ++define_pd_global(bool, UseTLAB, true ); ++define_pd_global(bool, ResizeTLAB, true ); ++define_pd_global(bool, InlineIntrinsics, true ); ++define_pd_global(bool, PreferInterpreterNativeStubs, false); ++define_pd_global(bool, ProfileTraps, false); ++define_pd_global(bool, UseOnStackReplacement, true ); ++define_pd_global(bool, TieredCompilation, false); ++define_pd_global(intx, CompileThreshold, 1500 ); ++ ++define_pd_global(intx, OnStackReplacePercentage, 933 ); ++define_pd_global(intx, FreqInlineSize, 325 ); ++define_pd_global(intx, NewSizeThreadIncrease, 4*K ); ++define_pd_global(intx, InitialCodeCacheSize, 160*K); ++define_pd_global(intx, ReservedCodeCacheSize, 32*M ); ++define_pd_global(intx, NonProfiledCodeHeapSize, 13*M ); ++define_pd_global(intx, ProfiledCodeHeapSize, 14*M ); ++define_pd_global(intx, NonNMethodCodeHeapSize, 5*M ); ++define_pd_global(bool, ProfileInterpreter, false); ++define_pd_global(intx, CodeCacheExpansionSize, 32*K ); ++define_pd_global(uintx, CodeCacheMinBlockLength, 1); ++define_pd_global(uintx, CodeCacheMinimumUseSpace, 400*K); ++define_pd_global(uintx, MetaspaceSize, 12*M ); ++define_pd_global(bool, NeverActAsServerClassMachine, true ); ++define_pd_global(uint64_t,MaxRAM, 1ULL*G); ++define_pd_global(bool, CICompileOSR, true ); ++#endif // !COMPILER2 ++define_pd_global(bool, UseTypeProfile, false); ++define_pd_global(bool, RoundFPResults, true ); ++ ++define_pd_global(bool, LIRFillDelaySlots, false); ++define_pd_global(bool, OptimizeSinglePrecision, true ); ++define_pd_global(bool, CSEArrayLength, false); ++define_pd_global(bool, TwoOperandLIRForm, false ); ++ ++#endif // CPU_LOONGARCH_C1_GLOBALS_LOONGARCH_HPP +diff --git a/src/hotspot/cpu/loongarch/c2_globals_loongarch.hpp b/src/hotspot/cpu/loongarch/c2_globals_loongarch.hpp +new file mode 100644 +index 0000000000..27a4ec5229 +--- /dev/null ++++ b/src/hotspot/cpu/loongarch/c2_globals_loongarch.hpp +@@ -0,0 +1,94 
@@ ++/* ++ * Copyright (c) 2000, 2013, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#ifndef CPU_LOONGARCH_C2_GLOBALS_LOONGARCH_HPP ++#define CPU_LOONGARCH_C2_GLOBALS_LOONGARCH_HPP ++ ++#include "utilities/globalDefinitions.hpp" ++#include "utilities/macros.hpp" ++ ++// Sets the default values for platform dependent flags used by the server compiler. ++// (see c2_globals.hpp). Alpha-sorted. ++define_pd_global(bool, BackgroundCompilation, true); ++define_pd_global(bool, UseTLAB, true); ++define_pd_global(bool, ResizeTLAB, true); ++define_pd_global(bool, CICompileOSR, true); ++define_pd_global(bool, InlineIntrinsics, true); ++define_pd_global(bool, PreferInterpreterNativeStubs, false); ++define_pd_global(bool, ProfileTraps, true); ++define_pd_global(bool, UseOnStackReplacement, true); ++#ifdef CC_INTERP ++define_pd_global(bool, ProfileInterpreter, false); ++#else ++define_pd_global(bool, ProfileInterpreter, true); ++#endif // CC_INTERP ++define_pd_global(bool, TieredCompilation, true); ++define_pd_global(intx, CompileThreshold, 10000); ++define_pd_global(intx, BackEdgeThreshold, 100000); ++ ++define_pd_global(intx, OnStackReplacePercentage, 140); ++define_pd_global(intx, ConditionalMoveLimit, 3); ++define_pd_global(intx, FLOATPRESSURE, 6); ++define_pd_global(intx, FreqInlineSize, 325); ++define_pd_global(intx, MinJumpTableSize, 10); ++define_pd_global(intx, INTPRESSURE, 13); ++define_pd_global(intx, InteriorEntryAlignment, 16); ++define_pd_global(intx, NewSizeThreadIncrease, ScaleForWordSize(4*K)); ++define_pd_global(intx, LoopUnrollLimit, 60); ++define_pd_global(intx, LoopPercentProfileLimit, 10); ++// InitialCodeCacheSize derived from specjbb2000 run. ++define_pd_global(intx, InitialCodeCacheSize, 2496*K); // Integral multiple of CodeCacheExpansionSize ++define_pd_global(intx, CodeCacheExpansionSize, 64*K); ++ ++// Ergonomics related flags ++define_pd_global(uint64_t,MaxRAM, 128ULL*G); ++define_pd_global(intx, RegisterCostAreaRatio, 16000); ++ ++// Peephole and CISC spilling both break the graph, and so makes the ++// scheduler sick. 
++define_pd_global(bool, OptoPeephole, false); ++define_pd_global(bool, UseCISCSpill, false); ++define_pd_global(bool, OptoScheduling, false); ++define_pd_global(bool, OptoBundling, false); ++define_pd_global(bool, OptoRegScheduling, false); ++define_pd_global(bool, SuperWordLoopUnrollAnalysis, true); ++define_pd_global(bool, IdealizeClearArrayNode, true); ++ ++define_pd_global(intx, ReservedCodeCacheSize, 48*M); ++define_pd_global(intx, NonProfiledCodeHeapSize, 21*M); ++define_pd_global(intx, ProfiledCodeHeapSize, 22*M); ++define_pd_global(intx, NonNMethodCodeHeapSize, 5*M ); ++define_pd_global(uintx, CodeCacheMinBlockLength, 4); ++define_pd_global(uintx, CodeCacheMinimumUseSpace, 400*K); ++ ++define_pd_global(bool, TrapBasedRangeChecks, false); ++ ++// Heap related flags ++define_pd_global(uintx,MetaspaceSize, ScaleForWordSize(16*M)); ++ ++// Ergonomics related flags ++define_pd_global(bool, NeverActAsServerClassMachine, false); ++ ++#endif // CPU_LOONGARCH_C2_GLOBALS_LOONGARCH_HPP +diff --git a/src/hotspot/cpu/loongarch/c2_init_loongarch.cpp b/src/hotspot/cpu/loongarch/c2_init_loongarch.cpp +new file mode 100644 +index 0000000000..ec78b942d4 +--- /dev/null ++++ b/src/hotspot/cpu/loongarch/c2_init_loongarch.cpp +@@ -0,0 +1,37 @@ ++/* ++ * Copyright (c) 2000, 2010, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#include "precompiled.hpp" ++#include "opto/compile.hpp" ++#include "opto/node.hpp" ++ ++// processor dependent initialization for LoongArch ++ ++extern void reg_mask_init(); ++ ++void Compile::pd_compiler2_init() { ++ guarantee(CodeEntryAlignment >= InteriorEntryAlignment, "" ); ++ reg_mask_init(); ++} +diff --git a/src/hotspot/cpu/loongarch/codeBuffer_loongarch.hpp b/src/hotspot/cpu/loongarch/codeBuffer_loongarch.hpp +new file mode 100644 +index 0000000000..653d95806b +--- /dev/null ++++ b/src/hotspot/cpu/loongarch/codeBuffer_loongarch.hpp +@@ -0,0 +1,35 @@ ++/* ++ * Copyright (c) 2002, 2010, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2021, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. 
++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#ifndef CPU_LOONGARCH_CODEBUFFER_LOONGARCH_HPP ++#define CPU_LOONGARCH_CODEBUFFER_LOONGARCH_HPP ++ ++private: ++ void pd_initialize() {} ++ ++public: ++ void flush_bundle(bool start_new_bundle) {} ++ ++#endif // CPU_LOONGARCH_CODEBUFFER_LOONGARCH_HPP +diff --git a/src/hotspot/cpu/loongarch/compiledIC_loongarch.cpp b/src/hotspot/cpu/loongarch/compiledIC_loongarch.cpp +new file mode 100644 +index 0000000000..d063d5d93e +--- /dev/null ++++ b/src/hotspot/cpu/loongarch/compiledIC_loongarch.cpp +@@ -0,0 +1,148 @@ ++/* ++ * Copyright (c) 1997, 2014, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#include "precompiled.hpp" ++#include "asm/macroAssembler.inline.hpp" ++#include "code/compiledIC.hpp" ++#include "code/icBuffer.hpp" ++#include "code/nmethod.hpp" ++#include "memory/resourceArea.hpp" ++#include "runtime/mutexLocker.hpp" ++#include "runtime/safepoint.hpp" ++ ++// ---------------------------------------------------------------------------- ++ ++#define __ _masm. ++address CompiledStaticCall::emit_to_interp_stub(CodeBuffer &cbuf, address mark) { ++ precond(cbuf.stubs()->start() != badAddress); ++ precond(cbuf.stubs()->end() != badAddress); ++ ++ if (mark == NULL) { ++ mark = cbuf.insts_mark(); // get mark within main instrs section ++ } ++ ++ // Note that the code buffer's insts_mark is always relative to insts. ++ // That's why we must use the macroassembler to generate a stub. 
++ MacroAssembler _masm(&cbuf); ++ ++ address base = __ start_a_stub(CompiledStaticCall::to_interp_stub_size()); ++ if (base == NULL) return NULL; // CodeBuffer::expand failed ++ // static stub relocation stores the instruction address of the call ++ ++ __ relocate(static_stub_Relocation::spec(mark), 0); ++ ++ // Code stream for loading method may be changed. ++ __ ibar(0); ++ ++ // Rmethod contains methodOop, it should be relocated for GC ++ // static stub relocation also tags the methodOop in the code-stream. ++ __ mov_metadata(Rmethod, NULL); ++ // This is recognized as unresolved by relocs/nativeInst/ic code ++ ++ cbuf.set_insts_mark(); ++ __ patchable_jump(__ pc()); ++ // Update current stubs pointer and restore code_end. ++ __ end_a_stub(); ++ return base; ++} ++#undef __ ++ ++int CompiledStaticCall::to_interp_stub_size() { ++ return NativeInstruction::nop_instruction_size + NativeMovConstReg::instruction_size + NativeGeneralJump::instruction_size; ++} ++ ++int CompiledStaticCall::to_trampoline_stub_size() { ++ return NativeInstruction::nop_instruction_size + NativeCallTrampolineStub::instruction_size; ++} ++ ++// Relocation entries for call stub, compiled java to interpreter. ++int CompiledStaticCall::reloc_to_interp_stub() { ++ return 16; ++} ++ ++void CompiledDirectStaticCall::set_to_interpreted(const methodHandle& callee, address entry) { ++ address stub = find_stub(false /* is_aot */); ++ guarantee(stub != NULL, "stub not found"); ++ ++ if (TraceICs) { ++ ResourceMark rm; ++ tty->print_cr("CompiledDirectStaticCall@" INTPTR_FORMAT ": set_to_interpreted %s", ++ p2i(instruction_address()), ++ callee->name_and_sig_as_C_string()); ++ } ++ ++ // Creation also verifies the object. ++ NativeMovConstReg* method_holder = nativeMovConstReg_at(stub + NativeInstruction::nop_instruction_size); ++ NativeGeneralJump* jump = nativeGeneralJump_at(method_holder->next_instruction_address()); ++ ++ assert(method_holder->data() == 0 || method_holder->data() == (intptr_t)callee(), ++ "a) MT-unsafe modification of inline cache"); ++ assert(jump->jump_destination() == (address)-1 || jump->jump_destination() == entry, ++ "b) MT-unsafe modification of inline cache"); ++ ++ // Update stub. ++ method_holder->set_data((intptr_t)callee()); ++ jump->set_jump_destination(entry); ++ ++ // Update jump to call. ++ set_destination_mt_safe(stub); ++} ++ ++void CompiledDirectStaticCall::set_stub_to_clean(static_stub_Relocation* static_stub) { ++ assert (CompiledIC_lock->is_locked() || SafepointSynchronize::is_at_safepoint(), "mt unsafe call"); ++ // Reset stub. ++ address stub = static_stub->addr(); ++ assert(stub != NULL, "stub not found"); ++ // Creation also verifies the object. ++ NativeMovConstReg* method_holder = nativeMovConstReg_at(stub + NativeInstruction::nop_instruction_size); ++ NativeGeneralJump* jump = nativeGeneralJump_at(method_holder->next_instruction_address()); ++ method_holder->set_data(0); ++ jump->set_jump_destination(jump->instruction_address()); ++} ++ ++//----------------------------------------------------------------------------- ++// Non-product mode code ++#ifndef PRODUCT ++ ++void CompiledDirectStaticCall::verify() { ++ // Verify call. ++ _call->verify(); ++ if (os::is_MP()) { ++ _call->verify_alignment(); ++ } ++ ++ // Verify stub. ++ address stub = find_stub(false /* is_aot */); ++ assert(stub != NULL, "no stub found for static call"); ++ // Creation also verifies the object. 
++ NativeMovConstReg* method_holder = nativeMovConstReg_at(stub + NativeInstruction::nop_instruction_size); ++ NativeGeneralJump* jump = nativeGeneralJump_at(method_holder->next_instruction_address()); ++ ++ ++ // Verify state. ++ assert(is_clean() || is_call_to_compiled() || is_call_to_interpreted(), "sanity check"); ++} ++ ++#endif // !PRODUCT +diff --git a/src/hotspot/cpu/loongarch/copy_loongarch.hpp b/src/hotspot/cpu/loongarch/copy_loongarch.hpp +new file mode 100644 +index 0000000000..54b847a736 +--- /dev/null ++++ b/src/hotspot/cpu/loongarch/copy_loongarch.hpp +@@ -0,0 +1,77 @@ ++/* ++ * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2021, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#ifndef CPU_LOONGARCH_COPY_LOONGARCH_HPP ++#define CPU_LOONGARCH_COPY_LOONGARCH_HPP ++ ++// Inline functions for memory copy and fill. ++ ++// Contains inline asm implementations ++#include OS_CPU_HEADER_INLINE(copy) ++ ++// Template for atomic, element-wise copy. ++template ++static void copy_conjoint_atomic(const T* from, T* to, size_t count) { ++ if (from > to) { ++ while (count-- > 0) { ++ // Copy forwards ++ *to++ = *from++; ++ } ++ } else { ++ from += count - 1; ++ to += count - 1; ++ while (count-- > 0) { ++ // Copy backwards ++ *to-- = *from--; ++ } ++ } ++} ++ ++ ++static void pd_fill_to_words(HeapWord* tohw, size_t count, juint value) { ++ julong* to = (julong*) tohw; ++ julong v = ((julong) value << 32) | value; ++ while (count-- > 0) { ++ *to++ = v; ++ } ++} ++ ++static void pd_fill_to_aligned_words(HeapWord* tohw, size_t count, juint value) { ++ pd_fill_to_words(tohw, count, value); ++} ++ ++static void pd_fill_to_bytes(void* to, size_t count, jubyte value) { ++ (void)memset(to, value, count); ++} ++ ++static void pd_zero_to_words(HeapWord* tohw, size_t count) { ++ pd_fill_to_words(tohw, count, 0); ++} ++ ++static void pd_zero_to_bytes(void* to, size_t count) { ++ (void)memset(to, 0, count); ++} ++ ++#endif //CPU_LOONGARCH_COPY_LOONGARCH_HPP +diff --git a/src/hotspot/cpu/loongarch/depChecker_loongarch.cpp b/src/hotspot/cpu/loongarch/depChecker_loongarch.cpp +new file mode 100644 +index 0000000000..e4a92d1035 +--- /dev/null ++++ b/src/hotspot/cpu/loongarch/depChecker_loongarch.cpp +@@ -0,0 +1,30 @@ ++/* ++ * Copyright (c) 2002, 2010, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2021, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#include "precompiled.hpp" ++#include "compiler/disassembler.hpp" ++#include "depChecker_loongarch.hpp" ++ ++// Nothing to do on LoongArch +diff --git a/src/hotspot/cpu/loongarch/depChecker_loongarch.hpp b/src/hotspot/cpu/loongarch/depChecker_loongarch.hpp +new file mode 100644 +index 0000000000..29c292a74a +--- /dev/null ++++ b/src/hotspot/cpu/loongarch/depChecker_loongarch.hpp +@@ -0,0 +1,31 @@ ++/* ++ * Copyright (c) 2002, 2010, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#ifndef CPU_LOONGARCH_DEPCHECKER_LOONGARCH_HPP ++#define CPU_LOONGARCH_DEPCHECKER_LOONGARCH_HPP ++ ++// Nothing to do on LoongArch ++ ++#endif // CPU_LOONGARCH_DEPCHECKER_LOONGARCH_HPP +diff --git a/src/hotspot/cpu/loongarch/disassembler_loongarch.hpp b/src/hotspot/cpu/loongarch/disassembler_loongarch.hpp +new file mode 100644 +index 0000000000..04359bc172 +--- /dev/null ++++ b/src/hotspot/cpu/loongarch/disassembler_loongarch.hpp +@@ -0,0 +1,37 @@ ++/* ++ * Copyright (c) 1997, 2010, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2021, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. 
++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#ifndef CPU_LOONGARCH_DISASSEMBLER_LOONGARCH_HPP ++#define CPU_LOONGARCH_DISASSEMBLER_LOONGARCH_HPP ++ ++ static int pd_instruction_alignment() { ++ return sizeof(int); ++ } ++ ++ static const char* pd_cpu_opts() { ++ return "gpr-names=64"; ++ } ++ ++#endif // CPU_LOONGARCH_DISASSEMBLER_LOONGARCH_HPP +diff --git a/src/hotspot/cpu/loongarch/frame_loongarch.cpp b/src/hotspot/cpu/loongarch/frame_loongarch.cpp +new file mode 100644 +index 0000000000..9b4f3b88d4 +--- /dev/null ++++ b/src/hotspot/cpu/loongarch/frame_loongarch.cpp +@@ -0,0 +1,690 @@ ++/* ++ * Copyright (c) 1997, 2014, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2021, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#include "precompiled.hpp" ++#include "interpreter/interpreter.hpp" ++#include "memory/resourceArea.hpp" ++#include "oops/markOop.hpp" ++#include "oops/method.hpp" ++#include "oops/oop.inline.hpp" ++#include "prims/methodHandles.hpp" ++#include "runtime/frame.inline.hpp" ++#include "runtime/handles.inline.hpp" ++#include "runtime/javaCalls.hpp" ++#include "runtime/monitorChunk.hpp" ++#include "runtime/signature.hpp" ++#include "runtime/stubCodeGenerator.hpp" ++#include "runtime/stubRoutines.hpp" ++#include "vmreg_loongarch.inline.hpp" ++ ++#ifdef ASSERT ++void RegisterMap::check_location_valid() { ++} ++#endif ++ ++ ++// Profiling/safepoint support ++// for Profiling - acting on another frame. walks sender frames ++// if valid. 
++// frame profile_find_Java_sender_frame(JavaThread *thread); ++ ++bool frame::safe_for_sender(JavaThread *thread) { ++ address sp = (address)_sp; ++ address fp = (address)_fp; ++ address unextended_sp = (address)_unextended_sp; ++ ++ // consider stack guards when trying to determine "safe" stack pointers ++ static size_t stack_guard_size = os::uses_stack_guard_pages() ? ++ JavaThread::stack_red_zone_size() + JavaThread::stack_yellow_zone_size() : 0; ++ size_t usable_stack_size = thread->stack_size() - stack_guard_size; ++ ++ // sp must be within the usable part of the stack (not in guards) ++ bool sp_safe = (sp < thread->stack_base()) && ++ (sp >= thread->stack_base() - usable_stack_size); ++ ++ ++ if (!sp_safe) { ++ return false; ++ } ++ ++ // unextended sp must be within the stack and above or equal sp ++ bool unextended_sp_safe = (unextended_sp < thread->stack_base()) && ++ (unextended_sp >= sp); ++ ++ if (!unextended_sp_safe) { ++ return false; ++ } ++ ++ // an fp must be within the stack and above (but not equal) sp ++ // second evaluation on fp+ is added to handle situation where fp is -1 ++ bool fp_safe = (fp < thread->stack_base() && (fp > sp) && (((fp + (java_frame_return_addr_offset * sizeof(void*))) < thread->stack_base()))); ++ ++ // We know sp/unextended_sp are safe only fp is questionable here ++ ++ // If the current frame is known to the code cache then we can attempt to ++ // construct the sender and do some validation of it. This goes a long way ++ // toward eliminating issues when we get in frame construction code ++ ++ if (_cb != NULL ) { ++ ++ // First check if frame is complete and tester is reliable ++ // Unfortunately we can only check frame complete for runtime stubs and nmethod ++ // other generic buffer blobs are more problematic so we just assume they are ++ // ok. adapter blobs never have a frame complete and are never ok. ++ ++ if (!_cb->is_frame_complete_at(_pc)) { ++ if (_cb->is_compiled() || _cb->is_adapter_blob() || _cb->is_runtime_stub()) { ++ return false; ++ } ++ } ++ ++ // Could just be some random pointer within the codeBlob ++ if (!_cb->code_contains(_pc)) { ++ return false; ++ } ++ ++ // Entry frame checks ++ if (is_entry_frame()) { ++ // an entry frame must have a valid fp. ++ return fp_safe && is_entry_frame_valid(thread); ++ } ++ ++ intptr_t* sender_sp = NULL; ++ intptr_t* sender_unextended_sp = NULL; ++ address sender_pc = NULL; ++ intptr_t* saved_fp = NULL; ++ ++ if (is_interpreted_frame()) { ++ // fp must be safe ++ if (!fp_safe) { ++ return false; ++ } ++ ++ sender_pc = (address) this->fp()[java_frame_return_addr_offset]; ++ // for interpreted frames, the value below is the sender "raw" sp, ++ // which can be different from the sender unextended sp (the sp seen ++ // by the sender) because of current frame local variables ++ sender_sp = (intptr_t*) addr_at(java_frame_sender_sp_offset); ++ sender_unextended_sp = (intptr_t*) this->fp()[interpreter_frame_sender_sp_offset]; ++ saved_fp = (intptr_t*) this->fp()[java_frame_link_offset]; ++ ++ } else { ++ // must be some sort of compiled/runtime frame ++ // fp does not have to be safe (although it could be check for c1?) ++ ++ // check for a valid frame_size, otherwise we are unlikely to get a valid sender_pc ++ if (_cb->frame_size() <= 0) { ++ return false; ++ } ++ ++ sender_sp = _unextended_sp + _cb->frame_size(); ++ // Is sender_sp safe? 
++ if ((address)sender_sp >= thread->stack_base()) { ++ return false; ++ } ++ sender_unextended_sp = sender_sp; ++ // On LA the return_address is always the word on the stack ++ sender_pc = (address) *(sender_sp-1); ++ // Note: frame::java_frame_sender_sp_offset is only valid for compiled frame ++ saved_fp = (intptr_t*) *(sender_sp - frame::java_frame_sender_sp_offset); ++ } ++ ++ ++ // If the potential sender is the interpreter then we can do some more checking ++ if (Interpreter::contains(sender_pc)) { ++ ++ // FP is always saved in a recognizable place in any code we generate. However ++ // only if the sender is interpreted/call_stub (c1 too?) are we certain that the saved FP ++ // is really a frame pointer. ++ ++ bool saved_fp_safe = ((address)saved_fp < thread->stack_base()) && (saved_fp > sender_sp); ++ ++ if (!saved_fp_safe) { ++ return false; ++ } ++ ++ // construct the potential sender ++ ++ frame sender(sender_sp, sender_unextended_sp, saved_fp, sender_pc); ++ ++ return sender.is_interpreted_frame_valid(thread); ++ ++ } ++ ++ // We must always be able to find a recognizable pc ++ CodeBlob* sender_blob = CodeCache::find_blob_unsafe(sender_pc); ++ if (sender_pc == NULL || sender_blob == NULL) { ++ return false; ++ } ++ ++ // Could be a zombie method ++ if (sender_blob->is_zombie() || sender_blob->is_unloaded()) { ++ return false; ++ } ++ ++ // Could just be some random pointer within the codeBlob ++ if (!sender_blob->code_contains(sender_pc)) { ++ return false; ++ } ++ ++ // We should never be able to see an adapter if the current frame is something from code cache ++ if (sender_blob->is_adapter_blob()) { ++ return false; ++ } ++ ++ // Could be the call_stub ++ if (StubRoutines::returns_to_call_stub(sender_pc)) { ++ bool saved_fp_safe = ((address)saved_fp < thread->stack_base()) && (saved_fp > sender_sp); ++ ++ if (!saved_fp_safe) { ++ return false; ++ } ++ ++ // construct the potential sender ++ ++ frame sender(sender_sp, sender_unextended_sp, saved_fp, sender_pc); ++ ++ // Validate the JavaCallWrapper an entry frame must have ++ address jcw = (address)sender.entry_frame_call_wrapper(); ++ ++ bool jcw_safe = (jcw < thread->stack_base()) && (jcw > (address)sender.fp()); ++ ++ return jcw_safe; ++ } ++ ++ CompiledMethod* nm = sender_blob->as_compiled_method_or_null(); ++ if (nm != NULL) { ++ if (nm->is_deopt_mh_entry(sender_pc) || nm->is_deopt_entry(sender_pc) || ++ nm->method()->is_method_handle_intrinsic()) { ++ return false; ++ } ++ } ++ ++ // If the frame size is 0 something (or less) is bad because every nmethod has a non-zero frame size ++ // because the return address counts against the callee's frame. ++ ++ if (sender_blob->frame_size() <= 0) { ++ assert(!sender_blob->is_compiled(), "should count return address at least"); ++ return false; ++ } ++ ++ // We should never be able to see anything here except an nmethod. If something in the ++ // code cache (current frame) is called by an entity within the code cache that entity ++ // should not be anything but the call stub (already covered), the interpreter (already covered) ++ // or an nmethod. ++ ++ if (!sender_blob->is_compiled()) { ++ return false; ++ } ++ ++ // Could put some more validation for the potential non-interpreted sender ++ // frame we'd create by calling sender if I could think of any. Wait for next crash in forte... 
++ ++ // One idea is seeing if the sender_pc we have is one that we'd expect to call to current cb ++ ++ // We've validated the potential sender that would be created ++ return true; ++ } ++ ++ // Must be native-compiled frame. Since sender will try and use fp to find ++ // linkages it must be safe ++ ++ if (!fp_safe) { ++ return false; ++ } ++ ++ // Will the pc we fetch be non-zero (which we'll find at the oldest frame) ++ ++ if ( (address) this->fp()[java_frame_return_addr_offset] == NULL) return false; ++ ++ ++ // could try and do some more potential verification of native frame if we could think of some... ++ ++ return true; ++ ++} ++ ++void frame::patch_pc(Thread* thread, address pc) { ++ address* pc_addr = &(((address*) sp())[-1]); ++ if (TracePcPatching) { ++ tty->print_cr("patch_pc at address " INTPTR_FORMAT " [" INTPTR_FORMAT " -> " INTPTR_FORMAT "]", ++ p2i(pc_addr), p2i(*pc_addr), p2i(pc)); ++ } ++ // Either the return address is the original one or we are going to ++ // patch in the same address that's already there. ++ assert(_pc == *pc_addr || pc == *pc_addr, "must be"); ++ *pc_addr = pc; ++ _cb = CodeCache::find_blob(pc); ++ address original_pc = CompiledMethod::get_deopt_original_pc(this); ++ if (original_pc != NULL) { ++ assert(original_pc == _pc, "expected original PC to be stored before patching"); ++ _deopt_state = is_deoptimized; ++ // leave _pc as is ++ } else { ++ _deopt_state = not_deoptimized; ++ _pc = pc; ++ } ++} ++ ++bool frame::is_interpreted_frame() const { ++ return Interpreter::contains(pc()); ++} ++ ++int frame::frame_size(RegisterMap* map) const { ++ frame sender = this->sender(map); ++ return sender.sp() - sp(); ++} ++ ++intptr_t* frame::entry_frame_argument_at(int offset) const { ++ // convert offset to index to deal with tsi ++ int index = (Interpreter::expr_offset_in_bytes(offset)/wordSize); ++ // Entry frame's arguments are always in relation to unextended_sp() ++ return &unextended_sp()[index]; ++} ++ ++// sender_sp ++#ifdef CC_INTERP ++intptr_t* frame::interpreter_frame_sender_sp() const { ++ assert(is_interpreted_frame(), "interpreted frame expected"); ++ // QQQ why does this specialize method exist if frame::sender_sp() does same thing? ++ // seems odd and if we always know interpreted vs. non then sender_sp() is really ++ // doing too much work. 
++ return get_interpreterState()->sender_sp(); ++} ++ ++// monitor elements ++ ++BasicObjectLock* frame::interpreter_frame_monitor_begin() const { ++ return get_interpreterState()->monitor_base(); ++} ++ ++BasicObjectLock* frame::interpreter_frame_monitor_end() const { ++ return (BasicObjectLock*) get_interpreterState()->stack_base(); ++} ++ ++#else // CC_INTERP ++ ++intptr_t* frame::interpreter_frame_sender_sp() const { ++ assert(is_interpreted_frame(), "interpreted frame expected"); ++ return (intptr_t*) at(interpreter_frame_sender_sp_offset); ++} ++ ++void frame::set_interpreter_frame_sender_sp(intptr_t* sender_sp) { ++ assert(is_interpreted_frame(), "interpreted frame expected"); ++ int_at_put(interpreter_frame_sender_sp_offset, (intptr_t) sender_sp); ++} ++ ++ ++// monitor elements ++ ++BasicObjectLock* frame::interpreter_frame_monitor_begin() const { ++ return (BasicObjectLock*) addr_at(interpreter_frame_monitor_block_bottom_offset); ++} ++ ++BasicObjectLock* frame::interpreter_frame_monitor_end() const { ++ BasicObjectLock* result = (BasicObjectLock*) *addr_at(interpreter_frame_monitor_block_top_offset); ++ // make sure the pointer points inside the frame ++ assert((intptr_t) fp() > (intptr_t) result, "result must < than frame pointer"); ++ assert((intptr_t) sp() <= (intptr_t) result, "result must >= than stack pointer"); ++ return result; ++} ++ ++void frame::interpreter_frame_set_monitor_end(BasicObjectLock* value) { ++ *((BasicObjectLock**)addr_at(interpreter_frame_monitor_block_top_offset)) = value; ++} ++ ++// Used by template based interpreter deoptimization ++void frame::interpreter_frame_set_last_sp(intptr_t* sp) { ++ *((intptr_t**)addr_at(interpreter_frame_last_sp_offset)) = sp; ++} ++#endif // CC_INTERP ++ ++frame frame::sender_for_entry_frame(RegisterMap* map) const { ++ assert(map != NULL, "map must be set"); ++ // Java frame called from C; skip all C frames and return top C ++ // frame of that chunk as the sender ++ JavaFrameAnchor* jfa = entry_frame_call_wrapper()->anchor(); ++ assert(!entry_frame_is_first(), "next Java fp must be non zero"); ++ assert(jfa->last_Java_sp() > sp(), "must be above this frame on stack"); ++ map->clear(); ++ assert(map->include_argument_oops(), "should be set by clear"); ++ if (jfa->last_Java_pc() != NULL ) { ++ frame fr(jfa->last_Java_sp(), jfa->last_Java_fp(), jfa->last_Java_pc()); ++ return fr; ++ } ++ frame fr(jfa->last_Java_sp(), jfa->last_Java_fp()); ++ return fr; ++} ++ ++frame frame::sender_for_interpreter_frame(RegisterMap* map) const { ++ // sp is the raw sp from the sender after adapter or interpreter extension ++ intptr_t* sender_sp = this->sender_sp(); ++ ++ // This is the sp before any possible extension (adapter/locals). ++ intptr_t* unextended_sp = interpreter_frame_sender_sp(); ++ ++ // The interpreter and compiler(s) always save FP in a known ++ // location on entry. We must record where that location is ++ // so this if FP was live on callout from c2 we can find ++ // the saved copy no matter what it called. ++ ++ // Since the interpreter always saves FP if we record where it is then ++ // we don't have to always save FP on entry and exit to c2 compiled ++ // code, on entry will be enough. 
++#ifdef COMPILER2_OR_JVMCI ++ if (map->update_map()) { ++ update_map_with_saved_link(map, (intptr_t**) addr_at(java_frame_link_offset)); ++ } ++#endif // COMPILER2_OR_JVMCI ++ return frame(sender_sp, unextended_sp, link(), sender_pc()); ++} ++ ++ ++//------------------------------------------------------------------------------ ++// frame::verify_deopt_original_pc ++// ++// Verifies the calculated original PC of a deoptimization PC for the ++// given unextended SP. The unextended SP might also be the saved SP ++// for MethodHandle call sites. ++#ifdef ASSERT ++void frame::verify_deopt_original_pc(CompiledMethod* nm, intptr_t* unextended_sp) { ++ frame fr; ++ ++ // This is ugly but it's better than to change {get,set}_original_pc ++ // to take an SP value as argument. And it's only a debugging ++ // method anyway. ++ fr._unextended_sp = unextended_sp; ++ ++ address original_pc = nm->get_original_pc(&fr); ++ assert(nm->insts_contains(original_pc), ++ "original PC must be in the main code section of the the compiled method (or must be immediately following it)"); ++} ++#endif ++ ++ ++//------------------------------------------------------------------------------ ++// frame::adjust_unextended_sp ++void frame::adjust_unextended_sp() { ++ // On LoongArch, sites calling method handle intrinsics and lambda forms are treated ++ // as any other call site. Therefore, no special action is needed when we are ++ // returning to any of these call sites. ++ ++ if (_cb != NULL) { ++ CompiledMethod* sender_cm = _cb->as_compiled_method_or_null(); ++ if (sender_cm != NULL) { ++ // If the sender PC is a deoptimization point, get the original PC. ++ if (sender_cm->is_deopt_entry(_pc) || ++ sender_cm->is_deopt_mh_entry(_pc)) { ++ DEBUG_ONLY(verify_deopt_original_pc(sender_cm, _unextended_sp)); ++ } ++ } ++ } ++} ++ ++//------------------------------------------------------------------------------ ++// frame::update_map_with_saved_link ++void frame::update_map_with_saved_link(RegisterMap* map, intptr_t** link_addr) { ++ // The interpreter and compiler(s) always save fp in a known ++ // location on entry. We must record where that location is ++ // so that if fp was live on callout from c2 we can find ++ // the saved copy no matter what it called. ++ ++ // Since the interpreter always saves fp if we record where it is then ++ // we don't have to always save fp on entry and exit to c2 compiled ++ // code, on entry will be enough. ++ map->set_location(FP->as_VMReg(), (address) link_addr); ++ // this is weird "H" ought to be at a higher address however the ++ // oopMaps seems to have the "H" regs at the same address and the ++ // vanilla register. ++ // XXXX make this go away ++ if (true) { ++ map->set_location(FP->as_VMReg()->next(), (address) link_addr); ++ } ++} ++ ++//------------------------------sender_for_compiled_frame----------------------- ++frame frame::sender_for_compiled_frame(RegisterMap* map) const { ++ assert(map != NULL, "map must be set"); ++ ++ // frame owned by optimizing compiler ++ assert(_cb->frame_size() >= 0, "must have non-zero frame size"); ++ ++ intptr_t* sender_sp = unextended_sp() + _cb->frame_size(); ++ intptr_t* unextended_sp = sender_sp; ++ ++ // On Loongson the return_address is always the word on the stack ++ // the fp in compiler points to sender fp, but in interpreter, fp points to return address, ++ // so getting sender for compiled frame is not same as interpreter frame. 
++ // we hard code here temporarily ++ // spark ++ address sender_pc = (address) *(sender_sp-1); ++ ++ intptr_t** saved_fp_addr = (intptr_t**) (sender_sp - frame::java_frame_sender_sp_offset); ++ ++ if (map->update_map()) { ++ // Tell GC to use argument oopmaps for some runtime stubs that need it. ++ // For C1, the runtime stub might not have oop maps, so set this flag ++ // outside of update_register_map. ++ map->set_include_argument_oops(_cb->caller_must_gc_arguments(map->thread())); ++ if (_cb->oop_maps() != NULL) { ++ OopMapSet::update_register_map(this, map); ++ } ++ ++ // Since the prolog does the save and restore of epb there is no oopmap ++ // for it so we must fill in its location as if there was an oopmap entry ++ // since if our caller was compiled code there could be live jvm state in it. ++ update_map_with_saved_link(map, saved_fp_addr); ++ } ++ assert(sender_sp != sp(), "must have changed"); ++ return frame(sender_sp, unextended_sp, *saved_fp_addr, sender_pc); ++} ++ ++frame frame::sender(RegisterMap* map) const { ++ // Default is we done have to follow them. The sender_for_xxx will ++ // update it accordingly ++ map->set_include_argument_oops(false); ++ ++ if (is_entry_frame()) return sender_for_entry_frame(map); ++ if (is_interpreted_frame()) return sender_for_interpreter_frame(map); ++ assert(_cb == CodeCache::find_blob(pc()),"Must be the same"); ++ ++ if (_cb != NULL) { ++ return sender_for_compiled_frame(map); ++ } ++ // Must be native-compiled frame, i.e. the marshaling code for native ++ // methods that exists in the core system. ++ return frame(sender_sp(), link(), sender_pc()); ++} ++ ++bool frame::is_interpreted_frame_valid(JavaThread* thread) const { ++// QQQ ++#ifdef CC_INTERP ++#else ++ assert(is_interpreted_frame(), "Not an interpreted frame"); ++ // These are reasonable sanity checks ++ if (fp() == 0 || (intptr_t(fp()) & (wordSize-1)) != 0) { ++ return false; ++ } ++ if (sp() == 0 || (intptr_t(sp()) & (wordSize-1)) != 0) { ++ return false; ++ } ++ if (fp() + interpreter_frame_initial_sp_offset < sp()) { ++ return false; ++ } ++ // These are hacks to keep us out of trouble. ++ // The problem with these is that they mask other problems ++ if (fp() <= sp()) { // this attempts to deal with unsigned comparison above ++ return false; ++ } ++ ++ // do some validation of frame elements ++ ++ // first the method ++ ++ Method* m = *interpreter_frame_method_addr(); ++ ++ // validate the method we'd find in this potential sender ++ if (!Method::is_valid_method(m)) return false; ++ ++ // stack frames shouldn't be much larger than max_stack elements ++ ++ //if (fp() - sp() > 1024 + m->max_stack()*Interpreter::stackElementSize()) { ++ if (fp() - sp() > 4096) { // stack frames shouldn't be large. ++ return false; ++ } ++ ++ // validate bci/bcp ++ ++ address bcp = interpreter_frame_bcp(); ++ if (m->validate_bci_from_bcp(bcp) < 0) { ++ return false; ++ } ++ ++ // validate ConstantPoolCache* ++ ++ ConstantPoolCache* cp = *interpreter_frame_cache_addr(); ++ ++ if (MetaspaceObj::is_valid(cp) == false) return false; ++ ++ // validate locals ++ ++ address locals = (address) *interpreter_frame_locals_addr(); ++ ++ if (locals > thread->stack_base() || locals < (address) fp()) return false; ++ ++ // We'd have to be pretty unlucky to be mislead at this point ++ ++#endif // CC_INTERP ++ return true; ++} ++ ++BasicType frame::interpreter_frame_result(oop* oop_result, jvalue* value_result) { ++#ifdef CC_INTERP ++ // Needed for JVMTI. 
The result should always be in the interpreterState object ++ assert(false, "NYI"); ++ interpreterState istate = get_interpreterState(); ++#endif // CC_INTERP ++ assert(is_interpreted_frame(), "interpreted frame expected"); ++ Method* method = interpreter_frame_method(); ++ BasicType type = method->result_type(); ++ ++ intptr_t* tos_addr; ++ if (method->is_native()) { ++ // Prior to calling into the runtime to report the method_exit the possible ++ // return value is pushed to the native stack. If the result is a jfloat/jdouble ++ // then ST0 is saved. See the note in generate_native_result ++ tos_addr = (intptr_t*)sp(); ++ if (type == T_FLOAT || type == T_DOUBLE) { ++ tos_addr += 2; ++ } ++ } else { ++ tos_addr = (intptr_t*)interpreter_frame_tos_address(); ++ } ++ ++ switch (type) { ++ case T_OBJECT : ++ case T_ARRAY : { ++ oop obj; ++ if (method->is_native()) { ++#ifdef CC_INTERP ++ obj = istate->_oop_temp; ++#else ++ obj = cast_to_oop(at(interpreter_frame_oop_temp_offset)); ++#endif // CC_INTERP ++ } else { ++ oop* obj_p = (oop*)tos_addr; ++ obj = (obj_p == NULL) ? (oop)NULL : *obj_p; ++ } ++ assert(obj == NULL || Universe::heap()->is_in(obj), "sanity check"); ++ *oop_result = obj; ++ break; ++ } ++ case T_BOOLEAN : value_result->z = *(jboolean*)tos_addr; break; ++ case T_BYTE : value_result->b = *(jbyte*)tos_addr; break; ++ case T_CHAR : value_result->c = *(jchar*)tos_addr; break; ++ case T_SHORT : value_result->s = *(jshort*)tos_addr; break; ++ case T_INT : value_result->i = *(jint*)tos_addr; break; ++ case T_LONG : value_result->j = *(jlong*)tos_addr; break; ++ case T_FLOAT : value_result->f = *(jfloat*)tos_addr; break; ++ case T_DOUBLE : value_result->d = *(jdouble*)tos_addr; break; ++ case T_VOID : /* Nothing to do */ break; ++ default : ShouldNotReachHere(); ++ } ++ ++ return type; ++} ++ ++ ++intptr_t* frame::interpreter_frame_tos_at(jint offset) const { ++ int index = (Interpreter::expr_offset_in_bytes(offset)/wordSize); ++ return &interpreter_frame_tos_address()[index]; ++} ++ ++#ifndef PRODUCT ++ ++#define DESCRIBE_FP_OFFSET(name) \ ++ values.describe(frame_no, fp() + frame::name##_offset, #name) ++ ++void frame::describe_pd(FrameValues& values, int frame_no) { ++ if (is_interpreted_frame()) { ++ DESCRIBE_FP_OFFSET(interpreter_frame_sender_sp); ++ DESCRIBE_FP_OFFSET(interpreter_frame_last_sp); ++ DESCRIBE_FP_OFFSET(interpreter_frame_method); ++ DESCRIBE_FP_OFFSET(interpreter_frame_mirror); ++ DESCRIBE_FP_OFFSET(interpreter_frame_mdp); ++ DESCRIBE_FP_OFFSET(interpreter_frame_cache); ++ DESCRIBE_FP_OFFSET(interpreter_frame_locals); ++ DESCRIBE_FP_OFFSET(interpreter_frame_bcp); ++ DESCRIBE_FP_OFFSET(interpreter_frame_initial_sp); ++ } ++} ++#endif ++ ++intptr_t *frame::initial_deoptimization_info() { ++ // used to reset the saved FP ++ return fp(); ++} ++ ++intptr_t* frame::real_fp() const { ++ if (_cb != NULL) { ++ // use the frame size if valid ++ int size = _cb->frame_size(); ++ if (size > 0) { ++ return unextended_sp() + size; ++ } ++ } ++ // else rely on fp() ++ assert(! is_compiled_frame(), "unknown compiled frame size"); ++ return fp(); ++} ++ ++#ifndef PRODUCT ++// This is a generic constructor which is only used by pns() in debug.cpp. 
++frame::frame(void* sp, void* fp, void* pc) { ++ init((intptr_t*)sp, (intptr_t*)fp, (address)pc); ++} ++ ++void frame::pd_ps() {} ++#endif +diff --git a/src/hotspot/cpu/loongarch/frame_loongarch.hpp b/src/hotspot/cpu/loongarch/frame_loongarch.hpp +new file mode 100644 +index 0000000000..b16389b3a3 +--- /dev/null ++++ b/src/hotspot/cpu/loongarch/frame_loongarch.hpp +@@ -0,0 +1,171 @@ ++/* ++ * Copyright (c) 1997, 2013, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2021, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#ifndef CPU_LOONGARCH_FRAME_LOONGARCH_HPP ++#define CPU_LOONGARCH_FRAME_LOONGARCH_HPP ++ ++#include "runtime/synchronizer.hpp" ++ ++// A frame represents a physical stack frame (an activation). Frames can be ++// C or Java frames, and the Java frames can be interpreted or compiled. ++// In contrast, vframes represent source-level activations, so that one physical frame ++// can correspond to multiple source level frames because of inlining. ++// A frame is comprised of {pc, fp, sp} ++// ------------------------------ Asm interpreter ---------------------------------------- ++// Layout of asm interpreter frame: ++// Low ++// [expression stack ] * <- sp ++// [monitors ] \ ++// ... | monitor block size ++// [monitors ] / ++// [monitor block size ] ++// [byte code index/pointr] = bcx() bcx_offset ++// [pointer to locals ] = locals() locals_offset ++// [constant pool cache ] = cache() cache_offset ++// [methodData ] = mdp() mdx_offset ++// [methodOop ] = method() method_offset ++// [last sp ] = last_sp() last_sp_offset ++// [old stack pointer ] (sender_sp) sender_sp_offset ++// [old frame pointer ] <- fp = link() ++// [return pc ] ++// [oop temp ] (only for native calls) ++// [locals and parameters ] ++// High <- sender sp ++// ------------------------------ Asm interpreter ---------------------------------------- ++// ++// ------------------------------ Native (C frame) --------------------------------------- ++// Layout of C frame: ++// High ++// | ++// - <----- fp <- sender sp ++// fp -8 | [ra] = sender_pc() ++// fp-16 | [fp (sender)] = link() ++// | [...] 
++// | ++// - <----- sp ++// | ++// v ++// Low ++// ------------------------------ Native (C frame) --------------------------------------- ++ ++ public: ++ enum { ++ pc_return_offset = 0, ++ ++ // Java frames ++ java_frame_link_offset = 0, ++ java_frame_return_addr_offset = 1, ++ java_frame_sender_sp_offset = 2, ++ ++ // Native frames ++ native_frame_link_offset = -2, ++ native_frame_return_addr_offset = -1, ++ native_frame_sender_sp_offset = 0, ++ ++ // Interpreter frames ++ interpreter_frame_result_handler_offset = 3, // for native calls only ++ interpreter_frame_oop_temp_offset = 2, // for native calls only ++ ++ interpreter_frame_sender_fp_offset = 0, ++ interpreter_frame_sender_sp_offset = -1, ++ // outgoing sp before a call to an invoked method ++ interpreter_frame_last_sp_offset = interpreter_frame_sender_sp_offset - 1, ++ interpreter_frame_locals_offset = interpreter_frame_last_sp_offset - 1, ++ interpreter_frame_method_offset = interpreter_frame_locals_offset - 1, ++ interpreter_frame_mirror_offset = interpreter_frame_method_offset - 1, ++ interpreter_frame_mdp_offset = interpreter_frame_mirror_offset - 1, ++ interpreter_frame_cache_offset = interpreter_frame_mdp_offset - 1, ++ interpreter_frame_bcp_offset = interpreter_frame_cache_offset - 1, ++ interpreter_frame_initial_sp_offset = interpreter_frame_bcp_offset - 1, ++ ++ interpreter_frame_monitor_block_top_offset = interpreter_frame_initial_sp_offset, ++ interpreter_frame_monitor_block_bottom_offset = interpreter_frame_initial_sp_offset, ++ ++ // Entry frames ++ entry_frame_call_wrapper_offset = -9, ++ ++ // Native frames ++ ++ native_frame_initial_param_offset = 2 ++ ++ }; ++ ++ intptr_t ptr_at(int offset) const { ++ return *ptr_at_addr(offset); ++ } ++ ++ void ptr_at_put(int offset, intptr_t value) { ++ *ptr_at_addr(offset) = value; ++ } ++ ++ private: ++ // an additional field beyond _sp and _pc: ++ intptr_t* _fp; // frame pointer ++ // The interpreter and adapters will extend the frame of the caller. ++ // Since oopMaps are based on the sp of the caller before extension ++ // we need to know that value. However in order to compute the address ++ // of the return address we need the real "raw" sp. Since sparc already ++ // uses sp() to mean "raw" sp and unextended_sp() to mean the caller's ++ // original sp we use that convention. 
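// A rough, standalone picture of why _sp and _unextended_sp can differ (host-only
// sketch with made-up numbers; in the VM the extension is performed by the
// interpreter/adapters, not by this illustrative code):
//
//   #include <cassert>
//   #include <cstdint>
//
//   int main() {
//     intptr_t stack[16] = {0};
//     intptr_t* caller_sp = &stack[10];        // sp the caller originally established
//     const int extension_words = 3;           // extra outgoing words pushed by an adapter
//
//     intptr_t* sp            = caller_sp - extension_words;  // "raw" sp after extension
//     intptr_t* unextended_sp = caller_sp;                     // caller's original sp
//     assert(unextended_sp - sp == extension_words);
//
//     // Sender math (compare sender_for_compiled_frame) has to start from the
//     // unextended sp, or the computed sender_sp would be off by the extension.
//     const int callee_frame_size_in_words = 4;
//     intptr_t* sender_sp = unextended_sp + callee_frame_size_in_words;
//     assert(sender_sp > unextended_sp);
//     return 0;
//   }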
++ ++ intptr_t* _unextended_sp; ++ void adjust_unextended_sp(); ++ ++ intptr_t* ptr_at_addr(int offset) const { ++ return (intptr_t*) addr_at(offset); ++ } ++#ifdef ASSERT ++ // Used in frame::sender_for_{interpreter,compiled}_frame ++ static void verify_deopt_original_pc(CompiledMethod* nm, intptr_t* unextended_sp); ++#endif ++ ++ public: ++ // Constructors ++ ++ frame(intptr_t* sp, intptr_t* fp, address pc); ++ ++ frame(intptr_t* sp, intptr_t* unextended_sp, intptr_t* fp, address pc); ++ ++ frame(intptr_t* sp, intptr_t* fp); ++ ++ void init(intptr_t* sp, intptr_t* fp, address pc); ++ ++ // accessors for the instance variables ++ intptr_t* fp() const { return _fp; } ++ ++ inline address* sender_pc_addr() const; ++ ++ // expression stack tos if we are nested in a java call ++ intptr_t* interpreter_frame_last_sp() const; ++ ++ // helper to update a map with callee-saved FP ++ static void update_map_with_saved_link(RegisterMap* map, intptr_t** link_addr); ++ ++ // deoptimization support ++ void interpreter_frame_set_last_sp(intptr_t* sp); ++ ++ static jint interpreter_frame_expression_stack_direction() { return -1; } ++ ++#endif // CPU_LOONGARCH_FRAME_LOONGARCH_HPP +diff --git a/src/hotspot/cpu/loongarch/frame_loongarch.inline.hpp b/src/hotspot/cpu/loongarch/frame_loongarch.inline.hpp +new file mode 100644 +index 0000000000..1ddc038eea +--- /dev/null ++++ b/src/hotspot/cpu/loongarch/frame_loongarch.inline.hpp +@@ -0,0 +1,252 @@ ++/* ++ * Copyright (c) 1997, 2013, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ * ++ */ ++ ++#ifndef CPU_LOONGARCH_FRAME_LOONGARCH_INLINE_HPP ++#define CPU_LOONGARCH_FRAME_LOONGARCH_INLINE_HPP ++ ++#include "code/codeCache.hpp" ++#include "code/vmreg.inline.hpp" ++ ++// Inline functions for Loongson frames: ++ ++// Constructors: ++ ++inline frame::frame() { ++ _pc = NULL; ++ _sp = NULL; ++ _unextended_sp = NULL; ++ _fp = NULL; ++ _cb = NULL; ++ _deopt_state = unknown; ++} ++ ++inline void frame::init(intptr_t* sp, intptr_t* fp, address pc) { ++ _sp = sp; ++ _unextended_sp = sp; ++ _fp = fp; ++ _pc = pc; ++ assert(pc != NULL, "no pc?"); ++ _cb = CodeCache::find_blob(pc); ++ adjust_unextended_sp(); ++ ++ address original_pc = CompiledMethod::get_deopt_original_pc(this); ++ if (original_pc != NULL) { ++ _pc = original_pc; ++ _deopt_state = is_deoptimized; ++ } else { ++ _deopt_state = not_deoptimized; ++ } ++} ++ ++inline frame::frame(intptr_t* sp, intptr_t* fp, address pc) { ++ init(sp, fp, pc); ++} ++ ++inline frame::frame(intptr_t* sp, intptr_t* unextended_sp, intptr_t* fp, address pc) { ++ _sp = sp; ++ _unextended_sp = unextended_sp; ++ _fp = fp; ++ _pc = pc; ++ assert(pc != NULL, "no pc?"); ++ _cb = CodeCache::find_blob(pc); ++ adjust_unextended_sp(); ++ ++ address original_pc = CompiledMethod::get_deopt_original_pc(this); ++ if (original_pc != NULL) { ++ _pc = original_pc; ++ _deopt_state = is_deoptimized; ++ } else { ++ _deopt_state = not_deoptimized; ++ } ++} ++ ++inline frame::frame(intptr_t* sp, intptr_t* fp) { ++ _sp = sp; ++ _unextended_sp = sp; ++ _fp = fp; ++ _pc = (address)(sp[-1]); ++ ++ // Here's a sticky one. This constructor can be called via AsyncGetCallTrace ++ // when last_Java_sp is non-null but the pc fetched is junk. If we are truly ++ // unlucky the junk value could be to a zombied method and we'll die on the ++ // find_blob call. This is also why we can have no asserts on the validity ++ // of the pc we find here. AsyncGetCallTrace -> pd_get_top_frame_for_signal_handler ++ // -> pd_last_frame should use a specialized version of pd_last_frame which could ++ // call a specilaized frame constructor instead of this one. ++ // Then we could use the assert below. However this assert is of somewhat dubious ++ // value. ++ // assert(_pc != NULL, "no pc?"); ++ ++ _cb = CodeCache::find_blob(_pc); ++ adjust_unextended_sp(); ++ address original_pc = CompiledMethod::get_deopt_original_pc(this); ++ if (original_pc != NULL) { ++ _pc = original_pc; ++ _deopt_state = is_deoptimized; ++ } else { ++ _deopt_state = not_deoptimized; ++ } ++} ++ ++// Accessors ++ ++inline bool frame::equal(frame other) const { ++ bool ret = sp() == other.sp() ++ && unextended_sp() == other.unextended_sp() ++ && fp() == other.fp() ++ && pc() == other.pc(); ++ assert(!ret || ret && cb() == other.cb() && _deopt_state == other._deopt_state, "inconsistent construction"); ++ return ret; ++} ++ ++// Return unique id for this frame. The id must have a value where we can distinguish ++// identity and younger/older relationship. NULL represents an invalid (incomparable) ++// frame. 
++inline intptr_t* frame::id(void) const { return unextended_sp(); } ++ ++// Relationals on frames based ++// Return true if the frame is younger (more recent activation) than the frame represented by id ++inline bool frame::is_younger(intptr_t* id) const { assert(this->id() != NULL && id != NULL, "NULL frame id"); ++ return this->id() < id ; } ++ ++// Return true if the frame is older (less recent activation) than the frame represented by id ++inline bool frame::is_older(intptr_t* id) const { assert(this->id() != NULL && id != NULL, "NULL frame id"); ++ return this->id() > id ; } ++ ++ ++ ++inline intptr_t* frame::link() const { ++ if (is_java_frame()) ++ return (intptr_t*) *(intptr_t **)addr_at(java_frame_link_offset); ++ return (intptr_t*) *(intptr_t **)addr_at(native_frame_link_offset); ++} ++ ++inline intptr_t* frame::link_or_null() const { ++ intptr_t** ptr = is_java_frame() ? (intptr_t **)addr_at(java_frame_link_offset) ++ : (intptr_t **)addr_at(native_frame_link_offset); ++ return os::is_readable_pointer(ptr) ? *ptr : NULL; ++} ++ ++inline intptr_t* frame::unextended_sp() const { return _unextended_sp; } ++ ++// Return address: ++ ++inline address* frame::sender_pc_addr() const { ++ if (is_java_frame()) ++ return (address*) addr_at(java_frame_return_addr_offset); ++ return (address*) addr_at(native_frame_return_addr_offset); ++} ++ ++inline address frame::sender_pc() const { return *sender_pc_addr(); } ++ ++inline intptr_t* frame::sender_sp() const { ++ if (is_java_frame()) ++ return addr_at(java_frame_sender_sp_offset); ++ return addr_at(native_frame_sender_sp_offset); ++} ++ ++inline intptr_t** frame::interpreter_frame_locals_addr() const { ++ return (intptr_t**)addr_at(interpreter_frame_locals_offset); ++} ++ ++inline intptr_t* frame::interpreter_frame_last_sp() const { ++ return *(intptr_t**)addr_at(interpreter_frame_last_sp_offset); ++} ++ ++inline intptr_t* frame::interpreter_frame_bcp_addr() const { ++ return (intptr_t*)addr_at(interpreter_frame_bcp_offset); ++} ++ ++ ++inline intptr_t* frame::interpreter_frame_mdp_addr() const { ++ return (intptr_t*)addr_at(interpreter_frame_mdp_offset); ++} ++ ++ ++ ++// Constant pool cache ++ ++inline ConstantPoolCache** frame::interpreter_frame_cache_addr() const { ++ return (ConstantPoolCache**)addr_at(interpreter_frame_cache_offset); ++} ++ ++// Method ++ ++inline Method** frame::interpreter_frame_method_addr() const { ++ return (Method**)addr_at(interpreter_frame_method_offset); ++} ++ ++// Mirror ++ ++inline oop* frame::interpreter_frame_mirror_addr() const { ++ return (oop*)addr_at(interpreter_frame_mirror_offset); ++} ++ ++// top of expression stack ++inline intptr_t* frame::interpreter_frame_tos_address() const { ++ intptr_t* last_sp = interpreter_frame_last_sp(); ++ if (last_sp == NULL ) { ++ return sp(); ++ } else { ++ // sp() may have been extended by an adapter ++ assert(last_sp <= (intptr_t*)interpreter_frame_monitor_end(), "bad tos"); ++ return last_sp; ++ } ++} ++ ++inline oop* frame::interpreter_frame_temp_oop_addr() const { ++ return (oop *)(fp() + interpreter_frame_oop_temp_offset); ++} ++ ++inline int frame::interpreter_frame_monitor_size() { ++ return BasicObjectLock::size(); ++} ++ ++ ++// expression stack ++// (the max_stack arguments are used by the GC; see class FrameClosure) ++ ++inline intptr_t* frame::interpreter_frame_expression_stack() const { ++ intptr_t* monitor_end = (intptr_t*) interpreter_frame_monitor_end(); ++ return monitor_end-1; ++} ++ ++// Entry frames ++ ++inline JavaCallWrapper** 
frame::entry_frame_call_wrapper_addr() const { ++ return (JavaCallWrapper**)addr_at(entry_frame_call_wrapper_offset); ++} ++ ++// Compiled frames ++ ++inline oop frame::saved_oop_result(RegisterMap* map) const { ++ return *((oop*) map->location(V0->as_VMReg())); ++} ++ ++inline void frame::set_saved_oop_result(RegisterMap* map, oop obj) { ++ *((oop*) map->location(V0->as_VMReg())) = obj; ++} ++ ++#endif // CPU_LOONGARCH_FRAME_LOONGARCH_INLINE_HPP +diff --git a/src/hotspot/cpu/loongarch/gc/g1/g1BarrierSetAssembler_loongarch.cpp b/src/hotspot/cpu/loongarch/gc/g1/g1BarrierSetAssembler_loongarch.cpp +new file mode 100644 +index 0000000000..e1e4748c49 +--- /dev/null ++++ b/src/hotspot/cpu/loongarch/gc/g1/g1BarrierSetAssembler_loongarch.cpp +@@ -0,0 +1,523 @@ ++/* ++ * Copyright (c) 2018, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2021, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#include "precompiled.hpp" ++#include "asm/macroAssembler.inline.hpp" ++#include "gc/g1/g1BarrierSet.hpp" ++#include "gc/g1/g1BarrierSetAssembler.hpp" ++#include "gc/g1/g1BarrierSetRuntime.hpp" ++#include "gc/g1/g1CardTable.hpp" ++#include "gc/g1/g1ThreadLocalData.hpp" ++#include "gc/g1/heapRegion.hpp" ++#include "interpreter/interp_masm.hpp" ++#include "runtime/sharedRuntime.hpp" ++#include "utilities/macros.hpp" ++#ifdef COMPILER1 ++#include "c1/c1_LIRAssembler.hpp" ++#include "c1/c1_MacroAssembler.hpp" ++#include "gc/g1/c1/g1BarrierSetC1.hpp" ++#endif ++ ++#define __ masm-> ++ ++void G1BarrierSetAssembler::gen_write_ref_array_pre_barrier(MacroAssembler* masm, DecoratorSet decorators, ++ Register addr, Register count, RegSet saved_regs) { ++ bool dest_uninitialized = (decorators & IS_DEST_UNINITIALIZED) != 0; ++ ++ if (!dest_uninitialized) { ++#ifndef OPT_THREAD ++ Register thread = T9; ++ __ get_thread(thread); ++#else ++ Register thread = TREG; ++#endif ++ ++ Label filtered; ++ Address in_progress(thread, in_bytes(G1ThreadLocalData::satb_mark_queue_active_offset())); ++ // Is marking active? 
++ if (in_bytes(SATBMarkQueue::byte_width_of_active()) == 4) { ++ __ ld_w(AT, in_progress); ++ } else { ++ assert(in_bytes(SATBMarkQueue::byte_width_of_active()) == 1, "Assumption"); ++ __ ld_b(AT, in_progress); ++ } ++ ++ __ beqz(AT, filtered); ++ ++ __ push(saved_regs); ++ if (count == A0) { ++ if (addr == A1) { ++ __ move(AT, A0); ++ __ move(A0, A1); ++ __ move(A1, AT); ++ } else { ++ __ move(A1, count); ++ __ move(A0, addr); ++ } ++ } else { ++ __ move(A0, addr); ++ __ move(A1, count); ++ } ++ if (UseCompressedOops) { ++ __ call_VM_leaf(CAST_FROM_FN_PTR(address, G1BarrierSetRuntime::write_ref_array_pre_narrow_oop_entry), 2); ++ } else { ++ __ call_VM_leaf(CAST_FROM_FN_PTR(address, G1BarrierSetRuntime::write_ref_array_pre_oop_entry), 2); ++ } ++ __ pop(saved_regs); ++ ++ __ bind(filtered); ++ } ++} ++ ++void G1BarrierSetAssembler::gen_write_ref_array_post_barrier(MacroAssembler* masm, DecoratorSet decorators, ++ Register addr, Register count, Register tmp, RegSet saved_regs) { ++ __ push(saved_regs); ++ if (count == A0) { ++ assert_different_registers(A1, addr); ++ __ move(A1, count); ++ __ move(A0, addr); ++ } else { ++ assert_different_registers(A0, count); ++ __ move(A0, addr); ++ __ move(A1, count); ++ } ++ __ call_VM_leaf(CAST_FROM_FN_PTR(address, G1BarrierSetRuntime::write_ref_array_post_entry), 2); ++ __ pop(saved_regs); ++} ++ ++void G1BarrierSetAssembler::load_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type, ++ Register dst, Address src, Register tmp1, Register tmp_thread) { ++ bool on_oop = type == T_OBJECT || type == T_ARRAY; ++ bool on_weak = (decorators & ON_WEAK_OOP_REF) != 0; ++ bool on_phantom = (decorators & ON_PHANTOM_OOP_REF) != 0; ++ bool on_reference = on_weak || on_phantom; ++ ModRefBarrierSetAssembler::load_at(masm, decorators, type, dst, src, tmp1, tmp_thread); ++ if (on_oop && on_reference) { ++ const Register thread = TREG; ++#ifndef OPT_THREAD ++ __ get_thread(thread); ++#endif ++ // RA is live. It must be saved around calls. ++ __ enter(); // barrier may call runtime ++ // Generate the G1 pre-barrier code to log the value of ++ // the referent field in an SATB buffer. ++ g1_write_barrier_pre(masm /* masm */, ++ noreg /* obj */, ++ dst /* pre_val */, ++ thread /* thread */, ++ tmp1 /* tmp */, ++ true /* tosca_live */, ++ true /* expand_call */); ++ __ leave(); ++ } ++} ++ ++void G1BarrierSetAssembler::g1_write_barrier_pre(MacroAssembler* masm, ++ Register obj, ++ Register pre_val, ++ Register thread, ++ Register tmp, ++ bool tosca_live, ++ bool expand_call) { ++ // If expand_call is true then we expand the call_VM_leaf macro ++ // directly to skip generating the check by ++ // InterpreterMacroAssembler::call_VM_leaf_base that checks _last_sp. ++ ++ assert(thread == TREG, "must be"); ++ ++ Label done; ++ Label runtime; ++ ++ assert(pre_val != noreg, "check this code"); ++ ++ if (obj != noreg) { ++ assert_different_registers(obj, pre_val, tmp); ++ assert(pre_val != V0, "check this code"); ++ } ++ ++ Address in_progress(thread, in_bytes(G1ThreadLocalData::satb_mark_queue_active_offset())); ++ Address index(thread, in_bytes(G1ThreadLocalData::satb_mark_queue_index_offset())); ++ Address buffer(thread, in_bytes(G1ThreadLocalData::satb_mark_queue_buffer_offset())); ++ ++ // Is marking active? 
++ if (in_bytes(SATBMarkQueue::byte_width_of_active()) == 4) { ++ __ ld_w(AT, in_progress); ++ } else { ++ assert(in_bytes(SATBMarkQueue::byte_width_of_active()) == 1, "Assumption"); ++ __ ld_b(AT, in_progress); ++ } ++ __ beqz(AT, done); ++ ++ // Do we need to load the previous value? ++ if (obj != noreg) { ++ __ load_heap_oop(pre_val, Address(obj, 0)); ++ } ++ ++ // Is the previous value null? ++ __ beqz(pre_val, done); ++ ++ // Can we store original value in the thread's buffer? ++ // Is index == 0? ++ // (The index field is typed as size_t.) ++ ++ __ ld_d(tmp, index); ++ __ beqz(tmp, runtime); ++ ++ __ addi_d(tmp, tmp, -1 * wordSize); ++ __ st_d(tmp, index); ++ __ ld_d(AT, buffer); ++ ++ // Record the previous value ++ __ stx_d(pre_val, tmp, AT); ++ __ b(done); ++ ++ __ bind(runtime); ++ // save the live input values ++ if (tosca_live) __ push(V0); ++ ++ if (obj != noreg && obj != V0) __ push(obj); ++ ++ if (pre_val != V0) __ push(pre_val); ++ ++ // Calling the runtime using the regular call_VM_leaf mechanism generates ++ // code (generated by InterpreterMacroAssember::call_VM_leaf_base) ++ // that checks that the *(ebp+frame::interpreter_frame_last_sp) == NULL. ++ // ++ // If we care generating the pre-barrier without a frame (e.g. in the ++ // intrinsified Reference.get() routine) then ebp might be pointing to ++ // the caller frame and so this check will most likely fail at runtime. ++ // ++ // Expanding the call directly bypasses the generation of the check. ++ // So when we do not have have a full interpreter frame on the stack ++ // expand_call should be passed true. ++ ++ if (expand_call) { ++ assert(pre_val != A1, "smashed arg"); ++ if (thread != A1) __ move(A1, thread); ++ if (pre_val != A0) __ move(A0, pre_val); ++ __ super_call_VM_leaf(CAST_FROM_FN_PTR(address, G1BarrierSetRuntime::write_ref_field_pre_entry), pre_val, thread); ++ } else { ++ __ call_VM_leaf(CAST_FROM_FN_PTR(address, G1BarrierSetRuntime::write_ref_field_pre_entry), pre_val, thread); ++ } ++ ++ // save the live input values ++ if (pre_val != V0) ++ __ pop(pre_val); ++ ++ if (obj != noreg && obj != V0) ++ __ pop(obj); ++ ++ if (tosca_live) __ pop(V0); ++ ++ __ bind(done); ++} ++ ++void G1BarrierSetAssembler::g1_write_barrier_post(MacroAssembler* masm, ++ Register store_addr, ++ Register new_val, ++ Register thread, ++ Register tmp, ++ Register tmp2) { ++ assert_different_registers(tmp, tmp2, AT); ++ assert(thread == TREG, "must be"); ++ ++ Address queue_index(thread, in_bytes(G1ThreadLocalData::dirty_card_queue_index_offset())); ++ Address buffer(thread, in_bytes(G1ThreadLocalData::dirty_card_queue_buffer_offset())); ++ ++ CardTableBarrierSet* ct = barrier_set_cast(BarrierSet::barrier_set()); ++ assert(sizeof(*ct->card_table()->byte_map_base()) == sizeof(jbyte), "adjust this code"); ++ ++ Label done; ++ Label runtime; ++ ++ // Does store cross heap regions? ++ __ xorr(AT, store_addr, new_val); ++ __ srli_d(AT, AT, HeapRegion::LogOfHRGrainBytes); ++ __ beqz(AT, done); ++ ++ // crosses regions, storing NULL? ++ __ beqz(new_val, done); ++ ++ // storing region crossing non-NULL, is card already dirty? ++ const Register card_addr = tmp; ++ const Register cardtable = tmp2; ++ ++ __ move(card_addr, store_addr); ++ __ srli_d(card_addr, card_addr, CardTable::card_shift); ++ // Do not use ExternalAddress to load 'byte_map_base', since 'byte_map_base' is NOT ++ // a valid address and therefore is not properly handled by the relocation code. 
++ __ li(cardtable, (intptr_t)ct->card_table()->byte_map_base()); ++ __ add_d(card_addr, card_addr, cardtable); ++ ++ __ ld_bu(AT, card_addr, 0); ++ __ addi_d(AT, AT, -1 * (int)G1CardTable::g1_young_card_val()); ++ __ beqz(AT, done); ++ ++ assert((int)CardTable::dirty_card_val() == 0, "must be 0"); ++ ++ __ membar(__ StoreLoad); ++ __ ld_bu(AT, card_addr, 0); ++ __ beqz(AT, done); ++ ++ // storing a region crossing, non-NULL oop, card is clean. ++ // dirty card and log. ++ __ st_b(R0, card_addr, 0); ++ ++ __ ld_d(AT, queue_index); ++ __ beqz(AT, runtime); ++ __ addi_d(AT, AT, -1 * wordSize); ++ __ st_d(AT, queue_index); ++ __ ld_d(tmp2, buffer); ++ __ ld_d(AT, queue_index); ++ __ stx_d(card_addr, tmp2, AT); ++ __ b(done); ++ ++ __ bind(runtime); ++ // save the live input values ++ __ push(store_addr); ++ __ push(new_val); ++ __ call_VM_leaf(CAST_FROM_FN_PTR(address, G1BarrierSetRuntime::write_ref_field_post_entry), card_addr, TREG); ++ __ pop(new_val); ++ __ pop(store_addr); ++ ++ __ bind(done); ++} ++ ++void G1BarrierSetAssembler::oop_store_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type, ++ Address dst, Register val, Register tmp1, Register tmp2) { ++ bool in_heap = (decorators & IN_HEAP) != 0; ++ bool as_normal = (decorators & AS_NORMAL) != 0; ++ assert((decorators & IS_DEST_UNINITIALIZED) == 0, "unsupported"); ++ ++ bool needs_pre_barrier = as_normal; ++ bool needs_post_barrier = val != noreg && in_heap; ++ ++ Register tmp3 = RT3; ++ Register rthread = TREG; ++ // flatten object address if needed ++ // We do it regardless of precise because we need the registers ++ if (dst.index() == noreg && dst.disp() == 0) { ++ if (dst.base() != tmp3) { ++ __ move(tmp3, dst.base()); ++ } ++ } else { ++ __ lea(tmp3, dst); ++ } ++ ++ if (needs_pre_barrier) { ++ g1_write_barrier_pre(masm /*masm*/, ++ tmp3 /* obj */, ++ tmp2 /* pre_val */, ++ rthread /* thread */, ++ tmp1 /* tmp */, ++ val != noreg /* tosca_live */, ++ false /* expand_call */); ++ } ++ if (val == noreg) { ++ BarrierSetAssembler::store_at(masm, decorators, type, Address(tmp3, 0), val, noreg, noreg); ++ } else { ++ Register new_val = val; ++ if (needs_post_barrier) { ++ // G1 barrier needs uncompressed oop for region cross check. ++ if (UseCompressedOops) { ++ new_val = tmp2; ++ __ move(new_val, val); ++ } ++ } ++ BarrierSetAssembler::store_at(masm, decorators, type, Address(tmp3, 0), val, noreg, noreg); ++ if (needs_post_barrier) { ++ g1_write_barrier_post(masm /*masm*/, ++ tmp3 /* store_adr */, ++ new_val /* new_val */, ++ rthread /* thread */, ++ tmp1 /* tmp */, ++ tmp2 /* tmp2 */); ++ } ++ } ++} ++ ++#ifdef COMPILER1 ++ ++#undef __ ++#define __ ce->masm()-> ++ ++void G1BarrierSetAssembler::gen_pre_barrier_stub(LIR_Assembler* ce, G1PreBarrierStub* stub) { ++ G1BarrierSetC1* bs = (G1BarrierSetC1*)BarrierSet::barrier_set()->barrier_set_c1(); ++ // At this point we know that marking is in progress. ++ // If do_load() is true then we have to emit the ++ // load of the previous value; otherwise it has already ++ // been loaded into _pre_val. 
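// The pre-barrier code in this file follows a simple thread-local SATB queue
// discipline: bail out unless marking is active and the old value is non-null,
// try to append the old value to the per-thread buffer, and fall back to the
// runtime when the buffer is full. A rough standalone model of that fast/slow
// path split (simplified types and names; runtime_enqueue is a stand-in, not a
// HotSpot entry point):
//
//   #include <cstddef>
//
//   struct SatbQueue {
//     bool   active;   // mirrors satb_mark_queue_active
//     size_t index;    // bytes left in the buffer; 0 means "full"
//     void** buffer;   // mirrors satb_mark_queue_buffer
//   };
//
//   void runtime_enqueue(void* pre_val);   // slow path, assumed to live elsewhere
//
//   void satb_pre_barrier(SatbQueue& q, void* pre_val) {
//     if (!q.active || pre_val == nullptr) return;  // the beqz filters above
//     if (q.index == 0) {                           // buffer full -> runtime call
//       runtime_enqueue(pre_val);
//       return;
//     }
//     q.index -= sizeof(void*);                     // bump the index down one word
//     q.buffer[q.index / sizeof(void*)] = pre_val;  // record the previous value
//   }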
++ ++ __ bind(*stub->entry()); ++ ++ assert(stub->pre_val()->is_register(), "Precondition."); ++ ++ Register pre_val_reg = stub->pre_val()->as_register(); ++ ++ if (stub->do_load()) { ++ ce->mem2reg(stub->addr(), stub->pre_val(), T_OBJECT, stub->patch_code(), stub->info(), false /*wide*/, false /*unaligned*/); ++ } ++ __ beqz(pre_val_reg, *stub->continuation()); ++ ce->store_parameter(stub->pre_val()->as_register(), 0); ++ __ call(bs->pre_barrier_c1_runtime_code_blob()->code_begin(), relocInfo::runtime_call_type); ++ __ b(*stub->continuation()); ++} ++ ++void G1BarrierSetAssembler::gen_post_barrier_stub(LIR_Assembler* ce, G1PostBarrierStub* stub) { ++ G1BarrierSetC1* bs = (G1BarrierSetC1*)BarrierSet::barrier_set()->barrier_set_c1(); ++ __ bind(*stub->entry()); ++ assert(stub->addr()->is_register(), "Precondition."); ++ assert(stub->new_val()->is_register(), "Precondition."); ++ Register new_val_reg = stub->new_val()->as_register(); ++ __ beqz(new_val_reg, *stub->continuation()); ++ ce->store_parameter(stub->addr()->as_pointer_register(), 0); ++ __ call(bs->post_barrier_c1_runtime_code_blob()->code_begin(), relocInfo::runtime_call_type); ++ __ b(*stub->continuation()); ++} ++ ++#undef __ ++ ++#define __ sasm-> ++ ++void G1BarrierSetAssembler::generate_c1_pre_barrier_runtime_stub(StubAssembler* sasm) { ++ __ prologue("g1_pre_barrier", false); ++ ++ // arg0 : previous value of memory ++ ++ BarrierSet* bs = BarrierSet::barrier_set(); ++ ++ const Register pre_val = A0; ++ const Register thread = TREG; ++ const Register tmp = SCR2; ++ ++ Address in_progress(thread, in_bytes(G1ThreadLocalData::satb_mark_queue_active_offset())); ++ Address queue_index(thread, in_bytes(G1ThreadLocalData::satb_mark_queue_index_offset())); ++ Address buffer(thread, in_bytes(G1ThreadLocalData::satb_mark_queue_buffer_offset())); ++ ++ Label done; ++ Label runtime; ++ ++ // Is marking still active? ++ if (in_bytes(SATBMarkQueue::byte_width_of_active()) == 4) { ++ __ ld_w(tmp, in_progress); ++ } else { ++ assert(in_bytes(SATBMarkQueue::byte_width_of_active()) == 1, "Assumption"); ++ __ ld_b(tmp, in_progress); ++ } ++ __ beqz(tmp, done); ++ ++ // Can we store original value in the thread's buffer? ++ __ ld_ptr(tmp, queue_index); ++ __ beqz(tmp, runtime); ++ ++ __ addi_d(tmp, tmp, -wordSize); ++ __ st_ptr(tmp, queue_index); ++ __ ld_ptr(SCR1, buffer); ++ __ add_d(tmp, tmp, SCR1); ++ __ load_parameter(0, SCR1); ++ __ st_ptr(SCR1, Address(tmp, 0)); ++ __ b(done); ++ ++ __ bind(runtime); ++ __ pushad(); ++ __ load_parameter(0, pre_val); ++ __ call_VM_leaf(CAST_FROM_FN_PTR(address, G1BarrierSetRuntime::write_ref_field_pre_entry), pre_val, thread); ++ __ popad(); ++ __ bind(done); ++ ++ __ epilogue(); ++} ++ ++void G1BarrierSetAssembler::generate_c1_post_barrier_runtime_stub(StubAssembler* sasm) { ++ __ prologue("g1_post_barrier", false); ++ ++ // arg0: store_address ++ Address store_addr(FP, 2 * BytesPerWord); ++ ++ BarrierSet* bs = BarrierSet::barrier_set(); ++ CardTableBarrierSet* ctbs = barrier_set_cast(bs); ++ CardTable* ct = ctbs->card_table(); ++ ++ Label done; ++ Label runtime; ++ ++ // At this point we know new_value is non-NULL and the new_value crosses regions. 
++ // Must check to see if card is already dirty ++ ++ const Register thread = TREG; ++ ++ Address queue_index(thread, in_bytes(G1ThreadLocalData::dirty_card_queue_index_offset())); ++ Address buffer(thread, in_bytes(G1ThreadLocalData::dirty_card_queue_buffer_offset())); ++ ++ const Register card_offset = SCR2; ++ // RA is free here, so we can use it to hold the byte_map_base. ++ const Register byte_map_base = RA; ++ ++ assert_different_registers(card_offset, byte_map_base, SCR1); ++ ++ __ load_parameter(0, card_offset); ++ __ srli_d(card_offset, card_offset, CardTable::card_shift); ++ __ load_byte_map_base(byte_map_base); ++ __ ldx_bu(SCR1, byte_map_base, card_offset); ++ __ addi_d(SCR1, SCR1, -(int)G1CardTable::g1_young_card_val()); ++ __ beqz(SCR1, done); ++ ++ assert((int)CardTable::dirty_card_val() == 0, "must be 0"); ++ ++ __ membar(__ StoreLoad); ++ __ ldx_bu(SCR1, byte_map_base, card_offset); ++ __ beqz(SCR1, done); ++ ++ // storing region crossing non-NULL, card is clean. ++ // dirty card and log. ++ __ stx_b(R0, byte_map_base, card_offset); ++ ++ // Convert card offset into an address in card_addr ++ Register card_addr = card_offset; ++ __ add_d(card_addr, byte_map_base, card_addr); ++ ++ __ ld_ptr(SCR1, queue_index); ++ __ beqz(SCR1, runtime); ++ __ addi_d(SCR1, SCR1, -wordSize); ++ __ st_ptr(SCR1, queue_index); ++ ++ // Reuse RA to hold buffer_addr ++ const Register buffer_addr = RA; ++ ++ __ ld_ptr(buffer_addr, buffer); ++ __ stx_d(card_addr, buffer_addr, SCR1); ++ __ b(done); ++ ++ __ bind(runtime); ++ __ pushad(); ++ __ call_VM_leaf(CAST_FROM_FN_PTR(address, G1BarrierSetRuntime::write_ref_field_post_entry), card_addr, thread); ++ __ popad(); ++ __ bind(done); ++ __ epilogue(); ++} ++ ++#undef __ ++ ++#endif // COMPILER1 +diff --git a/src/hotspot/cpu/loongarch/gc/g1/g1BarrierSetAssembler_loongarch.hpp b/src/hotspot/cpu/loongarch/gc/g1/g1BarrierSetAssembler_loongarch.hpp +new file mode 100644 +index 0000000000..745046ac0c +--- /dev/null ++++ b/src/hotspot/cpu/loongarch/gc/g1/g1BarrierSetAssembler_loongarch.hpp +@@ -0,0 +1,71 @@ ++/* ++ * Copyright (c) 2018, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ * ++ */ ++ ++#ifndef CPU_LOONGARCH_GC_G1_G1BARRIERSETASSEMBLER_LOONGARCH_HPP ++#define CPU_LOONGARCH_GC_G1_G1BARRIERSETASSEMBLER_LOONGARCH_HPP ++ ++#include "asm/macroAssembler.hpp" ++#include "gc/shared/modRefBarrierSetAssembler.hpp" ++ ++class LIR_Assembler; ++class StubAssembler; ++class G1PreBarrierStub; ++class G1PostBarrierStub; ++ ++class G1BarrierSetAssembler: public ModRefBarrierSetAssembler { ++ protected: ++ virtual void gen_write_ref_array_pre_barrier(MacroAssembler* masm, DecoratorSet decorators, Register addr, Register count, RegSet saved_regs); ++ virtual void gen_write_ref_array_post_barrier(MacroAssembler* masm, DecoratorSet decorators, Register addr, Register count, Register tmp, RegSet saved_regs); ++ ++ void g1_write_barrier_pre(MacroAssembler* masm, ++ Register obj, ++ Register pre_val, ++ Register thread, ++ Register tmp, ++ bool tosca_live, ++ bool expand_call); ++ ++ void g1_write_barrier_post(MacroAssembler* masm, ++ Register store_addr, ++ Register new_val, ++ Register thread, ++ Register tmp, ++ Register tmp2); ++ ++ virtual void oop_store_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type, ++ Address dst, Register val, Register tmp1, Register tmp2); ++ ++ public: ++ void gen_pre_barrier_stub(LIR_Assembler* ce, G1PreBarrierStub* stub); ++ void gen_post_barrier_stub(LIR_Assembler* ce, G1PostBarrierStub* stub); ++ ++ void generate_c1_pre_barrier_runtime_stub(StubAssembler* sasm); ++ void generate_c1_post_barrier_runtime_stub(StubAssembler* sasm); ++ ++ virtual void load_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type, ++ Register dst, Address src, Register tmp1, Register tmp_thread); ++}; ++ ++#endif // CPU_LOONGARCH_GC_G1_G1BARRIERSETASSEMBLER_LOONGARCH_HPP +diff --git a/src/hotspot/cpu/loongarch/gc/shared/barrierSetAssembler_loongarch.cpp b/src/hotspot/cpu/loongarch/gc/shared/barrierSetAssembler_loongarch.cpp +new file mode 100644 +index 0000000000..a890cd3f62 +--- /dev/null ++++ b/src/hotspot/cpu/loongarch/gc/shared/barrierSetAssembler_loongarch.cpp +@@ -0,0 +1,255 @@ ++/* ++ * Copyright (c) 2018, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2018, 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ * ++ */ ++ ++#include "precompiled.hpp" ++#include "gc/shared/barrierSetAssembler.hpp" ++#include "gc/shared/collectedHeap.hpp" ++#include "interpreter/interp_masm.hpp" ++#include "runtime/jniHandles.hpp" ++#include "runtime/thread.hpp" ++ ++#define __ masm-> ++ ++void BarrierSetAssembler::load_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type, ++ Register dst, Address src, Register tmp1, Register tmp_thread) { ++ // RA is live. It must be saved around calls. ++ ++ bool in_heap = (decorators & IN_HEAP) != 0; ++ bool in_native = (decorators & IN_NATIVE) != 0; ++ bool is_not_null = (decorators & IS_NOT_NULL) != 0; ++ ++ switch (type) { ++ case T_OBJECT: ++ case T_ARRAY: { ++ if (in_heap) { ++ if (UseCompressedOops) { ++ __ ld_wu(dst, src); ++ if (is_not_null) { ++ __ decode_heap_oop_not_null(dst); ++ } else { ++ __ decode_heap_oop(dst); ++ } ++ } else ++ { ++ __ ld_ptr(dst, src); ++ } ++ } else { ++ assert(in_native, "why else?"); ++ __ ld_ptr(dst, src); ++ } ++ break; ++ } ++ case T_BOOLEAN: __ ld_bu (dst, src); break; ++ case T_BYTE: __ ld_b (dst, src); break; ++ case T_CHAR: __ ld_hu (dst, src); break; ++ case T_SHORT: __ ld_h (dst, src); break; ++ case T_INT: __ ld_w (dst, src); break; ++ case T_LONG: __ ld_d (dst, src); break; ++ case T_ADDRESS: __ ld_ptr(dst, src); break; ++ case T_FLOAT: ++ assert(dst == noreg, "only to ftos"); ++ __ fld_s(FSF, src); ++ break; ++ case T_DOUBLE: ++ assert(dst == noreg, "only to dtos"); ++ __ fld_d(FSF, src); ++ break; ++ default: Unimplemented(); ++ } ++} ++ ++void BarrierSetAssembler::store_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type, ++ Address dst, Register val, Register tmp1, Register tmp2) { ++ bool in_heap = (decorators & IN_HEAP) != 0; ++ bool in_native = (decorators & IN_NATIVE) != 0; ++ bool is_not_null = (decorators & IS_NOT_NULL) != 0; ++ ++ switch (type) { ++ case T_OBJECT: ++ case T_ARRAY: { ++ if (in_heap) { ++ if (val == noreg) { ++ assert(!is_not_null, "inconsistent access"); ++ if (UseCompressedOops) { ++ __ st_w(R0, dst); ++ } else { ++ __ st_d(R0, dst); ++ } ++ } else { ++ if (UseCompressedOops) { ++ assert(!dst.uses(val), "not enough registers"); ++ if (is_not_null) { ++ __ encode_heap_oop_not_null(val); ++ } else { ++ __ encode_heap_oop(val); ++ } ++ __ st_w(val, dst); ++ } else ++ { ++ __ st_ptr(val, dst); ++ } ++ } ++ } else { ++ assert(in_native, "why else?"); ++ assert(val != noreg, "not supported"); ++ __ st_ptr(val, dst); ++ } ++ break; ++ } ++ case T_BOOLEAN: ++ __ andi(val, val, 0x1); // boolean is true if LSB is 1 ++ __ st_b(val, dst); ++ break; ++ case T_BYTE: ++ __ st_b(val, dst); ++ break; ++ case T_SHORT: ++ __ st_h(val, dst); ++ break; ++ case T_CHAR: ++ __ st_h(val, dst); ++ break; ++ case T_INT: ++ __ st_w(val, dst); ++ break; ++ case T_LONG: ++ __ st_d(val, dst); ++ break; ++ case T_FLOAT: ++ assert(val == noreg, "only tos"); ++ __ fst_s(FSF, dst); ++ break; ++ case T_DOUBLE: ++ assert(val == noreg, "only tos"); ++ __ fst_d(FSF, dst); ++ break; ++ case T_ADDRESS: ++ __ st_ptr(val, dst); ++ break; ++ default: Unimplemented(); ++ } ++} ++ ++void BarrierSetAssembler::obj_equals(MacroAssembler* masm, ++ Register obj1, Address obj2) { ++ Unimplemented(); ++} ++ ++void BarrierSetAssembler::obj_equals(MacroAssembler* masm, ++ Register obj1, Register obj2) { ++ Unimplemented(); ++} ++ ++void BarrierSetAssembler::try_resolve_jobject_in_native(MacroAssembler* masm, Register jni_env, ++ Register obj, Register tmp, Label& slowpath) { ++ __ clear_jweak_tag(obj); ++ __ ld_ptr(obj, 
Address(obj, 0)); ++} ++ ++// Defines obj, preserves var_size_in_bytes, okay for t2 == var_size_in_bytes. ++void BarrierSetAssembler::tlab_allocate(MacroAssembler* masm, Register obj, ++ Register var_size_in_bytes, ++ int con_size_in_bytes, ++ Register t1, ++ Register t2, ++ Label& slow_case) { ++ assert_different_registers(obj, t2); ++ assert_different_registers(obj, var_size_in_bytes); ++ Register end = t2; ++ ++ // verify_tlab(); ++ ++ __ ld_ptr(obj, Address(TREG, JavaThread::tlab_top_offset())); ++ if (var_size_in_bytes == noreg) { ++ __ lea(end, Address(obj, con_size_in_bytes)); ++ } else { ++ __ lea(end, Address(obj, var_size_in_bytes, Address::times_1, 0)); ++ } ++ __ ld_ptr(SCR1, Address(TREG, JavaThread::tlab_end_offset())); ++ __ blt_far(SCR1, end, slow_case, false); ++ ++ // update the tlab top pointer ++ __ st_ptr(end, Address(TREG, JavaThread::tlab_top_offset())); ++ ++ // recover var_size_in_bytes if necessary ++ if (var_size_in_bytes == end) { ++ __ sub_d(var_size_in_bytes, var_size_in_bytes, obj); ++ } ++ // verify_tlab(); ++} ++ ++// Defines obj, preserves var_size_in_bytes ++void BarrierSetAssembler::eden_allocate(MacroAssembler* masm, Register obj, ++ Register var_size_in_bytes, ++ int con_size_in_bytes, ++ Register t1, ++ Label& slow_case) { ++ assert_different_registers(obj, var_size_in_bytes, t1); ++ if (!Universe::heap()->supports_inline_contig_alloc()) { ++ __ b_far(slow_case); ++ } else { ++ Register end = t1; ++ Register heap_end = SCR2; ++ Label retry; ++ __ bind(retry); ++ ++ __ li(SCR1, (address)Universe::heap()->end_addr()); ++ __ ld_d(heap_end, SCR1, 0); ++ ++ // Get the current top of the heap ++ __ li(SCR1, (address) Universe::heap()->top_addr()); ++ __ ll_d(obj, SCR1, 0); ++ ++ // Adjust it my the size of our new object ++ if (var_size_in_bytes == noreg) ++ __ addi_d(end, obj, con_size_in_bytes); ++ else ++ __ add_d(end, obj, var_size_in_bytes); ++ ++ // if end < obj then we wrapped around high memory ++ __ blt_far(end, obj, slow_case, false); ++ __ blt_far(heap_end, end, slow_case, false); ++ ++ // If heap top hasn't been changed by some other thread, update it. ++ __ sc_d(end, SCR1, 0); ++ __ beqz(end, retry); ++ ++ incr_allocated_bytes(masm, var_size_in_bytes, con_size_in_bytes, t1); ++ } ++} ++ ++void BarrierSetAssembler::incr_allocated_bytes(MacroAssembler* masm, ++ Register var_size_in_bytes, ++ int con_size_in_bytes, ++ Register t1) { ++ assert(t1->is_valid(), "need temp reg"); ++ ++ __ ld_ptr(t1, Address(TREG, in_bytes(JavaThread::allocated_bytes_offset()))); ++ if (var_size_in_bytes->is_valid()) ++ __ add_d(t1, t1, var_size_in_bytes); ++ else ++ __ addi_d(t1, t1, con_size_in_bytes); ++ __ st_ptr(t1, Address(TREG, in_bytes(JavaThread::allocated_bytes_offset()))); ++} +diff --git a/src/hotspot/cpu/loongarch/gc/shared/barrierSetAssembler_loongarch.hpp b/src/hotspot/cpu/loongarch/gc/shared/barrierSetAssembler_loongarch.hpp +new file mode 100644 +index 0000000000..a7ebbfaabb +--- /dev/null ++++ b/src/hotspot/cpu/loongarch/gc/shared/barrierSetAssembler_loongarch.hpp +@@ -0,0 +1,88 @@ ++/* ++ * Copyright (c) 2018, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2018, 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. 
++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#ifndef CPU_LOONGARCH_GC_SHARED_BARRIERSETASSEMBLER_LOONGARCH_HPP ++#define CPU_LOONGARCH_GC_SHARED_BARRIERSETASSEMBLER_LOONGARCH_HPP ++ ++#include "asm/macroAssembler.hpp" ++#include "gc/shared/barrierSet.hpp" ++#include "memory/allocation.hpp" ++#include "oops/access.hpp" ++ ++class InterpreterMacroAssembler; ++ ++class BarrierSetAssembler: public CHeapObj { ++private: ++ void incr_allocated_bytes(MacroAssembler* masm, ++ Register var_size_in_bytes, ++ int con_size_in_bytes, ++ Register t1); ++ ++public: ++ virtual void arraycopy_prologue(MacroAssembler* masm, DecoratorSet decorators, bool is_oop, ++ Register dst, Register count, RegSet saved_regs) {} ++ virtual void arraycopy_epilogue(MacroAssembler* masm, DecoratorSet decorators, bool is_oop, ++ Register dst, Register count, Register scratch, RegSet saved_regs) {} ++ ++ virtual void load_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type, ++ Register dst, Address src, Register tmp1, Register tmp_thread); ++ virtual void store_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type, ++ Address dst, Register val, Register tmp1, Register tmp2); ++ ++ ++ virtual void obj_equals(MacroAssembler* masm, ++ Register obj1, Register obj2); ++ virtual void obj_equals(MacroAssembler* masm, ++ Register obj1, Address obj2); ++ ++ virtual void resolve(MacroAssembler* masm, DecoratorSet decorators, Register obj) { ++ // Default implementation does not need to do anything. 
++ } ++ ++ // Support for jniFastGetField to try resolving a jobject/jweak in native ++ virtual void try_resolve_jobject_in_native(MacroAssembler* masm, Register jni_env, ++ Register obj, Register tmp, Label& slowpath); ++ ++ virtual void tlab_allocate(MacroAssembler* masm, ++ Register obj, // result: pointer to object after successful allocation ++ Register var_size_in_bytes, // object size in bytes if unknown at compile time; invalid otherwise ++ int con_size_in_bytes, // object size in bytes if known at compile time ++ Register t1, // temp register ++ Register t2, // temp register ++ Label& slow_case // continuation point if fast allocation fails ++ ); ++ ++ void eden_allocate(MacroAssembler* masm, ++ Register obj, // result: pointer to object after successful allocation ++ Register var_size_in_bytes, // object size in bytes if unknown at compile time; invalid otherwise ++ int con_size_in_bytes, // object size in bytes if known at compile time ++ Register t1, // temp register ++ Label& slow_case // continuation point if fast allocation fails ++ ); ++ ++ virtual void barrier_stubs_init() {} ++}; ++ ++#endif // CPU_LOONGARCH_GC_SHARED_BARRIERSETASSEMBLER_LOONGARCH_HPP +diff --git a/src/hotspot/cpu/loongarch/gc/shared/cardTableBarrierSetAssembler_loongarch.cpp b/src/hotspot/cpu/loongarch/gc/shared/cardTableBarrierSetAssembler_loongarch.cpp +new file mode 100644 +index 0000000000..d09e9a75a7 +--- /dev/null ++++ b/src/hotspot/cpu/loongarch/gc/shared/cardTableBarrierSetAssembler_loongarch.cpp +@@ -0,0 +1,140 @@ ++/* ++ * Copyright (c) 2018, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2018, 2023, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#include "precompiled.hpp" ++#include "asm/macroAssembler.inline.hpp" ++#include "gc/shared/barrierSet.hpp" ++#include "gc/shared/cardTable.hpp" ++#include "gc/shared/cardTableBarrierSet.hpp" ++#include "gc/shared/cardTableBarrierSetAssembler.hpp" ++ ++#define __ masm-> ++ ++#define T4 RT4 ++ ++#ifdef PRODUCT ++#define BLOCK_COMMENT(str) /* nothing */ ++#else ++#define BLOCK_COMMENT(str) __ block_comment(str) ++#endif ++ ++#define BIND(label) bind(label); BLOCK_COMMENT(#label ":") ++ ++#define TIMES_OOP (UseCompressedOops ? 
Address::times_4 : Address::times_8) ++ ++void CardTableBarrierSetAssembler::gen_write_ref_array_post_barrier(MacroAssembler* masm, DecoratorSet decorators, ++ Register addr, Register count, Register tmp, ++ RegSet saved_regs) { ++ BarrierSet *bs = BarrierSet::barrier_set(); ++ CardTableBarrierSet* ctbs = barrier_set_cast(bs); ++ CardTable* ct = ctbs->card_table(); ++ assert(sizeof(*ct->byte_map_base()) == sizeof(jbyte), "adjust this code"); ++ intptr_t disp = (intptr_t) ct->byte_map_base(); ++ ++ Label L_loop, L_done; ++ const Register end = count; ++ assert_different_registers(addr, end); ++ ++ __ beq(count, R0, L_done); // zero count - nothing to do ++ ++ if (ct->scanned_concurrently()) __ membar(__ StoreStore); ++ ++ __ li(tmp, disp); ++ ++ __ lea(end, Address(addr, count, TIMES_OOP, 0)); // end == addr+count*oop_size ++ __ addi_d(end, end, -BytesPerHeapOop); // end - 1 to make inclusive ++ __ shr(addr, CardTable::card_shift); ++ __ shr(end, CardTable::card_shift); ++ __ sub_d(end, end, addr); // end --> cards count ++ ++ __ add_d(addr, addr, tmp); ++ ++ __ BIND(L_loop); ++ __ stx_b(R0, addr, count); ++ __ addi_d(count, count, -1); ++ __ bge(count, R0, L_loop); ++ ++ __ BIND(L_done); ++} ++ ++void CardTableBarrierSetAssembler::store_check(MacroAssembler* masm, Register obj, Address dst) { ++ // Does a store check for the oop in register obj. The content of ++ // register obj is destroyed afterwards. ++ BarrierSet* bs = BarrierSet::barrier_set(); ++ ++ CardTableBarrierSet* ctbs = barrier_set_cast(bs); ++ CardTable* ct = ctbs->card_table(); ++ assert(sizeof(*ct->byte_map_base()) == sizeof(jbyte), "adjust this code"); ++ ++ __ shr(obj, CardTable::card_shift); ++ ++ Address card_addr; ++ ++ intptr_t byte_map_base = (intptr_t)ct->byte_map_base(); ++ Register tmp = T4; ++ assert_different_registers(tmp, obj); ++ __ li(tmp, byte_map_base); ++ __ add_d(tmp, tmp, obj); ++ ++ assert(CardTable::dirty_card_val() == 0, "must be"); ++ ++ jbyte dirty = CardTable::dirty_card_val(); ++ if (UseCondCardMark) { ++ Label L_already_dirty; ++ __ membar(__ StoreLoad); ++ __ ld_b(AT, tmp, 0); ++ __ addi_d(AT, AT, -1 * dirty); ++ __ beq(AT, R0, L_already_dirty); ++ __ st_b(R0, tmp, 0); ++ __ bind(L_already_dirty); ++ } else { ++ if (ct->scanned_concurrently()) { ++ __ membar(Assembler::StoreStore); ++ } ++ __ st_b(R0, tmp, 0); ++ } ++} ++ ++void CardTableBarrierSetAssembler::oop_store_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type, ++ Address dst, Register val, Register tmp1, Register tmp2) { ++ bool in_heap = (decorators & IN_HEAP) != 0; ++ ++ bool is_array = (decorators & IS_ARRAY) != 0; ++ bool on_anonymous = (decorators & ON_UNKNOWN_OOP_REF) != 0; ++ bool precise = is_array || on_anonymous; ++ ++ bool needs_post_barrier = val != noreg && in_heap; ++ ++ BarrierSetAssembler::store_at(masm, decorators, type, dst, val, noreg, noreg); ++ if (needs_post_barrier) { ++ // flatten object address if needed ++ if (!precise || (dst.index() == noreg && dst.disp() == 0)) { ++ store_check(masm, dst.base(), dst); ++ } else { ++ __ lea(tmp1, dst); ++ store_check(masm, tmp1, dst); ++ } ++ } ++} +diff --git a/src/hotspot/cpu/loongarch/gc/shared/cardTableBarrierSetAssembler_loongarch.hpp b/src/hotspot/cpu/loongarch/gc/shared/cardTableBarrierSetAssembler_loongarch.hpp +new file mode 100644 +index 0000000000..b37c2ba0bc +--- /dev/null ++++ b/src/hotspot/cpu/loongarch/gc/shared/cardTableBarrierSetAssembler_loongarch.hpp +@@ -0,0 +1,44 @@ ++/* ++ * Copyright (c) 2018, Oracle and/or its affiliates. 
All rights reserved. ++ * Copyright (c) 2018, 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#ifndef CPU_LOONGARCH_GC_SHARED_CARDTABLEBARRIERSETASSEMBLER_LOONGARCH_HPP ++#define CPU_LOONGARCH_GC_SHARED_CARDTABLEBARRIERSETASSEMBLER_LOONGARCH_HPP ++ ++#include "asm/macroAssembler.hpp" ++#include "gc/shared/modRefBarrierSetAssembler.hpp" ++ ++class CardTableBarrierSetAssembler: public ModRefBarrierSetAssembler { ++protected: ++ void store_check(MacroAssembler* masm, Register obj, Address dst); ++ ++ virtual void gen_write_ref_array_post_barrier(MacroAssembler* masm, DecoratorSet decorators, ++ Register addr, Register count, Register tmp, ++ RegSet saved_regs); ++ ++ virtual void oop_store_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type, ++ Address dst, Register val, Register tmp1, Register tmp2); ++}; ++ ++#endif // CPU_LOONGARCH_GC_SHARED_CARDTABLEBARRIERSETASSEMBLER_LOONGARCH_HPP +diff --git a/src/hotspot/cpu/loongarch/gc/shared/modRefBarrierSetAssembler_loongarch.cpp b/src/hotspot/cpu/loongarch/gc/shared/modRefBarrierSetAssembler_loongarch.cpp +new file mode 100644 +index 0000000000..14c41ea790 +--- /dev/null ++++ b/src/hotspot/cpu/loongarch/gc/shared/modRefBarrierSetAssembler_loongarch.cpp +@@ -0,0 +1,53 @@ ++/* ++ * Copyright (c) 2018, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2018, 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ * ++ */ ++ ++#include "precompiled.hpp" ++#include "asm/macroAssembler.inline.hpp" ++#include "gc/shared/modRefBarrierSetAssembler.hpp" ++ ++#define __ masm-> ++ ++void ModRefBarrierSetAssembler::arraycopy_prologue(MacroAssembler* masm, DecoratorSet decorators, bool is_oop, ++ Register dst, Register count, RegSet saved_regs) { ++ if (is_oop) { ++ gen_write_ref_array_pre_barrier(masm, decorators, dst, count, saved_regs); ++ } ++} ++ ++void ModRefBarrierSetAssembler::arraycopy_epilogue(MacroAssembler* masm, DecoratorSet decorators, bool is_oop, ++ Register dst, Register count, Register scratch, RegSet saved_regs) { ++ if (is_oop) { ++ gen_write_ref_array_post_barrier(masm, decorators, dst, count, scratch, saved_regs); ++ } ++} ++ ++void ModRefBarrierSetAssembler::store_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type, ++ Address dst, Register val, Register tmp1, Register tmp2) { ++ if (type == T_OBJECT || type == T_ARRAY) { ++ oop_store_at(masm, decorators, type, dst, val, tmp1, tmp2); ++ } else { ++ BarrierSetAssembler::store_at(masm, decorators, type, dst, val, tmp1, tmp2); ++ } ++} +diff --git a/src/hotspot/cpu/loongarch/gc/shared/modRefBarrierSetAssembler_loongarch.hpp b/src/hotspot/cpu/loongarch/gc/shared/modRefBarrierSetAssembler_loongarch.hpp +new file mode 100644 +index 0000000000..8043220eff +--- /dev/null ++++ b/src/hotspot/cpu/loongarch/gc/shared/modRefBarrierSetAssembler_loongarch.hpp +@@ -0,0 +1,54 @@ ++/* ++ * Copyright (c) 2018, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2018, 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#ifndef CPU_LOONGARCH_GC_SHARED_MODREFBARRIERSETASSEMBLER_LOONGARCH_HPP ++#define CPU_LOONGARCH_GC_SHARED_MODREFBARRIERSETASSEMBLER_LOONGARCH_HPP ++ ++#include "asm/macroAssembler.hpp" ++#include "gc/shared/barrierSetAssembler.hpp" ++ ++// The ModRefBarrierSetAssembler filters away accesses on BasicTypes other ++// than T_OBJECT/T_ARRAY (oops). The oop accesses call one of the protected ++// accesses, which are overridden in the concrete BarrierSetAssembler. 
++ ++class ModRefBarrierSetAssembler: public BarrierSetAssembler { ++protected: ++ virtual void gen_write_ref_array_pre_barrier(MacroAssembler* masm, DecoratorSet decorators, ++ Register addr, Register count, RegSet saved_regs) {} ++ virtual void gen_write_ref_array_post_barrier(MacroAssembler* masm, DecoratorSet decorators, ++ Register addr, Register count, Register tmp, RegSet saved_regs) {} ++ virtual void oop_store_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type, ++ Address dst, Register val, Register tmp1, Register tmp2) = 0; ++public: ++ virtual void arraycopy_prologue(MacroAssembler* masm, DecoratorSet decorators, bool is_oop, ++ Register dst, Register count, RegSet saved_regs); ++ virtual void arraycopy_epilogue(MacroAssembler* masm, DecoratorSet decorators, bool is_oop, ++ Register dst, Register count, Register scratch, RegSet saved_regs); ++ ++ virtual void store_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type, ++ Address dst, Register val, Register tmp1, Register tmp2); ++}; ++ ++#endif // CPU_LOONGARCH_GC_SHARED_MODREFBARRIERSETASSEMBLER_LOONGARCH_HPP +diff --git a/src/hotspot/cpu/loongarch/globalDefinitions_loongarch.hpp b/src/hotspot/cpu/loongarch/globalDefinitions_loongarch.hpp +new file mode 100644 +index 0000000000..dc21d001cc +--- /dev/null ++++ b/src/hotspot/cpu/loongarch/globalDefinitions_loongarch.hpp +@@ -0,0 +1,53 @@ ++/* ++ * Copyright (c) 1999, 2013, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2021, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#ifndef CPU_LOONGARCH_GLOBALDEFINITIONS_LOONGARCH_HPP ++#define CPU_LOONGARCH_GLOBALDEFINITIONS_LOONGARCH_HPP ++// Size of LoongArch Instructions ++const int BytesPerInstWord = 4; ++ ++const int StackAlignmentInBytes = (2*wordSize); ++ ++// Indicates whether the C calling conventions require that ++// 32-bit integer argument values are properly extended to 64 bits. ++// If set, SharedRuntime::c_calling_convention() must adapt ++// signatures accordingly. ++const bool CCallingConventionRequiresIntsAsLongs = false; ++ ++#define SUPPORTS_NATIVE_CX8 ++ ++// FIXME: LA ++// This makes the games we play when patching difficult, so when we ++// come across an access that needs patching we deoptimize. There are ++// ways we can avoid this, but these would slow down C1-compiled code ++// in the default case. We could revisit this decision if we get any ++// evidence that it's worth doing. 
++#define DEOPTIMIZE_WHEN_PATCHING ++ ++#define SUPPORT_RESERVED_STACK_AREA ++ ++#define THREAD_LOCAL_POLL ++ ++#endif // CPU_LOONGARCH_GLOBALDEFINITIONS_LOONGARCH_HPP +diff --git a/src/hotspot/cpu/loongarch/globals_loongarch.hpp b/src/hotspot/cpu/loongarch/globals_loongarch.hpp +new file mode 100644 +index 0000000000..e6b758b554 +--- /dev/null ++++ b/src/hotspot/cpu/loongarch/globals_loongarch.hpp +@@ -0,0 +1,109 @@ ++/* ++ * Copyright (c) 2000, 2013, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#ifndef CPU_LOONGARCH_GLOBALS_LOONGARCH_HPP ++#define CPU_LOONGARCH_GLOBALS_LOONGARCH_HPP ++ ++#include "utilities/globalDefinitions.hpp" ++#include "utilities/macros.hpp" ++ ++// Sets the default values for platform dependent flags used by the runtime system. ++// (see globals.hpp) ++ ++define_pd_global(bool, ShareVtableStubs, true); ++define_pd_global(bool, NeedsDeoptSuspend, false); // only register window machines need this ++ ++define_pd_global(bool, ImplicitNullChecks, true); // Generate code for implicit null checks ++define_pd_global(bool, TrapBasedNullChecks, false); // Not needed on x86. ++define_pd_global(bool, UncommonNullCast, true); // Uncommon-trap NULLs passed to check cast ++ ++define_pd_global(uintx, CodeCacheSegmentSize, 64 TIERED_ONLY(+64)); // Tiered compilation has large code-entry alignment. 
++define_pd_global(intx, CodeEntryAlignment, 16); ++define_pd_global(intx, OptoLoopAlignment, 16); ++define_pd_global(intx, InlineFrequencyCount, 100); ++define_pd_global(intx, InlineSmallCode, 2000); ++ ++#define DEFAULT_STACK_YELLOW_PAGES (2) ++#define DEFAULT_STACK_RED_PAGES (1) ++#define DEFAULT_STACK_SHADOW_PAGES (20 DEBUG_ONLY(+4)) ++#define DEFAULT_STACK_RESERVED_PAGES (1) ++ ++#define MIN_STACK_YELLOW_PAGES DEFAULT_STACK_YELLOW_PAGES ++#define MIN_STACK_RED_PAGES DEFAULT_STACK_RED_PAGES ++#define MIN_STACK_SHADOW_PAGES DEFAULT_STACK_SHADOW_PAGES ++#define MIN_STACK_RESERVED_PAGES (0) ++define_pd_global(intx, StackReservedPages, DEFAULT_STACK_RESERVED_PAGES); ++ ++define_pd_global(intx, StackYellowPages, 2); ++define_pd_global(intx, StackRedPages, 1); ++define_pd_global(intx, StackShadowPages, DEFAULT_STACK_SHADOW_PAGES); ++ ++define_pd_global(bool, RewriteBytecodes, true); ++define_pd_global(bool, RewriteFrequentPairs, true); ++define_pd_global(bool, UseMembar, true); ++// GC Ergo Flags ++define_pd_global(intx, CMSYoungGenPerWorker, 64*M); // default max size of CMS young gen, per GC worker thread ++ ++define_pd_global(uintx, TypeProfileLevel, 111); ++ ++define_pd_global(bool, CompactStrings, true); ++ ++define_pd_global(bool, PreserveFramePointer, false); ++ ++define_pd_global(intx, InitArrayShortSize, 8*BytesPerLong); ++ ++define_pd_global(bool, ThreadLocalHandshakes, true); ++// Only c2 cares about this at the moment ++define_pd_global(intx, AllocatePrefetchStyle, 2); ++define_pd_global(intx, AllocatePrefetchDistance, -1); ++ ++#define ARCH_FLAGS(develop, \ ++ product, \ ++ diagnostic, \ ++ experimental, \ ++ notproduct, \ ++ range, \ ++ constraint, \ ++ writeable) \ ++ \ ++ product(bool, UseCodeCacheAllocOpt, true, \ ++ "Allocate code cache within 32-bit memory address space") \ ++ \ ++ product(bool, UseLSX, false, \ ++ "Use LSX 128-bit vector instructions") \ ++ \ ++ product(bool, UseLASX, false, \ ++ "Use LASX 256-bit vector instructions") \ ++ \ ++ product(bool, UseBarriersForVolatile, false, \ ++ "Use memory barriers to implement volatile accesses") \ ++ \ ++ product(bool, UseCRC32, false, \ ++ "Use CRC32 instructions for CRC32 computation") \ ++ \ ++ product(bool, UseActiveCoresMP, false, \ ++ "Eliminate barriers for single active cpu") ++ ++#endif // CPU_LOONGARCH_GLOBALS_LOONGARCH_HPP +diff --git a/src/hotspot/cpu/loongarch/icBuffer_loongarch.cpp b/src/hotspot/cpu/loongarch/icBuffer_loongarch.cpp +new file mode 100644 +index 0000000000..7b97694827 +--- /dev/null ++++ b/src/hotspot/cpu/loongarch/icBuffer_loongarch.cpp +@@ -0,0 +1,92 @@ ++/* ++ * Copyright (c) 1997, 2012, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2021, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). 
++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#include "precompiled.hpp" ++#include "asm/macroAssembler.hpp" ++#include "asm/macroAssembler.inline.hpp" ++#include "code/icBuffer.hpp" ++#include "gc/shared/collectedHeap.inline.hpp" ++#include "interpreter/bytecodes.hpp" ++#include "memory/resourceArea.hpp" ++#include "nativeInst_loongarch.hpp" ++#include "oops/oop.inline.hpp" ++ ++#define T0 RT0 ++#define T1 RT1 ++#define T2 RT2 ++#define T3 RT3 ++#define T4 RT4 ++#define T5 RT5 ++#define T6 RT6 ++#define T7 RT7 ++#define T8 RT8 ++ ++int InlineCacheBuffer::ic_stub_code_size() { ++ return NativeMovConstReg::instruction_size + ++ NativeGeneralJump::instruction_size + ++ 1; ++ // so that code_end can be set in CodeBuffer ++ // 64bit 15 = 6 + 8 bytes + 1 byte ++ // 32bit 7 = 2 + 4 bytes + 1 byte ++} ++ ++ ++// we use T1 as cached oop(klass) now. this is the target of virtual call, ++// when reach here, the receiver in T0 ++// refer to shareRuntime_loongarch.cpp,gen_i2c2i_adapters ++void InlineCacheBuffer::assemble_ic_buffer_code(address code_begin, void* cached_value, ++ address entry_point) { ++ ResourceMark rm; ++ CodeBuffer code(code_begin, ic_stub_code_size()); ++ MacroAssembler* masm = new MacroAssembler(&code); ++ // note: even though the code contains an embedded oop, we do not need reloc info ++ // because ++ // (1) the oop is old (i.e., doesn't matter for scavenges) ++ // (2) these ICStubs are removed *before* a GC happens, so the roots disappear ++ // assert(cached_oop == NULL || cached_oop->is_perm(), "must be perm oop"); ++#define __ masm-> ++ __ patchable_li52(T1, (long)cached_value); ++ // TODO: confirm reloc ++ __ jmp(entry_point, relocInfo::runtime_call_type); ++ __ flush(); ++#undef __ ++} ++ ++ ++address InlineCacheBuffer::ic_buffer_entry_point(address code_begin) { ++ NativeMovConstReg* move = nativeMovConstReg_at(code_begin); // creation also verifies the object ++ NativeGeneralJump* jump = nativeGeneralJump_at(move->next_instruction_address()); ++ return jump->jump_destination(); ++} ++ ++ ++void* InlineCacheBuffer::ic_buffer_cached_value(address code_begin) { ++ // creation also verifies the object ++ NativeMovConstReg* move = nativeMovConstReg_at(code_begin); ++ // Verifies the jump ++ NativeGeneralJump* jump = nativeGeneralJump_at(move->next_instruction_address()); ++ void* o= (void*)move->data(); ++ return o; ++} +diff --git a/src/hotspot/cpu/loongarch/icache_loongarch.cpp b/src/hotspot/cpu/loongarch/icache_loongarch.cpp +new file mode 100644 +index 0000000000..1ae7e5376c +--- /dev/null ++++ b/src/hotspot/cpu/loongarch/icache_loongarch.cpp +@@ -0,0 +1,42 @@ ++/* ++ * Copyright (c) 1997, 2010, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2021, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. 
++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#include "precompiled.hpp" ++#include "asm/macroAssembler.hpp" ++#include "runtime/icache.hpp" ++ ++void ICacheStubGenerator::generate_icache_flush(ICache::flush_icache_stub_t* flush_icache_stub) ++{ ++#define __ _masm-> ++ StubCodeMark mark(this, "ICache", "flush_icache_stub"); ++ address start = __ pc(); ++ ++ __ ibar(0); ++ __ ori(V0, A2, 0); ++ __ jr(RA); ++ ++ *flush_icache_stub = (ICache::flush_icache_stub_t)start; ++#undef __ ++} +diff --git a/src/hotspot/cpu/loongarch/icache_loongarch.hpp b/src/hotspot/cpu/loongarch/icache_loongarch.hpp +new file mode 100644 +index 0000000000..3a180549fc +--- /dev/null ++++ b/src/hotspot/cpu/loongarch/icache_loongarch.hpp +@@ -0,0 +1,41 @@ ++/* ++ * Copyright (c) 1997, 2010, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#ifndef CPU_LOONGARCH_ICACHE_LOONGARCH_HPP ++#define CPU_LOONGARCH_ICACHE_LOONGARCH_HPP ++ ++// Interface for updating the instruction cache. Whenever the VM modifies ++// code, part of the processor instruction cache potentially has to be flushed. ++ ++class ICache : public AbstractICache { ++ public: ++ enum { ++ stub_size = 3 * BytesPerInstWord, // Size of the icache flush stub in bytes ++ line_size = 32, // flush instruction affects a dword ++ log2_line_size = 5 // log2(line_size) ++ }; ++}; ++ ++#endif // CPU_LOONGARCH_ICACHE_LOONGARCH_HPP +diff --git a/src/hotspot/cpu/loongarch/interp_masm_loongarch.hpp b/src/hotspot/cpu/loongarch/interp_masm_loongarch.hpp +new file mode 100644 +index 0000000000..53a06ba7fd +--- /dev/null ++++ b/src/hotspot/cpu/loongarch/interp_masm_loongarch.hpp +@@ -0,0 +1,281 @@ ++/* ++ * Copyright (c) 2003, 2013, Oracle and/or its affiliates. All rights reserved. 
++ * Copyright (c) 2015, 2021, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#ifndef CPU_LOONGARCH_INTERP_MASM_LOONGARCH_64_HPP ++#define CPU_LOONGARCH_INTERP_MASM_LOONGARCH_64_HPP ++ ++#include "asm/assembler.hpp" ++#include "asm/macroAssembler.hpp" ++#include "asm/macroAssembler.inline.hpp" ++#include "interpreter/invocationCounter.hpp" ++#include "runtime/frame.hpp" ++ ++// This file specializes the assember with interpreter-specific macros ++ ++typedef ByteSize (*OffsetFunction)(uint); ++ ++class InterpreterMacroAssembler: public MacroAssembler { ++#ifndef CC_INTERP ++ private: ++ ++ Register _locals_register; // register that contains the pointer to the locals ++ Register _bcp_register; // register that contains the bcp ++ ++ protected: ++ // Interpreter specific version of call_VM_base ++ virtual void call_VM_leaf_base(address entry_point, ++ int number_of_arguments); ++ ++ virtual void call_VM_base(Register oop_result, ++ Register java_thread, ++ Register last_java_sp, ++ address entry_point, ++ int number_of_arguments, ++ bool check_exceptions); ++ ++ // base routine for all dispatches ++ void dispatch_base(TosState state, address* table, bool verifyoop = true, bool generate_poll = false); ++#endif // CC_INTERP ++ ++ public: ++ void jump_to_entry(address entry); ++ // narrow int return value ++ void narrow(Register result); ++ ++ InterpreterMacroAssembler(CodeBuffer* code) : MacroAssembler(code), _locals_register(LVP), _bcp_register(BCP) {} ++ ++ void get_2_byte_integer_at_bcp(Register reg, Register tmp, int offset); ++ void get_4_byte_integer_at_bcp(Register reg, int offset); ++ ++ virtual void check_and_handle_popframe(Register java_thread); ++ virtual void check_and_handle_earlyret(Register java_thread); ++ ++ void load_earlyret_value(TosState state); ++ ++#ifdef CC_INTERP ++ void save_bcp() { /* not needed in c++ interpreter and harmless */ } ++ void restore_bcp() { /* not needed in c++ interpreter and harmless */ } ++ ++ // Helpers for runtime call arguments/results ++ void get_method(Register reg); ++ ++#else ++ ++ // Interpreter-specific registers ++ void save_bcp() { ++ st_d(BCP, FP, frame::interpreter_frame_bcp_offset * wordSize); ++ } ++ ++ void restore_bcp() { ++ ld_d(BCP, FP, frame::interpreter_frame_bcp_offset * wordSize); ++ } ++ ++ void restore_locals() { ++ ld_d(LVP, FP, frame::interpreter_frame_locals_offset * wordSize); ++ } ++ ++ // Helpers for runtime call arguments/results ++ void get_method(Register reg) { ++ ld_d(reg, FP, frame::interpreter_frame_method_offset 
* wordSize); ++ } ++ ++ void get_const(Register reg){ ++ get_method(reg); ++ ld_d(reg, reg, in_bytes(Method::const_offset())); ++ } ++ ++ void get_constant_pool(Register reg) { ++ get_const(reg); ++ ld_d(reg, reg, in_bytes(ConstMethod::constants_offset())); ++ } ++ ++ void get_constant_pool_cache(Register reg) { ++ get_constant_pool(reg); ++ ld_d(reg, reg, ConstantPool::cache_offset_in_bytes()); ++ } ++ ++ void get_cpool_and_tags(Register cpool, Register tags) { ++ get_constant_pool(cpool); ++ ld_d(tags, cpool, ConstantPool::tags_offset_in_bytes()); ++ } ++ ++ void get_unsigned_2_byte_index_at_bcp(Register reg, int bcp_offset); ++ void get_cache_and_index_at_bcp(Register cache, Register index, int bcp_offset, size_t index_size = sizeof(u2)); ++ void get_cache_and_index_and_bytecode_at_bcp(Register cache, Register index, Register bytecode, int byte_no, int bcp_offset, size_t index_size = sizeof(u2)); ++ void get_cache_entry_pointer_at_bcp(Register cache, Register tmp, int bcp_offset, size_t index_size = sizeof(u2)); ++ void get_cache_index_at_bcp(Register index, int bcp_offset, size_t index_size = sizeof(u2)); ++ void get_method_counters(Register method, Register mcs, Label& skip); ++ ++ // load cpool->resolved_references(index); ++ void load_resolved_reference_at_index(Register result, Register index, Register tmp); ++ ++ // load cpool->resolved_klass_at(index) ++ void load_resolved_klass_at_index(Register cpool, // the constant pool (corrupted on return) ++ Register index, // the constant pool index (corrupted on return) ++ Register klass); // contains the Klass on return ++ ++ void pop_ptr( Register r = FSR); ++ void pop_i( Register r = FSR); ++ void pop_l( Register r = FSR); ++ void pop_f(FloatRegister r = FSF); ++ void pop_d(FloatRegister r = FSF); ++ ++ void push_ptr( Register r = FSR); ++ void push_i( Register r = FSR); ++ void push_l( Register r = FSR); ++ void push_f(FloatRegister r = FSF); ++ void push_d(FloatRegister r = FSF); ++ ++ void pop(Register r ) { ((MacroAssembler*)this)->pop(r); } ++ ++ void push(Register r ) { ((MacroAssembler*)this)->push(r); } ++ ++ void pop(TosState state); // transition vtos -> state ++ void push(TosState state); // transition state -> vtos ++ ++ void empty_expression_stack() { ++ ld_d(SP, FP, frame::interpreter_frame_monitor_block_top_offset * wordSize); ++ // NULL last_sp until next java call ++ st_d(R0, FP, frame::interpreter_frame_last_sp_offset * wordSize); ++ } ++ ++ // Super call_VM calls - correspond to MacroAssembler::call_VM(_leaf) calls ++ void load_ptr(int n, Register val); ++ void store_ptr(int n, Register val); ++ ++ // Generate a subtype check: branch to ok_is_subtype if sub_klass is ++ // a subtype of super_klass. ++ //void gen_subtype_check( Register sub_klass, Label &ok_is_subtype ); ++ void gen_subtype_check( Register Rsup_klass, Register sub_klass, Label &ok_is_subtype ); ++ ++ // Dispatching ++ void dispatch_prolog(TosState state, int step = 0); ++ void dispatch_epilog(TosState state, int step = 0); ++ void dispatch_only(TosState state, bool generate_poll = false); ++ void dispatch_only_normal(TosState state); ++ void dispatch_only_noverify(TosState state); ++ void dispatch_next(TosState state, int step = 0, bool generate_poll = false); ++ void dispatch_via (TosState state, address* table); ++ ++ // jump to an invoked target ++ void prepare_to_jump_from_interpreted(); ++ void jump_from_interpreted(Register method, Register temp); ++ ++ ++ // Returning from interpreted functions ++ // ++ // Removes the current activation (incl. 
unlocking of monitors) ++ // and sets up the return address. This code is also used for ++ // exception unwindwing. In that case, we do not want to throw ++ // IllegalMonitorStateExceptions, since that might get us into an ++ // infinite rethrow exception loop. ++ // Additionally this code is used for popFrame and earlyReturn. ++ // In popFrame case we want to skip throwing an exception, ++ // installing an exception, and notifying jvmdi. ++ // In earlyReturn case we only want to skip throwing an exception ++ // and installing an exception. ++ void remove_activation(TosState state, Register ret_addr, ++ bool throw_monitor_exception = true, ++ bool install_monitor_exception = true, ++ bool notify_jvmdi = true); ++#endif // CC_INTERP ++ ++ // Object locking ++ void lock_object (Register lock_reg); ++ void unlock_object(Register lock_reg); ++ ++#ifndef CC_INTERP ++ ++ // Interpreter profiling operations ++ void set_method_data_pointer_for_bcp(); ++ void test_method_data_pointer(Register mdp, Label& zero_continue); ++ void verify_method_data_pointer(); ++ ++ void set_mdp_data_at(Register mdp_in, int constant, Register value); ++ void increment_mdp_data_at(Address data, bool decrement = false); ++ void increment_mdp_data_at(Register mdp_in, int constant, ++ bool decrement = false); ++ void increment_mdp_data_at(Register mdp_in, Register reg, int constant, ++ bool decrement = false); ++ void increment_mask_and_jump(Address counter_addr, ++ int increment, int mask, ++ Register scratch, bool preloaded, ++ Condition cond, Label* where); ++ void set_mdp_flag_at(Register mdp_in, int flag_constant); ++ void test_mdp_data_at(Register mdp_in, int offset, Register value, ++ Register test_value_out, ++ Label& not_equal_continue); ++ ++ void record_klass_in_profile(Register receiver, Register mdp, ++ Register reg2, bool is_virtual_call); ++ void record_klass_in_profile_helper(Register receiver, Register mdp, ++ Register reg2, int start_row, ++ Label& done, bool is_virtual_call); ++ ++ void record_item_in_profile_helper(Register item, Register mdp, ++ Register reg2, int start_row, Label& done, int total_rows, ++ OffsetFunction item_offset_fn, OffsetFunction item_count_offset_fn, ++ int non_profiled_offset); ++ void update_mdp_by_offset(Register mdp_in, int offset_of_offset); ++ void update_mdp_by_offset(Register mdp_in, Register reg, int offset_of_disp); ++ void update_mdp_by_constant(Register mdp_in, int constant); ++ void update_mdp_for_ret(Register return_bci); ++ ++ void profile_taken_branch(Register mdp, Register bumped_count); ++ void profile_not_taken_branch(Register mdp); ++ void profile_call(Register mdp); ++ void profile_final_call(Register mdp); ++ void profile_virtual_call(Register receiver, Register mdp, ++ Register scratch2, ++ bool receiver_can_be_null = false); ++ void profile_called_method(Register method, Register mdp, Register reg2) NOT_JVMCI_RETURN; ++ void profile_ret(Register return_bci, Register mdp); ++ void profile_null_seen(Register mdp); ++ void profile_typecheck(Register mdp, Register klass, Register scratch); ++ void profile_typecheck_failed(Register mdp); ++ void profile_switch_default(Register mdp); ++ void profile_switch_case(Register index_in_scratch, Register mdp, ++ Register scratch2); ++ ++ // Debugging ++ // only if +VerifyOops && state == atos ++ void verify_oop(Register reg, TosState state = atos); ++ // only if +VerifyFPU && (state == ftos || state == dtos) ++ void verify_FPU(int stack_depth, TosState state = ftos); ++ ++ void profile_obj_type(Register obj, const 
Address& mdo_addr); ++ void profile_arguments_type(Register mdp, Register callee, Register tmp, bool is_virtual); ++ void profile_return_type(Register mdp, Register ret, Register tmp); ++ void profile_parameters_type(Register mdp, Register tmp1, Register tmp2); ++#endif // !CC_INTERP ++ ++ typedef enum { NotifyJVMTI, SkipNotifyJVMTI } NotifyMethodExitMode; ++ ++ // support for jvmti/dtrace ++ void notify_method_entry(); ++ void notify_method_exit(TosState state, NotifyMethodExitMode mode); ++}; ++ ++#endif // CPU_LOONGARCH_INTERP_MASM_LOONGARCH_64_HPP +diff --git a/src/hotspot/cpu/loongarch/interp_masm_loongarch_64.cpp b/src/hotspot/cpu/loongarch/interp_masm_loongarch_64.cpp +new file mode 100644 +index 0000000000..c533a57652 +--- /dev/null ++++ b/src/hotspot/cpu/loongarch/interp_masm_loongarch_64.cpp +@@ -0,0 +1,2043 @@ ++/* ++ * Copyright (c) 2003, 2013, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ * ++ */ ++ ++#include "precompiled.hpp" ++#include "gc/shared/barrierSet.hpp" ++#include "gc/shared/barrierSetAssembler.hpp" ++#include "interp_masm_loongarch.hpp" ++#include "interpreter/interpreter.hpp" ++#include "interpreter/interpreterRuntime.hpp" ++#include "oops/arrayOop.hpp" ++#include "oops/markOop.hpp" ++#include "oops/methodData.hpp" ++#include "oops/method.hpp" ++#include "prims/jvmtiExport.hpp" ++#include "prims/jvmtiThreadState.hpp" ++#include "runtime/basicLock.hpp" ++#include "runtime/biasedLocking.hpp" ++#include "runtime/frame.inline.hpp" ++#include "runtime/safepointMechanism.hpp" ++#include "runtime/sharedRuntime.hpp" ++#include "runtime/thread.inline.hpp" ++ ++#define T0 RT0 ++#define T1 RT1 ++#define T2 RT2 ++#define T3 RT3 ++#define T4 RT4 ++#define T5 RT5 ++#define T6 RT6 ++#define T7 RT7 ++#define T8 RT8 ++ ++// Implementation of InterpreterMacroAssembler ++ ++#ifdef CC_INTERP ++void InterpreterMacroAssembler::get_method(Register reg) { ++} ++#endif // CC_INTERP ++ ++void InterpreterMacroAssembler::get_2_byte_integer_at_bcp(Register reg, Register tmp, int offset) { ++ if (UseUnalignedAccesses) { ++ ld_hu(reg, BCP, offset); ++ } else { ++ ld_bu(reg, BCP, offset); ++ ld_bu(tmp, BCP, offset + 1); ++ bstrins_d(reg, tmp, 15, 8); ++ } ++} ++ ++void InterpreterMacroAssembler::get_4_byte_integer_at_bcp(Register reg, int offset) { ++ if (UseUnalignedAccesses) { ++ ld_wu(reg, BCP, offset); ++ } else { ++ ldr_w(reg, BCP, offset); ++ ldl_w(reg, BCP, offset + 3); ++ lu32i_d(reg, 0); ++ } ++} ++ ++void InterpreterMacroAssembler::jump_to_entry(address entry) { ++ assert(entry, "Entry must have been generated by now"); ++ jmp(entry); ++} ++ ++#ifndef CC_INTERP ++ ++void InterpreterMacroAssembler::call_VM_leaf_base(address entry_point, ++ int number_of_arguments) { ++ // interpreter specific ++ // ++ // Note: No need to save/restore bcp & locals pointer ++ // since these are callee saved registers and no blocking/ ++ // GC can happen in leaf calls. ++ // Further Note: DO NOT save/restore bcp/locals. If a caller has ++ // already saved them so that it can use BCP/LVP as temporaries ++ // then a save/restore here will DESTROY the copy the caller ++ // saved! There used to be a save_bcp() that only happened in ++ // the ASSERT path (no restore_bcp). Which caused bizarre failures ++ // when jvm built with ASSERTs. ++#ifdef ASSERT ++ save_bcp(); ++ { ++ Label L; ++ ld_d(AT,FP,frame::interpreter_frame_last_sp_offset * wordSize); ++ beq(AT,R0,L); ++ stop("InterpreterMacroAssembler::call_VM_leaf_base: last_sp != NULL"); ++ bind(L); ++ } ++#endif ++ // super call ++ MacroAssembler::call_VM_leaf_base(entry_point, number_of_arguments); ++ // interpreter specific ++ // Used to ASSERT that BCP/LVP were equal to frame's bcp/locals ++ // but since they may not have been saved (and we don't want to ++ // save them here (see note above) the assert is invalid. ++} ++ ++void InterpreterMacroAssembler::call_VM_base(Register oop_result, ++ Register java_thread, ++ Register last_java_sp, ++ address entry_point, ++ int number_of_arguments, ++ bool check_exceptions) { ++ // interpreter specific ++ // ++ // Note: Could avoid restoring locals ptr (callee saved) - however doesn't ++ // really make a difference for these runtime calls, since they are ++ // slow anyway. Btw., bcp must be saved/restored since it may change ++ // due to GC. 
++ assert(java_thread == noreg , "not expecting a precomputed java thread"); ++ save_bcp(); ++#ifdef ASSERT ++ { ++ Label L; ++ ld_d(AT, FP, frame::interpreter_frame_last_sp_offset * wordSize); ++ beq(AT, R0, L); ++ stop("InterpreterMacroAssembler::call_VM_base: last_sp != NULL"); ++ bind(L); ++ } ++#endif /* ASSERT */ ++ // super call ++ MacroAssembler::call_VM_base(oop_result, java_thread, last_java_sp, ++ entry_point, number_of_arguments, ++ check_exceptions); ++ // interpreter specific ++ restore_bcp(); ++ restore_locals(); ++} ++ ++ ++void InterpreterMacroAssembler::check_and_handle_popframe(Register java_thread) { ++ if (JvmtiExport::can_pop_frame()) { ++ Label L; ++ // Initiate popframe handling only if it is not already being ++ // processed. If the flag has the popframe_processing bit set, it ++ // means that this code is called *during* popframe handling - we ++ // don't want to reenter. ++ // This method is only called just after the call into the vm in ++ // call_VM_base, so the arg registers are available. ++ // Not clear if any other register is available, so load AT twice ++ assert(AT != java_thread, "check"); ++ ld_w(AT, java_thread, in_bytes(JavaThread::popframe_condition_offset())); ++ andi(AT, AT, JavaThread::popframe_pending_bit); ++ beq(AT, R0, L); ++ ++ ld_w(AT, java_thread, in_bytes(JavaThread::popframe_condition_offset())); ++ andi(AT, AT, JavaThread::popframe_processing_bit); ++ bne(AT, R0, L); ++ call_VM_leaf(CAST_FROM_FN_PTR(address, Interpreter::remove_activation_preserving_args_entry)); ++ jr(V0); ++ bind(L); ++ } ++} ++ ++ ++void InterpreterMacroAssembler::load_earlyret_value(TosState state) { ++#ifndef OPT_THREAD ++ Register thread = T8; ++ get_thread(thread); ++#else ++ Register thread = TREG; ++#endif ++ ld_ptr(T8, thread, in_bytes(JavaThread::jvmti_thread_state_offset())); ++ const Address tos_addr (T8, in_bytes(JvmtiThreadState::earlyret_tos_offset())); ++ const Address oop_addr (T8, in_bytes(JvmtiThreadState::earlyret_oop_offset())); ++ const Address val_addr (T8, in_bytes(JvmtiThreadState::earlyret_value_offset())); ++ //V0, oop_addr,V1,val_addr ++ switch (state) { ++ case atos: ++ ld_ptr(V0, oop_addr); ++ st_ptr(R0, oop_addr); ++ verify_oop(V0, state); ++ break; ++ case ltos: ++ ld_ptr(V0, val_addr); // fall through ++ break; ++ case btos: // fall through ++ case ztos: // fall through ++ case ctos: // fall through ++ case stos: // fall through ++ case itos: ++ ld_w(V0, val_addr); ++ break; ++ case ftos: ++ fld_s(F0, T8, in_bytes(JvmtiThreadState::earlyret_value_offset())); ++ break; ++ case dtos: ++ fld_d(F0, T8, in_bytes(JvmtiThreadState::earlyret_value_offset())); ++ break; ++ case vtos: /* nothing to do */ break; ++ default : ShouldNotReachHere(); ++ } ++ // Clean up tos value in the thread object ++ li(AT, (int)ilgl); ++ st_w(AT, tos_addr); ++ st_w(R0, T8, in_bytes(JvmtiThreadState::earlyret_value_offset())); ++} ++ ++ ++void InterpreterMacroAssembler::check_and_handle_earlyret(Register java_thread) { ++ if (JvmtiExport::can_force_early_return()) { ++ Label L; ++ Register tmp = T4; ++ ++ assert(java_thread != AT, "check"); ++ assert(java_thread != tmp, "check"); ++ ld_ptr(AT, java_thread, in_bytes(JavaThread::jvmti_thread_state_offset())); ++ beq(AT, R0, L); ++ ++ // Initiate earlyret handling only if it is not already being processed. ++ // If the flag has the earlyret_processing bit set, it means that this code ++ // is called *during* earlyret handling - we don't want to reenter. 
++ ld_w(AT, AT, in_bytes(JvmtiThreadState::earlyret_state_offset())); ++ li(tmp, JvmtiThreadState::earlyret_pending); ++ bne(tmp, AT, L); ++ ++ // Call Interpreter::remove_activation_early_entry() to get the address of the ++ // same-named entrypoint in the generated interpreter code. ++ ld_ptr(tmp, java_thread, in_bytes(JavaThread::jvmti_thread_state_offset())); ++ ld_w(AT, tmp, in_bytes(JvmtiThreadState::earlyret_tos_offset())); ++ move(A0, AT); ++ call_VM_leaf(CAST_FROM_FN_PTR(address, Interpreter::remove_activation_early_entry), A0); ++ jr(V0); ++ bind(L); ++ } ++} ++ ++ ++void InterpreterMacroAssembler::get_unsigned_2_byte_index_at_bcp(Register reg, ++ int bcp_offset) { ++ assert(bcp_offset >= 0, "bcp is still pointing to start of bytecode"); ++ ld_bu(AT, BCP, bcp_offset); ++ ld_bu(reg, BCP, bcp_offset + 1); ++ bstrins_w(reg, AT, 15, 8); ++} ++ ++ ++void InterpreterMacroAssembler::get_cache_index_at_bcp(Register index, ++ int bcp_offset, ++ size_t index_size) { ++ assert(bcp_offset > 0, "bcp is still pointing to start of bytecode"); ++ if (index_size == sizeof(u2)) { ++ get_2_byte_integer_at_bcp(index, AT, bcp_offset); ++ } else if (index_size == sizeof(u4)) { ++ get_4_byte_integer_at_bcp(index, bcp_offset); ++ // Check if the secondary index definition is still ~x, otherwise ++ // we have to change the following assembler code to calculate the ++ // plain index. ++ assert(ConstantPool::decode_invokedynamic_index(~123) == 123, "else change next line"); ++ nor(index, index, R0); ++ slli_w(index, index, 0); ++ } else if (index_size == sizeof(u1)) { ++ ld_bu(index, BCP, bcp_offset); ++ } else { ++ ShouldNotReachHere(); ++ } ++} ++ ++ ++void InterpreterMacroAssembler::get_cache_and_index_at_bcp(Register cache, ++ Register index, ++ int bcp_offset, ++ size_t index_size) { ++ assert_different_registers(cache, index); ++ get_cache_index_at_bcp(index, bcp_offset, index_size); ++ ld_d(cache, FP, frame::interpreter_frame_cache_offset * wordSize); ++ assert(sizeof(ConstantPoolCacheEntry) == 4 * wordSize, "adjust code below"); ++ assert(exact_log2(in_words(ConstantPoolCacheEntry::size())) == 2, "else change next line"); ++ shl(index, 2); ++} ++ ++ ++void InterpreterMacroAssembler::get_cache_and_index_and_bytecode_at_bcp(Register cache, ++ Register index, ++ Register bytecode, ++ int byte_no, ++ int bcp_offset, ++ size_t index_size) { ++ get_cache_and_index_at_bcp(cache, index, bcp_offset, index_size); ++ // We use a 32-bit load here since the layout of 64-bit words on ++ // little-endian machines allow us that. 
++ alsl_d(AT, index, cache, Address::times_ptr - 1);
++ ld_w(bytecode, AT, in_bytes(ConstantPoolCache::base_offset() + ConstantPoolCacheEntry::indices_offset()));
++ if(os::is_MP()) {
++ membar(Assembler::Membar_mask_bits(LoadLoad|LoadStore));
++ }
++
++ const int shift_count = (1 + byte_no) * BitsPerByte;
++ assert((byte_no == TemplateTable::f1_byte && shift_count == ConstantPoolCacheEntry::bytecode_1_shift) ||
++ (byte_no == TemplateTable::f2_byte && shift_count == ConstantPoolCacheEntry::bytecode_2_shift),
++ "correct shift count");
++ srli_d(bytecode, bytecode, shift_count);
++ assert(ConstantPoolCacheEntry::bytecode_1_mask == ConstantPoolCacheEntry::bytecode_2_mask, "common mask");
++ li(AT, ConstantPoolCacheEntry::bytecode_1_mask);
++ andr(bytecode, bytecode, AT);
++}
++
++void InterpreterMacroAssembler::get_cache_entry_pointer_at_bcp(Register cache,
++ Register tmp,
++ int bcp_offset,
++ size_t index_size) {
++ assert(bcp_offset > 0, "bcp is still pointing to start of bytecode");
++ assert(cache != tmp, "must use different register");
++ get_cache_index_at_bcp(tmp, bcp_offset, index_size);
++ assert(sizeof(ConstantPoolCacheEntry) == 4 * wordSize, "adjust code below");
++ // convert from field index to ConstantPoolCacheEntry index
++ // and from word offset to byte offset
++ assert(exact_log2(in_bytes(ConstantPoolCacheEntry::size_in_bytes())) == 2 + LogBytesPerWord, "else change next line");
++ shl(tmp, 2 + LogBytesPerWord);
++ ld_d(cache, FP, frame::interpreter_frame_cache_offset * wordSize);
++ // skip past the header
++ addi_d(cache, cache, in_bytes(ConstantPoolCache::base_offset()));
++ add_d(cache, cache, tmp);
++}
++
++void InterpreterMacroAssembler::get_method_counters(Register method,
++ Register mcs, Label& skip) {
++ Label has_counters;
++ ld_d(mcs, method, in_bytes(Method::method_counters_offset()));
++ bne(mcs, R0, has_counters);
++ call_VM(noreg, CAST_FROM_FN_PTR(address,
++ InterpreterRuntime::build_method_counters), method);
++ ld_d(mcs, method, in_bytes(Method::method_counters_offset()));
++ beq(mcs, R0, skip); // No MethodCounters allocated, OutOfMemory
++ bind(has_counters);
++}
++
++// Load object from cpool->resolved_references(index)
++void InterpreterMacroAssembler::load_resolved_reference_at_index(
++ Register result, Register index, Register tmp) {
++ assert_different_registers(result, index);
++ // convert from field index to resolved_references() index and from
++ // word index to byte offset. Since this is a java object, it can be compressed
++ shl(index, LogBytesPerHeapOop);
++
++ get_constant_pool(result);
++ // load pointer for resolved_references[] objArray
++ ld_d(result, result, ConstantPool::cache_offset_in_bytes());
++ ld_d(result, result, ConstantPoolCache::resolved_references_offset_in_bytes());
++ resolve_oop_handle(result, tmp);
++ // Add in the index
++ add_d(result, result, index);
++ load_heap_oop(result, Address(result, arrayOopDesc::base_offset_in_bytes(T_OBJECT)), tmp);
++}
++
++// load cpool->resolved_klass_at(index)
++void InterpreterMacroAssembler::load_resolved_klass_at_index(Register cpool,
++ Register index, Register klass) {
++ alsl_d(AT, index, cpool, Address::times_ptr - 1);
++ ld_h(index, AT, sizeof(ConstantPool));
++ Register resolved_klasses = cpool;
++ ld_ptr(resolved_klasses, Address(cpool, ConstantPool::resolved_klasses_offset_in_bytes()));
++ alsl_d(AT, index, resolved_klasses, Address::times_ptr - 1);
++ ld_d(klass, AT, Array<Klass*>::base_offset_in_bytes());
++}
++
++// Resets LVP to locals.
Register sub_klass cannot be any of the above. ++void InterpreterMacroAssembler::gen_subtype_check( Register Rsup_klass, Register Rsub_klass, Label &ok_is_subtype ) { ++ ++ assert( Rsub_klass != Rsup_klass, "Rsup_klass holds superklass" ); ++ assert( Rsub_klass != T1, "T1 holds 2ndary super array length" ); ++ assert( Rsub_klass != T0, "T0 holds 2ndary super array scan ptr" ); ++ // Profile the not-null value's klass. ++ // Here T4 and T1 are used as temporary registers. ++ profile_typecheck(T4, Rsub_klass, T1); // blows T4, reloads T1 ++ ++ // Do the check. ++ check_klass_subtype(Rsub_klass, Rsup_klass, T1, ok_is_subtype); // blows T1 ++ ++ // Profile the failure of the check. ++ profile_typecheck_failed(T4); // blows T4 ++ ++} ++ ++ ++ ++// Java Expression Stack ++ ++void InterpreterMacroAssembler::pop_ptr(Register r) { ++ ld_d(r, SP, 0); ++ addi_d(SP, SP, Interpreter::stackElementSize); ++} ++ ++void InterpreterMacroAssembler::pop_i(Register r) { ++ ld_w(r, SP, 0); ++ addi_d(SP, SP, Interpreter::stackElementSize); ++} ++ ++void InterpreterMacroAssembler::pop_l(Register r) { ++ ld_d(r, SP, 0); ++ addi_d(SP, SP, 2 * Interpreter::stackElementSize); ++} ++ ++void InterpreterMacroAssembler::pop_f(FloatRegister r) { ++ fld_s(r, SP, 0); ++ addi_d(SP, SP, Interpreter::stackElementSize); ++} ++ ++void InterpreterMacroAssembler::pop_d(FloatRegister r) { ++ fld_d(r, SP, 0); ++ addi_d(SP, SP, 2 * Interpreter::stackElementSize); ++} ++ ++void InterpreterMacroAssembler::push_ptr(Register r) { ++ addi_d(SP, SP, - Interpreter::stackElementSize); ++ st_d(r, SP, 0); ++} ++ ++void InterpreterMacroAssembler::push_i(Register r) { ++ // For compatibility reason, don't change to sw. ++ addi_d(SP, SP, - Interpreter::stackElementSize); ++ st_d(r, SP, 0); ++} ++ ++void InterpreterMacroAssembler::push_l(Register r) { ++ addi_d(SP, SP, -2 * Interpreter::stackElementSize); ++ st_d(r, SP, 0); ++ st_d(R0, SP, Interpreter::stackElementSize); ++} ++ ++void InterpreterMacroAssembler::push_f(FloatRegister r) { ++ addi_d(SP, SP, - Interpreter::stackElementSize); ++ fst_s(r, SP, 0); ++} ++ ++void InterpreterMacroAssembler::push_d(FloatRegister r) { ++ addi_d(SP, SP, -2 * Interpreter::stackElementSize); ++ fst_d(r, SP, 0); ++ st_d(R0, SP, Interpreter::stackElementSize); ++} ++ ++void InterpreterMacroAssembler::pop(TosState state) { ++ switch (state) { ++ case atos: pop_ptr(); break; ++ case btos: ++ case ztos: ++ case ctos: ++ case stos: ++ case itos: pop_i(); break; ++ case ltos: pop_l(); break; ++ case ftos: pop_f(); break; ++ case dtos: pop_d(); break; ++ case vtos: /* nothing to do */ break; ++ default: ShouldNotReachHere(); ++ } ++ verify_oop(FSR, state); ++} ++ ++//FSR=V0,SSR=V1 ++void InterpreterMacroAssembler::push(TosState state) { ++ verify_oop(FSR, state); ++ switch (state) { ++ case atos: push_ptr(); break; ++ case btos: ++ case ztos: ++ case ctos: ++ case stos: ++ case itos: push_i(); break; ++ case ltos: push_l(); break; ++ case ftos: push_f(); break; ++ case dtos: push_d(); break; ++ case vtos: /* nothing to do */ break; ++ default : ShouldNotReachHere(); ++ } ++} ++ ++void InterpreterMacroAssembler::load_ptr(int n, Register val) { ++ ld_d(val, SP, Interpreter::expr_offset_in_bytes(n)); ++} ++ ++void InterpreterMacroAssembler::store_ptr(int n, Register val) { ++ st_d(val, SP, Interpreter::expr_offset_in_bytes(n)); ++} ++ ++// Jump to from_interpreted entry of a call unless single stepping is possible ++// in this thread in which case we must call the i2i entry ++void 
InterpreterMacroAssembler::jump_from_interpreted(Register method, Register temp) { ++ // record last_sp ++ move(Rsender, SP); ++ st_d(SP, FP, frame::interpreter_frame_last_sp_offset * wordSize); ++ ++ if (JvmtiExport::can_post_interpreter_events()) { ++ Label run_compiled_code; ++ // JVMTI events, such as single-stepping, are implemented partly by avoiding running ++ // compiled code in threads for which the event is enabled. Check here for ++ // interp_only_mode if these events CAN be enabled. ++#ifndef OPT_THREAD ++ Register thread = temp; ++ get_thread(temp); ++#else ++ Register thread = TREG; ++#endif ++ // interp_only is an int, on little endian it is sufficient to test the byte only ++ // Is a cmpl faster? ++ ld_w(AT, thread, in_bytes(JavaThread::interp_only_mode_offset())); ++ beq(AT, R0, run_compiled_code); ++ ld_d(AT, method, in_bytes(Method::interpreter_entry_offset())); ++ jr(AT); ++ bind(run_compiled_code); ++ } ++ ++ ld_d(AT, method, in_bytes(Method::from_interpreted_offset())); ++ jr(AT); ++} ++ ++ ++// The following two routines provide a hook so that an implementation ++// can schedule the dispatch in two parts. LoongArch64 does not do this. ++void InterpreterMacroAssembler::dispatch_prolog(TosState state, int step) { ++ // Nothing LoongArch64 specific to be done here ++} ++ ++void InterpreterMacroAssembler::dispatch_epilog(TosState state, int step) { ++ dispatch_next(state, step); ++} ++ ++// assume the next bytecode in T8. ++void InterpreterMacroAssembler::dispatch_base(TosState state, ++ address* table, ++ bool verifyoop, ++ bool generate_poll) { ++ Register thread = TREG; ++#ifndef OPT_THREAD ++ get_thread(thread); ++#endif ++ ++ if (VerifyActivationFrameSize) { ++ Label L; ++ ++ sub_d(T2, FP, SP); ++ int min_frame_size = (frame::java_frame_link_offset - ++ frame::interpreter_frame_initial_sp_offset) * wordSize; ++ addi_d(T2, T2, -min_frame_size); ++ bge(T2, R0, L); ++ stop("broken stack frame"); ++ bind(L); ++ } ++ // FIXME: I do not know which register should pass to verify_oop ++ if (verifyoop) verify_oop(FSR, state); ++ ++ Label safepoint; ++ address* const safepoint_table = Interpreter::safept_table(state); ++ bool needs_thread_local_poll = generate_poll && ++ SafepointMechanism::uses_thread_local_poll() && table != safepoint_table; ++ ++ if (needs_thread_local_poll) { ++ NOT_PRODUCT(block_comment("Thread-local Safepoint poll")); ++ ld_d(T3, thread, in_bytes(Thread::polling_page_offset())); ++ andi(T3, T3, SafepointMechanism::poll_bit()); ++ bne(T3, R0, safepoint); ++ } ++ ++ if((long)table >= (long)Interpreter::dispatch_table(btos) && ++ (long)table <= (long)Interpreter::dispatch_table(vtos)) { ++ int table_size = (long)Interpreter::dispatch_table(itos) - ++ (long)Interpreter::dispatch_table(stos); ++ int table_offset = ((int)state - (int)itos) * table_size; ++ ++ // S8 points to the starting address of Interpreter::dispatch_table(itos). ++ // See StubGenerator::generate_call_stub(address& return_address) for the initialization of S8. 
++ if (table_offset != 0) { ++ if (is_simm(table_offset, 12)) { ++ alsl_d(T3, Rnext, S8, LogBytesPerWord - 1); ++ ld_d(T3, T3, table_offset); ++ } else { ++ li(T2, table_offset); ++ alsl_d(T3, Rnext, S8, LogBytesPerWord - 1); ++ ldx_d(T3, T2, T3); ++ } ++ } else { ++ slli_d(T2, Rnext, LogBytesPerWord); ++ ldx_d(T3, S8, T2); ++ } ++ } else { ++ li(T3, (long)table); ++ slli_d(T2, Rnext, LogBytesPerWord); ++ ldx_d(T3, T2, T3); ++ } ++ jr(T3); ++ ++ if (needs_thread_local_poll) { ++ bind(safepoint); ++ li(T3, (long)safepoint_table); ++ slli_d(T2, Rnext, LogBytesPerWord); ++ ldx_d(T3, T3, T2); ++ jr(T3); ++ } ++} ++ ++void InterpreterMacroAssembler::dispatch_only(TosState state, bool generate_poll) { ++ dispatch_base(state, Interpreter::dispatch_table(state), true, generate_poll); ++} ++ ++void InterpreterMacroAssembler::dispatch_only_normal(TosState state) { ++ dispatch_base(state, Interpreter::normal_table(state)); ++} ++ ++void InterpreterMacroAssembler::dispatch_only_noverify(TosState state) { ++ dispatch_base(state, Interpreter::normal_table(state), false); ++} ++ ++ ++void InterpreterMacroAssembler::dispatch_next(TosState state, int step, bool generate_poll) { ++ // load next bytecode ++ ld_bu(Rnext, BCP, step); ++ increment(BCP, step); ++ dispatch_base(state, Interpreter::dispatch_table(state), true, generate_poll); ++} ++ ++void InterpreterMacroAssembler::dispatch_via(TosState state, address* table) { ++ // load current bytecode ++ ld_bu(Rnext, BCP, 0); ++ dispatch_base(state, table); ++} ++ ++// remove activation ++// ++// Unlock the receiver if this is a synchronized method. ++// Unlock any Java monitors from syncronized blocks. ++// Remove the activation from the stack. ++// ++// If there are locked Java monitors ++// If throw_monitor_exception ++// throws IllegalMonitorStateException ++// Else if install_monitor_exception ++// installs IllegalMonitorStateException ++// Else ++// no error processing ++// used registers : T1, T2, T3, T8 ++// T1 : thread, method access flags ++// T2 : monitor entry pointer ++// T3 : method, monitor top ++// T8 : unlock flag ++void InterpreterMacroAssembler::remove_activation( ++ TosState state, ++ Register ret_addr, ++ bool throw_monitor_exception, ++ bool install_monitor_exception, ++ bool notify_jvmdi) { ++ // Note: Registers V0, V1 and F0, F1 may be in use for the result ++ // check if synchronized method ++ Label unlocked, unlock, no_unlock; ++ ++ // get the value of _do_not_unlock_if_synchronized into T8 ++#ifndef OPT_THREAD ++ Register thread = T1; ++ get_thread(thread); ++#else ++ Register thread = TREG; ++#endif ++ ld_b(T8, thread, in_bytes(JavaThread::do_not_unlock_if_synchronized_offset())); ++ // reset the flag ++ st_b(R0, thread, in_bytes(JavaThread::do_not_unlock_if_synchronized_offset())); ++ // get method access flags ++ ld_d(T3, FP, frame::interpreter_frame_method_offset * wordSize); ++ ld_w(T1, T3, in_bytes(Method::access_flags_offset())); ++ andi(T1, T1, JVM_ACC_SYNCHRONIZED); ++ beq(T1, R0, unlocked); ++ ++ // Don't unlock anything if the _do_not_unlock_if_synchronized flag is set. ++ bne(T8, R0, no_unlock); ++ // unlock monitor ++ push(state); // save result ++ ++ // BasicObjectLock will be first in list, since this is a ++ // synchronized method. However, need to check that the object has ++ // not been unlocked by an explicit monitorexit bytecode. 
++ addi_d(c_rarg0, FP, frame::interpreter_frame_initial_sp_offset * wordSize ++ - (int)sizeof(BasicObjectLock)); ++ // address of first monitor ++ ld_d(T1, c_rarg0, BasicObjectLock::obj_offset_in_bytes()); ++ bne(T1, R0, unlock); ++ pop(state); ++ if (throw_monitor_exception) { ++ // Entry already unlocked, need to throw exception ++ // I think LA do not need empty_FPU_stack ++ // remove possible return value from FPU-stack, otherwise stack could overflow ++ empty_FPU_stack(); ++ call_VM(NOREG, CAST_FROM_FN_PTR(address, ++ InterpreterRuntime::throw_illegal_monitor_state_exception)); ++ should_not_reach_here(); ++ } else { ++ // Monitor already unlocked during a stack unroll. If requested, ++ // install an illegal_monitor_state_exception. Continue with ++ // stack unrolling. ++ if (install_monitor_exception) { ++ // remove possible return value from FPU-stack, ++ // otherwise stack could overflow ++ empty_FPU_stack(); ++ call_VM(NOREG, CAST_FROM_FN_PTR(address, ++ InterpreterRuntime::new_illegal_monitor_state_exception)); ++ ++ } ++ ++ b(unlocked); ++ } ++ ++ bind(unlock); ++ unlock_object(c_rarg0); ++ pop(state); ++ ++ // Check that for block-structured locking (i.e., that all locked ++ // objects has been unlocked) ++ bind(unlocked); ++ ++ // V0, V1: Might contain return value ++ ++ // Check that all monitors are unlocked ++ { ++ Label loop, exception, entry, restart; ++ const int entry_size = frame::interpreter_frame_monitor_size() * wordSize; ++ const Address monitor_block_top(FP, ++ frame::interpreter_frame_monitor_block_top_offset * wordSize); ++ ++ bind(restart); ++ // points to current entry, starting with top-most entry ++ ld_d(c_rarg0, monitor_block_top); ++ // points to word before bottom of monitor block ++ addi_d(T3, FP, frame::interpreter_frame_initial_sp_offset * wordSize); ++ b(entry); ++ ++ // Entry already locked, need to throw exception ++ bind(exception); ++ ++ if (throw_monitor_exception) { ++ // Throw exception ++ // remove possible return value from FPU-stack, ++ // otherwise stack could overflow ++ empty_FPU_stack(); ++ MacroAssembler::call_VM(NOREG, CAST_FROM_FN_PTR(address, ++ InterpreterRuntime::throw_illegal_monitor_state_exception)); ++ should_not_reach_here(); ++ } else { ++ // Stack unrolling. 
Unlock object and install illegal_monitor_exception ++ // Unlock does not block, so don't have to worry about the frame ++ // We don't have to preserve c_rarg0, since we are going to ++ // throw an exception ++ ++ push(state); ++ unlock_object(c_rarg0); ++ pop(state); ++ ++ if (install_monitor_exception) { ++ empty_FPU_stack(); ++ call_VM(NOREG, CAST_FROM_FN_PTR(address, ++ InterpreterRuntime::new_illegal_monitor_state_exception)); ++ } ++ ++ b(restart); ++ } ++ ++ bind(loop); ++ ld_d(T1, c_rarg0, BasicObjectLock::obj_offset_in_bytes()); ++ bne(T1, R0, exception);// check if current entry is used ++ ++ addi_d(c_rarg0, c_rarg0, entry_size);// otherwise advance to next entry ++ bind(entry); ++ bne(c_rarg0, T3, loop); // check if bottom reached ++ } ++ ++ bind(no_unlock); ++ ++ // jvmpi support (jvmdi does not generate MethodExit on exception / popFrame) ++ if (notify_jvmdi) { ++ notify_method_exit(state, NotifyJVMTI); // preserve TOSCA ++ } else { ++ notify_method_exit(state, SkipNotifyJVMTI); // preserve TOSCA ++ } ++ ++ // remove activation ++ ld_d(TSR, FP, frame::interpreter_frame_sender_sp_offset * wordSize); ++ if (StackReservedPages > 0) { ++ // testing if reserved zone needs to be re-enabled ++ Label no_reserved_zone_enabling; ++ ++ ld_d(AT, Address(thread, JavaThread::reserved_stack_activation_offset())); ++ sub_d(AT, TSR, AT); ++ bge(R0, AT, no_reserved_zone_enabling); ++ ++ call_VM_leaf( ++ CAST_FROM_FN_PTR(address, SharedRuntime::enable_stack_reserved_zone), thread); ++ call_VM(noreg, CAST_FROM_FN_PTR(address, ++ InterpreterRuntime::throw_delayed_StackOverflowError)); ++ should_not_reach_here(); ++ ++ bind(no_reserved_zone_enabling); ++ } ++ ld_d(ret_addr, FP, frame::java_frame_return_addr_offset * wordSize); ++ ld_d(FP, FP, frame::interpreter_frame_sender_fp_offset * wordSize); ++ move(SP, TSR); // set sp to sender sp ++} ++ ++#endif // CC_INTERP ++ ++// Lock object ++// ++// Args: ++// c_rarg0: BasicObjectLock to be used for locking ++// ++// Kills: ++// T1 ++// T2 ++void InterpreterMacroAssembler::lock_object(Register lock_reg) { ++ assert(lock_reg == c_rarg0, "The argument is only for looks. 
It must be c_rarg0"); ++ ++ if (UseHeavyMonitors) { ++ call_VM(NOREG, CAST_FROM_FN_PTR(address, InterpreterRuntime::monitorenter), lock_reg); ++ } else { ++ Label done, slow_case; ++ const Register tmp_reg = T2; ++ const Register scr_reg = T1; ++ const int obj_offset = BasicObjectLock::obj_offset_in_bytes(); ++ const int lock_offset = BasicObjectLock::lock_offset_in_bytes (); ++ const int mark_offset = lock_offset + BasicLock::displaced_header_offset_in_bytes(); ++ ++ // Load object pointer into scr_reg ++ ld_d(scr_reg, lock_reg, obj_offset); ++ ++ if (UseBiasedLocking) { ++ // Note: we use noreg for the temporary register since it's hard ++ // to come up with a free register on all incoming code paths ++ biased_locking_enter(lock_reg, scr_reg, tmp_reg, noreg, false, done, &slow_case); ++ } ++ ++ // Load (object->mark() | 1) into tmp_reg ++ ld_d(AT, scr_reg, 0); ++ ori(tmp_reg, AT, 1); ++ ++ // Save (object->mark() | 1) into BasicLock's displaced header ++ st_d(tmp_reg, lock_reg, mark_offset); ++ ++ assert(lock_offset == 0, "displached header must be first word in BasicObjectLock"); ++ ++ if (PrintBiasedLockingStatistics) { ++ Label succ, fail; ++ cmpxchg(Address(scr_reg, 0), tmp_reg, lock_reg, AT, true, false, succ, &fail); ++ bind(succ); ++ atomic_inc32((address)BiasedLocking::fast_path_entry_count_addr(), 1, AT, scr_reg); ++ b(done); ++ bind(fail); ++ } else { ++ cmpxchg(Address(scr_reg, 0), tmp_reg, lock_reg, AT, true, false, done); ++ } ++ ++ // Test if the oopMark is an obvious stack pointer, i.e., ++ // 1) (mark & 3) == 0, and ++ // 2) SP <= mark < SP + os::pagesize() ++ // ++ // These 3 tests can be done by evaluating the following ++ // expression: ((mark - sp) & (3 - os::vm_page_size())), ++ // assuming both stack pointer and pagesize have their ++ // least significant 2 bits clear. ++ // NOTE: the oopMark is in tmp_reg as the result of cmpxchg ++ sub_d(tmp_reg, tmp_reg, SP); ++ li(AT, 7 - os::vm_page_size()); ++ andr(tmp_reg, tmp_reg, AT); ++ // Save the test result, for recursive case, the result is zero ++ st_d(tmp_reg, lock_reg, mark_offset); ++ if (PrintBiasedLockingStatistics) { ++ bnez(tmp_reg, slow_case); ++ atomic_inc32((address)BiasedLocking::fast_path_entry_count_addr(), 1, AT, scr_reg); ++ } ++ beqz(tmp_reg, done); ++ ++ bind(slow_case); ++ // Call the runtime routine for slow case ++ call_VM(NOREG, CAST_FROM_FN_PTR(address, InterpreterRuntime::monitorenter), lock_reg); ++ ++ bind(done); ++ } ++} ++ ++// Unlocks an object. Used in monitorexit bytecode and ++// remove_activation. Throws an IllegalMonitorException if object is ++// not locked by current thread. ++// ++// Args: ++// c_rarg0: BasicObjectLock for lock ++// ++// Kills: ++// T1 ++// T2 ++// T3 ++// Throw an IllegalMonitorException if object is not locked by current thread ++void InterpreterMacroAssembler::unlock_object(Register lock_reg) { ++ assert(lock_reg == c_rarg0, "The argument is only for looks. 
It must be c_rarg0"); ++ ++ if (UseHeavyMonitors) { ++ call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::monitorexit), lock_reg); ++ } else { ++ Label done; ++ const Register tmp_reg = T1; ++ const Register scr_reg = T2; ++ const Register hdr_reg = T3; ++ ++ save_bcp(); // Save in case of exception ++ ++ // Convert from BasicObjectLock structure to object and BasicLock structure ++ // Store the BasicLock address into tmp_reg ++ addi_d(tmp_reg, lock_reg, BasicObjectLock::lock_offset_in_bytes()); ++ ++ // Load oop into scr_reg ++ ld_d(scr_reg, lock_reg, BasicObjectLock::obj_offset_in_bytes()); ++ // free entry ++ st_d(R0, lock_reg, BasicObjectLock::obj_offset_in_bytes()); ++ if (UseBiasedLocking) { ++ biased_locking_exit(scr_reg, hdr_reg, done); ++ } ++ ++ // Load the old header from BasicLock structure ++ ld_d(hdr_reg, tmp_reg, BasicLock::displaced_header_offset_in_bytes()); ++ // zero for recursive case ++ beqz(hdr_reg, done); ++ ++ // Atomic swap back the old header ++ cmpxchg(Address(scr_reg, 0), tmp_reg, hdr_reg, AT, false, false, done); ++ ++ // Call the runtime routine for slow case. ++ st_d(scr_reg, lock_reg, BasicObjectLock::obj_offset_in_bytes()); // restore obj ++ call_VM(NOREG, ++ CAST_FROM_FN_PTR(address, InterpreterRuntime::monitorexit), ++ lock_reg); ++ ++ bind(done); ++ ++ restore_bcp(); ++ } ++} ++ ++#ifndef CC_INTERP ++ ++void InterpreterMacroAssembler::test_method_data_pointer(Register mdp, ++ Label& zero_continue) { ++ assert(ProfileInterpreter, "must be profiling interpreter"); ++ ld_d(mdp, Address(FP, frame::interpreter_frame_mdp_offset * wordSize)); ++ beq(mdp, R0, zero_continue); ++} ++ ++ ++// Set the method data pointer for the current bcp. ++void InterpreterMacroAssembler::set_method_data_pointer_for_bcp() { ++ assert(ProfileInterpreter, "must be profiling interpreter"); ++ Label set_mdp; ++ ++ // V0 and T0 will be used as two temporary registers. ++ push2(V0, T0); ++ ++ get_method(T0); ++ // Test MDO to avoid the call if it is NULL. ++ ld_d(V0, T0, in_bytes(Method::method_data_offset())); ++ beq(V0, R0, set_mdp); ++ ++ // method: T0 ++ // bcp: BCP --> S0 ++ call_VM_leaf(CAST_FROM_FN_PTR(address, InterpreterRuntime::bcp_to_di), T0, BCP); ++ // mdi: V0 ++ // mdo is guaranteed to be non-zero here, we checked for it before the call. ++ get_method(T0); ++ ld_d(T0, T0, in_bytes(Method::method_data_offset())); ++ addi_d(T0, T0, in_bytes(MethodData::data_offset())); ++ add_d(V0, T0, V0); ++ bind(set_mdp); ++ st_d(V0, FP, frame::interpreter_frame_mdp_offset * wordSize); ++ pop2(V0, T0); ++} ++ ++void InterpreterMacroAssembler::verify_method_data_pointer() { ++ assert(ProfileInterpreter, "must be profiling interpreter"); ++#ifdef ASSERT ++ Label verify_continue; ++ Register method = T5; ++ Register mdp = T6; ++ Register tmp = A0; ++ push(method); ++ push(mdp); ++ push(tmp); ++ test_method_data_pointer(mdp, verify_continue); // If mdp is zero, continue ++ get_method(method); ++ ++ // If the mdp is valid, it will point to a DataLayout header which is ++ // consistent with the bcp. The converse is highly probable also. 
++ ld_hu(tmp, mdp, in_bytes(DataLayout::bci_offset())); ++ ld_d(AT, method, in_bytes(Method::const_offset())); ++ add_d(tmp, tmp, AT); ++ addi_d(tmp, tmp, in_bytes(ConstMethod::codes_offset())); ++ beq(tmp, BCP, verify_continue); ++ call_VM_leaf(CAST_FROM_FN_PTR(address, InterpreterRuntime::verify_mdp), method, BCP, mdp); ++ bind(verify_continue); ++ pop(tmp); ++ pop(mdp); ++ pop(method); ++#endif // ASSERT ++} ++ ++ ++void InterpreterMacroAssembler::set_mdp_data_at(Register mdp_in, ++ int constant, ++ Register value) { ++ assert(ProfileInterpreter, "must be profiling interpreter"); ++ Address data(mdp_in, constant); ++ st_d(value, data); ++} ++ ++ ++void InterpreterMacroAssembler::increment_mdp_data_at(Register mdp_in, ++ int constant, ++ bool decrement) { ++ // Counter address ++ Address data(mdp_in, constant); ++ ++ increment_mdp_data_at(data, decrement); ++} ++ ++void InterpreterMacroAssembler::increment_mdp_data_at(Address data, ++ bool decrement) { ++ assert(ProfileInterpreter, "must be profiling interpreter"); ++ // %%% this does 64bit counters at best it is wasting space ++ // at worst it is a rare bug when counters overflow ++ Register tmp = S0; ++ push(tmp); ++ if (decrement) { ++ assert(DataLayout::counter_increment == 1, "flow-free idiom only works with 1"); ++ // Decrement the register. ++ ld_d(AT, data); ++ sltu(tmp, R0, AT); ++ sub_d(AT, AT, tmp); ++ st_d(AT, data); ++ } else { ++ assert(DataLayout::counter_increment == 1, "flow-free idiom only works with 1"); ++ // Increment the register. ++ ld_d(AT, data); ++ addi_d(tmp, AT, DataLayout::counter_increment); ++ sltu(tmp, R0, tmp); ++ add_d(AT, AT, tmp); ++ st_d(AT, data); ++ } ++ pop(tmp); ++} ++ ++ ++void InterpreterMacroAssembler::increment_mdp_data_at(Register mdp_in, ++ Register reg, ++ int constant, ++ bool decrement) { ++ Register tmp = S0; ++ push(tmp); ++ if (decrement) { ++ assert(Assembler::is_simm(constant, 12), "constant is not a simm12 !"); ++ assert(DataLayout::counter_increment == 1, "flow-free idiom only works with 1"); ++ // Decrement the register. ++ add_d(tmp, mdp_in, reg); ++ ld_d(AT, tmp, constant); ++ sltu(tmp, R0, AT); ++ sub_d(AT, AT, tmp); ++ add_d(tmp, mdp_in, reg); ++ st_d(AT, tmp, constant); ++ } else { ++ assert(Assembler::is_simm(constant, 12), "constant is not a simm12 !"); ++ assert(DataLayout::counter_increment == 1, "flow-free idiom only works with 1"); ++ // Increment the register. ++ add_d(tmp, mdp_in, reg); ++ ld_d(AT, tmp, constant); ++ addi_d(tmp, AT, DataLayout::counter_increment); ++ sltu(tmp, R0, tmp); ++ add_d(AT, AT, tmp); ++ add_d(tmp, mdp_in, reg); ++ st_d(AT, tmp, constant); ++ } ++ pop(tmp); ++} ++ ++void InterpreterMacroAssembler::set_mdp_flag_at(Register mdp_in, ++ int flag_byte_constant) { ++ assert(ProfileInterpreter, "must be profiling interpreter"); ++ int header_offset = in_bytes(DataLayout::header_offset()); ++ int header_bits = DataLayout::flag_mask_to_header_mask(flag_byte_constant); ++ // Set the flag ++ ld_w(AT, Address(mdp_in, header_offset)); ++ if(Assembler::is_simm(header_bits, 12)) { ++ ori(AT, AT, header_bits); ++ } else { ++ push(T8); ++ // T8 is used as a temporary register. 
++ li(T8, header_bits); ++ orr(AT, AT, T8); ++ pop(T8); ++ } ++ st_w(AT, Address(mdp_in, header_offset)); ++} ++ ++ ++void InterpreterMacroAssembler::test_mdp_data_at(Register mdp_in, ++ int offset, ++ Register value, ++ Register test_value_out, ++ Label& not_equal_continue) { ++ assert(ProfileInterpreter, "must be profiling interpreter"); ++ if (test_value_out == noreg) { ++ ld_d(AT, Address(mdp_in, offset)); ++ bne(AT, value, not_equal_continue); ++ } else { ++ // Put the test value into a register, so caller can use it: ++ ld_d(test_value_out, Address(mdp_in, offset)); ++ bne(value, test_value_out, not_equal_continue); ++ } ++} ++ ++ ++void InterpreterMacroAssembler::update_mdp_by_offset(Register mdp_in, ++ int offset_of_disp) { ++ assert(ProfileInterpreter, "must be profiling interpreter"); ++ assert(Assembler::is_simm(offset_of_disp, 12), "offset is not an simm12"); ++ ld_d(AT, mdp_in, offset_of_disp); ++ add_d(mdp_in, mdp_in, AT); ++ st_d(mdp_in, Address(FP, frame::interpreter_frame_mdp_offset * wordSize)); ++} ++ ++ ++void InterpreterMacroAssembler::update_mdp_by_offset(Register mdp_in, ++ Register reg, ++ int offset_of_disp) { ++ assert(ProfileInterpreter, "must be profiling interpreter"); ++ add_d(AT, reg, mdp_in); ++ assert(Assembler::is_simm(offset_of_disp, 12), "offset is not an simm12"); ++ ld_d(AT, AT, offset_of_disp); ++ add_d(mdp_in, mdp_in, AT); ++ st_d(mdp_in, Address(FP, frame::interpreter_frame_mdp_offset * wordSize)); ++} ++ ++ ++void InterpreterMacroAssembler::update_mdp_by_constant(Register mdp_in, ++ int constant) { ++ assert(ProfileInterpreter, "must be profiling interpreter"); ++ if(Assembler::is_simm(constant, 12)) { ++ addi_d(mdp_in, mdp_in, constant); ++ } else { ++ li(AT, constant); ++ add_d(mdp_in, mdp_in, AT); ++ } ++ st_d(mdp_in, Address(FP, frame::interpreter_frame_mdp_offset * wordSize)); ++} ++ ++ ++void InterpreterMacroAssembler::update_mdp_for_ret(Register return_bci) { ++ assert(ProfileInterpreter, "must be profiling interpreter"); ++ push(return_bci); // save/restore across call_VM ++ call_VM(noreg, ++ CAST_FROM_FN_PTR(address, InterpreterRuntime::update_mdp_for_ret), ++ return_bci); ++ pop(return_bci); ++} ++ ++ ++void InterpreterMacroAssembler::profile_taken_branch(Register mdp, ++ Register bumped_count) { ++ if (ProfileInterpreter) { ++ Label profile_continue; ++ ++ // If no method data exists, go to profile_continue. ++ // Otherwise, assign to mdp ++ test_method_data_pointer(mdp, profile_continue); ++ ++ // We are taking a branch. Increment the taken count. ++ // We inline increment_mdp_data_at to return bumped_count in a register ++ //increment_mdp_data_at(mdp, in_bytes(JumpData::taken_offset())); ++ ld_d(bumped_count, mdp, in_bytes(JumpData::taken_offset())); ++ assert(DataLayout::counter_increment == 1, "flow-free idiom only works with 1"); ++ addi_d(AT, bumped_count, DataLayout::counter_increment); ++ sltu(AT, R0, AT); ++ add_d(bumped_count, bumped_count, AT); ++ st_d(bumped_count, mdp, in_bytes(JumpData::taken_offset())); // Store back out ++ // The method data pointer needs to be updated to reflect the new target. ++ update_mdp_by_offset(mdp, in_bytes(JumpData::displacement_offset())); ++ bind(profile_continue); ++ } ++} ++ ++ ++void InterpreterMacroAssembler::profile_not_taken_branch(Register mdp) { ++ if (ProfileInterpreter) { ++ Label profile_continue; ++ ++ // If no method data exists, go to profile_continue. ++ test_method_data_pointer(mdp, profile_continue); ++ ++ // We are taking a branch. Increment the not taken count. 
++ increment_mdp_data_at(mdp, in_bytes(BranchData::not_taken_offset())); ++ ++ // The method data pointer needs to be updated to correspond to ++ // the next bytecode ++ update_mdp_by_constant(mdp, in_bytes(BranchData::branch_data_size())); ++ bind(profile_continue); ++ } ++} ++ ++ ++void InterpreterMacroAssembler::profile_call(Register mdp) { ++ if (ProfileInterpreter) { ++ Label profile_continue; ++ ++ // If no method data exists, go to profile_continue. ++ test_method_data_pointer(mdp, profile_continue); ++ ++ // We are making a call. Increment the count. ++ increment_mdp_data_at(mdp, in_bytes(CounterData::count_offset())); ++ ++ // The method data pointer needs to be updated to reflect the new target. ++ update_mdp_by_constant(mdp, in_bytes(CounterData::counter_data_size())); ++ bind(profile_continue); ++ } ++} ++ ++ ++void InterpreterMacroAssembler::profile_final_call(Register mdp) { ++ if (ProfileInterpreter) { ++ Label profile_continue; ++ ++ // If no method data exists, go to profile_continue. ++ test_method_data_pointer(mdp, profile_continue); ++ ++ // We are making a call. Increment the count. ++ increment_mdp_data_at(mdp, in_bytes(CounterData::count_offset())); ++ ++ // The method data pointer needs to be updated to reflect the new target. ++ update_mdp_by_constant(mdp, ++ in_bytes(VirtualCallData:: ++ virtual_call_data_size())); ++ bind(profile_continue); ++ } ++} ++ ++ ++void InterpreterMacroAssembler::profile_virtual_call(Register receiver, ++ Register mdp, ++ Register reg2, ++ bool receiver_can_be_null) { ++ if (ProfileInterpreter) { ++ Label profile_continue; ++ ++ // If no method data exists, go to profile_continue. ++ test_method_data_pointer(mdp, profile_continue); ++ ++ Label skip_receiver_profile; ++ if (receiver_can_be_null) { ++ Label not_null; ++ bnez(receiver, not_null); ++ // We are making a call. Increment the count. ++ increment_mdp_data_at(mdp, in_bytes(CounterData::count_offset())); ++ b(skip_receiver_profile); ++ bind(not_null); ++ } ++ ++ // Record the receiver type. ++ record_klass_in_profile(receiver, mdp, reg2, true); ++ bind(skip_receiver_profile); ++ ++ // The method data pointer needs to be updated to reflect the new target. ++ update_mdp_by_constant(mdp, ++ in_bytes(VirtualCallData:: ++ virtual_call_data_size())); ++ bind(profile_continue); ++ } ++} ++ ++#if INCLUDE_JVMCI ++void InterpreterMacroAssembler::profile_called_method(Register method, Register mdp, Register reg2) { ++ assert_different_registers(method, mdp, reg2); ++ if (ProfileInterpreter && MethodProfileWidth > 0) { ++ Label profile_continue; ++ ++ // If no method data exists, go to profile_continue. ++ test_method_data_pointer(mdp, profile_continue); ++ ++ Label done; ++ record_item_in_profile_helper(method, mdp, reg2, 0, done, MethodProfileWidth, ++ &VirtualCallData::method_offset, &VirtualCallData::method_count_offset, in_bytes(VirtualCallData::nonprofiled_receiver_count_offset())); ++ bind(done); ++ ++ update_mdp_by_constant(mdp, in_bytes(VirtualCallData::virtual_call_data_size())); ++ bind(profile_continue); ++ } ++} ++#endif // INCLUDE_JVMCI ++ ++// This routine creates a state machine for updating the multi-row ++// type profile at a virtual call site (or other type-sensitive bytecode). ++// The machine visits each row (of receiver/count) until the receiver type ++// is found, or until it runs out of rows. At the same time, it remembers ++// the location of the first empty row. (An empty row records null for its ++// receiver, and can be allocated for a newly-observed receiver type.) 
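Before the comment continues below, a hedged C++ rendering of the row search it describes, written as an ordinary loop. RowSketch and the polymorphic counter are illustrative; the real rows live inside the MDO, and the generated code unrolls this search into a decision tree rather than looping.

    #include <cstddef>

    struct RowSketch { const void* receiver; long count; };

    void record_receiver(RowSketch* rows, size_t nrows,
                         const void* rec, long& polymorphic_count) {
        RowSketch* first_empty = nullptr;
        for (size_t i = 0; i < nrows; i++) {
            if (rows[i].receiver == rec) { rows[i].count++; return; }  // case 1: found the item
            if (rows[i].receiver == nullptr && first_empty == nullptr)
                first_empty = &rows[i];                                // case 2: remember empty row
        }
        if (first_empty != nullptr) {        // allocate the empty row for the new receiver type
            first_empty->receiver = rec;
            first_empty->count    = 1;       // DataLayout::counter_increment
        } else {
            polymorphic_count++;             // all rows taken: count the overflow
        }
    }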
++// Because there are two degrees of freedom in the state, a simple linear ++// search will not work; it must be a decision tree. Hence this helper ++// function is recursive, to generate the required tree structured code. ++// It's the interpreter, so we are trading off code space for speed. ++// See below for example code. ++void InterpreterMacroAssembler::record_klass_in_profile_helper( ++ Register receiver, Register mdp, ++ Register reg2, int start_row, ++ Label& done, bool is_virtual_call) { ++ if (TypeProfileWidth == 0) { ++ if (is_virtual_call) { ++ increment_mdp_data_at(mdp, in_bytes(CounterData::count_offset())); ++ } ++#if INCLUDE_JVMCI ++ else if (EnableJVMCI) { ++ increment_mdp_data_at(mdp, in_bytes(ReceiverTypeData::nonprofiled_receiver_count_offset())); ++ } ++#endif // INCLUDE_JVMCI ++ } else { ++ int non_profiled_offset = -1; ++ if (is_virtual_call) { ++ non_profiled_offset = in_bytes(CounterData::count_offset()); ++ } ++#if INCLUDE_JVMCI ++ else if (EnableJVMCI) { ++ non_profiled_offset = in_bytes(ReceiverTypeData::nonprofiled_receiver_count_offset()); ++ } ++#endif // INCLUDE_JVMCI ++ ++ record_item_in_profile_helper(receiver, mdp, reg2, 0, done, TypeProfileWidth, ++ &VirtualCallData::receiver_offset, &VirtualCallData::receiver_count_offset, non_profiled_offset); ++ } ++} ++ ++void InterpreterMacroAssembler::record_item_in_profile_helper(Register item, Register mdp, ++ Register reg2, int start_row, Label& done, int total_rows, ++ OffsetFunction item_offset_fn, OffsetFunction item_count_offset_fn, ++ int non_profiled_offset) { ++ int last_row = total_rows - 1; ++ assert(start_row <= last_row, "must be work left to do"); ++ // Test this row for both the item and for null. ++ // Take any of three different outcomes: ++ // 1. found item => increment count and goto done ++ // 2. found null => keep looking for case 1, maybe allocate this cell ++ // 3. found something else => keep looking for cases 1 and 2 ++ // Case 3 is handled by a recursive call. ++ for (int row = start_row; row <= last_row; row++) { ++ Label next_test; ++ bool test_for_null_also = (row == start_row); ++ ++ // See if the receiver is item[n]. ++ int item_offset = in_bytes(item_offset_fn(row)); ++ test_mdp_data_at(mdp, item_offset, item, ++ (test_for_null_also ? reg2 : noreg), ++ next_test); ++ // (Reg2 now contains the item from the CallData.) ++ ++ // The receiver is item[n]. Increment count[n]. ++ int count_offset = in_bytes(item_count_offset_fn(row)); ++ increment_mdp_data_at(mdp, count_offset); ++ b(done); ++ bind(next_test); ++ ++ if (test_for_null_also) { ++ Label found_null; ++ // Failed the equality check on item[n]... Test for null. ++ if (start_row == last_row) { ++ // The only thing left to do is handle the null case. ++ if (non_profiled_offset >= 0) { ++ beqz(reg2, found_null); ++ // Item did not match any saved item and there is no empty row for it. ++ // Increment total counter to indicate polymorphic case. ++ increment_mdp_data_at(mdp, non_profiled_offset); ++ b(done); ++ bind(found_null); ++ } else { ++ bnez(reg2, done); ++ } ++ break; ++ } ++ // Since null is rare, make it be the branch-taken case. ++ beqz(reg2, found_null); ++ ++ // Put all the "Case 3" tests here. ++ record_item_in_profile_helper(item, mdp, reg2, start_row + 1, done, total_rows, ++ item_offset_fn, item_count_offset_fn, non_profiled_offset); ++ ++ // Found a null. Keep searching for a matching item, ++ // but remember that this is an empty (unused) slot. 
++ bind(found_null); ++ } ++ } ++ ++ // In the fall-through case, we found no matching item, but we ++ // observed the item[start_row] is NULL. ++ ++ // Fill in the item field and increment the count. ++ int item_offset = in_bytes(item_offset_fn(start_row)); ++ set_mdp_data_at(mdp, item_offset, item); ++ int count_offset = in_bytes(item_count_offset_fn(start_row)); ++ li(reg2, DataLayout::counter_increment); ++ set_mdp_data_at(mdp, count_offset, reg2); ++ if (start_row > 0) { ++ b(done); ++ } ++} ++ ++// Example state machine code for three profile rows: ++// // main copy of decision tree, rooted at row[1] ++// if (row[0].rec == rec) { row[0].incr(); goto done; } ++// if (row[0].rec != NULL) { ++// // inner copy of decision tree, rooted at row[1] ++// if (row[1].rec == rec) { row[1].incr(); goto done; } ++// if (row[1].rec != NULL) { ++// // degenerate decision tree, rooted at row[2] ++// if (row[2].rec == rec) { row[2].incr(); goto done; } ++// if (row[2].rec != NULL) { goto done; } // overflow ++// row[2].init(rec); goto done; ++// } else { ++// // remember row[1] is empty ++// if (row[2].rec == rec) { row[2].incr(); goto done; } ++// row[1].init(rec); goto done; ++// } ++// } else { ++// // remember row[0] is empty ++// if (row[1].rec == rec) { row[1].incr(); goto done; } ++// if (row[2].rec == rec) { row[2].incr(); goto done; } ++// row[0].init(rec); goto done; ++// } ++// done: ++ ++void InterpreterMacroAssembler::record_klass_in_profile(Register receiver, ++ Register mdp, Register reg2, ++ bool is_virtual_call) { ++ assert(ProfileInterpreter, "must be profiling"); ++ Label done; ++ ++ record_klass_in_profile_helper(receiver, mdp, reg2, 0, done, is_virtual_call); ++ ++ bind (done); ++} ++ ++void InterpreterMacroAssembler::profile_ret(Register return_bci, ++ Register mdp) { ++ if (ProfileInterpreter) { ++ Label profile_continue; ++ uint row; ++ ++ // If no method data exists, go to profile_continue. ++ test_method_data_pointer(mdp, profile_continue); ++ ++ // Update the total ret count. ++ increment_mdp_data_at(mdp, in_bytes(CounterData::count_offset())); ++ ++ for (row = 0; row < RetData::row_limit(); row++) { ++ Label next_test; ++ ++ // See if return_bci is equal to bci[n]: ++ test_mdp_data_at(mdp, ++ in_bytes(RetData::bci_offset(row)), ++ return_bci, noreg, ++ next_test); ++ ++ // return_bci is equal to bci[n]. Increment the count. ++ increment_mdp_data_at(mdp, in_bytes(RetData::bci_count_offset(row))); ++ ++ // The method data pointer needs to be updated to reflect the new target. ++ update_mdp_by_offset(mdp, ++ in_bytes(RetData::bci_displacement_offset(row))); ++ b(profile_continue); ++ bind(next_test); ++ } ++ ++ update_mdp_for_ret(return_bci); ++ ++ bind(profile_continue); ++ } ++} ++ ++ ++void InterpreterMacroAssembler::profile_null_seen(Register mdp) { ++ if (ProfileInterpreter) { ++ Label profile_continue; ++ ++ // If no method data exists, go to profile_continue. ++ test_method_data_pointer(mdp, profile_continue); ++ ++ set_mdp_flag_at(mdp, BitData::null_seen_byte_constant()); ++ ++ // The method data pointer needs to be updated. ++ int mdp_delta = in_bytes(BitData::bit_data_size()); ++ if (TypeProfileCasts) { ++ mdp_delta = in_bytes(VirtualCallData::virtual_call_data_size()); ++ } ++ update_mdp_by_constant(mdp, mdp_delta); ++ ++ bind(profile_continue); ++ } ++} ++ ++ ++void InterpreterMacroAssembler::profile_typecheck_failed(Register mdp) { ++ if (ProfileInterpreter && TypeProfileCasts) { ++ Label profile_continue; ++ ++ // If no method data exists, go to profile_continue. 
++ test_method_data_pointer(mdp, profile_continue); ++ ++ int count_offset = in_bytes(CounterData::count_offset()); ++ // Back up the address, since we have already bumped the mdp. ++ count_offset -= in_bytes(VirtualCallData::virtual_call_data_size()); ++ ++ // *Decrement* the counter. We expect to see zero or small negatives. ++ increment_mdp_data_at(mdp, count_offset, true); ++ ++ bind (profile_continue); ++ } ++} ++ ++ ++void InterpreterMacroAssembler::profile_typecheck(Register mdp, Register klass, Register reg2) { ++ if (ProfileInterpreter) { ++ Label profile_continue; ++ ++ // If no method data exists, go to profile_continue. ++ test_method_data_pointer(mdp, profile_continue); ++ ++ // The method data pointer needs to be updated. ++ int mdp_delta = in_bytes(BitData::bit_data_size()); ++ if (TypeProfileCasts) { ++ mdp_delta = in_bytes(VirtualCallData::virtual_call_data_size()); ++ ++ // Record the object type. ++ record_klass_in_profile(klass, mdp, reg2, false); ++ } ++ update_mdp_by_constant(mdp, mdp_delta); ++ ++ bind(profile_continue); ++ } ++} ++ ++ ++void InterpreterMacroAssembler::profile_switch_default(Register mdp) { ++ if (ProfileInterpreter) { ++ Label profile_continue; ++ ++ // If no method data exists, go to profile_continue. ++ test_method_data_pointer(mdp, profile_continue); ++ ++ // Update the default case count ++ increment_mdp_data_at(mdp, ++ in_bytes(MultiBranchData::default_count_offset())); ++ ++ // The method data pointer needs to be updated. ++ update_mdp_by_offset(mdp, ++ in_bytes(MultiBranchData:: ++ default_displacement_offset())); ++ ++ bind(profile_continue); ++ } ++} ++ ++ ++void InterpreterMacroAssembler::profile_switch_case(Register index, ++ Register mdp, ++ Register reg2) { ++ if (ProfileInterpreter) { ++ Label profile_continue; ++ ++ // If no method data exists, go to profile_continue. ++ test_method_data_pointer(mdp, profile_continue); ++ ++ // Build the base (index * per_case_size_in_bytes()) + ++ // case_array_offset_in_bytes() ++ li(reg2, in_bytes(MultiBranchData::per_case_size())); ++ mul_d(index, index, reg2); ++ addi_d(index, index, in_bytes(MultiBranchData::case_array_offset())); ++ ++ // Update the case count ++ increment_mdp_data_at(mdp, ++ index, ++ in_bytes(MultiBranchData::relative_count_offset())); ++ ++ // The method data pointer needs to be updated. ++ update_mdp_by_offset(mdp, ++ index, ++ in_bytes(MultiBranchData:: ++ relative_displacement_offset())); ++ ++ bind(profile_continue); ++ } ++} ++ ++ ++void InterpreterMacroAssembler::narrow(Register result) { ++ // Get method->_constMethod->_result_type ++ ld_d(T4, FP, frame::interpreter_frame_method_offset * wordSize); ++ ld_d(T4, T4, in_bytes(Method::const_offset())); ++ ld_bu(T4, T4, in_bytes(ConstMethod::result_type_offset())); ++ ++ Label done, notBool, notByte, notChar; ++ ++ // common case first ++ addi_d(AT, T4, -T_INT); ++ beq(AT, R0, done); ++ ++ // mask integer result to narrower return type. 
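A hedged sketch of the narrowing performed by the type-specific cases that follow: booleans keep one bit, bytes and shorts are sign-extended, chars are zero-extended to 16 bits, and T_INT passes through unchanged. The enum is an illustrative stand-in for ConstMethod::result_type().

    #include <cstdint>

    enum ResultKind { kBoolean, kByte, kChar, kShort, kInt };

    int64_t narrow_result(ResultKind kind, int64_t result) {
        switch (kind) {
            case kBoolean: return result & 0x1;        // andi  result, result, 0x1
            case kByte:    return (int8_t)result;      // ext.w.b
            case kChar:    return (uint16_t)result;    // bstrpick.d result, result, 15, 0
            case kShort:   return (int16_t)result;     // ext.w.h
            default:       return result;              // T_INT: nothing to do
        }
    }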
++ addi_d(AT, T4, -T_BOOLEAN); ++ bne(AT, R0, notBool); ++ andi(result, result, 0x1); ++ beq(R0, R0, done); ++ ++ bind(notBool); ++ addi_d(AT, T4, -T_BYTE); ++ bne(AT, R0, notByte); ++ ext_w_b(result, result); ++ beq(R0, R0, done); ++ ++ bind(notByte); ++ addi_d(AT, T4, -T_CHAR); ++ bne(AT, R0, notChar); ++ bstrpick_d(result, result, 15, 0); ++ beq(R0, R0, done); ++ ++ bind(notChar); ++ ext_w_h(result, result); ++ ++ // Nothing to do for T_INT ++ bind(done); ++} ++ ++ ++void InterpreterMacroAssembler::profile_obj_type(Register obj, const Address& mdo_addr) { ++ Label update, next, none; ++ ++ verify_oop(obj); ++ ++ if (mdo_addr.index() != noreg) { ++ guarantee(T0 != mdo_addr.base(), "The base register will be corrupted !"); ++ guarantee(T0 != mdo_addr.index(), "The index register will be corrupted !"); ++ push(T0); ++ alsl_d(T0, mdo_addr.index(), mdo_addr.base(), mdo_addr.scale() - 1); ++ } ++ ++ bnez(obj, update); ++ ++ if (mdo_addr.index() == noreg) { ++ ld_d(AT, mdo_addr); ++ } else { ++ ld_d(AT, T0, mdo_addr.disp()); ++ } ++ ori(AT, AT, TypeEntries::null_seen); ++ if (mdo_addr.index() == noreg) { ++ st_d(AT, mdo_addr); ++ } else { ++ st_d(AT, T0, mdo_addr.disp()); ++ } ++ ++ b(next); ++ ++ bind(update); ++ load_klass(obj, obj); ++ ++ if (mdo_addr.index() == noreg) { ++ ld_d(AT, mdo_addr); ++ } else { ++ ld_d(AT, T0, mdo_addr.disp()); ++ } ++ xorr(obj, obj, AT); ++ ++ assert(TypeEntries::type_klass_mask == -4, "must be"); ++ bstrpick_d(AT, obj, 63, 2); ++ beqz(AT, next); ++ ++ andi(AT, obj, TypeEntries::type_unknown); ++ bnez(AT, next); ++ ++ if (mdo_addr.index() == noreg) { ++ ld_d(AT, mdo_addr); ++ } else { ++ ld_d(AT, T0, mdo_addr.disp()); ++ } ++ beqz(AT, none); ++ ++ addi_d(AT, AT, -(TypeEntries::null_seen)); ++ beqz(AT, none); ++ ++ // There is a chance that the checks above (re-reading profiling ++ // data from memory) fail if another thread has just set the ++ // profiling to this obj's klass ++ if (mdo_addr.index() == noreg) { ++ ld_d(AT, mdo_addr); ++ } else { ++ ld_d(AT, T0, mdo_addr.disp()); ++ } ++ xorr(obj, obj, AT); ++ assert(TypeEntries::type_klass_mask == -4, "must be"); ++ bstrpick_d(AT, obj, 63, 2); ++ beqz(AT, next); ++ ++ // different than before. Cannot keep accurate profile. ++ if (mdo_addr.index() == noreg) { ++ ld_d(AT, mdo_addr); ++ } else { ++ ld_d(AT, T0, mdo_addr.disp()); ++ } ++ ori(AT, AT, TypeEntries::type_unknown); ++ if (mdo_addr.index() == noreg) { ++ st_d(AT, mdo_addr); ++ } else { ++ st_d(AT, T0, mdo_addr.disp()); ++ } ++ b(next); ++ ++ bind(none); ++ // first time here. Set profile type. ++ if (mdo_addr.index() == noreg) { ++ st_d(obj, mdo_addr); ++ } else { ++ st_d(obj, T0, mdo_addr.disp()); ++ } ++ ++ bind(next); ++ if (mdo_addr.index() != noreg) { ++ pop(T0); ++ } ++} ++ ++void InterpreterMacroAssembler::profile_arguments_type(Register mdp, Register callee, Register tmp, bool is_virtual) { ++ if (!ProfileInterpreter) { ++ return; ++ } ++ ++ if (MethodData::profile_arguments() || MethodData::profile_return()) { ++ Label profile_continue; ++ ++ test_method_data_pointer(mdp, profile_continue); ++ ++ int off_to_start = is_virtual ? in_bytes(VirtualCallData::virtual_call_data_size()) : in_bytes(CounterData::counter_data_size()); ++ ++ ld_b(AT, mdp, in_bytes(DataLayout::tag_offset()) - off_to_start); ++ li(tmp, is_virtual ? 
DataLayout::virtual_call_type_data_tag : DataLayout::call_type_data_tag); ++ bne(tmp, AT, profile_continue); ++ ++ ++ if (MethodData::profile_arguments()) { ++ Label done; ++ int off_to_args = in_bytes(TypeEntriesAtCall::args_data_offset()); ++ if (Assembler::is_simm(off_to_args, 12)) { ++ addi_d(mdp, mdp, off_to_args); ++ } else { ++ li(AT, off_to_args); ++ add_d(mdp, mdp, AT); ++ } ++ ++ ++ for (int i = 0; i < TypeProfileArgsLimit; i++) { ++ if (i > 0 || MethodData::profile_return()) { ++ // If return value type is profiled we may have no argument to profile ++ ld_d(tmp, mdp, in_bytes(TypeEntriesAtCall::cell_count_offset())-off_to_args); ++ ++ if (Assembler::is_simm(-1 * i * TypeStackSlotEntries::per_arg_count(), 12)) { ++ addi_w(tmp, tmp, -1 * i * TypeStackSlotEntries::per_arg_count()); ++ } else { ++ li(AT, i*TypeStackSlotEntries::per_arg_count()); ++ sub_w(tmp, tmp, AT); ++ } ++ ++ li(AT, TypeStackSlotEntries::per_arg_count()); ++ blt(tmp, AT, done); ++ } ++ ld_d(tmp, callee, in_bytes(Method::const_offset())); ++ ++ ld_hu(tmp, tmp, in_bytes(ConstMethod::size_of_parameters_offset())); ++ ++ // stack offset o (zero based) from the start of the argument ++ // list, for n arguments translates into offset n - o - 1 from ++ // the end of the argument list ++ ld_d(AT, mdp, in_bytes(TypeEntriesAtCall::stack_slot_offset(i))-off_to_args); ++ sub_d(tmp, tmp, AT); ++ ++ addi_w(tmp, tmp, -1); ++ ++ Address arg_addr = argument_address(tmp); ++ ld_d(tmp, arg_addr); ++ ++ Address mdo_arg_addr(mdp, in_bytes(TypeEntriesAtCall::argument_type_offset(i))-off_to_args); ++ profile_obj_type(tmp, mdo_arg_addr); ++ ++ int to_add = in_bytes(TypeStackSlotEntries::per_arg_size()); ++ if (Assembler::is_simm(to_add, 12)) { ++ addi_d(mdp, mdp, to_add); ++ } else { ++ li(AT, to_add); ++ add_d(mdp, mdp, AT); ++ } ++ ++ off_to_args += to_add; ++ } ++ ++ if (MethodData::profile_return()) { ++ ld_d(tmp, mdp, in_bytes(TypeEntriesAtCall::cell_count_offset())-off_to_args); ++ ++ int tmp_arg_counts = TypeProfileArgsLimit*TypeStackSlotEntries::per_arg_count(); ++ if (Assembler::is_simm(-1 * tmp_arg_counts, 12)) { ++ addi_w(tmp, tmp, -1 * tmp_arg_counts); ++ } else { ++ li(AT, tmp_arg_counts); ++ sub_w(mdp, mdp, AT); ++ } ++ } ++ ++ bind(done); ++ ++ if (MethodData::profile_return()) { ++ // We're right after the type profile for the last ++ // argument. tmp is the number of cells left in the ++ // CallTypeData/VirtualCallTypeData to reach its end. Non null ++ // if there's a return to profile. 
++ assert(ReturnTypeEntry::static_cell_count() < TypeStackSlotEntries::per_arg_count(), "can't move past ret type"); ++ slli_w(tmp, tmp, exact_log2(DataLayout::cell_size)); ++ add_d(mdp, mdp, tmp); ++ } ++ st_d(mdp, FP, frame::interpreter_frame_mdp_offset * wordSize); ++ } else { ++ assert(MethodData::profile_return(), "either profile call args or call ret"); ++ update_mdp_by_constant(mdp, in_bytes(TypeEntriesAtCall::return_only_size())); ++ } ++ ++ // mdp points right after the end of the ++ // CallTypeData/VirtualCallTypeData, right after the cells for the ++ // return value type if there's one ++ ++ bind(profile_continue); ++ } ++} ++ ++void InterpreterMacroAssembler::profile_return_type(Register mdp, Register ret, Register tmp) { ++ assert_different_registers(mdp, ret, tmp, _bcp_register); ++ if (ProfileInterpreter && MethodData::profile_return()) { ++ Label profile_continue, done; ++ ++ test_method_data_pointer(mdp, profile_continue); ++ ++ if (MethodData::profile_return_jsr292_only()) { ++ assert(Method::intrinsic_id_size_in_bytes() == 2, "assuming Method::_intrinsic_id is u2"); ++ ++ // If we don't profile all invoke bytecodes we must make sure ++ // it's a bytecode we indeed profile. We can't go back to the ++ // begining of the ProfileData we intend to update to check its ++ // type because we're right after it and we don't known its ++ // length ++ Label do_profile; ++ ld_b(tmp, _bcp_register, 0); ++ addi_d(AT, tmp, -1 * Bytecodes::_invokedynamic); ++ beqz(AT, do_profile); ++ addi_d(AT, tmp, -1 * Bytecodes::_invokehandle); ++ beqz(AT, do_profile); ++ ++ get_method(tmp); ++ ld_hu(tmp, tmp, Method::intrinsic_id_offset_in_bytes()); ++ li(AT, vmIntrinsics::_compiledLambdaForm); ++ bne(tmp, AT, profile_continue); ++ ++ bind(do_profile); ++ } ++ ++ Address mdo_ret_addr(mdp, -in_bytes(ReturnTypeEntry::size())); ++ add_d(tmp, ret, R0); ++ profile_obj_type(tmp, mdo_ret_addr); ++ ++ bind(profile_continue); ++ } ++} ++ ++void InterpreterMacroAssembler::profile_parameters_type(Register mdp, Register tmp1, Register tmp2) { ++ guarantee(T4 == tmp1, "You are reqired to use T4 as the index register for LoongArch !"); ++ ++ if (ProfileInterpreter && MethodData::profile_parameters()) { ++ Label profile_continue, done; ++ ++ test_method_data_pointer(mdp, profile_continue); ++ ++ // Load the offset of the area within the MDO used for ++ // parameters. If it's negative we're not profiling any parameters ++ ld_w(tmp1, mdp, in_bytes(MethodData::parameters_type_data_di_offset()) - in_bytes(MethodData::data_offset())); ++ blt(tmp1, R0, profile_continue); ++ ++ // Compute a pointer to the area for parameters from the offset ++ // and move the pointer to the slot for the last ++ // parameters. Collect profiling from last parameter down. 
++ // mdo start + parameters offset + array length - 1 ++ add_d(mdp, mdp, tmp1); ++ ld_d(tmp1, mdp, in_bytes(ArrayData::array_len_offset())); ++ decrement(tmp1, TypeStackSlotEntries::per_arg_count()); ++ ++ ++ Label loop; ++ bind(loop); ++ ++ int off_base = in_bytes(ParametersTypeData::stack_slot_offset(0)); ++ int type_base = in_bytes(ParametersTypeData::type_offset(0)); ++ Address::ScaleFactor per_arg_scale = Address::times(DataLayout::cell_size); ++ Address arg_type(mdp, tmp1, per_arg_scale, type_base); ++ ++ // load offset on the stack from the slot for this parameter ++ alsl_d(AT, tmp1, mdp, per_arg_scale - 1); ++ ld_d(tmp2, AT, off_base); ++ ++ sub_d(tmp2, R0, tmp2); ++ ++ // read the parameter from the local area ++ slli_d(AT, tmp2, Interpreter::logStackElementSize); ++ ldx_d(tmp2, AT, _locals_register); ++ ++ // profile the parameter ++ profile_obj_type(tmp2, arg_type); ++ ++ // go to next parameter ++ decrement(tmp1, TypeStackSlotEntries::per_arg_count()); ++ blt(R0, tmp1, loop); ++ ++ bind(profile_continue); ++ } ++} ++ ++void InterpreterMacroAssembler::verify_oop(Register reg, TosState state) { ++ if (state == atos) { ++ MacroAssembler::verify_oop(reg); ++ } ++} ++ ++void InterpreterMacroAssembler::verify_FPU(int stack_depth, TosState state) { ++} ++#endif // !CC_INTERP ++ ++ ++void InterpreterMacroAssembler::notify_method_entry() { ++ // Whenever JVMTI is interp_only_mode, method entry/exit events are sent to ++ // track stack depth. If it is possible to enter interp_only_mode we add ++ // the code to check if the event should be sent. ++ Register tempreg = T0; ++#ifndef OPT_THREAD ++ Register thread = T8; ++ get_thread(thread); ++#else ++ Register thread = TREG; ++#endif ++ if (JvmtiExport::can_post_interpreter_events()) { ++ Label L; ++ ld_w(tempreg, thread, in_bytes(JavaThread::interp_only_mode_offset())); ++ beq(tempreg, R0, L); ++ call_VM(noreg, CAST_FROM_FN_PTR(address, ++ InterpreterRuntime::post_method_entry)); ++ bind(L); ++ } ++ ++ { ++ SkipIfEqual skip_if(this, &DTraceMethodProbes, 0); ++ get_method(S3); ++ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::dtrace_method_entry), ++ //Rthread, ++ thread, ++ //Rmethod); ++ S3); ++ } ++} ++ ++void InterpreterMacroAssembler::notify_method_exit( ++ TosState state, NotifyMethodExitMode mode) { ++ Register tempreg = T0; ++#ifndef OPT_THREAD ++ Register thread = T8; ++ get_thread(thread); ++#else ++ Register thread = TREG; ++#endif ++ // Whenever JVMTI is interp_only_mode, method entry/exit events are sent to ++ // track stack depth. If it is possible to enter interp_only_mode we add ++ // the code to check if the event should be sent. ++ if (mode == NotifyJVMTI && JvmtiExport::can_post_interpreter_events()) { ++ Label skip; ++ // Note: frame::interpreter_frame_result has a dependency on how the ++ // method result is saved across the call to post_method_exit. If this ++ // is changed then the interpreter_frame_result implementation will ++ // need to be updated too. ++ ++ // template interpreter will leave it on the top of the stack. 
++ push(state); ++ ld_w(tempreg, thread, in_bytes(JavaThread::interp_only_mode_offset())); ++ beq(tempreg, R0, skip); ++ call_VM(noreg, ++ CAST_FROM_FN_PTR(address, InterpreterRuntime::post_method_exit)); ++ bind(skip); ++ pop(state); ++ } ++ ++ { ++ // Dtrace notification ++ SkipIfEqual skip_if(this, &DTraceMethodProbes, 0); ++ push(state); ++ get_method(S3); ++ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::dtrace_method_exit), ++ //Rthread, Rmethod); ++ thread, S3); ++ pop(state); ++ } ++} ++ ++// Jump if ((*counter_addr += increment) & mask) satisfies the condition. ++void InterpreterMacroAssembler::increment_mask_and_jump(Address counter_addr, ++ int increment, int mask, ++ Register scratch, bool preloaded, ++ Condition cond, Label* where) { ++ assert_different_registers(scratch, AT); ++ ++ if (!preloaded) { ++ ld_w(scratch, counter_addr); ++ } ++ addi_w(scratch, scratch, increment); ++ st_w(scratch, counter_addr); ++ ++ li(AT, mask); ++ andr(scratch, scratch, AT); ++ ++ if (cond == Assembler::zero) { ++ beq(scratch, R0, *where); ++ } else { ++ unimplemented(); ++ } ++} +diff --git a/src/hotspot/cpu/loongarch/interpreterRT_loongarch.hpp b/src/hotspot/cpu/loongarch/interpreterRT_loongarch.hpp +new file mode 100644 +index 0000000000..d53d951a16 +--- /dev/null ++++ b/src/hotspot/cpu/loongarch/interpreterRT_loongarch.hpp +@@ -0,0 +1,62 @@ ++/* ++ * Copyright (c) 1998, 2010, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#ifndef CPU_LOONGARCH_INTERPRETERRT_LOONGARCH_HPP ++#define CPU_LOONGARCH_INTERPRETERRT_LOONGARCH_HPP ++ ++// This is included in the middle of class Interpreter. ++// Do not include files here. 
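A hedged sketch of the placement policy that the SignatureHandlerGenerator declared below encodes for LoongArch64 native calls, assuming the standard LP64D convention of eight integer (a0-a7) and eight floating-point (fa0-fa7) argument registers, with a0 already holding the JNIEnv* every native method receives. Floating-point arguments that overflow fa0-fa7 spill into the remaining integer registers before going to the stack, which is what pass_float()/pass_double() further down do; the exact reservation of a1 for the receiver or class mirror is left to the real generator.

    enum Placement { kIntReg, kFpReg, kStack };

    struct ArgCursorSketch {
        int int_used   = 1;   // a0 is taken by JNIEnv*
        int fp_used    = 0;
        int stack_used = 0;
    };

    Placement place_argument(ArgCursorSketch& s, bool is_floating_point) {
        const int kIntArgRegs = 8, kFpArgRegs = 8;
        if (is_floating_point && s.fp_used < kFpArgRegs) { s.fp_used++;  return kFpReg;  }
        if (s.int_used < kIntArgRegs)                    { s.int_used++; return kIntReg; }
        s.stack_used++;                                   // everything else goes to the native stack
        return kStack;
    }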
++ ++// native method calls ++ ++class SignatureHandlerGenerator: public NativeSignatureIterator { ++ private: ++ MacroAssembler* _masm; ++ unsigned int _num_fp_args; ++ unsigned int _num_int_args; ++ int _stack_offset; ++ ++ void move(int from_offset, int to_offset); ++ void box(int from_offset, int to_offset); ++ void pass_int(); ++ void pass_long(); ++ void pass_object(); ++ void pass_float(); ++ void pass_double(); ++ ++ public: ++ // Creation ++ SignatureHandlerGenerator(const methodHandle& method, CodeBuffer* buffer); ++ ++ // Code generation ++ void generate(uint64_t fingerprint); ++ ++ // Code generation support ++ static Register from(); ++ static Register to(); ++ static Register temp(); ++}; ++ ++#endif // CPU_LOONGARCH_INTERPRETERRT_LOONGARCH_HPP +diff --git a/src/hotspot/cpu/loongarch/interpreterRT_loongarch_64.cpp b/src/hotspot/cpu/loongarch/interpreterRT_loongarch_64.cpp +new file mode 100644 +index 0000000000..e2f31997b7 +--- /dev/null ++++ b/src/hotspot/cpu/loongarch/interpreterRT_loongarch_64.cpp +@@ -0,0 +1,273 @@ ++/* ++ * Copyright (c) 2003, 2012, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2018, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#include "precompiled.hpp" ++#include "interpreter/interpreter.hpp" ++#include "interpreter/interpreterRuntime.hpp" ++#include "memory/allocation.inline.hpp" ++#include "memory/universe.hpp" ++#include "oops/method.hpp" ++#include "oops/oop.inline.hpp" ++#include "runtime/handles.inline.hpp" ++#include "runtime/icache.hpp" ++#include "runtime/interfaceSupport.inline.hpp" ++#include "runtime/signature.hpp" ++ ++#define __ _masm-> ++ ++#define T0 RT0 ++#define T1 RT1 ++#define T2 RT2 ++#define T3 RT3 ++#define T4 RT4 ++#define T5 RT5 ++#define T6 RT6 ++#define T7 RT7 ++#define T8 RT8 ++ ++// Implementation of SignatureHandlerGenerator ++InterpreterRuntime::SignatureHandlerGenerator::SignatureHandlerGenerator( ++ const methodHandle& method, CodeBuffer* buffer) : NativeSignatureIterator(method) { ++ _masm = new MacroAssembler(buffer); ++ _num_int_args = (method->is_static() ? 
1 : 0); ++ _num_fp_args = 0; ++ _stack_offset = 0; ++} ++ ++void InterpreterRuntime::SignatureHandlerGenerator::move(int from_offset, int to_offset) { ++ __ ld_d(temp(), from(), Interpreter::local_offset_in_bytes(from_offset)); ++ __ st_d(temp(), to(), to_offset * longSize); ++} ++ ++void InterpreterRuntime::SignatureHandlerGenerator::box(int from_offset, int to_offset) { ++ __ addi_d(temp(), from(),Interpreter::local_offset_in_bytes(from_offset) ); ++ __ ld_w(AT, from(), Interpreter::local_offset_in_bytes(from_offset) ); ++ ++ __ maskeqz(temp(), temp(), AT); ++ __ st_w(temp(), to(), to_offset * wordSize); ++} ++ ++void InterpreterRuntime::SignatureHandlerGenerator::generate(uint64_t fingerprint) { ++ // generate code to handle arguments ++ iterate(fingerprint); ++ // return result handler ++ __ li(V0, AbstractInterpreter::result_handler(method()->result_type())); ++ // return ++ __ jr(RA); ++ ++ __ flush(); ++} ++ ++void InterpreterRuntime::SignatureHandlerGenerator::pass_int() { ++ if (_num_int_args < Argument::n_register_parameters - 1) { ++ __ ld_w(as_Register(++_num_int_args + A0->encoding()), from(), Interpreter::local_offset_in_bytes(offset())); ++ } else { ++ __ ld_w(AT, from(), Interpreter::local_offset_in_bytes(offset())); ++ __ st_w(AT, to(), _stack_offset); ++ _stack_offset += wordSize; ++ } ++} ++ ++// the jvm specifies that long type takes 2 stack spaces, so in do_long(), _offset += 2. ++void InterpreterRuntime::SignatureHandlerGenerator::pass_long() { ++ if (_num_int_args < Argument::n_register_parameters - 1) { ++ __ ld_d(as_Register(++_num_int_args + A0->encoding()), from(), Interpreter::local_offset_in_bytes(offset() + 1)); ++ } else { ++ __ ld_d(AT, from(), Interpreter::local_offset_in_bytes(offset() + 1)); ++ __ st_d(AT, to(), _stack_offset); ++ _stack_offset += wordSize; ++ } ++} ++ ++void InterpreterRuntime::SignatureHandlerGenerator::pass_object() { ++ if (_num_int_args < Argument::n_register_parameters - 1) { ++ Register reg = as_Register(++_num_int_args + A0->encoding()); ++ if (_num_int_args == 1) { ++ assert(offset() == 0, "argument register 1 can only be (non-null) receiver"); ++ __ addi_d(reg, from(), Interpreter::local_offset_in_bytes(offset())); ++ } else { ++ __ ld_d(reg, from(), Interpreter::local_offset_in_bytes(offset())); ++ __ addi_d(AT, from(), Interpreter::local_offset_in_bytes(offset())); ++ __ maskeqz(reg, AT, reg); ++ } ++ } else { ++ __ ld_d(temp(), from(), Interpreter::local_offset_in_bytes(offset())); ++ __ addi_d(AT, from(), Interpreter::local_offset_in_bytes(offset())); ++ __ maskeqz(temp(), AT, temp()); ++ __ st_d(temp(), to(), _stack_offset); ++ _stack_offset += wordSize; ++ } ++} ++ ++void InterpreterRuntime::SignatureHandlerGenerator::pass_float() { ++ if (_num_fp_args < Argument::n_float_register_parameters) { ++ __ fld_s(as_FloatRegister(_num_fp_args++), from(), Interpreter::local_offset_in_bytes(offset())); ++ } else if (_num_int_args < Argument::n_register_parameters - 1) { ++ __ ld_w(as_Register(++_num_int_args + A0->encoding()), from(), Interpreter::local_offset_in_bytes(offset())); ++ } else { ++ __ ld_w(AT, from(), Interpreter::local_offset_in_bytes(offset())); ++ __ st_w(AT, to(), _stack_offset); ++ _stack_offset += wordSize; ++ } ++} ++ ++// the jvm specifies that double type takes 2 stack spaces, so in do_double(), _offset += 2. 
++void InterpreterRuntime::SignatureHandlerGenerator::pass_double() { ++ if (_num_fp_args < Argument::n_float_register_parameters) { ++ __ fld_d(as_FloatRegister(_num_fp_args++), from(), Interpreter::local_offset_in_bytes(offset() + 1)); ++ } else if (_num_int_args < Argument::n_register_parameters - 1) { ++ __ ld_d(as_Register(++_num_int_args + A0->encoding()), from(), Interpreter::local_offset_in_bytes(offset() + 1)); ++ } else { ++ __ ld_d(AT, from(), Interpreter::local_offset_in_bytes(offset() + 1)); ++ __ st_d(AT, to(), _stack_offset); ++ _stack_offset += wordSize; ++ } ++} ++ ++ ++Register InterpreterRuntime::SignatureHandlerGenerator::from() { return LVP; } ++Register InterpreterRuntime::SignatureHandlerGenerator::to() { return SP; } ++Register InterpreterRuntime::SignatureHandlerGenerator::temp() { return T8; } ++ ++// Implementation of SignatureHandlerLibrary ++ ++void SignatureHandlerLibrary::pd_set_handler(address handler) {} ++ ++ ++class SlowSignatureHandler ++ : public NativeSignatureIterator { ++ private: ++ address _from; ++ intptr_t* _to; ++ intptr_t* _int_args; ++ intptr_t* _fp_args; ++ intptr_t* _fp_identifiers; ++ unsigned int _num_int_args; ++ unsigned int _num_fp_args; ++ ++ virtual void pass_int() ++ { ++ jint from_obj = *(jint *)(_from+Interpreter::local_offset_in_bytes(0)); ++ _from -= Interpreter::stackElementSize; ++ ++ if (_num_int_args < Argument::n_register_parameters - 1) { ++ *_int_args++ = from_obj; ++ _num_int_args++; ++ } else { ++ *_to++ = from_obj; ++ } ++ } ++ ++ virtual void pass_long() ++ { ++ intptr_t from_obj = *(intptr_t*)(_from+Interpreter::local_offset_in_bytes(1)); ++ _from -= 2 * Interpreter::stackElementSize; ++ ++ if (_num_int_args < Argument::n_register_parameters - 1) { ++ *_int_args++ = from_obj; ++ _num_int_args++; ++ } else { ++ *_to++ = from_obj; ++ } ++ } ++ ++ virtual void pass_object() ++ { ++ intptr_t *from_addr = (intptr_t*)(_from + Interpreter::local_offset_in_bytes(0)); ++ _from -= Interpreter::stackElementSize; ++ ++ if (_num_int_args < Argument::n_register_parameters - 1) { ++ *_int_args++ = (*from_addr == 0) ? NULL : (intptr_t) from_addr; ++ _num_int_args++; ++ } else { ++ *_to++ = (*from_addr == 0) ? NULL : (intptr_t) from_addr; ++ } ++ } ++ ++ virtual void pass_float() ++ { ++ jint from_obj = *(jint *)(_from+Interpreter::local_offset_in_bytes(0)); ++ _from -= Interpreter::stackElementSize; ++ ++ if (_num_fp_args < Argument::n_float_register_parameters) { ++ *_fp_args++ = from_obj; ++ _num_fp_args++; ++ } else if (_num_int_args < Argument::n_register_parameters - 1) { ++ *_int_args++ = from_obj; ++ _num_int_args++; ++ } else { ++ *_to++ = from_obj; ++ } ++ } ++ ++ virtual void pass_double() ++ { ++ intptr_t from_obj = *(intptr_t*)(_from+Interpreter::local_offset_in_bytes(1)); ++ _from -= 2*Interpreter::stackElementSize; ++ ++ if (_num_fp_args < Argument::n_float_register_parameters) { ++ *_fp_args++ = from_obj; ++ *_fp_identifiers |= (1 << _num_fp_args); // mark as double ++ _num_fp_args++; ++ } else if (_num_int_args < Argument::n_register_parameters - 1) { ++ *_int_args++ = from_obj; ++ _num_int_args++; ++ } else { ++ *_to++ = from_obj; ++ } ++ } ++ ++ public: ++ SlowSignatureHandler(methodHandle method, address from, intptr_t* to) ++ : NativeSignatureIterator(method) ++ { ++ _from = from; ++ _to = to; ++ ++ // see TemplateInterpreterGenerator::generate_slow_signature_handler() ++ _int_args = to - (method->is_static() ? 
15 : 16); ++ _fp_args = to - 8; ++ _fp_identifiers = to - 9; ++ *(int*) _fp_identifiers = 0; ++ _num_int_args = (method->is_static() ? 1 : 0); ++ _num_fp_args = 0; ++ } ++}; ++ ++ ++IRT_ENTRY(address, ++ InterpreterRuntime::slow_signature_handler(JavaThread* thread, ++ Method* method, ++ intptr_t* from, ++ intptr_t* to)) ++ methodHandle m(thread, (Method*)method); ++ assert(m->is_native(), "sanity check"); ++ ++ // handle arguments ++ SlowSignatureHandler(m, (address)from, to).iterate(UCONST64(-1)); ++ ++ // return result handler ++ return Interpreter::result_handler(m->result_type()); ++IRT_END +diff --git a/src/hotspot/cpu/loongarch/javaFrameAnchor_loongarch.hpp b/src/hotspot/cpu/loongarch/javaFrameAnchor_loongarch.hpp +new file mode 100644 +index 0000000000..6814fa44a0 +--- /dev/null ++++ b/src/hotspot/cpu/loongarch/javaFrameAnchor_loongarch.hpp +@@ -0,0 +1,87 @@ ++/* ++ * Copyright (c) 2002, 2010, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#ifndef CPU_LOONGARCH_JAVAFRAMEANCHOR_LOONGARCH_HPP ++#define CPU_LOONGARCH_JAVAFRAMEANCHOR_LOONGARCH_HPP ++ ++private: ++ ++ // FP value associated with _last_Java_sp: ++ intptr_t* volatile _last_Java_fp; // pointer is volatile not what it points to ++ ++public: ++ // Each arch must define reset, save, restore ++ // These are used by objects that only care about: ++ // 1 - initializing a new state (thread creation, javaCalls) ++ // 2 - saving a current state (javaCalls) ++ // 3 - restoring an old state (javaCalls) ++ ++ void clear(void) { ++ // clearing _last_Java_sp must be first ++ _last_Java_sp = NULL; ++ // fence? 
++ _last_Java_fp = NULL; ++ _last_Java_pc = NULL; ++ } ++ ++ void copy(JavaFrameAnchor* src) { ++ // In order to make sure the transition state is valid for "this" ++ // We must clear _last_Java_sp before copying the rest of the new data ++ // ++ // Hack Alert: Temporary bugfix for 4717480/4721647 ++ // To act like previous version (pd_cache_state) don't NULL _last_Java_sp ++ // unless the value is changing ++ // ++ if (_last_Java_sp != src->_last_Java_sp) ++ _last_Java_sp = NULL; ++ ++ _last_Java_fp = src->_last_Java_fp; ++ _last_Java_pc = src->_last_Java_pc; ++ // Must be last so profiler will always see valid frame if has_last_frame() is true ++ _last_Java_sp = src->_last_Java_sp; ++ } ++ ++ // Always walkable ++ bool walkable(void) { return true; } ++ // Never any thing to do since we are always walkable and can find address of return addresses ++ void make_walkable(JavaThread* thread) { } ++ ++ intptr_t* last_Java_sp(void) const { return _last_Java_sp; } ++ ++ address last_Java_pc(void) { return _last_Java_pc; } ++ ++private: ++ ++ static ByteSize last_Java_fp_offset() { return byte_offset_of(JavaFrameAnchor, _last_Java_fp); } ++ ++public: ++ ++ void set_last_Java_sp(intptr_t* sp) { _last_Java_sp = sp; } ++ ++ intptr_t* last_Java_fp(void) { return _last_Java_fp; } ++ // Assert (last_Java_sp == NULL || fp == NULL) ++ void set_last_Java_fp(intptr_t* fp) { _last_Java_fp = fp; } ++ ++#endif // CPU_LOONGARCH_JAVAFRAMEANCHOR_LOONGARCH_HPP +diff --git a/src/hotspot/cpu/loongarch/jniFastGetField_loongarch_64.cpp b/src/hotspot/cpu/loongarch/jniFastGetField_loongarch_64.cpp +new file mode 100644 +index 0000000000..dbcdb7a6a4 +--- /dev/null ++++ b/src/hotspot/cpu/loongarch/jniFastGetField_loongarch_64.cpp +@@ -0,0 +1,166 @@ ++/* ++ * Copyright (c) 2004, 2010, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2021, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ * ++ */ ++ ++#include "precompiled.hpp" ++#include "asm/macroAssembler.hpp" ++#include "code/codeBlob.hpp" ++#include "gc/shared/barrierSet.hpp" ++#include "gc/shared/barrierSetAssembler.hpp" ++#include "memory/resourceArea.hpp" ++#include "prims/jniFastGetField.hpp" ++#include "prims/jvm_misc.hpp" ++#include "runtime/safepoint.hpp" ++ ++#define __ masm-> ++ ++#define T0 RT0 ++#define T1 RT1 ++#define T2 RT2 ++#define T3 RT3 ++#define T4 RT4 ++#define T5 RT5 ++#define T6 RT6 ++#define T7 RT7 ++#define T8 RT8 ++ ++#define BUFFER_SIZE 30*wordSize ++ ++// Instead of issuing lfence for LoadLoad barrier, we create data dependency ++// between loads, which is more efficient than lfence. ++ ++address JNI_FastGetField::generate_fast_get_int_field0(BasicType type) { ++ const char *name = NULL; ++ switch (type) { ++ case T_BOOLEAN: name = "jni_fast_GetBooleanField"; break; ++ case T_BYTE: name = "jni_fast_GetByteField"; break; ++ case T_CHAR: name = "jni_fast_GetCharField"; break; ++ case T_SHORT: name = "jni_fast_GetShortField"; break; ++ case T_INT: name = "jni_fast_GetIntField"; break; ++ case T_LONG: name = "jni_fast_GetLongField"; break; ++ case T_FLOAT: name = "jni_fast_GetFloatField"; break; ++ case T_DOUBLE: name = "jni_fast_GetDoubleField"; break; ++ default: ShouldNotReachHere(); ++ } ++ ResourceMark rm; ++ BufferBlob* blob = BufferBlob::create(name, BUFFER_SIZE); ++ CodeBuffer cbuf(blob); ++ MacroAssembler* masm = new MacroAssembler(&cbuf); ++ address fast_entry = __ pc(); ++ ++ Label slow; ++ ++ // return pc RA ++ // jni env A0 ++ // obj A1 ++ // jfieldID A2 ++ ++ address counter_addr = SafepointSynchronize::safepoint_counter_addr(); ++ __ li(AT, (long)counter_addr); ++ __ ld_w(T1, AT, 0); ++ ++ // Parameters(A0~A3) should not be modified, since they will be used in slow path ++ __ andi(AT, T1, 1); ++ __ bne(AT, R0, slow); ++ ++ __ move(T0, A1); ++ // Both T0 and T4 are clobbered by try_resolve_jobject_in_native. 
++ BarrierSetAssembler* bs = BarrierSet::barrier_set()->barrier_set_assembler(); ++ bs->try_resolve_jobject_in_native(masm, /* jni_env */ A0, T0, T4, slow); ++ ++ __ srli_d(T2, A2, 2); // offset ++ __ add_d(T0, T0, T2); ++ ++ __ li(AT, (long)counter_addr); ++ __ ld_w(AT, AT, 0); ++ __ bne(T1, AT, slow); ++ ++ assert(count < LIST_CAPACITY, "LIST_CAPACITY too small"); ++ speculative_load_pclist[count] = __ pc(); ++ switch (type) { ++ case T_BOOLEAN: __ ld_bu (V0, T0, 0); break; ++ case T_BYTE: __ ld_b (V0, T0, 0); break; ++ case T_CHAR: __ ld_hu (V0, T0, 0); break; ++ case T_SHORT: __ ld_h (V0, T0, 0); break; ++ case T_INT: __ ld_w (V0, T0, 0); break; ++ case T_LONG: __ ld_d (V0, T0, 0); break; ++ case T_FLOAT: __ fld_s (F0, T0, 0); break; ++ case T_DOUBLE: __ fld_d (F0, T0, 0); break; ++ default: ShouldNotReachHere(); ++ } ++ ++ __ jr(RA); ++ ++ slowcase_entry_pclist[count++] = __ pc(); ++ __ bind (slow); ++ address slow_case_addr = NULL; ++ switch (type) { ++ case T_BOOLEAN: slow_case_addr = jni_GetBooleanField_addr(); break; ++ case T_BYTE: slow_case_addr = jni_GetByteField_addr(); break; ++ case T_CHAR: slow_case_addr = jni_GetCharField_addr(); break; ++ case T_SHORT: slow_case_addr = jni_GetShortField_addr(); break; ++ case T_INT: slow_case_addr = jni_GetIntField_addr(); break; ++ case T_LONG: slow_case_addr = jni_GetLongField_addr(); break; ++ case T_FLOAT: slow_case_addr = jni_GetFloatField_addr(); break; ++ case T_DOUBLE: slow_case_addr = jni_GetDoubleField_addr(); break; ++ default: ShouldNotReachHere(); ++ } ++ __ jmp(slow_case_addr); ++ ++ __ flush (); ++ ++ return fast_entry; ++} ++ ++address JNI_FastGetField::generate_fast_get_boolean_field() { ++ return generate_fast_get_int_field0(T_BOOLEAN); ++} ++ ++address JNI_FastGetField::generate_fast_get_byte_field() { ++ return generate_fast_get_int_field0(T_BYTE); ++} ++ ++address JNI_FastGetField::generate_fast_get_char_field() { ++ return generate_fast_get_int_field0(T_CHAR); ++} ++ ++address JNI_FastGetField::generate_fast_get_short_field() { ++ return generate_fast_get_int_field0(T_SHORT); ++} ++ ++address JNI_FastGetField::generate_fast_get_int_field() { ++ return generate_fast_get_int_field0(T_INT); ++} ++ ++address JNI_FastGetField::generate_fast_get_long_field() { ++ return generate_fast_get_int_field0(T_LONG); ++} ++ ++address JNI_FastGetField::generate_fast_get_float_field() { ++ return generate_fast_get_int_field0(T_FLOAT); ++} ++ ++address JNI_FastGetField::generate_fast_get_double_field() { ++ return generate_fast_get_int_field0(T_DOUBLE); ++} +diff --git a/src/hotspot/cpu/loongarch/jniTypes_loongarch.hpp b/src/hotspot/cpu/loongarch/jniTypes_loongarch.hpp +new file mode 100644 +index 0000000000..b281f86372 +--- /dev/null ++++ b/src/hotspot/cpu/loongarch/jniTypes_loongarch.hpp +@@ -0,0 +1,144 @@ ++/* ++ * Copyright (c) 1998, 2010, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2021, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#ifndef CPU_LOONGARCH_JNITYPES_LOONGARCH_HPP ++#define CPU_LOONGARCH_JNITYPES_LOONGARCH_HPP ++ ++#include "jni.h" ++#include "memory/allocation.hpp" ++#include "oops/oop.hpp" ++ ++// This file holds platform-dependent routines used to write primitive jni ++// types to the array of arguments passed into JavaCalls::call ++ ++class JNITypes : AllStatic { ++ // These functions write a java primitive type (in native format) ++ // to a java stack slot array to be passed as an argument to JavaCalls::call. ++ // I.e., they are functionally 'push' operations if they have a 'pos' ++ // formal parameter. Note that jlong's and jdouble's are written ++ // _in reverse_ of the order in which they appear in the interpreter ++ // stack. This is because call stubs (see stubGenerator_sparc.cpp) ++ // reverse the argument list constructed by JavaCallArguments (see ++ // javaCalls.hpp). ++ ++private: ++ ++ // 32bit Helper routines. ++ static inline void put_int2r(jint *from, intptr_t *to) { *(jint *)(to++) = from[1]; ++ *(jint *)(to ) = from[0]; } ++ static inline void put_int2r(jint *from, intptr_t *to, int& pos) { put_int2r(from, to + pos); pos += 2; } ++ ++public: ++ // In LOONGARCH64, sizeof(intptr_t) is 8 bytes, and each unit in JavaCallArguments::_value_buffer[] ++ // is 8 bytes. ++ // If we only write the low 4 bytes with (jint *), the high 4 bytes will be left with uncertain values. ++ // Then, in JavaCallArguments::parameters(), the whole 8 bytes of a T_INT parameter are loaded. ++ // This error occurs in ReflectInvoke.java ++ // The parameter of DD(int) should be 4 instead of 0x550000004. ++ // ++ // See: [runtime/javaCalls.hpp] ++ ++ static inline void put_int(jint from, intptr_t *to) { *(intptr_t *)(to + 0 ) = from; } ++ static inline void put_int(jint from, intptr_t *to, int& pos) { *(intptr_t *)(to + pos++) = from; } ++ static inline void put_int(jint *from, intptr_t *to, int& pos) { *(intptr_t *)(to + pos++) = *from; } ++ ++ // Longs are stored in native format in one JavaCallArgument slot at ++ // *(to). ++ // In theory, *(to + 1) is an empty slot. But, for several Java2D testing programs (TestBorderLayout, SwingTest), ++ // *(to + 1) must contain a copy of the long value, otherwise it will be corrupted. ++ static inline void put_long(jlong from, intptr_t *to) { ++ *(jlong*) (to + 1) = from; ++ *(jlong*) (to) = from; ++ } ++ ++ // A long parameter occupies two slots. ++ // It must fit the layout rule in methodHandle.
++ // ++ // See: [runtime/reflection.cpp] Reflection::invoke() ++ // assert(java_args.size_of_parameters() == method->size_of_parameters(), "just checking"); ++ ++ static inline void put_long(jlong from, intptr_t *to, int& pos) { ++ *(jlong*) (to + 1 + pos) = from; ++ *(jlong*) (to + pos) = from; ++ pos += 2; ++ } ++ ++ static inline void put_long(jlong *from, intptr_t *to, int& pos) { ++ *(jlong*) (to + 1 + pos) = *from; ++ *(jlong*) (to + pos) = *from; ++ pos += 2; ++ } ++ ++ // Oops are stored in native format in one JavaCallArgument slot at *to. ++ static inline void put_obj(oop from, intptr_t *to) { *(oop *)(to + 0 ) = from; } ++ static inline void put_obj(oop from, intptr_t *to, int& pos) { *(oop *)(to + pos++) = from; } ++ static inline void put_obj(oop *from, intptr_t *to, int& pos) { *(oop *)(to + pos++) = *from; } ++ ++ // Floats are stored in native format in one JavaCallArgument slot at *to. ++ static inline void put_float(jfloat from, intptr_t *to) { *(jfloat *)(to + 0 ) = from; } ++ static inline void put_float(jfloat from, intptr_t *to, int& pos) { *(jfloat *)(to + pos++) = from; } ++ static inline void put_float(jfloat *from, intptr_t *to, int& pos) { *(jfloat *)(to + pos++) = *from; } ++ ++#undef _JNI_SLOT_OFFSET ++#define _JNI_SLOT_OFFSET 0 ++ ++ // Longs are stored in native format in one JavaCallArgument slot at ++ // *(to). ++ // In theory, *(to + 1) is an empty slot. But, for several Java2D testing programs (TestBorderLayout, SwingTest), ++ // *(to + 1) must contain a copy of the long value, otherwise it will be corrupted. ++ static inline void put_double(jdouble from, intptr_t *to) { ++ *(jdouble*) (to + 1) = from; ++ *(jdouble*) (to) = from; ++ } ++ ++ // A long parameter occupies two slots. ++ // It must fit the layout rule in methodHandle. ++ // ++ // See: [runtime/reflection.cpp] Reflection::invoke() ++ // assert(java_args.size_of_parameters() == method->size_of_parameters(), "just checking"); ++ ++ static inline void put_double(jdouble from, intptr_t *to, int& pos) { ++ *(jdouble*) (to + 1 + pos) = from; ++ *(jdouble*) (to + pos) = from; ++ pos += 2; ++ } ++ ++ static inline void put_double(jdouble *from, intptr_t *to, int& pos) { ++ *(jdouble*) (to + 1 + pos) = *from; ++ *(jdouble*) (to + pos) = *from; ++ pos += 2; ++ } ++ ++ // The get_xxx routines, on the other hand, actually _do_ fetch ++ // java primitive types from the interpreter stack. ++ static inline jint get_int (intptr_t *from) { return *(jint *) from; } ++ static inline jlong get_long (intptr_t *from) { return *(jlong *) (from + _JNI_SLOT_OFFSET); } ++ static inline oop get_obj (intptr_t *from) { return *(oop *) from; } ++ static inline jfloat get_float (intptr_t *from) { return *(jfloat *) from; } ++ static inline jdouble get_double(intptr_t *from) { return *(jdouble *)(from + _JNI_SLOT_OFFSET); } ++#undef _JNI_SLOT_OFFSET ++}; ++ ++#endif // CPU_LOONGARCH_JNITYPES_LOONGARCH_HPP +diff --git a/src/hotspot/cpu/loongarch/jvmciCodeInstaller_loongarch.cpp b/src/hotspot/cpu/loongarch/jvmciCodeInstaller_loongarch.cpp +new file mode 100644 +index 0000000000..ea481c7fa6 +--- /dev/null ++++ b/src/hotspot/cpu/loongarch/jvmciCodeInstaller_loongarch.cpp +@@ -0,0 +1,199 @@ ++/* ++ * Copyright (c) 2015, 2022, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ */ ++ ++#include "precompiled.hpp" ++#include "asm/macroAssembler.hpp" ++#include "jvmci/jvmciCodeInstaller.hpp" ++#include "jvmci/jvmciRuntime.hpp" ++#include "jvmci/jvmciCompilerToVM.hpp" ++#include "jvmci/jvmciJavaClasses.hpp" ++#include "oops/oop.inline.hpp" ++#include "runtime/handles.inline.hpp" ++#include "runtime/sharedRuntime.hpp" ++#include "vmreg_loongarch.inline.hpp" ++ ++jint CodeInstaller::pd_next_offset(NativeInstruction* inst, jint pc_offset, Handle method, TRAPS) { ++ if (inst->is_int_branch() || inst->is_float_branch()) { ++ return pc_offset + NativeInstruction::nop_instruction_size; ++ } else if (inst->is_call()) { ++ return pc_offset + NativeCall::instruction_size; ++ } else if (inst->is_far_call()) { ++ return pc_offset + NativeFarCall::instruction_size; ++ } else if (inst->is_jump()) { ++ return pc_offset + NativeGeneralJump::instruction_size; ++ } else if (inst->is_lu12iw_lu32id()) { ++ // match LoongArch64TestAssembler.java emitCall ++ // lu12i_w; lu32i_d; jirl ++ return pc_offset + 3 * NativeInstruction::nop_instruction_size; ++ } else { ++ JVMCI_ERROR_0("unsupported type of instruction for call site"); ++ } ++ return 0; ++} ++ ++void CodeInstaller::pd_patch_OopConstant(int pc_offset, Handle constant, TRAPS) { ++ address pc = _instructions->start() + pc_offset; ++ Handle obj(THREAD, HotSpotObjectConstantImpl::object(constant)); ++ jobject value = JNIHandles::make_local(obj()); ++ if (HotSpotObjectConstantImpl::compressed(constant)) { ++ NativeMovConstReg* move = nativeMovConstReg_at(pc); ++ move->set_data((intptr_t)(CompressedOops::encode(cast_to_oop(cast_from_oop
(obj()))))); ++ int oop_index = _oop_recorder->find_index(value); ++ RelocationHolder rspec = oop_Relocation::spec(oop_index); ++ _instructions->relocate(pc, rspec, Assembler::narrow_oop_operand); ++ } else { ++ NativeMovConstReg* move = nativeMovConstReg_at(pc); ++ move->set_data((intptr_t)(cast_from_oop
(obj()))); ++ int oop_index = _oop_recorder->find_index(value); ++ RelocationHolder rspec = oop_Relocation::spec(oop_index); ++ _instructions->relocate(pc, rspec); ++ } ++} ++ ++void CodeInstaller::pd_patch_MetaspaceConstant(int pc_offset, Handle constant, TRAPS) { ++ address pc = _instructions->start() + pc_offset; ++ if (HotSpotMetaspaceConstantImpl::compressed(constant)) { ++ NativeMovConstReg* move = nativeMovConstReg_at(pc); ++ narrowKlass narrowOop = record_narrow_metadata_reference(_instructions, pc, constant, CHECK); ++ move->set_data((intptr_t) narrowOop); ++ TRACE_jvmci_3("relocating (narrow metaspace constant) at " PTR_FORMAT "/0x%x", p2i(pc), narrowOop); ++ } else { ++ NativeMovConstReg* move = nativeMovConstReg_at(pc); ++ void* reference = record_metadata_reference(_instructions, pc, constant, CHECK); ++ move->set_data((intptr_t) reference); ++ TRACE_jvmci_3("relocating (metaspace constant) at " PTR_FORMAT "/" PTR_FORMAT, p2i(pc), p2i(reference)); ++ } ++} ++ ++void CodeInstaller::pd_patch_DataSectionReference(int pc_offset, int data_offset, TRAPS) { ++ address pc = _instructions->start() + pc_offset; ++ NativeInstruction* inst = nativeInstruction_at(pc); ++ if (inst->is_pcaddu12i_add()) { ++ address dest = _constants->start() + data_offset; ++ _instructions->relocate(pc, section_word_Relocation::spec((address) dest, CodeBuffer::SECT_CONSTS)); ++ TRACE_jvmci_3("relocating at " PTR_FORMAT " (+%d) with destination at %d", p2i(pc), pc_offset, data_offset); ++ } else { ++ JVMCI_ERROR("unknown load or move instruction at " PTR_FORMAT, p2i(pc)); ++ } ++} ++ ++void CodeInstaller::pd_relocate_ForeignCall(NativeInstruction* inst, jlong foreign_call_destination, TRAPS) { ++ address pc = (address) inst; ++ if (inst->is_call()) { ++ NativeCall* call = nativeCall_at(pc); ++ call->set_destination((address) foreign_call_destination); ++ _instructions->relocate(call->instruction_address(), runtime_call_Relocation::spec()); ++ } else if (inst->is_far_call()) { ++ NativeFarCall* call = nativeFarCall_at(pc); ++ call->set_destination((address) foreign_call_destination); ++ _instructions->relocate(call->instruction_address(), runtime_call_Relocation::spec()); ++ } else if (inst->is_jump()) { ++ NativeGeneralJump* jump = nativeGeneralJump_at(pc); ++ jump->set_jump_destination((address) foreign_call_destination); ++ _instructions->relocate(jump->instruction_address(), runtime_call_Relocation::spec()); ++ } else if (inst->is_lu12iw_lu32id()) { ++ // match emitCall of LoongArch64TestAssembler.java ++ // lu12i_w; lu32i_d; jirl ++ MacroAssembler::pd_patch_instruction((address)inst, (address)foreign_call_destination); ++ } else { ++ JVMCI_ERROR("unknown call or jump instruction at " PTR_FORMAT, p2i(pc)); ++ } ++ TRACE_jvmci_3("relocating (foreign call) at " PTR_FORMAT, p2i(inst)); ++} ++ ++void CodeInstaller::pd_relocate_JavaMethod(CodeBuffer &cbuf, Handle hotspot_method, jint pc_offset, TRAPS) { ++#ifdef ASSERT ++ Method* method = NULL; ++ // we need to check, this might also be an unresolved method ++ if (hotspot_method->is_a(HotSpotResolvedJavaMethodImpl::klass())) { ++ method = getMethodFromHotSpotMethod(hotspot_method()); ++ } ++#endif ++ switch (_next_call_type) { ++ case INLINE_INVOKE: ++ break; ++ case INVOKEVIRTUAL: ++ case INVOKEINTERFACE: { ++ assert(!method->is_static(), "cannot call static method with invokeinterface"); ++ NativeCall* call = nativeCall_at(_instructions->start() + pc_offset); ++ _instructions->relocate(call->instruction_address(), 
virtual_call_Relocation::spec(_invoke_mark_pc)); ++ call->trampoline_jump(cbuf, SharedRuntime::get_resolve_virtual_call_stub()); ++ break; ++ } ++ case INVOKESTATIC: { ++ assert(method->is_static(), "cannot call non-static method with invokestatic"); ++ NativeCall* call = nativeCall_at(_instructions->start() + pc_offset); ++ _instructions->relocate(call->instruction_address(), relocInfo::static_call_type); ++ call->trampoline_jump(cbuf, SharedRuntime::get_resolve_static_call_stub()); ++ break; ++ } ++ case INVOKESPECIAL: { ++ assert(!method->is_static(), "cannot call static method with invokespecial"); ++ NativeCall* call = nativeCall_at(_instructions->start() + pc_offset); ++ _instructions->relocate(call->instruction_address(), relocInfo::opt_virtual_call_type); ++ call->trampoline_jump(cbuf, SharedRuntime::get_resolve_opt_virtual_call_stub()); ++ break; ++ } ++ default: ++ JVMCI_ERROR("invalid _next_call_type value"); ++ break; ++ } ++} ++ ++void CodeInstaller::pd_relocate_poll(address pc, jint mark, TRAPS) { ++ switch (mark) { ++ case POLL_NEAR: ++ JVMCI_ERROR("unimplemented"); ++ break; ++ case POLL_FAR: ++ _instructions->relocate(pc, relocInfo::poll_type); ++ break; ++ case POLL_RETURN_NEAR: ++ JVMCI_ERROR("unimplemented"); ++ break; ++ case POLL_RETURN_FAR: ++ _instructions->relocate(pc, relocInfo::poll_return_type); ++ break; ++ default: ++ JVMCI_ERROR("invalid mark value"); ++ break; ++ } ++} ++ ++// convert JVMCI register indices (as used in oop maps) to HotSpot registers ++VMReg CodeInstaller::get_hotspot_reg(jint jvmci_reg, TRAPS) { ++ if (jvmci_reg < RegisterImpl::number_of_registers) { ++ return as_Register(jvmci_reg)->as_VMReg(); ++ } else { ++ jint floatRegisterNumber = jvmci_reg - RegisterImpl::number_of_registers; ++ if (floatRegisterNumber >= 0 && floatRegisterNumber < FloatRegisterImpl::number_of_registers) { ++ return as_FloatRegister(floatRegisterNumber)->as_VMReg(); ++ } ++ JVMCI_ERROR_NULL("invalid register number: %d", jvmci_reg); ++ } ++} ++ ++bool CodeInstaller::is_general_purpose_reg(VMReg hotspotRegister) { ++ return !hotspotRegister->is_FloatRegister(); ++} +diff --git a/src/hotspot/cpu/loongarch/loongarch.ad b/src/hotspot/cpu/loongarch/loongarch.ad +new file mode 100644 +index 0000000000..80dff0c762 +--- /dev/null ++++ b/src/hotspot/cpu/loongarch/loongarch.ad +@@ -0,0 +1,25 @@ ++// ++// Copyright (c) 2011, 2012, Oracle and/or its affiliates. All rights reserved. ++// Copyright (c) 2015, 2021, Loongson Technology. All rights reserved. ++// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++// ++// This code is free software; you can redistribute it and/or modify it ++// under the terms of the GNU General Public License version 2 only, as ++// published by the Free Software Foundation. ++// ++// This code is distributed in the hope that it will be useful, but WITHOUT ++// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++// version 2 for more details (a copy is included in the LICENSE file that ++// accompanied this code). ++// ++// You should have received a copy of the GNU General Public License version ++// 2 along with this work; if not, write to the Free Software Foundation, ++// Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++// ++// Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++// or visit www.oracle.com if you need additional information or have any ++// questions. 
++// ++// ++ +diff --git a/src/hotspot/cpu/loongarch/loongarch_64.ad b/src/hotspot/cpu/loongarch/loongarch_64.ad +new file mode 100644 +index 0000000000..cc3824a402 +--- /dev/null ++++ b/src/hotspot/cpu/loongarch/loongarch_64.ad +@@ -0,0 +1,13917 @@ ++// ++// Copyright (c) 2003, 2013, Oracle and/or its affiliates. All rights reserved. ++// Copyright (c) 2015, 2023, Loongson Technology. All rights reserved. ++// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++// ++// This code is free software; you can redistribute it and/or modify it ++// under the terms of the GNU General Public License version 2 only, as ++// published by the Free Software Foundation. ++// ++// This code is distributed in the hope that it will be useful, but WITHOUT ++// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++// version 2 for more details (a copy is included in the LICENSE file that ++// accompanied this code). ++// ++// You should have received a copy of the GNU General Public License version ++// 2 along with this work; if not, write to the Free Software Foundation, ++// Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++// ++// Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++// or visit www.oracle.com if you need additional information or have any ++// questions. ++// ++// ++ ++// GodSon3 Architecture Description File ++ ++//----------REGISTER DEFINITION BLOCK------------------------------------------ ++// This information is used by the matcher and the register allocator to ++// describe individual registers and classes of registers within the target ++// archtecture. ++ ++// format: ++// reg_def name (call convention, c-call convention, ideal type, encoding); ++// call convention : ++// NS = No-Save ++// SOC = Save-On-Call ++// SOE = Save-On-Entry ++// AS = Always-Save ++// ideal type : ++// see opto/opcodes.hpp for more info ++// reg_class name (reg, ...); ++// alloc_class name (reg, ...); ++register %{ ++ ++// General Registers ++// Integer Registers ++ reg_def R0 ( NS, NS, Op_RegI, 0, R0->as_VMReg()); ++ reg_def R0_H ( NS, NS, Op_RegI, 0, R0->as_VMReg()->next()); ++ reg_def RA ( NS, NS, Op_RegI, 1, RA->as_VMReg()); ++ reg_def RA_H ( NS, NS, Op_RegI, 1, RA->as_VMReg()->next()); ++ reg_def TP ( NS, NS, Op_RegI, 2, TP->as_VMReg()); ++ reg_def TP_H ( NS, NS, Op_RegI, 2, TP->as_VMReg()->next()); ++ reg_def SP ( NS, NS, Op_RegI, 3, SP->as_VMReg()); ++ reg_def SP_H ( NS, NS, Op_RegI, 3, SP->as_VMReg()->next()); ++ reg_def A0 (SOC, SOC, Op_RegI, 4, A0->as_VMReg()); ++ reg_def A0_H (SOC, SOC, Op_RegI, 4, A0->as_VMReg()->next()); ++ reg_def A1 (SOC, SOC, Op_RegI, 5, A1->as_VMReg()); ++ reg_def A1_H (SOC, SOC, Op_RegI, 5, A1->as_VMReg()->next()); ++ reg_def A2 (SOC, SOC, Op_RegI, 6, A2->as_VMReg()); ++ reg_def A2_H (SOC, SOC, Op_RegI, 6, A2->as_VMReg()->next()); ++ reg_def A3 (SOC, SOC, Op_RegI, 7, A3->as_VMReg()); ++ reg_def A3_H (SOC, SOC, Op_RegI, 7, A3->as_VMReg()->next()); ++ reg_def A4 (SOC, SOC, Op_RegI, 8, A4->as_VMReg()); ++ reg_def A4_H (SOC, SOC, Op_RegI, 8, A4->as_VMReg()->next()); ++ reg_def A5 (SOC, SOC, Op_RegI, 9, A5->as_VMReg()); ++ reg_def A5_H (SOC, SOC, Op_RegI, 9, A5->as_VMReg()->next()); ++ reg_def A6 (SOC, SOC, Op_RegI, 10, A6->as_VMReg()); ++ reg_def A6_H (SOC, SOC, Op_RegI, 10, A6->as_VMReg()->next()); ++ reg_def A7 (SOC, SOC, Op_RegI, 11, A7->as_VMReg()); ++ reg_def A7_H (SOC, SOC, Op_RegI, 11, A7->as_VMReg()->next()); ++ reg_def T0 (SOC, 
SOC, Op_RegI, 12, T0->as_VMReg()); ++ reg_def T0_H (SOC, SOC, Op_RegI, 12, T0->as_VMReg()->next()); ++ reg_def T1 (SOC, SOC, Op_RegI, 13, T1->as_VMReg()); ++ reg_def T1_H (SOC, SOC, Op_RegI, 13, T1->as_VMReg()->next()); ++ reg_def T2 (SOC, SOC, Op_RegI, 14, T2->as_VMReg()); ++ reg_def T2_H (SOC, SOC, Op_RegI, 14, T2->as_VMReg()->next()); ++ reg_def T3 (SOC, SOC, Op_RegI, 15, T3->as_VMReg()); ++ reg_def T3_H (SOC, SOC, Op_RegI, 15, T3->as_VMReg()->next()); ++ reg_def T4 (SOC, SOC, Op_RegI, 16, T4->as_VMReg()); ++ reg_def T4_H (SOC, SOC, Op_RegI, 16, T4->as_VMReg()->next()); ++ reg_def T5 (SOC, SOC, Op_RegI, 17, T5->as_VMReg()); ++ reg_def T5_H (SOC, SOC, Op_RegI, 17, T5->as_VMReg()->next()); ++ reg_def T6 (SOC, SOC, Op_RegI, 18, T6->as_VMReg()); ++ reg_def T6_H (SOC, SOC, Op_RegI, 18, T6->as_VMReg()->next()); ++ reg_def T7 (SOC, SOC, Op_RegI, 19, T7->as_VMReg()); ++ reg_def T7_H (SOC, SOC, Op_RegI, 19, T7->as_VMReg()->next()); ++ reg_def T8 (SOC, SOC, Op_RegI, 20, T8->as_VMReg()); ++ reg_def T8_H (SOC, SOC, Op_RegI, 20, T8->as_VMReg()->next()); ++ reg_def RX ( NS, NS, Op_RegI, 21, RX->as_VMReg()); ++ reg_def RX_H ( NS, NS, Op_RegI, 21, RX->as_VMReg()->next()); ++ reg_def FP ( NS, NS, Op_RegI, 22, FP->as_VMReg()); ++ reg_def FP_H ( NS, NS, Op_RegI, 22, FP->as_VMReg()->next()); ++ reg_def S0 (SOC, SOE, Op_RegI, 23, S0->as_VMReg()); ++ reg_def S0_H (SOC, SOE, Op_RegI, 23, S0->as_VMReg()->next()); ++ reg_def S1 (SOC, SOE, Op_RegI, 24, S1->as_VMReg()); ++ reg_def S1_H (SOC, SOE, Op_RegI, 24, S1->as_VMReg()->next()); ++ reg_def S2 (SOC, SOE, Op_RegI, 25, S2->as_VMReg()); ++ reg_def S2_H (SOC, SOE, Op_RegI, 25, S2->as_VMReg()->next()); ++ reg_def S3 (SOC, SOE, Op_RegI, 26, S3->as_VMReg()); ++ reg_def S3_H (SOC, SOE, Op_RegI, 26, S3->as_VMReg()->next()); ++ reg_def S4 (SOC, SOE, Op_RegI, 27, S4->as_VMReg()); ++ reg_def S4_H (SOC, SOE, Op_RegI, 27, S4->as_VMReg()->next()); ++ reg_def S5 (SOC, SOE, Op_RegI, 28, S5->as_VMReg()); ++ reg_def S5_H (SOC, SOE, Op_RegI, 28, S5->as_VMReg()->next()); ++ reg_def S6 (SOC, SOE, Op_RegI, 29, S6->as_VMReg()); ++ reg_def S6_H (SOC, SOE, Op_RegI, 29, S6->as_VMReg()->next()); ++ reg_def S7 (SOC, SOE, Op_RegI, 30, S7->as_VMReg()); ++ reg_def S7_H (SOC, SOE, Op_RegI, 30, S7->as_VMReg()->next()); ++ reg_def S8 (SOC, SOE, Op_RegI, 31, S8->as_VMReg()); ++ reg_def S8_H (SOC, SOE, Op_RegI, 31, S8->as_VMReg()->next()); ++ ++ ++// Floating/Vector registers. 
++ reg_def F0 ( SOC, SOC, Op_RegF, 0, F0->as_VMReg() ); ++ reg_def F0_H ( SOC, SOC, Op_RegF, 0, F0->as_VMReg()->next() ); ++ reg_def F0_J ( SOC, SOC, Op_RegF, 0, F0->as_VMReg()->next(2) ); ++ reg_def F0_K ( SOC, SOC, Op_RegF, 0, F0->as_VMReg()->next(3) ); ++ reg_def F0_L ( SOC, SOC, Op_RegF, 0, F0->as_VMReg()->next(4) ); ++ reg_def F0_M ( SOC, SOC, Op_RegF, 0, F0->as_VMReg()->next(5) ); ++ reg_def F0_N ( SOC, SOC, Op_RegF, 0, F0->as_VMReg()->next(6) ); ++ reg_def F0_O ( SOC, SOC, Op_RegF, 0, F0->as_VMReg()->next(7) ); ++ ++ reg_def F1 ( SOC, SOC, Op_RegF, 1, F1->as_VMReg() ); ++ reg_def F1_H ( SOC, SOC, Op_RegF, 1, F1->as_VMReg()->next() ); ++ reg_def F1_J ( SOC, SOC, Op_RegF, 1, F1->as_VMReg()->next(2) ); ++ reg_def F1_K ( SOC, SOC, Op_RegF, 1, F1->as_VMReg()->next(3) ); ++ reg_def F1_L ( SOC, SOC, Op_RegF, 1, F1->as_VMReg()->next(4) ); ++ reg_def F1_M ( SOC, SOC, Op_RegF, 1, F1->as_VMReg()->next(5) ); ++ reg_def F1_N ( SOC, SOC, Op_RegF, 1, F1->as_VMReg()->next(6) ); ++ reg_def F1_O ( SOC, SOC, Op_RegF, 1, F1->as_VMReg()->next(7) ); ++ ++ reg_def F2 ( SOC, SOC, Op_RegF, 2, F2->as_VMReg() ); ++ reg_def F2_H ( SOC, SOC, Op_RegF, 2, F2->as_VMReg()->next() ); ++ reg_def F2_J ( SOC, SOC, Op_RegF, 2, F2->as_VMReg()->next(2) ); ++ reg_def F2_K ( SOC, SOC, Op_RegF, 2, F2->as_VMReg()->next(3) ); ++ reg_def F2_L ( SOC, SOC, Op_RegF, 2, F2->as_VMReg()->next(4) ); ++ reg_def F2_M ( SOC, SOC, Op_RegF, 2, F2->as_VMReg()->next(5) ); ++ reg_def F2_N ( SOC, SOC, Op_RegF, 2, F2->as_VMReg()->next(6) ); ++ reg_def F2_O ( SOC, SOC, Op_RegF, 2, F2->as_VMReg()->next(7) ); ++ ++ reg_def F3 ( SOC, SOC, Op_RegF, 3, F3->as_VMReg() ); ++ reg_def F3_H ( SOC, SOC, Op_RegF, 3, F3->as_VMReg()->next() ); ++ reg_def F3_J ( SOC, SOC, Op_RegF, 3, F3->as_VMReg()->next(2) ); ++ reg_def F3_K ( SOC, SOC, Op_RegF, 3, F3->as_VMReg()->next(3) ); ++ reg_def F3_L ( SOC, SOC, Op_RegF, 3, F3->as_VMReg()->next(4) ); ++ reg_def F3_M ( SOC, SOC, Op_RegF, 3, F3->as_VMReg()->next(5) ); ++ reg_def F3_N ( SOC, SOC, Op_RegF, 3, F3->as_VMReg()->next(6) ); ++ reg_def F3_O ( SOC, SOC, Op_RegF, 3, F3->as_VMReg()->next(7) ); ++ ++ reg_def F4 ( SOC, SOC, Op_RegF, 4, F4->as_VMReg() ); ++ reg_def F4_H ( SOC, SOC, Op_RegF, 4, F4->as_VMReg()->next() ); ++ reg_def F4_J ( SOC, SOC, Op_RegF, 4, F4->as_VMReg()->next(2) ); ++ reg_def F4_K ( SOC, SOC, Op_RegF, 4, F4->as_VMReg()->next(3) ); ++ reg_def F4_L ( SOC, SOC, Op_RegF, 4, F4->as_VMReg()->next(4) ); ++ reg_def F4_M ( SOC, SOC, Op_RegF, 4, F4->as_VMReg()->next(5) ); ++ reg_def F4_N ( SOC, SOC, Op_RegF, 4, F4->as_VMReg()->next(6) ); ++ reg_def F4_O ( SOC, SOC, Op_RegF, 4, F4->as_VMReg()->next(7) ); ++ ++ reg_def F5 ( SOC, SOC, Op_RegF, 5, F5->as_VMReg() ); ++ reg_def F5_H ( SOC, SOC, Op_RegF, 5, F5->as_VMReg()->next() ); ++ reg_def F5_J ( SOC, SOC, Op_RegF, 5, F5->as_VMReg()->next(2) ); ++ reg_def F5_K ( SOC, SOC, Op_RegF, 5, F5->as_VMReg()->next(3) ); ++ reg_def F5_L ( SOC, SOC, Op_RegF, 5, F5->as_VMReg()->next(4) ); ++ reg_def F5_M ( SOC, SOC, Op_RegF, 5, F5->as_VMReg()->next(5) ); ++ reg_def F5_N ( SOC, SOC, Op_RegF, 5, F5->as_VMReg()->next(6) ); ++ reg_def F5_O ( SOC, SOC, Op_RegF, 5, F5->as_VMReg()->next(7) ); ++ ++ reg_def F6 ( SOC, SOC, Op_RegF, 6, F6->as_VMReg() ); ++ reg_def F6_H ( SOC, SOC, Op_RegF, 6, F6->as_VMReg()->next() ); ++ reg_def F6_J ( SOC, SOC, Op_RegF, 6, F6->as_VMReg()->next(2) ); ++ reg_def F6_K ( SOC, SOC, Op_RegF, 6, F6->as_VMReg()->next(3) ); ++ reg_def F6_L ( SOC, SOC, Op_RegF, 6, F6->as_VMReg()->next(4) ); ++ reg_def F6_M ( SOC, SOC, Op_RegF, 6, F6->as_VMReg()->next(5) ); ++ 
reg_def F6_N ( SOC, SOC, Op_RegF, 6, F6->as_VMReg()->next(6) ); ++ reg_def F6_O ( SOC, SOC, Op_RegF, 6, F6->as_VMReg()->next(7) ); ++ ++ reg_def F7 ( SOC, SOC, Op_RegF, 7, F7->as_VMReg() ); ++ reg_def F7_H ( SOC, SOC, Op_RegF, 7, F7->as_VMReg()->next() ); ++ reg_def F7_J ( SOC, SOC, Op_RegF, 7, F7->as_VMReg()->next(2) ); ++ reg_def F7_K ( SOC, SOC, Op_RegF, 7, F7->as_VMReg()->next(3) ); ++ reg_def F7_L ( SOC, SOC, Op_RegF, 7, F7->as_VMReg()->next(4) ); ++ reg_def F7_M ( SOC, SOC, Op_RegF, 7, F7->as_VMReg()->next(5) ); ++ reg_def F7_N ( SOC, SOC, Op_RegF, 7, F7->as_VMReg()->next(6) ); ++ reg_def F7_O ( SOC, SOC, Op_RegF, 7, F7->as_VMReg()->next(7) ); ++ ++ reg_def F8 ( SOC, SOC, Op_RegF, 8, F8->as_VMReg() ); ++ reg_def F8_H ( SOC, SOC, Op_RegF, 8, F8->as_VMReg()->next() ); ++ reg_def F8_J ( SOC, SOC, Op_RegF, 8, F8->as_VMReg()->next(2) ); ++ reg_def F8_K ( SOC, SOC, Op_RegF, 8, F8->as_VMReg()->next(3) ); ++ reg_def F8_L ( SOC, SOC, Op_RegF, 8, F8->as_VMReg()->next(4) ); ++ reg_def F8_M ( SOC, SOC, Op_RegF, 8, F8->as_VMReg()->next(5) ); ++ reg_def F8_N ( SOC, SOC, Op_RegF, 8, F8->as_VMReg()->next(6) ); ++ reg_def F8_O ( SOC, SOC, Op_RegF, 8, F8->as_VMReg()->next(7) ); ++ ++ reg_def F9 ( SOC, SOC, Op_RegF, 9, F9->as_VMReg() ); ++ reg_def F9_H ( SOC, SOC, Op_RegF, 9, F9->as_VMReg()->next() ); ++ reg_def F9_J ( SOC, SOC, Op_RegF, 9, F9->as_VMReg()->next(2) ); ++ reg_def F9_K ( SOC, SOC, Op_RegF, 9, F9->as_VMReg()->next(3) ); ++ reg_def F9_L ( SOC, SOC, Op_RegF, 9, F9->as_VMReg()->next(4) ); ++ reg_def F9_M ( SOC, SOC, Op_RegF, 9, F9->as_VMReg()->next(5) ); ++ reg_def F9_N ( SOC, SOC, Op_RegF, 9, F9->as_VMReg()->next(6) ); ++ reg_def F9_O ( SOC, SOC, Op_RegF, 9, F9->as_VMReg()->next(7) ); ++ ++ reg_def F10 ( SOC, SOC, Op_RegF, 10, F10->as_VMReg() ); ++ reg_def F10_H ( SOC, SOC, Op_RegF, 10, F10->as_VMReg()->next() ); ++ reg_def F10_J ( SOC, SOC, Op_RegF, 10, F10->as_VMReg()->next(2) ); ++ reg_def F10_K ( SOC, SOC, Op_RegF, 10, F10->as_VMReg()->next(3) ); ++ reg_def F10_L ( SOC, SOC, Op_RegF, 10, F10->as_VMReg()->next(4) ); ++ reg_def F10_M ( SOC, SOC, Op_RegF, 10, F10->as_VMReg()->next(5) ); ++ reg_def F10_N ( SOC, SOC, Op_RegF, 10, F10->as_VMReg()->next(6) ); ++ reg_def F10_O ( SOC, SOC, Op_RegF, 10, F10->as_VMReg()->next(7) ); ++ ++ reg_def F11 ( SOC, SOC, Op_RegF, 11, F11->as_VMReg() ); ++ reg_def F11_H ( SOC, SOC, Op_RegF, 11, F11->as_VMReg()->next() ); ++ reg_def F11_J ( SOC, SOC, Op_RegF, 11, F11->as_VMReg()->next(2) ); ++ reg_def F11_K ( SOC, SOC, Op_RegF, 11, F11->as_VMReg()->next(3) ); ++ reg_def F11_L ( SOC, SOC, Op_RegF, 11, F11->as_VMReg()->next(4) ); ++ reg_def F11_M ( SOC, SOC, Op_RegF, 11, F11->as_VMReg()->next(5) ); ++ reg_def F11_N ( SOC, SOC, Op_RegF, 11, F11->as_VMReg()->next(6) ); ++ reg_def F11_O ( SOC, SOC, Op_RegF, 11, F11->as_VMReg()->next(7) ); ++ ++ reg_def F12 ( SOC, SOC, Op_RegF, 12, F12->as_VMReg() ); ++ reg_def F12_H ( SOC, SOC, Op_RegF, 12, F12->as_VMReg()->next() ); ++ reg_def F12_J ( SOC, SOC, Op_RegF, 12, F12->as_VMReg()->next(2) ); ++ reg_def F12_K ( SOC, SOC, Op_RegF, 12, F12->as_VMReg()->next(3) ); ++ reg_def F12_L ( SOC, SOC, Op_RegF, 12, F12->as_VMReg()->next(4) ); ++ reg_def F12_M ( SOC, SOC, Op_RegF, 12, F12->as_VMReg()->next(5) ); ++ reg_def F12_N ( SOC, SOC, Op_RegF, 12, F12->as_VMReg()->next(6) ); ++ reg_def F12_O ( SOC, SOC, Op_RegF, 12, F12->as_VMReg()->next(7) ); ++ ++ reg_def F13 ( SOC, SOC, Op_RegF, 13, F13->as_VMReg() ); ++ reg_def F13_H ( SOC, SOC, Op_RegF, 13, F13->as_VMReg()->next() ); ++ reg_def F13_J ( SOC, SOC, Op_RegF, 13, 
F13->as_VMReg()->next(2) ); ++ reg_def F13_K ( SOC, SOC, Op_RegF, 13, F13->as_VMReg()->next(3) ); ++ reg_def F13_L ( SOC, SOC, Op_RegF, 13, F13->as_VMReg()->next(4) ); ++ reg_def F13_M ( SOC, SOC, Op_RegF, 13, F13->as_VMReg()->next(5) ); ++ reg_def F13_N ( SOC, SOC, Op_RegF, 13, F13->as_VMReg()->next(6) ); ++ reg_def F13_O ( SOC, SOC, Op_RegF, 13, F13->as_VMReg()->next(7) ); ++ ++ reg_def F14 ( SOC, SOC, Op_RegF, 14, F14->as_VMReg() ); ++ reg_def F14_H ( SOC, SOC, Op_RegF, 14, F14->as_VMReg()->next() ); ++ reg_def F14_J ( SOC, SOC, Op_RegF, 14, F14->as_VMReg()->next(2) ); ++ reg_def F14_K ( SOC, SOC, Op_RegF, 14, F14->as_VMReg()->next(3) ); ++ reg_def F14_L ( SOC, SOC, Op_RegF, 14, F14->as_VMReg()->next(4) ); ++ reg_def F14_M ( SOC, SOC, Op_RegF, 14, F14->as_VMReg()->next(5) ); ++ reg_def F14_N ( SOC, SOC, Op_RegF, 14, F14->as_VMReg()->next(6) ); ++ reg_def F14_O ( SOC, SOC, Op_RegF, 14, F14->as_VMReg()->next(7) ); ++ ++ reg_def F15 ( SOC, SOC, Op_RegF, 15, F15->as_VMReg() ); ++ reg_def F15_H ( SOC, SOC, Op_RegF, 15, F15->as_VMReg()->next() ); ++ reg_def F15_J ( SOC, SOC, Op_RegF, 15, F15->as_VMReg()->next(2) ); ++ reg_def F15_K ( SOC, SOC, Op_RegF, 15, F15->as_VMReg()->next(3) ); ++ reg_def F15_L ( SOC, SOC, Op_RegF, 15, F15->as_VMReg()->next(4) ); ++ reg_def F15_M ( SOC, SOC, Op_RegF, 15, F15->as_VMReg()->next(5) ); ++ reg_def F15_N ( SOC, SOC, Op_RegF, 15, F15->as_VMReg()->next(6) ); ++ reg_def F15_O ( SOC, SOC, Op_RegF, 15, F15->as_VMReg()->next(7) ); ++ ++ reg_def F16 ( SOC, SOC, Op_RegF, 16, F16->as_VMReg() ); ++ reg_def F16_H ( SOC, SOC, Op_RegF, 16, F16->as_VMReg()->next() ); ++ reg_def F16_J ( SOC, SOC, Op_RegF, 16, F16->as_VMReg()->next(2) ); ++ reg_def F16_K ( SOC, SOC, Op_RegF, 16, F16->as_VMReg()->next(3) ); ++ reg_def F16_L ( SOC, SOC, Op_RegF, 16, F16->as_VMReg()->next(4) ); ++ reg_def F16_M ( SOC, SOC, Op_RegF, 16, F16->as_VMReg()->next(5) ); ++ reg_def F16_N ( SOC, SOC, Op_RegF, 16, F16->as_VMReg()->next(6) ); ++ reg_def F16_O ( SOC, SOC, Op_RegF, 16, F16->as_VMReg()->next(7) ); ++ ++ reg_def F17 ( SOC, SOC, Op_RegF, 17, F17->as_VMReg() ); ++ reg_def F17_H ( SOC, SOC, Op_RegF, 17, F17->as_VMReg()->next() ); ++ reg_def F17_J ( SOC, SOC, Op_RegF, 17, F17->as_VMReg()->next(2) ); ++ reg_def F17_K ( SOC, SOC, Op_RegF, 17, F17->as_VMReg()->next(3) ); ++ reg_def F17_L ( SOC, SOC, Op_RegF, 17, F17->as_VMReg()->next(4) ); ++ reg_def F17_M ( SOC, SOC, Op_RegF, 17, F17->as_VMReg()->next(5) ); ++ reg_def F17_N ( SOC, SOC, Op_RegF, 17, F17->as_VMReg()->next(6) ); ++ reg_def F17_O ( SOC, SOC, Op_RegF, 17, F17->as_VMReg()->next(7) ); ++ ++ reg_def F18 ( SOC, SOC, Op_RegF, 18, F18->as_VMReg() ); ++ reg_def F18_H ( SOC, SOC, Op_RegF, 18, F18->as_VMReg()->next() ); ++ reg_def F18_J ( SOC, SOC, Op_RegF, 18, F18->as_VMReg()->next(2) ); ++ reg_def F18_K ( SOC, SOC, Op_RegF, 18, F18->as_VMReg()->next(3) ); ++ reg_def F18_L ( SOC, SOC, Op_RegF, 18, F18->as_VMReg()->next(4) ); ++ reg_def F18_M ( SOC, SOC, Op_RegF, 18, F18->as_VMReg()->next(5) ); ++ reg_def F18_N ( SOC, SOC, Op_RegF, 18, F18->as_VMReg()->next(6) ); ++ reg_def F18_O ( SOC, SOC, Op_RegF, 18, F18->as_VMReg()->next(7) ); ++ ++ reg_def F19 ( SOC, SOC, Op_RegF, 19, F19->as_VMReg() ); ++ reg_def F19_H ( SOC, SOC, Op_RegF, 19, F19->as_VMReg()->next() ); ++ reg_def F19_J ( SOC, SOC, Op_RegF, 19, F19->as_VMReg()->next(2) ); ++ reg_def F19_K ( SOC, SOC, Op_RegF, 19, F19->as_VMReg()->next(3) ); ++ reg_def F19_L ( SOC, SOC, Op_RegF, 19, F19->as_VMReg()->next(4) ); ++ reg_def F19_M ( SOC, SOC, Op_RegF, 19, F19->as_VMReg()->next(5) ); ++ reg_def 
F19_N ( SOC, SOC, Op_RegF, 19, F19->as_VMReg()->next(6) ); ++ reg_def F19_O ( SOC, SOC, Op_RegF, 19, F19->as_VMReg()->next(7) ); ++ ++ reg_def F20 ( SOC, SOC, Op_RegF, 20, F20->as_VMReg() ); ++ reg_def F20_H ( SOC, SOC, Op_RegF, 20, F20->as_VMReg()->next() ); ++ reg_def F20_J ( SOC, SOC, Op_RegF, 20, F20->as_VMReg()->next(2) ); ++ reg_def F20_K ( SOC, SOC, Op_RegF, 20, F20->as_VMReg()->next(3) ); ++ reg_def F20_L ( SOC, SOC, Op_RegF, 20, F20->as_VMReg()->next(4) ); ++ reg_def F20_M ( SOC, SOC, Op_RegF, 20, F20->as_VMReg()->next(5) ); ++ reg_def F20_N ( SOC, SOC, Op_RegF, 20, F20->as_VMReg()->next(6) ); ++ reg_def F20_O ( SOC, SOC, Op_RegF, 20, F20->as_VMReg()->next(7) ); ++ ++ reg_def F21 ( SOC, SOC, Op_RegF, 21, F21->as_VMReg() ); ++ reg_def F21_H ( SOC, SOC, Op_RegF, 21, F21->as_VMReg()->next() ); ++ reg_def F21_J ( SOC, SOC, Op_RegF, 21, F21->as_VMReg()->next(2) ); ++ reg_def F21_K ( SOC, SOC, Op_RegF, 21, F21->as_VMReg()->next(3) ); ++ reg_def F21_L ( SOC, SOC, Op_RegF, 21, F21->as_VMReg()->next(4) ); ++ reg_def F21_M ( SOC, SOC, Op_RegF, 21, F21->as_VMReg()->next(5) ); ++ reg_def F21_N ( SOC, SOC, Op_RegF, 21, F21->as_VMReg()->next(6) ); ++ reg_def F21_O ( SOC, SOC, Op_RegF, 21, F21->as_VMReg()->next(7) ); ++ ++ reg_def F22 ( SOC, SOC, Op_RegF, 22, F22->as_VMReg() ); ++ reg_def F22_H ( SOC, SOC, Op_RegF, 22, F22->as_VMReg()->next() ); ++ reg_def F22_J ( SOC, SOC, Op_RegF, 22, F22->as_VMReg()->next(2) ); ++ reg_def F22_K ( SOC, SOC, Op_RegF, 22, F22->as_VMReg()->next(3) ); ++ reg_def F22_L ( SOC, SOC, Op_RegF, 22, F22->as_VMReg()->next(4) ); ++ reg_def F22_M ( SOC, SOC, Op_RegF, 22, F22->as_VMReg()->next(5) ); ++ reg_def F22_N ( SOC, SOC, Op_RegF, 22, F22->as_VMReg()->next(6) ); ++ reg_def F22_O ( SOC, SOC, Op_RegF, 22, F22->as_VMReg()->next(7) ); ++ ++ reg_def F23 ( SOC, SOC, Op_RegF, 23, F23->as_VMReg() ); ++ reg_def F23_H ( SOC, SOC, Op_RegF, 23, F23->as_VMReg()->next() ); ++ reg_def F23_J ( SOC, SOC, Op_RegF, 23, F23->as_VMReg()->next(2) ); ++ reg_def F23_K ( SOC, SOC, Op_RegF, 23, F23->as_VMReg()->next(3) ); ++ reg_def F23_L ( SOC, SOC, Op_RegF, 23, F23->as_VMReg()->next(4) ); ++ reg_def F23_M ( SOC, SOC, Op_RegF, 23, F23->as_VMReg()->next(5) ); ++ reg_def F23_N ( SOC, SOC, Op_RegF, 23, F23->as_VMReg()->next(6) ); ++ reg_def F23_O ( SOC, SOC, Op_RegF, 23, F23->as_VMReg()->next(7) ); ++ ++ reg_def F24 ( SOC, SOE, Op_RegF, 24, F24->as_VMReg() ); ++ reg_def F24_H ( SOC, SOE, Op_RegF, 24, F24->as_VMReg()->next() ); ++ reg_def F24_J ( SOC, SOC, Op_RegF, 24, F24->as_VMReg()->next(2) ); ++ reg_def F24_K ( SOC, SOC, Op_RegF, 24, F24->as_VMReg()->next(3) ); ++ reg_def F24_L ( SOC, SOC, Op_RegF, 24, F24->as_VMReg()->next(4) ); ++ reg_def F24_M ( SOC, SOC, Op_RegF, 24, F24->as_VMReg()->next(5) ); ++ reg_def F24_N ( SOC, SOC, Op_RegF, 24, F24->as_VMReg()->next(6) ); ++ reg_def F24_O ( SOC, SOC, Op_RegF, 24, F24->as_VMReg()->next(7) ); ++ ++ reg_def F25 ( SOC, SOE, Op_RegF, 25, F25->as_VMReg() ); ++ reg_def F25_H ( SOC, SOE, Op_RegF, 25, F25->as_VMReg()->next() ); ++ reg_def F25_J ( SOC, SOC, Op_RegF, 25, F25->as_VMReg()->next(2) ); ++ reg_def F25_K ( SOC, SOC, Op_RegF, 25, F25->as_VMReg()->next(3) ); ++ reg_def F25_L ( SOC, SOC, Op_RegF, 25, F25->as_VMReg()->next(4) ); ++ reg_def F25_M ( SOC, SOC, Op_RegF, 25, F25->as_VMReg()->next(5) ); ++ reg_def F25_N ( SOC, SOC, Op_RegF, 25, F25->as_VMReg()->next(6) ); ++ reg_def F25_O ( SOC, SOC, Op_RegF, 25, F25->as_VMReg()->next(7) ); ++ ++ reg_def F26 ( SOC, SOE, Op_RegF, 26, F26->as_VMReg() ); ++ reg_def F26_H ( SOC, SOE, Op_RegF, 26, 
F26->as_VMReg()->next() ); ++ reg_def F26_J ( SOC, SOC, Op_RegF, 26, F26->as_VMReg()->next(2) ); ++ reg_def F26_K ( SOC, SOC, Op_RegF, 26, F26->as_VMReg()->next(3) ); ++ reg_def F26_L ( SOC, SOC, Op_RegF, 26, F26->as_VMReg()->next(4) ); ++ reg_def F26_M ( SOC, SOC, Op_RegF, 26, F26->as_VMReg()->next(5) ); ++ reg_def F26_N ( SOC, SOC, Op_RegF, 26, F26->as_VMReg()->next(6) ); ++ reg_def F26_O ( SOC, SOC, Op_RegF, 26, F26->as_VMReg()->next(7) ); ++ ++ reg_def F27 ( SOC, SOE, Op_RegF, 27, F27->as_VMReg() ); ++ reg_def F27_H ( SOC, SOE, Op_RegF, 27, F27->as_VMReg()->next() ); ++ reg_def F27_J ( SOC, SOC, Op_RegF, 27, F27->as_VMReg()->next(2) ); ++ reg_def F27_K ( SOC, SOC, Op_RegF, 27, F27->as_VMReg()->next(3) ); ++ reg_def F27_L ( SOC, SOC, Op_RegF, 27, F27->as_VMReg()->next(4) ); ++ reg_def F27_M ( SOC, SOC, Op_RegF, 27, F27->as_VMReg()->next(5) ); ++ reg_def F27_N ( SOC, SOC, Op_RegF, 27, F27->as_VMReg()->next(6) ); ++ reg_def F27_O ( SOC, SOC, Op_RegF, 27, F27->as_VMReg()->next(7) ); ++ ++ reg_def F28 ( SOC, SOE, Op_RegF, 28, F28->as_VMReg() ); ++ reg_def F28_H ( SOC, SOE, Op_RegF, 28, F28->as_VMReg()->next() ); ++ reg_def F28_J ( SOC, SOC, Op_RegF, 28, F28->as_VMReg()->next(2) ); ++ reg_def F28_K ( SOC, SOC, Op_RegF, 28, F28->as_VMReg()->next(3) ); ++ reg_def F28_L ( SOC, SOC, Op_RegF, 28, F28->as_VMReg()->next(4) ); ++ reg_def F28_M ( SOC, SOC, Op_RegF, 28, F28->as_VMReg()->next(5) ); ++ reg_def F28_N ( SOC, SOC, Op_RegF, 28, F28->as_VMReg()->next(6) ); ++ reg_def F28_O ( SOC, SOC, Op_RegF, 28, F28->as_VMReg()->next(7) ); ++ ++ reg_def F29 ( SOC, SOE, Op_RegF, 29, F29->as_VMReg() ); ++ reg_def F29_H ( SOC, SOE, Op_RegF, 29, F29->as_VMReg()->next() ); ++ reg_def F29_J ( SOC, SOC, Op_RegF, 29, F29->as_VMReg()->next(2) ); ++ reg_def F29_K ( SOC, SOC, Op_RegF, 29, F29->as_VMReg()->next(3) ); ++ reg_def F29_L ( SOC, SOC, Op_RegF, 29, F29->as_VMReg()->next(4) ); ++ reg_def F29_M ( SOC, SOC, Op_RegF, 29, F29->as_VMReg()->next(5) ); ++ reg_def F29_N ( SOC, SOC, Op_RegF, 29, F29->as_VMReg()->next(6) ); ++ reg_def F29_O ( SOC, SOC, Op_RegF, 29, F29->as_VMReg()->next(7) ); ++ ++ reg_def F30 ( SOC, SOE, Op_RegF, 30, F30->as_VMReg() ); ++ reg_def F30_H ( SOC, SOE, Op_RegF, 30, F30->as_VMReg()->next() ); ++ reg_def F30_J ( SOC, SOC, Op_RegF, 30, F30->as_VMReg()->next(2) ); ++ reg_def F30_K ( SOC, SOC, Op_RegF, 30, F30->as_VMReg()->next(3) ); ++ reg_def F30_L ( SOC, SOC, Op_RegF, 30, F30->as_VMReg()->next(4) ); ++ reg_def F30_M ( SOC, SOC, Op_RegF, 30, F30->as_VMReg()->next(5) ); ++ reg_def F30_N ( SOC, SOC, Op_RegF, 30, F30->as_VMReg()->next(6) ); ++ reg_def F30_O ( SOC, SOC, Op_RegF, 30, F30->as_VMReg()->next(7) ); ++ ++ reg_def F31 ( SOC, SOE, Op_RegF, 31, F31->as_VMReg() ); ++ reg_def F31_H ( SOC, SOE, Op_RegF, 31, F31->as_VMReg()->next() ); ++ reg_def F31_J ( SOC, SOC, Op_RegF, 31, F31->as_VMReg()->next(2) ); ++ reg_def F31_K ( SOC, SOC, Op_RegF, 31, F31->as_VMReg()->next(3) ); ++ reg_def F31_L ( SOC, SOC, Op_RegF, 31, F31->as_VMReg()->next(4) ); ++ reg_def F31_M ( SOC, SOC, Op_RegF, 31, F31->as_VMReg()->next(5) ); ++ reg_def F31_N ( SOC, SOC, Op_RegF, 31, F31->as_VMReg()->next(6) ); ++ reg_def F31_O ( SOC, SOC, Op_RegF, 31, F31->as_VMReg()->next(7) ); ++ ++ ++// ---------------------------- ++// Special Registers ++//S6 is used for get_thread(S6) ++//S5 is uesd for heapbase of compressed oop ++alloc_class chunk0( ++ S7, S7_H, ++ S0, S0_H, ++ S1, S1_H, ++ S2, S2_H, ++ S4, S4_H, ++ S5, S5_H, ++ S6, S6_H, ++ S3, S3_H, ++ T2, T2_H, ++ T3, T3_H, ++ T8, T8_H, ++ T4, T4_H, ++ T1, T1_H, // 
inline_cache_reg ++ T6, T6_H, ++ A7, A7_H, ++ A6, A6_H, ++ A5, A5_H, ++ A4, A4_H, ++ T5, T5_H, ++ A3, A3_H, ++ A2, A2_H, ++ A1, A1_H, ++ A0, A0_H, ++ T0, T0_H, ++ S8, S8_H ++ RA, RA_H, ++ SP, SP_H, // stack_pointer ++ FP, FP_H, // frame_pointer ++ ++ // non-allocatable registers ++ T7, T7_H, ++ TP, TP_H, ++ RX, RX_H, ++ R0, R0_H, ++ ); ++ ++// F23 is scratch reg ++alloc_class chunk1( F0, F0_H, F0_J, F0_K, F0_L, F0_M, F0_N, F0_O, ++ F1, F1_H, F1_J, F1_K, F1_L, F1_M, F1_N, F1_O, ++ F2, F2_H, F2_J, F2_K, F2_L, F2_M, F2_N, F2_O, ++ F3, F3_H, F3_J, F3_K, F3_L, F3_M, F3_N, F3_O, ++ F4, F4_H, F4_J, F4_K, F4_L, F4_M, F4_N, F4_O, ++ F5, F5_H, F5_J, F5_K, F5_L, F5_M, F5_N, F5_O, ++ F6, F6_H, F6_J, F6_K, F6_L, F6_M, F6_N, F6_O, ++ F7, F7_H, F7_J, F7_K, F7_L, F7_M, F7_N, F7_O, ++ F8, F8_H, F8_J, F8_K, F8_L, F8_M, F8_N, F8_O, ++ F9, F9_H, F9_J, F9_K, F9_L, F9_M, F9_N, F9_O, ++ F10, F10_H, F10_J, F10_K, F10_L, F10_M, F10_N, F10_O, ++ F11, F11_H, F11_J, F11_K, F11_L, F11_M, F11_N, F11_O, ++ F12, F12_H, F12_J, F12_K, F12_L, F12_M, F12_N, F12_O, ++ F13, F13_H, F13_J, F13_K, F13_L, F13_M, F13_N, F13_O, ++ F14, F14_H, F14_J, F14_K, F14_L, F14_M, F14_N, F14_O, ++ F15, F15_H, F15_J, F15_K, F15_L, F15_M, F15_N, F15_O, ++ F16, F16_H, F16_J, F16_K, F16_L, F16_M, F16_N, F16_O, ++ F17, F17_H, F17_J, F17_K, F17_L, F17_M, F17_N, F17_O, ++ F18, F18_H, F18_J, F18_K, F18_L, F18_M, F18_N, F18_O, ++ F19, F19_H, F19_J, F19_K, F19_L, F19_M, F19_N, F19_O, ++ F20, F20_H, F20_J, F20_K, F20_L, F20_M, F20_N, F20_O, ++ F21, F21_H, F21_J, F21_K, F21_L, F21_M, F21_N, F21_O, ++ F22, F22_H, F22_J, F22_K, F22_L, F22_M, F22_N, F22_O, ++ F24, F24_H, F24_J, F24_K, F24_L, F24_M, F24_N, F24_O, ++ F25, F25_H, F25_J, F25_K, F25_L, F25_M, F25_N, F25_O, ++ F26, F26_H, F26_J, F26_K, F26_L, F26_M, F26_N, F26_O, ++ F27, F27_H, F27_J, F27_K, F27_L, F27_M, F27_N, F27_O, ++ F28, F28_H, F28_J, F28_K, F28_L, F28_M, F28_N, F28_O, ++ F29, F29_H, F29_J, F29_K, F29_L, F29_M, F29_N, F29_O, ++ F30, F30_H, F30_J, F30_K, F30_L, F30_M, F30_N, F30_O, ++ F31, F31_H, F31_J, F31_K, F31_L, F31_M, F31_N, F31_O, ++ ++ // non-allocatable registers ++ F23, F23_H, F23_J, F23_K, F23_L, F23_M, F23_N, F23_O, ++ ); ++ ++reg_class s_reg( S0, S1, S2, S3, S4, S5, S6, S7 ); ++reg_class s0_reg( S0 ); ++reg_class s1_reg( S1 ); ++reg_class s2_reg( S2 ); ++reg_class s3_reg( S3 ); ++reg_class s4_reg( S4 ); ++reg_class s5_reg( S5 ); ++reg_class s6_reg( S6 ); ++reg_class s7_reg( S7 ); ++ ++reg_class t_reg( T0, T1, T2, T3, T8, T4 ); ++reg_class t0_reg( T0 ); ++reg_class t1_reg( T1 ); ++reg_class t2_reg( T2 ); ++reg_class t3_reg( T3 ); ++reg_class t8_reg( T8 ); ++reg_class t4_reg( T4 ); ++ ++reg_class a_reg( A0, A1, A2, A3, A4, A5, A6, A7 ); ++reg_class a0_reg( A0 ); ++reg_class a1_reg( A1 ); ++reg_class a2_reg( A2 ); ++reg_class a3_reg( A3 ); ++reg_class a4_reg( A4 ); ++reg_class a5_reg( A5 ); ++reg_class a6_reg( A6 ); ++reg_class a7_reg( A7 ); ++ ++// TODO: LA ++//reg_class v0_reg( A0 ); ++//reg_class v1_reg( A1 ); ++ ++reg_class sp_reg( SP, SP_H ); ++reg_class fp_reg( FP, FP_H ); ++ ++reg_class v0_long_reg( A0, A0_H ); ++reg_class v1_long_reg( A1, A1_H ); ++reg_class a0_long_reg( A0, A0_H ); ++reg_class a1_long_reg( A1, A1_H ); ++reg_class a2_long_reg( A2, A2_H ); ++reg_class a3_long_reg( A3, A3_H ); ++reg_class a4_long_reg( A4, A4_H ); ++reg_class a5_long_reg( A5, A5_H ); ++reg_class a6_long_reg( A6, A6_H ); ++reg_class a7_long_reg( A7, A7_H ); ++reg_class t0_long_reg( T0, T0_H ); ++reg_class t1_long_reg( T1, T1_H ); ++reg_class t2_long_reg( T2, T2_H ); ++reg_class t3_long_reg( T3, 
T3_H ); ++reg_class t8_long_reg( T8, T8_H ); ++reg_class t4_long_reg( T4, T4_H ); ++reg_class s0_long_reg( S0, S0_H ); ++reg_class s1_long_reg( S1, S1_H ); ++reg_class s2_long_reg( S2, S2_H ); ++reg_class s3_long_reg( S3, S3_H ); ++reg_class s4_long_reg( S4, S4_H ); ++reg_class s5_long_reg( S5, S5_H ); ++reg_class s6_long_reg( S6, S6_H ); ++reg_class s7_long_reg( S7, S7_H ); ++ ++//reg_class int_reg( S7, S0, S1, S2, S4, S3, T8, T2, T3, T1, T6, A7, A6, A5, A4, T5, A3, A2, A1, A0, T0 ); ++ ++reg_class all_reg32( ++ S8, ++ S7, ++ S5, /* S5_heapbase */ ++ /* S6, S6 TREG */ ++ S4, ++ S3, ++ S2, ++ S1, ++ S0, ++ T8, ++ /* T7, AT */ ++ T6, ++ T5, ++ /* T4, jarl T4 */ ++ T3, ++ T2, ++ T1, ++ T0, ++ A7, ++ A6, ++ A5, ++ A4, ++ A3, ++ A2, ++ A1, ++ A0 ); ++ ++reg_class int_reg %{ ++ return _ANY_REG32_mask; ++%} ++ ++reg_class no_Ax_int_reg( S7, S0, S1, S2, S4, S3, T8, T2, T3, T1, T6, T5, T0 ); ++ ++reg_class p_reg %{ ++ return _PTR_REG_mask; ++%} ++ ++reg_class no_T8_p_reg( ++ S7, S7_H, ++ S0, S0_H, ++ S1, S1_H, ++ S2, S2_H, ++ S4, S4_H, ++ S3, S3_H, ++ T2, T2_H, ++ T3, T3_H, ++ T1, T1_H, ++ A7, A7_H, ++ A6, A6_H, ++ A5, A5_H, ++ A4, A4_H, ++ A3, A3_H, ++ A2, A2_H, ++ A1, A1_H, ++ A0, A0_H, ++ T0, T0_H ++ ); ++ ++reg_class no_Ax_p_reg( ++ S7, S7_H, ++ S0, S0_H, ++ S1, S1_H, ++ S2, S2_H, ++ S4, S4_H, ++ S3, S3_H, ++ T2, T2_H, ++ T3, T3_H, ++ T1, T1_H, ++ T0, T0_H ++ ); ++ ++reg_class all_reg( ++ S8, S8_H, ++ S7, S7_H, ++ /* S6, S6_H, S6 TREG */ ++ S5, S5_H, /* S5_heapbase */ ++ S4, S4_H, ++ S3, S3_H, ++ S2, S2_H, ++ S1, S1_H, ++ S0, S0_H, ++ T8, T8_H, ++ /* T7, T7_H, AT */ ++ T6, T6_H, ++ T5, T5_H, ++ /* T4, T4_H, jalr T4 */ ++ T3, T3_H, ++ T2, T2_H, ++ T1, T1_H, ++ T0, T0_H, ++ A7, A7_H, ++ A6, A6_H, ++ A5, A5_H, ++ A4, A4_H, ++ A3, A3_H, ++ A2, A2_H, ++ A1, A1_H, ++ A0, A0_H ++ ); ++ ++ ++reg_class long_reg %{ ++ return _ANY_REG_mask; ++%} ++ ++// Floating point registers. 
++// F31 are not used as temporary registers in D2I ++reg_class flt_reg( F0, F1, F2, F3, F4, F5, F6, F7, F8, F9, F10, F11, F12, F13, F14, F15, F16, F17, F18, F19, F20, F21, F22, F24, F25, F26, F27, F28, F29, F30, F31); ++ ++reg_class dbl_reg( F0, F0_H, ++ F1, F1_H, ++ F2, F2_H, ++ F3, F3_H, ++ F4, F4_H, ++ F5, F5_H, ++ F6, F6_H, ++ F7, F7_H, ++ F8, F8_H, ++ F9, F9_H, ++ F10, F10_H, ++ F11, F11_H, ++ F12, F12_H, ++ F13, F13_H, ++ F14, F14_H, ++ F15, F15_H, ++ F16, F16_H, ++ F17, F17_H, ++ F18, F18_H, ++ F19, F19_H, ++ F20, F20_H, ++ F21, F21_H, ++ F22, F22_H, ++ F24, F24_H, ++ F25, F25_H, ++ F26, F26_H, ++ F27, F27_H, ++ F28, F28_H, ++ F29, F29_H, ++ F30, F30_H, ++ F31, F31_H); ++ ++// Class for all 128bit vector registers ++reg_class vectorx_reg( F0, F0_H, F0_J, F0_K, ++ F1, F1_H, F1_J, F1_K, ++ F2, F2_H, F2_J, F2_K, ++ F3, F3_H, F3_J, F3_K, ++ F4, F4_H, F4_J, F4_K, ++ F5, F5_H, F5_J, F5_K, ++ F6, F6_H, F6_J, F6_K, ++ F7, F7_H, F7_J, F7_K, ++ F8, F8_H, F8_J, F8_K, ++ F9, F9_H, F9_J, F9_K, ++ F10, F10_H, F10_J, F10_K, ++ F11, F11_H, F11_J, F11_K, ++ F12, F12_H, F12_J, F12_K, ++ F13, F13_H, F13_J, F13_K, ++ F14, F14_H, F14_J, F14_K, ++ F15, F15_H, F15_J, F15_K, ++ F16, F16_H, F16_J, F16_K, ++ F17, F17_H, F17_J, F17_K, ++ F18, F18_H, F18_J, F18_K, ++ F19, F19_H, F19_J, F19_K, ++ F20, F20_H, F20_J, F20_K, ++ F21, F21_H, F21_J, F21_K, ++ F22, F22_H, F22_J, F22_K, ++ F24, F24_H, F24_J, F24_K, ++ F25, F25_H, F25_J, F25_K, ++ F26, F26_H, F26_J, F26_K, ++ F27, F27_H, F27_J, F27_K, ++ F28, F28_H, F28_J, F28_K, ++ F29, F29_H, F29_J, F29_K, ++ F30, F30_H, F30_J, F30_K, ++ F31, F31_H, F31_J, F31_K); ++ ++// Class for all 256bit vector registers ++reg_class vectory_reg( F0, F0_H, F0_J, F0_K, F0_L, F0_M, F0_N, F0_O, ++ F1, F1_H, F1_J, F1_K, F1_L, F1_M, F1_N, F1_O, ++ F2, F2_H, F2_J, F2_K, F2_L, F2_M, F2_N, F2_O, ++ F3, F3_H, F3_J, F3_K, F3_L, F3_M, F3_N, F3_O, ++ F4, F4_H, F4_J, F4_K, F4_L, F4_M, F4_N, F4_O, ++ F5, F5_H, F5_J, F5_K, F5_L, F5_M, F5_N, F5_O, ++ F6, F6_H, F6_J, F6_K, F6_L, F6_M, F6_N, F6_O, ++ F7, F7_H, F7_J, F7_K, F7_L, F7_M, F7_N, F7_O, ++ F8, F8_H, F8_J, F8_K, F8_L, F8_M, F8_N, F8_O, ++ F9, F9_H, F9_J, F9_K, F9_L, F9_M, F9_N, F9_O, ++ F10, F10_H, F10_J, F10_K, F10_L, F10_M, F10_N, F10_O, ++ F11, F11_H, F11_J, F11_K, F11_L, F11_M, F11_N, F11_O, ++ F12, F12_H, F12_J, F12_K, F12_L, F12_M, F12_N, F12_O, ++ F13, F13_H, F13_J, F13_K, F13_L, F13_M, F13_N, F13_O, ++ F14, F14_H, F14_J, F14_K, F14_L, F14_M, F14_N, F14_O, ++ F15, F15_H, F15_J, F15_K, F15_L, F15_M, F15_N, F15_O, ++ F16, F16_H, F16_J, F16_K, F16_L, F16_M, F16_N, F16_O, ++ F17, F17_H, F17_J, F17_K, F17_L, F17_M, F17_N, F17_O, ++ F18, F18_H, F18_J, F18_K, F18_L, F18_M, F18_N, F18_O, ++ F19, F19_H, F19_J, F19_K, F19_L, F19_M, F19_N, F19_O, ++ F20, F20_H, F20_J, F20_K, F20_L, F20_M, F20_N, F20_O, ++ F21, F21_H, F21_J, F21_K, F21_L, F21_M, F21_N, F21_O, ++ F22, F22_H, F22_J, F22_K, F22_L, F22_M, F22_N, F22_O, ++ F24, F24_H, F24_J, F24_K, F24_L, F24_M, F24_N, F24_O, ++ F25, F25_H, F25_J, F25_K, F25_L, F25_M, F25_N, F25_O, ++ F26, F26_H, F26_J, F26_K, F26_L, F26_M, F26_N, F26_O, ++ F27, F27_H, F27_J, F27_K, F27_L, F27_M, F27_N, F27_O, ++ F28, F28_H, F28_J, F28_K, F28_L, F28_M, F28_N, F28_O, ++ F29, F29_H, F29_J, F29_K, F29_L, F29_M, F29_N, F29_O, ++ F30, F30_H, F30_J, F30_K, F30_L, F30_M, F30_N, F30_O, ++ F31, F31_H, F31_J, F31_K, F31_L, F31_M, F31_N, F31_O); ++ ++// TODO: LA ++//reg_class flt_arg0( F0 ); ++//reg_class dbl_arg0( F0, F0_H ); ++//reg_class dbl_arg1( F1, F1_H ); ++ ++%} ++ ++//----------DEFINITION 
BLOCK--------------------------------------------------- ++// Define name --> value mappings to inform the ADLC of an integer valued name ++// Current support includes integer values in the range [0, 0x7FFFFFFF] ++// Format: ++// int_def <name> ( <int_value>, <expression>); ++// Generated Code in ad_<arch>.hpp ++// #define <name> (<expression>) ++// // value == <int_value> ++// Generated code in ad_<arch>.cpp adlc_verification() ++// assert( <name> == <int_value>, "Expect (<expression>) to equal <int_value>"); ++// ++definitions %{ ++ int_def DEFAULT_COST ( 100, 100); ++ int_def HUGE_COST (1000000, 1000000); ++ ++ // Memory refs are twice as expensive as run-of-the-mill. ++ int_def MEMORY_REF_COST ( 200, DEFAULT_COST * 2); ++ ++ // Branches are even more expensive. ++ int_def BRANCH_COST ( 300, DEFAULT_COST * 3); ++ // we use jr instruction to construct call, so more expensive ++ int_def CALL_COST ( 500, DEFAULT_COST * 5); ++/* ++ int_def EQUAL ( 1, 1 ); ++ int_def NOT_EQUAL ( 2, 2 ); ++ int_def GREATER ( 3, 3 ); ++ int_def GREATER_EQUAL ( 4, 4 ); ++ int_def LESS ( 5, 5 ); ++ int_def LESS_EQUAL ( 6, 6 ); ++*/ ++%} ++ ++ ++ ++//----------SOURCE BLOCK------------------------------------------------------- ++// This is a block of C++ code which provides values, functions, and ++// definitions necessary in the rest of the architecture description ++ ++source_hpp %{ ++// Header information of the source block. ++// Method declarations/definitions which are used outside ++// the ad-scope can conveniently be defined here. ++// ++// To keep related declarations/definitions/uses close together, ++// we switch between source %{ }% and source_hpp %{ }% freely as needed. ++ ++extern RegMask _ANY_REG32_mask; ++extern RegMask _ANY_REG_mask; ++extern RegMask _PTR_REG_mask; ++ ++class CallStubImpl { ++ ++ //-------------------------------------------------------------- ++ //---< Used for optimization in Compile::shorten_branches >--- ++ //-------------------------------------------------------------- ++ ++ public: ++ // Size of call trampoline stub. ++ static uint size_call_trampoline() { ++ return 0; // no call trampolines on this platform ++ } ++ ++ // number of relocations needed by a call trampoline stub ++ static uint reloc_call_trampoline() { ++ return 0; // no call trampolines on this platform ++ } ++}; ++ ++class HandlerImpl { ++ ++ public: ++ ++ static int emit_exception_handler(CodeBuffer &cbuf); ++ static int emit_deopt_handler(CodeBuffer& cbuf); ++ ++ static uint size_exception_handler() { ++ // NativeCall instruction size is the same as NativeJump. ++ // exception handler starts out as jump and can be patched to ++ // a call by deoptimization. (4932387) ++ // Note that this value is also credited (in output.cpp) to ++ // the size of the code section. ++ int size = NativeFarCall::instruction_size; ++ const uintx m = 16 - 1; ++ return mask_bits(size + m, ~m); ++ //return round_to(size, 16); ++ } ++ ++ static uint size_deopt_handler() { ++ int size = NativeFarCall::instruction_size; ++ const uintx m = 16 - 1; ++ return mask_bits(size + m, ~m); ++ //return round_to(size, 16); ++ } ++}; ++ ++bool is_CAS(int opcode); ++bool use_AMO(int opcode); ++ ++bool unnecessary_acquire(const Node *barrier); ++bool unnecessary_release(const Node *barrier); ++bool unnecessary_volatile(const Node *barrier); ++bool needs_releasing_store(const Node *store); ++ ++%} // end source_hpp ++ ++source %{ ++ ++#define NO_INDEX 0 ++#define RELOC_IMM64 Assembler::imm_operand ++#define RELOC_DISP32 Assembler::disp32_operand ++ ++#define V0_num A0_num ++#define V0_H_num A0_H_num ++ ++#define __ _masm.
++ ++#define T0 RT0 ++#define T1 RT1 ++#define T2 RT2 ++#define T3 RT3 ++#define T4 RT4 ++#define T5 RT5 ++#define T6 RT6 ++#define T7 RT7 ++#define T8 RT8 ++ ++RegMask _ANY_REG32_mask; ++RegMask _ANY_REG_mask; ++RegMask _PTR_REG_mask; ++ ++void reg_mask_init() { ++ _ANY_REG32_mask = _ALL_REG32_mask; ++ _ANY_REG_mask = _ALL_REG_mask; ++ _PTR_REG_mask = _ALL_REG_mask; ++ ++ if (UseCompressedOops && (Universe::narrow_ptrs_base() != NULL)) { ++ _ANY_REG32_mask.Remove(OptoReg::as_OptoReg(r28->as_VMReg())); ++ _ANY_REG_mask.SUBTRACT(_S5_LONG_REG_mask); ++ _PTR_REG_mask.SUBTRACT(_S5_LONG_REG_mask); ++ } ++} ++ ++// Emit exception handler code. ++// Stuff framesize into a register and call a VM stub routine. ++int HandlerImpl::emit_exception_handler(CodeBuffer& cbuf) { ++ // Note that the code buffer's insts_mark is always relative to insts. ++ // That's why we must use the macroassembler to generate a handler. ++ MacroAssembler _masm(&cbuf); ++ address base = __ start_a_stub(size_exception_handler()); ++ if (base == NULL) { ++ ciEnv::current()->record_failure("CodeCache is full"); ++ return 0; // CodeBuffer::expand failed ++ } ++ ++ int offset = __ offset(); ++ ++ __ block_comment("; emit_exception_handler"); ++ ++ cbuf.set_insts_mark(); ++ __ relocate(relocInfo::runtime_call_type); ++ __ patchable_jump((address)OptoRuntime::exception_blob()->entry_point()); ++ assert(__ offset() - offset <= (int) size_exception_handler(), "overflow"); ++ __ end_a_stub(); ++ return offset; ++} ++ ++// Emit deopt handler code. ++int HandlerImpl::emit_deopt_handler(CodeBuffer& cbuf) { ++ // Note that the code buffer's insts_mark is always relative to insts. ++ // That's why we must use the macroassembler to generate a handler. ++ MacroAssembler _masm(&cbuf); ++ address base = __ start_a_stub(size_deopt_handler()); ++ if (base == NULL) { ++ ciEnv::current()->record_failure("CodeCache is full"); ++ return 0; // CodeBuffer::expand failed ++ } ++ ++ int offset = __ offset(); ++ ++ __ block_comment("; emit_deopt_handler"); ++ ++ cbuf.set_insts_mark(); ++ __ relocate(relocInfo::runtime_call_type); ++ __ patchable_call(SharedRuntime::deopt_blob()->unpack()); ++ assert(__ offset() - offset <= (int) size_deopt_handler(), "overflow"); ++ __ end_a_stub(); ++ return offset; ++} ++ ++ ++const bool Matcher::match_rule_supported(int opcode) { ++ if (!has_match_rule(opcode)) ++ return false; ++ ++ return true; // Per default match rules are supported. ++} ++ ++const bool Matcher::match_rule_supported_vector(int opcode, int vlen) { ++ // identify extra cases that we might want to provide match rules for ++ // e.g. Op_ vector nodes and other intrinsics while guarding with vlen ++ bool ret_value = match_rule_supported(opcode); ++ ++ return ret_value; // Per default match rules are supported. ++} ++ ++const bool Matcher::has_predicated_vectors(void) { ++ return false; ++} ++ ++const int Matcher::float_pressure(int default_pressure_threshold) { ++ Unimplemented(); ++ return default_pressure_threshold; ++} ++ ++bool Matcher::is_short_branch_offset(int rule, int br_size, int offset) { ++ const int safety_zone = 3 * BytesPerInstWord; ++ int offs = offset - br_size + 4; ++ // To be conservative on LoongArch ++ // branch node should be end with: ++ // branch inst ++ offs = (offs < 0 ? 
offs - safety_zone : offs + safety_zone) >> 2; ++ switch (rule) { ++ case jmpDir_long_rule: ++ case jmpDir_short_rule: ++ return Assembler::is_simm(offs, 26); ++ case jmpCon_flags_long_rule: ++ case jmpCon_flags_short_rule: ++ case branchConP_0_long_rule: ++ case branchConP_0_short_rule: ++ case branchConN2P_0_long_rule: ++ case branchConN2P_0_short_rule: ++ case cmpN_null_branch_long_rule: ++ case cmpN_null_branch_short_rule: ++ case branchConF_reg_reg_long_rule: ++ case branchConF_reg_reg_short_rule: ++ case branchConD_reg_reg_long_rule: ++ case branchConD_reg_reg_short_rule: ++ return Assembler::is_simm(offs, 21); ++ default: ++ return Assembler::is_simm(offs, 16); ++ } ++ return false; ++} ++ ++ ++// No additional cost for CMOVL. ++const int Matcher::long_cmove_cost() { return 0; } ++ ++// No CMOVF/CMOVD with SSE2 ++const int Matcher::float_cmove_cost() { return ConditionalMoveLimit; } ++ ++// Does the CPU require late expand (see block.cpp for description of late expand)? ++const bool Matcher::require_postalloc_expand = false; ++ ++// Do we need to mask the count passed to shift instructions or does ++// the cpu only look at the lower 5/6 bits anyway? ++const bool Matcher::need_masked_shift_count = false; ++ ++bool Matcher::narrow_oop_use_complex_address() { ++ assert(UseCompressedOops, "only for compressed oops code"); ++ return false; ++} ++ ++bool Matcher::narrow_klass_use_complex_address() { ++ assert(UseCompressedClassPointers, "only for compressed klass code"); ++ return false; ++} ++ ++bool Matcher::const_oop_prefer_decode() { ++ // Prefer ConN+DecodeN over ConP. ++ return true; ++} ++ ++bool Matcher::const_klass_prefer_decode() { ++ // TODO: Either support matching DecodeNKlass (heap-based) in operand ++ // or condisider the following: ++ // Prefer ConNKlass+DecodeNKlass over ConP in simple compressed klass mode. ++ //return Universe::narrow_klass_base() == NULL; ++ return true; ++} ++ ++// This is UltraSparc specific, true just means we have fast l2f conversion ++const bool Matcher::convL2FSupported(void) { ++ return true; ++} ++ ++// Vector ideal reg ++const uint Matcher::vector_ideal_reg(int size) { ++ assert(MaxVectorSize == 16 || MaxVectorSize == 32, ""); ++ switch(size) { ++ case 16: return Op_VecX; ++ case 32: return Op_VecY; ++ } ++ ShouldNotReachHere(); ++ return 0; ++} ++ ++// Only lowest bits of xmm reg are used for vector shift count. ++const uint Matcher::vector_shift_count_ideal_reg(int size) { ++ assert(MaxVectorSize == 16 || MaxVectorSize == 32, ""); ++ switch(size) { ++ case 16: return Op_VecX; ++ case 32: return Op_VecY; ++ } ++ ShouldNotReachHere(); ++ return 0; ++} ++ ++ ++const bool Matcher::convi2l_type_required = true; ++ ++// Should the Matcher clone shifts on addressing modes, expecting them ++// to be subsumed into complex addressing expressions or compute them ++// into registers? ++bool Matcher::clone_address_expressions(AddPNode* m, Matcher::MStack& mstack, VectorSet& address_visited) { ++ return clone_base_plus_offset_address(m, mstack, address_visited); ++} ++ ++void Compile::reshape_address(AddPNode* addp) { ++} ++ ++// Max vector size in bytes. 0 if not supported. ++const int Matcher::vector_width_in_bytes(BasicType bt) { ++ return (int)MaxVectorSize; ++} ++ ++// Limits on vector size (number of elements) loaded into vector. 
++const int Matcher::max_vector_size(const BasicType bt) { ++ assert(is_java_primitive(bt), "only primitive type vectors"); ++ return vector_width_in_bytes(bt)/type2aelembytes(bt); ++} ++ ++const int Matcher::min_vector_size(const BasicType bt) { ++ int max_size = max_vector_size(bt); ++ int size = 0; ++ ++ if (UseLSX) size = 16; ++ size = size / type2aelembytes(bt); ++ return MIN2(size,max_size); ++} ++ ++// LoongArch supports misaligned vectors store/load? FIXME ++const bool Matcher::misaligned_vectors_ok() { ++ return false; ++ //return !AlignVector; // can be changed by flag ++} ++ ++// Register for DIVI projection of divmodI ++RegMask Matcher::divI_proj_mask() { ++ ShouldNotReachHere(); ++ return RegMask(); ++} ++ ++// Register for MODI projection of divmodI ++RegMask Matcher::modI_proj_mask() { ++ ShouldNotReachHere(); ++ return RegMask(); ++} ++ ++// Register for DIVL projection of divmodL ++RegMask Matcher::divL_proj_mask() { ++ ShouldNotReachHere(); ++ return RegMask(); ++} ++ ++int Matcher::regnum_to_fpu_offset(int regnum) { ++ return regnum - 32; // The FP registers are in the second chunk ++} ++ ++ ++const bool Matcher::isSimpleConstant64(jlong value) { ++ // Will one (StoreL ConL) be cheaper than two (StoreI ConI)?. ++ return true; ++} ++ ++ ++// Return whether or not this register is ever used as an argument. This ++// function is used on startup to build the trampoline stubs in generateOptoStub. ++// Registers not mentioned will be killed by the VM call in the trampoline, and ++// arguments in those registers not be available to the callee. ++bool Matcher::can_be_java_arg( int reg ) { ++ // Refer to: [sharedRuntime_loongarch_64.cpp] SharedRuntime::java_calling_convention() ++ if ( reg == T0_num || reg == T0_H_num ++ || reg == A0_num || reg == A0_H_num ++ || reg == A1_num || reg == A1_H_num ++ || reg == A2_num || reg == A2_H_num ++ || reg == A3_num || reg == A3_H_num ++ || reg == A4_num || reg == A4_H_num ++ || reg == A5_num || reg == A5_H_num ++ || reg == A6_num || reg == A6_H_num ++ || reg == A7_num || reg == A7_H_num ) ++ return true; ++ ++ if ( reg == F0_num || reg == F0_H_num ++ || reg == F1_num || reg == F1_H_num ++ || reg == F2_num || reg == F2_H_num ++ || reg == F3_num || reg == F3_H_num ++ || reg == F4_num || reg == F4_H_num ++ || reg == F5_num || reg == F5_H_num ++ || reg == F6_num || reg == F6_H_num ++ || reg == F7_num || reg == F7_H_num ) ++ return true; ++ ++ return false; ++} ++ ++bool Matcher::is_spillable_arg( int reg ) { ++ return can_be_java_arg(reg); ++} ++ ++bool Matcher::use_asm_for_ldiv_by_con( jlong divisor ) { ++ return false; ++} ++ ++// Register for MODL projection of divmodL ++RegMask Matcher::modL_proj_mask() { ++ ShouldNotReachHere(); ++ return RegMask(); ++} ++ ++const RegMask Matcher::method_handle_invoke_SP_save_mask() { ++ return FP_REG_mask(); ++} ++ ++// LoongArch doesn't support AES intrinsics ++const bool Matcher::pass_original_key_for_aes() { ++ return false; ++} ++ ++int CallStaticJavaDirectNode::compute_padding(int current_offset) const { ++ const uintx m = alignment_required() - 1; ++ return mask_bits(current_offset + m, ~m) - current_offset; ++} ++ ++int CallDynamicJavaDirectNode::compute_padding(int current_offset) const { ++ const uintx m = alignment_required() - 1; ++ return mask_bits(current_offset + m, ~m) - current_offset; ++} ++ ++int CallLeafNoFPDirectNode::compute_padding(int current_offset) const { ++ const uintx m = alignment_required() - 1; ++ return mask_bits(current_offset + m, ~m) - current_offset; ++} ++ ++int 
CallLeafDirectNode::compute_padding(int current_offset) const { ++ const uintx m = alignment_required() - 1; ++ return mask_bits(current_offset + m, ~m) - current_offset; ++} ++ ++int CallRuntimeDirectNode::compute_padding(int current_offset) const { ++ const uintx m = alignment_required() - 1; ++ return mask_bits(current_offset + m, ~m) - current_offset; ++} ++ ++// If CPU can load and store mis-aligned doubles directly then no fixup is ++// needed. Else we split the double into 2 integer pieces and move it ++// piece-by-piece. Only happens when passing doubles into C code as the ++// Java calling convention forces doubles to be aligned. ++const bool Matcher::misaligned_doubles_ok = false; ++// Do floats take an entire double register or just half? ++//const bool Matcher::float_in_double = true; ++bool Matcher::float_in_double() { return false; } ++// Do ints take an entire long register or just half? ++const bool Matcher::int_in_long = true; ++// Is it better to copy float constants, or load them directly from memory? ++// Intel can load a float constant from a direct address, requiring no ++// extra registers. Most RISCs will have to materialize an address into a ++// register first, so they would do better to copy the constant from stack. ++const bool Matcher::rematerialize_float_constants = false; ++// Advertise here if the CPU requires explicit rounding operations ++// to implement the UseStrictFP mode. ++const bool Matcher::strict_fp_requires_explicit_rounding = false; ++// false => size gets scaled to BytesPerLong, ok. ++const bool Matcher::init_array_count_is_in_bytes = false; ++ ++// Indicate if the safepoint node needs the polling page as an input. ++// it does if the polling page is more than disp32 away. ++bool SafePointNode::needs_polling_address_input() { ++ return SafepointMechanism::uses_thread_local_poll(); ++} ++ ++#ifndef PRODUCT ++void MachBreakpointNode::format( PhaseRegAlloc *, outputStream* st ) const { ++ st->print("BRK"); ++} ++#endif ++ ++void MachBreakpointNode::emit(CodeBuffer &cbuf, PhaseRegAlloc* ra_) const { ++ MacroAssembler _masm(&cbuf); ++ __ brk(5); ++} ++ ++uint MachBreakpointNode::size(PhaseRegAlloc* ra_) const { ++ return MachNode::size(ra_); ++} ++ ++ ++ ++// !!!!! Special hack to get all type of calls to specify the byte offset ++// from the start of the call to the point where the return address ++// will point. ++int MachCallStaticJavaNode::ret_addr_offset() { ++ // bl ++ return NativeCall::instruction_size; ++} ++ ++int MachCallDynamicJavaNode::ret_addr_offset() { ++ // lu12i_w IC_Klass, ++ // ori IC_Klass, ++ // lu32i_d IC_Klass ++ // lu52i_d IC_Klass ++ ++ // bl ++ return NativeMovConstReg::instruction_size + NativeCall::instruction_size; ++} ++ ++//============================================================================= ++ ++// Figure out which register class each belongs in: rc_int, rc_float, rc_stack ++enum RC { rc_bad, rc_int, rc_float, rc_stack }; ++static enum RC rc_class( OptoReg::Name reg ) { ++ if( !OptoReg::is_valid(reg) ) return rc_bad; ++ if (OptoReg::is_stack(reg)) return rc_stack; ++ VMReg r = OptoReg::as_VMReg(reg); ++ if (r->is_Register()) return rc_int; ++ assert(r->is_FloatRegister(), "must be"); ++ return rc_float; ++} ++ ++// Helper methods for MachSpillCopyNode::implementation(). 
++static int vec_mov_helper(CodeBuffer *cbuf, bool do_size, int src_lo, int dst_lo, ++ int src_hi, int dst_hi, uint ireg, outputStream* st) { ++ int size = 0; ++ if (cbuf) { ++ MacroAssembler _masm(cbuf); ++ int offset = __ offset(); ++ switch (ireg) { ++ case Op_VecX: ++ __ vori_b(as_FloatRegister(Matcher::_regEncode[dst_lo]), as_FloatRegister(Matcher::_regEncode[src_lo]), 0); ++ break; ++ case Op_VecY: ++ __ xvori_b(as_FloatRegister(Matcher::_regEncode[dst_lo]), as_FloatRegister(Matcher::_regEncode[src_lo]), 0); ++ break; ++ default: ++ ShouldNotReachHere(); ++ } ++#ifndef PRODUCT ++ } else if (!do_size) { ++ switch (ireg) { ++ case Op_VecX: ++ st->print("vori.b %s, %s, 0\t# spill", Matcher::regName[dst_lo], Matcher::regName[src_lo]); ++ break; ++ case Op_VecY: ++ st->print("xvori.b %s, %s, 0\t# spill", Matcher::regName[dst_lo], Matcher::regName[src_lo]); ++ break; ++ default: ++ ShouldNotReachHere(); ++ } ++#endif ++ } ++ size += 4; ++ return size; ++} ++ ++static int vec_spill_helper(CodeBuffer *cbuf, bool do_size, bool is_load, ++ int stack_offset, int reg, uint ireg, outputStream* st) { ++ int size = 0; ++ if (cbuf) { ++ MacroAssembler _masm(cbuf); ++ int offset = __ offset(); ++ if (is_load) { ++ switch (ireg) { ++ case Op_VecX: ++ __ vld(as_FloatRegister(Matcher::_regEncode[reg]), SP, stack_offset); ++ break; ++ case Op_VecY: ++ __ xvld(as_FloatRegister(Matcher::_regEncode[reg]), SP, stack_offset); ++ break; ++ default: ++ ShouldNotReachHere(); ++ } ++ } else { // store ++ switch (ireg) { ++ case Op_VecX: ++ __ vst(as_FloatRegister(Matcher::_regEncode[reg]), SP, stack_offset); ++ break; ++ case Op_VecY: ++ __ xvst(as_FloatRegister(Matcher::_regEncode[reg]), SP, stack_offset); ++ break; ++ default: ++ ShouldNotReachHere(); ++ } ++ } ++#ifndef PRODUCT ++ } else if (!do_size) { ++ if (is_load) { ++ switch (ireg) { ++ case Op_VecX: ++ st->print("vld %s, [SP + %d]\t# spill", Matcher::regName[reg], stack_offset); ++ break; ++ case Op_VecY: ++ st->print("xvld %s, [SP + %d]\t# spill", Matcher::regName[reg], stack_offset); ++ break; ++ default: ++ ShouldNotReachHere(); ++ } ++ } else { // store ++ switch (ireg) { ++ case Op_VecX: ++ st->print("vst %s, [SP + %d]\t# spill", Matcher::regName[reg], stack_offset); ++ break; ++ case Op_VecY: ++ st->print("xvst %s, [SP + %d]\t# spill", Matcher::regName[reg], stack_offset); ++ break; ++ default: ++ ShouldNotReachHere(); ++ } ++ } ++#endif ++ } ++ size += 4; ++ return size; ++} ++ ++static int vec_stack_to_stack_helper(CodeBuffer *cbuf, int src_offset, ++ int dst_offset, uint ireg, outputStream* st) { ++ int size = 0; ++ if (cbuf) { ++ MacroAssembler _masm(cbuf); ++ switch (ireg) { ++ case Op_VecX: ++ __ vld(F23, SP, src_offset); ++ __ vst(F23, SP, dst_offset); ++ break; ++ case Op_VecY: ++ __ xvld(F23, SP, src_offset); ++ __ xvst(F23, SP, dst_offset); ++ break; ++ default: ++ ShouldNotReachHere(); ++ } ++#ifndef PRODUCT ++ } else { ++ switch (ireg) { ++ case Op_VecX: ++ st->print("vld f23, %d(sp)\n\t" ++ "vst f23, %d(sp)\t# 128-bit mem-mem spill", ++ src_offset, dst_offset); ++ break; ++ case Op_VecY: ++ st->print("xvld f23, %d(sp)\n\t" ++ "xvst f23, %d(sp)\t# 256-bit mem-mem spill", ++ src_offset, dst_offset); ++ break; ++ default: ++ ShouldNotReachHere(); ++ } ++#endif ++ } ++ size += 8; ++ return size; ++} ++ ++uint MachSpillCopyNode::implementation( CodeBuffer *cbuf, PhaseRegAlloc *ra_, bool do_size, outputStream* st ) const { ++ // Get registers to move ++ OptoReg::Name src_second = ra_->get_reg_second(in(1)); ++ OptoReg::Name src_first = 
ra_->get_reg_first(in(1)); ++ OptoReg::Name dst_second = ra_->get_reg_second(this ); ++ OptoReg::Name dst_first = ra_->get_reg_first(this ); ++ ++ enum RC src_second_rc = rc_class(src_second); ++ enum RC src_first_rc = rc_class(src_first); ++ enum RC dst_second_rc = rc_class(dst_second); ++ enum RC dst_first_rc = rc_class(dst_first); ++ ++ assert(OptoReg::is_valid(src_first) && OptoReg::is_valid(dst_first), "must move at least 1 register" ); ++ ++ // Generate spill code! ++ ++ if( src_first == dst_first && src_second == dst_second ) ++ return 0; // Self copy, no move ++ ++ if (bottom_type()->isa_vect() != NULL) { ++ uint ireg = ideal_reg(); ++ assert((src_first_rc != rc_int && dst_first_rc != rc_int), "sanity"); ++ if (src_first_rc == rc_stack && dst_first_rc == rc_stack) { ++ // mem -> mem ++ int src_offset = ra_->reg2offset(src_first); ++ int dst_offset = ra_->reg2offset(dst_first); ++ vec_stack_to_stack_helper(cbuf, src_offset, dst_offset, ireg, st); ++ } else if (src_first_rc == rc_float && dst_first_rc == rc_float) { ++ vec_mov_helper(cbuf, do_size, src_first, dst_first, src_second, dst_second, ireg, st); ++ } else if (src_first_rc == rc_float && dst_first_rc == rc_stack) { ++ int stack_offset = ra_->reg2offset(dst_first); ++ vec_spill_helper(cbuf, do_size, false, stack_offset, src_first, ireg, st); ++ } else if (src_first_rc == rc_stack && dst_first_rc == rc_float) { ++ int stack_offset = ra_->reg2offset(src_first); ++ vec_spill_helper(cbuf, do_size, true, stack_offset, dst_first, ireg, st); ++ } else { ++ ShouldNotReachHere(); ++ } ++ return 0; ++ } ++ ++ if (src_first_rc == rc_stack) { ++ // mem -> ++ if (dst_first_rc == rc_stack) { ++ // mem -> mem ++ assert(src_second != dst_first, "overlap"); ++ if ((src_first & 1) == 0 && src_first + 1 == src_second && ++ (dst_first & 1) == 0 && dst_first + 1 == dst_second) { ++ // 64-bit ++ int src_offset = ra_->reg2offset(src_first); ++ int dst_offset = ra_->reg2offset(dst_first); ++ if (cbuf) { ++ MacroAssembler _masm(cbuf); ++ __ ld_d(AT, Address(SP, src_offset)); ++ __ st_d(AT, Address(SP, dst_offset)); ++#ifndef PRODUCT ++ } else { ++ st->print("\tld_d AT, [SP + #%d]\t# 64-bit mem-mem spill 1\n\t" ++ "st_d AT, [SP + #%d]", ++ src_offset, dst_offset); ++#endif ++ } ++ } else { ++ // 32-bit ++ assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform"); ++ assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform"); ++ // No pushl/popl, so: ++ int src_offset = ra_->reg2offset(src_first); ++ int dst_offset = ra_->reg2offset(dst_first); ++ if (cbuf) { ++ MacroAssembler _masm(cbuf); ++ __ ld_w(AT, Address(SP, src_offset)); ++ __ st_w(AT, Address(SP, dst_offset)); ++#ifndef PRODUCT ++ } else { ++ st->print("\tld_w AT, [SP + #%d] spill 2\n\t" ++ "st_w AT, [SP + #%d]\n\t", ++ src_offset, dst_offset); ++#endif ++ } ++ } ++ return 0; ++ } else if (dst_first_rc == rc_int) { ++ // mem -> gpr ++ if ((src_first & 1) == 0 && src_first + 1 == src_second && ++ (dst_first & 1) == 0 && dst_first + 1 == dst_second) { ++ // 64-bit ++ int offset = ra_->reg2offset(src_first); ++ if (cbuf) { ++ MacroAssembler _masm(cbuf); ++ __ ld_d(as_Register(Matcher::_regEncode[dst_first]), Address(SP, offset)); ++#ifndef PRODUCT ++ } else { ++ st->print("\tld_d %s, [SP + #%d]\t# spill 3", ++ Matcher::regName[dst_first], ++ offset); ++#endif ++ } ++ } else { ++ // 32-bit ++ assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform"); ++ assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform"); ++ 
int offset = ra_->reg2offset(src_first); ++ if (cbuf) { ++ MacroAssembler _masm(cbuf); ++ if (this->ideal_reg() == Op_RegI) ++ __ ld_w(as_Register(Matcher::_regEncode[dst_first]), Address(SP, offset)); ++ else { ++ if (Assembler::is_simm(offset, 12)) { ++ __ ld_wu(as_Register(Matcher::_regEncode[dst_first]), Address(SP, offset)); ++ } else { ++ __ li(AT, offset); ++ __ ldx_wu(as_Register(Matcher::_regEncode[dst_first]), SP, AT); ++ } ++ } ++#ifndef PRODUCT ++ } else { ++ if (this->ideal_reg() == Op_RegI) ++ st->print("\tld_w %s, [SP + #%d]\t# spill 4", ++ Matcher::regName[dst_first], ++ offset); ++ else ++ st->print("\tld_wu %s, [SP + #%d]\t# spill 5", ++ Matcher::regName[dst_first], ++ offset); ++#endif ++ } ++ } ++ return 0; ++ } else if (dst_first_rc == rc_float) { ++ // mem-> xmm ++ if ((src_first & 1) == 0 && src_first + 1 == src_second && ++ (dst_first & 1) == 0 && dst_first + 1 == dst_second) { ++ // 64-bit ++ int offset = ra_->reg2offset(src_first); ++ if (cbuf) { ++ MacroAssembler _masm(cbuf); ++ __ fld_d( as_FloatRegister(Matcher::_regEncode[dst_first]), Address(SP, offset)); ++#ifndef PRODUCT ++ } else { ++ st->print("\tfld_d %s, [SP + #%d]\t# spill 6", ++ Matcher::regName[dst_first], ++ offset); ++#endif ++ } ++ } else { ++ // 32-bit ++ assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform"); ++ assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform"); ++ int offset = ra_->reg2offset(src_first); ++ if (cbuf) { ++ MacroAssembler _masm(cbuf); ++ __ fld_s( as_FloatRegister(Matcher::_regEncode[dst_first]), Address(SP, offset)); ++#ifndef PRODUCT ++ } else { ++ st->print("\tfld_s %s, [SP + #%d]\t# spill 7", ++ Matcher::regName[dst_first], ++ offset); ++#endif ++ } ++ } ++ } ++ return 0; ++ } else if (src_first_rc == rc_int) { ++ // gpr -> ++ if (dst_first_rc == rc_stack) { ++ // gpr -> mem ++ if ((src_first & 1) == 0 && src_first + 1 == src_second && ++ (dst_first & 1) == 0 && dst_first + 1 == dst_second) { ++ // 64-bit ++ int offset = ra_->reg2offset(dst_first); ++ if (cbuf) { ++ MacroAssembler _masm(cbuf); ++ __ st_d(as_Register(Matcher::_regEncode[src_first]), Address(SP, offset)); ++#ifndef PRODUCT ++ } else { ++ st->print("\tst_d %s, [SP + #%d] # spill 8", ++ Matcher::regName[src_first], ++ offset); ++#endif ++ } ++ } else { ++ // 32-bit ++ assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform"); ++ assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform"); ++ int offset = ra_->reg2offset(dst_first); ++ if (cbuf) { ++ MacroAssembler _masm(cbuf); ++ __ st_w(as_Register(Matcher::_regEncode[src_first]), Address(SP, offset)); ++#ifndef PRODUCT ++ } else { ++ st->print("\tst_w %s, [SP + #%d]\t# spill 9", ++ Matcher::regName[src_first], offset); ++#endif ++ } ++ } ++ return 0; ++ } else if (dst_first_rc == rc_int) { ++ // gpr -> gpr ++ if ((src_first & 1) == 0 && src_first + 1 == src_second && ++ (dst_first & 1) == 0 && dst_first + 1 == dst_second) { ++ // 64-bit ++ if (cbuf) { ++ MacroAssembler _masm(cbuf); ++ __ move(as_Register(Matcher::_regEncode[dst_first]), ++ as_Register(Matcher::_regEncode[src_first])); ++#ifndef PRODUCT ++ } else { ++ st->print("\tmove(64bit) %s <-- %s\t# spill 10", ++ Matcher::regName[dst_first], ++ Matcher::regName[src_first]); ++#endif ++ } ++ return 0; ++ } else { ++ // 32-bit ++ assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform"); ++ assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform"); ++ if (cbuf) { ++ 
MacroAssembler _masm(cbuf); ++ if (this->ideal_reg() == Op_RegI) ++ __ move_u32(as_Register(Matcher::_regEncode[dst_first]), as_Register(Matcher::_regEncode[src_first])); ++ else ++ __ add_d(as_Register(Matcher::_regEncode[dst_first]), as_Register(Matcher::_regEncode[src_first]), R0); ++#ifndef PRODUCT ++ } else { ++ st->print("\n\t"); ++ st->print("move(32-bit) %s <-- %s\t# spill 11", ++ Matcher::regName[dst_first], ++ Matcher::regName[src_first]); ++#endif ++ } ++ return 0; ++ } ++ } else if (dst_first_rc == rc_float) { ++ // gpr -> xmm ++ if ((src_first & 1) == 0 && src_first + 1 == src_second && ++ (dst_first & 1) == 0 && dst_first + 1 == dst_second) { ++ // 64-bit ++ if (cbuf) { ++ MacroAssembler _masm(cbuf); ++ __ movgr2fr_d(as_FloatRegister(Matcher::_regEncode[dst_first]), as_Register(Matcher::_regEncode[src_first])); ++#ifndef PRODUCT ++ } else { ++ st->print("\n\t"); ++ st->print("movgr2fr_d %s, %s\t# spill 12", ++ Matcher::regName[dst_first], ++ Matcher::regName[src_first]); ++#endif ++ } ++ } else { ++ // 32-bit ++ assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform"); ++ assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform"); ++ if (cbuf) { ++ MacroAssembler _masm(cbuf); ++ __ movgr2fr_w(as_FloatRegister(Matcher::_regEncode[dst_first]), as_Register(Matcher::_regEncode[src_first])); ++#ifndef PRODUCT ++ } else { ++ st->print("\n\t"); ++ st->print("movgr2fr_w %s, %s\t# spill 13", ++ Matcher::regName[dst_first], ++ Matcher::regName[src_first]); ++#endif ++ } ++ } ++ return 0; ++ } ++ } else if (src_first_rc == rc_float) { ++ // xmm -> ++ if (dst_first_rc == rc_stack) { ++ // xmm -> mem ++ if ((src_first & 1) == 0 && src_first + 1 == src_second && ++ (dst_first & 1) == 0 && dst_first + 1 == dst_second) { ++ // 64-bit ++ int offset = ra_->reg2offset(dst_first); ++ if (cbuf) { ++ MacroAssembler _masm(cbuf); ++ __ fst_d( as_FloatRegister(Matcher::_regEncode[src_first]), Address(SP, offset) ); ++#ifndef PRODUCT ++ } else { ++ st->print("\n\t"); ++ st->print("fst_d %s, [SP + #%d]\t# spill 14", ++ Matcher::regName[src_first], ++ offset); ++#endif ++ } ++ } else { ++ // 32-bit ++ assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform"); ++ assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform"); ++ int offset = ra_->reg2offset(dst_first); ++ if (cbuf) { ++ MacroAssembler _masm(cbuf); ++ __ fst_s(as_FloatRegister(Matcher::_regEncode[src_first]), Address(SP, offset)); ++#ifndef PRODUCT ++ } else { ++ st->print("\n\t"); ++ st->print("fst_s %s, [SP + #%d]\t# spill 15", ++ Matcher::regName[src_first], ++ offset); ++#endif ++ } ++ } ++ return 0; ++ } else if (dst_first_rc == rc_int) { ++ // xmm -> gpr ++ if ((src_first & 1) == 0 && src_first + 1 == src_second && ++ (dst_first & 1) == 0 && dst_first + 1 == dst_second) { ++ // 64-bit ++ if (cbuf) { ++ MacroAssembler _masm(cbuf); ++ __ movfr2gr_d( as_Register(Matcher::_regEncode[dst_first]), as_FloatRegister(Matcher::_regEncode[src_first])); ++#ifndef PRODUCT ++ } else { ++ st->print("\n\t"); ++ st->print("movfr2gr_d %s, %s\t# spill 16", ++ Matcher::regName[dst_first], ++ Matcher::regName[src_first]); ++#endif ++ } ++ } else { ++ // 32-bit ++ assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform"); ++ assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform"); ++ if (cbuf) { ++ MacroAssembler _masm(cbuf); ++ __ movfr2gr_s( as_Register(Matcher::_regEncode[dst_first]), 
as_FloatRegister(Matcher::_regEncode[src_first])); ++#ifndef PRODUCT ++ } else { ++ st->print("\n\t"); ++ st->print("movfr2gr_s %s, %s\t# spill 17", ++ Matcher::regName[dst_first], ++ Matcher::regName[src_first]); ++#endif ++ } ++ } ++ return 0; ++ } else if (dst_first_rc == rc_float) { ++ // xmm -> xmm ++ if ((src_first & 1) == 0 && src_first + 1 == src_second && ++ (dst_first & 1) == 0 && dst_first + 1 == dst_second) { ++ // 64-bit ++ if (cbuf) { ++ MacroAssembler _masm(cbuf); ++ __ fmov_d( as_FloatRegister(Matcher::_regEncode[dst_first]), as_FloatRegister(Matcher::_regEncode[src_first])); ++#ifndef PRODUCT ++ } else { ++ st->print("\n\t"); ++ st->print("fmov_d %s <-- %s\t# spill 18", ++ Matcher::regName[dst_first], ++ Matcher::regName[src_first]); ++#endif ++ } ++ } else { ++ // 32-bit ++ assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform"); ++ assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform"); ++ if (cbuf) { ++ MacroAssembler _masm(cbuf); ++ __ fmov_s( as_FloatRegister(Matcher::_regEncode[dst_first]), as_FloatRegister(Matcher::_regEncode[src_first])); ++#ifndef PRODUCT ++ } else { ++ st->print("\n\t"); ++ st->print("fmov_s %s <-- %s\t# spill 19", ++ Matcher::regName[dst_first], ++ Matcher::regName[src_first]); ++#endif ++ } ++ } ++ return 0; ++ } ++ } ++ ++ assert(0," foo "); ++ Unimplemented(); ++ return 0; ++} ++ ++#ifndef PRODUCT ++void MachSpillCopyNode::format( PhaseRegAlloc *ra_, outputStream* st ) const { ++ implementation( NULL, ra_, false, st ); ++} ++#endif ++ ++void MachSpillCopyNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const { ++ implementation( &cbuf, ra_, false, NULL ); ++} ++ ++uint MachSpillCopyNode::size(PhaseRegAlloc *ra_) const { ++ return MachNode::size(ra_); ++} ++ ++//============================================================================= ++#ifndef PRODUCT ++void MachEpilogNode::format( PhaseRegAlloc *ra_, outputStream* st ) const { ++ Compile *C = ra_->C; ++ int framesize = C->frame_size_in_bytes(); ++ ++ assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned"); ++ ++ st->print_cr("addi_d SP, SP, %d # Rlease stack @ MachEpilogNode", framesize); ++ st->print("\t"); ++ st->print_cr("ld_d RA, SP, %d # Restore RA @ MachEpilogNode", -wordSize); ++ st->print("\t"); ++ st->print_cr("ld_d FP, SP, %d # Restore FP @ MachEpilogNode", -wordSize*2); ++ ++ if( do_polling() && C->is_method_compilation() ) { ++ st->print("\t"); ++ if (SafepointMechanism::uses_thread_local_poll()) { ++ st->print_cr("ld_d AT, poll_offset[thread] #polling_page_address\n\t" ++ "ld_w AT, [AT]\t" ++ "# Safepoint: poll for GC"); ++ } else { ++ st->print_cr("Poll Safepoint # MachEpilogNode"); ++ } ++ } ++} ++#endif ++ ++void MachEpilogNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const { ++ Compile *C = ra_->C; ++ MacroAssembler _masm(&cbuf); ++ int framesize = C->frame_size_in_bytes(); ++ ++ assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned"); ++ ++ __ ld_d(RA, Address(SP, framesize - wordSize)); ++ __ ld_d(FP, Address(SP, framesize - wordSize * 2)); ++ if (Assembler::is_simm(framesize, 12)) { ++ __ addi_d(SP, SP, framesize); ++ } else { ++ __ li(AT, framesize); ++ __ add_d(SP, SP, AT); ++ } ++ ++ if (StackReservedPages > 0 && C->has_reserved_stack_access()) { ++ __ reserved_stack_check(); ++ } ++ ++ Register thread = TREG; ++#ifndef OPT_THREAD ++ __ get_thread(thread); ++#endif ++ ++ if( do_polling() && C->is_method_compilation() ) { ++ if 
(SafepointMechanism::uses_thread_local_poll()) { ++ __ ld_d(AT, thread, in_bytes(Thread::polling_page_offset())); ++ __ relocate(relocInfo::poll_return_type); ++ __ ld_w(AT, AT, 0); ++ } else { ++ __ li(AT, (long)os::get_polling_page()); ++ __ relocate(relocInfo::poll_return_type); ++ __ ld_w(AT, AT, 0); ++ } ++ } ++} ++ ++uint MachEpilogNode::size(PhaseRegAlloc *ra_) const { ++ return MachNode::size(ra_); // too many variables; just compute it the hard way ++} ++ ++int MachEpilogNode::reloc() const { ++ return 0; // a large enough number ++} ++ ++const Pipeline * MachEpilogNode::pipeline() const { ++ return MachNode::pipeline_class(); ++} ++ ++int MachEpilogNode::safepoint_offset() const { return 0; } ++ ++//============================================================================= ++ ++#ifndef PRODUCT ++void BoxLockNode::format( PhaseRegAlloc *ra_, outputStream* st ) const { ++ int offset = ra_->reg2offset(in_RegMask(0).find_first_elem()); ++ int reg = ra_->get_reg_first(this); ++ st->print("ADDI_D %s, SP, %d @BoxLockNode",Matcher::regName[reg],offset); ++} ++#endif ++ ++ ++uint BoxLockNode::size(PhaseRegAlloc *ra_) const { ++ int offset = ra_->reg2offset(in_RegMask(0).find_first_elem()); ++ ++ if (Assembler::is_simm(offset, 12)) ++ return 4; ++ else ++ return 3 * 4; ++} ++ ++void BoxLockNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const { ++ MacroAssembler _masm(&cbuf); ++ int offset = ra_->reg2offset(in_RegMask(0).find_first_elem()); ++ int reg = ra_->get_encode(this); ++ ++ if (Assembler::is_simm(offset, 12)) { ++ __ addi_d(as_Register(reg), SP, offset); ++ } else { ++ __ lu12i_w(AT, Assembler::split_low20(offset >> 12)); ++ __ ori(AT, AT, Assembler::split_low12(offset)); ++ __ add_d(as_Register(reg), SP, AT); ++ } ++} ++ ++int MachCallRuntimeNode::ret_addr_offset() { ++ // pcaddu18i ++ // jirl ++ return NativeFarCall::instruction_size; ++} ++ ++ ++//============================================================================= ++#ifndef PRODUCT ++void MachNopNode::format( PhaseRegAlloc *, outputStream* st ) const { ++ st->print("NOP \t# %d bytes pad for loops and calls", 4 * _count); ++} ++#endif ++ ++void MachNopNode::emit(CodeBuffer &cbuf, PhaseRegAlloc * ) const { ++ MacroAssembler _masm(&cbuf); ++ int i = 0; ++ for(i = 0; i < _count; i++) ++ __ nop(); ++} ++ ++uint MachNopNode::size(PhaseRegAlloc *) const { ++ return 4 * _count; ++} ++const Pipeline* MachNopNode::pipeline() const { ++ return MachNode::pipeline_class(); ++} ++ ++//============================================================================= ++ ++//============================================================================= ++#ifndef PRODUCT ++void MachUEPNode::format( PhaseRegAlloc *ra_, outputStream* st ) const { ++ st->print_cr("load_klass(T4, T0)"); ++ st->print_cr("\tbeq(T4, iCache, L)"); ++ st->print_cr("\tjmp(SharedRuntime::get_ic_miss_stub(), relocInfo::runtime_call_type)"); ++ st->print_cr(" L:"); ++} ++#endif ++ ++ ++void MachUEPNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const { ++ MacroAssembler _masm(&cbuf); ++ int ic_reg = Matcher::inline_cache_reg_encode(); ++ Label L; ++ Register receiver = T0; ++ Register iCache = as_Register(ic_reg); ++ ++ __ load_klass(T4, receiver); ++ __ beq(T4, iCache, L); ++ __ jmp((address)SharedRuntime::get_ic_miss_stub(), relocInfo::runtime_call_type); ++ __ bind(L); ++} ++ ++uint MachUEPNode::size(PhaseRegAlloc *ra_) const { ++ return MachNode::size(ra_); ++} ++ ++ ++ ++//============================================================================= ++ ++const 
RegMask& MachConstantBaseNode::_out_RegMask = P_REG_mask(); ++ ++int Compile::ConstantTable::calculate_table_base_offset() const { ++ return 0; // absolute addressing, no offset ++} ++ ++bool MachConstantBaseNode::requires_postalloc_expand() const { return false; } ++void MachConstantBaseNode::postalloc_expand(GrowableArray *nodes, PhaseRegAlloc *ra_) { ++ ShouldNotReachHere(); ++} ++ ++void MachConstantBaseNode::emit(CodeBuffer& cbuf, PhaseRegAlloc* ra_) const { ++ Compile* C = ra_->C; ++ Compile::ConstantTable& constant_table = C->constant_table(); ++ MacroAssembler _masm(&cbuf); ++ ++ Register Rtoc = as_Register(ra_->get_encode(this)); ++ CodeSection* consts_section = cbuf.consts(); ++ int consts_size = consts_section->align_at_start(consts_section->size()); ++ assert(constant_table.size() == consts_size, "must be equal"); ++ ++ if (consts_section->size()) { ++ assert((CodeBuffer::SECT_CONSTS + 1) == CodeBuffer::SECT_INSTS, ++ "insts must be immediately follow consts"); ++ // Materialize the constant table base. ++ address baseaddr = cbuf.insts()->start() - consts_size + -(constant_table.table_base_offset()); ++ jint offs = (baseaddr - __ pc()) >> 2; ++ guarantee(Assembler::is_simm(offs, 20), "Not signed 20-bit offset"); ++ __ pcaddi(Rtoc, offs); ++ } ++} ++ ++uint MachConstantBaseNode::size(PhaseRegAlloc* ra_) const { ++ // pcaddi ++ return 1 * BytesPerInstWord; ++} ++ ++#ifndef PRODUCT ++void MachConstantBaseNode::format(PhaseRegAlloc* ra_, outputStream* st) const { ++ Register r = as_Register(ra_->get_encode(this)); ++ st->print("pcaddi %s, &constanttable (constant table base) @ MachConstantBaseNode", r->name()); ++} ++#endif ++ ++ ++//============================================================================= ++#ifndef PRODUCT ++void MachPrologNode::format( PhaseRegAlloc *ra_, outputStream* st ) const { ++ Compile* C = ra_->C; ++ ++ int framesize = C->frame_size_in_bytes(); ++ int bangsize = C->bang_size_in_bytes(); ++ assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned"); ++ ++ // Calls to C2R adapters often do not accept exceptional returns. ++ // We require that their callers must bang for them. But be careful, because ++ // some VM calls (such as call site linkage) can use several kilobytes of ++ // stack. But the stack safety zone should account for that. ++ // See bugs 4446381, 4468289, 4497237. 
++ if (C->need_stack_bang(bangsize)) { ++ st->print_cr("# stack bang"); st->print("\t"); ++ } ++ st->print("st_d RA, %d(SP) @ MachPrologNode\n\t", -wordSize); ++ st->print("st_d FP, %d(SP) @ MachPrologNode\n\t", -wordSize*2); ++ st->print("addi_d FP, SP, -%d \n\t", wordSize*2); ++ st->print("addi_d SP, SP, -%d \t",framesize); ++} ++#endif ++ ++ ++void MachPrologNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const { ++ Compile* C = ra_->C; ++ MacroAssembler _masm(&cbuf); ++ ++ int framesize = C->frame_size_in_bytes(); ++ int bangsize = C->bang_size_in_bytes(); ++ ++ assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned"); ++ ++#ifdef ASSERT ++ address start = __ pc(); ++#endif ++ ++ if (C->need_stack_bang(bangsize)) { ++ __ generate_stack_overflow_check(bangsize); ++ } ++ ++ if (Assembler::is_simm(-framesize, 12)) { ++ __ addi_d(SP, SP, -framesize); ++ } else { ++ __ li(AT, -framesize); ++ __ add_d(SP, SP, AT); ++ } ++ __ st_d(RA, Address(SP, framesize - wordSize)); ++ __ st_d(FP, Address(SP, framesize - wordSize * 2)); ++ if (Assembler::is_simm(framesize - wordSize * 2, 12)) { ++ __ addi_d(FP, SP, framesize - wordSize * 2); ++ } else { ++ __ li(AT, framesize - wordSize * 2); ++ __ add_d(FP, SP, AT); ++ } ++ ++ assert((__ pc() - start) >= 1 * BytesPerInstWord, "No enough room for patch_verified_entry"); ++ ++ C->set_frame_complete(cbuf.insts_size()); ++ if (C->has_mach_constant_base_node()) { ++ // NOTE: We set the table base offset here because users might be ++ // emitted before MachConstantBaseNode. ++ Compile::ConstantTable& constant_table = C->constant_table(); ++ constant_table.set_table_base_offset(constant_table.calculate_table_base_offset()); ++ } ++} ++ ++ ++uint MachPrologNode::size(PhaseRegAlloc *ra_) const { ++ return MachNode::size(ra_); // too many variables; just compute it the hard way ++} ++ ++int MachPrologNode::reloc() const { ++ return 0; // a large enough number ++} ++ ++bool is_CAS(int opcode) ++{ ++ switch(opcode) { ++ // We handle these ++ case Op_CompareAndSwapI: ++ case Op_CompareAndSwapL: ++ case Op_CompareAndSwapP: ++ case Op_CompareAndSwapN: ++ case Op_GetAndSetI: ++ case Op_GetAndSetL: ++ case Op_GetAndSetP: ++ case Op_GetAndSetN: ++ case Op_GetAndAddI: ++ case Op_GetAndAddL: ++ return true; ++ default: ++ return false; ++ } ++} ++ ++bool use_AMO(int opcode) ++{ ++ switch(opcode) { ++ // We handle these ++ case Op_StoreI: ++ case Op_StoreL: ++ case Op_StoreP: ++ case Op_StoreN: ++ case Op_StoreNKlass: ++ return true; ++ default: ++ return false; ++ } ++} ++ ++bool unnecessary_acquire(const Node *barrier) ++{ ++ assert(barrier->is_MemBar(), "expecting a membar"); ++ ++ if (UseBarriersForVolatile) { ++ // we need to plant a dbar ++ return false; ++ } ++ ++ MemBarNode* mb = barrier->as_MemBar(); ++ ++ if (mb->trailing_load_store()) { ++ Node* load_store = mb->in(MemBarNode::Precedent); ++ assert(load_store->is_LoadStore(), "unexpected graph shape"); ++ return is_CAS(load_store->Opcode()); ++ } ++ ++ return false; ++} ++ ++bool unnecessary_release(const Node *n) ++{ ++ assert((n->is_MemBar() && n->Opcode() == Op_MemBarRelease), "expecting a release membar"); ++ ++ if (UseBarriersForVolatile) { ++ // we need to plant a dbar ++ return false; ++ } ++ ++ MemBarNode *barrier = n->as_MemBar(); ++ ++ if (!barrier->leading()) { ++ return false; ++ } else { ++ Node* trailing = barrier->trailing_membar(); ++ MemBarNode* trailing_mb = trailing->as_MemBar(); ++ assert(trailing_mb->trailing(), "Not a trailing membar?"); ++ 
assert(trailing_mb->leading_membar() == n, "inconsistent leading/trailing membars"); ++ ++ Node* mem = trailing_mb->in(MemBarNode::Precedent); ++ if (mem->is_Store()) { ++ assert(mem->as_Store()->is_release(), ""); ++ assert(trailing_mb->Opcode() == Op_MemBarVolatile, ""); ++ return use_AMO(mem->Opcode()); ++ } else { ++ assert(mem->is_LoadStore(), ""); ++ assert(trailing_mb->Opcode() == Op_MemBarAcquire, ""); ++ return is_CAS(mem->Opcode()); ++ } ++ } ++ ++ return false; ++} ++ ++bool unnecessary_volatile(const Node *n) ++{ ++ // assert n->is_MemBar(); ++ if (UseBarriersForVolatile) { ++ // we need to plant a dbar ++ return false; ++ } ++ ++ MemBarNode *mbvol = n->as_MemBar(); ++ ++ bool release = false; ++ if (mbvol->trailing_store()) { ++ Node* mem = mbvol->in(MemBarNode::Precedent); ++ release = use_AMO(mem->Opcode()); ++ } ++ ++ assert(!release || (mbvol->in(MemBarNode::Precedent)->is_Store() && mbvol->in(MemBarNode::Precedent)->as_Store()->is_release()), ""); ++#ifdef ASSERT ++ if (release) { ++ Node* leading = mbvol->leading_membar(); ++ assert(leading->Opcode() == Op_MemBarRelease, ""); ++ assert(leading->as_MemBar()->leading_store(), ""); ++ assert(leading->as_MemBar()->trailing_membar() == mbvol, ""); ++ } ++#endif ++ ++ return release; ++} ++ ++bool needs_releasing_store(const Node *n) ++{ ++ // assert n->is_Store(); ++ if (UseBarriersForVolatile) { ++ // we use a normal store and dbar combination ++ return false; ++ } ++ ++ StoreNode *st = n->as_Store(); ++ ++ return st->trailing_membar() != NULL; ++} ++ ++%} ++ ++//----------ENCODING BLOCK----------------------------------------------------- ++// This block specifies the encoding classes used by the compiler to output ++// byte streams. Encoding classes generate functions which are called by ++// Machine Instruction Nodes in order to generate the bit encoding of the ++// instruction. Operands specify their base encoding interface with the ++// interface keyword. There are currently supported four interfaces, ++// REG_INTER, CONST_INTER, MEMORY_INTER, & COND_INTER. REG_INTER causes an ++// operand to generate a function which returns its register number when ++// queried. CONST_INTER causes an operand to generate a function which ++// returns the value of the constant when queried. MEMORY_INTER causes an ++// operand to generate four functions which return the Base Register, the ++// Index Register, the Scale Value, and the Offset Value of the operand when ++// queried. COND_INTER causes an operand to generate six functions which ++// return the encoding code (ie - encoding bits for the instruction) ++// associated with each basic boolean condition for a conditional instruction. ++// Instructions specify two basic values for encoding. They use the ++// ins_encode keyword to specify their encoding class (which must be one of ++// the class names specified in the encoding block), and they use the ++// opcode keyword to specify, in order, their primary, secondary, and ++// tertiary opcode. Only the opcode sections which a particular instruction ++// needs for encoding need to be specified. ++encode %{ ++ ++ enc_class Java_To_Runtime (method meth) %{ // CALL Java_To_Runtime, Java_To_Runtime_Leaf ++ MacroAssembler _masm(&cbuf); ++ // This is the instruction starting address for relocation info. 
++ __ block_comment("Java_To_Runtime"); ++ cbuf.set_insts_mark(); ++ __ relocate(relocInfo::runtime_call_type); ++ __ patchable_call((address)$meth$$method); ++ %} ++ ++ enc_class Java_Static_Call (method meth) %{ // JAVA STATIC CALL ++ // CALL to fixup routine. Fixup routine uses ScopeDesc info to determine ++ // who we intended to call. ++ MacroAssembler _masm(&cbuf); ++ address addr = (address)$meth$$method; ++ address call; ++ __ block_comment("Java_Static_Call"); ++ ++ if ( !_method ) { ++ // A call to a runtime wrapper, e.g. new, new_typeArray_Java, uncommon_trap. ++ call = __ trampoline_call(AddressLiteral(addr, relocInfo::runtime_call_type), &cbuf); ++ if (call == NULL) { ++ ciEnv::current()->record_failure("CodeCache is full"); ++ return; ++ } ++ } else { ++ int method_index = resolved_method_index(cbuf); ++ RelocationHolder rspec = _optimized_virtual ? opt_virtual_call_Relocation::spec(method_index) ++ : static_call_Relocation::spec(method_index); ++ call = __ trampoline_call(AddressLiteral(addr, rspec), &cbuf); ++ if (call == NULL) { ++ ciEnv::current()->record_failure("CodeCache is full"); ++ return; ++ } ++ // Emit stub for static call ++ address stub = CompiledStaticCall::emit_to_interp_stub(cbuf); ++ if (stub == NULL) { ++ ciEnv::current()->record_failure("CodeCache is full"); ++ return; ++ } ++ } ++ %} ++ ++ ++ // ++ // [Ref: LIR_Assembler::ic_call() ] ++ // ++ enc_class Java_Dynamic_Call (method meth) %{ // JAVA DYNAMIC CALL ++ MacroAssembler _masm(&cbuf); ++ __ block_comment("Java_Dynamic_Call"); ++ address call = __ ic_call((address)$meth$$method, resolved_method_index(cbuf)); ++ if (call == NULL) { ++ ciEnv::current()->record_failure("CodeCache is full"); ++ return; ++ } ++ %} ++ ++ ++ enc_class enc_PartialSubtypeCheck(mRegP result, mRegP sub, mRegP super, mRegI tmp) %{ ++ Register result = $result$$Register; ++ Register sub = $sub$$Register; ++ Register super = $super$$Register; ++ Register length = $tmp$$Register; ++ Register tmp = T4; ++ Label miss; ++ ++ // result may be the same as sub ++ // 47c B40: # B21 B41 <- B20 Freq: 0.155379 ++ // 47c partialSubtypeCheck result=S1, sub=S1, super=S3, length=S0 ++ // 4bc mov S2, NULL #@loadConP ++ // 4c0 beq S1, S2, B21 #@branchConP P=0.999999 C=-1.000000 ++ // ++ MacroAssembler _masm(&cbuf); ++ Label done; ++ __ check_klass_subtype_slow_path(sub, super, length, tmp, ++ NULL, &miss, ++ /*set_cond_codes:*/ true); ++ // Refer to X86_64's RDI ++ __ move(result, 0); ++ __ b(done); ++ ++ __ bind(miss); ++ __ li(result, 1); ++ __ bind(done); ++ %} ++ ++%} ++ ++ ++//---------LOONGARCH FRAME-------------------------------------------------------------- ++// Definition of frame structure and management information. 
++// ++// S T A C K L A Y O U T Allocators stack-slot number ++// | (to get allocators register number ++// G Owned by | | v add SharedInfo::stack0) ++// r CALLER | | ++// o | +--------+ pad to even-align allocators stack-slot ++// w V | pad0 | numbers; owned by CALLER ++// t -----------+--------+----> Matcher::_in_arg_limit, unaligned ++// h ^ | in | 5 ++// | | args | 4 Holes in incoming args owned by SELF ++// | | old | | 3 ++// | | SP-+--------+----> Matcher::_old_SP, even aligned ++// v | | ret | 3 return address ++// Owned by +--------+ ++// Self | pad2 | 2 pad to align old SP ++// | +--------+ 1 ++// | | locks | 0 ++// | +--------+----> SharedInfo::stack0, even aligned ++// | | pad1 | 11 pad to align new SP ++// | +--------+ ++// | | | 10 ++// | | spills | 9 spills ++// V | | 8 (pad0 slot for callee) ++// -----------+--------+----> Matcher::_out_arg_limit, unaligned ++// ^ | out | 7 ++// | | args | 6 Holes in outgoing args owned by CALLEE ++// Owned by new | | ++// Callee SP-+--------+----> Matcher::_new_SP, even aligned ++// | | ++// ++// Note 1: Only region 8-11 is determined by the allocator. Region 0-5 is ++// known from SELF's arguments and the Java calling convention. ++// Region 6-7 is determined per call site. ++// Note 2: If the calling convention leaves holes in the incoming argument ++// area, those holes are owned by SELF. Holes in the outgoing area ++// are owned by the CALLEE. Holes should not be nessecary in the ++// incoming area, as the Java calling convention is completely under ++// the control of the AD file. Doubles can be sorted and packed to ++// avoid holes. Holes in the outgoing arguments may be nessecary for ++// varargs C calling conventions. ++// Note 3: Region 0-3 is even aligned, with pad2 as needed. Region 3-5 is ++// even aligned with pad0 as needed. ++// Region 6 is even aligned. Region 6-7 is NOT even aligned; ++// region 6-11 is even aligned; it may be padded out more so that ++// the region from SP to FP meets the minimum stack alignment. ++// Note 4: For I2C adapters, the incoming FP may not meet the minimum stack ++// alignment. Region 11, pad1, may be dynamically extended so that ++// SP meets the minimum alignment. ++ ++ ++frame %{ ++ ++ stack_direction(TOWARDS_LOW); ++ ++ // These two registers define part of the calling convention ++ // between compiled code and the interpreter. ++ // SEE StartI2CNode::calling_convention & StartC2INode::calling_convention & StartOSRNode::calling_convention ++ // for more information. ++ ++ inline_cache_reg(T1); // Inline Cache Register ++ interpreter_method_oop_reg(S3); // Method Oop Register when calling interpreter ++ ++ // Optional: name the operand used by cisc-spilling to access [stack_pointer + offset] ++ cisc_spilling_operand_name(indOffset32); ++ ++ // Number of stack slots consumed by locking an object ++ // generate Compile::sync_stack_slots ++ sync_stack_slots(2); ++ ++ frame_pointer(SP); ++ ++ // Interpreter stores its frame pointer in a register which is ++ // stored to the stack by I2CAdaptors. ++ // I2CAdaptors convert from interpreted java to compiled java. ++ ++ interpreter_frame_pointer(FP); ++ ++ // generate Matcher::stack_alignment ++ stack_alignment(StackAlignmentInBytes); //wordSize = sizeof(char*); ++ ++ // Number of stack slots between incoming argument block and the start of ++ // a new frame. The PROLOG must add this many slots to the stack. The ++ // EPILOG must remove this many slots. ++ in_preserve_stack_slots(4); //Now VerifyStackAtCalls is defined as false ! 
Leave two stack slots for ra and fp ++ ++ // Number of outgoing stack slots killed above the out_preserve_stack_slots ++ // for calls to C. Supports the var-args backing area for register parms. ++ varargs_C_out_slots_killed(0); ++ ++ // The after-PROLOG location of the return address. Location of ++ // return address specifies a type (REG or STACK) and a number ++ // representing the register number (i.e. - use a register name) or ++ // stack slot. ++ // Ret Addr is on stack in slot 0 if no locks or verification or alignment. ++ // Otherwise, it is above the locks and verification slot and alignment word ++ //return_addr(STACK -1+ round_to(1+VerifyStackAtCalls+Compile::current()->sync()*Compile::current()->sync_stack_slots(),WordsPerLong)); ++ return_addr(REG RA); ++ ++ // Body of function which returns an integer array locating ++ // arguments either in registers or in stack slots. Passed an array ++ // of ideal registers called "sig" and a "length" count. Stack-slot ++ // offsets are based on outgoing arguments, i.e. a CALLER setting up ++ // arguments for a CALLEE. Incoming stack arguments are ++ // automatically biased by the preserve_stack_slots field above. ++ ++ ++ // will generated to Matcher::calling_convention(OptoRegPair *sig, uint length, bool is_outgoing) ++ // StartNode::calling_convention call this. ++ calling_convention %{ ++ SharedRuntime::java_calling_convention(sig_bt, regs, length, false); ++ %} ++ ++ ++ ++ ++ // Body of function which returns an integer array locating ++ // arguments either in registers or in stack slots. Passed an array ++ // of ideal registers called "sig" and a "length" count. Stack-slot ++ // offsets are based on outgoing arguments, i.e. a CALLER setting up ++ // arguments for a CALLEE. Incoming stack arguments are ++ // automatically biased by the preserve_stack_slots field above. ++ ++ ++ // SEE CallRuntimeNode::calling_convention for more information. ++ c_calling_convention %{ ++ (void) SharedRuntime::c_calling_convention(sig_bt, regs, /*regs2=*/NULL, length); ++ %} ++ ++ ++ // Location of C & interpreter return values ++ // register(s) contain(s) return value for Op_StartI2C and Op_StartOSR. ++ // SEE Matcher::match. ++ c_return_value %{ ++ assert( ideal_reg >= Op_RegI && ideal_reg <= Op_RegL, "only return normal values" ); ++ /* -- , -- , Op_RegN, Op_RegI, Op_RegP, Op_RegF, Op_RegD, Op_RegL */ ++ static int lo[Op_RegL+1] = { 0, 0, V0_num, V0_num, V0_num, F0_num, F0_num, V0_num }; ++ static int hi[Op_RegL+1] = { 0, 0, OptoReg::Bad, OptoReg::Bad, V0_H_num, OptoReg::Bad, F0_H_num, V0_H_num }; ++ return OptoRegPair(hi[ideal_reg],lo[ideal_reg]); ++ %} ++ ++ // Location of return values ++ // register(s) contain(s) return value for Op_StartC2I and Op_Start. ++ // SEE Matcher::match. 
++ ++ return_value %{ ++ assert( ideal_reg >= Op_RegI && ideal_reg <= Op_RegL, "only return normal values" ); ++ /* -- , -- , Op_RegN, Op_RegI, Op_RegP, Op_RegF, Op_RegD, Op_RegL */ ++ static int lo[Op_RegL+1] = { 0, 0, V0_num, V0_num, V0_num, F0_num, F0_num, V0_num }; ++ static int hi[Op_RegL+1] = { 0, 0, OptoReg::Bad, OptoReg::Bad, V0_H_num, OptoReg::Bad, F0_H_num, V0_H_num}; ++ return OptoRegPair(hi[ideal_reg],lo[ideal_reg]); ++ %} ++ ++%} ++ ++//----------ATTRIBUTES--------------------------------------------------------- ++//----------Operand Attributes------------------------------------------------- ++op_attrib op_cost(0); // Required cost attribute ++ ++//----------Instruction Attributes--------------------------------------------- ++ins_attrib ins_cost(100); // Required cost attribute ++ins_attrib ins_size(32); // Required size attribute (in bits) ++ins_attrib ins_pc_relative(0); // Required PC Relative flag ++ins_attrib ins_short_branch(0); // Required flag: is this instruction a ++ // non-matching short branch variant of some ++ // long branch? ++ins_attrib ins_alignment(4); // Required alignment attribute (must be a power of 2) ++ // specifies the alignment that some part of the instruction (not ++ // necessarily the start) requires. If > 1, a compute_padding() ++ // function must be provided for the instruction ++ ++//----------OPERANDS----------------------------------------------------------- ++// Operand definitions must precede instruction definitions for correct parsing ++// in the ADLC because operands constitute user defined types which are used in ++// instruction definitions. ++ ++// Vectors ++ ++operand vecX() %{ ++ constraint(ALLOC_IN_RC(vectorx_reg)); ++ match(VecX); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand vecY() %{ ++ constraint(ALLOC_IN_RC(vectory_reg)); ++ match(VecY); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++// Flags register, used as output of compare instructions ++operand FlagsReg() %{ ++ constraint(ALLOC_IN_RC(t0_reg)); ++ match(RegFlags); ++ ++ format %{ "T0" %} ++ interface(REG_INTER); ++%} ++ ++//----------Simple Operands---------------------------------------------------- ++// TODO: Should we need to define some more special immediate number ? 
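The immediate operand predicates that follow mirror LoongArch instruction immediate-field widths; immI12, for example, accepts [-2048, 2047], the range of a signed 12-bit field. Below is a minimal standalone C++ sketch (not part of the patch) of the equivalent range test; fits_simm is a hypothetical helper used only for illustration, not HotSpot's Assembler::is_simm:

#include <cassert>
#include <cstdint>

// A value fits in an n-bit signed immediate iff -2^(n-1) <= value <= 2^(n-1) - 1.
static bool fits_simm(int64_t value, unsigned bits) {
  const int64_t lo = -(int64_t(1) << (bits - 1));
  const int64_t hi = (int64_t(1) << (bits - 1)) - 1;
  return lo <= value && value <= hi;
}

int main() {
  assert(fits_simm(2047, 12));    // upper bound of immI12
  assert(fits_simm(-2048, 12));   // lower bound of immI12
  assert(!fits_simm(2048, 12));   // one past the 12-bit signed range
  assert(fits_simm(511, 10));     // upper bound of immI10
  return 0;
}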
++// Immediate Operands ++// Integer Immediate ++operand immI() %{ ++ match(ConI); ++ ++ op_cost(20); ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++operand immIU1() %{ ++ predicate((0 <= n->get_int()) && (n->get_int() <= 1)); ++ match(ConI); ++ ++ op_cost(5); ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++operand immIU2() %{ ++ predicate((0 <= n->get_int()) && (n->get_int() <= 3)); ++ match(ConI); ++ ++ op_cost(5); ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++operand immIU3() %{ ++ predicate((0 <= n->get_int()) && (n->get_int() <= 7)); ++ match(ConI); ++ ++ op_cost(5); ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++operand immIU4() %{ ++ predicate((0 <= n->get_int()) && (n->get_int() <= 15)); ++ match(ConI); ++ ++ op_cost(5); ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++operand immIU5() %{ ++ predicate((0 <= n->get_int()) && (n->get_int() <= 31)); ++ match(ConI); ++ ++ op_cost(5); ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++operand immIU6() %{ ++ predicate((0 <= n->get_int()) && (n->get_int() <= 63)); ++ match(ConI); ++ ++ op_cost(5); ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++operand immIU8() %{ ++ predicate((0 <= n->get_int()) && (n->get_int() <= 255)); ++ match(ConI); ++ ++ op_cost(5); ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++operand immI10() %{ ++ predicate((-512 <= n->get_int()) && (n->get_int() <= 511)); ++ match(ConI); ++ ++ op_cost(5); ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++operand immI12() %{ ++ predicate((-2048 <= n->get_int()) && (n->get_int() <= 2047)); ++ match(ConI); ++ ++ op_cost(5); ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++operand immI_M65536() %{ ++ predicate(n->get_int() == -65536); ++ match(ConI); ++ ++ op_cost(5); ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++// Constant for decrement ++operand immI_M1() %{ ++ predicate(n->get_int() == -1); ++ match(ConI); ++ ++ op_cost(0); ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++// Constant for zero ++operand immI_0() %{ ++ predicate(n->get_int() == 0); ++ match(ConI); ++ ++ op_cost(0); ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++operand immI_1() %{ ++ predicate(n->get_int() == 1); ++ match(ConI); ++ ++ op_cost(0); ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++operand immI_2() %{ ++ predicate(n->get_int() == 2); ++ match(ConI); ++ ++ op_cost(0); ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++operand immI_16() %{ ++ predicate(n->get_int() == 16); ++ match(ConI); ++ ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++operand immI_24() %{ ++ predicate(n->get_int() == 24); ++ match(ConI); ++ ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++// Constant for long shifts ++operand immI_32() %{ ++ predicate(n->get_int() == 32); ++ match(ConI); ++ ++ op_cost(0); ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++// Constant for byte-wide masking ++operand immI_255() %{ ++ predicate(n->get_int() == 255); ++ match(ConI); ++ ++ op_cost(0); ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++operand immI_65535() %{ ++ predicate(n->get_int() == 65535); ++ match(ConI); ++ ++ op_cost(5); ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++operand immI_MaxI() %{ ++ predicate(n->get_int() == 2147483647); ++ match(ConI); ++ ++ op_cost(0); ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++operand immI_M2047_2048() %{ ++ predicate((-2047 <= n->get_int()) && (n->get_int() <= 2048)); ++ match(ConI); ++ ++ op_cost(10); ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++// Valid scale values for addressing modes 
++operand immI_0_3() %{ ++ predicate(0 <= n->get_int() && (n->get_int() <= 3)); ++ match(ConI); ++ ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++operand immI_0_31() %{ ++ predicate(n->get_int() >= 0 && n->get_int() <= 31); ++ match(ConI); ++ ++ op_cost(0); ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++operand immI_0_4095() %{ ++ predicate(n->get_int() >= 0 && n->get_int() <= 4095); ++ match(ConI); ++ op_cost(0); ++ ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++operand immI_1_4() %{ ++ predicate(1 <= n->get_int() && (n->get_int() <= 4)); ++ match(ConI); ++ ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++operand immI_32_63() %{ ++ predicate(n->get_int() >= 32 && n->get_int() <= 63); ++ match(ConI); ++ op_cost(0); ++ ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++operand immI_M128_255() %{ ++ predicate((-128 <= n->get_int()) && (n->get_int() <= 255)); ++ match(ConI); ++ ++ op_cost(5); ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++// Operand for non-negtive integer mask ++operand immI_nonneg_mask() %{ ++ predicate((n->get_int() >= 0) && (Assembler::is_int_mask(n->get_int()) != -1)); ++ match(ConI); ++ ++ op_cost(0); ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++// Long Immediate ++operand immL() %{ ++ match(ConL); ++ ++ op_cost(20); ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++operand immLU5() %{ ++ predicate((0 <= n->get_long()) && (n->get_long() <= 31)); ++ match(ConL); ++ ++ op_cost(5); ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++operand immL10() %{ ++ predicate((-512 <= n->get_long()) && (n->get_long() <= 511)); ++ match(ConL); ++ ++ op_cost(5); ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++operand immL12() %{ ++ predicate((-2048 <= n->get_long()) && (n->get_long() <= 2047)); ++ match(ConL); ++ ++ op_cost(10); ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++// Long Immediate 32-bit signed ++operand immL32() ++%{ ++ predicate(n->get_long() == (int)n->get_long()); ++ match(ConL); ++ ++ op_cost(15); ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++// bit 3..6 zero ++operand immL_M121() %{ ++ predicate(n->get_long() == -121L); ++ match(ConL); ++ op_cost(0); ++ ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++// bit 0..2 zero ++operand immL_M8() %{ ++ predicate(n->get_long() == -8L); ++ match(ConL); ++ op_cost(0); ++ ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++// bit 1..2 zero ++operand immL_M7() %{ ++ predicate(n->get_long() == -7L); ++ match(ConL); ++ op_cost(0); ++ ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++// bit 2 zero ++operand immL_M5() %{ ++ predicate(n->get_long() == -5L); ++ match(ConL); ++ op_cost(0); ++ ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++// bit 0..1 zero ++operand immL_M4() %{ ++ predicate(n->get_long() == -4L); ++ match(ConL); ++ op_cost(0); ++ ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++// Long Immediate zero ++operand immL_0() %{ ++ predicate(n->get_long() == 0L); ++ match(ConL); ++ op_cost(0); ++ ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++operand immL_7() %{ ++ predicate(n->get_long() == 7L); ++ match(ConL); ++ op_cost(0); ++ ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++operand immL_MaxUI() %{ ++ predicate(n->get_long() == 0xFFFFFFFFL); ++ match(ConL); ++ op_cost(20); ++ ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++operand immL_M2047_2048() %{ ++ predicate((-2047 <= n->get_long()) && (n->get_long() <= 2048)); ++ match(ConL); ++ ++ op_cost(10); ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++operand immL_0_4095() %{ ++ 
predicate(n->get_long() >= 0 && n->get_long() <= 4095); ++ match(ConL); ++ op_cost(0); ++ ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++// Operand for non-negtive long mask ++operand immL_nonneg_mask() %{ ++ predicate((n->get_long() >= 0) && (Assembler::is_jlong_mask(n->get_long()) != -1)); ++ match(ConL); ++ ++ op_cost(0); ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++// Pointer Immediate ++operand immP() %{ ++ match(ConP); ++ ++ op_cost(10); ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++// NULL Pointer Immediate ++operand immP_0() %{ ++ predicate(n->get_ptr() == 0); ++ match(ConP); ++ op_cost(0); ++ ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++// Pointer Immediate ++operand immP_no_oop_cheap() %{ ++ predicate(!n->bottom_type()->isa_oop_ptr()); ++ match(ConP); ++ ++ op_cost(5); ++ // formats are generated automatically for constants and base registers ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++// Pointer for polling page ++operand immP_poll() %{ ++ predicate(n->get_ptr() != 0 && n->get_ptr() == (intptr_t)os::get_polling_page()); ++ match(ConP); ++ op_cost(5); ++ ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++// Pointer Immediate ++operand immN() %{ ++ match(ConN); ++ ++ op_cost(10); ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++// NULL Pointer Immediate ++operand immN_0() %{ ++ predicate(n->get_narrowcon() == 0); ++ match(ConN); ++ ++ op_cost(5); ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++operand immNKlass() %{ ++ match(ConNKlass); ++ ++ op_cost(10); ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++// Single-precision floating-point immediate ++operand immF() %{ ++ match(ConF); ++ ++ op_cost(20); ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++// Single-precision floating-point zero ++operand immF_0() %{ ++ predicate(jint_cast(n->getf()) == 0); ++ match(ConF); ++ ++ op_cost(5); ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++// Double-precision floating-point immediate ++operand immD() %{ ++ match(ConD); ++ ++ op_cost(20); ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++// Double-precision floating-point zero ++operand immD_0() %{ ++ predicate(jlong_cast(n->getd()) == 0); ++ match(ConD); ++ ++ op_cost(5); ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++// Register Operands ++// Integer Register ++operand mRegI() %{ ++ constraint(ALLOC_IN_RC(int_reg)); ++ match(RegI); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand no_Ax_mRegI() %{ ++ constraint(ALLOC_IN_RC(no_Ax_int_reg)); ++ match(RegI); ++ match(mRegI); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand mS0RegI() %{ ++ constraint(ALLOC_IN_RC(s0_reg)); ++ match(RegI); ++ match(mRegI); ++ ++ format %{ "S0" %} ++ interface(REG_INTER); ++%} ++ ++operand mS1RegI() %{ ++ constraint(ALLOC_IN_RC(s1_reg)); ++ match(RegI); ++ match(mRegI); ++ ++ format %{ "S1" %} ++ interface(REG_INTER); ++%} ++ ++operand mS3RegI() %{ ++ constraint(ALLOC_IN_RC(s3_reg)); ++ match(RegI); ++ match(mRegI); ++ ++ format %{ "S3" %} ++ interface(REG_INTER); ++%} ++ ++operand mS4RegI() %{ ++ constraint(ALLOC_IN_RC(s4_reg)); ++ match(RegI); ++ match(mRegI); ++ ++ format %{ "S4" %} ++ interface(REG_INTER); ++%} ++ ++operand mS5RegI() %{ ++ constraint(ALLOC_IN_RC(s5_reg)); ++ match(RegI); ++ match(mRegI); ++ ++ format %{ "S5" %} ++ interface(REG_INTER); ++%} ++ ++operand mS6RegI() %{ ++ constraint(ALLOC_IN_RC(s6_reg)); ++ match(RegI); ++ match(mRegI); ++ ++ format %{ "S6" %} ++ interface(REG_INTER); ++%} ++ ++operand mS7RegI() %{ ++ constraint(ALLOC_IN_RC(s7_reg)); 
++ match(RegI); ++ match(mRegI); ++ ++ format %{ "S7" %} ++ interface(REG_INTER); ++%} ++ ++ ++operand mT0RegI() %{ ++ constraint(ALLOC_IN_RC(t0_reg)); ++ match(RegI); ++ match(mRegI); ++ ++ format %{ "T0" %} ++ interface(REG_INTER); ++%} ++ ++operand mT1RegI() %{ ++ constraint(ALLOC_IN_RC(t1_reg)); ++ match(RegI); ++ match(mRegI); ++ ++ format %{ "T1" %} ++ interface(REG_INTER); ++%} ++ ++operand mT2RegI() %{ ++ constraint(ALLOC_IN_RC(t2_reg)); ++ match(RegI); ++ match(mRegI); ++ ++ format %{ "T2" %} ++ interface(REG_INTER); ++%} ++ ++operand mT3RegI() %{ ++ constraint(ALLOC_IN_RC(t3_reg)); ++ match(RegI); ++ match(mRegI); ++ ++ format %{ "T3" %} ++ interface(REG_INTER); ++%} ++ ++operand mT8RegI() %{ ++ constraint(ALLOC_IN_RC(t8_reg)); ++ match(RegI); ++ match(mRegI); ++ ++ format %{ "T8" %} ++ interface(REG_INTER); ++%} ++ ++operand mT4RegI() %{ ++ constraint(ALLOC_IN_RC(t4_reg)); ++ match(RegI); ++ match(mRegI); ++ ++ format %{ "T4" %} ++ interface(REG_INTER); ++%} ++ ++operand mA0RegI() %{ ++ constraint(ALLOC_IN_RC(a0_reg)); ++ match(RegI); ++ match(mRegI); ++ ++ format %{ "A0" %} ++ interface(REG_INTER); ++%} ++ ++operand mA1RegI() %{ ++ constraint(ALLOC_IN_RC(a1_reg)); ++ match(RegI); ++ match(mRegI); ++ ++ format %{ "A1" %} ++ interface(REG_INTER); ++%} ++ ++operand mA2RegI() %{ ++ constraint(ALLOC_IN_RC(a2_reg)); ++ match(RegI); ++ match(mRegI); ++ ++ format %{ "A2" %} ++ interface(REG_INTER); ++%} ++ ++operand mA3RegI() %{ ++ constraint(ALLOC_IN_RC(a3_reg)); ++ match(RegI); ++ match(mRegI); ++ ++ format %{ "A3" %} ++ interface(REG_INTER); ++%} ++ ++operand mA4RegI() %{ ++ constraint(ALLOC_IN_RC(a4_reg)); ++ match(RegI); ++ match(mRegI); ++ ++ format %{ "A4" %} ++ interface(REG_INTER); ++%} ++ ++operand mA5RegI() %{ ++ constraint(ALLOC_IN_RC(a5_reg)); ++ match(RegI); ++ match(mRegI); ++ ++ format %{ "A5" %} ++ interface(REG_INTER); ++%} ++ ++operand mA6RegI() %{ ++ constraint(ALLOC_IN_RC(a6_reg)); ++ match(RegI); ++ match(mRegI); ++ ++ format %{ "A6" %} ++ interface(REG_INTER); ++%} ++ ++operand mA7RegI() %{ ++ constraint(ALLOC_IN_RC(a7_reg)); ++ match(RegI); ++ match(mRegI); ++ ++ format %{ "A7" %} ++ interface(REG_INTER); ++%} ++ ++operand mRegN() %{ ++ constraint(ALLOC_IN_RC(int_reg)); ++ match(RegN); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand t0_RegN() %{ ++ constraint(ALLOC_IN_RC(t0_reg)); ++ match(RegN); ++ match(mRegN); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand t1_RegN() %{ ++ constraint(ALLOC_IN_RC(t1_reg)); ++ match(RegN); ++ match(mRegN); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand t3_RegN() %{ ++ constraint(ALLOC_IN_RC(t3_reg)); ++ match(RegN); ++ match(mRegN); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand t8_RegN() %{ ++ constraint(ALLOC_IN_RC(t8_reg)); ++ match(RegN); ++ match(mRegN); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand a0_RegN() %{ ++ constraint(ALLOC_IN_RC(a0_reg)); ++ match(RegN); ++ match(mRegN); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand a1_RegN() %{ ++ constraint(ALLOC_IN_RC(a1_reg)); ++ match(RegN); ++ match(mRegN); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand a2_RegN() %{ ++ constraint(ALLOC_IN_RC(a2_reg)); ++ match(RegN); ++ match(mRegN); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand a3_RegN() %{ ++ constraint(ALLOC_IN_RC(a3_reg)); ++ match(RegN); ++ match(mRegN); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand a4_RegN() %{ ++ constraint(ALLOC_IN_RC(a4_reg)); ++ match(RegN); ++ match(mRegN); ++ ++ 
format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand a5_RegN() %{ ++ constraint(ALLOC_IN_RC(a5_reg)); ++ match(RegN); ++ match(mRegN); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand a6_RegN() %{ ++ constraint(ALLOC_IN_RC(a6_reg)); ++ match(RegN); ++ match(mRegN); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand a7_RegN() %{ ++ constraint(ALLOC_IN_RC(a7_reg)); ++ match(RegN); ++ match(mRegN); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand s0_RegN() %{ ++ constraint(ALLOC_IN_RC(s0_reg)); ++ match(RegN); ++ match(mRegN); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand s1_RegN() %{ ++ constraint(ALLOC_IN_RC(s1_reg)); ++ match(RegN); ++ match(mRegN); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand s2_RegN() %{ ++ constraint(ALLOC_IN_RC(s2_reg)); ++ match(RegN); ++ match(mRegN); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand s3_RegN() %{ ++ constraint(ALLOC_IN_RC(s3_reg)); ++ match(RegN); ++ match(mRegN); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand s4_RegN() %{ ++ constraint(ALLOC_IN_RC(s4_reg)); ++ match(RegN); ++ match(mRegN); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand s5_RegN() %{ ++ constraint(ALLOC_IN_RC(s5_reg)); ++ match(RegN); ++ match(mRegN); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand s6_RegN() %{ ++ constraint(ALLOC_IN_RC(s6_reg)); ++ match(RegN); ++ match(mRegN); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand s7_RegN() %{ ++ constraint(ALLOC_IN_RC(s7_reg)); ++ match(RegN); ++ match(mRegN); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++// Pointer Register ++operand mRegP() %{ ++ constraint(ALLOC_IN_RC(p_reg)); ++ match(RegP); ++ match(a0_RegP); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand no_T8_mRegP() %{ ++ constraint(ALLOC_IN_RC(no_T8_p_reg)); ++ match(RegP); ++ match(mRegP); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand no_Ax_mRegP() %{ ++ constraint(ALLOC_IN_RC(no_Ax_p_reg)); ++ match(RegP); ++ match(mRegP); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand s1_RegP() ++%{ ++ constraint(ALLOC_IN_RC(s1_long_reg)); ++ match(RegP); ++ match(mRegP); ++ match(no_T8_mRegP); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand s3_RegP() ++%{ ++ constraint(ALLOC_IN_RC(s3_long_reg)); ++ match(RegP); ++ match(mRegP); ++ match(no_T8_mRegP); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand s4_RegP() ++%{ ++ constraint(ALLOC_IN_RC(s4_long_reg)); ++ match(RegP); ++ match(mRegP); ++ match(no_T8_mRegP); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand s5_RegP() ++%{ ++ constraint(ALLOC_IN_RC(s5_long_reg)); ++ match(RegP); ++ match(mRegP); ++ match(no_T8_mRegP); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand s6_RegP() ++%{ ++ constraint(ALLOC_IN_RC(s6_long_reg)); ++ match(RegP); ++ match(mRegP); ++ match(no_T8_mRegP); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand s7_RegP() ++%{ ++ constraint(ALLOC_IN_RC(s7_long_reg)); ++ match(RegP); ++ match(mRegP); ++ match(no_T8_mRegP); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand t0_RegP() ++%{ ++ constraint(ALLOC_IN_RC(t0_long_reg)); ++ match(RegP); ++ match(mRegP); ++ match(no_T8_mRegP); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand t1_RegP() ++%{ ++ constraint(ALLOC_IN_RC(t1_long_reg)); ++ match(RegP); ++ match(mRegP); ++ match(no_T8_mRegP); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand t2_RegP() ++%{ ++ 
constraint(ALLOC_IN_RC(t2_long_reg)); ++ match(RegP); ++ match(mRegP); ++ match(no_T8_mRegP); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand t3_RegP() ++%{ ++ constraint(ALLOC_IN_RC(t3_long_reg)); ++ match(RegP); ++ match(mRegP); ++ match(no_T8_mRegP); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand t8_RegP() ++%{ ++ constraint(ALLOC_IN_RC(t8_long_reg)); ++ match(RegP); ++ match(mRegP); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand a0_RegP() ++%{ ++ constraint(ALLOC_IN_RC(a0_long_reg)); ++ match(RegP); ++ match(mRegP); ++ match(no_T8_mRegP); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand a1_RegP() ++%{ ++ constraint(ALLOC_IN_RC(a1_long_reg)); ++ match(RegP); ++ match(mRegP); ++ match(no_T8_mRegP); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand a2_RegP() ++%{ ++ constraint(ALLOC_IN_RC(a2_long_reg)); ++ match(RegP); ++ match(mRegP); ++ match(no_T8_mRegP); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand a3_RegP() ++%{ ++ constraint(ALLOC_IN_RC(a3_long_reg)); ++ match(RegP); ++ match(mRegP); ++ match(no_T8_mRegP); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand a4_RegP() ++%{ ++ constraint(ALLOC_IN_RC(a4_long_reg)); ++ match(RegP); ++ match(mRegP); ++ match(no_T8_mRegP); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++ ++operand a5_RegP() ++%{ ++ constraint(ALLOC_IN_RC(a5_long_reg)); ++ match(RegP); ++ match(mRegP); ++ match(no_T8_mRegP); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand a6_RegP() ++%{ ++ constraint(ALLOC_IN_RC(a6_long_reg)); ++ match(RegP); ++ match(mRegP); ++ match(no_T8_mRegP); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand a7_RegP() ++%{ ++ constraint(ALLOC_IN_RC(a7_long_reg)); ++ match(RegP); ++ match(mRegP); ++ match(no_T8_mRegP); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand v0_RegP() ++%{ ++ constraint(ALLOC_IN_RC(v0_long_reg)); ++ match(RegP); ++ match(mRegP); ++ match(no_T8_mRegP); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand v1_RegP() ++%{ ++ constraint(ALLOC_IN_RC(v1_long_reg)); ++ match(RegP); ++ match(mRegP); ++ match(no_T8_mRegP); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand mRegL() %{ ++ constraint(ALLOC_IN_RC(long_reg)); ++ match(RegL); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand mRegI2L(mRegI reg) %{ ++ match(ConvI2L reg); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand mRegL2I(mRegL reg) %{ ++ match(ConvL2I reg); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand v0RegL() %{ ++ constraint(ALLOC_IN_RC(v0_long_reg)); ++ match(RegL); ++ match(mRegL); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand v1RegL() %{ ++ constraint(ALLOC_IN_RC(v1_long_reg)); ++ match(RegL); ++ match(mRegL); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand a0RegL() %{ ++ constraint(ALLOC_IN_RC(a0_long_reg)); ++ match(RegL); ++ match(mRegL); ++ ++ format %{ "A0" %} ++ interface(REG_INTER); ++%} ++ ++operand a1RegL() %{ ++ constraint(ALLOC_IN_RC(a1_long_reg)); ++ match(RegL); ++ match(mRegL); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand a2RegL() %{ ++ constraint(ALLOC_IN_RC(a2_long_reg)); ++ match(RegL); ++ match(mRegL); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand a3RegL() %{ ++ constraint(ALLOC_IN_RC(a3_long_reg)); ++ match(RegL); ++ match(mRegL); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand t0RegL() %{ ++ constraint(ALLOC_IN_RC(t0_long_reg)); ++ match(RegL); ++ match(mRegL); ++ ++ 
format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand t1RegL() %{ ++ constraint(ALLOC_IN_RC(t1_long_reg)); ++ match(RegL); ++ match(mRegL); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand t3RegL() %{ ++ constraint(ALLOC_IN_RC(t3_long_reg)); ++ match(RegL); ++ match(mRegL); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand t8RegL() %{ ++ constraint(ALLOC_IN_RC(t8_long_reg)); ++ match(RegL); ++ match(mRegL); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand a4RegL() %{ ++ constraint(ALLOC_IN_RC(a4_long_reg)); ++ match(RegL); ++ match(mRegL); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand a5RegL() %{ ++ constraint(ALLOC_IN_RC(a5_long_reg)); ++ match(RegL); ++ match(mRegL); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand a6RegL() %{ ++ constraint(ALLOC_IN_RC(a6_long_reg)); ++ match(RegL); ++ match(mRegL); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand a7RegL() %{ ++ constraint(ALLOC_IN_RC(a7_long_reg)); ++ match(RegL); ++ match(mRegL); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand s0RegL() %{ ++ constraint(ALLOC_IN_RC(s0_long_reg)); ++ match(RegL); ++ match(mRegL); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand s1RegL() %{ ++ constraint(ALLOC_IN_RC(s1_long_reg)); ++ match(RegL); ++ match(mRegL); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand s3RegL() %{ ++ constraint(ALLOC_IN_RC(s3_long_reg)); ++ match(RegL); ++ match(mRegL); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand s4RegL() %{ ++ constraint(ALLOC_IN_RC(s4_long_reg)); ++ match(RegL); ++ match(mRegL); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand s7RegL() %{ ++ constraint(ALLOC_IN_RC(s7_long_reg)); ++ match(RegL); ++ match(mRegL); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++// Floating register operands ++operand regF() %{ ++ constraint(ALLOC_IN_RC(flt_reg)); ++ match(RegF); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++//Double Precision Floating register operands ++operand regD() %{ ++ constraint(ALLOC_IN_RC(dbl_reg)); ++ match(RegD); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++//----------Memory Operands---------------------------------------------------- ++// Indirect Memory Operand ++operand indirect(mRegP reg) %{ ++ constraint(ALLOC_IN_RC(p_reg)); ++ match(reg); ++ ++ format %{ "[$reg] @ indirect" %} ++ interface(MEMORY_INTER) %{ ++ base($reg); ++ index(0x0); /* NO_INDEX */ ++ scale(0x0); ++ disp(0x0); ++ %} ++%} ++ ++// Indirect Memory Plus Short Offset Operand ++operand indOffset12(mRegP reg, immL12 off) ++%{ ++ constraint(ALLOC_IN_RC(p_reg)); ++ match(AddP reg off); ++ ++ op_cost(10); ++ format %{ "[$reg + $off (12-bit)] @ indOffset12" %} ++ interface(MEMORY_INTER) %{ ++ base($reg); ++ index(0x0); /* NO_INDEX */ ++ scale(0x0); ++ disp($off); ++ %} ++%} ++ ++operand indOffset12I2L(mRegP reg, immI12 off) ++%{ ++ constraint(ALLOC_IN_RC(p_reg)); ++ match(AddP reg (ConvI2L off)); ++ ++ op_cost(10); ++ format %{ "[$reg + $off (12-bit)] @ indOffset12I2L" %} ++ interface(MEMORY_INTER) %{ ++ base($reg); ++ index(0x0); /* NO_INDEX */ ++ scale(0x0); ++ disp($off); ++ %} ++%} ++ ++// Indirect Memory Plus Index Register ++operand indIndex(mRegP addr, mRegL index) %{ ++ constraint(ALLOC_IN_RC(p_reg)); ++ match(AddP addr index); ++ ++ op_cost(20); ++ format %{"[$addr + $index] @ indIndex" %} ++ interface(MEMORY_INTER) %{ ++ base($addr); ++ index($index); ++ scale(0x0); ++ disp(0x0); ++ %} ++%} ++ ++operand indIndexI2L(mRegP reg, mRegI ireg) ++%{ ++ 
constraint(ALLOC_IN_RC(ptr_reg)); ++ match(AddP reg (ConvI2L ireg)); ++ op_cost(10); ++ format %{ "[$reg + $ireg] @ indIndexI2L" %} ++ interface(MEMORY_INTER) %{ ++ base($reg); ++ index($ireg); ++ scale(0x0); ++ disp(0x0); ++ %} ++%} ++ ++// Indirect Memory Operand ++operand indirectNarrow(mRegN reg) ++%{ ++ predicate(Universe::narrow_oop_shift() == 0); ++ constraint(ALLOC_IN_RC(p_reg)); ++ op_cost(10); ++ match(DecodeN reg); ++ ++ format %{ "[$reg] @ indirectNarrow" %} ++ interface(MEMORY_INTER) %{ ++ base($reg); ++ index(0x0); ++ scale(0x0); ++ disp(0x0); ++ %} ++%} ++ ++// Indirect Memory Plus Short Offset Operand ++operand indOffset12Narrow(mRegN reg, immL12 off) ++%{ ++ predicate(Universe::narrow_oop_shift() == 0); ++ constraint(ALLOC_IN_RC(p_reg)); ++ op_cost(10); ++ match(AddP (DecodeN reg) off); ++ ++ format %{ "[$reg + $off (12-bit)] @ indOffset12Narrow" %} ++ interface(MEMORY_INTER) %{ ++ base($reg); ++ index(0x0); ++ scale(0x0); ++ disp($off); ++ %} ++%} ++ ++//----------Conditional Branch Operands---------------------------------------- ++// Comparison Op - This is the operation of the comparison, and is limited to ++// the following set of codes: ++// L (<), LE (<=), G (>), GE (>=), E (==), NE (!=) ++// ++// Other attributes of the comparison, such as unsignedness, are specified ++// by the comparison instruction that sets a condition code flags register. ++// That result is represented by a flags operand whose subtype is appropriate ++// to the unsignedness (etc.) of the comparison. ++// ++// Later, the instruction which matches both the Comparison Op (a Bool) and ++// the flags (produced by the Cmp) specifies the coding of the comparison op ++// by matching a specific subtype of Bool operand below, such as cmpOp. ++ ++// Comparision Code ++operand cmpOp() %{ ++ match(Bool); ++ ++ format %{ "" %} ++ interface(COND_INTER) %{ ++ equal(0x01); ++ not_equal(0x02); ++ greater(0x03); ++ greater_equal(0x04); ++ less(0x05); ++ less_equal(0x06); ++ overflow(0x7); ++ no_overflow(0x8); ++ %} ++%} ++ ++operand cmpOpEqNe() %{ ++ match(Bool); ++ predicate(n->as_Bool()->_test._test == BoolTest::ne ++ || n->as_Bool()->_test._test == BoolTest::eq); ++ ++ format %{ "" %} ++ interface(COND_INTER) %{ ++ equal(0x01); ++ not_equal(0x02); ++ greater(0x03); ++ greater_equal(0x04); ++ less(0x05); ++ less_equal(0x06); ++ overflow(0x7); ++ no_overflow(0x8); ++ %} ++%} ++ ++//----------Special Memory Operands-------------------------------------------- ++// Stack Slot Operand - This operand is used for loading and storing temporary ++// values on the stack where a match requires a value to ++// flow through memory. 
++operand stackSlotP(sRegP reg) %{ ++ constraint(ALLOC_IN_RC(stack_slots)); ++ // No match rule because this operand is only generated in matching ++ op_cost(50); ++ format %{ "[$reg]" %} ++ interface(MEMORY_INTER) %{ ++ base(0x1d); // SP ++ index(0x0); // No Index ++ scale(0x0); // No Scale ++ disp($reg); // Stack Offset ++ %} ++%} ++ ++operand stackSlotI(sRegI reg) %{ ++ constraint(ALLOC_IN_RC(stack_slots)); ++ // No match rule because this operand is only generated in matching ++ op_cost(50); ++ format %{ "[$reg]" %} ++ interface(MEMORY_INTER) %{ ++ base(0x1d); // SP ++ index(0x0); // No Index ++ scale(0x0); // No Scale ++ disp($reg); // Stack Offset ++ %} ++%} ++ ++operand stackSlotF(sRegF reg) %{ ++ constraint(ALLOC_IN_RC(stack_slots)); ++ // No match rule because this operand is only generated in matching ++ op_cost(50); ++ format %{ "[$reg]" %} ++ interface(MEMORY_INTER) %{ ++ base(0x1d); // SP ++ index(0x0); // No Index ++ scale(0x0); // No Scale ++ disp($reg); // Stack Offset ++ %} ++%} ++ ++operand stackSlotD(sRegD reg) %{ ++ constraint(ALLOC_IN_RC(stack_slots)); ++ // No match rule because this operand is only generated in matching ++ op_cost(50); ++ format %{ "[$reg]" %} ++ interface(MEMORY_INTER) %{ ++ base(0x1d); // SP ++ index(0x0); // No Index ++ scale(0x0); // No Scale ++ disp($reg); // Stack Offset ++ %} ++%} ++ ++operand stackSlotL(sRegL reg) %{ ++ constraint(ALLOC_IN_RC(stack_slots)); ++ // No match rule because this operand is only generated in matching ++ op_cost(50); ++ format %{ "[$reg]" %} ++ interface(MEMORY_INTER) %{ ++ base(0x1d); // SP ++ index(0x0); // No Index ++ scale(0x0); // No Scale ++ disp($reg); // Stack Offset ++ %} ++%} ++ ++ ++//------------------------OPERAND CLASSES-------------------------------------- ++opclass memory( indirect, indOffset12, indOffset12I2L, indIndex, indIndexI2L, ++ indirectNarrow, indOffset12Narrow); ++opclass memory_loadRange(indOffset12, indirect); ++ ++opclass mRegLorI2L(mRegI2L, mRegL); ++opclass mRegIorL2I( mRegI, mRegL2I); ++ ++//----------PIPELINE----------------------------------------------------------- ++// Rules which define the behavior of the target architectures pipeline. ++ ++pipeline %{ ++ ++ //----------ATTRIBUTES--------------------------------------------------------- ++ attributes %{ ++ fixed_size_instructions; // Fixed size instructions ++ max_instructions_per_bundle = 1; // 1 instruction per bundle ++ max_bundles_per_cycle = 4; // Up to 4 bundles per cycle ++ bundle_unit_size=4; ++ instruction_unit_size = 4; // An instruction is 4 bytes long ++ instruction_fetch_unit_size = 16; // The processor fetches one line ++ instruction_fetch_units = 1; // of 16 bytes ++ ++ // List of nop instructions ++ nops( MachNop ); ++ %} ++ ++ //----------RESOURCES---------------------------------------------------------- ++ // Resources are the functional units available to the machine ++ ++ resources(D1, D2, D3, D4, DECODE = D1 | D2 | D3| D4, ALU1, ALU2, ALU = ALU1 | ALU2, FPU1, FPU2, FPU = FPU1 | FPU2, MEM, BR); ++ ++ //----------PIPELINE DESCRIPTION----------------------------------------------- ++ // Pipeline Description specifies the stages in the machine's pipeline ++ ++ // IF: fetch ++ // ID: decode ++ // RD: read ++ // CA: caculate ++ // WB: write back ++ // CM: commit ++ ++ pipe_desc(IF, ID, RD, CA, WB, CM); ++ ++ ++ //----------PIPELINE CLASSES--------------------------------------------------- ++ // Pipeline Classes describe the stages in which input and output are ++ // referenced by the hardware pipeline. 
++ ++ //No.1 Integer ALU reg-reg operation : dst <-- reg1 op reg2 ++ pipe_class ialu_regI_regI(mRegI dst, mRegI src1, mRegI src2) %{ ++ single_instruction; ++ src1 : RD(read); ++ src2 : RD(read); ++ dst : WB(write)+1; ++ DECODE : ID; ++ ALU : CA; ++ %} ++ ++ //No.19 Integer mult operation : dst <-- reg1 mult reg2 ++ pipe_class ialu_mult(mRegI dst, mRegI src1, mRegI src2) %{ ++ src1 : RD(read); ++ src2 : RD(read); ++ dst : WB(write)+5; ++ DECODE : ID; ++ ALU2 : CA; ++ %} ++ ++ pipe_class mulL_reg_reg(mRegL dst, mRegL src1, mRegL src2) %{ ++ src1 : RD(read); ++ src2 : RD(read); ++ dst : WB(write)+10; ++ DECODE : ID; ++ ALU2 : CA; ++ %} ++ ++ //No.19 Integer div operation : dst <-- reg1 div reg2 ++ pipe_class ialu_div(mRegI dst, mRegI src1, mRegI src2) %{ ++ src1 : RD(read); ++ src2 : RD(read); ++ dst : WB(write)+10; ++ DECODE : ID; ++ ALU2 : CA; ++ %} ++ ++ //No.19 Integer mod operation : dst <-- reg1 mod reg2 ++ pipe_class ialu_mod(mRegI dst, mRegI src1, mRegI src2) %{ ++ instruction_count(2); ++ src1 : RD(read); ++ src2 : RD(read); ++ dst : WB(write)+10; ++ DECODE : ID; ++ ALU2 : CA; ++ %} ++ ++ //No.15 Long ALU reg-reg operation : dst <-- reg1 op reg2 ++ pipe_class ialu_regL_regL(mRegL dst, mRegL src1, mRegL src2) %{ ++ instruction_count(2); ++ src1 : RD(read); ++ src2 : RD(read); ++ dst : WB(write); ++ DECODE : ID; ++ ALU : CA; ++ %} ++ ++ //No.18 Long ALU reg-imm16 operation : dst <-- reg1 op imm16 ++ pipe_class ialu_regL_imm16(mRegL dst, mRegL src) %{ ++ instruction_count(2); ++ src : RD(read); ++ dst : WB(write); ++ DECODE : ID; ++ ALU : CA; ++ %} ++ ++ //no.16 load Long from memory : ++ pipe_class ialu_loadL(mRegL dst, memory mem) %{ ++ instruction_count(2); ++ mem : RD(read); ++ dst : WB(write)+5; ++ DECODE : ID; ++ MEM : RD; ++ %} ++ ++ //No.17 Store Long to Memory : ++ pipe_class ialu_storeL(mRegL src, memory mem) %{ ++ instruction_count(2); ++ mem : RD(read); ++ src : RD(read); ++ DECODE : ID; ++ MEM : RD; ++ %} ++ ++ //No.2 Integer ALU reg-imm16 operation : dst <-- reg1 op imm16 ++ pipe_class ialu_regI_imm16(mRegI dst, mRegI src) %{ ++ single_instruction; ++ src : RD(read); ++ dst : WB(write); ++ DECODE : ID; ++ ALU : CA; ++ %} ++ ++ //No.3 Integer move operation : dst <-- reg ++ pipe_class ialu_regI_mov(mRegI dst, mRegI src) %{ ++ src : RD(read); ++ dst : WB(write); ++ DECODE : ID; ++ ALU : CA; ++ %} ++ ++ //No.4 No instructions : do nothing ++ pipe_class empty( ) %{ ++ instruction_count(0); ++ %} ++ ++ //No.5 UnConditional branch : ++ pipe_class pipe_jump( label labl ) %{ ++ multiple_bundles; ++ DECODE : ID; ++ BR : RD; ++ %} ++ ++ //No.6 ALU Conditional branch : ++ pipe_class pipe_alu_branch(mRegI src1, mRegI src2, label labl ) %{ ++ multiple_bundles; ++ src1 : RD(read); ++ src2 : RD(read); ++ DECODE : ID; ++ BR : RD; ++ %} ++ ++ //no.7 load integer from memory : ++ pipe_class ialu_loadI(mRegI dst, memory mem) %{ ++ mem : RD(read); ++ dst : WB(write)+3; ++ DECODE : ID; ++ MEM : RD; ++ %} ++ ++ //No.8 Store Integer to Memory : ++ pipe_class ialu_storeI(mRegI src, memory mem) %{ ++ mem : RD(read); ++ src : RD(read); ++ DECODE : ID; ++ MEM : RD; ++ %} ++ ++ ++ //No.10 Floating FPU reg-reg operation : dst <-- reg1 op reg2 ++ pipe_class fpu_regF_regF(regF dst, regF src1, regF src2) %{ ++ src1 : RD(read); ++ src2 : RD(read); ++ dst : WB(write); ++ DECODE : ID; ++ FPU : CA; ++ %} ++ ++ //No.22 Floating div operation : dst <-- reg1 div reg2 ++ pipe_class fpu_div(regF dst, regF src1, regF src2) %{ ++ src1 : RD(read); ++ src2 : RD(read); ++ dst : WB(write); ++ DECODE : ID; ++ FPU2 : 
CA; ++ %} ++ ++ pipe_class fcvt_I2D(regD dst, mRegI src) %{ ++ src : RD(read); ++ dst : WB(write); ++ DECODE : ID; ++ FPU1 : CA; ++ %} ++ ++ pipe_class fcvt_D2I(mRegI dst, regD src) %{ ++ src : RD(read); ++ dst : WB(write); ++ DECODE : ID; ++ FPU1 : CA; ++ %} ++ ++ pipe_class pipe_mfc1(mRegI dst, regD src) %{ ++ src : RD(read); ++ dst : WB(write); ++ DECODE : ID; ++ MEM : RD; ++ %} ++ ++ pipe_class pipe_mtc1(regD dst, mRegI src) %{ ++ src : RD(read); ++ dst : WB(write); ++ DECODE : ID; ++ MEM : RD(5); ++ %} ++ ++ //No.23 Floating sqrt operation : dst <-- reg1 sqrt reg2 ++ pipe_class fpu_sqrt(regF dst, regF src1, regF src2) %{ ++ multiple_bundles; ++ src1 : RD(read); ++ src2 : RD(read); ++ dst : WB(write); ++ DECODE : ID; ++ FPU2 : CA; ++ %} ++ ++ //No.11 Load Floating from Memory : ++ pipe_class fpu_loadF(regF dst, memory mem) %{ ++ instruction_count(1); ++ mem : RD(read); ++ dst : WB(write)+3; ++ DECODE : ID; ++ MEM : RD; ++ %} ++ ++ //No.12 Store Floating to Memory : ++ pipe_class fpu_storeF(regF src, memory mem) %{ ++ instruction_count(1); ++ mem : RD(read); ++ src : RD(read); ++ DECODE : ID; ++ MEM : RD; ++ %} ++ ++ //No.13 FPU Conditional branch : ++ pipe_class pipe_fpu_branch(regF src1, regF src2, label labl ) %{ ++ multiple_bundles; ++ src1 : RD(read); ++ src2 : RD(read); ++ DECODE : ID; ++ BR : RD; ++ %} ++ ++//No.14 Floating FPU reg operation : dst <-- op reg ++ pipe_class fpu1_regF(regF dst, regF src) %{ ++ src : RD(read); ++ dst : WB(write); ++ DECODE : ID; ++ FPU : CA; ++ %} ++ ++ pipe_class long_memory_op() %{ ++ instruction_count(10); multiple_bundles; force_serialization; ++ fixed_latency(30); ++ %} ++ ++ pipe_class simple_call() %{ ++ instruction_count(10); multiple_bundles; force_serialization; ++ fixed_latency(200); ++ BR : RD; ++ %} ++ ++ pipe_class call() %{ ++ instruction_count(10); multiple_bundles; force_serialization; ++ fixed_latency(200); ++ %} ++ ++ //FIXME: ++ //No.9 Piple slow : for multi-instructions ++ pipe_class pipe_slow( ) %{ ++ instruction_count(20); ++ force_serialization; ++ multiple_bundles; ++ fixed_latency(50); ++ %} ++ ++%} ++ ++ ++ ++//----------INSTRUCTIONS------------------------------------------------------- ++// ++// match -- States which machine-independent subtree may be replaced ++// by this instruction. ++// ins_cost -- The estimated cost of this instruction is used by instruction ++// selection to identify a minimum cost tree of machine ++// instructions that matches a tree of machine-independent ++// instructions. ++// format -- A string providing the disassembly for this instruction. ++// The value of an instruction's operand may be inserted ++// by referring to it with a '$' prefix. ++// opcode -- Three instruction opcodes may be provided. These are referred ++// to within an encode class as $primary, $secondary, and $tertiary ++// respectively. The primary opcode is commonly used to ++// indicate the type of machine instruction, while secondary ++// and tertiary are often used for prefix options or addressing ++// modes. ++// ins_encode -- A list of encode classes with parameters. The encode class ++// name must have been defined in an 'enc_class' specification ++// in the encode section of the architecture description. 
++ ++ ++// Load Integer ++instruct loadI(mRegI dst, memory mem) %{ ++ match(Set dst (LoadI mem)); ++ ++ ins_cost(125); ++ format %{ "ld_w $dst, $mem #@loadI" %} ++ ins_encode %{ ++ __ loadstore_enc($dst$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::LOAD_INT); ++ %} ++ ins_pipe( ialu_loadI ); ++%} ++ ++instruct loadI_convI2L(mRegL dst, memory mem) %{ ++ match(Set dst (ConvI2L (LoadI mem))); ++ ++ ins_cost(125); ++ format %{ "ld_w $dst, $mem #@loadI_convI2L" %} ++ ins_encode %{ ++ __ loadstore_enc($dst$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::LOAD_INT); ++ %} ++ ins_pipe( ialu_loadI ); ++%} ++ ++// Load Integer (32 bit signed) to Byte (8 bit signed) ++instruct loadI2B(mRegI dst, memory mem, immI_24 twentyfour) %{ ++ match(Set dst (RShiftI (LShiftI (LoadI mem) twentyfour) twentyfour)); ++ ++ ins_cost(125); ++ format %{ "ld_b $dst, $mem\t# int -> byte #@loadI2B" %} ++ ins_encode %{ ++ __ loadstore_enc($dst$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::LOAD_BYTE); ++ %} ++ ins_pipe(ialu_loadI); ++%} ++ ++// Load Integer (32 bit signed) to Unsigned Byte (8 bit UNsigned) ++instruct loadI2UB(mRegI dst, memory mem, immI_255 mask) %{ ++ match(Set dst (AndI (LoadI mem) mask)); ++ ++ ins_cost(125); ++ format %{ "ld_bu $dst, $mem\t# int -> ubyte #@loadI2UB" %} ++ ins_encode %{ ++ __ loadstore_enc($dst$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::LOAD_U_BYTE); ++ %} ++ ins_pipe(ialu_loadI); ++%} ++ ++// Load Integer (32 bit signed) to Short (16 bit signed) ++instruct loadI2S(mRegI dst, memory mem, immI_16 sixteen) %{ ++ match(Set dst (RShiftI (LShiftI (LoadI mem) sixteen) sixteen)); ++ ++ ins_cost(125); ++ format %{ "ld_h $dst, $mem\t# int -> short #@loadI2S" %} ++ ins_encode %{ ++ __ loadstore_enc($dst$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::LOAD_SHORT); ++ %} ++ ins_pipe(ialu_loadI); ++%} ++ ++// Load Integer (32 bit signed) to Unsigned Short/Char (16 bit UNsigned) ++instruct loadI2US(mRegI dst, memory mem, immI_65535 mask) %{ ++ match(Set dst (AndI (LoadI mem) mask)); ++ ++ ins_cost(125); ++ format %{ "ld_hu $dst, $mem\t# int -> ushort/char #@loadI2US" %} ++ ins_encode %{ ++ __ loadstore_enc($dst$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::LOAD_U_SHORT); ++ %} ++ ins_pipe(ialu_loadI); ++%} ++ ++// Load Long. 
++instruct loadL(mRegL dst, memory mem) %{ ++// predicate(!((LoadLNode*)n)->require_atomic_access()); ++ match(Set dst (LoadL mem)); ++ ++ ins_cost(250); ++ format %{ "ld_d $dst, $mem #@loadL" %} ++ ins_encode %{ ++ __ loadstore_enc($dst$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::LOAD_LONG); ++ %} ++ ins_pipe( ialu_loadL ); ++%} ++ ++// Load Long - UNaligned ++instruct loadL_unaligned(mRegL dst, memory mem) %{ ++ match(Set dst (LoadL_unaligned mem)); ++ ++ // FIXME: Need more effective ldl/ldr ++ ins_cost(450); ++ format %{ "ld_d $dst, $mem #@loadL_unaligned\n\t" %} ++ ins_encode %{ ++ __ loadstore_enc($dst$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::LOAD_LONG); ++ %} ++ ins_pipe( ialu_loadL ); ++%} ++ ++// Store Long ++instruct storeL_reg(memory mem, mRegL src) %{ ++ match(Set mem (StoreL mem src)); ++ predicate(!needs_releasing_store(n)); ++ ++ ins_cost(200); ++ format %{ "st_d $mem, $src #@storeL_reg\n" %} ++ ins_encode %{ ++ __ loadstore_enc($src$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::STORE_LONG); ++ %} ++ ins_pipe( ialu_storeL ); ++%} ++ ++instruct storeL_reg_volatile(indirect mem, mRegL src) %{ ++ match(Set mem (StoreL mem src)); ++ ++ ins_cost(205); ++ format %{ "amswap_db_d R0, $src, $mem #@storeL_reg\n" %} ++ ins_encode %{ ++ __ amswap_db_d(R0, $src$$Register, as_Register($mem$$base)); ++ %} ++ ins_pipe( ialu_storeL ); ++%} ++ ++instruct storeL_immL_0(memory mem, immL_0 zero) %{ ++ match(Set mem (StoreL mem zero)); ++ predicate(!needs_releasing_store(n)); ++ ++ ins_cost(180); ++ format %{ "st_d zero, $mem #@storeL_immL_0" %} ++ ins_encode %{ ++ __ loadstore_enc(R0, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::STORE_LONG); ++ %} ++ ins_pipe( ialu_storeL ); ++%} ++ ++instruct storeL_immL_0_volatile(indirect mem, immL_0 zero) %{ ++ match(Set mem (StoreL mem zero)); ++ ++ ins_cost(185); ++ format %{ "amswap_db_d AT, R0, $mem #@storeL_immL_0" %} ++ ins_encode %{ ++ __ amswap_db_d(AT, R0, as_Register($mem$$base)); ++ %} ++ ins_pipe( ialu_storeL ); ++%} ++ ++// Load Compressed Pointer ++instruct loadN(mRegN dst, memory mem) ++%{ ++ match(Set dst (LoadN mem)); ++ ++ ins_cost(125); // XXX ++ format %{ "ld_wu $dst, $mem\t# compressed ptr @ loadN" %} ++ ins_encode %{ ++ relocInfo::relocType disp_reloc = $mem->disp_reloc(); ++ assert(disp_reloc == relocInfo::none, "cannot have disp"); ++ __ loadstore_enc($dst$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::LOAD_U_INT); ++ %} ++ ins_pipe( ialu_loadI ); // XXX ++%} ++ ++instruct loadN2P(mRegP dst, memory mem) ++%{ ++ match(Set dst (DecodeN (LoadN mem))); ++ predicate(Universe::narrow_oop_base() == NULL && Universe::narrow_oop_shift() == 0); ++ ++ ins_cost(125); // XXX ++ format %{ "ld_wu $dst, $mem\t# @ loadN2P" %} ++ ins_encode %{ ++ relocInfo::relocType disp_reloc = $mem->disp_reloc(); ++ assert(disp_reloc == relocInfo::none, "cannot have disp"); ++ __ loadstore_enc($dst$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::LOAD_U_INT); ++ %} ++ ins_pipe( ialu_loadI ); // XXX ++%} ++ ++// Load Pointer ++instruct loadP(mRegP dst, memory mem) %{ ++ match(Set dst (LoadP mem)); ++ ++ ins_cost(125); ++ format %{ "ld_d $dst, $mem #@loadP" %} ++ ins_encode %{ ++ relocInfo::relocType disp_reloc = $mem->disp_reloc(); ++ assert(disp_reloc == relocInfo::none, "cannot have disp"); ++ __ loadstore_enc($dst$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, 
MacroAssembler::LOAD_LONG); ++ %} ++ ins_pipe( ialu_loadI ); ++%} ++ ++// Load Klass Pointer ++instruct loadKlass(mRegP dst, memory mem) %{ ++ match(Set dst (LoadKlass mem)); ++ ++ ins_cost(125); ++ format %{ "MOV $dst,$mem @ loadKlass" %} ++ ins_encode %{ ++ relocInfo::relocType disp_reloc = $mem->disp_reloc(); ++ assert(disp_reloc == relocInfo::none, "cannot have disp"); ++ __ loadstore_enc($dst$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::LOAD_LONG); ++ %} ++ ins_pipe( ialu_loadI ); ++%} ++ ++// Load narrow Klass Pointer ++instruct loadNKlass(mRegN dst, memory mem) ++%{ ++ match(Set dst (LoadNKlass mem)); ++ ++ ins_cost(125); // XXX ++ format %{ "ld_wu $dst, $mem\t# compressed klass ptr @ loadNKlass" %} ++ ins_encode %{ ++ relocInfo::relocType disp_reloc = $mem->disp_reloc(); ++ assert(disp_reloc == relocInfo::none, "cannot have disp"); ++ __ loadstore_enc($dst$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::LOAD_U_INT); ++ %} ++ ins_pipe( ialu_loadI ); // XXX ++%} ++ ++instruct loadN2PKlass(mRegP dst, memory mem) ++%{ ++ match(Set dst (DecodeNKlass (LoadNKlass mem))); ++ predicate(Universe::narrow_klass_base() == NULL && Universe::narrow_klass_shift() == 0); ++ ++ ins_cost(125); // XXX ++ format %{ "ld_wu $dst, $mem\t# compressed klass ptr @ loadN2PKlass" %} ++ ins_encode %{ ++ relocInfo::relocType disp_reloc = $mem->disp_reloc(); ++ assert(disp_reloc == relocInfo::none, "cannot have disp"); ++ __ loadstore_enc($dst$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::LOAD_U_INT); ++ %} ++ ins_pipe( ialu_loadI ); // XXX ++%} ++ ++// Load Constant ++instruct loadConI(mRegI dst, immI src) %{ ++ match(Set dst src); ++ ++ ins_cost(120); ++ format %{ "mov $dst, $src #@loadConI" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ int value = $src$$constant; ++ __ li(dst, value); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++ ++instruct loadConL(mRegL dst, immL src) %{ ++ match(Set dst src); ++ ins_cost(120); ++ format %{ "li $dst, $src @ loadConL" %} ++ ins_encode %{ ++ __ li($dst$$Register, $src$$constant); ++ %} ++ ins_pipe(ialu_regL_regL); ++%} ++ ++// Load Range ++instruct loadRange(mRegI dst, memory_loadRange mem) %{ ++ match(Set dst (LoadRange mem)); ++ ++ ins_cost(125); ++ format %{ "MOV $dst,$mem @ loadRange" %} ++ ins_encode %{ ++ __ loadstore_enc($dst$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::LOAD_INT); ++ %} ++ ins_pipe( ialu_loadI ); ++%} ++ ++ ++instruct storeP(memory mem, mRegP src ) %{ ++ match(Set mem (StoreP mem src)); ++ predicate(!needs_releasing_store(n)); ++ ++ ins_cost(125); ++ format %{ "st_d $src, $mem #@storeP" %} ++ ins_encode %{ ++ __ loadstore_enc($src$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::STORE_LONG); ++ %} ++ ins_pipe( ialu_storeI ); ++%} ++ ++instruct storeP_volatile(indirect mem, mRegP src ) %{ ++ match(Set mem (StoreP mem src)); ++ ++ ins_cost(130); ++ format %{ "amswap_db_d R0, $src, $mem #@storeP" %} ++ ins_encode %{ ++ __ amswap_db_d(R0, $src$$Register, as_Register($mem$$base)); ++ %} ++ ins_pipe( ialu_storeI ); ++%} ++ ++// Store NULL Pointer, mark word, or other simple pointer constant. 
++instruct storeImmP_immP_0(memory mem, immP_0 zero) %{ ++ match(Set mem (StoreP mem zero)); ++ predicate(!needs_releasing_store(n)); ++ ++ ins_cost(125); ++ format %{ "mov $mem, $zero #@storeImmP_0" %} ++ ins_encode %{ ++ __ loadstore_enc(R0, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::STORE_LONG); ++ %} ++ ins_pipe( ialu_storeI ); ++%} ++ ++instruct storeImmP_immP_0_volatile(indirect mem, immP_0 zero) %{ ++ match(Set mem (StoreP mem zero)); ++ ++ ins_cost(130); ++ format %{ "amswap_db_d AT, R0, $mem #@storeImmP_0" %} ++ ins_encode %{ ++ __ amswap_db_d(AT, R0, as_Register($mem$$base)); ++ %} ++ ins_pipe( ialu_storeI ); ++%} ++ ++// Store Compressed Pointer ++instruct storeN(memory mem, mRegN src) ++%{ ++ match(Set mem (StoreN mem src)); ++ predicate(!needs_releasing_store(n)); ++ ++ ins_cost(125); // XXX ++ format %{ "st_w $mem, $src\t# compressed ptr @ storeN" %} ++ ins_encode %{ ++ __ loadstore_enc($src$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::STORE_INT); ++ %} ++ ins_pipe( ialu_storeI ); ++%} ++ ++instruct storeN_volatile(indirect mem, mRegN src) ++%{ ++ match(Set mem (StoreN mem src)); ++ ++ ins_cost(130); // XXX ++ format %{ "amswap_db_w R0, $src, $mem # compressed ptr @ storeN" %} ++ ins_encode %{ ++ __ amswap_db_w(R0, $src$$Register, as_Register($mem$$base)); ++ %} ++ ins_pipe( ialu_storeI ); ++%} ++ ++instruct storeP2N(memory mem, mRegP src) ++%{ ++ match(Set mem (StoreN mem (EncodeP src))); ++ predicate(Universe::narrow_oop_base() == NULL && Universe::narrow_oop_shift() == 0); ++ predicate(Universe::narrow_oop_base() == NULL && Universe::narrow_oop_shift() == 0 && !needs_releasing_store(n)); ++ ++ ins_cost(125); // XXX ++ format %{ "st_w $mem, $src\t# @ storeP2N" %} ++ ins_encode %{ ++ __ loadstore_enc($src$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::STORE_INT); ++ %} ++ ins_pipe( ialu_storeI ); ++%} ++ ++instruct storeP2N_volatile(indirect mem, mRegP src) ++%{ ++ match(Set mem (StoreN mem (EncodeP src))); ++ predicate(Universe::narrow_oop_base() == NULL && Universe::narrow_oop_shift() == 0); ++ ++ ins_cost(130); // XXX ++ format %{ "amswap_db_w R0, $src, $mem # @ storeP2N" %} ++ ins_encode %{ ++ __ amswap_db_w(R0, $src$$Register, as_Register($mem$$base)); ++ %} ++ ins_pipe( ialu_storeI ); ++%} ++ ++instruct storeNKlass(memory mem, mRegN src) ++%{ ++ match(Set mem (StoreNKlass mem src)); ++ predicate(!needs_releasing_store(n)); ++ ++ ins_cost(125); // XXX ++ format %{ "st_w $mem, $src\t# compressed klass ptr @ storeNKlass" %} ++ ins_encode %{ ++ __ loadstore_enc($src$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::STORE_INT); ++ %} ++ ins_pipe( ialu_storeI ); ++%} ++ ++instruct storeNKlass_volatile(indirect mem, mRegN src) ++%{ ++ match(Set mem (StoreNKlass mem src)); ++ ++ ins_cost(130); ++ format %{ "amswap_db_w R0, $src, $mem # compressed klass ptr @ storeNKlass" %} ++ ins_encode %{ ++ __ amswap_db_w(R0, $src$$Register, as_Register($mem$$base)); ++ %} ++ ins_pipe( ialu_storeI ); ++%} ++ ++instruct storeP2NKlass(memory mem, mRegP src) ++%{ ++ match(Set mem (StoreNKlass mem (EncodePKlass src))); ++ predicate(Universe::narrow_klass_base() == NULL && Universe::narrow_klass_shift() == 0 && !needs_releasing_store(n)); ++ ++ ins_cost(125); // XXX ++ format %{ "st_w $mem, $src\t# @ storeP2NKlass" %} ++ ins_encode %{ ++ __ loadstore_enc($src$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::STORE_INT); ++ %} ++ ins_pipe( ialu_storeI ); ++%} 
++ ++instruct storeP2NKlass_volatile(indirect mem, mRegP src) ++%{ ++ match(Set mem (StoreNKlass mem (EncodePKlass src))); ++ predicate(Universe::narrow_klass_base() == NULL && Universe::narrow_klass_shift() == 0); ++ ++ ins_cost(130); ++ format %{ "amswap_db_w R0, $src, $mem # @ storeP2NKlass" %} ++ ins_encode %{ ++ __ amswap_db_w(R0, $src$$Register, as_Register($mem$$base)); ++ %} ++ ins_pipe( ialu_storeI ); ++%} ++ ++instruct storeImmN_immN_0(memory mem, immN_0 zero) ++%{ ++ match(Set mem (StoreN mem zero)); ++ predicate(!needs_releasing_store(n)); ++ ++ ins_cost(125); // XXX ++ format %{ "storeN0 zero, $mem\t# compressed ptr" %} ++ ins_encode %{ ++ __ loadstore_enc(R0, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::STORE_INT); ++ %} ++ ins_pipe( ialu_storeI ); ++%} ++ ++instruct storeImmN_immN_0_volatile(indirect mem, immN_0 zero) ++%{ ++ match(Set mem (StoreN mem zero)); ++ ++ ins_cost(130); // XXX ++ format %{ "amswap_db_w AT, R0, $mem # compressed ptr" %} ++ ins_encode %{ ++ __ amswap_db_w(AT, R0, as_Register($mem$$base)); ++ %} ++ ins_pipe( ialu_storeI ); ++%} ++ ++// Store Byte ++instruct storeB_immB_0(memory mem, immI_0 zero) %{ ++ match(Set mem (StoreB mem zero)); ++ ++ format %{ "mov $mem, zero #@storeB_immB_0" %} ++ ins_encode %{ ++ __ loadstore_enc(R0, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::STORE_BYTE); ++ %} ++ ins_pipe( ialu_storeI ); ++%} ++ ++instruct storeB(memory mem, mRegIorL2I src) %{ ++ match(Set mem (StoreB mem src)); ++ ++ ins_cost(125); ++ format %{ "st_b $src, $mem #@storeB" %} ++ ins_encode %{ ++ __ loadstore_enc($src$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::STORE_BYTE); ++ %} ++ ins_pipe( ialu_storeI ); ++%} ++ ++// Load Byte (8bit signed) ++instruct loadB(mRegI dst, memory mem) %{ ++ match(Set dst (LoadB mem)); ++ ++ ins_cost(125); ++ format %{ "ld_b $dst, $mem #@loadB" %} ++ ins_encode %{ ++ __ loadstore_enc($dst$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::LOAD_BYTE); ++ %} ++ ins_pipe( ialu_loadI ); ++%} ++ ++instruct loadB_convI2L(mRegL dst, memory mem) %{ ++ match(Set dst (ConvI2L (LoadB mem))); ++ ++ ins_cost(125); ++ format %{ "ld_b $dst, $mem #@loadB_convI2L" %} ++ ins_encode %{ ++ __ loadstore_enc($dst$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::LOAD_BYTE); ++ %} ++ ins_pipe( ialu_loadI ); ++%} ++ ++// Load Byte (8bit UNsigned) ++instruct loadUB(mRegI dst, memory mem) %{ ++ match(Set dst (LoadUB mem)); ++ ++ ins_cost(125); ++ format %{ "ld_bu $dst, $mem #@loadUB" %} ++ ins_encode %{ ++ __ loadstore_enc($dst$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::LOAD_U_BYTE); ++ %} ++ ins_pipe( ialu_loadI ); ++%} ++ ++instruct loadUB_convI2L(mRegL dst, memory mem) %{ ++ match(Set dst (ConvI2L (LoadUB mem))); ++ ++ ins_cost(125); ++ format %{ "ld_bu $dst, $mem #@loadUB_convI2L" %} ++ ins_encode %{ ++ __ loadstore_enc($dst$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::LOAD_U_BYTE); ++ %} ++ ins_pipe( ialu_loadI ); ++%} ++ ++// Load Short (16bit signed) ++instruct loadS(mRegI dst, memory mem) %{ ++ match(Set dst (LoadS mem)); ++ ++ ins_cost(125); ++ format %{ "ld_h $dst, $mem #@loadS" %} ++ ins_encode %{ ++ __ loadstore_enc($dst$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::LOAD_SHORT); ++ %} ++ ins_pipe( ialu_loadI ); ++%} ++ ++// Load Short (16 bit signed) to Byte (8 bit signed) ++instruct loadS2B(mRegI dst, memory mem, 
immI_24 twentyfour) %{ ++ match(Set dst (RShiftI (LShiftI (LoadS mem) twentyfour) twentyfour)); ++ ++ ins_cost(125); ++ format %{ "ld_b $dst, $mem\t# short -> byte #@loadS2B" %} ++ ins_encode %{ ++ __ loadstore_enc($dst$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::LOAD_BYTE); ++ %} ++ ins_pipe(ialu_loadI); ++%} ++ ++instruct loadS_convI2L(mRegL dst, memory mem) %{ ++ match(Set dst (ConvI2L (LoadS mem))); ++ ++ ins_cost(125); ++ format %{ "ld_h $dst, $mem #@loadS_convI2L" %} ++ ins_encode %{ ++ __ loadstore_enc($dst$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::LOAD_SHORT); ++ %} ++ ins_pipe( ialu_loadI ); ++%} ++ ++// Store Integer Immediate ++instruct storeI_immI_0(memory mem, immI_0 zero) %{ ++ match(Set mem (StoreI mem zero)); ++ predicate(!needs_releasing_store(n)); ++ ++ ins_cost(120); ++ format %{ "mov $mem, zero #@storeI_immI_0" %} ++ ins_encode %{ ++ __ loadstore_enc(R0, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::STORE_INT); ++ %} ++ ins_pipe( ialu_storeI ); ++%} ++ ++instruct storeI_immI_0_volatile(indirect mem, immI_0 zero) %{ ++ match(Set mem (StoreI mem zero)); ++ ++ ins_cost(125); ++ format %{ "amswap_db_w AT, R0, $mem #@storeI_immI_0" %} ++ ins_encode %{ ++ __ amswap_db_w(AT, R0, as_Register($mem$$base)); ++ %} ++ ins_pipe( ialu_storeI ); ++%} ++ ++// Store Integer ++instruct storeI(memory mem, mRegIorL2I src) %{ ++ match(Set mem (StoreI mem src)); ++ predicate(!needs_releasing_store(n)); ++ ++ ins_cost(125); ++ format %{ "st_w $mem, $src #@storeI" %} ++ ins_encode %{ ++ __ loadstore_enc($src$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::STORE_INT); ++ %} ++ ins_pipe( ialu_storeI ); ++%} ++ ++instruct storeI_volatile(indirect mem, mRegIorL2I src) %{ ++ match(Set mem (StoreI mem src)); ++ ++ ins_cost(130); ++ format %{ "amswap_db_w R0, $src, $mem #@storeI" %} ++ ins_encode %{ ++ __ amswap_db_w(R0, $src$$Register, as_Register($mem$$base)); ++ %} ++ ins_pipe( ialu_storeI ); ++%} ++ ++// Load Float ++instruct loadF(regF dst, memory mem) %{ ++ match(Set dst (LoadF mem)); ++ ++ ins_cost(150); ++ format %{ "loadF $dst, $mem #@loadF" %} ++ ins_encode %{ ++ __ loadstore_enc($dst$$FloatRegister, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::LOAD_FLOAT); ++ %} ++ ins_pipe( ialu_loadI ); ++%} ++ ++instruct loadConP_general(mRegP dst, immP src) %{ ++ match(Set dst src); ++ ++ ins_cost(120); ++ format %{ "li $dst, $src #@loadConP_general" %} ++ ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ long* value = (long*)$src$$constant; ++ ++ if($src->constant_reloc() == relocInfo::metadata_type){ ++ int klass_index = __ oop_recorder()->find_index((Klass*)value); ++ RelocationHolder rspec = metadata_Relocation::spec(klass_index); ++ ++ __ relocate(rspec); ++ __ patchable_li52(dst, (long)value); ++ } else if($src->constant_reloc() == relocInfo::oop_type){ ++ int oop_index = __ oop_recorder()->find_index((jobject)value); ++ RelocationHolder rspec = oop_Relocation::spec(oop_index); ++ ++ __ relocate(rspec); ++ __ patchable_li52(dst, (long)value); ++ } else if ($src->constant_reloc() == relocInfo::none) { ++ __ li(dst, (long)value); ++ } ++ %} ++ ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++instruct loadConP_no_oop_cheap(mRegP dst, immP_no_oop_cheap src) %{ ++ match(Set dst src); ++ ++ ins_cost(80); ++ format %{ "li $dst, $src @ loadConP_no_oop_cheap" %} ++ ++ ins_encode %{ ++ if ($src->constant_reloc() == relocInfo::metadata_type) { ++ __ 
mov_metadata($dst$$Register, (Metadata*)$src$$constant); ++ } else { ++ __ li($dst$$Register, $src$$constant); ++ } ++ %} ++ ++ ins_pipe(ialu_regI_regI); ++%} ++ ++ ++instruct loadConP_poll(mRegP dst, immP_poll src) %{ ++ match(Set dst src); ++ ++ ins_cost(50); ++ format %{ "li $dst, $src #@loadConP_poll" %} ++ ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ intptr_t value = (intptr_t)$src$$constant; ++ ++ __ li(dst, (jlong)value); ++ %} ++ ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++instruct loadConP_immP_0(mRegP dst, immP_0 src) ++%{ ++ match(Set dst src); ++ ++ ins_cost(50); ++ format %{ "mov $dst, R0\t# ptr" %} ++ ins_encode %{ ++ Register dst_reg = $dst$$Register; ++ __ add_d(dst_reg, R0, R0); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++instruct loadConN_immN_0(mRegN dst, immN_0 src) %{ ++ match(Set dst src); ++ format %{ "move $dst, R0\t# compressed NULL ptr" %} ++ ins_encode %{ ++ __ move($dst$$Register, R0); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++instruct loadConN(mRegN dst, immN src) %{ ++ match(Set dst src); ++ ++ ins_cost(125); ++ format %{ "li $dst, $src\t# compressed ptr @ loadConN" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ __ set_narrow_oop(dst, (jobject)$src$$constant); ++ %} ++ ins_pipe( ialu_regI_regI ); // XXX ++%} ++ ++instruct loadConNKlass(mRegN dst, immNKlass src) %{ ++ match(Set dst src); ++ ++ ins_cost(125); ++ format %{ "li $dst, $src\t# compressed klass ptr @ loadConNKlass" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ __ set_narrow_klass(dst, (Klass*)$src$$constant); ++ %} ++ ins_pipe( ialu_regI_regI ); // XXX ++%} ++ ++//FIXME ++// Tail Call; Jump from runtime stub to Java code. ++// Also known as an 'interprocedural jump'. ++// Target of jump will eventually return to caller. ++// TailJump below removes the return address. ++instruct TailCalljmpInd(mRegP jump_target, mRegP method_oop) %{ ++ match(TailCall jump_target method_oop ); ++ ins_cost(300); ++ format %{ "JMP $jump_target \t# @TailCalljmpInd" %} ++ ++ ins_encode %{ ++ Register target = $jump_target$$Register; ++ Register oop = $method_oop$$Register; ++ ++ // RA will be used in generate_forward_exception() ++ __ push(RA); ++ ++ __ move(S3, oop); ++ __ jr(target); ++ %} ++ ++ ins_pipe( pipe_jump ); ++%} ++ ++// Create exception oop: created by stack-crawling runtime code. ++// Created exception is now available to this handler, and is setup ++// just prior to jumping to this handler. No code emitted. ++instruct CreateException( a0_RegP ex_oop ) ++%{ ++ match(Set ex_oop (CreateEx)); ++ ++ // use the following format syntax ++ format %{ "# exception oop is in A0; no code emitted @CreateException" %} ++ ins_encode %{ ++ // X86 leaves this function empty ++ __ block_comment("CreateException is empty in LA"); ++ %} ++ ins_pipe( empty ); ++// ins_pipe( pipe_jump ); ++%} ++ ++ ++/* The mechanism of exception handling is clear now. ++ ++- Common try/catch: ++ [stubGenerator_loongarch.cpp] generate_forward_exception() ++ |- V0, V1 are created ++ |- T4 <= SharedRuntime::exception_handler_for_return_address ++ `- jr T4 ++ `- the caller's exception_handler ++ `- jr OptoRuntime::exception_blob ++ `- here ++- Rethrow(e.g. 
'unwind'): ++ * The callee: ++ |- an exception is triggered during execution ++ `- exits the callee method through RethrowException node ++ |- The callee pushes exception_oop(T0) and exception_pc(RA) ++ `- The callee jumps to OptoRuntime::rethrow_stub() ++ * In OptoRuntime::rethrow_stub: ++ |- The VM calls _rethrow_Java to determine the return address in the caller method ++ `- exits the stub with tailjmpInd ++ |- pops exception_oop(V0) and exception_pc(V1) ++ `- jumps to the return address(usually an exception_handler) ++ * The caller: ++ `- continues processing the exception_blob with V0/V1 ++*/ ++ ++// Rethrow exception: ++// The exception oop will come in the first argument position. ++// Then JUMP (not call) to the rethrow stub code. ++instruct RethrowException() ++%{ ++ match(Rethrow); ++ ++ // use the following format syntax ++ format %{ "JMP rethrow_stub #@RethrowException" %} ++ ins_encode %{ ++ __ block_comment("@ RethrowException"); ++ ++ cbuf.set_insts_mark(); ++ cbuf.relocate(cbuf.insts_mark(), runtime_call_Relocation::spec()); ++ ++ // call OptoRuntime::rethrow_stub to get the exception handler in parent method ++ __ patchable_jump((address)OptoRuntime::rethrow_stub()); ++ %} ++ ins_pipe( pipe_jump ); ++%} ++ ++// ============================================================================ ++// Branch Instructions --- long offset versions ++ ++// Jump Direct ++instruct jmpDir_long(label labl) %{ ++ match(Goto); ++ effect(USE labl); ++ ++ ins_cost(300); ++ format %{ "JMP $labl #@jmpDir_long" %} ++ ++ ins_encode %{ ++ Label* L = $labl$$label; ++ __ jmp_far(*L); ++ %} ++ ++ ins_pipe( pipe_jump ); ++ //ins_pc_relative(1); ++%} ++ ++// Jump Direct Conditional - Label defines a relative address from Jcc+1 ++instruct jmpLoopEnd_long(cmpOp cop, mRegI src1, mRegI src2, label labl) %{ ++ match(CountedLoopEnd cop (CmpI src1 src2)); ++ effect(USE labl); ++ ++ ins_cost(300); ++ format %{ "J$cop $src1, $src2, $labl\t# Loop end @ jmpLoopEnd_long" %} ++ ins_encode %{ ++ Register op1 = $src1$$Register; ++ Register op2 = $src2$$Register; ++ Label* L = $labl$$label; ++ int flag = $cop$$cmpcode; ++ ++ __ cmp_branch_long(flag, op1, op2, L, true /* signed */); ++ %} ++ ins_pipe( pipe_jump ); ++ ins_pc_relative(1); ++%} ++ ++instruct jmpLoopEnd_reg_immI_long(cmpOp cop, mRegI src1, immI src2, label labl) %{ ++ match(CountedLoopEnd cop (CmpI src1 src2)); ++ effect(USE labl); ++ ++ ins_cost(300); ++ format %{ "J$cop $src1, $src2, $labl\t# Loop end @ jmpLoopEnd_reg_immI_long" %} ++ ins_encode %{ ++ Register op1 = $src1$$Register; ++ Label* L = $labl$$label; ++ int flag = $cop$$cmpcode; ++ int val = $src2$$constant; ++ ++ if (val == 0) { ++ __ cmp_branch_long(flag, op1, R0, L, true /* signed */); ++ } else { ++ __ li(AT, val); ++ __ cmp_branch_long(flag, op1, AT, L, true /* signed */); ++ } ++ %} ++ ins_pipe( pipe_jump ); ++ ins_pc_relative(1); ++%} ++ ++ ++// This match pattern is created for StoreIConditional since I cannot match IfNode without a RegFlags! 
++instruct jmpCon_flags_long(cmpOpEqNe cop, FlagsReg cr, label labl) %{ ++ match(If cop cr); ++ effect(USE labl); ++ ++ ins_cost(300); ++ format %{ "J$cop $labl #LoongArch uses T0 as equivalent to eflag @jmpCon_flags_long" %} ++ ++ ins_encode %{ ++ Label* L = $labl$$label; ++ switch($cop$$cmpcode) { ++ case 0x01: //equal ++ __ bne_long($cr$$Register, R0, *L); ++ break; ++ case 0x02: //not equal ++ __ beq_long($cr$$Register, R0, *L); ++ break; ++ default: ++ Unimplemented(); ++ } ++ %} ++ ++ ins_pipe( pipe_jump ); ++ ins_pc_relative(1); ++%} ++ ++// Conditional jumps ++instruct branchConP_0_long(cmpOpEqNe cmp, mRegP op1, immP_0 zero, label labl) %{ ++ match(If cmp (CmpP op1 zero)); ++ effect(USE labl); ++ ++ ins_cost(180); ++ format %{ "b$cmp $op1, R0, $labl #@branchConP_0_long" %} ++ ++ ins_encode %{ ++ Register op1 = $op1$$Register; ++ Label* L = $labl$$label; ++ int flag = $cmp$$cmpcode; ++ ++ __ cmp_branch_long(flag, op1, R0, L, true /* signed */); ++ %} ++ ++ ins_pc_relative(1); ++ ins_pipe( pipe_alu_branch ); ++%} ++ ++instruct branchConN2P_0_long(cmpOpEqNe cmp, mRegN op1, immP_0 zero, label labl) %{ ++ match(If cmp (CmpP (DecodeN op1) zero)); ++ predicate(Universe::narrow_oop_base() == NULL && Universe::narrow_oop_shift() == 0); ++ effect(USE labl); ++ ++ ins_cost(180); ++ format %{ "b$cmp $op1, R0, $labl #@branchConN2P_0_long" %} ++ ++ ins_encode %{ ++ Register op1 = $op1$$Register; ++ Label* L = $labl$$label; ++ int flag = $cmp$$cmpcode; ++ ++ __ cmp_branch_long(flag, op1, R0, L, true /* signed */); ++ %} ++ ++ ins_pc_relative(1); ++ ins_pipe( pipe_alu_branch ); ++%} ++ ++ ++instruct branchConP_long(cmpOp cmp, mRegP op1, mRegP op2, label labl) %{ ++ match(If cmp (CmpP op1 op2)); ++// predicate(can_branch_register(_kids[0]->_leaf, _kids[1]->_leaf)); ++ effect(USE labl); ++ ++ ins_cost(200); ++ format %{ "b$cmp $op1, $op2, $labl #@branchConP_long" %} ++ ++ ins_encode %{ ++ Register op1 = $op1$$Register; ++ Register op2 = $op2$$Register; ++ Label* L = $labl$$label; ++ int flag = $cmp$$cmpcode; ++ ++ __ cmp_branch_long(flag, op1, op2, L, false /* unsigned */); ++ %} ++ ++ ins_pc_relative(1); ++ ins_pipe( pipe_alu_branch ); ++%} ++ ++instruct cmpN_null_branch_long(cmpOpEqNe cmp, mRegN op1, immN_0 null, label labl) %{ ++ match(If cmp (CmpN op1 null)); ++ effect(USE labl); ++ ++ ins_cost(180); ++ format %{ "CMP $op1,0\t! compressed ptr\n\t" ++ "BP$cmp $labl @ cmpN_null_branch_long" %} ++ ins_encode %{ ++ Register op1 = $op1$$Register; ++ Label* L = $labl$$label; ++ int flag = $cmp$$cmpcode; ++ ++ __ cmp_branch_long(flag, op1, R0, L, true /* signed */); ++ %} ++//TODO: pipe_branchP or create pipe_branchN LEE ++ ins_pc_relative(1); ++ ins_pipe( pipe_alu_branch ); ++%} ++ ++instruct cmpN_reg_branch_long(cmpOp cmp, mRegN op1, mRegN op2, label labl) %{ ++ match(If cmp (CmpN op1 op2)); ++ effect(USE labl); ++ ++ ins_cost(180); ++ format %{ "CMP $op1,$op2\t! 
compressed ptr\n\t" ++ "BP$cmp $labl @ cmpN_reg_branch_long" %} ++ ins_encode %{ ++ Register op1 = $op1$$Register; ++ Register op2 = $op2$$Register; ++ Label* L = $labl$$label; ++ int flag = $cmp$$cmpcode; ++ ++ __ cmp_branch_long(flag, op1, op2, L, false /* unsigned */); ++ %} ++ ins_pc_relative(1); ++ ins_pipe( pipe_alu_branch ); ++%} ++ ++instruct branchConIU_reg_reg_long(cmpOp cmp, mRegI src1, mRegI src2, label labl) %{ ++ match( If cmp (CmpU src1 src2) ); ++ effect(USE labl); ++ format %{ "BR$cmp $src1, $src2, $labl #@branchConIU_reg_reg_long" %} ++ ++ ins_encode %{ ++ Register op1 = $src1$$Register; ++ Register op2 = $src2$$Register; ++ Label* L = $labl$$label; ++ int flag = $cmp$$cmpcode; ++ ++ __ cmp_branch_long(flag, op1, op2, L, false /* unsigned */); ++ %} ++ ++ ins_pc_relative(1); ++ ins_pipe( pipe_alu_branch ); ++%} ++ ++ ++instruct branchConIU_reg_imm_long(cmpOp cmp, mRegI src1, immI src2, label labl) %{ ++ match( If cmp (CmpU src1 src2) ); ++ effect(USE labl); ++ format %{ "BR$cmp $src1, $src2, $labl #@branchConIU_reg_imm_long" %} ++ ++ ins_encode %{ ++ Register op1 = $src1$$Register; ++ int val = $src2$$constant; ++ Label* L = $labl$$label; ++ int flag = $cmp$$cmpcode; ++ ++ if (val == 0) { ++ __ cmp_branch_long(flag, op1, R0, L, false /* unsigned */); ++ } else { ++ __ li(AT, val); ++ __ cmp_branch_long(flag, op1, AT, L, false /* unsigned */); ++ } ++ %} ++ ++ ins_pc_relative(1); ++ ins_pipe( pipe_alu_branch ); ++%} ++ ++instruct branchConI_reg_reg_long(cmpOp cmp, mRegI src1, mRegI src2, label labl) %{ ++ match( If cmp (CmpI src1 src2) ); ++ effect(USE labl); ++ format %{ "BR$cmp $src1, $src2, $labl #@branchConI_reg_reg_long" %} ++ ++ ins_encode %{ ++ Register op1 = $src1$$Register; ++ Register op2 = $src2$$Register; ++ Label* L = $labl$$label; ++ int flag = $cmp$$cmpcode; ++ ++ __ cmp_branch_long(flag, op1, op2, L, true /* signed */); ++ %} ++ ++ ins_pc_relative(1); ++ ins_pipe( pipe_alu_branch ); ++%} ++ ++instruct branchConI_reg_imm_long(cmpOp cmp, mRegI src1, immI src2, label labl) %{ ++ match( If cmp (CmpI src1 src2) ); ++ effect(USE labl); ++ ins_cost(200); ++ format %{ "BR$cmp $src1, $src2, $labl #@branchConI_reg_imm_long" %} ++ ++ ins_encode %{ ++ Register op1 = $src1$$Register; ++ int val = $src2$$constant; ++ Label* L = $labl$$label; ++ int flag = $cmp$$cmpcode; ++ ++ if (val == 0) { ++ __ cmp_branch_long(flag, op1, R0, L, true /* signed */); ++ } else { ++ __ li(AT, val); ++ __ cmp_branch_long(flag, op1, AT, L, true /* signed */); ++ } ++ %} ++ ++ ins_pc_relative(1); ++ ins_pipe( pipe_alu_branch ); ++%} ++ ++instruct branchConL_regL_regL_long(cmpOp cmp, mRegLorI2L src1, mRegLorI2L src2, label labl) %{ ++ match( If cmp (CmpL src1 src2) ); ++ effect(USE labl); ++ format %{ "BR$cmp $src1, $src2, $labl #@branchConL_regL_regL_long" %} ++ ins_cost(250); ++ ++ ins_encode %{ ++ Register op1 = as_Register($src1$$reg); ++ Register op2 = as_Register($src2$$reg); ++ ++ Label* target = $labl$$label; ++ int flag = $cmp$$cmpcode; ++ ++ __ cmp_branch_long(flag, op1, op2, target, true /* signed */); ++ %} ++ ++ ++ ins_pc_relative(1); ++ ins_pipe( pipe_alu_branch ); ++%} ++ ++instruct branchConUL_regL_regL_long(cmpOp cmp, mRegLorI2L src1, mRegLorI2L src2, label labl) %{ ++ match(If cmp (CmpUL src1 src2)); ++ effect(USE labl); ++ format %{ "BR$cmp $src1, $src2, $labl #@branchConUL_regL_regL_long" %} ++ ins_cost(250); ++ ++ ins_encode %{ ++ Register op1 = as_Register($src1$$reg); ++ Register op2 = as_Register($src2$$reg); ++ Label* target = $labl$$label; ++ int flag = 
$cmp$$cmpcode; ++ ++ __ cmp_branch_long(flag, op1, op2, target, false /* signed */); ++ %} ++ ++ ins_pc_relative(1); ++ ins_pipe(pipe_alu_branch); ++%} ++ ++instruct branchConL_regL_immL_long(cmpOp cmp, mRegL src1, immL src2, label labl) %{ ++ match( If cmp (CmpL src1 src2) ); ++ effect(USE labl); ++ format %{ "BR$cmp $src1, $src2, $labl #@branchConL_regL_immL_long" %} ++ ins_cost(180); ++ ++ ins_encode %{ ++ Register op1 = as_Register($src1$$reg); ++ Label* target = $labl$$label; ++ int flag = $cmp$$cmpcode; ++ long val = $src2$$constant; ++ ++ if (val == 0) { ++ __ cmp_branch_long(flag, op1, R0, target, true /* signed */); ++ } else { ++ __ li(AT, val); ++ __ cmp_branch_long(flag, op1, AT, target, true /* signed */); ++ } ++ %} ++ ++ ins_pc_relative(1); ++ ins_pipe( pipe_alu_branch ); ++%} ++ ++instruct branchConUL_regL_immL_long(cmpOp cmp, mRegL src1, immL src2, label labl) %{ ++ match(If cmp (CmpUL src1 src2)); ++ effect(USE labl); ++ format %{ "BR$cmp $src1, $src2, $labl #@branchConUL_regL_immL_long" %} ++ ins_cost(180); ++ ++ ins_encode %{ ++ Register op1 = as_Register($src1$$reg); ++ long val = $src2$$constant; ++ Label* target = $labl$$label; ++ int flag = $cmp$$cmpcode; ++ ++ if (val == 0) { ++ __ cmp_branch_long(flag, op1, R0, target, false /* signed */); ++ } else { ++ __ li(AT, val); ++ __ cmp_branch_long(flag, op1, AT, target, false /* signed */); ++ } ++ %} ++ ++ ins_pc_relative(1); ++ ins_pipe(pipe_alu_branch); ++%} ++ ++//FIXME ++instruct branchConF_reg_reg_long(cmpOp cmp, regF src1, regF src2, label labl) %{ ++ match( If cmp (CmpF src1 src2) ); ++ effect(USE labl); ++ format %{ "BR$cmp $src1, $src2, $labl #@branchConF_reg_reg_long" %} ++ ++ ins_encode %{ ++ FloatRegister reg_op1 = $src1$$FloatRegister; ++ FloatRegister reg_op2 = $src2$$FloatRegister; ++ Label* L = $labl$$label; ++ int flag = $cmp$$cmpcode; ++ ++ switch(flag) { ++ case 0x01: //equal ++ __ fcmp_ceq_s(FCC0, reg_op1, reg_op2); ++ __ bc1t_long(*L); ++ break; ++ case 0x02: //not_equal ++ __ fcmp_ceq_s(FCC0, reg_op1, reg_op2); ++ __ bc1f_long(*L); ++ break; ++ case 0x03: //greater ++ __ fcmp_cule_s(FCC0, reg_op1, reg_op2); ++ __ bc1f_long(*L); ++ break; ++ case 0x04: //greater_equal ++ __ fcmp_cult_s(FCC0, reg_op1, reg_op2); ++ __ bc1f_long(*L); ++ break; ++ case 0x05: //less ++ __ fcmp_cult_s(FCC0, reg_op1, reg_op2); ++ __ bc1t_long(*L); ++ break; ++ case 0x06: //less_equal ++ __ fcmp_cule_s(FCC0, reg_op1, reg_op2); ++ __ bc1t_long(*L); ++ break; ++ default: ++ Unimplemented(); ++ } ++ %} ++ ++ ins_pc_relative(1); ++ ins_pipe(pipe_slow); ++%} ++ ++instruct branchConD_reg_reg_long(cmpOp cmp, regD src1, regD src2, label labl) %{ ++ match( If cmp (CmpD src1 src2) ); ++ effect(USE labl); ++ format %{ "BR$cmp $src1, $src2, $labl #@branchConD_reg_reg_long" %} ++ ++ ins_encode %{ ++ FloatRegister reg_op1 = $src1$$FloatRegister; ++ FloatRegister reg_op2 = $src2$$FloatRegister; ++ Label* L = $labl$$label; ++ int flag = $cmp$$cmpcode; ++ ++ switch(flag) { ++ case 0x01: //equal ++ __ fcmp_ceq_d(FCC0, reg_op1, reg_op2); ++ __ bc1t_long(*L); ++ break; ++ case 0x02: //not_equal ++ // c_ueq_d cannot distinguish NaN from equal. Double.isNaN(Double) is implemented by 'f != f', so the use of c_ueq_d causes bugs. 
++ __ fcmp_ceq_d(FCC0, reg_op1, reg_op2); ++ __ bc1f_long(*L); ++ break; ++ case 0x03: //greater ++ __ fcmp_cule_d(FCC0, reg_op1, reg_op2); ++ __ bc1f_long(*L); ++ break; ++ case 0x04: //greater_equal ++ __ fcmp_cult_d(FCC0, reg_op1, reg_op2); ++ __ bc1f_long(*L); ++ break; ++ case 0x05: //less ++ __ fcmp_cult_d(FCC0, reg_op1, reg_op2); ++ __ bc1t_long(*L); ++ break; ++ case 0x06: //less_equal ++ __ fcmp_cule_d(FCC0, reg_op1, reg_op2); ++ __ bc1t_long(*L); ++ break; ++ default: ++ Unimplemented(); ++ } ++ %} ++ ++ ins_pc_relative(1); ++ ins_pipe(pipe_slow); ++%} ++ ++ ++// ============================================================================ ++// Branch Instructions -- short offset versions ++ ++// Jump Direct ++instruct jmpDir_short(label labl) %{ ++ match(Goto); ++ effect(USE labl); ++ ++ ins_cost(300); ++ format %{ "JMP $labl #@jmpDir_short" %} ++ ++ ins_encode %{ ++ Label &L = *($labl$$label); ++ if(&L) ++ __ b(L); ++ else ++ __ b(int(0)); ++ %} ++ ++ ins_pipe( pipe_jump ); ++ ins_pc_relative(1); ++ ins_short_branch(1); ++%} ++ ++// Jump Direct Conditional - Label defines a relative address from Jcc+1 ++instruct jmpLoopEnd_short(cmpOp cop, mRegI src1, mRegI src2, label labl) %{ ++ match(CountedLoopEnd cop (CmpI src1 src2)); ++ effect(USE labl); ++ ++ ins_cost(300); ++ format %{ "J$cop $src1, $src2, $labl\t# Loop end @ jmpLoopEnd_short" %} ++ ins_encode %{ ++ Register op1 = $src1$$Register; ++ Register op2 = $src2$$Register; ++ Label &L = *($labl$$label); ++ int flag = $cop$$cmpcode; ++ ++ __ cmp_branch_short(flag, op1, op2, L, true /* signed */); ++ %} ++ ins_pipe( pipe_jump ); ++ ins_pc_relative(1); ++ ins_short_branch(1); ++%} ++ ++instruct jmpLoopEnd_reg_immI_short(cmpOp cop, mRegI src1, immI src2, label labl) %{ ++ match(CountedLoopEnd cop (CmpI src1 src2)); ++ effect(USE labl); ++ ++ ins_cost(300); ++ format %{ "J$cop $src1, $src2, $labl\t# Loop end @ jmpLoopEnd_reg_immI_short" %} ++ ins_encode %{ ++ Register op1 = $src1$$Register; ++ Label &L = *($labl$$label); ++ int flag = $cop$$cmpcode; ++ int val = $src2$$constant; ++ ++ if (val == 0) { ++ __ cmp_branch_short(flag, op1, R0, L, true /* signed */); ++ } else { ++ __ li(AT, val); ++ __ cmp_branch_short(flag, op1, AT, L, true /* signed */); ++ } ++ %} ++ ins_pipe( pipe_jump ); ++ ins_pc_relative(1); ++ ins_short_branch(1); ++%} ++ ++ ++// This match pattern is created for StoreIConditional since I cannot match IfNode without a RegFlags! 
++instruct jmpCon_flags_short(cmpOpEqNe cop, FlagsReg cr, label labl) %{ ++ match(If cop cr); ++ effect(USE labl); ++ ++ ins_cost(300); ++ format %{ "J$cop $labl #LoongArch uses T0 as equivalent to eflag @jmpCon_flags_short" %} ++ ++ ins_encode %{ ++ Label &L = *($labl$$label); ++ switch($cop$$cmpcode) { ++ case 0x01: //equal ++ if (&L) ++ __ bnez($cr$$Register, L); ++ else ++ __ bnez($cr$$Register, (int)0); ++ break; ++ case 0x02: //not equal ++ if (&L) ++ __ beqz($cr$$Register, L); ++ else ++ __ beqz($cr$$Register, (int)0); ++ break; ++ default: ++ Unimplemented(); ++ } ++ %} ++ ++ ins_pipe( pipe_jump ); ++ ins_pc_relative(1); ++ ins_short_branch(1); ++%} ++ ++// Conditional jumps ++instruct branchConP_0_short(cmpOpEqNe cmp, mRegP op1, immP_0 zero, label labl) %{ ++ match(If cmp (CmpP op1 zero)); ++ effect(USE labl); ++ ++ ins_cost(180); ++ format %{ "b$cmp $op1, R0, $labl #@branchConP_0_short" %} ++ ++ ins_encode %{ ++ Register op1 = $op1$$Register; ++ Label &L = *($labl$$label); ++ int flag = $cmp$$cmpcode; ++ ++ __ cmp_branchEqNe_off21(flag, op1, L); ++ %} ++ ++ ins_pc_relative(1); ++ ins_pipe( pipe_alu_branch ); ++ ins_short_branch(1); ++%} ++ ++instruct branchConN2P_0_short(cmpOpEqNe cmp, mRegN op1, immP_0 zero, label labl) %{ ++ match(If cmp (CmpP (DecodeN op1) zero)); ++ predicate(Universe::narrow_oop_base() == NULL && Universe::narrow_oop_shift() == 0); ++ effect(USE labl); ++ ++ ins_cost(180); ++ format %{ "b$cmp $op1, R0, $labl #@branchConN2P_0_short" %} ++ ++ ins_encode %{ ++ Register op1 = $op1$$Register; ++ Label &L = *($labl$$label); ++ int flag = $cmp$$cmpcode; ++ ++ __ cmp_branchEqNe_off21(flag, op1, L); ++ %} ++ ++ ins_pc_relative(1); ++ ins_pipe( pipe_alu_branch ); ++ ins_short_branch(1); ++%} ++ ++ ++instruct branchConP_short(cmpOp cmp, mRegP op1, mRegP op2, label labl) %{ ++ match(If cmp (CmpP op1 op2)); ++// predicate(can_branch_register(_kids[0]->_leaf, _kids[1]->_leaf)); ++ effect(USE labl); ++ ++ ins_cost(200); ++ format %{ "b$cmp $op1, $op2, $labl #@branchConP_short" %} ++ ++ ins_encode %{ ++ Register op1 = $op1$$Register; ++ Register op2 = $op2$$Register; ++ Label &L = *($labl$$label); ++ int flag = $cmp$$cmpcode; ++ ++ __ cmp_branch_short(flag, op1, op2, L, false /* unsigned */); ++ %} ++ ++ ins_pc_relative(1); ++ ins_pipe( pipe_alu_branch ); ++ ins_short_branch(1); ++%} ++ ++instruct cmpN_null_branch_short(cmpOp cmp, mRegN op1, immN_0 null, label labl) %{ ++ match(If cmp (CmpN op1 null)); ++ effect(USE labl); ++ ++ ins_cost(180); ++ format %{ "CMP $op1,0\t! compressed ptr\n\t" ++ "BP$cmp $labl @ cmpN_null_branch_short" %} ++ ins_encode %{ ++ Register op1 = $op1$$Register; ++ Label &L = *($labl$$label); ++ int flag = $cmp$$cmpcode; ++ ++ __ cmp_branchEqNe_off21(flag, op1, L); ++ %} ++//TODO: pipe_branchP or create pipe_branchN LEE ++ ins_pc_relative(1); ++ ins_pipe( pipe_alu_branch ); ++ ins_short_branch(1); ++%} ++ ++instruct cmpN_reg_branch_short(cmpOp cmp, mRegN op1, mRegN op2, label labl) %{ ++ match(If cmp (CmpN op1 op2)); ++ effect(USE labl); ++ ++ ins_cost(180); ++ format %{ "CMP $op1,$op2\t! 
compressed ptr\n\t" ++ "BP$cmp $labl @ cmpN_reg_branch_short" %} ++ ins_encode %{ ++ Register op1 = $op1$$Register; ++ Register op2 = $op2$$Register; ++ Label &L = *($labl$$label); ++ int flag = $cmp$$cmpcode; ++ ++ __ cmp_branch_short(flag, op1, op2, L, false /* unsigned */); ++ %} ++ ins_pc_relative(1); ++ ins_pipe( pipe_alu_branch ); ++ ins_short_branch(1); ++%} ++ ++instruct branchConIU_reg_reg_short(cmpOp cmp, mRegI src1, mRegI src2, label labl) %{ ++ match( If cmp (CmpU src1 src2) ); ++ effect(USE labl); ++ format %{ "BR$cmp $src1, $src2, $labl #@branchConIU_reg_reg_short" %} ++ ++ ins_encode %{ ++ Register op1 = $src1$$Register; ++ Register op2 = $src2$$Register; ++ Label &L = *($labl$$label); ++ int flag = $cmp$$cmpcode; ++ ++ __ cmp_branch_short(flag, op1, op2, L, false /* unsigned */); ++ %} ++ ++ ins_pc_relative(1); ++ ins_pipe( pipe_alu_branch ); ++ ins_short_branch(1); ++%} ++ ++ ++instruct branchConIU_reg_imm_short(cmpOp cmp, mRegI src1, immI src2, label labl) %{ ++ match( If cmp (CmpU src1 src2) ); ++ effect(USE labl); ++ format %{ "BR$cmp $src1, $src2, $labl #@branchConIU_reg_imm_short" %} ++ ++ ins_encode %{ ++ Register op1 = $src1$$Register; ++ int val = $src2$$constant; ++ Label &L = *($labl$$label); ++ int flag = $cmp$$cmpcode; ++ ++ if (val == 0) { ++ __ cmp_branch_short(flag, op1, R0, L, false /* unsigned */); ++ } else { ++ __ li(AT, val); ++ __ cmp_branch_short(flag, op1, AT, L, false /* unsigned */); ++ } ++ %} ++ ++ ins_pc_relative(1); ++ ins_pipe( pipe_alu_branch ); ++ ins_short_branch(1); ++%} ++ ++instruct branchConI_reg_reg_short(cmpOp cmp, mRegI src1, mRegI src2, label labl) %{ ++ match( If cmp (CmpI src1 src2) ); ++ effect(USE labl); ++ format %{ "BR$cmp $src1, $src2, $labl #@branchConI_reg_reg_short" %} ++ ++ ins_encode %{ ++ Register op1 = $src1$$Register; ++ Register op2 = $src2$$Register; ++ Label &L = *($labl$$label); ++ int flag = $cmp$$cmpcode; ++ ++ __ cmp_branch_short(flag, op1, op2, L, true /* signed */); ++ %} ++ ++ ins_pc_relative(1); ++ ins_pipe( pipe_alu_branch ); ++ ins_short_branch(1); ++%} ++ ++instruct branchConI_reg_imm_short(cmpOp cmp, mRegI src1, immI src2, label labl) %{ ++ match( If cmp (CmpI src1 src2) ); ++ effect(USE labl); ++ ins_cost(200); ++ format %{ "BR$cmp $src1, $src2, $labl #@branchConI_reg_imm_short" %} ++ ++ ins_encode %{ ++ Register op1 = $src1$$Register; ++ int val = $src2$$constant; ++ Label &L = *($labl$$label); ++ int flag = $cmp$$cmpcode; ++ ++ if (val == 0) { ++ __ cmp_branch_short(flag, op1, R0, L, true /* signed */); ++ } else { ++ __ li(AT, val); ++ __ cmp_branch_short(flag, op1, AT, L, true /* signed */); ++ } ++ %} ++ ++ ins_pc_relative(1); ++ ins_pipe( pipe_alu_branch ); ++ ins_short_branch(1); ++%} ++ ++instruct branchConL_regL_regL_short(cmpOp cmp, mRegLorI2L src1, mRegLorI2L src2, label labl) %{ ++ match( If cmp (CmpL src1 src2) ); ++ effect(USE labl); ++ format %{ "BR$cmp $src1, $src2, $labl #@branchConL_regL_regL_short" %} ++ ins_cost(250); ++ ++ ins_encode %{ ++ Register op1 = as_Register($src1$$reg); ++ Register op2 = as_Register($src2$$reg); ++ ++ Label &target = *($labl$$label); ++ int flag = $cmp$$cmpcode; ++ ++ __ cmp_branch_short(flag, op1, op2, target, true /* signed */); ++ %} ++ ++ ++ ins_pc_relative(1); ++ ins_pipe( pipe_alu_branch ); ++ ins_short_branch(1); ++%} ++ ++instruct branchConUL_regL_regL_short(cmpOp cmp, mRegLorI2L src1, mRegLorI2L src2, label labl) %{ ++ match(If cmp (CmpUL src1 src2)); ++ effect(USE labl); ++ format %{ "BR$cmp $src1, $src2, $labl #@branchConUL_regL_regL_short" %} 
++ ins_cost(250); ++ ++ ins_encode %{ ++ Register op1 = as_Register($src1$$reg); ++ Register op2 = as_Register($src2$$reg); ++ Label& target = *($labl$$label); ++ int flag = $cmp$$cmpcode; ++ ++ __ cmp_branch_short(flag, op1, op2, target, false /* signed */); ++ %} ++ ++ ins_pc_relative(1); ++ ins_pipe(pipe_alu_branch); ++ ins_short_branch(1); ++%} ++ ++instruct branchConL_regL_immL_short(cmpOp cmp, mRegL src1, immL src2, label labl) %{ ++ match( If cmp (CmpL src1 src2) ); ++ effect(USE labl); ++ format %{ "BR$cmp $src1, $src2, $labl #@branchConL_regL_immL_short" %} ++ ins_cost(180); ++ ++ ins_encode %{ ++ Register op1 = as_Register($src1$$reg); ++ Label &target = *($labl$$label); ++ int flag = $cmp$$cmpcode; ++ long val = $src2$$constant; ++ ++ if (val == 0) { ++ __ cmp_branch_short(flag, op1, R0, target, true /* signed */); ++ } else { ++ __ li(AT, val); ++ __ cmp_branch_short(flag, op1, AT, target, true /* signed */); ++ } ++ %} ++ ++ ins_pc_relative(1); ++ ins_pipe( pipe_alu_branch ); ++ ins_short_branch(1); ++%} ++ ++instruct branchConUL_regL_immL_short(cmpOp cmp, mRegL src1, immL src2, label labl) %{ ++ match(If cmp (CmpUL src1 src2)); ++ effect(USE labl); ++ format %{ "BR$cmp $src1, $src2, $labl #@branchConUL_regL_immL_short" %} ++ ins_cost(180); ++ ++ ins_encode %{ ++ Register op1 = as_Register($src1$$reg); ++ long val = $src2$$constant; ++ Label& target = *($labl$$label); ++ int flag = $cmp$$cmpcode; ++ ++ if (val == 0) { ++ __ cmp_branch_short(flag, op1, R0, target, false /* signed */); ++ } else { ++ __ li(AT, val); ++ __ cmp_branch_short(flag, op1, AT, target, false /* signed */); ++ } ++ %} ++ ++ ins_pc_relative(1); ++ ins_pipe(pipe_alu_branch); ++ ins_short_branch(1); ++%} ++ ++//FIXME ++instruct branchConF_reg_reg_short(cmpOp cmp, regF src1, regF src2, label labl) %{ ++ match( If cmp (CmpF src1 src2) ); ++ effect(USE labl); ++ format %{ "BR$cmp $src1, $src2, $labl #@branchConF_reg_reg_short" %} ++ ++ ins_encode %{ ++ FloatRegister reg_op1 = $src1$$FloatRegister; ++ FloatRegister reg_op2 = $src2$$FloatRegister; ++ Label &L = *($labl$$label); ++ int flag = $cmp$$cmpcode; ++ ++ switch(flag) { ++ case 0x01: //equal ++ __ fcmp_ceq_s(FCC0, reg_op1, reg_op2); ++ if (&L) ++ __ bcnez(FCC0, L); ++ else ++ __ bcnez(FCC0, (int)0); ++ break; ++ case 0x02: //not_equal ++ __ fcmp_ceq_s(FCC0, reg_op1, reg_op2); ++ if (&L) ++ __ bceqz(FCC0, L); ++ else ++ __ bceqz(FCC0, (int)0); ++ break; ++ case 0x03: //greater ++ __ fcmp_cule_s(FCC0, reg_op1, reg_op2); ++ if(&L) ++ __ bceqz(FCC0, L); ++ else ++ __ bceqz(FCC0, (int)0); ++ break; ++ case 0x04: //greater_equal ++ __ fcmp_cult_s(FCC0, reg_op1, reg_op2); ++ if(&L) ++ __ bceqz(FCC0, L); ++ else ++ __ bceqz(FCC0, (int)0); ++ break; ++ case 0x05: //less ++ __ fcmp_cult_s(FCC0, reg_op1, reg_op2); ++ if(&L) ++ __ bcnez(FCC0, L); ++ else ++ __ bcnez(FCC0, (int)0); ++ break; ++ case 0x06: //less_equal ++ __ fcmp_cule_s(FCC0, reg_op1, reg_op2); ++ if(&L) ++ __ bcnez(FCC0, L); ++ else ++ __ bcnez(FCC0, (int)0); ++ break; ++ default: ++ Unimplemented(); ++ } ++ %} ++ ++ ins_pc_relative(1); ++ ins_pipe(pipe_fpu_branch); ++ ins_short_branch(1); ++%} ++ ++instruct branchConD_reg_reg_short(cmpOp cmp, regD src1, regD src2, label labl) %{ ++ match( If cmp (CmpD src1 src2) ); ++ effect(USE labl); ++ format %{ "BR$cmp $src1, $src2, $labl #@branchConD_reg_reg_short" %} ++ ++ ins_encode %{ ++ FloatRegister reg_op1 = $src1$$FloatRegister; ++ FloatRegister reg_op2 = $src2$$FloatRegister; ++ Label &L = *($labl$$label); ++ int flag = $cmp$$cmpcode; ++ ++ switch(flag) 
{ ++ case 0x01: //equal ++ __ fcmp_ceq_d(FCC0, reg_op1, reg_op2); ++ if (&L) ++ __ bcnez(FCC0, L); ++ else ++ __ bcnez(FCC0, (int)0); ++ break; ++ case 0x02: //not_equal ++ // c_ueq_d cannot distinguish NaN from equal. Double.isNaN(Double) is implemented by 'f != f', so the use of c_ueq_d causes bugs. ++ __ fcmp_ceq_d(FCC0, reg_op1, reg_op2); ++ if (&L) ++ __ bceqz(FCC0, L); ++ else ++ __ bceqz(FCC0, (int)0); ++ break; ++ case 0x03: //greater ++ __ fcmp_cule_d(FCC0, reg_op1, reg_op2); ++ if(&L) ++ __ bceqz(FCC0, L); ++ else ++ __ bceqz(FCC0, (int)0); ++ break; ++ case 0x04: //greater_equal ++ __ fcmp_cult_d(FCC0, reg_op1, reg_op2); ++ if(&L) ++ __ bceqz(FCC0, L); ++ else ++ __ bceqz(FCC0, (int)0); ++ break; ++ case 0x05: //less ++ __ fcmp_cult_d(FCC0, reg_op1, reg_op2); ++ if(&L) ++ __ bcnez(FCC0, L); ++ else ++ __ bcnez(FCC0, (int)0); ++ break; ++ case 0x06: //less_equal ++ __ fcmp_cule_d(FCC0, reg_op1, reg_op2); ++ if(&L) ++ __ bcnez(FCC0, L); ++ else ++ __ bcnez(FCC0, (int)0); ++ break; ++ default: ++ Unimplemented(); ++ } ++ %} ++ ++ ins_pc_relative(1); ++ ins_pipe(pipe_fpu_branch); ++ ins_short_branch(1); ++%} ++ ++// =================== End of branch instructions ========================== ++ ++// Call Runtime Instruction ++instruct CallRuntimeDirect(method meth) %{ ++ match(CallRuntime ); ++ effect(USE meth); ++ ++ ins_cost(300); ++ format %{ "CALL,runtime #@CallRuntimeDirect" %} ++ ins_encode( Java_To_Runtime( meth ) ); ++ ins_pipe( pipe_slow ); ++ ins_alignment(4); ++%} ++ ++ ++ ++//------------------------MemBar Instructions------------------------------- ++//Memory barrier flavors ++ ++instruct unnecessary_membar_acquire() %{ ++ predicate(unnecessary_acquire(n)); ++ match(MemBarAcquire); ++ ins_cost(0); ++ ++ format %{ "membar_acquire (elided)" %} ++ ++ ins_encode %{ ++ __ block_comment("membar_acquire (elided)"); ++ %} ++ ++ ins_pipe(empty); ++%} ++ ++instruct membar_acquire() %{ ++ match(MemBarAcquire); ++ ins_cost(400); ++ ++ format %{ "MEMBAR-acquire @ membar_acquire" %} ++ ins_encode %{ ++ __ membar(Assembler::Membar_mask_bits(__ LoadLoad|__ LoadStore)); ++ %} ++ ins_pipe(empty); ++%} ++ ++instruct load_fence() %{ ++ match(LoadFence); ++ ins_cost(400); ++ ++ format %{ "MEMBAR @ load_fence" %} ++ ins_encode %{ ++ __ membar(Assembler::Membar_mask_bits(__ LoadLoad|__ LoadStore)); ++ %} ++ ins_pipe(pipe_slow); ++%} ++ ++instruct membar_acquire_lock() ++%{ ++ match(MemBarAcquireLock); ++ ins_cost(0); ++ ++ size(0); ++ format %{ "MEMBAR-acquire (acquire as part of CAS in prior FastLock so empty encoding) @ membar_acquire_lock" %} ++ ins_encode(); ++ ins_pipe(empty); ++%} ++ ++instruct unnecessary_membar_release() %{ ++ predicate(unnecessary_release(n)); ++ match(MemBarRelease); ++ ins_cost(0); ++ ++ format %{ "membar_release (elided)" %} ++ ++ ins_encode %{ ++ __ block_comment("membar_release (elided)"); ++ %} ++ ins_pipe(pipe_slow); ++%} ++ ++instruct membar_release() %{ ++ match(MemBarRelease); ++ ins_cost(400); ++ ++ format %{ "MEMBAR-release @ membar_release" %} ++ ++ ins_encode %{ ++ // Attention: DO NOT DELETE THIS GUY! 
++ __ membar(Assembler::Membar_mask_bits(__ LoadStore|__ StoreStore)); ++ %} ++ ++ ins_pipe(pipe_slow); ++%} ++ ++instruct store_fence() %{ ++ match(StoreFence); ++ ins_cost(400); ++ ++ format %{ "MEMBAR @ store_fence" %} ++ ++ ins_encode %{ ++ __ membar(Assembler::Membar_mask_bits(__ LoadStore|__ StoreStore)); ++ %} ++ ++ ins_pipe(pipe_slow); ++%} ++ ++instruct membar_release_lock() ++%{ ++ match(MemBarReleaseLock); ++ ins_cost(0); ++ ++ size(0); ++ format %{ "MEMBAR-release-lock (release in FastUnlock so empty) @ membar_release_lock" %} ++ ins_encode(); ++ ins_pipe(empty); ++%} ++ ++instruct unnecessary_membar_volatile() %{ ++ predicate(unnecessary_volatile(n)); ++ match(MemBarVolatile); ++ ins_cost(0); ++ ++ format %{ "membar_volatile (elided)" %} ++ ++ ins_encode %{ ++ __ block_comment("membar_volatile (elided)"); ++ %} ++ ++ ins_pipe(pipe_slow); ++%} ++ ++instruct membar_volatile() %{ ++ match(MemBarVolatile); ++ ins_cost(400); ++ ++ format %{ "MEMBAR-volatile" %} ++ ins_encode %{ ++ if( !os::is_MP() ) return; // Not needed on single CPU ++ __ membar(__ StoreLoad); ++ ++ %} ++ ins_pipe(pipe_slow); ++%} ++ ++instruct membar_storestore() %{ ++ match(MemBarStoreStore); ++ ++ ins_cost(400); ++ format %{ "MEMBAR-storestore @ membar_storestore" %} ++ ins_encode %{ ++ __ membar(__ StoreStore); ++ %} ++ ins_pipe(empty); ++%} ++ ++//----------Move Instructions-------------------------------------------------- ++instruct castX2P(mRegP dst, mRegL src) %{ ++ match(Set dst (CastX2P src)); ++ format %{ "castX2P $dst, $src @ castX2P" %} ++ ins_encode %{ ++ Register src = $src$$Register; ++ Register dst = $dst$$Register; ++ ++ if(src != dst) ++ __ move(dst, src); ++ %} ++ ins_cost(10); ++ ins_pipe( ialu_regI_mov ); ++%} ++ ++instruct castP2X(mRegL dst, mRegP src ) %{ ++ match(Set dst (CastP2X src)); ++ ++ format %{ "mov $dst, $src\t #@castP2X" %} ++ ins_encode %{ ++ Register src = $src$$Register; ++ Register dst = $dst$$Register; ++ ++ if(src != dst) ++ __ move(dst, src); ++ %} ++ ins_pipe( ialu_regI_mov ); ++%} ++ ++instruct MoveF2I_reg_reg(mRegI dst, regF src) %{ ++ match(Set dst (MoveF2I src)); ++ effect(DEF dst, USE src); ++ ins_cost(85); ++ format %{ "MoveF2I $dst, $src @ MoveF2I_reg_reg" %} ++ ins_encode %{ ++ Register dst = as_Register($dst$$reg); ++ FloatRegister src = as_FloatRegister($src$$reg); ++ ++ __ movfr2gr_s(dst, src); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct MoveI2F_reg_reg(regF dst, mRegI src) %{ ++ match(Set dst (MoveI2F src)); ++ effect(DEF dst, USE src); ++ ins_cost(85); ++ format %{ "MoveI2F $dst, $src @ MoveI2F_reg_reg" %} ++ ins_encode %{ ++ Register src = as_Register($src$$reg); ++ FloatRegister dst = as_FloatRegister($dst$$reg); ++ ++ __ movgr2fr_w(dst, src); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct MoveD2L_reg_reg(mRegL dst, regD src) %{ ++ match(Set dst (MoveD2L src)); ++ effect(DEF dst, USE src); ++ ins_cost(85); ++ format %{ "MoveD2L $dst, $src @ MoveD2L_reg_reg" %} ++ ins_encode %{ ++ Register dst = as_Register($dst$$reg); ++ FloatRegister src = as_FloatRegister($src$$reg); ++ ++ __ movfr2gr_d(dst, src); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct MoveL2D_reg_reg(regD dst, mRegL src) %{ ++ match(Set dst (MoveL2D src)); ++ effect(DEF dst, USE src); ++ ins_cost(85); ++ format %{ "MoveL2D $dst, $src @ MoveL2D_reg_reg" %} ++ ins_encode %{ ++ FloatRegister dst = as_FloatRegister($dst$$reg); ++ Register src = as_Register($src$$reg); ++ ++ __ movgr2fr_d(dst, src); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++//----------Conditional 
Move--------------------------------------------------- ++// Conditional move ++instruct cmovI_cmpI_reg_reg(mRegI dst, mRegI src1, mRegI src2, cmpOp cop ) %{ ++ match(Set dst (CMoveI (Binary cop (CmpI src1 src2)) (Binary src1 src2))); ++ ins_cost(50); ++ format %{ ++ "CMP$cop $src1, $src2\t @cmovI_cmpI_reg_reg\n" ++ "\tCMOV $dst,$src1, $src2 \t @cmovI_cmpI_reg_reg" ++ %} ++ ++ ins_encode %{ ++ Register op1 = $src1$$Register; ++ Register op2 = $src2$$Register; ++ Register dst = $dst$$Register; ++ int flag = $cop$$cmpcode; ++ ++ __ cmp_cmov(op1, op2, dst, op1, op2, (MacroAssembler::CMCompare) flag, true); ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct cmovI_cmpI_reg_reg2(mRegI dst, mRegI src1, mRegI src2, cmpOp cop ) %{ ++ match(Set dst (CMoveI (Binary cop (CmpI src1 src2)) (Binary src2 src1))); ++ ins_cost(50); ++ format %{ ++ "CMP$cop $src1, $src2\t @cmovI_cmpI_reg_reg2\n" ++ "\tCMOV $dst,$src2, $src1 \t @cmovI_cmpI_reg_reg2" ++ %} ++ ++ ins_encode %{ ++ Register op1 = $src1$$Register; ++ Register op2 = $src2$$Register; ++ Register dst = $dst$$Register; ++ int flag = $cop$$cmpcode; ++ ++ __ cmp_cmov(op1, op2, dst, op2, op1, (MacroAssembler::CMCompare) flag, true); ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct cmovI_cmpI_dst_reg(mRegI dst, mRegI src, mRegI tmp1, mRegI tmp2, cmpOp cop ) %{ ++ match(Set dst (CMoveI (Binary cop (CmpI tmp1 tmp2)) (Binary dst src))); ++ ins_cost(80); ++ format %{ ++ "CMP$cop $tmp1, $tmp2\t @cmovI_cmpI_dst_reg\n" ++ "\tCMOV $dst,$src \t @cmovI_cmpI_dst_reg" ++ %} ++ ++ ins_encode %{ ++ Register op1 = $tmp1$$Register; ++ Register op2 = $tmp2$$Register; ++ Register dst = $dst$$Register; ++ Register src = $src$$Register; ++ int flag = $cop$$cmpcode; ++ ++ __ cmp_cmov(op1, op2, dst, src, (MacroAssembler::CMCompare) flag, true /* is_signed */); ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct cmovI_cmpP_reg_reg(mRegI dst, mRegI src, mRegP tmp1, mRegP tmp2, cmpOp cop ) %{ ++ match(Set dst (CMoveI (Binary cop (CmpP tmp1 tmp2)) (Binary dst src))); ++ ins_cost(80); ++ format %{ ++ "CMPU$cop $tmp1,$tmp2\t @cmovI_cmpP_reg_reg\n\t" ++ "CMOV $dst,$src\t @cmovI_cmpP_reg_reg" ++ %} ++ ins_encode %{ ++ Register op1 = $tmp1$$Register; ++ Register op2 = $tmp2$$Register; ++ Register dst = $dst$$Register; ++ Register src = $src$$Register; ++ int flag = $cop$$cmpcode; ++ ++ __ cmp_cmov(op1, op2, dst, src, (MacroAssembler::CMCompare) flag, false /* is_signed */); ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++ ++instruct cmovI_cmpN_reg_reg(mRegI dst, mRegI src, mRegN tmp1, mRegN tmp2, cmpOp cop ) %{ ++ match(Set dst (CMoveI (Binary cop (CmpN tmp1 tmp2)) (Binary dst src))); ++ ins_cost(80); ++ format %{ ++ "CMPU$cop $tmp1,$tmp2\t @cmovI_cmpN_reg_reg\n\t" ++ "CMOV $dst,$src\t @cmovI_cmpN_reg_reg" ++ %} ++ ins_encode %{ ++ Register op1 = $tmp1$$Register; ++ Register op2 = $tmp2$$Register; ++ Register dst = $dst$$Register; ++ Register src = $src$$Register; ++ int flag = $cop$$cmpcode; ++ ++ __ cmp_cmov(op1, op2, dst, src, (MacroAssembler::CMCompare) flag, false /* is_signed */); ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct cmovP_cmpU_reg_reg(mRegP dst, mRegP src, mRegI tmp1, mRegI tmp2, cmpOp cop ) %{ ++ match(Set dst (CMoveP (Binary cop (CmpU tmp1 tmp2)) (Binary dst src))); ++ ins_cost(80); ++ format %{ ++ "CMPU$cop $tmp1,$tmp2\t @cmovP_cmpU_reg_reg\n\t" ++ "CMOV $dst,$src\t @cmovP_cmpU_reg_reg" ++ %} ++ ins_encode %{ ++ Register op1 = $tmp1$$Register; ++ Register op2 = $tmp2$$Register; ++ Register dst = $dst$$Register; ++ Register src = $src$$Register; ++ int 
flag = $cop$$cmpcode; ++ ++ __ cmp_cmov(op1, op2, dst, src, (MacroAssembler::CMCompare) flag, false /* is_signed */); ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct cmovP_cmpF_reg_reg(mRegP dst, mRegP src, regF tmp1, regF tmp2, cmpOp cop, regD tmp3, regD tmp4) %{ ++ match(Set dst (CMoveP (Binary cop (CmpF tmp1 tmp2)) (Binary dst src))); ++ effect(TEMP tmp3, TEMP tmp4); ++ ins_cost(80); ++ format %{ ++ "CMP$cop $tmp1, $tmp2\t @cmovP_cmpF_reg_reg\n" ++ "\tCMOV $dst,$src \t @cmovP_cmpF_reg_reg" ++ %} ++ ++ ins_encode %{ ++ FloatRegister reg_op1 = $tmp1$$FloatRegister; ++ FloatRegister reg_op2 = $tmp2$$FloatRegister; ++ FloatRegister tmp1 = $tmp3$$FloatRegister; ++ FloatRegister tmp2 = $tmp4$$FloatRegister; ++ Register dst = $dst$$Register; ++ Register src = $src$$Register; ++ int flag = $cop$$cmpcode; ++ ++ __ cmp_cmov(reg_op1, reg_op2, dst, src, tmp1, tmp2, (MacroAssembler::CMCompare) flag, true /* is_float */); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct cmovP_cmpN_reg_reg(mRegP dst, mRegP src, mRegN tmp1, mRegN tmp2, cmpOp cop ) %{ ++ match(Set dst (CMoveP (Binary cop (CmpN tmp1 tmp2)) (Binary dst src))); ++ ins_cost(80); ++ format %{ ++ "CMPU$cop $tmp1,$tmp2\t @cmovP_cmpN_reg_reg\n\t" ++ "CMOV $dst,$src\t @cmovP_cmpN_reg_reg" ++ %} ++ ins_encode %{ ++ Register op1 = $tmp1$$Register; ++ Register op2 = $tmp2$$Register; ++ Register dst = $dst$$Register; ++ Register src = $src$$Register; ++ int flag = $cop$$cmpcode; ++ ++ __ cmp_cmov(op1, op2, dst, src, (MacroAssembler::CMCompare) flag, false /* is_signed */); ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct cmovN_cmpP_reg_reg(mRegN dst, mRegN src, mRegP tmp1, mRegP tmp2, cmpOp cop ) %{ ++ match(Set dst (CMoveN (Binary cop (CmpP tmp1 tmp2)) (Binary dst src))); ++ ins_cost(80); ++ format %{ ++ "CMPU$cop $tmp1,$tmp2\t @cmovN_cmpP_reg_reg\n\t" ++ "CMOV $dst,$src\t @cmovN_cmpP_reg_reg" ++ %} ++ ins_encode %{ ++ Register op1 = $tmp1$$Register; ++ Register op2 = $tmp2$$Register; ++ Register dst = $dst$$Register; ++ Register src = $src$$Register; ++ int flag = $cop$$cmpcode; ++ ++ __ cmp_cmov(op1, op2, dst, src, (MacroAssembler::CMCompare) flag, false /* is_signed */); ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct cmovP_cmpD_reg_reg(mRegP dst, mRegP src, regD tmp1, regD tmp2, cmpOp cop, regD tmp3, regD tmp4) %{ ++ match(Set dst (CMoveP (Binary cop (CmpD tmp1 tmp2)) (Binary dst src))); ++ effect(TEMP tmp3, TEMP tmp4); ++ ins_cost(80); ++ format %{ ++ "CMP$cop $tmp1, $tmp2\t @cmovP_cmpD_reg_reg\n" ++ "\tCMOV $dst,$src \t @cmovP_cmpD_reg_reg" ++ %} ++ ins_encode %{ ++ FloatRegister reg_op1 = as_FloatRegister($tmp1$$reg); ++ FloatRegister reg_op2 = as_FloatRegister($tmp2$$reg); ++ FloatRegister tmp1 = $tmp3$$FloatRegister; ++ FloatRegister tmp2 = $tmp4$$FloatRegister; ++ Register dst = as_Register($dst$$reg); ++ Register src = as_Register($src$$reg); ++ int flag = $cop$$cmpcode; ++ ++ __ cmp_cmov(reg_op1, reg_op2, dst, src, tmp1, tmp2, (MacroAssembler::CMCompare) flag, false /* is_float */); ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct cmovN_cmpN_reg_reg(mRegN dst, mRegN src, mRegN tmp1, mRegN tmp2, cmpOp cop ) %{ ++ match(Set dst (CMoveN (Binary cop (CmpN tmp1 tmp2)) (Binary dst src))); ++ ins_cost(80); ++ format %{ ++ "CMPU$cop $tmp1,$tmp2\t @cmovN_cmpN_reg_reg\n\t" ++ "CMOV $dst,$src\t @cmovN_cmpN_reg_reg" ++ %} ++ ins_encode %{ ++ Register op1 = $tmp1$$Register; ++ Register op2 = $tmp2$$Register; ++ Register dst = $dst$$Register; ++ Register src = $src$$Register; ++ int flag = $cop$$cmpcode; ++ ++ __ cmp_cmov(op1, 
op2, dst, src, (MacroAssembler::CMCompare) flag, false /* is_signed */); ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct cmovI_cmpU_reg_reg(mRegI dst, mRegI src, mRegI tmp1, mRegI tmp2, cmpOp cop ) %{ ++ match(Set dst (CMoveI (Binary cop (CmpU tmp1 tmp2)) (Binary dst src))); ++ ins_cost(80); ++ format %{ ++ "CMPU$cop $tmp1,$tmp2\t @cmovI_cmpU_reg_reg\n\t" ++ "CMOV $dst,$src\t @cmovI_cmpU_reg_reg" ++ %} ++ ins_encode %{ ++ Register op1 = $tmp1$$Register; ++ Register op2 = $tmp2$$Register; ++ Register dst = $dst$$Register; ++ Register src = $src$$Register; ++ int flag = $cop$$cmpcode; ++ ++ __ cmp_cmov(op1, op2, dst, src, (MacroAssembler::CMCompare) flag, false /* is_signed */); ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct cmovI_cmpL_reg_reg(mRegI dst, mRegIorL2I src, mRegLorI2L tmp1, mRegLorI2L tmp2, cmpOp cop ) %{ ++ match(Set dst (CMoveI (Binary cop (CmpL tmp1 tmp2)) (Binary dst src))); ++ ins_cost(80); ++ format %{ ++ "CMP$cop $tmp1, $tmp2\t @cmovI_cmpL_reg_reg\n" ++ "\tCMOV $dst,$src \t @cmovI_cmpL_reg_reg" ++ %} ++ ins_encode %{ ++ Register opr1 = as_Register($tmp1$$reg); ++ Register opr2 = as_Register($tmp2$$reg); ++ Register dst = $dst$$Register; ++ Register src = $src$$Register; ++ int flag = $cop$$cmpcode; ++ ++ __ cmp_cmov(opr1, opr2, dst, src, (MacroAssembler::CMCompare) flag, true /* is_signed */); ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct cmovI_cmpUL_reg_reg(mRegI dst, mRegIorL2I src, mRegLorI2L tmp1, mRegLorI2L tmp2, cmpOp cop) %{ ++ match(Set dst (CMoveI (Binary cop (CmpUL tmp1 tmp2)) (Binary dst src))); ++ ins_cost(80); ++ format %{ ++ "CMP$cop $tmp1, $tmp2\t @cmovI_cmpUL_reg_reg\n" ++ "\tCMOV $dst,$src \t @cmovI_cmpUL_reg_reg" ++ %} ++ ins_encode %{ ++ Register opr1 = as_Register($tmp1$$reg); ++ Register opr2 = as_Register($tmp2$$reg); ++ Register dst = $dst$$Register; ++ Register src = $src$$Register; ++ int flag = $cop$$cmpcode; ++ ++ __ cmp_cmov(opr1, opr2, dst, src, (MacroAssembler::CMCompare) flag, false /* is_signed */); ++ %} ++ ++ ins_pipe(pipe_slow); ++%} ++ ++instruct cmovP_cmpL_reg_reg(mRegP dst, mRegP src, mRegLorI2L tmp1, mRegLorI2L tmp2, cmpOp cop ) %{ ++ match(Set dst (CMoveP (Binary cop (CmpL tmp1 tmp2)) (Binary dst src))); ++ ins_cost(80); ++ format %{ ++ "CMP$cop $tmp1, $tmp2\t @cmovP_cmpL_reg_reg\n" ++ "\tCMOV $dst,$src \t @cmovP_cmpL_reg_reg" ++ %} ++ ins_encode %{ ++ Register opr1 = as_Register($tmp1$$reg); ++ Register opr2 = as_Register($tmp2$$reg); ++ Register dst = $dst$$Register; ++ Register src = $src$$Register; ++ int flag = $cop$$cmpcode; ++ ++ __ cmp_cmov(opr1, opr2, dst, src, (MacroAssembler::CMCompare) flag, true /* is_signed */); ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct cmovP_cmpUL_reg_reg(mRegP dst, mRegP src, mRegLorI2L tmp1, mRegLorI2L tmp2, cmpOp cop) %{ ++ match(Set dst (CMoveP (Binary cop (CmpUL tmp1 tmp2)) (Binary dst src))); ++ ins_cost(80); ++ format %{ ++ "CMP$cop $tmp1, $tmp2\t @cmovP_cmpUL_reg_reg\n" ++ "\tCMOV $dst,$src \t @cmovP_cmpUL_reg_reg" ++ %} ++ ins_encode %{ ++ Register opr1 = as_Register($tmp1$$reg); ++ Register opr2 = as_Register($tmp2$$reg); ++ Register dst = $dst$$Register; ++ Register src = $src$$Register; ++ int flag = $cop$$cmpcode; ++ ++ __ cmp_cmov(opr1, opr2, dst, src, (MacroAssembler::CMCompare) flag, false /* is_signed */); ++ %} ++ ++ ins_pipe(pipe_slow); ++%} ++ ++instruct cmovI_cmpD_reg_reg(mRegI dst, mRegI src, regD tmp1, regD tmp2, cmpOp cop, regD tmp3, regD tmp4) %{ ++ match(Set dst (CMoveI (Binary cop (CmpD tmp1 tmp2)) (Binary dst src))); ++ effect(TEMP tmp3, 
TEMP tmp4); ++ ins_cost(80); ++ format %{ ++ "CMP$cop $tmp1, $tmp2\t @cmovI_cmpD_reg_reg\n" ++ "\tCMOV $dst,$src \t @cmovI_cmpD_reg_reg" ++ %} ++ ins_encode %{ ++ FloatRegister reg_op1 = as_FloatRegister($tmp1$$reg); ++ FloatRegister reg_op2 = as_FloatRegister($tmp2$$reg); ++ FloatRegister tmp1 = $tmp3$$FloatRegister; ++ FloatRegister tmp2 = $tmp4$$FloatRegister; ++ Register dst = as_Register($dst$$reg); ++ Register src = as_Register($src$$reg); ++ int flag = $cop$$cmpcode; ++ ++ __ cmp_cmov(reg_op1, reg_op2, dst, src, tmp1, tmp2, (MacroAssembler::CMCompare) flag, false /* is_float */); ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct cmovP_cmpP_reg_reg(mRegP dst, mRegP src, mRegP tmp1, mRegP tmp2, cmpOp cop ) %{ ++ match(Set dst (CMoveP (Binary cop (CmpP tmp1 tmp2)) (Binary dst src))); ++ ins_cost(80); ++ format %{ ++ "CMPU$cop $tmp1,$tmp2\t @cmovP_cmpP_reg_reg\n\t" ++ "CMOV $dst,$src\t @cmovP_cmpP_reg_reg" ++ %} ++ ins_encode %{ ++ Register op1 = $tmp1$$Register; ++ Register op2 = $tmp2$$Register; ++ Register dst = $dst$$Register; ++ Register src = $src$$Register; ++ int flag = $cop$$cmpcode; ++ ++ __ cmp_cmov(op1, op2, dst, src, (MacroAssembler::CMCompare) flag, false /* is_signed */); ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct cmovP_cmpI_reg_reg(mRegP dst, mRegP src, mRegI tmp1, mRegI tmp2, cmpOp cop ) %{ ++ match(Set dst (CMoveP (Binary cop (CmpI tmp1 tmp2)) (Binary dst src))); ++ ins_cost(80); ++ format %{ ++ "CMP$cop $tmp1,$tmp2\t @cmovP_cmpI_reg_reg\n\t" ++ "CMOV $dst,$src\t @cmovP_cmpI_reg_reg" ++ %} ++ ins_encode %{ ++ Register op1 = $tmp1$$Register; ++ Register op2 = $tmp2$$Register; ++ Register dst = $dst$$Register; ++ Register src = $src$$Register; ++ int flag = $cop$$cmpcode; ++ ++ __ cmp_cmov(op1, op2, dst, src, (MacroAssembler::CMCompare) flag, true /* is_signed */); ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct cmovL_cmpP_reg_reg(mRegL dst, mRegL src, mRegP tmp1, mRegP tmp2, cmpOp cop ) %{ ++ match(Set dst (CMoveL (Binary cop (CmpP tmp1 tmp2)) (Binary dst src))); ++ ins_cost(80); ++ format %{ ++ "CMPU$cop $tmp1,$tmp2\t @cmovL_cmpP_reg_reg\n\t" ++ "CMOV $dst,$src\t @cmovL_cmpP_reg_reg" ++ %} ++ ins_encode %{ ++ Register op1 = $tmp1$$Register; ++ Register op2 = $tmp2$$Register; ++ Register dst = $dst$$Register; ++ Register src = $src$$Register; ++ int flag = $cop$$cmpcode; ++ Label L; ++ ++ __ cmp_cmov(op1, op2, dst, src, (MacroAssembler::CMCompare) flag, false /* is_signed */); ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct cmovN_cmpU_reg_reg(mRegN dst, mRegN src, mRegI tmp1, mRegI tmp2, cmpOp cop ) %{ ++ match(Set dst (CMoveN (Binary cop (CmpU tmp1 tmp2)) (Binary dst src))); ++ ins_cost(80); ++ format %{ ++ "CMPU$cop $tmp1,$tmp2\t @cmovN_cmpU_reg_reg\n\t" ++ "CMOV $dst,$src\t @cmovN_cmpU_reg_reg" ++ %} ++ ins_encode %{ ++ Register op1 = $tmp1$$Register; ++ Register op2 = $tmp2$$Register; ++ Register dst = $dst$$Register; ++ Register src = $src$$Register; ++ int flag = $cop$$cmpcode; ++ ++ __ cmp_cmov(op1, op2, dst, src, (MacroAssembler::CMCompare) flag, false /* is_signed */); ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct cmovN_cmpL_reg_reg(mRegN dst, mRegN src, mRegL tmp1, mRegL tmp2, cmpOp cop) %{ ++ match(Set dst (CMoveN (Binary cop (CmpL tmp1 tmp2)) (Binary dst src))); ++ ins_cost(80); ++ format %{ ++ "CMP$cop $tmp1, $tmp2\t @cmovN_cmpL_reg_reg\n" ++ "\tCMOV $dst,$src \t @cmovN_cmpL_reg_reg" ++ %} ++ ins_encode %{ ++ Register opr1 = as_Register($tmp1$$reg); ++ Register opr2 = as_Register($tmp2$$reg); ++ Register dst = 
$dst$$Register; ++ Register src = $src$$Register; ++ int flag = $cop$$cmpcode; ++ ++ __ cmp_cmov(opr1, opr2, dst, src, (MacroAssembler::CMCompare) flag, true /* is_signed */); ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct cmovN_cmpUL_reg_reg(mRegN dst, mRegN src, mRegL tmp1, mRegL tmp2, cmpOp cop) %{ ++ match(Set dst (CMoveN (Binary cop (CmpUL tmp1 tmp2)) (Binary dst src))); ++ ins_cost(80); ++ format %{ ++ "CMP$cop $tmp1, $tmp2\t @cmovN_cmpUL_reg_reg\n" ++ "\tCMOV $dst,$src \t @cmovN_cmpUL_reg_reg" ++ %} ++ ins_encode %{ ++ Register opr1 = as_Register($tmp1$$reg); ++ Register opr2 = as_Register($tmp2$$reg); ++ Register dst = $dst$$Register; ++ Register src = $src$$Register; ++ int flag = $cop$$cmpcode; ++ ++ __ cmp_cmov(opr1, opr2, dst, src, (MacroAssembler::CMCompare) flag, false /* is_signed */); ++ %} ++ ++ ins_pipe(pipe_slow); ++%} ++ ++instruct cmovN_cmpI_reg_reg(mRegN dst, mRegN src, mRegI tmp1, mRegI tmp2, cmpOp cop ) %{ ++ match(Set dst (CMoveN (Binary cop (CmpI tmp1 tmp2)) (Binary dst src))); ++ ins_cost(80); ++ format %{ ++ "CMP$cop $tmp1,$tmp2\t @cmovN_cmpI_reg_reg\n\t" ++ "CMOV $dst,$src\t @cmovN_cmpI_reg_reg" ++ %} ++ ins_encode %{ ++ Register op1 = $tmp1$$Register; ++ Register op2 = $tmp2$$Register; ++ Register dst = $dst$$Register; ++ Register src = $src$$Register; ++ int flag = $cop$$cmpcode; ++ ++ __ cmp_cmov(op1, op2, dst, src, (MacroAssembler::CMCompare) flag, true /* is_signed */); ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct cmovL_cmpU_reg_reg(mRegL dst, mRegL src, mRegI tmp1, mRegI tmp2, cmpOp cop ) %{ ++ match(Set dst (CMoveL (Binary cop (CmpU tmp1 tmp2)) (Binary dst src))); ++ ins_cost(80); ++ format %{ ++ "CMPU$cop $tmp1,$tmp2\t @cmovL_cmpU_reg_reg\n\t" ++ "CMOV $dst,$src\t @cmovL_cmpU_reg_reg" ++ %} ++ ins_encode %{ ++ Register op1 = $tmp1$$Register; ++ Register op2 = $tmp2$$Register; ++ Register dst = $dst$$Register; ++ Register src = $src$$Register; ++ int flag = $cop$$cmpcode; ++ ++ __ cmp_cmov(op1, op2, dst, src, (MacroAssembler::CMCompare) flag, false /* is_signed */); ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct cmovL_cmpF_reg_reg(mRegL dst, mRegL src, regF tmp1, regF tmp2, cmpOp cop, regD tmp3, regD tmp4) %{ ++ match(Set dst (CMoveL (Binary cop (CmpF tmp1 tmp2)) (Binary dst src))); ++ effect(TEMP tmp3, TEMP tmp4); ++ ins_cost(80); ++ format %{ ++ "CMP$cop $tmp1, $tmp2\t @cmovL_cmpF_reg_reg\n" ++ "\tCMOV $dst,$src \t @cmovL_cmpF_reg_reg" ++ %} ++ ++ ins_encode %{ ++ FloatRegister reg_op1 = $tmp1$$FloatRegister; ++ FloatRegister reg_op2 = $tmp2$$FloatRegister; ++ FloatRegister tmp1 = $tmp3$$FloatRegister; ++ FloatRegister tmp2 = $tmp4$$FloatRegister; ++ Register dst = $dst$$Register; ++ Register src = $src$$Register; ++ int flag = $cop$$cmpcode; ++ ++ __ cmp_cmov(reg_op1, reg_op2, dst, src, tmp1, tmp2, (MacroAssembler::CMCompare) flag, true /* is_float */); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct cmovL_cmpI_reg_reg(mRegL dst, mRegL src, mRegI tmp1, mRegI tmp2, cmpOp cop ) %{ ++ match(Set dst (CMoveL (Binary cop (CmpI tmp1 tmp2)) (Binary dst src))); ++ ins_cost(80); ++ format %{ ++ "CMP$cop $tmp1, $tmp2\t @cmovL_cmpI_reg_reg\n" ++ "\tCMOV $dst,$src \t @cmovL_cmpI_reg_reg" ++ %} ++ ++ ins_encode %{ ++ Register op1 = $tmp1$$Register; ++ Register op2 = $tmp2$$Register; ++ Register dst = as_Register($dst$$reg); ++ Register src = as_Register($src$$reg); ++ int flag = $cop$$cmpcode; ++ ++ __ cmp_cmov(op1, op2, dst, src, (MacroAssembler::CMCompare) flag, true /* is_signed */); ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct 
cmovL_cmpL_reg_reg(mRegL dst, mRegL src1, mRegL src2, cmpOp cop ) %{ ++ match(Set dst (CMoveL (Binary cop (CmpL src1 src2)) (Binary src1 src2))); ++ ins_cost(50); ++ format %{ ++ "CMP$cop $src1, $src2\t @cmovL_cmpL_reg_reg\n" ++ "\tCMOV $dst,$src1, $src2 \t @cmovL_cmpL_reg_reg" ++ %} ++ ++ ins_encode %{ ++ Register op1 = $src1$$Register; ++ Register op2 = $src2$$Register; ++ Register dst = $dst$$Register; ++ int flag = $cop$$cmpcode; ++ ++ __ cmp_cmov(op1, op2, dst, op1, op2, (MacroAssembler::CMCompare) flag, true); ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct cmovL_cmpUL_reg_reg(mRegL dst, mRegL src1, mRegL src2, cmpOp cop) %{ ++ match(Set dst (CMoveL (Binary cop (CmpUL src1 src2)) (Binary src1 src2))); ++ ins_cost(50); ++ format %{ ++ "CMP$cop $src1, $src2\t @cmovL_cmpUL_reg_reg\n" ++ "\tCMOV $dst,$src1, $src2 \t @cmovL_cmpUL_reg_reg" ++ %} ++ ++ ins_encode %{ ++ Register op1 = $src1$$Register; ++ Register op2 = $src2$$Register; ++ Register dst = $dst$$Register; ++ int flag = $cop$$cmpcode; ++ ++ __ cmp_cmov(op1, op2, dst, op1, op2, (MacroAssembler::CMCompare) flag, false /* is_signed */); ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct cmovL_cmpL_reg_reg2(mRegL dst, mRegL src1, mRegL src2, cmpOp cop ) %{ ++ match(Set dst (CMoveL (Binary cop (CmpL src1 src2)) (Binary src2 src1))); ++ ins_cost(50); ++ format %{ ++ "CMP$cop $src1, $src2\t @cmovL_cmpL_reg_reg2\n" ++ "\tCMOV $dst,$src2, $src1 \t @cmovL_cmpL_reg_reg2" ++ %} ++ ++ ins_encode %{ ++ Register op1 = $src1$$Register; ++ Register op2 = $src2$$Register; ++ Register dst = $dst$$Register; ++ int flag = $cop$$cmpcode; ++ ++ __ cmp_cmov(op1, op2, dst, op2, op1, (MacroAssembler::CMCompare) flag, true); ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct cmovL_cmpUL_reg_reg2(mRegL dst, mRegL src1, mRegL src2, cmpOp cop) %{ ++ match(Set dst (CMoveL (Binary cop (CmpUL src1 src2)) (Binary src2 src1))); ++ ins_cost(50); ++ format %{ ++ "CMP$cop $src1, $src2\t @cmovL_cmpUL_reg_reg2\n" ++ "\tCMOV $dst,$src2, $src1 \t @cmovL_cmpUL_reg_reg2" ++ %} ++ ++ ins_encode %{ ++ Register op1 = $src1$$Register; ++ Register op2 = $src2$$Register; ++ Register dst = $dst$$Register; ++ int flag = $cop$$cmpcode; ++ ++ __ cmp_cmov(op1, op2, dst, op2, op1, (MacroAssembler::CMCompare) flag, false /* is_signed */); ++ %} ++ ++ ins_pipe(pipe_slow); ++%} ++ ++instruct cmovL_cmpL_dst_reg(mRegL dst, mRegL src, mRegL tmp1, mRegL tmp2, cmpOp cop ) %{ ++ match(Set dst (CMoveL (Binary cop (CmpL tmp1 tmp2)) (Binary dst src))); ++ ins_cost(80); ++ format %{ ++ "CMP$cop $tmp1, $tmp2\t @cmovL_cmpL_dst_reg\n" ++ "\tCMOV $dst,$src \t @cmovL_cmpL_dst_reg" ++ %} ++ ins_encode %{ ++ Register opr1 = as_Register($tmp1$$reg); ++ Register opr2 = as_Register($tmp2$$reg); ++ Register dst = as_Register($dst$$reg); ++ Register src = as_Register($src$$reg); ++ int flag = $cop$$cmpcode; ++ ++ __ cmp_cmov(opr1, opr2, dst, src, (MacroAssembler::CMCompare) flag, true /* is_signed */); ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct cmovL_cmpUL_dst_reg(mRegL dst, mRegL src, mRegL tmp1, mRegL tmp2, cmpOp cop) %{ ++ match(Set dst (CMoveL (Binary cop (CmpUL tmp1 tmp2)) (Binary dst src))); ++ ins_cost(80); ++ format %{ ++ "CMP$cop $tmp1, $tmp2\t @cmovL_cmpUL_dst_reg\n" ++ "\tCMOV $dst,$src \t @cmovL_cmpUL_dst_reg" ++ %} ++ ins_encode %{ ++ Register opr1 = as_Register($tmp1$$reg); ++ Register opr2 = as_Register($tmp2$$reg); ++ Register dst = as_Register($dst$$reg); ++ Register src = as_Register($src$$reg); ++ int flag = $cop$$cmpcode; ++ ++ __ cmp_cmov(opr1, opr2, dst, src, 
(MacroAssembler::CMCompare) flag, false /* is_signed */); ++ %} ++ ++ ins_pipe(pipe_slow); ++%} ++ ++instruct cmovL_cmpN_reg_reg(mRegL dst, mRegL src, mRegN tmp1, mRegN tmp2, cmpOp cop ) %{ ++ match(Set dst (CMoveL (Binary cop (CmpN tmp1 tmp2)) (Binary dst src))); ++ ins_cost(80); ++ format %{ ++ "CMPU$cop $tmp1,$tmp2\t @cmovL_cmpN_reg_reg\n\t" ++ "CMOV $dst,$src\t @cmovL_cmpN_reg_reg" ++ %} ++ ins_encode %{ ++ Register op1 = $tmp1$$Register; ++ Register op2 = $tmp2$$Register; ++ Register dst = $dst$$Register; ++ Register src = $src$$Register; ++ int flag = $cop$$cmpcode; ++ ++ __ cmp_cmov(op1, op2, dst, src, (MacroAssembler::CMCompare) flag, false /* is_signed */); ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++ ++instruct cmovL_cmpD_reg_reg(mRegL dst, mRegL src, regD tmp1, regD tmp2, cmpOp cop, regD tmp3, regD tmp4) %{ ++ match(Set dst (CMoveL (Binary cop (CmpD tmp1 tmp2)) (Binary dst src))); ++ effect(TEMP tmp3, TEMP tmp4); ++ ins_cost(80); ++ format %{ ++ "CMP$cop $tmp1, $tmp2\t @cmovL_cmpD_reg_reg\n" ++ "\tCMOV $dst,$src \t @cmovL_cmpD_reg_reg" ++ %} ++ ins_encode %{ ++ FloatRegister reg_op1 = as_FloatRegister($tmp1$$reg); ++ FloatRegister reg_op2 = as_FloatRegister($tmp2$$reg); ++ FloatRegister tmp1 = $tmp3$$FloatRegister; ++ FloatRegister tmp2 = $tmp4$$FloatRegister; ++ Register dst = as_Register($dst$$reg); ++ Register src = as_Register($src$$reg); ++ int flag = $cop$$cmpcode; ++ ++ __ cmp_cmov(reg_op1, reg_op2, dst, src, tmp1, tmp2, (MacroAssembler::CMCompare) flag, false /* is_float */); ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct cmovD_cmpD_reg_reg(regD dst, regD src, regD tmp1, regD tmp2, cmpOp cop ) %{ ++ match(Set dst (CMoveD (Binary cop (CmpD tmp1 tmp2)) (Binary dst src))); ++ ins_cost(200); ++ format %{ ++ "CMP$cop $tmp1, $tmp2\t @cmovD_cmpD_reg_reg\n" ++ "\tCMOV $dst,$src \t @cmovD_cmpD_reg_reg" ++ %} ++ ins_encode %{ ++ FloatRegister reg_op1 = as_FloatRegister($tmp1$$reg); ++ FloatRegister reg_op2 = as_FloatRegister($tmp2$$reg); ++ FloatRegister dst = as_FloatRegister($dst$$reg); ++ FloatRegister src = as_FloatRegister($src$$reg); ++ int flag = $cop$$cmpcode; ++ ++ __ cmp_cmov(reg_op1, reg_op2, dst, src, (MacroAssembler::CMCompare) flag, false /* is_float */); ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct cmovF_cmpI_reg_reg(regF dst, regF src, mRegI tmp1, mRegI tmp2, cmpOp cop, regF tmp3, regF tmp4) %{ ++ match(Set dst (CMoveF (Binary cop (CmpI tmp1 tmp2)) (Binary dst src))); ++ effect(TEMP tmp3, TEMP tmp4); ++ ins_cost(200); ++ format %{ ++ "CMP$cop $tmp1, $tmp2\t @cmovF_cmpI_reg_reg\n" ++ "\tCMOV $dst, $src \t @cmovF_cmpI_reg_reg" ++ %} ++ ++ ins_encode %{ ++ Register op1 = $tmp1$$Register; ++ Register op2 = $tmp2$$Register; ++ FloatRegister dst = as_FloatRegister($dst$$reg); ++ FloatRegister src = as_FloatRegister($src$$reg); ++ FloatRegister tmp1 = as_FloatRegister($tmp3$$reg); ++ FloatRegister tmp2 = as_FloatRegister($tmp4$$reg); ++ int flag = $cop$$cmpcode; ++ ++ __ cmp_cmov(op1, op2, dst, src, tmp1, tmp2, (MacroAssembler::CMCompare) flag); ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct cmovD_cmpI_reg_reg(regD dst, regD src, mRegI tmp1, mRegI tmp2, cmpOp cop, regF tmp3, regF tmp4) %{ ++ match(Set dst (CMoveD (Binary cop (CmpI tmp1 tmp2)) (Binary dst src))); ++ effect(TEMP tmp3, TEMP tmp4); ++ ins_cost(200); ++ format %{ ++ "CMP$cop $tmp1, $tmp2\t @cmovD_cmpI_reg_reg\n" ++ "\tCMOV $dst, $src \t @cmovD_cmpI_reg_reg" ++ %} ++ ++ ins_encode %{ ++ Register op1 = $tmp1$$Register; ++ Register op2 = $tmp2$$Register; ++ FloatRegister dst = 
as_FloatRegister($dst$$reg); ++ FloatRegister src = as_FloatRegister($src$$reg); ++ FloatRegister tmp1 = as_FloatRegister($tmp3$$reg); ++ FloatRegister tmp2 = as_FloatRegister($tmp4$$reg); ++ int flag = $cop$$cmpcode; ++ ++ __ cmp_cmov(op1, op2, dst, src, tmp1, tmp2, (MacroAssembler::CMCompare) flag); ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct cmovD_cmpP_reg_reg(regD dst, regD src, mRegP tmp1, mRegP tmp2, cmpOp cop, regF tmp3, regF tmp4) %{ ++ match(Set dst (CMoveD (Binary cop (CmpP tmp1 tmp2)) (Binary dst src))); ++ effect(TEMP tmp3, TEMP tmp4); ++ ins_cost(200); ++ format %{ ++ "CMP$cop $tmp1, $tmp2\t @cmovD_cmpP_reg_reg\n" ++ "\tCMOV $dst, $src \t @cmovD_cmpP_reg_reg" ++ %} ++ ++ ins_encode %{ ++ Register op1 = $tmp1$$Register; ++ Register op2 = $tmp2$$Register; ++ FloatRegister dst = as_FloatRegister($dst$$reg); ++ FloatRegister src = as_FloatRegister($src$$reg); ++ FloatRegister tmp1 = as_FloatRegister($tmp3$$reg); ++ FloatRegister tmp2 = as_FloatRegister($tmp4$$reg); ++ int flag = $cop$$cmpcode; ++ ++ // Use signed comparison here, because the most significant bit of the ++ // user-space virtual address must be 0. ++ __ cmp_cmov(op1, op2, dst, src, tmp1, tmp2, (MacroAssembler::CMCompare) flag); ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++//FIXME ++instruct cmovI_cmpF_reg_reg(mRegI dst, mRegI src, regF tmp1, regF tmp2, cmpOp cop, regD tmp3, regD tmp4) %{ ++ match(Set dst (CMoveI (Binary cop (CmpF tmp1 tmp2)) (Binary dst src))); ++ effect(TEMP tmp3, TEMP tmp4); ++ ins_cost(80); ++ format %{ ++ "CMP$cop $tmp1, $tmp2\t @cmovI_cmpF_reg_reg\n" ++ "\tCMOV $dst,$src \t @cmovI_cmpF_reg_reg" ++ %} ++ ++ ins_encode %{ ++ FloatRegister reg_op1 = $tmp1$$FloatRegister; ++ FloatRegister reg_op2 = $tmp2$$FloatRegister; ++ FloatRegister tmp1 = $tmp3$$FloatRegister; ++ FloatRegister tmp2 = $tmp4$$FloatRegister; ++ Register dst = $dst$$Register; ++ Register src = $src$$Register; ++ int flag = $cop$$cmpcode; ++ ++ __ cmp_cmov(reg_op1, reg_op2, dst, src, tmp1, tmp2, (MacroAssembler::CMCompare) flag, true /* is_float */); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct cmovF_cmpF_reg_reg(regF dst, regF src, regF tmp1, regF tmp2, cmpOp cop ) %{ ++ match(Set dst (CMoveF (Binary cop (CmpF tmp1 tmp2)) (Binary dst src))); ++ ins_cost(200); ++ format %{ ++ "CMP$cop $tmp1, $tmp2\t @cmovF_cmpF_reg_reg\n" ++ "\tCMOV $dst,$src \t @cmovF_cmpF_reg_reg" ++ %} ++ ++ ins_encode %{ ++ FloatRegister reg_op1 = $tmp1$$FloatRegister; ++ FloatRegister reg_op2 = $tmp2$$FloatRegister; ++ FloatRegister dst = $dst$$FloatRegister; ++ FloatRegister src = $src$$FloatRegister; ++ int flag = $cop$$cmpcode; ++ ++ __ cmp_cmov(reg_op1, reg_op2, dst, src, (MacroAssembler::CMCompare) flag, true /* is_float */); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++// Manifest a CmpL result in an integer register. Very painful. ++// This is the test to avoid. 
++instruct cmpL3_reg_reg(mRegI dst, mRegL src1, mRegL src2) %{ ++ match(Set dst (CmpL3 src1 src2)); ++ ins_cost(1000); ++ format %{ "cmpL3 $dst, $src1, $src2 @ cmpL3_reg_reg" %} ++ ins_encode %{ ++ Register opr1 = as_Register($src1$$reg); ++ Register opr2 = as_Register($src2$$reg); ++ Register dst = as_Register($dst$$reg); ++ ++ __ slt(AT, opr1, opr2); ++ __ slt(dst, opr2, opr1); ++ __ sub_d(dst, dst, AT); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++// ++// less_rsult = -1 ++// greater_result = 1 ++// equal_result = 0 ++// nan_result = -1 ++// ++instruct cmpF3_reg_reg(mRegI dst, regF src1, regF src2) %{ ++ match(Set dst (CmpF3 src1 src2)); ++ ins_cost(1000); ++ format %{ "cmpF3 $dst, $src1, $src2 @ cmpF3_reg_reg" %} ++ ins_encode %{ ++ FloatRegister src1 = as_FloatRegister($src1$$reg); ++ FloatRegister src2 = as_FloatRegister($src2$$reg); ++ Register dst = as_Register($dst$$reg); ++ ++ __ fcmp_clt_s(FCC0, src2, src1); ++ __ fcmp_cult_s(FCC1, src1, src2); ++ __ movcf2gr(dst, FCC0); ++ __ movcf2gr(AT, FCC1); ++ __ sub_d(dst, dst, AT); ++ ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct cmpD3_reg_reg(mRegI dst, regD src1, regD src2) %{ ++ match(Set dst (CmpD3 src1 src2)); ++ ins_cost(1000); ++ format %{ "cmpD3 $dst, $src1, $src2 @ cmpD3_reg_reg" %} ++ ins_encode %{ ++ FloatRegister src1 = as_FloatRegister($src1$$reg); ++ FloatRegister src2 = as_FloatRegister($src2$$reg); ++ Register dst = as_Register($dst$$reg); ++ ++ __ fcmp_clt_d(FCC0, src2, src1); ++ __ fcmp_cult_d(FCC1, src1, src2); ++ __ movcf2gr(dst, FCC0); ++ __ movcf2gr(AT, FCC1); ++ __ sub_d(dst, dst, AT); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct clear_array(t8RegL cnt, t3_RegP base, Universe dummy) %{ ++ match(Set dummy (ClearArray cnt base)); ++ effect(USE_KILL cnt, USE_KILL base); ++ format %{ "CLEAR_ARRAY base = $base, cnt = $cnt # Clear doublewords" %} ++ ins_encode %{ ++ //Assume cnt is the number of bytes in an array to be cleared, ++ //and base points to the starting address of the array. ++ Register base = $base$$Register; ++ Register cnt = $cnt$$Register; ++ Label Loop, done; ++ ++ __ beq(cnt, R0, done); ++ ++ __ bind(Loop); ++ __ st_d(R0, base, 0); ++ __ addi_d(cnt, cnt, -1); ++ __ addi_d(base, base, wordSize); ++ __ bne(cnt, R0, Loop); ++ ++ __ bind(done); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct clear_array_imm(immL cnt, t3_RegP base, Universe dummy) %{ ++ match(Set dummy (ClearArray cnt base)); ++ effect(USE_KILL base); ++ format %{ "CLEAR_ARRAY base = $base, cnt = $cnt # Clear doublewords" %} ++ ins_encode %{ ++ //Assume cnt is the number of bytes in an array to be cleared, ++ //and base points to the starting address of the array. 
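++    //The constant count is split: the leading (cnt % 8) doublewords are
++    //cleared with individual st_d stores, and any remaining multiple of
++    //eight doublewords is cleared in a loop unrolled to eight st_d
++    //(64 bytes) per iteration.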
++ Register base = $base$$Register; ++ long cnt = $cnt$$constant; ++ Label Loop, done; ++ ++ int tmp = cnt % 8; ++ int i = 0; ++ for (; i < tmp; i++) { ++ __ st_d(R0, base, i * 8); ++ } ++ if (cnt - tmp) { ++ __ li(AT, cnt); ++ __ alsl_d(AT, AT, base, 2); ++ __ addi_d(base, base, i * 8); ++ __ bind(Loop); ++ __ st_d(R0, base, 0); ++ __ st_d(R0, base, 8); ++ __ st_d(R0, base, 16); ++ __ st_d(R0, base, 24); ++ __ st_d(R0, base, 32); ++ __ st_d(R0, base, 40); ++ __ st_d(R0, base, 48); ++ __ st_d(R0, base, 56); ++ __ addi_d(base, base, 64); ++ __ blt(base, AT, Loop); ++ } ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct has_negatives(a4_RegP ary1, mA5RegI len, no_Ax_mRegI result) %{ ++ match(Set result (HasNegatives ary1 len)); ++ effect(USE_KILL ary1, USE_KILL len); ++ format %{ "has negatives byte[] ary1:$ary1, len:$len -> $result @ has_negatives" %} ++ ++ ins_encode %{ ++ __ has_negatives($ary1$$Register, $len$$Register, $result$$Register); ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct string_indexofU_char(a4_RegP str1, mA5RegI cnt1, mA6RegI ch, no_Ax_mRegI result, mRegL tmp1, mRegL tmp2, mRegL tmp3) %{ ++ match(Set result (StrIndexOfChar (Binary str1 cnt1) ch)); ++ effect(USE_KILL str1, USE_KILL cnt1, USE_KILL ch, TEMP_DEF result, TEMP tmp1, TEMP tmp2, TEMP tmp3); ++ ++ format %{ "String IndexOf char[] $str1, len:$cnt1, char:$ch, res:$result, tmp1:$tmp1, tmp2:$tmp2, tmp3:$tmp3 -> $result @ string_indexof_char" %} ++ ++ ins_encode %{ ++ __ string_indexof_char($str1$$Register, $cnt1$$Register, $ch$$Register, ++ $result$$Register, $tmp1$$Register, $tmp2$$Register, ++ $tmp3$$Register); ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct string_compareL(a4_RegP str1, mA5RegI cnt1, a6_RegP str2, mA7RegI cnt2, no_Ax_mRegI result) %{ ++ predicate(((StrCompNode*)n)->encoding() == StrIntrinsicNode::LL); ++ match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2))); ++ effect(USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2); ++ ++ format %{ "String Compare byte[] $str1[len: $cnt1], $str2[len: $cnt2] -> $result @ string_compareL" %} ++ ins_encode %{ ++ __ string_compare($str1$$Register, $str2$$Register, ++ $cnt1$$Register, $cnt2$$Register, $result$$Register, ++ StrIntrinsicNode::LL); ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct string_compareU(a4_RegP str1, mA5RegI cnt1, a6_RegP str2, mA7RegI cnt2, no_Ax_mRegI result) %{ ++ predicate(((StrCompNode*)n)->encoding() == StrIntrinsicNode::UU); ++ match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2))); ++ effect(USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2); ++ ++ format %{ "String Compare char[] $str1[len: $cnt1], $str2[len: $cnt2] -> $result @ string_compareU" %} ++ ins_encode %{ ++ __ string_compare($str1$$Register, $str2$$Register, ++ $cnt1$$Register, $cnt2$$Register, $result$$Register, ++ StrIntrinsicNode::UU); ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct string_compareLU(a4_RegP str1, mA5RegI cnt1, a6_RegP str2, mA7RegI cnt2, no_Ax_mRegI result) %{ ++ predicate(((StrCompNode*)n)->encoding() == StrIntrinsicNode::LU); ++ match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2))); ++ effect(USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2); ++ ++ format %{ "String Compare byte[] $str1[len: $cnt1], $str2[len: $cnt2] -> $result @ string_compareLU" %} ++ ins_encode %{ ++ __ string_compare($str1$$Register, $str2$$Register, ++ $cnt1$$Register, $cnt2$$Register, $result$$Register, ++ StrIntrinsicNode::LU); ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct 
string_compareUL(a4_RegP str1, mA5RegI cnt1, a6_RegP str2, mA7RegI cnt2, no_Ax_mRegI result) %{ ++ predicate(((StrCompNode*)n)->encoding() == StrIntrinsicNode::UL); ++ match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2))); ++ effect(USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2); ++ ++ format %{ "String Compare byte[] $str1[len: $cnt1], $str2[len: $cnt2] -> $result @ string_compareUL" %} ++ ins_encode %{ ++ __ string_compare($str1$$Register, $str2$$Register, ++ $cnt1$$Register, $cnt2$$Register, $result$$Register, ++ StrIntrinsicNode::UL); ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++// fast char[] to byte[] compression ++instruct string_compress(a4_RegP src, a5_RegP dst, mA6RegI len, no_Ax_mRegI result, ++ mRegL tmp1, mRegL tmp2, mRegL tmp3) ++%{ ++ match(Set result (StrCompressedCopy src (Binary dst len))); ++ effect(USE_KILL src, USE_KILL dst, USE_KILL len, TEMP_DEF result, ++ TEMP tmp1, TEMP tmp2, TEMP tmp3); ++ ++ format %{ "String Compress $src,$dst -> $result @ string_compress " %} ++ ins_encode %{ ++ __ char_array_compress($src$$Register, $dst$$Register, $len$$Register, ++ $result$$Register, $tmp1$$Register, ++ $tmp2$$Register, $tmp3$$Register); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++// byte[] to char[] inflation ++instruct string_inflate(Universe dummy, a4_RegP src, a5_RegP dst, mA6RegI len, ++ mRegL tmp1, mRegL tmp2) ++%{ ++ match(Set dummy (StrInflatedCopy src (Binary dst len))); ++ effect(USE_KILL src, USE_KILL dst, USE_KILL len, TEMP tmp1, TEMP tmp2); ++ ++ format %{ "String Inflate $src,$dst @ string_inflate " %} ++ ins_encode %{ ++ __ byte_array_inflate($src$$Register, $dst$$Register, $len$$Register, ++ $tmp1$$Register, $tmp2$$Register); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++// intrinsic optimization ++instruct string_equals(a4_RegP str1, a5_RegP str2, mA6RegI cnt, no_Ax_mRegI result, t8RegL tmp1, t3RegL tmp2) %{ ++ match(Set result (StrEquals (Binary str1 str2) cnt)); ++ effect(USE_KILL str1, USE_KILL str2, USE_KILL cnt, KILL tmp1, KILL tmp2); ++ ++ format %{ "String Equal $str1, $str2, len:$cnt, tmp1:$tmp1, tmp2:$tmp2 -> $result @ string_equals" %} ++ ins_encode %{ ++ __ arrays_equals($str1$$Register, $str2$$Register, ++ $cnt$$Register, $tmp1$$Register, $tmp2$$Register, $result$$Register, ++ false/* byte */); ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++//----------Arithmetic Instructions------------------------------------------- ++//----------Addition Instructions--------------------------------------------- ++instruct addI_Reg_Reg(mRegI dst, mRegIorL2I src1, mRegIorL2I src2) %{ ++ match(Set dst (AddI src1 src2)); ++ ++ format %{ "add $dst, $src1, $src2 #@addI_Reg_Reg" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src1 = $src1$$Register; ++ Register src2 = $src2$$Register; ++ __ add_w(dst, src1, src2); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++instruct addI_Reg_imm(mRegI dst, mRegIorL2I src1, immI12 src2) %{ ++ match(Set dst (AddI src1 src2)); ++ ++ format %{ "add $dst, $src1, $src2 #@addI_Reg_imm12" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src1 = $src1$$Register; ++ int imm = $src2$$constant; ++ ++ __ addi_w(dst, src1, imm); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++instruct addI_salI_Reg_Reg_immI_1_4(mRegI dst, mRegI src1, mRegI src2, immI_1_4 shift) %{ ++ match(Set dst (AddI src1 (LShiftI src2 shift))); ++ ++ format %{ "alsl $dst, $src1, $src2, $shift #@addI_salI_Reg_Reg_immI_1_4" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src1 = $src1$$Register; ++ Register 
src2 = $src2$$Register; ++ int sh = $shift$$constant; ++ __ alsl_w(dst, src2, src1, sh - 1); ++ %} ++ ins_pipe(ialu_regI_regI); ++%} ++ ++instruct addP_reg_reg(mRegP dst, mRegP src1, mRegLorI2L src2) %{ ++ match(Set dst (AddP src1 src2)); ++ ++ format %{ "ADD $dst, $src1, $src2 #@addP_reg_reg" %} ++ ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src1 = $src1$$Register; ++ Register src2 = $src2$$Register; ++ __ add_d(dst, src1, src2); ++ %} ++ ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++instruct addP_reg_reg_M8(mRegP dst, mRegP src1, mRegLorI2L src2, immL_M8 M8) %{ ++ match(Set dst (AddP src1 (AndL src2 M8))); ++ format %{ "dadd $dst, $src1, $src2 #@addP_reg_reg_M8" %} ++ ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src1 = $src1$$Register; ++ Register src2 = $src2$$Register; ++ __ bstrins_d(src2, R0, 2, 0); ++ __ add_d(dst, src1, src2); ++ %} ++ ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++instruct addP_reg_imm12(mRegP dst, mRegP src1, immL12 src2) %{ ++ match(Set dst (AddP src1 src2)); ++ ++ format %{ "ADD $dst, $src1, $src2 #@addP_reg_imm12" %} ++ ins_encode %{ ++ Register src1 = $src1$$Register; ++ long src2 = $src2$$constant; ++ Register dst = $dst$$Register; ++ ++ __ addi_d(dst, src1, src2); ++ %} ++ ins_pipe( ialu_regI_imm16 ); ++%} ++ ++instruct addP_salL_Reg_RegI2L_immI_1_4(mRegP dst, mRegP src1, mRegI src2, immI_1_4 shift) %{ ++ match(Set dst (AddP src1 (LShiftL (ConvI2L src2) shift))); ++ ++ format %{ "alsl $dst, $src1, $src2, $shift #@addP_salL_Reg_RegI2L_immI_1_4" %} ++ ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src1 = $src1$$Register; ++ Register src2 = $src2$$Register; ++ int sh = $shift$$constant; ++ __ alsl_d(dst, src2, src1, sh - 1); ++ %} ++ ++ ins_pipe(ialu_regI_regI); ++%} ++ ++// Add Long Register with Register ++instruct addL_Reg_Reg(mRegL dst, mRegLorI2L src1, mRegLorI2L src2) %{ ++ match(Set dst (AddL src1 src2)); ++ ins_cost(200); ++ format %{ "ADD $dst, $src1, $src2 #@addL_Reg_Reg\t" %} ++ ++ ins_encode %{ ++ Register dst_reg = as_Register($dst$$reg); ++ Register src1_reg = as_Register($src1$$reg); ++ Register src2_reg = as_Register($src2$$reg); ++ ++ __ add_d(dst_reg, src1_reg, src2_reg); ++ %} ++ ++ ins_pipe( ialu_regL_regL ); ++%} ++ ++instruct addL_Reg_imm(mRegL dst, mRegLorI2L src1, immL12 src2) ++%{ ++ match(Set dst (AddL src1 src2)); ++ ++ format %{ "ADD $dst, $src1, $src2 #@addL_Reg_imm " %} ++ ins_encode %{ ++ Register dst_reg = as_Register($dst$$reg); ++ Register src1_reg = as_Register($src1$$reg); ++ int src2_imm = $src2$$constant; ++ ++ __ addi_d(dst_reg, src1_reg, src2_imm); ++ %} ++ ++ ins_pipe( ialu_regL_regL ); ++%} ++ ++//----------Abs Instructions------------------------------------------- ++ ++// Integer Absolute Instructions ++instruct absI_rReg(mRegI dst, mRegI src) ++%{ ++ match(Set dst (AbsI src)); ++ effect(TEMP dst); ++ format %{ "AbsI $dst, $src" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src = $src$$Register; ++ ++ __ srai_w(AT, src, 31); ++ __ xorr(dst, src, AT); ++ __ sub_w(dst, dst, AT); ++ %} ++ ++ ins_pipe(ialu_regI_regI); ++%} ++ ++// Long Absolute Instructions ++instruct absL_rReg(mRegL dst, mRegLorI2L src) ++%{ ++ match(Set dst (AbsL src)); ++ effect(TEMP dst); ++ format %{ "AbsL $dst, $src" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src = $src$$Register; ++ ++ __ srai_d(AT, src, 63); ++ __ xorr(dst, src, AT); ++ __ sub_d(dst, dst, AT); ++ %} ++ ++ ins_pipe(ialu_regL_regL); ++%} ++ ++//----------Subtraction 
Instructions------------------------------------------- ++// Integer Subtraction Instructions ++instruct subI_Reg_Reg(mRegI dst, mRegIorL2I src1, mRegIorL2I src2) %{ ++ match(Set dst (SubI src1 src2)); ++ ins_cost(100); ++ ++ format %{ "sub $dst, $src1, $src2 #@subI_Reg_Reg" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src1 = $src1$$Register; ++ Register src2 = $src2$$Register; ++ __ sub_w(dst, src1, src2); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++instruct subI_Reg_immI_M2047_2048(mRegI dst, mRegIorL2I src1, immI_M2047_2048 src2) %{ ++ match(Set dst (SubI src1 src2)); ++ ins_cost(80); ++ ++ format %{ "sub $dst, $src1, $src2 #@subI_Reg_immI_M2047_2048" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src1 = $src1$$Register; ++ __ addi_w(dst, src1, -1 * $src2$$constant); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++instruct negI_Reg(mRegI dst, immI_0 zero, mRegIorL2I src) %{ ++ match(Set dst (SubI zero src)); ++ ins_cost(80); ++ ++ format %{ "neg $dst, $src #@negI_Reg" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src = $src$$Register; ++ __ sub_w(dst, R0, src); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++instruct negL_Reg(mRegL dst, immL_0 zero, mRegLorI2L src) %{ ++ match(Set dst (SubL zero src)); ++ ins_cost(80); ++ ++ format %{ "neg $dst, $src #@negL_Reg" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src = $src$$Register; ++ __ sub_d(dst, R0, src); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++instruct subL_Reg_immL_M2047_2048(mRegL dst, mRegL src1, immL_M2047_2048 src2) %{ ++ match(Set dst (SubL src1 src2)); ++ ins_cost(80); ++ ++ format %{ "sub $dst, $src1, $src2 #@subL_Reg_immL_M2047_2048" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src1 = $src1$$Register; ++ __ addi_d(dst, src1, -1 * $src2$$constant); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++// Subtract Long Register with Register. 
++instruct subL_Reg_Reg(mRegL dst, mRegLorI2L src1, mRegLorI2L src2) %{ ++ match(Set dst (SubL src1 src2)); ++ ins_cost(100); ++ format %{ "SubL $dst, $src1, $src2 @ subL_Reg_Reg" %} ++ ins_encode %{ ++ Register dst = as_Register($dst$$reg); ++ Register src1 = as_Register($src1$$reg); ++ Register src2 = as_Register($src2$$reg); ++ ++ __ sub_d(dst, src1, src2); ++ %} ++ ins_pipe( ialu_regL_regL ); ++%} ++ ++// Integer MOD with Register ++instruct modI_Reg_Reg(mRegI dst, mRegIorL2I src1, mRegIorL2I src2) %{ ++ match(Set dst (ModI src1 src2)); ++ ins_cost(300); ++ format %{ "modi $dst, $src1, $src2 @ modI_Reg_Reg" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src1 = $src1$$Register; ++ Register src2 = $src2$$Register; ++ ++ __ mod_w(dst, src1, src2); ++ %} ++ ++ //ins_pipe( ialu_mod ); ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++instruct modL_reg_reg(mRegL dst, mRegLorI2L src1, mRegLorI2L src2) %{ ++ match(Set dst (ModL src1 src2)); ++ format %{ "modL $dst, $src1, $src2 @modL_reg_reg" %} ++ ++ ins_encode %{ ++ Register dst = as_Register($dst$$reg); ++ Register op1 = as_Register($src1$$reg); ++ Register op2 = as_Register($src2$$reg); ++ ++ __ mod_d(dst, op1, op2); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct mulI_Reg_Reg(mRegI dst, mRegI src1, mRegI src2) %{ ++ match(Set dst (MulI src1 src2)); ++ ++ ins_cost(300); ++ format %{ "mul $dst, $src1, $src2 @ mulI_Reg_Reg" %} ++ ins_encode %{ ++ Register src1 = $src1$$Register; ++ Register src2 = $src2$$Register; ++ Register dst = $dst$$Register; ++ ++ __ mul_w(dst, src1, src2); ++ %} ++ ins_pipe( ialu_mult ); ++%} ++ ++instruct divI_Reg_Reg(mRegI dst, mRegI src1, mRegI src2) %{ ++ match(Set dst (DivI src1 src2)); ++ ++ ins_cost(300); ++ format %{ "div $dst, $src1, $src2 @ divI_Reg_Reg" %} ++ ins_encode %{ ++ Register src1 = $src1$$Register; ++ Register src2 = $src2$$Register; ++ Register dst = $dst$$Register; ++ ++ __ div_w(dst, src1, src2); ++ ++ %} ++ ins_pipe( ialu_mod ); ++%} ++ ++instruct divF_Reg_Reg(regF dst, regF src1, regF src2) %{ ++ match(Set dst (DivF src1 src2)); ++ ++ ins_cost(300); ++ format %{ "divF $dst, $src1, $src2 @ divF_Reg_Reg" %} ++ ins_encode %{ ++ FloatRegister src1 = $src1$$FloatRegister; ++ FloatRegister src2 = $src2$$FloatRegister; ++ FloatRegister dst = $dst$$FloatRegister; ++ ++ __ fdiv_s(dst, src1, src2); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct divD_Reg_Reg(regD dst, regD src1, regD src2) %{ ++ match(Set dst (DivD src1 src2)); ++ ++ ins_cost(300); ++ format %{ "divD $dst, $src1, $src2 @ divD_Reg_Reg" %} ++ ins_encode %{ ++ FloatRegister src1 = $src1$$FloatRegister; ++ FloatRegister src2 = $src2$$FloatRegister; ++ FloatRegister dst = $dst$$FloatRegister; ++ ++ __ fdiv_d(dst, src1, src2); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct mulL_reg_reg(mRegL dst, mRegLorI2L src1, mRegLorI2L src2) %{ ++ match(Set dst (MulL src1 src2)); ++ format %{ "mulL $dst, $src1, $src2 @mulL_reg_reg" %} ++ ins_encode %{ ++ Register dst = as_Register($dst$$reg); ++ Register op1 = as_Register($src1$$reg); ++ Register op2 = as_Register($src2$$reg); ++ ++ __ mul_d(dst, op1, op2); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct mulHiL_reg_reg(mRegL dst, mRegL src1, mRegL src2) %{ ++ match(Set dst (MulHiL src1 src2)); ++ format %{ "mulHiL $dst, $src1, $src2 @mulL_reg_reg" %} ++ ins_encode %{ ++ Register dst = as_Register($dst$$reg); ++ Register op1 = as_Register($src1$$reg); ++ Register op2 = as_Register($src2$$reg); ++ ++ __ mulh_d(dst, op1, op2); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct 
divL_reg_reg(mRegL dst, mRegL src1, mRegL src2) %{ ++ match(Set dst (DivL src1 src2)); ++ format %{ "divL $dst, $src1, $src2 @divL_reg_reg" %} ++ ++ ins_encode %{ ++ Register dst = as_Register($dst$$reg); ++ Register op1 = as_Register($src1$$reg); ++ Register op2 = as_Register($src2$$reg); ++ ++ __ div_d(dst, op1, op2); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct addF_reg_reg(regF dst, regF src1, regF src2) %{ ++ match(Set dst (AddF src1 src2)); ++ format %{ "AddF $dst, $src1, $src2 @addF_reg_reg" %} ++ ins_encode %{ ++ FloatRegister src1 = as_FloatRegister($src1$$reg); ++ FloatRegister src2 = as_FloatRegister($src2$$reg); ++ FloatRegister dst = as_FloatRegister($dst$$reg); ++ ++ __ fadd_s(dst, src1, src2); ++ %} ++ ins_pipe( fpu_regF_regF ); ++%} ++ ++instruct subF_reg_reg(regF dst, regF src1, regF src2) %{ ++ match(Set dst (SubF src1 src2)); ++ format %{ "SubF $dst, $src1, $src2 @subF_reg_reg" %} ++ ins_encode %{ ++ FloatRegister src1 = as_FloatRegister($src1$$reg); ++ FloatRegister src2 = as_FloatRegister($src2$$reg); ++ FloatRegister dst = as_FloatRegister($dst$$reg); ++ ++ __ fsub_s(dst, src1, src2); ++ %} ++ ins_pipe( fpu_regF_regF ); ++%} ++instruct addD_reg_reg(regD dst, regD src1, regD src2) %{ ++ match(Set dst (AddD src1 src2)); ++ format %{ "AddD $dst, $src1, $src2 @addD_reg_reg" %} ++ ins_encode %{ ++ FloatRegister src1 = as_FloatRegister($src1$$reg); ++ FloatRegister src2 = as_FloatRegister($src2$$reg); ++ FloatRegister dst = as_FloatRegister($dst$$reg); ++ ++ __ fadd_d(dst, src1, src2); ++ %} ++ ins_pipe( fpu_regF_regF ); ++%} ++ ++instruct subD_reg_reg(regD dst, regD src1, regD src2) %{ ++ match(Set dst (SubD src1 src2)); ++ format %{ "SubD $dst, $src1, $src2 @subD_reg_reg" %} ++ ins_encode %{ ++ FloatRegister src1 = as_FloatRegister($src1$$reg); ++ FloatRegister src2 = as_FloatRegister($src2$$reg); ++ FloatRegister dst = as_FloatRegister($dst$$reg); ++ ++ __ fsub_d(dst, src1, src2); ++ %} ++ ins_pipe( fpu_regF_regF ); ++%} ++ ++instruct negF_reg(regF dst, regF src) %{ ++ match(Set dst (NegF src)); ++ format %{ "negF $dst, $src @negF_reg" %} ++ ins_encode %{ ++ FloatRegister src = as_FloatRegister($src$$reg); ++ FloatRegister dst = as_FloatRegister($dst$$reg); ++ ++ __ fneg_s(dst, src); ++ %} ++ ins_pipe( fpu_regF_regF ); ++%} ++ ++instruct negD_reg(regD dst, regD src) %{ ++ match(Set dst (NegD src)); ++ format %{ "negD $dst, $src @negD_reg" %} ++ ins_encode %{ ++ FloatRegister src = as_FloatRegister($src$$reg); ++ FloatRegister dst = as_FloatRegister($dst$$reg); ++ ++ __ fneg_d(dst, src); ++ %} ++ ins_pipe( fpu_regF_regF ); ++%} ++ ++ ++instruct mulF_reg_reg(regF dst, regF src1, regF src2) %{ ++ match(Set dst (MulF src1 src2)); ++ format %{ "MULF $dst, $src1, $src2 @mulF_reg_reg" %} ++ ins_encode %{ ++ FloatRegister src1 = $src1$$FloatRegister; ++ FloatRegister src2 = $src2$$FloatRegister; ++ FloatRegister dst = $dst$$FloatRegister; ++ ++ __ fmul_s(dst, src1, src2); ++ %} ++ ins_pipe( fpu_regF_regF ); ++%} ++ ++// Mul two double precision floating piont number ++instruct mulD_reg_reg(regD dst, regD src1, regD src2) %{ ++ match(Set dst (MulD src1 src2)); ++ format %{ "MULD $dst, $src1, $src2 @mulD_reg_reg" %} ++ ins_encode %{ ++ FloatRegister src1 = $src1$$FloatRegister; ++ FloatRegister src2 = $src2$$FloatRegister; ++ FloatRegister dst = $dst$$FloatRegister; ++ ++ __ fmul_d(dst, src1, src2); ++ %} ++ ins_pipe( fpu_regF_regF ); ++%} ++ ++instruct absF_reg(regF dst, regF src) %{ ++ match(Set dst (AbsF src)); ++ ins_cost(100); ++ format %{ "absF $dst, $src @absF_reg" %} 
++ ins_encode %{ ++ FloatRegister src = as_FloatRegister($src$$reg); ++ FloatRegister dst = as_FloatRegister($dst$$reg); ++ ++ __ fabs_s(dst, src); ++ %} ++ ins_pipe( fpu_regF_regF ); ++%} ++ ++ ++// intrinsics for math_native. ++// AbsD SqrtD CosD SinD TanD LogD Log10D ++ ++instruct absD_reg(regD dst, regD src) %{ ++ match(Set dst (AbsD src)); ++ ins_cost(100); ++ format %{ "absD $dst, $src @absD_reg" %} ++ ins_encode %{ ++ FloatRegister src = as_FloatRegister($src$$reg); ++ FloatRegister dst = as_FloatRegister($dst$$reg); ++ ++ __ fabs_d(dst, src); ++ %} ++ ins_pipe( fpu_regF_regF ); ++%} ++ ++instruct sqrtD_reg(regD dst, regD src) %{ ++ match(Set dst (SqrtD src)); ++ ins_cost(100); ++ format %{ "SqrtD $dst, $src @sqrtD_reg" %} ++ ins_encode %{ ++ FloatRegister src = as_FloatRegister($src$$reg); ++ FloatRegister dst = as_FloatRegister($dst$$reg); ++ ++ __ fsqrt_d(dst, src); ++ %} ++ ins_pipe( fpu_regF_regF ); ++%} ++ ++instruct sqrtF_reg(regF dst, regF src) %{ ++ match(Set dst (ConvD2F (SqrtD (ConvF2D src)))); ++ ins_cost(100); ++ format %{ "SqrtF $dst, $src @sqrtF_reg" %} ++ ins_encode %{ ++ FloatRegister src = as_FloatRegister($src$$reg); ++ FloatRegister dst = as_FloatRegister($dst$$reg); ++ ++ __ fsqrt_s(dst, src); ++ %} ++ ins_pipe( fpu_regF_regF ); ++%} ++ ++// src1 * src2 + src3 ++instruct maddF_reg_reg(regF dst, regF src1, regF src2, regF src3) %{ ++ predicate(UseFMA); ++ match(Set dst (FmaF src3 (Binary src1 src2))); ++ ++ format %{ "fmadd_s $dst, $src1, $src2, $src3" %} ++ ++ ins_encode %{ ++ __ fmadd_s(as_FloatRegister($dst$$reg), as_FloatRegister($src1$$reg), ++ as_FloatRegister($src2$$reg), as_FloatRegister($src3$$reg)); ++ %} ++ ++ ins_pipe(fpu_regF_regF); ++%} ++ ++// src1 * src2 + src3 ++instruct maddD_reg_reg(regD dst, regD src1, regD src2, regD src3) %{ ++ predicate(UseFMA); ++ match(Set dst (FmaD src3 (Binary src1 src2))); ++ ++ format %{ "fmadd_d $dst, $src1, $src2, $src3" %} ++ ++ ins_encode %{ ++ __ fmadd_d(as_FloatRegister($dst$$reg), as_FloatRegister($src1$$reg), ++ as_FloatRegister($src2$$reg), as_FloatRegister($src3$$reg)); ++ %} ++ ++ ins_pipe(fpu_regF_regF); ++%} ++ ++// src1 * src2 - src3 ++instruct msubF_reg_reg(regF dst, regF src1, regF src2, regF src3, immF_0 zero) %{ ++ predicate(UseFMA); ++ match(Set dst (FmaF (NegF src3) (Binary src1 src2))); ++ ++ format %{ "fmsub_s $dst, $src1, $src2, $src3" %} ++ ++ ins_encode %{ ++ __ fmsub_s(as_FloatRegister($dst$$reg), as_FloatRegister($src1$$reg), ++ as_FloatRegister($src2$$reg), as_FloatRegister($src3$$reg)); ++ %} ++ ++ ins_pipe(fpu_regF_regF); ++%} ++ ++// src1 * src2 - src3 ++instruct msubD_reg_reg(regD dst, regD src1, regD src2, regD src3, immD_0 zero) %{ ++ predicate(UseFMA); ++ match(Set dst (FmaD (NegD src3) (Binary src1 src2))); ++ ++ format %{ "fmsub_d $dst, $src1, $src2, $src3" %} ++ ++ ins_encode %{ ++ __ fmsub_d(as_FloatRegister($dst$$reg), as_FloatRegister($src1$$reg), ++ as_FloatRegister($src2$$reg), as_FloatRegister($src3$$reg)); ++ %} ++ ++ ins_pipe(fpu_regF_regF); ++%} ++ ++// -src1 * src2 - src3 ++instruct mnaddF_reg_reg(regF dst, regF src1, regF src2, regF src3, immF_0 zero) %{ ++ predicate(UseFMA); ++ match(Set dst (FmaF (NegF src3) (Binary (NegF src1) src2))); ++ match(Set dst (FmaF (NegF src3) (Binary src1 (NegF src2)))); ++ ++ format %{ "fnmadds $dst, $src1, $src2, $src3" %} ++ ++ ins_encode %{ ++ __ fnmadd_s(as_FloatRegister($dst$$reg), as_FloatRegister($src1$$reg), ++ as_FloatRegister($src2$$reg), as_FloatRegister($src3$$reg)); ++ %} ++ ++ ins_pipe(fpu_regF_regF); ++%} ++ ++// -src1 * 
src2 - src3 ++instruct mnaddD_reg_reg(regD dst, regD src1, regD src2, regD src3, immD_0 zero) %{ ++ predicate(UseFMA); ++ match(Set dst (FmaD (NegD src3) (Binary (NegD src1) src2))); ++ match(Set dst (FmaD (NegD src3) (Binary src1 (NegD src2)))); ++ ++ format %{ "fnmaddd $dst, $src1, $src2, $src3" %} ++ ++ ins_encode %{ ++ __ fnmadd_d(as_FloatRegister($dst$$reg), as_FloatRegister($src1$$reg), ++ as_FloatRegister($src2$$reg), as_FloatRegister($src3$$reg)); ++ %} ++ ++ ins_pipe(fpu_regF_regF); ++%} ++ ++// -src1 * src2 + src3 ++instruct mnsubF_reg_reg(regF dst, regF src1, regF src2, regF src3) %{ ++ predicate(UseFMA); ++ match(Set dst (FmaF src3 (Binary (NegF src1) src2))); ++ match(Set dst (FmaF src3 (Binary src1 (NegF src2)))); ++ ++ format %{ "fnmsubs $dst, $src1, $src2, $src3" %} ++ ++ ins_encode %{ ++ __ fnmsub_s(as_FloatRegister($dst$$reg), as_FloatRegister($src1$$reg), ++ as_FloatRegister($src2$$reg), as_FloatRegister($src3$$reg)); ++ %} ++ ++ ins_pipe(fpu_regF_regF); ++%} ++ ++// -src1 * src2 + src3 ++instruct mnsubD_reg_reg(regD dst, regD src1, regD src2, regD src3) %{ ++ predicate(UseFMA); ++ match(Set dst (FmaD src3 (Binary (NegD src1) src2))); ++ match(Set dst (FmaD src3 (Binary src1 (NegD src2)))); ++ ++ format %{ "fnmsubd $dst, $src1, $src2, $src3" %} ++ ++ ins_encode %{ ++ __ fnmsub_d(as_FloatRegister($dst$$reg), as_FloatRegister($src1$$reg), ++ as_FloatRegister($src2$$reg), as_FloatRegister($src3$$reg)); ++ %} ++ ++ ins_pipe(fpu_regF_regF); ++%} ++ ++instruct copySignF_reg(regF dst, regF src1, regF src2) %{ ++ match(Set dst (CopySignF src1 src2)); ++ effect(TEMP_DEF dst, USE src1, USE src2); ++ ++ format %{ "fcopysign_s $dst $src1 $src2 @ copySignF_reg" %} ++ ++ ins_encode %{ ++ __ fcopysign_s($dst$$FloatRegister, ++ $src1$$FloatRegister, ++ $src2$$FloatRegister); ++ %} ++ ++ ins_pipe( fpu_regF_regF ); ++%} ++ ++instruct copySignD_reg(regD dst, regD src1, regD src2, immD_0 zero) %{ ++ match(Set dst (CopySignD src1 (Binary src2 zero))); ++ effect(TEMP_DEF dst, USE src1, USE src2); ++ ++ format %{ "fcopysign_d $dst $src1 $src2 @ copySignD_reg" %} ++ ++ ins_encode %{ ++ __ fcopysign_d($dst$$FloatRegister, ++ $src1$$FloatRegister, ++ $src2$$FloatRegister); ++ %} ++ ++ ins_pipe( fpu_regF_regF ); ++%} ++ ++//----------------------------------Logical Instructions---------------------- ++//__________________________________Integer Logical Instructions------------- ++ ++//And Instuctions ++// And Register with Immediate ++instruct andI_Reg_imm_0_4095(mRegI dst, mRegI src1, immI_0_4095 src2) %{ ++ match(Set dst (AndI src1 src2)); ++ ins_cost(60); ++ ++ format %{ "and $dst, $src1, $src2 #@andI_Reg_imm_0_4095" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src = $src1$$Register; ++ int val = $src2$$constant; ++ ++ __ andi(dst, src, val); ++ ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++instruct andI_Reg_immI_nonneg_mask(mRegI dst, mRegI src1, immI_nonneg_mask mask) %{ ++ match(Set dst (AndI src1 mask)); ++ ins_cost(60); ++ ++ format %{ "and $dst, $src1, $mask #@andI_Reg_immI_nonneg_mask" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src = $src1$$Register; ++ int size = Assembler::is_int_mask($mask$$constant); ++ ++ __ bstrpick_w(dst, src, size-1, 0); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++instruct andL_Reg_immL_nonneg_mask(mRegL dst, mRegL src1, immL_nonneg_mask mask) %{ ++ match(Set dst (AndL src1 mask)); ++ ins_cost(60); ++ ++ format %{ "and $dst, $src1, $mask #@andL_Reg_immL_nonneg_mask" %} ++ ins_encode %{ ++ Register dst = 
$dst$$Register; ++ Register src = $src1$$Register; ++ int size = Assembler::is_jlong_mask($mask$$constant); ++ ++ __ bstrpick_d(dst, src, size-1, 0); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++instruct xorI_Reg_imm_0_4095(mRegI dst, mRegI src1, immI_0_4095 src2) %{ ++ match(Set dst (XorI src1 src2)); ++ ins_cost(60); ++ ++ format %{ "xori $dst, $src1, $src2 #@xorI_Reg_imm_0_4095" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src = $src1$$Register; ++ int val = $src2$$constant; ++ ++ __ xori(dst, src, val); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++instruct xorI_Reg_immI_M1(mRegI dst, mRegIorL2I src1, immI_M1 M1) %{ ++ match(Set dst (XorI src1 M1)); ++ ins_cost(60); ++ ++ format %{ "xor $dst, $src1, $M1 #@xorI_Reg_immI_M1" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src = $src1$$Register; ++ ++ __ orn(dst, R0, src); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++instruct xorL_Reg_imm_0_4095(mRegL dst, mRegL src1, immL_0_4095 src2) %{ ++ match(Set dst (XorL src1 src2)); ++ ins_cost(60); ++ ++ format %{ "xori $dst, $src1, $src2 #@xorL_Reg_imm_0_4095" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src = $src1$$Register; ++ int val = $src2$$constant; ++ ++ __ xori(dst, src, val); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++ ++instruct lbu_and_lmask(mRegI dst, memory mem, immI_255 mask) %{ ++ match(Set dst (AndI mask (LoadB mem))); ++ ins_cost(60); ++ ++ format %{ "lhu $dst, $mem #@lbu_and_lmask" %} ++ ins_encode %{ ++ __ loadstore_enc($dst$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::LOAD_U_BYTE); ++ %} ++ ins_pipe( ialu_loadI ); ++%} ++ ++instruct lbu_and_rmask(mRegI dst, memory mem, immI_255 mask) %{ ++ match(Set dst (AndI (LoadB mem) mask)); ++ ins_cost(60); ++ ++ format %{ "lhu $dst, $mem #@lbu_and_rmask" %} ++ ins_encode %{ ++ __ loadstore_enc($dst$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::LOAD_U_BYTE); ++ %} ++ ins_pipe( ialu_loadI ); ++%} ++ ++instruct andI_Reg_Reg(mRegI dst, mRegI src1, mRegI src2) %{ ++ match(Set dst (AndI src1 src2)); ++ ++ format %{ "and $dst, $src1, $src2 #@andI_Reg_Reg" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src1 = $src1$$Register; ++ Register src2 = $src2$$Register; ++ ++ __ andr(dst, src1, src2); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++instruct andnI_Reg_nReg(mRegI dst, mRegI src1, mRegI src2, immI_M1 M1) %{ ++ match(Set dst (AndI src1 (XorI src2 M1))); ++ ++ format %{ "andn $dst, $src1, $src2 #@andnI_Reg_nReg" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src1 = $src1$$Register; ++ Register src2 = $src2$$Register; ++ ++ __ andn(dst, src1, src2); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++instruct ornI_Reg_nReg(mRegI dst, mRegI src1, mRegI src2, immI_M1 M1) %{ ++ match(Set dst (OrI src1 (XorI src2 M1))); ++ ++ format %{ "orn $dst, $src1, $src2 #@ornI_Reg_nReg" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src1 = $src1$$Register; ++ Register src2 = $src2$$Register; ++ ++ __ orn(dst, src1, src2); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++instruct andnI_nReg_Reg(mRegI dst, mRegI src1, mRegI src2, immI_M1 M1) %{ ++ match(Set dst (AndI (XorI src1 M1) src2)); ++ ++ format %{ "andn $dst, $src2, $src1 #@andnI_nReg_Reg" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src1 = $src1$$Register; ++ Register src2 = $src2$$Register; ++ ++ __ andn(dst, src2, src1); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++instruct ornI_nReg_Reg(mRegI 
dst, mRegI src1, mRegI src2, immI_M1 M1) %{ ++ match(Set dst (OrI (XorI src1 M1) src2)); ++ ++ format %{ "orn $dst, $src2, $src1 #@ornI_nReg_Reg" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src1 = $src1$$Register; ++ Register src2 = $src2$$Register; ++ ++ __ orn(dst, src2, src1); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++// And Long Register with Register ++instruct andL_Reg_Reg(mRegL dst, mRegL src1, mRegLorI2L src2) %{ ++ match(Set dst (AndL src1 src2)); ++ format %{ "AND $dst, $src1, $src2 @ andL_Reg_Reg\n\t" %} ++ ins_encode %{ ++ Register dst_reg = as_Register($dst$$reg); ++ Register src1_reg = as_Register($src1$$reg); ++ Register src2_reg = as_Register($src2$$reg); ++ ++ __ andr(dst_reg, src1_reg, src2_reg); ++ %} ++ ins_pipe( ialu_regL_regL ); ++%} ++ ++instruct andL_Reg_imm_0_4095(mRegL dst, mRegL src1, immL_0_4095 src2) %{ ++ match(Set dst (AndL src1 src2)); ++ ins_cost(60); ++ ++ format %{ "and $dst, $src1, $src2 #@andL_Reg_imm_0_4095" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src = $src1$$Register; ++ long val = $src2$$constant; ++ ++ __ andi(dst, src, val); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++instruct andL2I_Reg_imm_0_4095(mRegI dst, mRegL src1, immL_0_4095 src2) %{ ++ match(Set dst (ConvL2I (AndL src1 src2))); ++ ins_cost(60); ++ ++ format %{ "and $dst, $src1, $src2 #@andL2I_Reg_imm_0_4095" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src = $src1$$Register; ++ long val = $src2$$constant; ++ ++ __ andi(dst, src, val); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++ ++instruct andL_Reg_immL_M8(mRegL dst, immL_M8 M8) %{ ++ match(Set dst (AndL dst M8)); ++ ins_cost(60); ++ ++ format %{ "and $dst, $dst, $M8 #@andL_Reg_immL_M8" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ ++ __ bstrins_d(dst, R0, 2, 0); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++instruct andL_Reg_immL_M5(mRegL dst, immL_M5 M5) %{ ++ match(Set dst (AndL dst M5)); ++ ins_cost(60); ++ ++ format %{ "and $dst, $dst, $M5 #@andL_Reg_immL_M5" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ ++ __ bstrins_d(dst, R0, 2, 2); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++instruct andL_Reg_immL_M7(mRegL dst, immL_M7 M7) %{ ++ match(Set dst (AndL dst M7)); ++ ins_cost(60); ++ ++ format %{ "and $dst, $dst, $M7 #@andL_Reg_immL_M7" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ ++ __ bstrins_d(dst, R0, 2, 1); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++instruct andL_Reg_immL_M4(mRegL dst, immL_M4 M4) %{ ++ match(Set dst (AndL dst M4)); ++ ins_cost(60); ++ ++ format %{ "and $dst, $dst, $M4 #@andL_Reg_immL_M4" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ ++ __ bstrins_d(dst, R0, 1, 0); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++instruct andL_Reg_immL_M121(mRegL dst, immL_M121 M121) %{ ++ match(Set dst (AndL dst M121)); ++ ins_cost(60); ++ ++ format %{ "and $dst, $dst, $M121 #@andL_Reg_immL_M121" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ ++ __ bstrins_d(dst, R0, 6, 3); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++// Or Long Register with Register ++instruct orL_Reg_Reg(mRegL dst, mRegLorI2L src1, mRegLorI2L src2) %{ ++ match(Set dst (OrL src1 src2)); ++ format %{ "OR $dst, $src1, $src2 @ orL_Reg_Reg\t" %} ++ ins_encode %{ ++ Register dst_reg = $dst$$Register; ++ Register src1_reg = $src1$$Register; ++ Register src2_reg = $src2$$Register; ++ ++ __ orr(dst_reg, src1_reg, src2_reg); ++ %} ++ ins_pipe( ialu_regL_regL ); ++%} ++ ++instruct orL_Reg_P2XReg(mRegL dst, mRegP src1, mRegLorI2L 
src2) %{ ++ match(Set dst (OrL (CastP2X src1) src2)); ++ format %{ "OR $dst, $src1, $src2 @ orL_Reg_P2XReg\t" %} ++ ins_encode %{ ++ Register dst_reg = $dst$$Register; ++ Register src1_reg = $src1$$Register; ++ Register src2_reg = $src2$$Register; ++ ++ __ orr(dst_reg, src1_reg, src2_reg); ++ %} ++ ins_pipe( ialu_regL_regL ); ++%} ++ ++// Xor Long Register with Register ++instruct xorL_Reg_Reg(mRegL dst, mRegL src1, mRegL src2) %{ ++ match(Set dst (XorL src1 src2)); ++ format %{ "XOR $dst, $src1, $src2 @ xorL_Reg_Reg\t" %} ++ ins_encode %{ ++ Register dst_reg = as_Register($dst$$reg); ++ Register src1_reg = as_Register($src1$$reg); ++ Register src2_reg = as_Register($src2$$reg); ++ ++ __ xorr(dst_reg, src1_reg, src2_reg); ++ %} ++ ins_pipe( ialu_regL_regL ); ++%} ++ ++// Shift Left by 5-bit immediate ++instruct salI_Reg_imm(mRegI dst, mRegIorL2I src, immIU5 shift) %{ ++ match(Set dst (LShiftI src shift)); ++ ++ format %{ "SHL $dst, $src, $shift #@salI_Reg_imm" %} ++ ins_encode %{ ++ Register src = $src$$Register; ++ Register dst = $dst$$Register; ++ int shamt = $shift$$constant; ++ ++ __ slli_w(dst, src, shamt); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++instruct salI_Reg_imm_and_M65536(mRegI dst, mRegI src, immI_16 shift, immI_M65536 mask) %{ ++ match(Set dst (AndI (LShiftI src shift) mask)); ++ ++ format %{ "SHL $dst, $src, $shift #@salI_Reg_imm_and_M65536" %} ++ ins_encode %{ ++ Register src = $src$$Register; ++ Register dst = $dst$$Register; ++ ++ __ slli_w(dst, src, 16); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++instruct land7_2_s(mRegI dst, mRegL src, immL_7 seven, immI_16 sixteen) ++%{ ++ match(Set dst (RShiftI (LShiftI (ConvL2I (AndL src seven)) sixteen) sixteen)); ++ ++ format %{ "andi $dst, $src, 7\t# @land7_2_s" %} ++ ins_encode %{ ++ Register src = $src$$Register; ++ Register dst = $dst$$Register; ++ ++ __ andi(dst, src, 7); ++ %} ++ ins_pipe(ialu_regI_regI); ++%} ++ ++// Logical Shift Right by 16, followed by Arithmetic Shift Left by 16. ++// This idiom is used by the compiler the i2s bytecode. ++instruct i2s(mRegI dst, mRegI src, immI_16 sixteen) ++%{ ++ match(Set dst (RShiftI (LShiftI src sixteen) sixteen)); ++ ++ format %{ "i2s $dst, $src\t# @i2s" %} ++ ins_encode %{ ++ Register src = $src$$Register; ++ Register dst = $dst$$Register; ++ ++ __ ext_w_h(dst, src); ++ %} ++ ins_pipe(ialu_regI_regI); ++%} ++ ++// Logical Shift Right by 24, followed by Arithmetic Shift Left by 24. ++// This idiom is used by the compiler for the i2b bytecode. 
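++// The matched shift pair is collapsed into a single ext_w_b, which
++// sign-extends the low byte of $src into $dst.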
++instruct i2b(mRegI dst, mRegI src, immI_24 twentyfour) ++%{ ++ match(Set dst (RShiftI (LShiftI src twentyfour) twentyfour)); ++ ++ format %{ "i2b $dst, $src\t# @i2b" %} ++ ins_encode %{ ++ Register src = $src$$Register; ++ Register dst = $dst$$Register; ++ ++ __ ext_w_b(dst, src); ++ %} ++ ins_pipe(ialu_regI_regI); ++%} ++ ++ ++instruct salI_RegL2I_imm(mRegI dst, mRegL src, immIU5 shift) %{ ++ match(Set dst (LShiftI (ConvL2I src) shift)); ++ ++ format %{ "SHL $dst, $src, $shift #@salI_RegL2I_imm" %} ++ ins_encode %{ ++ Register src = $src$$Register; ++ Register dst = $dst$$Register; ++ int shamt = $shift$$constant; ++ ++ __ slli_w(dst, src, shamt); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++// Shift Left by 8-bit immediate ++instruct salI_Reg_Reg(mRegI dst, mRegIorL2I src, mRegI shift) %{ ++ match(Set dst (LShiftI src shift)); ++ ++ format %{ "SHL $dst, $src, $shift #@salI_Reg_Reg" %} ++ ins_encode %{ ++ Register src = $src$$Register; ++ Register dst = $dst$$Register; ++ Register shamt = $shift$$Register; ++ __ sll_w(dst, src, shamt); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++ ++// Shift Left Long 6-bit immI ++instruct salL_Reg_imm(mRegL dst, mRegLorI2L src, immIU6 shift) %{ ++ match(Set dst (LShiftL src shift)); ++ ins_cost(100); ++ format %{ "salL $dst, $src, $shift @ salL_Reg_imm" %} ++ ins_encode %{ ++ Register src_reg = as_Register($src$$reg); ++ Register dst_reg = as_Register($dst$$reg); ++ int shamt = $shift$$constant; ++ ++ __ slli_d(dst_reg, src_reg, shamt); ++ %} ++ ins_pipe( ialu_regL_regL ); ++%} ++ ++// Shift Left Long ++instruct salL_Reg_Reg(mRegL dst, mRegLorI2L src, mRegI shift) %{ ++ match(Set dst (LShiftL src shift)); ++ ins_cost(100); ++ format %{ "salL $dst, $src, $shift @ salL_Reg_Reg" %} ++ ins_encode %{ ++ Register src_reg = as_Register($src$$reg); ++ Register dst_reg = as_Register($dst$$reg); ++ ++ __ sll_d(dst_reg, src_reg, $shift$$Register); ++ %} ++ ins_pipe( ialu_regL_regL ); ++%} ++ ++// Shift Right Long 6-bit ++instruct sarL_Reg_imm(mRegL dst, mRegLorI2L src, immIU6 shift) %{ ++ match(Set dst (RShiftL src shift)); ++ ins_cost(100); ++ format %{ "sarL $dst, $src, $shift @ sarL_Reg_imm" %} ++ ins_encode %{ ++ Register src_reg = as_Register($src$$reg); ++ Register dst_reg = as_Register($dst$$reg); ++ int shamt = $shift$$constant; ++ ++ __ srai_d(dst_reg, src_reg, shamt); ++ %} ++ ins_pipe( ialu_regL_regL ); ++%} ++ ++instruct sarL2I_Reg_immI_32_63(mRegI dst, mRegLorI2L src, immI_32_63 shift) %{ ++ match(Set dst (ConvL2I (RShiftL src shift))); ++ ins_cost(100); ++ format %{ "sarL $dst, $src, $shift @ sarL2I_Reg_immI_32_63" %} ++ ins_encode %{ ++ Register src_reg = as_Register($src$$reg); ++ Register dst_reg = as_Register($dst$$reg); ++ int shamt = $shift$$constant; ++ ++ __ srai_d(dst_reg, src_reg, shamt); ++ %} ++ ins_pipe( ialu_regL_regL ); ++%} ++ ++// Shift Right Long arithmetically ++instruct sarL_Reg_Reg(mRegL dst, mRegLorI2L src, mRegI shift) %{ ++ match(Set dst (RShiftL src shift)); ++ ins_cost(100); ++ format %{ "sarL $dst, $src, $shift @ sarL_Reg_Reg" %} ++ ins_encode %{ ++ Register src_reg = as_Register($src$$reg); ++ Register dst_reg = as_Register($dst$$reg); ++ ++ __ sra_d(dst_reg, src_reg, $shift$$Register); ++ %} ++ ins_pipe( ialu_regL_regL ); ++%} ++ ++// Shift Right Long logically ++instruct slrL_Reg_Reg(mRegL dst, mRegL src, mRegI shift) %{ ++ match(Set dst (URShiftL src shift)); ++ ins_cost(100); ++ format %{ "slrL $dst, $src, $shift @ slrL_Reg_Reg" %} ++ ins_encode %{ ++ Register src_reg = as_Register($src$$reg); ++ Register dst_reg 
= as_Register($dst$$reg); ++ ++ __ srl_d(dst_reg, src_reg, $shift$$Register); ++ %} ++ ins_pipe( ialu_regL_regL ); ++%} ++ ++instruct slrL_Reg_immI_0_31(mRegL dst, mRegLorI2L src, immI_0_31 shift) %{ ++ match(Set dst (URShiftL src shift)); ++ ins_cost(80); ++ format %{ "slrL $dst, $src, $shift @ slrL_Reg_immI_0_31" %} ++ ins_encode %{ ++ Register src_reg = as_Register($src$$reg); ++ Register dst_reg = as_Register($dst$$reg); ++ int shamt = $shift$$constant; ++ ++ __ srli_d(dst_reg, src_reg, shamt); ++ %} ++ ins_pipe( ialu_regL_regL ); ++%} ++ ++instruct slrL_Reg_immI_0_31_and_max_int(mRegI dst, mRegLorI2L src, immI_0_31 shift, immI_MaxI max_int) %{ ++ match(Set dst (AndI (ConvL2I (URShiftL src shift)) max_int)); ++ ins_cost(80); ++ format %{ "bstrpick_d $dst, $src, $shift+30, shift @ slrL_Reg_immI_0_31_and_max_int" %} ++ ins_encode %{ ++ Register src_reg = as_Register($src$$reg); ++ Register dst_reg = as_Register($dst$$reg); ++ int shamt = $shift$$constant; ++ ++ __ bstrpick_d(dst_reg, src_reg, shamt+30, shamt); ++ %} ++ ins_pipe( ialu_regL_regL ); ++%} ++ ++instruct slrL_P2XReg_immI_0_31(mRegL dst, mRegP src, immI_0_31 shift) %{ ++ match(Set dst (URShiftL (CastP2X src) shift)); ++ ins_cost(80); ++ format %{ "slrL $dst, $src, $shift @ slrL_P2XReg_immI_0_31" %} ++ ins_encode %{ ++ Register src_reg = as_Register($src$$reg); ++ Register dst_reg = as_Register($dst$$reg); ++ int shamt = $shift$$constant; ++ ++ __ srli_d(dst_reg, src_reg, shamt); ++ %} ++ ins_pipe( ialu_regL_regL ); ++%} ++ ++instruct slrL_Reg_immI_32_63(mRegL dst, mRegLorI2L src, immI_32_63 shift) %{ ++ match(Set dst (URShiftL src shift)); ++ ins_cost(80); ++ format %{ "slrL $dst, $src, $shift @ slrL_Reg_immI_32_63" %} ++ ins_encode %{ ++ Register src_reg = as_Register($src$$reg); ++ Register dst_reg = as_Register($dst$$reg); ++ int shamt = $shift$$constant; ++ ++ __ srli_d(dst_reg, src_reg, shamt); ++ %} ++ ins_pipe( ialu_regL_regL ); ++%} ++ ++instruct slrL_Reg_immI_convL2I(mRegI dst, mRegLorI2L src, immI_32_63 shift) %{ ++ match(Set dst (ConvL2I (URShiftL src shift))); ++ predicate(n->in(1)->in(2)->get_int() > 32); ++ ins_cost(80); ++ format %{ "slrL $dst, $src, $shift @ slrL_Reg_immI_convL2I" %} ++ ins_encode %{ ++ Register src_reg = as_Register($src$$reg); ++ Register dst_reg = as_Register($dst$$reg); ++ int shamt = $shift$$constant; ++ ++ __ srli_d(dst_reg, src_reg, shamt); ++ %} ++ ins_pipe( ialu_regL_regL ); ++%} ++ ++instruct slrL_P2XReg_immI_32_63(mRegL dst, mRegP src, immI_32_63 shift) %{ ++ match(Set dst (URShiftL (CastP2X src) shift)); ++ ins_cost(80); ++ format %{ "slrL $dst, $src, $shift @ slrL_P2XReg_immI_32_63" %} ++ ins_encode %{ ++ Register src_reg = as_Register($src$$reg); ++ Register dst_reg = as_Register($dst$$reg); ++ int shamt = $shift$$constant; ++ ++ __ srli_d(dst_reg, src_reg, shamt); ++ %} ++ ins_pipe( ialu_regL_regL ); ++%} ++ ++// Xor Instructions ++// Xor Register with Register ++instruct xorI_Reg_Reg(mRegI dst, mRegI src1, mRegI src2) %{ ++ match(Set dst (XorI src1 src2)); ++ ++ format %{ "XOR $dst, $src1, $src2 #@xorI_Reg_Reg" %} ++ ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src1 = $src1$$Register; ++ Register src2 = $src2$$Register; ++ __ xorr(dst, src1, src2); ++ %} ++ ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++// Or Instructions ++instruct orI_Reg_imm(mRegI dst, mRegI src1, immI_0_4095 src2) %{ ++ match(Set dst (OrI src1 src2)); ++ ++ format %{ "OR $dst, $src1, $src2 #@orI_Reg_imm" %} ++ ins_encode %{ ++ __ ori($dst$$Register, $src1$$Register, $src2$$constant); ++ %} ++ ++ 
ins_pipe( ialu_regI_regI ); ++%} ++ ++// Or Register with Register ++instruct orI_Reg_Reg(mRegI dst, mRegI src1, mRegI src2) %{ ++ match(Set dst (OrI src1 src2)); ++ ++ format %{ "OR $dst, $src1, $src2 #@orI_Reg_Reg" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src1 = $src1$$Register; ++ Register src2 = $src2$$Register; ++ __ orr(dst, src1, src2); ++ %} ++ ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++instruct rotI_shr_logical_Reg(mRegI dst, mRegI src, immI_0_31 rshift, immI_0_31 lshift, immI_1 one) %{ ++ match(Set dst (OrI (URShiftI src rshift) (LShiftI (AndI src one) lshift))); ++ predicate(32 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()))); ++ ++ format %{ "rotri_w $dst, $src, 1 ...\n\t" ++ "srli_w $dst, $dst, ($rshift-1) @ rotI_shr_logical_Reg" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src = $src$$Register; ++ int rshift = $rshift$$constant; ++ ++ __ rotri_w(dst, src, 1); ++ if (rshift - 1) { ++ __ srli_w(dst, dst, rshift - 1); ++ } ++ %} ++ ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++instruct orI_Reg_castP2X(mRegL dst, mRegL src1, mRegP src2) %{ ++ match(Set dst (OrI src1 (CastP2X src2))); ++ ++ format %{ "OR $dst, $src1, $src2 #@orI_Reg_castP2X" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src1 = $src1$$Register; ++ Register src2 = $src2$$Register; ++ __ orr(dst, src1, src2); ++ %} ++ ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++// Logical Shift Right by 5-bit immediate ++instruct shr_logical_Reg_imm(mRegI dst, mRegI src, immIU5 shift) %{ ++ match(Set dst (URShiftI src shift)); ++ //effect(KILL cr); ++ ++ format %{ "SRLI_W $dst, $src, $shift #@shr_logical_Reg_imm" %} ++ ins_encode %{ ++ Register src = $src$$Register; ++ Register dst = $dst$$Register; ++ int shift = $shift$$constant; ++ ++ __ srli_w(dst, src, shift); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++instruct shr_logical_Reg_imm_nonneg_mask(mRegI dst, mRegI src, immI_0_31 shift, immI_nonneg_mask mask) %{ ++ match(Set dst (AndI (URShiftI src shift) mask)); ++ ++ format %{ "bstrpick_w $dst, $src, $shift+one-bits($mask)-1, shift #@shr_logical_Reg_imm_nonneg_mask" %} ++ ins_encode %{ ++ Register src = $src$$Register; ++ Register dst = $dst$$Register; ++ int pos = $shift$$constant; ++ int size = Assembler::is_int_mask($mask$$constant); ++ ++ __ bstrpick_w(dst, src, pos+size-1, pos); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++instruct rolI_Reg_immI_0_31(mRegI dst, mRegI src, immI_0_31 lshift, immI_0_31 rshift) ++%{ ++ predicate(0 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()) & 0x1f)); ++ match(Set dst (OrI (LShiftI src lshift) (URShiftI src rshift))); ++ ++ ins_cost(100); ++ format %{ "rotri_w $dst, $src, $rshift #@rolI_Reg_immI_0_31" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src = $src$$Register; ++ int sa = $rshift$$constant; ++ ++ __ rotri_w(dst, src, sa); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++instruct rolL_Reg_immI_0_31(mRegL dst, mRegLorI2L src, immI_32_63 lshift, immI_0_31 rshift) ++%{ ++ predicate(0 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()) & 0x3f)); ++ match(Set dst (OrL (LShiftL src lshift) (URShiftL src rshift))); ++ ++ ins_cost(100); ++ format %{ "rotri_d $dst, $src, $rshift #@rolL_Reg_immI_0_31" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src = $src$$Register; ++ int sa = $rshift$$constant; ++ ++ __ rotri_d(dst, src, sa); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++instruct rolL_Reg_immI_32_63(mRegL dst, mRegLorI2L src, immI_0_31 lshift, immI_32_63 
rshift) ++%{ ++ predicate(0 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()) & 0x3f)); ++ match(Set dst (OrL (LShiftL src lshift) (URShiftL src rshift))); ++ ++ ins_cost(100); ++ format %{ "rotri_d $dst, $src, $rshift #@rolL_Reg_immI_32_63" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src = $src$$Register; ++ int sa = $rshift$$constant; ++ ++ __ rotri_d(dst, src, sa); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++instruct rorI_Reg_immI_0_31(mRegI dst, mRegI src, immI_0_31 rshift, immI_0_31 lshift) ++%{ ++ predicate(0 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()) & 0x1f)); ++ match(Set dst (OrI (URShiftI src rshift) (LShiftI src lshift))); ++ ++ ins_cost(100); ++ format %{ "rotri_w $dst, $src, $rshift #@rorI_Reg_immI_0_31" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src = $src$$Register; ++ int sa = $rshift$$constant; ++ ++ __ rotri_w(dst, src, sa); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++instruct rorL_Reg_immI_0_31(mRegL dst, mRegLorI2L src, immI_0_31 rshift, immI_32_63 lshift) ++%{ ++ predicate(0 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()) & 0x3f)); ++ match(Set dst (OrL (URShiftL src rshift) (LShiftL src lshift))); ++ ++ ins_cost(100); ++ format %{ "rotri_d $dst, $src, $rshift #@rorL_Reg_immI_0_31" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src = $src$$Register; ++ int sa = $rshift$$constant; ++ ++ __ rotri_d(dst, src, sa); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++instruct rorL_Reg_immI_32_63(mRegL dst, mRegLorI2L src, immI_32_63 rshift, immI_0_31 lshift) ++%{ ++ predicate(0 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()) & 0x3f)); ++ match(Set dst (OrL (URShiftL src rshift) (LShiftL src lshift))); ++ ++ ins_cost(100); ++ format %{ "rotri_d $dst, $src, $rshift #@rorL_Reg_immI_32_63" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src = $src$$Register; ++ int sa = $rshift$$constant; ++ ++ __ rotri_d(dst, src, sa); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++// Logical Shift Right ++instruct shr_logical_Reg_Reg(mRegI dst, mRegI src, mRegI shift) %{ ++ match(Set dst (URShiftI src shift)); ++ ++ format %{ "SRL_W $dst, $src, $shift #@shr_logical_Reg_Reg" %} ++ ins_encode %{ ++ Register src = $src$$Register; ++ Register dst = $dst$$Register; ++ Register shift = $shift$$Register; ++ __ srl_w(dst, src, shift); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++ ++instruct shr_arith_Reg_imm(mRegI dst, mRegI src, immIU5 shift) %{ ++ match(Set dst (RShiftI src shift)); ++ // effect(KILL cr); ++ ++ format %{ "SRAI_W $dst, $src, $shift #@shr_arith_Reg_imm" %} ++ ins_encode %{ ++ Register src = $src$$Register; ++ Register dst = $dst$$Register; ++ int shift = $shift$$constant; ++ __ srai_w(dst, src, shift); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++instruct shr_arith_Reg_Reg(mRegI dst, mRegI src, mRegI shift) %{ ++ match(Set dst (RShiftI src shift)); ++ // effect(KILL cr); ++ ++ format %{ "SRA_W $dst, $src, $shift #@shr_arith_Reg_Reg" %} ++ ins_encode %{ ++ Register src = $src$$Register; ++ Register dst = $dst$$Register; ++ Register shift = $shift$$Register; ++ __ sra_w(dst, src, shift); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++//----------Convert Int to Boolean--------------------------------------------- ++ ++instruct convI2B(mRegI dst, mRegI src) %{ ++ match(Set dst (Conv2B src)); ++ ++ ins_cost(100); ++ format %{ "convI2B $dst, $src @ convI2B" %} ++ ins_encode %{ ++ Register dst = as_Register($dst$$reg); ++ Register src = 
as_Register($src$$reg); ++ ++ if (dst != src) { ++ __ addi_d(dst, R0, 1); ++ __ maskeqz(dst, dst, src); ++ } else { ++ __ move(AT, src); ++ __ addi_d(dst, R0, 1); ++ __ maskeqz(dst, dst, AT); ++ } ++ %} ++ ++ ins_pipe( ialu_regL_regL ); ++%} ++ ++instruct convI2L_reg( mRegL dst, mRegI src) %{ ++ match(Set dst (ConvI2L src)); ++ ++ ins_cost(100); ++ format %{ "SLLI_W $dst, $src @ convI2L_reg\t" %} ++ ++ ins_encode %{ ++ Register dst = as_Register($dst$$reg); ++ Register src = as_Register($src$$reg); ++ ++ if(dst != src) __ slli_w(dst, src, 0); ++ %} ++ ins_pipe( ialu_regL_regL ); ++%} ++ ++instruct convL2I_reg( mRegI dst, mRegLorI2L src ) %{ ++ match(Set dst (ConvL2I src)); ++ ++ format %{ "MOV $dst, $src @ convL2I_reg" %} ++ ins_encode %{ ++ Register dst = as_Register($dst$$reg); ++ Register src = as_Register($src$$reg); ++ ++ __ slli_w(dst, src, 0); ++ %} ++ ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++instruct convL2D_reg( regD dst, mRegL src ) %{ ++ match(Set dst (ConvL2D src)); ++ format %{ "convL2D $dst, $src @ convL2D_reg" %} ++ ins_encode %{ ++ Register src = as_Register($src$$reg); ++ FloatRegister dst = as_FloatRegister($dst$$reg); ++ ++ __ movgr2fr_d(dst, src); ++ __ ffint_d_l(dst, dst); ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++ ++// Convert double to int. ++// If the double is NaN, stuff a zero in instead. ++instruct convD2I_reg_reg(mRegI dst, regD src, regD tmp) %{ ++ match(Set dst (ConvD2I src)); ++ effect(USE src, TEMP tmp); ++ ++ format %{ "convd2i $dst, $src, using $tmp as TEMP @ convD2I_reg_reg" %} ++ ++ ins_encode %{ ++ __ ftintrz_w_d($tmp$$FloatRegister, $src$$FloatRegister); ++ __ movfr2gr_s($dst$$Register, $tmp$$FloatRegister); ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct convD2L_reg_reg(mRegL dst, regD src, regD tmp) %{ ++ match(Set dst (ConvD2L src)); ++ effect(USE src, TEMP tmp); ++ ++ format %{ "convd2l $dst, $src, using $tmp as TEMP @ convD2L_reg_reg" %} ++ ++ ins_encode %{ ++ __ ftintrz_l_d($tmp$$FloatRegister, $src$$FloatRegister); ++ __ movfr2gr_d($dst$$Register, $tmp$$FloatRegister); ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++// Convert float to int. ++// If the float is NaN, stuff a zero in instead. 
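++// As with the double variants above, no explicit NaN check is emitted:
++// the pattern relies on the truncating ftintrz convert yielding zero for a
++// NaN input, as the note above states.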
++instruct convF2I_reg_reg(mRegI dst, regF src, regF tmp) %{ ++ match(Set dst (ConvF2I src)); ++ effect(USE src, TEMP tmp); ++ ++ format %{ "convf2i $dst, $src, using $tmp as TEMP @ convF2I_reg_reg" %} ++ ++ ins_encode %{ ++ __ ftintrz_w_s($tmp$$FloatRegister, $src$$FloatRegister); ++ __ movfr2gr_s($dst$$Register, $tmp$$FloatRegister); ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct convF2L_reg_reg(mRegL dst, regF src, regF tmp) %{ ++ match(Set dst (ConvF2L src)); ++ effect(USE src, TEMP tmp); ++ ++ format %{ "convf2l $dst, $src, using $tmp as TEMP @ convF2L_reg_reg" %} ++ ++ ins_encode %{ ++ __ ftintrz_l_s($tmp$$FloatRegister, $src$$FloatRegister); ++ __ movfr2gr_d($dst$$Register, $tmp$$FloatRegister); ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++ ++instruct convL2F_reg( regF dst, mRegL src ) %{ ++ match(Set dst (ConvL2F src)); ++ format %{ "convl2f $dst, $src @ convL2F_reg" %} ++ ins_encode %{ ++ FloatRegister dst = $dst$$FloatRegister; ++ Register src = as_Register($src$$reg); ++ Label L; ++ ++ __ movgr2fr_d(dst, src); ++ __ ffint_s_l(dst, dst); ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct convI2F_reg( regF dst, mRegI src ) %{ ++ match(Set dst (ConvI2F src)); ++ format %{ "convi2f $dst, $src @ convI2F_reg" %} ++ ins_encode %{ ++ Register src = $src$$Register; ++ FloatRegister dst = $dst$$FloatRegister; ++ ++ __ movgr2fr_w(dst, src); ++ __ ffint_s_w(dst, dst); ++ %} ++ ++ ins_pipe( fpu_regF_regF ); ++%} ++ ++instruct cmpLTMask_immI_0( mRegI dst, mRegI p, immI_0 zero ) %{ ++ match(Set dst (CmpLTMask p zero)); ++ ins_cost(100); ++ ++ format %{ "srai_w $dst, $p, 31 @ cmpLTMask_immI_0" %} ++ ins_encode %{ ++ Register src = $p$$Register; ++ Register dst = $dst$$Register; ++ ++ __ srai_w(dst, src, 31); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++ ++instruct cmpLTMask( mRegI dst, mRegI p, mRegI q ) %{ ++ match(Set dst (CmpLTMask p q)); ++ ins_cost(400); ++ ++ format %{ "cmpLTMask $dst, $p, $q @ cmpLTMask" %} ++ ins_encode %{ ++ Register p = $p$$Register; ++ Register q = $q$$Register; ++ Register dst = $dst$$Register; ++ ++ __ slt(dst, p, q); ++ __ sub_d(dst, R0, dst); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct convP2B(mRegI dst, mRegP src) %{ ++ match(Set dst (Conv2B src)); ++ ++ ins_cost(100); ++ format %{ "convP2B $dst, $src @ convP2B" %} ++ ins_encode %{ ++ Register dst = as_Register($dst$$reg); ++ Register src = as_Register($src$$reg); ++ ++ if (dst != src) { ++ __ addi_d(dst, R0, 1); ++ __ maskeqz(dst, dst, src); ++ } else { ++ __ move(AT, src); ++ __ addi_d(dst, R0, 1); ++ __ maskeqz(dst, dst, AT); ++ } ++ %} ++ ++ ins_pipe( ialu_regL_regL ); ++%} ++ ++ ++instruct convI2D_reg_reg(regD dst, mRegI src) %{ ++ match(Set dst (ConvI2D src)); ++ format %{ "conI2D $dst, $src @convI2D_reg" %} ++ ins_encode %{ ++ Register src = $src$$Register; ++ FloatRegister dst = $dst$$FloatRegister; ++ __ movgr2fr_w(dst ,src); ++ __ ffint_d_w(dst, dst); ++ %} ++ ins_pipe( fpu_regF_regF ); ++%} ++ ++instruct convF2D_reg_reg(regD dst, regF src) %{ ++ match(Set dst (ConvF2D src)); ++ format %{ "convF2D $dst, $src\t# @convF2D_reg_reg" %} ++ ins_encode %{ ++ FloatRegister dst = $dst$$FloatRegister; ++ FloatRegister src = $src$$FloatRegister; ++ ++ __ fcvt_d_s(dst, src); ++ %} ++ ins_pipe( fpu_regF_regF ); ++%} ++ ++instruct convD2F_reg_reg(regF dst, regD src) %{ ++ match(Set dst (ConvD2F src)); ++ format %{ "convD2F $dst, $src\t# @convD2F_reg_reg" %} ++ ins_encode %{ ++ FloatRegister dst = $dst$$FloatRegister; ++ FloatRegister src = $src$$FloatRegister; ++ ++ __ fcvt_s_d(dst, src); ++ %} ++ 
ins_pipe( fpu_regF_regF ); ++%} ++ ++ ++// Convert oop pointer into compressed form ++instruct encodeHeapOop(mRegN dst, mRegP src) %{ ++ predicate(n->bottom_type()->make_ptr()->ptr() != TypePtr::NotNull); ++ match(Set dst (EncodeP src)); ++ format %{ "encode_heap_oop $dst,$src" %} ++ ins_encode %{ ++ Register src = $src$$Register; ++ Register dst = $dst$$Register; ++ ++ __ encode_heap_oop(dst, src); ++ %} ++ ins_pipe( ialu_regL_regL ); ++%} ++ ++instruct encodeHeapOop_not_null(mRegN dst, mRegP src) %{ ++ predicate(n->bottom_type()->make_ptr()->ptr() == TypePtr::NotNull); ++ match(Set dst (EncodeP src)); ++ format %{ "encode_heap_oop_not_null $dst,$src @ encodeHeapOop_not_null" %} ++ ins_encode %{ ++ __ encode_heap_oop_not_null($dst$$Register, $src$$Register); ++ %} ++ ins_pipe( ialu_regL_regL ); ++%} ++ ++instruct decodeHeapOop(mRegP dst, mRegN src) %{ ++ predicate(n->bottom_type()->is_ptr()->ptr() != TypePtr::NotNull && ++ n->bottom_type()->is_ptr()->ptr() != TypePtr::Constant); ++ match(Set dst (DecodeN src)); ++ format %{ "decode_heap_oop $dst,$src @ decodeHeapOop" %} ++ ins_encode %{ ++ Register s = $src$$Register; ++ Register d = $dst$$Register; ++ ++ __ decode_heap_oop(d, s); ++ %} ++ ins_pipe( ialu_regL_regL ); ++%} ++ ++instruct decodeHeapOop_not_null(mRegP dst, mRegN src) %{ ++ predicate(n->bottom_type()->is_ptr()->ptr() == TypePtr::NotNull || ++ n->bottom_type()->is_ptr()->ptr() == TypePtr::Constant); ++ match(Set dst (DecodeN src)); ++ format %{ "decode_heap_oop_not_null $dst,$src @ decodeHeapOop_not_null" %} ++ ins_encode %{ ++ Register s = $src$$Register; ++ Register d = $dst$$Register; ++ if (s != d) { ++ __ decode_heap_oop_not_null(d, s); ++ } else { ++ __ decode_heap_oop_not_null(d); ++ } ++ %} ++ ins_pipe( ialu_regL_regL ); ++%} ++ ++instruct encodeKlass_not_null(mRegN dst, mRegP src) %{ ++ match(Set dst (EncodePKlass src)); ++ format %{ "encode_heap_oop_not_null $dst,$src @ encodeKlass_not_null" %} ++ ins_encode %{ ++ __ encode_klass_not_null($dst$$Register, $src$$Register); ++ %} ++ ins_pipe( ialu_regL_regL ); ++%} ++ ++instruct decodeKlass_not_null(mRegP dst, mRegN src) %{ ++ match(Set dst (DecodeNKlass src)); ++ format %{ "decode_heap_klass_not_null $dst,$src" %} ++ ins_encode %{ ++ Register s = $src$$Register; ++ Register d = $dst$$Register; ++ if (s != d) { ++ __ decode_klass_not_null(d, s); ++ } else { ++ __ decode_klass_not_null(d); ++ } ++ %} ++ ins_pipe( ialu_regL_regL ); ++%} ++ ++//FIXME ++instruct tlsLoadP(mRegP dst) %{ ++ match(Set dst (ThreadLocal)); ++ ++ ins_cost(0); ++ format %{ " get_thread in $dst #@tlsLoadP" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++#ifdef OPT_THREAD ++ __ move(dst, TREG); ++#else ++ __ get_thread(dst); ++#endif ++ %} ++ ++ ins_pipe( ialu_loadI ); ++%} ++ ++ ++instruct checkCastPP( mRegP dst ) %{ ++ match(Set dst (CheckCastPP dst)); ++ ++ format %{ "#checkcastPP of $dst (empty encoding) #@chekCastPP" %} ++ ins_encode( /*empty encoding*/ ); ++ ins_pipe( empty ); ++%} ++ ++instruct castPP(mRegP dst) ++%{ ++ match(Set dst (CastPP dst)); ++ ++ size(0); ++ format %{ "# castPP of $dst" %} ++ ins_encode(/* empty encoding */); ++ ins_pipe(empty); ++%} ++ ++instruct castII( mRegI dst ) %{ ++ match(Set dst (CastII dst)); ++ format %{ "#castII of $dst empty encoding" %} ++ ins_encode( /*empty encoding*/ ); ++ ins_cost(0); ++ ins_pipe( empty ); ++%} ++ ++// Return Instruction ++// Remove the return address & jump to it. 
++instruct Ret() %{ ++ match(Return); ++ format %{ "RET #@Ret" %} ++ ++ ins_encode %{ ++ __ jr(RA); ++ %} ++ ++ ins_pipe( pipe_jump ); ++%} ++ ++ ++ ++// Tail Jump; remove the return address; jump to target. ++// TailCall above leaves the return address around. ++// TailJump is used in only one place, the rethrow_Java stub (fancy_jump=2). ++// ex_oop (Exception Oop) is needed in %o0 at the jump. As there would be a ++// "restore" before this instruction (in Epilogue), we need to materialize it ++// in %i0. ++//FIXME ++instruct tailjmpInd(no_Ax_mRegP jump_target, mRegP ex_oop) %{ ++ match( TailJump jump_target ex_oop ); ++ ins_cost(200); ++ format %{ "Jmp $jump_target ; ex_oop = $ex_oop #@tailjmpInd" %} ++ ins_encode %{ ++ Register target = $jump_target$$Register; ++ ++ // V0, V1 are indicated in: ++ // [stubGenerator_loongarch.cpp] generate_forward_exception() ++ // [runtime_loongarch.cpp] OptoRuntime::generate_exception_blob() ++ // ++ Register oop = $ex_oop$$Register; ++ Register exception_oop = V0; ++ Register exception_pc = V1; ++ ++ __ move(exception_pc, RA); ++ __ move(exception_oop, oop); ++ ++ __ jr(target); ++ %} ++ ins_pipe( pipe_jump ); ++%} ++ ++// ============================================================================ ++// Procedure Call/Return Instructions ++// Call Java Static Instruction ++// Note: If this code changes, the corresponding ret_addr_offset() and ++// compute_padding() functions will have to be adjusted. ++instruct CallStaticJavaDirect(method meth) %{ ++ match(CallStaticJava); ++ effect(USE meth); ++ ++ ins_cost(300); ++ format %{ "CALL,static #@CallStaticJavaDirect " %} ++ ins_encode( Java_Static_Call( meth ) ); ++ ins_pipe( pipe_slow ); ++ ins_pc_relative(1); ++ ins_alignment(4); ++%} ++ ++// Call Java Dynamic Instruction ++// Note: If this code changes, the corresponding ret_addr_offset() and ++// compute_padding() functions will have to be adjusted. ++instruct CallDynamicJavaDirect(method meth) %{ ++ match(CallDynamicJava); ++ effect(USE meth); ++ ++ ins_cost(300); ++ format %{"MOV IC_Klass, #Universe::non_oop_word()\n\t" ++ "CallDynamic @ CallDynamicJavaDirect" %} ++ ins_encode( Java_Dynamic_Call( meth ) ); ++ ins_pipe( pipe_slow ); ++ ins_pc_relative(1); ++ ins_alignment(4); ++%} ++ ++instruct CallLeafNoFPDirect(method meth) %{ ++ match(CallLeafNoFP); ++ effect(USE meth); ++ ++ ins_cost(300); ++ format %{ "CALL_LEAF_NOFP,runtime " %} ++ ins_encode(Java_To_Runtime(meth)); ++ ins_pipe( pipe_slow ); ++ ins_pc_relative(1); ++ ins_alignment(4); ++%} ++ ++// Prefetch instructions for allocation. 
++ ++instruct prefetchAlloc(memory mem) %{ ++ match(PrefetchAllocation mem); ++ ins_cost(125); ++ format %{ "preld $mem\t# Prefetch allocation @ prefetchAlloc" %} ++ ins_encode %{ ++ int base = $mem$$base; ++ int index = $mem$$index; ++ int scale = $mem$$scale; ++ int disp = $mem$$disp; ++ ++ if (index != 0) { ++ if (scale == 0) { ++ __ add_d(AT, as_Register(base), as_Register(index)); ++ } else { ++ __ alsl_d(AT, as_Register(index), as_Register(base), scale - 1); ++ } ++ ++ if (Assembler::is_simm(disp, 12)) { ++ __ preld(8, AT, disp); ++ } else { ++ __ li(T4, disp); ++ __ add_d(AT, AT, T4); ++ __ preld(8, AT, 0); ++ } ++ } else { ++ if (Assembler::is_simm(disp, 12)) { ++ __ preld(8, as_Register(base), disp); ++ } else { ++ __ li(T4, disp); ++ __ add_d(AT, as_Register(base), T4); ++ __ preld(8, AT, 0); ++ } ++ } ++ %} ++ ins_pipe(pipe_slow); ++%} ++ ++// Call runtime without safepoint ++instruct CallLeafDirect(method meth) %{ ++ match(CallLeaf); ++ effect(USE meth); ++ ++ ins_cost(300); ++ format %{ "CALL_LEAF,runtime #@CallLeafDirect " %} ++ ins_encode(Java_To_Runtime(meth)); ++ ins_pipe( pipe_slow ); ++ ins_pc_relative(1); ++ ins_alignment(4); ++%} ++ ++// Load Char (16bit unsigned) ++instruct loadUS(mRegI dst, memory mem) %{ ++ match(Set dst (LoadUS mem)); ++ ++ ins_cost(125); ++ format %{ "loadUS $dst,$mem @ loadC" %} ++ ins_encode %{ ++ __ loadstore_enc($dst$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::LOAD_U_SHORT); ++ %} ++ ins_pipe( ialu_loadI ); ++%} ++ ++instruct loadUS_convI2L(mRegL dst, memory mem) %{ ++ match(Set dst (ConvI2L (LoadUS mem))); ++ ++ ins_cost(125); ++ format %{ "loadUS $dst,$mem @ loadUS_convI2L" %} ++ ins_encode %{ ++ __ loadstore_enc($dst$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::LOAD_U_SHORT); ++ %} ++ ins_pipe( ialu_loadI ); ++%} ++ ++// Store Char (16bit unsigned) ++instruct storeC(memory mem, mRegIorL2I src) %{ ++ match(Set mem (StoreC mem src)); ++ ++ ins_cost(125); ++ format %{ "storeC $src, $mem @ storeC" %} ++ ins_encode %{ ++ __ loadstore_enc($src$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::STORE_CHAR); ++ %} ++ ins_pipe( ialu_loadI ); ++%} ++ ++instruct storeC_0(memory mem, immI_0 zero) %{ ++ match(Set mem (StoreC mem zero)); ++ ++ ins_cost(125); ++ format %{ "storeC $zero, $mem @ storeC_0" %} ++ ins_encode %{ ++ __ loadstore_enc(R0, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::STORE_SHORT); ++ %} ++ ins_pipe( ialu_loadI ); ++%} ++ ++ ++instruct loadConF_immF_0(regF dst, immF_0 zero) %{ ++ match(Set dst zero); ++ ins_cost(100); ++ ++ format %{ "mov $dst, zero @ loadConF_immF_0\n"%} ++ ins_encode %{ ++ FloatRegister dst = $dst$$FloatRegister; ++ ++ __ movgr2fr_w(dst, R0); ++ %} ++ ins_pipe( fpu_loadF ); ++%} ++ ++ ++instruct loadConF(regF dst, immF src) %{ ++ match(Set dst src); ++ ins_cost(125); ++ ++ format %{ "fld_s $dst, $constantoffset[$constanttablebase] # load FLOAT $src from table @ loadConF" %} ++ ins_encode %{ ++ int con_offset = $constantoffset($src); ++ ++ if (Assembler::is_simm(con_offset, 12)) { ++ __ fld_s($dst$$FloatRegister, $constanttablebase, con_offset); ++ } else { ++ __ li(AT, con_offset); ++ __ fldx_s($dst$$FloatRegister, $constanttablebase, AT); ++ } ++ %} ++ ins_pipe( fpu_loadF ); ++%} ++ ++ ++instruct loadConD_immD_0(regD dst, immD_0 zero) %{ ++ match(Set dst zero); ++ ins_cost(100); ++ ++ format %{ "mov $dst, zero @ loadConD_immD_0"%} ++ ins_encode %{ ++ FloatRegister dst = as_FloatRegister($dst$$reg); ++ 
++ __ movgr2fr_d(dst, R0); ++ %} ++ ins_pipe( fpu_loadF ); ++%} ++ ++instruct loadConD(regD dst, immD src) %{ ++ match(Set dst src); ++ ins_cost(125); ++ ++ format %{ "fld_d $dst, $constantoffset[$constanttablebase] # load DOUBLE $src from table @ loadConD" %} ++ ins_encode %{ ++ int con_offset = $constantoffset($src); ++ ++ if (Assembler::is_simm(con_offset, 12)) { ++ __ fld_d($dst$$FloatRegister, $constanttablebase, con_offset); ++ } else { ++ __ li(AT, con_offset); ++ __ fldx_d($dst$$FloatRegister, $constanttablebase, AT); ++ } ++ %} ++ ins_pipe( fpu_loadF ); ++%} ++ ++// Store register Float value (it is faster than store from FPU register) ++instruct storeF_reg( memory mem, regF src) %{ ++ match(Set mem (StoreF mem src)); ++ ++ ins_cost(50); ++ format %{ "store $mem, $src\t# store float @ storeF_reg" %} ++ ins_encode %{ ++ __ loadstore_enc($src$$FloatRegister, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::STORE_FLOAT); ++ %} ++ ins_pipe( fpu_storeF ); ++%} ++ ++instruct storeF_immF_0( memory mem, immF_0 zero) %{ ++ match(Set mem (StoreF mem zero)); ++ ++ ins_cost(40); ++ format %{ "store $mem, zero\t# store float @ storeF_immF_0" %} ++ ins_encode %{ ++ __ loadstore_enc(R0, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::STORE_INT); ++ %} ++ ins_pipe( ialu_storeI ); ++%} ++ ++// Load Double ++instruct loadD(regD dst, memory mem) %{ ++ match(Set dst (LoadD mem)); ++ ++ ins_cost(150); ++ format %{ "loadD $dst, $mem #@loadD" %} ++ ins_encode %{ ++ __ loadstore_enc($dst$$FloatRegister, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::LOAD_DOUBLE); ++ %} ++ ins_pipe( ialu_loadI ); ++%} ++ ++// Load Double - UNaligned ++instruct loadD_unaligned(regD dst, memory mem ) %{ ++ match(Set dst (LoadD_unaligned mem)); ++ ins_cost(250); ++ // FIXME: Need more effective ldl/ldr ++ format %{ "loadD_unaligned $dst, $mem #@loadD_unaligned" %} ++ ins_encode %{ ++ __ loadstore_enc($dst$$FloatRegister, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::LOAD_DOUBLE); ++ %} ++ ins_pipe( ialu_loadI ); ++%} ++ ++instruct storeD_reg( memory mem, regD src) %{ ++ match(Set mem (StoreD mem src)); ++ ++ ins_cost(50); ++ format %{ "store $mem, $src\t# store float @ storeD_reg" %} ++ ins_encode %{ ++ __ loadstore_enc($src$$FloatRegister, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::STORE_DOUBLE); ++ %} ++ ins_pipe( fpu_storeF ); ++%} ++ ++instruct storeD_immD_0( memory mem, immD_0 zero) %{ ++ match(Set mem (StoreD mem zero)); ++ ++ ins_cost(40); ++ format %{ "store $mem, zero\t# store float @ storeD_immD_0" %} ++ ins_encode %{ ++ __ loadstore_enc(R0, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::STORE_LONG); ++ %} ++ ins_pipe( ialu_storeI ); ++%} ++ ++instruct loadSSI(mRegI dst, stackSlotI src) ++%{ ++ match(Set dst src); ++ ++ ins_cost(125); ++ format %{ "ld_w $dst, $src\t# int stk @ loadSSI" %} ++ ins_encode %{ ++ guarantee( Assembler::is_simm($src$$disp, 12), "disp too long (loadSSI) !"); ++ __ ld_w($dst$$Register, SP, $src$$disp); ++ %} ++ ins_pipe(ialu_loadI); ++%} ++ ++instruct storeSSI(stackSlotI dst, mRegI src) ++%{ ++ match(Set dst src); ++ ++ ins_cost(100); ++ format %{ "st_w $dst, $src\t# int stk @ storeSSI" %} ++ ins_encode %{ ++ guarantee( Assembler::is_simm($dst$$disp, 12), "disp too long (storeSSI) !"); ++ __ st_w($src$$Register, SP, $dst$$disp); ++ %} ++ ins_pipe(ialu_storeI); ++%} ++ ++instruct loadSSL(mRegL dst, stackSlotL src) ++%{ ++ match(Set dst src); ++ ++ ins_cost(125); ++ format %{ 
"ld_d $dst, $src\t# long stk @ loadSSL" %} ++ ins_encode %{ ++ guarantee( Assembler::is_simm($src$$disp, 12), "disp too long (loadSSL) !"); ++ __ ld_d($dst$$Register, SP, $src$$disp); ++ %} ++ ins_pipe(ialu_loadI); ++%} ++ ++instruct storeSSL(stackSlotL dst, mRegL src) ++%{ ++ match(Set dst src); ++ ++ ins_cost(100); ++ format %{ "st_d $dst, $src\t# long stk @ storeSSL" %} ++ ins_encode %{ ++ guarantee( Assembler::is_simm($dst$$disp, 12), "disp too long (storeSSL) !"); ++ __ st_d($src$$Register, SP, $dst$$disp); ++ %} ++ ins_pipe(ialu_storeI); ++%} ++ ++instruct loadSSP(mRegP dst, stackSlotP src) ++%{ ++ match(Set dst src); ++ ++ ins_cost(125); ++ format %{ "ld_d $dst, $src\t# ptr stk @ loadSSP" %} ++ ins_encode %{ ++ guarantee( Assembler::is_simm($src$$disp, 12), "disp too long (loadSSP) !"); ++ __ ld_d($dst$$Register, SP, $src$$disp); ++ %} ++ ins_pipe(ialu_loadI); ++%} ++ ++instruct storeSSP(stackSlotP dst, mRegP src) ++%{ ++ match(Set dst src); ++ ++ ins_cost(100); ++ format %{ "sd $dst, $src\t# ptr stk @ storeSSP" %} ++ ins_encode %{ ++ guarantee( Assembler::is_simm($dst$$disp, 12), "disp too long (storeSSP) !"); ++ __ st_d($src$$Register, SP, $dst$$disp); ++ %} ++ ins_pipe(ialu_storeI); ++%} ++ ++instruct loadSSF(regF dst, stackSlotF src) ++%{ ++ match(Set dst src); ++ ++ ins_cost(125); ++ format %{ "fld_s $dst, $src\t# float stk @ loadSSF" %} ++ ins_encode %{ ++ guarantee( Assembler::is_simm($src$$disp, 12), "disp too long (loadSSF) !"); ++ __ fld_s($dst$$FloatRegister, SP, $src$$disp); ++ %} ++ ins_pipe(ialu_loadI); ++%} ++ ++instruct storeSSF(stackSlotF dst, regF src) ++%{ ++ match(Set dst src); ++ ++ ins_cost(100); ++ format %{ "fst_s $dst, $src\t# float stk @ storeSSF" %} ++ ins_encode %{ ++ guarantee( Assembler::is_simm($dst$$disp, 12), "disp too long (storeSSF) !"); ++ __ fst_s($src$$FloatRegister, SP, $dst$$disp); ++ %} ++ ins_pipe(fpu_storeF); ++%} ++ ++// Use the same format since predicate() can not be used here. 
++instruct loadSSD(regD dst, stackSlotD src) ++%{ ++ match(Set dst src); ++ ++ ins_cost(125); ++ format %{ "fld_d $dst, $src\t# double stk @ loadSSD" %} ++ ins_encode %{ ++ guarantee( Assembler::is_simm($src$$disp, 12), "disp too long (loadSSD) !"); ++ __ fld_d($dst$$FloatRegister, SP, $src$$disp); ++ %} ++ ins_pipe(ialu_loadI); ++%} ++ ++instruct storeSSD(stackSlotD dst, regD src) ++%{ ++ match(Set dst src); ++ ++ ins_cost(100); ++ format %{ "fst_d $dst, $src\t# double stk @ storeSSD" %} ++ ins_encode %{ ++ guarantee( Assembler::is_simm($dst$$disp, 12), "disp too long (storeSSD) !"); ++ __ fst_d($src$$FloatRegister, SP, $dst$$disp); ++ %} ++ ins_pipe(fpu_storeF); ++%} ++ ++instruct cmpFastLock(FlagsReg cr, mRegP object, mRegP box, mRegI tmp, mRegI scr) %{ ++ match(Set cr (FastLock object box)); ++ effect(TEMP tmp, TEMP scr); ++ ins_cost(300); ++ format %{ "FASTLOCK $cr <-- $object, $box, $tmp, $scr #@ cmpFastLock" %} ++ ins_encode %{ ++ __ fast_lock($object$$Register, $box$$Register, $cr$$Register, $tmp$$Register, $scr$$Register); ++ %} ++ ++ ins_pipe( pipe_slow ); ++ ins_pc_relative(1); ++%} ++ ++instruct cmpFastUnlock(FlagsReg cr, mRegP object, mRegP box, mRegI tmp, mRegI scr) %{ ++ match(Set cr (FastUnlock object box)); ++ effect(TEMP tmp, TEMP scr); ++ ins_cost(300); ++ format %{ "FASTUNLOCK $cr <-- $object, $box, $tmp #@cmpFastUnlock" %} ++ ins_encode %{ ++ __ fast_unlock($object$$Register, $box$$Register, $cr$$Register, $tmp$$Register, $scr$$Register); ++ %} ++ ++ ins_pipe( pipe_slow ); ++ ins_pc_relative(1); ++%} ++ ++// Store CMS card-mark Immediate 0 ++instruct storeImmCM_order(memory mem, immI_0 zero) %{ ++ match(Set mem (StoreCM mem zero)); ++ predicate(UseConcMarkSweepGC && !UseCondCardMark); ++ ins_cost(100); ++ format %{ "StoreCM MEMBAR storestore\n\t" ++ "st_b $mem, zero\t! card-mark imm0" %} ++ ins_encode %{ ++ __ membar(__ StoreStore); ++ __ loadstore_enc(R0, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::STORE_BYTE); ++ %} ++ ins_pipe( ialu_storeI ); ++%} ++ ++instruct storeImmCM(memory mem, immI_0 zero) %{ ++ match(Set mem (StoreCM mem zero)); ++ ++ ins_cost(150); ++ format %{ "st_b $mem, zero\t! card-mark imm0" %} ++ ins_encode %{ ++ __ loadstore_enc(R0, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::STORE_BYTE); ++ %} ++ ins_pipe( ialu_storeI ); ++%} ++ ++// Die now ++instruct ShouldNotReachHere( ) ++%{ ++ match(Halt); ++ ins_cost(300); ++ ++ // Use the following format syntax ++ format %{ "ILLTRAP ;#@ShouldNotReachHere" %} ++ ins_encode %{ ++ if (is_reachable()) { ++ // Here we should emit illtrap! 
++ __ stop("ShouldNotReachHere"); ++ } ++ %} ++ ins_pipe( pipe_jump ); ++%} ++ ++instruct leaP12Narrow(mRegP dst, indOffset12Narrow mem) ++%{ ++ predicate(Universe::narrow_oop_shift() == 0); ++ match(Set dst mem); ++ ++ ins_cost(110); ++ format %{ "leaq $dst, $mem\t# ptr off12narrow @ leaP12Narrow" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register base = as_Register($mem$$base); ++ int disp = $mem$$disp; ++ ++ __ addi_d(dst, base, disp); ++ %} ++ ins_pipe( ialu_regI_imm16 ); ++%} ++ ++instruct leaPIdxScale(mRegP dst, mRegP reg, mRegLorI2L lreg, immI_0_3 scale) ++%{ ++ match(Set dst (AddP reg (LShiftL lreg scale))); ++ ++ ins_cost(110); ++ format %{ "leaq $dst, [$reg + $lreg << $scale]\t# @ leaPIdxScale" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register base = $reg$$Register; ++ Register index = $lreg$$Register; ++ int scale = $scale$$constant; ++ ++ if (scale == 0) { ++ __ add_d($dst$$Register, $reg$$Register, index); ++ } else { ++ __ alsl_d(dst, index, base, scale - 1); ++ } ++ %} ++ ++ ins_pipe( ialu_regI_imm16 ); ++%} ++ ++ ++// ============================================================================ ++// The 2nd slow-half of a subtype check. Scan the subklass's 2ndary superklass ++// array for an instance of the superklass. Set a hidden internal cache on a ++// hit (cache is checked with exposed code in gen_subtype_check()). Return ++// NZ for a miss or zero for a hit. The encoding ALSO sets flags. ++instruct partialSubtypeCheck( mRegP result, no_T8_mRegP sub, no_T8_mRegP super, mT8RegI tmp ) %{ ++ match(Set result (PartialSubtypeCheck sub super)); ++ effect(KILL tmp); ++ ins_cost(1100); // slightly larger than the next version ++ format %{ "partialSubtypeCheck result=$result, sub=$sub, super=$super, tmp=$tmp " %} ++ ++ ins_encode( enc_PartialSubtypeCheck(result, sub, super, tmp) ); ++ ins_pipe( pipe_slow ); ++%} ++ ++// Conditional-store of the updated heap-top. ++// Used during allocation of the shared heap. ++ ++instruct storePConditional(memory heap_top_ptr, mRegP oldval, mRegP newval, FlagsReg cr) %{ ++ match(Set cr (StorePConditional heap_top_ptr (Binary oldval newval))); ++ ++ format %{ "move AT, $newval\n\t" ++ "sc_d $heap_top_ptr, AT\t# (ptr) @storePConditional \n\t" ++ "move $cr, AT\n" %} ++ ins_encode%{ ++ Register oldval = $oldval$$Register; ++ Register newval = $newval$$Register; ++ Address addr(as_Register($heap_top_ptr$$base), $heap_top_ptr$$disp); ++ ++ int index = $heap_top_ptr$$index; ++ int scale = $heap_top_ptr$$scale; ++ int disp = $heap_top_ptr$$disp; ++ ++ guarantee(Assembler::is_simm(disp, 12), ""); ++ ++ if (index != 0) { ++ __ stop("in storePConditional: index != 0"); ++ } else { ++ __ move(AT, newval); ++ __ sc_d(AT, addr); ++ __ move($cr$$Register, AT); ++ } ++ %} ++ ins_pipe(long_memory_op); ++%} ++ ++// Conditional-store of an int value. ++// AT flag is set on success, reset otherwise. 
++instruct storeIConditional(memory mem, mRegI oldval, mRegI newval, FlagsReg cr) %{ ++ match(Set cr (StoreIConditional mem (Binary oldval newval))); ++ format %{ "CMPXCHG $newval, $mem, $oldval \t# @storeIConditional" %} ++ ++ ins_encode %{ ++ Register oldval = $oldval$$Register; ++ Register newval = $newval$$Register; ++ Register cr = $cr$$Register; ++ Address addr(as_Register($mem$$base), $mem$$disp); ++ ++ int index = $mem$$index; ++ int scale = $mem$$scale; ++ int disp = $mem$$disp; ++ ++ guarantee(Assembler::is_simm(disp, 12), ""); ++ ++ if (index != 0) { ++ __ stop("in storeIConditional: index != 0"); ++ } else { ++ if (cr != addr.base() && cr != oldval && cr != newval) { ++ __ cmpxchg32(addr, oldval, newval, cr, true, false, true); ++ } else { ++ __ cmpxchg32(addr, oldval, newval, AT, true, false, true); ++ __ move(cr, AT); ++ } ++ } ++ %} ++ ++ ins_pipe(long_memory_op); ++%} ++ ++// Conditional-store of a long value. ++// ZF flag is set on success, reset otherwise. Implemented with a CMPXCHG. ++instruct storeLConditional(memory mem, mRegL oldval, mRegL newval, FlagsReg cr) ++%{ ++ match(Set cr (StoreLConditional mem (Binary oldval newval))); ++ ++ format %{ "cmpxchg $mem, $newval\t# If $oldval == $mem then store $newval into $mem" %} ++ ins_encode%{ ++ Register oldval = $oldval$$Register; ++ Register newval = $newval$$Register; ++ Register cr = $cr$$Register; ++ Address addr(as_Register($mem$$base), $mem$$disp); ++ ++ int index = $mem$$index; ++ int scale = $mem$$scale; ++ int disp = $mem$$disp; ++ ++ guarantee(Assembler::is_simm(disp, 12), ""); ++ ++ if (index != 0) { ++ __ stop("in storeIConditional: index != 0"); ++ } else { ++ if (cr != addr.base() && cr != oldval && cr != newval) { ++ __ cmpxchg(addr, oldval, newval, cr, false, true); ++ } else { ++ __ cmpxchg(addr, oldval, newval, AT, false, true); ++ __ move(cr, AT); ++ } ++ } ++ %} ++ ins_pipe(long_memory_op); ++%} ++ ++// Implement LoadPLocked. Must be ordered against changes of the memory location ++// by storePConditional. 
++instruct loadPLocked(mRegP dst, memory mem) %{ ++ match(Set dst (LoadPLocked mem)); ++ ins_cost(MEMORY_REF_COST); ++ ++ format %{ "ll_d $dst, $mem #@loadPLocked\n\t" %} ++ size(12); ++ ins_encode %{ ++ relocInfo::relocType disp_reloc = $mem->disp_reloc(); ++ assert(disp_reloc == relocInfo::none, "cannot have disp"); ++ __ loadstore_enc($dst$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::LOAD_LINKED_LONG); ++ %} ++ ins_pipe( ialu_loadI ); ++%} ++ ++ ++instruct compareAndSwapI(mRegI res, mRegP mem_ptr, mRegI oldval, mRegI newval) %{ ++ match(Set res (CompareAndSwapI mem_ptr (Binary oldval newval))); ++ ins_cost(3 * MEMORY_REF_COST); ++ format %{ "CMPXCHG $newval, [$mem_ptr], $oldval @ compareAndSwapI" %} ++ ins_encode %{ ++ Register newval = $newval$$Register; ++ Register oldval = $oldval$$Register; ++ Register res = $res$$Register; ++ Address addr($mem_ptr$$Register, 0); ++ ++ if (res != addr.base() && res != oldval && res != newval) { ++ __ cmpxchg32(addr, oldval, newval, res, true, false, true); ++ } else { ++ __ cmpxchg32(addr, oldval, newval, AT, true, false, true); ++ __ move(res, AT); ++ } ++ %} ++ ins_pipe(long_memory_op); ++%} ++ ++instruct compareAndSwapL(mRegI res, mRegP mem_ptr, mRegL oldval, mRegL newval) %{ ++ predicate(VM_Version::supports_cx8()); ++ match(Set res (CompareAndSwapL mem_ptr (Binary oldval newval))); ++ ins_cost(3 * MEMORY_REF_COST); ++ format %{ "CMPXCHG $newval, [$mem_ptr], $oldval @ compareAndSwapL" %} ++ ins_encode %{ ++ Register newval = $newval$$Register; ++ Register oldval = $oldval$$Register; ++ Register res = $res$$Register; ++ Address addr($mem_ptr$$Register, 0); ++ ++ if (res != addr.base() && res != oldval && res != newval) { ++ __ cmpxchg(addr, oldval, newval, res, false, true); ++ } else { ++ __ cmpxchg(addr, oldval, newval, AT, false, true); ++ __ move(res, AT); ++ } ++ %} ++ ins_pipe(long_memory_op); ++%} ++ ++instruct compareAndSwapP(mRegI res, mRegP mem_ptr, mRegP oldval, mRegP newval) %{ ++ match(Set res (CompareAndSwapP mem_ptr (Binary oldval newval))); ++ ins_cost(3 * MEMORY_REF_COST); ++ format %{ "CMPXCHG $newval, [$mem_ptr], $oldval @ compareAndSwapP" %} ++ ins_encode %{ ++ Register newval = $newval$$Register; ++ Register oldval = $oldval$$Register; ++ Register res = $res$$Register; ++ Address addr($mem_ptr$$Register, 0); ++ ++ if (res != addr.base() && res != oldval && res != newval) { ++ __ cmpxchg(addr, oldval, newval, res, false, true); ++ } else { ++ __ cmpxchg(addr, oldval, newval, AT, false, true); ++ __ move(res, AT); ++ } ++ %} ++ ins_pipe(long_memory_op); ++%} ++ ++instruct compareAndSwapN(mRegI res, mRegP mem_ptr, mRegN oldval, mRegN newval) %{ ++ match(Set res (CompareAndSwapN mem_ptr (Binary oldval newval))); ++ ins_cost(3 * MEMORY_REF_COST); ++ format %{ "CMPXCHG $newval, [$mem_ptr], $oldval @ compareAndSwapN" %} ++ ins_encode %{ ++ Register newval = $newval$$Register; ++ Register oldval = $oldval$$Register; ++ Register res = $res$$Register; ++ Address addr($mem_ptr$$Register, 0); ++ ++ if (res != addr.base() && res != oldval && res != newval) { ++ __ cmpxchg32(addr, oldval, newval, res, false, false, true); ++ } else { ++ __ cmpxchg32(addr, oldval, newval, AT, false, false, true); ++ __ move(res, AT); ++ } ++ %} ++ ins_pipe(long_memory_op); ++%} ++ ++instruct get_and_setI(indirect mem, mRegI newv, mRegI prev) %{ ++ match(Set prev (GetAndSetI mem newv)); ++ ins_cost(2 * MEMORY_REF_COST); ++ format %{ "amswap_db_w $prev, $newv, [$mem]" %} ++ ins_encode %{ ++ Register prev = $prev$$Register; ++ 
Register newv = $newv$$Register; ++ Register addr = as_Register($mem$$base); ++ if (prev == newv || prev == addr) { ++ __ amswap_db_w(AT, newv, addr); ++ __ move(prev, AT); ++ } else { ++ __ amswap_db_w(prev, newv, addr); ++ } ++ %} ++ ins_pipe(pipe_slow); ++%} ++ ++instruct get_and_setL(indirect mem, mRegL newv, mRegL prev) %{ ++ match(Set prev (GetAndSetL mem newv)); ++ ins_cost(2 * MEMORY_REF_COST); ++ format %{ "amswap_db_d $prev, $newv, [$mem]" %} ++ ins_encode %{ ++ Register prev = $prev$$Register; ++ Register newv = $newv$$Register; ++ Register addr = as_Register($mem$$base); ++ if (prev == newv || prev == addr) { ++ __ amswap_db_d(AT, newv, addr); ++ __ move(prev, AT); ++ } else { ++ __ amswap_db_d(prev, newv, addr); ++ } ++ %} ++ ins_pipe(pipe_slow); ++%} ++ ++instruct get_and_setN(indirect mem, mRegN newv, mRegN prev) %{ ++ match(Set prev (GetAndSetN mem newv)); ++ ins_cost(2 * MEMORY_REF_COST); ++ format %{ "amswap_db_w $prev, $newv, [$mem]" %} ++ ins_encode %{ ++ Register prev = $prev$$Register; ++ Register newv = $newv$$Register; ++ Register addr = as_Register($mem$$base); ++ __ amswap_db_w(AT, newv, addr); ++ __ bstrpick_d(prev, AT, 31, 0); ++ %} ++ ins_pipe(pipe_slow); ++%} ++ ++instruct get_and_setP(indirect mem, mRegP newv, mRegP prev) %{ ++ match(Set prev (GetAndSetP mem newv)); ++ ins_cost(2 * MEMORY_REF_COST); ++ format %{ "amswap_db_d $prev, $newv, [$mem]" %} ++ ins_encode %{ ++ Register prev = $prev$$Register; ++ Register newv = $newv$$Register; ++ Register addr = as_Register($mem$$base); ++ if (prev == newv || prev == addr) { ++ __ amswap_db_d(AT, newv, addr); ++ __ move(prev, AT); ++ } else { ++ __ amswap_db_d(prev, newv, addr); ++ } ++ %} ++ ins_pipe(pipe_slow); ++%} ++ ++instruct get_and_addL(indirect mem, mRegL newval, mRegL incr) %{ ++ match(Set newval (GetAndAddL mem incr)); ++ ins_cost(2 * MEMORY_REF_COST + 1); ++ format %{ "amadd_db_d $newval, [$mem], $incr" %} ++ ins_encode %{ ++ Register newv = $newval$$Register; ++ Register incr = $incr$$Register; ++ Register addr = as_Register($mem$$base); ++ if (newv == incr || newv == addr) { ++ __ amadd_db_d(AT, incr, addr); ++ __ move(newv, AT); ++ } else { ++ __ amadd_db_d(newv, incr, addr); ++ } ++ %} ++ ins_pipe(pipe_slow); ++%} ++ ++instruct get_and_addL_no_res(indirect mem, Universe dummy, mRegL incr) %{ ++ predicate(n->as_LoadStore()->result_not_used()); ++ match(Set dummy (GetAndAddL mem incr)); ++ ins_cost(2 * MEMORY_REF_COST); ++ format %{ "amadd_db_d [$mem], $incr" %} ++ ins_encode %{ ++ __ amadd_db_d(R0, $incr$$Register, as_Register($mem$$base)); ++ %} ++ ins_pipe(pipe_slow); ++%} ++ ++instruct get_and_addI(indirect mem, mRegI newval, mRegIorL2I incr) %{ ++ match(Set newval (GetAndAddI mem incr)); ++ ins_cost(2 * MEMORY_REF_COST + 1); ++ format %{ "amadd_db_w $newval, [$mem], $incr" %} ++ ins_encode %{ ++ Register newv = $newval$$Register; ++ Register incr = $incr$$Register; ++ Register addr = as_Register($mem$$base); ++ if (newv == incr || newv == addr) { ++ __ amadd_db_w(AT, incr, addr); ++ __ move(newv, AT); ++ } else { ++ __ amadd_db_w(newv, incr, addr); ++ } ++ %} ++ ins_pipe(pipe_slow); ++%} ++ ++instruct get_and_addI_no_res(indirect mem, Universe dummy, mRegIorL2I incr) %{ ++ predicate(n->as_LoadStore()->result_not_used()); ++ match(Set dummy (GetAndAddI mem incr)); ++ ins_cost(2 * MEMORY_REF_COST); ++ format %{ "amadd_db_w [$mem], $incr" %} ++ ins_encode %{ ++ __ amadd_db_w(R0, $incr$$Register, as_Register($mem$$base)); ++ %} ++ ins_pipe(pipe_slow); ++%} ++ ++//----------Max and 
Min-------------------------------------------------------- ++ ++// Min Register with Register (generic version) ++instruct minI_Reg_Reg(mRegI dst, mRegI src) %{ ++ match(Set dst (MinI dst src)); ++ //effect(KILL flags); ++ ins_cost(80); ++ ++ format %{ "MIN $dst, $src @minI_Reg_Reg" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src = $src$$Register; ++ ++ __ slt(AT, src, dst); ++ __ masknez(dst, dst, AT); ++ __ maskeqz(AT, src, AT); ++ __ OR(dst, dst, AT); ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++// Max Register with Register (generic version) ++instruct maxI_Reg_Reg(mRegI dst, mRegI src) %{ ++ match(Set dst (MaxI dst src)); ++ ins_cost(80); ++ ++ format %{ "MAX $dst, $src @maxI_Reg_Reg" %} ++ ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src = $src$$Register; ++ ++ __ slt(AT, dst, src); ++ __ masknez(dst, dst, AT); ++ __ maskeqz(AT, src, AT); ++ __ OR(dst, dst, AT); ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct maxI_Reg_zero(mRegI dst, immI_0 zero) %{ ++ match(Set dst (MaxI dst zero)); ++ ins_cost(50); ++ ++ format %{ "MAX $dst, 0 @maxI_Reg_zero" %} ++ ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ ++ __ slt(AT, dst, R0); ++ __ masknez(dst, dst, AT); ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct zerox_long_reg_reg(mRegL dst, mRegL src, immL_MaxUI mask) ++%{ ++ match(Set dst (AndL src mask)); ++ ++ format %{ "movl $dst, $src\t# zero-extend long @ zerox_long_reg_reg" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src = $src$$Register; ++ ++ __ bstrpick_d(dst, src, 31, 0); ++ %} ++ ins_pipe(ialu_regI_regI); ++%} ++ ++instruct combine_i2l(mRegL dst, mRegI src1, immL_MaxUI mask, mRegI src2, immI_32 shift32) ++%{ ++ match(Set dst (OrL (AndL (ConvI2L src1) mask) (LShiftL (ConvI2L src2) shift32))); ++ ++ format %{ "combine_i2l $dst, $src2(H), $src1(L) @ combine_i2l" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src1 = $src1$$Register; ++ Register src2 = $src2$$Register; ++ ++ if (src1 == dst) { ++ __ bstrins_d(dst, src2, 63, 32); ++ } else if (src2 == dst) { ++ __ slli_d(dst, dst, 32); ++ __ bstrins_d(dst, src1, 31, 0); ++ } else { ++ __ bstrpick_d(dst, src1, 31, 0); ++ __ bstrins_d(dst, src2, 63, 32); ++ } ++ %} ++ ins_pipe(ialu_regI_regI); ++%} ++ ++// Zero-extend convert int to long ++instruct convI2L_reg_reg_zex(mRegL dst, mRegI src, immL_MaxUI mask) ++%{ ++ match(Set dst (AndL (ConvI2L src) mask)); ++ ++ format %{ "movl $dst, $src\t# i2l zero-extend @ convI2L_reg_reg_zex" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src = $src$$Register; ++ ++ __ bstrpick_d(dst, src, 31, 0); ++ %} ++ ins_pipe(ialu_regI_regI); ++%} ++ ++instruct convL2I2L_reg_reg_zex(mRegL dst, mRegL src, immL_MaxUI mask) ++%{ ++ match(Set dst (AndL (ConvI2L (ConvL2I src)) mask)); ++ ++ format %{ "movl $dst, $src\t# i2l zero-extend @ convL2I2L_reg_reg_zex" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src = $src$$Register; ++ ++ __ bstrpick_d(dst, src, 31, 0); ++ %} ++ ins_pipe(ialu_regI_regI); ++%} ++ ++// Match loading integer and casting it to unsigned int in long register. ++// LoadI + ConvI2L + AndL 0xffffffff. 
++instruct loadUI2L_rmask(mRegL dst, memory mem, immL_MaxUI mask) %{ ++ match(Set dst (AndL (ConvI2L (LoadI mem)) mask)); ++ ++ format %{ "ld_wu $dst, $mem \t// zero-extend to long @ loadUI2L_rmask" %} ++ ins_encode %{ ++ relocInfo::relocType disp_reloc = $mem->disp_reloc(); ++ assert(disp_reloc == relocInfo::none, "cannot have disp"); ++ __ loadstore_enc($dst$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::LOAD_U_INT); ++ %} ++ ins_pipe(ialu_loadI); ++%} ++ ++instruct loadUI2L_lmask(mRegL dst, memory mem, immL_MaxUI mask) %{ ++ match(Set dst (AndL mask (ConvI2L (LoadI mem)))); ++ ++ format %{ "ld_wu $dst, $mem \t// zero-extend to long @ loadUI2L_lmask" %} ++ ins_encode %{ ++ relocInfo::relocType disp_reloc = $mem->disp_reloc(); ++ assert(disp_reloc == relocInfo::none, "cannot have disp"); ++ __ loadstore_enc($dst$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::LOAD_U_INT); ++ %} ++ ins_pipe(ialu_loadI); ++%} ++ ++ ++// ============================================================================ ++// Safepoint Instruction ++ ++instruct safePoint_poll() %{ ++ predicate(SafepointMechanism::uses_global_page_poll()); ++ match(SafePoint); ++ ++ ins_cost(105); ++ format %{ "poll for GC @ safePoint_poll" %} ++ ++ ins_encode %{ ++ __ block_comment("Safepoint:"); ++ __ li(T4, (long)os::get_polling_page()); ++ __ relocate(relocInfo::poll_type); ++ __ ld_w(AT, T4, 0); ++ %} ++ ++ ins_pipe( ialu_storeI ); ++%} ++ ++instruct safePoint_poll_tls(mRegP poll) %{ ++ match(SafePoint poll); ++ predicate(SafepointMechanism::uses_thread_local_poll()); ++ effect(USE poll); ++ ++ ins_cost(125); ++ format %{ "ld_w AT, [$poll]\t" ++ "Safepoint @ [$poll] : poll for GC" %} ++ size(4); ++ ins_encode %{ ++ Register poll_reg = $poll$$Register; ++ ++ __ block_comment("Safepoint:"); ++ __ relocate(relocInfo::poll_type); ++ address pre_pc = __ pc(); ++ __ ld_w(AT, poll_reg, 0); ++ assert(nativeInstruction_at(pre_pc)->is_safepoint_poll(), "must emit lw AT, [$poll]"); ++ %} ++ ++ ins_pipe( ialu_storeI ); ++%} ++ ++//----------Arithmetic Conversion Instructions--------------------------------- ++ ++instruct roundFloat_nop(regF dst) ++%{ ++ match(Set dst (RoundFloat dst)); ++ ++ ins_cost(0); ++ ins_encode(); ++ ins_pipe(empty); ++%} ++ ++instruct roundDouble_nop(regD dst) ++%{ ++ match(Set dst (RoundDouble dst)); ++ ++ ins_cost(0); ++ ins_encode(); ++ ins_pipe(empty); ++%} ++ ++//----------BSWAP Instructions------------------------------------------------- ++instruct bytes_reverse_int(mRegI dst, mRegIorL2I src) %{ ++ match(Set dst (ReverseBytesI src)); ++ ++ format %{ "RevB_I $dst, $src" %} ++ ins_encode %{ ++ __ revb_2w($dst$$Register, $src$$Register); ++ __ slli_w($dst$$Register, $dst$$Register, 0); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++instruct bytes_reverse_long(mRegL dst, mRegL src) %{ ++ match(Set dst (ReverseBytesL src)); ++ ++ format %{ "RevB_L $dst, $src" %} ++ ins_encode %{ ++ __ revb_d($dst$$Register, $src$$Register); ++ %} ++ ins_pipe( ialu_regL_regL ); ++%} ++ ++instruct bytes_reverse_unsigned_short(mRegI dst, mRegIorL2I src) %{ ++ match(Set dst (ReverseBytesUS src)); ++ ++ format %{ "RevB_US $dst, $src" %} ++ ins_encode %{ ++ __ revb_2h($dst$$Register, $src$$Register); ++ __ bstrpick_d($dst$$Register, $dst$$Register, 15, 0); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++instruct bytes_reverse_short(mRegI dst, mRegIorL2I src) %{ ++ match(Set dst (ReverseBytesS src)); ++ ++ format %{ "RevB_S $dst, $src" %} ++ ins_encode %{ ++ __ 
revb_2h($dst$$Register, $src$$Register); ++ __ ext_w_h($dst$$Register, $dst$$Register); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++//---------- Zeros Count Instructions ------------------------------------------ ++// CountLeadingZerosINode CountTrailingZerosINode ++instruct countLeadingZerosI(mRegI dst, mRegIorL2I src) %{ ++ match(Set dst (CountLeadingZerosI src)); ++ ++ format %{ "clz_w $dst, $src\t# count leading zeros (int)" %} ++ ins_encode %{ ++ __ clz_w($dst$$Register, $src$$Register); ++ %} ++ ins_pipe( ialu_regL_regL ); ++%} ++ ++instruct countLeadingZerosL(mRegI dst, mRegL src) %{ ++ match(Set dst (CountLeadingZerosL src)); ++ ++ format %{ "clz_d $dst, $src\t# count leading zeros (long)" %} ++ ins_encode %{ ++ __ clz_d($dst$$Register, $src$$Register); ++ %} ++ ins_pipe( ialu_regL_regL ); ++%} ++ ++instruct countTrailingZerosI(mRegI dst, mRegIorL2I src) %{ ++ match(Set dst (CountTrailingZerosI src)); ++ ++ format %{ "ctz_w $dst, $src\t# count trailing zeros (int)" %} ++ ins_encode %{ ++ __ ctz_w($dst$$Register, $src$$Register); ++ %} ++ ins_pipe( ialu_regL_regL ); ++%} ++ ++instruct countTrailingZerosL(mRegI dst, mRegL src) %{ ++ match(Set dst (CountTrailingZerosL src)); ++ ++ format %{ "ctz_d $dst, $src\t# count trailing zeros (long)" %} ++ ins_encode %{ ++ __ ctz_d($dst$$Register, $src$$Register); ++ %} ++ ins_pipe( ialu_regL_regL ); ++%} ++ ++// ====================VECTOR INSTRUCTIONS===================================== ++ ++// --------------------------------- Load ------------------------------------- ++ ++instruct loadV16(vecX dst, memory mem) %{ ++ predicate(n->as_LoadVector()->memory_size() == 16); ++ match(Set dst (LoadVector mem)); ++ format %{ "vload $dst, $mem\t# @loadV16" %} ++ ins_encode %{ ++ __ loadstore_enc($dst$$FloatRegister, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::LOAD_VECTORX); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct loadV32(vecY dst, memory mem) %{ ++ predicate(n->as_LoadVector()->memory_size() == 32); ++ match(Set dst (LoadVector mem)); ++ format %{ "xvload $dst, $mem\t# @loadV32" %} ++ ins_encode %{ ++ __ loadstore_enc($dst$$FloatRegister, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::LOAD_VECTORY); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++// --------------------------------- Store ------------------------------------ ++ ++instruct storeV16(memory mem, vecX src) %{ ++ predicate(n->as_StoreVector()->memory_size() == 16); ++ match(Set mem (StoreVector mem src)); ++ format %{ "vstore $src, $mem\t# @storeV16" %} ++ ins_encode %{ ++ __ loadstore_enc($src$$FloatRegister, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::STORE_VECTORX); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct storeV32(memory mem, vecY src) %{ ++ predicate(n->as_StoreVector()->memory_size() == 32); ++ match(Set mem (StoreVector mem src)); ++ format %{ "xvstore $src, $mem\t# @storeV32" %} ++ ins_encode %{ ++ __ loadstore_enc($src$$FloatRegister, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::STORE_VECTORY); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++// ------------------------------- Replicate ---------------------------------- ++ ++instruct repl16B(vecX dst, mRegI src) %{ ++ predicate(n->as_Vector()->length() == 16); ++ match(Set dst (ReplicateB src)); ++ format %{ "vreplgr2vr.b $dst, $src\t# @repl16B" %} ++ ins_encode %{ ++ __ vreplgr2vr_b($dst$$FloatRegister, $src$$Register); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct repl16B_imm(vecX dst, immI_M128_255 imm) %{ ++ 
predicate(n->as_Vector()->length() == 16); ++ match(Set dst (ReplicateB imm)); ++ format %{ "vldi $dst, $imm\t# @repl16B_imm" %} ++ ins_encode %{ ++ __ vldi($dst$$FloatRegister, ($imm$$constant & 0xff)); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct repl8S(vecX dst, mRegI src) %{ ++ predicate(n->as_Vector()->length() == 8); ++ match(Set dst (ReplicateS src)); ++ format %{ "vreplgr2vr.h $dst, $src\t# @repl8S" %} ++ ins_encode %{ ++ __ vreplgr2vr_h($dst$$FloatRegister, $src$$Register); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct repl8S_imm(vecX dst, immI10 imm) %{ ++ predicate(n->as_Vector()->length() == 8); ++ match(Set dst (ReplicateS imm)); ++ format %{ "vldi $dst, $imm\t# @repl8S_imm" %} ++ ins_encode %{ ++ __ vldi($dst$$FloatRegister, (0b001 << 10 ) | ($imm$$constant & 0x3ff)); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct repl4I(vecX dst, mRegI src) %{ ++ predicate(n->as_Vector()->length() == 4); ++ match(Set dst (ReplicateI src)); ++ format %{ "vreplgr2vr.w $dst, $src\t# @repl4I" %} ++ ins_encode %{ ++ __ vreplgr2vr_w($dst$$FloatRegister, $src$$Register); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct repl4I_imm(vecX dst, immI10 imm) %{ ++ predicate(n->as_Vector()->length() == 4); ++ match(Set dst (ReplicateI imm)); ++ format %{ "vldi $dst, $imm\t# @repl4I_imm" %} ++ ins_encode %{ ++ __ vldi($dst$$FloatRegister, (0b010 << 10 ) | ($imm$$constant & 0x3ff)); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct repl2L(vecX dst, mRegL src) %{ ++ predicate(n->as_Vector()->length() == 2); ++ match(Set dst (ReplicateL src)); ++ format %{ "vreplgr2vr.d $dst, $src\t# @repl2L" %} ++ ins_encode %{ ++ __ vreplgr2vr_d($dst$$FloatRegister, $src$$Register); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct repl2L_imm(vecX dst, immL10 imm) %{ ++ predicate(n->as_Vector()->length() == 2); ++ match(Set dst (ReplicateL imm)); ++ format %{ "vldi $dst, $imm\t# @repl2L_imm" %} ++ ins_encode %{ ++ __ vldi($dst$$FloatRegister, (0b011 << 10 ) | ($imm$$constant & 0x3ff)); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct repl4F(vecX dst, regF src) %{ ++ predicate(n->as_Vector()->length() == 4); ++ match(Set dst (ReplicateF src)); ++ format %{ "vreplvei.w $dst, $src, 0\t# @repl4F" %} ++ ins_encode %{ ++ __ vreplvei_w($dst$$FloatRegister, $src$$FloatRegister, 0); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct repl2D(vecX dst, regD src) %{ ++ predicate(n->as_Vector()->length() == 2); ++ match(Set dst (ReplicateD src)); ++ format %{ "vreplvei.d $dst, $src, 0\t# @repl2D" %} ++ ins_encode %{ ++ __ vreplvei_d($dst$$FloatRegister, $src$$FloatRegister, 0); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct repl32B(vecY dst, mRegI src) %{ ++ predicate(n->as_Vector()->length() == 32); ++ match(Set dst (ReplicateB src)); ++ format %{ "xvreplgr2vr.b $dst, $src\t# @repl32B" %} ++ ins_encode %{ ++ __ xvreplgr2vr_b($dst$$FloatRegister, $src$$Register); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct repl32B_imm(vecY dst, immI_M128_255 imm) %{ ++ predicate(n->as_Vector()->length() == 32); ++ match(Set dst (ReplicateB imm)); ++ format %{ "xvldi $dst, $imm\t# @repl32B_imm" %} ++ ins_encode %{ ++ __ xvldi($dst$$FloatRegister, ($imm$$constant & 0xff)); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct repl16S(vecY dst, mRegI src) %{ ++ predicate(n->as_Vector()->length() == 16); ++ match(Set dst (ReplicateS src)); ++ format %{ "xvreplgr2vr.h $dst, $src\t# @repl16S" %} ++ ins_encode %{ ++ __ xvreplgr2vr_h($dst$$FloatRegister, $src$$Register); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct 
repl16S_imm(vecY dst, immI10 imm) %{ ++ predicate(n->as_Vector()->length() == 16); ++ match(Set dst (ReplicateS imm)); ++ format %{ "xvldi $dst, $imm\t# @repl16S_imm" %} ++ ins_encode %{ ++ __ xvldi($dst$$FloatRegister, (0b001 << 10 ) | ($imm$$constant & 0x3ff)); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct repl8I(vecY dst, mRegI src) %{ ++ predicate(n->as_Vector()->length() == 8); ++ match(Set dst (ReplicateI src)); ++ format %{ "xvreplgr2vr.w $dst, $src\t# @repl8I" %} ++ ins_encode %{ ++ __ xvreplgr2vr_w($dst$$FloatRegister, $src$$Register); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct repl8I_imm(vecY dst, immI10 imm) %{ ++ predicate(n->as_Vector()->length() == 8); ++ match(Set dst (ReplicateI imm)); ++ format %{ "xvldi $dst, $imm\t# @repl8I_imm" %} ++ ins_encode %{ ++ __ xvldi($dst$$FloatRegister, (0b010 << 10 ) | ($imm$$constant & 0x3ff)); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct repl4L(vecY dst, mRegL src) %{ ++ predicate(n->as_Vector()->length() == 4); ++ match(Set dst (ReplicateL src)); ++ format %{ "xvreplgr2vr.d $dst, $src\t# @repl4L" %} ++ ins_encode %{ ++ __ xvreplgr2vr_d($dst$$FloatRegister, $src$$Register); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct repl4L_imm(vecY dst, immL10 imm) %{ ++ predicate(n->as_Vector()->length() == 4); ++ match(Set dst (ReplicateL imm)); ++ format %{ "xvldi $dst, $imm\t# @repl4L_imm" %} ++ ins_encode %{ ++ __ xvldi($dst$$FloatRegister, (0b011 << 10 ) | ($imm$$constant & 0x3ff)); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct repl8F(vecY dst, regF src) %{ ++ predicate(n->as_Vector()->length() == 8); ++ match(Set dst (ReplicateF src)); ++ format %{ "xvreplve0.w $dst, $src\t# @repl8F" %} ++ ins_encode %{ ++ __ xvreplve0_w($dst$$FloatRegister, $src$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct repl4D(vecY dst, regD src) %{ ++ predicate(n->as_Vector()->length() == 4); ++ match(Set dst (ReplicateD src)); ++ format %{ "xvreplve0.d $dst, $src\t# @repl4D" %} ++ ins_encode %{ ++ __ xvreplve0_d($dst$$FloatRegister, $src$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++// --------------------------------- ADD -------------------------------------- ++ ++instruct add16B(vecX dst, vecX src1, vecX src2) %{ ++ predicate(n->as_Vector()->length() == 16); ++ match(Set dst (AddVB src1 src2)); ++ format %{ "vadd.b $dst, $src1, $src2\t# @add16B" %} ++ ins_encode %{ ++ __ vadd_b($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct add16B_imm(vecX dst, vecX src, immIU5 imm) %{ ++ predicate(n->as_Vector()->length() == 16); ++ match(Set dst (AddVB src (ReplicateB imm))); ++ format %{ "vaddi.bu $dst, $src, $imm\t# @add16B_imm" %} ++ ins_encode %{ ++ __ vaddi_bu($dst$$FloatRegister, $src$$FloatRegister, $imm$$constant); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct add8S(vecX dst, vecX src1, vecX src2) %{ ++ predicate(n->as_Vector()->length() == 8); ++ match(Set dst (AddVS src1 src2)); ++ format %{ "vadd.h $dst, $src1, $src2\t# @add8S" %} ++ ins_encode %{ ++ __ vadd_h($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct add8S_imm(vecX dst, vecX src, immIU5 imm) %{ ++ predicate(n->as_Vector()->length() == 8); ++ match(Set dst (AddVS src (ReplicateS imm))); ++ format %{ "vaddi.hu $dst, $src, $imm\t# @add8S_imm" %} ++ ins_encode %{ ++ __ vaddi_hu($dst$$FloatRegister, $src$$FloatRegister, $imm$$constant); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct add4I(vecX dst, vecX src1, vecX 
src2) %{ ++ predicate(n->as_Vector()->length() == 4); ++ match(Set dst (AddVI src1 src2)); ++ format %{ "vadd.w $dst, $src1, src2\t# @add4I" %} ++ ins_encode %{ ++ __ vadd_w($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct add4I_imm(vecX dst, vecX src, immIU5 imm) %{ ++ predicate(n->as_Vector()->length() == 4); ++ match(Set dst (AddVI src (ReplicateI imm))); ++ format %{ "vaddi.wu $dst, $src, $imm\t# @add4I_imm" %} ++ ins_encode %{ ++ __ vaddi_wu($dst$$FloatRegister, $src$$FloatRegister, $imm$$constant); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct add2L(vecX dst, vecX src1, vecX src2) %{ ++ predicate(n->as_Vector()->length() == 2); ++ match(Set dst (AddVL src1 src2)); ++ format %{ "vadd.d $dst, $src1, $src2\t# @add2L" %} ++ ins_encode %{ ++ __ vadd_d($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct add2L_imm(vecX dst, vecX src, immLU5 imm) %{ ++ predicate(n->as_Vector()->length() == 2); ++ match(Set dst (AddVL src (ReplicateL imm))); ++ format %{ "vaddi.du $dst, $src, $imm\t# @add2L_imm" %} ++ ins_encode %{ ++ __ vaddi_du($dst$$FloatRegister, $src$$FloatRegister, $imm$$constant); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct add4F(vecX dst, vecX src1, vecX src2) %{ ++ predicate(n->as_Vector()->length() == 4); ++ match(Set dst (AddVF src1 src2)); ++ format %{ "vfadd.s $dst, $src1, $src2\t# @add4F" %} ++ ins_encode %{ ++ __ vfadd_s($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct add2D(vecX dst, vecX src1, vecX src2) %{ ++ predicate(n->as_Vector()->length() == 2); ++ match(Set dst (AddVD src1 src2)); ++ format %{ "vfadd.d $dst, $src1, $src2\t# @add2D" %} ++ ins_encode %{ ++ __ vfadd_d($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct add32B(vecY dst, vecY src1, vecY src2) %{ ++ predicate(n->as_Vector()->length() == 32); ++ match(Set dst (AddVB src1 src2)); ++ format %{ "xvadd.b $dst, $src1, $src2\t# @add32B" %} ++ ins_encode %{ ++ __ xvadd_b($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct add32B_imm(vecY dst, vecY src, immIU5 imm) %{ ++ predicate(n->as_Vector()->length() == 32); ++ match(Set dst (AddVB src (ReplicateB imm))); ++ format %{ "xvaddi.bu $dst, $src, $imm\t# @add32B_imm" %} ++ ins_encode %{ ++ __ xvaddi_bu($dst$$FloatRegister, $src$$FloatRegister, $imm$$constant); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct add16S(vecY dst, vecY src1, vecY src2) %{ ++ predicate(n->as_Vector()->length() == 16); ++ match(Set dst (AddVS src1 src2)); ++ format %{ "xvadd.h $dst, $src1, $src2\t# @add16S" %} ++ ins_encode %{ ++ __ xvadd_h($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct add16S_imm(vecY dst, vecY src, immIU5 imm) %{ ++ predicate(n->as_Vector()->length() == 16); ++ match(Set dst (AddVS src (ReplicateS imm))); ++ format %{ "xvaddi.hu $dst, $src, $imm\t# @add16S_imm" %} ++ ins_encode %{ ++ __ xvaddi_hu($dst$$FloatRegister, $src$$FloatRegister, $imm$$constant); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct add8I(vecY dst, vecY src1, vecY src2) %{ ++ predicate(n->as_Vector()->length() == 8); ++ match(Set dst (AddVI src1 src2)); ++ format %{ "xvadd.wu $dst, $src1, $src2\t# @add8I" %} ++ ins_encode %{ ++ __ xvadd_w($dst$$FloatRegister, $src1$$FloatRegister, 
$src2$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct add8I_imm(vecY dst, vecY src, immIU5 imm) %{ ++ predicate(n->as_Vector()->length() == 8); ++ match(Set dst (AddVI src (ReplicateI imm))); ++ format %{ "xvaddi.wu $dst, $src, $imm\t# @add8I_imm" %} ++ ins_encode %{ ++ __ xvaddi_wu($dst$$FloatRegister, $src$$FloatRegister, $imm$$constant); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct add4L(vecY dst, vecY src1, vecY src2) %{ ++ predicate(n->as_Vector()->length() == 4); ++ match(Set dst (AddVL src1 src2)); ++ format %{ "xvadd.d $dst, $src1, $src2\t# @add4L" %} ++ ins_encode %{ ++ __ xvadd_d($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct add4L_imm(vecY dst, vecY src, immLU5 imm) %{ ++ predicate(n->as_Vector()->length() == 4); ++ match(Set dst (AddVL src (ReplicateL imm))); ++ format %{ "xvaddi.du $dst, $src, $imm\t# @add4L_imm" %} ++ ins_encode %{ ++ __ xvaddi_du($dst$$FloatRegister, $src$$FloatRegister, $imm$$constant); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct add8F(vecY dst, vecY src1, vecY src2) %{ ++ predicate(n->as_Vector()->length() == 8); ++ match(Set dst (AddVF src1 src2)); ++ format %{ "xvfadd.s $dst, $src1, $src2\t# @add8F" %} ++ ins_encode %{ ++ __ xvfadd_s($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct add4D(vecY dst, vecY src1, vecY src2) %{ ++ predicate(n->as_Vector()->length() == 4); ++ match(Set dst (AddVD src1 src2)); ++ format %{ "xvfadd.d $dst, $src1, $src2\t# @add4D" %} ++ ins_encode %{ ++ __ xvfadd_d($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++// --------------------------------- SUB -------------------------------------- ++ ++instruct sub16B(vecX dst, vecX src1, vecX src2) %{ ++ predicate(n->as_Vector()->length() == 16); ++ match(Set dst (SubVB src1 src2)); ++ format %{ "vsub.b $dst, $src1, $src2\t# @sub16B" %} ++ ins_encode %{ ++ __ vsub_b($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct sub16B_imm(vecX dst, vecX src, immIU5 imm) %{ ++ predicate(n->as_Vector()->length() == 16); ++ match(Set dst (SubVB src (ReplicateB imm))); ++ format %{ "vsubi.bu $dst, $src, $imm\t# @sub16B_imm" %} ++ ins_encode %{ ++ __ vsubi_bu($dst$$FloatRegister, $src$$FloatRegister, $imm$$constant); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct sub8S(vecX dst, vecX src1, vecX src2) %{ ++ predicate(n->as_Vector()->length() == 8); ++ match(Set dst (SubVS src1 src2)); ++ format %{ "vsub.h $dst, $src1, $src2\t# @sub8S" %} ++ ins_encode %{ ++ __ vsub_h($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct sub8S_imm(vecX dst, vecX src, immIU5 imm) %{ ++ predicate(n->as_Vector()->length() == 8); ++ match(Set dst (SubVS src (ReplicateS imm))); ++ format %{ "vsubi.hu $dst, $src, $imm\t# @sub8S_imm" %} ++ ins_encode %{ ++ __ vsubi_hu($dst$$FloatRegister, $src$$FloatRegister, $imm$$constant); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct sub4I(vecX dst, vecX src1, vecX src2) %{ ++ predicate(n->as_Vector()->length() == 4); ++ match(Set dst (SubVI src1 src2)); ++ format %{ "vsub.w $dst, $src1, src2\t# @sub4I" %} ++ ins_encode %{ ++ __ vsub_w($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct sub4I_imm(vecX dst, vecX src, immIU5 imm) %{ ++ 
predicate(n->as_Vector()->length() == 4); ++ match(Set dst (SubVI src (ReplicateI imm))); ++ format %{ "vsubi.wu $dst, $src, $imm\t# @sub4I_imm" %} ++ ins_encode %{ ++ __ vsubi_wu($dst$$FloatRegister, $src$$FloatRegister, $imm$$constant); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct sub2L(vecX dst, vecX src1, vecX src2) %{ ++ predicate(n->as_Vector()->length() == 2); ++ match(Set dst (SubVL src1 src2)); ++ format %{ "vsub.d $dst, $src1, $src2\t# @sub2L" %} ++ ins_encode %{ ++ __ vsub_d($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct sub2L_imm(vecX dst, vecX src, immLU5 imm) %{ ++ predicate(n->as_Vector()->length() == 2); ++ match(Set dst (SubVL src (ReplicateL imm))); ++ format %{ "vsubi.du $dst, $src, $imm\t# @sub2L_imm" %} ++ ins_encode %{ ++ __ vsubi_du($dst$$FloatRegister, $src$$FloatRegister, $imm$$constant); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct sub4F(vecX dst, vecX src1, vecX src2) %{ ++ predicate(n->as_Vector()->length() == 4); ++ match(Set dst (SubVF src1 src2)); ++ format %{ "vfsub.s $dst, $src1, $src2\t# @sub4F" %} ++ ins_encode %{ ++ __ vfsub_s($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct sub2D(vecX dst, vecX src1, vecX src2) %{ ++ predicate(n->as_Vector()->length() == 2); ++ match(Set dst (SubVD src1 src2)); ++ format %{ "vfsub.d $dst, $src1, $src2\t# @sub2D" %} ++ ins_encode %{ ++ __ vfsub_d($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct sub32B(vecY dst, vecY src1, vecY src2) %{ ++ predicate(n->as_Vector()->length() == 32); ++ match(Set dst (SubVB src1 src2)); ++ format %{ "xvsub.b $dst, $src1, $src2\t# @sub32B" %} ++ ins_encode %{ ++ __ xvsub_b($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct sub32B_imm(vecY dst, vecY src, immIU5 imm) %{ ++ predicate(n->as_Vector()->length() == 32); ++ match(Set dst (SubVB src (ReplicateB imm))); ++ format %{ "xvsubi.bu $dst, $src, $imm\t# @sub32B_imm" %} ++ ins_encode %{ ++ __ xvsubi_bu($dst$$FloatRegister, $src$$FloatRegister, $imm$$constant); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct sub16S(vecY dst, vecY src1, vecY src2) %{ ++ predicate(n->as_Vector()->length() == 16); ++ match(Set dst (SubVS src1 src2)); ++ format %{ "xvsub.h $dst, $src1, $src2\t# @sub16S" %} ++ ins_encode %{ ++ __ xvsub_h($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct sub16S_imm(vecY dst, vecY src, immIU5 imm) %{ ++ predicate(n->as_Vector()->length() == 16); ++ match(Set dst (SubVS src (ReplicateS imm))); ++ format %{ "xvsubi.hu $dst, $src, $imm\t# @sub16S_imm" %} ++ ins_encode %{ ++ __ xvsubi_hu($dst$$FloatRegister, $src$$FloatRegister, $imm$$constant); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct sub8I(vecY dst, vecY src1, vecY src2) %{ ++ predicate(n->as_Vector()->length() == 8); ++ match(Set dst (SubVI src1 src2)); ++ format %{ "xvsub.w $dst, $src1, $src2\t# @sub8I" %} ++ ins_encode %{ ++ __ xvsub_w($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct sub8I_imm(vecY dst, vecY src, immIU5 imm) %{ ++ predicate(n->as_Vector()->length() == 8); ++ match(Set dst (SubVI src (ReplicateI imm))); ++ format %{ "xvsubi.wu $dst, $src, $imm\t# @sub8I_imm" %} ++ ins_encode %{ ++ __ xvsubi_wu($dst$$FloatRegister, $src$$FloatRegister, 
$imm$$constant); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct sub4L(vecY dst, vecY src1, vecY src2) %{ ++ predicate(n->as_Vector()->length() == 4); ++ match(Set dst (SubVL src1 src2)); ++ format %{ "xvsub.d $dst, $src1, $src2\t# @sub4L" %} ++ ins_encode %{ ++ __ xvsub_d($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct sub4L_imm(vecY dst, vecY src, immLU5 imm) %{ ++ predicate(n->as_Vector()->length() == 4); ++ match(Set dst (SubVL src (ReplicateL imm))); ++ format %{ "xvsubi.du $dst, $src, $imm\t# @sub4L_imm" %} ++ ins_encode %{ ++ __ xvsubi_du($dst$$FloatRegister, $src$$FloatRegister, $imm$$constant); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct sub8F(vecY dst, vecY src1, vecY src2) %{ ++ predicate(n->as_Vector()->length() == 8); ++ match(Set dst (SubVF src1 src2)); ++ format %{ "xvfsub.s $dst, $src1, $src2\t# @sub8F" %} ++ ins_encode %{ ++ __ xvfsub_s($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct sub4D(vecY dst, vecY src1, vecY src2) %{ ++ predicate(n->as_Vector()->length() == 4); ++ match(Set dst (SubVD src1 src2)); ++ format %{ "xvfsub.d $dst,$src1,$src2\t# @sub4D" %} ++ ins_encode %{ ++ __ xvfsub_d($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++// --------------------------------- MUL -------------------------------------- ++instruct mul16B(vecX dst, vecX src1, vecX src2) %{ ++ predicate(n->as_Vector()->length() == 16); ++ match(Set dst (MulVB src1 src2)); ++ format %{ "vmul.b $dst, $src1, $src2\t# @mul16B" %} ++ ins_encode %{ ++ __ vmul_b($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct mul8S(vecX dst, vecX src1, vecX src2) %{ ++ predicate(n->as_Vector()->length() == 8); ++ match(Set dst (MulVS src1 src2)); ++ format %{ "vmul.h $dst, $src1, $src2\t# @mul8S" %} ++ ins_encode %{ ++ __ vmul_h($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct mul4I(vecX dst, vecX src1, vecX src2) %{ ++ predicate(n->as_Vector()->length() == 4); ++ match(Set dst (MulVI src1 src2)); ++ format %{ "vmul.w $dst, $src1, $src2\t# @mul4I" %} ++ ins_encode %{ ++ __ vmul_w($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct mul2L(vecX dst, vecX src1, vecX src2) %{ ++ predicate(n->as_Vector()->length() == 2); ++ match(Set dst (MulVL src1 src2)); ++ format %{ "vmul.d $dst, $src1, $src2\t# @mul2L" %} ++ ins_encode %{ ++ __ vmul_d($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct mul4F(vecX dst, vecX src1, vecX src2) %{ ++ predicate(n->as_Vector()->length() == 4); ++ match(Set dst (MulVF src1 src2)); ++ format %{ "vfmul.s $dst, $src1, $src2\t# @mul4F" %} ++ ins_encode %{ ++ __ vfmul_s($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct mul2D(vecX dst, vecX src1, vecX src2) %{ ++ predicate(n->as_Vector()->length() == 2); ++ match(Set dst (MulVD src1 src2)); ++ format %{ "vfmul.d $dst, $src1, $src2\t# @mul2D" %} ++ ins_encode %{ ++ __ vfmul_d($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct mul32B(vecY dst, vecY src1, vecY src2) %{ ++ predicate(n->as_Vector()->length() == 32); ++ match(Set dst (MulVB src1 src2)); ++ 
format %{ "xvmul.b $dst, $src1, $src2\t# @mul32B" %} ++ ins_encode %{ ++ __ xvmul_b($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct mul16S(vecY dst, vecY src1, vecY src2) %{ ++ predicate(n->as_Vector()->length() == 16); ++ match(Set dst (MulVS src1 src2)); ++ format %{ "xvmul.h $dst, $src1, $src2\t# @mul16S" %} ++ ins_encode %{ ++ __ xvmul_h($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct mul8I(vecY dst, vecY src1, vecY src2) %{ ++ predicate(n->as_Vector()->length() == 8); ++ match(Set dst (MulVI src1 src2)); ++ format %{ "xvmul.w $dst, $src1, $src2\t# @mul8I" %} ++ ins_encode %{ ++ __ xvmul_w($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct mul4L(vecY dst, vecY src1, vecY src2) %{ ++ predicate(n->as_Vector()->length() == 4); ++ match(Set dst (MulVL src1 src2)); ++ format %{ "xvmul.d $dst, $src1, $src2\t# @mul4L" %} ++ ins_encode %{ ++ __ xvmul_d($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct mul8F(vecY dst, vecY src1, vecY src2) %{ ++ predicate(n->as_Vector()->length() == 8); ++ match(Set dst (MulVF src1 src2)); ++ format %{ "xvfmul.s $dst, $src1, $src2\t# @mul8F" %} ++ ins_encode %{ ++ __ xvfmul_s($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct mul4D(vecY dst, vecY src1, vecY src2) %{ ++ predicate(n->as_Vector()->length() == 4); ++ match(Set dst (MulVD src1 src2)); ++ format %{ "xvfmul.d $dst, $src1, $src2\t# @mul4D" %} ++ ins_encode %{ ++ __ xvfmul_d($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++// --------------------------------- DIV -------------------------------------- ++instruct div4F(vecX dst, vecX src1, vecX src2) %{ ++ predicate(n->as_Vector()->length() == 4); ++ match(Set dst (DivVF src1 src2)); ++ format %{ "vfdiv.s $dst, $src1, $src2\t# @div4F" %} ++ ins_encode %{ ++ __ vfdiv_s($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct div2D(vecX dst, vecX src1, vecX src2) %{ ++ predicate(n->as_Vector()->length() == 2); ++ match(Set dst (DivVD src1 src2)); ++ format %{ "vfdiv.d $dst, $src1, $src2\t# @div2D" %} ++ ins_encode %{ ++ __ vfdiv_d($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct div8F(vecY dst, vecY src1, vecY src2) %{ ++ predicate(n->as_Vector()->length() == 8); ++ match(Set dst (DivVF src1 src2)); ++ format %{ "xvfdiv.s $dst, $src1, $src2\t# @div8F" %} ++ ins_encode %{ ++ __ xvfdiv_s($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct div4D(vecY dst, vecY src1, vecY src2) %{ ++ predicate(n->as_Vector()->length() == 4); ++ match(Set dst (DivVD src1 src2)); ++ format %{ "xvfdiv.d $dst, $src1, $src2\t# @div4D" %} ++ ins_encode %{ ++ __ xvfdiv_d($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++// --------------------------------- ABS -------------------------------------- ++ ++instruct abs16B(vecX dst, vecX src) %{ ++ predicate(n->as_Vector()->length() == 16); ++ match(Set dst (AbsVB src)); ++ effect(TEMP_DEF dst); ++ format %{ "vabs $dst, $src\t# @abs16B" %} ++ ins_encode %{ ++ __ vxor_v($dst$$FloatRegister, 
$dst$$FloatRegister, $dst$$FloatRegister); ++ __ vabsd_b($dst$$FloatRegister, $src$$FloatRegister, $dst$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct abs8S(vecX dst, vecX src) %{ ++ predicate(n->as_Vector()->length() == 8); ++ match(Set dst (AbsVS src)); ++ effect(TEMP_DEF dst); ++ format %{ "vabs $dst, $src\t# @abs8S" %} ++ ins_encode %{ ++ __ vxor_v($dst$$FloatRegister, $dst$$FloatRegister, $dst$$FloatRegister); ++ __ vabsd_h($dst$$FloatRegister, $src$$FloatRegister, $dst$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct abs4I(vecX dst, vecX src) %{ ++ predicate(n->as_Vector()->length() == 4); ++ match(Set dst (AbsVI src)); ++ effect(TEMP_DEF dst); ++ format %{ "vabs $dst, $src\t# @abs4I" %} ++ ins_encode %{ ++ __ vxor_v($dst$$FloatRegister, $dst$$FloatRegister, $dst$$FloatRegister); ++ __ vabsd_w($dst$$FloatRegister, $src$$FloatRegister, $dst$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct abs2L(vecX dst, vecX src) %{ ++ predicate(n->as_Vector()->length() == 2); ++ match(Set dst (AbsVL src)); ++ effect(TEMP_DEF dst); ++ format %{ "vabs $dst, $src\t# @abs2L" %} ++ ins_encode %{ ++ __ vxor_v($dst$$FloatRegister, $dst$$FloatRegister, $dst$$FloatRegister); ++ __ vabsd_d($dst$$FloatRegister, $src$$FloatRegister, $dst$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct abs4F(vecX dst, vecX src) %{ ++ predicate(n->as_Vector()->length() == 4); ++ match(Set dst (AbsVF src)); ++ format %{ "vbitclri.w $dst, $src\t# @abs4F" %} ++ ins_encode %{ ++ __ vbitclri_w($dst$$FloatRegister, $src$$FloatRegister, 0x1f); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct abs2D(vecX dst, vecX src) %{ ++ predicate(n->as_Vector()->length() == 2); ++ match(Set dst (AbsVD src)); ++ format %{ "vbitclri.d $dst, $src\t# @abs2D" %} ++ ins_encode %{ ++ __ vbitclri_d($dst$$FloatRegister, $src$$FloatRegister, 0x3f); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct abs32B(vecY dst, vecY src) %{ ++ predicate(n->as_Vector()->length() == 32); ++ match(Set dst (AbsVB src)); ++ effect(TEMP_DEF dst); ++ format %{ "xvabs $dst, $src\t# @abs32B" %} ++ ins_encode %{ ++ __ xvxor_v($dst$$FloatRegister, $dst$$FloatRegister, $dst$$FloatRegister); ++ __ xvabsd_b($dst$$FloatRegister, $src$$FloatRegister, $dst$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct abs16S(vecY dst, vecY src) %{ ++ predicate(n->as_Vector()->length() == 16); ++ match(Set dst (AbsVS src)); ++ effect(TEMP_DEF dst); ++ format %{ "xvabs $dst, $src\t# @abs16S" %} ++ ins_encode %{ ++ __ xvxor_v($dst$$FloatRegister, $dst$$FloatRegister, $dst$$FloatRegister); ++ __ xvabsd_h($dst$$FloatRegister, $src$$FloatRegister, $dst$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct abs8I(vecY dst, vecY src) %{ ++ predicate(n->as_Vector()->length() == 8); ++ match(Set dst (AbsVI src)); ++ effect(TEMP_DEF dst); ++ format %{ "xvabs $dst, $src\t# @abs8I" %} ++ ins_encode %{ ++ __ xvxor_v($dst$$FloatRegister, $dst$$FloatRegister, $dst$$FloatRegister); ++ __ xvabsd_w($dst$$FloatRegister, $src$$FloatRegister, $dst$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct abs4L(vecY dst, vecY src) %{ ++ predicate(n->as_Vector()->length() == 4); ++ match(Set dst (AbsVL src)); ++ effect(TEMP_DEF dst); ++ format %{ "xvabs $dst, $src\t# @abs4L" %} ++ ins_encode %{ ++ __ xvxor_v($dst$$FloatRegister, $dst$$FloatRegister, $dst$$FloatRegister); ++ __ xvabsd_d($dst$$FloatRegister, $src$$FloatRegister, $dst$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct 
abs8F(vecY dst, vecY src) %{ ++ predicate(n->as_Vector()->length() == 8); ++ match(Set dst (AbsVF src)); ++ format %{ "xvbitclri.w $dst, $src\t# @abs8F" %} ++ ins_encode %{ ++ __ xvbitclri_w($dst$$FloatRegister, $src$$FloatRegister, 0x1f); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct abs4D(vecY dst, vecY src) %{ ++ predicate(n->as_Vector()->length() == 4); ++ match(Set dst (AbsVD src)); ++ format %{ "xvbitclri.d $dst, $src\t# @abs4D" %} ++ ins_encode %{ ++ __ xvbitclri_d($dst$$FloatRegister, $src$$FloatRegister, 0x3f); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++// --------------------------------- ABS DIFF --------------------------------- ++ ++instruct absd4I(vecX dst, vecX src1, vecX src2) %{ ++ predicate(n->as_Vector()->length() == 4); ++ match(Set dst (AbsVI (SubVI src1 src2))); ++ format %{ "vabsd.w $dst, $src1, $src2\t# @absd4I" %} ++ ins_encode %{ ++ __ vabsd_w($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct absd2L(vecX dst, vecX src1, vecX src2) %{ ++ predicate(n->as_Vector()->length() == 2); ++ match(Set dst (AbsVL (SubVL src1 src2))); ++ format %{ "vabsd.d $dst, $src1, $src2\t# @absd2L" %} ++ ins_encode %{ ++ __ vabsd_d($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct absd8I(vecY dst, vecY src1, vecY src2) %{ ++ predicate(n->as_Vector()->length() == 8); ++ match(Set dst (AbsVI (SubVI src1 src2))); ++ format %{ "xvabsd.w $dst, $src1, $src2\t# @absd8I" %} ++ ins_encode %{ ++ __ xvabsd_w($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct absd4L(vecY dst, vecY src1, vecY src2) %{ ++ predicate(n->as_Vector()->length() == 4); ++ match(Set dst (AbsVL (SubVL src1 src2))); ++ format %{ "xvabsd.d $dst, $src1, $src2\t# @absd4L" %} ++ ins_encode %{ ++ __ xvabsd_d($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++// --------------------------------- MAX -------------------------------------- ++ ++instruct max16B(vecX dst, vecX src1, vecX src2) %{ ++ predicate(n->as_Vector()->length() == 16 && n->bottom_type()->is_vect()->element_basic_type() == T_BYTE); ++ match(Set dst (MaxV src1 src2)); ++ format %{ "vmax.b $dst, $src1, $src2\t# @max16B" %} ++ ins_encode %{ ++ __ vmax_b($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct max8S(vecX dst, vecX src1, vecX src2) %{ ++ predicate(n->as_Vector()->length() == 8 && n->bottom_type()->is_vect()->element_basic_type() == T_SHORT); ++ match(Set dst (MaxV src1 src2)); ++ format %{ "vmax.h $dst, $src1, $src2\t# @max8S" %} ++ ins_encode %{ ++ __ vmax_h($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct max4I(vecX dst, vecX src1, vecX src2) %{ ++ predicate(n->as_Vector()->length() == 4 && n->bottom_type()->is_vect()->element_basic_type() == T_INT); ++ match(Set dst (MaxV src1 src2)); ++ format %{ "vmax.w $dst, $src1, $src2\t# @max4I" %} ++ ins_encode %{ ++ __ vmax_w($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct max2L(vecX dst, vecX src1, vecX src2) %{ ++ predicate(n->as_Vector()->length() == 2 && n->bottom_type()->is_vect()->element_basic_type() == T_LONG); ++ match(Set dst (MaxV src1 src2)); ++ format %{ "vmax.d $dst, $src1, $src2\t# @max2L" %} ++ ins_encode %{ ++ __ vmax_d($dst$$FloatRegister, 
$src1$$FloatRegister, $src2$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct max4F(vecX dst, vecX src1, vecX src2, vecX tmp1, vecX tmp2) %{ ++ predicate(n->as_Vector()->length() == 4 && n->bottom_type()->is_vect()->element_basic_type() == T_FLOAT); ++ match(Set dst (MaxV src1 src2)); ++ effect(TEMP_DEF dst, TEMP tmp1, TEMP tmp2); ++ format %{ "vfmax $dst, $src1, $src2\t# TEMP($tmp1, $tmp2) @max4F" %} ++ ins_encode %{ ++ __ vfmax_s($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); ++ __ vxor_v($tmp1$$FloatRegister, $tmp1$$FloatRegister, $tmp1$$FloatRegister); ++ __ vfdiv_s($tmp1$$FloatRegister, $tmp1$$FloatRegister, $tmp1$$FloatRegister); ++ __ vfcmp_cun_s($tmp2$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); ++ __ vbitsel_v($dst$$FloatRegister, $dst$$FloatRegister, $tmp1$$FloatRegister, $tmp2$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct max2D(vecX dst, vecX src1, vecX src2, vecX tmp1, vecX tmp2) %{ ++ predicate(n->as_Vector()->length() == 2 && n->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE); ++ match(Set dst (MaxV src1 src2)); ++ effect(TEMP_DEF dst, TEMP tmp1, TEMP tmp2); ++ format %{ "vfmax $dst, $src1, $src2\t# TEMP($tmp1, $tmp2) @max2D" %} ++ ins_encode %{ ++ __ vfmax_d($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); ++ __ vxor_v($tmp1$$FloatRegister, $tmp1$$FloatRegister, $tmp1$$FloatRegister); ++ __ vfdiv_d($tmp1$$FloatRegister, $tmp1$$FloatRegister, $tmp1$$FloatRegister); ++ __ vfcmp_cun_d($tmp2$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); ++ __ vbitsel_v($dst$$FloatRegister, $dst$$FloatRegister, $tmp1$$FloatRegister, $tmp2$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct max32B(vecY dst, vecY src1, vecY src2) %{ ++ predicate(n->as_Vector()->length() == 32 && n->bottom_type()->is_vect()->element_basic_type() == T_BYTE); ++ match(Set dst (MaxV src1 src2)); ++ format %{ "xvmax.b $dst, $src1, $src2\t# @max32B" %} ++ ins_encode %{ ++ __ xvmax_b($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct max16S(vecY dst, vecY src1, vecY src2) %{ ++ predicate(n->as_Vector()->length() == 16 && n->bottom_type()->is_vect()->element_basic_type() == T_SHORT); ++ match(Set dst (MaxV src1 src2)); ++ format %{ "xvmax.h $dst, $src1, $src2\t# @max16S" %} ++ ins_encode %{ ++ __ xvmax_h($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct max8I(vecY dst, vecY src1, vecY src2) %{ ++ predicate(n->as_Vector()->length() == 8 && n->bottom_type()->is_vect()->element_basic_type() == T_INT); ++ match(Set dst (MaxV src1 src2)); ++ format %{ "xvmax.w $dst, $src1, $src2\t# @max8I" %} ++ ins_encode %{ ++ __ xvmax_w($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct max4L(vecY dst, vecY src1, vecY src2) %{ ++ predicate(n->as_Vector()->length() == 4 && n->bottom_type()->is_vect()->element_basic_type() == T_LONG); ++ match(Set dst (MaxV src1 src2)); ++ format %{ "xvmax.d $dst, $src1, $src2\t# @max4L" %} ++ ins_encode %{ ++ __ xvmax_d($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct max8F(vecY dst, vecY src1, vecY src2, vecY tmp1, vecY tmp2) %{ ++ predicate(n->as_Vector()->length() == 8 && n->bottom_type()->is_vect()->element_basic_type() == T_FLOAT); ++ match(Set dst (MaxV src1 src2)); ++ effect(TEMP_DEF dst, TEMP tmp1, TEMP 
tmp2); ++ format %{ "xvfmax $dst, $src1, $src2\t# TEMP($tmp1, $tmp2) @max8F" %} ++ ins_encode %{ ++ __ xvfmax_s($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); ++ __ xvxor_v($tmp1$$FloatRegister, $tmp1$$FloatRegister, $tmp1$$FloatRegister); ++ __ xvfdiv_s($tmp1$$FloatRegister, $tmp1$$FloatRegister, $tmp1$$FloatRegister); ++ __ xvfcmp_cun_s($tmp2$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); ++ __ xvbitsel_v($dst$$FloatRegister, $dst$$FloatRegister, $tmp1$$FloatRegister, $tmp2$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct max4D(vecY dst, vecY src1, vecY src2, vecY tmp1, vecY tmp2) %{ ++ predicate(n->as_Vector()->length() == 4 && n->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE); ++ match(Set dst (MaxV src1 src2)); ++ effect(TEMP_DEF dst, TEMP tmp1, TEMP tmp2); ++ format %{ "xvfmax $dst, $src1, $src2\t# TEMP($tmp1, $tmp2) @max4D" %} ++ ins_encode %{ ++ __ xvfmax_d($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); ++ __ xvxor_v($tmp1$$FloatRegister, $tmp1$$FloatRegister, $tmp1$$FloatRegister); ++ __ xvfdiv_d($tmp1$$FloatRegister, $tmp1$$FloatRegister, $tmp1$$FloatRegister); ++ __ xvfcmp_cun_d($tmp2$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); ++ __ xvbitsel_v($dst$$FloatRegister, $dst$$FloatRegister, $tmp1$$FloatRegister, $tmp2$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++// --------------------------------- MIN -------------------------------------- ++ ++instruct min16B(vecX dst, vecX src1, vecX src2) %{ ++ predicate(n->as_Vector()->length() == 16 && n->bottom_type()->is_vect()->element_basic_type() == T_BYTE); ++ match(Set dst (MinV src1 src2)); ++ format %{ "vmin.b $dst, $src1, $src2\t# @min16B" %} ++ ins_encode %{ ++ __ vmin_b($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct min8S(vecX dst, vecX src1, vecX src2) %{ ++ predicate(n->as_Vector()->length() == 8 && n->bottom_type()->is_vect()->element_basic_type() == T_SHORT); ++ match(Set dst (MinV src1 src2)); ++ format %{ "vmin.h $dst, $src1, $src2\t# @min8S" %} ++ ins_encode %{ ++ __ vmin_h($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct min4I(vecX dst, vecX src1, vecX src2) %{ ++ predicate(n->as_Vector()->length() == 4 && n->bottom_type()->is_vect()->element_basic_type() == T_INT); ++ match(Set dst (MinV src1 src2)); ++ format %{ "vmin.w $dst, $src1, $src2\t# @min4I" %} ++ ins_encode %{ ++ __ vmin_w($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct min2L(vecX dst, vecX src1, vecX src2) %{ ++ predicate(n->as_Vector()->length() == 2 && n->bottom_type()->is_vect()->element_basic_type() == T_LONG); ++ match(Set dst (MinV src1 src2)); ++ format %{ "vmin.d $dst, $src1, $src2\t# @min2L" %} ++ ins_encode %{ ++ __ vmin_d($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct min4F(vecX dst, vecX src1, vecX src2, vecX tmp1, vecX tmp2) %{ ++ predicate(n->as_Vector()->length() == 4 && n->bottom_type()->is_vect()->element_basic_type() == T_FLOAT); ++ match(Set dst (MinV src1 src2)); ++ effect(TEMP_DEF dst, TEMP tmp1, TEMP tmp2); ++ format %{ "vfmin $dst, $src1, $src2\t# TEMP($tmp1, $tmp2) @min4F" %} ++ ins_encode %{ ++ __ vfmin_s($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); ++ __ vxor_v($tmp1$$FloatRegister, $tmp1$$FloatRegister, $tmp1$$FloatRegister); ++ __ 
vfdiv_s($tmp1$$FloatRegister, $tmp1$$FloatRegister, $tmp1$$FloatRegister); ++ __ vfcmp_cun_s($tmp2$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); ++ __ vbitsel_v($dst$$FloatRegister, $dst$$FloatRegister, $tmp1$$FloatRegister, $tmp2$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct min2D(vecX dst, vecX src1, vecX src2, vecX tmp1, vecX tmp2) %{ ++ predicate(n->as_Vector()->length() == 2 && n->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE); ++ match(Set dst (MinV src1 src2)); ++ effect(TEMP_DEF dst, TEMP tmp1, TEMP tmp2); ++ format %{ "vfmin $dst, $src1, $src2\t# TEMP($tmp1, $tmp2) @min2D" %} ++ ins_encode %{ ++ __ vfmin_d($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); ++ __ vxor_v($tmp1$$FloatRegister, $tmp1$$FloatRegister, $tmp1$$FloatRegister); ++ __ vfdiv_d($tmp1$$FloatRegister, $tmp1$$FloatRegister, $tmp1$$FloatRegister); ++ __ vfcmp_cun_d($tmp2$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); ++ __ vbitsel_v($dst$$FloatRegister, $dst$$FloatRegister, $tmp1$$FloatRegister, $tmp2$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct min32B(vecY dst, vecY src1, vecY src2) %{ ++ predicate(n->as_Vector()->length() == 32 && n->bottom_type()->is_vect()->element_basic_type() == T_BYTE); ++ match(Set dst (MinV src1 src2)); ++ format %{ "xvmin.b $dst, $src1, $src2\t# @min32B" %} ++ ins_encode %{ ++ __ xvmin_b($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct min16S(vecY dst, vecY src1, vecY src2) %{ ++ predicate(n->as_Vector()->length() == 16 && n->bottom_type()->is_vect()->element_basic_type() == T_SHORT); ++ match(Set dst (MinV src1 src2)); ++ format %{ "xvmin.h $dst, $src1, $src2\t# @min16S" %} ++ ins_encode %{ ++ __ xvmin_h($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct min8I(vecY dst, vecY src1, vecY src2) %{ ++ predicate(n->as_Vector()->length() == 8 && n->bottom_type()->is_vect()->element_basic_type() == T_INT); ++ match(Set dst (MinV src1 src2)); ++ format %{ "xvmin.w $dst, $src1, $src2\t# @min8I" %} ++ ins_encode %{ ++ __ xvmin_w($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct min4L(vecY dst, vecY src1, vecY src2) %{ ++ predicate(n->as_Vector()->length() == 4 && n->bottom_type()->is_vect()->element_basic_type() == T_LONG); ++ match(Set dst (MinV src1 src2)); ++ format %{ "xvmin.d $dst, $src1, $src2\t# @min4L" %} ++ ins_encode %{ ++ __ xvmin_d($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct min8F(vecY dst, vecY src1, vecY src2, vecY tmp1, vecY tmp2) %{ ++ predicate(n->as_Vector()->length() == 8 && n->bottom_type()->is_vect()->element_basic_type() == T_FLOAT); ++ match(Set dst (MinV src1 src2)); ++ effect(TEMP_DEF dst, TEMP tmp1, TEMP tmp2); ++ format %{ "xvfmin $dst, $src1, $src2\t# TEMP($tmp1, $tmp2) @min8F" %} ++ ins_encode %{ ++ __ xvfmin_s($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); ++ __ xvxor_v($tmp1$$FloatRegister, $tmp1$$FloatRegister, $tmp1$$FloatRegister); ++ __ xvfdiv_s($tmp1$$FloatRegister, $tmp1$$FloatRegister, $tmp1$$FloatRegister); ++ __ xvfcmp_cun_s($tmp2$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); ++ __ xvbitsel_v($dst$$FloatRegister, $dst$$FloatRegister, $tmp1$$FloatRegister, $tmp2$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct min4D(vecY dst, vecY src1, 
vecY src2, vecY tmp1, vecY tmp2) %{ ++ predicate(n->as_Vector()->length() == 4 && n->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE); ++ match(Set dst (MinV src1 src2)); ++ effect(TEMP_DEF dst, TEMP tmp1, TEMP tmp2); ++ format %{ "xvfmin $dst, $src1, $src2\t# TEMP($tmp1, $tmp2) @min4D" %} ++ ins_encode %{ ++ __ xvfmin_d($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); ++ __ xvxor_v($tmp1$$FloatRegister, $tmp1$$FloatRegister, $tmp1$$FloatRegister); ++ __ xvfdiv_d($tmp1$$FloatRegister, $tmp1$$FloatRegister, $tmp1$$FloatRegister); ++ __ xvfcmp_cun_d($tmp2$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); ++ __ xvbitsel_v($dst$$FloatRegister, $dst$$FloatRegister, $tmp1$$FloatRegister, $tmp2$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++// --------------------------------- NEG -------------------------------------- ++ ++instruct neg4F(vecX dst, vecX src) %{ ++ predicate(n->as_Vector()->length() == 4); ++ match(Set dst (NegVF src)); ++ format %{ "vbitrevi.w $dst, $src\t# @neg4F" %} ++ ins_encode %{ ++ __ vbitrevi_w($dst$$FloatRegister, $src$$FloatRegister, 0x1f); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct neg2D(vecX dst, vecX src) %{ ++ predicate(n->as_Vector()->length() == 2); ++ match(Set dst (NegVD src)); ++ format %{ "vbitrevi.d $dst, $src\t# @neg2D" %} ++ ins_encode %{ ++ __ vbitrevi_d($dst$$FloatRegister, $src$$FloatRegister, 0x3f); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct neg8F(vecY dst, vecY src) %{ ++ predicate(n->as_Vector()->length() == 8); ++ match(Set dst (NegVF src)); ++ format %{ "xvbitrevi.w $dst, $src\t# @neg8F" %} ++ ins_encode %{ ++ __ xvbitrevi_w($dst$$FloatRegister, $src$$FloatRegister, 0x1f); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct neg4D(vecY dst, vecY src) %{ ++ predicate(n->as_Vector()->length() == 4); ++ match(Set dst (NegVD src)); ++ format %{ "xvbitrevi.d $dst, $src\t# @neg4D" %} ++ ins_encode %{ ++ __ xvbitrevi_d($dst$$FloatRegister, $src$$FloatRegister, 0x3f); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++// --------------------------------- SQRT ------------------------------------- ++ ++instruct sqrt4F(vecX dst, vecX src) %{ ++ predicate(n->as_Vector()->length() == 4); ++ match(Set dst (SqrtVF src)); ++ format %{ "vfsqrt.s $dst, $src\t# @sqrt4F" %} ++ ins_encode %{ ++ __ vfsqrt_s($dst$$FloatRegister, $src$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct sqrt2D(vecX dst, vecX src) %{ ++ predicate(n->as_Vector()->length() == 2); ++ match(Set dst (SqrtVD src)); ++ format %{ "vfsqrt.d $dst, $src\t# @sqrt2D" %} ++ ins_encode %{ ++ __ vfsqrt_d($dst$$FloatRegister, $src$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct sqrt8F(vecY dst, vecY src) %{ ++ predicate(n->as_Vector()->length() == 8); ++ match(Set dst (SqrtVF src)); ++ format %{ "xvfsqrt.s $dst, $src\t# @sqrt8F" %} ++ ins_encode %{ ++ __ xvfsqrt_s($dst$$FloatRegister, $src$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct sqrt4D(vecY dst, vecY src) %{ ++ predicate(n->as_Vector()->length() == 4); ++ match(Set dst (SqrtVD src)); ++ format %{ "xvfsqrt.d $dst, $src\t# @sqrt4D" %} ++ ins_encode %{ ++ __ xvfsqrt_d($dst$$FloatRegister, $src$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++// --------------------------------- MADD ------------------------------------- ++ ++instruct madd16B(vecX dst, vecX src1, vecX src2) %{ ++ predicate(n->as_Vector()->length() == 16); ++ match(Set dst (AddVB dst (MulVB src1 src2))); ++ format %{ "vmadd.b $dst, $src1, $src2\t# @madd16B" %} ++ 
ins_encode %{ ++ __ vmadd_b($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct madd8S(vecX dst, vecX src1, vecX src2) %{ ++ predicate(n->as_Vector()->length() == 8); ++ match(Set dst (AddVS dst (MulVS src1 src2))); ++ format %{ "vmadd.h $dst, $src1, $src2\t# @madd8S" %} ++ ins_encode %{ ++ __ vmadd_h($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct madd4I(vecX dst, vecX src1, vecX src2) %{ ++ predicate(n->as_Vector()->length() == 4); ++ match(Set dst (AddVI dst (MulVI src1 src2))); ++ format %{ "vmadd $dst, $src1, $src2\t# @madd4I" %} ++ ins_encode %{ ++ __ vmadd_w($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct madd2L(vecX dst, vecX src1, vecX src2) %{ ++ predicate(n->as_Vector()->length() == 2); ++ match(Set dst (AddVL dst (MulVL src1 src2))); ++ format %{ "vmadd.d $dst, $src1, $src2\t# @madd2L" %} ++ ins_encode %{ ++ __ vmadd_d($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++// src1 * src2 + src3 ++instruct madd4F(vecX dst, vecX src1, vecX src2, vecX src3) %{ ++ predicate(UseFMA && n->as_Vector()->length() == 4); ++ match(Set dst (FmaVF src3 (Binary src1 src2))); ++ format %{ "vfmadd.s $dst, $src1, $src2, $src3\t# @madd4F" %} ++ ins_encode %{ ++ __ vfmadd_s($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister, $src3$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++// src1 * src2 + src3 ++instruct madd2D(vecX dst, vecX src1, vecX src2, vecX src3) %{ ++ predicate(UseFMA && n->as_Vector()->length() == 2); ++ match(Set dst (FmaVD src3 (Binary src1 src2))); ++ format %{ "vfmadd.d $dst, $src1, $src2, $src3\t# @madd2D" %} ++ ins_encode %{ ++ __ vfmadd_d($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister, $src3$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct madd32B(vecY dst, vecY src1, vecY src2) %{ ++ predicate(n->as_Vector()->length() == 32); ++ match(Set dst (AddVB dst (MulVB src1 src2))); ++ format %{ "xvmadd.b $dst, $src1, $src2\t# @madd32B" %} ++ ins_encode %{ ++ __ xvmadd_b($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct madd16S(vecY dst, vecY src1, vecY src2) %{ ++ predicate(n->as_Vector()->length() == 16); ++ match(Set dst (AddVS dst (MulVS src1 src2))); ++ format %{ "xvmadd.h $dst, $src1, $src2\t# @madd16S" %} ++ ins_encode %{ ++ __ xvmadd_h($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct madd8I(vecY dst, vecY src1, vecY src2) %{ ++ predicate(n->as_Vector()->length() == 8); ++ match(Set dst (AddVI dst (MulVI src1 src2))); ++ format %{ "xvmadd.w $dst, $src1, $src2\t# @madd8I" %} ++ ins_encode %{ ++ __ xvmadd_w($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct madd4L(vecY dst, vecY src1, vecY src2) %{ ++ predicate(n->as_Vector()->length() == 4); ++ match(Set dst (AddVL dst (MulVL src1 src2))); ++ format %{ "xvmadd.d $dst, $src1, $src2\t# @madd4L" %} ++ ins_encode %{ ++ __ xvmadd_d($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++// src1 * src2 + src3 ++instruct madd8F(vecY dst, vecY src1, vecY src2, vecY src3) %{ ++ predicate(UseFMA && n->as_Vector()->length() == 8); ++ match(Set dst (FmaVF src3 (Binary src1 
src2))); ++ format %{ "xvfmadd.s $dst, $src1, $src2, $src3\t# @madd8F" %} ++ ins_encode %{ ++ __ xvfmadd_s($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister, $src3$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++// src1 * src2 + src3 ++instruct madd4D(vecY dst, vecY src1, vecY src2, vecY src3) %{ ++ predicate(UseFMA && n->as_Vector()->length() == 4); ++ match(Set dst (FmaVD src3 (Binary src1 src2))); ++ format %{ "xvfmadd.d $dst, $src1, $src2, $src3\t# @madd4D" %} ++ ins_encode %{ ++ __ xvfmadd_d($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister, $src3$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++// --------------------------------- MSUB ------------------------------------- ++ ++instruct msub16B(vecX dst, vecX src1, vecX src2) %{ ++ predicate(n->as_Vector()->length() == 16); ++ match(Set dst (SubVB dst (MulVB src1 src2))); ++ format %{ "vmsub.b $dst, $src1, $src2\t# @msub16B" %} ++ ins_encode %{ ++ __ vmsub_b($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct msub8S(vecX dst, vecX src1, vecX src2) %{ ++ predicate(n->as_Vector()->length() == 8); ++ match(Set dst (SubVS dst (MulVS src1 src2))); ++ format %{ "vmsub.h $dst, $src1, $src2\t# @msub8S" %} ++ ins_encode %{ ++ __ vmsub_h($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct msub4I(vecX dst, vecX src1, vecX src2) %{ ++ predicate(n->as_Vector()->length() == 4); ++ match(Set dst (SubVI dst (MulVI src1 src2))); ++ format %{ "vmsub.w $dst, $src1, $src2\t# @msub4I" %} ++ ins_encode %{ ++ __ vmsub_w($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct msub2L(vecX dst, vecX src1, vecX src2) %{ ++ predicate(n->as_Vector()->length() == 2); ++ match(Set dst (SubVL dst (MulVL src1 src2))); ++ format %{ "vmsub.d $dst, $src1, $src2\t# @msub2L" %} ++ ins_encode %{ ++ __ vmsub_d($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++// src1 * src2 - src3 ++instruct msub4F(vecX dst, vecX src1, vecX src2, vecX src3) %{ ++ predicate(UseFMA && n->as_Vector()->length() == 4); ++ match(Set dst (FmaVF (NegVF src3) (Binary src1 src2))); ++ format %{ "vfmsub.s $dst, $src1, $src2, $src3\t# @msub4F" %} ++ ins_encode %{ ++ __ vfmsub_s($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister, $src3$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++// src1 * src2 - src3 ++instruct msub2D(vecX dst, vecX src1, vecX src2, vecX src3) %{ ++ predicate(UseFMA && n->as_Vector()->length() == 2); ++ match(Set dst (FmaVD (NegVD src3) (Binary src1 src2))); ++ format %{ "vfmsub.d $dst, $src1, $src2, $src3\t# @msub2D" %} ++ ins_encode %{ ++ __ vfmsub_d($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister, $src3$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct msub32B(vecY dst, vecY src1, vecY src2) %{ ++ predicate(n->as_Vector()->length() == 32); ++ match(Set dst (SubVB dst (MulVB src1 src2))); ++ format %{ "xvmsub.b $dst, $src1, $src2\t# @msub32B" %} ++ ins_encode %{ ++ __ xvmsub_b($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct msub16S(vecY dst, vecY src1, vecY src2) %{ ++ predicate(n->as_Vector()->length() == 16); ++ match(Set dst (SubVS dst (MulVS src1 src2))); ++ format %{ "xvmsub.h $dst, $src1, $src2\t# @msub16S" %} ++ ins_encode %{ ++ __ 
xvmsub_h($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct msub8I(vecY dst, vecY src1, vecY src2) %{ ++ predicate(n->as_Vector()->length() == 8); ++ match(Set dst (SubVI dst (MulVI src1 src2))); ++ format %{ "xvmsub.w $dst, $src1, $src2\t# @msub8I" %} ++ ins_encode %{ ++ __ xvmsub_w($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct msub4L(vecY dst, vecY src1, vecY src2) %{ ++ predicate(n->as_Vector()->length() == 4); ++ match(Set dst (SubVL dst (MulVL src1 src2))); ++ format %{ "xvmsub.d $dst, $src1, $src2\t# @msub4L" %} ++ ins_encode %{ ++ __ xvmsub_d($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++// src1 * src2 - src3 ++instruct msub8F(vecY dst, vecY src1, vecY src2, vecY src3) %{ ++ predicate(UseFMA && n->as_Vector()->length() == 8); ++ match(Set dst (FmaVF (NegVF src3) (Binary src1 src2))); ++ format %{ "xvfmsub.s $dst, $src1, $src2, $src3\t# @msub8F" %} ++ ins_encode %{ ++ __ xvfmsub_s($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister, $src3$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++// src1 * src2 - src3 ++instruct msub4D(vecY dst, vecY src1, vecY src2, vecY src3) %{ ++ predicate(UseFMA && n->as_Vector()->length() == 4); ++ match(Set dst (FmaVD (NegVD src3) (Binary src1 src2))); ++ format %{ "xvfmsub.d $dst, $src1, $src2, $src3\t# @msub4D" %} ++ ins_encode %{ ++ __ xvfmsub_d($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister, $src3$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++// --------------------------------- FNMADD ----------------------------------- ++ ++// -src1 * src2 - src3 ++instruct nmadd4F(vecX dst, vecX src1, vecX src2, vecX src3) %{ ++ predicate(UseFMA && n->as_Vector()->length() == 4); ++ match(Set dst (FmaVF (NegVF src3) (Binary (NegVF src1) src2))); ++ match(Set dst (FmaVF (NegVF src3) (Binary src1 (NegVF src2)))); ++ format %{ "vfnmadd.s $dst, $src1, $src2, $src3\t# @nmadd4F" %} ++ ins_encode %{ ++ __ vfnmadd_s($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister, $src3$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++// -src1 * src2 - src3 ++instruct nmadd2D(vecX dst, vecX src1, vecX src2, vecX src3) %{ ++ predicate(UseFMA && n->as_Vector()->length() == 2); ++ match(Set dst (FmaVD (NegVD src3) (Binary (NegVD src1) src2))); ++ match(Set dst (FmaVD (NegVD src3) (Binary src1 (NegVD src2)))); ++ format %{ "vfnmadd.d $dst, $src1, $src2, $src3\t# @nmadd2D" %} ++ ins_encode %{ ++ __ vfnmadd_d($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister, $src3$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++// -src1 * src2 - src3 ++instruct nmadd8F(vecY dst, vecY src1, vecY src2, vecY src3) %{ ++ predicate(UseFMA && n->as_Vector()->length() == 8); ++ match(Set dst (FmaVF (NegVF src3) (Binary (NegVF src1) src2))); ++ match(Set dst (FmaVF (NegVF src3) (Binary src1 (NegVF src2)))); ++ format %{ "xvfnmadd.s $dst, $src1, $src2, $src3\t# @nmadd8F" %} ++ ins_encode %{ ++ __ xvfnmadd_s($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister, $src3$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++// -src1 * src2 - src3 ++instruct nmadd4D(vecY dst, vecY src1, vecY src2, vecY src3) %{ ++ predicate(UseFMA && n->as_Vector()->length() == 4); ++ match(Set dst (FmaVD (NegVD src3) (Binary (NegVD src1) src2))); ++ match(Set dst (FmaVD (NegVD src3) (Binary src1 (NegVD src2)))); ++ format %{ 
"xvfnmadd.d $dst, $src1, $src2, $src3\t# @nmadd4D" %} ++ ins_encode %{ ++ __ xvfnmadd_d($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister, $src3$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++// --------------------------------- FNMSUB ----------------------------------- ++ ++// -src1 * src2 + src3 ++instruct nmsub4F(vecX dst, vecX src1, vecX src2, vecX src3) %{ ++ predicate(UseFMA && n->as_Vector()->length() == 4); ++ match(Set dst (FmaVF src3 (Binary (NegVF src1) src2))); ++ match(Set dst (FmaVF src3 (Binary src1 (NegVF src2)))); ++ format %{ "vfnmsub.s $dst, $src1, $src2, $src3\t# @nmsub4F" %} ++ ins_encode %{ ++ __ vfnmsub_s($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister, $src3$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++// -src1 * src2 + src3 ++instruct nmsub2D(vecX dst, vecX src1, vecX src2, vecX src3) %{ ++ predicate(UseFMA && n->as_Vector()->length() == 2); ++ match(Set dst (FmaVD src3 (Binary (NegVD src1) src2))); ++ match(Set dst (FmaVD src3 (Binary src1 (NegVD src2)))); ++ format %{ "vfnmsub.d $dst, $src1, $src2, $src3\t# @nmsub2D" %} ++ ins_encode %{ ++ __ vfnmsub_d($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister, $src3$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++// -src1 * src2 + src3 ++instruct nmsub8F(vecY dst, vecY src1, vecY src2, vecY src3) %{ ++ predicate(UseFMA && n->as_Vector()->length() == 8); ++ match(Set dst (FmaVF src3 (Binary (NegVF src1) src2))); ++ match(Set dst (FmaVF src3 (Binary src1 (NegVF src2)))); ++ format %{ "xvfnmsub.s $dst, $src1, $src2, $src3\t# @nmsub8F" %} ++ ins_encode %{ ++ __ xvfnmsub_s($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister, $src3$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++// -src1 * src2 + src3 ++instruct nmsub4D(vecY dst, vecY src1, vecY src2, vecY src3) %{ ++ predicate(UseFMA && n->as_Vector()->length() == 4); ++ match(Set dst (FmaVD src3 (Binary (NegVD src1) src2))); ++ match(Set dst (FmaVD src3 (Binary src1 (NegVD src2)))); ++ format %{ "xvfnmsub.d $dst, $src1, $src2, $src3\t# @nmsub4D" %} ++ ins_encode %{ ++ __ xvfnmsub_d($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister, $src3$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++// ------------------------------ Shift --------------------------------------- ++ ++instruct shiftcntX(vecX dst, mRegI cnt) %{ ++ predicate(n->as_Vector()->length_in_bytes() == 16); ++ match(Set dst (LShiftCntV cnt)); ++ match(Set dst (RShiftCntV cnt)); ++ format %{ "vreplgr2vr.b $dst, $cnt\t# @shiftcntX" %} ++ ins_encode %{ ++ __ vreplgr2vr_b($dst$$FloatRegister, $cnt$$Register); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct shiftcntY(vecY dst, mRegI cnt) %{ ++ predicate(n->as_Vector()->length_in_bytes() == 32); ++ match(Set dst (LShiftCntV cnt)); ++ match(Set dst (RShiftCntV cnt)); ++ format %{ "xvreplgr2vr.b $dst, $cnt\t# @shiftcntY" %} ++ ins_encode %{ ++ __ xvreplgr2vr_b($dst$$FloatRegister, $cnt$$Register); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++// ------------------------------ LeftShift ----------------------------------- ++ ++instruct sll16B(vecX dst, vecX src, vecX shift, vecX tmp) %{ ++ predicate(n->as_Vector()->length() == 16); ++ match(Set dst (LShiftVB src shift)); ++ effect(TEMP_DEF dst, TEMP tmp); ++ format %{ "vsll $dst, $src, $shift\t# TEMP($tmp) @sll16B" %} ++ ins_encode %{ ++ __ vsll_b($tmp$$FloatRegister, $src$$FloatRegister, $shift$$FloatRegister); ++ __ vslti_bu($dst$$FloatRegister, $shift$$FloatRegister, 0x8); ++ __ 
vand_v($dst$$FloatRegister, $dst$$FloatRegister, $tmp$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct sll16B_imm(vecX dst, vecX src, immI shift) %{ ++ predicate(n->as_Vector()->length() == 16); ++ match(Set dst (LShiftVB src shift)); ++ format %{ "vslli.b $dst, $src, $shift\t# @sll16B_imm" %} ++ ins_encode %{ ++ if ($shift$$constant >= 8) { ++ __ vxor_v($dst$$FloatRegister, $dst$$FloatRegister, $dst$$FloatRegister); ++ } else { ++ __ vslli_b($dst$$FloatRegister, $src$$FloatRegister, $shift$$constant); ++ } ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct sll8S(vecX dst, vecX src, vecX shift, vecX tmp) %{ ++ predicate(n->as_Vector()->length() == 8); ++ match(Set dst (LShiftVS src shift)); ++ effect(TEMP_DEF dst, TEMP tmp); ++ format %{ "vsll $dst, $src, $shift\t# TEMP($tmp) @sll8S" %} ++ ins_encode %{ ++ __ vsll_h($tmp$$FloatRegister, $src$$FloatRegister, $shift$$FloatRegister); ++ __ vslti_bu($dst$$FloatRegister, $shift$$FloatRegister, 0x10); ++ __ vand_v($dst$$FloatRegister, $dst$$FloatRegister, $tmp$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct sll8S_imm(vecX dst, vecX src, immI shift) %{ ++ predicate(n->as_Vector()->length() == 8); ++ match(Set dst (LShiftVS src shift)); ++ format %{ "vslli.h $dst, $src, $shift\t# @sll8S_imm" %} ++ ins_encode %{ ++ if ($shift$$constant >= 16) { ++ __ vxor_v($dst$$FloatRegister, $dst$$FloatRegister, $dst$$FloatRegister); ++ } else { ++ __ vslli_h($dst$$FloatRegister, $src$$FloatRegister, $shift$$constant); ++ } ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct sll4I(vecX dst, vecX src, vecX shift) %{ ++ predicate(n->as_Vector()->length() == 4); ++ match(Set dst (LShiftVI src shift)); ++ format %{ "vsll.w $dst, $src, $shift\t# @sll4I" %} ++ ins_encode %{ ++ __ vsll_w($dst$$FloatRegister, $src$$FloatRegister, $shift$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct sll4I_imm(vecX dst, vecX src, immI shift) %{ ++ predicate(n->as_Vector()->length() == 4); ++ match(Set dst (LShiftVI src shift)); ++ format %{ "vslli.w $dst, $src, $shift\t# @sll4I_imm" %} ++ ins_encode %{ ++ __ vslli_w($dst$$FloatRegister, $src$$FloatRegister, $shift$$constant); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct sll2L(vecX dst, vecX src, vecX shift) %{ ++ predicate(n->as_Vector()->length() == 2); ++ match(Set dst (LShiftVL src shift)); ++ format %{ "vsll.d $dst, $src, $shift\t# @sll2L" %} ++ ins_encode %{ ++ __ vsll_d($dst$$FloatRegister, $src$$FloatRegister, $shift$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct sll2L_imm(vecX dst, vecX src, immI shift) %{ ++ predicate(n->as_Vector()->length() == 2); ++ match(Set dst (LShiftVL src shift)); ++ format %{ "vslli.d $dst, $src, $shift\t# @sll2L_imm" %} ++ ins_encode %{ ++ __ vslli_d($dst$$FloatRegister, $src$$FloatRegister, $shift$$constant); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct sll32B(vecY dst, vecY src, vecY shift, vecY tmp) %{ ++ predicate(n->as_Vector()->length() == 32); ++ match(Set dst (LShiftVB src shift)); ++ effect(TEMP_DEF dst, TEMP tmp); ++ format %{ "xvsll $dst, $src, $shift\t# TEMP($tmp) @sll32B" %} ++ ins_encode %{ ++ __ xvsll_b($tmp$$FloatRegister, $src$$FloatRegister, $shift$$FloatRegister); ++ __ xvslti_bu($dst$$FloatRegister, $shift$$FloatRegister, 0x8); ++ __ xvand_v($dst$$FloatRegister, $dst$$FloatRegister, $tmp$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct sll32B_imm(vecY dst, vecY src, immI shift) %{ ++ predicate(n->as_Vector()->length() == 32); ++ match(Set dst (LShiftVB src shift)); ++ 
format %{ "xvslli.b $dst, $src, $shift\t# @sll32B_imm" %} ++ ins_encode %{ ++ if ($shift$$constant >= 8) { ++ __ xvxor_v($dst$$FloatRegister, $dst$$FloatRegister, $dst$$FloatRegister); ++ } else { ++ __ xvslli_b($dst$$FloatRegister, $src$$FloatRegister, $shift$$constant); ++ } ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct sll16S(vecY dst, vecY src, vecY shift, vecY tmp) %{ ++ predicate(n->as_Vector()->length() == 16); ++ match(Set dst (LShiftVS src shift)); ++ effect(TEMP_DEF dst, TEMP tmp); ++ format %{ "xvsll $dst, $src, $shift\t# TEMP($tmp) @sll16S" %} ++ ins_encode %{ ++ __ xvsll_h($tmp$$FloatRegister, $src$$FloatRegister, $shift$$FloatRegister); ++ __ xvslti_bu($dst$$FloatRegister, $shift$$FloatRegister, 0x10); ++ __ xvand_v($dst$$FloatRegister, $dst$$FloatRegister, $tmp$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct sll16S_imm(vecY dst, vecY src, immI shift) %{ ++ predicate(n->as_Vector()->length() == 16); ++ match(Set dst (LShiftVS src shift)); ++ format %{ "xvslli.h $dst, $src, $shift\t# @sll16S_imm" %} ++ ins_encode %{ ++ if ($shift$$constant >= 16) { ++ __ xvxor_v($dst$$FloatRegister, $dst$$FloatRegister, $dst$$FloatRegister); ++ } else { ++ __ xvslli_h($dst$$FloatRegister, $src$$FloatRegister, $shift$$constant); ++ } ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct sll8I(vecY dst, vecY src, vecY shift) %{ ++ predicate(n->as_Vector()->length() == 8); ++ match(Set dst (LShiftVI src shift)); ++ format %{ "xvsll.w $dst, $src, $shift\t# @sll8I" %} ++ ins_encode %{ ++ __ xvsll_w($dst$$FloatRegister, $src$$FloatRegister, $shift$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct sll8I_imm(vecY dst, vecY src, immI shift) %{ ++ predicate(n->as_Vector()->length() == 8); ++ match(Set dst (LShiftVI src shift)); ++ format %{ "xvslli.w $dst, $src, $shift\t# @sll8I_imm" %} ++ ins_encode %{ ++ __ xvslli_w($dst$$FloatRegister, $src$$FloatRegister, $shift$$constant); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct sll4L(vecY dst, vecY src, vecY shift) %{ ++ predicate(n->as_Vector()->length() == 4); ++ match(Set dst (LShiftVL src shift)); ++ format %{ "xvsll.d $dst, $src, $shift\t# @sll4L" %} ++ ins_encode %{ ++ __ xvsll_d($dst$$FloatRegister, $src$$FloatRegister, $shift$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct sll4L_imm(vecY dst, vecY src, immI shift) %{ ++ predicate(n->as_Vector()->length() == 4); ++ match(Set dst (LShiftVL src shift)); ++ format %{ "xvslli.d $dst, $src, $shift\t# @sll4L_imm" %} ++ ins_encode %{ ++ __ xvslli_d($dst$$FloatRegister, $src$$FloatRegister, $shift$$constant); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++// ----------------------- LogicalRightShift ---------------------------------- ++ ++instruct srl16B(vecX dst, vecX src, vecX shift, vecX tmp) %{ ++ predicate(n->as_Vector()->length() == 16); ++ match(Set dst (URShiftVB src shift)); ++ effect(TEMP_DEF dst, TEMP tmp); ++ format %{ "vsrl $dst, $src, $shift\t# TEMP($tmp) @srl16B" %} ++ ins_encode %{ ++ __ vsrl_b($tmp$$FloatRegister, $src$$FloatRegister, $shift$$FloatRegister); ++ __ vslti_bu($dst$$FloatRegister, $shift$$FloatRegister, 0x8); ++ __ vand_v($dst$$FloatRegister, $dst$$FloatRegister, $tmp$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct srl16B_imm(vecX dst, vecX src, immI shift) %{ ++ predicate(n->as_Vector()->length() == 16); ++ match(Set dst (URShiftVB src shift)); ++ format %{ "vsrli.b $dst, $src, $shift\t# @srl16B_imm" %} ++ ins_encode %{ ++ if ($shift$$constant >= 8) { ++ __ vxor_v($dst$$FloatRegister, 
$dst$$FloatRegister, $dst$$FloatRegister); ++ } else { ++ __ vsrli_b($dst$$FloatRegister, $src$$FloatRegister, $shift$$constant); ++ } ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct srl8S(vecX dst, vecX src, vecX shift, vecX tmp) %{ ++ predicate(n->as_Vector()->length() == 8); ++ match(Set dst (URShiftVS src shift)); ++ effect(TEMP_DEF dst, TEMP tmp); ++ format %{ "vsrl $dst, $src, $shift\t# TEMP($tmp) @srl8S" %} ++ ins_encode %{ ++ __ vsrl_h($tmp$$FloatRegister, $src$$FloatRegister, $shift$$FloatRegister); ++ __ vslti_bu($dst$$FloatRegister, $shift$$FloatRegister, 0x10); ++ __ vand_v($dst$$FloatRegister, $dst$$FloatRegister, $tmp$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct srl8S_imm(vecX dst, vecX src, immI shift) %{ ++ predicate(n->as_Vector()->length() == 8); ++ match(Set dst (URShiftVS src shift)); ++ format %{ "vsrli.h $dst, $src, $shift\t# @srl8S_imm" %} ++ ins_encode %{ ++ if ($shift$$constant >= 16) { ++ __ vxor_v($dst$$FloatRegister, $dst$$FloatRegister, $dst$$FloatRegister); ++ } else { ++ __ vsrli_h($dst$$FloatRegister, $src$$FloatRegister, $shift$$constant); ++ } ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct srl4I(vecX dst, vecX src, vecX shift) %{ ++ predicate(n->as_Vector()->length() == 4); ++ match(Set dst (URShiftVI src shift)); ++ format %{ "vsrl.w $dst, $src, $shift\t# @srl4I" %} ++ ins_encode %{ ++ __ vsrl_w($dst$$FloatRegister, $src$$FloatRegister, $shift$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct srl4I_imm(vecX dst, vecX src, immI shift) %{ ++ predicate(n->as_Vector()->length() == 4); ++ match(Set dst (URShiftVI src shift)); ++ format %{ "vsrli.w $dst, $src, $shift\t# @srl4I_imm" %} ++ ins_encode %{ ++ __ vsrli_w($dst$$FloatRegister, $src$$FloatRegister, $shift$$constant); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct srl2L(vecX dst, vecX src, vecX shift) %{ ++ predicate(n->as_Vector()->length() == 2); ++ match(Set dst (URShiftVL src shift)); ++ format %{ "vsrl.d $dst, $src, $shift\t# @srl2L" %} ++ ins_encode %{ ++ __ vsrl_d($dst$$FloatRegister, $src$$FloatRegister, $shift$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct srl2L_imm(vecX dst, vecX src, immI shift) %{ ++ predicate(n->as_Vector()->length() == 2); ++ match(Set dst (URShiftVL src shift)); ++ format %{ "vsrli.d $dst, $src, $shift\t# @srl2L_imm" %} ++ ins_encode %{ ++ __ vsrli_d($dst$$FloatRegister, $src$$FloatRegister, $shift$$constant); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct srl32B(vecY dst, vecY src, vecY shift, vecY tmp) %{ ++ predicate(n->as_Vector()->length() == 32); ++ match(Set dst (URShiftVB src shift)); ++ effect(TEMP_DEF dst, TEMP tmp); ++ format %{ "xvsrl $dst, $src, $shift\t# TEMP($tmp) @srl32B" %} ++ ins_encode %{ ++ __ xvsrl_b($tmp$$FloatRegister, $src$$FloatRegister, $shift$$FloatRegister); ++ __ xvslti_bu($dst$$FloatRegister, $shift$$FloatRegister, 0x8); ++ __ xvand_v($dst$$FloatRegister, $dst$$FloatRegister, $tmp$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct srl32B_imm(vecY dst, vecY src, immI shift) %{ ++ predicate(n->as_Vector()->length() == 32); ++ match(Set dst (URShiftVB src shift)); ++ format %{ "xvsrli.b $dst, $src, $shift\t# @srl32B_imm" %} ++ ins_encode %{ ++ if ($shift$$constant >= 8) { ++ __ xvxor_v($dst$$FloatRegister, $dst$$FloatRegister, $dst$$FloatRegister); ++ } else { ++ __ xvsrli_b($dst$$FloatRegister, $src$$FloatRegister, $shift$$constant); ++ } ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct srl16S(vecY dst, vecY src, vecY shift, vecY tmp) %{ ++ 
predicate(n->as_Vector()->length() == 16); ++ match(Set dst (URShiftVS src shift)); ++ effect(TEMP_DEF dst, TEMP tmp); ++ format %{ "xvsrl $dst, $src, $shift\t# TEMP($tmp) @srl16S" %} ++ ins_encode %{ ++ __ xvsrl_h($tmp$$FloatRegister, $src$$FloatRegister, $shift$$FloatRegister); ++ __ xvslti_bu($dst$$FloatRegister, $shift$$FloatRegister, 0x10); ++ __ xvand_v($dst$$FloatRegister, $dst$$FloatRegister, $tmp$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct srl16S_imm(vecY dst, vecY src, immI shift) %{ ++ predicate(n->as_Vector()->length() == 16); ++ match(Set dst (URShiftVS src shift)); ++ format %{ "xvsrli.h $dst, $src, $shift\t# @srl16S_imm" %} ++ ins_encode %{ ++ if ($shift$$constant >= 16) { ++ __ xvxor_v($dst$$FloatRegister, $dst$$FloatRegister, $dst$$FloatRegister); ++ } else { ++ __ xvsrli_h($dst$$FloatRegister, $src$$FloatRegister, $shift$$constant); ++ } ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct srl8I(vecY dst, vecY src, vecY shift) %{ ++ predicate(n->as_Vector()->length() == 8); ++ match(Set dst (URShiftVI src shift)); ++ format %{ "xvsrl.w $dst, $src, $shift\t# @srl8I" %} ++ ins_encode %{ ++ __ xvsrl_w($dst$$FloatRegister, $src$$FloatRegister, $shift$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct srl8I_imm(vecY dst, vecY src, immI shift) %{ ++ predicate(n->as_Vector()->length() == 8); ++ match(Set dst (URShiftVI src shift)); ++ format %{ "xvsrli.w $dst, $src, $shift\t# @srl8I_imm" %} ++ ins_encode %{ ++ __ xvsrli_w($dst$$FloatRegister, $src$$FloatRegister, $shift$$constant); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct srl4L(vecY dst, vecY src, vecY shift) %{ ++ predicate(n->as_Vector()->length() == 4); ++ match(Set dst (URShiftVL src shift)); ++ format %{ "xvsrl.d $dst, $src, $shift\t# @srl4L" %} ++ ins_encode %{ ++ __ xvsrl_d($dst$$FloatRegister, $src$$FloatRegister, $shift$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct srl4L_imm(vecY dst, vecY src, immI shift) %{ ++ predicate(n->as_Vector()->length() == 4); ++ match(Set dst (URShiftVL src shift)); ++ format %{ "xvsrli.d $dst, $src, $shift\t# @srl4L_imm" %} ++ ins_encode %{ ++ __ xvsrli_d($dst$$FloatRegister, $src$$FloatRegister, $shift$$constant); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++// ------------------------- ArithmeticRightShift ----------------------------- ++ ++instruct sra16B(vecX dst, vecX src, vecX shift, vecX tmp) %{ ++ predicate(n->as_Vector()->length() == 16); ++ match(Set dst (RShiftVB src shift)); ++ effect(TEMP tmp); ++ format %{ "vsra $dst, $src, $shift\t# TEMP($tmp) @sra16B" %} ++ ins_encode %{ ++ __ vslti_bu($tmp$$FloatRegister, $shift$$FloatRegister, 0x8); ++ __ vorn_v($tmp$$FloatRegister, $shift$$FloatRegister, $tmp$$FloatRegister); ++ __ vsra_b($dst$$FloatRegister, $src$$FloatRegister, $tmp$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct sra16B_imm(vecX dst, vecX src, immI shift) %{ ++ predicate(n->as_Vector()->length() == 16); ++ match(Set dst (RShiftVB src shift)); ++ format %{ "vsrai.b $dst, $src, $shift\t# @sra16B_imm" %} ++ ins_encode %{ ++ if ($shift$$constant >= 8) { ++ __ vsrai_b($dst$$FloatRegister, $src$$FloatRegister, 7); ++ } else { ++ __ vsrai_b($dst$$FloatRegister, $src$$FloatRegister, $shift$$constant); ++ } ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct sra8S(vecX dst, vecX src, vecX shift, vecX tmp) %{ ++ predicate(n->as_Vector()->length() == 8); ++ match(Set dst (RShiftVS src shift)); ++ effect(TEMP tmp); ++ format %{ "vsra $dst, $src, $shift\t# TEMP($tmp) @sra8S" %} ++ ins_encode %{ ++ 
__ vslti_bu($tmp$$FloatRegister, $shift$$FloatRegister, 0x10); ++ __ vorn_v($tmp$$FloatRegister, $shift$$FloatRegister, $tmp$$FloatRegister); ++ __ vsra_h($dst$$FloatRegister, $src$$FloatRegister, $tmp$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct sra8S_imm(vecX dst, vecX src, immI shift) %{ ++ predicate(n->as_Vector()->length() == 8); ++ match(Set dst (RShiftVS src shift)); ++ format %{ "vsrai.h $dst, $src, $shift\t# @sra8S_imm" %} ++ ins_encode %{ ++ if ($shift$$constant >= 16) { ++ __ vsrai_h($dst$$FloatRegister, $src$$FloatRegister, 15); ++ } else { ++ __ vsrai_h($dst$$FloatRegister, $src$$FloatRegister, $shift$$constant); ++ } ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct sra4I(vecX dst, vecX src, vecX shift) %{ ++ predicate(n->as_Vector()->length() == 4); ++ match(Set dst (RShiftVI src shift)); ++ format %{ "vsra.w $dst, $src, $shift\t# @sra4I" %} ++ ins_encode %{ ++ __ vsra_w($dst$$FloatRegister, $src$$FloatRegister, $shift$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct sra4I_imm(vecX dst, vecX src, immI shift) %{ ++ predicate(n->as_Vector()->length() == 4); ++ match(Set dst (RShiftVI src shift)); ++ format %{ "vsrai.w $dst, $src, $shift\t# @sra4I_imm" %} ++ ins_encode %{ ++ __ vsrai_w($dst$$FloatRegister, $src$$FloatRegister, $shift$$constant); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct sra2L(vecX dst, vecX src, vecX shift) %{ ++ predicate(n->as_Vector()->length() == 2); ++ match(Set dst (RShiftVL src shift)); ++ format %{ "vsra.d $dst, $src, $shift\t# @sra2L" %} ++ ins_encode %{ ++ __ vsra_d($dst$$FloatRegister, $src$$FloatRegister, $shift$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct sra2L_imm(vecX dst, vecX src, immI shift) %{ ++ predicate(n->as_Vector()->length() == 2); ++ match(Set dst (RShiftVL src shift)); ++ format %{ "vsrai.d $dst, $src, $shift\t# @sra2L_imm" %} ++ ins_encode %{ ++ __ vsrai_d($dst$$FloatRegister, $src$$FloatRegister, $shift$$constant); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct sra32B(vecY dst, vecY src, vecY shift, vecY tmp) %{ ++ predicate(n->as_Vector()->length() == 32); ++ match(Set dst (RShiftVB src shift)); ++ effect(TEMP tmp); ++ format %{ "xvsra $dst, $src, $shift\t# TEMP($tmp) @sra32B" %} ++ ins_encode %{ ++ __ xvslti_bu($tmp$$FloatRegister, $shift$$FloatRegister, 0x8); ++ __ xvorn_v($tmp$$FloatRegister, $shift$$FloatRegister, $tmp$$FloatRegister); ++ __ xvsra_b($dst$$FloatRegister, $src$$FloatRegister, $tmp$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct sra32B_imm(vecY dst, vecY src, immI shift) %{ ++ predicate(n->as_Vector()->length() == 32); ++ match(Set dst (RShiftVB src shift)); ++ format %{ "xvsrai.b $dst, $src, $shift\t# @sra32B_imm" %} ++ ins_encode %{ ++ if ($shift$$constant >= 8) { ++ __ xvsrai_b($dst$$FloatRegister, $src$$FloatRegister, 7); ++ } else { ++ __ xvsrai_b($dst$$FloatRegister, $src$$FloatRegister, $shift$$constant); ++ } ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct sra16S(vecY dst, vecY src, vecY shift, vecY tmp) %{ ++ predicate(n->as_Vector()->length() == 16); ++ match(Set dst (RShiftVS src shift)); ++ effect(TEMP tmp); ++ format %{ "xvsra $dst, $src, $shift\t# TEMP($tmp) @sra16S" %} ++ ins_encode %{ ++ __ xvslti_bu($tmp$$FloatRegister, $shift$$FloatRegister, 0x10); ++ __ xvorn_v($tmp$$FloatRegister, $shift$$FloatRegister, $tmp$$FloatRegister); ++ __ xvsra_h($dst$$FloatRegister, $src$$FloatRegister, $tmp$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct sra16S_imm(vecY dst, vecY src, immI shift) 
%{ ++ predicate(n->as_Vector()->length() == 16); ++ match(Set dst (RShiftVS src shift)); ++ format %{ "xvsrai.h $dst, $src, $shift\t# @sra16S_imm" %} ++ ins_encode %{ ++ if ($shift$$constant >= 16) { ++ __ xvsrai_h($dst$$FloatRegister, $src$$FloatRegister, 15); ++ } else { ++ __ xvsrai_h($dst$$FloatRegister, $src$$FloatRegister, $shift$$constant); ++ } ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct sra8I(vecY dst, vecY src, vecY shift) %{ ++ predicate(n->as_Vector()->length() == 8); ++ match(Set dst (RShiftVI src shift)); ++ format %{ "xvsra.w $dst, $src, $shift\t# @sra8I" %} ++ ins_encode %{ ++ __ xvsra_w($dst$$FloatRegister, $src$$FloatRegister, $shift$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct sra8I_imm(vecY dst, vecY src, immI shift) %{ ++ predicate(n->as_Vector()->length() == 8); ++ match(Set dst (RShiftVI src shift)); ++ format %{ "xvsrai.w $dst, $src, $shift\t# @sra8I_imm" %} ++ ins_encode %{ ++ __ xvsrai_w($dst$$FloatRegister, $src$$FloatRegister, $shift$$constant); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct sra4L(vecY dst, vecY src, vecY shift) %{ ++ predicate(n->as_Vector()->length() == 4); ++ match(Set dst (RShiftVL src shift)); ++ format %{ "xvsra.d $dst, $src, $shift\t# @sra4L" %} ++ ins_encode %{ ++ __ xvsra_d($dst$$FloatRegister, $src$$FloatRegister, $shift$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct sra4L_imm(vecY dst, vecY src, immI shift) %{ ++ predicate(n->as_Vector()->length() == 4); ++ match(Set dst (RShiftVL src shift)); ++ format %{ "xvsrai.d $dst, $src, $shift\t# @sra4L_imm" %} ++ ins_encode %{ ++ __ xvsrai_d($dst$$FloatRegister, $src$$FloatRegister, $shift$$constant); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++// --------------------------------- AND -------------------------------------- ++ ++instruct andV16(vecX dst, vecX src1, vecX src2) %{ ++ predicate(n->as_Vector()->length_in_bytes() == 16); ++ match(Set dst (AndV src1 src2)); ++ format %{ "vand.v $dst, $src1, $src2\t# @andV16" %} ++ ins_encode %{ ++ __ vand_v($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct and16B_imm(vecX dst, vecX src, immIU8 imm) %{ ++ predicate(n->as_Vector()->length() == 16); ++ match(Set dst (AndV src (ReplicateB imm))); ++ format %{ "vandi.b $dst, $src, $imm\t# @and16B_imm" %} ++ ins_encode %{ ++ __ vandi_b($dst$$FloatRegister, $src$$FloatRegister, $imm$$constant); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct andV32(vecY dst, vecY src1, vecY src2) %{ ++ predicate(n->as_Vector()->length_in_bytes() == 32); ++ match(Set dst (AndV src1 src2)); ++ format %{ "xvand.v $dst, $src1, $src2\t# @andV32" %} ++ ins_encode %{ ++ __ xvand_v($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct and32B_imm(vecY dst, vecY src, immIU8 imm) %{ ++ predicate(n->as_Vector()->length() == 32); ++ match(Set dst (AndV src (ReplicateB imm))); ++ format %{ "xvandi.b $dst, $src, $imm\t# @and32B_imm" %} ++ ins_encode %{ ++ __ xvandi_b($dst$$FloatRegister, $src$$FloatRegister, $imm$$constant); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++// --------------------------------- OR --------------------------------------- ++ ++instruct orV16(vecX dst, vecX src1, vecX src2) %{ ++ predicate(n->as_Vector()->length_in_bytes() == 16); ++ match(Set dst (OrV src1 src2)); ++ format %{ "vor.v $dst, $src1, $src2\t# @orV16" %} ++ ins_encode %{ ++ __ vor_v($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); ++ %} ++ ins_pipe( 
pipe_slow ); ++%} ++ ++instruct or16B_imm(vecX dst, vecX src, immIU8 imm) %{ ++ predicate(n->as_Vector()->length() == 16); ++ match(Set dst (OrV src (ReplicateB imm))); ++ format %{ "vori.b $dst, $src, $imm\t# @or16B_imm" %} ++ ins_encode %{ ++ __ vori_b($dst$$FloatRegister, $src$$FloatRegister, $imm$$constant); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct orV32(vecY dst, vecY src1, vecY src2) %{ ++ predicate(n->as_Vector()->length_in_bytes() == 32); ++ match(Set dst (OrV src1 src2)); ++ format %{ "xvor.v $dst, $src1, $src2\t# @orV32" %} ++ ins_encode %{ ++ __ xvor_v($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct or32B_imm(vecY dst, vecY src, immIU8 imm) %{ ++ predicate(n->as_Vector()->length() == 32); ++ match(Set dst (OrV src (ReplicateB imm))); ++ format %{ "xvori.b $dst, $src, $imm\t# @or32B_imm" %} ++ ins_encode %{ ++ __ xvori_b($dst$$FloatRegister, $src$$FloatRegister, $imm$$constant); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++// --------------------------------- XOR -------------------------------------- ++ ++instruct xorV16(vecX dst, vecX src1, vecX src2) %{ ++ predicate(n->as_Vector()->length_in_bytes() == 16); ++ match(Set dst (XorV src1 src2)); ++ format %{ "vxor.v $dst, $src1, $src2\t# @xorV16" %} ++ ins_encode %{ ++ __ vxor_v($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct xor16B_imm(vecX dst, vecX src, immIU8 imm) %{ ++ predicate(n->as_Vector()->length() == 16); ++ match(Set dst (XorV src (ReplicateB imm))); ++ format %{ "vxori.b $dst, $src, $imm\t# @xor16B_imm" %} ++ ins_encode %{ ++ __ vxori_b($dst$$FloatRegister, $src$$FloatRegister, $imm$$constant); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct xorV32(vecY dst, vecY src1, vecY src2) %{ ++ predicate(n->as_Vector()->length_in_bytes() == 32); ++ match(Set dst (XorV src1 src2)); ++ format %{ "xvxor.v $dst, $src1, $src2\t# @xorV32" %} ++ ins_encode %{ ++ __ xvxor_v($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct xor32B_imm(vecX dst, vecX src, immIU8 imm) %{ ++ predicate(n->as_Vector()->length() == 32); ++ match(Set dst (XorV src (ReplicateB imm))); ++ format %{ "xvxori.b $dst, $src, $imm\t# @xor32B_imm" %} ++ ins_encode %{ ++ __ xvxori_b($dst$$FloatRegister, $src$$FloatRegister, $imm$$constant); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++// --------------------------------- NOR -------------------------------------- ++ ++instruct norV16(vecX dst, vecX src1, vecX src2, immI_M1 m1) %{ ++ predicate(n->as_Vector()->length_in_bytes() == 16); ++ match(Set dst (XorV (OrV src1 src2) (ReplicateB m1))); ++ match(Set dst (XorV (OrV src1 src2) (ReplicateS m1))); ++ match(Set dst (XorV (OrV src1 src2) (ReplicateI m1))); ++ format %{ "vnor.v $dst, $src1, $src2\t# @norV16" %} ++ ins_encode %{ ++ __ vnor_v($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct nor16B_imm(vecX dst, vecX src, immIU8 imm, immI_M1 m1) %{ ++ predicate(n->as_Vector()->length() == 16); ++ match(Set dst (XorV (OrV src (ReplicateB imm)) (ReplicateB m1))); ++ format %{ "vnori.b $dst, $src, $imm\t# @nor16B_imm" %} ++ ins_encode %{ ++ __ vnori_b($dst$$FloatRegister, $src$$FloatRegister, $imm$$constant); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct norV32(vecY dst, vecY src1, vecY src2, immI_M1 m1) %{ ++ predicate(n->as_Vector()->length_in_bytes() == 32); ++ match(Set dst (XorV (OrV src1 
src2) (ReplicateB m1))); ++ match(Set dst (XorV (OrV src1 src2) (ReplicateS m1))); ++ match(Set dst (XorV (OrV src1 src2) (ReplicateI m1))); ++ format %{ "xvnor.v $dst, $src1, $src2\t# @norV32" %} ++ ins_encode %{ ++ __ xvnor_v($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct nor32B_imm(vecY dst, vecY src, immIU8 imm, immI_M1 m1) %{ ++ predicate(n->as_Vector()->length() == 32); ++ match(Set dst (XorV (OrV src (ReplicateB imm)) (ReplicateB m1))); ++ format %{ "xvnori.b $dst, $src, $imm\t# @nor32B_imm" %} ++ ins_encode %{ ++ __ xvnori_b($dst$$FloatRegister, $src$$FloatRegister, $imm$$constant); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++// --------------------------------- ANDN ------------------------------------- ++ ++instruct andnV16(vecX dst, vecX src1, vecX src2, immI_M1 m1) %{ ++ predicate(n->as_Vector()->length_in_bytes() == 16); ++ match(Set dst (AndV src2 (XorV src1 (ReplicateB m1)))); ++ match(Set dst (AndV src2 (XorV src1 (ReplicateS m1)))); ++ match(Set dst (AndV src2 (XorV src1 (ReplicateI m1)))); ++ format %{ "vandn.v $dst, $src1, $src2\t# @andnV16" %} ++ ins_encode %{ ++ __ vandn_v($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct andnV32(vecY dst, vecY src1, vecY src2, immI_M1 m1) %{ ++ predicate(n->as_Vector()->length_in_bytes() == 32); ++ match(Set dst (AndV src2 (XorV src1 (ReplicateB m1)))); ++ match(Set dst (AndV src2 (XorV src1 (ReplicateS m1)))); ++ match(Set dst (AndV src2 (XorV src1 (ReplicateI m1)))); ++ format %{ "xvandn.v $dst, $src1, $src2\t# @andnV32" %} ++ ins_encode %{ ++ __ xvandn_v($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++// --------------------------------- ORN -------------------------------------- ++ ++instruct ornV16(vecX dst, vecX src1, vecX src2, immI_M1 m1) %{ ++ predicate(n->as_Vector()->length_in_bytes() == 16); ++ match(Set dst (OrV src1 (XorV src2 (ReplicateB m1)))); ++ match(Set dst (OrV src1 (XorV src2 (ReplicateS m1)))); ++ match(Set dst (OrV src1 (XorV src2 (ReplicateI m1)))); ++ format %{ "vorn.v $dst, $src1, $src2\t# @ornV16" %} ++ ins_encode %{ ++ __ vorn_v($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct ornV32(vecY dst, vecY src1, vecY src2, immI_M1 m1) %{ ++ predicate(n->as_Vector()->length_in_bytes() == 32); ++ match(Set dst (OrV src1 (XorV src2 (ReplicateB m1)))); ++ match(Set dst (OrV src1 (XorV src2 (ReplicateS m1)))); ++ match(Set dst (OrV src1 (XorV src2 (ReplicateI m1)))); ++ format %{ "xvorn.v $dst, $src1, $src2\t# @ornV32" %} ++ ins_encode %{ ++ __ xvorn_v($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++// ----------------------------- Reduction Add -------------------------------- ++ ++instruct reduce_add16B(mRegI dst, mRegI src, vecX vsrc, vecX tmp1, vecX tmp2) %{ ++ predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_BYTE); ++ match(Set dst (AddReductionVI src vsrc)); ++ effect(TEMP_DEF dst, TEMP tmp1, TEMP tmp2); ++ format %{ "reduce $dst, $src, $vsrc\t# TEMP($tmp1, $tmp2) @reduce_add16B" %} ++ ins_encode %{ ++ __ reduce($dst$$Register, $src$$Register, $vsrc$$FloatRegister, $tmp1$$FloatRegister, $tmp2$$FloatRegister, T_BYTE, this->ideal_Opcode(), 16); ++ %} ++ ins_pipe(pipe_slow); ++%} ++ ++instruct reduce_add8S(mRegI dst, mRegI src, vecX vsrc, vecX tmp1, vecX tmp2) %{ ++ 
predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_SHORT); ++ match(Set dst (AddReductionVI src vsrc)); ++ effect(TEMP_DEF dst, TEMP tmp1, TEMP tmp2); ++ format %{ "reduce $dst, $src, $vsrc\t# TEMP($tmp1, $tmp2) @reduce_add8S" %} ++ ins_encode %{ ++ __ reduce($dst$$Register, $src$$Register, $vsrc$$FloatRegister, $tmp1$$FloatRegister, $tmp2$$FloatRegister, T_SHORT, this->ideal_Opcode(), 16); ++ %} ++ ins_pipe(pipe_slow); ++%} ++ ++instruct reduce_add4I(mRegI dst, mRegI src, vecX vsrc, vecX tmp1, vecX tmp2) %{ ++ predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_INT); ++ match(Set dst (AddReductionVI src vsrc)); ++ effect(TEMP_DEF dst, TEMP tmp1, TEMP tmp2); ++ format %{ "reduce $dst, $src, $vsrc\t# TEMP($tmp1, $tmp2) @reduce_add4I" %} ++ ins_encode %{ ++ __ reduce($dst$$Register, $src$$Register, $vsrc$$FloatRegister, $tmp1$$FloatRegister, $tmp2$$FloatRegister, T_INT, this->ideal_Opcode(), 16); ++ %} ++ ins_pipe(pipe_slow); ++%} ++ ++instruct reduce_add2L(mRegL dst, mRegL src, vecX vsrc, vecX tmp) %{ ++ predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_LONG); ++ match(Set dst (AddReductionVL src vsrc)); ++ effect(TEMP_DEF dst, TEMP tmp); ++ format %{ "reduce $dst, $src, $vsrc\t# TEMP($tmp) @reduce_add2L" %} ++ ins_encode %{ ++ __ reduce($dst$$Register, $src$$Register, $vsrc$$FloatRegister, $tmp$$FloatRegister, FNOREG, T_LONG, this->ideal_Opcode(), 16); ++ %} ++ ins_pipe(pipe_slow); ++%} ++ ++instruct reduce_add4F(regF dst, regF src, vecX vsrc, vecX tmp) %{ ++ predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_FLOAT); ++ match(Set dst (AddReductionVF src vsrc)); ++ effect(TEMP_DEF dst, TEMP tmp); ++ format %{ "reduce $dst, $src, $vsrc\t# TEMP($tmp) @reduce_add4F" %} ++ ins_encode %{ ++ __ reduce($dst$$FloatRegister, $src$$FloatRegister, $vsrc$$FloatRegister, $tmp$$FloatRegister, T_FLOAT, this->ideal_Opcode(), 16); ++ %} ++ ins_pipe(pipe_slow); ++%} ++ ++instruct reduce_add2D(regD dst, regD src, vecX vsrc, vecX tmp) %{ ++ predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE); ++ match(Set dst (AddReductionVD src vsrc)); ++ effect(TEMP_DEF dst, TEMP tmp); ++ format %{ "reduce $dst, $src, $vsrc\t# TEMP($tmp) @reduce_add2D" %} ++ ins_encode %{ ++ __ reduce($dst$$FloatRegister, $src$$FloatRegister, $vsrc$$FloatRegister, $tmp$$FloatRegister, T_DOUBLE, this->ideal_Opcode(), 16); ++ %} ++ ins_pipe(pipe_slow); ++%} ++ ++instruct reduce_add32B(mRegI dst, mRegI src, vecY vsrc, vecY tmp1, vecY tmp2) %{ ++ predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_BYTE); ++ match(Set dst (AddReductionVI src vsrc)); ++ effect(TEMP_DEF dst, TEMP tmp1, TEMP tmp2); ++ format %{ "reduce $dst, $src, $vsrc\t# TEMP($tmp1, $tmp2) @reduce_add32B" %} ++ ins_encode %{ ++ __ reduce($dst$$Register, $src$$Register, $vsrc$$FloatRegister, $tmp1$$FloatRegister, $tmp2$$FloatRegister, T_BYTE, this->ideal_Opcode(), 32); ++ %} ++ ins_pipe(pipe_slow); ++%} ++ ++instruct reduce_add16S(mRegI dst, mRegI src, vecY vsrc, vecY tmp1, vecY tmp2) %{ ++ predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_SHORT); ++ match(Set dst (AddReductionVI src vsrc)); ++ effect(TEMP_DEF dst, TEMP tmp1, TEMP tmp2); ++ format %{ "reduce $dst, $src, $vsrc\t# TEMP($tmp1, $tmp2) @reduce_add16S" %} ++ ins_encode %{ ++ __ reduce($dst$$Register, $src$$Register, $vsrc$$FloatRegister, $tmp1$$FloatRegister, $tmp2$$FloatRegister, T_SHORT, this->ideal_Opcode(), 32); ++ %} ++ ins_pipe(pipe_slow); ++%} ++ ++instruct 
reduce_add8I(mRegI dst, mRegI src, vecY vsrc, vecY tmp1, vecY tmp2) %{ ++ predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_INT); ++ match(Set dst (AddReductionVI src vsrc)); ++ effect(TEMP_DEF dst, TEMP tmp1, TEMP tmp2); ++ format %{ "reduce $dst, $src, $vsrc\t# TEMP($tmp1, $tmp2) @reduce_add8I" %} ++ ins_encode %{ ++ __ reduce($dst$$Register, $src$$Register, $vsrc$$FloatRegister, $tmp1$$FloatRegister, $tmp2$$FloatRegister, T_INT, this->ideal_Opcode(), 32); ++ %} ++ ins_pipe(pipe_slow); ++%} ++ ++instruct reduce_add4L(mRegL dst, mRegL src, vecY vsrc, vecY tmp1, vecY tmp2) %{ ++ predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_LONG); ++ match(Set dst (AddReductionVL src vsrc)); ++ effect(TEMP_DEF dst, TEMP tmp1, TEMP tmp2); ++ format %{ "reduce $dst, $src, $vsrc\t# TEMP($tmp1, $tmp2) @reduce_add4L" %} ++ ins_encode %{ ++ __ reduce($dst$$Register, $src$$Register, $vsrc$$FloatRegister, $tmp1$$FloatRegister, $tmp2$$FloatRegister, T_LONG, this->ideal_Opcode(), 32); ++ %} ++ ins_pipe(pipe_slow); ++%} ++ ++instruct reduce_add8F(regF dst, regF src, vecY vsrc, vecY tmp) %{ ++ predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_FLOAT); ++ match(Set dst (AddReductionVF src vsrc)); ++ effect(TEMP_DEF dst, TEMP tmp); ++ format %{ "reduce $dst, $src, $vsrc\t# TEMP($tmp) @reduce_add8F" %} ++ ins_encode %{ ++ __ reduce($dst$$FloatRegister, $src$$FloatRegister, $vsrc$$FloatRegister, $tmp$$FloatRegister, T_FLOAT, this->ideal_Opcode(), 32); ++ %} ++ ins_pipe(pipe_slow); ++%} ++ ++instruct reduce_add4D(regD dst, regD src, vecY vsrc, vecY tmp) %{ ++ predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE); ++ match(Set dst (AddReductionVD src vsrc)); ++ effect(TEMP_DEF dst, TEMP tmp); ++ format %{ "reduce $dst, $src, $vsrc\t# TEMP($tmp) @reduce_add4D" %} ++ ins_encode %{ ++ __ reduce($dst$$FloatRegister, $src$$FloatRegister, $vsrc$$FloatRegister, $tmp$$FloatRegister, T_DOUBLE, this->ideal_Opcode(), 32); ++ %} ++ ins_pipe(pipe_slow); ++%} ++ ++// ----------------------------- Reduction Mul -------------------------------- ++ ++instruct reduce_mul16B(mRegI dst, mRegI src, vecX vsrc, vecX tmp1, vecX tmp2) %{ ++ predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_BYTE); ++ match(Set dst (MulReductionVI src vsrc)); ++ effect(TEMP_DEF dst, TEMP tmp1, TEMP tmp2); ++ format %{ "reduce $dst, $src, $vsrc\t# TEMP($tmp1, $tmp2) @reduce_mul16B" %} ++ ins_encode %{ ++ __ reduce($dst$$Register, $src$$Register, $vsrc$$FloatRegister, $tmp1$$FloatRegister, $tmp2$$FloatRegister, T_BYTE, this->ideal_Opcode(), 16); ++ %} ++ ins_pipe(pipe_slow); ++%} ++ ++instruct reduce_mul8S(mRegI dst, mRegI src, vecX vsrc, vecX tmp1, vecX tmp2) %{ ++ predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_SHORT); ++ match(Set dst (MulReductionVI src vsrc)); ++ effect(TEMP_DEF dst, TEMP tmp1, TEMP tmp2); ++ format %{ "reduce $dst, $src, $vsrc\t# TEMP($tmp1, $tmp2) @reduce_mul8S" %} ++ ins_encode %{ ++ __ reduce($dst$$Register, $src$$Register, $vsrc$$FloatRegister, $tmp1$$FloatRegister, $tmp2$$FloatRegister, T_SHORT, this->ideal_Opcode(), 16); ++ %} ++ ins_pipe(pipe_slow); ++%} ++ ++instruct reduce_mul4I(mRegI dst, mRegI src, vecX vsrc, vecX tmp1, vecX tmp2) %{ ++ predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_INT); ++ match(Set dst (MulReductionVI src vsrc)); ++ effect(TEMP_DEF dst, TEMP tmp1, TEMP tmp2); ++ format %{ "reduce $dst, $src, $vsrc\t# TEMP($tmp1, $tmp2) @reduce_mul4I" %} ++ ins_encode %{ 
++ __ reduce($dst$$Register, $src$$Register, $vsrc$$FloatRegister, $tmp1$$FloatRegister, $tmp2$$FloatRegister, T_INT, this->ideal_Opcode(), 16); ++ %} ++ ins_pipe(pipe_slow); ++%} ++ ++instruct reduce_mul2L(mRegL dst, mRegL src, vecX vsrc, vecX tmp) %{ ++ predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_LONG); ++ match(Set dst (MulReductionVL src vsrc)); ++ effect(TEMP_DEF dst, TEMP tmp); ++ format %{ "reduce $dst, $src, $vsrc\t# TEMP($tmp) @reduce_mul2L" %} ++ ins_encode %{ ++ __ reduce($dst$$Register, $src$$Register, $vsrc$$FloatRegister, $tmp$$FloatRegister, FNOREG, T_LONG, this->ideal_Opcode(), 16); ++ %} ++ ins_pipe(pipe_slow); ++%} ++ ++instruct reduce_mul4F(regF dst, regF src, vecX vsrc, vecX tmp) %{ ++ predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_FLOAT); ++ match(Set dst (MulReductionVF src vsrc)); ++ effect(TEMP_DEF dst, TEMP tmp); ++ format %{ "reduce $dst, $src, $vsrc\t# TEMP($tmp) @reduce_mul4F" %} ++ ins_encode %{ ++ __ reduce($dst$$FloatRegister, $src$$FloatRegister, $vsrc$$FloatRegister, $tmp$$FloatRegister, T_FLOAT, this->ideal_Opcode(), 16); ++ %} ++ ins_pipe(pipe_slow); ++%} ++ ++instruct reduce_mul2D(regD dst, regD src, vecX vsrc, vecX tmp) %{ ++ predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE); ++ match(Set dst (MulReductionVD src vsrc)); ++ effect(TEMP_DEF dst, TEMP tmp); ++ format %{ "reduce $dst, $src, $vsrc\t# TEMP($tmp) @reduce_mul2D" %} ++ ins_encode %{ ++ __ reduce($dst$$FloatRegister, $src$$FloatRegister, $vsrc$$FloatRegister, $tmp$$FloatRegister, T_DOUBLE, this->ideal_Opcode(), 16); ++ %} ++ ins_pipe(pipe_slow); ++%} ++ ++instruct reduce_mul32B(mRegI dst, mRegI src, vecY vsrc, vecY tmp1, vecY tmp2) %{ ++ predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_BYTE); ++ match(Set dst (MulReductionVI src vsrc)); ++ effect(TEMP_DEF dst, TEMP tmp1, TEMP tmp2); ++ format %{ "reduce $dst, $src, $vsrc\t# TEMP($tmp1, $tmp2) @reduce_mul32B" %} ++ ins_encode %{ ++ __ reduce($dst$$Register, $src$$Register, $vsrc$$FloatRegister, $tmp1$$FloatRegister, $tmp2$$FloatRegister, T_BYTE, this->ideal_Opcode(), 32); ++ %} ++ ins_pipe(pipe_slow); ++%} ++ ++instruct reduce_mul16S(mRegI dst, mRegI src, vecY vsrc, vecY tmp1, vecY tmp2) %{ ++ predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_SHORT); ++ match(Set dst (MulReductionVI src vsrc)); ++ effect(TEMP_DEF dst, TEMP tmp1, TEMP tmp2); ++ format %{ "reduce $dst, $src, $vsrc\t# TEMP($tmp1, $tmp2) @reduce_mul16S" %} ++ ins_encode %{ ++ __ reduce($dst$$Register, $src$$Register, $vsrc$$FloatRegister, $tmp1$$FloatRegister, $tmp2$$FloatRegister, T_SHORT, this->ideal_Opcode(), 32); ++ %} ++ ins_pipe(pipe_slow); ++%} ++ ++instruct reduce_mul8I(mRegI dst, mRegI src, vecY vsrc, vecY tmp1, vecY tmp2) %{ ++ predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_INT); ++ match(Set dst (MulReductionVI src vsrc)); ++ effect(TEMP_DEF dst, TEMP tmp1, TEMP tmp2); ++ format %{ "reduce $dst, $src, $vsrc\t# TEMP($tmp1, $tmp2) @reduce_mul8I" %} ++ ins_encode %{ ++ __ reduce($dst$$Register, $src$$Register, $vsrc$$FloatRegister, $tmp1$$FloatRegister, $tmp2$$FloatRegister, T_INT, this->ideal_Opcode(), 32); ++ %} ++ ins_pipe(pipe_slow); ++%} ++ ++instruct reduce_mul4L(mRegL dst, mRegL src, vecY vsrc, vecY tmp1, vecY tmp2) %{ ++ predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_LONG); ++ match(Set dst (MulReductionVL src vsrc)); ++ effect(TEMP_DEF dst, TEMP tmp1, TEMP tmp2); ++ format %{ "reduce $dst, 
$src, $vsrc\t# TEMP($tmp1, $tmp2) @reduce_mul4L" %} ++ ins_encode %{ ++ __ reduce($dst$$Register, $src$$Register, $vsrc$$FloatRegister, $tmp1$$FloatRegister, $tmp2$$FloatRegister, T_LONG, this->ideal_Opcode(), 32); ++ %} ++ ins_pipe(pipe_slow); ++%} ++ ++instruct reduce_mul8F(regF dst, regF src, vecY vsrc, vecY tmp) %{ ++ predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_FLOAT); ++ match(Set dst (MulReductionVF src vsrc)); ++ effect(TEMP_DEF dst, TEMP tmp); ++ format %{ "reduce $dst, $src, $vsrc\t# TEMP($tmp) @reduce_mul8F" %} ++ ins_encode %{ ++ __ reduce($dst$$FloatRegister, $src$$FloatRegister, $vsrc$$FloatRegister, $tmp$$FloatRegister, T_FLOAT, this->ideal_Opcode(), 32); ++ %} ++ ins_pipe(pipe_slow); ++%} ++ ++instruct reduce_mul4D(regD dst, regD src, vecY vsrc, vecY tmp) %{ ++ predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE); ++ match(Set dst (MulReductionVD src vsrc)); ++ effect(TEMP_DEF dst, TEMP tmp); ++ format %{ "reduce $dst, $src, $vsrc\t# TEMP($tmp) @reduce_mul4D" %} ++ ins_encode %{ ++ __ reduce($dst$$FloatRegister, $src$$FloatRegister, $vsrc$$FloatRegister, $tmp$$FloatRegister, T_DOUBLE, this->ideal_Opcode(), 32); ++ %} ++ ins_pipe(pipe_slow); ++%} ++ ++// ----------------------------- Reduction Max -------------------------------- ++ ++instruct reduce_max16B(mRegI dst, mRegI src, vecX vsrc, vecX tmp1, vecX tmp2) %{ ++ predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_BYTE); ++ match(Set dst (MaxReductionV src vsrc)); ++ effect(TEMP_DEF dst, TEMP tmp1, TEMP tmp2); ++ format %{ "reduce $dst, $src, $vsrc\t# TEMP($tmp1, $tmp2) @reduce_max16B" %} ++ ins_encode %{ ++ __ reduce($dst$$Register, $src$$Register, $vsrc$$FloatRegister, $tmp1$$FloatRegister, $tmp2$$FloatRegister, T_BYTE, this->ideal_Opcode(), 16); ++ %} ++ ins_pipe(pipe_slow); ++%} ++ ++instruct reduce_max8S(mRegI dst, mRegI src, vecX vsrc, vecX tmp1, vecX tmp2) %{ ++ predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_SHORT); ++ match(Set dst (MaxReductionV src vsrc)); ++ effect(TEMP_DEF dst, TEMP tmp1, TEMP tmp2); ++ format %{ "reduce $dst, $src, $vsrc\t# TEMP($tmp1, $tmp2) @reduce_max8S" %} ++ ins_encode %{ ++ __ reduce($dst$$Register, $src$$Register, $vsrc$$FloatRegister, $tmp1$$FloatRegister, $tmp2$$FloatRegister, T_SHORT, this->ideal_Opcode(), 16); ++ %} ++ ins_pipe(pipe_slow); ++%} ++ ++instruct reduce_max4I(mRegI dst, mRegI src, vecX vsrc, vecX tmp1, vecX tmp2) %{ ++ predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_INT); ++ match(Set dst (MaxReductionV src vsrc)); ++ effect(TEMP_DEF dst, TEMP tmp1, TEMP tmp2); ++ format %{ "reduce $dst, $src, $vsrc\t# TEMP($tmp1, $tmp2) @reduce_max4I" %} ++ ins_encode %{ ++ __ reduce($dst$$Register, $src$$Register, $vsrc$$FloatRegister, $tmp1$$FloatRegister, $tmp2$$FloatRegister, T_INT, this->ideal_Opcode(), 16); ++ %} ++ ins_pipe(pipe_slow); ++%} ++ ++instruct reduce_max2L(mRegL dst, mRegL src, vecX vsrc, vecX tmp) %{ ++ predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_LONG); ++ match(Set dst (MaxReductionV src vsrc)); ++ effect(TEMP_DEF dst, TEMP tmp); ++ format %{ "reduce $dst, $src, $vsrc\t# TEMP($tmp) @reduce_max2L" %} ++ ins_encode %{ ++ __ reduce($dst$$Register, $src$$Register, $vsrc$$FloatRegister, $tmp$$FloatRegister, FNOREG, T_LONG, this->ideal_Opcode(), 16); ++ %} ++ ins_pipe(pipe_slow); ++%} ++ ++instruct reduce_max32B(mRegI dst, mRegI src, vecY vsrc, vecY tmp1, vecY tmp2) %{ ++ 
predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_BYTE); ++ match(Set dst (MaxReductionV src vsrc)); ++ effect(TEMP_DEF dst, TEMP tmp1, TEMP tmp2); ++ format %{ "reduce $dst, $src, $vsrc\t# TEMP($tmp1, $tmp2) @reduce_max32B" %} ++ ins_encode %{ ++ __ reduce($dst$$Register, $src$$Register, $vsrc$$FloatRegister, $tmp1$$FloatRegister, $tmp2$$FloatRegister, T_BYTE, this->ideal_Opcode(), 32); ++ %} ++ ins_pipe(pipe_slow); ++%} ++ ++instruct reduce_max16S(mRegI dst, mRegI src, vecY vsrc, vecY tmp1, vecY tmp2) %{ ++ predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_SHORT); ++ match(Set dst (MaxReductionV src vsrc)); ++ effect(TEMP_DEF dst, TEMP tmp1, TEMP tmp2); ++ format %{ "reduce $dst, $src, $vsrc\t# TEMP($tmp1, $tmp2) @reduce_max16S" %} ++ ins_encode %{ ++ __ reduce($dst$$Register, $src$$Register, $vsrc$$FloatRegister, $tmp1$$FloatRegister, $tmp2$$FloatRegister, T_SHORT, this->ideal_Opcode(), 32); ++ %} ++ ins_pipe(pipe_slow); ++%} ++ ++instruct reduce_max8I(mRegI dst, mRegI src, vecY vsrc, vecY tmp1, vecY tmp2) %{ ++ predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_INT); ++ match(Set dst (MaxReductionV src vsrc)); ++ effect(TEMP_DEF dst, TEMP tmp1, TEMP tmp2); ++ format %{ "reduce $dst, $src, $vsrc\t# TEMP($tmp1, $tmp2) @reduce_max8I" %} ++ ins_encode %{ ++ __ reduce($dst$$Register, $src$$Register, $vsrc$$FloatRegister, $tmp1$$FloatRegister, $tmp2$$FloatRegister, T_INT, this->ideal_Opcode(), 32); ++ %} ++ ins_pipe(pipe_slow); ++%} ++ ++instruct reduce_max4L(mRegL dst, mRegL src, vecY vsrc, vecY tmp1, vecY tmp2) %{ ++ predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_LONG); ++ match(Set dst (MaxReductionV src vsrc)); ++ effect(TEMP_DEF dst, TEMP tmp1, TEMP tmp2); ++ format %{ "reduce $dst, $src, $vsrc\t# TEMP($tmp1, $tmp2) @reduce_max4L" %} ++ ins_encode %{ ++ __ reduce($dst$$Register, $src$$Register, $vsrc$$FloatRegister, $tmp1$$FloatRegister, $tmp2$$FloatRegister, T_LONG, this->ideal_Opcode(), 32); ++ %} ++ ins_pipe(pipe_slow); ++%} ++ ++// ----------------------------- Reduction Min -------------------------------- ++ ++instruct reduce_min16B(mRegI dst, mRegI src, vecX vsrc, vecX tmp1, vecX tmp2) %{ ++ predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_BYTE); ++ match(Set dst (MinReductionV src vsrc)); ++ effect(TEMP_DEF dst, TEMP tmp1, TEMP tmp2); ++ format %{ "reduce $dst, $src, $vsrc\t# TEMP($tmp1, $tmp2) @reduce_min16B" %} ++ ins_encode %{ ++ __ reduce($dst$$Register, $src$$Register, $vsrc$$FloatRegister, $tmp1$$FloatRegister, $tmp2$$FloatRegister, T_BYTE, this->ideal_Opcode(), 16); ++ %} ++ ins_pipe(pipe_slow); ++%} ++ ++instruct reduce_min8S(mRegI dst, mRegI src, vecX vsrc, vecX tmp1, vecX tmp2) %{ ++ predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_SHORT); ++ match(Set dst (MinReductionV src vsrc)); ++ effect(TEMP_DEF dst, TEMP tmp1, TEMP tmp2); ++ format %{ "reduce $dst, $src, $vsrc\t# TEMP($tmp1, $tmp2) @reduce_min8S" %} ++ ins_encode %{ ++ __ reduce($dst$$Register, $src$$Register, $vsrc$$FloatRegister, $tmp1$$FloatRegister, $tmp2$$FloatRegister, T_SHORT, this->ideal_Opcode(), 16); ++ %} ++ ins_pipe(pipe_slow); ++%} ++ ++instruct reduce_min4I(mRegI dst, mRegI src, vecX vsrc, vecX tmp1, vecX tmp2) %{ ++ predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_INT); ++ match(Set dst (MinReductionV src vsrc)); ++ effect(TEMP_DEF dst, TEMP tmp1, TEMP tmp2); ++ format %{ "reduce $dst, $src, $vsrc\t# TEMP($tmp1, $tmp2) @reduce_min4I" %} ++ 
ins_encode %{ ++ __ reduce($dst$$Register, $src$$Register, $vsrc$$FloatRegister, $tmp1$$FloatRegister, $tmp2$$FloatRegister, T_INT, this->ideal_Opcode(), 16); ++ %} ++ ins_pipe(pipe_slow); ++%} ++ ++instruct reduce_min2L(mRegL dst, mRegL src, vecX vsrc, vecX tmp) %{ ++ predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_LONG); ++ match(Set dst (MinReductionV src vsrc)); ++ effect(TEMP_DEF dst, TEMP tmp); ++ format %{ "reduce $dst, $src, $vsrc\t# TEMP($tmp) @reduce_min2L" %} ++ ins_encode %{ ++ __ reduce($dst$$Register, $src$$Register, $vsrc$$FloatRegister, $tmp$$FloatRegister, FNOREG, T_LONG, this->ideal_Opcode(), 16); ++ %} ++ ins_pipe(pipe_slow); ++%} ++ ++instruct reduce_min32B(mRegI dst, mRegI src, vecY vsrc, vecY tmp1, vecY tmp2) %{ ++ predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_BYTE); ++ match(Set dst (MinReductionV src vsrc)); ++ effect(TEMP_DEF dst, TEMP tmp1, TEMP tmp2); ++ format %{ "reduce $dst, $src, $vsrc\t# TEMP($tmp1, $tmp2) @reduce_min32B" %} ++ ins_encode %{ ++ __ reduce($dst$$Register, $src$$Register, $vsrc$$FloatRegister, $tmp1$$FloatRegister, $tmp2$$FloatRegister, T_BYTE, this->ideal_Opcode(), 32); ++ %} ++ ins_pipe(pipe_slow); ++%} ++ ++instruct reduce_min16S(mRegI dst, mRegI src, vecY vsrc, vecY tmp1, vecY tmp2) %{ ++ predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_SHORT); ++ match(Set dst (MinReductionV src vsrc)); ++ effect(TEMP_DEF dst, TEMP tmp1, TEMP tmp2); ++ format %{ "reduce $dst, $src, $vsrc\t# TEMP($tmp1, $tmp2) @reduce_min16S" %} ++ ins_encode %{ ++ __ reduce($dst$$Register, $src$$Register, $vsrc$$FloatRegister, $tmp1$$FloatRegister, $tmp2$$FloatRegister, T_SHORT, this->ideal_Opcode(), 32); ++ %} ++ ins_pipe(pipe_slow); ++%} ++ ++instruct reduce_min8I(mRegI dst, mRegI src, vecY vsrc, vecY tmp1, vecY tmp2) %{ ++ predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_INT); ++ match(Set dst (MinReductionV src vsrc)); ++ effect(TEMP_DEF dst, TEMP tmp1, TEMP tmp2); ++ format %{ "reduce $dst, $src, $vsrc\t# TEMP($tmp1, $tmp2) @reduce_min8I" %} ++ ins_encode %{ ++ __ reduce($dst$$Register, $src$$Register, $vsrc$$FloatRegister, $tmp1$$FloatRegister, $tmp2$$FloatRegister, T_INT, this->ideal_Opcode(), 32); ++ %} ++ ins_pipe(pipe_slow); ++%} ++ ++instruct reduce_min4L(mRegL dst, mRegL src, vecY vsrc, vecY tmp1, vecY tmp2) %{ ++ predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_LONG); ++ match(Set dst (MinReductionV src vsrc)); ++ effect(TEMP_DEF dst, TEMP tmp1, TEMP tmp2); ++ format %{ "reduce $dst, $src, $vsrc\t# TEMP($tmp1, $tmp2) @reduce_min4L" %} ++ ins_encode %{ ++ __ reduce($dst$$Register, $src$$Register, $vsrc$$FloatRegister, $tmp1$$FloatRegister, $tmp2$$FloatRegister, T_LONG, this->ideal_Opcode(), 32); ++ %} ++ ins_pipe(pipe_slow); ++%} ++ ++// ------------------------------ RoundDoubleModeV ---------------------------- ++ ++instruct round2D(vecX dst, vecX src, immI rmode) %{ ++ predicate(n->as_Vector()->length() == 2); ++ match(Set dst (RoundDoubleModeV src rmode)); ++ format %{ "vfrint $dst, $src, $rmode\t# @round2D" %} ++ ins_encode %{ ++ DEBUG_ONLY(Unimplemented()); // unverified ++ switch ($rmode$$constant) { ++ case 0: __ vfrintrne_d($dst$$FloatRegister, $src$$FloatRegister); break; ++ case 1: __ vfrintrm_d($dst$$FloatRegister, $src$$FloatRegister); break; ++ case 2: __ vfrintrp_d($dst$$FloatRegister, $src$$FloatRegister); break; ++ } ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct round4D(vecY dst, vecY src, immI rmode) %{ ++ 
predicate(n->as_Vector()->length() == 4); ++ match(Set dst (RoundDoubleModeV src rmode)); ++ format %{ "xvfrint $dst, $src, $rmode\t# @round4D" %} ++ ins_encode %{ ++ DEBUG_ONLY(Unimplemented()); // unverified ++ switch ($rmode$$constant) { ++ case 0: __ xvfrintrne_d($dst$$FloatRegister, $src$$FloatRegister); break; ++ case 1: __ xvfrintrm_d($dst$$FloatRegister, $src$$FloatRegister); break; ++ case 2: __ xvfrintrp_d($dst$$FloatRegister, $src$$FloatRegister); break; ++ } ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++// ---------------------------- PopCount -------------------------------------- ++ ++instruct popcount4I(vecX dst, vecX src) %{ ++ predicate(UsePopCountInstruction && n->as_Vector()->length() == 4); ++ match(Set dst (PopCountVI src)); ++ format %{ "vpcnt.w $dst, $src\t# @popcount4I" %} ++ ins_encode %{ ++ DEBUG_ONLY(Unimplemented()); // unverified ++ __ vpcnt_w($dst$$FloatRegister, $src$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct popcount8I(vecY dst, vecY src) %{ ++ predicate(UsePopCountInstruction && n->as_Vector()->length() == 8); ++ match(Set dst (PopCountVI src)); ++ format %{ "xvpcnt.w $dst, $src\t# @popcount8I" %} ++ ins_encode %{ ++ DEBUG_ONLY(Unimplemented()); // unverified ++ __ xvpcnt_w($dst$$FloatRegister, $src$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++ ++//----------PEEPHOLE RULES----------------------------------------------------- ++// These must follow all instruction definitions as they use the names ++// defined in the instructions definitions. ++// ++// peepmatch ( root_instr_name [preceeding_instruction]* ); ++// ++// peepconstraint %{ ++// (instruction_number.operand_name relational_op instruction_number.operand_name ++// [, ...] ); ++// // instruction numbers are zero-based using left to right order in peepmatch ++// ++// peepreplace ( instr_name ( [instruction_number.operand_name]* ) ); ++// // provide an instruction_number.operand_name for each operand that appears ++// // in the replacement instruction's match rule ++// ++// ---------VM FLAGS--------------------------------------------------------- ++// ++// All peephole optimizations can be turned off using -XX:-OptoPeephole ++// ++// Each peephole rule is given an identifying number starting with zero and ++// increasing by one in the order seen by the parser. An individual peephole ++// can be enabled, and all others disabled, by using -XX:OptoPeepholeAt=# ++// on the command-line. 
++// ++// ---------CURRENT LIMITATIONS---------------------------------------------- ++// ++// Only match adjacent instructions in same basic block ++// Only equality constraints ++// Only constraints between operands, not (0.dest_reg == EAX_enc) ++// Only one replacement instruction ++// ++// ---------EXAMPLE---------------------------------------------------------- ++// ++// // pertinent parts of existing instructions in architecture description ++// instruct movI(eRegI dst, eRegI src) %{ ++// match(Set dst (CopyI src)); ++// %} ++// ++// instruct incI_eReg(eRegI dst, immI_1 src, eFlagsReg cr) %{ ++// match(Set dst (AddI dst src)); ++// effect(KILL cr); ++// %} ++// ++// // Change (inc mov) to lea ++// peephole %{ ++// // increment preceeded by register-register move ++// peepmatch ( incI_eReg movI ); ++// // require that the destination register of the increment ++// // match the destination register of the move ++// peepconstraint ( 0.dst == 1.dst ); ++// // construct a replacement instruction that sets ++// // the destination to ( move's source register + one ) ++// peepreplace ( leaI_eReg_immI( 0.dst 1.src 0.src ) ); ++// %} ++// ++// Implementation no longer uses movX instructions since ++// machine-independent system no longer uses CopyX nodes. ++// ++// peephole %{ ++// peepmatch ( incI_eReg movI ); ++// peepconstraint ( 0.dst == 1.dst ); ++// peepreplace ( leaI_eReg_immI( 0.dst 1.src 0.src ) ); ++// %} ++// ++// peephole %{ ++// peepmatch ( decI_eReg movI ); ++// peepconstraint ( 0.dst == 1.dst ); ++// peepreplace ( leaI_eReg_immI( 0.dst 1.src 0.src ) ); ++// %} ++// ++// peephole %{ ++// peepmatch ( addI_eReg_imm movI ); ++// peepconstraint ( 0.dst == 1.dst ); ++// peepreplace ( leaI_eReg_immI( 0.dst 1.src 0.src ) ); ++// %} ++// ++// peephole %{ ++// peepmatch ( addP_eReg_imm movP ); ++// peepconstraint ( 0.dst == 1.dst ); ++// peepreplace ( leaP_eReg_immI( 0.dst 1.src 0.src ) ); ++// %} ++ ++// // Change load of spilled value to only a spill ++// instruct storeI(memory mem, eRegI src) %{ ++// match(Set mem (StoreI mem src)); ++// %} ++// ++// instruct loadI(eRegI dst, memory mem) %{ ++// match(Set dst (LoadI mem)); ++// %} ++// ++//peephole %{ ++// peepmatch ( loadI storeI ); ++// peepconstraint ( 1.src == 0.dst, 1.mem == 0.mem ); ++// peepreplace ( storeI( 1.mem 1.mem 1.src ) ); ++//%} ++ ++//----------SMARTSPILL RULES--------------------------------------------------- ++// These must follow all instruction definitions as they use the names ++// defined in the instructions definitions. ++ +diff --git a/src/hotspot/cpu/loongarch/macroAssembler_loongarch.cpp b/src/hotspot/cpu/loongarch/macroAssembler_loongarch.cpp +new file mode 100644 +index 0000000000..9720fd176d +--- /dev/null ++++ b/src/hotspot/cpu/loongarch/macroAssembler_loongarch.cpp +@@ -0,0 +1,4567 @@ ++/* ++ * Copyright (c) 1997, 2014, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2017, 2023, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#include "precompiled.hpp" ++#include "jvm.h" ++#include "asm/assembler.hpp" ++#include "asm/assembler.inline.hpp" ++#include "asm/macroAssembler.inline.hpp" ++#include "compiler/disassembler.hpp" ++#include "gc/shared/barrierSet.hpp" ++#include "gc/shared/barrierSetAssembler.hpp" ++#include "gc/shared/collectedHeap.inline.hpp" ++#include "interpreter/interpreter.hpp" ++#include "memory/resourceArea.hpp" ++#include "memory/universe.hpp" ++#include "nativeInst_loongarch.hpp" ++#include "prims/methodHandles.hpp" ++#include "runtime/biasedLocking.hpp" ++#include "runtime/interfaceSupport.inline.hpp" ++#include "runtime/objectMonitor.hpp" ++#include "runtime/os.hpp" ++#include "runtime/safepoint.hpp" ++#include "runtime/safepointMechanism.hpp" ++#include "runtime/sharedRuntime.hpp" ++#include "runtime/stubRoutines.hpp" ++#include "utilities/macros.hpp" ++ ++#ifdef COMPILER2 ++#include "opto/compile.hpp" ++#include "opto/intrinsicnode.hpp" ++#endif ++ ++#define T0 RT0 ++#define T1 RT1 ++#define T2 RT2 ++#define T3 RT3 ++#define T4 RT4 ++#define T5 RT5 ++#define T6 RT6 ++#define T7 RT7 ++#define T8 RT8 ++ ++// Implementation of MacroAssembler ++ ++intptr_t MacroAssembler::i[32] = {0}; ++float MacroAssembler::f[32] = {0.0}; ++ ++void MacroAssembler::print(outputStream *s) { ++ unsigned int k; ++ for(k=0; k<32; k++) { ++ s->print_cr("i%d = 0x%.16lx", k, i[k]); ++ } ++ s->cr(); ++ ++ for(k=0; k<32; k++) { ++ s->print_cr("f%d = %f", k, f[k]); ++ } ++ s->cr(); ++} ++ ++int MacroAssembler::i_offset(unsigned int k) { return (intptr_t)&((MacroAssembler*)0)->i[k]; } ++int MacroAssembler::f_offset(unsigned int k) { return (intptr_t)&((MacroAssembler*)0)->f[k]; } ++ ++void MacroAssembler::save_registers(MacroAssembler *masm) { ++#define __ masm-> ++ for(int k=0; k<32; k++) { ++ __ st_w (as_Register(k), A0, i_offset(k)); ++ } ++ ++ for(int k=0; k<32; k++) { ++ __ fst_s (as_FloatRegister(k), A0, f_offset(k)); ++ } ++#undef __ ++} ++ ++void MacroAssembler::restore_registers(MacroAssembler *masm) { ++#define __ masm-> ++ for(int k=0; k<32; k++) { ++ __ ld_w (as_Register(k), A0, i_offset(k)); ++ } ++ ++ for(int k=0; k<32; k++) { ++ __ fld_s (as_FloatRegister(k), A0, f_offset(k)); ++ } ++#undef __ ++} ++ ++ ++void MacroAssembler::pd_patch_instruction(address branch, address target) { ++ jint& stub_inst = *(jint*)branch; ++ jint *pc = (jint *)branch; ++ ++ if (high(stub_inst, 7) == pcaddu18i_op) { ++ // far: ++ // pcaddu18i reg, si20 ++ // jirl r0, reg, si18 ++ ++ assert(high(pc[1], 6) == jirl_op, "Not a branch label patch"); ++ jlong offs = target - branch; ++ CodeBuffer cb(branch, 2 * BytesPerInstWord); ++ MacroAssembler masm(&cb); ++ if (reachable_from_branch_short(offs)) { ++ // convert far to short ++#define __ masm.
++ __ b(target); ++ __ nop(); ++#undef __ ++ } else { ++ masm.patchable_jump_far(R0, offs); ++ } ++ return; ++ } else if (high(stub_inst, 7) == pcaddi_op) { ++ // see MacroAssembler::set_last_Java_frame: ++ // pcaddi reg, si20 ++ ++ jint offs = (target - branch) >> 2; ++ guarantee(is_simm(offs, 20), "Not signed 20-bit offset"); ++ CodeBuffer cb(branch, 1 * BytesPerInstWord); ++ MacroAssembler masm(&cb); ++ masm.pcaddi(as_Register(low(stub_inst, 5)), offs); ++ return; ++ } else if (high(stub_inst, 7) == pcaddu12i_op) { ++ // pc-relative ++ jlong offs = target - branch; ++ guarantee(is_simm(offs, 32), "Not signed 32-bit offset"); ++ jint si12, si20; ++ jint& stub_instNext = *(jint*)(branch+4); ++ split_simm32(offs, si12, si20); ++ CodeBuffer cb(branch, 2 * BytesPerInstWord); ++ MacroAssembler masm(&cb); ++ masm.pcaddu12i(as_Register(low(stub_inst, 5)), si20); ++ masm.addi_d(as_Register(low((stub_instNext), 5)), as_Register(low((stub_instNext) >> 5, 5)), si12); ++ return; ++ } else if (high(stub_inst, 7) == lu12i_w_op) { ++ // long call (absolute) ++ CodeBuffer cb(branch, 3 * BytesPerInstWord); ++ MacroAssembler masm(&cb); ++ masm.call_long(target); ++ return; ++ } ++ ++ stub_inst = patched_branch(target - branch, stub_inst, 0); ++} ++ ++bool MacroAssembler::reachable_from_branch_short(jlong offs) { ++ if (ForceUnreachable) { ++ return false; ++ } ++ return is_simm(offs >> 2, 26); ++} ++ ++void MacroAssembler::patchable_jump_far(Register ra, jlong offs) { ++ jint si18, si20; ++ guarantee(is_simm(offs, 38), "Not signed 38-bit offset"); ++ split_simm38(offs, si18, si20); ++ pcaddu18i(T4, si20); ++ jirl(ra, T4, si18); ++} ++ ++void MacroAssembler::patchable_jump(address target, bool force_patchable) { ++ assert(ReservedCodeCacheSize < 4*G, "branch out of range"); ++ assert(CodeCache::find_blob(target) != NULL, ++ "destination of jump not found in code cache"); ++ if (force_patchable || patchable_branches()) { ++ jlong offs = target - pc(); ++ if (reachable_from_branch_short(offs)) { // Short jump ++ b(offset26(target)); ++ nop(); ++ } else { // Far jump ++ patchable_jump_far(R0, offs); ++ } ++ } else { // Real short jump ++ b(offset26(target)); ++ } ++} ++ ++void MacroAssembler::patchable_call(address target, address call_site) { ++ jlong offs = target - (call_site ? call_site : pc()); ++ if (reachable_from_branch_short(offs - BytesPerInstWord)) { // Short call ++ nop(); ++ bl((offs - BytesPerInstWord) >> 2); ++ } else { // Far call ++ patchable_jump_far(RA, offs); ++ } ++} ++ ++// Maybe emit a call via a trampoline. If the code cache is small ++// trampolines won't be emitted. ++ ++address MacroAssembler::trampoline_call(AddressLiteral entry, CodeBuffer *cbuf) { ++ assert(JavaThread::current()->is_Compiler_thread(), "just checking"); ++ assert(entry.rspec().type() == relocInfo::runtime_call_type ++ || entry.rspec().type() == relocInfo::opt_virtual_call_type ++ || entry.rspec().type() == relocInfo::static_call_type ++ || entry.rspec().type() == relocInfo::virtual_call_type, "wrong reloc type"); ++ ++ // We need a trampoline if branches are far. ++ if (far_branches()) { ++ bool in_scratch_emit_size = false; ++#ifdef COMPILER2 ++ // We don't want to emit a trampoline if C2 is generating dummy ++ // code during its branch shortening phase. 
++ CompileTask* task = ciEnv::current()->task(); ++ in_scratch_emit_size = ++ (task != NULL && is_c2_compile(task->comp_level()) && ++ Compile::current()->in_scratch_emit_size()); ++#endif ++ if (!in_scratch_emit_size) { ++ address stub = emit_trampoline_stub(offset(), entry.target()); ++ if (stub == NULL) { ++ postcond(pc() == badAddress); ++ return NULL; // CodeCache is full ++ } ++ } ++ } ++ ++ if (cbuf) cbuf->set_insts_mark(); ++ relocate(entry.rspec()); ++ if (!far_branches()) { ++ bl(entry.target()); ++ } else { ++ bl(pc()); ++ } ++ // just need to return a non-null address ++ postcond(pc() != badAddress); ++ return pc(); ++} ++ ++// Emit a trampoline stub for a call to a target which is too far away. ++// ++// code sequences: ++// ++// call-site: ++// branch-and-link to <destination> or <trampoline stub> ++// ++// Related trampoline stub for this call site in the stub section: ++// load the call target from the constant pool ++// branch (RA still points to the call site above) ++ ++address MacroAssembler::emit_trampoline_stub(int insts_call_instruction_offset, ++ address dest) { ++ // Start the stub ++ address stub = start_a_stub(NativeInstruction::nop_instruction_size ++ + NativeCallTrampolineStub::instruction_size); ++ if (stub == NULL) { ++ return NULL; // CodeBuffer::expand failed ++ } ++ ++ // Create a trampoline stub relocation which relates this trampoline stub ++ // with the call instruction at insts_call_instruction_offset in the ++ // instructions code-section. ++ align(wordSize); ++ relocate(trampoline_stub_Relocation::spec(code()->insts()->start() ++ + insts_call_instruction_offset)); ++ const int stub_start_offset = offset(); ++ ++ // Now, create the trampoline stub's code: ++ // - load the call ++ // - call ++ pcaddi(T4, 0); ++ ld_d(T4, T4, 16); ++ jr(T4); ++ nop(); //align ++ assert(offset() - stub_start_offset == NativeCallTrampolineStub::data_offset, ++ "should be"); ++ emit_int64((int64_t)dest); ++ ++ const address stub_start_addr = addr_at(stub_start_offset); ++ ++ NativeInstruction* ni = nativeInstruction_at(stub_start_addr); ++ assert(ni->is_NativeCallTrampolineStub_at(), "doesn't look like a trampoline"); ++ ++ end_a_stub(); ++ return stub_start_addr; ++} ++ ++void MacroAssembler::beq_far(Register rs, Register rt, address entry) { ++ if (is_simm16((entry - pc()) >> 2)) { // Short jump ++ beq(rs, rt, offset16(entry)); ++ } else { // Far jump ++ Label not_jump; ++ bne(rs, rt, not_jump); ++ b_far(entry); ++ bind(not_jump); ++ } ++} ++ ++void MacroAssembler::beq_far(Register rs, Register rt, Label& L) { ++ if (L.is_bound()) { ++ beq_far(rs, rt, target(L)); ++ } else { ++ Label not_jump; ++ bne(rs, rt, not_jump); ++ b_far(L); ++ bind(not_jump); ++ } ++} ++ ++void MacroAssembler::bne_far(Register rs, Register rt, address entry) { ++ if (is_simm16((entry - pc()) >> 2)) { // Short jump ++ bne(rs, rt, offset16(entry)); ++ } else { // Far jump ++ Label not_jump; ++ beq(rs, rt, not_jump); ++ b_far(entry); ++ bind(not_jump); ++ } ++} ++ ++void MacroAssembler::bne_far(Register rs, Register rt, Label& L) { ++ if (L.is_bound()) { ++ bne_far(rs, rt, target(L)); ++ } else { ++ Label not_jump; ++ beq(rs, rt, not_jump); ++ b_far(L); ++ bind(not_jump); ++ } ++} ++ ++void MacroAssembler::blt_far(Register rs, Register rt, address entry, bool is_signed) { ++ if (is_simm16((entry - pc()) >> 2)) { // Short jump ++ if (is_signed) { ++ blt(rs, rt, offset16(entry)); ++ } else { ++ bltu(rs, rt, offset16(entry)); ++ } ++ } else { // Far jump ++ Label not_jump; ++ if (is_signed) { ++ bge(rs, rt, not_jump); ++ } else { ++
bgeu(rs, rt, not_jump); ++ } ++ b_far(entry); ++ bind(not_jump); ++ } ++} ++ ++void MacroAssembler::blt_far(Register rs, Register rt, Label& L, bool is_signed) { ++ if (L.is_bound()) { ++ blt_far(rs, rt, target(L), is_signed); ++ } else { ++ Label not_jump; ++ if (is_signed) { ++ bge(rs, rt, not_jump); ++ } else { ++ bgeu(rs, rt, not_jump); ++ } ++ b_far(L); ++ bind(not_jump); ++ } ++} ++ ++void MacroAssembler::bge_far(Register rs, Register rt, address entry, bool is_signed) { ++ if (is_simm16((entry - pc()) >> 2)) { // Short jump ++ if (is_signed) { ++ bge(rs, rt, offset16(entry)); ++ } else { ++ bgeu(rs, rt, offset16(entry)); ++ } ++ } else { // Far jump ++ Label not_jump; ++ if (is_signed) { ++ blt(rs, rt, not_jump); ++ } else { ++ bltu(rs, rt, not_jump); ++ } ++ b_far(entry); ++ bind(not_jump); ++ } ++} ++ ++void MacroAssembler::bge_far(Register rs, Register rt, Label& L, bool is_signed) { ++ if (L.is_bound()) { ++ bge_far(rs, rt, target(L), is_signed); ++ } else { ++ Label not_jump; ++ if (is_signed) { ++ blt(rs, rt, not_jump); ++ } else { ++ bltu(rs, rt, not_jump); ++ } ++ b_far(L); ++ bind(not_jump); ++ } ++} ++ ++void MacroAssembler::beq_long(Register rs, Register rt, Label& L) { ++ Label not_taken; ++ bne(rs, rt, not_taken); ++ jmp_far(L); ++ bind(not_taken); ++} ++ ++void MacroAssembler::bne_long(Register rs, Register rt, Label& L) { ++ Label not_taken; ++ beq(rs, rt, not_taken); ++ jmp_far(L); ++ bind(not_taken); ++} ++ ++void MacroAssembler::bc1t_long(Label& L) { ++ Label not_taken; ++ bceqz(FCC0, not_taken); ++ jmp_far(L); ++ bind(not_taken); ++} ++ ++void MacroAssembler::blt_long(Register rs, Register rt, Label& L, bool is_signed) { ++ Label not_taken; ++ if (is_signed) { ++ bge(rs, rt, not_taken); ++ } else { ++ bgeu(rs, rt, not_taken); ++ } ++ jmp_far(L); ++ bind(not_taken); ++} ++ ++void MacroAssembler::bge_long(Register rs, Register rt, Label& L, bool is_signed) { ++ Label not_taken; ++ if (is_signed) { ++ blt(rs, rt, not_taken); ++ } else { ++ bltu(rs, rt, not_taken); ++ } ++ jmp_far(L); ++ bind(not_taken); ++} ++ ++void MacroAssembler::bc1f_long(Label& L) { ++ Label not_taken; ++ bcnez(FCC0, not_taken); ++ jmp_far(L); ++ bind(not_taken); ++} ++ ++void MacroAssembler::b_far(Label& L) { ++ if (L.is_bound()) { ++ b_far(target(L)); ++ } else { ++ L.add_patch_at(code(), locator()); ++ if (ForceUnreachable) { ++ patchable_jump_far(R0, 0); ++ } else { ++ b(0); ++ } ++ } ++} ++ ++void MacroAssembler::b_far(address entry) { ++ jlong offs = entry - pc(); ++ if (reachable_from_branch_short(offs)) { // Short jump ++ b(offset26(entry)); ++ } else { // Far jump ++ patchable_jump_far(R0, offs); ++ } ++} ++ ++void MacroAssembler::ld_ptr(Register rt, Register base, Register offset) { ++ ldx_d(rt, base, offset); ++} ++ ++void MacroAssembler::st_ptr(Register rt, Register base, Register offset) { ++ stx_d(rt, base, offset); ++} ++ ++Address MacroAssembler::as_Address(AddressLiteral adr) { ++ return Address(adr.target(), adr.rspec()); ++} ++ ++Address MacroAssembler::as_Address(ArrayAddress adr) { ++ return Address::make_array(adr); ++} ++ ++// tmp_reg1 and tmp_reg2 should be saved outside of atomic_inc32 (caller saved). 
++void MacroAssembler::atomic_inc32(address counter_addr, int inc, Register tmp_reg1, Register tmp_reg2) { ++ li(tmp_reg1, inc); ++ li(tmp_reg2, counter_addr); ++ amadd_w(R0, tmp_reg1, tmp_reg2); ++} ++ ++void MacroAssembler::reserved_stack_check() { ++ Register thread = TREG; ++#ifndef OPT_THREAD ++ get_thread(thread); ++#endif ++ // testing if reserved zone needs to be enabled ++ Label no_reserved_zone_enabling; ++ ++ ld_d(AT, Address(thread, JavaThread::reserved_stack_activation_offset())); ++ sub_d(AT, SP, AT); ++ blt(AT, R0, no_reserved_zone_enabling); ++ ++ enter(); // RA and FP are live. ++ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::enable_stack_reserved_zone), thread); ++ leave(); ++ ++ // We have already removed our own frame. ++ // throw_delayed_StackOverflowError will think that it's been ++ // called by our caller. ++ li(AT, (long)StubRoutines::throw_delayed_StackOverflowError_entry()); ++ jr(AT); ++ should_not_reach_here(); ++ ++ bind(no_reserved_zone_enabling); ++} ++ ++int MacroAssembler::biased_locking_enter(Register lock_reg, ++ Register obj_reg, ++ Register swap_reg, ++ Register tmp_reg, ++ bool swap_reg_contains_mark, ++ Label& done, ++ Label* slow_case, ++ BiasedLockingCounters* counters) { ++ assert(UseBiasedLocking, "why call this otherwise?"); ++ bool need_tmp_reg = false; ++ if (tmp_reg == noreg) { ++ need_tmp_reg = true; ++ tmp_reg = T4; ++ } ++ assert_different_registers(lock_reg, obj_reg, swap_reg, tmp_reg, AT); ++ assert(markOopDesc::age_shift == markOopDesc::lock_bits + markOopDesc::biased_lock_bits, "biased locking makes assumptions about bit layout"); ++ Address mark_addr (obj_reg, oopDesc::mark_offset_in_bytes()); ++ Address saved_mark_addr(lock_reg, 0); ++ ++ // Biased locking ++ // See whether the lock is currently biased toward our thread and ++ // whether the epoch is still valid ++ // Note that the runtime guarantees sufficient alignment of JavaThread ++ // pointers to allow age to be placed into low bits ++ // First check to see whether biasing is even enabled for this object ++ Label cas_label; ++ int null_check_offset = -1; ++ if (!swap_reg_contains_mark) { ++ null_check_offset = offset(); ++ ld_ptr(swap_reg, mark_addr); ++ } ++ ++ if (need_tmp_reg) { ++ push(tmp_reg); ++ } ++ move(tmp_reg, swap_reg); ++ andi(tmp_reg, tmp_reg, markOopDesc::biased_lock_mask_in_place); ++ addi_d(AT, R0, markOopDesc::biased_lock_pattern); ++ sub_d(AT, AT, tmp_reg); ++ if (need_tmp_reg) { ++ pop(tmp_reg); ++ } ++ ++ bne(AT, R0, cas_label); ++ ++ ++ // The bias pattern is present in the object's header. Need to check ++ // whether the bias owner and the epoch are both still current. ++ // Note that because there is no current thread register on LA we ++ // need to store off the mark word we read out of the object to ++ // avoid reloading it and needing to recheck invariants below. This ++ // store is unfortunate but it makes the overall code shorter and ++ // simpler. 
++ st_ptr(swap_reg, saved_mark_addr); ++ if (need_tmp_reg) { ++ push(tmp_reg); ++ } ++ if (swap_reg_contains_mark) { ++ null_check_offset = offset(); ++ } ++ load_prototype_header(tmp_reg, obj_reg); ++ xorr(tmp_reg, tmp_reg, swap_reg); ++#ifndef OPT_THREAD ++ get_thread(swap_reg); ++ xorr(swap_reg, swap_reg, tmp_reg); ++#else ++ xorr(swap_reg, TREG, tmp_reg); ++#endif ++ ++ li(AT, ~((int) markOopDesc::age_mask_in_place)); ++ andr(swap_reg, swap_reg, AT); ++ ++ if (PrintBiasedLockingStatistics) { ++ Label L; ++ bne(swap_reg, R0, L); ++ push(tmp_reg); ++ push(A0); ++ atomic_inc32((address)BiasedLocking::biased_lock_entry_count_addr(), 1, A0, tmp_reg); ++ pop(A0); ++ pop(tmp_reg); ++ bind(L); ++ } ++ if (need_tmp_reg) { ++ pop(tmp_reg); ++ } ++ beq(swap_reg, R0, done); ++ Label try_revoke_bias; ++ Label try_rebias; ++ ++ // At this point we know that the header has the bias pattern and ++ // that we are not the bias owner in the current epoch. We need to ++ // figure out more details about the state of the header in order to ++ // know what operations can be legally performed on the object's ++ // header. ++ ++ // If the low three bits in the xor result aren't clear, that means ++ // the prototype header is no longer biased and we have to revoke ++ // the bias on this object. ++ ++ li(AT, markOopDesc::biased_lock_mask_in_place); ++ andr(AT, swap_reg, AT); ++ bne(AT, R0, try_revoke_bias); ++ // Biasing is still enabled for this data type. See whether the ++ // epoch of the current bias is still valid, meaning that the epoch ++ // bits of the mark word are equal to the epoch bits of the ++ // prototype header. (Note that the prototype header's epoch bits ++ // only change at a safepoint.) If not, attempt to rebias the object ++ // toward the current thread. Note that we must be absolutely sure ++ // that the current epoch is invalid in order to do this because ++ // otherwise the manipulations it performs on the mark word are ++ // illegal. ++ ++ li(AT, markOopDesc::epoch_mask_in_place); ++ andr(AT,swap_reg, AT); ++ bne(AT, R0, try_rebias); ++ // The epoch of the current bias is still valid but we know nothing ++ // about the owner; it might be set or it might be clear. Try to ++ // acquire the bias of the object using an atomic operation. If this ++ // fails we will go in to the runtime to revoke the object's bias. ++ // Note that we first construct the presumed unbiased header so we ++ // don't accidentally blow away another thread's valid bias. ++ ++ ld_ptr(swap_reg, saved_mark_addr); ++ ++ li(AT, markOopDesc::biased_lock_mask_in_place | markOopDesc::age_mask_in_place | markOopDesc::epoch_mask_in_place); ++ andr(swap_reg, swap_reg, AT); ++ ++ if (need_tmp_reg) { ++ push(tmp_reg); ++ } ++#ifndef OPT_THREAD ++ get_thread(tmp_reg); ++ orr(tmp_reg, tmp_reg, swap_reg); ++#else ++ orr(tmp_reg, TREG, swap_reg); ++#endif ++ cmpxchg(Address(obj_reg, 0), swap_reg, tmp_reg, AT, false, false); ++ if (need_tmp_reg) { ++ pop(tmp_reg); ++ } ++ // If the biasing toward our thread failed, this means that ++ // another thread succeeded in biasing it toward itself and we ++ // need to revoke that bias. The revocation will occur in the ++ // interpreter runtime in the slow case. 
++ if (PrintBiasedLockingStatistics) { ++ Label L; ++ bne(AT, R0, L); ++ push(tmp_reg); ++ push(A0); ++ atomic_inc32((address)BiasedLocking::anonymously_biased_lock_entry_count_addr(), 1, A0, tmp_reg); ++ pop(A0); ++ pop(tmp_reg); ++ bind(L); ++ } ++ if (slow_case != NULL) { ++ beq_far(AT, R0, *slow_case); ++ } ++ b(done); ++ ++ bind(try_rebias); ++ // At this point we know the epoch has expired, meaning that the ++ // current "bias owner", if any, is actually invalid. Under these ++ // circumstances _only_, we are allowed to use the current header's ++ // value as the comparison value when doing the cas to acquire the ++ // bias in the current epoch. In other words, we allow transfer of ++ // the bias from one thread to another directly in this situation. ++ // ++ // FIXME: due to a lack of registers we currently blow away the age ++ // bits in this situation. Should attempt to preserve them. ++ if (need_tmp_reg) { ++ push(tmp_reg); ++ } ++ load_prototype_header(tmp_reg, obj_reg); ++#ifndef OPT_THREAD ++ get_thread(swap_reg); ++ orr(tmp_reg, tmp_reg, swap_reg); ++#else ++ orr(tmp_reg, tmp_reg, TREG); ++#endif ++ ld_ptr(swap_reg, saved_mark_addr); ++ ++ cmpxchg(Address(obj_reg, 0), swap_reg, tmp_reg, AT, false, false); ++ if (need_tmp_reg) { ++ pop(tmp_reg); ++ } ++ // If the biasing toward our thread failed, then another thread ++ // succeeded in biasing it toward itself and we need to revoke that ++ // bias. The revocation will occur in the runtime in the slow case. ++ if (PrintBiasedLockingStatistics) { ++ Label L; ++ bne(AT, R0, L); ++ push(AT); ++ push(tmp_reg); ++ atomic_inc32((address)BiasedLocking::rebiased_lock_entry_count_addr(), 1, AT, tmp_reg); ++ pop(tmp_reg); ++ pop(AT); ++ bind(L); ++ } ++ if (slow_case != NULL) { ++ beq_far(AT, R0, *slow_case); ++ } ++ ++ b(done); ++ bind(try_revoke_bias); ++ // The prototype mark in the klass doesn't have the bias bit set any ++ // more, indicating that objects of this data type are not supposed ++ // to be biased any more. We are going to try to reset the mark of ++ // this object to the prototype value and fall through to the ++ // CAS-based locking scheme. Note that if our CAS fails, it means ++ // that another thread raced us for the privilege of revoking the ++ // bias of this particular object, so it's okay to continue in the ++ // normal locking code. ++ // ++ // FIXME: due to a lack of registers we currently blow away the age ++ // bits in this situation. Should attempt to preserve them. ++ ld_ptr(swap_reg, saved_mark_addr); ++ ++ if (need_tmp_reg) { ++ push(tmp_reg); ++ } ++ load_prototype_header(tmp_reg, obj_reg); ++ cmpxchg(Address(obj_reg, 0), swap_reg, tmp_reg, AT, false, false); ++ if (need_tmp_reg) { ++ pop(tmp_reg); ++ } ++ // Fall through to the normal CAS-based lock, because no matter what ++ // the result of the above CAS, some thread must have succeeded in ++ // removing the bias bit from the object's header. ++ if (PrintBiasedLockingStatistics) { ++ Label L; ++ bne(AT, R0, L); ++ push(AT); ++ push(tmp_reg); ++ atomic_inc32((address)BiasedLocking::revoked_lock_entry_count_addr(), 1, AT, tmp_reg); ++ pop(tmp_reg); ++ pop(AT); ++ bind(L); ++ } ++ ++ bind(cas_label); ++ return null_check_offset; ++} ++ ++void MacroAssembler::biased_locking_exit(Register obj_reg, Register temp_reg, Label& done) { ++ assert(UseBiasedLocking, "why call this otherwise?"); ++ ++ // Check for biased locking unlock case, which is a no-op ++ // Note: we do not have to check the thread ID for two reasons. 
++ // First, the interpreter checks for IllegalMonitorStateException at ++ // a higher level. Second, if the bias was revoked while we held the ++ // lock, the object could not be rebiased toward another thread, so ++ // the bias bit would be clear. ++ ld_d(temp_reg, Address(obj_reg, oopDesc::mark_offset_in_bytes())); ++ andi(temp_reg, temp_reg, markOopDesc::biased_lock_mask_in_place); ++ addi_d(AT, R0, markOopDesc::biased_lock_pattern); ++ ++ beq(AT, temp_reg, done); ++} ++ ++// the stack pointer adjustment is needed. see InterpreterMacroAssembler::super_call_VM_leaf ++// this method will handle the stack problem, you need not to preserve the stack space for the argument now ++void MacroAssembler::call_VM_leaf_base(address entry_point, int number_of_arguments) { ++ Label L, E; ++ ++ assert(number_of_arguments <= 4, "just check"); ++ ++ andi(AT, SP, 0xf); ++ beq(AT, R0, L); ++ addi_d(SP, SP, -8); ++ call(entry_point, relocInfo::runtime_call_type); ++ addi_d(SP, SP, 8); ++ b(E); ++ ++ bind(L); ++ call(entry_point, relocInfo::runtime_call_type); ++ bind(E); ++} ++ ++void MacroAssembler::jmp(address entry) { ++ jlong offs = entry - pc(); ++ if (reachable_from_branch_short(offs)) { // Short jump ++ b(offset26(entry)); ++ } else { // Far jump ++ patchable_jump_far(R0, offs); ++ } ++} ++ ++void MacroAssembler::jmp(address entry, relocInfo::relocType rtype) { ++ switch (rtype) { ++ case relocInfo::none: ++ jmp(entry); ++ break; ++ default: ++ { ++ InstructionMark im(this); ++ relocate(rtype); ++ patchable_jump(entry); ++ } ++ break; ++ } ++} ++ ++void MacroAssembler::jmp_far(Label& L) { ++ if (L.is_bound()) { ++ assert(target(L) != NULL, "jmp most probably wrong"); ++ patchable_jump(target(L), true /* force patchable */); ++ } else { ++ L.add_patch_at(code(), locator()); ++ patchable_jump_far(R0, 0); ++ } ++} ++ ++void MacroAssembler::mov_metadata(Address dst, Metadata* obj) { ++ int oop_index; ++ if (obj) { ++ oop_index = oop_recorder()->find_index(obj); ++ } else { ++ oop_index = oop_recorder()->allocate_metadata_index(obj); ++ } ++ relocate(metadata_Relocation::spec(oop_index)); ++ patchable_li52(AT, (long)obj); ++ st_d(AT, dst); ++} ++ ++void MacroAssembler::mov_metadata(Register dst, Metadata* obj) { ++ int oop_index; ++ if (obj) { ++ oop_index = oop_recorder()->find_index(obj); ++ } else { ++ oop_index = oop_recorder()->allocate_metadata_index(obj); ++ } ++ relocate(metadata_Relocation::spec(oop_index)); ++ patchable_li52(dst, (long)obj); ++} ++ ++void MacroAssembler::call(address entry) { ++ jlong offs = entry - pc(); ++ if (reachable_from_branch_short(offs)) { // Short call (pc-rel) ++ bl(offset26(entry)); ++ } else if (is_simm(offs, 38)) { // Far call (pc-rel) ++ patchable_jump_far(RA, offs); ++ } else { // Long call (absolute) ++ call_long(entry); ++ } ++} ++ ++void MacroAssembler::call(address entry, relocInfo::relocType rtype) { ++ switch (rtype) { ++ case relocInfo::none: ++ call(entry); ++ break; ++ case relocInfo::runtime_call_type: ++ if (!is_simm(entry - pc(), 38)) { ++ call_long(entry); ++ break; ++ } ++ // fallthrough ++ default: ++ { ++ InstructionMark im(this); ++ relocate(rtype); ++ patchable_call(entry); ++ } ++ break; ++ } ++} ++ ++void MacroAssembler::call(address entry, RelocationHolder& rh){ ++ switch (rh.type()) { ++ case relocInfo::none: ++ call(entry); ++ break; ++ case relocInfo::runtime_call_type: ++ if (!is_simm(entry - pc(), 38)) { ++ call_long(entry); ++ break; ++ } ++ // fallthrough ++ default: ++ { ++ InstructionMark im(this); ++ relocate(rh); ++ 
patchable_call(entry); ++ } ++ break; ++ } ++} ++ ++void MacroAssembler::call_long(address entry) { ++ jlong value = (jlong)entry; ++ lu12i_w(T4, split_low20(value >> 12)); ++ lu32i_d(T4, split_low20(value >> 32)); ++ jirl(RA, T4, split_low12(value)); ++} ++ ++address MacroAssembler::ic_call(address entry, jint method_index) { ++ RelocationHolder rh = virtual_call_Relocation::spec(pc(), method_index); ++ patchable_li52(IC_Klass, (long)Universe::non_oop_word()); ++ assert(entry != NULL, "call most probably wrong"); ++ InstructionMark im(this); ++ return trampoline_call(AddressLiteral(entry, rh)); ++} ++ ++void MacroAssembler::c2bool(Register r) { ++ sltu(r, R0, r); ++} ++ ++#ifndef PRODUCT ++extern "C" void findpc(intptr_t x); ++#endif ++ ++void MacroAssembler::debug(char* msg/*, RegistersForDebugging* regs*/) { ++ if ( ShowMessageBoxOnError ) { ++ JavaThreadState saved_state = JavaThread::current()->thread_state(); ++ JavaThread::current()->set_thread_state(_thread_in_vm); ++ { ++ // In order to get locks work, we need to fake a in_VM state ++ ttyLocker ttyl; ++ ::tty->print_cr("EXECUTION STOPPED: %s\n", msg); ++ if (CountBytecodes || TraceBytecodes || StopInterpreterAt) { ++ BytecodeCounter::print(); ++ } ++ ++ } ++ ThreadStateTransition::transition(JavaThread::current(), _thread_in_vm, saved_state); ++ } ++ else ++ ::tty->print_cr("=============== DEBUG MESSAGE: %s ================\n", msg); ++} ++ ++ ++void MacroAssembler::stop(const char* msg) { ++ li(A0, (long)msg); ++ call(CAST_FROM_FN_PTR(address, MacroAssembler::debug), relocInfo::runtime_call_type); ++ brk(17); ++} ++ ++void MacroAssembler::warn(const char* msg) { ++ pushad(); ++ li(A0, (long)msg); ++ push(S2); ++ li(AT, -(StackAlignmentInBytes)); ++ move(S2, SP); // use S2 as a sender SP holder ++ andr(SP, SP, AT); // align stack as required by ABI ++ call(CAST_FROM_FN_PTR(address, MacroAssembler::debug), relocInfo::runtime_call_type); ++ move(SP, S2); // use S2 as a sender SP holder ++ pop(S2); ++ popad(); ++} ++ ++void MacroAssembler::increment(Register reg, int imm) { ++ if (!imm) return; ++ if (is_simm(imm, 12)) { ++ addi_d(reg, reg, imm); ++ } else { ++ li(AT, imm); ++ add_d(reg, reg, AT); ++ } ++} ++ ++void MacroAssembler::decrement(Register reg, int imm) { ++ increment(reg, -imm); ++} ++ ++void MacroAssembler::increment(Address addr, int imm) { ++ if (!imm) return; ++ assert(is_simm(imm, 12), "must be"); ++ ld_ptr(AT, addr); ++ addi_d(AT, AT, imm); ++ st_ptr(AT, addr); ++} ++ ++void MacroAssembler::decrement(Address addr, int imm) { ++ increment(addr, -imm); ++} ++ ++void MacroAssembler::call_VM(Register oop_result, ++ address entry_point, ++ bool check_exceptions) { ++ call_VM_helper(oop_result, entry_point, 0, check_exceptions); ++} ++ ++void MacroAssembler::call_VM(Register oop_result, ++ address entry_point, ++ Register arg_1, ++ bool check_exceptions) { ++ if (arg_1!=A1) move(A1, arg_1); ++ call_VM_helper(oop_result, entry_point, 1, check_exceptions); ++} ++ ++void MacroAssembler::call_VM(Register oop_result, ++ address entry_point, ++ Register arg_1, ++ Register arg_2, ++ bool check_exceptions) { ++ if (arg_1!=A1) move(A1, arg_1); ++ if (arg_2!=A2) move(A2, arg_2); ++ assert(arg_2 != A1, "smashed argument"); ++ call_VM_helper(oop_result, entry_point, 2, check_exceptions); ++} ++ ++void MacroAssembler::call_VM(Register oop_result, ++ address entry_point, ++ Register arg_1, ++ Register arg_2, ++ Register arg_3, ++ bool check_exceptions) { ++ if (arg_1!=A1) move(A1, arg_1); ++ if (arg_2!=A2) move(A2, arg_2); 
assert(arg_2 != A1, "smashed argument"); ++ if (arg_3!=A3) move(A3, arg_3); assert(arg_3 != A1 && arg_3 != A2, "smashed argument"); ++ call_VM_helper(oop_result, entry_point, 3, check_exceptions); ++} ++ ++void MacroAssembler::call_VM(Register oop_result, ++ Register last_java_sp, ++ address entry_point, ++ int number_of_arguments, ++ bool check_exceptions) { ++ call_VM_base(oop_result, NOREG, last_java_sp, entry_point, number_of_arguments, check_exceptions); ++} ++ ++void MacroAssembler::call_VM(Register oop_result, ++ Register last_java_sp, ++ address entry_point, ++ Register arg_1, ++ bool check_exceptions) { ++ if (arg_1 != A1) move(A1, arg_1); ++ call_VM(oop_result, last_java_sp, entry_point, 1, check_exceptions); ++} ++ ++void MacroAssembler::call_VM(Register oop_result, ++ Register last_java_sp, ++ address entry_point, ++ Register arg_1, ++ Register arg_2, ++ bool check_exceptions) { ++ if (arg_1 != A1) move(A1, arg_1); ++ if (arg_2 != A2) move(A2, arg_2); assert(arg_2 != A1, "smashed argument"); ++ call_VM(oop_result, last_java_sp, entry_point, 2, check_exceptions); ++} ++ ++void MacroAssembler::call_VM(Register oop_result, ++ Register last_java_sp, ++ address entry_point, ++ Register arg_1, ++ Register arg_2, ++ Register arg_3, ++ bool check_exceptions) { ++ if (arg_1 != A1) move(A1, arg_1); ++ if (arg_2 != A2) move(A2, arg_2); assert(arg_2 != A1, "smashed argument"); ++ if (arg_3 != A3) move(A3, arg_3); assert(arg_3 != A1 && arg_3 != A2, "smashed argument"); ++ call_VM(oop_result, last_java_sp, entry_point, 3, check_exceptions); ++} ++ ++void MacroAssembler::call_VM_base(Register oop_result, ++ Register java_thread, ++ Register last_java_sp, ++ address entry_point, ++ int number_of_arguments, ++ bool check_exceptions) { ++ // determine java_thread register ++ if (!java_thread->is_valid()) { ++#ifndef OPT_THREAD ++ java_thread = T2; ++ get_thread(java_thread); ++#else ++ java_thread = TREG; ++#endif ++ } ++ // determine last_java_sp register ++ if (!last_java_sp->is_valid()) { ++ last_java_sp = SP; ++ } ++ // debugging support ++ assert(number_of_arguments >= 0 , "cannot have negative number of arguments"); ++ assert(number_of_arguments <= 4 , "cannot have negative number of arguments"); ++ assert(java_thread != oop_result , "cannot use the same register for java_thread & oop_result"); ++ assert(java_thread != last_java_sp, "cannot use the same register for java_thread & last_java_sp"); ++ ++ assert(last_java_sp != FP, "this code doesn't work for last_java_sp == fp, which currently can't portably work anyway since C2 doesn't save fp"); ++ ++ // set last Java frame before call ++ Label before_call; ++ bind(before_call); ++ set_last_Java_frame(java_thread, last_java_sp, FP, before_call); ++ ++ // do the call ++ move(A0, java_thread); ++ call(entry_point, relocInfo::runtime_call_type); ++ ++ // restore the thread (cannot use the pushed argument since arguments ++ // may be overwritten by C code generated by an optimizing compiler); ++ // however can use the register value directly if it is callee saved. 
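The repeated "smashed argument" asserts above exist because arguments are copied into A1..A3 in order, so a later argument that already lives in an earlier argument register would be overwritten by the time it is read. A toy model of that hazard, not HotSpot code; the register file and names are invented for illustration:

    #include <cassert>

    enum Reg { A0, A1, A2, A3, NUM_REGS };

    void pass_two_args(long regs[NUM_REGS], Reg arg_1, Reg arg_2) {
      // If arg_2 already lived in A1, copying arg_1 into A1 first would
      // destroy it; the real code asserts this case away instead of
      // reordering the moves.
      assert(arg_2 != A1 && "smashed argument");
      if (arg_1 != A1) regs[A1] = regs[arg_1];
      if (arg_2 != A2) regs[A2] = regs[arg_2];
    }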
++#ifndef OPT_THREAD ++ get_thread(java_thread); ++#else ++#ifdef ASSERT ++ { ++ Label L; ++ get_thread(AT); ++ beq(java_thread, AT, L); ++ stop("MacroAssembler::call_VM_base: TREG not callee saved?"); ++ bind(L); ++ } ++#endif ++#endif ++ ++ // discard thread and arguments ++ ld_ptr(SP, java_thread, in_bytes(JavaThread::last_Java_sp_offset())); ++ // reset last Java frame ++ reset_last_Java_frame(java_thread, false); ++ ++ check_and_handle_popframe(java_thread); ++ check_and_handle_earlyret(java_thread); ++ if (check_exceptions) { ++ // check for pending exceptions (java_thread is set upon return) ++ Label L; ++ ld_d(AT, java_thread, in_bytes(Thread::pending_exception_offset())); ++ beq(AT, R0, L); ++ li(AT, target(before_call)); ++ push(AT); ++ jmp(StubRoutines::forward_exception_entry(), relocInfo::runtime_call_type); ++ bind(L); ++ } ++ ++ // get oop result if there is one and reset the value in the thread ++ if (oop_result->is_valid()) { ++ ld_d(oop_result, java_thread, in_bytes(JavaThread::vm_result_offset())); ++ st_d(R0, java_thread, in_bytes(JavaThread::vm_result_offset())); ++ verify_oop(oop_result); ++ } ++} ++ ++void MacroAssembler::call_VM_helper(Register oop_result, address entry_point, int number_of_arguments, bool check_exceptions) { ++ move(V0, SP); ++ //we also reserve space for java_thread here ++ li(AT, -(StackAlignmentInBytes)); ++ andr(SP, SP, AT); ++ call_VM_base(oop_result, NOREG, V0, entry_point, number_of_arguments, check_exceptions); ++} ++ ++void MacroAssembler::call_VM_leaf(address entry_point, int number_of_arguments) { ++ call_VM_leaf_base(entry_point, number_of_arguments); ++} ++ ++void MacroAssembler::call_VM_leaf(address entry_point, Register arg_0) { ++ if (arg_0 != A0) move(A0, arg_0); ++ call_VM_leaf(entry_point, 1); ++} ++ ++void MacroAssembler::call_VM_leaf(address entry_point, Register arg_0, Register arg_1) { ++ if (arg_0 != A0) move(A0, arg_0); ++ if (arg_1 != A1) move(A1, arg_1); assert(arg_1 != A0, "smashed argument"); ++ call_VM_leaf(entry_point, 2); ++} ++ ++void MacroAssembler::call_VM_leaf(address entry_point, Register arg_0, Register arg_1, Register arg_2) { ++ if (arg_0 != A0) move(A0, arg_0); ++ if (arg_1 != A1) move(A1, arg_1); assert(arg_1 != A0, "smashed argument"); ++ if (arg_2 != A2) move(A2, arg_2); assert(arg_2 != A0 && arg_2 != A1, "smashed argument"); ++ call_VM_leaf(entry_point, 3); ++} ++ ++void MacroAssembler::super_call_VM_leaf(address entry_point) { ++ MacroAssembler::call_VM_leaf_base(entry_point, 0); ++} ++ ++void MacroAssembler::super_call_VM_leaf(address entry_point, ++ Register arg_1) { ++ if (arg_1 != A0) move(A0, arg_1); ++ MacroAssembler::call_VM_leaf_base(entry_point, 1); ++} ++ ++void MacroAssembler::super_call_VM_leaf(address entry_point, ++ Register arg_1, ++ Register arg_2) { ++ if (arg_1 != A0) move(A0, arg_1); ++ if (arg_2 != A1) move(A1, arg_2); assert(arg_2 != A0, "smashed argument"); ++ MacroAssembler::call_VM_leaf_base(entry_point, 2); ++} ++ ++void MacroAssembler::super_call_VM_leaf(address entry_point, ++ Register arg_1, ++ Register arg_2, ++ Register arg_3) { ++ if (arg_1 != A0) move(A0, arg_1); ++ if (arg_2 != A1) move(A1, arg_2); assert(arg_2 != A0, "smashed argument"); ++ if (arg_3 != A2) move(A2, arg_3); assert(arg_3 != A0 && arg_3 != A1, "smashed argument"); ++ MacroAssembler::call_VM_leaf_base(entry_point, 3); ++} ++ ++void MacroAssembler::check_and_handle_earlyret(Register java_thread) { ++} ++ ++void MacroAssembler::check_and_handle_popframe(Register java_thread) { ++} ++ ++void 
MacroAssembler::null_check(Register reg, int offset) { ++ if (needs_explicit_null_check(offset)) { ++ // provoke OS NULL exception if reg = NULL by ++ // accessing M[reg] w/o changing any (non-CC) registers ++ // NOTE: cmpl is plenty here to provoke a segv ++ ld_w(AT, reg, 0); ++ } else { ++ // nothing to do, (later) access of M[reg + offset] ++ // will provoke OS NULL exception if reg = NULL ++ } ++} ++ ++void MacroAssembler::enter() { ++ push2(RA, FP); ++ move(FP, SP); ++} ++ ++void MacroAssembler::leave() { ++ move(SP, FP); ++ pop2(RA, FP); ++} ++ ++void MacroAssembler::build_frame(int framesize) { ++ assert(framesize >= 2 * wordSize, "framesize must include space for FP/RA"); ++ assert(framesize % (2 * wordSize) == 0, "must preserve 2 * wordSize alignment"); ++ if (Assembler::is_simm(-framesize, 12)) { ++ addi_d(SP, SP, -framesize); ++ st_ptr(FP, Address(SP, framesize - 2 * wordSize)); ++ st_ptr(RA, Address(SP, framesize - 1 * wordSize)); ++ if (PreserveFramePointer) ++ addi_d(FP, SP, framesize - 2 * wordSize); ++ } else { ++ addi_d(SP, SP, -2 * wordSize); ++ st_ptr(FP, Address(SP, 0 * wordSize)); ++ st_ptr(RA, Address(SP, 1 * wordSize)); ++ if (PreserveFramePointer) ++ move(FP, SP); ++ li(SCR1, framesize - 2 * wordSize); ++ sub_d(SP, SP, SCR1); ++ } ++} ++ ++void MacroAssembler::remove_frame(int framesize) { ++ assert(framesize >= 2 * wordSize, "framesize must include space for FP/RA"); ++ assert(framesize % (2*wordSize) == 0, "must preserve 2*wordSize alignment"); ++ if (Assembler::is_simm(framesize, 12)) { ++ ld_ptr(FP, Address(SP, framesize - 2 * wordSize)); ++ ld_ptr(RA, Address(SP, framesize - 1 * wordSize)); ++ addi_d(SP, SP, framesize); ++ } else { ++ li(SCR1, framesize - 2 * wordSize); ++ add_d(SP, SP, SCR1); ++ ld_ptr(FP, Address(SP, 0 * wordSize)); ++ ld_ptr(RA, Address(SP, 1 * wordSize)); ++ addi_d(SP, SP, 2 * wordSize); ++ } ++} ++ ++void MacroAssembler::unimplemented(const char* what) { ++ const char* buf = NULL; ++ { ++ ResourceMark rm; ++ stringStream ss; ++ ss.print("unimplemented: %s", what); ++ buf = code_string(ss.as_string()); ++ } ++ stop(buf); ++} ++ ++void MacroAssembler::get_thread(Register thread) { ++#ifdef MINIMIZE_RAM_USAGE ++ Register tmp; ++ ++ if (thread == AT) ++ tmp = T4; ++ else ++ tmp = AT; ++ ++ move(thread, SP); ++ shr(thread, PAGE_SHIFT); ++ ++ push(tmp); ++ li(tmp, ((1UL << (SP_BITLENGTH - PAGE_SHIFT)) - 1)); ++ andr(thread, thread, tmp); ++ shl(thread, Address::times_ptr); // sizeof(Thread *) ++ li(tmp, (long)ThreadLocalStorage::sp_map_addr()); ++ add_d(tmp, tmp, thread); ++ ld_ptr(thread, tmp, 0); ++ pop(tmp); ++#else ++ if (thread != V0) { ++ push(V0); ++ } ++ pushad_except_v0(); ++ ++ push(S5); ++ move(S5, SP); ++ li(AT, -StackAlignmentInBytes); ++ andr(SP, SP, AT); ++ // TODO: confirm reloc ++ call(CAST_FROM_FN_PTR(address, Thread::current), relocInfo::runtime_call_type); ++ move(SP, S5); ++ pop(S5); ++ ++ popad_except_v0(); ++ if (thread != V0) { ++ move(thread, V0); ++ pop(V0); ++ } ++#endif // MINIMIZE_RAM_USAGE ++} ++ ++void MacroAssembler::reset_last_Java_frame(Register java_thread, bool clear_fp) { ++ // determine java_thread register ++ if (!java_thread->is_valid()) { ++#ifndef OPT_THREAD ++ java_thread = T1; ++ get_thread(java_thread); ++#else ++ java_thread = TREG; ++#endif ++ } ++ // we must set sp to zero to clear frame ++ st_ptr(R0, java_thread, in_bytes(JavaThread::last_Java_sp_offset())); ++ // must clear fp, so that compiled frames are not confused; it is possible ++ // that we need it only for debugging ++ if(clear_fp) { ++ 
st_ptr(R0, java_thread, in_bytes(JavaThread::last_Java_fp_offset())); ++ } ++ ++ // Always clear the pc because it could have been set by make_walkable() ++ st_ptr(R0, java_thread, in_bytes(JavaThread::last_Java_pc_offset())); ++} ++ ++void MacroAssembler::reset_last_Java_frame(bool clear_fp) { ++ Register thread = TREG; ++#ifndef OPT_THREAD ++ get_thread(thread); ++#endif ++ // we must set sp to zero to clear frame ++ st_d(R0, Address(thread, JavaThread::last_Java_sp_offset())); ++ // must clear fp, so that compiled frames are not confused; it is ++ // possible that we need it only for debugging ++ if (clear_fp) { ++ st_d(R0, Address(thread, JavaThread::last_Java_fp_offset())); ++ } ++ ++ // Always clear the pc because it could have been set by make_walkable() ++ st_d(R0, Address(thread, JavaThread::last_Java_pc_offset())); ++} ++ ++// Write serialization page so VM thread can do a pseudo remote membar. ++// We use the current thread pointer to calculate a thread specific ++// offset to write to within the page. This minimizes bus traffic ++// due to cache line collision. ++void MacroAssembler::serialize_memory(Register thread, Register tmp) { ++ assert_different_registers(AT, tmp); ++ juint sps = os::get_serialize_page_shift_count(); ++ juint lsb = sps + 2; ++ juint msb = sps + log2_uint(os::vm_page_size()) - 1; ++ bstrpick_w(AT, thread, msb, lsb); ++ li(tmp, os::get_memory_serialize_page()); ++ alsl_d(tmp, AT, tmp, Address::times_2 - 1); ++ st_w(R0, tmp, 0); ++} ++ ++void MacroAssembler::safepoint_poll(Label& slow_path, Register thread_reg) { ++ if (SafepointMechanism::uses_thread_local_poll()) { ++ ld_d(AT, thread_reg, in_bytes(Thread::polling_page_offset())); ++ andi(AT, AT, SafepointMechanism::poll_bit()); ++ bne(AT, R0, slow_path); ++ } else { ++ li(AT, SafepointSynchronize::address_of_state()); ++ ld_w(AT, AT, 0); ++ addi_d(AT, AT, -SafepointSynchronize::_not_synchronized); ++ bne(AT, R0, slow_path); ++ } ++} ++ ++// Just like safepoint_poll, but use an acquiring load for thread- ++// local polling. ++// ++// We need an acquire here to ensure that any subsequent load of the ++// global SafepointSynchronize::_state flag is ordered after this load ++// of the local Thread::_polling page. We don't want this poll to ++// return false (i.e. not safepointing) and a later poll of the global ++// SafepointSynchronize::_state spuriously to return true. ++// ++// This is to avoid a race when we're in a native->Java transition ++// racing the code which wakes up from a safepoint. ++// ++void MacroAssembler::safepoint_poll_acquire(Label& slow_path, Register thread_reg) { ++ if (SafepointMechanism::uses_thread_local_poll()) { ++ ld_d(AT, thread_reg, in_bytes(Thread::polling_page_offset())); ++ membar(Assembler::Membar_mask_bits(LoadLoad|LoadStore)); ++ andi(AT, AT, SafepointMechanism::poll_bit()); ++ bne(AT, R0, slow_path); ++ } else { ++ safepoint_poll(slow_path, thread_reg); ++ } ++} ++ ++// Calls to C land ++// ++// When entering C land, the fp, & sp of the last Java frame have to be recorded ++// in the (thread-local) JavaThread object. When leaving C land, the last Java fp ++// has to be reset to 0. This is required to allow proper stack traversal. 
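Before the assembler routines below, a plain-C++ illustration of the record this comment describes may help; the field and type names here are assumptions, not the real JavaFrameAnchor layout:

    #include <cstdint>

    // Illustrative per-thread "last Java frame" record.
    struct LastJavaFrameSketch {
      intptr_t* last_sp = nullptr;  // non-null only while a walkable Java frame exists
      intptr_t* last_fp = nullptr;
      void*     last_pc = nullptr;
    };

    // Mirrors the store order used by set_last_Java_frame() below:
    // fp and pc first, sp last, so the anchor looks complete once published.
    void set_anchor(LastJavaFrameSketch& a, intptr_t* sp, intptr_t* fp, void* pc) {
      a.last_fp = fp;
      a.last_pc = pc;
      a.last_sp = sp;
    }

    // Mirrors reset_last_Java_frame(): clearing sp first makes the frame unwalkable.
    void reset_anchor(LastJavaFrameSketch& a) {
      a.last_sp = nullptr;
      a.last_fp = nullptr;
      a.last_pc = nullptr;
    }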
++void MacroAssembler::set_last_Java_frame(Register java_thread, ++ Register last_java_sp, ++ Register last_java_fp, ++ Label& last_java_pc) { ++ // determine java_thread register ++ if (!java_thread->is_valid()) { ++#ifndef OPT_THREAD ++ java_thread = T2; ++ get_thread(java_thread); ++#else ++ java_thread = TREG; ++#endif ++ } ++ ++ // determine last_java_sp register ++ if (!last_java_sp->is_valid()) { ++ last_java_sp = SP; ++ } ++ ++ // last_java_fp is optional ++ if (last_java_fp->is_valid()) { ++ st_ptr(last_java_fp, java_thread, in_bytes(JavaThread::last_Java_fp_offset())); ++ } ++ ++ // last_java_pc ++ lipc(AT, last_java_pc); ++ st_ptr(AT, java_thread, in_bytes(JavaThread::frame_anchor_offset() + ++ JavaFrameAnchor::last_Java_pc_offset())); ++ ++ st_ptr(last_java_sp, java_thread, in_bytes(JavaThread::last_Java_sp_offset())); ++} ++ ++void MacroAssembler::set_last_Java_frame(Register last_java_sp, ++ Register last_java_fp, ++ Label& last_java_pc) { ++ set_last_Java_frame(NOREG, last_java_sp, last_java_fp, last_java_pc); ++} ++ ++// Defines obj, preserves var_size_in_bytes, okay for t2 == var_size_in_bytes. ++void MacroAssembler::tlab_allocate(Register obj, ++ Register var_size_in_bytes, ++ int con_size_in_bytes, ++ Register t1, ++ Register t2, ++ Label& slow_case) { ++ BarrierSetAssembler *bs = BarrierSet::barrier_set()->barrier_set_assembler(); ++ bs->tlab_allocate(this, obj, var_size_in_bytes, con_size_in_bytes, t1, t2, slow_case); ++} ++ ++// Defines obj, preserves var_size_in_bytes ++void MacroAssembler::eden_allocate(Register obj, ++ Register var_size_in_bytes, ++ int con_size_in_bytes, ++ Register t1, ++ Label& slow_case) { ++ BarrierSetAssembler *bs = BarrierSet::barrier_set()->barrier_set_assembler(); ++ bs->eden_allocate(this, obj, var_size_in_bytes, con_size_in_bytes, t1, slow_case); ++} ++ ++ ++void MacroAssembler::incr_allocated_bytes(Register thread, ++ Register var_size_in_bytes, ++ int con_size_in_bytes, ++ Register t1) { ++ if (!thread->is_valid()) { ++#ifndef OPT_THREAD ++ assert(t1->is_valid(), "need temp reg"); ++ thread = t1; ++ get_thread(thread); ++#else ++ thread = TREG; ++#endif ++ } ++ ++ ld_ptr(AT, thread, in_bytes(JavaThread::allocated_bytes_offset())); ++ if (var_size_in_bytes->is_valid()) { ++ add_d(AT, AT, var_size_in_bytes); ++ } else { ++ addi_d(AT, AT, con_size_in_bytes); ++ } ++ st_ptr(AT, thread, in_bytes(JavaThread::allocated_bytes_offset())); ++} ++ ++void MacroAssembler::li(Register rd, jlong value) { ++ jlong hi12 = bitfield(value, 52, 12); ++ jlong lo52 = bitfield(value, 0, 52); ++ ++ if (hi12 != 0 && lo52 == 0) { ++ lu52i_d(rd, R0, hi12); ++ } else { ++ jlong hi20 = bitfield(value, 32, 20); ++ jlong lo20 = bitfield(value, 12, 20); ++ jlong lo12 = bitfield(value, 0, 12); ++ ++ if (lo20 == 0) { ++ ori(rd, R0, lo12); ++ } else if (bitfield(simm12(lo12), 12, 20) == lo20) { ++ addi_w(rd, R0, simm12(lo12)); ++ } else { ++ lu12i_w(rd, lo20); ++ if (lo12 != 0) ++ ori(rd, rd, lo12); ++ } ++ if (hi20 != bitfield(simm20(lo20), 20, 20)) ++ lu32i_d(rd, hi20); ++ if (hi12 != bitfield(simm20(hi20), 20, 12)) ++ lu52i_d(rd, rd, hi12); ++ } ++} ++ ++void MacroAssembler::patchable_li52(Register rd, jlong value) { ++ int count = 0; ++ ++ if (value <= max_jint && value >= min_jint) { ++ if (is_simm(value, 12)) { ++ addi_d(rd, R0, value); ++ count++; ++ } else { ++ lu12i_w(rd, split_low20(value >> 12)); ++ count++; ++ if (split_low12(value)) { ++ ori(rd, rd, split_low12(value)); ++ count++; ++ } ++ } ++ } else if (is_simm(value, 52)) { ++ lu12i_w(rd, 
split_low20(value >> 12)); ++ count++; ++ if (split_low12(value)) { ++ ori(rd, rd, split_low12(value)); ++ count++; ++ } ++ lu32i_d(rd, split_low20(value >> 32)); ++ count++; ++ } else { ++ tty->print_cr("value = 0x%lx", value); ++ guarantee(false, "Not supported yet !"); ++ } ++ ++ while (count < 3) { ++ nop(); ++ count++; ++ } ++} ++ ++void MacroAssembler::lipc(Register rd, Label& L) { ++ if (L.is_bound()) { ++ jint offs = (target(L) - pc()) >> 2; ++ guarantee(is_simm(offs, 20), "Not signed 20-bit offset"); ++ pcaddi(rd, offs); ++ } else { ++ InstructionMark im(this); ++ L.add_patch_at(code(), locator()); ++ pcaddi(rd, 0); ++ } ++} ++ ++void MacroAssembler::set_narrow_klass(Register dst, Klass* k) { ++ assert(UseCompressedClassPointers, "should only be used for compressed header"); ++ assert(oop_recorder() != NULL, "this assembler needs an OopRecorder"); ++ ++ int klass_index = oop_recorder()->find_index(k); ++ RelocationHolder rspec = metadata_Relocation::spec(klass_index); ++ long narrowKlass = (long)Klass::encode_klass(k); ++ ++ relocate(rspec, Assembler::narrow_oop_operand); ++ patchable_li52(dst, narrowKlass); ++} ++ ++void MacroAssembler::set_narrow_oop(Register dst, jobject obj) { ++ assert(UseCompressedOops, "should only be used for compressed header"); ++ assert(oop_recorder() != NULL, "this assembler needs an OopRecorder"); ++ ++ int oop_index = oop_recorder()->find_index(obj); ++ RelocationHolder rspec = oop_Relocation::spec(oop_index); ++ ++ relocate(rspec, Assembler::narrow_oop_operand); ++ patchable_li52(dst, oop_index); ++} ++ ++// ((OopHandle)result).resolve(); ++void MacroAssembler::resolve_oop_handle(Register result, Register tmp) { ++ // OopHandle::resolve is an indirection. ++ access_load_at(T_OBJECT, IN_NATIVE, result, Address(result, 0), tmp, NOREG); ++} ++ ++void MacroAssembler::load_mirror(Register mirror, Register method, Register tmp) { ++ // get mirror ++ const int mirror_offset = in_bytes(Klass::java_mirror_offset()); ++ ld_ptr(mirror, method, in_bytes(Method::const_offset())); ++ ld_ptr(mirror, mirror, in_bytes(ConstMethod::constants_offset())); ++ ld_ptr(mirror, mirror, ConstantPool::pool_holder_offset_in_bytes()); ++ ld_ptr(mirror, mirror, mirror_offset); ++ resolve_oop_handle(mirror, tmp); ++} ++ ++void MacroAssembler::verify_oop(Register reg, const char* s) { ++ if (!VerifyOops) return; ++ ++ const char * b = NULL; ++ stringStream ss; ++ ss.print("verify_oop: %s: %s", reg->name(), s); ++ b = code_string(ss.as_string()); ++ ++ addi_d(SP, SP, -6 * wordSize); ++ st_ptr(SCR1, Address(SP, 0 * wordSize)); ++ st_ptr(SCR2, Address(SP, 1 * wordSize)); ++ st_ptr(RA, Address(SP, 2 * wordSize)); ++ st_ptr(A0, Address(SP, 3 * wordSize)); ++ st_ptr(A1, Address(SP, 4 * wordSize)); ++ ++ move(A1, reg); ++ patchable_li52(A0, (uintptr_t)(address)b); // Fixed size instructions ++ li(SCR2, StubRoutines::verify_oop_subroutine_entry_address()); ++ ld_ptr(SCR2, Address(SCR2)); ++ jalr(SCR2); ++ ++ ld_ptr(SCR1, Address(SP, 0 * wordSize)); ++ ld_ptr(SCR2, Address(SP, 1 * wordSize)); ++ ld_ptr(RA, Address(SP, 2 * wordSize)); ++ ld_ptr(A0, Address(SP, 3 * wordSize)); ++ ld_ptr(A1, Address(SP, 4 * wordSize)); ++ addi_d(SP, SP, 6 * wordSize); ++} ++ ++void MacroAssembler::verify_oop_addr(Address addr, const char* s) { ++ if (!VerifyOops) return; ++ ++ const char* b = NULL; ++ { ++ ResourceMark rm; ++ stringStream ss; ++ ss.print("verify_oop_addr: %s", s); ++ b = code_string(ss.as_string()); ++ } ++ ++ addi_d(SP, SP, -6 * wordSize); ++ st_ptr(SCR1, Address(SP, 0 * wordSize)); ++ 
st_ptr(SCR2, Address(SP, 1 * wordSize)); ++ st_ptr(RA, Address(SP, 2 * wordSize)); ++ st_ptr(A0, Address(SP, 3 * wordSize)); ++ st_ptr(A1, Address(SP, 4 * wordSize)); ++ ++ patchable_li52(A0, (uintptr_t)(address)b); // Fixed size instructions ++ // addr may contain sp so we will have to adjust it based on the ++ // pushes that we just did. ++ if (addr.uses(SP)) { ++ lea(A1, addr); ++ ld_ptr(A1, Address(A1, 6 * wordSize)); ++ } else { ++ ld_ptr(A1, addr); ++ } ++ ++ // call indirectly to solve generation ordering problem ++ li(SCR2, StubRoutines::verify_oop_subroutine_entry_address()); ++ ld_ptr(SCR2, Address(SCR2)); ++ jalr(SCR2); ++ ++ ld_ptr(SCR1, Address(SP, 0 * wordSize)); ++ ld_ptr(SCR2, Address(SP, 1 * wordSize)); ++ ld_ptr(RA, Address(SP, 2 * wordSize)); ++ ld_ptr(A0, Address(SP, 3 * wordSize)); ++ ld_ptr(A1, Address(SP, 4 * wordSize)); ++ addi_d(SP, SP, 6 * wordSize); ++} ++ ++// used registers : SCR1, SCR2 ++void MacroAssembler::verify_oop_subroutine() { ++ // RA: ra ++ // A0: char* error message ++ // A1: oop object to verify ++ Label exit, error; ++ // increment counter ++ li(SCR2, (long)StubRoutines::verify_oop_count_addr()); ++ ld_w(SCR1, SCR2, 0); ++ addi_d(SCR1, SCR1, 1); ++ st_w(SCR1, SCR2, 0); ++ ++ // make sure object is 'reasonable' ++ beqz(A1, exit); // if obj is NULL it is ok ++ ++ // Check if the oop is in the right area of memory ++ // const int oop_mask = Universe::verify_oop_mask(); ++ // const int oop_bits = Universe::verify_oop_bits(); ++ const uintptr_t oop_mask = Universe::verify_oop_mask(); ++ const uintptr_t oop_bits = Universe::verify_oop_bits(); ++ li(SCR1, oop_mask); ++ andr(SCR2, A1, SCR1); ++ li(SCR1, oop_bits); ++ bne(SCR2, SCR1, error); ++ ++ // make sure klass is 'reasonable' ++ // add for compressedoops ++ load_klass(SCR2, A1); ++ beqz(SCR2, error); // if klass is NULL it is broken ++ // return if everything seems ok ++ bind(exit); ++ ++ jr(RA); ++ ++ // handle errors ++ bind(error); ++ pushad(); ++ call(CAST_FROM_FN_PTR(address, MacroAssembler::debug), relocInfo::runtime_call_type); ++ popad(); ++ jr(RA); ++} ++ ++void MacroAssembler::verify_tlab(Register t1, Register t2) { ++#ifdef ASSERT ++ assert_different_registers(t1, t2, AT); ++ if (UseTLAB && VerifyOops) { ++ Label next, ok; ++ ++ get_thread(t1); ++ ++ ld_ptr(t2, t1, in_bytes(JavaThread::tlab_top_offset())); ++ ld_ptr(AT, t1, in_bytes(JavaThread::tlab_start_offset())); ++ bgeu(t2, AT, next); ++ ++ stop("assert(top >= start)"); ++ ++ bind(next); ++ ld_ptr(AT, t1, in_bytes(JavaThread::tlab_end_offset())); ++ bgeu(AT, t2, ok); ++ ++ stop("assert(top <= end)"); ++ ++ bind(ok); ++ ++ } ++#endif ++} ++ ++RegisterOrConstant MacroAssembler::delayed_value_impl(intptr_t* delayed_value_addr, ++ Register tmp, ++ int offset) { ++ //TODO: LA ++ guarantee(0, "LA not implemented yet"); ++ return RegisterOrConstant(tmp); ++} ++ ++void MacroAssembler::hswap(Register reg) { ++ //short ++ //andi(reg, reg, 0xffff); ++ srli_w(AT, reg, 8); ++ slli_w(reg, reg, 24); ++ srai_w(reg, reg, 16); ++ orr(reg, reg, AT); ++} ++ ++void MacroAssembler::huswap(Register reg) { ++ srli_d(AT, reg, 8); ++ slli_d(reg, reg, 24); ++ srli_d(reg, reg, 16); ++ orr(reg, reg, AT); ++ bstrpick_d(reg, reg, 15, 0); ++} ++ ++// something funny to do this will only one more register AT ++// 32 bits ++void MacroAssembler::swap(Register reg) { ++ srli_w(AT, reg, 8); ++ slli_w(reg, reg, 24); ++ orr(reg, reg, AT); ++ //reg : 4 1 2 3 ++ srli_w(AT, AT, 16); ++ xorr(AT, AT, reg); ++ andi(AT, AT, 0xff); ++ //AT : 0 0 0 1^3); ++ xorr(reg, reg, AT); ++ 
//reg : 4 1 2 1 ++ slli_w(AT, AT, 16); ++ xorr(reg, reg, AT); ++ //reg : 4 3 2 1 ++} ++ ++void MacroAssembler::cmpxchg(Address addr, Register oldval, Register newval, ++ Register resflag, bool retold, bool barrier) { ++ assert(oldval != resflag, "oldval != resflag"); ++ assert(newval != resflag, "newval != resflag"); ++ Label again, succ, fail; ++ ++ bind(again); ++ ll_d(resflag, addr); ++ bne(resflag, oldval, fail); ++ move(resflag, newval); ++ sc_d(resflag, addr); ++ beqz(resflag, again); ++ b(succ); ++ ++ bind(fail); ++ if (barrier) ++ dbar(0x700); ++ if (retold && oldval != R0) ++ move(oldval, resflag); ++ move(resflag, R0); ++ bind(succ); ++} ++ ++void MacroAssembler::cmpxchg(Address addr, Register oldval, Register newval, ++ Register tmp, bool retold, bool barrier, Label& succ, Label* fail) { ++ assert(oldval != tmp, "oldval != tmp"); ++ assert(newval != tmp, "newval != tmp"); ++ Label again, neq; ++ ++ bind(again); ++ ll_d(tmp, addr); ++ bne(tmp, oldval, neq); ++ move(tmp, newval); ++ sc_d(tmp, addr); ++ beqz(tmp, again); ++ b(succ); ++ ++ bind(neq); ++ if (barrier) ++ dbar(0x700); ++ if (retold && oldval != R0) ++ move(oldval, tmp); ++ if (fail) ++ b(*fail); ++} ++ ++void MacroAssembler::cmpxchg32(Address addr, Register oldval, Register newval, ++ Register resflag, bool sign, bool retold, bool barrier) { ++ assert(oldval != resflag, "oldval != resflag"); ++ assert(newval != resflag, "newval != resflag"); ++ Label again, succ, fail; ++ ++ bind(again); ++ ll_w(resflag, addr); ++ if (!sign) ++ lu32i_d(resflag, 0); ++ bne(resflag, oldval, fail); ++ move(resflag, newval); ++ sc_w(resflag, addr); ++ beqz(resflag, again); ++ b(succ); ++ ++ bind(fail); ++ if (barrier) ++ dbar(0x700); ++ if (retold && oldval != R0) ++ move(oldval, resflag); ++ move(resflag, R0); ++ bind(succ); ++} ++ ++void MacroAssembler::cmpxchg32(Address addr, Register oldval, Register newval, Register tmp, ++ bool sign, bool retold, bool barrier, Label& succ, Label* fail) { ++ assert(oldval != tmp, "oldval != tmp"); ++ assert(newval != tmp, "newval != tmp"); ++ Label again, neq; ++ ++ bind(again); ++ ll_w(tmp, addr); ++ if (!sign) ++ lu32i_d(tmp, 0); ++ bne(tmp, oldval, neq); ++ move(tmp, newval); ++ sc_w(tmp, addr); ++ beqz(tmp, again); ++ b(succ); ++ ++ bind(neq); ++ if (barrier) ++ dbar(0x700); ++ if (retold && oldval != R0) ++ move(oldval, tmp); ++ if (fail) ++ b(*fail); ++} ++ ++// be sure the three register is different ++void MacroAssembler::rem_s(FloatRegister fd, FloatRegister fs, FloatRegister ft, FloatRegister tmp) { ++ //TODO: LA ++ guarantee(0, "LA not implemented yet"); ++} ++ ++// be sure the three register is different ++void MacroAssembler::rem_d(FloatRegister fd, FloatRegister fs, FloatRegister ft, FloatRegister tmp) { ++ //TODO: LA ++ guarantee(0, "LA not implemented yet"); ++} ++ ++#ifdef COMPILER2 ++// Fast_Lock and Fast_Unlock used by C2 ++ ++// Because the transitions from emitted code to the runtime ++// monitorenter/exit helper stubs are so slow it's critical that ++// we inline both the stack-locking fast-path and the inflated fast path. ++// ++// See also: cmpFastLock and cmpFastUnlock. ++// ++// What follows is a specialized inline transliteration of the code ++// in slow_enter() and slow_exit(). If we're concerned about I$ bloat ++// another option would be to emit TrySlowEnter and TrySlowExit methods ++// at startup-time. These methods would accept arguments as ++// (Obj, Self, box, Scratch) and return success-failure ++// indications in the icc.ZFlag. 
Fast_Lock and Fast_Unlock would simply ++// marshal the arguments and emit calls to TrySlowEnter and TrySlowExit. ++// In practice, however, the # of lock sites is bounded and is usually small. ++// Besides the call overhead, TrySlowEnter and TrySlowExit might suffer ++// if the processor uses simple bimodal branch predictors keyed by EIP ++// Since the helper routines would be called from multiple synchronization ++// sites. ++// ++// An even better approach would be write "MonitorEnter()" and "MonitorExit()" ++// in java - using j.u.c and unsafe - and just bind the lock and unlock sites ++// to those specialized methods. That'd give us a mostly platform-independent ++// implementation that the JITs could optimize and inline at their pleasure. ++// Done correctly, the only time we'd need to cross to native could would be ++// to park() or unpark() threads. We'd also need a few more unsafe operators ++// to (a) prevent compiler-JIT reordering of non-volatile accesses, and ++// (b) explicit barriers or fence operations. ++// ++// TODO: ++// ++// * Arrange for C2 to pass "Self" into Fast_Lock and Fast_Unlock in one of the registers (scr). ++// This avoids manifesting the Self pointer in the Fast_Lock and Fast_Unlock terminals. ++// Given TLAB allocation, Self is usually manifested in a register, so passing it into ++// the lock operators would typically be faster than reifying Self. ++// ++// * Ideally I'd define the primitives as: ++// fast_lock (nax Obj, nax box, res, tmp, nax scr) where tmp and scr are KILLED. ++// fast_unlock (nax Obj, box, res, nax tmp) where tmp are KILLED ++// Unfortunately ADLC bugs prevent us from expressing the ideal form. ++// Instead, we're stuck with a rather awkward and brittle register assignments below. ++// Furthermore the register assignments are overconstrained, possibly resulting in ++// sub-optimal code near the synchronization site. ++// ++// * Eliminate the sp-proximity tests and just use "== Self" tests instead. ++// Alternately, use a better sp-proximity test. ++// ++// * Currently ObjectMonitor._Owner can hold either an sp value or a (THREAD *) value. ++// Either one is sufficient to uniquely identify a thread. ++// TODO: eliminate use of sp in _owner and use get_thread(tr) instead. ++// ++// * Intrinsify notify() and notifyAll() for the common cases where the ++// object is locked by the calling thread but the waitlist is empty. ++// avoid the expensive JNI call to JVM_Notify() and JVM_NotifyAll(). ++// ++// * use jccb and jmpb instead of jcc and jmp to improve code density. ++// But beware of excessive branch density on AMD Opterons. ++// ++// * Both Fast_Lock and Fast_Unlock set the ICC.ZF to indicate success ++// or failure of the fast-path. If the fast-path fails then we pass ++// control to the slow-path, typically in C. In Fast_Lock and ++// Fast_Unlock we often branch to DONE_LABEL, just to find that C2 ++// will emit a conditional branch immediately after the node. ++// So we have branches to branches and lots of ICC.ZF games. ++// Instead, it might be better to have C2 pass a "FailureLabel" ++// into Fast_Lock and Fast_Unlock. In the case of success, control ++// will drop through the node. ICC.ZF is undefined at exit. 
++// In the case of failure, the node will branch directly to the ++// FailureLabel ++ ++// obj: object to lock ++// box: on-stack box address (displaced header location) ++// tmp: tmp -- KILLED ++// scr: tmp -- KILLED ++void MacroAssembler::fast_lock(Register objReg, Register boxReg, Register resReg, ++ Register tmpReg, Register scrReg) { ++ Label IsInflated, DONE, DONE_SET; ++ ++ // Ensure the register assignents are disjoint ++ guarantee(objReg != boxReg, ""); ++ guarantee(objReg != tmpReg, ""); ++ guarantee(objReg != scrReg, ""); ++ guarantee(boxReg != tmpReg, ""); ++ guarantee(boxReg != scrReg, ""); ++ ++ block_comment("FastLock"); ++ ++ if (PrintBiasedLockingStatistics) { ++ atomic_inc32((address)BiasedLocking::total_entry_count_addr(), 1, tmpReg, scrReg); ++ } ++ ++ if (EmitSync & 1) { ++ move(AT, R0); ++ return; ++ } else ++ if (EmitSync & 2) { ++ Label DONE_LABEL ; ++ if (UseBiasedLocking) { ++ // Note: tmpReg maps to the swap_reg argument and scrReg to the tmp_reg argument. ++ biased_locking_enter(boxReg, objReg, tmpReg, scrReg, false, DONE_LABEL, NULL); ++ } ++ ++ ld_d(tmpReg, Address(objReg, 0)) ; // fetch markword ++ ori(tmpReg, tmpReg, 0x1); ++ st_d(tmpReg, Address(boxReg, 0)); // Anticipate successful CAS ++ ++ cmpxchg(Address(objReg, 0), tmpReg, boxReg, scrReg, true, false, DONE_LABEL); // Updates tmpReg ++ ++ // Recursive locking ++ sub_d(tmpReg, tmpReg, SP); ++ li(AT, (7 - os::vm_page_size() )); ++ andr(tmpReg, tmpReg, AT); ++ st_d(tmpReg, Address(boxReg, 0)); ++ bind(DONE_LABEL) ; ++ } else { ++ // Possible cases that we'll encounter in fast_lock ++ // ------------------------------------------------ ++ // * Inflated ++ // -- unlocked ++ // -- Locked ++ // = by self ++ // = by other ++ // * biased ++ // -- by Self ++ // -- by other ++ // * neutral ++ // * stack-locked ++ // -- by self ++ // = sp-proximity test hits ++ // = sp-proximity test generates false-negative ++ // -- by other ++ // ++ ++ // TODO: optimize away redundant LDs of obj->mark and improve the markword triage ++ // order to reduce the number of conditional branches in the most common cases. ++ // Beware -- there's a subtle invariant that fetch of the markword ++ // at [FETCH], below, will never observe a biased encoding (*101b). ++ // If this invariant is not held we risk exclusion (safety) failure. ++ if (UseBiasedLocking && !UseOptoBiasInlining) { ++ Label succ, fail; ++ biased_locking_enter(boxReg, objReg, tmpReg, scrReg, false, succ, NULL); ++ b(fail); ++ bind(succ); ++ li(resReg, 1); ++ b(DONE); ++ bind(fail); ++ } ++ ++ ld_d(tmpReg, Address(objReg, 0)); //Fetch the markword of the object. ++ andi(AT, tmpReg, markOopDesc::monitor_value); ++ bnez(AT, IsInflated); // inflated vs stack-locked|neutral|bias ++ ++ // Attempt stack-locking ... ++ ori(tmpReg, tmpReg, markOopDesc::unlocked_value); ++ st_d(tmpReg, Address(boxReg, 0)); // Anticipate successful CAS ++ ++ if (PrintBiasedLockingStatistics) { ++ Label SUCC, FAIL; ++ cmpxchg(Address(objReg, 0), tmpReg, boxReg, scrReg, true, false, SUCC, &FAIL); // Updates tmpReg ++ bind(SUCC); ++ atomic_inc32((address)BiasedLocking::fast_path_entry_count_addr(), 1, AT, scrReg); ++ li(resReg, 1); ++ b(DONE); ++ bind(FAIL); ++ } else { ++ // If cmpxchg is succ, then scrReg = 1 ++ cmpxchg(Address(objReg, 0), tmpReg, boxReg, scrReg, true, false, DONE_SET); // Updates tmpReg ++ } ++ ++ // Recursive locking ++ // The object is stack-locked: markword contains stack pointer to BasicLock. ++ // Locked by current thread if difference with current SP is less than one page. 
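The sub_d/li/andr sequence that follows packs the "locked by the current thread" test into a single mask: after the failed CAS, tmpReg holds the mark word, which for a stack lock is a pointer into some thread's stack. A hedged C++ rendering of the same arithmetic; the page size is an assumption and the unsigned wrap-around is intentional:

    #include <cstdint>

    // Zero only when (mark - sp) is 8-byte aligned and less than one page
    // above the current SP, i.e. the BasicLock lives in this thread's frame.
    bool is_recursive_stack_lock(uintptr_t mark_word, uintptr_t sp,
                                 uintptr_t page_size /* power of two assumed */) {
      return ((mark_word - sp) & (7 - page_size)) == 0;
    }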
++ sub_d(tmpReg, tmpReg, SP); ++ li(AT, 7 - os::vm_page_size()); ++ andr(tmpReg, tmpReg, AT); ++ st_d(tmpReg, Address(boxReg, 0)); ++ ++ if (PrintBiasedLockingStatistics) { ++ Label L; ++ // tmpReg == 0 => BiasedLocking::_fast_path_entry_count++ ++ bnez(tmpReg, L); ++ atomic_inc32((address)BiasedLocking::fast_path_entry_count_addr(), 1, AT, scrReg); ++ bind(L); ++ } ++ ++ sltui(resReg, tmpReg, 1); // resReg = (tmpReg == 0) ? 1 : 0 ++ b(DONE); ++ ++ bind(IsInflated); ++ // The object's monitor m is unlocked iff m->owner == NULL, ++ // otherwise m->owner may contain a thread or a stack address. ++ ++ // TODO: someday avoid the ST-before-CAS penalty by ++ // relocating (deferring) the following ST. ++ // We should also think about trying a CAS without having ++ // fetched _owner. If the CAS is successful we may ++ // avoid an RTO->RTS upgrade on the $line. ++ // Without cast to int32_t a movptr will destroy r10 which is typically obj ++ li(AT, (int32_t)intptr_t(markOopDesc::unused_mark())); ++ st_d(AT, Address(boxReg, 0)); ++ ++ ld_d(AT, Address(tmpReg, ObjectMonitor::owner_offset_in_bytes() - 2)); ++ // if (m->owner != 0) => AT = 0, goto slow path. ++ move(scrReg, R0); ++ bnez(AT, DONE_SET); ++ ++#ifndef OPT_THREAD ++ get_thread(TREG) ; ++#endif ++ // It's inflated and appears unlocked ++ addi_d(tmpReg, tmpReg, ObjectMonitor::owner_offset_in_bytes() - 2); ++ cmpxchg(Address(tmpReg, 0), R0, TREG, scrReg, false, false); ++ // Intentional fall-through into DONE ... ++ ++ bind(DONE_SET); ++ move(resReg, scrReg); ++ ++ // DONE is a hot target - we'd really like to place it at the ++ // start of cache line by padding with NOPs. ++ // See the AMD and Intel software optimization manuals for the ++ // most efficient "long" NOP encodings. ++ // Unfortunately none of our alignment mechanisms suffice. ++ bind(DONE); ++ // At DONE the resReg is set as follows ... ++ // Fast_Unlock uses the same protocol. ++ // resReg == 1 -> Success ++ // resREg == 0 -> Failure - force control through the slow-path ++ ++ // Avoid branch-to-branch on AMD processors ++ // This appears to be superstition. ++ if (EmitSync & 32) nop() ; ++ ++ } ++} ++ ++// obj: object to unlock ++// box: box address (displaced header location), killed. ++// tmp: killed tmp; cannot be obj nor box. ++// ++// Some commentary on balanced locking: ++// ++// Fast_Lock and Fast_Unlock are emitted only for provably balanced lock sites. ++// Methods that don't have provably balanced locking are forced to run in the ++// interpreter - such methods won't be compiled to use fast_lock and fast_unlock. ++// The interpreter provides two properties: ++// I1: At return-time the interpreter automatically and quietly unlocks any ++// objects acquired the current activation (frame). Recall that the ++// interpreter maintains an on-stack list of locks currently held by ++// a frame. ++// I2: If a method attempts to unlock an object that is not held by the ++// the frame the interpreter throws IMSX. ++// ++// Lets say A(), which has provably balanced locking, acquires O and then calls B(). ++// B() doesn't have provably balanced locking so it runs in the interpreter. ++// Control returns to A() and A() unlocks O. By I1 and I2, above, we know that O ++// is still locked by A(). ++// ++// The only other source of unbalanced locking would be JNI. The "Java Native Interface: ++// Programmer's Guide and Specification" claims that an object locked by jni_monitorenter ++// should not be unlocked by "normal" java-level locking and vice-versa. 
The specification ++// doesn't specify what will occur if a program engages in such mixed-mode locking, however. ++ ++void MacroAssembler::fast_unlock(Register objReg, Register boxReg, Register resReg, ++ Register tmpReg, Register scrReg) { ++ Label DONE, DONE_SET, Stacked, Inflated; ++ ++ guarantee(objReg != boxReg, ""); ++ guarantee(objReg != tmpReg, ""); ++ guarantee(objReg != scrReg, ""); ++ guarantee(boxReg != tmpReg, ""); ++ guarantee(boxReg != scrReg, ""); ++ ++ block_comment("FastUnlock"); ++ ++ if (EmitSync & 4) { ++ // Disable - inhibit all inlining. Force control through the slow-path ++ move(AT, R0); ++ return; ++ } else ++ if (EmitSync & 8) { ++ Label DONE_LABEL ; ++ if (UseBiasedLocking) { ++ biased_locking_exit(objReg, tmpReg, DONE_LABEL); ++ } ++ // classic stack-locking code ... ++ ld_d(tmpReg, Address(boxReg, 0)) ; ++ assert_different_registers(AT, tmpReg); ++ li(AT, 0x1); ++ beq(tmpReg, R0, DONE_LABEL) ; ++ ++ cmpxchg(Address(objReg, 0), boxReg, tmpReg, AT, false, false); ++ bind(DONE_LABEL); ++ } else { ++ Label CheckSucc; ++ ++ // Critically, the biased locking test must have precedence over ++ // and appear before the (box->dhw == 0) recursive stack-lock test. ++ if (UseBiasedLocking && !UseOptoBiasInlining) { ++ Label succ, fail; ++ biased_locking_exit(objReg, tmpReg, succ); ++ b(fail); ++ bind(succ); ++ li(resReg, 1); ++ b(DONE); ++ bind(fail); ++ } ++ ++ ld_d(tmpReg, Address(boxReg, 0)); // Examine the displaced header ++ sltui(AT, tmpReg, 1); ++ beqz(tmpReg, DONE_SET); // 0 indicates recursive stack-lock ++ ++ ld_d(tmpReg, Address(objReg, 0)); // Examine the object's markword ++ andi(AT, tmpReg, markOopDesc::monitor_value); ++ beqz(AT, Stacked); // Inflated? ++ ++ bind(Inflated); ++ // It's inflated. ++ // Despite our balanced locking property we still check that m->_owner == Self ++ // as java routines or native JNI code called by this thread might ++ // have released the lock. ++ // Refer to the comments in synchronizer.cpp for how we might encode extra ++ // state in _succ so we can avoid fetching EntryList|cxq. ++ // ++ // I'd like to add more cases in fast_lock() and fast_unlock() -- ++ // such as recursive enter and exit -- but we have to be wary of ++ // I$ bloat, T$ effects and BP$ effects. ++ // ++ // If there's no contention try a 1-0 exit. That is, exit without ++ // a costly MEMBAR or CAS. See synchronizer.cpp for details on how ++ // we detect and recover from the race that the 1-0 exit admits. ++ // ++ // Conceptually Fast_Unlock() must execute a STST|LDST "release" barrier ++ // before it STs null into _owner, releasing the lock. Updates ++ // to data protected by the critical section must be visible before ++ // we drop the lock (and thus before any other thread could acquire ++ // the lock and observe the fields protected by the lock). 
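The membar(LoadStore|StoreStore) followed by a plain store of R0 below is the release-before-unlock pattern this comment describes; in portable C++ it corresponds to a release store on the owner field. The monitor layout and field name are assumed for the sketch:

    #include <atomic>

    struct MonitorSketch {
      std::atomic<void*> owner{nullptr};
    };

    // Uncontended "1-0" exit: all writes made inside the critical section
    // become visible before other threads can observe owner == nullptr.
    void unlock_uncontended(MonitorSketch& m) {
      m.owner.store(nullptr, std::memory_order_release);
    }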
++#ifndef OPT_THREAD ++ get_thread(TREG); ++#endif ++ ++ // It's inflated ++ ld_d(scrReg, Address(tmpReg, ObjectMonitor::owner_offset_in_bytes() - 2)); ++ xorr(scrReg, scrReg, TREG); ++ ++ ld_d(AT, Address(tmpReg, ObjectMonitor::recursions_offset_in_bytes() - 2)); ++ orr(scrReg, scrReg, AT); ++ ++ move(AT, R0); ++ bnez(scrReg, DONE_SET); ++ ++ ld_d(scrReg, Address(tmpReg, ObjectMonitor::cxq_offset_in_bytes() - 2)); ++ ld_d(AT, Address(tmpReg, ObjectMonitor::EntryList_offset_in_bytes() - 2)); ++ orr(scrReg, scrReg, AT); ++ ++ move(AT, R0); ++ bnez(scrReg, DONE_SET); ++ ++ membar(Assembler::Membar_mask_bits(LoadStore|StoreStore)); ++ st_d(R0, Address(tmpReg, ObjectMonitor::owner_offset_in_bytes() - 2)); ++ li(resReg, 1); ++ b(DONE); ++ ++ bind(Stacked); ++ ld_d(tmpReg, Address(boxReg, 0)); ++ cmpxchg(Address(objReg, 0), boxReg, tmpReg, AT, false, false); ++ ++ bind(DONE_SET); ++ move(resReg, AT); ++ ++ if (EmitSync & 65536) { ++ bind (CheckSucc); ++ } ++ ++ bind(DONE); ++ ++ // Avoid branch to branch on AMD processors ++ if (EmitSync & 32768) { nop() ; } ++ } ++} ++#endif // COMPILER2 ++ ++void MacroAssembler::align(int modulus) { ++ while (offset() % modulus != 0) nop(); ++} ++ ++ ++void MacroAssembler::verify_FPU(int stack_depth, const char* s) { ++ //Unimplemented(); ++} ++ ++Register caller_saved_registers[] = {T7, T5, T6, A0, A1, A2, A3, A4, A5, A6, A7, T0, T1, T2, T3, T8, T4, S8, RA, FP}; ++Register caller_saved_registers_except_v0[] = {T7, T5, T6, A1, A2, A3, A4, A5, A6, A7, T0, T1, T2, T3, T8, T4, S8, RA, FP}; ++ ++ //TODO: LA ++//In LA, F0~23 are all caller-saved registers ++FloatRegister caller_saved_fpu_registers[] = {F0, F12, F13}; ++ ++// We preserve all caller-saved register ++void MacroAssembler::pushad(){ ++ int i; ++ // Fixed-point registers ++ int len = sizeof(caller_saved_registers) / sizeof(caller_saved_registers[0]); ++ addi_d(SP, SP, -1 * len * wordSize); ++ for (i = 0; i < len; i++) { ++ st_d(caller_saved_registers[i], SP, (len - i - 1) * wordSize); ++ } ++ ++ // Floating-point registers ++ len = sizeof(caller_saved_fpu_registers) / sizeof(caller_saved_fpu_registers[0]); ++ addi_d(SP, SP, -1 * len * wordSize); ++ for (i = 0; i < len; i++) { ++ fst_d(caller_saved_fpu_registers[i], SP, (len - i - 1) * wordSize); ++ } ++}; ++ ++void MacroAssembler::popad(){ ++ int i; ++ // Floating-point registers ++ int len = sizeof(caller_saved_fpu_registers) / sizeof(caller_saved_fpu_registers[0]); ++ for (i = 0; i < len; i++) ++ { ++ fld_d(caller_saved_fpu_registers[i], SP, (len - i - 1) * wordSize); ++ } ++ addi_d(SP, SP, len * wordSize); ++ ++ // Fixed-point registers ++ len = sizeof(caller_saved_registers) / sizeof(caller_saved_registers[0]); ++ for (i = 0; i < len; i++) ++ { ++ ld_d(caller_saved_registers[i], SP, (len - i - 1) * wordSize); ++ } ++ addi_d(SP, SP, len * wordSize); ++}; ++ ++// We preserve all caller-saved register except V0 ++void MacroAssembler::pushad_except_v0() { ++ int i; ++ // Fixed-point registers ++ int len = sizeof(caller_saved_registers_except_v0) / sizeof(caller_saved_registers_except_v0[0]); ++ addi_d(SP, SP, -1 * len * wordSize); ++ for (i = 0; i < len; i++) { ++ st_d(caller_saved_registers_except_v0[i], SP, (len - i - 1) * wordSize); ++ } ++ ++ // Floating-point registers ++ len = sizeof(caller_saved_fpu_registers) / sizeof(caller_saved_fpu_registers[0]); ++ addi_d(SP, SP, -1 * len * wordSize); ++ for (i = 0; i < len; i++) { ++ fst_d(caller_saved_fpu_registers[i], SP, (len - i - 1) * wordSize); ++ } ++} ++ ++void MacroAssembler::popad_except_v0() { ++ 
int i; ++ // Floating-point registers ++ int len = sizeof(caller_saved_fpu_registers) / sizeof(caller_saved_fpu_registers[0]); ++ for (i = 0; i < len; i++) { ++ fld_d(caller_saved_fpu_registers[i], SP, (len - i - 1) * wordSize); ++ } ++ addi_d(SP, SP, len * wordSize); ++ ++ // Fixed-point registers ++ len = sizeof(caller_saved_registers_except_v0) / sizeof(caller_saved_registers_except_v0[0]); ++ for (i = 0; i < len; i++) { ++ ld_d(caller_saved_registers_except_v0[i], SP, (len - i - 1) * wordSize); ++ } ++ addi_d(SP, SP, len * wordSize); ++} ++ ++void MacroAssembler::push2(Register reg1, Register reg2) { ++ addi_d(SP, SP, -16); ++ st_d(reg1, SP, 8); ++ st_d(reg2, SP, 0); ++} ++ ++void MacroAssembler::pop2(Register reg1, Register reg2) { ++ ld_d(reg1, SP, 8); ++ ld_d(reg2, SP, 0); ++ addi_d(SP, SP, 16); ++} ++ ++void MacroAssembler::push(unsigned int bitset) { ++ unsigned char regs[31]; ++ int count = 0; ++ ++ bitset >>= 1; ++ for (int reg = 1; reg < 31; reg++) { ++ if (1 & bitset) ++ regs[count++] = reg; ++ bitset >>= 1; ++ } ++ ++ addi_d(SP, SP, -align_up(count, 2) * wordSize); ++ for (int i = 0; i < count; i ++) ++ st_d(as_Register(regs[i]), SP, i * wordSize); ++} ++ ++void MacroAssembler::pop(unsigned int bitset) { ++ unsigned char regs[31]; ++ int count = 0; ++ ++ bitset >>= 1; ++ for (int reg = 1; reg < 31; reg++) { ++ if (1 & bitset) ++ regs[count++] = reg; ++ bitset >>= 1; ++ } ++ ++ for (int i = 0; i < count; i ++) ++ ld_d(as_Register(regs[i]), SP, i * wordSize); ++ addi_d(SP, SP, align_up(count, 2) * wordSize); ++} ++ ++// for UseCompressedOops Option ++void MacroAssembler::load_klass(Register dst, Register src) { ++ if(UseCompressedClassPointers){ ++ ld_wu(dst, Address(src, oopDesc::klass_offset_in_bytes())); ++ decode_klass_not_null(dst); ++ } else { ++ ld_d(dst, src, oopDesc::klass_offset_in_bytes()); ++ } ++} ++ ++void MacroAssembler::store_klass(Register dst, Register src) { ++ if(UseCompressedClassPointers){ ++ encode_klass_not_null(src); ++ st_w(src, dst, oopDesc::klass_offset_in_bytes()); ++ } else { ++ st_d(src, dst, oopDesc::klass_offset_in_bytes()); ++ } ++} ++ ++void MacroAssembler::load_prototype_header(Register dst, Register src) { ++ load_klass(dst, src); ++ ld_d(dst, Address(dst, Klass::prototype_header_offset())); ++} ++ ++void MacroAssembler::store_klass_gap(Register dst, Register src) { ++ if (UseCompressedClassPointers) { ++ st_w(src, dst, oopDesc::klass_gap_offset_in_bytes()); ++ } ++} ++ ++void MacroAssembler::access_load_at(BasicType type, DecoratorSet decorators, Register dst, Address src, ++ Register tmp1, Register thread_tmp) { ++ BarrierSetAssembler* bs = BarrierSet::barrier_set()->barrier_set_assembler(); ++ decorators = AccessInternal::decorator_fixup(decorators); ++ bool as_raw = (decorators & AS_RAW) != 0; ++ if (as_raw) { ++ bs->BarrierSetAssembler::load_at(this, decorators, type, dst, src, tmp1, thread_tmp); ++ } else { ++ bs->load_at(this, decorators, type, dst, src, tmp1, thread_tmp); ++ } ++} ++ ++void MacroAssembler::access_store_at(BasicType type, DecoratorSet decorators, Address dst, Register src, ++ Register tmp1, Register tmp2) { ++ BarrierSetAssembler* bs = BarrierSet::barrier_set()->barrier_set_assembler(); ++ decorators = AccessInternal::decorator_fixup(decorators); ++ bool as_raw = (decorators & AS_RAW) != 0; ++ if (as_raw) { ++ bs->BarrierSetAssembler::store_at(this, decorators, type, dst, src, tmp1, tmp2); ++ } else { ++ bs->store_at(this, decorators, type, dst, src, tmp1, tmp2); ++ } ++} ++ ++void 
MacroAssembler::load_heap_oop(Register dst, Address src, Register tmp1, ++ Register thread_tmp, DecoratorSet decorators) { ++ access_load_at(T_OBJECT, IN_HEAP | decorators, dst, src, tmp1, thread_tmp); ++} ++ ++// Doesn't do verfication, generates fixed size code ++void MacroAssembler::load_heap_oop_not_null(Register dst, Address src, Register tmp1, ++ Register thread_tmp, DecoratorSet decorators) { ++ access_load_at(T_OBJECT, IN_HEAP | IS_NOT_NULL | decorators, dst, src, tmp1, thread_tmp); ++} ++ ++void MacroAssembler::store_heap_oop(Address dst, Register src, Register tmp1, ++ Register tmp2, DecoratorSet decorators) { ++ access_store_at(T_OBJECT, IN_HEAP | decorators, dst, src, tmp1, tmp2); ++} ++ ++// Used for storing NULLs. ++void MacroAssembler::store_heap_oop_null(Address dst) { ++ access_store_at(T_OBJECT, IN_HEAP, dst, noreg, noreg, noreg); ++} ++ ++#ifdef ASSERT ++void MacroAssembler::verify_heapbase(const char* msg) { ++ assert (UseCompressedOops || UseCompressedClassPointers, "should be compressed"); ++ assert (Universe::heap() != NULL, "java heap should be initialized"); ++} ++#endif ++ ++// Algorithm must match oop.inline.hpp encode_heap_oop. ++void MacroAssembler::encode_heap_oop(Register r) { ++#ifdef ASSERT ++ verify_heapbase("MacroAssembler::encode_heap_oop:heap base corrupted?"); ++#endif ++ verify_oop(r, "broken oop in encode_heap_oop"); ++ if (Universe::narrow_oop_base() == NULL) { ++ if (Universe::narrow_oop_shift() != 0) { ++ assert(LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong"); ++ shr(r, LogMinObjAlignmentInBytes); ++ } ++ return; ++ } ++ ++ sub_d(AT, r, S5_heapbase); ++ maskeqz(r, AT, r); ++ if (Universe::narrow_oop_shift() != 0) { ++ assert(LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong"); ++ shr(r, LogMinObjAlignmentInBytes); ++ } ++} ++ ++void MacroAssembler::encode_heap_oop(Register dst, Register src) { ++#ifdef ASSERT ++ verify_heapbase("MacroAssembler::encode_heap_oop:heap base corrupted?"); ++#endif ++ verify_oop(src, "broken oop in encode_heap_oop"); ++ if (Universe::narrow_oop_base() == NULL) { ++ if (Universe::narrow_oop_shift() != 0) { ++ assert(LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong"); ++ srli_d(dst, src, LogMinObjAlignmentInBytes); ++ } else { ++ if (dst != src) { ++ move(dst, src); ++ } ++ } ++ return; ++ } ++ ++ sub_d(AT, src, S5_heapbase); ++ maskeqz(dst, AT, src); ++ if (Universe::narrow_oop_shift() != 0) { ++ assert(LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong"); ++ shr(dst, LogMinObjAlignmentInBytes); ++ } ++} ++ ++void MacroAssembler::encode_heap_oop_not_null(Register r) { ++ assert (UseCompressedOops, "should be compressed"); ++#ifdef ASSERT ++ if (CheckCompressedOops) { ++ Label ok; ++ bne(r, R0, ok); ++ stop("null oop passed to encode_heap_oop_not_null"); ++ bind(ok); ++ } ++#endif ++ verify_oop(r, "broken oop in encode_heap_oop_not_null"); ++ if (Universe::narrow_oop_base() != NULL) { ++ sub_d(r, r, S5_heapbase); ++ } ++ if (Universe::narrow_oop_shift() != 0) { ++ assert(LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong"); ++ shr(r, LogMinObjAlignmentInBytes); ++ } ++} ++ ++void MacroAssembler::encode_heap_oop_not_null(Register dst, Register src) { ++ assert (UseCompressedOops, "should be compressed"); ++#ifdef ASSERT ++ if (CheckCompressedOops) { ++ Label ok; ++ bne(src, R0, ok); ++ stop("null oop passed to encode_heap_oop_not_null2"); ++ bind(ok); ++ } ++#endif ++ verify_oop(src, 
"broken oop in encode_heap_oop_not_null2"); ++ if (Universe::narrow_oop_base() == NULL) { ++ if (Universe::narrow_oop_shift() != 0) { ++ assert(LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong"); ++ srli_d(dst, src, LogMinObjAlignmentInBytes); ++ } else { ++ if (dst != src) { ++ move(dst, src); ++ } ++ } ++ return; ++ } ++ ++ sub_d(dst, src, S5_heapbase); ++ if (Universe::narrow_oop_shift() != 0) { ++ assert(LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong"); ++ shr(dst, LogMinObjAlignmentInBytes); ++ } ++} ++ ++void MacroAssembler::decode_heap_oop(Register r) { ++#ifdef ASSERT ++ verify_heapbase("MacroAssembler::decode_heap_oop corrupted?"); ++#endif ++ if (Universe::narrow_oop_base() == NULL) { ++ if (Universe::narrow_oop_shift() != 0) { ++ assert(LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong"); ++ shl(r, LogMinObjAlignmentInBytes); ++ } ++ return; ++ } ++ ++ move(AT, r); ++ if (Universe::narrow_oop_shift() != 0) { ++ assert(LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong"); ++ if (LogMinObjAlignmentInBytes <= 4) { ++ alsl_d(r, r, S5_heapbase, LogMinObjAlignmentInBytes - 1); ++ } else { ++ shl(r, LogMinObjAlignmentInBytes); ++ add_d(r, r, S5_heapbase); ++ } ++ } else { ++ add_d(r, r, S5_heapbase); ++ } ++ maskeqz(r, r, AT); ++ verify_oop(r, "broken oop in decode_heap_oop"); ++} ++ ++void MacroAssembler::decode_heap_oop(Register dst, Register src) { ++#ifdef ASSERT ++ verify_heapbase("MacroAssembler::decode_heap_oop corrupted?"); ++#endif ++ if (Universe::narrow_oop_base() == NULL) { ++ if (Universe::narrow_oop_shift() != 0) { ++ assert(LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong"); ++ slli_d(dst, src, LogMinObjAlignmentInBytes); ++ } else { ++ if (dst != src) { ++ move(dst, src); ++ } ++ } ++ return; ++ } ++ ++ Register cond; ++ if (dst == src) { ++ cond = AT; ++ move(cond, src); ++ } else { ++ cond = src; ++ } ++ if (Universe::narrow_oop_shift() != 0) { ++ assert(LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong"); ++ if (LogMinObjAlignmentInBytes <= 4) { ++ alsl_d(dst, src, S5_heapbase, LogMinObjAlignmentInBytes - 1); ++ } else { ++ slli_d(dst, src, LogMinObjAlignmentInBytes); ++ add_d(dst, dst, S5_heapbase); ++ } ++ } else { ++ add_d(dst, src, S5_heapbase); ++ } ++ maskeqz(dst, dst, cond); ++ verify_oop(dst, "broken oop in decode_heap_oop"); ++} ++ ++void MacroAssembler::decode_heap_oop_not_null(Register r) { ++ // Note: it will change flags ++ assert(UseCompressedOops, "should only be used for compressed headers"); ++ assert(Universe::heap() != NULL, "java heap should be initialized"); ++ // Cannot assert, unverified entry point counts instructions (see .ad file) ++ // vtableStubs also counts instructions in pd_code_size_limit. ++ // Also do not verify_oop as this is called by verify_oop. 
++ if (Universe::narrow_oop_shift() != 0) { ++ assert(LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong"); ++ if (Universe::narrow_oop_base() != NULL) { ++ if (LogMinObjAlignmentInBytes <= 4) { ++ alsl_d(r, r, S5_heapbase, LogMinObjAlignmentInBytes - 1); ++ } else { ++ shl(r, LogMinObjAlignmentInBytes); ++ add_d(r, r, S5_heapbase); ++ } ++ } else { ++ shl(r, LogMinObjAlignmentInBytes); ++ } ++ } else { ++ assert(Universe::narrow_oop_base() == NULL, "sanity"); ++ } ++} ++ ++void MacroAssembler::decode_heap_oop_not_null(Register dst, Register src) { ++ assert(UseCompressedOops, "should only be used for compressed headers"); ++ assert(Universe::heap() != NULL, "java heap should be initialized"); ++ // Cannot assert, unverified entry point counts instructions (see .ad file) ++ // vtableStubs also counts instructions in pd_code_size_limit. ++ // Also do not verify_oop as this is called by verify_oop. ++ if (Universe::narrow_oop_shift() != 0) { ++ assert(LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong"); ++ if (Universe::narrow_oop_base() != NULL) { ++ if (LogMinObjAlignmentInBytes <= 4) { ++ alsl_d(dst, src, S5_heapbase, LogMinObjAlignmentInBytes - 1); ++ } else { ++ slli_d(dst, src, LogMinObjAlignmentInBytes); ++ add_d(dst, dst, S5_heapbase); ++ } ++ } else { ++ slli_d(dst, src, LogMinObjAlignmentInBytes); ++ } ++ } else { ++ assert (Universe::narrow_oop_base() == NULL, "sanity"); ++ if (dst != src) { ++ move(dst, src); ++ } ++ } ++} ++ ++void MacroAssembler::encode_klass_not_null(Register r) { ++ if (Universe::narrow_klass_base() != NULL) { ++ if (((uint64_t)Universe::narrow_klass_base() & 0xffffffff) == 0 ++ && Universe::narrow_klass_shift() == 0) { ++ bstrpick_d(r, r, 31, 0); ++ return; ++ } ++ assert(r != AT, "Encoding a klass in AT"); ++ li(AT, (int64_t)Universe::narrow_klass_base()); ++ sub_d(r, r, AT); ++ } ++ if (Universe::narrow_klass_shift() != 0) { ++ assert (LogKlassAlignmentInBytes == Universe::narrow_klass_shift(), "decode alg wrong"); ++ shr(r, LogKlassAlignmentInBytes); ++ } ++} ++ ++void MacroAssembler::encode_klass_not_null(Register dst, Register src) { ++ if (dst == src) { ++ encode_klass_not_null(src); ++ } else { ++ if (Universe::narrow_klass_base() != NULL) { ++ if (((uint64_t)Universe::narrow_klass_base() & 0xffffffff) == 0 ++ && Universe::narrow_klass_shift() == 0) { ++ bstrpick_d(dst, src, 31, 0); ++ return; ++ } ++ li(dst, (int64_t)Universe::narrow_klass_base()); ++ sub_d(dst, src, dst); ++ if (Universe::narrow_klass_shift() != 0) { ++ assert (LogKlassAlignmentInBytes == Universe::narrow_klass_shift(), "decode alg wrong"); ++ shr(dst, LogKlassAlignmentInBytes); ++ } ++ } else { ++ if (Universe::narrow_klass_shift() != 0) { ++ assert (LogKlassAlignmentInBytes == Universe::narrow_klass_shift(), "decode alg wrong"); ++ srli_d(dst, src, LogKlassAlignmentInBytes); ++ } else { ++ move(dst, src); ++ } ++ } ++ } ++} ++ ++void MacroAssembler::decode_klass_not_null(Register r) { ++ assert(UseCompressedClassPointers, "should only be used for compressed headers"); ++ assert(r != AT, "Decoding a klass in AT"); ++ // Cannot assert, unverified entry point counts instructions (see .ad file) ++ // vtableStubs also counts instructions in pd_code_size_limit. ++ // Also do not verify_oop as this is called by verify_oop. 
++ if (Universe::narrow_klass_base() != NULL) { ++ if (Universe::narrow_klass_shift() == 0) { ++ if (((uint64_t)Universe::narrow_klass_base() & 0xffffffff) == 0) { ++ lu32i_d(r, (uint64_t)Universe::narrow_klass_base() >> 32); ++ } else { ++ li(AT, (int64_t)Universe::narrow_klass_base()); ++ add_d(r, r, AT); ++ } ++ } else { ++ assert(LogKlassAlignmentInBytes == Universe::narrow_klass_shift(), "decode alg wrong"); ++ assert(LogKlassAlignmentInBytes == Address::times_8, "klass not aligned on 64bits?"); ++ li(AT, (int64_t)Universe::narrow_klass_base()); ++ alsl_d(r, r, AT, Address::times_8 - 1); ++ } ++ } else { ++ if (Universe::narrow_klass_shift() != 0) { ++ assert(LogKlassAlignmentInBytes == Universe::narrow_klass_shift(), "decode alg wrong"); ++ shl(r, LogKlassAlignmentInBytes); ++ } ++ } ++} ++ ++void MacroAssembler::decode_klass_not_null(Register dst, Register src) { ++ assert(UseCompressedClassPointers, "should only be used for compressed headers"); ++ if (dst == src) { ++ decode_klass_not_null(dst); ++ } else { ++ // Cannot assert, unverified entry point counts instructions (see .ad file) ++ // vtableStubs also counts instructions in pd_code_size_limit. ++ // Also do not verify_oop as this is called by verify_oop. ++ if (Universe::narrow_klass_base() != NULL) { ++ if (Universe::narrow_klass_shift() == 0) { ++ if (((uint64_t)Universe::narrow_klass_base() & 0xffffffff) == 0) { ++ move(dst, src); ++ lu32i_d(dst, (uint64_t)Universe::narrow_klass_base() >> 32); ++ } else { ++ li(dst, (int64_t)Universe::narrow_klass_base()); ++ add_d(dst, dst, src); ++ } ++ } else { ++ assert(LogKlassAlignmentInBytes == Universe::narrow_klass_shift(), "decode alg wrong"); ++ assert(LogKlassAlignmentInBytes == Address::times_8, "klass not aligned on 64bits?"); ++ li(dst, (int64_t)Universe::narrow_klass_base()); ++ alsl_d(dst, src, dst, Address::times_8 - 1); ++ } ++ } else { ++ if (Universe::narrow_klass_shift() != 0) { ++ assert(LogKlassAlignmentInBytes == Universe::narrow_klass_shift(), "decode alg wrong"); ++ slli_d(dst, src, LogKlassAlignmentInBytes); ++ } else { ++ move(dst, src); ++ } ++ } ++ } ++} ++ ++void MacroAssembler::reinit_heapbase() { ++ if (UseCompressedOops || UseCompressedClassPointers) { ++ if (Universe::heap() != NULL) { ++ if (Universe::narrow_oop_base() == NULL) { ++ move(S5_heapbase, R0); ++ } else { ++ li(S5_heapbase, (int64_t)Universe::narrow_ptrs_base()); ++ } ++ } else { ++ li(S5_heapbase, (intptr_t)Universe::narrow_ptrs_base_addr()); ++ ld_d(S5_heapbase, S5_heapbase, 0); ++ } ++ } ++} ++ ++void MacroAssembler::check_klass_subtype(Register sub_klass, ++ Register super_klass, ++ Register temp_reg, ++ Label& L_success) { ++//implement ind gen_subtype_check ++ Label L_failure; ++ check_klass_subtype_fast_path(sub_klass, super_klass, temp_reg, &L_success, &L_failure, NULL); ++ check_klass_subtype_slow_path(sub_klass, super_klass, temp_reg, noreg, &L_success, NULL); ++ bind(L_failure); ++} ++ ++void MacroAssembler::check_klass_subtype_fast_path(Register sub_klass, ++ Register super_klass, ++ Register temp_reg, ++ Label* L_success, ++ Label* L_failure, ++ Label* L_slow_path, ++ RegisterOrConstant super_check_offset) { ++ assert_different_registers(sub_klass, super_klass, temp_reg); ++ bool must_load_sco = (super_check_offset.constant_or_zero() == -1); ++ if (super_check_offset.is_register()) { ++ assert_different_registers(sub_klass, super_klass, ++ super_check_offset.as_register()); ++ } else if (must_load_sco) { ++ assert(temp_reg != noreg, "supply either a temp or a register offset"); 
++ } ++ ++ Label L_fallthrough; ++ int label_nulls = 0; ++ if (L_success == NULL) { L_success = &L_fallthrough; label_nulls++; } ++ if (L_failure == NULL) { L_failure = &L_fallthrough; label_nulls++; } ++ if (L_slow_path == NULL) { L_slow_path = &L_fallthrough; label_nulls++; } ++ assert(label_nulls <= 1, "at most one NULL in the batch"); ++ ++ int sc_offset = in_bytes(Klass::secondary_super_cache_offset()); ++ int sco_offset = in_bytes(Klass::super_check_offset_offset()); ++ // If the pointers are equal, we are done (e.g., String[] elements). ++ // This self-check enables sharing of secondary supertype arrays among ++ // non-primary types such as array-of-interface. Otherwise, each such ++ // type would need its own customized SSA. ++ // We move this check to the front of the fast path because many ++ // type checks are in fact trivially successful in this manner, ++ // so we get a nicely predicted branch right at the start of the check. ++ beq(sub_klass, super_klass, *L_success); ++ // Check the supertype display: ++ if (must_load_sco) { ++ ld_wu(temp_reg, super_klass, sco_offset); ++ super_check_offset = RegisterOrConstant(temp_reg); ++ } ++ slli_d(AT, super_check_offset.register_or_noreg(), Address::times_1); ++ add_d(AT, sub_klass, AT); ++ ld_d(AT, AT, super_check_offset.constant_or_zero()*Address::times_1); ++ ++ // This check has worked decisively for primary supers. ++ // Secondary supers are sought in the super_cache ('super_cache_addr'). ++ // (Secondary supers are interfaces and very deeply nested subtypes.) ++ // This works in the same check above because of a tricky aliasing ++ // between the super_cache and the primary super display elements. ++ // (The 'super_check_addr' can address either, as the case requires.) ++ // Note that the cache is updated below if it does not help us find ++ // what we need immediately. ++ // So if it was a primary super, we can just fail immediately. ++ // Otherwise, it's the slow path for us (no success at this point). ++ ++ if (super_check_offset.is_register()) { ++ beq(super_klass, AT, *L_success); ++ addi_d(AT, super_check_offset.as_register(), -sc_offset); ++ if (L_failure == &L_fallthrough) { ++ beq(AT, R0, *L_slow_path); ++ } else { ++ bne_far(AT, R0, *L_failure); ++ b(*L_slow_path); ++ } ++ } else if (super_check_offset.as_constant() == sc_offset) { ++ // Need a slow path; fast failure is impossible. ++ if (L_slow_path == &L_fallthrough) { ++ beq(super_klass, AT, *L_success); ++ } else { ++ bne(super_klass, AT, *L_slow_path); ++ b(*L_success); ++ } ++ } else { ++ // No slow path; it's a fast decision. 
++ if (L_failure == &L_fallthrough) { ++ beq(super_klass, AT, *L_success); ++ } else { ++ bne_far(super_klass, AT, *L_failure); ++ b(*L_success); ++ } ++ } ++ ++ bind(L_fallthrough); ++} ++ ++void MacroAssembler::check_klass_subtype_slow_path(Register sub_klass, ++ Register super_klass, ++ Register temp_reg, ++ Register temp2_reg, ++ Label* L_success, ++ Label* L_failure, ++ bool set_cond_codes) { ++ if (temp2_reg == noreg) ++ temp2_reg = TSR; ++ assert_different_registers(sub_klass, super_klass, temp_reg, temp2_reg); ++#define IS_A_TEMP(reg) ((reg) == temp_reg || (reg) == temp2_reg) ++ ++ Label L_fallthrough; ++ int label_nulls = 0; ++ if (L_success == NULL) { L_success = &L_fallthrough; label_nulls++; } ++ if (L_failure == NULL) { L_failure = &L_fallthrough; label_nulls++; } ++ assert(label_nulls <= 1, "at most one NULL in the batch"); ++ ++ // a couple of useful fields in sub_klass: ++ int ss_offset = in_bytes(Klass::secondary_supers_offset()); ++ int sc_offset = in_bytes(Klass::secondary_super_cache_offset()); ++ Address secondary_supers_addr(sub_klass, ss_offset); ++ Address super_cache_addr( sub_klass, sc_offset); ++ ++ // Do a linear scan of the secondary super-klass chain. ++ // This code is rarely used, so simplicity is a virtue here. ++ // The repne_scan instruction uses fixed registers, which we must spill. ++ // Don't worry too much about pre-existing connections with the input regs. ++ ++#ifndef PRODUCT ++ int* pst_counter = &SharedRuntime::_partial_subtype_ctr; ++ ExternalAddress pst_counter_addr((address) pst_counter); ++#endif //PRODUCT ++ ++ // We will consult the secondary-super array. ++ ld_d(temp_reg, secondary_supers_addr); ++ // Load the array length. ++ ld_w(temp2_reg, Address(temp_reg, Array<Klass*>::length_offset_in_bytes())); ++ // Skip to start of data. ++ addi_d(temp_reg, temp_reg, Array<Klass*>::base_offset_in_bytes()); ++ ++ Label Loop, subtype; ++ bind(Loop); ++ beq(temp2_reg, R0, *L_failure); ++ ld_d(AT, temp_reg, 0); ++ addi_d(temp_reg, temp_reg, 1 * wordSize); ++ beq(AT, super_klass, subtype); ++ addi_d(temp2_reg, temp2_reg, -1); ++ b(Loop); ++ ++ bind(subtype); ++ st_d(super_klass, super_cache_addr); ++ if (L_success != &L_fallthrough) { ++ b(*L_success); ++ } ++ ++ // Success. Cache the super we found and proceed in triumph. ++#undef IS_A_TEMP ++ ++ bind(L_fallthrough); ++} ++ ++void MacroAssembler::get_vm_result(Register oop_result, Register java_thread) { ++ ld_d(oop_result, Address(java_thread, JavaThread::vm_result_offset())); ++ st_d(R0, Address(java_thread, JavaThread::vm_result_offset())); ++ verify_oop(oop_result, "broken oop in call_VM_base"); ++} ++ ++void MacroAssembler::get_vm_result_2(Register metadata_result, Register java_thread) { ++ ld_d(metadata_result, Address(java_thread, JavaThread::vm_result_2_offset())); ++ st_d(R0, Address(java_thread, JavaThread::vm_result_2_offset())); ++} ++ ++Address MacroAssembler::argument_address(RegisterOrConstant arg_slot, ++ int extra_slot_offset) { ++ // cf. TemplateTable::prepare_invoke(), if (load_receiver).
++ int stackElementSize = Interpreter::stackElementSize; ++ int offset = Interpreter::expr_offset_in_bytes(extra_slot_offset+0); ++#ifdef ASSERT ++ int offset1 = Interpreter::expr_offset_in_bytes(extra_slot_offset+1); ++ assert(offset1 - offset == stackElementSize, "correct arithmetic"); ++#endif ++ Register scale_reg = NOREG; ++ Address::ScaleFactor scale_factor = Address::no_scale; ++ if (arg_slot.is_constant()) { ++ offset += arg_slot.as_constant() * stackElementSize; ++ } else { ++ scale_reg = arg_slot.as_register(); ++ scale_factor = Address::times_8; ++ } ++ // We don't push RA on stack in prepare_invoke. ++ // offset += wordSize; // return PC is on stack ++ if(scale_reg==NOREG) return Address(SP, offset); ++ else { ++ alsl_d(scale_reg, scale_reg, SP, scale_factor - 1); ++ return Address(scale_reg, offset); ++ } ++} ++ ++SkipIfEqual::~SkipIfEqual() { ++ _masm->bind(_label); ++} ++ ++void MacroAssembler::load_sized_value(Register dst, Address src, size_t size_in_bytes, bool is_signed, Register dst2) { ++ switch (size_in_bytes) { ++ case 8: ld_d(dst, src); break; ++ case 4: ld_w(dst, src); break; ++ case 2: is_signed ? ld_h(dst, src) : ld_hu(dst, src); break; ++ case 1: is_signed ? ld_b( dst, src) : ld_bu( dst, src); break; ++ default: ShouldNotReachHere(); ++ } ++} ++ ++void MacroAssembler::store_sized_value(Address dst, Register src, size_t size_in_bytes, Register src2) { ++ switch (size_in_bytes) { ++ case 8: st_d(src, dst); break; ++ case 4: st_w(src, dst); break; ++ case 2: st_h(src, dst); break; ++ case 1: st_b(src, dst); break; ++ default: ShouldNotReachHere(); ++ } ++} ++ ++// Look up the method for a megamorphic invokeinterface call. ++// The target method is determined by <intf_klass, itable_index>. ++// The receiver klass is in recv_klass. ++// On success, the result will be in method_result, and execution falls through. ++// On failure, execution transfers to the given label. ++void MacroAssembler::lookup_interface_method(Register recv_klass, ++ Register intf_klass, ++ RegisterOrConstant itable_index, ++ Register method_result, ++ Register scan_temp, ++ Label& L_no_such_interface, ++ bool return_method) { ++ assert_different_registers(recv_klass, intf_klass, scan_temp, AT); ++ assert_different_registers(method_result, intf_klass, scan_temp, AT); ++ assert(recv_klass != method_result || !return_method, ++ "recv_klass can be destroyed when method isn't needed"); ++ ++ assert(itable_index.is_constant() || itable_index.as_register() == method_result, ++ "caller must use same register for non-constant itable index as for method"); ++ ++ // Compute start of first itableOffsetEntry (which is at the end of the vtable) ++ int vtable_base = in_bytes(Klass::vtable_start_offset()); ++ int itentry_off = itableMethodEntry::method_offset_in_bytes(); ++ int scan_step = itableOffsetEntry::size() * wordSize; ++ int vte_size = vtableEntry::size() * wordSize; ++ Address::ScaleFactor times_vte_scale = Address::times_ptr; ++ assert(vte_size == wordSize, "else adjust times_vte_scale"); ++ ++ ld_w(scan_temp, Address(recv_klass, Klass::vtable_length_offset())); ++ ++ // %%% Could store the aligned, prescaled offset in the klassoop. ++ alsl_d(scan_temp, scan_temp, recv_klass, times_vte_scale - 1); ++ addi_d(scan_temp, scan_temp, vtable_base); ++ ++ if (return_method) { ++ // Adjust recv_klass by scaled itable_index, so we can free itable_index.
++ if (itable_index.is_constant()) { ++ li(AT, (itable_index.as_constant() * itableMethodEntry::size() * wordSize) + itentry_off); ++ add_d(recv_klass, recv_klass, AT); ++ } else { ++ assert(itableMethodEntry::size() * wordSize == wordSize, "adjust the scaling in the code below"); ++ alsl_d(AT, itable_index.as_register(), recv_klass, (int)Address::times_ptr - 1); ++ addi_d(recv_klass, AT, itentry_off); ++ } ++ } ++ ++ Label search, found_method; ++ ++ ld_d(method_result, Address(scan_temp, itableOffsetEntry::interface_offset_in_bytes())); ++ beq(intf_klass, method_result, found_method); ++ ++ bind(search); ++ // Check that the previous entry is non-null. A null entry means that ++ // the receiver class doesn't implement the interface, and wasn't the ++ // same as when the caller was compiled. ++ beqz(method_result, L_no_such_interface); ++ addi_d(scan_temp, scan_temp, scan_step); ++ ld_d(method_result, Address(scan_temp, itableOffsetEntry::interface_offset_in_bytes())); ++ bne(intf_klass, method_result, search); ++ ++ bind(found_method); ++ if (return_method) { ++ // Got a hit. ++ ld_wu(scan_temp, Address(scan_temp, itableOffsetEntry::offset_offset_in_bytes())); ++ ldx_d(method_result, recv_klass, scan_temp); ++ } ++} ++ ++// virtual method calling ++void MacroAssembler::lookup_virtual_method(Register recv_klass, ++ RegisterOrConstant vtable_index, ++ Register method_result) { ++ const int base = in_bytes(Klass::vtable_start_offset()); ++ assert(vtableEntry::size() * wordSize == wordSize, "else adjust the scaling in the code below"); ++ ++ if (vtable_index.is_constant()) { ++ li(AT, vtable_index.as_constant()); ++ alsl_d(AT, AT, recv_klass, Address::times_ptr - 1); ++ } else { ++ alsl_d(AT, vtable_index.as_register(), recv_klass, Address::times_ptr - 1); ++ } ++ ++ ld_d(method_result, AT, base + vtableEntry::method_offset_in_bytes()); ++} ++ ++#ifdef COMPILER2 ++// Compare strings, used for char[] and byte[]. ++void MacroAssembler::string_compare(Register str1, Register str2, ++ Register cnt1, Register cnt2, Register result, ++ int ae) { ++ Label L, Loop, haveResult, done; ++ ++ bool isLL = ae == StrIntrinsicNode::LL; ++ bool isLU = ae == StrIntrinsicNode::LU; ++ bool isUL = ae == StrIntrinsicNode::UL; ++ ++ bool str1_isL = isLL || isLU; ++ bool str2_isL = isLL || isUL; ++ ++ if (!str1_isL) srli_w(cnt1, cnt1, 1); ++ if (!str2_isL) srli_w(cnt2, cnt2, 1); ++ ++ // compute the and difference of lengths (in result) ++ sub_d(result, cnt1, cnt2); // result holds the difference of two lengths ++ ++ // compute the shorter length (in cnt1) ++ bge(cnt2, cnt1, Loop); ++ move(cnt1, cnt2); ++ ++ // Now the shorter length is in cnt1 and cnt2 can be used as a tmp register ++ bind(Loop); // Loop begin ++ if (str1_isL) { ++ ld_bu(AT, str1, 0); ++ } else { ++ ld_hu(AT, str1, 0); ++ } ++ beq(cnt1, R0, done); ++ ++ // compare current character ++ if (str2_isL) { ++ ld_bu(cnt2, str2, 0); ++ } else { ++ ld_hu(cnt2, str2, 0); ++ } ++ addi_d(str1, str1, str1_isL ? 1 : 2); ++ bne(AT, cnt2, haveResult); ++ addi_d(str2, str2, str2_isL ? 1 : 2); ++ addi_d(cnt1, cnt1, -1); ++ b(Loop); ++ ++ bind(haveResult); ++ sub_d(result, AT, cnt2); ++ ++ bind(done); ++} ++ ++// Compare char[] or byte[] arrays or substrings. ++void MacroAssembler::arrays_equals(Register str1, Register str2, ++ Register cnt, Register tmp1, Register tmp2, Register result, ++ bool is_char) { ++ Label Loop, LoopEnd, True, False; ++ ++ addi_d(result, R0, 1); ++ beq(str1, str2, True); // same char[] ? ++ beqz(cnt, True); ++ ++ addi_d(AT, R0, is_char ? 
wordSize/2 : wordSize); ++ bind(Loop); ++ blt(cnt, AT, LoopEnd); ++ ld_d(tmp1, str1, 0); ++ ld_d(tmp2, str2, 0); ++ bne(tmp1, tmp2, False); ++ addi_d(str1, str1, 8); ++ addi_d(str2, str2, 8); ++ addi_d(cnt, cnt, is_char ? -wordSize/2 : -wordSize); ++ b(Loop); ++ ++ bind(LoopEnd); ++ beqz(cnt, True); ++ // compare current character ++ if (is_char) { ++ ld_hu(tmp1, str1, 0); ++ ld_hu(tmp2, str2, 0); ++ } else { ++ ld_bu(tmp1, str1, 0); ++ ld_bu(tmp2, str2, 0); ++ } ++ bne(tmp1, tmp2, False); ++ addi_d(str1, str1, is_char ? 2 : 1); ++ addi_d(str2, str2, is_char ? 2 : 1); ++ addi_d(cnt, cnt, -1); ++ b(LoopEnd); ++ ++ bind(False); ++ addi_d(result, R0, 0); ++ ++ bind(True); ++} ++#endif // COMPILER2 ++ ++void MacroAssembler::load_byte_map_base(Register reg) { ++ jbyte *byte_map_base = ++ ((CardTableBarrierSet*)(BarrierSet::barrier_set()))->card_table()->byte_map_base(); ++ ++ // Strictly speaking the byte_map_base isn't an address at all, and it might ++ // even be negative. It is thus materialised as a constant. ++ li(reg, (uint64_t)byte_map_base); ++} ++ ++// This method checks if provided byte array contains byte with highest bit set. ++void MacroAssembler::has_negatives(Register ary1, Register len, Register result) { ++ Label Loop, End, Nega, Done; ++ ++ orr(result, R0, R0); ++ bge(R0, len, Done); ++ ++ li(AT, 0x8080808080808080); ++ ++ addi_d(len, len, -8); ++ blt(len, R0, End); ++ ++ bind(Loop); ++ ld_d(result, ary1, 0); ++ andr(result, result, AT); ++ bnez(result, Nega); ++ beqz(len, Done); ++ addi_d(len, len, -8); ++ addi_d(ary1, ary1, 8); ++ bge(len, R0, Loop); ++ ++ bind(End); ++ ld_d(result, ary1, 0); ++ slli_d(len, len, 3); ++ sub_d(len, R0, len); ++ sll_d(result, result, len); ++ andr(result, result, AT); ++ beqz(result, Done); ++ ++ bind(Nega); ++ ori(result, R0, 1); ++ ++ bind(Done); ++} ++ ++// Compress char[] to byte[]. len must be positive int. ++// jtreg: TestStringIntrinsicRangeChecks.java ++void MacroAssembler::char_array_compress(Register src, Register dst, ++ Register len, Register result, ++ Register tmp1, Register tmp2, ++ Register tmp3) { ++ Label Loop, Done, Once, Fail; ++ ++ move(result, len); ++ bge(R0, result, Done); ++ ++ srli_w(AT, len, 2); ++ andi(len, len, 3); ++ ++ li(tmp3, 0xff00ff00ff00ff00); ++ ++ bind(Loop); ++ beqz(AT, Once); ++ ld_d(tmp1, src, 0); ++ andr(tmp2, tmp3, tmp1); // not latin-1, stop here ++ bnez(tmp2, Fail); ++ ++ // 0x00a100b200c300d4 -> 0x00000000a1b2c3d4 ++ srli_d(tmp2, tmp1, 8); ++ orr(tmp2, tmp2, tmp1); // 0x00a1a1b2b2c3c3d4 ++ bstrpick_d(tmp1, tmp2, 47, 32); // 0x0000a1b2 ++ slli_d(tmp1, tmp1, 16); // 0xa1b20000 ++ bstrins_d(tmp1, tmp2, 15, 0); // 0xa1b2c3d4 ++ ++ st_w(tmp1, dst, 0); ++ addi_w(AT, AT, -1); ++ addi_d(dst, dst, 4); ++ addi_d(src, src, 8); ++ b(Loop); ++ ++ bind(Once); ++ beqz(len, Done); ++ ld_d(AT, src, 0); ++ ++ bstrpick_d(tmp1, AT, 15, 0); ++ andr(tmp2, tmp3, tmp1); ++ bnez(tmp2, Fail); ++ st_b(tmp1, dst, 0); ++ addi_w(len, len, -1); ++ ++ beqz(len, Done); ++ bstrpick_d(tmp1, AT, 31, 16); ++ andr(tmp2, tmp3, tmp1); ++ bnez(tmp2, Fail); ++ st_b(tmp1, dst, 1); ++ addi_w(len, len, -1); ++ ++ beqz(len, Done); ++ bstrpick_d(tmp1, AT, 47, 32); ++ andr(tmp2, tmp3, tmp1); ++ bnez(tmp2, Fail); ++ st_b(tmp1, dst, 2); ++ b(Done); ++ ++ bind(Fail); ++ move(result, R0); ++ ++ bind(Done); ++} ++ ++// Inflate byte[] to char[]. len must be positive int. 
++// jtreg:test/jdk/sun/nio/cs/FindDecoderBugs.java ++void MacroAssembler::byte_array_inflate(Register src, Register dst, Register len, ++ Register tmp1, Register tmp2) { ++ Label Loop, Once, Done; ++ ++ bge(R0, len, Done); ++ ++ srli_w(AT, len, 2); ++ andi(len, len, 3); ++ ++ bind(Loop); ++ beqz(AT, Once); ++ ld_wu(tmp1, src, 0); ++ ++ // 0x00000000a1b2c3d4 -> 0x00a100b200c300d4 ++ bstrpick_d(tmp2, tmp1, 7, 0); ++ srli_d(tmp1, tmp1, 8); ++ bstrins_d(tmp2, tmp1, 23, 16); ++ srli_d(tmp1, tmp1, 8); ++ bstrins_d(tmp2, tmp1, 39, 32); ++ srli_d(tmp1, tmp1, 8); ++ bstrins_d(tmp2, tmp1, 55, 48); ++ ++ st_d(tmp2, dst, 0); ++ addi_w(AT, AT, -1); ++ addi_d(dst, dst, 8); ++ addi_d(src, src, 4); ++ b(Loop); ++ ++ bind(Once); ++ beqz(len, Done); ++ ld_wu(tmp1, src, 0); ++ ++ bstrpick_d(tmp2, tmp1, 7, 0); ++ st_h(tmp2, dst, 0); ++ addi_w(len, len, -1); ++ ++ beqz(len, Done); ++ bstrpick_d(tmp2, tmp1, 15, 8); ++ st_h(tmp2, dst, 2); ++ addi_w(len, len, -1); ++ ++ beqz(len, Done); ++ bstrpick_d(tmp2, tmp1, 23, 16); ++ st_h(tmp2, dst, 4); ++ ++ bind(Done); ++} ++ ++void MacroAssembler::string_indexof_char(Register str1, Register cnt1, ++ Register ch, Register result, ++ Register tmp1, Register tmp2, ++ Register tmp3) ++{ ++ Label CH1_LOOP, HAS_ZERO, DO1_SHORT, DO1_LOOP, NOMATCH, DONE; ++ ++ beqz(cnt1, NOMATCH); ++ ++ move(result, R0); ++ ori(tmp1, R0, 4); ++ blt(cnt1, tmp1, DO1_LOOP); ++ ++ // UTF-16 char occupies 16 bits ++ // ch -> chchchch ++ bstrins_d(ch, ch, 31, 16); ++ bstrins_d(ch, ch, 63, 32); ++ ++ li(tmp2, 0x0001000100010001); ++ li(tmp3, 0x7fff7fff7fff7fff); ++ ++ bind(CH1_LOOP); ++ ld_d(AT, str1, 0); ++ xorr(AT, ch, AT); ++ sub_d(tmp1, AT, tmp2); ++ orr(AT, AT, tmp3); ++ andn(tmp1, tmp1, AT); ++ bnez(tmp1, HAS_ZERO); ++ addi_d(str1, str1, 8); ++ addi_d(result, result, 4); ++ ++ // meet the end of string ++ beq(cnt1, result, NOMATCH); ++ ++ addi_d(tmp1, result, 4); ++ bge(tmp1, cnt1, DO1_SHORT); ++ b(CH1_LOOP); ++ ++ bind(HAS_ZERO); ++ ctz_d(tmp1, tmp1); ++ srli_d(tmp1, tmp1, 4); ++ add_d(result, result, tmp1); ++ b(DONE); ++ ++ // restore ch ++ bind(DO1_SHORT); ++ bstrpick_d(ch, ch, 15, 0); ++ ++ bind(DO1_LOOP); ++ ld_hu(tmp1, str1, 0); ++ beq(ch, tmp1, DONE); ++ addi_d(str1, str1, 2); ++ addi_d(result, result, 1); ++ blt(result, cnt1, DO1_LOOP); ++ ++ bind(NOMATCH); ++ addi_d(result, R0, -1); ++ ++ bind(DONE); ++} ++ ++void MacroAssembler::clear_jweak_tag(Register possibly_jweak) { ++ const int32_t inverted_jweak_mask = ~static_cast<int32_t>(JNIHandles::weak_tag_mask); ++ STATIC_ASSERT(inverted_jweak_mask == -2); // otherwise check this code ++ // The inverted mask is sign-extended ++ li(AT, inverted_jweak_mask); ++ andr(possibly_jweak, AT, possibly_jweak); ++} ++ ++void MacroAssembler::resolve_jobject(Register value, ++ Register thread, ++ Register tmp) { ++ assert_different_registers(value, thread, tmp); ++ Label done, not_weak; ++ beq(value, R0, done); // Use NULL as-is. ++ li(AT, JNIHandles::weak_tag_mask); // Test for jweak tag. ++ andr(AT, value, AT); ++ beq(AT, R0, not_weak); ++ // Resolve jweak. ++ access_load_at(T_OBJECT, IN_NATIVE | ON_PHANTOM_OOP_REF, ++ value, Address(value, -JNIHandles::weak_tag_value), tmp, thread); ++ verify_oop(value); ++ b(done); ++ bind(not_weak); ++ // Resolve (untagged) jobject.
++ access_load_at(T_OBJECT, IN_NATIVE, value, Address(value, 0), tmp, thread); ++ verify_oop(value); ++ bind(done); ++} ++ ++void MacroAssembler::lea(Register rd, Address src) { ++ Register dst = rd; ++ Register base = src.base(); ++ Register index = src.index(); ++ ++ int scale = src.scale(); ++ int disp = src.disp(); ++ ++ if (index == noreg) { ++ if (is_simm(disp, 12)) { ++ addi_d(dst, base, disp); ++ } else { ++ lu12i_w(AT, split_low20(disp >> 12)); ++ if (split_low12(disp)) ++ ori(AT, AT, split_low12(disp)); ++ add_d(dst, base, AT); ++ } ++ } else { ++ if (scale == 0) { ++ if (is_simm(disp, 12)) { ++ add_d(AT, base, index); ++ addi_d(dst, AT, disp); ++ } else { ++ lu12i_w(AT, split_low20(disp >> 12)); ++ if (split_low12(disp)) ++ ori(AT, AT, split_low12(disp)); ++ add_d(AT, base, AT); ++ add_d(dst, AT, index); ++ } ++ } else { ++ if (is_simm(disp, 12)) { ++ alsl_d(AT, index, base, scale - 1); ++ addi_d(dst, AT, disp); ++ } else { ++ lu12i_w(AT, split_low20(disp >> 12)); ++ if (split_low12(disp)) ++ ori(AT, AT, split_low12(disp)); ++ add_d(AT, AT, base); ++ alsl_d(dst, index, AT, scale - 1); ++ } ++ } ++ } ++} ++ ++void MacroAssembler::lea(Register dst, AddressLiteral adr) { ++ code_section()->relocate(pc(), adr.rspec()); ++ pcaddi(dst, (adr.target() - pc()) >> 2); ++} ++ ++int MacroAssembler::patched_branch(int dest_pos, int inst, int inst_pos) { ++ int v = (dest_pos - inst_pos) >> 2; ++ switch(high(inst, 6)) { ++ case beq_op: ++ case bne_op: ++ case blt_op: ++ case bge_op: ++ case bltu_op: ++ case bgeu_op: ++ assert(is_simm16(v), "must be simm16"); ++#ifndef PRODUCT ++ if(!is_simm16(v)) ++ { ++ tty->print_cr("must be simm16"); ++ tty->print_cr("Inst: %x", inst); ++ } ++#endif ++ ++ inst &= 0xfc0003ff; ++ inst |= ((v & 0xffff) << 10); ++ break; ++ case beqz_op: ++ case bnez_op: ++ case bccondz_op: ++ assert(is_simm(v, 21), "must be simm21"); ++#ifndef PRODUCT ++ if(!is_simm(v, 21)) ++ { ++ tty->print_cr("must be simm21"); ++ tty->print_cr("Inst: %x", inst); ++ } ++#endif ++ ++ inst &= 0xfc0003e0; ++ inst |= ( ((v & 0xffff) << 10) | ((v >> 16) & 0x1f) ); ++ break; ++ case b_op: ++ case bl_op: ++ assert(is_simm(v, 26), "must be simm26"); ++#ifndef PRODUCT ++ if(!is_simm(v, 26)) ++ { ++ tty->print_cr("must be simm26"); ++ tty->print_cr("Inst: %x", inst); ++ } ++#endif ++ ++ inst &= 0xfc000000; ++ inst |= ( ((v & 0xffff) << 10) | ((v >> 16) & 0x3ff) ); ++ break; ++ default: ++ ShouldNotReachHere(); ++ break; ++ } ++ return inst; ++} ++ ++void MacroAssembler::cmp_cmov(Register op1, ++ Register op2, ++ Register dst, ++ Register src1, ++ Register src2, ++ CMCompare cmp, ++ bool is_signed) { ++ switch (cmp) { ++ case EQ: ++ sub_d(AT, op1, op2); ++ if (dst == src2) { ++ masknez(dst, src2, AT); ++ maskeqz(AT, src1, AT); ++ } else { ++ maskeqz(dst, src1, AT); ++ masknez(AT, src2, AT); ++ } ++ break; ++ ++ case NE: ++ sub_d(AT, op1, op2); ++ if (dst == src2) { ++ maskeqz(dst, src2, AT); ++ masknez(AT, src1, AT); ++ } else { ++ masknez(dst, src1, AT); ++ maskeqz(AT, src2, AT); ++ } ++ break; ++ ++ case GT: ++ if (is_signed) { ++ slt(AT, op2, op1); ++ } else { ++ sltu(AT, op2, op1); ++ } ++ if(dst == src2) { ++ maskeqz(dst, src2, AT); ++ masknez(AT, src1, AT); ++ } else { ++ masknez(dst, src1, AT); ++ maskeqz(AT, src2, AT); ++ } ++ break; ++ case GE: ++ if (is_signed) { ++ slt(AT, op1, op2); ++ } else { ++ sltu(AT, op1, op2); ++ } ++ if(dst == src2) { ++ masknez(dst, src2, AT); ++ maskeqz(AT, src1, AT); ++ } else { ++ maskeqz(dst, src1, AT); ++ masknez(AT, src2, AT); ++ } ++ break; ++ ++ case LT: 
++ if (is_signed) { ++ slt(AT, op1, op2); ++ } else { ++ sltu(AT, op1, op2); ++ } ++ if(dst == src2) { ++ maskeqz(dst, src2, AT); ++ masknez(AT, src1, AT); ++ } else { ++ masknez(dst, src1, AT); ++ maskeqz(AT, src2, AT); ++ } ++ break; ++ case LE: ++ if (is_signed) { ++ slt(AT, op2, op1); ++ } else { ++ sltu(AT, op2, op1); ++ } ++ if(dst == src2) { ++ masknez(dst, src2, AT); ++ maskeqz(AT, src1, AT); ++ } else { ++ maskeqz(dst, src1, AT); ++ masknez(AT, src2, AT); ++ } ++ break; ++ default: ++ Unimplemented(); ++ } ++ OR(dst, dst, AT); ++} ++ ++void MacroAssembler::cmp_cmov(Register op1, ++ Register op2, ++ Register dst, ++ Register src, ++ CMCompare cmp, ++ bool is_signed) { ++ switch (cmp) { ++ case EQ: ++ sub_d(AT, op1, op2); ++ maskeqz(dst, dst, AT); ++ masknez(AT, src, AT); ++ break; ++ ++ case NE: ++ sub_d(AT, op1, op2); ++ masknez(dst, dst, AT); ++ maskeqz(AT, src, AT); ++ break; ++ ++ case GT: ++ if (is_signed) { ++ slt(AT, op2, op1); ++ } else { ++ sltu(AT, op2, op1); ++ } ++ masknez(dst, dst, AT); ++ maskeqz(AT, src, AT); ++ break; ++ ++ case GE: ++ if (is_signed) { ++ slt(AT, op1, op2); ++ } else { ++ sltu(AT, op1, op2); ++ } ++ maskeqz(dst, dst, AT); ++ masknez(AT, src, AT); ++ break; ++ ++ case LT: ++ if (is_signed) { ++ slt(AT, op1, op2); ++ } else { ++ sltu(AT, op1, op2); ++ } ++ masknez(dst, dst, AT); ++ maskeqz(AT, src, AT); ++ break; ++ ++ case LE: ++ if (is_signed) { ++ slt(AT, op2, op1); ++ } else { ++ sltu(AT, op2, op1); ++ } ++ maskeqz(dst, dst, AT); ++ masknez(AT, src, AT); ++ break; ++ ++ default: ++ Unimplemented(); ++ } ++ OR(dst, dst, AT); ++} ++ ++ ++void MacroAssembler::cmp_cmov(FloatRegister op1, ++ FloatRegister op2, ++ Register dst, ++ Register src, ++ FloatRegister tmp1, ++ FloatRegister tmp2, ++ CMCompare cmp, ++ bool is_float) { ++ movgr2fr_d(tmp1, dst); ++ movgr2fr_d(tmp2, src); ++ ++ switch(cmp) { ++ case EQ: ++ if (is_float) { ++ fcmp_ceq_s(FCC0, op1, op2); ++ } else { ++ fcmp_ceq_d(FCC0, op1, op2); ++ } ++ fsel(tmp1, tmp1, tmp2, FCC0); ++ break; ++ ++ case NE: ++ if (is_float) { ++ fcmp_ceq_s(FCC0, op1, op2); ++ } else { ++ fcmp_ceq_d(FCC0, op1, op2); ++ } ++ fsel(tmp1, tmp2, tmp1, FCC0); ++ break; ++ ++ case GT: ++ if (is_float) { ++ fcmp_cule_s(FCC0, op1, op2); ++ } else { ++ fcmp_cule_d(FCC0, op1, op2); ++ } ++ fsel(tmp1, tmp2, tmp1, FCC0); ++ break; ++ ++ case GE: ++ if (is_float) { ++ fcmp_cult_s(FCC0, op1, op2); ++ } else { ++ fcmp_cult_d(FCC0, op1, op2); ++ } ++ fsel(tmp1, tmp2, tmp1, FCC0); ++ break; ++ ++ case LT: ++ if (is_float) { ++ fcmp_cult_s(FCC0, op1, op2); ++ } else { ++ fcmp_cult_d(FCC0, op1, op2); ++ } ++ fsel(tmp1, tmp1, tmp2, FCC0); ++ break; ++ ++ case LE: ++ if (is_float) { ++ fcmp_cule_s(FCC0, op1, op2); ++ } else { ++ fcmp_cule_d(FCC0, op1, op2); ++ } ++ fsel(tmp1, tmp1, tmp2, FCC0); ++ break; ++ ++ default: ++ Unimplemented(); ++ } ++ ++ movfr2gr_d(dst, tmp1); ++} ++ ++void MacroAssembler::cmp_cmov(FloatRegister op1, ++ FloatRegister op2, ++ FloatRegister dst, ++ FloatRegister src, ++ CMCompare cmp, ++ bool is_float) { ++ switch(cmp) { ++ case EQ: ++ if (!is_float) { ++ fcmp_ceq_d(FCC0, op1, op2); ++ } else { ++ fcmp_ceq_s(FCC0, op1, op2); ++ } ++ fsel(dst, dst, src, FCC0); ++ break; ++ ++ case NE: ++ if (!is_float) { ++ fcmp_ceq_d(FCC0, op1, op2); ++ } else { ++ fcmp_ceq_s(FCC0, op1, op2); ++ } ++ fsel(dst, src, dst, FCC0); ++ break; ++ ++ case GT: ++ if (!is_float) { ++ fcmp_cule_d(FCC0, op1, op2); ++ } else { ++ fcmp_cule_s(FCC0, op1, op2); ++ } ++ fsel(dst, src, dst, FCC0); ++ break; ++ ++ case GE: ++ if (!is_float) { ++ 
fcmp_cult_d(FCC0, op1, op2); ++ } else { ++ fcmp_cult_s(FCC0, op1, op2); ++ } ++ fsel(dst, src, dst, FCC0); ++ break; ++ ++ case LT: ++ if (!is_float) { ++ fcmp_cult_d(FCC0, op1, op2); ++ } else { ++ fcmp_cult_s(FCC0, op1, op2); ++ } ++ fsel(dst, dst, src, FCC0); ++ break; ++ ++ case LE: ++ if (!is_float) { ++ fcmp_cule_d(FCC0, op1, op2); ++ } else { ++ fcmp_cule_s(FCC0, op1, op2); ++ } ++ fsel(dst, dst, src, FCC0); ++ break; ++ ++ default: ++ Unimplemented(); ++ } ++} ++ ++void MacroAssembler::cmp_cmov(Register op1, ++ Register op2, ++ FloatRegister dst, ++ FloatRegister src, ++ FloatRegister tmp1, ++ FloatRegister tmp2, ++ CMCompare cmp) { ++ movgr2fr_w(tmp1, R0); ++ ++ switch (cmp) { ++ case EQ: ++ sub_d(AT, op1, op2); ++ movgr2fr_w(tmp2, AT); ++ fcmp_ceq_s(FCC0, tmp1, tmp2); ++ fsel(dst, dst, src, FCC0); ++ break; ++ ++ case NE: ++ sub_d(AT, op1, op2); ++ movgr2fr_w(tmp2, AT); ++ fcmp_ceq_s(FCC0, tmp1, tmp2); ++ fsel(dst, src, dst, FCC0); ++ break; ++ ++ case GT: ++ slt(AT, op2, op1); ++ movgr2fr_w(tmp2, AT); ++ fcmp_ceq_s(FCC0, tmp1, tmp2); ++ fsel(dst, src, dst, FCC0); ++ break; ++ ++ case GE: ++ slt(AT, op1, op2); ++ movgr2fr_w(tmp2, AT); ++ fcmp_ceq_s(FCC0, tmp1, tmp2); ++ fsel(dst, dst, src, FCC0); ++ break; ++ ++ case LT: ++ slt(AT, op1, op2); ++ movgr2fr_w(tmp2, AT); ++ fcmp_ceq_s(FCC0, tmp1, tmp2); ++ fsel(dst, src, dst, FCC0); ++ break; ++ ++ case LE: ++ slt(AT, op2, op1); ++ movgr2fr_w(tmp2, AT); ++ fcmp_ceq_s(FCC0, tmp1, tmp2); ++ fsel(dst, dst, src, FCC0); ++ break; ++ ++ default: ++ Unimplemented(); ++ } ++} ++ ++void MacroAssembler::loadstore(Register reg, Register base, int disp, int type) { ++ switch (type) { ++ case STORE_BYTE: st_b (reg, base, disp); break; ++ case STORE_CHAR: ++ case STORE_SHORT: st_h (reg, base, disp); break; ++ case STORE_INT: st_w (reg, base, disp); break; ++ case STORE_LONG: st_d (reg, base, disp); break; ++ case LOAD_BYTE: ld_b (reg, base, disp); break; ++ case LOAD_U_BYTE: ld_bu(reg, base, disp); break; ++ case LOAD_SHORT: ld_h (reg, base, disp); break; ++ case LOAD_U_SHORT: ld_hu(reg, base, disp); break; ++ case LOAD_INT: ld_w (reg, base, disp); break; ++ case LOAD_U_INT: ld_wu(reg, base, disp); break; ++ case LOAD_LONG: ld_d (reg, base, disp); break; ++ case LOAD_LINKED_LONG: ++ ll_d(reg, base, disp); ++ break; ++ default: ++ ShouldNotReachHere(); ++ } ++} ++ ++void MacroAssembler::loadstore(Register reg, Register base, Register disp, int type) { ++ switch (type) { ++ case STORE_BYTE: stx_b (reg, base, disp); break; ++ case STORE_CHAR: ++ case STORE_SHORT: stx_h (reg, base, disp); break; ++ case STORE_INT: stx_w (reg, base, disp); break; ++ case STORE_LONG: stx_d (reg, base, disp); break; ++ case LOAD_BYTE: ldx_b (reg, base, disp); break; ++ case LOAD_U_BYTE: ldx_bu(reg, base, disp); break; ++ case LOAD_SHORT: ldx_h (reg, base, disp); break; ++ case LOAD_U_SHORT: ldx_hu(reg, base, disp); break; ++ case LOAD_INT: ldx_w (reg, base, disp); break; ++ case LOAD_U_INT: ldx_wu(reg, base, disp); break; ++ case LOAD_LONG: ldx_d (reg, base, disp); break; ++ case LOAD_LINKED_LONG: ++ add_d(AT, base, disp); ++ ll_d(reg, AT, 0); ++ break; ++ default: ++ ShouldNotReachHere(); ++ } ++} ++ ++void MacroAssembler::loadstore(FloatRegister reg, Register base, int disp, int type) { ++ switch (type) { ++ case STORE_FLOAT: fst_s(reg, base, disp); break; ++ case STORE_DOUBLE: fst_d(reg, base, disp); break; ++ case STORE_VECTORX: vst (reg, base, disp); break; ++ case STORE_VECTORY: xvst (reg, base, disp); break; ++ case LOAD_FLOAT: fld_s(reg, base, disp); break; ++ 
case LOAD_DOUBLE: fld_d(reg, base, disp); break; ++ case LOAD_VECTORX: vld (reg, base, disp); break; ++ case LOAD_VECTORY: xvld (reg, base, disp); break; ++ default: ++ ShouldNotReachHere(); ++ } ++} ++ ++void MacroAssembler::loadstore(FloatRegister reg, Register base, Register disp, int type) { ++ switch (type) { ++ case STORE_FLOAT: fstx_s(reg, base, disp); break; ++ case STORE_DOUBLE: fstx_d(reg, base, disp); break; ++ case STORE_VECTORX: vstx (reg, base, disp); break; ++ case STORE_VECTORY: xvstx (reg, base, disp); break; ++ case LOAD_FLOAT: fldx_s(reg, base, disp); break; ++ case LOAD_DOUBLE: fldx_d(reg, base, disp); break; ++ case LOAD_VECTORX: vldx (reg, base, disp); break; ++ case LOAD_VECTORY: xvldx (reg, base, disp); break; ++ default: ++ ShouldNotReachHere(); ++ } ++} ++ ++#ifdef COMPILER2 ++void MacroAssembler::reduce_ins_v(FloatRegister vec1, FloatRegister vec2, FloatRegister vec3, BasicType type, int opcode) { ++ switch (type) { ++ case T_BYTE: ++ switch (opcode) { ++ case Op_AddReductionVI: vadd_b(vec1, vec2, vec3); break; ++ case Op_MulReductionVI: vmul_b(vec1, vec2, vec3); break; ++ case Op_MaxReductionV: vmax_b(vec1, vec2, vec3); break; ++ case Op_MinReductionV: vmin_b(vec1, vec2, vec3); break; ++ default: ++ ShouldNotReachHere(); ++ } ++ break; ++ case T_SHORT: ++ switch (opcode) { ++ case Op_AddReductionVI: vadd_h(vec1, vec2, vec3); break; ++ case Op_MulReductionVI: vmul_h(vec1, vec2, vec3); break; ++ case Op_MaxReductionV: vmax_h(vec1, vec2, vec3); break; ++ case Op_MinReductionV: vmin_h(vec1, vec2, vec3); break; ++ default: ++ ShouldNotReachHere(); ++ } ++ break; ++ case T_INT: ++ switch (opcode) { ++ case Op_AddReductionVI: vadd_w(vec1, vec2, vec3); break; ++ case Op_MulReductionVI: vmul_w(vec1, vec2, vec3); break; ++ case Op_MaxReductionV: vmax_w(vec1, vec2, vec3); break; ++ case Op_MinReductionV: vmin_w(vec1, vec2, vec3); break; ++ default: ++ ShouldNotReachHere(); ++ } ++ break; ++ case T_LONG: ++ switch (opcode) { ++ case Op_AddReductionVL: vadd_d(vec1, vec2, vec3); break; ++ case Op_MulReductionVL: vmul_d(vec1, vec2, vec3); break; ++ case Op_MaxReductionV: vmax_d(vec1, vec2, vec3); break; ++ case Op_MinReductionV: vmin_d(vec1, vec2, vec3); break; ++ default: ++ ShouldNotReachHere(); ++ } ++ break; ++ default: ++ ShouldNotReachHere(); ++ } ++} ++ ++void MacroAssembler::reduce_ins_r(Register reg1, Register reg2, Register reg3, BasicType type, int opcode) { ++ switch (type) { ++ case T_BYTE: ++ case T_SHORT: ++ case T_INT: ++ switch (opcode) { ++ case Op_AddReductionVI: add_w(reg1, reg2, reg3); break; ++ case Op_MulReductionVI: mul_w(reg1, reg2, reg3); break; ++ default: ++ ShouldNotReachHere(); ++ } ++ break; ++ case T_LONG: ++ switch (opcode) { ++ case Op_AddReductionVL: add_d(reg1, reg2, reg3); break; ++ case Op_MulReductionVL: mul_d(reg1, reg2, reg3); break; ++ default: ++ ShouldNotReachHere(); ++ } ++ break; ++ default: ++ ShouldNotReachHere(); ++ } ++} ++ ++void MacroAssembler::reduce_ins_f(FloatRegister reg1, FloatRegister reg2, FloatRegister reg3, BasicType type, int opcode) { ++ switch (type) { ++ case T_FLOAT: ++ switch (opcode) { ++ case Op_AddReductionVF: fadd_s(reg1, reg2, reg3); break; ++ case Op_MulReductionVF: fmul_s(reg1, reg2, reg3); break; ++ default: ++ ShouldNotReachHere(); ++ } ++ break; ++ case T_DOUBLE: ++ switch (opcode) { ++ case Op_AddReductionVD: fadd_d(reg1, reg2, reg3); break; ++ case Op_MulReductionVD: fmul_d(reg1, reg2, reg3); break; ++ default: ++ ShouldNotReachHere(); ++ } ++ break; ++ default: ++ ShouldNotReachHere(); ++ } ++} ++ 
++void MacroAssembler::reduce(Register dst, Register src, FloatRegister vsrc, FloatRegister tmp1, FloatRegister tmp2, BasicType type, int opcode, int vector_size) { ++ if (vector_size == 32) { ++ xvpermi_d(tmp1, vsrc, 0b00001110); ++ reduce_ins_v(tmp1, vsrc, tmp1, type, opcode); ++ vpermi_w(tmp2, tmp1, 0b00001110); ++ reduce_ins_v(tmp1, tmp2, tmp1, type, opcode); ++ } else if (vector_size == 16) { ++ vpermi_w(tmp1, vsrc, 0b00001110); ++ reduce_ins_v(tmp1, vsrc, tmp1, type, opcode); ++ } else { ++ ShouldNotReachHere(); ++ } ++ ++ if (type != T_LONG) { ++ vshuf4i_w(tmp2, tmp1, 0b00000001); ++ reduce_ins_v(tmp1, tmp2, tmp1, type, opcode); ++ if (type != T_INT) { ++ vshuf4i_h(tmp2, tmp1, 0b00000001); ++ reduce_ins_v(tmp1, tmp2, tmp1, type, opcode); ++ if (type != T_SHORT) { ++ vshuf4i_b(tmp2, tmp1, 0b00000001); ++ reduce_ins_v(tmp1, tmp2, tmp1, type, opcode); ++ } ++ } ++ } ++ ++ switch (type) { ++ case T_BYTE: vpickve2gr_b(dst, tmp1, 0); break; ++ case T_SHORT: vpickve2gr_h(dst, tmp1, 0); break; ++ case T_INT: vpickve2gr_w(dst, tmp1, 0); break; ++ case T_LONG: vpickve2gr_d(dst, tmp1, 0); break; ++ default: ++ ShouldNotReachHere(); ++ } ++ if (opcode == Op_MaxReductionV) { ++ slt(AT, dst, src); ++ masknez(dst, dst, AT); ++ maskeqz(AT, src, AT); ++ orr(dst, dst, AT); ++ } else if (opcode == Op_MinReductionV) { ++ slt(AT, src, dst); ++ masknez(dst, dst, AT); ++ maskeqz(AT, src, AT); ++ orr(dst, dst, AT); ++ } else { ++ reduce_ins_r(dst, dst, src, type, opcode); ++ } ++ switch (type) { ++ case T_BYTE: ext_w_b(dst, dst); break; ++ case T_SHORT: ext_w_h(dst, dst); break; ++ default: ++ break; ++ } ++} ++ ++void MacroAssembler::reduce(FloatRegister dst, FloatRegister src, FloatRegister vsrc, FloatRegister tmp, BasicType type, int opcode, int vector_size) { ++ if (vector_size == 32) { ++ switch (type) { ++ case T_FLOAT: ++ reduce_ins_f(dst, vsrc, src, type, opcode); ++ xvpickve_w(tmp, vsrc, 1); ++ reduce_ins_f(dst, tmp, dst, type, opcode); ++ xvpickve_w(tmp, vsrc, 2); ++ reduce_ins_f(dst, tmp, dst, type, opcode); ++ xvpickve_w(tmp, vsrc, 3); ++ reduce_ins_f(dst, tmp, dst, type, opcode); ++ xvpickve_w(tmp, vsrc, 4); ++ reduce_ins_f(dst, tmp, dst, type, opcode); ++ xvpickve_w(tmp, vsrc, 5); ++ reduce_ins_f(dst, tmp, dst, type, opcode); ++ xvpickve_w(tmp, vsrc, 6); ++ reduce_ins_f(dst, tmp, dst, type, opcode); ++ xvpickve_w(tmp, vsrc, 7); ++ reduce_ins_f(dst, tmp, dst, type, opcode); ++ break; ++ case T_DOUBLE: ++ reduce_ins_f(dst, vsrc, src, type, opcode); ++ xvpickve_d(tmp, vsrc, 1); ++ reduce_ins_f(dst, tmp, dst, type, opcode); ++ xvpickve_d(tmp, vsrc, 2); ++ reduce_ins_f(dst, tmp, dst, type, opcode); ++ xvpickve_d(tmp, vsrc, 3); ++ reduce_ins_f(dst, tmp, dst, type, opcode); ++ break; ++ default: ++ ShouldNotReachHere(); ++ } ++ } else if (vector_size == 16) { ++ switch (type) { ++ case T_FLOAT: ++ reduce_ins_f(dst, vsrc, src, type, opcode); ++ vpermi_w(tmp, vsrc, 0b00000001); ++ reduce_ins_f(dst, tmp, dst, type, opcode); ++ vpermi_w(tmp, vsrc, 0b00000010); ++ reduce_ins_f(dst, tmp, dst, type, opcode); ++ vpermi_w(tmp, vsrc, 0b00000011); ++ reduce_ins_f(dst, tmp, dst, type, opcode); ++ break; ++ case T_DOUBLE: ++ reduce_ins_f(dst, vsrc, src, type, opcode); ++ vpermi_w(tmp, vsrc, 0b00001110); ++ reduce_ins_f(dst, tmp, dst, type, opcode); ++ break; ++ default: ++ ShouldNotReachHere(); ++ } ++ } else { ++ ShouldNotReachHere(); ++ } ++} ++#endif // COMPILER2 ++ ++/** ++ * Emits code to update CRC-32 with a byte value according to constants in table ++ * ++ * @param [in,out]crc Register containing the 
crc. ++ * @param [in]val Register containing the byte to fold into the CRC. ++ * @param [in]table Register containing the table of crc constants. ++ * ++ * uint32_t crc; ++ * val = crc_table[(val ^ crc) & 0xFF]; ++ * crc = val ^ (crc >> 8); ++**/ ++void MacroAssembler::update_byte_crc32(Register crc, Register val, Register table) { ++ xorr(val, val, crc); ++ andi(val, val, 0xff); ++ ld_w(val, Address(table, val, Address::times_4, 0)); ++ srli_w(crc, crc, 8); ++ xorr(crc, val, crc); ++} ++ ++/** ++ * @param crc register containing existing CRC (32-bit) ++ * @param buf register pointing to input byte buffer (byte*) ++ * @param len register containing number of bytes ++ * @param tmp scratch register ++**/ ++void MacroAssembler::kernel_crc32(Register crc, Register buf, Register len, Register tmp) { ++ Label CRC_by64_loop, CRC_by4_loop, CRC_by1_loop, CRC_less64, CRC_by64_pre, CRC_by32_loop, CRC_less32, L_exit; ++ assert_different_registers(crc, buf, len, tmp); ++ ++ nor(crc, crc, R0); ++ ++ addi_d(len, len, -64); ++ bge(len, R0, CRC_by64_loop); ++ addi_d(len, len, 64-4); ++ bge(len, R0, CRC_by4_loop); ++ addi_d(len, len, 4); ++ blt(R0, len, CRC_by1_loop); ++ b(L_exit); ++ ++ bind(CRC_by64_loop); ++ ld_d(tmp, buf, 0); ++ crc_w_d_w(crc, tmp, crc); ++ ld_d(tmp, buf, 8); ++ crc_w_d_w(crc, tmp, crc); ++ ld_d(tmp, buf, 16); ++ crc_w_d_w(crc, tmp, crc); ++ ld_d(tmp, buf, 24); ++ crc_w_d_w(crc, tmp, crc); ++ ld_d(tmp, buf, 32); ++ crc_w_d_w(crc, tmp, crc); ++ ld_d(tmp, buf, 40); ++ crc_w_d_w(crc, tmp, crc); ++ ld_d(tmp, buf, 48); ++ crc_w_d_w(crc, tmp, crc); ++ ld_d(tmp, buf, 56); ++ crc_w_d_w(crc, tmp, crc); ++ addi_d(buf, buf, 64); ++ addi_d(len, len, -64); ++ bge(len, R0, CRC_by64_loop); ++ addi_d(len, len, 64-4); ++ bge(len, R0, CRC_by4_loop); ++ addi_d(len, len, 4); ++ blt(R0, len, CRC_by1_loop); ++ b(L_exit); ++ ++ bind(CRC_by4_loop); ++ ld_w(tmp, buf, 0); ++ crc_w_w_w(crc, tmp, crc); ++ addi_d(buf, buf, 4); ++ addi_d(len, len, -4); ++ bge(len, R0, CRC_by4_loop); ++ addi_d(len, len, 4); ++ bge(R0, len, L_exit); ++ ++ bind(CRC_by1_loop); ++ ld_b(tmp, buf, 0); ++ crc_w_b_w(crc, tmp, crc); ++ addi_d(buf, buf, 1); ++ addi_d(len, len, -1); ++ blt(R0, len, CRC_by1_loop); ++ ++ bind(L_exit); ++ nor(crc, crc, R0); ++} ++ ++/** ++ * @param crc register containing existing CRC (32-bit) ++ * @param buf register pointing to input byte buffer (byte*) ++ * @param len register containing number of bytes ++ * @param tmp scratch register ++**/ ++void MacroAssembler::kernel_crc32c(Register crc, Register buf, Register len, Register tmp) { ++ Label CRC_by64_loop, CRC_by4_loop, CRC_by1_loop, CRC_less64, CRC_by64_pre, CRC_by32_loop, CRC_less32, L_exit; ++ assert_different_registers(crc, buf, len, tmp); ++ ++ addi_d(len, len, -64); ++ bge(len, R0, CRC_by64_loop); ++ addi_d(len, len, 64-4); ++ bge(len, R0, CRC_by4_loop); ++ addi_d(len, len, 4); ++ blt(R0, len, CRC_by1_loop); ++ b(L_exit); ++ ++ bind(CRC_by64_loop); ++ ld_d(tmp, buf, 0); ++ crcc_w_d_w(crc, tmp, crc); ++ ld_d(tmp, buf, 8); ++ crcc_w_d_w(crc, tmp, crc); ++ ld_d(tmp, buf, 16); ++ crcc_w_d_w(crc, tmp, crc); ++ ld_d(tmp, buf, 24); ++ crcc_w_d_w(crc, tmp, crc); ++ ld_d(tmp, buf, 32); ++ crcc_w_d_w(crc, tmp, crc); ++ ld_d(tmp, buf, 40); ++ crcc_w_d_w(crc, tmp, crc); ++ ld_d(tmp, buf, 48); ++ crcc_w_d_w(crc, tmp, crc); ++ ld_d(tmp, buf, 56); ++ crcc_w_d_w(crc, tmp, crc); ++ addi_d(buf, buf, 64); ++ addi_d(len, len, -64); ++ bge(len, R0, CRC_by64_loop); ++ addi_d(len, len, 64-4); ++ bge(len, R0, CRC_by4_loop); ++ addi_d(len, len, 4); ++ blt(R0, len, 
CRC_by1_loop); ++ b(L_exit); ++ ++ bind(CRC_by4_loop); ++ ld_w(tmp, buf, 0); ++ crcc_w_w_w(crc, tmp, crc); ++ addi_d(buf, buf, 4); ++ addi_d(len, len, -4); ++ bge(len, R0, CRC_by4_loop); ++ addi_d(len, len, 4); ++ bge(R0, len, L_exit); ++ ++ bind(CRC_by1_loop); ++ ld_b(tmp, buf, 0); ++ crcc_w_b_w(crc, tmp, crc); ++ addi_d(buf, buf, 1); ++ addi_d(len, len, -1); ++ blt(R0, len, CRC_by1_loop); ++ ++ bind(L_exit); ++} ++ ++#ifdef COMPILER2 ++void MacroAssembler::cmp_branch_short(int flag, Register op1, Register op2, Label& L, bool is_signed) { ++ ++ switch(flag) { ++ case 0x01: //equal ++ beq(op1, op2, L); ++ break; ++ case 0x02: //not_equal ++ bne(op1, op2, L); ++ break; ++ case 0x03: //above ++ if (is_signed) ++ blt(op2, op1, L); ++ else ++ bltu(op2, op1, L); ++ break; ++ case 0x04: //above_equal ++ if (is_signed) ++ bge(op1, op2, L); ++ else ++ bgeu(op1, op2, L); ++ break; ++ case 0x05: //below ++ if (is_signed) ++ blt(op1, op2, L); ++ else ++ bltu(op1, op2, L); ++ break; ++ case 0x06: //below_equal ++ if (is_signed) ++ bge(op2, op1, L); ++ else ++ bgeu(op2, op1, L); ++ break; ++ default: ++ Unimplemented(); ++ } ++} ++ ++void MacroAssembler::cmp_branch_long(int flag, Register op1, Register op2, Label* L, bool is_signed) { ++ switch(flag) { ++ case 0x01: //equal ++ beq_long(op1, op2, *L); ++ break; ++ case 0x02: //not_equal ++ bne_long(op1, op2, *L); ++ break; ++ case 0x03: //above ++ if (is_signed) ++ blt_long(op2, op1, *L, true /* signed */); ++ else ++ blt_long(op2, op1, *L, false); ++ break; ++ case 0x04: //above_equal ++ if (is_signed) ++ bge_long(op1, op2, *L, true /* signed */); ++ else ++ bge_long(op1, op2, *L, false); ++ break; ++ case 0x05: //below ++ if (is_signed) ++ blt_long(op1, op2, *L, true /* signed */); ++ else ++ blt_long(op1, op2, *L, false); ++ break; ++ case 0x06: //below_equal ++ if (is_signed) ++ bge_long(op2, op1, *L, true /* signed */); ++ else ++ bge_long(op2, op1, *L, false); ++ break; ++ default: ++ Unimplemented(); ++ } ++} ++ ++void MacroAssembler::cmp_branchEqNe_off21(int flag, Register op1, Label& L) { ++ switch(flag) { ++ case 0x01: //equal ++ beqz(op1, L); ++ break; ++ case 0x02: //not_equal ++ bnez(op1, L); ++ break; ++ default: ++ Unimplemented(); ++ } ++} ++#endif // COMPILER2 ++ ++void MacroAssembler::membar(Membar_mask_bits hint){ ++ address prev = pc() - NativeInstruction::sync_instruction_size; ++ address last = code()->last_insn(); ++ if (last != NULL && ((NativeInstruction*)last)->is_sync() && prev == last) { ++ code()->set_last_insn(NULL); ++ NativeMembar *membar = (NativeMembar*)prev; ++ // merged membar ++ // e.g. 
LoadLoad and LoadLoad|LoadStore to LoadLoad|LoadStore ++ membar->set_hint(membar->get_hint() & (~hint & 0xF)); ++ block_comment("merged membar"); ++ } else { ++ code()->set_last_insn(pc()); ++ Assembler::membar(hint); ++ } ++} ++ ++// Code for BigInteger::mulAdd intrinsic ++// out = A0 ++// in = A1 ++// offset = A2 (already out.length-offset) ++// len = A3 ++// k = A4 ++// ++// pseudo code from java implementation: ++// long kLong = k & LONG_MASK; ++// carry = 0; ++// offset = out.length-offset - 1; ++// for (int j = len - 1; j >= 0; j--) { ++// product = (in[j] & LONG_MASK) * kLong + (out[offset] & LONG_MASK) + carry; ++// out[offset--] = (int)product; ++// carry = product >>> 32; ++// } ++// return (int)carry; ++void MacroAssembler::mul_add(Register out, Register in, Register offset, ++ Register len, Register k) { ++ Label L_tail_loop, L_unroll, L_end; ++ ++ move(SCR2, out); ++ move(out, R0); // should clear out ++ bge(R0, len, L_end); ++ ++ alsl_d(offset, offset, SCR2, LogBytesPerInt - 1); ++ alsl_d(in, len, in, LogBytesPerInt - 1); ++ ++ const int unroll = 16; ++ li(SCR2, unroll); ++ blt(len, SCR2, L_tail_loop); ++ ++ bind(L_unroll); ++ ++ addi_d(in, in, -unroll * BytesPerInt); ++ addi_d(offset, offset, -unroll * BytesPerInt); ++ ++ for (int i = unroll - 1; i >= 0; i--) { ++ ld_wu(SCR1, in, i * BytesPerInt); ++ mulw_d_wu(SCR1, SCR1, k); ++ add_d(out, out, SCR1); // out as scratch ++ ld_wu(SCR1, offset, i * BytesPerInt); ++ add_d(SCR1, SCR1, out); ++ st_w(SCR1, offset, i * BytesPerInt); ++ srli_d(out, SCR1, 32); // keep carry ++ } ++ ++ sub_w(len, len, SCR2); ++ bge(len, SCR2, L_unroll); ++ ++ bge(R0, len, L_end); // check tail ++ ++ bind(L_tail_loop); ++ ++ addi_d(in, in, -BytesPerInt); ++ ld_wu(SCR1, in, 0); ++ mulw_d_wu(SCR1, SCR1, k); ++ add_d(out, out, SCR1); // out as scratch ++ ++ addi_d(offset, offset, -BytesPerInt); ++ ld_wu(SCR1, offset, 0); ++ add_d(SCR1, SCR1, out); ++ st_w(SCR1, offset, 0); ++ ++ srli_d(out, SCR1, 32); // keep carry ++ ++ addi_w(len, len, -1); ++ blt(R0, len, L_tail_loop); ++ ++ bind(L_end); ++} ++ +diff --git a/src/hotspot/cpu/loongarch/macroAssembler_loongarch.hpp b/src/hotspot/cpu/loongarch/macroAssembler_loongarch.hpp +new file mode 100644 +index 0000000000..1f96557543 +--- /dev/null ++++ b/src/hotspot/cpu/loongarch/macroAssembler_loongarch.hpp +@@ -0,0 +1,825 @@ ++/* ++ * Copyright (c) 1997, 2013, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ * ++ */ ++ ++#ifndef CPU_LOONGARCH_MACROASSEMBLER_LOONGARCH_HPP ++#define CPU_LOONGARCH_MACROASSEMBLER_LOONGARCH_HPP ++ ++#include "asm/assembler.hpp" ++#include "runtime/rtmLocking.hpp" ++#include "utilities/macros.hpp" ++ ++// MacroAssembler extends Assembler by frequently used macros. ++// ++// Instructions for which a 'better' code sequence exists depending ++// on arguments should also go in here. ++ ++class MacroAssembler: public Assembler { ++ friend class LIR_Assembler; ++ friend class Runtime1; // as_Address() ++ ++ public: ++ // Compare code ++ typedef enum { ++ EQ = 0x01, ++ NE = 0x02, ++ GT = 0x03, ++ GE = 0x04, ++ LT = 0x05, ++ LE = 0x06 ++ } CMCompare; ++ ++ protected: ++ ++ // Support for VM calls ++ // ++ // This is the base routine called by the different versions of call_VM_leaf. The interpreter ++ // may customize this version by overriding it for its purposes (e.g., to save/restore ++ // additional registers when doing a VM call). ++ #define VIRTUAL virtual ++ ++ VIRTUAL void call_VM_leaf_base( ++ address entry_point, // the entry point ++ int number_of_arguments // the number of arguments to pop after the call ++ ); ++ ++ // This is the base routine called by the different versions of call_VM. The interpreter ++ // may customize this version by overriding it for its purposes (e.g., to save/restore ++ // additional registers when doing a VM call). ++ // ++ // If no java_thread register is specified (noreg) than TREG will be used instead. call_VM_base ++ // returns the register which contains the thread upon return. If a thread register has been ++ // specified, the return value will correspond to that register. If no last_java_sp is specified ++ // (noreg) than sp will be used instead. ++ VIRTUAL void call_VM_base( // returns the register containing the thread upon return ++ Register oop_result, // where an oop-result ends up if any; use noreg otherwise ++ Register java_thread, // the thread if computed before ; use noreg otherwise ++ Register last_java_sp, // to set up last_Java_frame in stubs; use noreg otherwise ++ address entry_point, // the entry point ++ int number_of_arguments, // the number of arguments (w/o thread) to pop after the call ++ bool check_exceptions // whether to check for pending exceptions after return ++ ); ++ ++ void call_VM_helper(Register oop_result, address entry_point, int number_of_arguments, bool check_exceptions = true); ++ ++ // helpers for FPU flag access ++ // tmp is a temporary register, if none is available use noreg ++ ++ public: ++ MacroAssembler(CodeBuffer* code) : Assembler(code) {} ++ ++ // These routines should emit JVMTI PopFrame and ForceEarlyReturn handling code. ++ // The implementation is only non-empty for the InterpreterMacroAssembler, ++ // as only the interpreter handles PopFrame and ForceEarlyReturn requests. ++ virtual void check_and_handle_popframe(Register java_thread); ++ virtual void check_and_handle_earlyret(Register java_thread); ++ ++ Address as_Address(AddressLiteral adr); ++ Address as_Address(ArrayAddress adr); ++ ++ static intptr_t i[32]; ++ static float f[32]; ++ static void print(outputStream *s); ++ ++ static int i_offset(unsigned int k); ++ static int f_offset(unsigned int k); ++ ++ static void save_registers(MacroAssembler *masm); ++ static void restore_registers(MacroAssembler *masm); ++ ++ // Support for NULL-checks ++ // ++ // Generates code that causes a NULL OS exception if the content of reg is NULL. 
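++  // (This is the usual implicit null check: for small offsets the access
++  //  itself faults and the SEGV handler does the rest.)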
++ // If the accessed location is M[reg + offset] and the offset is known, provide the ++ // offset. No explicit code generation is needed if the offset is within a certain ++ // range (0 <= offset <= page_size). ++ ++ void null_check(Register reg, int offset = -1); ++ static bool needs_explicit_null_check(intptr_t offset); ++ ++ // Required platform-specific helpers for Label::patch_instructions. ++ // They _shadow_ the declarations in AbstractAssembler, which are undefined. ++ static void pd_patch_instruction(address branch, address target); ++ ++ address emit_trampoline_stub(int insts_call_instruction_offset, address target); ++ ++ // Support for inc/dec with optimal instruction selection depending on value ++ // void incrementl(Register reg, int value = 1); ++ // void decrementl(Register reg, int value = 1); ++ ++ ++ // Alignment ++ void align(int modulus); ++ ++ ++ // Stack frame creation/removal ++ void enter(); ++ void leave(); ++ ++ // Frame creation and destruction shared between JITs. ++ void build_frame(int framesize); ++ void remove_frame(int framesize); ++ ++ // Support for getting the JavaThread pointer (i.e.; a reference to thread-local information) ++ // The pointer will be loaded into the thread register. ++ void get_thread(Register thread); ++ ++ ++ // Support for VM calls ++ // ++ // It is imperative that all calls into the VM are handled via the call_VM macros. ++ // They make sure that the stack linkage is setup correctly. call_VM's correspond ++ // to ENTRY/ENTRY_X entry points while call_VM_leaf's correspond to LEAF entry points. ++ ++ ++ void call_VM(Register oop_result, ++ address entry_point, ++ bool check_exceptions = true); ++ void call_VM(Register oop_result, ++ address entry_point, ++ Register arg_1, ++ bool check_exceptions = true); ++ void call_VM(Register oop_result, ++ address entry_point, ++ Register arg_1, Register arg_2, ++ bool check_exceptions = true); ++ void call_VM(Register oop_result, ++ address entry_point, ++ Register arg_1, Register arg_2, Register arg_3, ++ bool check_exceptions = true); ++ ++ // Overloadings with last_Java_sp ++ void call_VM(Register oop_result, ++ Register last_java_sp, ++ address entry_point, ++ int number_of_arguments = 0, ++ bool check_exceptions = true); ++ void call_VM(Register oop_result, ++ Register last_java_sp, ++ address entry_point, ++ Register arg_1, bool ++ check_exceptions = true); ++ void call_VM(Register oop_result, ++ Register last_java_sp, ++ address entry_point, ++ Register arg_1, Register arg_2, ++ bool check_exceptions = true); ++ void call_VM(Register oop_result, ++ Register last_java_sp, ++ address entry_point, ++ Register arg_1, Register arg_2, Register arg_3, ++ bool check_exceptions = true); ++ ++ void get_vm_result (Register oop_result, Register thread); ++ void get_vm_result_2(Register metadata_result, Register thread); ++ void call_VM_leaf(address entry_point, ++ int number_of_arguments = 0); ++ void call_VM_leaf(address entry_point, ++ Register arg_1); ++ void call_VM_leaf(address entry_point, ++ Register arg_1, Register arg_2); ++ void call_VM_leaf(address entry_point, ++ Register arg_1, Register arg_2, Register arg_3); ++ ++ // Super call_VM calls - correspond to MacroAssembler::call_VM(_leaf) calls ++ void super_call_VM_leaf(address entry_point); ++ void super_call_VM_leaf(address entry_point, Register arg_1); ++ void super_call_VM_leaf(address entry_point, Register arg_1, Register arg_2); ++ void super_call_VM_leaf(address entry_point, Register arg_1, Register arg_2, Register arg_3); ++ ++ 
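++  // Illustrative only -- a typical call site, mirroring how other HotSpot
++  // ports use these wrappers (the runtime entry and argument register here
++  // are just an example, not something defined in this file):
++  //
++  //   call_VM(noreg,
++  //           CAST_FROM_FN_PTR(address, InterpreterRuntime::throw_ClassCastException),
++  //           A1 /* failing oop */);
++  //
++  // The call_VM variants set up and tear down the last_Java_frame anchor and
++  // (by default) check for pending exceptions; the call_VM_leaf variants are
++  // for leaf runtime functions that need neither.
++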
// last Java Frame (fills frame anchor) ++ void set_last_Java_frame(Register thread, ++ Register last_java_sp, ++ Register last_java_fp, ++ Label& last_java_pc); ++ ++ // thread in the default location (S6) ++ void set_last_Java_frame(Register last_java_sp, ++ Register last_java_fp, ++ Label& last_java_pc); ++ ++ void reset_last_Java_frame(Register thread, bool clear_fp); ++ ++ // thread in the default location (S6) ++ void reset_last_Java_frame(bool clear_fp); ++ ++ // jobjects ++ void clear_jweak_tag(Register possibly_jweak); ++ void resolve_jobject(Register value, Register thread, Register tmp); ++ ++ // C 'boolean' to Java boolean: x == 0 ? 0 : 1 ++ void c2bool(Register x); ++ ++ void resolve_oop_handle(Register result, Register tmp); ++ void load_mirror(Register dst, Register method, Register tmp); ++ ++ // oop manipulations ++ void load_klass(Register dst, Register src); ++ void store_klass(Register dst, Register src); ++ ++ void access_load_at(BasicType type, DecoratorSet decorators, Register dst, Address src, ++ Register tmp1, Register thread_tmp); ++ void access_store_at(BasicType type, DecoratorSet decorators, Address dst, Register src, ++ Register tmp1, Register tmp2); ++ ++ void load_heap_oop(Register dst, Address src, Register tmp1 = noreg, ++ Register thread_tmp = noreg, DecoratorSet decorators = 0); ++ void load_heap_oop_not_null(Register dst, Address src, Register tmp1 = noreg, ++ Register thread_tmp = noreg, DecoratorSet decorators = 0); ++ void store_heap_oop(Address dst, Register src, Register tmp1 = noreg, ++ Register tmp2 = noreg, DecoratorSet decorators = 0); ++ ++ // Used for storing NULL. All other oop constants should be ++ // stored using routines that take a jobject. ++ void store_heap_oop_null(Address dst); ++ ++ void load_prototype_header(Register dst, Register src); ++ ++ void store_klass_gap(Register dst, Register src); ++ ++ void encode_heap_oop(Register r); ++ void encode_heap_oop(Register dst, Register src); ++ void decode_heap_oop(Register r); ++ void decode_heap_oop(Register dst, Register src); ++ void encode_heap_oop_not_null(Register r); ++ void decode_heap_oop_not_null(Register r); ++ void encode_heap_oop_not_null(Register dst, Register src); ++ void decode_heap_oop_not_null(Register dst, Register src); ++ ++ void encode_klass_not_null(Register r); ++ void decode_klass_not_null(Register r); ++ void encode_klass_not_null(Register dst, Register src); ++ void decode_klass_not_null(Register dst, Register src); ++ ++ // if heap base register is used - reinit it with the correct value ++ void reinit_heapbase(); ++ ++ DEBUG_ONLY(void verify_heapbase(const char* msg);) ++ ++ void set_narrow_klass(Register dst, Klass* k); ++ void set_narrow_oop(Register dst, jobject obj); ++ ++ // Sign extension ++ void sign_extend_short(Register reg) { ext_w_h(reg, reg); } ++ void sign_extend_byte(Register reg) { ext_w_b(reg, reg); } ++ void rem_s(FloatRegister fd, FloatRegister fs, FloatRegister ft, FloatRegister tmp); ++ void rem_d(FloatRegister fd, FloatRegister fs, FloatRegister ft, FloatRegister tmp); ++ ++ // allocation ++ void eden_allocate( ++ Register obj, // result: pointer to object after successful allocation ++ Register var_size_in_bytes, // object size in bytes if unknown at compile time; invalid otherwise ++ int con_size_in_bytes, // object size in bytes if known at compile time ++ Register t1, // temp register ++ Label& slow_case // continuation point if fast allocation fails ++ ); ++ void tlab_allocate( ++ Register obj, // result: pointer to object after 
successful allocation ++ Register var_size_in_bytes, // object size in bytes if unknown at compile time; invalid otherwise ++ int con_size_in_bytes, // object size in bytes if known at compile time ++ Register t1, // temp register ++ Register t2, // temp register ++ Label& slow_case // continuation point if fast allocation fails ++ ); ++ void incr_allocated_bytes(Register thread, ++ Register var_size_in_bytes, int con_size_in_bytes, ++ Register t1 = noreg); ++ // interface method calling ++ void lookup_interface_method(Register recv_klass, ++ Register intf_klass, ++ RegisterOrConstant itable_index, ++ Register method_result, ++ Register scan_temp, ++ Label& no_such_interface, ++ bool return_method = true); ++ ++ // virtual method calling ++ void lookup_virtual_method(Register recv_klass, ++ RegisterOrConstant vtable_index, ++ Register method_result); ++ ++ // Test sub_klass against super_klass, with fast and slow paths. ++ ++ // The fast path produces a tri-state answer: yes / no / maybe-slow. ++ // One of the three labels can be NULL, meaning take the fall-through. ++ // If super_check_offset is -1, the value is loaded up from super_klass. ++ // No registers are killed, except temp_reg. ++ void check_klass_subtype_fast_path(Register sub_klass, ++ Register super_klass, ++ Register temp_reg, ++ Label* L_success, ++ Label* L_failure, ++ Label* L_slow_path, ++ RegisterOrConstant super_check_offset = RegisterOrConstant(-1)); ++ ++ // The rest of the type check; must be wired to a corresponding fast path. ++ // It does not repeat the fast path logic, so don't use it standalone. ++ // The temp_reg and temp2_reg can be noreg, if no temps are available. ++ // Updates the sub's secondary super cache as necessary. ++ // If set_cond_codes, condition codes will be Z on success, NZ on failure. ++ void check_klass_subtype_slow_path(Register sub_klass, ++ Register super_klass, ++ Register temp_reg, ++ Register temp2_reg, ++ Label* L_success, ++ Label* L_failure, ++ bool set_cond_codes = false); ++ ++ // Simplified, combined version, good for typical uses. ++ // Falls through on failure. ++ void check_klass_subtype(Register sub_klass, ++ Register super_klass, ++ Register temp_reg, ++ Label& L_success); ++ ++ ++ // Debugging ++ ++ // only if +VerifyOops ++ void verify_oop(Register reg, const char* s = "broken oop"); ++ void verify_oop_addr(Address addr, const char * s = "broken oop addr"); ++ void verify_oop_subroutine(); ++ // TODO: verify method and klass metadata (compare against vptr?) 
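++  // Note: the two verify helpers below are currently no-ops on this port
++  // (empty bodies), so the verify_method_ptr/verify_klass_ptr macros cost
++  // nothing at runtime.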
++ void _verify_method_ptr(Register reg, const char * msg, const char * file, int line) {} ++ void _verify_klass_ptr(Register reg, const char * msg, const char * file, int line){} ++ ++ #define verify_method_ptr(reg) _verify_method_ptr(reg, "broken method " #reg, __FILE__, __LINE__) ++ #define verify_klass_ptr(reg) _verify_klass_ptr(reg, "broken klass " #reg, __FILE__, __LINE__) ++ ++ // only if +VerifyFPU ++ void verify_FPU(int stack_depth, const char* s = "illegal FPU state"); ++ ++ // prints msg, dumps registers and stops execution ++ void stop(const char* msg); ++ ++ // prints msg and continues ++ void warn(const char* msg); ++ ++ static void debug(char* msg/*, RegistersForDebugging* regs*/); ++ static void debug64(char* msg, int64_t pc, int64_t regs[]); ++ ++ void untested() { stop("untested"); } ++ ++ void unimplemented(const char* what = ""); ++ ++ void should_not_reach_here() { stop("should not reach here"); } ++ ++ void print_CPU_state(); ++ ++ // Stack overflow checking ++ void bang_stack_with_offset(int offset) { ++ // stack grows down, caller passes positive offset ++ assert(offset > 0, "must bang with negative offset"); ++ if (offset <= 2048) { ++ st_w(A0, SP, -offset); ++ } else if (offset <= 32768 && !(offset & 3)) { ++ stptr_w(A0, SP, -offset); ++ } else { ++ li(AT, offset); ++ sub_d(AT, SP, AT); ++ st_w(A0, AT, 0); ++ } ++ } ++ ++ // Writes to stack successive pages until offset reached to check for ++ // stack overflow + shadow pages. Also, clobbers tmp ++ void bang_stack_size(Register size, Register tmp); ++ ++ // Check for reserved stack access in method being exited (for JIT) ++ void reserved_stack_check(); ++ ++ virtual RegisterOrConstant delayed_value_impl(intptr_t* delayed_value_addr, ++ Register tmp, ++ int offset); ++ ++ // Support for serializing memory accesses between threads ++ void serialize_memory(Register thread, Register tmp); ++ ++ void safepoint_poll(Label& slow_path, Register thread_reg); ++ void safepoint_poll_acquire(Label& slow_path, Register thread_reg); ++ ++ //void verify_tlab(); ++ void verify_tlab(Register t1, Register t2); ++ ++ // Biased locking support ++ // lock_reg and obj_reg must be loaded up with the appropriate values. ++ // tmp_reg is optional. If it is supplied (i.e., != noreg) it will ++ // be killed; if not supplied, push/pop will be used internally to ++ // allocate a temporary (inefficient, avoid if possible). ++ // Optional slow case is for implementations (interpreter and C1) which branch to ++ // slow case directly. Leaves condition codes set for C2's Fast_Lock node. ++ // Returns offset of first potentially-faulting instruction for null ++ // check info (currently consumed only by C1). If ++ // swap_reg_contains_mark is true then returns -1 as it is assumed ++ // the calling code has already passed any potential faults. 
++ int biased_locking_enter(Register lock_reg, Register obj_reg, ++ Register swap_reg, Register tmp_reg, ++ bool swap_reg_contains_mark, ++ Label& done, Label* slow_case = NULL, ++ BiasedLockingCounters* counters = NULL); ++ void biased_locking_exit (Register obj_reg, Register temp_reg, Label& done); ++#ifdef COMPILER2 ++ void cmp_branch_short(int flag, Register op1, Register op2, Label& L, bool is_signed); ++ void cmp_branch_long(int flag, Register op1, Register op2, Label* L, bool is_signed); ++ void cmp_branchEqNe_off21(int flag, Register op1, Label& L); ++ void fast_lock(Register obj, Register box, Register res, Register tmp, Register scr); ++ void fast_unlock(Register obj, Register box, Register res, Register tmp, Register scr); ++#endif ++ ++ ++ // the follow two might use AT register, be sure you have no meanful data in AT before you call them ++ void increment(Register reg, int imm); ++ void decrement(Register reg, int imm); ++ void increment(Address addr, int imm = 1); ++ void decrement(Address addr, int imm = 1); ++ void shl(Register reg, int sa) { slli_d(reg, reg, sa); } ++ void shr(Register reg, int sa) { srli_d(reg, reg, sa); } ++ void sar(Register reg, int sa) { srai_d(reg, reg, sa); } ++ // Helper functions for statistics gathering. ++ void atomic_inc32(address counter_addr, int inc, Register tmp_reg1, Register tmp_reg2); ++ ++ // Calls ++ void call(address entry); ++ void call(address entry, relocInfo::relocType rtype); ++ void call(address entry, RelocationHolder& rh); ++ void call_long(address entry); ++ ++ address trampoline_call(AddressLiteral entry, CodeBuffer *cbuf = NULL); ++ ++ static const unsigned long branch_range = NOT_DEBUG(128 * M) DEBUG_ONLY(2 * M); ++ ++ static bool far_branches() { ++ if (ForceUnreachable) { ++ return true; ++ } else { ++ return ReservedCodeCacheSize > branch_range; ++ } ++ } ++ ++ // Emit the CompiledIC call idiom ++ address ic_call(address entry, jint method_index = 0); ++ ++ // Jumps ++ void jmp(address entry); ++ void jmp(address entry, relocInfo::relocType rtype); ++ void jmp_far(Label& L); // patchable ++ ++ /* branches may exceed 16-bit offset */ ++ void b_far(address entry); ++ void b_far(Label& L); ++ ++ void bne_far (Register rs, Register rt, address entry); ++ void bne_far (Register rs, Register rt, Label& L); ++ ++ void beq_far (Register rs, Register rt, address entry); ++ void beq_far (Register rs, Register rt, Label& L); ++ ++ void blt_far (Register rs, Register rt, address entry, bool is_signed); ++ void blt_far (Register rs, Register rt, Label& L, bool is_signed); ++ ++ void bge_far (Register rs, Register rt, address entry, bool is_signed); ++ void bge_far (Register rs, Register rt, Label& L, bool is_signed); ++ ++ // For C2 to support long branches ++ void beq_long (Register rs, Register rt, Label& L); ++ void bne_long (Register rs, Register rt, Label& L); ++ void blt_long (Register rs, Register rt, Label& L, bool is_signed); ++ void bge_long (Register rs, Register rt, Label& L, bool is_signed); ++ void bc1t_long (Label& L); ++ void bc1f_long (Label& L); ++ ++ static bool patchable_branches() { ++ const unsigned long branch_range = NOT_DEBUG(128 * M) DEBUG_ONLY(2 * M); ++ return ReservedCodeCacheSize > branch_range; ++ } ++ ++ static bool reachable_from_branch_short(jlong offs); ++ ++ void patchable_jump_far(Register ra, jlong offs); ++ void patchable_jump(address target, bool force_patchable = false); ++ void patchable_call(address target, address call_size = 0); ++ ++ // Floating ++ void generate_dsin_dcos(bool isCos, 
address npio2_hw, address two_over_pi, ++ address pio2, address dsin_coef, address dcos_coef); ++ ++ // Data ++ ++ // Load and store values by size and signed-ness ++ void load_sized_value(Register dst, Address src, size_t size_in_bytes, bool is_signed, Register dst2 = noreg); ++ void store_sized_value(Address dst, Register src, size_t size_in_bytes, Register src2 = noreg); ++ ++ // ld_ptr will perform lw for 32 bit VMs and ld for 64 bit VMs ++ inline void ld_ptr(Register rt, Address a) { ++ ld_d(rt, a); ++ } ++ ++ inline void ld_ptr(Register rt, Register base, int offset16) { ++ ld_d(rt, base, offset16); ++ } ++ ++ // st_ptr will perform sw for 32 bit VMs and sd for 64 bit VMs ++ inline void st_ptr(Register rt, Address a) { ++ st_d(rt, a); ++ } ++ ++ inline void st_ptr(Register rt, Register base, int offset16) { ++ st_d(rt, base, offset16); ++ } ++ ++ void ld_ptr(Register rt, Register base, Register offset); ++ void st_ptr(Register rt, Register base, Register offset); ++ ++ // swap the two byte of the low 16-bit halfword ++ // this directive will use AT, be sure the high 16-bit of reg is zero ++ void hswap(Register reg); ++ void huswap(Register reg); ++ ++ // convert big endian integer to little endian integer ++ void swap(Register reg); ++ ++ void cmpxchg(Address addr, Register oldval, Register newval, Register resflag, ++ bool retold, bool barrier); ++ void cmpxchg(Address addr, Register oldval, Register newval, Register tmp, ++ bool retold, bool barrier, Label& succ, Label* fail = NULL); ++ void cmpxchg32(Address addr, Register oldval, Register newval, Register resflag, ++ bool sign, bool retold, bool barrier); ++ void cmpxchg32(Address addr, Register oldval, Register newval, Register tmp, ++ bool sign, bool retold, bool barrier, Label& succ, Label* fail = NULL); ++ ++ void extend_sign(Register rh, Register rl) { /*stop("extend_sign");*/ guarantee(0, "LA not implemented yet");} ++ void neg(Register reg) { /*dsubu(reg, R0, reg);*/ guarantee(0, "LA not implemented yet");} ++ void push (Register reg) { addi_d(SP, SP, -8); st_d (reg, SP, 0); } ++ void push (FloatRegister reg) { addi_d(SP, SP, -8); fst_d (reg, SP, 0); } ++ void pop (Register reg) { ld_d (reg, SP, 0); addi_d(SP, SP, 8); } ++ void pop (FloatRegister reg) { fld_d (reg, SP, 0); addi_d(SP, SP, 8); } ++ void pop () { addi_d(SP, SP, 8); } ++ void pop2 () { addi_d(SP, SP, 16); } ++ void push2(Register reg1, Register reg2); ++ void pop2 (Register reg1, Register reg2); ++ //we need 2 fun to save and resotre general register ++ void pushad(); ++ void popad(); ++ void pushad_except_v0(); ++ void popad_except_v0(); ++ void push(RegSet regs) { if (regs.bits()) push(regs.bits()); } ++ void pop(RegSet regs) { if (regs.bits()) pop(regs.bits()); } ++ ++ void li(Register rd, jlong value); ++ void li(Register rd, address addr) { li(rd, (long)addr); } ++ void patchable_li52(Register rd, jlong value); ++ void lipc(Register rd, Label& L); ++ ++ void move(Register rd, Register rs) { orr(rd, rs, R0); } ++ void move_u32(Register rd, Register rs) { add_w(rd, rs, R0); } ++ void mov_metadata(Register dst, Metadata* obj); ++ void mov_metadata(Address dst, Metadata* obj); ++ ++ // Load the base of the cardtable byte map into reg. ++ void load_byte_map_base(Register reg); ++ ++ // Code for java.lang.StringCoding::hasNegatives() instrinsic. ++ void has_negatives(Register ary1, Register len, Register result); ++ ++ // Code for java.lang.StringUTF16::compress intrinsic. 
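++  // Roughly, per the Java implementation being intrinsified: copy 'len' chars
++  // from src to dst as single bytes while every char fits in 8 bits; the
++  // result reports whether the whole range was compressed. (Sketch of the
++  // shape only -- see the shared Java code for the exact contract.)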
++ void char_array_compress(Register src, Register dst, Register len, ++ Register result, Register tmp1, ++ Register tmp2, Register tmp3); ++ ++ // Code for java.lang.StringLatin1::inflate intrinsic. ++ void byte_array_inflate(Register src, Register dst, Register len, ++ Register tmp1, Register tmp2); ++ ++ // Find index of char in UTF-16 string ++ void string_indexof_char(Register str1, Register cnt1, ++ Register ch, Register result, ++ Register tmp1, Register tmp2, ++ Register tmp3); ++ ++ //FIXME ++ void empty_FPU_stack(){/*need implemented*/}; ++ ++#ifdef COMPILER2 ++ // Compare strings. ++ void string_compare(Register str1, Register str2, ++ Register cnt1, Register cnt2, Register result, ++ int ae); ++ ++ // Compare char[] or byte[] arrays. ++ void arrays_equals(Register str1, Register str2, ++ Register cnt, Register tmp1, Register tmp2, Register result, ++ bool is_char); ++#endif ++ ++ // method handles (JSR 292) ++ Address argument_address(RegisterOrConstant arg_slot, int extra_slot_offset = 0); ++ ++ ++ // LA added: ++ void jr (Register reg) { jirl(R0, reg, 0); } ++ void jalr(Register reg) { jirl(RA, reg, 0); } ++ void nop () { andi(R0, R0, 0); } ++ void andr(Register rd, Register rj, Register rk) { AND(rd, rj, rk); } ++ void xorr(Register rd, Register rj, Register rk) { XOR(rd, rj, rk); } ++ void orr (Register rd, Register rj, Register rk) { OR(rd, rj, rk); } ++ void lea (Register rd, Address src); ++ void lea(Register dst, AddressLiteral adr); ++ static int patched_branch(int dest_pos, int inst, int inst_pos); ++ ++ // Conditional move ++ void cmp_cmov(Register op1, ++ Register op2, ++ Register dst, ++ Register src1, ++ Register src2, ++ CMCompare cmp = EQ, ++ bool is_signed = true); ++ void cmp_cmov(Register op1, ++ Register op2, ++ Register dst, ++ Register src, ++ CMCompare cmp = EQ, ++ bool is_signed = true); ++ void cmp_cmov(FloatRegister op1, ++ FloatRegister op2, ++ Register dst, ++ Register src, ++ FloatRegister tmp1, ++ FloatRegister tmp2, ++ CMCompare cmp = EQ, ++ bool is_float = true); ++ void cmp_cmov(FloatRegister op1, ++ FloatRegister op2, ++ FloatRegister dst, ++ FloatRegister src, ++ CMCompare cmp = EQ, ++ bool is_float = true); ++ void cmp_cmov(Register op1, ++ Register op2, ++ FloatRegister dst, ++ FloatRegister src, ++ FloatRegister tmp1, ++ FloatRegister tmp2, ++ CMCompare cmp = EQ); ++ ++ // CRC32 code for java.util.zip.CRC32::update() instrinsic. ++ void update_byte_crc32(Register crc, Register val, Register table); ++ ++ // CRC32 code for java.util.zip.CRC32::updateBytes() instrinsic. ++ void kernel_crc32(Register crc, Register buf, Register len, Register tmp); ++ ++ // CRC32C code for java.util.zip.CRC32C::updateBytes() instrinsic. ++ void kernel_crc32c(Register crc, Register buf, Register len, Register tmp); ++ ++ void membar(Membar_mask_bits hint); ++ ++ void bind(Label& L) { ++ Assembler::bind(L); ++ code()->clear_last_insn(); ++ } ++ ++ // Code for java.math.BigInteger::mulAdd intrinsic. 
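++  // (The Java-level pseudo code for this routine is shown above the
++  //  definition of mul_add() in macroAssembler_loongarch.cpp.)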
++ void mul_add(Register out, Register in, Register offset, ++ Register len, Register k); ++ ++#undef VIRTUAL ++ ++public: ++// Memory Data Type ++#define INT_TYPE 0x100 ++#define FLOAT_TYPE 0x200 ++#define SIGNED_TYPE 0x10 ++#define UNSIGNED_TYPE 0x20 ++ ++ typedef enum { ++ LOAD_BYTE = INT_TYPE | SIGNED_TYPE | 0x1, ++ LOAD_CHAR = INT_TYPE | SIGNED_TYPE | 0x2, ++ LOAD_SHORT = INT_TYPE | SIGNED_TYPE | 0x3, ++ LOAD_INT = INT_TYPE | SIGNED_TYPE | 0x4, ++ LOAD_LONG = INT_TYPE | SIGNED_TYPE | 0x5, ++ STORE_BYTE = INT_TYPE | SIGNED_TYPE | 0x6, ++ STORE_CHAR = INT_TYPE | SIGNED_TYPE | 0x7, ++ STORE_SHORT = INT_TYPE | SIGNED_TYPE | 0x8, ++ STORE_INT = INT_TYPE | SIGNED_TYPE | 0x9, ++ STORE_LONG = INT_TYPE | SIGNED_TYPE | 0xa, ++ LOAD_LINKED_LONG = INT_TYPE | SIGNED_TYPE | 0xb, ++ ++ LOAD_U_BYTE = INT_TYPE | UNSIGNED_TYPE | 0x1, ++ LOAD_U_SHORT = INT_TYPE | UNSIGNED_TYPE | 0x2, ++ LOAD_U_INT = INT_TYPE | UNSIGNED_TYPE | 0x3, ++ ++ LOAD_FLOAT = FLOAT_TYPE | SIGNED_TYPE | 0x1, ++ LOAD_DOUBLE = FLOAT_TYPE | SIGNED_TYPE | 0x2, ++ LOAD_VECTORX = FLOAT_TYPE | SIGNED_TYPE | 0x3, ++ LOAD_VECTORY = FLOAT_TYPE | SIGNED_TYPE | 0x4, ++ STORE_FLOAT = FLOAT_TYPE | SIGNED_TYPE | 0x5, ++ STORE_DOUBLE = FLOAT_TYPE | SIGNED_TYPE | 0x6, ++ STORE_VECTORX = FLOAT_TYPE | SIGNED_TYPE | 0x7, ++ STORE_VECTORY = FLOAT_TYPE | SIGNED_TYPE | 0x8 ++ } CMLoadStoreDataType; ++ ++ void loadstore_enc(Register reg, int base, int index, int scale, int disp, int type) { ++ assert((type & INT_TYPE), "must be General reg type"); ++ loadstore_t(reg, base, index, scale, disp, type); ++ } ++ ++ void loadstore_enc(FloatRegister reg, int base, int index, int scale, int disp, int type) { ++ assert((type & FLOAT_TYPE), "must be Float reg type"); ++ loadstore_t(reg, base, index, scale, disp, type); ++ } ++ ++#ifdef COMPILER2 ++ void reduce(Register dst, Register src, FloatRegister vsrc, FloatRegister tmp1, FloatRegister tmp2, BasicType type, int opcode, int vector_size); ++ void reduce(FloatRegister dst, FloatRegister src, FloatRegister vsrc, FloatRegister tmp, BasicType type, int opcode, int vector_size); ++#endif ++ ++private: ++ void push(unsigned int bitset); ++ void pop(unsigned int bitset); ++ ++ template ++ void loadstore_t(T reg, int base, int index, int scale, int disp, int type) { ++ if (index != 0) { ++ assert(((scale==0)&&(disp==0)), "only support base+index"); ++ loadstore(reg, as_Register(base), as_Register(index), type); ++ } else { ++ loadstore(reg, as_Register(base), disp, type); ++ } ++ } ++ void loadstore(Register reg, Register base, int disp, int type); ++ void loadstore(Register reg, Register base, Register disp, int type); ++ void loadstore(FloatRegister reg, Register base, int disp, int type); ++ void loadstore(FloatRegister reg, Register base, Register disp, int type); ++ ++#ifdef COMPILER2 ++ void reduce_ins_v(FloatRegister vec1, FloatRegister vec2, FloatRegister vec3, BasicType type, int opcode); ++ void reduce_ins_r(Register reg1, Register reg2, Register reg3, BasicType type, int opcode); ++ void reduce_ins_f(FloatRegister reg1, FloatRegister reg2, FloatRegister reg3, BasicType type, int opcode); ++#endif ++ void generate_kernel_sin(FloatRegister x, bool iyIsOne, address dsin_coef); ++ void generate_kernel_cos(FloatRegister x, address dcos_coef); ++ void generate__ieee754_rem_pio2(address npio2_hw, address two_over_pi, address pio2); ++ void generate__kernel_rem_pio2(address two_over_pi, address pio2); ++}; ++ ++/** ++ * class SkipIfEqual: ++ * ++ * Instantiating this class will result in assembly code being output 
that will ++ * jump around any code emitted between the creation of the instance and it's ++ * automatic destruction at the end of a scope block, depending on the value of ++ * the flag passed to the constructor, which will be checked at run-time. ++ */ ++class SkipIfEqual { ++private: ++ MacroAssembler* _masm; ++ Label _label; ++ ++public: ++ inline SkipIfEqual(MacroAssembler* masm, const bool* flag_addr, bool value) ++ : _masm(masm) { ++ _masm->li(AT, (address)flag_addr); ++ _masm->ld_b(AT, AT, 0); ++ if (value) { ++ _masm->bne(AT, R0, _label); ++ } else { ++ _masm->beq(AT, R0, _label); ++ } ++ } ++ ++ ~SkipIfEqual(); ++}; ++ ++#ifdef ASSERT ++inline bool AbstractAssembler::pd_check_instruction_mark() { return true; } ++#endif ++ ++struct tableswitch { ++ Register _reg; ++ int _insn_index; jint _first_key; jint _last_key; ++ Label _after; ++ Label _branches; ++}; ++ ++#endif // CPU_LOONGARCH_MACROASSEMBLER_LOONGARCH_HPP +diff --git a/src/hotspot/cpu/loongarch/macroAssembler_loongarch.inline.hpp b/src/hotspot/cpu/loongarch/macroAssembler_loongarch.inline.hpp +new file mode 100644 +index 0000000000..49302590c3 +--- /dev/null ++++ b/src/hotspot/cpu/loongarch/macroAssembler_loongarch.inline.hpp +@@ -0,0 +1,34 @@ ++/* ++ * Copyright (c) 1997, 2013, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2017, 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#ifndef CPU_LOONGARCH_MACROASSEMBLER_LOONGARCH_INLINE_HPP ++#define CPU_LOONGARCH_MACROASSEMBLER_LOONGARCH_INLINE_HPP ++ ++#include "asm/assembler.inline.hpp" ++#include "asm/macroAssembler.hpp" ++#include "asm/codeBuffer.hpp" ++#include "code/codeCache.hpp" ++ ++#endif // CPU_LOONGARCH_MACROASSEMBLER_LOONGARCH_INLINE_HPP +diff --git a/src/hotspot/cpu/loongarch/macroAssembler_loongarch_trig.cpp b/src/hotspot/cpu/loongarch/macroAssembler_loongarch_trig.cpp +new file mode 100644 +index 0000000000..3ed4c36651 +--- /dev/null ++++ b/src/hotspot/cpu/loongarch/macroAssembler_loongarch_trig.cpp +@@ -0,0 +1,1625 @@ ++/* Copyright (c) 2018, 2020, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2018, Cavium. All rights reserved. (By BELLSOFT) ++ * Copyright (c) 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. 
++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#include "precompiled.hpp" ++#include "asm/assembler.hpp" ++#include "asm/assembler.inline.hpp" ++#include "macroAssembler_loongarch.hpp" ++ ++#define T0 RT0 ++#define T1 RT1 ++#define T2 RT2 ++#define T3 RT3 ++#define T4 RT4 ++#define T5 RT5 ++#define T6 RT6 ++#define T7 RT7 ++#define T8 RT8 ++ ++// The following code is a optimized version of fdlibm sin/cos implementation ++// (C code is in share/runtime/sharedRuntimeTrig.cpp) adapted for LOONGARCH64. ++ ++// Please refer to sin/cos approximation via polynomial and ++// trigonometric argument reduction techniques to the following literature: ++// ++// [1] Muller, Jean-Michel, Nicolas Brisebarre, Florent De Dinechin, ++// Claude-Pierre Jeannerod, Vincent Lefevre, Guillaume Melquiond, ++// Nathalie Revol, Damien Stehlé, and Serge Torres: ++// Handbook of floating-point arithmetic. ++// Springer Science & Business Media, 2009. ++// [2] K. C. Ng ++// Argument Reduction for Huge Arguments: Good to the Last Bit ++// July 13, 1992, SunPro ++// ++// HOW TO READ THIS CODE: ++// This code consists of several functions. Each function has following header: ++// 1) Description ++// 2) C-pseudo code with differences from fdlibm marked by comments starting ++// with "NOTE". Check unmodified fdlibm code in ++// share/runtime/SharedRuntimeTrig.cpp ++// 3) Brief textual description of changes between fdlibm and current ++// implementation along with optimization notes (if applicable) ++// 4) Assumptions, input and output ++// 5) (Optional) additional notes about intrinsic implementation ++// Each function is separated in blocks which follow the pseudo-code structure ++// ++// HIGH-LEVEL ALGORITHM DESCRIPTION: ++// - entry point: generate_dsin_dcos(...); ++// - check corner cases: NaN, INF, tiny argument. ++// - check if |x| < Pi/4. Then approximate sin/cos via polynomial (kernel_sin/kernel_cos) ++// -- else proceed to argument reduction routine (__ieee754_rem_pio2) and ++// use reduced argument to get result via kernel_sin/kernel_cos ++// ++// HIGH-LEVEL CHANGES BETWEEN INTRINSICS AND FDLIBM: ++// 1) two_over_pi table fdlibm representation is int[], while intrinsic version ++// has these int values converted to double representation to load converted ++// double values directly (see stubRoutines_aarch4::_two_over_pi) ++// 2) Several loops are unrolled and vectorized: see comments in code after ++// labels: SKIP_F_LOAD, RECOMP_FOR1_CHECK, RECOMP_FOR2 ++// 3) fdlibm npio2_hw table now has "prefix" with constants used in ++// calculation. These constants are loaded from npio2_hw table instead of ++// constructing it in code (see stubRoutines_loongarch64.cpp) ++// 4) Polynomial coefficients for sin and cos are moved to table sin_coef ++// and cos_coef to use the same optimization as in 3). 
It allows to load most of ++// required constants via single instruction ++// ++// ++// ++///* __ieee754_rem_pio2(x,y) ++// * ++// * returns the remainder of x rem pi/2 in y[0]+y[1] (i.e. like x div pi/2) ++// * x is input argument, y[] is hi and low parts of reduced argument (x) ++// * uses __kernel_rem_pio2() ++// */ ++// // use tables(see stubRoutines_loongarch64.cpp): two_over_pi and modified npio2_hw ++// ++// BEGIN __ieee754_rem_pio2 PSEUDO CODE ++// ++//static int __ieee754_rem_pio2(double x, double *y) { ++// double z,w,t,r,fn; ++// double tx[3]; ++// int e0,i,j,nx,n,ix,hx,i0; ++// ++// i0 = ((*(int*)&two24A)>>30)^1; /* high word index */ ++// hx = *(i0+(int*)&x); /* high word of x */ ++// ix = hx&0x7fffffff; ++// if(ix<0x4002d97c) { /* |x| < 3pi/4, special case with n=+-1 */ ++// if(hx>0) { ++// z = x - pio2_1; ++// if(ix!=0x3ff921fb) { /* 33+53 bit pi is good enough */ ++// y[0] = z - pio2_1t; ++// y[1] = (z-y[0])-pio2_1t; ++// } else { /* near pi/2, use 33+33+53 bit pi */ ++// z -= pio2_2; ++// y[0] = z - pio2_2t; ++// y[1] = (z-y[0])-pio2_2t; ++// } ++// return 1; ++// } else { /* negative x */ ++// z = x + pio2_1; ++// if(ix!=0x3ff921fb) { /* 33+53 bit pi is good enough */ ++// y[0] = z + pio2_1t; ++// y[1] = (z-y[0])+pio2_1t; ++// } else { /* near pi/2, use 33+33+53 bit pi */ ++// z += pio2_2; ++// y[0] = z + pio2_2t; ++// y[1] = (z-y[0])+pio2_2t; ++// } ++// return -1; ++// } ++// } ++// if(ix<=0x413921fb) { /* |x| ~<= 2^19*(pi/2), medium size */ ++// t = fabsd(x); ++// n = (int) (t*invpio2+half); ++// fn = (double)n; ++// r = t-fn*pio2_1; ++// w = fn*pio2_1t; /* 1st round good to 85 bit */ ++// // NOTE: y[0] = r-w; is moved from if/else below to be before "if" ++// y[0] = r-w; ++// if(n<32&&ix!=npio2_hw[n-1]) { ++// // y[0] = r-w; /* quick check no cancellation */ // NOTE: moved earlier ++// } else { ++// j = ix>>20; ++// // y[0] = r-w; // NOTE: moved earlier ++// i = j-(((*(i0+(int*)&y[0]))>>20)&0x7ff); ++// if(i>16) { /* 2nd iteration needed, good to 118 */ ++// t = r; ++// w = fn*pio2_2; ++// r = t-w; ++// w = fn*pio2_2t-((t-r)-w); ++// y[0] = r-w; ++// i = j-(((*(i0+(int*)&y[0]))>>20)&0x7ff); ++// if(i>49) { /* 3rd iteration need, 151 bits acc */ ++// t = r; /* will cover all possible cases */ ++// w = fn*pio2_3; ++// r = t-w; ++// w = fn*pio2_3t-((t-r)-w); ++// y[0] = r-w; ++// } ++// } ++// } ++// y[1] = (r-y[0])-w; ++// if(hx<0) {y[0] = -y[0]; y[1] = -y[1]; return -n;} ++// else return n; ++// } ++// /* ++// * all other (large) arguments ++// */ ++// // NOTE: this check is removed, because it was checked in dsin/dcos ++// // if(ix>=0x7ff00000) { /* x is inf or NaN */ ++// // y[0]=y[1]=x-x; return 0; ++// // } ++// /* set z = scalbn(|x|,ilogb(x)-23) */ ++// *(1-i0+(int*)&z) = *(1-i0+(int*)&x); ++// e0 = (ix>>20)-1046; /* e0 = ilogb(z)-23; */ ++// *(i0+(int*)&z) = ix - (e0<<20); ++// ++// // NOTE: "for" loop below in unrolled. See comments in asm code ++// for(i=0;i<2;i++) { ++// tx[i] = (double)((int)(z)); ++// z = (z-tx[i])*two24A; ++// } ++// ++// tx[2] = z; ++// nx = 3; ++// ++// // NOTE: while(tx[nx-1]==zeroA) nx--; is unrolled. See comments in asm code ++// while(tx[nx-1]==zeroA) nx--; /* skip zero term */ ++// ++// n = __kernel_rem_pio2(tx,y,e0,nx,2,two_over_pi); ++// if(hx<0) {y[0] = -y[0]; y[1] = -y[1]; return -n;} ++// return n; ++//} ++// ++// END __ieee754_rem_pio2 PSEUDO CODE ++// ++// Changes between fdlibm and intrinsic for __ieee754_rem_pio2: ++// 1. 
INF/NaN check for huge argument is removed in comparison with fdlibm ++// code, because this check is already done in dcos/dsin code ++// 2. Most constants are now loaded from table instead of direct initialization ++// 3. Two loops are unrolled ++// Assumptions: ++// 1. Assume |X| >= PI/4 ++// 2. Assume SCR1 = 0x3fe921fb00000000 (~ PI/4) ++// 3. Assume ix = A3 ++// Input and output: ++// 1. Input: X = A0 ++// 2. Return n in A2, y[0] == y0 == FA4, y[1] == y1 == FA5 ++// NOTE: general purpose register names match local variable names in C code ++// NOTE: fpu registers are actively reused. See comments in code about their usage ++void MacroAssembler::generate__ieee754_rem_pio2(address npio2_hw, address two_over_pi, address pio2) { ++ const int64_t PIO2_1t = 0x3DD0B4611A626331ULL; ++ const int64_t PIO2_2 = 0x3DD0B4611A600000ULL; ++ const int64_t PIO2_2t = 0x3BA3198A2E037073ULL; ++ Label X_IS_NEGATIVE, X_IS_MEDIUM_OR_LARGE, X_IS_POSITIVE_LONG_PI, LARGE_ELSE, ++ REDUCTION_DONE, X_IS_MEDIUM_BRANCH_DONE, X_IS_LARGE, NX_SET, ++ X_IS_NEGATIVE_LONG_PI; ++ Register X = A0, n = A2, ix = A3, jv = A4, tmp5 = A5, jx = A6, ++ tmp3 = A7, iqBase = T0, ih = T1, i = T2; ++ FloatRegister v0 = FA0, v1 = FA1, v2 = FA2, v3 = FA3, v4 = FA4, v5 = FA5, v6 = FA6, v7 = FA7, ++ vt = FT1, v24 = FT8, v26 = FT10, v27 = FT11, v28 = FT12, v29 = FT13, v31 = FT15; ++ ++ push2(S0, S1); ++ ++ // initializing constants first ++ li(SCR1, 0x3ff921fb54400000); // PIO2_1 ++ li(SCR2, 0x4002d97c); // 3*PI/4 high word ++ movgr2fr_d(v1, SCR1); // v1 = PIO2_1 ++ bge(ix, SCR2, X_IS_MEDIUM_OR_LARGE); ++ ++ block_comment("if(ix<0x4002d97c) {... /* |x| ~< 3pi/4 */ "); { ++ blt(X, R0, X_IS_NEGATIVE); ++ ++ block_comment("if(hx>0) {"); { ++ fsub_d(v2, v0, v1); // v2 = z = x - pio2_1 ++ srli_d(SCR1, SCR1, 32); ++ li(n, 1); ++ beq(ix, SCR1, X_IS_POSITIVE_LONG_PI); ++ ++ block_comment("case: hx > 0 && ix!=0x3ff921fb {"); { /* 33+53 bit pi is good enough */ ++ li(SCR2, PIO2_1t); ++ movgr2fr_d(v27, SCR2); ++ fsub_d(v4, v2, v27); // v4 = y[0] = z - pio2_1t; ++ fsub_d(v5, v2, v4); ++ fsub_d(v5, v5, v27); // v5 = y[1] = (z-y[0])-pio2_1t ++ b(REDUCTION_DONE); ++ } ++ ++ block_comment("case: hx > 0 &*& ix==0x3ff921fb {"); { /* near pi/2, use 33+33+53 bit pi */ ++ bind(X_IS_POSITIVE_LONG_PI); ++ li(SCR1, PIO2_2); ++ li(SCR2, PIO2_2t); ++ movgr2fr_d(v27, SCR1); ++ movgr2fr_d(v6, SCR2); ++ fsub_d(v2, v2, v27); // z-= pio2_2 ++ fsub_d(v4, v2, v6); // y[0] = z - pio2_2t ++ fsub_d(v5, v2, v4); ++ fsub_d(v5, v5, v6); // v5 = (z - y[0]) - pio2_2t ++ b(REDUCTION_DONE); ++ } ++ } ++ ++ block_comment("case: hx <= 0)"); { ++ bind(X_IS_NEGATIVE); ++ fadd_d(v2, v0, v1); // v2 = z = x + pio2_1 ++ srli_d(SCR1, SCR1, 32); ++ li(n, -1); ++ beq(ix, SCR1, X_IS_NEGATIVE_LONG_PI); ++ ++ block_comment("case: hx <= 0 && ix!=0x3ff921fb) {"); { /* 33+53 bit pi is good enough */ ++ li(SCR2, PIO2_1t); ++ movgr2fr_d(v27, SCR2); ++ fadd_d(v4, v2, v27); // v4 = y[0] = z + pio2_1t; ++ fsub_d(v5, v2, v4); ++ fadd_d(v5, v5, v27); // v5 = y[1] = (z-y[0]) + pio2_1t ++ b(REDUCTION_DONE); ++ } ++ ++ block_comment("case: hx <= 0 && ix==0x3ff921fb"); { /* near pi/2, use 33+33+53 bit pi */ ++ bind(X_IS_NEGATIVE_LONG_PI); ++ li(SCR1, PIO2_2); ++ li(SCR2, PIO2_2t); ++ movgr2fr_d(v27, SCR1); ++ movgr2fr_d(v6, SCR2); ++ fadd_d(v2, v2, v27); // z += pio2_2 ++ fadd_d(v4, v2, v6); // y[0] = z + pio2_2t ++ fsub_d(v5, v2, v4); ++ fadd_d(v5, v5, v6); // v5 = (z - y[0]) + pio2_2t ++ b(REDUCTION_DONE); ++ } ++ } ++ } ++ bind(X_IS_MEDIUM_OR_LARGE); ++ li(SCR1, 0x413921fb); ++ blt(SCR1, ix, X_IS_LARGE); // 
ix < = 0x413921fb ? ++ ++ block_comment("|x| ~<= 2^19*(pi/2), medium size"); { ++ li(ih, npio2_hw); ++ fld_d(v4, ih, 0); ++ fld_d(v5, ih, 8); ++ fld_d(v6, ih, 16); ++ fld_d(v7, ih, 24); ++ fabs_d(v31, v0); // v31 = t = |x| ++ addi_d(ih, ih, 64); ++ fmadd_d(v2, v31, v5, v4); // v2 = t * invpio2 + half (invpio2 = 53 bits of 2/pi, half = 0.5) ++ ftintrz_w_d(vt, v2); // n = (int) v2 ++ movfr2gr_s(n, vt); ++ vfrintrz_d(v2, v2); ++ fnmsub_d(v3, v2, v6, v31); // v3 = r = t - fn * pio2_1 ++ fmul_d(v26, v2, v7); // v26 = w = fn * pio2_1t ++ fsub_d(v4, v3, v26); // y[0] = r - w. Calculated before branch ++ li(SCR1, 32); ++ blt(SCR1, n, LARGE_ELSE); ++ addi_w(tmp5, n, -1); // tmp5 = n - 1 ++ alsl_d(tmp5, tmp5, ih, 2 - 1); ++ ld_w(jv, tmp5, 0); ++ bne(ix, jv, X_IS_MEDIUM_BRANCH_DONE); ++ ++ block_comment("else block for if(n<32&&ix!=npio2_hw[n-1])"); { ++ bind(LARGE_ELSE); ++ movfr2gr_d(jx, v4); ++ srli_d(tmp5, ix, 20); // j = ix >> 20 ++ slli_d(jx, jx, 1); ++ srli_d(tmp3, jx, 32 + 20 + 1); // r7 = j-(((*(i0+(int*)&y[0]))>>20)&0x7ff); ++ sub_d(tmp3, tmp5, tmp3); ++ ++ block_comment("if(i>16)"); { ++ li(SCR1, 16); ++ bge(SCR1, tmp3, X_IS_MEDIUM_BRANCH_DONE); ++ // i > 16. 2nd iteration needed ++ fld_d(v6, ih, -32); ++ fld_d(v7, ih, -24); ++ fmov_d(v28, v3); // t = r ++ fmul_d(v29, v2, v6); // w = v29 = fn * pio2_2 ++ fsub_d(v3, v28, v29); // r = t - w ++ fsub_d(v31, v28, v3); // v31 = (t - r) ++ fsub_d(v31, v29, v31); // v31 = w - (t - r) = - ((t - r) - w) ++ fmadd_d(v26, v2, v7, v31); // v26 = w = fn*pio2_2t - ((t - r) - w) ++ fsub_d(v4, v3, v26); // y[0] = r - w ++ movfr2gr_d(jx, v4); ++ slli_d(jx, jx, 1); ++ srli_d(tmp3, jx, 32 + 20 + 1); // r7 = j-(((*(i0+(int*)&y[0]))>>20)&0x7ff); ++ sub_d(tmp3, tmp5, tmp3); ++ ++ block_comment("if(i>49)"); { ++ li(SCR1, 49); ++ bge(SCR1, tmp3, X_IS_MEDIUM_BRANCH_DONE); ++ // 3rd iteration need, 151 bits acc ++ fld_d(v6, ih, -16); ++ fld_d(v7, ih, -8); ++ fmov_d(v28, v3); // save "r" ++ fmul_d(v29, v2, v6); // v29 = fn * pio2_3 ++ fsub_d(v3, v28, v29); // r = r - w ++ fsub_d(v31, v28, v3); // v31 = (t - r) ++ fsub_d(v31, v29, v31); // v31 = w - (t - r) = - ((t - r) - w) ++ fmadd_d(v26, v2, v7, v31); // v26 = w = fn*pio2_3t - ((t - r) - w) ++ fsub_d(v4, v3, v26); // y[0] = r - w ++ } ++ } ++ } ++ block_comment("medium x tail"); { ++ bind(X_IS_MEDIUM_BRANCH_DONE); ++ fsub_d(v5, v3, v4); // v5 = y[1] = (r - y[0]) ++ fsub_d(v5, v5, v26); // v5 = y[1] = (r - y[0]) - w ++ blt(R0, X, REDUCTION_DONE); ++ fneg_d(v4, v4); ++ sub_w(n, R0, n); ++ fneg_d(v5, v5); ++ b(REDUCTION_DONE); ++ } ++ } ++ ++ block_comment("all other (large) arguments"); { ++ bind(X_IS_LARGE); ++ srli_d(SCR1, ix, 20); // ix >> 20 ++ li(tmp5, 0x4170000000000000); ++ addi_w(SCR1, SCR1, -1046); // e0 ++ movgr2fr_d(v24, tmp5); // init two24A value ++ slli_w(jv, SCR1, 20); // ix - (e0<<20) ++ sub_w(jv, ix, jv); ++ slli_d(jv, jv, 32); ++ addi_w(SCR2, SCR1, -3); ++ bstrins_d(jv, X, 31, 0); // jv = z ++ li(i, 24); ++ movgr2fr_d(v26, jv); // v26 = z ++ ++ block_comment("unrolled for(i=0;i<2;i++) {tx[i] = (double)((int)(z));z = (z-tx[i])*two24A;}"); { ++ // tx[0,1,2] = v6,v7,v26 ++ vfrintrz_d(v6, v26); // v6 = (double)((int)v26) ++ div_w(jv, SCR2, i); // jv = (e0 - 3)/24 ++ fsub_d(v26, v26, v6); ++ addi_d(SP, SP, -560); ++ fmul_d(v26, v26, v24); ++ vfrintrz_d(v7, v26); // v7 = (double)((int)v26) ++ li(jx, 2); // calculate jx as nx - 1, which is initially 2. 
Not a part of unrolled loop ++ fsub_d(v26, v26, v7); ++ } ++ ++ block_comment("nx calculation with unrolled while(tx[nx-1]==zeroA) nx--;"); { ++ vxor_v(vt, vt, vt); ++ fcmp_cne_d(FCC0, v26, vt); // if NE then jx == 2. else it's 1 or 0 ++ addi_d(iqBase, SP, 480); // base of iq[] ++ fmul_d(v3, v26, v24); ++ bcnez(FCC0, NX_SET); ++ fcmp_cne_d(FCC0, v7, vt); // v7 == 0 => jx = 0. Else jx = 1 ++ movcf2gr(jx, FCC0); ++ } ++ bind(NX_SET); ++ generate__kernel_rem_pio2(two_over_pi, pio2); ++ // now we have y[0] = v4, y[1] = v5 and n = r2 ++ bge(X, R0, REDUCTION_DONE); ++ fneg_d(v4, v4); ++ fneg_d(v5, v5); ++ sub_w(n, R0, n); ++ } ++ bind(REDUCTION_DONE); ++ ++ pop2(S0, S1); ++} ++ ++///* ++// * __kernel_rem_pio2(x,y,e0,nx,prec,ipio2) ++// * double x[],y[]; int e0,nx,prec; int ipio2[]; ++// * ++// * __kernel_rem_pio2 return the last three digits of N with ++// * y = x - N*pi/2 ++// * so that |y| < pi/2. ++// * ++// * The method is to compute the integer (mod 8) and fraction parts of ++// * (2/pi)*x without doing the full multiplication. In general we ++// * skip the part of the product that are known to be a huge integer ( ++// * more accurately, = 0 mod 8 ). Thus the number of operations are ++// * independent of the exponent of the input. ++// * ++// * NOTE: 2/pi int representation is converted to double ++// * // (2/pi) is represented by an array of 24-bit integers in ipio2[]. ++// * ++// * Input parameters: ++// * x[] The input value (must be positive) is broken into nx ++// * pieces of 24-bit integers in double precision format. ++// * x[i] will be the i-th 24 bit of x. The scaled exponent ++// * of x[0] is given in input parameter e0 (i.e., x[0]*2^e0 ++// * match x's up to 24 bits. ++// * ++// * Example of breaking a double positive z into x[0]+x[1]+x[2]: ++// * e0 = ilogb(z)-23 ++// * z = scalbn(z,-e0) ++// * for i = 0,1,2 ++// * x[i] = floor(z) ++// * z = (z-x[i])*2**24 ++// * ++// * ++// * y[] ouput result in an array of double precision numbers. ++// * The dimension of y[] is: ++// * 24-bit precision 1 ++// * 53-bit precision 2 ++// * 64-bit precision 2 ++// * 113-bit precision 3 ++// * The actual value is the sum of them. Thus for 113-bit ++// * precsion, one may have to do something like: ++// * ++// * long double t,w,r_head, r_tail; ++// * t = (long double)y[2] + (long double)y[1]; ++// * w = (long double)y[0]; ++// * r_head = t+w; ++// * r_tail = w - (r_head - t); ++// * ++// * e0 The exponent of x[0] ++// * ++// * nx dimension of x[] ++// * ++// * prec an interger indicating the precision: ++// * 0 24 bits (single) ++// * 1 53 bits (double) ++// * 2 64 bits (extended) ++// * 3 113 bits (quad) ++// * ++// * NOTE: ipio2[] array below is converted to double representation ++// * //ipio2[] ++// * // integer array, contains the (24*i)-th to (24*i+23)-th ++// * // bit of 2/pi after binary point. The corresponding ++// * // floating value is ++// * ++// * ipio2[i] * 2^(-24(i+1)). ++// * ++// * Here is the description of some local variables: ++// * ++// * jk jk+1 is the initial number of terms of ipio2[] needed ++// * in the computation. The recommended value is 2,3,4, ++// * 6 for single, double, extended,and quad. ++// * ++// * jz local integer variable indicating the number of ++// * terms of ipio2[] used. ++// * ++// * jx nx - 1 ++// * ++// * jv index for pointing to the suitable ipio2[] for the ++// * computation. In general, we want ++// * ( 2^e0*x[0] * ipio2[jv-1]*2^(-24jv) )/8 ++// * is an integer. Thus ++// * e0-3-24*jv >= 0 or (e0-3)/24 >= jv ++// * Hence jv = max(0,(e0-3)/24). 
++// * ++// * jp jp+1 is the number of terms in PIo2[] needed, jp = jk. ++// * ++// * q[] double array with integral value, representing the ++// * 24-bits chunk of the product of x and 2/pi. ++// * ++// * q0 the corresponding exponent of q[0]. Note that the ++// * exponent for q[i] would be q0-24*i. ++// * ++// * PIo2[] double precision array, obtained by cutting pi/2 ++// * into 24 bits chunks. ++// * ++// * f[] ipio2[] in floating point ++// * ++// * iq[] integer array by breaking up q[] in 24-bits chunk. ++// * ++// * fq[] final product of x*(2/pi) in fq[0],..,fq[jk] ++// * ++// * ih integer. If >0 it indicates q[] is >= 0.5, hence ++// * it also indicates the *sign* of the result. ++// * ++// */ ++// ++// Use PIo2 table(see stubRoutines_loongarch64.cpp) ++// ++// BEGIN __kernel_rem_pio2 PSEUDO CODE ++// ++//static int __kernel_rem_pio2(double *x, double *y, int e0, int nx, int prec, /* NOTE: converted to double */ const double *ipio2 // const int *ipio2) { ++// int jz,jx,jv,jp,jk,carry,n,iq[20],i,j,k,m,q0,ih; ++// double z,fw,f[20],fq[20],q[20]; ++// ++// /* initialize jk*/ ++// // jk = init_jk[prec]; // NOTE: prec==2 for double. jk is always 4. ++// jp = jk; // NOTE: always 4 ++// ++// /* determine jx,jv,q0, note that 3>q0 */ ++// jx = nx-1; ++// jv = (e0-3)/24; if(jv<0) jv=0; ++// q0 = e0-24*(jv+1); ++// ++// /* set up f[0] to f[jx+jk] where f[jx+jk] = ipio2[jv+jk] */ ++// j = jv-jx; m = jx+jk; ++// ++// // NOTE: split into two for-loops: one with zeroB and one with ipio2[j]. It ++// // allows the use of wider loads/stores ++// for(i=0;i<=m;i++,j++) f[i] = (j<0)? zeroB : /* NOTE: converted to double */ ipio2[j]; //(double) ipio2[j]; ++// ++// // NOTE: unrolled and vectorized "for". See comments in asm code ++// /* compute q[0],q[1],...q[jk] */ ++// for (i=0;i<=jk;i++) { ++// for(j=0,fw=0.0;j<=jx;j++) fw += x[j]*f[jx+i-j]; q[i] = fw; ++// } ++// ++// jz = jk; ++//recompute: ++// /* distill q[] into iq[] reversingly */ ++// for(i=0,j=jz,z=q[jz];j>0;i++,j--) { ++// fw = (double)((int)(twon24* z)); ++// iq[i] = (int)(z-two24B*fw); ++// z = q[j-1]+fw; ++// } ++// ++// /* compute n */ ++// z = scalbnA(z,q0); /* actual value of z */ ++// z -= 8.0*floor(z*0.125); /* trim off integer >= 8 */ ++// n = (int) z; ++// z -= (double)n; ++// ih = 0; ++// if(q0>0) { /* need iq[jz-1] to determine n */ ++// i = (iq[jz-1]>>(24-q0)); n += i; ++// iq[jz-1] -= i<<(24-q0); ++// ih = iq[jz-1]>>(23-q0); ++// } ++// else if(q0==0) ih = iq[jz-1]>>23; ++// else if(z>=0.5) ih=2; ++// ++// if(ih>0) { /* q > 0.5 */ ++// n += 1; carry = 0; ++// for(i=0;i0) { /* rare case: chance is 1 in 12 */ ++// switch(q0) { ++// case 1: ++// iq[jz-1] &= 0x7fffff; break; ++// case 2: ++// iq[jz-1] &= 0x3fffff; break; ++// } ++// } ++// if(ih==2) { ++// z = one - z; ++// if(carry!=0) z -= scalbnA(one,q0); ++// } ++// } ++// ++// /* check if recomputation is needed */ ++// if(z==zeroB) { ++// j = 0; ++// for (i=jz-1;i>=jk;i--) j |= iq[i]; ++// if(j==0) { /* need recomputation */ ++// for(k=1;iq[jk-k]==0;k++); /* k = no. 
of terms needed */ ++// ++// for(i=jz+1;i<=jz+k;i++) { /* add q[jz+1] to q[jz+k] */ ++// f[jx+i] = /* NOTE: converted to double */ ipio2[jv+i]; //(double) ipio2[jv+i]; ++// for(j=0,fw=0.0;j<=jx;j++) fw += x[j]*f[jx+i-j]; ++// q[i] = fw; ++// } ++// jz += k; ++// goto recompute; ++// } ++// } ++// ++// /* chop off zero terms */ ++// if(z==0.0) { ++// jz -= 1; q0 -= 24; ++// while(iq[jz]==0) { jz--; q0-=24;} ++// } else { /* break z into 24-bit if necessary */ ++// z = scalbnA(z,-q0); ++// if(z>=two24B) { ++// fw = (double)((int)(twon24*z)); ++// iq[jz] = (int)(z-two24B*fw); ++// jz += 1; q0 += 24; ++// iq[jz] = (int) fw; ++// } else iq[jz] = (int) z ; ++// } ++// ++// /* convert integer "bit" chunk to floating-point value */ ++// fw = scalbnA(one,q0); ++// for(i=jz;i>=0;i--) { ++// q[i] = fw*(double)iq[i]; fw*=twon24; ++// } ++// ++// /* compute PIo2[0,...,jp]*q[jz,...,0] */ ++// for(i=jz;i>=0;i--) { ++// for(fw=0.0,k=0;k<=jp&&k<=jz-i;k++) fw += PIo2[k]*q[i+k]; ++// fq[jz-i] = fw; ++// } ++// ++// // NOTE: switch below is eliminated, because prec is always 2 for doubles ++// /* compress fq[] into y[] */ ++// //switch(prec) { ++// //case 0: ++// // fw = 0.0; ++// // for (i=jz;i>=0;i--) fw += fq[i]; ++// // y[0] = (ih==0)? fw: -fw; ++// // break; ++// //case 1: ++// //case 2: ++// fw = 0.0; ++// for (i=jz;i>=0;i--) fw += fq[i]; ++// y[0] = (ih==0)? fw: -fw; ++// fw = fq[0]-fw; ++// for (i=1;i<=jz;i++) fw += fq[i]; ++// y[1] = (ih==0)? fw: -fw; ++// // break; ++// //case 3: /* painful */ ++// // for (i=jz;i>0;i--) { ++// // fw = fq[i-1]+fq[i]; ++// // fq[i] += fq[i-1]-fw; ++// // fq[i-1] = fw; ++// // } ++// // for (i=jz;i>1;i--) { ++// // fw = fq[i-1]+fq[i]; ++// // fq[i] += fq[i-1]-fw; ++// // fq[i-1] = fw; ++// // } ++// // for (fw=0.0,i=jz;i>=2;i--) fw += fq[i]; ++// // if(ih==0) { ++// // y[0] = fq[0]; y[1] = fq[1]; y[2] = fw; ++// // } else { ++// // y[0] = -fq[0]; y[1] = -fq[1]; y[2] = -fw; ++// // } ++// //} ++// return n&7; ++//} ++// ++// END __kernel_rem_pio2 PSEUDO CODE ++// ++// Changes between fdlibm and intrinsic: ++// 1. One loop is unrolled and vectorized (see comments in code) ++// 2. One loop is split into 2 loops (see comments in code) ++// 3. Non-double code is removed(last switch). Sevaral variables became ++// constants because of that (see comments in code) ++// 4. Use of jx, which is nx-1 instead of nx ++// Assumptions: ++// 1. Assume |X| >= PI/4 ++// Input and output: ++// 1. Input: X = A0, jx == nx - 1 == A6, e0 == SCR1 ++// 2. Return n in A2, y[0] == y0 == FA4, y[1] == y1 == FA5 ++// NOTE: general purpose register names match local variable names in C code ++// NOTE: fpu registers are actively reused. 
See comments in code about their usage ++void MacroAssembler::generate__kernel_rem_pio2(address two_over_pi, address pio2) { ++ Label Q_DONE, JX_IS_0, JX_IS_2, COMP_INNER_LOOP, RECOMP_FOR2, Q0_ZERO_CMP_LT, ++ RECOMP_CHECK_DONE_NOT_ZERO, Q0_ZERO_CMP_DONE, COMP_FOR, Q0_ZERO_CMP_EQ, ++ INIT_F_ZERO, RECOMPUTE, IH_FOR_INCREMENT, IH_FOR_STORE, RECOMP_CHECK_DONE, ++ Z_IS_LESS_THAN_TWO24B, Z_IS_ZERO, FW_Y1_NO_NEGATION, ++ RECOMP_FW_UPDATED, Z_ZERO_CHECK_DONE, FW_FOR1, IH_AFTER_SWITCH, IH_HANDLED, ++ CONVERTION_FOR, FW_Y0_NO_NEGATION, FW_FOR1_DONE, FW_FOR2, FW_FOR2_DONE, ++ IH_FOR, SKIP_F_LOAD, RECOMP_FOR1, RECOMP_FIRST_FOR, INIT_F_COPY, ++ RECOMP_FOR1_CHECK; ++ Register tmp2 = A1, n = A2, jv = A4, tmp5 = A5, jx = A6, ++ tmp3 = A7, iqBase = T0, ih = T1, i = T2, tmp1 = T3, ++ jz = S0, j = T5, twoOverPiBase = T6, tmp4 = S1, qBase = T8; ++ FloatRegister v0 = FA0, v1 = FA1, v2 = FA2, v3 = FA3, v4 = FA4, v5 = FA5, v6 = FA6, v7 = FA7, ++ vt = FT1, v17 = FT2, v18 = FT3, v19 = FT4, v20 = FT5, v21 = FT6, v22 = FT7, v24 = FT8, ++ v25 = FT9, v26 = FT10, v27 = FT11, v28 = FT12, v29 = FT13, v30 = FT14, v31 = FT15; ++ // jp = jk == init_jk[prec] = init_jk[2] == {2,3,4,6}[2] == 4 ++ // jx = nx - 1 ++ li(twoOverPiBase, two_over_pi); ++ slti(SCR2, jv, 0); ++ addi_w(tmp4, jx, 4); // tmp4 = m = jx + jk = jx + 4. jx is in {0,1,2} so m is in [4,5,6] ++ masknez(jv, jv, SCR2); ++ if (UseLASX) ++ xvxor_v(v26, v26, v26); ++ else ++ vxor_v(v26, v26, v26); ++ addi_w(tmp5, jv, 1); // jv+1 ++ sub_w(j, jv, jx); ++ addi_d(qBase, SP, 320); // base of q[] ++ mul_w(SCR2, i, tmp5); // q0 = e0-24*(jv+1) ++ sub_w(SCR1, SCR1, SCR2); ++ // use double f[20], fq[20], q[20], iq[20] on stack, which is ++ // (20 + 20 + 20) x 8 + 20 x 4 = 560 bytes. From lower to upper addresses it ++ // will contain f[20], fq[20], q[20], iq[20] ++ // now initialize f[20] indexes 0..m (inclusive) ++ // for(i=0;i<=m;i++,j++) f[i] = (j<0)? zeroB : /* NOTE: converted to double */ ipio2[j]; // (double) ipio2[j]; ++ move(tmp5, SP); ++ ++ block_comment("for(i=0;i<=m;i++,j++) f[i] = (j<0)? zeroB : /* NOTE: converted to double */ ipio2[j]; // (double) ipio2[j];"); { ++ xorr(i, i, i); ++ bge(j, R0, INIT_F_COPY); ++ bind(INIT_F_ZERO); ++ if (UseLASX) { ++ xvst(v26, tmp5, 0); ++ } else { ++ vst(v26, tmp5, 0); ++ vst(v26, tmp5, 16); ++ } ++ addi_d(tmp5, tmp5, 32); ++ addi_w(i, i, 4); ++ addi_w(j, j, 4); ++ blt(j, R0, INIT_F_ZERO); ++ sub_w(i, i, j); ++ move(j, R0); ++ bind(INIT_F_COPY); ++ alsl_d(tmp1, j, twoOverPiBase, 3 - 1); // ipio2[j] start address ++ if (UseLASX) { ++ xvld(v18, tmp1, 0); ++ xvld(v19, tmp1, 32); ++ } else { ++ vld(v18, tmp1, 0); ++ vld(v19, tmp1, 16); ++ vld(v20, tmp1, 32); ++ vld(v21, tmp1, 48); ++ } ++ alsl_d(tmp5, i, SP, 3 - 1); ++ if (UseLASX) { ++ xvst(v18, tmp5, 0); ++ xvst(v19, tmp5, 32); ++ } else { ++ vst(v18, tmp5, 0); ++ vst(v19, tmp5, 16); ++ vst(v20, tmp5, 32); ++ vst(v21, tmp5, 48); ++ } ++ } ++ // v18..v21 can actually contain f[0..7] ++ beqz(i, SKIP_F_LOAD); // i == 0 => f[i] == f[0] => already loaded ++ if (UseLASX) { ++ xvld(v18, SP, 0); // load f[0..7] ++ xvld(v19, SP, 32); ++ } else { ++ vld(v18, SP, 0); // load f[0..7] ++ vld(v19, SP, 16); ++ vld(v20, SP, 32); ++ vld(v21, SP, 48); ++ } ++ bind(SKIP_F_LOAD); ++ // calculate 2^q0 and 2^-q0, which we'll need further. ++ // q0 is exponent. 
So, calculate biased exponent(q0+1023) ++ sub_w(tmp4, R0, SCR1); ++ addi_w(tmp5, SCR1, 1023); ++ addi_w(tmp4, tmp4, 1023); ++ // Unroll following for(s) depending on jx in [0,1,2] ++ // for (i=0;i<=jk;i++) { ++ // for(j=0,fw=0.0;j<=jx;j++) fw += x[j]*f[jx+i-j]; q[i] = fw; ++ // } ++ // Unrolling for jx == 0 case: ++ // q[0] = x[0] * f[0] ++ // q[1] = x[0] * f[1] ++ // q[2] = x[0] * f[2] ++ // q[3] = x[0] * f[3] ++ // q[4] = x[0] * f[4] ++ // ++ // Vectorization for unrolled jx == 0 case: ++ // {q[0], q[1]} = {f[0], f[1]} * x[0] ++ // {q[2], q[3]} = {f[2], f[3]} * x[0] ++ // q[4] = f[4] * x[0] ++ // ++ // Unrolling for jx == 1 case: ++ // q[0] = x[0] * f[1] + x[1] * f[0] ++ // q[1] = x[0] * f[2] + x[1] * f[1] ++ // q[2] = x[0] * f[3] + x[1] * f[2] ++ // q[3] = x[0] * f[4] + x[1] * f[3] ++ // q[4] = x[0] * f[5] + x[1] * f[4] ++ // ++ // Vectorization for unrolled jx == 1 case: ++ // {q[0], q[1]} = {f[0], f[1]} * x[1] ++ // {q[2], q[3]} = {f[2], f[3]} * x[1] ++ // q[4] = f[4] * x[1] ++ // {q[0], q[1]} += {f[1], f[2]} * x[0] ++ // {q[2], q[3]} += {f[3], f[4]} * x[0] ++ // q[4] += f[5] * x[0] ++ // ++ // Unrolling for jx == 2 case: ++ // q[0] = x[0] * f[2] + x[1] * f[1] + x[2] * f[0] ++ // q[1] = x[0] * f[3] + x[1] * f[2] + x[2] * f[1] ++ // q[2] = x[0] * f[4] + x[1] * f[3] + x[2] * f[2] ++ // q[3] = x[0] * f[5] + x[1] * f[4] + x[2] * f[3] ++ // q[4] = x[0] * f[6] + x[1] * f[5] + x[2] * f[4] ++ // ++ // Vectorization for unrolled jx == 2 case: ++ // {q[0], q[1]} = {f[0], f[1]} * x[2] ++ // {q[2], q[3]} = {f[2], f[3]} * x[2] ++ // q[4] = f[4] * x[2] ++ // {q[0], q[1]} += {f[1], f[2]} * x[1] ++ // {q[2], q[3]} += {f[3], f[4]} * x[1] ++ // q[4] += f[5] * x[1] ++ // {q[0], q[1]} += {f[2], f[3]} * x[0] ++ // {q[2], q[3]} += {f[4], f[5]} * x[0] ++ // q[4] += f[6] * x[0] ++ block_comment("unrolled and vectorized computation of q[0]..q[jk]"); { ++ li(SCR2, 1); ++ slli_d(tmp5, tmp5, 52); // now it's 2^q0 double value ++ slli_d(tmp4, tmp4, 52); // now it's 2^-q0 double value ++ if (UseLASX) ++ xvpermi_d(v6, v6, 0); ++ else ++ vreplvei_d(v6, v6, 0); ++ blt(jx, SCR2, JX_IS_0); ++ addi_d(i, SP, 8); ++ if (UseLASX) { ++ xvld(v26, i, 0); // load f[1..4] ++ xvpermi_d(v3, v3, 0); ++ xvpermi_d(v7, v7, 0); ++ xvpermi_d(v20, v19, 85); ++ xvpermi_d(v21, v19, 170); ++ } else { ++ vld(v26, i, 0); // load f[1..4] ++ vld(v27, i, 16); ++ vreplvei_d(v3, v3, 0); ++ vreplvei_d(v7, v7, 0); ++ vreplvei_d(vt, v20, 1); ++ vreplvei_d(v21, v21, 0); ++ } ++ blt(SCR2, jx, JX_IS_2); ++ // jx == 1 ++ if (UseLASX) { ++ xvfmul_d(v28, v18, v7); // f[0,3] * x[1] ++ fmul_d(v30, v19, v7); // f[4] * x[1] ++ xvfmadd_d(v28, v26, v6, v28); ++ fmadd_d(v30, v6, v20, v30); // v30 += f[5] * x[0] ++ } else { ++ vfmul_d(v28, v18, v7); // f[0,1] * x[1] ++ vfmul_d(v29, v19, v7); // f[2,3] * x[1] ++ fmul_d(v30, v20, v7); // f[4] * x[1] ++ vfmadd_d(v28, v26, v6, v28); ++ vfmadd_d(v29, v27, v6, v29); ++ fmadd_d(v30, v6, vt, v30); // v30 += f[5] * x[0] ++ } ++ b(Q_DONE); ++ bind(JX_IS_2); ++ if (UseLASX) { ++ xvfmul_d(v28, v18, v3); // f[0,3] * x[2] ++ fmul_d(v30, v19, v3); // f[4] * x[2] ++ xvfmadd_d(v28, v26, v7, v28); ++ fmadd_d(v30, v7, v20, v30); // v30 += f[5] * x[1] ++ xvpermi_q(v18, v19, 3); ++ xvfmadd_d(v28, v18, v6, v28); ++ } else { ++ vfmul_d(v28, v18, v3); // f[0,1] * x[2] ++ vfmul_d(v29, v19, v3); // f[2,3] * x[2] ++ fmul_d(v30, v20, v3); // f[4] * x[2] ++ vfmadd_d(v28, v26, v7, v28); ++ vfmadd_d(v29, v27, v7, v29); ++ fmadd_d(v30, v7, vt, v30); // v30 += f[5] * x[1] ++ vfmadd_d(v28, v19, v6, v28); ++ vfmadd_d(v29, v20, v6, v29); ++ } ++ 
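++      // Note: at this point q[0..3] for the jx == 2 case have been accumulated in the vector
++      // registers above (v28 for LASX, v28/v29 for LSX); the scalar fmadd_d that follows adds
++      // the last remaining term f[6]*x[0] into q[4] (v30) before branching to Q_DONE.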
fmadd_d(v30, v6, v21, v30); // v30 += f[6] * x[0] ++ b(Q_DONE); ++ bind(JX_IS_0); ++ if (UseLASX) { ++ xvfmul_d(v28, v18, v6); // f[0,1] * x[0] ++ fmul_d(v30, v19, v6); // f[4] * x[0] ++ } else { ++ vfmul_d(v28, v18, v6); // f[0,1] * x[0] ++ vfmul_d(v29, v19, v6); // f[2,3] * x[0] ++ fmul_d(v30, v20, v6); // f[4] * x[0] ++ } ++ bind(Q_DONE); ++ if (UseLASX) { ++ xvst(v28, qBase, 0); // save calculated q[0]...q[jk] ++ } else { ++ vst(v28, qBase, 0); // save calculated q[0]...q[jk] ++ vst(v29, qBase, 16); ++ } ++ fst_d(v30, qBase, 32); ++ } ++ li(i, 0x3E70000000000000); ++ li(jz, 4); ++ movgr2fr_d(v17, i); // v17 = twon24 ++ movgr2fr_d(v30, tmp5); // 2^q0 ++ vldi(v21, -960); // 0.125 (0x3fc0000000000000) ++ vldi(v20, -992); // 8.0 (0x4020000000000000) ++ movgr2fr_d(v22, tmp4); // 2^-q0 ++ ++ block_comment("recompute loop"); { ++ bind(RECOMPUTE); ++ // for(i=0,j=jz,z=q[jz];j>0;i++,j--) { ++ // fw = (double)((int)(twon24* z)); ++ // iq[i] = (int)(z-two24A*fw); ++ // z = q[j-1]+fw; ++ // } ++ block_comment("distill q[] into iq[] reversingly"); { ++ xorr(i, i, i); ++ move(j, jz); ++ alsl_d(tmp2, jz, qBase, 3 - 1); // q[jz] address ++ fld_d(v18, tmp2, 0); // z = q[j] and moving address to q[j-1] ++ addi_d(tmp2, tmp2, -8); ++ bind(RECOMP_FIRST_FOR); ++ fld_d(v27, tmp2, 0); ++ addi_d(tmp2, tmp2, -8); ++ fmul_d(v29, v17, v18); // twon24*z ++ vfrintrz_d(v29, v29); // (double)(int) ++ fnmsub_d(v28, v24, v29, v18); // v28 = z-two24A*fw ++ ftintrz_w_d(vt, v28); // (int)(z-two24A*fw) ++ alsl_d(SCR2, i, iqBase, 2 - 1); ++ fst_s(vt, SCR2, 0); ++ fadd_d(v18, v27, v29); ++ addi_w(i, i, 1); ++ addi_w(j, j, -1); ++ blt(R0, j, RECOMP_FIRST_FOR); ++ } ++ // compute n ++ fmul_d(v18, v18, v30); ++ fmul_d(v2, v18, v21); ++ vfrintrm_d(v2, v2); // v2 = floor(v2) == rounding towards -inf ++ fnmsub_d(v18, v2, v20, v18); // z -= 8.0*floor(z*0.125); ++ li(ih, 2); ++ vfrintrz_d(v2, v18); // v2 = (double)((int)z) ++ ftintrz_w_d(vt, v18); // n = (int) z; ++ movfr2gr_s(n, vt); ++ fsub_d(v18, v18, v2); // z -= (double)n; ++ ++ block_comment("q0-dependent initialization"); { ++ blt(SCR1, R0, Q0_ZERO_CMP_LT); // if (q0 > 0) ++ addi_w(j, jz, -1); // j = jz - 1 ++ alsl_d(SCR2, j, iqBase, 2 - 1); ++ ld_w(tmp2, SCR2, 0); // tmp2 = iq[jz-1] ++ beq(SCR1, R0, Q0_ZERO_CMP_EQ); ++ li(tmp4, 24); ++ sub_w(tmp4, tmp4, SCR1); // == 24 - q0 ++ srl_w(i, tmp2, tmp4); // i = iq[jz-1] >> (24-q0) ++ sll_w(tmp5, i, tmp4); ++ sub_w(tmp2, tmp2, tmp5); // iq[jz-1] -= i<<(24-q0); ++ alsl_d(SCR2, j, iqBase, 2 - 1); ++ st_w(tmp2, SCR2, 0); // store iq[jz-1] ++ addi_w(SCR2, tmp4, -1); // == 23 - q0 ++ add_w(n, n, i); // n+=i ++ srl_w(ih, tmp2, SCR2); // ih = iq[jz-1] >> (23-q0) ++ b(Q0_ZERO_CMP_DONE); ++ bind(Q0_ZERO_CMP_EQ); ++ srli_d(ih, tmp2, 23); // ih = iq[z-1] >> 23 ++ b(Q0_ZERO_CMP_DONE); ++ bind(Q0_ZERO_CMP_LT); ++ vldi(v4, -928); // 0.5 (0x3fe0000000000000) ++ fcmp_clt_d(FCC0, v18, v4); ++ movcf2gr(SCR2, FCC0); ++ masknez(ih, ih, SCR2); // if (z<0.5) ih = 0 ++ } ++ bind(Q0_ZERO_CMP_DONE); ++ bge(R0, ih, IH_HANDLED); ++ ++ block_comment("if(ih>) {"); { ++ // use rscratch2 as carry ++ ++ block_comment("for(i=0;i0) {"); { ++ bge(R0, SCR1, IH_AFTER_SWITCH); ++ // tmp3 still has iq[jz-1] value. 
no need to reload ++ // now, zero high tmp3 bits (rscratch1 number of bits) ++ li(j, 0xffffffff); ++ addi_w(i, jz, -1); // set i to jz-1 ++ srl_d(j, j, SCR1); ++ srli_w(tmp1, j, 8); ++ andr(tmp3, tmp3, tmp1); // we have 24-bit-based constants ++ alsl_d(tmp1, i, iqBase, 2 - 1); ++ st_w(tmp3, tmp1, 0); // save iq[jz-1] ++ } ++ bind(IH_AFTER_SWITCH); ++ li(tmp1, 2); ++ bne(ih, tmp1, IH_HANDLED); ++ ++ block_comment("if(ih==2) {"); { ++ vldi(v25, -912); // 1.0 (0x3ff0000000000000) ++ fsub_d(v18, v25, v18); // z = one - z; ++ beqz(SCR2, IH_HANDLED); ++ fsub_d(v18, v18, v30); // z -= scalbnA(one,q0); ++ } ++ } ++ bind(IH_HANDLED); ++ // check if recomputation is needed ++ vxor_v(vt, vt, vt); ++ fcmp_cne_d(FCC0, v18, vt); ++ bcnez(FCC0, RECOMP_CHECK_DONE_NOT_ZERO); ++ ++ block_comment("if(z==zeroB) {"); { ++ ++ block_comment("for (i=jz-1;i>=jk;i--) j |= iq[i];"); { ++ addi_w(i, jz, -1); ++ xorr(j, j, j); ++ b(RECOMP_FOR1_CHECK); ++ bind(RECOMP_FOR1); ++ alsl_d(tmp1, i, iqBase, 2 - 1); ++ ld_w(tmp1, tmp1, 0); ++ orr(j, j, tmp1); ++ addi_w(i, i, -1); ++ bind(RECOMP_FOR1_CHECK); ++ li(SCR2, 4); ++ bge(i, SCR2, RECOMP_FOR1); ++ } ++ bnez(j, RECOMP_CHECK_DONE); ++ ++ block_comment("if(j==0) {"); { ++ // for(k=1;iq[jk-k]==0;k++); // let's unroll it. jk == 4. So, read ++ // iq[3], iq[2], iq[1], iq[0] until non-zero value ++ ld_d(tmp1, iqBase, 0); // iq[0..3] ++ ld_d(tmp3, iqBase, 8); ++ li(j, 2); ++ masknez(tmp1, tmp1, tmp3); // set register for further consideration ++ orr(tmp1, tmp1, tmp3); ++ masknez(j, j, tmp3); // set initial k. Use j as k ++ srli_d(SCR2, tmp1, 32); ++ sltu(SCR2, R0, SCR2); ++ addi_w(i, jz, 1); ++ add_w(j, j, SCR2); ++ ++ block_comment("for(i=jz+1;i<=jz+k;i++) {...}"); { ++ add_w(jz, i, j); // i = jz+1, j = k-1. j+i = jz+k (which is a new jz) ++ bind(RECOMP_FOR2); ++ add_w(tmp1, jv, i); ++ alsl_d(SCR2, tmp1, twoOverPiBase, 3 - 1); ++ fld_d(v29, SCR2, 0); ++ add_w(tmp2, jx, i); ++ alsl_d(SCR2, tmp2, SP, 3 - 1); ++ fst_d(v29, SCR2, 0); ++ // f[jx+i] = /* NOTE: converted to double */ ipio2[jv+i]; //(double) ipio2[jv+i]; ++ // since jx = 0, 1 or 2 we can unroll it: ++ // for(j=0,fw=0.0;j<=jx;j++) fw += x[j]*f[jx+i-j]; ++ // f[jx+i-j] == (for first iteration) f[jx+i], which is already v29 ++ alsl_d(tmp2, tmp2, SP, 3 - 1); // address of f[jx+i] ++ fld_d(v4, tmp2, -16); // load f[jx+i-2] and f[jx+i-1] ++ fld_d(v5, tmp2, -8); ++ fmul_d(v26, v6, v29); // initial fw ++ beqz(jx, RECOMP_FW_UPDATED); ++ fmadd_d(v26, v7, v5, v26); ++ li(SCR2, 1); ++ beq(jx, SCR2, RECOMP_FW_UPDATED); ++ fmadd_d(v26, v3, v4, v26); ++ bind(RECOMP_FW_UPDATED); ++ alsl_d(SCR2, i, qBase, 3 - 1); ++ fst_d(v26, SCR2, 0); // q[i] = fw; ++ addi_w(i, i, 1); ++ bge(jz, i, RECOMP_FOR2); // jz here is "old jz" + k ++ } ++ b(RECOMPUTE); ++ } ++ } ++ } ++ bind(RECOMP_CHECK_DONE); ++ // chop off zero terms ++ vxor_v(vt, vt, vt); ++ fcmp_ceq_d(FCC0, v18, vt); ++ bcnez(FCC0, Z_IS_ZERO); ++ ++ block_comment("else block of if(z==0.0) {"); { ++ bind(RECOMP_CHECK_DONE_NOT_ZERO); ++ fmul_d(v18, v18, v22); ++ fcmp_clt_d(FCC0, v18, v24); // v24 is stil two24A ++ bcnez(FCC0, Z_IS_LESS_THAN_TWO24B); ++ fmul_d(v1, v18, v17); // twon24*z ++ vfrintrz_d(v1, v1); // v1 = (double)(int)(v1) ++ fnmsub_d(v2, v24, v1, v18); ++ ftintrz_w_d(vt, v1); // (int)fw ++ movfr2gr_s(tmp3, vt); ++ ftintrz_w_d(vt, v2); // double to int ++ movfr2gr_s(tmp2, vt); ++ alsl_d(SCR2, jz, iqBase, 2 - 1); ++ st_w(tmp2, SCR2, 0); ++ addi_w(SCR1, SCR1, 24); ++ addi_w(jz, jz, 1); ++ st_w(tmp3, SCR2, 0); // iq[jz] = (int) fw ++ b(Z_ZERO_CHECK_DONE); ++ 
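++    // The z >= two24B path above has already branched to Z_ZERO_CHECK_DONE; the label below
++    // handles the remaining z < two24B case of the fdlibm pseudo code, i.e. "else iq[jz] = (int) z;".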
bind(Z_IS_LESS_THAN_TWO24B); ++ ftintrz_w_d(vt, v18); // (int)z ++ movfr2gr_s(tmp3, vt); ++ alsl_d(SCR2, jz, iqBase, 2 - 1); ++ st_w(tmp3, SCR2, 0); // iq[jz] = (int) z ++ b(Z_ZERO_CHECK_DONE); ++ } ++ ++ block_comment("if(z==0.0) {"); { ++ bind(Z_IS_ZERO); ++ addi_w(jz, jz, -1); ++ alsl_d(SCR2, jz, iqBase, 2 - 1); ++ ld_w(tmp1, SCR2, 0); ++ addi_w(SCR1, SCR1, -24); ++ beqz(tmp1, Z_IS_ZERO); ++ } ++ bind(Z_ZERO_CHECK_DONE); ++ // convert integer "bit" chunk to floating-point value ++ // v17 = twon24 ++ // update v30, which was scalbnA(1.0, ); ++ addi_w(tmp2, SCR1, 1023); // biased exponent ++ slli_d(tmp2, tmp2, 52); // put at correct position ++ move(i, jz); ++ movgr2fr_d(v30, tmp2); ++ ++ block_comment("for(i=jz;i>=0;i--) {q[i] = fw*(double)iq[i]; fw*=twon24;}"); { ++ bind(CONVERTION_FOR); ++ alsl_d(SCR2, i, iqBase, 2 - 1); ++ fld_s(v31, SCR2, 0); ++ vffintl_d_w(v31, v31); ++ fmul_d(v31, v31, v30); ++ alsl_d(SCR2, i, qBase, 3 - 1); ++ fst_d(v31, SCR2, 0); ++ fmul_d(v30, v30, v17); ++ addi_w(i, i, -1); ++ bge(i, R0, CONVERTION_FOR); ++ } ++ addi_d(SCR2, SP, 160); // base for fq ++ // reusing twoOverPiBase ++ li(twoOverPiBase, pio2); ++ ++ block_comment("compute PIo2[0,...,jp]*q[jz,...,0]. for(i=jz;i>=0;i--) {...}"); { ++ move(i, jz); ++ move(tmp2, R0); // tmp2 will keep jz - i == 0 at start ++ bind(COMP_FOR); ++ // for(fw=0.0,k=0;k<=jp&&k<=jz-i;k++) fw += PIo2[k]*q[i+k]; ++ vxor_v(v30, v30, v30); ++ alsl_d(tmp5, i, qBase, 3 - 1); // address of q[i+k] for k==0 ++ li(tmp3, 4); ++ slti(tmp4, tmp2, 5); ++ alsl_d(tmp1, i, qBase, 3 - 1); // used as q[i] address ++ masknez(tmp3, tmp3, tmp4); // min(jz - i, jp); ++ maskeqz(tmp4, tmp2, tmp4); ++ orr(tmp3, tmp3, tmp4); ++ move(tmp4, R0); // used as k ++ ++ block_comment("for(fw=0.0,k=0;k<=jp&&k<=jz-i;k++) fw += PIo2[k]*q[i+k];"); { ++ bind(COMP_INNER_LOOP); ++ alsl_d(tmp5, tmp4, tmp1, 3 - 1); ++ fld_d(v18, tmp5, 0); // q[i+k] ++ alsl_d(tmp5, tmp4, twoOverPiBase, 3 - 1); ++ fld_d(v19, tmp5, 0); // PIo2[k] ++ fmadd_d(v30, v18, v19, v30); // fw += PIo2[k]*q[i+k]; ++ addi_w(tmp4, tmp4, 1); // k++ ++ bge(tmp3, tmp4, COMP_INNER_LOOP); ++ } ++ alsl_d(tmp5, tmp2, SCR2, 3 - 1); ++ fst_d(v30, tmp5, 0); // fq[jz-i] ++ addi_d(tmp2, tmp2, 1); ++ addi_w(i, i, -1); ++ bge(i, R0, COMP_FOR); ++ } ++ ++ block_comment("switch(prec) {...}. case 2:"); { ++ // compress fq into y[] ++ // remember prec == 2 ++ ++ block_comment("for (i=jz;i>=0;i--) fw += fq[i];"); { ++ vxor_v(v4, v4, v4); ++ move(i, jz); ++ bind(FW_FOR1); ++ alsl_d(tmp5, i, SCR2, 3 - 1); ++ fld_d(v1, tmp5, 0); ++ addi_w(i, i, -1); ++ fadd_d(v4, v4, v1); ++ bge(i, R0, FW_FOR1); ++ } ++ bind(FW_FOR1_DONE); ++ // v1 contains fq[0]. so, keep it so far ++ fsub_d(v5, v1, v4); // fw = fq[0] - fw ++ beqz(ih, FW_Y0_NO_NEGATION); ++ fneg_d(v4, v4); ++ bind(FW_Y0_NO_NEGATION); ++ ++ block_comment("for (i=1;i<=jz;i++) fw += fq[i];"); { ++ li(i, 1); ++ blt(jz, i, FW_FOR2_DONE); ++ bind(FW_FOR2); ++ alsl_d(tmp5, i, SCR2, 3 - 1); ++ fld_d(v1, tmp5, 0); ++ addi_w(i, i, 1); ++ fadd_d(v5, v5, v1); ++ bge(jz, i, FW_FOR2); ++ } ++ bind(FW_FOR2_DONE); ++ beqz(ih, FW_Y1_NO_NEGATION); ++ fneg_d(v5, v5); ++ bind(FW_Y1_NO_NEGATION); ++ addi_d(SP, SP, 560); ++ } ++} ++ ++///* __kernel_sin( x, y, iy) ++// * kernel sin function on [-pi/4, pi/4], pi/4 ~ 0.7854 ++// * Input x is assumed to be bounded by ~pi/4 in magnitude. ++// * Input y is the tail of x. ++// * Input iy indicates whether y is 0. (if iy=0, y assume to be 0). ++// * ++// * Algorithm ++// * 1. Since sin(-x) = -sin(x), we need only to consider positive x. ++// * 2. 
if x < 2^-27 (hx<0x3e400000 0), return x with inexact if x!=0. ++// * 3. sin(x) is approximated by a polynomial of degree 13 on ++// * [0,pi/4] ++// * 3 13 ++// * sin(x) ~ x + S1*x + ... + S6*x ++// * where ++// * ++// * |sin(x) 2 4 6 8 10 12 | -58 ++// * |----- - (1+S1*x +S2*x +S3*x +S4*x +S5*x +S6*x )| <= 2 ++// * | x | ++// * ++// * 4. sin(x+y) = sin(x) + sin'(x')*y ++// * ~ sin(x) + (1-x*x/2)*y ++// * For better accuracy, let ++// * 3 2 2 2 2 ++// * r = x *(S2+x *(S3+x *(S4+x *(S5+x *S6)))) ++// * then 3 2 ++// * sin(x) = x + (S1*x + (x *(r-y/2)+y)) ++// */ ++//static const double ++//S1 = -1.66666666666666324348e-01, /* 0xBFC55555, 0x55555549 */ ++//S2 = 8.33333333332248946124e-03, /* 0x3F811111, 0x1110F8A6 */ ++//S3 = -1.98412698298579493134e-04, /* 0xBF2A01A0, 0x19C161D5 */ ++//S4 = 2.75573137070700676789e-06, /* 0x3EC71DE3, 0x57B1FE7D */ ++//S5 = -2.50507602534068634195e-08, /* 0xBE5AE5E6, 0x8A2B9CEB */ ++//S6 = 1.58969099521155010221e-10; /* 0x3DE5D93A, 0x5ACFD57C */ ++// ++// NOTE: S1..S6 were moved into a table: StubRoutines::la::_dsin_coef ++// ++// BEGIN __kernel_sin PSEUDO CODE ++// ++//static double __kernel_sin(double x, double y, bool iy) ++//{ ++// double z,r,v; ++// ++// // NOTE: not needed. moved to dsin/dcos ++// //int ix; ++// //ix = high(x)&0x7fffffff; /* high word of x */ ++// ++// // NOTE: moved to dsin/dcos ++// //if(ix<0x3e400000) /* |x| < 2**-27 */ ++// // {if((int)x==0) return x;} /* generate inexact */ ++// ++// z = x*x; ++// v = z*x; ++// r = S2+z*(S3+z*(S4+z*(S5+z*S6))); ++// if(iy==0) return x+v*(S1+z*r); ++// else return x-((z*(half*y-v*r)-y)-v*S1); ++//} ++// ++// END __kernel_sin PSEUDO CODE ++// ++// Changes between fdlibm and intrinsic: ++// 1. Removed |x| < 2**-27 check, because if was done earlier in dsin/dcos ++// 2. Constants are now loaded from table dsin_coef ++// 3. C code parameter "int iy" was modified to "bool iyIsOne", because ++// iy is always 0 or 1. Also, iyIsOne branch was moved into ++// generation phase instead of taking it during code execution ++// Input ans output: ++// 1. Input for generated function: X argument = x ++// 2. Input for generator: x = register to read argument from, iyIsOne ++// = flag to use low argument low part or not, dsin_coef = coefficients ++// table address ++// 3. Return sin(x) value in FA0 ++void MacroAssembler::generate_kernel_sin(FloatRegister x, bool iyIsOne, address dsin_coef) { ++ FloatRegister y = FA5, z = FA6, v = FA7, r = FT0, s1 = FT1, s2 = FT2, ++ s3 = FT3, s4 = FT4, s5 = FT5, s6 = FT6, half = FT7; ++ li(SCR2, dsin_coef); ++ fld_d(s5, SCR2, 32); ++ fld_d(s6, SCR2, 40); ++ fmul_d(z, x, x); // z = x*x; ++ fld_d(s1, SCR2, 0); ++ fld_d(s2, SCR2, 8); ++ fld_d(s3, SCR2, 16); ++ fld_d(s4, SCR2, 24); ++ fmul_d(v, z, x); // v = z*x; ++ ++ block_comment("calculate r = S2+z*(S3+z*(S4+z*(S5+z*S6)))"); { ++ fmadd_d(r, z, s6, s5); ++ // initialize "half" in current block to utilize 2nd FPU. 
However, it's ++ // not a part of this block ++ vldi(half, -928); // 0.5 (0x3fe0000000000000) ++ fmadd_d(r, z, r, s4); ++ fmadd_d(r, z, r, s3); ++ fmadd_d(r, z, r, s2); ++ } ++ ++ if (!iyIsOne) { ++ // return x+v*(S1+z*r); ++ fmadd_d(s1, z, r, s1); ++ fmadd_d(FA0, v, s1, x); ++ } else { ++ // return x-((z*(half*y-v*r)-y)-v*S1); ++ fmul_d(s6, half, y); // half*y ++ fnmsub_d(s6, v, r, s6); // half*y-v*r ++ fnmsub_d(s6, z, s6, y); // y - z*(half*y-v*r) = - (z*(half*y-v*r)-y) ++ fmadd_d(s6, v, s1, s6); // - (z*(half*y-v*r)-y) + v*S1 == -((z*(half*y-v*r)-y)-v*S1) ++ fadd_d(FA0, x, s6); ++ } ++} ++ ++///* ++// * __kernel_cos( x, y ) ++// * kernel cos function on [-pi/4, pi/4], pi/4 ~ 0.785398164 ++// * Input x is assumed to be bounded by ~pi/4 in magnitude. ++// * Input y is the tail of x. ++// * ++// * Algorithm ++// * 1. Since cos(-x) = cos(x), we need only to consider positive x. ++// * 2. if x < 2^-27 (hx<0x3e400000 0), return 1 with inexact if x!=0. ++// * 3. cos(x) is approximated by a polynomial of degree 14 on ++// * [0,pi/4] ++// * 4 14 ++// * cos(x) ~ 1 - x*x/2 + C1*x + ... + C6*x ++// * where the remez error is ++// * ++// * | 2 4 6 8 10 12 14 | -58 ++// * |cos(x)-(1-.5*x +C1*x +C2*x +C3*x +C4*x +C5*x +C6*x )| <= 2 ++// * | | ++// * ++// * 4 6 8 10 12 14 ++// * 4. let r = C1*x +C2*x +C3*x +C4*x +C5*x +C6*x , then ++// * cos(x) = 1 - x*x/2 + r ++// * since cos(x+y) ~ cos(x) - sin(x)*y ++// * ~ cos(x) - x*y, ++// * a correction term is necessary in cos(x) and hence ++// * cos(x+y) = 1 - (x*x/2 - (r - x*y)) ++// * For better accuracy when x > 0.3, let qx = |x|/4 with ++// * the last 32 bits mask off, and if x > 0.78125, let qx = 0.28125. ++// * Then ++// * cos(x+y) = (1-qx) - ((x*x/2-qx) - (r-x*y)). ++// * Note that 1-qx and (x*x/2-qx) is EXACT here, and the ++// * magnitude of the latter is at least a quarter of x*x/2, ++// * thus, reducing the rounding error in the subtraction. ++// */ ++// ++//static const double ++//C1 = 4.16666666666666019037e-02, /* 0x3FA55555, 0x5555554C */ ++//C2 = -1.38888888888741095749e-03, /* 0xBF56C16C, 0x16C15177 */ ++//C3 = 2.48015872894767294178e-05, /* 0x3EFA01A0, 0x19CB1590 */ ++//C4 = -2.75573143513906633035e-07, /* 0xBE927E4F, 0x809C52AD */ ++//C5 = 2.08757232129817482790e-09, /* 0x3E21EE9E, 0xBDB4B1C4 */ ++//C6 = -1.13596475577881948265e-11; /* 0xBDA8FAE9, 0xBE8838D4 */ ++// ++// NOTE: C1..C6 were moved into a table: StubRoutines::la::_dcos_coef ++// ++// BEGIN __kernel_cos PSEUDO CODE ++// ++//static double __kernel_cos(double x, double y) ++//{ ++// double a,h,z,r,qx=0; ++// ++// // NOTE: ix is already initialized in dsin/dcos. Reuse value from register ++// //int ix; ++// //ix = high(x)&0x7fffffff; /* ix = |x|'s high word*/ ++// ++// // NOTE: moved to dsin/dcos ++// //if(ix<0x3e400000) { /* if x < 2**27 */ ++// // if(((int)x)==0) return one; /* generate inexact */ ++// //} ++// ++// z = x*x; ++// r = z*(C1+z*(C2+z*(C3+z*(C4+z*(C5+z*C6))))); ++// if(ix < 0x3FD33333) /* if |x| < 0.3 */ ++// return one - (0.5*z - (z*r - x*y)); ++// else { ++// if(ix > 0x3fe90000) { /* x > 0.78125 */ ++// qx = 0.28125; ++// } else { ++// set_high(&qx, ix-0x00200000); /* x/4 */ ++// set_low(&qx, 0); ++// } ++// h = 0.5*z-qx; ++// a = one-qx; ++// return a - (h - (z*r-x*y)); ++// } ++//} ++// ++// END __kernel_cos PSEUDO CODE ++// ++// Changes between fdlibm and intrinsic: ++// 1. Removed |x| < 2**-27 check, because if was done earlier in dsin/dcos ++// 2. Constants are now loaded from table dcos_coef ++// Input and output: ++// 1. 
Input for generated function: X argument = x ++// 2. Input for generator: x = register to read argument from, dcos_coef ++// = coefficients table address ++// 3. Return cos(x) value in FA0 ++void MacroAssembler::generate_kernel_cos(FloatRegister x, address dcos_coef) { ++ Register ix = A3; ++ FloatRegister qx = FA1, h = FA2, a = FA3, y = FA5, z = FA6, r = FA7, C1 = FT0, ++ C2 = FT1, C3 = FT2, C4 = FT3, C5 = FT4, C6 = FT5, one = FT6, half = FT7; ++ Label IX_IS_LARGE, SET_QX_CONST, DONE, QX_SET; ++ li(SCR2, dcos_coef); ++ fld_d(C1, SCR2, 0); ++ fld_d(C2, SCR2, 8); ++ fld_d(C3, SCR2, 16); ++ fld_d(C4, SCR2, 24); ++ fld_d(C5, SCR2, 32); ++ fld_d(C6, SCR2, 40); ++ fmul_d(z, x, x); // z=x^2 ++ block_comment("calculate r = z*(C1+z*(C2+z*(C3+z*(C4+z*(C5+z*C6)))))"); { ++ fmadd_d(r, z, C6, C5); ++ vldi(half, -928); // 0.5 (0x3fe0000000000000) ++ fmadd_d(r, z, r, C4); ++ fmul_d(y, x, y); ++ fmadd_d(r, z, r, C3); ++ li(SCR1, 0x3FD33333); ++ fmadd_d(r, z, r, C2); ++ fmul_d(x, z, z); // x = z^2 ++ fmadd_d(r, z, r, C1); // r = C1+z(C2+z(C4+z(C5+z*C6))) ++ } ++ // need to multiply r by z to have "final" r value ++ vldi(one, -912); // 1.0 (0x3ff0000000000000) ++ bge(ix, SCR1, IX_IS_LARGE); ++ block_comment("if(ix < 0x3FD33333) return one - (0.5*z - (z*r - x*y))"); { ++ // return 1.0 - (0.5*z - (z*r - x*y)) = 1.0 - (0.5*z + (x*y - z*r)) ++ fnmsub_d(FA0, x, r, y); ++ fmadd_d(FA0, half, z, FA0); ++ fsub_d(FA0, one, FA0); ++ b(DONE); ++ } ++ block_comment("if(ix >= 0x3FD33333)"); { ++ bind(IX_IS_LARGE); ++ li(SCR2, 0x3FE90000); ++ blt(SCR2, ix, SET_QX_CONST); ++ block_comment("set_high(&qx, ix-0x00200000); set_low(&qx, 0);"); { ++ li(SCR2, 0x00200000); ++ sub_w(SCR2, ix, SCR2); ++ slli_d(SCR2, SCR2, 32); ++ movgr2fr_d(qx, SCR2); ++ } ++ b(QX_SET); ++ bind(SET_QX_CONST); ++ block_comment("if(ix > 0x3fe90000) qx = 0.28125;"); { ++ vldi(qx, -942); // 0.28125 (0x3fd2000000000000) ++ } ++ bind(QX_SET); ++ fmsub_d(C6, x, r, y); // z*r - xy ++ fmsub_d(h, half, z, qx); // h = 0.5*z - qx ++ fsub_d(a, one, qx); // a = 1-qx ++ fsub_d(C6, h, C6); // = h - (z*r - x*y) ++ fsub_d(FA0, a, C6); ++ } ++ bind(DONE); ++} ++ ++// generate_dsin_dcos creates stub for dsin and dcos ++// Generation is done via single call because dsin and dcos code is almost the ++// same(see C code below). These functions work as follows: ++// 1) handle corner cases: |x| ~< pi/4, x is NaN or INF, |x| < 2**-27 ++// 2) perform argument reduction if required ++// 3) call kernel_sin or kernel_cos which approximate sin/cos via polynomial ++// ++// BEGIN dsin/dcos PSEUDO CODE ++// ++//dsin_dcos(jdouble x, bool isCos) { ++// double y[2],z=0.0; ++// int n, ix; ++// ++// /* High word of x. */ ++// ix = high(x); ++// ++// /* |x| ~< pi/4 */ ++// ix &= 0x7fffffff; ++// if(ix <= 0x3fe921fb) return isCos ? __kernel_cos : __kernel_sin(x,z,0); ++// ++// /* sin/cos(Inf or NaN) is NaN */ ++// else if (ix>=0x7ff00000) return x-x; ++// else if (ix<0x3e400000) { /* if ix < 2**27 */ ++// if(((int)x)==0) return isCos ? one : x; /* generate inexact */ ++// } ++// /* argument reduction needed */ ++// else { ++// n = __ieee754_rem_pio2(x,y); ++// switch(n&3) { ++// case 0: return isCos ? __kernel_cos(y[0],y[1]) : __kernel_sin(y[0],y[1], true); ++// case 1: return isCos ? -__kernel_sin(y[0],y[1],true) : __kernel_cos(y[0],y[1]); ++// case 2: return isCos ? -__kernel_cos(y[0],y[1]) : -__kernel_sin(y[0],y[1], true); ++// default: ++// return isCos ? 
__kernel_sin(y[0],y[1],1) : -__kernel_cos(y[0],y[1]); ++// } ++// } ++//} ++// END dsin/dcos PSEUDO CODE ++// ++// Changes between fdlibm and intrinsic: ++// 1. Moved ix < 2**27 from kernel_sin/kernel_cos into dsin/dcos ++// 2. Final switch use equivalent bit checks(tbz/tbnz) ++// Input ans output: ++// 1. Input for generated function: X = A0 ++// 2. Input for generator: isCos = generate sin or cos, npio2_hw = address ++// of npio2_hw table, two_over_pi = address of two_over_pi table, ++// pio2 = address if pio2 table, dsin_coef = address if dsin_coef table, ++// dcos_coef = address of dcos_coef table ++// 3. Return result in FA0 ++// NOTE: general purpose register names match local variable names in C code ++void MacroAssembler::generate_dsin_dcos(bool isCos, address npio2_hw, ++ address two_over_pi, address pio2, ++ address dsin_coef, address dcos_coef) { ++ Label DONE, ARG_REDUCTION, TINY_X, RETURN_SIN, EARLY_CASE; ++ Register X = A0, absX = A1, n = A2, ix = A3; ++ FloatRegister y0 = FA4, y1 = FA5; ++ ++ block_comment("check |x| ~< pi/4, NaN, Inf and |x| < 2**-27 cases"); { ++ movfr2gr_d(X, FA0); ++ li(SCR2, 0x3e400000); ++ li(SCR1, 0x3fe921fb); // high word of pi/4. ++ bstrpick_d(absX, X, 62, 0); // absX ++ li(T0, 0x7ff0000000000000); ++ srli_d(ix, absX, 32); // set ix ++ blt(ix, SCR2, TINY_X); // handle tiny x (|x| < 2^-27) ++ bge(SCR1, ix, EARLY_CASE); // if(ix <= 0x3fe921fb) return ++ blt(absX, T0, ARG_REDUCTION); ++ // X is NaN or INF(i.e. 0x7FF* or 0xFFF*). Return NaN (mantissa != 0). ++ // Set last bit unconditionally to make it NaN ++ ori(T0, T0, 1); ++ movgr2fr_d(FA0, T0); ++ jr(RA); ++ } ++ block_comment("kernel_sin/kernel_cos: if(ix<0x3e400000) {}"); { ++ bind(TINY_X); ++ if (isCos) { ++ vldi(FA0, -912); // 1.0 (0x3ff0000000000000) ++ } ++ jr(RA); ++ } ++ bind(ARG_REDUCTION); /* argument reduction needed */ ++ block_comment("n = __ieee754_rem_pio2(x,y);"); { ++ generate__ieee754_rem_pio2(npio2_hw, two_over_pi, pio2); ++ } ++ block_comment("switch(n&3) {case ... }"); { ++ if (isCos) { ++ srli_w(T0, n, 1); ++ xorr(absX, n, T0); ++ andi(T0, n, 1); ++ bnez(T0, RETURN_SIN); ++ } else { ++ andi(T0, n, 1); ++ beqz(T0, RETURN_SIN); ++ } ++ generate_kernel_cos(y0, dcos_coef); ++ if (isCos) { ++ andi(T0, absX, 1); ++ beqz(T0, DONE); ++ } else { ++ andi(T0, n, 2); ++ beqz(T0, DONE); ++ } ++ fneg_d(FA0, FA0); ++ jr(RA); ++ bind(RETURN_SIN); ++ generate_kernel_sin(y0, true, dsin_coef); ++ if (isCos) { ++ andi(T0, absX, 1); ++ beqz(T0, DONE); ++ } else { ++ andi(T0, n, 2); ++ beqz(T0, DONE); ++ } ++ fneg_d(FA0, FA0); ++ jr(RA); ++ } ++ bind(EARLY_CASE); ++ vxor_v(y1, y1, y1); ++ if (isCos) { ++ generate_kernel_cos(FA0, dcos_coef); ++ } else { ++ generate_kernel_sin(FA0, false, dsin_coef); ++ } ++ bind(DONE); ++ jr(RA); ++} +diff --git a/src/hotspot/cpu/loongarch/methodHandles_loongarch.cpp b/src/hotspot/cpu/loongarch/methodHandles_loongarch.cpp +new file mode 100644 +index 0000000000..e517dcd415 +--- /dev/null ++++ b/src/hotspot/cpu/loongarch/methodHandles_loongarch.cpp +@@ -0,0 +1,564 @@ ++/* ++ * Copyright (c) 1997, 2014, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. 
++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#include "precompiled.hpp" ++#include "asm/macroAssembler.hpp" ++#include "classfile/javaClasses.inline.hpp" ++#include "interpreter/interpreter.hpp" ++#include "interpreter/interpreterRuntime.hpp" ++#include "memory/allocation.inline.hpp" ++#include "prims/methodHandles.hpp" ++#include "runtime/frame.inline.hpp" ++#include "utilities/preserveException.hpp" ++ ++#define __ _masm-> ++ ++#define T0 RT0 ++#define T1 RT1 ++#define T2 RT2 ++#define T3 RT3 ++#define T4 RT4 ++#define T5 RT5 ++#define T8 RT8 ++ ++#ifdef PRODUCT ++#define BLOCK_COMMENT(str) // nothing ++#define STOP(error) stop(error) ++#else ++#define BLOCK_COMMENT(str) __ block_comment(str) ++#define STOP(error) block_comment(error); __ stop(error) ++#endif ++ ++#define BIND(label) bind(label); BLOCK_COMMENT(#label ":") ++ ++void MethodHandles::load_klass_from_Class(MacroAssembler* _masm, Register klass_reg) { ++ if (VerifyMethodHandles) ++ verify_klass(_masm, klass_reg, SystemDictionary::WK_KLASS_ENUM_NAME(java_lang_Class), ++ "MH argument is a Class"); ++ __ ld_d(klass_reg, Address(klass_reg, java_lang_Class::klass_offset_in_bytes())); ++} ++ ++#ifdef ASSERT ++static int check_nonzero(const char* xname, int x) { ++ assert(x != 0, "%s should be nonzero", xname); ++ return x; ++} ++#define NONZERO(x) check_nonzero(#x, x) ++#else //ASSERT ++#define NONZERO(x) (x) ++#endif //ASSERT ++ ++#ifdef ASSERT ++void MethodHandles::verify_klass(MacroAssembler* _masm, ++ Register obj, SystemDictionary::WKID klass_id, ++ const char* error_message) { ++} ++ ++void MethodHandles::verify_ref_kind(MacroAssembler* _masm, int ref_kind, Register member_reg, Register temp) { ++ Label L; ++ BLOCK_COMMENT("verify_ref_kind {"); ++ __ ld_w(temp, Address(member_reg, NONZERO(java_lang_invoke_MemberName::flags_offset_in_bytes()))); ++ __ srai_w(temp, temp, java_lang_invoke_MemberName::MN_REFERENCE_KIND_SHIFT); ++ __ li(AT, java_lang_invoke_MemberName::MN_REFERENCE_KIND_MASK); ++ __ andr(temp, temp, AT); ++ __ li(AT, ref_kind); ++ __ beq(temp, AT, L); ++ { char* buf = NEW_C_HEAP_ARRAY(char, 100, mtInternal); ++ jio_snprintf(buf, 100, "verify_ref_kind expected %x", ref_kind); ++ if (ref_kind == JVM_REF_invokeVirtual || ++ ref_kind == JVM_REF_invokeSpecial) ++ // could do this for all ref_kinds, but would explode assembly code size ++ trace_method_handle(_masm, buf); ++ __ STOP(buf); ++ } ++ BLOCK_COMMENT("} verify_ref_kind"); ++ __ bind(L); ++} ++ ++#endif //ASSERT ++ ++void MethodHandles::jump_from_method_handle(MacroAssembler* _masm, Register method, Register temp, ++ bool for_compiler_entry) { ++ assert(method == Rmethod, "interpreter calling convention"); ++ ++ Label L_no_such_method; ++ __ beq(method, R0, L_no_such_method); ++ ++ __ verify_method_ptr(method); ++ ++ if (!for_compiler_entry && 
JvmtiExport::can_post_interpreter_events()) { ++ Label run_compiled_code; ++ // JVMTI events, such as single-stepping, are implemented partly by avoiding running ++ // compiled code in threads for which the event is enabled. Check here for ++ // interp_only_mode if these events CAN be enabled. ++ Register rthread = TREG; ++ // interp_only is an int, on little endian it is sufficient to test the byte only ++ // Is a cmpl faster? ++ __ ld_bu(AT, rthread, in_bytes(JavaThread::interp_only_mode_offset())); ++ __ beq(AT, R0, run_compiled_code); ++ __ ld_d(T4, method, in_bytes(Method::interpreter_entry_offset())); ++ __ jr(T4); ++ __ BIND(run_compiled_code); ++ } ++ ++ const ByteSize entry_offset = for_compiler_entry ? Method::from_compiled_offset() : ++ Method::from_interpreted_offset(); ++ __ ld_d(T4, method, in_bytes(entry_offset)); ++ __ jr(T4); ++ ++ __ bind(L_no_such_method); ++ address wrong_method = StubRoutines::throw_AbstractMethodError_entry(); ++ __ jmp(wrong_method, relocInfo::runtime_call_type); ++} ++ ++void MethodHandles::jump_to_lambda_form(MacroAssembler* _masm, ++ Register recv, Register method_temp, ++ Register temp2, ++ bool for_compiler_entry) { ++ BLOCK_COMMENT("jump_to_lambda_form {"); ++ // This is the initial entry point of a lazy method handle. ++ // After type checking, it picks up the invoker from the LambdaForm. ++ assert_different_registers(recv, method_temp, temp2); ++ assert(recv != noreg, "required register"); ++ assert(method_temp == Rmethod, "required register for loading method"); ++ ++ //NOT_PRODUCT({ FlagSetting fs(TraceMethodHandles, true); trace_method_handle(_masm, "LZMH"); }); ++ ++ // Load the invoker, as MH -> MH.form -> LF.vmentry ++ __ verify_oop(recv); ++ __ load_heap_oop(method_temp, Address(recv, NONZERO(java_lang_invoke_MethodHandle::form_offset_in_bytes())), temp2); ++ __ verify_oop(method_temp); ++ __ load_heap_oop(method_temp, Address(method_temp, NONZERO(java_lang_invoke_LambdaForm::vmentry_offset_in_bytes())), temp2); ++ __ verify_oop(method_temp); ++ __ load_heap_oop(method_temp, Address(method_temp, NONZERO(java_lang_invoke_MemberName::method_offset_in_bytes()))); ++ __ verify_oop(method_temp); ++ __ access_load_at(T_ADDRESS, IN_HEAP, method_temp, Address(method_temp, NONZERO(java_lang_invoke_ResolvedMethodName::vmtarget_offset_in_bytes())), noreg, noreg); ++ ++ if (VerifyMethodHandles && !for_compiler_entry) { ++ // make sure recv is already on stack ++ __ ld_d(temp2, Address(method_temp, Method::const_offset())); ++ __ load_sized_value(temp2, ++ Address(temp2, ConstMethod::size_of_parameters_offset()), ++ sizeof(u2), false); ++ // assert(sizeof(u2) == sizeof(Method::_size_of_parameters), ""); ++ Label L; ++ Address recv_addr = __ argument_address(temp2, -1); ++ __ ld_d(AT, recv_addr); ++ __ beq(recv, AT, L); ++ ++ recv_addr = __ argument_address(temp2, -1); ++ __ ld_d(V0, recv_addr); ++ __ STOP("receiver not on stack"); ++ __ BIND(L); ++ } ++ ++ jump_from_method_handle(_masm, method_temp, temp2, for_compiler_entry); ++ BLOCK_COMMENT("} jump_to_lambda_form"); ++} ++ ++ ++// Code generation ++address MethodHandles::generate_method_handle_interpreter_entry(MacroAssembler* _masm, ++ vmIntrinsics::ID iid) { ++ const bool not_for_compiler_entry = false; // this is the interpreter entry ++ assert(is_signature_polymorphic(iid), "expected invoke iid"); ++ if (iid == vmIntrinsics::_invokeGeneric || ++ iid == vmIntrinsics::_compiledLambdaForm) { ++ // Perhaps surprisingly, the symbolic references visible to Java are not directly used. 
++ // They are linked to Java-generated adapters via MethodHandleNatives.linkMethod. ++ // They all allow an appendix argument. ++ __ stop("empty stubs make SG sick"); ++ return NULL; ++ } ++ ++ // Rmethod: Method* ++ // T4: argument locator (parameter slot count, added to sp) ++ // S7: used as temp to hold mh or receiver ++ Register t4_argp = T4; // argument list ptr, live on error paths ++ Register s7_mh = S7; // MH receiver; dies quickly and is recycled ++ Register rm_method = Rmethod; // eventual target of this invocation ++ ++ // here's where control starts out: ++ __ align(CodeEntryAlignment); ++ address entry_point = __ pc(); ++ ++ if (VerifyMethodHandles) { ++ assert(Method::intrinsic_id_size_in_bytes() == 2, "assuming Method::_intrinsic_id is u2"); ++ ++ Label L; ++ BLOCK_COMMENT("verify_intrinsic_id {"); ++ __ ld_hu(AT, rm_method, Method::intrinsic_id_offset_in_bytes()); ++ guarantee(Assembler::is_simm(iid, 12), "Oops, iid is not simm12! Change the instructions."); ++ __ addi_d(AT, AT, -1 * (int) iid); ++ __ beq(AT, R0, L); ++ if (iid == vmIntrinsics::_linkToVirtual || ++ iid == vmIntrinsics::_linkToSpecial) { ++ // could do this for all kinds, but would explode assembly code size ++ trace_method_handle(_masm, "bad Method*::intrinsic_id"); ++ } ++ __ STOP("bad Method*::intrinsic_id"); ++ __ bind(L); ++ BLOCK_COMMENT("} verify_intrinsic_id"); ++ } ++ ++ // First task: Find out how big the argument list is. ++ Address t4_first_arg_addr; ++ int ref_kind = signature_polymorphic_intrinsic_ref_kind(iid); ++ assert(ref_kind != 0 || iid == vmIntrinsics::_invokeBasic, "must be _invokeBasic or a linkTo intrinsic"); ++ if (ref_kind == 0 || MethodHandles::ref_kind_has_receiver(ref_kind)) { ++ __ ld_d(t4_argp, Address(rm_method, Method::const_offset())); ++ __ load_sized_value(t4_argp, ++ Address(t4_argp, ConstMethod::size_of_parameters_offset()), ++ sizeof(u2), false); ++ // assert(sizeof(u2) == sizeof(Method::_size_of_parameters), ""); ++ t4_first_arg_addr = __ argument_address(t4_argp, -1); ++ } else { ++ DEBUG_ONLY(t4_argp = noreg); ++ } ++ ++ if (!is_signature_polymorphic_static(iid)) { ++ __ ld_d(s7_mh, t4_first_arg_addr); ++ DEBUG_ONLY(t4_argp = noreg); ++ } ++ ++ // t4_first_arg_addr is live! ++ ++ trace_method_handle_interpreter_entry(_masm, iid); ++ ++ if (iid == vmIntrinsics::_invokeBasic) { ++ generate_method_handle_dispatch(_masm, iid, s7_mh, noreg, not_for_compiler_entry); ++ ++ } else { ++ // Adjust argument list by popping the trailing MemberName argument. ++ Register r_recv = noreg; ++ if (MethodHandles::ref_kind_has_receiver(ref_kind)) { ++ // Load the receiver (not the MH; the actual MemberName's receiver) up from the interpreter stack. 
++ __ ld_d(r_recv = T2, t4_first_arg_addr); ++ } ++ DEBUG_ONLY(t4_argp = noreg); ++ Register rm_member = rm_method; // MemberName ptr; incoming method ptr is dead now ++ __ pop(rm_member); // extract last argument ++ generate_method_handle_dispatch(_masm, iid, r_recv, rm_member, not_for_compiler_entry); ++ } ++ ++ return entry_point; ++} ++ ++void MethodHandles::generate_method_handle_dispatch(MacroAssembler* _masm, ++ vmIntrinsics::ID iid, ++ Register receiver_reg, ++ Register member_reg, ++ bool for_compiler_entry) { ++ assert(is_signature_polymorphic(iid), "expected invoke iid"); ++ Register rm_method = Rmethod; // eventual target of this invocation ++ // temps used in this code are not used in *either* compiled or interpreted calling sequences ++ Register j_rarg0 = T0; ++ Register j_rarg1 = A0; ++ Register j_rarg2 = A1; ++ Register j_rarg3 = A2; ++ Register j_rarg4 = A3; ++ Register j_rarg5 = A4; ++ ++ Register temp1 = T8; ++ Register temp2 = T4; ++ Register temp3 = T5; ++ if (for_compiler_entry) { ++ assert(receiver_reg == (iid == vmIntrinsics::_linkToStatic ? noreg : j_rarg0), "only valid assignment"); ++ assert_different_registers(temp1, j_rarg0, j_rarg1, j_rarg2, j_rarg3, j_rarg4, j_rarg5); ++ assert_different_registers(temp2, j_rarg0, j_rarg1, j_rarg2, j_rarg3, j_rarg4, j_rarg5); ++ assert_different_registers(temp3, j_rarg0, j_rarg1, j_rarg2, j_rarg3, j_rarg4, j_rarg5); ++ } ++ else { ++ assert_different_registers(temp1, temp2, temp3, saved_last_sp_register()); // don't trash lastSP ++ } ++ assert_different_registers(temp1, temp2, temp3, receiver_reg); ++ assert_different_registers(temp1, temp2, temp3, member_reg); ++ ++ if (iid == vmIntrinsics::_invokeBasic) { ++ // indirect through MH.form.vmentry.vmtarget ++ jump_to_lambda_form(_masm, receiver_reg, rm_method, temp1, for_compiler_entry); ++ ++ } else { ++ // The method is a member invoker used by direct method handles. ++ if (VerifyMethodHandles) { ++ // make sure the trailing argument really is a MemberName (caller responsibility) ++ verify_klass(_masm, member_reg, SystemDictionary::WK_KLASS_ENUM_NAME(java_lang_invoke_MemberName), ++ "MemberName required for invokeVirtual etc."); ++ } ++ ++ Address member_clazz( member_reg, NONZERO(java_lang_invoke_MemberName::clazz_offset_in_bytes())); ++ Address member_vmindex( member_reg, NONZERO(java_lang_invoke_MemberName::vmindex_offset_in_bytes())); ++ Address member_vmtarget( member_reg, NONZERO(java_lang_invoke_MemberName::method_offset_in_bytes())); ++ Address vmtarget_method( rm_method, NONZERO(java_lang_invoke_ResolvedMethodName::vmtarget_offset_in_bytes())); ++ ++ Register temp1_recv_klass = temp1; ++ if (iid != vmIntrinsics::_linkToStatic) { ++ __ verify_oop(receiver_reg); ++ if (iid == vmIntrinsics::_linkToSpecial) { ++ // Don't actually load the klass; just null-check the receiver. ++ __ null_check(receiver_reg); ++ } else { ++ // load receiver klass itself ++ __ null_check(receiver_reg, oopDesc::klass_offset_in_bytes()); ++ __ load_klass(temp1_recv_klass, receiver_reg); ++ __ verify_klass_ptr(temp1_recv_klass); ++ } ++ BLOCK_COMMENT("check_receiver {"); ++ // The receiver for the MemberName must be in receiver_reg. ++ // Check the receiver against the MemberName.clazz ++ if (VerifyMethodHandles && iid == vmIntrinsics::_linkToSpecial) { ++ // Did not load it above... 
++ __ load_klass(temp1_recv_klass, receiver_reg); ++ __ verify_klass_ptr(temp1_recv_klass); ++ } ++ if (VerifyMethodHandles && iid != vmIntrinsics::_linkToInterface) { ++ Label L_ok; ++ Register temp2_defc = temp2; ++ __ load_heap_oop(temp2_defc, member_clazz, temp3); ++ load_klass_from_Class(_masm, temp2_defc); ++ __ verify_klass_ptr(temp2_defc); ++ __ check_klass_subtype(temp1_recv_klass, temp2_defc, temp3, L_ok); ++ // If we get here, the type check failed! ++ __ STOP("receiver class disagrees with MemberName.clazz"); ++ __ bind(L_ok); ++ } ++ BLOCK_COMMENT("} check_receiver"); ++ } ++ if (iid == vmIntrinsics::_linkToSpecial || ++ iid == vmIntrinsics::_linkToStatic) { ++ DEBUG_ONLY(temp1_recv_klass = noreg); // these guys didn't load the recv_klass ++ } ++ ++ // Live registers at this point: ++ // member_reg - MemberName that was the trailing argument ++ // temp1_recv_klass - klass of stacked receiver, if needed ++ ++ Label L_incompatible_class_change_error; ++ switch (iid) { ++ case vmIntrinsics::_linkToSpecial: ++ if (VerifyMethodHandles) { ++ verify_ref_kind(_masm, JVM_REF_invokeSpecial, member_reg, temp3); ++ } ++ __ load_heap_oop(rm_method, member_vmtarget); ++ __ access_load_at(T_ADDRESS, IN_HEAP, rm_method, vmtarget_method, noreg, noreg); ++ break; ++ ++ case vmIntrinsics::_linkToStatic: ++ if (VerifyMethodHandles) { ++ verify_ref_kind(_masm, JVM_REF_invokeStatic, member_reg, temp3); ++ } ++ __ load_heap_oop(rm_method, member_vmtarget); ++ __ access_load_at(T_ADDRESS, IN_HEAP, rm_method, vmtarget_method, noreg, noreg); ++ break; ++ ++ case vmIntrinsics::_linkToVirtual: ++ { ++ // same as TemplateTable::invokevirtual, ++ // minus the CP setup and profiling: ++ ++ if (VerifyMethodHandles) { ++ verify_ref_kind(_masm, JVM_REF_invokeVirtual, member_reg, temp3); ++ } ++ ++ // pick out the vtable index from the MemberName, and then we can discard it: ++ Register temp2_index = temp2; ++ __ access_load_at(T_ADDRESS, IN_HEAP, temp2_index, member_vmindex, noreg, noreg); ++ if (VerifyMethodHandles) { ++ Label L_index_ok; ++ __ blt(R0, temp2_index, L_index_ok); ++ __ STOP("no virtual index"); ++ __ BIND(L_index_ok); ++ } ++ ++ // Note: The verifier invariants allow us to ignore MemberName.clazz and vmtarget ++ // at this point. And VerifyMethodHandles has already checked clazz, if needed. 
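++        // Conceptually the dispatch below performs rm_method = recv_klass->vtable[vmindex],
++        // an indexed load from the receiver klass' embedded vtable; lookup_virtual_method
++        // emits that load using temp1_recv_klass and temp2_index.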
++ ++ // get target Method* & entry point ++ __ lookup_virtual_method(temp1_recv_klass, temp2_index, rm_method); ++ break; ++ } ++ ++ case vmIntrinsics::_linkToInterface: ++ { ++ // same as TemplateTable::invokeinterface ++ // (minus the CP setup and profiling, with different argument motion) ++ if (VerifyMethodHandles) { ++ verify_ref_kind(_masm, JVM_REF_invokeInterface, member_reg, temp3); ++ } ++ ++ Register temp3_intf = temp3; ++ __ load_heap_oop(temp3_intf, member_clazz); ++ load_klass_from_Class(_masm, temp3_intf); ++ __ verify_klass_ptr(temp3_intf); ++ ++ Register rm_index = rm_method; ++ __ access_load_at(T_ADDRESS, IN_HEAP, rm_index, member_vmindex, noreg, noreg); ++ if (VerifyMethodHandles) { ++ Label L; ++ __ bge(rm_index, R0, L); ++ __ STOP("invalid vtable index for MH.invokeInterface"); ++ __ bind(L); ++ } ++ ++ // given intf, index, and recv klass, dispatch to the implementation method ++ __ lookup_interface_method(temp1_recv_klass, temp3_intf, ++ // note: next two args must be the same: ++ rm_index, rm_method, ++ temp2, ++ L_incompatible_class_change_error); ++ break; ++ } ++ ++ default: ++ fatal("unexpected intrinsic %d: %s", iid, vmIntrinsics::name_at(iid)); ++ break; ++ } ++ ++ // Live at this point: ++ // rm_method ++ ++ // After figuring out which concrete method to call, jump into it. ++ // Note that this works in the interpreter with no data motion. ++ // But the compiled version will require that r_recv be shifted out. ++ __ verify_method_ptr(rm_method); ++ jump_from_method_handle(_masm, rm_method, temp1, for_compiler_entry); ++ ++ if (iid == vmIntrinsics::_linkToInterface) { ++ __ bind(L_incompatible_class_change_error); ++ address icce_entry= StubRoutines::throw_IncompatibleClassChangeError_entry(); ++ __ jmp(icce_entry, relocInfo::runtime_call_type); ++ } ++ } ++} ++ ++#ifndef PRODUCT ++void trace_method_handle_stub(const char* adaptername, ++ oop mh, ++ intptr_t* saved_regs, ++ intptr_t* entry_sp) { ++ // called as a leaf from native code: do not block the JVM! ++ bool has_mh = (strstr(adaptername, "/static") == NULL && ++ strstr(adaptername, "linkTo") == NULL); // static linkers don't have MH ++ const char* mh_reg_name = has_mh ? "s7_mh" : "s7"; ++ tty->print_cr("MH %s %s=" PTR_FORMAT " sp=" PTR_FORMAT, ++ adaptername, mh_reg_name, ++ p2i(mh), p2i(entry_sp)); ++ ++ if (Verbose) { ++ tty->print_cr("Registers:"); ++ const int saved_regs_count = RegisterImpl::number_of_registers; ++ for (int i = 0; i < saved_regs_count; i++) { ++ Register r = as_Register(i); ++ // The registers are stored in reverse order on the stack (by pusha). ++ tty->print("%3s=" PTR_FORMAT, r->name(), saved_regs[((saved_regs_count - 1) - i)]); ++ if ((i + 1) % 4 == 0) { ++ tty->cr(); ++ } else { ++ tty->print(", "); ++ } ++ } ++ tty->cr(); ++ ++ { ++ // dumping last frame with frame::describe ++ ++ JavaThread* p = JavaThread::active(); ++ ++ ResourceMark rm; ++ PRESERVE_EXCEPTION_MARK; // may not be needed by safer and unexpensive here ++ FrameValues values; ++ ++ // Note: We want to allow trace_method_handle from any call site. ++ // While trace_method_handle creates a frame, it may be entered ++ // without a PC on the stack top (e.g. not just after a call). ++ // Walking that frame could lead to failures due to that invalid PC. ++ // => carefully detect that frame when doing the stack walking ++ ++ // Current C frame ++ frame cur_frame = os::current_frame(); ++ ++ // Robust search of trace_calling_frame (independant of inlining). 
++ // Assumes saved_regs comes from a pusha in the trace_calling_frame. ++ assert(cur_frame.sp() < saved_regs, "registers not saved on stack ?"); ++ frame trace_calling_frame = os::get_sender_for_C_frame(&cur_frame); ++ while (trace_calling_frame.fp() < saved_regs) { ++ trace_calling_frame = os::get_sender_for_C_frame(&trace_calling_frame); ++ } ++ ++ // safely create a frame and call frame::describe ++ intptr_t *dump_sp = trace_calling_frame.sender_sp(); ++ intptr_t *dump_fp = trace_calling_frame.link(); ++ ++ bool walkable = has_mh; // whether the traced frame shoud be walkable ++ ++ if (walkable) { ++ // The previous definition of walkable may have to be refined ++ // if new call sites cause the next frame constructor to start ++ // failing. Alternatively, frame constructors could be ++ // modified to support the current or future non walkable ++ // frames (but this is more intrusive and is not considered as ++ // part of this RFE, which will instead use a simpler output). ++ frame dump_frame = frame(dump_sp, dump_fp); ++ dump_frame.describe(values, 1); ++ } else { ++ // Stack may not be walkable (invalid PC above FP): ++ // Add descriptions without building a Java frame to avoid issues ++ values.describe(-1, dump_fp, "fp for #1 "); ++ values.describe(-1, dump_sp, "sp for #1"); ++ } ++ values.describe(-1, entry_sp, "raw top of stack"); ++ ++ tty->print_cr("Stack layout:"); ++ values.print(p); ++ } ++ if (has_mh && oopDesc::is_oop(mh)) { ++ mh->print(); ++ if (java_lang_invoke_MethodHandle::is_instance(mh)) { ++ if (java_lang_invoke_MethodHandle::form_offset_in_bytes() != 0) ++ java_lang_invoke_MethodHandle::form(mh)->print(); ++ } ++ } ++ } ++} ++ ++// The stub wraps the arguments in a struct on the stack to avoid ++// dealing with the different calling conventions for passing 6 ++// arguments. ++struct MethodHandleStubArguments { ++ const char* adaptername; ++ oopDesc* mh; ++ intptr_t* saved_regs; ++ intptr_t* entry_sp; ++}; ++void trace_method_handle_stub_wrapper(MethodHandleStubArguments* args) { ++ trace_method_handle_stub(args->adaptername, ++ args->mh, ++ args->saved_regs, ++ args->entry_sp); ++} ++ ++void MethodHandles::trace_method_handle(MacroAssembler* _masm, const char* adaptername) { ++} ++#endif //PRODUCT +diff --git a/src/hotspot/cpu/loongarch/methodHandles_loongarch.hpp b/src/hotspot/cpu/loongarch/methodHandles_loongarch.hpp +new file mode 100644 +index 0000000000..f84337424b +--- /dev/null ++++ b/src/hotspot/cpu/loongarch/methodHandles_loongarch.hpp +@@ -0,0 +1,62 @@ ++/* ++ * Copyright (c) 2010, 2012, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2021, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 
++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++// Platform-specific definitions for method handles. ++// These definitions are inlined into class MethodHandles. ++ ++// Adapters ++enum /* platform_dependent_constants */ { ++ adapter_code_size = 32000 DEBUG_ONLY(+ 150000) ++}; ++ ++// Additional helper methods for MethodHandles code generation: ++public: ++ static void load_klass_from_Class(MacroAssembler* _masm, Register klass_reg); ++ ++ static void verify_klass(MacroAssembler* _masm, ++ Register obj, SystemDictionary::WKID klass_id, ++ const char* error_message = "wrong klass") NOT_DEBUG_RETURN; ++ ++ static void verify_method_handle(MacroAssembler* _masm, Register mh_reg) { ++ verify_klass(_masm, mh_reg, SystemDictionary::WK_KLASS_ENUM_NAME(java_lang_invoke_MethodHandle), ++ "reference is a MH"); ++ } ++ ++ static void verify_ref_kind(MacroAssembler* _masm, int ref_kind, Register member_reg, Register temp) NOT_DEBUG_RETURN; ++ ++ // Similar to InterpreterMacroAssembler::jump_from_interpreted. ++ // Takes care of special dispatch from single stepping too. ++ static void jump_from_method_handle(MacroAssembler* _masm, Register method, Register temp, ++ bool for_compiler_entry); ++ ++ static void jump_to_lambda_form(MacroAssembler* _masm, ++ Register recv, Register method_temp, ++ Register temp2, ++ bool for_compiler_entry); ++ ++ static Register saved_last_sp_register() { ++ // Should be in sharedRuntime, not here. ++ return R3; ++ } +diff --git a/src/hotspot/cpu/loongarch/nativeInst_loongarch.cpp b/src/hotspot/cpu/loongarch/nativeInst_loongarch.cpp +new file mode 100644 +index 0000000000..9234befae3 +--- /dev/null ++++ b/src/hotspot/cpu/loongarch/nativeInst_loongarch.cpp +@@ -0,0 +1,511 @@ ++/* ++ * Copyright (c) 1997, 2014, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2021, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ * ++ */ ++ ++#include "precompiled.hpp" ++#include "asm/macroAssembler.hpp" ++#include "code/codeCache.hpp" ++#include "code/compiledIC.hpp" ++#include "memory/resourceArea.hpp" ++#include "nativeInst_loongarch.hpp" ++#include "oops/oop.inline.hpp" ++#include "runtime/handles.hpp" ++#include "runtime/sharedRuntime.hpp" ++#include "runtime/stubRoutines.hpp" ++#include "utilities/ostream.hpp" ++ ++#ifndef PRODUCT ++#include "compiler/disassembler.hpp" ++#endif ++ ++#include ++ ++#define T0 RT0 ++#define T1 RT1 ++#define T2 RT2 ++#define T3 RT3 ++#define T4 RT4 ++#define T5 RT5 ++#define T6 RT6 ++#define T7 RT7 ++#define T8 RT8 ++ ++void NativeInstruction::wrote(int offset) { ++ ICache::invalidate_word(addr_at(offset)); ++} ++ ++void NativeInstruction::set_long_at(int offset, long i) { ++ address addr = addr_at(offset); ++ *(long*)addr = i; ++ ICache::invalidate_range(addr, 8); ++} ++ ++bool NativeInstruction::is_int_branch() { ++ int op = Assembler::high(insn_word(), 6); ++ return op == Assembler::beqz_op || op == Assembler::bnez_op || ++ op == Assembler::beq_op || op == Assembler::bne_op || ++ op == Assembler::blt_op || op == Assembler::bge_op || ++ op == Assembler::bltu_op || op == Assembler::bgeu_op; ++} ++ ++bool NativeInstruction::is_float_branch() { ++ return Assembler::high(insn_word(), 6) == Assembler::bccondz_op; ++} ++ ++bool NativeInstruction::is_lu12iw_lu32id() const { ++ return Assembler::high(int_at(0), 7) == Assembler::lu12i_w_op && ++ Assembler::high(int_at(4), 7) == Assembler::lu32i_d_op; ++} ++ ++bool NativeInstruction::is_pcaddu12i_add() const { ++ return Assembler::high(int_at(0), 7) == Assembler::pcaddu12i_op && ++ Assembler::high(int_at(4), 10) == Assembler::addi_d_op; ++} ++ ++bool NativeCall::is_bl() const { ++ return Assembler::high(int_at(0), 6) == Assembler::bl_op; ++} ++ ++void NativeCall::verify() { ++ assert(is_bl(), "not a NativeCall"); ++} ++ ++address NativeCall::target_addr_for_bl(address orig_addr) const { ++ address addr = orig_addr ? orig_addr : addr_at(0); ++ ++ // bl ++ if (is_bl()) { ++ return addr + (Assembler::simm26(((int_at(0) & 0x3ff) << 16) | ++ ((int_at(0) >> 10) & 0xffff)) << 2); ++ } ++ ++ fatal("not a NativeCall"); ++ return NULL; ++} ++ ++address NativeCall::destination() const { ++ address addr = (address)this; ++ address destination = target_addr_for_bl(); ++ // Do we use a trampoline stub for this call? ++ // Trampoline stubs are located behind the main code. ++ if (destination > addr) { ++ // Filter out recursive method invocation (call to verified/unverified entry point). ++ CodeBlob* cb = CodeCache::find_blob_unsafe(addr); // Else we get assertion if nmethod is zombie. ++ assert(cb && cb->is_nmethod(), "sanity"); ++ nmethod *nm = (nmethod *)cb; ++ NativeInstruction* ni = nativeInstruction_at(destination); ++ if (nm->stub_contains(destination) && ni->is_NativeCallTrampolineStub_at()) { ++ // Yes we do, so get the destination from the trampoline stub. ++ const address trampoline_stub_addr = destination; ++ destination = nativeCallTrampolineStub_at(trampoline_stub_addr)->destination(); ++ } ++ } ++ return destination; ++} ++ ++// Similar to replace_mt_safe, but just changes the destination. The ++// important thing is that free-running threads are able to execute this ++// call instruction at all times. ++// ++// Used in the runtime linkage of calls; see class CompiledIC. ++// ++// Add parameter assert_lock to switch off assertion ++// during code generation, where no patching lock is needed. 
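++// Illustrative use (a sketch only; the variable names are examples, not code from this patch):
++// the CompiledIC machinery resolves a call site and then re-points it while other threads may
++// still be executing it:
++//   NativeCall* call = nativeCall_at(call_site_pc);
++//   call->set_destination_mt_safe(resolved_entry);
++// If resolved_entry is outside the +-128 MB reach of a single bl, the call is redirected to the
++// nmethod's trampoline stub and the far destination is written into that stub instead.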
++void NativeCall::set_destination_mt_safe(address dest, bool assert_lock) { ++ assert(!assert_lock || ++ (Patching_lock->is_locked() || SafepointSynchronize::is_at_safepoint()), ++ "concurrent code patching"); ++ ++ ResourceMark rm; ++ address addr_call = addr_at(0); ++ bool reachable = MacroAssembler::reachable_from_branch_short(dest - addr_call); ++ assert(NativeCall::is_call_at(addr_call), "unexpected code at call site"); ++ ++ // Patch the call. ++ if (!reachable) { ++ address trampoline_stub_addr = get_trampoline(); ++ assert (trampoline_stub_addr != NULL, "we need a trampoline"); ++ guarantee(Assembler::is_simm((trampoline_stub_addr - addr_call) >> 2, 26), "cannot reach trampoline stub"); ++ ++ // Patch the constant in the call's trampoline stub. ++ NativeInstruction* ni = nativeInstruction_at(dest); ++ assert (! ni->is_NativeCallTrampolineStub_at(), "chained trampolines"); ++ nativeCallTrampolineStub_at(trampoline_stub_addr)->set_destination(dest); ++ dest = trampoline_stub_addr; ++ } ++ set_destination(dest); ++} ++ ++address NativeCall::get_trampoline() { ++ address call_addr = addr_at(0); ++ ++ CodeBlob *code = CodeCache::find_blob(call_addr); ++ assert(code != NULL, "Could not find the containing code blob"); ++ ++ address bl_destination ++ = nativeCall_at(call_addr)->target_addr_for_bl(); ++ NativeInstruction* ni = nativeInstruction_at(bl_destination); ++ if (code->contains(bl_destination) && ++ ni->is_NativeCallTrampolineStub_at()) ++ return bl_destination; ++ ++ if (code->is_nmethod()) { ++ return trampoline_stub_Relocation::get_trampoline_for(call_addr, (nmethod*)code); ++ } ++ ++ return NULL; ++} ++ ++void NativeCall::set_destination(address dest) { ++ address addr_call = addr_at(0); ++ CodeBuffer cb(addr_call, instruction_size); ++ MacroAssembler masm(&cb); ++ assert(is_call_at(addr_call), "unexpected call type"); ++ jlong offs = dest - addr_call; ++ masm.bl(offs >> 2); ++ ICache::invalidate_range(addr_call, instruction_size); ++} ++ ++// Generate a trampoline for a branch to dest. If there's no need for a ++// trampoline, simply patch the call directly to dest. ++address NativeCall::trampoline_jump(CodeBuffer &cbuf, address dest) { ++ MacroAssembler a(&cbuf); ++ address stub = NULL; ++ ++ if (a.far_branches() ++ && ! is_NativeCallTrampolineStub_at()) { ++ stub = a.emit_trampoline_stub(instruction_address() - cbuf.insts()->start(), dest); ++ } ++ ++ if (stub == NULL) { ++ // If we generated no stub, patch this call directly to dest. ++ // This will happen if we don't need far branches or if there ++ // already was a trampoline. ++ set_destination(dest); ++ } ++ ++ return stub; ++} ++ ++void NativeCall::print() { ++ tty->print_cr(PTR_FORMAT ": call " PTR_FORMAT, ++ p2i(instruction_address()), p2i(destination())); ++} ++ ++// Inserts a native call instruction at a given pc ++void NativeCall::insert(address code_pos, address entry) { ++ //TODO: LA ++ guarantee(0, "LA not implemented yet"); ++} ++ ++// MT-safe patching of a call instruction. ++// First patches first word of instruction to two jmp's that jmps to them ++// selfs (spinlock). Then patches the last byte, and then atomicly replaces ++// the jmp's with the first 4 byte of the new instruction. 
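++// Note: the LoongArch port does not implement this patching scheme; the routine below simply
++// calls Unimplemented().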
++void NativeCall::replace_mt_safe(address instr_addr, address code_buffer) { ++ Unimplemented(); ++} ++ ++bool NativeFarCall::is_short() const { ++ return Assembler::high(int_at(0), 10) == Assembler::andi_op && ++ Assembler::low(int_at(0), 22) == 0 && ++ Assembler::high(int_at(4), 6) == Assembler::bl_op; ++} ++ ++bool NativeFarCall::is_far() const { ++ return Assembler::high(int_at(0), 7) == Assembler::pcaddu18i_op && ++ Assembler::high(int_at(4), 6) == Assembler::jirl_op && ++ Assembler::low(int_at(4), 5) == RA->encoding(); ++} ++ ++address NativeFarCall::destination(address orig_addr) const { ++ address addr = orig_addr ? orig_addr : addr_at(0); ++ ++ if (is_short()) { ++ // short ++ return addr + BytesPerInstWord + ++ (Assembler::simm26(((int_at(4) & 0x3ff) << 16) | ++ ((int_at(4) >> 10) & 0xffff)) << 2); ++ } ++ ++ if (is_far()) { ++ // far ++ return addr + ((intptr_t)Assembler::simm20(int_at(0) >> 5 & 0xfffff) << 18) + ++ (Assembler::simm16(int_at(4) >> 10 & 0xffff) << 2); ++ } ++ ++ fatal("not a NativeFarCall"); ++ return NULL; ++} ++ ++void NativeFarCall::set_destination(address dest) { ++ address addr_call = addr_at(0); ++ CodeBuffer cb(addr_call, instruction_size); ++ MacroAssembler masm(&cb); ++ assert(is_far_call_at(addr_call), "unexpected call type"); ++ masm.patchable_call(dest, addr_call); ++ ICache::invalidate_range(addr_call, instruction_size); ++} ++ ++void NativeFarCall::verify() { ++ assert(is_short() || is_far(), "not a NativeFarcall"); ++} ++ ++//------------------------------------------------------------------- ++ ++bool NativeMovConstReg::is_lu12iw_ori_lu32id() const { ++ return Assembler::high(int_at(0), 7) == Assembler::lu12i_w_op && ++ Assembler::high(int_at(4), 10) == Assembler::ori_op && ++ Assembler::high(int_at(8), 7) == Assembler::lu32i_d_op; ++} ++ ++bool NativeMovConstReg::is_lu12iw_lu32id_nop() const { ++ return Assembler::high(int_at(0), 7) == Assembler::lu12i_w_op && ++ Assembler::high(int_at(4), 7) == Assembler::lu32i_d_op && ++ Assembler::high(int_at(8), 10) == Assembler::andi_op; ++} ++ ++bool NativeMovConstReg::is_lu12iw_2nop() const { ++ return Assembler::high(int_at(0), 7) == Assembler::lu12i_w_op && ++ Assembler::high(int_at(4), 10) == Assembler::andi_op && ++ Assembler::high(int_at(8), 10) == Assembler::andi_op; ++} ++ ++bool NativeMovConstReg::is_lu12iw_ori_nop() const { ++ return Assembler::high(int_at(0), 7) == Assembler::lu12i_w_op && ++ Assembler::high(int_at(4), 10) == Assembler::ori_op && ++ Assembler::high(int_at(8), 10) == Assembler::andi_op; ++} ++ ++bool NativeMovConstReg::is_addid_2nop() const { ++ return Assembler::high(int_at(0), 10) == Assembler::addi_d_op && ++ Assembler::high(int_at(4), 10) == Assembler::andi_op && ++ Assembler::high(int_at(8), 10) == Assembler::andi_op; ++} ++ ++void NativeMovConstReg::verify() { ++ assert(is_li52(), "not a mov reg, imm52"); ++} ++ ++void NativeMovConstReg::print() { ++ tty->print_cr(PTR_FORMAT ": mov reg, " INTPTR_FORMAT, ++ p2i(instruction_address()), data()); ++} ++ ++intptr_t NativeMovConstReg::data() const { ++ if (is_lu12iw_ori_lu32id()) { ++ return Assembler::merge((intptr_t)((int_at(4) >> 10) & 0xfff), ++ (intptr_t)((int_at(0) >> 5) & 0xfffff), ++ (intptr_t)((int_at(8) >> 5) & 0xfffff)); ++ } ++ ++ if (is_lu12iw_lu32id_nop()) { ++ return Assembler::merge((intptr_t)0, ++ (intptr_t)((int_at(0) >> 5) & 0xfffff), ++ (intptr_t)((int_at(4) >> 5) & 0xfffff)); ++ } ++ ++ if (is_lu12iw_2nop()) { ++ return Assembler::merge((intptr_t)0, ++ (intptr_t)((int_at(0) >> 5) & 0xfffff)); ++ } ++ ++ if 
(is_lu12iw_ori_nop()) { ++ return Assembler::merge((intptr_t)((int_at(4) >> 10) & 0xfff), ++ (intptr_t)((int_at(0) >> 5) & 0xfffff)); ++ } ++ ++ if (is_addid_2nop()) { ++ return Assembler::simm12((int_at(0) >> 10) & 0xfff); ++ } ++ ++#ifndef PRODUCT ++ Disassembler::decode(addr_at(0), addr_at(0) + 16, tty); ++#endif ++ fatal("not a mov reg, imm52"); ++ return 0; // unreachable ++} ++ ++void NativeMovConstReg::set_data(intptr_t x, intptr_t o) { ++ CodeBuffer cb(addr_at(0), instruction_size); ++ MacroAssembler masm(&cb); ++ masm.patchable_li52(as_Register(int_at(0) & 0x1f), x); ++ ICache::invalidate_range(addr_at(0), instruction_size); ++ ++ // Find and replace the oop/metadata corresponding to this ++ // instruction in oops section. ++ CodeBlob* blob = CodeCache::find_blob_unsafe(instruction_address()); ++ nmethod* nm = blob->as_nmethod_or_null(); ++ if (nm != NULL) { ++ o = o ? o : x; ++ RelocIterator iter(nm, instruction_address(), next_instruction_address()); ++ while (iter.next()) { ++ if (iter.type() == relocInfo::oop_type) { ++ oop* oop_addr = iter.oop_reloc()->oop_addr(); ++ *oop_addr = cast_to_oop(o); ++ break; ++ } else if (iter.type() == relocInfo::metadata_type) { ++ Metadata** metadata_addr = iter.metadata_reloc()->metadata_addr(); ++ *metadata_addr = (Metadata*)o; ++ break; ++ } ++ } ++ } ++} ++ ++//------------------------------------------------------------------- ++ ++int NativeMovRegMem::offset() const{ ++ //TODO: LA ++ guarantee(0, "LA not implemented yet"); ++ return 0; // mute compiler ++} ++ ++void NativeMovRegMem::set_offset(int x) { ++ //TODO: LA ++ guarantee(0, "LA not implemented yet"); ++} ++ ++void NativeMovRegMem::verify() { ++ //TODO: LA ++ guarantee(0, "LA not implemented yet"); ++} ++ ++ ++void NativeMovRegMem::print() { ++ //TODO: LA ++ guarantee(0, "LA not implemented yet"); ++} ++ ++bool NativeInstruction::is_sigill_zombie_not_entrant() { ++ return uint_at(0) == NativeIllegalInstruction::instruction_code; ++} ++ ++void NativeIllegalInstruction::insert(address code_pos) { ++ *(juint*)code_pos = instruction_code; ++ ICache::invalidate_range(code_pos, instruction_size); ++} ++ ++void NativeJump::verify() { ++ assert(is_short() || is_far(), "not a general jump instruction"); ++} ++ ++bool NativeJump::is_short() { ++ return Assembler::high(insn_word(), 6) == Assembler::b_op; ++} ++ ++bool NativeJump::is_far() { ++ return Assembler::high(int_at(0), 7) == Assembler::pcaddu18i_op && ++ Assembler::high(int_at(4), 6) == Assembler::jirl_op && ++ Assembler::low(int_at(4), 5) == R0->encoding(); ++} ++ ++address NativeJump::jump_destination(address orig_addr) { ++ address addr = orig_addr ? orig_addr : addr_at(0); ++ address ret = (address)-1; ++ ++ // short ++ if (is_short()) { ++ ret = addr + (Assembler::simm26(((int_at(0) & 0x3ff) << 16) | ++ ((int_at(0) >> 10) & 0xffff)) << 2); ++ return ret == instruction_address() ? (address)-1 : ret; ++ } ++ ++ // far ++ if (is_far()) { ++ ret = addr + ((intptr_t)Assembler::simm20(int_at(0) >> 5 & 0xfffff) << 18) + ++ (Assembler::simm16(int_at(4) >> 10 & 0xffff) << 2); ++ return ret == instruction_address() ? 
(address)-1 : ret; ++ } ++ ++ fatal("not a jump"); ++ return NULL; ++} ++ ++void NativeJump::set_jump_destination(address dest) { ++ OrderAccess::fence(); ++ ++ CodeBuffer cb(addr_at(0), instruction_size); ++ MacroAssembler masm(&cb); ++ masm.patchable_jump(dest); ++ ICache::invalidate_range(addr_at(0), instruction_size); ++} ++ ++void NativeGeneralJump::insert_unconditional(address code_pos, address entry) { ++ //TODO: LA ++ guarantee(0, "LA not implemented yet"); ++} ++ ++// MT-safe patching of a long jump instruction. ++// First patches first word of instruction to two jmp's that jmps to them ++// selfs (spinlock). Then patches the last byte, and then atomicly replaces ++// the jmp's with the first 4 byte of the new instruction. ++void NativeGeneralJump::replace_mt_safe(address instr_addr, address code_buffer) { ++ //TODO: LA ++ guarantee(0, "LA not implemented yet"); ++} ++ ++// Must ensure atomicity ++void NativeJump::patch_verified_entry(address entry, address verified_entry, address dest) { ++ assert(dest == SharedRuntime::get_handle_wrong_method_stub(), "expected fixed destination of patch"); ++ jlong offs = dest - verified_entry; ++ ++ if (MacroAssembler::reachable_from_branch_short(offs)) { ++ CodeBuffer cb(verified_entry, 1 * BytesPerInstWord); ++ MacroAssembler masm(&cb); ++ masm.b(dest); ++ } else { ++ // We use an illegal instruction for marking a method as ++ // not_entrant or zombie ++ NativeIllegalInstruction::insert(verified_entry); ++ } ++ ICache::invalidate_range(verified_entry, 1 * BytesPerInstWord); ++} ++ ++bool NativeInstruction::is_dtrace_trap() { ++ //return (*(int32_t*)this & 0xff) == 0xcc; ++ Unimplemented(); ++ return false; ++} ++ ++bool NativeInstruction::is_safepoint_poll() { ++ // ++ // 390 li T2, 0x0000000000400000 #@loadConP ++ // 394 st_w [SP + #12], V1 # spill 9 ++ // 398 Safepoint @ [T2] : poll for GC @ safePoint_poll # spec.benchmarks.compress.Decompressor::decompress @ bci:224 L[0]=A6 L[1]=_ L[2]=sp + #28 L[3]=_ L[4]=V1 ++ // ++ // 0x000000ffe5815130: lu12i_w t2, 0x40 ++ // 0x000000ffe5815134: st_w v1, 0xc(sp) ; OopMap{a6=Oop off=920} ++ // ;*goto ++ // ; - spec.benchmarks.compress.Decompressor::decompress@224 (line 584) ++ // ++ // 0x000000ffe5815138: ld_w at, 0x0(t2) ;*goto <--- PC ++ // ; - spec.benchmarks.compress.Decompressor::decompress@224 (line 584) ++ // ++ ++ // Since there may be some spill instructions between the safePoint_poll and loadConP, ++ // we check the safepoint instruction like this. ++ return Assembler::high(insn_word(), 10) == Assembler::ld_w_op && ++ Assembler::low(insn_word(), 5) == AT->encoding(); ++} +diff --git a/src/hotspot/cpu/loongarch/nativeInst_loongarch.hpp b/src/hotspot/cpu/loongarch/nativeInst_loongarch.hpp +new file mode 100644 +index 0000000000..a6e9d4dd3c +--- /dev/null ++++ b/src/hotspot/cpu/loongarch/nativeInst_loongarch.hpp +@@ -0,0 +1,528 @@ ++/* ++ * Copyright (c) 1997, 2011, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2021, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#ifndef CPU_LOONGARCH_NATIVEINST_LOONGARCH_HPP ++#define CPU_LOONGARCH_NATIVEINST_LOONGARCH_HPP ++ ++#include "asm/assembler.hpp" ++#include "runtime/icache.hpp" ++#include "runtime/os.hpp" ++#include "runtime/orderAccess.hpp" ++#include "runtime/safepointMechanism.hpp" ++ ++// We have interfaces for the following instructions: ++// - NativeInstruction ++// - - NativeCall ++// - - NativeMovConstReg ++// - - NativeMovConstRegPatching ++// - - NativeMovRegMem ++// - - NativeMovRegMemPatching ++// - - NativeIllegalOpCode ++// - - NativeGeneralJump ++// - - NativePushConst ++// - - NativeTstRegMem ++ ++// The base class for different kinds of native instruction abstractions. ++// Provides the primitive operations to manipulate code relative to this. ++ ++class NativeInstruction { ++ friend class Relocation; ++ ++ public: ++ enum loongarch_specific_constants { ++ nop_instruction_code = 0, ++ nop_instruction_size = 4, ++ sync_instruction_code = 0xf, ++ sync_instruction_size = 4 ++ }; ++ ++ bool is_nop() { guarantee(0, "LA not implemented yet"); return long_at(0) == nop_instruction_code; } ++ bool is_sync() { return Assembler::high(insn_word(), 17) == Assembler::dbar_op; } ++ bool is_dtrace_trap(); ++ inline bool is_call(); ++ inline bool is_far_call(); ++ inline bool is_illegal(); ++ bool is_jump(); ++ bool is_safepoint_poll(); ++ ++ // Helper func for jvmci ++ bool is_lu12iw_lu32id() const; ++ bool is_pcaddu12i_add() const; ++ ++ // LoongArch has no instruction to generate a illegal instrucion exception? ++ // But `break 11` is not illegal instruction for LoongArch. ++ static int illegal_instruction(); ++ ++ bool is_int_branch(); ++ bool is_float_branch(); ++ ++ inline bool is_NativeCallTrampolineStub_at(); ++ //We use an illegal instruction for marking a method as not_entrant or zombie. 
++ bool is_sigill_zombie_not_entrant(); ++ ++ protected: ++ address addr_at(int offset) const { return address(this) + offset; } ++ address instruction_address() const { return addr_at(0); } ++ address next_instruction_address() const { return addr_at(BytesPerInstWord); } ++ address prev_instruction_address() const { return addr_at(-BytesPerInstWord); } ++ ++ s_char sbyte_at(int offset) const { return *(s_char*) addr_at(offset); } ++ u_char ubyte_at(int offset) const { return *(u_char*) addr_at(offset); } ++ ++ jint int_at(int offset) const { return *(jint*) addr_at(offset); } ++ juint uint_at(int offset) const { return *(juint*) addr_at(offset); } ++ ++ intptr_t ptr_at(int offset) const { return *(intptr_t*) addr_at(offset); } ++ ++ oop oop_at (int offset) const { return *(oop*) addr_at(offset); } ++ int long_at(int offset) const { return *(jint*)addr_at(offset); } ++ ++ ++ void set_char_at(int offset, char c) { *addr_at(offset) = (u_char)c; wrote(offset); } ++ void set_int_at(int offset, jint i) { *(jint*)addr_at(offset) = i; wrote(offset); } ++ void set_ptr_at (int offset, intptr_t ptr) { *(intptr_t*) addr_at(offset) = ptr; wrote(offset); } ++ void set_oop_at (int offset, oop o) { *(oop*) addr_at(offset) = o; wrote(offset); } ++ void set_long_at(int offset, long i); ++ ++ int insn_word() const { return long_at(0); } ++ ++ void wrote(int offset); ++ ++ public: ++ ++ // unit test stuff ++ static void test() {} // override for testing ++ ++ inline friend NativeInstruction* nativeInstruction_at(address address); ++}; ++ ++inline NativeInstruction* nativeInstruction_at(address address) { ++ NativeInstruction* inst = (NativeInstruction*)address; ++#ifdef ASSERT ++ //inst->verify(); ++#endif ++ return inst; ++} ++ ++inline NativeCall* nativeCall_at(address address); ++ ++// The NativeCall is an abstraction for accessing/manipulating native call ++// instructions (used to manipulate inline caches, primitive & dll calls, etc.). ++class NativeCall: public NativeInstruction { ++ public: ++ enum loongarch_specific_constants { ++ instruction_offset = 0, ++ instruction_size = 1 * BytesPerInstWord, ++ return_address_offset = 1 * BytesPerInstWord, ++ displacement_offset = 0 ++ }; ++ ++ // We have only bl. ++ bool is_bl() const; ++ ++ address instruction_address() const { return addr_at(instruction_offset); } ++ ++ address next_instruction_address() const { ++ return addr_at(return_address_offset); ++ } ++ ++ address return_address() const { ++ return next_instruction_address(); ++ } ++ ++ address target_addr_for_bl(address orig_addr = 0) const; ++ address destination() const; ++ void set_destination(address dest); ++ ++ void verify_alignment() {} ++ void verify(); ++ void print(); ++ ++ // Creation ++ inline friend NativeCall* nativeCall_at(address address); ++ inline friend NativeCall* nativeCall_before(address return_address); ++ ++ static bool is_call_at(address instr) { ++ return nativeInstruction_at(instr)->is_call(); ++ } ++ ++ static bool is_call_before(address return_address) { ++ return is_call_at(return_address - return_address_offset); ++ } ++ ++ // MT-safe patching of a call instruction. ++ static void insert(address code_pos, address entry); ++ static void replace_mt_safe(address instr_addr, address code_buffer); ++ ++ // Similar to replace_mt_safe, but just changes the destination. The ++ // important thing is that free-running threads are able to execute ++ // this call instruction at all times. 
If the call is an immediate bl ++ // instruction we can simply rely on atomicity of 32-bit writes to ++ // make sure other threads will see no intermediate states. ++ ++ // We cannot rely on locks here, since the free-running threads must run at ++ // full speed. ++ // ++ // Used in the runtime linkage of calls; see class CompiledIC. ++ ++ // The parameter assert_lock disables the assertion during code generation. ++ void set_destination_mt_safe(address dest, bool assert_lock = true); ++ ++ address get_trampoline(); ++ address trampoline_jump(CodeBuffer &cbuf, address dest); ++}; ++ ++inline NativeCall* nativeCall_at(address address) { ++ NativeCall* call = (NativeCall*)(address - NativeCall::instruction_offset); ++#ifdef ASSERT ++ call->verify(); ++#endif ++ return call; ++} ++ ++inline NativeCall* nativeCall_before(address return_address) { ++ NativeCall* call = (NativeCall*)(return_address - NativeCall::return_address_offset); ++#ifdef ASSERT ++ call->verify(); ++#endif ++ return call; ++} ++ ++// The NativeFarCall is an abstraction for accessing/manipulating native ++// call-anywhere instructions. ++// Used to call native methods which may be loaded anywhere in the address ++// space, possibly out of reach of a call instruction. ++class NativeFarCall: public NativeInstruction { ++ public: ++ enum loongarch_specific_constants { ++ instruction_offset = 0, ++ instruction_size = 2 * BytesPerInstWord ++ }; ++ ++ address instruction_address() const { return addr_at(instruction_offset); } ++ ++ // We use MacroAssembler::patchable_call() for implementing a ++ // call-anywhere instruction. ++ bool is_short() const; ++ bool is_far() const; ++ ++ // Checks whether instr points at a NativeFarCall instruction. ++ static bool is_far_call_at(address address) { ++ return nativeInstruction_at(address)->is_far_call(); ++ } ++ ++ // Returns the NativeFarCall's destination. ++ address destination(address orig_addr = 0) const; ++ ++ // Sets the NativeFarCall's destination, not necessarily mt-safe. ++ // Used when relocating code. ++ void set_destination(address dest); ++ ++ void verify(); ++}; ++ ++// Instantiates a NativeFarCall object starting at the given instruction ++// address and returns the NativeFarCall object. ++inline NativeFarCall* nativeFarCall_at(address address) { ++ NativeFarCall* call = (NativeFarCall*)address; ++#ifdef ASSERT ++ call->verify(); ++#endif ++ return call; ++} ++ ++// An interface for accessing/manipulating native set_oop imm, reg instructions ++// (used to manipulate inlined data references, etc.). 
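NativeFarCall's far form is the usual two-instruction sequence: pcaddu18i materialises pc + (si20 << 18) into a scratch register, and jirl then adds (si16 << 2) and jumps, giving a reach of roughly +/-128 GiB. A standalone sketch of the target computation performed by NativeFarCall::destination() above; the field positions are taken from the patch, while the function name is illustrative:

    #include <cstdint>

    // Recover the target of a pcaddu18i/jirl pair, mirroring
    // NativeFarCall::destination(): pcaddu18i's si20 lives in bits 24..5,
    // jirl's si16 in bits 25..10.
    static uint64_t far_call_target(uint64_t pc, uint32_t insn0, uint32_t insn1) {
      int32_t si20 = (int32_t)(((insn0 >> 5) & 0xfffff) << 12) >> 12;  // sign-extend 20 bits
      int32_t si16 = (int16_t)((insn1 >> 10) & 0xffff);                // sign-extend 16 bits
      return pc + ((int64_t)si20 << 18) + ((int64_t)si16 << 2);
    }

    int main() {
      // si20 = 1, si16 = 4: target = pc + 0x40000 + 16 (opcode bits omitted; only the fields matter).
      uint32_t i0 = 1u << 5, i1 = 4u << 10;
      return far_call_target(0x1000, i0, i1) == 0x1000 + 0x40000 + 16 ? 0 : 1;
    }
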
++class NativeMovConstReg: public NativeInstruction { ++ public: ++ enum loongarch_specific_constants { ++ instruction_offset = 0, ++ instruction_size = 3 * BytesPerInstWord, ++ next_instruction_offset = 3 * BytesPerInstWord, ++ }; ++ ++ int insn_word() const { return long_at(instruction_offset); } ++ address instruction_address() const { return addr_at(0); } ++ address next_instruction_address() const { return addr_at(next_instruction_offset); } ++ intptr_t data() const; ++ void set_data(intptr_t x, intptr_t o = 0); ++ ++ bool is_li52() const { ++ return is_lu12iw_ori_lu32id() || ++ is_lu12iw_lu32id_nop() || ++ is_lu12iw_2nop() || ++ is_lu12iw_ori_nop() || ++ is_addid_2nop(); ++ } ++ bool is_lu12iw_ori_lu32id() const; ++ bool is_lu12iw_lu32id_nop() const; ++ bool is_lu12iw_2nop() const; ++ bool is_lu12iw_ori_nop() const; ++ bool is_addid_2nop() const; ++ void verify(); ++ void print(); ++ ++ // unit test stuff ++ static void test() {} ++ ++ // Creation ++ inline friend NativeMovConstReg* nativeMovConstReg_at(address address); ++ inline friend NativeMovConstReg* nativeMovConstReg_before(address address); ++}; ++ ++inline NativeMovConstReg* nativeMovConstReg_at(address address) { ++ NativeMovConstReg* test = (NativeMovConstReg*)(address - NativeMovConstReg::instruction_offset); ++#ifdef ASSERT ++ test->verify(); ++#endif ++ return test; ++} ++ ++inline NativeMovConstReg* nativeMovConstReg_before(address address) { ++ NativeMovConstReg* test = (NativeMovConstReg*)(address - NativeMovConstReg::instruction_size - NativeMovConstReg::instruction_offset); ++#ifdef ASSERT ++ test->verify(); ++#endif ++ return test; ++} ++ ++class NativeMovConstRegPatching: public NativeMovConstReg { ++ private: ++ friend NativeMovConstRegPatching* nativeMovConstRegPatching_at(address address) { ++ NativeMovConstRegPatching* test = (NativeMovConstRegPatching*)(address - instruction_offset); ++ #ifdef ASSERT ++ test->verify(); ++ #endif ++ return test; ++ } ++}; ++ ++class NativeMovRegMem: public NativeInstruction { ++ public: ++ enum loongarch_specific_constants { ++ instruction_offset = 0, ++ instruction_size = 4, ++ hiword_offset = 4, ++ ldst_offset = 12, ++ immediate_size = 4, ++ ldst_size = 16 ++ }; ++ ++ address instruction_address() const { return addr_at(instruction_offset); } ++ ++ int num_bytes_to_end_of_patch() const { return instruction_offset + instruction_size; } ++ ++ int offset() const; ++ ++ void set_offset(int x); ++ ++ void add_offset_in_bytes(int add_offset) { set_offset ( ( offset() + add_offset ) ); } ++ ++ void verify(); ++ void print (); ++ ++ // unit test stuff ++ static void test() {} ++ ++ private: ++ inline friend NativeMovRegMem* nativeMovRegMem_at (address address); ++}; ++ ++inline NativeMovRegMem* nativeMovRegMem_at (address address) { ++ NativeMovRegMem* test = (NativeMovRegMem*)(address - NativeMovRegMem::instruction_offset); ++#ifdef ASSERT ++ test->verify(); ++#endif ++ return test; ++} ++ ++class NativeMovRegMemPatching: public NativeMovRegMem { ++ private: ++ friend NativeMovRegMemPatching* nativeMovRegMemPatching_at (address address) { ++ NativeMovRegMemPatching* test = (NativeMovRegMemPatching*)(address - instruction_offset); ++ #ifdef ASSERT ++ test->verify(); ++ #endif ++ return test; ++ } ++}; ++ ++ ++// Handles all kinds of jump on Loongson. 
++// short: ++// b offs26 ++// nop ++// ++// far: ++// pcaddu18i reg, si20 ++// jirl r0, reg, si18 ++// ++class NativeJump: public NativeInstruction { ++ public: ++ enum loongarch_specific_constants { ++ instruction_offset = 0, ++ instruction_size = 2 * BytesPerInstWord ++ }; ++ ++ bool is_short(); ++ bool is_far(); ++ ++ address instruction_address() const { return addr_at(instruction_offset); } ++ address jump_destination(address orig_addr = 0); ++ void set_jump_destination(address dest); ++ ++ // Creation ++ inline friend NativeJump* nativeJump_at(address address); ++ ++ // Insertion of native jump instruction ++ static void insert(address code_pos, address entry) { Unimplemented(); } ++ // MT-safe insertion of native jump at verified method entry ++ static void check_verified_entry_alignment(address entry, address verified_entry){} ++ static void patch_verified_entry(address entry, address verified_entry, address dest); ++ ++ void verify(); ++}; ++ ++inline NativeJump* nativeJump_at(address address) { ++ NativeJump* jump = (NativeJump*)(address - NativeJump::instruction_offset); ++ debug_only(jump->verify();) ++ return jump; ++} ++ ++class NativeGeneralJump: public NativeJump { ++ public: ++ // Creation ++ inline friend NativeGeneralJump* nativeGeneralJump_at(address address); ++ ++ // Insertion of native general jump instruction ++ static void insert_unconditional(address code_pos, address entry); ++ static void replace_mt_safe(address instr_addr, address code_buffer); ++}; ++ ++inline NativeGeneralJump* nativeGeneralJump_at(address address) { ++ NativeGeneralJump* jump = (NativeGeneralJump*)(address); ++ debug_only(jump->verify();) ++ return jump; ++} ++ ++class NativeIllegalInstruction: public NativeInstruction { ++public: ++ enum loongarch_specific_constants { ++ instruction_code = 0xbadc0de0, // TODO: LA ++ // Temporary LoongArch reserved instruction ++ instruction_size = 4, ++ instruction_offset = 0, ++ next_instruction_offset = 4 ++ }; ++ ++ // Insert illegal opcode as specific address ++ static void insert(address code_pos); ++}; ++ ++inline bool NativeInstruction::is_illegal() { return insn_word() == illegal_instruction(); } ++ ++inline bool NativeInstruction::is_call() { ++ NativeCall *call = (NativeCall*)instruction_address(); ++ return call->is_bl(); ++} ++ ++inline bool NativeInstruction::is_far_call() { ++ NativeFarCall *call = (NativeFarCall*)instruction_address(); ++ ++ // short ++ if (call->is_short()) { ++ return true; ++ } ++ ++ // far ++ if (call->is_far()) { ++ return true; ++ } ++ ++ return false; ++} ++ ++inline bool NativeInstruction::is_jump() ++{ ++ NativeGeneralJump *jump = (NativeGeneralJump*)instruction_address(); ++ ++ // short ++ if (jump->is_short()) { ++ return true; ++ } ++ ++ // far ++ if (jump->is_far()) { ++ return true; ++ } ++ ++ return false; ++} ++ ++// Call trampoline stubs. ++class NativeCallTrampolineStub : public NativeInstruction { ++ public: ++ ++ enum la_specific_constants { ++ instruction_size = 6 * 4, ++ instruction_offset = 0, ++ data_offset = 4 * 4, ++ next_instruction_offset = 6 * 4 ++ }; ++ ++ address destination() const { ++ return (address)ptr_at(data_offset); ++ } ++ ++ void set_destination(address new_destination) { ++ set_ptr_at(data_offset, (intptr_t)new_destination); ++ OrderAccess::fence(); ++ } ++}; ++ ++// Note: Other stubs must not begin with this pattern. 
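The li52 patterns recognised by NativeMovConstReg above all build a sign-extended 52-bit constant from three fields: ori supplies bits 11..0, lu12i.w bits 31..12, and lu32i.d bits 51..32, with the result sign-extended from bit 51. A hedged sketch of that composition, matching what NativeMovConstReg::data() reassembles for the lu12i.w/ori/lu32i.d pattern; Assembler::merge() in the patch is assumed to behave this way:

    #include <cstdint>
    #include <cassert>

    // Compose the run-time value of a lu12i.w / ori / lu32i.d sequence:
    //   bits 11..0  <- ori's 12-bit immediate
    //   bits 31..12 <- lu12i.w's 20-bit immediate
    //   bits 51..32 <- lu32i.d's 20-bit immediate (bit 51 extends through 63..52)
    static int64_t li52(uint32_t low12, uint32_t mid20, uint32_t hi20) {
      uint64_t v = ((uint64_t)hi20 << 32) | ((uint64_t)mid20 << 12) | (low12 & 0xfff);
      return ((int64_t)(v << 12)) >> 12;   // sign-extend from bit 51
    }

    int main() {
      assert(li52(0xabc, 0x12345, 0x00007) == INT64_C(0x000712345abc));
      assert(li52(0, 0, 0x80000) < 0);     // bit 51 set -> negative value
      return 0;
    }
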
++inline bool NativeInstruction::is_NativeCallTrampolineStub_at() { ++ // pcaddi ++ // ld_d ++ // jirl ++ return Assembler::high(int_at(0), 7) == Assembler::pcaddi_op && ++ Assembler::high(int_at(4), 10) == Assembler::ld_d_op && ++ Assembler::high(int_at(8), 6) == Assembler::jirl_op && ++ Assembler::low(int_at(8), 5) == R0->encoding(); ++} ++ ++inline NativeCallTrampolineStub* nativeCallTrampolineStub_at(address addr) { ++ NativeInstruction* ni = nativeInstruction_at(addr); ++ assert(ni->is_NativeCallTrampolineStub_at(), "no call trampoline found"); ++ return (NativeCallTrampolineStub*)addr; ++} ++ ++class NativeMembar : public NativeInstruction { ++public: ++ unsigned int get_hint() { return Assembler::low(insn_word(), 4); } ++ void set_hint(int hint) { Assembler::patch(addr_at(0), 4, hint); } ++}; ++ ++#endif // CPU_LOONGARCH_NATIVEINST_LOONGARCH_HPP +diff --git a/src/hotspot/cpu/loongarch/registerMap_loongarch.hpp b/src/hotspot/cpu/loongarch/registerMap_loongarch.hpp +new file mode 100644 +index 0000000000..e9f0fc280d +--- /dev/null ++++ b/src/hotspot/cpu/loongarch/registerMap_loongarch.hpp +@@ -0,0 +1,47 @@ ++/* ++ * Copyright (c) 1998, 2010, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2016, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#ifndef CPU_LOONGARCH_REGISTERMAP_LOONGARCH_HPP ++#define CPU_LOONGARCH_REGISTERMAP_LOONGARCH_HPP ++ ++// machine-dependent implemention for register maps ++ friend class frame; ++ ++ private: ++#ifndef CORE ++ // This is the hook for finding a register in an "well-known" location, ++ // such as a register block of a predetermined format. ++ // Since there is none, we just return NULL. ++ // See registerMap_sparc.hpp for an example of grabbing registers ++ // from register save areas of a standard layout. ++ address pd_location(VMReg reg) const {return NULL;} ++#endif ++ ++ // no PD state to clear or copy: ++ void pd_clear() {} ++ void pd_initialize() {} ++ void pd_initialize_from(const RegisterMap* map) {} ++ ++#endif // CPU_LOONGARCH_REGISTERMAP_LOONGARCH_HPP +diff --git a/src/hotspot/cpu/loongarch/register_definitions_loongarch.cpp b/src/hotspot/cpu/loongarch/register_definitions_loongarch.cpp +new file mode 100644 +index 0000000000..58f40b747c +--- /dev/null ++++ b/src/hotspot/cpu/loongarch/register_definitions_loongarch.cpp +@@ -0,0 +1,103 @@ ++/* ++ * Copyright (c) 2002, 2013, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2021, Loongson Technology. All rights reserved. 
++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#include "precompiled.hpp" ++#include "asm/assembler.hpp" ++#include "asm/register.hpp" ++#include "register_loongarch.hpp" ++#ifdef TARGET_ARCH_MODEL_loongarch_32 ++# include "interp_masm_loongarch_32.hpp" ++#endif ++#ifdef TARGET_ARCH_MODEL_loongarch_64 ++# include "interp_masm_loongarch_64.hpp" ++#endif ++ ++REGISTER_DEFINITION(Register, noreg); ++REGISTER_DEFINITION(Register, r0); ++REGISTER_DEFINITION(Register, r1); ++REGISTER_DEFINITION(Register, r2); ++REGISTER_DEFINITION(Register, r3); ++REGISTER_DEFINITION(Register, r4); ++REGISTER_DEFINITION(Register, r5); ++REGISTER_DEFINITION(Register, r6); ++REGISTER_DEFINITION(Register, r7); ++REGISTER_DEFINITION(Register, r8); ++REGISTER_DEFINITION(Register, r9); ++REGISTER_DEFINITION(Register, r10); ++REGISTER_DEFINITION(Register, r11); ++REGISTER_DEFINITION(Register, r12); ++REGISTER_DEFINITION(Register, r13); ++REGISTER_DEFINITION(Register, r14); ++REGISTER_DEFINITION(Register, r15); ++REGISTER_DEFINITION(Register, r16); ++REGISTER_DEFINITION(Register, r17); ++REGISTER_DEFINITION(Register, r18); ++REGISTER_DEFINITION(Register, r19); ++REGISTER_DEFINITION(Register, r20); ++REGISTER_DEFINITION(Register, r21); ++REGISTER_DEFINITION(Register, r22); ++REGISTER_DEFINITION(Register, r23); ++REGISTER_DEFINITION(Register, r24); ++REGISTER_DEFINITION(Register, r25); ++REGISTER_DEFINITION(Register, r26); ++REGISTER_DEFINITION(Register, r27); ++REGISTER_DEFINITION(Register, r28); ++REGISTER_DEFINITION(Register, r29); ++REGISTER_DEFINITION(Register, r30); ++REGISTER_DEFINITION(Register, r31); ++ ++REGISTER_DEFINITION(FloatRegister, fnoreg); ++REGISTER_DEFINITION(FloatRegister, f0); ++REGISTER_DEFINITION(FloatRegister, f1); ++REGISTER_DEFINITION(FloatRegister, f2); ++REGISTER_DEFINITION(FloatRegister, f3); ++REGISTER_DEFINITION(FloatRegister, f4); ++REGISTER_DEFINITION(FloatRegister, f5); ++REGISTER_DEFINITION(FloatRegister, f6); ++REGISTER_DEFINITION(FloatRegister, f7); ++REGISTER_DEFINITION(FloatRegister, f8); ++REGISTER_DEFINITION(FloatRegister, f9); ++REGISTER_DEFINITION(FloatRegister, f10); ++REGISTER_DEFINITION(FloatRegister, f11); ++REGISTER_DEFINITION(FloatRegister, f12); ++REGISTER_DEFINITION(FloatRegister, f13); ++REGISTER_DEFINITION(FloatRegister, f14); ++REGISTER_DEFINITION(FloatRegister, f15); ++REGISTER_DEFINITION(FloatRegister, f16); ++REGISTER_DEFINITION(FloatRegister, f17); ++REGISTER_DEFINITION(FloatRegister, f18); ++REGISTER_DEFINITION(FloatRegister, f19); ++REGISTER_DEFINITION(FloatRegister, f20); 
++REGISTER_DEFINITION(FloatRegister, f21); ++REGISTER_DEFINITION(FloatRegister, f22); ++REGISTER_DEFINITION(FloatRegister, f23); ++REGISTER_DEFINITION(FloatRegister, f24); ++REGISTER_DEFINITION(FloatRegister, f25); ++REGISTER_DEFINITION(FloatRegister, f26); ++REGISTER_DEFINITION(FloatRegister, f27); ++REGISTER_DEFINITION(FloatRegister, f28); ++REGISTER_DEFINITION(FloatRegister, f29); ++REGISTER_DEFINITION(FloatRegister, f30); ++REGISTER_DEFINITION(FloatRegister, f31); +diff --git a/src/hotspot/cpu/loongarch/register_loongarch.cpp b/src/hotspot/cpu/loongarch/register_loongarch.cpp +new file mode 100644 +index 0000000000..54d90167a5 +--- /dev/null ++++ b/src/hotspot/cpu/loongarch/register_loongarch.cpp +@@ -0,0 +1,59 @@ ++/* ++ * Copyright (c) 2000, 2012, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#include "precompiled.hpp" ++#include "register_loongarch.hpp" ++ ++const int ConcreteRegisterImpl::max_gpr = RegisterImpl::number_of_registers * RegisterImpl::max_slots_per_register; ++const int ConcreteRegisterImpl::max_fpr = ConcreteRegisterImpl::max_gpr + ++ FloatRegisterImpl::number_of_registers * FloatRegisterImpl::max_slots_per_register; ++ ++ ++const char* RegisterImpl::name() const { ++ const char* names[number_of_registers] = { ++ "zero", "ra", "tp", "sp", "a0/v0", "a1/v1", "a2", "a3", ++ "a4", "a5", "a6", "a7", "t0", "t1", "t2", "t3", ++ "t4", "t5", "t6", "t7", "t8", "x", "fp", "s0", ++ "s1", "s2", "s3", "s4", "s5", "s6", "s7", "s8" ++ }; ++ return is_valid() ? names[encoding()] : "noreg"; ++} ++ ++const char* FloatRegisterImpl::name() const { ++ const char* names[number_of_registers] = { ++ "f0", "f1", "f2", "f3", "f4", "f5", "f6", "f7", ++ "f8", "f9", "f10", "f11", "f12", "f13", "f14", "f15", ++ "f16", "f17", "f18", "f19", "f20", "f21", "f22", "f23", ++ "f24", "f25", "f26", "f27", "f28", "f29", "f30", "f31", ++ }; ++ return is_valid() ? names[encoding()] : "fnoreg"; ++} ++ ++const char* ConditionalFlagRegisterImpl::name() const { ++ const char* names[number_of_registers] = { ++ "fcc0", "fcc1", "fcc2", "fcc3", "fcc4", "fcc5", "fcc6", "fcc7", ++ }; ++ return is_valid() ? 
names[encoding()] : "fccnoreg"; ++} +diff --git a/src/hotspot/cpu/loongarch/register_loongarch.hpp b/src/hotspot/cpu/loongarch/register_loongarch.hpp +new file mode 100644 +index 0000000000..da876a5083 +--- /dev/null ++++ b/src/hotspot/cpu/loongarch/register_loongarch.hpp +@@ -0,0 +1,495 @@ ++/* ++ * Copyright (c) 2000, 2012, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#ifndef CPU_LOONGARCH_REGISTER_LOONGARCH_HPP ++#define CPU_LOONGARCH_REGISTER_LOONGARCH_HPP ++ ++#include "asm/register.hpp" ++#include "utilities/formatBuffer.hpp" ++ ++class VMRegImpl; ++typedef VMRegImpl* VMReg; ++ ++// Use Register as shortcut ++class RegisterImpl; ++typedef RegisterImpl* Register; ++ ++inline Register as_Register(int encoding) { ++ return (Register)(intptr_t) encoding; ++} ++ ++class RegisterImpl: public AbstractRegisterImpl { ++ public: ++ enum { ++ number_of_registers = 32, ++ max_slots_per_register = 2 ++ }; ++ ++ // derived registers, offsets, and addresses ++ Register successor() const { return as_Register(encoding() + 1); } ++ ++ // construction ++ inline friend Register as_Register(int encoding); ++ ++ VMReg as_VMReg(); ++ ++ // accessors ++ int encoding() const { assert(is_valid(), "invalid register (%d)", (int)(intptr_t)this ); return (intptr_t)this; } ++ bool is_valid() const { return 0 <= (intptr_t)this && (intptr_t)this < number_of_registers; } ++ const char* name() const; ++}; ++ ++// The integer registers of the LoongArch architecture ++CONSTANT_REGISTER_DECLARATION(Register, noreg, (-1)); ++ ++ ++CONSTANT_REGISTER_DECLARATION(Register, r0, (0)); ++CONSTANT_REGISTER_DECLARATION(Register, r1, (1)); ++CONSTANT_REGISTER_DECLARATION(Register, r2, (2)); ++CONSTANT_REGISTER_DECLARATION(Register, r3, (3)); ++CONSTANT_REGISTER_DECLARATION(Register, r4, (4)); ++CONSTANT_REGISTER_DECLARATION(Register, r5, (5)); ++CONSTANT_REGISTER_DECLARATION(Register, r6, (6)); ++CONSTANT_REGISTER_DECLARATION(Register, r7, (7)); ++CONSTANT_REGISTER_DECLARATION(Register, r8, (8)); ++CONSTANT_REGISTER_DECLARATION(Register, r9, (9)); ++CONSTANT_REGISTER_DECLARATION(Register, r10, (10)); ++CONSTANT_REGISTER_DECLARATION(Register, r11, (11)); ++CONSTANT_REGISTER_DECLARATION(Register, r12, (12)); ++CONSTANT_REGISTER_DECLARATION(Register, r13, (13)); ++CONSTANT_REGISTER_DECLARATION(Register, r14, (14)); ++CONSTANT_REGISTER_DECLARATION(Register, r15, (15)); ++CONSTANT_REGISTER_DECLARATION(Register, r16, (16)); 
++CONSTANT_REGISTER_DECLARATION(Register, r17, (17)); ++CONSTANT_REGISTER_DECLARATION(Register, r18, (18)); ++CONSTANT_REGISTER_DECLARATION(Register, r19, (19)); ++CONSTANT_REGISTER_DECLARATION(Register, r20, (20)); ++CONSTANT_REGISTER_DECLARATION(Register, r21, (21)); ++CONSTANT_REGISTER_DECLARATION(Register, r22, (22)); ++CONSTANT_REGISTER_DECLARATION(Register, r23, (23)); ++CONSTANT_REGISTER_DECLARATION(Register, r24, (24)); ++CONSTANT_REGISTER_DECLARATION(Register, r25, (25)); ++CONSTANT_REGISTER_DECLARATION(Register, r26, (26)); ++CONSTANT_REGISTER_DECLARATION(Register, r27, (27)); ++CONSTANT_REGISTER_DECLARATION(Register, r28, (28)); ++CONSTANT_REGISTER_DECLARATION(Register, r29, (29)); ++CONSTANT_REGISTER_DECLARATION(Register, r30, (30)); ++CONSTANT_REGISTER_DECLARATION(Register, r31, (31)); ++ ++#ifndef DONT_USE_REGISTER_DEFINES ++#define NOREG ((Register)(noreg_RegisterEnumValue)) ++ ++#define R0 ((Register)(r0_RegisterEnumValue)) ++#define R1 ((Register)(r1_RegisterEnumValue)) ++#define R2 ((Register)(r2_RegisterEnumValue)) ++#define R3 ((Register)(r3_RegisterEnumValue)) ++#define R4 ((Register)(r4_RegisterEnumValue)) ++#define R5 ((Register)(r5_RegisterEnumValue)) ++#define R6 ((Register)(r6_RegisterEnumValue)) ++#define R7 ((Register)(r7_RegisterEnumValue)) ++#define R8 ((Register)(r8_RegisterEnumValue)) ++#define R9 ((Register)(r9_RegisterEnumValue)) ++#define R10 ((Register)(r10_RegisterEnumValue)) ++#define R11 ((Register)(r11_RegisterEnumValue)) ++#define R12 ((Register)(r12_RegisterEnumValue)) ++#define R13 ((Register)(r13_RegisterEnumValue)) ++#define R14 ((Register)(r14_RegisterEnumValue)) ++#define R15 ((Register)(r15_RegisterEnumValue)) ++#define R16 ((Register)(r16_RegisterEnumValue)) ++#define R17 ((Register)(r17_RegisterEnumValue)) ++#define R18 ((Register)(r18_RegisterEnumValue)) ++#define R19 ((Register)(r19_RegisterEnumValue)) ++#define R20 ((Register)(r20_RegisterEnumValue)) ++#define R21 ((Register)(r21_RegisterEnumValue)) ++#define R22 ((Register)(r22_RegisterEnumValue)) ++#define R23 ((Register)(r23_RegisterEnumValue)) ++#define R24 ((Register)(r24_RegisterEnumValue)) ++#define R25 ((Register)(r25_RegisterEnumValue)) ++#define R26 ((Register)(r26_RegisterEnumValue)) ++#define R27 ((Register)(r27_RegisterEnumValue)) ++#define R28 ((Register)(r28_RegisterEnumValue)) ++#define R29 ((Register)(r29_RegisterEnumValue)) ++#define R30 ((Register)(r30_RegisterEnumValue)) ++#define R31 ((Register)(r31_RegisterEnumValue)) ++ ++ ++#define RA R1 ++#define TP R2 ++#define SP R3 ++#define A0 R4 ++#define A1 R5 ++#define A2 R6 ++#define A3 R7 ++#define A4 R8 ++#define A5 R9 ++#define A6 R10 ++#define A7 R11 ++#define RT0 R12 ++#define RT1 R13 ++#define RT2 R14 ++#define RT3 R15 ++#define RT4 R16 ++#define RT5 R17 ++#define RT6 R18 ++#define RT7 R19 ++#define RT8 R20 ++#define RX R21 ++#define FP R22 ++#define S0 R23 ++#define S1 R24 ++#define S2 R25 ++#define S3 R26 ++#define S4 R27 ++#define S5 R28 ++#define S6 R29 ++#define S7 R30 ++#define S8 R31 ++ ++#define c_rarg0 RT0 ++#define c_rarg1 RT1 ++#define Rmethod S3 ++#define Rsender S4 ++#define Rnext S1 ++ ++#define V0 A0 ++#define V1 A1 ++ ++#define SCR1 RT7 ++#define SCR2 RT4 ++ ++//for interpreter frame ++// bytecode pointer register ++#define BCP S0 ++// local variable pointer register ++#define LVP S7 ++// temperary callee saved register, we use this register to save the register maybe blowed cross call_VM ++// be sure to save and restore its value in call_stub ++#define TSR S2 ++ ++#define OPT_THREAD 1 ++ ++#define 
TREG S6 ++ ++#define S5_heapbase S5 ++ ++#define FSR V0 ++#define SSR T6 ++#define FSF FV0 ++ ++#define RECEIVER T0 ++#define IC_Klass T1 ++ ++#define SHIFT_count T3 ++ ++// ---------- Scratch Register ---------- ++#define AT RT7 ++#define fscratch F23 ++ ++#endif // DONT_USE_REGISTER_DEFINES ++ ++// Use FloatRegister as shortcut ++class FloatRegisterImpl; ++typedef FloatRegisterImpl* FloatRegister; ++ ++inline FloatRegister as_FloatRegister(int encoding) { ++ return (FloatRegister)(intptr_t) encoding; ++} ++ ++// The implementation of floating point registers for the LoongArch architecture ++class FloatRegisterImpl: public AbstractRegisterImpl { ++ public: ++ enum { ++ number_of_registers = 32, ++ save_slots_per_register = 2, ++ slots_per_lsx_register = 4, ++ slots_per_lasx_register = 8, ++ max_slots_per_register = 8 ++ }; ++ ++ // construction ++ inline friend FloatRegister as_FloatRegister(int encoding); ++ ++ VMReg as_VMReg(); ++ ++ // derived registers, offsets, and addresses ++ FloatRegister successor() const { return as_FloatRegister(encoding() + 1); } ++ ++ // accessors ++ int encoding() const { assert(is_valid(), "invalid register"); return (intptr_t)this; } ++ bool is_valid() const { return 0 <= (intptr_t)this && (intptr_t)this < number_of_registers; } ++ const char* name() const; ++ ++}; ++ ++CONSTANT_REGISTER_DECLARATION(FloatRegister, fnoreg , (-1)); ++ ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f0 , ( 0)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f1 , ( 1)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f2 , ( 2)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f3 , ( 3)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f4 , ( 4)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f5 , ( 5)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f6 , ( 6)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f7 , ( 7)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f8 , ( 8)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f9 , ( 9)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f10 , (10)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f11 , (11)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f12 , (12)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f13 , (13)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f14 , (14)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f15 , (15)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f16 , (16)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f17 , (17)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f18 , (18)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f19 , (19)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f20 , (20)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f21 , (21)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f22 , (22)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f23 , (23)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f24 , (24)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f25 , (25)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f26 , (26)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f27 , (27)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f28 , (28)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f29 , (29)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f30 , (30)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f31 , (31)); ++ ++#ifndef DONT_USE_REGISTER_DEFINES ++#define FNOREG ((FloatRegister)(fnoreg_FloatRegisterEnumValue)) ++#define F0 ((FloatRegister)( f0_FloatRegisterEnumValue)) ++#define F1 ((FloatRegister)( f1_FloatRegisterEnumValue)) ++#define F2 ((FloatRegister)( 
f2_FloatRegisterEnumValue)) ++#define F3 ((FloatRegister)( f3_FloatRegisterEnumValue)) ++#define F4 ((FloatRegister)( f4_FloatRegisterEnumValue)) ++#define F5 ((FloatRegister)( f5_FloatRegisterEnumValue)) ++#define F6 ((FloatRegister)( f6_FloatRegisterEnumValue)) ++#define F7 ((FloatRegister)( f7_FloatRegisterEnumValue)) ++#define F8 ((FloatRegister)( f8_FloatRegisterEnumValue)) ++#define F9 ((FloatRegister)( f9_FloatRegisterEnumValue)) ++#define F10 ((FloatRegister)( f10_FloatRegisterEnumValue)) ++#define F11 ((FloatRegister)( f11_FloatRegisterEnumValue)) ++#define F12 ((FloatRegister)( f12_FloatRegisterEnumValue)) ++#define F13 ((FloatRegister)( f13_FloatRegisterEnumValue)) ++#define F14 ((FloatRegister)( f14_FloatRegisterEnumValue)) ++#define F15 ((FloatRegister)( f15_FloatRegisterEnumValue)) ++#define F16 ((FloatRegister)( f16_FloatRegisterEnumValue)) ++#define F17 ((FloatRegister)( f17_FloatRegisterEnumValue)) ++#define F18 ((FloatRegister)( f18_FloatRegisterEnumValue)) ++#define F19 ((FloatRegister)( f19_FloatRegisterEnumValue)) ++#define F20 ((FloatRegister)( f20_FloatRegisterEnumValue)) ++#define F21 ((FloatRegister)( f21_FloatRegisterEnumValue)) ++#define F22 ((FloatRegister)( f22_FloatRegisterEnumValue)) ++#define F23 ((FloatRegister)( f23_FloatRegisterEnumValue)) ++#define F24 ((FloatRegister)( f24_FloatRegisterEnumValue)) ++#define F25 ((FloatRegister)( f25_FloatRegisterEnumValue)) ++#define F26 ((FloatRegister)( f26_FloatRegisterEnumValue)) ++#define F27 ((FloatRegister)( f27_FloatRegisterEnumValue)) ++#define F28 ((FloatRegister)( f28_FloatRegisterEnumValue)) ++#define F29 ((FloatRegister)( f29_FloatRegisterEnumValue)) ++#define F30 ((FloatRegister)( f30_FloatRegisterEnumValue)) ++#define F31 ((FloatRegister)( f31_FloatRegisterEnumValue)) ++ ++#define FA0 F0 ++#define FA1 F1 ++#define FA2 F2 ++#define FA3 F3 ++#define FA4 F4 ++#define FA5 F5 ++#define FA6 F6 ++#define FA7 F7 ++ ++#define FV0 F0 ++#define FV1 F1 ++ ++#define FT0 F8 ++#define FT1 F9 ++#define FT2 F10 ++#define FT3 F11 ++#define FT4 F12 ++#define FT5 F13 ++#define FT6 F14 ++#define FT7 F15 ++#define FT8 F16 ++#define FT9 F17 ++#define FT10 F18 ++#define FT11 F19 ++#define FT12 F20 ++#define FT13 F21 ++#define FT14 F22 ++#define FT15 F23 ++ ++#define FS0 F24 ++#define FS1 F25 ++#define FS2 F26 ++#define FS3 F27 ++#define FS4 F28 ++#define FS5 F29 ++#define FS6 F30 ++#define FS7 F31 ++ ++#endif // DONT_USE_REGISTER_DEFINES ++ ++// Use ConditionalFlagRegister as shortcut ++class ConditionalFlagRegisterImpl; ++typedef ConditionalFlagRegisterImpl* ConditionalFlagRegister; ++ ++inline ConditionalFlagRegister as_ConditionalFlagRegister(int encoding) { ++ return (ConditionalFlagRegister)(intptr_t) encoding; ++} ++ ++// The implementation of floating point registers for the LoongArch architecture ++class ConditionalFlagRegisterImpl: public AbstractRegisterImpl { ++ public: ++ enum { ++// conditionalflag_arg_base = 12, ++ number_of_registers = 8 ++ }; ++ ++ // construction ++ inline friend ConditionalFlagRegister as_ConditionalFlagRegister(int encoding); ++ ++ VMReg as_VMReg(); ++ ++ // derived registers, offsets, and addresses ++ ConditionalFlagRegister successor() const { return as_ConditionalFlagRegister(encoding() + 1); } ++ ++ // accessors ++ int encoding() const { assert(is_valid(), "invalid register"); return (intptr_t)this; } ++ bool is_valid() const { return 0 <= (intptr_t)this && (intptr_t)this < number_of_registers; } ++ const char* name() const; ++ ++}; ++ 
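Note the representation trick shared by RegisterImpl, FloatRegisterImpl and ConditionalFlagRegisterImpl above: a Register is never backed by a real object — the pointer value itself is the encoding, so as_Register(5)->encoding() is simply 5 and is_valid() is a range check on this. A minimal self-contained illustration of the idiom (class and function names here are illustrative, not HotSpot's; like the HotSpot classes, it leans on the fact that these member functions never touch member data):

    #include <cassert>
    #include <cstdint>

    // Stand-alone illustration of the "pointer value is the encoding" idiom.
    class RegHandleImpl;
    typedef RegHandleImpl* RegHandle;

    inline RegHandle as_reg(int encoding) {
      return (RegHandle)(intptr_t)encoding;        // no object is ever allocated
    }

    class RegHandleImpl {
     public:
      static const int number_of_registers = 32;
      int  encoding() const { return (int)(intptr_t)this; }   // 'this' carries the number
      bool is_valid() const {
        intptr_t v = (intptr_t)this;
        return 0 <= v && v < number_of_registers;
      }
      RegHandle successor() const { return as_reg(encoding() + 1); }
    };

    int main() {
      RegHandle r5 = as_reg(5);
      assert(r5->is_valid() && r5->encoding() == 5);
      assert(r5->successor()->encoding() == 6);
      assert(!as_reg(-1)->is_valid());             // the 'noreg' sentinel
      return 0;
    }
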
++CONSTANT_REGISTER_DECLARATION(ConditionalFlagRegister, fccnoreg , (-1)); ++ ++CONSTANT_REGISTER_DECLARATION(ConditionalFlagRegister, fcc0 , ( 0)); ++CONSTANT_REGISTER_DECLARATION(ConditionalFlagRegister, fcc1 , ( 1)); ++CONSTANT_REGISTER_DECLARATION(ConditionalFlagRegister, fcc2 , ( 2)); ++CONSTANT_REGISTER_DECLARATION(ConditionalFlagRegister, fcc3 , ( 3)); ++CONSTANT_REGISTER_DECLARATION(ConditionalFlagRegister, fcc4 , ( 4)); ++CONSTANT_REGISTER_DECLARATION(ConditionalFlagRegister, fcc5 , ( 5)); ++CONSTANT_REGISTER_DECLARATION(ConditionalFlagRegister, fcc6 , ( 6)); ++CONSTANT_REGISTER_DECLARATION(ConditionalFlagRegister, fcc7 , ( 7)); ++ ++#ifndef DONT_USE_REGISTER_DEFINES ++#define FCCNOREG ((ConditionalFlagRegister)(fccnoreg_ConditionalFlagRegisterEnumValue)) ++#define FCC0 ((ConditionalFlagRegister)( fcc0_ConditionalFlagRegisterEnumValue)) ++#define FCC1 ((ConditionalFlagRegister)( fcc1_ConditionalFlagRegisterEnumValue)) ++#define FCC2 ((ConditionalFlagRegister)( fcc2_ConditionalFlagRegisterEnumValue)) ++#define FCC3 ((ConditionalFlagRegister)( fcc3_ConditionalFlagRegisterEnumValue)) ++#define FCC4 ((ConditionalFlagRegister)( fcc4_ConditionalFlagRegisterEnumValue)) ++#define FCC5 ((ConditionalFlagRegister)( fcc5_ConditionalFlagRegisterEnumValue)) ++#define FCC6 ((ConditionalFlagRegister)( fcc6_ConditionalFlagRegisterEnumValue)) ++#define FCC7 ((ConditionalFlagRegister)( fcc7_ConditionalFlagRegisterEnumValue)) ++ ++#endif // DONT_USE_REGISTER_DEFINES ++ ++// Need to know the total number of registers of all sorts for SharedInfo. ++// Define a class that exports it. ++class ConcreteRegisterImpl : public AbstractRegisterImpl { ++ public: ++ enum { ++ // A big enough number for C2: all the registers plus flags ++ // This number must be large enough to cover REG_COUNT (defined by c2) registers. ++ // There is no requirement that any ordering here matches any ordering c2 gives ++ // it's optoregs. 
++ number_of_registers = RegisterImpl::max_slots_per_register * RegisterImpl::number_of_registers + ++ FloatRegisterImpl::max_slots_per_register * FloatRegisterImpl::number_of_registers ++ }; ++ ++ static const int max_gpr; ++ static const int max_fpr; ++}; ++ ++// A set of registers ++template ++class AbstractRegSet { ++ uint32_t _bitset; ++ ++ AbstractRegSet(uint32_t bitset) : _bitset(bitset) { } ++ ++public: ++ ++ AbstractRegSet() : _bitset(0) { } ++ ++ AbstractRegSet(RegImpl r1) : _bitset(1 << r1->encoding()) { } ++ ++ AbstractRegSet operator+(const AbstractRegSet aSet) const { ++ AbstractRegSet result(_bitset | aSet._bitset); ++ return result; ++ } ++ ++ AbstractRegSet operator-(const AbstractRegSet aSet) const { ++ AbstractRegSet result(_bitset & ~aSet._bitset); ++ return result; ++ } ++ ++ AbstractRegSet &operator+=(const AbstractRegSet aSet) { ++ *this = *this + aSet; ++ return *this; ++ } ++ ++ AbstractRegSet &operator-=(const AbstractRegSet aSet) { ++ *this = *this - aSet; ++ return *this; ++ } ++ ++ static AbstractRegSet of(RegImpl r1) { ++ return AbstractRegSet(r1); ++ } ++ ++ static AbstractRegSet of(RegImpl r1, RegImpl r2) { ++ return of(r1) + r2; ++ } ++ ++ static AbstractRegSet of(RegImpl r1, RegImpl r2, RegImpl r3) { ++ return of(r1, r2) + r3; ++ } ++ ++ static AbstractRegSet of(RegImpl r1, RegImpl r2, RegImpl r3, RegImpl r4) { ++ return of(r1, r2, r3) + r4; ++ } ++ ++ static AbstractRegSet of(RegImpl r1, RegImpl r2, RegImpl r3, RegImpl r4, RegImpl r5) { ++ return of(r1, r2, r3, r4) + r5; ++ } ++ ++ static AbstractRegSet range(RegImpl start, RegImpl end) { ++ uint32_t bits = ~0; ++ bits <<= start->encoding(); ++ bits <<= 31 - end->encoding(); ++ bits >>= 31 - end->encoding(); ++ ++ return AbstractRegSet(bits); ++ } ++ ++ uint32_t bits() const { return _bitset; } ++}; ++ ++typedef AbstractRegSet RegSet; ++ ++#endif //CPU_LOONGARCH_REGISTER_LOONGARCH_HPP +diff --git a/src/hotspot/cpu/loongarch/relocInfo_loongarch.cpp b/src/hotspot/cpu/loongarch/relocInfo_loongarch.cpp +new file mode 100644 +index 0000000000..1caba43699 +--- /dev/null ++++ b/src/hotspot/cpu/loongarch/relocInfo_loongarch.cpp +@@ -0,0 +1,132 @@ ++/* ++ * Copyright (c) 1998, 2013, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ * ++ */ ++ ++#include "precompiled.hpp" ++#include "asm/macroAssembler.hpp" ++#include "code/relocInfo.hpp" ++#include "compiler/disassembler.hpp" ++#include "nativeInst_loongarch.hpp" ++#include "oops/compressedOops.inline.hpp" ++#include "oops/oop.hpp" ++#include "oops/klass.inline.hpp" ++#include "runtime/safepoint.hpp" ++ ++ ++void Relocation::pd_set_data_value(address x, intptr_t o, bool verify_only) { ++ x += o; ++ typedef Assembler::WhichOperand WhichOperand; ++ WhichOperand which = (WhichOperand) format(); // that is, disp32 or imm, call32, narrow oop ++ assert(which == Assembler::disp32_operand || ++ which == Assembler::narrow_oop_operand || ++ which == Assembler::imm_operand, "format unpacks ok"); ++ if (type() == relocInfo::internal_word_type || ++ type() == relocInfo::section_word_type) { ++ MacroAssembler::pd_patch_instruction(addr(), x); ++ } else if (which == Assembler::imm_operand) { ++ if (verify_only) { ++ assert(nativeMovConstReg_at(addr())->data() == (long)x, "instructions must match"); ++ } else { ++ nativeMovConstReg_at(addr())->set_data((intptr_t)(x)); ++ } ++ } else if (which == Assembler::narrow_oop_operand) { ++ // both compressed oops and compressed classes look the same ++ if (Universe::heap()->is_in_reserved((oop)x)) { ++ if (verify_only) { ++ assert(nativeMovConstReg_at(addr())->data() == (long)CompressedOops::encode((oop)x), "instructions must match"); ++ } else { ++ nativeMovConstReg_at(addr())->set_data((intptr_t)(CompressedOops::encode(oop(x))), (intptr_t)(x)); ++ } ++ } else { ++ if (verify_only) { ++ assert(nativeMovConstReg_at(addr())->data() == (long)Klass::encode_klass((Klass*)x), "instructions must match"); ++ } else { ++ nativeMovConstReg_at(addr())->set_data((intptr_t)(Klass::encode_klass((Klass*)x)), (intptr_t)(x)); ++ } ++ } ++ } else { ++ // Note: Use runtime_call_type relocations for call32_operand. ++ assert(0, "call32_operand not supported in LoongArch64"); ++ } ++} ++ ++ ++address Relocation::pd_call_destination(address orig_addr) { ++ NativeInstruction* ni = nativeInstruction_at(addr()); ++ if (ni->is_far_call()) { ++ return nativeFarCall_at(addr())->destination(orig_addr); ++ } else if (ni->is_call()) { ++ address trampoline = nativeCall_at(addr())->get_trampoline(); ++ if (trampoline) { ++ return nativeCallTrampolineStub_at(trampoline)->destination(); ++ } else { ++ address new_addr = nativeCall_at(addr())->target_addr_for_bl(orig_addr); ++ // If call is branch to self, don't try to relocate it, just leave it ++ // as branch to self. This happens during code generation if the code ++ // buffer expands. It will be relocated to the trampoline above once ++ // code generation is complete. ++ return (new_addr == orig_addr) ? 
addr() : new_addr; ++ } ++ } else if (ni->is_jump()) { ++ return nativeGeneralJump_at(addr())->jump_destination(orig_addr); ++ } else { ++ tty->print_cr("\nError!\ncall destination: " INTPTR_FORMAT, p2i(addr())); ++ Disassembler::decode(addr() - 10 * BytesPerInstWord, addr() + 10 * BytesPerInstWord, tty); ++ ShouldNotReachHere(); ++ return NULL; ++ } ++} ++ ++void Relocation::pd_set_call_destination(address x) { ++ NativeInstruction* ni = nativeInstruction_at(addr()); ++ if (ni->is_far_call()) { ++ nativeFarCall_at(addr())->set_destination(x); ++ } else if (ni->is_call()) { ++ address trampoline = nativeCall_at(addr())->get_trampoline(); ++ if (trampoline) { ++ nativeCall_at(addr())->set_destination_mt_safe(x, false); ++ } else { ++ nativeCall_at(addr())->set_destination(x); ++ } ++ } else if (ni->is_jump()) { ++ nativeGeneralJump_at(addr())->set_jump_destination(x); ++ } else { ++ ShouldNotReachHere(); ++ } ++} ++ ++address* Relocation::pd_address_in_code() { ++ return (address*)addr(); ++} ++ ++address Relocation::pd_get_address_from_code() { ++ NativeMovConstReg* ni = nativeMovConstReg_at(addr()); ++ return (address)ni->data(); ++} ++ ++void poll_Relocation::fix_relocation_after_move(const CodeBuffer* src, CodeBuffer* dest) { ++} ++ ++void metadata_Relocation::pd_fix_value(address x) { ++} +diff --git a/src/hotspot/cpu/loongarch/relocInfo_loongarch.hpp b/src/hotspot/cpu/loongarch/relocInfo_loongarch.hpp +new file mode 100644 +index 0000000000..c85ca4963f +--- /dev/null ++++ b/src/hotspot/cpu/loongarch/relocInfo_loongarch.hpp +@@ -0,0 +1,44 @@ ++/* ++ * Copyright (c) 1997, 2010, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#ifndef CPU_LOONGARCH_RELOCINFO_LOONGARCH_HPP ++#define CPU_LOONGARCH_RELOCINFO_LOONGARCH_HPP ++ ++ // machine-dependent parts of class relocInfo ++ private: ++ enum { ++ // Since LoongArch instructions are whole words, ++ // the two low-order offset bits can always be discarded. ++ offset_unit = 4, ++ ++ // imm_oop_operand vs. 
narrow_oop_operand ++ format_width = 2 ++ }; ++ ++ public: ++ ++ static bool mustIterateImmediateOopsInCode() { return false; } ++ ++#endif // CPU_LOONGARCH_RELOCINFO_LOONGARCH_HPP +diff --git a/src/hotspot/cpu/loongarch/runtime_loongarch_64.cpp b/src/hotspot/cpu/loongarch/runtime_loongarch_64.cpp +new file mode 100644 +index 0000000000..334c783b37 +--- /dev/null ++++ b/src/hotspot/cpu/loongarch/runtime_loongarch_64.cpp +@@ -0,0 +1,191 @@ ++/* ++ * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2021, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#include "precompiled.hpp" ++#ifdef COMPILER2 ++#include "asm/macroAssembler.hpp" ++#include "asm/macroAssembler.inline.hpp" ++#include "classfile/systemDictionary.hpp" ++#include "code/vmreg.hpp" ++#include "interpreter/interpreter.hpp" ++#include "opto/runtime.hpp" ++#include "runtime/interfaceSupport.inline.hpp" ++#include "runtime/sharedRuntime.hpp" ++#include "runtime/stubRoutines.hpp" ++#include "runtime/vframeArray.hpp" ++#include "utilities/globalDefinitions.hpp" ++#include "vmreg_loongarch.inline.hpp" ++#endif ++ ++#define __ masm-> ++ ++#define T0 RT0 ++#define T1 RT1 ++#define T2 RT2 ++#define T3 RT3 ++#define T4 RT4 ++#define T5 RT5 ++#define T6 RT6 ++#define T7 RT7 ++#define T8 RT8 ++ ++//-------------- generate_exception_blob ----------- ++// creates _exception_blob. ++// The exception blob is jumped to from a compiled method. ++// (see emit_exception_handler in sparc.ad file) ++// ++// Given an exception pc at a call we call into the runtime for the ++// handler in this method. This handler might merely restore state ++// (i.e. callee save registers) unwind the frame and jump to the ++// exception handler for the nmethod if there is no Java level handler ++// for the nmethod. ++// ++// This code is entered with a jump, and left with a jump. ++// ++// Arguments: ++// V0: exception oop ++// V1: exception pc ++// ++// Results: ++// A0: exception oop ++// A1: exception pc in caller or ??? 
++// jumps to: exception handler of caller ++// ++// Note: the exception pc MUST be at a call (precise debug information) ++// ++// [stubGenerator_loongarch_64.cpp] generate_forward_exception() ++// |- V0, V1 are created ++// |- T4 <= SharedRuntime::exception_handler_for_return_address ++// `- jr T4 ++// `- the caller's exception_handler ++// `- jr OptoRuntime::exception_blob ++// `- here ++// ++void OptoRuntime::generate_exception_blob() { ++ // Capture info about frame layout ++ enum layout { ++ fp_off, ++ return_off, // slot for return address ++ framesize ++ }; ++ ++ // allocate space for the code ++ ResourceMark rm; ++ // setup code generation tools ++ CodeBuffer buffer("exception_blob", 5120, 5120); ++ MacroAssembler* masm = new MacroAssembler(&buffer); ++ ++ address start = __ pc(); ++ ++ __ addi_d(SP, SP, -1 * framesize * wordSize); // Prolog! ++ ++ // this frame will be treated as the original caller method. ++ // So, the return pc should be filled with the original exception pc. ++ // ref: X86's implementation ++ __ st_d(V1, SP, return_off * wordSize); // return address ++ __ st_d(FP, SP, fp_off * wordSize); ++ ++ // Save callee saved registers. None for UseSSE=0, ++ // floats-only for UseSSE=1, and doubles for UseSSE=2. ++ ++ __ addi_d(FP, SP, fp_off * wordSize); ++ ++ // Store exception in Thread object. We cannot pass any arguments to the ++ // handle_exception call, since we do not want to make any assumption ++ // about the size of the frame where the exception happened in. ++ Register thread = TREG; ++ ++#ifndef OPT_THREAD ++ __ get_thread(thread); ++#endif ++ ++ __ st_d(V0, Address(thread, JavaThread::exception_oop_offset())); ++ __ st_d(V1, Address(thread, JavaThread::exception_pc_offset())); ++ ++ // This call does all the hard work. It checks if an exception handler ++ // exists in the method. ++ // If so, it returns the handler address. ++ // If not, it prepares for stack-unwinding, restoring the callee-save ++ // registers of the frame being removed. ++ Label L; ++ address the_pc = __ pc(); ++ __ bind(L); ++ __ set_last_Java_frame(thread, NOREG, NOREG, L); ++ ++ __ li(AT, -(StackAlignmentInBytes)); ++ __ andr(SP, SP, AT); // Fix stack alignment as required by ABI ++ ++ __ move(A0, thread); ++ // TODO: confirm reloc ++ __ call((address)OptoRuntime::handle_exception_C, relocInfo::runtime_call_type); ++ ++ // Set an oopmap for the call site ++ OopMapSet *oop_maps = new OopMapSet(); ++ ++ oop_maps->add_gc_map(the_pc - start, new OopMap(framesize, 0)); ++ ++#ifndef OPT_THREAD ++ __ get_thread(thread); ++#endif ++ __ reset_last_Java_frame(thread, true); ++ ++ // Pop self-frame. ++ __ leave(); // Epilog! ++ ++ // V0: exception handler ++ ++ // We have a handler in V0, (could be deopt blob) ++ __ move(T4, V0); ++ ++#ifndef OPT_THREAD ++ __ get_thread(thread); ++#endif ++ // Get the exception ++ __ ld_d(A0, Address(thread, JavaThread::exception_oop_offset())); ++ // Get the exception pc in case we are deoptimized ++ __ ld_d(A1, Address(thread, JavaThread::exception_pc_offset())); ++#ifdef ASSERT ++ __ st_d(R0, Address(thread, JavaThread::exception_handler_pc_offset())); ++ __ st_d(R0, Address(thread, JavaThread::exception_pc_offset())); ++#endif ++ // Clear the exception oop so GC no longer processes it as a root. 
++ __ st_d(R0, Address(thread, JavaThread::exception_oop_offset())); ++ ++ // Fix seg fault when running: ++ // Eclipse + Plugin + Debug As ++ // This is the only condition where C2 calls SharedRuntime::generate_deopt_blob() ++ // ++ __ move(V0, A0); ++ __ move(V1, A1); ++ ++ // V0: exception oop ++ // T4: exception handler ++ // A1: exception pc ++ __ jr(T4); ++ ++ // make sure all code is generated ++ masm->flush(); ++ _exception_blob = ExceptionBlob::create(&buffer, oop_maps, framesize); ++} +diff --git a/src/hotspot/cpu/loongarch/sharedRuntime_loongarch_64.cpp b/src/hotspot/cpu/loongarch/sharedRuntime_loongarch_64.cpp +new file mode 100644 +index 0000000000..736ed0a85f +--- /dev/null ++++ b/src/hotspot/cpu/loongarch/sharedRuntime_loongarch_64.cpp +@@ -0,0 +1,3621 @@ ++/* ++ * Copyright (c) 2003, 2013, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ * ++ */ ++ ++#include "precompiled.hpp" ++#include "asm/macroAssembler.hpp" ++#include "asm/macroAssembler.inline.hpp" ++#include "code/debugInfoRec.hpp" ++#include "code/icBuffer.hpp" ++#include "code/vtableStubs.hpp" ++#include "interpreter/interpreter.hpp" ++#include "nativeInst_loongarch.hpp" ++#include "oops/compiledICHolder.hpp" ++#include "runtime/sharedRuntime.hpp" ++#include "runtime/vframeArray.hpp" ++#include "vmreg_loongarch.inline.hpp" ++#ifdef COMPILER2 ++#include "opto/runtime.hpp" ++#endif ++#if INCLUDE_JVMCI ++#include "jvmci/jvmciJavaClasses.hpp" ++#endif ++ ++#include ++ ++#define __ masm-> ++ ++#define T0 RT0 ++#define T1 RT1 ++#define T2 RT2 ++#define T3 RT3 ++#define T4 RT4 ++#define T5 RT5 ++#define T6 RT6 ++#define T7 RT7 ++#define T8 RT8 ++ ++const int StackAlignmentInSlots = StackAlignmentInBytes / VMRegImpl::stack_slot_size; ++ ++class RegisterSaver { ++ // Capture info about frame layout ++ enum layout { ++ fpr0_off = 0, ++ fpr1_off, ++ fpr2_off, ++ fpr3_off, ++ fpr4_off, ++ fpr5_off, ++ fpr6_off, ++ fpr7_off, ++ fpr8_off, ++ fpr9_off, ++ fpr10_off, ++ fpr11_off, ++ fpr12_off, ++ fpr13_off, ++ fpr14_off, ++ fpr15_off, ++ fpr16_off, ++ fpr17_off, ++ fpr18_off, ++ fpr19_off, ++ fpr20_off, ++ fpr21_off, ++ fpr22_off, ++ fpr23_off, ++ fpr24_off, ++ fpr25_off, ++ fpr26_off, ++ fpr27_off, ++ fpr28_off, ++ fpr29_off, ++ fpr30_off, ++ fpr31_off, ++ a0_off, ++ a1_off, ++ a2_off, ++ a3_off, ++ a4_off, ++ a5_off, ++ a6_off, ++ a7_off, ++ t0_off, ++ t1_off, ++ t2_off, ++ t3_off, ++ t4_off, ++ t5_off, ++ t6_off, ++ t7_off, ++ t8_off, ++ s0_off, ++ s1_off, ++ s2_off, ++ s3_off, ++ s4_off, ++ s5_off, ++ s6_off, ++ s7_off, ++ s8_off, ++ fp_off, ++ ra_off, ++ fpr_size = fpr31_off - fpr0_off + 1, ++ gpr_size = ra_off - a0_off + 1, ++ }; ++ ++ const bool _save_vectors; ++ public: ++ RegisterSaver(bool save_vectors) : _save_vectors(save_vectors) {} ++ ++ OopMap* save_live_registers(MacroAssembler* masm, int additional_frame_words, int* total_frame_words); ++ void restore_live_registers(MacroAssembler* masm); ++ ++ int slots_save() { ++ int slots = gpr_size * VMRegImpl::slots_per_word; ++ ++ if (_save_vectors && UseLASX) ++ slots += FloatRegisterImpl::slots_per_lasx_register * fpr_size; ++ else if (_save_vectors && UseLSX) ++ slots += FloatRegisterImpl::slots_per_lsx_register * fpr_size; ++ else ++ slots += FloatRegisterImpl::save_slots_per_register * fpr_size; ++ ++ return slots; ++ } ++ ++ int gpr_offset(int off) { ++ int slots_per_fpr = FloatRegisterImpl::save_slots_per_register; ++ int slots_per_gpr = VMRegImpl::slots_per_word; ++ ++ if (_save_vectors && UseLASX) ++ slots_per_fpr = FloatRegisterImpl::slots_per_lasx_register; ++ else if (_save_vectors && UseLSX) ++ slots_per_fpr = FloatRegisterImpl::slots_per_lsx_register; ++ ++ return (fpr_size * slots_per_fpr + (off - a0_off) * slots_per_gpr) * VMRegImpl::stack_slot_size; ++ } ++ ++ int fpr_offset(int off) { ++ int slots_per_fpr = FloatRegisterImpl::save_slots_per_register; ++ ++ if (_save_vectors && UseLASX) ++ slots_per_fpr = FloatRegisterImpl::slots_per_lasx_register; ++ else if (_save_vectors && UseLSX) ++ slots_per_fpr = FloatRegisterImpl::slots_per_lsx_register; ++ ++ return off * slots_per_fpr * VMRegImpl::stack_slot_size; ++ } ++ ++ int ra_offset() { return gpr_offset(ra_off); } ++ int t5_offset() { return gpr_offset(t5_off); } ++ int s3_offset() { return gpr_offset(s3_off); } ++ int v0_offset() { return gpr_offset(a0_off); } ++ int v1_offset() { return gpr_offset(a1_off); } ++ ++ int fpr0_offset() { return 
fpr_offset(fpr0_off); } ++ int fpr1_offset() { return fpr_offset(fpr1_off); } ++ ++ // During deoptimization only the result register need to be restored ++ // all the other values have already been extracted. ++ void restore_result_registers(MacroAssembler* masm); ++}; ++ ++OopMap* RegisterSaver::save_live_registers(MacroAssembler* masm, int additional_frame_words, int* total_frame_words) { ++ ++ // Always make the frame size 16-byte aligned ++ int frame_size_in_bytes = align_up(additional_frame_words * wordSize + slots_save() * VMRegImpl::stack_slot_size, StackAlignmentInBytes); ++ // OopMap frame size is in compiler stack slots (jint's) not bytes or words ++ int frame_size_in_slots = frame_size_in_bytes / VMRegImpl::stack_slot_size; ++ // The caller will allocate additional_frame_words ++ int additional_frame_slots = additional_frame_words * wordSize / VMRegImpl::stack_slot_size; ++ // CodeBlob frame size is in words. ++ int frame_size_in_words = frame_size_in_bytes / wordSize; ++ ++ *total_frame_words = frame_size_in_words; ++ ++ OopMapSet *oop_maps = new OopMapSet(); ++ OopMap* map = new OopMap(frame_size_in_slots, 0); ++ ++ // save registers ++ __ addi_d(SP, SP, -slots_save() * VMRegImpl::stack_slot_size); ++ ++ for (int i = 0; i < fpr_size; i++) { ++ FloatRegister fpr = as_FloatRegister(i); ++ int off = fpr_offset(i); ++ ++ if (_save_vectors && UseLASX) ++ __ xvst(fpr, SP, off); ++ else if (_save_vectors && UseLSX) ++ __ vst(fpr, SP, off); ++ else ++ __ fst_d(fpr, SP, off); ++ map->set_callee_saved(VMRegImpl::stack2reg(off / VMRegImpl::stack_slot_size + additional_frame_slots), fpr->as_VMReg()); ++ } ++ ++ for (int i = a0_off; i <= a7_off; i++) { ++ Register gpr = as_Register(A0->encoding() + (i - a0_off)); ++ int off = gpr_offset(i); ++ ++ __ st_d(gpr, SP, gpr_offset(i)); ++ map->set_callee_saved(VMRegImpl::stack2reg(off / VMRegImpl::stack_slot_size + additional_frame_slots), gpr->as_VMReg()); ++ } ++ ++ for (int i = t0_off; i <= t6_off; i++) { ++ Register gpr = as_Register(T0->encoding() + (i - t0_off)); ++ int off = gpr_offset(i); ++ ++ __ st_d(gpr, SP, gpr_offset(i)); ++ map->set_callee_saved(VMRegImpl::stack2reg(off / VMRegImpl::stack_slot_size + additional_frame_slots), gpr->as_VMReg()); ++ } ++ __ st_d(T8, SP, gpr_offset(t8_off)); ++ map->set_callee_saved(VMRegImpl::stack2reg(gpr_offset(t8_off) / VMRegImpl::stack_slot_size + additional_frame_slots), T8->as_VMReg()); ++ ++ for (int i = s0_off; i <= s8_off; i++) { ++ Register gpr = as_Register(S0->encoding() + (i - s0_off)); ++ int off = gpr_offset(i); ++ ++ __ st_d(gpr, SP, gpr_offset(i)); ++ map->set_callee_saved(VMRegImpl::stack2reg(off / VMRegImpl::stack_slot_size + additional_frame_slots), gpr->as_VMReg()); ++ } ++ ++ __ st_d(FP, SP, gpr_offset(fp_off)); ++ map->set_callee_saved(VMRegImpl::stack2reg(gpr_offset(fp_off) / VMRegImpl::stack_slot_size + additional_frame_slots), FP->as_VMReg()); ++ __ st_d(RA, SP, gpr_offset(ra_off)); ++ map->set_callee_saved(VMRegImpl::stack2reg(gpr_offset(ra_off) / VMRegImpl::stack_slot_size + additional_frame_slots), RA->as_VMReg()); ++ ++ __ addi_d(FP, SP, gpr_offset(fp_off)); ++ ++ return map; ++} ++ ++ ++// Pop the current frame and restore all the registers that we ++// saved. 
++void RegisterSaver::restore_live_registers(MacroAssembler* masm) { ++ for (int i = 0; i < fpr_size; i++) { ++ FloatRegister fpr = as_FloatRegister(i); ++ int off = fpr_offset(i); ++ ++ if (_save_vectors && UseLASX) ++ __ xvld(fpr, SP, off); ++ else if (_save_vectors && UseLSX) ++ __ vld(fpr, SP, off); ++ else ++ __ fld_d(fpr, SP, off); ++ } ++ ++ for (int i = a0_off; i <= a7_off; i++) { ++ Register gpr = as_Register(A0->encoding() + (i - a0_off)); ++ int off = gpr_offset(i); ++ ++ __ ld_d(gpr, SP, gpr_offset(i)); ++ } ++ ++ for (int i = t0_off; i <= t6_off; i++) { ++ Register gpr = as_Register(T0->encoding() + (i - t0_off)); ++ int off = gpr_offset(i); ++ ++ __ ld_d(gpr, SP, gpr_offset(i)); ++ } ++ __ ld_d(T8, SP, gpr_offset(t8_off)); ++ ++ for (int i = s0_off; i <= s8_off; i++) { ++ Register gpr = as_Register(S0->encoding() + (i - s0_off)); ++ int off = gpr_offset(i); ++ ++ __ ld_d(gpr, SP, gpr_offset(i)); ++ } ++ ++ __ ld_d(FP, SP, gpr_offset(fp_off)); ++ __ ld_d(RA, SP, gpr_offset(ra_off)); ++ ++ __ addi_d(SP, SP, slots_save() * VMRegImpl::stack_slot_size); ++} ++ ++// Pop the current frame and restore the registers that might be holding ++// a result. ++void RegisterSaver::restore_result_registers(MacroAssembler* masm) { ++ // Just restore result register. Only used by deoptimization. By ++ // now any callee save register that needs to be restore to a c2 ++ // caller of the deoptee has been extracted into the vframeArray ++ // and will be stuffed into the c2i adapter we create for later ++ // restoration so only result registers need to be restored here. ++ ++ __ ld_d(V0, SP, gpr_offset(a0_off)); ++ __ ld_d(V1, SP, gpr_offset(a1_off)); ++ ++ __ fld_d(F0, SP, fpr_offset(fpr0_off)); ++ __ fld_d(F1, SP, fpr_offset(fpr1_off)); ++ ++ __ addi_d(SP, SP, gpr_offset(ra_off)); ++} ++ ++// Is vector's size (in bytes) bigger than a size saved by default? ++// 16 bytes XMM registers are saved by default using fxsave/fxrstor instructions. ++bool SharedRuntime::is_wide_vector(int size) { ++ return size > 16; ++} ++ ++size_t SharedRuntime::trampoline_size() { ++ return 32; ++} ++ ++void SharedRuntime::generate_trampoline(MacroAssembler *masm, address destination) { ++ // trampoline is not in CodeCache ++ __ li(T4, (long)destination); ++ __ jr(T4); ++} ++ ++// The java_calling_convention describes stack locations as ideal slots on ++// a frame with no abi restrictions. Since we must observe abi restrictions ++// (like the placement of the register window) the slots must be biased by ++// the following value. ++ ++static int reg2offset_in(VMReg r) { ++ // Account for saved fp and return address ++ // This should really be in_preserve_stack_slots ++ return (r->reg2stack() + 2 * VMRegImpl::slots_per_word) * VMRegImpl::stack_slot_size; // + 2 * VMRegImpl::stack_slot_size); ++} ++ ++static int reg2offset_out(VMReg r) { ++ return (r->reg2stack() + SharedRuntime::out_preserve_stack_slots()) * VMRegImpl::stack_slot_size; ++} ++ ++// --------------------------------------------------------------------------- ++// Read the array of BasicTypes from a signature, and compute where the ++// arguments should go. Values in the VMRegPair regs array refer to 4-byte ++// quantities. Values less than SharedInfo::stack0 are registers, those above ++// refer to 4-byte stack slots. All stack slots are based off of the stack pointer ++// as framesizes are fixed. ++// VMRegImpl::stack0 refers to the first slot 0(sp). ++// and VMRegImpl::stack0+1 refers to the memory word 4-byes higher. 
Register ++// up to RegisterImpl::number_of_registers) are the 32-bit ++// integer registers. ++ ++// Pass first five oop/int args in registers T0, A0 - A3. ++// Pass float/double/long args in stack. ++// Doubles have precedence, so if you pass a mix of floats and doubles ++// the doubles will grab the registers before the floats will. ++ ++// Note: the INPUTS in sig_bt are in units of Java argument words, which are ++// either 32-bit or 64-bit depending on the build. The OUTPUTS are in 32-bit ++// units regardless of build. ++ ++ ++// --------------------------------------------------------------------------- ++// The compiled Java calling convention. ++// Pass first five oop/int args in registers T0, A0 - A3. ++// Pass float/double/long args in stack. ++// Doubles have precedence, so if you pass a mix of floats and doubles ++// the doubles will grab the registers before the floats will. ++ ++int SharedRuntime::java_calling_convention(const BasicType *sig_bt, ++ VMRegPair *regs, ++ int total_args_passed, ++ int is_outgoing) { ++ ++ // Create the mapping between argument positions and registers. ++ static const Register INT_ArgReg[Argument::n_register_parameters + 1] = { ++ T0, A0, A1, A2, A3, A4, A5, A6, A7 ++ }; ++ static const FloatRegister FP_ArgReg[Argument::n_float_register_parameters] = { ++ FA0, FA1, FA2, FA3, FA4, FA5, FA6, FA7 ++ }; ++ ++ uint int_args = 0; ++ uint fp_args = 0; ++ uint stk_args = 0; // inc by 2 each time ++ ++ for (int i = 0; i < total_args_passed; i++) { ++ switch (sig_bt[i]) { ++ case T_VOID: ++ // halves of T_LONG or T_DOUBLE ++ assert(i != 0 && (sig_bt[i - 1] == T_LONG || sig_bt[i - 1] == T_DOUBLE), "expecting half"); ++ regs[i].set_bad(); ++ break; ++ case T_BOOLEAN: ++ case T_CHAR: ++ case T_BYTE: ++ case T_SHORT: ++ case T_INT: ++ if (int_args < Argument::n_register_parameters + 1) { ++ regs[i].set1(INT_ArgReg[int_args++]->as_VMReg()); ++ } else { ++ regs[i].set1(VMRegImpl::stack2reg(stk_args)); ++ stk_args += 2; ++ } ++ break; ++ case T_LONG: ++ assert(sig_bt[i + 1] == T_VOID, "expecting half"); ++ // fall through ++ case T_OBJECT: ++ case T_ARRAY: ++ case T_ADDRESS: ++ if (int_args < Argument::n_register_parameters + 1) { ++ regs[i].set2(INT_ArgReg[int_args++]->as_VMReg()); ++ } else { ++ regs[i].set2(VMRegImpl::stack2reg(stk_args)); ++ stk_args += 2; ++ } ++ break; ++ case T_FLOAT: ++ if (fp_args < Argument::n_float_register_parameters) { ++ regs[i].set1(FP_ArgReg[fp_args++]->as_VMReg()); ++ } else { ++ regs[i].set1(VMRegImpl::stack2reg(stk_args)); ++ stk_args += 2; ++ } ++ break; ++ case T_DOUBLE: ++ assert(sig_bt[i + 1] == T_VOID, "expecting half"); ++ if (fp_args < Argument::n_float_register_parameters) { ++ regs[i].set2(FP_ArgReg[fp_args++]->as_VMReg()); ++ } else { ++ regs[i].set2(VMRegImpl::stack2reg(stk_args)); ++ stk_args += 2; ++ } ++ break; ++ default: ++ ShouldNotReachHere(); ++ break; ++ } ++ } ++ ++ return round_to(stk_args, 2); ++} ++ ++// Patch the callers callsite with entry to compiled code if it exists. ++static void patch_callers_callsite(MacroAssembler *masm) { ++ Label L; ++ __ ld_ptr(AT, Rmethod, in_bytes(Method::code_offset())); ++ __ beq(AT, R0, L); ++ // Schedule the branch target address early. 
++ // Call into the VM to patch the caller, then jump to compiled callee ++ // T5 isn't live so capture return address while we easily can ++ __ move(T5, RA); ++ ++ __ pushad(); ++#ifdef COMPILER2 ++ // C2 may leave the stack dirty if not in SSE2+ mode ++ __ empty_FPU_stack(); ++#endif ++ ++ // VM needs caller's callsite ++ // VM needs target method ++ ++ __ move(A0, Rmethod); ++ __ move(A1, T5); ++ // we should preserve the return address ++ __ move(TSR, SP); ++ __ li(AT, -(StackAlignmentInBytes)); // align the stack ++ __ andr(SP, SP, AT); ++ __ call(CAST_FROM_FN_PTR(address, SharedRuntime::fixup_callers_callsite), ++ relocInfo::runtime_call_type); ++ ++ __ move(SP, TSR); ++ __ popad(); ++ __ bind(L); ++} ++ ++static void gen_c2i_adapter(MacroAssembler *masm, ++ int total_args_passed, ++ int comp_args_on_stack, ++ const BasicType *sig_bt, ++ const VMRegPair *regs, ++ Label& skip_fixup) { ++ ++ // Before we get into the guts of the C2I adapter, see if we should be here ++ // at all. We've come from compiled code and are attempting to jump to the ++ // interpreter, which means the caller made a static call to get here ++ // (vcalls always get a compiled target if there is one). Check for a ++ // compiled target. If there is one, we need to patch the caller's call. ++ // However we will run interpreted if we come thru here. The next pass ++ // thru the call site will run compiled. If we ran compiled here then ++ // we can (theorectically) do endless i2c->c2i->i2c transitions during ++ // deopt/uncommon trap cycles. If we always go interpreted here then ++ // we can have at most one and don't need to play any tricks to keep ++ // from endlessly growing the stack. ++ // ++ // Actually if we detected that we had an i2c->c2i transition here we ++ // ought to be able to reset the world back to the state of the interpreted ++ // call and not bother building another interpreter arg area. We don't ++ // do that at this point. ++ ++ patch_callers_callsite(masm); ++ __ bind(skip_fixup); ++ ++#ifdef COMPILER2 ++ __ empty_FPU_stack(); ++#endif ++ //this is for native ? ++ // Since all args are passed on the stack, total_args_passed * interpreter_ ++ // stack_element_size is the ++ // space we need. ++ int extraspace = total_args_passed * Interpreter::stackElementSize; ++ ++ // stack is aligned, keep it that way ++ extraspace = round_to(extraspace, 2*wordSize); ++ ++ // Get return address ++ __ move(T5, RA); ++ // set senderSP value ++ //refer to interpreter_loongarch.cpp:generate_asm_entry ++ __ move(Rsender, SP); ++ __ addi_d(SP, SP, -extraspace); ++ ++ // Now write the args into the outgoing interpreter space ++ for (int i = 0; i < total_args_passed; i++) { ++ if (sig_bt[i] == T_VOID) { ++ assert(i > 0 && (sig_bt[i-1] == T_LONG || sig_bt[i-1] == T_DOUBLE), "missing half"); ++ continue; ++ } ++ ++ // st_off points to lowest address on stack. 
++ int st_off = ((total_args_passed - 1) - i) * Interpreter::stackElementSize; ++ // Say 4 args: ++ // i st_off ++ // 0 12 T_LONG ++ // 1 8 T_VOID ++ // 2 4 T_OBJECT ++ // 3 0 T_BOOL ++ VMReg r_1 = regs[i].first(); ++ VMReg r_2 = regs[i].second(); ++ if (!r_1->is_valid()) { ++ assert(!r_2->is_valid(), ""); ++ continue; ++ } ++ if (r_1->is_stack()) { ++ // memory to memory use fpu stack top ++ int ld_off = r_1->reg2stack() * VMRegImpl::stack_slot_size + extraspace; ++ if (!r_2->is_valid()) { ++ __ ld_ptr(AT, Address(SP, ld_off)); ++ __ st_ptr(AT, Address(SP, st_off)); ++ ++ } else { ++ ++ ++ int next_off = st_off - Interpreter::stackElementSize; ++ __ ld_ptr(AT, Address(SP, ld_off)); ++ __ st_ptr(AT, Address(SP, st_off)); ++ ++ // Ref to is_Register condition ++ if(sig_bt[i] == T_LONG || sig_bt[i] == T_DOUBLE) ++ __ st_ptr(AT, SP, st_off - 8); ++ } ++ } else if (r_1->is_Register()) { ++ Register r = r_1->as_Register(); ++ if (!r_2->is_valid()) { ++ __ st_d(r, SP, st_off); ++ } else { ++ //FIXME, LA will not enter here ++ // long/double in gpr ++ __ st_d(r, SP, st_off); ++ // In [java/util/zip/ZipFile.java] ++ // ++ // private static native long open(String name, int mode, long lastModified); ++ // private static native int getTotal(long jzfile); ++ // ++ // We need to transfer T_LONG paramenters from a compiled method to a native method. ++ // It's a complex process: ++ // ++ // Caller -> lir_static_call -> gen_resolve_stub ++ // -> -- resolve_static_call_C ++ // `- gen_c2i_adapter() [*] ++ // | ++ // `- AdapterHandlerLibrary::get_create_apapter_index ++ // -> generate_native_entry ++ // -> InterpreterRuntime::SignatureHandlerGenerator::pass_long [**] ++ // ++ // In [**], T_Long parameter is stored in stack as: ++ // ++ // (high) ++ // | | ++ // ----------- ++ // | 8 bytes | ++ // | (void) | ++ // ----------- ++ // | 8 bytes | ++ // | (long) | ++ // ----------- ++ // | | ++ // (low) ++ // ++ // However, the sequence is reversed here: ++ // ++ // (high) ++ // | | ++ // ----------- ++ // | 8 bytes | ++ // | (long) | ++ // ----------- ++ // | 8 bytes | ++ // | (void) | ++ // ----------- ++ // | | ++ // (low) ++ // ++ // So I stored another 8 bytes in the T_VOID slot. It then can be accessed from generate_native_entry(). ++ // ++ if (sig_bt[i] == T_LONG) ++ __ st_d(r, SP, st_off - 8); ++ } ++ } else if (r_1->is_FloatRegister()) { ++ assert(sig_bt[i] == T_FLOAT || sig_bt[i] == T_DOUBLE, "Must be a float register"); ++ ++ FloatRegister fr = r_1->as_FloatRegister(); ++ if (sig_bt[i] == T_FLOAT) ++ __ fst_s(fr, SP, st_off); ++ else { ++ __ fst_d(fr, SP, st_off); ++ __ fst_d(fr, SP, st_off - 8); // T_DOUBLE needs two slots ++ } ++ } ++ } ++ ++ // Schedule the branch target address early. ++ __ ld_ptr(AT, Rmethod, in_bytes(Method::interpreter_entry_offset()) ); ++ // And repush original return address ++ __ move(RA, T5); ++ __ jr (AT); ++} ++ ++void SharedRuntime::gen_i2c_adapter(MacroAssembler *masm, ++ int total_args_passed, ++ int comp_args_on_stack, ++ const BasicType *sig_bt, ++ const VMRegPair *regs) { ++ ++ // Generate an I2C adapter: adjust the I-frame to make space for the C-frame ++ // layout. Lesp was saved by the calling I-frame and will be restored on ++ // return. Meanwhile, outgoing arg space is all owned by the callee ++ // C-frame, so we can mangle it at will. After adjusting the frame size, ++ // hoist register arguments and repack other args according to the compiled ++ // code convention. Finally, end in a jump to the compiled code. 
The entry ++ // point address is the start of the buffer. ++ ++ // We will only enter here from an interpreted frame and never from after ++ // passing thru a c2i. Azul allowed this but we do not. If we lose the ++ // race and use a c2i we will remain interpreted for the race loser(s). ++ // This removes all sorts of headaches on the LA side and also eliminates ++ // the possibility of having c2i -> i2c -> c2i -> ... endless transitions. ++ ++ __ move(T4, SP); ++ ++ // Cut-out for having no stack args. Since up to 2 int/oop args are passed ++ // in registers, we will occasionally have no stack args. ++ int comp_words_on_stack = 0; ++ if (comp_args_on_stack) { ++ // Sig words on the stack are greater-than VMRegImpl::stack0. Those in ++ // registers are below. By subtracting stack0, we either get a negative ++ // number (all values in registers) or the maximum stack slot accessed. ++ // int comp_args_on_stack = VMRegImpl::reg2stack(max_arg); ++ // Convert 4-byte stack slots to words. ++ // did LA need round? FIXME ++ comp_words_on_stack = round_to(comp_args_on_stack*4, wordSize)>>LogBytesPerWord; ++ // Round up to miminum stack alignment, in wordSize ++ comp_words_on_stack = round_to(comp_words_on_stack, 2); ++ __ addi_d(SP, SP, -comp_words_on_stack * wordSize); ++ } ++ ++ // Align the outgoing SP ++ __ li(AT, -(StackAlignmentInBytes)); ++ __ andr(SP, SP, AT); ++ // push the return address on the stack (note that pushing, rather ++ // than storing it, yields the correct frame alignment for the callee) ++ // Put saved SP in another register ++ const Register saved_sp = T5; ++ __ move(saved_sp, T4); ++ ++ ++ // Will jump to the compiled code just as if compiled code was doing it. ++ // Pre-load the register-jump target early, to schedule it better. ++ __ ld_d(T4, Rmethod, in_bytes(Method::from_compiled_offset())); ++ ++#if INCLUDE_JVMCI ++ if (EnableJVMCI) { ++ // check if this call should be routed towards a specific entry point ++ __ ld_d(AT, Address(TREG, in_bytes(JavaThread::jvmci_alternate_call_target_offset()))); ++ Label no_alternative_target; ++ __ beqz(AT, no_alternative_target); ++ __ move(T4, AT); ++ __ st_d(R0, Address(TREG, in_bytes(JavaThread::jvmci_alternate_call_target_offset()))); ++ __ bind(no_alternative_target); ++ } ++#endif // INCLUDE_JVMCI ++ ++ // Now generate the shuffle code. Pick up all register args and move the ++ // rest through the floating point stack top. ++ for (int i = 0; i < total_args_passed; i++) { ++ if (sig_bt[i] == T_VOID) { ++ // Longs and doubles are passed in native word order, but misaligned ++ // in the 32-bit build. ++ assert(i > 0 && (sig_bt[i-1] == T_LONG || sig_bt[i-1] == T_DOUBLE), "missing half"); ++ continue; ++ } ++ ++ // Pick up 0, 1 or 2 words from SP+offset. ++ ++ assert(!regs[i].second()->is_valid() || regs[i].first()->next() == regs[i].second(), "scrambled load targets?"); ++ // Load in argument order going down. ++ int ld_off = (total_args_passed -1 - i)*Interpreter::stackElementSize; ++ // Point to interpreter value (vs. tag) ++ int next_off = ld_off - Interpreter::stackElementSize; ++ VMReg r_1 = regs[i].first(); ++ VMReg r_2 = regs[i].second(); ++ if (!r_1->is_valid()) { ++ assert(!r_2->is_valid(), ""); ++ continue; ++ } ++ if (r_1->is_stack()) { ++ // Convert stack slot to an SP offset (+ wordSize to ++ // account for return address ) ++ // NOTICE HERE!!!! 
I sub a wordSize here ++ int st_off = regs[i].first()->reg2stack()*VMRegImpl::stack_slot_size; ++ //+ wordSize; ++ ++ if (!r_2->is_valid()) { ++ __ ld_d(AT, saved_sp, ld_off); ++ __ st_d(AT, SP, st_off); ++ } else { ++ // Interpreter local[n] == MSW, local[n+1] == LSW however locals ++ // are accessed as negative so LSW is at LOW address ++ ++ // ld_off is MSW so get LSW ++ // st_off is LSW (i.e. reg.first()) ++ ++ // [./org/eclipse/swt/graphics/GC.java] ++ // void drawImageXRender(Image srcImage, int srcX, int srcY, int srcWidth, int srcHeight, ++ // int destX, int destY, int destWidth, int destHeight, ++ // boolean simple, ++ // int imgWidth, int imgHeight, ++ // long maskPixmap, <-- Pass T_LONG in stack ++ // int maskType); ++ // Before this modification, Eclipse displays icons with solid black background. ++ // ++ __ ld_d(AT, saved_sp, ld_off); ++ if (sig_bt[i] == T_LONG || sig_bt[i] == T_DOUBLE) ++ __ ld_d(AT, saved_sp, ld_off - 8); ++ __ st_d(AT, SP, st_off); ++ } ++ } else if (r_1->is_Register()) { // Register argument ++ Register r = r_1->as_Register(); ++ if (r_2->is_valid()) { ++ // Remember r_1 is low address (and LSB on LA) ++ // So r_2 gets loaded from high address regardless of the platform ++ assert(r_2->as_Register() == r_1->as_Register(), ""); ++ __ ld_d(r, saved_sp, ld_off); ++ ++ // ++ // For T_LONG type, the real layout is as below: ++ // ++ // (high) ++ // | | ++ // ----------- ++ // | 8 bytes | ++ // | (void) | ++ // ----------- ++ // | 8 bytes | ++ // | (long) | ++ // ----------- ++ // | | ++ // (low) ++ // ++ // We should load the low-8 bytes. ++ // ++ if (sig_bt[i] == T_LONG) ++ __ ld_d(r, saved_sp, ld_off - 8); ++ } else { ++ __ ld_w(r, saved_sp, ld_off); ++ } ++ } else if (r_1->is_FloatRegister()) { // Float Register ++ assert(sig_bt[i] == T_FLOAT || sig_bt[i] == T_DOUBLE, "Must be a float register"); ++ ++ FloatRegister fr = r_1->as_FloatRegister(); ++ if (sig_bt[i] == T_FLOAT) ++ __ fld_s(fr, saved_sp, ld_off); ++ else { ++ __ fld_d(fr, saved_sp, ld_off); ++ __ fld_d(fr, saved_sp, ld_off - 8); ++ } ++ } ++ } ++ ++ // 6243940 We might end up in handle_wrong_method if ++ // the callee is deoptimized as we race thru here. If that ++ // happens we don't want to take a safepoint because the ++ // caller frame will look interpreted and arguments are now ++ // "compiled" so it is much better to make this transition ++ // invisible to the stack walking code. Unfortunately if ++ // we try and find the callee by normal means a safepoint ++ // is possible. So we stash the desired callee in the thread ++ // and the vm will find there should this case occur. ++#ifndef OPT_THREAD ++ Register thread = T8; ++ __ get_thread(thread); ++#else ++ Register thread = TREG; ++#endif ++ __ st_d(Rmethod, thread, in_bytes(JavaThread::callee_target_offset())); ++ ++ // move methodOop to T5 in case we end up in an c2i adapter. ++ // the c2i adapters expect methodOop in T5 (c2) because c2's ++ // resolve stubs return the result (the method) in T5. ++ // I'd love to fix this. 
++ __ move(T5, Rmethod); ++ __ jr(T4); ++} ++ ++// --------------------------------------------------------------- ++AdapterHandlerEntry* SharedRuntime::generate_i2c2i_adapters(MacroAssembler *masm, ++ int total_args_passed, ++ int comp_args_on_stack, ++ const BasicType *sig_bt, ++ const VMRegPair *regs, ++ AdapterFingerPrint* fingerprint) { ++ address i2c_entry = __ pc(); ++ ++ gen_i2c_adapter(masm, total_args_passed, comp_args_on_stack, sig_bt, regs); ++ ++ // ------------------------------------------------------------------------- ++ // Generate a C2I adapter. On entry we know G5 holds the methodOop. The ++ // args start out packed in the compiled layout. They need to be unpacked ++ // into the interpreter layout. This will almost always require some stack ++ // space. We grow the current (compiled) stack, then repack the args. We ++ // finally end in a jump to the generic interpreter entry point. On exit ++ // from the interpreter, the interpreter will restore our SP (lest the ++ // compiled code, which relys solely on SP and not FP, get sick). ++ ++ address c2i_unverified_entry = __ pc(); ++ Label skip_fixup; ++ { ++ Register holder = T1; ++ Register receiver = T0; ++ Register temp = T8; ++ address ic_miss = SharedRuntime::get_ic_miss_stub(); ++ ++ Label missed; ++ ++ //add for compressedoops ++ __ load_klass(temp, receiver); ++ ++ __ ld_ptr(AT, holder, CompiledICHolder::holder_klass_offset()); ++ __ ld_ptr(Rmethod, holder, CompiledICHolder::holder_metadata_offset()); ++ __ bne(AT, temp, missed); ++ // Method might have been compiled since the call site was patched to ++ // interpreted if that is the case treat it as a miss so we can get ++ // the call site corrected. ++ __ ld_ptr(AT, Rmethod, in_bytes(Method::code_offset())); ++ __ beq(AT, R0, skip_fixup); ++ __ bind(missed); ++ ++ __ jmp(ic_miss, relocInfo::runtime_call_type); ++ } ++ address c2i_entry = __ pc(); ++ ++ gen_c2i_adapter(masm, total_args_passed, comp_args_on_stack, sig_bt, regs, skip_fixup); ++ ++ __ flush(); ++ return AdapterHandlerLibrary::new_entry(fingerprint, i2c_entry, c2i_entry, c2i_unverified_entry); ++} ++ ++int SharedRuntime::c_calling_convention(const BasicType *sig_bt, ++ VMRegPair *regs, ++ VMRegPair *regs2, ++ int total_args_passed) { ++ assert(regs2 == NULL, "not needed on LA"); ++ // Return the number of VMReg stack_slots needed for the args. ++ // This value does not include an abi space (like register window ++ // save area). ++ ++ // We return the amount of VMReg stack slots we need to reserve for all ++ // the arguments NOT counting out_preserve_stack_slots. Since we always ++ // have space for storing at least 6 registers to memory we start with that. ++ // See int_stk_helper for a further discussion. ++ // We return the amount of VMRegImpl stack slots we need to reserve for all ++ // the arguments NOT counting out_preserve_stack_slots. 
++ static const Register INT_ArgReg[Argument::n_register_parameters] = { ++ A0, A1, A2, A3, A4, A5, A6, A7 ++ }; ++ static const FloatRegister FP_ArgReg[Argument::n_float_register_parameters] = { ++ FA0, FA1, FA2, FA3, FA4, FA5, FA6, FA7 ++ }; ++ uint int_args = 0; ++ uint fp_args = 0; ++ uint stk_args = 0; // inc by 2 each time ++ ++// Example: ++// n java.lang.UNIXProcess::forkAndExec ++// private native int forkAndExec(byte[] prog, ++// byte[] argBlock, int argc, ++// byte[] envBlock, int envc, ++// byte[] dir, ++// boolean redirectErrorStream, ++// FileDescriptor stdin_fd, ++// FileDescriptor stdout_fd, ++// FileDescriptor stderr_fd) ++// JNIEXPORT jint JNICALL ++// Java_java_lang_UNIXProcess_forkAndExec(JNIEnv *env, ++// jobject process, ++// jbyteArray prog, ++// jbyteArray argBlock, jint argc, ++// jbyteArray envBlock, jint envc, ++// jbyteArray dir, ++// jboolean redirectErrorStream, ++// jobject stdin_fd, ++// jobject stdout_fd, ++// jobject stderr_fd) ++// ++// ::c_calling_convention ++// 0: // env <-- a0 ++// 1: L // klass/obj <-- t0 => a1 ++// 2: [ // prog[] <-- a0 => a2 ++// 3: [ // argBlock[] <-- a1 => a3 ++// 4: I // argc <-- a2 => a4 ++// 5: [ // envBlock[] <-- a3 => a5 ++// 6: I // envc <-- a4 => a5 ++// 7: [ // dir[] <-- a5 => a7 ++// 8: Z // redirectErrorStream <-- a6 => sp[0] ++// 9: L // stdin <-- a7 => sp[8] ++// 10: L // stdout fp[16] => sp[16] ++// 11: L // stderr fp[24] => sp[24] ++// ++ for (int i = 0; i < total_args_passed; i++) { ++ switch (sig_bt[i]) { ++ case T_VOID: // Halves of longs and doubles ++ assert(i != 0 && (sig_bt[i - 1] == T_LONG || sig_bt[i - 1] == T_DOUBLE), "expecting half"); ++ regs[i].set_bad(); ++ break; ++ case T_BOOLEAN: ++ case T_CHAR: ++ case T_BYTE: ++ case T_SHORT: ++ case T_INT: ++ if (int_args < Argument::n_register_parameters) { ++ regs[i].set1(INT_ArgReg[int_args++]->as_VMReg()); ++ } else { ++ regs[i].set1(VMRegImpl::stack2reg(stk_args)); ++ stk_args += 2; ++ } ++ break; ++ case T_LONG: ++ assert(sig_bt[i + 1] == T_VOID, "expecting half"); ++ // fall through ++ case T_OBJECT: ++ case T_ARRAY: ++ case T_ADDRESS: ++ case T_METADATA: ++ if (int_args < Argument::n_register_parameters) { ++ regs[i].set2(INT_ArgReg[int_args++]->as_VMReg()); ++ } else { ++ regs[i].set2(VMRegImpl::stack2reg(stk_args)); ++ stk_args += 2; ++ } ++ break; ++ case T_FLOAT: ++ if (fp_args < Argument::n_float_register_parameters) { ++ regs[i].set1(FP_ArgReg[fp_args++]->as_VMReg()); ++ } else if (int_args < Argument::n_register_parameters) { ++ regs[i].set1(INT_ArgReg[int_args++]->as_VMReg()); ++ } else { ++ regs[i].set1(VMRegImpl::stack2reg(stk_args)); ++ stk_args += 2; ++ } ++ break; ++ case T_DOUBLE: ++ assert(sig_bt[i + 1] == T_VOID, "expecting half"); ++ if (fp_args < Argument::n_float_register_parameters) { ++ regs[i].set2(FP_ArgReg[fp_args++]->as_VMReg()); ++ } else if (int_args < Argument::n_register_parameters) { ++ regs[i].set2(INT_ArgReg[int_args++]->as_VMReg()); ++ } else { ++ regs[i].set2(VMRegImpl::stack2reg(stk_args)); ++ stk_args += 2; ++ } ++ break; ++ default: ++ ShouldNotReachHere(); ++ break; ++ } ++ } ++ ++ return round_to(stk_args, 2); ++} ++ ++// --------------------------------------------------------------------------- ++void SharedRuntime::save_native_result(MacroAssembler *masm, BasicType ret_type, int frame_slots) { ++ // We always ignore the frame_slots arg and just use the space just below frame pointer ++ // which by this time is free to use ++ switch (ret_type) { ++ case T_FLOAT: ++ __ fst_s(FSF, FP, -wordSize); ++ break; ++ case 
T_DOUBLE: ++ __ fst_d(FSF, FP, -wordSize ); ++ break; ++ case T_VOID: break; ++ case T_LONG: ++ __ st_d(V0, FP, -wordSize); ++ break; ++ case T_OBJECT: ++ case T_ARRAY: ++ __ st_d(V0, FP, -wordSize); ++ break; ++ default: { ++ __ st_w(V0, FP, -wordSize); ++ } ++ } ++} ++ ++void SharedRuntime::restore_native_result(MacroAssembler *masm, BasicType ret_type, int frame_slots) { ++ // We always ignore the frame_slots arg and just use the space just below frame pointer ++ // which by this time is free to use ++ switch (ret_type) { ++ case T_FLOAT: ++ __ fld_s(FSF, FP, -wordSize); ++ break; ++ case T_DOUBLE: ++ __ fld_d(FSF, FP, -wordSize ); ++ break; ++ case T_LONG: ++ __ ld_d(V0, FP, -wordSize); ++ break; ++ case T_VOID: break; ++ case T_OBJECT: ++ case T_ARRAY: ++ __ ld_d(V0, FP, -wordSize); ++ break; ++ default: { ++ __ ld_w(V0, FP, -wordSize); ++ } ++ } ++} ++ ++static void save_args(MacroAssembler *masm, int arg_count, int first_arg, VMRegPair *args) { ++ for ( int i = first_arg ; i < arg_count ; i++ ) { ++ if (args[i].first()->is_Register()) { ++ __ push(args[i].first()->as_Register()); ++ } else if (args[i].first()->is_FloatRegister()) { ++ __ push(args[i].first()->as_FloatRegister()); ++ } ++ } ++} ++ ++static void restore_args(MacroAssembler *masm, int arg_count, int first_arg, VMRegPair *args) { ++ for ( int i = arg_count - 1 ; i >= first_arg ; i-- ) { ++ if (args[i].first()->is_Register()) { ++ __ pop(args[i].first()->as_Register()); ++ } else if (args[i].first()->is_FloatRegister()) { ++ __ pop(args[i].first()->as_FloatRegister()); ++ } ++ } ++} ++ ++// A simple move of integer like type ++static void simple_move32(MacroAssembler* masm, VMRegPair src, VMRegPair dst) { ++ if (src.first()->is_stack()) { ++ if (dst.first()->is_stack()) { ++ // stack to stack ++ __ ld_w(AT, FP, reg2offset_in(src.first())); ++ __ st_d(AT, SP, reg2offset_out(dst.first())); ++ } else { ++ // stack to reg ++ __ ld_w(dst.first()->as_Register(), FP, reg2offset_in(src.first())); ++ } ++ } else if (dst.first()->is_stack()) { ++ // reg to stack ++ __ st_d(src.first()->as_Register(), SP, reg2offset_out(dst.first())); ++ } else { ++ if (dst.first() != src.first()){ ++ __ move(dst.first()->as_Register(), src.first()->as_Register()); ++ } ++ } ++} ++ ++// An oop arg. Must pass a handle not the oop itself ++static void object_move(MacroAssembler* masm, ++ OopMap* map, ++ int oop_handle_offset, ++ int framesize_in_slots, ++ VMRegPair src, ++ VMRegPair dst, ++ bool is_receiver, ++ int* receiver_offset) { ++ ++ // must pass a handle. 
First figure out the location we use as a handle ++ ++ if (src.first()->is_stack()) { ++ // Oop is already on the stack as an argument ++ Register rHandle = T5; ++ Label nil; ++ __ xorr(rHandle, rHandle, rHandle); ++ __ ld_d(AT, FP, reg2offset_in(src.first())); ++ __ beq(AT, R0, nil); ++ __ lea(rHandle, Address(FP, reg2offset_in(src.first()))); ++ __ bind(nil); ++ if(dst.first()->is_stack())__ st_d( rHandle, SP, reg2offset_out(dst.first())); ++ else __ move( (dst.first())->as_Register(), rHandle); ++ ++ int offset_in_older_frame = src.first()->reg2stack() + SharedRuntime::out_preserve_stack_slots(); ++ map->set_oop(VMRegImpl::stack2reg(offset_in_older_frame + framesize_in_slots)); ++ if (is_receiver) { ++ *receiver_offset = (offset_in_older_frame + framesize_in_slots) * VMRegImpl::stack_slot_size; ++ } ++ } else { ++ // Oop is in an a register we must store it to the space we reserve ++ // on the stack for oop_handles ++ const Register rOop = src.first()->as_Register(); ++ assert( (rOop->encoding() >= A0->encoding()) && (rOop->encoding() <= T0->encoding()),"wrong register"); ++ const Register rHandle = T5; ++ //Important: refer to java_calling_convertion ++ int oop_slot = (rOop->encoding() - A0->encoding()) * VMRegImpl::slots_per_word + oop_handle_offset; ++ int offset = oop_slot*VMRegImpl::stack_slot_size; ++ Label skip; ++ __ st_d( rOop , SP, offset ); ++ map->set_oop(VMRegImpl::stack2reg(oop_slot)); ++ __ xorr( rHandle, rHandle, rHandle); ++ __ beq(rOop, R0, skip); ++ __ lea(rHandle, Address(SP, offset)); ++ __ bind(skip); ++ // Store the handle parameter ++ if(dst.first()->is_stack())__ st_d( rHandle, SP, reg2offset_out(dst.first())); ++ else __ move((dst.first())->as_Register(), rHandle); ++ ++ if (is_receiver) { ++ *receiver_offset = offset; ++ } ++ } ++} ++ ++// A float arg may have to do float reg int reg conversion ++static void float_move(MacroAssembler* masm, VMRegPair src, VMRegPair dst) { ++ assert(!src.second()->is_valid() && !dst.second()->is_valid(), "bad float_move"); ++ if (src.first()->is_stack()) { ++ // stack to stack/reg ++ if (dst.first()->is_stack()) { ++ __ ld_w(AT, FP, reg2offset_in(src.first())); ++ __ st_w(AT, SP, reg2offset_out(dst.first())); ++ } else if (dst.first()->is_FloatRegister()) { ++ __ fld_s(dst.first()->as_FloatRegister(), FP, reg2offset_in(src.first())); ++ } else { ++ __ ld_w(dst.first()->as_Register(), FP, reg2offset_in(src.first())); ++ } ++ } else { ++ // reg to stack/reg ++ if(dst.first()->is_stack()) { ++ __ fst_s(src.first()->as_FloatRegister(), SP, reg2offset_out(dst.first())); ++ } else if (dst.first()->is_FloatRegister()) { ++ __ fmov_s(dst.first()->as_FloatRegister(), src.first()->as_FloatRegister()); ++ } else { ++ __ movfr2gr_s(dst.first()->as_Register(), src.first()->as_FloatRegister()); ++ } ++ } ++} ++ ++// A long move ++static void long_move(MacroAssembler* masm, VMRegPair src, VMRegPair dst) { ++ ++ // The only legal possibility for a long_move VMRegPair is: ++ // 1: two stack slots (possibly unaligned) ++ // as neither the java or C calling convention will use registers ++ // for longs. 
++ if (src.first()->is_stack()) { ++ assert(src.second()->is_stack() && dst.second()->is_stack(), "must be all stack"); ++ if( dst.first()->is_stack()){ ++ __ ld_d(AT, FP, reg2offset_in(src.first())); ++ __ st_d(AT, SP, reg2offset_out(dst.first())); ++ } else { ++ __ ld_d(dst.first()->as_Register(), FP, reg2offset_in(src.first())); ++ } ++ } else { ++ if( dst.first()->is_stack()){ ++ __ st_d(src.first()->as_Register(), SP, reg2offset_out(dst.first())); ++ } else { ++ __ move(dst.first()->as_Register(), src.first()->as_Register()); ++ } ++ } ++} ++ ++// A double move ++static void double_move(MacroAssembler* masm, VMRegPair src, VMRegPair dst) { ++ ++ // The only legal possibilities for a double_move VMRegPair are: ++ // The painful thing here is that like long_move a VMRegPair might be ++ ++ // Because of the calling convention we know that src is either ++ // 1: a single physical register (xmm registers only) ++ // 2: two stack slots (possibly unaligned) ++ // dst can only be a pair of stack slots. ++ ++ if (src.first()->is_stack()) { ++ // source is all stack ++ if( dst.first()->is_stack()){ ++ __ ld_d(AT, FP, reg2offset_in(src.first())); ++ __ st_d(AT, SP, reg2offset_out(dst.first())); ++ } else if (dst.first()->is_FloatRegister()) { ++ __ fld_d(dst.first()->as_FloatRegister(), FP, reg2offset_in(src.first())); ++ } else { ++ __ ld_d(dst.first()->as_Register(), FP, reg2offset_in(src.first())); ++ } ++ } else { ++ // reg to stack/reg ++ // No worries about stack alignment ++ if( dst.first()->is_stack()){ ++ __ fst_d(src.first()->as_FloatRegister(), SP, reg2offset_out(dst.first())); ++ } else if (dst.first()->is_FloatRegister()) { ++ __ fmov_d(dst.first()->as_FloatRegister(), src.first()->as_FloatRegister()); ++ } else { ++ __ movfr2gr_d(dst.first()->as_Register(), src.first()->as_FloatRegister()); ++ } ++ } ++} ++ ++static void verify_oop_args(MacroAssembler* masm, ++ methodHandle method, ++ const BasicType* sig_bt, ++ const VMRegPair* regs) { ++ Register temp_reg = T4; // not part of any compiled calling seq ++ if (VerifyOops) { ++ for (int i = 0; i < method->size_of_parameters(); i++) { ++ if (sig_bt[i] == T_OBJECT || ++ sig_bt[i] == T_ARRAY) { ++ VMReg r = regs[i].first(); ++ assert(r->is_valid(), "bad oop arg"); ++ if (r->is_stack()) { ++ __ ld_d(temp_reg, Address(SP, r->reg2stack() * VMRegImpl::stack_slot_size + wordSize)); ++ __ verify_oop(temp_reg); ++ } else { ++ __ verify_oop(r->as_Register()); ++ } ++ } ++ } ++ } ++} ++ ++static void gen_special_dispatch(MacroAssembler* masm, ++ methodHandle method, ++ const BasicType* sig_bt, ++ const VMRegPair* regs) { ++ verify_oop_args(masm, method, sig_bt, regs); ++ vmIntrinsics::ID iid = method->intrinsic_id(); ++ ++ // Now write the args into the outgoing interpreter space ++ bool has_receiver = false; ++ Register receiver_reg = noreg; ++ int member_arg_pos = -1; ++ Register member_reg = noreg; ++ int ref_kind = MethodHandles::signature_polymorphic_intrinsic_ref_kind(iid); ++ if (ref_kind != 0) { ++ member_arg_pos = method->size_of_parameters() - 1; // trailing MemberName argument ++ member_reg = S3; // known to be free at this point ++ has_receiver = MethodHandles::ref_kind_has_receiver(ref_kind); ++ } else if (iid == vmIntrinsics::_invokeBasic) { ++ has_receiver = true; ++ } else { ++ fatal("unexpected intrinsic id %d", iid); ++ } ++ ++ if (member_reg != noreg) { ++ // Load the member_arg into register, if necessary. 
++ SharedRuntime::check_member_name_argument_is_last_argument(method, sig_bt, regs); ++ VMReg r = regs[member_arg_pos].first(); ++ if (r->is_stack()) { ++ __ ld_d(member_reg, Address(SP, r->reg2stack() * VMRegImpl::stack_slot_size)); ++ } else { ++ // no data motion is needed ++ member_reg = r->as_Register(); ++ } ++ } ++ ++ if (has_receiver) { ++ // Make sure the receiver is loaded into a register. ++ assert(method->size_of_parameters() > 0, "oob"); ++ assert(sig_bt[0] == T_OBJECT, "receiver argument must be an object"); ++ VMReg r = regs[0].first(); ++ assert(r->is_valid(), "bad receiver arg"); ++ if (r->is_stack()) { ++ // Porting note: This assumes that compiled calling conventions always ++ // pass the receiver oop in a register. If this is not true on some ++ // platform, pick a temp and load the receiver from stack. ++ fatal("receiver always in a register"); ++ receiver_reg = SSR; // known to be free at this point ++ __ ld_d(receiver_reg, Address(SP, r->reg2stack() * VMRegImpl::stack_slot_size)); ++ } else { ++ // no data motion is needed ++ receiver_reg = r->as_Register(); ++ } ++ } ++ ++ // Figure out which address we are really jumping to: ++ MethodHandles::generate_method_handle_dispatch(masm, iid, ++ receiver_reg, member_reg, /*for_compiler_entry:*/ true); ++} ++ ++// --------------------------------------------------------------------------- ++// Generate a native wrapper for a given method. The method takes arguments ++// in the Java compiled code convention, marshals them to the native ++// convention (handlizes oops, etc), transitions to native, makes the call, ++// returns to java state (possibly blocking), unhandlizes any result and ++// returns. ++nmethod *SharedRuntime::generate_native_wrapper(MacroAssembler* masm, ++ const methodHandle& method, ++ int compile_id, ++ BasicType* in_sig_bt, ++ VMRegPair* in_regs, ++ BasicType ret_type, ++ address critical_entry) { ++ if (method->is_method_handle_intrinsic()) { ++ vmIntrinsics::ID iid = method->intrinsic_id(); ++ intptr_t start = (intptr_t)__ pc(); ++ int vep_offset = ((intptr_t)__ pc()) - start; ++ gen_special_dispatch(masm, ++ method, ++ in_sig_bt, ++ in_regs); ++ assert(((intptr_t)__ pc() - start - vep_offset) >= 1 * BytesPerInstWord, ++ "valid size for make_non_entrant"); ++ int frame_complete = ((intptr_t)__ pc()) - start; // not complete, period ++ __ flush(); ++ int stack_slots = SharedRuntime::out_preserve_stack_slots(); // no out slots at all, actually ++ return nmethod::new_native_nmethod(method, ++ compile_id, ++ masm->code(), ++ vep_offset, ++ frame_complete, ++ stack_slots / VMRegImpl::slots_per_word, ++ in_ByteSize(-1), ++ in_ByteSize(-1), ++ (OopMapSet*)NULL); ++ } ++ ++ bool is_critical_native = true; ++ address native_func = critical_entry; ++ if (native_func == NULL) { ++ native_func = method->native_function(); ++ is_critical_native = false; ++ } ++ assert(native_func != NULL, "must have function"); ++ ++ // Native nmethod wrappers never take possesion of the oop arguments. ++ // So the caller will gc the arguments. The only thing we need an ++ // oopMap for is if the call is static ++ // ++ // An OopMap for lock (and class if static), and one for the VM call itself ++ OopMapSet *oop_maps = new OopMapSet(); ++ ++ // We have received a description of where all the java arg are located ++ // on entry to the wrapper. We need to convert these args to where ++ // the jni function will expect them. 
To figure out where they go ++ // we convert the java signature to a C signature by inserting ++ // the hidden arguments as arg[0] and possibly arg[1] (static method) ++ ++ const int total_in_args = method->size_of_parameters(); ++ int total_c_args = total_in_args; ++ if (!is_critical_native) { ++ total_c_args += 1; ++ if (method->is_static()) { ++ total_c_args++; ++ } ++ } else { ++ for (int i = 0; i < total_in_args; i++) { ++ if (in_sig_bt[i] == T_ARRAY) { ++ total_c_args++; ++ } ++ } ++ } ++ ++ BasicType* out_sig_bt = NEW_RESOURCE_ARRAY(BasicType, total_c_args); ++ VMRegPair* out_regs = NEW_RESOURCE_ARRAY(VMRegPair, total_c_args); ++ BasicType* in_elem_bt = NULL; ++ ++ int argc = 0; ++ if (!is_critical_native) { ++ out_sig_bt[argc++] = T_ADDRESS; ++ if (method->is_static()) { ++ out_sig_bt[argc++] = T_OBJECT; ++ } ++ ++ for (int i = 0; i < total_in_args ; i++ ) { ++ out_sig_bt[argc++] = in_sig_bt[i]; ++ } ++ } else { ++ Thread* THREAD = Thread::current(); ++ in_elem_bt = NEW_RESOURCE_ARRAY(BasicType, total_in_args); ++ SignatureStream ss(method->signature()); ++ for (int i = 0; i < total_in_args ; i++ ) { ++ if (in_sig_bt[i] == T_ARRAY) { ++ // Arrays are passed as int, elem* pair ++ out_sig_bt[argc++] = T_INT; ++ out_sig_bt[argc++] = T_ADDRESS; ++ Symbol* atype = ss.as_symbol(CHECK_NULL); ++ const char* at = atype->as_C_string(); ++ if (strlen(at) == 2) { ++ assert(at[0] == '[', "must be"); ++ switch (at[1]) { ++ case 'B': in_elem_bt[i] = T_BYTE; break; ++ case 'C': in_elem_bt[i] = T_CHAR; break; ++ case 'D': in_elem_bt[i] = T_DOUBLE; break; ++ case 'F': in_elem_bt[i] = T_FLOAT; break; ++ case 'I': in_elem_bt[i] = T_INT; break; ++ case 'J': in_elem_bt[i] = T_LONG; break; ++ case 'S': in_elem_bt[i] = T_SHORT; break; ++ case 'Z': in_elem_bt[i] = T_BOOLEAN; break; ++ default: ShouldNotReachHere(); ++ } ++ } ++ } else { ++ out_sig_bt[argc++] = in_sig_bt[i]; ++ in_elem_bt[i] = T_VOID; ++ } ++ if (in_sig_bt[i] != T_VOID) { ++ assert(in_sig_bt[i] == ss.type(), "must match"); ++ ss.next(); ++ } ++ } ++ } ++ ++ // Now figure out where the args must be stored and how much stack space ++ // they require (neglecting out_preserve_stack_slots but space for storing ++ // the 1st six register arguments). It's weird see int_stk_helper. ++ // ++ int out_arg_slots; ++ out_arg_slots = c_calling_convention(out_sig_bt, out_regs, NULL, total_c_args); ++ ++ // Compute framesize for the wrapper. We need to handlize all oops in ++ // registers. We must create space for them here that is disjoint from ++ // the windowed save area because we have no control over when we might ++ // flush the window again and overwrite values that gc has since modified. ++ // (The live window race) ++ // ++ // We always just allocate 6 word for storing down these object. This allow ++ // us to simply record the base and use the Ireg number to decide which ++ // slot to use. (Note that the reg number is the inbound number not the ++ // outbound number). ++ // We must shuffle args to match the native convention, and include var-args space. ++ ++ // Calculate the total number of stack slots we will need. ++ ++ // First count the abi requirement plus all of the outgoing args ++ int stack_slots = SharedRuntime::out_preserve_stack_slots() + out_arg_slots; ++ ++ // Now the space for the inbound oop handle area ++ int total_save_slots = 9 * VMRegImpl::slots_per_word; // 9 arguments passed in registers ++ if (is_critical_native) { ++ // Critical natives may have to call out so they need a save area ++ // for register arguments. 
++ int double_slots = 0; ++ int single_slots = 0; ++ for ( int i = 0; i < total_in_args; i++) { ++ if (in_regs[i].first()->is_Register()) { ++ const Register reg = in_regs[i].first()->as_Register(); ++ switch (in_sig_bt[i]) { ++ case T_BOOLEAN: ++ case T_BYTE: ++ case T_SHORT: ++ case T_CHAR: ++ case T_INT: single_slots++; break; ++ case T_ARRAY: ++ case T_LONG: double_slots++; break; ++ default: ShouldNotReachHere(); ++ } ++ } else if (in_regs[i].first()->is_FloatRegister()) { ++ switch (in_sig_bt[i]) { ++ case T_FLOAT: single_slots++; break; ++ case T_DOUBLE: double_slots++; break; ++ default: ShouldNotReachHere(); ++ } ++ } ++ } ++ total_save_slots = double_slots * 2 + single_slots; ++ // align the save area ++ if (double_slots != 0) { ++ stack_slots = round_to(stack_slots, 2); ++ } ++ } ++ ++ int oop_handle_offset = stack_slots; ++ stack_slots += total_save_slots; ++ ++ // Now any space we need for handlizing a klass if static method ++ ++ int klass_slot_offset = 0; ++ int klass_offset = -1; ++ int lock_slot_offset = 0; ++ bool is_static = false; ++ ++ if (method->is_static()) { ++ klass_slot_offset = stack_slots; ++ stack_slots += VMRegImpl::slots_per_word; ++ klass_offset = klass_slot_offset * VMRegImpl::stack_slot_size; ++ is_static = true; ++ } ++ ++ // Plus a lock if needed ++ ++ if (method->is_synchronized()) { ++ lock_slot_offset = stack_slots; ++ stack_slots += VMRegImpl::slots_per_word; ++ } ++ ++ // Now a place to save return value or as a temporary for any gpr -> fpr moves ++ // + 2 for return address (which we own) and saved fp ++ stack_slots += 2 + 9 * VMRegImpl::slots_per_word; // (T0, A0, A1, A2, A3, A4, A5, A6, A7) ++ ++ // Ok The space we have allocated will look like: ++ // ++ // ++ // FP-> | | ++ // |---------------------| ++ // | 2 slots for moves | ++ // |---------------------| ++ // | lock box (if sync) | ++ // |---------------------| <- lock_slot_offset ++ // | klass (if static) | ++ // |---------------------| <- klass_slot_offset ++ // | oopHandle area | ++ // |---------------------| <- oop_handle_offset ++ // | outbound memory | ++ // | based arguments | ++ // | | ++ // |---------------------| ++ // | vararg area | ++ // |---------------------| ++ // | | ++ // SP-> | out_preserved_slots | ++ // ++ // ++ ++ ++ // Now compute actual number of stack words we need rounding to make ++ // stack properly aligned. ++ stack_slots = round_to(stack_slots, StackAlignmentInSlots); ++ ++ int stack_size = stack_slots * VMRegImpl::stack_slot_size; ++ ++ intptr_t start = (intptr_t)__ pc(); ++ ++ ++ ++ // First thing make an ic check to see if we should even be here ++ address ic_miss = SharedRuntime::get_ic_miss_stub(); ++ ++ // We are free to use all registers as temps without saving them and ++ // restoring them except fp. fp is the only callee save register ++ // as far as the interpreter and the compiler(s) are concerned. ++ ++ //refer to register_loongarch.hpp:IC_Klass ++ const Register ic_reg = T1; ++ const Register receiver = T0; ++ ++ Label hit; ++ Label exception_pending; ++ ++ __ verify_oop(receiver); ++ //add for compressedoops ++ __ load_klass(T4, receiver); ++ __ beq(T4, ic_reg, hit); ++ __ jmp(ic_miss, relocInfo::runtime_call_type); ++ __ bind(hit); ++ ++ int vep_offset = ((intptr_t)__ pc()) - start; ++#ifdef COMPILER1 ++ if (InlineObjectHash && method->intrinsic_id() == vmIntrinsics::_hashCode) { ++ // Object.hashCode can pull the hashCode from the header word ++ // instead of doing a full VM transition once it's been computed. 
++ // Since hashCode is usually polymorphic at call sites we can't do ++ // this optimization at the call site without a lot of work. ++ Label slowCase; ++ Register receiver = T0; ++ Register result = V0; ++ __ ld_d ( result, receiver, oopDesc::mark_offset_in_bytes()); ++ // check if locked ++ __ andi(AT, result, markOopDesc::unlocked_value); ++ __ beq(AT, R0, slowCase); ++ if (UseBiasedLocking) { ++ // Check if biased and fall through to runtime if so ++ __ andi (AT, result, markOopDesc::biased_lock_bit_in_place); ++ __ bne(AT, R0, slowCase); ++ } ++ // get hash ++ __ li(AT, markOopDesc::hash_mask_in_place); ++ __ andr (AT, result, AT); ++ // test if hashCode exists ++ __ beq (AT, R0, slowCase); ++ __ shr(result, markOopDesc::hash_shift); ++ __ jr(RA); ++ __ bind (slowCase); ++ } ++#endif // COMPILER1 ++ ++ // Generate stack overflow check ++ if (UseStackBanging) { ++ __ bang_stack_with_offset((int)JavaThread::stack_shadow_zone_size()); ++ } ++ ++ // The instruction at the verified entry point must be 4 bytes or longer ++ // because it can be patched on the fly by make_non_entrant. ++ if (((intptr_t)__ pc() - start - vep_offset) < 1 * BytesPerInstWord) { ++ __ nop(); ++ } ++ ++ // Generate a new frame for the wrapper. ++ // do LA need this ? ++#ifndef OPT_THREAD ++ __ get_thread(TREG); ++#endif ++ __ st_ptr(SP, TREG, in_bytes(JavaThread::last_Java_sp_offset())); ++ __ li(AT, -(StackAlignmentInBytes)); ++ __ andr(SP, SP, AT); ++ ++ __ enter(); ++ // -2 because return address is already present and so is saved fp ++ __ addi_d(SP, SP, -1 * (stack_size - 2*wordSize)); ++ ++ // Frame is now completed as far a size and linkage. ++ ++ int frame_complete = ((intptr_t)__ pc()) - start; ++ ++ // Calculate the difference between sp and fp. We need to know it ++ // after the native call because on windows Java Natives will pop ++ // the arguments and it is painful to do sp relative addressing ++ // in a platform independent way. So after the call we switch to ++ // fp relative addressing. ++ //FIXME actually , the fp_adjustment may not be the right, because andr(sp, sp, at) may change ++ //the SP ++ int fp_adjustment = stack_size - 2*wordSize; ++ ++#ifdef COMPILER2 ++ // C2 may leave the stack dirty if not in SSE2+ mode ++ __ empty_FPU_stack(); ++#endif ++ ++ // Compute the fp offset for any slots used after the jni call ++ ++ int lock_slot_fp_offset = (lock_slot_offset*VMRegImpl::stack_slot_size) - fp_adjustment; ++ // We use TREG as a thread pointer because it is callee save and ++ // if we load it once it is usable thru the entire wrapper ++ const Register thread = TREG; ++ ++ // We use S4 as the oop handle for the receiver/klass ++ // It is callee save so it survives the call to native ++ ++ const Register oop_handle_reg = S4; ++ if (is_critical_native) { ++ Unimplemented(); ++ // check_needs_gc_for_critical_native(masm, stack_slots, total_c_args, total_in_args, ++ // oop_handle_offset, oop_maps, in_regs, in_sig_bt); ++ } ++ ++#ifndef OPT_THREAD ++ __ get_thread(thread); ++#endif ++ ++ // ++ // We immediately shuffle the arguments so that any vm call we have to ++ // make from here on out (sync slow path, jvmpi, etc.) we will have ++ // captured the oops from our caller and have a valid oopMap for ++ // them. ++ ++ // ----------------- ++ // The Grand Shuffle ++ // ++ // Natives require 1 or 2 extra arguments over the normal ones: the JNIEnv* ++ // and, if static, the class mirror instead of a receiver. 
This pretty much ++ // guarantees that register layout will not match (and LA doesn't use reg ++ // parms though amd does). Since the native abi doesn't use register args ++ // and the java conventions does we don't have to worry about collisions. ++ // All of our moved are reg->stack or stack->stack. ++ // We ignore the extra arguments during the shuffle and handle them at the ++ // last moment. The shuffle is described by the two calling convention ++ // vectors we have in our possession. We simply walk the java vector to ++ // get the source locations and the c vector to get the destinations. ++ ++ int c_arg = method->is_static() ? 2 : 1 ; ++ ++ // Record sp-based slot for receiver on stack for non-static methods ++ int receiver_offset = -1; ++ ++ // This is a trick. We double the stack slots so we can claim ++ // the oops in the caller's frame. Since we are sure to have ++ // more args than the caller doubling is enough to make ++ // sure we can capture all the incoming oop args from the ++ // caller. ++ // ++ OopMap* map = new OopMap(stack_slots * 2, 0 /* arg_slots*/); ++ ++ // Mark location of fp (someday) ++ // map->set_callee_saved(VMRegImpl::stack2reg( stack_slots - 2), stack_slots * 2, 0, vmreg(fp)); ++ ++#ifdef ASSERT ++ bool reg_destroyed[RegisterImpl::number_of_registers]; ++ bool freg_destroyed[FloatRegisterImpl::number_of_registers]; ++ for ( int r = 0 ; r < RegisterImpl::number_of_registers ; r++ ) { ++ reg_destroyed[r] = false; ++ } ++ for ( int f = 0 ; f < FloatRegisterImpl::number_of_registers ; f++ ) { ++ freg_destroyed[f] = false; ++ } ++ ++#endif /* ASSERT */ ++ ++ // This may iterate in two different directions depending on the ++ // kind of native it is. The reason is that for regular JNI natives ++ // the incoming and outgoing registers are offset upwards and for ++ // critical natives they are offset down. 
++ GrowableArray arg_order(2 * total_in_args); ++ VMRegPair tmp_vmreg; ++ tmp_vmreg.set2(T8->as_VMReg()); ++ ++ if (!is_critical_native) { ++ for (int i = total_in_args - 1, c_arg = total_c_args - 1; i >= 0; i--, c_arg--) { ++ arg_order.push(i); ++ arg_order.push(c_arg); ++ } ++ } else { ++ // Compute a valid move order, using tmp_vmreg to break any cycles ++ Unimplemented(); ++ // ComputeMoveOrder cmo(total_in_args, in_regs, total_c_args, out_regs, in_sig_bt, arg_order, tmp_vmreg); ++ } ++ ++ int temploc = -1; ++ for (int ai = 0; ai < arg_order.length(); ai += 2) { ++ int i = arg_order.at(ai); ++ int c_arg = arg_order.at(ai + 1); ++ __ block_comment(err_msg("move %d -> %d", i, c_arg)); ++ if (c_arg == -1) { ++ assert(is_critical_native, "should only be required for critical natives"); ++ // This arg needs to be moved to a temporary ++ __ move(tmp_vmreg.first()->as_Register(), in_regs[i].first()->as_Register()); ++ in_regs[i] = tmp_vmreg; ++ temploc = i; ++ continue; ++ } else if (i == -1) { ++ assert(is_critical_native, "should only be required for critical natives"); ++ // Read from the temporary location ++ assert(temploc != -1, "must be valid"); ++ i = temploc; ++ temploc = -1; ++ } ++#ifdef ASSERT ++ if (in_regs[i].first()->is_Register()) { ++ assert(!reg_destroyed[in_regs[i].first()->as_Register()->encoding()], "destroyed reg!"); ++ } else if (in_regs[i].first()->is_FloatRegister()) { ++ assert(!freg_destroyed[in_regs[i].first()->as_FloatRegister()->encoding()], "destroyed reg!"); ++ } ++ if (out_regs[c_arg].first()->is_Register()) { ++ reg_destroyed[out_regs[c_arg].first()->as_Register()->encoding()] = true; ++ } else if (out_regs[c_arg].first()->is_FloatRegister()) { ++ freg_destroyed[out_regs[c_arg].first()->as_FloatRegister()->encoding()] = true; ++ } ++#endif /* ASSERT */ ++ switch (in_sig_bt[i]) { ++ case T_ARRAY: ++ if (is_critical_native) { ++ Unimplemented(); ++ // unpack_array_argument(masm, in_regs[i], in_elem_bt[i], out_regs[c_arg + 1], out_regs[c_arg]); ++ c_arg++; ++#ifdef ASSERT ++ if (out_regs[c_arg].first()->is_Register()) { ++ reg_destroyed[out_regs[c_arg].first()->as_Register()->encoding()] = true; ++ } else if (out_regs[c_arg].first()->is_FloatRegister()) { ++ freg_destroyed[out_regs[c_arg].first()->as_FloatRegister()->encoding()] = true; ++ } ++#endif ++ break; ++ } ++ case T_OBJECT: ++ assert(!is_critical_native, "no oop arguments"); ++ object_move(masm, map, oop_handle_offset, stack_slots, in_regs[i], out_regs[c_arg], ++ ((i == 0) && (!is_static)), ++ &receiver_offset); ++ break; ++ case T_VOID: ++ break; ++ ++ case T_FLOAT: ++ float_move(masm, in_regs[i], out_regs[c_arg]); ++ break; ++ ++ case T_DOUBLE: ++ assert( i + 1 < total_in_args && ++ in_sig_bt[i + 1] == T_VOID && ++ out_sig_bt[c_arg+1] == T_VOID, "bad arg list"); ++ double_move(masm, in_regs[i], out_regs[c_arg]); ++ break; ++ ++ case T_LONG : ++ long_move(masm, in_regs[i], out_regs[c_arg]); ++ break; ++ ++ case T_ADDRESS: assert(false, "found T_ADDRESS in java args"); ++ ++ default: ++ simple_move32(masm, in_regs[i], out_regs[c_arg]); ++ } ++ } ++ ++ // point c_arg at the first arg that is already loaded in case we ++ // need to spill before we call out ++ c_arg = total_c_args - total_in_args; ++ // Pre-load a static method's oop. Used both by locking code and ++ // the normal JNI call code. 
++ ++ __ move(oop_handle_reg, A1); ++ ++ if (method->is_static() && !is_critical_native) { ++ ++ // load opp into a register ++ int oop_index = __ oop_recorder()->find_index(JNIHandles::make_local( ++ (method->method_holder())->java_mirror())); ++ ++ ++ RelocationHolder rspec = oop_Relocation::spec(oop_index); ++ __ relocate(rspec); ++ __ patchable_li52(oop_handle_reg, (long)JNIHandles::make_local((method->method_holder())->java_mirror())); ++ // Now handlize the static class mirror it's known not-null. ++ __ st_d( oop_handle_reg, SP, klass_offset); ++ map->set_oop(VMRegImpl::stack2reg(klass_slot_offset)); ++ ++ // Now get the handle ++ __ lea(oop_handle_reg, Address(SP, klass_offset)); ++ // store the klass handle as second argument ++ __ move(A1, oop_handle_reg); ++ // and protect the arg if we must spill ++ c_arg--; ++ } ++ ++ // Change state to native (we save the return address in the thread, since it might not ++ // be pushed on the stack when we do a a stack traversal). It is enough that the pc() ++ // points into the right code segment. It does not have to be the correct return pc. ++ // We use the same pc/oopMap repeatedly when we call out ++ ++ Label native_return; ++ __ set_last_Java_frame(SP, noreg, native_return); ++ ++ // We have all of the arguments setup at this point. We must not touch any register ++ // argument registers at this point (what if we save/restore them there are no oop? ++ { ++ SkipIfEqual skip_if(masm, &DTraceMethodProbes, 0); ++ save_args(masm, total_c_args, c_arg, out_regs); ++ int metadata_index = __ oop_recorder()->find_index(method()); ++ RelocationHolder rspec = metadata_Relocation::spec(metadata_index); ++ __ relocate(rspec); ++ __ patchable_li52(AT, (long)(method())); ++ ++ __ call_VM_leaf( ++ CAST_FROM_FN_PTR(address, SharedRuntime::dtrace_method_entry), ++ thread, AT); ++ ++ restore_args(masm, total_c_args, c_arg, out_regs); ++ } ++ ++ // These are register definitions we need for locking/unlocking ++ const Register swap_reg = T8; // Must use T8 for cmpxchg instruction ++ const Register obj_reg = T4; // Will contain the oop ++ //const Register lock_reg = T6; // Address of compiler lock object (BasicLock) ++ const Register lock_reg = c_rarg0; // Address of compiler lock object (BasicLock) ++ ++ ++ ++ Label slow_path_lock; ++ Label lock_done; ++ ++ // Lock a synchronized method ++ if (method->is_synchronized()) { ++ assert(!is_critical_native, "unhandled"); ++ ++ const int mark_word_offset = BasicLock::displaced_header_offset_in_bytes(); ++ ++ // Get the handle (the 2nd argument) ++ __ move(oop_handle_reg, A1); ++ ++ // Get address of the box ++ __ lea(lock_reg, Address(FP, lock_slot_fp_offset)); ++ ++ // Load the oop from the handle ++ __ ld_d(obj_reg, oop_handle_reg, 0); ++ ++ if (UseBiasedLocking) { ++ // Note that oop_handle_reg is trashed during this call ++ __ biased_locking_enter(lock_reg, obj_reg, swap_reg, A1, false, lock_done, &slow_path_lock); ++ } ++ ++ // Load immediate 1 into swap_reg %T8 ++ __ li(swap_reg, 1); ++ ++ __ ld_d(AT, obj_reg, 0); ++ __ orr(swap_reg, swap_reg, AT); ++ ++ __ st_d(swap_reg, lock_reg, mark_word_offset); ++ __ cmpxchg(Address(obj_reg, 0), swap_reg, lock_reg, AT, true, false, lock_done); ++ // Test if the oopMark is an obvious stack pointer, i.e., ++ // 1) (mark & 3) == 0, and ++ // 2) sp <= mark < mark + os::pagesize() ++ // These 3 tests can be done by evaluating the following ++ // expression: ((mark - sp) & (3 - os::vm_page_size())), ++ // assuming both stack pointer and pagesize have their ++ // least 
significant 2 bits clear. ++ // NOTE: the oopMark is in swap_reg %T8 as the result of cmpxchg ++ ++ __ sub_d(swap_reg, swap_reg, SP); ++ __ li(AT, 3 - os::vm_page_size()); ++ __ andr(swap_reg , swap_reg, AT); ++ // Save the test result, for recursive case, the result is zero ++ __ st_d(swap_reg, lock_reg, mark_word_offset); ++ __ bne(swap_reg, R0, slow_path_lock); ++ // Slow path will re-enter here ++ __ bind(lock_done); ++ ++ if (UseBiasedLocking) { ++ // Re-fetch oop_handle_reg as we trashed it above ++ __ move(A1, oop_handle_reg); ++ } ++ } ++ ++ ++ // Finally just about ready to make the JNI call ++ ++ ++ // get JNIEnv* which is first argument to native ++ if (!is_critical_native) { ++ __ addi_d(A0, thread, in_bytes(JavaThread::jni_environment_offset())); ++ } ++ ++ // Example: Java_java_lang_ref_Finalizer_invokeFinalizeMethod(JNIEnv *env, jclass clazz, jobject ob) ++ // Load the second arguments into A1 ++ //__ ld(A1, SP , wordSize ); // klass ++ ++ // Now set thread in native ++ __ addi_d(AT, R0, _thread_in_native); ++ if (os::is_MP()) { ++ __ membar(Assembler::Membar_mask_bits(__ LoadStore|__ StoreStore)); // store release ++ } ++ __ st_w(AT, thread, in_bytes(JavaThread::thread_state_offset())); ++ // do the call ++ __ call(native_func, relocInfo::runtime_call_type); ++ __ bind(native_return); ++ ++ oop_maps->add_gc_map(((intptr_t)__ pc()) - start, map); ++ ++ // WARNING - on Windows Java Natives use pascal calling convention and pop the ++ // arguments off of the stack. We could just re-adjust the stack pointer here ++ // and continue to do SP relative addressing but we instead switch to FP ++ // relative addressing. ++ ++ // Unpack native results. ++ switch (ret_type) { ++ case T_BOOLEAN: __ c2bool(V0); break; ++ case T_CHAR : __ bstrpick_d(V0, V0, 15, 0); break; ++ case T_BYTE : __ sign_extend_byte (V0); break; ++ case T_SHORT : __ sign_extend_short(V0); break; ++ case T_INT : // nothing to do break; ++ case T_DOUBLE : ++ case T_FLOAT : ++ // Result is in st0 we'll save as needed ++ break; ++ case T_ARRAY: // Really a handle ++ case T_OBJECT: // Really a handle ++ break; // can't de-handlize until after safepoint check ++ case T_VOID: break; ++ case T_LONG: break; ++ default : ShouldNotReachHere(); ++ } ++ // Switch thread to "native transition" state before reading the synchronization state. ++ // This additional state is necessary because reading and testing the synchronization ++ // state is not atomic w.r.t. GC, as this scenario demonstrates: ++ // Java thread A, in _thread_in_native state, loads _not_synchronized and is preempted. ++ // VM thread changes sync state to synchronizing and suspends threads for GC. ++ // Thread A is resumed to finish this native method, but doesn't block here since it ++ // didn't see any synchronization is progress, and escapes. ++ __ addi_d(AT, R0, _thread_in_native_trans); ++ if (os::is_MP()) { ++ __ membar(Assembler::Membar_mask_bits(__ LoadStore|__ StoreStore)); // store release ++ } ++ __ st_w(AT, thread, in_bytes(JavaThread::thread_state_offset())); ++ ++ if(os::is_MP()) { ++ if (UseMembar) { ++ // Force this write out before the read below ++ __ membar(__ AnyAny); ++ } else { ++ // Write serialization page so VM thread can do a pseudo remote membar. ++ // We use the current thread pointer to calculate a thread specific ++ // offset to write to within the page. This minimizes bus traffic ++ // due to cache line collision. 
++ __ serialize_memory(thread, T5); ++ } ++ } ++ ++ Label after_transition; ++ ++ // check for safepoint operation in progress and/or pending suspend requests ++ { ++ Label Continue; ++ Label slow_path; ++ ++ __ safepoint_poll_acquire(slow_path, thread); ++ __ ld_w(AT, thread, in_bytes(JavaThread::suspend_flags_offset())); ++ __ beq(AT, R0, Continue); ++ __ bind(slow_path); ++ ++ // Don't use call_VM as it will see a possible pending exception and forward it ++ // and never return here preventing us from clearing _last_native_pc down below. ++ // ++ save_native_result(masm, ret_type, stack_slots); ++ __ move(A0, thread); ++ __ addi_d(SP, SP, -wordSize); ++ __ push(S2); ++ __ li(AT, -(StackAlignmentInBytes)); ++ __ move(S2, SP); // use S2 as a sender SP holder ++ __ andr(SP, SP, AT); // align stack as required by ABI ++ if (!is_critical_native) { ++ __ call(CAST_FROM_FN_PTR(address, JavaThread::check_special_condition_for_native_trans), relocInfo::runtime_call_type); ++ } else { ++ __ call(CAST_FROM_FN_PTR(address, JavaThread::check_special_condition_for_native_trans_and_transition), relocInfo::runtime_call_type); ++ } ++ __ move(SP, S2); // use S2 as a sender SP holder ++ __ pop(S2); ++ __ addi_d(SP, SP, wordSize); ++ // Restore any method result value ++ restore_native_result(masm, ret_type, stack_slots); ++ ++ if (is_critical_native) { ++ // The call above performed the transition to thread_in_Java so ++ // skip the transition logic below. ++ __ beq(R0, R0, after_transition); ++ } ++ ++ __ bind(Continue); ++ } ++ ++ // change thread state ++ __ addi_d(AT, R0, _thread_in_Java); ++ if (os::is_MP()) { ++ __ membar(Assembler::Membar_mask_bits(__ LoadStore|__ StoreStore)); // store release ++ } ++ __ st_w(AT, thread, in_bytes(JavaThread::thread_state_offset())); ++ __ bind(after_transition); ++ Label reguard; ++ Label reguard_done; ++ __ ld_w(AT, thread, in_bytes(JavaThread::stack_guard_state_offset())); ++ __ addi_d(AT, AT, -JavaThread::stack_guard_yellow_reserved_disabled); ++ __ beq(AT, R0, reguard); ++ // slow path reguard re-enters here ++ __ bind(reguard_done); ++ ++ // Handle possible exception (will unlock if necessary) ++ ++ // native result if any is live ++ ++ // Unlock ++ Label slow_path_unlock; ++ Label unlock_done; ++ if (method->is_synchronized()) { ++ ++ Label done; ++ ++ // Get locked oop from the handle we passed to jni ++ __ ld_d( obj_reg, oop_handle_reg, 0); ++ if (UseBiasedLocking) { ++ __ biased_locking_exit(obj_reg, T8, done); ++ ++ } ++ ++ // Simple recursive lock? 
++ ++ __ ld_d(AT, FP, lock_slot_fp_offset); ++ __ beq(AT, R0, done); ++ // Must save FSF if if it is live now because cmpxchg must use it ++ if (ret_type != T_FLOAT && ret_type != T_DOUBLE && ret_type != T_VOID) { ++ save_native_result(masm, ret_type, stack_slots); ++ } ++ ++ // get old displaced header ++ __ ld_d (T8, FP, lock_slot_fp_offset); ++ // get address of the stack lock ++ __ addi_d (c_rarg0, FP, lock_slot_fp_offset); ++ // Atomic swap old header if oop still contains the stack lock ++ __ cmpxchg(Address(obj_reg, 0), c_rarg0, T8, AT, false, false, unlock_done, &slow_path_unlock); ++ ++ // slow path re-enters here ++ __ bind(unlock_done); ++ if (ret_type != T_FLOAT && ret_type != T_DOUBLE && ret_type != T_VOID) { ++ restore_native_result(masm, ret_type, stack_slots); ++ } ++ ++ __ bind(done); ++ ++ } ++ { ++ SkipIfEqual skip_if(masm, &DTraceMethodProbes, 0); ++ // Tell dtrace about this method exit ++ save_native_result(masm, ret_type, stack_slots); ++ int metadata_index = __ oop_recorder()->find_index( (method())); ++ RelocationHolder rspec = metadata_Relocation::spec(metadata_index); ++ __ relocate(rspec); ++ __ patchable_li52(AT, (long)(method())); ++ ++ __ call_VM_leaf( ++ CAST_FROM_FN_PTR(address, SharedRuntime::dtrace_method_exit), ++ thread, AT); ++ restore_native_result(masm, ret_type, stack_slots); ++ } ++ ++ // We can finally stop using that last_Java_frame we setup ages ago ++ ++ __ reset_last_Java_frame(false); ++ ++ // Unpack oop result, e.g. JNIHandles::resolve value. ++ if (ret_type == T_OBJECT || ret_type == T_ARRAY) { ++ __ resolve_jobject(V0, thread, T4); ++ } ++ ++ if (CheckJNICalls) { ++ // clear_pending_jni_exception_check ++ __ st_d(R0, thread, in_bytes(JavaThread::pending_jni_exception_check_fn_offset())); ++ } ++ ++ if (!is_critical_native) { ++ // reset handle block ++ __ ld_d(AT, thread, in_bytes(JavaThread::active_handles_offset())); ++ __ st_w(R0, AT, JNIHandleBlock::top_offset_in_bytes()); ++ } ++ ++ if (!is_critical_native) { ++ // Any exception pending? ++ __ ld_d(AT, thread, in_bytes(Thread::pending_exception_offset())); ++ __ bne(AT, R0, exception_pending); ++ } ++ // no exception, we're almost done ++ ++ // check that only result value is on FPU stack ++ __ verify_FPU(ret_type == T_FLOAT || ret_type == T_DOUBLE ? 1 : 0, "native_wrapper normal exit"); ++ ++ // Return ++#ifndef OPT_THREAD ++ __ get_thread(TREG); ++#endif ++ //__ ld_ptr(SP, TREG, in_bytes(JavaThread::last_Java_sp_offset())); ++ __ leave(); ++ ++ __ jr(RA); ++ // Unexpected paths are out of line and go here ++ // Slow path locking & unlocking ++ if (method->is_synchronized()) { ++ ++ // BEGIN Slow path lock ++ __ bind(slow_path_lock); ++ ++ // protect the args we've loaded ++ save_args(masm, total_c_args, c_arg, out_regs); ++ ++ // has last_Java_frame setup. 
No exceptions so do vanilla call not call_VM ++ // args are (oop obj, BasicLock* lock, JavaThread* thread) ++ ++ __ move(A0, obj_reg); ++ __ move(A1, lock_reg); ++ __ move(A2, thread); ++ __ addi_d(SP, SP, - 3*wordSize); ++ ++ __ li(AT, -(StackAlignmentInBytes)); ++ __ move(S2, SP); // use S2 as a sender SP holder ++ __ andr(SP, SP, AT); // align stack as required by ABI ++ ++ __ call(CAST_FROM_FN_PTR(address, SharedRuntime::complete_monitor_locking_C), relocInfo::runtime_call_type); ++ __ move(SP, S2); ++ __ addi_d(SP, SP, 3*wordSize); ++ ++ restore_args(masm, total_c_args, c_arg, out_regs); ++ ++#ifdef ASSERT ++ { Label L; ++ __ ld_d(AT, thread, in_bytes(Thread::pending_exception_offset())); ++ __ beq(AT, R0, L); ++ __ stop("no pending exception allowed on exit from monitorenter"); ++ __ bind(L); ++ } ++#endif ++ __ b(lock_done); ++ // END Slow path lock ++ ++ // BEGIN Slow path unlock ++ __ bind(slow_path_unlock); ++ ++ // Slow path unlock ++ ++ if (ret_type == T_FLOAT || ret_type == T_DOUBLE ) { ++ save_native_result(masm, ret_type, stack_slots); ++ } ++ // Save pending exception around call to VM (which contains an EXCEPTION_MARK) ++ ++ __ ld_d(AT, thread, in_bytes(Thread::pending_exception_offset())); ++ __ push(AT); ++ __ st_d(R0, thread, in_bytes(Thread::pending_exception_offset())); ++ ++ __ li(AT, -(StackAlignmentInBytes)); ++ __ move(S2, SP); // use S2 as a sender SP holder ++ __ andr(SP, SP, AT); // align stack as required by ABI ++ ++ // should be a peal ++ // +wordSize because of the push above ++ __ addi_d(A1, FP, lock_slot_fp_offset); ++ ++ __ move(A0, obj_reg); ++ __ move(A2, thread); ++ __ addi_d(SP, SP, -2*wordSize); ++ __ call(CAST_FROM_FN_PTR(address, SharedRuntime::complete_monitor_unlocking_C), ++ relocInfo::runtime_call_type); ++ __ addi_d(SP, SP, 2*wordSize); ++ __ move(SP, S2); ++#ifdef ASSERT ++ { ++ Label L; ++ __ ld_d( AT, thread, in_bytes(Thread::pending_exception_offset())); ++ __ beq(AT, R0, L); ++ __ stop("no pending exception allowed on exit complete_monitor_unlocking_C"); ++ __ bind(L); ++ } ++#endif /* ASSERT */ ++ ++ __ pop(AT); ++ __ st_d(AT, thread, in_bytes(Thread::pending_exception_offset())); ++ if (ret_type == T_FLOAT || ret_type == T_DOUBLE ) { ++ restore_native_result(masm, ret_type, stack_slots); ++ } ++ __ b(unlock_done); ++ // END Slow path unlock ++ ++ } ++ ++ // SLOW PATH Reguard the stack if needed ++ ++ __ bind(reguard); ++ save_native_result(masm, ret_type, stack_slots); ++ __ call(CAST_FROM_FN_PTR(address, SharedRuntime::reguard_yellow_pages), ++ relocInfo::runtime_call_type); ++ restore_native_result(masm, ret_type, stack_slots); ++ __ b(reguard_done); ++ ++ // BEGIN EXCEPTION PROCESSING ++ if (!is_critical_native) { ++ // Forward the exception ++ __ bind(exception_pending); ++ ++ // remove possible return value from FPU register stack ++ __ empty_FPU_stack(); ++ ++ // pop our frame ++ //forward_exception_entry need return address on stack ++ __ move(SP, FP); ++ __ pop(FP); ++ ++ // and forward the exception ++ __ jmp(StubRoutines::forward_exception_entry(), relocInfo::runtime_call_type); ++ } ++ __ flush(); ++ ++ nmethod *nm = nmethod::new_native_nmethod(method, ++ compile_id, ++ masm->code(), ++ vep_offset, ++ frame_complete, ++ stack_slots / VMRegImpl::slots_per_word, ++ (is_static ? 
in_ByteSize(klass_offset) : in_ByteSize(receiver_offset)), ++ in_ByteSize(lock_slot_offset*VMRegImpl::stack_slot_size), ++ oop_maps); ++ ++ if (is_critical_native) { ++ nm->set_lazy_critical_native(true); ++ } ++ return nm; ++} ++ ++#ifdef HAVE_DTRACE_H ++// --------------------------------------------------------------------------- ++// Generate a dtrace nmethod for a given signature. The method takes arguments ++// in the Java compiled code convention, marshals them to the native ++// abi and then leaves nops at the position you would expect to call a native ++// function. When the probe is enabled the nops are replaced with a trap ++// instruction that dtrace inserts and the trace will cause a notification ++// to dtrace. ++// ++// The probes are only able to take primitive types and java/lang/String as ++// arguments. No other java types are allowed. Strings are converted to utf8 ++// strings so that from dtrace point of view java strings are converted to C ++// strings. There is an arbitrary fixed limit on the total space that a method ++// can use for converting the strings. (256 chars per string in the signature). ++// So any java string larger then this is truncated. ++ ++static int fp_offset[ConcreteRegisterImpl::number_of_registers] = { 0 }; ++static bool offsets_initialized = false; ++ ++static VMRegPair reg64_to_VMRegPair(Register r) { ++ VMRegPair ret; ++ if (wordSize == 8) { ++ ret.set2(r->as_VMReg()); ++ } else { ++ ret.set_pair(r->successor()->as_VMReg(), r->as_VMReg()); ++ } ++ return ret; ++} ++ ++ ++nmethod *SharedRuntime::generate_dtrace_nmethod(MacroAssembler *masm, ++ methodHandle method) { ++ ++ ++ // generate_dtrace_nmethod is guarded by a mutex so we are sure to ++ // be single threaded in this method. ++ assert(AdapterHandlerLibrary_lock->owned_by_self(), "must be"); ++ ++ // Fill in the signature array, for the calling-convention call. ++ int total_args_passed = method->size_of_parameters(); ++ ++ BasicType* in_sig_bt = NEW_RESOURCE_ARRAY(BasicType, total_args_passed); ++ VMRegPair *in_regs = NEW_RESOURCE_ARRAY(VMRegPair, total_args_passed); ++ ++ // The signature we are going to use for the trap that dtrace will see ++ // java/lang/String is converted. We drop "this" and any other object ++ // is converted to NULL. (A one-slot java/lang/Long object reference ++ // is converted to a two-slot long, which is why we double the allocation). 
++ BasicType* out_sig_bt = NEW_RESOURCE_ARRAY(BasicType, total_args_passed * 2); ++ VMRegPair* out_regs = NEW_RESOURCE_ARRAY(VMRegPair, total_args_passed * 2); ++ ++ int i=0; ++ int total_strings = 0; ++ int first_arg_to_pass = 0; ++ int total_c_args = 0; ++ ++ // Skip the receiver as dtrace doesn't want to see it ++ if( !method->is_static() ) { ++ in_sig_bt[i++] = T_OBJECT; ++ first_arg_to_pass = 1; ++ } ++ ++ SignatureStream ss(method->signature()); ++ for ( ; !ss.at_return_type(); ss.next()) { ++ BasicType bt = ss.type(); ++ in_sig_bt[i++] = bt; // Collect remaining bits of signature ++ out_sig_bt[total_c_args++] = bt; ++ if( bt == T_OBJECT) { ++ symbolOop s = ss.as_symbol_or_null(); ++ if (s == vmSymbols::java_lang_String()) { ++ total_strings++; ++ out_sig_bt[total_c_args-1] = T_ADDRESS; ++ } else if (s == vmSymbols::java_lang_Boolean() || ++ s == vmSymbols::java_lang_Byte()) { ++ out_sig_bt[total_c_args-1] = T_BYTE; ++ } else if (s == vmSymbols::java_lang_Character() || ++ s == vmSymbols::java_lang_Short()) { ++ out_sig_bt[total_c_args-1] = T_SHORT; ++ } else if (s == vmSymbols::java_lang_Integer() || ++ s == vmSymbols::java_lang_Float()) { ++ out_sig_bt[total_c_args-1] = T_INT; ++ } else if (s == vmSymbols::java_lang_Long() || ++ s == vmSymbols::java_lang_Double()) { ++ out_sig_bt[total_c_args-1] = T_LONG; ++ out_sig_bt[total_c_args++] = T_VOID; ++ } ++ } else if ( bt == T_LONG || bt == T_DOUBLE ) { ++ in_sig_bt[i++] = T_VOID; // Longs & doubles take 2 Java slots ++ // We convert double to long ++ out_sig_bt[total_c_args-1] = T_LONG; ++ out_sig_bt[total_c_args++] = T_VOID; ++ } else if ( bt == T_FLOAT) { ++ // We convert float to int ++ out_sig_bt[total_c_args-1] = T_INT; ++ } ++ } ++ ++ assert(i==total_args_passed, "validly parsed signature"); ++ ++ // Now get the compiled-Java layout as input arguments ++ int comp_args_on_stack; ++ comp_args_on_stack = SharedRuntime::java_calling_convention( ++ in_sig_bt, in_regs, total_args_passed, false); ++ ++ // We have received a description of where all the java arg are located ++ // on entry to the wrapper. We need to convert these args to where ++ // the a native (non-jni) function would expect them. To figure out ++ // where they go we convert the java signature to a C signature and remove ++ // T_VOID for any long/double we might have received. ++ ++ ++ // Now figure out where the args must be stored and how much stack space ++ // they require (neglecting out_preserve_stack_slots but space for storing ++ // the 1st six register arguments). It's weird see int_stk_helper. ++ ++ int out_arg_slots; ++ out_arg_slots = c_calling_convention(out_sig_bt, out_regs, NULL, total_c_args); ++ ++ // Calculate the total number of stack slots we will need. ++ ++ // First count the abi requirement plus all of the outgoing args ++ int stack_slots = SharedRuntime::out_preserve_stack_slots() + out_arg_slots; ++ ++ // Plus a temp for possible converion of float/double/long register args ++ ++ int conversion_temp = stack_slots; ++ stack_slots += 2; ++ ++ ++ // Now space for the string(s) we must convert ++ ++ int string_locs = stack_slots; ++ stack_slots += total_strings * ++ (max_dtrace_string_size / VMRegImpl::stack_slot_size); ++ ++ // Ok The space we have allocated will look like: ++ // ++ // ++ // FP-> | | ++ // |---------------------| ++ // | string[n] | ++ // |---------------------| <- string_locs[n] ++ // | string[n-1] | ++ // |---------------------| <- string_locs[n-1] ++ // | ... | ++ // | ... 
| ++ // |---------------------| <- string_locs[1] ++ // | string[0] | ++ // |---------------------| <- string_locs[0] ++ // | temp | ++ // |---------------------| <- conversion_temp ++ // | outbound memory | ++ // | based arguments | ++ // | | ++ // |---------------------| ++ // | | ++ // SP-> | out_preserved_slots | ++ // ++ // ++ ++ // Now compute actual number of stack words we need rounding to make ++ // stack properly aligned. ++ stack_slots = round_to(stack_slots, 4 * VMRegImpl::slots_per_word); ++ ++ int stack_size = stack_slots * VMRegImpl::stack_slot_size; ++ intptr_t start = (intptr_t)__ pc(); ++ ++ // First thing make an ic check to see if we should even be here ++ ++ { ++ Label L; ++ const Register temp_reg = G3_scratch; ++ Address ic_miss(temp_reg, SharedRuntime::get_ic_miss_stub()); ++ __ verify_oop(O0); ++ __ ld_ptr(O0, oopDesc::klass_offset_in_bytes(), temp_reg); ++ __ cmp(temp_reg, G5_inline_cache_reg); ++ __ brx(Assembler::equal, true, Assembler::pt, L); ++ ++ __ jump_to(ic_miss, 0); ++ __ align(CodeEntryAlignment); ++ __ bind(L); ++ } ++ ++ int vep_offset = ((intptr_t)__ pc()) - start; ++ ++ // The instruction at the verified entry point must be 4 bytes or longer ++ // because it can be patched on the fly by make_non_entrant. The stack bang ++ // instruction fits that requirement. ++ ++ // Generate stack overflow check before creating frame ++ __ generate_stack_overflow_check(stack_size); ++ ++ assert(((intptr_t)__ pc() - start - vep_offset) >= 1 * BytesPerInstWord, ++ "valid size for make_non_entrant"); ++ ++ // Generate a new frame for the wrapper. ++ __ save(SP, -stack_size, SP); ++ ++ // Frame is now completed as far a size and linkage. ++ ++ int frame_complete = ((intptr_t)__ pc()) - start; ++ ++#ifdef ASSERT ++ bool reg_destroyed[RegisterImpl::number_of_registers]; ++ bool freg_destroyed[FloatRegisterImpl::number_of_registers]; ++ for ( int r = 0 ; r < RegisterImpl::number_of_registers ; r++ ) { ++ reg_destroyed[r] = false; ++ } ++ for ( int f = 0 ; f < FloatRegisterImpl::number_of_registers ; f++ ) { ++ freg_destroyed[f] = false; ++ } ++ ++#endif /* ASSERT */ ++ ++ VMRegPair zero; ++ const Register g0 = G0; // without this we get a compiler warning (why??) 
++ zero.set2(g0->as_VMReg()); ++ ++ int c_arg, j_arg; ++ ++ Register conversion_off = noreg; ++ ++ for (j_arg = first_arg_to_pass, c_arg = 0 ; ++ j_arg < total_args_passed ; j_arg++, c_arg++ ) { ++ ++ VMRegPair src = in_regs[j_arg]; ++ VMRegPair dst = out_regs[c_arg]; ++ ++#ifdef ASSERT ++ if (src.first()->is_Register()) { ++ assert(!reg_destroyed[src.first()->as_Register()->encoding()], "ack!"); ++ } else if (src.first()->is_FloatRegister()) { ++ assert(!freg_destroyed[src.first()->as_FloatRegister()->encoding( ++ FloatRegisterImpl::S)], "ack!"); ++ } ++ if (dst.first()->is_Register()) { ++ reg_destroyed[dst.first()->as_Register()->encoding()] = true; ++ } else if (dst.first()->is_FloatRegister()) { ++ freg_destroyed[dst.first()->as_FloatRegister()->encoding( ++ FloatRegisterImpl::S)] = true; ++ } ++#endif /* ASSERT */ ++ ++ switch (in_sig_bt[j_arg]) { ++ case T_ARRAY: ++ case T_OBJECT: ++ { ++ if (out_sig_bt[c_arg] == T_BYTE || out_sig_bt[c_arg] == T_SHORT || ++ out_sig_bt[c_arg] == T_INT || out_sig_bt[c_arg] == T_LONG) { ++ // need to unbox a one-slot value ++ Register in_reg = L0; ++ Register tmp = L2; ++ if ( src.first()->is_reg() ) { ++ in_reg = src.first()->as_Register(); ++ } else { ++ assert(Assembler::is_simm13(reg2offset(src.first()) + STACK_BIAS), ++ "must be"); ++ __ ld_ptr(FP, reg2offset(src.first()) + STACK_BIAS, in_reg); ++ } ++ // If the final destination is an acceptable register ++ if ( dst.first()->is_reg() ) { ++ if ( dst.is_single_phys_reg() || out_sig_bt[c_arg] != T_LONG ) { ++ tmp = dst.first()->as_Register(); ++ } ++ } ++ ++ Label skipUnbox; ++ if ( wordSize == 4 && out_sig_bt[c_arg] == T_LONG ) { ++ __ mov(G0, tmp->successor()); ++ } ++ __ mov(G0, tmp); ++ __ br_null(in_reg, true, Assembler::pn, skipUnbox); ++ ++ BasicType bt = out_sig_bt[c_arg]; ++ int box_offset = java_lang_boxing_object::value_offset_in_bytes(bt); ++ switch (bt) { ++ case T_BYTE: ++ __ ldub(in_reg, box_offset, tmp); break; ++ case T_SHORT: ++ __ lduh(in_reg, box_offset, tmp); break; ++ case T_INT: ++ __ ld(in_reg, box_offset, tmp); break; ++ case T_LONG: ++ __ ld_long(in_reg, box_offset, tmp); break; ++ default: ShouldNotReachHere(); ++ } ++ ++ __ bind(skipUnbox); ++ // If tmp wasn't final destination copy to final destination ++ if (tmp == L2) { ++ VMRegPair tmp_as_VM = reg64_to_VMRegPair(L2); ++ if (out_sig_bt[c_arg] == T_LONG) { ++ long_move(masm, tmp_as_VM, dst); ++ } else { ++ move32_64(masm, tmp_as_VM, out_regs[c_arg]); ++ } ++ } ++ if (out_sig_bt[c_arg] == T_LONG) { ++ assert(out_sig_bt[c_arg+1] == T_VOID, "must be"); ++ ++c_arg; // move over the T_VOID to keep the loop indices in sync ++ } ++ } else if (out_sig_bt[c_arg] == T_ADDRESS) { ++ Register s = ++ src.first()->is_reg() ? src.first()->as_Register() : L2; ++ Register d = ++ dst.first()->is_reg() ? dst.first()->as_Register() : L2; ++ ++ // We store the oop now so that the conversion pass can reach ++ // while in the inner frame. This will be the only store if ++ // the oop is NULL. 
++ if (s != L2) { ++ // src is register ++ if (d != L2) { ++ // dst is register ++ __ mov(s, d); ++ } else { ++ assert(Assembler::is_simm13(reg2offset(dst.first()) + ++ STACK_BIAS), "must be"); ++ __ st_ptr(s, SP, reg2offset(dst.first()) + STACK_BIAS); ++ } ++ } else { ++ // src not a register ++ assert(Assembler::is_simm13(reg2offset(src.first()) + ++ STACK_BIAS), "must be"); ++ __ ld_ptr(FP, reg2offset(src.first()) + STACK_BIAS, d); ++ if (d == L2) { ++ assert(Assembler::is_simm13(reg2offset(dst.first()) + ++ STACK_BIAS), "must be"); ++ __ st_ptr(d, SP, reg2offset(dst.first()) + STACK_BIAS); ++ } ++ } ++ } else if (out_sig_bt[c_arg] != T_VOID) { ++ // Convert the arg to NULL ++ if (dst.first()->is_reg()) { ++ __ mov(G0, dst.first()->as_Register()); ++ } else { ++ assert(Assembler::is_simm13(reg2offset(dst.first()) + ++ STACK_BIAS), "must be"); ++ __ st_ptr(G0, SP, reg2offset(dst.first()) + STACK_BIAS); ++ } ++ } ++ } ++ break; ++ case T_VOID: ++ break; ++ ++ case T_FLOAT: ++ if (src.first()->is_stack()) { ++ // Stack to stack/reg is simple ++ move32_64(masm, src, dst); ++ } else { ++ if (dst.first()->is_reg()) { ++ // freg -> reg ++ int off = ++ STACK_BIAS + conversion_temp * VMRegImpl::stack_slot_size; ++ Register d = dst.first()->as_Register(); ++ if (Assembler::is_simm13(off)) { ++ __ stf(FloatRegisterImpl::S, src.first()->as_FloatRegister(), ++ SP, off); ++ __ ld(SP, off, d); ++ } else { ++ if (conversion_off == noreg) { ++ __ set(off, L6); ++ conversion_off = L6; ++ } ++ __ stf(FloatRegisterImpl::S, src.first()->as_FloatRegister(), ++ SP, conversion_off); ++ __ ld(SP, conversion_off , d); ++ } ++ } else { ++ // freg -> mem ++ int off = STACK_BIAS + reg2offset(dst.first()); ++ if (Assembler::is_simm13(off)) { ++ __ stf(FloatRegisterImpl::S, src.first()->as_FloatRegister(), ++ SP, off); ++ } else { ++ if (conversion_off == noreg) { ++ __ set(off, L6); ++ conversion_off = L6; ++ } ++ __ stf(FloatRegisterImpl::S, src.first()->as_FloatRegister(), ++ SP, conversion_off); ++ } ++ } ++ } ++ break; ++ ++ case T_DOUBLE: ++ assert( j_arg + 1 < total_args_passed && ++ in_sig_bt[j_arg + 1] == T_VOID && ++ out_sig_bt[c_arg+1] == T_VOID, "bad arg list"); ++ if (src.first()->is_stack()) { ++ // Stack to stack/reg is simple ++ long_move(masm, src, dst); ++ } else { ++ Register d = dst.first()->is_reg() ? dst.first()->as_Register() : L2; ++ ++ // Destination could be an odd reg on 32bit in which case ++ // we can't load direct to the destination. 
++ ++ if (!d->is_even() && wordSize == 4) { ++ d = L2; ++ } ++ int off = STACK_BIAS + conversion_temp * VMRegImpl::stack_slot_size; ++ if (Assembler::is_simm13(off)) { ++ __ stf(FloatRegisterImpl::D, src.first()->as_FloatRegister(), ++ SP, off); ++ __ ld_long(SP, off, d); ++ } else { ++ if (conversion_off == noreg) { ++ __ set(off, L6); ++ conversion_off = L6; ++ } ++ __ stf(FloatRegisterImpl::D, src.first()->as_FloatRegister(), ++ SP, conversion_off); ++ __ ld_long(SP, conversion_off, d); ++ } ++ if (d == L2) { ++ long_move(masm, reg64_to_VMRegPair(L2), dst); ++ } ++ } ++ break; ++ ++ case T_LONG : ++ // 32bit can't do a split move of something like g1 -> O0, O1 ++ // so use a memory temp ++ if (src.is_single_phys_reg() && wordSize == 4) { ++ Register tmp = L2; ++ if (dst.first()->is_reg() && ++ (wordSize == 8 || dst.first()->as_Register()->is_even())) { ++ tmp = dst.first()->as_Register(); ++ } ++ ++ int off = STACK_BIAS + conversion_temp * VMRegImpl::stack_slot_size; ++ if (Assembler::is_simm13(off)) { ++ __ stx(src.first()->as_Register(), SP, off); ++ __ ld_long(SP, off, tmp); ++ } else { ++ if (conversion_off == noreg) { ++ __ set(off, L6); ++ conversion_off = L6; ++ } ++ __ stx(src.first()->as_Register(), SP, conversion_off); ++ __ ld_long(SP, conversion_off, tmp); ++ } ++ ++ if (tmp == L2) { ++ long_move(masm, reg64_to_VMRegPair(L2), dst); ++ } ++ } else { ++ long_move(masm, src, dst); ++ } ++ break; ++ ++ case T_ADDRESS: assert(false, "found T_ADDRESS in java args"); ++ ++ default: ++ move32_64(masm, src, dst); ++ } ++ } ++ ++ ++ // If we have any strings we must store any register based arg to the stack ++ // This includes any still live xmm registers too. ++ ++ if (total_strings > 0 ) { ++ ++ // protect all the arg registers ++ __ save_frame(0); ++ __ mov(G2_thread, L7_thread_cache); ++ const Register L2_string_off = L2; ++ ++ // Get first string offset ++ __ set(string_locs * VMRegImpl::stack_slot_size, L2_string_off); ++ ++ for (c_arg = 0 ; c_arg < total_c_args ; c_arg++ ) { ++ if (out_sig_bt[c_arg] == T_ADDRESS) { ++ ++ VMRegPair dst = out_regs[c_arg]; ++ const Register d = dst.first()->is_reg() ? ++ dst.first()->as_Register()->after_save() : noreg; ++ ++ // It's a string the oop and it was already copied to the out arg ++ // position ++ if (d != noreg) { ++ __ mov(d, O0); ++ } else { ++ assert(Assembler::is_simm13(reg2offset(dst.first()) + STACK_BIAS), ++ "must be"); ++ __ ld_ptr(FP, reg2offset(dst.first()) + STACK_BIAS, O0); ++ } ++ Label skip; ++ ++ __ add_d(FP, L2_string_off, O1); ++ __ br_null(O0, false, Assembler::pn, skip); ++ ++ if (d != noreg) { ++ __ mov(O1, d); ++ } else { ++ assert(Assembler::is_simm13(reg2offset(dst.first()) + STACK_BIAS), ++ "must be"); ++ __ st_ptr(O1, FP, reg2offset(dst.first()) + STACK_BIAS); ++ } ++ ++ __ addi_d(L2_string_off, max_dtrace_string_size, L2_string_off); ++ __ call(CAST_FROM_FN_PTR(address, SharedRuntime::get_utf), ++ relocInfo::runtime_call_type); ++ ++ __ bind(skip); ++ ++ } ++ ++ } ++ __ mov(L7_thread_cache, G2_thread); ++ __ restore(); ++ ++ } ++ ++ ++ // Ok now we are done. 
Need to place the nop that dtrace wants in order to ++ // patch in the trap ++ ++ int patch_offset = ((intptr_t)__ pc()) - start; ++ ++ __ nop(); ++ ++ ++ // Return ++ ++ __ restore(); ++ __ ret(); ++ ++ __ flush(); ++ nmethod *nm = nmethod::new_dtrace_nmethod( ++ method, masm->code(), vep_offset, patch_offset, frame_complete, ++ stack_slots / VMRegImpl::slots_per_word); ++ return nm; ++} ++ ++#endif // HAVE_DTRACE_H ++ ++// this function returns the adjust size (in number of words) to a c2i adapter ++// activation for use during deoptimization ++int Deoptimization::last_frame_adjust(int callee_parameters, int callee_locals) { ++ return (callee_locals - callee_parameters) * Interpreter::stackElementWords; ++} ++ ++// "Top of Stack" slots that may be unused by the calling convention but must ++// otherwise be preserved. ++// On Intel these are not necessary and the value can be zero. ++// On Sparc this describes the words reserved for storing a register window ++// when an interrupt occurs. ++uint SharedRuntime::out_preserve_stack_slots() { ++ return 0; ++} ++ ++//------------------------------generate_deopt_blob---------------------------- ++// Ought to generate an ideal graph & compile, but here's some SPARC ASM ++// instead. ++void SharedRuntime::generate_deopt_blob() { ++ // allocate space for the code ++ ResourceMark rm; ++ // setup code generation tools ++ int pad = 0; ++#if INCLUDE_JVMCI ++ if (EnableJVMCI) { ++ pad += 512; // Increase the buffer size when compiling for JVMCI ++ } ++#endif ++ //CodeBuffer buffer ("deopt_blob", 4000, 2048); ++ CodeBuffer buffer ("deopt_blob", 8000+pad, 2048); // FIXME for debug ++ MacroAssembler* masm = new MacroAssembler( & buffer); ++ int frame_size_in_words; ++ OopMap* map = NULL; ++ // Account for the extra args we place on the stack ++ // by the time we call fetch_unroll_info ++ const int additional_words = 2; // deopt kind, thread ++ ++ OopMapSet *oop_maps = new OopMapSet(); ++ RegisterSaver reg_save(COMPILER2_OR_JVMCI != 0); ++ ++ address start = __ pc(); ++ Label cont; ++ // we use S3 for DeOpt reason register ++ Register reason = S3; ++ // use S6 for thread register ++ Register thread = TREG; ++ // use S7 for fetch_unroll_info returned UnrollBlock ++ Register unroll = S7; ++ // Prolog for non exception case! ++ ++ // We have been called from the deopt handler of the deoptee. ++ // ++ // deoptee: ++ // ... ++ // call X ++ // ... ++ // deopt_handler: call_deopt_stub ++ // cur. return pc --> ... ++ // ++ // So currently RA points behind the call in the deopt handler. ++ // We adjust it such that it points to the start of the deopt handler. ++ // The return_pc has been stored in the frame of the deoptee and ++ // will replace the address of the deopt_handler in the call ++ // to Deoptimization::fetch_unroll_info below. ++ ++ // HandlerImpl::size_deopt_handler() ++ __ addi_d(RA, RA, - NativeFarCall::instruction_size); ++ // Save everything in sight. 
++ map = reg_save.save_live_registers(masm, additional_words, &frame_size_in_words); ++ // Normal deoptimization ++ __ li(reason, Deoptimization::Unpack_deopt); ++ __ b(cont); ++ ++ int reexecute_offset = __ pc() - start; ++#if INCLUDE_JVMCI && !defined(COMPILER1) ++ if (EnableJVMCI && UseJVMCICompiler) { ++ // JVMCI does not use this kind of deoptimization ++ __ should_not_reach_here(); ++ } ++#endif ++ ++ // Reexecute case ++ // return address is the pc describes what bci to do re-execute at ++ ++ // No need to update map as each call to save_live_registers will produce identical oopmap ++ (void) reg_save.save_live_registers(masm, additional_words, &frame_size_in_words); ++ __ li(reason, Deoptimization::Unpack_reexecute); ++ __ b(cont); ++ ++#if INCLUDE_JVMCI ++ Label after_fetch_unroll_info_call; ++ int implicit_exception_uncommon_trap_offset = 0; ++ int uncommon_trap_offset = 0; ++ ++ if (EnableJVMCI) { ++ implicit_exception_uncommon_trap_offset = __ pc() - start; ++ ++ __ ld_d(RA, Address(TREG, in_bytes(JavaThread::jvmci_implicit_exception_pc_offset()))); ++ __ st_d(R0, Address(TREG, in_bytes(JavaThread::jvmci_implicit_exception_pc_offset()))); ++ ++ uncommon_trap_offset = __ pc() - start; ++ ++ // Save everything in sight. ++ (void) reg_save.save_live_registers(masm, additional_words, &frame_size_in_words); ++ __ addi_d(SP, SP, -additional_words * wordSize); ++ // fetch_unroll_info needs to call last_java_frame() ++ Label retaddr; ++ __ set_last_Java_frame(NOREG, NOREG, retaddr); ++ ++ __ ld_w(A1, Address(TREG, in_bytes(JavaThread::pending_deoptimization_offset()))); ++ __ li(AT, -1); ++ __ st_w(AT, Address(TREG, in_bytes(JavaThread::pending_deoptimization_offset()))); ++ ++ __ li(reason, (int32_t)Deoptimization::Unpack_reexecute); ++ __ move(A0, TREG); ++ __ move(A2, reason); // exec mode ++ __ call((address)Deoptimization::uncommon_trap, relocInfo::runtime_call_type); ++ __ bind(retaddr); ++ oop_maps->add_gc_map( __ pc()-start, map->deep_copy()); ++ __ addi_d(SP, SP, additional_words * wordSize); ++ ++ __ reset_last_Java_frame(false); ++ ++ __ b(after_fetch_unroll_info_call); ++ } // EnableJVMCI ++#endif // INCLUDE_JVMCI ++ ++ int exception_offset = __ pc() - start; ++ // Prolog for exception case ++ ++ // all registers are dead at this entry point, except for V0 and ++ // V1 which contain the exception oop and exception pc ++ // respectively. Set them in TLS and fall thru to the ++ // unpack_with_exception_in_tls entry point. ++ ++#ifndef OPT_THREAD ++ __ get_thread(thread); ++#endif ++ __ st_ptr(V1, thread, in_bytes(JavaThread::exception_pc_offset())); ++ __ st_ptr(V0, thread, in_bytes(JavaThread::exception_oop_offset())); ++ int exception_in_tls_offset = __ pc() - start; ++ // new implementation because exception oop is now passed in JavaThread ++ ++ // Prolog for exception case ++ // All registers must be preserved because they might be used by LinearScan ++ // Exceptiop oop and throwing PC are passed in JavaThread ++ // tos: stack at point of call to method that threw the exception (i.e. only ++ // args are on the stack, no return address) ++ ++ // Return address will be patched later with the throwing pc. The correct value is not ++ // available now because loading it from memory would destroy registers. ++ // Save everything in sight. 
++ // No need to update map as each call to save_live_registers will produce identical oopmap ++ (void) reg_save.save_live_registers(masm, additional_words, &frame_size_in_words); ++ ++ // Now it is safe to overwrite any register ++ // store the correct deoptimization type ++ __ li(reason, Deoptimization::Unpack_exception); ++ // load throwing pc from JavaThread and patch it as the return address ++ // of the current frame. Then clear the field in JavaThread ++#ifndef OPT_THREAD ++ __ get_thread(thread); ++#endif ++ __ ld_ptr(V1, thread, in_bytes(JavaThread::exception_pc_offset())); ++ __ st_ptr(V1, SP, reg_save.ra_offset()); //save ra ++ __ st_ptr(R0, thread, in_bytes(JavaThread::exception_pc_offset())); ++ ++ ++#ifdef ASSERT ++ // verify that there is really an exception oop in JavaThread ++ __ ld_ptr(AT, thread, in_bytes(JavaThread::exception_oop_offset())); ++ __ verify_oop(AT); ++ // verify that there is no pending exception ++ Label no_pending_exception; ++ __ ld_ptr(AT, thread, in_bytes(Thread::pending_exception_offset())); ++ __ beq(AT, R0, no_pending_exception); ++ __ stop("must not have pending exception here"); ++ __ bind(no_pending_exception); ++#endif ++ __ bind(cont); ++ // Compiled code leaves the floating point stack dirty, empty it. ++ __ empty_FPU_stack(); ++ ++ ++ // Call C code. Need thread and this frame, but NOT official VM entry ++ // crud. We cannot block on this call, no GC can happen. ++#ifndef OPT_THREAD ++ __ get_thread(thread); ++#endif ++ ++ __ move(A0, thread); ++ __ move(A1, reason); // exec_mode ++ __ addi_d(SP, SP, -additional_words * wordSize); ++ ++ Label retaddr; ++ __ set_last_Java_frame(NOREG, NOREG, retaddr); ++ ++ // Call fetch_unroll_info(). Need thread and this frame, but NOT official VM entry - cannot block on ++ // this call, no GC can happen. Call should capture return values. ++ ++ // TODO: confirm reloc ++ __ call(CAST_FROM_FN_PTR(address, Deoptimization::fetch_unroll_info), relocInfo::runtime_call_type); ++ __ bind(retaddr); ++ oop_maps->add_gc_map(__ pc() - start, map); ++ __ addi_d(SP, SP, additional_words * wordSize); ++#ifndef OPT_THREAD ++ __ get_thread(thread); ++#endif ++ __ reset_last_Java_frame(false); ++ ++#if INCLUDE_JVMCI ++ if (EnableJVMCI) { ++ __ bind(after_fetch_unroll_info_call); ++ } ++#endif ++ ++ // Load UnrollBlock into S7 ++ __ move(unroll, V0); ++ ++ ++ // Move the unpack kind to a safe place in the UnrollBlock because ++ // we are very short of registers ++ ++ Address unpack_kind(unroll, Deoptimization::UnrollBlock::unpack_kind_offset_in_bytes()); ++ __ st_w(reason, unpack_kind); ++ // save the unpack_kind value ++ // Retrieve the possible live values (return values) ++ // All callee save registers representing jvm state ++ // are now in the vframeArray. ++ ++ Label noException; ++ __ li(AT, Deoptimization::Unpack_exception); ++ __ bne(AT, reason, noException);// Was exception pending? ++ __ ld_ptr(V0, thread, in_bytes(JavaThread::exception_oop_offset())); ++ __ ld_ptr(V1, thread, in_bytes(JavaThread::exception_pc_offset())); ++ __ st_ptr(R0, thread, in_bytes(JavaThread::exception_pc_offset())); ++ __ st_ptr(R0, thread, in_bytes(JavaThread::exception_oop_offset())); ++ ++ __ verify_oop(V0); ++ ++ // Overwrite the result registers with the exception results. ++ __ st_ptr(V0, SP, reg_save.v0_offset()); ++ __ st_ptr(V1, SP, reg_save.v1_offset()); ++ ++ __ bind(noException); ++ ++ ++ // Stack is back to only having register save data on the stack. ++ // Now restore the result registers. 
Everything else is either dead or captured ++ // in the vframeArray. ++ ++ reg_save.restore_result_registers(masm); ++ // All of the register save area has been popped of the stack. Only the ++ // return address remains. ++ // Pop all the frames we must move/replace. ++ // Frame picture (youngest to oldest) ++ // 1: self-frame (no frame link) ++ // 2: deopting frame (no frame link) ++ // 3: caller of deopting frame (could be compiled/interpreted). ++ // ++ // Note: by leaving the return address of self-frame on the stack ++ // and using the size of frame 2 to adjust the stack ++ // when we are done the return to frame 3 will still be on the stack. ++ ++ // register for the sender's sp ++ Register sender_sp = Rsender; ++ // register for frame pcs ++ Register pcs = T0; ++ // register for frame sizes ++ Register sizes = T1; ++ // register for frame count ++ Register count = T3; ++ ++ // Pop deoptimized frame ++ __ ld_w(AT, unroll, Deoptimization::UnrollBlock::size_of_deoptimized_frame_offset_in_bytes()); ++ __ add_d(SP, SP, AT); ++ // sp should be pointing at the return address to the caller (3) ++ ++ // Load array of frame pcs into pcs ++ __ ld_ptr(pcs, unroll, Deoptimization::UnrollBlock::frame_pcs_offset_in_bytes()); ++ __ addi_d(SP, SP, wordSize); // trash the old pc ++ // Load array of frame sizes into T6 ++ __ ld_ptr(sizes, unroll, Deoptimization::UnrollBlock::frame_sizes_offset_in_bytes()); ++ ++ ++ ++ // Load count of frams into T3 ++ __ ld_w(count, unroll, Deoptimization::UnrollBlock::number_of_frames_offset_in_bytes()); ++ // Pick up the initial fp we should save ++ __ ld_d(FP, unroll, Deoptimization::UnrollBlock::initial_info_offset_in_bytes()); ++ // Now adjust the caller's stack to make up for the extra locals ++ // but record the original sp so that we can save it in the skeletal interpreter ++ // frame and the stack walking of interpreter_sender will get the unextended sp ++ // value and not the "real" sp value. ++ __ move(sender_sp, SP); ++ __ ld_w(AT, unroll, Deoptimization::UnrollBlock::caller_adjustment_offset_in_bytes()); ++ __ sub_d(SP, SP, AT); ++ ++ Label loop; ++ __ bind(loop); ++ __ ld_d(T2, sizes, 0); // Load frame size ++ __ ld_ptr(AT, pcs, 0); // save return address ++ __ addi_d(T2, T2, -2 * wordSize); // we'll push pc and fp, by hand ++ __ push2(AT, FP); ++ __ move(FP, SP); ++ __ sub_d(SP, SP, T2); // Prolog! ++ // This value is corrected by layout_activation_impl ++ __ st_d(R0, FP, frame::interpreter_frame_last_sp_offset * wordSize); ++ __ st_d(sender_sp, FP, frame::interpreter_frame_sender_sp_offset * wordSize);// Make it walkable ++ __ move(sender_sp, SP); // pass to next frame ++ __ addi_d(count, count, -1); // decrement counter ++ __ addi_d(sizes, sizes, wordSize); // Bump array pointer (sizes) ++ __ addi_d(pcs, pcs, wordSize); // Bump array pointer (pcs) ++ __ bne(count, R0, loop); ++ __ ld_d(AT, pcs, 0); // frame_pcs[number_of_frames] = Interpreter::deopt_entry(vtos, 0); ++ // Re-push self-frame ++ __ push2(AT, FP); ++ __ move(FP, SP); ++ __ st_d(R0, FP, frame::interpreter_frame_last_sp_offset * wordSize); ++ __ st_d(sender_sp, FP, frame::interpreter_frame_sender_sp_offset * wordSize); ++ __ addi_d(SP, SP, -(frame_size_in_words - 2 - additional_words) * wordSize); ++ ++ // Restore frame locals after moving the frame ++ __ st_d(V0, SP, reg_save.v0_offset()); ++ __ st_d(V1, SP, reg_save.v1_offset()); ++ __ fst_d(F0, SP, reg_save.fpr0_offset()); ++ __ fst_d(F1, SP, reg_save.fpr1_offset()); ++ ++ // Call unpack_frames(). 
Need thread and this frame, but NOT official VM entry - cannot block on ++ // this call, no GC can happen. ++ __ move(A1, reason); // exec_mode ++#ifndef OPT_THREAD ++ __ get_thread(thread); ++#endif ++ __ move(A0, thread); // thread ++ __ addi_d(SP, SP, (-additional_words) *wordSize); ++ ++ // set last_Java_sp, last_Java_fp ++ Label L; ++ address the_pc = __ pc(); ++ __ bind(L); ++ __ set_last_Java_frame(NOREG, FP, L); ++ ++ __ li(AT, -(StackAlignmentInBytes)); ++ __ andr(SP, SP, AT); // Fix stack alignment as required by ABI ++ ++ __ call(CAST_FROM_FN_PTR(address, Deoptimization::unpack_frames), relocInfo::runtime_call_type); ++ // Revert SP alignment after call since we're going to do some SP relative addressing below ++ __ ld_d(SP, thread, in_bytes(JavaThread::last_Java_sp_offset())); ++ // Set an oopmap for the call site ++ oop_maps->add_gc_map(the_pc - start, new OopMap(frame_size_in_words, 0)); ++ ++ __ push(V0); ++ ++#ifndef OPT_THREAD ++ __ get_thread(thread); ++#endif ++ __ reset_last_Java_frame(true); ++ ++ // Collect return values ++ __ ld_d(V0, SP, reg_save.v0_offset() + (additional_words + 1) * wordSize); ++ __ ld_d(V1, SP, reg_save.v1_offset() + (additional_words + 1) * wordSize); ++ // Pop float stack and store in local ++ __ fld_d(F0, SP, reg_save.fpr0_offset() + (additional_words + 1) * wordSize); ++ __ fld_d(F1, SP, reg_save.fpr1_offset() + (additional_words + 1) * wordSize); ++ ++ //FIXME, ++ // Clear floating point stack before returning to interpreter ++ __ empty_FPU_stack(); ++ //FIXME, we should consider about float and double ++ // Push a float or double return value if necessary. ++ __ leave(); ++ ++ // Jump to interpreter ++ __ jr(RA); ++ ++ masm->flush(); ++ _deopt_blob = DeoptimizationBlob::create(&buffer, oop_maps, 0, exception_offset, reexecute_offset, frame_size_in_words); ++ _deopt_blob->set_unpack_with_exception_in_tls_offset(exception_in_tls_offset); ++#if INCLUDE_JVMCI ++ if (EnableJVMCI) { ++ _deopt_blob->set_uncommon_trap_offset(uncommon_trap_offset); ++ _deopt_blob->set_implicit_exception_uncommon_trap_offset(implicit_exception_uncommon_trap_offset); ++ } ++#endif ++} ++ ++#ifdef COMPILER2 ++ ++//------------------------------generate_uncommon_trap_blob-------------------- ++// Ought to generate an ideal graph & compile, but here's some SPARC ASM ++// instead. ++void SharedRuntime::generate_uncommon_trap_blob() { ++ // allocate space for the code ++ ResourceMark rm; ++ // setup code generation tools ++ CodeBuffer buffer ("uncommon_trap_blob", 512*80 , 512*40 ); ++ MacroAssembler* masm = new MacroAssembler(&buffer); ++ ++ enum frame_layout { ++ fp_off, fp_off2, ++ return_off, return_off2, ++ framesize ++ }; ++ assert(framesize % 4 == 0, "sp not 16-byte aligned"); ++ address start = __ pc(); ++ ++ // S8 be used in C2 ++ __ li(S8, (long)Interpreter::dispatch_table(itos)); ++ // Push self-frame. ++ __ addi_d(SP, SP, -framesize * BytesPerInt); ++ ++ __ st_d(RA, SP, return_off * BytesPerInt); ++ __ st_d(FP, SP, fp_off * BytesPerInt); ++ ++ __ addi_d(FP, SP, fp_off * BytesPerInt); ++ ++ // Clear the floating point exception stack ++ __ empty_FPU_stack(); ++ ++ Register thread = TREG; ++ ++#ifndef OPT_THREAD ++ __ get_thread(thread); ++#endif ++ // set last_Java_sp ++ Label retaddr; ++ __ set_last_Java_frame(NOREG, FP, retaddr); ++ // Call C code. Need thread but NOT official VM entry ++ // crud. We cannot block on this call, no GC can happen. Call should ++ // capture callee-saved registers as well as return values. 
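++ // Marshal the runtime-call arguments: A0 = current thread, A1 = the trap request that arrived in this blob in T0, A2 = Unpack_uncommon_trap as the exec mode.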
++ __ move(A0, thread); ++ // argument already in T0 ++ __ move(A1, T0); ++ __ addi_d(A2, R0, Deoptimization::Unpack_uncommon_trap); ++ __ call((address)Deoptimization::uncommon_trap, relocInfo::runtime_call_type); ++ __ bind(retaddr); ++ ++ // Set an oopmap for the call site ++ OopMapSet *oop_maps = new OopMapSet(); ++ OopMap* map = new OopMap( framesize, 0 ); ++ ++ oop_maps->add_gc_map(__ pc() - start, map); ++ ++#ifndef OPT_THREAD ++ __ get_thread(thread); ++#endif ++ __ reset_last_Java_frame(false); ++ ++ // Load UnrollBlock into S7 ++ Register unroll = S7; ++ __ move(unroll, V0); ++ ++#ifdef ASSERT ++ { Label L; ++ __ ld_ptr(AT, unroll, Deoptimization::UnrollBlock::unpack_kind_offset_in_bytes()); ++ __ li(T4, Deoptimization::Unpack_uncommon_trap); ++ __ beq(AT, T4, L); ++ __ stop("SharedRuntime::generate_deopt_blob: expected Unpack_uncommon_trap"); ++ __ bind(L); ++ } ++#endif ++ ++ // Pop all the frames we must move/replace. ++ // ++ // Frame picture (youngest to oldest) ++ // 1: self-frame (no frame link) ++ // 2: deopting frame (no frame link) ++ // 3: possible-i2c-adapter-frame ++ // 4: caller of deopting frame (could be compiled/interpreted. If interpreted we will create an ++ // and c2i here) ++ ++ __ addi_d(SP, SP, framesize * BytesPerInt); ++ ++ // Pop deoptimized frame ++ __ ld_w(AT, unroll, Deoptimization::UnrollBlock::size_of_deoptimized_frame_offset_in_bytes()); ++ __ add_d(SP, SP, AT); ++ ++ // register for frame pcs ++ Register pcs = T8; ++ // register for frame sizes ++ Register sizes = T4; ++ // register for frame count ++ Register count = T3; ++ // register for the sender's sp ++ Register sender_sp = T1; ++ ++ // sp should be pointing at the return address to the caller (4) ++ // Load array of frame pcs ++ __ ld_d(pcs, unroll, Deoptimization::UnrollBlock::frame_pcs_offset_in_bytes()); ++ ++ // Load array of frame sizes ++ __ ld_d(sizes, unroll, Deoptimization::UnrollBlock::frame_sizes_offset_in_bytes()); ++ __ ld_wu(count, unroll, Deoptimization::UnrollBlock::number_of_frames_offset_in_bytes()); ++ ++ // Pick up the initial fp we should save ++ __ ld_d(FP, unroll, Deoptimization::UnrollBlock::initial_info_offset_in_bytes()); ++ // Now adjust the caller's stack to make up for the extra locals ++ // but record the original sp so that we can save it in the skeletal interpreter ++ // frame and the stack walking of interpreter_sender will get the unextended sp ++ // value and not the "real" sp value. ++ ++ __ move(sender_sp, SP); ++ __ ld_w(AT, unroll, Deoptimization::UnrollBlock::caller_adjustment_offset_in_bytes()); ++ __ sub_d(SP, SP, AT); ++ // Push interpreter frames in a loop ++ Label loop; ++ __ bind(loop); ++ __ ld_d(T2, sizes, 0); // Load frame size ++ __ ld_d(AT, pcs, 0); // save return address ++ __ addi_d(T2, T2, -2*wordSize); // we'll push pc and fp, by hand ++ __ push2(AT, FP); ++ __ move(FP, SP); ++ __ sub_d(SP, SP, T2); // Prolog! 
++ // This value is corrected by layout_activation_impl ++ __ st_d(R0, FP, frame::interpreter_frame_last_sp_offset * wordSize); ++ __ st_d(sender_sp, FP, frame::interpreter_frame_sender_sp_offset * wordSize);// Make it walkable ++ __ move(sender_sp, SP); // pass to next frame ++ __ addi_d(count, count, -1); // decrement counter ++ __ addi_d(sizes, sizes, wordSize); // Bump array pointer (sizes) ++ __ addi_d(pcs, pcs, wordSize); // Bump array pointer (pcs) ++ __ bne(count, R0, loop); ++ ++ __ ld_d(RA, pcs, 0); ++ ++ // Re-push self-frame ++ // save old & set new FP ++ // save final return address ++ __ enter(); ++ ++ // Use FP because the frames look interpreted now ++ // Save "the_pc" since it cannot easily be retrieved using the last_java_SP after we aligned SP. ++ // Don't need the precise return PC here, just precise enough to point into this code blob. ++ Label L; ++ address the_pc = __ pc(); ++ __ bind(L); ++ __ set_last_Java_frame(NOREG, FP, L); ++ ++ __ li(AT, -(StackAlignmentInBytes)); ++ __ andr(SP, SP, AT); // Fix stack alignment as required by ABI ++ ++ // Call C code. Need thread but NOT official VM entry ++ // crud. We cannot block on this call, no GC can happen. Call should ++ // restore return values to their stack-slots with the new SP. ++ __ move(A0, thread); ++ __ li(A1, Deoptimization::Unpack_uncommon_trap); ++ __ call((address)Deoptimization::unpack_frames, relocInfo::runtime_call_type); ++ // Set an oopmap for the call site ++ oop_maps->add_gc_map(the_pc - start, new OopMap(framesize, 0)); ++ ++ __ reset_last_Java_frame(true); ++ ++ // Pop self-frame. ++ __ leave(); // Epilog! ++ ++ // Jump to interpreter ++ __ jr(RA); ++ // ------------- ++ // make sure all code is generated ++ masm->flush(); ++ _uncommon_trap_blob = UncommonTrapBlob::create(&buffer, oop_maps, framesize / 2); ++} ++ ++#endif // COMPILER2 ++ ++//------------------------------generate_handler_blob------------------- ++// ++// Generate a special Compile2Runtime blob that saves all registers, and sets ++// up an OopMap and calls safepoint code to stop the compiled code for ++// a safepoint. ++// ++// This blob is jumped to (via a breakpoint and the signal handler) from a ++// safepoint in compiled code. ++ ++SafepointBlob* SharedRuntime::generate_handler_blob(address call_ptr, int poll_type) { ++ ++ // Account for thread arg in our frame ++ const int additional_words = 0; ++ int frame_size_in_words; ++ ++ assert (StubRoutines::forward_exception_entry() != NULL, "must be generated before"); ++ ++ ResourceMark rm; ++ OopMapSet *oop_maps = new OopMapSet(); ++ OopMap* map; ++ ++ // allocate space for the code ++ // setup code generation tools ++ CodeBuffer buffer ("handler_blob", 2048, 512); ++ MacroAssembler* masm = new MacroAssembler( &buffer); ++ ++ const Register thread = TREG; ++ address start = __ pc(); ++ bool cause_return = (poll_type == POLL_AT_RETURN); ++ RegisterSaver reg_save(poll_type == POLL_AT_VECTOR_LOOP /* save_vectors */); ++ ++#ifndef OPT_THREAD ++ __ get_thread(thread); ++#endif ++ ++ map = reg_save.save_live_registers(masm, additional_words, &frame_size_in_words); ++ ++#ifndef OPT_THREAD ++ __ get_thread(thread); ++#endif ++ ++ // The following is basically a call_VM. However, we need the precise ++ // address of the call in order to generate an oopmap. Hence, we do all the ++ // work outselvs. 
++ ++ Label retaddr; ++ __ set_last_Java_frame(NOREG, NOREG, retaddr); ++ ++ if (!cause_return) { ++ // overwrite the return address pushed by save_live_registers ++ // Additionally, TSR is a callee-saved register so we can look at ++ // it later to determine if someone changed the return address for ++ // us! ++ __ ld_ptr(TSR, thread, in_bytes(JavaThread::saved_exception_pc_offset())); ++ __ st_ptr(TSR, SP, reg_save.ra_offset()); ++ } ++ ++ // Do the call ++ __ move(A0, thread); ++ // TODO: confirm reloc ++ __ call(call_ptr, relocInfo::runtime_call_type); ++ __ bind(retaddr); ++ ++ // Set an oopmap for the call site. This oopmap will map all ++ // oop-registers and debug-info registers as callee-saved. This ++ // will allow deoptimization at this safepoint to find all possible ++ // debug-info recordings, as well as let GC find all oops. ++ oop_maps->add_gc_map(__ pc() - start, map); ++ ++ Label noException; ++ ++ // Clear last_Java_sp again ++ __ reset_last_Java_frame(false); ++ ++ __ ld_ptr(AT, thread, in_bytes(Thread::pending_exception_offset())); ++ __ beq(AT, R0, noException); ++ ++ // Exception pending ++ ++ reg_save.restore_live_registers(masm); ++ //forward_exception_entry need return address on the stack ++ __ push(RA); ++ // TODO: confirm reloc ++ __ jmp((address)StubRoutines::forward_exception_entry(), relocInfo::runtime_call_type); ++ ++ // No exception case ++ __ bind(noException); ++ ++ Label no_adjust, bail; ++ if (SafepointMechanism::uses_thread_local_poll() && !cause_return) { ++ // If our stashed return pc was modified by the runtime we avoid touching it ++ __ ld_ptr(AT, SP, reg_save.ra_offset()); ++ __ bne(AT, TSR, no_adjust); ++ ++#ifdef ASSERT ++ // Verify the correct encoding of the poll we're about to skip. ++ // See NativeInstruction::is_safepoint_poll() ++ __ ld_wu(AT, TSR, 0); ++ __ push(T5); ++ __ li(T5, 0xffc0001f); ++ __ andr(AT, AT, T5); ++ __ li(T5, 0x28800013); ++ __ xorr(AT, AT, T5); ++ __ pop(T5); ++ __ bne(AT, R0, bail); ++#endif ++ // Adjust return pc forward to step over the safepoint poll instruction ++ __ addi_d(RA, TSR, 4); // NativeInstruction::instruction_size=4 ++ __ st_ptr(RA, SP, reg_save.ra_offset()); ++ } ++ ++ __ bind(no_adjust); ++ // Normal exit, register restoring and exit ++ reg_save.restore_live_registers(masm); ++ __ jr(RA); ++ ++#ifdef ASSERT ++ __ bind(bail); ++ __ stop("Attempting to adjust pc to skip safepoint poll but the return point is not what we expected"); ++#endif ++ ++ // Make sure all code is generated ++ masm->flush(); ++ // Fill-out other meta info ++ return SafepointBlob::create(&buffer, oop_maps, frame_size_in_words); ++} ++ ++// ++// generate_resolve_blob - call resolution (static/virtual/opt-virtual/ic-miss ++// ++// Generate a stub that calls into vm to find out the proper destination ++// of a java call. All the argument registers are live at this point ++// but since this is generic code we don't know what they are and the caller ++// must do any gc of the args. ++// ++RuntimeStub* SharedRuntime::generate_resolve_blob(address destination, const char* name) { ++ assert (StubRoutines::forward_exception_entry() != NULL, "must be generated before"); ++ ++ // allocate space for the code ++ ResourceMark rm; ++ ++ //CodeBuffer buffer(name, 1000, 512); ++ //FIXME. 
code_size ++ CodeBuffer buffer(name, 2000, 2048); ++ MacroAssembler* masm = new MacroAssembler(&buffer); ++ ++ int frame_size_words; ++ RegisterSaver reg_save(false /* save_vectors */); ++ //we put the thread in A0 ++ ++ OopMapSet *oop_maps = new OopMapSet(); ++ OopMap* map = NULL; ++ ++ address start = __ pc(); ++ map = reg_save.save_live_registers(masm, 0, &frame_size_words); ++ ++ ++ int frame_complete = __ offset(); ++#ifndef OPT_THREAD ++ const Register thread = T8; ++ __ get_thread(thread); ++#else ++ const Register thread = TREG; ++#endif ++ ++ __ move(A0, thread); ++ Label retaddr; ++ __ set_last_Java_frame(noreg, FP, retaddr); ++ // align the stack before invoke native ++ __ li(AT, -(StackAlignmentInBytes)); ++ __ andr(SP, SP, AT); ++ ++ // TODO: confirm reloc ++ __ call(destination, relocInfo::runtime_call_type); ++ __ bind(retaddr); ++ ++ // Set an oopmap for the call site. ++ // We need this not only for callee-saved registers, but also for volatile ++ // registers that the compiler might be keeping live across a safepoint. ++ oop_maps->add_gc_map(__ pc() - start, map); ++ // V0 contains the address we are going to jump to assuming no exception got installed ++#ifndef OPT_THREAD ++ __ get_thread(thread); ++#endif ++ __ ld_ptr(SP, thread, in_bytes(JavaThread::last_Java_sp_offset())); ++ // clear last_Java_sp ++ __ reset_last_Java_frame(true); ++ // check for pending exceptions ++ Label pending; ++ __ ld_ptr(AT, thread, in_bytes(Thread::pending_exception_offset())); ++ __ bne(AT, R0, pending); ++ // get the returned Method* ++ __ get_vm_result_2(Rmethod, thread); ++ __ st_ptr(Rmethod, SP, reg_save.s3_offset()); ++ __ st_ptr(V0, SP, reg_save.t5_offset()); ++ reg_save.restore_live_registers(masm); ++ ++ // We are back the the original state on entry and ready to go the callee method. ++ __ jr(T5); ++ // Pending exception after the safepoint ++ ++ __ bind(pending); ++ ++ reg_save.restore_live_registers(masm); ++ ++ // exception pending => remove activation and forward to exception handler ++ //forward_exception_entry need return address on the stack ++ __ push(RA); ++#ifndef OPT_THREAD ++ __ get_thread(thread); ++#endif ++ __ st_ptr(R0, thread, in_bytes(JavaThread::vm_result_offset())); ++ __ ld_ptr(V0, thread, in_bytes(Thread::pending_exception_offset())); ++ __ jmp(StubRoutines::forward_exception_entry(), relocInfo::runtime_call_type); ++ // ++ // make sure all code is generated ++ masm->flush(); ++ RuntimeStub* tmp= RuntimeStub::new_runtime_stub(name, &buffer, frame_complete, frame_size_words, oop_maps, true); ++ return tmp; ++} ++ ++extern "C" int SpinPause() {return 0;} +diff --git a/src/hotspot/cpu/loongarch/stubGenerator_loongarch_64.cpp b/src/hotspot/cpu/loongarch/stubGenerator_loongarch_64.cpp +new file mode 100644 +index 0000000000..7f73863b2e +--- /dev/null ++++ b/src/hotspot/cpu/loongarch/stubGenerator_loongarch_64.cpp +@@ -0,0 +1,4804 @@ ++/* ++ * Copyright (c) 2003, 2013, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#include "precompiled.hpp" ++#include "asm/macroAssembler.hpp" ++#include "asm/macroAssembler.inline.hpp" ++#include "gc/shared/barrierSet.hpp" ++#include "gc/shared/barrierSetAssembler.hpp" ++#include "interpreter/interpreter.hpp" ++#include "nativeInst_loongarch.hpp" ++#include "oops/instanceOop.hpp" ++#include "oops/method.hpp" ++#include "oops/objArrayKlass.hpp" ++#include "oops/oop.inline.hpp" ++#include "prims/methodHandles.hpp" ++#include "runtime/frame.inline.hpp" ++#include "runtime/handles.inline.hpp" ++#include "runtime/sharedRuntime.hpp" ++#include "runtime/stubCodeGenerator.hpp" ++#include "runtime/stubRoutines.hpp" ++#include "runtime/thread.inline.hpp" ++#ifdef COMPILER2 ++#include "opto/runtime.hpp" ++#endif ++ ++// Declaration and definition of StubGenerator (no .hpp file). ++// For a more detailed description of the stub routine structure ++// see the comment in stubRoutines.hpp ++ ++#define __ _masm-> ++ ++#define T0 RT0 ++#define T1 RT1 ++#define T2 RT2 ++#define T3 RT3 ++#define T4 RT4 ++#define T5 RT5 ++#define T6 RT6 ++#define T7 RT7 ++#define T8 RT8 ++ ++#define TIMES_OOP (UseCompressedOops ? Address::times_4 : Address::times_8) ++ ++//#ifdef PRODUCT ++//#define BLOCK_COMMENT(str) /* nothing */ ++//#else ++//#define BLOCK_COMMENT(str) __ block_comment(str) ++//#endif ++ ++//#define BIND(label) bind(label); BLOCK_COMMENT(#label ":") ++const int MXCSR_MASK = 0xFFC0; // Mask out any pending exceptions ++ ++// Stub Code definitions ++ ++class StubGenerator: public StubCodeGenerator { ++ private: ++ ++ // This fig is not LA ABI. It is call Java from C ABI. ++ // Call stubs are used to call Java from C ++ // ++ // [ return_from_Java ] ++ // [ argument word n-1 ] <--- sp ++ // ... ++ // [ argument word 0 ] ++ // ... ++ // -8 [ S6 ] ++ // -7 [ S5 ] ++ // -6 [ S4 ] ++ // -5 [ S3 ] ++ // -4 [ S1 ] ++ // -3 [ TSR(S2) ] ++ // -2 [ LVP(S7) ] ++ // -1 [ BCP(S0) ] ++ // 0 [ saved fp ] <--- fp_after_call ++ // 1 [ return address ] ++ // 2 [ ptr. to call wrapper ] <--- a0 (old sp -->)fp ++ // 3 [ result ] <--- a1 ++ // 4 [ result_type ] <--- a2 ++ // 5 [ method ] <--- a3 ++ // 6 [ entry_point ] <--- a4 ++ // 7 [ parameters ] <--- a5 ++ // 8 [ parameter_size ] <--- a6 ++ // 9 [ thread ] <--- a7 ++ ++ // ++ // LA ABI does not save paras in sp. ++ // ++ // [ return_from_Java ] ++ // [ argument word n-1 ] <--- sp ++ // ... ++ // [ argument word 0 ] ++ //-22 [ F31 ] ++ // ... ++ //-15 [ F24 ] ++ //-14 [ S8 ] ++ //-13 [ thread ] ++ //-12 [ result_type ] <--- a2 ++ //-11 [ result ] <--- a1 ++ //-10 [ ] ++ // -9 [ ptr. to call wrapper ] <--- a0 ++ // -8 [ S6 ] ++ // -7 [ S5 ] ++ // -6 [ S4 ] ++ // -5 [ S3 ] ++ // -4 [ S1 ] ++ // -3 [ TSR(S2) ] ++ // -2 [ LVP(S7) ] ++ // -1 [ BCP(S0) ] ++ // 0 [ saved fp ] <--- fp_after_call ++ // 1 [ return address ] ++ // 2 [ ] <--- old sp ++ // ++ // Find a right place in the call_stub for S8. ++ // S8 will point to the starting point of Interpreter::dispatch_table(itos). 
++ // It should be saved/restored before/after Java calls. ++ // ++ enum call_stub_layout { ++ RA_off = 1, ++ FP_off = 0, ++ BCP_off = -1, ++ LVP_off = -2, ++ TSR_off = -3, ++ S1_off = -4, ++ S3_off = -5, ++ S4_off = -6, ++ S5_off = -7, ++ S6_off = -8, ++ call_wrapper_off = -9, ++ result_off = -11, ++ result_type_off = -12, ++ thread_off = -13, ++ S8_off = -14, ++ F24_off = -15, ++ F25_off = -16, ++ F26_off = -17, ++ F27_off = -18, ++ F28_off = -19, ++ F29_off = -20, ++ F30_off = -21, ++ F31_off = -22, ++ total_off = F31_off, ++ }; ++ ++ address generate_call_stub(address& return_address) { ++ assert((int)frame::entry_frame_call_wrapper_offset == (int)call_wrapper_off, "adjust this code"); ++ StubCodeMark mark(this, "StubRoutines", "call_stub"); ++ address start = __ pc(); ++ ++ // same as in generate_catch_exception()! ++ ++ // stub code ++ // save ra and fp ++ __ enter(); ++ // I think 14 is the max gap between argument and callee saved register ++ __ addi_d(SP, SP, total_off * wordSize); ++ __ st_d(BCP, FP, BCP_off * wordSize); ++ __ st_d(LVP, FP, LVP_off * wordSize); ++ __ st_d(TSR, FP, TSR_off * wordSize); ++ __ st_d(S1, FP, S1_off * wordSize); ++ __ st_d(S3, FP, S3_off * wordSize); ++ __ st_d(S4, FP, S4_off * wordSize); ++ __ st_d(S5, FP, S5_off * wordSize); ++ __ st_d(S6, FP, S6_off * wordSize); ++ __ st_d(A0, FP, call_wrapper_off * wordSize); ++ __ st_d(A1, FP, result_off * wordSize); ++ __ st_d(A2, FP, result_type_off * wordSize); ++ __ st_d(A7, FP, thread_off * wordSize); ++ __ st_d(S8, FP, S8_off * wordSize); ++ ++ __ fst_d(F24, FP, F24_off * wordSize); ++ __ fst_d(F25, FP, F25_off * wordSize); ++ __ fst_d(F26, FP, F26_off * wordSize); ++ __ fst_d(F27, FP, F27_off * wordSize); ++ __ fst_d(F28, FP, F28_off * wordSize); ++ __ fst_d(F29, FP, F29_off * wordSize); ++ __ fst_d(F30, FP, F30_off * wordSize); ++ __ fst_d(F31, FP, F31_off * wordSize); ++ ++ __ li(S8, (long)Interpreter::dispatch_table(itos)); ++ ++#ifdef OPT_THREAD ++ __ move(TREG, A7); ++#endif ++ //add for compressedoops ++ __ reinit_heapbase(); ++ ++#ifdef ASSERT ++ // make sure we have no pending exceptions ++ { ++ Label L; ++ __ ld_d(AT, A7, in_bytes(Thread::pending_exception_offset())); ++ __ beq(AT, R0, L); ++ /* FIXME: I do not know how to realize stop in LA, do it in the future */ ++ __ stop("StubRoutines::call_stub: entered with pending exception"); ++ __ bind(L); ++ } ++#endif ++ ++ // pass parameters if any ++ // A5: parameter ++ // A6: parameter_size ++ // T0: parameter_size_tmp(--) ++ // T2: offset(++) ++ // T3: tmp ++ Label parameters_done; ++ // judge if the parameter_size equals 0 ++ __ beq(A6, R0, parameters_done); ++ __ slli_d(AT, A6, Interpreter::logStackElementSize); ++ __ sub_d(SP, SP, AT); ++ __ li(AT, -StackAlignmentInBytes); ++ __ andr(SP, SP, AT); ++ // Copy Java parameters in reverse order (receiver last) ++ // Note that the argument order is inverted in the process ++ Label loop; ++ __ move(T0, A6); ++ __ move(T2, R0); ++ __ bind(loop); ++ ++ // get parameter ++ __ alsl_d(T3, T0, A5, LogBytesPerWord - 1); ++ __ ld_d(AT, T3, -wordSize); ++ __ alsl_d(T3, T2, SP, LogBytesPerWord - 1); ++ __ st_d(AT, T3, Interpreter::expr_offset_in_bytes(0)); ++ __ addi_d(T2, T2, 1); ++ __ addi_d(T0, T0, -1); ++ __ bne(T0, R0, loop); ++ // advance to next parameter ++ ++ // call Java function ++ __ bind(parameters_done); ++ ++ // receiver in V0, methodOop in Rmethod ++ ++ __ move(Rmethod, A3); ++ __ move(Rsender, SP); //set sender sp ++ __ jalr(A4); ++ return_address = __ pc(); ++ ++ Label common_return; ++ __ 
bind(common_return); ++ ++ // store result depending on type ++ // (everything that is not T_LONG, T_FLOAT or T_DOUBLE is treated as T_INT) ++ __ ld_d(T0, FP, result_off * wordSize); // result --> T0 ++ Label is_long, is_float, is_double, exit; ++ __ ld_d(T2, FP, result_type_off * wordSize); // result_type --> T2 ++ __ addi_d(T3, T2, (-1) * T_LONG); ++ __ beq(T3, R0, is_long); ++ __ addi_d(T3, T2, (-1) * T_FLOAT); ++ __ beq(T3, R0, is_float); ++ __ addi_d(T3, T2, (-1) * T_DOUBLE); ++ __ beq(T3, R0, is_double); ++ ++ // handle T_INT case ++ __ st_d(V0, T0, 0 * wordSize); ++ __ bind(exit); ++ ++ // restore ++ __ ld_d(BCP, FP, BCP_off * wordSize); ++ __ ld_d(LVP, FP, LVP_off * wordSize); ++ __ ld_d(S8, FP, S8_off * wordSize); ++ __ ld_d(TSR, FP, TSR_off * wordSize); ++ ++ __ ld_d(S1, FP, S1_off * wordSize); ++ __ ld_d(S3, FP, S3_off * wordSize); ++ __ ld_d(S4, FP, S4_off * wordSize); ++ __ ld_d(S5, FP, S5_off * wordSize); ++ __ ld_d(S6, FP, S6_off * wordSize); ++ ++ __ fld_d(F24, FP, F24_off * wordSize); ++ __ fld_d(F25, FP, F25_off * wordSize); ++ __ fld_d(F26, FP, F26_off * wordSize); ++ __ fld_d(F27, FP, F27_off * wordSize); ++ __ fld_d(F28, FP, F28_off * wordSize); ++ __ fld_d(F29, FP, F29_off * wordSize); ++ __ fld_d(F30, FP, F30_off * wordSize); ++ __ fld_d(F31, FP, F31_off * wordSize); ++ ++ __ leave(); ++ ++ // return ++ __ jr(RA); ++ ++ // handle return types different from T_INT ++ __ bind(is_long); ++ __ st_d(V0, T0, 0 * wordSize); ++ __ b(exit); ++ ++ __ bind(is_float); ++ __ fst_s(FV0, T0, 0 * wordSize); ++ __ b(exit); ++ ++ __ bind(is_double); ++ __ fst_d(FV0, T0, 0 * wordSize); ++ __ b(exit); ++ StubRoutines::la::set_call_stub_compiled_return(__ pc()); ++ __ b(common_return); ++ return start; ++ } ++ ++ // Return point for a Java call if there's an exception thrown in ++ // Java code. The exception is caught and transformed into a ++ // pending exception stored in JavaThread that can be tested from ++ // within the VM. ++ // ++ // Note: Usually the parameters are removed by the callee. In case ++ // of an exception crossing an activation frame boundary, that is ++ // not the case if the callee is compiled code => need to setup the ++ // sp. ++ // ++ // V0: exception oop ++ ++ address generate_catch_exception() { ++ StubCodeMark mark(this, "StubRoutines", "catch_exception"); ++ address start = __ pc(); ++ ++ Register thread = TREG; ++ ++ // get thread directly ++#ifndef OPT_THREAD ++ __ ld_d(thread, FP, thread_off * wordSize); ++#endif ++ ++#ifdef ASSERT ++ // verify that threads correspond ++ { Label L; ++ __ get_thread(T8); ++ __ beq(T8, thread, L); ++ __ stop("StubRoutines::catch_exception: threads must correspond"); ++ __ bind(L); ++ } ++#endif ++ // set pending exception ++ __ verify_oop(V0); ++ __ st_d(V0, thread, in_bytes(Thread::pending_exception_offset())); ++ __ li(AT, (long)__FILE__); ++ __ st_d(AT, thread, in_bytes(Thread::exception_file_offset ())); ++ __ li(AT, (long)__LINE__); ++ __ st_d(AT, thread, in_bytes(Thread::exception_line_offset ())); ++ ++ // complete return to VM ++ assert(StubRoutines::_call_stub_return_address != NULL, "_call_stub_return_address must have been generated before"); ++ __ jmp(StubRoutines::_call_stub_return_address, relocInfo::none); ++ return start; ++ } ++ ++ // Continuation point for runtime calls returning with a pending ++ // exception. The pending exception check happened in the runtime ++ // or native call stub. The pending exception in Thread is ++ // converted into a Java-level exception. 
++ // ++ // Contract with Java-level exception handlers: ++ // V0: exception ++ // V1: throwing pc ++ // ++ // NOTE: At entry of this stub, exception-pc must be on stack !! ++ ++ address generate_forward_exception() { ++ StubCodeMark mark(this, "StubRoutines", "forward exception"); ++ //Register thread = TREG; ++ Register thread = TREG; ++ address start = __ pc(); ++ ++ // Upon entry, the sp points to the return address returning into ++ // Java (interpreted or compiled) code; i.e., the return address ++ // throwing pc. ++ // ++ // Arguments pushed before the runtime call are still on the stack ++ // but the exception handler will reset the stack pointer -> ++ // ignore them. A potential result in registers can be ignored as ++ // well. ++ ++#ifndef OPT_THREAD ++ __ get_thread(thread); ++#endif ++#ifdef ASSERT ++ // make sure this code is only executed if there is a pending exception ++ { ++ Label L; ++ __ ld_d(AT, thread, in_bytes(Thread::pending_exception_offset())); ++ __ bne(AT, R0, L); ++ __ stop("StubRoutines::forward exception: no pending exception (1)"); ++ __ bind(L); ++ } ++#endif ++ ++ // compute exception handler into T4 ++ __ ld_d(A1, SP, 0); ++ __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::exception_handler_for_return_address), thread, A1); ++ __ move(T4, V0); ++ __ pop(V1); ++ ++#ifndef OPT_THREAD ++ __ get_thread(thread); ++#endif ++ __ ld_d(V0, thread, in_bytes(Thread::pending_exception_offset())); ++ __ st_d(R0, thread, in_bytes(Thread::pending_exception_offset())); ++ ++#ifdef ASSERT ++ // make sure exception is set ++ { ++ Label L; ++ __ bne(V0, R0, L); ++ __ stop("StubRoutines::forward exception: no pending exception (2)"); ++ __ bind(L); ++ } ++#endif ++ ++ // continue at exception handler (return address removed) ++ // V0: exception ++ // T4: exception handler ++ // V1: throwing pc ++ __ verify_oop(V0); ++ __ jr(T4); ++ return start; ++ } ++ ++ // Non-destructive plausibility checks for oops ++ // ++ address generate_verify_oop() { ++ StubCodeMark mark(this, "StubRoutines", "verify_oop"); ++ address start = __ pc(); ++ __ verify_oop_subroutine(); ++ address end = __ pc(); ++ return start; ++ } ++ ++ // ++ // Generate stub for array fill. If "aligned" is true, the ++ // "to" address is assumed to be heapword aligned. 
++ // ++ // Arguments for generated stub: ++ // to: A0 ++ // value: A1 ++ // count: A2 treated as signed ++ // ++ address generate_fill(BasicType t, bool aligned, const char *name) { ++ __ align(CodeEntryAlignment); ++ StubCodeMark mark(this, "StubRoutines", name); ++ address start = __ pc(); ++ ++ const Register to = A0; // source array address ++ const Register value = A1; // value ++ const Register count = A2; // elements count ++ ++ const Register end = T5; // source array address end ++ const Register tmp = T8; // temp register ++ ++ Label L_fill_elements; ++ ++ int shift = -1; ++ switch (t) { ++ case T_BYTE: ++ shift = 0; ++ __ slti(AT, count, 9); // Short arrays (<= 8 bytes) fill by element ++ __ bstrins_d(value, value, 15, 8); // 8 bit -> 16 bit ++ __ bstrins_d(value, value, 31, 16); // 16 bit -> 32 bit ++ __ bstrins_d(value, value, 63, 32); // 32 bit -> 64 bit ++ __ bnez(AT, L_fill_elements); ++ break; ++ case T_SHORT: ++ shift = 1; ++ __ slti(AT, count, 5); // Short arrays (<= 8 bytes) fill by element ++ __ bstrins_d(value, value, 31, 16); // 16 bit -> 32 bit ++ __ bstrins_d(value, value, 63, 32); // 32 bit -> 64 bit ++ __ bnez(AT, L_fill_elements); ++ break; ++ case T_INT: ++ shift = 2; ++ __ slti(AT, count, 3); // Short arrays (<= 8 bytes) fill by element ++ __ bstrins_d(value, value, 63, 32); // 32 bit -> 64 bit ++ __ bnez(AT, L_fill_elements); ++ break; ++ default: ShouldNotReachHere(); ++ } ++ ++ switch (t) { ++ case T_BYTE: ++ __ add_d(end, to, count); ++ break; ++ case T_SHORT: ++ case T_INT: ++ __ alsl_d(end, count, to, shift-1); ++ break; ++ default: ShouldNotReachHere(); ++ } ++ if (!aligned) { ++ __ st_d(value, to, 0); ++ __ bstrins_d(to, R0, 2, 0); ++ __ addi_d(to, to, 8); ++ } ++ __ st_d(value, end, -8); ++ __ bstrins_d(end, R0, 2, 0); ++ ++ // ++ // Fill large chunks ++ // ++ Label L_loop_begin, L_not_64bytes_fill, L_loop_end, L_jtab1, L_jtab2; ++ __ addi_d(AT, to, 64); ++ __ blt(end, AT, L_not_64bytes_fill); ++ __ addi_d(to, to, 64); ++ __ bind(L_loop_begin); ++ __ st_d(value, to, -8); ++ __ st_d(value, to, -16); ++ __ st_d(value, to, -24); ++ __ st_d(value, to, -32); ++ __ st_d(value, to, -40); ++ __ st_d(value, to, -48); ++ __ st_d(value, to, -56); ++ __ st_d(value, to, -64); ++ __ addi_d(to, to, 64); ++ __ bge(end, to, L_loop_begin); ++ __ addi_d(to, to, -64); ++ __ beq(to, end, L_loop_end); ++ ++ __ bind(L_not_64bytes_fill); ++ // There are 0 - 7 words ++ __ lipc(AT, L_jtab1); ++ __ sub_d(tmp, end, to); ++ __ alsl_d(AT, tmp, AT, 1); ++ __ jr(AT); ++ ++ __ bind(L_jtab1); ++ // 0: ++ __ jr(RA); ++ __ nop(); ++ __ nop(); ++ __ nop(); ++ __ nop(); ++ __ nop(); ++ __ nop(); ++ __ nop(); ++ ++ // 1: ++ __ st_d(value, to, 0); ++ __ jr(RA); ++ __ nop(); ++ __ nop(); ++ __ nop(); ++ __ nop(); ++ __ nop(); ++ __ nop(); ++ ++ // 2: ++ __ st_d(value, to, 0); ++ __ st_d(value, to, 8); ++ __ jr(RA); ++ __ nop(); ++ __ nop(); ++ __ nop(); ++ __ nop(); ++ __ nop(); ++ ++ // 3: ++ __ st_d(value, to, 0); ++ __ st_d(value, to, 8); ++ __ st_d(value, to, 16); ++ __ jr(RA); ++ __ nop(); ++ __ nop(); ++ __ nop(); ++ __ nop(); ++ ++ // 4: ++ __ st_d(value, to, 0); ++ __ st_d(value, to, 8); ++ __ st_d(value, to, 16); ++ __ st_d(value, to, 24); ++ __ jr(RA); ++ __ nop(); ++ __ nop(); ++ __ nop(); ++ ++ // 5: ++ __ st_d(value, to, 0); ++ __ st_d(value, to, 8); ++ __ st_d(value, to, 16); ++ __ st_d(value, to, 24); ++ __ st_d(value, to, 32); ++ __ jr(RA); ++ __ nop(); ++ __ nop(); ++ ++ // 6: ++ __ st_d(value, to, 0); ++ __ st_d(value, to, 8); ++ __ st_d(value, to, 16); ++ __ st_d(value, 
to, 24); ++ __ st_d(value, to, 32); ++ __ st_d(value, to, 40); ++ __ jr(RA); ++ __ nop(); ++ ++ // 7: ++ __ st_d(value, to, 0); ++ __ st_d(value, to, 8); ++ __ st_d(value, to, 16); ++ __ st_d(value, to, 24); ++ __ st_d(value, to, 32); ++ __ st_d(value, to, 40); ++ __ st_d(value, to, 48); ++ ++ __ bind(L_loop_end); ++ __ jr(RA); ++ ++ // Short arrays (<= 8 bytes) ++ __ bind(L_fill_elements); ++ __ lipc(AT, L_jtab2); ++ __ slli_d(tmp, count, 4 + shift); ++ __ add_d(AT, AT, tmp); ++ __ jr(AT); ++ ++ __ bind(L_jtab2); ++ // 0: ++ __ jr(RA); ++ __ nop(); ++ __ nop(); ++ __ nop(); ++ ++ // 1: ++ __ st_b(value, to, 0); ++ __ jr(RA); ++ __ nop(); ++ __ nop(); ++ ++ // 2: ++ __ st_h(value, to, 0); ++ __ jr(RA); ++ __ nop(); ++ __ nop(); ++ ++ // 3: ++ __ st_h(value, to, 0); ++ __ st_b(value, to, 2); ++ __ jr(RA); ++ __ nop(); ++ ++ // 4: ++ __ st_w(value, to, 0); ++ __ jr(RA); ++ __ nop(); ++ __ nop(); ++ ++ // 5: ++ __ st_w(value, to, 0); ++ __ st_b(value, to, 4); ++ __ jr(RA); ++ __ nop(); ++ ++ // 6: ++ __ st_w(value, to, 0); ++ __ st_h(value, to, 4); ++ __ jr(RA); ++ __ nop(); ++ ++ // 7: ++ __ st_w(value, to, 0); ++ __ st_w(value, to, 3); ++ __ jr(RA); ++ __ nop(); ++ ++ // 8: ++ __ st_d(value, to, 0); ++ __ jr(RA); ++ return start; ++ } ++ ++ // ++ // Generate overlap test for array copy stubs ++ // ++ // Input: ++ // A0 - source array address ++ // A1 - destination array address ++ // A2 - element count ++ // ++ // Temp: ++ // AT - destination array address - source array address ++ // T4 - element count * element size ++ // ++ void array_overlap_test(address no_overlap_target, int log2_elem_size) { ++ __ slli_d(T4, A2, log2_elem_size); ++ __ sub_d(AT, A1, A0); ++ __ bgeu(AT, T4, no_overlap_target); ++ } ++ ++ // disjoint large copy ++ void generate_disjoint_large_copy(Label &entry, const char *name) { ++ __ align(CodeEntryAlignment); ++ StubCodeMark mark(this, "StubRoutines", name); ++ ++ Label loop, le32, le16, le8, lt8; ++ ++ __ bind(entry); ++ __ add_d(A3, A1, A2); ++ __ add_d(A2, A0, A2); ++ __ ld_d(A6, A0, 0); ++ __ ld_d(A7, A2, -8); ++ ++ __ andi(T1, A0, 7); ++ __ sub_d(T0, R0, T1); ++ __ addi_d(T0, T0, 8); ++ ++ __ add_d(A0, A0, T0); ++ __ add_d(A5, A1, T0); ++ ++ __ addi_d(A4, A2, -64); ++ __ bgeu(A0, A4, le32); ++ ++ __ bind(loop); ++ __ ld_d(T0, A0, 0); ++ __ ld_d(T1, A0, 8); ++ __ ld_d(T2, A0, 16); ++ __ ld_d(T3, A0, 24); ++ __ ld_d(T4, A0, 32); ++ __ ld_d(T5, A0, 40); ++ __ ld_d(T6, A0, 48); ++ __ ld_d(T7, A0, 56); ++ __ addi_d(A0, A0, 64); ++ __ st_d(T0, A5, 0); ++ __ st_d(T1, A5, 8); ++ __ st_d(T2, A5, 16); ++ __ st_d(T3, A5, 24); ++ __ st_d(T4, A5, 32); ++ __ st_d(T5, A5, 40); ++ __ st_d(T6, A5, 48); ++ __ st_d(T7, A5, 56); ++ __ addi_d(A5, A5, 64); ++ __ bltu(A0, A4, loop); ++ ++ __ bind(le32); ++ __ addi_d(A4, A2, -32); ++ __ bgeu(A0, A4, le16); ++ __ ld_d(T0, A0, 0); ++ __ ld_d(T1, A0, 8); ++ __ ld_d(T2, A0, 16); ++ __ ld_d(T3, A0, 24); ++ __ addi_d(A0, A0, 32); ++ __ st_d(T0, A5, 0); ++ __ st_d(T1, A5, 8); ++ __ st_d(T2, A5, 16); ++ __ st_d(T3, A5, 24); ++ __ addi_d(A5, A5, 32); ++ ++ __ bind(le16); ++ __ addi_d(A4, A2, -16); ++ __ bgeu(A0, A4, le8); ++ __ ld_d(T0, A0, 0); ++ __ ld_d(T1, A0, 8); ++ __ addi_d(A0, A0, 16); ++ __ st_d(T0, A5, 0); ++ __ st_d(T1, A5, 8); ++ __ addi_d(A5, A5, 16); ++ ++ __ bind(le8); ++ __ addi_d(A4, A2, -8); ++ __ bgeu(A0, A4, lt8); ++ __ ld_d(T0, A0, 0); ++ __ st_d(T0, A5, 0); ++ ++ __ bind(lt8); ++ __ st_d(A6, A1, 0); ++ __ st_d(A7, A3, -8); ++ __ move(A0, R0); ++ __ jr(RA); ++ } ++ ++ // disjoint large copy lsx ++ void 
generate_disjoint_large_copy_lsx(Label &entry, const char *name) { ++ __ align(CodeEntryAlignment); ++ StubCodeMark mark(this, "StubRoutines", name); ++ ++ Label loop, le64, le32, le16, lt16; ++ ++ __ bind(entry); ++ __ add_d(A3, A1, A2); ++ __ add_d(A2, A0, A2); ++ __ vld(F0, A0, 0); ++ __ vld(F1, A2, -16); ++ ++ __ andi(T1, A0, 15); ++ __ sub_d(T0, R0, T1); ++ __ addi_d(T0, T0, 16); ++ ++ __ add_d(A0, A0, T0); ++ __ add_d(A5, A1, T0); ++ ++ __ addi_d(A4, A2, -128); ++ __ bgeu(A0, A4, le64); ++ ++ __ bind(loop); ++ __ vld(FT0, A0, 0); ++ __ vld(FT1, A0, 16); ++ __ vld(FT2, A0, 32); ++ __ vld(FT3, A0, 48); ++ __ vld(FT4, A0, 64); ++ __ vld(FT5, A0, 80); ++ __ vld(FT6, A0, 96); ++ __ vld(FT7, A0, 112); ++ __ addi_d(A0, A0, 128); ++ __ vst(FT0, A5, 0); ++ __ vst(FT1, A5, 16); ++ __ vst(FT2, A5, 32); ++ __ vst(FT3, A5, 48); ++ __ vst(FT4, A5, 64); ++ __ vst(FT5, A5, 80); ++ __ vst(FT6, A5, 96); ++ __ vst(FT7, A5, 112); ++ __ addi_d(A5, A5, 128); ++ __ bltu(A0, A4, loop); ++ ++ __ bind(le64); ++ __ addi_d(A4, A2, -64); ++ __ bgeu(A0, A4, le32); ++ __ vld(FT0, A0, 0); ++ __ vld(FT1, A0, 16); ++ __ vld(FT2, A0, 32); ++ __ vld(FT3, A0, 48); ++ __ addi_d(A0, A0, 64); ++ __ vst(FT0, A5, 0); ++ __ vst(FT1, A5, 16); ++ __ vst(FT2, A5, 32); ++ __ vst(FT3, A5, 48); ++ __ addi_d(A5, A5, 64); ++ ++ __ bind(le32); ++ __ addi_d(A4, A2, -32); ++ __ bgeu(A0, A4, le16); ++ __ vld(FT0, A0, 0); ++ __ vld(FT1, A0, 16); ++ __ addi_d(A0, A0, 32); ++ __ vst(FT0, A5, 0); ++ __ vst(FT1, A5, 16); ++ __ addi_d(A5, A5, 32); ++ ++ __ bind(le16); ++ __ addi_d(A4, A2, -16); ++ __ bgeu(A0, A4, lt16); ++ __ vld(FT0, A0, 0); ++ __ vst(FT0, A5, 0); ++ ++ __ bind(lt16); ++ __ vst(F0, A1, 0); ++ __ vst(F1, A3, -16); ++ ++ __ move(A0, R0); ++ __ jr(RA); ++ } ++ ++ // disjoint large copy lasx ++ void generate_disjoint_large_copy_lasx(Label &entry, const char *name) { ++ __ align(CodeEntryAlignment); ++ StubCodeMark mark(this, "StubRoutines", name); ++ ++ Label loop, le128, le64, le32, lt32; ++ ++ __ bind(entry); ++ __ add_d(A3, A1, A2); ++ __ add_d(A2, A0, A2); ++ __ xvld(F0, A0, 0); ++ __ xvld(F1, A2, -32); ++ ++ __ andi(T1, A0, 31); ++ __ sub_d(T0, R0, T1); ++ __ addi_d(T0, T0, 32); ++ ++ __ add_d(A0, A0, T0); ++ __ add_d(A5, A1, T0); ++ ++ __ addi_d(A4, A2, -256); ++ __ bgeu(A0, A4, le128); ++ ++ __ bind(loop); ++ __ xvld(FT0, A0, 0); ++ __ xvld(FT1, A0, 32); ++ __ xvld(FT2, A0, 64); ++ __ xvld(FT3, A0, 96); ++ __ xvld(FT4, A0, 128); ++ __ xvld(FT5, A0, 160); ++ __ xvld(FT6, A0, 192); ++ __ xvld(FT7, A0, 224); ++ __ addi_d(A0, A0, 256); ++ __ xvst(FT0, A5, 0); ++ __ xvst(FT1, A5, 32); ++ __ xvst(FT2, A5, 64); ++ __ xvst(FT3, A5, 96); ++ __ xvst(FT4, A5, 128); ++ __ xvst(FT5, A5, 160); ++ __ xvst(FT6, A5, 192); ++ __ xvst(FT7, A5, 224); ++ __ addi_d(A5, A5, 256); ++ __ bltu(A0, A4, loop); ++ ++ __ bind(le128); ++ __ addi_d(A4, A2, -128); ++ __ bgeu(A0, A4, le64); ++ __ xvld(FT0, A0, 0); ++ __ xvld(FT1, A0, 32); ++ __ xvld(FT2, A0, 64); ++ __ xvld(FT3, A0, 96); ++ __ addi_d(A0, A0, 128); ++ __ xvst(FT0, A5, 0); ++ __ xvst(FT1, A5, 32); ++ __ xvst(FT2, A5, 64); ++ __ xvst(FT3, A5, 96); ++ __ addi_d(A5, A5, 128); ++ ++ __ bind(le64); ++ __ addi_d(A4, A2, -64); ++ __ bgeu(A0, A4, le32); ++ __ xvld(FT0, A0, 0); ++ __ xvld(FT1, A0, 32); ++ __ addi_d(A0, A0, 64); ++ __ xvst(FT0, A5, 0); ++ __ xvst(FT1, A5, 32); ++ __ addi_d(A5, A5, 64); ++ ++ __ bind(le32); ++ __ addi_d(A4, A2, -32); ++ __ bgeu(A0, A4, lt32); ++ __ xvld(FT0, A0, 0); ++ __ xvst(FT0, A5, 0); ++ ++ __ bind(lt32); ++ __ xvst(F0, A1, 0); ++ __ xvst(F1, A3, -32); ++ ++ __ 
move(A0, R0); ++ __ jr(RA); ++ } ++ ++ // conjoint large copy ++ void generate_conjoint_large_copy(Label &entry, const char *name) { ++ __ align(CodeEntryAlignment); ++ StubCodeMark mark(this, "StubRoutines", name); ++ ++ Label loop, le32, le16, le8, lt8; ++ ++ __ bind(entry); ++ __ add_d(A3, A1, A2); ++ __ add_d(A2, A0, A2); ++ __ ld_d(A6, A0, 0); ++ __ ld_d(A7, A2, -8); ++ ++ __ andi(T1, A2, 7); ++ __ sub_d(A2, A2, T1); ++ __ sub_d(A5, A3, T1); ++ ++ __ addi_d(A4, A0, 64); ++ __ bgeu(A4, A2, le32); ++ ++ __ bind(loop); ++ __ ld_d(T0, A2, -8); ++ __ ld_d(T1, A2, -16); ++ __ ld_d(T2, A2, -24); ++ __ ld_d(T3, A2, -32); ++ __ ld_d(T4, A2, -40); ++ __ ld_d(T5, A2, -48); ++ __ ld_d(T6, A2, -56); ++ __ ld_d(T7, A2, -64); ++ __ addi_d(A2, A2, -64); ++ __ st_d(T0, A5, -8); ++ __ st_d(T1, A5, -16); ++ __ st_d(T2, A5, -24); ++ __ st_d(T3, A5, -32); ++ __ st_d(T4, A5, -40); ++ __ st_d(T5, A5, -48); ++ __ st_d(T6, A5, -56); ++ __ st_d(T7, A5, -64); ++ __ addi_d(A5, A5, -64); ++ __ bltu(A4, A2, loop); ++ ++ __ bind(le32); ++ __ addi_d(A4, A0, 32); ++ __ bgeu(A4, A2, le16); ++ __ ld_d(T0, A2, -8); ++ __ ld_d(T1, A2, -16); ++ __ ld_d(T2, A2, -24); ++ __ ld_d(T3, A2, -32); ++ __ addi_d(A2, A2, -32); ++ __ st_d(T0, A5, -8); ++ __ st_d(T1, A5, -16); ++ __ st_d(T2, A5, -24); ++ __ st_d(T3, A5, -32); ++ __ addi_d(A5, A5, -32); ++ ++ __ bind(le16); ++ __ addi_d(A4, A0, 16); ++ __ bgeu(A4, A2, le8); ++ __ ld_d(T0, A2, -8); ++ __ ld_d(T1, A2, -16); ++ __ addi_d(A2, A2, -16); ++ __ st_d(T0, A5, -8); ++ __ st_d(T1, A5, -16); ++ __ addi_d(A5, A5, -16); ++ ++ __ bind(le8); ++ __ addi_d(A4, A0, 8); ++ __ bgeu(A4, A2, lt8); ++ __ ld_d(T0, A2, -8); ++ __ st_d(T0, A5, -8); ++ ++ __ bind(lt8); ++ __ st_d(A6, A1, 0); ++ __ st_d(A7, A3, -8); ++ __ move(A0, R0); ++ __ jr(RA); ++ } ++ ++ // conjoint large copy lsx ++ void generate_conjoint_large_copy_lsx(Label &entry, const char *name) { ++ __ align(CodeEntryAlignment); ++ StubCodeMark mark(this, "StubRoutines", name); ++ ++ Label loop, le64, le32, le16, lt16; ++ ++ __ bind(entry); ++ __ add_d(A3, A1, A2); ++ __ add_d(A2, A0, A2); ++ __ vld(F0, A0, 0); ++ __ vld(F1, A2, -16); ++ ++ __ andi(T1, A2, 15); ++ __ sub_d(A2, A2, T1); ++ __ sub_d(A5, A3, T1); ++ ++ __ addi_d(A4, A0, 128); ++ __ bgeu(A4, A2, le64); ++ ++ __ bind(loop); ++ __ vld(FT0, A2, -16); ++ __ vld(FT1, A2, -32); ++ __ vld(FT2, A2, -48); ++ __ vld(FT3, A2, -64); ++ __ vld(FT4, A2, -80); ++ __ vld(FT5, A2, -96); ++ __ vld(FT6, A2, -112); ++ __ vld(FT7, A2, -128); ++ __ addi_d(A2, A2, -128); ++ __ vst(FT0, A5, -16); ++ __ vst(FT1, A5, -32); ++ __ vst(FT2, A5, -48); ++ __ vst(FT3, A5, -64); ++ __ vst(FT4, A5, -80); ++ __ vst(FT5, A5, -96); ++ __ vst(FT6, A5, -112); ++ __ vst(FT7, A5, -128); ++ __ addi_d(A5, A5, -128); ++ __ bltu(A4, A2, loop); ++ ++ __ bind(le64); ++ __ addi_d(A4, A0, 64); ++ __ bgeu(A4, A2, le32); ++ __ vld(FT0, A2, -16); ++ __ vld(FT1, A2, -32); ++ __ vld(FT2, A2, -48); ++ __ vld(FT3, A2, -64); ++ __ addi_d(A2, A2, -64); ++ __ vst(FT0, A5, -16); ++ __ vst(FT1, A5, -32); ++ __ vst(FT2, A5, -48); ++ __ vst(FT3, A5, -64); ++ __ addi_d(A5, A5, -64); ++ ++ __ bind(le32); ++ __ addi_d(A4, A0, 32); ++ __ bgeu(A4, A2, le16); ++ __ vld(FT0, A2, -16); ++ __ vld(FT1, A2, -32); ++ __ addi_d(A2, A2, -32); ++ __ vst(FT0, A5, -16); ++ __ vst(FT1, A5, -32); ++ __ addi_d(A5, A5, -32); ++ ++ __ bind(le16); ++ __ addi_d(A4, A0, 16); ++ __ bgeu(A4, A2, lt16); ++ __ vld(FT0, A2, -16); ++ __ vst(FT0, A5, -16); ++ ++ __ bind(lt16); ++ __ vst(F0, A1, 0); ++ __ vst(F1, A3, -16); ++ ++ __ move(A0, R0); ++ __ jr(RA); ++ } 
++ ++ // conjoint large copy lasx ++ void generate_conjoint_large_copy_lasx(Label &entry, const char *name) { ++ __ align(CodeEntryAlignment); ++ StubCodeMark mark(this, "StubRoutines", name); ++ ++ Label loop, le128, le64, le32, lt32; ++ ++ __ bind(entry); ++ __ add_d(A3, A1, A2); ++ __ add_d(A2, A0, A2); ++ __ xvld(F0, A0, 0); ++ __ xvld(F1, A2, -32); ++ ++ __ andi(T1, A2, 31); ++ __ sub_d(A2, A2, T1); ++ __ sub_d(A5, A3, T1); ++ ++ __ addi_d(A4, A0, 256); ++ __ bgeu(A4, A2, le128); ++ ++ __ bind(loop); ++ __ xvld(FT0, A2, -32); ++ __ xvld(FT1, A2, -64); ++ __ xvld(FT2, A2, -96); ++ __ xvld(FT3, A2, -128); ++ __ xvld(FT4, A2, -160); ++ __ xvld(FT5, A2, -192); ++ __ xvld(FT6, A2, -224); ++ __ xvld(FT7, A2, -256); ++ __ addi_d(A2, A2, -256); ++ __ xvst(FT0, A5, -32); ++ __ xvst(FT1, A5, -64); ++ __ xvst(FT2, A5, -96); ++ __ xvst(FT3, A5, -128); ++ __ xvst(FT4, A5, -160); ++ __ xvst(FT5, A5, -192); ++ __ xvst(FT6, A5, -224); ++ __ xvst(FT7, A5, -256); ++ __ addi_d(A5, A5, -256); ++ __ bltu(A4, A2, loop); ++ ++ __ bind(le128); ++ __ addi_d(A4, A0, 128); ++ __ bgeu(A4, A2, le64); ++ __ xvld(FT0, A2, -32); ++ __ xvld(FT1, A2, -64); ++ __ xvld(FT2, A2, -96); ++ __ xvld(FT3, A2, -128); ++ __ addi_d(A2, A2, -128); ++ __ xvst(FT0, A5, -32); ++ __ xvst(FT1, A5, -64); ++ __ xvst(FT2, A5, -96); ++ __ xvst(FT3, A5, -128); ++ __ addi_d(A5, A5, -128); ++ ++ __ bind(le64); ++ __ addi_d(A4, A0, 64); ++ __ bgeu(A4, A2, le32); ++ __ xvld(FT0, A2, -32); ++ __ xvld(FT1, A2, -64); ++ __ addi_d(A2, A2, -64); ++ __ xvst(FT0, A5, -32); ++ __ xvst(FT1, A5, -64); ++ __ addi_d(A5, A5, -64); ++ ++ __ bind(le32); ++ __ addi_d(A4, A0, 32); ++ __ bgeu(A4, A2, lt32); ++ __ xvld(FT0, A2, -32); ++ __ xvst(FT0, A5, -32); ++ ++ __ bind(lt32); ++ __ xvst(F0, A1, 0); ++ __ xvst(F1, A3, -32); ++ ++ __ move(A0, R0); ++ __ jr(RA); ++ } ++ ++ // Byte small copy: less than { int:9, lsx:17, lasx:33 } elements. 
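++ // Each numbered case in the dispatch table below is padded to exactly 8 instructions (32 bytes),
++ // so the entry sequence can jump straight to L + (count << 5). The short- and int-element
++ // small-copy stubs further down use the same per-case layout.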
++ void generate_byte_small_copy(Label &entry, const char *name) { ++ __ align(CodeEntryAlignment); ++ StubCodeMark mark(this, "StubRoutines", name); ++ ++ Label L; ++ __ bind(entry); ++ __ lipc(AT, L); ++ __ slli_d(A2, A2, 5); ++ __ add_d(AT, AT, A2); ++ __ jr(AT); ++ ++ __ bind(L); ++ // 0: ++ __ move(A0, R0); ++ __ jr(RA); ++ __ nop(); ++ __ nop(); ++ __ nop(); ++ __ nop(); ++ __ nop(); ++ __ nop(); ++ ++ // 1: ++ __ ld_b(AT, A0, 0); ++ __ st_b(AT, A1, 0); ++ __ move(A0, R0); ++ __ jr(RA); ++ __ nop(); ++ __ nop(); ++ __ nop(); ++ __ nop(); ++ ++ // 2: ++ __ ld_h(AT, A0, 0); ++ __ st_h(AT, A1, 0); ++ __ move(A0, R0); ++ __ jr(RA); ++ __ nop(); ++ __ nop(); ++ __ nop(); ++ __ nop(); ++ ++ // 3: ++ __ ld_h(AT, A0, 0); ++ __ ld_b(A2, A0, 2); ++ __ st_h(AT, A1, 0); ++ __ st_b(A2, A1, 2); ++ __ move(A0, R0); ++ __ jr(RA); ++ __ nop(); ++ __ nop(); ++ ++ // 4: ++ __ ld_w(AT, A0, 0); ++ __ st_w(AT, A1, 0); ++ __ move(A0, R0); ++ __ jr(RA); ++ __ nop(); ++ __ nop(); ++ __ nop(); ++ __ nop(); ++ ++ // 5: ++ __ ld_w(AT, A0, 0); ++ __ ld_b(A2, A0, 4); ++ __ st_w(AT, A1, 0); ++ __ st_b(A2, A1, 4); ++ __ move(A0, R0); ++ __ jr(RA); ++ __ nop(); ++ __ nop(); ++ ++ // 6: ++ __ ld_w(AT, A0, 0); ++ __ ld_h(A2, A0, 4); ++ __ st_w(AT, A1, 0); ++ __ st_h(A2, A1, 4); ++ __ move(A0, R0); ++ __ jr(RA); ++ __ nop(); ++ __ nop(); ++ ++ // 7: ++ __ ld_w(AT, A0, 0); ++ __ ld_w(A2, A0, 3); ++ __ st_w(AT, A1, 0); ++ __ st_w(A2, A1, 3); ++ __ move(A0, R0); ++ __ jr(RA); ++ __ nop(); ++ __ nop(); ++ ++ // 8: ++ __ ld_d(AT, A0, 0); ++ __ st_d(AT, A1, 0); ++ __ move(A0, R0); ++ __ jr(RA); ++ ++ if (!UseLSX) ++ return; ++ ++ __ nop(); ++ __ nop(); ++ __ nop(); ++ __ nop(); ++ ++ // 9: ++ __ ld_d(AT, A0, 0); ++ __ ld_b(A2, A0, 8); ++ __ st_d(AT, A1, 0); ++ __ st_b(A2, A1, 8); ++ __ move(A0, R0); ++ __ jr(RA); ++ __ nop(); ++ __ nop(); ++ ++ // 10: ++ __ ld_d(AT, A0, 0); ++ __ ld_h(A2, A0, 8); ++ __ st_d(AT, A1, 0); ++ __ st_h(A2, A1, 8); ++ __ move(A0, R0); ++ __ jr(RA); ++ __ nop(); ++ __ nop(); ++ ++ // 11: ++ __ ld_d(AT, A0, 0); ++ __ ld_w(A2, A0, 7); ++ __ st_d(AT, A1, 0); ++ __ st_w(A2, A1, 7); ++ __ move(A0, R0); ++ __ jr(RA); ++ __ nop(); ++ __ nop(); ++ ++ // 12: ++ __ ld_d(AT, A0, 0); ++ __ ld_w(A2, A0, 8); ++ __ st_d(AT, A1, 0); ++ __ st_w(A2, A1, 8); ++ __ move(A0, R0); ++ __ jr(RA); ++ __ nop(); ++ __ nop(); ++ ++ // 13: ++ __ ld_d(AT, A0, 0); ++ __ ld_d(A2, A0, 5); ++ __ st_d(AT, A1, 0); ++ __ st_d(A2, A1, 5); ++ __ move(A0, R0); ++ __ jr(RA); ++ __ nop(); ++ __ nop(); ++ ++ // 14: ++ __ ld_d(AT, A0, 0); ++ __ ld_d(A2, A0, 6); ++ __ st_d(AT, A1, 0); ++ __ st_d(A2, A1, 6); ++ __ move(A0, R0); ++ __ jr(RA); ++ __ nop(); ++ __ nop(); ++ ++ // 15: ++ __ ld_d(AT, A0, 0); ++ __ ld_d(A2, A0, 7); ++ __ st_d(AT, A1, 0); ++ __ st_d(A2, A1, 7); ++ __ move(A0, R0); ++ __ jr(RA); ++ __ nop(); ++ __ nop(); ++ ++ // 16: ++ __ vld(F0, A0, 0); ++ __ vst(F0, A1, 0); ++ __ move(A0, R0); ++ __ jr(RA); ++ __ nop(); ++ __ nop(); ++ __ nop(); ++ __ nop(); ++ ++ if (!UseLASX) ++ return; ++ ++ // 17: ++ __ vld(F0, A0, 0); ++ __ ld_b(AT, A0, 16); ++ __ vst(F0, A1, 0); ++ __ st_b(AT, A1, 16); ++ __ move(A0, R0); ++ __ jr(RA); ++ __ nop(); ++ __ nop(); ++ ++ // 18: ++ __ vld(F0, A0, 0); ++ __ ld_h(AT, A0, 16); ++ __ vst(F0, A1, 0); ++ __ st_h(AT, A1, 16); ++ __ move(A0, R0); ++ __ jr(RA); ++ __ nop(); ++ __ nop(); ++ ++ // 19: ++ __ vld(F0, A0, 0); ++ __ ld_w(AT, A0, 15); ++ __ vst(F0, A1, 0); ++ __ st_w(AT, A1, 15); ++ __ move(A0, R0); ++ __ jr(RA); ++ __ nop(); ++ __ nop(); ++ ++ // 20: ++ __ vld(F0, A0, 0); ++ __ ld_w(AT, A0, 16); 
++ __ vst(F0, A1, 0); ++ __ st_w(AT, A1, 16); ++ __ move(A0, R0); ++ __ jr(RA); ++ __ nop(); ++ __ nop(); ++ ++ // 21: ++ __ vld(F0, A0, 0); ++ __ ld_d(AT, A0, 13); ++ __ vst(F0, A1, 0); ++ __ st_d(AT, A1, 13); ++ __ move(A0, R0); ++ __ jr(RA); ++ __ nop(); ++ __ nop(); ++ ++ // 22: ++ __ vld(F0, A0, 0); ++ __ ld_d(AT, A0, 14); ++ __ vst(F0, A1, 0); ++ __ st_d(AT, A1, 14); ++ __ move(A0, R0); ++ __ jr(RA); ++ __ nop(); ++ __ nop(); ++ ++ // 23: ++ __ vld(F0, A0, 0); ++ __ ld_d(AT, A0, 15); ++ __ vst(F0, A1, 0); ++ __ st_d(AT, A1, 15); ++ __ move(A0, R0); ++ __ jr(RA); ++ __ nop(); ++ __ nop(); ++ ++ // 24: ++ __ vld(F0, A0, 0); ++ __ ld_d(AT, A0, 16); ++ __ vst(F0, A1, 0); ++ __ st_d(AT, A1, 16); ++ __ move(A0, R0); ++ __ jr(RA); ++ __ nop(); ++ __ nop(); ++ ++ // 25: ++ __ vld(F0, A0, 0); ++ __ vld(F1, A0, 9); ++ __ vst(F0, A1, 0); ++ __ vst(F1, A1, 9); ++ __ move(A0, R0); ++ __ jr(RA); ++ __ nop(); ++ __ nop(); ++ ++ // 26: ++ __ vld(F0, A0, 0); ++ __ vld(F1, A0, 10); ++ __ vst(F0, A1, 0); ++ __ vst(F1, A1, 10); ++ __ move(A0, R0); ++ __ jr(RA); ++ __ nop(); ++ __ nop(); ++ ++ // 27: ++ __ vld(F0, A0, 0); ++ __ vld(F1, A0, 11); ++ __ vst(F0, A1, 0); ++ __ vst(F1, A1, 11); ++ __ move(A0, R0); ++ __ jr(RA); ++ __ nop(); ++ __ nop(); ++ ++ // 28: ++ __ vld(F0, A0, 0); ++ __ vld(F1, A0, 12); ++ __ vst(F0, A1, 0); ++ __ vst(F1, A1, 12); ++ __ move(A0, R0); ++ __ jr(RA); ++ __ nop(); ++ __ nop(); ++ ++ // 29: ++ __ vld(F0, A0, 0); ++ __ vld(F1, A0, 13); ++ __ vst(F0, A1, 0); ++ __ vst(F1, A1, 13); ++ __ move(A0, R0); ++ __ jr(RA); ++ __ nop(); ++ __ nop(); ++ ++ // 30: ++ __ vld(F0, A0, 0); ++ __ vld(F1, A0, 14); ++ __ vst(F0, A1, 0); ++ __ vst(F1, A1, 14); ++ __ move(A0, R0); ++ __ jr(RA); ++ __ nop(); ++ __ nop(); ++ ++ // 31: ++ __ vld(F0, A0, 0); ++ __ vld(F1, A0, 15); ++ __ vst(F0, A1, 0); ++ __ vst(F1, A1, 15); ++ __ move(A0, R0); ++ __ jr(RA); ++ __ nop(); ++ __ nop(); ++ ++ // 32: ++ __ xvld(F0, A0, 0); ++ __ xvst(F0, A1, 0); ++ __ move(A0, R0); ++ __ jr(RA); ++ } ++ ++ // Arguments: ++ // aligned - true => Input and output aligned on a HeapWord == 8-byte boundary ++ // ignored ++ // name - stub name string ++ // ++ // Inputs: ++ // A0 - source array address ++ // A1 - destination array address ++ // A2 - element count, treated as ssize_t, can be zero ++ // ++ // If 'from' and/or 'to' are aligned on 4-, 2-, or 1-byte boundaries, ++ // we let the hardware handle it. The one to eight bytes within words, ++ // dwords or qwords that span cache line boundaries will still be loaded ++ // and stored atomically. ++ // ++ // Side Effects: ++ // disjoint_byte_copy_entry is set to the no-overlap entry point ++ // used by generate_conjoint_byte_copy(). 
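++ // Dispatch overview: element counts below { int:9, lsx:17, lasx:33 } are handled by the
++ // small-copy stub; otherwise, if both 'from' and 'to' are 8-byte aligned and an aligned
++ // large-copy entry is bound, that entry is taken, else the generic large-copy entry.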
++ // ++ address generate_disjoint_byte_copy(bool aligned, Label &small, Label &large, ++ Label &large_aligned, const char * name) { ++ __ align(CodeEntryAlignment); ++ StubCodeMark mark(this, "StubRoutines", name); ++ address start = __ pc(); ++ ++ if (UseLASX) ++ __ sltui(T0, A2, 33); ++ else if (UseLSX) ++ __ sltui(T0, A2, 17); ++ else ++ __ sltui(T0, A2, 9); ++ __ bnez(T0, small); ++ ++ if (large_aligned.is_bound()) { ++ __ orr(T0, A0, A1); ++ __ andi(T0, T0, 7); ++ __ beqz(T0, large_aligned); ++ } ++ ++ __ b(large); ++ ++ return start; ++ } ++ ++ // Arguments: ++ // aligned - true => Input and output aligned on a HeapWord == 8-byte boundary ++ // ignored ++ // name - stub name string ++ // ++ // Inputs: ++ // A0 - source array address ++ // A1 - destination array address ++ // A2 - element count, treated as ssize_t, can be zero ++ // ++ // If 'from' and/or 'to' are aligned on 4-, 2-, or 1-byte boundaries, ++ // we let the hardware handle it. The one to eight bytes within words, ++ // dwords or qwords that span cache line boundaries will still be loaded ++ // and stored atomically. ++ // ++ address generate_conjoint_byte_copy(bool aligned, Label &small, Label &large, ++ Label &large_aligned, const char *name) { ++ __ align(CodeEntryAlignment); ++ StubCodeMark mark(this, "StubRoutines", name); ++ address start = __ pc(); ++ ++ array_overlap_test(StubRoutines::jbyte_disjoint_arraycopy(), 0); ++ ++ if (UseLASX) ++ __ sltui(T0, A2, 33); ++ else if (UseLSX) ++ __ sltui(T0, A2, 17); ++ else ++ __ sltui(T0, A2, 9); ++ __ bnez(T0, small); ++ ++ if (large_aligned.is_bound()) { ++ __ orr(T0, A0, A1); ++ __ andi(T0, T0, 7); ++ __ beqz(T0, large_aligned); ++ } ++ ++ __ b(large); ++ ++ return start; ++ } ++ ++ // Short small copy: less than { int:9, lsx:9, lasx:17 } elements. 
++ void generate_short_small_copy(Label &entry, const char *name) { ++ __ align(CodeEntryAlignment); ++ StubCodeMark mark(this, "StubRoutines", name); ++ ++ Label L; ++ __ bind(entry); ++ __ lipc(AT, L); ++ __ slli_d(A2, A2, 5); ++ __ add_d(AT, AT, A2); ++ __ jr(AT); ++ ++ __ bind(L); ++ // 0: ++ __ move(A0, R0); ++ __ jr(RA); ++ __ nop(); ++ __ nop(); ++ __ nop(); ++ __ nop(); ++ __ nop(); ++ __ nop(); ++ ++ // 1: ++ __ ld_h(AT, A0, 0); ++ __ st_h(AT, A1, 0); ++ __ move(A0, R0); ++ __ jr(RA); ++ __ nop(); ++ __ nop(); ++ __ nop(); ++ __ nop(); ++ ++ // 2: ++ __ ld_w(AT, A0, 0); ++ __ st_w(AT, A1, 0); ++ __ move(A0, R0); ++ __ jr(RA); ++ __ nop(); ++ __ nop(); ++ __ nop(); ++ __ nop(); ++ ++ // 3: ++ __ ld_w(AT, A0, 0); ++ __ ld_h(A2, A0, 4); ++ __ st_w(AT, A1, 0); ++ __ st_h(A2, A1, 4); ++ __ move(A0, R0); ++ __ jr(RA); ++ __ nop(); ++ __ nop(); ++ ++ // 4: ++ __ ld_d(AT, A0, 0); ++ __ st_d(AT, A1, 0); ++ __ move(A0, R0); ++ __ jr(RA); ++ __ nop(); ++ __ nop(); ++ __ nop(); ++ __ nop(); ++ ++ // 5: ++ __ ld_d(AT, A0, 0); ++ __ ld_h(A2, A0, 8); ++ __ st_d(AT, A1, 0); ++ __ st_h(A2, A1, 8); ++ __ move(A0, R0); ++ __ jr(RA); ++ __ nop(); ++ __ nop(); ++ ++ // 6: ++ __ ld_d(AT, A0, 0); ++ __ ld_w(A2, A0, 8); ++ __ st_d(AT, A1, 0); ++ __ st_w(A2, A1, 8); ++ __ move(A0, R0); ++ __ jr(RA); ++ __ nop(); ++ __ nop(); ++ ++ // 7: ++ __ ld_d(AT, A0, 0); ++ __ ld_d(A2, A0, 6); ++ __ st_d(AT, A1, 0); ++ __ st_d(A2, A1, 6); ++ __ move(A0, R0); ++ __ jr(RA); ++ __ nop(); ++ __ nop(); ++ ++ // 8: ++ if (UseLSX) { ++ __ vld(F0, A0, 0); ++ __ vst(F0, A1, 0); ++ __ move(A0, R0); ++ __ jr(RA); ++ __ nop(); ++ __ nop(); ++ } else { ++ __ ld_d(AT, A0, 0); ++ __ ld_d(A2, A0, 8); ++ __ st_d(AT, A1, 0); ++ __ st_d(A2, A1, 8); ++ __ move(A0, R0); ++ __ jr(RA); ++ } ++ ++ if (!UseLASX) ++ return; ++ ++ __ nop(); ++ __ nop(); ++ ++ // 9: ++ __ vld(F0, A0, 0); ++ __ ld_h(AT, A0, 16); ++ __ vst(F0, A1, 0); ++ __ st_h(AT, A1, 16); ++ __ move(A0, R0); ++ __ jr(RA); ++ __ nop(); ++ __ nop(); ++ ++ // 10: ++ __ vld(F0, A0, 0); ++ __ ld_w(AT, A0, 16); ++ __ vst(F0, A1, 0); ++ __ st_w(AT, A1, 16); ++ __ move(A0, R0); ++ __ jr(RA); ++ __ nop(); ++ __ nop(); ++ ++ // 11: ++ __ vld(F0, A0, 0); ++ __ ld_d(AT, A0, 14); ++ __ vst(F0, A1, 0); ++ __ st_d(AT, A1, 14); ++ __ move(A0, R0); ++ __ jr(RA); ++ __ nop(); ++ __ nop(); ++ ++ // 12: ++ __ vld(F0, A0, 0); ++ __ ld_d(AT, A0, 16); ++ __ vst(F0, A1, 0); ++ __ st_d(AT, A1, 16); ++ __ move(A0, R0); ++ __ jr(RA); ++ __ nop(); ++ __ nop(); ++ ++ // 13: ++ __ vld(F0, A0, 0); ++ __ vld(F1, A0, 10); ++ __ vst(F0, A1, 0); ++ __ vst(F1, A1, 10); ++ __ move(A0, R0); ++ __ jr(RA); ++ __ nop(); ++ __ nop(); ++ ++ // 14: ++ __ vld(F0, A0, 0); ++ __ vld(F1, A0, 12); ++ __ vst(F0, A1, 0); ++ __ vst(F1, A1, 12); ++ __ move(A0, R0); ++ __ jr(RA); ++ __ nop(); ++ __ nop(); ++ ++ // 15: ++ __ vld(F0, A0, 0); ++ __ vld(F1, A0, 14); ++ __ vst(F0, A1, 0); ++ __ vst(F1, A1, 14); ++ __ move(A0, R0); ++ __ jr(RA); ++ __ nop(); ++ __ nop(); ++ ++ // 16: ++ __ xvld(F0, A0, 0); ++ __ xvst(F0, A1, 0); ++ __ move(A0, R0); ++ __ jr(RA); ++ } ++ ++ // Arguments: ++ // aligned - true => Input and output aligned on a HeapWord == 8-byte boundary ++ // ignored ++ // name - stub name string ++ // ++ // Inputs: ++ // A0 - source array address ++ // A1 - destination array address ++ // A2 - element count, treated as ssize_t, can be zero ++ // ++ // If 'from' and/or 'to' are aligned on 4-, 2-, or 1-byte boundaries, ++ // we let the hardware handle it. 
The one to eight bytes within words, ++ // dwords or qwords that span cache line boundaries will still be loaded ++ // and stored atomically. ++ // ++ // Side Effects: ++ // disjoint_short_copy_entry is set to the no-overlap entry point ++ // used by generate_conjoint_short_copy(). ++ // ++ address generate_disjoint_short_copy(bool aligned, Label &small, Label &large, ++ Label &large_aligned, const char * name) { ++ __ align(CodeEntryAlignment); ++ StubCodeMark mark(this, "StubRoutines", name); ++ address start = __ pc(); ++ ++ if (UseLASX) ++ __ sltui(T0, A2, 17); ++ else ++ __ sltui(T0, A2, 9); ++ __ bnez(T0, small); ++ ++ __ slli_d(A2, A2, 1); ++ ++ if (large_aligned.is_bound()) { ++ __ orr(T0, A0, A1); ++ __ andi(T0, T0, 7); ++ __ beqz(T0, large_aligned); ++ } ++ ++ __ b(large); ++ ++ return start; ++ } ++ ++ // Arguments: ++ // aligned - true => Input and output aligned on a HeapWord == 8-byte boundary ++ // ignored ++ // name - stub name string ++ // ++ // Inputs: ++ // A0 - source array address ++ // A1 - destination array address ++ // A2 - element count, treated as ssize_t, can be zero ++ // ++ // If 'from' and/or 'to' are aligned on 4- or 2-byte boundaries, we ++ // let the hardware handle it. The two or four words within dwords ++ // or qwords that span cache line boundaries will still be loaded ++ // and stored atomically. ++ // ++ address generate_conjoint_short_copy(bool aligned, Label &small, Label &large, ++ Label &large_aligned, const char *name) { ++ __ align(CodeEntryAlignment); ++ StubCodeMark mark(this, "StubRoutines", name); ++ address start = __ pc(); ++ ++ array_overlap_test(StubRoutines::jshort_disjoint_arraycopy(), 1); ++ ++ if (UseLASX) ++ __ sltui(T0, A2, 17); ++ else ++ __ sltui(T0, A2, 9); ++ __ bnez(T0, small); ++ ++ __ slli_d(A2, A2, 1); ++ ++ if (large_aligned.is_bound()) { ++ __ orr(T0, A0, A1); ++ __ andi(T0, T0, 7); ++ __ beqz(T0, large_aligned); ++ } ++ ++ __ b(large); ++ ++ return start; ++ } ++ ++ // Int small copy: less than { int:7, lsx:7, lasx:9 } elements. 
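++  // Same 32-byte-per-case computed-jump layout as the byte/short small copies;
++  // cases 7 and 8 are only emitted when UseLASX is set (the table simply ends
++  // early otherwise), matching the limits above.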
++ void generate_int_small_copy(Label &entry, const char *name) { ++ __ align(CodeEntryAlignment); ++ StubCodeMark mark(this, "StubRoutines", name); ++ ++ Label L; ++ __ bind(entry); ++ __ lipc(AT, L); ++ __ slli_d(A2, A2, 5); ++ __ add_d(AT, AT, A2); ++ __ jr(AT); ++ ++ __ bind(L); ++ // 0: ++ __ move(A0, R0); ++ __ jr(RA); ++ __ nop(); ++ __ nop(); ++ __ nop(); ++ __ nop(); ++ __ nop(); ++ __ nop(); ++ ++ // 1: ++ __ ld_w(AT, A0, 0); ++ __ st_w(AT, A1, 0); ++ __ move(A0, R0); ++ __ jr(RA); ++ __ nop(); ++ __ nop(); ++ __ nop(); ++ __ nop(); ++ ++ // 2: ++ __ ld_d(AT, A0, 0); ++ __ st_d(AT, A1, 0); ++ __ move(A0, R0); ++ __ jr(RA); ++ __ nop(); ++ __ nop(); ++ __ nop(); ++ __ nop(); ++ ++ // 3: ++ __ ld_d(AT, A0, 0); ++ __ ld_w(A2, A0, 8); ++ __ st_d(AT, A1, 0); ++ __ st_w(A2, A1, 8); ++ __ move(A0, R0); ++ __ jr(RA); ++ __ nop(); ++ __ nop(); ++ ++ // 4: ++ if (UseLSX) { ++ __ vld(F0, A0, 0); ++ __ vst(F0, A1, 0); ++ __ move(A0, R0); ++ __ jr(RA); ++ __ nop(); ++ __ nop(); ++ } else { ++ __ ld_d(AT, A0, 0); ++ __ ld_d(A2, A0, 8); ++ __ st_d(AT, A1, 0); ++ __ st_d(A2, A1, 8); ++ __ move(A0, R0); ++ __ jr(RA); ++ } ++ __ nop(); ++ __ nop(); ++ ++ // 5: ++ if (UseLSX) { ++ __ vld(F0, A0, 0); ++ __ ld_w(AT, A0, 16); ++ __ vst(F0, A1, 0); ++ __ st_w(AT, A1, 16); ++ __ move(A0, R0); ++ __ jr(RA); ++ __ nop(); ++ __ nop(); ++ } else { ++ __ ld_d(AT, A0, 0); ++ __ ld_d(A2, A0, 8); ++ __ ld_w(A3, A0, 16); ++ __ st_d(AT, A1, 0); ++ __ st_d(A2, A1, 8); ++ __ st_w(A3, A1, 16); ++ __ move(A0, R0); ++ __ jr(RA); ++ } ++ ++ // 6: ++ if (UseLSX) { ++ __ vld(F0, A0, 0); ++ __ ld_d(AT, A0, 16); ++ __ vst(F0, A1, 0); ++ __ st_d(AT, A1, 16); ++ __ move(A0, R0); ++ __ jr(RA); ++ __ nop(); ++ __ nop(); ++ } else { ++ __ ld_d(AT, A0, 0); ++ __ ld_d(A2, A0, 8); ++ __ ld_d(A3, A0, 16); ++ __ st_d(AT, A1, 0); ++ __ st_d(A2, A1, 8); ++ __ st_d(A3, A1, 16); ++ __ move(A0, R0); ++ __ jr(RA); ++ } ++ ++ if (!UseLASX) ++ return; ++ ++ // 7: ++ __ vld(F0, A0, 0); ++ __ vld(F1, A0, 12); ++ __ vst(F0, A1, 0); ++ __ vst(F1, A1, 12); ++ __ move(A0, R0); ++ __ jr(RA); ++ __ nop(); ++ __ nop(); ++ ++ // 8: ++ __ xvld(F0, A0, 0); ++ __ xvst(F0, A1, 0); ++ __ move(A0, R0); ++ __ jr(RA); ++ } ++ ++ // Generate maybe oop copy ++ void gen_maybe_oop_copy(bool is_oop, bool disjoint, bool aligned, Label &small, ++ Label &large, Label &large_aligned, const char *name, ++ int small_limit, int log2_elem_size, bool dest_uninitialized = false) { ++ Label post, _large; ++ DecoratorSet decorators = 0; ++ BarrierSetAssembler *bs = NULL; ++ ++ if (is_oop) { ++ decorators = IN_HEAP | IS_ARRAY; ++ ++ if (disjoint) { ++ decorators |= ARRAYCOPY_DISJOINT; ++ } ++ ++ if (aligned) { ++ decorators |= ARRAYCOPY_ALIGNED; ++ } ++ ++ if (dest_uninitialized) { ++ decorators |= IS_DEST_UNINITIALIZED; ++ } ++ ++ __ addi_d(SP, SP, -4 * wordSize); ++ __ st_d(A2, SP, 3 * wordSize); ++ __ st_d(A1, SP, 2 * wordSize); ++ __ st_d(A0, SP, 1 * wordSize); ++ __ st_d(RA, SP, 0 * wordSize); ++ ++ bs = BarrierSet::barrier_set()->barrier_set_assembler(); ++ bs->arraycopy_prologue(_masm, decorators, is_oop, A1, A2, RegSet()); ++ ++ __ ld_d(A2, SP, 3 * wordSize); ++ __ ld_d(A1, SP, 2 * wordSize); ++ __ ld_d(A0, SP, 1 * wordSize); ++ } ++ ++ __ sltui(T0, A2, small_limit); ++ if (is_oop) { ++ __ beqz(T0, _large); ++ __ bl(small); ++ __ b(post); ++ } else { ++ __ bnez(T0, small); ++ } ++ ++ __ bind(_large); ++ __ slli_d(A2, A2, log2_elem_size); ++ ++ if (large_aligned.is_bound()) { ++ __ orr(T0, A0, A1); ++ __ andi(T0, T0, (1 << (log2_elem_size + 1)) - 1); ++ if (is_oop) { 
++ Label skip; ++ __ bnez(T0, skip); ++ __ bl(large_aligned); ++ __ b(post); ++ __ bind(skip); ++ } else { ++ __ beqz(T0, large_aligned); ++ } ++ } ++ ++ if (is_oop) { ++ __ bl(large); ++ } else { ++ __ b(large); ++ } ++ ++ if (is_oop) { ++ __ bind(post); ++ __ ld_d(A2, SP, 3 * wordSize); ++ __ ld_d(A1, SP, 2 * wordSize); ++ ++ bs->arraycopy_epilogue(_masm, decorators, is_oop, A1, A2, T1, RegSet()); ++ ++ __ ld_d(RA, SP, 0 * wordSize); ++ __ addi_d(SP, SP, 4 * wordSize); ++ __ move(A0, R0); ++ __ jr(RA); ++ } ++ } ++ ++ // Arguments: ++ // aligned - true => Input and output aligned on a HeapWord == 8-byte boundary ++ // ignored ++ // is_oop - true => oop array, so generate store check code ++ // name - stub name string ++ // ++ // Inputs: ++ // A0 - source array address ++ // A1 - destination array address ++ // A2 - element count, treated as ssize_t, can be zero ++ // ++ // If 'from' and/or 'to' are aligned on 4-byte boundaries, we let ++ // the hardware handle it. The two dwords within qwords that span ++ // cache line boundaries will still be loaded and stored atomicly. ++ // ++ // Side Effects: ++ // disjoint_int_copy_entry is set to the no-overlap entry point ++ // used by generate_conjoint_int_oop_copy(). ++ // ++ address generate_disjoint_int_oop_copy(bool aligned, bool is_oop, Label &small, ++ Label &large, Label &large_aligned, const char *name, ++ int small_limit, bool dest_uninitialized = false) { ++ __ align(CodeEntryAlignment); ++ StubCodeMark mark(this, "StubRoutines", name); ++ address start = __ pc(); ++ ++ gen_maybe_oop_copy(is_oop, true, aligned, small, large, large_aligned, ++ name, small_limit, 2, dest_uninitialized); ++ ++ return start; ++ } ++ ++ // Arguments: ++ // aligned - true => Input and output aligned on a HeapWord == 8-byte boundary ++ // ignored ++ // is_oop - true => oop array, so generate store check code ++ // name - stub name string ++ // ++ // Inputs: ++ // A0 - source array address ++ // A1 - destination array address ++ // A2 - element count, treated as ssize_t, can be zero ++ // ++ // If 'from' and/or 'to' are aligned on 4-byte boundaries, we let ++ // the hardware handle it. The two dwords within qwords that span ++ // cache line boundaries will still be loaded and stored atomicly. ++ // ++ address generate_conjoint_int_oop_copy(bool aligned, bool is_oop, Label &small, ++ Label &large, Label &large_aligned, const char *name, ++ int small_limit, bool dest_uninitialized = false) { ++ __ align(CodeEntryAlignment); ++ StubCodeMark mark(this, "StubRoutines", name); ++ address start = __ pc(); ++ ++ if (is_oop) { ++ array_overlap_test(StubRoutines::oop_disjoint_arraycopy(), 2); ++ } else { ++ array_overlap_test(StubRoutines::jint_disjoint_arraycopy(), 2); ++ } ++ ++ gen_maybe_oop_copy(is_oop, false, aligned, small, large, large_aligned, ++ name, small_limit, 2, dest_uninitialized); ++ ++ return start; ++ } ++ ++ // Long small copy: less than { int:4, lsx:4, lasx:5 } elements. 
++ void generate_long_small_copy(Label &entry, const char *name) { ++ __ align(CodeEntryAlignment); ++ StubCodeMark mark(this, "StubRoutines", name); ++ ++ Label L; ++ __ bind(entry); ++ __ lipc(AT, L); ++ __ slli_d(A2, A2, 5); ++ __ add_d(AT, AT, A2); ++ __ jr(AT); ++ ++ __ bind(L); ++ // 0: ++ __ move(A0, R0); ++ __ jr(RA); ++ __ nop(); ++ __ nop(); ++ __ nop(); ++ __ nop(); ++ __ nop(); ++ __ nop(); ++ ++ // 1: ++ __ ld_d(AT, A0, 0); ++ __ st_d(AT, A1, 0); ++ __ move(A0, R0); ++ __ jr(RA); ++ __ nop(); ++ __ nop(); ++ __ nop(); ++ __ nop(); ++ ++ // 2: ++ if (UseLSX) { ++ __ vld(F0, A0, 0); ++ __ vst(F0, A1, 0); ++ __ move(A0, R0); ++ __ jr(RA); ++ __ nop(); ++ __ nop(); ++ } else { ++ __ ld_d(AT, A0, 0); ++ __ ld_d(A2, A0, 8); ++ __ st_d(AT, A1, 0); ++ __ st_d(A2, A1, 8); ++ __ move(A0, R0); ++ __ jr(RA); ++ } ++ __ nop(); ++ __ nop(); ++ ++ // 3: ++ if (UseLSX) { ++ __ vld(F0, A0, 0); ++ __ ld_d(AT, A0, 16); ++ __ vst(F0, A1, 0); ++ __ st_d(AT, A1, 16); ++ __ move(A0, R0); ++ __ jr(RA); ++ __ nop(); ++ __ nop(); ++ } else { ++ __ ld_d(AT, A0, 0); ++ __ ld_d(A2, A0, 8); ++ __ ld_d(A3, A0, 16); ++ __ st_d(AT, A1, 0); ++ __ st_d(A2, A1, 8); ++ __ st_d(A3, A1, 16); ++ __ move(A0, R0); ++ __ jr(RA); ++ } ++ ++ if (!UseLASX) ++ return; ++ ++ // 4: ++ __ xvld(F0, A0, 0); ++ __ xvst(F0, A1, 0); ++ ++ __ move(A0, R0); ++ __ jr(RA); ++ } ++ ++ // Arguments: ++ // aligned - true => Input and output aligned on a HeapWord == 8-byte boundary ++ // ignored ++ // is_oop - true => oop array, so generate store check code ++ // name - stub name string ++ // ++ // Inputs: ++ // A0 - source array address ++ // A1 - destination array address ++ // A2 - element count, treated as ssize_t, can be zero ++ // ++ // If 'from' and/or 'to' are aligned on 4-byte boundaries, we let ++ // the hardware handle it. The two dwords within qwords that span ++ // cache line boundaries will still be loaded and stored atomicly. ++ // ++ // Side Effects: ++ // disjoint_int_copy_entry is set to the no-overlap entry point ++ // used by generate_conjoint_int_oop_copy(). ++ // ++ address generate_disjoint_long_oop_copy(bool aligned, bool is_oop, Label &small, ++ Label &large, Label &large_aligned, const char *name, ++ int small_limit, bool dest_uninitialized = false) { ++ __ align(CodeEntryAlignment); ++ StubCodeMark mark(this, "StubRoutines", name); ++ address start = __ pc(); ++ ++ gen_maybe_oop_copy(is_oop, true, aligned, small, large, large_aligned, ++ name, small_limit, 3, dest_uninitialized); ++ ++ return start; ++ } ++ ++ // Arguments: ++ // aligned - true => Input and output aligned on a HeapWord == 8-byte boundary ++ // ignored ++ // is_oop - true => oop array, so generate store check code ++ // name - stub name string ++ // ++ // Inputs: ++ // A0 - source array address ++ // A1 - destination array address ++ // A2 - element count, treated as ssize_t, can be zero ++ // ++ // If 'from' and/or 'to' are aligned on 4-byte boundaries, we let ++ // the hardware handle it. The two dwords within qwords that span ++ // cache line boundaries will still be loaded and stored atomicly. 
++ // ++ address generate_conjoint_long_oop_copy(bool aligned, bool is_oop, Label &small, ++ Label &large, Label &large_aligned, const char *name, ++ int small_limit, bool dest_uninitialized = false) { ++ __ align(CodeEntryAlignment); ++ StubCodeMark mark(this, "StubRoutines", name); ++ address start = __ pc(); ++ ++ if (is_oop) { ++ array_overlap_test(StubRoutines::oop_disjoint_arraycopy(), 3); ++ } else { ++ array_overlap_test(StubRoutines::jlong_disjoint_arraycopy(), 3); ++ } ++ ++ gen_maybe_oop_copy(is_oop, false, aligned, small, large, large_aligned, ++ name, small_limit, 3, dest_uninitialized); ++ ++ return start; ++ } ++ ++ // Helper for generating a dynamic type check. ++ // Smashes scratch1, scratch2. ++ void generate_type_check(Register sub_klass, ++ Register super_check_offset, ++ Register super_klass, ++ Register tmp1, ++ Register tmp2, ++ Label& L_success) { ++ assert_different_registers(sub_klass, super_check_offset, super_klass); ++ ++ __ block_comment("type_check:"); ++ ++ Label L_miss; ++ ++ __ check_klass_subtype_fast_path(sub_klass, super_klass, tmp1, &L_success, &L_miss, NULL, ++ super_check_offset); ++ __ check_klass_subtype_slow_path(sub_klass, super_klass, tmp1, tmp2, &L_success, NULL); ++ ++ // Fall through on failure! ++ __ bind(L_miss); ++ } ++ ++ // ++ // Generate checkcasting array copy stub ++ // ++ // Input: ++ // A0 - source array address ++ // A1 - destination array address ++ // A2 - element count, treated as ssize_t, can be zero ++ // A3 - size_t ckoff (super_check_offset) ++ // A4 - oop ckval (super_klass) ++ // ++ // Output: ++ // V0 == 0 - success ++ // V0 == -1^K - failure, where K is partial transfer count ++ // ++ address generate_checkcast_copy(const char *name, bool dest_uninitialized = false) { ++ Label L_load_element, L_store_element, L_do_card_marks, L_done, L_done_pop; ++ ++ // Input registers (after setup_arg_regs) ++ const Register from = A0; // source array address ++ const Register to = A1; // destination array address ++ const Register count = A2; // elementscount ++ const Register ckoff = A3; // super_check_offset ++ const Register ckval = A4; // super_klass ++ ++ RegSet wb_pre_saved_regs = RegSet::range(A0, A4); ++ RegSet wb_post_saved_regs = RegSet::of(count); ++ ++ // Registers used as temps (S0, S1, S2, S3 are save-on-entry) ++ const Register copied_oop = S0; // actual oop copied ++ const Register count_save = S1; // orig elementscount ++ const Register start_to = S2; // destination array start address ++ const Register oop_klass = S3; // oop._klass ++ const Register tmp1 = A5; ++ const Register tmp2 = A6; ++ ++ //--------------------------------------------------------------- ++ // Assembler stub will be used for this call to arraycopy ++ // if the two arrays are subtypes of Object[] but the ++ // destination array type is not equal to or a supertype ++ // of the source type. Each element must be separately ++ // checked. ++ ++ assert_different_registers(from, to, count, ckoff, ckval, start_to, ++ copied_oop, oop_klass, count_save); ++ ++ __ align(CodeEntryAlignment); ++ StubCodeMark mark(this, "StubRoutines", name); ++ address start = __ pc(); ++ ++ // caller guarantees that the arrays really are different ++ // otherwise, we would have to make conjoint checks ++ ++ // Caller of this entry point must set up the argument registers. ++ __ block_comment("Entry:"); ++ ++ // Empty array: Nothing to do. 
++ __ beqz(count, L_done); ++ ++ __ push(RegSet::of(S0, S1, S2, S3, RA)); ++ ++#ifdef ASSERT ++ __ block_comment("assert consistent ckoff/ckval"); ++ // The ckoff and ckval must be mutually consistent, ++ // even though caller generates both. ++ { Label L; ++ int sco_offset = in_bytes(Klass::super_check_offset_offset()); ++ __ ld_w(start_to, Address(ckval, sco_offset)); ++ __ beq(ckoff, start_to, L); ++ __ stop("super_check_offset inconsistent"); ++ __ bind(L); ++ } ++#endif //ASSERT ++ ++ DecoratorSet decorators = IN_HEAP | IS_ARRAY | ARRAYCOPY_CHECKCAST | ARRAYCOPY_DISJOINT; ++ bool is_oop = true; ++ if (dest_uninitialized) { ++ decorators |= IS_DEST_UNINITIALIZED; ++ } ++ ++ BarrierSetAssembler *bs = BarrierSet::barrier_set()->barrier_set_assembler(); ++ bs->arraycopy_prologue(_masm, decorators, is_oop, to, count, wb_pre_saved_regs); ++ ++ // save the original count ++ __ move(count_save, count); ++ ++ // Copy from low to high addresses ++ __ move(start_to, to); // Save destination array start address ++ __ b(L_load_element); ++ ++ // ======== begin loop ======== ++ // (Loop is rotated; its entry is L_load_element.) ++ // Loop control: ++ // for (; count != 0; count--) { ++ // copied_oop = load_heap_oop(from++); ++ // ... generate_type_check ...; ++ // store_heap_oop(to++, copied_oop); ++ // } ++ __ align(OptoLoopAlignment); ++ ++ __ bind(L_store_element); ++ __ store_heap_oop(Address(to, 0), copied_oop, tmp1, tmp2, AS_RAW); // store the oop ++ __ addi_d(to, to, UseCompressedOops ? 4 : 8); ++ __ addi_d(count, count, -1); ++ __ beqz(count, L_do_card_marks); ++ ++ // ======== loop entry is here ======== ++ __ bind(L_load_element); ++ __ load_heap_oop(copied_oop, Address(from, 0), tmp1, tmp2, AS_RAW); // load the oop ++ __ addi_d(from, from, UseCompressedOops ? 4 : 8); ++ __ beqz(copied_oop, L_store_element); ++ ++ __ load_klass(oop_klass, copied_oop); // query the object klass ++ generate_type_check(oop_klass, ckoff, ckval, tmp1, tmp2, L_store_element); ++ // ======== end loop ======== ++ ++ // Register count = remaining oops, count_orig = total oops. ++ // Emit GC store barriers for the oops we have copied and report ++ // their number to the caller. ++ ++ __ sub_d(tmp1, count_save, count); // K = partially copied oop count ++ __ nor(count, tmp1, R0); // report (-1^K) to caller ++ __ beqz(tmp1, L_done_pop); ++ ++ __ bind(L_do_card_marks); ++ ++ bs->arraycopy_epilogue(_masm, decorators, is_oop, start_to, count_save, tmp2, wb_post_saved_regs); ++ ++ __ bind(L_done_pop); ++ __ pop(RegSet::of(S0, S1, S2, S3, RA)); ++ ++#ifndef PRODUCT ++ __ li(SCR2, (address)&SharedRuntime::_checkcast_array_copy_ctr); ++ __ increment(Address(SCR2, 0), 1); ++#endif ++ ++ __ bind(L_done); ++ __ move(A0, count); ++ __ jr(RA); ++ ++ return start; ++ } ++ ++ // ++ // Generate 'unsafe' array copy stub ++ // Though just as safe as the other stubs, it takes an unscaled ++ // size_t argument instead of an element count. ++ // ++ // Input: ++ // A0 - source array address ++ // A1 - destination array address ++ // A2 - byte count, treated as ssize_t, can be zero ++ // ++ // Examines the alignment of the operands and dispatches ++ // to a long, int, short, or byte copy loop. 
++ // ++ address generate_unsafe_copy(const char *name) { ++ Label L_long_aligned, L_int_aligned, L_short_aligned; ++ Register s = A0, d = A1, count = A2; ++ ++ __ align(CodeEntryAlignment); ++ StubCodeMark mark(this, "StubRoutines", name); ++ address start = __ pc(); ++ ++ __ orr(AT, s, d); ++ __ orr(AT, AT, count); ++ ++ __ andi(AT, AT, BytesPerLong-1); ++ __ beqz(AT, L_long_aligned); ++ __ andi(AT, AT, BytesPerInt-1); ++ __ beqz(AT, L_int_aligned); ++ __ andi(AT, AT, BytesPerShort-1); ++ __ beqz(AT, L_short_aligned); ++ __ b(StubRoutines::_jbyte_arraycopy); ++ ++ __ bind(L_short_aligned); ++ __ srli_d(count, count, LogBytesPerShort); // size => short_count ++ __ b(StubRoutines::_jshort_arraycopy); ++ __ bind(L_int_aligned); ++ __ srli_d(count, count, LogBytesPerInt); // size => int_count ++ __ b(StubRoutines::_jint_arraycopy); ++ __ bind(L_long_aligned); ++ __ srli_d(count, count, LogBytesPerLong); // size => long_count ++ __ b(StubRoutines::_jlong_arraycopy); ++ ++ return start; ++ } ++ ++ // Perform range checks on the proposed arraycopy. ++ // Kills temp, but nothing else. ++ // Also, clean the sign bits of src_pos and dst_pos. ++ void arraycopy_range_checks(Register src, // source array oop (A0) ++ Register src_pos, // source position (A1) ++ Register dst, // destination array oo (A2) ++ Register dst_pos, // destination position (A3) ++ Register length, ++ Register temp, ++ Label& L_failed) { ++ __ block_comment("arraycopy_range_checks:"); ++ ++ assert_different_registers(SCR1, temp); ++ ++ // if (src_pos + length > arrayOop(src)->length()) FAIL; ++ __ ld_w(SCR1, Address(src, arrayOopDesc::length_offset_in_bytes())); ++ __ add_w(temp, length, src_pos); ++ __ bltu(SCR1, temp, L_failed); ++ ++ // if (dst_pos + length > arrayOop(dst)->length()) FAIL; ++ __ ld_w(SCR1, Address(dst, arrayOopDesc::length_offset_in_bytes())); ++ __ add_w(temp, length, dst_pos); ++ __ bltu(SCR1, temp, L_failed); ++ ++ // Have to clean up high 32 bits of 'src_pos' and 'dst_pos'. ++ __ move(src_pos, src_pos); ++ __ move(dst_pos, dst_pos); ++ ++ __ block_comment("arraycopy_range_checks done"); ++ } ++ ++ // ++ // Generate generic array copy stubs ++ // ++ // Input: ++ // A0 - src oop ++ // A1 - src_pos (32-bits) ++ // A2 - dst oop ++ // A3 - dst_pos (32-bits) ++ // A4 - element count (32-bits) ++ // ++ // Output: ++ // V0 == 0 - success ++ // V0 == -1^K - failure, where K is partial transfer count ++ // ++ address generate_generic_copy(const char *name) { ++ Label L_failed, L_objArray; ++ Label L_copy_bytes, L_copy_shorts, L_copy_ints, L_copy_longs; ++ ++ // Input registers ++ const Register src = A0; // source array oop ++ const Register src_pos = A1; // source position ++ const Register dst = A2; // destination array oop ++ const Register dst_pos = A3; // destination position ++ const Register length = A4; ++ ++ // Registers used as temps ++ const Register dst_klass = A5; ++ ++ __ align(CodeEntryAlignment); ++ ++ StubCodeMark mark(this, "StubRoutines", name); ++ ++ address start = __ pc(); ++ ++#ifndef PRODUCT ++ // bump this on entry, not on exit: ++ __ li(SCR2, (address)&SharedRuntime::_generic_array_copy_ctr); ++ __ increment(Address(SCR2, 0), 1); ++#endif ++ ++ //----------------------------------------------------------------------- ++ // Assembler stub will be used for this call to arraycopy ++ // if the following conditions are met: ++ // ++ // (1) src and dst must not be null. ++ // (2) src_pos must not be negative. ++ // (3) dst_pos must not be negative. ++ // (4) length must not be negative. 
++ // (5) src klass and dst klass should be the same and not NULL. ++ // (6) src and dst should be arrays. ++ // (7) src_pos + length must not exceed length of src. ++ // (8) dst_pos + length must not exceed length of dst. ++ // ++ ++ // if (src == NULL) return -1; ++ __ beqz(src, L_failed); ++ ++ // if (src_pos < 0) return -1; ++ __ blt(src_pos, R0, L_failed); ++ ++ // if (dst == NULL) return -1; ++ __ beqz(dst, L_failed); ++ ++ // if (dst_pos < 0) return -1; ++ __ blt(dst_pos, R0, L_failed); ++ ++ // registers used as temp ++ const Register scratch_length = T0; // elements count to copy ++ const Register scratch_src_klass = T1; // array klass ++ const Register lh = T2; // layout helper ++ const Register tmp1 = T3; ++ const Register tmp2 = T4; ++ ++ // if (length < 0) return -1; ++ __ move(scratch_length, length); // length (elements count, 32-bits value) ++ __ blt(scratch_length, R0, L_failed); ++ ++ __ load_klass(scratch_src_klass, src); ++#ifdef ASSERT ++ // assert(src->klass() != NULL); ++ { ++ __ block_comment("assert klasses not null {"); ++ Label L1, L2; ++ __ bnez(scratch_src_klass, L2); // it is broken if klass is NULL ++ __ bind(L1); ++ __ stop("broken null klass"); ++ __ bind(L2); ++ __ load_klass(SCR2, dst); ++ __ beqz(SCR2, L1); // this would be broken also ++ __ block_comment("} assert klasses not null done"); ++ } ++#endif ++ ++ // Load layout helper (32-bits) ++ // ++ // |array_tag| | header_size | element_type | |log2_element_size| ++ // 32 30 24 16 8 2 0 ++ // ++ // array_tag: typeArray = 0x3, objArray = 0x2, non-array = 0x0 ++ // ++ ++ const int lh_offset = in_bytes(Klass::layout_helper_offset()); ++ ++ // Handle objArrays completely differently... ++ const jint objArray_lh = Klass::array_layout_helper(T_OBJECT); ++ __ ld_w(lh, Address(scratch_src_klass, lh_offset)); ++ __ li(SCR1, objArray_lh); ++ __ xorr(SCR2, lh, SCR1); ++ __ beqz(SCR2, L_objArray); ++ ++ // if (src->klass() != dst->klass()) return -1; ++ __ load_klass(SCR2, dst); ++ __ xorr(SCR2, SCR2, scratch_src_klass); ++ __ bnez(SCR2, L_failed); ++ ++ // if (!src->is_Array()) return -1; ++ __ bge(lh, R0, L_failed); // i.e. (lh >= 0) ++ ++ // At this point, it is known to be a typeArray (array_tag 0x3). 
++#ifdef ASSERT ++ { ++ __ block_comment("assert primitive array {"); ++ Label L; ++ __ li(SCR2, (int)(Klass::_lh_array_tag_type_value << Klass::_lh_array_tag_shift)); ++ __ bge(lh, SCR2, L); ++ __ stop("must be a primitive array"); ++ __ bind(L); ++ __ block_comment("} assert primitive array done"); ++ } ++#endif ++ ++ arraycopy_range_checks(src, src_pos, dst, dst_pos, scratch_length, SCR2, L_failed); ++ ++ // TypeArrayKlass ++ // ++ // src_addr = (src + array_header_in_bytes()) + (src_pos << log2elemsize); ++ // dst_addr = (dst + array_header_in_bytes()) + (dst_pos << log2elemsize); ++ // ++ ++ const Register scr1_offset = SCR1; // array offset ++ const Register elsize = lh; // element size ++ ++ __ bstrpick_d(scr1_offset, lh, Klass::_lh_header_size_shift + ++ exact_log2(Klass::_lh_header_size_mask+1) - 1, ++ Klass::_lh_header_size_shift); // array_offset ++ __ add_d(src, src, scr1_offset); // src array offset ++ __ add_d(dst, dst, scr1_offset); // dst array offset ++ __ block_comment("choose copy loop based on element size"); ++ ++ // next registers should be set before the jump to corresponding stub ++ const Register from = A0; // source array address ++ const Register to = A1; // destination array address ++ const Register count = A2; // elements count ++ ++ // 'from', 'to', 'count' registers should be set in such order ++ // since they are the same as 'src', 'src_pos', 'dst'. ++ ++ assert(Klass::_lh_log2_element_size_shift == 0, "fix this code"); ++ ++ // The possible values of elsize are 0-3, i.e. exact_log2(element ++ // size in bytes). We do a simple bitwise binary search. ++ __ bind(L_copy_bytes); ++ __ andi(tmp1, elsize, 2); ++ __ bnez(tmp1, L_copy_ints); ++ __ andi(tmp1, elsize, 1); ++ __ bnez(tmp1, L_copy_shorts); ++ __ lea(from, Address(src, src_pos, Address::times_1)); // src_addr ++ __ lea(to, Address(dst, dst_pos, Address::times_1)); // dst_addr ++ __ move(count, scratch_length); // length ++ __ b(StubRoutines::_jbyte_arraycopy); ++ ++ __ bind(L_copy_shorts); ++ __ lea(from, Address(src, src_pos, Address::times_2)); // src_addr ++ __ lea(to, Address(dst, dst_pos, Address::times_2)); // dst_addr ++ __ move(count, scratch_length); // length ++ __ b(StubRoutines::_jshort_arraycopy); ++ ++ __ bind(L_copy_ints); ++ __ andi(tmp1, elsize, 1); ++ __ bnez(tmp1, L_copy_longs); ++ __ lea(from, Address(src, src_pos, Address::times_4)); // src_addr ++ __ lea(to, Address(dst, dst_pos, Address::times_4)); // dst_addr ++ __ move(count, scratch_length); // length ++ __ b(StubRoutines::_jint_arraycopy); ++ ++ __ bind(L_copy_longs); ++#ifdef ASSERT ++ { ++ __ block_comment("assert long copy {"); ++ Label L; ++ __ andi(lh, lh, Klass::_lh_log2_element_size_mask); // lh -> elsize ++ __ li(tmp1, LogBytesPerLong); ++ __ beq(elsize, tmp1, L); ++ __ stop("must be long copy, but elsize is wrong"); ++ __ bind(L); ++ __ block_comment("} assert long copy done"); ++ } ++#endif ++ __ lea(from, Address(src, src_pos, Address::times_8)); // src_addr ++ __ lea(to, Address(dst, dst_pos, Address::times_8)); // dst_addr ++ __ move(count, scratch_length); // length ++ __ b(StubRoutines::_jlong_arraycopy); ++ ++ // ObjArrayKlass ++ __ bind(L_objArray); ++ // live at this point: scratch_src_klass, scratch_length, src[_pos], dst[_pos] ++ ++ Label L_plain_copy, L_checkcast_copy; ++ // test array classes for subtyping ++ __ load_klass(tmp1, dst); ++ __ bne(scratch_src_klass, tmp1, L_checkcast_copy); // usual case is exact equality ++ ++ // Identically typed arrays can be copied without element-wise checks. 
++ arraycopy_range_checks(src, src_pos, dst, dst_pos, scratch_length, SCR2, L_failed); ++ ++ __ lea(from, Address(src, src_pos, Address::ScaleFactor(LogBytesPerHeapOop))); ++ __ addi_d(from, from, arrayOopDesc::base_offset_in_bytes(T_OBJECT)); ++ __ lea(to, Address(dst, dst_pos, Address::ScaleFactor(LogBytesPerHeapOop))); ++ __ addi_d(to, to, arrayOopDesc::base_offset_in_bytes(T_OBJECT)); ++ __ move(count, scratch_length); // length ++ __ bind(L_plain_copy); ++ __ b(StubRoutines::_oop_arraycopy); ++ ++ __ bind(L_checkcast_copy); ++ // live at this point: scratch_src_klass, scratch_length, tmp1 (dst_klass) ++ { ++ // Before looking at dst.length, make sure dst is also an objArray. ++ __ ld_w(SCR1, Address(tmp1, lh_offset)); ++ __ li(SCR2, objArray_lh); ++ __ xorr(SCR1, SCR1, SCR2); ++ __ bnez(SCR1, L_failed); ++ ++ // It is safe to examine both src.length and dst.length. ++ arraycopy_range_checks(src, src_pos, dst, dst_pos, scratch_length, tmp1, L_failed); ++ ++ __ load_klass(dst_klass, dst); // reload ++ ++ // Marshal the base address arguments now, freeing registers. ++ __ lea(from, Address(src, src_pos, Address::ScaleFactor(LogBytesPerHeapOop))); ++ __ addi_d(from, from, arrayOopDesc::base_offset_in_bytes(T_OBJECT)); ++ __ lea(to, Address(dst, dst_pos, Address::ScaleFactor(LogBytesPerHeapOop))); ++ __ addi_d(to, to, arrayOopDesc::base_offset_in_bytes(T_OBJECT)); ++ __ move(count, length); // length (reloaded) ++ Register sco_temp = A3; // this register is free now ++ assert_different_registers(from, to, count, sco_temp, dst_klass, scratch_src_klass); ++ // assert_clean_int(count, sco_temp); ++ ++ // Generate the type check. ++ const int sco_offset = in_bytes(Klass::super_check_offset_offset()); ++ __ ld_w(sco_temp, Address(dst_klass, sco_offset)); ++ ++ // Smashes SCR1, SCR2 ++ generate_type_check(scratch_src_klass, sco_temp, dst_klass, tmp1, tmp2, L_plain_copy); ++ ++ // Fetch destination element klass from the ObjArrayKlass header. ++ int ek_offset = in_bytes(ObjArrayKlass::element_klass_offset()); ++ __ ld_d(dst_klass, Address(dst_klass, ek_offset)); ++ __ ld_w(sco_temp, Address(dst_klass, sco_offset)); ++ ++ // the checkcast_copy loop needs two extra arguments: ++ assert(A3 == sco_temp, "#3 already in place"); ++ // Set up arguments for checkcast_arraycopy. 
++ __ move(A4, dst_klass); // dst.klass.element_klass ++ __ b(StubRoutines::_checkcast_arraycopy); ++ } ++ ++ __ bind(L_failed); ++ __ li(V0, -1); ++ __ jr(RA); ++ ++ return start; ++ } ++ ++ void generate_arraycopy_stubs() { ++ Label disjoint_large_copy, conjoint_large_copy; ++ Label disjoint_large_copy_lsx, conjoint_large_copy_lsx; ++ Label disjoint_large_copy_lasx, conjoint_large_copy_lasx; ++ Label byte_small_copy, short_small_copy, int_small_copy, long_small_copy; ++ Label none; ++ ++ generate_disjoint_large_copy(disjoint_large_copy, "disjoint_large_copy"); ++ generate_conjoint_large_copy(conjoint_large_copy, "conjoint_large_copy"); ++ if (UseLSX) { ++ generate_disjoint_large_copy_lsx(disjoint_large_copy_lsx, "disjoint_large_copy_lsx"); ++ generate_conjoint_large_copy_lsx(conjoint_large_copy_lsx, "conjoint_large_copy_lsx"); ++ } ++ if (UseLASX) { ++ generate_disjoint_large_copy_lasx(disjoint_large_copy_lasx, "disjoint_large_copy_lasx"); ++ generate_conjoint_large_copy_lasx(conjoint_large_copy_lasx, "conjoint_large_copy_lasx"); ++ } ++ generate_byte_small_copy(byte_small_copy, "jbyte_small_copy"); ++ generate_short_small_copy(short_small_copy, "jshort_small_copy"); ++ generate_int_small_copy(int_small_copy, "jint_small_copy"); ++ generate_long_small_copy(long_small_copy, "jlong_small_copy"); ++ ++ if (UseCompressedOops) { ++ if (UseLSX) { ++ StubRoutines::_oop_disjoint_arraycopy = generate_disjoint_int_oop_copy(false, true, int_small_copy, disjoint_large_copy_lsx, disjoint_large_copy, "oop_disjoint_arraycopy", 7); ++ StubRoutines::_oop_disjoint_arraycopy_uninit = generate_disjoint_int_oop_copy(false, true, int_small_copy, disjoint_large_copy_lsx, disjoint_large_copy, "oop_disjoint_arraycopy_uninit", 7, true); ++ } else { ++ StubRoutines::_oop_disjoint_arraycopy = generate_disjoint_int_oop_copy(false, true, int_small_copy, disjoint_large_copy, none, "oop_disjoint_arraycopy", 7); ++ StubRoutines::_oop_disjoint_arraycopy_uninit = generate_disjoint_int_oop_copy(false, true, int_small_copy, disjoint_large_copy, none, "oop_disjoint_arraycopy_uninit", 7, true); ++ } ++ if (UseLASX) { ++ StubRoutines::_oop_arraycopy = generate_conjoint_int_oop_copy(false, true, int_small_copy, conjoint_large_copy_lasx, conjoint_large_copy, "oop_arraycopy", 9); ++ StubRoutines::_oop_arraycopy_uninit = generate_conjoint_int_oop_copy(false, true, int_small_copy, conjoint_large_copy_lasx, conjoint_large_copy, "oop_arraycopy_uninit", 9, true); ++ } else if (UseLSX) { ++ StubRoutines::_oop_arraycopy = generate_conjoint_int_oop_copy(false, true, int_small_copy, conjoint_large_copy_lsx, conjoint_large_copy, "oop_arraycopy", 7); ++ StubRoutines::_oop_arraycopy_uninit = generate_conjoint_int_oop_copy(false, true, int_small_copy, conjoint_large_copy_lsx, conjoint_large_copy, "oop_arraycopy_uninit", 7, true); ++ } else { ++ StubRoutines::_oop_arraycopy = generate_conjoint_int_oop_copy(false, true, int_small_copy, conjoint_large_copy, none, "oop_arraycopy", 7); ++ StubRoutines::_oop_arraycopy_uninit = generate_conjoint_int_oop_copy(false, true, int_small_copy, conjoint_large_copy, none, "oop_arraycopy_uninit", 7, true); ++ } ++ } else { ++ if (UseLASX) { ++ StubRoutines::_oop_disjoint_arraycopy = generate_disjoint_long_oop_copy(false, true, long_small_copy, disjoint_large_copy, disjoint_large_copy_lasx, "oop_disjoint_arraycopy", 5); ++ StubRoutines::_oop_disjoint_arraycopy_uninit = generate_disjoint_long_oop_copy(false, true, long_small_copy, disjoint_large_copy, disjoint_large_copy_lasx, "oop_disjoint_arraycopy_uninit", 
5, true); ++ StubRoutines::_oop_arraycopy = generate_conjoint_long_oop_copy(false, true, long_small_copy, conjoint_large_copy, conjoint_large_copy_lasx, "oop_arraycopy", 5); ++ StubRoutines::_oop_arraycopy_uninit = generate_conjoint_long_oop_copy(false, true, long_small_copy, conjoint_large_copy, conjoint_large_copy_lasx, "oop_arraycopy_uninit", 5, true); ++ } else if (UseLSX) { ++ StubRoutines::_oop_disjoint_arraycopy = generate_disjoint_long_oop_copy(false, true, long_small_copy, disjoint_large_copy, disjoint_large_copy_lsx, "oop_disjoint_arraycopy", 4); ++ StubRoutines::_oop_disjoint_arraycopy_uninit = generate_disjoint_long_oop_copy(false, true, long_small_copy, disjoint_large_copy, disjoint_large_copy_lsx, "oop_disjoint_arraycopy_uninit", 4, true); ++ StubRoutines::_oop_arraycopy = generate_conjoint_long_oop_copy(false, true, long_small_copy, conjoint_large_copy, conjoint_large_copy_lsx, "oop_arraycopy", 4); ++ StubRoutines::_oop_arraycopy_uninit = generate_conjoint_long_oop_copy(false, true, long_small_copy, conjoint_large_copy, conjoint_large_copy_lsx, "oop_arraycopy_uninit", 4, true); ++ } else { ++ StubRoutines::_oop_disjoint_arraycopy = generate_disjoint_long_oop_copy(false, true, long_small_copy, disjoint_large_copy, none, "oop_disjoint_arraycopy", 4); ++ StubRoutines::_oop_disjoint_arraycopy_uninit = generate_disjoint_long_oop_copy(false, true, long_small_copy, disjoint_large_copy, none, "oop_disjoint_arraycopy_uninit", 4, true); ++ StubRoutines::_oop_arraycopy = generate_conjoint_long_oop_copy(false, true, long_small_copy, conjoint_large_copy, none, "oop_arraycopy", 4); ++ StubRoutines::_oop_arraycopy_uninit = generate_conjoint_long_oop_copy(false, true, long_small_copy, conjoint_large_copy, conjoint_large_copy_lsx, "oop_arraycopy_uninit", 4, true); ++ } ++ } ++ ++ if (UseLASX) { ++ StubRoutines::_jbyte_disjoint_arraycopy = generate_disjoint_byte_copy(false, byte_small_copy, disjoint_large_copy_lasx, disjoint_large_copy_lsx, "jbyte_disjoint_arraycopy"); ++ StubRoutines::_jshort_disjoint_arraycopy = generate_disjoint_short_copy(false, short_small_copy, disjoint_large_copy_lasx, disjoint_large_copy, "jshort_disjoint_arraycopy"); ++ StubRoutines::_jint_disjoint_arraycopy = generate_disjoint_int_oop_copy(false, false, int_small_copy, disjoint_large_copy_lasx, disjoint_large_copy, "jint_disjoint_arraycopy", 9); ++ ++ StubRoutines::_jbyte_arraycopy = generate_conjoint_byte_copy(false, byte_small_copy, conjoint_large_copy_lasx, conjoint_large_copy_lsx, "jbyte_arraycopy"); ++ StubRoutines::_jshort_arraycopy = generate_conjoint_short_copy(false, short_small_copy, conjoint_large_copy_lasx, conjoint_large_copy, "jshort_arraycopy"); ++ StubRoutines::_jint_arraycopy = generate_conjoint_int_oop_copy(false, false, int_small_copy, conjoint_large_copy_lasx, conjoint_large_copy, "jint_arraycopy", 9); ++ } else if (UseLSX) { ++ StubRoutines::_jbyte_disjoint_arraycopy = generate_disjoint_byte_copy(false, byte_small_copy, disjoint_large_copy_lsx, none, "jbyte_disjoint_arraycopy"); ++ StubRoutines::_jshort_disjoint_arraycopy = generate_disjoint_short_copy(false, short_small_copy, disjoint_large_copy_lsx, disjoint_large_copy, "jshort_disjoint_arraycopy"); ++ StubRoutines::_jint_disjoint_arraycopy = generate_disjoint_int_oop_copy(false, false, int_small_copy, disjoint_large_copy_lsx, disjoint_large_copy, "jint_disjoint_arraycopy", 7); ++ ++ StubRoutines::_jbyte_arraycopy = generate_conjoint_byte_copy(false, byte_small_copy, conjoint_large_copy_lsx, none, "jbyte_arraycopy"); ++ 
StubRoutines::_jshort_arraycopy = generate_conjoint_short_copy(false, short_small_copy, conjoint_large_copy_lsx, conjoint_large_copy, "jshort_arraycopy"); ++ StubRoutines::_jint_arraycopy = generate_conjoint_int_oop_copy(false, false, int_small_copy, conjoint_large_copy_lsx, conjoint_large_copy, "jint_arraycopy", 7); ++ } else { ++ StubRoutines::_jbyte_disjoint_arraycopy = generate_disjoint_byte_copy(false, byte_small_copy, disjoint_large_copy, none, "jbyte_disjoint_arraycopy"); ++ StubRoutines::_jshort_disjoint_arraycopy = generate_disjoint_short_copy(false, short_small_copy, disjoint_large_copy, none, "jshort_disjoint_arraycopy"); ++ StubRoutines::_jint_disjoint_arraycopy = generate_disjoint_int_oop_copy(false, false, int_small_copy, disjoint_large_copy, none, "jint_disjoint_arraycopy", 7); ++ ++ StubRoutines::_jbyte_arraycopy = generate_conjoint_byte_copy(false, byte_small_copy, conjoint_large_copy, none, "jbyte_arraycopy"); ++ StubRoutines::_jshort_arraycopy = generate_conjoint_short_copy(false, short_small_copy, conjoint_large_copy, none, "jshort_arraycopy"); ++ StubRoutines::_jint_arraycopy = generate_conjoint_int_oop_copy(false, false, int_small_copy, conjoint_large_copy, none, "jint_arraycopy", 7); ++ } ++ ++ if (UseLASX) { ++ StubRoutines::_jlong_disjoint_arraycopy = generate_disjoint_long_oop_copy(false, false, long_small_copy, disjoint_large_copy, disjoint_large_copy_lasx, "jlong_disjoint_arraycopy", 5); ++ StubRoutines::_jlong_arraycopy = generate_conjoint_long_oop_copy(false, false, long_small_copy, conjoint_large_copy, conjoint_large_copy_lasx, "jlong_arraycopy", 5); ++ } else if (UseLSX) { ++ StubRoutines::_jlong_disjoint_arraycopy = generate_disjoint_long_oop_copy(false, false, long_small_copy, disjoint_large_copy, disjoint_large_copy_lsx, "jlong_disjoint_arraycopy", 4); ++ StubRoutines::_jlong_arraycopy = generate_conjoint_long_oop_copy(false, false, long_small_copy, conjoint_large_copy, conjoint_large_copy_lsx, "jlong_arraycopy", 4); ++ } else { ++ StubRoutines::_jlong_disjoint_arraycopy = generate_disjoint_long_oop_copy(false, false, long_small_copy, disjoint_large_copy, none, "jlong_disjoint_arraycopy", 4); ++ StubRoutines::_jlong_arraycopy = generate_conjoint_long_oop_copy(false, false, long_small_copy, conjoint_large_copy, none, "jlong_arraycopy", 4); ++ } ++ ++ // We don't generate specialized code for HeapWord-aligned source ++ // arrays, so just use the code we've already generated ++ StubRoutines::_arrayof_jbyte_disjoint_arraycopy = StubRoutines::_jbyte_disjoint_arraycopy; ++ StubRoutines::_arrayof_jbyte_arraycopy = StubRoutines::_jbyte_arraycopy; ++ ++ StubRoutines::_arrayof_jshort_disjoint_arraycopy = StubRoutines::_jshort_disjoint_arraycopy; ++ StubRoutines::_arrayof_jshort_arraycopy = StubRoutines::_jshort_arraycopy; ++ ++ StubRoutines::_arrayof_jint_disjoint_arraycopy = StubRoutines::_jint_disjoint_arraycopy; ++ StubRoutines::_arrayof_jint_arraycopy = StubRoutines::_jint_arraycopy; ++ ++ StubRoutines::_arrayof_jlong_disjoint_arraycopy = StubRoutines::_jlong_disjoint_arraycopy; ++ StubRoutines::_arrayof_jlong_arraycopy = StubRoutines::_jlong_arraycopy; ++ ++ StubRoutines::_arrayof_oop_disjoint_arraycopy = StubRoutines::_oop_disjoint_arraycopy; ++ StubRoutines::_arrayof_oop_arraycopy = StubRoutines::_oop_arraycopy; ++ ++ StubRoutines::_arrayof_oop_disjoint_arraycopy_uninit = StubRoutines::_oop_disjoint_arraycopy_uninit; ++ StubRoutines::_arrayof_oop_arraycopy_uninit = StubRoutines::_oop_arraycopy_uninit; ++ ++ StubRoutines::_checkcast_arraycopy = 
generate_checkcast_copy("checkcast_arraycopy"); ++ StubRoutines::_checkcast_arraycopy_uninit = generate_checkcast_copy("checkcast_arraycopy_uninit", true); ++ ++ StubRoutines::_unsafe_arraycopy = generate_unsafe_copy("unsafe_arraycopy"); ++ ++ StubRoutines::_generic_arraycopy = generate_generic_copy("generic_arraycopy"); ++ ++ StubRoutines::_jbyte_fill = generate_fill(T_BYTE, false, "jbyte_fill"); ++ StubRoutines::_jshort_fill = generate_fill(T_SHORT, false, "jshort_fill"); ++ StubRoutines::_jint_fill = generate_fill(T_INT, false, "jint_fill"); ++ StubRoutines::_arrayof_jbyte_fill = generate_fill(T_BYTE, true, "arrayof_jbyte_fill"); ++ StubRoutines::_arrayof_jshort_fill = generate_fill(T_SHORT, true, "arrayof_jshort_fill"); ++ StubRoutines::_arrayof_jint_fill = generate_fill(T_INT, true, "arrayof_jint_fill"); ++ } ++ ++ // Arguments: ++ // ++ // Inputs: ++ // A0 - source byte array address ++ // A1 - destination byte array address ++ // A2 - K (key) in little endian int array ++ // A3 - r vector byte array address ++ // A4 - input length ++ // ++ // Output: ++ // A0 - input length ++ // ++ address generate_aescrypt_encryptBlock(bool cbc) { ++ static const uint32_t ft_consts[256] = { ++ 0xc66363a5, 0xf87c7c84, 0xee777799, 0xf67b7b8d, ++ 0xfff2f20d, 0xd66b6bbd, 0xde6f6fb1, 0x91c5c554, ++ 0x60303050, 0x02010103, 0xce6767a9, 0x562b2b7d, ++ 0xe7fefe19, 0xb5d7d762, 0x4dababe6, 0xec76769a, ++ 0x8fcaca45, 0x1f82829d, 0x89c9c940, 0xfa7d7d87, ++ 0xeffafa15, 0xb25959eb, 0x8e4747c9, 0xfbf0f00b, ++ 0x41adadec, 0xb3d4d467, 0x5fa2a2fd, 0x45afafea, ++ 0x239c9cbf, 0x53a4a4f7, 0xe4727296, 0x9bc0c05b, ++ 0x75b7b7c2, 0xe1fdfd1c, 0x3d9393ae, 0x4c26266a, ++ 0x6c36365a, 0x7e3f3f41, 0xf5f7f702, 0x83cccc4f, ++ 0x6834345c, 0x51a5a5f4, 0xd1e5e534, 0xf9f1f108, ++ 0xe2717193, 0xabd8d873, 0x62313153, 0x2a15153f, ++ 0x0804040c, 0x95c7c752, 0x46232365, 0x9dc3c35e, ++ 0x30181828, 0x379696a1, 0x0a05050f, 0x2f9a9ab5, ++ 0x0e070709, 0x24121236, 0x1b80809b, 0xdfe2e23d, ++ 0xcdebeb26, 0x4e272769, 0x7fb2b2cd, 0xea75759f, ++ 0x1209091b, 0x1d83839e, 0x582c2c74, 0x341a1a2e, ++ 0x361b1b2d, 0xdc6e6eb2, 0xb45a5aee, 0x5ba0a0fb, ++ 0xa45252f6, 0x763b3b4d, 0xb7d6d661, 0x7db3b3ce, ++ 0x5229297b, 0xdde3e33e, 0x5e2f2f71, 0x13848497, ++ 0xa65353f5, 0xb9d1d168, 0x00000000, 0xc1eded2c, ++ 0x40202060, 0xe3fcfc1f, 0x79b1b1c8, 0xb65b5bed, ++ 0xd46a6abe, 0x8dcbcb46, 0x67bebed9, 0x7239394b, ++ 0x944a4ade, 0x984c4cd4, 0xb05858e8, 0x85cfcf4a, ++ 0xbbd0d06b, 0xc5efef2a, 0x4faaaae5, 0xedfbfb16, ++ 0x864343c5, 0x9a4d4dd7, 0x66333355, 0x11858594, ++ 0x8a4545cf, 0xe9f9f910, 0x04020206, 0xfe7f7f81, ++ 0xa05050f0, 0x783c3c44, 0x259f9fba, 0x4ba8a8e3, ++ 0xa25151f3, 0x5da3a3fe, 0x804040c0, 0x058f8f8a, ++ 0x3f9292ad, 0x219d9dbc, 0x70383848, 0xf1f5f504, ++ 0x63bcbcdf, 0x77b6b6c1, 0xafdada75, 0x42212163, ++ 0x20101030, 0xe5ffff1a, 0xfdf3f30e, 0xbfd2d26d, ++ 0x81cdcd4c, 0x180c0c14, 0x26131335, 0xc3ecec2f, ++ 0xbe5f5fe1, 0x359797a2, 0x884444cc, 0x2e171739, ++ 0x93c4c457, 0x55a7a7f2, 0xfc7e7e82, 0x7a3d3d47, ++ 0xc86464ac, 0xba5d5de7, 0x3219192b, 0xe6737395, ++ 0xc06060a0, 0x19818198, 0x9e4f4fd1, 0xa3dcdc7f, ++ 0x44222266, 0x542a2a7e, 0x3b9090ab, 0x0b888883, ++ 0x8c4646ca, 0xc7eeee29, 0x6bb8b8d3, 0x2814143c, ++ 0xa7dede79, 0xbc5e5ee2, 0x160b0b1d, 0xaddbdb76, ++ 0xdbe0e03b, 0x64323256, 0x743a3a4e, 0x140a0a1e, ++ 0x924949db, 0x0c06060a, 0x4824246c, 0xb85c5ce4, ++ 0x9fc2c25d, 0xbdd3d36e, 0x43acacef, 0xc46262a6, ++ 0x399191a8, 0x319595a4, 0xd3e4e437, 0xf279798b, ++ 0xd5e7e732, 0x8bc8c843, 0x6e373759, 0xda6d6db7, ++ 0x018d8d8c, 0xb1d5d564, 0x9c4e4ed2, 0x49a9a9e0, ++ 
0xd86c6cb4, 0xac5656fa, 0xf3f4f407, 0xcfeaea25, ++ 0xca6565af, 0xf47a7a8e, 0x47aeaee9, 0x10080818, ++ 0x6fbabad5, 0xf0787888, 0x4a25256f, 0x5c2e2e72, ++ 0x381c1c24, 0x57a6a6f1, 0x73b4b4c7, 0x97c6c651, ++ 0xcbe8e823, 0xa1dddd7c, 0xe874749c, 0x3e1f1f21, ++ 0x964b4bdd, 0x61bdbddc, 0x0d8b8b86, 0x0f8a8a85, ++ 0xe0707090, 0x7c3e3e42, 0x71b5b5c4, 0xcc6666aa, ++ 0x904848d8, 0x06030305, 0xf7f6f601, 0x1c0e0e12, ++ 0xc26161a3, 0x6a35355f, 0xae5757f9, 0x69b9b9d0, ++ 0x17868691, 0x99c1c158, 0x3a1d1d27, 0x279e9eb9, ++ 0xd9e1e138, 0xebf8f813, 0x2b9898b3, 0x22111133, ++ 0xd26969bb, 0xa9d9d970, 0x078e8e89, 0x339494a7, ++ 0x2d9b9bb6, 0x3c1e1e22, 0x15878792, 0xc9e9e920, ++ 0x87cece49, 0xaa5555ff, 0x50282878, 0xa5dfdf7a, ++ 0x038c8c8f, 0x59a1a1f8, 0x09898980, 0x1a0d0d17, ++ 0x65bfbfda, 0xd7e6e631, 0x844242c6, 0xd06868b8, ++ 0x824141c3, 0x299999b0, 0x5a2d2d77, 0x1e0f0f11, ++ 0x7bb0b0cb, 0xa85454fc, 0x6dbbbbd6, 0x2c16163a ++ }; ++ static const uint8_t fsb_consts[256] = { ++ 0x63, 0x7c, 0x77, 0x7b, 0xf2, 0x6b, 0x6f, 0xc5, ++ 0x30, 0x01, 0x67, 0x2b, 0xfe, 0xd7, 0xab, 0x76, ++ 0xca, 0x82, 0xc9, 0x7d, 0xfa, 0x59, 0x47, 0xf0, ++ 0xad, 0xd4, 0xa2, 0xaf, 0x9c, 0xa4, 0x72, 0xc0, ++ 0xb7, 0xfd, 0x93, 0x26, 0x36, 0x3f, 0xf7, 0xcc, ++ 0x34, 0xa5, 0xe5, 0xf1, 0x71, 0xd8, 0x31, 0x15, ++ 0x04, 0xc7, 0x23, 0xc3, 0x18, 0x96, 0x05, 0x9a, ++ 0x07, 0x12, 0x80, 0xe2, 0xeb, 0x27, 0xb2, 0x75, ++ 0x09, 0x83, 0x2c, 0x1a, 0x1b, 0x6e, 0x5a, 0xa0, ++ 0x52, 0x3b, 0xd6, 0xb3, 0x29, 0xe3, 0x2f, 0x84, ++ 0x53, 0xd1, 0x00, 0xed, 0x20, 0xfc, 0xb1, 0x5b, ++ 0x6a, 0xcb, 0xbe, 0x39, 0x4a, 0x4c, 0x58, 0xcf, ++ 0xd0, 0xef, 0xaa, 0xfb, 0x43, 0x4d, 0x33, 0x85, ++ 0x45, 0xf9, 0x02, 0x7f, 0x50, 0x3c, 0x9f, 0xa8, ++ 0x51, 0xa3, 0x40, 0x8f, 0x92, 0x9d, 0x38, 0xf5, ++ 0xbc, 0xb6, 0xda, 0x21, 0x10, 0xff, 0xf3, 0xd2, ++ 0xcd, 0x0c, 0x13, 0xec, 0x5f, 0x97, 0x44, 0x17, ++ 0xc4, 0xa7, 0x7e, 0x3d, 0x64, 0x5d, 0x19, 0x73, ++ 0x60, 0x81, 0x4f, 0xdc, 0x22, 0x2a, 0x90, 0x88, ++ 0x46, 0xee, 0xb8, 0x14, 0xde, 0x5e, 0x0b, 0xdb, ++ 0xe0, 0x32, 0x3a, 0x0a, 0x49, 0x06, 0x24, 0x5c, ++ 0xc2, 0xd3, 0xac, 0x62, 0x91, 0x95, 0xe4, 0x79, ++ 0xe7, 0xc8, 0x37, 0x6d, 0x8d, 0xd5, 0x4e, 0xa9, ++ 0x6c, 0x56, 0xf4, 0xea, 0x65, 0x7a, 0xae, 0x08, ++ 0xba, 0x78, 0x25, 0x2e, 0x1c, 0xa6, 0xb4, 0xc6, ++ 0xe8, 0xdd, 0x74, 0x1f, 0x4b, 0xbd, 0x8b, 0x8a, ++ 0x70, 0x3e, 0xb5, 0x66, 0x48, 0x03, 0xf6, 0x0e, ++ 0x61, 0x35, 0x57, 0xb9, 0x86, 0xc1, 0x1d, 0x9e, ++ 0xe1, 0xf8, 0x98, 0x11, 0x69, 0xd9, 0x8e, 0x94, ++ 0x9b, 0x1e, 0x87, 0xe9, 0xce, 0x55, 0x28, 0xdf, ++ 0x8c, 0xa1, 0x89, 0x0d, 0xbf, 0xe6, 0x42, 0x68, ++ 0x41, 0x99, 0x2d, 0x0f, 0xb0, 0x54, 0xbb, 0x16 ++ }; ++ ++ __ align(CodeEntryAlignment); ++ StubCodeMark mark(this, "StubRoutines", "aescrypt_encryptBlock"); ++ ++ // Allocate registers ++ Register src = A0; ++ Register dst = A1; ++ Register key = A2; ++ Register rve = A3; ++ Register srclen = A4; ++ Register keylen = T8; ++ Register srcend = A5; ++ Register keyold = A6; ++ Register t0 = A7; ++ Register t1, t2, t3, ftp; ++ Register xa[4] = { T0, T1, T2, T3 }; ++ Register ya[4] = { T4, T5, T6, T7 }; ++ ++ Label loop, tail, done; ++ address start = __ pc(); ++ ++ if (cbc) { ++ t1 = S0; ++ t2 = S1; ++ t3 = S2; ++ ftp = S3; ++ ++ __ beqz(srclen, done); ++ ++ __ addi_d(SP, SP, -4 * wordSize); ++ __ st_d(S3, SP, 3 * wordSize); ++ __ st_d(S2, SP, 2 * wordSize); ++ __ st_d(S1, SP, 1 * wordSize); ++ __ st_d(S0, SP, 0 * wordSize); ++ ++ __ add_d(srcend, src, srclen); ++ __ move(keyold, key); ++ } else { ++ t1 = A3; ++ t2 = A4; ++ t3 = A5; ++ ftp = A6; ++ } ++ ++ __ ld_w(keylen, key, 
arrayOopDesc::length_offset_in_bytes() - arrayOopDesc::base_offset_in_bytes(T_INT)); ++ ++ // Round 1 ++ if (cbc) { ++ for (int i = 0; i < 4; i++) { ++ __ ld_w(xa[i], rve, 4 * i); ++ } ++ ++ __ bind(loop); ++ ++ for (int i = 0; i < 4; i++) { ++ __ ld_w(ya[i], src, 4 * i); ++ } ++ for (int i = 0; i < 4; i++) { ++ __ XOR(xa[i], xa[i], ya[i]); ++ } ++ } else { ++ for (int i = 0; i < 4; i++) { ++ __ ld_w(xa[i], src, 4 * i); ++ } ++ } ++ for (int i = 0; i < 4; i++) { ++ __ ld_w(ya[i], key, 4 * i); ++ } ++ for (int i = 0; i < 4; i++) { ++ __ revb_2h(xa[i], xa[i]); ++ } ++ for (int i = 0; i < 4; i++) { ++ __ rotri_w(xa[i], xa[i], 16); ++ } ++ for (int i = 0; i < 4; i++) { ++ __ XOR(xa[i], xa[i], ya[i]); ++ } ++ ++ __ li(ftp, (intptr_t)ft_consts); ++ ++ // Round 2 - (N-1) ++ for (int r = 0; r < 14; r++) { ++ Register *xp; ++ Register *yp; ++ ++ if (r & 1) { ++ xp = xa; ++ yp = ya; ++ } else { ++ xp = ya; ++ yp = xa; ++ } ++ ++ for (int i = 0; i < 4; i++) { ++ __ ld_w(xp[i], key, 4 * (4 * (r + 1) + i)); ++ } ++ ++ for (int i = 0; i < 4; i++) { ++ __ bstrpick_d(t0, yp[(i + 3) & 3], 7, 0); ++ __ bstrpick_d(t1, yp[(i + 2) & 3], 15, 8); ++ __ bstrpick_d(t2, yp[(i + 1) & 3], 23, 16); ++ __ bstrpick_d(t3, yp[(i + 0) & 3], 31, 24); ++ __ slli_w(t0, t0, 2); ++ __ slli_w(t1, t1, 2); ++ __ slli_w(t2, t2, 2); ++ __ slli_w(t3, t3, 2); ++ __ ldx_w(t0, ftp, t0); ++ __ ldx_w(t1, ftp, t1); ++ __ ldx_w(t2, ftp, t2); ++ __ ldx_w(t3, ftp, t3); ++ __ rotri_w(t0, t0, 24); ++ __ rotri_w(t1, t1, 16); ++ __ rotri_w(t2, t2, 8); ++ __ XOR(xp[i], xp[i], t0); ++ __ XOR(t0, t1, t2); ++ __ XOR(xp[i], xp[i], t3); ++ __ XOR(xp[i], xp[i], t0); ++ } ++ ++ if (r == 8) { ++ // AES 128 ++ __ li(t0, 44); ++ __ beq(t0, keylen, tail); ++ } else if (r == 10) { ++ // AES 192 ++ __ li(t0, 52); ++ __ beq(t0, keylen, tail); ++ } ++ } ++ ++ __ bind(tail); ++ __ li(ftp, (intptr_t)fsb_consts); ++ __ alsl_d(key, keylen, key, 2 - 1); ++ ++ // Round N ++ for (int i = 0; i < 4; i++) { ++ __ bstrpick_d(t0, ya[(i + 3) & 3], 7, 0); ++ __ bstrpick_d(t1, ya[(i + 2) & 3], 15, 8); ++ __ bstrpick_d(t2, ya[(i + 1) & 3], 23, 16); ++ __ bstrpick_d(t3, ya[(i + 0) & 3], 31, 24); ++ __ ldx_bu(t0, ftp, t0); ++ __ ldx_bu(t1, ftp, t1); ++ __ ldx_bu(t2, ftp, t2); ++ __ ldx_bu(t3, ftp, t3); ++ __ ld_w(xa[i], key, 4 * i - 16); ++ __ slli_w(t1, t1, 8); ++ __ slli_w(t2, t2, 16); ++ __ slli_w(t3, t3, 24); ++ __ XOR(xa[i], xa[i], t0); ++ __ XOR(t0, t1, t2); ++ __ XOR(xa[i], xa[i], t3); ++ __ XOR(xa[i], xa[i], t0); ++ } ++ ++ for (int i = 0; i < 4; i++) { ++ __ revb_2h(xa[i], xa[i]); ++ } ++ for (int i = 0; i < 4; i++) { ++ __ rotri_w(xa[i], xa[i], 16); ++ } ++ for (int i = 0; i < 4; i++) { ++ __ st_w(xa[i], dst, 4 * i); ++ } ++ ++ if (cbc) { ++ __ move(key, keyold); ++ __ addi_d(src, src, 16); ++ __ addi_d(dst, dst, 16); ++ __ blt(src, srcend, loop); ++ ++ for (int i = 0; i < 4; i++) { ++ __ st_w(xa[i], rve, 4 * i); ++ } ++ ++ __ ld_d(S3, SP, 3 * wordSize); ++ __ ld_d(S2, SP, 2 * wordSize); ++ __ ld_d(S1, SP, 1 * wordSize); ++ __ ld_d(S0, SP, 0 * wordSize); ++ __ addi_d(SP, SP, 4 * wordSize); ++ ++ __ bind(done); ++ __ move(A0, srclen); ++ } ++ ++ __ jr(RA); ++ ++ return start; ++ } ++ ++ address generate_mulAdd() { ++ __ align(CodeEntryAlignment); ++ StubCodeMark mark(this, "StubRoutines", "mulAdd"); ++ ++ address entry = __ pc(); ++ ++ const Register out = A0; ++ const Register in = A1; ++ const Register offset = A2; ++ const Register len = A3; ++ const Register k = A4; ++ ++ __ block_comment("Entry:"); ++ __ mul_add(out, in, offset, len, k); ++ __ jr(RA); ++ ++ return 
entry; ++ } ++ ++ // Arguments: ++ // ++ // Inputs: ++ // A0 - source byte array address ++ // A1 - destination byte array address ++ // A2 - K (key) in little endian int array ++ // A3 - r vector byte array address ++ // A4 - input length ++ // ++ // Output: ++ // A0 - input length ++ // ++ address generate_aescrypt_decryptBlock(bool cbc) { ++ static const uint32_t rt_consts[256] = { ++ 0x51f4a750, 0x7e416553, 0x1a17a4c3, 0x3a275e96, ++ 0x3bab6bcb, 0x1f9d45f1, 0xacfa58ab, 0x4be30393, ++ 0x2030fa55, 0xad766df6, 0x88cc7691, 0xf5024c25, ++ 0x4fe5d7fc, 0xc52acbd7, 0x26354480, 0xb562a38f, ++ 0xdeb15a49, 0x25ba1b67, 0x45ea0e98, 0x5dfec0e1, ++ 0xc32f7502, 0x814cf012, 0x8d4697a3, 0x6bd3f9c6, ++ 0x038f5fe7, 0x15929c95, 0xbf6d7aeb, 0x955259da, ++ 0xd4be832d, 0x587421d3, 0x49e06929, 0x8ec9c844, ++ 0x75c2896a, 0xf48e7978, 0x99583e6b, 0x27b971dd, ++ 0xbee14fb6, 0xf088ad17, 0xc920ac66, 0x7dce3ab4, ++ 0x63df4a18, 0xe51a3182, 0x97513360, 0x62537f45, ++ 0xb16477e0, 0xbb6bae84, 0xfe81a01c, 0xf9082b94, ++ 0x70486858, 0x8f45fd19, 0x94de6c87, 0x527bf8b7, ++ 0xab73d323, 0x724b02e2, 0xe31f8f57, 0x6655ab2a, ++ 0xb2eb2807, 0x2fb5c203, 0x86c57b9a, 0xd33708a5, ++ 0x302887f2, 0x23bfa5b2, 0x02036aba, 0xed16825c, ++ 0x8acf1c2b, 0xa779b492, 0xf307f2f0, 0x4e69e2a1, ++ 0x65daf4cd, 0x0605bed5, 0xd134621f, 0xc4a6fe8a, ++ 0x342e539d, 0xa2f355a0, 0x058ae132, 0xa4f6eb75, ++ 0x0b83ec39, 0x4060efaa, 0x5e719f06, 0xbd6e1051, ++ 0x3e218af9, 0x96dd063d, 0xdd3e05ae, 0x4de6bd46, ++ 0x91548db5, 0x71c45d05, 0x0406d46f, 0x605015ff, ++ 0x1998fb24, 0xd6bde997, 0x894043cc, 0x67d99e77, ++ 0xb0e842bd, 0x07898b88, 0xe7195b38, 0x79c8eedb, ++ 0xa17c0a47, 0x7c420fe9, 0xf8841ec9, 0x00000000, ++ 0x09808683, 0x322bed48, 0x1e1170ac, 0x6c5a724e, ++ 0xfd0efffb, 0x0f853856, 0x3daed51e, 0x362d3927, ++ 0x0a0fd964, 0x685ca621, 0x9b5b54d1, 0x24362e3a, ++ 0x0c0a67b1, 0x9357e70f, 0xb4ee96d2, 0x1b9b919e, ++ 0x80c0c54f, 0x61dc20a2, 0x5a774b69, 0x1c121a16, ++ 0xe293ba0a, 0xc0a02ae5, 0x3c22e043, 0x121b171d, ++ 0x0e090d0b, 0xf28bc7ad, 0x2db6a8b9, 0x141ea9c8, ++ 0x57f11985, 0xaf75074c, 0xee99ddbb, 0xa37f60fd, ++ 0xf701269f, 0x5c72f5bc, 0x44663bc5, 0x5bfb7e34, ++ 0x8b432976, 0xcb23c6dc, 0xb6edfc68, 0xb8e4f163, ++ 0xd731dcca, 0x42638510, 0x13972240, 0x84c61120, ++ 0x854a247d, 0xd2bb3df8, 0xaef93211, 0xc729a16d, ++ 0x1d9e2f4b, 0xdcb230f3, 0x0d8652ec, 0x77c1e3d0, ++ 0x2bb3166c, 0xa970b999, 0x119448fa, 0x47e96422, ++ 0xa8fc8cc4, 0xa0f03f1a, 0x567d2cd8, 0x223390ef, ++ 0x87494ec7, 0xd938d1c1, 0x8ccaa2fe, 0x98d40b36, ++ 0xa6f581cf, 0xa57ade28, 0xdab78e26, 0x3fadbfa4, ++ 0x2c3a9de4, 0x5078920d, 0x6a5fcc9b, 0x547e4662, ++ 0xf68d13c2, 0x90d8b8e8, 0x2e39f75e, 0x82c3aff5, ++ 0x9f5d80be, 0x69d0937c, 0x6fd52da9, 0xcf2512b3, ++ 0xc8ac993b, 0x10187da7, 0xe89c636e, 0xdb3bbb7b, ++ 0xcd267809, 0x6e5918f4, 0xec9ab701, 0x834f9aa8, ++ 0xe6956e65, 0xaaffe67e, 0x21bccf08, 0xef15e8e6, ++ 0xbae79bd9, 0x4a6f36ce, 0xea9f09d4, 0x29b07cd6, ++ 0x31a4b2af, 0x2a3f2331, 0xc6a59430, 0x35a266c0, ++ 0x744ebc37, 0xfc82caa6, 0xe090d0b0, 0x33a7d815, ++ 0xf104984a, 0x41ecdaf7, 0x7fcd500e, 0x1791f62f, ++ 0x764dd68d, 0x43efb04d, 0xccaa4d54, 0xe49604df, ++ 0x9ed1b5e3, 0x4c6a881b, 0xc12c1fb8, 0x4665517f, ++ 0x9d5eea04, 0x018c355d, 0xfa877473, 0xfb0b412e, ++ 0xb3671d5a, 0x92dbd252, 0xe9105633, 0x6dd64713, ++ 0x9ad7618c, 0x37a10c7a, 0x59f8148e, 0xeb133c89, ++ 0xcea927ee, 0xb761c935, 0xe11ce5ed, 0x7a47b13c, ++ 0x9cd2df59, 0x55f2733f, 0x1814ce79, 0x73c737bf, ++ 0x53f7cdea, 0x5ffdaa5b, 0xdf3d6f14, 0x7844db86, ++ 0xcaaff381, 0xb968c43e, 0x3824342c, 0xc2a3405f, ++ 0x161dc372, 0xbce2250c, 0x283c498b, 0xff0d9541, ++ 
0x39a80171, 0x080cb3de, 0xd8b4e49c, 0x6456c190, ++ 0x7bcb8461, 0xd532b670, 0x486c5c74, 0xd0b85742 ++ }; ++ static const uint8_t rsb_consts[256] = { ++ 0x52, 0x09, 0x6a, 0xd5, 0x30, 0x36, 0xa5, 0x38, ++ 0xbf, 0x40, 0xa3, 0x9e, 0x81, 0xf3, 0xd7, 0xfb, ++ 0x7c, 0xe3, 0x39, 0x82, 0x9b, 0x2f, 0xff, 0x87, ++ 0x34, 0x8e, 0x43, 0x44, 0xc4, 0xde, 0xe9, 0xcb, ++ 0x54, 0x7b, 0x94, 0x32, 0xa6, 0xc2, 0x23, 0x3d, ++ 0xee, 0x4c, 0x95, 0x0b, 0x42, 0xfa, 0xc3, 0x4e, ++ 0x08, 0x2e, 0xa1, 0x66, 0x28, 0xd9, 0x24, 0xb2, ++ 0x76, 0x5b, 0xa2, 0x49, 0x6d, 0x8b, 0xd1, 0x25, ++ 0x72, 0xf8, 0xf6, 0x64, 0x86, 0x68, 0x98, 0x16, ++ 0xd4, 0xa4, 0x5c, 0xcc, 0x5d, 0x65, 0xb6, 0x92, ++ 0x6c, 0x70, 0x48, 0x50, 0xfd, 0xed, 0xb9, 0xda, ++ 0x5e, 0x15, 0x46, 0x57, 0xa7, 0x8d, 0x9d, 0x84, ++ 0x90, 0xd8, 0xab, 0x00, 0x8c, 0xbc, 0xd3, 0x0a, ++ 0xf7, 0xe4, 0x58, 0x05, 0xb8, 0xb3, 0x45, 0x06, ++ 0xd0, 0x2c, 0x1e, 0x8f, 0xca, 0x3f, 0x0f, 0x02, ++ 0xc1, 0xaf, 0xbd, 0x03, 0x01, 0x13, 0x8a, 0x6b, ++ 0x3a, 0x91, 0x11, 0x41, 0x4f, 0x67, 0xdc, 0xea, ++ 0x97, 0xf2, 0xcf, 0xce, 0xf0, 0xb4, 0xe6, 0x73, ++ 0x96, 0xac, 0x74, 0x22, 0xe7, 0xad, 0x35, 0x85, ++ 0xe2, 0xf9, 0x37, 0xe8, 0x1c, 0x75, 0xdf, 0x6e, ++ 0x47, 0xf1, 0x1a, 0x71, 0x1d, 0x29, 0xc5, 0x89, ++ 0x6f, 0xb7, 0x62, 0x0e, 0xaa, 0x18, 0xbe, 0x1b, ++ 0xfc, 0x56, 0x3e, 0x4b, 0xc6, 0xd2, 0x79, 0x20, ++ 0x9a, 0xdb, 0xc0, 0xfe, 0x78, 0xcd, 0x5a, 0xf4, ++ 0x1f, 0xdd, 0xa8, 0x33, 0x88, 0x07, 0xc7, 0x31, ++ 0xb1, 0x12, 0x10, 0x59, 0x27, 0x80, 0xec, 0x5f, ++ 0x60, 0x51, 0x7f, 0xa9, 0x19, 0xb5, 0x4a, 0x0d, ++ 0x2d, 0xe5, 0x7a, 0x9f, 0x93, 0xc9, 0x9c, 0xef, ++ 0xa0, 0xe0, 0x3b, 0x4d, 0xae, 0x2a, 0xf5, 0xb0, ++ 0xc8, 0xeb, 0xbb, 0x3c, 0x83, 0x53, 0x99, 0x61, ++ 0x17, 0x2b, 0x04, 0x7e, 0xba, 0x77, 0xd6, 0x26, ++ 0xe1, 0x69, 0x14, 0x63, 0x55, 0x21, 0x0c, 0x7d ++ }; ++ ++ __ align(CodeEntryAlignment); ++ StubCodeMark mark(this, "StubRoutines", "aescrypt_decryptBlock"); ++ ++ // Allocate registers ++ Register src = A0; ++ Register dst = A1; ++ Register key = A2; ++ Register rve = A3; ++ Register srclen = A4; ++ Register keylen = T8; ++ Register srcend = A5; ++ Register t0 = A6; ++ Register t1 = A7; ++ Register t2, t3, rtp, rvp; ++ Register xa[4] = { T0, T1, T2, T3 }; ++ Register ya[4] = { T4, T5, T6, T7 }; ++ ++ Label loop, tail, done; ++ address start = __ pc(); ++ ++ if (cbc) { ++ t2 = S0; ++ t3 = S1; ++ rtp = S2; ++ rvp = S3; ++ ++ __ beqz(srclen, done); ++ ++ __ addi_d(SP, SP, -4 * wordSize); ++ __ st_d(S3, SP, 3 * wordSize); ++ __ st_d(S2, SP, 2 * wordSize); ++ __ st_d(S1, SP, 1 * wordSize); ++ __ st_d(S0, SP, 0 * wordSize); ++ ++ __ add_d(srcend, src, srclen); ++ __ move(rvp, rve); ++ } else { ++ t2 = A3; ++ t3 = A4; ++ rtp = A5; ++ } ++ ++ __ ld_w(keylen, key, arrayOopDesc::length_offset_in_bytes() - arrayOopDesc::base_offset_in_bytes(T_INT)); ++ ++ __ bind(loop); ++ ++ // Round 1 ++ for (int i = 0; i < 4; i++) { ++ __ ld_w(xa[i], src, 4 * i); ++ } ++ for (int i = 0; i < 4; i++) { ++ __ ld_w(ya[i], key, 4 * (4 + i)); ++ } ++ for (int i = 0; i < 4; i++) { ++ __ revb_2h(xa[i], xa[i]); ++ } ++ for (int i = 0; i < 4; i++) { ++ __ rotri_w(xa[i], xa[i], 16); ++ } ++ for (int i = 0; i < 4; i++) { ++ __ XOR(xa[i], xa[i], ya[i]); ++ } ++ ++ __ li(rtp, (intptr_t)rt_consts); ++ ++ // Round 2 - (N-1) ++ for (int r = 0; r < 14; r++) { ++ Register *xp; ++ Register *yp; ++ ++ if (r & 1) { ++ xp = xa; ++ yp = ya; ++ } else { ++ xp = ya; ++ yp = xa; ++ } ++ ++ for (int i = 0; i < 4; i++) { ++ __ ld_w(xp[i], key, 4 * (4 * (r + 1) + 4 + i)); ++ } ++ ++ for (int i = 0; i < 4; i++) { ++ __ bstrpick_d(t0, yp[(i + 1) 
& 3], 7, 0); ++ __ bstrpick_d(t1, yp[(i + 2) & 3], 15, 8); ++ __ bstrpick_d(t2, yp[(i + 3) & 3], 23, 16); ++ __ bstrpick_d(t3, yp[(i + 0) & 3], 31, 24); ++ __ slli_w(t0, t0, 2); ++ __ slli_w(t1, t1, 2); ++ __ slli_w(t2, t2, 2); ++ __ slli_w(t3, t3, 2); ++ __ ldx_w(t0, rtp, t0); ++ __ ldx_w(t1, rtp, t1); ++ __ ldx_w(t2, rtp, t2); ++ __ ldx_w(t3, rtp, t3); ++ __ rotri_w(t0, t0, 24); ++ __ rotri_w(t1, t1, 16); ++ __ rotri_w(t2, t2, 8); ++ __ XOR(xp[i], xp[i], t0); ++ __ XOR(t0, t1, t2); ++ __ XOR(xp[i], xp[i], t3); ++ __ XOR(xp[i], xp[i], t0); ++ } ++ ++ if (r == 8) { ++ // AES 128 ++ __ li(t0, 44); ++ __ beq(t0, keylen, tail); ++ } else if (r == 10) { ++ // AES 192 ++ __ li(t0, 52); ++ __ beq(t0, keylen, tail); ++ } ++ } ++ ++ __ bind(tail); ++ __ li(rtp, (intptr_t)rsb_consts); ++ ++ // Round N ++ for (int i = 0; i < 4; i++) { ++ __ bstrpick_d(t0, ya[(i + 1) & 3], 7, 0); ++ __ bstrpick_d(t1, ya[(i + 2) & 3], 15, 8); ++ __ bstrpick_d(t2, ya[(i + 3) & 3], 23, 16); ++ __ bstrpick_d(t3, ya[(i + 0) & 3], 31, 24); ++ __ ldx_bu(t0, rtp, t0); ++ __ ldx_bu(t1, rtp, t1); ++ __ ldx_bu(t2, rtp, t2); ++ __ ldx_bu(t3, rtp, t3); ++ __ ld_w(xa[i], key, 4 * i); ++ __ slli_w(t1, t1, 8); ++ __ slli_w(t2, t2, 16); ++ __ slli_w(t3, t3, 24); ++ __ XOR(xa[i], xa[i], t0); ++ __ XOR(t0, t1, t2); ++ __ XOR(xa[i], xa[i], t3); ++ __ XOR(xa[i], xa[i], t0); ++ } ++ ++ if (cbc) { ++ for (int i = 0; i < 4; i++) { ++ __ ld_w(ya[i], rvp, 4 * i); ++ } ++ } ++ for (int i = 0; i < 4; i++) { ++ __ revb_2h(xa[i], xa[i]); ++ } ++ for (int i = 0; i < 4; i++) { ++ __ rotri_w(xa[i], xa[i], 16); ++ } ++ if (cbc) { ++ for (int i = 0; i < 4; i++) { ++ __ XOR(xa[i], xa[i], ya[i]); ++ } ++ } ++ for (int i = 0; i < 4; i++) { ++ __ st_w(xa[i], dst, 4 * i); ++ } ++ ++ if (cbc) { ++ __ move(rvp, src); ++ __ addi_d(src, src, 16); ++ __ addi_d(dst, dst, 16); ++ __ blt(src, srcend, loop); ++ ++ __ ld_d(t0, src, -16); ++ __ ld_d(t1, src, -8); ++ __ st_d(t0, rve, 0); ++ __ st_d(t1, rve, 8); ++ ++ __ ld_d(S3, SP, 3 * wordSize); ++ __ ld_d(S2, SP, 2 * wordSize); ++ __ ld_d(S1, SP, 1 * wordSize); ++ __ ld_d(S0, SP, 0 * wordSize); ++ __ addi_d(SP, SP, 4 * wordSize); ++ ++ __ bind(done); ++ __ move(A0, srclen); ++ } ++ ++ __ jr(RA); ++ ++ return start; ++ } ++ ++ // Arguments: ++ // ++ // Inputs: ++ // A0 - byte[] source+offset ++ // A1 - int[] SHA.state ++ // A2 - int offset ++ // A3 - int limit ++ // ++ void generate_sha1_implCompress(const char *name, address &entry, address &entry_mb) { ++ __ align(CodeEntryAlignment); ++ StubCodeMark mark(this, "StubRoutines", name); ++ Label keys, loop; ++ ++ // Keys ++ __ bind(keys); ++ __ emit_int32(0x5a827999); ++ __ emit_int32(0x6ed9eba1); ++ __ emit_int32(0x8f1bbcdc); ++ __ emit_int32(0xca62c1d6); ++ ++ // Allocate registers ++ Register t0 = T5; ++ Register t1 = T6; ++ Register t2 = T7; ++ Register t3 = T8; ++ Register buf = A0; ++ Register state = A1; ++ Register ofs = A2; ++ Register limit = A3; ++ Register ka[4] = { A4, A5, A6, A7 }; ++ Register sa[5] = { T0, T1, T2, T3, T4 }; ++ ++ // Entry ++ entry = __ pc(); ++ __ move(ofs, R0); ++ __ move(limit, R0); ++ ++ // Entry MB ++ entry_mb = __ pc(); ++ ++ // Allocate scratch space ++ __ addi_d(SP, SP, -64); ++ ++ // Load keys ++ __ lipc(t0, keys); ++ __ ld_w(ka[0], t0, 0); ++ __ ld_w(ka[1], t0, 4); ++ __ ld_w(ka[2], t0, 8); ++ __ ld_w(ka[3], t0, 12); ++ ++ __ bind(loop); ++ // Load arguments ++ __ ld_w(sa[0], state, 0); ++ __ ld_w(sa[1], state, 4); ++ __ ld_w(sa[2], state, 8); ++ __ ld_w(sa[3], state, 12); ++ __ ld_w(sa[4], state, 16); ++ ++ // 80 rounds of 
hashing ++ for (int i = 0; i < 80; i++) { ++ Register a = sa[(5 - (i % 5)) % 5]; ++ Register b = sa[(6 - (i % 5)) % 5]; ++ Register c = sa[(7 - (i % 5)) % 5]; ++ Register d = sa[(8 - (i % 5)) % 5]; ++ Register e = sa[(9 - (i % 5)) % 5]; ++ ++ if (i < 16) { ++ __ ld_w(t0, buf, i * 4); ++ __ revb_2h(t0, t0); ++ __ rotri_w(t0, t0, 16); ++ __ add_w(e, e, t0); ++ __ st_w(t0, SP, i * 4); ++ __ XOR(t0, c, d); ++ __ AND(t0, t0, b); ++ __ XOR(t0, t0, d); ++ } else { ++ __ ld_w(t0, SP, ((i - 3) & 0xF) * 4); ++ __ ld_w(t1, SP, ((i - 8) & 0xF) * 4); ++ __ ld_w(t2, SP, ((i - 14) & 0xF) * 4); ++ __ ld_w(t3, SP, ((i - 16) & 0xF) * 4); ++ __ XOR(t0, t0, t1); ++ __ XOR(t0, t0, t2); ++ __ XOR(t0, t0, t3); ++ __ rotri_w(t0, t0, 31); ++ __ add_w(e, e, t0); ++ __ st_w(t0, SP, (i & 0xF) * 4); ++ ++ if (i < 20) { ++ __ XOR(t0, c, d); ++ __ AND(t0, t0, b); ++ __ XOR(t0, t0, d); ++ } else if (i < 40 || i >= 60) { ++ __ XOR(t0, b, c); ++ __ XOR(t0, t0, d); ++ } else if (i < 60) { ++ __ OR(t0, c, d); ++ __ AND(t0, t0, b); ++ __ AND(t2, c, d); ++ __ OR(t0, t0, t2); ++ } ++ } ++ ++ __ rotri_w(b, b, 2); ++ __ add_w(e, e, t0); ++ __ add_w(e, e, ka[i / 20]); ++ __ rotri_w(t0, a, 27); ++ __ add_w(e, e, t0); ++ } ++ ++ // Save updated state ++ __ ld_w(t0, state, 0); ++ __ ld_w(t1, state, 4); ++ __ ld_w(t2, state, 8); ++ __ ld_w(t3, state, 12); ++ __ add_w(sa[0], sa[0], t0); ++ __ ld_w(t0, state, 16); ++ __ add_w(sa[1], sa[1], t1); ++ __ add_w(sa[2], sa[2], t2); ++ __ add_w(sa[3], sa[3], t3); ++ __ add_w(sa[4], sa[4], t0); ++ __ st_w(sa[0], state, 0); ++ __ st_w(sa[1], state, 4); ++ __ st_w(sa[2], state, 8); ++ __ st_w(sa[3], state, 12); ++ __ st_w(sa[4], state, 16); ++ ++ __ addi_w(ofs, ofs, 64); ++ __ addi_d(buf, buf, 64); ++ __ bge(limit, ofs, loop); ++ __ move(V0, ofs); // return ofs ++ ++ __ addi_d(SP, SP, 64); ++ __ jr(RA); ++ } ++ ++ // Arguments: ++ // ++ // Inputs: ++ // A0 - byte[] source+offset ++ // A1 - int[] SHA.state ++ // A2 - int offset ++ // A3 - int limit ++ // ++ void generate_sha256_implCompress(const char *name, address &entry, address &entry_mb) { ++ static const uint32_t round_consts[64] = { ++ 0x428a2f98, 0x71374491, 0xb5c0fbcf, 0xe9b5dba5, ++ 0x3956c25b, 0x59f111f1, 0x923f82a4, 0xab1c5ed5, ++ 0xd807aa98, 0x12835b01, 0x243185be, 0x550c7dc3, ++ 0x72be5d74, 0x80deb1fe, 0x9bdc06a7, 0xc19bf174, ++ 0xe49b69c1, 0xefbe4786, 0x0fc19dc6, 0x240ca1cc, ++ 0x2de92c6f, 0x4a7484aa, 0x5cb0a9dc, 0x76f988da, ++ 0x983e5152, 0xa831c66d, 0xb00327c8, 0xbf597fc7, ++ 0xc6e00bf3, 0xd5a79147, 0x06ca6351, 0x14292967, ++ 0x27b70a85, 0x2e1b2138, 0x4d2c6dfc, 0x53380d13, ++ 0x650a7354, 0x766a0abb, 0x81c2c92e, 0x92722c85, ++ 0xa2bfe8a1, 0xa81a664b, 0xc24b8b70, 0xc76c51a3, ++ 0xd192e819, 0xd6990624, 0xf40e3585, 0x106aa070, ++ 0x19a4c116, 0x1e376c08, 0x2748774c, 0x34b0bcb5, ++ 0x391c0cb3, 0x4ed8aa4a, 0x5b9cca4f, 0x682e6ff3, ++ 0x748f82ee, 0x78a5636f, 0x84c87814, 0x8cc70208, ++ 0x90befffa, 0xa4506ceb, 0xbef9a3f7, 0xc67178f2, ++ }; ++ __ align(CodeEntryAlignment); ++ StubCodeMark mark(this, "StubRoutines", name); ++ Label loop; ++ ++ // Allocate registers ++ Register t0 = A4; ++ Register t1 = A5; ++ Register t2 = A6; ++ Register t3 = A7; ++ Register buf = A0; ++ Register state = A1; ++ Register ofs = A2; ++ Register limit = A3; ++ Register kptr = T8; ++ Register sa[8] = { T0, T1, T2, T3, T4, T5, T6, T7 }; ++ ++ // Entry ++ entry = __ pc(); ++ __ move(ofs, R0); ++ __ move(limit, R0); ++ ++ // Entry MB ++ entry_mb = __ pc(); ++ ++ // Allocate scratch space ++ __ addi_d(SP, SP, -64); ++ ++ // Load keys base address ++ __ li(kptr, 
(intptr_t)round_consts); ++ ++ __ bind(loop); ++ // Load state ++ __ ld_w(sa[0], state, 0); ++ __ ld_w(sa[1], state, 4); ++ __ ld_w(sa[2], state, 8); ++ __ ld_w(sa[3], state, 12); ++ __ ld_w(sa[4], state, 16); ++ __ ld_w(sa[5], state, 20); ++ __ ld_w(sa[6], state, 24); ++ __ ld_w(sa[7], state, 28); ++ ++ // Do 64 rounds of hashing ++ for (int i = 0; i < 64; i++) { ++ Register a = sa[(0 - i) & 7]; ++ Register b = sa[(1 - i) & 7]; ++ Register c = sa[(2 - i) & 7]; ++ Register d = sa[(3 - i) & 7]; ++ Register e = sa[(4 - i) & 7]; ++ Register f = sa[(5 - i) & 7]; ++ Register g = sa[(6 - i) & 7]; ++ Register h = sa[(7 - i) & 7]; ++ ++ if (i < 16) { ++ __ ld_w(t1, buf, i * 4); ++ __ revb_2h(t1, t1); ++ __ rotri_w(t1, t1, 16); ++ } else { ++ __ ld_w(t0, SP, ((i - 15) & 0xF) * 4); ++ __ ld_w(t1, SP, ((i - 16) & 0xF) * 4); ++ __ ld_w(t2, SP, ((i - 7) & 0xF) * 4); ++ __ add_w(t1, t1, t2); ++ __ rotri_w(t2, t0, 18); ++ __ srli_w(t3, t0, 3); ++ __ rotri_w(t0, t0, 7); ++ __ XOR(t2, t2, t3); ++ __ XOR(t0, t0, t2); ++ __ add_w(t1, t1, t0); ++ __ ld_w(t0, SP, ((i - 2) & 0xF) * 4); ++ __ rotri_w(t2, t0, 19); ++ __ srli_w(t3, t0, 10); ++ __ rotri_w(t0, t0, 17); ++ __ XOR(t2, t2, t3); ++ __ XOR(t0, t0, t2); ++ __ add_w(t1, t1, t0); ++ } ++ ++ __ rotri_w(t2, e, 11); ++ __ rotri_w(t3, e, 25); ++ __ rotri_w(t0, e, 6); ++ __ XOR(t2, t2, t3); ++ __ XOR(t0, t0, t2); ++ __ XOR(t2, g, f); ++ __ ld_w(t3, kptr, i * 4); ++ __ AND(t2, t2, e); ++ __ XOR(t2, t2, g); ++ __ add_w(t0, t0, t2); ++ __ add_w(t0, t0, t3); ++ __ add_w(h, h, t1); ++ __ add_w(h, h, t0); ++ __ add_w(d, d, h); ++ __ rotri_w(t2, a, 13); ++ __ rotri_w(t3, a, 22); ++ __ rotri_w(t0, a, 2); ++ __ XOR(t2, t2, t3); ++ __ XOR(t0, t0, t2); ++ __ add_w(h, h, t0); ++ __ OR(t0, c, b); ++ __ AND(t2, c, b); ++ __ AND(t0, t0, a); ++ __ OR(t0, t0, t2); ++ __ add_w(h, h, t0); ++ __ st_w(t1, SP, (i & 0xF) * 4); ++ } ++ ++ // Add to state ++ __ ld_w(t0, state, 0); ++ __ ld_w(t1, state, 4); ++ __ ld_w(t2, state, 8); ++ __ ld_w(t3, state, 12); ++ __ add_w(sa[0], sa[0], t0); ++ __ add_w(sa[1], sa[1], t1); ++ __ add_w(sa[2], sa[2], t2); ++ __ add_w(sa[3], sa[3], t3); ++ __ ld_w(t0, state, 16); ++ __ ld_w(t1, state, 20); ++ __ ld_w(t2, state, 24); ++ __ ld_w(t3, state, 28); ++ __ add_w(sa[4], sa[4], t0); ++ __ add_w(sa[5], sa[5], t1); ++ __ add_w(sa[6], sa[6], t2); ++ __ add_w(sa[7], sa[7], t3); ++ __ st_w(sa[0], state, 0); ++ __ st_w(sa[1], state, 4); ++ __ st_w(sa[2], state, 8); ++ __ st_w(sa[3], state, 12); ++ __ st_w(sa[4], state, 16); ++ __ st_w(sa[5], state, 20); ++ __ st_w(sa[6], state, 24); ++ __ st_w(sa[7], state, 28); ++ ++ __ addi_w(ofs, ofs, 64); ++ __ addi_d(buf, buf, 64); ++ __ bge(limit, ofs, loop); ++ __ move(V0, ofs); // return ofs ++ ++ __ addi_d(SP, SP, 64); ++ __ jr(RA); ++ } ++ ++ // Do NOT delete this node which stands for stub routine placeholder ++ address generate_updateBytesCRC32() { ++ assert(UseCRC32Intrinsics, "need CRC32 instructions support"); ++ ++ __ align(CodeEntryAlignment); ++ StubCodeMark mark(this, "StubRoutines", "updateBytesCRC32"); ++ ++ address start = __ pc(); ++ ++ const Register crc = A0; // crc ++ const Register buf = A1; // source java byte array address ++ const Register len = A2; // length ++ const Register tmp = A3; ++ ++ __ enter(); // required for proper stackwalking of RuntimeStub frame ++ ++ __ kernel_crc32(crc, buf, len, tmp); ++ ++ __ leave(); // required for proper stackwalking of RuntimeStub frame ++ __ jr(RA); ++ ++ return start; ++ } ++ ++ // Do NOT delete this node which stands for stub routine placeholder ++ 
address generate_updateBytesCRC32C() { ++ assert(UseCRC32CIntrinsics, "need CRC32C instructions support"); ++ ++ __ align(CodeEntryAlignment); ++ StubCodeMark mark(this, "StubRoutines", "updateBytesCRC32C"); ++ ++ address start = __ pc(); ++ ++ const Register crc = A0; // crc ++ const Register buf = A1; // source java byte array address ++ const Register len = A2; // length ++ const Register tmp = A3; ++ ++ __ enter(); // required for proper stackwalking of RuntimeStub frame ++ ++ __ kernel_crc32c(crc, buf, len, tmp); ++ ++ __ leave(); // required for proper stackwalking of RuntimeStub frame ++ __ jr(RA); ++ ++ return start; ++ } ++ ++ address generate_dsin_dcos(bool isCos) { ++ __ align(CodeEntryAlignment); ++ StubCodeMark mark(this, "StubRoutines", isCos ? "libmDcos" : "libmDsin"); ++ address start = __ pc(); ++ __ generate_dsin_dcos(isCos, (address)StubRoutines::la::_npio2_hw, ++ (address)StubRoutines::la::_two_over_pi, ++ (address)StubRoutines::la::_pio2, ++ (address)StubRoutines::la::_dsin_coef, ++ (address)StubRoutines::la::_dcos_coef); ++ return start; ++ } ++ ++ // add a function to implement SafeFetch32 and SafeFetchN ++ void generate_safefetch(const char* name, int size, address* entry, ++ address* fault_pc, address* continuation_pc) { ++ // safefetch signatures: ++ // int SafeFetch32(int* adr, int errValue); ++ // intptr_t SafeFetchN (intptr_t* adr, intptr_t errValue); ++ // ++ // arguments: ++ // A0 = adr ++ // A1 = errValue ++ // ++ // result: ++ // PPC_RET = *adr or errValue ++ StubCodeMark mark(this, "StubRoutines", name); ++ ++ // Entry point, pc or function descriptor. ++ *entry = __ pc(); ++ ++ // Load *adr into A1, may fault. ++ *fault_pc = __ pc(); ++ switch (size) { ++ case 4: ++ // int32_t ++ __ ld_w(A1, A0, 0); ++ break; ++ case 8: ++ // int64_t ++ __ ld_d(A1, A0, 0); ++ break; ++ default: ++ ShouldNotReachHere(); ++ } ++ ++ // return errValue or *adr ++ *continuation_pc = __ pc(); ++ __ add_d(V0, A1, R0); ++ __ jr(RA); ++ } ++ ++ ++#undef __ ++#define __ masm-> ++ ++ // Continuation point for throwing of implicit exceptions that are ++ // not handled in the current activation. Fabricates an exception ++ // oop and initiates normal exception dispatching in this ++ // frame. Since we need to preserve callee-saved values (currently ++ // only for C2, but done for C1 as well) we need a callee-saved oop ++ // map and therefore have to make these stubs into RuntimeStubs ++ // rather than BufferBlobs. If the compiler needs all registers to ++ // be preserved between the fault point and the exception handler ++ // then it must assume responsibility for that in ++ // AbstractCompiler::continuation_for_implicit_null_exception or ++ // continuation_for_implicit_division_by_zero_exception. All other ++ // implicit exceptions (e.g., NullPointerException or ++ // AbstractMethodError on entry) are either at call sites or ++ // otherwise assume that stack unwinding will be initiated, so ++ // caller saved registers were assumed volatile in the compiler. ++ address generate_throw_exception(const char* name, ++ address runtime_entry, ++ bool restore_saved_exception_pc) { ++ // Information about frame layout at time of blocking runtime call. ++ // Note that we only have to preserve callee-saved registers since ++ // the compilers are responsible for supplying a continuation point ++ // if they expect all registers to be preserved. 
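++  // The enum below lays out the stub frame in words above SP: slot 0
++  // (thread_off) receives the JavaThread* that is later pushed as the first C
++  // argument, slots 1..8 hold the callee-saved registers S7..S0, and the
++  // FP / return-address slots are the ones filled in by enter().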
++ enum layout { ++ thread_off, // last_java_sp ++ S7_off, // callee saved register sp + 1 ++ S6_off, // callee saved register sp + 2 ++ S5_off, // callee saved register sp + 3 ++ S4_off, // callee saved register sp + 4 ++ S3_off, // callee saved register sp + 5 ++ S2_off, // callee saved register sp + 6 ++ S1_off, // callee saved register sp + 7 ++ S0_off, // callee saved register sp + 8 ++ FP_off, ++ ret_address, ++ framesize ++ }; ++ ++ int insts_size = 2048; ++ int locs_size = 32; ++ ++ // CodeBuffer* code = new CodeBuffer(insts_size, locs_size, 0, 0, 0, false, ++ // NULL, NULL, NULL, false, NULL, name, false); ++ CodeBuffer code (name , insts_size, locs_size); ++ OopMapSet* oop_maps = new OopMapSet(); ++ MacroAssembler* masm = new MacroAssembler(&code); ++ ++ address start = __ pc(); ++ ++ // This is an inlined and slightly modified version of call_VM ++ // which has the ability to fetch the return PC out of ++ // thread-local storage and also sets up last_Java_sp slightly ++ // differently than the real call_VM ++#ifndef OPT_THREAD ++ Register java_thread = TREG; ++ __ get_thread(java_thread); ++#else ++ Register java_thread = TREG; ++#endif ++ if (restore_saved_exception_pc) { ++ __ ld_d(RA, java_thread, in_bytes(JavaThread::saved_exception_pc_offset())); ++ } ++ __ enter(); // required for proper stackwalking of RuntimeStub frame ++ ++ __ addi_d(SP, SP, (-1) * (framesize-2) * wordSize); // prolog ++ __ st_d(S0, SP, S0_off * wordSize); ++ __ st_d(S1, SP, S1_off * wordSize); ++ __ st_d(S2, SP, S2_off * wordSize); ++ __ st_d(S3, SP, S3_off * wordSize); ++ __ st_d(S4, SP, S4_off * wordSize); ++ __ st_d(S5, SP, S5_off * wordSize); ++ __ st_d(S6, SP, S6_off * wordSize); ++ __ st_d(S7, SP, S7_off * wordSize); ++ ++ int frame_complete = __ pc() - start; ++ // push java thread (becomes first argument of C function) ++ __ st_d(java_thread, SP, thread_off * wordSize); ++ if (java_thread != A0) ++ __ move(A0, java_thread); ++ ++ // Set up last_Java_sp and last_Java_fp ++ Label before_call; ++ address the_pc = __ pc(); ++ __ bind(before_call); ++ __ set_last_Java_frame(java_thread, SP, FP, before_call); ++ // Align stack ++ __ li(AT, -(StackAlignmentInBytes)); ++ __ andr(SP, SP, AT); ++ ++ // Call runtime ++ // TODO: confirm reloc ++ __ call(runtime_entry, relocInfo::runtime_call_type); ++ // Generate oop map ++ OopMap* map = new OopMap(framesize, 0); ++ oop_maps->add_gc_map(the_pc - start, map); ++ ++ // restore the thread (cannot use the pushed argument since arguments ++ // may be overwritten by C code generated by an optimizing compiler); ++ // however can use the register value directly if it is callee saved. ++#ifndef OPT_THREAD ++ __ get_thread(java_thread); ++#endif ++ ++ __ ld_d(SP, java_thread, in_bytes(JavaThread::last_Java_sp_offset())); ++ __ reset_last_Java_frame(java_thread, true); ++ ++ // Restore callee save registers. 
This must be done after resetting the Java frame ++ __ ld_d(S0, SP, S0_off * wordSize); ++ __ ld_d(S1, SP, S1_off * wordSize); ++ __ ld_d(S2, SP, S2_off * wordSize); ++ __ ld_d(S3, SP, S3_off * wordSize); ++ __ ld_d(S4, SP, S4_off * wordSize); ++ __ ld_d(S5, SP, S5_off * wordSize); ++ __ ld_d(S6, SP, S6_off * wordSize); ++ __ ld_d(S7, SP, S7_off * wordSize); ++ ++ // discard arguments ++ __ move(SP, FP); // epilog ++ __ pop(FP); ++ // check for pending exceptions ++#ifdef ASSERT ++ Label L; ++ __ ld_d(AT, java_thread, in_bytes(Thread::pending_exception_offset())); ++ __ bne(AT, R0, L); ++ __ should_not_reach_here(); ++ __ bind(L); ++#endif //ASSERT ++ __ jmp(StubRoutines::forward_exception_entry(), relocInfo::runtime_call_type); ++ RuntimeStub* stub = RuntimeStub::new_runtime_stub(name, ++ &code, ++ frame_complete, ++ framesize, ++ oop_maps, false); ++ return stub->entry_point(); ++ } ++ ++ class MontgomeryMultiplyGenerator : public MacroAssembler { ++ ++ Register Pa_base, Pb_base, Pn_base, Pm_base, inv, Rlen, Rlen2, Ra, Rb, Rm, ++ Rn, Iam, Ibn, Rhi_ab, Rlo_ab, Rhi_mn, Rlo_mn, t0, t1, t2, Ri, Rj; ++ ++ bool _squaring; ++ ++ public: ++ MontgomeryMultiplyGenerator (Assembler *as, bool squaring) ++ : MacroAssembler(as->code()), _squaring(squaring) { ++ ++ // Register allocation ++ ++ Register reg = A0; ++ Pa_base = reg; // Argument registers: ++ if (squaring) ++ Pb_base = Pa_base; ++ else ++ Pb_base = ++reg; ++ Pn_base = ++reg; ++ Rlen = ++reg; ++ inv = ++reg; ++ Rlen2 = inv; // Reuse inv ++ Pm_base = ++reg; ++ ++ // Working registers: ++ Ra = ++reg; // The current digit of a, b, n, and m. ++ Rb = ++reg; ++ Rm = ++reg; ++ Rn = ++reg; ++ ++ Iam = ++reg; // Index to the current/next digit of a, b, n, and m. ++ Ibn = ++reg; ++ ++ t0 = ++reg; // Three registers which form a ++ t1 = ++reg; // triple-precision accumuator. ++ t2 = ++reg; ++ ++ Ri = ++reg; // Inner and outer loop indexes. ++ Rj = ++reg; ++ ++ if (squaring) { ++ Rhi_ab = ++reg; // Product registers: low and high parts ++ reg = S0; ++ Rlo_ab = ++reg; // of a*b and m*n. ++ } else { ++ reg = S0; ++ Rhi_ab = reg; // Product registers: low and high parts ++ Rlo_ab = ++reg; // of a*b and m*n. 
++ } ++ ++ Rhi_mn = ++reg; ++ Rlo_mn = ++reg; ++ } ++ ++ private: ++ void enter() { ++ addi_d(SP, SP, -6 * wordSize); ++ st_d(FP, SP, 0 * wordSize); ++ move(FP, SP); ++ } ++ ++ void leave() { ++ addi_d(T0, FP, 6 * wordSize); ++ ld_d(FP, FP, 0 * wordSize); ++ move(SP, T0); ++ } ++ ++ void save_regs() { ++ if (!_squaring) ++ st_d(Rhi_ab, FP, 5 * wordSize); ++ st_d(Rlo_ab, FP, 4 * wordSize); ++ st_d(Rhi_mn, FP, 3 * wordSize); ++ st_d(Rlo_mn, FP, 2 * wordSize); ++ st_d(Pm_base, FP, 1 * wordSize); ++ } ++ ++ void restore_regs() { ++ if (!_squaring) ++ ld_d(Rhi_ab, FP, 5 * wordSize); ++ ld_d(Rlo_ab, FP, 4 * wordSize); ++ ld_d(Rhi_mn, FP, 3 * wordSize); ++ ld_d(Rlo_mn, FP, 2 * wordSize); ++ ld_d(Pm_base, FP, 1 * wordSize); ++ } ++ ++ template ++ void unroll_2(Register count, T block, Register tmp) { ++ Label loop, end, odd; ++ andi(tmp, count, 1); ++ bnez(tmp, odd); ++ beqz(count, end); ++ align(16); ++ bind(loop); ++ (this->*block)(); ++ bind(odd); ++ (this->*block)(); ++ addi_w(count, count, -2); ++ blt(R0, count, loop); ++ bind(end); ++ } ++ ++ template ++ void unroll_2(Register count, T block, Register d, Register s, Register tmp) { ++ Label loop, end, odd; ++ andi(tmp, count, 1); ++ bnez(tmp, odd); ++ beqz(count, end); ++ align(16); ++ bind(loop); ++ (this->*block)(d, s, tmp); ++ bind(odd); ++ (this->*block)(d, s, tmp); ++ addi_w(count, count, -2); ++ blt(R0, count, loop); ++ bind(end); ++ } ++ ++ void acc(Register Rhi, Register Rlo, ++ Register t0, Register t1, Register t2, Register t, Register c) { ++ add_d(t0, t0, Rlo); ++ OR(t, t1, Rhi); ++ sltu(c, t0, Rlo); ++ add_d(t1, t1, Rhi); ++ add_d(t1, t1, c); ++ sltu(c, t1, t); ++ add_d(t2, t2, c); ++ } ++ ++ void pre1(Register i) { ++ block_comment("pre1"); ++ // Iam = 0; ++ // Ibn = i; ++ ++ slli_w(Ibn, i, LogBytesPerWord); ++ ++ // Ra = Pa_base[Iam]; ++ // Rb = Pb_base[Ibn]; ++ // Rm = Pm_base[Iam]; ++ // Rn = Pn_base[Ibn]; ++ ++ ld_d(Ra, Pa_base, 0); ++ ldx_d(Rb, Pb_base, Ibn); ++ ld_d(Rm, Pm_base, 0); ++ ldx_d(Rn, Pn_base, Ibn); ++ ++ move(Iam, R0); ++ ++ // Zero the m*n result. ++ move(Rhi_mn, R0); ++ move(Rlo_mn, R0); ++ } ++ ++ // The core multiply-accumulate step of a Montgomery ++ // multiplication. The idea is to schedule operations as a ++ // pipeline so that instructions with long latencies (loads and ++ // multiplies) have time to complete before their results are ++ // used. This most benefits in-order implementations of the ++ // architecture but out-of-order ones also benefit. ++ void step() { ++ block_comment("step"); ++ // MACC(Ra, Rb, t0, t1, t2); ++ // Ra = Pa_base[++Iam]; ++ // Rb = Pb_base[--Ibn]; ++ addi_d(Iam, Iam, wordSize); ++ addi_d(Ibn, Ibn, -wordSize); ++ mul_d(Rlo_ab, Ra, Rb); ++ mulh_du(Rhi_ab, Ra, Rb); ++ acc(Rhi_mn, Rlo_mn, t0, t1, t2, Ra, Rb); // The pending m*n from the ++ // previous iteration. 
++ ldx_d(Ra, Pa_base, Iam); ++ ldx_d(Rb, Pb_base, Ibn); ++ ++ // MACC(Rm, Rn, t0, t1, t2); ++ // Rm = Pm_base[Iam]; ++ // Rn = Pn_base[Ibn]; ++ mul_d(Rlo_mn, Rm, Rn); ++ mulh_du(Rhi_mn, Rm, Rn); ++ acc(Rhi_ab, Rlo_ab, t0, t1, t2, Rm, Rn); ++ ldx_d(Rm, Pm_base, Iam); ++ ldx_d(Rn, Pn_base, Ibn); ++ } ++ ++ void post1() { ++ block_comment("post1"); ++ ++ // MACC(Ra, Rb, t0, t1, t2); ++ mul_d(Rlo_ab, Ra, Rb); ++ mulh_du(Rhi_ab, Ra, Rb); ++ acc(Rhi_mn, Rlo_mn, t0, t1, t2, Ra, Rb); // The pending m*n ++ acc(Rhi_ab, Rlo_ab, t0, t1, t2, Ra, Rb); ++ ++ // Pm_base[Iam] = Rm = t0 * inv; ++ mul_d(Rm, t0, inv); ++ stx_d(Rm, Pm_base, Iam); ++ ++ // MACC(Rm, Rn, t0, t1, t2); ++ // t0 = t1; t1 = t2; t2 = 0; ++ mulh_du(Rhi_mn, Rm, Rn); ++ ++#ifndef PRODUCT ++ // assert(m[i] * n[0] + t0 == 0, "broken Montgomery multiply"); ++ { ++ mul_d(Rlo_mn, Rm, Rn); ++ add_d(Rlo_mn, t0, Rlo_mn); ++ Label ok; ++ beqz(Rlo_mn, ok); { ++ stop("broken Montgomery multiply"); ++ } bind(ok); ++ } ++#endif ++ ++ // We have very carefully set things up so that ++ // m[i]*n[0] + t0 == 0 (mod b), so we don't have to calculate ++ // the lower half of Rm * Rn because we know the result already: ++ // it must be -t0. t0 + (-t0) must generate a carry iff ++ // t0 != 0. So, rather than do a mul and an adds we just set ++ // the carry flag iff t0 is nonzero. ++ // ++ // mul_d(Rlo_mn, Rm, Rn); ++ // add_d(t0, t0, Rlo_mn); ++ OR(Ra, t1, Rhi_mn); ++ sltu(Rb, R0, t0); ++ add_d(t0, t1, Rhi_mn); ++ add_d(t0, t0, Rb); ++ sltu(Rb, t0, Ra); ++ add_d(t1, t2, Rb); ++ move(t2, R0); ++ } ++ ++ void pre2(Register i, Register len) { ++ block_comment("pre2"); ++ ++ // Rj == i-len ++ sub_w(Rj, i, len); ++ ++ // Iam = i - len; ++ // Ibn = len; ++ slli_w(Iam, Rj, LogBytesPerWord); ++ slli_w(Ibn, len, LogBytesPerWord); ++ ++ // Ra = Pa_base[++Iam]; ++ // Rb = Pb_base[--Ibn]; ++ // Rm = Pm_base[++Iam]; ++ // Rn = Pn_base[--Ibn]; ++ addi_d(Iam, Iam, wordSize); ++ addi_d(Ibn, Ibn, -wordSize); ++ ++ ldx_d(Ra, Pa_base, Iam); ++ ldx_d(Rb, Pb_base, Ibn); ++ ldx_d(Rm, Pm_base, Iam); ++ ldx_d(Rn, Pn_base, Ibn); ++ ++ move(Rhi_mn, R0); ++ move(Rlo_mn, R0); ++ } ++ ++ void post2(Register i, Register len) { ++ block_comment("post2"); ++ ++ sub_w(Rj, i, len); ++ slli_w(Iam, Rj, LogBytesPerWord); ++ ++ add_d(t0, t0, Rlo_mn); // The pending m*n, low part ++ ++ // As soon as we know the least significant digit of our result, ++ // store it. ++ // Pm_base[i-len] = t0; ++ stx_d(t0, Pm_base, Iam); ++ ++ // t0 = t1; t1 = t2; t2 = 0; ++ OR(Ra, t1, Rhi_mn); ++ sltu(Rb, t0, Rlo_mn); ++ add_d(t0, t1, Rhi_mn); // The pending m*n, high part ++ add_d(t0, t0, Rb); ++ sltu(Rb, t0, Ra); ++ add_d(t1, t2, Rb); ++ move(t2, R0); ++ } ++ ++ // A carry in t0 after Montgomery multiplication means that we ++ // should subtract multiples of n from our result in m. We'll ++ // keep doing that until there is no carry. 
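++  // In C, approximately (illustrative sketch only; names mirror the registers
++  // allocated above):
++  //
++  //   while (t0) {
++  //     unsigned long borrow = 0;
++  //     for (int i = 0; i < len; i++) {
++  //       unsigned long m = Pm_base[i];
++  //       unsigned long t = (m < borrow);               // borrow out of m -= borrow
++  //       m -= borrow;
++  //       borrow = (unsigned long)(m < Pn_base[i]) | t; // borrow out of m -= n[i]
++  //       m -= Pn_base[i];
++  //       Pm_base[i] = m;
++  //     }
++  //     t0 -= borrow;
++  //   }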
++ void normalize(Register len) { ++ block_comment("normalize"); ++ // while (t0) ++ // t0 = sub(Pm_base, Pn_base, t0, len); ++ Label loop, post, again; ++ Register cnt = t1, i = t2, b = Ra, t = Rb; // Re-use registers; we're done with them now ++ beqz(t0, post); { ++ bind(again); { ++ move(i, R0); ++ move(b, R0); ++ slli_w(cnt, len, LogBytesPerWord); ++ align(16); ++ bind(loop); { ++ ldx_d(Rm, Pm_base, i); ++ ldx_d(Rn, Pn_base, i); ++ sltu(t, Rm, b); ++ sub_d(Rm, Rm, b); ++ sltu(b, Rm, Rn); ++ sub_d(Rm, Rm, Rn); ++ OR(b, b, t); ++ stx_d(Rm, Pm_base, i); ++ addi_w(i, i, BytesPerWord); ++ } blt(i, cnt, loop); ++ sub_d(t0, t0, b); ++ } bnez(t0, again); ++ } bind(post); ++ } ++ ++ // Move memory at s to d, reversing words. ++ // Increments d to end of copied memory ++ // Destroys tmp1, tmp2, tmp3 ++ // Preserves len ++ // Leaves s pointing to the address which was in d at start ++ void reverse(Register d, Register s, Register len, Register tmp1, Register tmp2) { ++ assert(tmp1 < S0 && tmp2 < S0, "register corruption"); ++ ++ alsl_d(s, len, s, LogBytesPerWord - 1); ++ move(tmp1, len); ++ unroll_2(tmp1, &MontgomeryMultiplyGenerator::reverse1, d, s, tmp2); ++ slli_w(s, len, LogBytesPerWord); ++ sub_d(s, d, s); ++ } ++ ++ // where ++ void reverse1(Register d, Register s, Register tmp) { ++ ld_d(tmp, s, -wordSize); ++ addi_d(s, s, -wordSize); ++ addi_d(d, d, wordSize); ++ rotri_d(tmp, tmp, 32); ++ st_d(tmp, d, -wordSize); ++ } ++ ++ public: ++ /** ++ * Fast Montgomery multiplication. The derivation of the ++ * algorithm is in A Cryptographic Library for the Motorola ++ * DSP56000, Dusse and Kaliski, Proc. EUROCRYPT 90, pp. 230-237. ++ * ++ * Arguments: ++ * ++ * Inputs for multiplication: ++ * A0 - int array elements a ++ * A1 - int array elements b ++ * A2 - int array elements n (the modulus) ++ * A3 - int length ++ * A4 - int inv ++ * A5 - int array elements m (the result) ++ * ++ * Inputs for squaring: ++ * A0 - int array elements a ++ * A1 - int array elements n (the modulus) ++ * A2 - int length ++ * A3 - int inv ++ * A4 - int array elements m (the result) ++ * ++ */ ++ address generate_multiply() { ++ Label argh, nothing; ++ bind(argh); ++ stop("MontgomeryMultiply total_allocation must be <= 8192"); ++ ++ align(CodeEntryAlignment); ++ address entry = pc(); ++ ++ beqz(Rlen, nothing); ++ ++ enter(); ++ ++ // Make room. ++ sltui(Ra, Rlen, 513); ++ beqz(Ra, argh); ++ slli_w(Ra, Rlen, exact_log2(4 * sizeof (jint))); ++ sub_d(Ra, SP, Ra); ++ ++ srli_w(Rlen, Rlen, 1); // length in longwords = len/2 ++ ++ { ++ // Copy input args, reversing as we go. We use Ra as a ++ // temporary variable. ++ reverse(Ra, Pa_base, Rlen, t0, t1); ++ if (!_squaring) ++ reverse(Ra, Pb_base, Rlen, t0, t1); ++ reverse(Ra, Pn_base, Rlen, t0, t1); ++ } ++ ++ // Push all call-saved registers and also Pm_base which we'll need ++ // at the end. 
++ save_regs(); ++ ++#ifndef PRODUCT ++ // assert(inv * n[0] == -1UL, "broken inverse in Montgomery multiply"); ++ { ++ ld_d(Rn, Pn_base, 0); ++ li(t0, -1); ++ mul_d(Rlo_mn, Rn, inv); ++ Label ok; ++ beq(Rlo_mn, t0, ok); { ++ stop("broken inverse in Montgomery multiply"); ++ } bind(ok); ++ } ++#endif ++ ++ move(Pm_base, Ra); ++ ++ move(t0, R0); ++ move(t1, R0); ++ move(t2, R0); ++ ++ block_comment("for (int i = 0; i < len; i++) {"); ++ move(Ri, R0); { ++ Label loop, end; ++ bge(Ri, Rlen, end); ++ ++ bind(loop); ++ pre1(Ri); ++ ++ block_comment(" for (j = i; j; j--) {"); { ++ move(Rj, Ri); ++ unroll_2(Rj, &MontgomeryMultiplyGenerator::step, Rlo_ab); ++ } block_comment(" } // j"); ++ ++ post1(); ++ addi_w(Ri, Ri, 1); ++ blt(Ri, Rlen, loop); ++ bind(end); ++ block_comment("} // i"); ++ } ++ ++ block_comment("for (int i = len; i < 2*len; i++) {"); ++ move(Ri, Rlen); ++ slli_w(Rlen2, Rlen, 1); { ++ Label loop, end; ++ bge(Ri, Rlen2, end); ++ ++ bind(loop); ++ pre2(Ri, Rlen); ++ ++ block_comment(" for (j = len*2-i-1; j; j--) {"); { ++ sub_w(Rj, Rlen2, Ri); ++ addi_w(Rj, Rj, -1); ++ unroll_2(Rj, &MontgomeryMultiplyGenerator::step, Rlo_ab); ++ } block_comment(" } // j"); ++ ++ post2(Ri, Rlen); ++ addi_w(Ri, Ri, 1); ++ blt(Ri, Rlen2, loop); ++ bind(end); ++ } ++ block_comment("} // i"); ++ ++ normalize(Rlen); ++ ++ move(Ra, Pm_base); // Save Pm_base in Ra ++ restore_regs(); // Restore caller's Pm_base ++ ++ // Copy our result into caller's Pm_base ++ reverse(Pm_base, Ra, Rlen, t0, t1); ++ ++ leave(); ++ bind(nothing); ++ jr(RA); ++ ++ return entry; ++ } ++ // In C, approximately: ++ ++ // void ++ // montgomery_multiply(unsigned long Pa_base[], unsigned long Pb_base[], ++ // unsigned long Pn_base[], unsigned long Pm_base[], ++ // unsigned long inv, int len) { ++ // unsigned long t0 = 0, t1 = 0, t2 = 0; // Triple-precision accumulator ++ // unsigned long Ra, Rb, Rn, Rm; ++ // int i, Iam, Ibn; ++ ++ // assert(inv * Pn_base[0] == -1UL, "broken inverse in Montgomery multiply"); ++ ++ // for (i = 0; i < len; i++) { ++ // int j; ++ ++ // Iam = 0; ++ // Ibn = i; ++ ++ // Ra = Pa_base[Iam]; ++ // Rb = Pb_base[Iam]; ++ // Rm = Pm_base[Ibn]; ++ // Rn = Pn_base[Ibn]; ++ ++ // int iters = i; ++ // for (j = 0; iters--; j++) { ++ // assert(Ra == Pa_base[j] && Rb == Pb_base[i-j], "must be"); ++ // MACC(Ra, Rb, t0, t1, t2); ++ // Ra = Pa_base[++Iam]; ++ // Rb = pb_base[--Ibn]; ++ // assert(Rm == Pm_base[j] && Rn == Pn_base[i-j], "must be"); ++ // MACC(Rm, Rn, t0, t1, t2); ++ // Rm = Pm_base[++Iam]; ++ // Rn = Pn_base[--Ibn]; ++ // } ++ ++ // assert(Ra == Pa_base[i] && Rb == Pb_base[0], "must be"); ++ // MACC(Ra, Rb, t0, t1, t2); ++ // Pm_base[Iam] = Rm = t0 * inv; ++ // assert(Rm == Pm_base[i] && Rn == Pn_base[0], "must be"); ++ // MACC(Rm, Rn, t0, t1, t2); ++ ++ // assert(t0 == 0, "broken Montgomery multiply"); ++ ++ // t0 = t1; t1 = t2; t2 = 0; ++ // } ++ ++ // for (i = len; i < 2*len; i++) { ++ // int j; ++ ++ // Iam = i - len; ++ // Ibn = len; ++ ++ // Ra = Pa_base[++Iam]; ++ // Rb = Pb_base[--Ibn]; ++ // Rm = Pm_base[++Iam]; ++ // Rn = Pn_base[--Ibn]; ++ ++ // int iters = len*2-i-1; ++ // for (j = i-len+1; iters--; j++) { ++ // assert(Ra == Pa_base[j] && Rb == Pb_base[i-j], "must be"); ++ // MACC(Ra, Rb, t0, t1, t2); ++ // Ra = Pa_base[++Iam]; ++ // Rb = Pb_base[--Ibn]; ++ // assert(Rm == Pm_base[j] && Rn == Pn_base[i-j], "must be"); ++ // MACC(Rm, Rn, t0, t1, t2); ++ // Rm = Pm_base[++Iam]; ++ // Rn = Pn_base[--Ibn]; ++ // } ++ ++ // Pm_base[i-len] = t0; ++ // t0 = t1; t1 = t2; t2 = 0; ++ // } ++ ++ // while 
(t0) ++ // t0 = sub(Pm_base, Pn_base, t0, len); ++ // } ++ }; ++ ++ // Initialization ++ void generate_initial() { ++ // Generates all stubs and initializes the entry points ++ ++ //------------------------------------------------------------- ++ //----------------------------------------------------------- ++ // entry points that exist in all platforms ++ // Note: This is code that could be shared among different platforms - however the benefit seems to be smaller ++ // than the disadvantage of having a much more complicated generator structure. ++ // See also comment in stubRoutines.hpp. ++ StubRoutines::_forward_exception_entry = generate_forward_exception(); ++ StubRoutines::_call_stub_entry = generate_call_stub(StubRoutines::_call_stub_return_address); ++ // is referenced by megamorphic call ++ StubRoutines::_catch_exception_entry = generate_catch_exception(); ++ ++ StubRoutines::_throw_StackOverflowError_entry = ++ generate_throw_exception("StackOverflowError throw_exception", ++ CAST_FROM_FN_PTR(address, SharedRuntime::throw_StackOverflowError), ++ false); ++ StubRoutines::_throw_delayed_StackOverflowError_entry = ++ generate_throw_exception("delayed StackOverflowError throw_exception", ++ CAST_FROM_FN_PTR(address, SharedRuntime::throw_delayed_StackOverflowError), ++ false); ++ ++ if (UseCRC32Intrinsics) { ++ // set table address before stub generation which use it ++ StubRoutines::_crc_table_adr = (address)StubRoutines::la::_crc_table; ++ StubRoutines::_updateBytesCRC32 = generate_updateBytesCRC32(); ++ } ++ ++ if (UseCRC32CIntrinsics) { ++ StubRoutines::_updateBytesCRC32C = generate_updateBytesCRC32C(); ++ } ++ } ++ ++ void generate_all() { ++ // Generates all stubs and initializes the entry points ++ ++ // These entry points require SharedInfo::stack0 to be set up in ++ // non-core builds and need to be relocatable, so they each ++ // fabricate a RuntimeStub internally. ++ StubRoutines::_throw_AbstractMethodError_entry = generate_throw_exception("AbstractMethodError throw_exception", ++ CAST_FROM_FN_PTR(address, SharedRuntime::throw_AbstractMethodError), false); ++ ++ StubRoutines::_throw_IncompatibleClassChangeError_entry = generate_throw_exception("IncompatibleClassChangeError throw_exception", ++ CAST_FROM_FN_PTR(address, SharedRuntime:: throw_IncompatibleClassChangeError), false); ++ ++ StubRoutines::_throw_NullPointerException_at_call_entry = generate_throw_exception("NullPointerException at call throw_exception", ++ CAST_FROM_FN_PTR(address, SharedRuntime::throw_NullPointerException_at_call), false); ++ ++ // entry points that are platform specific ++ ++ // support for verify_oop (must happen after universe_init) ++ StubRoutines::_verify_oop_subroutine_entry = generate_verify_oop(); ++#ifndef CORE ++ // arraycopy stubs used by compilers ++ generate_arraycopy_stubs(); ++#endif ++ ++ if (UseLSX && vmIntrinsics::is_intrinsic_available(vmIntrinsics::_dsin)) { ++ StubRoutines::_dsin = generate_dsin_dcos(/* isCos = */ false); ++ } ++ ++ if (UseLSX && vmIntrinsics::is_intrinsic_available(vmIntrinsics::_dcos)) { ++ StubRoutines::_dcos = generate_dsin_dcos(/* isCos = */ true); ++ } ++ ++ // Safefetch stubs. 
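++  // SafeFetch32/SafeFetchN return *adr, or errValue if the load at adr faults;
++  // the fault_pc / continuation_pc pair recorded by generate_safefetch() above
++  // is what allows the VM to resume past such a faulting load.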
++ generate_safefetch("SafeFetch32", sizeof(int), &StubRoutines::_safefetch32_entry, ++ &StubRoutines::_safefetch32_fault_pc, ++ &StubRoutines::_safefetch32_continuation_pc); ++ generate_safefetch("SafeFetchN", sizeof(intptr_t), &StubRoutines::_safefetchN_entry, ++ &StubRoutines::_safefetchN_fault_pc, ++ &StubRoutines::_safefetchN_continuation_pc); ++ ++#ifdef COMPILER2 ++ if (UseMulAddIntrinsic) { ++ StubRoutines::_mulAdd = generate_mulAdd(); ++ } ++ ++ if (UseMontgomeryMultiplyIntrinsic) { ++ StubCodeMark mark(this, "StubRoutines", "montgomeryMultiply"); ++ MontgomeryMultiplyGenerator g(_masm, false /* squaring */); ++ StubRoutines::_montgomeryMultiply = g.generate_multiply(); ++ } ++ ++ if (UseMontgomerySquareIntrinsic) { ++ StubCodeMark mark(this, "StubRoutines", "montgomerySquare"); ++ MontgomeryMultiplyGenerator g(_masm, true /* squaring */); ++ // We use generate_multiply() rather than generate_square() ++ // because it's faster for the sizes of modulus we care about. ++ StubRoutines::_montgomerySquare = g.generate_multiply(); ++ } ++#endif ++ ++ if (UseAESIntrinsics) { ++ StubRoutines::_aescrypt_encryptBlock = generate_aescrypt_encryptBlock(false); ++ StubRoutines::_aescrypt_decryptBlock = generate_aescrypt_decryptBlock(false); ++ StubRoutines::_cipherBlockChaining_encryptAESCrypt = generate_aescrypt_encryptBlock(true); ++ StubRoutines::_cipherBlockChaining_decryptAESCrypt = generate_aescrypt_decryptBlock(true); ++ } ++ ++ if (UseSHA1Intrinsics) { ++ generate_sha1_implCompress("sha1_implCompress", StubRoutines::_sha1_implCompress, StubRoutines::_sha1_implCompressMB); ++ } ++ ++ if (UseSHA256Intrinsics) { ++ generate_sha256_implCompress("sha256_implCompress", StubRoutines::_sha256_implCompress, StubRoutines::_sha256_implCompressMB); ++ } ++ } ++ ++ public: ++ StubGenerator(CodeBuffer* code, bool all) : StubCodeGenerator(code) { ++ if (all) { ++ generate_all(); ++ } else { ++ generate_initial(); ++ } ++ } ++}; // end class declaration ++ ++void StubGenerator_generate(CodeBuffer* code, bool all) { ++ StubGenerator g(code, all); ++} +diff --git a/src/hotspot/cpu/loongarch/stubRoutines_loongarch.hpp b/src/hotspot/cpu/loongarch/stubRoutines_loongarch.hpp +new file mode 100644 +index 0000000000..0ab07e1e9e +--- /dev/null ++++ b/src/hotspot/cpu/loongarch/stubRoutines_loongarch.hpp +@@ -0,0 +1,67 @@ ++/* ++ * Copyright (c) 2003, 2013, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ * ++ */ ++ ++#ifndef CPU_LOONGARCH_STUBROUTINES_LOONGARCH_64_HPP ++#define CPU_LOONGARCH_STUBROUTINES_LOONGARCH_64_HPP ++ ++// This file holds the platform specific parts of the StubRoutines ++// definition. See stubRoutines.hpp for a description on how to ++// extend it. ++ ++static bool returns_to_call_stub(address return_pc){ ++ return return_pc == _call_stub_return_address||return_pc == la::get_call_stub_compiled_return(); ++} ++ ++enum platform_dependent_constants { ++ code_size1 = 20000, // simply increase if too small (assembler will crash if too small) ++ code_size2 = 60000 // simply increase if too small (assembler will crash if too small) ++}; ++ ++class la { ++ friend class StubGenerator; ++ friend class VMStructs; ++ private: ++ // If we call compiled code directly from the call stub we will ++ // need to adjust the return back to the call stub to a specialized ++ // piece of code that can handle compiled results and cleaning the fpu ++ // stack. The variable holds that location. ++ static address _call_stub_compiled_return; ++ static juint _crc_table[]; ++ // begin trigonometric tables block. See comments in .cpp file ++ static juint _npio2_hw[]; ++ static jdouble _two_over_pi[]; ++ static jdouble _pio2[]; ++ static jdouble _dsin_coef[]; ++ static jdouble _dcos_coef[]; ++ // end trigonometric tables block ++ ++public: ++ // Call back points for traps in compiled code ++ static address get_call_stub_compiled_return() { return _call_stub_compiled_return; } ++ static void set_call_stub_compiled_return(address ret){ _call_stub_compiled_return = ret; } ++ ++}; ++ ++#endif // CPU_LOONGARCH_STUBROUTINES_LOONGARCH_64_HPP +diff --git a/src/hotspot/cpu/loongarch/stubRoutines_loongarch_64.cpp b/src/hotspot/cpu/loongarch/stubRoutines_loongarch_64.cpp +new file mode 100644 +index 0000000000..1a6ea3bcde +--- /dev/null ++++ b/src/hotspot/cpu/loongarch/stubRoutines_loongarch_64.cpp +@@ -0,0 +1,178 @@ ++/* ++ * Copyright (c) 2003, 2013, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#include "precompiled.hpp" ++#include "runtime/deoptimization.hpp" ++#include "runtime/frame.inline.hpp" ++#include "runtime/stubRoutines.hpp" ++#include "runtime/thread.inline.hpp" ++ ++// a description of how to extend it, see the stubRoutines.hpp file. 
++ ++//find the last fp value ++address StubRoutines::la::_call_stub_compiled_return = NULL; ++ ++/** ++ * crc_table[] from jdk/src/share/native/java/util/zip/zlib-1.2.5/crc32.h ++ */ ++juint StubRoutines::la::_crc_table[] = ++{ ++ 0x00000000UL, 0x77073096UL, 0xee0e612cUL, 0x990951baUL, 0x076dc419UL, ++ 0x706af48fUL, 0xe963a535UL, 0x9e6495a3UL, 0x0edb8832UL, 0x79dcb8a4UL, ++ 0xe0d5e91eUL, 0x97d2d988UL, 0x09b64c2bUL, 0x7eb17cbdUL, 0xe7b82d07UL, ++ 0x90bf1d91UL, 0x1db71064UL, 0x6ab020f2UL, 0xf3b97148UL, 0x84be41deUL, ++ 0x1adad47dUL, 0x6ddde4ebUL, 0xf4d4b551UL, 0x83d385c7UL, 0x136c9856UL, ++ 0x646ba8c0UL, 0xfd62f97aUL, 0x8a65c9ecUL, 0x14015c4fUL, 0x63066cd9UL, ++ 0xfa0f3d63UL, 0x8d080df5UL, 0x3b6e20c8UL, 0x4c69105eUL, 0xd56041e4UL, ++ 0xa2677172UL, 0x3c03e4d1UL, 0x4b04d447UL, 0xd20d85fdUL, 0xa50ab56bUL, ++ 0x35b5a8faUL, 0x42b2986cUL, 0xdbbbc9d6UL, 0xacbcf940UL, 0x32d86ce3UL, ++ 0x45df5c75UL, 0xdcd60dcfUL, 0xabd13d59UL, 0x26d930acUL, 0x51de003aUL, ++ 0xc8d75180UL, 0xbfd06116UL, 0x21b4f4b5UL, 0x56b3c423UL, 0xcfba9599UL, ++ 0xb8bda50fUL, 0x2802b89eUL, 0x5f058808UL, 0xc60cd9b2UL, 0xb10be924UL, ++ 0x2f6f7c87UL, 0x58684c11UL, 0xc1611dabUL, 0xb6662d3dUL, 0x76dc4190UL, ++ 0x01db7106UL, 0x98d220bcUL, 0xefd5102aUL, 0x71b18589UL, 0x06b6b51fUL, ++ 0x9fbfe4a5UL, 0xe8b8d433UL, 0x7807c9a2UL, 0x0f00f934UL, 0x9609a88eUL, ++ 0xe10e9818UL, 0x7f6a0dbbUL, 0x086d3d2dUL, 0x91646c97UL, 0xe6635c01UL, ++ 0x6b6b51f4UL, 0x1c6c6162UL, 0x856530d8UL, 0xf262004eUL, 0x6c0695edUL, ++ 0x1b01a57bUL, 0x8208f4c1UL, 0xf50fc457UL, 0x65b0d9c6UL, 0x12b7e950UL, ++ 0x8bbeb8eaUL, 0xfcb9887cUL, 0x62dd1ddfUL, 0x15da2d49UL, 0x8cd37cf3UL, ++ 0xfbd44c65UL, 0x4db26158UL, 0x3ab551ceUL, 0xa3bc0074UL, 0xd4bb30e2UL, ++ 0x4adfa541UL, 0x3dd895d7UL, 0xa4d1c46dUL, 0xd3d6f4fbUL, 0x4369e96aUL, ++ 0x346ed9fcUL, 0xad678846UL, 0xda60b8d0UL, 0x44042d73UL, 0x33031de5UL, ++ 0xaa0a4c5fUL, 0xdd0d7cc9UL, 0x5005713cUL, 0x270241aaUL, 0xbe0b1010UL, ++ 0xc90c2086UL, 0x5768b525UL, 0x206f85b3UL, 0xb966d409UL, 0xce61e49fUL, ++ 0x5edef90eUL, 0x29d9c998UL, 0xb0d09822UL, 0xc7d7a8b4UL, 0x59b33d17UL, ++ 0x2eb40d81UL, 0xb7bd5c3bUL, 0xc0ba6cadUL, 0xedb88320UL, 0x9abfb3b6UL, ++ 0x03b6e20cUL, 0x74b1d29aUL, 0xead54739UL, 0x9dd277afUL, 0x04db2615UL, ++ 0x73dc1683UL, 0xe3630b12UL, 0x94643b84UL, 0x0d6d6a3eUL, 0x7a6a5aa8UL, ++ 0xe40ecf0bUL, 0x9309ff9dUL, 0x0a00ae27UL, 0x7d079eb1UL, 0xf00f9344UL, ++ 0x8708a3d2UL, 0x1e01f268UL, 0x6906c2feUL, 0xf762575dUL, 0x806567cbUL, ++ 0x196c3671UL, 0x6e6b06e7UL, 0xfed41b76UL, 0x89d32be0UL, 0x10da7a5aUL, ++ 0x67dd4accUL, 0xf9b9df6fUL, 0x8ebeeff9UL, 0x17b7be43UL, 0x60b08ed5UL, ++ 0xd6d6a3e8UL, 0xa1d1937eUL, 0x38d8c2c4UL, 0x4fdff252UL, 0xd1bb67f1UL, ++ 0xa6bc5767UL, 0x3fb506ddUL, 0x48b2364bUL, 0xd80d2bdaUL, 0xaf0a1b4cUL, ++ 0x36034af6UL, 0x41047a60UL, 0xdf60efc3UL, 0xa867df55UL, 0x316e8eefUL, ++ 0x4669be79UL, 0xcb61b38cUL, 0xbc66831aUL, 0x256fd2a0UL, 0x5268e236UL, ++ 0xcc0c7795UL, 0xbb0b4703UL, 0x220216b9UL, 0x5505262fUL, 0xc5ba3bbeUL, ++ 0xb2bd0b28UL, 0x2bb45a92UL, 0x5cb36a04UL, 0xc2d7ffa7UL, 0xb5d0cf31UL, ++ 0x2cd99e8bUL, 0x5bdeae1dUL, 0x9b64c2b0UL, 0xec63f226UL, 0x756aa39cUL, ++ 0x026d930aUL, 0x9c0906a9UL, 0xeb0e363fUL, 0x72076785UL, 0x05005713UL, ++ 0x95bf4a82UL, 0xe2b87a14UL, 0x7bb12baeUL, 0x0cb61b38UL, 0x92d28e9bUL, ++ 0xe5d5be0dUL, 0x7cdcefb7UL, 0x0bdbdf21UL, 0x86d3d2d4UL, 0xf1d4e242UL, ++ 0x68ddb3f8UL, 0x1fda836eUL, 0x81be16cdUL, 0xf6b9265bUL, 0x6fb077e1UL, ++ 0x18b74777UL, 0x88085ae6UL, 0xff0f6a70UL, 0x66063bcaUL, 0x11010b5cUL, ++ 0x8f659effUL, 0xf862ae69UL, 0x616bffd3UL, 0x166ccf45UL, 0xa00ae278UL, ++ 0xd70dd2eeUL, 0x4e048354UL, 
0x3903b3c2UL, 0xa7672661UL, 0xd06016f7UL, ++ 0x4969474dUL, 0x3e6e77dbUL, 0xaed16a4aUL, 0xd9d65adcUL, 0x40df0b66UL, ++ 0x37d83bf0UL, 0xa9bcae53UL, 0xdebb9ec5UL, 0x47b2cf7fUL, 0x30b5ffe9UL, ++ 0xbdbdf21cUL, 0xcabac28aUL, 0x53b39330UL, 0x24b4a3a6UL, 0xbad03605UL, ++ 0xcdd70693UL, 0x54de5729UL, 0x23d967bfUL, 0xb3667a2eUL, 0xc4614ab8UL, ++ 0x5d681b02UL, 0x2a6f2b94UL, 0xb40bbe37UL, 0xc30c8ea1UL, 0x5a05df1bUL, ++ 0x2d02ef8dUL ++}; ++ ++ATTRIBUTE_ALIGNED(64) juint StubRoutines::la::_npio2_hw[] = { ++ // first, various coefficient values: 0.5, invpio2, pio2_1, pio2_1t, pio2_2, ++ // pio2_2t, pio2_3, pio2_3t ++ // This is a small optimization wich keeping double[8] values in int[] table ++ // to have less address calculation instructions ++ // ++ // invpio2: 53 bits of 2/pi (enough for cases when trigonometric argument is small) ++ // pio2_1: first 33 bit of pi/2 ++ // pio2_1t: pi/2 - pio2_1 ++ // pio2_2: second 33 bit of pi/2 ++ // pio2_2t: pi/2 - (pio2_1+pio2_2) ++ // pio2_3: third 33 bit of pi/2 ++ // pio2_3t: pi/2 - (pio2_1+pio2_2+pio2_3) ++ 0x00000000, 0x3fe00000, // 0.5 ++ 0x6DC9C883, 0x3FE45F30, // invpio2 = 6.36619772367581382433e-01 ++ 0x54400000, 0x3FF921FB, // pio2_1 = 1.57079632673412561417e+00 ++ 0x1A626331, 0x3DD0B461, // pio2_1t = 6.07710050650619224932e-11 ++ 0x1A600000, 0x3DD0B461, // pio2_2 = 6.07710050630396597660e-11 ++ 0x2E037073, 0x3BA3198A, // pio2_2t = 2.02226624879595063154e-21 ++ 0x2E000000, 0x3BA3198A, // pio2_3 = 2.02226624871116645580e-21 ++ 0x252049C1, 0x397B839A, // pio2_3t = 8.47842766036889956997e-32 ++ // now, npio2_hw itself ++ 0x3FF921FB, 0x400921FB, 0x4012D97C, 0x401921FB, 0x401F6A7A, 0x4022D97C, ++ 0x4025FDBB, 0x402921FB, 0x402C463A, 0x402F6A7A, 0x4031475C, 0x4032D97C, ++ 0x40346B9C, 0x4035FDBB, 0x40378FDB, 0x403921FB, 0x403AB41B, 0x403C463A, ++ 0x403DD85A, 0x403F6A7A, 0x40407E4C, 0x4041475C, 0x4042106C, 0x4042D97C, ++ 0x4043A28C, 0x40446B9C, 0x404534AC, 0x4045FDBB, 0x4046C6CB, 0x40478FDB, ++ 0x404858EB, 0x404921FB ++}; ++ ++// Coefficients for sin(x) polynomial approximation: S1..S6. ++// See kernel_sin comments in macroAssembler_loongarch64_trig.cpp for details ++ATTRIBUTE_ALIGNED(64) jdouble StubRoutines::la::_dsin_coef[] = { ++ -1.66666666666666324348e-01, // 0xBFC5555555555549 ++ 8.33333333332248946124e-03, // 0x3F8111111110F8A6 ++ -1.98412698298579493134e-04, // 0xBF2A01A019C161D5 ++ 2.75573137070700676789e-06, // 0x3EC71DE357B1FE7D ++ -2.50507602534068634195e-08, // 0xBE5AE5E68A2B9CEB ++ 1.58969099521155010221e-10 // 0x3DE5D93A5ACFD57C ++}; ++ ++// Coefficients for cos(x) polynomial approximation: C1..C6. ++// See kernel_cos comments in macroAssembler_loongarch64_trig.cpp for details ++ATTRIBUTE_ALIGNED(64) jdouble StubRoutines::la::_dcos_coef[] = { ++ 4.16666666666666019037e-02, // c0x3FA555555555554C ++ -1.38888888888741095749e-03, // 0xBF56C16C16C15177 ++ 2.48015872894767294178e-05, // 0x3EFA01A019CB1590 ++ -2.75573143513906633035e-07, // 0xBE927E4F809C52AD ++ 2.08757232129817482790e-09, // 0x3E21EE9EBDB4B1C4 ++ -1.13596475577881948265e-11 // 0xBDA8FAE9BE8838D4 ++}; ++ ++// Table of constants for 2/pi, 396 Hex digits (476 decimal) of 2/pi. ++// Used in cases of very large argument. 396 hex digits is enough to support ++// required precision. 
++// Converted to double to avoid unnecessary conversion in code ++// NOTE: table looks like original int table: {0xA2F983, 0x6E4E44,...} with ++// only (double) conversion added ++ATTRIBUTE_ALIGNED(64) jdouble StubRoutines::la::_two_over_pi[] = { ++ (double)0xA2F983, (double)0x6E4E44, (double)0x1529FC, (double)0x2757D1, (double)0xF534DD, (double)0xC0DB62, ++ (double)0x95993C, (double)0x439041, (double)0xFE5163, (double)0xABDEBB, (double)0xC561B7, (double)0x246E3A, ++ (double)0x424DD2, (double)0xE00649, (double)0x2EEA09, (double)0xD1921C, (double)0xFE1DEB, (double)0x1CB129, ++ (double)0xA73EE8, (double)0x8235F5, (double)0x2EBB44, (double)0x84E99C, (double)0x7026B4, (double)0x5F7E41, ++ (double)0x3991D6, (double)0x398353, (double)0x39F49C, (double)0x845F8B, (double)0xBDF928, (double)0x3B1FF8, ++ (double)0x97FFDE, (double)0x05980F, (double)0xEF2F11, (double)0x8B5A0A, (double)0x6D1F6D, (double)0x367ECF, ++ (double)0x27CB09, (double)0xB74F46, (double)0x3F669E, (double)0x5FEA2D, (double)0x7527BA, (double)0xC7EBE5, ++ (double)0xF17B3D, (double)0x0739F7, (double)0x8A5292, (double)0xEA6BFB, (double)0x5FB11F, (double)0x8D5D08, ++ (double)0x560330, (double)0x46FC7B, (double)0x6BABF0, (double)0xCFBC20, (double)0x9AF436, (double)0x1DA9E3, ++ (double)0x91615E, (double)0xE61B08, (double)0x659985, (double)0x5F14A0, (double)0x68408D, (double)0xFFD880, ++ (double)0x4D7327, (double)0x310606, (double)0x1556CA, (double)0x73A8C9, (double)0x60E27B, (double)0xC08C6B, ++}; ++ ++// Pi over 2 value ++ATTRIBUTE_ALIGNED(64) jdouble StubRoutines::la::_pio2[] = { ++ 1.57079625129699707031e+00, // 0x3FF921FB40000000 ++ 7.54978941586159635335e-08, // 0x3E74442D00000000 ++ 5.39030252995776476554e-15, // 0x3CF8469880000000 ++ 3.28200341580791294123e-22, // 0x3B78CC5160000000 ++ 1.27065575308067607349e-29, // 0x39F01B8380000000 ++ 1.22933308981111328932e-36, // 0x387A252040000000 ++ 2.73370053816464559624e-44, // 0x36E3822280000000 ++ 2.16741683877804819444e-51, // 0x3569F31D00000000 ++}; +diff --git a/src/hotspot/cpu/loongarch/templateInterpreterGenerator_loongarch.cpp b/src/hotspot/cpu/loongarch/templateInterpreterGenerator_loongarch.cpp +new file mode 100644 +index 0000000000..be1d28d4b8 +--- /dev/null ++++ b/src/hotspot/cpu/loongarch/templateInterpreterGenerator_loongarch.cpp +@@ -0,0 +1,2269 @@ ++/* ++ * Copyright (c) 2003, 2013, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ * ++ */ ++ ++#include "precompiled.hpp" ++#include "asm/macroAssembler.hpp" ++#include "gc/shared/barrierSetAssembler.hpp" ++#include "interpreter/bytecodeHistogram.hpp" ++#include "interpreter/interpreter.hpp" ++#include "interpreter/interpreterRuntime.hpp" ++#include "interpreter/interp_masm.hpp" ++#include "interpreter/templateInterpreterGenerator.hpp" ++#include "interpreter/templateTable.hpp" ++#include "oops/arrayOop.hpp" ++#include "oops/methodData.hpp" ++#include "oops/method.hpp" ++#include "oops/oop.inline.hpp" ++#include "prims/jvmtiExport.hpp" ++#include "prims/jvmtiThreadState.hpp" ++#include "runtime/arguments.hpp" ++#include "runtime/deoptimization.hpp" ++#include "runtime/frame.inline.hpp" ++#include "runtime/sharedRuntime.hpp" ++#include "runtime/stubRoutines.hpp" ++#include "runtime/synchronizer.hpp" ++#include "runtime/timer.hpp" ++#include "runtime/vframeArray.hpp" ++#include "utilities/debug.hpp" ++ ++#define __ _masm-> ++ ++#define T0 RT0 ++#define T1 RT1 ++#define T2 RT2 ++#define T3 RT3 ++#define T4 RT4 ++#define T5 RT5 ++#define T6 RT6 ++#define T7 RT7 ++#define T8 RT8 ++ ++int TemplateInterpreter::InterpreterCodeSize = 500 * K; ++ ++#ifdef PRODUCT ++#define BLOCK_COMMENT(str) /* nothing */ ++#else ++#define BLOCK_COMMENT(str) __ block_comment(str) ++#endif ++ ++address TemplateInterpreterGenerator::generate_slow_signature_handler() { ++ address entry = __ pc(); ++ // Rmethod: method ++ // LVP: pointer to locals ++ // A3: first stack arg ++ __ move(A3, SP); ++ __ addi_d(SP, SP, -18 * wordSize); ++ __ st_d(RA, SP, 0); ++ __ call_VM(noreg, ++ CAST_FROM_FN_PTR(address, ++ InterpreterRuntime::slow_signature_handler), ++ Rmethod, LVP, A3); ++ ++ // V0: result handler ++ ++ // Stack layout: ++ // ... ++ // 18 stack arg0 <--- old sp ++ // 17 floatReg arg7 ++ // ... ++ // 10 floatReg arg0 ++ // 9 float/double identifiers ++ // 8 IntReg arg7 ++ // ... ++ // 2 IntReg arg1 ++ // 1 aligned slot ++ // SP: 0 return address ++ ++ // Do FP first so we can use A3 as temp ++ __ ld_d(A3, Address(SP, 9 * wordSize)); // float/double identifiers ++ ++ for (int i= 0; i < Argument::n_float_register_parameters; i++) { ++ FloatRegister floatreg = as_FloatRegister(i + FA0->encoding()); ++ Label isdouble, done; ++ ++ __ andi(AT, A3, 1 << i); ++ __ bnez(AT, isdouble); ++ __ fld_s(floatreg, SP, (10 + i) * wordSize); ++ __ b(done); ++ __ bind(isdouble); ++ __ fld_d(floatreg, SP, (10 + i) * wordSize); ++ __ bind(done); ++ } ++ ++ // A0 is for env. ++ // If the mothed is not static, A1 will be corrected in generate_native_entry. ++ for (int i= 1; i < Argument::n_register_parameters; i++) { ++ Register reg = as_Register(i + A0->encoding()); ++ __ ld_d(reg, SP, (1 + i) * wordSize); ++ } ++ ++ // A0/V0 contains the result from the call of ++ // InterpreterRuntime::slow_signature_handler so we don't touch it ++ // here. It will be loaded with the JNIEnv* later. ++ __ ld_d(RA, SP, 0); ++ __ addi_d(SP, SP, 18 * wordSize); ++ __ jr(RA); ++ return entry; ++} ++ ++/** ++ * Method entry for static native methods: ++ * int java.util.zip.CRC32.update(int crc, int b) ++ */ ++address TemplateInterpreterGenerator::generate_CRC32_update_entry() { ++ if (UseCRC32Intrinsics) { ++ address entry = __ pc(); ++ ++ // rmethod: Method* ++ // Rsender: senderSP must preserved for slow path ++ // SP: args ++ ++ Label slow_path; ++ // If we need a safepoint check, generate full interpreter entry. 
++ __ li(AT, SafepointSynchronize::_not_synchronized); ++ __ li(T8, (long)SafepointSynchronize::address_of_state()); ++ __ bne(T8, AT, slow_path); ++ ++ // We don't generate local frame and don't align stack because ++ // we call stub code and there is no safepoint on this path. ++ ++ const Register crc = A0; // crc ++ const Register val = A1; // source java byte value ++ const Register tbl = A2; // scratch ++ ++ // Arguments are reversed on java expression stack ++ __ ld_w(val, SP, 0); // byte value ++ __ ld_w(crc, SP, wordSize); // Initial CRC ++ ++ __ li(tbl, (long)StubRoutines::crc_table_addr()); ++ ++ __ nor(crc, crc, R0); // ~crc ++ __ update_byte_crc32(crc, val, tbl); ++ __ nor(crc, crc, R0); // ~crc ++ ++ // restore caller SP ++ __ move(SP, Rsender); ++ __ jr(RA); ++ ++ // generate a vanilla native entry as the slow path ++ __ bind(slow_path); ++ __ jump_to_entry(Interpreter::entry_for_kind(Interpreter::native)); ++ return entry; ++ } ++ return NULL; ++} ++ ++/** ++ * Method entry for static native methods: ++ * int java.util.zip.CRC32.updateBytes(int crc, byte[] b, int off, int len) ++ * int java.util.zip.CRC32.updateByteBuffer(int crc, long buf, int off, int len) ++ */ ++address TemplateInterpreterGenerator::generate_CRC32_updateBytes_entry(AbstractInterpreter::MethodKind kind) { ++ if (UseCRC32Intrinsics) { ++ address entry = __ pc(); ++ ++ // rmethod: Method* ++ // Rsender: senderSP must preserved for slow path ++ // SP: args ++ ++ Label slow_path; ++ // If we need a safepoint check, generate full interpreter entry. ++ __ li(AT, SafepointSynchronize::_not_synchronized); ++ __ li(T8, (long)SafepointSynchronize::address_of_state()); ++ __ bne(T8, AT, slow_path); ++ ++ // We don't generate local frame and don't align stack because ++ // we call stub code and there is no safepoint on this path. 
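++    // In effect this entry evaluates CRC32.updateBytes(crc, b, off, len): it
++    // forms the address of the first byte to process (adding the array header
++    // for the byte[] flavour), loads crc and len from the expression stack,
++    // calls kernel_crc32, then restores the caller's SP from Rsender and returns.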
++ ++ const Register crc = A0; // crc ++ const Register buf = A1; // source java byte array address ++ const Register len = A2; // length ++ const Register tmp = A3; ++ ++ const Register off = len; // offset (never overlaps with 'len') ++ ++ // Arguments are reversed on java expression stack ++ // Calculate address of start element ++ __ ld_w(off, SP, wordSize); // int offset ++ __ ld_d(buf, SP, 2 * wordSize); // byte[] buf | long buf ++ __ add_d(buf, buf, off); // + offset ++ if (kind == Interpreter::java_util_zip_CRC32_updateByteBuffer) { ++ __ ld_w(crc, SP, 4 * wordSize); // long crc ++ } else { ++ __ addi_d(buf, buf, arrayOopDesc::base_offset_in_bytes(T_BYTE)); // + header size ++ __ ld_w(crc, SP, 3 * wordSize); // long crc ++ } ++ ++ // Can now load 'len' since we're finished with 'off' ++ __ ld_w(len, SP, 0); // length ++ ++ __ kernel_crc32(crc, buf, len, tmp); ++ ++ // restore caller SP ++ __ move(SP, Rsender); ++ __ jr(RA); ++ ++ // generate a vanilla native entry as the slow path ++ __ bind(slow_path); ++ __ jump_to_entry(Interpreter::entry_for_kind(Interpreter::native)); ++ return entry; ++ } ++ return NULL; ++} ++ ++/** ++ * Method entry for intrinsic-candidate (non-native) methods: ++ * int java.util.zip.CRC32C.updateBytes(int crc, byte[] b, int off, int end) ++ * int java.util.zip.CRC32C.updateDirectByteBuffer(int crc, long buf, int off, int end) ++ * Unlike CRC32, CRC32C does not have any methods marked as native ++ * CRC32C also uses an "end" variable instead of the length variable CRC32 uses ++ */ ++address TemplateInterpreterGenerator::generate_CRC32C_updateBytes_entry(AbstractInterpreter::MethodKind kind) { ++ if (UseCRC32CIntrinsics) { ++ address entry = __ pc(); ++ ++ const Register crc = A0; // initial crc ++ const Register buf = A1; // source java byte array address ++ const Register len = A2; // len argument to the kernel ++ const Register tmp = A3; ++ ++ const Register end = len; // index of last element to process ++ const Register off = crc; // offset ++ ++ __ ld_w(end, SP, 0); // int end ++ __ ld_w(off, SP, wordSize); // int offset ++ __ sub_w(len, end, off); // calculate length ++ __ ld_d(buf, SP, 2 * wordSize); // byte[] buf | long buf ++ __ add_d(buf, buf, off); // + offset ++ if (kind == Interpreter::java_util_zip_CRC32C_updateDirectByteBuffer) { ++ __ ld_w(crc, SP, 4 * wordSize); // int crc ++ } else { ++ __ addi_d(buf, buf, arrayOopDesc::base_offset_in_bytes(T_BYTE)); // + header size ++ __ ld_w(crc, SP, 3 * wordSize); // int crc ++ } ++ ++ __ kernel_crc32c(crc, buf, len, tmp); ++ ++ // restore caller SP ++ __ move(SP, Rsender); ++ __ jr(RA); ++ ++ return entry; ++ } ++ return NULL; ++} ++ ++// ++// Various method entries ++// ++ ++address TemplateInterpreterGenerator::generate_math_entry(AbstractInterpreter::MethodKind kind) { ++ if (!InlineIntrinsics) return NULL; // Generate a vanilla entry ++ ++ // These don't need a safepoint check because they aren't virtually ++ // callable. We won't enter these intrinsics from compiled code. ++ // If in the future we added an intrinsic which was virtually callable ++ // we'd have to worry about how to safepoint so that this code is used. 
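++  // As a rough sketch, the simple cases below emit straight-line code of the
++  // form (shown for java.lang.Math.sqrt, purely illustrative):
++  //
++  //   F0 = sqrt(*(double*)SP);   // operand is on the expression stack
++  //   SP = Rsender;              // pop the caller's arguments
++  //   return;                    // result is left in F0
++  //
++  // The transcendental cases additionally save RA and SP in FS0/FS1, align SP
++  // and call out to the matching stub or SharedRuntime entry.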
++ ++ // mathematical functions inlined by compiler ++ // (interpreter must provide identical implementation ++ // in order to avoid monotonicity bugs when switching ++ // from interpreter to compiler in the middle of some ++ // computation) ++ // ++ // stack: ++ // [ arg ] <-- sp ++ // [ arg ] ++ // retaddr in ra ++ ++ address entry_point = NULL; ++ switch (kind) { ++ case Interpreter::java_lang_math_abs: ++ entry_point = __ pc(); ++ __ fld_d(FA0, SP, 0); ++ __ fabs_d(F0, FA0); ++ __ move(SP, Rsender); ++ break; ++ case Interpreter::java_lang_math_sqrt: ++ entry_point = __ pc(); ++ __ fld_d(FA0, SP, 0); ++ __ fsqrt_d(F0, FA0); ++ __ move(SP, Rsender); ++ break; ++ case Interpreter::java_lang_math_sin : ++ case Interpreter::java_lang_math_cos : ++ case Interpreter::java_lang_math_tan : ++ case Interpreter::java_lang_math_log : ++ case Interpreter::java_lang_math_log10 : ++ case Interpreter::java_lang_math_exp : ++ entry_point = __ pc(); ++ __ fld_d(FA0, SP, 0); ++ __ move(SP, Rsender); ++ __ movgr2fr_d(FS0, RA); ++ __ movgr2fr_d(FS1, SP); ++ __ bstrins_d(SP, R0, exact_log2(StackAlignmentInBytes) - 1, 0); ++ generate_transcendental_entry(kind, 1); ++ __ movfr2gr_d(SP, FS1); ++ __ movfr2gr_d(RA, FS0); ++ break; ++ case Interpreter::java_lang_math_pow : ++ entry_point = __ pc(); ++ __ fld_d(FA0, SP, 2 * Interpreter::stackElementSize); ++ __ fld_d(FA1, SP, 0); ++ __ move(SP, Rsender); ++ __ movgr2fr_d(FS0, RA); ++ __ movgr2fr_d(FS1, SP); ++ __ bstrins_d(SP, R0, exact_log2(StackAlignmentInBytes) - 1, 0); ++ generate_transcendental_entry(kind, 2); ++ __ movfr2gr_d(SP, FS1); ++ __ movfr2gr_d(RA, FS0); ++ break; ++ case Interpreter::java_lang_math_fmaD : ++ if (UseFMA) { ++ entry_point = __ pc(); ++ __ fld_d(FA0, SP, 4 * Interpreter::stackElementSize); ++ __ fld_d(FA1, SP, 2 * Interpreter::stackElementSize); ++ __ fld_d(FA2, SP, 0); ++ __ fmadd_d(F0, FA0, FA1, FA2); ++ __ move(SP, Rsender); ++ } ++ break; ++ case Interpreter::java_lang_math_fmaF : ++ if (UseFMA) { ++ entry_point = __ pc(); ++ __ fld_s(FA0, SP, 2 * Interpreter::stackElementSize); ++ __ fld_s(FA1, SP, Interpreter::stackElementSize); ++ __ fld_s(FA2, SP, 0); ++ __ fmadd_s(F0, FA0, FA1, FA2); ++ __ move(SP, Rsender); ++ } ++ break; ++ default: ++ ; ++ } ++ if (entry_point) { ++ __ jr(RA); ++ } ++ ++ return entry_point; ++} ++ ++ // double trigonometrics and transcendentals ++ // static jdouble dsin(jdouble x); ++ // static jdouble dcos(jdouble x); ++ // static jdouble dtan(jdouble x); ++ // static jdouble dlog(jdouble x); ++ // static jdouble dlog10(jdouble x); ++ // static jdouble dexp(jdouble x); ++ // static jdouble dpow(jdouble x, jdouble y); ++ ++void TemplateInterpreterGenerator::generate_transcendental_entry(AbstractInterpreter::MethodKind kind, int fpargs) { ++ address fn; ++ switch (kind) { ++ case Interpreter::java_lang_math_sin : ++ if (StubRoutines::dsin() == NULL) { ++ fn = CAST_FROM_FN_PTR(address, SharedRuntime::dsin); ++ } else { ++ fn = CAST_FROM_FN_PTR(address, StubRoutines::dsin()); ++ } ++ break; ++ case Interpreter::java_lang_math_cos : ++ if (StubRoutines::dcos() == NULL) { ++ fn = CAST_FROM_FN_PTR(address, SharedRuntime::dcos); ++ } else { ++ fn = CAST_FROM_FN_PTR(address, StubRoutines::dcos()); ++ } ++ break; ++ case Interpreter::java_lang_math_tan : ++ if (StubRoutines::dtan() == NULL) { ++ fn = CAST_FROM_FN_PTR(address, SharedRuntime::dtan); ++ } else { ++ fn = CAST_FROM_FN_PTR(address, StubRoutines::dtan()); ++ } ++ break; ++ case Interpreter::java_lang_math_log : ++ if (StubRoutines::dlog() == NULL) { ++ fn 
= CAST_FROM_FN_PTR(address, SharedRuntime::dlog); ++ } else { ++ fn = CAST_FROM_FN_PTR(address, StubRoutines::dlog()); ++ } ++ break; ++ case Interpreter::java_lang_math_log10 : ++ if (StubRoutines::dlog10() == NULL) { ++ fn = CAST_FROM_FN_PTR(address, SharedRuntime::dlog10); ++ } else { ++ fn = CAST_FROM_FN_PTR(address, StubRoutines::dlog10()); ++ } ++ break; ++ case Interpreter::java_lang_math_exp : ++ if (StubRoutines::dexp() == NULL) { ++ fn = CAST_FROM_FN_PTR(address, SharedRuntime::dexp); ++ } else { ++ fn = CAST_FROM_FN_PTR(address, StubRoutines::dexp()); ++ } ++ break; ++ case Interpreter::java_lang_math_pow : ++ if (StubRoutines::dpow() == NULL) { ++ fn = CAST_FROM_FN_PTR(address, SharedRuntime::dpow); ++ } else { ++ fn = CAST_FROM_FN_PTR(address, StubRoutines::dpow()); ++ } ++ break; ++ default: ++ ShouldNotReachHere(); ++ fn = NULL; // unreachable ++ } ++ __ li(T4, fn); ++ __ jalr(T4); ++} ++ ++// Abstract method entry ++// Attempt to execute abstract method. Throw exception ++address TemplateInterpreterGenerator::generate_abstract_entry(void) { ++ ++ // Rmethod: methodOop ++ // V0: receiver (unused) ++ // Rsender : sender 's sp ++ address entry_point = __ pc(); ++ ++ // abstract method entry ++ // throw exception ++ // adjust stack to what a normal return would do ++ __ empty_expression_stack(); ++ __ restore_bcp(); ++ __ restore_locals(); ++ __ call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::throw_AbstractMethodErrorWithMethod), Rmethod); ++ // the call_VM checks for exception, so we should never return here. ++ __ should_not_reach_here(); ++ ++ return entry_point; ++} ++ ++ ++const int method_offset = frame::interpreter_frame_method_offset * wordSize; ++const int bci_offset = frame::interpreter_frame_bcp_offset * wordSize; ++const int locals_offset = frame::interpreter_frame_locals_offset * wordSize; ++ ++//----------------------------------------------------------------------------- ++ ++address TemplateInterpreterGenerator::generate_StackOverflowError_handler() { ++ address entry = __ pc(); ++ ++#ifdef ASSERT ++ { ++ Label L; ++ __ addi_d(T1, FP, frame::interpreter_frame_monitor_block_top_offset * wordSize); ++ __ sub_d(T1, T1, SP); // T1 = maximal sp for current fp ++ __ bge(T1, R0, L); // check if frame is complete ++ __ stop("interpreter frame not set up"); ++ __ bind(L); ++ } ++#endif // ASSERT ++ // Restore bcp under the assumption that the current frame is still ++ // interpreted ++ __ restore_bcp(); ++ ++ // expression stack must be empty before entering the VM if an ++ // exception happened ++ __ empty_expression_stack(); ++ // throw exception ++ __ call_VM(NOREG, CAST_FROM_FN_PTR(address, InterpreterRuntime::throw_StackOverflowError)); ++ return entry; ++} ++ ++address TemplateInterpreterGenerator::generate_ArrayIndexOutOfBounds_handler() { ++ address entry = __ pc(); ++ // expression stack must be empty before entering the VM if an ++ // exception happened ++ __ empty_expression_stack(); ++ // ??? 
convention: expect array in register A1 ++ __ call_VM(noreg, CAST_FROM_FN_PTR(address, ++ InterpreterRuntime::throw_ArrayIndexOutOfBoundsException), A1, A2); ++ return entry; ++} ++ ++address TemplateInterpreterGenerator::generate_ClassCastException_handler() { ++ address entry = __ pc(); ++ // expression stack must be empty before entering the VM if an ++ // exception happened ++ __ empty_expression_stack(); ++ __ empty_FPU_stack(); ++ __ call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::throw_ClassCastException), FSR); ++ return entry; ++} ++ ++address TemplateInterpreterGenerator::generate_exception_handler_common( ++ const char* name, const char* message, bool pass_oop) { ++ assert(!pass_oop || message == NULL, "either oop or message but not both"); ++ address entry = __ pc(); ++ ++ // expression stack must be empty before entering the VM if an exception happened ++ __ empty_expression_stack(); ++ // setup parameters ++ __ li(A1, (long)name); ++ if (pass_oop) { ++ __ call_VM(V0, ++ CAST_FROM_FN_PTR(address, InterpreterRuntime::create_klass_exception), A1, FSR); ++ } else { ++ __ li(A2, (long)message); ++ __ call_VM(V0, ++ CAST_FROM_FN_PTR(address, InterpreterRuntime::create_exception), A1, A2); ++ } ++ // throw exception ++ __ jmp(Interpreter::throw_exception_entry(), relocInfo::none); ++ return entry; ++} ++ ++address TemplateInterpreterGenerator::generate_return_entry_for(TosState state, int step, size_t index_size) { ++ ++ address entry = __ pc(); ++ // S8 be used in C2 ++ __ li(S8, (long)Interpreter::dispatch_table(itos)); ++ // Restore stack bottom in case i2c adjusted stack ++ __ ld_d(SP, Address(FP, frame::interpreter_frame_last_sp_offset * wordSize)); ++ // and NULL it as marker that sp is now tos until next java call ++ __ st_d(R0, FP, frame::interpreter_frame_last_sp_offset * wordSize); ++ ++ __ restore_bcp(); ++ __ restore_locals(); ++ ++ // mdp: T8 ++ // ret: FSR ++ // tmp: T4 ++ if (state == atos) { ++ Register mdp = T8; ++ Register tmp = T4; ++ __ profile_return_type(mdp, FSR, tmp); ++ } ++ ++ ++ const Register cache = T4; ++ const Register index = T3; ++ __ get_cache_and_index_at_bcp(cache, index, 1, index_size); ++ ++ const Register flags = cache; ++ __ alsl_d(AT, index, cache, Address::times_ptr - 1); ++ __ ld_w(flags, AT, in_bytes(ConstantPoolCache::base_offset() + ConstantPoolCacheEntry::flags_offset())); ++ __ andi(flags, flags, ConstantPoolCacheEntry::parameter_size_mask); ++ __ alsl_d(SP, flags, SP, Interpreter::logStackElementSize - 1); ++ ++ Register java_thread; ++#ifndef OPT_THREAD ++ java_thread = T4; ++ __ get_thread(java_thread); ++#else ++ java_thread = TREG; ++#endif ++ ++ __ check_and_handle_popframe(java_thread); ++ __ check_and_handle_earlyret(java_thread); ++ ++ __ dispatch_next(state, step); ++ ++ return entry; ++} ++ ++ ++address TemplateInterpreterGenerator::generate_deopt_entry_for(TosState state, ++ int step, ++ address continuation) { ++ address entry = __ pc(); ++ // S8 be used in C2 ++ __ li(S8, (long)Interpreter::dispatch_table(itos)); ++ // NULL last_sp until next java call ++ __ st_d(R0, FP, frame::interpreter_frame_last_sp_offset * wordSize); ++ __ restore_bcp(); ++ __ restore_locals(); ++ ++#if INCLUDE_JVMCI ++ // Check if we need to take lock at entry of synchronized method. This can ++ // only occur on method entry so emit it only for vtos with step 0. ++ if (EnableJVMCI && state == vtos && step == 0) { ++ Label L; ++ __ ld_b(AT, Address(TREG, JavaThread::pending_monitorenter_offset())); ++ __ beqz(AT, L); ++ // Clear flag. 
++ __ st_b(R0, Address(TREG, JavaThread::pending_monitorenter_offset())); ++ // Take lock. ++ lock_method(); ++ __ bind(L); ++ } else { ++#ifdef ASSERT ++ if (EnableJVMCI) { ++ Label L; ++ __ ld_b(AT, Address(TREG, JavaThread::pending_monitorenter_offset())); ++ __ beqz(AT, L); ++ __ stop("unexpected pending monitor in deopt entry"); ++ __ bind(L); ++ } ++#endif ++ } ++#endif ++ ++ // handle exceptions ++ { ++ Label L; ++ const Register thread = TREG; ++#ifndef OPT_THREAD ++ __ get_thread(thread); ++#endif ++ __ ld_d(AT, thread, in_bytes(Thread::pending_exception_offset())); ++ __ beq(AT, R0, L); ++ __ call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::throw_pending_exception)); ++ __ should_not_reach_here(); ++ __ bind(L); ++ } ++ if (continuation == NULL) { ++ __ dispatch_next(state, step); ++ } else { ++ __ jump_to_entry(continuation); ++ } ++ return entry; ++} ++ ++int AbstractInterpreter::BasicType_as_index(BasicType type) { ++ int i = 0; ++ switch (type) { ++ case T_BOOLEAN: i = 0; break; ++ case T_CHAR : i = 1; break; ++ case T_BYTE : i = 2; break; ++ case T_SHORT : i = 3; break; ++ case T_INT : // fall through ++ case T_LONG : // fall through ++ case T_VOID : i = 4; break; ++ case T_FLOAT : i = 5; break; ++ case T_DOUBLE : i = 6; break; ++ case T_OBJECT : // fall through ++ case T_ARRAY : i = 7; break; ++ default : ShouldNotReachHere(); ++ } ++ assert(0 <= i && i < AbstractInterpreter::number_of_result_handlers, ++ "index out of bounds"); ++ return i; ++} ++ ++ ++address TemplateInterpreterGenerator::generate_result_handler_for( ++ BasicType type) { ++ address entry = __ pc(); ++ switch (type) { ++ case T_BOOLEAN: __ c2bool(V0); break; ++ case T_CHAR : __ bstrpick_d(V0, V0, 15, 0); break; ++ case T_BYTE : __ sign_extend_byte (V0); break; ++ case T_SHORT : __ sign_extend_short(V0); break; ++ case T_INT : /* nothing to do */ break; ++ case T_FLOAT : /* nothing to do */ break; ++ case T_DOUBLE : /* nothing to do */ break; ++ case T_OBJECT : ++ { ++ __ ld_d(V0, FP, frame::interpreter_frame_oop_temp_offset * wordSize); ++ __ verify_oop(V0); // and verify it ++ } ++ break; ++ default : ShouldNotReachHere(); ++ } ++ __ jr(RA); // return from result handler ++ return entry; ++} ++ ++address TemplateInterpreterGenerator::generate_safept_entry_for( ++ TosState state, ++ address runtime_entry) { ++ address entry = __ pc(); ++ __ push(state); ++ __ call_VM(noreg, runtime_entry); ++ __ dispatch_via(vtos, Interpreter::_normal_table.table_for(vtos)); ++ return entry; ++} ++ ++ ++ ++// Helpers for commoning out cases in the various type of method entries. ++// ++ ++ ++// increment invocation count & check for overflow ++// ++// Note: checking for negative value instead of overflow ++// so we have a 'sticky' overflow test ++// ++// prerequisites : method in T0, invocation counter in T3 ++void TemplateInterpreterGenerator::generate_counter_incr( ++ Label* overflow, ++ Label* profile_method, ++ Label* profile_method_continue) { ++ Label done; ++ // Note: In tiered we increment either counters in Method* or in MDO depending if we're profiling or not. ++ if (TieredCompilation) { ++ int increment = InvocationCounter::count_increment; ++ int mask = ((1 << Tier0InvokeNotifyFreqLog) - 1) << InvocationCounter::count_shift; ++ Label no_mdo; ++ if (ProfileInterpreter) { ++ // Are we profiling? 
++ __ ld_d(FSR, Address(Rmethod, Method::method_data_offset())); ++ __ beqz(FSR, no_mdo); ++ // Increment counter in the MDO ++ const Address mdo_invocation_counter(FSR, in_bytes(MethodData::invocation_counter_offset()) + ++ in_bytes(InvocationCounter::counter_offset())); ++ __ increment_mask_and_jump(mdo_invocation_counter, increment, mask, T3, false, Assembler::zero, overflow); ++ __ b(done); ++ } ++ __ bind(no_mdo); ++ // Increment counter in MethodCounters ++ const Address invocation_counter(FSR, ++ MethodCounters::invocation_counter_offset() + ++ InvocationCounter::counter_offset()); ++ __ get_method_counters(Rmethod, FSR, done); ++ __ increment_mask_and_jump(invocation_counter, increment, mask, T3, false, Assembler::zero, overflow); ++ __ bind(done); ++ } else { // not TieredCompilation ++ const Address invocation_counter(FSR, in_bytes(MethodCounters::invocation_counter_offset()) ++ + in_bytes(InvocationCounter::counter_offset())); ++ const Address backedge_counter (FSR, in_bytes(MethodCounters::backedge_counter_offset()) ++ + in_bytes(InvocationCounter::counter_offset())); ++ ++ __ get_method_counters(Rmethod, FSR, done); ++ ++ if (ProfileInterpreter) { // %%% Merge this into methodDataOop ++ __ ld_w(T4, FSR, in_bytes(MethodCounters::interpreter_invocation_counter_offset())); ++ __ addi_d(T4, T4, 1); ++ __ st_w(T4, FSR, in_bytes(MethodCounters::interpreter_invocation_counter_offset())); ++ } ++ // Update standard invocation counters ++ __ ld_w(T3, invocation_counter); ++ __ increment(T3, InvocationCounter::count_increment); ++ __ st_w(T3, invocation_counter); // save invocation count ++ ++ __ ld_w(FSR, backedge_counter); // load backedge counter ++ __ li(AT, InvocationCounter::count_mask_value); // mask out the status bits ++ __ andr(FSR, FSR, AT); ++ ++ __ add_d(T3, T3, FSR); // add both counters ++ ++ if (ProfileInterpreter && profile_method != NULL) { ++ // Test to see if we should create a method data oop ++ if (Assembler::is_simm(InvocationCounter::InterpreterProfileLimit, 12)) { ++ __ slti(AT, T3, InvocationCounter::InterpreterProfileLimit); ++ __ bne_far(AT, R0, *profile_method_continue); ++ } else { ++ __ li(AT, (long)&InvocationCounter::InterpreterProfileLimit); ++ __ ld_w(AT, AT, 0); ++ __ blt_far(T3, AT, *profile_method_continue, true /* signed */); ++ } ++ ++ // if no method data exists, go to profile_method ++ __ test_method_data_pointer(FSR, *profile_method); ++ } ++ ++ if (Assembler::is_simm(CompileThreshold, 12)) { ++ __ srli_w(AT, T3, InvocationCounter::count_shift); ++ __ slti(AT, AT, CompileThreshold); ++ __ beq_far(AT, R0, *overflow); ++ } else { ++ __ li(AT, (long)&InvocationCounter::InterpreterInvocationLimit); ++ __ ld_w(AT, AT, 0); ++ __ bge_far(T3, AT, *overflow, true /* signed */); ++ } ++ ++ __ bind(done); ++ } ++} ++ ++void TemplateInterpreterGenerator::generate_counter_overflow(Label& do_continue) { ++ ++ // Asm interpreter on entry ++ // S7 - locals ++ // S0 - bcp ++ // Rmethod - method ++ // FP - interpreter frame ++ ++ // On return (i.e. 
jump to entry_point) ++ // Rmethod - method ++ // RA - return address of interpreter caller ++ // tos - the last parameter to Java method ++ // SP - sender_sp ++ ++ // the bcp is valid if and only if it's not null ++ __ call_VM(NOREG, CAST_FROM_FN_PTR(address, ++ InterpreterRuntime::frequency_counter_overflow), R0); ++ __ ld_d(Rmethod, FP, method_offset); ++ // Preserve invariant that S0/S7 contain bcp/locals of sender frame ++ __ b_far(do_continue); ++} ++ ++// See if we've got enough room on the stack for locals plus overhead. ++// The expression stack grows down incrementally, so the normal guard ++// page mechanism will work for that. ++// ++// NOTE: Since the additional locals are also always pushed (wasn't ++// obvious in generate_method_entry) so the guard should work for them ++// too. ++// ++// Args: ++// T2: number of additional locals this frame needs (what we must check) ++// T0: Method* ++// ++void TemplateInterpreterGenerator::generate_stack_overflow_check(void) { ++ // see if we've got enough room on the stack for locals plus overhead. ++ // the expression stack grows down incrementally, so the normal guard ++ // page mechanism will work for that. ++ // ++ // Registers live on entry: ++ // ++ // T0: Method* ++ // T2: number of additional locals this frame needs (what we must check) ++ ++ // NOTE: since the additional locals are also always pushed (wasn't obvious in ++ // generate_method_entry) so the guard should work for them too. ++ // ++ ++ const int entry_size = frame::interpreter_frame_monitor_size() * wordSize; ++ ++ // total overhead size: entry_size + (saved fp thru expr stack bottom). ++ // be sure to change this if you add/subtract anything to/from the overhead area ++ const int overhead_size = -(frame::interpreter_frame_initial_sp_offset*wordSize) ++ + entry_size; ++ ++ const int page_size = os::vm_page_size(); ++ Label after_frame_check; ++ ++ // see if the frame is greater than one page in size. If so, ++ // then we need to verify there is enough stack space remaining ++ // for the additional locals. ++ __ li(AT, (page_size - overhead_size) / Interpreter::stackElementSize); ++ __ bge(AT, T2, after_frame_check); ++ ++ // compute sp as if this were going to be the last frame on ++ // the stack before the red zone ++#ifndef OPT_THREAD ++ Register thread = T1; ++ __ get_thread(thread); ++#else ++ Register thread = TREG; ++#endif ++ ++ // locals + overhead, in bytes ++ __ slli_d(T3, T2, Interpreter::logStackElementSize); ++ __ addi_d(T3, T3, overhead_size); // locals * 4 + overhead_size --> T3 ++ ++#ifdef ASSERT ++ Label stack_base_okay, stack_size_okay; ++ // verify that thread stack base is non-zero ++ __ ld_d(AT, thread, in_bytes(Thread::stack_base_offset())); ++ __ bne(AT, R0, stack_base_okay); ++ __ stop("stack base is zero"); ++ __ bind(stack_base_okay); ++ // verify that thread stack size is non-zero ++ __ ld_d(AT, thread, in_bytes(Thread::stack_size_offset())); ++ __ bne(AT, R0, stack_size_okay); ++ __ stop("stack size is zero"); ++ __ bind(stack_size_okay); ++#endif ++ ++ // Add stack base to locals and subtract stack size ++ __ ld_d(AT, thread, in_bytes(Thread::stack_base_offset())); // stack_base --> AT ++ __ add_d(T3, T3, AT); // locals * 4 + overhead_size + stack_base--> T3 ++ __ ld_d(AT, thread, in_bytes(Thread::stack_size_offset())); // stack_size --> AT ++ __ sub_d(T3, T3, AT); // locals * 4 + overhead_size + stack_base - stack_size --> T3 ++ ++ // Use the bigger size for banging. 
++ const int max_bang_size = (int)MAX2(JavaThread::stack_shadow_zone_size(), JavaThread::stack_guard_zone_size()); ++ ++ // add in the redzone and yellow size ++ __ li(AT, max_bang_size); ++ __ add_d(T3, T3, AT); ++ ++ // check against the current stack bottom ++ __ blt(T3, SP, after_frame_check); ++ ++ // Note: the restored frame is not necessarily interpreted. ++ // Use the shared runtime version of the StackOverflowError. ++ __ move(SP, Rsender); ++ assert(StubRoutines::throw_StackOverflowError_entry() != NULL, "stub not yet generated"); ++ __ jmp(StubRoutines::throw_StackOverflowError_entry(), relocInfo::runtime_call_type); ++ ++ // all done with frame size check ++ __ bind(after_frame_check); ++} ++ ++// Allocate monitor and lock method (asm interpreter) ++// Rmethod - Method* ++void TemplateInterpreterGenerator::lock_method(void) { ++ // synchronize method ++ const int entry_size = frame::interpreter_frame_monitor_size() * wordSize; ++ ++#ifdef ASSERT ++ { Label L; ++ __ ld_w(T0, Rmethod, in_bytes(Method::access_flags_offset())); ++ __ andi(T0, T0, JVM_ACC_SYNCHRONIZED); ++ __ bne(T0, R0, L); ++ __ stop("method doesn't need synchronization"); ++ __ bind(L); ++ } ++#endif // ASSERT ++ // get synchronization object ++ { ++ Label done; ++ __ ld_w(T0, Rmethod, in_bytes(Method::access_flags_offset())); ++ __ andi(T2, T0, JVM_ACC_STATIC); ++ __ ld_d(T0, LVP, Interpreter::local_offset_in_bytes(0)); ++ __ beq(T2, R0, done); ++ __ load_mirror(T0, Rmethod, T4); ++ __ bind(done); ++ } ++ // add space for monitor & lock ++ __ addi_d(SP, SP, (-1) * entry_size); // add space for a monitor entry ++ __ st_d(SP, FP, frame::interpreter_frame_monitor_block_top_offset * wordSize); ++ // set new monitor block top ++ __ st_d(T0, SP, BasicObjectLock::obj_offset_in_bytes()); // store object ++ // FIXME: I do not know what lock_object will do and what it will need ++ __ move(c_rarg0, SP); // object address ++ __ lock_object(c_rarg0); ++} ++ ++// Generate a fixed interpreter frame. This is identical setup for ++// interpreted methods and for native methods hence the shared code. ++void TemplateInterpreterGenerator::generate_fixed_frame(bool native_call) { ++ ++ // [ local var m-1 ] <--- sp ++ // ... ++ // [ local var 0 ] ++ // [ argumnet word n-1 ] <--- T0(sender's sp) ++ // ... 
++ // [ argument word 0 ] <--- S7 ++ ++ // initialize fixed part of activation frame ++ // sender's sp in Rsender ++ int i = 0; ++ int frame_size = 10; ++#ifndef CORE ++ ++frame_size; ++#endif ++ __ addi_d(SP, SP, (-frame_size) * wordSize); ++ __ st_d(RA, SP, (frame_size - 1) * wordSize); // save return address ++ __ st_d(FP, SP, (frame_size - 2) * wordSize); // save sender's fp ++ __ addi_d(FP, SP, (frame_size - 2) * wordSize); ++ __ st_d(Rsender, FP, (-++i) * wordSize); // save sender's sp ++ __ st_d(R0, FP,(-++i) * wordSize); //save last_sp as null ++ __ st_d(LVP, FP, (-++i) * wordSize); // save locals offset ++ __ ld_d(BCP, Rmethod, in_bytes(Method::const_offset())); // get constMethodOop ++ __ addi_d(BCP, BCP, in_bytes(ConstMethod::codes_offset())); // get codebase ++ __ st_d(Rmethod, FP, (-++i) * wordSize); // save Method* ++ // Get mirror and store it in the frame as GC root for this Method* ++ __ load_mirror(T2, Rmethod, T4); ++ __ st_d(T2, FP, (-++i) * wordSize); // Mirror ++#ifndef CORE ++ if (ProfileInterpreter) { ++ Label method_data_continue; ++ __ ld_d(AT, Rmethod, in_bytes(Method::method_data_offset())); ++ __ beq(AT, R0, method_data_continue); ++ __ addi_d(AT, AT, in_bytes(MethodData::data_offset())); ++ __ bind(method_data_continue); ++ __ st_d(AT, FP, (-++i) * wordSize); ++ } else { ++ __ st_d(R0, FP, (-++i) * wordSize); ++ } ++#endif // !CORE ++ ++ __ ld_d(T2, Rmethod, in_bytes(Method::const_offset())); ++ __ ld_d(T2, T2, in_bytes(ConstMethod::constants_offset())); ++ __ ld_d(T2, T2, ConstantPool::cache_offset_in_bytes()); ++ __ st_d(T2, FP, (-++i) * wordSize); // set constant pool cache ++ if (native_call) { ++ __ st_d(R0, FP, (-++i) * wordSize); // no bcp ++ } else { ++ __ st_d(BCP, FP, (-++i) * wordSize); // set bcp ++ } ++ __ st_d(SP, FP, (-++i) * wordSize); // reserve word for pointer to expression stack bottom ++ assert(i + 2 == frame_size, "i + 2 should be equal to frame_size"); ++} ++ ++// End of helpers ++ ++// Various method entries ++//------------------------------------------------------------------------------------------------------------------------ ++// ++// ++ ++// Method entry for java.lang.ref.Reference.get. ++address TemplateInterpreterGenerator::generate_Reference_get_entry(void) { ++ // Code: _aload_0, _getfield, _areturn ++ // parameter size = 1 ++ // ++ // The code that gets generated by this routine is split into 2 parts: ++ // 1. The "intrinsified" code for G1 (or any SATB based GC), ++ // 2. The slow path - which is an expansion of the regular method entry. ++ // ++ // Notes:- ++ // * In the G1 code we do not check whether we need to block for ++ // a safepoint. If G1 is enabled then we must execute the specialized ++ // code for Reference.get (except when the Reference object is null) ++ // so that we can log the value in the referent field with an SATB ++ // update buffer. ++ // If the code for the getfield template is modified so that the ++ // G1 pre-barrier code is executed when the current method is ++ // Reference.get() then going through the normal method entry ++ // will be fine. ++ // * The G1 code can, however, check the receiver object (the instance ++ // of java.lang.Reference) and jump to the slow path if null. If the ++ // Reference object is null then we obviously cannot fetch the referent ++ // and so we don't need to call the G1 pre-barrier. Thus we can use the ++ // regular method entry code to generate the NPE. ++ // ++ // This code is based on generate_accessor_entry. 
++ // ++ // Rmethod: Method* ++ // Rsender: senderSP must preserve for slow path, set SP to it on fast path ++ // RA is live. It must be saved around calls. ++ ++ address entry = __ pc(); ++ ++ const int referent_offset = java_lang_ref_Reference::referent_offset; ++ ++ Label slow_path; ++ const Register local_0 = A0; ++ // Check if local 0 != NULL ++ // If the receiver is null then it is OK to jump to the slow path. ++ __ ld_d(local_0, Address(SP, 0)); ++ __ beqz(local_0, slow_path); ++ ++ // Load the value of the referent field. ++ const Address field_address(local_0, referent_offset); ++ BarrierSetAssembler *bs = BarrierSet::barrier_set()->barrier_set_assembler(); ++ bs->load_at(_masm, IN_HEAP | ON_WEAK_OOP_REF, T_OBJECT, local_0, field_address, /*tmp1*/ T4, /*tmp2*/ noreg); ++ ++ // areturn ++ __ move(SP, Rsender); ++ __ jr(RA); ++ ++ // generate a vanilla interpreter entry as the slow path ++ __ bind(slow_path); ++ __ jump_to_entry(Interpreter::entry_for_kind(Interpreter::zerolocals)); ++ return entry; ++} ++ ++// Interpreter stub for calling a native method. (asm interpreter) ++// This sets up a somewhat different looking stack for calling the ++// native method than the typical interpreter frame setup. ++address TemplateInterpreterGenerator::generate_native_entry(bool synchronized) { ++ // determine code generation flags ++ bool inc_counter = UseCompiler || CountCompiledCalls || LogTouchedMethods; ++ // Rsender: sender's sp ++ // Rmethod: Method* ++ address entry_point = __ pc(); ++ ++#ifndef CORE ++ const Address invocation_counter(Rmethod,in_bytes(MethodCounters::invocation_counter_offset() + ++ InvocationCounter::counter_offset())); ++#endif ++ // get parameter size (always needed) ++ // the size in the java stack ++ __ ld_d(V0, Rmethod, in_bytes(Method::const_offset())); ++ __ ld_hu(V0, V0, in_bytes(ConstMethod::size_of_parameters_offset())); ++ ++ // native calls don't need the stack size check since they have no expression stack ++ // and the arguments are already on the stack and we only add a handful of words ++ // to the stack ++ ++ // Rmethod: Method* ++ // V0: size of parameters ++ // Layout of frame at this point ++ // ++ // [ argument word n-1 ] <--- sp ++ // ... ++ // [ argument word 0 ] ++ ++ // for natives the size of locals is zero ++ ++ // compute beginning of parameters (S7) ++ __ slli_d(LVP, V0, Address::times_8); ++ __ addi_d(LVP, LVP, (-1) * wordSize); ++ __ add_d(LVP, LVP, SP); ++ ++ ++ // add 2 zero-initialized slots for native calls ++ // 1 slot for native oop temp offset (setup via runtime) ++ // 1 slot for static native result handler3 (setup via runtime) ++ __ push2(R0, R0); ++ ++ // Layout of frame at this point ++ // [ method holder mirror ] <--- sp ++ // [ result type info ] ++ // [ argument word n-1 ] <--- T0 ++ // ... ++ // [ argument word 0 ] <--- LVP ++ ++ ++#ifndef CORE ++ if (inc_counter) __ ld_w(T3, invocation_counter); // (pre-)fetch invocation count ++#endif ++ ++ // initialize fixed part of activation frame ++ generate_fixed_frame(true); ++ // after this function, the layout of frame is as following ++ // ++ // [ monitor block top ] <--- sp ( the top monitor entry ) ++ // [ byte code pointer (0) ] (if native, bcp = 0) ++ // [ constant pool cache ] ++ // [ Mirror ] ++ // [ Method* ] ++ // [ locals offset ] ++ // [ sender's sp ] ++ // [ sender's fp ] ++ // [ return address ] <--- fp ++ // [ method holder mirror ] ++ // [ result type info ] ++ // [ argumnet word n-1 ] <--- sender's sp ++ // ... 
++ // [ argument word 0 ] <--- S7 ++ ++ ++ // make sure method is native & not abstract ++#ifdef ASSERT ++ __ ld_w(T0, Rmethod, in_bytes(Method::access_flags_offset())); ++ { ++ Label L; ++ __ andi(AT, T0, JVM_ACC_NATIVE); ++ __ bne(AT, R0, L); ++ __ stop("tried to execute native method as non-native"); ++ __ bind(L); ++ } ++ { ++ Label L; ++ __ andi(AT, T0, JVM_ACC_ABSTRACT); ++ __ beq(AT, R0, L); ++ __ stop("tried to execute abstract method in interpreter"); ++ __ bind(L); ++ } ++#endif ++ ++ // Since at this point in the method invocation the exception handler ++ // would try to exit the monitor of synchronized methods which hasn't ++ // been entered yet, we set the thread local variable ++ // _do_not_unlock_if_synchronized to true. The remove_activation will ++ // check this flag. ++ Register thread = TREG; ++#ifndef OPT_THREAD ++ __ get_thread(thread); ++#endif ++ __ li(AT, (int)true); ++ __ st_b(AT, thread, in_bytes(JavaThread::do_not_unlock_if_synchronized_offset())); ++ ++#ifndef CORE ++ // increment invocation count & check for overflow ++ Label invocation_counter_overflow; ++ if (inc_counter) { ++ generate_counter_incr(&invocation_counter_overflow, NULL, NULL); ++ } ++ ++ Label continue_after_compile; ++ __ bind(continue_after_compile); ++#endif // CORE ++ ++ bang_stack_shadow_pages(true); ++ ++ // reset the _do_not_unlock_if_synchronized flag ++#ifndef OPT_THREAD ++ __ get_thread(thread); ++#endif ++ __ st_b(R0, thread, in_bytes(JavaThread::do_not_unlock_if_synchronized_offset())); ++ ++ // check for synchronized methods ++ // Must happen AFTER invocation_counter check and stack overflow check, ++ // so method is not locked if overflows. ++ if (synchronized) { ++ lock_method(); ++ } else { ++ // no synchronization necessary ++#ifdef ASSERT ++ { ++ Label L; ++ __ ld_w(T0, Rmethod, in_bytes(Method::access_flags_offset())); ++ __ andi(AT, T0, JVM_ACC_SYNCHRONIZED); ++ __ beq(AT, R0, L); ++ __ stop("method needs synchronization"); ++ __ bind(L); ++ } ++#endif ++ } ++ ++ // after method_lock, the layout of frame is as following ++ // ++ // [ monitor entry ] <--- sp ++ // ... ++ // [ monitor entry ] ++ // [ monitor block top ] ( the top monitor entry ) ++ // [ byte code pointer (0) ] (if native, bcp = 0) ++ // [ constant pool cache ] ++ // [ Mirror ] ++ // [ Method* ] ++ // [ locals offset ] ++ // [ sender's sp ] ++ // [ sender's fp ] ++ // [ return address ] <--- fp ++ // [ method holder mirror ] ++ // [ result type info ] ++ // [ argumnet word n-1 ] <--- ( sender's sp ) ++ // ... ++ // [ argument word 0 ] <--- S7 ++ ++ // start execution ++#ifdef ASSERT ++ { ++ Label L; ++ __ ld_d(AT, FP, frame::interpreter_frame_monitor_block_top_offset * wordSize); ++ __ beq(AT, SP, L); ++ __ stop("broken stack frame setup in interpreter in asm"); ++ __ bind(L); ++ } ++#endif ++ ++ // jvmti/jvmpi support ++ __ notify_method_entry(); ++ ++ // work registers ++ const Register method = Rmethod; ++ const Register t = T8; ++ ++ __ get_method(method); ++ { ++ Label L, Lstatic; ++ __ ld_d(t,method,in_bytes(Method::const_offset())); ++ __ ld_hu(t, t, in_bytes(ConstMethod::size_of_parameters_offset())); ++ // LoongArch ABI: caller does not reserve space for the register auguments. 
++ // A0 and A1(if needed) ++ __ ld_w(AT, Rmethod, in_bytes(Method::access_flags_offset())); ++ __ andi(AT, AT, JVM_ACC_STATIC); ++ __ beq(AT, R0, Lstatic); ++ __ addi_d(t, t, 1); ++ __ bind(Lstatic); ++ __ addi_d(t, t, -7); ++ __ bge(R0, t, L); ++ __ slli_d(t, t, Address::times_8); ++ __ sub_d(SP, SP, t); ++ __ bind(L); ++ } ++ __ li(AT, -(StackAlignmentInBytes)); ++ __ andr(SP, SP, AT); ++ __ move(AT, SP); ++ // [ ] <--- sp ++ // ... (size of parameters - 8 ) ++ // [ monitor entry ] ++ // ... ++ // [ monitor entry ] ++ // [ monitor block top ] ( the top monitor entry ) ++ // [ byte code pointer (0) ] (if native, bcp = 0) ++ // [ constant pool cache ] ++ // [ Mirror ] ++ // [ Method* ] ++ // [ locals offset ] ++ // [ sender's sp ] ++ // [ sender's fp ] ++ // [ return address ] <--- fp ++ // [ method holder mirror ] ++ // [ result type info ] ++ // [ argumnet word n-1 ] <--- ( sender's sp ) ++ // ... ++ // [ argument word 0 ] <--- LVP ++ ++ // get signature handler ++ { ++ Label L; ++ __ ld_d(T4, method, in_bytes(Method::signature_handler_offset())); ++ __ bne(T4, R0, L); ++ __ call_VM(NOREG, CAST_FROM_FN_PTR(address, ++ InterpreterRuntime::prepare_native_call), method); ++ __ get_method(method); ++ __ ld_d(T4, method, in_bytes(Method::signature_handler_offset())); ++ __ bind(L); ++ } ++ ++ // call signature handler ++ // FIXME: when change codes in InterpreterRuntime, note this point ++ // from: begin of parameters ++ assert(InterpreterRuntime::SignatureHandlerGenerator::from() == LVP, "adjust this code"); ++ // to: current sp ++ assert(InterpreterRuntime::SignatureHandlerGenerator::to () == SP, "adjust this code"); ++ // temp: T3 ++ assert(InterpreterRuntime::SignatureHandlerGenerator::temp() == t , "adjust this code"); ++ ++ __ jalr(T4); ++ __ get_method(method); ++ ++ // ++ // if native function is static, and its second parameter has type length of double word, ++ // and first parameter has type length of word, we have to reserve one word ++ // for the first parameter, according to LoongArch abi. ++ // if native function is not static, and its third parameter has type length of double word, ++ // and second parameter has type length of word, we have to reserve one word for the second ++ // parameter. ++ // ++ ++ ++ // result handler is in V0 ++ // set result handler ++ __ st_d(V0, FP, (frame::interpreter_frame_result_handler_offset)*wordSize); ++ ++#define FIRSTPARA_SHIFT_COUNT 5 ++#define SECONDPARA_SHIFT_COUNT 9 ++#define THIRDPARA_SHIFT_COUNT 13 ++#define PARA_MASK 0xf ++ ++ // pass mirror handle if static call ++ { ++ Label L; ++ __ ld_w(t, method, in_bytes(Method::access_flags_offset())); ++ __ andi(AT, t, JVM_ACC_STATIC); ++ __ beq(AT, R0, L); ++ ++ // get mirror ++ __ load_mirror(t, method, T4); ++ // copy mirror into activation frame ++ __ st_d(t, FP, frame::interpreter_frame_oop_temp_offset * wordSize); ++ // pass handle to mirror ++ __ addi_d(t, FP, frame::interpreter_frame_oop_temp_offset * wordSize); ++ __ move(A1, t); ++ __ bind(L); ++ } ++ ++ // [ mthd holder mirror ptr ] <--- sp --------------------| (only for static method) ++ // [ ] | ++ // ... size of parameters(or +1) | ++ // [ monitor entry ] | ++ // ... 
| ++ // [ monitor entry ] | ++ // [ monitor block top ] ( the top monitor entry ) | ++ // [ byte code pointer (0) ] (if native, bcp = 0) | ++ // [ constant pool cache ] | ++ // [ Mirror ] | ++ // [ Method* ] | ++ // [ locals offset ] | ++ // [ sender's sp ] | ++ // [ sender's fp ] | ++ // [ return address ] <--- fp | ++ // [ method holder mirror ] <----------------------------| ++ // [ result type info ] ++ // [ argumnet word n-1 ] <--- ( sender's sp ) ++ // ... ++ // [ argument word 0 ] <--- S7 ++ ++ // get native function entry point ++ { Label L; ++ __ ld_d(T4, method, in_bytes(Method::native_function_offset())); ++ __ li(T6, SharedRuntime::native_method_throw_unsatisfied_link_error_entry()); ++ __ bne(T6, T4, L); ++ __ call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::prepare_native_call), method); ++ __ get_method(method); ++ __ ld_d(T4, method, in_bytes(Method::native_function_offset())); ++ __ bind(L); ++ } ++ ++ // pass JNIEnv ++ // native function in T4 ++#ifndef OPT_THREAD ++ __ get_thread(thread); ++#endif ++ __ addi_d(t, thread, in_bytes(JavaThread::jni_environment_offset())); ++ __ move(A0, t); ++ // [ jni environment ] <--- sp ++ // [ mthd holder mirror ptr ] ---------------------------->| (only for static method) ++ // [ ] | ++ // ... size of parameters | ++ // [ monitor entry ] | ++ // ... | ++ // [ monitor entry ] | ++ // [ monitor block top ] ( the top monitor entry ) | ++ // [ byte code pointer (0) ] (if native, bcp = 0) | ++ // [ constant pool cache ] | ++ // [ Mirror ] | ++ // [ Method* ] | ++ // [ locals offset ] | ++ // [ sender's sp ] | ++ // [ sender's fp ] | ++ // [ return address ] <--- fp | ++ // [ method holder mirror ] <----------------------------| ++ // [ result type info ] ++ // [ argumnet word n-1 ] <--- ( sender's sp ) ++ // ... ++ // [ argument word 0 ] <--- S7 ++ ++ // Set the last Java PC in the frame anchor to be the return address from ++ // the call to the native method: this will allow the debugger to ++ // generate an accurate stack trace. ++ Label native_return; ++ __ set_last_Java_frame(thread, SP, FP, native_return); ++ ++ // change thread state ++#ifdef ASSERT ++ { ++ Label L; ++ __ ld_w(t, thread, in_bytes(JavaThread::thread_state_offset())); ++ __ addi_d(t, t, (-1) * _thread_in_Java); ++ __ beq(t, R0, L); ++ __ stop("Wrong thread state in native stub"); ++ __ bind(L); ++ } ++#endif ++ ++ __ li(t, _thread_in_native); ++ if (os::is_MP()) { ++ __ membar(Assembler::Membar_mask_bits(__ LoadStore|__ StoreStore)); // store release ++ } ++ __ st_w(t, thread, in_bytes(JavaThread::thread_state_offset())); ++ ++ // call native method ++ __ jalr(T4); ++ __ bind(native_return); ++ // result potentially in V0 or F0 ++ ++ ++ // via _last_native_pc and not via _last_jave_sp ++ // NOTE: the order of theses push(es) is known to frame::interpreter_frame_result. ++ // If the order changes or anything else is added to the stack the code in ++ // interpreter_frame_result will have to be changed. 
++ //FIXME, should modify here ++ // save return value to keep the value from being destroyed by other calls ++ __ push(dtos); ++ __ push(ltos); ++ ++ // change thread state ++#ifndef OPT_THREAD ++ __ get_thread(thread); ++#endif ++ __ li(t, _thread_in_native_trans); ++ if (os::is_MP()) { ++ __ membar(Assembler::Membar_mask_bits(__ LoadStore|__ StoreStore)); // store release ++ } ++ __ st_w(t, thread, in_bytes(JavaThread::thread_state_offset())); ++ ++ if(os::is_MP()) { ++ if (UseMembar) { ++ // Force this write out before the read below ++ __ membar(__ AnyAny); ++ } else { ++ // Write serialization page so VM thread can do a pseudo remote membar. ++ // We use the current thread pointer to calculate a thread specific ++ // offset to write to within the page. This minimizes bus traffic ++ // due to cache line collision. ++ __ serialize_memory(thread, A0); ++ } ++ } ++ ++ // check for safepoint operation in progress and/or pending suspend requests ++ { Label Continue; ++ ++ // Don't use call_VM as it will see a possible pending exception and forward it ++ // and never return here preventing us from clearing _last_native_pc down below. ++ // Also can't use call_VM_leaf either as it will check to see if BCP & LVP are ++ // preserved and correspond to the bcp/locals pointers. So we do a runtime call ++ // by hand. ++ // ++ Label slow_path; ++ ++ __ safepoint_poll_acquire(slow_path, thread); ++ __ ld_w(AT, thread, in_bytes(JavaThread::suspend_flags_offset())); ++ __ beq(AT, R0, Continue); ++ __ bind(slow_path); ++ __ move(A0, thread); ++ __ call(CAST_FROM_FN_PTR(address, JavaThread::check_special_condition_for_native_trans), ++ relocInfo::runtime_call_type); ++ ++#ifndef OPT_THREAD ++ __ get_thread(thread); ++#endif ++ //add for compressedoops ++ __ reinit_heapbase(); ++ __ bind(Continue); ++ } ++ ++ // change thread state ++ __ li(t, _thread_in_Java); ++ if (os::is_MP()) { ++ __ membar(Assembler::Membar_mask_bits(__ LoadStore|__ StoreStore)); // store release ++ } ++ __ st_w(t, thread, in_bytes(JavaThread::thread_state_offset())); ++ __ reset_last_Java_frame(thread, true); ++ ++ if (CheckJNICalls) { ++ // clear_pending_jni_exception_check ++ __ st_d(R0, thread, in_bytes(JavaThread::pending_jni_exception_check_fn_offset())); ++ } ++ ++ // reset handle block ++ __ ld_d(t, thread, in_bytes(JavaThread::active_handles_offset())); ++ __ st_w(R0, t, JNIHandleBlock::top_offset_in_bytes()); ++ ++ // If result was an oop then unbox and save it in the frame ++ { ++ Label no_oop; ++ __ ld_d(AT, FP, frame::interpreter_frame_result_handler_offset*wordSize); ++ __ li(T0, AbstractInterpreter::result_handler(T_OBJECT)); ++ __ bne(AT, T0, no_oop); ++ __ pop(ltos); ++ // Unbox oop result, e.g. JNIHandles::resolve value. 
++ __ resolve_jobject(V0, thread, T4); ++ __ st_d(V0, FP, (frame::interpreter_frame_oop_temp_offset)*wordSize); ++ // keep stack depth as expected by pushing oop which will eventually be discarded ++ __ push(ltos); ++ __ bind(no_oop); ++ } ++ { ++ Label no_reguard; ++ __ ld_w(t, thread, in_bytes(JavaThread::stack_guard_state_offset())); ++ __ li(AT, (u1)JavaThread::stack_guard_yellow_reserved_disabled); ++ __ bne(t, AT, no_reguard); ++ __ pushad(); ++ __ move(S5_heapbase, SP); ++ __ li(AT, -StackAlignmentInBytes); ++ __ andr(SP, SP, AT); ++ __ call(CAST_FROM_FN_PTR(address, SharedRuntime::reguard_yellow_pages), relocInfo::runtime_call_type); ++ __ move(SP, S5_heapbase); ++ __ popad(); ++ //add for compressedoops ++ __ reinit_heapbase(); ++ __ bind(no_reguard); ++ } ++ // restore BCP to have legal interpreter frame, ++ // i.e., bci == 0 <=> BCP == code_base() ++ // Can't call_VM until bcp is within reasonable. ++ __ get_method(method); // method is junk from thread_in_native to now. ++ __ ld_d(BCP, method, in_bytes(Method::const_offset())); ++ __ lea(BCP, Address(BCP, in_bytes(ConstMethod::codes_offset()))); ++ // handle exceptions (exception handling will handle unlocking!) ++ { ++ Label L; ++ __ ld_d(t, thread, in_bytes(Thread::pending_exception_offset())); ++ __ beq(t, R0, L); ++ // Note: At some point we may want to unify this with the code used in ++ // call_VM_base(); ++ // i.e., we should use the StubRoutines::forward_exception code. For now this ++ // doesn't work here because the sp is not correctly set at this point. ++ __ MacroAssembler::call_VM(noreg, ++ CAST_FROM_FN_PTR(address, ++ InterpreterRuntime::throw_pending_exception)); ++ __ should_not_reach_here(); ++ __ bind(L); ++ } ++ ++ // do unlocking if necessary ++ { ++ Label L; ++ __ ld_w(t, method, in_bytes(Method::access_flags_offset())); ++ __ andi(t, t, JVM_ACC_SYNCHRONIZED); ++ __ addi_d(c_rarg0, FP, frame::interpreter_frame_initial_sp_offset * wordSize - (int)sizeof(BasicObjectLock)); ++ __ beq(t, R0, L); ++ // the code below should be shared with interpreter macro assembler implementation ++ { ++ Label unlock; ++ // BasicObjectLock will be first in list, ++ // since this is a synchronized method. However, need ++ // to check that the object has not been unlocked by ++ // an explicit monitorexit bytecode. ++ // address of first monitor ++ ++ __ ld_d(t, c_rarg0, BasicObjectLock::obj_offset_in_bytes()); ++ __ bne(t, R0, unlock); ++ ++ // Entry already unlocked, need to throw exception ++ __ MacroAssembler::call_VM(NOREG, CAST_FROM_FN_PTR(address, ++ InterpreterRuntime::throw_illegal_monitor_state_exception)); ++ __ should_not_reach_here(); ++ ++ __ bind(unlock); ++ __ unlock_object(c_rarg0); ++ } ++ __ bind(L); ++ } ++ ++ // jvmti/jvmpi support ++ // Note: This must happen _after_ handling/throwing any exceptions since ++ // the exception handler code notifies the runtime of method exits ++ // too. If this happens before, method entry/exit notifications are ++ // not properly paired (was bug - gri 11/22/99). 
++  __ notify_method_exit(vtos, InterpreterMacroAssembler::NotifyJVMTI);
++
++  // restore potential result in V0,
++  // call result handler to restore potential result in ST0 & handle result
++
++  __ pop(ltos);
++  __ pop(dtos);
++
++  __ ld_d(t, FP, (frame::interpreter_frame_result_handler_offset) * wordSize);
++  __ jalr(t);
++
++
++  // remove activation
++  __ ld_d(SP, FP, frame::interpreter_frame_sender_sp_offset * wordSize); // get sender sp
++  __ ld_d(RA, FP, frame::java_frame_return_addr_offset * wordSize);      // get return address
++  __ ld_d(FP, FP, frame::interpreter_frame_sender_fp_offset * wordSize); // restore sender's fp
++  __ jr(RA);
++
++#ifndef CORE
++  if (inc_counter) {
++    // Handle overflow of counter and compile method
++    __ bind(invocation_counter_overflow);
++    generate_counter_overflow(continue_after_compile);
++    // entry_point is the beginning of this
++    // function and checks again for compiled code
++  }
++#endif
++  return entry_point;
++}
++
++void TemplateInterpreterGenerator::bang_stack_shadow_pages(bool native_call) {
++  // Quick & dirty stack overflow checking: bang the stack & handle trap.
++  // Note that we do the banging after the frame is setup, since the exception
++  // handling code expects to find a valid interpreter frame on the stack.
++  // Doing the banging earlier fails if the caller frame is not an interpreter
++  // frame.
++  // (Also, the exception throwing code expects to unlock any synchronized
++  // method receiver, so do the banging after locking the receiver.)
++
++  // Bang each page in the shadow zone. We can't assume it's been done for
++  // an interpreter frame with greater than a page of locals, so each page
++  // needs to be checked.  Only true for non-native.
++  if (UseStackBanging) {
++    const int page_size = os::vm_page_size();
++    const int n_shadow_pages = ((int)JavaThread::stack_shadow_zone_size()) / page_size;
++    const int start_page = native_call ? n_shadow_pages : 1;
++    BLOCK_COMMENT("bang_stack_shadow_pages:");
++    for (int pages = start_page; pages <= n_shadow_pages; pages++) {
++      __ bang_stack_with_offset(pages*page_size);
++    }
++  }
++}
++
++//
++// Generic interpreted method entry to (asm) interpreter
++//
++// Layout of frame just at the entry
++//
++//   [ argument word n-1 ] <--- sp
++//     ...
++//   [ argument word 0   ]
++// assume Method* in Rmethod before calling this method.
++// prerequisites to the generated stub: the callee Method* in Rmethod
++// note you must save the caller bcp before calling the generated stub
++//
++address TemplateInterpreterGenerator::generate_normal_entry(bool synchronized) {
++  // determine code generation flags
++  bool inc_counter = UseCompiler || CountCompiledCalls || LogTouchedMethods;
++
++  // Rmethod: Method*
++  // Rsender: sender's sp
++  address entry_point = __ pc();
++  // S8 be used in C2
++  __ li(S8, (long)Interpreter::dispatch_table(itos));
++  const Address invocation_counter(Rmethod,
++      in_bytes(MethodCounters::invocation_counter_offset() + InvocationCounter::counter_offset()));
++
++  // get parameter size (always needed)
++  __ ld_d(T3, Rmethod, in_bytes(Method::const_offset()));  //T3 --> Rmethod._constMethod
++  __ ld_hu(V0, T3, in_bytes(ConstMethod::size_of_parameters_offset()));
++
++  // Rmethod: Method*
++  // V0: size of parameters
++  // Rsender: sender's sp, could be different from sp + wordSize if we call via c2i
++  // get size of locals in words to T2
++  __ ld_hu(T2, T3, in_bytes(ConstMethod::size_of_locals_offset()));
++  // T2 = no.
of additional locals, locals include parameters ++ __ sub_d(T2, T2, V0); ++ ++ // see if we've got enough room on the stack for locals plus overhead. ++ // Layout of frame at this point ++ // ++ // [ argument word n-1 ] <--- sp ++ // ... ++ // [ argument word 0 ] ++ generate_stack_overflow_check(); ++ // after this function, the layout of frame does not change ++ ++ // compute beginning of parameters (LVP) ++ __ slli_d(LVP, V0, LogBytesPerWord); ++ __ addi_d(LVP, LVP, (-1) * wordSize); ++ __ add_d(LVP, LVP, SP); ++ ++ // T2 - # of additional locals ++ // allocate space for locals ++ // explicitly initialize locals ++ { ++ Label exit, loop; ++ __ beq(T2, R0, exit); ++ ++ __ bind(loop); ++ __ addi_d(SP, SP, (-1) * wordSize); ++ __ addi_d(T2, T2, -1); // until everything initialized ++ __ st_d(R0, SP, 0); // initialize local variables ++ __ bne(T2, R0, loop); ++ ++ __ bind(exit); ++ } ++ ++ // ++ // [ local var m-1 ] <--- sp ++ // ... ++ // [ local var 0 ] ++ // [ argument word n-1 ] <--- T0? ++ // ... ++ // [ argument word 0 ] <--- LVP ++ ++ // initialize fixed part of activation frame ++ ++ generate_fixed_frame(false); ++ ++ ++ // after this function, the layout of frame is as following ++ // ++ // [ monitor block top ] <--- sp ( the top monitor entry ) ++ // [ byte code pointer ] (if native, bcp = 0) ++ // [ constant pool cache ] ++ // [ Method* ] ++ // [ locals offset ] ++ // [ sender's sp ] ++ // [ sender's fp ] <--- fp ++ // [ return address ] ++ // [ local var m-1 ] ++ // ... ++ // [ local var 0 ] ++ // [ argumnet word n-1 ] <--- ( sender's sp ) ++ // ... ++ // [ argument word 0 ] <--- LVP ++ ++ ++ // make sure method is not native & not abstract ++#ifdef ASSERT ++ __ ld_d(AT, Rmethod, in_bytes(Method::access_flags_offset())); ++ { ++ Label L; ++ __ andi(T2, AT, JVM_ACC_NATIVE); ++ __ beq(T2, R0, L); ++ __ stop("tried to execute native method as non-native"); ++ __ bind(L); ++ } ++ { ++ Label L; ++ __ andi(T2, AT, JVM_ACC_ABSTRACT); ++ __ beq(T2, R0, L); ++ __ stop("tried to execute abstract method in interpreter"); ++ __ bind(L); ++ } ++#endif ++ ++ // Since at this point in the method invocation the exception handler ++ // would try to exit the monitor of synchronized methods which hasn't ++ // been entered yet, we set the thread local variable ++ // _do_not_unlock_if_synchronized to true. The remove_activation will ++ // check this flag. 
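++  // Editor's note (not part of the original Loongson patch): the flag set a
++  // few lines below stays raised only across the counter/profiling checks and
++  // the stack bang; it is cleared again right after bang_stack_shadow_pages()
++  // further down, after which remove_activation unlocks normally again.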
++ ++#ifndef OPT_THREAD ++ Register thread = T8; ++ __ get_thread(thread); ++#else ++ Register thread = TREG; ++#endif ++ __ li(AT, (int)true); ++ __ st_b(AT, thread, in_bytes(JavaThread::do_not_unlock_if_synchronized_offset())); ++ ++#ifndef CORE ++ ++ // mdp : T8 ++ // tmp1: T4 ++ // tmp2: T2 ++ __ profile_parameters_type(T8, T4, T2); ++ ++ // increment invocation count & check for overflow ++ Label invocation_counter_overflow; ++ Label profile_method; ++ Label profile_method_continue; ++ if (inc_counter) { ++ generate_counter_incr(&invocation_counter_overflow, ++ &profile_method, ++ &profile_method_continue); ++ if (ProfileInterpreter) { ++ __ bind(profile_method_continue); ++ } ++ } ++ ++ Label continue_after_compile; ++ __ bind(continue_after_compile); ++ ++#endif // CORE ++ ++ bang_stack_shadow_pages(false); ++ ++ // reset the _do_not_unlock_if_synchronized flag ++#ifndef OPT_THREAD ++ __ get_thread(thread); ++#endif ++ __ st_b(R0, thread, in_bytes(JavaThread::do_not_unlock_if_synchronized_offset())); ++ ++ // check for synchronized methods ++ // Must happen AFTER invocation_counter check and stack overflow check, ++ // so method is not locked if overflows. ++ // ++ if (synchronized) { ++ // Allocate monitor and lock method ++ lock_method(); ++ } else { ++ // no synchronization necessary ++#ifdef ASSERT ++ { Label L; ++ __ ld_w(AT, Rmethod, in_bytes(Method::access_flags_offset())); ++ __ andi(T2, AT, JVM_ACC_SYNCHRONIZED); ++ __ beq(T2, R0, L); ++ __ stop("method needs synchronization"); ++ __ bind(L); ++ } ++#endif ++ } ++ ++ // layout of frame after lock_method ++ // [ monitor entry ] <--- sp ++ // ... ++ // [ monitor entry ] ++ // [ monitor block top ] ( the top monitor entry ) ++ // [ byte code pointer ] (if native, bcp = 0) ++ // [ constant pool cache ] ++ // [ Method* ] ++ // [ locals offset ] ++ // [ sender's sp ] ++ // [ sender's fp ] ++ // [ return address ] <--- fp ++ // [ local var m-1 ] ++ // ... ++ // [ local var 0 ] ++ // [ argumnet word n-1 ] <--- ( sender's sp ) ++ // ... ++ // [ argument word 0 ] <--- LVP ++ ++ ++ // start execution ++#ifdef ASSERT ++ { ++ Label L; ++ __ ld_d(AT, FP, frame::interpreter_frame_monitor_block_top_offset * wordSize); ++ __ beq(AT, SP, L); ++ __ stop("broken stack frame setup in interpreter in native"); ++ __ bind(L); ++ } ++#endif ++ ++ // jvmti/jvmpi support ++ __ notify_method_entry(); ++ ++ __ dispatch_next(vtos); ++ ++ // invocation counter overflow ++ if (inc_counter) { ++ if (ProfileInterpreter) { ++ // We have decided to profile this method in the interpreter ++ __ bind(profile_method); ++ __ call_VM(noreg, CAST_FROM_FN_PTR(address, ++ InterpreterRuntime::profile_method)); ++ __ set_method_data_pointer_for_bcp(); ++ __ get_method(Rmethod); ++ __ b(profile_method_continue); ++ } ++ // Handle overflow of counter and compile method ++ __ bind(invocation_counter_overflow); ++ generate_counter_overflow(continue_after_compile); ++ } ++ ++ return entry_point; ++} ++ ++//----------------------------------------------------------------------------- ++// Exceptions ++ ++void TemplateInterpreterGenerator::generate_throw_exception() { ++ // Entry point in previous activation (i.e., if the caller was ++ // interpreted) ++ Interpreter::_rethrow_exception_entry = __ pc(); ++ // Restore sp to interpreter_frame_last_sp even though we are going ++ // to empty the expression stack for the exception processing. 
++ __ st_d(R0,FP, frame::interpreter_frame_last_sp_offset * wordSize); ++ ++ // V0: exception ++ // V1: return address/pc that threw exception ++ __ restore_bcp(); // BCP points to call/send ++ __ restore_locals(); ++ ++ //add for compressedoops ++ __ reinit_heapbase(); ++ // S8 be used in C2 ++ __ li(S8, (long)Interpreter::dispatch_table(itos)); ++ // Entry point for exceptions thrown within interpreter code ++ Interpreter::_throw_exception_entry = __ pc(); ++ // expression stack is undefined here ++ // V0: exception ++ // BCP: exception bcp ++ __ verify_oop(V0); ++ ++ // expression stack must be empty before entering the VM in case of an exception ++ __ empty_expression_stack(); ++ // find exception handler address and preserve exception oop ++ __ move(A1, V0); ++ __ call_VM(V1, CAST_FROM_FN_PTR(address, InterpreterRuntime::exception_handler_for_exception), A1); ++ // V0: exception handler entry point ++ // V1: preserved exception oop ++ // S0: bcp for exception handler ++ __ push(V1); // push exception which is now the only value on the stack ++ __ jr(V0); // jump to exception handler (may be _remove_activation_entry!) ++ ++ // If the exception is not handled in the current frame the frame is removed and ++ // the exception is rethrown (i.e. exception continuation is _rethrow_exception). ++ // ++ // Note: At this point the bci is still the bxi for the instruction which caused ++ // the exception and the expression stack is empty. Thus, for any VM calls ++ // at this point, GC will find a legal oop map (with empty expression stack). ++ ++ // In current activation ++ // V0: exception ++ // BCP: exception bcp ++ ++ // ++ // JVMTI PopFrame support ++ // ++ ++ Interpreter::_remove_activation_preserving_args_entry = __ pc(); ++ __ empty_expression_stack(); ++ // Set the popframe_processing bit in pending_popframe_condition indicating that we are ++ // currently handling popframe, so that call_VMs that may happen later do not trigger new ++ // popframe handling cycles. ++#ifndef OPT_THREAD ++ Register thread = T2; ++ __ get_thread(T2); ++#else ++ Register thread = TREG; ++#endif ++ __ ld_w(T3, thread, in_bytes(JavaThread::popframe_condition_offset())); ++ __ ori(T3, T3, JavaThread::popframe_processing_bit); ++ __ st_w(T3, thread, in_bytes(JavaThread::popframe_condition_offset())); ++ ++#ifndef CORE ++ { ++ // Check to see whether we are returning to a deoptimized frame. ++ // (The PopFrame call ensures that the caller of the popped frame is ++ // either interpreted or compiled and deoptimizes it if compiled.) ++ // In this case, we can't call dispatch_next() after the frame is ++ // popped, but instead must save the incoming arguments and restore ++ // them after deoptimization has occurred. ++ // ++ // Note that we don't compare the return PC against the ++ // deoptimization blob's unpack entry because of the presence of ++ // adapter frames in C2. 
++ Label caller_not_deoptimized; ++ __ ld_d(A0, FP, frame::java_frame_return_addr_offset * wordSize); ++ __ super_call_VM_leaf(CAST_FROM_FN_PTR(address, InterpreterRuntime::interpreter_contains), A0); ++ __ bne(V0, R0, caller_not_deoptimized); ++ ++ // Compute size of arguments for saving when returning to deoptimized caller ++ __ get_method(A1); ++ __ verify_oop(A1); ++ __ ld_d(A1, A1, in_bytes(Method::const_offset())); ++ __ ld_hu(A1, A1, in_bytes(ConstMethod::size_of_parameters_offset())); ++ __ shl(A1, Interpreter::logStackElementSize); ++ __ restore_locals(); ++ __ sub_d(A2, LVP, A1); ++ __ addi_d(A2, A2, wordSize); ++ // Save these arguments ++#ifndef OPT_THREAD ++ __ get_thread(A0); ++#else ++ __ move(A0, TREG); ++#endif ++ __ super_call_VM_leaf(CAST_FROM_FN_PTR(address, Deoptimization::popframe_preserve_args), A0, A1, A2); ++ ++ __ remove_activation(vtos, T4, false, false, false); ++ ++ // Inform deoptimization that it is responsible for restoring these arguments ++#ifndef OPT_THREAD ++ __ get_thread(thread); ++#endif ++ __ li(AT, JavaThread::popframe_force_deopt_reexecution_bit); ++ __ st_w(AT, thread, in_bytes(JavaThread::popframe_condition_offset())); ++ // Continue in deoptimization handler ++ __ jr(T4); ++ ++ __ bind(caller_not_deoptimized); ++ } ++#endif /* !CORE */ ++ ++ __ remove_activation(vtos, T3, ++ /* throw_monitor_exception */ false, ++ /* install_monitor_exception */ false, ++ /* notify_jvmdi */ false); ++ ++ // Clear the popframe condition flag ++ // Finish with popframe handling ++ // A previous I2C followed by a deoptimization might have moved the ++ // outgoing arguments further up the stack. PopFrame expects the ++ // mutations to those outgoing arguments to be preserved and other ++ // constraints basically require this frame to look exactly as ++ // though it had previously invoked an interpreted activation with ++ // no space between the top of the expression stack (current ++ // last_sp) and the top of stack. Rather than force deopt to ++ // maintain this kind of invariant all the time we call a small ++ // fixup routine to move the mutated arguments onto the top of our ++ // expression stack if necessary. ++ __ move(T8, SP); ++ __ ld_d(A2, FP, frame::interpreter_frame_last_sp_offset * wordSize); ++#ifndef OPT_THREAD ++ __ get_thread(thread); ++#endif ++ // PC must point into interpreter here ++ Label L; ++ __ bind(L); ++ __ set_last_Java_frame(thread, noreg, FP, L); ++ __ super_call_VM_leaf(CAST_FROM_FN_PTR(address, InterpreterRuntime::popframe_move_outgoing_args), thread, T8, A2); ++ __ reset_last_Java_frame(thread, true); ++ // Restore the last_sp and null it out ++ __ ld_d(SP, FP, frame::interpreter_frame_last_sp_offset * wordSize); ++ __ st_d(R0, FP, frame::interpreter_frame_last_sp_offset * wordSize); ++ ++ ++ ++ __ li(AT, JavaThread::popframe_inactive); ++ __ st_w(AT, thread, in_bytes(JavaThread::popframe_condition_offset())); ++ ++ // Finish with popframe handling ++ __ restore_bcp(); ++ __ restore_locals(); ++ // S8 be used in C2 ++ __ li(S8, (long)Interpreter::dispatch_table(itos)); ++#ifndef CORE ++ // The method data pointer was incremented already during ++ // call profiling. We have to restore the mdp for the current bcp. 
++ if (ProfileInterpreter) { ++ __ set_method_data_pointer_for_bcp(); ++ } ++#endif // !CORE ++ // Clear the popframe condition flag ++#ifndef OPT_THREAD ++ __ get_thread(thread); ++#endif ++ __ li(AT, JavaThread::popframe_inactive); ++ __ st_w(AT, thread, in_bytes(JavaThread::popframe_condition_offset())); ++ ++#if INCLUDE_JVMTI ++ { ++ Label L_done; ++ ++ __ ld_bu(AT, BCP, 0); ++ __ addi_d(AT, AT, -1 * Bytecodes::_invokestatic); ++ __ bne(AT, R0, L_done); ++ ++ // The member name argument must be restored if _invokestatic is re-executed after a PopFrame call. ++ // Detect such a case in the InterpreterRuntime function and return the member name argument, or NULL. ++ ++ __ get_method(T4); ++ __ ld_d(T8, LVP, 0); ++ __ call_VM(T8, CAST_FROM_FN_PTR(address, InterpreterRuntime::member_name_arg_or_null), T8, T4, BCP); ++ ++ __ beq(T8, R0, L_done); ++ ++ __ st_d(T8, SP, 0); ++ __ bind(L_done); ++ } ++#endif // INCLUDE_JVMTI ++ ++ __ dispatch_next(vtos); ++ // end of PopFrame support ++ ++ Interpreter::_remove_activation_entry = __ pc(); ++ ++ // preserve exception over this code sequence ++ __ pop(T0); ++#ifndef OPT_THREAD ++ __ get_thread(thread); ++#endif ++ __ st_d(T0, thread, in_bytes(JavaThread::vm_result_offset())); ++ // remove the activation (without doing throws on illegalMonitorExceptions) ++ __ remove_activation(vtos, T3, false, true, false); ++ // restore exception ++ __ get_vm_result(T0, thread); ++ __ verify_oop(T0); ++ ++ // In between activations - previous activation type unknown yet ++ // compute continuation point - the continuation point expects ++ // the following registers set up: ++ // ++ // T0: exception ++ // T1: return address/pc that threw exception ++ // SP: expression stack of caller ++ // FP: fp of caller ++ __ push2(T0, T3); // save exception and return address ++ __ move(A1, T3); ++ __ super_call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::exception_handler_for_return_address), thread, A1); ++ __ move(T4, V0); // save exception handler ++ __ pop2(V0, V1); // restore return address and exception ++ ++ // Note that an "issuing PC" is actually the next PC after the call ++ __ jr(T4); // jump to exception handler of caller ++} ++ ++ ++// ++// JVMTI ForceEarlyReturn support ++// ++address TemplateInterpreterGenerator::generate_earlyret_entry_for(TosState state) { ++ address entry = __ pc(); ++ __ restore_bcp(); ++ __ restore_locals(); ++ __ empty_expression_stack(); ++ __ empty_FPU_stack(); ++ __ load_earlyret_value(state); ++ ++#ifndef OPT_THREAD ++ __ get_thread(TREG); ++#endif ++ __ ld_ptr(T4, TREG, in_bytes(JavaThread::jvmti_thread_state_offset())); ++ const Address cond_addr(T4, in_bytes(JvmtiThreadState::earlyret_state_offset())); ++ // Clear the earlyret state ++ __ li(AT, JvmtiThreadState::earlyret_inactive); ++ __ st_w(AT, cond_addr); ++ __ membar(__ AnyAny);//no membar here for aarch64 ++ ++ ++ __ remove_activation(state, T0, ++ false, /* throw_monitor_exception */ ++ false, /* install_monitor_exception */ ++ true); /* notify_jvmdi */ ++ __ membar(__ AnyAny); ++ __ jr(T0); ++ ++ return entry; ++} // end of ForceEarlyReturn support ++ ++ ++//----------------------------------------------------------------------------- ++// Helper for vtos entry point generation ++ ++void TemplateInterpreterGenerator::set_vtos_entry_points(Template* t, ++ address& bep, ++ address& cep, ++ address& sep, ++ address& aep, ++ address& iep, ++ address& lep, ++ address& fep, ++ address& dep, ++ address& vep) { ++ assert(t->is_valid() && t->tos_in() == vtos, "illegal 
template"); ++ Label L; ++ fep = __ pc(); __ push(ftos); __ b(L); ++ dep = __ pc(); __ push(dtos); __ b(L); ++ lep = __ pc(); __ push(ltos); __ b(L); ++ aep =__ pc(); __ push(atos); __ b(L); ++ bep = cep = sep = ++ iep = __ pc(); __ push(itos); ++ vep = __ pc(); ++ __ bind(L); ++ generate_and_dispatch(t); ++} ++ ++//----------------------------------------------------------------------------- ++ ++// Non-product code ++#ifndef PRODUCT ++address TemplateInterpreterGenerator::generate_trace_code(TosState state) { ++ address entry = __ pc(); ++ ++ // prepare expression stack ++ __ push(state); // save tosca ++ ++ // tos & tos2 ++ // trace_bytecode need actually 4 args, the last two is tos&tos2 ++ // this work fine for x86. but LA ABI calling convention will store A2-A3 ++ // to the stack position it think is the tos&tos2 ++ // when the expression stack have no more than 2 data, error occur. ++ __ ld_d(A2, SP, 0); ++ __ ld_d(A3, SP, 1 * wordSize); ++ ++ // pass arguments & call tracer ++ __ call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::trace_bytecode), RA, A2, A3); ++ __ move(RA, V0); // make sure return address is not destroyed by pop(state) ++ ++ // restore expression stack ++ __ pop(state); // restore tosca ++ ++ // return ++ __ jr(RA); ++ return entry; ++} ++ ++void TemplateInterpreterGenerator::count_bytecode() { ++ __ li(T8, (long)&BytecodeCounter::_counter_value); ++ __ ld_w(AT, T8, 0); ++ __ addi_d(AT, AT, 1); ++ __ st_w(AT, T8, 0); ++} ++ ++void TemplateInterpreterGenerator::histogram_bytecode(Template* t) { ++ __ li(T8, (long)&BytecodeHistogram::_counters[t->bytecode()]); ++ __ ld_w(AT, T8, 0); ++ __ addi_d(AT, AT, 1); ++ __ st_w(AT, T8, 0); ++} ++ ++void TemplateInterpreterGenerator::histogram_bytecode_pair(Template* t) { ++ __ li(T8, (long)&BytecodePairHistogram::_index); ++ __ ld_w(T4, T8, 0); ++ __ srli_d(T4, T4, BytecodePairHistogram::log2_number_of_codes); ++ __ li(T8, ((long)t->bytecode()) << BytecodePairHistogram::log2_number_of_codes); ++ __ orr(T4, T4, T8); ++ __ li(T8, (long)&BytecodePairHistogram::_index); ++ __ st_w(T4, T8, 0); ++ __ slli_d(T4, T4, 2); ++ __ li(T8, (long)BytecodePairHistogram::_counters); ++ __ add_d(T8, T8, T4); ++ __ ld_w(AT, T8, 0); ++ __ addi_d(AT, AT, 1); ++ __ st_w(AT, T8, 0); ++} ++ ++ ++void TemplateInterpreterGenerator::trace_bytecode(Template* t) { ++ // Call a little run-time stub to avoid blow-up for each bytecode. ++ // The run-time runtime saves the right registers, depending on ++ // the tosca in-state for the given template. ++ address entry = Interpreter::trace_code(t->tos_in()); ++ assert(entry != NULL, "entry must have been generated"); ++ __ call(entry, relocInfo::none); ++ //add for compressedoops ++ __ reinit_heapbase(); ++} ++ ++ ++void TemplateInterpreterGenerator::stop_interpreter_at() { ++ Label L; ++ __ li(T8, long(&BytecodeCounter::_counter_value)); ++ __ ld_w(T8, T8, 0); ++ __ li(AT, StopInterpreterAt); ++ __ bne(T8, AT, L); ++ __ brk(5); ++ __ bind(L); ++} ++#endif // !PRODUCT +diff --git a/src/hotspot/cpu/loongarch/templateTable_loongarch.hpp b/src/hotspot/cpu/loongarch/templateTable_loongarch.hpp +new file mode 100644 +index 0000000000..ddb38faf44 +--- /dev/null ++++ b/src/hotspot/cpu/loongarch/templateTable_loongarch.hpp +@@ -0,0 +1,43 @@ ++/* ++ * Copyright (c) 2003, 2012, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#ifndef CPU_LOONGARCH_TEMPLATETABLE_LOONGARCH_64_HPP ++#define CPU_LOONGARCH_TEMPLATETABLE_LOONGARCH_64_HPP ++ ++ static void prepare_invoke(int byte_no, ++ Register method, // linked method (or i-klass) ++ Register index = noreg, // itable index, MethodType, etc. ++ Register recv = noreg, // if caller wants to see it ++ Register flags = noreg // if caller wants to test it ++ ); ++ static void invokevirtual_helper(Register index, Register recv, ++ Register flags); ++ static void volatile_barrier(); ++ ++ // Helpers ++ static void index_check(Register array, Register index); ++ static void index_check_without_pop(Register array, Register index); ++ ++#endif // CPU_LOONGARCH_TEMPLATETABLE_LOONGARCH_64_HPP +diff --git a/src/hotspot/cpu/loongarch/templateTable_loongarch_64.cpp b/src/hotspot/cpu/loongarch/templateTable_loongarch_64.cpp +new file mode 100644 +index 0000000000..4f1d226a1a +--- /dev/null ++++ b/src/hotspot/cpu/loongarch/templateTable_loongarch_64.cpp +@@ -0,0 +1,4115 @@ ++/* ++ * Copyright (c) 2003, 2013, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ * ++ */ ++ ++#include "precompiled.hpp" ++#include "asm/macroAssembler.hpp" ++#include "interpreter/interpreter.hpp" ++#include "interpreter/interpreterRuntime.hpp" ++#include "interpreter/interp_masm.hpp" ++#include "interpreter/templateTable.hpp" ++#include "memory/universe.hpp" ++#include "oops/methodData.hpp" ++#include "oops/objArrayKlass.hpp" ++#include "oops/oop.inline.hpp" ++#include "prims/methodHandles.hpp" ++#include "runtime/frame.inline.hpp" ++#include "runtime/sharedRuntime.hpp" ++#include "runtime/stubRoutines.hpp" ++#include "runtime/synchronizer.hpp" ++#include "utilities/macros.hpp" ++ ++ ++#ifndef CC_INTERP ++ ++#define __ _masm-> ++ ++#define T0 RT0 ++#define T1 RT1 ++#define T2 RT2 ++#define T3 RT3 ++#define T4 RT4 ++#define T6 RT6 ++#define T8 RT8 ++ ++// Platform-dependent initialization ++ ++void TemplateTable::pd_initialize() { ++ // No LoongArch specific initialization ++} ++ ++// Address computation: local variables ++ ++static inline Address iaddress(int n) { ++ return Address(LVP, Interpreter::local_offset_in_bytes(n)); ++} ++ ++static inline Address laddress(int n) { ++ return iaddress(n + 1); ++} ++ ++static inline Address faddress(int n) { ++ return iaddress(n); ++} ++ ++static inline Address daddress(int n) { ++ return laddress(n); ++} ++ ++static inline Address aaddress(int n) { ++ return iaddress(n); ++} ++static inline Address haddress(int n) { return iaddress(n + 0); } ++ ++ ++static inline Address at_sp() { return Address(SP, 0); } ++static inline Address at_sp_p1() { return Address(SP, 1 * wordSize); } ++static inline Address at_sp_p2() { return Address(SP, 2 * wordSize); } ++ ++// At top of Java expression stack which may be different than sp(). ++// It isn't for category 1 objects. ++static inline Address at_tos () { ++ Address tos = Address(SP, Interpreter::expr_offset_in_bytes(0)); ++ return tos; ++} ++ ++static inline Address at_tos_p1() { ++ return Address(SP, Interpreter::expr_offset_in_bytes(1)); ++} ++ ++static inline Address at_tos_p2() { ++ return Address(SP, Interpreter::expr_offset_in_bytes(2)); ++} ++ ++static inline Address at_tos_p3() { ++ return Address(SP, Interpreter::expr_offset_in_bytes(3)); ++} ++ ++// we use S0 as bcp, be sure you have bcp in S0 before you call any of the Template generator ++Address TemplateTable::at_bcp(int offset) { ++ assert(_desc->uses_bcp(), "inconsistent uses_bcp information"); ++ return Address(BCP, offset); ++} ++ ++// Miscelaneous helper routines ++// Store an oop (or NULL) at the address described by obj. 
++// If val == noreg this means store a NULL ++static void do_oop_store(InterpreterMacroAssembler* _masm, ++ Address dst, ++ Register val, ++ DecoratorSet decorators = 0) { ++ assert(val == noreg || val == V0, "parameter is just for looks"); ++ __ store_heap_oop(dst, val, T4, T1, decorators); ++} ++ ++static void do_oop_load(InterpreterMacroAssembler* _masm, ++ Address src, ++ Register dst, ++ DecoratorSet decorators = 0) { ++ __ load_heap_oop(dst, src, T4, T1, decorators); ++} ++ ++// bytecode folding ++void TemplateTable::patch_bytecode(Bytecodes::Code bc, Register bc_reg, ++ Register tmp_reg, bool load_bc_into_bc_reg/*=true*/, ++ int byte_no) { ++ if (!RewriteBytecodes) return; ++ Label L_patch_done; ++ ++ switch (bc) { ++ case Bytecodes::_fast_aputfield: ++ case Bytecodes::_fast_bputfield: ++ case Bytecodes::_fast_zputfield: ++ case Bytecodes::_fast_cputfield: ++ case Bytecodes::_fast_dputfield: ++ case Bytecodes::_fast_fputfield: ++ case Bytecodes::_fast_iputfield: ++ case Bytecodes::_fast_lputfield: ++ case Bytecodes::_fast_sputfield: ++ { ++ // We skip bytecode quickening for putfield instructions when ++ // the put_code written to the constant pool cache is zero. ++ // This is required so that every execution of this instruction ++ // calls out to InterpreterRuntime::resolve_get_put to do ++ // additional, required work. ++ assert(byte_no == f1_byte || byte_no == f2_byte, "byte_no out of range"); ++ assert(load_bc_into_bc_reg, "we use bc_reg as temp"); ++ __ get_cache_and_index_and_bytecode_at_bcp(tmp_reg, bc_reg, tmp_reg, byte_no, 1); ++ __ addi_d(bc_reg, R0, bc); ++ __ beq(tmp_reg, R0, L_patch_done); ++ } ++ break; ++ default: ++ assert(byte_no == -1, "sanity"); ++ // the pair bytecodes have already done the load. ++ if (load_bc_into_bc_reg) { ++ __ li(bc_reg, bc); ++ } ++ } ++ ++ if (JvmtiExport::can_post_breakpoint()) { ++ Label L_fast_patch; ++ // if a breakpoint is present we can't rewrite the stream directly ++ __ ld_bu(tmp_reg, at_bcp(0)); ++ __ li(AT, Bytecodes::_breakpoint); ++ __ bne(tmp_reg, AT, L_fast_patch); ++ ++ __ get_method(tmp_reg); ++ // Let breakpoint table handling rewrite to quicker bytecode ++ __ call_VM(NOREG, CAST_FROM_FN_PTR(address, ++ InterpreterRuntime::set_original_bytecode_at), tmp_reg, BCP, bc_reg); ++ ++ __ b(L_patch_done); ++ __ bind(L_fast_patch); ++ } ++ ++#ifdef ASSERT ++ Label L_okay; ++ __ ld_bu(tmp_reg, at_bcp(0)); ++ __ li(AT, (int)Bytecodes::java_code(bc)); ++ __ beq(tmp_reg, AT, L_okay); ++ __ beq(tmp_reg, bc_reg, L_patch_done); ++ __ stop("patching the wrong bytecode"); ++ __ bind(L_okay); ++#endif ++ ++ // patch bytecode ++ __ st_b(bc_reg, at_bcp(0)); ++ __ bind(L_patch_done); ++} ++ ++ ++// Individual instructions ++ ++void TemplateTable::nop() { ++ transition(vtos, vtos); ++ // nothing to do ++} ++ ++void TemplateTable::shouldnotreachhere() { ++ transition(vtos, vtos); ++ __ stop("shouldnotreachhere bytecode"); ++} ++ ++void TemplateTable::aconst_null() { ++ transition(vtos, atos); ++ __ move(FSR, R0); ++} ++ ++void TemplateTable::iconst(int value) { ++ transition(vtos, itos); ++ if (value == 0) { ++ __ move(FSR, R0); ++ } else { ++ __ li(FSR, value); ++ } ++} ++ ++void TemplateTable::lconst(int value) { ++ transition(vtos, ltos); ++ if (value == 0) { ++ __ move(FSR, R0); ++ } else { ++ __ li(FSR, value); ++ } ++} ++ ++void TemplateTable::fconst(int value) { ++ transition(vtos, ftos); ++ switch( value ) { ++ case 0: __ movgr2fr_w(FSF, R0); return; ++ case 1: __ addi_d(AT, R0, 1); break; ++ case 2: __ addi_d(AT, R0, 2); break; ++ 
default: ShouldNotReachHere(); ++ } ++ __ movgr2fr_w(FSF, AT); ++ __ ffint_s_w(FSF, FSF); ++} ++ ++void TemplateTable::dconst(int value) { ++ transition(vtos, dtos); ++ switch( value ) { ++ case 0: __ movgr2fr_d(FSF, R0); ++ return; ++ case 1: __ addi_d(AT, R0, 1); ++ __ movgr2fr_d(FSF, AT); ++ __ ffint_d_w(FSF, FSF); ++ break; ++ default: ShouldNotReachHere(); ++ } ++} ++ ++void TemplateTable::bipush() { ++ transition(vtos, itos); ++ __ ld_b(FSR, at_bcp(1)); ++} ++ ++void TemplateTable::sipush() { ++ transition(vtos, itos); ++ __ ld_b(FSR, BCP, 1); ++ __ ld_bu(AT, BCP, 2); ++ __ slli_d(FSR, FSR, 8); ++ __ orr(FSR, FSR, AT); ++} ++ ++// T1 : tags ++// T2 : index ++// T3 : cpool ++// T8 : tag ++void TemplateTable::ldc(bool wide) { ++ transition(vtos, vtos); ++ Label call_ldc, notFloat, notClass, notInt, Done; ++ // get index in cpool ++ if (wide) { ++ __ get_unsigned_2_byte_index_at_bcp(T2, 1); ++ } else { ++ __ ld_bu(T2, at_bcp(1)); ++ } ++ ++ __ get_cpool_and_tags(T3, T1); ++ ++ const int base_offset = ConstantPool::header_size() * wordSize; ++ const int tags_offset = Array::base_offset_in_bytes(); ++ ++ // get type ++ __ add_d(AT, T1, T2); ++ __ ld_b(T1, AT, tags_offset); ++ if(os::is_MP()) { ++ __ membar(Assembler::Membar_mask_bits(__ LoadLoad|__ LoadStore)); ++ } ++ //now T1 is the tag ++ ++ // unresolved class - get the resolved class ++ __ addi_d(AT, T1, - JVM_CONSTANT_UnresolvedClass); ++ __ beq(AT, R0, call_ldc); ++ ++ // unresolved class in error (resolution failed) - call into runtime ++ // so that the same error from first resolution attempt is thrown. ++ __ addi_d(AT, T1, -JVM_CONSTANT_UnresolvedClassInError); ++ __ beq(AT, R0, call_ldc); ++ ++ // resolved class - need to call vm to get java mirror of the class ++ __ addi_d(AT, T1, - JVM_CONSTANT_Class); ++ __ slli_d(T2, T2, Address::times_8); ++ __ bne(AT, R0, notClass); ++ ++ __ bind(call_ldc); ++ __ li(A1, wide); ++ call_VM(FSR, CAST_FROM_FN_PTR(address, InterpreterRuntime::ldc), A1); ++ //__ push(atos); ++ __ addi_d(SP, SP, - Interpreter::stackElementSize); ++ __ st_d(FSR, SP, 0); ++ __ b(Done); ++ ++ __ bind(notClass); ++ __ addi_d(AT, T1, -JVM_CONSTANT_Float); ++ __ bne(AT, R0, notFloat); ++ // ftos ++ __ add_d(AT, T3, T2); ++ __ fld_s(FSF, AT, base_offset); ++ //__ push_f(); ++ __ addi_d(SP, SP, - Interpreter::stackElementSize); ++ __ fst_s(FSF, SP, 0); ++ __ b(Done); ++ ++ __ bind(notFloat); ++ __ addi_d(AT, T1, -JVM_CONSTANT_Integer); ++ __ bne(AT, R0, notInt); ++ // itos ++ __ add_d(T0, T3, T2); ++ __ ld_w(FSR, T0, base_offset); ++ __ push(itos); ++ __ b(Done); ++ ++ // assume the tag is for condy; if not, the VM runtime will tell us ++ __ bind(notInt); ++ condy_helper(Done); ++ ++ __ bind(Done); ++} ++ ++void TemplateTable::condy_helper(Label& Done) { ++ const Register obj = FSR; ++ const Register off = SSR; ++ const Register flags = T3; ++ const Register rarg = A1; ++ __ li(rarg, (int)bytecode()); ++ __ call_VM(obj, CAST_FROM_FN_PTR(address, InterpreterRuntime::resolve_ldc), rarg); ++ __ get_vm_result_2(flags, TREG); ++ // VMr = obj = base address to find primitive value to push ++ // VMr2 = flags = (tos, off) using format of CPCE::_flags ++ __ li(AT, ConstantPoolCacheEntry::field_index_mask); ++ __ andr(off, flags, AT); ++ __ add_d(obj, off, obj); ++ const Address field(obj, 0 * wordSize); ++ ++ // What sort of thing are we loading? 
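// A small sketch of the flags decoding done here, assuming the usual
// ConstantPoolCacheEntry encoding (numeric values illustrative only): the
// low bits give the offset of the resolved value from `obj`, the high bits
// give the tos state that selects which push below is taken.
static const unsigned kFieldIndexMaskSketch = 0xffff;  // stand-in for field_index_mask
static const unsigned kTosStateShiftSketch  = 28;      // stand-in for tos_state_shift
static void decode_condy_flags(unsigned flags, unsigned* offset, unsigned* tos_state) {
  *offset    = flags & kFieldIndexMaskSketch;   // where the value lives, relative to obj
  *tos_state = flags >> kTosStateShiftSketch;   // what kind of value it is (itos, ftos, ...)
}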
++ __ srli_d(flags, flags, ConstantPoolCacheEntry::tos_state_shift); ++ ConstantPoolCacheEntry::verify_tos_state_shift(); ++ ++ switch (bytecode()) { ++ case Bytecodes::_ldc: ++ case Bytecodes::_ldc_w: ++ { ++ // tos in (itos, ftos, stos, btos, ctos, ztos) ++ Label notInt, notFloat, notShort, notByte, notChar, notBool; ++ __ addi_d(AT, flags, -itos); ++ __ bne(AT, R0, notInt); ++ // itos ++ __ ld_d(obj, field); ++ __ push(itos); ++ __ b(Done); ++ ++ __ bind(notInt); ++ __ addi_d(AT, flags, -ftos); ++ __ bne(AT, R0, notFloat); ++ // ftos ++ __ fld_s(FSF, field); ++ __ push(ftos); ++ __ b(Done); ++ ++ __ bind(notFloat); ++ __ addi_d(AT, flags, -stos); ++ __ bne(AT, R0, notShort); ++ // stos ++ __ ld_h(obj, field); ++ __ push(stos); ++ __ b(Done); ++ ++ __ bind(notShort); ++ __ addi_d(AT, flags, -btos); ++ __ bne(AT, R0, notByte); ++ // btos ++ __ ld_b(obj, field); ++ __ push(btos); ++ __ b(Done); ++ ++ __ bind(notByte); ++ __ addi_d(AT, flags, -ctos); ++ __ bne(AT, R0, notChar); ++ // ctos ++ __ ld_hu(obj, field); ++ __ push(ctos); ++ __ b(Done); ++ ++ __ bind(notChar); ++ __ addi_d(AT, flags, -ztos); ++ __ bne(AT, R0, notBool); ++ // ztos ++ __ ld_bu(obj, field); ++ __ push(ztos); ++ __ b(Done); ++ ++ __ bind(notBool); ++ break; ++ } ++ ++ case Bytecodes::_ldc2_w: ++ { ++ Label notLong, notDouble; ++ __ addi_d(AT, flags, -ltos); ++ __ bne(AT, R0, notLong); ++ // ltos ++ __ ld_d(obj, field); ++ __ push(ltos); ++ __ b(Done); ++ ++ __ bind(notLong); ++ __ addi_d(AT, flags, -dtos); ++ __ bne(AT, R0, notDouble); ++ // dtos ++ __ fld_d(FSF, field); ++ __ push(dtos); ++ __ b(Done); ++ ++ __ bind(notDouble); ++ break; ++ } ++ ++ default: ++ ShouldNotReachHere(); ++ } ++ ++ __ stop("bad ldc/condy"); ++} ++ ++// Fast path for caching oop constants. ++void TemplateTable::fast_aldc(bool wide) { ++ transition(vtos, atos); ++ ++ Register result = FSR; ++ Register tmp = SSR; ++ Register rarg = A1; ++ int index_size = wide ? sizeof(u2) : sizeof(u1); ++ ++ Label resolved; ++ ++ // We are resolved if the resolved reference cache entry contains a ++ // non-null object (String, MethodType, etc.) ++ assert_different_registers(result, tmp); ++ __ get_cache_index_at_bcp(tmp, 1, index_size); ++ __ load_resolved_reference_at_index(result, tmp, T4); ++ __ bne(result, R0, resolved); ++ ++ address entry = CAST_FROM_FN_PTR(address, InterpreterRuntime::resolve_ldc); ++ // first time invocation - must resolve first ++ int i = (int)bytecode(); ++ __ li(rarg, i); ++ __ call_VM(result, entry, rarg); ++ ++ __ bind(resolved); ++ ++ { // Check for the null sentinel. ++ // If we just called the VM, it already did the mapping for us, ++ // but it's harmless to retry. 
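// Sketch of the sentinel mapping performed below (plain C++, not VM code):
// entries that resolved to the null constant are stored as a distinguished
// non-null sentinel object, so that a null slot still means "not yet
// resolved"; before pushing the result, the sentinel is turned back into a
// real NULL.
static const void* unmask_null_sentinel(const void* resolved, const void* sentinel) {
  return (resolved == sentinel) ? nullptr : resolved;
}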
++ Label notNull; ++ __ li(rarg, (long)Universe::the_null_sentinel_addr()); ++ __ ld_ptr(tmp, Address(rarg)); ++ __ bne(tmp, result, notNull); ++ __ xorr(result, result, result); // NULL object reference ++ __ bind(notNull); ++ } ++ ++ if (VerifyOops) { ++ __ verify_oop(result); ++ } ++} ++ ++// used register: T2, T3, T1 ++// T2 : index ++// T3 : cpool ++// T1 : tag ++void TemplateTable::ldc2_w() { ++ transition(vtos, vtos); ++ Label notDouble, notLong, Done; ++ ++ // get index in cpool ++ __ get_unsigned_2_byte_index_at_bcp(T2, 1); ++ ++ __ get_cpool_and_tags(T3, T1); ++ ++ const int base_offset = ConstantPool::header_size() * wordSize; ++ const int tags_offset = Array::base_offset_in_bytes(); ++ ++ // get type in T1 ++ __ add_d(AT, T1, T2); ++ __ ld_b(T1, AT, tags_offset); ++ ++ __ addi_d(AT, T1, -JVM_CONSTANT_Double); ++ __ bne(AT, R0, notDouble); ++ ++ // dtos ++ __ alsl_d(AT, T2, T3, Address::times_8 - 1); ++ __ fld_d(FSF, AT, base_offset); ++ __ push(dtos); ++ __ b(Done); ++ ++ __ bind(notDouble); ++ __ addi_d(AT, T1, -JVM_CONSTANT_Long); ++ __ bne(AT, R0, notLong); ++ ++ // ltos ++ __ slli_d(T2, T2, Address::times_8); ++ __ add_d(AT, T3, T2); ++ __ ld_d(FSR, AT, base_offset); ++ __ push(ltos); ++ __ b(Done); ++ ++ __ bind(notLong); ++ condy_helper(Done); ++ ++ __ bind(Done); ++} ++ ++// we compute the actual local variable address here ++void TemplateTable::locals_index(Register reg, int offset) { ++ __ ld_bu(reg, at_bcp(offset)); ++ __ slli_d(reg, reg, Address::times_8); ++ __ sub_d(reg, LVP, reg); ++} ++ ++void TemplateTable::iload() { ++ iload_internal(); ++} ++ ++void TemplateTable::nofast_iload() { ++ iload_internal(may_not_rewrite); ++} ++ ++// this method will do bytecode folding of the two form: ++// iload iload iload caload ++// used register : T2, T3 ++// T2 : bytecode ++// T3 : folded code ++void TemplateTable::iload_internal(RewriteControl rc) { ++ transition(vtos, itos); ++ if (RewriteFrequentPairs && rc == may_rewrite) { ++ Label rewrite, done; ++ // get the next bytecode in T2 ++ __ ld_bu(T2, at_bcp(Bytecodes::length_for(Bytecodes::_iload))); ++ // if _iload, wait to rewrite to iload2. We only want to rewrite the ++ // last two iloads in a pair. Comparing against fast_iload means that ++ // the next bytecode is neither an iload or a caload, and therefore ++ // an iload pair. ++ __ li(AT, Bytecodes::_iload); ++ __ beq(AT, T2, done); ++ ++ __ li(T3, Bytecodes::_fast_iload2); ++ __ li(AT, Bytecodes::_fast_iload); ++ __ beq(AT, T2, rewrite); ++ ++ // if _caload, rewrite to fast_icaload ++ __ li(T3, Bytecodes::_fast_icaload); ++ __ li(AT, Bytecodes::_caload); ++ __ beq(AT, T2, rewrite); ++ ++ // rewrite so iload doesn't check again. 
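// The branches above implement the following rewrite decision; this is only
// an illustration of the control flow (enum and parameter names hypothetical),
// not the generated code:
enum RewriteTarget { kNoRewrite, kFastIload, kFastIload2, kFastIcaload };
static RewriteTarget iload_rewrite_for(int next_bytecode,
                                       int bc_iload, int bc_fast_iload, int bc_caload) {
  if (next_bytecode == bc_iload)      return kNoRewrite;    // wait: only the last iload of a pair is rewritten
  if (next_bytecode == bc_fast_iload) return kFastIload2;   // iload, iload  -> fast_iload2
  if (next_bytecode == bc_caload)     return kFastIcaload;  // iload, caload -> fast_icaload
  return kFastIload;                                        // lone iload -> fast_iload
}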
++ __ li(T3, Bytecodes::_fast_iload); ++ ++ // rewrite ++ // T3 : fast bytecode ++ __ bind(rewrite); ++ patch_bytecode(Bytecodes::_iload, T3, T2, false); ++ __ bind(done); ++ } ++ ++ // Get the local value into tos ++ locals_index(T2); ++ __ ld_w(FSR, T2, 0); ++} ++ ++// used register T2 ++// T2 : index ++void TemplateTable::fast_iload2() { ++ transition(vtos, itos); ++ locals_index(T2); ++ __ ld_w(FSR, T2, 0); ++ __ push(itos); ++ locals_index(T2, 3); ++ __ ld_w(FSR, T2, 0); ++} ++ ++// used register T2 ++// T2 : index ++void TemplateTable::fast_iload() { ++ transition(vtos, itos); ++ locals_index(T2); ++ __ ld_w(FSR, T2, 0); ++} ++ ++// used register T2 ++// T2 : index ++void TemplateTable::lload() { ++ transition(vtos, ltos); ++ locals_index(T2); ++ __ ld_d(FSR, T2, -wordSize); ++} ++ ++// used register T2 ++// T2 : index ++void TemplateTable::fload() { ++ transition(vtos, ftos); ++ locals_index(T2); ++ __ fld_s(FSF, T2, 0); ++} ++ ++// used register T2 ++// T2 : index ++void TemplateTable::dload() { ++ transition(vtos, dtos); ++ locals_index(T2); ++ __ fld_d(FSF, T2, -wordSize); ++} ++ ++// used register T2 ++// T2 : index ++void TemplateTable::aload() { ++ transition(vtos, atos); ++ locals_index(T2); ++ __ ld_d(FSR, T2, 0); ++} ++ ++void TemplateTable::locals_index_wide(Register reg) { ++ __ get_unsigned_2_byte_index_at_bcp(reg, 2); ++ __ slli_d(reg, reg, Address::times_8); ++ __ sub_d(reg, LVP, reg); ++} ++ ++// used register T2 ++// T2 : index ++void TemplateTable::wide_iload() { ++ transition(vtos, itos); ++ locals_index_wide(T2); ++ __ ld_d(FSR, T2, 0); ++} ++ ++// used register T2 ++// T2 : index ++void TemplateTable::wide_lload() { ++ transition(vtos, ltos); ++ locals_index_wide(T2); ++ __ ld_d(FSR, T2, -wordSize); ++} ++ ++// used register T2 ++// T2 : index ++void TemplateTable::wide_fload() { ++ transition(vtos, ftos); ++ locals_index_wide(T2); ++ __ fld_s(FSF, T2, 0); ++} ++ ++// used register T2 ++// T2 : index ++void TemplateTable::wide_dload() { ++ transition(vtos, dtos); ++ locals_index_wide(T2); ++ __ fld_d(FSF, T2, -wordSize); ++} ++ ++// used register T2 ++// T2 : index ++void TemplateTable::wide_aload() { ++ transition(vtos, atos); ++ locals_index_wide(T2); ++ __ ld_d(FSR, T2, 0); ++} ++ ++// we use A2 as the regiser for index, BE CAREFUL! 
++// we dont use our tge 29 now, for later optimization ++void TemplateTable::index_check(Register array, Register index) { ++ // Pop ptr into array ++ __ pop_ptr(array); ++ index_check_without_pop(array, index); ++} ++ ++void TemplateTable::index_check_without_pop(Register array, Register index) { ++ // destroys A2 ++ // check array ++ __ null_check(array, arrayOopDesc::length_offset_in_bytes()); ++ ++ // sign extend since tos (index) might contain garbage in upper bits ++ __ slli_w(index, index, 0); ++ ++ // check index ++ Label ok; ++ __ ld_w(AT, array, arrayOopDesc::length_offset_in_bytes()); ++ __ bltu(index, AT, ok); ++ ++ //throw_ArrayIndexOutOfBoundsException assume abberrant index in A2 ++ if (A1 != array) __ move(A1, array); ++ if (A2 != index) __ move(A2, index); ++ __ jmp(Interpreter::_throw_ArrayIndexOutOfBoundsException_entry); ++ __ bind(ok); ++} ++ ++void TemplateTable::iaload() { ++ transition(itos, itos); ++ index_check(SSR, FSR); ++ __ alsl_d(FSR, FSR, SSR, 1); ++ __ access_load_at(T_INT, IN_HEAP | IS_ARRAY, FSR, Address(FSR, arrayOopDesc::base_offset_in_bytes(T_INT)), noreg, noreg); ++} ++ ++void TemplateTable::laload() { ++ transition(itos, ltos); ++ index_check(SSR, FSR); ++ __ alsl_d(T4, FSR, SSR, Address::times_8 - 1); ++ __ access_load_at(T_LONG, IN_HEAP | IS_ARRAY, FSR, Address(T4, arrayOopDesc::base_offset_in_bytes(T_LONG)), noreg, noreg); ++} ++ ++void TemplateTable::faload() { ++ transition(itos, ftos); ++ index_check(SSR, FSR); ++ __ alsl_d(FSR, FSR, SSR, Address::times_4 - 1); ++ __ access_load_at(T_FLOAT, IN_HEAP | IS_ARRAY, noreg, Address(FSR, arrayOopDesc::base_offset_in_bytes(T_FLOAT)), noreg, noreg); ++} ++ ++void TemplateTable::daload() { ++ transition(itos, dtos); ++ index_check(SSR, FSR); ++ __ alsl_d(T4, FSR, SSR, 2); ++ __ access_load_at(T_DOUBLE, IN_HEAP | IS_ARRAY, noreg, Address(T4, arrayOopDesc::base_offset_in_bytes(T_DOUBLE)), noreg, noreg); ++} ++ ++void TemplateTable::aaload() { ++ transition(itos, atos); ++ index_check(SSR, FSR); ++ __ alsl_d(FSR, FSR, SSR, (UseCompressedOops ? 
Address::times_4 : Address::times_8) - 1); ++ //add for compressedoops ++ do_oop_load(_masm, ++ Address(FSR, arrayOopDesc::base_offset_in_bytes(T_OBJECT)), ++ FSR, ++ IS_ARRAY); ++} ++ ++void TemplateTable::baload() { ++ transition(itos, itos); ++ index_check(SSR, FSR); ++ __ add_d(FSR, SSR, FSR); ++ __ access_load_at(T_BYTE, IN_HEAP | IS_ARRAY, FSR, Address(FSR, arrayOopDesc::base_offset_in_bytes(T_BYTE)), noreg, noreg); ++} ++ ++void TemplateTable::caload() { ++ transition(itos, itos); ++ index_check(SSR, FSR); ++ __ alsl_d(FSR, FSR, SSR, Address::times_2 - 1); ++ __ access_load_at(T_CHAR, IN_HEAP | IS_ARRAY, FSR, Address(FSR, arrayOopDesc::base_offset_in_bytes(T_CHAR)), noreg, noreg); ++} ++ ++// iload followed by caload frequent pair ++// used register : T2 ++// T2 : index ++void TemplateTable::fast_icaload() { ++ transition(vtos, itos); ++ // load index out of locals ++ locals_index(T2); ++ __ ld_w(FSR, T2, 0); ++ index_check(SSR, FSR); ++ __ alsl_d(FSR, FSR, SSR, 0); ++ __ access_load_at(T_CHAR, IN_HEAP | IS_ARRAY, FSR, Address(FSR, arrayOopDesc::base_offset_in_bytes(T_CHAR)), noreg, noreg); ++} ++ ++void TemplateTable::saload() { ++ transition(itos, itos); ++ index_check(SSR, FSR); ++ __ alsl_d(FSR, FSR, SSR, Address::times_2 - 1); ++ __ access_load_at(T_SHORT, IN_HEAP | IS_ARRAY, FSR, Address(FSR, arrayOopDesc::base_offset_in_bytes(T_SHORT)), noreg, noreg); ++} ++ ++void TemplateTable::iload(int n) { ++ transition(vtos, itos); ++ __ ld_w(FSR, iaddress(n)); ++} ++ ++void TemplateTable::lload(int n) { ++ transition(vtos, ltos); ++ __ ld_d(FSR, laddress(n)); ++} ++ ++void TemplateTable::fload(int n) { ++ transition(vtos, ftos); ++ __ fld_s(FSF, faddress(n)); ++} ++ ++void TemplateTable::dload(int n) { ++ transition(vtos, dtos); ++ __ fld_d(FSF, laddress(n)); ++} ++ ++void TemplateTable::aload(int n) { ++ transition(vtos, atos); ++ __ ld_d(FSR, aaddress(n)); ++} ++ ++void TemplateTable::aload_0() { ++ aload_0_internal(); ++} ++ ++void TemplateTable::nofast_aload_0() { ++ aload_0_internal(may_not_rewrite); ++} ++ ++// used register : T2, T3 ++// T2 : bytecode ++// T3 : folded code ++void TemplateTable::aload_0_internal(RewriteControl rc) { ++ transition(vtos, atos); ++ // According to bytecode histograms, the pairs: ++ // ++ // _aload_0, _fast_igetfield ++ // _aload_0, _fast_agetfield ++ // _aload_0, _fast_fgetfield ++ // ++ // occur frequently. If RewriteFrequentPairs is set, the (slow) ++ // _aload_0 bytecode checks if the next bytecode is either ++ // _fast_igetfield, _fast_agetfield or _fast_fgetfield and then ++ // rewrites the current bytecode into a pair bytecode; otherwise it ++ // rewrites the current bytecode into _fast_aload_0 that doesn't do ++ // the pair check anymore. ++ // ++ // Note: If the next bytecode is _getfield, the rewrite must be ++ // delayed, otherwise we may miss an opportunity for a pair. 
++ // ++ // Also rewrite frequent pairs ++ // aload_0, aload_1 ++ // aload_0, iload_1 ++ // These bytecodes with a small amount of code are most profitable ++ // to rewrite ++ if (RewriteFrequentPairs && rc == may_rewrite) { ++ Label rewrite, done; ++ // get the next bytecode in T2 ++ __ ld_bu(T2, at_bcp(Bytecodes::length_for(Bytecodes::_aload_0))); ++ ++ // do actual aload_0 ++ aload(0); ++ ++ // if _getfield then wait with rewrite ++ __ li(AT, Bytecodes::_getfield); ++ __ beq(AT, T2, done); ++ ++ // if _igetfield then reqrite to _fast_iaccess_0 ++ assert(Bytecodes::java_code(Bytecodes::_fast_iaccess_0) == ++ Bytecodes::_aload_0, ++ "fix bytecode definition"); ++ __ li(T3, Bytecodes::_fast_iaccess_0); ++ __ li(AT, Bytecodes::_fast_igetfield); ++ __ beq(AT, T2, rewrite); ++ ++ // if _agetfield then reqrite to _fast_aaccess_0 ++ assert(Bytecodes::java_code(Bytecodes::_fast_aaccess_0) == ++ Bytecodes::_aload_0, ++ "fix bytecode definition"); ++ __ li(T3, Bytecodes::_fast_aaccess_0); ++ __ li(AT, Bytecodes::_fast_agetfield); ++ __ beq(AT, T2, rewrite); ++ ++ // if _fgetfield then reqrite to _fast_faccess_0 ++ assert(Bytecodes::java_code(Bytecodes::_fast_faccess_0) == ++ Bytecodes::_aload_0, ++ "fix bytecode definition"); ++ __ li(T3, Bytecodes::_fast_faccess_0); ++ __ li(AT, Bytecodes::_fast_fgetfield); ++ __ beq(AT, T2, rewrite); ++ ++ // else rewrite to _fast_aload0 ++ assert(Bytecodes::java_code(Bytecodes::_fast_aload_0) == ++ Bytecodes::_aload_0, ++ "fix bytecode definition"); ++ __ li(T3, Bytecodes::_fast_aload_0); ++ ++ // rewrite ++ __ bind(rewrite); ++ patch_bytecode(Bytecodes::_aload_0, T3, T2, false); ++ ++ __ bind(done); ++ } else { ++ aload(0); ++ } ++} ++ ++void TemplateTable::istore() { ++ transition(itos, vtos); ++ locals_index(T2); ++ __ st_w(FSR, T2, 0); ++} ++ ++void TemplateTable::lstore() { ++ transition(ltos, vtos); ++ locals_index(T2); ++ __ st_d(FSR, T2, -wordSize); ++} ++ ++void TemplateTable::fstore() { ++ transition(ftos, vtos); ++ locals_index(T2); ++ __ fst_s(FSF, T2, 0); ++} ++ ++void TemplateTable::dstore() { ++ transition(dtos, vtos); ++ locals_index(T2); ++ __ fst_d(FSF, T2, -wordSize); ++} ++ ++void TemplateTable::astore() { ++ transition(vtos, vtos); ++ __ pop_ptr(FSR); ++ locals_index(T2); ++ __ st_d(FSR, T2, 0); ++} ++ ++void TemplateTable::wide_istore() { ++ transition(vtos, vtos); ++ __ pop_i(FSR); ++ locals_index_wide(T2); ++ __ st_d(FSR, T2, 0); ++} ++ ++void TemplateTable::wide_lstore() { ++ transition(vtos, vtos); ++ __ pop_l(FSR); ++ locals_index_wide(T2); ++ __ st_d(FSR, T2, -wordSize); ++} ++ ++void TemplateTable::wide_fstore() { ++ wide_istore(); ++} ++ ++void TemplateTable::wide_dstore() { ++ wide_lstore(); ++} ++ ++void TemplateTable::wide_astore() { ++ transition(vtos, vtos); ++ __ pop_ptr(FSR); ++ locals_index_wide(T2); ++ __ st_d(FSR, T2, 0); ++} ++ ++// used register : T2 ++void TemplateTable::iastore() { ++ transition(itos, vtos); ++ __ pop_i(SSR); // T2: array SSR: index ++ index_check(T2, SSR); // prefer index in SSR ++ __ alsl_d(T2, SSR, T2, Address::times_4 - 1); ++ __ access_store_at(T_INT, IN_HEAP | IS_ARRAY, Address(T2, arrayOopDesc::base_offset_in_bytes(T_INT)), FSR, noreg, noreg); ++} ++ ++// used register T2, T3 ++void TemplateTable::lastore() { ++ transition(ltos, vtos); ++ __ pop_i (T2); ++ index_check(T3, T2); ++ __ alsl_d(T3, T2, T3, Address::times_8 - 1); ++ __ access_store_at(T_LONG, IN_HEAP | IS_ARRAY, Address(T3, arrayOopDesc::base_offset_in_bytes(T_LONG)), FSR, noreg, noreg); ++} ++ ++// used register T2 ++void 
TemplateTable::fastore() { ++ transition(ftos, vtos); ++ __ pop_i(SSR); ++ index_check(T2, SSR); ++ __ alsl_d(T2, SSR, T2, Address::times_4 - 1); ++ __ access_store_at(T_FLOAT, IN_HEAP | IS_ARRAY, Address(T2, arrayOopDesc::base_offset_in_bytes(T_FLOAT)), noreg, noreg, noreg); ++} ++ ++// used register T2, T3 ++void TemplateTable::dastore() { ++ transition(dtos, vtos); ++ __ pop_i (T2); ++ index_check(T3, T2); ++ __ alsl_d(T3, T2, T3, Address::times_8 - 1); ++ __ access_store_at(T_DOUBLE, IN_HEAP | IS_ARRAY, Address(T3, arrayOopDesc::base_offset_in_bytes(T_DOUBLE)), noreg, noreg, noreg); ++} ++ ++// used register : T2, T3, T8 ++// T2 : array ++// T3 : subklass ++// T8 : supklass ++void TemplateTable::aastore() { ++ Label is_null, ok_is_subtype, done; ++ transition(vtos, vtos); ++ // stack: ..., array, index, value ++ __ ld_d(FSR, at_tos()); // Value ++ __ ld_w(SSR, at_tos_p1()); // Index ++ __ ld_d(T2, at_tos_p2()); // Array ++ ++ // index_check(T2, SSR); ++ index_check_without_pop(T2, SSR); ++ // do array store check - check for NULL value first ++ __ beq(FSR, R0, is_null); ++ ++ // Move subklass into T3 ++ //add for compressedoops ++ __ load_klass(T3, FSR); ++ // Move superklass into T8 ++ //add for compressedoops ++ __ load_klass(T8, T2); ++ __ ld_d(T8, Address(T8, ObjArrayKlass::element_klass_offset())); ++ // Compress array+index*4+12 into a single register. T2 ++ __ alsl_d(T2, SSR, T2, (UseCompressedOops? Address::times_4 : Address::times_8) - 1); ++ __ addi_d(T2, T2, arrayOopDesc::base_offset_in_bytes(T_OBJECT)); ++ ++ // Generate subtype check. ++ // Superklass in T8. Subklass in T3. ++ __ gen_subtype_check(T8, T3, ok_is_subtype); ++ // Come here on failure ++ // object is at FSR ++ __ jmp(Interpreter::_throw_ArrayStoreException_entry); ++ // Come here on success ++ __ bind(ok_is_subtype); ++ do_oop_store(_masm, Address(T2, 0), FSR, IS_ARRAY); ++ __ b(done); ++ ++ // Have a NULL in FSR, T2=array, SSR=index. Store NULL at ary[idx] ++ __ bind(is_null); ++ __ profile_null_seen(T4); ++ __ alsl_d(T2, SSR, T2, (UseCompressedOops? Address::times_4 : Address::times_8) - 1); ++ do_oop_store(_masm, Address(T2, arrayOopDesc::base_offset_in_bytes(T_OBJECT)), noreg, IS_ARRAY); ++ ++ __ bind(done); ++ __ addi_d(SP, SP, 3 * Interpreter::stackElementSize); ++} ++ ++void TemplateTable::bastore() { ++ transition(itos, vtos); ++ __ pop_i(SSR); ++ index_check(T2, SSR); ++ ++ // Need to check whether array is boolean or byte ++ // since both types share the bastore bytecode. 
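// Equivalent of the masking decision below in plain C++ (parameter names
// illustrative): the array's layout helper carries a bit that distinguishes
// boolean[] from byte[], and boolean stores keep only the lowest bit.
static int bastore_value(int value, int layout_helper, int boolean_diffbit) {
  return (layout_helper & boolean_diffbit) ? (value & 1) : value;
}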
++ __ load_klass(T4, T2); ++ __ ld_w(T4, T4, in_bytes(Klass::layout_helper_offset())); ++ ++ int diffbit = Klass::layout_helper_boolean_diffbit(); ++ __ li(AT, diffbit); ++ ++ Label L_skip; ++ __ andr(AT, T4, AT); ++ __ beq(AT, R0, L_skip); ++ __ andi(FSR, FSR, 0x1); ++ __ bind(L_skip); ++ ++ __ add_d(SSR, T2, SSR); ++ __ access_store_at(T_BYTE, IN_HEAP | IS_ARRAY, Address(SSR, arrayOopDesc::base_offset_in_bytes(T_BYTE)), FSR, noreg, noreg); ++} ++ ++void TemplateTable::castore() { ++ transition(itos, vtos); ++ __ pop_i(SSR); ++ index_check(T2, SSR); ++ __ alsl_d(SSR, SSR, T2, Address::times_2 - 1); ++ __ access_store_at(T_CHAR, IN_HEAP | IS_ARRAY, Address(SSR, arrayOopDesc::base_offset_in_bytes(T_CHAR)), FSR, noreg, noreg); ++} ++ ++void TemplateTable::sastore() { ++ castore(); ++} ++ ++void TemplateTable::istore(int n) { ++ transition(itos, vtos); ++ __ st_w(FSR, iaddress(n)); ++} ++ ++void TemplateTable::lstore(int n) { ++ transition(ltos, vtos); ++ __ st_d(FSR, laddress(n)); ++} ++ ++void TemplateTable::fstore(int n) { ++ transition(ftos, vtos); ++ __ fst_s(FSF, faddress(n)); ++} ++ ++void TemplateTable::dstore(int n) { ++ transition(dtos, vtos); ++ __ fst_d(FSF, laddress(n)); ++} ++ ++void TemplateTable::astore(int n) { ++ transition(vtos, vtos); ++ __ pop_ptr(FSR); ++ __ st_d(FSR, aaddress(n)); ++} ++ ++void TemplateTable::pop() { ++ transition(vtos, vtos); ++ __ addi_d(SP, SP, Interpreter::stackElementSize); ++} ++ ++void TemplateTable::pop2() { ++ transition(vtos, vtos); ++ __ addi_d(SP, SP, 2 * Interpreter::stackElementSize); ++} ++ ++void TemplateTable::dup() { ++ transition(vtos, vtos); ++ // stack: ..., a ++ __ load_ptr(0, FSR); ++ __ push_ptr(FSR); ++ // stack: ..., a, a ++} ++ ++// blows FSR ++void TemplateTable::dup_x1() { ++ transition(vtos, vtos); ++ // stack: ..., a, b ++ __ load_ptr(0, FSR); // load b ++ __ load_ptr(1, A5); // load a ++ __ store_ptr(1, FSR); // store b ++ __ store_ptr(0, A5); // store a ++ __ push_ptr(FSR); // push b ++ // stack: ..., b, a, b ++} ++ ++// blows FSR ++void TemplateTable::dup_x2() { ++ transition(vtos, vtos); ++ // stack: ..., a, b, c ++ __ load_ptr(0, FSR); // load c ++ __ load_ptr(2, A5); // load a ++ __ store_ptr(2, FSR); // store c in a ++ __ push_ptr(FSR); // push c ++ // stack: ..., c, b, c, c ++ __ load_ptr(2, FSR); // load b ++ __ store_ptr(2, A5); // store a in b ++ // stack: ..., c, a, c, c ++ __ store_ptr(1, FSR); // store b in c ++ // stack: ..., c, a, b, c ++} ++ ++// blows FSR ++void TemplateTable::dup2() { ++ transition(vtos, vtos); ++ // stack: ..., a, b ++ __ load_ptr(1, FSR); // load a ++ __ push_ptr(FSR); // push a ++ __ load_ptr(1, FSR); // load b ++ __ push_ptr(FSR); // push b ++ // stack: ..., a, b, a, b ++} ++ ++// blows FSR ++void TemplateTable::dup2_x1() { ++ transition(vtos, vtos); ++ // stack: ..., a, b, c ++ __ load_ptr(0, T2); // load c ++ __ load_ptr(1, FSR); // load b ++ __ push_ptr(FSR); // push b ++ __ push_ptr(T2); // push c ++ // stack: ..., a, b, c, b, c ++ __ store_ptr(3, T2); // store c in b ++ // stack: ..., a, c, c, b, c ++ __ load_ptr(4, T2); // load a ++ __ store_ptr(2, T2); // store a in 2nd c ++ // stack: ..., a, c, a, b, c ++ __ store_ptr(4, FSR); // store b in a ++ // stack: ..., b, c, a, b, c ++ ++ // stack: ..., b, c, a, b, c ++} ++ ++// blows FSR, SSR ++void TemplateTable::dup2_x2() { ++ transition(vtos, vtos); ++ // stack: ..., a, b, c, d ++ // stack: ..., a, b, c, d ++ __ load_ptr(0, T2); // load d ++ __ load_ptr(1, FSR); // load c ++ __ push_ptr(FSR); // push c ++ __ push_ptr(T2); // push 
d ++ // stack: ..., a, b, c, d, c, d ++ __ load_ptr(4, FSR); // load b ++ __ store_ptr(2, FSR); // store b in d ++ __ store_ptr(4, T2); // store d in b ++ // stack: ..., a, d, c, b, c, d ++ __ load_ptr(5, T2); // load a ++ __ load_ptr(3, FSR); // load c ++ __ store_ptr(3, T2); // store a in c ++ __ store_ptr(5, FSR); // store c in a ++ // stack: ..., c, d, a, b, c, d ++ ++ // stack: ..., c, d, a, b, c, d ++} ++ ++// blows FSR ++void TemplateTable::swap() { ++ transition(vtos, vtos); ++ // stack: ..., a, b ++ ++ __ load_ptr(1, A5); // load a ++ __ load_ptr(0, FSR); // load b ++ __ store_ptr(0, A5); // store a in b ++ __ store_ptr(1, FSR); // store b in a ++ ++ // stack: ..., b, a ++} ++ ++void TemplateTable::iop2(Operation op) { ++ transition(itos, itos); ++ ++ __ pop_i(SSR); ++ switch (op) { ++ case add : __ add_w(FSR, SSR, FSR); break; ++ case sub : __ sub_w(FSR, SSR, FSR); break; ++ case mul : __ mul_w(FSR, SSR, FSR); break; ++ case _and : __ andr(FSR, SSR, FSR); break; ++ case _or : __ orr(FSR, SSR, FSR); break; ++ case _xor : __ xorr(FSR, SSR, FSR); break; ++ case shl : __ sll_w(FSR, SSR, FSR); break; ++ case shr : __ sra_w(FSR, SSR, FSR); break; ++ case ushr : __ srl_w(FSR, SSR, FSR); break; ++ default : ShouldNotReachHere(); ++ } ++} ++ ++// the result stored in FSR, SSR, ++// used registers : T2, T3 ++void TemplateTable::lop2(Operation op) { ++ transition(ltos, ltos); ++ __ pop_l(T2); ++ ++ switch (op) { ++ case add : __ add_d(FSR, T2, FSR); break; ++ case sub : __ sub_d(FSR, T2, FSR); break; ++ case _and: __ andr(FSR, T2, FSR); break; ++ case _or : __ orr(FSR, T2, FSR); break; ++ case _xor: __ xorr(FSR, T2, FSR); break; ++ default : ShouldNotReachHere(); ++ } ++} ++ ++// java require this bytecode could handle 0x80000000/-1, dont cause a overflow exception, ++// the result is 0x80000000 ++// the godson2 cpu do the same, so we need not handle this specially like x86 ++void TemplateTable::idiv() { ++ transition(itos, itos); ++ Label not_zero; ++ ++ __ bne(FSR, R0, not_zero); ++ __ jmp(Interpreter::_throw_ArithmeticException_entry); ++ __ bind(not_zero); ++ ++ __ pop_i(SSR); ++ __ div_w(FSR, SSR, FSR); ++} ++ ++void TemplateTable::irem() { ++ transition(itos, itos); ++ Label not_zero; ++ __ pop_i(SSR); ++ ++ __ bne(FSR, R0, not_zero); ++ //__ brk(7); ++ __ jmp(Interpreter::_throw_ArithmeticException_entry); ++ ++ __ bind(not_zero); ++ __ mod_w(FSR, SSR, FSR); ++} ++ ++void TemplateTable::lmul() { ++ transition(ltos, ltos); ++ __ pop_l(T2); ++ __ mul_d(FSR, T2, FSR); ++} ++ ++// NOTE: i DONT use the Interpreter::_throw_ArithmeticException_entry ++void TemplateTable::ldiv() { ++ transition(ltos, ltos); ++ Label normal; ++ ++ __ bne(FSR, R0, normal); ++ ++ //__ brk(7); //generate FPE ++ __ jmp(Interpreter::_throw_ArithmeticException_entry); ++ ++ __ bind(normal); ++ __ pop_l(A2); ++ __ div_d(FSR, A2, FSR); ++} ++ ++// NOTE: i DONT use the Interpreter::_throw_ArithmeticException_entry ++void TemplateTable::lrem() { ++ transition(ltos, ltos); ++ Label normal; ++ ++ __ bne(FSR, R0, normal); ++ ++ __ jmp(Interpreter::_throw_ArithmeticException_entry); ++ ++ __ bind(normal); ++ __ pop_l (A2); ++ ++ __ mod_d(FSR, A2, FSR); ++} ++ ++// result in FSR ++// used registers : T0 ++void TemplateTable::lshl() { ++ transition(itos, ltos); ++ __ pop_l(T0); ++ __ sll_d(FSR, T0, FSR); ++} ++ ++// used registers : T0 ++void TemplateTable::lshr() { ++ transition(itos, ltos); ++ __ pop_l(T0); ++ __ sra_d(FSR, T0, FSR); ++} ++ ++// used registers : T0 ++void TemplateTable::lushr() { ++ transition(itos, ltos); 
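// A sketch of the Java-level semantics implemented by the pop/srl.d pair
// below, assuming LoongArch srl.d takes its shift amount from the low 6 bits
// of the register, so the & 63 required by Java needs no separate instruction:
static long long lushr_semantics(long long v, int s) {
  return (long long)((unsigned long long)v >> (s & 63));   // Java's v >>> s for longs
}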
++ __ pop_l(T0); ++ __ srl_d(FSR, T0, FSR); ++} ++ ++// result in FSF ++void TemplateTable::fop2(Operation op) { ++ transition(ftos, ftos); ++ switch (op) { ++ case add: ++ __ fld_s(fscratch, at_sp()); ++ __ fadd_s(FSF, fscratch, FSF); ++ break; ++ case sub: ++ __ fld_s(fscratch, at_sp()); ++ __ fsub_s(FSF, fscratch, FSF); ++ break; ++ case mul: ++ __ fld_s(fscratch, at_sp()); ++ __ fmul_s(FSF, fscratch, FSF); ++ break; ++ case div: ++ __ fld_s(fscratch, at_sp()); ++ __ fdiv_s(FSF, fscratch, FSF); ++ break; ++ case rem: ++ __ fmov_s(FA1, FSF); ++ __ fld_s(FA0, at_sp()); ++ __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::frem), 2); ++ break; ++ default : ShouldNotReachHere(); ++ } ++ ++ __ addi_d(SP, SP, 1 * wordSize); ++} ++ ++// result in SSF||FSF ++// i dont handle the strict flags ++void TemplateTable::dop2(Operation op) { ++ transition(dtos, dtos); ++ switch (op) { ++ case add: ++ __ fld_d(fscratch, at_sp()); ++ __ fadd_d(FSF, fscratch, FSF); ++ break; ++ case sub: ++ __ fld_d(fscratch, at_sp()); ++ __ fsub_d(FSF, fscratch, FSF); ++ break; ++ case mul: ++ __ fld_d(fscratch, at_sp()); ++ __ fmul_d(FSF, fscratch, FSF); ++ break; ++ case div: ++ __ fld_d(fscratch, at_sp()); ++ __ fdiv_d(FSF, fscratch, FSF); ++ break; ++ case rem: ++ __ fmov_d(FA1, FSF); ++ __ fld_d(FA0, at_sp()); ++ __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::drem), 2); ++ break; ++ default : ShouldNotReachHere(); ++ } ++ ++ __ addi_d(SP, SP, 2 * wordSize); ++} ++ ++void TemplateTable::ineg() { ++ transition(itos, itos); ++ __ sub_w(FSR, R0, FSR); ++} ++ ++void TemplateTable::lneg() { ++ transition(ltos, ltos); ++ __ sub_d(FSR, R0, FSR); ++} ++ ++void TemplateTable::fneg() { ++ transition(ftos, ftos); ++ __ fneg_s(FSF, FSF); ++} ++ ++void TemplateTable::dneg() { ++ transition(dtos, dtos); ++ __ fneg_d(FSF, FSF); ++} ++ ++// used registers : T2 ++void TemplateTable::iinc() { ++ transition(vtos, vtos); ++ locals_index(T2); ++ __ ld_w(FSR, T2, 0); ++ __ ld_b(AT, at_bcp(2)); // get constant ++ __ add_d(FSR, FSR, AT); ++ __ st_w(FSR, T2, 0); ++} ++ ++// used register : T2 ++void TemplateTable::wide_iinc() { ++ transition(vtos, vtos); ++ locals_index_wide(T2); ++ __ get_2_byte_integer_at_bcp(FSR, AT, 4); ++ __ hswap(FSR); ++ __ ld_w(AT, T2, 0); ++ __ add_d(FSR, AT, FSR); ++ __ st_w(FSR, T2, 0); ++} ++ ++void TemplateTable::convert() { ++ // Checking ++#ifdef ASSERT ++ { ++ TosState tos_in = ilgl; ++ TosState tos_out = ilgl; ++ switch (bytecode()) { ++ case Bytecodes::_i2l: // fall through ++ case Bytecodes::_i2f: // fall through ++ case Bytecodes::_i2d: // fall through ++ case Bytecodes::_i2b: // fall through ++ case Bytecodes::_i2c: // fall through ++ case Bytecodes::_i2s: tos_in = itos; break; ++ case Bytecodes::_l2i: // fall through ++ case Bytecodes::_l2f: // fall through ++ case Bytecodes::_l2d: tos_in = ltos; break; ++ case Bytecodes::_f2i: // fall through ++ case Bytecodes::_f2l: // fall through ++ case Bytecodes::_f2d: tos_in = ftos; break; ++ case Bytecodes::_d2i: // fall through ++ case Bytecodes::_d2l: // fall through ++ case Bytecodes::_d2f: tos_in = dtos; break; ++ default : ShouldNotReachHere(); ++ } ++ switch (bytecode()) { ++ case Bytecodes::_l2i: // fall through ++ case Bytecodes::_f2i: // fall through ++ case Bytecodes::_d2i: // fall through ++ case Bytecodes::_i2b: // fall through ++ case Bytecodes::_i2c: // fall through ++ case Bytecodes::_i2s: tos_out = itos; break; ++ case Bytecodes::_i2l: // fall through ++ case Bytecodes::_f2l: // fall through ++ case Bytecodes::_d2l: tos_out = 
ltos; break; ++ case Bytecodes::_i2f: // fall through ++ case Bytecodes::_l2f: // fall through ++ case Bytecodes::_d2f: tos_out = ftos; break; ++ case Bytecodes::_i2d: // fall through ++ case Bytecodes::_l2d: // fall through ++ case Bytecodes::_f2d: tos_out = dtos; break; ++ default : ShouldNotReachHere(); ++ } ++ transition(tos_in, tos_out); ++ } ++#endif // ASSERT ++ // Conversion ++ switch (bytecode()) { ++ case Bytecodes::_i2l: ++ __ slli_w(FSR, FSR, 0); ++ break; ++ case Bytecodes::_i2f: ++ __ movgr2fr_w(FSF, FSR); ++ __ ffint_s_w(FSF, FSF); ++ break; ++ case Bytecodes::_i2d: ++ __ movgr2fr_w(FSF, FSR); ++ __ ffint_d_w(FSF, FSF); ++ break; ++ case Bytecodes::_i2b: ++ __ ext_w_b(FSR, FSR); ++ break; ++ case Bytecodes::_i2c: ++ __ bstrpick_d(FSR, FSR, 15, 0); // truncate upper 56 bits ++ break; ++ case Bytecodes::_i2s: ++ __ ext_w_h(FSR, FSR); ++ break; ++ case Bytecodes::_l2i: ++ __ slli_w(FSR, FSR, 0); ++ break; ++ case Bytecodes::_l2f: ++ __ movgr2fr_d(FSF, FSR); ++ __ ffint_s_l(FSF, FSF); ++ break; ++ case Bytecodes::_l2d: ++ __ movgr2fr_d(FSF, FSR); ++ __ ffint_d_l(FSF, FSF); ++ break; ++ case Bytecodes::_f2i: ++ __ ftintrz_w_s(fscratch, FSF); ++ __ movfr2gr_s(FSR, fscratch); ++ break; ++ case Bytecodes::_f2l: ++ __ ftintrz_l_s(fscratch, FSF); ++ __ movfr2gr_d(FSR, fscratch); ++ break; ++ case Bytecodes::_f2d: ++ __ fcvt_d_s(FSF, FSF); ++ break; ++ case Bytecodes::_d2i: ++ __ ftintrz_w_d(fscratch, FSF); ++ __ movfr2gr_s(FSR, fscratch); ++ break; ++ case Bytecodes::_d2l: ++ __ ftintrz_l_d(fscratch, FSF); ++ __ movfr2gr_d(FSR, fscratch); ++ break; ++ case Bytecodes::_d2f: ++ __ fcvt_s_d(FSF, FSF); ++ break; ++ default : ++ ShouldNotReachHere(); ++ } ++} ++ ++void TemplateTable::lcmp() { ++ transition(ltos, itos); ++ ++ __ pop(T0); ++ __ pop(R0); ++ ++ __ slt(AT, T0, FSR); ++ __ slt(FSR, FSR, T0); ++ __ sub_d(FSR, FSR, AT); ++} ++ ++void TemplateTable::float_cmp(bool is_float, int unordered_result) { ++ if (is_float) { ++ __ fld_s(fscratch, at_sp()); ++ __ addi_d(SP, SP, 1 * wordSize); ++ ++ if (unordered_result < 0) { ++ __ fcmp_clt_s(FCC0, FSF, fscratch); ++ __ fcmp_cult_s(FCC1, fscratch, FSF); ++ } else { ++ __ fcmp_cult_s(FCC0, FSF, fscratch); ++ __ fcmp_clt_s(FCC1, fscratch, FSF); ++ } ++ } else { ++ __ fld_d(fscratch, at_sp()); ++ __ addi_d(SP, SP, 2 * wordSize); ++ ++ if (unordered_result < 0) { ++ __ fcmp_clt_d(FCC0, FSF, fscratch); ++ __ fcmp_cult_d(FCC1, fscratch, FSF); ++ } else { ++ __ fcmp_cult_d(FCC0, FSF, fscratch); ++ __ fcmp_clt_d(FCC1, fscratch, FSF); ++ } ++ } ++ ++ __ movcf2gr(FSR, FCC0); ++ __ movcf2gr(AT, FCC1); ++ __ sub_d(FSR, FSR, AT); ++} ++ ++// used registers : T3, A7, Rnext ++// FSR : return bci, this is defined by the vm specification ++// T2 : MDO taken count ++// T3 : method ++// A7 : offset ++// Rnext : next bytecode, this is required by dispatch_base ++void TemplateTable::branch(bool is_jsr, bool is_wide) { ++ __ get_method(T3); ++ __ profile_taken_branch(A7, T2); // only C2 meaningful ++ ++ const ByteSize be_offset = MethodCounters::backedge_counter_offset() + ++ InvocationCounter::counter_offset(); ++ const ByteSize inv_offset = MethodCounters::invocation_counter_offset() + ++ InvocationCounter::counter_offset(); ++ ++ // Load up T4 with the branch displacement ++ if (!is_wide) { ++ __ ld_b(A7, BCP, 1); ++ __ ld_bu(AT, BCP, 2); ++ __ slli_d(A7, A7, 8); ++ __ orr(A7, A7, AT); ++ } else { ++ __ get_4_byte_integer_at_bcp(A7, 1); ++ __ swap(A7); ++ } ++ ++ // Handle all the JSR stuff here, then exit. 
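// The displacement just loaded is stored big-endian in the bytecode stream;
// a plain C++ sketch of the 16-bit (non-wide) decode performed above:
static int branch_displacement(const unsigned char* bcp) {
  // high byte is sign-extended (ld_b), low byte is zero-extended (ld_bu)
  return ((int)(signed char)bcp[1] << 8) | bcp[2];
}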
++ // It's much shorter and cleaner than intermingling with the non-JSR ++ // normal-branch stuff occuring below. ++ if (is_jsr) { ++ // Pre-load the next target bytecode into Rnext ++ __ ldx_bu(Rnext, BCP, A7); ++ ++ // compute return address as bci in FSR ++ __ addi_d(FSR, BCP, (is_wide?5:3) - in_bytes(ConstMethod::codes_offset())); ++ __ ld_d(AT, T3, in_bytes(Method::const_offset())); ++ __ sub_d(FSR, FSR, AT); ++ // Adjust the bcp in BCP by the displacement in A7 ++ __ add_d(BCP, BCP, A7); ++ // jsr returns atos that is not an oop ++ // Push return address ++ __ push_i(FSR); ++ // jsr returns vtos ++ __ dispatch_only_noverify(vtos); ++ ++ return; ++ } ++ ++ // Normal (non-jsr) branch handling ++ ++ // Adjust the bcp in S0 by the displacement in T4 ++ __ add_d(BCP, BCP, A7); ++ ++ assert(UseLoopCounter || !UseOnStackReplacement, "on-stack-replacement requires loop counters"); ++ Label backedge_counter_overflow; ++ Label profile_method; ++ Label dispatch; ++ if (UseLoopCounter) { ++ // increment backedge counter for backward branches ++ // T3: method ++ // T4: target offset ++ // BCP: target bcp ++ // LVP: locals pointer ++ __ blt(R0, A7, dispatch); // check if forward or backward branch ++ ++ // check if MethodCounters exists ++ Label has_counters; ++ __ ld_d(AT, T3, in_bytes(Method::method_counters_offset())); // use AT as MDO, TEMP ++ __ bne(AT, R0, has_counters); ++ __ push2(T3, A7); ++ __ call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::build_method_counters), ++ T3); ++ __ pop2(T3, A7); ++ __ ld_d(AT, T3, in_bytes(Method::method_counters_offset())); // use AT as MDO, TEMP ++ __ beq(AT, R0, dispatch); ++ __ bind(has_counters); ++ ++ if (TieredCompilation) { ++ Label no_mdo; ++ int increment = InvocationCounter::count_increment; ++ int mask = ((1 << Tier0BackedgeNotifyFreqLog) - 1) << InvocationCounter::count_shift; ++ if (ProfileInterpreter) { ++ // Are we profiling? 
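// A sketch of what increment_mask_and_jump does to either backedge counter
// (MDO or MethodCounters), with `increment` and `mask` as computed above;
// plain C++ for illustration, not the generated code:
static bool bump_and_check_overflow(int* counter, int increment, int mask) {
  *counter += increment;
  return (*counter & mask) == 0;   // true -> take the backedge_counter_overflow path
}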
++ __ ld_d(T0, Address(T3, in_bytes(Method::method_data_offset()))); ++ __ beq(T0, R0, no_mdo); ++ // Increment the MDO backedge counter ++ const Address mdo_backedge_counter(T0, in_bytes(MethodData::backedge_counter_offset()) + ++ in_bytes(InvocationCounter::counter_offset())); ++ __ increment_mask_and_jump(mdo_backedge_counter, increment, mask, ++ T1, false, Assembler::zero, &backedge_counter_overflow); ++ __ beq(R0, R0, dispatch); ++ } ++ __ bind(no_mdo); ++ // Increment backedge counter in MethodCounters* ++ __ ld_d(T0, Address(T3, Method::method_counters_offset())); ++ __ increment_mask_and_jump(Address(T0, be_offset), increment, mask, ++ T1, false, Assembler::zero, &backedge_counter_overflow); ++ if (!UseOnStackReplacement) { ++ __ bind(backedge_counter_overflow); ++ } ++ } else { ++ // increment back edge counter ++ __ ld_d(T1, T3, in_bytes(Method::method_counters_offset())); ++ __ ld_w(T0, T1, in_bytes(be_offset)); ++ __ increment(T0, InvocationCounter::count_increment); ++ __ st_w(T0, T1, in_bytes(be_offset)); ++ ++ // load invocation counter ++ __ ld_w(T1, T1, in_bytes(inv_offset)); ++ // buffer bit added, mask no needed ++ ++ // dadd backedge counter & invocation counter ++ __ add_d(T1, T1, T0); ++ ++ if (ProfileInterpreter) { ++ // Test to see if we should create a method data oop ++ // T1 : backedge counter & invocation counter ++ if (Assembler::is_simm(InvocationCounter::InterpreterProfileLimit, 12)) { ++ __ slti(AT, T1, InvocationCounter::InterpreterProfileLimit); ++ __ bne(AT, R0, dispatch); ++ } else { ++ __ li(AT, (long)&InvocationCounter::InterpreterProfileLimit); ++ __ ld_w(AT, AT, 0); ++ __ blt(T1, AT, dispatch); ++ } ++ ++ // if no method data exists, go to profile method ++ __ test_method_data_pointer(T1, profile_method); ++ ++ if (UseOnStackReplacement) { ++ if (Assembler::is_simm(InvocationCounter::InterpreterBackwardBranchLimit, 12)) { ++ __ slti(AT, T2, InvocationCounter::InterpreterBackwardBranchLimit); ++ __ bne(AT, R0, dispatch); ++ } else { ++ __ li(AT, (long)&InvocationCounter::InterpreterBackwardBranchLimit); ++ __ ld_w(AT, AT, 0); ++ __ blt(T2, AT, dispatch); ++ } ++ ++ // When ProfileInterpreter is on, the backedge_count comes ++ // from the methodDataOop, which value does not get reset on ++ // the call to frequency_counter_overflow(). ++ // To avoid excessive calls to the overflow routine while ++ // the method is being compiled, dadd a second test to make ++ // sure the overflow function is called only once every ++ // overflow_frequency. ++ const int overflow_frequency = 1024; ++ __ andi(AT, T2, overflow_frequency-1); ++ __ beq(AT, R0, backedge_counter_overflow); ++ } ++ } else { ++ if (UseOnStackReplacement) { ++ // check for overflow against AT, which is the sum of the counters ++ __ li(AT, (long)&InvocationCounter::InterpreterBackwardBranchLimit); ++ __ ld_w(AT, AT, 0); ++ __ bge(T1, AT, backedge_counter_overflow); ++ } ++ } ++ } ++ __ bind(dispatch); ++ } ++ ++ // Pre-load the next target bytecode into Rnext ++ __ ld_bu(Rnext, BCP, 0); ++ ++ // continue with the bytecode @ target ++ // FSR: return bci for jsr's, unused otherwise ++ // Rnext: target bytecode ++ // BCP: target bcp ++ __ dispatch_only(vtos, true); ++ ++ if (UseLoopCounter) { ++ if (ProfileInterpreter) { ++ // Out-of-line code to allocate method data oop. 
++ __ bind(profile_method); ++ __ call_VM(NOREG, CAST_FROM_FN_PTR(address, InterpreterRuntime::profile_method)); ++ __ set_method_data_pointer_for_bcp(); ++ __ b(dispatch); ++ } ++ ++ if (UseOnStackReplacement) { ++ // invocation counter overflow ++ __ bind(backedge_counter_overflow); ++ __ sub_d(A7, BCP, A7); // branch bcp ++ call_VM(NOREG, CAST_FROM_FN_PTR(address, ++ InterpreterRuntime::frequency_counter_overflow), A7); ++ ++ // V0: osr nmethod (osr ok) or NULL (osr not possible) ++ // V1: osr adapter frame return address ++ // LVP: locals pointer ++ // BCP: bcp ++ __ beq(V0, R0, dispatch); ++ // nmethod may have been invalidated (VM may block upon call_VM return) ++ __ ld_b(T3, V0, nmethod::state_offset()); ++ __ li(AT, nmethod::in_use); ++ __ bne(AT, T3, dispatch); ++ ++ // We have the address of an on stack replacement routine in rax. ++ // In preparation of invoking it, first we must migrate the locals ++ // and monitors from off the interpreter frame on the stack. ++ // Ensure to save the osr nmethod over the migration call, ++ // it will be preserved in Rnext. ++ __ move(Rnext, V0); ++ const Register thread = TREG; ++#ifndef OPT_THREAD ++ __ get_thread(thread); ++#endif ++ call_VM(noreg, CAST_FROM_FN_PTR(address, SharedRuntime::OSR_migration_begin)); ++ ++ // V0 is OSR buffer, move it to expected parameter location ++ // refer to osrBufferPointer in c1_LIRAssembler_loongarch.cpp ++ __ move(T0, V0); ++ ++ // pop the interpreter frame ++ __ ld_d(A7, Address(FP, frame::interpreter_frame_sender_sp_offset * wordSize)); ++ // remove frame anchor ++ __ leave(); ++ __ move(LVP, RA); ++ __ move(SP, A7); ++ ++ __ li(AT, -(StackAlignmentInBytes)); ++ __ andr(SP , SP , AT); ++ ++ // push the (possibly adjusted) return address ++ // refer to osr_entry in c1_LIRAssembler_loongarch.cpp ++ __ ld_d(AT, Rnext, nmethod::osr_entry_point_offset()); ++ __ jr(AT); ++ } ++ } ++} ++ ++void TemplateTable::if_0cmp(Condition cc) { ++ transition(itos, vtos); ++ // assume branch is more often taken than not (loops use backward branches) ++ Label not_taken; ++ switch(cc) { ++ case not_equal: ++ __ beq(FSR, R0, not_taken); ++ break; ++ case equal: ++ __ bne(FSR, R0, not_taken); ++ break; ++ case less: ++ __ bge(FSR, R0, not_taken); ++ break; ++ case less_equal: ++ __ blt(R0, FSR, not_taken); ++ break; ++ case greater: ++ __ bge(R0, FSR, not_taken); ++ break; ++ case greater_equal: ++ __ blt(FSR, R0, not_taken); ++ break; ++ } ++ ++ branch(false, false); ++ ++ __ bind(not_taken); ++ __ profile_not_taken_branch(FSR); ++} ++ ++void TemplateTable::if_icmp(Condition cc) { ++ transition(itos, vtos); ++ // assume branch is more often taken than not (loops use backward branches) ++ Label not_taken; ++ ++ __ pop_i(SSR); ++ switch(cc) { ++ case not_equal: ++ __ beq(SSR, FSR, not_taken); ++ break; ++ case equal: ++ __ bne(SSR, FSR, not_taken); ++ break; ++ case less: ++ __ bge(SSR, FSR, not_taken); ++ break; ++ case less_equal: ++ __ blt(FSR, SSR, not_taken); ++ break; ++ case greater: ++ __ bge(FSR, SSR, not_taken); ++ break; ++ case greater_equal: ++ __ blt(SSR, FSR, not_taken); ++ break; ++ } ++ ++ branch(false, false); ++ __ bind(not_taken); ++ __ profile_not_taken_branch(FSR); ++} ++ ++void TemplateTable::if_nullcmp(Condition cc) { ++ transition(atos, vtos); ++ // assume branch is more often taken than not (loops use backward branches) ++ Label not_taken; ++ switch(cc) { ++ case not_equal: ++ __ beq(FSR, R0, not_taken); ++ break; ++ case equal: ++ __ bne(FSR, R0, not_taken); ++ break; ++ default: ++ 
ShouldNotReachHere(); ++ } ++ ++ branch(false, false); ++ __ bind(not_taken); ++ __ profile_not_taken_branch(FSR); ++} ++ ++ ++void TemplateTable::if_acmp(Condition cc) { ++ transition(atos, vtos); ++ // assume branch is more often taken than not (loops use backward branches) ++ Label not_taken; ++ // __ ld_w(SSR, SP, 0); ++ __ pop_ptr(SSR); ++ switch(cc) { ++ case not_equal: ++ __ beq(SSR, FSR, not_taken); ++ break; ++ case equal: ++ __ bne(SSR, FSR, not_taken); ++ break; ++ default: ++ ShouldNotReachHere(); ++ } ++ ++ branch(false, false); ++ ++ __ bind(not_taken); ++ __ profile_not_taken_branch(FSR); ++} ++ ++// used registers : T1, T2, T3 ++// T1 : method ++// T2 : returb bci ++void TemplateTable::ret() { ++ transition(vtos, vtos); ++ ++ locals_index(T2); ++ __ ld_d(T2, T2, 0); ++ __ profile_ret(T2, T3); ++ ++ __ get_method(T1); ++ __ ld_d(BCP, T1, in_bytes(Method::const_offset())); ++ __ add_d(BCP, BCP, T2); ++ __ addi_d(BCP, BCP, in_bytes(ConstMethod::codes_offset())); ++ ++ __ dispatch_next(vtos, 0, true); ++} ++ ++// used registers : T1, T2, T3 ++// T1 : method ++// T2 : returb bci ++void TemplateTable::wide_ret() { ++ transition(vtos, vtos); ++ ++ locals_index_wide(T2); ++ __ ld_d(T2, T2, 0); // get return bci, compute return bcp ++ __ profile_ret(T2, T3); ++ ++ __ get_method(T1); ++ __ ld_d(BCP, T1, in_bytes(Method::const_offset())); ++ __ add_d(BCP, BCP, T2); ++ __ addi_d(BCP, BCP, in_bytes(ConstMethod::codes_offset())); ++ ++ __ dispatch_next(vtos, 0, true); ++} ++ ++// used register T2, T3, A7, Rnext ++// T2 : bytecode pointer ++// T3 : low ++// A7 : high ++// Rnext : dest bytecode, required by dispatch_base ++void TemplateTable::tableswitch() { ++ Label default_case, continue_execution; ++ transition(itos, vtos); ++ ++ // align BCP ++ __ addi_d(T2, BCP, BytesPerInt); ++ __ li(AT, -BytesPerInt); ++ __ andr(T2, T2, AT); ++ ++ // load lo & hi ++ __ ld_w(T3, T2, 1 * BytesPerInt); ++ __ swap(T3); ++ __ ld_w(A7, T2, 2 * BytesPerInt); ++ __ swap(A7); ++ ++ // check against lo & hi ++ __ blt(FSR, T3, default_case); ++ __ blt(A7, FSR, default_case); ++ ++ // lookup dispatch offset, in A7 big endian ++ __ sub_d(FSR, FSR, T3); ++ __ alsl_d(AT, FSR, T2, Address::times_4 - 1); ++ __ ld_w(A7, AT, 3 * BytesPerInt); ++ __ profile_switch_case(FSR, T4, T3); ++ ++ __ bind(continue_execution); ++ __ swap(A7); ++ __ add_d(BCP, BCP, A7); ++ __ ld_bu(Rnext, BCP, 0); ++ __ dispatch_only(vtos, true); ++ ++ // handle default ++ __ bind(default_case); ++ __ profile_switch_default(FSR); ++ __ ld_w(A7, T2, 0); ++ __ b(continue_execution); ++} ++ ++void TemplateTable::lookupswitch() { ++ transition(itos, itos); ++ __ stop("lookupswitch bytecode should have been rewritten"); ++} ++ ++// used registers : T2, T3, A7, Rnext ++// T2 : bytecode pointer ++// T3 : pair index ++// A7 : offset ++// Rnext : dest bytecode ++// the data after the opcode is the same as lookupswitch ++// see Rewriter::rewrite_method for more information ++void TemplateTable::fast_linearswitch() { ++ transition(itos, vtos); ++ Label loop_entry, loop, found, continue_execution; ++ ++ // swap FSR so we can avoid swapping the table entries ++ __ swap(FSR); ++ ++ // align BCP ++ __ addi_d(T2, BCP, BytesPerInt); ++ __ li(AT, -BytesPerInt); ++ __ andr(T2, T2, AT); ++ ++ // set counter ++ __ ld_w(T3, T2, BytesPerInt); ++ __ swap(T3); ++ __ b(loop_entry); ++ ++ // table search ++ __ bind(loop); ++ // get the entry value ++ __ alsl_d(AT, T3, T2, Address::times_8 - 1); ++ __ ld_w(AT, AT, 2 * BytesPerInt); ++ ++ // found? 
++ __ beq(FSR, AT, found); ++ ++ __ bind(loop_entry); ++ Label L1; ++ __ bge(R0, T3, L1); ++ __ addi_d(T3, T3, -1); ++ __ b(loop); ++ __ bind(L1); ++ __ addi_d(T3, T3, -1); ++ ++ // default case ++ __ profile_switch_default(FSR); ++ __ ld_w(A7, T2, 0); ++ __ b(continue_execution); ++ ++ // entry found -> get offset ++ __ bind(found); ++ __ alsl_d(AT, T3, T2, Address::times_8 - 1); ++ __ ld_w(A7, AT, 3 * BytesPerInt); ++ __ profile_switch_case(T3, FSR, T2); ++ ++ // continue execution ++ __ bind(continue_execution); ++ __ swap(A7); ++ __ add_d(BCP, BCP, A7); ++ __ ld_bu(Rnext, BCP, 0); ++ __ dispatch_only(vtos, true); ++} ++ ++// used registers : T0, T1, T2, T3, A7, Rnext ++// T2 : pairs address(array) ++// Rnext : dest bytecode ++// the data after the opcode is the same as lookupswitch ++// see Rewriter::rewrite_method for more information ++void TemplateTable::fast_binaryswitch() { ++ transition(itos, vtos); ++ // Implementation using the following core algorithm: ++ // ++ // int binary_search(int key, LookupswitchPair* array, int n) { ++ // // Binary search according to "Methodik des Programmierens" by ++ // // Edsger W. Dijkstra and W.H.J. Feijen, Addison Wesley Germany 1985. ++ // int i = 0; ++ // int j = n; ++ // while (i+1 < j) { ++ // // invariant P: 0 <= i < j <= n and (a[i] <= key < a[j] or Q) ++ // // with Q: for all i: 0 <= i < n: key < a[i] ++ // // where a stands for the array and assuming that the (inexisting) ++ // // element a[n] is infinitely big. ++ // int h = (i + j) >> 1; ++ // // i < h < j ++ // if (key < array[h].fast_match()) { ++ // j = h; ++ // } else { ++ // i = h; ++ // } ++ // } ++ // // R: a[i] <= key < a[i+1] or Q ++ // // (i.e., if key is within array, i is the correct index) ++ // return i; ++ // } ++ ++ // register allocation ++ const Register array = T2; ++ const Register i = T3, j = A7; ++ const Register h = T1; ++ const Register temp = T0; ++ const Register key = FSR; ++ ++ // setup array ++ __ addi_d(array, BCP, 3*BytesPerInt); ++ __ li(AT, -BytesPerInt); ++ __ andr(array, array, AT); ++ ++ // initialize i & j ++ __ move(i, R0); ++ __ ld_w(j, array, - 1 * BytesPerInt); ++ // Convert j into native byteordering ++ __ swap(j); ++ ++ // and start ++ Label entry; ++ __ b(entry); ++ ++ // binary search loop ++ { ++ Label loop; ++ __ bind(loop); ++ // int h = (i + j) >> 1; ++ __ add_d(h, i, j); ++ __ srli_d(h, h, 1); ++ // if (key < array[h].fast_match()) { ++ // j = h; ++ // } else { ++ // i = h; ++ // } ++ // Convert array[h].match to native byte-ordering before compare ++ __ alsl_d(AT, h, array, Address::times_8 - 1); ++ __ ld_w(temp, AT, 0 * BytesPerInt); ++ __ swap(temp); ++ ++ __ slt(AT, key, temp); ++ __ maskeqz(i, i, AT); ++ __ masknez(temp, h, AT); ++ __ OR(i, i, temp); ++ __ masknez(j, j, AT); ++ __ maskeqz(temp, h, AT); ++ __ OR(j, j, temp); ++ ++ // while (i+1 < j) ++ __ bind(entry); ++ __ addi_d(h, i, 1); ++ __ blt(h, j, loop); ++ } ++ ++ // end of binary search, result index is i (must check again!) 
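++  // The loop invariant only guarantees a[i] <= key < a[i+1] (or that the key lies
++  // below the whole table), so array[i].match must still be compared with the key
++  // before its offset is used; a mismatch falls through to the default offset.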
++ Label default_case; ++ // Convert array[i].match to native byte-ordering before compare ++ __ alsl_d(AT, i, array, Address::times_8 - 1); ++ __ ld_w(temp, AT, 0 * BytesPerInt); ++ __ swap(temp); ++ __ bne(key, temp, default_case); ++ ++ // entry found -> j = offset ++ __ alsl_d(AT, i, array, Address::times_8 - 1); ++ __ ld_w(j, AT, 1 * BytesPerInt); ++ __ profile_switch_case(i, key, array); ++ __ swap(j); ++ ++ __ add_d(BCP, BCP, j); ++ __ ld_bu(Rnext, BCP, 0); ++ __ dispatch_only(vtos, true); ++ ++ // default case -> j = default offset ++ __ bind(default_case); ++ __ profile_switch_default(i); ++ __ ld_w(j, array, - 2 * BytesPerInt); ++ __ swap(j); ++ __ add_d(BCP, BCP, j); ++ __ ld_bu(Rnext, BCP, 0); ++ __ dispatch_only(vtos, true); ++} ++ ++void TemplateTable::_return(TosState state) { ++ transition(state, state); ++ assert(_desc->calls_vm(), ++ "inconsistent calls_vm information"); // call in remove_activation ++ ++ if (_desc->bytecode() == Bytecodes::_return_register_finalizer) { ++ assert(state == vtos, "only valid state"); ++ __ ld_d(T1, aaddress(0)); ++ __ load_klass(LVP, T1); ++ __ ld_w(LVP, LVP, in_bytes(Klass::access_flags_offset())); ++ __ li(AT, JVM_ACC_HAS_FINALIZER); ++ __ andr(AT, AT, LVP); ++ Label skip_register_finalizer; ++ __ beq(AT, R0, skip_register_finalizer); ++ __ call_VM(noreg, CAST_FROM_FN_PTR(address, ++ InterpreterRuntime::register_finalizer), T1); ++ __ bind(skip_register_finalizer); ++ } ++ ++ Register thread = TREG; ++#ifndef OPT_THREAD ++ __ get_thread(thread); ++#endif ++ ++ if (SafepointMechanism::uses_thread_local_poll() && _desc->bytecode() != Bytecodes::_return_register_finalizer) { ++ Label no_safepoint; ++ NOT_PRODUCT(__ block_comment("Thread-local Safepoint poll")); ++ __ ld_b(AT, thread, in_bytes(Thread::polling_page_offset())); ++ __ andi(AT, AT, SafepointMechanism::poll_bit()); ++ __ beq(AT, R0, no_safepoint); ++ __ push(state); ++ __ call_VM(noreg, CAST_FROM_FN_PTR(address, ++ InterpreterRuntime::at_safepoint)); ++ __ pop(state); ++ __ bind(no_safepoint); ++ } ++ ++ // Narrow result if state is itos but result type is smaller. ++ // Need to narrow in the return bytecode rather than in generate_return_entry ++ // since compiled code callers expect the result to already be narrowed. ++ if (state == itos) { ++ __ narrow(FSR); ++ } ++ ++ __ remove_activation(state, T4); ++ __ membar(__ StoreStore); ++ ++ __ jr(T4); ++} ++ ++// we dont shift left 2 bits in get_cache_and_index_at_bcp ++// for we always need shift the index we use it. the ConstantPoolCacheEntry ++// is 16-byte long, index is the index in ++// ConstantPoolCache, so cache + base_offset() + index * 16 is ++// the corresponding ConstantPoolCacheEntry ++// used registers : T2 ++// NOTE : the returned index need also shift left 4 to get the address! ++void TemplateTable::resolve_cache_and_index(int byte_no, ++ Register Rcache, ++ Register index, ++ size_t index_size) { ++ assert(byte_no == f1_byte || byte_no == f2_byte, "byte_no out of range"); ++ const Register temp = A1; ++ assert_different_registers(Rcache, index); ++ ++ Label resolved; ++ ++ Bytecodes::Code code = bytecode(); ++ switch (code) { ++ case Bytecodes::_nofast_getfield: code = Bytecodes::_getfield; break; ++ case Bytecodes::_nofast_putfield: code = Bytecodes::_putfield; break; ++ default: break; ++ } ++ ++ __ get_cache_and_index_and_bytecode_at_bcp(Rcache, index, temp, byte_no, 1, index_size); ++ // is resolved? 
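++  // temp holds the bytecode recorded in the cache entry for this byte_no; it only
++  // matches the current bytecode once the entry has been resolved.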
++ int i = (int)code; ++ __ addi_d(temp, temp, -i); ++ __ beq(temp, R0, resolved); ++ ++ // resolve first time through ++ address entry = CAST_FROM_FN_PTR(address, InterpreterRuntime::resolve_from_cache); ++ ++ __ li(temp, i); ++ __ call_VM(NOREG, entry, temp); ++ ++ // Update registers with resolved info ++ __ get_cache_and_index_at_bcp(Rcache, index, 1, index_size); ++ __ bind(resolved); ++} ++//END: LA ++ ++// The Rcache and index registers must be set before call ++void TemplateTable::load_field_cp_cache_entry(Register obj, ++ Register cache, ++ Register index, ++ Register off, ++ Register flags, ++ bool is_static = false) { ++ assert_different_registers(cache, index, flags, off); ++ ++ ByteSize cp_base_offset = ConstantPoolCache::base_offset(); ++ // Field offset ++ __ alsl_d(AT, index, cache, Address::times_ptr - 1); ++ __ ld_d(off, AT, in_bytes(cp_base_offset + ConstantPoolCacheEntry::f2_offset())); ++ // Flags ++ __ ld_d(flags, AT, in_bytes(cp_base_offset + ConstantPoolCacheEntry::flags_offset())); ++ ++ // klass overwrite register ++ if (is_static) { ++ __ ld_d(obj, AT, in_bytes(cp_base_offset + ConstantPoolCacheEntry::f1_offset())); ++ const int mirror_offset = in_bytes(Klass::java_mirror_offset()); ++ __ ld_d(obj, Address(obj, mirror_offset)); ++ ++ __ resolve_oop_handle(obj, T4); ++ } ++} ++ ++// get the method, itable_index and flags of the current invoke ++void TemplateTable::load_invoke_cp_cache_entry(int byte_no, ++ Register method, ++ Register itable_index, ++ Register flags, ++ bool is_invokevirtual, ++ bool is_invokevfinal, /*unused*/ ++ bool is_invokedynamic) { ++ // setup registers ++ const Register cache = T3; ++ const Register index = T1; ++ assert_different_registers(method, flags); ++ assert_different_registers(method, cache, index); ++ assert_different_registers(itable_index, flags); ++ assert_different_registers(itable_index, cache, index); ++ assert(is_invokevirtual == (byte_no == f2_byte), "is invokevirtual flag redundant"); ++ // determine constant pool cache field offsets ++ const int method_offset = in_bytes( ++ ConstantPoolCache::base_offset() + ++ ((byte_no == f2_byte) ++ ? ConstantPoolCacheEntry::f2_offset() ++ : ConstantPoolCacheEntry::f1_offset())); ++ const int flags_offset = in_bytes(ConstantPoolCache::base_offset() + ++ ConstantPoolCacheEntry::flags_offset()); ++ // access constant pool cache fields ++ const int index_offset = in_bytes(ConstantPoolCache::base_offset() + ++ ConstantPoolCacheEntry::f2_offset()); ++ ++ size_t index_size = (is_invokedynamic ? sizeof(u4): sizeof(u2)); ++ resolve_cache_and_index(byte_no, cache, index, index_size); ++ ++ __ alsl_d(AT, index, cache, Address::times_ptr - 1); ++ __ ld_d(method, AT, method_offset); ++ ++ if (itable_index != NOREG) { ++ __ ld_d(itable_index, AT, index_offset); ++ } ++ __ ld_d(flags, AT, flags_offset); ++} ++ ++// The registers cache and index expected to be set before call. ++// Correct values of the cache and index registers are preserved. ++void TemplateTable::jvmti_post_field_access(Register cache, Register index, ++ bool is_static, bool has_tos) { ++ // do the JVMTI work here to avoid disturbing the register state below ++ // We use c_rarg registers here because we want to use the register used in ++ // the call to the VM ++ if (JvmtiExport::can_post_field_access()) { ++ // Check to see if a field access watch has been set before we ++ // take the time to call into the VM. 
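++  // A zero value at JvmtiExport::get_field_access_count_addr() means no field
++  // access watch is installed, so the VM callout below is skipped entirely.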
++ Label L1; ++ // kill FSR ++ Register tmp1 = T2; ++ Register tmp2 = T1; ++ Register tmp3 = T3; ++ assert_different_registers(cache, index, AT); ++ __ li(AT, (intptr_t)JvmtiExport::get_field_access_count_addr()); ++ __ ld_w(AT, AT, 0); ++ __ beq(AT, R0, L1); ++ ++ __ get_cache_and_index_at_bcp(tmp2, tmp3, 1); ++ ++ // cache entry pointer ++ __ addi_d(tmp2, tmp2, in_bytes(ConstantPoolCache::base_offset())); ++ __ alsl_d(tmp2, tmp3, tmp2, LogBytesPerWord - 1); ++ ++ if (is_static) { ++ __ move(tmp1, R0); ++ } else { ++ __ ld_d(tmp1, SP, 0); ++ __ verify_oop(tmp1); ++ } ++ // tmp1: object pointer or NULL ++ // tmp2: cache entry pointer ++ __ call_VM(NOREG, CAST_FROM_FN_PTR(address, ++ InterpreterRuntime::post_field_access), ++ tmp1, tmp2); ++ __ get_cache_and_index_at_bcp(cache, index, 1); ++ __ bind(L1); ++ } ++} ++ ++void TemplateTable::pop_and_check_object(Register r) { ++ __ pop_ptr(r); ++ __ null_check(r); // for field access must check obj. ++ __ verify_oop(r); ++} ++ ++// used registers : T1, T2, T3, T1 ++// T1 : flags ++// T2 : off ++// T3 : obj ++// T1 : field address ++// The flags 31, 30, 29, 28 together build a 4 bit number 0 to 8 with the ++// following mapping to the TosState states: ++// btos: 0 ++// ctos: 1 ++// stos: 2 ++// itos: 3 ++// ltos: 4 ++// ftos: 5 ++// dtos: 6 ++// atos: 7 ++// vtos: 8 ++// see ConstantPoolCacheEntry::set_field for more info ++void TemplateTable::getfield_or_static(int byte_no, bool is_static, RewriteControl rc) { ++ transition(vtos, vtos); ++ ++ const Register cache = T3; ++ const Register index = T0; ++ ++ const Register obj = T3; ++ const Register off = T2; ++ const Register flags = T1; ++ ++ const Register scratch = T8; ++ ++ resolve_cache_and_index(byte_no, cache, index, sizeof(u2)); ++ jvmti_post_field_access(cache, index, is_static, false); ++ load_field_cp_cache_entry(obj, cache, index, off, flags, is_static); ++ ++ { ++ __ li(scratch, 1 << ConstantPoolCacheEntry::is_volatile_shift); ++ __ andr(scratch, scratch, flags); ++ ++ Label notVolatile; ++ __ beq(scratch, R0, notVolatile); ++ __ membar(MacroAssembler::AnyAny); ++ __ bind(notVolatile); ++ } ++ ++ if (!is_static) pop_and_check_object(obj); ++ __ add_d(index, obj, off); ++ ++ const Address field(index, 0); ++ ++ Label Done, notByte, notBool, notInt, notShort, notChar, ++ notLong, notFloat, notObj, notDouble; ++ ++ assert(btos == 0, "change code, btos != 0"); ++ __ srli_d(flags, flags, ConstantPoolCacheEntry::tos_state_shift); ++ __ andi(flags, flags, ConstantPoolCacheEntry::tos_state_mask); ++ __ bne(flags, R0, notByte); ++ ++ // btos ++ __ access_load_at(T_BYTE, IN_HEAP, FSR, field, noreg, noreg); ++ __ push(btos); ++ ++ // Rewrite bytecode to be faster ++ if (!is_static && rc == may_rewrite) { ++ patch_bytecode(Bytecodes::_fast_bgetfield, T3, T2); ++ } ++ __ b(Done); ++ ++ ++ __ bind(notByte); ++ __ li(AT, ztos); ++ __ bne(flags, AT, notBool); ++ ++ // ztos ++ __ access_load_at(T_BOOLEAN, IN_HEAP, FSR, field, noreg, noreg); ++ __ push(ztos); ++ ++ // Rewrite bytecode to be faster ++ if (!is_static && rc == may_rewrite) { ++ patch_bytecode(Bytecodes::_fast_bgetfield, T3, T2); ++ } ++ __ b(Done); ++ ++ ++ __ bind(notBool); ++ __ li(AT, itos); ++ __ bne(flags, AT, notInt); ++ ++ // itos ++ __ access_load_at(T_INT, IN_HEAP, FSR, field, noreg, noreg); ++ __ push(itos); ++ ++ // Rewrite bytecode to be faster ++ if (!is_static && rc == may_rewrite) { ++ patch_bytecode(Bytecodes::_fast_igetfield, T3, T2); ++ } ++ __ b(Done); ++ ++ __ bind(notInt); ++ __ li(AT, atos); ++ __ bne(flags, AT, 
notObj); ++ ++ // atos ++ //add for compressedoops ++ do_oop_load(_masm, Address(index, 0), FSR, IN_HEAP); ++ __ push(atos); ++ ++ if (!is_static && rc == may_rewrite) { ++ patch_bytecode(Bytecodes::_fast_agetfield, T3, T2); ++ } ++ __ b(Done); ++ ++ __ bind(notObj); ++ __ li(AT, ctos); ++ __ bne(flags, AT, notChar); ++ ++ // ctos ++ __ access_load_at(T_CHAR, IN_HEAP, FSR, field, noreg, noreg); ++ __ push(ctos); ++ ++ if (!is_static && rc == may_rewrite) { ++ patch_bytecode(Bytecodes::_fast_cgetfield, T3, T2); ++ } ++ __ b(Done); ++ ++ __ bind(notChar); ++ __ li(AT, stos); ++ __ bne(flags, AT, notShort); ++ ++ // stos ++ __ access_load_at(T_SHORT, IN_HEAP, FSR, field, noreg, noreg); ++ __ push(stos); ++ ++ if (!is_static && rc == may_rewrite) { ++ patch_bytecode(Bytecodes::_fast_sgetfield, T3, T2); ++ } ++ __ b(Done); ++ ++ __ bind(notShort); ++ __ li(AT, ltos); ++ __ bne(flags, AT, notLong); ++ ++ // ltos ++ __ access_load_at(T_LONG, IN_HEAP | MO_RELAXED, FSR, field, noreg, noreg); ++ __ push(ltos); ++ ++ // Don't rewrite to _fast_lgetfield for potential volatile case. ++ __ b(Done); ++ ++ __ bind(notLong); ++ __ li(AT, ftos); ++ __ bne(flags, AT, notFloat); ++ ++ // ftos ++ __ access_load_at(T_FLOAT, IN_HEAP, noreg /* ftos */, field, noreg, noreg); ++ __ push(ftos); ++ ++ if (!is_static && rc == may_rewrite) { ++ patch_bytecode(Bytecodes::_fast_fgetfield, T3, T2); ++ } ++ __ b(Done); ++ ++ __ bind(notFloat); ++ __ li(AT, dtos); ++#ifdef ASSERT ++ __ bne(flags, AT, notDouble); ++#endif ++ ++ // dtos ++ __ access_load_at(T_DOUBLE, IN_HEAP, noreg /* dtos */, field, noreg, noreg); ++ __ push(dtos); ++ ++ if (!is_static && rc == may_rewrite) { ++ patch_bytecode(Bytecodes::_fast_dgetfield, T3, T2); ++ } ++ ++#ifdef ASSERT ++ __ b(Done); ++ __ bind(notDouble); ++ __ stop("Bad state"); ++#endif ++ ++ __ bind(Done); ++ ++ { ++ Label notVolatile; ++ __ beq(scratch, R0, notVolatile); ++ __ membar(Assembler::Membar_mask_bits(__ LoadLoad | __ LoadStore)); ++ __ bind(notVolatile); ++ } ++} ++ ++void TemplateTable::getfield(int byte_no) { ++ getfield_or_static(byte_no, false); ++} ++ ++void TemplateTable::nofast_getfield(int byte_no) { ++ getfield_or_static(byte_no, false, may_not_rewrite); ++} ++ ++void TemplateTable::getstatic(int byte_no) { ++ getfield_or_static(byte_no, true); ++} ++ ++// The registers cache and index expected to be set before call. ++// The function may destroy various registers, just not the cache and index registers. ++void TemplateTable::jvmti_post_field_mod(Register cache, Register index, bool is_static) { ++ transition(vtos, vtos); ++ ++ ByteSize cp_base_offset = ConstantPoolCache::base_offset(); ++ ++ if (JvmtiExport::can_post_field_modification()) { ++ // Check to see if a field modification watch has been set before ++ // we take the time to call into the VM. ++ Label L1; ++ //kill AT, T1, T2, T3, T4 ++ Register tmp1 = T2; ++ Register tmp2 = T1; ++ Register tmp3 = T3; ++ Register tmp4 = T4; ++ assert_different_registers(cache, index, tmp4); ++ ++ __ li(AT, JvmtiExport::get_field_modification_count_addr()); ++ __ ld_w(AT, AT, 0); ++ __ beq(AT, R0, L1); ++ ++ __ get_cache_and_index_at_bcp(tmp2, tmp4, 1); ++ ++ if (is_static) { ++ __ move(tmp1, R0); ++ } else { ++ // Life is harder. The stack holds the value on top, followed by ++ // the object. We don't know the size of the value, though; it ++ // could be one or two words depending on its type. As a result, ++ // we must find the type to determine where the object is. 
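++  // long and double values occupy two expression-stack slots, so for ltos and
++  // dtos the object reference sits one slot deeper than for the other types.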
++ Label two_word, valsize_known; ++ __ alsl_d(AT, tmp4, tmp2, Address::times_8 - 1); ++ __ ld_d(tmp3, AT, in_bytes(cp_base_offset + ++ ConstantPoolCacheEntry::flags_offset())); ++ __ shr(tmp3, ConstantPoolCacheEntry::tos_state_shift); ++ ++ ConstantPoolCacheEntry::verify_tos_state_shift(); ++ __ move(tmp1, SP); ++ __ li(AT, ltos); ++ __ beq(tmp3, AT, two_word); ++ __ li(AT, dtos); ++ __ beq(tmp3, AT, two_word); ++ __ addi_d(tmp1, tmp1, Interpreter::expr_offset_in_bytes(1) ); ++ __ b(valsize_known); ++ ++ __ bind(two_word); ++ __ addi_d(tmp1, tmp1, Interpreter::expr_offset_in_bytes(2)); ++ ++ __ bind(valsize_known); ++ // setup object pointer ++ __ ld_d(tmp1, tmp1, 0 * wordSize); ++ } ++ // cache entry pointer ++ __ addi_d(tmp2, tmp2, in_bytes(cp_base_offset)); ++ __ alsl_d(tmp2, tmp4, tmp2, LogBytesPerWord - 1); ++ // object (tos) ++ __ move(tmp3, SP); ++ // tmp1: object pointer set up above (NULL if static) ++ // tmp2: cache entry pointer ++ // tmp3: jvalue object on the stack ++ __ call_VM(NOREG, ++ CAST_FROM_FN_PTR(address, ++ InterpreterRuntime::post_field_modification), ++ tmp1, tmp2, tmp3); ++ __ get_cache_and_index_at_bcp(cache, index, 1); ++ __ bind(L1); ++ } ++} ++ ++// used registers : T0, T1, T2, T3, T8 ++// T1 : flags ++// T2 : off ++// T3 : obj ++// T8 : volatile bit ++// see ConstantPoolCacheEntry::set_field for more info ++void TemplateTable::putfield_or_static(int byte_no, bool is_static, RewriteControl rc) { ++ transition(vtos, vtos); ++ ++ const Register cache = T3; ++ const Register index = T0; ++ const Register obj = T3; ++ const Register off = T2; ++ const Register flags = T1; ++ const Register bc = T3; ++ ++ const Register scratch = T8; ++ ++ resolve_cache_and_index(byte_no, cache, index, sizeof(u2)); ++ jvmti_post_field_mod(cache, index, is_static); ++ load_field_cp_cache_entry(obj, cache, index, off, flags, is_static); ++ ++ Label Done; ++ { ++ __ li(scratch, 1 << ConstantPoolCacheEntry::is_volatile_shift); ++ __ andr(scratch, scratch, flags); ++ ++ Label notVolatile; ++ __ beq(scratch, R0, notVolatile); ++ __ membar(Assembler::Membar_mask_bits(__ StoreStore | __ LoadStore)); ++ __ bind(notVolatile); ++ } ++ ++ ++ Label notByte, notBool, notInt, notShort, notChar, notLong, notFloat, notObj, notDouble; ++ ++ assert(btos == 0, "change code, btos != 0"); ++ ++ // btos ++ __ srli_d(flags, flags, ConstantPoolCacheEntry::tos_state_shift); ++ __ andi(flags, flags, ConstantPoolCacheEntry::tos_state_mask); ++ __ bne(flags, R0, notByte); ++ ++ __ pop(btos); ++ if (!is_static) { ++ pop_and_check_object(obj); ++ } ++ __ add_d(T4, obj, off); ++ __ access_store_at(T_BYTE, IN_HEAP, Address(T4), FSR, noreg, noreg); ++ ++ if (!is_static && rc == may_rewrite) { ++ patch_bytecode(Bytecodes::_fast_bputfield, bc, off, true, byte_no); ++ } ++ __ b(Done); ++ ++ // ztos ++ __ bind(notByte); ++ __ li(AT, ztos); ++ __ bne(flags, AT, notBool); ++ ++ __ pop(ztos); ++ if (!is_static) { ++ pop_and_check_object(obj); ++ } ++ __ add_d(T4, obj, off); ++ __ andi(FSR, FSR, 0x1); ++ __ access_store_at(T_BOOLEAN, IN_HEAP, Address(T4), FSR, noreg, noreg); ++ ++ if (!is_static && rc == may_rewrite) { ++ patch_bytecode(Bytecodes::_fast_zputfield, bc, off, true, byte_no); ++ } ++ __ b(Done); ++ ++ // itos ++ __ bind(notBool); ++ __ li(AT, itos); ++ __ bne(flags, AT, notInt); ++ ++ __ pop(itos); ++ if (!is_static) { ++ pop_and_check_object(obj); ++ } ++ __ add_d(T4, obj, off); ++ __ access_store_at(T_INT, IN_HEAP, Address(T4), FSR, noreg, noreg); ++ ++ if (!is_static && rc == may_rewrite) { ++ 
patch_bytecode(Bytecodes::_fast_iputfield, bc, off, true, byte_no); ++ } ++ __ b(Done); ++ ++ // atos ++ __ bind(notInt); ++ __ li(AT, atos); ++ __ bne(flags, AT, notObj); ++ ++ __ pop(atos); ++ if (!is_static) { ++ pop_and_check_object(obj); ++ } ++ ++ do_oop_store(_masm, Address(obj, off, Address::times_1, 0), FSR); ++ ++ if (!is_static && rc == may_rewrite) { ++ patch_bytecode(Bytecodes::_fast_aputfield, bc, off, true, byte_no); ++ } ++ __ b(Done); ++ ++ // ctos ++ __ bind(notObj); ++ __ li(AT, ctos); ++ __ bne(flags, AT, notChar); ++ ++ __ pop(ctos); ++ if (!is_static) { ++ pop_and_check_object(obj); ++ } ++ __ add_d(T4, obj, off); ++ __ access_store_at(T_CHAR, IN_HEAP, Address(T4), FSR, noreg, noreg); ++ if (!is_static && rc == may_rewrite) { ++ patch_bytecode(Bytecodes::_fast_cputfield, bc, off, true, byte_no); ++ } ++ __ b(Done); ++ ++ // stos ++ __ bind(notChar); ++ __ li(AT, stos); ++ __ bne(flags, AT, notShort); ++ ++ __ pop(stos); ++ if (!is_static) { ++ pop_and_check_object(obj); ++ } ++ __ add_d(T4, obj, off); ++ __ access_store_at(T_SHORT, IN_HEAP, Address(T4), FSR, noreg, noreg); ++ if (!is_static && rc == may_rewrite) { ++ patch_bytecode(Bytecodes::_fast_sputfield, bc, off, true, byte_no); ++ } ++ __ b(Done); ++ ++ // ltos ++ __ bind(notShort); ++ __ li(AT, ltos); ++ __ bne(flags, AT, notLong); ++ ++ __ pop(ltos); ++ if (!is_static) { ++ pop_and_check_object(obj); ++ } ++ __ add_d(T4, obj, off); ++ __ access_store_at(T_LONG, IN_HEAP, Address(T4), FSR, noreg, noreg); ++ if (!is_static && rc == may_rewrite) { ++ patch_bytecode(Bytecodes::_fast_lputfield, bc, off, true, byte_no); ++ } ++ __ b(Done); ++ ++ // ftos ++ __ bind(notLong); ++ __ li(AT, ftos); ++ __ bne(flags, AT, notFloat); ++ ++ __ pop(ftos); ++ if (!is_static) { ++ pop_and_check_object(obj); ++ } ++ __ add_d(T4, obj, off); ++ __ access_store_at(T_FLOAT, IN_HEAP, Address(T4), noreg, noreg, noreg); ++ if (!is_static && rc == may_rewrite) { ++ patch_bytecode(Bytecodes::_fast_fputfield, bc, off, true, byte_no); ++ } ++ __ b(Done); ++ ++ ++ // dtos ++ __ bind(notFloat); ++ __ li(AT, dtos); ++#ifdef ASSERT ++ __ bne(flags, AT, notDouble); ++#endif ++ ++ __ pop(dtos); ++ if (!is_static) { ++ pop_and_check_object(obj); ++ } ++ __ add_d(T4, obj, off); ++ __ access_store_at(T_DOUBLE, IN_HEAP, Address(T4), noreg, noreg, noreg); ++ if (!is_static && rc == may_rewrite) { ++ patch_bytecode(Bytecodes::_fast_dputfield, bc, off, true, byte_no); ++ } ++ ++#ifdef ASSERT ++ __ b(Done); ++ ++ __ bind(notDouble); ++ __ stop("Bad state"); ++#endif ++ ++ __ bind(Done); ++ ++ { ++ Label notVolatile; ++ __ beq(scratch, R0, notVolatile); ++ __ membar(Assembler::Membar_mask_bits(__ StoreLoad | __ StoreStore)); ++ __ bind(notVolatile); ++ } ++} ++ ++void TemplateTable::putfield(int byte_no) { ++ putfield_or_static(byte_no, false); ++} ++ ++void TemplateTable::nofast_putfield(int byte_no) { ++ putfield_or_static(byte_no, false, may_not_rewrite); ++} ++ ++void TemplateTable::putstatic(int byte_no) { ++ putfield_or_static(byte_no, true); ++} ++ ++// used registers : T1, T2, T3 ++// T1 : cp_entry ++// T2 : obj ++// T3 : value pointer ++void TemplateTable::jvmti_post_fast_field_mod() { ++ if (JvmtiExport::can_post_field_modification()) { ++ // Check to see if a field modification watch has been set before ++ // we take the time to call into the VM. 
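++  // As in the slow path above, a zero field-modification count means no JVMTI
++  // watch is installed and the callout can be skipped.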
++ Label L2; ++ //kill AT, T1, T2, T3, T4 ++ Register tmp1 = T2; ++ Register tmp2 = T1; ++ Register tmp3 = T3; ++ Register tmp4 = T4; ++ __ li(AT, JvmtiExport::get_field_modification_count_addr()); ++ __ ld_w(tmp3, AT, 0); ++ __ beq(tmp3, R0, L2); ++ __ pop_ptr(tmp1); ++ __ verify_oop(tmp1); ++ __ push_ptr(tmp1); ++ switch (bytecode()) { // load values into the jvalue object ++ case Bytecodes::_fast_aputfield: __ push_ptr(FSR); break; ++ case Bytecodes::_fast_bputfield: // fall through ++ case Bytecodes::_fast_zputfield: // fall through ++ case Bytecodes::_fast_sputfield: // fall through ++ case Bytecodes::_fast_cputfield: // fall through ++ case Bytecodes::_fast_iputfield: __ push_i(FSR); break; ++ case Bytecodes::_fast_dputfield: __ push_d(FSF); break; ++ case Bytecodes::_fast_fputfield: __ push_f(); break; ++ case Bytecodes::_fast_lputfield: __ push_l(FSR); break; ++ default: ShouldNotReachHere(); ++ } ++ __ move(tmp3, SP); ++ // access constant pool cache entry ++ __ get_cache_entry_pointer_at_bcp(tmp2, FSR, 1); ++ __ verify_oop(tmp1); ++ // tmp1: object pointer copied above ++ // tmp2: cache entry pointer ++ // tmp3: jvalue object on the stack ++ __ call_VM(NOREG, ++ CAST_FROM_FN_PTR(address, ++ InterpreterRuntime::post_field_modification), ++ tmp1, tmp2, tmp3); ++ ++ switch (bytecode()) { // restore tos values ++ case Bytecodes::_fast_aputfield: __ pop_ptr(FSR); break; ++ case Bytecodes::_fast_bputfield: // fall through ++ case Bytecodes::_fast_zputfield: // fall through ++ case Bytecodes::_fast_sputfield: // fall through ++ case Bytecodes::_fast_cputfield: // fall through ++ case Bytecodes::_fast_iputfield: __ pop_i(FSR); break; ++ case Bytecodes::_fast_dputfield: __ pop_d(); break; ++ case Bytecodes::_fast_fputfield: __ pop_f(); break; ++ case Bytecodes::_fast_lputfield: __ pop_l(FSR); break; ++ } ++ __ bind(L2); ++ } ++} ++ ++// used registers : T2, T3, T1 ++// T2 : index & off & field address ++// T3 : cache & obj ++// T1 : flags ++void TemplateTable::fast_storefield(TosState state) { ++ transition(state, vtos); ++ ++ const Register scratch = T8; ++ ++ ByteSize base = ConstantPoolCache::base_offset(); ++ ++ jvmti_post_fast_field_mod(); ++ ++ // access constant pool cache ++ __ get_cache_and_index_at_bcp(T3, T2, 1); ++ ++ // Must prevent reordering of the following cp cache loads with bytecode load ++ __ membar(__ LoadLoad); ++ ++ // test for volatile with T1 ++ __ alsl_d(AT, T2, T3, Address::times_8 - 1); ++ __ ld_d(T1, AT, in_bytes(base + ConstantPoolCacheEntry::flags_offset())); ++ ++ // replace index with field offset from cache entry ++ __ ld_d(T2, AT, in_bytes(base + ConstantPoolCacheEntry::f2_offset())); ++ ++ Label Done; ++ { ++ __ li(scratch, 1 << ConstantPoolCacheEntry::is_volatile_shift); ++ __ andr(scratch, scratch, T1); ++ ++ Label notVolatile; ++ __ beq(scratch, R0, notVolatile); ++ __ membar(Assembler::Membar_mask_bits(__ StoreStore | __ LoadStore)); ++ __ bind(notVolatile); ++ } ++ ++ // Get object from stack ++ pop_and_check_object(T3); ++ ++ if (bytecode() != Bytecodes::_fast_aputfield) { ++ // field address ++ __ add_d(T2, T3, T2); ++ } ++ ++ // access field ++ switch (bytecode()) { ++ case Bytecodes::_fast_zputfield: ++ __ andi(FSR, FSR, 0x1); // boolean is true if LSB is 1 ++ __ access_store_at(T_BOOLEAN, IN_HEAP, Address(T2), FSR, noreg, noreg); ++ break; ++ case Bytecodes::_fast_bputfield: ++ __ access_store_at(T_BYTE, IN_HEAP, Address(T2), FSR, noreg, noreg); ++ break; ++ case Bytecodes::_fast_sputfield: ++ __ access_store_at(T_SHORT, IN_HEAP, Address(T2), 
FSR, noreg, noreg); ++ break; ++ case Bytecodes::_fast_cputfield: ++ __ access_store_at(T_CHAR, IN_HEAP, Address(T2), FSR, noreg, noreg); ++ break; ++ case Bytecodes::_fast_iputfield: ++ __ access_store_at(T_INT, IN_HEAP, Address(T2), FSR, noreg, noreg); ++ break; ++ case Bytecodes::_fast_lputfield: ++ __ access_store_at(T_LONG, IN_HEAP, Address(T2), FSR, noreg, noreg); ++ break; ++ case Bytecodes::_fast_fputfield: ++ __ access_store_at(T_FLOAT, IN_HEAP, Address(T2), noreg, noreg, noreg); ++ break; ++ case Bytecodes::_fast_dputfield: ++ __ access_store_at(T_DOUBLE, IN_HEAP, Address(T2), noreg, noreg, noreg); ++ break; ++ case Bytecodes::_fast_aputfield: ++ do_oop_store(_masm, Address(T3, T2, Address::times_1, 0), FSR); ++ break; ++ default: ++ ShouldNotReachHere(); ++ } ++ ++ { ++ Label notVolatile; ++ __ beq(scratch, R0, notVolatile); ++ __ membar(Assembler::Membar_mask_bits(__ StoreLoad | __ StoreStore)); ++ __ bind(notVolatile); ++ } ++} ++ ++// used registers : T2, T3, T1 ++// T3 : cp_entry & cache ++// T2 : index & offset ++void TemplateTable::fast_accessfield(TosState state) { ++ transition(atos, state); ++ ++ const Register scratch = T8; ++ ++ // do the JVMTI work here to avoid disturbing the register state below ++ if (JvmtiExport::can_post_field_access()) { ++ // Check to see if a field access watch has been set before we take ++ // the time to call into the VM. ++ Label L1; ++ __ li(AT, (intptr_t)JvmtiExport::get_field_access_count_addr()); ++ __ ld_w(T3, AT, 0); ++ __ beq(T3, R0, L1); ++ // access constant pool cache entry ++ __ get_cache_entry_pointer_at_bcp(T3, T1, 1); ++ __ move(TSR, FSR); ++ __ verify_oop(FSR); ++ // FSR: object pointer copied above ++ // T3: cache entry pointer ++ __ call_VM(NOREG, ++ CAST_FROM_FN_PTR(address, InterpreterRuntime::post_field_access), ++ FSR, T3); ++ __ move(FSR, TSR); ++ __ bind(L1); ++ } ++ ++ // access constant pool cache ++ __ get_cache_and_index_at_bcp(T3, T2, 1); ++ ++ // Must prevent reordering of the following cp cache loads with bytecode load ++ __ membar(__ LoadLoad); ++ ++ // replace index with field offset from cache entry ++ __ alsl_d(AT, T2, T3, Address::times_8 - 1); ++ __ ld_d(T2, AT, in_bytes(ConstantPoolCache::base_offset() + ConstantPoolCacheEntry::f2_offset())); ++ ++ { ++ __ ld_d(AT, AT, in_bytes(ConstantPoolCache::base_offset() + ConstantPoolCacheEntry::flags_offset())); ++ __ li(scratch, 1 << ConstantPoolCacheEntry::is_volatile_shift); ++ __ andr(scratch, scratch, AT); ++ ++ Label notVolatile; ++ __ beq(scratch, R0, notVolatile); ++ __ membar(MacroAssembler::AnyAny); ++ __ bind(notVolatile); ++ } ++ ++ // FSR: object ++ __ verify_oop(FSR); ++ __ null_check(FSR); ++ // field addresses ++ __ add_d(FSR, FSR, T2); ++ ++ // access field ++ switch (bytecode()) { ++ case Bytecodes::_fast_bgetfield: ++ __ access_load_at(T_BYTE, IN_HEAP, FSR, Address(FSR), noreg, noreg); ++ break; ++ case Bytecodes::_fast_sgetfield: ++ __ access_load_at(T_SHORT, IN_HEAP, FSR, Address(FSR), noreg, noreg); ++ break; ++ case Bytecodes::_fast_cgetfield: ++ __ access_load_at(T_CHAR, IN_HEAP, FSR, Address(FSR), noreg, noreg); ++ break; ++ case Bytecodes::_fast_igetfield: ++ __ access_load_at(T_INT, IN_HEAP, FSR, Address(FSR), noreg, noreg); ++ break; ++ case Bytecodes::_fast_lgetfield: ++ __ stop("should not be rewritten"); ++ break; ++ case Bytecodes::_fast_fgetfield: ++ __ access_load_at(T_FLOAT, IN_HEAP, noreg, Address(FSR), noreg, noreg); ++ break; ++ case Bytecodes::_fast_dgetfield: ++ __ access_load_at(T_DOUBLE, IN_HEAP, noreg, Address(FSR), 
noreg, noreg); ++ break; ++ case Bytecodes::_fast_agetfield: ++ do_oop_load(_masm, Address(FSR, 0), FSR, IN_HEAP); ++ __ verify_oop(FSR); ++ break; ++ default: ++ ShouldNotReachHere(); ++ } ++ ++ { ++ Label notVolatile; ++ __ beq(scratch, R0, notVolatile); ++ __ membar(Assembler::Membar_mask_bits(__ LoadLoad | __ LoadStore)); ++ __ bind(notVolatile); ++ } ++} ++ ++// generator for _fast_iaccess_0, _fast_aaccess_0, _fast_faccess_0 ++// used registers : T1, T2, T3, T1 ++// T1 : obj & field address ++// T2 : off ++// T3 : cache ++// T1 : index ++void TemplateTable::fast_xaccess(TosState state) { ++ transition(vtos, state); ++ ++ const Register scratch = T8; ++ ++ // get receiver ++ __ ld_d(T1, aaddress(0)); ++ // access constant pool cache ++ __ get_cache_and_index_at_bcp(T3, T2, 2); ++ __ alsl_d(AT, T2, T3, Address::times_8 - 1); ++ __ ld_d(T2, AT, in_bytes(ConstantPoolCache::base_offset() + ConstantPoolCacheEntry::f2_offset())); ++ ++ { ++ __ ld_d(AT, AT, in_bytes(ConstantPoolCache::base_offset() + ConstantPoolCacheEntry::flags_offset())); ++ __ li(scratch, 1 << ConstantPoolCacheEntry::is_volatile_shift); ++ __ andr(scratch, scratch, AT); ++ ++ Label notVolatile; ++ __ beq(scratch, R0, notVolatile); ++ __ membar(MacroAssembler::AnyAny); ++ __ bind(notVolatile); ++ } ++ ++ // make sure exception is reported in correct bcp range (getfield is ++ // next instruction) ++ __ addi_d(BCP, BCP, 1); ++ __ null_check(T1); ++ __ add_d(T1, T1, T2); ++ ++ if (state == itos) { ++ __ access_load_at(T_INT, IN_HEAP, FSR, Address(T1), noreg, noreg); ++ } else if (state == atos) { ++ do_oop_load(_masm, Address(T1, 0), FSR, IN_HEAP); ++ __ verify_oop(FSR); ++ } else if (state == ftos) { ++ __ access_load_at(T_FLOAT, IN_HEAP, noreg, Address(T1), noreg, noreg); ++ } else { ++ ShouldNotReachHere(); ++ } ++ __ addi_d(BCP, BCP, -1); ++ ++ { ++ Label notVolatile; ++ __ beq(scratch, R0, notVolatile); ++ __ membar(Assembler::Membar_mask_bits(__ LoadLoad | __ LoadStore)); ++ __ bind(notVolatile); ++ } ++} ++ ++ ++//----------------------------------------------------------------------------- ++// Calls ++ ++void TemplateTable::count_calls(Register method, Register temp) { ++ // implemented elsewhere ++ ShouldNotReachHere(); ++} ++ ++// method, index, recv, flags: T1, T2, T3, T1 ++// byte_no = 2 for _invokevirtual, 1 else ++// T0 : return address ++// get the method & index of the invoke, and push the return address of ++// the invoke(first word in the frame) ++// this address is where the return code jmp to. ++// NOTE : this method will set T3&T1 as recv&flags ++void TemplateTable::prepare_invoke(int byte_no, ++ Register method, // linked method (or i-klass) ++ Register index, // itable index, MethodType, etc. 
++ Register recv, // if caller wants to see it ++ Register flags // if caller wants to test it ++ ) { ++ // determine flags ++ const Bytecodes::Code code = bytecode(); ++ const bool is_invokeinterface = code == Bytecodes::_invokeinterface; ++ const bool is_invokedynamic = code == Bytecodes::_invokedynamic; ++ const bool is_invokehandle = code == Bytecodes::_invokehandle; ++ const bool is_invokevirtual = code == Bytecodes::_invokevirtual; ++ const bool is_invokespecial = code == Bytecodes::_invokespecial; ++ const bool load_receiver = (recv != noreg); ++ const bool save_flags = (flags != noreg); ++ assert(load_receiver == (code != Bytecodes::_invokestatic && code != Bytecodes::_invokedynamic),""); ++ assert(save_flags == (is_invokeinterface || is_invokevirtual), "need flags for vfinal"); ++ assert(flags == noreg || flags == T1, "error flags reg."); ++ assert(recv == noreg || recv == T3, "error recv reg."); ++ ++ // setup registers & access constant pool cache ++ if(recv == noreg) recv = T3; ++ if(flags == noreg) flags = T1; ++ assert_different_registers(method, index, recv, flags); ++ ++ // save 'interpreter return address' ++ __ save_bcp(); ++ ++ load_invoke_cp_cache_entry(byte_no, method, index, flags, is_invokevirtual, false, is_invokedynamic); ++ ++ if (is_invokedynamic || is_invokehandle) { ++ Label L_no_push; ++ __ li(AT, (1 << ConstantPoolCacheEntry::has_appendix_shift)); ++ __ andr(AT, AT, flags); ++ __ beq(AT, R0, L_no_push); ++ // Push the appendix as a trailing parameter. ++ // This must be done before we get the receiver, ++ // since the parameter_size includes it. ++ Register tmp = SSR; ++ __ push(tmp); ++ __ move(tmp, index); ++ assert(ConstantPoolCacheEntry::_indy_resolved_references_appendix_offset == 0, "appendix expected at index+0"); ++ __ load_resolved_reference_at_index(index, tmp, recv); ++ __ pop(tmp); ++ __ push(index); // push appendix (MethodType, CallSite, etc.) ++ __ bind(L_no_push); ++ } ++ ++ // load receiver if needed (after appendix is pushed so parameter size is correct) ++ // Note: no return address pushed yet ++ if (load_receiver) { ++ __ li(AT, ConstantPoolCacheEntry::parameter_size_mask); ++ __ andr(recv, flags, AT); ++ // Since we won't push RA on stack, no_return_pc_pushed_yet should be 0. 
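++  // recv currently holds the parameter size in stack slots (masked from flags);
++  // argument_address() turns that count into the address of the receiver, which
++  // lies furthest from the stack top among the outgoing arguments.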
++ const int no_return_pc_pushed_yet = 0; // argument slot correction before we push return address ++ const int receiver_is_at_end = -1; // back off one slot to get receiver ++ Address recv_addr = __ argument_address(recv, no_return_pc_pushed_yet + receiver_is_at_end); ++ __ ld_d(recv, recv_addr); ++ __ verify_oop(recv); ++ } ++ if(save_flags) { ++ __ move(BCP, flags); ++ } ++ ++ // compute return type ++ __ srli_d(flags, flags, ConstantPoolCacheEntry::tos_state_shift); ++ __ andi(flags, flags, 0xf); ++ ++ // Make sure we don't need to mask flags for tos_state_shift after the above shift ++ ConstantPoolCacheEntry::verify_tos_state_shift(); ++ // load return address ++ { ++ const address table = (address) Interpreter::invoke_return_entry_table_for(code); ++ __ li(AT, (long)table); ++ __ alsl_d(AT, flags, AT, LogBytesPerWord - 1); ++ __ ld_d(RA, AT, 0); ++ } ++ ++ if (save_flags) { ++ __ move(flags, BCP); ++ __ restore_bcp(); ++ } ++} ++ ++// used registers : T0, T3, T1, T2 ++// T3 : recv, this two register using convention is by prepare_invoke ++// T1 : flags, klass ++// Rmethod : method, index must be Rmethod ++void TemplateTable::invokevirtual_helper(Register index, ++ Register recv, ++ Register flags) { ++ ++ assert_different_registers(index, recv, flags, T2); ++ ++ // Test for an invoke of a final method ++ Label notFinal; ++ __ li(AT, (1 << ConstantPoolCacheEntry::is_vfinal_shift)); ++ __ andr(AT, flags, AT); ++ __ beq(AT, R0, notFinal); ++ ++ Register method = index; // method must be Rmethod ++ assert(method == Rmethod, "methodOop must be Rmethod for interpreter calling convention"); ++ ++ // do the call - the index is actually the method to call ++ // the index is indeed methodOop, for this is vfinal, ++ // see ConstantPoolCacheEntry::set_method for more info ++ ++ // It's final, need a null check here! 
++ __ null_check(recv); ++ ++ // profile this call ++ __ profile_final_call(T2); ++ ++ // T2: tmp, used for mdp ++ // method: callee ++ // T4: tmp ++ // is_virtual: true ++ __ profile_arguments_type(T2, method, T4, true); ++ ++ __ jump_from_interpreted(method, T2); ++ ++ __ bind(notFinal); ++ ++ // get receiver klass ++ __ null_check(recv, oopDesc::klass_offset_in_bytes()); ++ __ load_klass(T2, recv); ++ ++ // profile this call ++ __ profile_virtual_call(T2, T0, T1); ++ ++ // get target methodOop & entry point ++ __ lookup_virtual_method(T2, index, method); ++ __ profile_arguments_type(T2, method, T4, true); ++ __ jump_from_interpreted(method, T2); ++} ++ ++void TemplateTable::invokevirtual(int byte_no) { ++ transition(vtos, vtos); ++ assert(byte_no == f2_byte, "use this argument"); ++ prepare_invoke(byte_no, Rmethod, NOREG, T3, T1); ++ // now recv & flags in T3, T1 ++ invokevirtual_helper(Rmethod, T3, T1); ++} ++ ++// T4 : entry ++// Rmethod : method ++void TemplateTable::invokespecial(int byte_no) { ++ transition(vtos, vtos); ++ assert(byte_no == f1_byte, "use this argument"); ++ prepare_invoke(byte_no, Rmethod, NOREG, T3); ++ // now recv & flags in T3, T1 ++ __ verify_oop(T3); ++ __ null_check(T3); ++ __ profile_call(T4); ++ ++ // T8: tmp, used for mdp ++ // Rmethod: callee ++ // T4: tmp ++ // is_virtual: false ++ __ profile_arguments_type(T8, Rmethod, T4, false); ++ ++ __ jump_from_interpreted(Rmethod, T4); ++ __ move(T0, T3); ++} ++ ++void TemplateTable::invokestatic(int byte_no) { ++ transition(vtos, vtos); ++ assert(byte_no == f1_byte, "use this argument"); ++ prepare_invoke(byte_no, Rmethod, NOREG); ++ ++ __ profile_call(T4); ++ ++ // T8: tmp, used for mdp ++ // Rmethod: callee ++ // T4: tmp ++ // is_virtual: false ++ __ profile_arguments_type(T8, Rmethod, T4, false); ++ ++ __ jump_from_interpreted(Rmethod, T4); ++} ++ ++// i have no idea what to do here, now. for future change. FIXME. ++void TemplateTable::fast_invokevfinal(int byte_no) { ++ transition(vtos, vtos); ++ assert(byte_no == f2_byte, "use this argument"); ++ __ stop("fast_invokevfinal not used on LoongArch64"); ++} ++ ++// used registers : T0, T1, T2, T3, T1, A7 ++// T0 : itable, vtable, entry ++// T1 : interface ++// T3 : receiver ++// T1 : flags, klass ++// Rmethod : index, method, this is required by interpreter_entry ++void TemplateTable::invokeinterface(int byte_no) { ++ transition(vtos, vtos); ++ //this method will use T1-T4 and T0 ++ assert(byte_no == f1_byte, "use this argument"); ++ prepare_invoke(byte_no, T2, Rmethod, T3, T1); ++ // T2: reference klass (from f1) if interface method ++ // Rmethod: method (from f2) ++ // T3: receiver ++ // T1: flags ++ ++ // First check for Object case, then private interface method, ++ // then regular interface method. ++ ++ // Special case of invokeinterface called for virtual method of ++ // java.lang.Object. See cpCache.cpp for details. 
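++  // Such calls are marked is_forced_virtual in the cache entry flags and are
++  // dispatched through the vtable exactly like invokevirtual.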
++ Label notObjectMethod; ++ __ li(AT, (1 << ConstantPoolCacheEntry::is_forced_virtual_shift)); ++ __ andr(AT, T1, AT); ++ __ beq(AT, R0, notObjectMethod); ++ ++ invokevirtual_helper(Rmethod, T3, T1); ++ // no return from above ++ __ bind(notObjectMethod); ++ ++ Label no_such_interface; // for receiver subtype check ++ Register recvKlass; // used for exception processing ++ ++ // Check for private method invocation - indicated by vfinal ++ Label notVFinal; ++ __ li(AT, (1 << ConstantPoolCacheEntry::is_vfinal_shift)); ++ __ andr(AT, T1, AT); ++ __ beq(AT, R0, notVFinal); ++ ++ // Get receiver klass into FSR - also a null check ++ __ null_check(T3, oopDesc::klass_offset_in_bytes()); ++ __ load_klass(FSR, T3); ++ ++ Label subtype; ++ __ check_klass_subtype(FSR, T2, T0, subtype); ++ // If we get here the typecheck failed ++ recvKlass = T1; ++ __ move(recvKlass, FSR); ++ __ b(no_such_interface); ++ ++ __ bind(subtype); ++ ++ // do the call - rbx is actually the method to call ++ ++ __ profile_final_call(T1); ++ __ profile_arguments_type(T1, Rmethod, T0, true); ++ ++ __ jump_from_interpreted(Rmethod, T1); ++ // no return from above ++ __ bind(notVFinal); ++ ++ // Get receiver klass into T1 - also a null check ++ __ restore_locals(); ++ __ null_check(T3, oopDesc::klass_offset_in_bytes()); ++ __ load_klass(T1, T3); ++ ++ Label no_such_method; ++ ++ // Preserve method for throw_AbstractMethodErrorVerbose. ++ __ move(T3, Rmethod); ++ // Receiver subtype check against REFC. ++ // Superklass in T2. Subklass in T1. ++ __ lookup_interface_method(// inputs: rec. class, interface, itable index ++ T1, T2, noreg, ++ // outputs: scan temp. reg, scan temp. reg ++ T0, FSR, ++ no_such_interface, ++ /*return_method=*/false); ++ ++ ++ // profile this call ++ __ restore_bcp(); ++ __ profile_virtual_call(T1, T0, FSR); ++ ++ // Get declaring interface class from method, and itable index ++ __ ld_ptr(T2, Rmethod, in_bytes(Method::const_offset())); ++ __ ld_ptr(T2, T2, in_bytes(ConstMethod::constants_offset())); ++ __ ld_ptr(T2, T2, ConstantPool::pool_holder_offset_in_bytes()); ++ __ ld_w(Rmethod, Rmethod, in_bytes(Method::itable_index_offset())); ++ __ addi_d(Rmethod, Rmethod, (-1) * Method::itable_index_max); ++ __ sub_w(Rmethod, R0, Rmethod); ++ ++ // Preserve recvKlass for throw_AbstractMethodErrorVerbose. ++ __ move(FSR, T1); ++ __ lookup_interface_method(// inputs: rec. class, interface, itable index ++ FSR, T2, Rmethod, ++ // outputs: method, scan temp. reg ++ Rmethod, T0, ++ no_such_interface); ++ ++ // Rmethod: Method* to call ++ // T3: receiver ++ // Check for abstract method error ++ // Note: This should be done more efficiently via a throw_abstract_method_error ++ // interpreter entry point and a conditional jump to it in case of a null ++ // method. ++ __ beq(Rmethod, R0, no_such_method); ++ ++ __ profile_called_method(Rmethod, T0, T1); ++ __ profile_arguments_type(T1, Rmethod, T0, true); ++ ++ // do the call ++ // T3: receiver ++ // Rmethod: Method* ++ __ jump_from_interpreted(Rmethod, T1); ++ __ should_not_reach_here(); ++ ++ // exception handling code follows... ++ // note: must restore interpreter registers to canonical ++ // state for exception handling to work correctly! ++ ++ __ bind(no_such_method); ++ // throw exception ++ __ pop(Rmethod); // pop return address (pushed by prepare_invoke) ++ __ restore_bcp(); ++ __ restore_locals(); ++ // Pass arguments for generating a verbose error message. 
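++  // The receiver klass and the missing Method* are moved into the A1/A2 argument
++  // registers for throw_AbstractMethodErrorVerbose so the message can name both.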
++ recvKlass = A1; ++ Register method = A2; ++ if (recvKlass != T1) { __ move(recvKlass, T1); } ++ if (method != T3) { __ move(method, T3); } ++ __ call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::throw_AbstractMethodErrorVerbose), recvKlass, method); ++ // the call_VM checks for exception, so we should never return here. ++ __ should_not_reach_here(); ++ ++ __ bind(no_such_interface); ++ // throw exception ++ __ pop(Rmethod); // pop return address (pushed by prepare_invoke) ++ __ restore_bcp(); ++ __ restore_locals(); ++ // Pass arguments for generating a verbose error message. ++ if (recvKlass != T1) { __ move(recvKlass, T1); } ++ __ call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::throw_IncompatibleClassChangeErrorVerbose), recvKlass, T2); ++ // the call_VM checks for exception, so we should never return here. ++ __ should_not_reach_here(); ++} ++ ++ ++void TemplateTable::invokehandle(int byte_no) { ++ transition(vtos, vtos); ++ assert(byte_no == f1_byte, "use this argument"); ++ const Register T2_method = Rmethod; ++ const Register FSR_mtype = FSR; ++ const Register T3_recv = T3; ++ ++ prepare_invoke(byte_no, T2_method, FSR_mtype, T3_recv); ++ //??__ verify_method_ptr(T2_method); ++ __ verify_oop(T3_recv); ++ __ null_check(T3_recv); ++ ++ // T4: MethodType object (from cpool->resolved_references[f1], if necessary) ++ // T2_method: MH.invokeExact_MT method (from f2) ++ ++ // Note: T4 is already pushed (if necessary) by prepare_invoke ++ ++ // FIXME: profile the LambdaForm also ++ __ profile_final_call(T4); ++ ++ // T8: tmp, used for mdp ++ // T2_method: callee ++ // T4: tmp ++ // is_virtual: true ++ __ profile_arguments_type(T8, T2_method, T4, true); ++ ++ __ jump_from_interpreted(T2_method, T4); ++} ++ ++ void TemplateTable::invokedynamic(int byte_no) { ++ transition(vtos, vtos); ++ assert(byte_no == f1_byte, "use this argument"); ++ ++ const Register T2_callsite = T2; ++ ++ prepare_invoke(byte_no, Rmethod, T2_callsite); ++ ++ // T2: CallSite object (from cpool->resolved_references[f1]) ++ // Rmethod: MH.linkToCallSite method (from f2) ++ ++ // Note: T2_callsite is already pushed by prepare_invoke ++ // %%% should make a type profile for any invokedynamic that takes a ref argument ++ // profile this call ++ __ profile_call(T4); ++ ++ // T8: tmp, used for mdp ++ // Rmethod: callee ++ // T4: tmp ++ // is_virtual: false ++ __ profile_arguments_type(T8, Rmethod, T4, false); ++ ++ __ verify_oop(T2_callsite); ++ ++ __ jump_from_interpreted(Rmethod, T4); ++ } ++ ++//----------------------------------------------------------------------------- ++// Allocation ++// T1 : tags & buffer end & thread ++// T2 : object end ++// T3 : klass ++// T1 : object size ++// A1 : cpool ++// A2 : cp index ++// return object in FSR ++void TemplateTable::_new() { ++ transition(vtos, atos); ++ __ get_unsigned_2_byte_index_at_bcp(A2, 1); ++ ++ Label slow_case; ++ Label done; ++ Label initialize_header; ++ Label initialize_object; // including clearing the fields ++ Label allocate_shared; ++ ++ __ get_cpool_and_tags(A1, T1); ++ ++ // make sure the class we're about to instantiate has been resolved. 
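++  // A tag of JVM_CONSTANT_Class in the constant-pool tags array means the class is
++  // already resolved; any other tag takes the slow path, which resolves it in the VM.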
++ // Note: slow_case does a pop of stack, which is why we loaded class/pushed above ++ const int tags_offset = Array::base_offset_in_bytes(); ++ __ add_d(T1, T1, A2); ++ __ ld_b(AT, T1, tags_offset); ++ if(os::is_MP()) { ++ __ membar(Assembler::Membar_mask_bits(__ LoadLoad|__ LoadStore)); ++ } ++ __ addi_d(AT, AT, -(int)JVM_CONSTANT_Class); ++ __ bne(AT, R0, slow_case); ++ ++ // get InstanceKlass ++ __ load_resolved_klass_at_index(A1, A2, T3); ++ ++ // make sure klass is initialized & doesn't have finalizer ++ // make sure klass is fully initialized ++ __ ld_hu(T1, T3, in_bytes(InstanceKlass::init_state_offset())); ++ __ addi_d(AT, T1, - (int)InstanceKlass::fully_initialized); ++ __ bne(AT, R0, slow_case); ++ ++ // has_finalizer ++ __ ld_w(T0, T3, in_bytes(Klass::layout_helper_offset()) ); ++ __ andi(AT, T0, Klass::_lh_instance_slow_path_bit); ++ __ bne(AT, R0, slow_case); ++ ++ // Allocate the instance ++ // 1) Try to allocate in the TLAB ++ // 2) if fail and the object is large allocate in the shared Eden ++ // 3) if the above fails (or is not applicable), go to a slow case ++ // (creates a new TLAB, etc.) ++ ++ const bool allow_shared_alloc = ++ Universe::heap()->supports_inline_contig_alloc(); ++ ++#ifndef OPT_THREAD ++ const Register thread = T8; ++ if (UseTLAB || allow_shared_alloc) { ++ __ get_thread(thread); ++ } ++#else ++ const Register thread = TREG; ++#endif ++ ++ if (UseTLAB) { ++ // get tlab_top ++ __ ld_d(FSR, thread, in_bytes(JavaThread::tlab_top_offset())); ++ // get tlab_end ++ __ ld_d(AT, thread, in_bytes(JavaThread::tlab_end_offset())); ++ __ add_d(T2, FSR, T0); ++ __ blt(AT, T2, allow_shared_alloc ? allocate_shared : slow_case); ++ __ st_d(T2, thread, in_bytes(JavaThread::tlab_top_offset())); ++ ++ if (ZeroTLAB) { ++ // the fields have been already cleared ++ __ beq(R0, R0, initialize_header); ++ } else { ++ // initialize both the header and fields ++ __ beq(R0, R0, initialize_object); ++ } ++ } ++ ++ // Allocation in the shared Eden , if allowed ++ // T0 : instance size in words ++ if(allow_shared_alloc){ ++ __ bind(allocate_shared); ++ ++ Label done, retry; ++ Address heap_top(T1); ++ __ li(T1, (long)Universe::heap()->top_addr()); ++ __ ld_d(FSR, heap_top); ++ ++ __ bind(retry); ++ __ li(AT, (long)Universe::heap()->end_addr()); ++ __ ld_d(AT, AT, 0); ++ __ add_d(T2, FSR, T0); ++ __ blt(AT, T2, slow_case); ++ ++ // Compare FSR with the top addr, and if still equal, store the new ++ // top addr in T2 at the address of the top addr pointer. Sets AT if was ++ // equal, and clears it otherwise. Use lock prefix for atomicity on MPs. ++ // ++ // FSR: object begin ++ // T2: object end ++ // T0: instance size in words ++ ++ // if someone beat us on the allocation, try again, otherwise continue ++ __ cmpxchg(heap_top, FSR, T2, AT, true, true, done, &retry); ++ ++ __ bind(done); ++ __ incr_allocated_bytes(thread, T0, 0); ++ } ++ ++ if (UseTLAB || Universe::heap()->supports_inline_contig_alloc()) { ++ // The object is initialized before the header. If the object size is ++ // zero, go directly to the header initialization. ++ __ bind(initialize_object); ++ __ li(AT, - sizeof(oopDesc)); ++ __ add_d(T0, T0, AT); ++ __ beq(T0, R0, initialize_header); ++ ++ // initialize remaining object fields: T0 is a multiple of 2 ++ { ++ Label loop; ++ __ add_d(T1, FSR, T0); ++ ++ __ bind(loop); ++ __ addi_d(T1, T1, -oopSize); ++ __ st_d(R0, T1, sizeof(oopDesc)); ++ __ bne(T1, FSR, loop); // dont clear header ++ } ++ ++ // klass in T3, ++ // initialize object header only. 
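++  // The mark word is taken from the klass prototype header when biased locking is
++  // enabled, otherwise from the global markOopDesc prototype.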
++ __ bind(initialize_header); ++ if (UseBiasedLocking) { ++ __ ld_d(AT, T3, in_bytes(Klass::prototype_header_offset())); ++ __ st_d(AT, FSR, oopDesc::mark_offset_in_bytes ()); ++ } else { ++ __ li(AT, (long)markOopDesc::prototype()); ++ __ st_d(AT, FSR, oopDesc::mark_offset_in_bytes()); ++ } ++ ++ __ store_klass_gap(FSR, R0); ++ __ store_klass(FSR, T3); ++ ++ { ++ SkipIfEqual skip_if(_masm, &DTraceAllocProbes, 0); ++ // Trigger dtrace event for fastpath ++ __ push(atos); ++ __ call_VM_leaf( ++ CAST_FROM_FN_PTR(address, SharedRuntime::dtrace_object_alloc), FSR); ++ __ pop(atos); ++ ++ } ++ __ b(done); ++ } ++ ++ // slow case ++ __ bind(slow_case); ++ __ get_constant_pool(A1); ++ __ get_unsigned_2_byte_index_at_bcp(A2, 1); ++ call_VM(FSR, CAST_FROM_FN_PTR(address, InterpreterRuntime::_new), A1, A2); ++ ++ // continue ++ __ bind(done); ++ __ membar(__ StoreStore); ++} ++ ++void TemplateTable::newarray() { ++ transition(itos, atos); ++ __ ld_bu(A1, at_bcp(1)); ++ // type, count ++ call_VM(FSR, CAST_FROM_FN_PTR(address, InterpreterRuntime::newarray), A1, FSR); ++ __ membar(__ StoreStore); ++} ++ ++void TemplateTable::anewarray() { ++ transition(itos, atos); ++ __ get_2_byte_integer_at_bcp(A2, AT, 1); ++ __ huswap(A2); ++ __ get_constant_pool(A1); ++ // cp, index, count ++ call_VM(FSR, CAST_FROM_FN_PTR(address, InterpreterRuntime::anewarray), A1, A2, FSR); ++ __ membar(__ StoreStore); ++} ++ ++void TemplateTable::arraylength() { ++ transition(atos, itos); ++ __ null_check(FSR, arrayOopDesc::length_offset_in_bytes()); ++ __ ld_w(FSR, FSR, arrayOopDesc::length_offset_in_bytes()); ++} ++ ++// when invoke gen_subtype_check, super in T3, sub in T2, object in FSR(it's always) ++// T2 : sub klass ++// T3 : cpool ++// T3 : super klass ++void TemplateTable::checkcast() { ++ transition(atos, atos); ++ Label done, is_null, ok_is_subtype, quicked, resolved; ++ __ beq(FSR, R0, is_null); ++ ++ // Get cpool & tags index ++ __ get_cpool_and_tags(T3, T1); ++ __ get_2_byte_integer_at_bcp(T2, AT, 1); ++ __ huswap(T2); ++ ++ // See if bytecode has already been quicked ++ __ add_d(AT, T1, T2); ++ __ ld_b(AT, AT, Array::base_offset_in_bytes()); ++ if(os::is_MP()) { ++ __ membar(Assembler::Membar_mask_bits(__ LoadLoad|__ LoadStore)); ++ } ++ __ addi_d(AT, AT, - (int)JVM_CONSTANT_Class); ++ __ beq(AT, R0, quicked); ++ ++ // In InterpreterRuntime::quicken_io_cc, lots of new classes may be loaded. ++ // Then, GC will move the object in V0 to another places in heap. ++ // Therefore, We should never save such an object in register. ++ // Instead, we should save it in the stack. It can be modified automatically by the GC thread. ++ // After GC, the object address in FSR is changed to a new place. ++ // ++ __ push(atos); ++ const Register thread = TREG; ++#ifndef OPT_THREAD ++ __ get_thread(thread); ++#endif ++ call_VM(NOREG, CAST_FROM_FN_PTR(address, InterpreterRuntime::quicken_io_cc)); ++ __ get_vm_result_2(T3, thread); ++ __ pop_ptr(FSR); ++ __ b(resolved); ++ ++ // klass already in cp, get superklass in T3 ++ __ bind(quicked); ++ __ load_resolved_klass_at_index(T3, T2, T3); ++ ++ __ bind(resolved); ++ ++ // get subklass in T2 ++ __ load_klass(T2, FSR); ++ // Superklass in T3. Subklass in T2. ++ __ gen_subtype_check(T3, T2, ok_is_subtype); ++ ++ // Come here on failure ++ // object is at FSR ++ __ jmp(Interpreter::_throw_ClassCastException_entry); ++ ++ // Come here on success ++ __ bind(ok_is_subtype); ++ ++ // Collect counts on whether this check-cast sees NULLs a lot or not. 
++ if (ProfileInterpreter) { ++ __ b(done); ++ __ bind(is_null); ++ __ profile_null_seen(T3); ++ } else { ++ __ bind(is_null); ++ } ++ __ bind(done); ++} ++ ++// T3 as cpool, T1 as tags, T2 as index ++// object always in FSR, superklass in T3, subklass in T2 ++void TemplateTable::instanceof() { ++ transition(atos, itos); ++ Label done, is_null, ok_is_subtype, quicked, resolved; ++ ++ __ beq(FSR, R0, is_null); ++ ++ // Get cpool & tags index ++ __ get_cpool_and_tags(T3, T1); ++ // get index ++ __ get_2_byte_integer_at_bcp(T2, AT, 1); ++ __ huswap(T2); ++ ++ // See if bytecode has already been quicked ++ // quicked ++ __ add_d(AT, T1, T2); ++ __ ld_b(AT, AT, Array::base_offset_in_bytes()); ++ if(os::is_MP()) { ++ __ membar(Assembler::Membar_mask_bits(__ LoadLoad|__ LoadStore)); ++ } ++ __ addi_d(AT, AT, - (int)JVM_CONSTANT_Class); ++ __ beq(AT, R0, quicked); ++ ++ __ push(atos); ++ const Register thread = TREG; ++#ifndef OPT_THREAD ++ __ get_thread(thread); ++#endif ++ call_VM(NOREG, CAST_FROM_FN_PTR(address, InterpreterRuntime::quicken_io_cc)); ++ __ get_vm_result_2(T3, thread); ++ __ pop_ptr(FSR); ++ __ b(resolved); ++ ++ // get superklass in T3, subklass in T2 ++ __ bind(quicked); ++ __ load_resolved_klass_at_index(T3, T2, T3); ++ ++ __ bind(resolved); ++ // get subklass in T2 ++ __ load_klass(T2, FSR); ++ ++ // Superklass in T3. Subklass in T2. ++ __ gen_subtype_check(T3, T2, ok_is_subtype); ++ __ move(FSR, R0); ++ // Come here on failure ++ __ b(done); ++ ++ // Come here on success ++ __ bind(ok_is_subtype); ++ __ li(FSR, 1); ++ ++ // Collect counts on whether this test sees NULLs a lot or not. ++ if (ProfileInterpreter) { ++ __ beq(R0, R0, done); ++ __ bind(is_null); ++ __ profile_null_seen(T3); ++ } else { ++ __ bind(is_null); // same as 'done' ++ } ++ __ bind(done); ++ // FSR = 0: obj == NULL or obj is not an instanceof the specified klass ++ // FSR = 1: obj != NULL and obj is an instanceof the specified klass ++} ++ ++//-------------------------------------------------------- ++//-------------------------------------------- ++// Breakpoints ++void TemplateTable::_breakpoint() { ++ // Note: We get here even if we are single stepping.. ++ // jbug inists on setting breakpoints at every bytecode ++ // even if we are in single step mode. ++ ++ transition(vtos, vtos); ++ ++ // get the unpatched byte code ++ __ get_method(A1); ++ __ call_VM(NOREG, ++ CAST_FROM_FN_PTR(address, ++ InterpreterRuntime::get_original_bytecode_at), ++ A1, BCP); ++ __ move(Rnext, V0); // Rnext will be used in dispatch_only_normal ++ ++ // post the breakpoint event ++ __ get_method(A1); ++ __ call_VM(NOREG, CAST_FROM_FN_PTR(address, InterpreterRuntime::_breakpoint), A1, BCP); ++ ++ // complete the execution of original bytecode ++ __ dispatch_only_normal(vtos); ++} ++ ++//----------------------------------------------------------------------------- ++// Exceptions ++ ++void TemplateTable::athrow() { ++ transition(atos, vtos); ++ __ null_check(FSR); ++ __ jmp(Interpreter::throw_exception_entry()); ++} ++ ++//----------------------------------------------------------------------------- ++// Synchronization ++// ++// Note: monitorenter & exit are symmetric routines; which is reflected ++// in the assembly code structure as well ++// ++// Stack layout: ++// ++// [expressions ] <--- SP = expression stack top ++// .. ++// [expressions ] ++// [monitor entry] <--- monitor block top = expression stack bot ++// .. ++// [monitor entry] ++// [frame data ] <--- monitor block bot ++// ... 
++// [return addr ] <--- FP ++ ++// we use T2 as monitor entry pointer, T3 as monitor top pointer, c_rarg0 as free slot pointer ++// object always in FSR ++void TemplateTable::monitorenter() { ++ transition(atos, vtos); ++ ++ // check for NULL object ++ __ null_check(FSR); ++ ++ const Address monitor_block_top(FP, frame::interpreter_frame_monitor_block_top_offset ++ * wordSize); ++ const int entry_size = (frame::interpreter_frame_monitor_size()* wordSize); ++ Label allocated; ++ ++ // initialize entry pointer ++ __ move(c_rarg0, R0); ++ ++ // find a free slot in the monitor block (result in c_rarg0) ++ { ++ Label entry, loop, exit, next; ++ __ ld_d(T2, monitor_block_top); ++ __ addi_d(T3, FP, frame::interpreter_frame_initial_sp_offset * wordSize); ++ __ b(entry); ++ ++ // free slot? ++ __ bind(loop); ++ __ ld_d(AT, T2, BasicObjectLock::obj_offset_in_bytes()); ++ __ bne(AT, R0, next); ++ __ move(c_rarg0, T2); ++ ++ __ bind(next); ++ __ beq(FSR, AT, exit); ++ __ addi_d(T2, T2, entry_size); ++ ++ __ bind(entry); ++ __ bne(T3, T2, loop); ++ __ bind(exit); ++ } ++ ++ __ bne(c_rarg0, R0, allocated); ++ ++ // allocate one if there's no free slot ++ { ++ Label entry, loop; ++ // 1. compute new pointers // SP: old expression stack top ++ __ ld_d(c_rarg0, monitor_block_top); ++ __ addi_d(SP, SP, -entry_size); ++ __ addi_d(c_rarg0, c_rarg0, -entry_size); ++ __ st_d(c_rarg0, monitor_block_top); ++ __ move(T3, SP); ++ __ b(entry); ++ ++ // 2. move expression stack contents ++ __ bind(loop); ++ __ ld_d(AT, T3, entry_size); ++ __ st_d(AT, T3, 0); ++ __ addi_d(T3, T3, wordSize); ++ __ bind(entry); ++ __ bne(T3, c_rarg0, loop); ++ } ++ ++ __ bind(allocated); ++ // Increment bcp to point to the next bytecode, ++ // so exception handling for async. exceptions work correctly. ++ // The object has already been poped from the stack, so the ++ // expression stack looks correct. ++ __ addi_d(BCP, BCP, 1); ++ __ st_d(FSR, c_rarg0, BasicObjectLock::obj_offset_in_bytes()); ++ __ lock_object(c_rarg0); ++ // check to make sure this monitor doesn't cause stack overflow after locking ++ __ save_bcp(); // in case of exception ++ __ generate_stack_overflow_check(0); ++ // The bcp has already been incremented. Just need to dispatch to next instruction. ++ ++ __ dispatch_next(vtos); ++} ++ ++// T2 : top ++// c_rarg0 : entry ++void TemplateTable::monitorexit() { ++ transition(atos, vtos); ++ ++ __ null_check(FSR); ++ ++ const int entry_size =(frame::interpreter_frame_monitor_size()* wordSize); ++ Label found; ++ ++ // find matching slot ++ { ++ Label entry, loop; ++ __ ld_d(c_rarg0, FP, frame::interpreter_frame_monitor_block_top_offset * wordSize); ++ __ addi_d(T2, FP, frame::interpreter_frame_initial_sp_offset * wordSize); ++ __ b(entry); ++ ++ __ bind(loop); ++ __ ld_d(AT, c_rarg0, BasicObjectLock::obj_offset_in_bytes()); ++ __ beq(FSR, AT, found); ++ __ addi_d(c_rarg0, c_rarg0, entry_size); ++ __ bind(entry); ++ __ bne(T2, c_rarg0, loop); ++ } ++ ++ // error handling. 
Unlocking was not block-structured ++ Label end; ++ __ call_VM(NOREG, CAST_FROM_FN_PTR(address, ++ InterpreterRuntime::throw_illegal_monitor_state_exception)); ++ __ should_not_reach_here(); ++ ++ // call run-time routine ++ // c_rarg0: points to monitor entry ++ __ bind(found); ++ __ move(TSR, FSR); ++ __ unlock_object(c_rarg0); ++ __ move(FSR, TSR); ++ __ bind(end); ++} ++ ++ ++// Wide instructions ++void TemplateTable::wide() { ++ transition(vtos, vtos); ++ __ ld_bu(Rnext, at_bcp(1)); ++ __ slli_d(T4, Rnext, Address::times_8); ++ __ li(AT, (long)Interpreter::_wentry_point); ++ __ add_d(AT, T4, AT); ++ __ ld_d(T4, AT, 0); ++ __ jr(T4); ++} ++ ++ ++void TemplateTable::multianewarray() { ++ transition(vtos, atos); ++ // last dim is on top of stack; we want address of first one: ++ // first_addr = last_addr + (ndims - 1) * wordSize ++ __ ld_bu(A1, at_bcp(3)); // dimension ++ __ addi_d(A1, A1, -1); ++ __ alsl_d(A1, A1, SP, Address::times_8 - 1); // now A1 pointer to the count array on the stack ++ call_VM(FSR, CAST_FROM_FN_PTR(address, InterpreterRuntime::multianewarray), A1); ++ __ ld_bu(AT, at_bcp(3)); ++ __ alsl_d(SP, AT, SP, Address::times_8 - 1); ++ __ membar(__ AnyAny);//no membar here for aarch64 ++} ++#endif // !CC_INTERP +diff --git a/src/hotspot/cpu/loongarch/vmStructs_loongarch.hpp b/src/hotspot/cpu/loongarch/vmStructs_loongarch.hpp +new file mode 100644 +index 0000000000..5b9f7b7898 +--- /dev/null ++++ b/src/hotspot/cpu/loongarch/vmStructs_loongarch.hpp +@@ -0,0 +1,61 @@ ++/* ++ * Copyright (c) 2001, 2013, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#ifndef CPU_LOONGARCH_VMSTRUCTS_LOONGARCH_HPP ++#define CPU_LOONGARCH_VMSTRUCTS_LOONGARCH_HPP ++ ++// These are the CPU-specific fields, types and integer ++// constants required by the Serviceability Agent. This file is ++// referenced by vmStructs.cpp. 
++ ++#define VM_STRUCTS_CPU(nonstatic_field, static_field, unchecked_nonstatic_field, volatile_nonstatic_field, nonproduct_nonstatic_field, c2_nonstatic_field, unchecked_c1_static_field, unchecked_c2_static_field) \ ++ volatile_nonstatic_field(JavaFrameAnchor, _last_Java_fp, intptr_t*) \ ++ \ ++ ++ /* NOTE that we do not use the last_entry() macro here; it is used */ ++ /* in vmStructs__.hpp's VM_STRUCTS_OS_CPU macro (and must */ ++ /* be present there) */ ++ ++ ++#define VM_TYPES_CPU(declare_type, declare_toplevel_type, declare_oop_type, declare_integer_type, declare_unsigned_integer_type, declare_c1_toplevel_type, declare_c2_type, declare_c2_toplevel_type) \ ++ ++ /* NOTE that we do not use the last_entry() macro here; it is used */ ++ /* in vmStructs__.hpp's VM_TYPES_OS_CPU macro (and must */ ++ /* be present there) */ ++ ++ ++#define VM_INT_CONSTANTS_CPU(declare_constant, declare_preprocessor_constant, declare_c1_constant, declare_c2_constant, declare_c2_preprocessor_constant) \ ++ ++ /* NOTE that we do not use the last_entry() macro here; it is used */ ++ /* in vmStructs__.hpp's VM_INT_CONSTANTS_OS_CPU macro (and must */ ++ /* be present there) */ ++ ++#define VM_LONG_CONSTANTS_CPU(declare_constant, declare_preprocessor_constant, declare_c1_constant, declare_c2_constant, declare_c2_preprocessor_constant) \ ++ ++ /* NOTE that we do not use the last_entry() macro here; it is used */ ++ /* in vmStructs__.hpp's VM_LONG_CONSTANTS_OS_CPU macro (and must */ ++ /* be present there) */ ++ ++#endif // CPU_LOONGARCH_VMSTRUCTS_LOONGARCH_HPP +diff --git a/src/hotspot/cpu/loongarch/vm_version_ext_loongarch.cpp b/src/hotspot/cpu/loongarch/vm_version_ext_loongarch.cpp +new file mode 100644 +index 0000000000..eb8f075c71 +--- /dev/null ++++ b/src/hotspot/cpu/loongarch/vm_version_ext_loongarch.cpp +@@ -0,0 +1,85 @@ ++/* ++ * Copyright (c) 2013, 2018, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2018, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ * ++ */ ++ ++#include "memory/allocation.inline.hpp" ++#include "runtime/os.inline.hpp" ++#include "vm_version_ext_loongarch.hpp" ++ ++// VM_Version_Ext statics ++int VM_Version_Ext::_no_of_threads = 0; ++int VM_Version_Ext::_no_of_cores = 0; ++int VM_Version_Ext::_no_of_sockets = 0; ++bool VM_Version_Ext::_initialized = false; ++char VM_Version_Ext::_cpu_name[CPU_TYPE_DESC_BUF_SIZE] = {0}; ++char VM_Version_Ext::_cpu_desc[CPU_DETAILED_DESC_BUF_SIZE] = {0}; ++ ++void VM_Version_Ext::initialize_cpu_information(void) { ++ // do nothing if cpu info has been initialized ++ if (_initialized) { ++ return; ++ } ++ ++ _no_of_cores = os::processor_count(); ++ _no_of_threads = _no_of_cores; ++ _no_of_sockets = _no_of_cores; ++ snprintf(_cpu_name, CPU_TYPE_DESC_BUF_SIZE - 1, "LoongArch"); ++ snprintf(_cpu_desc, CPU_DETAILED_DESC_BUF_SIZE, "LoongArch %s", cpu_features()); ++ _initialized = true; ++} ++ ++int VM_Version_Ext::number_of_threads(void) { ++ initialize_cpu_information(); ++ return _no_of_threads; ++} ++ ++int VM_Version_Ext::number_of_cores(void) { ++ initialize_cpu_information(); ++ return _no_of_cores; ++} ++ ++int VM_Version_Ext::number_of_sockets(void) { ++ initialize_cpu_information(); ++ return _no_of_sockets; ++} ++ ++const char* VM_Version_Ext::cpu_name(void) { ++ initialize_cpu_information(); ++ char* tmp = NEW_C_HEAP_ARRAY_RETURN_NULL(char, CPU_TYPE_DESC_BUF_SIZE, mtTracing); ++ if (NULL == tmp) { ++ return NULL; ++ } ++ strncpy(tmp, _cpu_name, CPU_TYPE_DESC_BUF_SIZE); ++ return tmp; ++} ++ ++const char* VM_Version_Ext::cpu_description(void) { ++ initialize_cpu_information(); ++ char* tmp = NEW_C_HEAP_ARRAY_RETURN_NULL(char, CPU_DETAILED_DESC_BUF_SIZE, mtTracing); ++ if (NULL == tmp) { ++ return NULL; ++ } ++ strncpy(tmp, _cpu_desc, CPU_DETAILED_DESC_BUF_SIZE); ++ return tmp; ++} +diff --git a/src/hotspot/cpu/loongarch/vm_version_ext_loongarch.hpp b/src/hotspot/cpu/loongarch/vm_version_ext_loongarch.hpp +new file mode 100644 +index 0000000000..1a93123134 +--- /dev/null ++++ b/src/hotspot/cpu/loongarch/vm_version_ext_loongarch.hpp +@@ -0,0 +1,54 @@ ++/* ++ * Copyright (c) 2016, 2018, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2018, 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ * ++ */ ++ ++#ifndef CPU_LOONGARCH_VM_VERSION_EXT_LOONGARCH_HPP ++#define CPU_LOONGARCH_VM_VERSION_EXT_LOONGARCH_HPP ++ ++#include "runtime/vm_version.hpp" ++#include "utilities/macros.hpp" ++ ++class VM_Version_Ext : public VM_Version { ++ private: ++ static const size_t CPU_TYPE_DESC_BUF_SIZE = 256; ++ static const size_t CPU_DETAILED_DESC_BUF_SIZE = 4096; ++ ++ static int _no_of_threads; ++ static int _no_of_cores; ++ static int _no_of_sockets; ++ static bool _initialized; ++ static char _cpu_name[CPU_TYPE_DESC_BUF_SIZE]; ++ static char _cpu_desc[CPU_DETAILED_DESC_BUF_SIZE]; ++ ++ public: ++ static int number_of_threads(void); ++ static int number_of_cores(void); ++ static int number_of_sockets(void); ++ ++ static const char* cpu_name(void); ++ static const char* cpu_description(void); ++ static void initialize_cpu_information(void); ++}; ++ ++#endif // CPU_LOONGARCH_VM_VERSION_EXT_LOONGARCH_HPP +diff --git a/src/hotspot/cpu/loongarch/vm_version_loongarch.cpp b/src/hotspot/cpu/loongarch/vm_version_loongarch.cpp +new file mode 100644 +index 0000000000..0a9b55d17e +--- /dev/null ++++ b/src/hotspot/cpu/loongarch/vm_version_loongarch.cpp +@@ -0,0 +1,397 @@ ++/* ++ * Copyright (c) 1997, 2014, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2021, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ * ++ */ ++ ++#include "precompiled.hpp" ++#include "asm/macroAssembler.hpp" ++#include "asm/macroAssembler.inline.hpp" ++#include "memory/resourceArea.hpp" ++#include "runtime/java.hpp" ++#include "runtime/stubCodeGenerator.hpp" ++#include "runtime/vm_version.hpp" ++#ifdef TARGET_OS_FAMILY_linux ++# include "os_linux.inline.hpp" ++#endif ++ ++#define T5 RT5 ++ ++const char* VM_Version::_features_str = ""; ++VM_Version::CpuidInfo VM_Version::_cpuid_info = { 0, }; ++bool VM_Version::_cpu_info_is_initialized = false; ++ ++static BufferBlob* stub_blob; ++static const int stub_size = 600; ++ ++extern "C" { ++ typedef void (*get_cpu_info_stub_t)(void*); ++} ++static get_cpu_info_stub_t get_cpu_info_stub = NULL; ++ ++ ++class VM_Version_StubGenerator: public StubCodeGenerator { ++ public: ++ ++ VM_Version_StubGenerator(CodeBuffer *c) : StubCodeGenerator(c) {} ++ ++ address generate_get_cpu_info() { ++ assert(!VM_Version::cpu_info_is_initialized(), "VM_Version should not be initialized"); ++ StubCodeMark mark(this, "VM_Version", "get_cpu_info_stub"); ++# define __ _masm-> ++ ++ address start = __ pc(); ++ ++ __ enter(); ++ __ push(AT); ++ __ push(T5); ++ ++ __ li(AT, (long)0); ++ __ cpucfg(T5, AT); ++ __ st_w(T5, A0, in_bytes(VM_Version::Loongson_Cpucfg_id0_offset())); ++ ++ __ li(AT, 1); ++ __ cpucfg(T5, AT); ++ __ st_w(T5, A0, in_bytes(VM_Version::Loongson_Cpucfg_id1_offset())); ++ ++ __ li(AT, 2); ++ __ cpucfg(T5, AT); ++ __ st_w(T5, A0, in_bytes(VM_Version::Loongson_Cpucfg_id2_offset())); ++ ++ __ li(AT, 3); ++ __ cpucfg(T5, AT); ++ __ st_w(T5, A0, in_bytes(VM_Version::Loongson_Cpucfg_id3_offset())); ++ ++ __ li(AT, 4); ++ __ cpucfg(T5, AT); ++ __ st_w(T5, A0, in_bytes(VM_Version::Loongson_Cpucfg_id4_offset())); ++ ++ __ li(AT, 5); ++ __ cpucfg(T5, AT); ++ __ st_w(T5, A0, in_bytes(VM_Version::Loongson_Cpucfg_id5_offset())); ++ ++ __ li(AT, 6); ++ __ cpucfg(T5, AT); ++ __ st_w(T5, A0, in_bytes(VM_Version::Loongson_Cpucfg_id6_offset())); ++ ++ __ li(AT, 10); ++ __ cpucfg(T5, AT); ++ __ st_w(T5, A0, in_bytes(VM_Version::Loongson_Cpucfg_id10_offset())); ++ ++ __ li(AT, 11); ++ __ cpucfg(T5, AT); ++ __ st_w(T5, A0, in_bytes(VM_Version::Loongson_Cpucfg_id11_offset())); ++ ++ __ li(AT, 12); ++ __ cpucfg(T5, AT); ++ __ st_w(T5, A0, in_bytes(VM_Version::Loongson_Cpucfg_id12_offset())); ++ ++ __ li(AT, 13); ++ __ cpucfg(T5, AT); ++ __ st_w(T5, A0, in_bytes(VM_Version::Loongson_Cpucfg_id13_offset())); ++ ++ __ li(AT, 14); ++ __ cpucfg(T5, AT); ++ __ st_w(T5, A0, in_bytes(VM_Version::Loongson_Cpucfg_id14_offset())); ++ ++ __ pop(T5); ++ __ pop(AT); ++ __ leave(); ++ __ jr(RA); ++# undef __ ++ return start; ++ }; ++}; ++ ++uint32_t VM_Version::get_feature_flags_by_cpucfg() { ++ uint32_t result = 0; ++ if (_cpuid_info.cpucfg_info_id1.bits.ARCH == 0b00 || _cpuid_info.cpucfg_info_id1.bits.ARCH == 0b01 ) { ++ result |= CPU_LA32; ++ } else if (_cpuid_info.cpucfg_info_id1.bits.ARCH == 0b10 ) { ++ result |= CPU_LA64; ++ } ++ ++ if (_cpuid_info.cpucfg_info_id2.bits.FP_CFG != 0) ++ result |= CPU_FP; ++ ++ if (_cpuid_info.cpucfg_info_id3.bits.CCDMA != 0) ++ result |= CPU_CCDMA; ++ if (_cpuid_info.cpucfg_info_id3.bits.LLDBAR != 0) ++ result |= CPU_LLDBAR; ++ if (_cpuid_info.cpucfg_info_id3.bits.SCDLY != 0) ++ result |= CPU_SCDLY; ++ if (_cpuid_info.cpucfg_info_id3.bits.LLEXC != 0) ++ result |= CPU_LLEXC; ++ ++ result |= CPU_ULSYNC; ++ ++ return result; ++} ++ ++void VM_Version::get_processor_features() { ++ ++ clean_cpuFeatures(); ++ ++ get_os_cpu_info(); ++ ++ get_cpu_info_stub(&_cpuid_info); ++ _features |= 
get_feature_flags_by_cpucfg(); ++ ++ _supports_cx8 = true; ++ ++ if (UseG1GC && FLAG_IS_DEFAULT(MaxGCPauseMillis)) { ++ FLAG_SET_CMDLINE(uintx, MaxGCPauseMillis, 650); ++ } ++ ++ if (supports_lsx()) { ++ if (FLAG_IS_DEFAULT(UseLSX)) { ++ FLAG_SET_DEFAULT(UseLSX, true); ++ } ++ } else if (UseLSX) { ++ warning("LSX instructions are not available on this CPU"); ++ FLAG_SET_DEFAULT(UseLSX, false); ++ } ++ ++ if (supports_lasx()) { ++ if (FLAG_IS_DEFAULT(UseLASX)) { ++ FLAG_SET_DEFAULT(UseLASX, true); ++ } ++ } else if (UseLASX) { ++ warning("LASX instructions are not available on this CPU"); ++ FLAG_SET_DEFAULT(UseLASX, false); ++ } ++ ++ if (UseLASX && !UseLSX) { ++ warning("LASX instructions depends on LSX, setting UseLASX to false"); ++ FLAG_SET_DEFAULT(UseLASX, false); ++ } ++ ++#ifdef COMPILER2 ++ int max_vector_size = 0; ++ int min_vector_size = 0; ++ if (UseLASX) { ++ max_vector_size = 32; ++ min_vector_size = 16; ++ } ++ else if (UseLSX) { ++ max_vector_size = 16; ++ min_vector_size = 16; ++ } ++ ++ if (!FLAG_IS_DEFAULT(MaxVectorSize)) { ++ if (MaxVectorSize == 0) { ++ // do nothing ++ } else if (MaxVectorSize > max_vector_size) { ++ warning("MaxVectorSize must be at most %i on this platform", max_vector_size); ++ FLAG_SET_DEFAULT(MaxVectorSize, max_vector_size); ++ } else if (MaxVectorSize < min_vector_size) { ++ warning("MaxVectorSize must be at least %i or 0 on this platform, setting to: %i", min_vector_size, min_vector_size); ++ FLAG_SET_DEFAULT(MaxVectorSize, min_vector_size); ++ } else if (!is_power_of_2(MaxVectorSize)) { ++ warning("MaxVectorSize must be a power of 2, setting to default: %i", max_vector_size); ++ FLAG_SET_DEFAULT(MaxVectorSize, max_vector_size); ++ } ++ } else { ++ // If default, use highest supported configuration ++ FLAG_SET_DEFAULT(MaxVectorSize, max_vector_size); ++ } ++#endif ++ ++ char buf[256]; ++ ++ // A note on the _features_string format: ++ // There are jtreg tests checking the _features_string for various properties. ++ // For some strange reason, these tests require the string to contain ++ // only _lowercase_ characters. Keep that in mind when being surprised ++ // about the unusual notation of features - and when adding new ones. ++ // Features may have one comma at the end. ++ // Furthermore, use one, and only one, separator space between features. ++ // Multiple spaces are considered separate tokens, messing up everything. ++ jio_snprintf(buf, sizeof(buf), "%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s, " ++ "0x%lx, fp_ver: %d, lvz_ver: %d, ", ++ (is_la64() ? "la64" : ""), ++ (is_la32() ? "la32" : ""), ++ (supports_lsx() ? ", lsx" : ""), ++ (supports_lasx() ? ", lasx" : ""), ++ (supports_crypto() ? ", crypto" : ""), ++ (supports_lam() ? ", am" : ""), ++ (supports_ual() ? ", ual" : ""), ++ (supports_lldbar() ? ", lldbar" : ""), ++ (supports_scdly() ? ", scdly" : ""), ++ (supports_llexc() ? ", llexc" : ""), ++ (supports_lbt_x86() ? ", lbt_x86" : ""), ++ (supports_lbt_arm() ? ", lbt_arm" : ""), ++ (supports_lbt_mips() ? ", lbt_mips" : ""), ++ (needs_llsync() ? ", needs_llsync" : ""), ++ (needs_tgtsync() ? ", needs_tgtsync": ""), ++ (needs_ulsync() ? 
", needs_ulsync": ""), ++ _cpuid_info.cpucfg_info_id0.bits.PRID, ++ _cpuid_info.cpucfg_info_id2.bits.FP_VER, ++ _cpuid_info.cpucfg_info_id2.bits.LVZ_VER); ++ _features_str = strdup(buf); ++ ++ assert(!is_la32(), "Should Not Reach Here, what is the cpu type?"); ++ assert( is_la64(), "Should be LoongArch64"); ++ ++ if (FLAG_IS_DEFAULT(AllocatePrefetchStyle)) { ++ FLAG_SET_DEFAULT(AllocatePrefetchStyle, 1); ++ } ++ ++ if (FLAG_IS_DEFAULT(AllocatePrefetchLines)) { ++ FLAG_SET_DEFAULT(AllocatePrefetchLines, 3); ++ } ++ ++ if (FLAG_IS_DEFAULT(AllocatePrefetchStepSize)) { ++ FLAG_SET_DEFAULT(AllocatePrefetchStepSize, 64); ++ } ++ ++ if (FLAG_IS_DEFAULT(AllocatePrefetchDistance)) { ++ FLAG_SET_DEFAULT(AllocatePrefetchDistance, 192); ++ } ++ ++ if (FLAG_IS_DEFAULT(AllocateInstancePrefetchLines)) { ++ FLAG_SET_DEFAULT(AllocateInstancePrefetchLines, 1); ++ } ++ ++ // Basic instructions are used to implement SHA Intrinsics on LA, so sha ++ // instructions support is not needed. ++ if (/*supports_crypto()*/ 1) { ++ if (FLAG_IS_DEFAULT(UseSHA)) { ++ FLAG_SET_DEFAULT(UseSHA, true); ++ } ++ } else if (UseSHA) { ++ warning("SHA instructions are not available on this CPU"); ++ FLAG_SET_DEFAULT(UseSHA, false); ++ } ++ ++ if (UseSHA/* && supports_crypto()*/) { ++ if (FLAG_IS_DEFAULT(UseSHA1Intrinsics)) { ++ FLAG_SET_DEFAULT(UseSHA1Intrinsics, true); ++ } ++ } else if (UseSHA1Intrinsics) { ++ warning("Intrinsics for SHA-1 crypto hash functions not available on this CPU."); ++ FLAG_SET_DEFAULT(UseSHA1Intrinsics, false); ++ } ++ ++ if (UseSHA/* && supports_crypto()*/) { ++ if (FLAG_IS_DEFAULT(UseSHA256Intrinsics)) { ++ FLAG_SET_DEFAULT(UseSHA256Intrinsics, true); ++ } ++ } else if (UseSHA256Intrinsics) { ++ warning("Intrinsics for SHA-224 and SHA-256 crypto hash functions not available on this CPU."); ++ FLAG_SET_DEFAULT(UseSHA256Intrinsics, false); ++ } ++ ++ if (UseSHA512Intrinsics) { ++ warning("Intrinsics for SHA-384 and SHA-512 crypto hash functions not available on this CPU."); ++ FLAG_SET_DEFAULT(UseSHA512Intrinsics, false); ++ } ++ ++ if (!(UseSHA1Intrinsics || UseSHA256Intrinsics || UseSHA512Intrinsics)) { ++ FLAG_SET_DEFAULT(UseSHA, false); ++ } ++ ++ // Basic instructions are used to implement AES Intrinsics on LA, so AES ++ // instructions support is not needed. 
++ if (/*supports_crypto()*/ 1) { ++ if (FLAG_IS_DEFAULT(UseAES)) { ++ FLAG_SET_DEFAULT(UseAES, true); ++ } ++ } else if (UseAES) { ++ if (!FLAG_IS_DEFAULT(UseAES)) ++ warning("AES instructions are not available on this CPU"); ++ FLAG_SET_DEFAULT(UseAES, false); ++ } ++ ++ if (UseAES/* && supports_crypto()*/) { ++ if (FLAG_IS_DEFAULT(UseAESIntrinsics)) { ++ FLAG_SET_DEFAULT(UseAESIntrinsics, true); ++ } ++ } else if (UseAESIntrinsics) { ++ if (!FLAG_IS_DEFAULT(UseAESIntrinsics)) ++ warning("AES intrinsics are not available on this CPU"); ++ FLAG_SET_DEFAULT(UseAESIntrinsics, false); ++ } ++ ++ if (UseAESCTRIntrinsics) { ++ warning("AES/CTR intrinsics are not available on this CPU"); ++ FLAG_SET_DEFAULT(UseAESCTRIntrinsics, false); ++ } ++ ++ if (FLAG_IS_DEFAULT(UseCRC32)) { ++ FLAG_SET_DEFAULT(UseCRC32, true); ++ } ++ ++ if (UseCRC32) { ++ if (FLAG_IS_DEFAULT(UseCRC32Intrinsics)) { ++ UseCRC32Intrinsics = true; ++ } ++ ++ if (FLAG_IS_DEFAULT(UseCRC32CIntrinsics)) { ++ UseCRC32CIntrinsics = true; ++ } ++ } ++ ++#ifdef COMPILER2 ++ if (FLAG_IS_DEFAULT(UseMulAddIntrinsic)) { ++ FLAG_SET_DEFAULT(UseMulAddIntrinsic, true); ++ } ++ ++ if (FLAG_IS_DEFAULT(UseMontgomeryMultiplyIntrinsic)) { ++ UseMontgomeryMultiplyIntrinsic = true; ++ } ++ if (FLAG_IS_DEFAULT(UseMontgomerySquareIntrinsic)) { ++ UseMontgomerySquareIntrinsic = true; ++ } ++#endif ++ ++ // This machine allows unaligned memory accesses ++ if (FLAG_IS_DEFAULT(UseUnalignedAccesses)) { ++ FLAG_SET_DEFAULT(UseUnalignedAccesses, true); ++ } ++ ++ if (FLAG_IS_DEFAULT(UseFMA)) { ++ FLAG_SET_DEFAULT(UseFMA, true); ++ } ++ ++ if (FLAG_IS_DEFAULT(UseCopySignIntrinsic)) { ++ FLAG_SET_DEFAULT(UseCopySignIntrinsic, true); ++ } ++ ++ UNSUPPORTED_OPTION(CriticalJNINatives); ++} ++ ++void VM_Version::initialize() { ++ ResourceMark rm; ++ // Making this stub must be FIRST use of assembler ++ ++ stub_blob = BufferBlob::create("get_cpu_info_stub", stub_size); ++ if (stub_blob == NULL) { ++ vm_exit_during_initialization("Unable to allocate get_cpu_info_stub"); ++ } ++ CodeBuffer c(stub_blob); ++ VM_Version_StubGenerator g(&c); ++ get_cpu_info_stub = CAST_TO_FN_PTR(get_cpu_info_stub_t, ++ g.generate_get_cpu_info()); ++ ++ get_processor_features(); ++} +diff --git a/src/hotspot/cpu/loongarch/vm_version_loongarch.hpp b/src/hotspot/cpu/loongarch/vm_version_loongarch.hpp +new file mode 100644 +index 0000000000..00b8e608a1 +--- /dev/null ++++ b/src/hotspot/cpu/loongarch/vm_version_loongarch.hpp +@@ -0,0 +1,292 @@ ++/* ++ * Copyright (c) 1997, 2013, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2023, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 
++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#ifndef CPU_LOONGARCH_VM_VERSION_LOONGARCH_HPP ++#define CPU_LOONGARCH_VM_VERSION_LOONGARCH_HPP ++ ++#include "runtime/abstract_vm_version.hpp" ++#include "runtime/globals_extension.hpp" ++#include "utilities/sizes.hpp" ++ ++class VM_Version: public Abstract_VM_Version { ++ friend class JVMCIVMStructs; ++ ++public: ++ ++ union LoongArch_Cpucfg_Id0 { ++ uint32_t value; ++ struct { ++ uint32_t PRID : 32; ++ } bits; ++ }; ++ ++ union LoongArch_Cpucfg_Id1 { ++ uint32_t value; ++ struct { ++ uint32_t ARCH : 2, ++ PGMMU : 1, ++ IOCSR : 1, ++ PALEN : 8, ++ VALEN : 8, ++ UAL : 1, // unaligned access ++ RI : 1, ++ EP : 1, ++ RPLV : 1, ++ HP : 1, ++ IOCSR_BRD : 1, ++ MSG_INT : 1, ++ : 5; ++ } bits; ++ }; ++ ++ union LoongArch_Cpucfg_Id2 { ++ uint32_t value; ++ struct { ++ uint32_t FP_CFG : 1, // FP is used, use FP_CFG instead ++ FP_SP : 1, ++ FP_DP : 1, ++ FP_VER : 3, ++ LSX : 1, ++ LASX : 1, ++ COMPLEX : 1, ++ CRYPTO : 1, ++ LVZ : 1, ++ LVZ_VER : 3, ++ LLFTP : 1, ++ LLFTP_VER : 3, ++ LBT_X86 : 1, ++ LBT_ARM : 1, ++ LBT_MIPS : 1, ++ LSPW : 1, ++ LAM : 1, ++ : 9; ++ } bits; ++ }; ++ ++ union LoongArch_Cpucfg_Id3 { ++ uint32_t value; ++ struct { ++ uint32_t CCDMA : 1, ++ SFB : 1, ++ UCACC : 1, ++ LLEXC : 1, ++ SCDLY : 1, ++ LLDBAR : 1, ++ ITLBHMC : 1, ++ ICHMC : 1, ++ SPW_LVL : 3, ++ SPW_HP_HF : 1, ++ RVA : 1, ++ RVAMAXM1 : 4, ++ : 15; ++ } bits; ++ }; ++ ++ union LoongArch_Cpucfg_Id4 { ++ uint32_t value; ++ struct { ++ uint32_t CC_FREQ : 32; ++ } bits; ++ }; ++ ++ union LoongArch_Cpucfg_Id5 { ++ uint32_t value; ++ struct { ++ uint32_t CC_MUL : 16, ++ CC_DIV : 16; ++ } bits; ++ }; ++ ++ union LoongArch_Cpucfg_Id6 { ++ uint32_t value; ++ struct { ++ uint32_t PMP : 1, ++ PMVER : 3, ++ PMNUM : 4, ++ PMBITS : 6, ++ UPM : 1, ++ : 17; ++ } bits; ++ }; ++ ++ union LoongArch_Cpucfg_Id10 { ++ uint32_t value; ++ struct { ++ uint32_t L1IU_PRESENT : 1, ++ L1IU_UNIFY : 1, ++ L1D_PRESENT : 1, ++ L2IU_PRESENT : 1, ++ L2IU_UNIFY : 1, ++ L2IU_PRIVATE : 1, ++ L2IU_INCLUSIVE : 1, ++ L2D_PRESENT : 1, ++ L2D_PRIVATE : 1, ++ L2D_INCLUSIVE : 1, ++ L3IU_PRESENT : 1, ++ L3IU_UNIFY : 1, ++ L3IU_PRIVATE : 1, ++ L3IU_INCLUSIVE : 1, ++ L3D_PRESENT : 1, ++ L3D_PRIVATE : 1, ++ L3D_INCLUSIVE : 1, ++ : 15; ++ } bits; ++ }; ++ ++ union LoongArch_Cpucfg_Id11 { ++ uint32_t value; ++ struct { ++ uint32_t WAYM1 : 16, ++ INDEXMLOG2 : 8, ++ LINESIZELOG2 : 7, ++ : 1; ++ } bits; ++ }; ++ ++ union LoongArch_Cpucfg_Id12 { ++ uint32_t value; ++ struct { ++ uint32_t WAYM1 : 16, ++ INDEXMLOG2 : 8, ++ LINESIZELOG2 : 7, ++ : 1; ++ } bits; ++ }; ++ ++ union LoongArch_Cpucfg_Id13 { ++ uint32_t value; ++ struct { ++ uint32_t WAYM1 : 16, ++ INDEXMLOG2 : 8, ++ LINESIZELOG2 : 7, ++ : 1; ++ } bits; ++ }; ++ ++ union LoongArch_Cpucfg_Id14 { ++ uint32_t value; ++ struct { ++ uint32_t WAYM1 : 16, ++ INDEXMLOG2 : 8, ++ LINESIZELOG2 : 7, ++ : 1; ++ } bits; ++ }; ++ ++protected: ++ ++ enum { ++ CPU_LAM = (1 << 1), ++ CPU_UAL = (1 << 2), ++ CPU_LSX = (1 << 4), ++ CPU_LASX = (1 << 5), ++ CPU_COMPLEX = (1 << 7), ++ CPU_CRYPTO = (1 << 8), ++ CPU_LBT_X86 = (1 << 10), ++ CPU_LBT_ARM = (1 << 11), ++ CPU_LBT_MIPS = (1 << 12), ++ // flags above must follow Linux HWCAP ++ CPU_LA32 = (1 << 13), ++ CPU_LA64 = (1 << 14), ++ CPU_FP = (1 << 15), ++ CPU_LLEXC = (1 << 16), ++ CPU_SCDLY = (1 << 17), ++ CPU_LLDBAR = (1 << 18), ++ CPU_CCDMA = (1 << 19), ++ CPU_LLSYNC = (1 << 20), ++ CPU_TGTSYNC 
= (1 << 21), ++ CPU_ULSYNC = (1 << 22), ++ ++ //////////////////////add some other feature here////////////////// ++ } cpuFeatureFlags; ++ ++ static const char* _features_str; ++ static bool _cpu_info_is_initialized; ++ ++ struct CpuidInfo { ++ LoongArch_Cpucfg_Id0 cpucfg_info_id0; ++ LoongArch_Cpucfg_Id1 cpucfg_info_id1; ++ LoongArch_Cpucfg_Id2 cpucfg_info_id2; ++ LoongArch_Cpucfg_Id3 cpucfg_info_id3; ++ LoongArch_Cpucfg_Id4 cpucfg_info_id4; ++ LoongArch_Cpucfg_Id5 cpucfg_info_id5; ++ LoongArch_Cpucfg_Id6 cpucfg_info_id6; ++ LoongArch_Cpucfg_Id10 cpucfg_info_id10; ++ LoongArch_Cpucfg_Id11 cpucfg_info_id11; ++ LoongArch_Cpucfg_Id12 cpucfg_info_id12; ++ LoongArch_Cpucfg_Id13 cpucfg_info_id13; ++ LoongArch_Cpucfg_Id14 cpucfg_info_id14; ++ }; ++ ++ // The actual cpuid info block ++ static CpuidInfo _cpuid_info; ++ ++ static uint32_t get_feature_flags_by_cpucfg(); ++ static void get_processor_features(); ++ static void get_os_cpu_info(); ++ ++public: ++ // Offsets for cpuid asm stub ++ static ByteSize Loongson_Cpucfg_id0_offset() { return byte_offset_of(CpuidInfo, cpucfg_info_id0); } ++ static ByteSize Loongson_Cpucfg_id1_offset() { return byte_offset_of(CpuidInfo, cpucfg_info_id1); } ++ static ByteSize Loongson_Cpucfg_id2_offset() { return byte_offset_of(CpuidInfo, cpucfg_info_id2); } ++ static ByteSize Loongson_Cpucfg_id3_offset() { return byte_offset_of(CpuidInfo, cpucfg_info_id3); } ++ static ByteSize Loongson_Cpucfg_id4_offset() { return byte_offset_of(CpuidInfo, cpucfg_info_id4); } ++ static ByteSize Loongson_Cpucfg_id5_offset() { return byte_offset_of(CpuidInfo, cpucfg_info_id5); } ++ static ByteSize Loongson_Cpucfg_id6_offset() { return byte_offset_of(CpuidInfo, cpucfg_info_id6); } ++ static ByteSize Loongson_Cpucfg_id10_offset() { return byte_offset_of(CpuidInfo, cpucfg_info_id10); } ++ static ByteSize Loongson_Cpucfg_id11_offset() { return byte_offset_of(CpuidInfo, cpucfg_info_id11); } ++ static ByteSize Loongson_Cpucfg_id12_offset() { return byte_offset_of(CpuidInfo, cpucfg_info_id12); } ++ static ByteSize Loongson_Cpucfg_id13_offset() { return byte_offset_of(CpuidInfo, cpucfg_info_id13); } ++ static ByteSize Loongson_Cpucfg_id14_offset() { return byte_offset_of(CpuidInfo, cpucfg_info_id14); } ++ ++ static void clean_cpuFeatures() { _features = 0; } ++ ++ // Initialization ++ static void initialize(); ++ ++ static bool cpu_info_is_initialized() { return _cpu_info_is_initialized; } ++ ++ static bool is_la32() { return _features & CPU_LA32; } ++ static bool is_la64() { return _features & CPU_LA64; } ++ static bool supports_crypto() { return _features & CPU_CRYPTO; } ++ static bool supports_lsx() { return _features & CPU_LSX; } ++ static bool supports_lasx() { return _features & CPU_LASX; } ++ static bool supports_lam() { return _features & CPU_LAM; } ++ static bool supports_llexc() { return _features & CPU_LLEXC; } ++ static bool supports_scdly() { return _features & CPU_SCDLY; } ++ static bool supports_lldbar() { return _features & CPU_LLDBAR; } ++ static bool supports_ual() { return _features & CPU_UAL; } ++ static bool supports_lbt_x86() { return _features & CPU_LBT_X86; } ++ static bool supports_lbt_arm() { return _features & CPU_LBT_ARM; } ++ static bool supports_lbt_mips() { return _features & CPU_LBT_MIPS; } ++ static bool needs_llsync() { return !supports_lldbar(); } ++ static bool needs_tgtsync() { return 1; } ++ static bool needs_ulsync() { return 1; } ++ ++ static const char* cpu_features() { return _features_str; } ++}; ++ ++#endif // CPU_LOONGARCH_VM_VERSION_LOONGARCH_HPP 
+diff --git a/src/hotspot/cpu/loongarch/vmreg_loongarch.cpp b/src/hotspot/cpu/loongarch/vmreg_loongarch.cpp +new file mode 100644 +index 0000000000..43caba5187 +--- /dev/null ++++ b/src/hotspot/cpu/loongarch/vmreg_loongarch.cpp +@@ -0,0 +1,53 @@ ++/* ++ * Copyright (c) 2006, 2012, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#include "precompiled.hpp" ++#include "asm/assembler.hpp" ++#include "code/vmreg.hpp" ++ ++ ++ ++void VMRegImpl::set_regName() { ++ Register reg = ::as_Register(0); ++ int i; ++ for (i = 0; i < ConcreteRegisterImpl::max_gpr ; ) { ++ for (int j = 0 ; j < RegisterImpl::max_slots_per_register ; j++) { ++ regName[i++] = reg->name(); ++ } ++ reg = reg->successor(); ++ } ++ ++ FloatRegister freg = ::as_FloatRegister(0); ++ for ( ; i < ConcreteRegisterImpl::max_fpr ; ) { ++ for (int j = 0 ; j < FloatRegisterImpl::max_slots_per_register ; j++) { ++ regName[i++] = freg->name(); ++ } ++ freg = freg->successor(); ++ } ++ ++ for ( ; i < ConcreteRegisterImpl::number_of_registers ; i ++ ) { ++ regName[i] = "NON-GPR-FPR"; ++ } ++} +diff --git a/src/hotspot/cpu/loongarch/vmreg_loongarch.hpp b/src/hotspot/cpu/loongarch/vmreg_loongarch.hpp +new file mode 100644 +index 0000000000..819eaff0bb +--- /dev/null ++++ b/src/hotspot/cpu/loongarch/vmreg_loongarch.hpp +@@ -0,0 +1,58 @@ ++/* ++ * Copyright (c) 2006, 2010, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 
++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#ifndef CPU_LOONGARCH_VMREG_LOONGARCH_HPP ++#define CPU_LOONGARCH_VMREG_LOONGARCH_HPP ++ ++inline bool is_Register() { ++ return (unsigned int) value() < (unsigned int) ConcreteRegisterImpl::max_gpr; ++} ++ ++inline Register as_Register() { ++ assert( is_Register(), "must be"); ++ return ::as_Register(value() / RegisterImpl::max_slots_per_register); ++} ++ ++inline bool is_FloatRegister() { ++ return value() >= ConcreteRegisterImpl::max_gpr && value() < ConcreteRegisterImpl::max_fpr; ++} ++ ++inline FloatRegister as_FloatRegister() { ++ assert( is_FloatRegister() && is_even(value()), "must be" ); ++ return ::as_FloatRegister((value() - ConcreteRegisterImpl::max_gpr) / ++ FloatRegisterImpl::max_slots_per_register); ++} ++ ++inline bool is_concrete() { ++ assert(is_reg(), "must be"); ++ if (is_FloatRegister()) { ++ int base = value() - ConcreteRegisterImpl::max_gpr; ++ return base % FloatRegisterImpl::max_slots_per_register == 0; ++ } else { ++ return is_even(value()); ++ } ++} ++ ++#endif // CPU_LOONGARCH_VMREG_LOONGARCH_HPP +diff --git a/src/hotspot/cpu/loongarch/vmreg_loongarch.inline.hpp b/src/hotspot/cpu/loongarch/vmreg_loongarch.inline.hpp +new file mode 100644 +index 0000000000..edb78e36da +--- /dev/null ++++ b/src/hotspot/cpu/loongarch/vmreg_loongarch.inline.hpp +@@ -0,0 +1,39 @@ ++/* ++ * Copyright (c) 2006, 2012, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#ifndef CPU_LOONGARCH_VMREG_LOONGARCH_INLINE_HPP ++#define CPU_LOONGARCH_VMREG_LOONGARCH_INLINE_HPP ++ ++inline VMReg RegisterImpl::as_VMReg() { ++ if( this==noreg ) return VMRegImpl::Bad(); ++ return VMRegImpl::as_VMReg(encoding() * RegisterImpl::max_slots_per_register); ++} ++ ++inline VMReg FloatRegisterImpl::as_VMReg() { ++ return VMRegImpl::as_VMReg((encoding() * FloatRegisterImpl::max_slots_per_register) + ++ ConcreteRegisterImpl::max_gpr); ++} ++ ++#endif // CPU_LOONGARCH_VMREG_LOONGARCH_INLINE_HPP +diff --git a/src/hotspot/cpu/loongarch/vtableStubs_loongarch_64.cpp b/src/hotspot/cpu/loongarch/vtableStubs_loongarch_64.cpp +new file mode 100644 +index 0000000000..2c4b60653b +--- /dev/null ++++ b/src/hotspot/cpu/loongarch/vtableStubs_loongarch_64.cpp +@@ -0,0 +1,322 @@ ++/* ++ * Copyright (c) 2003, 2014, Oracle and/or its affiliates. All rights reserved. 
++ * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#include "precompiled.hpp" ++#include "asm/macroAssembler.hpp" ++#include "code/vtableStubs.hpp" ++#include "interp_masm_loongarch.hpp" ++#include "memory/resourceArea.hpp" ++#include "oops/compiledICHolder.hpp" ++#include "oops/klassVtable.hpp" ++#include "runtime/sharedRuntime.hpp" ++#include "vmreg_loongarch.inline.hpp" ++#ifdef COMPILER2 ++#include "opto/runtime.hpp" ++#endif ++ ++ ++// machine-dependent part of VtableStubs: create VtableStub of correct size and ++// initialize its code ++ ++#define __ masm-> ++ ++#define T0 RT0 ++#define T1 RT1 ++#define T2 RT2 ++#define T3 RT3 ++#define T4 RT4 ++#define T5 RT5 ++#define T6 RT6 ++#define T7 RT7 ++#define T8 RT8 ++ ++#ifndef PRODUCT ++extern "C" void bad_compiled_vtable_index(JavaThread* thread, oop receiver, int index); ++#endif ++ ++// used by compiler only; reciever in T0. ++// used registers : ++// Rmethod : receiver klass & method ++// NOTE: If this code is used by the C1, the receiver_location is always 0. ++// when reach here, receiver in T0, klass in T8 ++VtableStub* VtableStubs::create_vtable_stub(int vtable_index) { ++ // Read "A word on VtableStub sizing" in share/code/vtableStubs.hpp for details on stub sizing. ++ const int stub_code_length = code_size_limit(true); ++ VtableStub* s = new(stub_code_length) VtableStub(true, vtable_index); ++ // Can be NULL if there is no free space in the code cache. ++ if (s == NULL) { ++ return NULL; ++ } ++ ++ // Count unused bytes in instruction sequences of variable size. ++ // We add them to the computed buffer size in order to avoid ++ // overflow in subsequently generated stubs. ++ address start_pc; ++ int slop_bytes = 0; ++ int slop_delta = 0; ++ int load_const_maxLen = 4*BytesPerInstWord; // load_const generates 4 instructions. Assume that as max size for li ++ // No variance was detected in vtable stub sizes. Setting index_dependent_slop == 0 will unveil any deviation from this observation. 
++ const int index_dependent_slop = 0; ++ ++ ResourceMark rm; ++ CodeBuffer cb(s->entry_point(), stub_code_length); ++ MacroAssembler* masm = new MacroAssembler(&cb); ++ Register t1 = T8, t2 = Rmethod; ++#if (!defined(PRODUCT) && defined(COMPILER2)) ++ if (CountCompiledCalls) { ++ start_pc = __ pc(); ++ __ li(AT, SharedRuntime::nof_megamorphic_calls_addr()); ++ slop_delta = load_const_maxLen - (__ pc() - start_pc); ++ slop_bytes += slop_delta; ++ assert(slop_delta >= 0, "negative slop(%d) encountered, adjust code size estimate!", slop_delta); ++ __ ld_w(t1, AT , 0); ++ __ addi_w(t1, t1, 1); ++ __ st_w(t1, AT,0); ++ } ++#endif ++ ++ // get receiver (need to skip return address on top of stack) ++ //assert(receiver_location == T0->as_VMReg(), "receiver expected in T0"); ++ ++ // get receiver klass ++ address npe_addr = __ pc(); ++ __ load_klass(t1, T0); ++ ++#ifndef PRODUCT ++ if (DebugVtables) { ++ Label L; ++ // check offset vs vtable length ++ __ ld_w(t2, t1, in_bytes(Klass::vtable_length_offset())); ++ assert(Assembler::is_simm16(vtable_index*vtableEntry::size()), "change this code"); ++ __ li(AT, vtable_index*vtableEntry::size()); ++ __ blt(AT, t2, L); ++ __ li(A2, vtable_index); ++ __ move(A1, A0); ++ ++ // VTABLE TODO: find upper bound for call_VM length. ++ start_pc = __ pc(); ++ __ call_VM(noreg, CAST_FROM_FN_PTR(address, bad_compiled_vtable_index), A1, A2); ++ const ptrdiff_t estimate = 512; ++ const ptrdiff_t codesize = __ pc() - start_pc; ++ slop_delta = estimate - codesize; // call_VM varies in length, depending on data ++ assert(slop_delta >= 0, "vtable #%d: Code size estimate (%d) for DebugVtables too small, required: %d", vtable_index, (int)estimate, (int)codesize); ++ __ bind(L); ++ } ++#endif // PRODUCT ++ const Register method = Rmethod; ++ ++ // load methodOop and target address ++ start_pc = __ pc(); ++ // lookup_virtual_method generates 6 instructions (worst case) ++ __ lookup_virtual_method(t1, vtable_index, method); ++ slop_delta = 6*BytesPerInstWord - (int)(__ pc() - start_pc); ++ slop_bytes += slop_delta; ++ assert(slop_delta >= 0, "negative slop(%d) encountered, adjust code size estimate!", slop_delta); ++ ++#ifndef PRODUCT ++ if (DebugVtables) { ++ Label L; ++ __ beq(method, R0, L); ++ __ ld_d(AT, method,in_bytes(Method::from_compiled_offset())); ++ __ bne(AT, R0, L); ++ __ stop("Vtable entry is NULL"); ++ __ bind(L); ++ } ++#endif // PRODUCT ++ ++ // T8: receiver klass ++ // T0: receiver ++ // Rmethod: methodOop ++ // T4: entry ++ address ame_addr = __ pc(); ++ __ ld_ptr(T4, method,in_bytes(Method::from_compiled_offset())); ++ __ jr(T4); ++ masm->flush(); ++ slop_bytes += index_dependent_slop; // add'l slop for size variance due to large itable offsets ++ bookkeeping(masm, tty, s, npe_addr, ame_addr, true, vtable_index, slop_bytes, index_dependent_slop); ++ ++ return s; ++} ++ ++ ++// used registers : ++// T1 T2 ++// when reach here, the receiver in T0, klass in T1 ++VtableStub* VtableStubs::create_itable_stub(int itable_index) { ++ // Read "A word on VtableStub sizing" in share/code/vtableStubs.hpp for details on stub sizing. ++ const int stub_code_length = code_size_limit(false); ++ VtableStub* s = new(stub_code_length) VtableStub(false, itable_index); ++ // Can be NULL if there is no free space in the code cache. ++ if (s == NULL) { ++ return NULL; ++ } ++ // Count unused bytes in instruction sequences of variable size. ++ // We add them to the computed buffer size in order to avoid ++ // overflow in subsequently generated stubs. 
++ address start_pc; ++ int slop_bytes = 0; ++ int slop_delta = 0; ++ int load_const_maxLen = 4*BytesPerInstWord; // load_const generates 4 instructions. Assume that as max size for li ++ ++ ResourceMark rm; ++ CodeBuffer cb(s->entry_point(), stub_code_length); ++ MacroAssembler *masm = new MacroAssembler(&cb); ++ ++ // we use T8, T4, T2 as temparary register, they are free from register allocator ++ Register t1 = T8, t2 = T2, t3 = T4; ++ // Entry arguments: ++ // T1: Interface ++ // T0: Receiver ++ ++#if (!defined(PRODUCT) && defined(COMPILER2)) ++ if (CountCompiledCalls) { ++ start_pc = __ pc(); ++ __ li(AT, SharedRuntime::nof_megamorphic_calls_addr()); ++ slop_delta = load_const_maxLen - (__ pc() - start_pc); ++ slop_bytes += slop_delta; ++ assert(slop_delta >= 0, "negative slop(%d) encountered, adjust code size estimate!", slop_delta); ++ __ ld_w(T8, AT, 0); ++ __ addi_w(T8, T8, 1); ++ __ st_w(T8, AT, 0); ++ } ++#endif // PRODUCT ++ ++ const Register holder_klass_reg = T1; // declaring interface klass (DECC) ++ const Register resolved_klass_reg = Rmethod; // resolved interface klass (REFC) ++ const Register icholder_reg = T1; ++ ++ Label L_no_such_interface; ++ ++ __ ld_ptr(resolved_klass_reg, icholder_reg, CompiledICHolder::holder_klass_offset()); ++ __ ld_ptr(holder_klass_reg, icholder_reg, CompiledICHolder::holder_metadata_offset()); ++ ++ // get receiver klass (also an implicit null-check) ++ address npe_addr = __ pc(); ++ __ load_klass(t1, T0); ++ ++ // x86 use lookup_interface_method, but lookup_interface_method makes more instructions. ++ // No dynamic code size variance here, so slop_bytes is not needed. ++ const int base = in_bytes(Klass::vtable_start_offset()); ++ assert(vtableEntry::size() * wordSize == 8, "adjust the scaling in the code below"); ++ assert(Assembler::is_simm16(base), "change this code"); ++ __ addi_d(t2, t1, base); ++ __ ld_w(AT, t1, in_bytes(Klass::vtable_length_offset())); ++ __ alsl_d(t2, AT, t2, Address::times_8 - 1); ++ ++ __ move(t3, t2); ++ { ++ Label hit, entry; ++ ++ __ ld_ptr(AT, t3, itableOffsetEntry::interface_offset_in_bytes()); ++ __ beq(AT, resolved_klass_reg, hit); ++ ++ __ bind(entry); ++ // Check that the entry is non-null. A null entry means that ++ // the receiver class doesn't implement the interface, and wasn't the ++ // same as when the caller was compiled. ++ __ beqz(AT, L_no_such_interface); ++ ++ __ addi_d(t3, t3, itableOffsetEntry::size() * wordSize); ++ __ ld_ptr(AT, t3, itableOffsetEntry::interface_offset_in_bytes()); ++ __ bne(AT, resolved_klass_reg, entry); ++ ++ __ bind(hit); ++ } ++ ++ { ++ Label hit, entry; ++ ++ __ ld_ptr(AT, t2, itableOffsetEntry::interface_offset_in_bytes()); ++ __ beq(AT, holder_klass_reg, hit); ++ ++ __ bind(entry); ++ // Check that the entry is non-null. A null entry means that ++ // the receiver class doesn't implement the interface, and wasn't the ++ // same as when the caller was compiled. ++ __ beqz(AT, L_no_such_interface); ++ ++ __ addi_d(t2, t2, itableOffsetEntry::size() * wordSize); ++ __ ld_ptr(AT, t2, itableOffsetEntry::interface_offset_in_bytes()); ++ __ bne(AT, holder_klass_reg, entry); ++ ++ __ bind(hit); ++ } ++ ++ // We found a hit, move offset into T4 ++ __ ld_wu(t2, t2, itableOffsetEntry::offset_offset_in_bytes()); ++ ++ // Compute itableMethodEntry. 
++  const int method_offset = (itableMethodEntry::size() * wordSize * itable_index) +
++                            itableMethodEntry::method_offset_in_bytes();
++
++  // Get methodOop and entrypoint for compiler
++  const Register method = Rmethod;
++
++  start_pc = __ pc();
++  __ li(AT, method_offset);
++  slop_delta = load_const_maxLen - (__ pc() - start_pc);
++  slop_bytes += slop_delta;
++  assert(slop_delta >= 0, "negative slop(%d) encountered, adjust code size estimate!", slop_delta);
++  __ add_d(AT, AT, t2);
++  __ ldx_d(method, t1, AT);
++
++#ifdef ASSERT
++  if (DebugVtables) {
++    Label L1;
++    __ beq(method, R0, L1);
++    __ ld_d(AT, method, in_bytes(Method::from_compiled_offset()));
++    __ bne(AT, R0, L1);
++    __ stop("methodOop is null");
++    __ bind(L1);
++  }
++#endif // ASSERT
++
++  // Rmethod: methodOop
++  // T0: receiver
++  // T4: entry point
++  address ame_addr = __ pc();
++  __ ld_ptr(T4, method, in_bytes(Method::from_compiled_offset()));
++  __ jr(T4);
++
++  __ bind(L_no_such_interface);
++  // Handle IncompatibleClassChangeError in itable stubs.
++  // More detailed error message.
++  // We force resolving of the call site by jumping to the "handle
++  // wrong method" stub, and so let the interpreter runtime do all the
++  // dirty work.
++  assert(SharedRuntime::get_handle_wrong_method_stub() != NULL, "check initialization order");
++  __ jmp((address)SharedRuntime::get_handle_wrong_method_stub(), relocInfo::runtime_call_type);
++
++  masm->flush();
++  bookkeeping(masm, tty, s, npe_addr, ame_addr, false, itable_index, slop_bytes, 0);
++
++  return s;
++}
++
++// NOTE: whenever you change the code above, don't forget to change the const here
++int VtableStub::pd_code_alignment() {
++  const unsigned int icache_line_size = wordSize;
++  return icache_line_size;
++}
+diff --git a/src/hotspot/cpu/mips/abstractInterpreter_mips.cpp b/src/hotspot/cpu/mips/abstractInterpreter_mips.cpp
+new file mode 100644
+index 0000000000..73f021c9b7
+--- /dev/null
++++ b/src/hotspot/cpu/mips/abstractInterpreter_mips.cpp
+@@ -0,0 +1,132 @@
++/*
++ * Copyright (c) 2003, 2013, Oracle and/or its affiliates. All rights reserved.
++ * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved.
++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
++ *
++ * This code is free software; you can redistribute it and/or modify it
++ * under the terms of the GNU General Public License version 2 only, as
++ * published by the Free Software Foundation.
++ *
++ * This code is distributed in the hope that it will be useful, but WITHOUT
++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
++ * version 2 for more details (a copy is included in the LICENSE file that
++ * accompanied this code).
++ *
++ * You should have received a copy of the GNU General Public License version
++ * 2 along with this work; if not, write to the Free Software Foundation,
++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
++ *
++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
++ * or visit www.oracle.com if you need additional information or have any
++ * questions.
++ *
++ */
++
++#include "precompiled.hpp"
++#include "ci/ciMethod.hpp"
++#include "interpreter/interpreter.hpp"
++#include "runtime/frame.inline.hpp"
++
++// asm based interpreter deoptimization helpers
++int AbstractInterpreter::size_activation(int max_stack,
++                                         int temps,
++                                         int extra_args,
++                                         int monitors,
++                                         int callee_params,
++                                         int callee_locals,
++                                         bool is_top_frame) {
++  // Note: This calculation must exactly parallel the frame setup
++  // in AbstractInterpreterGenerator::generate_method_entry.
++
++  // fixed size of an interpreter frame:
++  int overhead = frame::sender_sp_offset -
++                 frame::interpreter_frame_initial_sp_offset;
++  // Our locals were accounted for by the caller (or last_frame_adjust
++  // on the transition) Since the callee parameters already account
++  // for the callee's params we only need to account for the extra
++  // locals.
++  int size = overhead +
++         (callee_locals - callee_params)*Interpreter::stackElementWords +
++         monitors * frame::interpreter_frame_monitor_size() +
++         temps* Interpreter::stackElementWords + extra_args;
++
++  return size;
++}
++
++// How much stack a method activation needs in words.
++int AbstractInterpreter::size_top_interpreter_activation(Method* method) {
++
++  const int entry_size = frame::interpreter_frame_monitor_size();
++
++  // total overhead size: entry_size + (saved ebp thru expr stack bottom).
++  // be sure to change this if you add/subtract anything to/from the overhead area
++  const int overhead_size = -(frame::interpreter_frame_initial_sp_offset) + entry_size;
++
++  const int stub_code = 6; // see generate_call_stub
++  // return overhead_size + method->max_locals() + method->max_stack() + stub_code;
++  const int method_stack = (method->max_locals() + method->max_stack()) *
++                           Interpreter::stackElementWords;
++  return overhead_size + method_stack + stub_code;
++}
++
++void AbstractInterpreter::layout_activation(Method* method,
++                                            int tempcount,
++                                            int popframe_extra_args,
++                                            int moncount,
++                                            int caller_actual_parameters,
++                                            int callee_param_count,
++                                            int callee_locals,
++                                            frame* caller,
++                                            frame* interpreter_frame,
++                                            bool is_top_frame,
++                                            bool is_bottom_frame) {
++  // Note: This calculation must exactly parallel the frame setup
++  // in AbstractInterpreterGenerator::generate_method_entry.
++  // If interpreter_frame!=NULL, set up the method, locals, and monitors.
++  // The frame interpreter_frame, if not NULL, is guaranteed to be the
++  // right size, as determined by a previous call to this method.
++ // It is also guaranteed to be walkable even though it is in a skeletal state ++ ++ // fixed size of an interpreter frame: ++ ++ int max_locals = method->max_locals() * Interpreter::stackElementWords; ++ int extra_locals = (method->max_locals() - method->size_of_parameters()) * Interpreter::stackElementWords; ++ ++#ifdef ASSERT ++ assert(caller->sp() == interpreter_frame->sender_sp(), "Frame not properly walkable(2)"); ++#endif ++ ++ interpreter_frame->interpreter_frame_set_method(method); ++ // NOTE the difference in using sender_sp and interpreter_frame_sender_sp ++ // interpreter_frame_sender_sp is the original sp of the caller (the unextended_sp) ++ // and sender_sp is fp+8 ++ intptr_t* locals = interpreter_frame->sender_sp() + max_locals - 1; ++ ++#ifdef ASSERT ++ if (caller->is_interpreted_frame()) { ++ assert(locals < caller->fp() + frame::interpreter_frame_initial_sp_offset, "bad placement"); ++ } ++#endif ++ ++ interpreter_frame->interpreter_frame_set_locals(locals); ++ BasicObjectLock* montop = interpreter_frame->interpreter_frame_monitor_begin(); ++ BasicObjectLock* monbot = montop - moncount; ++ interpreter_frame->interpreter_frame_set_monitor_end(montop - moncount); ++ ++ //set last sp; ++ intptr_t* esp = (intptr_t*) monbot - tempcount*Interpreter::stackElementWords - ++ popframe_extra_args; ++ interpreter_frame->interpreter_frame_set_last_sp(esp); ++ // All frames but the initial interpreter frame we fill in have a ++ // value for sender_sp that allows walking the stack but isn't ++ // truly correct. Correct the value here. ++ // ++ if (extra_locals != 0 && ++ interpreter_frame->sender_sp() == interpreter_frame->interpreter_frame_sender_sp() ) { ++ interpreter_frame->set_interpreter_frame_sender_sp(caller->sp() + extra_locals); ++ } ++ *interpreter_frame->interpreter_frame_cache_addr() = method->constants()->cache(); ++ *interpreter_frame->interpreter_frame_mirror_addr() = method->method_holder()->java_mirror(); ++} ++ +diff --git a/src/hotspot/cpu/mips/assembler_mips.cpp b/src/hotspot/cpu/mips/assembler_mips.cpp +new file mode 100644 +index 0000000000..c8c7a5d4df +--- /dev/null ++++ b/src/hotspot/cpu/mips/assembler_mips.cpp +@@ -0,0 +1,759 @@ ++/* ++ * Copyright (c) 1997, 2014, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ * ++ */ ++ ++#include "precompiled.hpp" ++#include "asm/assembler.hpp" ++#include "asm/assembler.inline.hpp" ++#include "gc/shared/cardTableBarrierSet.hpp" ++#include "gc/shared/collectedHeap.inline.hpp" ++#include "interpreter/interpreter.hpp" ++#include "memory/resourceArea.hpp" ++#include "prims/methodHandles.hpp" ++#include "runtime/biasedLocking.hpp" ++#include "runtime/objectMonitor.hpp" ++#include "runtime/os.hpp" ++#include "runtime/sharedRuntime.hpp" ++#include "runtime/stubRoutines.hpp" ++#include "utilities/macros.hpp" ++#ifndef PRODUCT ++#include "compiler/disassembler.hpp" ++#endif ++ ++#ifdef PRODUCT ++#define BLOCK_COMMENT(str) /* nothing */ ++#define STOP(error) stop(error) ++#else ++#define BLOCK_COMMENT(str) block_comment(str) ++#define STOP(error) block_comment(error); stop(error) ++#endif ++ ++#define BIND(label) bind(label); BLOCK_COMMENT(#label ":") ++// Implementation of AddressLiteral ++ ++AddressLiteral::AddressLiteral(address target, relocInfo::relocType rtype) { ++ _is_lval = false; ++ _target = target; ++ _rspec = rspec_from_rtype(rtype, target); ++} ++ ++// Implementation of Address ++ ++Address Address::make_array(ArrayAddress adr) { ++ AddressLiteral base = adr.base(); ++ Address index = adr.index(); ++ assert(index._disp == 0, "must not have disp"); // maybe it can? ++ Address array(index._base, index._index, index._scale, (intptr_t) base.target()); ++ array._rspec = base._rspec; ++ return array; ++} ++ ++// exceedingly dangerous constructor ++Address::Address(address loc, RelocationHolder spec) { ++ _base = noreg; ++ _index = noreg; ++ _scale = no_scale; ++ _disp = (intptr_t) loc; ++ _rspec = spec; ++} ++ ++ ++#define T0 RT0 ++#define T1 RT1 ++#define T2 RT2 ++#define T3 RT3 ++#define T8 RT8 ++#define T9 RT9 ++ ++// Implementation of Assembler ++const char *Assembler::ops_name[] = { ++ "special", "regimm", "j", "jal", "beq", "bne", "blez", "bgtz", ++ "addi", "addiu", "slti", "sltiu", "andi", "ori", "xori", "lui", ++ "cop0", "cop1", "cop2", "cop3", "beql", "bnel", "bleql", "bgtzl", ++ "daddi", "daddiu", "ldl", "ldr", "", "", "", "", ++ "lb", "lh", "lwl", "lw", "lbu", "lhu", "lwr", "lwu", ++ "sb", "sh", "swl", "sw", "sdl", "sdr", "swr", "cache", ++ "ll", "lwc1", "", "", "lld", "ldc1", "", "ld", ++ "sc", "swc1", "", "", "scd", "sdc1", "", "sd" ++}; ++ ++const char* Assembler::special_name[] = { ++ "sll", "", "srl", "sra", "sllv", "", "srlv", "srav", ++ "jr", "jalr", "movz", "movn", "syscall", "break", "", "sync", ++ "mfhi", "mthi", "mflo", "mtlo", "dsll", "", "dsrl", "dsra", ++ "mult", "multu", "div", "divu", "dmult", "dmultu", "ddiv", "ddivu", ++ "add", "addu", "sub", "subu", "and", "or", "xor", "nor", ++ "", "", "slt", "sltu", "dadd", "daddu", "dsub", "dsubu", ++ "tge", "tgeu", "tlt", "tltu", "teq", "", "tne", "", ++ "dsll", "", "dsrl", "dsra", "dsll32", "", "dsrl32", "dsra32" ++}; ++ ++const char* Assembler::cop1_name[] = { ++ "add", "sub", "mul", "div", "sqrt", "abs", "mov", "neg", ++ "round.l", "trunc.l", "ceil.l", "floor.l", "round.w", "trunc.w", "ceil.w", "floor.w", ++ "", "", "", "", "", "", "", "", ++ "", "", "", "", "", "", "", "", ++ "", "", "", "", "", "", "", "", ++ "", "", "", "", "", "", "", "", ++ "c.f", "c.un", "c.eq", "c.ueq", "c.olt", "c.ult", "c.ole", "c.ule", ++ "c.sf", "c.ngle", "c.seq", "c.ngl", "c.lt", "c.nge", "c.le", "c.ngt" ++}; ++ ++const char* Assembler::cop1x_name[] = { ++ "lwxc1", "ldxc1", "", "", "", "luxc1", "", "", ++ "swxc1", "sdxc1", "", "", "", "suxc1", "", "prefx", ++ "", "", "", "", "", "", "alnv.ps", "", ++ "", "", 
"", "", "", "", "", "", ++ "madd.s", "madd.d", "", "", "", "", "madd.ps", "", ++ "msub.s", "msub.d", "", "", "", "", "msub.ps", "", ++ "nmadd.s", "nmadd.d", "", "", "", "", "nmadd.ps", "", ++ "nmsub.s", "nmsub.d", "", "", "", "", "nmsub.ps", "" ++}; ++ ++const char* Assembler::special2_name[] = { ++ "madd", "", "mul", "", "msub", "", "", "", ++ "", "", "", "", "", "", "", "", ++ "", "gsdmult", "", "", "gsdiv", "gsddiv", "", "", ++ "", "", "", "", "gsmod", "gsdmod", "", "", ++ "", "", "", "", "", "", "", "", ++ "", "", "", "", "", "", "", "", ++ "", "", "", "", "", "", "", "", ++ "", "", "", "", "", "", "", "" ++}; ++ ++const char* Assembler::special3_name[] = { ++ "ext", "", "", "", "ins", "dinsm", "dinsu", "dins", ++ "", "", "", "", "", "", "", "", ++ "", "", "", "", "", "", "", "", ++ "", "", "", "", "", "", "", "", ++ "bshfl", "", "", "", "", "", "", "", ++ "", "", "", "", "", "", "", "", ++ "", "", "", "", "", "", "", "", ++ "", "", "", "", "", "", "", "", ++}; ++ ++const char* Assembler::regimm_name[] = { ++ "bltz", "bgez", "bltzl", "bgezl", "", "", "", "", ++ "tgei", "tgeiu", "tlti", "tltiu", "teqi", "", "tnei", "", ++ "bltzal", "bgezal", "bltzall", "bgezall" ++}; ++ ++const char* Assembler::gs_ldc2_name[] = { ++ "gslbx", "gslhx", "gslwx", "gsldx", "", "", "gslwxc1", "gsldxc1" ++}; ++ ++ ++const char* Assembler::gs_lwc2_name[] = { ++ "", "", "", "", "", "", "", "", ++ "", "", "", "", "", "", "", "", ++ "gslble", "gslbgt", "gslhle", "gslhgt", "gslwle", "gslwgt", "gsldle", "gsldgt", ++ "", "", "", "gslwlec1", "gslwgtc1", "gsldlec1", "gsldgtc1", "",/*LWDIR, LWPTE, LDDIR and LDPTE have the same low 6 bits.*/ ++ "gslq", "" ++}; ++ ++const char* Assembler::gs_sdc2_name[] = { ++ "gssbx", "gsshx", "gsswx", "gssdx", "", "", "gsswxc1", "gssdxc1" ++}; ++ ++const char* Assembler::gs_swc2_name[] = { ++ "", "", "", "", "", "", "", "", ++ "", "", "", "", "", "", "", "", ++ "gssble", "gssbgt", "gsshle", "gsshgt", "gsswle", "gsswgt", "gssdle", "gssdgt", ++ "", "", "", "", "gsswlec1", "gsswgtc1", "gssdlec1", "gssdgtc1", ++ "gssq", "" ++}; ++ ++//misleading name, print only branch/jump instruction ++void Assembler::print_instruction(int inst) { ++ const char *s; ++ switch( opcode(inst) ) { ++ default: ++ s = ops_name[opcode(inst)]; ++ break; ++ case special_op: ++ s = special_name[special(inst)]; ++ break; ++ case regimm_op: ++ s = special_name[rt(inst)]; ++ break; ++ } ++ ++ ::tty->print("%s", s); ++} ++ ++int Assembler::is_int_mask(int x) { ++ int xx = x; ++ int count = 0; ++ ++ while (x != 0) { ++ x &= (x - 1); ++ count++; ++ } ++ ++ if ((1<>2; ++ switch(opcode(inst)) { ++ case j_op: ++ case jal_op: ++ case lui_op: ++ case ori_op: ++ case daddiu_op: ++ ShouldNotReachHere(); ++ break; ++ default: ++ assert(is_simm16(v), "must be simm16"); ++#ifndef PRODUCT ++ if (!is_simm16(v)) { ++ tty->print_cr("must be simm16"); ++ tty->print_cr("Inst: %x", inst); ++ } ++#endif ++ ++ v = low16(v); ++ inst &= 0xffff0000; ++ break; ++ } ++ ++ return inst | v; ++} ++ ++int Assembler::branch_destination(int inst, int pos) { ++ int off = 0; ++ ++ switch(opcode(inst)) { ++ case j_op: ++ case jal_op: ++ assert(false, "should not use j/jal here"); ++ break; ++ default: ++ off = expand(low16(inst), 15); ++ break; ++ } ++ ++ return off ? 
pos + 4 + (off<<2) : 0; ++} ++ ++int AbstractAssembler::code_fill_byte() { ++ return 0x00; // illegal instruction 0x00000000 ++} ++ ++// Now the Assembler instruction (identical for 32/64 bits) ++ ++void Assembler::lb(Register rt, Address src) { ++ assert(src.index() == NOREG, "index is unimplemented"); ++ lb(rt, src.base(), src.disp()); ++} ++ ++void Assembler::lbu(Register rt, Address src) { ++ assert(src.index() == NOREG, "index is unimplemented"); ++ lbu(rt, src.base(), src.disp()); ++} ++ ++void Assembler::ld(Register rt, Address dst){ ++ Register src = rt; ++ Register base = dst.base(); ++ Register index = dst.index(); ++ ++ int scale = dst.scale(); ++ int disp = dst.disp(); ++ ++ if (index != noreg) { ++ if (Assembler::is_simm16(disp)) { ++ if ( UseLEXT1 && Assembler::is_simm(disp, 8) ) { ++ if (scale == 0) { ++ gsldx(src, base, index, disp); ++ } else { ++ dsll(AT, index, scale); ++ gsldx(src, base, AT, disp); ++ } ++ } else { ++ if (scale == 0) { ++ daddu(AT, base, index); ++ } else { ++ dsll(AT, index, scale); ++ daddu(AT, base, AT); ++ } ++ ld(src, AT, disp); ++ } ++ } else { ++ if (scale == 0) { ++ lui(AT, split_low(disp >> 16)); ++ if (split_low(disp)) ori(AT, AT, split_low(disp)); ++ daddu(AT, AT, base); ++ if (UseLEXT1) { ++ gsldx(src, AT, index, 0); ++ } else { ++ daddu(AT, AT, index); ++ ld(src, AT, 0); ++ } ++ } else { ++ assert_different_registers(src, AT); ++ dsll(AT, index, scale); ++ daddu(AT, base, AT); ++ lui(src, split_low(disp >> 16)); ++ if (split_low(disp)) ori(src, src, split_low(disp)); ++ if (UseLEXT1) { ++ gsldx(src, AT, src, 0); ++ } else { ++ daddu(AT, AT, src); ++ ld(src, AT, 0); ++ } ++ } ++ } ++ } else { ++ if (Assembler::is_simm16(disp)) { ++ ld(src, base, disp); ++ } else { ++ lui(AT, split_low(disp >> 16)); ++ if (split_low(disp)) ori(AT, AT, split_low(disp)); ++ ++ if (UseLEXT1) { ++ gsldx(src, base, AT, 0); ++ } else { ++ daddu(AT, base, AT); ++ ld(src, AT, 0); ++ } ++ } ++ } ++} ++ ++void Assembler::ldl(Register rt, Address src){ ++ assert(src.index() == NOREG, "index is unimplemented"); ++ ldl(rt, src.base(), src.disp()); ++} ++ ++void Assembler::ldr(Register rt, Address src){ ++ assert(src.index() == NOREG, "index is unimplemented"); ++ ldr(rt, src.base(), src.disp()); ++} ++ ++void Assembler::lh(Register rt, Address src){ ++ assert(src.index() == NOREG, "index is unimplemented"); ++ lh(rt, src.base(), src.disp()); ++} ++ ++void Assembler::lhu(Register rt, Address src){ ++ assert(src.index() == NOREG, "index is unimplemented"); ++ lhu(rt, src.base(), src.disp()); ++} ++ ++void Assembler::ll(Register rt, Address src){ ++ assert(src.index() == NOREG, "index is unimplemented"); ++ ll(rt, src.base(), src.disp()); ++} ++ ++void Assembler::lld(Register rt, Address src){ ++ assert(src.index() == NOREG, "index is unimplemented"); ++ lld(rt, src.base(), src.disp()); ++} ++ ++void Assembler::lw(Register rt, Address dst){ ++ Register src = rt; ++ Register base = dst.base(); ++ Register index = dst.index(); ++ ++ int scale = dst.scale(); ++ int disp = dst.disp(); ++ ++ if (index != noreg) { ++ if (Assembler::is_simm16(disp)) { ++ if ( UseLEXT1 && Assembler::is_simm(disp, 8) ) { ++ if (scale == 0) { ++ gslwx(src, base, index, disp); ++ } else { ++ dsll(AT, index, scale); ++ gslwx(src, base, AT, disp); ++ } ++ } else { ++ if (scale == 0) { ++ daddu(AT, base, index); ++ } else { ++ dsll(AT, index, scale); ++ daddu(AT, base, AT); ++ } ++ lw(src, AT, disp); ++ } ++ } else { ++ if (scale == 0) { ++ lui(AT, split_low(disp >> 16)); ++ if (split_low(disp)) ori(AT, 
AT, split_low(disp)); ++ daddu(AT, AT, base); ++ if (UseLEXT1) { ++ gslwx(src, AT, index, 0); ++ } else { ++ daddu(AT, AT, index); ++ lw(src, AT, 0); ++ } ++ } else { ++ assert_different_registers(src, AT); ++ dsll(AT, index, scale); ++ daddu(AT, base, AT); ++ lui(src, split_low(disp >> 16)); ++ if (split_low(disp)) ori(src, src, split_low(disp)); ++ if (UseLEXT1) { ++ gslwx(src, AT, src, 0); ++ } else { ++ daddu(AT, AT, src); ++ lw(src, AT, 0); ++ } ++ } ++ } ++ } else { ++ if (Assembler::is_simm16(disp)) { ++ lw(src, base, disp); ++ } else { ++ lui(AT, split_low(disp >> 16)); ++ if (split_low(disp)) ori(AT, AT, split_low(disp)); ++ ++ if (UseLEXT1) { ++ gslwx(src, base, AT, 0); ++ } else { ++ daddu(AT, base, AT); ++ lw(src, AT, 0); ++ } ++ } ++ } ++} ++ ++void Assembler::lea(Register rt, Address src) { ++ Register dst = rt; ++ Register base = src.base(); ++ Register index = src.index(); ++ ++ int scale = src.scale(); ++ int disp = src.disp(); ++ ++ if (index == noreg) { ++ if (is_simm16(disp)) { ++ daddiu(dst, base, disp); ++ } else { ++ lui(AT, split_low(disp >> 16)); ++ if (split_low(disp)) ori(AT, AT, split_low(disp)); ++ daddu(dst, base, AT); ++ } ++ } else { ++ if (scale == 0) { ++ if (is_simm16(disp)) { ++ daddu(AT, base, index); ++ daddiu(dst, AT, disp); ++ } else { ++ lui(AT, split_low(disp >> 16)); ++ if (split_low(disp)) ori(AT, AT, split_low(disp)); ++ daddu(AT, base, AT); ++ daddu(dst, AT, index); ++ } ++ } else { ++ if (is_simm16(disp)) { ++ dsll(AT, index, scale); ++ daddu(AT, AT, base); ++ daddiu(dst, AT, disp); ++ } else { ++ assert_different_registers(dst, AT); ++ lui(AT, split_low(disp >> 16)); ++ if (split_low(disp)) ori(AT, AT, split_low(disp)); ++ daddu(AT, AT, base); ++ dsll(dst, index, scale); ++ daddu(dst, dst, AT); ++ } ++ } ++ } ++} ++ ++void Assembler::lwl(Register rt, Address src){ ++ assert(src.index() == NOREG, "index is unimplemented"); ++ lwl(rt, src.base(), src.disp()); ++} ++ ++void Assembler::lwr(Register rt, Address src){ ++ assert(src.index() == NOREG, "index is unimplemented"); ++ lwr(rt, src.base(), src.disp()); ++} ++ ++void Assembler::lwu(Register rt, Address src){ ++ assert(src.index() == NOREG, "index is unimplemented"); ++ lwu(rt, src.base(), src.disp()); ++} ++ ++void Assembler::sb(Register rt, Address dst) { ++ assert(dst.index() == NOREG, "index is unimplemented"); ++ sb(rt, dst.base(), dst.disp()); ++} ++ ++void Assembler::sc(Register rt, Address dst) { ++ assert(dst.index() == NOREG, "index is unimplemented"); ++ sc(rt, dst.base(), dst.disp()); ++} ++ ++void Assembler::scd(Register rt, Address dst) { ++ assert(dst.index() == NOREG, "index is unimplemented"); ++ scd(rt, dst.base(), dst.disp()); ++} ++ ++void Assembler::sd(Register rt, Address dst) { ++ Register src = rt; ++ Register base = dst.base(); ++ Register index = dst.index(); ++ ++ int scale = dst.scale(); ++ int disp = dst.disp(); ++ ++ if (index != noreg) { ++ if (is_simm16(disp)) { ++ if ( UseLEXT1 && is_simm(disp, 8)) { ++ if (scale == 0) { ++ gssdx(src, base, index, disp); ++ } else { ++ assert_different_registers(rt, AT); ++ dsll(AT, index, scale); ++ gssdx(src, base, AT, disp); ++ } ++ } else { ++ assert_different_registers(rt, AT); ++ if (scale == 0) { ++ daddu(AT, base, index); ++ } else { ++ dsll(AT, index, scale); ++ daddu(AT, base, AT); ++ } ++ sd(src, AT, disp); ++ } ++ } else { ++ assert_different_registers(rt, AT); ++ if (scale == 0) { ++ lui(AT, split_low(disp >> 16)); ++ if (split_low(disp)) ori(AT, AT, split_low(disp)); ++ daddu(AT, AT, base); ++ if (UseLEXT1) { ++ 
gssdx(src, AT, index, 0); ++ } else { ++ daddu(AT, AT, index); ++ sd(src, AT, 0); ++ } ++ } else { ++ daddiu(SP, SP, -wordSize); ++ sd(T9, SP, 0); ++ ++ dsll(AT, index, scale); ++ daddu(AT, base, AT); ++ lui(T9, split_low(disp >> 16)); ++ if (split_low(disp)) ori(T9, T9, split_low(disp)); ++ daddu(AT, AT, T9); ++ ld(T9, SP, 0); ++ daddiu(SP, SP, wordSize); ++ sd(src, AT, 0); ++ } ++ } ++ } else { ++ if (is_simm16(disp)) { ++ sd(src, base, disp); ++ } else { ++ assert_different_registers(rt, AT); ++ lui(AT, split_low(disp >> 16)); ++ if (split_low(disp)) ori(AT, AT, split_low(disp)); ++ ++ if (UseLEXT1) { ++ gssdx(src, base, AT, 0); ++ } else { ++ daddu(AT, base, AT); ++ sd(src, AT, 0); ++ } ++ } ++ } ++} ++ ++void Assembler::sdl(Register rt, Address dst) { ++ assert(dst.index() == NOREG, "index is unimplemented"); ++ sdl(rt, dst.base(), dst.disp()); ++} ++ ++void Assembler::sdr(Register rt, Address dst) { ++ assert(dst.index() == NOREG, "index is unimplemented"); ++ sdr(rt, dst.base(), dst.disp()); ++} ++ ++void Assembler::sh(Register rt, Address dst) { ++ assert(dst.index() == NOREG, "index is unimplemented"); ++ sh(rt, dst.base(), dst.disp()); ++} ++ ++void Assembler::sw(Register rt, Address dst) { ++ Register src = rt; ++ Register base = dst.base(); ++ Register index = dst.index(); ++ ++ int scale = dst.scale(); ++ int disp = dst.disp(); ++ ++ if (index != noreg) { ++ if ( Assembler::is_simm16(disp) ) { ++ if ( UseLEXT1 && Assembler::is_simm(disp, 8) ) { ++ if (scale == 0) { ++ gsswx(src, base, index, disp); ++ } else { ++ assert_different_registers(rt, AT); ++ dsll(AT, index, scale); ++ gsswx(src, base, AT, disp); ++ } ++ } else { ++ assert_different_registers(rt, AT); ++ if (scale == 0) { ++ daddu(AT, base, index); ++ } else { ++ dsll(AT, index, scale); ++ daddu(AT, base, AT); ++ } ++ sw(src, AT, disp); ++ } ++ } else { ++ assert_different_registers(rt, AT); ++ if (scale == 0) { ++ lui(AT, split_low(disp >> 16)); ++ if (split_low(disp)) ori(AT, AT, split_low(disp)); ++ daddu(AT, AT, base); ++ if (UseLEXT1) { ++ gsswx(src, AT, index, 0); ++ } else { ++ daddu(AT, AT, index); ++ sw(src, AT, 0); ++ } ++ } else { ++ daddiu(SP, SP, -wordSize); ++ sd(T9, SP, 0); ++ ++ dsll(AT, index, scale); ++ daddu(AT, base, AT); ++ lui(T9, split_low(disp >> 16)); ++ if (split_low(disp)) ori(T9, T9, split_low(disp)); ++ daddu(AT, AT, T9); ++ ld(T9, SP, 0); ++ daddiu(SP, SP, wordSize); ++ sw(src, AT, 0); ++ } ++ } ++ } else { ++ if (Assembler::is_simm16(disp)) { ++ sw(src, base, disp); ++ } else { ++ assert_different_registers(rt, AT); ++ lui(AT, split_low(disp >> 16)); ++ if (split_low(disp)) ori(AT, AT, split_low(disp)); ++ ++ if (UseLEXT1) { ++ gsswx(src, base, AT, 0); ++ } else { ++ daddu(AT, base, AT); ++ sw(src, AT, 0); ++ } ++ } ++ } ++} ++ ++void Assembler::swl(Register rt, Address dst) { ++ assert(dst.index() == NOREG, "index is unimplemented"); ++ swl(rt, dst.base(), dst.disp()); ++} ++ ++void Assembler::swr(Register rt, Address dst) { ++ assert(dst.index() == NOREG, "index is unimplemented"); ++ swr(rt, dst.base(), dst.disp()); ++} ++ ++void Assembler::lwc1(FloatRegister rt, Address src) { ++ assert(src.index() == NOREG, "index is unimplemented"); ++ lwc1(rt, src.base(), src.disp()); ++} ++ ++void Assembler::ldc1(FloatRegister rt, Address src) { ++ assert(src.index() == NOREG, "index is unimplemented"); ++ ldc1(rt, src.base(), src.disp()); ++} ++ ++void Assembler::swc1(FloatRegister rt, Address dst) { ++ assert(dst.index() == NOREG, "index is unimplemented"); ++ swc1(rt, dst.base(), dst.disp()); 
++} ++ ++void Assembler::sdc1(FloatRegister rt, Address dst) { ++ assert(dst.index() == NOREG, "index is unimplemented"); ++ sdc1(rt, dst.base(), dst.disp()); ++} ++ ++void Assembler::j(address entry) { ++ int dest = ((intptr_t)entry & (intptr_t)0xfffffff)>>2; ++ emit_long((j_op<<26) | dest); ++ has_delay_slot(); ++} ++ ++void Assembler::jal(address entry) { ++ int dest = ((intptr_t)entry & (intptr_t)0xfffffff)>>2; ++ emit_long((jal_op<<26) | dest); ++ has_delay_slot(); ++} ++ ++void Assembler::emit_long(int x) { // shadows AbstractAssembler::emit_long ++ check_delay(); ++ AbstractAssembler::emit_int32(x); ++} ++ ++inline void Assembler::emit_data(int x) { emit_long(x); } ++inline void Assembler::emit_data(int x, relocInfo::relocType rtype) { ++ relocate(rtype); ++ emit_long(x); ++} ++ ++inline void Assembler::emit_data(int x, RelocationHolder const& rspec) { ++ relocate(rspec); ++ emit_long(x); ++} ++ ++inline void Assembler::check_delay() { ++#ifdef CHECK_DELAY ++ guarantee(delay_state != at_delay_slot, "must say delayed() when filling delay slot"); ++ delay_state = no_delay; ++#endif ++} +diff --git a/src/hotspot/cpu/mips/assembler_mips.hpp b/src/hotspot/cpu/mips/assembler_mips.hpp +new file mode 100644 +index 0000000000..102a7ba52f +--- /dev/null ++++ b/src/hotspot/cpu/mips/assembler_mips.hpp +@@ -0,0 +1,1789 @@ ++/* ++ * Copyright (c) 1997, 2013, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#ifndef CPU_MIPS_VM_ASSEMBLER_MIPS_HPP ++#define CPU_MIPS_VM_ASSEMBLER_MIPS_HPP ++ ++#include "asm/register.hpp" ++#include "runtime/vm_version.hpp" ++ ++class BiasedLockingCounters; ++ ++ ++// Note: A register location is represented via a Register, not ++// via an address for efficiency & simplicity reasons. 
++ ++class ArrayAddress; ++ ++class Address { ++ public: ++ enum ScaleFactor { ++ no_scale = -1, ++ times_1 = 0, ++ times_2 = 1, ++ times_4 = 2, ++ times_8 = 3, ++ times_ptr = times_8 ++ }; ++ static ScaleFactor times(int size) { ++ assert(size >= 1 && size <= 8 && is_power_of_2(size), "bad scale size"); ++ if (size == 8) return times_8; ++ if (size == 4) return times_4; ++ if (size == 2) return times_2; ++ return times_1; ++ } ++ ++ private: ++ Register _base; ++ Register _index; ++ ScaleFactor _scale; ++ int _disp; ++ RelocationHolder _rspec; ++ ++ // Easily misused constructors make them private ++ Address(address loc, RelocationHolder spec); ++ Address(int disp, address loc, relocInfo::relocType rtype); ++ Address(int disp, address loc, RelocationHolder spec); ++ ++ public: ++ ++ // creation ++ Address() ++ : _base(noreg), ++ _index(noreg), ++ _scale(no_scale), ++ _disp(0) { ++ } ++ ++ // No default displacement otherwise Register can be implicitly ++ // converted to 0(Register) which is quite a different animal. ++ ++ Address(Register base, int disp = 0) ++ : _base(base), ++ _index(noreg), ++ _scale(no_scale), ++ _disp(disp) { ++ assert_different_registers(_base, AT); ++ } ++ ++ Address(Register base, Register index, ScaleFactor scale, int disp = 0) ++ : _base (base), ++ _index(index), ++ _scale(scale), ++ _disp (disp) { ++ assert(!index->is_valid() == (scale == Address::no_scale), "inconsistent address"); ++ assert_different_registers(_base, _index, AT); ++ } ++ ++ // The following two overloads are used in connection with the ++ // ByteSize type (see sizes.hpp). They simplify the use of ++ // ByteSize'd arguments in assembly code. Note that their equivalent ++ // for the optimized build are the member functions with int disp ++ // argument since ByteSize is mapped to an int type in that case. ++ // ++ // Note: DO NOT introduce similar overloaded functions for WordSize ++ // arguments as in the optimized mode, both ByteSize and WordSize ++ // are mapped to the same type and thus the compiler cannot make a ++ // distinction anymore (=> compiler errors). 
++ ++#ifdef ASSERT ++ Address(Register base, ByteSize disp) ++ : _base(base), ++ _index(noreg), ++ _scale(no_scale), ++ _disp(in_bytes(disp)) { ++ assert_different_registers(_base, AT); ++ } ++ ++ Address(Register base, Register index, ScaleFactor scale, ByteSize disp) ++ : _base(base), ++ _index(index), ++ _scale(scale), ++ _disp(in_bytes(disp)) { ++ assert(!index->is_valid() == (scale == Address::no_scale), "inconsistent address"); ++ assert_different_registers(_base, _index, AT); ++ } ++#endif // ASSERT ++ ++ // accessors ++ bool uses(Register reg) const { return _base == reg || _index == reg; } ++ Register base() const { return _base; } ++ Register index() const { return _index; } ++ ScaleFactor scale() const { return _scale; } ++ int disp() const { return _disp; } ++ ++ static Address make_array(ArrayAddress); ++ ++ friend class Assembler; ++ friend class MacroAssembler; ++ friend class LIR_Assembler; // base/index/scale/disp ++}; ++ ++// Calling convention ++class Argument { ++ private: ++ int _number; ++ public: ++ enum { ++ n_register_parameters = 8, // 8 integer registers used to pass parameters ++ n_float_register_parameters = 8 // 8 float registers used to pass parameters ++ }; ++ ++ Argument(int number):_number(number){ } ++ Argument successor() {return Argument(number() + 1);} ++ ++ int number()const {return _number;} ++ bool is_Register()const {return _number < n_register_parameters;} ++ bool is_FloatRegister()const {return _number < n_float_register_parameters;} ++ ++ Register as_Register()const { ++ assert(is_Register(), "must be a register argument"); ++ return ::as_Register(A0->encoding() + _number); ++ } ++ FloatRegister as_FloatRegister()const { ++ assert(is_FloatRegister(), "must be a float register argument"); ++ return ::as_FloatRegister(F12->encoding() + _number); ++ } ++ ++ Address as_caller_address()const {return Address(SP, (number() - n_register_parameters) * wordSize);} ++}; ++ ++// ++// AddressLiteral has been split out from Address because operands of this type ++// need to be treated specially on 32bit vs. 64bit platforms. By splitting it out ++// the few instructions that need to deal with address literals are unique and the ++// MacroAssembler does not have to implement every instruction in the Assembler ++// in order to search for address literals that may need special handling depending ++// on the instruction and the platform. As small step on the way to merging i486/amd64 ++// directories. ++// ++class AddressLiteral { ++ friend class ArrayAddress; ++ RelocationHolder _rspec; ++ // Typically we use AddressLiterals we want to use their rval ++ // However in some situations we want the lval (effect address) of the item. ++ // We provide a special factory for making those lvals. ++ bool _is_lval; ++ ++ // If the target is far we'll need to load the ea of this to ++ // a register to reach it. Otherwise if near we can do rip ++ // relative addressing. 
++ ++ address _target; ++ ++ protected: ++ // creation ++ AddressLiteral() ++ : _is_lval(false), ++ _target(NULL) ++ {} ++ ++ public: ++ ++ ++ AddressLiteral(address target, relocInfo::relocType rtype); ++ ++ AddressLiteral(address target, RelocationHolder const& rspec) ++ : _rspec(rspec), ++ _is_lval(false), ++ _target(target) ++ {} ++ ++ AddressLiteral addr() { ++ AddressLiteral ret = *this; ++ ret._is_lval = true; ++ return ret; ++ } ++ ++ ++ private: ++ ++ address target() { return _target; } ++ bool is_lval() { return _is_lval; } ++ ++ relocInfo::relocType reloc() const { return _rspec.type(); } ++ const RelocationHolder& rspec() const { return _rspec; } ++ ++ friend class Assembler; ++ friend class MacroAssembler; ++ friend class Address; ++ friend class LIR_Assembler; ++ RelocationHolder rspec_from_rtype(relocInfo::relocType rtype, address addr) { ++ switch (rtype) { ++ case relocInfo::external_word_type: ++ return external_word_Relocation::spec(addr); ++ case relocInfo::internal_word_type: ++ return internal_word_Relocation::spec(addr); ++ case relocInfo::opt_virtual_call_type: ++ return opt_virtual_call_Relocation::spec(); ++ case relocInfo::static_call_type: ++ return static_call_Relocation::spec(); ++ case relocInfo::runtime_call_type: ++ return runtime_call_Relocation::spec(); ++ case relocInfo::poll_type: ++ case relocInfo::poll_return_type: ++ return Relocation::spec_simple(rtype); ++ case relocInfo::none: ++ case relocInfo::oop_type: ++ // Oops are a special case. Normally they would be their own section ++ // but in cases like icBuffer they are literals in the code stream that ++ // we don't have a section for. We use none so that we get a literal address ++ // which is always patchable. ++ return RelocationHolder(); ++ default: ++ ShouldNotReachHere(); ++ return RelocationHolder(); ++ } ++ } ++ ++}; ++ ++// Convience classes ++class RuntimeAddress: public AddressLiteral { ++ ++ public: ++ ++ RuntimeAddress(address target) : AddressLiteral(target, relocInfo::runtime_call_type) {} ++ ++}; ++ ++class OopAddress: public AddressLiteral { ++ ++ public: ++ ++ OopAddress(address target) : AddressLiteral(target, relocInfo::oop_type){} ++ ++}; ++ ++class ExternalAddress: public AddressLiteral { ++ ++ public: ++ ++ ExternalAddress(address target) : AddressLiteral(target, relocInfo::external_word_type){} ++ ++}; ++ ++class InternalAddress: public AddressLiteral { ++ ++ public: ++ ++ InternalAddress(address target) : AddressLiteral(target, relocInfo::internal_word_type) {} ++ ++}; ++ ++// x86 can do array addressing as a single operation since disp can be an absolute ++// address amd64 can't. We create a class that expresses the concept but does extra ++// magic on amd64 to get the final result ++ ++class ArrayAddress { ++ private: ++ ++ AddressLiteral _base; ++ Address _index; ++ ++ public: ++ ++ ArrayAddress() {}; ++ ArrayAddress(AddressLiteral base, Address index): _base(base), _index(index) {}; ++ AddressLiteral base() { return _base; } ++ Address index() { return _index; } ++ ++}; ++ ++const int FPUStateSizeInWords = 512 / wordSize; ++ ++// The MIPS LOONGSON Assembler: Pure assembler doing NO optimizations on the instruction ++// level ; i.e., what you write is what you get. The Assembler is generating code into ++// a CodeBuffer. 
++ ++class Assembler : public AbstractAssembler { ++ friend class AbstractAssembler; // for the non-virtual hack ++ friend class LIR_Assembler; // as_Address() ++ friend class StubGenerator; ++ ++ public: ++ enum Condition { ++ zero , ++ notZero , ++ equal , ++ notEqual , ++ less , ++ lessEqual , ++ greater , ++ greaterEqual , ++ below , ++ belowEqual , ++ above , ++ aboveEqual ++ }; ++ ++ static const int LogInstructionSize = 2; ++ static const int InstructionSize = 1 << LogInstructionSize; ++ ++ // opcode, highest 6 bits: bits[31...26] ++ enum ops { ++ special_op = 0x00, // special_ops ++ regimm_op = 0x01, // regimm_ops ++ j_op = 0x02, ++ jal_op = 0x03, ++ beq_op = 0x04, ++ bne_op = 0x05, ++ blez_op = 0x06, ++ bgtz_op = 0x07, ++ addiu_op = 0x09, ++ slti_op = 0x0a, ++ sltiu_op = 0x0b, ++ andi_op = 0x0c, ++ ori_op = 0x0d, ++ xori_op = 0x0e, ++ lui_op = 0x0f, ++ cop0_op = 0x10, // cop0_ops ++ cop1_op = 0x11, // cop1_ops ++ gs_cop2_op = 0x12, // gs_cop2_ops ++ cop1x_op = 0x13, // cop1x_ops ++ beql_op = 0x14, ++ bnel_op = 0x15, ++ blezl_op = 0x16, ++ bgtzl_op = 0x17, ++ daddiu_op = 0x19, ++ ldl_op = 0x1a, ++ ldr_op = 0x1b, ++ special2_op = 0x1c, // special2_ops ++ msa_op = 0x1e, // msa_ops ++ special3_op = 0x1f, // special3_ops ++ lb_op = 0x20, ++ lh_op = 0x21, ++ lwl_op = 0x22, ++ lw_op = 0x23, ++ lbu_op = 0x24, ++ lhu_op = 0x25, ++ lwr_op = 0x26, ++ lwu_op = 0x27, ++ sb_op = 0x28, ++ sh_op = 0x29, ++ swl_op = 0x2a, ++ sw_op = 0x2b, ++ sdl_op = 0x2c, ++ sdr_op = 0x2d, ++ swr_op = 0x2e, ++ cache_op = 0x2f, ++ ll_op = 0x30, ++ lwc1_op = 0x31, ++ gs_lwc2_op = 0x32, //gs_lwc2_ops ++ pref_op = 0x33, ++ lld_op = 0x34, ++ ldc1_op = 0x35, ++ gs_ldc2_op = 0x36, //gs_ldc2_ops ++ ld_op = 0x37, ++ sc_op = 0x38, ++ swc1_op = 0x39, ++ gs_swc2_op = 0x3a, //gs_swc2_ops ++ scd_op = 0x3c, ++ sdc1_op = 0x3d, ++ gs_sdc2_op = 0x3e, //gs_sdc2_ops ++ sd_op = 0x3f ++ }; ++ ++ static const char *ops_name[]; ++ ++ //special family, the opcode is in low 6 bits. 
++ enum special_ops { ++ sll_op = 0x00, ++ movci_op = 0x01, ++ srl_op = 0x02, ++ sra_op = 0x03, ++ sllv_op = 0x04, ++ srlv_op = 0x06, ++ srav_op = 0x07, ++ jr_op = 0x08, ++ jalr_op = 0x09, ++ movz_op = 0x0a, ++ movn_op = 0x0b, ++ syscall_op = 0x0c, ++ break_op = 0x0d, ++ sync_op = 0x0f, ++ mfhi_op = 0x10, ++ mthi_op = 0x11, ++ mflo_op = 0x12, ++ mtlo_op = 0x13, ++ dsllv_op = 0x14, ++ dsrlv_op = 0x16, ++ dsrav_op = 0x17, ++ mult_op = 0x18, ++ multu_op = 0x19, ++ div_op = 0x1a, ++ divu_op = 0x1b, ++ dmult_op = 0x1c, ++ dmultu_op = 0x1d, ++ ddiv_op = 0x1e, ++ ddivu_op = 0x1f, ++ addu_op = 0x21, ++ subu_op = 0x23, ++ and_op = 0x24, ++ or_op = 0x25, ++ xor_op = 0x26, ++ nor_op = 0x27, ++ slt_op = 0x2a, ++ sltu_op = 0x2b, ++ daddu_op = 0x2d, ++ dsubu_op = 0x2f, ++ tge_op = 0x30, ++ tgeu_op = 0x31, ++ tlt_op = 0x32, ++ tltu_op = 0x33, ++ teq_op = 0x34, ++ tne_op = 0x36, ++ dsll_op = 0x38, ++ dsrl_op = 0x3a, ++ dsra_op = 0x3b, ++ dsll32_op = 0x3c, ++ dsrl32_op = 0x3e, ++ dsra32_op = 0x3f ++ }; ++ ++ static const char* special_name[]; ++ ++ //regimm family, the opcode is in rt[16...20], 5 bits ++ enum regimm_ops { ++ bltz_op = 0x00, ++ bgez_op = 0x01, ++ bltzl_op = 0x02, ++ bgezl_op = 0x03, ++ tgei_op = 0x08, ++ tgeiu_op = 0x09, ++ tlti_op = 0x0a, ++ tltiu_op = 0x0b, ++ teqi_op = 0x0c, ++ tnei_op = 0x0e, ++ bltzal_op = 0x10, ++ bgezal_op = 0x11, ++ bltzall_op = 0x12, ++ bgezall_op = 0x13, ++ bposge32_op = 0x1c, ++ bposge64_op = 0x1d, ++ synci_op = 0x1f, ++ }; ++ ++ static const char* regimm_name[]; ++ ++ //cop0 family, the ops is in bits[25...21], 5 bits ++ enum cop0_ops { ++ mfc0_op = 0x00, ++ dmfc0_op = 0x01, ++ // ++ mxgc0_op = 0x03, //MFGC0, DMFGC0, MTGC0 ++ mtc0_op = 0x04, ++ dmtc0_op = 0x05, ++ rdpgpr_op = 0x0a, ++ inter_op = 0x0b, ++ wrpgpr_op = 0x0c ++ }; ++ ++ //cop1 family, the ops is in bits[25...21], 5 bits ++ enum cop1_ops { ++ mfc1_op = 0x00, ++ dmfc1_op = 0x01, ++ cfc1_op = 0x02, ++ mfhc1_op = 0x03, ++ mtc1_op = 0x04, ++ dmtc1_op = 0x05, ++ ctc1_op = 0x06, ++ mthc1_op = 0x07, ++ bc1f_op = 0x08, ++ single_fmt = 0x10, ++ double_fmt = 0x11, ++ word_fmt = 0x14, ++ long_fmt = 0x15, ++ ps_fmt = 0x16 ++ }; ++ ++ ++ //2 bist (bits[17...16]) of bc1x instructions (cop1) ++ enum bc_ops { ++ bcf_op = 0x0, ++ bct_op = 0x1, ++ bcfl_op = 0x2, ++ bctl_op = 0x3, ++ }; ++ ++ // low 6 bits of c_x_fmt instructions (cop1) ++ enum c_conds { ++ f_cond = 0x30, ++ un_cond = 0x31, ++ eq_cond = 0x32, ++ ueq_cond = 0x33, ++ olt_cond = 0x34, ++ ult_cond = 0x35, ++ ole_cond = 0x36, ++ ule_cond = 0x37, ++ sf_cond = 0x38, ++ ngle_cond = 0x39, ++ seq_cond = 0x3a, ++ ngl_cond = 0x3b, ++ lt_cond = 0x3c, ++ nge_cond = 0x3d, ++ le_cond = 0x3e, ++ ngt_cond = 0x3f ++ }; ++ ++ // low 6 bits of cop1 instructions ++ enum float_ops { ++ fadd_op = 0x00, ++ fsub_op = 0x01, ++ fmul_op = 0x02, ++ fdiv_op = 0x03, ++ fsqrt_op = 0x04, ++ fabs_op = 0x05, ++ fmov_op = 0x06, ++ fneg_op = 0x07, ++ froundl_op = 0x08, ++ ftruncl_op = 0x09, ++ fceill_op = 0x0a, ++ ffloorl_op = 0x0b, ++ froundw_op = 0x0c, ++ ftruncw_op = 0x0d, ++ fceilw_op = 0x0e, ++ ffloorw_op = 0x0f, ++ movf_f_op = 0x11, ++ movt_f_op = 0x11, ++ movz_f_op = 0x12, ++ movn_f_op = 0x13, ++ frecip_op = 0x15, ++ frsqrt_op = 0x16, ++ fcvts_op = 0x20, ++ fcvtd_op = 0x21, ++ fcvtw_op = 0x24, ++ fcvtl_op = 0x25, ++ fcvtps_op = 0x26, ++ fcvtspl_op = 0x28, ++ fpll_op = 0x2c, ++ fplu_op = 0x2d, ++ fpul_op = 0x2e, ++ fpuu_op = 0x2f ++ }; ++ ++ static const char* cop1_name[]; ++ ++ //cop1x family, the opcode is in low 6 bits. 
++ enum cop1x_ops { ++ lwxc1_op = 0x00, ++ ldxc1_op = 0x01, ++ luxc1_op = 0x05, ++ swxc1_op = 0x08, ++ sdxc1_op = 0x09, ++ suxc1_op = 0x0d, ++ prefx_op = 0x0f, ++ ++ alnv_ps_op = 0x1e, ++ madd_s_op = 0x20, ++ madd_d_op = 0x21, ++ madd_ps_op = 0x26, ++ msub_s_op = 0x28, ++ msub_d_op = 0x29, ++ msub_ps_op = 0x2e, ++ nmadd_s_op = 0x30, ++ nmadd_d_op = 0x31, ++ nmadd_ps_op = 0x36, ++ nmsub_s_op = 0x38, ++ nmsub_d_op = 0x39, ++ nmsub_ps_op = 0x3e ++ }; ++ ++ static const char* cop1x_name[]; ++ ++ //special2 family, the opcode is in low 6 bits. ++ enum special2_ops { ++ madd_op = 0x00, ++ maddu_op = 0x01, ++ mul_op = 0x02, ++ gs0x03_op = 0x03, ++ msub_op = 0x04, ++ msubu_op = 0x05, ++ gs0x06_op = 0x06, ++ gsemul2_op = 0x07, ++ gsemul3_op = 0x08, ++ gsemul4_op = 0x09, ++ gsemul5_op = 0x0a, ++ gsemul6_op = 0x0b, ++ gsemul7_op = 0x0c, ++ gsemul8_op = 0x0d, ++ gsemul9_op = 0x0e, ++ gsemul10_op = 0x0f, ++ gsmult_op = 0x10, ++ gsdmult_op = 0x11, ++ gsmultu_op = 0x12, ++ gsdmultu_op = 0x13, ++ gsdiv_op = 0x14, ++ gsddiv_op = 0x15, ++ gsdivu_op = 0x16, ++ gsddivu_op = 0x17, ++ gsmod_op = 0x1c, ++ gsdmod_op = 0x1d, ++ gsmodu_op = 0x1e, ++ gsdmodu_op = 0x1f, ++ clz_op = 0x20, ++ clo_op = 0x21, ++ xctx_op = 0x22, //ctz, cto, dctz, dcto, gsX ++ gsrxr_x_op = 0x23, //gsX ++ dclz_op = 0x24, ++ dclo_op = 0x25, ++ gsle_op = 0x26, ++ gsgt_op = 0x27, ++ gs86j_op = 0x28, ++ gsloop_op = 0x29, ++ gsaj_op = 0x2a, ++ gsldpc_op = 0x2b, ++ gs86set_op = 0x30, ++ gstm_op = 0x31, ++ gscvt_ld_op = 0x32, ++ gscvt_ud_op = 0x33, ++ gseflag_op = 0x34, ++ gscam_op = 0x35, ++ gstop_op = 0x36, ++ gssettag_op = 0x37, ++ gssdbbp_op = 0x38 ++ }; ++ ++ static const char* special2_name[]; ++ ++ // special3 family, the opcode is in low 6 bits. ++ enum special3_ops { ++ ext_op = 0x00, ++ dextm_op = 0x01, ++ dextu_op = 0x02, ++ dext_op = 0x03, ++ ins_op = 0x04, ++ dinsm_op = 0x05, ++ dinsu_op = 0x06, ++ dins_op = 0x07, ++ lxx_op = 0x0a, //lwx, lhx, lbux, ldx ++ insv_op = 0x0c, ++ dinsv_op = 0x0d, ++ ar1_op = 0x10, //MIPS DSP ++ cmp1_op = 0x11, //MIPS DSP ++ re1_op = 0x12, //MIPS DSP, re1_ops ++ sh1_op = 0x13, //MIPS DSP ++ ar2_op = 0x14, //MIPS DSP ++ cmp2_op = 0x15, //MIPS DSP ++ re2_op = 0x16, //MIPS DSP, re2_ops ++ sh2_op = 0x17, //MIPS DSP ++ ar3_op = 0x18, //MIPS DSP ++ bshfl_op = 0x20 //seb, seh ++ }; ++ ++ // re1_ops ++ enum re1_ops { ++ absq_s_qb_op = 0x01, ++ repl_qb_op = 0x02, ++ replv_qb_op = 0x03, ++ absq_s_ph_op = 0x09, ++ repl_ph_op = 0x0a, ++ replv_ph_op = 0x0b, ++ absq_s_w_op = 0x11, ++ bitrev_op = 0x1b ++ }; ++ ++ // re2_ops ++ enum re2_ops { ++ repl_ob_op = 0x02, ++ replv_ob_op = 0x03, ++ absq_s_qh_op = 0x09, ++ repl_qh_op = 0x0a, ++ replv_qh_op = 0x0b, ++ absq_s_pw_op = 0x11, ++ repl_pw_op = 0x12, ++ replv_pw_op = 0x13 ++ }; ++ ++ static const char* special3_name[]; ++ ++ // lwc2/gs_lwc2 family, the opcode is in low 6 bits. ++ enum gs_lwc2_ops { ++ gslble_op = 0x10, ++ gslbgt_op = 0x11, ++ gslhle_op = 0x12, ++ gslhgt_op = 0x13, ++ gslwle_op = 0x14, ++ gslwgt_op = 0x15, ++ gsldle_op = 0x16, ++ gsldgt_op = 0x17, ++ gslwlec1_op = 0x1c, ++ gslwgtc1_op = 0x1d, ++ gsldlec1_op = 0x1e, ++ gsldgtc1_op = 0x1f, ++ gslq_op = 0x20 ++ }; ++ ++ static const char* gs_lwc2_name[]; ++ ++ // ldc2/gs_ldc2 family, the opcode is in low 3 bits. ++ enum gs_ldc2_ops { ++ gslbx_op = 0x0, ++ gslhx_op = 0x1, ++ gslwx_op = 0x2, ++ gsldx_op = 0x3, ++ gslwxc1_op = 0x6, ++ gsldxc1_op = 0x7 ++ }; ++ ++ static const char* gs_ldc2_name[]; ++ ++ // swc2/gs_swc2 family, the opcode is in low 6 bits. 
++ enum gs_swc2_ops { ++ gssble_op = 0x10, ++ gssbgt_op = 0x11, ++ gsshle_op = 0x12, ++ gsshgt_op = 0x13, ++ gsswle_op = 0x14, ++ gsswgt_op = 0x15, ++ gssdle_op = 0x16, ++ gssdgt_op = 0x17, ++ gsswlec1_op = 0x1c, ++ gsswgtc1_op = 0x1d, ++ gssdlec1_op = 0x1e, ++ gssdgtc1_op = 0x1f, ++ gssq_op = 0x20 ++ }; ++ ++ static const char* gs_swc2_name[]; ++ ++ // sdc2/gs_sdc2 family, the opcode is in low 3 bits. ++ enum gs_sdc2_ops { ++ gssbx_op = 0x0, ++ gsshx_op = 0x1, ++ gsswx_op = 0x2, ++ gssdx_op = 0x3, ++ gsswxc1_op = 0x6, ++ gssdxc1_op = 0x7 ++ }; ++ ++ static const char* gs_sdc2_name[]; ++ ++ enum WhichOperand { ++ // input to locate_operand, and format code for relocations ++ imm_operand = 0, // embedded 32-bit|64-bit immediate operand ++ disp32_operand = 1, // embedded 32-bit displacement or address ++ call32_operand = 2, // embedded 32-bit self-relative displacement ++ narrow_oop_operand = 3, // embedded 32-bit immediate narrow oop ++ _WhichOperand_limit = 4 ++ }; ++ ++ static int opcode(int insn) { return (insn>>26)&0x3f; } ++ static int rs(int insn) { return (insn>>21)&0x1f; } ++ static int rt(int insn) { return (insn>>16)&0x1f; } ++ static int rd(int insn) { return (insn>>11)&0x1f; } ++ static int sa(int insn) { return (insn>>6)&0x1f; } ++ static int special(int insn) { return insn&0x3f; } ++ static int imm_off(int insn) { return (short)low16(insn); } ++ ++ static int low (int x, int l) { return bitfield(x, 0, l); } ++ static int low16(int x) { return low(x, 16); } ++ static int low26(int x) { return low(x, 26); } ++ ++ protected: ++ //help methods for instruction ejection ++ ++ // I-Type (Immediate) ++ // 31 26 25 21 20 16 15 0 ++ //| opcode | rs | rt | immediat | ++ //| | | | | ++ // 6 5 5 16 ++ static int insn_ORRI(int op, int rs, int rt, int imm) { assert(is_simm16(imm), "not a signed 16-bit int"); return (op<<26) | (rs<<21) | (rt<<16) | low16(imm); } ++ ++ // R-Type (Register) ++ // 31 26 25 21 20 16 15 11 10 6 5 0 ++ //| special | rs | rt | rd | 0 | opcode | ++ //| 0 0 0 0 0 0 | | | | 0 0 0 0 0 | | ++ // 6 5 5 5 5 6 ++ static int insn_RRRO(int rs, int rt, int rd, int op) { return (rs<<21) | (rt<<16) | (rd<<11) | op; } ++ static int insn_RRSO(int rt, int rd, int sa, int op) { return (rt<<16) | (rd<<11) | (sa<<6) | op; } ++ static int insn_RRCO(int rs, int rt, int code, int op) { return (rs<<21) | (rt<<16) | (code<<6) | op; } ++ ++ static int insn_COP0(int op, int rt, int rd) { return (cop0_op<<26) | (op<<21) | (rt<<16) | (rd<<11); } ++ static int insn_COP1(int op, int rt, int fs) { return (cop1_op<<26) | (op<<21) | (rt<<16) | (fs<<11); } ++ ++ static int insn_F3RO(int fmt, int ft, int fs, int fd, int func) { ++ return (cop1_op<<26) | (fmt<<21) | (ft<<16) | (fs<<11) | (fd<<6) | func; ++ } ++ static int insn_F3ROX(int fmt, int ft, int fs, int fd, int func) { ++ return (cop1x_op<<26) | (fmt<<21) | (ft<<16) | (fs<<11) | (fd<<6) | func; ++ } ++ ++ static int high (int x, int l) { return bitfield(x, 32-l, l); } ++ static int high16(int x) { return high(x, 16); } ++ static int high6 (int x) { return high(x, 6); } ++ ++ //get the offset field of jump/branch instruction ++ int offset(address entry) { ++ assert(is_simm16((entry - pc() - 4) / 4), "change this code"); ++ if (!is_simm16((entry - pc() - 4) / 4)) { ++ tty->print_cr("!!! 
is_simm16: %lx", (entry - pc() - 4) / 4); ++ } ++ return (entry - pc() - 4) / 4; ++ } ++ ++ ++public: ++ using AbstractAssembler::offset; ++ ++ //sign expand with the sign bit is h ++ static int expand(int x, int h) { return -(x & (1<> 16; ++ } ++ ++ static int split_high(int x) { ++ return ( (x >> 16) + ((x & 0x8000) != 0) ) & 0xffff; ++ } ++ ++ static int merge(int low, int high) { ++ return expand(low, 15) + (high<<16); ++ } ++ ++ static intptr_t merge(intptr_t x0, intptr_t x16, intptr_t x32, intptr_t x48) { ++ return (x48 << 48) | (x32 << 32) | (x16 << 16) | x0; ++ } ++ ++ // Test if x is within signed immediate range for nbits. ++ static bool is_simm (int x, int nbits) { ++ assert(0 < nbits && nbits < 32, "out of bounds"); ++ const int min = -( ((int)1) << nbits-1 ); ++ const int maxplus1 = ( ((int)1) << nbits-1 ); ++ return min <= x && x < maxplus1; ++ } ++ ++ static bool is_simm(jlong x, unsigned int nbits) { ++ assert(0 < nbits && nbits < 64, "out of bounds"); ++ const jlong min = -( ((jlong)1) << nbits-1 ); ++ const jlong maxplus1 = ( ((jlong)1) << nbits-1 ); ++ return min <= x && x < maxplus1; ++ } ++ ++ // Test if x is within unsigned immediate range for nbits ++ static bool is_uimm(int x, unsigned int nbits) { ++ assert(0 < nbits && nbits < 32, "out of bounds"); ++ const int maxplus1 = ( ((int)1) << nbits ); ++ return 0 <= x && x < maxplus1; ++ } ++ ++ static bool is_uimm(jlong x, unsigned int nbits) { ++ assert(0 < nbits && nbits < 64, "out of bounds"); ++ const jlong maxplus1 = ( ((jlong)1) << nbits ); ++ return 0 <= x && x < maxplus1; ++ } ++ ++ static bool is_simm16(int x) { return is_simm(x, 16); } ++ static bool is_simm16(long x) { return is_simm((jlong)x, (unsigned int)16); } ++ ++ static bool fit_in_jal(address target, address pc) { ++ intptr_t mask = 0xfffffffff0000000; ++ return ((intptr_t)(pc + 4) & mask) == ((intptr_t)target & mask); ++ } ++ ++ bool fit_int_branch(address entry) { ++ return is_simm16(offset(entry)); ++ } ++ ++protected: ++#ifdef ASSERT ++ #define CHECK_DELAY ++#endif ++#ifdef CHECK_DELAY ++ enum Delay_state { no_delay, at_delay_slot, filling_delay_slot } delay_state; ++#endif ++ ++public: ++ void assert_not_delayed() { ++#ifdef CHECK_DELAY ++ assert(delay_state == no_delay, "next instruction should not be a delay slot"); ++#endif ++ } ++ ++protected: ++ // Delay slot helpers ++ // cti is called when emitting control-transfer instruction, ++ // BEFORE doing the emitting. ++ // Only effective when assertion-checking is enabled. ++ ++ // called when emitting cti with a delay slot, AFTER emitting ++ void has_delay_slot() { ++#ifdef CHECK_DELAY ++ assert(delay_state == no_delay, "just checking"); ++ delay_state = at_delay_slot; ++#endif ++ } ++ ++public: ++ Assembler* delayed() { ++#ifdef CHECK_DELAY ++ guarantee( delay_state == at_delay_slot, "delayed instructition is not in delay slot"); ++ delay_state = filling_delay_slot; ++#endif ++ return this; ++ } ++ ++ void flush() { ++#ifdef CHECK_DELAY ++ guarantee( delay_state == no_delay, "ending code with a delay slot"); ++#endif ++ AbstractAssembler::flush(); ++ } ++ ++ void emit_long(int); // shadows AbstractAssembler::emit_long ++ void emit_data(int); ++ void emit_data(int, RelocationHolder const&); ++ void emit_data(int, relocInfo::relocType rtype); ++ void check_delay(); ++ ++ ++ // Generic instructions ++ // Does 32bit or 64bit as needed for the platform. 
In some sense these ++ // belong in macro assembler but there is no need for both varieties to exist ++ ++ void addu32(Register rd, Register rs, Register rt){ emit_long(insn_RRRO((int)rs->encoding(), (int)rt->encoding(), (int)rd->encoding(), addu_op)); } ++ void addiu32(Register rt, Register rs, int imm) { emit_long(insn_ORRI(addiu_op, (int)rs->encoding(), (int)rt->encoding(), imm)); } ++ void addiu(Register rt, Register rs, int imm) { daddiu (rt, rs, imm);} ++ void addu(Register rd, Register rs, Register rt) { daddu (rd, rs, rt); } ++ ++ void andr(Register rd, Register rs, Register rt) { emit_long(insn_RRRO((int)rs->encoding(), (int)rt->encoding(), (int)rd->encoding(), and_op)); } ++ void andi(Register rt, Register rs, int imm) { emit_long(insn_ORRI(andi_op, (int)rs->encoding(), (int)rt->encoding(), simm16(imm))); } ++ ++ void beq (Register rs, Register rt, int off) { emit_long(insn_ORRI(beq_op, (int)rs->encoding(), (int)rt->encoding(), off)); has_delay_slot(); } ++ void beql (Register rs, Register rt, int off) { emit_long(insn_ORRI(beql_op, (int)rs->encoding(), (int)rt->encoding(), off)); has_delay_slot(); } ++ void bgez (Register rs, int off) { emit_long(insn_ORRI(regimm_op, (int)rs->encoding(), bgez_op, off)); has_delay_slot(); } ++ void bgezal (Register rs, int off) { emit_long(insn_ORRI(regimm_op, (int)rs->encoding(), bgezal_op, off)); has_delay_slot(); } ++ void bgezall(Register rs, int off) { emit_long(insn_ORRI(regimm_op, (int)rs->encoding(), bgezall_op, off)); has_delay_slot(); } ++ void bgezl (Register rs, int off) { emit_long(insn_ORRI(regimm_op, (int)rs->encoding(), bgezl_op, off)); has_delay_slot(); } ++ void bgtz (Register rs, int off) { emit_long(insn_ORRI(bgtz_op, (int)rs->encoding(), 0, off)); has_delay_slot(); } ++ void bgtzl (Register rs, int off) { emit_long(insn_ORRI(bgtzl_op, (int)rs->encoding(), 0, off)); has_delay_slot(); } ++ void blez (Register rs, int off) { emit_long(insn_ORRI(blez_op, (int)rs->encoding(), 0, off)); has_delay_slot(); } ++ void blezl (Register rs, int off) { emit_long(insn_ORRI(blezl_op, (int)rs->encoding(), 0, off)); has_delay_slot(); } ++ void bltz (Register rs, int off) { emit_long(insn_ORRI(regimm_op, (int)rs->encoding(), bltz_op, off)); has_delay_slot(); } ++ void bltzal (Register rs, int off) { emit_long(insn_ORRI(regimm_op, (int)rs->encoding(), bltzal_op, off)); has_delay_slot(); } ++ void bltzall(Register rs, int off) { emit_long(insn_ORRI(regimm_op, (int)rs->encoding(), bltzall_op, off)); has_delay_slot(); } ++ void bltzl (Register rs, int off) { emit_long(insn_ORRI(regimm_op, (int)rs->encoding(), bltzl_op, off)); has_delay_slot(); } ++ void bne (Register rs, Register rt, int off) { emit_long(insn_ORRI(bne_op, (int)rs->encoding(), (int)rt->encoding(), off)); has_delay_slot(); } ++ void bnel (Register rs, Register rt, int off) { emit_long(insn_ORRI(bnel_op, (int)rs->encoding(), (int)rt->encoding(), off)); has_delay_slot(); } ++ // two versions of brk: ++ // the brk(code) version is according to MIPS64 Architecture For Programmers Volume II: The MIPS64 Instruction Set ++ // the brk(code1, code2) is according to disassembler of hsdis (binutils-2.27) ++ // both versions work ++ void brk (int code) { assert(is_uimm(code, 20), "code is 20 bits"); emit_long( (low(code, 20)<<6) | break_op ); } ++ void brk (int code1, int code2) { assert(is_uimm(code1, 10) && is_uimm(code2, 10), "code is 20 bits"); emit_long( (low(code1, 10)<<16) | (low(code2, 10)<<6) | break_op ); } ++ ++ void beq (Register rs, Register rt, address entry) { beq(rs, rt, 
offset(entry)); } ++ void beql (Register rs, Register rt, address entry) { beql(rs, rt, offset(entry));} ++ void bgez (Register rs, address entry) { bgez (rs, offset(entry)); } ++ void bgezal (Register rs, address entry) { bgezal (rs, offset(entry)); } ++ void bgezall(Register rs, address entry) { bgezall(rs, offset(entry)); } ++ void bgezl (Register rs, address entry) { bgezl (rs, offset(entry)); } ++ void bgtz (Register rs, address entry) { bgtz (rs, offset(entry)); } ++ void bgtzl (Register rs, address entry) { bgtzl (rs, offset(entry)); } ++ void blez (Register rs, address entry) { blez (rs, offset(entry)); } ++ void blezl (Register rs, address entry) { blezl (rs, offset(entry)); } ++ void bltz (Register rs, address entry) { bltz (rs, offset(entry)); } ++ void bltzal (Register rs, address entry) { bltzal (rs, offset(entry)); } ++ void bltzall(Register rs, address entry) { bltzall(rs, offset(entry)); } ++ void bltzl (Register rs, address entry) { bltzl (rs, offset(entry)); } ++ void bne (Register rs, Register rt, address entry) { bne(rs, rt, offset(entry)); } ++ void bnel (Register rs, Register rt, address entry) { bnel(rs, rt, offset(entry)); } ++ ++ void beq (Register rs, Register rt, Label& L) { beq(rs, rt, target(L)); } ++ void beql (Register rs, Register rt, Label& L) { beql(rs, rt, target(L)); } ++ void bgez (Register rs, Label& L){ bgez (rs, target(L)); } ++ void bgezal (Register rs, Label& L){ bgezal (rs, target(L)); } ++ void bgezall(Register rs, Label& L){ bgezall(rs, target(L)); } ++ void bgezl (Register rs, Label& L){ bgezl (rs, target(L)); } ++ void bgtz (Register rs, Label& L){ bgtz (rs, target(L)); } ++ void bgtzl (Register rs, Label& L){ bgtzl (rs, target(L)); } ++ void blez (Register rs, Label& L){ blez (rs, target(L)); } ++ void blezl (Register rs, Label& L){ blezl (rs, target(L)); } ++ void bltz (Register rs, Label& L){ bltz (rs, target(L)); } ++ void bltzal (Register rs, Label& L){ bltzal (rs, target(L)); } ++ void bltzall(Register rs, Label& L){ bltzall(rs, target(L)); } ++ void bltzl (Register rs, Label& L){ bltzl (rs, target(L)); } ++ void bne (Register rs, Register rt, Label& L){ bne(rs, rt, target(L)); } ++ void bnel (Register rs, Register rt, Label& L){ bnel(rs, rt, target(L)); } ++ ++ void daddiu(Register rt, Register rs, int imm) { emit_long(insn_ORRI(daddiu_op, (int)rs->encoding(), (int)rt->encoding(), imm)); } ++ void daddu (Register rd, Register rs, Register rt) { emit_long(insn_RRRO((int)rs->encoding(), (int)rt->encoding(), (int)rd->encoding(), daddu_op)); } ++ void ddiv (Register rs, Register rt) { emit_long(insn_RRRO((int)rs->encoding(), (int)rt->encoding(), 0, ddiv_op)); } ++ void ddivu (Register rs, Register rt) { emit_long(insn_RRRO((int)rs->encoding(), (int)rt->encoding(), 0, ddivu_op)); } ++ ++ void movz (Register rd, Register rs, Register rt) { emit_long(insn_RRRO((int)rs->encoding(), (int)rt->encoding(), (int)rd->encoding(), movz_op)); } ++ void movn (Register rd, Register rs, Register rt) { emit_long(insn_RRRO((int)rs->encoding(), (int)rt->encoding(), (int)rd->encoding(), movn_op)); } ++ ++ void movt (Register rd, Register rs) { emit_long(((int)rs->encoding() << 21) | (1 << 16) | ((int)rd->encoding() << 11) | movci_op); } ++ void movf (Register rd, Register rs) { emit_long(((int)rs->encoding() << 21) | ((int)rd->encoding() << 11) | movci_op); } ++ ++ enum bshfl_ops { ++ seb_op = 0x10, ++ seh_op = 0x18 ++ }; ++ void seb (Register rd, Register rt) { emit_long((special3_op << 26) | ((int)rt->encoding() << 16) | ((int)rd->encoding() << 11) | (seb_op 
<< 6) | bshfl_op); } ++ void seh (Register rd, Register rt) { emit_long((special3_op << 26) | ((int)rt->encoding() << 16) | ((int)rd->encoding() << 11) | (seh_op << 6) | bshfl_op); } ++ ++ void ext (Register rt, Register rs, int pos, int size) { ++ guarantee((0 <= pos) && (pos < 32), "pos must be in [0, 32)"); ++ guarantee((0 < size) && (size <= 32), "size must be in (0, 32]"); ++ guarantee((0 < pos + size) && (pos + size <= 32), "pos + size must be in (0, 32]"); ++ ++ int lsb = pos; ++ int msbd = size - 1; ++ ++ emit_long((special3_op << 26) | ((int)rs->encoding() << 21) | ((int)rt->encoding() << 16) | (msbd << 11) | (lsb << 6) | ext_op); ++ } ++ ++ void dext (Register rt, Register rs, int pos, int size) { ++ guarantee((0 <= pos) && (pos < 32), "pos must be in [0, 32)"); ++ guarantee((0 < size) && (size <= 32), "size must be in (0, 32]"); ++ guarantee((0 < pos + size) && (pos + size <= 63), "pos + size must be in (0, 63]"); ++ ++ int lsb = pos; ++ int msbd = size - 1; ++ ++ emit_long((special3_op << 26) | ((int)rs->encoding() << 21) | ((int)rt->encoding() << 16) | (msbd << 11) | (lsb << 6) | dext_op); ++ } ++ ++ void dextm (Register rt, Register rs, int pos, int size) { ++ guarantee((0 <= pos) && (pos < 32), "pos must be in [0, 32)"); ++ guarantee((32 < size) && (size <= 64), "size must be in (32, 64]"); ++ guarantee((32 < pos + size) && (pos + size <= 64), "pos + size must be in (32, 64]"); ++ ++ int lsb = pos; ++ int msbd = size - 1 - 32; ++ ++ emit_long((special3_op << 26) | ((int)rs->encoding() << 21) | ((int)rt->encoding() << 16) | (msbd << 11) | (lsb << 6) | dextm_op); ++ } ++ ++ void rotr (Register rd, Register rt, int sa) { ++ emit_long((special_op << 26) | (1 << 21) | ((int)rt->encoding() << 16) | ((int)rd->encoding() << 11) | (low(sa, 5) << 6) | srl_op); ++ } ++ ++ void drotr (Register rd, Register rt, int sa) { ++ emit_long((special_op << 26) | (1 << 21) | ((int)rt->encoding() << 16) | ((int)rd->encoding() << 11) | (low(sa, 5) << 6) | dsrl_op); ++ } ++ ++ void drotr32 (Register rd, Register rt, int sa) { ++ emit_long((special_op << 26) | (1 << 21) | ((int)rt->encoding() << 16) | ((int)rd->encoding() << 11) | (low(sa, 5) << 6) | dsrl32_op); ++ } ++ ++ void rotrv (Register rd, Register rt, Register rs) { ++ emit_long((special_op << 26) | ((int)rs->encoding() << 21) | ((int)rt->encoding() << 16) | ((int)rd->encoding() << 11) | (1 << 6) | srlv_op); ++ } ++ ++ void drotrv (Register rd, Register rt, Register rs) { ++ emit_long((special_op << 26) | ((int)rs->encoding() << 21) | ((int)rt->encoding() << 16) | ((int)rd->encoding() << 11) | (1 << 6) | dsrlv_op); ++ } ++ ++ void div (Register rs, Register rt) { emit_long(insn_RRRO((int)rs->encoding(), (int)rt->encoding(), 0, div_op)); } ++ void divu (Register rs, Register rt) { emit_long(insn_RRRO((int)rs->encoding(), (int)rt->encoding(), 0, divu_op)); } ++ void dmult (Register rs, Register rt) { emit_long(insn_RRRO((int)rs->encoding(), (int)rt->encoding(), 0, dmult_op)); } ++ void dmultu(Register rs, Register rt) { emit_long(insn_RRRO((int)rs->encoding(), (int)rt->encoding(), 0, dmultu_op)); } ++ void dsll (Register rd, Register rt , int sa) { emit_long(insn_RRSO((int)rt->encoding(), (int)rd->encoding(), low(sa, 5), dsll_op)); } ++ void dsllv (Register rd, Register rt, Register rs) { emit_long(insn_RRRO((int)rs->encoding(), (int)rt->encoding(), (int)rd->encoding(), dsllv_op)); } ++ void dsll32(Register rd, Register rt , int sa) { emit_long(insn_RRSO((int)rt->encoding(), (int)rd->encoding(), low(sa, 5), dsll32_op)); } ++ void dsra 
(Register rd, Register rt , int sa) { emit_long(insn_RRSO((int)rt->encoding(), (int)rd->encoding(), low(sa, 5), dsra_op)); } ++ void dsrav (Register rd, Register rt, Register rs) { emit_long(insn_RRRO((int)rs->encoding(), (int)rt->encoding(), (int)rd->encoding(), dsrav_op)); } ++ void dsra32(Register rd, Register rt , int sa) { emit_long(insn_RRSO((int)rt->encoding(), (int)rd->encoding(), low(sa, 5), dsra32_op)); } ++ void dsrl (Register rd, Register rt , int sa) { emit_long(insn_RRSO((int)rt->encoding(), (int)rd->encoding(), low(sa, 5), dsrl_op)); } ++ void dsrlv (Register rd, Register rt, Register rs) { emit_long(insn_RRRO((int)rs->encoding(), (int)rt->encoding(), (int)rd->encoding(), dsrlv_op)); } ++ void dsrl32(Register rd, Register rt , int sa) { emit_long(insn_RRSO((int)rt->encoding(), (int)rd->encoding(), low(sa, 5), dsrl32_op)); } ++ void dsubu (Register rd, Register rs, Register rt) { emit_long(insn_RRRO((int)rs->encoding(), (int)rt->encoding(), (int)rd->encoding(), dsubu_op)); } ++ ++ void b(int off) { beq(R0, R0, off); } ++ void b(address entry) { b(offset(entry)); } ++ void b(Label& L) { b(target(L)); } ++ ++ void j(address entry); ++ void jal(address entry); ++ ++ void jalr(Register rd, Register rs) { emit_long( ((int)rs->encoding()<<21) | ((int)rd->encoding()<<11) | jalr_op); has_delay_slot(); } ++ void jalr(Register rs) { jalr(RA, rs); } ++ void jalr() { jalr(RT9); } ++ ++ void jr(Register rs) { emit_long(((int)rs->encoding()<<21) | jr_op); has_delay_slot(); } ++ void jr_hb(Register rs) { emit_long(((int)rs->encoding()<<21) | (1 << 10) | jr_op); has_delay_slot(); } ++ ++ void lb (Register rt, Register base, int off) { emit_long(insn_ORRI(lb_op, (int)base->encoding(), (int)rt->encoding(), off)); } ++ void lbu(Register rt, Register base, int off) { emit_long(insn_ORRI(lbu_op, (int)base->encoding(), (int)rt->encoding(), off)); } ++ void ld (Register rt, Register base, int off) { emit_long(insn_ORRI(ld_op, (int)base->encoding(), (int)rt->encoding(), off)); } ++ void ldl(Register rt, Register base, int off) { emit_long(insn_ORRI(ldl_op, (int)base->encoding(), (int)rt->encoding(), off)); } ++ void ldr(Register rt, Register base, int off) { emit_long(insn_ORRI(ldr_op, (int)base->encoding(), (int)rt->encoding(), off)); } ++ void lh (Register rt, Register base, int off) { emit_long(insn_ORRI(lh_op, (int)base->encoding(), (int)rt->encoding(), off)); } ++ void lhu(Register rt, Register base, int off) { emit_long(insn_ORRI(lhu_op, (int)base->encoding(), (int)rt->encoding(), off)); } ++ void ll (Register rt, Register base, int off) { emit_long(insn_ORRI(ll_op, (int)base->encoding(), (int)rt->encoding(), off)); } ++ void lld(Register rt, Register base, int off) { emit_long(insn_ORRI(lld_op, (int)base->encoding(), (int)rt->encoding(), off)); } ++ void lui(Register rt, int imm) { emit_long(insn_ORRI(lui_op, 0, (int)rt->encoding(), simm16(imm))); } ++ void lw (Register rt, Register base, int off) { emit_long(insn_ORRI(lw_op, (int)base->encoding(), (int)rt->encoding(), off)); } ++ void lwl(Register rt, Register base, int off) { emit_long(insn_ORRI(lwl_op, (int)base->encoding(), (int)rt->encoding(), off)); } ++ void lwr(Register rt, Register base, int off) { emit_long(insn_ORRI(lwr_op, (int)base->encoding(), (int)rt->encoding(), off)); } ++ void lwu(Register rt, Register base, int off) { emit_long(insn_ORRI(lwu_op, (int)base->encoding(), (int)rt->encoding(), off)); } ++ ++ void lb (Register rt, Address src); ++ void lbu(Register rt, Address src); ++ void ld (Register rt, Address src); ++ void 
ldl(Register rt, Address src); ++ void ldr(Register rt, Address src); ++ void lh (Register rt, Address src); ++ void lhu(Register rt, Address src); ++ void ll (Register rt, Address src); ++ void lld(Register rt, Address src); ++ void lw (Register rt, Address src); ++ void lwl(Register rt, Address src); ++ void lwr(Register rt, Address src); ++ void lwu(Register rt, Address src); ++ void lea(Register rt, Address src); ++ void pref(int hint, Register base, int off) { emit_long(insn_ORRI(pref_op, (int)base->encoding(), low(hint, 5), low(off, 16))); } ++ ++ void mfhi (Register rd) { emit_long( ((int)rd->encoding()<<11) | mfhi_op ); } ++ void mflo (Register rd) { emit_long( ((int)rd->encoding()<<11) | mflo_op ); } ++ void mthi (Register rs) { emit_long( ((int)rs->encoding()<<21) | mthi_op ); } ++ void mtlo (Register rs) { emit_long( ((int)rs->encoding()<<21) | mtlo_op ); } ++ ++ void mult (Register rs, Register rt) { emit_long(insn_RRRO((int)rs->encoding(), (int)rt->encoding(), 0, mult_op)); } ++ void multu(Register rs, Register rt) { emit_long(insn_RRRO((int)rs->encoding(), (int)rt->encoding(), 0, multu_op)); } ++ ++ void nor(Register rd, Register rs, Register rt) { emit_long(insn_RRRO((int)rs->encoding(), (int)rt->encoding(), (int)rd->encoding(), nor_op)); } ++ ++ void orr(Register rd, Register rs, Register rt) { emit_long(insn_RRRO((int)rs->encoding(), (int)rt->encoding(), (int)rd->encoding(), or_op)); } ++ void ori(Register rt, Register rs, int imm) { emit_long(insn_ORRI(ori_op, (int)rs->encoding(), (int)rt->encoding(), simm16(imm))); } ++ ++ void sb (Register rt, Register base, int off) { emit_long(insn_ORRI(sb_op, (int)base->encoding(), (int)rt->encoding(), off)); } ++ void sc (Register rt, Register base, int off) { emit_long(insn_ORRI(sc_op, (int)base->encoding(), (int)rt->encoding(), off)); } ++ void scd (Register rt, Register base, int off) { emit_long(insn_ORRI(scd_op, (int)base->encoding(), (int)rt->encoding(), off)); } ++ void sd (Register rt, Register base, int off) { emit_long(insn_ORRI(sd_op, (int)base->encoding(), (int)rt->encoding(), off)); } ++ void sdl (Register rt, Register base, int off) { emit_long(insn_ORRI(sdl_op, (int)base->encoding(), (int)rt->encoding(), off)); } ++ void sdr (Register rt, Register base, int off) { emit_long(insn_ORRI(sdr_op, (int)base->encoding(), (int)rt->encoding(), off)); } ++ void sh (Register rt, Register base, int off) { emit_long(insn_ORRI(sh_op, (int)base->encoding(), (int)rt->encoding(), off)); } ++ void sll (Register rd, Register rt , int sa) { emit_long(insn_RRSO((int)rt->encoding(), (int)rd->encoding(), low(sa, 5), sll_op)); } ++ void sllv (Register rd, Register rt, Register rs) { emit_long(insn_RRRO((int)rs->encoding(), (int)rt->encoding(), (int)rd->encoding(), sllv_op)); } ++ void slt (Register rd, Register rs, Register rt) { emit_long(insn_RRRO((int)rs->encoding(), (int)rt->encoding(), (int)rd->encoding(), slt_op)); } ++ void slti (Register rt, Register rs, int imm) { emit_long(insn_ORRI(slti_op, (int)rs->encoding(), (int)rt->encoding(), imm)); } ++ void sltiu(Register rt, Register rs, int imm) { emit_long(insn_ORRI(sltiu_op, (int)rs->encoding(), (int)rt->encoding(), imm)); } ++ void sltu (Register rd, Register rs, Register rt) { emit_long(insn_RRRO((int)rs->encoding(), (int)rt->encoding(), (int)rd->encoding(), sltu_op)); } ++ void sra (Register rd, Register rt , int sa) { emit_long(insn_RRSO((int)rt->encoding(), (int)rd->encoding(), low(sa, 5), sra_op)); } ++ void srav (Register rd, Register rt, Register rs) { 
emit_long(insn_RRRO((int)rs->encoding(), (int)rt->encoding(), (int)rd->encoding(), srav_op)); } ++ void srl (Register rd, Register rt , int sa) { emit_long(insn_RRSO((int)rt->encoding(), (int)rd->encoding(), low(sa, 5), srl_op)); } ++ void srlv (Register rd, Register rt, Register rs) { emit_long(insn_RRRO((int)rs->encoding(), (int)rt->encoding(), (int)rd->encoding(), srlv_op)); } ++ ++ void subu (Register rd, Register rs, Register rt) { dsubu (rd, rs, rt); } ++ void subu32 (Register rd, Register rs, Register rt) { emit_long(insn_RRRO((int)rs->encoding(), (int)rt->encoding(), (int)rd->encoding(), subu_op)); } ++ void sw (Register rt, Register base, int off) { emit_long(insn_ORRI(sw_op, (int)base->encoding(), (int)rt->encoding(), off)); } ++ void swl (Register rt, Register base, int off) { emit_long(insn_ORRI(swl_op, (int)base->encoding(), (int)rt->encoding(), off)); } ++ void swr (Register rt, Register base, int off) { emit_long(insn_ORRI(swr_op, (int)base->encoding(), (int)rt->encoding(), off)); } ++ void synci(Register base, int off) { emit_long(insn_ORRI(regimm_op, (int)base->encoding(), synci_op, off)); } ++ void sync () { ++ if (os::is_ActiveCoresMP()) ++ emit_long(0); ++ else ++ emit_long(sync_op); ++ } ++ void syscall(int code) { emit_long( (code<<6) | syscall_op ); } ++ ++ void sb(Register rt, Address dst); ++ void sc(Register rt, Address dst); ++ void scd(Register rt, Address dst); ++ void sd(Register rt, Address dst); ++ void sdl(Register rt, Address dst); ++ void sdr(Register rt, Address dst); ++ void sh(Register rt, Address dst); ++ void sw(Register rt, Address dst); ++ void swl(Register rt, Address dst); ++ void swr(Register rt, Address dst); ++ ++ void teq (Register rs, Register rt, int code) { emit_long(insn_RRCO((int)rs->encoding(), (int)rt->encoding(), code, teq_op)); } ++ void teqi (Register rs, int imm) { emit_long(insn_ORRI(regimm_op, (int)rs->encoding(), teqi_op, imm)); } ++ void tge (Register rs, Register rt, int code) { emit_long(insn_RRCO((int)rs->encoding(), (int)rt->encoding(), code, tge_op)); } ++ void tgei (Register rs, int imm) { emit_long(insn_ORRI(regimm_op, (int)rs->encoding(), tgei_op, imm)); } ++ void tgeiu(Register rs, int imm) { emit_long(insn_ORRI(regimm_op, (int)rs->encoding(), tgeiu_op, imm)); } ++ void tgeu (Register rs, Register rt, int code) { emit_long(insn_RRCO((int)rs->encoding(), (int)rt->encoding(), code, tgeu_op)); } ++ void tlt (Register rs, Register rt, int code) { emit_long(insn_RRCO((int)rs->encoding(), (int)rt->encoding(), code, tlt_op)); } ++ void tlti (Register rs, int imm) { emit_long(insn_ORRI(regimm_op, (int)rs->encoding(), tlti_op, imm)); } ++ void tltiu(Register rs, int imm) { emit_long(insn_ORRI(regimm_op, (int)rs->encoding(), tltiu_op, imm)); } ++ void tltu (Register rs, Register rt, int code) { emit_long(insn_RRCO((int)rs->encoding(), (int)rt->encoding(), code, tltu_op)); } ++ void tne (Register rs, Register rt, int code) { emit_long(insn_RRCO((int)rs->encoding(), (int)rt->encoding(), code, tne_op)); } ++ void tnei (Register rs, int imm) { emit_long(insn_ORRI(regimm_op, (int)rs->encoding(), tnei_op, imm)); } ++ ++ void xorr(Register rd, Register rs, Register rt) { emit_long(insn_RRRO((int)rs->encoding(), (int)rt->encoding(), (int)rd->encoding(), xor_op)); } ++ void xori(Register rt, Register rs, int imm) { emit_long(insn_ORRI(xori_op, (int)rs->encoding(), (int)rt->encoding(), simm16(imm))); } ++ ++ void nop() { emit_long(0); } ++ ++ ++ ++ void ldc1(FloatRegister ft, Register base, int off) { emit_long(insn_ORRI(ldc1_op, 
(int)base->encoding(), (int)ft->encoding(), off)); } ++ void lwc1(FloatRegister ft, Register base, int off) { emit_long(insn_ORRI(lwc1_op, (int)base->encoding(), (int)ft->encoding(), off)); } ++ void ldc1(FloatRegister ft, Address src); ++ void lwc1(FloatRegister ft, Address src); ++ ++ //COP0 ++ void mfc0 (Register rt, Register rd) { emit_long(insn_COP0( mfc0_op, (int)rt->encoding(), (int)rd->encoding())); } ++ void dmfc0 (Register rt, FloatRegister rd) { emit_long(insn_COP0(dmfc0_op, (int)rt->encoding(), (int)rd->encoding())); } ++ // MFGC0, DMFGC0, MTGC0, DMTGC0 not implemented yet ++ void mtc0 (Register rt, Register rd) { emit_long(insn_COP0( mtc0_op, (int)rt->encoding(), (int)rd->encoding())); } ++ void dmtc0 (Register rt, FloatRegister rd) { emit_long(insn_COP0(dmtc0_op, (int)rt->encoding(), (int)rd->encoding())); } ++ //COP0 end ++ ++ ++ //COP1 ++ void mfc1 (Register rt, FloatRegister fs) { emit_long(insn_COP1 (mfc1_op, (int)rt->encoding(), (int)fs->encoding())); } ++ void dmfc1(Register rt, FloatRegister fs) { emit_long(insn_COP1(dmfc1_op, (int)rt->encoding(), (int)fs->encoding())); } ++ void cfc1 (Register rt, int fs) { emit_long(insn_COP1( cfc1_op, (int)rt->encoding(), fs)); } ++ void mfhc1(Register rt, int fs) { emit_long(insn_COP1(mfhc1_op, (int)rt->encoding(), fs)); } ++ void mtc1 (Register rt, FloatRegister fs) { emit_long(insn_COP1( mtc1_op, (int)rt->encoding(), (int)fs->encoding())); } ++ void dmtc1(Register rt, FloatRegister fs) { emit_long(insn_COP1(dmtc1_op, (int)rt->encoding(), (int)fs->encoding())); } ++ void ctc1 (Register rt, FloatRegister fs) { emit_long(insn_COP1( ctc1_op, (int)rt->encoding(), (int)fs->encoding())); } ++ void ctc1 (Register rt, int fs) { emit_long(insn_COP1(ctc1_op, (int)rt->encoding(), fs)); } ++ void mthc1(Register rt, int fs) { emit_long(insn_COP1(mthc1_op, (int)rt->encoding(), fs)); } ++ ++ void bc1f (int off) { emit_long(insn_ORRI(cop1_op, bc1f_op, bcf_op, off)); has_delay_slot(); } ++ void bc1fl(int off) { emit_long(insn_ORRI(cop1_op, bc1f_op, bcfl_op, off)); has_delay_slot(); } ++ void bc1t (int off) { emit_long(insn_ORRI(cop1_op, bc1f_op, bct_op, off)); has_delay_slot(); } ++ void bc1tl(int off) { emit_long(insn_ORRI(cop1_op, bc1f_op, bctl_op, off)); has_delay_slot(); } ++ ++ void bc1f (address entry) { bc1f(offset(entry)); } ++ void bc1fl(address entry) { bc1fl(offset(entry)); } ++ void bc1t (address entry) { bc1t(offset(entry)); } ++ void bc1tl(address entry) { bc1tl(offset(entry)); } ++ ++ void bc1f (Label& L) { bc1f(target(L)); } ++ void bc1fl(Label& L) { bc1fl(target(L)); } ++ void bc1t (Label& L) { bc1t(target(L)); } ++ void bc1tl(Label& L) { bc1tl(target(L)); } ++ ++//R0->encoding() is 0; INSN_SINGLE is enclosed by {} for ctags. 
++#define INSN_SINGLE(r1, r2, r3, op) \ ++ { emit_long(insn_F3RO(single_fmt, (int)r1->encoding(), (int)r2->encoding(), (int)r3->encoding(), op));} ++ void add_s (FloatRegister fd, FloatRegister fs, FloatRegister ft) {INSN_SINGLE(ft, fs, fd, fadd_op)} ++ void sub_s (FloatRegister fd, FloatRegister fs, FloatRegister ft) {INSN_SINGLE(ft, fs, fd, fsub_op)} ++ void mul_s (FloatRegister fd, FloatRegister fs, FloatRegister ft) {INSN_SINGLE(ft, fs, fd, fmul_op)} ++ void div_s (FloatRegister fd, FloatRegister fs, FloatRegister ft) {INSN_SINGLE(ft, fs, fd, fdiv_op)} ++ void sqrt_s (FloatRegister fd, FloatRegister fs) {INSN_SINGLE(R0, fs, fd, fsqrt_op)} ++ void abs_s (FloatRegister fd, FloatRegister fs) {INSN_SINGLE(R0, fs, fd, fabs_op)} ++ void mov_s (FloatRegister fd, FloatRegister fs) {INSN_SINGLE(R0, fs, fd, fmov_op)} ++ void neg_s (FloatRegister fd, FloatRegister fs) {INSN_SINGLE(R0, fs, fd, fneg_op)} ++ void round_l_s(FloatRegister fd, FloatRegister fs) {INSN_SINGLE(R0, fs, fd, froundl_op)} ++ void trunc_l_s(FloatRegister fd, FloatRegister fs) {INSN_SINGLE(R0, fs, fd, ftruncl_op)} ++ void ceil_l_s (FloatRegister fd, FloatRegister fs) {INSN_SINGLE(R0, fs, fd, fceill_op)} ++ void floor_l_s(FloatRegister fd, FloatRegister fs) {INSN_SINGLE(R0, fs, fd, ffloorl_op)} ++ void round_w_s(FloatRegister fd, FloatRegister fs) {INSN_SINGLE(R0, fs, fd, froundw_op)} ++ void trunc_w_s(FloatRegister fd, FloatRegister fs) {INSN_SINGLE(R0, fs, fd, ftruncw_op)} ++ void ceil_w_s (FloatRegister fd, FloatRegister fs) {INSN_SINGLE(R0, fs, fd, fceilw_op)} ++ void floor_w_s(FloatRegister fd, FloatRegister fs) {INSN_SINGLE(R0, fs, fd, ffloorw_op)} ++ //null ++ void movf_s(FloatRegister fs, FloatRegister fd, int cc = 0) { ++ assert(cc >= 0 && cc <= 7, "cc is 3 bits"); ++ emit_long((cop1_op<<26) | (single_fmt<<21) | (cc<<18) | ((int)fs->encoding()<<11) | ((int)fd->encoding()<<6) | movf_f_op );} ++ void movt_s(FloatRegister fs, FloatRegister fd, int cc = 0) { ++ assert(cc >= 0 && cc <= 7, "cc is 3 bits"); ++ emit_long((cop1_op<<26) | (single_fmt<<21) | (cc<<18) | 1<<16 | ((int)fs->encoding()<<11) | ((int)fd->encoding()<<6) | movf_f_op );} ++ void movz_s (FloatRegister fd, FloatRegister fs, Register rt) {INSN_SINGLE(rt, fs, fd, movz_f_op)} ++ void movn_s (FloatRegister fd, FloatRegister fs, Register rt) {INSN_SINGLE(rt, fs, fd, movn_f_op)} ++ //null ++ void recip_s (FloatRegister fd, FloatRegister fs) {INSN_SINGLE(R0, fs, fd, frecip_op)} ++ void rsqrt_s (FloatRegister fd, FloatRegister fs) {INSN_SINGLE(R0, fs, fd, frsqrt_op)} ++ //null ++ void cvt_d_s (FloatRegister fd, FloatRegister fs) {INSN_SINGLE(R0, fs, fd, fcvtd_op)} ++ //null ++ void cvt_w_s (FloatRegister fd, FloatRegister fs) {INSN_SINGLE(R0, fs, fd, fcvtw_op)} ++ void cvt_l_s (FloatRegister fd, FloatRegister fs) {INSN_SINGLE(R0, fs, fd, fcvtl_op)} ++ void cvt_ps_s(FloatRegister fd, FloatRegister fs, FloatRegister ft) {INSN_SINGLE(ft, fs, fd, fcvtps_op)} ++ //null ++ void c_f_s (FloatRegister fs, FloatRegister ft) {INSN_SINGLE(ft, fs, R0, f_cond)} ++ void c_un_s (FloatRegister fs, FloatRegister ft) {INSN_SINGLE(ft, fs, R0, un_cond)} ++ void c_eq_s (FloatRegister fs, FloatRegister ft) {INSN_SINGLE(ft, fs, R0, eq_cond)} ++ void c_ueq_s (FloatRegister fs, FloatRegister ft) {INSN_SINGLE(ft, fs, R0, ueq_cond)} ++ void c_olt_s (FloatRegister fs, FloatRegister ft) {INSN_SINGLE(ft, fs, R0, olt_cond)} ++ void c_ult_s (FloatRegister fs, FloatRegister ft) {INSN_SINGLE(ft, fs, R0, ult_cond)} ++ void c_ole_s (FloatRegister fs, FloatRegister ft) {INSN_SINGLE(ft, fs, R0, ole_cond)} 
++ void c_ule_s (FloatRegister fs, FloatRegister ft) {INSN_SINGLE(ft, fs, R0, ule_cond)} ++ void c_sf_s (FloatRegister fs, FloatRegister ft) {INSN_SINGLE(ft, fs, R0, sf_cond)} ++ void c_ngle_s(FloatRegister fs, FloatRegister ft) {INSN_SINGLE(ft, fs, R0, ngle_cond)} ++ void c_seq_s (FloatRegister fs, FloatRegister ft) {INSN_SINGLE(ft, fs, R0, seq_cond)} ++ void c_ngl_s (FloatRegister fs, FloatRegister ft) {INSN_SINGLE(ft, fs, R0, ngl_cond)} ++ void c_lt_s (FloatRegister fs, FloatRegister ft) {INSN_SINGLE(ft, fs, R0, lt_cond)} ++ void c_nge_s (FloatRegister fs, FloatRegister ft) {INSN_SINGLE(ft, fs, R0, nge_cond)} ++ void c_le_s (FloatRegister fs, FloatRegister ft) {INSN_SINGLE(ft, fs, R0, le_cond)} ++ void c_ngt_s (FloatRegister fs, FloatRegister ft) {INSN_SINGLE(ft, fs, R0, ngt_cond)} ++ ++#undef INSN_SINGLE ++ ++ ++//R0->encoding() is 0; INSN_DOUBLE is enclosed by {} for ctags. ++#define INSN_DOUBLE(r1, r2, r3, op) \ ++ { emit_long(insn_F3RO(double_fmt, (int)r1->encoding(), (int)r2->encoding(), (int)r3->encoding(), op));} ++ ++ void add_d (FloatRegister fd, FloatRegister fs, FloatRegister ft) {INSN_DOUBLE(ft, fs, fd, fadd_op)} ++ void sub_d (FloatRegister fd, FloatRegister fs, FloatRegister ft) {INSN_DOUBLE(ft, fs, fd, fsub_op)} ++ void mul_d (FloatRegister fd, FloatRegister fs, FloatRegister ft) {INSN_DOUBLE(ft, fs, fd, fmul_op)} ++ void div_d (FloatRegister fd, FloatRegister fs, FloatRegister ft) {INSN_DOUBLE(ft, fs, fd, fdiv_op)} ++ void sqrt_d (FloatRegister fd, FloatRegister fs) {INSN_DOUBLE(R0, fs, fd, fsqrt_op)} ++ void abs_d (FloatRegister fd, FloatRegister fs) {INSN_DOUBLE(R0, fs, fd, fabs_op)} ++ void mov_d (FloatRegister fd, FloatRegister fs) {INSN_DOUBLE(R0, fs, fd, fmov_op)} ++ void neg_d (FloatRegister fd, FloatRegister fs) {INSN_DOUBLE(R0, fs, fd, fneg_op)} ++ void round_l_d(FloatRegister fd, FloatRegister fs) {INSN_DOUBLE(R0, fs, fd, froundl_op)} ++ void trunc_l_d(FloatRegister fd, FloatRegister fs) {INSN_DOUBLE(R0, fs, fd, ftruncl_op)} ++ void ceil_l_d (FloatRegister fd, FloatRegister fs) {INSN_DOUBLE(R0, fs, fd, fceill_op)} ++ void floor_l_d(FloatRegister fd, FloatRegister fs) {INSN_DOUBLE(R0, fs, fd, ffloorl_op)} ++ void round_w_d(FloatRegister fd, FloatRegister fs) {INSN_DOUBLE(R0, fs, fd, froundw_op)} ++ void trunc_w_d(FloatRegister fd, FloatRegister fs) {INSN_DOUBLE(R0, fs, fd, ftruncw_op)} ++ void ceil_w_d (FloatRegister fd, FloatRegister fs) {INSN_DOUBLE(R0, fs, fd, fceilw_op)} ++ void floor_w_d(FloatRegister fd, FloatRegister fs) {INSN_DOUBLE(R0, fs, fd, ffloorw_op)} ++ //null ++ void movf_d(FloatRegister fs, FloatRegister fd, int cc = 0) { ++ assert(cc >= 0 && cc <= 7, "cc is 3 bits"); ++ emit_long((cop1_op<<26) | (double_fmt<<21) | (cc<<18) | ((int)fs->encoding()<<11) | ((int)fd->encoding()<<6) | movf_f_op );} ++ void movt_d(FloatRegister fs, FloatRegister fd, int cc = 0) { ++ assert(cc >= 0 && cc <= 7, "cc is 3 bits"); ++ emit_long((cop1_op<<26) | (double_fmt<<21) | (cc<<18) | 1<<16 | ((int)fs->encoding()<<11) | ((int)fd->encoding()<<6) | movf_f_op );} ++ void movz_d (FloatRegister fd, FloatRegister fs, Register rt) {INSN_DOUBLE(rt, fs, fd, movz_f_op)} ++ void movn_d (FloatRegister fd, FloatRegister fs, Register rt) {INSN_DOUBLE(rt, fs, fd, movn_f_op)} ++ //null ++ void recip_d (FloatRegister fd, FloatRegister fs) {INSN_DOUBLE(R0, fs, fd, frecip_op)} ++ void rsqrt_d (FloatRegister fd, FloatRegister fs) {INSN_DOUBLE(R0, fs, fd, frsqrt_op)} ++ //null ++ void cvt_s_d (FloatRegister fd, FloatRegister fs) {INSN_DOUBLE(R0, fs, fd, fcvts_op)} ++ void cvt_l_d 
(FloatRegister fd, FloatRegister fs) {INSN_DOUBLE(R0, fs, fd, fcvtl_op)} ++ //null ++ void cvt_w_d (FloatRegister fd, FloatRegister fs) {INSN_DOUBLE(R0, fs, fd, fcvtw_op)} ++ //null ++ void c_f_d (FloatRegister fs, FloatRegister ft) {INSN_DOUBLE(ft, fs, R0, f_cond)} ++ void c_un_d (FloatRegister fs, FloatRegister ft) {INSN_DOUBLE(ft, fs, R0, un_cond)} ++ void c_eq_d (FloatRegister fs, FloatRegister ft) {INSN_DOUBLE(ft, fs, R0, eq_cond)} ++ void c_ueq_d (FloatRegister fs, FloatRegister ft) {INSN_DOUBLE(ft, fs, R0, ueq_cond)} ++ void c_olt_d (FloatRegister fs, FloatRegister ft) {INSN_DOUBLE(ft, fs, R0, olt_cond)} ++ void c_ult_d (FloatRegister fs, FloatRegister ft) {INSN_DOUBLE(ft, fs, R0, ult_cond)} ++ void c_ole_d (FloatRegister fs, FloatRegister ft) {INSN_DOUBLE(ft, fs, R0, ole_cond)} ++ void c_ule_d (FloatRegister fs, FloatRegister ft) {INSN_DOUBLE(ft, fs, R0, ule_cond)} ++ void c_sf_d (FloatRegister fs, FloatRegister ft) {INSN_DOUBLE(ft, fs, R0, sf_cond)} ++ void c_ngle_d(FloatRegister fs, FloatRegister ft) {INSN_DOUBLE(ft, fs, R0, ngle_cond)} ++ void c_seq_d (FloatRegister fs, FloatRegister ft) {INSN_DOUBLE(ft, fs, R0, seq_cond)} ++ void c_ngl_d (FloatRegister fs, FloatRegister ft) {INSN_DOUBLE(ft, fs, R0, ngl_cond)} ++ void c_lt_d (FloatRegister fs, FloatRegister ft) {INSN_DOUBLE(ft, fs, R0, lt_cond)} ++ void c_nge_d (FloatRegister fs, FloatRegister ft) {INSN_DOUBLE(ft, fs, R0, nge_cond)} ++ void c_le_d (FloatRegister fs, FloatRegister ft) {INSN_DOUBLE(ft, fs, R0, le_cond)} ++ void c_ngt_d (FloatRegister fs, FloatRegister ft) {INSN_DOUBLE(ft, fs, R0, ngt_cond)} ++ ++#undef INSN_DOUBLE ++ ++ ++ //null ++ void cvt_s_w(FloatRegister fd, FloatRegister fs) { emit_long(insn_F3RO(word_fmt, 0, (int)fs->encoding(), (int)fd->encoding(), fcvts_op)); } ++ void cvt_d_w(FloatRegister fd, FloatRegister fs) { emit_long(insn_F3RO(word_fmt, 0, (int)fs->encoding(), (int)fd->encoding(), fcvtd_op)); } ++ //null ++ void cvt_s_l(FloatRegister fd, FloatRegister fs) { emit_long(insn_F3RO(long_fmt, 0, (int)fs->encoding(), (int)fd->encoding(), fcvts_op)); } ++ void cvt_d_l(FloatRegister fd, FloatRegister fs) { emit_long(insn_F3RO(long_fmt, 0, (int)fs->encoding(), (int)fd->encoding(), fcvtd_op)); } ++ //null ++ ++ ++//R0->encoding() is 0; INSN_PS is enclosed by {} for ctags. 
++#define INSN_PS(r1, r2, r3, op) \ ++ { emit_long(insn_F3RO(ps_fmt, (int)r1->encoding(), (int)r2->encoding(), (int)r3->encoding(), op));} ++ ++ void add_ps (FloatRegister fd, FloatRegister fs, FloatRegister ft) {INSN_PS(ft, fs, fd, fadd_op)} ++ void sub_ps (FloatRegister fd, FloatRegister fs, FloatRegister ft) {INSN_PS(ft, fs, fd, fsub_op)} ++ void mul_ps (FloatRegister fd, FloatRegister fs, FloatRegister ft) {INSN_PS(ft, fs, fd, fmul_op)} ++ //null ++ void abs_ps (FloatRegister fd, FloatRegister fs) {INSN_PS(R0, fs, fd, fabs_op)} ++ void mov_ps (FloatRegister fd, FloatRegister fs) {INSN_PS(R0, fs, fd, fmov_op)} ++ void neg_ps (FloatRegister fd, FloatRegister fs) {INSN_PS(R0, fs, fd, fneg_op)} ++ //null ++ //void movf_ps(FloatRegister rd, FloatRegister rs, FPConditionCode cc) { unimplemented(" movf_ps")} ++ //void movt_ps(FloatRegister rd, FloatRegister rs, FPConditionCode cc) { unimplemented(" movt_ps") } ++ void movz_ps (FloatRegister fd, FloatRegister fs, Register rt) {INSN_PS(rt, fs, fd, movz_f_op)} ++ void movn_ps (FloatRegister fd, FloatRegister fs, Register rt) {INSN_PS(rt, fs, fd, movn_f_op)} ++ //null ++ void cvt_s_pu (FloatRegister fd, FloatRegister fs) {INSN_PS(R0, fs, fd, fcvts_op)} ++ //null ++ void cvt_s_pl (FloatRegister fd, FloatRegister fs) {INSN_PS(R0, fs, fd, fcvtspl_op)} ++ //null ++ void pll_ps (FloatRegister fd, FloatRegister fs, FloatRegister ft) {INSN_PS(ft, fs, fd, fpll_op)} ++ void plu_ps (FloatRegister fd, FloatRegister fs, FloatRegister ft) {INSN_PS(ft, fs, fd, fplu_op)} ++ void pul_ps (FloatRegister fd, FloatRegister fs, FloatRegister ft) {INSN_PS(ft, fs, fd, fpul_op)} ++ void puu_ps (FloatRegister fd, FloatRegister fs, FloatRegister ft) {INSN_PS(ft, fs, fd, fpuu_op)} ++ void c_f_ps (FloatRegister fs, FloatRegister ft) {INSN_PS(ft, fs, R0, f_cond)} ++ void c_un_ps (FloatRegister fs, FloatRegister ft) {INSN_PS(ft, fs, R0, un_cond)} ++ void c_eq_ps (FloatRegister fs, FloatRegister ft) {INSN_PS(ft, fs, R0, eq_cond)} ++ void c_ueq_ps (FloatRegister fs, FloatRegister ft) {INSN_PS(ft, fs, R0, ueq_cond)} ++ void c_olt_ps (FloatRegister fs, FloatRegister ft) {INSN_PS(ft, fs, R0, olt_cond)} ++ void c_ult_ps (FloatRegister fs, FloatRegister ft) {INSN_PS(ft, fs, R0, ult_cond)} ++ void c_ole_ps (FloatRegister fs, FloatRegister ft) {INSN_PS(ft, fs, R0, ole_cond)} ++ void c_ule_ps (FloatRegister fs, FloatRegister ft) {INSN_PS(ft, fs, R0, ule_cond)} ++ void c_sf_ps (FloatRegister fs, FloatRegister ft) {INSN_PS(ft, fs, R0, sf_cond)} ++ void c_ngle_ps(FloatRegister fs, FloatRegister ft) {INSN_PS(ft, fs, R0, ngle_cond)} ++ void c_seq_ps (FloatRegister fs, FloatRegister ft) {INSN_PS(ft, fs, R0, seq_cond)} ++ void c_ngl_ps (FloatRegister fs, FloatRegister ft) {INSN_PS(ft, fs, R0, ngl_cond)} ++ void c_lt_ps (FloatRegister fs, FloatRegister ft) {INSN_PS(ft, fs, R0, lt_cond)} ++ void c_nge_ps (FloatRegister fs, FloatRegister ft) {INSN_PS(ft, fs, R0, nge_cond)} ++ void c_le_ps (FloatRegister fs, FloatRegister ft) {INSN_PS(ft, fs, R0, le_cond)} ++ void c_ngt_ps (FloatRegister fs, FloatRegister ft) {INSN_PS(ft, fs, R0, ngt_cond)} ++ //null ++#undef INSN_PS ++ //COP1 end ++ ++ ++ //COP1X ++//R0->encoding() is 0; INSN_SINGLE is enclosed by {} for ctags. 
++#define INSN_COP1X(r0, r1, r2, r3, op) \ ++ { emit_long(insn_F3ROX((int)r0->encoding(), (int)r1->encoding(), (int)r2->encoding(), (int)r3->encoding(), op));} ++ void madd_s(FloatRegister fd, FloatRegister fr, FloatRegister fs, FloatRegister ft) {INSN_COP1X(fr, ft, fs, fd, madd_s_op) } ++ void madd_d(FloatRegister fd, FloatRegister fr, FloatRegister fs, FloatRegister ft) {INSN_COP1X(fr, ft, fs, fd, madd_d_op) } ++ void madd_ps(FloatRegister fd, FloatRegister fr, FloatRegister fs, FloatRegister ft){INSN_COP1X(fr, ft, fs, fd, madd_ps_op) } ++ void msub_s(FloatRegister fd, FloatRegister fr, FloatRegister fs, FloatRegister ft) {INSN_COP1X(fr, ft, fs, fd, msub_s_op) } ++ void msub_d(FloatRegister fd, FloatRegister fr, FloatRegister fs, FloatRegister ft) {INSN_COP1X(fr, ft, fs, fd, msub_d_op) } ++ void msub_ps(FloatRegister fd, FloatRegister fr, FloatRegister fs, FloatRegister ft){INSN_COP1X(fr, ft, fs, fd, msub_ps_op) } ++ void nmadd_s(FloatRegister fd, FloatRegister fr, FloatRegister fs, FloatRegister ft) {INSN_COP1X(fr, ft, fs, fd, nmadd_s_op) } ++ void nmadd_d(FloatRegister fd, FloatRegister fr, FloatRegister fs, FloatRegister ft) {INSN_COP1X(fr, ft, fs, fd, nmadd_d_op) } ++ void nmadd_ps(FloatRegister fd, FloatRegister fr, FloatRegister fs, FloatRegister ft){INSN_COP1X(fr, ft, fs, fd, nmadd_ps_op) } ++ void nmsub_s(FloatRegister fd, FloatRegister fr, FloatRegister fs, FloatRegister ft) {INSN_COP1X(fr, ft, fs, fd, nmsub_s_op) } ++ void nmsub_d(FloatRegister fd, FloatRegister fr, FloatRegister fs, FloatRegister ft) {INSN_COP1X(fr, ft, fs, fd, nmsub_d_op) } ++ void nmsub_ps(FloatRegister fd, FloatRegister fr, FloatRegister fs, FloatRegister ft){INSN_COP1X(fr, ft, fs, fd, nmsub_ps_op) } ++#undef INSN_COP1X ++ //COP1X end ++ ++ //SPECIAL2 ++//R0->encoding() is 0; INSN_PS is enclosed by {} for ctags. 
++#define INSN_S2(op) \ ++ { emit_long((special2_op << 26) | ((int)rs->encoding() << 21) | ((int)rt->encoding() << 16) | ((int)rd->encoding() << 11) | op);} ++ ++ void madd (Register rs, Register rt) { emit_long((special2_op << 26) | ((int)rs->encoding() << 21) | ((int)rt->encoding() << 16) | madd_op); } ++ void maddu (Register rs, Register rt) { emit_long((special2_op << 26) | ((int)rs->encoding() << 21) | ((int)rt->encoding() << 16) | maddu_op); } ++ void mul (Register rd, Register rs, Register rt) { INSN_S2(mul_op) } ++ void gsandn (Register rd, Register rs, Register rt) { INSN_S2((0x12 << 6) | gs0x03_op) } ++ void msub (Register rs, Register rt) { emit_long((special2_op << 26) | ((int)rs->encoding() << 21) | ((int)rt->encoding() << 16) | msub_op); } ++ void msubu (Register rs, Register rt) { emit_long((special2_op << 26) | ((int)rs->encoding() << 21) | ((int)rt->encoding() << 16) | msubu_op); } ++ void gsorn (Register rd, Register rs, Register rt) { INSN_S2((0x12 << 6) | gs0x06_op) } ++ ++ void gsmult (Register rd, Register rs, Register rt) { INSN_S2(gsmult_op) } ++ void gsdmult (Register rd, Register rs, Register rt) { INSN_S2(gsdmult_op) } ++ void gsmultu (Register rd, Register rs, Register rt) { INSN_S2(gsmultu_op) } ++ void gsdmultu(Register rd, Register rs, Register rt) { INSN_S2(gsdmultu_op)} ++ void gsdiv (Register rd, Register rs, Register rt) { INSN_S2(gsdiv_op) } ++ void gsddiv (Register rd, Register rs, Register rt) { INSN_S2(gsddiv_op) } ++ void gsdivu (Register rd, Register rs, Register rt) { INSN_S2(gsdivu_op) } ++ void gsddivu (Register rd, Register rs, Register rt) { INSN_S2(gsddivu_op) } ++ void gsmod (Register rd, Register rs, Register rt) { INSN_S2(gsmod_op) } ++ void gsdmod (Register rd, Register rs, Register rt) { INSN_S2(gsdmod_op) } ++ void gsmodu (Register rd, Register rs, Register rt) { INSN_S2(gsmodu_op) } ++ void gsdmodu (Register rd, Register rs, Register rt) { INSN_S2(gsdmodu_op) } ++ void clz (Register rd, Register rs) { emit_long((special2_op << 26) | ((int)rs->encoding() << 21) | ((int)rd->encoding() << 16) | ((int)rd->encoding() << 11) | clz_op); } ++ void clo (Register rd, Register rs) { emit_long((special2_op << 26) | ((int)rs->encoding() << 21) | ((int)rd->encoding() << 16) | ((int)rd->encoding() << 11) | clo_op); } ++ void ctz (Register rd, Register rs) { emit_long((special2_op << 26) | ((int)rs->encoding() << 21) | ((int)rd->encoding() << 16) | ((int)rd->encoding() << 11) | 0 << 6| xctx_op); } ++ void cto (Register rd, Register rs) { emit_long((special2_op << 26) | ((int)rs->encoding() << 21) | ((int)rd->encoding() << 16) | ((int)rd->encoding() << 11) | 1 << 6| xctx_op); } ++ void dctz(Register rd, Register rs) { emit_long((special2_op << 26) | ((int)rs->encoding() << 21) | ((int)rd->encoding() << 16) | ((int)rd->encoding() << 11) | 2 << 6| xctx_op); } ++ void dcto(Register rd, Register rs) { emit_long((special2_op << 26) | ((int)rs->encoding() << 21) | ((int)rd->encoding() << 16) | ((int)rd->encoding() << 11) | 3 << 6| xctx_op); } ++ void dclz(Register rd, Register rs) { emit_long((special2_op << 26) | ((int)rs->encoding() << 21) | ((int)rd->encoding() << 16) | ((int)rd->encoding() << 11) | dclz_op); } ++ void dclo(Register rd, Register rs) { emit_long((special2_op << 26) | ((int)rs->encoding() << 21) | ((int)rd->encoding() << 16) | ((int)rd->encoding() << 11) | dclo_op); } ++ ++#undef INSN_S2 ++ ++ //SPECIAL3 ++/* ++// FIXME ++#define is_0_to_32(a, b) \ ++ assert (a >= 0, " just a check"); \ ++ assert (a <= 0, " just a check"); \ ++ assert (b >= 
0, " just a check"); \ ++ assert (b <= 0, " just a check"); \ ++ assert (a+b >= 0, " just a check"); \ ++ assert (a+b <= 0, " just a check"); ++ */ ++#define is_0_to_32(a, b) ++ ++ void ins (Register rt, Register rs, int pos, int size) { is_0_to_32(pos, size); emit_long((special3_op << 26) | ((int)rs->encoding() << 21) | ((int)rt->encoding() << 16) | (low(pos+size-1, 5) << 11) | (low(pos, 5) << 6) | ins_op); } ++ void dinsm(Register rt, Register rs, int pos, int size) { is_0_to_32(pos, size); emit_long((special3_op << 26) | ((int)rs->encoding() << 21) | ((int)rt->encoding() << 16) | (low(pos+size-33, 5) << 11) | (low(pos, 5) << 6) | dinsm_op); } ++ void dinsu(Register rt, Register rs, int pos, int size) { is_0_to_32(pos, size); emit_long((special3_op << 26) | ((int)rs->encoding() << 21) | ((int)rt->encoding() << 16) | (low(pos+size-33, 5) << 11) | (low(pos-32, 5) << 6) | dinsu_op); } ++ void dins (Register rt, Register rs, int pos, int size) { ++ guarantee((0 <= pos) && (pos < 32), "pos must be in [0, 32)"); ++ guarantee((0 < size) && (size <= 32), "size must be in (0, 32]"); ++ guarantee((0 < pos + size) && (pos + size <= 32), "pos + size must be in (0, 32]"); ++ ++ emit_long((special3_op << 26) | ((int)rs->encoding() << 21) | ((int)rt->encoding() << 16) | (low(pos+size-1, 5) << 11) | (low(pos, 5) << 6) | dins_op); ++ } ++ ++ void repl_qb (Register rd, int const8) { assert(VM_Version::supports_dsp(), ""); emit_long((special3_op << 26) | (low(const8, 8) << 16) | ((int)rd->encoding() << 11) | repl_qb_op << 6 | re1_op); } ++ void replv_qb(Register rd, Register rt) { assert(VM_Version::supports_dsp(), ""); emit_long((special3_op << 26) | ((int)rt->encoding() << 16) | ((int)rd->encoding() << 11) | replv_qb_op << 6 | re1_op ); } ++ void repl_ph (Register rd, int const10) { assert(VM_Version::supports_dsp(), ""); emit_long((special3_op << 26) | (low(const10, 10) << 16) | ((int)rd->encoding() << 11) | repl_ph_op << 6 | re1_op); } ++ void replv_ph(Register rd, Register rt) { assert(VM_Version::supports_dsp(), ""); emit_long((special3_op << 26) | ((int)rt->encoding() << 16) | ((int)rd->encoding() << 11) | replv_ph_op << 6 | re1_op ); } ++ ++ void repl_ob (Register rd, int const8) { assert(VM_Version::supports_dsp(), ""); emit_long((special3_op << 26) | (low(const8, 8) << 16) | ((int)rd->encoding() << 11) | repl_ob_op << 6 | re2_op); } ++ void replv_ob(Register rd, Register rt) { assert(VM_Version::supports_dsp(), ""); emit_long((special3_op << 26) | ((int)rt->encoding() << 16) | ((int)rd->encoding() << 11) | replv_ob_op << 6 | re2_op ); } ++ void repl_qh (Register rd, int const10) { assert(VM_Version::supports_dsp(), ""); emit_long((special3_op << 26) | (low(const10, 10) << 16) | ((int)rd->encoding() << 11) | repl_qh_op << 6 | re2_op); } ++ void replv_qh(Register rd, Register rt) { assert(VM_Version::supports_dsp(), ""); emit_long((special3_op << 26) | ((int)rt->encoding() << 16) | ((int)rd->encoding() << 11) | replv_qh_op << 6 | re2_op ); } ++ void repl_pw (Register rd, int const10) { assert(VM_Version::supports_dsp(), ""); emit_long((special3_op << 26) | (low(const10, 10) << 16) | ((int)rd->encoding() << 11) | repl_pw_op << 6 | re2_op); } ++ void replv_pw(Register rd, Register rt) { assert(VM_Version::supports_dsp(), ""); emit_long((special3_op << 26) | ((int)rt->encoding() << 16) | ((int)rd->encoding() << 11) | replv_pw_op << 6 | re2_op ); } ++ ++ void sdc1(FloatRegister ft, Register base, int off) { emit_long(insn_ORRI(sdc1_op, (int)base->encoding(), (int)ft->encoding(), off)); } ++ void 
sdc1(FloatRegister ft, Address dst); ++ void swc1(FloatRegister ft, Register base, int off) { emit_long(insn_ORRI(swc1_op, (int)base->encoding(), (int)ft->encoding(), off)); } ++ void swc1(FloatRegister ft, Address dst); ++ ++ ++ static void print_instruction(int); ++ int patched_branch(int dest_pos, int inst, int inst_pos); ++ int branch_destination(int inst, int pos); ++ ++ // Loongson extension ++ ++ // gssq/gslq/gssqc1/gslqc1: vAddr = sign_extend(offset << 4 ) + GPR[base]. Therefore, the off should be ">> 4". ++ void gslble(Register rt, Register base, Register bound) { ++ emit_long((gs_lwc2_op << 26) | ((int)base->encoding() << 21) | ((int)rt->encoding() << 16) | ((int)bound->encoding() << 11) | 0 << 6 | gslble_op); ++ } ++ ++ void gslbgt(Register rt, Register base, Register bound) { ++ emit_long((gs_lwc2_op << 26) | ((int)base->encoding() << 21) | ((int)rt->encoding() << 16) | ((int)bound->encoding() << 11) | 0 << 6 | gslbgt_op); ++ } ++ ++ void gslhle(Register rt, Register base, Register bound) { ++ emit_long((gs_lwc2_op << 26) | ((int)base->encoding() << 21) | ((int)rt->encoding() << 16) | ((int)bound->encoding() << 11) | 0 << 6 | gslhle_op); ++ } ++ ++ void gslhgt(Register rt, Register base, Register bound) { ++ emit_long((gs_lwc2_op << 26) | ((int)base->encoding() << 21) | ((int)rt->encoding() << 16) | ((int)bound->encoding() << 11) | 0 << 6 | gslhgt_op); ++ } ++ ++ void gslwle(Register rt, Register base, Register bound) { ++ emit_long((gs_lwc2_op << 26) | ((int)base->encoding() << 21) | ((int)rt->encoding() << 16) | ((int)bound->encoding() << 11) | 0 << 6 | gslwle_op); ++ } ++ ++ void gslwgt(Register rt, Register base, Register bound) { ++ emit_long((gs_lwc2_op << 26) | ((int)base->encoding() << 21) | ((int)rt->encoding() << 16) | ((int)bound->encoding() << 11) | 0 << 6 | gslwgt_op); ++ } ++ ++ void gsldle(Register rt, Register base, Register bound) { ++ emit_long((gs_lwc2_op << 26) | ((int)base->encoding() << 21) | ((int)rt->encoding() << 16) | ((int)bound->encoding() << 11) | 0 << 6 | gsldle_op); ++ } ++ ++ void gsldgt(Register rt, Register base, Register bound) { ++ emit_long((gs_lwc2_op << 26) | ((int)base->encoding() << 21) | ((int)rt->encoding() << 16) | ((int)bound->encoding() << 11) | 0 << 6 | gsldgt_op); ++ } ++ ++ void gslwlec1(FloatRegister rt, Register base, Register bound) { ++ emit_long((gs_lwc2_op << 26) | ((int)base->encoding() << 21) | ((int)rt->encoding() << 16) | ((int)bound->encoding() << 11) | 0 << 6 | gslwlec1_op); ++ } ++ ++ void gslwgtc1(FloatRegister rt, Register base, Register bound) { ++ emit_long((gs_lwc2_op << 26) | ((int)base->encoding() << 21) | ((int)rt->encoding() << 16) | ((int)bound->encoding() << 11) | 0 << 6 | gslwgtc1_op); ++ } ++ ++ void gsldlec1(FloatRegister rt, Register base, Register bound) { ++ emit_long((gs_lwc2_op << 26) | ((int)base->encoding() << 21) | ((int)rt->encoding() << 16) | ((int)bound->encoding() << 11) | 0 << 6 | gsldlec1_op); ++ } ++ ++ void gsldgtc1(FloatRegister rt, Register base, Register bound) { ++ emit_long((gs_lwc2_op << 26) | ((int)base->encoding() << 21) | ((int)rt->encoding() << 16) | ((int)bound->encoding() << 11) | 0 << 6 | gsldgtc1_op); ++ } ++ ++ void gslq(Register rq, Register rt, Register base, int off) { ++ assert(!(off & 0xF), "gslq: the low 4 bits of off must be 0"); ++ off = off >> 4; ++ assert(is_simm(off, 9),"gslq: off exceeds 9 bits"); ++ emit_long((gs_lwc2_op << 26) | ((int)base->encoding() << 21) | ((int)rt->encoding() << 16) | 0 << 15 | (low(off, 9) << 6) | gslq_op | (int)rq->encoding() ); ++ } 
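
A quick standalone illustration (not part of the patch) of the offset rule spelled out in the comment before gslble above: the quad-word gslq/gssq forms only get a 9-bit signed field that the hardware shifts left by 4, so a byte offset must be 16-byte aligned and lie in [-4096, 4080]. The helper name below is made up for the sketch, and plain C++ stands in for the HotSpot assembler types.

#include <cassert>

// Mirrors the asserts in gslq()/gssq(): validate a byte offset and return the
// 9-bit field value the encoder would emit (low(off, 9) after off >> 4).
static int quadword_offset_field(int off_bytes) {
  assert((off_bytes & 0xF) == 0 && "low 4 bits of off must be 0");   // 16-byte aligned
  int scaled = off_bytes >> 4;                                       // hardware re-multiplies by 16
  assert(scaled >= -256 && scaled < 256 && "off exceeds 9 bits");    // same range as is_simm(off, 9)
  return scaled & 0x1FF;                                             // low(off, 9)
}

// e.g. quadword_offset_field(32) == 2, and quadword_offset_field(-16) == 0x1FF.
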
++ ++ void gslqc1(FloatRegister rq, FloatRegister rt, Register base, int off) { ++ assert(!(off & 0xF), "gslqc1: the low 4 bits of off must be 0"); ++ off = off >> 4; ++ assert(is_simm(off, 9),"gslqc1: off exceeds 9 bits"); ++ emit_long((gs_lwc2_op << 26) | ((int)base->encoding() << 21) | ((int)rt->encoding() << 16) | 1 << 15 | (low(off, 9) << 6) | gslq_op | (int)rq->encoding() ); ++ } ++ ++ void gssble(Register rt, Register base, Register bound) { ++ emit_long((gs_swc2_op << 26) | ((int)base->encoding() << 21) | ((int)rt->encoding() << 16) | ((int)bound->encoding() << 11) | 0 << 6 | gssble_op); ++ } ++ ++ void gssbgt(Register rt, Register base, Register bound) { ++ emit_long((gs_swc2_op << 26) | ((int)base->encoding() << 21) | ((int)rt->encoding() << 16) | ((int)bound->encoding() << 11) | 0 << 6 | gssbgt_op); ++ } ++ ++ void gsshle(Register rt, Register base, Register bound) { ++ emit_long((gs_swc2_op << 26) | ((int)base->encoding() << 21) | ((int)rt->encoding() << 16) | ((int)bound->encoding() << 11) | 0 << 6 | gsshle_op); ++ } ++ ++ void gsshgt(Register rt, Register base, Register bound) { ++ emit_long((gs_swc2_op << 26) | ((int)base->encoding() << 21) | ((int)rt->encoding() << 16) | ((int)bound->encoding() << 11) | 0 << 6 | gsshgt_op); ++ } ++ ++ void gsswle(Register rt, Register base, Register bound) { ++ emit_long((gs_swc2_op << 26) | ((int)base->encoding() << 21) | ((int)rt->encoding() << 16) | ((int)bound->encoding() << 11) | 0 << 6 | gsswle_op); ++ } ++ ++ void gsswgt(Register rt, Register base, Register bound) { ++ emit_long((gs_swc2_op << 26) | ((int)base->encoding() << 21) | ((int)rt->encoding() << 16) | ((int)bound->encoding() << 11) | 0 << 6 | gsswgt_op); ++ } ++ ++ void gssdle(Register rt, Register base, Register bound) { ++ emit_long((gs_swc2_op << 26) | ((int)base->encoding() << 21) | ((int)rt->encoding() << 16) | ((int)bound->encoding() << 11) | 0 << 6 | gssdle_op); ++ } ++ ++ void gssdgt(Register rt, Register base, Register bound) { ++ emit_long((gs_swc2_op << 26) | ((int)base->encoding() << 21) | ((int)rt->encoding() << 16) | ((int)bound->encoding() << 11) | 0 << 6 | gssdgt_op); ++ } ++ ++ void gsswlec1(FloatRegister rt, Register base, Register bound) { ++ emit_long((gs_swc2_op << 26) | ((int)base->encoding() << 21) | ((int)rt->encoding() << 16) | ((int)bound->encoding() << 11) | 0 << 6 | gsswlec1_op); ++ } ++ ++ void gsswgtc1(FloatRegister rt, Register base, Register bound) { ++ emit_long((gs_swc2_op << 26) | ((int)base->encoding() << 21) | ((int)rt->encoding() << 16) | ((int)bound->encoding() << 11) | 0 << 6 | gsswgtc1_op); ++ } ++ ++ void gssdlec1(FloatRegister rt, Register base, Register bound) { ++ emit_long((gs_swc2_op << 26) | ((int)base->encoding() << 21) | ((int)rt->encoding() << 16) | ((int)bound->encoding() << 11) | 0 << 6 | gssdlec1_op); ++ } ++ ++ void gssdgtc1(FloatRegister rt, Register base, Register bound) { ++ emit_long((gs_swc2_op << 26) | ((int)base->encoding() << 21) | ((int)rt->encoding() << 16) | ((int)bound->encoding() << 11) | 0 << 6 | gssdgtc1_op); ++ } ++ ++ void gssq(Register rq, Register rt, Register base, int off) { ++ assert(!(off & 0xF), "gssq: the low 4 bits of off must be 0"); ++ off = off >> 4; ++ assert(is_simm(off, 9),"gssq: off exceeds 9 bits"); ++ emit_long((gs_swc2_op << 26) | ((int)base->encoding() << 21) | ((int)rt->encoding() << 16) | 0 << 15 | (low(off, 9) << 6) | gssq_op | (int)rq->encoding() ); ++ } ++ ++ void gssqc1(FloatRegister rq, FloatRegister rt, Register base, int off) { ++ assert(!(off & 0xF), "gssqc1: the low 4 bits 
of off must be 0"); ++ off = off >> 4; ++ assert(is_simm(off, 9),"gssqc1: off exceeds 9 bits"); ++ emit_long((gs_swc2_op << 26) | ((int)base->encoding() << 21) | ((int)rt->encoding() << 16) | 1 << 15 | (low(off, 9) << 6) | gssq_op | (int)rq->encoding() ); ++ } ++ ++ //LDC2 & SDC2 ++#define INSN(OPS, OP) \ ++ assert(is_simm(off, 8), "NAME: off exceeds 8 bits"); \ ++ assert(UseLEXT1, "check UseLEXT1"); \ ++ emit_long( (OPS << 26) | ((int)base->encoding() << 21) | ((int)rt->encoding() << 16) | \ ++ ((int)index->encoding() << 11) | (low(off, 8) << 3) | OP); ++ ++#define INSN_LDC2(NAME, op) \ ++ void NAME(Register rt, Register base, Register index, int off) { \ ++ INSN(gs_ldc2_op, op) \ ++ } ++ ++#define INSN_LDC2_F(NAME, op) \ ++ void NAME(FloatRegister rt, Register base, Register index, int off) { \ ++ INSN(gs_ldc2_op, op) \ ++ } ++ ++#define INSN_SDC2(NAME, op) \ ++ void NAME(Register rt, Register base, Register index, int off) { \ ++ INSN(gs_sdc2_op, op) \ ++ } ++ ++#define INSN_SDC2_F(NAME, op) \ ++ void NAME(FloatRegister rt, Register base, Register index, int off) { \ ++ INSN(gs_sdc2_op, op) \ ++ } ++ ++/* ++ void gslbx(Register rt, Register base, Register index, int off) { ++ assert(is_simm(off, 8), "gslbx: off exceeds 8 bits"); ++ assert(UseLEXT1, "check UseLEXT1"); ++ emit_long( (gs_ldc2_op << 26) | ((int)base->encoding() << 21) | ((int)rt->encoding() << 16) | ++ ((int)index->encoding() << 11) | (low(off, 8) << 3) | gslbx_op); ++ void gslbx(Register rt, Register base, Register index, int off) {INSN(gs_ldc2_op, gslbx_op);} ++ ++ INSN_LDC2(gslbx, gslbx_op) ++ INSN_LDC2(gslhx, gslhx_op) ++ INSN_LDC2(gslwx, gslwx_op) ++ INSN_LDC2(gsldx, gsldx_op) ++ INSN_LDC2_F(gslwxc1, gslwxc1_op) ++ INSN_LDC2_F(gsldxc1, gsldxc1_op) ++ ++ INSN_SDC2(gssbx, gssbx_op) ++ INSN_SDC2(gsshx, gsshx_op) ++ INSN_SDC2(gsswx, gsswx_op) ++ INSN_SDC2(gssdx, gssdx_op) ++ INSN_SDC2_F(gsswxc1, gsswxc1_op) ++ INSN_SDC2_F(gssdxc1, gssdxc1_op) ++*/ ++ void gslbx(Register rt, Register base, Register index, int off) {INSN(gs_ldc2_op, gslbx_op) } ++ void gslhx(Register rt, Register base, Register index, int off) {INSN(gs_ldc2_op, gslhx_op) } ++ void gslwx(Register rt, Register base, Register index, int off) {INSN(gs_ldc2_op, gslwx_op) } ++ void gsldx(Register rt, Register base, Register index, int off) {INSN(gs_ldc2_op, gsldx_op) } ++ void gslwxc1(FloatRegister rt, Register base, Register index, int off) {INSN(gs_ldc2_op, gslwxc1_op) } ++ void gsldxc1(FloatRegister rt, Register base, Register index, int off) {INSN(gs_ldc2_op, gsldxc1_op) } ++ ++ void gssbx(Register rt, Register base, Register index, int off) {INSN(gs_sdc2_op, gssbx_op) } ++ void gsshx(Register rt, Register base, Register index, int off) {INSN(gs_sdc2_op, gsshx_op) } ++ void gsswx(Register rt, Register base, Register index, int off) {INSN(gs_sdc2_op, gsswx_op) } ++ void gssdx(Register rt, Register base, Register index, int off) {INSN(gs_sdc2_op, gssdx_op) } ++ void gsswxc1(FloatRegister rt, Register base, Register index, int off) {INSN(gs_sdc2_op, gsswxc1_op) } ++ void gssdxc1(FloatRegister rt, Register base, Register index, int off) {INSN(gs_sdc2_op, gssdxc1_op) } ++ ++#undef INSN ++#undef INSN_LDC2 ++#undef INSN_LDC2_F ++#undef INSN_SDC2 ++#undef INSN_SDC2_F ++ ++ // cpucfg on Loongson CPUs above 3A4000 ++ void cpucfg(Register rd, Register rs) { emit_long((gs_lwc2_op << 26) | ((int)rs->encoding() << 21) | (0b01000 << 16) | ((int)rd->encoding() << 11) | ( 0b00100 << 6) | 0b011000);} ++ ++ enum Membar_mask_bits { ++ StoreStore = 1 << 3, ++ LoadStore = 1 << 2, 
++ StoreLoad = 1 << 1, ++ LoadLoad = 1 << 0 ++ }; ++ ++ // Serializes memory and blows flags ++ void membar(Membar_mask_bits order_constraint) { ++ sync(); ++ } ++ ++public: ++ // Creation ++ Assembler(CodeBuffer* code) : AbstractAssembler(code) { ++#ifdef CHECK_DELAY ++ delay_state = no_delay; ++#endif ++ } ++ ++ // Decoding ++ static address locate_operand(address inst, WhichOperand which); ++ static address locate_next_instruction(address inst); ++}; ++ ++#endif // CPU_MIPS_VM_ASSEMBLER_MIPS_HPP +diff --git a/src/hotspot/cpu/mips/assembler_mips.inline.hpp b/src/hotspot/cpu/mips/assembler_mips.inline.hpp +new file mode 100644 +index 0000000000..f35a06fc4e +--- /dev/null ++++ b/src/hotspot/cpu/mips/assembler_mips.inline.hpp +@@ -0,0 +1,33 @@ ++/* ++ * Copyright (c) 1997, 2010, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2018, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#ifndef CPU_MIPS_VM_ASSEMBLER_MIPS_INLINE_HPP ++#define CPU_MIPS_VM_ASSEMBLER_MIPS_INLINE_HPP ++ ++#include "asm/assembler.inline.hpp" ++#include "asm/codeBuffer.hpp" ++#include "code/codeCache.hpp" ++ ++#endif // CPU_MIPS_VM_ASSEMBLER_MIPS_INLINE_HPP +diff --git a/src/hotspot/cpu/mips/bytes_mips.hpp b/src/hotspot/cpu/mips/bytes_mips.hpp +new file mode 100644 +index 0000000000..4172db219b +--- /dev/null ++++ b/src/hotspot/cpu/mips/bytes_mips.hpp +@@ -0,0 +1,181 @@ ++/* ++ * Copyright (c) 1997, 2010, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2019, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 
++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#ifndef CPU_MIPS_VM_BYTES_MIPS_HPP ++#define CPU_MIPS_VM_BYTES_MIPS_HPP ++ ++#include "memory/allocation.hpp" ++ ++class Bytes: AllStatic { ++ public: ++ // Returns true if the byte ordering used by Java is different from the native byte ordering ++ // of the underlying machine. For example, this is true for Intel x86, but false for Solaris ++ // on Sparc. ++ // we use mipsel, so return true ++ static inline bool is_Java_byte_ordering_different(){ return true; } ++ ++ ++ // Efficient reading and writing of unaligned unsigned data in platform-specific byte ordering ++ // (no special code is needed since x86 CPUs can access unaligned data) ++ static inline u2 get_native_u2(address p) { ++ if ((intptr_t)p & 0x1) { ++ return ((u2)p[1] << 8) | (u2)p[0]; ++ } else { ++ return *(u2*)p; ++ } ++ } ++ ++ static inline u4 get_native_u4(address p) { ++ if ((intptr_t)p & 3) { ++ u4 res; ++ __asm__ __volatile__ ( ++ " .set push\n" ++ " .set mips64\n" ++ " .set noreorder\n" ++ ++ " lwr %[res], 0(%[addr]) \n" ++ " lwl %[res], 3(%[addr]) \n" ++ ++ " .set pop" ++ : [res] "=&r" (res) ++ : [addr] "r" (p) ++ : "memory" ++ ); ++ return res; ++ } else { ++ return *(u4*)p; ++ } ++ } ++ ++ static inline u8 get_native_u8(address p) { ++ u8 res; ++ u8 temp = 0; ++ // u4 tp;//tmp register ++ __asm__ __volatile__ ( ++ " .set push\n" ++ " .set mips64\n" ++ " .set noreorder\n" ++ " .set noat\n" ++ " andi $1,%[addr],0x7 \n" ++ " beqz $1,1f \n" ++ " nop \n" ++ " ldr %[temp], 0(%[addr]) \n" ++ " ldl %[temp], 7(%[addr]) \n" ++ " b 2f \n" ++ " nop \n" ++ " 1:\t ld %[temp],0(%[addr]) \n" ++ " 2:\t sd %[temp], %[res] \n" ++ ++ " .set at\n" ++ " .set pop\n" ++ : [addr]"=r"(p), [temp]"=r" (temp) ++ : "[addr]"(p), "[temp]" (temp), [res]"m" (*(volatile jint*)&res) ++ : "memory" ++ ); ++ ++ return res; ++ } ++ ++ //use mips unaligned load instructions ++ static inline void put_native_u2(address p, u2 x) { ++ if((intptr_t)p & 0x1) { ++ p[0] = (u_char)(x); ++ p[1] = (u_char)(x>>8); ++ } else { ++ *(u2*)p = x; ++ } ++ } ++ ++ static inline void put_native_u4(address p, u4 x) { ++ // refer to sparc implementation. ++ // Note that sparc is big-endian, while mips is little-endian ++ switch ( intptr_t(p) & 3 ) { ++ case 0: *(u4*)p = x; ++ break; ++ ++ case 2: ((u2*)p)[1] = x >> 16; ++ ((u2*)p)[0] = x; ++ break; ++ ++ default: ((u1*)p)[3] = x >> 24; ++ ((u1*)p)[2] = x >> 16; ++ ((u1*)p)[1] = x >> 8; ++ ((u1*)p)[0] = x; ++ break; ++ } ++ } ++ ++ static inline void put_native_u8(address p, u8 x) { ++ // refer to sparc implementation. ++ // Note that sparc is big-endian, while mips is little-endian ++ switch ( intptr_t(p) & 7 ) { ++ case 0: *(u8*)p = x; ++ break; ++ ++ case 4: ((u4*)p)[1] = x >> 32; ++ ((u4*)p)[0] = x; ++ break; ++ ++ case 2: ((u2*)p)[3] = x >> 48; ++ ((u2*)p)[2] = x >> 32; ++ ((u2*)p)[1] = x >> 16; ++ ((u2*)p)[0] = x; ++ break; ++ ++ default: ((u1*)p)[7] = x >> 56; ++ ((u1*)p)[6] = x >> 48; ++ ((u1*)p)[5] = x >> 40; ++ ((u1*)p)[4] = x >> 32; ++ ((u1*)p)[3] = x >> 24; ++ ((u1*)p)[2] = x >> 16; ++ ((u1*)p)[1] = x >> 8; ++ ((u1*)p)[0] = x; ++ } ++ } ++ ++ ++ // Efficient reading and writing of unaligned unsigned data in Java ++ // byte ordering (i.e. big-endian ordering). Byte-order reversal is ++ // needed since MIPS64EL CPUs use little-endian format. 
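
The native put/get helpers above deal with misalignment; the Java-order accessors that follow only add a byte swap, because class-file data is big-endian while this target is little-endian. A minimal portable sketch of that composition, assuming u4/address are plain typedefs and using memcpy plus a GCC/Clang byte-swap builtin in place of the lwl/lwr inline assembly:

#include <cstdint>
#include <cstring>

typedef uint32_t u4;             // stand-in for HotSpot's u4
typedef unsigned char* address;  // stand-in for HotSpot's address

// Alignment-safe native (little-endian) load; the real header uses lwl/lwr asm instead.
static inline u4 sketch_get_native_u4(address p) {
  u4 v;
  std::memcpy(&v, p, sizeof(v));
  return v;
}

// Java class-file order is big-endian, so a Java-order read is a native read plus a
// byte swap -- the same composition get_Java_u4 uses via get_native_u4 and swap_u4.
static inline u4 sketch_get_Java_u4(address p) {
  return __builtin_bswap32(sketch_get_native_u4(p));
}
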
++ static inline u2 get_Java_u2(address p) { return swap_u2(get_native_u2(p)); } ++ static inline u4 get_Java_u4(address p) { return swap_u4(get_native_u4(p)); } ++ static inline u8 get_Java_u8(address p) { return swap_u8(get_native_u8(p)); } ++ ++ static inline void put_Java_u2(address p, u2 x) { put_native_u2(p, swap_u2(x)); } ++ static inline void put_Java_u4(address p, u4 x) { put_native_u4(p, swap_u4(x)); } ++ static inline void put_Java_u8(address p, u8 x) { put_native_u8(p, swap_u8(x)); } ++ ++ ++ // Efficient swapping of byte ordering ++ static inline u2 swap_u2(u2 x); // compiler-dependent implementation ++ static inline u4 swap_u4(u4 x); // compiler-dependent implementation ++ static inline u8 swap_u8(u8 x); ++}; ++ ++ ++// The following header contains the implementations of swap_u2, swap_u4, and swap_u8[_base] ++#include OS_CPU_HEADER_INLINE(bytes) ++ ++#endif // CPU_MIPS_VM_BYTES_MIPS_HPP +diff --git a/src/hotspot/cpu/mips/c2_globals_mips.hpp b/src/hotspot/cpu/mips/c2_globals_mips.hpp +new file mode 100644 +index 0000000000..ef11827abf +--- /dev/null ++++ b/src/hotspot/cpu/mips/c2_globals_mips.hpp +@@ -0,0 +1,95 @@ ++/* ++ * Copyright (c) 2000, 2013, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#ifndef CPU_MIPS_VM_C2_GLOBALS_MIPS_HPP ++#define CPU_MIPS_VM_C2_GLOBALS_MIPS_HPP ++ ++#include "utilities/globalDefinitions.hpp" ++#include "utilities/macros.hpp" ++ ++// Sets the default values for platform dependent flags used by the server compiler. ++// (see c2_globals.hpp). Alpha-sorted. 
++define_pd_global(bool, BackgroundCompilation, true); ++define_pd_global(bool, UseTLAB, true); ++define_pd_global(bool, ResizeTLAB, true); ++define_pd_global(bool, CICompileOSR, true); ++define_pd_global(bool, InlineIntrinsics, true); ++define_pd_global(bool, PreferInterpreterNativeStubs, false); ++define_pd_global(bool, ProfileTraps, true); ++define_pd_global(bool, UseOnStackReplacement, true); ++#ifdef CC_INTERP ++define_pd_global(bool, ProfileInterpreter, false); ++#else ++define_pd_global(bool, ProfileInterpreter, true); ++#endif // CC_INTERP ++// Disable C1 in server JIT ++define_pd_global(bool, TieredCompilation, false); ++define_pd_global(intx, CompileThreshold, 10000); ++define_pd_global(intx, BackEdgeThreshold, 100000); ++ ++define_pd_global(intx, OnStackReplacePercentage, 140); ++define_pd_global(intx, ConditionalMoveLimit, 3); ++define_pd_global(intx, FLOATPRESSURE, 6); ++define_pd_global(intx, FreqInlineSize, 325); ++define_pd_global(intx, MinJumpTableSize, 10); ++define_pd_global(intx, INTPRESSURE, 13); ++define_pd_global(intx, InteriorEntryAlignment, 16); ++define_pd_global(intx, NewSizeThreadIncrease, ScaleForWordSize(4*K)); ++define_pd_global(intx, LoopUnrollLimit, 60); ++define_pd_global(intx, LoopPercentProfileLimit, 10); ++// InitialCodeCacheSize derived from specjbb2000 run. ++define_pd_global(intx, InitialCodeCacheSize, 2496*K); // Integral multiple of CodeCacheExpansionSize ++define_pd_global(intx, CodeCacheExpansionSize, 64*K); ++ ++// Ergonomics related flags ++define_pd_global(uint64_t,MaxRAM, 128ULL*G); ++define_pd_global(intx, RegisterCostAreaRatio, 16000); ++ ++// Peephole and CISC spilling both break the graph, and so makes the ++// scheduler sick. ++define_pd_global(bool, OptoPeephole, false); ++define_pd_global(bool, UseCISCSpill, false); ++define_pd_global(bool, OptoScheduling, false); ++define_pd_global(bool, OptoBundling, false); ++define_pd_global(bool, OptoRegScheduling, false); ++define_pd_global(bool, SuperWordLoopUnrollAnalysis, true); ++define_pd_global(bool, IdealizeClearArrayNode, true); ++ ++define_pd_global(intx, ReservedCodeCacheSize, 120*M); ++define_pd_global(intx, NonProfiledCodeHeapSize, 57*M); ++define_pd_global(intx, ProfiledCodeHeapSize, 58*M); ++define_pd_global(intx, NonNMethodCodeHeapSize, 5*M ); ++define_pd_global(uintx, CodeCacheMinBlockLength, 4); ++define_pd_global(uintx, CodeCacheMinimumUseSpace, 400*K); ++ ++define_pd_global(bool, TrapBasedRangeChecks, false); ++ ++// Heap related flags ++define_pd_global(uintx,MetaspaceSize, ScaleForWordSize(16*M)); ++ ++// Ergonomics related flags ++define_pd_global(bool, NeverActAsServerClassMachine, false); ++ ++#endif // CPU_MIPS_VM_C2_GLOBALS_MIPS_HPP +diff --git a/src/hotspot/cpu/mips/c2_init_mips.cpp b/src/hotspot/cpu/mips/c2_init_mips.cpp +new file mode 100644 +index 0000000000..e6d5815f42 +--- /dev/null ++++ b/src/hotspot/cpu/mips/c2_init_mips.cpp +@@ -0,0 +1,34 @@ ++/* ++ * Copyright (c) 2000, 2010, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2019, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#include "precompiled.hpp" ++#include "opto/compile.hpp" ++#include "opto/node.hpp" ++ ++// processor dependent initialization for mips ++ ++void Compile::pd_compiler2_init() { ++ guarantee(CodeEntryAlignment >= InteriorEntryAlignment, "" ); ++} +diff --git a/src/hotspot/cpu/mips/codeBuffer_mips.hpp b/src/hotspot/cpu/mips/codeBuffer_mips.hpp +new file mode 100644 +index 0000000000..3cc191006d +--- /dev/null ++++ b/src/hotspot/cpu/mips/codeBuffer_mips.hpp +@@ -0,0 +1,35 @@ ++/* ++ * Copyright (c) 2002, 2010, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2018, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#ifndef CPU_MIPS_VM_CODEBUFFER_MIPS_HPP ++#define CPU_MIPS_VM_CODEBUFFER_MIPS_HPP ++ ++private: ++ void pd_initialize() {} ++ ++public: ++ void flush_bundle(bool start_new_bundle) {} ++ ++#endif // CPU_MIPS_VM_CODEBUFFER_MIPS_HPP +diff --git a/src/hotspot/cpu/mips/compiledIC_mips.cpp b/src/hotspot/cpu/mips/compiledIC_mips.cpp +new file mode 100644 +index 0000000000..068ca4799d +--- /dev/null ++++ b/src/hotspot/cpu/mips/compiledIC_mips.cpp +@@ -0,0 +1,151 @@ ++/* ++ * Copyright (c) 1997, 2014, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2021, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). 
++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#include "precompiled.hpp" ++#include "asm/macroAssembler.inline.hpp" ++#include "code/compiledIC.hpp" ++#include "code/icBuffer.hpp" ++#include "code/nmethod.hpp" ++#include "memory/resourceArea.hpp" ++#include "runtime/mutexLocker.hpp" ++#include "runtime/safepoint.hpp" ++ ++// ---------------------------------------------------------------------------- ++ ++#define __ _masm. ++address CompiledStaticCall::emit_to_interp_stub(CodeBuffer &cbuf, address mark) { ++ ++ if (mark == NULL) { ++ mark = cbuf.insts_mark(); // get mark within main instrs section ++ } ++ ++ // Note that the code buffer's insts_mark is always relative to insts. ++ // That's why we must use the macroassembler to generate a stub. ++ MacroAssembler _masm(&cbuf); ++ ++ address base = __ start_a_stub(CompiledStaticCall::to_interp_stub_size()); ++ if (base == NULL) return NULL; // CodeBuffer::expand failed ++ // static stub relocation stores the instruction address of the call ++ ++ __ relocate(static_stub_Relocation::spec(mark), 0); ++ ++ // Code stream for loading method may be changed. ++ __ synci(R0, 0); ++ ++ // Rmethod contains methodOop, it should be relocated for GC ++ // static stub relocation also tags the methodOop in the code-stream. ++ __ mov_metadata(Rmethod, NULL); ++ // This is recognized as unresolved by relocs/nativeInst/ic code ++ ++ __ relocate(relocInfo::runtime_call_type); ++ ++ cbuf.set_insts_mark(); ++ address call_pc = (address)-1; ++ __ patchable_jump(call_pc); ++ __ align(16); ++ // Update current stubs pointer and restore code_end. ++ __ end_a_stub(); ++ return base; ++} ++#undef __ ++ ++int CompiledStaticCall::to_interp_stub_size() { ++ int size = NativeInstruction::nop_instruction_size + NativeMovConstReg::instruction_size + NativeCall::instruction_size; ++ return round_to(size, 16); ++} ++ ++int CompiledStaticCall::to_trampoline_stub_size() { ++ return NativeInstruction::nop_instruction_size + NativeCallTrampolineStub::instruction_size; ++} ++ ++// Relocation entries for call stub, compiled java to interpreter. ++int CompiledStaticCall::reloc_to_interp_stub() { ++ return 16; ++} ++ ++void CompiledDirectStaticCall::set_to_interpreted(const methodHandle& callee, address entry) { ++ address stub = find_stub(false /* is_aot */); ++ guarantee(stub != NULL, "stub not found"); ++ ++ if (TraceICs) { ++ ResourceMark rm; ++ tty->print_cr("CompiledDirectStaticCall@" INTPTR_FORMAT ": set_to_interpreted %s", ++ p2i(instruction_address()), ++ callee->name_and_sig_as_C_string()); ++ } ++ ++ // Creation also verifies the object. ++ NativeMovConstReg* method_holder = nativeMovConstReg_at(stub + NativeInstruction::nop_instruction_size); ++ NativeGeneralJump* jump = nativeGeneralJump_at(method_holder->next_instruction_address()); ++ ++ assert(method_holder->data() == 0 || method_holder->data() == (intptr_t)callee(), ++ "a) MT-unsafe modification of inline cache"); ++ assert(jump->jump_destination() == (address)-1 || jump->jump_destination() == entry, ++ "b) MT-unsafe modification of inline cache"); ++ ++ // Update stub. 
++ method_holder->set_data((intptr_t)callee()); ++ jump->set_jump_destination(entry); ++ ++ // Update jump to call. ++ set_destination_mt_safe(stub); ++} ++ ++void CompiledDirectStaticCall::set_stub_to_clean(static_stub_Relocation* static_stub) { ++ assert (CompiledIC_lock->is_locked() || SafepointSynchronize::is_at_safepoint(), "mt unsafe call"); ++ // Reset stub. ++ address stub = static_stub->addr(); ++ assert(stub != NULL, "stub not found"); ++ // Creation also verifies the object. ++ NativeMovConstReg* method_holder = nativeMovConstReg_at(stub + NativeInstruction::nop_instruction_size); ++ NativeGeneralJump* jump = nativeGeneralJump_at(method_holder->next_instruction_address()); ++ method_holder->set_data(0); ++ jump->set_jump_destination((address)-1); ++} ++ ++//----------------------------------------------------------------------------- ++// Non-product mode code ++#ifndef PRODUCT ++ ++void CompiledDirectStaticCall::verify() { ++ // Verify call. ++ _call->verify(); ++ if (os::is_MP()) { ++ _call->verify_alignment(); ++ } ++ ++ // Verify stub. ++ address stub = find_stub(false /* is_aot */); ++ assert(stub != NULL, "no stub found for static call"); ++ // Creation also verifies the object. ++ NativeMovConstReg* method_holder = nativeMovConstReg_at(stub + NativeInstruction::nop_instruction_size); ++ NativeGeneralJump* jump = nativeGeneralJump_at(method_holder->next_instruction_address()); ++ ++ ++ // Verify state. ++ assert(is_clean() || is_call_to_compiled() || is_call_to_interpreted(), "sanity check"); ++} ++ ++#endif // !PRODUCT +diff --git a/src/hotspot/cpu/mips/copy_mips.hpp b/src/hotspot/cpu/mips/copy_mips.hpp +new file mode 100644 +index 0000000000..dcc77adfec +--- /dev/null ++++ b/src/hotspot/cpu/mips/copy_mips.hpp +@@ -0,0 +1,77 @@ ++/* ++ * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2016, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#ifndef CPU_MIPS_VM_COPY_MIPS_HPP ++#define CPU_MIPS_VM_COPY_MIPS_HPP ++ ++// Inline functions for memory copy and fill. ++ ++// Contains inline asm implementations ++#include OS_CPU_HEADER_INLINE(copy) ++ ++// Template for atomic, element-wise copy. 
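
A note on the copy_conjoint_atomic template that follows (illustrative, not a change to the patch): copying element by element from low to high addresses is only correct when the destination does not overlap the tail of the source, so the direction flips when the destination lies at or above the source. This is the same guarantee std::memmove gives, except done one element at a time so each store stays atomic. A standalone sketch of the same direction choice, with arbitrary int data:

    // Sketch only: overlap-aware element-wise copy, mirroring the direction
    // logic of copy_conjoint_atomic. Buffer contents are arbitrary.
    #include <cassert>
    #include <cstddef>

    static void copy_conjoint_ints(const int* from, int* to, size_t count) {
      if (from > to) {
        for (size_t i = 0; i < count; i++) to[i] = from[i];   // copy forwards
      } else {
        for (size_t i = count; i-- > 0; )  to[i] = from[i];   // copy backwards
      }
    }

    int main() {
      int buf[6] = {1, 2, 3, 4, 5, 6};
      copy_conjoint_ints(buf, buf + 2, 4);   // overlapping shift right by two elements
      int expect[6] = {1, 2, 1, 2, 3, 4};
      for (int i = 0; i < 6; i++) assert(buf[i] == expect[i]);
      return 0;
    }
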
++template ++static void copy_conjoint_atomic(const T* from, T* to, size_t count) { ++ if (from > to) { ++ while (count-- > 0) { ++ // Copy forwards ++ *to++ = *from++; ++ } ++ } else { ++ from += count - 1; ++ to += count - 1; ++ while (count-- > 0) { ++ // Copy backwards ++ *to-- = *from--; ++ } ++ } ++} ++ ++ ++static void pd_fill_to_words(HeapWord* tohw, size_t count, juint value) { ++ julong* to = (julong*) tohw; ++ julong v = ((julong) value << 32) | value; ++ while (count-- > 0) { ++ *to++ = v; ++ } ++} ++ ++static void pd_fill_to_aligned_words(HeapWord* tohw, size_t count, juint value) { ++ pd_fill_to_words(tohw, count, value); ++} ++ ++static void pd_fill_to_bytes(void* to, size_t count, jubyte value) { ++ (void)memset(to, value, count); ++} ++ ++static void pd_zero_to_words(HeapWord* tohw, size_t count) { ++ pd_fill_to_words(tohw, count, 0); ++} ++ ++static void pd_zero_to_bytes(void* to, size_t count) { ++ (void)memset(to, 0, count); ++} ++ ++#endif //CPU_MIPS_VM_COPY_MIPS_HPP +diff --git a/src/hotspot/cpu/mips/depChecker_mips.cpp b/src/hotspot/cpu/mips/depChecker_mips.cpp +new file mode 100644 +index 0000000000..756ccb68f9 +--- /dev/null ++++ b/src/hotspot/cpu/mips/depChecker_mips.cpp +@@ -0,0 +1,30 @@ ++/* ++ * Copyright (c) 2002, 2010, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2016, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#include "precompiled.hpp" ++#include "compiler/disassembler.hpp" ++#include "depChecker_mips.hpp" ++ ++// Nothing to do on mips +diff --git a/src/hotspot/cpu/mips/depChecker_mips.hpp b/src/hotspot/cpu/mips/depChecker_mips.hpp +new file mode 100644 +index 0000000000..11e52b4e8f +--- /dev/null ++++ b/src/hotspot/cpu/mips/depChecker_mips.hpp +@@ -0,0 +1,31 @@ ++/* ++ * Copyright (c) 2002, 2010, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2016, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). 
++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#ifndef CPU_MIPS_VM_DEPCHECKER_MIPS_HPP ++#define CPU_MIPS_VM_DEPCHECKER_MIPS_HPP ++ ++// Nothing to do on MIPS ++ ++#endif // CPU_MIPS_VM_DEPCHECKER_MIPS_HPP +diff --git a/src/hotspot/cpu/mips/disassembler_mips.hpp b/src/hotspot/cpu/mips/disassembler_mips.hpp +new file mode 100644 +index 0000000000..c5f3a8888d +--- /dev/null ++++ b/src/hotspot/cpu/mips/disassembler_mips.hpp +@@ -0,0 +1,37 @@ ++/* ++ * Copyright (c) 1997, 2010, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#ifndef CPU_MIPS_VM_DISASSEMBLER_MIPS_HPP ++#define CPU_MIPS_VM_DISASSEMBLER_MIPS_HPP ++ ++ static int pd_instruction_alignment() { ++ return sizeof(int); ++ } ++ ++ static const char* pd_cpu_opts() { ++ return "gpr-names=64"; ++ } ++ ++#endif // CPU_MIPS_VM_DISASSEMBLER_MIPS_HPP +diff --git a/src/hotspot/cpu/mips/frame_mips.cpp b/src/hotspot/cpu/mips/frame_mips.cpp +new file mode 100644 +index 0000000000..d49bd6290d +--- /dev/null ++++ b/src/hotspot/cpu/mips/frame_mips.cpp +@@ -0,0 +1,690 @@ ++/* ++ * Copyright (c) 1997, 2014, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 
++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#include "precompiled.hpp" ++#include "interpreter/interpreter.hpp" ++#include "memory/resourceArea.hpp" ++#include "oops/markOop.hpp" ++#include "oops/method.hpp" ++#include "oops/oop.inline.hpp" ++#include "prims/methodHandles.hpp" ++#include "runtime/frame.inline.hpp" ++#include "runtime/handles.inline.hpp" ++#include "runtime/javaCalls.hpp" ++#include "runtime/monitorChunk.hpp" ++#include "runtime/signature.hpp" ++#include "runtime/stubCodeGenerator.hpp" ++#include "runtime/stubRoutines.hpp" ++#include "vmreg_mips.inline.hpp" ++ ++#ifdef ASSERT ++void RegisterMap::check_location_valid() { ++} ++#endif ++ ++ ++// Profiling/safepoint support ++// for Profiling - acting on another frame. walks sender frames ++// if valid. ++// frame profile_find_Java_sender_frame(JavaThread *thread); ++ ++bool frame::safe_for_sender(JavaThread *thread) { ++ address sp = (address)_sp; ++ address fp = (address)_fp; ++ address unextended_sp = (address)_unextended_sp; ++ ++ // consider stack guards when trying to determine "safe" stack pointers ++ static size_t stack_guard_size = os::uses_stack_guard_pages() ? ++ JavaThread::stack_red_zone_size() + JavaThread::stack_yellow_zone_size() : 0; ++ size_t usable_stack_size = thread->stack_size() - stack_guard_size; ++ ++ // sp must be within the usable part of the stack (not in guards) ++ bool sp_safe = (sp < thread->stack_base()) && ++ (sp >= thread->stack_base() - usable_stack_size); ++ ++ ++ if (!sp_safe) { ++ return false; ++ } ++ ++ // unextended sp must be within the stack and above or equal sp ++ bool unextended_sp_safe = (unextended_sp < thread->stack_base()) && ++ (unextended_sp >= sp); ++ ++ if (!unextended_sp_safe) { ++ return false; ++ } ++ ++ // an fp must be within the stack and above (but not equal) sp ++ // second evaluation on fp+ is added to handle situation where fp is -1 ++ bool fp_safe = (fp < thread->stack_base() && (fp > sp) && (((fp + (return_addr_offset * sizeof(void*))) < thread->stack_base()))); ++ ++ // We know sp/unextended_sp are safe only fp is questionable here ++ ++ // If the current frame is known to the code cache then we can attempt to ++ // construct the sender and do some validation of it. This goes a long way ++ // toward eliminating issues when we get in frame construction code ++ ++ if (_cb != NULL ) { ++ ++ // First check if frame is complete and tester is reliable ++ // Unfortunately we can only check frame complete for runtime stubs and nmethod ++ // other generic buffer blobs are more problematic so we just assume they are ++ // ok. adapter blobs never have a frame complete and are never ok. ++ ++ if (!_cb->is_frame_complete_at(_pc)) { ++ if (_cb->is_compiled() || _cb->is_adapter_blob() || _cb->is_runtime_stub()) { ++ return false; ++ } ++ } ++ ++ // Could just be some random pointer within the codeBlob ++ if (!_cb->code_contains(_pc)) { ++ return false; ++ } ++ ++ // Entry frame checks ++ if (is_entry_frame()) { ++ // an entry frame must have a valid fp. 
++ return fp_safe && is_entry_frame_valid(thread); ++ } ++ ++ intptr_t* sender_sp = NULL; ++ intptr_t* sender_unextended_sp = NULL; ++ address sender_pc = NULL; ++ intptr_t* saved_fp = NULL; ++ ++ if (is_interpreted_frame()) { ++ // fp must be safe ++ if (!fp_safe) { ++ return false; ++ } ++ ++ sender_pc = (address) this->fp()[return_addr_offset]; ++ // for interpreted frames, the value below is the sender "raw" sp, ++ // which can be different from the sender unextended sp (the sp seen ++ // by the sender) because of current frame local variables ++ sender_sp = (intptr_t*) addr_at(sender_sp_offset); ++ sender_unextended_sp = (intptr_t*) this->fp()[interpreter_frame_sender_sp_offset]; ++ saved_fp = (intptr_t*) this->fp()[link_offset]; ++ ++ } else { ++ // must be some sort of compiled/runtime frame ++ // fp does not have to be safe (although it could be check for c1?) ++ ++ // check for a valid frame_size, otherwise we are unlikely to get a valid sender_pc ++ if (_cb->frame_size() <= 0) { ++ return false; ++ } ++ ++ sender_sp = _unextended_sp + _cb->frame_size(); ++ // Is sender_sp safe? ++ if ((address)sender_sp >= thread->stack_base()) { ++ return false; ++ } ++ sender_unextended_sp = sender_sp; ++ // On MIPS the return_address is always the word on the stack ++ sender_pc = (address) *(sender_sp-1); ++ // Note: frame::sender_sp_offset is only valid for compiled frame ++ saved_fp = (intptr_t*) *(sender_sp - frame::sender_sp_offset); ++ } ++ ++ ++ // If the potential sender is the interpreter then we can do some more checking ++ if (Interpreter::contains(sender_pc)) { ++ ++ // FP is always saved in a recognizable place in any code we generate. However ++ // only if the sender is interpreted/call_stub (c1 too?) are we certain that the saved FP ++ // is really a frame pointer. 
++ ++ bool saved_fp_safe = ((address)saved_fp < thread->stack_base()) && (saved_fp > sender_sp); ++ ++ if (!saved_fp_safe) { ++ return false; ++ } ++ ++ // construct the potential sender ++ ++ frame sender(sender_sp, sender_unextended_sp, saved_fp, sender_pc); ++ ++ return sender.is_interpreted_frame_valid(thread); ++ ++ } ++ ++ // We must always be able to find a recognizable pc ++ CodeBlob* sender_blob = CodeCache::find_blob_unsafe(sender_pc); ++ if (sender_pc == NULL || sender_blob == NULL) { ++ return false; ++ } ++ ++ // Could be a zombie method ++ if (sender_blob->is_zombie() || sender_blob->is_unloaded()) { ++ return false; ++ } ++ ++ // Could just be some random pointer within the codeBlob ++ if (!sender_blob->code_contains(sender_pc)) { ++ return false; ++ } ++ ++ // We should never be able to see an adapter if the current frame is something from code cache ++ if (sender_blob->is_adapter_blob()) { ++ return false; ++ } ++ ++ // Could be the call_stub ++ if (StubRoutines::returns_to_call_stub(sender_pc)) { ++ bool saved_fp_safe = ((address)saved_fp < thread->stack_base()) && (saved_fp > sender_sp); ++ ++ if (!saved_fp_safe) { ++ return false; ++ } ++ ++ // construct the potential sender ++ ++ frame sender(sender_sp, sender_unextended_sp, saved_fp, sender_pc); ++ ++ // Validate the JavaCallWrapper an entry frame must have ++ address jcw = (address)sender.entry_frame_call_wrapper(); ++ ++ bool jcw_safe = (jcw < thread->stack_base()) && ( jcw > (address)sender.fp()); ++ ++ return jcw_safe; ++ } ++ ++ CompiledMethod* nm = sender_blob->as_compiled_method_or_null(); ++ if (nm != NULL) { ++ if (nm->is_deopt_mh_entry(sender_pc) || nm->is_deopt_entry(sender_pc) || ++ nm->method()->is_method_handle_intrinsic()) { ++ return false; ++ } ++ } ++ ++ // If the frame size is 0 something (or less) is bad because every nmethod has a non-zero frame size ++ // because the return address counts against the callee's frame. ++ ++ if (sender_blob->frame_size() <= 0) { ++ assert(!sender_blob->is_compiled(), "should count return address at least"); ++ return false; ++ } ++ ++ // We should never be able to see anything here except an nmethod. If something in the ++ // code cache (current frame) is called by an entity within the code cache that entity ++ // should not be anything but the call stub (already covered), the interpreter (already covered) ++ // or an nmethod. ++ ++ if (!sender_blob->is_compiled()) { ++ return false; ++ } ++ ++ // Could put some more validation for the potential non-interpreted sender ++ // frame we'd create by calling sender if I could think of any. Wait for next crash in forte... ++ ++ // One idea is seeing if the sender_pc we have is one that we'd expect to call to current cb ++ ++ // We've validated the potential sender that would be created ++ return true; ++ } ++ ++ // Must be native-compiled frame. Since sender will try and use fp to find ++ // linkages it must be safe ++ ++ if (!fp_safe) { ++ return false; ++ } ++ ++ // Will the pc we fetch be non-zero (which we'll find at the oldest frame) ++ ++ if ( (address) this->fp()[return_addr_offset] == NULL) return false; ++ ++ ++ // could try and do some more potential verification of native frame if we could think of some... 
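
The pointer checks in safe_for_sender above all reduce to one predicate: on a downward-growing stack, a candidate sp or fp is plausible only if it lies below stack_base() and above the guard pages. A standalone sketch of that test (the parameter names are illustrative, not HotSpot API):

    // Sketch only: the stack-bounds test used throughout frame::safe_for_sender.
    // The stack grows downward, so usable memory is [stack_base - usable_size, stack_base).
    #include <cstddef>

    static inline bool within_usable_stack(const void* p,
                                           const char* stack_base,
                                           size_t usable_size) {
      const char* addr = static_cast<const char*>(p);
      return addr < stack_base && addr >= stack_base - usable_size;
    }

The additional ordering checks (saved_fp > sender_sp, unextended_sp >= sp) then ensure the candidate frames nest in the right order inside that region.
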
++ ++ return true; ++ ++} ++ ++void frame::patch_pc(Thread* thread, address pc) { ++ address* pc_addr = &(((address*) sp())[-1]); ++ if (TracePcPatching) { ++ tty->print_cr("patch_pc at address " INTPTR_FORMAT " [" INTPTR_FORMAT " -> " INTPTR_FORMAT "]", ++ p2i(pc_addr), p2i(*pc_addr), p2i(pc)); ++ } ++ // Either the return address is the original one or we are going to ++ // patch in the same address that's already there. ++ assert(_pc == *pc_addr || pc == *pc_addr, "must be"); ++ *pc_addr = pc; ++ _cb = CodeCache::find_blob(pc); ++ address original_pc = CompiledMethod::get_deopt_original_pc(this); ++ if (original_pc != NULL) { ++ assert(original_pc == _pc, "expected original PC to be stored before patching"); ++ _deopt_state = is_deoptimized; ++ // leave _pc as is ++ } else { ++ _deopt_state = not_deoptimized; ++ _pc = pc; ++ } ++} ++ ++bool frame::is_interpreted_frame() const { ++ return Interpreter::contains(pc()); ++} ++ ++int frame::frame_size(RegisterMap* map) const { ++ frame sender = this->sender(map); ++ return sender.sp() - sp(); ++} ++ ++intptr_t* frame::entry_frame_argument_at(int offset) const { ++ // convert offset to index to deal with tsi ++ int index = (Interpreter::expr_offset_in_bytes(offset)/wordSize); ++ // Entry frame's arguments are always in relation to unextended_sp() ++ return &unextended_sp()[index]; ++} ++ ++// sender_sp ++#ifdef CC_INTERP ++intptr_t* frame::interpreter_frame_sender_sp() const { ++ assert(is_interpreted_frame(), "interpreted frame expected"); ++ // QQQ why does this specialize method exist if frame::sender_sp() does same thing? ++ // seems odd and if we always know interpreted vs. non then sender_sp() is really ++ // doing too much work. ++ return get_interpreterState()->sender_sp(); ++} ++ ++// monitor elements ++ ++BasicObjectLock* frame::interpreter_frame_monitor_begin() const { ++ return get_interpreterState()->monitor_base(); ++} ++ ++BasicObjectLock* frame::interpreter_frame_monitor_end() const { ++ return (BasicObjectLock*) get_interpreterState()->stack_base(); ++} ++ ++#else // CC_INTERP ++ ++intptr_t* frame::interpreter_frame_sender_sp() const { ++ assert(is_interpreted_frame(), "interpreted frame expected"); ++ return (intptr_t*) at(interpreter_frame_sender_sp_offset); ++} ++ ++void frame::set_interpreter_frame_sender_sp(intptr_t* sender_sp) { ++ assert(is_interpreted_frame(), "interpreted frame expected"); ++ int_at_put(interpreter_frame_sender_sp_offset, (intptr_t) sender_sp); ++} ++ ++ ++// monitor elements ++ ++BasicObjectLock* frame::interpreter_frame_monitor_begin() const { ++ return (BasicObjectLock*) addr_at(interpreter_frame_monitor_block_bottom_offset); ++} ++ ++BasicObjectLock* frame::interpreter_frame_monitor_end() const { ++ BasicObjectLock* result = (BasicObjectLock*) *addr_at(interpreter_frame_monitor_block_top_offset); ++ // make sure the pointer points inside the frame ++ assert((intptr_t) fp() > (intptr_t) result, "result must < than frame pointer"); ++ assert((intptr_t) sp() <= (intptr_t) result, "result must >= than stack pointer"); ++ return result; ++} ++ ++void frame::interpreter_frame_set_monitor_end(BasicObjectLock* value) { ++ *((BasicObjectLock**)addr_at(interpreter_frame_monitor_block_top_offset)) = value; ++} ++ ++// Used by template based interpreter deoptimization ++void frame::interpreter_frame_set_last_sp(intptr_t* sp) { ++ *((intptr_t**)addr_at(interpreter_frame_last_sp_offset)) = sp; ++} ++#endif // CC_INTERP ++ ++frame frame::sender_for_entry_frame(RegisterMap* map) const { ++ assert(map != NULL, 
"map must be set"); ++ // Java frame called from C; skip all C frames and return top C ++ // frame of that chunk as the sender ++ JavaFrameAnchor* jfa = entry_frame_call_wrapper()->anchor(); ++ assert(!entry_frame_is_first(), "next Java fp must be non zero"); ++ assert(jfa->last_Java_sp() > sp(), "must be above this frame on stack"); ++ map->clear(); ++ assert(map->include_argument_oops(), "should be set by clear"); ++ if (jfa->last_Java_pc() != NULL ) { ++ frame fr(jfa->last_Java_sp(), jfa->last_Java_fp(), jfa->last_Java_pc()); ++ return fr; ++ } ++ frame fr(jfa->last_Java_sp(), jfa->last_Java_fp()); ++ return fr; ++} ++ ++frame frame::sender_for_interpreter_frame(RegisterMap* map) const { ++ // sp is the raw sp from the sender after adapter or interpreter extension ++ intptr_t* sender_sp = this->sender_sp(); ++ ++ // This is the sp before any possible extension (adapter/locals). ++ intptr_t* unextended_sp = interpreter_frame_sender_sp(); ++ ++ // The interpreter and compiler(s) always save FP in a known ++ // location on entry. We must record where that location is ++ // so this if FP was live on callout from c2 we can find ++ // the saved copy no matter what it called. ++ ++ // Since the interpreter always saves FP if we record where it is then ++ // we don't have to always save FP on entry and exit to c2 compiled ++ // code, on entry will be enough. ++#ifdef COMPILER2 ++ if (map->update_map()) { ++ update_map_with_saved_link(map, (intptr_t**) addr_at(link_offset)); ++ } ++#endif /* COMPILER2 */ ++ return frame(sender_sp, unextended_sp, link(), sender_pc()); ++} ++ ++ ++//------------------------------------------------------------------------------ ++// frame::verify_deopt_original_pc ++// ++// Verifies the calculated original PC of a deoptimization PC for the ++// given unextended SP. The unextended SP might also be the saved SP ++// for MethodHandle call sites. ++#ifdef ASSERT ++void frame::verify_deopt_original_pc(CompiledMethod* nm, intptr_t* unextended_sp) { ++ frame fr; ++ ++ // This is ugly but it's better than to change {get,set}_original_pc ++ // to take an SP value as argument. And it's only a debugging ++ // method anyway. ++ fr._unextended_sp = unextended_sp; ++ ++ address original_pc = nm->get_original_pc(&fr); ++ assert(nm->insts_contains(original_pc), ++ "original PC must be in the main code section of the the compiled method (or must be immediately following it)"); ++} ++#endif ++ ++ ++//------------------------------------------------------------------------------ ++// frame::adjust_unextended_sp ++void frame::adjust_unextended_sp() { ++ // On MIPS, sites calling method handle intrinsics and lambda forms are treated ++ // as any other call site. Therefore, no special action is needed when we are ++ // returning to any of these call sites. ++ ++ if (_cb != NULL) { ++ CompiledMethod* sender_cm = _cb->as_compiled_method_or_null(); ++ if (sender_cm != NULL) { ++ // If the sender PC is a deoptimization point, get the original PC. ++ if (sender_cm->is_deopt_entry(_pc) || ++ sender_cm->is_deopt_mh_entry(_pc)) { ++ DEBUG_ONLY(verify_deopt_original_pc(sender_cm, _unextended_sp)); ++ } ++ } ++ } ++} ++ ++//------------------------------------------------------------------------------ ++// frame::update_map_with_saved_link ++void frame::update_map_with_saved_link(RegisterMap* map, intptr_t** link_addr) { ++ // The interpreter and compiler(s) always save fp in a known ++ // location on entry. 
We must record where that location is ++ // so that if fp was live on callout from c2 we can find ++ // the saved copy no matter what it called. ++ ++ // Since the interpreter always saves fp if we record where it is then ++ // we don't have to always save fp on entry and exit to c2 compiled ++ // code, on entry will be enough. ++ map->set_location(FP->as_VMReg(), (address) link_addr); ++ // this is weird "H" ought to be at a higher address however the ++ // oopMaps seems to have the "H" regs at the same address and the ++ // vanilla register. ++ // XXXX make this go away ++ if (true) { ++ map->set_location(FP->as_VMReg()->next(), (address) link_addr); ++ } ++} ++ ++//------------------------------sender_for_compiled_frame----------------------- ++frame frame::sender_for_compiled_frame(RegisterMap* map) const { ++ assert(map != NULL, "map must be set"); ++ ++ // frame owned by optimizing compiler ++ assert(_cb->frame_size() >= 0, "must have non-zero frame size"); ++ ++ intptr_t* sender_sp = unextended_sp() + _cb->frame_size(); ++ intptr_t* unextended_sp = sender_sp; ++ ++ // On Loongson the return_address is always the word on the stack ++ // the fp in compiler points to sender fp, but in interpreter, fp points to return address, ++ // so getting sender for compiled frame is not same as interpreter frame. ++ // we hard code here temporarily ++ // spark ++ address sender_pc = (address) *(sender_sp-1); ++ ++ intptr_t** saved_fp_addr = (intptr_t**) (sender_sp - frame::sender_sp_offset); ++ ++ if (map->update_map()) { ++ // Tell GC to use argument oopmaps for some runtime stubs that need it. ++ // For C1, the runtime stub might not have oop maps, so set this flag ++ // outside of update_register_map. ++ map->set_include_argument_oops(_cb->caller_must_gc_arguments(map->thread())); ++ if (_cb->oop_maps() != NULL) { ++ OopMapSet::update_register_map(this, map); ++ } ++ ++ // Since the prolog does the save and restore of epb there is no oopmap ++ // for it so we must fill in its location as if there was an oopmap entry ++ // since if our caller was compiled code there could be live jvm state in it. ++ update_map_with_saved_link(map, saved_fp_addr); ++ } ++ assert(sender_sp != sp(), "must have changed"); ++ return frame(sender_sp, unextended_sp, *saved_fp_addr, sender_pc); ++} ++ ++frame frame::sender(RegisterMap* map) const { ++ // Default is we done have to follow them. The sender_for_xxx will ++ // update it accordingly ++ map->set_include_argument_oops(false); ++ ++ if (is_entry_frame()) return sender_for_entry_frame(map); ++ if (is_interpreted_frame()) return sender_for_interpreter_frame(map); ++ assert(_cb == CodeCache::find_blob(pc()),"Must be the same"); ++ ++ if (_cb != NULL) { ++ return sender_for_compiled_frame(map); ++ } ++ // Must be native-compiled frame, i.e. the marshaling code for native ++ // methods that exists in the core system. ++ return frame(sender_sp(), link(), sender_pc()); ++} ++ ++bool frame::is_interpreted_frame_valid(JavaThread* thread) const { ++// QQQ ++#ifdef CC_INTERP ++#else ++ assert(is_interpreted_frame(), "Not an interpreted frame"); ++ // These are reasonable sanity checks ++ if (fp() == 0 || (intptr_t(fp()) & (wordSize-1)) != 0) { ++ return false; ++ } ++ if (sp() == 0 || (intptr_t(sp()) & (wordSize-1)) != 0) { ++ return false; ++ } ++ if (fp() + interpreter_frame_initial_sp_offset < sp()) { ++ return false; ++ } ++ // These are hacks to keep us out of trouble. 
++ // The problem with these is that they mask other problems ++ if (fp() <= sp()) { // this attempts to deal with unsigned comparison above ++ return false; ++ } ++ ++ // do some validation of frame elements ++ ++ // first the method ++ ++ Method* m = *interpreter_frame_method_addr(); ++ ++ // validate the method we'd find in this potential sender ++ if (!Method::is_valid_method(m)) return false; ++ ++ // stack frames shouldn't be much larger than max_stack elements ++ ++ //if (fp() - sp() > 1024 + m->max_stack()*Interpreter::stackElementSize()) { ++ if (fp() - sp() > 4096) { // stack frames shouldn't be large. ++ return false; ++ } ++ ++ // validate bci/bcp ++ ++ address bcp = interpreter_frame_bcp(); ++ if (m->validate_bci_from_bcp(bcp) < 0) { ++ return false; ++ } ++ ++ // validate ConstantPoolCache* ++ ++ ConstantPoolCache* cp = *interpreter_frame_cache_addr(); ++ ++ if (MetaspaceObj::is_valid(cp) == false) return false; ++ ++ // validate locals ++ ++ address locals = (address) *interpreter_frame_locals_addr(); ++ ++ if (locals > thread->stack_base() || locals < (address) fp()) return false; ++ ++ // We'd have to be pretty unlucky to be mislead at this point ++ ++#endif // CC_INTERP ++ return true; ++} ++ ++BasicType frame::interpreter_frame_result(oop* oop_result, jvalue* value_result) { ++#ifdef CC_INTERP ++ // Needed for JVMTI. The result should always be in the interpreterState object ++ assert(false, "NYI"); ++ interpreterState istate = get_interpreterState(); ++#endif // CC_INTERP ++ assert(is_interpreted_frame(), "interpreted frame expected"); ++ Method* method = interpreter_frame_method(); ++ BasicType type = method->result_type(); ++ ++ intptr_t* tos_addr; ++ if (method->is_native()) { ++ // Prior to calling into the runtime to report the method_exit the possible ++ // return value is pushed to the native stack. If the result is a jfloat/jdouble ++ // then ST0 is saved. See the note in generate_native_result ++ tos_addr = (intptr_t*)sp(); ++ if (type == T_FLOAT || type == T_DOUBLE) { ++ tos_addr += 2; ++ } ++ } else { ++ tos_addr = (intptr_t*)interpreter_frame_tos_address(); ++ } ++ ++ switch (type) { ++ case T_OBJECT : ++ case T_ARRAY : { ++ oop obj; ++ if (method->is_native()) { ++#ifdef CC_INTERP ++ obj = istate->_oop_temp; ++#else ++ obj = cast_to_oop(at(interpreter_frame_oop_temp_offset)); ++#endif // CC_INTERP ++ } else { ++ oop* obj_p = (oop*)tos_addr; ++ obj = (obj_p == NULL) ? 
(oop)NULL : *obj_p; ++ } ++ assert(obj == NULL || Universe::heap()->is_in(obj), "sanity check"); ++ *oop_result = obj; ++ break; ++ } ++ case T_BOOLEAN : value_result->z = *(jboolean*)tos_addr; break; ++ case T_BYTE : value_result->b = *(jbyte*)tos_addr; break; ++ case T_CHAR : value_result->c = *(jchar*)tos_addr; break; ++ case T_SHORT : value_result->s = *(jshort*)tos_addr; break; ++ case T_INT : value_result->i = *(jint*)tos_addr; break; ++ case T_LONG : value_result->j = *(jlong*)tos_addr; break; ++ case T_FLOAT : value_result->f = *(jfloat*)tos_addr; break; ++ case T_DOUBLE : value_result->d = *(jdouble*)tos_addr; break; ++ case T_VOID : /* Nothing to do */ break; ++ default : ShouldNotReachHere(); ++ } ++ ++ return type; ++} ++ ++ ++intptr_t* frame::interpreter_frame_tos_at(jint offset) const { ++ int index = (Interpreter::expr_offset_in_bytes(offset)/wordSize); ++ return &interpreter_frame_tos_address()[index]; ++} ++ ++#ifndef PRODUCT ++ ++#define DESCRIBE_FP_OFFSET(name) \ ++ values.describe(frame_no, fp() + frame::name##_offset, #name) ++ ++void frame::describe_pd(FrameValues& values, int frame_no) { ++ if (is_interpreted_frame()) { ++ DESCRIBE_FP_OFFSET(interpreter_frame_sender_sp); ++ DESCRIBE_FP_OFFSET(interpreter_frame_last_sp); ++ DESCRIBE_FP_OFFSET(interpreter_frame_method); ++ DESCRIBE_FP_OFFSET(interpreter_frame_mirror); ++ DESCRIBE_FP_OFFSET(interpreter_frame_mdp); ++ DESCRIBE_FP_OFFSET(interpreter_frame_cache); ++ DESCRIBE_FP_OFFSET(interpreter_frame_locals); ++ DESCRIBE_FP_OFFSET(interpreter_frame_bcp); ++ DESCRIBE_FP_OFFSET(interpreter_frame_initial_sp); ++ } ++} ++#endif ++ ++intptr_t *frame::initial_deoptimization_info() { ++ // used to reset the saved FP ++ return fp(); ++} ++ ++intptr_t* frame::real_fp() const { ++ if (_cb != NULL) { ++ // use the frame size if valid ++ int size = _cb->frame_size(); ++ if (size > 0) { ++ return unextended_sp() + size; ++ } ++ } ++ // else rely on fp() ++ assert(! is_compiled_frame(), "unknown compiled frame size"); ++ return fp(); ++} ++ ++#ifndef PRODUCT ++// This is a generic constructor which is only used by pns() in debug.cpp. ++frame::frame(void* sp, void* fp, void* pc) { ++ init((intptr_t*)sp, (intptr_t*)fp, (address)pc); ++} ++ ++void frame::pd_ps() {} ++#endif +diff --git a/src/hotspot/cpu/mips/frame_mips.hpp b/src/hotspot/cpu/mips/frame_mips.hpp +new file mode 100644 +index 0000000000..bdbfa8aaa2 +--- /dev/null ++++ b/src/hotspot/cpu/mips/frame_mips.hpp +@@ -0,0 +1,215 @@ ++/* ++ * Copyright (c) 1997, 2013, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 
++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#ifndef CPU_MIPS_VM_FRAME_MIPS_HPP ++#define CPU_MIPS_VM_FRAME_MIPS_HPP ++ ++#include "runtime/synchronizer.hpp" ++ ++// A frame represents a physical stack frame (an activation). Frames can be ++// C or Java frames, and the Java frames can be interpreted or compiled. ++// In contrast, vframes represent source-level activations, so that one physical frame ++// can correspond to multiple source level frames because of inlining. ++// A frame is comprised of {pc, fp, sp} ++// ------------------------------ Asm interpreter ---------------------------------------- ++// Layout of asm interpreter frame: ++// [expression stack ] * <- sp ++// [monitors ] \ ++// ... | monitor block size ++// [monitors ] / ++// [monitor block size ] ++// [byte code index/pointr] = bcx() bcx_offset ++// [pointer to locals ] = locals() locals_offset ++// [constant pool cache ] = cache() cache_offset ++// [methodData ] = mdp() mdx_offset ++// [methodOop ] = method() method_offset ++// [last sp ] = last_sp() last_sp_offset ++// [old stack pointer ] (sender_sp) sender_sp_offset ++// [old frame pointer ] <- fp = link() ++// [return pc ] ++// [oop temp ] (only for native calls) ++// [locals and parameters ] ++// <- sender sp ++// ------------------------------ Asm interpreter ---------------------------------------- ++ ++// ------------------------------ C++ interpreter ---------------------------------------- ++// ++// Layout of C++ interpreter frame: (While executing in BytecodeInterpreter::run) ++// ++// <- SP (current sp) ++// [local variables ] BytecodeInterpreter::run local variables ++// ... BytecodeInterpreter::run local variables ++// [local variables ] BytecodeInterpreter::run local variables ++// [old frame pointer ] fp [ BytecodeInterpreter::run's fp ] ++// [return pc ] (return to frame manager) ++// [interpreter_state* ] (arg to BytecodeInterpreter::run) -------------- ++// [expression stack ] <- last_Java_sp | ++// [... ] * <- interpreter_state.stack | ++// [expression stack ] * <- interpreter_state.stack_base | ++// [monitors ] \ | ++// ... | monitor block size | ++// [monitors ] / <- interpreter_state.monitor_base | ++// [struct interpretState ] <-----------------------------------------| ++// [return pc ] (return to callee of frame manager [1] ++// [locals and parameters ] ++// <- sender sp ++ ++// [1] When the c++ interpreter calls a new method it returns to the frame ++// manager which allocates a new frame on the stack. In that case there ++// is no real callee of this newly allocated frame. The frame manager is ++// aware of the additional frame(s) and will pop them as nested calls ++// complete. Howevers tTo make it look good in the debugger the frame ++// manager actually installs a dummy pc pointing to RecursiveInterpreterActivation ++// with a fake interpreter_state* parameter to make it easy to debug ++// nested calls. ++ ++// Note that contrary to the layout for the assembly interpreter the ++// expression stack allocated for the C++ interpreter is full sized. ++// However this is not as bad as it seems as the interpreter frame_manager ++// will truncate the unused space on succesive method calls. ++// ++// ------------------------------ C++ interpreter ---------------------------------------- ++ ++// Layout of interpreter frame: ++// ++// [ monitor entry ] <--- sp ++// ... 
++// [ monitor entry ] ++// -9 [ monitor block top ] ( the top monitor entry ) ++// -8 [ byte code pointer ] (if native, bcp = 0) ++// -7 [ constant pool cache ] ++// -6 [ methodData ] mdx_offset(not core only) ++// -5 [ mirror ] ++// -4 [ methodOop ] ++// -3 [ locals offset ] ++// -2 [ last_sp ] ++// -1 [ sender's sp ] ++// 0 [ sender's fp ] <--- fp ++// 1 [ return address ] ++// 2 [ oop temp offset ] (only for native calls) ++// 3 [ result handler offset ] (only for native calls) ++// 4 [ result type info ] (only for native calls) ++// [ local var m-1 ] ++// ... ++// [ local var 0 ] ++// [ argumnet word n-1 ] <--- ( sender's sp ) ++// ... ++// [ argument word 0 ] <--- S7 ++ ++ public: ++ enum { ++ pc_return_offset = 0, ++ // All frames ++ link_offset = 0, ++ return_addr_offset = 1, ++ // non-interpreter frames ++ sender_sp_offset = 2, ++ ++ // Interpreter frames ++ interpreter_frame_return_addr_offset = 1, ++ interpreter_frame_result_handler_offset = 3, // for native calls only ++ interpreter_frame_oop_temp_offset = 2, // for native calls only ++ ++ interpreter_frame_sender_fp_offset = 0, ++ interpreter_frame_sender_sp_offset = -1, ++ // outgoing sp before a call to an invoked method ++ interpreter_frame_last_sp_offset = interpreter_frame_sender_sp_offset - 1, ++ interpreter_frame_locals_offset = interpreter_frame_last_sp_offset - 1, ++ interpreter_frame_method_offset = interpreter_frame_locals_offset - 1, ++ interpreter_frame_mirror_offset = interpreter_frame_method_offset - 1, ++ interpreter_frame_mdp_offset = interpreter_frame_mirror_offset - 1, ++ interpreter_frame_cache_offset = interpreter_frame_mdp_offset - 1, ++ interpreter_frame_bcp_offset = interpreter_frame_cache_offset - 1, ++ interpreter_frame_initial_sp_offset = interpreter_frame_bcp_offset - 1, ++ ++ interpreter_frame_monitor_block_top_offset = interpreter_frame_initial_sp_offset, ++ interpreter_frame_monitor_block_bottom_offset = interpreter_frame_initial_sp_offset, ++ ++ // Entry frames ++ entry_frame_call_wrapper_offset = -9, ++ ++ // Native frames ++ ++ native_frame_initial_param_offset = 2 ++ ++ }; ++ ++ intptr_t ptr_at(int offset) const { ++ return *ptr_at_addr(offset); ++ } ++ ++ void ptr_at_put(int offset, intptr_t value) { ++ *ptr_at_addr(offset) = value; ++ } ++ ++ private: ++ // an additional field beyond _sp and _pc: ++ intptr_t* _fp; // frame pointer ++ // The interpreter and adapters will extend the frame of the caller. ++ // Since oopMaps are based on the sp of the caller before extension ++ // we need to know that value. However in order to compute the address ++ // of the return address we need the real "raw" sp. Since sparc already ++ // uses sp() to mean "raw" sp and unextended_sp() to mean the caller's ++ // original sp we use that convention. 
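
To make the sp/unextended_sp comment above concrete (a toy model with invented numbers, not HotSpot code): if an i2c adapter extends the caller's frame by two argument words, the raw sp moves down while the unextended sp, which oop maps are based on, stays where the caller left it.

    // Toy model only: raw sp vs. unextended sp after an adapter extension.
    #include <cassert>
    #include <cstdint>

    int main() {
      const uintptr_t wordSize      = 8;                        // 64-bit target
      const uintptr_t caller_sp     = 0x7ffff000;               // sp before extension
      const uintptr_t raw_sp        = caller_sp - 2 * wordSize; // adapter pushed 2 words
      const uintptr_t unextended_sp = caller_sp;                // basis for oop-map offsets

      assert(raw_sp < unextended_sp);        // stack grew downward
      assert(unextended_sp - raw_sp == 16);  // by exactly two words
      return 0;
    }
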
++ ++ intptr_t* _unextended_sp; ++ void adjust_unextended_sp(); ++ ++ intptr_t* ptr_at_addr(int offset) const { ++ return (intptr_t*) addr_at(offset); ++ } ++#ifdef ASSERT ++ // Used in frame::sender_for_{interpreter,compiled}_frame ++ static void verify_deopt_original_pc(CompiledMethod* nm, intptr_t* unextended_sp); ++#endif ++ ++ public: ++ // Constructors ++ ++ frame(intptr_t* sp, intptr_t* fp, address pc); ++ ++ frame(intptr_t* sp, intptr_t* unextended_sp, intptr_t* fp, address pc); ++ ++ frame(intptr_t* sp, intptr_t* fp); ++ ++ void init(intptr_t* sp, intptr_t* fp, address pc); ++ ++ // accessors for the instance variables ++ intptr_t* fp() const { return _fp; } ++ ++ inline address* sender_pc_addr() const; ++ ++ // expression stack tos if we are nested in a java call ++ intptr_t* interpreter_frame_last_sp() const; ++ ++ // helper to update a map with callee-saved FP ++ static void update_map_with_saved_link(RegisterMap* map, intptr_t** link_addr); ++ ++ // deoptimization support ++ void interpreter_frame_set_last_sp(intptr_t* sp); ++ ++ static jint interpreter_frame_expression_stack_direction() { return -1; } ++ ++#endif // CPU_MIPS_VM_FRAME_MIPS_HPP +diff --git a/src/hotspot/cpu/mips/frame_mips.inline.hpp b/src/hotspot/cpu/mips/frame_mips.inline.hpp +new file mode 100644 +index 0000000000..c408f01d69 +--- /dev/null ++++ b/src/hotspot/cpu/mips/frame_mips.inline.hpp +@@ -0,0 +1,238 @@ ++/* ++ * Copyright (c) 1997, 2013, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ * ++ */ ++ ++#ifndef CPU_MIPS_VM_FRAME_MIPS_INLINE_HPP ++#define CPU_MIPS_VM_FRAME_MIPS_INLINE_HPP ++ ++#include "code/codeCache.hpp" ++#include "code/vmreg.inline.hpp" ++ ++// Inline functions for Loongson frames: ++ ++// Constructors: ++ ++inline frame::frame() { ++ _pc = NULL; ++ _sp = NULL; ++ _unextended_sp = NULL; ++ _fp = NULL; ++ _cb = NULL; ++ _deopt_state = unknown; ++} ++ ++inline void frame::init(intptr_t* sp, intptr_t* fp, address pc) { ++ _sp = sp; ++ _unextended_sp = sp; ++ _fp = fp; ++ _pc = pc; ++ assert(pc != NULL, "no pc?"); ++ _cb = CodeCache::find_blob(pc); ++ adjust_unextended_sp(); ++ ++ address original_pc = CompiledMethod::get_deopt_original_pc(this); ++ if (original_pc != NULL) { ++ _pc = original_pc; ++ _deopt_state = is_deoptimized; ++ } else { ++ _deopt_state = not_deoptimized; ++ } ++} ++ ++inline frame::frame(intptr_t* sp, intptr_t* fp, address pc) { ++ init(sp, fp, pc); ++} ++ ++inline frame::frame(intptr_t* sp, intptr_t* unextended_sp, intptr_t* fp, address pc) { ++ _sp = sp; ++ _unextended_sp = unextended_sp; ++ _fp = fp; ++ _pc = pc; ++ assert(pc != NULL, "no pc?"); ++ _cb = CodeCache::find_blob(pc); ++ adjust_unextended_sp(); ++ ++ address original_pc = CompiledMethod::get_deopt_original_pc(this); ++ if (original_pc != NULL) { ++ _pc = original_pc; ++ _deopt_state = is_deoptimized; ++ } else { ++ _deopt_state = not_deoptimized; ++ } ++} ++ ++inline frame::frame(intptr_t* sp, intptr_t* fp) { ++ _sp = sp; ++ _unextended_sp = sp; ++ _fp = fp; ++ _pc = (address)(sp[-1]); ++ ++ // Here's a sticky one. This constructor can be called via AsyncGetCallTrace ++ // when last_Java_sp is non-null but the pc fetched is junk. If we are truly ++ // unlucky the junk value could be to a zombied method and we'll die on the ++ // find_blob call. This is also why we can have no asserts on the validity ++ // of the pc we find here. AsyncGetCallTrace -> pd_get_top_frame_for_signal_handler ++ // -> pd_last_frame should use a specialized version of pd_last_frame which could ++ // call a specilaized frame constructor instead of this one. ++ // Then we could use the assert below. However this assert is of somewhat dubious ++ // value. ++ // assert(_pc != NULL, "no pc?"); ++ ++ _cb = CodeCache::find_blob(_pc); ++ adjust_unextended_sp(); ++ address original_pc = CompiledMethod::get_deopt_original_pc(this); ++ if (original_pc != NULL) { ++ _pc = original_pc; ++ _deopt_state = is_deoptimized; ++ } else { ++ _deopt_state = not_deoptimized; ++ } ++} ++ ++// Accessors ++ ++inline bool frame::equal(frame other) const { ++ bool ret = sp() == other.sp() ++ && unextended_sp() == other.unextended_sp() ++ && fp() == other.fp() ++ && pc() == other.pc(); ++ assert(!ret || ret && cb() == other.cb() && _deopt_state == other._deopt_state, "inconsistent construction"); ++ return ret; ++} ++ ++// Return unique id for this frame. The id must have a value where we can distinguish ++// identity and younger/older relationship. NULL represents an invalid (incomparable) ++// frame. 
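
A short note on the comparisons just below (illustrative, not part of the patch): frame::id() returns the unextended sp, and because the stack grows toward lower addresses, a younger (more recent) frame has a numerically smaller id. That is why is_younger uses < and is_older uses >. With invented addresses:

    // Sketch only: "younger" means "lower address" on a downward-growing stack.
    #include <cassert>
    #include <cstdint>

    int main() {
      const uintptr_t older_frame_id   = 0x7ffff100;  // created first, higher address
      const uintptr_t younger_frame_id = 0x7ffff080;  // pushed later, lower address
      assert(younger_frame_id < older_frame_id);
      return 0;
    }
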
++inline intptr_t* frame::id(void) const { return unextended_sp(); } ++ ++// Relationals on frames based ++// Return true if the frame is younger (more recent activation) than the frame represented by id ++inline bool frame::is_younger(intptr_t* id) const { assert(this->id() != NULL && id != NULL, "NULL frame id"); ++ return this->id() < id ; } ++ ++// Return true if the frame is older (less recent activation) than the frame represented by id ++inline bool frame::is_older(intptr_t* id) const { assert(this->id() != NULL && id != NULL, "NULL frame id"); ++ return this->id() > id ; } ++ ++inline intptr_t* frame::link() const { ++ return (intptr_t*) *(intptr_t **)addr_at(link_offset); ++} ++ ++inline intptr_t* frame::link_or_null() const { ++ intptr_t** ptr = (intptr_t **)addr_at(link_offset); ++ return os::is_readable_pointer(ptr) ? *ptr : NULL; ++} ++ ++inline intptr_t* frame::unextended_sp() const { return _unextended_sp; } ++ ++// Return address: ++ ++inline address* frame::sender_pc_addr() const { return (address*) addr_at( return_addr_offset); } ++inline address frame::sender_pc() const { return *sender_pc_addr(); } ++ ++inline intptr_t* frame::sender_sp() const { return addr_at( sender_sp_offset); } ++ ++inline intptr_t** frame::interpreter_frame_locals_addr() const { ++ return (intptr_t**)addr_at(interpreter_frame_locals_offset); ++} ++ ++inline intptr_t* frame::interpreter_frame_last_sp() const { ++ return *(intptr_t**)addr_at(interpreter_frame_last_sp_offset); ++} ++ ++inline intptr_t* frame::interpreter_frame_bcp_addr() const { ++ return (intptr_t*)addr_at(interpreter_frame_bcp_offset); ++} ++ ++ ++inline intptr_t* frame::interpreter_frame_mdp_addr() const { ++ return (intptr_t*)addr_at(interpreter_frame_mdp_offset); ++} ++ ++ ++ ++// Constant pool cache ++ ++inline ConstantPoolCache** frame::interpreter_frame_cache_addr() const { ++ return (ConstantPoolCache**)addr_at(interpreter_frame_cache_offset); ++} ++ ++// Method ++ ++inline Method** frame::interpreter_frame_method_addr() const { ++ return (Method**)addr_at(interpreter_frame_method_offset); ++} ++ ++// Mirror ++ ++inline oop* frame::interpreter_frame_mirror_addr() const { ++ return (oop*)addr_at(interpreter_frame_mirror_offset); ++} ++ ++// top of expression stack ++inline intptr_t* frame::interpreter_frame_tos_address() const { ++ intptr_t* last_sp = interpreter_frame_last_sp(); ++ if (last_sp == NULL ) { ++ return sp(); ++ } else { ++ // sp() may have been extended by an adapter ++ assert(last_sp <= (intptr_t*)interpreter_frame_monitor_end(), "bad tos"); ++ return last_sp; ++ } ++} ++ ++inline oop* frame::interpreter_frame_temp_oop_addr() const { ++ return (oop *)(fp() + interpreter_frame_oop_temp_offset); ++} ++ ++inline int frame::interpreter_frame_monitor_size() { ++ return BasicObjectLock::size(); ++} ++ ++ ++// expression stack ++// (the max_stack arguments are used by the GC; see class FrameClosure) ++ ++inline intptr_t* frame::interpreter_frame_expression_stack() const { ++ intptr_t* monitor_end = (intptr_t*) interpreter_frame_monitor_end(); ++ return monitor_end-1; ++} ++ ++// Entry frames ++ ++inline JavaCallWrapper** frame::entry_frame_call_wrapper_addr() const { ++ return (JavaCallWrapper**)addr_at(entry_frame_call_wrapper_offset); ++} ++ ++// Compiled frames ++ ++inline oop frame::saved_oop_result(RegisterMap* map) const { ++ return *((oop*) map->location(V0->as_VMReg())); ++} ++ ++inline void frame::set_saved_oop_result(RegisterMap* map, oop obj) { ++ *((oop*) map->location(V0->as_VMReg())) = obj; ++} ++ 
++#endif // CPU_MIPS_VM_FRAME_MIPS_INLINE_HPP +diff --git a/src/hotspot/cpu/mips/gc/g1/g1BarrierSetAssembler_mips.cpp b/src/hotspot/cpu/mips/gc/g1/g1BarrierSetAssembler_mips.cpp +new file mode 100644 +index 0000000000..179f7703c8 +--- /dev/null ++++ b/src/hotspot/cpu/mips/gc/g1/g1BarrierSetAssembler_mips.cpp +@@ -0,0 +1,364 @@ ++/* ++ * Copyright (c) 2018, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2018, 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#include "precompiled.hpp" ++#include "asm/macroAssembler.inline.hpp" ++#include "gc/g1/g1BarrierSet.hpp" ++#include "gc/g1/g1BarrierSetAssembler.hpp" ++#include "gc/g1/g1BarrierSetRuntime.hpp" ++#include "gc/g1/g1CardTable.hpp" ++#include "gc/g1/g1ThreadLocalData.hpp" ++#include "gc/g1/heapRegion.hpp" ++#include "interpreter/interp_masm.hpp" ++#include "runtime/sharedRuntime.hpp" ++#include "utilities/macros.hpp" ++ ++#define __ masm-> ++ ++void G1BarrierSetAssembler::gen_write_ref_array_pre_barrier(MacroAssembler* masm, DecoratorSet decorators, ++ Register addr, Register count) { ++ bool dest_uninitialized = (decorators & IS_DEST_UNINITIALIZED) != 0; ++ ++ if (!dest_uninitialized) { ++#ifndef OPT_THREAD ++ Register thread = T9; ++ __ get_thread(thread); ++#else ++ Register thread = TREG; ++#endif ++ ++ Label filtered; ++ Address in_progress(thread, in_bytes(G1ThreadLocalData::satb_mark_queue_active_offset())); ++ // Is marking active? 
++ if (in_bytes(SATBMarkQueue::byte_width_of_active()) == 4) { ++ __ lw(AT, in_progress); ++ } else { ++ assert(in_bytes(SATBMarkQueue::byte_width_of_active()) == 1, "Assumption"); ++ __ lb(AT, in_progress); ++ } ++ ++ __ beq(AT, R0, filtered); ++ __ delayed()->nop(); ++ ++ __ pushad(); // push registers ++ if (count == A0) { ++ if (addr == A1) { ++ __ move(AT, A0); ++ __ move(A0, A1); ++ __ move(A1, AT); ++ } else { ++ __ move(A1, count); ++ __ move(A0, addr); ++ } ++ } else { ++ __ move(A0, addr); ++ __ move(A1, count); ++ } ++ if (UseCompressedOops) { ++ __ call_VM_leaf(CAST_FROM_FN_PTR(address, G1BarrierSetRuntime::write_ref_array_pre_narrow_oop_entry), 2); ++ } else { ++ __ call_VM_leaf(CAST_FROM_FN_PTR(address, G1BarrierSetRuntime::write_ref_array_pre_oop_entry), 2); ++ } ++ __ popad(); ++ ++ __ bind(filtered); ++ } ++} ++ ++void G1BarrierSetAssembler::gen_write_ref_array_post_barrier(MacroAssembler* masm, DecoratorSet decorators, ++ Register addr, Register count, Register tmp) { ++ __ pushad(); // push registers (overkill) ++ if (count == A0) { ++ assert_different_registers(A1, addr); ++ __ move(A1, count); ++ __ move(A0, addr); ++ } else { ++ assert_different_registers(A0, count); ++ __ move(A0, addr); ++ __ move(A1, count); ++ } ++ __ call_VM_leaf(CAST_FROM_FN_PTR(address, G1BarrierSetRuntime::write_ref_array_post_entry), 2); ++ __ popad(); ++} ++ ++void G1BarrierSetAssembler::load_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type, ++ Register dst, Address src, Register tmp1, Register tmp_thread) { ++ bool on_oop = type == T_OBJECT || type == T_ARRAY; ++ bool on_weak = (decorators & ON_WEAK_OOP_REF) != 0; ++ bool on_phantom = (decorators & ON_PHANTOM_OOP_REF) != 0; ++ bool on_reference = on_weak || on_phantom; ++ ModRefBarrierSetAssembler::load_at(masm, decorators, type, dst, src, tmp1, tmp_thread); ++ if (on_oop && on_reference) { ++ const Register thread = TREG; ++#ifndef OPT_THREAD ++ __ get_thread(thread); ++#endif ++ // Generate the G1 pre-barrier code to log the value of ++ // the referent field in an SATB buffer. ++ g1_write_barrier_pre(masm /* masm */, ++ noreg /* obj */, ++ dst /* pre_val */, ++ thread /* thread */, ++ tmp1 /* tmp */, ++ true /* tosca_live */, ++ true /* expand_call */); ++ } ++} ++ ++void G1BarrierSetAssembler::g1_write_barrier_pre(MacroAssembler* masm, ++ Register obj, ++ Register pre_val, ++ Register thread, ++ Register tmp, ++ bool tosca_live, ++ bool expand_call) { ++ // If expand_call is true then we expand the call_VM_leaf macro ++ // directly to skip generating the check by ++ // InterpreterMacroAssembler::call_VM_leaf_base that checks _last_sp. ++ ++ assert(thread == TREG, "must be"); ++ ++ Label done; ++ Label runtime; ++ ++ assert(pre_val != noreg, "check this code"); ++ ++ if (obj != noreg) { ++ assert_different_registers(obj, pre_val, tmp); ++ assert(pre_val != V0, "check this code"); ++ } ++ ++ Address in_progress(thread, in_bytes(G1ThreadLocalData::satb_mark_queue_active_offset())); ++ Address index(thread, in_bytes(G1ThreadLocalData::satb_mark_queue_index_offset())); ++ Address buffer(thread, in_bytes(G1ThreadLocalData::satb_mark_queue_buffer_offset())); ++ ++ // Is marking active? ++ if (in_bytes(SATBMarkQueue::byte_width_of_active()) == 4) { ++ __ lw(AT, in_progress); ++ } else { ++ assert(in_bytes(SATBMarkQueue::byte_width_of_active()) == 1, "Assumption"); ++ __ lb(AT, in_progress); ++ } ++ __ beq(AT, R0, done); ++ __ delayed()->nop(); ++ ++ // Do we need to load the previous value? 
++ if (obj != noreg) { ++ __ load_heap_oop(pre_val, Address(obj, 0)); ++ } ++ ++ // Is the previous value null? ++ __ beq(pre_val, R0, done); ++ __ delayed()->nop(); ++ ++ // Can we store original value in the thread's buffer? ++ // Is index == 0? ++ // (The index field is typed as size_t.) ++ ++ __ ld(tmp, index); ++ __ beq(tmp, R0, runtime); ++ __ delayed()->nop(); ++ ++ __ daddiu(tmp, tmp, -1 * wordSize); ++ __ sd(tmp, index); ++ __ ld(AT, buffer); ++ __ daddu(tmp, tmp, AT); ++ ++ // Record the previous value ++ __ sd(pre_val, tmp, 0); ++ __ beq(R0, R0, done); ++ __ delayed()->nop(); ++ ++ __ bind(runtime); ++ // save the live input values ++ if (tosca_live) __ push(V0); ++ ++ if (obj != noreg && obj != V0) __ push(obj); ++ ++ if (pre_val != V0) __ push(pre_val); ++ ++ // Calling the runtime using the regular call_VM_leaf mechanism generates ++ // code (generated by InterpreterMacroAssember::call_VM_leaf_base) ++ // that checks that the *(ebp+frame::interpreter_frame_last_sp) == NULL. ++ // ++ // If we care generating the pre-barrier without a frame (e.g. in the ++ // intrinsified Reference.get() routine) then ebp might be pointing to ++ // the caller frame and so this check will most likely fail at runtime. ++ // ++ // Expanding the call directly bypasses the generation of the check. ++ // So when we do not have have a full interpreter frame on the stack ++ // expand_call should be passed true. ++ ++ if (expand_call) { ++ assert(pre_val != A1, "smashed arg"); ++ if (thread != A1) __ move(A1, thread); ++ if (pre_val != A0) __ move(A0, pre_val); ++ __ super_call_VM_leaf(CAST_FROM_FN_PTR(address, G1BarrierSetRuntime::write_ref_field_pre_entry), pre_val, thread); ++ } else { ++ __ call_VM_leaf(CAST_FROM_FN_PTR(address, G1BarrierSetRuntime::write_ref_field_pre_entry), pre_val, thread); ++ } ++ ++ // save the live input values ++ if (pre_val != V0) ++ __ pop(pre_val); ++ ++ if (obj != noreg && obj != V0) ++ __ pop(obj); ++ ++ if (tosca_live) __ pop(V0); ++ ++ __ bind(done); ++} ++ ++void G1BarrierSetAssembler::g1_write_barrier_post(MacroAssembler* masm, ++ Register store_addr, ++ Register new_val, ++ Register thread, ++ Register tmp, ++ Register tmp2) { ++ assert_different_registers(tmp, tmp2, AT); ++ assert(thread == TREG, "must be"); ++ ++ Address queue_index(thread, in_bytes(G1ThreadLocalData::dirty_card_queue_index_offset())); ++ Address buffer(thread, in_bytes(G1ThreadLocalData::dirty_card_queue_buffer_offset())); ++ ++ CardTableBarrierSet* ct = barrier_set_cast(BarrierSet::barrier_set()); ++ assert(sizeof(*ct->card_table()->byte_map_base()) == sizeof(jbyte), "adjust this code"); ++ ++ Label done; ++ Label runtime; ++ ++ // Does store cross heap regions? ++ __ xorr(AT, store_addr, new_val); ++ __ dsrl(AT, AT, HeapRegion::LogOfHRGrainBytes); ++ __ beq(AT, R0, done); ++ __ delayed()->nop(); ++ ++ // crosses regions, storing NULL? ++ __ beq(new_val, R0, done); ++ __ delayed()->nop(); ++ ++ // storing region crossing non-NULL, is card already dirty? ++ const Register card_addr = tmp; ++ const Register cardtable = tmp2; ++ ++ __ move(card_addr, store_addr); ++ __ dsrl(card_addr, card_addr, CardTable::card_shift); ++ // Do not use ExternalAddress to load 'byte_map_base', since 'byte_map_base' is NOT ++ // a valid address and therefore is not properly handled by the relocation code. 
++ __ set64(cardtable, (intptr_t)ct->card_table()->byte_map_base()); ++ __ daddu(card_addr, card_addr, cardtable); ++ ++ __ lb(AT, card_addr, 0); ++ __ daddiu(AT, AT, -1 * (int)G1CardTable::g1_young_card_val()); ++ __ beq(AT, R0, done); ++ __ delayed()->nop(); ++ ++ __ sync(); ++ __ lb(AT, card_addr, 0); ++ __ daddiu(AT, AT, -1 * (int)G1CardTable::dirty_card_val()); ++ __ beq(AT, R0, done); ++ __ delayed()->nop(); ++ ++ // storing a region crossing, non-NULL oop, card is clean. ++ // dirty card and log. ++ __ move(AT, (int)G1CardTable::dirty_card_val()); ++ __ sb(AT, card_addr, 0); ++ ++ __ lw(AT, queue_index); ++ __ beq(AT, R0, runtime); ++ __ delayed()->nop(); ++ __ daddiu(AT, AT, -1 * wordSize); ++ __ sw(AT, queue_index); ++ __ ld(tmp2, buffer); ++ __ ld(AT, queue_index); ++ __ daddu(tmp2, tmp2, AT); ++ __ sd(card_addr, tmp2, 0); ++ __ beq(R0, R0, done); ++ __ delayed()->nop(); ++ ++ __ bind(runtime); ++ // save the live input values ++ __ push(store_addr); ++ __ push(new_val); ++ __ call_VM_leaf(CAST_FROM_FN_PTR(address, G1BarrierSetRuntime::write_ref_field_post_entry), card_addr, TREG); ++ __ pop(new_val); ++ __ pop(store_addr); ++ ++ __ bind(done); ++} ++ ++void G1BarrierSetAssembler::oop_store_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type, ++ Address dst, Register val, Register tmp1, Register tmp2) { ++ bool in_heap = (decorators & IN_HEAP) != 0; ++ bool as_normal = (decorators & AS_NORMAL) != 0; ++ assert((decorators & IS_DEST_UNINITIALIZED) == 0, "unsupported"); ++ ++ bool needs_pre_barrier = as_normal; ++ bool needs_post_barrier = val != noreg && in_heap; ++ ++ Register tmp3 = RT3; ++ Register rthread = TREG; ++ // flatten object address if needed ++ // We do it regardless of precise because we need the registers ++ if (dst.index() == noreg && dst.disp() == 0) { ++ if (dst.base() != tmp3) { ++ __ move(tmp3, dst.base()); ++ } ++ } else { ++ __ lea(tmp3, dst); ++ } ++ ++ if (needs_pre_barrier) { ++ g1_write_barrier_pre(masm /*masm*/, ++ tmp3 /* obj */, ++ tmp2 /* pre_val */, ++ rthread /* thread */, ++ tmp1 /* tmp */, ++ val != noreg /* tosca_live */, ++ false /* expand_call */); ++ } ++ if (val == noreg) { ++ BarrierSetAssembler::store_at(masm, decorators, type, Address(tmp3, 0), val, noreg, noreg); ++ } else { ++ Register new_val = val; ++ if (needs_post_barrier) { ++ // G1 barrier needs uncompressed oop for region cross check. ++ if (UseCompressedOops) { ++ new_val = tmp2; ++ __ move(new_val, val); ++ } ++ } ++ BarrierSetAssembler::store_at(masm, decorators, type, Address(tmp3, 0), val, noreg, noreg); ++ if (needs_post_barrier) { ++ g1_write_barrier_post(masm /*masm*/, ++ tmp3 /* store_adr */, ++ new_val /* new_val */, ++ rthread /* thread */, ++ tmp1 /* tmp */, ++ tmp2 /* tmp2 */); ++ } ++ } ++} +diff --git a/src/hotspot/cpu/mips/gc/g1/g1BarrierSetAssembler_mips.hpp b/src/hotspot/cpu/mips/gc/g1/g1BarrierSetAssembler_mips.hpp +new file mode 100644 +index 0000000000..ec5c243c3f +--- /dev/null ++++ b/src/hotspot/cpu/mips/gc/g1/g1BarrierSetAssembler_mips.hpp +@@ -0,0 +1,71 @@ ++/* ++ * Copyright (c) 2018, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2018, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. 
++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#ifndef CPU_MIPS_GC_G1_G1BARRIERSETASSEMBLER_MIPS_HPP ++#define CPU_MIPS_GC_G1_G1BARRIERSETASSEMBLER_MIPS_HPP ++ ++#include "asm/macroAssembler.hpp" ++#include "gc/shared/modRefBarrierSetAssembler.hpp" ++ ++class LIR_Assembler; ++class StubAssembler; ++class G1PreBarrierStub; ++class G1PostBarrierStub; ++ ++class G1BarrierSetAssembler: public ModRefBarrierSetAssembler { ++ protected: ++ virtual void gen_write_ref_array_pre_barrier(MacroAssembler* masm, DecoratorSet decorators, Register addr, Register count); ++ virtual void gen_write_ref_array_post_barrier(MacroAssembler* masm, DecoratorSet decorators, Register addr, Register count, Register tmp); ++ ++ void g1_write_barrier_pre(MacroAssembler* masm, ++ Register obj, ++ Register pre_val, ++ Register thread, ++ Register tmp, ++ bool tosca_live, ++ bool expand_call); ++ ++ void g1_write_barrier_post(MacroAssembler* masm, ++ Register store_addr, ++ Register new_val, ++ Register thread, ++ Register tmp, ++ Register tmp2); ++ ++ virtual void oop_store_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type, ++ Address dst, Register val, Register tmp1, Register tmp2); ++ ++ public: ++ void gen_pre_barrier_stub(LIR_Assembler* ce, G1PreBarrierStub* stub); ++ void gen_post_barrier_stub(LIR_Assembler* ce, G1PostBarrierStub* stub); ++ ++ void generate_c1_pre_barrier_runtime_stub(StubAssembler* sasm); ++ void generate_c1_post_barrier_runtime_stub(StubAssembler* sasm); ++ ++ virtual void load_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type, ++ Register dst, Address src, Register tmp1, Register tmp_thread); ++}; ++ ++#endif // CPU_MIPS_GC_G1_G1BARRIERSETASSEMBLER_MIPS_HPP +diff --git a/src/hotspot/cpu/mips/gc/shared/barrierSetAssembler_mips.cpp b/src/hotspot/cpu/mips/gc/shared/barrierSetAssembler_mips.cpp +new file mode 100644 +index 0000000000..071debdc3a +--- /dev/null ++++ b/src/hotspot/cpu/mips/gc/shared/barrierSetAssembler_mips.cpp +@@ -0,0 +1,194 @@ ++/* ++ * Copyright (c) 2018, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2018, 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). 
++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#include "precompiled.hpp" ++#include "gc/shared/barrierSetAssembler.hpp" ++#include "gc/shared/collectedHeap.hpp" ++#include "interpreter/interp_masm.hpp" ++#include "runtime/jniHandles.hpp" ++#include "runtime/thread.hpp" ++ ++#define __ masm-> ++ ++void BarrierSetAssembler::load_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type, ++ Register dst, Address src, Register tmp1, Register tmp_thread) { ++ bool in_heap = (decorators & IN_HEAP) != 0; ++ bool in_native = (decorators & IN_NATIVE) != 0; ++ bool is_not_null = (decorators & IS_NOT_NULL) != 0; ++ ++ switch (type) { ++ case T_OBJECT: ++ case T_ARRAY: { ++ if (in_heap) { ++ if (UseCompressedOops) { ++ __ lwu(dst, src); ++ if (is_not_null) { ++ __ decode_heap_oop_not_null(dst); ++ } else { ++ __ decode_heap_oop(dst); ++ } ++ } else ++ { ++ __ ld_ptr(dst, src); ++ } ++ } else { ++ assert(in_native, "why else?"); ++ __ ld_ptr(dst, src); ++ } ++ break; ++ } ++ case T_BOOLEAN: __ lbu (dst, src); break; ++ case T_BYTE: __ lb (dst, src); break; ++ case T_CHAR: __ lhu (dst, src); break; ++ case T_SHORT: __ lh (dst, src); break; ++ case T_INT: __ lw (dst, src); break; ++ case T_LONG: __ ld (dst, src); break; ++ case T_ADDRESS: __ ld_ptr(dst, src); break; ++ case T_FLOAT: ++ assert(dst == noreg, "only to ftos"); ++ __ lwc1(FSF, src); ++ break; ++ case T_DOUBLE: ++ assert(dst == noreg, "only to dtos"); ++ __ ldc1(FSF, src); ++ break; ++ default: Unimplemented(); ++ } ++} ++ ++void BarrierSetAssembler::store_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type, ++ Address dst, Register val, Register tmp1, Register tmp2) { ++ bool in_heap = (decorators & IN_HEAP) != 0; ++ bool in_native = (decorators & IN_NATIVE) != 0; ++ bool is_not_null = (decorators & IS_NOT_NULL) != 0; ++ ++ switch (type) { ++ case T_OBJECT: ++ case T_ARRAY: { ++ if (in_heap) { ++ if (val == noreg) { ++ assert(!is_not_null, "inconsistent access"); ++ if (UseCompressedOops) { ++ __ sw(R0, dst); ++ } else { ++ __ sd(R0, dst); ++ } ++ } else { ++ if (UseCompressedOops) { ++ assert(!dst.uses(val), "not enough registers"); ++ if (is_not_null) { ++ __ encode_heap_oop_not_null(val); ++ } else { ++ __ encode_heap_oop(val); ++ } ++ __ sw(val, dst); ++ } else ++ { ++ __ st_ptr(val, dst); ++ } ++ } ++ } else { ++ assert(in_native, "why else?"); ++ assert(val != noreg, "not supported"); ++ __ st_ptr(val, dst); ++ } ++ break; ++ } ++ case T_BOOLEAN: ++ __ andi(val, val, 0x1); // boolean is true if LSB is 1 ++ __ sb(val, dst); ++ break; ++ case T_BYTE: ++ __ sb(val, dst); ++ break; ++ case T_SHORT: ++ __ sh(val, dst); ++ break; ++ case T_CHAR: ++ __ sh(val, dst); ++ break; ++ case T_INT: ++ __ sw(val, dst); ++ break; ++ case T_LONG: ++ __ sd(val, dst); ++ break; ++ case T_FLOAT: ++ assert(val == noreg, "only tos"); ++ __ swc1(FSF, dst); ++ break; ++ case T_DOUBLE: ++ assert(val == noreg, "only tos"); ++ __ sdc1(FSF, dst); ++ break; ++ case T_ADDRESS: ++ __ st_ptr(val, dst); ++ break; ++ default: Unimplemented(); ++ } ++} ++ ++void BarrierSetAssembler::obj_equals(MacroAssembler* masm, ++ Register obj1, Address obj2) { ++ Unimplemented(); ++} ++ ++void 
BarrierSetAssembler::obj_equals(MacroAssembler* masm, ++ Register obj1, Register obj2) { ++ Unimplemented(); ++} ++ ++void BarrierSetAssembler::try_resolve_jobject_in_native(MacroAssembler* masm, Register jni_env, ++ Register obj, Register tmp, Label& slowpath) { ++ __ clear_jweak_tag(obj); ++ __ ld_ptr(obj, Address(obj, 0)); ++} ++ ++void BarrierSetAssembler::tlab_allocate(MacroAssembler* masm, ++ Register thread, Register obj, ++ Register var_size_in_bytes, ++ int con_size_in_bytes, ++ Register t1, ++ Register t2, ++ Label& slow_case) { ++ Unimplemented(); ++} ++ ++// Defines obj, preserves var_size_in_bytes ++void BarrierSetAssembler::eden_allocate(MacroAssembler* masm, ++ Register thread, Register obj, ++ Register var_size_in_bytes, ++ int con_size_in_bytes, ++ Register t1, ++ Label& slow_case) { ++ Unimplemented(); ++} ++ ++void BarrierSetAssembler::incr_allocated_bytes(MacroAssembler* masm, Register thread, ++ Register var_size_in_bytes, ++ int con_size_in_bytes, ++ Register t1) { ++ Unimplemented(); ++} +diff --git a/src/hotspot/cpu/mips/gc/shared/barrierSetAssembler_mips.hpp b/src/hotspot/cpu/mips/gc/shared/barrierSetAssembler_mips.hpp +new file mode 100644 +index 0000000000..b97ecbcca5 +--- /dev/null ++++ b/src/hotspot/cpu/mips/gc/shared/barrierSetAssembler_mips.hpp +@@ -0,0 +1,83 @@ ++/* ++ * Copyright (c) 2018, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2018, 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ * ++ */ ++ ++#ifndef CPU_MIPS_GC_SHARED_BARRIERSETASSEMBLER_MIPS_HPP ++#define CPU_MIPS_GC_SHARED_BARRIERSETASSEMBLER_MIPS_HPP ++ ++#include "asm/macroAssembler.hpp" ++#include "gc/shared/barrierSet.hpp" ++#include "memory/allocation.hpp" ++#include "oops/access.hpp" ++ ++class InterpreterMacroAssembler; ++ ++class BarrierSetAssembler: public CHeapObj { ++private: ++ void incr_allocated_bytes(MacroAssembler* masm, Register thread, ++ Register var_size_in_bytes, ++ int con_size_in_bytes, ++ Register t1); ++ ++public: ++ virtual void arraycopy_prologue(MacroAssembler* masm, DecoratorSet decorators, bool is_oop, ++ Register dst, Register count, Register scratch = NOREG) {} ++ virtual void arraycopy_epilogue(MacroAssembler* masm, DecoratorSet decorators, bool is_oop, ++ Register dst, Register count, Register scratch = NOREG) {} ++ ++ virtual void load_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type, ++ Register dst, Address src, Register tmp1, Register tmp_thread); ++ virtual void store_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type, ++ Address dst, Register val, Register tmp1, Register tmp2); ++ ++ virtual void obj_equals(MacroAssembler* masm, ++ Register obj1, Register obj2); ++ virtual void obj_equals(MacroAssembler* masm, ++ Register obj1, Address obj2); ++ ++ virtual void resolve(MacroAssembler* masm, DecoratorSet decorators, Register obj) { ++ // Default implementation does not need to do anything. ++ } ++ ++ // Support for jniFastGetField to try resolving a jobject/jweak in native ++ virtual void try_resolve_jobject_in_native(MacroAssembler* masm, Register jni_env, ++ Register obj, Register tmp, Label& slowpath); ++ ++ virtual void tlab_allocate(MacroAssembler* masm, ++ Register thread, Register obj, ++ Register var_size_in_bytes, ++ int con_size_in_bytes, ++ Register t1, Register t2, ++ Label& slow_case); ++ virtual void eden_allocate(MacroAssembler* masm, ++ Register thread, Register obj, ++ Register var_size_in_bytes, ++ int con_size_in_bytes, ++ Register t1, ++ Label& slow_case); ++ ++ virtual void barrier_stubs_init() {} ++}; ++ ++#endif // CPU_MIPS_GC_SHARED_BARRIERSETASSEMBLER_MIPS_HPP +diff --git a/src/hotspot/cpu/mips/gc/shared/cardTableBarrierSetAssembler_mips.cpp b/src/hotspot/cpu/mips/gc/shared/cardTableBarrierSetAssembler_mips.cpp +new file mode 100644 +index 0000000000..f33165334c +--- /dev/null ++++ b/src/hotspot/cpu/mips/gc/shared/cardTableBarrierSetAssembler_mips.cpp +@@ -0,0 +1,147 @@ ++/* ++ * Copyright (c) 2018, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2018, 2023, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 
++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#include "precompiled.hpp" ++#include "asm/macroAssembler.inline.hpp" ++#include "gc/shared/barrierSet.hpp" ++#include "gc/shared/cardTable.hpp" ++#include "gc/shared/cardTableBarrierSet.hpp" ++#include "gc/shared/cardTableBarrierSetAssembler.hpp" ++ ++#define __ masm-> ++ ++#define T9 RT9 ++ ++#ifdef PRODUCT ++#define BLOCK_COMMENT(str) /* nothing */ ++#else ++#define BLOCK_COMMENT(str) __ block_comment(str) ++#endif ++ ++#define BIND(label) bind(label); BLOCK_COMMENT(#label ":") ++ ++#define TIMES_OOP (UseCompressedOops ? Address::times_4 : Address::times_8) ++ ++void CardTableBarrierSetAssembler::gen_write_ref_array_post_barrier(MacroAssembler* masm, DecoratorSet decorators, ++ Register addr, Register count, Register tmp) { ++ BarrierSet *bs = BarrierSet::barrier_set(); ++ CardTableBarrierSet* ctbs = barrier_set_cast(bs); ++ CardTable* ct = ctbs->card_table(); ++ assert(sizeof(*ct->byte_map_base()) == sizeof(jbyte), "adjust this code"); ++ intptr_t disp = (intptr_t) ct->byte_map_base(); ++ ++ Label L_loop, L_done; ++ const Register end = count; ++ assert_different_registers(addr, end); ++ ++ __ beq(count, R0, L_done); // zero count - nothing to do ++ __ delayed()->nop(); ++ ++ if (ct->scanned_concurrently()) __ membar(Assembler::StoreStore); ++ ++ __ set64(tmp, disp); ++ ++ __ lea(end, Address(addr, count, TIMES_OOP, 0)); // end == addr+count*oop_size ++ __ daddiu(end, end, -BytesPerHeapOop); // end - 1 to make inclusive ++ __ shr(addr, CardTable::card_shift); ++ __ shr(end, CardTable::card_shift); ++ __ dsubu(end, end, addr); // end --> cards count ++ ++ __ daddu(addr, addr, tmp); ++ ++ __ BIND(L_loop); ++ if (UseLEXT1) { ++ __ gssbx(R0, addr, count, 0); ++ } else { ++ __ daddu(AT, addr, count); ++ __ sb(R0, AT, 0); ++ } ++ __ daddiu(count, count, -1); ++ __ bgez(count, L_loop); ++ __ delayed()->nop(); ++ ++ __ BIND(L_done); ++} ++ ++void CardTableBarrierSetAssembler::store_check(MacroAssembler* masm, Register obj, Address dst) { ++ // Does a store check for the oop in register obj. The content of ++ // register obj is destroyed afterwards. 
++ BarrierSet* bs = BarrierSet::barrier_set(); ++ ++ CardTableBarrierSet* ctbs = barrier_set_cast(bs); ++ CardTable* ct = ctbs->card_table(); ++ assert(sizeof(*ct->byte_map_base()) == sizeof(jbyte), "adjust this code"); ++ ++ __ shr(obj, CardTable::card_shift); ++ ++ Address card_addr; ++ ++ intptr_t byte_map_base = (intptr_t)ct->byte_map_base(); ++ Register tmp = T9; ++ assert_different_registers(tmp, obj); ++ __ li(tmp, byte_map_base); ++ __ addu(tmp, tmp, obj); ++ ++ assert(CardTable::dirty_card_val() == 0, "must be"); ++ ++ jbyte dirty = CardTable::dirty_card_val(); ++ if (UseCondCardMark) { ++ Label L_already_dirty; ++ __ membar(Assembler::StoreLoad); ++ __ lb(AT, tmp, 0); ++ __ addiu(AT, AT, -1 * dirty); ++ __ beq(AT, R0, L_already_dirty); ++ __ delayed()->nop(); ++ __ sb(R0, tmp, 0); ++ __ bind(L_already_dirty); ++ } else { ++ if (ct->scanned_concurrently()) { ++ __ membar(Assembler::StoreStore); ++ } ++ __ sb(R0, tmp, 0); ++ } ++} ++ ++void CardTableBarrierSetAssembler::oop_store_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type, ++ Address dst, Register val, Register tmp1, Register tmp2) { ++ bool in_heap = (decorators & IN_HEAP) != 0; ++ ++ bool is_array = (decorators & IS_ARRAY) != 0; ++ bool on_anonymous = (decorators & ON_UNKNOWN_OOP_REF) != 0; ++ bool precise = is_array || on_anonymous; ++ ++ bool needs_post_barrier = val != noreg && in_heap; ++ ++ BarrierSetAssembler::store_at(masm, decorators, type, dst, val, noreg, noreg); ++ if (needs_post_barrier) { ++ // flatten object address if needed ++ if (!precise || (dst.index() == noreg && dst.disp() == 0)) { ++ store_check(masm, dst.base(), dst); ++ } else { ++ __ lea(tmp1, dst); ++ store_check(masm, tmp1, dst); ++ } ++ } ++} +diff --git a/src/hotspot/cpu/mips/gc/shared/cardTableBarrierSetAssembler_mips.hpp b/src/hotspot/cpu/mips/gc/shared/cardTableBarrierSetAssembler_mips.hpp +new file mode 100644 +index 0000000000..49c2a0ea80 +--- /dev/null ++++ b/src/hotspot/cpu/mips/gc/shared/cardTableBarrierSetAssembler_mips.hpp +@@ -0,0 +1,42 @@ ++/* ++ * Copyright (c) 2018, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2018, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ * ++ */ ++ ++#ifndef CPU_MIPS_GC_SHARED_CARDTABLEBARRIERSETASSEMBLER_MIPS_HPP ++#define CPU_MIPS_GC_SHARED_CARDTABLEBARRIERSETASSEMBLER_MIPS_HPP ++ ++#include "asm/macroAssembler.hpp" ++#include "gc/shared/modRefBarrierSetAssembler.hpp" ++ ++class CardTableBarrierSetAssembler: public ModRefBarrierSetAssembler { ++protected: ++ void store_check(MacroAssembler* masm, Register obj, Address dst); ++ ++ virtual void gen_write_ref_array_post_barrier(MacroAssembler* masm, DecoratorSet decorators, Register addr, Register count, Register tmp); ++ ++ virtual void oop_store_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type, ++ Address dst, Register val, Register tmp1, Register tmp2); ++}; ++ ++#endif // CPU_MIPS_GC_SHARED_CARDTABLEBARRIERSETASSEMBLER_MIPS_HPP +diff --git a/src/hotspot/cpu/mips/gc/shared/modRefBarrierSetAssembler_mips.cpp b/src/hotspot/cpu/mips/gc/shared/modRefBarrierSetAssembler_mips.cpp +new file mode 100644 +index 0000000000..765259e626 +--- /dev/null ++++ b/src/hotspot/cpu/mips/gc/shared/modRefBarrierSetAssembler_mips.cpp +@@ -0,0 +1,53 @@ ++/* ++ * Copyright (c) 2018, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2018, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ * ++ */ ++ ++#include "precompiled.hpp" ++#include "asm/macroAssembler.inline.hpp" ++#include "gc/shared/modRefBarrierSetAssembler.hpp" ++ ++#define __ masm-> ++ ++void ModRefBarrierSetAssembler::arraycopy_prologue(MacroAssembler* masm, DecoratorSet decorators, bool is_oop, ++ Register dst, Register count, Register scratch) { ++ if (is_oop) { ++ gen_write_ref_array_pre_barrier(masm, decorators, dst, count); ++ } ++} ++ ++void ModRefBarrierSetAssembler::arraycopy_epilogue(MacroAssembler* masm, DecoratorSet decorators, bool is_oop, ++ Register dst, Register count, Register scratch) { ++ if (is_oop) { ++ gen_write_ref_array_post_barrier(masm, decorators, dst, count, scratch); ++ } ++} ++ ++void ModRefBarrierSetAssembler::store_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type, ++ Address dst, Register val, Register tmp1, Register tmp2) { ++ if (type == T_OBJECT || type == T_ARRAY) { ++ oop_store_at(masm, decorators, type, dst, val, tmp1, tmp2); ++ } else { ++ BarrierSetAssembler::store_at(masm, decorators, type, dst, val, tmp1, tmp2); ++ } ++} +diff --git a/src/hotspot/cpu/mips/gc/shared/modRefBarrierSetAssembler_mips.hpp b/src/hotspot/cpu/mips/gc/shared/modRefBarrierSetAssembler_mips.hpp +new file mode 100644 +index 0000000000..5320a4c0ad +--- /dev/null ++++ b/src/hotspot/cpu/mips/gc/shared/modRefBarrierSetAssembler_mips.hpp +@@ -0,0 +1,54 @@ ++/* ++ * Copyright (c) 2018, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2018, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#ifndef CPU_MIPS_GC_SHARED_MODREFBARRIERSETASSEMBLER_MIPS_HPP ++#define CPU_MIPS_GC_SHARED_MODREFBARRIERSETASSEMBLER_MIPS_HPP ++ ++#include "asm/macroAssembler.hpp" ++#include "gc/shared/barrierSetAssembler.hpp" ++ ++// The ModRefBarrierSetAssembler filters away accesses on BasicTypes other ++// than T_OBJECT/T_ARRAY (oops). The oop accesses call one of the protected ++// accesses, which are overridden in the concrete BarrierSetAssembler. 
++ ++class ModRefBarrierSetAssembler: public BarrierSetAssembler { ++protected: ++ virtual void gen_write_ref_array_pre_barrier(MacroAssembler* masm, DecoratorSet decorators, ++ Register addr, Register count) {} ++ virtual void gen_write_ref_array_post_barrier(MacroAssembler* masm, DecoratorSet decorators, ++ Register addr, Register count, Register tmp) {} ++ virtual void oop_store_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type, ++ Address dst, Register val, Register tmp1, Register tmp2) = 0; ++public: ++ virtual void arraycopy_prologue(MacroAssembler* masm, DecoratorSet decorators, bool is_oop, ++ Register dst, Register count, Register scratch = NOREG); ++ virtual void arraycopy_epilogue(MacroAssembler* masm, DecoratorSet decorators, bool is_oop, ++ Register dst, Register count, Register scratch = NOREG); ++ ++ virtual void store_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type, ++ Address dst, Register val, Register tmp1, Register tmp2); ++}; ++ ++#endif // CPU_MIPS_GC_SHARED_MODREFBARRIERSETASSEMBLER_MIPS_HPP +diff --git a/src/hotspot/cpu/mips/globalDefinitions_mips.hpp b/src/hotspot/cpu/mips/globalDefinitions_mips.hpp +new file mode 100644 +index 0000000000..abf8141e8b +--- /dev/null ++++ b/src/hotspot/cpu/mips/globalDefinitions_mips.hpp +@@ -0,0 +1,45 @@ ++/* ++ * Copyright (c) 1999, 2013, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2016, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#ifndef CPU_MIPS_VM_GLOBALDEFINITIONS_MIPS_HPP ++#define CPU_MIPS_VM_GLOBALDEFINITIONS_MIPS_HPP ++// Size of MIPS Instructions ++const int BytesPerInstWord = 4; ++ ++const int StackAlignmentInBytes = (2*wordSize); ++ ++// Indicates whether the C calling conventions require that ++// 32-bit integer argument values are properly extended to 64 bits. ++// If set, SharedRuntime::c_calling_convention() must adapt ++// signatures accordingly. ++const bool CCallingConventionRequiresIntsAsLongs = false; ++ ++#define SUPPORTS_NATIVE_CX8 ++ ++#define SUPPORT_RESERVED_STACK_AREA ++ ++#define THREAD_LOCAL_POLL ++ ++#endif // CPU_MIPS_VM_GLOBALDEFINITIONS_MIPS_HPP +diff --git a/src/hotspot/cpu/mips/globals_mips.hpp b/src/hotspot/cpu/mips/globals_mips.hpp +new file mode 100644 +index 0000000000..3bcad005d1 +--- /dev/null ++++ b/src/hotspot/cpu/mips/globals_mips.hpp +@@ -0,0 +1,137 @@ ++/* ++ * Copyright (c) 2000, 2013, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. 
++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#ifndef CPU_MIPS_VM_GLOBALS_MIPS_HPP ++#define CPU_MIPS_VM_GLOBALS_MIPS_HPP ++ ++#include "utilities/globalDefinitions.hpp" ++#include "utilities/macros.hpp" ++ ++// Sets the default values for platform dependent flags used by the runtime system. ++// (see globals.hpp) ++ ++define_pd_global(bool, ShareVtableStubs, true); ++define_pd_global(bool, NeedsDeoptSuspend, false); // only register window machines need this ++ ++define_pd_global(bool, ImplicitNullChecks, true); // Generate code for implicit null checks ++define_pd_global(bool, TrapBasedNullChecks, false); // Not needed on x86. ++define_pd_global(bool, UncommonNullCast, true); // Uncommon-trap NULLs passed to check cast ++ ++define_pd_global(uintx, CodeCacheSegmentSize, 64); ++define_pd_global(intx, CodeEntryAlignment, 16); ++define_pd_global(intx, OptoLoopAlignment, 16); ++define_pd_global(intx, InlineFrequencyCount, 100); ++// MIPS generates 3x instructions than X86 ++define_pd_global(intx, InlineSmallCode, 4000); ++ ++#define DEFAULT_STACK_YELLOW_PAGES (2) ++#define DEFAULT_STACK_RED_PAGES (1) ++#define DEFAULT_STACK_SHADOW_PAGES (20 DEBUG_ONLY(+4)) ++#define DEFAULT_STACK_RESERVED_PAGES (1) ++define_pd_global(uintx, TLABSize, 0); ++define_pd_global(uintx, NewSize, 1024 * K); ++define_pd_global(intx, PreInflateSpin, 10); ++ ++define_pd_global(intx, PrefetchCopyIntervalInBytes, -1); ++define_pd_global(intx, PrefetchScanIntervalInBytes, -1); ++define_pd_global(intx, PrefetchFieldsAhead, -1); ++ ++#define MIN_STACK_YELLOW_PAGES DEFAULT_STACK_YELLOW_PAGES ++#define MIN_STACK_RED_PAGES DEFAULT_STACK_RED_PAGES ++#define MIN_STACK_SHADOW_PAGES DEFAULT_STACK_SHADOW_PAGES ++#define MIN_STACK_RESERVED_PAGES (0) ++define_pd_global(intx, StackReservedPages, DEFAULT_STACK_RESERVED_PAGES); ++ ++define_pd_global(intx, StackYellowPages, 2); ++define_pd_global(intx, StackRedPages, 1); ++define_pd_global(intx, StackShadowPages, DEFAULT_STACK_SHADOW_PAGES); ++ ++define_pd_global(bool, RewriteBytecodes, true); ++define_pd_global(bool, RewriteFrequentPairs, true); ++define_pd_global(bool, UseMembar, true); ++// GC Ergo Flags ++define_pd_global(intx, CMSYoungGenPerWorker, 64*M); // default max size of CMS young gen, per GC worker thread ++ ++define_pd_global(uintx, TypeProfileLevel, 111); ++ ++define_pd_global(bool, CompactStrings, true); ++ ++define_pd_global(bool, PreserveFramePointer, false); ++ ++define_pd_global(intx, InitArrayShortSize, 8*BytesPerLong); ++ ++define_pd_global(bool, ThreadLocalHandshakes, true); ++// Only c2 cares about this at 
the moment ++define_pd_global(intx, AllocatePrefetchStyle, 2); ++define_pd_global(intx, AllocatePrefetchDistance, -1); ++ ++#define ARCH_FLAGS(develop, \ ++ product, \ ++ diagnostic, \ ++ experimental, \ ++ notproduct, \ ++ range, \ ++ constraint, \ ++ writeable) \ ++ \ ++ product(bool, UseLEXT1, false, \ ++ "Use LoongISA general EXTensions 1") \ ++ \ ++ product(bool, UseLEXT2, false, \ ++ "Use LoongISA general EXTensions 2") \ ++ \ ++ product(bool, UseLEXT3, false, \ ++ "Use LoongISA general EXTensions 3") \ ++ \ ++ product(bool, UseCodeCacheAllocOpt, true, \ ++ "Allocate code cache within 32-bit memory address space") \ ++ \ ++ product(intx, UseSyncLevel, 10000, \ ++ "The sync level on Loongson CPUs" \ ++ "UseSyncLevel == 10000, 111, for all Loongson CPUs, " \ ++ "UseSyncLevel == 4000, 101, maybe for GS464V" \ ++ "UseSyncLevel == 3000, 001, maybe for GS464V" \ ++ "UseSyncLevel == 2000, 011, maybe for GS464E/GS264" \ ++ "UseSyncLevel == 1000, 110, maybe for GS464") \ ++ \ ++ develop(bool, UseBoundCheckInstruction, false, \ ++ "Use bound check instruction") \ ++ \ ++ product(intx, SetFSFOFN, 999, \ ++ "Set the FS/FO/FN bits in FCSR" \ ++ "999 means FS/FO/FN will not be changed" \ ++ "=XYZ, with X:FS, Y:FO, Z:FN, X, Y and Z in 0=off, 1=on") \ ++ \ ++ /* assembler */ \ ++ product(bool, UseCountLeadingZerosInstructionMIPS64, true, \ ++ "Use count leading zeros instruction") \ ++ \ ++ product(bool, UseCountTrailingZerosInstructionMIPS64, false, \ ++ "Use count trailing zeros instruction") \ ++ \ ++ product(bool, UseActiveCoresMP, false, \ ++ "Eliminate barriers for single active cpu") ++ ++#endif // CPU_MIPS_VM_GLOBALS_MIPS_HPP +diff --git a/src/hotspot/cpu/mips/icBuffer_mips.cpp b/src/hotspot/cpu/mips/icBuffer_mips.cpp +new file mode 100644 +index 0000000000..6586c63965 +--- /dev/null ++++ b/src/hotspot/cpu/mips/icBuffer_mips.cpp +@@ -0,0 +1,88 @@ ++/* ++ * Copyright (c) 1997, 2012, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ * ++ */ ++ ++#include "precompiled.hpp" ++#include "asm/macroAssembler.hpp" ++#include "asm/macroAssembler.inline.hpp" ++#include "code/icBuffer.hpp" ++#include "gc/shared/collectedHeap.inline.hpp" ++#include "interpreter/bytecodes.hpp" ++#include "memory/resourceArea.hpp" ++#include "nativeInst_mips.hpp" ++#include "oops/oop.inline.hpp" ++ ++#define T0 RT0 ++#define T1 RT1 ++#define T2 RT2 ++#define T3 RT3 ++#define T8 RT8 ++#define T9 RT9 ++ ++int InlineCacheBuffer::ic_stub_code_size() { ++ return NativeMovConstReg::instruction_size + ++ NativeGeneralJump::instruction_size + ++ 1; ++ // so that code_end can be set in CodeBuffer ++ // 64bit 15 = 6 + 8 bytes + 1 byte ++ // 32bit 7 = 2 + 4 bytes + 1 byte ++} ++ ++ ++// we use T1 as cached oop(klass) now. this is the target of virtual call, ++// when reach here, the receiver in T0 ++// refer to shareRuntime_mips.cpp,gen_i2c2i_adapters ++void InlineCacheBuffer::assemble_ic_buffer_code(address code_begin, void* cached_value, address entry_point) { ++ ResourceMark rm; ++ CodeBuffer code(code_begin, ic_stub_code_size()); ++ MacroAssembler* masm = new MacroAssembler(&code); ++ // note: even though the code contains an embedded oop, we do not need reloc info ++ // because ++ // (1) the oop is old (i.e., doesn't matter for scavenges) ++ // (2) these ICStubs are removed *before* a GC happens, so the roots disappear ++// assert(cached_oop == NULL || cached_oop->is_perm(), "must be perm oop"); ++#define __ masm-> ++ __ patchable_set48(T1, (long)cached_value); ++ ++ __ patchable_jump(entry_point); ++ __ flush(); ++#undef __ ++} ++ ++ ++address InlineCacheBuffer::ic_buffer_entry_point(address code_begin) { ++ NativeMovConstReg* move = nativeMovConstReg_at(code_begin); // creation also verifies the object ++ NativeGeneralJump* jump = nativeGeneralJump_at(move->next_instruction_address()); ++ return jump->jump_destination(); ++} ++ ++ ++void* InlineCacheBuffer::ic_buffer_cached_value(address code_begin) { ++ // creation also verifies the object ++ NativeMovConstReg* move = nativeMovConstReg_at(code_begin); ++ // Verifies the jump ++ NativeGeneralJump* jump = nativeGeneralJump_at(move->next_instruction_address()); ++ void* o= (void*)move->data(); ++ return o; ++} +diff --git a/src/hotspot/cpu/mips/icache_mips.cpp b/src/hotspot/cpu/mips/icache_mips.cpp +new file mode 100644 +index 0000000000..e84e37358b +--- /dev/null ++++ b/src/hotspot/cpu/mips/icache_mips.cpp +@@ -0,0 +1,41 @@ ++/* ++ * Copyright (c) 1997, 2010, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2021, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 
++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#include "precompiled.hpp" ++#include "asm/macroAssembler.hpp" ++#include "runtime/icache.hpp" ++ ++void ICacheStubGenerator::generate_icache_flush(ICache::flush_icache_stub_t* flush_icache_stub) ++{ ++#define __ _masm-> ++ StubCodeMark mark(this, "ICache", "flush_icache_stub"); ++ address start = __ pc(); ++ ++ __ jr_hb(RA); ++ __ delayed()->ori(V0, A2, 0); ++ ++ *flush_icache_stub = (ICache::flush_icache_stub_t)start; ++#undef __ ++} +diff --git a/src/hotspot/cpu/mips/icache_mips.hpp b/src/hotspot/cpu/mips/icache_mips.hpp +new file mode 100644 +index 0000000000..f90dee6eef +--- /dev/null ++++ b/src/hotspot/cpu/mips/icache_mips.hpp +@@ -0,0 +1,41 @@ ++/* ++ * Copyright (c) 1997, 2010, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2021, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#ifndef CPU_MIPS_VM_ICACHE_MIPS_HPP ++#define CPU_MIPS_VM_ICACHE_MIPS_HPP ++ ++// Interface for updating the instruction cache. Whenever the VM modifies ++// code, part of the processor instruction cache potentially has to be flushed. ++ ++class ICache : public AbstractICache { ++ public: ++ enum { ++ stub_size = 2 * BytesPerInstWord, // Size of the icache flush stub in bytes ++ line_size = 32, // flush instruction affects a dword ++ log2_line_size = 5 // log2(line_size) ++ }; ++}; ++ ++#endif // CPU_MIPS_VM_ICACHE_MIPS_HPP +diff --git a/src/hotspot/cpu/mips/interp_masm_mips.hpp b/src/hotspot/cpu/mips/interp_masm_mips.hpp +new file mode 100644 +index 0000000000..e526e39d53 +--- /dev/null ++++ b/src/hotspot/cpu/mips/interp_masm_mips.hpp +@@ -0,0 +1,276 @@ ++/* ++ * Copyright (c) 2003, 2013, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2020, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#ifndef CPU_MIPS_VM_INTERP_MASM_MIPS_64_HPP ++#define CPU_MIPS_VM_INTERP_MASM_MIPS_64_HPP ++ ++#include "asm/assembler.hpp" ++#include "asm/macroAssembler.hpp" ++#include "asm/macroAssembler.inline.hpp" ++#include "interpreter/invocationCounter.hpp" ++#include "runtime/frame.hpp" ++ ++// This file specializes the assember with interpreter-specific macros ++ ++ ++class InterpreterMacroAssembler: public MacroAssembler { ++#ifndef CC_INTERP ++ private: ++ ++ Register _locals_register; // register that contains the pointer to the locals ++ Register _bcp_register; // register that contains the bcp ++ ++ protected: ++ // Interpreter specific version of call_VM_base ++ virtual void call_VM_leaf_base(address entry_point, ++ int number_of_arguments); ++ ++ virtual void call_VM_base(Register oop_result, ++ Register java_thread, ++ Register last_java_sp, ++ address entry_point, ++ int number_of_arguments, ++ bool check_exceptions); ++ ++ // base routine for all dispatches ++ void dispatch_base(TosState state, address* table, bool verifyoop = true, bool generate_poll = false); ++#endif // CC_INTERP ++ ++ public: ++ void jump_to_entry(address entry); ++ // narrow int return value ++ void narrow(Register result); ++ ++ InterpreterMacroAssembler(CodeBuffer* code) : MacroAssembler(code), _locals_register(LVP), _bcp_register(BCP) {} ++ ++ void get_2_byte_integer_at_bcp(Register reg, Register tmp, int offset); ++ void get_4_byte_integer_at_bcp(Register reg, Register tmp, int offset); ++ ++ virtual void check_and_handle_popframe(Register java_thread); ++ virtual void check_and_handle_earlyret(Register java_thread); ++ ++ void load_earlyret_value(TosState state); ++ ++#ifdef CC_INTERP ++ void save_bcp() { /* not needed in c++ interpreter and harmless */ } ++ void restore_bcp() { /* not needed in c++ interpreter and harmless */ } ++ ++ // Helpers for runtime call arguments/results ++ void get_method(Register reg); ++ ++#else ++ ++ // Interpreter-specific registers ++ void save_bcp() { ++ sd(BCP, FP, frame::interpreter_frame_bcp_offset * wordSize); ++ } ++ ++ void restore_bcp() { ++ ld(BCP, FP, frame::interpreter_frame_bcp_offset * wordSize); ++ } ++ ++ void restore_locals() { ++ ld(LVP, FP, frame::interpreter_frame_locals_offset * wordSize); ++ } ++ ++ // Helpers for runtime call arguments/results ++ void get_method(Register reg) { ++ ld(reg, FP, frame::interpreter_frame_method_offset * wordSize); ++ } ++ ++ void get_const(Register reg){ ++ get_method(reg); ++ ld(reg, reg, in_bytes(Method::const_offset())); ++ } ++ ++ void get_constant_pool(Register reg) { ++ get_const(reg); ++ ld(reg, reg, in_bytes(ConstMethod::constants_offset())); ++ } ++ ++ void get_constant_pool_cache(Register reg) { ++ get_constant_pool(reg); ++ ld(reg, reg, ConstantPool::cache_offset_in_bytes()); ++ } ++ ++ void get_cpool_and_tags(Register cpool, Register tags) { ++ get_constant_pool(cpool); ++ ld(tags, cpool, ConstantPool::tags_offset_in_bytes()); ++ } ++ ++ void 
get_unsigned_2_byte_index_at_bcp(Register reg, int bcp_offset); ++ void get_cache_and_index_at_bcp(Register cache, Register index, int bcp_offset, size_t index_size = sizeof(u2)); ++ void get_cache_and_index_and_bytecode_at_bcp(Register cache, Register index, Register bytecode, int byte_no, int bcp_offset, size_t index_size = sizeof(u2)); ++ void get_cache_entry_pointer_at_bcp(Register cache, Register tmp, int bcp_offset, size_t index_size = sizeof(u2)); ++ void get_cache_index_at_bcp(Register index, int bcp_offset, size_t index_size = sizeof(u2)); ++ void get_method_counters(Register method, Register mcs, Label& skip); ++ ++ // load cpool->resolved_references(index); ++ void load_resolved_reference_at_index(Register result, Register index, Register tmp); ++ ++ // load cpool->resolved_klass_at(index) ++ void load_resolved_klass_at_index(Register cpool, // the constant pool (corrupted on return) ++ Register index, // the constant pool index (corrupted on return) ++ Register klass); // contains the Klass on return ++ ++ void pop_ptr( Register r = FSR); ++ void pop_i( Register r = FSR); ++ void pop_l( Register r = FSR); ++ void pop_f(FloatRegister r = FSF); ++ void pop_d(FloatRegister r = FSF); ++ ++ void push_ptr( Register r = FSR); ++ void push_i( Register r = FSR); ++ void push_l( Register r = FSR); ++ void push_f(FloatRegister r = FSF); ++ void push_d(FloatRegister r = FSF); ++ ++ void pop(Register r ) { ((MacroAssembler*)this)->pop(r); } ++ ++ void push(Register r ) { ((MacroAssembler*)this)->push(r); } ++ ++ void pop(TosState state); // transition vtos -> state ++ void push(TosState state); // transition state -> vtos ++ ++ void empty_expression_stack() { ++ ld(SP, FP, frame::interpreter_frame_monitor_block_top_offset * wordSize); ++ // NULL last_sp until next java call ++ sd(R0, FP, frame::interpreter_frame_last_sp_offset * wordSize); ++ } ++ ++ // Super call_VM calls - correspond to MacroAssembler::call_VM(_leaf) calls ++ void load_ptr(int n, Register val); ++ void store_ptr(int n, Register val); ++ ++ // Generate a subtype check: branch to ok_is_subtype if sub_klass is ++ // a subtype of super_klass. ++ //void gen_subtype_check( Register sub_klass, Label &ok_is_subtype ); ++ void gen_subtype_check( Register Rsup_klass, Register sub_klass, Label &ok_is_subtype ); ++ ++ // Dispatching ++ void dispatch_prolog(TosState state, int step = 0); ++ void dispatch_epilog(TosState state, int step = 0); ++ void dispatch_only(TosState state, bool generate_poll = false); ++ void dispatch_only_normal(TosState state); ++ void dispatch_only_noverify(TosState state); ++ void dispatch_next(TosState state, int step = 0, bool generate_poll = false); ++ void dispatch_via (TosState state, address* table); ++ ++ // jump to an invoked target ++ void prepare_to_jump_from_interpreted(); ++ void jump_from_interpreted(Register method, Register temp); ++ ++ ++ // Returning from interpreted functions ++ // ++ // Removes the current activation (incl. unlocking of monitors) ++ // and sets up the return address. This code is also used for ++ // exception unwindwing. In that case, we do not want to throw ++ // IllegalMonitorStateExceptions, since that might get us into an ++ // infinite rethrow exception loop. ++ // Additionally this code is used for popFrame and earlyReturn. ++ // In popFrame case we want to skip throwing an exception, ++ // installing an exception, and notifying jvmdi. ++ // In earlyReturn case we only want to skip throwing an exception ++ // and installing an exception. 
++ void remove_activation(TosState state, Register ret_addr, ++ bool throw_monitor_exception = true, ++ bool install_monitor_exception = true, ++ bool notify_jvmdi = true); ++#endif // CC_INTERP ++ ++ // Object locking ++ void lock_object (Register lock_reg); ++ void unlock_object(Register lock_reg); ++ ++#ifndef CC_INTERP ++ ++ // Interpreter profiling operations ++ void set_method_data_pointer_for_bcp(); ++ void test_method_data_pointer(Register mdp, Label& zero_continue); ++ void verify_method_data_pointer(); ++ ++ void set_mdp_data_at(Register mdp_in, int constant, Register value); ++ void increment_mdp_data_at(Address data, bool decrement = false); ++ void increment_mdp_data_at(Register mdp_in, int constant, ++ bool decrement = false); ++ void increment_mdp_data_at(Register mdp_in, Register reg, int constant, ++ bool decrement = false); ++ void increment_mask_and_jump(Address counter_addr, ++ int increment, int mask, ++ Register scratch, bool preloaded, ++ Condition cond, Label* where); ++ void set_mdp_flag_at(Register mdp_in, int flag_constant); ++ void test_mdp_data_at(Register mdp_in, int offset, Register value, ++ Register test_value_out, ++ Label& not_equal_continue); ++ ++ void record_klass_in_profile(Register receiver, Register mdp, ++ Register reg2, bool is_virtual_call); ++ void record_klass_in_profile_helper(Register receiver, Register mdp, ++ Register reg2, int start_row, ++ Label& done, bool is_virtual_call); ++ ++ void update_mdp_by_offset(Register mdp_in, int offset_of_offset); ++ void update_mdp_by_offset(Register mdp_in, Register reg, int offset_of_disp); ++ void update_mdp_by_constant(Register mdp_in, int constant); ++ void update_mdp_for_ret(Register return_bci); ++ ++ void profile_taken_branch(Register mdp, Register bumped_count); ++ void profile_not_taken_branch(Register mdp); ++ void profile_call(Register mdp); ++ void profile_final_call(Register mdp); ++ void profile_virtual_call(Register receiver, Register mdp, ++ Register scratch2, ++ bool receiver_can_be_null = false); ++ void profile_called_method(Register method, Register mdp, Register reg2) NOT_JVMCI_RETURN; ++ void profile_ret(Register return_bci, Register mdp); ++ void profile_null_seen(Register mdp); ++ void profile_typecheck(Register mdp, Register klass, Register scratch); ++ void profile_typecheck_failed(Register mdp); ++ void profile_switch_default(Register mdp); ++ void profile_switch_case(Register index_in_scratch, Register mdp, ++ Register scratch2); ++ ++ // Debugging ++ // only if +VerifyOops && state == atos ++ void verify_oop(Register reg, TosState state = atos); ++ // only if +VerifyFPU && (state == ftos || state == dtos) ++ void verify_FPU(int stack_depth, TosState state = ftos); ++ ++ void profile_obj_type(Register obj, const Address& mdo_addr); ++ void profile_arguments_type(Register mdp, Register callee, Register tmp, bool is_virtual); ++ void profile_return_type(Register mdp, Register ret, Register tmp); ++ void profile_parameters_type(Register mdp, Register tmp1, Register tmp2); ++#endif // !CC_INTERP ++ ++ typedef enum { NotifyJVMTI, SkipNotifyJVMTI } NotifyMethodExitMode; ++ ++ // support for jvmti/dtrace ++ void notify_method_entry(); ++ void notify_method_exit(TosState state, NotifyMethodExitMode mode); ++}; ++ ++#endif // CPU_MIPS_VM_INTERP_MASM_MIPS_64_HPP +diff --git a/src/hotspot/cpu/mips/interp_masm_mips_64.cpp b/src/hotspot/cpu/mips/interp_masm_mips_64.cpp +new file mode 100644 +index 0000000000..eb35bb0633 +--- /dev/null ++++ b/src/hotspot/cpu/mips/interp_masm_mips_64.cpp +@@ 
-0,0 +1,2126 @@ ++/* ++ * Copyright (c) 2003, 2013, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#include "precompiled.hpp" ++#include "gc/shared/barrierSet.hpp" ++#include "gc/shared/barrierSetAssembler.hpp" ++#include "interp_masm_mips.hpp" ++#include "interpreter/interpreter.hpp" ++#include "interpreter/interpreterRuntime.hpp" ++#include "oops/arrayOop.hpp" ++#include "oops/markOop.hpp" ++#include "oops/methodData.hpp" ++#include "oops/method.hpp" ++#include "prims/jvmtiExport.hpp" ++#include "prims/jvmtiThreadState.hpp" ++#include "runtime/basicLock.hpp" ++#include "runtime/biasedLocking.hpp" ++#include "runtime/frame.inline.hpp" ++#include "runtime/safepointMechanism.hpp" ++#include "runtime/sharedRuntime.hpp" ++#include "runtime/thread.inline.hpp" ++ ++#define T0 RT0 ++#define T1 RT1 ++#define T2 RT2 ++#define T3 RT3 ++#define T8 RT8 ++#define T9 RT9 ++ ++// Implementation of InterpreterMacroAssembler ++ ++#ifdef CC_INTERP ++void InterpreterMacroAssembler::get_method(Register reg) { ++} ++#endif // CC_INTERP ++ ++void InterpreterMacroAssembler::get_2_byte_integer_at_bcp(Register reg, Register tmp, int offset) { ++ // The runtime address of BCP may be unaligned. ++ // Refer to the SPARC implementation. ++ lbu(reg, BCP, offset+1); ++ lbu(tmp, BCP, offset); ++ dsll(reg, reg, 8); ++ daddu(reg, tmp, reg); ++} ++ ++void InterpreterMacroAssembler::get_4_byte_integer_at_bcp(Register reg, Register tmp, int offset) { ++ assert(reg != tmp, "need separate temp register"); ++ if (offset & 3) { // Offset unaligned? ++ lbu(reg, BCP, offset+3); ++ lbu(tmp, BCP, offset+2); ++ dsll(reg, reg, 8); ++ daddu(reg, tmp, reg); ++ lbu(tmp, BCP, offset+1); ++ dsll(reg, reg, 8); ++ daddu(reg, tmp, reg); ++ lbu(tmp, BCP, offset); ++ dsll(reg, reg, 8); ++ daddu(reg, tmp, reg); ++ } else { ++ lwu(reg, BCP, offset); ++ } ++} ++ ++void InterpreterMacroAssembler::jump_to_entry(address entry) { ++ assert(entry, "Entry must have been generated by now"); ++ jmp(entry); ++} ++ ++#ifndef CC_INTERP ++ ++void InterpreterMacroAssembler::call_VM_leaf_base(address entry_point, ++ int number_of_arguments) { ++ // interpreter specific ++ // ++ // Note: No need to save/restore bcp & locals (r13 & r14) pointer ++ // since these are callee saved registers and no blocking/ ++ // GC can happen in leaf calls. ++ // Further Note: DO NOT save/restore bcp/locals. 
If a caller has ++ // already saved them so that it can use BCP/LVP as temporaries ++ // then a save/restore here will DESTROY the copy the caller ++ // saved! There used to be a save_bcp() that only happened in ++ // the ASSERT path (no restore_bcp). Which caused bizarre failures ++ // when jvm built with ASSERTs. ++#ifdef ASSERT ++ save_bcp(); ++ { ++ Label L; ++ ld(AT,FP,frame::interpreter_frame_last_sp_offset * wordSize); ++ beq(AT,R0,L); ++ delayed()->nop(); ++ stop("InterpreterMacroAssembler::call_VM_leaf_base: last_sp != NULL"); ++ bind(L); ++ } ++#endif ++ // super call ++ MacroAssembler::call_VM_leaf_base(entry_point, number_of_arguments); ++ // interpreter specific ++ // Used to ASSERT that BCP/LVP were equal to frame's bcp/locals ++ // but since they may not have been saved (and we don't want to ++ // save them here (see note above) the assert is invalid. ++} ++ ++void InterpreterMacroAssembler::call_VM_base(Register oop_result, ++ Register java_thread, ++ Register last_java_sp, ++ address entry_point, ++ int number_of_arguments, ++ bool check_exceptions) { ++ // interpreter specific ++ // ++ // Note: Could avoid restoring locals ptr (callee saved) - however doesn't ++ // really make a difference for these runtime calls, since they are ++ // slow anyway. Btw., bcp must be saved/restored since it may change ++ // due to GC. ++ assert(java_thread == noreg , "not expecting a precomputed java thread"); ++ save_bcp(); ++#ifdef ASSERT ++ { ++ Label L; ++ ld(AT, FP, frame::interpreter_frame_last_sp_offset * wordSize); ++ beq(AT, R0, L); ++ delayed()->nop(); ++ stop("InterpreterMacroAssembler::call_VM_base: last_sp != NULL"); ++ bind(L); ++ } ++#endif /* ASSERT */ ++ // super call ++ MacroAssembler::call_VM_base(oop_result, java_thread, last_java_sp, ++ entry_point, number_of_arguments, ++ check_exceptions); ++ // interpreter specific ++ restore_bcp(); ++ restore_locals(); ++} ++ ++ ++void InterpreterMacroAssembler::check_and_handle_popframe(Register java_thread) { ++ if (JvmtiExport::can_pop_frame()) { ++ Label L; ++ // Initiate popframe handling only if it is not already being ++ // processed. If the flag has the popframe_processing bit set, it ++ // means that this code is called *during* popframe handling - we ++ // don't want to reenter. ++ // This method is only called just after the call into the vm in ++ // call_VM_base, so the arg registers are available. 
++ // Not clear if any other register is available, so load AT twice ++ assert(AT != java_thread, "check"); ++ lw(AT, java_thread, in_bytes(JavaThread::popframe_condition_offset())); ++ andi(AT, AT, JavaThread::popframe_pending_bit); ++ beq(AT, R0, L); ++ delayed()->nop(); ++ ++ lw(AT, java_thread, in_bytes(JavaThread::popframe_condition_offset())); ++ andi(AT, AT, JavaThread::popframe_processing_bit); ++ bne(AT, R0, L); ++ delayed()->nop(); ++ call_VM_leaf(CAST_FROM_FN_PTR(address, Interpreter::remove_activation_preserving_args_entry)); ++ jr(V0); ++ delayed()->nop(); ++ bind(L); ++ } ++} ++ ++ ++void InterpreterMacroAssembler::load_earlyret_value(TosState state) { ++#ifndef OPT_THREAD ++ Register thread = T8; ++ get_thread(thread); ++#else ++ Register thread = TREG; ++#endif ++ ld_ptr(T8, thread, in_bytes(JavaThread::jvmti_thread_state_offset())); ++ const Address tos_addr (T8, in_bytes(JvmtiThreadState::earlyret_tos_offset())); ++ const Address oop_addr (T8, in_bytes(JvmtiThreadState::earlyret_oop_offset())); ++ const Address val_addr (T8, in_bytes(JvmtiThreadState::earlyret_value_offset())); ++ //V0, oop_addr,V1,val_addr ++ switch (state) { ++ case atos: ++ ld_ptr(V0, oop_addr); ++ st_ptr(R0, oop_addr); ++ verify_oop(V0, state); ++ break; ++ case ltos: ++ ld_ptr(V0, val_addr); // fall through ++ break; ++ case btos: // fall through ++ case ztos: // fall through ++ case ctos: // fall through ++ case stos: // fall through ++ case itos: ++ lw(V0, val_addr); ++ break; ++ case ftos: ++ lwc1(F0, T8, in_bytes(JvmtiThreadState::earlyret_value_offset())); ++ break; ++ case dtos: ++ ldc1(F0, T8, in_bytes(JvmtiThreadState::earlyret_value_offset())); ++ break; ++ case vtos: /* nothing to do */ break; ++ default : ShouldNotReachHere(); ++ } ++ // Clean up tos value in the thread object ++ move(AT, (int)ilgl); ++ sw(AT, tos_addr); ++ sw(R0, T8, in_bytes(JvmtiThreadState::earlyret_value_offset())); ++} ++ ++ ++void InterpreterMacroAssembler::check_and_handle_earlyret(Register java_thread) { ++ if (JvmtiExport::can_force_early_return()) { ++ Label L; ++ Register tmp = T9; ++ ++ assert(java_thread != AT, "check"); ++ assert(java_thread != tmp, "check"); ++ ld_ptr(AT, java_thread, in_bytes(JavaThread::jvmti_thread_state_offset())); ++ beq(AT, R0, L); ++ delayed()->nop(); ++ ++ // Initiate earlyret handling only if it is not already being processed. ++ // If the flag has the earlyret_processing bit set, it means that this code ++ // is called *during* earlyret handling - we don't want to reenter. ++ lw(AT, AT, in_bytes(JvmtiThreadState::earlyret_state_offset())); ++ move(tmp, JvmtiThreadState::earlyret_pending); ++ bne(tmp, AT, L); ++ delayed()->nop(); ++ ++ // Call Interpreter::remove_activation_early_entry() to get the address of the ++ // same-named entrypoint in the generated interpreter code. 
++ ld_ptr(tmp, java_thread, in_bytes(JavaThread::jvmti_thread_state_offset())); ++ lw(AT, tmp, in_bytes(JvmtiThreadState::earlyret_tos_offset())); ++ move(A0, AT); ++ call_VM_leaf(CAST_FROM_FN_PTR(address, Interpreter::remove_activation_early_entry), A0); ++ jr(V0); ++ delayed()->nop(); ++ bind(L); ++ } ++} ++ ++ ++void InterpreterMacroAssembler::get_unsigned_2_byte_index_at_bcp(Register reg, ++ int bcp_offset) { ++ assert(bcp_offset >= 0, "bcp is still pointing to start of bytecode"); ++ lbu(AT, BCP, bcp_offset); ++ lbu(reg, BCP, bcp_offset + 1); ++ ins(reg, AT, 8, 8); ++} ++ ++ ++void InterpreterMacroAssembler::get_cache_index_at_bcp(Register index, ++ int bcp_offset, ++ size_t index_size) { ++ assert(bcp_offset > 0, "bcp is still pointing to start of bytecode"); ++ if (index_size == sizeof(u2)) { ++ get_2_byte_integer_at_bcp(index, AT, bcp_offset); ++ } else if (index_size == sizeof(u4)) { ++ get_4_byte_integer_at_bcp(index, AT, bcp_offset); ++ // Check if the secondary index definition is still ~x, otherwise ++ // we have to change the following assembler code to calculate the ++ // plain index. ++ assert(ConstantPool::decode_invokedynamic_index(~123) == 123, "else change next line"); ++ nor(index, index, R0); ++ sll(index, index, 0); ++ } else if (index_size == sizeof(u1)) { ++ lbu(index, BCP, bcp_offset); ++ } else { ++ ShouldNotReachHere(); ++ } ++} ++ ++ ++void InterpreterMacroAssembler::get_cache_and_index_at_bcp(Register cache, ++ Register index, ++ int bcp_offset, ++ size_t index_size) { ++ assert_different_registers(cache, index); ++ get_cache_index_at_bcp(index, bcp_offset, index_size); ++ ld(cache, FP, frame::interpreter_frame_cache_offset * wordSize); ++ assert(sizeof(ConstantPoolCacheEntry) == 4 * wordSize, "adjust code below"); ++ assert(exact_log2(in_words(ConstantPoolCacheEntry::size())) == 2, "else change next line"); ++ shl(index, 2); ++} ++ ++ ++void InterpreterMacroAssembler::get_cache_and_index_and_bytecode_at_bcp(Register cache, ++ Register index, ++ Register bytecode, ++ int byte_no, ++ int bcp_offset, ++ size_t index_size) { ++ get_cache_and_index_at_bcp(cache, index, bcp_offset, index_size); ++ // We use a 32-bit load here since the layout of 64-bit words on ++ // little-endian machines allow us that. 
++ dsll(AT, index, Address::times_ptr); ++ daddu(AT, cache, AT); ++ lw(bytecode, AT, in_bytes(ConstantPoolCache::base_offset() + ConstantPoolCacheEntry::indices_offset())); ++ if(os::is_MP()) { ++ sync(); // load acquire ++ } ++ ++ const int shift_count = (1 + byte_no) * BitsPerByte; ++ assert((byte_no == TemplateTable::f1_byte && shift_count == ConstantPoolCacheEntry::bytecode_1_shift) || ++ (byte_no == TemplateTable::f2_byte && shift_count == ConstantPoolCacheEntry::bytecode_2_shift), ++ "correct shift count"); ++ dsrl(bytecode, bytecode, shift_count); ++ assert(ConstantPoolCacheEntry::bytecode_1_mask == ConstantPoolCacheEntry::bytecode_2_mask, "common mask"); ++ move(AT, ConstantPoolCacheEntry::bytecode_1_mask); ++ andr(bytecode, bytecode, AT); ++} ++ ++void InterpreterMacroAssembler::get_cache_entry_pointer_at_bcp(Register cache, ++ Register tmp, ++ int bcp_offset, ++ size_t index_size) { ++ assert(bcp_offset > 0, "bcp is still pointing to start of bytecode"); ++ assert(cache != tmp, "must use different register"); ++ get_cache_index_at_bcp(tmp, bcp_offset, index_size); ++ assert(sizeof(ConstantPoolCacheEntry) == 4 * wordSize, "adjust code below"); ++ // convert from field index to ConstantPoolCacheEntry index ++ // and from word offset to byte offset ++ assert(exact_log2(in_bytes(ConstantPoolCacheEntry::size_in_bytes())) == 2 + LogBytesPerWord, "else change next line"); ++ shl(tmp, 2 + LogBytesPerWord); ++ ld(cache, FP, frame::interpreter_frame_cache_offset * wordSize); ++ // skip past the header ++ daddiu(cache, cache, in_bytes(ConstantPoolCache::base_offset())); ++ daddu(cache, cache, tmp); ++} ++ ++void InterpreterMacroAssembler::get_method_counters(Register method, ++ Register mcs, Label& skip) { ++ Label has_counters; ++ ld(mcs, method, in_bytes(Method::method_counters_offset())); ++ bne(mcs, R0, has_counters); ++ delayed()->nop(); ++ call_VM(noreg, CAST_FROM_FN_PTR(address, ++ InterpreterRuntime::build_method_counters), method); ++ ld(mcs, method, in_bytes(Method::method_counters_offset())); ++ beq(mcs, R0, skip); // No MethodCounters allocated, OutOfMemory ++ delayed()->nop(); ++ bind(has_counters); ++} ++ ++// Load object from cpool->resolved_references(index) ++void InterpreterMacroAssembler::load_resolved_reference_at_index( ++ Register result, Register index, Register tmp) { ++ assert_different_registers(result, index); ++ // convert from field index to resolved_references() index and from ++ // word index to byte offset. 
Since this is a java object, it can be compressed ++ shl(index, LogBytesPerHeapOop); ++ ++ get_constant_pool(result); ++ // load pointer for resolved_references[] objArray ++ ld(result, result, ConstantPool::cache_offset_in_bytes()); ++ ld(result, result, ConstantPoolCache::resolved_references_offset_in_bytes()); ++ resolve_oop_handle(result, tmp); ++ // Add in the index ++ daddu(result, result, index); ++ load_heap_oop(result, Address(result, arrayOopDesc::base_offset_in_bytes(T_OBJECT)), tmp); ++} ++ ++// load cpool->resolved_klass_at(index) ++void InterpreterMacroAssembler::load_resolved_klass_at_index(Register cpool, ++ Register index, Register klass) { ++ dsll(AT, index, Address::times_ptr); ++ if (UseLEXT1 && Assembler::is_simm(sizeof(ConstantPool), 8)) { ++ gslhx(index, cpool, AT, sizeof(ConstantPool)); ++ } else { ++ daddu(AT, cpool, AT); ++ lh(index, AT, sizeof(ConstantPool)); ++ } ++ Register resolved_klasses = cpool; ++ ld_ptr(resolved_klasses, Address(cpool, ConstantPool::resolved_klasses_offset_in_bytes())); ++ dsll(AT, index, Address::times_ptr); ++ daddu(AT, resolved_klasses, AT); ++ ld(klass, AT, Array::base_offset_in_bytes()); ++} ++ ++// Resets LVP to locals. Register sub_klass cannot be any of the above. ++void InterpreterMacroAssembler::gen_subtype_check( Register Rsup_klass, Register Rsub_klass, Label &ok_is_subtype ) { ++ assert( Rsub_klass != Rsup_klass, "Rsup_klass holds superklass" ); ++ assert( Rsub_klass != T1, "T1 holds 2ndary super array length" ); ++ assert( Rsub_klass != T0, "T0 holds 2ndary super array scan ptr" ); ++ // Profile the not-null value's klass. ++ // Here T9 and T1 are used as temporary registers. ++ profile_typecheck(T9, Rsub_klass, T1); // blows T9, reloads T1 ++ ++ // Do the check. ++ check_klass_subtype(Rsub_klass, Rsup_klass, T1, ok_is_subtype); // blows T1 ++ ++ // Profile the failure of the check. ++ profile_typecheck_failed(T9); // blows T9 ++} ++ ++ ++ ++// Java Expression Stack ++ ++void InterpreterMacroAssembler::pop_ptr(Register r) { ++ ld(r, SP, 0); ++ daddiu(SP, SP, Interpreter::stackElementSize); ++} ++ ++void InterpreterMacroAssembler::pop_i(Register r) { ++ lw(r, SP, 0); ++ daddiu(SP, SP, Interpreter::stackElementSize); ++} ++ ++void InterpreterMacroAssembler::pop_l(Register r) { ++ ld(r, SP, 0); ++ daddiu(SP, SP, 2 * Interpreter::stackElementSize); ++} ++ ++void InterpreterMacroAssembler::pop_f(FloatRegister r) { ++ lwc1(r, SP, 0); ++ daddiu(SP, SP, Interpreter::stackElementSize); ++} ++ ++void InterpreterMacroAssembler::pop_d(FloatRegister r) { ++ ldc1(r, SP, 0); ++ daddiu(SP, SP, 2 * Interpreter::stackElementSize); ++} ++ ++void InterpreterMacroAssembler::push_ptr(Register r) { ++ daddiu(SP, SP, - Interpreter::stackElementSize); ++ sd(r, SP, 0); ++} ++ ++void InterpreterMacroAssembler::push_i(Register r) { ++ // For compatibility reason, don't change to sw. 
++ daddiu(SP, SP, - Interpreter::stackElementSize); ++ sd(r, SP, 0); ++} ++ ++void InterpreterMacroAssembler::push_l(Register r) { ++ daddiu(SP, SP, -2 * Interpreter::stackElementSize); ++ sd(r, SP, 0); ++ sd(R0, SP, Interpreter::stackElementSize); ++} ++ ++void InterpreterMacroAssembler::push_f(FloatRegister r) { ++ daddiu(SP, SP, - Interpreter::stackElementSize); ++ swc1(r, SP, 0); ++} ++ ++void InterpreterMacroAssembler::push_d(FloatRegister r) { ++ daddiu(SP, SP, -2 * Interpreter::stackElementSize); ++ sdc1(r, SP, 0); ++ sd(R0, SP, Interpreter::stackElementSize); ++} ++ ++void InterpreterMacroAssembler::pop(TosState state) { ++ switch (state) { ++ case atos: pop_ptr(); break; ++ case btos: ++ case ztos: ++ case ctos: ++ case stos: ++ case itos: pop_i(); break; ++ case ltos: pop_l(); break; ++ case ftos: pop_f(); break; ++ case dtos: pop_d(); break; ++ case vtos: /* nothing to do */ break; ++ default: ShouldNotReachHere(); ++ } ++ verify_oop(FSR, state); ++} ++ ++//FSR=V0,SSR=V1 ++void InterpreterMacroAssembler::push(TosState state) { ++ verify_oop(FSR, state); ++ switch (state) { ++ case atos: push_ptr(); break; ++ case btos: ++ case ztos: ++ case ctos: ++ case stos: ++ case itos: push_i(); break; ++ case ltos: push_l(); break; ++ case ftos: push_f(); break; ++ case dtos: push_d(); break; ++ case vtos: /* nothing to do */ break; ++ default : ShouldNotReachHere(); ++ } ++} ++ ++ ++ ++void InterpreterMacroAssembler::load_ptr(int n, Register val) { ++ ld(val, SP, Interpreter::expr_offset_in_bytes(n)); ++} ++ ++void InterpreterMacroAssembler::store_ptr(int n, Register val) { ++ sd(val, SP, Interpreter::expr_offset_in_bytes(n)); ++} ++ ++// Jump to from_interpreted entry of a call unless single stepping is possible ++// in this thread in which case we must call the i2i entry ++void InterpreterMacroAssembler::jump_from_interpreted(Register method, Register temp) { ++ // record last_sp ++ move(Rsender, SP); ++ sd(SP, FP, frame::interpreter_frame_last_sp_offset * wordSize); ++ ++ if (JvmtiExport::can_post_interpreter_events()) { ++ Label run_compiled_code; ++ // JVMTI events, such as single-stepping, are implemented partly by avoiding running ++ // compiled code in threads for which the event is enabled. Check here for ++ // interp_only_mode if these events CAN be enabled. ++#ifndef OPT_THREAD ++ Register thread = temp; ++ get_thread(thread); ++#else ++ Register thread = TREG; ++#endif ++ // interp_only is an int, on little endian it is sufficient to test the byte only ++ // Is a cmpl faster? ++ lw(AT, thread, in_bytes(JavaThread::interp_only_mode_offset())); ++ beq(AT, R0, run_compiled_code); ++ delayed()->nop(); ++ ld(AT, method, in_bytes(Method::interpreter_entry_offset())); ++ jr(AT); ++ delayed()->nop(); ++ bind(run_compiled_code); ++ } ++ ++ ld(AT, method, in_bytes(Method::from_interpreted_offset())); ++ jr(AT); ++ delayed()->nop(); ++} ++ ++ ++// The following two routines provide a hook so that an implementation ++// can schedule the dispatch in two parts. mips64 does not do this. ++void InterpreterMacroAssembler::dispatch_prolog(TosState state, int step) { ++ // Nothing mips64 specific to be done here ++} ++ ++void InterpreterMacroAssembler::dispatch_epilog(TosState state, int step) { ++ dispatch_next(state, step); ++} ++ ++// assume the next bytecode in T8. 
++void InterpreterMacroAssembler::dispatch_base(TosState state, ++ address* table, ++ bool verifyoop, ++ bool generate_poll) { ++ Register thread = TREG; ++#ifndef OPT_THREAD ++ get_thread(thread); ++#endif ++ ++ if (VerifyActivationFrameSize) { ++ Label L; ++ ++ dsubu(T2, FP, SP); ++ int min_frame_size = (frame::link_offset - ++ frame::interpreter_frame_initial_sp_offset) * wordSize; ++ daddiu(T2, T2, -min_frame_size); ++ bgez(T2, L); ++ delayed()->nop(); ++ stop("broken stack frame"); ++ bind(L); ++ } ++ // FIXME: I do not know which register should pass to verify_oop ++ if (verifyoop) verify_oop(FSR, state); ++ dsll(T2, Rnext, LogBytesPerWord); ++ ++ Label safepoint; ++ address* const safepoint_table = Interpreter::safept_table(state); ++ bool needs_thread_local_poll = generate_poll && ++ SafepointMechanism::uses_thread_local_poll() && table != safepoint_table; ++ ++ if (needs_thread_local_poll) { ++ NOT_PRODUCT(block_comment("Thread-local Safepoint poll")); ++ ld(T3, thread, in_bytes(Thread::polling_page_offset())); ++ andi(T3, T3, SafepointMechanism::poll_bit()); ++ bne(T3, R0, safepoint); ++ delayed()->nop(); ++ } ++ ++ if((long)table >= (long)Interpreter::dispatch_table(btos) && ++ (long)table <= (long)Interpreter::dispatch_table(vtos) ++ ) { ++ int table_size = (long)Interpreter::dispatch_table(itos) - (long)Interpreter::dispatch_table(stos); ++ int table_offset = ((int)state - (int)itos) * table_size; ++ ++ // GP points to the starting address of Interpreter::dispatch_table(itos). ++ // See StubGenerator::generate_call_stub(address& return_address) for the initialization of GP. ++ if(table_offset != 0) { ++ daddiu(T3, GP, table_offset); ++ if (UseLEXT1) { ++ gsldx(T3, T2, T3, 0); ++ } else { ++ daddu(T3, T2, T3); ++ ld(T3, T3, 0); ++ } ++ } else { ++ if (UseLEXT1) { ++ gsldx(T3, T2, GP, 0); ++ } else { ++ daddu(T3, T2, GP); ++ ld(T3, T3, 0); ++ } ++ } ++ } else { ++ li(T3, (long)table); ++ if (UseLEXT1) { ++ gsldx(T3, T2, T3, 0); ++ } else { ++ daddu(T3, T2, T3); ++ ld(T3, T3, 0); ++ } ++ } ++ jr(T3); ++ delayed()->nop(); ++ ++ if (needs_thread_local_poll) { ++ bind(safepoint); ++ li(T3, (long)safepoint_table); ++ if (UseLEXT1) { ++ gsldx(T3, T2, T3, 0); ++ } else { ++ daddu(T3, T2, T3); ++ ld(T3, T3, 0); ++ } ++ jr(T3); ++ delayed()->nop(); ++ } ++} ++ ++void InterpreterMacroAssembler::dispatch_only(TosState state, bool generate_poll) { ++ dispatch_base(state, Interpreter::dispatch_table(state), true, generate_poll); ++} ++ ++void InterpreterMacroAssembler::dispatch_only_normal(TosState state) { ++ dispatch_base(state, Interpreter::normal_table(state)); ++} ++ ++void InterpreterMacroAssembler::dispatch_only_noverify(TosState state) { ++ dispatch_base(state, Interpreter::normal_table(state), false); ++} ++ ++ ++void InterpreterMacroAssembler::dispatch_next(TosState state, int step, bool generate_poll) { ++ // load next bytecode (load before advancing r13 to prevent AGI) ++ lbu(Rnext, BCP, step); ++ increment(BCP, step); ++ dispatch_base(state, Interpreter::dispatch_table(state), true, generate_poll); ++} ++ ++void InterpreterMacroAssembler::dispatch_via(TosState state, address* table) { ++ // load current bytecode ++ lbu(Rnext, BCP, 0); ++ dispatch_base(state, table); ++} ++ ++// remove activation ++// ++// Unlock the receiver if this is a synchronized method. ++// Unlock any Java monitors from syncronized blocks. ++// Remove the activation from the stack. 
++// ++// If there are locked Java monitors ++// If throw_monitor_exception ++// throws IllegalMonitorStateException ++// Else if install_monitor_exception ++// installs IllegalMonitorStateException ++// Else ++// no error processing ++// used registers : T1, T2, T3, T8 ++// T1 : thread, method access flags ++// T2 : monitor entry pointer ++// T3 : method, monitor top ++// T8 : unlock flag ++void InterpreterMacroAssembler::remove_activation( ++ TosState state, ++ Register ret_addr, ++ bool throw_monitor_exception, ++ bool install_monitor_exception, ++ bool notify_jvmdi) { ++ // Note: Registers V0, V1 and F0, F1 may be in use for the result ++ // check if synchronized method ++ Label unlocked, unlock, no_unlock; ++ ++ // get the value of _do_not_unlock_if_synchronized into T8 ++#ifndef OPT_THREAD ++ Register thread = T1; ++ get_thread(thread); ++#else ++ Register thread = TREG; ++#endif ++ lb(T8, thread, in_bytes(JavaThread::do_not_unlock_if_synchronized_offset())); ++ // reset the flag ++ sb(R0, thread, in_bytes(JavaThread::do_not_unlock_if_synchronized_offset())); ++ // get method access flags ++ ld(T3, FP, frame::interpreter_frame_method_offset * wordSize); ++ lw(T1, T3, in_bytes(Method::access_flags_offset())); ++ andi(T1, T1, JVM_ACC_SYNCHRONIZED); ++ beq(T1, R0, unlocked); ++ delayed()->nop(); ++ ++ // Don't unlock anything if the _do_not_unlock_if_synchronized flag is set. ++ bne(T8, R0, no_unlock); ++ delayed()->nop(); ++ // unlock monitor ++ push(state); // save result ++ ++ // BasicObjectLock will be first in list, since this is a ++ // synchronized method. However, need to check that the object has ++ // not been unlocked by an explicit monitorexit bytecode. ++ daddiu(c_rarg0, FP, frame::interpreter_frame_initial_sp_offset * wordSize ++ - (int)sizeof(BasicObjectLock)); ++ // address of first monitor ++ ld(T1, c_rarg0, BasicObjectLock::obj_offset_in_bytes()); ++ bne(T1, R0, unlock); ++ delayed()->nop(); ++ pop(state); ++ if (throw_monitor_exception) { ++ // Entry already unlocked, need to throw exception ++ // I think mips do not need empty_FPU_stack ++ // remove possible return value from FPU-stack, otherwise stack could overflow ++ empty_FPU_stack(); ++ call_VM(NOREG, CAST_FROM_FN_PTR(address, ++ InterpreterRuntime::throw_illegal_monitor_state_exception)); ++ should_not_reach_here(); ++ } else { ++ // Monitor already unlocked during a stack unroll. If requested, ++ // install an illegal_monitor_state_exception. Continue with ++ // stack unrolling. 
++ if (install_monitor_exception) { ++ // remove possible return value from FPU-stack, ++ // otherwise stack could overflow ++ empty_FPU_stack(); ++ call_VM(NOREG, CAST_FROM_FN_PTR(address, ++ InterpreterRuntime::new_illegal_monitor_state_exception)); ++ ++ } ++ ++ b(unlocked); ++ delayed()->nop(); ++ } ++ ++ bind(unlock); ++ unlock_object(c_rarg0); ++ pop(state); ++ ++ // Check that for block-structured locking (i.e., that all locked ++ // objects has been unlocked) ++ bind(unlocked); ++ ++ // V0, V1: Might contain return value ++ ++ // Check that all monitors are unlocked ++ { ++ Label loop, exception, entry, restart; ++ const int entry_size = frame::interpreter_frame_monitor_size() * wordSize; ++ const Address monitor_block_top(FP, ++ frame::interpreter_frame_monitor_block_top_offset * wordSize); ++ ++ bind(restart); ++ // points to current entry, starting with top-most entry ++ ld(c_rarg0, monitor_block_top); ++ // points to word before bottom of monitor block ++ daddiu(T3, FP, frame::interpreter_frame_initial_sp_offset * wordSize); ++ b(entry); ++ delayed()->nop(); ++ ++ // Entry already locked, need to throw exception ++ bind(exception); ++ ++ if (throw_monitor_exception) { ++ // Throw exception ++ // remove possible return value from FPU-stack, ++ // otherwise stack could overflow ++ empty_FPU_stack(); ++ MacroAssembler::call_VM(NOREG, CAST_FROM_FN_PTR(address, ++ InterpreterRuntime::throw_illegal_monitor_state_exception)); ++ should_not_reach_here(); ++ } else { ++ // Stack unrolling. Unlock object and install illegal_monitor_exception ++ // Unlock does not block, so don't have to worry about the frame ++ // We don't have to preserve c_rarg0, since we are going to ++ // throw an exception ++ ++ push(state); ++ unlock_object(c_rarg0); ++ pop(state); ++ ++ if (install_monitor_exception) { ++ empty_FPU_stack(); ++ call_VM(NOREG, CAST_FROM_FN_PTR(address, ++ InterpreterRuntime::new_illegal_monitor_state_exception)); ++ } ++ ++ b(restart); ++ delayed()->nop(); ++ } ++ ++ bind(loop); ++ ld(T1, c_rarg0, BasicObjectLock::obj_offset_in_bytes()); ++ bne(T1, R0, exception);// check if current entry is used ++ delayed()->nop(); ++ ++ daddiu(c_rarg0, c_rarg0, entry_size);// otherwise advance to next entry ++ bind(entry); ++ bne(c_rarg0, T3, loop); // check if bottom reached ++ delayed()->nop(); // if not at bottom then check this entry ++ } ++ ++ bind(no_unlock); ++ ++ // jvmpi support (jvmdi does not generate MethodExit on exception / popFrame) ++ if (notify_jvmdi) { ++ notify_method_exit(state, NotifyJVMTI); // preserve TOSCA ++ } else { ++ notify_method_exit(state, SkipNotifyJVMTI); // preserve TOSCA ++ } ++ ++ // remove activation ++ ld(TSR, FP, frame::interpreter_frame_sender_sp_offset * wordSize); ++ if (StackReservedPages > 0) { ++ // testing if reserved zone needs to be re-enabled ++ Label no_reserved_zone_enabling; ++ ++ ld(AT, Address(thread, JavaThread::reserved_stack_activation_offset())); ++ dsubu(AT, TSR, AT); ++ blez(AT, no_reserved_zone_enabling); ++ delayed()->nop(); ++ ++ call_VM_leaf( ++ CAST_FROM_FN_PTR(address, SharedRuntime::enable_stack_reserved_zone), thread); ++ call_VM(noreg, CAST_FROM_FN_PTR(address, ++ InterpreterRuntime::throw_delayed_StackOverflowError)); ++ should_not_reach_here(); ++ ++ bind(no_reserved_zone_enabling); ++ } ++ ld(ret_addr, FP, frame::interpreter_frame_return_addr_offset * wordSize); ++ ld(FP, FP, frame::interpreter_frame_sender_fp_offset * wordSize); ++ move(SP, TSR); // set sp to sender sp ++} ++ ++#endif // CC_INTERP ++ ++// Lock object ++// 
++// Args: ++// c_rarg0: BasicObjectLock to be used for locking ++// ++// Kills: ++// T1 ++// T2 ++void InterpreterMacroAssembler::lock_object(Register lock_reg) { ++ assert(lock_reg == c_rarg0, "The argument is only for looks. It must be c_rarg0"); ++ ++ if (UseHeavyMonitors) { ++ call_VM(NOREG, CAST_FROM_FN_PTR(address, InterpreterRuntime::monitorenter), lock_reg); ++ } else { ++ Label done, slow_case; ++ const Register tmp_reg = T2; ++ const Register scr_reg = T1; ++ const int obj_offset = BasicObjectLock::obj_offset_in_bytes(); ++ const int lock_offset = BasicObjectLock::lock_offset_in_bytes (); ++ const int mark_offset = lock_offset + BasicLock::displaced_header_offset_in_bytes(); ++ ++ // Load object pointer into scr_reg ++ ld(scr_reg, lock_reg, obj_offset); ++ ++ if (UseBiasedLocking) { ++ // Note: we use noreg for the temporary register since it's hard ++ // to come up with a free register on all incoming code paths ++ biased_locking_enter(lock_reg, scr_reg, tmp_reg, noreg, false, done, &slow_case); ++ } ++ ++ // Load (object->mark() | 1) into tmp_reg ++ ld(AT, scr_reg, 0); ++ ori(tmp_reg, AT, 1); ++ ++ // Save (object->mark() | 1) into BasicLock's displaced header ++ sd(tmp_reg, lock_reg, mark_offset); ++ ++ assert(lock_offset == 0, "displached header must be first word in BasicObjectLock"); ++ ++ if (PrintBiasedLockingStatistics) { ++ Label succ, fail; ++ cmpxchg(Address(scr_reg, 0), tmp_reg, lock_reg, AT, true, false, succ, &fail); ++ bind(succ); ++ atomic_inc32((address)BiasedLocking::fast_path_entry_count_addr(), 1, AT, scr_reg); ++ b(done); ++ delayed()->nop(); ++ bind(fail); ++ } else { ++ cmpxchg(Address(scr_reg, 0), tmp_reg, lock_reg, AT, true, false, done); ++ } ++ ++ // Test if the oopMark is an obvious stack pointer, i.e., ++ // 1) (mark & 3) == 0, and ++ // 2) SP <= mark < SP + os::pagesize() ++ // ++ // These 3 tests can be done by evaluating the following ++ // expression: ((mark - sp) & (3 - os::vm_page_size())), ++ // assuming both stack pointer and pagesize have their ++ // least significant 2 bits clear. ++ // NOTE: the oopMark is in tmp_reg as the result of cmpxchg ++ ++ dsubu(tmp_reg, tmp_reg, SP); ++ move(AT, 7 - os::vm_page_size()); ++ andr(tmp_reg, tmp_reg, AT); ++ // Save the test result, for recursive case, the result is zero ++ sd(tmp_reg, lock_reg, mark_offset); ++ if (PrintBiasedLockingStatistics) { ++ bne(tmp_reg, R0, slow_case); ++ delayed()->nop(); ++ atomic_inc32((address)BiasedLocking::fast_path_entry_count_addr(), 1, AT, scr_reg); ++ } ++ beq(tmp_reg, R0, done); ++ delayed()->nop(); ++ ++ bind(slow_case); ++ // Call the runtime routine for slow case ++ call_VM(NOREG, CAST_FROM_FN_PTR(address, InterpreterRuntime::monitorenter), lock_reg); ++ ++ bind(done); ++ } ++} ++ ++ ++// Unlocks an object. Used in monitorexit bytecode and ++// remove_activation. Throws an IllegalMonitorException if object is ++// not locked by current thread. ++// ++// Args: ++// c_rarg0: BasicObjectLock for lock ++// ++// Kills: ++// T1 ++// T2 ++// T3 ++// Throw an IllegalMonitorException if object is not locked by current thread ++void InterpreterMacroAssembler::unlock_object(Register lock_reg) { ++ assert(lock_reg == c_rarg0, "The argument is only for looks. 
It must be c_rarg0"); ++ ++ if (UseHeavyMonitors) { ++ call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::monitorexit), lock_reg); ++ } else { ++ Label done; ++ ++ const Register tmp_reg = T1; ++ const Register scr_reg = T2; ++ const Register hdr_reg = T3; ++ ++ save_bcp(); // Save in case of exception ++ ++ // Convert from BasicObjectLock structure to object and BasicLock structure ++ // Store the BasicLock address into %T2 ++ daddiu(tmp_reg, lock_reg, BasicObjectLock::lock_offset_in_bytes()); ++ ++ // Load oop into scr_reg(%T1) ++ ld(scr_reg, lock_reg, BasicObjectLock::obj_offset_in_bytes()); ++ // free entry ++ sd(R0, lock_reg, BasicObjectLock::obj_offset_in_bytes()); ++ if (UseBiasedLocking) { ++ biased_locking_exit(scr_reg, hdr_reg, done); ++ } ++ ++ // Load the old header from BasicLock structure ++ ld(hdr_reg, tmp_reg, BasicLock::displaced_header_offset_in_bytes()); ++ // zero for recursive case ++ beq(hdr_reg, R0, done); ++ delayed()->nop(); ++ ++ // Atomic swap back the old header ++ cmpxchg(Address(scr_reg, 0), tmp_reg, hdr_reg, AT, false, false, done); ++ ++ // Call the runtime routine for slow case. ++ sd(scr_reg, lock_reg, BasicObjectLock::obj_offset_in_bytes()); // restore obj ++ call_VM(NOREG, ++ CAST_FROM_FN_PTR(address, InterpreterRuntime::monitorexit), ++ lock_reg); ++ ++ bind(done); ++ ++ restore_bcp(); ++ } ++} ++ ++#ifndef CC_INTERP ++ ++void InterpreterMacroAssembler::test_method_data_pointer(Register mdp, ++ Label& zero_continue) { ++ assert(ProfileInterpreter, "must be profiling interpreter"); ++ ld(mdp, Address(FP, frame::interpreter_frame_mdp_offset * wordSize)); ++ beq(mdp, R0, zero_continue); ++ delayed()->nop(); ++} ++ ++ ++// Set the method data pointer for the current bcp. ++void InterpreterMacroAssembler::set_method_data_pointer_for_bcp() { ++ assert(ProfileInterpreter, "must be profiling interpreter"); ++ Label set_mdp; ++ ++ // V0 and T0 will be used as two temporary registers. ++ push2(V0, T0); ++ ++ get_method(T0); ++ // Test MDO to avoid the call if it is NULL. ++ ld(V0, T0, in_bytes(Method::method_data_offset())); ++ beq(V0, R0, set_mdp); ++ delayed()->nop(); ++ ++ // method: T0 ++ // bcp: BCP --> S0 ++ call_VM_leaf(CAST_FROM_FN_PTR(address, InterpreterRuntime::bcp_to_di), T0, BCP); ++ // mdi: V0 ++ // mdo is guaranteed to be non-zero here, we checked for it before the call. ++ get_method(T0); ++ ld(T0, T0, in_bytes(Method::method_data_offset())); ++ daddiu(T0, T0, in_bytes(MethodData::data_offset())); ++ daddu(V0, T0, V0); ++ bind(set_mdp); ++ sd(V0, FP, frame::interpreter_frame_mdp_offset * wordSize); ++ pop2(V0, T0); ++} ++ ++void InterpreterMacroAssembler::verify_method_data_pointer() { ++ assert(ProfileInterpreter, "must be profiling interpreter"); ++#ifdef ASSERT ++ Label verify_continue; ++ Register method = V0; ++ Register mdp = V1; ++ Register tmp = A0; ++ push(method); ++ push(mdp); ++ push(tmp); ++ test_method_data_pointer(mdp, verify_continue); // If mdp is zero, continue ++ get_method(method); ++ ++ // If the mdp is valid, it will point to a DataLayout header which is ++ // consistent with the bcp. The converse is highly probable also. 
++ lhu(tmp, mdp, in_bytes(DataLayout::bci_offset())); ++ ld(AT, method, in_bytes(Method::const_offset())); ++ daddu(tmp, tmp, AT); ++ daddiu(tmp, tmp, in_bytes(ConstMethod::codes_offset())); ++ beq(tmp, BCP, verify_continue); ++ delayed()->nop(); ++ call_VM_leaf(CAST_FROM_FN_PTR(address, InterpreterRuntime::verify_mdp), method, BCP, mdp); ++ bind(verify_continue); ++ pop(tmp); ++ pop(mdp); ++ pop(method); ++#endif // ASSERT ++} ++ ++ ++void InterpreterMacroAssembler::set_mdp_data_at(Register mdp_in, ++ int constant, ++ Register value) { ++ assert(ProfileInterpreter, "must be profiling interpreter"); ++ Address data(mdp_in, constant); ++ sd(value, data); ++} ++ ++ ++void InterpreterMacroAssembler::increment_mdp_data_at(Register mdp_in, ++ int constant, ++ bool decrement) { ++ // Counter address ++ Address data(mdp_in, constant); ++ ++ increment_mdp_data_at(data, decrement); ++} ++ ++void InterpreterMacroAssembler::increment_mdp_data_at(Address data, ++ bool decrement) { ++ assert(ProfileInterpreter, "must be profiling interpreter"); ++ // %%% this does 64bit counters at best it is wasting space ++ // at worst it is a rare bug when counters overflow ++ Register tmp = S0; ++ push(tmp); ++ if (decrement) { ++ assert(DataLayout::counter_increment == 1, "flow-free idiom only works with 1"); ++ // Decrement the register. ++ ld(AT, data); ++ sltu(tmp, R0, AT); ++ dsubu(AT, AT, tmp); ++ sd(AT, data); ++ } else { ++ assert(DataLayout::counter_increment == 1, "flow-free idiom only works with 1"); ++ // Increment the register. ++ ld(AT, data); ++ daddiu(tmp, AT, DataLayout::counter_increment); ++ sltu(tmp, R0, tmp); ++ daddu(AT, AT, tmp); ++ sd(AT, data); ++ } ++ pop(tmp); ++} ++ ++ ++void InterpreterMacroAssembler::increment_mdp_data_at(Register mdp_in, ++ Register reg, ++ int constant, ++ bool decrement) { ++ Register tmp = S0; ++ push(tmp); ++ if (decrement) { ++ assert(Assembler::is_simm16(constant), "constant is not a simm16 !"); ++ assert(DataLayout::counter_increment == 1, "flow-free idiom only works with 1"); ++ // Decrement the register. ++ daddu(tmp, mdp_in, reg); ++ ld(AT, tmp, constant); ++ sltu(tmp, R0, AT); ++ dsubu(AT, AT, tmp); ++ daddu(tmp, mdp_in, reg); ++ sd(AT, tmp, constant); ++ } else { ++ assert(Assembler::is_simm16(constant), "constant is not a simm16 !"); ++ assert(DataLayout::counter_increment == 1, "flow-free idiom only works with 1"); ++ // Increment the register. ++ daddu(tmp, mdp_in, reg); ++ ld(AT, tmp, constant); ++ daddiu(tmp, AT, DataLayout::counter_increment); ++ sltu(tmp, R0, tmp); ++ daddu(AT, AT, tmp); ++ daddu(tmp, mdp_in, reg); ++ sd(AT, tmp, constant); ++ } ++ pop(tmp); ++} ++ ++void InterpreterMacroAssembler::set_mdp_flag_at(Register mdp_in, ++ int flag_byte_constant) { ++ assert(ProfileInterpreter, "must be profiling interpreter"); ++ int header_offset = in_bytes(DataLayout::header_offset()); ++ int header_bits = DataLayout::flag_mask_to_header_mask(flag_byte_constant); ++ // Set the flag ++ lw(AT, Address(mdp_in, header_offset)); ++ if(Assembler::is_simm16(header_bits)) { ++ ori(AT, AT, header_bits); ++ } else { ++ push(T8); ++ // T8 is used as a temporary register. 
++ move(T8, header_bits); ++ orr(AT, AT, T8); ++ pop(T8); ++ } ++ sw(AT, Address(mdp_in, header_offset)); ++} ++ ++ ++ ++void InterpreterMacroAssembler::test_mdp_data_at(Register mdp_in, ++ int offset, ++ Register value, ++ Register test_value_out, ++ Label& not_equal_continue) { ++ assert(ProfileInterpreter, "must be profiling interpreter"); ++ if (test_value_out == noreg) { ++ ld(AT, Address(mdp_in, offset)); ++ bne(AT, value, not_equal_continue); ++ delayed()->nop(); ++ } else { ++ // Put the test value into a register, so caller can use it: ++ ld(test_value_out, Address(mdp_in, offset)); ++ bne(value, test_value_out, not_equal_continue); ++ delayed()->nop(); ++ } ++} ++ ++ ++void InterpreterMacroAssembler::update_mdp_by_offset(Register mdp_in, ++ int offset_of_disp) { ++ assert(ProfileInterpreter, "must be profiling interpreter"); ++ assert(Assembler::is_simm16(offset_of_disp), "offset is not an simm16"); ++ ld(AT, mdp_in, offset_of_disp); ++ daddu(mdp_in, mdp_in, AT); ++ sd(mdp_in, Address(FP, frame::interpreter_frame_mdp_offset * wordSize)); ++} ++ ++ ++void InterpreterMacroAssembler::update_mdp_by_offset(Register mdp_in, ++ Register reg, ++ int offset_of_disp) { ++ assert(ProfileInterpreter, "must be profiling interpreter"); ++ daddu(AT, reg, mdp_in); ++ assert(Assembler::is_simm16(offset_of_disp), "offset is not an simm16"); ++ ld(AT, AT, offset_of_disp); ++ daddu(mdp_in, mdp_in, AT); ++ sd(mdp_in, Address(FP, frame::interpreter_frame_mdp_offset * wordSize)); ++} ++ ++ ++void InterpreterMacroAssembler::update_mdp_by_constant(Register mdp_in, ++ int constant) { ++ assert(ProfileInterpreter, "must be profiling interpreter"); ++ if(Assembler::is_simm16(constant)) { ++ daddiu(mdp_in, mdp_in, constant); ++ } else { ++ move(AT, constant); ++ daddu(mdp_in, mdp_in, AT); ++ } ++ sd(mdp_in, Address(FP, frame::interpreter_frame_mdp_offset * wordSize)); ++} ++ ++ ++void InterpreterMacroAssembler::update_mdp_for_ret(Register return_bci) { ++ assert(ProfileInterpreter, "must be profiling interpreter"); ++ push(return_bci); // save/restore across call_VM ++ call_VM(noreg, ++ CAST_FROM_FN_PTR(address, InterpreterRuntime::update_mdp_for_ret), ++ return_bci); ++ pop(return_bci); ++} ++ ++ ++void InterpreterMacroAssembler::profile_taken_branch(Register mdp, ++ Register bumped_count) { ++ if (ProfileInterpreter) { ++ Label profile_continue; ++ ++ // If no method data exists, go to profile_continue. ++ // Otherwise, assign to mdp ++ test_method_data_pointer(mdp, profile_continue); ++ ++ // We are taking a branch. Increment the taken count. ++ // We inline increment_mdp_data_at to return bumped_count in a register ++ //increment_mdp_data_at(mdp, in_bytes(JumpData::taken_offset())); ++ ld(bumped_count, mdp, in_bytes(JumpData::taken_offset())); ++ assert(DataLayout::counter_increment == 1, "flow-free idiom only works with 1"); ++ daddiu(AT, bumped_count, DataLayout::counter_increment); ++ sltu(AT, R0, AT); ++ daddu(bumped_count, bumped_count, AT); ++ sd(bumped_count, mdp, in_bytes(JumpData::taken_offset())); // Store back out ++ // The method data pointer needs to be updated to reflect the new target. ++ update_mdp_by_offset(mdp, in_bytes(JumpData::displacement_offset())); ++ bind(profile_continue); ++ } ++} ++ ++ ++void InterpreterMacroAssembler::profile_not_taken_branch(Register mdp) { ++ if (ProfileInterpreter) { ++ Label profile_continue; ++ ++ // If no method data exists, go to profile_continue. ++ test_method_data_pointer(mdp, profile_continue); ++ ++ // We are taking a branch. 
Increment the not taken count. ++ increment_mdp_data_at(mdp, in_bytes(BranchData::not_taken_offset())); ++ ++ // The method data pointer needs to be updated to correspond to ++ // the next bytecode ++ update_mdp_by_constant(mdp, in_bytes(BranchData::branch_data_size())); ++ bind(profile_continue); ++ } ++} ++ ++ ++void InterpreterMacroAssembler::profile_call(Register mdp) { ++ if (ProfileInterpreter) { ++ Label profile_continue; ++ ++ // If no method data exists, go to profile_continue. ++ test_method_data_pointer(mdp, profile_continue); ++ ++ // We are making a call. Increment the count. ++ increment_mdp_data_at(mdp, in_bytes(CounterData::count_offset())); ++ ++ // The method data pointer needs to be updated to reflect the new target. ++ update_mdp_by_constant(mdp, in_bytes(CounterData::counter_data_size())); ++ bind(profile_continue); ++ } ++} ++ ++ ++void InterpreterMacroAssembler::profile_final_call(Register mdp) { ++ if (ProfileInterpreter) { ++ Label profile_continue; ++ ++ // If no method data exists, go to profile_continue. ++ test_method_data_pointer(mdp, profile_continue); ++ ++ // We are making a call. Increment the count. ++ increment_mdp_data_at(mdp, in_bytes(CounterData::count_offset())); ++ ++ // The method data pointer needs to be updated to reflect the new target. ++ update_mdp_by_constant(mdp, ++ in_bytes(VirtualCallData:: ++ virtual_call_data_size())); ++ bind(profile_continue); ++ } ++} ++ ++ ++void InterpreterMacroAssembler::profile_virtual_call(Register receiver, ++ Register mdp, ++ Register reg2, ++ bool receiver_can_be_null) { ++ if (ProfileInterpreter) { ++ Label profile_continue; ++ ++ // If no method data exists, go to profile_continue. ++ test_method_data_pointer(mdp, profile_continue); ++ ++ Label skip_receiver_profile; ++ if (receiver_can_be_null) { ++ Label not_null; ++ bne(receiver, R0, not_null); ++ delayed()->nop(); ++ // We are making a call. Increment the count. ++ increment_mdp_data_at(mdp, in_bytes(CounterData::count_offset())); ++ beq(R0, R0, skip_receiver_profile); ++ delayed()->nop(); ++ bind(not_null); ++ } ++ ++ // Record the receiver type. ++ record_klass_in_profile(receiver, mdp, reg2, true); ++ bind(skip_receiver_profile); ++ ++ // The method data pointer needs to be updated to reflect the new target. ++ update_mdp_by_constant(mdp, ++ in_bytes(VirtualCallData:: ++ virtual_call_data_size())); ++ bind(profile_continue); ++ } ++} ++ ++#if INCLUDE_JVMCI ++void InterpreterMacroAssembler::profile_called_method(Register method, Register mdp, Register reg2) { ++ assert_different_registers(method, mdp, reg2); ++ if (ProfileInterpreter && MethodProfileWidth > 0) { ++ Label profile_continue; ++ ++ // If no method data exists, go to profile_continue. ++ test_method_data_pointer(mdp, profile_continue); ++ ++ Label done; ++ record_item_in_profile_helper(method, mdp, reg2, 0, done, MethodProfileWidth, ++ &VirtualCallData::method_offset, &VirtualCallData::method_count_offset, in_bytes(VirtualCallData::nonprofiled_receiver_count_offset())); ++ bind(done); ++ ++ update_mdp_by_constant(mdp, in_bytes(VirtualCallData::virtual_call_data_size())); ++ bind(profile_continue); ++ } ++} ++#endif // INCLUDE_JVMCI ++ ++// This routine creates a state machine for updating the multi-row ++// type profile at a virtual call site (or other type-sensitive bytecode). ++// The machine visits each row (of receiver/count) until the receiver type ++// is found, or until it runs out of rows. At the same time, it remembers ++// the location of the first empty row. 
(An empty row records null for its ++// receiver, and can be allocated for a newly-observed receiver type.) ++// Because there are two degrees of freedom in the state, a simple linear ++// search will not work; it must be a decision tree. Hence this helper ++// function is recursive, to generate the required tree structured code. ++// It's the interpreter, so we are trading off code space for speed. ++// See below for example code. ++void InterpreterMacroAssembler::record_klass_in_profile_helper( ++ Register receiver, Register mdp, ++ Register reg2, int start_row, ++ Label& done, bool is_virtual_call) { ++ if (TypeProfileWidth == 0) { ++ if (is_virtual_call) { ++ increment_mdp_data_at(mdp, in_bytes(CounterData::count_offset())); ++ } ++ return; ++ } ++ ++ int last_row = VirtualCallData::row_limit() - 1; ++ assert(start_row <= last_row, "must be work left to do"); ++ // Test this row for both the receiver and for null. ++ // Take any of three different outcomes: ++ // 1. found receiver => increment count and goto done ++ // 2. found null => keep looking for case 1, maybe allocate this cell ++ // 3. found something else => keep looking for cases 1 and 2 ++ // Case 3 is handled by a recursive call. ++ for (int row = start_row; row <= last_row; row++) { ++ Label next_test; ++ bool test_for_null_also = (row == start_row); ++ ++ // See if the receiver is receiver[n]. ++ int recvr_offset = in_bytes(VirtualCallData::receiver_offset(row)); ++ test_mdp_data_at(mdp, recvr_offset, receiver, ++ (test_for_null_also ? reg2 : noreg), ++ next_test); ++ // (Reg2 now contains the receiver from the CallData.) ++ ++ // The receiver is receiver[n]. Increment count[n]. ++ int count_offset = in_bytes(VirtualCallData::receiver_count_offset(row)); ++ increment_mdp_data_at(mdp, count_offset); ++ beq(R0, R0, done); ++ delayed()->nop(); ++ bind(next_test); ++ ++ if (test_for_null_also) { ++ Label found_null; ++ // Failed the equality check on receiver[n]... Test for null. ++ if (start_row == last_row) { ++ // The only thing left to do is handle the null case. ++ if (is_virtual_call) { ++ beq(reg2, R0, found_null); ++ delayed()->nop(); ++ // Receiver did not match any saved receiver and there is no empty row for it. ++ // Increment total counter to indicate polymorphic case. ++ increment_mdp_data_at(mdp, in_bytes(CounterData::count_offset())); ++ beq(R0, R0, done); ++ delayed()->nop(); ++ bind(found_null); ++ } else { ++ bne(reg2, R0, done); ++ delayed()->nop(); ++ } ++ break; ++ } ++ // Since null is rare, make it be the branch-taken case. ++ beq(reg2, R0, found_null); ++ delayed()->nop(); ++ ++ // Put all the "Case 3" tests here. ++ record_klass_in_profile_helper(receiver, mdp, reg2, start_row + 1, done, is_virtual_call); ++ ++ // Found a null. Keep searching for a matching receiver, ++ // but remember that this is an empty (unused) slot. ++ bind(found_null); ++ } ++ } ++ ++ // In the fall-through case, we found no matching receiver, but we ++ // observed the receiver[start_row] is NULL. ++ ++ // Fill in the receiver field and increment the count. 
++ int recvr_offset = in_bytes(VirtualCallData::receiver_offset(start_row)); ++ set_mdp_data_at(mdp, recvr_offset, receiver); ++ int count_offset = in_bytes(VirtualCallData::receiver_count_offset(start_row)); ++ move(reg2, DataLayout::counter_increment); ++ set_mdp_data_at(mdp, count_offset, reg2); ++ if (start_row > 0) { ++ beq(R0, R0, done); ++ delayed()->nop(); ++ } ++} ++ ++// Example state machine code for three profile rows: ++// // main copy of decision tree, rooted at row[1] ++// if (row[0].rec == rec) { row[0].incr(); goto done; } ++// if (row[0].rec != NULL) { ++// // inner copy of decision tree, rooted at row[1] ++// if (row[1].rec == rec) { row[1].incr(); goto done; } ++// if (row[1].rec != NULL) { ++// // degenerate decision tree, rooted at row[2] ++// if (row[2].rec == rec) { row[2].incr(); goto done; } ++// if (row[2].rec != NULL) { goto done; } // overflow ++// row[2].init(rec); goto done; ++// } else { ++// // remember row[1] is empty ++// if (row[2].rec == rec) { row[2].incr(); goto done; } ++// row[1].init(rec); goto done; ++// } ++// } else { ++// // remember row[0] is empty ++// if (row[1].rec == rec) { row[1].incr(); goto done; } ++// if (row[2].rec == rec) { row[2].incr(); goto done; } ++// row[0].init(rec); goto done; ++// } ++// done: ++ ++void InterpreterMacroAssembler::record_klass_in_profile(Register receiver, ++ Register mdp, Register reg2, ++ bool is_virtual_call) { ++ assert(ProfileInterpreter, "must be profiling"); ++ Label done; ++ ++ record_klass_in_profile_helper(receiver, mdp, reg2, 0, done, is_virtual_call); ++ ++ bind (done); ++} ++ ++void InterpreterMacroAssembler::profile_ret(Register return_bci, ++ Register mdp) { ++ if (ProfileInterpreter) { ++ Label profile_continue; ++ uint row; ++ ++ // If no method data exists, go to profile_continue. ++ test_method_data_pointer(mdp, profile_continue); ++ ++ // Update the total ret count. ++ increment_mdp_data_at(mdp, in_bytes(CounterData::count_offset())); ++ ++ for (row = 0; row < RetData::row_limit(); row++) { ++ Label next_test; ++ ++ // See if return_bci is equal to bci[n]: ++ test_mdp_data_at(mdp, ++ in_bytes(RetData::bci_offset(row)), ++ return_bci, noreg, ++ next_test); ++ ++ // return_bci is equal to bci[n]. Increment the count. ++ increment_mdp_data_at(mdp, in_bytes(RetData::bci_count_offset(row))); ++ ++ // The method data pointer needs to be updated to reflect the new target. ++ update_mdp_by_offset(mdp, ++ in_bytes(RetData::bci_displacement_offset(row))); ++ beq(R0, R0, profile_continue); ++ delayed()->nop(); ++ bind(next_test); ++ } ++ ++ update_mdp_for_ret(return_bci); ++ ++ bind(profile_continue); ++ } ++} ++ ++ ++void InterpreterMacroAssembler::profile_null_seen(Register mdp) { ++ if (ProfileInterpreter) { ++ Label profile_continue; ++ ++ // If no method data exists, go to profile_continue. ++ test_method_data_pointer(mdp, profile_continue); ++ ++ set_mdp_flag_at(mdp, BitData::null_seen_byte_constant()); ++ ++ // The method data pointer needs to be updated. ++ int mdp_delta = in_bytes(BitData::bit_data_size()); ++ if (TypeProfileCasts) { ++ mdp_delta = in_bytes(VirtualCallData::virtual_call_data_size()); ++ } ++ update_mdp_by_constant(mdp, mdp_delta); ++ ++ bind(profile_continue); ++ } ++} ++ ++ ++void InterpreterMacroAssembler::profile_typecheck_failed(Register mdp) { ++ if (ProfileInterpreter && TypeProfileCasts) { ++ Label profile_continue; ++ ++ // If no method data exists, go to profile_continue. 
++ test_method_data_pointer(mdp, profile_continue); ++ ++ int count_offset = in_bytes(CounterData::count_offset()); ++ // Back up the address, since we have already bumped the mdp. ++ count_offset -= in_bytes(VirtualCallData::virtual_call_data_size()); ++ ++ // *Decrement* the counter. We expect to see zero or small negatives. ++ increment_mdp_data_at(mdp, count_offset, true); ++ ++ bind (profile_continue); ++ } ++} ++ ++ ++void InterpreterMacroAssembler::profile_typecheck(Register mdp, Register klass, Register reg2) { ++ if (ProfileInterpreter) { ++ Label profile_continue; ++ ++ // If no method data exists, go to profile_continue. ++ test_method_data_pointer(mdp, profile_continue); ++ ++ // The method data pointer needs to be updated. ++ int mdp_delta = in_bytes(BitData::bit_data_size()); ++ if (TypeProfileCasts) { ++ mdp_delta = in_bytes(VirtualCallData::virtual_call_data_size()); ++ ++ // Record the object type. ++ record_klass_in_profile(klass, mdp, reg2, false); ++ } ++ update_mdp_by_constant(mdp, mdp_delta); ++ ++ bind(profile_continue); ++ } ++} ++ ++ ++void InterpreterMacroAssembler::profile_switch_default(Register mdp) { ++ if (ProfileInterpreter) { ++ Label profile_continue; ++ ++ // If no method data exists, go to profile_continue. ++ test_method_data_pointer(mdp, profile_continue); ++ ++ // Update the default case count ++ increment_mdp_data_at(mdp, ++ in_bytes(MultiBranchData::default_count_offset())); ++ ++ // The method data pointer needs to be updated. ++ update_mdp_by_offset(mdp, ++ in_bytes(MultiBranchData:: ++ default_displacement_offset())); ++ ++ bind(profile_continue); ++ } ++} ++ ++ ++void InterpreterMacroAssembler::profile_switch_case(Register index, ++ Register mdp, ++ Register reg2) { ++ if (ProfileInterpreter) { ++ Label profile_continue; ++ ++ // If no method data exists, go to profile_continue. ++ test_method_data_pointer(mdp, profile_continue); ++ ++ // Build the base (index * per_case_size_in_bytes()) + ++ // case_array_offset_in_bytes() ++ move(reg2, in_bytes(MultiBranchData::per_case_size())); ++ if (UseLEXT1) { ++ gsdmult(index, index, reg2); ++ } else { ++ dmult(index, reg2); ++ mflo(index); ++ } ++ daddiu(index, index, in_bytes(MultiBranchData::case_array_offset())); ++ ++ // Update the case count ++ increment_mdp_data_at(mdp, ++ index, ++ in_bytes(MultiBranchData::relative_count_offset())); ++ ++ // The method data pointer needs to be updated. ++ update_mdp_by_offset(mdp, ++ index, ++ in_bytes(MultiBranchData:: ++ relative_displacement_offset())); ++ ++ bind(profile_continue); ++ } ++} ++ ++ ++void InterpreterMacroAssembler::narrow(Register result) { ++ ++ // Get method->_constMethod->_result_type ++ ld(T9, FP, frame::interpreter_frame_method_offset * wordSize); ++ ld(T9, T9, in_bytes(Method::const_offset())); ++ lbu(T9, T9, in_bytes(ConstMethod::result_type_offset())); ++ ++ Label done, notBool, notByte, notChar; ++ ++ // common case first ++ addiu(AT, T9, -T_INT); ++ beq(AT, R0, done); ++ delayed()->nop(); ++ ++ // mask integer result to narrower return type. 
++ addiu(AT, T9, -T_BOOLEAN); ++ bne(AT, R0, notBool); ++ delayed()->nop(); ++ andi(result, result, 0x1); ++ beq(R0, R0, done); ++ delayed()->nop(); ++ ++ bind(notBool); ++ addiu(AT, T9, -T_BYTE); ++ bne(AT, R0, notByte); ++ delayed()->nop(); ++ seb(result, result); ++ beq(R0, R0, done); ++ delayed()->nop(); ++ ++ bind(notByte); ++ addiu(AT, T9, -T_CHAR); ++ bne(AT, R0, notChar); ++ delayed()->nop(); ++ andi(result, result, 0xFFFF); ++ beq(R0, R0, done); ++ delayed()->nop(); ++ ++ bind(notChar); ++ seh(result, result); ++ ++ // Nothing to do for T_INT ++ bind(done); ++} ++ ++ ++void InterpreterMacroAssembler::profile_obj_type(Register obj, const Address& mdo_addr) { ++ Label update, next, none; ++ ++ verify_oop(obj); ++ ++ if (mdo_addr.index() != noreg) { ++ guarantee(T0 != mdo_addr.base(), "The base register will be corrupted !"); ++ guarantee(T0 != mdo_addr.index(), "The index register will be corrupted !"); ++ push(T0); ++ dsll(T0, mdo_addr.index(), mdo_addr.scale()); ++ daddu(T0, T0, mdo_addr.base()); ++ } ++ ++ bne(obj, R0, update); ++ delayed()->nop(); ++ ++ if (mdo_addr.index() == noreg) { ++ ld(AT, mdo_addr); ++ } else { ++ ld(AT, T0, mdo_addr.disp()); ++ } ++ ori(AT, AT, TypeEntries::null_seen); ++ if (mdo_addr.index() == noreg) { ++ sd(AT, mdo_addr); ++ } else { ++ sd(AT, T0, mdo_addr.disp()); ++ } ++ ++ beq(R0, R0, next); ++ delayed()->nop(); ++ ++ bind(update); ++ load_klass(obj, obj); ++ ++ if (mdo_addr.index() == noreg) { ++ ld(AT, mdo_addr); ++ } else { ++ ld(AT, T0, mdo_addr.disp()); ++ } ++ xorr(obj, obj, AT); ++ ++ assert(TypeEntries::type_klass_mask == -4, "must be"); ++ dextm(AT, obj, 2, 62); ++ beq(AT, R0, next); ++ delayed()->nop(); ++ ++ andi(AT, obj, TypeEntries::type_unknown); ++ bne(AT, R0, next); ++ delayed()->nop(); ++ ++ if (mdo_addr.index() == noreg) { ++ ld(AT, mdo_addr); ++ } else { ++ ld(AT, T0, mdo_addr.disp()); ++ } ++ beq(AT, R0, none); ++ delayed()->nop(); ++ ++ daddiu(AT, AT, -(TypeEntries::null_seen)); ++ beq(AT, R0, none); ++ delayed()->nop(); ++ ++ // There is a chance that the checks above (re-reading profiling ++ // data from memory) fail if another thread has just set the ++ // profiling to this obj's klass ++ if (mdo_addr.index() == noreg) { ++ ld(AT, mdo_addr); ++ } else { ++ ld(AT, T0, mdo_addr.disp()); ++ } ++ xorr(obj, obj, AT); ++ assert(TypeEntries::type_klass_mask == -4, "must be"); ++ dextm(AT, obj, 2, 62); ++ beq(AT, R0, next); ++ delayed()->nop(); ++ ++ // different than before. Cannot keep accurate profile. ++ if (mdo_addr.index() == noreg) { ++ ld(AT, mdo_addr); ++ } else { ++ ld(AT, T0, mdo_addr.disp()); ++ } ++ ori(AT, AT, TypeEntries::type_unknown); ++ if (mdo_addr.index() == noreg) { ++ sd(AT, mdo_addr); ++ } else { ++ sd(AT, T0, mdo_addr.disp()); ++ } ++ beq(R0, R0, next); ++ delayed()->nop(); ++ ++ bind(none); ++ // first time here. Set profile type. ++ if (mdo_addr.index() == noreg) { ++ sd(obj, mdo_addr); ++ } else { ++ sd(obj, T0, mdo_addr.disp()); ++ } ++ ++ bind(next); ++ if (mdo_addr.index() != noreg) { ++ pop(T0); ++ } ++} ++ ++void InterpreterMacroAssembler::profile_arguments_type(Register mdp, Register callee, Register tmp, bool is_virtual) { ++ if (!ProfileInterpreter) { ++ return; ++ } ++ ++ if (MethodData::profile_arguments() || MethodData::profile_return()) { ++ Label profile_continue; ++ ++ test_method_data_pointer(mdp, profile_continue); ++ ++ int off_to_start = is_virtual ? 
in_bytes(VirtualCallData::virtual_call_data_size()) : in_bytes(CounterData::counter_data_size()); ++ ++ lb(AT, mdp, in_bytes(DataLayout::tag_offset()) - off_to_start); ++ li(tmp, is_virtual ? DataLayout::virtual_call_type_data_tag : DataLayout::call_type_data_tag); ++ bne(tmp, AT, profile_continue); ++ delayed()->nop(); ++ ++ ++ if (MethodData::profile_arguments()) { ++ Label done; ++ int off_to_args = in_bytes(TypeEntriesAtCall::args_data_offset()); ++ if (Assembler::is_simm16(off_to_args)) { ++ daddiu(mdp, mdp, off_to_args); ++ } else { ++ move(AT, off_to_args); ++ daddu(mdp, mdp, AT); ++ } ++ ++ ++ for (int i = 0; i < TypeProfileArgsLimit; i++) { ++ if (i > 0 || MethodData::profile_return()) { ++ // If return value type is profiled we may have no argument to profile ++ ld(tmp, mdp, in_bytes(TypeEntriesAtCall::cell_count_offset())-off_to_args); ++ ++ if (Assembler::is_simm16(-1 * i * TypeStackSlotEntries::per_arg_count())) { ++ addiu32(tmp, tmp, -1 * i * TypeStackSlotEntries::per_arg_count()); ++ } else { ++ li(AT, i*TypeStackSlotEntries::per_arg_count()); ++ subu32(tmp, tmp, AT); ++ } ++ ++ li(AT, TypeStackSlotEntries::per_arg_count()); ++ slt(AT, tmp, AT); ++ bne(AT, R0, done); ++ delayed()->nop(); ++ } ++ ld(tmp, callee, in_bytes(Method::const_offset())); ++ ++ lhu(tmp, tmp, in_bytes(ConstMethod::size_of_parameters_offset())); ++ ++ // stack offset o (zero based) from the start of the argument ++ // list, for n arguments translates into offset n - o - 1 from ++ // the end of the argument list ++ ld(AT, mdp, in_bytes(TypeEntriesAtCall::stack_slot_offset(i))-off_to_args); ++ subu(tmp, tmp, AT); ++ ++ addiu32(tmp, tmp, -1); ++ ++ Address arg_addr = argument_address(tmp); ++ ld(tmp, arg_addr); ++ ++ Address mdo_arg_addr(mdp, in_bytes(TypeEntriesAtCall::argument_type_offset(i))-off_to_args); ++ profile_obj_type(tmp, mdo_arg_addr); ++ ++ int to_add = in_bytes(TypeStackSlotEntries::per_arg_size()); ++ if (Assembler::is_simm16(to_add)) { ++ daddiu(mdp, mdp, to_add); ++ } else { ++ move(AT, to_add); ++ daddu(mdp, mdp, AT); ++ } ++ ++ off_to_args += to_add; ++ } ++ ++ if (MethodData::profile_return()) { ++ ld(tmp, mdp, in_bytes(TypeEntriesAtCall::cell_count_offset())-off_to_args); ++ ++ int tmp_arg_counts = TypeProfileArgsLimit*TypeStackSlotEntries::per_arg_count(); ++ if (Assembler::is_simm16(-1 * tmp_arg_counts)) { ++ addiu32(tmp, tmp, -1 * tmp_arg_counts); ++ } else { ++ move(AT, tmp_arg_counts); ++ subu32(mdp, mdp, AT); ++ } ++ } ++ ++ bind(done); ++ ++ if (MethodData::profile_return()) { ++ // We're right after the type profile for the last ++ // argument. tmp is the number of cells left in the ++ // CallTypeData/VirtualCallTypeData to reach its end. Non null ++ // if there's a return to profile. 
++ assert(ReturnTypeEntry::static_cell_count() < TypeStackSlotEntries::per_arg_count(), "can't move past ret type"); ++ sll(tmp, tmp, exact_log2(DataLayout::cell_size)); ++ daddu(mdp, mdp, tmp); ++ } ++ sd(mdp, FP, frame::interpreter_frame_mdp_offset * wordSize); ++ } else { ++ assert(MethodData::profile_return(), "either profile call args or call ret"); ++ update_mdp_by_constant(mdp, in_bytes(TypeEntriesAtCall::return_only_size())); ++ } ++ ++ // mdp points right after the end of the ++ // CallTypeData/VirtualCallTypeData, right after the cells for the ++ // return value type if there's one ++ ++ bind(profile_continue); ++ } ++} ++ ++void InterpreterMacroAssembler::profile_return_type(Register mdp, Register ret, Register tmp) { ++ assert_different_registers(mdp, ret, tmp, _bcp_register); ++ if (ProfileInterpreter && MethodData::profile_return()) { ++ Label profile_continue, done; ++ ++ test_method_data_pointer(mdp, profile_continue); ++ ++ if (MethodData::profile_return_jsr292_only()) { ++ assert(Method::intrinsic_id_size_in_bytes() == 2, "assuming Method::_intrinsic_id is u2"); ++ ++ // If we don't profile all invoke bytecodes we must make sure ++ // it's a bytecode we indeed profile. We can't go back to the ++ // beginning of the ProfileData we intend to update to check its ++ // type because we're right after it and we don't know its ++ // length ++ Label do_profile; ++ lb(tmp, _bcp_register, 0); ++ daddiu(AT, tmp, -1 * Bytecodes::_invokedynamic); ++ beq(AT, R0, do_profile); ++ delayed()->daddiu(AT, tmp, -1 * Bytecodes::_invokehandle); ++ beq(AT, R0, do_profile); ++ delayed()->nop(); ++ ++ get_method(tmp); ++ lhu(tmp, tmp, Method::intrinsic_id_offset_in_bytes()); ++ li(AT, vmIntrinsics::_compiledLambdaForm); ++ bne(tmp, AT, profile_continue); ++ delayed()->nop(); ++ ++ bind(do_profile); ++ } ++ ++ Address mdo_ret_addr(mdp, -in_bytes(ReturnTypeEntry::size())); ++ daddu(tmp, ret, R0); ++ profile_obj_type(tmp, mdo_ret_addr); ++ ++ bind(profile_continue); ++ } ++} ++ ++void InterpreterMacroAssembler::profile_parameters_type(Register mdp, Register tmp1, Register tmp2) { ++ guarantee(T9 == tmp1, "You are required to use T9 as the index register for MIPS !"); ++ ++ if (ProfileInterpreter && MethodData::profile_parameters()) { ++ Label profile_continue, done; ++ ++ test_method_data_pointer(mdp, profile_continue); ++ ++ // Load the offset of the area within the MDO used for ++ // parameters. If it's negative we're not profiling any parameters ++ lw(tmp1, mdp, in_bytes(MethodData::parameters_type_data_di_offset()) - in_bytes(MethodData::data_offset())); ++ bltz(tmp1, profile_continue); ++ delayed()->nop(); ++ ++ // Compute a pointer to the area for parameters from the offset ++ // and move the pointer to the slot for the last ++ // parameters. Collect profiling from last parameter down.
++ // mdo start + parameters offset + array length - 1 ++ daddu(mdp, mdp, tmp1); ++ ld(tmp1, mdp, in_bytes(ArrayData::array_len_offset())); ++ decrement(tmp1, TypeStackSlotEntries::per_arg_count()); ++ ++ ++ Label loop; ++ bind(loop); ++ ++ int off_base = in_bytes(ParametersTypeData::stack_slot_offset(0)); ++ int type_base = in_bytes(ParametersTypeData::type_offset(0)); ++ Address::ScaleFactor per_arg_scale = Address::times(DataLayout::cell_size); ++ Address arg_type(mdp, tmp1, per_arg_scale, type_base); ++ ++ // load offset on the stack from the slot for this parameter ++ dsll(AT, tmp1, per_arg_scale); ++ daddu(AT, AT, mdp); ++ ld(tmp2, AT, off_base); ++ ++ subu(tmp2, R0, tmp2); ++ ++ // read the parameter from the local area ++ dsll(AT, tmp2, Interpreter::logStackElementSize); ++ daddu(AT, AT, _locals_register); ++ ld(tmp2, AT, 0); ++ ++ // profile the parameter ++ profile_obj_type(tmp2, arg_type); ++ ++ // go to next parameter ++ decrement(tmp1, TypeStackSlotEntries::per_arg_count()); ++ bgtz(tmp1, loop); ++ delayed()->nop(); ++ ++ bind(profile_continue); ++ } ++} ++ ++void InterpreterMacroAssembler::verify_oop(Register reg, TosState state) { ++ if (state == atos) { ++ MacroAssembler::verify_oop(reg); ++ } ++} ++ ++void InterpreterMacroAssembler::verify_FPU(int stack_depth, TosState state) { ++} ++#endif // !CC_INTERP ++ ++ ++void InterpreterMacroAssembler::notify_method_entry() { ++ // Whenever JVMTI is interp_only_mode, method entry/exit events are sent to ++ // track stack depth. If it is possible to enter interp_only_mode we add ++ // the code to check if the event should be sent. ++ Register tempreg = T0; ++#ifndef OPT_THREAD ++ Register thread = T8; ++ get_thread(thread); ++#else ++ Register thread = TREG; ++#endif ++ if (JvmtiExport::can_post_interpreter_events()) { ++ Label L; ++ lw(tempreg, thread, in_bytes(JavaThread::interp_only_mode_offset())); ++ beq(tempreg, R0, L); ++ delayed()->nop(); ++ call_VM(noreg, CAST_FROM_FN_PTR(address, ++ InterpreterRuntime::post_method_entry)); ++ bind(L); ++ } ++ ++ { ++ SkipIfEqual skip_if(this, &DTraceMethodProbes, 0); ++ get_method(S3); ++ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::dtrace_method_entry), ++ //Rthread, ++ thread, ++ //Rmethod); ++ S3); ++ } ++ ++} ++ ++void InterpreterMacroAssembler::notify_method_exit( ++ TosState state, NotifyMethodExitMode mode) { ++ Register tempreg = T0; ++#ifndef OPT_THREAD ++ Register thread = T8; ++ get_thread(thread); ++#else ++ Register thread = TREG; ++#endif ++ // Whenever JVMTI is interp_only_mode, method entry/exit events are sent to ++ // track stack depth. If it is possible to enter interp_only_mode we add ++ // the code to check if the event should be sent. ++ if (mode == NotifyJVMTI && JvmtiExport::can_post_interpreter_events()) { ++ Label skip; ++ // Note: frame::interpreter_frame_result has a dependency on how the ++ // method result is saved across the call to post_method_exit. If this ++ // is changed then the interpreter_frame_result implementation will ++ // need to be updated too. ++ ++ // template interpreter will leave it on the top of the stack. 
++ push(state); ++ lw(tempreg, thread, in_bytes(JavaThread::interp_only_mode_offset())); ++ beq(tempreg, R0, skip); ++ delayed()->nop(); ++ call_VM(noreg, ++ CAST_FROM_FN_PTR(address, InterpreterRuntime::post_method_exit)); ++ bind(skip); ++ pop(state); ++ } ++ ++ { ++ // Dtrace notification ++ SkipIfEqual skip_if(this, &DTraceMethodProbes, 0); ++ push(state); ++ get_method(S3); ++ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::dtrace_method_exit), ++ //Rthread, Rmethod); ++ thread, S3); ++ pop(state); ++ } ++} ++ ++// Jump if ((*counter_addr += increment) & mask) satisfies the condition. ++void InterpreterMacroAssembler::increment_mask_and_jump(Address counter_addr, ++ int increment, int mask, ++ Register scratch, bool preloaded, ++ Condition cond, Label* where) { ++ assert_different_registers(scratch, AT); ++ ++ if (!preloaded) { ++ lw(scratch, counter_addr); ++ } ++ addiu32(scratch, scratch, increment); ++ sw(scratch, counter_addr); ++ ++ move(AT, mask); ++ andr(scratch, scratch, AT); ++ ++ if (cond == Assembler::zero) { ++ beq(scratch, R0, *where); ++ delayed()->nop(); ++ } else { ++ unimplemented(); ++ } ++} +diff --git a/src/hotspot/cpu/mips/interpreterRT_mips.hpp b/src/hotspot/cpu/mips/interpreterRT_mips.hpp +new file mode 100644 +index 0000000000..054138ea42 +--- /dev/null ++++ b/src/hotspot/cpu/mips/interpreterRT_mips.hpp +@@ -0,0 +1,60 @@ ++/* ++ * Copyright (c) 1998, 2010, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#ifndef CPU_MIPS_VM_INTERPRETERRT_MIPS_HPP ++#define CPU_MIPS_VM_INTERPRETERRT_MIPS_HPP ++ ++// This is included in the middle of class Interpreter. ++// Do not include files here. 
++ ++// native method calls ++ ++class SignatureHandlerGenerator: public NativeSignatureIterator { ++ private: ++ MacroAssembler* _masm; ++ ++ void move(int from_offset, int to_offset); ++ ++ void box(int from_offset, int to_offset); ++ void pass_int(); ++ void pass_long(); ++ void pass_object(); ++ void pass_float(); ++ void pass_double(); ++ ++ public: ++ // Creation ++ SignatureHandlerGenerator(const methodHandle& method, CodeBuffer* buffer); ++ ++ // Code generation ++ void generate(uint64_t fingerprint); ++ ++ // Code generation support ++ static Register from(); ++ static Register to(); ++ static Register temp(); ++}; ++ ++#endif // CPU_MIPS_VM_INTERPRETERRT_MIPS_HPP +diff --git a/src/hotspot/cpu/mips/interpreterRT_mips_64.cpp b/src/hotspot/cpu/mips/interpreterRT_mips_64.cpp +new file mode 100644 +index 0000000000..e655b2a1a8 +--- /dev/null ++++ b/src/hotspot/cpu/mips/interpreterRT_mips_64.cpp +@@ -0,0 +1,252 @@ ++/* ++ * Copyright (c) 2003, 2012, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ * ++ */ ++ ++#include "precompiled.hpp" ++#include "interpreter/interp_masm.hpp" ++#include "interpreter/interpreter.hpp" ++#include "interpreter/interpreterRuntime.hpp" ++#include "memory/allocation.inline.hpp" ++#include "memory/universe.hpp" ++#include "oops/method.hpp" ++#include "oops/oop.inline.hpp" ++#include "runtime/handles.inline.hpp" ++#include "runtime/icache.hpp" ++#include "runtime/interfaceSupport.inline.hpp" ++#include "runtime/signature.hpp" ++ ++#define __ _masm-> ++ ++#define T0 RT0 ++#define T1 RT1 ++#define T2 RT2 ++#define T3 RT3 ++#define T8 RT8 ++#define T9 RT9 ++ ++// Implementation of SignatureHandlerGenerator ++InterpreterRuntime::SignatureHandlerGenerator::SignatureHandlerGenerator( ++ const methodHandle& method, CodeBuffer* buffer) : NativeSignatureIterator(method) { ++ _masm = new MacroAssembler(buffer); ++} ++ ++void InterpreterRuntime::SignatureHandlerGenerator::move(int from_offset, int to_offset) { ++ __ ld(temp(), from(), Interpreter::local_offset_in_bytes(from_offset)); ++ __ sd(temp(), to(), to_offset * longSize); ++} ++ ++void InterpreterRuntime::SignatureHandlerGenerator::box(int from_offset, int to_offset) { ++ __ addiu(temp(), from(),Interpreter::local_offset_in_bytes(from_offset) ); ++ __ lw(AT, from(), Interpreter::local_offset_in_bytes(from_offset) ); ++ ++ __ movz(temp(), R0, AT); ++ __ sw(temp(), to(), to_offset * wordSize); ++} ++ ++void InterpreterRuntime::SignatureHandlerGenerator::generate(uint64_t fingerprint) { ++ // generate code to handle arguments ++ iterate(fingerprint); ++ // return result handler ++ __ li(V0, AbstractInterpreter::result_handler(method()->result_type())); ++ // return ++ __ jr(RA); ++ __ delayed()->nop(); ++ ++ __ flush(); ++} ++ ++void InterpreterRuntime::SignatureHandlerGenerator::pass_int() { ++ Argument jni_arg(jni_offset()); ++ if(jni_arg.is_Register()) { ++ __ lw(jni_arg.as_Register(), from(), Interpreter::local_offset_in_bytes(offset())); ++ } else { ++ __ lw(temp(), from(), Interpreter::local_offset_in_bytes(offset())); ++ __ sw(temp(), jni_arg.as_caller_address()); ++ } ++} ++ ++// the jvm specifies that long type takes 2 stack spaces, so in do_long(), _offset += 2. ++void InterpreterRuntime::SignatureHandlerGenerator::pass_long() { ++ Argument jni_arg(jni_offset()); ++ if(jni_arg.is_Register()) { ++ __ ld(jni_arg.as_Register(), from(), Interpreter::local_offset_in_bytes(offset() + 1)); ++ } else { ++ __ ld(temp(), from(), Interpreter::local_offset_in_bytes(offset() + 1)); ++ __ sd(temp(), jni_arg.as_caller_address()); ++ } ++} ++ ++void InterpreterRuntime::SignatureHandlerGenerator::pass_object() { ++ Argument jni_arg(jni_offset()); ++ ++ // the handle for a receiver will never be null ++ bool do_NULL_check = offset() != 0 || is_static(); ++ if (do_NULL_check) { ++ __ ld(AT, from(), Interpreter::local_offset_in_bytes(offset())); ++ __ daddiu((jni_arg.is_Register() ? jni_arg.as_Register() : temp()), from(), Interpreter::local_offset_in_bytes(offset())); ++ __ movz((jni_arg.is_Register() ? 
jni_arg.as_Register() : temp()), R0, AT); ++ } else { ++ __ daddiu(jni_arg.as_Register(), from(), Interpreter::local_offset_in_bytes(offset())); ++ } ++ ++ if (!jni_arg.is_Register()) ++ __ sd(temp(), jni_arg.as_caller_address()); ++} ++ ++void InterpreterRuntime::SignatureHandlerGenerator::pass_float() { ++ Argument jni_arg(jni_offset()); ++ if(jni_arg.is_Register()) { ++ __ lwc1(jni_arg.as_FloatRegister(), from(), Interpreter::local_offset_in_bytes(offset())); ++ } else { ++ __ lw(temp(), from(), Interpreter::local_offset_in_bytes(offset())); ++ __ sw(temp(), jni_arg.as_caller_address()); ++ } ++} ++ ++// the jvm specifies that double type takes 2 stack spaces, so in do_double(), _offset += 2. ++void InterpreterRuntime::SignatureHandlerGenerator::pass_double() { ++ Argument jni_arg(jni_offset()); ++ if(jni_arg.is_Register()) { ++ __ ldc1(jni_arg.as_FloatRegister(), from(), Interpreter::local_offset_in_bytes(offset() + 1)); ++ } else { ++ __ ld(temp(), from(), Interpreter::local_offset_in_bytes(offset() + 1)); ++ __ sd(temp(), jni_arg.as_caller_address()); ++ } ++} ++ ++ ++Register InterpreterRuntime::SignatureHandlerGenerator::from() { return LVP; } ++Register InterpreterRuntime::SignatureHandlerGenerator::to() { return SP; } ++Register InterpreterRuntime::SignatureHandlerGenerator::temp() { return T8; } ++ ++// Implementation of SignatureHandlerLibrary ++ ++void SignatureHandlerLibrary::pd_set_handler(address handler) {} ++ ++ ++class SlowSignatureHandler ++ : public NativeSignatureIterator { ++ private: ++ address _from; ++ intptr_t* _to; ++ intptr_t* _reg_args; ++ intptr_t* _fp_identifiers; ++ unsigned int _num_args; ++ ++ virtual void pass_int() ++ { ++ jint from_obj = *(jint *)(_from+Interpreter::local_offset_in_bytes(0)); ++ _from -= Interpreter::stackElementSize; ++ ++ if (_num_args < Argument::n_register_parameters) { ++ *_reg_args++ = from_obj; ++ _num_args++; ++ } else { ++ *_to++ = from_obj; ++ } ++ } ++ ++ virtual void pass_long() ++ { ++ intptr_t from_obj = *(intptr_t*)(_from+Interpreter::local_offset_in_bytes(1)); ++ _from -= 2 * Interpreter::stackElementSize; ++ ++ if (_num_args < Argument::n_register_parameters) { ++ *_reg_args++ = from_obj; ++ _num_args++; ++ } else { ++ *_to++ = from_obj; ++ } ++ } ++ ++ virtual void pass_object() ++ { ++ intptr_t *from_addr = (intptr_t*)(_from + Interpreter::local_offset_in_bytes(0)); ++ _from -= Interpreter::stackElementSize; ++ if (_num_args < Argument::n_register_parameters) { ++ *_reg_args++ = (*from_addr == 0) ? NULL : (intptr_t) from_addr; ++ _num_args++; ++ } else { ++ *_to++ = (*from_addr == 0) ? 
NULL : (intptr_t) from_addr; ++ } ++ } ++ ++ virtual void pass_float() ++ { ++ jint from_obj = *(jint *)(_from+Interpreter::local_offset_in_bytes(0)); ++ _from -= Interpreter::stackElementSize; ++ ++ if (_num_args < Argument::n_float_register_parameters) { ++ *_reg_args++ = from_obj; ++ *_fp_identifiers |= (0x01 << (_num_args*2)); // mark as float ++ _num_args++; ++ } else { ++ *_to++ = from_obj; ++ } ++ } ++ ++ virtual void pass_double() ++ { ++ intptr_t from_obj = *(intptr_t*)(_from+Interpreter::local_offset_in_bytes(1)); ++ _from -= 2*Interpreter::stackElementSize; ++ ++ if (_num_args < Argument::n_float_register_parameters) { ++ *_reg_args++ = from_obj; ++ *_fp_identifiers |= (0x3 << (_num_args*2)); // mark as double ++ _num_args++; ++ } else { ++ *_to++ = from_obj; ++ } ++ } ++ ++ public: ++ SlowSignatureHandler(methodHandle method, address from, intptr_t* to) ++ : NativeSignatureIterator(method) ++ { ++ _from = from; ++ _to = to; ++ ++ // see TemplateInterpreterGenerator::generate_slow_signature_handler() ++ _reg_args = to - Argument::n_register_parameters + jni_offset() - 1; ++ _fp_identifiers = to - 1; ++ *(int*) _fp_identifiers = 0; ++ _num_args = jni_offset(); ++ } ++}; ++ ++ ++IRT_ENTRY(address, ++ InterpreterRuntime::slow_signature_handler(JavaThread* thread, ++ Method* method, ++ intptr_t* from, ++ intptr_t* to)) ++ methodHandle m(thread, (Method*)method); ++ assert(m->is_native(), "sanity check"); ++ ++ // handle arguments ++ SlowSignatureHandler(m, (address)from, to).iterate(UCONST64(-1)); ++ ++ // return result handler ++ return Interpreter::result_handler(m->result_type()); ++IRT_END +diff --git a/src/hotspot/cpu/mips/javaFrameAnchor_mips.hpp b/src/hotspot/cpu/mips/javaFrameAnchor_mips.hpp +new file mode 100644 +index 0000000000..dccdf6a019 +--- /dev/null ++++ b/src/hotspot/cpu/mips/javaFrameAnchor_mips.hpp +@@ -0,0 +1,87 @@ ++/* ++ * Copyright (c) 2002, 2010, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2016, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ * ++ */ ++ ++#ifndef CPU_MIPS_VM_JAVAFRAMEANCHOR_MIPS_HPP ++#define CPU_MIPS_VM_JAVAFRAMEANCHOR_MIPS_HPP ++ ++private: ++ ++ // FP value associated with _last_Java_sp: ++ intptr_t* volatile _last_Java_fp; // pointer is volatile not what it points to ++ ++public: ++ // Each arch must define reset, save, restore ++ // These are used by objects that only care about: ++ // 1 - initializing a new state (thread creation, javaCalls) ++ // 2 - saving a current state (javaCalls) ++ // 3 - restoring an old state (javaCalls) ++ ++ void clear(void) { ++ // clearing _last_Java_sp must be first ++ _last_Java_sp = NULL; ++ // fence? ++ _last_Java_fp = NULL; ++ _last_Java_pc = NULL; ++ } ++ ++ void copy(JavaFrameAnchor* src) { ++ // In order to make sure the transition state is valid for "this" ++ // We must clear _last_Java_sp before copying the rest of the new data ++ // ++ // Hack Alert: Temporary bugfix for 4717480/4721647 ++ // To act like previous version (pd_cache_state) don't NULL _last_Java_sp ++ // unless the value is changing ++ // ++ if (_last_Java_sp != src->_last_Java_sp) ++ _last_Java_sp = NULL; ++ ++ _last_Java_fp = src->_last_Java_fp; ++ _last_Java_pc = src->_last_Java_pc; ++ // Must be last so profiler will always see valid frame if has_last_frame() is true ++ _last_Java_sp = src->_last_Java_sp; ++ } ++ ++ // Always walkable ++ bool walkable(void) { return true; } ++ // Never any thing to do since we are always walkable and can find address of return addresses ++ void make_walkable(JavaThread* thread) { } ++ ++ intptr_t* last_Java_sp(void) const { return _last_Java_sp; } ++ ++ address last_Java_pc(void) { return _last_Java_pc; } ++ ++private: ++ ++ static ByteSize last_Java_fp_offset() { return byte_offset_of(JavaFrameAnchor, _last_Java_fp); } ++ ++public: ++ ++ void set_last_Java_sp(intptr_t* sp) { _last_Java_sp = sp; } ++ ++ intptr_t* last_Java_fp(void) { return _last_Java_fp; } ++ // Assert (last_Java_sp == NULL || fp == NULL) ++ void set_last_Java_fp(intptr_t* fp) { _last_Java_fp = fp; } ++ ++#endif // CPU_MIPS_VM_JAVAFRAMEANCHOR_MIPS_HPP +diff --git a/src/hotspot/cpu/mips/jniFastGetField_mips_64.cpp b/src/hotspot/cpu/mips/jniFastGetField_mips_64.cpp +new file mode 100644 +index 0000000000..bba5b7eee8 +--- /dev/null ++++ b/src/hotspot/cpu/mips/jniFastGetField_mips_64.cpp +@@ -0,0 +1,167 @@ ++/* ++ * Copyright (c) 2004, 2010, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2021, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ * ++ */ ++ ++#include "precompiled.hpp" ++#include "asm/macroAssembler.hpp" ++#include "code/codeBlob.hpp" ++#include "gc/shared/barrierSet.hpp" ++#include "gc/shared/barrierSetAssembler.hpp" ++#include "memory/resourceArea.hpp" ++#include "prims/jniFastGetField.hpp" ++#include "prims/jvm_misc.hpp" ++#include "runtime/safepoint.hpp" ++ ++#define __ masm-> ++ ++#define T0 RT0 ++#define T1 RT1 ++#define T2 RT2 ++#define T3 RT3 ++#define T8 RT8 ++#define T9 RT9 ++ ++#define BUFFER_SIZE 30*wordSize ++ ++// Instead of issuing lfence for LoadLoad barrier, we create data dependency ++// between loads, which is more efficient than lfence. ++ ++address JNI_FastGetField::generate_fast_get_int_field0(BasicType type) { ++ const char *name = NULL; ++ switch (type) { ++ case T_BOOLEAN: name = "jni_fast_GetBooleanField"; break; ++ case T_BYTE: name = "jni_fast_GetByteField"; break; ++ case T_CHAR: name = "jni_fast_GetCharField"; break; ++ case T_SHORT: name = "jni_fast_GetShortField"; break; ++ case T_INT: name = "jni_fast_GetIntField"; break; ++ case T_LONG: name = "jni_fast_GetLongField"; break; ++ case T_FLOAT: name = "jni_fast_GetFloatField"; break; ++ case T_DOUBLE: name = "jni_fast_GetDoubleField"; break; ++ default: ShouldNotReachHere(); ++ } ++ ResourceMark rm; ++ BufferBlob* blob = BufferBlob::create(name, BUFFER_SIZE); ++ CodeBuffer cbuf(blob); ++ MacroAssembler* masm = new MacroAssembler(&cbuf); ++ address fast_entry = __ pc(); ++ ++ Label slow; ++ ++ // return pc RA ++ // jni env A0 ++ // obj A1 ++ // jfieldID A2 ++ ++ address counter_addr = SafepointSynchronize::safepoint_counter_addr(); ++ __ set64(AT, (long)counter_addr); ++ __ lw(T1, AT, 0); ++ ++ // Parameters(A0~A3) should not be modified, since they will be used in slow path ++ __ andi(AT, T1, 1); ++ __ bne(AT, R0, slow); ++ __ delayed()->nop(); ++ ++ __ move(T0, A1); ++ // Both T0 and T9 are clobbered by try_resolve_jobject_in_native. 
++ BarrierSetAssembler* bs = BarrierSet::barrier_set()->barrier_set_assembler(); ++ bs->try_resolve_jobject_in_native(masm, /* jni_env */ A0, T0, T9, slow); ++ ++ __ dsrl(T2, A2, 2); // offset ++ __ daddu(T0, T0, T2); ++ ++ assert(count < LIST_CAPACITY, "LIST_CAPACITY too small"); ++ speculative_load_pclist[count] = __ pc(); ++ switch (type) { ++ case T_BOOLEAN: __ lbu (V0, T0, 0); break; ++ case T_BYTE: __ lb (V0, T0, 0); break; ++ case T_CHAR: __ lhu (V0, T0, 0); break; ++ case T_SHORT: __ lh (V0, T0, 0); break; ++ case T_INT: __ lw (V0, T0, 0); break; ++ case T_LONG: __ ld (V0, T0, 0); break; ++ case T_FLOAT: __ lwc1(F0, T0, 0); break; ++ case T_DOUBLE: __ ldc1(F0, T0, 0); break; ++ default: ShouldNotReachHere(); ++ } ++ ++ __ set64(AT, (long)counter_addr); ++ __ lw(AT, AT, 0); ++ __ bne(T1, AT, slow); ++ __ delayed()->nop(); ++ ++ __ jr(RA); ++ __ delayed()->nop(); ++ ++ slowcase_entry_pclist[count++] = __ pc(); ++ __ bind (slow); ++ address slow_case_addr = NULL; ++ switch (type) { ++ case T_BOOLEAN: slow_case_addr = jni_GetBooleanField_addr(); break; ++ case T_BYTE: slow_case_addr = jni_GetByteField_addr(); break; ++ case T_CHAR: slow_case_addr = jni_GetCharField_addr(); break; ++ case T_SHORT: slow_case_addr = jni_GetShortField_addr(); break; ++ case T_INT: slow_case_addr = jni_GetIntField_addr(); break; ++ case T_LONG: slow_case_addr = jni_GetLongField_addr(); break; ++ case T_FLOAT: slow_case_addr = jni_GetFloatField_addr(); break; ++ case T_DOUBLE: slow_case_addr = jni_GetDoubleField_addr(); break; ++ default: ShouldNotReachHere(); ++ } ++ __ jmp(slow_case_addr); ++ __ delayed()->nop(); ++ ++ __ flush (); ++ ++ return fast_entry; ++} ++ ++address JNI_FastGetField::generate_fast_get_boolean_field() { ++ return generate_fast_get_int_field0(T_BOOLEAN); ++} ++ ++address JNI_FastGetField::generate_fast_get_byte_field() { ++ return generate_fast_get_int_field0(T_BYTE); ++} ++ ++address JNI_FastGetField::generate_fast_get_char_field() { ++ return generate_fast_get_int_field0(T_CHAR); ++} ++ ++address JNI_FastGetField::generate_fast_get_short_field() { ++ return generate_fast_get_int_field0(T_SHORT); ++} ++ ++address JNI_FastGetField::generate_fast_get_int_field() { ++ return generate_fast_get_int_field0(T_INT); ++} ++ ++address JNI_FastGetField::generate_fast_get_long_field() { ++ return generate_fast_get_int_field0(T_LONG); ++} ++ ++address JNI_FastGetField::generate_fast_get_float_field() { ++ return generate_fast_get_int_field0(T_FLOAT); ++} ++ ++address JNI_FastGetField::generate_fast_get_double_field() { ++ return generate_fast_get_int_field0(T_DOUBLE); ++} +diff --git a/src/hotspot/cpu/mips/jniTypes_mips.hpp b/src/hotspot/cpu/mips/jniTypes_mips.hpp +new file mode 100644 +index 0000000000..e93237ffd9 +--- /dev/null ++++ b/src/hotspot/cpu/mips/jniTypes_mips.hpp +@@ -0,0 +1,144 @@ ++/* ++ * Copyright (c) 1998, 2010, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#ifndef CPU_MIPS_VM_JNITYPES_MIPS_HPP ++#define CPU_MIPS_VM_JNITYPES_MIPS_HPP ++ ++#include "jni.h" ++#include "memory/allocation.hpp" ++#include "oops/oop.hpp" ++ ++// This file holds platform-dependent routines used to write primitive jni ++// types to the array of arguments passed into JavaCalls::call ++ ++class JNITypes : AllStatic { ++ // These functions write a java primitive type (in native format) ++ // to a java stack slot array to be passed as an argument to JavaCalls::calls. ++ // I.e., they are functionally 'push' operations if they have a 'pos' ++ // formal parameter. Note that jlong's and jdouble's are written ++ // _in reverse_ of the order in which they appear in the interpreter ++ // stack. This is because call stubs (see stubGenerator_sparc.cpp) ++ // reverse the argument list constructed by JavaCallArguments (see ++ // javaCalls.hpp). ++ ++private: ++ ++ // 32bit Helper routines. ++ static inline void put_int2r(jint *from, intptr_t *to) { *(jint *)(to++) = from[1]; ++ *(jint *)(to ) = from[0]; } ++ static inline void put_int2r(jint *from, intptr_t *to, int& pos) { put_int2r(from, to + pos); pos += 2; } ++ ++public: ++ // In MIPS64, the sizeof intptr_t is 8 bytes, and each unit in JavaCallArguments::_value_buffer[] ++ // is 8 bytes. ++ // If we only write the low 4 bytes with (jint *), the high 4 bytes will be left with uncertain values. ++ // Then, in JavaCallArguments::parameters(), the whole 8 bytes of a T_INT parameter is loaded. ++ // This error occurs in ReflectInvoke.java ++ // The parameter of DD(int) should be 4 instead of 0x550000004. ++ // ++ // See: [runtime/javaCalls.hpp] ++ ++ static inline void put_int(jint from, intptr_t *to) { *(intptr_t *)(to + 0 ) = from; } ++ static inline void put_int(jint from, intptr_t *to, int& pos) { *(intptr_t *)(to + pos++) = from; } ++ static inline void put_int(jint *from, intptr_t *to, int& pos) { *(intptr_t *)(to + pos++) = *from; } ++ ++ // Longs are stored in native format in one JavaCallArgument slot at ++ // *(to). ++ // In theory, *(to + 1) is an empty slot. But, for several Java2D testing programs (TestBorderLayout, SwingTest), ++ // *(to + 1) must contain a copy of the long value. Otherwise it will be corrupted. ++ static inline void put_long(jlong from, intptr_t *to) { ++ *(jlong*) (to + 1) = from; ++ *(jlong*) (to) = from; ++ } ++ ++ // A long parameter occupies two slots. ++ // It must fit the layout rule in methodHandle. ++ // ++ // See: [runtime/reflection.cpp] Reflection::invoke() ++ // assert(java_args.size_of_parameters() == method->size_of_parameters(), "just checking"); ++ ++ static inline void put_long(jlong from, intptr_t *to, int& pos) { ++ *(jlong*) (to + 1 + pos) = from; ++ *(jlong*) (to + pos) = from; ++ pos += 2; ++ } ++ ++ static inline void put_long(jlong *from, intptr_t *to, int& pos) { ++ *(jlong*) (to + 1 + pos) = *from; ++ *(jlong*) (to + pos) = *from; ++ pos += 2; ++ } ++ ++ // Oops are stored in native format in one JavaCallArgument slot at *to.
++ static inline void put_obj(oop from, intptr_t *to) { *(oop *)(to + 0 ) = from; } ++ static inline void put_obj(oop from, intptr_t *to, int& pos) { *(oop *)(to + pos++) = from; } ++ static inline void put_obj(oop *from, intptr_t *to, int& pos) { *(oop *)(to + pos++) = *from; } ++ ++ // Floats are stored in native format in one JavaCallArgument slot at *to. ++ static inline void put_float(jfloat from, intptr_t *to) { *(jfloat *)(to + 0 ) = from; } ++ static inline void put_float(jfloat from, intptr_t *to, int& pos) { *(jfloat *)(to + pos++) = from; } ++ static inline void put_float(jfloat *from, intptr_t *to, int& pos) { *(jfloat *)(to + pos++) = *from; } ++ ++#undef _JNI_SLOT_OFFSET ++#define _JNI_SLOT_OFFSET 0 ++ ++ // Longs are stored in native format in one JavaCallArgument slot at ++ // *(to). ++ // In theory, *(to + 1) is an empty slot. But, for several Java2D testing programs (TestBorderLayout, SwingTest), ++ // *(to + 1) must contain a copy of the long value. Otherwise it will be corrupted. ++ static inline void put_double(jdouble from, intptr_t *to) { ++ *(jdouble*) (to + 1) = from; ++ *(jdouble*) (to) = from; ++ } ++ ++ // A long parameter occupies two slots. ++ // It must fit the layout rule in methodHandle. ++ // ++ // See: [runtime/reflection.cpp] Reflection::invoke() ++ // assert(java_args.size_of_parameters() == method->size_of_parameters(), "just checking"); ++ ++ static inline void put_double(jdouble from, intptr_t *to, int& pos) { ++ *(jdouble*) (to + 1 + pos) = from; ++ *(jdouble*) (to + pos) = from; ++ pos += 2; ++ } ++ ++ static inline void put_double(jdouble *from, intptr_t *to, int& pos) { ++ *(jdouble*) (to + 1 + pos) = *from; ++ *(jdouble*) (to + pos) = *from; ++ pos += 2; ++ } ++ ++ // The get_xxx routines, on the other hand, actually _do_ fetch ++ // java primitive types from the interpreter stack. ++ static inline jint get_int (intptr_t *from) { return *(jint *) from; } ++ static inline jlong get_long (intptr_t *from) { return *(jlong *) (from + _JNI_SLOT_OFFSET); } ++ static inline oop get_obj (intptr_t *from) { return *(oop *) from; } ++ static inline jfloat get_float (intptr_t *from) { return *(jfloat *) from; } ++ static inline jdouble get_double(intptr_t *from) { return *(jdouble *)(from + _JNI_SLOT_OFFSET); } ++#undef _JNI_SLOT_OFFSET ++}; ++ ++#endif // CPU_MIPS_VM_JNITYPES_MIPS_HPP +diff --git a/src/hotspot/cpu/mips/macroAssembler_mips.cpp b/src/hotspot/cpu/mips/macroAssembler_mips.cpp +new file mode 100644 +index 0000000000..cc868cae55 +--- /dev/null ++++ b/src/hotspot/cpu/mips/macroAssembler_mips.cpp +@@ -0,0 +1,4257 @@ ++/* ++ * Copyright (c) 1997, 2014, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2017, 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code).
++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#include "precompiled.hpp" ++#include "jvm.h" ++#include "asm/assembler.hpp" ++#include "asm/assembler.inline.hpp" ++#include "asm/macroAssembler.inline.hpp" ++#include "compiler/disassembler.hpp" ++#include "gc/shared/barrierSet.hpp" ++#include "gc/shared/barrierSetAssembler.hpp" ++#include "gc/shared/collectedHeap.inline.hpp" ++#include "interpreter/interpreter.hpp" ++#include "memory/resourceArea.hpp" ++#include "memory/universe.hpp" ++#include "nativeInst_mips.hpp" ++#include "prims/methodHandles.hpp" ++#include "runtime/biasedLocking.hpp" ++#include "runtime/interfaceSupport.inline.hpp" ++#include "runtime/objectMonitor.hpp" ++#include "runtime/os.hpp" ++#include "runtime/safepoint.hpp" ++#include "runtime/safepointMechanism.hpp" ++#include "runtime/sharedRuntime.hpp" ++#include "runtime/stubRoutines.hpp" ++#include "utilities/macros.hpp" ++ ++#ifdef COMPILER2 ++#include "opto/intrinsicnode.hpp" ++#endif ++ ++#define T0 RT0 ++#define T1 RT1 ++#define T2 RT2 ++#define T3 RT3 ++#define T8 RT8 ++#define T9 RT9 ++ ++// Implementation of MacroAssembler ++ ++intptr_t MacroAssembler::i[32] = {0}; ++float MacroAssembler::f[32] = {0.0}; ++ ++void MacroAssembler::print(outputStream *s) { ++ unsigned int k; ++ for(k=0; k<32; k++) { ++ s->print_cr("i%d = 0x%.16lx", k, i[k]); ++ } ++ s->cr(); ++ ++ for(k=0; k<32; k++) { ++ s->print_cr("f%d = %f", k, f[k]); ++ } ++ s->cr(); ++} ++ ++int MacroAssembler::i_offset(unsigned int k) { return (intptr_t)&((MacroAssembler*)0)->i[k]; } ++int MacroAssembler::f_offset(unsigned int k) { return (intptr_t)&((MacroAssembler*)0)->f[k]; } ++ ++void MacroAssembler::save_registers(MacroAssembler *masm) { ++#define __ masm-> ++ for(int k=0; k<32; k++) { ++ __ sw (as_Register(k), A0, i_offset(k)); ++ } ++ ++ for(int k=0; k<32; k++) { ++ __ swc1 (as_FloatRegister(k), A0, f_offset(k)); ++ } ++#undef __ ++} ++ ++void MacroAssembler::restore_registers(MacroAssembler *masm) { ++#define __ masm-> ++ for(int k=0; k<32; k++) { ++ __ lw (as_Register(k), A0, i_offset(k)); ++ } ++ ++ for(int k=0; k<32; k++) { ++ __ lwc1 (as_FloatRegister(k), A0, f_offset(k)); ++ } ++#undef __ ++} ++ ++ ++void MacroAssembler::pd_patch_instruction(address branch, address target) { ++ jint& stub_inst = *(jint*) branch; ++ jint *pc = (jint *)branch; ++ ++ if((opcode(stub_inst) == special_op) && (special(stub_inst) == daddu_op)) { ++ //b_far: ++ // move(AT, RA); // daddu ++ // emit_long(insn_ORRI(regimm_op, 0, bgezal_op, 1)); ++ // nop(); ++ // lui(T9, 0); // to be patched ++ // ori(T9, 0); ++ // daddu(T9, T9, RA); ++ // move(RA, AT); ++ // jr(T9); ++ ++ assert(opcode(pc[3]) == lui_op ++ && opcode(pc[4]) == ori_op ++ && special(pc[5]) == daddu_op, "Not a branch label patch"); ++ if(!(opcode(pc[3]) == lui_op ++ && opcode(pc[4]) == ori_op ++ && special(pc[5]) == daddu_op)) { tty->print_cr("Not a branch label patch"); } ++ ++ int offset = target - branch; ++ if (!is_simm16(offset)) { ++ pc[3] = (pc[3] & 0xffff0000) | high16(offset - 12); ++ pc[4] = (pc[4] & 0xffff0000) | low16(offset - 12); ++ } else { ++ // revert to "beq + nop" ++ CodeBuffer cb(branch, 4 * 10); ++ MacroAssembler masm(&cb); ++#define __ masm.
++ __ b(target); ++ __ delayed()->nop(); ++ __ nop(); ++ __ nop(); ++ __ nop(); ++ __ nop(); ++ __ nop(); ++ __ nop(); ++ } ++ return; ++ } else if (special(pc[4]) == jr_op ++ && opcode(pc[4]) == special_op ++ && (((opcode(pc[0]) == lui_op) || opcode(pc[0]) == daddiu_op) || (opcode(pc[0]) == ori_op))) { ++ //jmp_far: ++ // patchable_set48(T9, target); ++ // jr(T9); ++ // nop(); ++ ++ CodeBuffer cb(branch, 4 * 4); ++ MacroAssembler masm(&cb); ++ masm.patchable_set48(T9, (long)(target)); ++ return; ++ } ++ ++#ifndef PRODUCT ++ if (!is_simm16((target - branch - 4) >> 2)) { ++ tty->print_cr("Illegal patching: branch = " INTPTR_FORMAT ", target = " INTPTR_FORMAT, p2i(branch), p2i(target)); ++ tty->print_cr("======= Start decoding at branch = " INTPTR_FORMAT " =======", p2i(branch)); ++ Disassembler::decode(branch - 4 * 16, branch + 4 * 16, tty); ++ tty->print_cr("======= End of decoding ======="); ++ } ++#endif ++ ++ stub_inst = patched_branch(target - branch, stub_inst, 0); ++} ++ ++static inline address first_cache_address() { ++ return CodeCache::low_bound() + sizeof(HeapBlock::Header); ++} ++ ++static inline address last_cache_address() { ++ return CodeCache::high_bound() - Assembler::InstructionSize; ++} ++ ++int MacroAssembler::call_size(address target, bool far, bool patchable) { ++ if (patchable) return 6 << Assembler::LogInstructionSize; ++ if (!far) return 2 << Assembler::LogInstructionSize; // jal + nop ++ return (insts_for_set64((jlong)target) + 2) << Assembler::LogInstructionSize; ++} ++ ++// Can we reach target using jal/j from anywhere ++// in the code cache (because code can be relocated)? ++bool MacroAssembler::reachable_from_cache(address target) { ++ address cl = first_cache_address(); ++ address ch = last_cache_address(); ++ ++ return (cl <= target) && (target <= ch) && fit_in_jal(cl, ch); ++} ++ ++bool MacroAssembler::reachable_from_cache() { ++ if (ForceUnreachable) { ++ return false; ++ } else { ++ address cl = first_cache_address(); ++ address ch = last_cache_address(); ++ ++ return fit_in_jal(cl, ch); ++ } ++} ++ ++void MacroAssembler::general_jump(address target) { ++ if (reachable_from_cache(target)) { ++ j(target); ++ delayed()->nop(); ++ } else { ++ set64(T9, (long)target); ++ jr(T9); ++ delayed()->nop(); ++ } ++} ++ ++int MacroAssembler::insts_for_general_jump(address target) { ++ if (reachable_from_cache(target)) { ++ //j(target); ++ //nop(); ++ return 2; ++ } else { ++ //set64(T9, (long)target); ++ //jr(T9); ++ //nop(); ++ return insts_for_set64((jlong)target) + 2; ++ } ++} ++ ++void MacroAssembler::patchable_jump(address target) { ++ if (reachable_from_cache(target)) { ++ nop(); ++ nop(); ++ nop(); ++ nop(); ++ j(target); ++ delayed()->nop(); ++ } else { ++ patchable_set48(T9, (long)target); ++ jr(T9); ++ delayed()->nop(); ++ } ++} ++ ++int MacroAssembler::insts_for_patchable_jump(address target) { ++ return 6; ++} ++ ++void MacroAssembler::general_call(address target) { ++ if (reachable_from_cache(target)) { ++ jal(target); ++ delayed()->nop(); ++ } else { ++ set64(T9, (long)target); ++ jalr(T9); ++ delayed()->nop(); ++ } ++} ++ ++int MacroAssembler::insts_for_general_call(address target) { ++ if (reachable_from_cache(target)) { ++ //jal(target); ++ //nop(); ++ return 2; ++ } else { ++ //set64(T9, (long)target); ++ //jalr(T9); ++ //nop(); ++ return insts_for_set64((jlong)target) + 2; ++ } ++} ++ ++void MacroAssembler::patchable_call(address target) { ++ if (reachable_from_cache(target)) { ++ nop(); ++ nop(); ++ nop(); ++ nop(); ++ jal(target); ++ 
delayed()->nop(); ++ } else { ++ patchable_set48(T9, (long)target); ++ jalr(T9); ++ delayed()->nop(); ++ } ++} ++ ++int MacroAssembler::insts_for_patchable_call(address target) { ++ return 6; ++} ++ ++// Maybe emit a call via a trampoline. If the code cache is small ++// trampolines won't be emitted. ++ ++address MacroAssembler::trampoline_call(AddressLiteral entry, CodeBuffer *cbuf) { ++ assert(JavaThread::current()->is_Compiler_thread(), "just checking"); ++ assert(entry.rspec().type() == relocInfo::runtime_call_type ++ || entry.rspec().type() == relocInfo::opt_virtual_call_type ++ || entry.rspec().type() == relocInfo::static_call_type ++ || entry.rspec().type() == relocInfo::virtual_call_type, "wrong reloc type"); ++ ++ address target = entry.target(); ++ if (!reachable_from_cache()) { ++ address stub = emit_trampoline_stub(offset(), target); ++ if (stub == NULL) { ++ return NULL; // CodeCache is full ++ } ++ } ++ ++ if (cbuf) cbuf->set_insts_mark(); ++ relocate(entry.rspec()); ++ ++ if (reachable_from_cache()) { ++ nop(); ++ nop(); ++ nop(); ++ nop(); ++ jal(target); ++ delayed()->nop(); ++ } else { ++ // load the call target from the trampoline stub ++ // branch ++ long dest = (long)pc(); ++ dest += (dest & 0x8000) << 1; ++ lui(T9, dest >> 32); ++ ori(T9, T9, split_low(dest >> 16)); ++ dsll(T9, T9, 16); ++ ld(T9, T9, simm16(split_low(dest))); ++ jalr(T9); ++ delayed()->nop(); ++ } ++ return pc(); ++} ++ ++// Emit a trampoline stub for a call to a target which is too far away. ++address MacroAssembler::emit_trampoline_stub(int insts_call_instruction_offset, ++ address dest) { ++ // Max stub size: alignment nop, TrampolineStub. ++ address stub = start_a_stub(NativeInstruction::nop_instruction_size ++ + NativeCallTrampolineStub::instruction_size); ++ if (stub == NULL) { ++ return NULL; // CodeBuffer::expand failed ++ } ++ ++ // Create a trampoline stub relocation which relates this trampoline stub ++ // with the call instruction at insts_call_instruction_offset in the ++ // instructions code-section. 
++ align(wordSize); ++ relocate(trampoline_stub_Relocation::spec(code()->insts()->start() ++ + insts_call_instruction_offset)); ++ emit_int64((int64_t)dest); ++ end_a_stub(); ++ return stub; ++} ++ ++void MacroAssembler::beq_far(Register rs, Register rt, address entry) { ++ u_char * cur_pc = pc(); ++ ++ // Near/Far jump ++ if(is_simm16((entry - pc() - 4) / 4)) { ++ Assembler::beq(rs, rt, offset(entry)); ++ } else { ++ Label not_jump; ++ bne(rs, rt, not_jump); ++ delayed()->nop(); ++ ++ b_far(entry); ++ delayed()->nop(); ++ ++ bind(not_jump); ++ has_delay_slot(); ++ } ++} ++ ++void MacroAssembler::beq_far(Register rs, Register rt, Label& L) { ++ if (L.is_bound()) { ++ beq_far(rs, rt, target(L)); ++ } else { ++ u_char * cur_pc = pc(); ++ Label not_jump; ++ bne(rs, rt, not_jump); ++ delayed()->nop(); ++ ++ b_far(L); ++ delayed()->nop(); ++ ++ bind(not_jump); ++ has_delay_slot(); ++ } ++} ++ ++void MacroAssembler::bne_far(Register rs, Register rt, address entry) { ++ u_char * cur_pc = pc(); ++ ++ //Near/Far jump ++ if(is_simm16((entry - pc() - 4) / 4)) { ++ Assembler::bne(rs, rt, offset(entry)); ++ } else { ++ Label not_jump; ++ beq(rs, rt, not_jump); ++ delayed()->nop(); ++ ++ b_far(entry); ++ delayed()->nop(); ++ ++ bind(not_jump); ++ has_delay_slot(); ++ } ++} ++ ++void MacroAssembler::bne_far(Register rs, Register rt, Label& L) { ++ if (L.is_bound()) { ++ bne_far(rs, rt, target(L)); ++ } else { ++ u_char * cur_pc = pc(); ++ Label not_jump; ++ beq(rs, rt, not_jump); ++ delayed()->nop(); ++ ++ b_far(L); ++ delayed()->nop(); ++ ++ bind(not_jump); ++ has_delay_slot(); ++ } ++} ++ ++void MacroAssembler::beq_long(Register rs, Register rt, Label& L) { ++ Label not_taken; ++ ++ bne(rs, rt, not_taken); ++ delayed()->nop(); ++ ++ jmp_far(L); ++ ++ bind(not_taken); ++} ++ ++void MacroAssembler::bne_long(Register rs, Register rt, Label& L) { ++ Label not_taken; ++ ++ beq(rs, rt, not_taken); ++ delayed()->nop(); ++ ++ jmp_far(L); ++ ++ bind(not_taken); ++} ++ ++void MacroAssembler::bc1t_long(Label& L) { ++ Label not_taken; ++ ++ bc1f(not_taken); ++ delayed()->nop(); ++ ++ jmp_far(L); ++ ++ bind(not_taken); ++} ++ ++void MacroAssembler::bc1f_long(Label& L) { ++ Label not_taken; ++ ++ bc1t(not_taken); ++ delayed()->nop(); ++ ++ jmp_far(L); ++ ++ bind(not_taken); ++} ++ ++void MacroAssembler::b_far(Label& L) { ++ if (L.is_bound()) { ++ b_far(target(L)); ++ } else { ++ volatile address dest = target(L); ++// ++// MacroAssembler::pd_patch_instruction branch=55651ed514, target=55651ef6d8 ++// 0x00000055651ed514: daddu at, ra, zero ++// 0x00000055651ed518: [4110001]bgezal zero, 0x00000055651ed520 ++// ++// 0x00000055651ed51c: sll zero, zero, 0 ++// 0x00000055651ed520: lui t9, 0x0 ++// 0x00000055651ed524: ori t9, t9, 0x21b8 ++// 0x00000055651ed528: daddu t9, t9, ra ++// 0x00000055651ed52c: daddu ra, at, zero ++// 0x00000055651ed530: jr t9 ++// 0x00000055651ed534: sll zero, zero, 0 ++// ++ move(AT, RA); ++ emit_long(insn_ORRI(regimm_op, 0, bgezal_op, 1)); ++ nop(); ++ lui(T9, 0); // to be patched ++ ori(T9, T9, 0); ++ daddu(T9, T9, RA); ++ move(RA, AT); ++ jr(T9); ++ } ++} ++ ++void MacroAssembler::b_far(address entry) { ++ u_char * cur_pc = pc(); ++ ++ // Near/Far jump ++ if(is_simm16((entry - pc() - 4) / 4)) { ++ b(offset(entry)); ++ } else { ++ // address must be bounded ++ move(AT, RA); ++ emit_long(insn_ORRI(regimm_op, 0, bgezal_op, 1)); ++ nop(); ++ li32(T9, entry - pc()); ++ daddu(T9, T9, RA); ++ move(RA, AT); ++ jr(T9); ++ } ++} ++ ++void MacroAssembler::ld_ptr(Register rt, Register base, Register 
offset) { ++ addu_long(AT, base, offset); ++ ld_ptr(rt, AT, 0); ++} ++ ++void MacroAssembler::st_ptr(Register rt, Register base, Register offset) { ++ guarantee(AT != rt, "AT must not equal rt"); ++ addu_long(AT, base, offset); ++ st_ptr(rt, AT, 0); ++} ++ ++Address MacroAssembler::as_Address(AddressLiteral adr) { ++ return Address(adr.target(), adr.rspec()); ++} ++ ++Address MacroAssembler::as_Address(ArrayAddress adr) { ++ return Address::make_array(adr); ++} ++ ++// tmp_reg1 and tmp_reg2 should be saved outside of atomic_inc32 (caller saved). ++void MacroAssembler::atomic_inc32(address counter_addr, int inc, Register tmp_reg1, Register tmp_reg2) { ++ Label again; ++ ++ li(tmp_reg1, counter_addr); ++ bind(again); ++ if (UseSyncLevel >= 10000 || UseSyncLevel == 1000 || UseSyncLevel == 4000) sync(); ++ ll(tmp_reg2, tmp_reg1, 0); ++ addiu(tmp_reg2, tmp_reg2, inc); ++ sc(tmp_reg2, tmp_reg1, 0); ++ beq(tmp_reg2, R0, again); ++ delayed()->nop(); ++} ++ ++void MacroAssembler::reserved_stack_check() { ++ Register thread = TREG; ++#ifndef OPT_THREAD ++ get_thread(thread); ++#endif ++ // testing if reserved zone needs to be enabled ++ Label no_reserved_zone_enabling; ++ ++ ld(AT, Address(thread, JavaThread::reserved_stack_activation_offset())); ++ dsubu(AT, SP, AT); ++ bltz(AT, no_reserved_zone_enabling); ++ delayed()->nop(); ++ ++ enter(); // RA and FP are live. ++ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::enable_stack_reserved_zone), thread); ++ leave(); ++ ++ // We have already removed our own frame. ++ // throw_delayed_StackOverflowError will think that it's been ++ // called by our caller. ++ li(AT, (long)StubRoutines::throw_delayed_StackOverflowError_entry()); ++ jr(AT); ++ delayed()->nop(); ++ should_not_reach_here(); ++ ++ bind(no_reserved_zone_enabling); ++} ++ ++int MacroAssembler::biased_locking_enter(Register lock_reg, ++ Register obj_reg, ++ Register swap_reg, ++ Register tmp_reg, ++ bool swap_reg_contains_mark, ++ Label& done, ++ Label* slow_case, ++ BiasedLockingCounters* counters) { ++ assert(UseBiasedLocking, "why call this otherwise?"); ++ bool need_tmp_reg = false; ++ if (tmp_reg == noreg) { ++ need_tmp_reg = true; ++ tmp_reg = T9; ++ } ++ assert_different_registers(lock_reg, obj_reg, swap_reg, tmp_reg, AT); ++ assert(markOopDesc::age_shift == markOopDesc::lock_bits + markOopDesc::biased_lock_bits, "biased locking makes assumptions about bit layout"); ++ Address mark_addr (obj_reg, oopDesc::mark_offset_in_bytes()); ++ Address saved_mark_addr(lock_reg, 0); ++ ++ // Biased locking ++ // See whether the lock is currently biased toward our thread and ++ // whether the epoch is still valid ++ // Note that the runtime guarantees sufficient alignment of JavaThread ++ // pointers to allow age to be placed into low bits ++ // First check to see whether biasing is even enabled for this object ++ Label cas_label; ++ int null_check_offset = -1; ++ if (!swap_reg_contains_mark) { ++ null_check_offset = offset(); ++ ld_ptr(swap_reg, mark_addr); ++ } ++ ++ if (need_tmp_reg) { ++ push(tmp_reg); ++ } ++ move(tmp_reg, swap_reg); ++ andi(tmp_reg, tmp_reg, markOopDesc::biased_lock_mask_in_place); ++ daddiu(AT, R0, markOopDesc::biased_lock_pattern); ++ dsubu(AT, AT, tmp_reg); ++ if (need_tmp_reg) { ++ pop(tmp_reg); ++ } ++ ++ bne(AT, R0, cas_label); ++ delayed()->nop(); ++ ++ ++ // The bias pattern is present in the object's header. Need to check ++ // whether the bias owner and the epoch are both still current. 
++ // Note that because there is no current thread register on MIPS we ++ // need to store off the mark word we read out of the object to ++ // avoid reloading it and needing to recheck invariants below. This ++ // store is unfortunate but it makes the overall code shorter and ++ // simpler. ++ st_ptr(swap_reg, saved_mark_addr); ++ if (need_tmp_reg) { ++ push(tmp_reg); ++ } ++ if (swap_reg_contains_mark) { ++ null_check_offset = offset(); ++ } ++ load_prototype_header(tmp_reg, obj_reg); ++ xorr(tmp_reg, tmp_reg, swap_reg); ++#ifndef OPT_THREAD ++ get_thread(swap_reg); ++ xorr(swap_reg, swap_reg, tmp_reg); ++#else ++ xorr(swap_reg, TREG, tmp_reg); ++#endif ++ ++ move(AT, ~((int) markOopDesc::age_mask_in_place)); ++ andr(swap_reg, swap_reg, AT); ++ ++ if (PrintBiasedLockingStatistics) { ++ Label L; ++ bne(swap_reg, R0, L); ++ delayed()->nop(); ++ push(tmp_reg); ++ push(A0); ++ atomic_inc32((address)BiasedLocking::biased_lock_entry_count_addr(), 1, A0, tmp_reg); ++ pop(A0); ++ pop(tmp_reg); ++ bind(L); ++ } ++ if (need_tmp_reg) { ++ pop(tmp_reg); ++ } ++ beq(swap_reg, R0, done); ++ delayed()->nop(); ++ Label try_revoke_bias; ++ Label try_rebias; ++ ++ // At this point we know that the header has the bias pattern and ++ // that we are not the bias owner in the current epoch. We need to ++ // figure out more details about the state of the header in order to ++ // know what operations can be legally performed on the object's ++ // header. ++ ++ // If the low three bits in the xor result aren't clear, that means ++ // the prototype header is no longer biased and we have to revoke ++ // the bias on this object. ++ ++ move(AT, markOopDesc::biased_lock_mask_in_place); ++ andr(AT, swap_reg, AT); ++ bne(AT, R0, try_revoke_bias); ++ delayed()->nop(); ++ // Biasing is still enabled for this data type. See whether the ++ // epoch of the current bias is still valid, meaning that the epoch ++ // bits of the mark word are equal to the epoch bits of the ++ // prototype header. (Note that the prototype header's epoch bits ++ // only change at a safepoint.) If not, attempt to rebias the object ++ // toward the current thread. Note that we must be absolutely sure ++ // that the current epoch is invalid in order to do this because ++ // otherwise the manipulations it performs on the mark word are ++ // illegal. ++ ++ move(AT, markOopDesc::epoch_mask_in_place); ++ andr(AT,swap_reg, AT); ++ bne(AT, R0, try_rebias); ++ delayed()->nop(); ++ // The epoch of the current bias is still valid but we know nothing ++ // about the owner; it might be set or it might be clear. Try to ++ // acquire the bias of the object using an atomic operation. If this ++ // fails we will go in to the runtime to revoke the object's bias. ++ // Note that we first construct the presumed unbiased header so we ++ // don't accidentally blow away another thread's valid bias. ++ ++ ld_ptr(swap_reg, saved_mark_addr); ++ ++ move(AT, markOopDesc::biased_lock_mask_in_place | markOopDesc::age_mask_in_place | markOopDesc::epoch_mask_in_place); ++ andr(swap_reg, swap_reg, AT); ++ ++ if (need_tmp_reg) { ++ push(tmp_reg); ++ } ++#ifndef OPT_THREAD ++ get_thread(tmp_reg); ++ orr(tmp_reg, tmp_reg, swap_reg); ++#else ++ orr(tmp_reg, TREG, swap_reg); ++#endif ++ cmpxchg(Address(obj_reg, 0), swap_reg, tmp_reg, AT, false, false); ++ if (need_tmp_reg) { ++ pop(tmp_reg); ++ } ++ // If the biasing toward our thread failed, this means that ++ // another thread succeeded in biasing it toward itself and we ++ // need to revoke that bias. 
The revocation will occur in the ++ // interpreter runtime in the slow case. ++ if (PrintBiasedLockingStatistics) { ++ Label L; ++ bne(AT, R0, L); ++ delayed()->nop(); ++ push(tmp_reg); ++ push(A0); ++ atomic_inc32((address)BiasedLocking::anonymously_biased_lock_entry_count_addr(), 1, A0, tmp_reg); ++ pop(A0); ++ pop(tmp_reg); ++ bind(L); ++ } ++ if (slow_case != NULL) { ++ beq_far(AT, R0, *slow_case); ++ delayed()->nop(); ++ } ++ b(done); ++ delayed()->nop(); ++ ++ bind(try_rebias); ++ // At this point we know the epoch has expired, meaning that the ++ // current "bias owner", if any, is actually invalid. Under these ++ // circumstances _only_, we are allowed to use the current header's ++ // value as the comparison value when doing the cas to acquire the ++ // bias in the current epoch. In other words, we allow transfer of ++ // the bias from one thread to another directly in this situation. ++ // ++ // FIXME: due to a lack of registers we currently blow away the age ++ // bits in this situation. Should attempt to preserve them. ++ if (need_tmp_reg) { ++ push(tmp_reg); ++ } ++ load_prototype_header(tmp_reg, obj_reg); ++#ifndef OPT_THREAD ++ get_thread(swap_reg); ++ orr(tmp_reg, tmp_reg, swap_reg); ++#else ++ orr(tmp_reg, tmp_reg, TREG); ++#endif ++ ld_ptr(swap_reg, saved_mark_addr); ++ ++ cmpxchg(Address(obj_reg, 0), swap_reg, tmp_reg, AT, false, false); ++ if (need_tmp_reg) { ++ pop(tmp_reg); ++ } ++ // If the biasing toward our thread failed, then another thread ++ // succeeded in biasing it toward itself and we need to revoke that ++ // bias. The revocation will occur in the runtime in the slow case. ++ if (PrintBiasedLockingStatistics) { ++ Label L; ++ bne(AT, R0, L); ++ delayed()->nop(); ++ push(AT); ++ push(tmp_reg); ++ atomic_inc32((address)BiasedLocking::rebiased_lock_entry_count_addr(), 1, AT, tmp_reg); ++ pop(tmp_reg); ++ pop(AT); ++ bind(L); ++ } ++ if (slow_case != NULL) { ++ beq_far(AT, R0, *slow_case); ++ delayed()->nop(); ++ } ++ ++ b(done); ++ delayed()->nop(); ++ bind(try_revoke_bias); ++ // The prototype mark in the klass doesn't have the bias bit set any ++ // more, indicating that objects of this data type are not supposed ++ // to be biased any more. We are going to try to reset the mark of ++ // this object to the prototype value and fall through to the ++ // CAS-based locking scheme. Note that if our CAS fails, it means ++ // that another thread raced us for the privilege of revoking the ++ // bias of this particular object, so it's okay to continue in the ++ // normal locking code. ++ // ++ // FIXME: due to a lack of registers we currently blow away the age ++ // bits in this situation. Should attempt to preserve them. ++ ld_ptr(swap_reg, saved_mark_addr); ++ ++ if (need_tmp_reg) { ++ push(tmp_reg); ++ } ++ load_prototype_header(tmp_reg, obj_reg); ++ cmpxchg(Address(obj_reg, 0), swap_reg, tmp_reg, AT, false, false); ++ if (need_tmp_reg) { ++ pop(tmp_reg); ++ } ++ // Fall through to the normal CAS-based lock, because no matter what ++ // the result of the above CAS, some thread must have succeeded in ++ // removing the bias bit from the object's header. 
++ if (PrintBiasedLockingStatistics) { ++ Label L; ++ bne(AT, R0, L); ++ delayed()->nop(); ++ push(AT); ++ push(tmp_reg); ++ atomic_inc32((address)BiasedLocking::revoked_lock_entry_count_addr(), 1, AT, tmp_reg); ++ pop(tmp_reg); ++ pop(AT); ++ bind(L); ++ } ++ ++ bind(cas_label); ++ return null_check_offset; ++} ++ ++void MacroAssembler::biased_locking_exit(Register obj_reg, Register temp_reg, Label& done) { ++ assert(UseBiasedLocking, "why call this otherwise?"); ++ ++ // Check for biased locking unlock case, which is a no-op ++ // Note: we do not have to check the thread ID for two reasons. ++ // First, the interpreter checks for IllegalMonitorStateException at ++ // a higher level. Second, if the bias was revoked while we held the ++ // lock, the object could not be rebiased toward another thread, so ++ // the bias bit would be clear. ++ ld(temp_reg, Address(obj_reg, oopDesc::mark_offset_in_bytes())); ++ andi(temp_reg, temp_reg, markOopDesc::biased_lock_mask_in_place); ++ daddiu(AT, R0, markOopDesc::biased_lock_pattern); ++ ++ beq(AT, temp_reg, done); ++ delayed()->nop(); ++} ++ ++// the stack pointer adjustment is needed. see InterpreterMacroAssembler::super_call_VM_leaf ++// this method will handle the stack problem, you need not to preserve the stack space for the argument now ++void MacroAssembler::call_VM_leaf_base(address entry_point, int number_of_arguments) { ++ Label L, E; ++ ++ assert(number_of_arguments <= 4, "just check"); ++ ++ andi(AT, SP, 0xf); ++ beq(AT, R0, L); ++ delayed()->nop(); ++ daddiu(SP, SP, -8); ++ call(entry_point, relocInfo::runtime_call_type); ++ delayed()->nop(); ++ daddiu(SP, SP, 8); ++ b(E); ++ delayed()->nop(); ++ ++ bind(L); ++ call(entry_point, relocInfo::runtime_call_type); ++ delayed()->nop(); ++ bind(E); ++} ++ ++ ++void MacroAssembler::jmp(address entry) { ++ patchable_set48(T9, (long)entry); ++ jr(T9); ++} ++ ++void MacroAssembler::jmp(address entry, relocInfo::relocType rtype) { ++ switch (rtype) { ++ case relocInfo::runtime_call_type: ++ case relocInfo::none: ++ jmp(entry); ++ break; ++ default: ++ { ++ InstructionMark im(this); ++ relocate(rtype); ++ patchable_set48(T9, (long)entry); ++ jr(T9); ++ } ++ break; ++ } ++} ++ ++void MacroAssembler::jmp_far(Label& L) { ++ if (L.is_bound()) { ++ address entry = target(L); ++ assert(entry != NULL, "jmp most probably wrong"); ++ InstructionMark im(this); ++ ++ relocate(relocInfo::internal_word_type); ++ patchable_set48(T9, (long)entry); ++ } else { ++ InstructionMark im(this); ++ L.add_patch_at(code(), locator()); ++ ++ relocate(relocInfo::internal_word_type); ++ patchable_set48(T9, (long)pc()); ++ } ++ ++ jr(T9); ++ delayed()->nop(); ++} ++void MacroAssembler::mov_metadata(Address dst, Metadata* obj) { ++ int oop_index; ++ if (obj) { ++ oop_index = oop_recorder()->find_index(obj); ++ } else { ++ oop_index = oop_recorder()->allocate_metadata_index(obj); ++ } ++ relocate(metadata_Relocation::spec(oop_index)); ++ patchable_set48(AT, (long)obj); ++ sd(AT, dst); ++} ++ ++void MacroAssembler::mov_metadata(Register dst, Metadata* obj) { ++ int oop_index; ++ if (obj) { ++ oop_index = oop_recorder()->find_index(obj); ++ } else { ++ oop_index = oop_recorder()->allocate_metadata_index(obj); ++ } ++ relocate(metadata_Relocation::spec(oop_index)); ++ patchable_set48(dst, (long)obj); ++} ++ ++void MacroAssembler::call(address entry) { ++// c/c++ code assume T9 is entry point, so we just always move entry to t9 ++// maybe there is some more graceful method to handle this. 
FIXME ++// For more info, see class NativeCall. ++ patchable_set48(T9, (long)entry); ++ jalr(T9); ++} ++ ++void MacroAssembler::call(address entry, relocInfo::relocType rtype) { ++ switch (rtype) { ++ case relocInfo::runtime_call_type: ++ case relocInfo::none: ++ call(entry); ++ break; ++ default: ++ { ++ InstructionMark im(this); ++ relocate(rtype); ++ call(entry); ++ } ++ break; ++ } ++} ++ ++void MacroAssembler::call(address entry, RelocationHolder& rh) ++{ ++ switch (rh.type()) { ++ case relocInfo::runtime_call_type: ++ case relocInfo::none: ++ call(entry); ++ break; ++ default: ++ { ++ InstructionMark im(this); ++ relocate(rh); ++ call(entry); ++ } ++ break; ++ } ++} ++ ++void MacroAssembler::ic_call(address entry, jint method_index) { ++ RelocationHolder rh = virtual_call_Relocation::spec(pc(), method_index); ++ patchable_set48(IC_Klass, (long)Universe::non_oop_word()); ++ assert(entry != NULL, "call most probably wrong"); ++ InstructionMark im(this); ++ trampoline_call(AddressLiteral(entry, rh)); ++} ++ ++void MacroAssembler::c2bool(Register r) { ++ sltu(r, R0, r); ++} ++ ++#ifndef PRODUCT ++extern "C" void findpc(intptr_t x); ++#endif ++ ++void MacroAssembler::debug(char* msg/*, RegistersForDebugging* regs*/) { ++ if ( ShowMessageBoxOnError ) { ++ JavaThreadState saved_state = JavaThread::current()->thread_state(); ++ JavaThread::current()->set_thread_state(_thread_in_vm); ++ { ++ // In order to get locks work, we need to fake a in_VM state ++ ttyLocker ttyl; ++ ::tty->print_cr("EXECUTION STOPPED: %s\n", msg); ++ if (CountBytecodes || TraceBytecodes || StopInterpreterAt) { ++ BytecodeCounter::print(); ++ } ++ ++ } ++ ThreadStateTransition::transition(JavaThread::current(), _thread_in_vm, saved_state); ++ } ++ else ++ ::tty->print_cr("=============== DEBUG MESSAGE: %s ================\n", msg); ++} ++ ++ ++void MacroAssembler::stop(const char* msg) { ++ li(A0, (long)msg); ++ call(CAST_FROM_FN_PTR(address, MacroAssembler::debug), relocInfo::runtime_call_type); ++ delayed()->nop(); ++ brk(17); ++} ++ ++void MacroAssembler::warn(const char* msg) { ++ pushad(); ++ li(A0, (long)msg); ++ push(S2); ++ move(AT, -(StackAlignmentInBytes)); ++ move(S2, SP); // use S2 as a sender SP holder ++ andr(SP, SP, AT); // align stack as required by ABI ++ call(CAST_FROM_FN_PTR(address, MacroAssembler::debug), relocInfo::runtime_call_type); ++ delayed()->nop(); ++ move(SP, S2); // use S2 as a sender SP holder ++ pop(S2); ++ popad(); ++} ++ ++void MacroAssembler::increment(Register reg, int imm) { ++ if (!imm) return; ++ if (is_simm16(imm)) { ++ daddiu(reg, reg, imm); ++ } else { ++ move(AT, imm); ++ daddu(reg, reg, AT); ++ } ++} ++ ++void MacroAssembler::decrement(Register reg, int imm) { ++ increment(reg, -imm); ++} ++ ++ ++void MacroAssembler::call_VM(Register oop_result, ++ address entry_point, ++ bool check_exceptions) { ++ call_VM_helper(oop_result, entry_point, 0, check_exceptions); ++} ++ ++void MacroAssembler::call_VM(Register oop_result, ++ address entry_point, ++ Register arg_1, ++ bool check_exceptions) { ++ if (arg_1!=A1) move(A1, arg_1); ++ call_VM_helper(oop_result, entry_point, 1, check_exceptions); ++} ++ ++void MacroAssembler::call_VM(Register oop_result, ++ address entry_point, ++ Register arg_1, ++ Register arg_2, ++ bool check_exceptions) { ++ if (arg_1!=A1) move(A1, arg_1); ++ if (arg_2!=A2) move(A2, arg_2); ++ assert(arg_2 != A1, "smashed argument"); ++ call_VM_helper(oop_result, entry_point, 2, check_exceptions); ++} ++ ++void MacroAssembler::call_VM(Register oop_result, ++ address 
entry_point, ++ Register arg_1, ++ Register arg_2, ++ Register arg_3, ++ bool check_exceptions) { ++ if (arg_1!=A1) move(A1, arg_1); ++ if (arg_2!=A2) move(A2, arg_2); assert(arg_2 != A1, "smashed argument"); ++ if (arg_3!=A3) move(A3, arg_3); assert(arg_3 != A1 && arg_3 != A2, "smashed argument"); ++ call_VM_helper(oop_result, entry_point, 3, check_exceptions); ++} ++ ++void MacroAssembler::call_VM(Register oop_result, ++ Register last_java_sp, ++ address entry_point, ++ int number_of_arguments, ++ bool check_exceptions) { ++ call_VM_base(oop_result, NOREG, last_java_sp, entry_point, number_of_arguments, check_exceptions); ++} ++ ++void MacroAssembler::call_VM(Register oop_result, ++ Register last_java_sp, ++ address entry_point, ++ Register arg_1, ++ bool check_exceptions) { ++ if (arg_1 != A1) move(A1, arg_1); ++ call_VM(oop_result, last_java_sp, entry_point, 1, check_exceptions); ++} ++ ++void MacroAssembler::call_VM(Register oop_result, ++ Register last_java_sp, ++ address entry_point, ++ Register arg_1, ++ Register arg_2, ++ bool check_exceptions) { ++ if (arg_1 != A1) move(A1, arg_1); ++ if (arg_2 != A2) move(A2, arg_2); assert(arg_2 != A1, "smashed argument"); ++ call_VM(oop_result, last_java_sp, entry_point, 2, check_exceptions); ++} ++ ++void MacroAssembler::call_VM(Register oop_result, ++ Register last_java_sp, ++ address entry_point, ++ Register arg_1, ++ Register arg_2, ++ Register arg_3, ++ bool check_exceptions) { ++ if (arg_1 != A1) move(A1, arg_1); ++ if (arg_2 != A2) move(A2, arg_2); assert(arg_2 != A1, "smashed argument"); ++ if (arg_3 != A3) move(A3, arg_3); assert(arg_3 != A1 && arg_3 != A2, "smashed argument"); ++ call_VM(oop_result, last_java_sp, entry_point, 3, check_exceptions); ++} ++ ++void MacroAssembler::call_VM_base(Register oop_result, ++ Register java_thread, ++ Register last_java_sp, ++ address entry_point, ++ int number_of_arguments, ++ bool check_exceptions) { ++ ++ address before_call_pc; ++ // determine java_thread register ++ if (!java_thread->is_valid()) { ++#ifndef OPT_THREAD ++ java_thread = T2; ++ get_thread(java_thread); ++#else ++ java_thread = TREG; ++#endif ++ } ++ // determine last_java_sp register ++ if (!last_java_sp->is_valid()) { ++ last_java_sp = SP; ++ } ++ // debugging support ++ assert(number_of_arguments >= 0 , "cannot have negative number of arguments"); ++ assert(number_of_arguments <= 4 , "cannot have negative number of arguments"); ++ assert(java_thread != oop_result , "cannot use the same register for java_thread & oop_result"); ++ assert(java_thread != last_java_sp, "cannot use the same register for java_thread & last_java_sp"); ++ ++ assert(last_java_sp != FP, "this code doesn't work for last_java_sp == fp, which currently can't portably work anyway since C2 doesn't save fp"); ++ ++ // set last Java frame before call ++ before_call_pc = (address)pc(); ++ set_last_Java_frame(java_thread, last_java_sp, FP, before_call_pc); ++ ++ // do the call ++ move(A0, java_thread); ++ call(entry_point, relocInfo::runtime_call_type); ++ delayed()->nop(); ++ ++ // restore the thread (cannot use the pushed argument since arguments ++ // may be overwritten by C code generated by an optimizing compiler); ++ // however can use the register value directly if it is callee saved. 
++#ifndef OPT_THREAD ++ get_thread(java_thread); ++#else ++#ifdef ASSERT ++ { ++ Label L; ++ get_thread(AT); ++ beq(java_thread, AT, L); ++ delayed()->nop(); ++ stop("MacroAssembler::call_VM_base: TREG not callee saved?"); ++ bind(L); ++ } ++#endif ++#endif ++ ++ // discard thread and arguments ++ ld_ptr(SP, java_thread, in_bytes(JavaThread::last_Java_sp_offset())); ++ // reset last Java frame ++ reset_last_Java_frame(java_thread, false); ++ ++ check_and_handle_popframe(java_thread); ++ check_and_handle_earlyret(java_thread); ++ if (check_exceptions) { ++ // check for pending exceptions (java_thread is set upon return) ++ Label L; ++ ld(AT, java_thread, in_bytes(Thread::pending_exception_offset())); ++ beq(AT, R0, L); ++ delayed()->nop(); ++ li(AT, before_call_pc); ++ push(AT); ++ jmp(StubRoutines::forward_exception_entry(), relocInfo::runtime_call_type); ++ delayed()->nop(); ++ bind(L); ++ } ++ ++ // get oop result if there is one and reset the value in the thread ++ if (oop_result->is_valid()) { ++ ld(oop_result, java_thread, in_bytes(JavaThread::vm_result_offset())); ++ sd(R0, java_thread, in_bytes(JavaThread::vm_result_offset())); ++ verify_oop(oop_result); ++ } ++} ++ ++void MacroAssembler::call_VM_helper(Register oop_result, address entry_point, int number_of_arguments, bool check_exceptions) { ++ ++ move(V0, SP); ++ //we also reserve space for java_thread here ++ move(AT, -(StackAlignmentInBytes)); ++ andr(SP, SP, AT); ++ call_VM_base(oop_result, NOREG, V0, entry_point, number_of_arguments, check_exceptions); ++ ++} ++ ++void MacroAssembler::call_VM_leaf(address entry_point, int number_of_arguments) { ++ call_VM_leaf_base(entry_point, number_of_arguments); ++} ++ ++void MacroAssembler::call_VM_leaf(address entry_point, Register arg_0) { ++ if (arg_0 != A0) move(A0, arg_0); ++ call_VM_leaf(entry_point, 1); ++} ++ ++void MacroAssembler::call_VM_leaf(address entry_point, Register arg_0, Register arg_1) { ++ if (arg_0 != A0) move(A0, arg_0); ++ if (arg_1 != A1) move(A1, arg_1); assert(arg_1 != A0, "smashed argument"); ++ call_VM_leaf(entry_point, 2); ++} ++ ++void MacroAssembler::call_VM_leaf(address entry_point, Register arg_0, Register arg_1, Register arg_2) { ++ if (arg_0 != A0) move(A0, arg_0); ++ if (arg_1 != A1) move(A1, arg_1); assert(arg_1 != A0, "smashed argument"); ++ if (arg_2 != A2) move(A2, arg_2); assert(arg_2 != A0 && arg_2 != A1, "smashed argument"); ++ call_VM_leaf(entry_point, 3); ++} ++void MacroAssembler::super_call_VM_leaf(address entry_point) { ++ MacroAssembler::call_VM_leaf_base(entry_point, 0); ++} ++ ++ ++void MacroAssembler::super_call_VM_leaf(address entry_point, ++ Register arg_1) { ++ if (arg_1 != A0) move(A0, arg_1); ++ MacroAssembler::call_VM_leaf_base(entry_point, 1); ++} ++ ++ ++void MacroAssembler::super_call_VM_leaf(address entry_point, ++ Register arg_1, ++ Register arg_2) { ++ if (arg_1 != A0) move(A0, arg_1); ++ if (arg_2 != A1) move(A1, arg_2); assert(arg_2 != A0, "smashed argument"); ++ MacroAssembler::call_VM_leaf_base(entry_point, 2); ++} ++void MacroAssembler::super_call_VM_leaf(address entry_point, ++ Register arg_1, ++ Register arg_2, ++ Register arg_3) { ++ if (arg_1 != A0) move(A0, arg_1); ++ if (arg_2 != A1) move(A1, arg_2); assert(arg_2 != A0, "smashed argument"); ++ if (arg_3 != A2) move(A2, arg_3); assert(arg_3 != A0 && arg_3 != A1, "smashed argument"); ++ MacroAssembler::call_VM_leaf_base(entry_point, 3); ++} ++ ++void MacroAssembler::check_and_handle_earlyret(Register java_thread) { ++} ++ ++void 
MacroAssembler::check_and_handle_popframe(Register java_thread) { ++} ++ ++void MacroAssembler::null_check(Register reg, int offset) { ++ if (needs_explicit_null_check(offset)) { ++ // provoke OS NULL exception if reg = NULL by ++ // accessing M[reg] w/o changing any (non-CC) registers ++ // NOTE: cmpl is plenty here to provoke a segv ++ lw(AT, reg, 0); ++ } else { ++ // nothing to do, (later) access of M[reg + offset] ++ // will provoke OS NULL exception if reg = NULL ++ } ++} ++ ++void MacroAssembler::enter() { ++ push2(RA, FP); ++ move(FP, SP); ++} ++ ++void MacroAssembler::leave() { ++ move(SP, FP); ++ pop2(RA, FP); ++} ++ ++void MacroAssembler::unimplemented(const char* what) { ++ const char* buf = NULL; ++ { ++ ResourceMark rm; ++ stringStream ss; ++ ss.print("unimplemented: %s", what); ++ buf = code_string(ss.as_string()); ++ } ++ stop(buf); ++} ++ ++void MacroAssembler::get_thread(Register thread) { ++#ifdef MINIMIZE_RAM_USAGE ++// ++// In MIPS64, we don't use full 64-bit address space. ++// Only a small range is actually used. ++// ++// Example: ++// $ cat /proc/13352/maps ++// 120000000-120010000 r-xp 00000000 08:01 41077 /mnt/openjdk6-mips-full/build/linux-mips64/j2sdk-image/bin/java ++// 12001c000-120020000 rw-p 0000c000 08:01 41077 /mnt/openjdk6-mips-full/build/linux-mips64/j2sdk-image/bin/java ++// 120020000-1208dc000 rwxp 00000000 00:00 0 [heap] ++// 555d574000-555d598000 r-xp 00000000 08:01 2073768 /lib/ld-2.12.so ++// 555d598000-555d59c000 rw-p 00000000 00:00 0 ++// ...... ++// 558b1f8000-558b23c000 rwxp 00000000 00:00 0 ++// 558b23c000-558b248000 ---p 00000000 00:00 0 ++// 558b248000-558b28c000 rwxp 00000000 00:00 0 ++// ffff914000-ffff94c000 rwxp 00000000 00:00 0 [stack] ++// ffffffc000-10000000000 r-xp 00000000 00:00 0 [vdso] ++// ++// All stacks are positioned at 0x55________. ++// Therefore, we can utilize the same algorithm used in 32-bit. 
++ // int index = ((uintptr_t)p >> PAGE_SHIFT) & ((1UL << (SP_BITLENGTH - PAGE_SHIFT)) - 1); ++ // Thread* thread = _sp_map[index]; ++ Register tmp; ++ ++ if (thread == AT) ++ tmp = T9; ++ else ++ tmp = AT; ++ ++ move(thread, SP); ++ shr(thread, PAGE_SHIFT); ++ ++ push(tmp); ++ li(tmp, ((1UL << (SP_BITLENGTH - PAGE_SHIFT)) - 1)); ++ andr(thread, thread, tmp); ++ shl(thread, Address::times_ptr); // sizeof(Thread *) ++ li48(tmp, (long)ThreadLocalStorage::sp_map_addr()); ++ addu(tmp, tmp, thread); ++ ld_ptr(thread, tmp, 0); ++ pop(tmp); ++#else ++ if (thread != V0) { ++ push(V0); ++ } ++ pushad_except_v0(); ++ ++ push(S5); ++ move(S5, SP); ++ move(AT, -StackAlignmentInBytes); ++ andr(SP, SP, AT); ++ call(CAST_FROM_FN_PTR(address, Thread::current)); ++ //MacroAssembler::call_VM_leaf_base(CAST_FROM_FN_PTR(address, Thread::current), 0); ++ delayed()->nop(); ++ move(SP, S5); ++ pop(S5); ++ ++ popad_except_v0(); ++ if (thread != V0) { ++ move(thread, V0); ++ pop(V0); ++ } ++#endif // MINIMIZE_RAM_USAGE ++} ++ ++void MacroAssembler::reset_last_Java_frame(Register java_thread, bool clear_fp) { ++ // determine java_thread register ++ if (!java_thread->is_valid()) { ++#ifndef OPT_THREAD ++ java_thread = T1; ++ get_thread(java_thread); ++#else ++ java_thread = TREG; ++#endif ++ } ++ // we must set sp to zero to clear frame ++ st_ptr(R0, java_thread, in_bytes(JavaThread::last_Java_sp_offset())); ++ // must clear fp, so that compiled frames are not confused; it is possible ++ // that we need it only for debugging ++ if(clear_fp) { ++ st_ptr(R0, java_thread, in_bytes(JavaThread::last_Java_fp_offset())); ++ } ++ ++ // Always clear the pc because it could have been set by make_walkable() ++ st_ptr(R0, java_thread, in_bytes(JavaThread::last_Java_pc_offset())); ++} ++ ++void MacroAssembler::reset_last_Java_frame(bool clear_fp) { ++ Register thread = TREG; ++#ifndef OPT_THREAD ++ get_thread(thread); ++#endif ++ // we must set sp to zero to clear frame ++ sd(R0, Address(thread, JavaThread::last_Java_sp_offset())); ++ // must clear fp, so that compiled frames are not confused; it is ++ // possible that we need it only for debugging ++ if (clear_fp) { ++ sd(R0, Address(thread, JavaThread::last_Java_fp_offset())); ++ } ++ ++ // Always clear the pc because it could have been set by make_walkable() ++ sd(R0, Address(thread, JavaThread::last_Java_pc_offset())); ++} ++ ++// Write serialization page so VM thread can do a pseudo remote membar. ++// We use the current thread pointer to calculate a thread specific ++// offset to write to within the page. This minimizes bus traffic ++// due to cache line collision. 
++void MacroAssembler::serialize_memory(Register thread, Register tmp) { ++ int mask = os::vm_page_size() - sizeof(int); ++ assert_different_registers(AT, tmp); ++ assert(is_uimm(mask, 16), "Not a unsigned 16-bit"); ++ srl(AT, thread, os::get_serialize_page_shift_count()); ++ andi(AT, AT, mask); ++ li(tmp, os::get_memory_serialize_page()); ++ addu(tmp, tmp, AT); ++ sw(R0, tmp, 0); ++} ++ ++void MacroAssembler::safepoint_poll(Label& slow_path, Register thread_reg) { ++ if (SafepointMechanism::uses_thread_local_poll()) { ++ ld(AT, thread_reg, in_bytes(Thread::polling_page_offset())); ++ andi(AT, AT, SafepointMechanism::poll_bit()); ++ bne(AT, R0, slow_path); ++ delayed()->nop(); ++ } else { ++ li(AT, SafepointSynchronize::address_of_state()); ++ lw(AT, AT, 0); ++ addiu(AT, AT, -SafepointSynchronize::_not_synchronized); ++ bne(AT, R0, slow_path); ++ delayed()->nop(); ++ } ++} ++ ++// Just like safepoint_poll, but use an acquiring load for thread- ++// local polling. ++// ++// We need an acquire here to ensure that any subsequent load of the ++// global SafepointSynchronize::_state flag is ordered after this load ++// of the local Thread::_polling page. We don't want this poll to ++// return false (i.e. not safepointing) and a later poll of the global ++// SafepointSynchronize::_state spuriously to return true. ++// ++// This is to avoid a race when we're in a native->Java transition ++// racing the code which wakes up from a safepoint. ++// ++void MacroAssembler::safepoint_poll_acquire(Label& slow_path, Register thread_reg) { ++ if (SafepointMechanism::uses_thread_local_poll()) { ++ ld(AT, thread_reg, in_bytes(Thread::polling_page_offset())); ++ sync(); ++ andi(AT, AT, SafepointMechanism::poll_bit()); ++ bne(AT, R0, slow_path); ++ delayed()->nop(); ++ } else { ++ safepoint_poll(slow_path, thread_reg); ++ } ++} ++ ++// Calls to C land ++// ++// When entering C land, the fp, & sp of the last Java frame have to be recorded ++// in the (thread-local) JavaThread object. When leaving C land, the last Java fp ++// has to be reset to 0. This is required to allow proper stack traversal. 
++void MacroAssembler::set_last_Java_frame(Register java_thread, ++ Register last_java_sp, ++ Register last_java_fp, ++ address last_java_pc) { ++ // determine java_thread register ++ if (!java_thread->is_valid()) { ++#ifndef OPT_THREAD ++ java_thread = T2; ++ get_thread(java_thread); ++#else ++ java_thread = TREG; ++#endif ++ } ++ // determine last_java_sp register ++ if (!last_java_sp->is_valid()) { ++ last_java_sp = SP; ++ } ++ ++ // last_java_fp is optional ++ if (last_java_fp->is_valid()) { ++ st_ptr(last_java_fp, java_thread, in_bytes(JavaThread::last_Java_fp_offset())); ++ } ++ ++ // last_java_pc is optional ++ if (last_java_pc != NULL) { ++ relocate(relocInfo::internal_word_type); ++ patchable_set48(AT, (long)last_java_pc); ++ st_ptr(AT, java_thread, in_bytes(JavaThread::frame_anchor_offset() + JavaFrameAnchor::last_Java_pc_offset())); ++ } ++ st_ptr(last_java_sp, java_thread, in_bytes(JavaThread::last_Java_sp_offset())); ++} ++ ++void MacroAssembler::set_last_Java_frame(Register last_java_sp, ++ Register last_java_fp, ++ address last_java_pc) { ++ // determine last_java_sp register ++ if (!last_java_sp->is_valid()) { ++ last_java_sp = SP; ++ } ++ ++ Register thread = TREG; ++#ifndef OPT_THREAD ++ get_thread(thread); ++#endif ++ // last_java_fp is optional ++ if (last_java_fp->is_valid()) { ++ sd(last_java_fp, Address(thread, JavaThread::last_Java_fp_offset())); ++ } ++ ++ // last_java_pc is optional ++ if (last_java_pc != NULL) { ++ relocate(relocInfo::internal_word_type); ++ patchable_set48(AT, (long)last_java_pc); ++ st_ptr(AT, thread, in_bytes(JavaThread::frame_anchor_offset() + JavaFrameAnchor::last_Java_pc_offset())); ++ } ++ ++ sd(last_java_sp, Address(thread, JavaThread::last_Java_sp_offset())); ++} ++ ++// Defines obj, preserves var_size_in_bytes, okay for t2 == var_size_in_bytes. ++void MacroAssembler::tlab_allocate(Register obj, Register var_size_in_bytes, int con_size_in_bytes, ++ Register t1, Register t2, Label& slow_case) { ++ Unimplemented(); ++ //BarrierSetAssembler* bs = BarrierSet::barrier_set()->barrier_set_assembler(); ++ //bs->tlab_allocate(this, thread, obj, var_size_in_bytes, con_size_in_bytes, t1, t2, slow_case); ++} ++ ++// Defines obj, preserves var_size_in_bytes ++void MacroAssembler::eden_allocate(Register obj, Register var_size_in_bytes, int con_size_in_bytes, ++ Register t1, Register t2, Label& slow_case) { ++ Unimplemented(); ++ //assert_different_registers(obj, var_size_in_bytes, t1, AT); ++ //BarrierSetAssembler* bs = BarrierSet::barrier_set()->barrier_set_assembler(); ++ //bs->eden_allocate(this, thread, obj, var_size_in_bytes, con_size_in_bytes, t1, slow_case); ++} ++ ++void MacroAssembler::incr_allocated_bytes(Register thread, ++ Register var_size_in_bytes, ++ int con_size_in_bytes, ++ Register t1) { ++ if (!thread->is_valid()) { ++#ifndef OPT_THREAD ++ assert(t1->is_valid(), "need temp reg"); ++ thread = t1; ++ get_thread(thread); ++#else ++ thread = TREG; ++#endif ++ } ++ ++ ld_ptr(AT, thread, in_bytes(JavaThread::allocated_bytes_offset())); ++ if (var_size_in_bytes->is_valid()) { ++ addu(AT, AT, var_size_in_bytes); ++ } else { ++ addiu(AT, AT, con_size_in_bytes); ++ } ++ st_ptr(AT, thread, in_bytes(JavaThread::allocated_bytes_offset())); ++} ++ ++void MacroAssembler::li(Register rd, long imm) { ++ if (imm <= max_jint && imm >= min_jint) { ++ li32(rd, (int)imm); ++ } else if (julong(imm) <= 0xFFFFFFFF) { ++ assert_not_delayed(); ++ // lui sign-extends, so we can't use that. 
++ ori(rd, R0, julong(imm) >> 16); ++ dsll(rd, rd, 16); ++ ori(rd, rd, split_low(imm)); ++ } else if ((imm > 0) && is_simm16(imm >> 32)) { ++ // A 48-bit address ++ li48(rd, imm); ++ } else { ++ li64(rd, imm); ++ } ++} ++ ++void MacroAssembler::li32(Register reg, int imm) { ++ if (is_simm16(imm)) { ++ addiu(reg, R0, imm); ++ } else { ++ lui(reg, split_low(imm >> 16)); ++ if (split_low(imm)) ++ ori(reg, reg, split_low(imm)); ++ } ++} ++ ++void MacroAssembler::set64(Register d, jlong value) { ++ assert_not_delayed(); ++ ++ int hi = (int)(value >> 32); ++ int lo = (int)(value & ~0); ++ ++ if (value == lo) { // 32-bit integer ++ if (is_simm16(value)) { ++ daddiu(d, R0, value); ++ } else { ++ lui(d, split_low(value >> 16)); ++ if (split_low(value)) { ++ ori(d, d, split_low(value)); ++ } ++ } ++ } else if (hi == 0) { // hardware zero-extends to upper 32 ++ ori(d, R0, julong(value) >> 16); ++ dsll(d, d, 16); ++ if (split_low(value)) { ++ ori(d, d, split_low(value)); ++ } ++ } else if ((value> 0) && is_simm16(value >> 32)) { // li48 ++ // 4 insts ++ li48(d, value); ++ } else { // li64 ++ // 6 insts ++ li64(d, value); ++ } ++} ++ ++ ++int MacroAssembler::insts_for_set64(jlong value) { ++ int hi = (int)(value >> 32); ++ int lo = (int)(value & ~0); ++ ++ int count = 0; ++ ++ if (value == lo) { // 32-bit integer ++ if (is_simm16(value)) { ++ //daddiu(d, R0, value); ++ count++; ++ } else { ++ //lui(d, split_low(value >> 16)); ++ count++; ++ if (split_low(value)) { ++ //ori(d, d, split_low(value)); ++ count++; ++ } ++ } ++ } else if (hi == 0) { // hardware zero-extends to upper 32 ++ //ori(d, R0, julong(value) >> 16); ++ //dsll(d, d, 16); ++ count += 2; ++ if (split_low(value)) { ++ //ori(d, d, split_low(value)); ++ count++; ++ } ++ } else if ((value> 0) && is_simm16(value >> 32)) { // li48 ++ // 4 insts ++ //li48(d, value); ++ count += 4; ++ } else { // li64 ++ // 6 insts ++ //li64(d, value); ++ count += 6; ++ } ++ ++ return count; ++} ++ ++void MacroAssembler::patchable_set48(Register d, jlong value) { ++ assert_not_delayed(); ++ ++ int hi = (int)(value >> 32); ++ int lo = (int)(value & ~0); ++ ++ int count = 0; ++ ++ if (value == lo) { // 32-bit integer ++ if (is_simm16(value)) { ++ daddiu(d, R0, value); ++ count += 1; ++ } else { ++ lui(d, split_low(value >> 16)); ++ count += 1; ++ if (split_low(value)) { ++ ori(d, d, split_low(value)); ++ count += 1; ++ } ++ } ++ } else if (hi == 0) { // hardware zero-extends to upper 32 ++ ori(d, R0, julong(value) >> 16); ++ dsll(d, d, 16); ++ count += 2; ++ if (split_low(value)) { ++ ori(d, d, split_low(value)); ++ count += 1; ++ } ++ } else if ((value> 0) && is_simm16(value >> 32)) { // li48 ++ // 4 insts ++ li48(d, value); ++ count += 4; ++ } else { // li64 ++ tty->print_cr("value = 0x%lx", value); ++ guarantee(false, "Not supported yet !"); ++ } ++ ++ while (count < 4) { ++ nop(); ++ count++; ++ } ++} ++ ++void MacroAssembler::patchable_set32(Register d, jlong value) { ++ assert_not_delayed(); ++ ++ int hi = (int)(value >> 32); ++ int lo = (int)(value & ~0); ++ ++ int count = 0; ++ ++ if (value == lo) { // 32-bit integer ++ if (is_simm16(value)) { ++ daddiu(d, R0, value); ++ count += 1; ++ } else { ++ lui(d, split_low(value >> 16)); ++ count += 1; ++ if (split_low(value)) { ++ ori(d, d, split_low(value)); ++ count += 1; ++ } ++ } ++ } else if (hi == 0) { // hardware zero-extends to upper 32 ++ ori(d, R0, julong(value) >> 16); ++ dsll(d, d, 16); ++ count += 2; ++ if (split_low(value)) { ++ ori(d, d, split_low(value)); ++ count += 1; ++ } ++ } else { ++ 
tty->print_cr("value = 0x%lx", value); ++ guarantee(false, "Not supported yet !"); ++ } ++ ++ while (count < 3) { ++ nop(); ++ count++; ++ } ++} ++ ++void MacroAssembler::patchable_call32(Register d, jlong value) { ++ assert_not_delayed(); ++ ++ int hi = (int)(value >> 32); ++ int lo = (int)(value & ~0); ++ ++ int count = 0; ++ ++ if (value == lo) { // 32-bit integer ++ if (is_simm16(value)) { ++ daddiu(d, R0, value); ++ count += 1; ++ } else { ++ lui(d, split_low(value >> 16)); ++ count += 1; ++ if (split_low(value)) { ++ ori(d, d, split_low(value)); ++ count += 1; ++ } ++ } ++ } else { ++ tty->print_cr("value = 0x%lx", value); ++ guarantee(false, "Not supported yet !"); ++ } ++ ++ while (count < 2) { ++ nop(); ++ count++; ++ } ++} ++ ++void MacroAssembler::set_narrow_klass(Register dst, Klass* k) { ++ assert(UseCompressedClassPointers, "should only be used for compressed header"); ++ assert(oop_recorder() != NULL, "this assembler needs an OopRecorder"); ++ ++ int klass_index = oop_recorder()->find_index(k); ++ RelocationHolder rspec = metadata_Relocation::spec(klass_index); ++ long narrowKlass = (long)Klass::encode_klass(k); ++ ++ relocate(rspec, Assembler::narrow_oop_operand); ++ patchable_set48(dst, narrowKlass); ++} ++ ++ ++void MacroAssembler::set_narrow_oop(Register dst, jobject obj) { ++ assert(UseCompressedOops, "should only be used for compressed header"); ++ assert(oop_recorder() != NULL, "this assembler needs an OopRecorder"); ++ ++ int oop_index = oop_recorder()->find_index(obj); ++ RelocationHolder rspec = oop_Relocation::spec(oop_index); ++ ++ relocate(rspec, Assembler::narrow_oop_operand); ++ patchable_set48(dst, oop_index); ++} ++ ++// ((OopHandle)result).resolve(); ++void MacroAssembler::resolve_oop_handle(Register result, Register tmp) { ++ // OopHandle::resolve is an indirection. 
++ access_load_at(T_OBJECT, IN_NATIVE, result, Address(result, 0), tmp, NOREG); ++} ++ ++void MacroAssembler::load_mirror(Register mirror, Register method, Register tmp) { ++ // get mirror ++ const int mirror_offset = in_bytes(Klass::java_mirror_offset()); ++ ld_ptr(mirror, method, in_bytes(Method::const_offset())); ++ ld_ptr(mirror, mirror, in_bytes(ConstMethod::constants_offset())); ++ ld_ptr(mirror, mirror, ConstantPool::pool_holder_offset_in_bytes()); ++ ld_ptr(mirror, mirror, mirror_offset); ++ resolve_oop_handle(mirror, tmp); ++} ++ ++void MacroAssembler::li64(Register rd, long imm) { ++ assert_not_delayed(); ++ lui(rd, split_low(imm >> 48)); ++ ori(rd, rd, split_low(imm >> 32)); ++ dsll(rd, rd, 16); ++ ori(rd, rd, split_low(imm >> 16)); ++ dsll(rd, rd, 16); ++ ori(rd, rd, split_low(imm)); ++} ++ ++void MacroAssembler::li48(Register rd, long imm) { ++ assert_not_delayed(); ++ assert(is_simm16(imm >> 32), "Not a 48-bit address"); ++ lui(rd, imm >> 32); ++ ori(rd, rd, split_low(imm >> 16)); ++ dsll(rd, rd, 16); ++ ori(rd, rd, split_low(imm)); ++} ++ ++void MacroAssembler::verify_oop(Register reg, const char* s) { ++ if (!VerifyOops) return; ++ const char * b = NULL; ++ stringStream ss; ++ ss.print("verify_oop: %s: %s", reg->name(), s); ++ b = code_string(ss.as_string()); ++ pushad(); ++ move(A1, reg); ++ li(A0, (long)b); ++ li(AT, (long)StubRoutines::verify_oop_subroutine_entry_address()); ++ ld(T9, AT, 0); ++ jalr(T9); ++ delayed()->nop(); ++ popad(); ++} ++ ++ ++void MacroAssembler::verify_oop_addr(Address addr, const char* s) { ++ if (!VerifyOops) { ++ nop(); ++ return; ++ } ++ // Pass register number to verify_oop_subroutine ++ const char * b = NULL; ++ stringStream ss; ++ ss.print("verify_oop_addr: %s", s); ++ b = code_string(ss.as_string()); ++ ++ addiu(SP, SP, - 7 * wordSize); ++ st_ptr(T0, SP, 6 * wordSize); ++ st_ptr(T1, SP, 5 * wordSize); ++ st_ptr(RA, SP, 4 * wordSize); ++ st_ptr(A0, SP, 3 * wordSize); ++ st_ptr(A1, SP, 2 * wordSize); ++ st_ptr(AT, SP, 1 * wordSize); ++ st_ptr(T9, SP, 0); ++ ++ // addr may contain sp so we will have to adjust it based on the ++ // pushes that we just did. 
++ if (addr.uses(SP)) { ++ lea(A1, addr); ++ ld_ptr(A1, Address(A1, 7 * wordSize)); ++ } else { ++ ld_ptr(A1, addr); ++ } ++ li(A0, (long)b); ++ // call indirectly to solve generation ordering problem ++ li(AT, (long)StubRoutines::verify_oop_subroutine_entry_address()); ++ ld_ptr(T9, AT, 0); ++ jalr(T9); ++ delayed()->nop(); ++ ld_ptr(T0, SP, 6* wordSize); ++ ld_ptr(T1, SP, 5* wordSize); ++ ld_ptr(RA, SP, 4* wordSize); ++ ld_ptr(A0, SP, 3* wordSize); ++ ld_ptr(A1, SP, 2* wordSize); ++ ld_ptr(AT, SP, 1* wordSize); ++ ld_ptr(T9, SP, 0* wordSize); ++ addiu(SP, SP, 7 * wordSize); ++} ++ ++// used registers : T0, T1 ++void MacroAssembler::verify_oop_subroutine() { ++ // RA: ra ++ // A0: char* error message ++ // A1: oop object to verify ++ ++ Label exit, error; ++ // increment counter ++ li(T0, (long)StubRoutines::verify_oop_count_addr()); ++ lw(AT, T0, 0); ++ daddiu(AT, AT, 1); ++ sw(AT, T0, 0); ++ ++ // make sure object is 'reasonable' ++ beq(A1, R0, exit); // if obj is NULL it is ok ++ delayed()->nop(); ++ ++ // Check if the oop is in the right area of memory ++ // const int oop_mask = Universe::verify_oop_mask(); ++ // const int oop_bits = Universe::verify_oop_bits(); ++ const uintptr_t oop_mask = Universe::verify_oop_mask(); ++ const uintptr_t oop_bits = Universe::verify_oop_bits(); ++ li(AT, oop_mask); ++ andr(T0, A1, AT); ++ li(AT, oop_bits); ++ bne(T0, AT, error); ++ delayed()->nop(); ++ ++ // make sure klass is 'reasonable' ++ // add for compressedoops ++ reinit_heapbase(); ++ // add for compressedoops ++ load_klass(T0, A1); ++ beq(T0, R0, error); // if klass is NULL it is broken ++ delayed()->nop(); ++ // return if everything seems ok ++ bind(exit); ++ ++ jr(RA); ++ delayed()->nop(); ++ ++ // handle errors ++ bind(error); ++ pushad(); ++ call(CAST_FROM_FN_PTR(address, MacroAssembler::debug), relocInfo::runtime_call_type); ++ delayed()->nop(); ++ popad(); ++ jr(RA); ++ delayed()->nop(); ++} ++ ++void MacroAssembler::verify_tlab(Register t1, Register t2) { ++#ifdef ASSERT ++ assert_different_registers(t1, t2, AT); ++ if (UseTLAB && VerifyOops) { ++ Label next, ok; ++ ++ get_thread(t1); ++ ++ ld_ptr(t2, t1, in_bytes(JavaThread::tlab_top_offset())); ++ ld_ptr(AT, t1, in_bytes(JavaThread::tlab_start_offset())); ++ sltu(AT, t2, AT); ++ beq(AT, R0, next); ++ delayed()->nop(); ++ ++ stop("assert(top >= start)"); ++ ++ bind(next); ++ ld_ptr(AT, t1, in_bytes(JavaThread::tlab_end_offset())); ++ sltu(AT, AT, t2); ++ beq(AT, R0, ok); ++ delayed()->nop(); ++ ++ stop("assert(top <= end)"); ++ ++ bind(ok); ++ ++ } ++#endif ++} ++RegisterOrConstant MacroAssembler::delayed_value_impl(intptr_t* delayed_value_addr, ++ Register tmp, ++ int offset) { ++ intptr_t value = *delayed_value_addr; ++ if (value != 0) ++ return RegisterOrConstant(value + offset); ++ Unimplemented(); ++ //AddressLiteral a(delayed_value_addr); ++ // load indirectly to solve generation ordering problem ++ //movptr(tmp, ExternalAddress((address) delayed_value_addr)); ++ //ld(tmp, a); ++ if (offset != 0) ++ daddiu(tmp,tmp, offset); ++ ++ return RegisterOrConstant(tmp); ++} ++ ++void MacroAssembler::hswap(Register reg) { ++ //short ++ //andi(reg, reg, 0xffff); ++ srl(AT, reg, 8); ++ sll(reg, reg, 24); ++ sra(reg, reg, 16); ++ orr(reg, reg, AT); ++} ++ ++void MacroAssembler::huswap(Register reg) { ++ dsrl(AT, reg, 8); ++ dsll(reg, reg, 24); ++ dsrl(reg, reg, 16); ++ orr(reg, reg, AT); ++ andi(reg, reg, 0xffff); ++} ++ ++// something funny to do this will only one more register AT ++// 32 bits ++void MacroAssembler::swap(Register reg) { ++ 
srl(AT, reg, 8); ++ sll(reg, reg, 24); ++ orr(reg, reg, AT); ++ //reg : 4 1 2 3 ++ srl(AT, AT, 16); ++ xorr(AT, AT, reg); ++ andi(AT, AT, 0xff); ++ //AT : 0 0 0 1^3); ++ xorr(reg, reg, AT); ++ //reg : 4 1 2 1 ++ sll(AT, AT, 16); ++ xorr(reg, reg, AT); ++ //reg : 4 3 2 1 ++} ++ ++void MacroAssembler::cmpxchg(Address addr, Register oldval, Register newval, ++ Register resflag, bool retold, bool barrier) { ++ assert(oldval != resflag, "oldval != resflag"); ++ assert(newval != resflag, "newval != resflag"); ++ Label again, succ, fail; ++ bind(again); ++ lld(resflag, addr); ++ bne(resflag, oldval, fail); ++ delayed()->nop(); ++ move(resflag, newval); ++ scd(resflag, addr); ++ beq(resflag, R0, again); ++ delayed()->nop(); ++ b(succ); ++ delayed()->nop(); ++ bind(fail); ++ if (barrier) ++ sync(); ++ if (retold && oldval != R0) ++ move(oldval, resflag); ++ move(resflag, R0); ++ bind(succ); ++} ++ ++void MacroAssembler::cmpxchg(Address addr, Register oldval, Register newval, ++ Register tmp, bool retold, bool barrier, Label& succ, Label* fail) { ++ assert(oldval != tmp, "oldval != tmp"); ++ assert(newval != tmp, "newval != tmp"); ++ Label again, neq; ++ ++ bind(again); ++ lld(tmp, addr); ++ bne(tmp, oldval, neq); ++ delayed()->nop(); ++ move(tmp, newval); ++ scd(tmp, addr); ++ beq(tmp, R0, again); ++ delayed()->nop(); ++ b(succ); ++ delayed()->nop(); ++ ++ bind(neq); ++ if (barrier) ++ sync(); ++ if (retold && oldval != R0) ++ move(oldval, tmp); ++ if (fail) { ++ b(*fail); ++ delayed()->nop(); ++ } ++} ++ ++ ++void MacroAssembler::cmpxchg32(Address addr, Register oldval, Register newval, ++ Register resflag, bool sign, bool retold, bool barrier) { ++ assert(oldval != resflag, "oldval != resflag"); ++ assert(newval != resflag, "newval != resflag"); ++ Label again, succ, fail; ++ bind(again); ++ ll(resflag, addr); ++ if (!sign) ++ dinsu(resflag, R0, 32, 32); ++ bne(resflag, oldval, fail); ++ delayed()->nop(); ++ ++ move(resflag, newval); ++ sc(resflag, addr); ++ beq(resflag, R0, again); ++ delayed()->nop(); ++ b(succ); ++ delayed()->nop(); ++ ++ bind(fail); ++ if (barrier) ++ sync(); ++ if (retold && oldval != R0) ++ move(oldval, resflag); ++ move(resflag, R0); ++ bind(succ); ++} ++ ++void MacroAssembler::cmpxchg32(Address addr, Register oldval, Register newval, Register tmp, ++ bool sign, bool retold, bool barrier, Label& succ, Label* fail) { ++ assert(oldval != tmp, "oldval != tmp"); ++ assert(newval != tmp, "newval != tmp"); ++ Label again, neq; ++ ++ bind(again); ++ ll(tmp, addr); ++ if (!sign) ++ dinsu(tmp, R0, 32, 32); ++ bne(tmp, oldval, neq); ++ delayed()->nop(); ++ move(tmp, newval); ++ sc(tmp, addr); ++ beq(tmp, R0, again); ++ delayed()->nop(); ++ b(succ); ++ delayed()->nop(); ++ ++ bind(neq); ++ if (barrier) ++ sync(); ++ if (retold && oldval != R0) ++ move(oldval, tmp); ++ if (fail) { ++ b(*fail); ++ delayed()->nop(); ++ } ++} ++ ++void MacroAssembler::cmpxchg8(Register x_regLo, Register x_regHi, Address dest, Register c_regLo, Register c_regHi) { ++ Label done, again, nequal; ++ ++ Register x_reg = x_regLo; ++ dsll32(x_regHi, x_regHi, 0); ++ dsll32(x_regLo, x_regLo, 0); ++ dsrl32(x_regLo, x_regLo, 0); ++ orr(x_reg, x_regLo, x_regHi); ++ ++ Register c_reg = c_regLo; ++ dsll32(c_regHi, c_regHi, 0); ++ dsll32(c_regLo, c_regLo, 0); ++ dsrl32(c_regLo, c_regLo, 0); ++ orr(c_reg, c_regLo, c_regHi); ++ ++ bind(again); ++ ++ if (UseSyncLevel >= 10000 || UseSyncLevel == 1000 || UseSyncLevel == 4000) sync(); ++ lld(AT, dest); ++ bne(AT, c_reg, nequal); ++ delayed()->nop(); ++ ++ //move(AT, x_reg); 
++ daddu(AT, x_reg, R0); ++ scd(AT, dest); ++ beq(AT, R0, again); ++ delayed()->nop(); ++ b(done); ++ delayed()->nop(); ++ ++ // not xchged ++ bind(nequal); ++ sync(); ++ //move(c_reg, AT); ++ //move(AT, R0); ++ daddu(c_reg, AT, R0); ++ daddu(AT, R0, R0); ++ bind(done); ++} ++ ++// be sure the three register is different ++void MacroAssembler::rem_s(FloatRegister fd, FloatRegister fs, FloatRegister ft, FloatRegister tmp) { ++ assert_different_registers(tmp, fs, ft); ++ div_s(tmp, fs, ft); ++ trunc_l_s(tmp, tmp); ++ cvt_s_l(tmp, tmp); ++ mul_s(tmp, tmp, ft); ++ sub_s(fd, fs, tmp); ++} ++ ++// be sure the three register is different ++void MacroAssembler::rem_d(FloatRegister fd, FloatRegister fs, FloatRegister ft, FloatRegister tmp) { ++ assert_different_registers(tmp, fs, ft); ++ div_d(tmp, fs, ft); ++ trunc_l_d(tmp, tmp); ++ cvt_d_l(tmp, tmp); ++ mul_d(tmp, tmp, ft); ++ sub_d(fd, fs, tmp); ++} ++ ++#ifdef COMPILER2 ++// Fast_Lock and Fast_Unlock used by C2 ++ ++// Because the transitions from emitted code to the runtime ++// monitorenter/exit helper stubs are so slow it's critical that ++// we inline both the stack-locking fast-path and the inflated fast path. ++// ++// See also: cmpFastLock and cmpFastUnlock. ++// ++// What follows is a specialized inline transliteration of the code ++// in slow_enter() and slow_exit(). If we're concerned about I$ bloat ++// another option would be to emit TrySlowEnter and TrySlowExit methods ++// at startup-time. These methods would accept arguments as ++// (Obj, Self, box, Scratch) and return success-failure ++// indications in the icc.ZFlag. Fast_Lock and Fast_Unlock would simply ++// marshal the arguments and emit calls to TrySlowEnter and TrySlowExit. ++// In practice, however, the # of lock sites is bounded and is usually small. ++// Besides the call overhead, TrySlowEnter and TrySlowExit might suffer ++// if the processor uses simple bimodal branch predictors keyed by EIP ++// Since the helper routines would be called from multiple synchronization ++// sites. ++// ++// An even better approach would be write "MonitorEnter()" and "MonitorExit()" ++// in java - using j.u.c and unsafe - and just bind the lock and unlock sites ++// to those specialized methods. That'd give us a mostly platform-independent ++// implementation that the JITs could optimize and inline at their pleasure. ++// Done correctly, the only time we'd need to cross to native could would be ++// to park() or unpark() threads. We'd also need a few more unsafe operators ++// to (a) prevent compiler-JIT reordering of non-volatile accesses, and ++// (b) explicit barriers or fence operations. ++// ++// TODO: ++// ++// * Arrange for C2 to pass "Self" into Fast_Lock and Fast_Unlock in one of the registers (scr). ++// This avoids manifesting the Self pointer in the Fast_Lock and Fast_Unlock terminals. ++// Given TLAB allocation, Self is usually manifested in a register, so passing it into ++// the lock operators would typically be faster than reifying Self. ++// ++// * Ideally I'd define the primitives as: ++// fast_lock (nax Obj, nax box, res, tmp, nax scr) where tmp and scr are KILLED. ++// fast_unlock (nax Obj, box, res, nax tmp) where tmp are KILLED ++// Unfortunately ADLC bugs prevent us from expressing the ideal form. ++// Instead, we're stuck with a rather awkward and brittle register assignments below. ++// Furthermore the register assignments are overconstrained, possibly resulting in ++// sub-optimal code near the synchronization site. 
++// ++// * Eliminate the sp-proximity tests and just use "== Self" tests instead. ++// Alternately, use a better sp-proximity test. ++// ++// * Currently ObjectMonitor._Owner can hold either an sp value or a (THREAD *) value. ++// Either one is sufficient to uniquely identify a thread. ++// TODO: eliminate use of sp in _owner and use get_thread(tr) instead. ++// ++// * Intrinsify notify() and notifyAll() for the common cases where the ++// object is locked by the calling thread but the waitlist is empty. ++// avoid the expensive JNI call to JVM_Notify() and JVM_NotifyAll(). ++// ++// * use jccb and jmpb instead of jcc and jmp to improve code density. ++// But beware of excessive branch density on AMD Opterons. ++// ++// * Both Fast_Lock and Fast_Unlock set the ICC.ZF to indicate success ++// or failure of the fast-path. If the fast-path fails then we pass ++// control to the slow-path, typically in C. In Fast_Lock and ++// Fast_Unlock we often branch to DONE_LABEL, just to find that C2 ++// will emit a conditional branch immediately after the node. ++// So we have branches to branches and lots of ICC.ZF games. ++// Instead, it might be better to have C2 pass a "FailureLabel" ++// into Fast_Lock and Fast_Unlock. In the case of success, control ++// will drop through the node. ICC.ZF is undefined at exit. ++// In the case of failure, the node will branch directly to the ++// FailureLabel ++ ++ ++// obj: object to lock ++// box: on-stack box address (displaced header location) ++// tmp: tmp -- KILLED ++// scr: tmp -- KILLED ++void MacroAssembler::fast_lock(Register objReg, Register boxReg, Register resReg, ++ Register tmpReg, Register scrReg) { ++ Label IsInflated, DONE, DONE_SET; ++ ++ // Ensure the register assignents are disjoint ++ guarantee(objReg != boxReg, ""); ++ guarantee(objReg != tmpReg, ""); ++ guarantee(objReg != scrReg, ""); ++ guarantee(boxReg != tmpReg, ""); ++ guarantee(boxReg != scrReg, ""); ++ ++ block_comment("FastLock"); ++ ++ if (PrintBiasedLockingStatistics) { ++ atomic_inc32((address)BiasedLocking::total_entry_count_addr(), 1, tmpReg, scrReg); ++ } ++ ++ if (EmitSync & 1) { ++ move(AT, 0x0); ++ return; ++ } else ++ if (EmitSync & 2) { ++ Label DONE_LABEL ; ++ if (UseBiasedLocking) { ++ // Note: tmpReg maps to the swap_reg argument and scrReg to the tmp_reg argument. ++ biased_locking_enter(boxReg, objReg, tmpReg, scrReg, false, DONE_LABEL, NULL); ++ } ++ ++ ld(tmpReg, Address(objReg, 0)) ; // fetch markword ++ ori(tmpReg, tmpReg, 0x1); ++ sd(tmpReg, Address(boxReg, 0)); // Anticipate successful CAS ++ ++ cmpxchg(Address(objReg, 0), tmpReg, boxReg, scrReg, true, false, DONE_LABEL); // Updates tmpReg ++ delayed()->nop(); ++ ++ // Recursive locking ++ dsubu(tmpReg, tmpReg, SP); ++ li(AT, (7 - os::vm_page_size() )); ++ andr(tmpReg, tmpReg, AT); ++ sd(tmpReg, Address(boxReg, 0)); ++ bind(DONE_LABEL) ; ++ } else { ++ // Possible cases that we'll encounter in fast_lock ++ // ------------------------------------------------ ++ // * Inflated ++ // -- unlocked ++ // -- Locked ++ // = by self ++ // = by other ++ // * biased ++ // -- by Self ++ // -- by other ++ // * neutral ++ // * stack-locked ++ // -- by self ++ // = sp-proximity test hits ++ // = sp-proximity test generates false-negative ++ // -- by other ++ // ++ ++ // TODO: optimize away redundant LDs of obj->mark and improve the markword triage ++ // order to reduce the number of conditional branches in the most common cases. 
++ // Beware -- there's a subtle invariant that fetch of the markword ++ // at [FETCH], below, will never observe a biased encoding (*101b). ++ // If this invariant is not held we risk exclusion (safety) failure. ++ if (UseBiasedLocking && !UseOptoBiasInlining) { ++ Label succ, fail; ++ biased_locking_enter(boxReg, objReg, tmpReg, scrReg, false, succ, NULL); ++ b(fail); ++ delayed()->nop(); ++ bind(succ); ++ b(DONE); ++ delayed()->ori(resReg, R0, 1); ++ bind(fail); ++ } ++ ++ ld(tmpReg, Address(objReg, 0)); //Fetch the markword of the object. ++ andi(AT, tmpReg, markOopDesc::monitor_value); ++ bne(AT, R0, IsInflated); // inflated vs stack-locked|neutral|bias ++ delayed()->nop(); ++ ++ // Attempt stack-locking ... ++ ori(tmpReg, tmpReg, markOopDesc::unlocked_value); ++ sd(tmpReg, Address(boxReg, 0)); // Anticipate successful CAS ++ ++ if (PrintBiasedLockingStatistics) { ++ Label SUCC, FAIL; ++ cmpxchg(Address(objReg, 0), tmpReg, boxReg, scrReg, true, false, SUCC, &FAIL); // Updates tmpReg ++ bind(SUCC); ++ atomic_inc32((address)BiasedLocking::fast_path_entry_count_addr(), 1, AT, scrReg); ++ b(DONE); ++ delayed()->ori(resReg, R0, 1); ++ bind(FAIL); ++ } else { ++ // If cmpxchg is succ, then scrReg = 1 ++ cmpxchg(Address(objReg, 0), tmpReg, boxReg, scrReg, true, false, DONE_SET); // Updates tmpReg ++ } ++ ++ // Recursive locking ++ // The object is stack-locked: markword contains stack pointer to BasicLock. ++ // Locked by current thread if difference with current SP is less than one page. ++ dsubu(tmpReg, tmpReg, SP); ++ li(AT, 7 - os::vm_page_size()); ++ andr(tmpReg, tmpReg, AT); ++ sd(tmpReg, Address(boxReg, 0)); ++ ++ if (PrintBiasedLockingStatistics) { ++ Label L; ++ // tmpReg == 0 => BiasedLocking::_fast_path_entry_count++ ++ bne(tmpReg, R0, L); ++ delayed()->nop(); ++ atomic_inc32((address)BiasedLocking::fast_path_entry_count_addr(), 1, AT, scrReg); ++ bind(L); ++ } ++ b(DONE); ++ delayed()->sltiu(resReg, tmpReg, 1); // resReg = (tmpReg == 0) ? 1 : 0 ++ ++ bind(IsInflated); ++ // The object's monitor m is unlocked iff m->owner == NULL, ++ // otherwise m->owner may contain a thread or a stack address. ++ ++ // TODO: someday avoid the ST-before-CAS penalty by ++ // relocating (deferring) the following ST. ++ // We should also think about trying a CAS without having ++ // fetched _owner. If the CAS is successful we may ++ // avoid an RTO->RTS upgrade on the $line. ++ // Without cast to int32_t a movptr will destroy r10 which is typically obj ++ li(AT, (int32_t)intptr_t(markOopDesc::unused_mark())); ++ sd(AT, Address(boxReg, 0)); ++ ++ ld(AT, Address(tmpReg, ObjectMonitor::owner_offset_in_bytes() - 2)); ++ // if (m->owner != 0) => AT = 0, goto slow path. ++ bne(AT, R0, DONE_SET); ++ delayed()->ori(scrReg, R0, 0); ++ ++#ifndef OPT_THREAD ++ get_thread(TREG); ++#endif ++ // It's inflated and appears unlocked ++ cmpxchg(Address(tmpReg, ObjectMonitor::owner_offset_in_bytes() - 2), R0, TREG, scrReg, false, false) ; ++ // Intentional fall-through into DONE ... ++ ++ bind(DONE_SET); ++ move(resReg, scrReg); ++ ++ // DONE is a hot target - we'd really like to place it at the ++ // start of cache line by padding with NOPs. ++ // See the AMD and Intel software optimization manuals for the ++ // most efficient "long" NOP encodings. ++ // Unfortunately none of our alignment mechanisms suffice. ++ bind(DONE); ++ // At DONE the resReg is set as follows ... ++ // Fast_Unlock uses the same protocol. 
++ // resReg == 1 -> Success ++ // resREg == 0 -> Failure - force control through the slow-path ++ ++ // Avoid branch-to-branch on AMD processors ++ // This appears to be superstition. ++ if (EmitSync & 32) nop() ; ++ ++ } ++} ++ ++// obj: object to unlock ++// box: box address (displaced header location), killed. ++// tmp: killed tmp; cannot be obj nor box. ++// ++// Some commentary on balanced locking: ++// ++// Fast_Lock and Fast_Unlock are emitted only for provably balanced lock sites. ++// Methods that don't have provably balanced locking are forced to run in the ++// interpreter - such methods won't be compiled to use fast_lock and fast_unlock. ++// The interpreter provides two properties: ++// I1: At return-time the interpreter automatically and quietly unlocks any ++// objects acquired the current activation (frame). Recall that the ++// interpreter maintains an on-stack list of locks currently held by ++// a frame. ++// I2: If a method attempts to unlock an object that is not held by the ++// the frame the interpreter throws IMSX. ++// ++// Lets say A(), which has provably balanced locking, acquires O and then calls B(). ++// B() doesn't have provably balanced locking so it runs in the interpreter. ++// Control returns to A() and A() unlocks O. By I1 and I2, above, we know that O ++// is still locked by A(). ++// ++// The only other source of unbalanced locking would be JNI. The "Java Native Interface: ++// Programmer's Guide and Specification" claims that an object locked by jni_monitorenter ++// should not be unlocked by "normal" java-level locking and vice-versa. The specification ++// doesn't specify what will occur if a program engages in such mixed-mode locking, however. ++ ++void MacroAssembler::fast_unlock(Register objReg, Register boxReg, Register resReg, ++ Register tmpReg, Register scrReg) { ++ Label DONE, DONE_SET, Stacked, Inflated; ++ ++ guarantee(objReg != boxReg, ""); ++ guarantee(objReg != tmpReg, ""); ++ guarantee(objReg != scrReg, ""); ++ guarantee(boxReg != tmpReg, ""); ++ guarantee(boxReg != scrReg, ""); ++ ++ block_comment("FastUnlock"); ++ ++ if (EmitSync & 4) { ++ // Disable - inhibit all inlining. Force control through the slow-path ++ move(AT, 0x0); ++ return; ++ } else ++ if (EmitSync & 8) { ++ Label DONE_LABEL ; ++ if (UseBiasedLocking) { ++ biased_locking_exit(objReg, tmpReg, DONE_LABEL); ++ } ++ // classic stack-locking code ... ++ ld(tmpReg, Address(boxReg, 0)) ; ++ beq(tmpReg, R0, DONE_LABEL) ; ++ move(AT, 0x1); // delay slot ++ ++ cmpxchg(Address(objReg, 0), boxReg, tmpReg, AT, false, false); ++ bind(DONE_LABEL); ++ } else { ++ Label CheckSucc; ++ ++ // Critically, the biased locking test must have precedence over ++ // and appear before the (box->dhw == 0) recursive stack-lock test. ++ if (UseBiasedLocking && !UseOptoBiasInlining) { ++ Label succ, fail; ++ biased_locking_exit(objReg, tmpReg, succ); ++ b(fail); ++ delayed()->nop(); ++ bind(succ); ++ b(DONE); ++ delayed()->ori(resReg, R0, 1); ++ bind(fail); ++ } ++ ++ ld(tmpReg, Address(boxReg, 0)); // Examine the displaced header ++ beq(tmpReg, R0, DONE_SET); // 0 indicates recursive stack-lock ++ delayed()->sltiu(AT, tmpReg, 1); ++ ++ ld(tmpReg, Address(objReg, 0)); // Examine the object's markword ++ andi(AT, tmpReg, markOopDesc::monitor_value); ++ beq(AT, R0, Stacked); // Inflated? ++ delayed()->nop(); ++ ++ bind(Inflated); ++ // It's inflated. 
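As an illustrative aside (not part of the patch): for the non-inflated case the unlock path above mirrors the lock path; a zero displaced header marks a recursive stack-lock and nothing is written back, otherwise the saved header is CAS-ed back into the object's mark word. A minimal stand-alone C++ sketch, ignoring the inflated-monitor exit handled just below:

    #include <atomic>
    #include <cstdint>

    struct BasicLock { uintptr_t displaced_header; };

    // Returns true when the fast path releases the lock.
    bool try_stack_unlock(std::atomic<uintptr_t>& mark, BasicLock* box) {
      uintptr_t dhw = box->displaced_header;
      if (dhw == 0)
        return true;                             // recursive stack-lock: nothing to restore
      uintptr_t expected = reinterpret_cast<uintptr_t>(box);
      // Succeeds only if the mark still points at our box (i.e. it was not inflated).
      return mark.compare_exchange_strong(expected, dhw);
    }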
++ // Despite our balanced locking property we still check that m->_owner == Self ++ // as java routines or native JNI code called by this thread might ++ // have released the lock. ++ // Refer to the comments in synchronizer.cpp for how we might encode extra ++ // state in _succ so we can avoid fetching EntryList|cxq. ++ // ++ // I'd like to add more cases in fast_lock() and fast_unlock() -- ++ // such as recursive enter and exit -- but we have to be wary of ++ // I$ bloat, T$ effects and BP$ effects. ++ // ++ // If there's no contention try a 1-0 exit. That is, exit without ++ // a costly MEMBAR or CAS. See synchronizer.cpp for details on how ++ // we detect and recover from the race that the 1-0 exit admits. ++ // ++ // Conceptually Fast_Unlock() must execute a STST|LDST "release" barrier ++ // before it STs null into _owner, releasing the lock. Updates ++ // to data protected by the critical section must be visible before ++ // we drop the lock (and thus before any other thread could acquire ++ // the lock and observe the fields protected by the lock). ++#ifndef OPT_THREAD ++ get_thread(TREG); ++#endif ++ ++ // It's inflated ++ ld(scrReg, Address(tmpReg, ObjectMonitor::owner_offset_in_bytes() - 2)) ; ++ xorr(scrReg, scrReg, TREG); ++ ++ ld(AT, Address(tmpReg, ObjectMonitor::recursions_offset_in_bytes() - 2)) ; ++ orr(scrReg, scrReg, AT); ++ ++ bne(scrReg, R0, DONE_SET); ++ delayed()->ori(AT, R0, 0); ++ ++ ld(scrReg, Address(tmpReg, ObjectMonitor::cxq_offset_in_bytes() - 2)); ++ ld(AT, Address(tmpReg, ObjectMonitor::EntryList_offset_in_bytes() - 2)); ++ orr(scrReg, scrReg, AT); ++ ++ bne(scrReg, R0, DONE_SET); ++ delayed()->ori(AT, R0, 0); ++ ++ sync(); ++ sd(R0, Address(tmpReg, ObjectMonitor::owner_offset_in_bytes() - 2)); ++ b(DONE); ++ delayed()->ori(resReg, R0, 1); ++ ++ bind(Stacked); ++ ld(tmpReg, Address(boxReg, 0)); ++ cmpxchg(Address(objReg, 0), boxReg, tmpReg, AT, false, false); ++ ++ bind(DONE_SET); ++ move(resReg, AT); ++ ++ if (EmitSync & 65536) { ++ bind (CheckSucc); ++ } ++ ++ bind(DONE); ++ ++ // Avoid branch to branch on AMD processors ++ if (EmitSync & 32768) { nop() ; } ++ } ++} ++#endif // COMPILER2 ++ ++void MacroAssembler::align(int modulus) { ++ while (offset() % modulus != 0) nop(); ++} ++ ++ ++void MacroAssembler::verify_FPU(int stack_depth, const char* s) { ++ //Unimplemented(); ++} ++ ++Register caller_saved_registers[] = {AT, V0, V1, A0, A1, A2, A3, A4, A5, A6, A7, T0, T1, T2, T3, T8, T9, GP, RA, FP}; ++Register caller_saved_registers_except_v0[] = {AT, V1, A0, A1, A2, A3, A4, A5, A6, A7, T0, T1, T2, T3, T8, T9, GP, RA, FP}; ++ ++//In MIPS64, F0~23 are all caller-saved registers ++FloatRegister caller_saved_fpu_registers[] = {F0, F12, F13}; ++ ++// We preserve all caller-saved register ++void MacroAssembler::pushad(){ ++ int i; ++ ++ // Fixed-point registers ++ int len = sizeof(caller_saved_registers) / sizeof(caller_saved_registers[0]); ++ daddiu(SP, SP, -1 * len * wordSize); ++ for (i = 0; i < len; i++) ++ { ++ sd(caller_saved_registers[i], SP, (len - i - 1) * wordSize); ++ } ++ ++ // Floating-point registers ++ len = sizeof(caller_saved_fpu_registers) / sizeof(caller_saved_fpu_registers[0]); ++ daddiu(SP, SP, -1 * len * wordSize); ++ for (i = 0; i < len; i++) ++ { ++ sdc1(caller_saved_fpu_registers[i], SP, (len - i - 1) * wordSize); ++ } ++}; ++ ++void MacroAssembler::popad(){ ++ int i; ++ ++ // Floating-point registers ++ int len = sizeof(caller_saved_fpu_registers) / sizeof(caller_saved_fpu_registers[0]); ++ for (i = 0; i < len; i++) ++ { ++ 
ldc1(caller_saved_fpu_registers[i], SP, (len - i - 1) * wordSize); ++ } ++ daddiu(SP, SP, len * wordSize); ++ ++ // Fixed-point registers ++ len = sizeof(caller_saved_registers) / sizeof(caller_saved_registers[0]); ++ for (i = 0; i < len; i++) ++ { ++ ld(caller_saved_registers[i], SP, (len - i - 1) * wordSize); ++ } ++ daddiu(SP, SP, len * wordSize); ++}; ++ ++// We preserve all caller-saved register except V0 ++void MacroAssembler::pushad_except_v0() { ++ int i; ++ ++ // Fixed-point registers ++ int len = sizeof(caller_saved_registers_except_v0) / sizeof(caller_saved_registers_except_v0[0]); ++ daddiu(SP, SP, -1 * len * wordSize); ++ for (i = 0; i < len; i++) { ++ sd(caller_saved_registers_except_v0[i], SP, (len - i - 1) * wordSize); ++ } ++ ++ // Floating-point registers ++ len = sizeof(caller_saved_fpu_registers) / sizeof(caller_saved_fpu_registers[0]); ++ daddiu(SP, SP, -1 * len * wordSize); ++ for (i = 0; i < len; i++) { ++ sdc1(caller_saved_fpu_registers[i], SP, (len - i - 1) * wordSize); ++ } ++} ++ ++void MacroAssembler::popad_except_v0() { ++ int i; ++ ++ // Floating-point registers ++ int len = sizeof(caller_saved_fpu_registers) / sizeof(caller_saved_fpu_registers[0]); ++ for (i = 0; i < len; i++) { ++ ldc1(caller_saved_fpu_registers[i], SP, (len - i - 1) * wordSize); ++ } ++ daddiu(SP, SP, len * wordSize); ++ ++ // Fixed-point registers ++ len = sizeof(caller_saved_registers_except_v0) / sizeof(caller_saved_registers_except_v0[0]); ++ for (i = 0; i < len; i++) { ++ ld(caller_saved_registers_except_v0[i], SP, (len - i - 1) * wordSize); ++ } ++ daddiu(SP, SP, len * wordSize); ++} ++ ++void MacroAssembler::push2(Register reg1, Register reg2) { ++ daddiu(SP, SP, -16); ++ sd(reg1, SP, 8); ++ sd(reg2, SP, 0); ++} ++ ++void MacroAssembler::pop2(Register reg1, Register reg2) { ++ ld(reg1, SP, 8); ++ ld(reg2, SP, 0); ++ daddiu(SP, SP, 16); ++} ++ ++// for UseCompressedOops Option ++void MacroAssembler::load_klass(Register dst, Register src) { ++ if(UseCompressedClassPointers){ ++ lwu(dst, Address(src, oopDesc::klass_offset_in_bytes())); ++ decode_klass_not_null(dst); ++ } else ++ ld(dst, src, oopDesc::klass_offset_in_bytes()); ++} ++ ++void MacroAssembler::store_klass(Register dst, Register src) { ++ if(UseCompressedClassPointers){ ++ encode_klass_not_null(src); ++ sw(src, dst, oopDesc::klass_offset_in_bytes()); ++ } else { ++ sd(src, dst, oopDesc::klass_offset_in_bytes()); ++ } ++} ++ ++void MacroAssembler::load_prototype_header(Register dst, Register src) { ++ load_klass(dst, src); ++ ld(dst, Address(dst, Klass::prototype_header_offset())); ++} ++ ++void MacroAssembler::store_klass_gap(Register dst, Register src) { ++ if (UseCompressedClassPointers) { ++ sw(src, dst, oopDesc::klass_gap_offset_in_bytes()); ++ } ++} ++ ++void MacroAssembler::access_load_at(BasicType type, DecoratorSet decorators, Register dst, Address src, ++ Register tmp1, Register thread_tmp) { ++ BarrierSetAssembler* bs = BarrierSet::barrier_set()->barrier_set_assembler(); ++ decorators = AccessInternal::decorator_fixup(decorators); ++ bool as_raw = (decorators & AS_RAW) != 0; ++ if (as_raw) { ++ bs->BarrierSetAssembler::load_at(this, decorators, type, dst, src, tmp1, thread_tmp); ++ } else { ++ bs->load_at(this, decorators, type, dst, src, tmp1, thread_tmp); ++ } ++} ++ ++void MacroAssembler::access_store_at(BasicType type, DecoratorSet decorators, Address dst, Register src, ++ Register tmp1, Register tmp2) { ++ BarrierSetAssembler* bs = BarrierSet::barrier_set()->barrier_set_assembler(); ++ decorators = 
AccessInternal::decorator_fixup(decorators); ++ bool as_raw = (decorators & AS_RAW) != 0; ++ if (as_raw) { ++ bs->BarrierSetAssembler::store_at(this, decorators, type, dst, src, tmp1, tmp2); ++ } else { ++ bs->store_at(this, decorators, type, dst, src, tmp1, tmp2); ++ } ++} ++ ++void MacroAssembler::load_heap_oop(Register dst, Address src, Register tmp1, ++ Register thread_tmp, DecoratorSet decorators) { ++ access_load_at(T_OBJECT, IN_HEAP | decorators, dst, src, tmp1, thread_tmp); ++} ++ ++// Doesn't do verfication, generates fixed size code ++void MacroAssembler::load_heap_oop_not_null(Register dst, Address src, Register tmp1, ++ Register thread_tmp, DecoratorSet decorators) { ++ access_load_at(T_OBJECT, IN_HEAP | IS_NOT_NULL | decorators, dst, src, tmp1, thread_tmp); ++} ++ ++void MacroAssembler::store_heap_oop(Address dst, Register src, Register tmp1, ++ Register tmp2, DecoratorSet decorators) { ++ access_store_at(T_OBJECT, IN_HEAP | decorators, dst, src, tmp1, tmp2); ++} ++ ++// Used for storing NULLs. ++void MacroAssembler::store_heap_oop_null(Address dst) { ++ access_store_at(T_OBJECT, IN_HEAP, dst, noreg, noreg, noreg); ++} ++ ++#ifdef ASSERT ++void MacroAssembler::verify_heapbase(const char* msg) { ++ assert (UseCompressedOops || UseCompressedClassPointers, "should be compressed"); ++ assert (Universe::heap() != NULL, "java heap should be initialized"); ++} ++#endif ++ ++ ++// Algorithm must match oop.inline.hpp encode_heap_oop. ++void MacroAssembler::encode_heap_oop(Register r) { ++#ifdef ASSERT ++ verify_heapbase("MacroAssembler::encode_heap_oop:heap base corrupted?"); ++#endif ++ verify_oop(r, "broken oop in encode_heap_oop"); ++ if (Universe::narrow_oop_base() == NULL) { ++ if (Universe::narrow_oop_shift() != 0) { ++ assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong"); ++ shr(r, LogMinObjAlignmentInBytes); ++ } ++ return; ++ } ++ ++ movz(r, S5_heapbase, r); ++ dsubu(r, r, S5_heapbase); ++ if (Universe::narrow_oop_shift() != 0) { ++ assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong"); ++ shr(r, LogMinObjAlignmentInBytes); ++ } ++} ++ ++void MacroAssembler::encode_heap_oop(Register dst, Register src) { ++#ifdef ASSERT ++ verify_heapbase("MacroAssembler::encode_heap_oop:heap base corrupted?"); ++#endif ++ verify_oop(src, "broken oop in encode_heap_oop"); ++ if (Universe::narrow_oop_base() == NULL) { ++ if (Universe::narrow_oop_shift() != 0) { ++ assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong"); ++ dsrl(dst, src, LogMinObjAlignmentInBytes); ++ } else { ++ if (dst != src) move(dst, src); ++ } ++ } else { ++ if (dst == src) { ++ movz(dst, S5_heapbase, dst); ++ dsubu(dst, dst, S5_heapbase); ++ if (Universe::narrow_oop_shift() != 0) { ++ assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong"); ++ shr(dst, LogMinObjAlignmentInBytes); ++ } ++ } else { ++ dsubu(dst, src, S5_heapbase); ++ if (Universe::narrow_oop_shift() != 0) { ++ assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong"); ++ shr(dst, LogMinObjAlignmentInBytes); ++ } ++ movz(dst, R0, src); ++ } ++ } ++} ++ ++void MacroAssembler::encode_heap_oop_not_null(Register r) { ++ assert (UseCompressedOops, "should be compressed"); ++#ifdef ASSERT ++ if (CheckCompressedOops) { ++ Label ok; ++ bne(r, R0, ok); ++ delayed()->nop(); ++ stop("null oop passed to encode_heap_oop_not_null"); ++ bind(ok); ++ } ++#endif ++ verify_oop(r, "broken oop in 
encode_heap_oop_not_null"); ++ if (Universe::narrow_oop_base() != NULL) { ++ dsubu(r, r, S5_heapbase); ++ } ++ if (Universe::narrow_oop_shift() != 0) { ++ assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong"); ++ shr(r, LogMinObjAlignmentInBytes); ++ } ++ ++} ++ ++void MacroAssembler::encode_heap_oop_not_null(Register dst, Register src) { ++ assert (UseCompressedOops, "should be compressed"); ++#ifdef ASSERT ++ if (CheckCompressedOops) { ++ Label ok; ++ bne(src, R0, ok); ++ delayed()->nop(); ++ stop("null oop passed to encode_heap_oop_not_null2"); ++ bind(ok); ++ } ++#endif ++ verify_oop(src, "broken oop in encode_heap_oop_not_null2"); ++ ++ if (Universe::narrow_oop_base() != NULL) { ++ dsubu(dst, src, S5_heapbase); ++ if (Universe::narrow_oop_shift() != 0) { ++ assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong"); ++ shr(dst, LogMinObjAlignmentInBytes); ++ } ++ } else { ++ if (Universe::narrow_oop_shift() != 0) { ++ assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong"); ++ dsrl(dst, src, LogMinObjAlignmentInBytes); ++ } else { ++ if (dst != src) move(dst, src); ++ } ++ } ++} ++ ++void MacroAssembler::decode_heap_oop(Register r) { ++#ifdef ASSERT ++ verify_heapbase("MacroAssembler::decode_heap_oop corrupted?"); ++#endif ++ if (Universe::narrow_oop_base() == NULL) { ++ if (Universe::narrow_oop_shift() != 0) { ++ assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong"); ++ shl(r, LogMinObjAlignmentInBytes); ++ } ++ } else { ++ move(AT, r); ++ if (Universe::narrow_oop_shift() != 0) { ++ assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong"); ++ shl(r, LogMinObjAlignmentInBytes); ++ } ++ daddu(r, r, S5_heapbase); ++ movz(r, R0, AT); ++ } ++ verify_oop(r, "broken oop in decode_heap_oop"); ++} ++ ++void MacroAssembler::decode_heap_oop(Register dst, Register src) { ++#ifdef ASSERT ++ verify_heapbase("MacroAssembler::decode_heap_oop corrupted?"); ++#endif ++ if (Universe::narrow_oop_base() == NULL) { ++ if (Universe::narrow_oop_shift() != 0) { ++ assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong"); ++ if (dst != src) nop(); // DON'T DELETE THIS GUY. ++ dsll(dst, src, LogMinObjAlignmentInBytes); ++ } else { ++ if (dst != src) move(dst, src); ++ } ++ } else { ++ if (dst == src) { ++ move(AT, dst); ++ if (Universe::narrow_oop_shift() != 0) { ++ assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong"); ++ shl(dst, LogMinObjAlignmentInBytes); ++ } ++ daddu(dst, dst, S5_heapbase); ++ movz(dst, R0, AT); ++ } else { ++ if (Universe::narrow_oop_shift() != 0) { ++ assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong"); ++ dsll(dst, src, LogMinObjAlignmentInBytes); ++ daddu(dst, dst, S5_heapbase); ++ } else { ++ daddu(dst, src, S5_heapbase); ++ } ++ movz(dst, R0, src); ++ } ++ } ++ verify_oop(dst, "broken oop in decode_heap_oop"); ++} ++ ++void MacroAssembler::decode_heap_oop_not_null(Register r) { ++ // Note: it will change flags ++ assert (UseCompressedOops, "should only be used for compressed headers"); ++ assert (Universe::heap() != NULL, "java heap should be initialized"); ++ // Cannot assert, unverified entry point counts instructions (see .ad file) ++ // vtableStubs also counts instructions in pd_code_size_limit. ++ // Also do not verify_oop as this is called by verify_oop. 
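As an illustrative aside (not part of the patch): the encode/decode routines above implement the standard compressed-oop arithmetic, subtract the heap base, shift right by the object-alignment shift, and keep NULL mapping to 0 (that is what the movz against S5_heapbase achieves). Roughly, in stand-alone C++, where heap_base and shift stand in for Universe::narrow_oop_base() and Universe::narrow_oop_shift():

    #include <cstdint>

    uint32_t encode_heap_oop(uintptr_t oop, uintptr_t heap_base, unsigned shift) {
      if (oop == 0) return 0;                              // NULL encodes to 0
      return static_cast<uint32_t>((oop - heap_base) >> shift);
    }

    uintptr_t decode_heap_oop(uint32_t narrow, uintptr_t heap_base, unsigned shift) {
      if (narrow == 0) return 0;                           // 0 decodes back to NULL
      return heap_base + (static_cast<uintptr_t>(narrow) << shift);
    }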
++ if (Universe::narrow_oop_shift() != 0) { ++ assert(LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong"); ++ shl(r, LogMinObjAlignmentInBytes); ++ if (Universe::narrow_oop_base() != NULL) { ++ daddu(r, r, S5_heapbase); ++ } ++ } else { ++ assert (Universe::narrow_oop_base() == NULL, "sanity"); ++ } ++} ++ ++void MacroAssembler::decode_heap_oop_not_null(Register dst, Register src) { ++ assert (UseCompressedOops, "should only be used for compressed headers"); ++ assert (Universe::heap() != NULL, "java heap should be initialized"); ++ ++ // Cannot assert, unverified entry point counts instructions (see .ad file) ++ // vtableStubs also counts instructions in pd_code_size_limit. ++ // Also do not verify_oop as this is called by verify_oop. ++ //lea(dst, Address(S5_heapbase, src, Address::times_8, 0)); ++ if (Universe::narrow_oop_shift() != 0) { ++ assert(LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong"); ++ if (LogMinObjAlignmentInBytes == Address::times_8) { ++ dsll(dst, src, LogMinObjAlignmentInBytes); ++ daddu(dst, dst, S5_heapbase); ++ } else { ++ dsll(dst, src, LogMinObjAlignmentInBytes); ++ if (Universe::narrow_oop_base() != NULL) { ++ daddu(dst, dst, S5_heapbase); ++ } ++ } ++ } else { ++ assert (Universe::narrow_oop_base() == NULL, "sanity"); ++ if (dst != src) { ++ move(dst, src); ++ } ++ } ++} ++ ++void MacroAssembler::encode_klass_not_null(Register r) { ++ if (Universe::narrow_klass_base() != NULL) { ++ assert(r != AT, "Encoding a klass in AT"); ++ set64(AT, (int64_t)Universe::narrow_klass_base()); ++ dsubu(r, r, AT); ++ } ++ if (Universe::narrow_klass_shift() != 0) { ++ assert (LogKlassAlignmentInBytes == Universe::narrow_klass_shift(), "decode alg wrong"); ++ shr(r, LogKlassAlignmentInBytes); ++ } ++} ++ ++void MacroAssembler::encode_klass_not_null(Register dst, Register src) { ++ if (dst == src) { ++ encode_klass_not_null(src); ++ } else { ++ if (Universe::narrow_klass_base() != NULL) { ++ set64(dst, (int64_t)Universe::narrow_klass_base()); ++ dsubu(dst, src, dst); ++ if (Universe::narrow_klass_shift() != 0) { ++ assert (LogKlassAlignmentInBytes == Universe::narrow_klass_shift(), "decode alg wrong"); ++ shr(dst, LogKlassAlignmentInBytes); ++ } ++ } else { ++ if (Universe::narrow_klass_shift() != 0) { ++ assert (LogKlassAlignmentInBytes == Universe::narrow_klass_shift(), "decode alg wrong"); ++ dsrl(dst, src, LogKlassAlignmentInBytes); ++ } else { ++ move(dst, src); ++ } ++ } ++ } ++} ++ ++// Function instr_size_for_decode_klass_not_null() counts the instructions ++// generated by decode_klass_not_null(register r) and reinit_heapbase(), ++// when (Universe::heap() != NULL). Hence, if the instructions they ++// generate change, then this method needs to be updated. ++int MacroAssembler::instr_size_for_decode_klass_not_null() { ++ assert (UseCompressedClassPointers, "only for compressed klass ptrs"); ++ if (Universe::narrow_klass_base() != NULL) { ++ // mov64 + addq + shlq? + mov64 (for reinit_heapbase()). ++ return (Universe::narrow_klass_shift() == 0 ? 4 * 9 : 4 * 10); ++ } else { ++ // longest load decode klass function, mov64, leaq ++ return (Universe::narrow_klass_shift() == 0 ? 
4 * 0 : 4 * 1); ++ } ++} ++ ++void MacroAssembler::decode_klass_not_null(Register r) { ++ assert (UseCompressedClassPointers, "should only be used for compressed headers"); ++ assert(r != AT, "Decoding a klass in AT"); ++ // Cannot assert, unverified entry point counts instructions (see .ad file) ++ // vtableStubs also counts instructions in pd_code_size_limit. ++ // Also do not verify_oop as this is called by verify_oop. ++ if (Universe::narrow_klass_shift() != 0) { ++ assert(LogKlassAlignmentInBytes == Universe::narrow_klass_shift(), "decode alg wrong"); ++ shl(r, LogKlassAlignmentInBytes); ++ } ++ if (Universe::narrow_klass_base() != NULL) { ++ set64(AT, (int64_t)Universe::narrow_klass_base()); ++ daddu(r, r, AT); ++ //Not neccessary for MIPS at all. ++ //reinit_heapbase(); ++ } ++} ++ ++void MacroAssembler::decode_klass_not_null(Register dst, Register src) { ++ assert (UseCompressedClassPointers, "should only be used for compressed headers"); ++ ++ if (dst == src) { ++ decode_klass_not_null(dst); ++ } else { ++ // Cannot assert, unverified entry point counts instructions (see .ad file) ++ // vtableStubs also counts instructions in pd_code_size_limit. ++ // Also do not verify_oop as this is called by verify_oop. ++ set64(dst, (int64_t)Universe::narrow_klass_base()); ++ if (Universe::narrow_klass_shift() != 0) { ++ assert(LogKlassAlignmentInBytes == Universe::narrow_klass_shift(), "decode alg wrong"); ++ assert(LogKlassAlignmentInBytes == Address::times_8, "klass not aligned on 64bits?"); ++ dsll(AT, src, Address::times_8); ++ daddu(dst, dst, AT); ++ } else { ++ daddu(dst, src, dst); ++ } ++ } ++} ++ ++void MacroAssembler::incrementl(Register reg, int value) { ++ if (value == min_jint) { ++ move(AT, value); ++ addu32(reg, reg, AT); ++ return; ++ } ++ if (value < 0) { decrementl(reg, -value); return; } ++ if (value == 0) { ; return; } ++ ++ move(AT, value); ++ addu32(reg, reg, AT); ++} ++ ++void MacroAssembler::decrementl(Register reg, int value) { ++ if (value == min_jint) { ++ move(AT, value); ++ subu32(reg, reg, AT); ++ return; ++ } ++ if (value < 0) { incrementl(reg, -value); return; } ++ if (value == 0) { ; return; } ++ ++ move(AT, value); ++ subu32(reg, reg, AT); ++} ++ ++void MacroAssembler::reinit_heapbase() { ++ if (UseCompressedOops || UseCompressedClassPointers) { ++ if (Universe::heap() != NULL) { ++ if (Universe::narrow_oop_base() == NULL) { ++ move(S5_heapbase, R0); ++ } else { ++ set64(S5_heapbase, (int64_t)Universe::narrow_ptrs_base()); ++ } ++ } else { ++ set64(S5_heapbase, (intptr_t)Universe::narrow_ptrs_base_addr()); ++ ld(S5_heapbase, S5_heapbase, 0); ++ } ++ } ++} ++ ++void MacroAssembler::check_klass_subtype(Register sub_klass, ++ Register super_klass, ++ Register temp_reg, ++ Label& L_success) { ++//implement ind gen_subtype_check ++ Label L_failure; ++ check_klass_subtype_fast_path(sub_klass, super_klass, temp_reg, &L_success, &L_failure, NULL); ++ check_klass_subtype_slow_path(sub_klass, super_klass, temp_reg, noreg, &L_success, NULL); ++ bind(L_failure); ++} ++ ++void MacroAssembler::check_klass_subtype_fast_path(Register sub_klass, ++ Register super_klass, ++ Register temp_reg, ++ Label* L_success, ++ Label* L_failure, ++ Label* L_slow_path, ++ RegisterOrConstant super_check_offset) { ++ assert_different_registers(sub_klass, super_klass, temp_reg); ++ bool must_load_sco = (super_check_offset.constant_or_zero() == -1); ++ if (super_check_offset.is_register()) { ++ assert_different_registers(sub_klass, super_klass, ++ super_check_offset.as_register()); ++ } else 
if (must_load_sco) { ++ assert(temp_reg != noreg, "supply either a temp or a register offset"); ++ } ++ ++ Label L_fallthrough; ++ int label_nulls = 0; ++ if (L_success == NULL) { L_success = &L_fallthrough; label_nulls++; } ++ if (L_failure == NULL) { L_failure = &L_fallthrough; label_nulls++; } ++ if (L_slow_path == NULL) { L_slow_path = &L_fallthrough; label_nulls++; } ++ assert(label_nulls <= 1, "at most one NULL in the batch"); ++ ++ int sc_offset = in_bytes(Klass::secondary_super_cache_offset()); ++ int sco_offset = in_bytes(Klass::super_check_offset_offset()); ++ // If the pointers are equal, we are done (e.g., String[] elements). ++ // This self-check enables sharing of secondary supertype arrays among ++ // non-primary types such as array-of-interface. Otherwise, each such ++ // type would need its own customized SSA. ++ // We move this check to the front of the fast path because many ++ // type checks are in fact trivially successful in this manner, ++ // so we get a nicely predicted branch right at the start of the check. ++ beq(sub_klass, super_klass, *L_success); ++ delayed()->nop(); ++ // Check the supertype display: ++ if (must_load_sco) { ++ lwu(temp_reg, super_klass, sco_offset); ++ super_check_offset = RegisterOrConstant(temp_reg); ++ } ++ daddu(AT, sub_klass, super_check_offset.register_or_noreg()); ++ ld(AT, AT, super_check_offset.constant_or_zero()); ++ ++ // This check has worked decisively for primary supers. ++ // Secondary supers are sought in the super_cache ('super_cache_addr'). ++ // (Secondary supers are interfaces and very deeply nested subtypes.) ++ // This works in the same check above because of a tricky aliasing ++ // between the super_cache and the primary super display elements. ++ // (The 'super_check_addr' can address either, as the case requires.) ++ // Note that the cache is updated below if it does not help us find ++ // what we need immediately. ++ // So if it was a primary super, we can just fail immediately. ++ // Otherwise, it's the slow path for us (no success at this point). ++ ++ if (super_check_offset.is_register()) { ++ beq(super_klass, AT, *L_success); ++ delayed()->nop(); ++ addiu(AT, super_check_offset.as_register(), -sc_offset); ++ if (L_failure == &L_fallthrough) { ++ beq(AT, R0, *L_slow_path); ++ delayed()->nop(); ++ } else { ++ bne_far(AT, R0, *L_failure); ++ delayed()->nop(); ++ b(*L_slow_path); ++ delayed()->nop(); ++ } ++ } else if (super_check_offset.as_constant() == sc_offset) { ++ // Need a slow path; fast failure is impossible. ++ if (L_slow_path == &L_fallthrough) { ++ beq(super_klass, AT, *L_success); ++ delayed()->nop(); ++ } else { ++ bne(super_klass, AT, *L_slow_path); ++ delayed()->nop(); ++ b(*L_success); ++ delayed()->nop(); ++ } ++ } else { ++ // No slow path; it's a fast decision. 
++ if (L_failure == &L_fallthrough) { ++ beq(super_klass, AT, *L_success); ++ delayed()->nop(); ++ } else { ++ bne_far(super_klass, AT, *L_failure); ++ delayed()->nop(); ++ b(*L_success); ++ delayed()->nop(); ++ } ++ } ++ ++ bind(L_fallthrough); ++ ++} ++ ++ ++void MacroAssembler::check_klass_subtype_slow_path(Register sub_klass, ++ Register super_klass, ++ Register temp_reg, ++ Register temp2_reg, ++ Label* L_success, ++ Label* L_failure, ++ bool set_cond_codes) { ++ if (temp2_reg == noreg) ++ temp2_reg = TSR; ++ assert_different_registers(sub_klass, super_klass, temp_reg, temp2_reg); ++#define IS_A_TEMP(reg) ((reg) == temp_reg || (reg) == temp2_reg) ++ ++ Label L_fallthrough; ++ int label_nulls = 0; ++ if (L_success == NULL) { L_success = &L_fallthrough; label_nulls++; } ++ if (L_failure == NULL) { L_failure = &L_fallthrough; label_nulls++; } ++ assert(label_nulls <= 1, "at most one NULL in the batch"); ++ ++ // a couple of useful fields in sub_klass: ++ int ss_offset = in_bytes(Klass::secondary_supers_offset()); ++ int sc_offset = in_bytes(Klass::secondary_super_cache_offset()); ++ Address secondary_supers_addr(sub_klass, ss_offset); ++ Address super_cache_addr( sub_klass, sc_offset); ++ ++ // Do a linear scan of the secondary super-klass chain. ++ // This code is rarely used, so simplicity is a virtue here. ++ // The repne_scan instruction uses fixed registers, which we must spill. ++ // Don't worry too much about pre-existing connections with the input regs. ++ ++#ifndef PRODUCT ++ int* pst_counter = &SharedRuntime::_partial_subtype_ctr; ++ ExternalAddress pst_counter_addr((address) pst_counter); ++#endif //PRODUCT ++ ++ // We will consult the secondary-super array. ++ ld(temp_reg, secondary_supers_addr); ++ // Load the array length. ++ lw(temp2_reg, Address(temp_reg, Array::length_offset_in_bytes())); ++ // Skip to start of data. ++ daddiu(temp_reg, temp_reg, Array::base_offset_in_bytes()); ++ ++ // OpenJDK8 never compresses klass pointers in secondary-super array. ++ Label Loop, subtype; ++ bind(Loop); ++ beq(temp2_reg, R0, *L_failure); ++ delayed()->nop(); ++ ld(AT, temp_reg, 0); ++ beq(AT, super_klass, subtype); ++ delayed()->daddiu(temp_reg, temp_reg, 1 * wordSize); ++ b(Loop); ++ delayed()->daddiu(temp2_reg, temp2_reg, -1); ++ ++ bind(subtype); ++ sd(super_klass, super_cache_addr); ++ if (L_success != &L_fallthrough) { ++ b(*L_success); ++ delayed()->nop(); ++ } ++ ++ // Success. Cache the super we found and proceed in triumph. ++#undef IS_A_TEMP ++ ++ bind(L_fallthrough); ++} ++ ++void MacroAssembler::get_vm_result(Register oop_result, Register java_thread) { ++ ld(oop_result, Address(java_thread, JavaThread::vm_result_offset())); ++ sd(R0, Address(java_thread, JavaThread::vm_result_offset())); ++ verify_oop(oop_result, "broken oop in call_VM_base"); ++} ++ ++void MacroAssembler::get_vm_result_2(Register metadata_result, Register java_thread) { ++ ld(metadata_result, Address(java_thread, JavaThread::vm_result_2_offset())); ++ sd(R0, Address(java_thread, JavaThread::vm_result_2_offset())); ++} ++ ++Address MacroAssembler::argument_address(RegisterOrConstant arg_slot, ++ int extra_slot_offset) { ++ // cf. TemplateTable::prepare_invoke(), if (load_receiver). 
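As an illustrative aside (not part of the patch): check_klass_subtype_fast_path and check_klass_subtype_slow_path above together form the usual two-level subtype test, a single load against the primary-super display (or the secondary-super cache) on the fast path, and a linear scan of the secondary supers that refreshes the cache on the slow path. A loose stand-alone C++ model; KlassModel and its fields are illustrative and do not reflect the real Klass layout:

    struct KlassModel {
      bool              is_primary;              // primary supers sit at a fixed display depth
      int               depth;
      const KlassModel* display[8];              // primary-super display
      const KlassModel* secondary_super_cache;   // last secondary super that matched
      const KlassModel* const* secondaries;      // slow-path list
      int               num_secondaries;
    };

    bool is_subtype_of(KlassModel* sub, const KlassModel* super) {
      if (sub == super) return true;                          // trivial self-check, done first
      if (super->is_primary)
        return sub->display[super->depth] == super;           // fast path: one load and compare
      if (sub->secondary_super_cache == super) return true;   // fast path: cache hit
      for (int i = 0; i < sub->num_secondaries; i++) {        // slow path: linear scan
        if (sub->secondaries[i] == super) {
          sub->secondary_super_cache = super;                 // cache the hit for next time
          return true;
        }
      }
      return false;
    }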
++ int stackElementSize = Interpreter::stackElementSize; ++ int offset = Interpreter::expr_offset_in_bytes(extra_slot_offset+0); ++#ifdef ASSERT ++ int offset1 = Interpreter::expr_offset_in_bytes(extra_slot_offset+1); ++ assert(offset1 - offset == stackElementSize, "correct arithmetic"); ++#endif ++ Register scale_reg = NOREG; ++ Address::ScaleFactor scale_factor = Address::no_scale; ++ if (arg_slot.is_constant()) { ++ offset += arg_slot.as_constant() * stackElementSize; ++ } else { ++ scale_reg = arg_slot.as_register(); ++ scale_factor = Address::times_8; ++ } ++ // We don't push RA on stack in prepare_invoke. ++ // offset += wordSize; // return PC is on stack ++ if(scale_reg==NOREG) return Address(SP, offset); ++ else { ++ dsll(scale_reg, scale_reg, scale_factor); ++ daddu(scale_reg, SP, scale_reg); ++ return Address(scale_reg, offset); ++ } ++} ++ ++SkipIfEqual::~SkipIfEqual() { ++ _masm->bind(_label); ++} ++ ++void MacroAssembler::load_sized_value(Register dst, Address src, size_t size_in_bytes, bool is_signed, Register dst2) { ++ switch (size_in_bytes) { ++ case 8: ld(dst, src); break; ++ case 4: lw(dst, src); break; ++ case 2: is_signed ? lh(dst, src) : lhu(dst, src); break; ++ case 1: is_signed ? lb( dst, src) : lbu( dst, src); break; ++ default: ShouldNotReachHere(); ++ } ++} ++ ++void MacroAssembler::store_sized_value(Address dst, Register src, size_t size_in_bytes, Register src2) { ++ switch (size_in_bytes) { ++ case 8: sd(src, dst); break; ++ case 4: sw(src, dst); break; ++ case 2: sh(src, dst); break; ++ case 1: sb(src, dst); break; ++ default: ShouldNotReachHere(); ++ } ++} ++ ++// Look up the method for a megamorphic invokeinterface call. ++// The target method is determined by . ++// The receiver klass is in recv_klass. ++// On success, the result will be in method_result, and execution falls through. ++// On failure, execution transfers to the given label. ++void MacroAssembler::lookup_interface_method(Register recv_klass, ++ Register intf_klass, ++ RegisterOrConstant itable_index, ++ Register method_result, ++ Register scan_temp, ++ Label& L_no_such_interface, ++ bool return_method) { ++ assert_different_registers(recv_klass, intf_klass, scan_temp, AT); ++ assert_different_registers(method_result, intf_klass, scan_temp, AT); ++ assert(recv_klass != method_result || !return_method, ++ "recv_klass can be destroyed when method isn't needed"); ++ ++ assert(itable_index.is_constant() || itable_index.as_register() == method_result, ++ "caller must use same register for non-constant itable index as for method"); ++ ++ // Compute start of first itableOffsetEntry (which is at the end of the vtable) ++ int vtable_base = in_bytes(Klass::vtable_start_offset()); ++ int itentry_off = itableMethodEntry::method_offset_in_bytes(); ++ int scan_step = itableOffsetEntry::size() * wordSize; ++ int vte_size = vtableEntry::size() * wordSize; ++ Address::ScaleFactor times_vte_scale = Address::times_ptr; ++ assert(vte_size == wordSize, "else adjust times_vte_scale"); ++ ++ lw(scan_temp, Address(recv_klass, Klass::vtable_length_offset())); ++ ++ // %%% Could store the aligned, prescaled offset in the klassoop. ++ dsll(scan_temp, scan_temp, times_vte_scale); ++ daddu(scan_temp, recv_klass, scan_temp); ++ daddiu(scan_temp, scan_temp, vtable_base); ++ if (HeapWordsPerLong > 1) { ++ // Round up to align_object_offset boundary ++ // see code for InstanceKlass::start_of_itable! 
++ round_to(scan_temp, BytesPerLong); ++ } ++ ++ if (return_method) { ++ // Adjust recv_klass by scaled itable_index, so we can free itable_index. ++ assert(itableMethodEntry::size() * wordSize == wordSize, "adjust the scaling in the code below"); ++ if (itable_index.is_constant()) { ++ set64(AT, (int)itable_index.is_constant()); ++ dsll(AT, AT, (int)Address::times_ptr); ++ } else { ++ dsll(AT, itable_index.as_register(), (int)Address::times_ptr); ++ } ++ daddu(AT, AT, recv_klass); ++ daddiu(recv_klass, AT, itentry_off); ++ } ++ ++ Label search, found_method; ++ ++ for (int peel = 1; peel >= 0; peel--) { ++ ld(method_result, Address(scan_temp, itableOffsetEntry::interface_offset_in_bytes())); ++ ++ if (peel) { ++ beq(intf_klass, method_result, found_method); ++ delayed()->nop(); ++ } else { ++ bne(intf_klass, method_result, search); ++ delayed()->nop(); ++ // (invert the test to fall through to found_method...) ++ } ++ ++ if (!peel) break; ++ ++ bind(search); ++ ++ // Check that the previous entry is non-null. A null entry means that ++ // the receiver class doesn't implement the interface, and wasn't the ++ // same as when the caller was compiled. ++ beq(method_result, R0, L_no_such_interface); ++ delayed()->nop(); ++ daddiu(scan_temp, scan_temp, scan_step); ++ } ++ ++ bind(found_method); ++ ++ if (return_method) { ++ // Got a hit. ++ lw(scan_temp, Address(scan_temp, itableOffsetEntry::offset_offset_in_bytes())); ++ if (UseLEXT1) { ++ gsldx(method_result, recv_klass, scan_temp, 0); ++ } else { ++ daddu(AT, recv_klass, scan_temp); ++ ld(method_result, AT, 0); ++ } ++ } ++} ++ ++// virtual method calling ++void MacroAssembler::lookup_virtual_method(Register recv_klass, ++ RegisterOrConstant vtable_index, ++ Register method_result) { ++ Register tmp = GP; ++ push(tmp); ++ ++ if (vtable_index.is_constant()) { ++ assert_different_registers(recv_klass, method_result, tmp); ++ } else { ++ assert_different_registers(recv_klass, method_result, vtable_index.as_register(), tmp); ++ } ++ const int base = in_bytes(Klass::vtable_start_offset()); ++ assert(vtableEntry::size() * wordSize == wordSize, "else adjust the scaling in the code below"); ++ if (vtable_index.is_constant()) { ++ set64(AT, vtable_index.as_constant()); ++ dsll(AT, AT, (int)Address::times_ptr); ++ } else { ++ dsll(AT, vtable_index.as_register(), (int)Address::times_ptr); ++ } ++ set64(tmp, base + vtableEntry::method_offset_in_bytes()); ++ daddu(tmp, tmp, AT); ++ daddu(tmp, tmp, recv_klass); ++ ld(method_result, tmp, 0); ++ ++ pop(tmp); ++} ++ ++void MacroAssembler::store_for_type_by_register(Register src_reg, Register tmp_reg, int disp, BasicType type, bool wide) { ++ switch (type) { ++ case T_LONG: ++ st_ptr(src_reg, tmp_reg, disp); ++ break; ++ case T_ARRAY: ++ case T_OBJECT: ++ if (UseCompressedOops && !wide) { ++ sw(src_reg, tmp_reg, disp); ++ } else { ++ st_ptr(src_reg, tmp_reg, disp); ++ } ++ break; ++ case T_ADDRESS: ++ st_ptr(src_reg, tmp_reg, disp); ++ break; ++ case T_INT: ++ sw(src_reg, tmp_reg, disp); ++ break; ++ case T_CHAR: ++ case T_SHORT: ++ sh(src_reg, tmp_reg, disp); ++ break; ++ case T_BYTE: ++ case T_BOOLEAN: ++ sb(src_reg, tmp_reg, disp); ++ break; ++ default: ++ ShouldNotReachHere(); ++ } ++} ++ ++void MacroAssembler::store_for_type(Register src_reg, Address addr, BasicType type, bool wide) { ++ Register tmp_reg = T9; ++ Register index_reg = addr.index(); ++ if (index_reg == NOREG) { ++ tmp_reg = NOREG; ++ } ++ ++ int scale = addr.scale(); ++ if (tmp_reg != NOREG && scale >= 0) { ++ dsll(tmp_reg, index_reg, 
scale); ++ } ++ ++ int disp = addr.disp(); ++ bool disp_is_simm16 = true; ++ if (!Assembler::is_simm16(disp)) { ++ disp_is_simm16 = false; ++ } ++ ++ Register base_reg = addr.base(); ++ if (tmp_reg != NOREG) { ++ assert_different_registers(tmp_reg, base_reg, index_reg); ++ } ++ ++ if (tmp_reg != NOREG) { ++ daddu(tmp_reg, base_reg, tmp_reg); ++ if (!disp_is_simm16) { ++ move(tmp_reg, disp); ++ daddu(tmp_reg, base_reg, tmp_reg); ++ } ++ store_for_type_by_register(src_reg, tmp_reg, disp_is_simm16 ? disp : 0, type, wide); ++ } else { ++ if (!disp_is_simm16) { ++ tmp_reg = T9; ++ assert_different_registers(tmp_reg, base_reg); ++ move(tmp_reg, disp); ++ daddu(tmp_reg, base_reg, tmp_reg); ++ } ++ store_for_type_by_register(src_reg, disp_is_simm16 ? base_reg : tmp_reg, disp_is_simm16 ? disp : 0, type, wide); ++ } ++} ++ ++void MacroAssembler::store_for_type_by_register(FloatRegister src_reg, Register tmp_reg, int disp, BasicType type) { ++ switch (type) { ++ case T_DOUBLE: ++ sdc1(src_reg, tmp_reg, disp); ++ break; ++ case T_FLOAT: ++ swc1(src_reg, tmp_reg, disp); ++ break; ++ default: ++ ShouldNotReachHere(); ++ } ++} ++ ++void MacroAssembler::store_for_type(FloatRegister src_reg, Address addr, BasicType type) { ++ Register tmp_reg = T9; ++ Register index_reg = addr.index(); ++ if (index_reg == NOREG) { ++ tmp_reg = NOREG; ++ } ++ ++ int scale = addr.scale(); ++ if (tmp_reg != NOREG && scale >= 0) { ++ dsll(tmp_reg, index_reg, scale); ++ } ++ ++ int disp = addr.disp(); ++ bool disp_is_simm16 = true; ++ if (!Assembler::is_simm16(disp)) { ++ disp_is_simm16 = false; ++ } ++ ++ Register base_reg = addr.base(); ++ if (tmp_reg != NOREG) { ++ assert_different_registers(tmp_reg, base_reg, index_reg); ++ } ++ ++ if (tmp_reg != NOREG) { ++ daddu(tmp_reg, base_reg, tmp_reg); ++ if (!disp_is_simm16) { ++ move(tmp_reg, disp); ++ daddu(tmp_reg, base_reg, tmp_reg); ++ } ++ store_for_type_by_register(src_reg, tmp_reg, disp_is_simm16 ? disp : 0, type); ++ } else { ++ if (!disp_is_simm16) { ++ tmp_reg = T9; ++ assert_different_registers(tmp_reg, base_reg); ++ move(tmp_reg, disp); ++ daddu(tmp_reg, base_reg, tmp_reg); ++ } ++ store_for_type_by_register(src_reg, disp_is_simm16 ? base_reg : tmp_reg, disp_is_simm16 ? 
disp : 0, type); ++ } ++} ++ ++void MacroAssembler::load_for_type_by_register(Register dst_reg, Register tmp_reg, int disp, BasicType type, bool wide) { ++ switch (type) { ++ case T_LONG: ++ ld_ptr(dst_reg, tmp_reg, disp); ++ break; ++ case T_ARRAY: ++ case T_OBJECT: ++ if (UseCompressedOops && !wide) { ++ lwu(dst_reg, tmp_reg, disp); ++ } else { ++ ld_ptr(dst_reg, tmp_reg, disp); ++ } ++ break; ++ case T_ADDRESS: ++ if (UseCompressedClassPointers && disp == oopDesc::klass_offset_in_bytes()) { ++ lwu(dst_reg, tmp_reg, disp); ++ } else { ++ ld_ptr(dst_reg, tmp_reg, disp); ++ } ++ break; ++ case T_INT: ++ lw(dst_reg, tmp_reg, disp); ++ break; ++ case T_CHAR: ++ lhu(dst_reg, tmp_reg, disp); ++ break; ++ case T_SHORT: ++ lh(dst_reg, tmp_reg, disp); ++ break; ++ case T_BYTE: ++ case T_BOOLEAN: ++ lb(dst_reg, tmp_reg, disp); ++ break; ++ default: ++ ShouldNotReachHere(); ++ } ++} ++ ++int MacroAssembler::load_for_type(Register dst_reg, Address addr, BasicType type, bool wide) { ++ int code_offset = 0; ++ Register tmp_reg = T9; ++ Register index_reg = addr.index(); ++ if (index_reg == NOREG) { ++ tmp_reg = NOREG; ++ } ++ ++ int scale = addr.scale(); ++ if (tmp_reg != NOREG && scale >= 0) { ++ dsll(tmp_reg, index_reg, scale); ++ } ++ ++ int disp = addr.disp(); ++ bool disp_is_simm16 = true; ++ if (!Assembler::is_simm16(disp)) { ++ disp_is_simm16 = false; ++ } ++ ++ Register base_reg = addr.base(); ++ if (tmp_reg != NOREG) { ++ assert_different_registers(tmp_reg, base_reg, index_reg); ++ } ++ ++ if (tmp_reg != NOREG) { ++ daddu(tmp_reg, base_reg, tmp_reg); ++ if (!disp_is_simm16) { ++ move(tmp_reg, disp); ++ daddu(tmp_reg, base_reg, tmp_reg); ++ } ++ code_offset = offset(); ++ load_for_type_by_register(dst_reg, tmp_reg, disp_is_simm16 ? disp : 0, type, wide); ++ } else { ++ if (!disp_is_simm16) { ++ tmp_reg = T9; ++ assert_different_registers(tmp_reg, base_reg); ++ move(tmp_reg, disp); ++ daddu(tmp_reg, base_reg, tmp_reg); ++ } ++ code_offset = offset(); ++ load_for_type_by_register(dst_reg, disp_is_simm16 ? base_reg : tmp_reg, disp_is_simm16 ? disp : 0, type, wide); ++ } ++ ++ return code_offset; ++} ++ ++#ifdef COMPILER2 ++// Compare strings, used for char[] and byte[]. ++void MacroAssembler::string_compare(Register str1, Register str2, ++ Register cnt1, Register cnt2, Register result, ++ int ae) { ++ Label L, Loop, haveResult, done; ++ ++ bool isLL = ae == StrIntrinsicNode::LL; ++ bool isLU = ae == StrIntrinsicNode::LU; ++ bool isUL = ae == StrIntrinsicNode::UL; ++ ++ bool str1_isL = isLL || isLU; ++ bool str2_isL = isLL || isUL; ++ ++ if (!str1_isL) srl(cnt1, cnt1, 1); ++ if (!str2_isL) srl(cnt2, cnt2, 1); ++ ++ // compute the and difference of lengths (in result) ++ subu(result, cnt1, cnt2); // result holds the difference of two lengths ++ ++ // compute the shorter length (in cnt1) ++ slt(AT, cnt2, cnt1); ++ movn(cnt1, cnt2, AT); ++ ++ // Now the shorter length is in cnt1 and cnt2 can be used as a tmp register ++ bind(Loop); // Loop begin ++ beq(cnt1, R0, done); ++ if (str1_isL) { ++ delayed()->lbu(AT, str1, 0); ++ } else { ++ delayed()->lhu(AT, str1, 0); ++ } ++ ++ // compare current character ++ if (str2_isL) { ++ lbu(cnt2, str2, 0); ++ } else { ++ lhu(cnt2, str2, 0); ++ } ++ bne(AT, cnt2, haveResult); ++ delayed()->addiu(str1, str1, str1_isL ? 1 : 2); ++ addiu(str2, str2, str2_isL ? 1 : 2); ++ b(Loop); ++ delayed()->addiu(cnt1, cnt1, -1); // Loop end ++ ++ bind(haveResult); ++ subu(result, AT, cnt2); ++ ++ bind(done); ++} ++ ++// Compare char[] or byte[] arrays or substrings. 
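As an illustrative aside (not part of the patch): ignoring the Latin-1/UTF-16 encoding variants selected by ae, the string_compare loop above computes the usual lexicographic result, the first differing element decides, otherwise the difference of lengths does. Equivalent stand-alone C++, using uint16_t elements for simplicity:

    #include <cstdint>

    int string_compare(const uint16_t* s1, int len1, const uint16_t* s2, int len2) {
      int n = len1 < len2 ? len1 : len2;        // walk the common prefix only
      for (int i = 0; i < n; i++) {
        if (s1[i] != s2[i])
          return int(s1[i]) - int(s2[i]);       // first difference decides the result
      }
      return len1 - len2;                       // otherwise the length difference does
    }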
++void MacroAssembler::arrays_equals(Register str1, Register str2, ++ Register cnt, Register tmp, Register result, ++ bool is_char) { ++ Label Loop, True, False; ++ ++ beq(str1, str2, True); // same char[] ? ++ delayed()->daddiu(result, R0, 1); ++ ++ beq(cnt, R0, True); ++ delayed()->nop(); // count == 0 ++ ++ bind(Loop); ++ ++ // compare current character ++ if (is_char) { ++ lhu(AT, str1, 0); ++ lhu(tmp, str2, 0); ++ } else { ++ lbu(AT, str1, 0); ++ lbu(tmp, str2, 0); ++ } ++ bne(AT, tmp, False); ++ delayed()->addiu(str1, str1, is_char ? 2 : 1); ++ addiu(cnt, cnt, -1); ++ bne(cnt, R0, Loop); ++ delayed()->addiu(str2, str2, is_char ? 2 : 1); ++ ++ b(True); ++ delayed()->nop(); ++ ++ bind(False); ++ daddiu(result, R0, 0); ++ ++ bind(True); ++} ++#endif // COMPILER2 ++ ++void MacroAssembler::load_for_type_by_register(FloatRegister dst_reg, Register tmp_reg, int disp, BasicType type) { ++ switch (type) { ++ case T_DOUBLE: ++ ldc1(dst_reg, tmp_reg, disp); ++ break; ++ case T_FLOAT: ++ lwc1(dst_reg, tmp_reg, disp); ++ break; ++ default: ++ ShouldNotReachHere(); ++ } ++} ++ ++int MacroAssembler::load_for_type(FloatRegister dst_reg, Address addr, BasicType type) { ++ int code_offset = 0; ++ Register tmp_reg = T9; ++ Register index_reg = addr.index(); ++ if (index_reg == NOREG) { ++ tmp_reg = NOREG; ++ } ++ ++ int scale = addr.scale(); ++ if (tmp_reg != NOREG && scale >= 0) { ++ dsll(tmp_reg, index_reg, scale); ++ } ++ ++ int disp = addr.disp(); ++ bool disp_is_simm16 = true; ++ if (!Assembler::is_simm16(disp)) { ++ disp_is_simm16 = false; ++ } ++ ++ Register base_reg = addr.base(); ++ if (tmp_reg != NOREG) { ++ assert_different_registers(tmp_reg, base_reg, index_reg); ++ } ++ ++ if (tmp_reg != NOREG) { ++ daddu(tmp_reg, base_reg, tmp_reg); ++ if (!disp_is_simm16) { ++ move(tmp_reg, disp); ++ daddu(tmp_reg, base_reg, tmp_reg); ++ } ++ code_offset = offset(); ++ load_for_type_by_register(dst_reg, tmp_reg, disp_is_simm16 ? disp : 0, type); ++ } else { ++ if (!disp_is_simm16) { ++ tmp_reg = T9; ++ assert_different_registers(tmp_reg, base_reg); ++ move(tmp_reg, disp); ++ daddu(tmp_reg, base_reg, tmp_reg); ++ } ++ code_offset = offset(); ++ load_for_type_by_register(dst_reg, disp_is_simm16 ? base_reg : tmp_reg, disp_is_simm16 ? disp : 0, type); ++ } ++ ++ return code_offset; ++} ++ ++void MacroAssembler::clear_jweak_tag(Register possibly_jweak) { ++ const int32_t inverted_jweak_mask = ~static_cast(JNIHandles::weak_tag_mask); ++ STATIC_ASSERT(inverted_jweak_mask == -2); // otherwise check this code ++ // The inverted mask is sign-extended ++ move(AT, inverted_jweak_mask); ++ andr(possibly_jweak, AT, possibly_jweak); ++} ++ ++void MacroAssembler::resolve_jobject(Register value, ++ Register thread, ++ Register tmp) { ++ assert_different_registers(value, thread, tmp); ++ Label done, not_weak; ++ beq(value, R0, done); // Use NULL as-is. ++ delayed()->nop(); ++ move(AT, JNIHandles::weak_tag_mask); // Test for jweak tag. ++ andr(AT, value, AT); ++ beq(AT, R0, not_weak); ++ delayed()->nop(); ++ // Resolve jweak. ++ access_load_at(T_OBJECT, IN_NATIVE | ON_PHANTOM_OOP_REF, ++ value, Address(value, -JNIHandles::weak_tag_value), tmp, thread); ++ verify_oop(value); ++ b(done); ++ delayed()->nop(); ++ bind(not_weak); ++ // Resolve (untagged) jobject. 
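As an illustrative aside (not part of the patch): clear_jweak_tag and the resolve_jobject logic above treat the low bit of a JNI handle as the weak tag; NULL passes through, a tagged handle is untagged before the referent is loaded, and an untagged handle is loaded directly. A simplified stand-alone C++ sketch that leaves out the GC barriers routed through access_load_at:

    #include <cstdint>

    void* resolve_jobject(uintptr_t handle) {
      if (handle == 0) return nullptr;                 // NULL handle is used as-is
      const uintptr_t weak_tag_mask = 1;               // low bit marks a weak handle
      if (handle & weak_tag_mask)
        handle &= ~weak_tag_mask;                      // strip the tag before dereferencing
      return *reinterpret_cast<void**>(handle);        // load the referenced oop
    }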
++ access_load_at(T_OBJECT, IN_NATIVE, value, Address(value, 0), tmp, thread); ++ verify_oop(value); ++ bind(done); ++} ++ ++void MacroAssembler::cmp_cmov(Register op1, ++ Register op2, ++ Register dst, ++ Register src, ++ CMCompare cmp, ++ bool is_signed) { ++ switch (cmp) { ++ case EQ: ++ subu(AT, op1, op2); ++ movz(dst, src, AT); ++ break; ++ ++ case NE: ++ subu(AT, op1, op2); ++ movn(dst, src, AT); ++ break; ++ ++ case GT: ++ if (is_signed) { ++ slt(AT, op2, op1); ++ } else { ++ sltu(AT, op2, op1); ++ } ++ movn(dst, src, AT); ++ break; ++ ++ case GE: ++ if (is_signed) { ++ slt(AT, op1, op2); ++ } else { ++ sltu(AT, op1, op2); ++ } ++ movz(dst, src, AT); ++ break; ++ ++ case LT: ++ if (is_signed) { ++ slt(AT, op1, op2); ++ } else { ++ sltu(AT, op1, op2); ++ } ++ movn(dst, src, AT); ++ break; ++ ++ case LE: ++ if (is_signed) { ++ slt(AT, op2, op1); ++ } else { ++ sltu(AT, op2, op1); ++ } ++ movz(dst, src, AT); ++ break; ++ ++ default: ++ Unimplemented(); ++ } ++} ++ ++void MacroAssembler::cmp_cmov(FloatRegister op1, ++ FloatRegister op2, ++ Register dst, ++ Register src, ++ CMCompare cmp, ++ bool is_float) { ++ switch(cmp) { ++ case EQ: ++ if (is_float) { ++ c_eq_s(op1, op2); ++ } else { ++ c_eq_d(op1, op2); ++ } ++ movt(dst, src); ++ break; ++ ++ case NE: ++ if (is_float) { ++ c_eq_s(op1, op2); ++ } else { ++ c_eq_d(op1, op2); ++ } ++ movf(dst, src); ++ break; ++ ++ case GT: ++ if (is_float) { ++ c_ule_s(op1, op2); ++ } else { ++ c_ule_d(op1, op2); ++ } ++ movf(dst, src); ++ break; ++ ++ case GE: ++ if (is_float) { ++ c_ult_s(op1, op2); ++ } else { ++ c_ult_d(op1, op2); ++ } ++ movf(dst, src); ++ break; ++ ++ case LT: ++ if (is_float) { ++ c_ult_s(op1, op2); ++ } else { ++ c_ult_d(op1, op2); ++ } ++ movt(dst, src); ++ break; ++ ++ case LE: ++ if (is_float) { ++ c_ule_s(op1, op2); ++ } else { ++ c_ule_d(op1, op2); ++ } ++ movt(dst, src); ++ break; ++ ++ default: ++ Unimplemented(); ++ } ++} ++ ++void MacroAssembler::cmp_cmov(FloatRegister op1, ++ FloatRegister op2, ++ FloatRegister dst, ++ FloatRegister src, ++ CMCompare cmp, ++ bool is_float) { ++ switch(cmp) { ++ case EQ: ++ if (!is_float) { ++ c_eq_d(op1, op2); ++ movt_d(dst, src); ++ } else { ++ c_eq_s(op1, op2); ++ movt_s(dst, src); ++ } ++ break; ++ ++ case NE: ++ if (!is_float) { ++ c_eq_d(op1, op2); ++ movf_d(dst, src); ++ } else { ++ c_eq_s(op1, op2); ++ movf_s(dst, src); ++ } ++ break; ++ ++ case GT: ++ if (!is_float) { ++ c_ule_d(op1, op2); ++ movf_d(dst, src); ++ } else { ++ c_ule_s(op1, op2); ++ movf_s(dst, src); ++ } ++ break; ++ ++ case GE: ++ if (!is_float) { ++ c_ult_d(op1, op2); ++ movf_d(dst, src); ++ } else { ++ c_ult_s(op1, op2); ++ movf_s(dst, src); ++ } ++ break; ++ ++ case LT: ++ if (!is_float) { ++ c_ult_d(op1, op2); ++ movt_d(dst, src); ++ } else { ++ c_ult_s(op1, op2); ++ movt_s(dst, src); ++ } ++ break; ++ ++ case LE: ++ if (!is_float) { ++ c_ule_d(op1, op2); ++ movt_d(dst, src); ++ } else { ++ c_ule_s(op1, op2); ++ movt_s(dst, src); ++ } ++ break; ++ ++ default: ++ Unimplemented(); ++ } ++} ++ ++void MacroAssembler::cmp_cmov(Register op1, ++ Register op2, ++ FloatRegister dst, ++ FloatRegister src, ++ CMCompare cmp, ++ bool is_float) { ++ Label L; ++ ++ switch(cmp) { ++ case EQ: ++ bne(op1, op2, L); ++ delayed()->nop(); ++ if (is_float) { ++ mov_s(dst, src); ++ } else { ++ mov_d(dst, src); ++ } ++ bind(L); ++ break; ++ ++ case NE: ++ beq(op1, op2, L); ++ delayed()->nop(); ++ if (is_float) { ++ mov_s(dst, src); ++ } else { ++ mov_d(dst, src); ++ } ++ bind(L); ++ break; ++ ++ case GT: ++ slt(AT, op2, op1); ++ 
beq(AT, R0, L); ++ delayed()->nop(); ++ if (is_float) { ++ mov_s(dst, src); ++ } else { ++ mov_d(dst, src); ++ } ++ bind(L); ++ break; ++ ++ case GE: ++ slt(AT, op1, op2); ++ bne(AT, R0, L); ++ delayed()->nop(); ++ if (is_float) { ++ mov_s(dst, src); ++ } else { ++ mov_d(dst, src); ++ } ++ bind(L); ++ break; ++ ++ case LT: ++ slt(AT, op1, op2); ++ beq(AT, R0, L); ++ delayed()->nop(); ++ if (is_float) { ++ mov_s(dst, src); ++ } else { ++ mov_d(dst, src); ++ } ++ bind(L); ++ break; ++ ++ case LE: ++ slt(AT, op2, op1); ++ bne(AT, R0, L); ++ delayed()->nop(); ++ if (is_float) { ++ mov_s(dst, src); ++ } else { ++ mov_d(dst, src); ++ } ++ bind(L); ++ break; ++ ++ default: ++ Unimplemented(); ++ } ++} ++ ++void MacroAssembler::gs_loadstore(Register reg, Register base, Register index, int disp, int type) { ++ switch (type) { ++ case STORE_BYTE: ++ gssbx(reg, base, index, disp); ++ break; ++ case STORE_CHAR: ++ case STORE_SHORT: ++ gsshx(reg, base, index, disp); ++ break; ++ case STORE_INT: ++ gsswx(reg, base, index, disp); ++ break; ++ case STORE_LONG: ++ gssdx(reg, base, index, disp); ++ break; ++ case LOAD_BYTE: ++ gslbx(reg, base, index, disp); ++ break; ++ case LOAD_SHORT: ++ gslhx(reg, base, index, disp); ++ break; ++ case LOAD_INT: ++ gslwx(reg, base, index, disp); ++ break; ++ case LOAD_LONG: ++ gsldx(reg, base, index, disp); ++ break; ++ default: ++ ShouldNotReachHere(); ++ } ++} ++ ++void MacroAssembler::gs_loadstore(FloatRegister reg, Register base, Register index, int disp, int type) { ++ switch (type) { ++ case STORE_FLOAT: ++ gsswxc1(reg, base, index, disp); ++ break; ++ case STORE_DOUBLE: ++ gssdxc1(reg, base, index, disp); ++ break; ++ case LOAD_FLOAT: ++ gslwxc1(reg, base, index, disp); ++ break; ++ case LOAD_DOUBLE: ++ gsldxc1(reg, base, index, disp); ++ break; ++ default: ++ ShouldNotReachHere(); ++ } ++} ++ ++void MacroAssembler::loadstore(Register reg, Register base, int disp, int type) { ++ switch (type) { ++ case STORE_BYTE: ++ sb(reg, base, disp); ++ break; ++ case STORE_CHAR: ++ case STORE_SHORT: ++ sh(reg, base, disp); ++ break; ++ case STORE_INT: ++ sw(reg, base, disp); ++ break; ++ case STORE_LONG: ++ sd(reg, base, disp); ++ break; ++ case LOAD_BYTE: ++ lb(reg, base, disp); ++ break; ++ case LOAD_U_BYTE: ++ lbu(reg, base, disp); ++ break; ++ case LOAD_SHORT: ++ lh(reg, base, disp); ++ break; ++ case LOAD_U_SHORT: ++ lhu(reg, base, disp); ++ break; ++ case LOAD_INT: ++ lw(reg, base, disp); ++ break; ++ case LOAD_U_INT: ++ lwu(reg, base, disp); ++ break; ++ case LOAD_LONG: ++ ld(reg, base, disp); ++ break; ++ case LOAD_LINKED_LONG: ++ lld(reg, base, disp); ++ break; ++ default: ++ ShouldNotReachHere(); ++ } ++} ++ ++void MacroAssembler::loadstore(FloatRegister reg, Register base, int disp, int type) { ++ switch (type) { ++ case STORE_FLOAT: ++ swc1(reg, base, disp); ++ break; ++ case STORE_DOUBLE: ++ sdc1(reg, base, disp); ++ break; ++ case LOAD_FLOAT: ++ lwc1(reg, base, disp); ++ break; ++ case LOAD_DOUBLE: ++ ldc1(reg, base, disp); ++ break; ++ default: ++ ShouldNotReachHere(); ++ } ++} +diff --git a/src/hotspot/cpu/mips/macroAssembler_mips.hpp b/src/hotspot/cpu/mips/macroAssembler_mips.hpp +new file mode 100644 +index 0000000000..55ec29e91b +--- /dev/null ++++ b/src/hotspot/cpu/mips/macroAssembler_mips.hpp +@@ -0,0 +1,818 @@ ++/* ++ * Copyright (c) 1997, 2013, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#ifndef CPU_MIPS_VM_MACROASSEMBLER_MIPS_HPP ++#define CPU_MIPS_VM_MACROASSEMBLER_MIPS_HPP ++ ++#include "asm/assembler.hpp" ++#include "runtime/rtmLocking.hpp" ++#include "utilities/macros.hpp" ++ ++// MacroAssembler extends Assembler by frequently used macros. ++// ++// Instructions for which a 'better' code sequence exists depending ++// on arguments should also go in here. ++ ++class MacroAssembler: public Assembler { ++ friend class LIR_Assembler; ++ friend class Runtime1; // as_Address() ++ ++ public: ++ // Compare code ++ typedef enum { ++ EQ = 0x01, ++ NE = 0x02, ++ GT = 0x03, ++ GE = 0x04, ++ LT = 0x05, ++ LE = 0x06 ++ } CMCompare; ++ ++ protected: ++ ++ // Support for VM calls ++ // ++ // This is the base routine called by the different versions of call_VM_leaf. The interpreter ++ // may customize this version by overriding it for its purposes (e.g., to save/restore ++ // additional registers when doing a VM call). ++ #define VIRTUAL virtual ++ ++ VIRTUAL void call_VM_leaf_base( ++ address entry_point, // the entry point ++ int number_of_arguments // the number of arguments to pop after the call ++ ); ++ ++ // This is the base routine called by the different versions of call_VM. The interpreter ++ // may customize this version by overriding it for its purposes (e.g., to save/restore ++ // additional registers when doing a VM call). ++ // ++ // If no java_thread register is specified (noreg) than TREG will be used instead. call_VM_base ++ // returns the register which contains the thread upon return. If a thread register has been ++ // specified, the return value will correspond to that register. If no last_java_sp is specified ++ // (noreg) than sp will be used instead. 
++ VIRTUAL void call_VM_base( // returns the register containing the thread upon return ++ Register oop_result, // where an oop-result ends up if any; use noreg otherwise ++ Register java_thread, // the thread if computed before ; use noreg otherwise ++ Register last_java_sp, // to set up last_Java_frame in stubs; use noreg otherwise ++ address entry_point, // the entry point ++ int number_of_arguments, // the number of arguments (w/o thread) to pop after the call ++ bool check_exceptions // whether to check for pending exceptions after return ++ ); ++ ++ void call_VM_helper(Register oop_result, address entry_point, int number_of_arguments, bool check_exceptions = true); ++ ++ // helpers for FPU flag access ++ // tmp is a temporary register, if none is available use noreg ++ ++ public: ++ MacroAssembler(CodeBuffer* code) : Assembler(code) {} ++ ++ // These routines should emit JVMTI PopFrame and ForceEarlyReturn handling code. ++ // The implementation is only non-empty for the InterpreterMacroAssembler, ++ // as only the interpreter handles PopFrame and ForceEarlyReturn requests. ++ virtual void check_and_handle_popframe(Register java_thread); ++ virtual void check_and_handle_earlyret(Register java_thread); ++ ++ Address as_Address(AddressLiteral adr); ++ Address as_Address(ArrayAddress adr); ++ ++ static intptr_t i[32]; ++ static float f[32]; ++ static void print(outputStream *s); ++ ++ static int i_offset(unsigned int k); ++ static int f_offset(unsigned int k); ++ ++ static void save_registers(MacroAssembler *masm); ++ static void restore_registers(MacroAssembler *masm); ++ ++ // Support for NULL-checks ++ // ++ // Generates code that causes a NULL OS exception if the content of reg is NULL. ++ // If the accessed location is M[reg + offset] and the offset is known, provide the ++ // offset. No explicit code generation is needed if the offset is within a certain ++ // range (0 <= offset <= page_size). ++ ++ void null_check(Register reg, int offset = -1); ++ static bool needs_explicit_null_check(intptr_t offset); ++ ++ // Required platform-specific helpers for Label::patch_instructions. ++ // They _shadow_ the declarations in AbstractAssembler, which are undefined. ++ void pd_patch_instruction(address branch, address target); ++ ++ address emit_trampoline_stub(int insts_call_instruction_offset, address target); ++ ++ // Support for inc/dec with optimal instruction selection depending on value ++ void incrementl(Register reg, int value = 1); ++ void decrementl(Register reg, int value = 1); ++ ++ ++ // Alignment ++ void align(int modulus); ++ ++ ++ // Stack frame creation/removal ++ void enter(); ++ void leave(); ++ ++ // Support for getting the JavaThread pointer (i.e.; a reference to thread-local information) ++ // The pointer will be loaded into the thread register. ++ void get_thread(Register thread); ++ ++ ++ // Support for VM calls ++ // ++ // It is imperative that all calls into the VM are handled via the call_VM macros. ++ // They make sure that the stack linkage is setup correctly. call_VM's correspond ++ // to ENTRY/ENTRY_X entry points while call_VM_leaf's correspond to LEAF entry points. 
++ ++ ++ void call_VM(Register oop_result, ++ address entry_point, ++ bool check_exceptions = true); ++ void call_VM(Register oop_result, ++ address entry_point, ++ Register arg_1, ++ bool check_exceptions = true); ++ void call_VM(Register oop_result, ++ address entry_point, ++ Register arg_1, Register arg_2, ++ bool check_exceptions = true); ++ void call_VM(Register oop_result, ++ address entry_point, ++ Register arg_1, Register arg_2, Register arg_3, ++ bool check_exceptions = true); ++ ++ // Overloadings with last_Java_sp ++ void call_VM(Register oop_result, ++ Register last_java_sp, ++ address entry_point, ++ int number_of_arguments = 0, ++ bool check_exceptions = true); ++ void call_VM(Register oop_result, ++ Register last_java_sp, ++ address entry_point, ++ Register arg_1, bool ++ check_exceptions = true); ++ void call_VM(Register oop_result, ++ Register last_java_sp, ++ address entry_point, ++ Register arg_1, Register arg_2, ++ bool check_exceptions = true); ++ void call_VM(Register oop_result, ++ Register last_java_sp, ++ address entry_point, ++ Register arg_1, Register arg_2, Register arg_3, ++ bool check_exceptions = true); ++ ++ void get_vm_result (Register oop_result, Register thread); ++ void get_vm_result_2(Register metadata_result, Register thread); ++ void call_VM_leaf(address entry_point, ++ int number_of_arguments = 0); ++ void call_VM_leaf(address entry_point, ++ Register arg_1); ++ void call_VM_leaf(address entry_point, ++ Register arg_1, Register arg_2); ++ void call_VM_leaf(address entry_point, ++ Register arg_1, Register arg_2, Register arg_3); ++ ++ // Super call_VM calls - correspond to MacroAssembler::call_VM(_leaf) calls ++ void super_call_VM_leaf(address entry_point); ++ void super_call_VM_leaf(address entry_point, Register arg_1); ++ void super_call_VM_leaf(address entry_point, Register arg_1, Register arg_2); ++ void super_call_VM_leaf(address entry_point, Register arg_1, Register arg_2, Register arg_3); ++ ++ // last Java Frame (fills frame anchor) ++ void set_last_Java_frame(Register thread, ++ Register last_java_sp, ++ Register last_java_fp, ++ address last_java_pc); ++ ++ // thread in the default location (S6) ++ void set_last_Java_frame(Register last_java_sp, ++ Register last_java_fp, ++ address last_java_pc); ++ ++ void reset_last_Java_frame(Register thread, bool clear_fp); ++ ++ // thread in the default location (S6) ++ void reset_last_Java_frame(bool clear_fp); ++ ++ // jobjects ++ void clear_jweak_tag(Register possibly_jweak); ++ void resolve_jobject(Register value, Register thread, Register tmp); ++ ++ // C 'boolean' to Java boolean: x == 0 ? 
0 : 1 ++ void c2bool(Register x); ++ ++ void resolve_oop_handle(Register result, Register tmp); ++ void load_mirror(Register dst, Register method, Register tmp); ++ ++ // oop manipulations ++ void load_klass(Register dst, Register src); ++ void store_klass(Register dst, Register src); ++ ++ void access_load_at(BasicType type, DecoratorSet decorators, Register dst, Address src, ++ Register tmp1, Register thread_tmp); ++ void access_store_at(BasicType type, DecoratorSet decorators, Address dst, Register src, ++ Register tmp1, Register tmp2); ++ ++ void load_heap_oop(Register dst, Address src, Register tmp1 = noreg, ++ Register thread_tmp = noreg, DecoratorSet decorators = 0); ++ void load_heap_oop_not_null(Register dst, Address src, Register tmp1 = noreg, ++ Register thread_tmp = noreg, DecoratorSet decorators = 0); ++ void store_heap_oop(Address dst, Register src, Register tmp1 = noreg, ++ Register tmp2 = noreg, DecoratorSet decorators = 0); ++ ++ // Used for storing NULL. All other oop constants should be ++ // stored using routines that take a jobject. ++ void store_heap_oop_null(Address dst); ++ ++ void load_prototype_header(Register dst, Register src); ++ ++ void store_klass_gap(Register dst, Register src); ++ ++ void encode_heap_oop(Register r); ++ void encode_heap_oop(Register dst, Register src); ++ void decode_heap_oop(Register r); ++ void decode_heap_oop(Register dst, Register src); ++ void encode_heap_oop_not_null(Register r); ++ void decode_heap_oop_not_null(Register r); ++ void encode_heap_oop_not_null(Register dst, Register src); ++ void decode_heap_oop_not_null(Register dst, Register src); ++ ++ void encode_klass_not_null(Register r); ++ void decode_klass_not_null(Register r); ++ void encode_klass_not_null(Register dst, Register src); ++ void decode_klass_not_null(Register dst, Register src); ++ ++ // Returns the byte size of the instructions generated by decode_klass_not_null() ++ // when compressed klass pointers are being used. 
++ static int instr_size_for_decode_klass_not_null(); ++ ++ // if heap base register is used - reinit it with the correct value ++ void reinit_heapbase(); ++ ++ DEBUG_ONLY(void verify_heapbase(const char* msg);) ++ ++ void set_narrow_klass(Register dst, Klass* k); ++ void set_narrow_oop(Register dst, jobject obj); ++ ++ ++ ++ ++ // Sign extension ++ void sign_extend_short(Register reg) { /*dsll32(reg, reg, 16); dsra32(reg, reg, 16);*/ seh(reg, reg); } ++ void sign_extend_byte(Register reg) { /*dsll32(reg, reg, 24); dsra32(reg, reg, 24);*/ seb(reg, reg); } ++ void rem_s(FloatRegister fd, FloatRegister fs, FloatRegister ft, FloatRegister tmp); ++ void rem_d(FloatRegister fd, FloatRegister fs, FloatRegister ft, FloatRegister tmp); ++ ++ // allocation ++ void eden_allocate( ++ Register obj, // result: pointer to object after successful allocation ++ Register var_size_in_bytes, // object size in bytes if unknown at compile time; invalid otherwise ++ int con_size_in_bytes, // object size in bytes if known at compile time ++ Register t1, // temp register ++ Register t2, ++ Label& slow_case // continuation point if fast allocation fails ++ ); ++ void tlab_allocate( ++ Register obj, // result: pointer to object after successful allocation ++ Register var_size_in_bytes, // object size in bytes if unknown at compile time; invalid otherwise ++ int con_size_in_bytes, // object size in bytes if known at compile time ++ Register t1, // temp register ++ Register t2, // temp register ++ Label& slow_case // continuation point if fast allocation fails ++ ); ++ void incr_allocated_bytes(Register thread, ++ Register var_size_in_bytes, int con_size_in_bytes, ++ Register t1 = noreg); ++ // interface method calling ++ void lookup_interface_method(Register recv_klass, ++ Register intf_klass, ++ RegisterOrConstant itable_index, ++ Register method_result, ++ Register scan_temp, ++ Label& no_such_interface, ++ bool return_method = true); ++ ++ // virtual method calling ++ void lookup_virtual_method(Register recv_klass, ++ RegisterOrConstant vtable_index, ++ Register method_result); ++ ++ // Test sub_klass against super_klass, with fast and slow paths. ++ ++ // The fast path produces a tri-state answer: yes / no / maybe-slow. ++ // One of the three labels can be NULL, meaning take the fall-through. ++ // If super_check_offset is -1, the value is loaded up from super_klass. ++ // No registers are killed, except temp_reg. ++ void check_klass_subtype_fast_path(Register sub_klass, ++ Register super_klass, ++ Register temp_reg, ++ Label* L_success, ++ Label* L_failure, ++ Label* L_slow_path, ++ RegisterOrConstant super_check_offset = RegisterOrConstant(-1)); ++ ++ // The rest of the type check; must be wired to a corresponding fast path. ++ // It does not repeat the fast path logic, so don't use it standalone. ++ // The temp_reg and temp2_reg can be noreg, if no temps are available. ++ // Updates the sub's secondary super cache as necessary. ++ // If set_cond_codes, condition codes will be Z on success, NZ on failure. ++ void check_klass_subtype_slow_path(Register sub_klass, ++ Register super_klass, ++ Register temp_reg, ++ Register temp2_reg, ++ Label* L_success, ++ Label* L_failure, ++ bool set_cond_codes = false); ++ ++ // Simplified, combined version, good for typical uses. ++ // Falls through on failure. 
++ void check_klass_subtype(Register sub_klass, ++ Register super_klass, ++ Register temp_reg, ++ Label& L_success); ++ ++ ++ // Debugging ++ ++ // only if +VerifyOops ++ void verify_oop(Register reg, const char* s = "broken oop"); ++ void verify_oop_addr(Address addr, const char * s = "broken oop addr"); ++ void verify_oop_subroutine(); ++ // TODO: verify method and klass metadata (compare against vptr?) ++ void _verify_method_ptr(Register reg, const char * msg, const char * file, int line) {} ++ void _verify_klass_ptr(Register reg, const char * msg, const char * file, int line){} ++ ++ #define verify_method_ptr(reg) _verify_method_ptr(reg, "broken method " #reg, __FILE__, __LINE__) ++ #define verify_klass_ptr(reg) _verify_klass_ptr(reg, "broken klass " #reg, __FILE__, __LINE__) ++ ++ // only if +VerifyFPU ++ void verify_FPU(int stack_depth, const char* s = "illegal FPU state"); ++ ++ // prints msg, dumps registers and stops execution ++ void stop(const char* msg); ++ ++ // prints msg and continues ++ void warn(const char* msg); ++ ++ static void debug(char* msg/*, RegistersForDebugging* regs*/); ++ static void debug64(char* msg, int64_t pc, int64_t regs[]); ++ ++ void print_reg(Register reg); ++ void print_reg(FloatRegister reg); ++ ++ void untested() { stop("untested"); } ++ ++ void unimplemented(const char* what = ""); ++ ++ void should_not_reach_here() { stop("should not reach here"); } ++ ++ void print_CPU_state(); ++ ++ // Stack overflow checking ++ void bang_stack_with_offset(int offset) { ++ // stack grows down, caller passes positive offset ++ assert(offset > 0, "must bang with negative offset"); ++ if (offset <= 32768) { ++ sw(A0, SP, -offset); ++ } else { ++ li(AT, offset); ++ dsubu(AT, SP, AT); ++ sw(A0, AT, 0); ++ } ++ } ++ ++ // Writes to stack successive pages until offset reached to check for ++ // stack overflow + shadow pages. Also, clobbers tmp ++ void bang_stack_size(Register size, Register tmp); ++ ++ // Check for reserved stack access in method being exited (for JIT) ++ void reserved_stack_check(); ++ ++ virtual RegisterOrConstant delayed_value_impl(intptr_t* delayed_value_addr, ++ Register tmp, ++ int offset); ++ ++ // Support for serializing memory accesses between threads ++ void serialize_memory(Register thread, Register tmp); ++ ++ void safepoint_poll(Label& slow_path, Register thread_reg); ++ void safepoint_poll_acquire(Label& slow_path, Register thread_reg); ++ ++ //void verify_tlab(); ++ void verify_tlab(Register t1, Register t2); ++ ++ // Biased locking support ++ // lock_reg and obj_reg must be loaded up with the appropriate values. ++ // tmp_reg is optional. If it is supplied (i.e., != noreg) it will ++ // be killed; if not supplied, push/pop will be used internally to ++ // allocate a temporary (inefficient, avoid if possible). ++ // Optional slow case is for implementations (interpreter and C1) which branch to ++ // slow case directly. Leaves condition codes set for C2's Fast_Lock node. ++ // Returns offset of first potentially-faulting instruction for null ++ // check info (currently consumed only by C1). If ++ // swap_reg_contains_mark is true then returns -1 as it is assumed ++ // the calling code has already passed any potential faults. 
++ int biased_locking_enter(Register lock_reg, Register obj_reg, ++ Register swap_reg, Register tmp_reg, ++ bool swap_reg_contains_mark, ++ Label& done, Label* slow_case = NULL, ++ BiasedLockingCounters* counters = NULL); ++ void biased_locking_exit (Register obj_reg, Register temp_reg, Label& done); ++#ifdef COMPILER2 ++ void fast_lock(Register obj, Register box, Register res, Register tmp, Register scr); ++ void fast_unlock(Register obj, Register box, Register res, Register tmp, Register scr); ++#endif ++ ++ ++ // Arithmetics ++ // Regular vs. d* versions ++ inline void addu_long(Register rd, Register rs, Register rt) { ++ daddu(rd, rs, rt); ++ } ++ inline void addu_long(Register rd, Register rs, long imm32_64) { ++ daddiu(rd, rs, imm32_64); ++ } ++ ++ void round_to(Register reg, int modulus) { ++ assert_different_registers(reg, AT); ++ increment(reg, modulus - 1); ++ move(AT, - modulus); ++ andr(reg, reg, AT); ++ } ++ ++ // the follow two might use AT register, be sure you have no meanful data in AT before you call them ++ void increment(Register reg, int imm); ++ void decrement(Register reg, int imm); ++ ++ void shl(Register reg, int sa) { dsll(reg, reg, sa); } ++ void shr(Register reg, int sa) { dsrl(reg, reg, sa); } ++ void sar(Register reg, int sa) { dsra(reg, reg, sa); } ++ ++ // Helper functions for statistics gathering. ++ void atomic_inc32(address counter_addr, int inc, Register tmp_reg1, Register tmp_reg2); ++ ++ // Calls ++ void call(address entry); ++ void call(address entry, relocInfo::relocType rtype); ++ void call(address entry, RelocationHolder& rh); ++ ++ address trampoline_call(AddressLiteral entry, CodeBuffer *cbuf = NULL); ++ ++ // Emit the CompiledIC call idiom ++ void ic_call(address entry, jint method_index = 0); ++ ++ // Jumps ++ void jmp(address entry); ++ void jmp(address entry, relocInfo::relocType rtype); ++ void jmp_far(Label& L); // always long jumps ++ ++ /* branches may exceed 16-bit offset */ ++ void b_far(address entry); ++ void b_far(Label& L); ++ ++ void bne_far (Register rs, Register rt, address entry); ++ void bne_far (Register rs, Register rt, Label& L); ++ ++ void beq_far (Register rs, Register rt, address entry); ++ void beq_far (Register rs, Register rt, Label& L); ++ ++ // For C2 to support long branches ++ void beq_long (Register rs, Register rt, Label& L); ++ void bne_long (Register rs, Register rt, Label& L); ++ void bc1t_long (Label& L); ++ void bc1f_long (Label& L); ++ ++ void patchable_call(address target); ++ void general_call(address target); ++ ++ void patchable_jump(address target); ++ void general_jump(address target); ++ ++ static int insts_for_patchable_call(address target); ++ static int insts_for_general_call(address target); ++ ++ static int insts_for_patchable_jump(address target); ++ static int insts_for_general_jump(address target); ++ ++ // Floating ++ // Data ++ ++ // Load and store values by size and signed-ness ++ void load_sized_value(Register dst, Address src, size_t size_in_bytes, bool is_signed, Register dst2 = noreg); ++ void store_sized_value(Address dst, Register src, size_t size_in_bytes, Register src2 = noreg); ++ ++ // ld_ptr will perform lw for 32 bit VMs and ld for 64 bit VMs ++ inline void ld_ptr(Register rt, Address a) { ++ ld(rt, a); ++ } ++ ++ inline void ld_ptr(Register rt, Register base, int offset16) { ++ ld(rt, base, offset16); ++ } ++ ++ // st_ptr will perform sw for 32 bit VMs and sd for 64 bit VMs ++ inline void st_ptr(Register rt, Address a) { ++ sd(rt, a); ++ } ++ ++ inline void st_ptr(Register rt, 
Register base, int offset16) { ++ sd(rt, base, offset16); ++ } ++ ++ void ld_ptr(Register rt, Register base, Register offset); ++ void st_ptr(Register rt, Register base, Register offset); ++ ++ // swap the two byte of the low 16-bit halfword ++ // this directive will use AT, be sure the high 16-bit of reg is zero ++ void hswap(Register reg); ++ void huswap(Register reg); ++ ++ // convert big endian integer to little endian integer ++ void swap(Register reg); ++ ++ // implement the x86 instruction semantic ++ // if c_reg == *dest then *dest <= x_reg ++ // else c_reg <= *dest ++ // the AT indicate if xchg occurred, 1 for xchged, else 0 ++ void cmpxchg(Address addr, Register oldval, Register newval, Register resflag, ++ bool retold, bool barrier); ++ void cmpxchg(Address addr, Register oldval, Register newval, Register tmp, ++ bool retold, bool barrier, Label& succ, Label* fail = NULL); ++ void cmpxchg32(Address addr, Register oldval, Register newval, Register resflag, ++ bool sign, bool retold, bool barrier); ++ void cmpxchg32(Address addr, Register oldval, Register newval, Register tmp, ++ bool sign, bool retold, bool barrier, Label& succ, Label* fail = NULL); ++ void cmpxchg8(Register x_regLo, Register x_regHi, Address dest, Register c_regLo, Register c_regHi); ++ ++ //pop & push ++ void extend_sign(Register rh, Register rl) { stop("extend_sign"); } ++ void neg(Register reg) { dsubu(reg, R0, reg); } ++ void push (Register reg) { daddiu(SP, SP, -8); sd (reg, SP, 0); } ++ void push (FloatRegister reg) { daddiu(SP, SP, -8); sdc1(reg, SP, 0); } ++ void pop (Register reg) { ld (reg, SP, 0); daddiu(SP, SP, 8); } ++ void pop (FloatRegister reg) { ldc1(reg, SP, 0); daddiu(SP, SP, 8); } ++ void pop () { daddiu(SP, SP, 8); } ++ void pop2 () { daddiu(SP, SP, 16); } ++ void push2(Register reg1, Register reg2); ++ void pop2 (Register reg1, Register reg2); ++ void dpush (Register reg) { daddiu(SP, SP, -8); sd (reg, SP, 0); } ++ void dpop (Register reg) { ld (reg, SP, 0); daddiu(SP, SP, 8); } ++ //we need 2 fun to save and resotre general register ++ void pushad(); ++ void popad(); ++ void pushad_except_v0(); ++ void popad_except_v0(); ++ ++ //move an 32-bit immediate to Register ++ void move(Register reg, int imm32) { li32(reg, imm32); } ++ void li (Register rd, long imm); ++ void li (Register rd, address addr) { li(rd, (long)addr); } ++ //replace move(Register reg, int imm) ++ void li32(Register rd, int imm32); // sign-extends to 64 bits on mips64 ++ void set64(Register d, jlong value); ++ static int insts_for_set64(jlong value); ++ ++ void patchable_set48(Register d, jlong value); ++ void patchable_set32(Register d, jlong value); ++ ++ void patchable_call32(Register d, jlong value); ++ ++ static int call_size(address target, bool far, bool patchable); ++ ++ static bool reachable_from_cache(address target); ++ static bool reachable_from_cache(); ++ ++ ++ void dli(Register rd, long imm) { li(rd, imm); } ++ void li64(Register rd, long imm); ++ void li48(Register rd, long imm); ++ ++ void move(Register rd, Register rs) { daddu(rd, rs, R0); } ++ void move_u32(Register rd, Register rs) { addu32(rd, rs, R0); } ++ void dmove(Register rd, Register rs) { daddu(rd, rs, R0); } ++ void mov_metadata(Register dst, Metadata* obj); ++ void mov_metadata(Address dst, Metadata* obj); ++ ++ void store_for_type_by_register(Register src_reg, Register tmp_reg, int disp, BasicType type, bool wide); ++ void store_for_type_by_register(FloatRegister src_reg, Register tmp_reg, int disp, BasicType type); ++ void 
store_for_type(Register src_reg, Address addr, BasicType type = T_INT, bool wide = false); ++ void store_for_type(FloatRegister src_reg, Address addr, BasicType type = T_INT); ++ void load_for_type_by_register(Register dst_reg, Register tmp_reg, int disp, BasicType type, bool wide); ++ void load_for_type_by_register(FloatRegister dst_reg, Register tmp_reg, int disp, BasicType type); ++ int load_for_type(Register dst_reg, Address addr, BasicType type = T_INT, bool wide = false); ++ int load_for_type(FloatRegister dst_reg, Address addr, BasicType type = T_INT); ++ ++#ifndef PRODUCT ++ static void pd_print_patched_instruction(address branch) { ++ jint stub_inst = *(jint*) branch; ++ print_instruction(stub_inst); ++ ::tty->print("%s", " (unresolved)"); ++ ++ } ++#endif ++ ++ //FIXME ++ void empty_FPU_stack(){/*need implemented*/}; ++ ++#ifdef COMPILER2 ++ // Compare strings. ++ void string_compare(Register str1, Register str2, ++ Register cnt1, Register cnt2, Register result, ++ int ae); ++ ++ // Compare char[] or byte[] arrays. ++ void arrays_equals(Register str1, Register str2, ++ Register cnt, Register tmp, Register result, ++ bool is_char); ++#endif ++ ++ // method handles (JSR 292) ++ Address argument_address(RegisterOrConstant arg_slot, int extra_slot_offset = 0); ++ ++ // Conditional move ++ void cmp_cmov(Register op1, ++ Register op2, ++ Register dst, ++ Register src, ++ CMCompare cmp = EQ, ++ bool is_signed = true); ++ void cmp_cmov(FloatRegister op1, ++ FloatRegister op2, ++ Register dst, ++ Register src, ++ CMCompare cmp = EQ, ++ bool is_float = true); ++ void cmp_cmov(FloatRegister op1, ++ FloatRegister op2, ++ FloatRegister dst, ++ FloatRegister src, ++ CMCompare cmp = EQ, ++ bool is_float = true); ++ void cmp_cmov(Register op1, ++ Register op2, ++ FloatRegister dst, ++ FloatRegister src, ++ CMCompare cmp = EQ, ++ bool is_float = true); ++ ++#undef VIRTUAL ++ ++public: ++ ++// Memory Data Type ++#define INT_TYPE 0x100 ++#define FLOAT_TYPE 0x200 ++#define SIGNED_TYPE 0x10 ++#define UNSIGNED_TYPE 0x20 ++ ++ typedef enum { ++ LOAD_BYTE = INT_TYPE | SIGNED_TYPE | 0x1, ++ LOAD_CHAR = INT_TYPE | SIGNED_TYPE | 0x2, ++ LOAD_SHORT = INT_TYPE | SIGNED_TYPE | 0x3, ++ LOAD_INT = INT_TYPE | SIGNED_TYPE | 0x4, ++ LOAD_LONG = INT_TYPE | SIGNED_TYPE | 0x5, ++ STORE_BYTE = INT_TYPE | SIGNED_TYPE | 0x6, ++ STORE_CHAR = INT_TYPE | SIGNED_TYPE | 0x7, ++ STORE_SHORT = INT_TYPE | SIGNED_TYPE | 0x8, ++ STORE_INT = INT_TYPE | SIGNED_TYPE | 0x9, ++ STORE_LONG = INT_TYPE | SIGNED_TYPE | 0xa, ++ LOAD_LINKED_LONG = INT_TYPE | SIGNED_TYPE | 0xb, ++ ++ LOAD_U_BYTE = INT_TYPE | UNSIGNED_TYPE | 0x1, ++ LOAD_U_SHORT = INT_TYPE | UNSIGNED_TYPE | 0x2, ++ LOAD_U_INT = INT_TYPE | UNSIGNED_TYPE | 0x3, ++ ++ LOAD_FLOAT = FLOAT_TYPE | SIGNED_TYPE | 0x1, ++ LOAD_DOUBLE = FLOAT_TYPE | SIGNED_TYPE | 0x2, ++ STORE_FLOAT = FLOAT_TYPE | SIGNED_TYPE | 0x3, ++ STORE_DOUBLE = FLOAT_TYPE | SIGNED_TYPE | 0x4 ++ } CMLoadStoreDataType; ++ ++ void loadstore_enc(Register reg, int base, int index, int scale, int disp, int type) { ++ assert((type & INT_TYPE), "must be General reg type"); ++ loadstore_t(reg, base, index, scale, disp, type); ++ } ++ ++ void loadstore_enc(FloatRegister reg, int base, int index, int scale, int disp, int type) { ++ assert((type & FLOAT_TYPE), "must be Float reg type"); ++ loadstore_t(reg, base, index, scale, disp, type); ++ } ++ ++private: ++ ++ template ++ void loadstore_t(T reg, int base, int index, int scale, int disp, int type) { ++ if (index != 0) { ++ if (Assembler::is_simm16(disp)) { ++ if (UseLEXT1 
&& (type & SIGNED_TYPE) && Assembler::is_simm(disp, 8)) { ++ if (scale == 0) { ++ gs_loadstore(reg, as_Register(base), as_Register(index), disp, type); ++ } else { ++ dsll(AT, as_Register(index), scale); ++ gs_loadstore(reg, as_Register(base), AT, disp, type); ++ } ++ } else { ++ if (scale == 0) { ++ addu(AT, as_Register(base), as_Register(index)); ++ } else { ++ dsll(AT, as_Register(index), scale); ++ addu(AT, as_Register(base), AT); ++ } ++ loadstore(reg, AT, disp, type); ++ } ++ } else { ++ if (scale == 0) { ++ addu(AT, as_Register(base), as_Register(index)); ++ } else { ++ dsll(AT, as_Register(index), scale); ++ addu(AT, as_Register(base), AT); ++ } ++ move(RT9, disp); ++ if (UseLEXT1 && (type & SIGNED_TYPE)) { ++ gs_loadstore(reg, AT, RT9, 0, type); ++ } else { ++ addu(AT, AT, RT9); ++ loadstore(reg, AT, 0, type); ++ } ++ } ++ } else { ++ if (Assembler::is_simm16(disp)) { ++ loadstore(reg, as_Register(base), disp, type); ++ } else { ++ move(RT9, disp); ++ if (UseLEXT1 && (type & SIGNED_TYPE)) { ++ gs_loadstore(reg, as_Register(base), RT9, 0, type); ++ } else { ++ addu(AT, as_Register(base), RT9); ++ loadstore(reg, AT, 0, type); ++ } ++ } ++ } ++ } ++ void loadstore(Register reg, Register base, int disp, int type); ++ void loadstore(FloatRegister reg, Register base, int disp, int type); ++ void gs_loadstore(Register reg, Register base, Register index, int disp, int type); ++ void gs_loadstore(FloatRegister reg, Register base, Register index, int disp, int type); ++}; ++ ++/** ++ * class SkipIfEqual: ++ * ++ * Instantiating this class will result in assembly code being output that will ++ * jump around any code emitted between the creation of the instance and it's ++ * automatic destruction at the end of a scope block, depending on the value of ++ * the flag passed to the constructor, which will be checked at run-time. ++ */ ++class SkipIfEqual { ++private: ++ MacroAssembler* _masm; ++ Label _label; ++ ++public: ++ inline SkipIfEqual(MacroAssembler* masm, const bool* flag_addr, bool value) ++ : _masm(masm) { ++ _masm->li(AT, (address)flag_addr); ++ _masm->lb(AT, AT, 0); ++ if (value) { ++ _masm->bne(AT, R0, _label); ++ } else { ++ _masm->beq(AT, R0, _label); ++ } ++ _masm->delayed()->nop(); ++ } ++ ++ ~SkipIfEqual(); ++}; ++ ++#ifdef ASSERT ++inline bool AbstractAssembler::pd_check_instruction_mark() { return true; } ++#endif ++ ++ ++#endif // CPU_MIPS_VM_MACROASSEMBLER_MIPS_HPP +diff --git a/src/hotspot/cpu/mips/macroAssembler_mips.inline.hpp b/src/hotspot/cpu/mips/macroAssembler_mips.inline.hpp +new file mode 100644 +index 0000000000..92c05fb726 +--- /dev/null ++++ b/src/hotspot/cpu/mips/macroAssembler_mips.inline.hpp +@@ -0,0 +1,34 @@ ++/* ++ * Copyright (c) 1997, 2013, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2017, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). 
++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#ifndef CPU_MIPS_VM_MACROASSEMBLER_MIPS_INLINE_HPP ++#define CPU_MIPS_VM_MACROASSEMBLER_MIPS_INLINE_HPP ++ ++#include "asm/assembler.inline.hpp" ++#include "asm/macroAssembler.hpp" ++#include "asm/codeBuffer.hpp" ++#include "code/codeCache.hpp" ++ ++#endif // CPU_MIPS_VM_MACROASSEMBLER_MIPS_INLINE_HPP +diff --git a/src/hotspot/cpu/mips/methodHandles_mips.cpp b/src/hotspot/cpu/mips/methodHandles_mips.cpp +new file mode 100644 +index 0000000000..e9788ac52c +--- /dev/null ++++ b/src/hotspot/cpu/mips/methodHandles_mips.cpp +@@ -0,0 +1,576 @@ ++/* ++ * Copyright (c) 1997, 2014, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ * ++ */ ++ ++#include "precompiled.hpp" ++#include "asm/macroAssembler.hpp" ++#include "classfile/javaClasses.inline.hpp" ++#include "interpreter/interpreter.hpp" ++#include "interpreter/interpreterRuntime.hpp" ++#include "memory/allocation.inline.hpp" ++#include "prims/methodHandles.hpp" ++#include "runtime/frame.inline.hpp" ++#include "utilities/preserveException.hpp" ++ ++#define __ _masm-> ++ ++#define T0 RT0 ++#define T1 RT1 ++#define T2 RT2 ++#define T3 RT3 ++#define T8 RT8 ++#define T9 RT9 ++ ++#ifdef PRODUCT ++#define BLOCK_COMMENT(str) // nothing ++#define STOP(error) stop(error) ++#else ++#define BLOCK_COMMENT(str) __ block_comment(str) ++#define STOP(error) block_comment(error); __ stop(error) ++#endif ++ ++#define BIND(label) bind(label); BLOCK_COMMENT(#label ":") ++ ++void MethodHandles::load_klass_from_Class(MacroAssembler* _masm, Register klass_reg) { ++ if (VerifyMethodHandles) ++ verify_klass(_masm, klass_reg, SystemDictionary::WK_KLASS_ENUM_NAME(java_lang_Class), ++ "MH argument is a Class"); ++ __ ld(klass_reg, Address(klass_reg, java_lang_Class::klass_offset_in_bytes())); ++} ++ ++#ifdef ASSERT ++static int check_nonzero(const char* xname, int x) { ++ assert(x != 0, "%s should be nonzero", xname); ++ return x; ++} ++#define NONZERO(x) check_nonzero(#x, x) ++#else //ASSERT ++#define NONZERO(x) (x) ++#endif //ASSERT ++ ++#ifdef ASSERT ++void MethodHandles::verify_klass(MacroAssembler* _masm, ++ Register obj, SystemDictionary::WKID klass_id, ++ const char* error_message) { ++} ++ ++void MethodHandles::verify_ref_kind(MacroAssembler* _masm, int ref_kind, Register member_reg, Register temp) { ++ Label L; ++ BLOCK_COMMENT("verify_ref_kind {"); ++ __ lw(temp, Address(member_reg, NONZERO(java_lang_invoke_MemberName::flags_offset_in_bytes()))); ++ __ sra(temp, temp, java_lang_invoke_MemberName::MN_REFERENCE_KIND_SHIFT); ++ __ move(AT, java_lang_invoke_MemberName::MN_REFERENCE_KIND_MASK); ++ __ andr(temp, temp, AT); ++ __ move(AT, ref_kind); ++ __ beq(temp, AT, L); ++ __ delayed()->nop(); ++ { char* buf = NEW_C_HEAP_ARRAY(char, 100, mtInternal); ++ jio_snprintf(buf, 100, "verify_ref_kind expected %x", ref_kind); ++ if (ref_kind == JVM_REF_invokeVirtual || ++ ref_kind == JVM_REF_invokeSpecial) ++ // could do this for all ref_kinds, but would explode assembly code size ++ trace_method_handle(_masm, buf); ++ __ STOP(buf); ++ } ++ BLOCK_COMMENT("} verify_ref_kind"); ++ __ bind(L); ++} ++ ++#endif //ASSERT ++ ++void MethodHandles::jump_from_method_handle(MacroAssembler* _masm, Register method, Register temp, ++ bool for_compiler_entry) { ++ assert(method == Rmethod, "interpreter calling convention"); ++ ++ Label L_no_such_method; ++ __ beq(method, R0, L_no_such_method); ++ __ delayed()->nop(); ++ ++ __ verify_method_ptr(method); ++ ++ if (!for_compiler_entry && JvmtiExport::can_post_interpreter_events()) { ++ Label run_compiled_code; ++ // JVMTI events, such as single-stepping, are implemented partly by avoiding running ++ // compiled code in threads for which the event is enabled. Check here for ++ // interp_only_mode if these events CAN be enabled. ++ Register rthread = TREG; ++ // interp_only is an int, on little endian it is sufficient to test the byte only ++ // Is a cmpl faster? 
++ __ lbu(AT, rthread, in_bytes(JavaThread::interp_only_mode_offset())); ++ __ beq(AT, R0, run_compiled_code); ++ __ delayed()->nop(); ++ __ ld(T9, method, in_bytes(Method::interpreter_entry_offset())); ++ __ jr(T9); ++ __ delayed()->nop(); ++ __ BIND(run_compiled_code); ++ } ++ ++ const ByteSize entry_offset = for_compiler_entry ? Method::from_compiled_offset() : ++ Method::from_interpreted_offset(); ++ __ ld(T9, method, in_bytes(entry_offset)); ++ __ jr(T9); ++ __ delayed()->nop(); ++ ++ __ bind(L_no_such_method); ++ address wrong_method = StubRoutines::throw_AbstractMethodError_entry(); ++ __ jmp(wrong_method, relocInfo::runtime_call_type); ++ __ delayed()->nop(); ++} ++ ++void MethodHandles::jump_to_lambda_form(MacroAssembler* _masm, ++ Register recv, Register method_temp, ++ Register temp2, ++ bool for_compiler_entry) { ++ BLOCK_COMMENT("jump_to_lambda_form {"); ++ // This is the initial entry point of a lazy method handle. ++ // After type checking, it picks up the invoker from the LambdaForm. ++ assert_different_registers(recv, method_temp, temp2); ++ assert(recv != noreg, "required register"); ++ assert(method_temp == Rmethod, "required register for loading method"); ++ ++ //NOT_PRODUCT({ FlagSetting fs(TraceMethodHandles, true); trace_method_handle(_masm, "LZMH"); }); ++ ++ // Load the invoker, as MH -> MH.form -> LF.vmentry ++ __ verify_oop(recv); ++ __ load_heap_oop(method_temp, Address(recv, NONZERO(java_lang_invoke_MethodHandle::form_offset_in_bytes())), temp2); ++ __ verify_oop(method_temp); ++ __ load_heap_oop(method_temp, Address(method_temp, NONZERO(java_lang_invoke_LambdaForm::vmentry_offset_in_bytes())), temp2); ++ __ verify_oop(method_temp); ++ __ load_heap_oop(method_temp, Address(method_temp, NONZERO(java_lang_invoke_MemberName::method_offset_in_bytes()))); ++ __ verify_oop(method_temp); ++ __ access_load_at(T_ADDRESS, IN_HEAP, method_temp, Address(method_temp, NONZERO(java_lang_invoke_ResolvedMethodName::vmtarget_offset_in_bytes())), noreg, noreg); ++ ++ if (VerifyMethodHandles && !for_compiler_entry) { ++ // make sure recv is already on stack ++ __ ld(temp2, Address(method_temp, Method::const_offset())); ++ __ load_sized_value(temp2, ++ Address(temp2, ConstMethod::size_of_parameters_offset()), ++ sizeof(u2), false); ++ // assert(sizeof(u2) == sizeof(Method::_size_of_parameters), ""); ++ Label L; ++ Address recv_addr = __ argument_address(temp2, -1); ++ __ ld(AT, recv_addr); ++ __ beq(recv, AT, L); ++ __ delayed()->nop(); ++ ++ recv_addr = __ argument_address(temp2, -1); ++ __ ld(V0, recv_addr); ++ __ STOP("receiver not on stack"); ++ __ BIND(L); ++ } ++ ++ jump_from_method_handle(_masm, method_temp, temp2, for_compiler_entry); ++ BLOCK_COMMENT("} jump_to_lambda_form"); ++} ++ ++ ++// Code generation ++address MethodHandles::generate_method_handle_interpreter_entry(MacroAssembler* _masm, ++ vmIntrinsics::ID iid) { ++ const bool not_for_compiler_entry = false; // this is the interpreter entry ++ assert(is_signature_polymorphic(iid), "expected invoke iid"); ++ if (iid == vmIntrinsics::_invokeGeneric || ++ iid == vmIntrinsics::_compiledLambdaForm) { ++ // Perhaps surprisingly, the symbolic references visible to Java are not directly used. ++ // They are linked to Java-generated adapters via MethodHandleNatives.linkMethod. ++ // They all allow an appendix argument. 
++ __ stop("empty stubs make SG sick"); ++ return NULL; ++ } ++ ++ // Rmethod: Method* ++ // T9: argument locator (parameter slot count, added to sp) ++ // S7: used as temp to hold mh or receiver ++ Register t9_argp = T9; // argument list ptr, live on error paths ++ Register s7_mh = S7; // MH receiver; dies quickly and is recycled ++ Register rm_method = Rmethod; // eventual target of this invocation ++ ++ // here's where control starts out: ++ __ align(CodeEntryAlignment); ++ address entry_point = __ pc(); ++ ++ if (VerifyMethodHandles) { ++ assert(Method::intrinsic_id_size_in_bytes() == 2, "assuming Method::_intrinsic_id is u2"); ++ ++ Label L; ++ BLOCK_COMMENT("verify_intrinsic_id {"); ++ __ lhu(AT, rm_method, Method::intrinsic_id_offset_in_bytes()); ++ guarantee(Assembler::is_simm16(iid), "Oops, iid is not simm16! Change the instructions."); ++ __ addiu(AT, AT, -1 * (int) iid); ++ __ beq(AT, R0, L); ++ __ delayed()->nop(); ++ if (iid == vmIntrinsics::_linkToVirtual || ++ iid == vmIntrinsics::_linkToSpecial) { ++ // could do this for all kinds, but would explode assembly code size ++ trace_method_handle(_masm, "bad Method*::intrinsic_id"); ++ } ++ __ STOP("bad Method*::intrinsic_id"); ++ __ bind(L); ++ BLOCK_COMMENT("} verify_intrinsic_id"); ++ } ++ ++ // First task: Find out how big the argument list is. ++ Address t9_first_arg_addr; ++ int ref_kind = signature_polymorphic_intrinsic_ref_kind(iid); ++ assert(ref_kind != 0 || iid == vmIntrinsics::_invokeBasic, "must be _invokeBasic or a linkTo intrinsic"); ++ if (ref_kind == 0 || MethodHandles::ref_kind_has_receiver(ref_kind)) { ++ __ ld(t9_argp, Address(rm_method, Method::const_offset())); ++ __ load_sized_value(t9_argp, ++ Address(t9_argp, ConstMethod::size_of_parameters_offset()), ++ sizeof(u2), false); ++ // assert(sizeof(u2) == sizeof(Method::_size_of_parameters), ""); ++ t9_first_arg_addr = __ argument_address(t9_argp, -1); ++ } else { ++ DEBUG_ONLY(t9_argp = noreg); ++ } ++ ++ if (!is_signature_polymorphic_static(iid)) { ++ __ ld(s7_mh, t9_first_arg_addr); ++ DEBUG_ONLY(t9_argp = noreg); ++ } ++ ++ // t9_first_arg_addr is live! ++ ++ trace_method_handle_interpreter_entry(_masm, iid); ++ ++ if (iid == vmIntrinsics::_invokeBasic) { ++ generate_method_handle_dispatch(_masm, iid, s7_mh, noreg, not_for_compiler_entry); ++ ++ } else { ++ // Adjust argument list by popping the trailing MemberName argument. ++ Register r_recv = noreg; ++ if (MethodHandles::ref_kind_has_receiver(ref_kind)) { ++ // Load the receiver (not the MH; the actual MemberName's receiver) up from the interpreter stack. 
++ __ ld(r_recv = T2, t9_first_arg_addr); ++ } ++ DEBUG_ONLY(t9_argp = noreg); ++ Register rm_member = rm_method; // MemberName ptr; incoming method ptr is dead now ++ __ pop(rm_member); // extract last argument ++ generate_method_handle_dispatch(_masm, iid, r_recv, rm_member, not_for_compiler_entry); ++ } ++ ++ return entry_point; ++} ++ ++void MethodHandles::generate_method_handle_dispatch(MacroAssembler* _masm, ++ vmIntrinsics::ID iid, ++ Register receiver_reg, ++ Register member_reg, ++ bool for_compiler_entry) { ++ assert(is_signature_polymorphic(iid), "expected invoke iid"); ++ Register rm_method = Rmethod; // eventual target of this invocation ++ // temps used in this code are not used in *either* compiled or interpreted calling sequences ++ Register j_rarg0 = T0; ++ Register j_rarg1 = A0; ++ Register j_rarg2 = A1; ++ Register j_rarg3 = A2; ++ Register j_rarg4 = A3; ++ Register j_rarg5 = A4; ++ ++ Register temp1 = T8; ++ Register temp2 = T9; ++ Register temp3 = V0; ++ if (for_compiler_entry) { ++ assert(receiver_reg == (iid == vmIntrinsics::_linkToStatic ? noreg : j_rarg0), "only valid assignment"); ++ assert_different_registers(temp1, j_rarg0, j_rarg1, j_rarg2, j_rarg3, j_rarg4, j_rarg5); ++ assert_different_registers(temp2, j_rarg0, j_rarg1, j_rarg2, j_rarg3, j_rarg4, j_rarg5); ++ assert_different_registers(temp3, j_rarg0, j_rarg1, j_rarg2, j_rarg3, j_rarg4, j_rarg5); ++ } ++ else { ++ assert_different_registers(temp1, temp2, temp3, saved_last_sp_register()); // don't trash lastSP ++ } ++ assert_different_registers(temp1, temp2, temp3, receiver_reg); ++ assert_different_registers(temp1, temp2, temp3, member_reg); ++ ++ if (iid == vmIntrinsics::_invokeBasic) { ++ // indirect through MH.form.vmentry.vmtarget ++ jump_to_lambda_form(_masm, receiver_reg, rm_method, temp1, for_compiler_entry); ++ ++ } else { ++ // The method is a member invoker used by direct method handles. ++ if (VerifyMethodHandles) { ++ // make sure the trailing argument really is a MemberName (caller responsibility) ++ verify_klass(_masm, member_reg, SystemDictionary::WK_KLASS_ENUM_NAME(java_lang_invoke_MemberName), ++ "MemberName required for invokeVirtual etc."); ++ } ++ ++ Address member_clazz( member_reg, NONZERO(java_lang_invoke_MemberName::clazz_offset_in_bytes())); ++ Address member_vmindex( member_reg, NONZERO(java_lang_invoke_MemberName::vmindex_offset_in_bytes())); ++ Address member_vmtarget( member_reg, NONZERO(java_lang_invoke_MemberName::method_offset_in_bytes())); ++ Address vmtarget_method( rm_method, NONZERO(java_lang_invoke_ResolvedMethodName::vmtarget_offset_in_bytes())); ++ ++ Register temp1_recv_klass = temp1; ++ if (iid != vmIntrinsics::_linkToStatic) { ++ __ verify_oop(receiver_reg); ++ if (iid == vmIntrinsics::_linkToSpecial) { ++ // Don't actually load the klass; just null-check the receiver. ++ __ null_check(receiver_reg); ++ } else { ++ // load receiver klass itself ++ __ null_check(receiver_reg, oopDesc::klass_offset_in_bytes()); ++ __ load_klass(temp1_recv_klass, receiver_reg); ++ __ verify_klass_ptr(temp1_recv_klass); ++ } ++ BLOCK_COMMENT("check_receiver {"); ++ // The receiver for the MemberName must be in receiver_reg. ++ // Check the receiver against the MemberName.clazz ++ if (VerifyMethodHandles && iid == vmIntrinsics::_linkToSpecial) { ++ // Did not load it above... 
++ __ load_klass(temp1_recv_klass, receiver_reg); ++ __ verify_klass_ptr(temp1_recv_klass); ++ } ++ if (VerifyMethodHandles && iid != vmIntrinsics::_linkToInterface) { ++ Label L_ok; ++ Register temp2_defc = temp2; ++ __ load_heap_oop(temp2_defc, member_clazz, temp3); ++ load_klass_from_Class(_masm, temp2_defc); ++ __ verify_klass_ptr(temp2_defc); ++ __ check_klass_subtype(temp1_recv_klass, temp2_defc, temp3, L_ok); ++ // If we get here, the type check failed! ++ __ STOP("receiver class disagrees with MemberName.clazz"); ++ __ bind(L_ok); ++ } ++ BLOCK_COMMENT("} check_receiver"); ++ } ++ if (iid == vmIntrinsics::_linkToSpecial || ++ iid == vmIntrinsics::_linkToStatic) { ++ DEBUG_ONLY(temp1_recv_klass = noreg); // these guys didn't load the recv_klass ++ } ++ ++ // Live registers at this point: ++ // member_reg - MemberName that was the trailing argument ++ // temp1_recv_klass - klass of stacked receiver, if needed ++ ++ Label L_incompatible_class_change_error; ++ switch (iid) { ++ case vmIntrinsics::_linkToSpecial: ++ if (VerifyMethodHandles) { ++ verify_ref_kind(_masm, JVM_REF_invokeSpecial, member_reg, temp3); ++ } ++ __ load_heap_oop(rm_method, member_vmtarget); ++ __ access_load_at(T_ADDRESS, IN_HEAP, rm_method, vmtarget_method, noreg, noreg); ++ break; ++ ++ case vmIntrinsics::_linkToStatic: ++ if (VerifyMethodHandles) { ++ verify_ref_kind(_masm, JVM_REF_invokeStatic, member_reg, temp3); ++ } ++ __ load_heap_oop(rm_method, member_vmtarget); ++ __ access_load_at(T_ADDRESS, IN_HEAP, rm_method, vmtarget_method, noreg, noreg); ++ break; ++ ++ case vmIntrinsics::_linkToVirtual: ++ { ++ // same as TemplateTable::invokevirtual, ++ // minus the CP setup and profiling: ++ ++ if (VerifyMethodHandles) { ++ verify_ref_kind(_masm, JVM_REF_invokeVirtual, member_reg, temp3); ++ } ++ ++ // pick out the vtable index from the MemberName, and then we can discard it: ++ Register temp2_index = temp2; ++ __ access_load_at(T_ADDRESS, IN_HEAP, temp2_index, member_vmindex, noreg, noreg); ++ if (VerifyMethodHandles) { ++ Label L_index_ok; ++ __ slt(AT, R0, temp2_index); ++ __ bne(AT, R0, L_index_ok); ++ __ delayed()->nop(); ++ __ STOP("no virtual index"); ++ __ BIND(L_index_ok); ++ } ++ ++ // Note: The verifier invariants allow us to ignore MemberName.clazz and vmtarget ++ // at this point. And VerifyMethodHandles has already checked clazz, if needed. 
++ ++ // get target Method* & entry point ++ __ lookup_virtual_method(temp1_recv_klass, temp2_index, rm_method); ++ break; ++ } ++ ++ case vmIntrinsics::_linkToInterface: ++ { ++ // same as TemplateTable::invokeinterface ++ // (minus the CP setup and profiling, with different argument motion) ++ if (VerifyMethodHandles) { ++ verify_ref_kind(_masm, JVM_REF_invokeInterface, member_reg, temp3); ++ } ++ ++ Register temp3_intf = temp3; ++ __ load_heap_oop(temp3_intf, member_clazz); ++ load_klass_from_Class(_masm, temp3_intf); ++ __ verify_klass_ptr(temp3_intf); ++ ++ Register rm_index = rm_method; ++ __ access_load_at(T_ADDRESS, IN_HEAP, rm_index, member_vmindex, noreg, noreg); ++ if (VerifyMethodHandles) { ++ Label L; ++ __ slt(AT, rm_index, R0); ++ __ beq(AT, R0, L); ++ __ delayed()->nop(); ++ __ STOP("invalid vtable index for MH.invokeInterface"); ++ __ bind(L); ++ } ++ ++ // given intf, index, and recv klass, dispatch to the implementation method ++ __ lookup_interface_method(temp1_recv_klass, temp3_intf, ++ // note: next two args must be the same: ++ rm_index, rm_method, ++ temp2, ++ L_incompatible_class_change_error); ++ break; ++ } ++ ++ default: ++ fatal("unexpected intrinsic %d: %s", iid, vmIntrinsics::name_at(iid)); ++ break; ++ } ++ ++ // Live at this point: ++ // rm_method ++ ++ // After figuring out which concrete method to call, jump into it. ++ // Note that this works in the interpreter with no data motion. ++ // But the compiled version will require that r_recv be shifted out. ++ __ verify_method_ptr(rm_method); ++ jump_from_method_handle(_masm, rm_method, temp1, for_compiler_entry); ++ ++ if (iid == vmIntrinsics::_linkToInterface) { ++ __ bind(L_incompatible_class_change_error); ++ address icce_entry= StubRoutines::throw_IncompatibleClassChangeError_entry(); ++ __ jmp(icce_entry, relocInfo::runtime_call_type); ++ __ delayed()->nop(); ++ } ++ } ++} ++ ++#ifndef PRODUCT ++void trace_method_handle_stub(const char* adaptername, ++ oop mh, ++ intptr_t* saved_regs, ++ intptr_t* entry_sp) { ++ // called as a leaf from native code: do not block the JVM! ++ bool has_mh = (strstr(adaptername, "/static") == NULL && ++ strstr(adaptername, "linkTo") == NULL); // static linkers don't have MH ++ const char* mh_reg_name = has_mh ? "s7_mh" : "s7"; ++ tty->print_cr("MH %s %s=" PTR_FORMAT " sp=" PTR_FORMAT, ++ adaptername, mh_reg_name, ++ p2i(mh), p2i(entry_sp)); ++ ++ if (Verbose) { ++ tty->print_cr("Registers:"); ++ const int saved_regs_count = RegisterImpl::number_of_registers; ++ for (int i = 0; i < saved_regs_count; i++) { ++ Register r = as_Register(i); ++ // The registers are stored in reverse order on the stack (by pusha). ++ tty->print("%3s=" PTR_FORMAT, r->name(), saved_regs[((saved_regs_count - 1) - i)]); ++ if ((i + 1) % 4 == 0) { ++ tty->cr(); ++ } else { ++ tty->print(", "); ++ } ++ } ++ tty->cr(); ++ ++ { ++ // dumping last frame with frame::describe ++ ++ JavaThread* p = JavaThread::active(); ++ ++ ResourceMark rm; ++ PRESERVE_EXCEPTION_MARK; // may not be needed by safer and unexpensive here ++ FrameValues values; ++ ++ // Note: We want to allow trace_method_handle from any call site. ++ // While trace_method_handle creates a frame, it may be entered ++ // without a PC on the stack top (e.g. not just after a call). ++ // Walking that frame could lead to failures due to that invalid PC. 
++ // => carefully detect that frame when doing the stack walking ++ ++ // Current C frame ++ frame cur_frame = os::current_frame(); ++ ++ // Robust search of trace_calling_frame (independant of inlining). ++ // Assumes saved_regs comes from a pusha in the trace_calling_frame. ++ assert(cur_frame.sp() < saved_regs, "registers not saved on stack ?"); ++ frame trace_calling_frame = os::get_sender_for_C_frame(&cur_frame); ++ while (trace_calling_frame.fp() < saved_regs) { ++ trace_calling_frame = os::get_sender_for_C_frame(&trace_calling_frame); ++ } ++ ++ // safely create a frame and call frame::describe ++ intptr_t *dump_sp = trace_calling_frame.sender_sp(); ++ intptr_t *dump_fp = trace_calling_frame.link(); ++ ++ bool walkable = has_mh; // whether the traced frame shoud be walkable ++ ++ if (walkable) { ++ // The previous definition of walkable may have to be refined ++ // if new call sites cause the next frame constructor to start ++ // failing. Alternatively, frame constructors could be ++ // modified to support the current or future non walkable ++ // frames (but this is more intrusive and is not considered as ++ // part of this RFE, which will instead use a simpler output). ++ frame dump_frame = frame(dump_sp, dump_fp); ++ dump_frame.describe(values, 1); ++ } else { ++ // Stack may not be walkable (invalid PC above FP): ++ // Add descriptions without building a Java frame to avoid issues ++ values.describe(-1, dump_fp, "fp for #1 "); ++ values.describe(-1, dump_sp, "sp for #1"); ++ } ++ values.describe(-1, entry_sp, "raw top of stack"); ++ ++ tty->print_cr("Stack layout:"); ++ values.print(p); ++ } ++ if (has_mh && oopDesc::is_oop(mh)) { ++ mh->print(); ++ if (java_lang_invoke_MethodHandle::is_instance(mh)) { ++ if (java_lang_invoke_MethodHandle::form_offset_in_bytes() != 0) ++ java_lang_invoke_MethodHandle::form(mh)->print(); ++ } ++ } ++ } ++} ++ ++// The stub wraps the arguments in a struct on the stack to avoid ++// dealing with the different calling conventions for passing 6 ++// arguments. ++struct MethodHandleStubArguments { ++ const char* adaptername; ++ oopDesc* mh; ++ intptr_t* saved_regs; ++ intptr_t* entry_sp; ++}; ++void trace_method_handle_stub_wrapper(MethodHandleStubArguments* args) { ++ trace_method_handle_stub(args->adaptername, ++ args->mh, ++ args->saved_regs, ++ args->entry_sp); ++} ++ ++void MethodHandles::trace_method_handle(MacroAssembler* _masm, const char* adaptername) { ++} ++#endif //PRODUCT +diff --git a/src/hotspot/cpu/mips/methodHandles_mips.hpp b/src/hotspot/cpu/mips/methodHandles_mips.hpp +new file mode 100644 +index 0000000000..03b65fc8ef +--- /dev/null ++++ b/src/hotspot/cpu/mips/methodHandles_mips.hpp +@@ -0,0 +1,62 @@ ++/* ++ * Copyright (c) 2010, 2012, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). 
++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++// Platform-specific definitions for method handles. ++// These definitions are inlined into class MethodHandles. ++ ++// Adapters ++enum /* platform_dependent_constants */ { ++ adapter_code_size = 32000 DEBUG_ONLY(+ 150000) ++}; ++ ++// Additional helper methods for MethodHandles code generation: ++public: ++ static void load_klass_from_Class(MacroAssembler* _masm, Register klass_reg); ++ ++ static void verify_klass(MacroAssembler* _masm, ++ Register obj, SystemDictionary::WKID klass_id, ++ const char* error_message = "wrong klass") NOT_DEBUG_RETURN; ++ ++ static void verify_method_handle(MacroAssembler* _masm, Register mh_reg) { ++ verify_klass(_masm, mh_reg, SystemDictionary::WK_KLASS_ENUM_NAME(java_lang_invoke_MethodHandle), ++ "reference is a MH"); ++ } ++ ++ static void verify_ref_kind(MacroAssembler* _masm, int ref_kind, Register member_reg, Register temp) NOT_DEBUG_RETURN; ++ ++ // Similar to InterpreterMacroAssembler::jump_from_interpreted. ++ // Takes care of special dispatch from single stepping too. ++ static void jump_from_method_handle(MacroAssembler* _masm, Register method, Register temp, ++ bool for_compiler_entry); ++ ++ static void jump_to_lambda_form(MacroAssembler* _masm, ++ Register recv, Register method_temp, ++ Register temp2, ++ bool for_compiler_entry); ++ ++ static Register saved_last_sp_register() { ++ // Should be in sharedRuntime, not here. ++ return I29; ++ } +diff --git a/src/hotspot/cpu/mips/mips.ad b/src/hotspot/cpu/mips/mips.ad +new file mode 100644 +index 0000000000..3563bbe0e5 +--- /dev/null ++++ b/src/hotspot/cpu/mips/mips.ad +@@ -0,0 +1,25 @@ ++// ++// Copyright (c) 2011, 2012, Oracle and/or its affiliates. All rights reserved. ++// Copyright (c) 2015, 2016, Loongson Technology. All rights reserved. ++// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++// ++// This code is free software; you can redistribute it and/or modify it ++// under the terms of the GNU General Public License version 2 only, as ++// published by the Free Software Foundation. ++// ++// This code is distributed in the hope that it will be useful, but WITHOUT ++// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++// version 2 for more details (a copy is included in the LICENSE file that ++// accompanied this code). ++// ++// You should have received a copy of the GNU General Public License version ++// 2 along with this work; if not, write to the Free Software Foundation, ++// Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++// ++// Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++// or visit www.oracle.com if you need additional information or have any ++// questions. ++// ++// ++ +diff --git a/src/hotspot/cpu/mips/mips_64.ad b/src/hotspot/cpu/mips/mips_64.ad +new file mode 100644 +index 0000000000..b4acbd83f7 +--- /dev/null ++++ b/src/hotspot/cpu/mips/mips_64.ad +@@ -0,0 +1,12243 @@ ++// ++// Copyright (c) 2003, 2013, Oracle and/or its affiliates. All rights reserved. ++// Copyright (c) 2015, 2022, Loongson Technology. 
All rights reserved. ++// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++// ++// This code is free software; you can redistribute it and/or modify it ++// under the terms of the GNU General Public License version 2 only, as ++// published by the Free Software Foundation. ++// ++// This code is distributed in the hope that it will be useful, but WITHOUT ++// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++// version 2 for more details (a copy is included in the LICENSE file that ++// accompanied this code). ++// ++// You should have received a copy of the GNU General Public License version ++// 2 along with this work; if not, write to the Free Software Foundation, ++// Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++// ++// Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++// or visit www.oracle.com if you need additional information or have any ++// questions. ++// ++// ++ ++// GodSon3 Architecture Description File ++ ++//----------REGISTER DEFINITION BLOCK------------------------------------------ ++// This information is used by the matcher and the register allocator to ++// describe individual registers and classes of registers within the target ++// archtecture. ++ ++// format: ++// reg_def name (call convention, c-call convention, ideal type, encoding); ++// call convention : ++// NS = No-Save ++// SOC = Save-On-Call ++// SOE = Save-On-Entry ++// AS = Always-Save ++// ideal type : ++// see opto/opcodes.hpp for more info ++// reg_class name (reg, ...); ++// alloc_class name (reg, ...); ++register %{ ++ ++// General Registers ++// Integer Registers ++ reg_def R0 ( NS, NS, Op_RegI, 0, VMRegImpl::Bad()); ++ reg_def AT ( NS, NS, Op_RegI, 1, AT->as_VMReg()); ++ reg_def AT_H ( NS, NS, Op_RegI, 1, AT->as_VMReg()->next()); ++ reg_def V0 (SOC, SOC, Op_RegI, 2, V0->as_VMReg()); ++ reg_def V0_H (SOC, SOC, Op_RegI, 2, V0->as_VMReg()->next()); ++ reg_def V1 (SOC, SOC, Op_RegI, 3, V1->as_VMReg()); ++ reg_def V1_H (SOC, SOC, Op_RegI, 3, V1->as_VMReg()->next()); ++ reg_def A0 (SOC, SOC, Op_RegI, 4, A0->as_VMReg()); ++ reg_def A0_H (SOC, SOC, Op_RegI, 4, A0->as_VMReg()->next()); ++ reg_def A1 (SOC, SOC, Op_RegI, 5, A1->as_VMReg()); ++ reg_def A1_H (SOC, SOC, Op_RegI, 5, A1->as_VMReg()->next()); ++ reg_def A2 (SOC, SOC, Op_RegI, 6, A2->as_VMReg()); ++ reg_def A2_H (SOC, SOC, Op_RegI, 6, A2->as_VMReg()->next()); ++ reg_def A3 (SOC, SOC, Op_RegI, 7, A3->as_VMReg()); ++ reg_def A3_H (SOC, SOC, Op_RegI, 7, A3->as_VMReg()->next()); ++ reg_def A4 (SOC, SOC, Op_RegI, 8, A4->as_VMReg()); ++ reg_def A4_H (SOC, SOC, Op_RegI, 8, A4->as_VMReg()->next()); ++ reg_def A5 (SOC, SOC, Op_RegI, 9, A5->as_VMReg()); ++ reg_def A5_H (SOC, SOC, Op_RegI, 9, A5->as_VMReg()->next()); ++ reg_def A6 (SOC, SOC, Op_RegI, 10, A6->as_VMReg()); ++ reg_def A6_H (SOC, SOC, Op_RegI, 10, A6->as_VMReg()->next()); ++ reg_def A7 (SOC, SOC, Op_RegI, 11, A7->as_VMReg()); ++ reg_def A7_H (SOC, SOC, Op_RegI, 11, A7->as_VMReg()->next()); ++ reg_def T0 (SOC, SOC, Op_RegI, 12, T0->as_VMReg()); ++ reg_def T0_H (SOC, SOC, Op_RegI, 12, T0->as_VMReg()->next()); ++ reg_def T1 (SOC, SOC, Op_RegI, 13, T1->as_VMReg()); ++ reg_def T1_H (SOC, SOC, Op_RegI, 13, T1->as_VMReg()->next()); ++ reg_def T2 (SOC, SOC, Op_RegI, 14, T2->as_VMReg()); ++ reg_def T2_H (SOC, SOC, Op_RegI, 14, T2->as_VMReg()->next()); ++ reg_def T3 (SOC, SOC, Op_RegI, 15, T3->as_VMReg()); ++ reg_def T3_H (SOC, SOC, Op_RegI, 15, 
T3->as_VMReg()->next()); ++ reg_def S0 (SOC, SOE, Op_RegI, 16, S0->as_VMReg()); ++ reg_def S0_H (SOC, SOE, Op_RegI, 16, S0->as_VMReg()->next()); ++ reg_def S1 (SOC, SOE, Op_RegI, 17, S1->as_VMReg()); ++ reg_def S1_H (SOC, SOE, Op_RegI, 17, S1->as_VMReg()->next()); ++ reg_def S2 (SOC, SOE, Op_RegI, 18, S2->as_VMReg()); ++ reg_def S2_H (SOC, SOE, Op_RegI, 18, S2->as_VMReg()->next()); ++ reg_def S3 (SOC, SOE, Op_RegI, 19, S3->as_VMReg()); ++ reg_def S3_H (SOC, SOE, Op_RegI, 19, S3->as_VMReg()->next()); ++ reg_def S4 (SOC, SOE, Op_RegI, 20, S4->as_VMReg()); ++ reg_def S4_H (SOC, SOE, Op_RegI, 20, S4->as_VMReg()->next()); ++ reg_def S5 (SOC, SOE, Op_RegI, 21, S5->as_VMReg()); ++ reg_def S5_H (SOC, SOE, Op_RegI, 21, S5->as_VMReg()->next()); ++ reg_def S6 (SOC, SOE, Op_RegI, 22, S6->as_VMReg()); ++ reg_def S6_H (SOC, SOE, Op_RegI, 22, S6->as_VMReg()->next()); ++ reg_def S7 (SOC, SOE, Op_RegI, 23, S7->as_VMReg()); ++ reg_def S7_H (SOC, SOE, Op_RegI, 23, S7->as_VMReg()->next()); ++ reg_def T8 (SOC, SOC, Op_RegI, 24, T8->as_VMReg()); ++ reg_def T8_H (SOC, SOC, Op_RegI, 24, T8->as_VMReg()->next()); ++ reg_def T9 (SOC, SOC, Op_RegI, 25, T9->as_VMReg()); ++ reg_def T9_H (SOC, SOC, Op_RegI, 25, T9->as_VMReg()->next()); ++ ++// Special Registers ++ reg_def K0 ( NS, NS, Op_RegI, 26, K0->as_VMReg()); ++ reg_def K1 ( NS, NS, Op_RegI, 27, K1->as_VMReg()); ++ reg_def GP ( NS, NS, Op_RegI, 28, GP->as_VMReg()); ++ reg_def GP_H ( NS, NS, Op_RegI, 28, GP->as_VMReg()->next()); ++ reg_def SP ( NS, NS, Op_RegI, 29, SP->as_VMReg()); ++ reg_def SP_H ( NS, NS, Op_RegI, 29, SP->as_VMReg()->next()); ++ reg_def FP ( NS, NS, Op_RegI, 30, FP->as_VMReg()); ++ reg_def FP_H ( NS, NS, Op_RegI, 30, FP->as_VMReg()->next()); ++ reg_def RA ( NS, NS, Op_RegI, 31, RA->as_VMReg()); ++ reg_def RA_H ( NS, NS, Op_RegI, 31, RA->as_VMReg()->next()); ++ ++// Floating registers. 
++reg_def F0 ( SOC, SOC, Op_RegF, 0, F0->as_VMReg()); ++reg_def F0_H ( SOC, SOC, Op_RegF, 0, F0->as_VMReg()->next()); ++reg_def F1 ( SOC, SOC, Op_RegF, 1, F1->as_VMReg()); ++reg_def F1_H ( SOC, SOC, Op_RegF, 1, F1->as_VMReg()->next()); ++reg_def F2 ( SOC, SOC, Op_RegF, 2, F2->as_VMReg()); ++reg_def F2_H ( SOC, SOC, Op_RegF, 2, F2->as_VMReg()->next()); ++reg_def F3 ( SOC, SOC, Op_RegF, 3, F3->as_VMReg()); ++reg_def F3_H ( SOC, SOC, Op_RegF, 3, F3->as_VMReg()->next()); ++reg_def F4 ( SOC, SOC, Op_RegF, 4, F4->as_VMReg()); ++reg_def F4_H ( SOC, SOC, Op_RegF, 4, F4->as_VMReg()->next()); ++reg_def F5 ( SOC, SOC, Op_RegF, 5, F5->as_VMReg()); ++reg_def F5_H ( SOC, SOC, Op_RegF, 5, F5->as_VMReg()->next()); ++reg_def F6 ( SOC, SOC, Op_RegF, 6, F6->as_VMReg()); ++reg_def F6_H ( SOC, SOC, Op_RegF, 6, F6->as_VMReg()->next()); ++reg_def F7 ( SOC, SOC, Op_RegF, 7, F7->as_VMReg()); ++reg_def F7_H ( SOC, SOC, Op_RegF, 7, F7->as_VMReg()->next()); ++reg_def F8 ( SOC, SOC, Op_RegF, 8, F8->as_VMReg()); ++reg_def F8_H ( SOC, SOC, Op_RegF, 8, F8->as_VMReg()->next()); ++reg_def F9 ( SOC, SOC, Op_RegF, 9, F9->as_VMReg()); ++reg_def F9_H ( SOC, SOC, Op_RegF, 9, F9->as_VMReg()->next()); ++reg_def F10 ( SOC, SOC, Op_RegF, 10, F10->as_VMReg()); ++reg_def F10_H ( SOC, SOC, Op_RegF, 10, F10->as_VMReg()->next()); ++reg_def F11 ( SOC, SOC, Op_RegF, 11, F11->as_VMReg()); ++reg_def F11_H ( SOC, SOC, Op_RegF, 11, F11->as_VMReg()->next()); ++reg_def F12 ( SOC, SOC, Op_RegF, 12, F12->as_VMReg()); ++reg_def F12_H ( SOC, SOC, Op_RegF, 12, F12->as_VMReg()->next()); ++reg_def F13 ( SOC, SOC, Op_RegF, 13, F13->as_VMReg()); ++reg_def F13_H ( SOC, SOC, Op_RegF, 13, F13->as_VMReg()->next()); ++reg_def F14 ( SOC, SOC, Op_RegF, 14, F14->as_VMReg()); ++reg_def F14_H ( SOC, SOC, Op_RegF, 14, F14->as_VMReg()->next()); ++reg_def F15 ( SOC, SOC, Op_RegF, 15, F15->as_VMReg()); ++reg_def F15_H ( SOC, SOC, Op_RegF, 15, F15->as_VMReg()->next()); ++reg_def F16 ( SOC, SOC, Op_RegF, 16, F16->as_VMReg()); ++reg_def F16_H ( SOC, SOC, Op_RegF, 16, F16->as_VMReg()->next()); ++reg_def F17 ( SOC, SOC, Op_RegF, 17, F17->as_VMReg()); ++reg_def F17_H ( SOC, SOC, Op_RegF, 17, F17->as_VMReg()->next()); ++reg_def F18 ( SOC, SOC, Op_RegF, 18, F18->as_VMReg()); ++reg_def F18_H ( SOC, SOC, Op_RegF, 18, F18->as_VMReg()->next()); ++reg_def F19 ( SOC, SOC, Op_RegF, 19, F19->as_VMReg()); ++reg_def F19_H ( SOC, SOC, Op_RegF, 19, F19->as_VMReg()->next()); ++reg_def F20 ( SOC, SOC, Op_RegF, 20, F20->as_VMReg()); ++reg_def F20_H ( SOC, SOC, Op_RegF, 20, F20->as_VMReg()->next()); ++reg_def F21 ( SOC, SOC, Op_RegF, 21, F21->as_VMReg()); ++reg_def F21_H ( SOC, SOC, Op_RegF, 21, F21->as_VMReg()->next()); ++reg_def F22 ( SOC, SOC, Op_RegF, 22, F22->as_VMReg()); ++reg_def F22_H ( SOC, SOC, Op_RegF, 22, F22->as_VMReg()->next()); ++reg_def F23 ( SOC, SOC, Op_RegF, 23, F23->as_VMReg()); ++reg_def F23_H ( SOC, SOC, Op_RegF, 23, F23->as_VMReg()->next()); ++reg_def F24 ( SOC, SOC, Op_RegF, 24, F24->as_VMReg()); ++reg_def F24_H ( SOC, SOC, Op_RegF, 24, F24->as_VMReg()->next()); ++reg_def F25 ( SOC, SOC, Op_RegF, 25, F25->as_VMReg()); ++reg_def F25_H ( SOC, SOC, Op_RegF, 25, F25->as_VMReg()->next()); ++reg_def F26 ( SOC, SOC, Op_RegF, 26, F26->as_VMReg()); ++reg_def F26_H ( SOC, SOC, Op_RegF, 26, F26->as_VMReg()->next()); ++reg_def F27 ( SOC, SOC, Op_RegF, 27, F27->as_VMReg()); ++reg_def F27_H ( SOC, SOC, Op_RegF, 27, F27->as_VMReg()->next()); ++reg_def F28 ( SOC, SOC, Op_RegF, 28, F28->as_VMReg()); ++reg_def F28_H ( SOC, SOC, Op_RegF, 28, F28->as_VMReg()->next()); ++reg_def F29 ( 
SOC, SOC, Op_RegF, 29, F29->as_VMReg()); ++reg_def F29_H ( SOC, SOC, Op_RegF, 29, F29->as_VMReg()->next()); ++reg_def F30 ( SOC, SOC, Op_RegF, 30, F30->as_VMReg()); ++reg_def F30_H ( SOC, SOC, Op_RegF, 30, F30->as_VMReg()->next()); ++reg_def F31 ( SOC, SOC, Op_RegF, 31, F31->as_VMReg()); ++reg_def F31_H ( SOC, SOC, Op_RegF, 31, F31->as_VMReg()->next()); ++ ++ ++// ---------------------------- ++// Special Registers ++//S6 is used for get_thread(S6) ++//S5 is uesd for heapbase of compressed oop ++alloc_class chunk0( ++ S7, S7_H, ++ S0, S0_H, ++ S1, S1_H, ++ S2, S2_H, ++ S4, S4_H, ++ S5, S5_H, ++ S6, S6_H, ++ S3, S3_H, ++ T2, T2_H, ++ T3, T3_H, ++ T8, T8_H, ++ T9, T9_H, ++ T1, T1_H, // inline_cache_reg ++ V1, V1_H, ++ A7, A7_H, ++ A6, A6_H, ++ A5, A5_H, ++ A4, A4_H, ++ V0, V0_H, ++ A3, A3_H, ++ A2, A2_H, ++ A1, A1_H, ++ A0, A0_H, ++ T0, T0_H, ++ GP, GP_H ++ RA, RA_H, ++ SP, SP_H, // stack_pointer ++ FP, FP_H // frame_pointer ++ ); ++ ++alloc_class chunk1( F0, F0_H, ++ F1, F1_H, ++ F2, F2_H, ++ F3, F3_H, ++ F4, F4_H, ++ F5, F5_H, ++ F6, F6_H, ++ F7, F7_H, ++ F8, F8_H, ++ F9, F9_H, ++ F10, F10_H, ++ F11, F11_H, ++ F20, F20_H, ++ F21, F21_H, ++ F22, F22_H, ++ F23, F23_H, ++ F24, F24_H, ++ F25, F25_H, ++ F26, F26_H, ++ F27, F27_H, ++ F28, F28_H, ++ F19, F19_H, ++ F18, F18_H, ++ F17, F17_H, ++ F16, F16_H, ++ F15, F15_H, ++ F14, F14_H, ++ F13, F13_H, ++ F12, F12_H, ++ F29, F29_H, ++ F30, F30_H, ++ F31, F31_H); ++ ++reg_class s_reg( S0, S1, S2, S3, S4, S5, S6, S7 ); ++reg_class s0_reg( S0 ); ++reg_class s1_reg( S1 ); ++reg_class s2_reg( S2 ); ++reg_class s3_reg( S3 ); ++reg_class s4_reg( S4 ); ++reg_class s5_reg( S5 ); ++reg_class s6_reg( S6 ); ++reg_class s7_reg( S7 ); ++ ++reg_class t_reg( T0, T1, T2, T3, T8, T9 ); ++reg_class t0_reg( T0 ); ++reg_class t1_reg( T1 ); ++reg_class t2_reg( T2 ); ++reg_class t3_reg( T3 ); ++reg_class t8_reg( T8 ); ++reg_class t9_reg( T9 ); ++ ++reg_class a_reg( A0, A1, A2, A3, A4, A5, A6, A7 ); ++reg_class a0_reg( A0 ); ++reg_class a1_reg( A1 ); ++reg_class a2_reg( A2 ); ++reg_class a3_reg( A3 ); ++reg_class a4_reg( A4 ); ++reg_class a5_reg( A5 ); ++reg_class a6_reg( A6 ); ++reg_class a7_reg( A7 ); ++ ++reg_class v0_reg( V0 ); ++reg_class v1_reg( V1 ); ++ ++reg_class sp_reg( SP, SP_H ); ++reg_class fp_reg( FP, FP_H ); ++ ++reg_class v0_long_reg( V0, V0_H ); ++reg_class v1_long_reg( V1, V1_H ); ++reg_class a0_long_reg( A0, A0_H ); ++reg_class a1_long_reg( A1, A1_H ); ++reg_class a2_long_reg( A2, A2_H ); ++reg_class a3_long_reg( A3, A3_H ); ++reg_class a4_long_reg( A4, A4_H ); ++reg_class a5_long_reg( A5, A5_H ); ++reg_class a6_long_reg( A6, A6_H ); ++reg_class a7_long_reg( A7, A7_H ); ++reg_class t0_long_reg( T0, T0_H ); ++reg_class t1_long_reg( T1, T1_H ); ++reg_class t2_long_reg( T2, T2_H ); ++reg_class t3_long_reg( T3, T3_H ); ++reg_class t8_long_reg( T8, T8_H ); ++reg_class t9_long_reg( T9, T9_H ); ++reg_class s0_long_reg( S0, S0_H ); ++reg_class s1_long_reg( S1, S1_H ); ++reg_class s2_long_reg( S2, S2_H ); ++reg_class s3_long_reg( S3, S3_H ); ++reg_class s4_long_reg( S4, S4_H ); ++reg_class s5_long_reg( S5, S5_H ); ++reg_class s6_long_reg( S6, S6_H ); ++reg_class s7_long_reg( S7, S7_H ); ++ ++reg_class int_reg( S7, S0, S1, S2, S4, S3, T8, T2, T3, T1, V1, A7, A6, A5, A4, V0, A3, A2, A1, A0, T0 ); ++ ++reg_class no_Ax_int_reg( S7, S0, S1, S2, S4, S3, T8, T2, T3, T1, V1, V0, T0 ); ++ ++reg_class p_reg( ++ S7, S7_H, ++ S0, S0_H, ++ S1, S1_H, ++ S2, S2_H, ++ S4, S4_H, ++ S3, S3_H, ++ T8, T8_H, ++ T2, T2_H, ++ T3, T3_H, ++ T1, T1_H, ++ A7, A7_H, ++ A6, A6_H, ++ A5, 
A5_H, ++ A4, A4_H, ++ A3, A3_H, ++ A2, A2_H, ++ A1, A1_H, ++ A0, A0_H, ++ T0, T0_H ++ ); ++ ++reg_class no_T8_p_reg( ++ S7, S7_H, ++ S0, S0_H, ++ S1, S1_H, ++ S2, S2_H, ++ S4, S4_H, ++ S3, S3_H, ++ T2, T2_H, ++ T3, T3_H, ++ T1, T1_H, ++ A7, A7_H, ++ A6, A6_H, ++ A5, A5_H, ++ A4, A4_H, ++ A3, A3_H, ++ A2, A2_H, ++ A1, A1_H, ++ A0, A0_H, ++ T0, T0_H ++ ); ++ ++reg_class long_reg( ++ S7, S7_H, ++ S0, S0_H, ++ S1, S1_H, ++ S2, S2_H, ++ S4, S4_H, ++ S3, S3_H, ++ T8, T8_H, ++ T2, T2_H, ++ T3, T3_H, ++ T1, T1_H, ++ A7, A7_H, ++ A6, A6_H, ++ A5, A5_H, ++ A4, A4_H, ++ A3, A3_H, ++ A2, A2_H, ++ A1, A1_H, ++ A0, A0_H, ++ T0, T0_H ++ ); ++ ++ ++// Floating point registers. ++// F31 are not used as temporary registers in D2I ++reg_class flt_reg( F0, F1, F2, F3, F4, F5, F6, F7, F8, F9, F10, F11, F12, F13, F14, F15, F16, F17, F18, F19, F20, F21, F22, F23, F24, F25, F26, F27, F28, F29, F31); ++reg_class dbl_reg( F0, F0_H, ++ F1, F1_H, ++ F2, F2_H, ++ F3, F3_H, ++ F4, F4_H, ++ F5, F5_H, ++ F6, F6_H, ++ F7, F7_H, ++ F8, F8_H, ++ F9, F9_H, ++ F10, F10_H, ++ F11, F11_H, ++ F12, F12_H, ++ F13, F13_H, ++ F14, F14_H, ++ F15, F15_H, ++ F16, F16_H, ++ F17, F17_H, ++ F18, F18_H, ++ F19, F19_H, ++ F20, F20_H, ++ F21, F21_H, ++ F22, F22_H, ++ F23, F23_H, ++ F24, F24_H, ++ F25, F25_H, ++ F26, F26_H, ++ F27, F27_H, ++ F28, F28_H, ++ F29, F29_H, ++ F31, F31_H); ++ ++reg_class flt_arg0( F12 ); ++reg_class dbl_arg0( F12, F12_H ); ++reg_class dbl_arg1( F14, F14_H ); ++ ++%} ++ ++//----------DEFINITION BLOCK--------------------------------------------------- ++// Define name --> value mappings to inform the ADLC of an integer valued name ++// Current support includes integer values in the range [0, 0x7FFFFFFF] ++// Format: ++// int_def ( , ); ++// Generated Code in ad_.hpp ++// #define () ++// // value == ++// Generated code in ad_.cpp adlc_verification() ++// assert( == , "Expect () to equal "); ++// ++definitions %{ ++ int_def DEFAULT_COST ( 100, 100); ++ int_def HUGE_COST (1000000, 1000000); ++ ++ // Memory refs are twice as expensive as run-of-the-mill. ++ int_def MEMORY_REF_COST ( 200, DEFAULT_COST * 2); ++ ++ // Branches are even more expensive. ++ int_def BRANCH_COST ( 300, DEFAULT_COST * 3); ++ // we use jr instruction to construct call, so more expensive ++ int_def CALL_COST ( 500, DEFAULT_COST * 5); ++/* ++ int_def EQUAL ( 1, 1 ); ++ int_def NOT_EQUAL ( 2, 2 ); ++ int_def GREATER ( 3, 3 ); ++ int_def GREATER_EQUAL ( 4, 4 ); ++ int_def LESS ( 5, 5 ); ++ int_def LESS_EQUAL ( 6, 6 ); ++*/ ++%} ++ ++ ++ ++//----------SOURCE BLOCK------------------------------------------------------- ++// This is a block of C++ code which provides values, functions, and ++// definitions necessary in the rest of the architecture description ++ ++source_hpp %{ ++// Header information of the source block. ++// Method declarations/definitions which are used outside ++// the ad-scope can conveniently be defined here. ++// ++// To keep related declarations/definitions/uses close together, ++// we switch between source %{ }% and source_hpp %{ }% freely as needed. ++ ++class CallStubImpl { ++ ++ //-------------------------------------------------------------- ++ //---< Used for optimization in Compile::shorten_branches >--- ++ //-------------------------------------------------------------- ++ ++ public: ++ // Size of call trampoline stub. 
++ static uint size_call_trampoline() { ++ return 0; // no call trampolines on this platform ++ } ++ ++ // number of relocations needed by a call trampoline stub ++ static uint reloc_call_trampoline() { ++ return 0; // no call trampolines on this platform ++ } ++}; ++ ++class HandlerImpl { ++ ++ public: ++ ++ static int emit_exception_handler(CodeBuffer &cbuf); ++ static int emit_deopt_handler(CodeBuffer& cbuf); ++ ++ static uint size_exception_handler() { ++ // NativeCall instruction size is the same as NativeJump. ++ // exception handler starts out as jump and can be patched to ++ // a call be deoptimization. (4932387) ++ // Note that this value is also credited (in output.cpp) to ++ // the size of the code section. ++ int size = NativeCall::instruction_size; ++ const uintx m = 16 - 1; ++ return mask_bits(size + m, ~m); ++ //return round_to(size, 16); ++ } ++ ++ static uint size_deopt_handler() { ++ int size = NativeCall::instruction_size; ++ const uintx m = 16 - 1; ++ return mask_bits(size + m, ~m); ++ //return round_to(size, 16); ++ } ++}; ++ ++%} // end source_hpp ++ ++source %{ ++ ++#define NO_INDEX 0 ++#define RELOC_IMM64 Assembler::imm_operand ++#define RELOC_DISP32 Assembler::disp32_operand ++ ++ ++#define __ _masm. ++ ++#define T0 RT0 ++#define T1 RT1 ++#define T2 RT2 ++#define T3 RT3 ++#define T8 RT8 ++#define T9 RT9 ++ ++ ++// Emit exception handler code. ++// Stuff framesize into a register and call a VM stub routine. ++int HandlerImpl::emit_exception_handler(CodeBuffer& cbuf) { ++ // Note that the code buffer's insts_mark is always relative to insts. ++ // That's why we must use the macroassembler to generate a handler. ++ MacroAssembler _masm(&cbuf); ++ address base = __ start_a_stub(size_exception_handler()); ++ if (base == NULL) { ++ ciEnv::current()->record_failure("CodeCache is full"); ++ return 0; // CodeBuffer::expand failed ++ } ++ ++ int offset = __ offset(); ++ ++ __ block_comment("; emit_exception_handler"); ++ ++ cbuf.set_insts_mark(); ++ __ relocate(relocInfo::runtime_call_type); ++ __ patchable_jump((address)OptoRuntime::exception_blob()->entry_point()); ++ __ align(16); ++ assert(__ offset() - offset <= (int) size_exception_handler(), "overflow"); ++ __ end_a_stub(); ++ return offset; ++} ++ ++// Emit deopt handler code. ++int HandlerImpl::emit_deopt_handler(CodeBuffer& cbuf) { ++ // Note that the code buffer's insts_mark is always relative to insts. ++ // That's why we must use the macroassembler to generate a handler. ++ MacroAssembler _masm(&cbuf); ++ address base = __ start_a_stub(size_deopt_handler()); ++ if (base == NULL) { ++ ciEnv::current()->record_failure("CodeCache is full"); ++ return 0; // CodeBuffer::expand failed ++ } ++ ++ int offset = __ offset(); ++ ++ __ block_comment("; emit_deopt_handler"); ++ ++ cbuf.set_insts_mark(); ++ __ relocate(relocInfo::runtime_call_type); ++ __ patchable_call(SharedRuntime::deopt_blob()->unpack()); ++ __ align(16); ++ assert(__ offset() - offset <= (int) size_deopt_handler(), "overflow"); ++ __ end_a_stub(); ++ return offset; ++} ++ ++ ++const bool Matcher::match_rule_supported(int opcode) { ++ if (!has_match_rule(opcode)) ++ return false; ++ ++ switch (opcode) { ++ //Op_CountLeadingZerosI Op_CountLeadingZerosL can be deleted, all MIPS CPUs support clz & dclz. 
++ case Op_CountLeadingZerosI: ++ case Op_CountLeadingZerosL: ++ if (!UseCountLeadingZerosInstructionMIPS64) ++ return false; ++ break; ++ case Op_CountTrailingZerosI: ++ case Op_CountTrailingZerosL: ++ if (!UseCountTrailingZerosInstructionMIPS64) ++ return false; ++ break; ++ } ++ ++ return true; // Per default match rules are supported. ++} ++ ++const bool Matcher::match_rule_supported_vector(int opcode, int vlen) { ++ // TODO ++ // identify extra cases that we might want to provide match rules for ++ // e.g. Op_ vector nodes and other intrinsics while guarding with vlen ++ bool ret_value = match_rule_supported(opcode); ++ // Add rules here. ++ ++ return ret_value; // Per default match rules are supported. ++} ++ ++const bool Matcher::has_predicated_vectors(void) { ++ return false; ++} ++ ++const int Matcher::float_pressure(int default_pressure_threshold) { ++ Unimplemented(); ++ return default_pressure_threshold; ++} ++ ++bool Matcher::is_short_branch_offset(int rule, int br_size, int offset) { ++ int offs = offset - br_size + 4; ++ // To be conservative on MIPS ++ // branch node should be end with: ++ // branch inst ++ // delay slot ++ const int safety_zone = 3 * BytesPerInstWord; ++ return Assembler::is_simm16((offs<0 ? offs-safety_zone : offs+safety_zone) >> 2); ++} ++ ++ ++// No additional cost for CMOVL. ++const int Matcher::long_cmove_cost() { return 0; } ++ ++// No CMOVF/CMOVD with SSE2 ++const int Matcher::float_cmove_cost() { return ConditionalMoveLimit; } ++ ++// Does the CPU require late expand (see block.cpp for description of late expand)? ++const bool Matcher::require_postalloc_expand = false; ++ ++// Do we need to mask the count passed to shift instructions or does ++// the cpu only look at the lower 5/6 bits anyway? ++const bool Matcher::need_masked_shift_count = false; ++ ++bool Matcher::narrow_oop_use_complex_address() { ++ assert(UseCompressedOops, "only for compressed oops code"); ++ return false; ++} ++ ++bool Matcher::narrow_klass_use_complex_address() { ++ assert(UseCompressedClassPointers, "only for compressed klass code"); ++ return false; ++} ++ ++bool Matcher::const_oop_prefer_decode() { ++ // Prefer ConN+DecodeN over ConP. ++ return true; ++} ++ ++bool Matcher::const_klass_prefer_decode() { ++ // TODO: Either support matching DecodeNKlass (heap-based) in operand ++ // or condisider the following: ++ // Prefer ConNKlass+DecodeNKlass over ConP in simple compressed klass mode. ++ //return Universe::narrow_klass_base() == NULL; ++ return true; ++} ++ ++// This is UltraSparc specific, true just means we have fast l2f conversion ++const bool Matcher::convL2FSupported(void) { ++ return true; ++} ++ ++// Max vector size in bytes. 0 if not supported. ++const int Matcher::vector_width_in_bytes(BasicType bt) { ++ if (MaxVectorSize == 0) ++ return 0; ++ assert(MaxVectorSize == 8, ""); ++ return 8; ++} ++ ++// Vector ideal reg ++const uint Matcher::vector_ideal_reg(int size) { ++ assert(MaxVectorSize == 8, ""); ++ switch(size) { ++ case 8: return Op_VecD; ++ } ++ ShouldNotReachHere(); ++ return 0; ++} ++ ++// Only lowest bits of xmm reg are used for vector shift count. ++const uint Matcher::vector_shift_count_ideal_reg(int size) { ++ fatal("vector shift is not supported"); ++ return Node::NotAMachineReg; ++} ++ ++ ++const bool Matcher::convi2l_type_required = true; ++ ++// Should the Matcher clone shifts on addressing modes, expecting them ++// to be subsumed into complex addressing expressions or compute them ++// into registers? 
++bool Matcher::clone_address_expressions(AddPNode* m, Matcher::MStack& mstack, VectorSet& address_visited) { ++ return clone_base_plus_offset_address(m, mstack, address_visited); ++} ++ ++void Compile::reshape_address(AddPNode* addp) { ++} ++ ++// Limits on vector size (number of elements) loaded into vector. ++const int Matcher::max_vector_size(const BasicType bt) { ++ assert(is_java_primitive(bt), "only primitive type vectors"); ++ return vector_width_in_bytes(bt)/type2aelembytes(bt); ++} ++ ++const int Matcher::min_vector_size(const BasicType bt) { ++ return max_vector_size(bt); // Same as max. ++} ++ ++// MIPS supports misaligned vectors store/load? FIXME ++const bool Matcher::misaligned_vectors_ok() { ++ return false; ++ //return !AlignVector; // can be changed by flag ++} ++ ++// Register for DIVI projection of divmodI ++RegMask Matcher::divI_proj_mask() { ++ ShouldNotReachHere(); ++ return RegMask(); ++} ++ ++// Register for MODI projection of divmodI ++RegMask Matcher::modI_proj_mask() { ++ ShouldNotReachHere(); ++ return RegMask(); ++} ++ ++// Register for DIVL projection of divmodL ++RegMask Matcher::divL_proj_mask() { ++ ShouldNotReachHere(); ++ return RegMask(); ++} ++ ++int Matcher::regnum_to_fpu_offset(int regnum) { ++ return regnum - 32; // The FP registers are in the second chunk ++} ++ ++ ++const bool Matcher::isSimpleConstant64(jlong value) { ++ // Will one (StoreL ConL) be cheaper than two (StoreI ConI)?. ++ return true; ++} ++ ++ ++// Return whether or not this register is ever used as an argument. This ++// function is used on startup to build the trampoline stubs in generateOptoStub. ++// Registers not mentioned will be killed by the VM call in the trampoline, and ++// arguments in those registers not be available to the callee. 
++bool Matcher::can_be_java_arg( int reg ) { ++ // Refer to: [sharedRuntime_mips_64.cpp] SharedRuntime::java_calling_convention() ++ if ( reg == T0_num || reg == T0_H_num ++ || reg == A0_num || reg == A0_H_num ++ || reg == A1_num || reg == A1_H_num ++ || reg == A2_num || reg == A2_H_num ++ || reg == A3_num || reg == A3_H_num ++ || reg == A4_num || reg == A4_H_num ++ || reg == A5_num || reg == A5_H_num ++ || reg == A6_num || reg == A6_H_num ++ || reg == A7_num || reg == A7_H_num ) ++ return true; ++ ++ if ( reg == F12_num || reg == F12_H_num ++ || reg == F13_num || reg == F13_H_num ++ || reg == F14_num || reg == F14_H_num ++ || reg == F15_num || reg == F15_H_num ++ || reg == F16_num || reg == F16_H_num ++ || reg == F17_num || reg == F17_H_num ++ || reg == F18_num || reg == F18_H_num ++ || reg == F19_num || reg == F19_H_num ) ++ return true; ++ ++ return false; ++} ++ ++bool Matcher::is_spillable_arg( int reg ) { ++ return can_be_java_arg(reg); ++} ++ ++bool Matcher::use_asm_for_ldiv_by_con( jlong divisor ) { ++ return false; ++} ++ ++// Register for MODL projection of divmodL ++RegMask Matcher::modL_proj_mask() { ++ ShouldNotReachHere(); ++ return RegMask(); ++} ++ ++const RegMask Matcher::method_handle_invoke_SP_save_mask() { ++ return FP_REG_mask(); ++} ++ ++// MIPS doesn't support AES intrinsics ++const bool Matcher::pass_original_key_for_aes() { ++ return false; ++} ++ ++int CallStaticJavaDirectNode::compute_padding(int current_offset) const { ++ const uintx m = alignment_required() - 1; ++ return mask_bits(current_offset + m, ~m) - current_offset; ++} ++ ++int CallDynamicJavaDirectNode::compute_padding(int current_offset) const { ++ const uintx m = alignment_required() - 1; ++ return mask_bits(current_offset + m, ~m) - current_offset; ++} ++ ++int CallLeafNoFPDirectNode::compute_padding(int current_offset) const { ++ const uintx m = alignment_required() - 1; ++ return mask_bits(current_offset + m, ~m) - current_offset; ++} ++ ++int CallLeafDirectNode::compute_padding(int current_offset) const { ++ const uintx m = alignment_required() - 1; ++ return mask_bits(current_offset + m, ~m) - current_offset; ++} ++ ++int CallRuntimeDirectNode::compute_padding(int current_offset) const { ++ const uintx m = alignment_required() - 1; ++ return mask_bits(current_offset + m, ~m) - current_offset; ++} ++ ++// If CPU can load and store mis-aligned doubles directly then no fixup is ++// needed. Else we split the double into 2 integer pieces and move it ++// piece-by-piece. Only happens when passing doubles into C code as the ++// Java calling convention forces doubles to be aligned. ++const bool Matcher::misaligned_doubles_ok = false; ++// Do floats take an entire double register or just half? ++//const bool Matcher::float_in_double = true; ++bool Matcher::float_in_double() { return false; } ++// Do ints take an entire long register or just half? ++const bool Matcher::int_in_long = true; ++// Is it better to copy float constants, or load them directly from memory? ++// Intel can load a float constant from a direct address, requiring no ++// extra registers. Most RISCs will have to materialize an address into a ++// register first, so they would do better to copy the constant from stack. ++const bool Matcher::rematerialize_float_constants = false; ++// Advertise here if the CPU requires explicit rounding operations ++// to implement the UseStrictFP mode. ++const bool Matcher::strict_fp_requires_explicit_rounding = false; ++// false => size gets scaled to BytesPerLong, ok. 
++const bool Matcher::init_array_count_is_in_bytes = false; ++ ++// Indicate if the safepoint node needs the polling page as an input. ++// it does if the polling page is more than disp32 away. ++bool SafePointNode::needs_polling_address_input() { ++ return SafepointMechanism::uses_thread_local_poll(); ++} ++ ++#ifndef PRODUCT ++void MachBreakpointNode::format( PhaseRegAlloc *, outputStream* st ) const { ++ st->print("BRK"); ++} ++#endif ++ ++void MachBreakpointNode::emit(CodeBuffer &cbuf, PhaseRegAlloc* ra_) const { ++ MacroAssembler _masm(&cbuf); ++ __ brk(5); ++} ++ ++uint MachBreakpointNode::size(PhaseRegAlloc* ra_) const { ++ return MachNode::size(ra_); ++} ++ ++ ++ ++// !!!!! Special hack to get all type of calls to specify the byte offset ++// from the start of the call to the point where the return address ++// will point. ++int MachCallStaticJavaNode::ret_addr_offset() { ++ //lui ++ //ori ++ //nop ++ //nop ++ //jalr ++ //nop ++ return 24; ++} ++ ++int MachCallDynamicJavaNode::ret_addr_offset() { ++ //lui IC_Klass, ++ //ori IC_Klass, ++ //dsll IC_Klass ++ //ori IC_Klass ++ ++ //lui T9 ++ //ori T9 ++ //nop ++ //nop ++ //jalr T9 ++ //nop ++ return 4 * 4 + 4 * 6; ++} ++ ++//============================================================================= ++ ++// Figure out which register class each belongs in: rc_int, rc_float, rc_stack ++enum RC { rc_bad, rc_int, rc_float, rc_stack }; ++static enum RC rc_class( OptoReg::Name reg ) { ++ if( !OptoReg::is_valid(reg) ) return rc_bad; ++ if (OptoReg::is_stack(reg)) return rc_stack; ++ VMReg r = OptoReg::as_VMReg(reg); ++ if (r->is_Register()) return rc_int; ++ assert(r->is_FloatRegister(), "must be"); ++ return rc_float; ++} ++ ++uint MachSpillCopyNode::implementation( CodeBuffer *cbuf, PhaseRegAlloc *ra_, bool do_size, outputStream* st ) const { ++ // Get registers to move ++ OptoReg::Name src_second = ra_->get_reg_second(in(1)); ++ OptoReg::Name src_first = ra_->get_reg_first(in(1)); ++ OptoReg::Name dst_second = ra_->get_reg_second(this ); ++ OptoReg::Name dst_first = ra_->get_reg_first(this ); ++ ++ enum RC src_second_rc = rc_class(src_second); ++ enum RC src_first_rc = rc_class(src_first); ++ enum RC dst_second_rc = rc_class(dst_second); ++ enum RC dst_first_rc = rc_class(dst_first); ++ ++ assert(OptoReg::is_valid(src_first) && OptoReg::is_valid(dst_first), "must move at least 1 register" ); ++ ++ // Generate spill code! 
++ ++ if( src_first == dst_first && src_second == dst_second ) ++ return 0; // Self copy, no move ++ ++ if (src_first_rc == rc_stack) { ++ // mem -> ++ if (dst_first_rc == rc_stack) { ++ // mem -> mem ++ assert(src_second != dst_first, "overlap"); ++ if ((src_first & 1) == 0 && src_first + 1 == src_second && ++ (dst_first & 1) == 0 && dst_first + 1 == dst_second) { ++ // 64-bit ++ int src_offset = ra_->reg2offset(src_first); ++ int dst_offset = ra_->reg2offset(dst_first); ++ if (cbuf) { ++ MacroAssembler _masm(cbuf); ++ __ ld(AT, Address(SP, src_offset)); ++ __ sd(AT, Address(SP, dst_offset)); ++#ifndef PRODUCT ++ } else { ++ st->print("\n\t"); ++ st->print("ld AT, [SP + #%d]\t# 64-bit mem-mem spill 1\n\t" ++ "sd AT, [SP + #%d]", ++ src_offset, dst_offset); ++#endif ++ } ++ } else { ++ // 32-bit ++ assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform"); ++ assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform"); ++ // No pushl/popl, so: ++ int src_offset = ra_->reg2offset(src_first); ++ int dst_offset = ra_->reg2offset(dst_first); ++ if (cbuf) { ++ MacroAssembler _masm(cbuf); ++ __ lw(AT, Address(SP, src_offset)); ++ __ sw(AT, Address(SP, dst_offset)); ++#ifndef PRODUCT ++ } else { ++ st->print("\n\t"); ++ st->print("lw AT, [SP + #%d] spill 2\n\t" ++ "sw AT, [SP + #%d]\n\t", ++ src_offset, dst_offset); ++#endif ++ } ++ } ++ return 0; ++ } else if (dst_first_rc == rc_int) { ++ // mem -> gpr ++ if ((src_first & 1) == 0 && src_first + 1 == src_second && ++ (dst_first & 1) == 0 && dst_first + 1 == dst_second) { ++ // 64-bit ++ int offset = ra_->reg2offset(src_first); ++ if (cbuf) { ++ MacroAssembler _masm(cbuf); ++ __ ld(as_Register(Matcher::_regEncode[dst_first]), Address(SP, offset)); ++#ifndef PRODUCT ++ } else { ++ st->print("\n\t"); ++ st->print("ld %s, [SP + #%d]\t# spill 3", ++ Matcher::regName[dst_first], ++ offset); ++#endif ++ } ++ } else { ++ // 32-bit ++ assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform"); ++ assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform"); ++ int offset = ra_->reg2offset(src_first); ++ if (cbuf) { ++ MacroAssembler _masm(cbuf); ++ if (this->ideal_reg() == Op_RegI) ++ __ lw(as_Register(Matcher::_regEncode[dst_first]), Address(SP, offset)); ++ else ++ __ lwu(as_Register(Matcher::_regEncode[dst_first]), Address(SP, offset)); ++#ifndef PRODUCT ++ } else { ++ st->print("\n\t"); ++ if (this->ideal_reg() == Op_RegI) ++ st->print("lw %s, [SP + #%d]\t# spill 4", ++ Matcher::regName[dst_first], ++ offset); ++ else ++ st->print("lwu %s, [SP + #%d]\t# spill 5", ++ Matcher::regName[dst_first], ++ offset); ++#endif ++ } ++ } ++ return 0; ++ } else if (dst_first_rc == rc_float) { ++ // mem-> xmm ++ if ((src_first & 1) == 0 && src_first + 1 == src_second && ++ (dst_first & 1) == 0 && dst_first + 1 == dst_second) { ++ // 64-bit ++ int offset = ra_->reg2offset(src_first); ++ if (cbuf) { ++ MacroAssembler _masm(cbuf); ++ __ ldc1( as_FloatRegister(Matcher::_regEncode[dst_first]), Address(SP, offset)); ++#ifndef PRODUCT ++ } else { ++ st->print("\n\t"); ++ st->print("ldc1 %s, [SP + #%d]\t# spill 6", ++ Matcher::regName[dst_first], ++ offset); ++#endif ++ } ++ } else { ++ // 32-bit ++ assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform"); ++ assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform"); ++ int offset = ra_->reg2offset(src_first); ++ if (cbuf) { ++ MacroAssembler _masm(cbuf); ++ __ lwc1( 
as_FloatRegister(Matcher::_regEncode[dst_first]), Address(SP, offset)); ++#ifndef PRODUCT ++ } else { ++ st->print("\n\t"); ++ st->print("lwc1 %s, [SP + #%d]\t# spill 7", ++ Matcher::regName[dst_first], ++ offset); ++#endif ++ } ++ } ++ return 0; ++ } ++ } else if (src_first_rc == rc_int) { ++ // gpr -> ++ if (dst_first_rc == rc_stack) { ++ // gpr -> mem ++ if ((src_first & 1) == 0 && src_first + 1 == src_second && ++ (dst_first & 1) == 0 && dst_first + 1 == dst_second) { ++ // 64-bit ++ int offset = ra_->reg2offset(dst_first); ++ if (cbuf) { ++ MacroAssembler _masm(cbuf); ++ __ sd(as_Register(Matcher::_regEncode[src_first]), Address(SP, offset)); ++#ifndef PRODUCT ++ } else { ++ st->print("\n\t"); ++ st->print("sd %s, [SP + #%d] # spill 8", ++ Matcher::regName[src_first], ++ offset); ++#endif ++ } ++ } else { ++ // 32-bit ++ assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform"); ++ assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform"); ++ int offset = ra_->reg2offset(dst_first); ++ if (cbuf) { ++ MacroAssembler _masm(cbuf); ++ __ sw(as_Register(Matcher::_regEncode[src_first]), Address(SP, offset)); ++#ifndef PRODUCT ++ } else { ++ st->print("\n\t"); ++ st->print("sw %s, [SP + #%d]\t# spill 9", ++ Matcher::regName[src_first], offset); ++#endif ++ } ++ } ++ return 0; ++ } else if (dst_first_rc == rc_int) { ++ // gpr -> gpr ++ if ((src_first & 1) == 0 && src_first + 1 == src_second && ++ (dst_first & 1) == 0 && dst_first + 1 == dst_second) { ++ // 64-bit ++ if (cbuf) { ++ MacroAssembler _masm(cbuf); ++ __ move(as_Register(Matcher::_regEncode[dst_first]), ++ as_Register(Matcher::_regEncode[src_first])); ++#ifndef PRODUCT ++ } else { ++ st->print("\n\t"); ++ st->print("move(64bit) %s <-- %s\t# spill 10", ++ Matcher::regName[dst_first], ++ Matcher::regName[src_first]); ++#endif ++ } ++ return 0; ++ } else { ++ // 32-bit ++ assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform"); ++ assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform"); ++ if (cbuf) { ++ MacroAssembler _masm(cbuf); ++ if (this->ideal_reg() == Op_RegI) ++ __ move_u32(as_Register(Matcher::_regEncode[dst_first]), as_Register(Matcher::_regEncode[src_first])); ++ else ++ __ daddu(as_Register(Matcher::_regEncode[dst_first]), as_Register(Matcher::_regEncode[src_first]), R0); ++#ifndef PRODUCT ++ } else { ++ st->print("\n\t"); ++ st->print("move(32-bit) %s <-- %s\t# spill 11", ++ Matcher::regName[dst_first], ++ Matcher::regName[src_first]); ++#endif ++ } ++ return 0; ++ } ++ } else if (dst_first_rc == rc_float) { ++ // gpr -> xmm ++ if ((src_first & 1) == 0 && src_first + 1 == src_second && ++ (dst_first & 1) == 0 && dst_first + 1 == dst_second) { ++ // 64-bit ++ if (cbuf) { ++ MacroAssembler _masm(cbuf); ++ __ dmtc1(as_Register(Matcher::_regEncode[src_first]), as_FloatRegister(Matcher::_regEncode[dst_first])); ++#ifndef PRODUCT ++ } else { ++ st->print("\n\t"); ++ st->print("dmtc1 %s, %s\t# spill 12", ++ Matcher::regName[dst_first], ++ Matcher::regName[src_first]); ++#endif ++ } ++ } else { ++ // 32-bit ++ assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform"); ++ assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform"); ++ if (cbuf) { ++ MacroAssembler _masm(cbuf); ++ __ mtc1( as_Register(Matcher::_regEncode[src_first]), as_FloatRegister(Matcher::_regEncode[dst_first]) ); ++#ifndef PRODUCT ++ } else { ++ st->print("\n\t"); ++ st->print("mtc1 %s, %s\t# spill 13", ++ Matcher::regName[dst_first], 
++ Matcher::regName[src_first]); ++#endif ++ } ++ } ++ return 0; ++ } ++ } else if (src_first_rc == rc_float) { ++ // xmm -> ++ if (dst_first_rc == rc_stack) { ++ // xmm -> mem ++ if ((src_first & 1) == 0 && src_first + 1 == src_second && ++ (dst_first & 1) == 0 && dst_first + 1 == dst_second) { ++ // 64-bit ++ int offset = ra_->reg2offset(dst_first); ++ if (cbuf) { ++ MacroAssembler _masm(cbuf); ++ __ sdc1( as_FloatRegister(Matcher::_regEncode[src_first]), Address(SP, offset) ); ++#ifndef PRODUCT ++ } else { ++ st->print("\n\t"); ++ st->print("sdc1 %s, [SP + #%d]\t# spill 14", ++ Matcher::regName[src_first], ++ offset); ++#endif ++ } ++ } else { ++ // 32-bit ++ assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform"); ++ assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform"); ++ int offset = ra_->reg2offset(dst_first); ++ if (cbuf) { ++ MacroAssembler _masm(cbuf); ++ __ swc1(as_FloatRegister(Matcher::_regEncode[src_first]), Address(SP, offset)); ++#ifndef PRODUCT ++ } else { ++ st->print("\n\t"); ++ st->print("swc1 %s, [SP + #%d]\t# spill 15", ++ Matcher::regName[src_first], ++ offset); ++#endif ++ } ++ } ++ return 0; ++ } else if (dst_first_rc == rc_int) { ++ // xmm -> gpr ++ if ((src_first & 1) == 0 && src_first + 1 == src_second && ++ (dst_first & 1) == 0 && dst_first + 1 == dst_second) { ++ // 64-bit ++ if (cbuf) { ++ MacroAssembler _masm(cbuf); ++ __ dmfc1( as_Register(Matcher::_regEncode[dst_first]), as_FloatRegister(Matcher::_regEncode[src_first])); ++#ifndef PRODUCT ++ } else { ++ st->print("\n\t"); ++ st->print("dmfc1 %s, %s\t# spill 16", ++ Matcher::regName[dst_first], ++ Matcher::regName[src_first]); ++#endif ++ } ++ } else { ++ // 32-bit ++ assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform"); ++ assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform"); ++ if (cbuf) { ++ MacroAssembler _masm(cbuf); ++ __ mfc1( as_Register(Matcher::_regEncode[dst_first]), as_FloatRegister(Matcher::_regEncode[src_first])); ++#ifndef PRODUCT ++ } else { ++ st->print("\n\t"); ++ st->print("mfc1 %s, %s\t# spill 17", ++ Matcher::regName[dst_first], ++ Matcher::regName[src_first]); ++#endif ++ } ++ } ++ return 0; ++ } else if (dst_first_rc == rc_float) { ++ // xmm -> xmm ++ if ((src_first & 1) == 0 && src_first + 1 == src_second && ++ (dst_first & 1) == 0 && dst_first + 1 == dst_second) { ++ // 64-bit ++ if (cbuf) { ++ MacroAssembler _masm(cbuf); ++ __ mov_d( as_FloatRegister(Matcher::_regEncode[dst_first]), as_FloatRegister(Matcher::_regEncode[src_first])); ++#ifndef PRODUCT ++ } else { ++ st->print("\n\t"); ++ st->print("mov_d %s <-- %s\t# spill 18", ++ Matcher::regName[dst_first], ++ Matcher::regName[src_first]); ++#endif ++ } ++ } else { ++ // 32-bit ++ assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform"); ++ assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform"); ++ if (cbuf) { ++ MacroAssembler _masm(cbuf); ++ __ mov_s( as_FloatRegister(Matcher::_regEncode[dst_first]), as_FloatRegister(Matcher::_regEncode[src_first])); ++#ifndef PRODUCT ++ } else { ++ st->print("\n\t"); ++ st->print("mov_s %s <-- %s\t# spill 19", ++ Matcher::regName[dst_first], ++ Matcher::regName[src_first]); ++#endif ++ } ++ } ++ return 0; ++ } ++ } ++ ++ assert(0," foo "); ++ Unimplemented(); ++ return 0; ++} ++ ++#ifndef PRODUCT ++void MachSpillCopyNode::format( PhaseRegAlloc *ra_, outputStream* st ) const { ++ implementation( NULL, ra_, false, st ); ++} ++#endif ++ ++void 
MachSpillCopyNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const { ++ implementation( &cbuf, ra_, false, NULL ); ++} ++ ++uint MachSpillCopyNode::size(PhaseRegAlloc *ra_) const { ++ return MachNode::size(ra_); ++} ++ ++//============================================================================= ++#ifndef PRODUCT ++void MachEpilogNode::format( PhaseRegAlloc *ra_, outputStream* st ) const { ++ Compile *C = ra_->C; ++ int framesize = C->frame_size_in_bytes(); ++ ++ assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned"); ++ ++ st->print_cr("daddiu SP, SP, %d # Rlease stack @ MachEpilogNode", framesize); ++ st->print("\t"); ++ if (UseLEXT1) { ++ st->print_cr("gslq RA, FP, SP, %d # Restore FP & RA @ MachEpilogNode", -wordSize*2); ++ } else { ++ st->print_cr("ld RA, SP, %d # Restore RA @ MachEpilogNode", -wordSize); ++ st->print("\t"); ++ st->print_cr("ld FP, SP, %d # Restore FP @ MachEpilogNode", -wordSize*2); ++ } ++ ++ if( do_polling() && C->is_method_compilation() ) { ++ st->print("\t"); ++ if (SafepointMechanism::uses_thread_local_poll()) { ++ st->print_cr("ld AT, poll_offset[thread] #polling_page_address\n\t" ++ "lw AT, [AT]\t" ++ "# Safepoint: poll for GC"); ++ } else { ++ st->print_cr("Poll Safepoint # MachEpilogNode"); ++ } ++ } ++} ++#endif ++ ++void MachEpilogNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const { ++ Compile *C = ra_->C; ++ MacroAssembler _masm(&cbuf); ++ int framesize = C->frame_size_in_bytes(); ++ ++ assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned"); ++ assert(Assembler::is_simm16(framesize), "daddiu uses a signed 16-bit int"); ++ ++ if (UseLEXT1) { ++ __ gslq(RA, FP, SP, framesize - wordSize * 2); ++ } else { ++ __ ld(RA, SP, framesize - wordSize ); ++ __ ld(FP, SP, framesize - wordSize * 2); ++ } ++ __ daddiu(SP, SP, framesize); ++ ++ if (StackReservedPages > 0 && C->has_reserved_stack_access()) { ++ __ reserved_stack_check(); ++ } ++ ++ Register thread = TREG; ++#ifndef OPT_THREAD ++ __ get_thread(thread); ++#endif ++ ++ if( do_polling() && C->is_method_compilation() ) { ++ if (SafepointMechanism::uses_thread_local_poll()) { ++ __ ld(AT, thread, in_bytes(Thread::polling_page_offset())); ++ __ relocate(relocInfo::poll_return_type); ++ __ lw(AT, AT, 0); ++ } else { ++ __ set64(AT, (long)os::get_polling_page()); ++ __ relocate(relocInfo::poll_return_type); ++ __ lw(AT, AT, 0); ++ } ++ } ++} ++ ++uint MachEpilogNode::size(PhaseRegAlloc *ra_) const { ++ return MachNode::size(ra_); // too many variables; just compute it the hard way fujie debug ++} ++ ++int MachEpilogNode::reloc() const { ++ return 0; // a large enough number ++} ++ ++const Pipeline * MachEpilogNode::pipeline() const { ++ return MachNode::pipeline_class(); ++} ++ ++int MachEpilogNode::safepoint_offset() const { return 0; } ++ ++//============================================================================= ++ ++#ifndef PRODUCT ++void BoxLockNode::format( PhaseRegAlloc *ra_, outputStream* st ) const { ++ int offset = ra_->reg2offset(in_RegMask(0).find_first_elem()); ++ int reg = ra_->get_reg_first(this); ++ st->print("ADDI %s, SP, %d @BoxLockNode",Matcher::regName[reg],offset); ++} ++#endif ++ ++ ++uint BoxLockNode::size(PhaseRegAlloc *ra_) const { ++ return 4; ++} ++ ++void BoxLockNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const { ++ MacroAssembler _masm(&cbuf); ++ int offset = ra_->reg2offset(in_RegMask(0).find_first_elem()); ++ int reg = ra_->get_encode(this); ++ ++ __ addiu(as_Register(reg), SP, offset); ++} ++ ++ ++//static int 
sizeof_FFree_Float_Stack_All = -1; ++ ++int MachCallRuntimeNode::ret_addr_offset() { ++ //lui ++ //ori ++ //dsll ++ //ori ++ //jalr ++ //nop ++ assert(NativeCall::instruction_size == 24, "in MachCallRuntimeNode::ret_addr_offset()"); ++ return NativeCall::instruction_size; ++} ++ ++ ++//============================================================================= ++#ifndef PRODUCT ++void MachNopNode::format( PhaseRegAlloc *, outputStream* st ) const { ++ st->print("NOP \t# %d bytes pad for loops and calls", 4 * _count); ++} ++#endif ++ ++void MachNopNode::emit(CodeBuffer &cbuf, PhaseRegAlloc * ) const { ++ MacroAssembler _masm(&cbuf); ++ int i = 0; ++ for(i = 0; i < _count; i++) ++ __ nop(); ++} ++ ++uint MachNopNode::size(PhaseRegAlloc *) const { ++ return 4 * _count; ++} ++const Pipeline* MachNopNode::pipeline() const { ++ return MachNode::pipeline_class(); ++} ++ ++//============================================================================= ++ ++//============================================================================= ++#ifndef PRODUCT ++void MachUEPNode::format( PhaseRegAlloc *ra_, outputStream* st ) const { ++ st->print_cr("load_klass(T9, T0)"); ++ st->print_cr("\tbeq(T9, iCache, L)"); ++ st->print_cr("\tnop"); ++ st->print_cr("\tjmp(SharedRuntime::get_ic_miss_stub(), relocInfo::runtime_call_type)"); ++ st->print_cr("\tnop"); ++ st->print_cr("\tnop"); ++ st->print_cr(" L:"); ++} ++#endif ++ ++ ++void MachUEPNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const { ++ MacroAssembler _masm(&cbuf); ++ int ic_reg = Matcher::inline_cache_reg_encode(); ++ Label L; ++ Register receiver = T0; ++ Register iCache = as_Register(ic_reg); ++ ++ __ load_klass(T9, receiver); ++ __ beq(T9, iCache, L); ++ __ delayed()->nop(); ++ __ jmp((address)SharedRuntime::get_ic_miss_stub(), relocInfo::runtime_call_type); ++ __ delayed()->nop(); ++ __ bind(L); ++} ++ ++uint MachUEPNode::size(PhaseRegAlloc *ra_) const { ++ return MachNode::size(ra_); ++} ++ ++ ++ ++//============================================================================= ++ ++const RegMask& MachConstantBaseNode::_out_RegMask = P_REG_mask(); ++ ++int Compile::ConstantTable::calculate_table_base_offset() const { ++ return 0; // absolute addressing, no offset ++} ++ ++bool MachConstantBaseNode::requires_postalloc_expand() const { return false; } ++void MachConstantBaseNode::postalloc_expand(GrowableArray *nodes, PhaseRegAlloc *ra_) { ++ ShouldNotReachHere(); ++} ++ ++void MachConstantBaseNode::emit(CodeBuffer& cbuf, PhaseRegAlloc* ra_) const { ++ Compile* C = ra_->C; ++ Compile::ConstantTable& constant_table = C->constant_table(); ++ MacroAssembler _masm(&cbuf); ++ ++ Register Rtoc = as_Register(ra_->get_encode(this)); ++ CodeSection* consts_section = __ code()->consts(); ++ int consts_size = consts_section->align_at_start(consts_section->size()); ++ assert(constant_table.size() == consts_size, "must be equal"); ++ ++ if (consts_section->size()) { ++ // Materialize the constant table base. 
++ address baseaddr = consts_section->start() + -(constant_table.table_base_offset()); ++ // RelocationHolder rspec = internal_word_Relocation::spec(baseaddr); ++ __ relocate(relocInfo::internal_word_type); ++ __ patchable_set48(Rtoc, (long)baseaddr); ++ } ++} ++ ++uint MachConstantBaseNode::size(PhaseRegAlloc* ra_) const { ++ // patchable_set48 (4 insts) ++ return 4 * 4; ++} ++ ++#ifndef PRODUCT ++void MachConstantBaseNode::format(PhaseRegAlloc* ra_, outputStream* st) const { ++ Register r = as_Register(ra_->get_encode(this)); ++ st->print("patchable_set48 %s, &constanttable (constant table base) @ MachConstantBaseNode", r->name()); ++} ++#endif ++ ++ ++//============================================================================= ++#ifndef PRODUCT ++void MachPrologNode::format( PhaseRegAlloc *ra_, outputStream* st ) const { ++ Compile* C = ra_->C; ++ ++ int framesize = C->frame_size_in_bytes(); ++ int bangsize = C->bang_size_in_bytes(); ++ assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned"); ++ ++ // Calls to C2R adapters often do not accept exceptional returns. ++ // We require that their callers must bang for them. But be careful, because ++ // some VM calls (such as call site linkage) can use several kilobytes of ++ // stack. But the stack safety zone should account for that. ++ // See bugs 4446381, 4468289, 4497237. ++ if (C->need_stack_bang(bangsize)) { ++ st->print_cr("# stack bang"); st->print("\t"); ++ } ++ if (UseLEXT1) { ++ st->print("gssq RA, FP, %d(SP) @ MachPrologNode\n\t", -wordSize*2); ++ } else { ++ st->print("sd RA, %d(SP) @ MachPrologNode\n\t", -wordSize); ++ st->print("sd FP, %d(SP) @ MachPrologNode\n\t", -wordSize*2); ++ } ++ st->print("daddiu FP, SP, -%d \n\t", wordSize*2); ++ st->print("daddiu SP, SP, -%d \t",framesize); ++} ++#endif ++ ++ ++void MachPrologNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const { ++ Compile* C = ra_->C; ++ MacroAssembler _masm(&cbuf); ++ ++ int framesize = C->frame_size_in_bytes(); ++ int bangsize = C->bang_size_in_bytes(); ++ ++ assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned"); ++ assert(Assembler::is_simm16(-framesize), "daddiu uses a signed 16-bit int"); ++ ++ // Make enough room for patch_verified_entry ++ __ nop(); ++ __ nop(); ++ ++ if (C->need_stack_bang(bangsize)) { ++ __ generate_stack_overflow_check(bangsize); ++ } ++ ++ __ daddiu(SP, SP, -framesize); ++ if (UseLEXT1) { ++ __ gssq(RA, FP, SP, framesize - wordSize * 2); ++ } else { ++ __ sd(RA, SP, framesize - wordSize); ++ __ sd(FP, SP, framesize - wordSize * 2); ++ } ++ __ daddiu(FP, SP, framesize - wordSize * 2); ++ ++ C->set_frame_complete(cbuf.insts_size()); ++ if (C->has_mach_constant_base_node()) { ++ // NOTE: We set the table base offset here because users might be ++ // emitted before MachConstantBaseNode. ++ Compile::ConstantTable& constant_table = C->constant_table(); ++ constant_table.set_table_base_offset(constant_table.calculate_table_base_offset()); ++ } ++} ++ ++ ++uint MachPrologNode::size(PhaseRegAlloc *ra_) const { ++ return MachNode::size(ra_); // too many variables; just compute it the hard way ++} ++ ++int MachPrologNode::reloc() const { ++ return 0; // a large enough number ++} ++ ++%} ++ ++//----------ENCODING BLOCK----------------------------------------------------- ++// This block specifies the encoding classes used by the compiler to output ++// byte streams. 
Encoding classes generate functions which are called by ++// Machine Instruction Nodes in order to generate the bit encoding of the ++// instruction. Operands specify their base encoding interface with the ++// interface keyword. There are currently supported four interfaces, ++// REG_INTER, CONST_INTER, MEMORY_INTER, & COND_INTER. REG_INTER causes an ++// operand to generate a function which returns its register number when ++// queried. CONST_INTER causes an operand to generate a function which ++// returns the value of the constant when queried. MEMORY_INTER causes an ++// operand to generate four functions which return the Base Register, the ++// Index Register, the Scale Value, and the Offset Value of the operand when ++// queried. COND_INTER causes an operand to generate six functions which ++// return the encoding code (ie - encoding bits for the instruction) ++// associated with each basic boolean condition for a conditional instruction. ++// Instructions specify two basic values for encoding. They use the ++// ins_encode keyword to specify their encoding class (which must be one of ++// the class names specified in the encoding block), and they use the ++// opcode keyword to specify, in order, their primary, secondary, and ++// tertiary opcode. Only the opcode sections which a particular instruction ++// needs for encoding need to be specified. ++encode %{ ++ ++ enc_class Java_To_Runtime (method meth) %{ // CALL Java_To_Runtime, Java_To_Runtime_Leaf ++ MacroAssembler _masm(&cbuf); ++ // This is the instruction starting address for relocation info. ++ __ block_comment("Java_To_Runtime"); ++ cbuf.set_insts_mark(); ++ __ relocate(relocInfo::runtime_call_type); ++ __ patchable_call((address)$meth$$method); ++ %} ++ ++ enc_class Java_Static_Call (method meth) %{ // JAVA STATIC CALL ++ // CALL to fixup routine. Fixup routine uses ScopeDesc info to determine ++ // who we intended to call. ++ MacroAssembler _masm(&cbuf); ++ address addr = (address)$meth$$method; ++ address call; ++ __ block_comment("Java_Static_Call"); ++ ++ if ( !_method ) { ++ // A call to a runtime wrapper, e.g. new, new_typeArray_Java, uncommon_trap. ++ call = __ trampoline_call(AddressLiteral(addr, relocInfo::runtime_call_type), &cbuf); ++ } else { ++ int method_index = resolved_method_index(cbuf); ++ RelocationHolder rspec = _optimized_virtual ? 
opt_virtual_call_Relocation::spec(method_index) ++ : static_call_Relocation::spec(method_index); ++ call = __ trampoline_call(AddressLiteral(addr, rspec), &cbuf); ++ ++ // Emit stub for static call ++ address stub = CompiledStaticCall::emit_to_interp_stub(cbuf); ++ if (stub == NULL) { ++ ciEnv::current()->record_failure("CodeCache is full"); ++ return; ++ } ++ } ++ if (call == NULL) { ++ ciEnv::current()->record_failure("CodeCache is full"); ++ return; ++ } ++ %} ++ ++ ++ // ++ // [Ref: LIR_Assembler::ic_call() ] ++ // ++ enc_class Java_Dynamic_Call (method meth) %{ // JAVA DYNAMIC CALL ++ MacroAssembler _masm(&cbuf); ++ __ block_comment("Java_Dynamic_Call"); ++ __ ic_call((address)$meth$$method, resolved_method_index(cbuf)); ++ %} ++ ++ ++ enc_class enc_PartialSubtypeCheck(mRegP result, mRegP sub, mRegP super, mRegI tmp) %{ ++ Register result = $result$$Register; ++ Register sub = $sub$$Register; ++ Register super = $super$$Register; ++ Register length = $tmp$$Register; ++ Register tmp = T9; ++ Label miss; ++ ++ // result may be the same as sub ++ // 47c B40: # B21 B41 <- B20 Freq: 0.155379 ++ // 47c partialSubtypeCheck result=S1, sub=S1, super=S3, length=S0 ++ // 4bc mov S2, NULL #@loadConP ++ // 4c0 beq S1, S2, B21 #@branchConP P=0.999999 C=-1.000000 ++ // ++ MacroAssembler _masm(&cbuf); ++ Label done; ++ __ check_klass_subtype_slow_path(sub, super, length, tmp, ++ NULL, &miss, ++ /*set_cond_codes:*/ true); ++ // Refer to X86_64's RDI ++ __ move(result, 0); ++ __ b(done); ++ __ delayed()->nop(); ++ ++ __ bind(miss); ++ __ move(result, 1); ++ __ bind(done); ++ %} ++ ++%} ++ ++ ++//---------MIPS FRAME-------------------------------------------------------------- ++// Definition of frame structure and management information. ++// ++// S T A C K L A Y O U T Allocators stack-slot number ++// | (to get allocators register number ++// G Owned by | | v add SharedInfo::stack0) ++// r CALLER | | ++// o | +--------+ pad to even-align allocators stack-slot ++// w V | pad0 | numbers; owned by CALLER ++// t -----------+--------+----> Matcher::_in_arg_limit, unaligned ++// h ^ | in | 5 ++// | | args | 4 Holes in incoming args owned by SELF ++// | | old | | 3 ++// | | SP-+--------+----> Matcher::_old_SP, even aligned ++// v | | ret | 3 return address ++// Owned by +--------+ ++// Self | pad2 | 2 pad to align old SP ++// | +--------+ 1 ++// | | locks | 0 ++// | +--------+----> SharedInfo::stack0, even aligned ++// | | pad1 | 11 pad to align new SP ++// | +--------+ ++// | | | 10 ++// | | spills | 9 spills ++// V | | 8 (pad0 slot for callee) ++// -----------+--------+----> Matcher::_out_arg_limit, unaligned ++// ^ | out | 7 ++// | | args | 6 Holes in outgoing args owned by CALLEE ++// Owned by new | | ++// Callee SP-+--------+----> Matcher::_new_SP, even aligned ++// | | ++// ++// Note 1: Only region 8-11 is determined by the allocator. Region 0-5 is ++// known from SELF's arguments and the Java calling convention. ++// Region 6-7 is determined per call site. ++// Note 2: If the calling convention leaves holes in the incoming argument ++// area, those holes are owned by SELF. Holes in the outgoing area ++// are owned by the CALLEE. Holes should not be nessecary in the ++// incoming area, as the Java calling convention is completely under ++// the control of the AD file. Doubles can be sorted and packed to ++// avoid holes. Holes in the outgoing arguments may be nessecary for ++// varargs C calling conventions. ++// Note 3: Region 0-3 is even aligned, with pad2 as needed. 
Region 3-5 is ++// even aligned with pad0 as needed. ++// Region 6 is even aligned. Region 6-7 is NOT even aligned; ++// region 6-11 is even aligned; it may be padded out more so that ++// the region from SP to FP meets the minimum stack alignment. ++// Note 4: For I2C adapters, the incoming FP may not meet the minimum stack ++// alignment. Region 11, pad1, may be dynamically extended so that ++// SP meets the minimum alignment. ++ ++ ++frame %{ ++ ++ stack_direction(TOWARDS_LOW); ++ ++ // These two registers define part of the calling convention ++ // between compiled code and the interpreter. ++ // SEE StartI2CNode::calling_convention & StartC2INode::calling_convention & StartOSRNode::calling_convention ++ // for more information. ++ ++ inline_cache_reg(T1); // Inline Cache Register ++ interpreter_method_oop_reg(S3); // Method Oop Register when calling interpreter ++ ++ // Optional: name the operand used by cisc-spilling to access [stack_pointer + offset] ++ cisc_spilling_operand_name(indOffset32); ++ ++ // Number of stack slots consumed by locking an object ++ // generate Compile::sync_stack_slots ++ sync_stack_slots(2); ++ ++ frame_pointer(SP); ++ ++ // Interpreter stores its frame pointer in a register which is ++ // stored to the stack by I2CAdaptors. ++ // I2CAdaptors convert from interpreted java to compiled java. ++ ++ interpreter_frame_pointer(FP); ++ ++ // generate Matcher::stack_alignment ++ stack_alignment(StackAlignmentInBytes); //wordSize = sizeof(char*); ++ ++ // Number of stack slots between incoming argument block and the start of ++ // a new frame. The PROLOG must add this many slots to the stack. The ++ // EPILOG must remove this many slots. ++ in_preserve_stack_slots(4); //Now VerifyStackAtCalls is defined as false ! Leave two stack slots for ra and fp ++ ++ // Number of outgoing stack slots killed above the out_preserve_stack_slots ++ // for calls to C. Supports the var-args backing area for register parms. ++ varargs_C_out_slots_killed(0); ++ ++ // The after-PROLOG location of the return address. Location of ++ // return address specifies a type (REG or STACK) and a number ++ // representing the register number (i.e. - use a register name) or ++ // stack slot. ++ // Ret Addr is on stack in slot 0 if no locks or verification or alignment. ++ // Otherwise, it is above the locks and verification slot and alignment word ++ //return_addr(STACK -1+ round_to(1+VerifyStackAtCalls+Compile::current()->sync()*Compile::current()->sync_stack_slots(),WordsPerLong)); ++ return_addr(REG RA); ++ ++ // Body of function which returns an integer array locating ++ // arguments either in registers or in stack slots. Passed an array ++ // of ideal registers called "sig" and a "length" count. Stack-slot ++ // offsets are based on outgoing arguments, i.e. a CALLER setting up ++ // arguments for a CALLEE. Incoming stack arguments are ++ // automatically biased by the preserve_stack_slots field above. ++ ++ ++ // will generated to Matcher::calling_convention(OptoRegPair *sig, uint length, bool is_outgoing) ++ // StartNode::calling_convention call this. ++ calling_convention %{ ++ SharedRuntime::java_calling_convention(sig_bt, regs, length, false); ++ %} ++ ++ ++ ++ ++ // Body of function which returns an integer array locating ++ // arguments either in registers or in stack slots. Passed an array ++ // of ideal registers called "sig" and a "length" count. Stack-slot ++ // offsets are based on outgoing arguments, i.e. a CALLER setting up ++ // arguments for a CALLEE. 
Incoming stack arguments are ++ // automatically biased by the preserve_stack_slots field above. ++ ++ ++ // SEE CallRuntimeNode::calling_convention for more information. ++ c_calling_convention %{ ++ (void) SharedRuntime::c_calling_convention(sig_bt, regs, /*regs2=*/NULL, length); ++ %} ++ ++ ++ // Location of C & interpreter return values ++ // register(s) contain(s) return value for Op_StartI2C and Op_StartOSR. ++ // SEE Matcher::match. ++ c_return_value %{ ++ assert( ideal_reg >= Op_RegI && ideal_reg <= Op_RegL, "only return normal values" ); ++ /* -- , -- , Op_RegN, Op_RegI, Op_RegP, Op_RegF, Op_RegD, Op_RegL */ ++ static int lo[Op_RegL+1] = { 0, 0, V0_num, V0_num, V0_num, F0_num, F0_num, V0_num }; ++ static int hi[Op_RegL+1] = { 0, 0, OptoReg::Bad, OptoReg::Bad, V0_H_num, OptoReg::Bad, F0_H_num, V0_H_num }; ++ return OptoRegPair(hi[ideal_reg],lo[ideal_reg]); ++ %} ++ ++ // Location of return values ++ // register(s) contain(s) return value for Op_StartC2I and Op_Start. ++ // SEE Matcher::match. ++ ++ return_value %{ ++ assert( ideal_reg >= Op_RegI && ideal_reg <= Op_RegL, "only return normal values" ); ++ /* -- , -- , Op_RegN, Op_RegI, Op_RegP, Op_RegF, Op_RegD, Op_RegL */ ++ static int lo[Op_RegL+1] = { 0, 0, V0_num, V0_num, V0_num, F0_num, F0_num, V0_num }; ++ static int hi[Op_RegL+1] = { 0, 0, OptoReg::Bad, OptoReg::Bad, V0_H_num, OptoReg::Bad, F0_H_num, V0_H_num}; ++ return OptoRegPair(hi[ideal_reg],lo[ideal_reg]); ++ %} ++ ++%} ++ ++//----------ATTRIBUTES--------------------------------------------------------- ++//----------Operand Attributes------------------------------------------------- ++op_attrib op_cost(0); // Required cost attribute ++ ++//----------Instruction Attributes--------------------------------------------- ++ins_attrib ins_cost(100); // Required cost attribute ++ins_attrib ins_size(32); // Required size attribute (in bits) ++ins_attrib ins_pc_relative(0); // Required PC Relative flag ++ins_attrib ins_short_branch(0); // Required flag: is this instruction a ++ // non-matching short branch variant of some ++ // long branch? ++ins_attrib ins_alignment(4); // Required alignment attribute (must be a power of 2) ++ // specifies the alignment that some part of the instruction (not ++ // necessarily the start) requires. If > 1, a compute_padding() ++ // function must be provided for the instruction ++ ++//----------OPERANDS----------------------------------------------------------- ++// Operand definitions must precede instruction definitions for correct parsing ++// in the ADLC because operands constitute user defined types which are used in ++// instruction definitions. ++ ++// Vectors ++operand vecD() %{ ++ constraint(ALLOC_IN_RC(dbl_reg)); ++ match(VecD); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++// Flags register, used as output of compare instructions ++operand FlagsReg() %{ ++ constraint(ALLOC_IN_RC(t0_reg)); ++ match(RegFlags); ++ ++ format %{ "T0" %} ++ interface(REG_INTER); ++%} ++ ++//----------Simple Operands---------------------------------------------------- ++// TODO: Should we need to define some more special immediate number ? 
++// Immediate Operands ++// Integer Immediate ++operand immI() %{ ++ match(ConI); ++ // TODO: should not match immI8 here LEE ++ match(immI8); ++ ++ op_cost(20); ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++operand immI8() %{ ++ predicate((-128 <= n->get_int()) && (n->get_int() <= 127)); ++ match(ConI); ++ ++ op_cost(5); ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++operand immI16() %{ ++ predicate((-32768 <= n->get_int()) && (n->get_int() <= 32767)); ++ match(ConI); ++ ++ op_cost(10); ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++operand immI_M65536() %{ ++ predicate(n->get_int() == -65536); ++ match(ConI); ++ ++ op_cost(5); ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++// Constant for decrement ++operand immI_M1() %{ ++ predicate(n->get_int() == -1); ++ match(ConI); ++ ++ op_cost(0); ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++// Constant for test vs zero ++operand immI_0() %{ ++ predicate(n->get_int() == 0); ++ match(ConI); ++ ++ op_cost(0); ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++// Constant for increment ++operand immI_1() %{ ++ predicate(n->get_int() == 1); ++ match(ConI); ++ ++ op_cost(0); ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++// Constants for increment ++operand immI_16() %{ ++ predicate(n->get_int() == 16); ++ match(ConI); ++ ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++operand immI_24() %{ ++ predicate(n->get_int() == 24); ++ match(ConI); ++ ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++// Constant for long shifts ++operand immI_32() %{ ++ predicate(n->get_int() == 32); ++ match(ConI); ++ ++ op_cost(0); ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++// Constant for byte-wide masking ++operand immI_255() %{ ++ predicate(n->get_int() == 255); ++ match(ConI); ++ ++ op_cost(0); ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++operand immI_65535() %{ ++ predicate(n->get_int() == 65535); ++ match(ConI); ++ ++ op_cost(5); ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++operand immI_MaxI() %{ ++ predicate(n->get_int() == 2147483647); ++ match(ConI); ++ ++ op_cost(0); ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++operand immI_M32767_32768() %{ ++ predicate((-32767 <= n->get_int()) && (n->get_int() <= 32768)); ++ match(ConI); ++ ++ op_cost(10); ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++// Valid scale values for addressing modes ++operand immI_0_3() %{ ++ predicate(0 <= n->get_int() && (n->get_int() <= 3)); ++ match(ConI); ++ ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++operand immI_0_31() %{ ++ predicate(n->get_int() >= 0 && n->get_int() <= 31); ++ match(ConI); ++ ++ op_cost(0); ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++operand immI_0_32767() %{ ++ predicate(n->get_int() >= 0 && n->get_int() <= 32767); ++ match(ConI); ++ op_cost(0); ++ ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++operand immI_0_65535() %{ ++ predicate(n->get_int() >= 0 && n->get_int() <= 65535); ++ match(ConI); ++ op_cost(0); ++ ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++operand immI_32_63() %{ ++ predicate(n->get_int() >= 32 && n->get_int() <= 63); ++ match(ConI); ++ op_cost(0); ++ ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++// Operand for non-negtive integer mask ++operand immI_nonneg_mask() %{ ++ predicate((n->get_int() >= 0) && (Assembler::is_int_mask(n->get_int()) != -1)); ++ match(ConI); ++ ++ op_cost(0); ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++// Long Immediate ++operand immL() %{ ++ match(ConL); ++ ++ op_cost(20); ++ format %{ %} ++ 
interface(CONST_INTER); ++%} ++ ++// Long Immediate 8-bit ++operand immL8() %{ ++ predicate(-0x80L <= n->get_long() && n->get_long() < 0x80L); ++ match(ConL); ++ ++ op_cost(5); ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++// Pointer for polling page ++operand immP_poll() %{ ++ predicate(n->get_ptr() != 0 && n->get_ptr() == (intptr_t)os::get_polling_page()); ++ match(ConP); ++ op_cost(5); ++ ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++operand immL16() %{ ++ predicate((-32768 <= n->get_long()) && (n->get_long() <= 32767)); ++ match(ConL); ++ ++ op_cost(10); ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++// Long Immediate 32-bit signed ++operand immL32() %{ ++ predicate(n->get_long() == (int)(n->get_long())); ++ match(ConL); ++ ++ op_cost(15); ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++// bit 3..6 zero ++operand immL_M121() %{ ++ predicate(n->get_long() == -121L); ++ match(ConL); ++ op_cost(0); ++ ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++// bit 0..2 zero ++operand immL_M8() %{ ++ predicate(n->get_long() == -8L); ++ match(ConL); ++ op_cost(0); ++ ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++// bit 1..2 zero ++operand immL_M7() %{ ++ predicate(n->get_long() == -7L); ++ match(ConL); ++ op_cost(0); ++ ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++// bit 2 zero ++operand immL_M5() %{ ++ predicate(n->get_long() == -5L); ++ match(ConL); ++ op_cost(0); ++ ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++// bit 0..1 zero ++operand immL_M4() %{ ++ predicate(n->get_long() == -4L); ++ match(ConL); ++ op_cost(0); ++ ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++operand immL_M1() %{ ++ predicate(n->get_long() == -1L); ++ match(ConL); ++ op_cost(0); ++ ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++// Long Immediate zero ++operand immL_0() %{ ++ predicate(n->get_long() == 0L); ++ match(ConL); ++ op_cost(0); ++ ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++operand immL_7() %{ ++ predicate(n->get_long() == 7L); ++ match(ConL); ++ op_cost(0); ++ ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++// Long Immediate: low 32-bit mask ++operand immL_MaxUI() %{ ++ predicate(n->get_long() == 0xFFFFFFFFL); ++ match(ConL); ++ op_cost(20); ++ ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++operand immL_M32767_32768() %{ ++ predicate((-32767 <= n->get_long()) && (n->get_long() <= 32768)); ++ match(ConL); ++ ++ op_cost(10); ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++operand immL_0_65535() %{ ++ predicate(n->get_long() >= 0 && n->get_long() <= 65535); ++ match(ConL); ++ op_cost(0); ++ ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++// Operand for non-negtive long mask ++operand immL_nonneg_mask() %{ ++ predicate((n->get_long() >= 0) && (Assembler::is_jlong_mask(n->get_long()) != -1)); ++ match(ConL); ++ ++ op_cost(0); ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++// Pointer Immediate ++operand immP() %{ ++ match(ConP); ++ ++ op_cost(10); ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++// NULL Pointer Immediate ++operand immP_0() %{ ++ predicate(n->get_ptr() == 0); ++ match(ConP); ++ op_cost(0); ++ ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++// Pointer Immediate: 64-bit ++operand immP_no_oop_cheap() %{ ++ predicate(!n->bottom_type()->isa_oop_ptr() && (MacroAssembler::insts_for_set64(n->get_ptr()) <= 3)); ++ match(ConP); ++ ++ op_cost(5); ++ // formats are generated automatically for constants and base registers ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++// Pointer Immediate ++operand 
immN() %{ ++ match(ConN); ++ ++ op_cost(10); ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++operand immNKlass() %{ ++ match(ConNKlass); ++ ++ op_cost(10); ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++// NULL Pointer Immediate ++operand immN_0() %{ ++ predicate(n->get_narrowcon() == 0); ++ match(ConN); ++ ++ op_cost(5); ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++// Single-precision floating-point immediate ++operand immF() %{ ++ match(ConF); ++ ++ op_cost(20); ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++// Single-precision floating-point zero ++operand immF_0() %{ ++ predicate(jint_cast(n->getf()) == 0); ++ match(ConF); ++ ++ op_cost(5); ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++// Double-precision floating-point immediate ++operand immD() %{ ++ match(ConD); ++ ++ op_cost(20); ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++// Double-precision floating-point zero ++operand immD_0() %{ ++ predicate(jlong_cast(n->getd()) == 0); ++ match(ConD); ++ ++ op_cost(5); ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++// Register Operands ++// Integer Register ++operand mRegI() %{ ++ constraint(ALLOC_IN_RC(int_reg)); ++ match(RegI); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand no_Ax_mRegI() %{ ++ constraint(ALLOC_IN_RC(no_Ax_int_reg)); ++ match(RegI); ++ match(mRegI); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand mS0RegI() %{ ++ constraint(ALLOC_IN_RC(s0_reg)); ++ match(RegI); ++ match(mRegI); ++ ++ format %{ "S0" %} ++ interface(REG_INTER); ++%} ++ ++operand mS1RegI() %{ ++ constraint(ALLOC_IN_RC(s1_reg)); ++ match(RegI); ++ match(mRegI); ++ ++ format %{ "S1" %} ++ interface(REG_INTER); ++%} ++ ++operand mS3RegI() %{ ++ constraint(ALLOC_IN_RC(s3_reg)); ++ match(RegI); ++ match(mRegI); ++ ++ format %{ "S3" %} ++ interface(REG_INTER); ++%} ++ ++operand mS4RegI() %{ ++ constraint(ALLOC_IN_RC(s4_reg)); ++ match(RegI); ++ match(mRegI); ++ ++ format %{ "S4" %} ++ interface(REG_INTER); ++%} ++ ++operand mS5RegI() %{ ++ constraint(ALLOC_IN_RC(s5_reg)); ++ match(RegI); ++ match(mRegI); ++ ++ format %{ "S5" %} ++ interface(REG_INTER); ++%} ++ ++operand mS6RegI() %{ ++ constraint(ALLOC_IN_RC(s6_reg)); ++ match(RegI); ++ match(mRegI); ++ ++ format %{ "S6" %} ++ interface(REG_INTER); ++%} ++ ++operand mS7RegI() %{ ++ constraint(ALLOC_IN_RC(s7_reg)); ++ match(RegI); ++ match(mRegI); ++ ++ format %{ "S7" %} ++ interface(REG_INTER); ++%} ++ ++ ++operand mT0RegI() %{ ++ constraint(ALLOC_IN_RC(t0_reg)); ++ match(RegI); ++ match(mRegI); ++ ++ format %{ "T0" %} ++ interface(REG_INTER); ++%} ++ ++operand mT1RegI() %{ ++ constraint(ALLOC_IN_RC(t1_reg)); ++ match(RegI); ++ match(mRegI); ++ ++ format %{ "T1" %} ++ interface(REG_INTER); ++%} ++ ++operand mT2RegI() %{ ++ constraint(ALLOC_IN_RC(t2_reg)); ++ match(RegI); ++ match(mRegI); ++ ++ format %{ "T2" %} ++ interface(REG_INTER); ++%} ++ ++operand mT3RegI() %{ ++ constraint(ALLOC_IN_RC(t3_reg)); ++ match(RegI); ++ match(mRegI); ++ ++ format %{ "T3" %} ++ interface(REG_INTER); ++%} ++ ++operand mT8RegI() %{ ++ constraint(ALLOC_IN_RC(t8_reg)); ++ match(RegI); ++ match(mRegI); ++ ++ format %{ "T8" %} ++ interface(REG_INTER); ++%} ++ ++operand mT9RegI() %{ ++ constraint(ALLOC_IN_RC(t9_reg)); ++ match(RegI); ++ match(mRegI); ++ ++ format %{ "T9" %} ++ interface(REG_INTER); ++%} ++ ++operand mA0RegI() %{ ++ constraint(ALLOC_IN_RC(a0_reg)); ++ match(RegI); ++ match(mRegI); ++ ++ format %{ "A0" %} ++ interface(REG_INTER); ++%} ++ ++operand mA1RegI() %{ ++ constraint(ALLOC_IN_RC(a1_reg)); ++ 
match(RegI); ++ match(mRegI); ++ ++ format %{ "A1" %} ++ interface(REG_INTER); ++%} ++ ++operand mA2RegI() %{ ++ constraint(ALLOC_IN_RC(a2_reg)); ++ match(RegI); ++ match(mRegI); ++ ++ format %{ "A2" %} ++ interface(REG_INTER); ++%} ++ ++operand mA3RegI() %{ ++ constraint(ALLOC_IN_RC(a3_reg)); ++ match(RegI); ++ match(mRegI); ++ ++ format %{ "A3" %} ++ interface(REG_INTER); ++%} ++ ++operand mA4RegI() %{ ++ constraint(ALLOC_IN_RC(a4_reg)); ++ match(RegI); ++ match(mRegI); ++ ++ format %{ "A4" %} ++ interface(REG_INTER); ++%} ++ ++operand mA5RegI() %{ ++ constraint(ALLOC_IN_RC(a5_reg)); ++ match(RegI); ++ match(mRegI); ++ ++ format %{ "A5" %} ++ interface(REG_INTER); ++%} ++ ++operand mA6RegI() %{ ++ constraint(ALLOC_IN_RC(a6_reg)); ++ match(RegI); ++ match(mRegI); ++ ++ format %{ "A6" %} ++ interface(REG_INTER); ++%} ++ ++operand mA7RegI() %{ ++ constraint(ALLOC_IN_RC(a7_reg)); ++ match(RegI); ++ match(mRegI); ++ ++ format %{ "A7" %} ++ interface(REG_INTER); ++%} ++ ++operand mV0RegI() %{ ++ constraint(ALLOC_IN_RC(v0_reg)); ++ match(RegI); ++ match(mRegI); ++ ++ format %{ "V0" %} ++ interface(REG_INTER); ++%} ++ ++operand mV1RegI() %{ ++ constraint(ALLOC_IN_RC(v1_reg)); ++ match(RegI); ++ match(mRegI); ++ ++ format %{ "V1" %} ++ interface(REG_INTER); ++%} ++ ++operand mRegN() %{ ++ constraint(ALLOC_IN_RC(int_reg)); ++ match(RegN); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand t0_RegN() %{ ++ constraint(ALLOC_IN_RC(t0_reg)); ++ match(RegN); ++ match(mRegN); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand t1_RegN() %{ ++ constraint(ALLOC_IN_RC(t1_reg)); ++ match(RegN); ++ match(mRegN); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand t3_RegN() %{ ++ constraint(ALLOC_IN_RC(t3_reg)); ++ match(RegN); ++ match(mRegN); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand t8_RegN() %{ ++ constraint(ALLOC_IN_RC(t8_reg)); ++ match(RegN); ++ match(mRegN); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand t9_RegN() %{ ++ constraint(ALLOC_IN_RC(t9_reg)); ++ match(RegN); ++ match(mRegN); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand a0_RegN() %{ ++ constraint(ALLOC_IN_RC(a0_reg)); ++ match(RegN); ++ match(mRegN); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand a1_RegN() %{ ++ constraint(ALLOC_IN_RC(a1_reg)); ++ match(RegN); ++ match(mRegN); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand a2_RegN() %{ ++ constraint(ALLOC_IN_RC(a2_reg)); ++ match(RegN); ++ match(mRegN); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand a3_RegN() %{ ++ constraint(ALLOC_IN_RC(a3_reg)); ++ match(RegN); ++ match(mRegN); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand a4_RegN() %{ ++ constraint(ALLOC_IN_RC(a4_reg)); ++ match(RegN); ++ match(mRegN); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand a5_RegN() %{ ++ constraint(ALLOC_IN_RC(a5_reg)); ++ match(RegN); ++ match(mRegN); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand a6_RegN() %{ ++ constraint(ALLOC_IN_RC(a6_reg)); ++ match(RegN); ++ match(mRegN); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand a7_RegN() %{ ++ constraint(ALLOC_IN_RC(a7_reg)); ++ match(RegN); ++ match(mRegN); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand s0_RegN() %{ ++ constraint(ALLOC_IN_RC(s0_reg)); ++ match(RegN); ++ match(mRegN); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand s1_RegN() %{ ++ constraint(ALLOC_IN_RC(s1_reg)); ++ match(RegN); ++ match(mRegN); ++ ++ format %{ %} ++ 
interface(REG_INTER); ++%} ++ ++operand s2_RegN() %{ ++ constraint(ALLOC_IN_RC(s2_reg)); ++ match(RegN); ++ match(mRegN); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand s3_RegN() %{ ++ constraint(ALLOC_IN_RC(s3_reg)); ++ match(RegN); ++ match(mRegN); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand s4_RegN() %{ ++ constraint(ALLOC_IN_RC(s4_reg)); ++ match(RegN); ++ match(mRegN); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand s5_RegN() %{ ++ constraint(ALLOC_IN_RC(s5_reg)); ++ match(RegN); ++ match(mRegN); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand s6_RegN() %{ ++ constraint(ALLOC_IN_RC(s6_reg)); ++ match(RegN); ++ match(mRegN); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand s7_RegN() %{ ++ constraint(ALLOC_IN_RC(s7_reg)); ++ match(RegN); ++ match(mRegN); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand v0_RegN() %{ ++ constraint(ALLOC_IN_RC(v0_reg)); ++ match(RegN); ++ match(mRegN); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand v1_RegN() %{ ++ constraint(ALLOC_IN_RC(v1_reg)); ++ match(RegN); ++ match(mRegN); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++// Pointer Register ++operand mRegP() %{ ++ constraint(ALLOC_IN_RC(p_reg)); ++ match(RegP); ++ match(a0_RegP); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand no_T8_mRegP() %{ ++ constraint(ALLOC_IN_RC(no_T8_p_reg)); ++ match(RegP); ++ match(mRegP); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand s1_RegP() ++%{ ++ constraint(ALLOC_IN_RC(s1_long_reg)); ++ match(RegP); ++ match(mRegP); ++ match(no_T8_mRegP); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand s3_RegP() ++%{ ++ constraint(ALLOC_IN_RC(s3_long_reg)); ++ match(RegP); ++ match(mRegP); ++ match(no_T8_mRegP); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand s4_RegP() ++%{ ++ constraint(ALLOC_IN_RC(s4_long_reg)); ++ match(RegP); ++ match(mRegP); ++ match(no_T8_mRegP); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand s5_RegP() ++%{ ++ constraint(ALLOC_IN_RC(s5_long_reg)); ++ match(RegP); ++ match(mRegP); ++ match(no_T8_mRegP); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand s6_RegP() ++%{ ++ constraint(ALLOC_IN_RC(s6_long_reg)); ++ match(RegP); ++ match(mRegP); ++ match(no_T8_mRegP); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand s7_RegP() ++%{ ++ constraint(ALLOC_IN_RC(s7_long_reg)); ++ match(RegP); ++ match(mRegP); ++ match(no_T8_mRegP); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand t0_RegP() ++%{ ++ constraint(ALLOC_IN_RC(t0_long_reg)); ++ match(RegP); ++ match(mRegP); ++ match(no_T8_mRegP); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand t1_RegP() ++%{ ++ constraint(ALLOC_IN_RC(t1_long_reg)); ++ match(RegP); ++ match(mRegP); ++ match(no_T8_mRegP); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand t2_RegP() ++%{ ++ constraint(ALLOC_IN_RC(t2_long_reg)); ++ match(RegP); ++ match(mRegP); ++ match(no_T8_mRegP); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand t3_RegP() ++%{ ++ constraint(ALLOC_IN_RC(t3_long_reg)); ++ match(RegP); ++ match(mRegP); ++ match(no_T8_mRegP); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand t8_RegP() ++%{ ++ constraint(ALLOC_IN_RC(t8_long_reg)); ++ match(RegP); ++ match(mRegP); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand t9_RegP() ++%{ ++ constraint(ALLOC_IN_RC(t9_long_reg)); ++ match(RegP); ++ match(mRegP); ++ match(no_T8_mRegP); ++ ++ format %{ %} ++ 
interface(REG_INTER); ++%} ++ ++operand a0_RegP() ++%{ ++ constraint(ALLOC_IN_RC(a0_long_reg)); ++ match(RegP); ++ match(mRegP); ++ match(no_T8_mRegP); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand a1_RegP() ++%{ ++ constraint(ALLOC_IN_RC(a1_long_reg)); ++ match(RegP); ++ match(mRegP); ++ match(no_T8_mRegP); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand a2_RegP() ++%{ ++ constraint(ALLOC_IN_RC(a2_long_reg)); ++ match(RegP); ++ match(mRegP); ++ match(no_T8_mRegP); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand a3_RegP() ++%{ ++ constraint(ALLOC_IN_RC(a3_long_reg)); ++ match(RegP); ++ match(mRegP); ++ match(no_T8_mRegP); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand a4_RegP() ++%{ ++ constraint(ALLOC_IN_RC(a4_long_reg)); ++ match(RegP); ++ match(mRegP); ++ match(no_T8_mRegP); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++ ++operand a5_RegP() ++%{ ++ constraint(ALLOC_IN_RC(a5_long_reg)); ++ match(RegP); ++ match(mRegP); ++ match(no_T8_mRegP); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand a6_RegP() ++%{ ++ constraint(ALLOC_IN_RC(a6_long_reg)); ++ match(RegP); ++ match(mRegP); ++ match(no_T8_mRegP); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand a7_RegP() ++%{ ++ constraint(ALLOC_IN_RC(a7_long_reg)); ++ match(RegP); ++ match(mRegP); ++ match(no_T8_mRegP); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand v0_RegP() ++%{ ++ constraint(ALLOC_IN_RC(v0_long_reg)); ++ match(RegP); ++ match(mRegP); ++ match(no_T8_mRegP); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand v1_RegP() ++%{ ++ constraint(ALLOC_IN_RC(v1_long_reg)); ++ match(RegP); ++ match(mRegP); ++ match(no_T8_mRegP); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++/* ++operand mSPRegP(mRegP reg) %{ ++ constraint(ALLOC_IN_RC(sp_reg)); ++ match(reg); ++ ++ format %{ "SP" %} ++ interface(REG_INTER); ++%} ++ ++operand mFPRegP(mRegP reg) %{ ++ constraint(ALLOC_IN_RC(fp_reg)); ++ match(reg); ++ ++ format %{ "FP" %} ++ interface(REG_INTER); ++%} ++*/ ++ ++operand mRegL() %{ ++ constraint(ALLOC_IN_RC(long_reg)); ++ match(RegL); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand v0RegL() %{ ++ constraint(ALLOC_IN_RC(v0_long_reg)); ++ match(RegL); ++ match(mRegL); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand v1RegL() %{ ++ constraint(ALLOC_IN_RC(v1_long_reg)); ++ match(RegL); ++ match(mRegL); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand a0RegL() %{ ++ constraint(ALLOC_IN_RC(a0_long_reg)); ++ match(RegL); ++ match(mRegL); ++ ++ format %{ "A0" %} ++ interface(REG_INTER); ++%} ++ ++operand a1RegL() %{ ++ constraint(ALLOC_IN_RC(a1_long_reg)); ++ match(RegL); ++ match(mRegL); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand a2RegL() %{ ++ constraint(ALLOC_IN_RC(a2_long_reg)); ++ match(RegL); ++ match(mRegL); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand a3RegL() %{ ++ constraint(ALLOC_IN_RC(a3_long_reg)); ++ match(RegL); ++ match(mRegL); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand t0RegL() %{ ++ constraint(ALLOC_IN_RC(t0_long_reg)); ++ match(RegL); ++ match(mRegL); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand t1RegL() %{ ++ constraint(ALLOC_IN_RC(t1_long_reg)); ++ match(RegL); ++ match(mRegL); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand t3RegL() %{ ++ constraint(ALLOC_IN_RC(t3_long_reg)); ++ match(RegL); ++ match(mRegL); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand t8RegL() %{ ++ 
constraint(ALLOC_IN_RC(t8_long_reg)); ++ match(RegL); ++ match(mRegL); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand a4RegL() %{ ++ constraint(ALLOC_IN_RC(a4_long_reg)); ++ match(RegL); ++ match(mRegL); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand a5RegL() %{ ++ constraint(ALLOC_IN_RC(a5_long_reg)); ++ match(RegL); ++ match(mRegL); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand a6RegL() %{ ++ constraint(ALLOC_IN_RC(a6_long_reg)); ++ match(RegL); ++ match(mRegL); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand a7RegL() %{ ++ constraint(ALLOC_IN_RC(a7_long_reg)); ++ match(RegL); ++ match(mRegL); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand s0RegL() %{ ++ constraint(ALLOC_IN_RC(s0_long_reg)); ++ match(RegL); ++ match(mRegL); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand s1RegL() %{ ++ constraint(ALLOC_IN_RC(s1_long_reg)); ++ match(RegL); ++ match(mRegL); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand s3RegL() %{ ++ constraint(ALLOC_IN_RC(s3_long_reg)); ++ match(RegL); ++ match(mRegL); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand s4RegL() %{ ++ constraint(ALLOC_IN_RC(s4_long_reg)); ++ match(RegL); ++ match(mRegL); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand s7RegL() %{ ++ constraint(ALLOC_IN_RC(s7_long_reg)); ++ match(RegL); ++ match(mRegL); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++// Floating register operands ++operand regF() %{ ++ constraint(ALLOC_IN_RC(flt_reg)); ++ match(RegF); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++//Double Precision Floating register operands ++operand regD() %{ ++ constraint(ALLOC_IN_RC(dbl_reg)); ++ match(RegD); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++//----------Memory Operands---------------------------------------------------- ++// Indirect Memory Operand ++operand indirect(mRegP reg) %{ ++ constraint(ALLOC_IN_RC(p_reg)); ++ match(reg); ++ ++ format %{ "[$reg] @ indirect" %} ++ interface(MEMORY_INTER) %{ ++ base($reg); ++ index(0x0); /* NO_INDEX */ ++ scale(0x0); ++ disp(0x0); ++ %} ++%} ++ ++// Indirect Memory Plus Short Offset Operand ++operand indOffset8(mRegP reg, immL8 off) ++%{ ++ constraint(ALLOC_IN_RC(p_reg)); ++ match(AddP reg off); ++ ++ op_cost(10); ++ format %{ "[$reg + $off (8-bit)] @ indOffset8" %} ++ interface(MEMORY_INTER) %{ ++ base($reg); ++ index(0x0); /* NO_INDEX */ ++ scale(0x0); ++ disp($off); ++ %} ++%} ++ ++// Indirect Memory Times Scale Plus Index Register ++operand indIndexScale(mRegP reg, mRegL lreg, immI_0_3 scale) ++%{ ++ predicate(UseLEXT1); ++ constraint(ALLOC_IN_RC(p_reg)); ++ match(AddP reg (LShiftL lreg scale)); ++ ++ op_cost(10); ++ format %{"[$reg + $lreg << $scale] @ indIndexScale" %} ++ interface(MEMORY_INTER) %{ ++ base($reg); ++ index($lreg); ++ scale($scale); ++ disp(0x0); ++ %} ++%} ++ ++ ++// [base + index + offset] ++operand baseIndexOffset8(mRegP base, mRegL index, immL8 off) ++%{ ++ predicate(UseLEXT1); ++ constraint(ALLOC_IN_RC(p_reg)); ++ op_cost(5); ++ match(AddP (AddP base index) off); ++ ++ format %{ "[$base + $index + $off (8-bit)] @ baseIndexOffset8" %} ++ interface(MEMORY_INTER) %{ ++ base($base); ++ index($index); ++ scale(0x0); ++ disp($off); ++ %} ++%} ++ ++// [base + index + offset] ++operand baseIndexOffset8_convI2L(mRegP base, mRegI index, immL8 off) ++%{ ++ predicate(UseLEXT1); ++ constraint(ALLOC_IN_RC(p_reg)); ++ op_cost(5); ++ match(AddP (AddP base (ConvI2L index)) off); ++ ++ format %{ "[$base + $index + $off (8-bit)] 
@ baseIndexOffset8_convI2L" %} ++ interface(MEMORY_INTER) %{ ++ base($base); ++ index($index); ++ scale(0x0); ++ disp($off); ++ %} ++%} ++ ++// [base + index<in(2)->in(3)->in(1)->as_Type()->type()->is_long()->_lo >= 0); ++ op_cost(10); ++ match(AddP (AddP base (LShiftL (ConvI2L index) scale)) off); ++ ++ format %{ "[$base + $index << $scale + $off (8-bit)] @ basePosIndexScaleOffset8" %} ++ interface(MEMORY_INTER) %{ ++ base($base); ++ index($index); ++ scale($scale); ++ disp($off); ++ %} ++%} ++ ++//FIXME: I think it's better to limit the immI to be 16-bit at most! ++// Indirect Memory Plus Long Offset Operand ++operand indOffset32(mRegP reg, immL32 off) %{ ++ constraint(ALLOC_IN_RC(p_reg)); ++ op_cost(20); ++ match(AddP reg off); ++ ++ format %{ "[$reg + $off (32-bit)] @ indOffset32" %} ++ interface(MEMORY_INTER) %{ ++ base($reg); ++ index(0x0); /* NO_INDEX */ ++ scale(0x0); ++ disp($off); ++ %} ++%} ++ ++// Indirect Memory Plus Index Register ++operand indIndex(mRegP addr, mRegL index) %{ ++ constraint(ALLOC_IN_RC(p_reg)); ++ match(AddP addr index); ++ ++ op_cost(20); ++ format %{"[$addr + $index] @ indIndex" %} ++ interface(MEMORY_INTER) %{ ++ base($addr); ++ index($index); ++ scale(0x0); ++ disp(0x0); ++ %} ++%} ++ ++operand indirectNarrowKlass(mRegN reg) ++%{ ++ predicate(Universe::narrow_klass_shift() == 0); ++ constraint(ALLOC_IN_RC(p_reg)); ++ op_cost(10); ++ match(DecodeNKlass reg); ++ ++ format %{ "[$reg] @ indirectNarrowKlass" %} ++ interface(MEMORY_INTER) %{ ++ base($reg); ++ index(0x0); ++ scale(0x0); ++ disp(0x0); ++ %} ++%} ++ ++operand indOffset8NarrowKlass(mRegN reg, immL8 off) ++%{ ++ predicate(Universe::narrow_klass_shift() == 0); ++ constraint(ALLOC_IN_RC(p_reg)); ++ op_cost(10); ++ match(AddP (DecodeNKlass reg) off); ++ ++ format %{ "[$reg + $off (8-bit)] @ indOffset8NarrowKlass" %} ++ interface(MEMORY_INTER) %{ ++ base($reg); ++ index(0x0); ++ scale(0x0); ++ disp($off); ++ %} ++%} ++ ++operand indOffset32NarrowKlass(mRegN reg, immL32 off) ++%{ ++ predicate(Universe::narrow_klass_shift() == 0); ++ constraint(ALLOC_IN_RC(p_reg)); ++ op_cost(10); ++ match(AddP (DecodeNKlass reg) off); ++ ++ format %{ "[$reg + $off (32-bit)] @ indOffset32NarrowKlass" %} ++ interface(MEMORY_INTER) %{ ++ base($reg); ++ index(0x0); ++ scale(0x0); ++ disp($off); ++ %} ++%} ++ ++operand indIndexOffsetNarrowKlass(mRegN reg, mRegL lreg, immL32 off) ++%{ ++ predicate(UseLEXT1); ++ predicate(Universe::narrow_klass_shift() == 0); ++ constraint(ALLOC_IN_RC(p_reg)); ++ match(AddP (AddP (DecodeNKlass reg) lreg) off); ++ ++ op_cost(10); ++ format %{"[$reg + $off + $lreg] @ indIndexOffsetNarrowKlass" %} ++ interface(MEMORY_INTER) %{ ++ base($reg); ++ index($lreg); ++ scale(0x0); ++ disp($off); ++ %} ++%} ++ ++operand indIndexNarrowKlass(mRegN reg, mRegL lreg) ++%{ ++ predicate(Universe::narrow_klass_shift() == 0); ++ constraint(ALLOC_IN_RC(p_reg)); ++ match(AddP (DecodeNKlass reg) lreg); ++ ++ op_cost(10); ++ format %{"[$reg + $lreg] @ indIndexNarrowKlass" %} ++ interface(MEMORY_INTER) %{ ++ base($reg); ++ index($lreg); ++ scale(0x0); ++ disp(0x0); ++ %} ++%} ++ ++// Indirect Memory Operand ++operand indirectNarrow(mRegN reg) ++%{ ++ predicate(Universe::narrow_oop_shift() == 0); ++ constraint(ALLOC_IN_RC(p_reg)); ++ op_cost(10); ++ match(DecodeN reg); ++ ++ format %{ "[$reg] @ indirectNarrow" %} ++ interface(MEMORY_INTER) %{ ++ base($reg); ++ index(0x0); ++ scale(0x0); ++ disp(0x0); ++ %} ++%} ++ ++// Indirect Memory Plus Short Offset Operand ++operand indOffset8Narrow(mRegN reg, immL8 off) ++%{ ++ 
predicate(Universe::narrow_oop_shift() == 0); ++ constraint(ALLOC_IN_RC(p_reg)); ++ op_cost(10); ++ match(AddP (DecodeN reg) off); ++ ++ format %{ "[$reg + $off (8-bit)] @ indOffset8Narrow" %} ++ interface(MEMORY_INTER) %{ ++ base($reg); ++ index(0x0); ++ scale(0x0); ++ disp($off); ++ %} ++%} ++ ++// Indirect Memory Plus Index Register Plus Offset Operand ++operand indIndexOffset8Narrow(mRegN reg, mRegL lreg, immL8 off) ++%{ ++ predicate((Universe::narrow_oop_shift() == 0) && UseLEXT1); ++ constraint(ALLOC_IN_RC(p_reg)); ++ match(AddP (AddP (DecodeN reg) lreg) off); ++ ++ op_cost(10); ++ format %{"[$reg + $off + $lreg] @ indIndexOffset8Narrow" %} ++ interface(MEMORY_INTER) %{ ++ base($reg); ++ index($lreg); ++ scale(0x0); ++ disp($off); ++ %} ++%} ++ ++//----------Conditional Branch Operands---------------------------------------- ++// Comparison Op - This is the operation of the comparison, and is limited to ++// the following set of codes: ++// L (<), LE (<=), G (>), GE (>=), E (==), NE (!=) ++// ++// Other attributes of the comparison, such as unsignedness, are specified ++// by the comparison instruction that sets a condition code flags register. ++// That result is represented by a flags operand whose subtype is appropriate ++// to the unsignedness (etc.) of the comparison. ++// ++// Later, the instruction which matches both the Comparison Op (a Bool) and ++// the flags (produced by the Cmp) specifies the coding of the comparison op ++// by matching a specific subtype of Bool operand below, such as cmpOpU. ++ ++// Comparision Code ++operand cmpOp() %{ ++ match(Bool); ++ ++ format %{ "" %} ++ interface(COND_INTER) %{ ++ equal(0x01); ++ not_equal(0x02); ++ greater(0x03); ++ greater_equal(0x04); ++ less(0x05); ++ less_equal(0x06); ++ overflow(0x7); ++ no_overflow(0x8); ++ %} ++%} ++ ++ ++// Comparision Code ++// Comparison Code, unsigned compare. Used by FP also, with ++// C2 (unordered) turned into GT or LT already. The other bits ++// C0 and C3 are turned into Carry & Zero flags. ++operand cmpOpU() %{ ++ match(Bool); ++ ++ format %{ "" %} ++ interface(COND_INTER) %{ ++ equal(0x01); ++ not_equal(0x02); ++ greater(0x03); ++ greater_equal(0x04); ++ less(0x05); ++ less_equal(0x06); ++ overflow(0x7); ++ no_overflow(0x8); ++ %} ++%} ++ ++ ++//----------Special Memory Operands-------------------------------------------- ++// Stack Slot Operand - This operand is used for loading and storing temporary ++// values on the stack where a match requires a value to ++// flow through memory. 
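The cmpOp and cmpOpU operands above pin a numeric code to each Bool condition (equal = 0x01 up to no_overflow = 0x08), and the long-offset branch instructs later in this file switch on exactly those codes when choosing a compare-and-branch sequence. The following compact C++ sketch restates that dispatch for the signed reg-reg case; describe_branch is a made-up helper that only prints the sequences as strings (the real encodings call MacroAssembler helpers such as beq_long, bne_long and slt, as jmpLoopEnd_long does further down).

#include <cstdio>

// Condition codes exactly as declared in the cmpOp/cmpOpU COND_INTER blocks.
enum Cond {
  COND_EQ = 0x01, COND_NE = 0x02, COND_GT = 0x03, COND_GE = 0x04,
  COND_LT = 0x05, COND_LE = 0x06, COND_OVF = 0x07, COND_NO_OVF = 0x08
};

// Prints the branch strategy a signed compare-and-branch uses for each code:
// ==/!= branch directly, ordered compares go through slt into AT first.
void describe_branch(int cond) {
  switch (cond) {
    case COND_EQ: std::puts("beq src1, src2, L");                    break;
    case COND_NE: std::puts("bne src1, src2, L");                    break;
    case COND_GT: std::puts("slt AT, src2, src1 ; bne AT, zero, L"); break;
    case COND_GE: std::puts("slt AT, src1, src2 ; beq AT, zero, L"); break;
    case COND_LT: std::puts("slt AT, src1, src2 ; bne AT, zero, L"); break;
    case COND_LE: std::puts("slt AT, src2, src1 ; beq AT, zero, L"); break;
    default:      std::puts("unimplemented");                        break;
  }
}

int main() { describe_branch(COND_GE); return 0; }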
++operand stackSlotP(sRegP reg) %{ ++ constraint(ALLOC_IN_RC(stack_slots)); ++ // No match rule because this operand is only generated in matching ++ op_cost(50); ++ format %{ "[$reg]" %} ++ interface(MEMORY_INTER) %{ ++ base(0x1d); // SP ++ index(0x0); // No Index ++ scale(0x0); // No Scale ++ disp($reg); // Stack Offset ++ %} ++%} ++ ++operand stackSlotI(sRegI reg) %{ ++ constraint(ALLOC_IN_RC(stack_slots)); ++ // No match rule because this operand is only generated in matching ++ op_cost(50); ++ format %{ "[$reg]" %} ++ interface(MEMORY_INTER) %{ ++ base(0x1d); // SP ++ index(0x0); // No Index ++ scale(0x0); // No Scale ++ disp($reg); // Stack Offset ++ %} ++%} ++ ++operand stackSlotF(sRegF reg) %{ ++ constraint(ALLOC_IN_RC(stack_slots)); ++ // No match rule because this operand is only generated in matching ++ op_cost(50); ++ format %{ "[$reg]" %} ++ interface(MEMORY_INTER) %{ ++ base(0x1d); // SP ++ index(0x0); // No Index ++ scale(0x0); // No Scale ++ disp($reg); // Stack Offset ++ %} ++%} ++ ++operand stackSlotD(sRegD reg) %{ ++ constraint(ALLOC_IN_RC(stack_slots)); ++ // No match rule because this operand is only generated in matching ++ op_cost(50); ++ format %{ "[$reg]" %} ++ interface(MEMORY_INTER) %{ ++ base(0x1d); // SP ++ index(0x0); // No Index ++ scale(0x0); // No Scale ++ disp($reg); // Stack Offset ++ %} ++%} ++ ++operand stackSlotL(sRegL reg) %{ ++ constraint(ALLOC_IN_RC(stack_slots)); ++ // No match rule because this operand is only generated in matching ++ op_cost(50); ++ format %{ "[$reg]" %} ++ interface(MEMORY_INTER) %{ ++ base(0x1d); // SP ++ index(0x0); // No Index ++ scale(0x0); // No Scale ++ disp($reg); // Stack Offset ++ %} ++%} ++ ++ ++//------------------------OPERAND CLASSES-------------------------------------- ++//opclass memory( direct, indirect, indOffset16, indOffset32, indOffset32X, indIndexOffset ); ++opclass memory( indirect, indirectNarrow, indOffset8, indOffset32, indIndex, indIndexScale, baseIndexOffset8, baseIndexOffset8_convI2L, indOffset8Narrow, indIndexOffset8Narrow); ++ ++ ++//----------PIPELINE----------------------------------------------------------- ++// Rules which define the behavior of the target architectures pipeline. 
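The memory opclass just above collects every addressing form the matcher can hand to an encoding, and each form reduces to the same four MEMORY_INTER fields: a base register, an optional index register, a shift scale and a displacement. That is also the order in which the loadstore_enc calls in the instruct encodings further down consume them. A standalone sketch of the effective-address arithmetic those fields describe follows; MemOperand and effective_address are made-up names, and plain integers stand in for register contents (a reading aid only, not HotSpot code).

#include <cstdint>
#include <cstdio>

// A matched memory operand reduced to the four fields each form above exposes
// through MEMORY_INTER: base, index, scale, disp.
struct MemOperand {
  uint64_t base;    // contents of the base register (SP for the stackSlot* forms)
  uint64_t index;   // contents of the index register, 0 when there is no index
  unsigned scale;   // left shift applied to the index (immI_0_3, i.e. 0..3)
  int64_t  disp;    // signed displacement (the stack offset for stackSlot*)
};

// Effective address as the addressing modes above define it:
//   base + (index << scale) + disp
uint64_t effective_address(const MemOperand& m) {
  return m.base + (m.index << m.scale) + (uint64_t)m.disp;
}

int main() {
  // e.g. an indIndexScale-style access: object base plus an index scaled by 8
  MemOperand m = { 0x120000000ULL, 5, 3, 16 };
  std::printf("ea = 0x%llx\n", (unsigned long long)effective_address(m));
  return 0;
}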
++ ++pipeline %{ ++ ++ //----------ATTRIBUTES--------------------------------------------------------- ++ attributes %{ ++ fixed_size_instructions; // Fixed size instructions ++ branch_has_delay_slot; // branch have delay slot in gs2 ++ max_instructions_per_bundle = 1; // 1 instruction per bundle ++ max_bundles_per_cycle = 4; // Up to 4 bundles per cycle ++ bundle_unit_size=4; ++ instruction_unit_size = 4; // An instruction is 4 bytes long ++ instruction_fetch_unit_size = 16; // The processor fetches one line ++ instruction_fetch_units = 1; // of 16 bytes ++ ++ // List of nop instructions ++ nops( MachNop ); ++ %} ++ ++ //----------RESOURCES---------------------------------------------------------- ++ // Resources are the functional units available to the machine ++ ++ resources(D1, D2, D3, D4, DECODE = D1 | D2 | D3| D4, ALU1, ALU2, ALU = ALU1 | ALU2, FPU1, FPU2, FPU = FPU1 | FPU2, MEM, BR); ++ ++ //----------PIPELINE DESCRIPTION----------------------------------------------- ++ // Pipeline Description specifies the stages in the machine's pipeline ++ ++ // IF: fetch ++ // ID: decode ++ // RD: read ++ // CA: caculate ++ // WB: write back ++ // CM: commit ++ ++ pipe_desc(IF, ID, RD, CA, WB, CM); ++ ++ ++ //----------PIPELINE CLASSES--------------------------------------------------- ++ // Pipeline Classes describe the stages in which input and output are ++ // referenced by the hardware pipeline. ++ ++ //No.1 Integer ALU reg-reg operation : dst <-- reg1 op reg2 ++ pipe_class ialu_regI_regI(mRegI dst, mRegI src1, mRegI src2) %{ ++ single_instruction; ++ src1 : RD(read); ++ src2 : RD(read); ++ dst : WB(write)+1; ++ DECODE : ID; ++ ALU : CA; ++ %} ++ ++ //No.19 Integer mult operation : dst <-- reg1 mult reg2 ++ pipe_class ialu_mult(mRegI dst, mRegI src1, mRegI src2) %{ ++ src1 : RD(read); ++ src2 : RD(read); ++ dst : WB(write)+5; ++ DECODE : ID; ++ ALU2 : CA; ++ %} ++ ++ pipe_class mulL_reg_reg(mRegL dst, mRegL src1, mRegL src2) %{ ++ src1 : RD(read); ++ src2 : RD(read); ++ dst : WB(write)+10; ++ DECODE : ID; ++ ALU2 : CA; ++ %} ++ ++ //No.19 Integer div operation : dst <-- reg1 div reg2 ++ pipe_class ialu_div(mRegI dst, mRegI src1, mRegI src2) %{ ++ src1 : RD(read); ++ src2 : RD(read); ++ dst : WB(write)+10; ++ DECODE : ID; ++ ALU2 : CA; ++ %} ++ ++ //No.19 Integer mod operation : dst <-- reg1 mod reg2 ++ pipe_class ialu_mod(mRegI dst, mRegI src1, mRegI src2) %{ ++ instruction_count(2); ++ src1 : RD(read); ++ src2 : RD(read); ++ dst : WB(write)+10; ++ DECODE : ID; ++ ALU2 : CA; ++ %} ++ ++ //No.15 Long ALU reg-reg operation : dst <-- reg1 op reg2 ++ pipe_class ialu_regL_regL(mRegL dst, mRegL src1, mRegL src2) %{ ++ instruction_count(2); ++ src1 : RD(read); ++ src2 : RD(read); ++ dst : WB(write); ++ DECODE : ID; ++ ALU : CA; ++ %} ++ ++ //No.18 Long ALU reg-imm16 operation : dst <-- reg1 op imm16 ++ pipe_class ialu_regL_imm16(mRegL dst, mRegL src) %{ ++ instruction_count(2); ++ src : RD(read); ++ dst : WB(write); ++ DECODE : ID; ++ ALU : CA; ++ %} ++ ++ //no.16 load Long from memory : ++ pipe_class ialu_loadL(mRegL dst, memory mem) %{ ++ instruction_count(2); ++ mem : RD(read); ++ dst : WB(write)+5; ++ DECODE : ID; ++ MEM : RD; ++ %} ++ ++ //No.17 Store Long to Memory : ++ pipe_class ialu_storeL(mRegL src, memory mem) %{ ++ instruction_count(2); ++ mem : RD(read); ++ src : RD(read); ++ DECODE : ID; ++ MEM : RD; ++ %} ++ ++ //No.2 Integer ALU reg-imm16 operation : dst <-- reg1 op imm16 ++ pipe_class ialu_regI_imm16(mRegI dst, mRegI src) %{ ++ single_instruction; ++ src : RD(read); ++ dst : 
WB(write); ++ DECODE : ID; ++ ALU : CA; ++ %} ++ ++ //No.3 Integer move operation : dst <-- reg ++ pipe_class ialu_regI_mov(mRegI dst, mRegI src) %{ ++ src : RD(read); ++ dst : WB(write); ++ DECODE : ID; ++ ALU : CA; ++ %} ++ ++ //No.4 No instructions : do nothing ++ pipe_class empty( ) %{ ++ instruction_count(0); ++ %} ++ ++ //No.5 UnConditional branch : ++ pipe_class pipe_jump( label labl ) %{ ++ multiple_bundles; ++ DECODE : ID; ++ BR : RD; ++ %} ++ ++ //No.6 ALU Conditional branch : ++ pipe_class pipe_alu_branch(mRegI src1, mRegI src2, label labl ) %{ ++ multiple_bundles; ++ src1 : RD(read); ++ src2 : RD(read); ++ DECODE : ID; ++ BR : RD; ++ %} ++ ++ //no.7 load integer from memory : ++ pipe_class ialu_loadI(mRegI dst, memory mem) %{ ++ mem : RD(read); ++ dst : WB(write)+3; ++ DECODE : ID; ++ MEM : RD; ++ %} ++ ++ //No.8 Store Integer to Memory : ++ pipe_class ialu_storeI(mRegI src, memory mem) %{ ++ mem : RD(read); ++ src : RD(read); ++ DECODE : ID; ++ MEM : RD; ++ %} ++ ++ ++ //No.10 Floating FPU reg-reg operation : dst <-- reg1 op reg2 ++ pipe_class fpu_regF_regF(regF dst, regF src1, regF src2) %{ ++ src1 : RD(read); ++ src2 : RD(read); ++ dst : WB(write); ++ DECODE : ID; ++ FPU : CA; ++ %} ++ ++ //No.22 Floating div operation : dst <-- reg1 div reg2 ++ pipe_class fpu_div(regF dst, regF src1, regF src2) %{ ++ src1 : RD(read); ++ src2 : RD(read); ++ dst : WB(write); ++ DECODE : ID; ++ FPU2 : CA; ++ %} ++ ++ pipe_class fcvt_I2D(regD dst, mRegI src) %{ ++ src : RD(read); ++ dst : WB(write); ++ DECODE : ID; ++ FPU1 : CA; ++ %} ++ ++ pipe_class fcvt_D2I(mRegI dst, regD src) %{ ++ src : RD(read); ++ dst : WB(write); ++ DECODE : ID; ++ FPU1 : CA; ++ %} ++ ++ pipe_class pipe_mfc1(mRegI dst, regD src) %{ ++ src : RD(read); ++ dst : WB(write); ++ DECODE : ID; ++ MEM : RD; ++ %} ++ ++ pipe_class pipe_mtc1(regD dst, mRegI src) %{ ++ src : RD(read); ++ dst : WB(write); ++ DECODE : ID; ++ MEM : RD(5); ++ %} ++ ++ //No.23 Floating sqrt operation : dst <-- reg1 sqrt reg2 ++ pipe_class fpu_sqrt(regF dst, regF src1, regF src2) %{ ++ multiple_bundles; ++ src1 : RD(read); ++ src2 : RD(read); ++ dst : WB(write); ++ DECODE : ID; ++ FPU2 : CA; ++ %} ++ ++ //No.11 Load Floating from Memory : ++ pipe_class fpu_loadF(regF dst, memory mem) %{ ++ instruction_count(1); ++ mem : RD(read); ++ dst : WB(write)+3; ++ DECODE : ID; ++ MEM : RD; ++ %} ++ ++ //No.12 Store Floating to Memory : ++ pipe_class fpu_storeF(regF src, memory mem) %{ ++ instruction_count(1); ++ mem : RD(read); ++ src : RD(read); ++ DECODE : ID; ++ MEM : RD; ++ %} ++ ++ //No.13 FPU Conditional branch : ++ pipe_class pipe_fpu_branch(regF src1, regF src2, label labl ) %{ ++ multiple_bundles; ++ src1 : RD(read); ++ src2 : RD(read); ++ DECODE : ID; ++ BR : RD; ++ %} ++ ++//No.14 Floating FPU reg operation : dst <-- op reg ++ pipe_class fpu1_regF(regF dst, regF src) %{ ++ src : RD(read); ++ dst : WB(write); ++ DECODE : ID; ++ FPU : CA; ++ %} ++ ++ pipe_class long_memory_op() %{ ++ instruction_count(10); multiple_bundles; force_serialization; ++ fixed_latency(30); ++ %} ++ ++ pipe_class simple_call() %{ ++ instruction_count(10); multiple_bundles; force_serialization; ++ fixed_latency(200); ++ BR : RD; ++ %} ++ ++ pipe_class call() %{ ++ instruction_count(10); multiple_bundles; force_serialization; ++ fixed_latency(200); ++ %} ++ ++ //FIXME: ++ //No.9 Piple slow : for multi-instructions ++ pipe_class pipe_slow( ) %{ ++ instruction_count(20); ++ force_serialization; ++ multiple_bundles; ++ fixed_latency(50); ++ %} ++ ++%} ++ ++ ++ 
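Most of the pipe classes above differ only in which functional unit they occupy (ALU, ALU2, FPU, FPU2, MEM, BR) and in the extra write-back latency they attach to the destination: plain ALU results complete at WB, integer and float loads at WB+3, long loads and integer multiplies at WB+5, divides and long multiplies at WB+10. The small C++ table below restates those latencies as a reading aid; the numbers are copied from the pipe_class declarations above, and PipeLatency/extra_latency are invented names, not anything ADLC generates.

#include <cstdio>
#include <cstring>

// Extra result latency (cycles beyond normal write-back) per pipe class,
// transcribed from the pipe_class declarations above.
struct PipeLatency { const char* pipe_class; int extra_cycles; };

static const PipeLatency kLatencies[] = {
  { "ialu_regI_regI", 1 },   // dst : WB(write)+1
  { "ialu_loadI",     3 },   // dst : WB(write)+3
  { "fpu_loadF",      3 },   // dst : WB(write)+3
  { "ialu_loadL",     5 },   // dst : WB(write)+5
  { "ialu_mult",      5 },   // dst : WB(write)+5
  { "ialu_div",      10 },   // dst : WB(write)+10
  { "mulL_reg_reg",  10 },   // dst : WB(write)+10
};

int extra_latency(const char* pipe_class) {
  for (const PipeLatency& p : kLatencies)
    if (std::strcmp(p.pipe_class, pipe_class) == 0) return p.extra_cycles;
  return 0;   // classes not listed write back with no extra delay
}

int main() { std::printf("%d\n", extra_latency("ialu_loadI")); return 0; }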
++//----------INSTRUCTIONS------------------------------------------------------- ++// ++// match -- States which machine-independent subtree may be replaced ++// by this instruction. ++// ins_cost -- The estimated cost of this instruction is used by instruction ++// selection to identify a minimum cost tree of machine ++// instructions that matches a tree of machine-independent ++// instructions. ++// format -- A string providing the disassembly for this instruction. ++// The value of an instruction's operand may be inserted ++// by referring to it with a '$' prefix. ++// opcode -- Three instruction opcodes may be provided. These are referred ++// to within an encode class as $primary, $secondary, and $tertiary ++// respectively. The primary opcode is commonly used to ++// indicate the type of machine instruction, while secondary ++// and tertiary are often used for prefix options or addressing ++// modes. ++// ins_encode -- A list of encode classes with parameters. The encode class ++// name must have been defined in an 'enc_class' specification ++// in the encode section of the architecture description. ++ ++ ++// Load Integer ++instruct loadI(mRegI dst, memory mem) %{ ++ match(Set dst (LoadI mem)); ++ ++ ins_cost(125); ++ format %{ "lw $dst, $mem #@loadI" %} ++ ins_encode %{ ++ __ loadstore_enc($dst$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::LOAD_INT); ++ %} ++ ins_pipe( ialu_loadI ); ++%} ++ ++instruct loadI_convI2L(mRegL dst, memory mem) %{ ++ match(Set dst (ConvI2L (LoadI mem))); ++ ++ ins_cost(125); ++ format %{ "lw $dst, $mem #@loadI_convI2L" %} ++ ins_encode %{ ++ __ loadstore_enc($dst$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::LOAD_INT); ++ %} ++ ins_pipe( ialu_loadI ); ++%} ++ ++// Load Integer (32 bit signed) to Byte (8 bit signed) ++instruct loadI2B(mRegI dst, memory mem, immI_24 twentyfour) %{ ++ match(Set dst (RShiftI (LShiftI (LoadI mem) twentyfour) twentyfour)); ++ ++ ins_cost(125); ++ format %{ "lb $dst, $mem\t# int -> byte #@loadI2B" %} ++ ins_encode %{ ++ __ loadstore_enc($dst$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::LOAD_BYTE); ++ %} ++ ins_pipe(ialu_loadI); ++%} ++ ++// Load Integer (32 bit signed) to Unsigned Byte (8 bit UNsigned) ++instruct loadI2UB(mRegI dst, memory mem, immI_255 mask) %{ ++ match(Set dst (AndI (LoadI mem) mask)); ++ ++ ins_cost(125); ++ format %{ "lbu $dst, $mem\t# int -> ubyte #@loadI2UB" %} ++ ins_encode %{ ++ __ loadstore_enc($dst$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::LOAD_U_BYTE); ++ %} ++ ins_pipe(ialu_loadI); ++%} ++ ++// Load Integer (32 bit signed) to Short (16 bit signed) ++instruct loadI2S(mRegI dst, memory mem, immI_16 sixteen) %{ ++ match(Set dst (RShiftI (LShiftI (LoadI mem) sixteen) sixteen)); ++ ++ ins_cost(125); ++ format %{ "lh $dst, $mem\t# int -> short #@loadI2S" %} ++ ins_encode %{ ++ __ loadstore_enc($dst$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::LOAD_SHORT); ++ %} ++ ins_pipe(ialu_loadI); ++%} ++ ++// Load Integer (32 bit signed) to Unsigned Short/Char (16 bit UNsigned) ++instruct loadI2US(mRegI dst, memory mem, immI_65535 mask) %{ ++ match(Set dst (AndI (LoadI mem) mask)); ++ ++ ins_cost(125); ++ format %{ "lhu $dst, $mem\t# int -> ushort/char #@loadI2US" %} ++ ins_encode %{ ++ __ loadstore_enc($dst$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::LOAD_U_SHORT); ++ %} ++ ins_pipe(ialu_loadI); ++%} ++ ++// Load Long. 
++instruct loadL(mRegL dst, memory mem) %{ ++// predicate(!((LoadLNode*)n)->require_atomic_access()); ++ match(Set dst (LoadL mem)); ++ ++ ins_cost(250); ++ format %{ "ld $dst, $mem #@loadL" %} ++ ins_encode %{ ++ __ loadstore_enc($dst$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::LOAD_LONG); ++ %} ++ ins_pipe( ialu_loadL ); ++%} ++ ++// Load Long - UNaligned ++instruct loadL_unaligned(mRegL dst, memory mem) %{ ++ match(Set dst (LoadL_unaligned mem)); ++ ++ // FIXME: Need more effective ldl/ldr ++ ins_cost(450); ++ format %{ "ld $dst, $mem #@loadL_unaligned\n\t" %} ++ ins_encode %{ ++ __ loadstore_enc($dst$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::LOAD_LONG); ++ %} ++ ins_pipe( ialu_loadL ); ++%} ++ ++// Store Long ++instruct storeL_reg(memory mem, mRegL src) %{ ++ match(Set mem (StoreL mem src)); ++ ++ ins_cost(200); ++ format %{ "sd $mem, $src #@storeL_reg\n" %} ++ ins_encode %{ ++ __ loadstore_enc($src$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::STORE_LONG); ++ %} ++ ins_pipe( ialu_storeL ); ++%} ++ ++instruct storeL_immL_0(memory mem, immL_0 zero) %{ ++ match(Set mem (StoreL mem zero)); ++ ++ ins_cost(180); ++ format %{ "sd zero, $mem #@storeL_immL_0" %} ++ ins_encode %{ ++ __ loadstore_enc(R0, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::STORE_LONG); ++ %} ++ ins_pipe( ialu_storeL ); ++%} ++ ++// Load Compressed Pointer ++instruct loadN(mRegN dst, memory mem) ++%{ ++ match(Set dst (LoadN mem)); ++ ++ ins_cost(125); // XXX ++ format %{ "lwu $dst, $mem\t# compressed ptr @ loadN" %} ++ ins_encode %{ ++ relocInfo::relocType disp_reloc = $mem->disp_reloc(); ++ assert(disp_reloc == relocInfo::none, "cannot have disp"); ++ __ loadstore_enc($dst$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::LOAD_U_INT); ++ %} ++ ins_pipe( ialu_loadI ); // XXX ++%} ++ ++instruct loadN2P(mRegP dst, memory mem) ++%{ ++ match(Set dst (DecodeN (LoadN mem))); ++ predicate(Universe::narrow_oop_base() == NULL && Universe::narrow_oop_shift() == 0); ++ ++ ins_cost(125); // XXX ++ format %{ "lwu $dst, $mem\t# @ loadN2P" %} ++ ins_encode %{ ++ relocInfo::relocType disp_reloc = $mem->disp_reloc(); ++ assert(disp_reloc == relocInfo::none, "cannot have disp"); ++ __ loadstore_enc($dst$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::LOAD_U_INT); ++ %} ++ ins_pipe( ialu_loadI ); // XXX ++%} ++ ++// Load Pointer ++instruct loadP(mRegP dst, memory mem) %{ ++ match(Set dst (LoadP mem)); ++ ++ ins_cost(125); ++ format %{ "ld $dst, $mem #@loadP" %} ++ ins_encode %{ ++ relocInfo::relocType disp_reloc = $mem->disp_reloc(); ++ assert(disp_reloc == relocInfo::none, "cannot have disp"); ++ __ loadstore_enc($dst$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::LOAD_LONG); ++ %} ++ ins_pipe( ialu_loadI ); ++%} ++ ++// Load Klass Pointer ++instruct loadKlass(mRegP dst, memory mem) %{ ++ match(Set dst (LoadKlass mem)); ++ ++ ins_cost(125); ++ format %{ "MOV $dst,$mem @ loadKlass" %} ++ ins_encode %{ ++ relocInfo::relocType disp_reloc = $mem->disp_reloc(); ++ assert(disp_reloc == relocInfo::none, "cannot have disp"); ++ __ loadstore_enc($dst$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::LOAD_LONG); ++ %} ++ ins_pipe( ialu_loadI ); ++%} ++ ++// Load narrow Klass Pointer ++instruct loadNKlass(mRegN dst, memory mem) ++%{ ++ match(Set dst (LoadNKlass mem)); ++ ++ ins_cost(125); // XXX ++ format %{ "lwu $dst, 
$mem\t# compressed klass ptr @ loadNKlass" %} ++ ins_encode %{ ++ relocInfo::relocType disp_reloc = $mem->disp_reloc(); ++ assert(disp_reloc == relocInfo::none, "cannot have disp"); ++ __ loadstore_enc($dst$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::LOAD_U_INT); ++ %} ++ ins_pipe( ialu_loadI ); // XXX ++%} ++ ++instruct loadN2PKlass(mRegP dst, memory mem) ++%{ ++ match(Set dst (DecodeNKlass (LoadNKlass mem))); ++ predicate(Universe::narrow_klass_base() == NULL && Universe::narrow_klass_shift() == 0); ++ ++ ins_cost(125); // XXX ++ format %{ "lwu $dst, $mem\t# compressed klass ptr @ loadN2PKlass" %} ++ ins_encode %{ ++ relocInfo::relocType disp_reloc = $mem->disp_reloc(); ++ assert(disp_reloc == relocInfo::none, "cannot have disp"); ++ __ loadstore_enc($dst$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::LOAD_U_INT); ++ %} ++ ins_pipe( ialu_loadI ); // XXX ++%} ++ ++// Load Constant ++instruct loadConI(mRegI dst, immI src) %{ ++ match(Set dst src); ++ ++ ins_cost(150); ++ format %{ "mov $dst, $src #@loadConI" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ int value = $src$$constant; ++ __ move(dst, value); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++ ++instruct loadConL_set64(mRegL dst, immL src) %{ ++ match(Set dst src); ++ ins_cost(120); ++ format %{ "li $dst, $src @ loadConL_set64" %} ++ ins_encode %{ ++ __ set64($dst$$Register, $src$$constant); ++ %} ++ ins_pipe(ialu_regL_regL); ++%} ++ ++instruct loadConL16(mRegL dst, immL16 src) %{ ++ match(Set dst src); ++ ins_cost(105); ++ format %{ "mov $dst, $src #@loadConL16" %} ++ ins_encode %{ ++ Register dst_reg = as_Register($dst$$reg); ++ int value = $src$$constant; ++ __ daddiu(dst_reg, R0, value); ++ %} ++ ins_pipe( ialu_regL_regL ); ++%} ++ ++ ++instruct loadConL_immL_0(mRegL dst, immL_0 src) %{ ++ match(Set dst src); ++ ins_cost(100); ++ format %{ "mov $dst, zero #@loadConL_immL_0" %} ++ ins_encode %{ ++ Register dst_reg = as_Register($dst$$reg); ++ __ daddu(dst_reg, R0, R0); ++ %} ++ ins_pipe( ialu_regL_regL ); ++%} ++ ++// Load Range ++instruct loadRange(mRegI dst, memory mem) %{ ++ match(Set dst (LoadRange mem)); ++ ++ ins_cost(125); ++ format %{ "MOV $dst,$mem @ loadRange" %} ++ ins_encode %{ ++ __ loadstore_enc($dst$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::LOAD_INT); ++ %} ++ ins_pipe( ialu_loadI ); ++%} ++ ++ ++instruct storeP(memory mem, mRegP src ) %{ ++ match(Set mem (StoreP mem src)); ++ ++ ins_cost(125); ++ format %{ "sd $src, $mem #@storeP" %} ++ ins_encode %{ ++ __ loadstore_enc($src$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::STORE_LONG); ++ %} ++ ins_pipe( ialu_storeI ); ++%} ++ ++// Store NULL Pointer, mark word, or other simple pointer constant. 
++instruct storeImmP_immP_0(memory mem, immP_0 zero) %{ ++ match(Set mem (StoreP mem zero)); ++ ++ ins_cost(125); ++ format %{ "mov $mem, $zero #@storeImmP_immP_0" %} ++ ins_encode %{ ++ __ loadstore_enc(R0, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::STORE_LONG); ++ %} ++ ins_pipe( ialu_storeI ); ++%} ++ ++// Store Compressed Pointer ++instruct storeN(memory mem, mRegN src) ++%{ ++ match(Set mem (StoreN mem src)); ++ ++ ins_cost(125); // XXX ++ format %{ "sw $mem, $src\t# compressed ptr @ storeN" %} ++ ins_encode %{ ++ __ loadstore_enc($src$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::STORE_INT); ++ %} ++ ins_pipe( ialu_storeI ); ++%} ++ ++instruct storeP2N(memory mem, mRegP src) ++%{ ++ match(Set mem (StoreN mem (EncodeP src))); ++ predicate(Universe::narrow_oop_base() == NULL && Universe::narrow_oop_shift() == 0); ++ ++ ins_cost(125); // XXX ++ format %{ "sw $mem, $src\t# @ storeP2N" %} ++ ins_encode %{ ++ __ loadstore_enc($src$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::STORE_INT); ++ %} ++ ins_pipe( ialu_storeI ); ++%} ++ ++instruct storeNKlass(memory mem, mRegN src) ++%{ ++ match(Set mem (StoreNKlass mem src)); ++ ++ ins_cost(125); // XXX ++ format %{ "sw $mem, $src\t# compressed klass ptr @ storeNKlass" %} ++ ins_encode %{ ++ __ loadstore_enc($src$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::STORE_INT); ++ %} ++ ins_pipe( ialu_storeI ); ++%} ++ ++instruct storeP2NKlass(memory mem, mRegP src) ++%{ ++ match(Set mem (StoreNKlass mem (EncodePKlass src))); ++ predicate(Universe::narrow_klass_base() == NULL && Universe::narrow_klass_shift() == 0); ++ ++ ins_cost(125); // XXX ++ format %{ "sw $mem, $src\t# @ storeP2NKlass" %} ++ ins_encode %{ ++ __ loadstore_enc($src$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::STORE_INT); ++ %} ++ ins_pipe( ialu_storeI ); ++%} ++ ++instruct storeImmN_immN_0(memory mem, immN_0 zero) ++%{ ++ match(Set mem (StoreN mem zero)); ++ ++ ins_cost(125); // XXX ++ format %{ "storeN0 zero, $mem\t# compressed ptr" %} ++ ins_encode %{ ++ __ loadstore_enc(R0, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::STORE_INT); ++ %} ++ ins_pipe( ialu_storeI ); ++%} ++ ++// Store Byte ++instruct storeB_immB_0(memory mem, immI_0 zero) %{ ++ match(Set mem (StoreB mem zero)); ++ ++ format %{ "mov $mem, zero #@storeB_immB_0" %} ++ ins_encode %{ ++ __ loadstore_enc(R0, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::STORE_BYTE); ++ %} ++ ins_pipe( ialu_storeI ); ++%} ++ ++instruct storeB(memory mem, mRegI src) %{ ++ match(Set mem (StoreB mem src)); ++ ++ ins_cost(125); ++ format %{ "sb $src, $mem #@storeB" %} ++ ins_encode %{ ++ __ loadstore_enc($src$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::STORE_BYTE); ++ %} ++ ins_pipe( ialu_storeI ); ++%} ++ ++instruct storeB_convL2I(memory mem, mRegL src) %{ ++ match(Set mem (StoreB mem (ConvL2I src))); ++ ++ ins_cost(125); ++ format %{ "sb $src, $mem #@storeB_convL2I" %} ++ ins_encode %{ ++ __ loadstore_enc($src$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::STORE_BYTE); ++ %} ++ ins_pipe( ialu_storeI ); ++%} ++ ++// Load Byte (8bit signed) ++instruct loadB(mRegI dst, memory mem) %{ ++ match(Set dst (LoadB mem)); ++ ++ ins_cost(125); ++ format %{ "lb $dst, $mem #@loadB" %} ++ ins_encode %{ ++ __ loadstore_enc($dst$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, 
MacroAssembler::LOAD_BYTE); ++ %} ++ ins_pipe( ialu_loadI ); ++%} ++ ++instruct loadB_convI2L(mRegL dst, memory mem) %{ ++ match(Set dst (ConvI2L (LoadB mem))); ++ ++ ins_cost(125); ++ format %{ "lb $dst, $mem #@loadB_convI2L" %} ++ ins_encode %{ ++ __ loadstore_enc($dst$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::LOAD_BYTE); ++ %} ++ ins_pipe( ialu_loadI ); ++%} ++ ++// Load Byte (8bit UNsigned) ++instruct loadUB(mRegI dst, memory mem) %{ ++ match(Set dst (LoadUB mem)); ++ ++ ins_cost(125); ++ format %{ "lbu $dst, $mem #@loadUB" %} ++ ins_encode %{ ++ __ loadstore_enc($dst$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::LOAD_U_BYTE); ++ %} ++ ins_pipe( ialu_loadI ); ++%} ++ ++instruct loadUB_convI2L(mRegL dst, memory mem) %{ ++ match(Set dst (ConvI2L (LoadUB mem))); ++ ++ ins_cost(125); ++ format %{ "lbu $dst, $mem #@loadUB_convI2L" %} ++ ins_encode %{ ++ __ loadstore_enc($dst$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::LOAD_U_BYTE); ++ %} ++ ins_pipe( ialu_loadI ); ++%} ++ ++// Load Short (16bit signed) ++instruct loadS(mRegI dst, memory mem) %{ ++ match(Set dst (LoadS mem)); ++ ++ ins_cost(125); ++ format %{ "lh $dst, $mem #@loadS" %} ++ ins_encode %{ ++ __ loadstore_enc($dst$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::LOAD_SHORT); ++ %} ++ ins_pipe( ialu_loadI ); ++%} ++ ++// Load Short (16 bit signed) to Byte (8 bit signed) ++instruct loadS2B(mRegI dst, memory mem, immI_24 twentyfour) %{ ++ match(Set dst (RShiftI (LShiftI (LoadS mem) twentyfour) twentyfour)); ++ ++ ins_cost(125); ++ format %{ "lb $dst, $mem\t# short -> byte #@loadS2B" %} ++ ins_encode %{ ++ __ loadstore_enc($dst$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::LOAD_BYTE); ++ %} ++ ins_pipe(ialu_loadI); ++%} ++ ++instruct loadS_convI2L(mRegL dst, memory mem) %{ ++ match(Set dst (ConvI2L (LoadS mem))); ++ ++ ins_cost(125); ++ format %{ "lh $dst, $mem #@loadS_convI2L" %} ++ ins_encode %{ ++ __ loadstore_enc($dst$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::LOAD_SHORT); ++ %} ++ ins_pipe( ialu_loadI ); ++%} ++ ++// Store Integer Immediate ++instruct storeI_immI_0(memory mem, immI_0 zero) %{ ++ match(Set mem (StoreI mem zero)); ++ ++ format %{ "mov $mem, zero #@storeI_immI_0" %} ++ ins_encode %{ ++ __ loadstore_enc(R0, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::STORE_INT); ++ %} ++ ins_pipe( ialu_storeI ); ++%} ++ ++// Store Integer ++instruct storeI(memory mem, mRegI src) %{ ++ match(Set mem (StoreI mem src)); ++ ++ ins_cost(125); ++ format %{ "sw $mem, $src #@storeI" %} ++ ins_encode %{ ++ __ loadstore_enc($src$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::STORE_INT); ++ %} ++ ins_pipe( ialu_storeI ); ++%} ++ ++instruct storeI_convL2I(memory mem, mRegL src) %{ ++ match(Set mem (StoreI mem (ConvL2I src))); ++ ++ ins_cost(125); ++ format %{ "sw $mem, $src #@storeI_convL2I" %} ++ ins_encode %{ ++ __ loadstore_enc($src$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::STORE_INT); ++ %} ++ ins_pipe( ialu_storeI ); ++%} ++ ++// Load Float ++instruct loadF(regF dst, memory mem) %{ ++ match(Set dst (LoadF mem)); ++ ++ ins_cost(150); ++ format %{ "loadF $dst, $mem #@loadF" %} ++ ins_encode %{ ++ __ loadstore_enc($dst$$FloatRegister, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::LOAD_FLOAT); ++ %} ++ ins_pipe( ialu_loadI ); ++%} ++ ++instruct 
loadConP_general(mRegP dst, immP src) %{ ++ match(Set dst src); ++ ++ ins_cost(120); ++ format %{ "li $dst, $src #@loadConP_general" %} ++ ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ long* value = (long*)$src$$constant; ++ ++ if($src->constant_reloc() == relocInfo::metadata_type){ ++ int klass_index = __ oop_recorder()->find_index((Klass*)value); ++ RelocationHolder rspec = metadata_Relocation::spec(klass_index); ++ ++ __ relocate(rspec); ++ __ patchable_set48(dst, (long)value); ++ } else if($src->constant_reloc() == relocInfo::oop_type){ ++ int oop_index = __ oop_recorder()->find_index((jobject)value); ++ RelocationHolder rspec = oop_Relocation::spec(oop_index); ++ ++ __ relocate(rspec); ++ __ patchable_set48(dst, (long)value); ++ } else if ($src->constant_reloc() == relocInfo::none) { ++ __ set64(dst, (long)value); ++ } ++ %} ++ ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++instruct loadConP_no_oop_cheap(mRegP dst, immP_no_oop_cheap src) %{ ++ match(Set dst src); ++ ++ ins_cost(80); ++ format %{ "li $dst, $src @ loadConP_no_oop_cheap" %} ++ ++ ins_encode %{ ++ __ set64($dst$$Register, $src$$constant); ++ %} ++ ++ ins_pipe(ialu_regI_regI); ++%} ++ ++ ++instruct loadConP_poll(mRegP dst, immP_poll src) %{ ++ match(Set dst src); ++ ++ ins_cost(50); ++ format %{ "li $dst, $src #@loadConP_poll" %} ++ ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ intptr_t value = (intptr_t)$src$$constant; ++ ++ __ set64(dst, (jlong)value); ++ %} ++ ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++instruct loadConP_immP_0(mRegP dst, immP_0 src) ++%{ ++ match(Set dst src); ++ ++ ins_cost(50); ++ format %{ "mov $dst, R0\t# ptr" %} ++ ins_encode %{ ++ Register dst_reg = $dst$$Register; ++ __ daddu(dst_reg, R0, R0); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++instruct loadConN_immN_0(mRegN dst, immN_0 src) %{ ++ match(Set dst src); ++ format %{ "move $dst, R0\t# compressed NULL ptr" %} ++ ins_encode %{ ++ __ move($dst$$Register, R0); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++instruct loadConN(mRegN dst, immN src) %{ ++ match(Set dst src); ++ ++ ins_cost(125); ++ format %{ "li $dst, $src\t# compressed ptr @ loadConN" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ __ set_narrow_oop(dst, (jobject)$src$$constant); ++ %} ++ ins_pipe( ialu_regI_regI ); // XXX ++%} ++ ++instruct loadConNKlass(mRegN dst, immNKlass src) %{ ++ match(Set dst src); ++ ++ ins_cost(125); ++ format %{ "li $dst, $src\t# compressed klass ptr @ loadConNKlass" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ __ set_narrow_klass(dst, (Klass*)$src$$constant); ++ %} ++ ins_pipe( ialu_regI_regI ); // XXX ++%} ++ ++//FIXME ++// Tail Call; Jump from runtime stub to Java code. ++// Also known as an 'interprocedural jump'. ++// Target of jump will eventually return to caller. ++// TailJump below removes the return address. ++instruct TailCalljmpInd(mRegP jump_target, mRegP method_oop) %{ ++ match(TailCall jump_target method_oop ); ++ ins_cost(300); ++ format %{ "JMP $jump_target \t# @TailCalljmpInd" %} ++ ++ ins_encode %{ ++ Register target = $jump_target$$Register; ++ Register oop = $method_oop$$Register; ++ ++ // RA will be used in generate_forward_exception() ++ __ push(RA); ++ ++ __ move(S3, oop); ++ __ jr(target); ++ __ delayed()->nop(); ++ %} ++ ++ ins_pipe( pipe_jump ); ++%} ++ ++// Create exception oop: created by stack-crawling runtime code. ++// Created exception is now available to this handler, and is setup ++// just prior to jumping to this handler. No code emitted. 
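++// The a0_RegP operand pins ex_oop to register A0, so matching CreateEx below
++// only records that the created exception oop is already in A0; as on x86,
++// no instructions are emitted for it.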
++instruct CreateException( a0_RegP ex_oop ) ++%{ ++ match(Set ex_oop (CreateEx)); ++ ++ // use the following format syntax ++ format %{ "# exception oop is in A0; no code emitted @CreateException" %} ++ ins_encode %{ ++ // X86 leaves this function empty ++ __ block_comment("CreateException is empty in MIPS"); ++ %} ++ ins_pipe( empty ); ++// ins_pipe( pipe_jump ); ++%} ++ ++ ++/* The mechanism of exception handling is clear now. ++ ++- Common try/catch: ++ [stubGenerator_mips.cpp] generate_forward_exception() ++ |- V0, V1 are created ++ |- T9 <= SharedRuntime::exception_handler_for_return_address ++ `- jr T9 ++ `- the caller's exception_handler ++ `- jr OptoRuntime::exception_blob ++ `- here ++- Rethrow(e.g. 'unwind'): ++ * The callee: ++ |- an exception is triggered during execution ++ `- exits the callee method through RethrowException node ++ |- The callee pushes exception_oop(T0) and exception_pc(RA) ++ `- The callee jumps to OptoRuntime::rethrow_stub() ++ * In OptoRuntime::rethrow_stub: ++ |- The VM calls _rethrow_Java to determine the return address in the caller method ++ `- exits the stub with tailjmpInd ++ |- pops exception_oop(V0) and exception_pc(V1) ++ `- jumps to the return address(usually an exception_handler) ++ * The caller: ++ `- continues processing the exception_blob with V0/V1 ++*/ ++ ++// Rethrow exception: ++// The exception oop will come in the first argument position. ++// Then JUMP (not call) to the rethrow stub code. ++instruct RethrowException() ++%{ ++ match(Rethrow); ++ ++ // use the following format syntax ++ format %{ "JMP rethrow_stub #@RethrowException" %} ++ ins_encode %{ ++ __ block_comment("@ RethrowException"); ++ ++ cbuf.set_insts_mark(); ++ cbuf.relocate(cbuf.insts_mark(), runtime_call_Relocation::spec()); ++ ++ // call OptoRuntime::rethrow_stub to get the exception handler in parent method ++ __ patchable_jump((address)OptoRuntime::rethrow_stub()); ++ %} ++ ins_pipe( pipe_jump ); ++%} ++ ++// ============================================================================ ++// Branch Instructions --- long offset versions ++ ++// Jump Direct ++instruct jmpDir_long(label labl) %{ ++ match(Goto); ++ effect(USE labl); ++ ++ ins_cost(300); ++ format %{ "JMP $labl #@jmpDir_long" %} ++ ++ ins_encode %{ ++ Label* L = $labl$$label; ++ __ jmp_far(*L); ++ %} ++ ++ ins_pipe( pipe_jump ); ++ //ins_pc_relative(1); ++%} ++ ++// Jump Direct Conditional - Label defines a relative address from Jcc+1 ++instruct jmpLoopEnd_long(cmpOp cop, mRegI src1, mRegI src2, label labl) %{ ++ match(CountedLoopEnd cop (CmpI src1 src2)); ++ effect(USE labl); ++ ++ ins_cost(300); ++ format %{ "J$cop $src1, $src2, $labl\t# Loop end @ jmpLoopEnd_long" %} ++ ins_encode %{ ++ Register op1 = $src1$$Register; ++ Register op2 = $src2$$Register; ++ Label* L = $labl$$label; ++ int flag = $cop$$cmpcode; ++ ++ switch(flag) { ++ case 0x01: //equal ++ __ beq_long(op1, op2, *L); ++ break; ++ case 0x02: //not_equal ++ __ bne_long(op1, op2, *L); ++ break; ++ case 0x03: //above ++ __ slt(AT, op2, op1); ++ __ bne_long(AT, R0, *L); ++ break; ++ case 0x04: //above_equal ++ __ slt(AT, op1, op2); ++ __ beq_long(AT, R0, *L); ++ break; ++ case 0x05: //below ++ __ slt(AT, op1, op2); ++ __ bne_long(AT, R0, *L); ++ break; ++ case 0x06: //below_equal ++ __ slt(AT, op2, op1); ++ __ beq_long(AT, R0, *L); ++ break; ++ default: ++ Unimplemented(); ++ } ++ %} ++ ins_pipe( pipe_jump ); ++ ins_pc_relative(1); ++%} ++ ++instruct jmpLoopEnd_reg_immI_long(cmpOp cop, mRegI src1, immI src2, label labl) %{ ++ 
match(CountedLoopEnd cop (CmpI src1 src2)); ++ effect(USE labl); ++ ++ ins_cost(300); ++ format %{ "J$cop $src1, $src2, $labl\t# Loop end @ jmpLoopEnd_reg_immI_long" %} ++ ins_encode %{ ++ Register op1 = $src1$$Register; ++ Register op2 = AT; ++ Label* L = $labl$$label; ++ int flag = $cop$$cmpcode; ++ ++ __ move(op2, $src2$$constant); ++ ++ switch(flag) { ++ case 0x01: //equal ++ __ beq_long(op1, op2, *L); ++ break; ++ case 0x02: //not_equal ++ __ bne_long(op1, op2, *L); ++ break; ++ case 0x03: //above ++ __ slt(AT, op2, op1); ++ __ bne_long(AT, R0, *L); ++ break; ++ case 0x04: //above_equal ++ __ slt(AT, op1, op2); ++ __ beq_long(AT, R0, *L); ++ break; ++ case 0x05: //below ++ __ slt(AT, op1, op2); ++ __ bne_long(AT, R0, *L); ++ break; ++ case 0x06: //below_equal ++ __ slt(AT, op2, op1); ++ __ beq_long(AT, R0, *L); ++ break; ++ default: ++ Unimplemented(); ++ } ++ %} ++ ins_pipe( pipe_jump ); ++ ins_pc_relative(1); ++%} ++ ++ ++// This match pattern is created for StoreIConditional since I cannot match IfNode without a RegFlags! ++instruct jmpCon_flags_long(cmpOp cop, FlagsReg cr, label labl) %{ ++ match(If cop cr); ++ effect(USE labl); ++ ++ ins_cost(300); ++ format %{ "J$cop $labl #mips uses T0 as equivalent to eflag @jmpCon_flags_long" %} ++ ++ ins_encode %{ ++ Label* L = $labl$$label; ++ switch($cop$$cmpcode) { ++ case 0x01: //equal ++ __ bne_long($cr$$Register, R0, *L); ++ break; ++ case 0x02: //not equal ++ __ beq_long($cr$$Register, R0, *L); ++ break; ++ default: ++ Unimplemented(); ++ } ++ %} ++ ++ ins_pipe( pipe_jump ); ++ ins_pc_relative(1); ++%} ++ ++// Conditional jumps ++instruct branchConP_zero_long(cmpOpU cmp, mRegP op1, immP_0 zero, label labl) %{ ++ match(If cmp (CmpP op1 zero)); ++ effect(USE labl); ++ ++ ins_cost(180); ++ format %{ "b$cmp $op1, R0, $labl #@branchConP_zero_long" %} ++ ++ ins_encode %{ ++ Register op1 = $op1$$Register; ++ Register op2 = R0; ++ Label* L = $labl$$label; ++ int flag = $cmp$$cmpcode; ++ ++ switch(flag) { ++ case 0x01: //equal ++ __ beq_long(op1, op2, *L); ++ break; ++ case 0x02: //not_equal ++ __ bne_long(op1, op2, *L); ++ break; ++ default: ++ Unimplemented(); ++ } ++ %} ++ ++ ins_pc_relative(1); ++ ins_pipe( pipe_alu_branch ); ++%} ++ ++instruct branchConN2P_zero_long(cmpOpU cmp, mRegN op1, immP_0 zero, label labl) %{ ++ match(If cmp (CmpP (DecodeN op1) zero)); ++ predicate(Universe::narrow_oop_base() == NULL && Universe::narrow_oop_shift() == 0); ++ effect(USE labl); ++ ++ ins_cost(180); ++ format %{ "b$cmp $op1, R0, $labl #@branchConN2P_zero_long" %} ++ ++ ins_encode %{ ++ Register op1 = $op1$$Register; ++ Register op2 = R0; ++ Label* L = $labl$$label; ++ int flag = $cmp$$cmpcode; ++ ++ switch(flag) ++ { ++ case 0x01: //equal ++ __ beq_long(op1, op2, *L); ++ break; ++ case 0x02: //not_equal ++ __ bne_long(op1, op2, *L); ++ break; ++ default: ++ Unimplemented(); ++ } ++ %} ++ ++ ins_pc_relative(1); ++ ins_pipe( pipe_alu_branch ); ++%} ++ ++ ++instruct branchConP_long(cmpOpU cmp, mRegP op1, mRegP op2, label labl) %{ ++ match(If cmp (CmpP op1 op2)); ++// predicate(can_branch_register(_kids[0]->_leaf, _kids[1]->_leaf)); ++ effect(USE labl); ++ ++ ins_cost(200); ++ format %{ "b$cmp $op1, $op2, $labl #@branchConP_long" %} ++ ++ ins_encode %{ ++ Register op1 = $op1$$Register; ++ Register op2 = $op2$$Register; ++ Label* L = $labl$$label; ++ int flag = $cmp$$cmpcode; ++ ++ switch(flag) { ++ case 0x01: //equal ++ __ beq_long(op1, op2, *L); ++ break; ++ case 0x02: //not_equal ++ __ bne_long(op1, op2, *L); ++ break; ++ case 0x03: //above ++ __ 
sltu(AT, op2, op1); ++ __ bne_long(R0, AT, *L); ++ break; ++ case 0x04: //above_equal ++ __ sltu(AT, op1, op2); ++ __ beq_long(AT, R0, *L); ++ break; ++ case 0x05: //below ++ __ sltu(AT, op1, op2); ++ __ bne_long(R0, AT, *L); ++ break; ++ case 0x06: //below_equal ++ __ sltu(AT, op2, op1); ++ __ beq_long(AT, R0, *L); ++ break; ++ default: ++ Unimplemented(); ++ } ++ %} ++ ++ ins_pc_relative(1); ++ ins_pipe( pipe_alu_branch ); ++%} ++ ++instruct cmpN_null_branch_long(cmpOp cmp, mRegN op1, immN_0 null, label labl) %{ ++ match(If cmp (CmpN op1 null)); ++ effect(USE labl); ++ ++ ins_cost(180); ++ format %{ "CMP $op1,0\t! compressed ptr\n\t" ++ "BP$cmp $labl @ cmpN_null_branch_long" %} ++ ins_encode %{ ++ Register op1 = $op1$$Register; ++ Register op2 = R0; ++ Label* L = $labl$$label; ++ int flag = $cmp$$cmpcode; ++ ++ switch(flag) { ++ case 0x01: //equal ++ __ beq_long(op1, op2, *L); ++ break; ++ case 0x02: //not_equal ++ __ bne_long(op1, op2, *L); ++ break; ++ default: ++ Unimplemented(); ++ } ++ %} ++//TODO: pipe_branchP or create pipe_branchN LEE ++ ins_pc_relative(1); ++ ins_pipe( pipe_alu_branch ); ++%} ++ ++instruct cmpN_reg_branch_long(cmpOp cmp, mRegN op1, mRegN op2, label labl) %{ ++ match(If cmp (CmpN op1 op2)); ++ effect(USE labl); ++ ++ ins_cost(180); ++ format %{ "CMP $op1,$op2\t! compressed ptr\n\t" ++ "BP$cmp $labl @ cmpN_reg_branch_long" %} ++ ins_encode %{ ++ Register op1_reg = $op1$$Register; ++ Register op2_reg = $op2$$Register; ++ Label* L = $labl$$label; ++ int flag = $cmp$$cmpcode; ++ ++ switch(flag) { ++ case 0x01: //equal ++ __ beq_long(op1_reg, op2_reg, *L); ++ break; ++ case 0x02: //not_equal ++ __ bne_long(op1_reg, op2_reg, *L); ++ break; ++ case 0x03: //above ++ __ sltu(AT, op2_reg, op1_reg); ++ __ bne_long(R0, AT, *L); ++ break; ++ case 0x04: //above_equal ++ __ sltu(AT, op1_reg, op2_reg); ++ __ beq_long(AT, R0, *L); ++ break; ++ case 0x05: //below ++ __ sltu(AT, op1_reg, op2_reg); ++ __ bne_long(R0, AT, *L); ++ break; ++ case 0x06: //below_equal ++ __ sltu(AT, op2_reg, op1_reg); ++ __ beq_long(AT, R0, *L); ++ break; ++ default: ++ Unimplemented(); ++ } ++ %} ++ ins_pc_relative(1); ++ ins_pipe( pipe_alu_branch ); ++%} ++ ++instruct branchConIU_reg_reg_long(cmpOpU cmp, mRegI src1, mRegI src2, label labl) %{ ++ match( If cmp (CmpU src1 src2) ); ++ effect(USE labl); ++ format %{ "BR$cmp $src1, $src2, $labl #@branchConIU_reg_reg_long" %} ++ ++ ins_encode %{ ++ Register op1 = $src1$$Register; ++ Register op2 = $src2$$Register; ++ Label* L = $labl$$label; ++ int flag = $cmp$$cmpcode; ++ ++ switch(flag) { ++ case 0x01: //equal ++ __ beq_long(op1, op2, *L); ++ break; ++ case 0x02: //not_equal ++ __ bne_long(op1, op2, *L); ++ break; ++ case 0x03: //above ++ __ sltu(AT, op2, op1); ++ __ bne_long(AT, R0, *L); ++ break; ++ case 0x04: //above_equal ++ __ sltu(AT, op1, op2); ++ __ beq_long(AT, R0, *L); ++ break; ++ case 0x05: //below ++ __ sltu(AT, op1, op2); ++ __ bne_long(AT, R0, *L); ++ break; ++ case 0x06: //below_equal ++ __ sltu(AT, op2, op1); ++ __ beq_long(AT, R0, *L); ++ break; ++ default: ++ Unimplemented(); ++ } ++ %} ++ ++ ins_pc_relative(1); ++ ins_pipe( pipe_alu_branch ); ++%} ++ ++ ++instruct branchConIU_reg_imm_long(cmpOpU cmp, mRegI src1, immI src2, label labl) %{ ++ match( If cmp (CmpU src1 src2) ); ++ effect(USE labl); ++ format %{ "BR$cmp $src1, $src2, $labl #@branchConIU_reg_imm_long" %} ++ ++ ins_encode %{ ++ Register op1 = $src1$$Register; ++ int val = $src2$$constant; ++ Label* L = $labl$$label; ++ int flag = $cmp$$cmpcode; ++ ++ __ move(AT, val); ++ 
switch(flag) { ++ case 0x01: //equal ++ __ beq_long(op1, AT, *L); ++ break; ++ case 0x02: //not_equal ++ __ bne_long(op1, AT, *L); ++ break; ++ case 0x03: //above ++ __ sltu(AT, AT, op1); ++ __ bne_long(R0, AT, *L); ++ break; ++ case 0x04: //above_equal ++ __ sltu(AT, op1, AT); ++ __ beq_long(AT, R0, *L); ++ break; ++ case 0x05: //below ++ __ sltu(AT, op1, AT); ++ __ bne_long(R0, AT, *L); ++ break; ++ case 0x06: //below_equal ++ __ sltu(AT, AT, op1); ++ __ beq_long(AT, R0, *L); ++ break; ++ default: ++ Unimplemented(); ++ } ++ %} ++ ++ ins_pc_relative(1); ++ ins_pipe( pipe_alu_branch ); ++%} ++ ++instruct branchConI_reg_reg_long(cmpOp cmp, mRegI src1, mRegI src2, label labl) %{ ++ match( If cmp (CmpI src1 src2) ); ++ effect(USE labl); ++ format %{ "BR$cmp $src1, $src2, $labl #@branchConI_reg_reg_long" %} ++ ++ ins_encode %{ ++ Register op1 = $src1$$Register; ++ Register op2 = $src2$$Register; ++ Label* L = $labl$$label; ++ int flag = $cmp$$cmpcode; ++ ++ switch(flag) { ++ case 0x01: //equal ++ __ beq_long(op1, op2, *L); ++ break; ++ case 0x02: //not_equal ++ __ bne_long(op1, op2, *L); ++ break; ++ case 0x03: //above ++ __ slt(AT, op2, op1); ++ __ bne_long(R0, AT, *L); ++ break; ++ case 0x04: //above_equal ++ __ slt(AT, op1, op2); ++ __ beq_long(AT, R0, *L); ++ break; ++ case 0x05: //below ++ __ slt(AT, op1, op2); ++ __ bne_long(R0, AT, *L); ++ break; ++ case 0x06: //below_equal ++ __ slt(AT, op2, op1); ++ __ beq_long(AT, R0, *L); ++ break; ++ default: ++ Unimplemented(); ++ } ++ %} ++ ++ ins_pc_relative(1); ++ ins_pipe( pipe_alu_branch ); ++%} ++ ++instruct branchConI_reg_immI_0_long(cmpOp cmp, mRegI src1, immI_0 src2, label labl) %{ ++ match( If cmp (CmpI src1 src2) ); ++ effect(USE labl); ++ ins_cost(170); ++ format %{ "BR$cmp $src1, $src2, $labl #@branchConI_reg_immI_0_long" %} ++ ++ ins_encode %{ ++ Register op1 = $src1$$Register; ++ Label* L = $labl$$label; ++ int flag = $cmp$$cmpcode; ++ ++ switch(flag) { ++ case 0x01: //equal ++ __ beq_long(op1, R0, *L); ++ break; ++ case 0x02: //not_equal ++ __ bne_long(op1, R0, *L); ++ break; ++ case 0x03: //greater ++ __ slt(AT, R0, op1); ++ __ bne_long(R0, AT, *L); ++ break; ++ case 0x04: //greater_equal ++ __ slt(AT, op1, R0); ++ __ beq_long(AT, R0, *L); ++ break; ++ case 0x05: //less ++ __ slt(AT, op1, R0); ++ __ bne_long(R0, AT, *L); ++ break; ++ case 0x06: //less_equal ++ __ slt(AT, R0, op1); ++ __ beq_long(AT, R0, *L); ++ break; ++ default: ++ Unimplemented(); ++ } ++ %} ++ ++ ins_pc_relative(1); ++ ins_pipe( pipe_alu_branch ); ++%} ++ ++instruct branchConI_reg_imm_long(cmpOp cmp, mRegI src1, immI src2, label labl) %{ ++ match( If cmp (CmpI src1 src2) ); ++ effect(USE labl); ++ ins_cost(200); ++ format %{ "BR$cmp $src1, $src2, $labl #@branchConI_reg_imm_long" %} ++ ++ ins_encode %{ ++ Register op1 = $src1$$Register; ++ int val = $src2$$constant; ++ Label* L = $labl$$label; ++ int flag = $cmp$$cmpcode; ++ ++ __ move(AT, val); ++ switch(flag) { ++ case 0x01: //equal ++ __ beq_long(op1, AT, *L); ++ break; ++ case 0x02: //not_equal ++ __ bne_long(op1, AT, *L); ++ break; ++ case 0x03: //greater ++ __ slt(AT, AT, op1); ++ __ bne_long(R0, AT, *L); ++ break; ++ case 0x04: //greater_equal ++ __ slt(AT, op1, AT); ++ __ beq_long(AT, R0, *L); ++ break; ++ case 0x05: //less ++ __ slt(AT, op1, AT); ++ __ bne_long(R0, AT, *L); ++ break; ++ case 0x06: //less_equal ++ __ slt(AT, AT, op1); ++ __ beq_long(AT, R0, *L); ++ break; ++ default: ++ Unimplemented(); ++ } ++ %} ++ ++ ins_pc_relative(1); ++ ins_pipe( pipe_alu_branch ); ++%} ++ ++instruct 
branchConIU_reg_immI_0_long(cmpOpU cmp, mRegI src1, immI_0 zero, label labl) %{ ++ match( If cmp (CmpU src1 zero) ); ++ effect(USE labl); ++ format %{ "BR$cmp $src1, zero, $labl #@branchConIU_reg_immI_0_long" %} ++ ++ ins_encode %{ ++ Register op1 = $src1$$Register; ++ Label* L = $labl$$label; ++ int flag = $cmp$$cmpcode; ++ ++ switch(flag) { ++ case 0x01: //equal ++ __ beq_long(op1, R0, *L); ++ break; ++ case 0x02: //not_equal ++ __ bne_long(op1, R0, *L); ++ break; ++ case 0x03: //above ++ __ bne_long(R0, op1, *L); ++ break; ++ case 0x04: //above_equal ++ __ beq_long(R0, R0, *L); ++ break; ++ case 0x05: //below ++ return; ++ break; ++ case 0x06: //below_equal ++ __ beq_long(op1, R0, *L); ++ break; ++ default: ++ Unimplemented(); ++ } ++ %} ++ ++ ins_pc_relative(1); ++ ins_pipe( pipe_alu_branch ); ++%} ++ ++ ++instruct branchConIU_reg_immI16_long(cmpOpU cmp, mRegI src1, immI16 src2, label labl) %{ ++ match( If cmp (CmpU src1 src2) ); ++ effect(USE labl); ++ ins_cost(180); ++ format %{ "BR$cmp $src1, $src2, $labl #@branchConIU_reg_immI16_long" %} ++ ++ ins_encode %{ ++ Register op1 = $src1$$Register; ++ int val = $src2$$constant; ++ Label* L = $labl$$label; ++ int flag = $cmp$$cmpcode; ++ ++ switch(flag) { ++ case 0x01: //equal ++ __ move(AT, val); ++ __ beq_long(op1, AT, *L); ++ break; ++ case 0x02: //not_equal ++ __ move(AT, val); ++ __ bne_long(op1, AT, *L); ++ break; ++ case 0x03: //above ++ __ move(AT, val); ++ __ sltu(AT, AT, op1); ++ __ bne_long(R0, AT, *L); ++ break; ++ case 0x04: //above_equal ++ __ sltiu(AT, op1, val); ++ __ beq_long(AT, R0, *L); ++ break; ++ case 0x05: //below ++ __ sltiu(AT, op1, val); ++ __ bne_long(R0, AT, *L); ++ break; ++ case 0x06: //below_equal ++ __ move(AT, val); ++ __ sltu(AT, AT, op1); ++ __ beq_long(AT, R0, *L); ++ break; ++ default: ++ Unimplemented(); ++ } ++ %} ++ ++ ins_pc_relative(1); ++ ins_pipe( pipe_alu_branch ); ++%} ++ ++ ++instruct branchConL_regL_regL_long(cmpOp cmp, mRegL src1, mRegL src2, label labl) %{ ++ match( If cmp (CmpL src1 src2) ); ++ effect(USE labl); ++ format %{ "BR$cmp $src1, $src2, $labl #@branchConL_regL_regL_long" %} ++ ins_cost(250); ++ ++ ins_encode %{ ++ Register opr1_reg = as_Register($src1$$reg); ++ Register opr2_reg = as_Register($src2$$reg); ++ ++ Label* target = $labl$$label; ++ int flag = $cmp$$cmpcode; ++ ++ switch(flag) { ++ case 0x01: //equal ++ __ beq_long(opr1_reg, opr2_reg, *target); ++ break; ++ ++ case 0x02: //not_equal ++ __ bne_long(opr1_reg, opr2_reg, *target); ++ break; ++ ++ case 0x03: //greater ++ __ slt(AT, opr2_reg, opr1_reg); ++ __ bne_long(AT, R0, *target); ++ break; ++ ++ case 0x04: //greater_equal ++ __ slt(AT, opr1_reg, opr2_reg); ++ __ beq_long(AT, R0, *target); ++ break; ++ ++ case 0x05: //less ++ __ slt(AT, opr1_reg, opr2_reg); ++ __ bne_long(AT, R0, *target); ++ break; ++ ++ case 0x06: //less_equal ++ __ slt(AT, opr2_reg, opr1_reg); ++ __ beq_long(AT, R0, *target); ++ break; ++ ++ default: ++ Unimplemented(); ++ } ++ %} ++ ++ ++ ins_pc_relative(1); ++ ins_pipe( pipe_alu_branch ); ++%} ++ ++instruct branchConUL_regL_regL_long(cmpOp cmp, mRegL src1, mRegL src2, label labl) %{ ++ match(If cmp (CmpUL src1 src2)); ++ effect(USE labl); ++ format %{ "BR$cmp $src1, $src2, $labl #@branchConUL_regL_regL_long" %} ++ ins_cost(250); ++ ++ ins_encode %{ ++ Register opr1_reg = as_Register($src1$$reg); ++ Register opr2_reg = as_Register($src2$$reg); ++ Label* target = $labl$$label; ++ int flag = $cmp$$cmpcode; ++ ++ switch(flag) { ++ case 0x01: // equal ++ __ beq_long(opr1_reg, opr2_reg, *target); ++ 
break; ++ ++ case 0x02: // not_equal ++ __ bne_long(opr1_reg, opr2_reg, *target); ++ break; ++ ++ case 0x03: // greater ++ __ sltu(AT, opr2_reg, opr1_reg); ++ __ bne_long(AT, R0, *target); ++ break; ++ ++ case 0x04: // greater_equal ++ __ sltu(AT, opr1_reg, opr2_reg); ++ __ beq_long(AT, R0, *target); ++ break; ++ ++ case 0x05: // less ++ __ sltu(AT, opr1_reg, opr2_reg); ++ __ bne_long(AT, R0, *target); ++ break; ++ ++ case 0x06: // less_equal ++ __ sltu(AT, opr2_reg, opr1_reg); ++ __ beq_long(AT, R0, *target); ++ break; ++ ++ default: ++ Unimplemented(); ++ } ++ %} ++ ++ ins_pc_relative(1); ++ ins_pipe(pipe_alu_branch); ++%} ++ ++instruct branchConL_regL_immL_0_long(cmpOp cmp, mRegL src1, immL_0 zero, label labl) %{ ++ match( If cmp (CmpL src1 zero) ); ++ effect(USE labl); ++ format %{ "BR$cmp $src1, zero, $labl #@branchConL_regL_immL_0_long" %} ++ ins_cost(150); ++ ++ ins_encode %{ ++ Register opr1_reg = as_Register($src1$$reg); ++ Register opr2_reg = R0; ++ ++ Label* target = $labl$$label; ++ int flag = $cmp$$cmpcode; ++ ++ switch(flag) { ++ case 0x01: //equal ++ __ beq_long(opr1_reg, opr2_reg, *target); ++ break; ++ ++ case 0x02: //not_equal ++ __ bne_long(opr1_reg, opr2_reg, *target); ++ break; ++ ++ case 0x03: //greater ++ __ slt(AT, opr2_reg, opr1_reg); ++ __ bne_long(AT, R0, *target); ++ break; ++ ++ case 0x04: //greater_equal ++ __ slt(AT, opr1_reg, opr2_reg); ++ __ beq_long(AT, R0, *target); ++ break; ++ ++ case 0x05: //less ++ __ slt(AT, opr1_reg, opr2_reg); ++ __ bne_long(AT, R0, *target); ++ break; ++ ++ case 0x06: //less_equal ++ __ slt(AT, opr2_reg, opr1_reg); ++ __ beq_long(AT, R0, *target); ++ break; ++ ++ default: ++ Unimplemented(); ++ } ++ %} ++ ++ ++ ins_pc_relative(1); ++ ins_pipe( pipe_alu_branch ); ++%} ++ ++instruct branchConUL_regL_immL_0_long(cmpOp cmp, mRegL src1, immL_0 zero, label labl) %{ ++ match(If cmp (CmpUL src1 zero)); ++ effect(USE labl); ++ format %{ "BR$cmp $src1, zero, $labl #@branchConUL_regL_immL_0_long" %} ++ ins_cost(150); ++ ++ ins_encode %{ ++ Register opr1_reg = as_Register($src1$$reg); ++ Register opr2_reg = R0; ++ Label* target = $labl$$label; ++ int flag = $cmp$$cmpcode; ++ ++ switch(flag) { ++ case 0x01: // equal ++ case 0x04: // greater_equal ++ case 0x06: // less_equal ++ __ beq_long(opr1_reg, opr2_reg, *target); ++ break; ++ ++ case 0x02: // not_equal ++ case 0x03: // greater ++ __ bne_long(opr1_reg, opr2_reg, *target); ++ break; ++ ++ case 0x05: // less ++ __ beq_long(R0, R0, *target); ++ break; ++ ++ default: ++ Unimplemented(); ++ } ++ %} ++ ++ ins_pc_relative(1); ++ ins_pipe(pipe_alu_branch); ++%} ++ ++instruct branchConL_regL_immL_long(cmpOp cmp, mRegL src1, immL src2, label labl) %{ ++ match( If cmp (CmpL src1 src2) ); ++ effect(USE labl); ++ format %{ "BR$cmp $src1, $src2, $labl #@branchConL_regL_immL_long" %} ++ ins_cost(180); ++ ++ ins_encode %{ ++ Register opr1_reg = as_Register($src1$$reg); ++ Register opr2_reg = AT; ++ ++ Label* target = $labl$$label; ++ int flag = $cmp$$cmpcode; ++ ++ __ set64(opr2_reg, $src2$$constant); ++ ++ switch(flag) { ++ case 0x01: //equal ++ __ beq_long(opr1_reg, opr2_reg, *target); ++ break; ++ ++ case 0x02: //not_equal ++ __ bne_long(opr1_reg, opr2_reg, *target); ++ break; ++ ++ case 0x03: //greater ++ __ slt(AT, opr2_reg, opr1_reg); ++ __ bne_long(AT, R0, *target); ++ break; ++ ++ case 0x04: //greater_equal ++ __ slt(AT, opr1_reg, opr2_reg); ++ __ beq_long(AT, R0, *target); ++ break; ++ ++ case 0x05: //less ++ __ slt(AT, opr1_reg, opr2_reg); ++ __ bne_long(AT, R0, *target); ++ break; ++ ++ case 
0x06: //less_equal ++ __ slt(AT, opr2_reg, opr1_reg); ++ __ beq_long(AT, R0, *target); ++ break; ++ ++ default: ++ Unimplemented(); ++ } ++ %} ++ ++ ++ ins_pc_relative(1); ++ ins_pipe( pipe_alu_branch ); ++%} ++ ++instruct branchConUL_regL_immL_long(cmpOp cmp, mRegL src1, immL src2, label labl) %{ ++ match(If cmp (CmpUL src1 src2)); ++ effect(USE labl); ++ format %{ "BR$cmp $src1, $src2, $labl #@branchConUL_regL_immL_long" %} ++ ins_cost(180); ++ ++ ins_encode %{ ++ Register opr1_reg = as_Register($src1$$reg); ++ Register opr2_reg = AT; ++ Label* target = $labl$$label; ++ int flag = $cmp$$cmpcode; ++ ++ __ set64(opr2_reg, $src2$$constant); ++ ++ switch(flag) { ++ case 0x01: // equal ++ __ beq_long(opr1_reg, opr2_reg, *target); ++ break; ++ ++ case 0x02: // not_equal ++ __ bne_long(opr1_reg, opr2_reg, *target); ++ break; ++ ++ case 0x03: // greater ++ __ sltu(AT, opr2_reg, opr1_reg); ++ __ bne_long(AT, R0, *target); ++ break; ++ ++ case 0x04: // greater_equal ++ __ sltu(AT, opr1_reg, opr2_reg); ++ __ beq_long(AT, R0, *target); ++ break; ++ ++ case 0x05: // less ++ __ sltu(AT, opr1_reg, opr2_reg); ++ __ bne_long(AT, R0, *target); ++ break; ++ ++ case 0x06: // less_equal ++ __ sltu(AT, opr2_reg, opr1_reg); ++ __ beq_long(AT, R0, *target); ++ break; ++ ++ default: ++ Unimplemented(); ++ } ++ %} ++ ++ ins_pc_relative(1); ++ ins_pipe(pipe_alu_branch); ++%} ++ ++//FIXME ++instruct branchConF_reg_reg_long(cmpOp cmp, regF src1, regF src2, label labl) %{ ++ match( If cmp (CmpF src1 src2) ); ++ effect(USE labl); ++ format %{ "BR$cmp $src1, $src2, $labl #@branchConF_reg_reg_long" %} ++ ++ ins_encode %{ ++ FloatRegister reg_op1 = $src1$$FloatRegister; ++ FloatRegister reg_op2 = $src2$$FloatRegister; ++ Label* L = $labl$$label; ++ int flag = $cmp$$cmpcode; ++ ++ switch(flag) { ++ case 0x01: // equal ++ __ c_eq_s(reg_op1, reg_op2); ++ __ bc1t_long(*L); ++ break; ++ case 0x02: // not_equal ++ __ c_eq_s(reg_op1, reg_op2); ++ __ bc1f_long(*L); ++ break; ++ case 0x03: // greater ++ __ c_ule_s(reg_op1, reg_op2); ++ __ bc1f_long(*L); ++ break; ++ case 0x04: // greater_equal ++ __ c_ult_s(reg_op1, reg_op2); ++ __ bc1f_long(*L); ++ break; ++ case 0x05: // less ++ __ c_ult_s(reg_op1, reg_op2); ++ __ bc1t_long(*L); ++ break; ++ case 0x06: // less_equal ++ __ c_ule_s(reg_op1, reg_op2); ++ __ bc1t_long(*L); ++ break; ++ default: ++ Unimplemented(); ++ } ++ %} ++ ++ ins_pc_relative(1); ++ ins_pipe(pipe_slow); ++%} ++ ++instruct branchConD_reg_reg_long(cmpOp cmp, regD src1, regD src2, label labl) %{ ++ match( If cmp (CmpD src1 src2) ); ++ effect(USE labl); ++ format %{ "BR$cmp $src1, $src2, $labl #@branchConD_reg_reg_long" %} ++ ++ ins_encode %{ ++ FloatRegister reg_op1 = $src1$$FloatRegister; ++ FloatRegister reg_op2 = $src2$$FloatRegister; ++ Label* L = $labl$$label; ++ int flag = $cmp$$cmpcode; ++ ++ switch(flag) { ++ case 0x01: // equal ++ __ c_eq_d(reg_op1, reg_op2); ++ __ bc1t_long(*L); ++ break; ++ case 0x02: // not_equal ++ // c_ueq_d cannot distinguish NaN from equal. Double.isNaN(Double) is implemented by 'f != f', so the use of c_ueq_d causes bugs. 
++ __ c_eq_d(reg_op1, reg_op2); ++ __ bc1f_long(*L); ++ break; ++ case 0x03: // greater ++ __ c_ule_d(reg_op1, reg_op2); ++ __ bc1f_long(*L); ++ break; ++ case 0x04: // greater_equal ++ __ c_ult_d(reg_op1, reg_op2); ++ __ bc1f_long(*L); ++ break; ++ case 0x05: // less ++ __ c_ult_d(reg_op1, reg_op2); ++ __ bc1t_long(*L); ++ break; ++ case 0x06: // less_equal ++ __ c_ule_d(reg_op1, reg_op2); ++ __ bc1t_long(*L); ++ break; ++ default: ++ Unimplemented(); ++ } ++ %} ++ ++ ins_pc_relative(1); ++ ins_pipe(pipe_slow); ++%} ++ ++ ++// ============================================================================ ++// Branch Instructions -- short offset versions ++ ++// Jump Direct ++instruct jmpDir_short(label labl) %{ ++ match(Goto); ++ effect(USE labl); ++ ++ ins_cost(300); ++ format %{ "JMP $labl #@jmpDir_short" %} ++ ++ ins_encode %{ ++ Label &L = *($labl$$label); ++ if(&L) ++ __ b(L); ++ else ++ __ b(int(0)); ++ __ delayed()->nop(); ++ %} ++ ++ ins_pipe( pipe_jump ); ++ ins_pc_relative(1); ++ ins_short_branch(1); ++%} ++ ++// Jump Direct Conditional - Label defines a relative address from Jcc+1 ++instruct jmpLoopEnd_short(cmpOp cop, mRegI src1, mRegI src2, label labl) %{ ++ match(CountedLoopEnd cop (CmpI src1 src2)); ++ effect(USE labl); ++ ++ ins_cost(300); ++ format %{ "J$cop $src1, $src2, $labl\t# Loop end @ jmpLoopEnd_short" %} ++ ins_encode %{ ++ Register op1 = $src1$$Register; ++ Register op2 = $src2$$Register; ++ Label &L = *($labl$$label); ++ int flag = $cop$$cmpcode; ++ ++ switch(flag) { ++ case 0x01: //equal ++ if (&L) ++ __ beq(op1, op2, L); ++ else ++ __ beq(op1, op2, (int)0); ++ break; ++ case 0x02: //not_equal ++ if (&L) ++ __ bne(op1, op2, L); ++ else ++ __ bne(op1, op2, (int)0); ++ break; ++ case 0x03: //above ++ __ slt(AT, op2, op1); ++ if(&L) ++ __ bne(AT, R0, L); ++ else ++ __ bne(AT, R0, (int)0); ++ break; ++ case 0x04: //above_equal ++ __ slt(AT, op1, op2); ++ if(&L) ++ __ beq(AT, R0, L); ++ else ++ __ beq(AT, R0, (int)0); ++ break; ++ case 0x05: //below ++ __ slt(AT, op1, op2); ++ if(&L) ++ __ bne(AT, R0, L); ++ else ++ __ bne(AT, R0, (int)0); ++ break; ++ case 0x06: //below_equal ++ __ slt(AT, op2, op1); ++ if(&L) ++ __ beq(AT, R0, L); ++ else ++ __ beq(AT, R0, (int)0); ++ break; ++ default: ++ Unimplemented(); ++ } ++ __ delayed()->nop(); ++ %} ++ ins_pipe( pipe_jump ); ++ ins_pc_relative(1); ++ ins_short_branch(1); ++%} ++ ++instruct jmpLoopEnd_reg_immI_short(cmpOp cop, mRegI src1, immI src2, label labl) %{ ++ match(CountedLoopEnd cop (CmpI src1 src2)); ++ effect(USE labl); ++ ++ ins_cost(300); ++ format %{ "J$cop $src1, $src2, $labl\t# Loop end @ jmpLoopEnd_reg_immI_short" %} ++ ins_encode %{ ++ Register op1 = $src1$$Register; ++ Register op2 = AT; ++ Label &L = *($labl$$label); ++ int flag = $cop$$cmpcode; ++ ++ __ move(op2, $src2$$constant); ++ ++ switch(flag) { ++ case 0x01: //equal ++ if (&L) ++ __ beq(op1, op2, L); ++ else ++ __ beq(op1, op2, (int)0); ++ break; ++ case 0x02: //not_equal ++ if (&L) ++ __ bne(op1, op2, L); ++ else ++ __ bne(op1, op2, (int)0); ++ break; ++ case 0x03: //above ++ __ slt(AT, op2, op1); ++ if(&L) ++ __ bne(AT, R0, L); ++ else ++ __ bne(AT, R0, (int)0); ++ break; ++ case 0x04: //above_equal ++ __ slt(AT, op1, op2); ++ if(&L) ++ __ beq(AT, R0, L); ++ else ++ __ beq(AT, R0, (int)0); ++ break; ++ case 0x05: //below ++ __ slt(AT, op1, op2); ++ if(&L) ++ __ bne(AT, R0, L); ++ else ++ __ bne(AT, R0, (int)0); ++ break; ++ case 0x06: //below_equal ++ __ slt(AT, op2, op1); ++ if(&L) ++ __ beq(AT, R0, L); ++ else ++ __ beq(AT, R0, (int)0); ++ 
break; ++ default: ++ Unimplemented(); ++ } ++ __ delayed()->nop(); ++ %} ++ ins_pipe( pipe_jump ); ++ ins_pc_relative(1); ++ ins_short_branch(1); ++%} ++ ++ ++// This match pattern is created for StoreIConditional since I cannot match IfNode without a RegFlags! ++instruct jmpCon_flags_short(cmpOp cop, FlagsReg cr, label labl) %{ ++ match(If cop cr); ++ effect(USE labl); ++ ++ ins_cost(300); ++ format %{ "J$cop $labl #mips uses T0 as equivalent to eflag @jmpCon_flags_short" %} ++ ++ ins_encode %{ ++ Label &L = *($labl$$label); ++ switch($cop$$cmpcode) { ++ case 0x01: //equal ++ if (&L) ++ __ bne($cr$$Register, R0, L); ++ else ++ __ bne($cr$$Register, R0, (int)0); ++ break; ++ case 0x02: //not equal ++ if (&L) ++ __ beq($cr$$Register, R0, L); ++ else ++ __ beq($cr$$Register, R0, (int)0); ++ break; ++ default: ++ Unimplemented(); ++ } ++ __ delayed()->nop(); ++ %} ++ ++ ins_pipe( pipe_jump ); ++ ins_pc_relative(1); ++ ins_short_branch(1); ++%} ++ ++// Conditional jumps ++instruct branchConP_zero_short(cmpOpU cmp, mRegP op1, immP_0 zero, label labl) %{ ++ match(If cmp (CmpP op1 zero)); ++ effect(USE labl); ++ ++ ins_cost(180); ++ format %{ "b$cmp $op1, R0, $labl #@branchConP_zero_short" %} ++ ++ ins_encode %{ ++ Register op1 = $op1$$Register; ++ Register op2 = R0; ++ Label &L = *($labl$$label); ++ int flag = $cmp$$cmpcode; ++ ++ switch(flag) { ++ case 0x01: //equal ++ if (&L) ++ __ beq(op1, op2, L); ++ else ++ __ beq(op1, op2, (int)0); ++ break; ++ case 0x02: //not_equal ++ if (&L) ++ __ bne(op1, op2, L); ++ else ++ __ bne(op1, op2, (int)0); ++ break; ++ default: ++ Unimplemented(); ++ } ++ __ delayed()->nop(); ++ %} ++ ++ ins_pc_relative(1); ++ ins_pipe( pipe_alu_branch ); ++ ins_short_branch(1); ++%} ++ ++instruct branchConN2P_zero_short(cmpOpU cmp, mRegN op1, immP_0 zero, label labl) %{ ++ match(If cmp (CmpP (DecodeN op1) zero)); ++ predicate(Universe::narrow_oop_base() == NULL && Universe::narrow_oop_shift() == 0); ++ effect(USE labl); ++ ++ ins_cost(180); ++ format %{ "b$cmp $op1, R0, $labl #@branchConN2P_zero_short" %} ++ ++ ins_encode %{ ++ Register op1 = $op1$$Register; ++ Register op2 = R0; ++ Label &L = *($labl$$label); ++ int flag = $cmp$$cmpcode; ++ ++ switch(flag) ++ { ++ case 0x01: //equal ++ if (&L) ++ __ beq(op1, op2, L); ++ else ++ __ beq(op1, op2, (int)0); ++ break; ++ case 0x02: //not_equal ++ if (&L) ++ __ bne(op1, op2, L); ++ else ++ __ bne(op1, op2, (int)0); ++ break; ++ default: ++ Unimplemented(); ++ } ++ __ delayed()->nop(); ++ %} ++ ++ ins_pc_relative(1); ++ ins_pipe( pipe_alu_branch ); ++ ins_short_branch(1); ++%} ++ ++ ++instruct branchConP_short(cmpOpU cmp, mRegP op1, mRegP op2, label labl) %{ ++ match(If cmp (CmpP op1 op2)); ++// predicate(can_branch_register(_kids[0]->_leaf, _kids[1]->_leaf)); ++ effect(USE labl); ++ ++ ins_cost(200); ++ format %{ "b$cmp $op1, $op2, $labl #@branchConP_short" %} ++ ++ ins_encode %{ ++ Register op1 = $op1$$Register; ++ Register op2 = $op2$$Register; ++ Label &L = *($labl$$label); ++ int flag = $cmp$$cmpcode; ++ ++ switch(flag) { ++ case 0x01: //equal ++ if (&L) ++ __ beq(op1, op2, L); ++ else ++ __ beq(op1, op2, (int)0); ++ break; ++ case 0x02: //not_equal ++ if (&L) ++ __ bne(op1, op2, L); ++ else ++ __ bne(op1, op2, (int)0); ++ break; ++ case 0x03: //above ++ __ sltu(AT, op2, op1); ++ if(&L) ++ __ bne(R0, AT, L); ++ else ++ __ bne(R0, AT, (int)0); ++ break; ++ case 0x04: //above_equal ++ __ sltu(AT, op1, op2); ++ if(&L) ++ __ beq(AT, R0, L); ++ else ++ __ beq(AT, R0, (int)0); ++ break; ++ case 0x05: //below ++ __ sltu(AT, op1, 
op2); ++ if(&L) ++ __ bne(R0, AT, L); ++ else ++ __ bne(R0, AT, (int)0); ++ break; ++ case 0x06: //below_equal ++ __ sltu(AT, op2, op1); ++ if(&L) ++ __ beq(AT, R0, L); ++ else ++ __ beq(AT, R0, (int)0); ++ break; ++ default: ++ Unimplemented(); ++ } ++ __ delayed()->nop(); ++ %} ++ ++ ins_pc_relative(1); ++ ins_pipe( pipe_alu_branch ); ++ ins_short_branch(1); ++%} ++ ++instruct cmpN_null_branch_short(cmpOp cmp, mRegN op1, immN_0 null, label labl) %{ ++ match(If cmp (CmpN op1 null)); ++ effect(USE labl); ++ ++ ins_cost(180); ++ format %{ "CMP $op1,0\t! compressed ptr\n\t" ++ "BP$cmp $labl @ cmpN_null_branch_short" %} ++ ins_encode %{ ++ Register op1 = $op1$$Register; ++ Register op2 = R0; ++ Label &L = *($labl$$label); ++ int flag = $cmp$$cmpcode; ++ ++ switch(flag) { ++ case 0x01: //equal ++ if (&L) ++ __ beq(op1, op2, L); ++ else ++ __ beq(op1, op2, (int)0); ++ break; ++ case 0x02: //not_equal ++ if (&L) ++ __ bne(op1, op2, L); ++ else ++ __ bne(op1, op2, (int)0); ++ break; ++ default: ++ Unimplemented(); ++ } ++ __ delayed()->nop(); ++ %} ++//TODO: pipe_branchP or create pipe_branchN LEE ++ ins_pc_relative(1); ++ ins_pipe( pipe_alu_branch ); ++ ins_short_branch(1); ++%} ++ ++instruct cmpN_reg_branch_short(cmpOp cmp, mRegN op1, mRegN op2, label labl) %{ ++ match(If cmp (CmpN op1 op2)); ++ effect(USE labl); ++ ++ ins_cost(180); ++ format %{ "CMP $op1,$op2\t! compressed ptr\n\t" ++ "BP$cmp $labl @ cmpN_reg_branch_short" %} ++ ins_encode %{ ++ Register op1_reg = $op1$$Register; ++ Register op2_reg = $op2$$Register; ++ Label &L = *($labl$$label); ++ int flag = $cmp$$cmpcode; ++ ++ switch(flag) { ++ case 0x01: //equal ++ if (&L) ++ __ beq(op1_reg, op2_reg, L); ++ else ++ __ beq(op1_reg, op2_reg, (int)0); ++ break; ++ case 0x02: //not_equal ++ if (&L) ++ __ bne(op1_reg, op2_reg, L); ++ else ++ __ bne(op1_reg, op2_reg, (int)0); ++ break; ++ case 0x03: //above ++ __ sltu(AT, op2_reg, op1_reg); ++ if(&L) ++ __ bne(R0, AT, L); ++ else ++ __ bne(R0, AT, (int)0); ++ break; ++ case 0x04: //above_equal ++ __ sltu(AT, op1_reg, op2_reg); ++ if(&L) ++ __ beq(AT, R0, L); ++ else ++ __ beq(AT, R0, (int)0); ++ break; ++ case 0x05: //below ++ __ sltu(AT, op1_reg, op2_reg); ++ if(&L) ++ __ bne(R0, AT, L); ++ else ++ __ bne(R0, AT, (int)0); ++ break; ++ case 0x06: //below_equal ++ __ sltu(AT, op2_reg, op1_reg); ++ if(&L) ++ __ beq(AT, R0, L); ++ else ++ __ beq(AT, R0, (int)0); ++ break; ++ default: ++ Unimplemented(); ++ } ++ __ delayed()->nop(); ++ %} ++ ins_pc_relative(1); ++ ins_pipe( pipe_alu_branch ); ++ ins_short_branch(1); ++%} ++ ++instruct branchConIU_reg_reg_short(cmpOpU cmp, mRegI src1, mRegI src2, label labl) %{ ++ match( If cmp (CmpU src1 src2) ); ++ effect(USE labl); ++ format %{ "BR$cmp $src1, $src2, $labl #@branchConIU_reg_reg_short" %} ++ ++ ins_encode %{ ++ Register op1 = $src1$$Register; ++ Register op2 = $src2$$Register; ++ Label &L = *($labl$$label); ++ int flag = $cmp$$cmpcode; ++ ++ switch(flag) { ++ case 0x01: //equal ++ if (&L) ++ __ beq(op1, op2, L); ++ else ++ __ beq(op1, op2, (int)0); ++ break; ++ case 0x02: //not_equal ++ if (&L) ++ __ bne(op1, op2, L); ++ else ++ __ bne(op1, op2, (int)0); ++ break; ++ case 0x03: //above ++ __ sltu(AT, op2, op1); ++ if(&L) ++ __ bne(AT, R0, L); ++ else ++ __ bne(AT, R0, (int)0); ++ break; ++ case 0x04: //above_equal ++ __ sltu(AT, op1, op2); ++ if(&L) ++ __ beq(AT, R0, L); ++ else ++ __ beq(AT, R0, (int)0); ++ break; ++ case 0x05: //below ++ __ sltu(AT, op1, op2); ++ if(&L) ++ __ bne(AT, R0, L); ++ else ++ __ bne(AT, R0, (int)0); ++ break; ++ case 
0x06: //below_equal ++ __ sltu(AT, op2, op1); ++ if(&L) ++ __ beq(AT, R0, L); ++ else ++ __ beq(AT, R0, (int)0); ++ break; ++ default: ++ Unimplemented(); ++ } ++ __ delayed()->nop(); ++ %} ++ ++ ins_pc_relative(1); ++ ins_pipe( pipe_alu_branch ); ++ ins_short_branch(1); ++%} ++ ++ ++instruct branchConIU_reg_imm_short(cmpOpU cmp, mRegI src1, immI src2, label labl) %{ ++ match( If cmp (CmpU src1 src2) ); ++ effect(USE labl); ++ format %{ "BR$cmp $src1, $src2, $labl #@branchConIU_reg_imm_short" %} ++ ++ ins_encode %{ ++ Register op1 = $src1$$Register; ++ int val = $src2$$constant; ++ Label &L = *($labl$$label); ++ int flag = $cmp$$cmpcode; ++ ++ __ move(AT, val); ++ switch(flag) { ++ case 0x01: //equal ++ if (&L) ++ __ beq(op1, AT, L); ++ else ++ __ beq(op1, AT, (int)0); ++ break; ++ case 0x02: //not_equal ++ if (&L) ++ __ bne(op1, AT, L); ++ else ++ __ bne(op1, AT, (int)0); ++ break; ++ case 0x03: //above ++ __ sltu(AT, AT, op1); ++ if(&L) ++ __ bne(R0, AT, L); ++ else ++ __ bne(R0, AT, (int)0); ++ break; ++ case 0x04: //above_equal ++ __ sltu(AT, op1, AT); ++ if(&L) ++ __ beq(AT, R0, L); ++ else ++ __ beq(AT, R0, (int)0); ++ break; ++ case 0x05: //below ++ __ sltu(AT, op1, AT); ++ if(&L) ++ __ bne(R0, AT, L); ++ else ++ __ bne(R0, AT, (int)0); ++ break; ++ case 0x06: //below_equal ++ __ sltu(AT, AT, op1); ++ if(&L) ++ __ beq(AT, R0, L); ++ else ++ __ beq(AT, R0, (int)0); ++ break; ++ default: ++ Unimplemented(); ++ } ++ __ delayed()->nop(); ++ %} ++ ++ ins_pc_relative(1); ++ ins_pipe( pipe_alu_branch ); ++ ins_short_branch(1); ++%} ++ ++instruct branchConI_reg_reg_short(cmpOp cmp, mRegI src1, mRegI src2, label labl) %{ ++ match( If cmp (CmpI src1 src2) ); ++ effect(USE labl); ++ format %{ "BR$cmp $src1, $src2, $labl #@branchConI_reg_reg_short" %} ++ ++ ins_encode %{ ++ Register op1 = $src1$$Register; ++ Register op2 = $src2$$Register; ++ Label &L = *($labl$$label); ++ int flag = $cmp$$cmpcode; ++ ++ switch(flag) { ++ case 0x01: //equal ++ if (&L) ++ __ beq(op1, op2, L); ++ else ++ __ beq(op1, op2, (int)0); ++ break; ++ case 0x02: //not_equal ++ if (&L) ++ __ bne(op1, op2, L); ++ else ++ __ bne(op1, op2, (int)0); ++ break; ++ case 0x03: //above ++ __ slt(AT, op2, op1); ++ if(&L) ++ __ bne(R0, AT, L); ++ else ++ __ bne(R0, AT, (int)0); ++ break; ++ case 0x04: //above_equal ++ __ slt(AT, op1, op2); ++ if(&L) ++ __ beq(AT, R0, L); ++ else ++ __ beq(AT, R0, (int)0); ++ break; ++ case 0x05: //below ++ __ slt(AT, op1, op2); ++ if(&L) ++ __ bne(R0, AT, L); ++ else ++ __ bne(R0, AT, (int)0); ++ break; ++ case 0x06: //below_equal ++ __ slt(AT, op2, op1); ++ if(&L) ++ __ beq(AT, R0, L); ++ else ++ __ beq(AT, R0, (int)0); ++ break; ++ default: ++ Unimplemented(); ++ } ++ __ delayed()->nop(); ++ %} ++ ++ ins_pc_relative(1); ++ ins_pipe( pipe_alu_branch ); ++ ins_short_branch(1); ++%} ++ ++instruct branchConI_reg_immI_0_short(cmpOp cmp, mRegI src1, immI_0 src2, label labl) %{ ++ match( If cmp (CmpI src1 src2) ); ++ effect(USE labl); ++ ins_cost(170); ++ format %{ "BR$cmp $src1, $src2, $labl #@branchConI_reg_immI_0_short" %} ++ ++ ins_encode %{ ++ Register op1 = $src1$$Register; ++ Label &L = *($labl$$label); ++ int flag = $cmp$$cmpcode; ++ ++ switch(flag) { ++ case 0x01: //equal ++ if (&L) ++ __ beq(op1, R0, L); ++ else ++ __ beq(op1, R0, (int)0); ++ break; ++ case 0x02: //not_equal ++ if (&L) ++ __ bne(op1, R0, L); ++ else ++ __ bne(op1, R0, (int)0); ++ break; ++ case 0x03: //greater ++ if(&L) ++ __ bgtz(op1, L); ++ else ++ __ bgtz(op1, (int)0); ++ break; ++ case 0x04: //greater_equal ++ if(&L) ++ __ 
bgez(op1, L); ++ else ++ __ bgez(op1, (int)0); ++ break; ++ case 0x05: //less ++ if(&L) ++ __ bltz(op1, L); ++ else ++ __ bltz(op1, (int)0); ++ break; ++ case 0x06: //less_equal ++ if(&L) ++ __ blez(op1, L); ++ else ++ __ blez(op1, (int)0); ++ break; ++ default: ++ Unimplemented(); ++ } ++ __ delayed()->nop(); ++ %} ++ ++ ins_pc_relative(1); ++ ins_pipe( pipe_alu_branch ); ++ ins_short_branch(1); ++%} ++ ++ ++instruct branchConI_reg_imm_short(cmpOp cmp, mRegI src1, immI src2, label labl) %{ ++ match( If cmp (CmpI src1 src2) ); ++ effect(USE labl); ++ ins_cost(200); ++ format %{ "BR$cmp $src1, $src2, $labl #@branchConI_reg_imm_short" %} ++ ++ ins_encode %{ ++ Register op1 = $src1$$Register; ++ int val = $src2$$constant; ++ Label &L = *($labl$$label); ++ int flag = $cmp$$cmpcode; ++ ++ __ move(AT, val); ++ switch(flag) { ++ case 0x01: //equal ++ if (&L) ++ __ beq(op1, AT, L); ++ else ++ __ beq(op1, AT, (int)0); ++ break; ++ case 0x02: //not_equal ++ if (&L) ++ __ bne(op1, AT, L); ++ else ++ __ bne(op1, AT, (int)0); ++ break; ++ case 0x03: //greater ++ __ slt(AT, AT, op1); ++ if(&L) ++ __ bne(R0, AT, L); ++ else ++ __ bne(R0, AT, (int)0); ++ break; ++ case 0x04: //greater_equal ++ __ slt(AT, op1, AT); ++ if(&L) ++ __ beq(AT, R0, L); ++ else ++ __ beq(AT, R0, (int)0); ++ break; ++ case 0x05: //less ++ __ slt(AT, op1, AT); ++ if(&L) ++ __ bne(R0, AT, L); ++ else ++ __ bne(R0, AT, (int)0); ++ break; ++ case 0x06: //less_equal ++ __ slt(AT, AT, op1); ++ if(&L) ++ __ beq(AT, R0, L); ++ else ++ __ beq(AT, R0, (int)0); ++ break; ++ default: ++ Unimplemented(); ++ } ++ __ delayed()->nop(); ++ %} ++ ++ ins_pc_relative(1); ++ ins_pipe( pipe_alu_branch ); ++ ins_short_branch(1); ++%} ++ ++instruct branchConIU_reg_immI_0_short(cmpOpU cmp, mRegI src1, immI_0 zero, label labl) %{ ++ match( If cmp (CmpU src1 zero) ); ++ effect(USE labl); ++ format %{ "BR$cmp $src1, zero, $labl #@branchConIU_reg_immI_0_short" %} ++ ++ ins_encode %{ ++ Register op1 = $src1$$Register; ++ Label &L = *($labl$$label); ++ int flag = $cmp$$cmpcode; ++ ++ switch(flag) { ++ case 0x01: //equal ++ if (&L) ++ __ beq(op1, R0, L); ++ else ++ __ beq(op1, R0, (int)0); ++ break; ++ case 0x02: //not_equal ++ if (&L) ++ __ bne(op1, R0, L); ++ else ++ __ bne(op1, R0, (int)0); ++ break; ++ case 0x03: //above ++ if(&L) ++ __ bne(R0, op1, L); ++ else ++ __ bne(R0, op1, (int)0); ++ break; ++ case 0x04: //above_equal ++ if(&L) ++ __ beq(R0, R0, L); ++ else ++ __ beq(R0, R0, (int)0); ++ break; ++ case 0x05: //below ++ return; ++ break; ++ case 0x06: //below_equal ++ if(&L) ++ __ beq(op1, R0, L); ++ else ++ __ beq(op1, R0, (int)0); ++ break; ++ default: ++ Unimplemented(); ++ } ++ __ delayed()->nop(); ++ %} ++ ++ ins_pc_relative(1); ++ ins_pipe( pipe_alu_branch ); ++ ins_short_branch(1); ++%} ++ ++ ++instruct branchConIU_reg_immI16_short(cmpOpU cmp, mRegI src1, immI16 src2, label labl) %{ ++ match( If cmp (CmpU src1 src2) ); ++ effect(USE labl); ++ ins_cost(180); ++ format %{ "BR$cmp $src1, $src2, $labl #@branchConIU_reg_immI16_short" %} ++ ++ ins_encode %{ ++ Register op1 = $src1$$Register; ++ int val = $src2$$constant; ++ Label &L = *($labl$$label); ++ int flag = $cmp$$cmpcode; ++ ++ switch(flag) { ++ case 0x01: //equal ++ __ move(AT, val); ++ if (&L) ++ __ beq(op1, AT, L); ++ else ++ __ beq(op1, AT, (int)0); ++ break; ++ case 0x02: //not_equal ++ __ move(AT, val); ++ if (&L) ++ __ bne(op1, AT, L); ++ else ++ __ bne(op1, AT, (int)0); ++ break; ++ case 0x03: //above ++ __ move(AT, val); ++ __ sltu(AT, AT, op1); ++ if(&L) ++ __ bne(R0, AT, L); ++ else 
++ __ bne(R0, AT, (int)0); ++ break; ++ case 0x04: //above_equal ++ __ sltiu(AT, op1, val); ++ if(&L) ++ __ beq(AT, R0, L); ++ else ++ __ beq(AT, R0, (int)0); ++ break; ++ case 0x05: //below ++ __ sltiu(AT, op1, val); ++ if(&L) ++ __ bne(R0, AT, L); ++ else ++ __ bne(R0, AT, (int)0); ++ break; ++ case 0x06: //below_equal ++ __ move(AT, val); ++ __ sltu(AT, AT, op1); ++ if(&L) ++ __ beq(AT, R0, L); ++ else ++ __ beq(AT, R0, (int)0); ++ break; ++ default: ++ Unimplemented(); ++ } ++ __ delayed()->nop(); ++ %} ++ ++ ins_pc_relative(1); ++ ins_pipe( pipe_alu_branch ); ++ ins_short_branch(1); ++%} ++ ++ ++instruct branchConL_regL_regL_short(cmpOp cmp, mRegL src1, mRegL src2, label labl) %{ ++ match( If cmp (CmpL src1 src2) ); ++ effect(USE labl); ++ format %{ "BR$cmp $src1, $src2, $labl #@branchConL_regL_regL_short" %} ++ ins_cost(250); ++ ++ ins_encode %{ ++ Register opr1_reg = as_Register($src1$$reg); ++ Register opr2_reg = as_Register($src2$$reg); ++ ++ Label &target = *($labl$$label); ++ int flag = $cmp$$cmpcode; ++ ++ switch(flag) { ++ case 0x01: //equal ++ if (&target) ++ __ beq(opr1_reg, opr2_reg, target); ++ else ++ __ beq(opr1_reg, opr2_reg, (int)0); ++ __ delayed()->nop(); ++ break; ++ ++ case 0x02: //not_equal ++ if(&target) ++ __ bne(opr1_reg, opr2_reg, target); ++ else ++ __ bne(opr1_reg, opr2_reg, (int)0); ++ __ delayed()->nop(); ++ break; ++ ++ case 0x03: //greater ++ __ slt(AT, opr2_reg, opr1_reg); ++ if(&target) ++ __ bne(AT, R0, target); ++ else ++ __ bne(AT, R0, (int)0); ++ __ delayed()->nop(); ++ break; ++ ++ case 0x04: //greater_equal ++ __ slt(AT, opr1_reg, opr2_reg); ++ if(&target) ++ __ beq(AT, R0, target); ++ else ++ __ beq(AT, R0, (int)0); ++ __ delayed()->nop(); ++ ++ break; ++ ++ case 0x05: //less ++ __ slt(AT, opr1_reg, opr2_reg); ++ if(&target) ++ __ bne(AT, R0, target); ++ else ++ __ bne(AT, R0, (int)0); ++ __ delayed()->nop(); ++ ++ break; ++ ++ case 0x06: //less_equal ++ __ slt(AT, opr2_reg, opr1_reg); ++ ++ if(&target) ++ __ beq(AT, R0, target); ++ else ++ __ beq(AT, R0, (int)0); ++ __ delayed()->nop(); ++ ++ break; ++ ++ default: ++ Unimplemented(); ++ } ++ %} ++ ++ ++ ins_pc_relative(1); ++ ins_pipe( pipe_alu_branch ); ++ ins_short_branch(1); ++%} ++ ++instruct branchConUL_regL_regL_short(cmpOp cmp, mRegL src1, mRegL src2, label labl) %{ ++ match( If cmp (CmpUL src1 src2) ); ++ effect(USE labl); ++ format %{ "BR$cmp $src1, $src2, $labl #@branchConUL_regL_regL_short" %} ++ ins_cost(250); ++ ++ ins_encode %{ ++ Register opr1_reg = as_Register($src1$$reg); ++ Register opr2_reg = as_Register($src2$$reg); ++ Label &target = *($labl$$label); ++ int flag = $cmp$$cmpcode; ++ ++ switch(flag) { ++ case 0x01: // equal ++ if (&target) ++ __ beq(opr1_reg, opr2_reg, target); ++ else ++ __ beq(opr1_reg, opr2_reg, (int)0); ++ __ delayed()->nop(); ++ break; ++ ++ case 0x02: // not_equal ++ if(&target) ++ __ bne(opr1_reg, opr2_reg, target); ++ else ++ __ bne(opr1_reg, opr2_reg, (int)0); ++ __ delayed()->nop(); ++ break; ++ ++ case 0x03: // greater ++ __ sltu(AT, opr2_reg, opr1_reg); ++ if(&target) ++ __ bne(AT, R0, target); ++ else ++ __ bne(AT, R0, (int)0); ++ __ delayed()->nop(); ++ break; ++ ++ case 0x04: // greater_equal ++ __ sltu(AT, opr1_reg, opr2_reg); ++ if(&target) ++ __ beq(AT, R0, target); ++ else ++ __ beq(AT, R0, (int)0); ++ __ delayed()->nop(); ++ break; ++ ++ case 0x05: // less ++ __ sltu(AT, opr1_reg, opr2_reg); ++ if(&target) ++ __ bne(AT, R0, target); ++ else ++ __ bne(AT, R0, (int)0); ++ __ delayed()->nop(); ++ break; ++ ++ case 0x06: // less_equal ++ __ 
sltu(AT, opr2_reg, opr1_reg); ++ if(&target) ++ __ beq(AT, R0, target); ++ else ++ __ beq(AT, R0, (int)0); ++ __ delayed()->nop(); ++ break; ++ ++ default: ++ Unimplemented(); ++ } ++ %} ++ ++ ins_pc_relative(1); ++ ins_pipe(pipe_alu_branch); ++ ins_short_branch(1); ++%} ++ ++instruct branchConL_regL_immL_0_short(cmpOp cmp, mRegL src1, immL_0 zero, label labl) %{ ++ match( If cmp (CmpL src1 zero) ); ++ effect(USE labl); ++ format %{ "BR$cmp $src1, zero, $labl #@branchConL_regL_immL_0_short" %} ++ ins_cost(150); ++ ++ ins_encode %{ ++ Register opr1_reg = as_Register($src1$$reg); ++ Label &target = *($labl$$label); ++ int flag = $cmp$$cmpcode; ++ ++ switch(flag) { ++ case 0x01: //equal ++ if (&target) ++ __ beq(opr1_reg, R0, target); ++ else ++ __ beq(opr1_reg, R0, int(0)); ++ break; ++ ++ case 0x02: //not_equal ++ if(&target) ++ __ bne(opr1_reg, R0, target); ++ else ++ __ bne(opr1_reg, R0, (int)0); ++ break; ++ ++ case 0x03: //greater ++ if(&target) ++ __ bgtz(opr1_reg, target); ++ else ++ __ bgtz(opr1_reg, (int)0); ++ break; ++ ++ case 0x04: //greater_equal ++ if(&target) ++ __ bgez(opr1_reg, target); ++ else ++ __ bgez(opr1_reg, (int)0); ++ break; ++ ++ case 0x05: //less ++ __ slt(AT, opr1_reg, R0); ++ if(&target) ++ __ bne(AT, R0, target); ++ else ++ __ bne(AT, R0, (int)0); ++ break; ++ ++ case 0x06: //less_equal ++ if (&target) ++ __ blez(opr1_reg, target); ++ else ++ __ blez(opr1_reg, int(0)); ++ break; ++ ++ default: ++ Unimplemented(); ++ } ++ __ delayed()->nop(); ++ %} ++ ++ ++ ins_pc_relative(1); ++ ins_pipe( pipe_alu_branch ); ++ ins_short_branch(1); ++%} ++ ++instruct branchConUL_regL_immL_0_short(cmpOp cmp, mRegL src1, immL_0 zero, label labl) %{ ++ match(If cmp (CmpUL src1 zero)); ++ effect(USE labl); ++ format %{ "BR$cmp $src1, zero, $labl #@branchConUL_regL_immL_0_short" %} ++ ins_cost(150); ++ ++ ins_encode %{ ++ Register opr1_reg = as_Register($src1$$reg); ++ Label &target = *($labl$$label); ++ int flag = $cmp$$cmpcode; ++ ++ switch(flag) { ++ case 0x01: // equal ++ case 0x04: // greater_equal ++ case 0x06: // less_equal ++ if (&target) ++ __ beq(opr1_reg, R0, target); ++ else ++ __ beq(opr1_reg, R0, int(0)); ++ break; ++ ++ case 0x02: // not_equal ++ case 0x03: // greater ++ if(&target) ++ __ bne(opr1_reg, R0, target); ++ else ++ __ bne(opr1_reg, R0, (int)0); ++ break; ++ ++ case 0x05: // less ++ if(&target) ++ __ beq(R0, R0, target); ++ else ++ __ beq(R0, R0, (int)0); ++ break; ++ ++ default: ++ Unimplemented(); ++ } ++ __ delayed()->nop(); ++ %} ++ ++ ins_pc_relative(1); ++ ins_pipe(pipe_alu_branch); ++ ins_short_branch(1); ++%} ++ ++instruct branchConL_regL_immL_short(cmpOp cmp, mRegL src1, immL src2, label labl) %{ ++ match( If cmp (CmpL src1 src2) ); ++ effect(USE labl); ++ format %{ "BR$cmp $src1, $src2, $labl #@branchConL_regL_immL_short" %} ++ ins_cost(180); ++ ++ ins_encode %{ ++ Register opr1_reg = as_Register($src1$$reg); ++ Register opr2_reg = AT; ++ ++ Label &target = *($labl$$label); ++ int flag = $cmp$$cmpcode; ++ ++ __ set64(opr2_reg, $src2$$constant); ++ ++ switch(flag) { ++ case 0x01: //equal ++ if (&target) ++ __ beq(opr1_reg, opr2_reg, target); ++ else ++ __ beq(opr1_reg, opr2_reg, (int)0); ++ break; ++ ++ case 0x02: //not_equal ++ if(&target) ++ __ bne(opr1_reg, opr2_reg, target); ++ else ++ __ bne(opr1_reg, opr2_reg, (int)0); ++ break; ++ ++ case 0x03: //greater ++ __ slt(AT, opr2_reg, opr1_reg); ++ if(&target) ++ __ bne(AT, R0, target); ++ else ++ __ bne(AT, R0, (int)0); ++ break; ++ ++ case 0x04: //greater_equal ++ __ slt(AT, opr1_reg, opr2_reg); ++ 
if(&target) ++ __ beq(AT, R0, target); ++ else ++ __ beq(AT, R0, (int)0); ++ break; ++ ++ case 0x05: //less ++ __ slt(AT, opr1_reg, opr2_reg); ++ if(&target) ++ __ bne(AT, R0, target); ++ else ++ __ bne(AT, R0, (int)0); ++ break; ++ ++ case 0x06: //less_equal ++ __ slt(AT, opr2_reg, opr1_reg); ++ if(&target) ++ __ beq(AT, R0, target); ++ else ++ __ beq(AT, R0, (int)0); ++ break; ++ ++ default: ++ Unimplemented(); ++ } ++ __ delayed()->nop(); ++ %} ++ ++ ++ ins_pc_relative(1); ++ ins_pipe( pipe_alu_branch ); ++ ins_short_branch(1); ++%} ++ ++instruct branchConUL_regL_immL_short(cmpOp cmp, mRegL src1, immL src2, label labl) %{ ++ match(If cmp (CmpUL src1 src2)); ++ effect(USE labl); ++ format %{ "BR$cmp $src1, $src2, $labl #@branchConUL_regL_immL_short" %} ++ ins_cost(180); ++ ++ ins_encode %{ ++ Register opr1_reg = as_Register($src1$$reg); ++ Register opr2_reg = AT; ++ Label &target = *($labl$$label); ++ int flag = $cmp$$cmpcode; ++ ++ __ set64(opr2_reg, $src2$$constant); ++ ++ switch(flag) { ++ case 0x01: // equal ++ if (&target) ++ __ beq(opr1_reg, opr2_reg, target); ++ else ++ __ beq(opr1_reg, opr2_reg, (int)0); ++ break; ++ ++ case 0x02: // not_equal ++ if(&target) ++ __ bne(opr1_reg, opr2_reg, target); ++ else ++ __ bne(opr1_reg, opr2_reg, (int)0); ++ break; ++ ++ case 0x03: // greater ++ __ sltu(AT, opr2_reg, opr1_reg); ++ if(&target) ++ __ bne(AT, R0, target); ++ else ++ __ bne(AT, R0, (int)0); ++ break; ++ ++ case 0x04: // greater_equal ++ __ sltu(AT, opr1_reg, opr2_reg); ++ if(&target) ++ __ beq(AT, R0, target); ++ else ++ __ beq(AT, R0, (int)0); ++ break; ++ ++ case 0x05: // less ++ __ sltu(AT, opr1_reg, opr2_reg); ++ if(&target) ++ __ bne(AT, R0, target); ++ else ++ __ bne(AT, R0, (int)0); ++ break; ++ ++ case 0x06: // less_equal ++ __ sltu(AT, opr2_reg, opr1_reg); ++ if(&target) ++ __ beq(AT, R0, target); ++ else ++ __ beq(AT, R0, (int)0); ++ break; ++ ++ default: ++ Unimplemented(); ++ } ++ __ delayed()->nop(); ++ %} ++ ++ ins_pc_relative(1); ++ ins_pipe(pipe_alu_branch); ++ ins_short_branch(1); ++%} ++ ++//FIXME ++instruct branchConF_reg_reg_short(cmpOp cmp, regF src1, regF src2, label labl) %{ ++ match( If cmp (CmpF src1 src2) ); ++ effect(USE labl); ++ format %{ "BR$cmp $src1, $src2, $labl #@branchConF_reg_reg_short" %} ++ ++ ins_encode %{ ++ FloatRegister reg_op1 = $src1$$FloatRegister; ++ FloatRegister reg_op2 = $src2$$FloatRegister; ++ Label& L = *($labl$$label); ++ int flag = $cmp$$cmpcode; ++ ++ switch(flag) { ++ case 0x01: // equal ++ __ c_eq_s(reg_op1, reg_op2); ++ if (&L) ++ __ bc1t(L); ++ else ++ __ bc1t((int)0); ++ break; ++ case 0x02: // not_equal ++ __ c_eq_s(reg_op1, reg_op2); ++ if (&L) ++ __ bc1f(L); ++ else ++ __ bc1f((int)0); ++ break; ++ case 0x03: // greater ++ __ c_ule_s(reg_op1, reg_op2); ++ if(&L) ++ __ bc1f(L); ++ else ++ __ bc1f((int)0); ++ break; ++ case 0x04: // greater_equal ++ __ c_ult_s(reg_op1, reg_op2); ++ if(&L) ++ __ bc1f(L); ++ else ++ __ bc1f((int)0); ++ break; ++ case 0x05: // less ++ __ c_ult_s(reg_op1, reg_op2); ++ if(&L) ++ __ bc1t(L); ++ else ++ __ bc1t((int)0); ++ break; ++ case 0x06: // less_equal ++ __ c_ule_s(reg_op1, reg_op2); ++ if(&L) ++ __ bc1t(L); ++ else ++ __ bc1t((int)0); ++ break; ++ default: ++ Unimplemented(); ++ } ++ __ delayed()->nop(); ++ %} ++ ++ ins_pc_relative(1); ++ ins_pipe(pipe_fpu_branch); ++ ins_short_branch(1); ++%} ++ ++instruct branchConD_reg_reg_short(cmpOp cmp, regD src1, regD src2, label labl) %{ ++ match( If cmp (CmpD src1 src2) ); ++ effect(USE labl); ++ format %{ "BR$cmp $src1, $src2, $labl 
#@branchConD_reg_reg_short" %} ++ ++ ins_encode %{ ++ FloatRegister reg_op1 = $src1$$FloatRegister; ++ FloatRegister reg_op2 = $src2$$FloatRegister; ++ Label& L = *($labl$$label); ++ int flag = $cmp$$cmpcode; ++ ++ switch(flag) { ++ case 0x01: // equal ++ __ c_eq_d(reg_op1, reg_op2); ++ if (&L) ++ __ bc1t(L); ++ else ++ __ bc1t((int)0); ++ break; ++ case 0x02: // not_equal ++ // c_ueq_d cannot distinguish NaN from equal. Double.isNaN(Double) is implemented by 'f != f', so the use of c_ueq_d causes bugs. ++ __ c_eq_d(reg_op1, reg_op2); ++ if (&L) ++ __ bc1f(L); ++ else ++ __ bc1f((int)0); ++ break; ++ case 0x03: // greater ++ __ c_ule_d(reg_op1, reg_op2); ++ if(&L) ++ __ bc1f(L); ++ else ++ __ bc1f((int)0); ++ break; ++ case 0x04: // greater_equal ++ __ c_ult_d(reg_op1, reg_op2); ++ if(&L) ++ __ bc1f(L); ++ else ++ __ bc1f((int)0); ++ break; ++ case 0x05: // less ++ __ c_ult_d(reg_op1, reg_op2); ++ if(&L) ++ __ bc1t(L); ++ else ++ __ bc1t((int)0); ++ break; ++ case 0x06: // less_equal ++ __ c_ule_d(reg_op1, reg_op2); ++ if(&L) ++ __ bc1t(L); ++ else ++ __ bc1t((int)0); ++ break; ++ default: ++ Unimplemented(); ++ } ++ __ delayed()->nop(); ++ %} ++ ++ ins_pc_relative(1); ++ ins_pipe(pipe_fpu_branch); ++ ins_short_branch(1); ++%} ++ ++// =================== End of branch instructions ========================== ++ ++// Call Runtime Instruction ++instruct CallRuntimeDirect(method meth) %{ ++ match(CallRuntime ); ++ effect(USE meth); ++ ++ ins_cost(300); ++ format %{ "CALL,runtime #@CallRuntimeDirect" %} ++ ins_encode( Java_To_Runtime( meth ) ); ++ ins_pipe( pipe_slow ); ++ ins_alignment(16); ++%} ++ ++ ++ ++//------------------------MemBar Instructions------------------------------- ++//Memory barrier flavors ++ ++instruct membar_acquire() %{ ++ match(MemBarAcquire); ++ ins_cost(400); ++ ++ format %{ "MEMBAR-acquire @ membar_acquire" %} ++ ins_encode %{ ++ __ sync(); ++ %} ++ ins_pipe(empty); ++%} ++ ++instruct load_fence() %{ ++ match(LoadFence); ++ ins_cost(400); ++ ++ format %{ "MEMBAR @ load_fence" %} ++ ins_encode %{ ++ __ sync(); ++ %} ++ ins_pipe(pipe_slow); ++%} ++ ++instruct membar_acquire_lock() ++%{ ++ match(MemBarAcquireLock); ++ ins_cost(0); ++ ++ size(0); ++ format %{ "MEMBAR-acquire (acquire as part of CAS in prior FastLock so empty encoding) @ membar_acquire_lock" %} ++ ins_encode(); ++ ins_pipe(empty); ++%} ++ ++instruct membar_release() %{ ++ match(MemBarRelease); ++ ins_cost(400); ++ ++ format %{ "MEMBAR-release @ membar_release" %} ++ ++ ins_encode %{ ++ // Attention: DO NOT DELETE THIS GUY! 
++ __ sync(); ++ %} ++ ++ ins_pipe(pipe_slow); ++%} ++ ++instruct store_fence() %{ ++ match(StoreFence); ++ ins_cost(400); ++ ++ format %{ "MEMBAR @ store_fence" %} ++ ++ ins_encode %{ ++ __ sync(); ++ %} ++ ++ ins_pipe(pipe_slow); ++%} ++ ++instruct membar_release_lock() ++%{ ++ match(MemBarReleaseLock); ++ ins_cost(0); ++ ++ size(0); ++ format %{ "MEMBAR-release-lock (release in FastUnlock so empty) @ membar_release_lock" %} ++ ins_encode(); ++ ins_pipe(empty); ++%} ++ ++ ++instruct membar_volatile() %{ ++ match(MemBarVolatile); ++ ins_cost(400); ++ ++ format %{ "MEMBAR-volatile" %} ++ ins_encode %{ ++ if( !os::is_MP() ) return; // Not needed on single CPU ++ __ sync(); ++ ++ %} ++ ins_pipe(pipe_slow); ++%} ++ ++instruct unnecessary_membar_volatile() %{ ++ match(MemBarVolatile); ++ predicate(Matcher::post_store_load_barrier(n)); ++ ins_cost(0); ++ ++ size(0); ++ format %{ "MEMBAR-volatile (unnecessary so empty encoding) @ unnecessary_membar_volatile" %} ++ ins_encode( ); ++ ins_pipe(empty); ++%} ++ ++instruct membar_storestore() %{ ++ match(MemBarStoreStore); ++ ++ ins_cost(400); ++ format %{ "MEMBAR-storestore @ membar_storestore" %} ++ ins_encode %{ ++ __ sync(); ++ %} ++ ins_pipe(empty); ++%} ++ ++//----------Move Instructions-------------------------------------------------- ++instruct castX2P(mRegP dst, mRegL src) %{ ++ match(Set dst (CastX2P src)); ++ format %{ "castX2P $dst, $src @ castX2P" %} ++ ins_encode %{ ++ Register src = $src$$Register; ++ Register dst = $dst$$Register; ++ ++ if(src != dst) ++ __ move(dst, src); ++ %} ++ ins_cost(10); ++ ins_pipe( ialu_regI_mov ); ++%} ++ ++instruct castP2X(mRegL dst, mRegP src ) %{ ++ match(Set dst (CastP2X src)); ++ ++ format %{ "mov $dst, $src\t #@castP2X" %} ++ ins_encode %{ ++ Register src = $src$$Register; ++ Register dst = $dst$$Register; ++ ++ if(src != dst) ++ __ move(dst, src); ++ %} ++ ins_pipe( ialu_regI_mov ); ++%} ++ ++instruct MoveF2I_reg_reg(mRegI dst, regF src) %{ ++ match(Set dst (MoveF2I src)); ++ effect(DEF dst, USE src); ++ ins_cost(85); ++ format %{ "MoveF2I $dst, $src @ MoveF2I_reg_reg" %} ++ ins_encode %{ ++ Register dst = as_Register($dst$$reg); ++ FloatRegister src = as_FloatRegister($src$$reg); ++ ++ __ mfc1(dst, src); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct MoveI2F_reg_reg(regF dst, mRegI src) %{ ++ match(Set dst (MoveI2F src)); ++ effect(DEF dst, USE src); ++ ins_cost(85); ++ format %{ "MoveI2F $dst, $src @ MoveI2F_reg_reg" %} ++ ins_encode %{ ++ Register src = as_Register($src$$reg); ++ FloatRegister dst = as_FloatRegister($dst$$reg); ++ ++ __ mtc1(src, dst); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct MoveD2L_reg_reg(mRegL dst, regD src) %{ ++ match(Set dst (MoveD2L src)); ++ effect(DEF dst, USE src); ++ ins_cost(85); ++ format %{ "MoveD2L $dst, $src @ MoveD2L_reg_reg" %} ++ ins_encode %{ ++ Register dst = as_Register($dst$$reg); ++ FloatRegister src = as_FloatRegister($src$$reg); ++ ++ __ dmfc1(dst, src); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct MoveL2D_reg_reg(regD dst, mRegL src) %{ ++ match(Set dst (MoveL2D src)); ++ effect(DEF dst, USE src); ++ ins_cost(85); ++ format %{ "MoveL2D $dst, $src @ MoveL2D_reg_reg" %} ++ ins_encode %{ ++ FloatRegister dst = as_FloatRegister($dst$$reg); ++ Register src = as_Register($src$$reg); ++ ++ __ dmtc1(src, dst); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++//----------Conditional Move--------------------------------------------------- ++// Conditional move ++instruct cmovI_cmpI_reg_reg(mRegI dst, mRegI src, mRegI tmp1, mRegI tmp2, cmpOp cop ) %{ 
++ match(Set dst (CMoveI (Binary cop (CmpI tmp1 tmp2)) (Binary dst src))); ++ ins_cost(80); ++ format %{ ++ "CMP$cop $tmp1, $tmp2\t @cmovI_cmpI_reg_reg\n" ++ "\tCMOV $dst,$src \t @cmovI_cmpI_reg_reg" ++ %} ++ ++ ins_encode %{ ++ Register op1 = $tmp1$$Register; ++ Register op2 = $tmp2$$Register; ++ Register dst = $dst$$Register; ++ Register src = $src$$Register; ++ int flag = $cop$$cmpcode; ++ ++ __ cmp_cmov(op1, op2, dst, src, (MacroAssembler::CMCompare) flag, true /* is_signed */); ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct cmovI_cmpP_reg_reg(mRegI dst, mRegI src, mRegP tmp1, mRegP tmp2, cmpOpU cop ) %{ ++ match(Set dst (CMoveI (Binary cop (CmpP tmp1 tmp2)) (Binary dst src))); ++ ins_cost(80); ++ format %{ ++ "CMPU$cop $tmp1,$tmp2\t @cmovI_cmpP_reg_reg\n\t" ++ "CMOV $dst,$src\t @cmovI_cmpP_reg_reg" ++ %} ++ ins_encode %{ ++ Register op1 = $tmp1$$Register; ++ Register op2 = $tmp2$$Register; ++ Register dst = $dst$$Register; ++ Register src = $src$$Register; ++ int flag = $cop$$cmpcode; ++ ++ __ cmp_cmov(op1, op2, dst, src, (MacroAssembler::CMCompare) flag, false /* is_signed */); ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct cmovI_cmpN_reg_reg(mRegI dst, mRegI src, mRegN tmp1, mRegN tmp2, cmpOpU cop ) %{ ++ match(Set dst (CMoveI (Binary cop (CmpN tmp1 tmp2)) (Binary dst src))); ++ ins_cost(80); ++ format %{ ++ "CMPU$cop $tmp1,$tmp2\t @cmovI_cmpN_reg_reg\n\t" ++ "CMOV $dst,$src\t @cmovI_cmpN_reg_reg" ++ %} ++ ins_encode %{ ++ Register op1 = $tmp1$$Register; ++ Register op2 = $tmp2$$Register; ++ Register dst = $dst$$Register; ++ Register src = $src$$Register; ++ int flag = $cop$$cmpcode; ++ ++ __ cmp_cmov(op1, op2, dst, src, (MacroAssembler::CMCompare) flag, false /* is_signed */); ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct cmovP_cmpU_reg_reg(mRegP dst, mRegP src, mRegI tmp1, mRegI tmp2, cmpOpU cop ) %{ ++ match(Set dst (CMoveP (Binary cop (CmpU tmp1 tmp2)) (Binary dst src))); ++ ins_cost(80); ++ format %{ ++ "CMPU$cop $tmp1,$tmp2\t @cmovP_cmpU_reg_reg\n\t" ++ "CMOV $dst,$src\t @cmovP_cmpU_reg_reg" ++ %} ++ ins_encode %{ ++ Register op1 = $tmp1$$Register; ++ Register op2 = $tmp2$$Register; ++ Register dst = $dst$$Register; ++ Register src = $src$$Register; ++ int flag = $cop$$cmpcode; ++ ++ __ cmp_cmov(op1, op2, dst, src, (MacroAssembler::CMCompare) flag, false /* is_signed */); ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct cmovP_cmpF_reg_reg(mRegP dst, mRegP src, regF tmp1, regF tmp2, cmpOp cop ) %{ ++ match(Set dst (CMoveP (Binary cop (CmpF tmp1 tmp2)) (Binary dst src))); ++ ins_cost(80); ++ format %{ ++ "CMP$cop $tmp1, $tmp2\t @cmovP_cmpF_reg_reg\n" ++ "\tCMOV $dst,$src \t @cmovP_cmpF_reg_reg" ++ %} ++ ++ ins_encode %{ ++ FloatRegister reg_op1 = $tmp1$$FloatRegister; ++ FloatRegister reg_op2 = $tmp2$$FloatRegister; ++ Register dst = $dst$$Register; ++ Register src = $src$$Register; ++ int flag = $cop$$cmpcode; ++ ++ __ cmp_cmov(reg_op1, reg_op2, dst, src, (MacroAssembler::CMCompare) flag, true /* is_float */); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct cmovP_cmpN_reg_reg(mRegP dst, mRegP src, mRegN tmp1, mRegN tmp2, cmpOpU cop ) %{ ++ match(Set dst (CMoveP (Binary cop (CmpN tmp1 tmp2)) (Binary dst src))); ++ ins_cost(80); ++ format %{ ++ "CMPU$cop $tmp1,$tmp2\t @cmovP_cmpN_reg_reg\n\t" ++ "CMOV $dst,$src\t @cmovP_cmpN_reg_reg" ++ %} ++ ins_encode %{ ++ Register op1 = $tmp1$$Register; ++ Register op2 = $tmp2$$Register; ++ Register dst = $dst$$Register; ++ Register src = $src$$Register; ++ int flag = $cop$$cmpcode; ++ ++ __ cmp_cmov(op1, op2, 
dst, src, (MacroAssembler::CMCompare) flag, false /* is_signed */); ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct cmovN_cmpP_reg_reg(mRegN dst, mRegN src, mRegP tmp1, mRegP tmp2, cmpOpU cop ) %{ ++ match(Set dst (CMoveN (Binary cop (CmpP tmp1 tmp2)) (Binary dst src))); ++ ins_cost(80); ++ format %{ ++ "CMPU$cop $tmp1,$tmp2\t @cmovN_cmpP_reg_reg\n\t" ++ "CMOV $dst,$src\t @cmovN_cmpP_reg_reg" ++ %} ++ ins_encode %{ ++ Register op1 = $tmp1$$Register; ++ Register op2 = $tmp2$$Register; ++ Register dst = $dst$$Register; ++ Register src = $src$$Register; ++ int flag = $cop$$cmpcode; ++ ++ __ cmp_cmov(op1, op2, dst, src, (MacroAssembler::CMCompare) flag, false /* is_signed */); ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct cmovP_cmpD_reg_reg(mRegP dst, mRegP src, regD tmp1, regD tmp2, cmpOp cop ) %{ ++ match(Set dst (CMoveP (Binary cop (CmpD tmp1 tmp2)) (Binary dst src))); ++ ins_cost(80); ++ format %{ ++ "CMP$cop $tmp1, $tmp2\t @cmovP_cmpD_reg_reg\n" ++ "\tCMOV $dst,$src \t @cmovP_cmpD_reg_reg" ++ %} ++ ins_encode %{ ++ FloatRegister reg_op1 = as_FloatRegister($tmp1$$reg); ++ FloatRegister reg_op2 = as_FloatRegister($tmp2$$reg); ++ Register dst = as_Register($dst$$reg); ++ Register src = as_Register($src$$reg); ++ int flag = $cop$$cmpcode; ++ ++ __ cmp_cmov(reg_op1, reg_op2, dst, src, (MacroAssembler::CMCompare) flag, false /* is_float */); ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++ ++instruct cmovN_cmpN_reg_reg(mRegN dst, mRegN src, mRegN tmp1, mRegN tmp2, cmpOpU cop ) %{ ++ match(Set dst (CMoveN (Binary cop (CmpN tmp1 tmp2)) (Binary dst src))); ++ ins_cost(80); ++ format %{ ++ "CMPU$cop $tmp1,$tmp2\t @cmovN_cmpN_reg_reg\n\t" ++ "CMOV $dst,$src\t @cmovN_cmpN_reg_reg" ++ %} ++ ins_encode %{ ++ Register op1 = $tmp1$$Register; ++ Register op2 = $tmp2$$Register; ++ Register dst = $dst$$Register; ++ Register src = $src$$Register; ++ int flag = $cop$$cmpcode; ++ ++ __ cmp_cmov(op1, op2, dst, src, (MacroAssembler::CMCompare) flag, false /* is_signed */); ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++ ++instruct cmovI_cmpU_reg_reg(mRegI dst, mRegI src, mRegI tmp1, mRegI tmp2, cmpOpU cop ) %{ ++ match(Set dst (CMoveI (Binary cop (CmpU tmp1 tmp2)) (Binary dst src))); ++ ins_cost(80); ++ format %{ ++ "CMPU$cop $tmp1,$tmp2\t @cmovI_cmpU_reg_reg\n\t" ++ "CMOV $dst,$src\t @cmovI_cmpU_reg_reg" ++ %} ++ ins_encode %{ ++ Register op1 = $tmp1$$Register; ++ Register op2 = $tmp2$$Register; ++ Register dst = $dst$$Register; ++ Register src = $src$$Register; ++ int flag = $cop$$cmpcode; ++ ++ __ cmp_cmov(op1, op2, dst, src, (MacroAssembler::CMCompare) flag, false /* is_signed */); ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct cmovI_cmpL_reg_reg(mRegI dst, mRegI src, mRegL tmp1, mRegL tmp2, cmpOp cop ) %{ ++ match(Set dst (CMoveI (Binary cop (CmpL tmp1 tmp2)) (Binary dst src))); ++ ins_cost(80); ++ format %{ ++ "CMP$cop $tmp1, $tmp2\t @cmovI_cmpL_reg_reg\n" ++ "\tCMOV $dst,$src \t @cmovI_cmpL_reg_reg" ++ %} ++ ins_encode %{ ++ Register opr1 = as_Register($tmp1$$reg); ++ Register opr2 = as_Register($tmp2$$reg); ++ Register dst = $dst$$Register; ++ Register src = $src$$Register; ++ int flag = $cop$$cmpcode; ++ ++ __ cmp_cmov(opr1, opr2, dst, src, (MacroAssembler::CMCompare) flag, true /* is_signed */); ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct cmovI_cmpUL_reg_reg(mRegI dst, mRegI src, mRegL tmp1, mRegL tmp2, cmpOp cop) %{ ++ match(Set dst (CMoveI (Binary cop (CmpUL tmp1 tmp2)) (Binary dst src))); ++ ins_cost(80); ++ format %{ ++ "CMP$cop $tmp1, $tmp2\t @cmovI_cmpUL_reg_reg\n" ++ 
"\tCMOV $dst,$src \t @cmovI_cmpUL_reg_reg" ++ %} ++ ins_encode %{ ++ Register opr1 = as_Register($tmp1$$reg); ++ Register opr2 = as_Register($tmp2$$reg); ++ Register dst = $dst$$Register; ++ Register src = $src$$Register; ++ int flag = $cop$$cmpcode; ++ ++ __ cmp_cmov(opr1, opr2, dst, src, (MacroAssembler::CMCompare) flag, false /* is_signed */); ++ %} ++ ++ ins_pipe(pipe_slow); ++%} ++ ++instruct cmovP_cmpL_reg_reg(mRegP dst, mRegP src, mRegL tmp1, mRegL tmp2, cmpOp cop ) %{ ++ match(Set dst (CMoveP (Binary cop (CmpL tmp1 tmp2)) (Binary dst src))); ++ ins_cost(80); ++ format %{ ++ "CMP$cop $tmp1, $tmp2\t @cmovP_cmpL_reg_reg\n" ++ "\tCMOV $dst,$src \t @cmovP_cmpL_reg_reg" ++ %} ++ ins_encode %{ ++ Register opr1 = as_Register($tmp1$$reg); ++ Register opr2 = as_Register($tmp2$$reg); ++ Register dst = $dst$$Register; ++ Register src = $src$$Register; ++ int flag = $cop$$cmpcode; ++ ++ __ cmp_cmov(opr1, opr2, dst, src, (MacroAssembler::CMCompare) flag, true /* is_signed */); ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct cmovP_cmpUL_reg_reg(mRegP dst, mRegP src, mRegL tmp1, mRegL tmp2, cmpOp cop) %{ ++ match(Set dst (CMoveP (Binary cop (CmpUL tmp1 tmp2)) (Binary dst src))); ++ ins_cost(80); ++ format %{ ++ "CMP$cop $tmp1, $tmp2\t @cmovP_cmpUL_reg_reg\n" ++ "\tCMOV $dst,$src \t @cmovP_cmpUL_reg_reg" ++ %} ++ ins_encode %{ ++ Register opr1 = as_Register($tmp1$$reg); ++ Register opr2 = as_Register($tmp2$$reg); ++ Register dst = $dst$$Register; ++ Register src = $src$$Register; ++ int flag = $cop$$cmpcode; ++ ++ __ cmp_cmov(opr1, opr2, dst, src, (MacroAssembler::CMCompare) flag, false /* is_signed */); ++ %} ++ ++ ins_pipe(pipe_slow); ++%} ++ ++instruct cmovI_cmpD_reg_reg(mRegI dst, mRegI src, regD tmp1, regD tmp2, cmpOp cop ) %{ ++ match(Set dst (CMoveI (Binary cop (CmpD tmp1 tmp2)) (Binary dst src))); ++ ins_cost(80); ++ format %{ ++ "CMP$cop $tmp1, $tmp2\t @cmovI_cmpD_reg_reg\n" ++ "\tCMOV $dst,$src \t @cmovI_cmpD_reg_reg" ++ %} ++ ins_encode %{ ++ FloatRegister reg_op1 = as_FloatRegister($tmp1$$reg); ++ FloatRegister reg_op2 = as_FloatRegister($tmp2$$reg); ++ Register dst = as_Register($dst$$reg); ++ Register src = as_Register($src$$reg); ++ int flag = $cop$$cmpcode; ++ ++ __ cmp_cmov(reg_op1, reg_op2, dst, src, (MacroAssembler::CMCompare) flag, false /* is_float */); ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++ ++instruct cmovP_cmpP_reg_reg(mRegP dst, mRegP src, mRegP tmp1, mRegP tmp2, cmpOpU cop ) %{ ++ match(Set dst (CMoveP (Binary cop (CmpP tmp1 tmp2)) (Binary dst src))); ++ ins_cost(80); ++ format %{ ++ "CMPU$cop $tmp1,$tmp2\t @cmovP_cmpP_reg_reg\n\t" ++ "CMOV $dst,$src\t @cmovP_cmpP_reg_reg" ++ %} ++ ins_encode %{ ++ Register op1 = $tmp1$$Register; ++ Register op2 = $tmp2$$Register; ++ Register dst = $dst$$Register; ++ Register src = $src$$Register; ++ int flag = $cop$$cmpcode; ++ ++ __ cmp_cmov(op1, op2, dst, src, (MacroAssembler::CMCompare) flag, false /* is_signed */); ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct cmovP_cmpI_reg_reg(mRegP dst, mRegP src, mRegI tmp1, mRegI tmp2, cmpOp cop ) %{ ++ match(Set dst (CMoveP (Binary cop (CmpI tmp1 tmp2)) (Binary dst src))); ++ ins_cost(80); ++ format %{ ++ "CMP$cop $tmp1,$tmp2\t @cmovP_cmpI_reg_reg\n\t" ++ "CMOV $dst,$src\t @cmovP_cmpI_reg_reg" ++ %} ++ ins_encode %{ ++ Register op1 = $tmp1$$Register; ++ Register op2 = $tmp2$$Register; ++ Register dst = $dst$$Register; ++ Register src = $src$$Register; ++ int flag = $cop$$cmpcode; ++ ++ __ cmp_cmov(op1, op2, dst, src, (MacroAssembler::CMCompare) flag, true /* is_signed */); ++ 
%} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct cmovL_cmpP_reg_reg(mRegL dst, mRegL src, mRegP tmp1, mRegP tmp2, cmpOpU cop ) %{ ++ match(Set dst (CMoveL (Binary cop (CmpP tmp1 tmp2)) (Binary dst src))); ++ ins_cost(80); ++ format %{ ++ "CMPU$cop $tmp1,$tmp2\t @cmovL_cmpP_reg_reg\n\t" ++ "CMOV $dst,$src\t @cmovL_cmpP_reg_reg" ++ %} ++ ins_encode %{ ++ Register op1 = $tmp1$$Register; ++ Register op2 = $tmp2$$Register; ++ Register dst = $dst$$Register; ++ Register src = $src$$Register; ++ int flag = $cop$$cmpcode; ++ ++ __ cmp_cmov(op1, op2, dst, src, (MacroAssembler::CMCompare) flag, false /* is_signed */); ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct cmovN_cmpU_reg_reg(mRegN dst, mRegN src, mRegI tmp1, mRegI tmp2, cmpOpU cop ) %{ ++ match(Set dst (CMoveN (Binary cop (CmpU tmp1 tmp2)) (Binary dst src))); ++ ins_cost(80); ++ format %{ ++ "CMPU$cop $tmp1,$tmp2\t @cmovN_cmpU_reg_reg\n\t" ++ "CMOV $dst,$src\t @cmovN_cmpU_reg_reg" ++ %} ++ ins_encode %{ ++ Register op1 = $tmp1$$Register; ++ Register op2 = $tmp2$$Register; ++ Register dst = $dst$$Register; ++ Register src = $src$$Register; ++ int flag = $cop$$cmpcode; ++ ++ __ cmp_cmov(op1, op2, dst, src, (MacroAssembler::CMCompare) flag, false /* is_signed */); ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct cmovN_cmpL_reg_reg(mRegN dst, mRegN src, mRegL tmp1, mRegL tmp2, cmpOp cop) %{ ++ match(Set dst (CMoveN (Binary cop (CmpL tmp1 tmp2)) (Binary dst src))); ++ ins_cost(80); ++ format %{ ++ "CMP$cop $tmp1, $tmp2\t @cmovN_cmpL_reg_reg\n" ++ "\tCMOV $dst,$src \t @cmovN_cmpL_reg_reg" ++ %} ++ ins_encode %{ ++ Register opr1 = as_Register($tmp1$$reg); ++ Register opr2 = as_Register($tmp2$$reg); ++ Register dst = $dst$$Register; ++ Register src = $src$$Register; ++ int flag = $cop$$cmpcode; ++ ++ __ cmp_cmov(opr1, opr2, dst, src, (MacroAssembler::CMCompare) flag, true /* is_signed */); ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct cmovN_cmpUL_reg_reg(mRegN dst, mRegN src, mRegL tmp1, mRegL tmp2, cmpOp cop) %{ ++ match(Set dst (CMoveN (Binary cop (CmpUL tmp1 tmp2)) (Binary dst src))); ++ ins_cost(80); ++ format %{ ++ "CMP$cop $tmp1, $tmp2\t @cmovN_cmpUL_reg_reg\n" ++ "\tCMOV $dst,$src \t @cmovN_cmpUL_reg_reg" ++ %} ++ ins_encode %{ ++ Register opr1 = as_Register($tmp1$$reg); ++ Register opr2 = as_Register($tmp2$$reg); ++ Register dst = $dst$$Register; ++ Register src = $src$$Register; ++ int flag = $cop$$cmpcode; ++ ++ __ cmp_cmov(opr1, opr2, dst, src, (MacroAssembler::CMCompare) flag, false /* is_signed */); ++ %} ++ ++ ins_pipe(pipe_slow); ++%} ++ ++instruct cmovN_cmpI_reg_reg(mRegN dst, mRegN src, mRegI tmp1, mRegI tmp2, cmpOp cop ) %{ ++ match(Set dst (CMoveN (Binary cop (CmpI tmp1 tmp2)) (Binary dst src))); ++ ins_cost(80); ++ format %{ ++ "CMP$cop $tmp1,$tmp2\t @cmovN_cmpI_reg_reg\n\t" ++ "CMOV $dst,$src\t @cmovN_cmpI_reg_reg" ++ %} ++ ins_encode %{ ++ Register op1 = $tmp1$$Register; ++ Register op2 = $tmp2$$Register; ++ Register dst = $dst$$Register; ++ Register src = $src$$Register; ++ int flag = $cop$$cmpcode; ++ ++ __ cmp_cmov(op1, op2, dst, src, (MacroAssembler::CMCompare) flag, true /* is_signed */); ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct cmovL_cmpU_reg_reg(mRegL dst, mRegL src, mRegI tmp1, mRegI tmp2, cmpOpU cop ) %{ ++ match(Set dst (CMoveL (Binary cop (CmpU tmp1 tmp2)) (Binary dst src))); ++ ins_cost(80); ++ format %{ ++ "CMPU$cop $tmp1,$tmp2\t @cmovL_cmpU_reg_reg\n\t" ++ "CMOV $dst,$src\t @cmovL_cmpU_reg_reg" ++ %} ++ ins_encode %{ ++ Register op1 = $tmp1$$Register; ++ Register op2 = 
$tmp2$$Register; ++ Register dst = $dst$$Register; ++ Register src = $src$$Register; ++ int flag = $cop$$cmpcode; ++ ++ __ cmp_cmov(op1, op2, dst, src, (MacroAssembler::CMCompare) flag, false /* is_signed */); ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct cmovL_cmpF_reg_reg(mRegL dst, mRegL src, regF tmp1, regF tmp2, cmpOp cop ) %{ ++ match(Set dst (CMoveL (Binary cop (CmpF tmp1 tmp2)) (Binary dst src))); ++ ins_cost(80); ++ format %{ ++ "CMP$cop $tmp1, $tmp2\t @cmovL_cmpF_reg_reg\n" ++ "\tCMOV $dst,$src \t @cmovL_cmpF_reg_reg" ++ %} ++ ++ ins_encode %{ ++ FloatRegister reg_op1 = $tmp1$$FloatRegister; ++ FloatRegister reg_op2 = $tmp2$$FloatRegister; ++ Register dst = $dst$$Register; ++ Register src = $src$$Register; ++ int flag = $cop$$cmpcode; ++ ++ __ cmp_cmov(reg_op1, reg_op2, dst, src, (MacroAssembler::CMCompare) flag, true /* is_float */); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct cmovL_cmpI_reg_reg(mRegL dst, mRegL src, mRegI tmp1, mRegI tmp2, cmpOp cop ) %{ ++ match(Set dst (CMoveL (Binary cop (CmpI tmp1 tmp2)) (Binary dst src))); ++ ins_cost(80); ++ format %{ ++ "CMP$cop $tmp1, $tmp2\t @cmovL_cmpI_reg_reg\n" ++ "\tCMOV $dst,$src \t @cmovL_cmpI_reg_reg" ++ %} ++ ++ ins_encode %{ ++ Register op1 = $tmp1$$Register; ++ Register op2 = $tmp2$$Register; ++ Register dst = as_Register($dst$$reg); ++ Register src = as_Register($src$$reg); ++ int flag = $cop$$cmpcode; ++ ++ __ cmp_cmov(op1, op2, dst, src, (MacroAssembler::CMCompare) flag, true /* is_signed */); ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct cmovL_cmpL_reg_reg(mRegL dst, mRegL src, mRegL tmp1, mRegL tmp2, cmpOp cop ) %{ ++ match(Set dst (CMoveL (Binary cop (CmpL tmp1 tmp2)) (Binary dst src))); ++ ins_cost(80); ++ format %{ ++ "CMP$cop $tmp1, $tmp2\t @cmovL_cmpL_reg_reg\n" ++ "\tCMOV $dst,$src \t @cmovL_cmpL_reg_reg" ++ %} ++ ins_encode %{ ++ Register opr1 = as_Register($tmp1$$reg); ++ Register opr2 = as_Register($tmp2$$reg); ++ Register dst = as_Register($dst$$reg); ++ Register src = as_Register($src$$reg); ++ int flag = $cop$$cmpcode; ++ ++ __ cmp_cmov(opr1, opr2, dst, src, (MacroAssembler::CMCompare) flag, true /* is_signed */); ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct cmovL_cmpUL_reg_reg(mRegL dst, mRegL src, mRegL tmp1, mRegL tmp2, cmpOp cop) %{ ++ match(Set dst (CMoveL (Binary cop (CmpUL tmp1 tmp2)) (Binary dst src))); ++ ins_cost(80); ++ format %{ ++ "CMP$cop $tmp1, $tmp2\t @cmovL_cmpUL_reg_reg\n" ++ "\tCMOV $dst,$src \t @cmovL_cmpUL_reg_reg" ++ %} ++ ins_encode %{ ++ Register opr1 = as_Register($tmp1$$reg); ++ Register opr2 = as_Register($tmp2$$reg); ++ Register dst = as_Register($dst$$reg); ++ Register src = as_Register($src$$reg); ++ int flag = $cop$$cmpcode; ++ ++ __ cmp_cmov(opr1, opr2, dst, src, (MacroAssembler::CMCompare) flag, false /* is_signed */); ++ %} ++ ++ ins_pipe(pipe_slow); ++%} ++ ++instruct cmovL_cmpN_reg_reg(mRegL dst, mRegL src, mRegN tmp1, mRegN tmp2, cmpOpU cop ) %{ ++ match(Set dst (CMoveL (Binary cop (CmpN tmp1 tmp2)) (Binary dst src))); ++ ins_cost(80); ++ format %{ ++ "CMPU$cop $tmp1,$tmp2\t @cmovL_cmpN_reg_reg\n\t" ++ "CMOV $dst,$src\t @cmovL_cmpN_reg_reg" ++ %} ++ ins_encode %{ ++ Register op1 = $tmp1$$Register; ++ Register op2 = $tmp2$$Register; ++ Register dst = $dst$$Register; ++ Register src = $src$$Register; ++ int flag = $cop$$cmpcode; ++ ++ __ cmp_cmov(op1, op2, dst, src, (MacroAssembler::CMCompare) flag, false /* is_signed */); ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++ ++instruct cmovL_cmpD_reg_reg(mRegL dst, mRegL src, regD tmp1, regD tmp2, 
cmpOp cop ) %{ ++ match(Set dst (CMoveL (Binary cop (CmpD tmp1 tmp2)) (Binary dst src))); ++ ins_cost(80); ++ format %{ ++ "CMP$cop $tmp1, $tmp2\t @cmovL_cmpD_reg_reg\n" ++ "\tCMOV $dst,$src \t @cmovL_cmpD_reg_reg" ++ %} ++ ins_encode %{ ++ FloatRegister reg_op1 = as_FloatRegister($tmp1$$reg); ++ FloatRegister reg_op2 = as_FloatRegister($tmp2$$reg); ++ Register dst = as_Register($dst$$reg); ++ Register src = as_Register($src$$reg); ++ int flag = $cop$$cmpcode; ++ ++ __ cmp_cmov(reg_op1, reg_op2, dst, src, (MacroAssembler::CMCompare) flag, false /* is_float */); ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct cmovD_cmpD_reg_reg(regD dst, regD src, regD tmp1, regD tmp2, cmpOp cop ) %{ ++ match(Set dst (CMoveD (Binary cop (CmpD tmp1 tmp2)) (Binary dst src))); ++ ins_cost(200); ++ format %{ ++ "CMP$cop $tmp1, $tmp2\t @cmovD_cmpD_reg_reg\n" ++ "\tCMOV $dst,$src \t @cmovD_cmpD_reg_reg" ++ %} ++ ins_encode %{ ++ FloatRegister reg_op1 = as_FloatRegister($tmp1$$reg); ++ FloatRegister reg_op2 = as_FloatRegister($tmp2$$reg); ++ FloatRegister dst = as_FloatRegister($dst$$reg); ++ FloatRegister src = as_FloatRegister($src$$reg); ++ int flag = $cop$$cmpcode; ++ ++ __ cmp_cmov(reg_op1, reg_op2, dst, src, (MacroAssembler::CMCompare) flag, false /* is_float */); ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct cmovF_cmpI_reg_reg(regF dst, regF src, mRegI tmp1, mRegI tmp2, cmpOp cop ) %{ ++ match(Set dst (CMoveF (Binary cop (CmpI tmp1 tmp2)) (Binary dst src))); ++ ins_cost(200); ++ format %{ ++ "CMP$cop $tmp1, $tmp2\t @cmovF_cmpI_reg_reg\n" ++ "\tCMOV $dst, $src \t @cmovF_cmpI_reg_reg" ++ %} ++ ++ ins_encode %{ ++ Register op1 = $tmp1$$Register; ++ Register op2 = $tmp2$$Register; ++ FloatRegister dst = as_FloatRegister($dst$$reg); ++ FloatRegister src = as_FloatRegister($src$$reg); ++ int flag = $cop$$cmpcode; ++ ++ __ cmp_cmov(op1, op2, dst, src, (MacroAssembler::CMCompare) flag, true /* is_float */); ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct cmovD_cmpI_reg_reg(regD dst, regD src, mRegI tmp1, mRegI tmp2, cmpOp cop ) %{ ++ match(Set dst (CMoveD (Binary cop (CmpI tmp1 tmp2)) (Binary dst src))); ++ ins_cost(200); ++ format %{ ++ "CMP$cop $tmp1, $tmp2\t @cmovD_cmpI_reg_reg\n" ++ "\tCMOV $dst, $src \t @cmovD_cmpI_reg_reg" ++ %} ++ ++ ins_encode %{ ++ Register op1 = $tmp1$$Register; ++ Register op2 = $tmp2$$Register; ++ FloatRegister dst = as_FloatRegister($dst$$reg); ++ FloatRegister src = as_FloatRegister($src$$reg); ++ int flag = $cop$$cmpcode; ++ ++ __ cmp_cmov(op1, op2, dst, src, (MacroAssembler::CMCompare) flag, false /* is_float */); ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct cmovD_cmpP_reg_reg(regD dst, regD src, mRegP tmp1, mRegP tmp2, cmpOp cop ) %{ ++ match(Set dst (CMoveD (Binary cop (CmpP tmp1 tmp2)) (Binary dst src))); ++ ins_cost(200); ++ format %{ ++ "CMP$cop $tmp1, $tmp2\t @cmovD_cmpP_reg_reg\n" ++ "\tCMOV $dst, $src \t @cmovD_cmpP_reg_reg" ++ %} ++ ++ ins_encode %{ ++ Register op1 = $tmp1$$Register; ++ Register op2 = $tmp2$$Register; ++ FloatRegister dst = as_FloatRegister($dst$$reg); ++ FloatRegister src = as_FloatRegister($src$$reg); ++ int flag = $cop$$cmpcode; ++ ++ __ cmp_cmov(op1, op2, dst, src, (MacroAssembler::CMCompare) flag, false /* is_float */); ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++//FIXME ++instruct cmovI_cmpF_reg_reg(mRegI dst, mRegI src, regF tmp1, regF tmp2, cmpOp cop ) %{ ++ match(Set dst (CMoveI (Binary cop (CmpF tmp1 tmp2)) (Binary dst src))); ++ ins_cost(80); ++ format %{ ++ "CMP$cop $tmp1, $tmp2\t @cmovI_cmpF_reg_reg\n" ++ "\tCMOV 
$dst,$src \t @cmovI_cmpF_reg_reg" %} ++ ++ ins_encode %{ ++ FloatRegister reg_op1 = $tmp1$$FloatRegister; ++ FloatRegister reg_op2 = $tmp2$$FloatRegister; ++ Register dst = $dst$$Register; ++ Register src = $src$$Register; ++ int flag = $cop$$cmpcode; ++ ++ __ cmp_cmov(reg_op1, reg_op2, dst, src, (MacroAssembler::CMCompare) flag, true /* is_float */); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct cmovF_cmpF_reg_reg(regF dst, regF src, regF tmp1, regF tmp2, cmpOp cop ) %{ ++ match(Set dst (CMoveF (Binary cop (CmpF tmp1 tmp2)) (Binary dst src))); ++ ins_cost(200); ++ format %{ ++ "CMP$cop $tmp1, $tmp2\t @cmovF_cmpF_reg_reg\n" ++ "\tCMOV $dst,$src \t @cmovF_cmpF_reg_reg" ++ %} ++ ++ ins_encode %{ ++ FloatRegister reg_op1 = $tmp1$$FloatRegister; ++ FloatRegister reg_op2 = $tmp2$$FloatRegister; ++ FloatRegister dst = $dst$$FloatRegister; ++ FloatRegister src = $src$$FloatRegister; ++ int flag = $cop$$cmpcode; ++ ++ __ cmp_cmov(reg_op1, reg_op2, dst, src, (MacroAssembler::CMCompare) flag, true /* is_float */); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++// Manifest a CmpL result in an integer register. Very painful. ++// This is the test to avoid. ++instruct cmpL3_reg_reg(mRegI dst, mRegL src1, mRegL src2) %{ ++ match(Set dst (CmpL3 src1 src2)); ++ ins_cost(1000); ++ format %{ "cmpL3 $dst, $src1, $src2 @ cmpL3_reg_reg" %} ++ ins_encode %{ ++ Register opr1 = as_Register($src1$$reg); ++ Register opr2 = as_Register($src2$$reg); ++ Register dst = as_Register($dst$$reg); ++ ++ __ slt(AT, opr1, opr2); ++ __ slt(dst, opr2, opr1); ++ __ subu(dst, dst, AT); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++// ++// less_result = -1 ++// greater_result = 1 ++// equal_result = 0 ++// nan_result = -1 ++// ++instruct cmpF3_reg_reg(mRegI dst, regF src1, regF src2) %{ ++ match(Set dst (CmpF3 src1 src2)); ++ ins_cost(1000); ++ format %{ "cmpF3 $dst, $src1, $src2 @ cmpF3_reg_reg" %} ++ ins_encode %{ ++ FloatRegister src1 = as_FloatRegister($src1$$reg); ++ FloatRegister src2 = as_FloatRegister($src2$$reg); ++ Register dst = as_Register($dst$$reg); ++ ++ __ ori(dst, R0, 1); ++ __ ori(AT, R0, 1); ++ __ c_olt_s(src2, src1); ++ __ movf(dst, R0); ++ __ c_ult_s(src1, src2); ++ __ movf(AT, R0); ++ __ subu(dst, dst, AT); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct cmpD3_reg_reg(mRegI dst, regD src1, regD src2) %{ ++ match(Set dst (CmpD3 src1 src2)); ++ ins_cost(1000); ++ format %{ "cmpD3 $dst, $src1, $src2 @ cmpD3_reg_reg" %} ++ ins_encode %{ ++ FloatRegister src1 = as_FloatRegister($src1$$reg); ++ FloatRegister src2 = as_FloatRegister($src2$$reg); ++ Register dst = as_Register($dst$$reg); ++ ++ __ ori(dst, R0, 1); ++ __ ori(AT, R0, 1); ++ __ c_olt_d(src2, src1); ++ __ movf(dst, R0); ++ __ c_ult_d(src1, src2); ++ __ movf(AT, R0); ++ __ subu(dst, dst, AT); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct clear_array(mRegL cnt, mRegP base, Universe dummy) %{ ++ match(Set dummy (ClearArray cnt base)); ++ format %{ "CLEAR_ARRAY base = $base, cnt = $cnt # Clear doublewords" %} ++ ins_encode %{ ++ //Assume cnt is the number of bytes in an array to be cleared, ++ //and base points to the starting address of the array. 
++ Register base = $base$$Register; ++ Register num = $cnt$$Register; ++ Label Loop, done; ++ ++ __ beq(num, R0, done); ++ __ delayed()->daddu(AT, base, R0); ++ ++ __ move(T9, num); /* T9 = words */ ++ ++ __ bind(Loop); ++ __ sd(R0, AT, 0); ++ __ daddiu(T9, T9, -1); ++ __ bne(T9, R0, Loop); ++ __ delayed()->daddiu(AT, AT, wordSize); ++ ++ __ bind(done); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct string_compareL(a4_RegP str1, mA5RegI cnt1, a6_RegP str2, mA7RegI cnt2, no_Ax_mRegI result) %{ ++ predicate(((StrCompNode*)n)->encoding() == StrIntrinsicNode::LL); ++ match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2))); ++ effect(USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2); ++ ++ format %{ "String Compare byte[] $str1[len: $cnt1], $str2[len: $cnt2] -> $result @ string_compareL" %} ++ ins_encode %{ ++ __ string_compare($str1$$Register, $str2$$Register, ++ $cnt1$$Register, $cnt2$$Register, $result$$Register, ++ StrIntrinsicNode::LL); ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct string_compareU(a4_RegP str1, mA5RegI cnt1, a6_RegP str2, mA7RegI cnt2, no_Ax_mRegI result) %{ ++ predicate(((StrCompNode*)n)->encoding() == StrIntrinsicNode::UU); ++ match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2))); ++ effect(USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2); ++ ++ format %{ "String Compare char[] $str1[len: $cnt1], $str2[len: $cnt2] -> $result @ string_compareU" %} ++ ins_encode %{ ++ __ string_compare($str1$$Register, $str2$$Register, ++ $cnt1$$Register, $cnt2$$Register, $result$$Register, ++ StrIntrinsicNode::UU); ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct string_compareLU(a4_RegP str1, mA5RegI cnt1, a6_RegP str2, mA7RegI cnt2, no_Ax_mRegI result) %{ ++ predicate(((StrCompNode*)n)->encoding() == StrIntrinsicNode::LU); ++ match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2))); ++ effect(USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2); ++ ++ format %{ "String Compare byte[] $str1[len: $cnt1], $str2[len: $cnt2] -> $result @ string_compareLU" %} ++ ins_encode %{ ++ __ string_compare($str1$$Register, $str2$$Register, ++ $cnt1$$Register, $cnt2$$Register, $result$$Register, ++ StrIntrinsicNode::LU); ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct string_compareUL(a4_RegP str1, mA5RegI cnt1, a6_RegP str2, mA7RegI cnt2, no_Ax_mRegI result) %{ ++ predicate(((StrCompNode*)n)->encoding() == StrIntrinsicNode::UL); ++ match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2))); ++ effect(USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2); ++ ++ format %{ "String Compare byte[] $str1[len: $cnt1], $str2[len: $cnt2] -> $result @ string_compareUL" %} ++ ins_encode %{ ++ __ string_compare($str1$$Register, $str2$$Register, ++ $cnt1$$Register, $cnt2$$Register, $result$$Register, ++ StrIntrinsicNode::UL); ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++// intrinsic optimization ++instruct string_equals(a4_RegP str1, a5_RegP str2, mA6RegI cnt, mA7RegI temp, no_Ax_mRegI result) %{ ++ match(Set result (StrEquals (Binary str1 str2) cnt)); ++ effect(USE_KILL str1, USE_KILL str2, USE_KILL cnt, KILL temp); ++ ++ format %{ "String Equal $str1, $str2, len:$cnt tmp:$temp -> $result @ string_equals" %} ++ ins_encode %{ ++ __ arrays_equals($str1$$Register, $str2$$Register, ++ $cnt$$Register, $temp$$Register, $result$$Register, ++ false/* byte */); ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++//----------Arithmetic Instructions------------------------------------------- ++//----------Addition 
Instructions--------------------------------------------- ++instruct addI_Reg_Reg(mRegI dst, mRegI src1, mRegI src2) %{ ++ match(Set dst (AddI src1 src2)); ++ ++ format %{ "add $dst, $src1, $src2 #@addI_Reg_Reg" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src1 = $src1$$Register; ++ Register src2 = $src2$$Register; ++ __ addu32(dst, src1, src2); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++instruct addI_Reg_imm(mRegI dst, mRegI src1, immI src2) %{ ++ match(Set dst (AddI src1 src2)); ++ ++ format %{ "add $dst, $src1, $src2 #@addI_Reg_imm" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src1 = $src1$$Register; ++ int imm = $src2$$constant; ++ ++ if(Assembler::is_simm16(imm)) { ++ __ addiu32(dst, src1, imm); ++ } else { ++ __ move(AT, imm); ++ __ addu32(dst, src1, AT); ++ } ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++instruct addP_reg_reg(mRegP dst, mRegP src1, mRegL src2) %{ ++ match(Set dst (AddP src1 src2)); ++ ++ format %{ "dadd $dst, $src1, $src2 #@addP_reg_reg" %} ++ ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src1 = $src1$$Register; ++ Register src2 = $src2$$Register; ++ __ daddu(dst, src1, src2); ++ %} ++ ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++instruct addP_reg_reg_convI2L(mRegP dst, mRegP src1, mRegI src2) %{ ++ match(Set dst (AddP src1 (ConvI2L src2))); ++ ++ format %{ "dadd $dst, $src1, $src2 #@addP_reg_reg_convI2L" %} ++ ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src1 = $src1$$Register; ++ Register src2 = $src2$$Register; ++ __ daddu(dst, src1, src2); ++ %} ++ ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++instruct addP_reg_imm(mRegP dst, mRegP src1, immL16 src2) %{ ++ match(Set dst (AddP src1 src2)); ++ ++ format %{ "daddi $dst, $src1, $src2 #@addP_reg_imm" %} ++ ins_encode %{ ++ Register src1 = $src1$$Register; ++ long src2 = $src2$$constant; ++ Register dst = $dst$$Register; ++ ++ __ daddiu(dst, src1, src2); ++ %} ++ ins_pipe( ialu_regI_imm16 ); ++%} ++ ++// Add Long Register with Register ++instruct addL_Reg_Reg(mRegL dst, mRegL src1, mRegL src2) %{ ++ match(Set dst (AddL src1 src2)); ++ ins_cost(200); ++ format %{ "ADD $dst, $src1, $src2 #@addL_Reg_Reg\t" %} ++ ++ ins_encode %{ ++ Register dst_reg = as_Register($dst$$reg); ++ Register src1_reg = as_Register($src1$$reg); ++ Register src2_reg = as_Register($src2$$reg); ++ ++ __ daddu(dst_reg, src1_reg, src2_reg); ++ %} ++ ++ ins_pipe( ialu_regL_regL ); ++%} ++ ++instruct addL_Reg_imm(mRegL dst, mRegL src1, immL16 src2) ++%{ ++ match(Set dst (AddL src1 src2)); ++ ++ format %{ "ADD $dst, $src1, $src2 #@addL_Reg_imm " %} ++ ins_encode %{ ++ Register dst_reg = as_Register($dst$$reg); ++ Register src1_reg = as_Register($src1$$reg); ++ int src2_imm = $src2$$constant; ++ ++ __ daddiu(dst_reg, src1_reg, src2_imm); ++ %} ++ ++ ins_pipe( ialu_regL_regL ); ++%} ++ ++instruct addL_RegI2L_imm(mRegL dst, mRegI src1, immL16 src2) ++%{ ++ match(Set dst (AddL (ConvI2L src1) src2)); ++ ++ format %{ "ADD $dst, $src1, $src2 #@addL_RegI2L_imm " %} ++ ins_encode %{ ++ Register dst_reg = as_Register($dst$$reg); ++ Register src1_reg = as_Register($src1$$reg); ++ int src2_imm = $src2$$constant; ++ ++ __ daddiu(dst_reg, src1_reg, src2_imm); ++ %} ++ ++ ins_pipe( ialu_regL_regL ); ++%} ++ ++instruct addL_RegI2L_Reg(mRegL dst, mRegI src1, mRegL src2) %{ ++ match(Set dst (AddL (ConvI2L src1) src2)); ++ ins_cost(200); ++ format %{ "ADD $dst, $src1, $src2 #@addL_RegI2L_Reg\t" %} ++ ++ ins_encode %{ ++ Register dst_reg = as_Register($dst$$reg); ++ Register src1_reg = 
as_Register($src1$$reg); ++ Register src2_reg = as_Register($src2$$reg); ++ ++ __ daddu(dst_reg, src1_reg, src2_reg); ++ %} ++ ++ ins_pipe( ialu_regL_regL ); ++%} ++ ++instruct addL_RegI2L_RegI2L(mRegL dst, mRegI src1, mRegI src2) %{ ++ match(Set dst (AddL (ConvI2L src1) (ConvI2L src2))); ++ ins_cost(200); ++ format %{ "ADD $dst, $src1, $src2 #@addL_RegI2L_RegI2L\t" %} ++ ++ ins_encode %{ ++ Register dst_reg = as_Register($dst$$reg); ++ Register src1_reg = as_Register($src1$$reg); ++ Register src2_reg = as_Register($src2$$reg); ++ ++ __ daddu(dst_reg, src1_reg, src2_reg); ++ %} ++ ++ ins_pipe( ialu_regL_regL ); ++%} ++ ++instruct addL_Reg_RegI2L(mRegL dst, mRegL src1, mRegI src2) %{ ++ match(Set dst (AddL src1 (ConvI2L src2))); ++ ins_cost(200); ++ format %{ "ADD $dst, $src1, $src2 #@addL_Reg_RegI2L\t" %} ++ ++ ins_encode %{ ++ Register dst_reg = as_Register($dst$$reg); ++ Register src1_reg = as_Register($src1$$reg); ++ Register src2_reg = as_Register($src2$$reg); ++ ++ __ daddu(dst_reg, src1_reg, src2_reg); ++ %} ++ ++ ins_pipe( ialu_regL_regL ); ++%} ++ ++//----------Abs Instructions------------------------------------------- ++ ++// Integer Absolute Instructions ++instruct absI_rReg(mRegI dst, mRegI src) ++%{ ++ match(Set dst (AbsI src)); ++ effect(TEMP dst); ++ format %{ "AbsI $dst, $src" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src = $src$$Register; ++ ++ __ sra(AT, src, 31); ++ __ xorr(dst, src, AT); ++ __ subu32(dst, dst, AT); ++ %} ++ ++ ins_pipe(ialu_regI_regI); ++%} ++ ++// Long Absolute Instructions ++instruct absL_rReg(mRegL dst, mRegL src) ++%{ ++ match(Set dst (AbsL src)); ++ effect(TEMP dst); ++ format %{ "AbsL $dst, $src" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src = $src$$Register; ++ ++ __ dsra32(AT, src, 31); ++ __ xorr(dst, src, AT); ++ __ subu(dst, dst, AT); ++ %} ++ ++ ins_pipe(ialu_regL_regL); ++%} ++ ++//----------Subtraction Instructions------------------------------------------- ++// Integer Subtraction Instructions ++instruct subI_Reg_Reg(mRegI dst, mRegI src1, mRegI src2) %{ ++ match(Set dst (SubI src1 src2)); ++ ins_cost(100); ++ ++ format %{ "sub $dst, $src1, $src2 #@subI_Reg_Reg" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src1 = $src1$$Register; ++ Register src2 = $src2$$Register; ++ __ subu32(dst, src1, src2); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++instruct subI_Reg_immI_M32767_32768(mRegI dst, mRegI src1, immI_M32767_32768 src2) %{ ++ match(Set dst (SubI src1 src2)); ++ ins_cost(80); ++ ++ format %{ "sub $dst, $src1, $src2 #@subI_Reg_immI_M32767_32768" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src1 = $src1$$Register; ++ __ addiu32(dst, src1, -1 * $src2$$constant); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++instruct negI_Reg(mRegI dst, immI_0 zero, mRegI src) %{ ++ match(Set dst (SubI zero src)); ++ ins_cost(80); ++ ++ format %{ "neg $dst, $src #@negI_Reg" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src = $src$$Register; ++ __ subu32(dst, R0, src); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++instruct negL_Reg(mRegL dst, immL_0 zero, mRegL src) %{ ++ match(Set dst (SubL zero src)); ++ ins_cost(80); ++ ++ format %{ "neg $dst, $src #@negL_Reg" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src = $src$$Register; ++ __ subu(dst, R0, src); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++instruct subL_Reg_immL_M32767_32768(mRegL dst, mRegL src1, immL_M32767_32768 src2) %{ ++ match(Set dst (SubL src1 src2)); ++ 
ins_cost(80); ++ ++ format %{ "sub $dst, $src1, $src2 #@subL_Reg_immL_M32767_32768" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src1 = $src1$$Register; ++ __ daddiu(dst, src1, -1 * $src2$$constant); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++// Subtract Long Register with Register. ++instruct subL_Reg_Reg(mRegL dst, mRegL src1, mRegL src2) %{ ++ match(Set dst (SubL src1 src2)); ++ ins_cost(100); ++ format %{ "SubL $dst, $src1, $src2 @ subL_Reg_Reg" %} ++ ins_encode %{ ++ Register dst = as_Register($dst$$reg); ++ Register src1 = as_Register($src1$$reg); ++ Register src2 = as_Register($src2$$reg); ++ ++ __ subu(dst, src1, src2); ++ %} ++ ins_pipe( ialu_regL_regL ); ++%} ++ ++instruct subL_Reg_RegI2L(mRegL dst, mRegL src1, mRegI src2) %{ ++ match(Set dst (SubL src1 (ConvI2L src2))); ++ ins_cost(100); ++ format %{ "SubL $dst, $src1, $src2 @ subL_Reg_RegI2L" %} ++ ins_encode %{ ++ Register dst = as_Register($dst$$reg); ++ Register src1 = as_Register($src1$$reg); ++ Register src2 = as_Register($src2$$reg); ++ ++ __ subu(dst, src1, src2); ++ %} ++ ins_pipe( ialu_regL_regL ); ++%} ++ ++instruct subL_RegI2L_Reg(mRegL dst, mRegI src1, mRegL src2) %{ ++ match(Set dst (SubL (ConvI2L src1) src2)); ++ ins_cost(200); ++ format %{ "SubL $dst, $src1, $src2 @ subL_RegI2L_Reg" %} ++ ins_encode %{ ++ Register dst = as_Register($dst$$reg); ++ Register src1 = as_Register($src1$$reg); ++ Register src2 = as_Register($src2$$reg); ++ ++ __ subu(dst, src1, src2); ++ %} ++ ins_pipe( ialu_regL_regL ); ++%} ++ ++instruct subL_RegI2L_RegI2L(mRegL dst, mRegI src1, mRegI src2) %{ ++ match(Set dst (SubL (ConvI2L src1) (ConvI2L src2))); ++ ins_cost(200); ++ format %{ "SubL $dst, $src1, $src2 @ subL_RegI2L_RegI2L" %} ++ ins_encode %{ ++ Register dst = as_Register($dst$$reg); ++ Register src1 = as_Register($src1$$reg); ++ Register src2 = as_Register($src2$$reg); ++ ++ __ subu(dst, src1, src2); ++ %} ++ ins_pipe( ialu_regL_regL ); ++%} ++ ++// Integer MOD with Register ++instruct modI_Reg_Reg(mRegI dst, mRegI src1, mRegI src2) %{ ++ match(Set dst (ModI src1 src2)); ++ ins_cost(300); ++ format %{ "modi $dst, $src1, $src2 @ modI_Reg_Reg" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src1 = $src1$$Register; ++ Register src2 = $src2$$Register; ++ ++ //if (UseLEXT1) { ++ if (0) { ++ // Experiments show that gsmod is slower than div+mfhi. ++ // So I just disable it here. 
++ __ gsmod(dst, src1, src2); ++ } else { ++ __ div(src1, src2); ++ __ mfhi(dst); ++ } ++ %} ++ ++ //ins_pipe( ialu_mod ); ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++instruct modL_reg_reg(mRegL dst, mRegL src1, mRegL src2) %{ ++ match(Set dst (ModL src1 src2)); ++ format %{ "modL $dst, $src1, $src2 @modL_reg_reg" %} ++ ++ ins_encode %{ ++ Register dst = as_Register($dst$$reg); ++ Register op1 = as_Register($src1$$reg); ++ Register op2 = as_Register($src2$$reg); ++ ++ if (UseLEXT1) { ++ __ gsdmod(dst, op1, op2); ++ } else { ++ __ ddiv(op1, op2); ++ __ mfhi(dst); ++ } ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct mulI_Reg_Reg(mRegI dst, mRegI src1, mRegI src2) %{ ++ match(Set dst (MulI src1 src2)); ++ ++ ins_cost(300); ++ format %{ "mul $dst, $src1, $src2 @ mulI_Reg_Reg" %} ++ ins_encode %{ ++ Register src1 = $src1$$Register; ++ Register src2 = $src2$$Register; ++ Register dst = $dst$$Register; ++ ++ __ mul(dst, src1, src2); ++ %} ++ ins_pipe( ialu_mult ); ++%} ++ ++instruct maddI_Reg_Reg(mRegI dst, mRegI src1, mRegI src2, mRegI src3) %{ ++ match(Set dst (AddI (MulI src1 src2) src3)); ++ ++ ins_cost(999); ++ format %{ "madd $dst, $src1 * $src2 + $src3 #@maddI_Reg_Reg" %} ++ ins_encode %{ ++ Register src1 = $src1$$Register; ++ Register src2 = $src2$$Register; ++ Register src3 = $src3$$Register; ++ Register dst = $dst$$Register; ++ ++ __ mtlo(src3); ++ __ madd(src1, src2); ++ __ mflo(dst); ++ %} ++ ins_pipe( ialu_mult ); ++%} ++ ++instruct divI_Reg_Reg(mRegI dst, mRegI src1, mRegI src2) %{ ++ match(Set dst (DivI src1 src2)); ++ ++ ins_cost(300); ++ format %{ "div $dst, $src1, $src2 @ divI_Reg_Reg" %} ++ ins_encode %{ ++ Register src1 = $src1$$Register; ++ Register src2 = $src2$$Register; ++ Register dst = $dst$$Register; ++ ++ // In MIPS, div does not cause exception. ++ // We must trap an exception manually. ++ __ teq(R0, src2, 0x7); ++ ++ if (UseLEXT1) { ++ __ gsdiv(dst, src1, src2); ++ } else { ++ __ div(src1, src2); ++ ++ __ nop(); ++ __ nop(); ++ __ mflo(dst); ++ } ++ %} ++ ins_pipe( ialu_mod ); ++%} ++ ++instruct divF_Reg_Reg(regF dst, regF src1, regF src2) %{ ++ match(Set dst (DivF src1 src2)); ++ ++ ins_cost(300); ++ format %{ "divF $dst, $src1, $src2 @ divF_Reg_Reg" %} ++ ins_encode %{ ++ FloatRegister src1 = $src1$$FloatRegister; ++ FloatRegister src2 = $src2$$FloatRegister; ++ FloatRegister dst = $dst$$FloatRegister; ++ ++ /* Here do we need to trap an exception manually ? */ ++ __ div_s(dst, src1, src2); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct divD_Reg_Reg(regD dst, regD src1, regD src2) %{ ++ match(Set dst (DivD src1 src2)); ++ ++ ins_cost(300); ++ format %{ "divD $dst, $src1, $src2 @ divD_Reg_Reg" %} ++ ins_encode %{ ++ FloatRegister src1 = $src1$$FloatRegister; ++ FloatRegister src2 = $src2$$FloatRegister; ++ FloatRegister dst = $dst$$FloatRegister; ++ ++ /* Here do we need to trap an exception manually ? 
*/ ++ __ div_d(dst, src1, src2); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct mulL_reg_reg(mRegL dst, mRegL src1, mRegL src2) %{ ++ match(Set dst (MulL src1 src2)); ++ format %{ "mulL $dst, $src1, $src2 @mulL_reg_reg" %} ++ ins_encode %{ ++ Register dst = as_Register($dst$$reg); ++ Register op1 = as_Register($src1$$reg); ++ Register op2 = as_Register($src2$$reg); ++ ++ if (UseLEXT1) { ++ __ gsdmult(dst, op1, op2); ++ } else { ++ __ dmult(op1, op2); ++ __ mflo(dst); ++ } ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct mulL_reg_regI2L(mRegL dst, mRegL src1, mRegI src2) %{ ++ match(Set dst (MulL src1 (ConvI2L src2))); ++ format %{ "mulL $dst, $src1, $src2 @mulL_reg_regI2L" %} ++ ins_encode %{ ++ Register dst = as_Register($dst$$reg); ++ Register op1 = as_Register($src1$$reg); ++ Register op2 = as_Register($src2$$reg); ++ ++ if (UseLEXT1) { ++ __ gsdmult(dst, op1, op2); ++ } else { ++ __ dmult(op1, op2); ++ __ mflo(dst); ++ } ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct divL_reg_reg(mRegL dst, mRegL src1, mRegL src2) %{ ++ match(Set dst (DivL src1 src2)); ++ format %{ "divL $dst, $src1, $src2 @divL_reg_reg" %} ++ ++ ins_encode %{ ++ Register dst = as_Register($dst$$reg); ++ Register op1 = as_Register($src1$$reg); ++ Register op2 = as_Register($src2$$reg); ++ ++ if (UseLEXT1) { ++ __ gsddiv(dst, op1, op2); ++ } else { ++ __ ddiv(op1, op2); ++ __ mflo(dst); ++ } ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct addF_reg_reg(regF dst, regF src1, regF src2) %{ ++ match(Set dst (AddF src1 src2)); ++ format %{ "AddF $dst, $src1, $src2 @addF_reg_reg" %} ++ ins_encode %{ ++ FloatRegister src1 = as_FloatRegister($src1$$reg); ++ FloatRegister src2 = as_FloatRegister($src2$$reg); ++ FloatRegister dst = as_FloatRegister($dst$$reg); ++ ++ __ add_s(dst, src1, src2); ++ %} ++ ins_pipe( fpu_regF_regF ); ++%} ++ ++instruct subF_reg_reg(regF dst, regF src1, regF src2) %{ ++ match(Set dst (SubF src1 src2)); ++ format %{ "SubF $dst, $src1, $src2 @subF_reg_reg" %} ++ ins_encode %{ ++ FloatRegister src1 = as_FloatRegister($src1$$reg); ++ FloatRegister src2 = as_FloatRegister($src2$$reg); ++ FloatRegister dst = as_FloatRegister($dst$$reg); ++ ++ __ sub_s(dst, src1, src2); ++ %} ++ ins_pipe( fpu_regF_regF ); ++%} ++instruct addD_reg_reg(regD dst, regD src1, regD src2) %{ ++ match(Set dst (AddD src1 src2)); ++ format %{ "AddD $dst, $src1, $src2 @addD_reg_reg" %} ++ ins_encode %{ ++ FloatRegister src1 = as_FloatRegister($src1$$reg); ++ FloatRegister src2 = as_FloatRegister($src2$$reg); ++ FloatRegister dst = as_FloatRegister($dst$$reg); ++ ++ __ add_d(dst, src1, src2); ++ %} ++ ins_pipe( fpu_regF_regF ); ++%} ++ ++instruct subD_reg_reg(regD dst, regD src1, regD src2) %{ ++ match(Set dst (SubD src1 src2)); ++ format %{ "SubD $dst, $src1, $src2 @subD_reg_reg" %} ++ ins_encode %{ ++ FloatRegister src1 = as_FloatRegister($src1$$reg); ++ FloatRegister src2 = as_FloatRegister($src2$$reg); ++ FloatRegister dst = as_FloatRegister($dst$$reg); ++ ++ __ sub_d(dst, src1, src2); ++ %} ++ ins_pipe( fpu_regF_regF ); ++%} ++ ++instruct negF_reg(regF dst, regF src) %{ ++ match(Set dst (NegF src)); ++ format %{ "negF $dst, $src @negF_reg" %} ++ ins_encode %{ ++ FloatRegister src = as_FloatRegister($src$$reg); ++ FloatRegister dst = as_FloatRegister($dst$$reg); ++ ++ __ neg_s(dst, src); ++ %} ++ ins_pipe( fpu_regF_regF ); ++%} ++ ++instruct negD_reg(regD dst, regD src) %{ ++ match(Set dst (NegD src)); ++ format %{ "negD $dst, $src @negD_reg" %} ++ ins_encode %{ ++ FloatRegister src = as_FloatRegister($src$$reg); ++ 
FloatRegister dst = as_FloatRegister($dst$$reg); ++ ++ __ neg_d(dst, src); ++ %} ++ ins_pipe( fpu_regF_regF ); ++%} ++ ++ ++instruct mulF_reg_reg(regF dst, regF src1, regF src2) %{ ++ match(Set dst (MulF src1 src2)); ++ format %{ "MULF $dst, $src1, $src2 @mulF_reg_reg" %} ++ ins_encode %{ ++ FloatRegister src1 = $src1$$FloatRegister; ++ FloatRegister src2 = $src2$$FloatRegister; ++ FloatRegister dst = $dst$$FloatRegister; ++ ++ __ mul_s(dst, src1, src2); ++ %} ++ ins_pipe( fpu_regF_regF ); ++%} ++ ++// Mul two double precision floating piont number ++instruct mulD_reg_reg(regD dst, regD src1, regD src2) %{ ++ match(Set dst (MulD src1 src2)); ++ format %{ "MULD $dst, $src1, $src2 @mulD_reg_reg" %} ++ ins_encode %{ ++ FloatRegister src1 = $src1$$FloatRegister; ++ FloatRegister src2 = $src2$$FloatRegister; ++ FloatRegister dst = $dst$$FloatRegister; ++ ++ __ mul_d(dst, src1, src2); ++ %} ++ ins_pipe( fpu_regF_regF ); ++%} ++ ++instruct absF_reg(regF dst, regF src) %{ ++ match(Set dst (AbsF src)); ++ ins_cost(100); ++ format %{ "absF $dst, $src @absF_reg" %} ++ ins_encode %{ ++ FloatRegister src = as_FloatRegister($src$$reg); ++ FloatRegister dst = as_FloatRegister($dst$$reg); ++ ++ __ abs_s(dst, src); ++ %} ++ ins_pipe( fpu_regF_regF ); ++%} ++ ++ ++// intrinsics for math_native. ++// AbsD SqrtD CosD SinD TanD LogD Log10D ++ ++instruct absD_reg(regD dst, regD src) %{ ++ match(Set dst (AbsD src)); ++ ins_cost(100); ++ format %{ "absD $dst, $src @absD_reg" %} ++ ins_encode %{ ++ FloatRegister src = as_FloatRegister($src$$reg); ++ FloatRegister dst = as_FloatRegister($dst$$reg); ++ ++ __ abs_d(dst, src); ++ %} ++ ins_pipe( fpu_regF_regF ); ++%} ++ ++instruct sqrtD_reg(regD dst, regD src) %{ ++ match(Set dst (SqrtD src)); ++ ins_cost(100); ++ format %{ "SqrtD $dst, $src @sqrtD_reg" %} ++ ins_encode %{ ++ FloatRegister src = as_FloatRegister($src$$reg); ++ FloatRegister dst = as_FloatRegister($dst$$reg); ++ ++ __ sqrt_d(dst, src); ++ %} ++ ins_pipe( fpu_regF_regF ); ++%} ++ ++instruct sqrtF_reg(regF dst, regF src) %{ ++ match(Set dst (ConvD2F (SqrtD (ConvF2D src)))); ++ ins_cost(100); ++ format %{ "SqrtF $dst, $src @sqrtF_reg" %} ++ ins_encode %{ ++ FloatRegister src = as_FloatRegister($src$$reg); ++ FloatRegister dst = as_FloatRegister($dst$$reg); ++ ++ __ sqrt_s(dst, src); ++ %} ++ ins_pipe( fpu_regF_regF ); ++%} ++ ++// src1 * src2 + src3 ++instruct maddF_reg_reg(regF dst, regF src1, regF src2, regF src3) %{ ++ predicate(UseFMA); ++ match(Set dst (FmaF src3 (Binary src1 src2))); ++ ++ format %{ "madd_s $dst, $src3, $src2, $src1" %} ++ ++ ins_encode %{ ++ __ madd_s(as_FloatRegister($dst$$reg), as_FloatRegister($src3$$reg), ++ as_FloatRegister($src2$$reg), as_FloatRegister($src1$$reg)); ++ %} ++ ++ ins_pipe(fpu_regF_regF); ++%} ++ ++// src1 * src2 + src3 ++instruct maddD_reg_reg(regD dst, regD src1, regD src2, regD src3) %{ ++ predicate(UseFMA); ++ match(Set dst (FmaD src3 (Binary src1 src2))); ++ ++ format %{ "madd_d $dst, $src3, $src2, $src1" %} ++ ++ ins_encode %{ ++ __ madd_d(as_FloatRegister($dst$$reg), as_FloatRegister($src3$$reg), ++ as_FloatRegister($src2$$reg), as_FloatRegister($src1$$reg)); ++ %} ++ ++ ins_pipe(fpu_regF_regF); ++%} ++ ++// src1 * src2 - src3 ++instruct msubF_reg_reg(regF dst, regF src1, regF src2, regF src3, immF_0 zero) %{ ++ predicate(UseFMA); ++ match(Set dst (FmaF (NegF src3) (Binary src1 src2))); ++ ++ format %{ "msub_s $dst, $src3, $src2, $src1" %} ++ ++ ins_encode %{ ++ __ msub_s(as_FloatRegister($dst$$reg), as_FloatRegister($src3$$reg), ++ 
as_FloatRegister($src2$$reg), as_FloatRegister($src1$$reg)); ++ %} ++ ++ ins_pipe(fpu_regF_regF); ++%} ++ ++// src1 * src2 - src3 ++instruct msubD_reg_reg(regD dst, regD src1, regD src2, regD src3, immD_0 zero) %{ ++ predicate(UseFMA); ++ match(Set dst (FmaD (NegD src3) (Binary src1 src2))); ++ ++ format %{ "msub_d $dst, $src3, $src2, $src1" %} ++ ++ ins_encode %{ ++ __ msub_d(as_FloatRegister($dst$$reg), as_FloatRegister($src3$$reg), ++ as_FloatRegister($src2$$reg), as_FloatRegister($src1$$reg)); ++ %} ++ ++ ins_pipe(fpu_regF_regF); ++%} ++ ++// -src1 * src2 - src3 ++instruct mnaddF_reg_reg(regF dst, regF src1, regF src2, regF src3, immF_0 zero) %{ ++ predicate(UseFMA); ++ match(Set dst (FmaF (NegF src3) (Binary (NegF src1) src2))); ++ match(Set dst (FmaF (NegF src3) (Binary src1 (NegF src2)))); ++ ++ format %{ "nmadds $dst, $src3, $src2, $src1" %} ++ ++ ins_encode %{ ++ __ nmadd_s(as_FloatRegister($dst$$reg), as_FloatRegister($src3$$reg), ++ as_FloatRegister($src2$$reg), as_FloatRegister($src1$$reg)); ++ %} ++ ++ ins_pipe(fpu_regF_regF); ++%} ++ ++// -src1 * src2 - src3 ++instruct mnaddD_reg_reg(regD dst, regD src1, regD src2, regD src3, immD_0 zero) %{ ++ predicate(UseFMA); ++ match(Set dst (FmaD (NegD src3) (Binary (NegD src1) src2))); ++ match(Set dst (FmaD (NegD src3) (Binary src1 (NegD src2)))); ++ ++ format %{ "nmaddd $dst, $src3, $src2, $src1" %} ++ ++ ins_encode %{ ++ __ nmadd_d(as_FloatRegister($dst$$reg), as_FloatRegister($src3$$reg), ++ as_FloatRegister($src2$$reg), as_FloatRegister($src1$$reg)); ++ %} ++ ++ ins_pipe(fpu_regF_regF); ++%} ++ ++// -src1 * src2 + src3 ++instruct mnsubF_reg_reg(regF dst, regF src1, regF src2, regF src3) %{ ++ predicate(UseFMA); ++ match(Set dst (FmaF src3 (Binary (NegF src1) src2))); ++ match(Set dst (FmaF src3 (Binary src1 (NegF src2)))); ++ ++ format %{ "nmsubs $dst, $src3, $src2, $src1" %} ++ ++ ins_encode %{ ++ __ nmsub_s(as_FloatRegister($dst$$reg), as_FloatRegister($src3$$reg), ++ as_FloatRegister($src2$$reg), as_FloatRegister($src1$$reg)); ++ %} ++ ++ ins_pipe(fpu_regF_regF); ++%} ++ ++// -src1 * src2 + src3 ++instruct mnsubD_reg_reg(regD dst, regD src1, regD src2, regD src3) %{ ++ predicate(UseFMA); ++ match(Set dst (FmaD src3 (Binary (NegD src1) src2))); ++ match(Set dst (FmaD src3 (Binary src1 (NegD src2)))); ++ ++ format %{ "nmsubd $dst, $src3, $src2, $src1" %} ++ ++ ins_encode %{ ++ __ nmsub_d(as_FloatRegister($dst$$reg), as_FloatRegister($src3$$reg), ++ as_FloatRegister($src2$$reg), as_FloatRegister($src1$$reg)); ++ %} ++ ++ ins_pipe(fpu_regF_regF); ++%} ++//----------------------------------Logical Instructions---------------------- ++//__________________________________Integer Logical Instructions------------- ++ ++//And Instuctions ++// And Register with Immediate ++instruct andI_Reg_immI(mRegI dst, mRegI src1, immI src2) %{ ++ match(Set dst (AndI src1 src2)); ++ ++ format %{ "and $dst, $src1, $src2 #@andI_Reg_immI" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src = $src1$$Register; ++ int val = $src2$$constant; ++ ++ __ move(AT, val); ++ __ andr(dst, src, AT); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++instruct andI_Reg_imm_0_65535(mRegI dst, mRegI src1, immI_0_65535 src2) %{ ++ match(Set dst (AndI src1 src2)); ++ ins_cost(60); ++ ++ format %{ "and $dst, $src1, $src2 #@andI_Reg_imm_0_65535" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src = $src1$$Register; ++ int val = $src2$$constant; ++ ++ __ andi(dst, src, val); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++instruct 
andI_Reg_immI_nonneg_mask(mRegI dst, mRegI src1, immI_nonneg_mask mask) %{ ++ match(Set dst (AndI src1 mask)); ++ ins_cost(60); ++ ++ format %{ "and $dst, $src1, $mask #@andI_Reg_immI_nonneg_mask" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src = $src1$$Register; ++ int size = Assembler::is_int_mask($mask$$constant); ++ ++ __ ext(dst, src, 0, size); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++instruct andL_Reg_immL_nonneg_mask(mRegL dst, mRegL src1, immL_nonneg_mask mask) %{ ++ match(Set dst (AndL src1 mask)); ++ ins_cost(60); ++ ++ format %{ "and $dst, $src1, $mask #@andL_Reg_immL_nonneg_mask" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src = $src1$$Register; ++ int size = Assembler::is_jlong_mask($mask$$constant); ++ ++ __ dext(dst, src, 0, size); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++instruct xorI_Reg_imm_0_65535(mRegI dst, mRegI src1, immI_0_65535 src2) %{ ++ match(Set dst (XorI src1 src2)); ++ ins_cost(60); ++ ++ format %{ "xori $dst, $src1, $src2 #@xorI_Reg_imm_0_65535" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src = $src1$$Register; ++ int val = $src2$$constant; ++ ++ __ xori(dst, src, val); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++instruct xorI_Reg_immI_M1(mRegI dst, mRegI src1, immI_M1 M1) %{ ++ match(Set dst (XorI src1 M1)); ++ predicate(UseLEXT3); ++ ins_cost(60); ++ ++ format %{ "xor $dst, $src1, $M1 #@xorI_Reg_immI_M1" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src = $src1$$Register; ++ ++ __ gsorn(dst, R0, src); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++instruct xorL2I_Reg_immI_M1(mRegI dst, mRegL src1, immI_M1 M1) %{ ++ match(Set dst (XorI (ConvL2I src1) M1)); ++ predicate(UseLEXT3); ++ ins_cost(60); ++ ++ format %{ "xor $dst, $src1, $M1 #@xorL2I_Reg_immI_M1" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src = $src1$$Register; ++ ++ __ gsorn(dst, R0, src); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++instruct xorL_Reg_imm_0_65535(mRegL dst, mRegL src1, immL_0_65535 src2) %{ ++ match(Set dst (XorL src1 src2)); ++ ins_cost(60); ++ ++ format %{ "xori $dst, $src1, $src2 #@xorL_Reg_imm_0_65535" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src = $src1$$Register; ++ int val = $src2$$constant; ++ ++ __ xori(dst, src, val); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++/* ++instruct xorL_Reg_immL_M1(mRegL dst, mRegL src1, immL_M1 M1) %{ ++ match(Set dst (XorL src1 M1)); ++ predicate(UseLEXT3); ++ ins_cost(60); ++ ++ format %{ "xor $dst, $src1, $M1 #@xorL_Reg_immL_M1" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src = $src1$$Register; ++ ++ __ gsorn(dst, R0, src); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++*/ ++ ++instruct lbu_and_lmask(mRegI dst, memory mem, immI_255 mask) %{ ++ match(Set dst (AndI mask (LoadB mem))); ++ ins_cost(60); ++ ++ format %{ "lhu $dst, $mem #@lbu_and_lmask" %} ++ ins_encode %{ ++ __ loadstore_enc($dst$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::LOAD_U_BYTE); ++ %} ++ ins_pipe( ialu_loadI ); ++%} ++ ++instruct lbu_and_rmask(mRegI dst, memory mem, immI_255 mask) %{ ++ match(Set dst (AndI (LoadB mem) mask)); ++ ins_cost(60); ++ ++ format %{ "lhu $dst, $mem #@lbu_and_rmask" %} ++ ins_encode %{ ++ __ loadstore_enc($dst$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::LOAD_U_BYTE); ++ %} ++ ins_pipe( ialu_loadI ); ++%} ++ ++instruct andI_Reg_Reg(mRegI dst, mRegI src1, mRegI src2) %{ ++ match(Set dst (AndI src1 src2)); ++ ++ 
format %{ "and $dst, $src1, $src2 #@andI_Reg_Reg" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src1 = $src1$$Register; ++ Register src2 = $src2$$Register; ++ __ andr(dst, src1, src2); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++instruct andnI_Reg_nReg(mRegI dst, mRegI src1, mRegI src2, immI_M1 M1) %{ ++ match(Set dst (AndI src1 (XorI src2 M1))); ++ predicate(UseLEXT3); ++ ++ format %{ "andn $dst, $src1, $src2 #@andnI_Reg_nReg" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src1 = $src1$$Register; ++ Register src2 = $src2$$Register; ++ ++ __ gsandn(dst, src1, src2); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++instruct ornI_Reg_nReg(mRegI dst, mRegI src1, mRegI src2, immI_M1 M1) %{ ++ match(Set dst (OrI src1 (XorI src2 M1))); ++ predicate(UseLEXT3); ++ ++ format %{ "orn $dst, $src1, $src2 #@ornI_Reg_nReg" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src1 = $src1$$Register; ++ Register src2 = $src2$$Register; ++ ++ __ gsorn(dst, src1, src2); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++instruct andnI_nReg_Reg(mRegI dst, mRegI src1, mRegI src2, immI_M1 M1) %{ ++ match(Set dst (AndI (XorI src1 M1) src2)); ++ predicate(UseLEXT3); ++ ++ format %{ "andn $dst, $src2, $src1 #@andnI_nReg_Reg" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src1 = $src1$$Register; ++ Register src2 = $src2$$Register; ++ ++ __ gsandn(dst, src2, src1); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++instruct ornI_nReg_Reg(mRegI dst, mRegI src1, mRegI src2, immI_M1 M1) %{ ++ match(Set dst (OrI (XorI src1 M1) src2)); ++ predicate(UseLEXT3); ++ ++ format %{ "orn $dst, $src2, $src1 #@ornI_nReg_Reg" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src1 = $src1$$Register; ++ Register src2 = $src2$$Register; ++ ++ __ gsorn(dst, src2, src1); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++// And Long Register with Register ++instruct andL_Reg_Reg(mRegL dst, mRegL src1, mRegL src2) %{ ++ match(Set dst (AndL src1 src2)); ++ format %{ "AND $dst, $src1, $src2 @ andL_Reg_Reg\n\t" %} ++ ins_encode %{ ++ Register dst_reg = as_Register($dst$$reg); ++ Register src1_reg = as_Register($src1$$reg); ++ Register src2_reg = as_Register($src2$$reg); ++ ++ __ andr(dst_reg, src1_reg, src2_reg); ++ %} ++ ins_pipe( ialu_regL_regL ); ++%} ++ ++instruct andL_Reg_Reg_convI2L(mRegL dst, mRegL src1, mRegI src2) %{ ++ match(Set dst (AndL src1 (ConvI2L src2))); ++ format %{ "AND $dst, $src1, $src2 @ andL_Reg_Reg_convI2L\n\t" %} ++ ins_encode %{ ++ Register dst_reg = as_Register($dst$$reg); ++ Register src1_reg = as_Register($src1$$reg); ++ Register src2_reg = as_Register($src2$$reg); ++ ++ __ andr(dst_reg, src1_reg, src2_reg); ++ %} ++ ins_pipe( ialu_regL_regL ); ++%} ++ ++instruct andL_Reg_imm_0_65535(mRegL dst, mRegL src1, immL_0_65535 src2) %{ ++ match(Set dst (AndL src1 src2)); ++ ins_cost(60); ++ ++ format %{ "and $dst, $src1, $src2 #@andL_Reg_imm_0_65535" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src = $src1$$Register; ++ long val = $src2$$constant; ++ ++ __ andi(dst, src, val); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++instruct andL2I_Reg_imm_0_65535(mRegI dst, mRegL src1, immL_0_65535 src2) %{ ++ match(Set dst (ConvL2I (AndL src1 src2))); ++ ins_cost(60); ++ ++ format %{ "and $dst, $src1, $src2 #@andL2I_Reg_imm_0_65535" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src = $src1$$Register; ++ long val = $src2$$constant; ++ ++ __ andi(dst, src, val); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++/* 
++instruct andnL_Reg_nReg(mRegL dst, mRegL src1, mRegL src2, immL_M1 M1) %{ ++ match(Set dst (AndL src1 (XorL src2 M1))); ++ predicate(UseLEXT3); ++ ++ format %{ "andn $dst, $src1, $src2 #@andnL_Reg_nReg" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src1 = $src1$$Register; ++ Register src2 = $src2$$Register; ++ ++ __ gsandn(dst, src1, src2); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++*/ ++ ++/* ++instruct ornL_Reg_nReg(mRegL dst, mRegL src1, mRegL src2, immL_M1 M1) %{ ++ match(Set dst (OrL src1 (XorL src2 M1))); ++ predicate(UseLEXT3); ++ ++ format %{ "orn $dst, $src1, $src2 #@ornL_Reg_nReg" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src1 = $src1$$Register; ++ Register src2 = $src2$$Register; ++ ++ __ gsorn(dst, src1, src2); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++*/ ++ ++/* ++instruct andnL_nReg_Reg(mRegL dst, mRegL src1, mRegL src2, immL_M1 M1) %{ ++ match(Set dst (AndL (XorL src1 M1) src2)); ++ predicate(UseLEXT3); ++ ++ format %{ "andn $dst, $src2, $src1 #@andnL_nReg_Reg" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src1 = $src1$$Register; ++ Register src2 = $src2$$Register; ++ ++ __ gsandn(dst, src2, src1); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++*/ ++ ++/* ++instruct ornL_nReg_Reg(mRegL dst, mRegL src1, mRegL src2, immL_M1 M1) %{ ++ match(Set dst (OrL (XorL src1 M1) src2)); ++ predicate(UseLEXT3); ++ ++ format %{ "orn $dst, $src2, $src1 #@ornL_nReg_Reg" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src1 = $src1$$Register; ++ Register src2 = $src2$$Register; ++ ++ __ gsorn(dst, src2, src1); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++*/ ++ ++instruct andL_Reg_immL_M8(mRegL dst, immL_M8 M8) %{ ++ match(Set dst (AndL dst M8)); ++ ins_cost(60); ++ ++ format %{ "and $dst, $dst, $M8 #@andL_Reg_immL_M8" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ ++ __ dins(dst, R0, 0, 3); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++instruct andL_Reg_immL_M5(mRegL dst, immL_M5 M5) %{ ++ match(Set dst (AndL dst M5)); ++ ins_cost(60); ++ ++ format %{ "and $dst, $dst, $M5 #@andL_Reg_immL_M5" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ ++ __ dins(dst, R0, 2, 1); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++instruct andL_Reg_immL_M7(mRegL dst, immL_M7 M7) %{ ++ match(Set dst (AndL dst M7)); ++ ins_cost(60); ++ ++ format %{ "and $dst, $dst, $M7 #@andL_Reg_immL_M7" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ ++ __ dins(dst, R0, 1, 2); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++instruct andL_Reg_immL_M4(mRegL dst, immL_M4 M4) %{ ++ match(Set dst (AndL dst M4)); ++ ins_cost(60); ++ ++ format %{ "and $dst, $dst, $M4 #@andL_Reg_immL_M4" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ ++ __ dins(dst, R0, 0, 2); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++instruct andL_Reg_immL_M121(mRegL dst, immL_M121 M121) %{ ++ match(Set dst (AndL dst M121)); ++ ins_cost(60); ++ ++ format %{ "and $dst, $dst, $M121 #@andL_Reg_immL_M121" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ ++ __ dins(dst, R0, 3, 4); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++// Or Long Register with Register ++instruct orL_Reg_Reg(mRegL dst, mRegL src1, mRegL src2) %{ ++ match(Set dst (OrL src1 src2)); ++ format %{ "OR $dst, $src1, $src2 @ orL_Reg_Reg\t" %} ++ ins_encode %{ ++ Register dst_reg = $dst$$Register; ++ Register src1_reg = $src1$$Register; ++ Register src2_reg = $src2$$Register; ++ ++ __ orr(dst_reg, src1_reg, src2_reg); ++ %} ++ ins_pipe( ialu_regL_regL ); ++%} ++ ++instruct 
orL_Reg_P2XReg(mRegL dst, mRegP src1, mRegL src2) %{ ++ match(Set dst (OrL (CastP2X src1) src2)); ++ format %{ "OR $dst, $src1, $src2 @ orL_Reg_P2XReg\t" %} ++ ins_encode %{ ++ Register dst_reg = $dst$$Register; ++ Register src1_reg = $src1$$Register; ++ Register src2_reg = $src2$$Register; ++ ++ __ orr(dst_reg, src1_reg, src2_reg); ++ %} ++ ins_pipe( ialu_regL_regL ); ++%} ++ ++// Xor Long Register with Register ++instruct xorL_Reg_Reg(mRegL dst, mRegL src1, mRegL src2) %{ ++ match(Set dst (XorL src1 src2)); ++ format %{ "XOR $dst, $src1, $src2 @ xorL_Reg_Reg\t" %} ++ ins_encode %{ ++ Register dst_reg = as_Register($dst$$reg); ++ Register src1_reg = as_Register($src1$$reg); ++ Register src2_reg = as_Register($src2$$reg); ++ ++ __ xorr(dst_reg, src1_reg, src2_reg); ++ %} ++ ins_pipe( ialu_regL_regL ); ++%} ++ ++// Shift Left by 8-bit immediate ++instruct salI_Reg_imm(mRegI dst, mRegI src, immI8 shift) %{ ++ match(Set dst (LShiftI src shift)); ++ ++ format %{ "SHL $dst, $src, $shift #@salI_Reg_imm" %} ++ ins_encode %{ ++ Register src = $src$$Register; ++ Register dst = $dst$$Register; ++ int shamt = $shift$$constant; ++ ++ __ sll(dst, src, shamt); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++instruct salL2I_Reg_imm(mRegI dst, mRegL src, immI8 shift) %{ ++ match(Set dst (LShiftI (ConvL2I src) shift)); ++ ++ format %{ "SHL $dst, $src, $shift #@salL2I_Reg_imm" %} ++ ins_encode %{ ++ Register src = $src$$Register; ++ Register dst = $dst$$Register; ++ int shamt = $shift$$constant; ++ ++ __ sll(dst, src, shamt); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++instruct salI_Reg_imm_and_M65536(mRegI dst, mRegI src, immI_16 shift, immI_M65536 mask) %{ ++ match(Set dst (AndI (LShiftI src shift) mask)); ++ ++ format %{ "SHL $dst, $src, $shift #@salI_Reg_imm_and_M65536" %} ++ ins_encode %{ ++ Register src = $src$$Register; ++ Register dst = $dst$$Register; ++ ++ __ sll(dst, src, 16); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++instruct land7_2_s(mRegI dst, mRegL src, immL_7 seven, immI_16 sixteen) ++%{ ++ match(Set dst (RShiftI (LShiftI (ConvL2I (AndL src seven)) sixteen) sixteen)); ++ ++ format %{ "andi $dst, $src, 7\t# @land7_2_s" %} ++ ins_encode %{ ++ Register src = $src$$Register; ++ Register dst = $dst$$Register; ++ ++ __ andi(dst, src, 7); ++ %} ++ ins_pipe(ialu_regI_regI); ++%} ++ ++// Logical Shift Right by 16, followed by Arithmetic Shift Left by 16. ++// This idiom is used by the compiler the i2s bytecode. ++instruct i2s(mRegI dst, mRegI src, immI_16 sixteen) ++%{ ++ match(Set dst (RShiftI (LShiftI src sixteen) sixteen)); ++ ++ format %{ "i2s $dst, $src\t# @i2s" %} ++ ins_encode %{ ++ Register src = $src$$Register; ++ Register dst = $dst$$Register; ++ ++ __ seh(dst, src); ++ %} ++ ins_pipe(ialu_regI_regI); ++%} ++ ++// Logical Shift Right by 24, followed by Arithmetic Shift Left by 24. ++// This idiom is used by the compiler for the i2b bytecode. 
++instruct i2b(mRegI dst, mRegI src, immI_24 twentyfour) ++%{ ++ match(Set dst (RShiftI (LShiftI src twentyfour) twentyfour)); ++ ++ format %{ "i2b $dst, $src\t# @i2b" %} ++ ins_encode %{ ++ Register src = $src$$Register; ++ Register dst = $dst$$Register; ++ ++ __ seb(dst, src); ++ %} ++ ins_pipe(ialu_regI_regI); ++%} ++ ++ ++instruct salI_RegL2I_imm(mRegI dst, mRegL src, immI8 shift) %{ ++ match(Set dst (LShiftI (ConvL2I src) shift)); ++ ++ format %{ "SHL $dst, $src, $shift #@salI_RegL2I_imm" %} ++ ins_encode %{ ++ Register src = $src$$Register; ++ Register dst = $dst$$Register; ++ int shamt = $shift$$constant; ++ ++ __ sll(dst, src, shamt); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++// Shift Left by 8-bit immediate ++instruct salI_Reg_Reg(mRegI dst, mRegI src, mRegI shift) %{ ++ match(Set dst (LShiftI src shift)); ++ ++ format %{ "SHL $dst, $src, $shift #@salI_Reg_Reg" %} ++ ins_encode %{ ++ Register src = $src$$Register; ++ Register dst = $dst$$Register; ++ Register shamt = $shift$$Register; ++ __ sllv(dst, src, shamt); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++ ++// Shift Left Long ++instruct salL_Reg_imm(mRegL dst, mRegL src, immI8 shift) %{ ++ match(Set dst (LShiftL src shift)); ++ ins_cost(100); ++ format %{ "salL $dst, $src, $shift @ salL_Reg_imm" %} ++ ins_encode %{ ++ Register src_reg = as_Register($src$$reg); ++ Register dst_reg = as_Register($dst$$reg); ++ int shamt = $shift$$constant; ++ ++ if (__ is_simm(shamt, 5)) ++ __ dsll(dst_reg, src_reg, shamt); ++ else { ++ int sa = Assembler::low(shamt, 6); ++ if (sa < 32) { ++ __ dsll(dst_reg, src_reg, sa); ++ } else { ++ __ dsll32(dst_reg, src_reg, sa - 32); ++ } ++ } ++ %} ++ ins_pipe( ialu_regL_regL ); ++%} ++ ++instruct salL_RegI2L_imm(mRegL dst, mRegI src, immI8 shift) %{ ++ match(Set dst (LShiftL (ConvI2L src) shift)); ++ ins_cost(100); ++ format %{ "salL $dst, $src, $shift @ salL_RegI2L_imm" %} ++ ins_encode %{ ++ Register src_reg = as_Register($src$$reg); ++ Register dst_reg = as_Register($dst$$reg); ++ int shamt = $shift$$constant; ++ ++ if (__ is_simm(shamt, 5)) ++ __ dsll(dst_reg, src_reg, shamt); ++ else { ++ int sa = Assembler::low(shamt, 6); ++ if (sa < 32) { ++ __ dsll(dst_reg, src_reg, sa); ++ } else { ++ __ dsll32(dst_reg, src_reg, sa - 32); ++ } ++ } ++ %} ++ ins_pipe( ialu_regL_regL ); ++%} ++ ++// Shift Left Long ++instruct salL_Reg_Reg(mRegL dst, mRegL src, mRegI shift) %{ ++ match(Set dst (LShiftL src shift)); ++ ins_cost(100); ++ format %{ "salL $dst, $src, $shift @ salL_Reg_Reg" %} ++ ins_encode %{ ++ Register src_reg = as_Register($src$$reg); ++ Register dst_reg = as_Register($dst$$reg); ++ ++ __ dsllv(dst_reg, src_reg, $shift$$Register); ++ %} ++ ins_pipe( ialu_regL_regL ); ++%} ++ ++// Shift Right Long ++instruct sarL_Reg_imm(mRegL dst, mRegL src, immI8 shift) %{ ++ match(Set dst (RShiftL src shift)); ++ ins_cost(100); ++ format %{ "sarL $dst, $src, $shift @ sarL_Reg_imm" %} ++ ins_encode %{ ++ Register src_reg = as_Register($src$$reg); ++ Register dst_reg = as_Register($dst$$reg); ++ int shamt = ($shift$$constant & 0x3f); ++ if (__ is_simm(shamt, 5)) ++ __ dsra(dst_reg, src_reg, shamt); ++ else { ++ int sa = Assembler::low(shamt, 6); ++ if (sa < 32) { ++ __ dsra(dst_reg, src_reg, sa); ++ } else { ++ __ dsra32(dst_reg, src_reg, sa - 32); ++ } ++ } ++ %} ++ ins_pipe( ialu_regL_regL ); ++%} ++ ++instruct sarL2I_Reg_immI_32_63(mRegI dst, mRegL src, immI_32_63 shift) %{ ++ match(Set dst (ConvL2I (RShiftL src shift))); ++ ins_cost(100); ++ format %{ "sarL $dst, $src, $shift @ sarL2I_Reg_immI_32_63" %} 
++ ins_encode %{ ++ Register src_reg = as_Register($src$$reg); ++ Register dst_reg = as_Register($dst$$reg); ++ int shamt = $shift$$constant; ++ ++ __ dsra32(dst_reg, src_reg, shamt - 32); ++ %} ++ ins_pipe( ialu_regL_regL ); ++%} ++ ++// Shift Right Long arithmetically ++instruct sarL_Reg_Reg(mRegL dst, mRegL src, mRegI shift) %{ ++ match(Set dst (RShiftL src shift)); ++ ins_cost(100); ++ format %{ "sarL $dst, $src, $shift @ sarL_Reg_Reg" %} ++ ins_encode %{ ++ Register src_reg = as_Register($src$$reg); ++ Register dst_reg = as_Register($dst$$reg); ++ ++ __ dsrav(dst_reg, src_reg, $shift$$Register); ++ %} ++ ins_pipe( ialu_regL_regL ); ++%} ++ ++// Shift Right Long logically ++instruct slrL_Reg_Reg(mRegL dst, mRegL src, mRegI shift) %{ ++ match(Set dst (URShiftL src shift)); ++ ins_cost(100); ++ format %{ "slrL $dst, $src, $shift @ slrL_Reg_Reg" %} ++ ins_encode %{ ++ Register src_reg = as_Register($src$$reg); ++ Register dst_reg = as_Register($dst$$reg); ++ ++ __ dsrlv(dst_reg, src_reg, $shift$$Register); ++ %} ++ ins_pipe( ialu_regL_regL ); ++%} ++ ++instruct slrL_Reg_immI_0_31(mRegL dst, mRegL src, immI_0_31 shift) %{ ++ match(Set dst (URShiftL src shift)); ++ ins_cost(80); ++ format %{ "slrL $dst, $src, $shift @ slrL_Reg_immI_0_31" %} ++ ins_encode %{ ++ Register src_reg = as_Register($src$$reg); ++ Register dst_reg = as_Register($dst$$reg); ++ int shamt = $shift$$constant; ++ ++ __ dsrl(dst_reg, src_reg, shamt); ++ %} ++ ins_pipe( ialu_regL_regL ); ++%} ++ ++instruct slrL_Reg_immI_0_31_and_max_int(mRegI dst, mRegL src, immI_0_31 shift, immI_MaxI max_int) %{ ++ match(Set dst (AndI (ConvL2I (URShiftL src shift)) max_int)); ++ ins_cost(80); ++ format %{ "dext $dst, $src, $shift, 31 @ slrL_Reg_immI_0_31_and_max_int" %} ++ ins_encode %{ ++ Register src_reg = as_Register($src$$reg); ++ Register dst_reg = as_Register($dst$$reg); ++ int shamt = $shift$$constant; ++ ++ __ dext(dst_reg, src_reg, shamt, 31); ++ %} ++ ins_pipe( ialu_regL_regL ); ++%} ++ ++instruct slrL_P2XReg_immI_0_31(mRegL dst, mRegP src, immI_0_31 shift) %{ ++ match(Set dst (URShiftL (CastP2X src) shift)); ++ ins_cost(80); ++ format %{ "slrL $dst, $src, $shift @ slrL_P2XReg_immI_0_31" %} ++ ins_encode %{ ++ Register src_reg = as_Register($src$$reg); ++ Register dst_reg = as_Register($dst$$reg); ++ int shamt = $shift$$constant; ++ ++ __ dsrl(dst_reg, src_reg, shamt); ++ %} ++ ins_pipe( ialu_regL_regL ); ++%} ++ ++instruct slrL_Reg_immI_32_63(mRegL dst, mRegL src, immI_32_63 shift) %{ ++ match(Set dst (URShiftL src shift)); ++ ins_cost(80); ++ format %{ "slrL $dst, $src, $shift @ slrL_Reg_immI_32_63" %} ++ ins_encode %{ ++ Register src_reg = as_Register($src$$reg); ++ Register dst_reg = as_Register($dst$$reg); ++ int shamt = $shift$$constant; ++ ++ __ dsrl32(dst_reg, src_reg, shamt - 32); ++ %} ++ ins_pipe( ialu_regL_regL ); ++%} ++ ++instruct slrL_Reg_immI_convL2I(mRegI dst, mRegL src, immI_32_63 shift) %{ ++ match(Set dst (ConvL2I (URShiftL src shift))); ++ predicate(n->in(1)->in(2)->get_int() > 32); ++ ins_cost(80); ++ format %{ "slrL $dst, $src, $shift @ slrL_Reg_immI_convL2I" %} ++ ins_encode %{ ++ Register src_reg = as_Register($src$$reg); ++ Register dst_reg = as_Register($dst$$reg); ++ int shamt = $shift$$constant; ++ ++ __ dsrl32(dst_reg, src_reg, shamt - 32); ++ %} ++ ins_pipe( ialu_regL_regL ); ++%} ++ ++instruct slrL_P2XReg_immI_32_63(mRegL dst, mRegP src, immI_32_63 shift) %{ ++ match(Set dst (URShiftL (CastP2X src) shift)); ++ ins_cost(80); ++ format %{ "slrL $dst, $src, $shift @ slrL_P2XReg_immI_32_63" %} ++ 
ins_encode %{ ++ Register src_reg = as_Register($src$$reg); ++ Register dst_reg = as_Register($dst$$reg); ++ int shamt = $shift$$constant; ++ ++ __ dsrl32(dst_reg, src_reg, shamt - 32); ++ %} ++ ins_pipe( ialu_regL_regL ); ++%} ++ ++// Xor Instructions ++// Xor Register with Register ++instruct xorI_Reg_Reg(mRegI dst, mRegI src1, mRegI src2) %{ ++ match(Set dst (XorI src1 src2)); ++ ++ format %{ "XOR $dst, $src1, $src2 #@xorI_Reg_Reg" %} ++ ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src1 = $src1$$Register; ++ Register src2 = $src2$$Register; ++ __ xorr(dst, src1, src2); ++ %} ++ ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++// Or Instructions ++instruct orI_Reg_imm(mRegI dst, mRegI src1, immI_0_32767 src2) %{ ++ match(Set dst (OrI src1 src2)); ++ ++ format %{ "OR $dst, $src1, $src2 #@orI_Reg_imm" %} ++ ins_encode %{ ++ __ ori($dst$$Register, $src1$$Register, $src2$$constant); ++ %} ++ ++ ins_pipe( ialu_regI_regI ); ++%} ++// Or Register with Register ++instruct orI_Reg_Reg(mRegI dst, mRegI src1, mRegI src2) %{ ++ match(Set dst (OrI src1 src2)); ++ ++ format %{ "OR $dst, $src1, $src2 #@orI_Reg_Reg" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src1 = $src1$$Register; ++ Register src2 = $src2$$Register; ++ __ orr(dst, src1, src2); ++ %} ++ ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++instruct rotI_shr_logical_Reg(mRegI dst, mRegI src, immI_0_31 rshift, immI_0_31 lshift, immI_1 one) %{ ++ match(Set dst (OrI (URShiftI src rshift) (LShiftI (AndI src one) lshift))); ++ predicate(32 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()))); ++ ++ format %{ "rotr $dst, $src, 1 ...\n\t" ++ "srl $dst, $dst, ($rshift-1) @ rotI_shr_logical_Reg" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src = $src$$Register; ++ int rshift = $rshift$$constant; ++ ++ __ rotr(dst, src, 1); ++ if (rshift - 1) { ++ __ srl(dst, dst, rshift - 1); ++ } ++ %} ++ ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++instruct orI_Reg_castP2X(mRegL dst, mRegL src1, mRegP src2) %{ ++ match(Set dst (OrI src1 (CastP2X src2))); ++ ++ format %{ "OR $dst, $src1, $src2 #@orI_Reg_castP2X" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src1 = $src1$$Register; ++ Register src2 = $src2$$Register; ++ __ orr(dst, src1, src2); ++ %} ++ ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++// Logical Shift Right by 8-bit immediate ++instruct shr_logical_Reg_imm(mRegI dst, mRegI src, immI8 shift) %{ ++ match(Set dst (URShiftI src shift)); ++ //effect(KILL cr); ++ ++ format %{ "SRL $dst, $src, $shift #@shr_logical_Reg_imm" %} ++ ins_encode %{ ++ Register src = $src$$Register; ++ Register dst = $dst$$Register; ++ int shift = $shift$$constant; ++ ++ __ srl(dst, src, shift); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++instruct shr_logical_Reg_imm_nonneg_mask(mRegI dst, mRegI src, immI_0_31 shift, immI_nonneg_mask mask) %{ ++ match(Set dst (AndI (URShiftI src shift) mask)); ++ ++ format %{ "ext $dst, $src, $shift, one-bits($mask) #@shr_logical_Reg_imm_nonneg_mask" %} ++ ins_encode %{ ++ Register src = $src$$Register; ++ Register dst = $dst$$Register; ++ int pos = $shift$$constant; ++ int size = Assembler::is_int_mask($mask$$constant); ++ ++ __ ext(dst, src, pos, size); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++instruct rolI_Reg_immI_0_31(mRegI dst, immI_0_31 lshift, immI_0_31 rshift) ++%{ ++ predicate(0 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()) & 0x1f)); ++ match(Set dst (OrI (LShiftI dst lshift) (URShiftI dst rshift))); ++ ++ ins_cost(100); ++ format %{ "rotr $dst, 
$dst, $rshift #@rolI_Reg_immI_0_31" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ int sa = $rshift$$constant; ++ ++ __ rotr(dst, dst, sa); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++instruct rolL_Reg_immI_0_31(mRegL dst, mRegL src, immI_32_63 lshift, immI_0_31 rshift) ++%{ ++ predicate(0 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()) & 0x3f)); ++ match(Set dst (OrL (LShiftL src lshift) (URShiftL src rshift))); ++ ++ ins_cost(100); ++ format %{ "rotr $dst, $src, $rshift #@rolL_Reg_immI_0_31" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src = $src$$Register; ++ int sa = $rshift$$constant; ++ ++ __ drotr(dst, src, sa); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++instruct rolL_Reg_immI_32_63(mRegL dst, mRegL src, immI_0_31 lshift, immI_32_63 rshift) ++%{ ++ predicate(0 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()) & 0x3f)); ++ match(Set dst (OrL (LShiftL src lshift) (URShiftL src rshift))); ++ ++ ins_cost(100); ++ format %{ "rotr $dst, $src, $rshift #@rolL_Reg_immI_32_63" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src = $src$$Register; ++ int sa = $rshift$$constant; ++ ++ __ drotr32(dst, src, sa - 32); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++instruct rorI_Reg_immI_0_31(mRegI dst, mRegI src, immI_0_31 rshift, immI_0_31 lshift) ++%{ ++ predicate(0 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()) & 0x1f)); ++ match(Set dst (OrI (URShiftI src rshift) (LShiftI src lshift))); ++ ++ ins_cost(100); ++ format %{ "rotr $dst, $src, $rshift #@rorI_Reg_immI_0_31" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src = $src$$Register; ++ int sa = $rshift$$constant; ++ ++ __ rotr(dst, src, sa); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++instruct rorL_Reg_immI_0_31(mRegL dst, mRegL src, immI_0_31 rshift, immI_32_63 lshift) ++%{ ++ predicate(0 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()) & 0x3f)); ++ match(Set dst (OrL (URShiftL src rshift) (LShiftL src lshift))); ++ ++ ins_cost(100); ++ format %{ "rotr $dst, $src, $rshift #@rorL_Reg_immI_0_31" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src = $src$$Register; ++ int sa = $rshift$$constant; ++ ++ __ drotr(dst, src, sa); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++instruct rorL_Reg_immI_32_63(mRegL dst, mRegL src, immI_32_63 rshift, immI_0_31 lshift) ++%{ ++ predicate(0 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()) & 0x3f)); ++ match(Set dst (OrL (URShiftL src rshift) (LShiftL src lshift))); ++ ++ ins_cost(100); ++ format %{ "rotr $dst, $src, $rshift #@rorL_Reg_immI_32_63" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src = $src$$Register; ++ int sa = $rshift$$constant; ++ ++ __ drotr32(dst, src, sa - 32); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++// Logical Shift Right ++instruct shr_logical_Reg_Reg(mRegI dst, mRegI src, mRegI shift) %{ ++ match(Set dst (URShiftI src shift)); ++ ++ format %{ "SRL $dst, $src, $shift #@shr_logical_Reg_Reg" %} ++ ins_encode %{ ++ Register src = $src$$Register; ++ Register dst = $dst$$Register; ++ Register shift = $shift$$Register; ++ __ srlv(dst, src, shift); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++ ++instruct shr_arith_Reg_imm(mRegI dst, mRegI src, immI8 shift) %{ ++ match(Set dst (RShiftI src shift)); ++ // effect(KILL cr); ++ ++ format %{ "SRA $dst, $src, $shift #@shr_arith_Reg_imm" %} ++ ins_encode %{ ++ Register src = $src$$Register; ++ Register dst = $dst$$Register; ++ int shift = $shift$$constant; ++ __ 
sra(dst, src, shift); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++instruct shr_arith_Reg_Reg(mRegI dst, mRegI src, mRegI shift) %{ ++ match(Set dst (RShiftI src shift)); ++ // effect(KILL cr); ++ ++ format %{ "SRA $dst, $src, $shift #@shr_arith_Reg_Reg" %} ++ ins_encode %{ ++ Register src = $src$$Register; ++ Register dst = $dst$$Register; ++ Register shift = $shift$$Register; ++ __ srav(dst, src, shift); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++//----------Convert Int to Boolean--------------------------------------------- ++ ++instruct convI2B(mRegI dst, mRegI src) %{ ++ match(Set dst (Conv2B src)); ++ ++ ins_cost(100); ++ format %{ "convI2B $dst, $src @ convI2B" %} ++ ins_encode %{ ++ Register dst = as_Register($dst$$reg); ++ Register src = as_Register($src$$reg); ++ ++ if (dst != src) { ++ __ daddiu(dst, R0, 1); ++ __ movz(dst, R0, src); ++ } else { ++ __ move(AT, src); ++ __ daddiu(dst, R0, 1); ++ __ movz(dst, R0, AT); ++ } ++ %} ++ ++ ins_pipe( ialu_regL_regL ); ++%} ++ ++instruct convI2L_reg( mRegL dst, mRegI src) %{ ++ match(Set dst (ConvI2L src)); ++ ++ ins_cost(100); ++ format %{ "SLL $dst, $src @ convI2L_reg\t" %} ++ ins_encode %{ ++ Register dst = as_Register($dst$$reg); ++ Register src = as_Register($src$$reg); ++ ++ if(dst != src) __ sll(dst, src, 0); ++ %} ++ ins_pipe( ialu_regL_regL ); ++%} ++ ++ ++instruct convL2I_reg( mRegI dst, mRegL src ) %{ ++ match(Set dst (ConvL2I src)); ++ ++ format %{ "MOV $dst, $src @ convL2I_reg" %} ++ ins_encode %{ ++ Register dst = as_Register($dst$$reg); ++ Register src = as_Register($src$$reg); ++ ++ __ sll(dst, src, 0); ++ %} ++ ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++instruct convL2I2L_reg( mRegL dst, mRegL src ) %{ ++ match(Set dst (ConvI2L (ConvL2I src))); ++ ++ format %{ "sll $dst, $src, 0 @ convL2I2L_reg" %} ++ ins_encode %{ ++ Register dst = as_Register($dst$$reg); ++ Register src = as_Register($src$$reg); ++ ++ __ sll(dst, src, 0); ++ %} ++ ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++instruct convL2D_reg( regD dst, mRegL src ) %{ ++ match(Set dst (ConvL2D src)); ++ format %{ "convL2D $dst, $src @ convL2D_reg" %} ++ ins_encode %{ ++ Register src = as_Register($src$$reg); ++ FloatRegister dst = as_FloatRegister($dst$$reg); ++ ++ __ dmtc1(src, dst); ++ __ cvt_d_l(dst, dst); ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++ ++instruct convD2L_reg_fast( mRegL dst, regD src ) %{ ++ match(Set dst (ConvD2L src)); ++ ins_cost(150); ++ format %{ "convD2L $dst, $src @ convD2L_reg_fast" %} ++ ins_encode %{ ++ Register dst = as_Register($dst$$reg); ++ FloatRegister src = as_FloatRegister($src$$reg); ++ ++ Label Done; ++ ++ __ trunc_l_d(F30, src); ++ // max_long: 0x7fffffffffffffff ++ // __ set64(AT, 0x7fffffffffffffff); ++ __ daddiu(AT, R0, -1); ++ __ dsrl(AT, AT, 1); ++ __ dmfc1(dst, F30); ++ ++ __ bne(dst, AT, Done); ++ __ delayed()->mtc1(R0, F30); ++ ++ __ cvt_d_w(F30, F30); ++ __ c_ult_d(src, F30); ++ __ bc1f(Done); ++ __ delayed()->daddiu(T9, R0, -1); ++ ++ __ c_un_d(src, src); //NaN? ++ __ subu(dst, T9, AT); ++ __ movt(dst, R0); ++ ++ __ bind(Done); ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++ ++instruct convD2L_reg_slow( mRegL dst, regD src ) %{ ++ match(Set dst (ConvD2L src)); ++ ins_cost(250); ++ format %{ "convD2L $dst, $src @ convD2L_reg_slow" %} ++ ins_encode %{ ++ Register dst = as_Register($dst$$reg); ++ FloatRegister src = as_FloatRegister($src$$reg); ++ ++ Label L; ++ ++ __ c_un_d(src, src); //NaN? 
++ __ bc1t(L); ++ __ delayed(); ++ __ move(dst, R0); ++ ++ __ trunc_l_d(F30, src); ++ __ cfc1(AT, 31); ++ __ li(T9, 0x10000); ++ __ andr(AT, AT, T9); ++ __ beq(AT, R0, L); ++ __ delayed()->dmfc1(dst, F30); ++ ++ __ mov_d(F12, src); ++ __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::d2l), 1); ++ __ move(dst, V0); ++ __ bind(L); ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++ ++instruct convF2I_reg_fast( mRegI dst, regF src ) %{ ++ match(Set dst (ConvF2I src)); ++ ins_cost(150); ++ format %{ "convf2i $dst, $src @ convF2I_reg_fast" %} ++ ins_encode %{ ++ Register dreg = $dst$$Register; ++ FloatRegister fval = $src$$FloatRegister; ++ Label L; ++ ++ __ trunc_w_s(F30, fval); ++ __ move(AT, 0x7fffffff); ++ __ mfc1(dreg, F30); ++ __ c_un_s(fval, fval); //NaN? ++ __ movt(dreg, R0); ++ ++ __ bne(AT, dreg, L); ++ __ delayed()->lui(T9, 0x8000); ++ ++ __ mfc1(AT, fval); ++ __ andr(AT, AT, T9); ++ ++ __ movn(dreg, T9, AT); ++ ++ __ bind(L); ++ ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++ ++ ++instruct convF2I_reg_slow( mRegI dst, regF src ) %{ ++ match(Set dst (ConvF2I src)); ++ ins_cost(250); ++ format %{ "convf2i $dst, $src @ convF2I_reg_slow" %} ++ ins_encode %{ ++ Register dreg = $dst$$Register; ++ FloatRegister fval = $src$$FloatRegister; ++ Label L; ++ ++ __ c_un_s(fval, fval); //NaN? ++ __ bc1t(L); ++ __ delayed(); ++ __ move(dreg, R0); ++ ++ __ trunc_w_s(F30, fval); ++ ++ /* Call SharedRuntime:f2i() to do valid convention */ ++ __ cfc1(AT, 31); ++ __ li(T9, 0x10000); ++ __ andr(AT, AT, T9); ++ __ beq(AT, R0, L); ++ __ delayed()->mfc1(dreg, F30); ++ ++ __ mov_s(F12, fval); ++ ++ //This bug was found when running ezDS's control-panel. ++ // J 982 C2 javax.swing.text.BoxView.layoutMajorAxis(II[I[I)V (283 bytes) @ 0x000000555c46aa74 ++ // ++ // An interger array index has been assigned to V0, and then changed from 1 to Integer.MAX_VALUE. ++ // V0 is corrupted during call_VM_leaf(), and should be preserved. ++ // ++ __ push(fval); ++ if(dreg != V0) { ++ __ push(V0); ++ } ++ __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::f2i), 1); ++ if(dreg != V0) { ++ __ move(dreg, V0); ++ __ pop(V0); ++ } ++ __ pop(fval); ++ __ bind(L); ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++ ++instruct convF2L_reg_fast( mRegL dst, regF src ) %{ ++ match(Set dst (ConvF2L src)); ++ ins_cost(150); ++ format %{ "convf2l $dst, $src @ convF2L_reg_fast" %} ++ ins_encode %{ ++ Register dreg = $dst$$Register; ++ FloatRegister fval = $src$$FloatRegister; ++ Label L; ++ ++ __ trunc_l_s(F30, fval); ++ __ daddiu(AT, R0, -1); ++ __ dsrl(AT, AT, 1); ++ __ dmfc1(dreg, F30); ++ __ c_un_s(fval, fval); //NaN? ++ __ movt(dreg, R0); ++ ++ __ bne(AT, dreg, L); ++ __ delayed()->lui(T9, 0x8000); ++ ++ __ mfc1(AT, fval); ++ __ andr(AT, AT, T9); ++ ++ __ dsll32(T9, T9, 0); ++ __ movn(dreg, T9, AT); ++ ++ __ bind(L); ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++ ++instruct convF2L_reg_slow( mRegL dst, regF src ) %{ ++ match(Set dst (ConvF2L src)); ++ ins_cost(250); ++ format %{ "convf2l $dst, $src @ convF2L_reg_slow" %} ++ ins_encode %{ ++ Register dst = as_Register($dst$$reg); ++ FloatRegister fval = $src$$FloatRegister; ++ Label L; ++ ++ __ c_un_s(fval, fval); //NaN? 
++ __ bc1t(L); ++ __ delayed(); ++ __ move(dst, R0); ++ ++ __ trunc_l_s(F30, fval); ++ __ cfc1(AT, 31); ++ __ li(T9, 0x10000); ++ __ andr(AT, AT, T9); ++ __ beq(AT, R0, L); ++ __ delayed()->dmfc1(dst, F30); ++ ++ __ mov_s(F12, fval); ++ __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::f2l), 1); ++ __ move(dst, V0); ++ __ bind(L); ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct convL2F_reg( regF dst, mRegL src ) %{ ++ match(Set dst (ConvL2F src)); ++ format %{ "convl2f $dst, $src @ convL2F_reg" %} ++ ins_encode %{ ++ FloatRegister dst = $dst$$FloatRegister; ++ Register src = as_Register($src$$reg); ++ Label L; ++ ++ __ dmtc1(src, dst); ++ __ cvt_s_l(dst, dst); ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct convI2F_reg( regF dst, mRegI src ) %{ ++ match(Set dst (ConvI2F src)); ++ format %{ "convi2f $dst, $src @ convI2F_reg" %} ++ ins_encode %{ ++ Register src = $src$$Register; ++ FloatRegister dst = $dst$$FloatRegister; ++ ++ __ mtc1(src, dst); ++ __ cvt_s_w(dst, dst); ++ %} ++ ++ ins_pipe( fpu_regF_regF ); ++%} ++ ++instruct cmpLTMask_immI_0( mRegI dst, mRegI p, immI_0 zero ) %{ ++ match(Set dst (CmpLTMask p zero)); ++ ins_cost(100); ++ ++ format %{ "sra $dst, $p, 31 @ cmpLTMask_immI_0" %} ++ ins_encode %{ ++ Register src = $p$$Register; ++ Register dst = $dst$$Register; ++ ++ __ sra(dst, src, 31); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++ ++instruct cmpLTMask( mRegI dst, mRegI p, mRegI q ) %{ ++ match(Set dst (CmpLTMask p q)); ++ ins_cost(400); ++ ++ format %{ "cmpLTMask $dst, $p, $q @ cmpLTMask" %} ++ ins_encode %{ ++ Register p = $p$$Register; ++ Register q = $q$$Register; ++ Register dst = $dst$$Register; ++ ++ __ slt(dst, p, q); ++ __ subu(dst, R0, dst); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct convP2B(mRegI dst, mRegP src) %{ ++ match(Set dst (Conv2B src)); ++ ++ ins_cost(100); ++ format %{ "convP2B $dst, $src @ convP2B" %} ++ ins_encode %{ ++ Register dst = as_Register($dst$$reg); ++ Register src = as_Register($src$$reg); ++ ++ if (dst != src) { ++ __ daddiu(dst, R0, 1); ++ __ movz(dst, R0, src); ++ } else { ++ __ move(AT, src); ++ __ daddiu(dst, R0, 1); ++ __ movz(dst, R0, AT); ++ } ++ %} ++ ++ ins_pipe( ialu_regL_regL ); ++%} ++ ++ ++instruct convI2D_reg_reg(regD dst, mRegI src) %{ ++ match(Set dst (ConvI2D src)); ++ format %{ "conI2D $dst, $src @convI2D_reg" %} ++ ins_encode %{ ++ Register src = $src$$Register; ++ FloatRegister dst = $dst$$FloatRegister; ++ __ mtc1(src, dst); ++ __ cvt_d_w(dst, dst); ++ %} ++ ins_pipe( fpu_regF_regF ); ++%} ++ ++instruct convF2D_reg_reg(regD dst, regF src) %{ ++ match(Set dst (ConvF2D src)); ++ format %{ "convF2D $dst, $src\t# @convF2D_reg_reg" %} ++ ins_encode %{ ++ FloatRegister dst = $dst$$FloatRegister; ++ FloatRegister src = $src$$FloatRegister; ++ ++ __ cvt_d_s(dst, src); ++ %} ++ ins_pipe( fpu_regF_regF ); ++%} ++ ++instruct convD2F_reg_reg(regF dst, regD src) %{ ++ match(Set dst (ConvD2F src)); ++ format %{ "convD2F $dst, $src\t# @convD2F_reg_reg" %} ++ ins_encode %{ ++ FloatRegister dst = $dst$$FloatRegister; ++ FloatRegister src = $src$$FloatRegister; ++ ++ __ cvt_s_d(dst, src); ++ %} ++ ins_pipe( fpu_regF_regF ); ++%} ++ ++ ++// Convert a double to an int. If the double is a NAN, stuff a zero in instead. 
++instruct convD2I_reg_reg_fast( mRegI dst, regD src ) %{ ++ match(Set dst (ConvD2I src)); ++ ++ ins_cost(150); ++ format %{ "convD2I $dst, $src\t# @ convD2I_reg_reg_fast" %} ++ ++ ins_encode %{ ++ FloatRegister src = $src$$FloatRegister; ++ Register dst = $dst$$Register; ++ ++ Label Done; ++ ++ __ trunc_w_d(F30, src); ++ // max_int: 2147483647 ++ __ move(AT, 0x7fffffff); ++ __ mfc1(dst, F30); ++ ++ __ bne(dst, AT, Done); ++ __ delayed()->mtc1(R0, F30); ++ ++ __ cvt_d_w(F30, F30); ++ __ c_ult_d(src, F30); ++ __ bc1f(Done); ++ __ delayed()->addiu(T9, R0, -1); ++ ++ __ c_un_d(src, src); //NaN? ++ __ subu32(dst, T9, AT); ++ __ movt(dst, R0); ++ ++ __ bind(Done); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++ ++instruct convD2I_reg_reg_slow( mRegI dst, regD src ) %{ ++ match(Set dst (ConvD2I src)); ++ ++ ins_cost(250); ++ format %{ "convD2I $dst, $src\t# @ convD2I_reg_reg_slow" %} ++ ++ ins_encode %{ ++ FloatRegister src = $src$$FloatRegister; ++ Register dst = $dst$$Register; ++ Label L; ++ ++ __ trunc_w_d(F30, src); ++ __ cfc1(AT, 31); ++ __ li(T9, 0x10000); ++ __ andr(AT, AT, T9); ++ __ beq(AT, R0, L); ++ __ delayed()->mfc1(dst, F30); ++ ++ __ mov_d(F12, src); ++ __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::d2i), 1); ++ __ move(dst, V0); ++ __ bind(L); ++ ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++// Convert oop pointer into compressed form ++instruct encodeHeapOop(mRegN dst, mRegP src) %{ ++ predicate(n->bottom_type()->make_ptr()->ptr() != TypePtr::NotNull); ++ match(Set dst (EncodeP src)); ++ format %{ "encode_heap_oop $dst,$src" %} ++ ins_encode %{ ++ Register src = $src$$Register; ++ Register dst = $dst$$Register; ++ ++ __ encode_heap_oop(dst, src); ++ %} ++ ins_pipe( ialu_regL_regL ); ++%} ++ ++instruct encodeHeapOop_not_null(mRegN dst, mRegP src) %{ ++ predicate(n->bottom_type()->make_ptr()->ptr() == TypePtr::NotNull); ++ match(Set dst (EncodeP src)); ++ format %{ "encode_heap_oop_not_null $dst,$src @ encodeHeapOop_not_null" %} ++ ins_encode %{ ++ __ encode_heap_oop_not_null($dst$$Register, $src$$Register); ++ %} ++ ins_pipe( ialu_regL_regL ); ++%} ++ ++instruct decodeHeapOop(mRegP dst, mRegN src) %{ ++ predicate(n->bottom_type()->is_ptr()->ptr() != TypePtr::NotNull && ++ n->bottom_type()->is_ptr()->ptr() != TypePtr::Constant); ++ match(Set dst (DecodeN src)); ++ format %{ "decode_heap_oop $dst,$src @ decodeHeapOop" %} ++ ins_encode %{ ++ Register s = $src$$Register; ++ Register d = $dst$$Register; ++ ++ __ decode_heap_oop(d, s); ++ %} ++ ins_pipe( ialu_regL_regL ); ++%} ++ ++instruct decodeHeapOop_not_null(mRegP dst, mRegN src) %{ ++ predicate(n->bottom_type()->is_ptr()->ptr() == TypePtr::NotNull || ++ n->bottom_type()->is_ptr()->ptr() == TypePtr::Constant); ++ match(Set dst (DecodeN src)); ++ format %{ "decode_heap_oop_not_null $dst,$src @ decodeHeapOop_not_null" %} ++ ins_encode %{ ++ Register s = $src$$Register; ++ Register d = $dst$$Register; ++ if (s != d) { ++ __ decode_heap_oop_not_null(d, s); ++ } else { ++ __ decode_heap_oop_not_null(d); ++ } ++ %} ++ ins_pipe( ialu_regL_regL ); ++%} ++ ++instruct encodeKlass_not_null(mRegN dst, mRegP src) %{ ++ match(Set dst (EncodePKlass src)); ++ format %{ "encode_heap_oop_not_null $dst,$src @ encodeKlass_not_null" %} ++ ins_encode %{ ++ __ encode_klass_not_null($dst$$Register, $src$$Register); ++ %} ++ ins_pipe( ialu_regL_regL ); ++%} ++ ++instruct decodeKlass_not_null(mRegP dst, mRegN src) %{ ++ match(Set dst (DecodeNKlass src)); ++ format %{ "decode_heap_klass_not_null $dst,$src" %} ++ ins_encode %{ ++ Register s = 
$src$$Register; ++ Register d = $dst$$Register; ++ if (s != d) { ++ __ decode_klass_not_null(d, s); ++ } else { ++ __ decode_klass_not_null(d); ++ } ++ %} ++ ins_pipe( ialu_regL_regL ); ++%} ++ ++//FIXME ++instruct tlsLoadP(mRegP dst) %{ ++ match(Set dst (ThreadLocal)); ++ ++ ins_cost(0); ++ format %{ " get_thread in $dst #@tlsLoadP" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++#ifdef OPT_THREAD ++ __ move(dst, TREG); ++#else ++ __ get_thread(dst); ++#endif ++ %} ++ ++ ins_pipe( ialu_loadI ); ++%} ++ ++ ++instruct checkCastPP( mRegP dst ) %{ ++ match(Set dst (CheckCastPP dst)); ++ ++ format %{ "#checkcastPP of $dst (empty encoding) #@chekCastPP" %} ++ ins_encode( /*empty encoding*/ ); ++ ins_pipe( empty ); ++%} ++ ++instruct castPP(mRegP dst) ++%{ ++ match(Set dst (CastPP dst)); ++ ++ size(0); ++ format %{ "# castPP of $dst" %} ++ ins_encode(/* empty encoding */); ++ ins_pipe(empty); ++%} ++ ++instruct castII( mRegI dst ) %{ ++ match(Set dst (CastII dst)); ++ format %{ "#castII of $dst empty encoding" %} ++ ins_encode( /*empty encoding*/ ); ++ ins_cost(0); ++ ins_pipe( empty ); ++%} ++ ++// Return Instruction ++// Remove the return address & jump to it. ++instruct Ret() %{ ++ match(Return); ++ format %{ "RET #@Ret" %} ++ ++ ins_encode %{ ++ __ jr(RA); ++ __ delayed()->nop(); ++ %} ++ ++ ins_pipe( pipe_jump ); ++%} ++ ++/* ++// For Loongson CPUs, jr seems too slow, so this rule shouldn't be imported. ++instruct jumpXtnd(mRegL switch_val) %{ ++ match(Jump switch_val); ++ ++ ins_cost(350); ++ ++ format %{ "load T9 <-- [$constanttablebase, $switch_val, $constantoffset] @ jumpXtnd\n\t" ++ "jr T9\n\t" ++ "nop" %} ++ ins_encode %{ ++ Register table_base = $constanttablebase; ++ int con_offset = $constantoffset; ++ Register switch_reg = $switch_val$$Register; ++ ++ if (UseLEXT1) { ++ if (Assembler::is_simm(con_offset, 8)) { ++ __ gsldx(T9, table_base, switch_reg, con_offset); ++ } else if (Assembler::is_simm16(con_offset)) { ++ __ daddu(T9, table_base, switch_reg); ++ __ ld(T9, T9, con_offset); ++ } else { ++ __ move(T9, con_offset); ++ __ daddu(AT, table_base, switch_reg); ++ __ gsldx(T9, AT, T9, 0); ++ } ++ } else { ++ if (Assembler::is_simm16(con_offset)) { ++ __ daddu(T9, table_base, switch_reg); ++ __ ld(T9, T9, con_offset); ++ } else { ++ __ move(T9, con_offset); ++ __ daddu(AT, table_base, switch_reg); ++ __ daddu(AT, T9, AT); ++ __ ld(T9, AT, 0); ++ } ++ } ++ ++ __ jr(T9); ++ __ delayed()->nop(); ++ ++ %} ++ ins_pipe(pipe_jump); ++%} ++*/ ++ ++ ++// Tail Jump; remove the return address; jump to target. ++// TailCall above leaves the return address around. ++// TailJump is used in only one place, the rethrow_Java stub (fancy_jump=2). ++// ex_oop (Exception Oop) is needed in %o0 at the jump. As there would be a ++// "restore" before this instruction (in Epilogue), we need to materialize it ++// in %i0. 
++//FIXME ++instruct tailjmpInd(mRegP jump_target,mRegP ex_oop) %{ ++ match( TailJump jump_target ex_oop ); ++ ins_cost(200); ++ format %{ "Jmp $jump_target ; ex_oop = $ex_oop #@tailjmpInd" %} ++ ins_encode %{ ++ Register target = $jump_target$$Register; ++ ++ // V0, V1 are indicated in: ++ // [stubGenerator_mips.cpp] generate_forward_exception() ++ // [runtime_mips.cpp] OptoRuntime::generate_exception_blob() ++ // ++ Register oop = $ex_oop$$Register; ++ Register exception_oop = V0; ++ Register exception_pc = V1; ++ ++ __ move(exception_pc, RA); ++ __ move(exception_oop, oop); ++ ++ __ jr(target); ++ __ delayed()->nop(); ++ %} ++ ins_pipe( pipe_jump ); ++%} ++ ++// ============================================================================ ++// Procedure Call/Return Instructions ++// Call Java Static Instruction ++// Note: If this code changes, the corresponding ret_addr_offset() and ++// compute_padding() functions will have to be adjusted. ++instruct CallStaticJavaDirect(method meth) %{ ++ match(CallStaticJava); ++ effect(USE meth); ++ ++ ins_cost(300); ++ format %{ "CALL,static #@CallStaticJavaDirect " %} ++ ins_encode( Java_Static_Call( meth ) ); ++ ins_pipe( pipe_slow ); ++ ins_pc_relative(1); ++ ins_alignment(16); ++%} ++ ++// Call Java Dynamic Instruction ++// Note: If this code changes, the corresponding ret_addr_offset() and ++// compute_padding() functions will have to be adjusted. ++instruct CallDynamicJavaDirect(method meth) %{ ++ match(CallDynamicJava); ++ effect(USE meth); ++ ++ ins_cost(300); ++ format %{"MOV IC_Klass, #Universe::non_oop_word()\n\t" ++ "CallDynamic @ CallDynamicJavaDirect" %} ++ ins_encode( Java_Dynamic_Call( meth ) ); ++ ins_pipe( pipe_slow ); ++ ins_pc_relative(1); ++ ins_alignment(16); ++%} ++ ++instruct CallLeafNoFPDirect(method meth) %{ ++ match(CallLeafNoFP); ++ effect(USE meth); ++ ++ ins_cost(300); ++ format %{ "CALL_LEAF_NOFP,runtime " %} ++ ins_encode(Java_To_Runtime(meth)); ++ ins_pipe( pipe_slow ); ++ ins_pc_relative(1); ++ ins_alignment(16); ++%} ++ ++// Prefetch instructions for allocation. 
++ ++instruct prefetchAllocNTA( memory mem ) %{ ++ match(PrefetchAllocation mem); ++ ins_cost(125); ++ format %{ "pref $mem\t# Prefetch allocation @ prefetchAllocNTA" %} ++ ins_encode %{ ++ __ loadstore_enc(R0, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::LOAD_BYTE); ++ %} ++ ins_pipe(pipe_slow); ++%} ++ ++ ++// Call runtime without safepoint ++instruct CallLeafDirect(method meth) %{ ++ match(CallLeaf); ++ effect(USE meth); ++ ++ ins_cost(300); ++ format %{ "CALL_LEAF,runtime #@CallLeafDirect " %} ++ ins_encode(Java_To_Runtime(meth)); ++ ins_pipe( pipe_slow ); ++ ins_pc_relative(1); ++ ins_alignment(16); ++%} ++ ++// Load Char (16bit unsigned) ++instruct loadUS(mRegI dst, memory mem) %{ ++ match(Set dst (LoadUS mem)); ++ ++ ins_cost(125); ++ format %{ "loadUS $dst,$mem @ loadC" %} ++ ins_encode %{ ++ __ loadstore_enc($dst$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::LOAD_U_SHORT); ++ %} ++ ins_pipe( ialu_loadI ); ++%} ++ ++instruct loadUS_convI2L(mRegL dst, memory mem) %{ ++ match(Set dst (ConvI2L (LoadUS mem))); ++ ++ ins_cost(125); ++ format %{ "loadUS $dst,$mem @ loadUS_convI2L" %} ++ ins_encode %{ ++ __ loadstore_enc($dst$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::LOAD_U_SHORT); ++ %} ++ ins_pipe( ialu_loadI ); ++%} ++ ++// Store Char (16bit unsigned) ++instruct storeC(memory mem, mRegI src) %{ ++ match(Set mem (StoreC mem src)); ++ ++ ins_cost(125); ++ format %{ "storeC $src, $mem @ storeC" %} ++ ins_encode %{ ++ __ loadstore_enc($src$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::STORE_CHAR); ++ %} ++ ins_pipe( ialu_loadI ); ++%} ++ ++instruct storeC_0(memory mem, immI_0 zero) %{ ++ match(Set mem (StoreC mem zero)); ++ ++ ins_cost(125); ++ format %{ "storeC $zero, $mem @ storeC_0" %} ++ ins_encode %{ ++ __ loadstore_enc(R0, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::STORE_SHORT); ++ %} ++ ins_pipe( ialu_loadI ); ++%} ++ ++ ++instruct loadConF_immF_0(regF dst, immF_0 zero) %{ ++ match(Set dst zero); ++ ins_cost(100); ++ ++ format %{ "mov $dst, zero @ loadConF_immF_0\n"%} ++ ins_encode %{ ++ FloatRegister dst = $dst$$FloatRegister; ++ ++ __ mtc1(R0, dst); ++ %} ++ ins_pipe( fpu_loadF ); ++%} ++ ++ ++instruct loadConF(regF dst, immF src) %{ ++ match(Set dst src); ++ ins_cost(125); ++ ++ format %{ "lwc1 $dst, $constantoffset[$constanttablebase] # load FLOAT $src from table @ loadConF" %} ++ ins_encode %{ ++ int con_offset = $constantoffset($src); ++ ++ if (Assembler::is_simm16(con_offset)) { ++ __ lwc1($dst$$FloatRegister, $constanttablebase, con_offset); ++ } else { ++ __ set64(AT, con_offset); ++ if (UseLEXT1) { ++ __ gslwxc1($dst$$FloatRegister, $constanttablebase, AT, 0); ++ } else { ++ __ daddu(AT, $constanttablebase, AT); ++ __ lwc1($dst$$FloatRegister, AT, 0); ++ } ++ } ++ %} ++ ins_pipe( fpu_loadF ); ++%} ++ ++ ++instruct loadConD_immD_0(regD dst, immD_0 zero) %{ ++ match(Set dst zero); ++ ins_cost(100); ++ ++ format %{ "mov $dst, zero @ loadConD_immD_0"%} ++ ins_encode %{ ++ FloatRegister dst = as_FloatRegister($dst$$reg); ++ ++ __ dmtc1(R0, dst); ++ %} ++ ins_pipe( fpu_loadF ); ++%} ++ ++instruct loadConD(regD dst, immD src) %{ ++ match(Set dst src); ++ ins_cost(125); ++ ++ format %{ "ldc1 $dst, $constantoffset[$constanttablebase] # load DOUBLE $src from table @ loadConD" %} ++ ins_encode %{ ++ int con_offset = $constantoffset($src); ++ ++ if (Assembler::is_simm16(con_offset)) { ++ __ ldc1($dst$$FloatRegister, $constanttablebase, 
con_offset); ++ } else { ++ __ set64(AT, con_offset); ++ if (UseLEXT1) { ++ __ gsldxc1($dst$$FloatRegister, $constanttablebase, AT, 0); ++ } else { ++ __ daddu(AT, $constanttablebase, AT); ++ __ ldc1($dst$$FloatRegister, AT, 0); ++ } ++ } ++ %} ++ ins_pipe( fpu_loadF ); ++%} ++ ++// Store register Float value (it is faster than store from FPU register) ++instruct storeF_reg( memory mem, regF src) %{ ++ match(Set mem (StoreF mem src)); ++ ++ ins_cost(50); ++ format %{ "store $mem, $src\t# store float @ storeF_reg" %} ++ ins_encode %{ ++ __ loadstore_enc($src$$FloatRegister, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::STORE_FLOAT); ++ %} ++ ins_pipe( fpu_storeF ); ++%} ++ ++instruct storeF_immF_0( memory mem, immF_0 zero) %{ ++ match(Set mem (StoreF mem zero)); ++ ++ ins_cost(40); ++ format %{ "store $mem, zero\t# store float @ storeF_immF_0" %} ++ ins_encode %{ ++ __ loadstore_enc(R0, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::STORE_INT); ++ %} ++ ins_pipe( ialu_storeI ); ++%} ++ ++// Load Double ++instruct loadD(regD dst, memory mem) %{ ++ match(Set dst (LoadD mem)); ++ ++ ins_cost(150); ++ format %{ "loadD $dst, $mem #@loadD" %} ++ ins_encode %{ ++ __ loadstore_enc($dst$$FloatRegister, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::LOAD_DOUBLE); ++ %} ++ ins_pipe( ialu_loadI ); ++%} ++ ++// Load Double - UNaligned ++instruct loadD_unaligned(regD dst, memory mem ) %{ ++ match(Set dst (LoadD_unaligned mem)); ++ ins_cost(250); ++ // FIXME: Need more effective ldl/ldr ++ format %{ "loadD_unaligned $dst, $mem #@loadD_unaligned" %} ++ ins_encode %{ ++ __ loadstore_enc($dst$$FloatRegister, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::LOAD_DOUBLE); ++ %} ++ ins_pipe( ialu_loadI ); ++%} ++ ++instruct storeD_reg( memory mem, regD src) %{ ++ match(Set mem (StoreD mem src)); ++ ++ ins_cost(50); ++ format %{ "store $mem, $src\t# store float @ storeD_reg" %} ++ ins_encode %{ ++ __ loadstore_enc($src$$FloatRegister, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::STORE_DOUBLE); ++ %} ++ ins_pipe( fpu_storeF ); ++%} ++ ++instruct storeD_immD_0( memory mem, immD_0 zero) %{ ++ match(Set mem (StoreD mem zero)); ++ ++ ins_cost(40); ++ format %{ "store $mem, zero\t# store float @ storeD_immD_0" %} ++ ins_encode %{ ++ __ loadstore_enc(R0, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::STORE_LONG); ++ %} ++ ins_pipe( ialu_storeI ); ++%} ++ ++instruct loadSSI(mRegI dst, stackSlotI src) ++%{ ++ match(Set dst src); ++ ++ ins_cost(125); ++ format %{ "lw $dst, $src\t# int stk @ loadSSI" %} ++ ins_encode %{ ++ guarantee( Assembler::is_simm16($src$$disp), "disp too long (loadSSI) !"); ++ __ lw($dst$$Register, SP, $src$$disp); ++ %} ++ ins_pipe(ialu_loadI); ++%} ++ ++instruct storeSSI(stackSlotI dst, mRegI src) ++%{ ++ match(Set dst src); ++ ++ ins_cost(100); ++ format %{ "sw $dst, $src\t# int stk @ storeSSI" %} ++ ins_encode %{ ++ guarantee( Assembler::is_simm16($dst$$disp), "disp too long (storeSSI) !"); ++ __ sw($src$$Register, SP, $dst$$disp); ++ %} ++ ins_pipe(ialu_storeI); ++%} ++ ++instruct loadSSL(mRegL dst, stackSlotL src) ++%{ ++ match(Set dst src); ++ ++ ins_cost(125); ++ format %{ "ld $dst, $src\t# long stk @ loadSSL" %} ++ ins_encode %{ ++ guarantee( Assembler::is_simm16($src$$disp), "disp too long (loadSSL) !"); ++ __ ld($dst$$Register, SP, $src$$disp); ++ %} ++ ins_pipe(ialu_loadI); ++%} ++ ++instruct storeSSL(stackSlotL dst, mRegL src) ++%{ ++ match(Set dst src); ++ ++ ins_cost(100); 
++ format %{ "sd $dst, $src\t# long stk @ storeSSL" %} ++ ins_encode %{ ++ guarantee( Assembler::is_simm16($dst$$disp), "disp too long (storeSSL) !"); ++ __ sd($src$$Register, SP, $dst$$disp); ++ %} ++ ins_pipe(ialu_storeI); ++%} ++ ++instruct loadSSP(mRegP dst, stackSlotP src) ++%{ ++ match(Set dst src); ++ ++ ins_cost(125); ++ format %{ "ld $dst, $src\t# ptr stk @ loadSSP" %} ++ ins_encode %{ ++ guarantee( Assembler::is_simm16($src$$disp), "disp too long (loadSSP) !"); ++ __ ld($dst$$Register, SP, $src$$disp); ++ %} ++ ins_pipe(ialu_loadI); ++%} ++ ++instruct storeSSP(stackSlotP dst, mRegP src) ++%{ ++ match(Set dst src); ++ ++ ins_cost(100); ++ format %{ "sd $dst, $src\t# ptr stk @ storeSSP" %} ++ ins_encode %{ ++ guarantee( Assembler::is_simm16($dst$$disp), "disp too long (storeSSP) !"); ++ __ sd($src$$Register, SP, $dst$$disp); ++ %} ++ ins_pipe(ialu_storeI); ++%} ++ ++instruct loadSSF(regF dst, stackSlotF src) ++%{ ++ match(Set dst src); ++ ++ ins_cost(125); ++ format %{ "lwc1 $dst, $src\t# float stk @ loadSSF" %} ++ ins_encode %{ ++ guarantee( Assembler::is_simm16($src$$disp), "disp too long (loadSSF) !"); ++ __ lwc1($dst$$FloatRegister, SP, $src$$disp); ++ %} ++ ins_pipe(ialu_loadI); ++%} ++ ++instruct storeSSF(stackSlotF dst, regF src) ++%{ ++ match(Set dst src); ++ ++ ins_cost(100); ++ format %{ "swc1 $dst, $src\t# float stk @ storeSSF" %} ++ ins_encode %{ ++ guarantee( Assembler::is_simm16($dst$$disp), "disp too long (storeSSF) !"); ++ __ swc1($src$$FloatRegister, SP, $dst$$disp); ++ %} ++ ins_pipe(fpu_storeF); ++%} ++ ++// Use the same format since predicate() can not be used here. ++instruct loadSSD(regD dst, stackSlotD src) ++%{ ++ match(Set dst src); ++ ++ ins_cost(125); ++ format %{ "ldc1 $dst, $src\t# double stk @ loadSSD" %} ++ ins_encode %{ ++ guarantee( Assembler::is_simm16($src$$disp), "disp too long (loadSSD) !"); ++ __ ldc1($dst$$FloatRegister, SP, $src$$disp); ++ %} ++ ins_pipe(ialu_loadI); ++%} ++ ++instruct storeSSD(stackSlotD dst, regD src) ++%{ ++ match(Set dst src); ++ ++ ins_cost(100); ++ format %{ "sdc1 $dst, $src\t# double stk @ storeSSD" %} ++ ins_encode %{ ++ guarantee( Assembler::is_simm16($dst$$disp), "disp too long (storeSSD) !"); ++ __ sdc1($src$$FloatRegister, SP, $dst$$disp); ++ %} ++ ins_pipe(fpu_storeF); ++%} ++ ++instruct cmpFastLock(FlagsReg cr, mRegP object, mRegP box, mRegI tmp, mRegI scr) %{ ++ match(Set cr (FastLock object box)); ++ effect(TEMP tmp, TEMP scr); ++ ins_cost(300); ++ format %{ "FASTLOCK $cr <-- $object, $box, $tmp, $scr #@ cmpFastLock" %} ++ ins_encode %{ ++ __ fast_lock($object$$Register, $box$$Register, $cr$$Register, $tmp$$Register, $scr$$Register); ++ %} ++ ++ ins_pipe( pipe_slow ); ++ ins_pc_relative(1); ++%} ++ ++instruct cmpFastUnlock(FlagsReg cr, mRegP object, mRegP box, mRegI tmp, mRegI scr) %{ ++ match(Set cr (FastUnlock object box)); ++ effect(TEMP tmp, TEMP scr); ++ ins_cost(300); ++ format %{ "FASTUNLOCK $cr <-- $object, $box, $tmp #@cmpFastUnlock" %} ++ ins_encode %{ ++ __ fast_unlock($object$$Register, $box$$Register, $cr$$Register, $tmp$$Register, $scr$$Register); ++ %} ++ ++ ins_pipe( pipe_slow ); ++ ins_pc_relative(1); ++%} ++ ++// Store CMS card-mark Immediate 0 ++instruct storeImmCM(memory mem, immI_0 zero) %{ ++ match(Set mem (StoreCM mem zero)); ++ ++ ins_cost(150); ++ format %{ "MEMBAR\n\t" ++ "sb $mem, zero\t! 
CMS card-mark imm0" %} ++ ins_encode %{ ++ __ sync(); ++ __ loadstore_enc(R0, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::STORE_BYTE); ++ %} ++ ins_pipe( ialu_storeI ); ++%} ++ ++// Die now ++instruct ShouldNotReachHere( ) ++%{ ++ match(Halt); ++ ins_cost(300); ++ ++ // Use the following format syntax ++ format %{ "ILLTRAP ;#@ShouldNotReachHere" %} ++ ins_encode %{ ++ // Here we should emit illtrap ! ++ ++ __ stop("in ShoudNotReachHere"); ++ ++ %} ++ ins_pipe( pipe_jump ); ++%} ++ ++instruct leaP8Narrow(mRegP dst, indOffset8Narrow mem) ++%{ ++ predicate(Universe::narrow_oop_shift() == 0); ++ match(Set dst mem); ++ ++ ins_cost(110); ++ format %{ "leaq $dst, $mem\t# ptr off8narrow @ leaP8Narrow" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register base = as_Register($mem$$base); ++ int disp = $mem$$disp; ++ ++ __ daddiu(dst, base, disp); ++ %} ++ ins_pipe( ialu_regI_imm16 ); ++%} ++ ++instruct leaPPosIdxScaleOff8(mRegP dst, basePosIndexScaleOffset8 mem) ++%{ ++ match(Set dst mem); ++ ++ ins_cost(110); ++ format %{ "leaq $dst, $mem\t# @ PosIdxScaleOff8" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register base = as_Register($mem$$base); ++ Register index = as_Register($mem$$index); ++ int scale = $mem$$scale; ++ int disp = $mem$$disp; ++ ++ if (scale == 0) { ++ __ daddu(AT, base, index); ++ __ daddiu(dst, AT, disp); ++ } else { ++ __ dsll(AT, index, scale); ++ __ daddu(AT, base, AT); ++ __ daddiu(dst, AT, disp); ++ } ++ %} ++ ++ ins_pipe( ialu_regI_imm16 ); ++%} ++ ++instruct leaPIdxScale(mRegP dst, indIndexScale mem) ++%{ ++ match(Set dst mem); ++ ++ ins_cost(110); ++ format %{ "leaq $dst, $mem\t# @ leaPIdxScale" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register base = as_Register($mem$$base); ++ Register index = as_Register($mem$$index); ++ int scale = $mem$$scale; ++ ++ if (scale == 0) { ++ __ daddu(dst, base, index); ++ } else { ++ __ dsll(AT, index, scale); ++ __ daddu(dst, base, AT); ++ } ++ %} ++ ++ ins_pipe( ialu_regI_imm16 ); ++%} ++ ++ ++// ============================================================================ ++// The 2nd slow-half of a subtype check. Scan the subklass's 2ndary superklass ++// array for an instance of the superklass. Set a hidden internal cache on a ++// hit (cache is checked with exposed code in gen_subtype_check()). Return ++// NZ for a miss or zero for a hit. The encoding ALSO sets flags. ++instruct partialSubtypeCheck( mRegP result, no_T8_mRegP sub, no_T8_mRegP super, mT8RegI tmp ) %{ ++ match(Set result (PartialSubtypeCheck sub super)); ++ effect(KILL tmp); ++ ins_cost(1100); // slightly larger than the next version ++ format %{ "partialSubtypeCheck result=$result, sub=$sub, super=$super, tmp=$tmp " %} ++ ++ ins_encode( enc_PartialSubtypeCheck(result, sub, super, tmp) ); ++ ins_pipe( pipe_slow ); ++%} ++ ++// Conditional-store of the updated heap-top. ++// Used during allocation of the shared heap. 
++ ++instruct storePConditional(memory heap_top_ptr, mRegP oldval, mRegP newval, FlagsReg cr) %{ ++ match(Set cr (StorePConditional heap_top_ptr (Binary oldval newval))); ++ ++ format %{ "move AT, $newval\n\t" ++ "sc_d $heap_top_ptr, AT\t# (ptr) @storePConditional \n\t" ++ "move $cr, AT\n" %} ++ ins_encode%{ ++ Register oldval = $oldval$$Register; ++ Register newval = $newval$$Register; ++ Address addr(as_Register($heap_top_ptr$$base), $heap_top_ptr$$disp); ++ ++ int index = $heap_top_ptr$$index; ++ int scale = $heap_top_ptr$$scale; ++ int disp = $heap_top_ptr$$disp; ++ ++ guarantee(Assembler::is_simm16(disp), ""); ++ ++ if (index != 0) { ++ __ stop("in storePConditional: index != 0"); ++ } else { ++ __ move(AT, newval); ++ __ scd(AT, addr); ++ __ move($cr$$Register, AT); ++ } ++ %} ++ ins_pipe(long_memory_op); ++%} ++ ++// Conditional-store of an int value. ++// AT flag is set on success, reset otherwise. ++instruct storeIConditional(memory mem, mRegI oldval, mRegI newval, FlagsReg cr) %{ ++ match(Set cr (StoreIConditional mem (Binary oldval newval))); ++ format %{ "CMPXCHG $newval, $mem, $oldval \t# @storeIConditional" %} ++ ++ ins_encode %{ ++ Register oldval = $oldval$$Register; ++ Register newval = $newval$$Register; ++ Register cr = $cr$$Register; ++ Address addr(as_Register($mem$$base), $mem$$disp); ++ ++ int index = $mem$$index; ++ int scale = $mem$$scale; ++ int disp = $mem$$disp; ++ ++ guarantee(Assembler::is_simm16(disp), ""); ++ ++ if (index != 0) { ++ __ stop("in storeIConditional: index != 0"); ++ } else { ++ if (cr != addr.base() && cr != oldval && cr != newval) { ++ __ cmpxchg32(addr, oldval, newval, cr, true, false, true); ++ } else { ++ __ cmpxchg32(addr, oldval, newval, AT, true, false, true); ++ __ move(cr, AT); ++ } ++ } ++%} ++ ++ ins_pipe(long_memory_op); ++%} ++ ++// Conditional-store of a long value. ++// ZF flag is set on success, reset otherwise. Implemented with a CMPXCHG. ++instruct storeLConditional(memory mem, mRegL oldval, mRegL newval, FlagsReg cr) ++%{ ++ match(Set cr (StoreLConditional mem (Binary oldval newval))); ++ ++ format %{ "cmpxchg $mem, $newval\t# If $oldval == $mem then store $newval into $mem" %} ++ ins_encode%{ ++ Register oldval = $oldval$$Register; ++ Register newval = $newval$$Register; ++ Register cr = $cr$$Register; ++ Address addr(as_Register($mem$$base), $mem$$disp); ++ ++ int index = $mem$$index; ++ int scale = $mem$$scale; ++ int disp = $mem$$disp; ++ ++ guarantee(Assembler::is_simm16(disp), ""); ++ ++ if (index != 0) { ++ __ stop("in storeIConditional: index != 0"); ++ } else { ++ if (cr != addr.base() && cr != oldval && cr != newval) { ++ __ cmpxchg(addr, oldval, newval, cr, false, true); ++ } else { ++ __ cmpxchg(addr, oldval, newval, AT, false, true); ++ __ move(cr, AT); ++ } ++ } ++ %} ++ ins_pipe(long_memory_op); ++%} ++ ++// Implement LoadPLocked. Must be ordered against changes of the memory location ++// by storePConditional. 
++instruct loadPLocked(mRegP dst, memory mem) %{ ++ match(Set dst (LoadPLocked mem)); ++ ins_cost(MEMORY_REF_COST); ++ ++ format %{ "lld $dst, $mem #@loadPLocked\n\t" %} ++ size(12); ++ ins_encode %{ ++ relocInfo::relocType disp_reloc = $mem->disp_reloc(); ++ assert(disp_reloc == relocInfo::none, "cannot have disp"); ++ __ loadstore_enc($dst$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::LOAD_LINKED_LONG); ++ %} ++ ins_pipe( ialu_loadI ); ++%} ++ ++ ++instruct compareAndSwapI(mRegI res, mRegP mem_ptr, mRegI oldval, mRegI newval) %{ ++ match(Set res (CompareAndSwapI mem_ptr (Binary oldval newval))); ++ format %{ "CMPXCHG $newval, [$mem_ptr], $oldval @ compareAndSwapI" %} ++ ins_encode %{ ++ Register newval = $newval$$Register; ++ Register oldval = $oldval$$Register; ++ Register res = $res$$Register; ++ Address addr($mem_ptr$$Register, 0); ++ ++ if (res != addr.base() && res != oldval && res != newval) { ++ __ cmpxchg32(addr, oldval, newval, res, true, false, true); ++ } else { ++ __ cmpxchg32(addr, oldval, newval, AT, true, false, true); ++ __ move(res, AT); ++ } ++ %} ++ ins_pipe(long_memory_op); ++%} ++ ++instruct compareAndSwapL(mRegI res, mRegP mem_ptr, mRegL oldval, mRegL newval) %{ ++ predicate(VM_Version::supports_cx8()); ++ match(Set res (CompareAndSwapL mem_ptr (Binary oldval newval))); ++ format %{ "CMPXCHG $newval, [$mem_ptr], $oldval @ compareAndSwapL" %} ++ ins_encode %{ ++ Register newval = $newval$$Register; ++ Register oldval = $oldval$$Register; ++ Register res = $res$$Register; ++ Address addr($mem_ptr$$Register, 0); ++ ++ if (res != addr.base() && res != oldval && res != newval) { ++ __ cmpxchg(addr, oldval, newval, res, false, true); ++ } else { ++ __ cmpxchg(addr, oldval, newval, AT, false, true); ++ __ move(res, AT); ++ } ++ %} ++ ins_pipe(long_memory_op); ++%} ++ ++instruct compareAndSwapP(mRegI res, mRegP mem_ptr, mRegP oldval, mRegP newval) %{ ++ match(Set res (CompareAndSwapP mem_ptr (Binary oldval newval))); ++ format %{ "CMPXCHG $newval, [$mem_ptr], $oldval @ compareAndSwapP" %} ++ ins_encode %{ ++ Register newval = $newval$$Register; ++ Register oldval = $oldval$$Register; ++ Register res = $res$$Register; ++ Address addr($mem_ptr$$Register, 0); ++ ++ if (res != addr.base() && res != oldval && res != newval) { ++ __ cmpxchg(addr, oldval, newval, res, false, true); ++ } else { ++ __ cmpxchg(addr, oldval, newval, AT, false, true); ++ __ move(res, AT); ++ } ++ %} ++ ins_pipe(long_memory_op); ++%} ++ ++instruct compareAndSwapN(mRegI res, mRegP mem_ptr, mRegN oldval, mRegN newval) %{ ++ match(Set res (CompareAndSwapN mem_ptr (Binary oldval newval))); ++ format %{ "CMPXCHG $newval, [$mem_ptr], $oldval @ compareAndSwapN" %} ++ ins_encode %{ ++ Register newval = $newval$$Register; ++ Register oldval = $oldval$$Register; ++ Register res = $res$$Register; ++ Address addr($mem_ptr$$Register, 0); ++ ++ if (res != addr.base() && res != oldval && res != newval) { ++ __ cmpxchg32(addr, oldval, newval, res, false, false, true); ++ } else { ++ __ cmpxchg32(addr, oldval, newval, AT, false, false, true); ++ __ move(res, AT); ++ } ++ %} ++ ins_pipe(long_memory_op); ++%} ++ ++//----------Max and Min-------------------------------------------------------- ++// Min Instructions ++//// ++// *** Min and Max using the conditional move are slower than the ++// *** branch version on a Pentium III. 
++// // Conditional move for min ++//instruct cmovI_reg_lt( eRegI op2, eRegI op1, eFlagsReg cr ) %{ ++// effect( USE_DEF op2, USE op1, USE cr ); ++// format %{ "CMOVlt $op2,$op1\t! min" %} ++// opcode(0x4C,0x0F); ++// ins_encode( OpcS, OpcP, RegReg( op2, op1 ) ); ++// ins_pipe( pipe_cmov_reg ); ++//%} ++// ++//// Min Register with Register (P6 version) ++//instruct minI_eReg_p6( eRegI op1, eRegI op2 ) %{ ++// predicate(VM_Version::supports_cmov() ); ++// match(Set op2 (MinI op1 op2)); ++// ins_cost(200); ++// expand %{ ++// eFlagsReg cr; ++// compI_eReg(cr,op1,op2); ++// cmovI_reg_lt(op2,op1,cr); ++// %} ++//%} ++ ++// Min Register with Register (generic version) ++instruct minI_Reg_Reg(mRegI dst, mRegI src) %{ ++ match(Set dst (MinI dst src)); ++ //effect(KILL flags); ++ ins_cost(80); ++ ++ format %{ "MIN $dst, $src @minI_Reg_Reg" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src = $src$$Register; ++ ++ __ slt(AT, src, dst); ++ __ movn(dst, src, AT); ++ ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++// Max Register with Register ++// *** Min and Max using the conditional move are slower than the ++// *** branch version on a Pentium III. ++// // Conditional move for max ++//instruct cmovI_reg_gt( eRegI op2, eRegI op1, eFlagsReg cr ) %{ ++// effect( USE_DEF op2, USE op1, USE cr ); ++// format %{ "CMOVgt $op2,$op1\t! max" %} ++// opcode(0x4F,0x0F); ++// ins_encode( OpcS, OpcP, RegReg( op2, op1 ) ); ++// ins_pipe( pipe_cmov_reg ); ++//%} ++// ++// // Max Register with Register (P6 version) ++//instruct maxI_eReg_p6( eRegI op1, eRegI op2 ) %{ ++// predicate(VM_Version::supports_cmov() ); ++// match(Set op2 (MaxI op1 op2)); ++// ins_cost(200); ++// expand %{ ++// eFlagsReg cr; ++// compI_eReg(cr,op1,op2); ++// cmovI_reg_gt(op2,op1,cr); ++// %} ++//%} ++ ++// Max Register with Register (generic version) ++instruct maxI_Reg_Reg(mRegI dst, mRegI src) %{ ++ match(Set dst (MaxI dst src)); ++ ins_cost(80); ++ ++ format %{ "MAX $dst, $src @maxI_Reg_Reg" %} ++ ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src = $src$$Register; ++ ++ __ slt(AT, dst, src); ++ __ movn(dst, src, AT); ++ ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct maxI_Reg_zero(mRegI dst, immI_0 zero) %{ ++ match(Set dst (MaxI dst zero)); ++ ins_cost(50); ++ ++ format %{ "MAX $dst, 0 @maxI_Reg_zero" %} ++ ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ ++ __ slt(AT, dst, R0); ++ __ movn(dst, R0, AT); ++ ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct zerox_long_reg_reg(mRegL dst, mRegL src, immL_MaxUI mask) ++%{ ++ match(Set dst (AndL src mask)); ++ ++ format %{ "movl $dst, $src\t# zero-extend long @ zerox_long_reg_reg" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src = $src$$Register; ++ ++ __ dext(dst, src, 0, 32); ++ %} ++ ins_pipe(ialu_regI_regI); ++%} ++ ++instruct combine_i2l(mRegL dst, mRegI src1, immL_MaxUI mask, mRegI src2, immI_32 shift32) ++%{ ++ match(Set dst (OrL (AndL (ConvI2L src1) mask) (LShiftL (ConvI2L src2) shift32))); ++ ++ format %{ "combine_i2l $dst, $src2(H), $src1(L) @ combine_i2l" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src1 = $src1$$Register; ++ Register src2 = $src2$$Register; ++ ++ if (src1 == dst) { ++ __ dinsu(dst, src2, 32, 32); ++ } else if (src2 == dst) { ++ __ dsll32(dst, dst, 0); ++ __ dins(dst, src1, 0, 32); ++ } else { ++ __ dext(dst, src1, 0, 32); ++ __ dinsu(dst, src2, 32, 32); ++ } ++ %} ++ ins_pipe(ialu_regI_regI); ++%} ++ ++// Zero-extend convert int to long ++instruct convI2L_reg_reg_zex(mRegL 
dst, mRegI src, immL_MaxUI mask) ++%{ ++ match(Set dst (AndL (ConvI2L src) mask)); ++ ++ format %{ "movl $dst, $src\t# i2l zero-extend @ convI2L_reg_reg_zex" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src = $src$$Register; ++ ++ __ dext(dst, src, 0, 32); ++ %} ++ ins_pipe(ialu_regI_regI); ++%} ++ ++instruct convL2I2L_reg_reg_zex(mRegL dst, mRegL src, immL_MaxUI mask) ++%{ ++ match(Set dst (AndL (ConvI2L (ConvL2I src)) mask)); ++ ++ format %{ "movl $dst, $src\t# i2l zero-extend @ convL2I2L_reg_reg_zex" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src = $src$$Register; ++ ++ __ dext(dst, src, 0, 32); ++ %} ++ ins_pipe(ialu_regI_regI); ++%} ++ ++// Match loading integer and casting it to unsigned int in long register. ++// LoadI + ConvI2L + AndL 0xffffffff. ++instruct loadUI2L_rmask(mRegL dst, memory mem, immL_MaxUI mask) %{ ++ match(Set dst (AndL (ConvI2L (LoadI mem)) mask)); ++ ++ format %{ "lwu $dst, $mem \t// zero-extend to long @ loadUI2L_rmask" %} ++ ins_encode %{ ++ relocInfo::relocType disp_reloc = $mem->disp_reloc(); ++ assert(disp_reloc == relocInfo::none, "cannot have disp"); ++ __ loadstore_enc($dst$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::LOAD_U_INT); ++ %} ++ ins_pipe(ialu_loadI); ++%} ++ ++instruct loadUI2L_lmask(mRegL dst, memory mem, immL_MaxUI mask) %{ ++ match(Set dst (AndL mask (ConvI2L (LoadI mem)))); ++ ++ format %{ "lwu $dst, $mem \t// zero-extend to long @ loadUI2L_lmask" %} ++ ins_encode %{ ++ relocInfo::relocType disp_reloc = $mem->disp_reloc(); ++ assert(disp_reloc == relocInfo::none, "cannot have disp"); ++ __ loadstore_enc($dst$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::LOAD_U_INT); ++ %} ++ ins_pipe(ialu_loadI); ++%} ++ ++ ++// ============================================================================ ++// Safepoint Instruction ++ ++instruct safePoint_poll() %{ ++ predicate(SafepointMechanism::uses_global_page_poll()); ++ match(SafePoint); ++ ++ ins_cost(105); ++ format %{ "poll for GC @ safePoint_poll" %} ++ ++ ins_encode %{ ++ __ block_comment("Safepoint:"); ++ __ set64(T9, (long)os::get_polling_page()); ++ __ relocate(relocInfo::poll_type); ++ __ lw(AT, T9, 0); ++ %} ++ ++ ins_pipe( ialu_storeI ); ++%} ++ ++instruct safePoint_poll_tls(mRegP poll) %{ ++ match(SafePoint poll); ++ predicate(SafepointMechanism::uses_thread_local_poll()); ++ effect(USE poll); ++ ++ ins_cost(125); ++ format %{ "lw AT, [$poll]\t" ++ "Safepoint @ [$poll] : poll for GC" %} ++ size(4); ++ ins_encode %{ ++ Register poll_reg = $poll$$Register; ++ ++ __ block_comment("Safepoint:"); ++ __ relocate(relocInfo::poll_type); ++ address pre_pc = __ pc(); ++ __ lw(AT, poll_reg, 0); ++ assert(nativeInstruction_at(pre_pc)->is_safepoint_poll(), "must emit lw AT, [$poll]"); ++ %} ++ ++ ins_pipe( ialu_storeI ); ++%} ++ ++//----------Arithmetic Conversion Instructions--------------------------------- ++ ++instruct roundFloat_nop(regF dst) ++%{ ++ match(Set dst (RoundFloat dst)); ++ ++ ins_cost(0); ++ ins_encode(); ++ ins_pipe(empty); ++%} ++ ++instruct roundDouble_nop(regD dst) ++%{ ++ match(Set dst (RoundDouble dst)); ++ ++ ins_cost(0); ++ ins_encode(); ++ ins_pipe(empty); ++%} ++ ++//---------- Zeros Count Instructions ------------------------------------------ ++// CountLeadingZerosINode CountTrailingZerosINode ++instruct countLeadingZerosI(mRegI dst, mRegI src) %{ ++ predicate(UseCountLeadingZerosInstructionMIPS64); ++ match(Set dst (CountLeadingZerosI src)); ++ ++ format %{ "clz $dst, 
$src\t# count leading zeros (int)" %} ++ ins_encode %{ ++ __ clz($dst$$Register, $src$$Register); ++ %} ++ ins_pipe( ialu_regL_regL ); ++%} ++ ++instruct countLeadingZerosL(mRegI dst, mRegL src) %{ ++ predicate(UseCountLeadingZerosInstructionMIPS64); ++ match(Set dst (CountLeadingZerosL src)); ++ ++ format %{ "dclz $dst, $src\t# count leading zeros (long)" %} ++ ins_encode %{ ++ __ dclz($dst$$Register, $src$$Register); ++ %} ++ ins_pipe( ialu_regL_regL ); ++%} ++ ++instruct countTrailingZerosI(mRegI dst, mRegI src) %{ ++ predicate(UseCountTrailingZerosInstructionMIPS64); ++ match(Set dst (CountTrailingZerosI src)); ++ ++ format %{ "ctz $dst, $src\t# count trailing zeros (int)" %} ++ ins_encode %{ ++ // ctz and dctz is gs instructions. ++ __ ctz($dst$$Register, $src$$Register); ++ %} ++ ins_pipe( ialu_regL_regL ); ++%} ++ ++instruct countTrailingZerosL(mRegI dst, mRegL src) %{ ++ predicate(UseCountTrailingZerosInstructionMIPS64); ++ match(Set dst (CountTrailingZerosL src)); ++ ++ format %{ "dcto $dst, $src\t# count trailing zeros (long)" %} ++ ins_encode %{ ++ __ dctz($dst$$Register, $src$$Register); ++ %} ++ ins_pipe( ialu_regL_regL ); ++%} ++ ++// ====================VECTOR INSTRUCTIONS===================================== ++ ++// Load vectors (8 bytes long) ++instruct loadV8(vecD dst, memory mem) %{ ++ predicate(n->as_LoadVector()->memory_size() == 8); ++ match(Set dst (LoadVector mem)); ++ ins_cost(125); ++ format %{ "load $dst, $mem\t! load vector (8 bytes)" %} ++ ins_encode %{ ++ __ loadstore_enc($dst$$FloatRegister, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::LOAD_DOUBLE); ++ %} ++ ins_pipe( fpu_loadF ); ++%} ++ ++// Store vectors (8 bytes long) ++instruct storeV8(memory mem, vecD src) %{ ++ predicate(n->as_StoreVector()->memory_size() == 8); ++ match(Set mem (StoreVector mem src)); ++ ins_cost(145); ++ format %{ "store $mem, $src\t! store vector (8 bytes)" %} ++ ins_encode %{ ++ __ loadstore_enc($src$$FloatRegister, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::STORE_DOUBLE); ++ %} ++ ins_pipe( fpu_storeF ); ++%} ++ ++instruct Repl8B_DSP(vecD dst, mRegI src) %{ ++ predicate(n->as_Vector()->length() == 8 && UseLEXT3); ++ match(Set dst (ReplicateB src)); ++ ins_cost(100); ++ format %{ "replv_ob AT, $src\n\t" ++ "dmtc1 AT, $dst\t! replicate8B" %} ++ ins_encode %{ ++ __ replv_ob(AT, $src$$Register); ++ __ dmtc1(AT, $dst$$FloatRegister); ++ %} ++ ins_pipe( pipe_mtc1 ); ++%} ++ ++instruct Repl8B(vecD dst, mRegI src) %{ ++ predicate(n->as_Vector()->length() == 8); ++ match(Set dst (ReplicateB src)); ++ ins_cost(140); ++ format %{ "move AT, $src\n\t" ++ "dins AT, AT, 8, 8\n\t" ++ "dins AT, AT, 16, 16\n\t" ++ "dinsu AT, AT, 32, 32\n\t" ++ "dmtc1 AT, $dst\t! replicate8B" %} ++ ins_encode %{ ++ __ move(AT, $src$$Register); ++ __ dins(AT, AT, 8, 8); ++ __ dins(AT, AT, 16, 16); ++ __ dinsu(AT, AT, 32, 32); ++ __ dmtc1(AT, $dst$$FloatRegister); ++ %} ++ ins_pipe( pipe_mtc1 ); ++%} ++ ++instruct Repl8B_imm_DSP(vecD dst, immI con) %{ ++ predicate(n->as_Vector()->length() == 8 && UseLEXT3 && VM_Version::supports_dsp()); ++ match(Set dst (ReplicateB con)); ++ ins_cost(110); ++ format %{ "repl_ob AT, [$con]\n\t" ++ "dmtc1 AT, $dst,0x00\t! 
replicate8B($con)" %} ++ ins_encode %{ ++ int val = $con$$constant; ++ __ repl_ob(AT, val); ++ __ dmtc1(AT, $dst$$FloatRegister); ++ %} ++ ins_pipe( pipe_mtc1 ); ++%} ++ ++instruct Repl8B_imm(vecD dst, immI con) %{ ++ predicate(n->as_Vector()->length() == 8); ++ match(Set dst (ReplicateB con)); ++ ins_cost(150); ++ format %{ "move AT, [$con]\n\t" ++ "dins AT, AT, 8, 8\n\t" ++ "dins AT, AT, 16, 16\n\t" ++ "dinsu AT, AT, 32, 32\n\t" ++ "dmtc1 AT, $dst,0x00\t! replicate8B($con)" %} ++ ins_encode %{ ++ __ move(AT, $con$$constant); ++ __ dins(AT, AT, 8, 8); ++ __ dins(AT, AT, 16, 16); ++ __ dinsu(AT, AT, 32, 32); ++ __ dmtc1(AT, $dst$$FloatRegister); ++ %} ++ ins_pipe( pipe_mtc1 ); ++%} ++ ++instruct Repl8B_zero(vecD dst, immI_0 zero) %{ ++ predicate(n->as_Vector()->length() == 8); ++ match(Set dst (ReplicateB zero)); ++ ins_cost(90); ++ format %{ "dmtc1 R0, $dst\t! replicate8B zero" %} ++ ins_encode %{ ++ __ dmtc1(R0, $dst$$FloatRegister); ++ %} ++ ins_pipe( pipe_mtc1 ); ++%} ++ ++instruct Repl8B_M1(vecD dst, immI_M1 M1) %{ ++ predicate(n->as_Vector()->length() == 8); ++ match(Set dst (ReplicateB M1)); ++ ins_cost(80); ++ format %{ "dmtc1 -1, $dst\t! replicate8B -1" %} ++ ins_encode %{ ++ __ nor(AT, R0, R0); ++ __ dmtc1(AT, $dst$$FloatRegister); ++ %} ++ ins_pipe( pipe_mtc1 ); ++%} ++ ++instruct Repl4S_DSP(vecD dst, mRegI src) %{ ++ predicate(n->as_Vector()->length() == 4 && UseLEXT3 && VM_Version::supports_dsp()); ++ match(Set dst (ReplicateS src)); ++ ins_cost(100); ++ format %{ "replv_qh AT, $src\n\t" ++ "dmtc1 AT, $dst\t! replicate4S" %} ++ ins_encode %{ ++ __ replv_qh(AT, $src$$Register); ++ __ dmtc1(AT, $dst$$FloatRegister); ++ %} ++ ins_pipe( pipe_mtc1 ); ++%} ++ ++instruct Repl4S(vecD dst, mRegI src) %{ ++ predicate(n->as_Vector()->length() == 4); ++ match(Set dst (ReplicateS src)); ++ ins_cost(120); ++ format %{ "move AT, $src \n\t" ++ "dins AT, AT, 16, 16\n\t" ++ "dinsu AT, AT, 32, 32\n\t" ++ "dmtc1 AT, $dst\t! replicate4S" %} ++ ins_encode %{ ++ __ move(AT, $src$$Register); ++ __ dins(AT, AT, 16, 16); ++ __ dinsu(AT, AT, 32, 32); ++ __ dmtc1(AT, $dst$$FloatRegister); ++ %} ++ ins_pipe( pipe_mtc1 ); ++%} ++ ++instruct Repl4S_imm_DSP(vecD dst, immI con) %{ ++ predicate(n->as_Vector()->length() == 4 && UseLEXT3 && VM_Version::supports_dsp()); ++ match(Set dst (ReplicateS con)); ++ ins_cost(100); ++ format %{ "repl_qh AT, [$con]\n\t" ++ "dmtc1 AT, $dst\t! replicate4S($con)" %} ++ ins_encode %{ ++ int val = $con$$constant; ++ if ( Assembler::is_simm(val, 10)) { ++ //repl_qh supports 10 bits immediate ++ __ repl_qh(AT, val); ++ } else { ++ __ li32(AT, val); ++ __ replv_qh(AT, AT); ++ } ++ __ dmtc1(AT, $dst$$FloatRegister); ++ %} ++ ins_pipe( pipe_mtc1 ); ++%} ++ ++instruct Repl4S_imm(vecD dst, immI con) %{ ++ predicate(n->as_Vector()->length() == 4); ++ match(Set dst (ReplicateS con)); ++ ins_cost(110); ++ format %{ "move AT, [$con]\n\t" ++ "dins AT, AT, 16, 16\n\t" ++ "dinsu AT, AT, 32, 32\n\t" ++ "dmtc1 AT, $dst\t! replicate4S($con)" %} ++ ins_encode %{ ++ __ move(AT, $con$$constant); ++ __ dins(AT, AT, 16, 16); ++ __ dinsu(AT, AT, 32, 32); ++ __ dmtc1(AT, $dst$$FloatRegister); ++ %} ++ ins_pipe( pipe_mtc1 ); ++%} ++ ++instruct Repl4S_zero(vecD dst, immI_0 zero) %{ ++ predicate(n->as_Vector()->length() == 4); ++ match(Set dst (ReplicateS zero)); ++ format %{ "dmtc1 R0, $dst\t! 
replicate4S zero" %} ++ ins_encode %{ ++ __ dmtc1(R0, $dst$$FloatRegister); ++ %} ++ ins_pipe( pipe_mtc1 ); ++%} ++ ++instruct Repl4S_M1(vecD dst, immI_M1 M1) %{ ++ predicate(n->as_Vector()->length() == 4); ++ match(Set dst (ReplicateS M1)); ++ format %{ "dmtc1 -1, $dst\t! replicate4S -1" %} ++ ins_encode %{ ++ __ nor(AT, R0, R0); ++ __ dmtc1(AT, $dst$$FloatRegister); ++ %} ++ ins_pipe( pipe_mtc1 ); ++%} ++ ++// Replicate integer (4 byte) scalar to be vector ++instruct Repl2I(vecD dst, mRegI src) %{ ++ predicate(n->as_Vector()->length() == 2); ++ match(Set dst (ReplicateI src)); ++ format %{ "dins AT, $src, 0, 32\n\t" ++ "dinsu AT, $src, 32, 32\n\t" ++ "dmtc1 AT, $dst\t! replicate2I" %} ++ ins_encode %{ ++ __ dins(AT, $src$$Register, 0, 32); ++ __ dinsu(AT, $src$$Register, 32, 32); ++ __ dmtc1(AT, $dst$$FloatRegister); ++ %} ++ ins_pipe( pipe_mtc1 ); ++%} ++ ++// Replicate integer (4 byte) scalar immediate to be vector by loading from const table. ++instruct Repl2I_imm(vecD dst, immI con, mA7RegI tmp) %{ ++ predicate(n->as_Vector()->length() == 2); ++ match(Set dst (ReplicateI con)); ++ effect(KILL tmp); ++ format %{ "li32 AT, [$con], 32\n\t" ++ "dinsu AT, AT\n\t" ++ "dmtc1 AT, $dst\t! replicate2I($con)" %} ++ ins_encode %{ ++ int val = $con$$constant; ++ __ li32(AT, val); ++ __ dinsu(AT, AT, 32, 32); ++ __ dmtc1(AT, $dst$$FloatRegister); ++ %} ++ ins_pipe( pipe_mtc1 ); ++%} ++ ++// Replicate integer (4 byte) scalar zero to be vector ++instruct Repl2I_zero(vecD dst, immI_0 zero) %{ ++ predicate(n->as_Vector()->length() == 2); ++ match(Set dst (ReplicateI zero)); ++ format %{ "dmtc1 R0, $dst\t! replicate2I zero" %} ++ ins_encode %{ ++ __ dmtc1(R0, $dst$$FloatRegister); ++ %} ++ ins_pipe( pipe_mtc1 ); ++%} ++ ++// Replicate integer (4 byte) scalar -1 to be vector ++instruct Repl2I_M1(vecD dst, immI_M1 M1) %{ ++ predicate(n->as_Vector()->length() == 2); ++ match(Set dst (ReplicateI M1)); ++ format %{ "dmtc1 -1, $dst\t! replicate2I -1, use AT" %} ++ ins_encode %{ ++ __ nor(AT, R0, R0); ++ __ dmtc1(AT, $dst$$FloatRegister); ++ %} ++ ins_pipe( pipe_mtc1 ); ++%} ++ ++// Replicate float (4 byte) scalar to be vector ++instruct Repl2F(vecD dst, regF src) %{ ++ predicate(n->as_Vector()->length() == 2); ++ match(Set dst (ReplicateF src)); ++ format %{ "cvt.ps $dst, $src, $src\t! replicate2F" %} ++ ins_encode %{ ++ __ cvt_ps_s($dst$$FloatRegister, $src$$FloatRegister, $src$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++// Replicate float (4 byte) scalar zero to be vector ++instruct Repl2F_zero(vecD dst, immF_0 zero) %{ ++ predicate(n->as_Vector()->length() == 2); ++ match(Set dst (ReplicateF zero)); ++ format %{ "dmtc1 R0, $dst\t! replicate2F zero" %} ++ ins_encode %{ ++ __ dmtc1(R0, $dst$$FloatRegister); ++ %} ++ ins_pipe( pipe_mtc1 ); ++%} ++ ++ ++// ====================VECTOR ARITHMETIC======================================= ++ ++// --------------------------------- ADD -------------------------------------- ++ ++// Floats vector add ++// kernel does not have emulation of PS instructions yet, so PS instructions is disabled. ++instruct vadd2F(vecD dst, vecD src) %{ ++ predicate(n->as_Vector()->length() == 2); ++ match(Set dst (AddVF dst src)); ++ format %{ "add.ps $dst,$src\t! 
add packed2F" %} ++ ins_encode %{ ++ __ add_ps($dst$$FloatRegister, $dst$$FloatRegister, $src$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct vadd2F3(vecD dst, vecD src1, vecD src2) %{ ++ predicate(n->as_Vector()->length() == 2); ++ match(Set dst (AddVF src1 src2)); ++ format %{ "add.ps $dst,$src1,$src2\t! add packed2F" %} ++ ins_encode %{ ++ __ add_ps($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); ++ %} ++ ins_pipe( fpu_regF_regF ); ++%} ++ ++// --------------------------------- SUB -------------------------------------- ++ ++// Floats vector sub ++instruct vsub2F(vecD dst, vecD src) %{ ++ predicate(n->as_Vector()->length() == 2); ++ match(Set dst (SubVF dst src)); ++ format %{ "sub.ps $dst,$src\t! sub packed2F" %} ++ ins_encode %{ ++ __ sub_ps($dst$$FloatRegister, $dst$$FloatRegister, $src$$FloatRegister); ++ %} ++ ins_pipe( fpu_regF_regF ); ++%} ++ ++// --------------------------------- MUL -------------------------------------- ++ ++// Floats vector mul ++instruct vmul2F(vecD dst, vecD src) %{ ++ predicate(n->as_Vector()->length() == 2); ++ match(Set dst (MulVF dst src)); ++ format %{ "mul.ps $dst, $src\t! mul packed2F" %} ++ ins_encode %{ ++ __ mul_ps($dst$$FloatRegister, $dst$$FloatRegister, $src$$FloatRegister); ++ %} ++ ins_pipe( fpu_regF_regF ); ++%} ++ ++instruct vmul2F3(vecD dst, vecD src1, vecD src2) %{ ++ predicate(n->as_Vector()->length() == 2); ++ match(Set dst (MulVF src1 src2)); ++ format %{ "mul.ps $dst, $src1, $src2\t! mul packed2F" %} ++ ins_encode %{ ++ __ mul_ps($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); ++ %} ++ ins_pipe( fpu_regF_regF ); ++%} ++ ++// --------------------------------- DIV -------------------------------------- ++// MIPS do not have div.ps ++ ++// --------------------------------- MADD -------------------------------------- ++// Floats vector madd ++//instruct vmadd2F(vecD dst, vecD src1, vecD src2, vecD src3) %{ ++// predicate(n->as_Vector()->length() == 2); ++// match(Set dst (AddVF (MulVF src1 src2) src3)); ++// ins_cost(50); ++// format %{ "madd.ps $dst, $src3, $src1, $src2\t! madd packed2F" %} ++// ins_encode %{ ++// __ madd_ps($dst$$FloatRegister, $src3$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); ++// %} ++// ins_pipe( fpu_regF_regF ); ++//%} ++ ++ ++//----------PEEPHOLE RULES----------------------------------------------------- ++// These must follow all instruction definitions as they use the names ++// defined in the instructions definitions. ++// ++// peepmatch ( root_instr_name [preceeding_instruction]* ); ++// ++// peepconstraint %{ ++// (instruction_number.operand_name relational_op instruction_number.operand_name ++// [, ...] ); ++// // instruction numbers are zero-based using left to right order in peepmatch ++// ++// peepreplace ( instr_name ( [instruction_number.operand_name]* ) ); ++// // provide an instruction_number.operand_name for each operand that appears ++// // in the replacement instruction's match rule ++// ++// ---------VM FLAGS--------------------------------------------------------- ++// ++// All peephole optimizations can be turned off using -XX:-OptoPeephole ++// ++// Each peephole rule is given an identifying number starting with zero and ++// increasing by one in the order seen by the parser. An individual peephole ++// can be enabled, and all others disabled, by using -XX:OptoPeepholeAt=# ++// on the command-line. 
++// ++// ---------CURRENT LIMITATIONS---------------------------------------------- ++// ++// Only match adjacent instructions in same basic block ++// Only equality constraints ++// Only constraints between operands, not (0.dest_reg == EAX_enc) ++// Only one replacement instruction ++// ++// ---------EXAMPLE---------------------------------------------------------- ++// ++// // pertinent parts of existing instructions in architecture description ++// instruct movI(eRegI dst, eRegI src) %{ ++// match(Set dst (CopyI src)); ++// %} ++// ++// instruct incI_eReg(eRegI dst, immI_1 src, eFlagsReg cr) %{ ++// match(Set dst (AddI dst src)); ++// effect(KILL cr); ++// %} ++// ++// // Change (inc mov) to lea ++// peephole %{ ++// // increment preceeded by register-register move ++// peepmatch ( incI_eReg movI ); ++// // require that the destination register of the increment ++// // match the destination register of the move ++// peepconstraint ( 0.dst == 1.dst ); ++// // construct a replacement instruction that sets ++// // the destination to ( move's source register + one ) ++// peepreplace ( leaI_eReg_immI( 0.dst 1.src 0.src ) ); ++// %} ++// ++// Implementation no longer uses movX instructions since ++// machine-independent system no longer uses CopyX nodes. ++// ++// peephole %{ ++// peepmatch ( incI_eReg movI ); ++// peepconstraint ( 0.dst == 1.dst ); ++// peepreplace ( leaI_eReg_immI( 0.dst 1.src 0.src ) ); ++// %} ++// ++// peephole %{ ++// peepmatch ( decI_eReg movI ); ++// peepconstraint ( 0.dst == 1.dst ); ++// peepreplace ( leaI_eReg_immI( 0.dst 1.src 0.src ) ); ++// %} ++// ++// peephole %{ ++// peepmatch ( addI_eReg_imm movI ); ++// peepconstraint ( 0.dst == 1.dst ); ++// peepreplace ( leaI_eReg_immI( 0.dst 1.src 0.src ) ); ++// %} ++// ++// peephole %{ ++// peepmatch ( addP_eReg_imm movP ); ++// peepconstraint ( 0.dst == 1.dst ); ++// peepreplace ( leaP_eReg_immI( 0.dst 1.src 0.src ) ); ++// %} ++ ++// // Change load of spilled value to only a spill ++// instruct storeI(memory mem, eRegI src) %{ ++// match(Set mem (StoreI mem src)); ++// %} ++// ++// instruct loadI(eRegI dst, memory mem) %{ ++// match(Set dst (LoadI mem)); ++// %} ++// ++//peephole %{ ++// peepmatch ( loadI storeI ); ++// peepconstraint ( 1.src == 0.dst, 1.mem == 0.mem ); ++// peepreplace ( storeI( 1.mem 1.mem 1.src ) ); ++//%} ++ ++//----------SMARTSPILL RULES--------------------------------------------------- ++// These must follow all instruction definitions as they use the names ++// defined in the instructions definitions. ++ +diff --git a/src/hotspot/cpu/mips/nativeInst_mips.cpp b/src/hotspot/cpu/mips/nativeInst_mips.cpp +new file mode 100644 +index 0000000000..96a147eaa5 +--- /dev/null ++++ b/src/hotspot/cpu/mips/nativeInst_mips.cpp +@@ -0,0 +1,1821 @@ ++/* ++ * Copyright (c) 1997, 2014, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). 
++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#include "precompiled.hpp" ++#include "asm/macroAssembler.hpp" ++#include "compiler/disassembler.hpp" ++#include "code/codeCache.hpp" ++#include "code/compiledIC.hpp" ++#include "memory/resourceArea.hpp" ++#include "nativeInst_mips.hpp" ++#include "oops/oop.inline.hpp" ++#include "runtime/handles.hpp" ++#include "runtime/sharedRuntime.hpp" ++#include "runtime/stubRoutines.hpp" ++#include "utilities/ostream.hpp" ++ ++#include ++ ++#define T0 RT0 ++#define T1 RT1 ++#define T2 RT2 ++#define T3 RT3 ++#define T8 RT8 ++#define T9 RT9 ++ ++void NativeInstruction::wrote(int offset) { ++ ICache::invalidate_word(addr_at(offset)); ++} ++ ++void NativeInstruction::set_long_at(int offset, long i) { ++ address addr = addr_at(offset); ++ *(long*)addr = i; ++ ICache::invalidate_range(addr, 8); ++} ++ ++static int illegal_instruction_bits = 0; ++ ++int NativeInstruction::illegal_instruction() { ++ if (illegal_instruction_bits == 0) { ++ ResourceMark rm; ++ char buf[40]; ++ CodeBuffer cbuf((address)&buf[0], 20); ++ MacroAssembler* a = new MacroAssembler(&cbuf); ++ address ia = a->pc(); ++ a->brk(11); ++ int bits = *(int*)ia; ++ illegal_instruction_bits = bits; ++ } ++ return illegal_instruction_bits; ++} ++ ++bool NativeInstruction::is_int_branch() { ++ switch(Assembler::opcode(insn_word())) { ++ case Assembler::beq_op: ++ case Assembler::beql_op: ++ case Assembler::bgtz_op: ++ case Assembler::bgtzl_op: ++ case Assembler::blez_op: ++ case Assembler::blezl_op: ++ case Assembler::bne_op: ++ case Assembler::bnel_op: ++ return true; ++ case Assembler::regimm_op: ++ switch(Assembler::rt(insn_word())) { ++ case Assembler::bgez_op: ++ case Assembler::bgezal_op: ++ case Assembler::bgezall_op: ++ case Assembler::bgezl_op: ++ case Assembler::bltz_op: ++ case Assembler::bltzal_op: ++ case Assembler::bltzall_op: ++ case Assembler::bltzl_op: ++ return true; ++ } ++ } ++ ++ return false; ++} ++ ++bool NativeInstruction::is_float_branch() { ++ if (!is_op(Assembler::cop1_op) || ++ !is_rs((Register)Assembler::bc1f_op)) return false; ++ ++ switch(Assembler::rt(insn_word())) { ++ case Assembler::bcf_op: ++ case Assembler::bcfl_op: ++ case Assembler::bct_op: ++ case Assembler::bctl_op: ++ return true; ++ } ++ ++ return false; ++} ++ ++ ++void NativeCall::verify() { ++ // make sure code pattern is actually a call instruction ++ ++ // nop ++ // nop ++ // nop ++ // nop ++ // jal target ++ // nop ++ if ( is_nop() && ++ nativeInstruction_at(addr_at(4))->is_nop() && ++ nativeInstruction_at(addr_at(8))->is_nop() && ++ nativeInstruction_at(addr_at(12))->is_nop() && ++ is_op(int_at(16), Assembler::jal_op) && ++ nativeInstruction_at(addr_at(20))->is_nop() ) { ++ return; ++ } ++ ++ // jal targe ++ // nop ++ if ( is_op(int_at(0), Assembler::jal_op) && ++ nativeInstruction_at(addr_at(4))->is_nop() ) { ++ return; ++ } ++ ++ // li64 ++ if ( is_op(Assembler::lui_op) && ++ is_op(int_at(4), Assembler::ori_op) && ++ is_special_op(int_at(8), Assembler::dsll_op) && ++ is_op(int_at(12), Assembler::ori_op) && ++ is_special_op(int_at(16), Assembler::dsll_op) && ++ is_op(int_at(20), Assembler::ori_op) && ++ is_special_op(int_at(24), 
Assembler::jalr_op) ) { ++ return; ++ } ++ ++ //lui dst, imm16 ++ //ori dst, dst, imm16 ++ //dsll dst, dst, 16 ++ //ori dst, dst, imm16 ++ if ( is_op(Assembler::lui_op) && ++ is_op (int_at(4), Assembler::ori_op) && ++ is_special_op(int_at(8), Assembler::dsll_op) && ++ is_op (int_at(12), Assembler::ori_op) && ++ is_special_op(int_at(16), Assembler::jalr_op) ) { ++ return; ++ } ++ ++ //ori dst, R0, imm16 ++ //dsll dst, dst, 16 ++ //ori dst, dst, imm16 ++ //nop ++ if ( is_op(Assembler::ori_op) && ++ is_special_op(int_at(4), Assembler::dsll_op) && ++ is_op (int_at(8), Assembler::ori_op) && ++ nativeInstruction_at(addr_at(12))->is_nop() && ++ is_special_op(int_at(16), Assembler::jalr_op) ) { ++ return; ++ } ++ ++ //ori dst, R0, imm16 ++ //dsll dst, dst, 16 ++ //nop ++ //nop ++ if ( is_op(Assembler::ori_op) && ++ is_special_op(int_at(4), Assembler::dsll_op) && ++ nativeInstruction_at(addr_at(8))->is_nop() && ++ nativeInstruction_at(addr_at(12))->is_nop() && ++ is_special_op(int_at(16), Assembler::jalr_op) ) { ++ return; ++ } ++ ++ //daddiu dst, R0, imm16 ++ //nop ++ //nop ++ //nop ++ if ( is_op(Assembler::daddiu_op) && ++ nativeInstruction_at(addr_at(4))->is_nop() && ++ nativeInstruction_at(addr_at(8))->is_nop() && ++ nativeInstruction_at(addr_at(12))->is_nop() && ++ is_special_op(int_at(16), Assembler::jalr_op) ) { ++ return; ++ } ++ ++ // FIXME: why add jr_op here? ++ //daddiu dst, R0, imm16 ++ //nop ++ //nop ++ //nop ++ if ( is_op(Assembler::daddiu_op) && ++ nativeInstruction_at(addr_at(4))->is_nop() && ++ nativeInstruction_at(addr_at(8))->is_nop() && ++ nativeInstruction_at(addr_at(12))->is_nop() && ++ is_special_op(int_at(16), Assembler::jr_op) ) { ++ return; ++ } ++ ++ //lui dst, imm16 ++ //ori dst, dst, imm16 ++ //nop ++ //nop ++ if ( is_op(Assembler::lui_op) && ++ is_op (int_at(4), Assembler::ori_op) && ++ nativeInstruction_at(addr_at(8))->is_nop() && ++ nativeInstruction_at(addr_at(12))->is_nop() && ++ is_special_op(int_at(16), Assembler::jalr_op) ) { ++ return; ++ } ++ ++ //lui dst, imm16 ++ //nop ++ //nop ++ //nop ++ if ( is_op(Assembler::lui_op) && ++ nativeInstruction_at(addr_at(4))->is_nop() && ++ nativeInstruction_at(addr_at(8))->is_nop() && ++ nativeInstruction_at(addr_at(12))->is_nop() && ++ is_special_op(int_at(16), Assembler::jalr_op) ) { ++ return; ++ } ++ ++ //daddiu dst, R0, imm16 ++ //nop ++ if ( is_op(Assembler::daddiu_op) && ++ nativeInstruction_at(addr_at(4))->is_nop() && ++ is_special_op(int_at(8), Assembler::jalr_op) ) { ++ return; ++ } ++ ++ //lui dst, imm16 ++ //ori dst, dst, imm16 ++ if ( is_op(Assembler::lui_op) && ++ is_op (int_at(4), Assembler::ori_op) && ++ is_special_op(int_at(8), Assembler::jalr_op) ) { ++ return; ++ } ++ ++ //lui dst, imm16 ++ //nop ++ if ( is_op(Assembler::lui_op) && ++ nativeInstruction_at(addr_at(4))->is_nop() && ++ is_special_op(int_at(8), Assembler::jalr_op) ) { ++ return; ++ } ++ ++ if (nativeInstruction_at(addr_at(0))->is_trampoline_call()) ++ return; ++ ++ fatal("not a call"); ++} ++ ++address NativeCall::target_addr_for_insn() const { ++ // jal target ++ // nop ++ if ( is_op(int_at(0), Assembler::jal_op) && ++ nativeInstruction_at(addr_at(4))->is_nop()) { ++ int instr_index = int_at(0) & 0x3ffffff; ++ intptr_t target_high = ((intptr_t)addr_at(4)) & 0xfffffffff0000000; ++ intptr_t target = target_high | (instr_index << 2); ++ return (address)target; ++ } ++ ++ // nop ++ // nop ++ // nop ++ // nop ++ // jal target ++ // nop ++ if ( nativeInstruction_at(addr_at(0))->is_nop() && ++ nativeInstruction_at(addr_at(4))->is_nop() && ++ 
nativeInstruction_at(addr_at(8))->is_nop() && ++ nativeInstruction_at(addr_at(12))->is_nop() && ++ is_op(int_at(16), Assembler::jal_op) && ++ nativeInstruction_at(addr_at(20))->is_nop()) { ++ int instr_index = int_at(16) & 0x3ffffff; ++ intptr_t target_high = ((intptr_t)addr_at(20)) & 0xfffffffff0000000; ++ intptr_t target = target_high | (instr_index << 2); ++ return (address)target; ++ } ++ ++ // li64 ++ if ( is_op(Assembler::lui_op) && ++ is_op(int_at(4), Assembler::ori_op) && ++ is_special_op(int_at(8), Assembler::dsll_op) && ++ is_op(int_at(12), Assembler::ori_op) && ++ is_special_op(int_at(16), Assembler::dsll_op) && ++ is_op(int_at(20), Assembler::ori_op) ) { ++ ++ return (address)Assembler::merge( (intptr_t)(int_at(20) & 0xffff), ++ (intptr_t)(int_at(12) & 0xffff), ++ (intptr_t)(int_at(4) & 0xffff), ++ (intptr_t)(int_at(0) & 0xffff)); ++ } ++ ++ //lui dst, imm16 ++ //ori dst, dst, imm16 ++ //dsll dst, dst, 16 ++ //ori dst, dst, imm16 ++ if ( is_op(Assembler::lui_op) && ++ is_op (int_at(4), Assembler::ori_op) && ++ is_special_op(int_at(8), Assembler::dsll_op) && ++ is_op (int_at(12), Assembler::ori_op) ) { ++ ++ return (address)Assembler::merge( (intptr_t)(int_at(12) & 0xffff), ++ (intptr_t)(int_at(4) & 0xffff), ++ (intptr_t)(int_at(0) & 0xffff), ++ (intptr_t)0); ++ } ++ ++ //lui dst, imm16 ++ //ori dst, dst, imm16 ++ //dsll dst, dst, 16 ++ //ld dst, dst, imm16 ++ if ( is_op(Assembler::lui_op) && ++ is_op (int_at(4), Assembler::ori_op) && ++ is_special_op(int_at(8), Assembler::dsll_op) && ++ is_op (int_at(12), Assembler::ld_op) ) { ++ ++ address dest = (address)Assembler::merge( (intptr_t)0, ++ (intptr_t)(int_at(4) & 0xffff), ++ (intptr_t)(int_at(0) & 0xffff), ++ (intptr_t)0); ++ return dest + Assembler::simm16((intptr_t)int_at(12) & 0xffff); ++ } ++ ++ //ori dst, R0, imm16 ++ //dsll dst, dst, 16 ++ //ori dst, dst, imm16 ++ //nop ++ if ( is_op(Assembler::ori_op) && ++ is_special_op(int_at(4), Assembler::dsll_op) && ++ is_op (int_at(8), Assembler::ori_op) && ++ nativeInstruction_at(addr_at(12))->is_nop()) { ++ ++ return (address)Assembler::merge( (intptr_t)(int_at(8) & 0xffff), ++ (intptr_t)(int_at(0) & 0xffff), ++ (intptr_t)0, ++ (intptr_t)0); ++ } ++ ++ //ori dst, R0, imm16 ++ //dsll dst, dst, 16 ++ //nop ++ //nop ++ if ( is_op(Assembler::ori_op) && ++ is_special_op(int_at(4), Assembler::dsll_op) && ++ nativeInstruction_at(addr_at(8))->is_nop() && ++ nativeInstruction_at(addr_at(12))->is_nop()) { ++ ++ return (address)Assembler::merge( (intptr_t)(0), ++ (intptr_t)(int_at(0) & 0xffff), ++ (intptr_t)0, ++ (intptr_t)0); ++ } ++ ++ //daddiu dst, R0, imm16 ++ //nop ++ //nop <-- optional ++ //nop <-- optional ++ if ( is_op(Assembler::daddiu_op) && ++ nativeInstruction_at(addr_at(4))->is_nop() ) { ++ ++ int sign = int_at(0) & 0x8000; ++ if (sign == 0) { ++ return (address)Assembler::merge( (intptr_t)(int_at(0) & 0xffff), ++ (intptr_t)0, ++ (intptr_t)0, ++ (intptr_t)0); ++ } else { ++ return (address)Assembler::merge( (intptr_t)(int_at(0) & 0xffff), ++ (intptr_t)(0xffff), ++ (intptr_t)(0xffff), ++ (intptr_t)(0xffff)); ++ } ++ } ++ ++ //lui dst, imm16 ++ //ori dst, dst, imm16 ++ //nop <-- optional ++ //nop <-- optional ++ if ( is_op(Assembler::lui_op) && ++ is_op (int_at(4), Assembler::ori_op) ) { ++ ++ int sign = int_at(0) & 0x8000; ++ if (sign == 0) { ++ return (address)Assembler::merge( (intptr_t)(int_at(4) & 0xffff), ++ (intptr_t)(int_at(0) & 0xffff), ++ (intptr_t)0, ++ (intptr_t)0); ++ } else { ++ return (address)Assembler::merge( (intptr_t)(int_at(4) & 0xffff), ++ (intptr_t)(int_at(0) 
& 0xffff), ++ (intptr_t)(0xffff), ++ (intptr_t)(0xffff)); ++ } ++ } ++ ++ //lui dst, imm16 ++ //nop ++ //nop <-- optional ++ //nop <-- optional ++ if ( is_op(Assembler::lui_op) && ++ nativeInstruction_at(addr_at(4))->is_nop() ) { ++ ++ int sign = int_at(0) & 0x8000; ++ if (sign == 0) { ++ return (address)Assembler::merge( (intptr_t)0, ++ (intptr_t)(int_at(0) & 0xffff), ++ (intptr_t)0, ++ (intptr_t)0); ++ } else { ++ return (address)Assembler::merge( (intptr_t)0, ++ (intptr_t)(int_at(0) & 0xffff), ++ (intptr_t)(0xffff), ++ (intptr_t)(0xffff)); ++ } ++ } ++ ++ tty->print_cr("not a call: addr = " INTPTR_FORMAT , p2i(addr_at(0))); ++ tty->print_cr("======= Start decoding at addr = " INTPTR_FORMAT " =======", p2i(addr_at(0))); ++ Disassembler::decode(addr_at(0) - 2 * 4, addr_at(0) + 8 * 4, tty); ++ tty->print_cr("======= End of decoding ======="); ++ fatal("not a call"); ++ return NULL; // unreachable ++} ++ ++// Extract call destination from a NativeCall. The call might use a trampoline stub. ++address NativeCall::destination() const { ++ address addr = (address)this; ++ address destination = target_addr_for_insn(); ++ // Do we use a trampoline stub for this call? ++ // Trampoline stubs are located behind the main code. ++ if (destination > addr) { ++ // Filter out recursive method invocation (call to verified/unverified entry point). ++ CodeBlob* cb = CodeCache::find_blob_unsafe(addr); // Else we get assertion if nmethod is zombie. ++ assert(cb && cb->is_nmethod(), "sanity"); ++ nmethod *nm = (nmethod *)cb; ++ NativeInstruction* ni = nativeInstruction_at(addr); ++ if (nm->stub_contains(destination) && ni->is_trampoline_call()) { ++ // Yes we do, so get the destination from the trampoline stub. ++ const address trampoline_stub_addr = destination; ++ destination = nativeCallTrampolineStub_at(trampoline_stub_addr)->destination(); ++ } ++ } ++ return destination; ++} ++ ++// Similar to replace_mt_safe, but just changes the destination. The ++// important thing is that free-running threads are able to execute this ++// call instruction at all times. ++// ++// Used in the runtime linkage of calls; see class CompiledIC. ++// ++// Add parameter assert_lock to switch off assertion ++// during code generation, where no patching lock is needed. ++void NativeCall::set_destination_mt_safe(address dest, bool assert_lock) { ++ assert(!assert_lock || ++ (Patching_lock->is_locked() || SafepointSynchronize::is_at_safepoint()), ++ "concurrent code patching"); ++ ++ ResourceMark rm; ++ address addr_call = addr_at(0); ++ assert(NativeCall::is_call_at(addr_call), "unexpected code at call site"); ++ // Patch the constant in the call's trampoline stub. 
++ if (MacroAssembler::reachable_from_cache()) { ++ set_destination(dest); ++ } else { ++ address trampoline_stub_addr = nativeCall_at(addr_call)->target_addr_for_insn(); ++ assert (get_trampoline() != NULL && trampoline_stub_addr == get_trampoline(), "we need a trampoline"); ++ nativeCallTrampolineStub_at(trampoline_stub_addr)->set_destination(dest); ++ } ++} ++ ++address NativeCall::get_trampoline() { ++ address call_addr = addr_at(0); ++ ++ CodeBlob *code = CodeCache::find_blob(call_addr); ++ assert(code != NULL, "Could not find the containing code blob"); ++ ++ if (code->is_nmethod()) { ++ return trampoline_stub_Relocation::get_trampoline_for(call_addr, (nmethod*)code); ++ } ++ return NULL; ++} ++ ++// manual implementation of GSSQ ++// ++// 00000001200009c0 : ++// 1200009c0: 0085202d daddu a0, a0, a1 ++// 1200009c4: e8860027 gssq a2, a3, 0(a0) ++// 1200009c8: 03e00008 jr ra ++// 1200009cc: 00000000 nop ++// ++typedef void (* atomic_store128_ptr)(long *addr, int offset, long low64, long hi64); ++ ++static int *buf; ++ ++static atomic_store128_ptr get_atomic_store128_func() { ++ assert(UseLEXT1, "UseLEXT1 must be true"); ++ static atomic_store128_ptr p = NULL; ++ if (p != NULL) ++ return p; ++ ++ buf = (int *)mmap(NULL, 1024, PROT_READ | PROT_WRITE | PROT_EXEC, MAP_PRIVATE | MAP_ANONYMOUS, ++ -1, 0); ++ buf[0] = 0x0085202d; ++ buf[1] = (0x3a << 26) | (4 << 21) | (6 << 16) | 0x27; /* gssq $a2, $a3, 0($a0) */ ++ buf[2] = 0x03e00008; ++ buf[3] = 0; ++ ++ asm("sync"); ++ p = (atomic_store128_ptr)buf; ++ return p; ++} ++ ++void NativeCall::patch_on_jal_only(address dst) { ++ long dest = ((long)dst - (((long)addr_at(4)) & 0xfffffffff0000000))>>2; ++ if ((dest >= 0) && (dest < (1<<26))) { ++ jint jal_inst = (Assembler::jal_op << 26) | dest; ++ set_int_at(0, jal_inst); ++ ICache::invalidate_range(addr_at(0), 4); ++ } else { ++ ShouldNotReachHere(); ++ } ++} ++ ++void NativeCall::patch_on_jal_gs(address dst) { ++ long dest = ((long)dst - (((long)addr_at(20)) & 0xfffffffff0000000))>>2; ++ if ((dest >= 0) && (dest < (1<<26))) { ++ jint jal_inst = (Assembler::jal_op << 26) | dest; ++ set_int_at(16, jal_inst); ++ ICache::invalidate_range(addr_at(16), 4); ++ } else { ++ ShouldNotReachHere(); ++ } ++} ++ ++void NativeCall::patch_on_jal(address dst) { ++ patch_on_jal_gs(dst); ++} ++ ++void NativeCall::patch_on_trampoline(address dest) { ++ assert(nativeInstruction_at(addr_at(0))->is_trampoline_call(), "unexpected code at call site"); ++ jlong dst = (jlong) dest; ++ //lui dst, imm16 ++ //ori dst, dst, imm16 ++ //dsll dst, dst, 16 ++ //ld dst, dst, imm16 ++ if ((dst> 0) && Assembler::is_simm16(dst >> 32)) { ++ dst += (dst & 0x8000) << 1; ++ set_int_at(0, (int_at(0) & 0xffff0000) | (Assembler::split_low(dst >> 32) & 0xffff)); ++ set_int_at(4, (int_at(4) & 0xffff0000) | (Assembler::split_low(dst >> 16) & 0xffff)); ++ set_int_at(12, (int_at(12) & 0xffff0000) | (Assembler::split_low(dst) & 0xffff)); ++ ++ ICache::invalidate_range(addr_at(0), 24); ++ } else { ++ ShouldNotReachHere(); ++ } ++} ++ ++void NativeCall::patch_on_jalr_gs(address dst) { ++ patch_set48_gs(dst); ++} ++ ++void NativeCall::patch_on_jalr(address dst) { ++ patch_set48(dst); ++} ++ ++void NativeCall::patch_set48_gs(address dest) { ++ jlong value = (jlong) dest; ++ int rt_reg = (int_at(0) & (0x1f << 16)); ++ ++ if (rt_reg == 0) rt_reg = 25 << 16; // r25 is T9 ++ ++ int rs_reg = rt_reg << 5; ++ int rd_reg = rt_reg >> 5; ++ ++ int hi = (int)(value >> 32); ++ int lo = (int)(value & ~0); ++ int count = 0; ++ int insts[4] = {0, 0, 0, 0}; ++ ++ if 
(value == lo) { // 32-bit integer ++ if (Assembler::is_simm16(value)) { ++ insts[count] = (Assembler::daddiu_op << 26) | rt_reg | Assembler::split_low(value); ++ count += 1; ++ } else { ++ insts[count] = (Assembler::lui_op << 26) | rt_reg | Assembler::split_low(value >> 16); ++ count += 1; ++ if (Assembler::split_low(value)) { ++ insts[count] = (Assembler::ori_op << 26) | rs_reg | rt_reg | Assembler::split_low(value); ++ count += 1; ++ } ++ } ++ } else if (hi == 0) { // hardware zero-extends to upper 32 ++ insts[count] = (Assembler::ori_op << 26) | rt_reg | Assembler::split_low(julong(value) >> 16); ++ count += 1; ++ insts[count] = (Assembler::dsll_op) | rt_reg | rd_reg | (16 << 6); ++ count += 1; ++ if (Assembler::split_low(value)) { ++ insts[count] = (Assembler::ori_op << 26) | rs_reg | rt_reg | Assembler::split_low(value); ++ count += 1; ++ } ++ } else if ((value> 0) && Assembler::is_simm16(value >> 32)) { ++ insts[count] = (Assembler::lui_op << 26) | rt_reg | Assembler::split_low(value >> 32); ++ count += 1; ++ insts[count] = (Assembler::ori_op << 26) | rs_reg | rt_reg | Assembler::split_low(value >> 16); ++ count += 1; ++ insts[count] = (Assembler::dsll_op) | rt_reg | rd_reg | (16 << 6); ++ count += 1; ++ insts[count] = (Assembler::ori_op << 26) | rs_reg | rt_reg | Assembler::split_low(value); ++ count += 1; ++ } else { ++ tty->print_cr("dest = 0x%lx", value); ++ guarantee(false, "Not supported yet !"); ++ } ++ ++ while (count < 4) { ++ insts[count] = 0; ++ count++; ++ } ++ ++ guarantee(((long)addr_at(0) % (BytesPerWord * 2)) == 0, "must be aligned"); ++ atomic_store128_ptr func = get_atomic_store128_func(); ++ (*func)((long *)addr_at(0), 0, *(long *)&insts[0], *(long *)&insts[2]); ++ ++ ICache::invalidate_range(addr_at(0), 16); ++} ++ ++void NativeCall::patch_set32_gs(address dest) { ++ jlong value = (jlong) dest; ++ int rt_reg = (int_at(0) & (0x1f << 16)); ++ ++ if (rt_reg == 0) rt_reg = 25 << 16; // r25 is T9 ++ ++ int rs_reg = rt_reg << 5; ++ int rd_reg = rt_reg >> 5; ++ ++ int hi = (int)(value >> 32); ++ int lo = (int)(value & ~0); ++ ++ int count = 0; ++ ++ int insts[2] = {0, 0}; ++ ++ if (value == lo) { // 32-bit integer ++ if (Assembler::is_simm16(value)) { ++ //daddiu(d, R0, value); ++ //set_int_at(count << 2, (Assembler::daddiu_op << 26) | rt_reg | Assembler::split_low(value)); ++ insts[count] = (Assembler::daddiu_op << 26) | rt_reg | Assembler::split_low(value); ++ count += 1; ++ } else { ++ //lui(d, split_low(value >> 16)); ++ //set_int_at(count << 2, (Assembler::lui_op << 26) | rt_reg | Assembler::split_low(value >> 16)); ++ insts[count] = (Assembler::lui_op << 26) | rt_reg | Assembler::split_low(value >> 16); ++ count += 1; ++ if (Assembler::split_low(value)) { ++ //ori(d, d, split_low(value)); ++ //set_int_at(count << 2, (Assembler::ori_op << 26) | rs_reg | rt_reg | Assembler::split_low(value)); ++ insts[count] = (Assembler::ori_op << 26) | rs_reg | rt_reg | Assembler::split_low(value); ++ count += 1; ++ } ++ } ++ } else { ++ tty->print_cr("dest = 0x%lx", value); ++ guarantee(false, "Not supported yet !"); ++ } ++ ++ while (count < 2) { ++ //nop(); ++ //set_int_at(count << 2, 0); ++ insts[count] = 0; ++ count++; ++ } ++ ++ long inst = insts[1]; ++ inst = inst << 32; ++ inst = inst + insts[0]; ++ ++ set_long_at(0, inst); ++} ++ ++void NativeCall::patch_set48(address dest) { ++ jlong value = (jlong) dest; ++ int rt_reg = (int_at(0) & (0x1f << 16)); ++ ++ if (rt_reg == 0) rt_reg = 25 << 16; // r25 is T9 ++ ++ int rs_reg = rt_reg << 5; ++ int rd_reg = rt_reg >> 5; ++ ++ int 
hi = (int)(value >> 32); ++ int lo = (int)(value & ~0); ++ ++ int count = 0; ++ ++ if (value == lo) { // 32-bit integer ++ if (Assembler::is_simm16(value)) { ++ //daddiu(d, R0, value); ++ set_int_at(count << 2, (Assembler::daddiu_op << 26) | rt_reg | Assembler::split_low(value)); ++ count += 1; ++ } else { ++ //lui(d, split_low(value >> 16)); ++ set_int_at(count << 2, (Assembler::lui_op << 26) | rt_reg | Assembler::split_low(value >> 16)); ++ count += 1; ++ if (Assembler::split_low(value)) { ++ //ori(d, d, split_low(value)); ++ set_int_at(count << 2, (Assembler::ori_op << 26) | rs_reg | rt_reg | Assembler::split_low(value)); ++ count += 1; ++ } ++ } ++ } else if (hi == 0) { // hardware zero-extends to upper 32 ++ //ori(d, R0, julong(value) >> 16); ++ set_int_at(count << 2, (Assembler::ori_op << 26) | rt_reg | Assembler::split_low(julong(value) >> 16)); ++ count += 1; ++ //dsll(d, d, 16); ++ set_int_at(count << 2, (Assembler::dsll_op) | rt_reg | rd_reg | (16 << 6)); ++ count += 1; ++ if (Assembler::split_low(value)) { ++ //ori(d, d, split_low(value)); ++ set_int_at(count << 2, (Assembler::ori_op << 26) | rs_reg | rt_reg | Assembler::split_low(value)); ++ count += 1; ++ } ++ } else if ((value> 0) && Assembler::is_simm16(value >> 32)) { ++ //lui(d, value >> 32); ++ set_int_at(count << 2, (Assembler::lui_op << 26) | rt_reg | Assembler::split_low(value >> 32)); ++ count += 1; ++ //ori(d, d, split_low(value >> 16)); ++ set_int_at(count << 2, (Assembler::ori_op << 26) | rs_reg | rt_reg | Assembler::split_low(value >> 16)); ++ count += 1; ++ //dsll(d, d, 16); ++ set_int_at(count << 2, (Assembler::dsll_op) | rt_reg | rd_reg | (16 << 6)); ++ count += 1; ++ //ori(d, d, split_low(value)); ++ set_int_at(count << 2, (Assembler::ori_op << 26) | rs_reg | rt_reg | Assembler::split_low(value)); ++ count += 1; ++ } else { ++ tty->print_cr("dest = 0x%lx", value); ++ guarantee(false, "Not supported yet !"); ++ } ++ ++ while (count < 4) { ++ //nop(); ++ set_int_at(count << 2, 0); ++ count++; ++ } ++ ++ ICache::invalidate_range(addr_at(0), 16); ++} ++ ++void NativeCall::patch_set32(address dest) { ++ patch_set32_gs(dest); ++} ++ ++void NativeCall::set_destination(address dest) { ++ OrderAccess::fence(); ++ ++ // li64 ++ if (is_special_op(int_at(16), Assembler::dsll_op)) { ++ int first_word = int_at(0); ++ set_int_at(0, 0x1000ffff); /* .1: b .1 */ ++ set_int_at(4, (int_at(4) & 0xffff0000) | (Assembler::split_low((intptr_t)dest >> 32) & 0xffff)); ++ set_int_at(12, (int_at(12) & 0xffff0000) | (Assembler::split_low((intptr_t)dest >> 16) & 0xffff)); ++ set_int_at(20, (int_at(20) & 0xffff0000) | (Assembler::split_low((intptr_t)dest) & 0xffff)); ++ set_int_at(0, (first_word & 0xffff0000) | (Assembler::split_low((intptr_t)dest >> 48) & 0xffff)); ++ ICache::invalidate_range(addr_at(0), 24); ++ } else if (is_op(int_at(16), Assembler::jal_op)) { ++ if (UseLEXT1) { ++ patch_on_jal_gs(dest); ++ } else { ++ patch_on_jal(dest); ++ } ++ } else if (is_op(int_at(0), Assembler::jal_op)) { ++ patch_on_jal_only(dest); ++ } else if (is_special_op(int_at(16), Assembler::jalr_op)) { ++ if (UseLEXT1) { ++ patch_on_jalr_gs(dest); ++ } else { ++ patch_on_jalr(dest); ++ } ++ } else if (is_special_op(int_at(8), Assembler::jalr_op)) { ++ guarantee(!os::is_MP() || (((long)addr_at(0) % 8) == 0), "destination must be aligned by 8"); ++ if (UseLEXT1) { ++ patch_set32_gs(dest); ++ } else { ++ patch_set32(dest); ++ } ++ ICache::invalidate_range(addr_at(0), 8); ++ } else { ++ fatal("not a call"); ++ } ++} ++ ++void NativeCall::print() { ++ 
tty->print_cr(PTR_FORMAT ": call " PTR_FORMAT, ++ p2i(instruction_address()), p2i(destination())); ++} ++ ++// Inserts a native call instruction at a given pc ++void NativeCall::insert(address code_pos, address entry) { ++ NativeCall *call = nativeCall_at(code_pos); ++ CodeBuffer cb(call->addr_at(0), instruction_size); ++ MacroAssembler masm(&cb); ++#define __ masm. ++ __ li48(T9, (long)entry); ++ __ jalr (); ++ __ delayed()->nop(); ++#undef __ ++ ++ ICache::invalidate_range(call->addr_at(0), instruction_size); ++} ++ ++// MT-safe patching of a call instruction. ++// First patches first word of instruction to two jmp's that jmps to them ++// selfs (spinlock). Then patches the last byte, and then atomicly replaces ++// the jmp's with the first 4 byte of the new instruction. ++void NativeCall::replace_mt_safe(address instr_addr, address code_buffer) { ++ Unimplemented(); ++} ++ ++//------------------------------------------------------------------- ++ ++void NativeMovConstReg::verify() { ++ // li64 ++ if ( is_op(Assembler::lui_op) && ++ is_op(int_at(4), Assembler::ori_op) && ++ is_special_op(int_at(8), Assembler::dsll_op) && ++ is_op(int_at(12), Assembler::ori_op) && ++ is_special_op(int_at(16), Assembler::dsll_op) && ++ is_op(int_at(20), Assembler::ori_op) ) { ++ return; ++ } ++ ++ //lui dst, imm16 ++ //ori dst, dst, imm16 ++ //dsll dst, dst, 16 ++ //ori dst, dst, imm16 ++ if ( is_op(Assembler::lui_op) && ++ is_op (int_at(4), Assembler::ori_op) && ++ is_special_op(int_at(8), Assembler::dsll_op) && ++ is_op (int_at(12), Assembler::ori_op) ) { ++ return; ++ } ++ ++ //ori dst, R0, imm16 ++ //dsll dst, dst, 16 ++ //ori dst, dst, imm16 ++ //nop ++ if ( is_op(Assembler::ori_op) && ++ is_special_op(int_at(4), Assembler::dsll_op) && ++ is_op (int_at(8), Assembler::ori_op) && ++ nativeInstruction_at(addr_at(12))->is_nop()) { ++ return; ++ } ++ ++ //ori dst, R0, imm16 ++ //dsll dst, dst, 16 ++ //nop ++ //nop ++ if ( is_op(Assembler::ori_op) && ++ is_special_op(int_at(4), Assembler::dsll_op) && ++ nativeInstruction_at(addr_at(8))->is_nop() && ++ nativeInstruction_at(addr_at(12))->is_nop()) { ++ return; ++ } ++ ++ //daddiu dst, R0, imm16 ++ //nop ++ //nop ++ //nop ++ if ( is_op(Assembler::daddiu_op) && ++ nativeInstruction_at(addr_at(4))->is_nop() && ++ nativeInstruction_at(addr_at(8))->is_nop() && ++ nativeInstruction_at(addr_at(12))->is_nop() ) { ++ return; ++ } ++ ++ //lui dst, imm16 ++ //ori dst, dst, imm16 ++ //nop ++ //nop ++ if ( is_op(Assembler::lui_op) && ++ is_op (int_at(4), Assembler::ori_op) && ++ nativeInstruction_at(addr_at(8))->is_nop() && ++ nativeInstruction_at(addr_at(12))->is_nop() ) { ++ return; ++ } ++ ++ //lui dst, imm16 ++ //nop ++ //nop ++ //nop ++ if ( is_op(Assembler::lui_op) && ++ nativeInstruction_at(addr_at(4))->is_nop() && ++ nativeInstruction_at(addr_at(8))->is_nop() && ++ nativeInstruction_at(addr_at(12))->is_nop() ) { ++ return; ++ } ++ ++ fatal("not a mov reg, imm64/imm48"); ++} ++ ++void NativeMovConstReg::print() { ++ tty->print_cr(PTR_FORMAT ": mov reg, " INTPTR_FORMAT, ++ p2i(instruction_address()), data()); ++} ++ ++intptr_t NativeMovConstReg::data() const { ++ // li64 ++ if ( is_op(Assembler::lui_op) && ++ is_op(int_at(4), Assembler::ori_op) && ++ is_special_op(int_at(8), Assembler::dsll_op) && ++ is_op(int_at(12), Assembler::ori_op) && ++ is_special_op(int_at(16), Assembler::dsll_op) && ++ is_op(int_at(20), Assembler::ori_op) ) { ++ ++ return Assembler::merge( (intptr_t)(int_at(20) & 0xffff), ++ (intptr_t)(int_at(12) & 0xffff), ++ (intptr_t)(int_at(4) & 0xffff), 
++ (intptr_t)(int_at(0) & 0xffff)); ++ } ++ ++ //lui dst, imm16 ++ //ori dst, dst, imm16 ++ //dsll dst, dst, 16 ++ //ori dst, dst, imm16 ++ if ( is_op(Assembler::lui_op) && ++ is_op (int_at(4), Assembler::ori_op) && ++ is_special_op(int_at(8), Assembler::dsll_op) && ++ is_op (int_at(12), Assembler::ori_op) ) { ++ ++ return Assembler::merge( (intptr_t)(int_at(12) & 0xffff), ++ (intptr_t)(int_at(4) & 0xffff), ++ (intptr_t)(int_at(0) & 0xffff), ++ (intptr_t)0); ++ } ++ ++ //ori dst, R0, imm16 ++ //dsll dst, dst, 16 ++ //ori dst, dst, imm16 ++ //nop ++ if ( is_op(Assembler::ori_op) && ++ is_special_op(int_at(4), Assembler::dsll_op) && ++ is_op (int_at(8), Assembler::ori_op) && ++ nativeInstruction_at(addr_at(12))->is_nop()) { ++ ++ return Assembler::merge( (intptr_t)(int_at(8) & 0xffff), ++ (intptr_t)(int_at(0) & 0xffff), ++ (intptr_t)0, ++ (intptr_t)0); ++ } ++ ++ //ori dst, R0, imm16 ++ //dsll dst, dst, 16 ++ //nop ++ //nop ++ if ( is_op(Assembler::ori_op) && ++ is_special_op(int_at(4), Assembler::dsll_op) && ++ nativeInstruction_at(addr_at(8))->is_nop() && ++ nativeInstruction_at(addr_at(12))->is_nop()) { ++ ++ return Assembler::merge( (intptr_t)(0), ++ (intptr_t)(int_at(0) & 0xffff), ++ (intptr_t)0, ++ (intptr_t)0); ++ } ++ ++ //daddiu dst, R0, imm16 ++ //nop ++ //nop ++ //nop ++ if ( is_op(Assembler::daddiu_op) && ++ nativeInstruction_at(addr_at(4))->is_nop() && ++ nativeInstruction_at(addr_at(8))->is_nop() && ++ nativeInstruction_at(addr_at(12))->is_nop() ) { ++ ++ int sign = int_at(0) & 0x8000; ++ if (sign == 0) { ++ return Assembler::merge( (intptr_t)(int_at(0) & 0xffff), ++ (intptr_t)0, ++ (intptr_t)0, ++ (intptr_t)0); ++ } else { ++ return Assembler::merge( (intptr_t)(int_at(0) & 0xffff), ++ (intptr_t)(0xffff), ++ (intptr_t)(0xffff), ++ (intptr_t)(0xffff)); ++ } ++ } ++ ++ //lui dst, imm16 ++ //ori dst, dst, imm16 ++ //nop ++ //nop ++ if ( is_op(Assembler::lui_op) && ++ is_op (int_at(4), Assembler::ori_op) && ++ nativeInstruction_at(addr_at(8))->is_nop() && ++ nativeInstruction_at(addr_at(12))->is_nop() ) { ++ ++ int sign = int_at(0) & 0x8000; ++ if (sign == 0) { ++ return Assembler::merge( (intptr_t)(int_at(4) & 0xffff), ++ (intptr_t)(int_at(0) & 0xffff), ++ (intptr_t)0, ++ (intptr_t)0); ++ } else { ++ return Assembler::merge( (intptr_t)(int_at(4) & 0xffff), ++ (intptr_t)(int_at(0) & 0xffff), ++ (intptr_t)(0xffff), ++ (intptr_t)(0xffff)); ++ } ++ } ++ ++ //lui dst, imm16 ++ //nop ++ //nop ++ //nop ++ if ( is_op(Assembler::lui_op) && ++ nativeInstruction_at(addr_at(4))->is_nop() && ++ nativeInstruction_at(addr_at(8))->is_nop() && ++ nativeInstruction_at(addr_at(12))->is_nop() ) { ++ ++ int sign = int_at(0) & 0x8000; ++ if (sign == 0) { ++ return Assembler::merge( (intptr_t)0, ++ (intptr_t)(int_at(0) & 0xffff), ++ (intptr_t)0, ++ (intptr_t)0); ++ } else { ++ return Assembler::merge( (intptr_t)0, ++ (intptr_t)(int_at(0) & 0xffff), ++ (intptr_t)(0xffff), ++ (intptr_t)(0xffff)); ++ } ++ } ++ ++ fatal("not a mov reg, imm64/imm48"); ++ return 0; // unreachable ++} ++ ++void NativeMovConstReg::patch_set48(intptr_t x) { ++ jlong value = (jlong) x; ++ int rt_reg = (int_at(0) & (0x1f << 16)); ++ int rs_reg = rt_reg << 5; ++ int rd_reg = rt_reg >> 5; ++ ++ int hi = (int)(value >> 32); ++ int lo = (int)(value & ~0); ++ ++ int count = 0; ++ ++ if (value == lo) { // 32-bit integer ++ if (Assembler::is_simm16(value)) { ++ //daddiu(d, R0, value); ++ set_int_at(count << 2, (Assembler::daddiu_op << 26) | rt_reg | Assembler::split_low(value)); ++ count += 1; ++ } else { ++ //lui(d, split_low(value >> 
16)); ++ set_int_at(count << 2, (Assembler::lui_op << 26) | rt_reg | Assembler::split_low(value >> 16)); ++ count += 1; ++ if (Assembler::split_low(value)) { ++ //ori(d, d, split_low(value)); ++ set_int_at(count << 2, (Assembler::ori_op << 26) | rs_reg | rt_reg | Assembler::split_low(value)); ++ count += 1; ++ } ++ } ++ } else if (hi == 0) { // hardware zero-extends to upper 32 ++ set_int_at(count << 2, (Assembler::ori_op << 26) | rt_reg | Assembler::split_low(julong(value) >> 16)); ++ count += 1; ++ set_int_at(count << 2, (Assembler::dsll_op) | rt_reg | rd_reg | (16 << 6)); ++ count += 1; ++ if (Assembler::split_low(value)) { ++ set_int_at(count << 2, (Assembler::ori_op << 26) | rs_reg | rt_reg | Assembler::split_low(value)); ++ count += 1; ++ } ++ } else if ((value> 0) && Assembler::is_simm16(value >> 32)) { ++ set_int_at(count << 2, (Assembler::lui_op << 26) | rt_reg | Assembler::split_low(value >> 32)); ++ count += 1; ++ set_int_at(count << 2, (Assembler::ori_op << 26) | rs_reg | rt_reg | Assembler::split_low(value >> 16)); ++ count += 1; ++ set_int_at(count << 2, (Assembler::dsll_op) | rt_reg | rd_reg | (16 << 6)); ++ count += 1; ++ set_int_at(count << 2, (Assembler::ori_op << 26) | rs_reg | rt_reg | Assembler::split_low(value)); ++ count += 1; ++ } else { ++ tty->print_cr("value = 0x%lx", value); ++ guarantee(false, "Not supported yet !"); ++ } ++ ++ while (count < 4) { ++ set_int_at(count << 2, 0); ++ count++; ++ } ++} ++ ++void NativeMovConstReg::set_data(intptr_t x, intptr_t o) { ++ // li64 or li48 ++ if ((!nativeInstruction_at(addr_at(12))->is_nop()) && is_special_op(int_at(16), Assembler::dsll_op) && is_op(long_at(20), Assembler::ori_op)) { ++ set_int_at(0, (int_at(0) & 0xffff0000) | (Assembler::split_low((intptr_t)x >> 48) & 0xffff)); ++ set_int_at(4, (int_at(4) & 0xffff0000) | (Assembler::split_low((intptr_t)x >> 32) & 0xffff)); ++ set_int_at(12, (int_at(12) & 0xffff0000) | (Assembler::split_low((intptr_t)x >> 16) & 0xffff)); ++ set_int_at(20, (int_at(20) & 0xffff0000) | (Assembler::split_low((intptr_t)x) & 0xffff)); ++ } else { ++ patch_set48(x); ++ } ++ ++ ICache::invalidate_range(addr_at(0), 24); ++ ++ // Find and replace the oop/metadata corresponding to this ++ // instruction in oops section. ++ CodeBlob* blob = CodeCache::find_blob_unsafe(instruction_address()); ++ nmethod* nm = blob->as_nmethod_or_null(); ++ if (nm != NULL) { ++ o = o ? 
o : x; ++ RelocIterator iter(nm, instruction_address(), next_instruction_address()); ++ while (iter.next()) { ++ if (iter.type() == relocInfo::oop_type) { ++ oop* oop_addr = iter.oop_reloc()->oop_addr(); ++ *oop_addr = cast_to_oop(o); ++ break; ++ } else if (iter.type() == relocInfo::metadata_type) { ++ Metadata** metadata_addr = iter.metadata_reloc()->metadata_addr(); ++ *metadata_addr = (Metadata*)o; ++ break; ++ } ++ } ++ } ++} ++ ++//------------------------------------------------------------------- ++ ++int NativeMovRegMem::offset() const{ ++ if (is_immediate()) ++ return (short)(int_at(instruction_offset)&0xffff); ++ else ++ return Assembler::merge(int_at(hiword_offset)&0xffff, long_at(instruction_offset)&0xffff); ++} ++ ++void NativeMovRegMem::set_offset(int x) { ++ if (is_immediate()) { ++ assert(Assembler::is_simm16(x), "just check"); ++ set_int_at(0, (int_at(0)&0xffff0000) | (x&0xffff) ); ++ if (is_64ldst()) { ++ assert(Assembler::is_simm16(x+4), "just check"); ++ set_int_at(4, (int_at(4)&0xffff0000) | ((x+4)&0xffff) ); ++ } ++ } else { ++ set_int_at(0, (int_at(0) & 0xffff0000) | (Assembler::split_high(x) & 0xffff)); ++ set_int_at(4, (int_at(4) & 0xffff0000) | (Assembler::split_low(x) & 0xffff)); ++ } ++ ICache::invalidate_range(addr_at(0), 8); ++} ++ ++void NativeMovRegMem::verify() { ++ int offset = 0; ++ ++ if ( Assembler::opcode(int_at(0)) == Assembler::lui_op ) { ++ ++ if ( Assembler::opcode(int_at(4)) != Assembler::ori_op ) { ++ fatal ("not a mov [reg+offs], reg instruction"); ++ } ++ ++ offset += 12; ++ } ++ ++ switch(Assembler::opcode(int_at(offset))) { ++ case Assembler::lb_op: ++ case Assembler::lbu_op: ++ case Assembler::lh_op: ++ case Assembler::lhu_op: ++ case Assembler::lw_op: ++ case Assembler::lwu_op: ++ case Assembler::ld_op: ++ case Assembler::lwc1_op: ++ case Assembler::ldc1_op: ++ case Assembler::sb_op: ++ case Assembler::sh_op: ++ case Assembler::sw_op: ++ case Assembler::sd_op: ++ case Assembler::swc1_op: ++ case Assembler::sdc1_op: ++ break; ++ default: ++ fatal ("not a mov [reg+offs], reg instruction"); ++ } ++} ++ ++ ++void NativeMovRegMem::print() { ++ tty->print_cr(PTR_FORMAT ": mov reg, [reg + %x]", p2i(instruction_address()), offset()); ++} ++ ++bool NativeInstruction::is_sigill_zombie_not_entrant() { ++ return uint_at(0) == NativeIllegalInstruction::instruction_code; ++} ++ ++void NativeIllegalInstruction::insert(address code_pos) { ++ *(juint*)code_pos = instruction_code; ++ ICache::invalidate_range(code_pos, instruction_size); ++} ++ ++void NativeJump::verify() { ++ assert(((NativeInstruction *)this)->is_jump() || ++ ((NativeInstruction *)this)->is_cond_jump(), "not a general jump instruction"); ++} ++ ++void NativeJump::patch_set48_gs(address dest) { ++ jlong value = (jlong) dest; ++ int rt_reg = (int_at(0) & (0x1f << 16)); ++ ++ if (rt_reg == 0) rt_reg = 25 << 16; // r25 is T9 ++ ++ int rs_reg = rt_reg << 5; ++ int rd_reg = rt_reg >> 5; ++ ++ int hi = (int)(value >> 32); ++ int lo = (int)(value & ~0); ++ ++ int count = 0; ++ ++ int insts[4] = {0, 0, 0, 0}; ++ ++ if (value == lo) { // 32-bit integer ++ if (Assembler::is_simm16(value)) { ++ insts[count] = (Assembler::daddiu_op << 26) | rt_reg | Assembler::split_low(value); ++ count += 1; ++ } else { ++ insts[count] = (Assembler::lui_op << 26) | rt_reg | Assembler::split_low(value >> 16); ++ count += 1; ++ if (Assembler::split_low(value)) { ++ insts[count] = (Assembler::ori_op << 26) | rs_reg | rt_reg | Assembler::split_low(value); ++ count += 1; ++ } ++ } ++ } else if (hi == 0) { // hardware 
zero-extends to upper 32 ++ insts[count] = (Assembler::ori_op << 26) | rt_reg | Assembler::split_low(julong(value) >> 16); ++ count += 1; ++ insts[count] = (Assembler::dsll_op) | rt_reg | rd_reg | (16 << 6); ++ count += 1; ++ if (Assembler::split_low(value)) { ++ insts[count] = (Assembler::ori_op << 26) | rs_reg | rt_reg | Assembler::split_low(value); ++ count += 1; ++ } ++ } else if ((value> 0) && Assembler::is_simm16(value >> 32)) { ++ insts[count] = (Assembler::lui_op << 26) | rt_reg | Assembler::split_low(value >> 32); ++ count += 1; ++ insts[count] = (Assembler::ori_op << 26) | rs_reg | rt_reg | Assembler::split_low(value >> 16); ++ count += 1; ++ insts[count] = (Assembler::dsll_op) | rt_reg | rd_reg | (16 << 6); ++ count += 1; ++ insts[count] = (Assembler::ori_op << 26) | rs_reg | rt_reg | Assembler::split_low(value); ++ count += 1; ++ } else { ++ tty->print_cr("dest = 0x%lx", value); ++ guarantee(false, "Not supported yet !"); ++ } ++ ++ while (count < 4) { ++ insts[count] = 0; ++ count++; ++ } ++ ++ guarantee(((long)addr_at(0) % (BytesPerWord * 2)) == 0, "must be aligned"); ++ atomic_store128_ptr func = get_atomic_store128_func(); ++ (*func)((long *)addr_at(0), 0, *(long *)&insts[0], *(long *)&insts[2]); ++ ++ ICache::invalidate_range(addr_at(0), 16); ++} ++ ++void NativeJump::patch_set48(address dest) { ++ jlong value = (jlong) dest; ++ int rt_reg = (int_at(0) & (0x1f << 16)); ++ int rs_reg = rt_reg << 5; ++ int rd_reg = rt_reg >> 5; ++ ++ int hi = (int)(value >> 32); ++ int lo = (int)(value & ~0); ++ ++ int count = 0; ++ ++ if (value == lo) { // 32-bit integer ++ if (Assembler::is_simm16(value)) { ++ set_int_at(count << 2, (Assembler::daddiu_op << 26) | rt_reg | Assembler::split_low(value)); ++ count += 1; ++ } else { ++ set_int_at(count << 2, (Assembler::lui_op << 26) | rt_reg | Assembler::split_low(value >> 16)); ++ count += 1; ++ if (Assembler::split_low(value)) { ++ set_int_at(count << 2, (Assembler::ori_op << 26) | rs_reg | rt_reg | Assembler::split_low(value)); ++ count += 1; ++ } ++ } ++ } else if (hi == 0) { // hardware zero-extends to upper 32 ++ set_int_at(count << 2, (Assembler::ori_op << 26) | rt_reg | Assembler::split_low(julong(value) >> 16)); ++ count += 1; ++ set_int_at(count << 2, (Assembler::dsll_op) | rt_reg | rd_reg | (16 << 6)); ++ count += 1; ++ if (Assembler::split_low(value)) { ++ set_int_at(count << 2, (Assembler::ori_op << 26) | rs_reg | rt_reg | Assembler::split_low(value)); ++ count += 1; ++ } ++ } else if ((value> 0) && Assembler::is_simm16(value >> 32)) { ++ set_int_at(count << 2, (Assembler::lui_op << 26) | rt_reg | Assembler::split_low(value >> 32)); ++ count += 1; ++ set_int_at(count << 2, (Assembler::ori_op << 26) | rs_reg | rt_reg | Assembler::split_low(value >> 16)); ++ count += 1; ++ set_int_at(count << 2, (Assembler::dsll_op) | rt_reg | rd_reg | (16 << 6)); ++ count += 1; ++ set_int_at(count << 2, (Assembler::ori_op << 26) | rs_reg | rt_reg | Assembler::split_low(value)); ++ count += 1; ++ } else { ++ tty->print_cr("dest = 0x%lx", value); ++ guarantee(false, "Not supported yet !"); ++ } ++ ++ while (count < 4) { ++ set_int_at(count << 2, 0); ++ count++; ++ } ++ ++ ICache::invalidate_range(addr_at(0), 16); ++} ++ ++void NativeJump::patch_on_j_only(address dst) { ++ long dest = ((long)dst - (((long)addr_at(4)) & 0xfffffffff0000000))>>2; ++ if ((dest >= 0) && (dest < (1<<26))) { ++ jint j_inst = (Assembler::j_op << 26) | dest; ++ set_int_at(0, j_inst); ++ ICache::invalidate_range(addr_at(0), 4); ++ } else { ++ ShouldNotReachHere(); ++ } ++} ++ 
++ ++void NativeJump::patch_on_j_gs(address dst) { ++ long dest = ((long)dst - (((long)addr_at(20)) & 0xfffffffff0000000))>>2; ++ if ((dest >= 0) && (dest < (1<<26))) { ++ jint j_inst = (Assembler::j_op << 26) | dest; ++ set_int_at(16, j_inst); ++ ICache::invalidate_range(addr_at(16), 4); ++ } else { ++ ShouldNotReachHere(); ++ } ++} ++ ++void NativeJump::patch_on_j(address dst) { ++ patch_on_j_gs(dst); ++} ++ ++void NativeJump::patch_on_jr_gs(address dst) { ++ patch_set48_gs(dst); ++ ICache::invalidate_range(addr_at(0), 16); ++} ++ ++void NativeJump::patch_on_jr(address dst) { ++ patch_set48(dst); ++ ICache::invalidate_range(addr_at(0), 16); ++} ++ ++ ++void NativeJump::set_jump_destination(address dest) { ++ OrderAccess::fence(); ++ ++ if (is_short()) { ++ assert(Assembler::is_simm16(dest-addr_at(4)), "change this code"); ++ set_int_at(0, (int_at(0) & 0xffff0000) | (dest - addr_at(4)) & 0xffff ); ++ ICache::invalidate_range(addr_at(0), 4); ++ } else if (is_b_far()) { ++ int offset = dest - addr_at(12); ++ set_int_at(12, (int_at(12) & 0xffff0000) | (offset >> 16)); ++ set_int_at(16, (int_at(16) & 0xffff0000) | (offset & 0xffff)); ++ } else { ++ if (is_op(int_at(16), Assembler::j_op)) { ++ if (UseLEXT1) { ++ patch_on_j_gs(dest); ++ } else { ++ patch_on_j(dest); ++ } ++ } else if (is_op(int_at(0), Assembler::j_op)) { ++ patch_on_j_only(dest); ++ } else if (is_special_op(int_at(16), Assembler::jr_op)) { ++ if (UseLEXT1) { ++ //guarantee(!os::is_MP() || (((long)addr_at(0) % 16) == 0), "destination must be aligned for GSSD"); ++ //patch_on_jr_gs(dest); ++ patch_on_jr(dest); ++ } else { ++ patch_on_jr(dest); ++ } ++ } else { ++ fatal("not a jump"); ++ } ++ } ++} ++ ++void NativeGeneralJump::insert_unconditional(address code_pos, address entry) { ++ CodeBuffer cb(code_pos, instruction_size); ++ MacroAssembler masm(&cb); ++#define __ masm. ++ if (Assembler::is_simm16((entry - code_pos - 4) / 4)) { ++ __ b(entry); ++ __ delayed()->nop(); ++ } else { ++ // Attention: We have to use a relative jump here since PC reloc-operation isn't allowed here. ++ int offset = entry - code_pos; ++ ++ Label L; ++ __ bgezal(R0, L); ++ __ delayed()->lui(T9, (offset - 8) >> 16); ++ __ bind(L); ++ __ ori(T9, T9, (offset - 8) & 0xffff); ++ __ daddu(T9, T9, RA); ++ __ jr(T9); ++ __ delayed()->nop(); ++ } ++ ++#undef __ ++ ++ ICache::invalidate_range(code_pos, instruction_size); ++} ++ ++bool NativeJump::is_b_far() { ++// ++// 0x000000556809f198: daddu at, ra, zero ++// 0x000000556809f19c: [4110001]bgezal zero, 0x000000556809f1a4 ++// ++// 0x000000556809f1a0: nop ++// 0x000000556809f1a4: lui t9, 0xfffffffd ++// 0x000000556809f1a8: ori t9, t9, 0x14dc ++// 0x000000556809f1ac: daddu t9, t9, ra ++// 0x000000556809f1b0: daddu ra, at, zero ++// 0x000000556809f1b4: jr t9 ++// 0x000000556809f1b8: nop ++// ;; ImplicitNullCheckStub slow case ++// 0x000000556809f1bc: lui t9, 0x55 ++// ++ return is_op(int_at(12), Assembler::lui_op); ++} ++ ++address NativeJump::jump_destination() { ++ if ( is_short() ) { ++ return addr_at(4) + Assembler::imm_off(int_at(instruction_offset)) * 4; ++ } ++ // Assembler::merge() is not correct in MIPS_64! 
++ // ++ // Example: ++ // hi16 = 0xfffd, ++ // lo16 = f7a4, ++ // ++ // offset=0xfffdf7a4 (Right) ++ // Assembler::merge = 0xfffcf7a4 (Wrong) ++ // ++ if ( is_b_far() ) { ++ int hi16 = int_at(12)&0xffff; ++ int low16 = int_at(16)&0xffff; ++ address target = addr_at(12) + (hi16 << 16) + low16; ++ return target; ++ } ++ ++ // nop ++ // nop ++ // nop ++ // nop ++ // j target ++ // nop ++ if ( nativeInstruction_at(addr_at(0))->is_nop() && ++ nativeInstruction_at(addr_at(4))->is_nop() && ++ nativeInstruction_at(addr_at(8))->is_nop() && ++ nativeInstruction_at(addr_at(12))->is_nop() && ++ is_op(int_at(16), Assembler::j_op) && ++ nativeInstruction_at(addr_at(20))->is_nop()) { ++ int instr_index = int_at(16) & 0x3ffffff; ++ intptr_t target_high = ((intptr_t)addr_at(20)) & 0xfffffffff0000000; ++ intptr_t target = target_high | (instr_index << 2); ++ return (address)target; ++ } ++ ++ // j target ++ // nop ++ if ( is_op(int_at(0), Assembler::j_op) && ++ nativeInstruction_at(addr_at(4))->is_nop()) { ++ int instr_index = int_at(0) & 0x3ffffff; ++ intptr_t target_high = ((intptr_t)addr_at(4)) & 0xfffffffff0000000; ++ intptr_t target = target_high | (instr_index << 2); ++ return (address)target; ++ } ++ ++ // li64 ++ if ( is_op(Assembler::lui_op) && ++ is_op(int_at(4), Assembler::ori_op) && ++ is_special_op(int_at(8), Assembler::dsll_op) && ++ is_op(int_at(12), Assembler::ori_op) && ++ is_special_op(int_at(16), Assembler::dsll_op) && ++ is_op(int_at(20), Assembler::ori_op) ) { ++ ++ return (address)Assembler::merge( (intptr_t)(int_at(20) & 0xffff), ++ (intptr_t)(int_at(12) & 0xffff), ++ (intptr_t)(int_at(4) & 0xffff), ++ (intptr_t)(int_at(0) & 0xffff)); ++ } ++ ++ //lui dst, imm16 ++ //ori dst, dst, imm16 ++ //dsll dst, dst, 16 ++ //ori dst, dst, imm16 ++ if ( is_op(Assembler::lui_op) && ++ is_op (int_at(4), Assembler::ori_op) && ++ is_special_op(int_at(8), Assembler::dsll_op) && ++ is_op (int_at(12), Assembler::ori_op) ) { ++ ++ return (address)Assembler::merge( (intptr_t)(int_at(12) & 0xffff), ++ (intptr_t)(int_at(4) & 0xffff), ++ (intptr_t)(int_at(0) & 0xffff), ++ (intptr_t)0); ++ } ++ ++ //ori dst, R0, imm16 ++ //dsll dst, dst, 16 ++ //ori dst, dst, imm16 ++ //nop ++ if ( is_op(Assembler::ori_op) && ++ is_special_op(int_at(4), Assembler::dsll_op) && ++ is_op (int_at(8), Assembler::ori_op) && ++ nativeInstruction_at(addr_at(12))->is_nop()) { ++ ++ return (address)Assembler::merge( (intptr_t)(int_at(8) & 0xffff), ++ (intptr_t)(int_at(0) & 0xffff), ++ (intptr_t)0, ++ (intptr_t)0); ++ } ++ ++ //ori dst, R0, imm16 ++ //dsll dst, dst, 16 ++ //nop ++ //nop ++ if ( is_op(Assembler::ori_op) && ++ is_special_op(int_at(4), Assembler::dsll_op) && ++ nativeInstruction_at(addr_at(8))->is_nop() && ++ nativeInstruction_at(addr_at(12))->is_nop()) { ++ ++ return (address)Assembler::merge( (intptr_t)(0), ++ (intptr_t)(int_at(0) & 0xffff), ++ (intptr_t)0, ++ (intptr_t)0); ++ } ++ ++ //daddiu dst, R0, imm16 ++ //nop ++ //nop ++ //nop ++ if ( is_op(Assembler::daddiu_op) && ++ nativeInstruction_at(addr_at(4))->is_nop() && ++ nativeInstruction_at(addr_at(8))->is_nop() && ++ nativeInstruction_at(addr_at(12))->is_nop() ) { ++ ++ int sign = int_at(0) & 0x8000; ++ if (sign == 0) { ++ return (address)Assembler::merge( (intptr_t)(int_at(0) & 0xffff), ++ (intptr_t)0, ++ (intptr_t)0, ++ (intptr_t)0); ++ } else { ++ return (address)Assembler::merge( (intptr_t)(int_at(0) & 0xffff), ++ (intptr_t)(0xffff), ++ (intptr_t)(0xffff), ++ (intptr_t)(0xffff)); ++ } ++ } ++ ++ //lui dst, imm16 ++ //ori dst, dst, imm16 ++ //nop ++ //nop ++ if 
( is_op(Assembler::lui_op) && ++ is_op (int_at(4), Assembler::ori_op) && ++ nativeInstruction_at(addr_at(8))->is_nop() && ++ nativeInstruction_at(addr_at(12))->is_nop() ) { ++ ++ int sign = int_at(0) & 0x8000; ++ if (sign == 0) { ++ return (address)Assembler::merge( (intptr_t)(int_at(4) & 0xffff), ++ (intptr_t)(int_at(0) & 0xffff), ++ (intptr_t)0, ++ (intptr_t)0); ++ } else { ++ return (address)Assembler::merge( (intptr_t)(int_at(4) & 0xffff), ++ (intptr_t)(int_at(0) & 0xffff), ++ (intptr_t)(0xffff), ++ (intptr_t)(0xffff)); ++ } ++ } ++ ++ //lui dst, imm16 ++ //nop ++ //nop ++ //nop ++ if ( is_op(Assembler::lui_op) && ++ nativeInstruction_at(addr_at(4))->is_nop() && ++ nativeInstruction_at(addr_at(8))->is_nop() && ++ nativeInstruction_at(addr_at(12))->is_nop() ) { ++ ++ int sign = int_at(0) & 0x8000; ++ if (sign == 0) { ++ return (address)Assembler::merge( (intptr_t)0, ++ (intptr_t)(int_at(0) & 0xffff), ++ (intptr_t)0, ++ (intptr_t)0); ++ } else { ++ return (address)Assembler::merge( (intptr_t)0, ++ (intptr_t)(int_at(0) & 0xffff), ++ (intptr_t)(0xffff), ++ (intptr_t)(0xffff)); ++ } ++ } ++ ++ fatal("not a jump"); ++ return NULL; // unreachable ++} ++ ++// MT-safe patching of a long jump instruction. ++// First patches first word of instruction to two jmp's that jmps to them ++// selfs (spinlock). Then patches the last byte, and then atomicly replaces ++// the jmp's with the first 4 byte of the new instruction. ++void NativeGeneralJump::replace_mt_safe(address instr_addr, address code_buffer) { ++ NativeGeneralJump* h_jump = nativeGeneralJump_at (instr_addr); ++ assert((int)instruction_size == (int)NativeCall::instruction_size, ++ "note::Runtime1::patch_code uses NativeCall::instruction_size"); ++ ++ // ensure 100% atomicity ++ guarantee(!os::is_MP() || (((long)instr_addr % BytesPerWord) == 0), "destination must be aligned for SD"); ++ ++ int *p = (int *)instr_addr; ++ int jr_word = p[4]; ++ ++ p[4] = 0x1000fffb; /* .1: --; --; --; --; b .1; nop */ ++ memcpy(instr_addr, code_buffer, NativeCall::instruction_size - 8); ++ *(long *)(instr_addr + 16) = *(long *)(code_buffer + 16); ++} ++ ++// Must ensure atomicity ++void NativeJump::patch_verified_entry(address entry, address verified_entry, address dest) { ++ assert(dest == SharedRuntime::get_handle_wrong_method_stub(), "expected fixed destination of patch"); ++ assert(nativeInstruction_at(verified_entry + BytesPerInstWord)->is_nop(), "mips64 cannot replace non-nop with jump"); ++ ++ if (MacroAssembler::reachable_from_cache(dest)) { ++ CodeBuffer cb(verified_entry, 1 * BytesPerInstWord); ++ MacroAssembler masm(&cb); ++ masm.j(dest); ++ } else { ++ // We use an illegal instruction for marking a method as ++ // not_entrant or zombie ++ NativeIllegalInstruction::insert(verified_entry); ++ } ++ ++ ICache::invalidate_range(verified_entry, 1 * BytesPerInstWord); ++} ++ ++bool NativeInstruction::is_jump() ++{ ++ if ((int_at(0) & NativeGeneralJump::b_mask) == NativeGeneralJump::beq_opcode) ++ return true; ++ if (is_op(int_at(4), Assembler::lui_op)) // simplified b_far ++ return true; ++ if (is_op(int_at(12), Assembler::lui_op)) // original b_far ++ return true; ++ ++ // nop ++ // nop ++ // nop ++ // nop ++ // j target ++ // nop ++ if ( is_nop() && ++ nativeInstruction_at(addr_at(4))->is_nop() && ++ nativeInstruction_at(addr_at(8))->is_nop() && ++ nativeInstruction_at(addr_at(12))->is_nop() && ++ nativeInstruction_at(addr_at(16))->is_op(Assembler::j_op) && ++ nativeInstruction_at(addr_at(20))->is_nop() ) { ++ return true; ++ } ++ ++ if ( 
nativeInstruction_at(addr_at(0))->is_op(Assembler::j_op) && ++ nativeInstruction_at(addr_at(4))->is_nop() ) { ++ return true; ++ } ++ ++ // lui rd, imm(63...48); ++ // ori rd, rd, imm(47...32); ++ // dsll rd, rd, 16; ++ // ori rd, rd, imm(31...16); ++ // dsll rd, rd, 16; ++ // ori rd, rd, imm(15...0); ++ // jr rd ++ // nop ++ if (is_op(int_at(0), Assembler::lui_op) && ++ is_op(int_at(4), Assembler::ori_op) && ++ is_special_op(int_at(8), Assembler::dsll_op) && ++ is_op(int_at(12), Assembler::ori_op) && ++ is_special_op(int_at(16), Assembler::dsll_op) && ++ is_op(int_at(20), Assembler::ori_op) && ++ is_special_op(int_at(24), Assembler::jr_op)) { ++ return true; ++ } ++ ++ //lui dst, imm16 ++ //ori dst, dst, imm16 ++ //dsll dst, dst, 16 ++ //ori dst, dst, imm16 ++ if (is_op(int_at(0), Assembler::lui_op) && ++ is_op(int_at(4), Assembler::ori_op) && ++ is_special_op(int_at(8), Assembler::dsll_op) && ++ is_op(int_at(12), Assembler::ori_op) && ++ is_special_op(int_at(16), Assembler::jr_op)) { ++ return true; ++ } ++ ++ //ori dst, R0, imm16 ++ //dsll dst, dst, 16 ++ //ori dst, dst, imm16 ++ //nop ++ if ( is_op(Assembler::ori_op) && ++ is_special_op(int_at(4), Assembler::dsll_op) && ++ is_op (int_at(8), Assembler::ori_op) && ++ nativeInstruction_at(addr_at(12))->is_nop() && ++ is_special_op(int_at(16), Assembler::jr_op)) { ++ return true; ++ } ++ ++ //ori dst, R0, imm16 ++ //dsll dst, dst, 16 ++ //nop ++ //nop ++ if ( is_op(Assembler::ori_op) && ++ is_special_op(int_at(4), Assembler::dsll_op) && ++ nativeInstruction_at(addr_at(8))->is_nop() && ++ nativeInstruction_at(addr_at(12))->is_nop() && ++ is_special_op(int_at(16), Assembler::jr_op)) { ++ return true; ++ } ++ ++ //daddiu dst, R0, imm16 ++ //nop ++ //nop ++ //nop ++ if ( is_op(Assembler::daddiu_op) && ++ nativeInstruction_at(addr_at(4))->is_nop() && ++ nativeInstruction_at(addr_at(8))->is_nop() && ++ nativeInstruction_at(addr_at(12))->is_nop() && ++ is_special_op(int_at(16), Assembler::jr_op)) { ++ return true; ++ } ++ ++ //lui dst, imm16 ++ //ori dst, dst, imm16 ++ //nop ++ //nop ++ if ( is_op(Assembler::lui_op) && ++ is_op (int_at(4), Assembler::ori_op) && ++ nativeInstruction_at(addr_at(8))->is_nop() && ++ nativeInstruction_at(addr_at(12))->is_nop() && ++ is_special_op(int_at(16), Assembler::jr_op)) { ++ return true; ++ } ++ ++ //lui dst, imm16 ++ //nop ++ //nop ++ //nop ++ if ( is_op(Assembler::lui_op) && ++ nativeInstruction_at(addr_at(4))->is_nop() && ++ nativeInstruction_at(addr_at(8))->is_nop() && ++ nativeInstruction_at(addr_at(12))->is_nop() && ++ is_special_op(int_at(16), Assembler::jr_op)) { ++ return true; ++ } ++ ++ return false; ++} ++ ++bool NativeInstruction::is_dtrace_trap() { ++ //return (*(int32_t*)this & 0xff) == 0xcc; ++ Unimplemented(); ++ return false; ++} ++ ++bool NativeInstruction::is_safepoint_poll() { ++ // ++ // 390 li T2, 0x0000000000400000 #@loadConP ++ // 394 sw [SP + #12], V1 # spill 9 ++ // 398 Safepoint @ [T2] : poll for GC @ safePoint_poll # spec.benchmarks.compress.Decompressor::decompress @ bci:224 L[0]=A6 L[1]=_ L[2]=sp + #28 L[3]=_ L[4]=V1 ++ // ++ // 0x000000ffe5815130: lui t2, 0x40 ++ // 0x000000ffe5815134: sw v1, 0xc(sp) ; OopMap{a6=Oop off=920} ++ // ;*goto ++ // ; - spec.benchmarks.compress.Decompressor::decompress@224 (line 584) ++ // ++ // 0x000000ffe5815138: lw at, 0x0(t2) ;*goto <--- PC ++ // ; - spec.benchmarks.compress.Decompressor::decompress@224 (line 584) ++ // ++ ++ // Since there may be some spill instructions between the safePoint_poll and loadConP, ++ // we check the safepoint 
instruction like the this. ++ return is_op(Assembler::lw_op) && is_rt(AT); ++} +diff --git a/src/hotspot/cpu/mips/nativeInst_mips.hpp b/src/hotspot/cpu/mips/nativeInst_mips.hpp +new file mode 100644 +index 0000000000..fb4f99c9c6 +--- /dev/null ++++ b/src/hotspot/cpu/mips/nativeInst_mips.hpp +@@ -0,0 +1,734 @@ ++/* ++ * Copyright (c) 1997, 2011, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#ifndef CPU_MIPS_VM_NATIVEINST_MIPS_HPP ++#define CPU_MIPS_VM_NATIVEINST_MIPS_HPP ++ ++#include "asm/assembler.hpp" ++#include "asm/macroAssembler.hpp" ++#include "runtime/icache.hpp" ++#include "runtime/os.hpp" ++#include "runtime/safepointMechanism.hpp" ++ ++// We have interfaces for the following instructions: ++// - NativeInstruction ++// - - NativeCall ++// - - NativeMovConstReg ++// - - NativeMovConstRegPatching ++// - - NativeMovRegMem ++// - - NativeMovRegMemPatching ++// - - NativeJump ++// - - NativeIllegalOpCode ++// - - NativeGeneralJump ++// - - NativeReturn ++// - - NativeReturnX (return with argument) ++// - - NativePushConst ++// - - NativeTstRegMem ++ ++// The base class for different kinds of native instruction abstractions. ++// Provides the primitive operations to manipulate code relative to this. ++ ++class NativeInstruction { ++ friend class Relocation; ++ ++ public: ++ enum mips_specific_constants { ++ nop_instruction_code = 0, ++ nop_instruction_size = 4, ++ sync_instruction_code = 0xf ++ }; ++ ++ bool is_nop() { return long_at(0) == nop_instruction_code; } ++ bool is_sync() { return long_at(0) == sync_instruction_code; } ++ bool is_dtrace_trap(); ++ inline bool is_call(); ++ inline bool is_illegal(); ++ inline bool is_return(); ++ bool is_jump(); ++ inline bool is_cond_jump(); ++ bool is_safepoint_poll(); ++ ++ //mips has no instruction to generate a illegal instrucion exception ++ //we define ours: break 11 ++ static int illegal_instruction(); ++ ++ bool is_int_branch(); ++ bool is_float_branch(); ++ ++ inline bool is_trampoline_call(); ++ ++ //We use an illegal instruction for marking a method as not_entrant or zombie. 
++ bool is_sigill_zombie_not_entrant(); ++ ++ protected: ++ address addr_at(int offset) const { return address(this) + offset; } ++ address instruction_address() const { return addr_at(0); } ++ address next_instruction_address() const { return addr_at(BytesPerInstWord); } ++ address prev_instruction_address() const { return addr_at(-BytesPerInstWord); } ++ ++ s_char sbyte_at(int offset) const { return *(s_char*) addr_at(offset); } ++ u_char ubyte_at(int offset) const { return *(u_char*) addr_at(offset); } ++ ++ jint int_at(int offset) const { return *(jint*) addr_at(offset); } ++ juint uint_at(int offset) const { return *(juint*) addr_at(offset); } ++ ++ intptr_t ptr_at(int offset) const { return *(intptr_t*) addr_at(offset); } ++ ++ oop oop_at (int offset) const { return *(oop*) addr_at(offset); } ++ int long_at(int offset) const { return *(jint*)addr_at(offset); } ++ ++ ++ void set_char_at(int offset, char c) { *addr_at(offset) = (u_char)c; wrote(offset); } ++ void set_int_at(int offset, jint i) { *(jint*)addr_at(offset) = i; wrote(offset); } ++ void set_ptr_at (int offset, intptr_t ptr) { *(intptr_t*) addr_at(offset) = ptr; wrote(offset); } ++ void set_oop_at (int offset, oop o) { *(oop*) addr_at(offset) = o; wrote(offset); } ++ void set_long_at(int offset, long i); ++ ++ int insn_word() const { return long_at(0); } ++ static bool is_op (int insn, Assembler::ops op) { return Assembler::opcode(insn) == (int)op; } ++ bool is_op (Assembler::ops op) const { return is_op(insn_word(), op); } ++ bool is_rs (int insn, Register rs) const { return Assembler::rs(insn) == (int)rs->encoding(); } ++ bool is_rs (Register rs) const { return is_rs(insn_word(), rs); } ++ bool is_rt (int insn, Register rt) const { return Assembler::rt(insn) == (int)rt->encoding(); } ++ bool is_rt (Register rt) const { return is_rt(insn_word(), rt); } ++ ++ static bool is_special_op (int insn, Assembler::special_ops op) { ++ return is_op(insn, Assembler::special_op) && Assembler::special(insn)==(int)op; ++ } ++ bool is_special_op (Assembler::special_ops op) const { return is_special_op(insn_word(), op); } ++ ++ void wrote(int offset); ++ ++ public: ++ ++ // unit test stuff ++ static void test() {} // override for testing ++ ++ inline friend NativeInstruction* nativeInstruction_at(address address); ++}; ++ ++inline NativeInstruction* nativeInstruction_at(address address) { ++ NativeInstruction* inst = (NativeInstruction*)address; ++#ifdef ASSERT ++ //inst->verify(); ++#endif ++ return inst; ++} ++ ++inline NativeCall* nativeCall_at(address address); ++// The NativeCall is an abstraction for accessing/manipulating native call imm32/imm64 ++// instructions (used to manipulate inline caches, primitive & dll calls, etc.). ++// MIPS has no call instruction with imm32/imm64. 
Usually, a call was done like this: ++// 32 bits: ++// lui rt, imm16 ++// addiu rt, rt, imm16 ++// jalr rt ++// nop ++// ++// 64 bits: ++// lui rd, imm(63...48); ++// ori rd, rd, imm(47...32); ++// dsll rd, rd, 16; ++// ori rd, rd, imm(31...16); ++// dsll rd, rd, 16; ++// ori rd, rd, imm(15...0); ++// jalr rd ++// nop ++// ++ ++// we just consider the above for instruction as one call instruction ++class NativeCall: public NativeInstruction { ++ public: ++ enum mips_specific_constants { ++ instruction_offset = 0, ++ instruction_size = 6 * BytesPerInstWord, ++ return_address_offset_short = 4 * BytesPerInstWord, ++ return_address_offset_long = 6 * BytesPerInstWord, ++ displacement_offset = 0 ++ }; ++ ++ address instruction_address() const { return addr_at(instruction_offset); } ++ ++ address next_instruction_address() const { ++ if (is_special_op(int_at(8), Assembler::jalr_op)) { ++ return addr_at(return_address_offset_short); ++ } else { ++ return addr_at(return_address_offset_long); ++ } ++ } ++ ++ address return_address() const { ++ return next_instruction_address(); ++ } ++ ++ address target_addr_for_insn() const; ++ address destination() const; ++ void set_destination(address dest); ++ ++ void patch_set48_gs(address dest); ++ void patch_set48(address dest); ++ ++ void patch_on_jalr_gs(address dest); ++ void patch_on_jalr(address dest); ++ ++ void patch_on_jal_gs(address dest); ++ void patch_on_jal(address dest); ++ ++ void patch_on_trampoline(address dest); ++ ++ void patch_on_jal_only(address dest); ++ ++ void patch_set32_gs(address dest); ++ void patch_set32(address dest); ++ ++ void verify_alignment() { } ++ void verify(); ++ void print(); ++ ++ // Creation ++ inline friend NativeCall* nativeCall_at(address address); ++ inline friend NativeCall* nativeCall_before(address return_address); ++ ++ static bool is_call_at(address instr) { ++ return nativeInstruction_at(instr)->is_call(); ++ } ++ ++ static bool is_call_before(address return_address) { ++ return is_call_at(return_address - return_address_offset_short) | is_call_at(return_address - return_address_offset_long); ++ } ++ ++ static bool is_call_to(address instr, address target) { ++ return nativeInstruction_at(instr)->is_call() && ++nativeCall_at(instr)->destination() == target; ++ } ++ ++ // MT-safe patching of a call instruction. ++ static void insert(address code_pos, address entry); ++ ++ static void replace_mt_safe(address instr_addr, address code_buffer); ++ ++ // Similar to replace_mt_safe, but just changes the destination. The ++ // important thing is that free-running threads are able to execute ++ // this call instruction at all times. If the call is an immediate jal ++ // instruction we can simply rely on atomicity of 32-bit writes to ++ // make sure other threads will see no intermediate states. ++ ++ // We cannot rely on locks here, since the free-running threads must run at ++ // full speed. ++ // ++ // Used in the runtime linkage of calls; see class CompiledIC. ++ ++ // The parameter assert_lock disables the assertion during code generation. 
++ void set_destination_mt_safe(address dest, bool assert_lock = true); ++ ++ address get_trampoline(); ++}; ++ ++inline NativeCall* nativeCall_at(address address) { ++ NativeCall* call = (NativeCall*)(address - NativeCall::instruction_offset); ++#ifdef ASSERT ++ call->verify(); ++#endif ++ return call; ++} ++ ++inline NativeCall* nativeCall_before(address return_address) { ++ NativeCall* call = NULL; ++ if (NativeCall::is_call_at(return_address - NativeCall::return_address_offset_long)) { ++ call = (NativeCall*)(return_address - NativeCall::return_address_offset_long); ++ } else { ++ call = (NativeCall*)(return_address - NativeCall::return_address_offset_short); ++ } ++#ifdef ASSERT ++ call->verify(); ++#endif ++ return call; ++} ++ ++class NativeMovConstReg: public NativeInstruction { ++ public: ++ enum mips_specific_constants { ++ instruction_offset = 0, ++ instruction_size = 4 * BytesPerInstWord, ++ next_instruction_offset = 4 * BytesPerInstWord, ++ }; ++ ++ int insn_word() const { return long_at(instruction_offset); } ++ address instruction_address() const { return addr_at(0); } ++ address next_instruction_address() const { return addr_at(next_instruction_offset); } ++ intptr_t data() const; ++ void set_data(intptr_t x, intptr_t o = 0); ++ ++ void patch_set48(intptr_t x); ++ ++ void verify(); ++ void print(); ++ ++ // unit test stuff ++ static void test() {} ++ ++ // Creation ++ inline friend NativeMovConstReg* nativeMovConstReg_at(address address); ++ inline friend NativeMovConstReg* nativeMovConstReg_before(address address); ++}; ++ ++inline NativeMovConstReg* nativeMovConstReg_at(address address) { ++ NativeMovConstReg* test = (NativeMovConstReg*)(address - NativeMovConstReg::instruction_offset); ++#ifdef ASSERT ++ test->verify(); ++#endif ++ return test; ++} ++ ++inline NativeMovConstReg* nativeMovConstReg_before(address address) { ++ NativeMovConstReg* test = (NativeMovConstReg*)(address - NativeMovConstReg::instruction_size - NativeMovConstReg::instruction_offset); ++#ifdef ASSERT ++ test->verify(); ++#endif ++ return test; ++} ++ ++class NativeMovConstRegPatching: public NativeMovConstReg { ++ private: ++ friend NativeMovConstRegPatching* nativeMovConstRegPatching_at(address address) { ++ NativeMovConstRegPatching* test = (NativeMovConstRegPatching*)(address - instruction_offset); ++ #ifdef ASSERT ++ test->verify(); ++ #endif ++ return test; ++ } ++}; ++ ++// An interface for accessing/manipulating native moves of the form: ++// lui AT, split_high(offset) ++// addiu AT, split_low(offset) ++// addu reg, reg, AT ++// lb/lbu/sb/lh/lhu/sh/lw/sw/lwc1/swc1 dest, reg, 0 ++// [lw/sw/lwc1/swc1 dest, reg, 4] ++// or ++// lb/lbu/sb/lh/lhu/sh/lw/sw/lwc1/swc1 dest, reg, offset ++// [lw/sw/lwc1/swc1 dest, reg, offset+4] ++// ++// Warning: These routines must be able to handle any instruction sequences ++// that are generated as a result of the load/store byte,word,long ++// macros. ++ ++class NativeMovRegMem: public NativeInstruction { ++ public: ++ enum mips_specific_constants { ++ instruction_offset = 0, ++ hiword_offset = 4, ++ ldst_offset = 12, ++ immediate_size = 4, ++ ldst_size = 16 ++ }; ++ ++ //offset is less than 16 bits. 
++ bool is_immediate() const { return !is_op(long_at(instruction_offset), Assembler::lui_op); } ++ bool is_64ldst() const { ++ if (is_immediate()) { ++ return (Assembler::opcode(long_at(hiword_offset)) == Assembler::opcode(long_at(instruction_offset))) && ++ (Assembler::imm_off(long_at(hiword_offset)) == Assembler::imm_off(long_at(instruction_offset)) + wordSize); ++ } else { ++ return (Assembler::opcode(long_at(ldst_offset+hiword_offset)) == Assembler::opcode(long_at(ldst_offset))) && ++ (Assembler::imm_off(long_at(ldst_offset+hiword_offset)) == Assembler::imm_off(long_at(ldst_offset)) + wordSize); ++ } ++ } ++ ++ address instruction_address() const { return addr_at(instruction_offset); } ++ address next_instruction_address() const { ++ return addr_at( (is_immediate()? immediate_size : ldst_size) + (is_64ldst()? 4 : 0)); ++ } ++ ++ int offset() const; ++ ++ void set_offset(int x); ++ ++ void add_offset_in_bytes(int add_offset) { set_offset ( ( offset() + add_offset ) ); } ++ ++ void verify(); ++ void print (); ++ ++ // unit test stuff ++ static void test() {} ++ ++ private: ++ inline friend NativeMovRegMem* nativeMovRegMem_at (address address); ++}; ++ ++inline NativeMovRegMem* nativeMovRegMem_at (address address) { ++ NativeMovRegMem* test = (NativeMovRegMem*)(address - NativeMovRegMem::instruction_offset); ++#ifdef ASSERT ++ test->verify(); ++#endif ++ return test; ++} ++ ++class NativeMovRegMemPatching: public NativeMovRegMem { ++ private: ++ friend NativeMovRegMemPatching* nativeMovRegMemPatching_at (address address) { ++ NativeMovRegMemPatching* test = (NativeMovRegMemPatching*)(address - instruction_offset); ++ #ifdef ASSERT ++ test->verify(); ++ #endif ++ return test; ++ } ++}; ++ ++ ++// Handles all kinds of jump on Loongson. Long/far, conditional/unconditional ++// 32 bits: ++// far jump: ++// lui reg, split_high(addr) ++// addiu reg, split_low(addr) ++// jr reg ++// nop ++// or ++// beq ZERO, ZERO, offset ++// nop ++// ++ ++//64 bits: ++// far jump: ++// lui rd, imm(63...48); ++// ori rd, rd, imm(47...32); ++// dsll rd, rd, 16; ++// ori rd, rd, imm(31...16); ++// dsll rd, rd, 16; ++// ori rd, rd, imm(15...0); ++// jalr rd ++// nop ++// ++class NativeJump: public NativeInstruction { ++ public: ++ enum mips_specific_constants { ++ instruction_offset = 0, ++ beq_opcode = 0x10000000,//000100|00000|00000|offset ++ b_mask = 0xffff0000, ++ short_size = 8, ++ instruction_size = 6 * BytesPerInstWord ++ }; ++ ++ bool is_short() const { return (long_at(instruction_offset) & b_mask) == beq_opcode; } ++ bool is_b_far(); ++ address instruction_address() const { return addr_at(instruction_offset); } ++ address jump_destination(); ++ ++ void patch_set48_gs(address dest); ++ void patch_set48(address dest); ++ ++ void patch_on_jr_gs(address dest); ++ void patch_on_jr(address dest); ++ ++ void patch_on_j_gs(address dest); ++ void patch_on_j(address dest); ++ ++ void patch_on_j_only(address dest); ++ ++ void set_jump_destination(address dest); ++ ++ // Creation ++ inline friend NativeJump* nativeJump_at(address address); ++ ++ // Insertion of native jump instruction ++ static void insert(address code_pos, address entry) { Unimplemented(); } ++ // MT-safe insertion of native jump at verified method entry ++ static void check_verified_entry_alignment(address entry, address verified_entry) {} ++ static void patch_verified_entry(address entry, address verified_entry, address dest); ++ ++ void verify(); ++}; ++ ++inline NativeJump* nativeJump_at(address address) { ++ NativeJump* jump = 
(NativeJump*)(address - NativeJump::instruction_offset); ++ debug_only(jump->verify();) ++ return jump; ++} ++ ++class NativeGeneralJump: public NativeJump { ++ public: ++ // Creation ++ inline friend NativeGeneralJump* nativeGeneralJump_at(address address); ++ ++ // Insertion of native general jump instruction ++ static void insert_unconditional(address code_pos, address entry); ++ static void replace_mt_safe(address instr_addr, address code_buffer); ++}; ++ ++inline NativeGeneralJump* nativeGeneralJump_at(address address) { ++ NativeGeneralJump* jump = (NativeGeneralJump*)(address); ++ debug_only(jump->verify();) ++ return jump; ++} ++ ++class NativeIllegalInstruction: public NativeInstruction { ++public: ++ enum mips_specific_constants { ++ instruction_code = 0x42000029, // mips reserved instruction ++ instruction_size = 4, ++ instruction_offset = 0, ++ next_instruction_offset = 4 ++ }; ++ ++ // Insert illegal opcode as specific address ++ static void insert(address code_pos); ++}; ++ ++// return instruction that does not pop values of the stack ++// jr RA ++// delay slot ++class NativeReturn: public NativeInstruction { ++ public: ++ enum mips_specific_constants { ++ instruction_size = 8, ++ instruction_offset = 0, ++ next_instruction_offset = 8 ++ }; ++}; ++ ++ ++ ++ ++class NativeCondJump; ++inline NativeCondJump* nativeCondJump_at(address address); ++class NativeCondJump: public NativeInstruction { ++ public: ++ enum mips_specific_constants { ++ instruction_size = 16, ++ instruction_offset = 12, ++ next_instruction_offset = 20 ++ }; ++ ++ ++ int insn_word() const { return long_at(instruction_offset); } ++ address instruction_address() const { return addr_at(0); } ++ address next_instruction_address() const { return addr_at(next_instruction_offset); } ++ ++ // Creation ++ inline friend NativeCondJump* nativeCondJump_at(address address); ++ ++ address jump_destination() const { ++ return ::nativeCondJump_at(addr_at(12))->jump_destination(); ++ } ++ ++ void set_jump_destination(address dest) { ++ ::nativeCondJump_at(addr_at(12))->set_jump_destination(dest); ++ } ++ ++}; ++ ++inline NativeCondJump* nativeCondJump_at(address address) { ++ NativeCondJump* jump = (NativeCondJump*)(address); ++ return jump; ++} ++ ++ ++ ++inline bool NativeInstruction::is_illegal() { return insn_word() == illegal_instruction(); } ++ ++inline bool NativeInstruction::is_call() { ++ // jal target ++ // nop ++ if ( nativeInstruction_at(addr_at(0))->is_op(Assembler::jal_op) && ++ nativeInstruction_at(addr_at(4))->is_nop() ) { ++ return true; ++ } ++ ++ // nop ++ // nop ++ // nop ++ // nop ++ // jal target ++ // nop ++ if ( is_nop() && ++ nativeInstruction_at(addr_at(4))->is_nop() && ++ nativeInstruction_at(addr_at(8))->is_nop() && ++ nativeInstruction_at(addr_at(12))->is_nop() && ++ nativeInstruction_at(addr_at(16))->is_op(Assembler::jal_op) && ++ nativeInstruction_at(addr_at(20))->is_nop() ) { ++ return true; ++ } ++ ++ // li64 ++ if ( is_op(Assembler::lui_op) && ++ is_op(int_at(4), Assembler::ori_op) && ++ is_special_op(int_at(8), Assembler::dsll_op) && ++ is_op(int_at(12), Assembler::ori_op) && ++ is_special_op(int_at(16), Assembler::dsll_op) && ++ is_op(int_at(20), Assembler::ori_op) && ++ is_special_op(int_at(24), Assembler::jalr_op) ) { ++ return true; ++ } ++ ++ //lui dst, imm16 ++ //ori dst, dst, imm16 ++ //dsll dst, dst, 16 ++ //ori dst, dst, imm16 ++ if ( is_op(Assembler::lui_op) && ++ is_op (int_at(4), Assembler::ori_op) && ++ is_special_op(int_at(8), Assembler::dsll_op) && ++ is_op (int_at(12), 
Assembler::ori_op) && ++ is_special_op(int_at(16), Assembler::jalr_op) ) { ++ return true; ++ } ++ ++ //ori dst, R0, imm16 ++ //dsll dst, dst, 16 ++ //ori dst, dst, imm16 ++ //nop ++ if ( is_op(Assembler::ori_op) && ++ is_special_op(int_at(4), Assembler::dsll_op) && ++ is_op (int_at(8), Assembler::ori_op) && ++ nativeInstruction_at(addr_at(12))->is_nop() && ++ is_special_op(int_at(16), Assembler::jalr_op) ) { ++ return true; ++ } ++ ++ //ori dst, R0, imm16 ++ //dsll dst, dst, 16 ++ //nop ++ //nop ++ if ( is_op(Assembler::ori_op) && ++ is_special_op(int_at(4), Assembler::dsll_op) && ++ nativeInstruction_at(addr_at(8))->is_nop() && ++ nativeInstruction_at(addr_at(12))->is_nop() && ++ is_special_op(int_at(16), Assembler::jalr_op) ) { ++ return true; ++ } ++ ++ //daddiu dst, R0, imm16 ++ //nop ++ //nop ++ //nop ++ if ( is_op(Assembler::daddiu_op) && ++ nativeInstruction_at(addr_at(4))->is_nop() && ++ nativeInstruction_at(addr_at(8))->is_nop() && ++ nativeInstruction_at(addr_at(12))->is_nop() && ++ is_special_op(int_at(16), Assembler::jalr_op) ) { ++ return true; ++ } ++ ++ //lui dst, imm16 ++ //ori dst, dst, imm16 ++ //nop ++ //nop ++ if ( is_op(Assembler::lui_op) && ++ is_op (int_at(4), Assembler::ori_op) && ++ nativeInstruction_at(addr_at(8))->is_nop() && ++ nativeInstruction_at(addr_at(12))->is_nop() && ++ is_special_op(int_at(16), Assembler::jalr_op) ) { ++ return true; ++ } ++ ++ //lui dst, imm16 ++ //nop ++ //nop ++ //nop ++ if ( is_op(Assembler::lui_op) && ++ nativeInstruction_at(addr_at(4))->is_nop() && ++ nativeInstruction_at(addr_at(8))->is_nop() && ++ nativeInstruction_at(addr_at(12))->is_nop() && ++ is_special_op(int_at(16), Assembler::jalr_op) ) { ++ return true; ++ } ++ ++ ++ //daddiu dst, R0, imm16 ++ //nop ++ if ( is_op(Assembler::daddiu_op) && ++ nativeInstruction_at(addr_at(4))->is_nop() && ++ is_special_op(int_at(8), Assembler::jalr_op) ) { ++ return true; ++ } ++ ++ //lui dst, imm16 ++ //ori dst, dst, imm16 ++ if ( is_op(Assembler::lui_op) && ++ is_op (int_at(4), Assembler::ori_op) && ++ is_special_op(int_at(8), Assembler::jalr_op) ) { ++ return true; ++ } ++ ++ //lui dst, imm16 ++ //nop ++ if ( is_op(Assembler::lui_op) && ++ nativeInstruction_at(addr_at(4))->is_nop() && ++ is_special_op(int_at(8), Assembler::jalr_op) ) { ++ return true; ++ } ++ ++ if(is_trampoline_call()) ++ return true; ++ ++ return false; ++ ++} ++ ++inline bool NativeInstruction::is_return() { return is_special_op(Assembler::jr_op) && is_rs(RA);} ++ ++inline bool NativeInstruction::is_cond_jump() { return is_int_branch() || is_float_branch(); } ++ ++// Call trampoline stubs. 
++class NativeCallTrampolineStub : public NativeInstruction { ++ public: ++ ++ enum mips_specific_constants { ++ instruction_size = 2 * BytesPerInstWord, ++ instruction_offset = 0, ++ next_instruction_offset = 2 * BytesPerInstWord ++ }; ++ ++ address destination() const { ++ return (address)ptr_at(0); ++ } ++ ++ void set_destination(address new_destination) { ++ set_ptr_at(0, (intptr_t)new_destination); ++ } ++}; ++ ++inline bool NativeInstruction::is_trampoline_call() { ++ // lui dst, imm16 ++ // ori dst, dst, imm16 ++ // dsll dst, dst, 16 ++ // ld target, dst, imm16 ++ // jalr target ++ // nop ++ if ( is_op(Assembler::lui_op) && ++ is_op(int_at(4), Assembler::ori_op) && ++ is_special_op(int_at(8), Assembler::dsll_op) && ++ is_op(int_at(12), Assembler::ld_op) && ++ is_special_op(int_at(16), Assembler::jalr_op) && ++ nativeInstruction_at(addr_at(20))->is_nop() ) { ++ return true; ++ } ++ ++ return false; ++} ++ ++inline NativeCallTrampolineStub* nativeCallTrampolineStub_at(address addr) { ++ return (NativeCallTrampolineStub*)addr; ++} ++#endif // CPU_MIPS_VM_NATIVEINST_MIPS_HPP +diff --git a/src/hotspot/cpu/mips/registerMap_mips.hpp b/src/hotspot/cpu/mips/registerMap_mips.hpp +new file mode 100644 +index 0000000000..7f800eb107 +--- /dev/null ++++ b/src/hotspot/cpu/mips/registerMap_mips.hpp +@@ -0,0 +1,47 @@ ++/* ++ * Copyright (c) 1998, 2010, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2016, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#ifndef CPU_MIPS_VM_REGISTERMAP_MIPS_HPP ++#define CPU_MIPS_VM_REGISTERMAP_MIPS_HPP ++ ++// machine-dependent implemention for register maps ++ friend class frame; ++ ++ private: ++#ifndef CORE ++ // This is the hook for finding a register in an "well-known" location, ++ // such as a register block of a predetermined format. ++ // Since there is none, we just return NULL. ++ // See registerMap_sparc.hpp for an example of grabbing registers ++ // from register save areas of a standard layout. 
++ address pd_location(VMReg reg) const {return NULL;} ++#endif ++ ++ // no PD state to clear or copy: ++ void pd_clear() {} ++ void pd_initialize() {} ++ void pd_initialize_from(const RegisterMap* map) {} ++ ++#endif // CPU_MIPS_VM_REGISTERMAP_MIPS_HPP +diff --git a/src/hotspot/cpu/mips/register_definitions_mips.cpp b/src/hotspot/cpu/mips/register_definitions_mips.cpp +new file mode 100644 +index 0000000000..4af2531834 +--- /dev/null ++++ b/src/hotspot/cpu/mips/register_definitions_mips.cpp +@@ -0,0 +1,103 @@ ++/* ++ * Copyright (c) 2002, 2013, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2016, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#include "precompiled.hpp" ++#include "asm/assembler.hpp" ++#include "asm/register.hpp" ++#include "register_mips.hpp" ++#ifdef TARGET_ARCH_MODEL_mips_32 ++# include "interp_masm_mips_32.hpp" ++#endif ++#ifdef TARGET_ARCH_MODEL_mips_64 ++# include "interp_masm_mips_64.hpp" ++#endif ++ ++REGISTER_DEFINITION(Register, noreg); ++REGISTER_DEFINITION(Register, i0); ++REGISTER_DEFINITION(Register, i1); ++REGISTER_DEFINITION(Register, i2); ++REGISTER_DEFINITION(Register, i3); ++REGISTER_DEFINITION(Register, i4); ++REGISTER_DEFINITION(Register, i5); ++REGISTER_DEFINITION(Register, i6); ++REGISTER_DEFINITION(Register, i7); ++REGISTER_DEFINITION(Register, i8); ++REGISTER_DEFINITION(Register, i9); ++REGISTER_DEFINITION(Register, i10); ++REGISTER_DEFINITION(Register, i11); ++REGISTER_DEFINITION(Register, i12); ++REGISTER_DEFINITION(Register, i13); ++REGISTER_DEFINITION(Register, i14); ++REGISTER_DEFINITION(Register, i15); ++REGISTER_DEFINITION(Register, i16); ++REGISTER_DEFINITION(Register, i17); ++REGISTER_DEFINITION(Register, i18); ++REGISTER_DEFINITION(Register, i19); ++REGISTER_DEFINITION(Register, i20); ++REGISTER_DEFINITION(Register, i21); ++REGISTER_DEFINITION(Register, i22); ++REGISTER_DEFINITION(Register, i23); ++REGISTER_DEFINITION(Register, i24); ++REGISTER_DEFINITION(Register, i25); ++REGISTER_DEFINITION(Register, i26); ++REGISTER_DEFINITION(Register, i27); ++REGISTER_DEFINITION(Register, i28); ++REGISTER_DEFINITION(Register, i29); ++REGISTER_DEFINITION(Register, i30); ++REGISTER_DEFINITION(Register, i31); ++ ++REGISTER_DEFINITION(FloatRegister, fnoreg); ++REGISTER_DEFINITION(FloatRegister, f0); ++REGISTER_DEFINITION(FloatRegister, f1); ++REGISTER_DEFINITION(FloatRegister, f2); ++REGISTER_DEFINITION(FloatRegister, f3); ++REGISTER_DEFINITION(FloatRegister, f4); ++REGISTER_DEFINITION(FloatRegister, f5); 
++REGISTER_DEFINITION(FloatRegister, f6); ++REGISTER_DEFINITION(FloatRegister, f7); ++REGISTER_DEFINITION(FloatRegister, f8); ++REGISTER_DEFINITION(FloatRegister, f9); ++REGISTER_DEFINITION(FloatRegister, f10); ++REGISTER_DEFINITION(FloatRegister, f11); ++REGISTER_DEFINITION(FloatRegister, f12); ++REGISTER_DEFINITION(FloatRegister, f13); ++REGISTER_DEFINITION(FloatRegister, f14); ++REGISTER_DEFINITION(FloatRegister, f15); ++REGISTER_DEFINITION(FloatRegister, f16); ++REGISTER_DEFINITION(FloatRegister, f17); ++REGISTER_DEFINITION(FloatRegister, f18); ++REGISTER_DEFINITION(FloatRegister, f19); ++REGISTER_DEFINITION(FloatRegister, f20); ++REGISTER_DEFINITION(FloatRegister, f21); ++REGISTER_DEFINITION(FloatRegister, f22); ++REGISTER_DEFINITION(FloatRegister, f23); ++REGISTER_DEFINITION(FloatRegister, f24); ++REGISTER_DEFINITION(FloatRegister, f25); ++REGISTER_DEFINITION(FloatRegister, f26); ++REGISTER_DEFINITION(FloatRegister, f27); ++REGISTER_DEFINITION(FloatRegister, f28); ++REGISTER_DEFINITION(FloatRegister, f29); ++REGISTER_DEFINITION(FloatRegister, f30); ++REGISTER_DEFINITION(FloatRegister, f31); +diff --git a/src/hotspot/cpu/mips/register_mips.cpp b/src/hotspot/cpu/mips/register_mips.cpp +new file mode 100644 +index 0000000000..4a9b22bfef +--- /dev/null ++++ b/src/hotspot/cpu/mips/register_mips.cpp +@@ -0,0 +1,52 @@ ++/* ++ * Copyright (c) 2000, 2012, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#include "precompiled.hpp" ++#include "register_mips.hpp" ++ ++const int ConcreteRegisterImpl::max_gpr = RegisterImpl::number_of_registers << 1; ++const int ConcreteRegisterImpl::max_fpr = ConcreteRegisterImpl::max_gpr + ++ 2 * FloatRegisterImpl::number_of_registers; ++ ++const char* RegisterImpl::name() const { ++ const char* names[number_of_registers] = { ++ "zero", "at", "v0", "v1", "a0", "a1", "a2", "a3", ++ "a4", "a5", "a6", "a7", "t0", "t1", "t2", "t3", ++ "s0", "s1", "s2", "s3", "s4", "s5", "s6", "s7", ++ "t8", "t9", "k0", "k1", "gp", "sp", "fp", "ra" ++ }; ++ return is_valid() ? names[encoding()] : "noreg"; ++} ++ ++const char* FloatRegisterImpl::name() const { ++ const char* names[number_of_registers] = { ++ "f0", "f1", "f2", "f3", "f4", "f5", "f6", "f7", ++ "f8", "f9", "f10", "f11", "f12", "f13", "f14", "f15", ++ "f16", "f17", "f18", "f19", "f20", "f21", "f22", "f23", ++ "f24", "f25", "f26", "f27", "f28", "f29", "f30", "f31", ++ }; ++ return is_valid() ? 
names[encoding()] : "fnoreg"; ++} ++ +diff --git a/src/hotspot/cpu/mips/register_mips.hpp b/src/hotspot/cpu/mips/register_mips.hpp +new file mode 100644 +index 0000000000..ea216fbcb9 +--- /dev/null ++++ b/src/hotspot/cpu/mips/register_mips.hpp +@@ -0,0 +1,341 @@ ++/* ++ * Copyright (c) 2000, 2012, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#ifndef CPU_MIPS_VM_REGISTER_MIPS_HPP ++#define CPU_MIPS_VM_REGISTER_MIPS_HPP ++ ++#include "asm/register.hpp" ++#include "utilities/formatBuffer.hpp" ++ ++class VMRegImpl; ++typedef VMRegImpl* VMReg; ++ ++// Use Register as shortcut ++class RegisterImpl; ++typedef RegisterImpl* Register; ++ ++inline Register as_Register(int encoding) { ++ return (Register)(intptr_t) encoding; ++} ++ ++class RegisterImpl: public AbstractRegisterImpl { ++ public: ++ enum { ++ number_of_registers = 32 ++ }; ++ ++ // derived registers, offsets, and addresses ++ Register successor() const { return as_Register(encoding() + 1); } ++ ++ // construction ++ inline friend Register as_Register(int encoding); ++ ++ VMReg as_VMReg(); ++ ++ // accessors ++ int encoding() const { assert(is_valid(), "invalid register (%d)", (int)(intptr_t)this ); return (intptr_t)this; } ++ bool is_valid() const { return 0 <= (intptr_t)this && (intptr_t)this < number_of_registers; } ++ const char* name() const; ++}; ++ ++ ++// The integer registers of the MIPS32 architecture ++CONSTANT_REGISTER_DECLARATION(Register, noreg, (-1)); ++ ++ ++CONSTANT_REGISTER_DECLARATION(Register, i0, (0)); ++CONSTANT_REGISTER_DECLARATION(Register, i1, (1)); ++CONSTANT_REGISTER_DECLARATION(Register, i2, (2)); ++CONSTANT_REGISTER_DECLARATION(Register, i3, (3)); ++CONSTANT_REGISTER_DECLARATION(Register, i4, (4)); ++CONSTANT_REGISTER_DECLARATION(Register, i5, (5)); ++CONSTANT_REGISTER_DECLARATION(Register, i6, (6)); ++CONSTANT_REGISTER_DECLARATION(Register, i7, (7)); ++CONSTANT_REGISTER_DECLARATION(Register, i8, (8)); ++CONSTANT_REGISTER_DECLARATION(Register, i9, (9)); ++CONSTANT_REGISTER_DECLARATION(Register, i10, (10)); ++CONSTANT_REGISTER_DECLARATION(Register, i11, (11)); ++CONSTANT_REGISTER_DECLARATION(Register, i12, (12)); ++CONSTANT_REGISTER_DECLARATION(Register, i13, (13)); ++CONSTANT_REGISTER_DECLARATION(Register, i14, (14)); ++CONSTANT_REGISTER_DECLARATION(Register, i15, (15)); ++CONSTANT_REGISTER_DECLARATION(Register, i16, (16)); ++CONSTANT_REGISTER_DECLARATION(Register, i17, (17)); ++CONSTANT_REGISTER_DECLARATION(Register, i18, 
(18)); ++CONSTANT_REGISTER_DECLARATION(Register, i19, (19)); ++CONSTANT_REGISTER_DECLARATION(Register, i20, (20)); ++CONSTANT_REGISTER_DECLARATION(Register, i21, (21)); ++CONSTANT_REGISTER_DECLARATION(Register, i22, (22)); ++CONSTANT_REGISTER_DECLARATION(Register, i23, (23)); ++CONSTANT_REGISTER_DECLARATION(Register, i24, (24)); ++CONSTANT_REGISTER_DECLARATION(Register, i25, (25)); ++CONSTANT_REGISTER_DECLARATION(Register, i26, (26)); ++CONSTANT_REGISTER_DECLARATION(Register, i27, (27)); ++CONSTANT_REGISTER_DECLARATION(Register, i28, (28)); ++CONSTANT_REGISTER_DECLARATION(Register, i29, (29)); ++CONSTANT_REGISTER_DECLARATION(Register, i30, (30)); ++CONSTANT_REGISTER_DECLARATION(Register, i31, (31)); ++ ++#ifndef DONT_USE_REGISTER_DEFINES ++#define NOREG ((Register)(noreg_RegisterEnumValue)) ++ ++#define I0 ((Register)(i0_RegisterEnumValue)) ++#define I1 ((Register)(i1_RegisterEnumValue)) ++#define I2 ((Register)(i2_RegisterEnumValue)) ++#define I3 ((Register)(i3_RegisterEnumValue)) ++#define I4 ((Register)(i4_RegisterEnumValue)) ++#define I5 ((Register)(i5_RegisterEnumValue)) ++#define I6 ((Register)(i6_RegisterEnumValue)) ++#define I7 ((Register)(i7_RegisterEnumValue)) ++#define I8 ((Register)(i8_RegisterEnumValue)) ++#define I9 ((Register)(i9_RegisterEnumValue)) ++#define I10 ((Register)(i10_RegisterEnumValue)) ++#define I11 ((Register)(i11_RegisterEnumValue)) ++#define I12 ((Register)(i12_RegisterEnumValue)) ++#define I13 ((Register)(i13_RegisterEnumValue)) ++#define I14 ((Register)(i14_RegisterEnumValue)) ++#define I15 ((Register)(i15_RegisterEnumValue)) ++#define I16 ((Register)(i16_RegisterEnumValue)) ++#define I17 ((Register)(i17_RegisterEnumValue)) ++#define I18 ((Register)(i18_RegisterEnumValue)) ++#define I19 ((Register)(i19_RegisterEnumValue)) ++#define I20 ((Register)(i20_RegisterEnumValue)) ++#define I21 ((Register)(i21_RegisterEnumValue)) ++#define I22 ((Register)(i22_RegisterEnumValue)) ++#define I23 ((Register)(i23_RegisterEnumValue)) ++#define I24 ((Register)(i24_RegisterEnumValue)) ++#define I25 ((Register)(i25_RegisterEnumValue)) ++#define I26 ((Register)(i26_RegisterEnumValue)) ++#define I27 ((Register)(i27_RegisterEnumValue)) ++#define I28 ((Register)(i28_RegisterEnumValue)) ++#define I29 ((Register)(i29_RegisterEnumValue)) ++#define I30 ((Register)(i30_RegisterEnumValue)) ++#define I31 ((Register)(i31_RegisterEnumValue)) ++ ++#define R0 ((Register)(i0_RegisterEnumValue)) ++#define AT ((Register)(i1_RegisterEnumValue)) ++#define V0 ((Register)(i2_RegisterEnumValue)) ++#define V1 ((Register)(i3_RegisterEnumValue)) ++#define A0 ((Register)(i4_RegisterEnumValue)) ++#define A1 ((Register)(i5_RegisterEnumValue)) ++#define A2 ((Register)(i6_RegisterEnumValue)) ++#define A3 ((Register)(i7_RegisterEnumValue)) ++#define A4 ((Register)(i8_RegisterEnumValue)) ++#define A5 ((Register)(i9_RegisterEnumValue)) ++#define A6 ((Register)(i10_RegisterEnumValue)) ++#define A7 ((Register)(i11_RegisterEnumValue)) ++#define RT0 ((Register)(i12_RegisterEnumValue)) ++#define RT1 ((Register)(i13_RegisterEnumValue)) ++#define RT2 ((Register)(i14_RegisterEnumValue)) ++#define RT3 ((Register)(i15_RegisterEnumValue)) ++#define S0 ((Register)(i16_RegisterEnumValue)) ++#define S1 ((Register)(i17_RegisterEnumValue)) ++#define S2 ((Register)(i18_RegisterEnumValue)) ++#define S3 ((Register)(i19_RegisterEnumValue)) ++#define S4 ((Register)(i20_RegisterEnumValue)) ++#define S5 ((Register)(i21_RegisterEnumValue)) ++#define S6 ((Register)(i22_RegisterEnumValue)) ++#define S7 
((Register)(i23_RegisterEnumValue)) ++#define RT8 ((Register)(i24_RegisterEnumValue)) ++#define RT9 ((Register)(i25_RegisterEnumValue)) ++#define K0 ((Register)(i26_RegisterEnumValue)) ++#define K1 ((Register)(i27_RegisterEnumValue)) ++#define GP ((Register)(i28_RegisterEnumValue)) ++#define SP ((Register)(i29_RegisterEnumValue)) ++#define FP ((Register)(i30_RegisterEnumValue)) ++#define S8 ((Register)(i30_RegisterEnumValue)) ++#define RA ((Register)(i31_RegisterEnumValue)) ++ ++#define c_rarg0 RT0 ++#define c_rarg1 RT1 ++#define Rmethod S3 ++#define Rsender S4 ++#define Rnext S1 ++ ++/* ++#define RT0 T0 ++#define RT1 T1 ++#define RT2 T2 ++#define RT3 T3 ++#define RT4 T8 ++#define RT5 T9 ++*/ ++ ++ ++//for interpreter frame ++// bytecode pointer register ++#define BCP S0 ++// local variable pointer register ++#define LVP S7 ++// temperary callee saved register, we use this register to save the register maybe blowed cross call_VM ++// be sure to save and restore its value in call_stub ++#define TSR S2 ++ ++#define OPT_THREAD 1 ++ ++#define TREG S6 ++ ++#define S5_heapbase S5 ++ ++#define mh_SP_save SP ++ ++#define FSR V0 ++#define SSR V1 ++#define FSF F0 ++#define SSF F1 ++#define FTF F14 ++#define STF F15 ++ ++#define AFT F30 ++ ++#define RECEIVER T0 ++#define IC_Klass T1 ++ ++#define SHIFT_count T3 ++ ++#endif // DONT_USE_REGISTER_DEFINES ++ ++// Use FloatRegister as shortcut ++class FloatRegisterImpl; ++typedef FloatRegisterImpl* FloatRegister; ++ ++inline FloatRegister as_FloatRegister(int encoding) { ++ return (FloatRegister)(intptr_t) encoding; ++} ++ ++// The implementation of floating point registers for the architecture ++class FloatRegisterImpl: public AbstractRegisterImpl { ++ public: ++ enum { ++ float_arg_base = 12, ++ number_of_registers = 32 ++ }; ++ ++ // construction ++ inline friend FloatRegister as_FloatRegister(int encoding); ++ ++ VMReg as_VMReg(); ++ ++ // derived registers, offsets, and addresses ++ FloatRegister successor() const { return as_FloatRegister(encoding() + 1); } ++ ++ // accessors ++ int encoding() const { assert(is_valid(), "invalid register"); return (intptr_t)this; } ++ bool is_valid() const { return 0 <= (intptr_t)this && (intptr_t)this < number_of_registers; } ++ const char* name() const; ++ ++}; ++ ++CONSTANT_REGISTER_DECLARATION(FloatRegister, fnoreg , (-1)); ++ ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f0 , ( 0)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f1 , ( 1)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f2 , ( 2)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f3 , ( 3)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f4 , ( 4)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f5 , ( 5)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f6 , ( 6)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f7 , ( 7)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f8 , ( 8)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f9 , ( 9)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f10 , (10)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f11 , (11)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f12 , (12)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f13 , (13)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f14 , (14)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f15 , (15)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f16 , (16)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f17 , (17)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f18 , (18)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f19 , (19)); 
++CONSTANT_REGISTER_DECLARATION(FloatRegister, f20 , (20)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f21 , (21)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f22 , (22)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f23 , (23)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f24 , (24)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f25 , (25)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f26 , (26)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f27 , (27)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f28 , (28)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f29 , (29)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f30 , (30)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f31 , (31)); ++ ++#ifndef DONT_USE_REGISTER_DEFINES ++#define FNOREG ((FloatRegister)(fnoreg_FloatRegisterEnumValue)) ++#define F0 ((FloatRegister)( f0_FloatRegisterEnumValue)) ++#define F1 ((FloatRegister)( f1_FloatRegisterEnumValue)) ++#define F2 ((FloatRegister)( f2_FloatRegisterEnumValue)) ++#define F3 ((FloatRegister)( f3_FloatRegisterEnumValue)) ++#define F4 ((FloatRegister)( f4_FloatRegisterEnumValue)) ++#define F5 ((FloatRegister)( f5_FloatRegisterEnumValue)) ++#define F6 ((FloatRegister)( f6_FloatRegisterEnumValue)) ++#define F7 ((FloatRegister)( f7_FloatRegisterEnumValue)) ++#define F8 ((FloatRegister)( f8_FloatRegisterEnumValue)) ++#define F9 ((FloatRegister)( f9_FloatRegisterEnumValue)) ++#define F10 ((FloatRegister)( f10_FloatRegisterEnumValue)) ++#define F11 ((FloatRegister)( f11_FloatRegisterEnumValue)) ++#define F12 ((FloatRegister)( f12_FloatRegisterEnumValue)) ++#define F13 ((FloatRegister)( f13_FloatRegisterEnumValue)) ++#define F14 ((FloatRegister)( f14_FloatRegisterEnumValue)) ++#define F15 ((FloatRegister)( f15_FloatRegisterEnumValue)) ++#define F16 ((FloatRegister)( f16_FloatRegisterEnumValue)) ++#define F17 ((FloatRegister)( f17_FloatRegisterEnumValue)) ++#define F18 ((FloatRegister)( f18_FloatRegisterEnumValue)) ++#define F19 ((FloatRegister)( f19_FloatRegisterEnumValue)) ++#define F20 ((FloatRegister)( f20_FloatRegisterEnumValue)) ++#define F21 ((FloatRegister)( f21_FloatRegisterEnumValue)) ++#define F22 ((FloatRegister)( f22_FloatRegisterEnumValue)) ++#define F23 ((FloatRegister)( f23_FloatRegisterEnumValue)) ++#define F24 ((FloatRegister)( f24_FloatRegisterEnumValue)) ++#define F25 ((FloatRegister)( f25_FloatRegisterEnumValue)) ++#define F26 ((FloatRegister)( f26_FloatRegisterEnumValue)) ++#define F27 ((FloatRegister)( f27_FloatRegisterEnumValue)) ++#define F28 ((FloatRegister)( f28_FloatRegisterEnumValue)) ++#define F29 ((FloatRegister)( f29_FloatRegisterEnumValue)) ++#define F30 ((FloatRegister)( f30_FloatRegisterEnumValue)) ++#define F31 ((FloatRegister)( f31_FloatRegisterEnumValue)) ++#endif // DONT_USE_REGISTER_DEFINES ++ ++ ++const int MIPS_ARGS_IN_REGS_NUM = 4; ++ ++// Need to know the total number of registers of all sorts for SharedInfo. ++// Define a class that exports it. ++class ConcreteRegisterImpl : public AbstractRegisterImpl { ++ public: ++ enum { ++ // A big enough number for C2: all the registers plus flags ++ // This number must be large enough to cover REG_COUNT (defined by c2) registers. ++ // There is no requirement that any ordering here matches any ordering c2 gives ++ // it's optoregs. 
++ number_of_registers = (RegisterImpl::number_of_registers + FloatRegisterImpl::number_of_registers) * 2 ++ }; ++ ++ static const int max_gpr; ++ static const int max_fpr; ++}; ++ ++#endif //CPU_MIPS_VM_REGISTER_MIPS_HPP +diff --git a/src/hotspot/cpu/mips/relocInfo_mips.cpp b/src/hotspot/cpu/mips/relocInfo_mips.cpp +new file mode 100644 +index 0000000000..ff8028032b +--- /dev/null ++++ b/src/hotspot/cpu/mips/relocInfo_mips.cpp +@@ -0,0 +1,160 @@ ++/* ++ * Copyright (c) 1998, 2013, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#include "precompiled.hpp" ++#include "asm/macroAssembler.hpp" ++#include "code/relocInfo.hpp" ++#include "compiler/disassembler.hpp" ++#include "nativeInst_mips.hpp" ++#include "oops/compressedOops.inline.hpp" ++#include "oops/oop.hpp" ++#include "oops/oop.inline.hpp" ++#include "runtime/safepoint.hpp" ++ ++ ++void Relocation::pd_set_data_value(address x, intptr_t o, bool verify_only) { ++ x += o; ++ typedef Assembler::WhichOperand WhichOperand; ++ WhichOperand which = (WhichOperand) format(); // that is, disp32 or imm, call32, narrow oop ++ assert(which == Assembler::disp32_operand || ++ which == Assembler::narrow_oop_operand || ++ which == Assembler::imm_operand, "format unpacks ok"); ++ if (which == Assembler::imm_operand) { ++ if (verify_only) { ++ assert(nativeMovConstReg_at(addr())->data() == (long)x, "instructions must match"); ++ } else { ++ nativeMovConstReg_at(addr())->set_data((intptr_t)(x)); ++ } ++ } else if (which == Assembler::narrow_oop_operand) { ++ // both compressed oops and compressed classes look the same ++ if (Universe::heap()->is_in_reserved((oop)x)) { ++ if (verify_only) { ++ assert(nativeMovConstReg_at(addr())->data() == (long)CompressedOops::encode((oop)x), "instructions must match"); ++ } else { ++ nativeMovConstReg_at(addr())->set_data((intptr_t)(CompressedOops::encode(oop(x))), (intptr_t)(x)); ++ } ++ } else { ++ if (verify_only) { ++ assert(nativeMovConstReg_at(addr())->data() == (long)Klass::encode_klass((Klass*)x), "instructions must match"); ++ } else { ++ nativeMovConstReg_at(addr())->set_data((intptr_t)(Klass::encode_klass((Klass*)x)), (intptr_t)(x)); ++ } ++ } ++ } else { ++ // Note: Use runtime_call_type relocations for call32_operand. 
++ assert(0, "call32_operand not supported in MIPS64"); ++ } ++} ++ ++ ++//NOTICE HERE, this relocate is not need for MIPS, since MIPS USE abosolutly target, ++//Maybe We should FORGET CALL RELOCATION ++address Relocation::pd_call_destination(address orig_addr) { ++ intptr_t adj = 0; ++ NativeInstruction* ni = nativeInstruction_at(addr()); ++ if (ni->is_call()) { ++ if (!ni->is_trampoline_call()) { ++ return nativeCall_at(addr())->target_addr_for_insn(); ++ } else { ++ address trampoline = nativeCall_at(addr())->get_trampoline(); ++ if (trampoline) { ++ return nativeCallTrampolineStub_at(trampoline)->destination(); ++ } else { ++ return (address) -1; ++ } ++ } ++ } else if (ni->is_jump()) { ++ return nativeGeneralJump_at(addr())->jump_destination() + adj; ++ } else if (ni->is_cond_jump()) { ++ return nativeCondJump_at(addr())->jump_destination() +adj; ++ } else { ++ tty->print_cr("\nError!\ncall destination: " INTPTR_FORMAT, p2i(addr())); ++ Disassembler::decode(addr() - 10 * 4, addr() + 10 * 4, tty); ++ ShouldNotReachHere(); ++ return NULL; ++ } ++} ++ ++ ++void Relocation::pd_set_call_destination(address x) { ++ NativeInstruction* ni = nativeInstruction_at(addr()); ++ if (ni->is_call()) { ++ NativeCall* call = nativeCall_at(addr()); ++ if (!ni->is_trampoline_call()) { ++ call->set_destination(x); ++ } else { ++ address trampoline_stub_addr = call->get_trampoline(); ++ if (trampoline_stub_addr != NULL) { ++ address orig = call->target_addr_for_insn(); ++ if (orig != trampoline_stub_addr) { ++ call->patch_on_trampoline(trampoline_stub_addr); ++ } ++ call->set_destination_mt_safe(x, false); ++ } ++ } ++ } else if (ni->is_jump()) ++ nativeGeneralJump_at(addr())->set_jump_destination(x); ++ else if (ni->is_cond_jump()) ++ nativeCondJump_at(addr())->set_jump_destination(x); ++ else ++ { ShouldNotReachHere(); } ++ ++ // Unresolved jumps are recognized by a destination of -1 ++ // However 64bit can't actually produce such an address ++ // and encodes a jump to self but jump_destination will ++ // return a -1 as the signal. We must not relocate this ++ // jmp or the ic code will not see it as unresolved. ++} ++ ++ ++address* Relocation::pd_address_in_code() { ++ return (address*)addr(); ++} ++ ++ ++address Relocation::pd_get_address_from_code() { ++ NativeMovConstReg* ni = nativeMovConstReg_at(addr()); ++ return (address)ni->data(); ++} ++ ++ ++ ++void poll_Relocation::fix_relocation_after_move(const CodeBuffer* src, CodeBuffer* dest) { ++} ++ ++/* ++void poll_return_Relocation::fix_relocation_after_move(const CodeBuffer* src, CodeBuffer* dest) { ++} ++*/ ++ ++void internal_pc_Relocation::fix_relocation_after_move(const CodeBuffer* src, CodeBuffer* dest) { ++ address target =0; ++ NativeMovConstReg* ni = nativeMovConstReg_at(addr()); ++ target = new_addr_for((address)ni->data(), src, dest); ++ ni->set_data((intptr_t)target); ++} ++ ++void metadata_Relocation::pd_fix_value(address x) { ++} +diff --git a/src/hotspot/cpu/mips/relocInfo_mips.hpp b/src/hotspot/cpu/mips/relocInfo_mips.hpp +new file mode 100644 +index 0000000000..1e1e170fd8 +--- /dev/null ++++ b/src/hotspot/cpu/mips/relocInfo_mips.hpp +@@ -0,0 +1,44 @@ ++/* ++ * Copyright (c) 1997, 2010, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#ifndef CPU_MIPS_VM_RELOCINFO_MIPS_HPP ++#define CPU_MIPS_VM_RELOCINFO_MIPS_HPP ++ ++ // machine-dependent parts of class relocInfo ++ private: ++ enum { ++ // Since MIPS instructions are whole words, ++ // the two low-order offset bits can always be discarded. ++ offset_unit = 4, ++ ++ // imm_oop_operand vs. narrow_oop_operand ++ format_width = 2 ++ }; ++ ++ public: ++ ++ static bool mustIterateImmediateOopsInCode() { return false; } ++ ++#endif // CPU_MIPS_VM_RELOCINFO_MIPS_HPP +diff --git a/src/hotspot/cpu/mips/runtime_mips_64.cpp b/src/hotspot/cpu/mips/runtime_mips_64.cpp +new file mode 100644 +index 0000000000..2a0488cd01 +--- /dev/null ++++ b/src/hotspot/cpu/mips/runtime_mips_64.cpp +@@ -0,0 +1,198 @@ ++/* ++ * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2019, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ * ++ */ ++ ++#include "precompiled.hpp" ++#ifdef COMPILER2 ++#include "asm/macroAssembler.hpp" ++#include "asm/macroAssembler.inline.hpp" ++#include "classfile/systemDictionary.hpp" ++#include "code/vmreg.hpp" ++#include "interpreter/interpreter.hpp" ++#include "opto/runtime.hpp" ++#include "runtime/interfaceSupport.inline.hpp" ++#include "runtime/sharedRuntime.hpp" ++#include "runtime/stubRoutines.hpp" ++#include "runtime/vframeArray.hpp" ++#include "utilities/globalDefinitions.hpp" ++#include "vmreg_mips.inline.hpp" ++#endif ++ ++#define __ masm-> ++ ++#define T0 RT0 ++#define T1 RT1 ++#define T2 RT2 ++#define T3 RT3 ++#define T8 RT8 ++#define T9 RT9 ++ ++//-------------- generate_exception_blob ----------- ++// creates _exception_blob. ++// The exception blob is jumped to from a compiled method. ++// (see emit_exception_handler in sparc.ad file) ++// ++// Given an exception pc at a call we call into the runtime for the ++// handler in this method. This handler might merely restore state ++// (i.e. callee save registers) unwind the frame and jump to the ++// exception handler for the nmethod if there is no Java level handler ++// for the nmethod. ++// ++// This code is entered with a jump, and left with a jump. ++// ++// Arguments: ++// V0: exception oop ++// V1: exception pc ++// ++// Results: ++// A0: exception oop ++// A1: exception pc in caller or ??? ++// jumps to: exception handler of caller ++// ++// Note: the exception pc MUST be at a call (precise debug information) ++// ++// [stubGenerator_mips.cpp] generate_forward_exception() ++// |- V0, V1 are created ++// |- T9 <= SharedRuntime::exception_handler_for_return_address ++// `- jr T9 ++// `- the caller's exception_handler ++// `- jr OptoRuntime::exception_blob ++// `- here ++// ++void OptoRuntime::generate_exception_blob() { ++ // Capture info about frame layout ++ enum layout { ++ fp_off, ++ return_off, // slot for return address ++ framesize ++ }; ++ ++ // allocate space for the code ++ ResourceMark rm; ++ // setup code generation tools ++ CodeBuffer buffer("exception_blob", 5120, 5120); ++ MacroAssembler* masm = new MacroAssembler(&buffer); ++ ++ ++ address start = __ pc(); ++ ++ __ daddiu(SP, SP, -1 * framesize * wordSize); // Prolog! ++ ++ // this frame will be treated as the original caller method. ++ // So, the return pc should be filled with the original exception pc. ++ // ref: X86's implementation ++ __ sd(V1, SP, return_off *wordSize); // return address ++ __ sd(FP, SP, fp_off *wordSize); ++ ++ // Save callee saved registers. None for UseSSE=0, ++ // floats-only for UseSSE=1, and doubles for UseSSE=2. ++ ++ __ daddiu(FP, SP, fp_off * wordSize); ++ ++ // Store exception in Thread object. We cannot pass any arguments to the ++ // handle_exception call, since we do not want to make any assumption ++ // about the size of the frame where the exception happened in. ++ Register thread = TREG; ++ ++#ifndef OPT_THREAD ++ __ get_thread(thread); ++#endif ++ ++ __ sd(V0, Address(thread, JavaThread::exception_oop_offset())); ++ __ sd(V1, Address(thread, JavaThread::exception_pc_offset())); ++ ++ // This call does all the hard work. It checks if an exception handler ++ // exists in the method. ++ // If so, it returns the handler address. ++ // If not, it prepares for stack-unwinding, restoring the callee-save ++ // registers of the frame being removed. 
++ __ set_last_Java_frame(thread, NOREG, NOREG, NULL); ++ ++ __ move(AT, -(StackAlignmentInBytes)); ++ __ andr(SP, SP, AT); // Fix stack alignment as required by ABI ++ ++ __ relocate(relocInfo::internal_pc_type); ++ ++ { ++ long save_pc = (long)__ pc() + 48; ++ __ patchable_set48(AT, save_pc); ++ } ++ __ sd(AT, thread, in_bytes(JavaThread::last_Java_pc_offset())); ++ ++ __ move(A0, thread); ++ __ patchable_set48(T9, (long)OptoRuntime::handle_exception_C); ++ __ jalr(T9); ++ __ delayed()->nop(); ++ ++ // Set an oopmap for the call site ++ OopMapSet *oop_maps = new OopMapSet(); ++ OopMap* map = new OopMap( framesize, 0 ); ++ ++ oop_maps->add_gc_map( __ offset(), map); ++ ++#ifndef OPT_THREAD ++ __ get_thread(thread); ++#endif ++ __ reset_last_Java_frame(thread, true); ++ ++ // Pop self-frame. ++ __ leave(); // Epilog! ++ ++ // V0: exception handler ++ ++ // We have a handler in V0, (could be deopt blob) ++ __ move(T9, V0); ++ ++#ifndef OPT_THREAD ++ __ get_thread(thread); ++#endif ++ // Get the exception ++ __ ld(A0, Address(thread, JavaThread::exception_oop_offset())); ++ // Get the exception pc in case we are deoptimized ++ __ ld(A1, Address(thread, JavaThread::exception_pc_offset())); ++#ifdef ASSERT ++ __ sd(R0, Address(thread, JavaThread::exception_handler_pc_offset())); ++ __ sd(R0, Address(thread, JavaThread::exception_pc_offset())); ++#endif ++ // Clear the exception oop so GC no longer processes it as a root. ++ __ sd(R0, Address(thread, JavaThread::exception_oop_offset())); ++ ++ // Fix seg fault when running: ++ // Eclipse + Plugin + Debug As ++ // This is the only condition where C2 calls SharedRuntime::generate_deopt_blob() ++ // ++ __ move(V0, A0); ++ __ move(V1, A1); ++ ++ // V0: exception oop ++ // T9: exception handler ++ // A1: exception pc ++ __ jr(T9); ++ __ delayed()->nop(); ++ ++ // make sure all code is generated ++ masm->flush(); ++ ++ _exception_blob = ExceptionBlob::create(&buffer, oop_maps, framesize); ++} +diff --git a/src/hotspot/cpu/mips/sharedRuntime_mips_64.cpp b/src/hotspot/cpu/mips/sharedRuntime_mips_64.cpp +new file mode 100644 +index 0000000000..4a9791d4cb +--- /dev/null ++++ b/src/hotspot/cpu/mips/sharedRuntime_mips_64.cpp +@@ -0,0 +1,3879 @@ ++/* ++ * Copyright (c) 2003, 2013, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ * ++ */ ++ ++#include "precompiled.hpp" ++#include "asm/macroAssembler.hpp" ++#include "asm/macroAssembler.inline.hpp" ++#include "code/debugInfoRec.hpp" ++#include "code/icBuffer.hpp" ++#include "code/nativeInst.hpp" ++#include "code/vtableStubs.hpp" ++#include "interpreter/interpreter.hpp" ++#include "oops/compiledICHolder.hpp" ++#include "oops/klass.inline.hpp" ++#include "runtime/sharedRuntime.hpp" ++#include "runtime/vframeArray.hpp" ++#include "vmreg_mips.inline.hpp" ++#ifdef COMPILER2 ++#include "opto/runtime.hpp" ++#endif ++ ++#include ++ ++#define __ masm-> ++ ++#define T0 RT0 ++#define T1 RT1 ++#define T2 RT2 ++#define T3 RT3 ++#define T8 RT8 ++#define T9 RT9 ++ ++const int StackAlignmentInSlots = StackAlignmentInBytes / VMRegImpl::stack_slot_size; ++ ++class RegisterSaver { ++ enum { FPU_regs_live = 32 }; ++ // Capture info about frame layout ++ enum layout { ++#define DEF_LAYOUT_OFFS(regname) regname ## _off, regname ## H_off, ++ DEF_LAYOUT_OFFS(for_16_bytes_aligned) ++ DEF_LAYOUT_OFFS(fpr0) ++ DEF_LAYOUT_OFFS(fpr1) ++ DEF_LAYOUT_OFFS(fpr2) ++ DEF_LAYOUT_OFFS(fpr3) ++ DEF_LAYOUT_OFFS(fpr4) ++ DEF_LAYOUT_OFFS(fpr5) ++ DEF_LAYOUT_OFFS(fpr6) ++ DEF_LAYOUT_OFFS(fpr7) ++ DEF_LAYOUT_OFFS(fpr8) ++ DEF_LAYOUT_OFFS(fpr9) ++ DEF_LAYOUT_OFFS(fpr10) ++ DEF_LAYOUT_OFFS(fpr11) ++ DEF_LAYOUT_OFFS(fpr12) ++ DEF_LAYOUT_OFFS(fpr13) ++ DEF_LAYOUT_OFFS(fpr14) ++ DEF_LAYOUT_OFFS(fpr15) ++ DEF_LAYOUT_OFFS(fpr16) ++ DEF_LAYOUT_OFFS(fpr17) ++ DEF_LAYOUT_OFFS(fpr18) ++ DEF_LAYOUT_OFFS(fpr19) ++ DEF_LAYOUT_OFFS(fpr20) ++ DEF_LAYOUT_OFFS(fpr21) ++ DEF_LAYOUT_OFFS(fpr22) ++ DEF_LAYOUT_OFFS(fpr23) ++ DEF_LAYOUT_OFFS(fpr24) ++ DEF_LAYOUT_OFFS(fpr25) ++ DEF_LAYOUT_OFFS(fpr26) ++ DEF_LAYOUT_OFFS(fpr27) ++ DEF_LAYOUT_OFFS(fpr28) ++ DEF_LAYOUT_OFFS(fpr29) ++ DEF_LAYOUT_OFFS(fpr30) ++ DEF_LAYOUT_OFFS(fpr31) ++ ++ DEF_LAYOUT_OFFS(v0) ++ DEF_LAYOUT_OFFS(v1) ++ DEF_LAYOUT_OFFS(a0) ++ DEF_LAYOUT_OFFS(a1) ++ DEF_LAYOUT_OFFS(a2) ++ DEF_LAYOUT_OFFS(a3) ++ DEF_LAYOUT_OFFS(a4) ++ DEF_LAYOUT_OFFS(a5) ++ DEF_LAYOUT_OFFS(a6) ++ DEF_LAYOUT_OFFS(a7) ++ DEF_LAYOUT_OFFS(t0) ++ DEF_LAYOUT_OFFS(t1) ++ DEF_LAYOUT_OFFS(t2) ++ DEF_LAYOUT_OFFS(t3) ++ DEF_LAYOUT_OFFS(s0) ++ DEF_LAYOUT_OFFS(s1) ++ DEF_LAYOUT_OFFS(s2) ++ DEF_LAYOUT_OFFS(s3) ++ DEF_LAYOUT_OFFS(s4) ++ DEF_LAYOUT_OFFS(s5) ++ DEF_LAYOUT_OFFS(s6) ++ DEF_LAYOUT_OFFS(s7) ++ DEF_LAYOUT_OFFS(t8) ++ DEF_LAYOUT_OFFS(t9) ++ ++ DEF_LAYOUT_OFFS(gp) ++ DEF_LAYOUT_OFFS(fp) ++ DEF_LAYOUT_OFFS(return) ++ reg_save_size ++ }; ++ ++ public: ++ ++ static OopMap* save_live_registers(MacroAssembler* masm, int additional_frame_words, int* total_frame_words, bool save_vectors =false ); ++ static void restore_live_registers(MacroAssembler* masm, bool restore_vectors = false); ++ static int raOffset(void) { return return_off / 2; } ++ //Rmethod ++ static int methodOffset(void) { return s3_off / 2; } ++ ++ static int v0Offset(void) { return v0_off / 2; } ++ static int v1Offset(void) { return v1_off / 2; } ++ ++ static int fpResultOffset(void) { return fpr0_off / 2; } ++ ++ // During deoptimization only the result register need to be restored ++ // all the other values have already been extracted. 
++ static void restore_result_registers(MacroAssembler* masm); ++}; ++ ++OopMap* RegisterSaver::save_live_registers(MacroAssembler* masm, int additional_frame_words, int* total_frame_words, bool save_vectors ) { ++ ++ // Always make the frame size 16-byte aligned ++ int frame_size_in_bytes = round_to(additional_frame_words*wordSize + ++ reg_save_size*BytesPerInt, 16); ++ // OopMap frame size is in compiler stack slots (jint's) not bytes or words ++ int frame_size_in_slots = frame_size_in_bytes / BytesPerInt; ++ // The caller will allocate additional_frame_words ++ int additional_frame_slots = additional_frame_words*wordSize / BytesPerInt; ++ // CodeBlob frame size is in words. ++ int frame_size_in_words = frame_size_in_bytes / wordSize; ++ *total_frame_words = frame_size_in_words; ++ ++ // save registers ++ ++ __ daddiu(SP, SP, - reg_save_size * jintSize); ++ ++ __ sdc1(F0, SP, fpr0_off * jintSize); __ sdc1(F1, SP, fpr1_off * jintSize); ++ __ sdc1(F2, SP, fpr2_off * jintSize); __ sdc1(F3, SP, fpr3_off * jintSize); ++ __ sdc1(F4, SP, fpr4_off * jintSize); __ sdc1(F5, SP, fpr5_off * jintSize); ++ __ sdc1(F6, SP, fpr6_off * jintSize); __ sdc1(F7, SP, fpr7_off * jintSize); ++ __ sdc1(F8, SP, fpr8_off * jintSize); __ sdc1(F9, SP, fpr9_off * jintSize); ++ __ sdc1(F10, SP, fpr10_off * jintSize); __ sdc1(F11, SP, fpr11_off * jintSize); ++ __ sdc1(F12, SP, fpr12_off * jintSize); __ sdc1(F13, SP, fpr13_off * jintSize); ++ __ sdc1(F14, SP, fpr14_off * jintSize); __ sdc1(F15, SP, fpr15_off * jintSize); ++ __ sdc1(F16, SP, fpr16_off * jintSize); __ sdc1(F17, SP, fpr17_off * jintSize); ++ __ sdc1(F18, SP, fpr18_off * jintSize); __ sdc1(F19, SP, fpr19_off * jintSize); ++ __ sdc1(F20, SP, fpr20_off * jintSize); __ sdc1(F21, SP, fpr21_off * jintSize); ++ __ sdc1(F22, SP, fpr22_off * jintSize); __ sdc1(F23, SP, fpr23_off * jintSize); ++ __ sdc1(F24, SP, fpr24_off * jintSize); __ sdc1(F25, SP, fpr25_off * jintSize); ++ __ sdc1(F26, SP, fpr26_off * jintSize); __ sdc1(F27, SP, fpr27_off * jintSize); ++ __ sdc1(F28, SP, fpr28_off * jintSize); __ sdc1(F29, SP, fpr29_off * jintSize); ++ __ sdc1(F30, SP, fpr30_off * jintSize); __ sdc1(F31, SP, fpr31_off * jintSize); ++ __ sd(V0, SP, v0_off * jintSize); __ sd(V1, SP, v1_off * jintSize); ++ __ sd(A0, SP, a0_off * jintSize); __ sd(A1, SP, a1_off * jintSize); ++ __ sd(A2, SP, a2_off * jintSize); __ sd(A3, SP, a3_off * jintSize); ++ __ sd(A4, SP, a4_off * jintSize); __ sd(A5, SP, a5_off * jintSize); ++ __ sd(A6, SP, a6_off * jintSize); __ sd(A7, SP, a7_off * jintSize); ++ __ sd(T0, SP, t0_off * jintSize); ++ __ sd(T1, SP, t1_off * jintSize); ++ __ sd(T2, SP, t2_off * jintSize); ++ __ sd(T3, SP, t3_off * jintSize); ++ __ sd(S0, SP, s0_off * jintSize); ++ __ sd(S1, SP, s1_off * jintSize); ++ __ sd(S2, SP, s2_off * jintSize); ++ __ sd(S3, SP, s3_off * jintSize); ++ __ sd(S4, SP, s4_off * jintSize); ++ __ sd(S5, SP, s5_off * jintSize); ++ __ sd(S6, SP, s6_off * jintSize); ++ __ sd(S7, SP, s7_off * jintSize); ++ ++ __ sd(T8, SP, t8_off * jintSize); ++ __ sd(T9, SP, t9_off * jintSize); ++ ++ __ sd(GP, SP, gp_off * jintSize); ++ __ sd(FP, SP, fp_off * jintSize); ++ __ sd(RA, SP, return_off * jintSize); ++ __ daddiu(FP, SP, fp_off * jintSize); ++ ++ OopMapSet *oop_maps = new OopMapSet(); ++ //OopMap* map = new OopMap( frame_words, 0 ); ++ OopMap* map = new OopMap( frame_size_in_slots, 0 ); ++ ++ ++//#define STACK_OFFSET(x) VMRegImpl::stack2reg((x) + additional_frame_words) ++#define STACK_OFFSET(x) VMRegImpl::stack2reg((x) + additional_frame_slots) ++ 
map->set_callee_saved(STACK_OFFSET( v0_off), V0->as_VMReg()); ++ map->set_callee_saved(STACK_OFFSET( v1_off), V1->as_VMReg()); ++ map->set_callee_saved(STACK_OFFSET( a0_off), A0->as_VMReg()); ++ map->set_callee_saved(STACK_OFFSET( a1_off), A1->as_VMReg()); ++ map->set_callee_saved(STACK_OFFSET( a2_off), A2->as_VMReg()); ++ map->set_callee_saved(STACK_OFFSET( a3_off), A3->as_VMReg()); ++ map->set_callee_saved(STACK_OFFSET( a4_off), A4->as_VMReg()); ++ map->set_callee_saved(STACK_OFFSET( a5_off), A5->as_VMReg()); ++ map->set_callee_saved(STACK_OFFSET( a6_off), A6->as_VMReg()); ++ map->set_callee_saved(STACK_OFFSET( a7_off), A7->as_VMReg()); ++ map->set_callee_saved(STACK_OFFSET( t0_off), T0->as_VMReg()); ++ map->set_callee_saved(STACK_OFFSET( t1_off), T1->as_VMReg()); ++ map->set_callee_saved(STACK_OFFSET( t2_off), T2->as_VMReg()); ++ map->set_callee_saved(STACK_OFFSET( t3_off), T3->as_VMReg()); ++ map->set_callee_saved(STACK_OFFSET( s0_off), S0->as_VMReg()); ++ map->set_callee_saved(STACK_OFFSET( s1_off), S1->as_VMReg()); ++ map->set_callee_saved(STACK_OFFSET( s2_off), S2->as_VMReg()); ++ map->set_callee_saved(STACK_OFFSET( s3_off), S3->as_VMReg()); ++ map->set_callee_saved(STACK_OFFSET( s4_off), S4->as_VMReg()); ++ map->set_callee_saved(STACK_OFFSET( s5_off), S5->as_VMReg()); ++ map->set_callee_saved(STACK_OFFSET( s6_off), S6->as_VMReg()); ++ map->set_callee_saved(STACK_OFFSET( s7_off), S7->as_VMReg()); ++ map->set_callee_saved(STACK_OFFSET( t8_off), T8->as_VMReg()); ++ map->set_callee_saved(STACK_OFFSET( t9_off), T9->as_VMReg()); ++ map->set_callee_saved(STACK_OFFSET( gp_off), GP->as_VMReg()); ++ map->set_callee_saved(STACK_OFFSET( fp_off), FP->as_VMReg()); ++ map->set_callee_saved(STACK_OFFSET( return_off), RA->as_VMReg()); ++ ++ map->set_callee_saved(STACK_OFFSET( fpr0_off), F0->as_VMReg()); ++ map->set_callee_saved(STACK_OFFSET( fpr1_off), F1->as_VMReg()); ++ map->set_callee_saved(STACK_OFFSET( fpr2_off), F2->as_VMReg()); ++ map->set_callee_saved(STACK_OFFSET( fpr3_off), F3->as_VMReg()); ++ map->set_callee_saved(STACK_OFFSET( fpr4_off), F4->as_VMReg()); ++ map->set_callee_saved(STACK_OFFSET( fpr5_off), F5->as_VMReg()); ++ map->set_callee_saved(STACK_OFFSET( fpr6_off), F6->as_VMReg()); ++ map->set_callee_saved(STACK_OFFSET( fpr7_off), F7->as_VMReg()); ++ map->set_callee_saved(STACK_OFFSET( fpr8_off), F8->as_VMReg()); ++ map->set_callee_saved(STACK_OFFSET( fpr9_off), F9->as_VMReg()); ++ map->set_callee_saved(STACK_OFFSET( fpr10_off), F10->as_VMReg()); ++ map->set_callee_saved(STACK_OFFSET( fpr11_off), F11->as_VMReg()); ++ map->set_callee_saved(STACK_OFFSET( fpr12_off), F12->as_VMReg()); ++ map->set_callee_saved(STACK_OFFSET( fpr13_off), F13->as_VMReg()); ++ map->set_callee_saved(STACK_OFFSET( fpr14_off), F14->as_VMReg()); ++ map->set_callee_saved(STACK_OFFSET( fpr15_off), F15->as_VMReg()); ++ map->set_callee_saved(STACK_OFFSET( fpr16_off), F16->as_VMReg()); ++ map->set_callee_saved(STACK_OFFSET( fpr17_off), F17->as_VMReg()); ++ map->set_callee_saved(STACK_OFFSET( fpr18_off), F18->as_VMReg()); ++ map->set_callee_saved(STACK_OFFSET( fpr19_off), F19->as_VMReg()); ++ map->set_callee_saved(STACK_OFFSET( fpr20_off), F20->as_VMReg()); ++ map->set_callee_saved(STACK_OFFSET( fpr21_off), F21->as_VMReg()); ++ map->set_callee_saved(STACK_OFFSET( fpr22_off), F22->as_VMReg()); ++ map->set_callee_saved(STACK_OFFSET( fpr23_off), F23->as_VMReg()); ++ map->set_callee_saved(STACK_OFFSET( fpr24_off), F24->as_VMReg()); ++ map->set_callee_saved(STACK_OFFSET( fpr25_off), F25->as_VMReg()); ++ 
map->set_callee_saved(STACK_OFFSET( fpr26_off), F26->as_VMReg()); ++ map->set_callee_saved(STACK_OFFSET( fpr27_off), F27->as_VMReg()); ++ map->set_callee_saved(STACK_OFFSET( fpr28_off), F28->as_VMReg()); ++ map->set_callee_saved(STACK_OFFSET( fpr29_off), F29->as_VMReg()); ++ map->set_callee_saved(STACK_OFFSET( fpr30_off), F30->as_VMReg()); ++ map->set_callee_saved(STACK_OFFSET( fpr31_off), F31->as_VMReg()); ++ ++#undef STACK_OFFSET ++ return map; ++} ++ ++ ++// Pop the current frame and restore all the registers that we ++// saved. ++void RegisterSaver::restore_live_registers(MacroAssembler* masm, bool restore_vectors) { ++ __ ldc1(F0, SP, fpr0_off * jintSize); __ ldc1(F1, SP, fpr1_off * jintSize); ++ __ ldc1(F2, SP, fpr2_off * jintSize); __ ldc1(F3, SP, fpr3_off * jintSize); ++ __ ldc1(F4, SP, fpr4_off * jintSize); __ ldc1(F5, SP, fpr5_off * jintSize); ++ __ ldc1(F6, SP, fpr6_off * jintSize); __ ldc1(F7, SP, fpr7_off * jintSize); ++ __ ldc1(F8, SP, fpr8_off * jintSize); __ ldc1(F9, SP, fpr9_off * jintSize); ++ __ ldc1(F10, SP, fpr10_off * jintSize); __ ldc1(F11, SP, fpr11_off * jintSize); ++ __ ldc1(F12, SP, fpr12_off * jintSize); __ ldc1(F13, SP, fpr13_off * jintSize); ++ __ ldc1(F14, SP, fpr14_off * jintSize); __ ldc1(F15, SP, fpr15_off * jintSize); ++ __ ldc1(F16, SP, fpr16_off * jintSize); __ ldc1(F17, SP, fpr17_off * jintSize); ++ __ ldc1(F18, SP, fpr18_off * jintSize); __ ldc1(F19, SP, fpr19_off * jintSize); ++ __ ldc1(F20, SP, fpr20_off * jintSize); __ ldc1(F21, SP, fpr21_off * jintSize); ++ __ ldc1(F22, SP, fpr22_off * jintSize); __ ldc1(F23, SP, fpr23_off * jintSize); ++ __ ldc1(F24, SP, fpr24_off * jintSize); __ ldc1(F25, SP, fpr25_off * jintSize); ++ __ ldc1(F26, SP, fpr26_off * jintSize); __ ldc1(F27, SP, fpr27_off * jintSize); ++ __ ldc1(F28, SP, fpr28_off * jintSize); __ ldc1(F29, SP, fpr29_off * jintSize); ++ __ ldc1(F30, SP, fpr30_off * jintSize); __ ldc1(F31, SP, fpr31_off * jintSize); ++ ++ __ ld(V0, SP, v0_off * jintSize); __ ld(V1, SP, v1_off * jintSize); ++ __ ld(A0, SP, a0_off * jintSize); __ ld(A1, SP, a1_off * jintSize); ++ __ ld(A2, SP, a2_off * jintSize); __ ld(A3, SP, a3_off * jintSize); ++ __ ld(A4, SP, a4_off * jintSize); __ ld(A5, SP, a5_off * jintSize); ++ __ ld(A6, SP, a6_off * jintSize); __ ld(A7, SP, a7_off * jintSize); ++ __ ld(T0, SP, t0_off * jintSize); ++ __ ld(T1, SP, t1_off * jintSize); ++ __ ld(T2, SP, t2_off * jintSize); ++ __ ld(T3, SP, t3_off * jintSize); ++ __ ld(S0, SP, s0_off * jintSize); ++ __ ld(S1, SP, s1_off * jintSize); ++ __ ld(S2, SP, s2_off * jintSize); ++ __ ld(S3, SP, s3_off * jintSize); ++ __ ld(S4, SP, s4_off * jintSize); ++ __ ld(S5, SP, s5_off * jintSize); ++ __ ld(S6, SP, s6_off * jintSize); ++ __ ld(S7, SP, s7_off * jintSize); ++ ++ __ ld(T8, SP, t8_off * jintSize); ++ __ ld(T9, SP, t9_off * jintSize); ++ ++ __ ld(GP, SP, gp_off * jintSize); ++ __ ld(FP, SP, fp_off * jintSize); ++ __ ld(RA, SP, return_off * jintSize); ++ ++ __ addiu(SP, SP, reg_save_size * jintSize); ++} ++ ++// Pop the current frame and restore the registers that might be holding ++// a result. ++void RegisterSaver::restore_result_registers(MacroAssembler* masm) { ++ ++ // Just restore result register. Only used by deoptimization. By ++ // now any callee save register that needs to be restore to a c2 ++ // caller of the deoptee has been extracted into the vframeArray ++ // and will be stuffed into the c2i adapter we create for later ++ // restoration so only result registers need to be restored here. 
++ ++ __ ld(V0, SP, v0_off * jintSize); ++ __ ld(V1, SP, v1_off * jintSize); ++ __ ldc1(F0, SP, fpr0_off * jintSize); ++ __ ldc1(F1, SP, fpr1_off * jintSize); ++ __ addiu(SP, SP, return_off * jintSize); ++} ++ ++// Is vector's size (in bytes) bigger than a size saved by default? ++// 16 bytes XMM registers are saved by default using fxsave/fxrstor instructions. ++bool SharedRuntime::is_wide_vector(int size) { ++ return size > 16; ++} ++ ++size_t SharedRuntime::trampoline_size() { ++ return 32; ++} ++ ++void SharedRuntime::generate_trampoline(MacroAssembler *masm, address destination) { ++ // trampoline is not in CodeCache ++ __ set64(T9, (long)destination); ++ __ jr(T9); ++ __ delayed()->nop(); ++} ++ ++// The java_calling_convention describes stack locations as ideal slots on ++// a frame with no abi restrictions. Since we must observe abi restrictions ++// (like the placement of the register window) the slots must be biased by ++// the following value. ++ ++static int reg2offset_in(VMReg r) { ++ // Account for saved fp and return address ++ // This should really be in_preserve_stack_slots ++ return (r->reg2stack() + 2 * VMRegImpl::slots_per_word) * VMRegImpl::stack_slot_size; // + 2 * VMRegImpl::stack_slot_size); ++} ++ ++static int reg2offset_out(VMReg r) { ++ return (r->reg2stack() + SharedRuntime::out_preserve_stack_slots()) * VMRegImpl::stack_slot_size; ++} ++ ++// --------------------------------------------------------------------------- ++// Read the array of BasicTypes from a signature, and compute where the ++// arguments should go. Values in the VMRegPair regs array refer to 4-byte ++// quantities. Values less than SharedInfo::stack0 are registers, those above ++// refer to 4-byte stack slots. All stack slots are based off of the stack pointer ++// as framesizes are fixed. ++// VMRegImpl::stack0 refers to the first slot 0(sp). ++// and VMRegImpl::stack0+1 refers to the memory word 4-byes higher. Register ++// up to RegisterImpl::number_of_registers) are the 32-bit ++// integer registers. ++ ++// Pass first five oop/int args in registers T0, A0 - A3. ++// Pass float/double/long args in stack. ++// Doubles have precedence, so if you pass a mix of floats and doubles ++// the doubles will grab the registers before the floats will. ++ ++// Note: the INPUTS in sig_bt are in units of Java argument words, which are ++// either 32-bit or 64-bit depending on the build. The OUTPUTS are in 32-bit ++// units regardless of build. ++ ++ ++// --------------------------------------------------------------------------- ++// The compiled Java calling convention. ++// Pass first five oop/int args in registers T0, A0 - A3. ++// Pass float/double/long args in stack. ++// Doubles have precedence, so if you pass a mix of floats and doubles ++// the doubles will grab the registers before the floats will. ++ ++int SharedRuntime::java_calling_convention(const BasicType *sig_bt, ++ VMRegPair *regs, ++ int total_args_passed, ++ int is_outgoing) { ++ ++ // Create the mapping between argument positions and registers. 
++ static const Register INT_ArgReg[Argument::n_register_parameters] = { ++ T0, A0, A1, A2, A3, A4, A5, A6 ++ }; ++ static const FloatRegister FP_ArgReg[Argument::n_float_register_parameters] = { ++ F12, F13, F14, F15, F16, F17, F18, F19 ++ }; ++ ++ uint args = 0; ++ uint stk_args = 0; // inc by 2 each time ++ ++ for (int i = 0; i < total_args_passed; i++) { ++ switch (sig_bt[i]) { ++ case T_VOID: ++ // halves of T_LONG or T_DOUBLE ++ assert(i != 0 && (sig_bt[i - 1] == T_LONG || sig_bt[i - 1] == T_DOUBLE), "expecting half"); ++ regs[i].set_bad(); ++ break; ++ case T_BOOLEAN: ++ case T_CHAR: ++ case T_BYTE: ++ case T_SHORT: ++ case T_INT: ++ if (args < Argument::n_register_parameters) { ++ regs[i].set1(INT_ArgReg[args++]->as_VMReg()); ++ } else { ++ regs[i].set1(VMRegImpl::stack2reg(stk_args)); ++ stk_args += 2; ++ } ++ break; ++ case T_LONG: ++ assert(sig_bt[i + 1] == T_VOID, "expecting half"); ++ // fall through ++ case T_OBJECT: ++ case T_ARRAY: ++ case T_ADDRESS: ++ if (args < Argument::n_register_parameters) { ++ regs[i].set2(INT_ArgReg[args++]->as_VMReg()); ++ } else { ++ regs[i].set2(VMRegImpl::stack2reg(stk_args)); ++ stk_args += 2; ++ } ++ break; ++ case T_FLOAT: ++ if (args < Argument::n_float_register_parameters) { ++ regs[i].set1(FP_ArgReg[args++]->as_VMReg()); ++ } else { ++ regs[i].set1(VMRegImpl::stack2reg(stk_args)); ++ stk_args += 2; ++ } ++ break; ++ case T_DOUBLE: ++ assert(sig_bt[i + 1] == T_VOID, "expecting half"); ++ if (args < Argument::n_float_register_parameters) { ++ regs[i].set2(FP_ArgReg[args++]->as_VMReg()); ++ } else { ++ regs[i].set2(VMRegImpl::stack2reg(stk_args)); ++ stk_args += 2; ++ } ++ break; ++ default: ++ ShouldNotReachHere(); ++ break; ++ } ++ } ++ ++ return round_to(stk_args, 2); ++} ++ ++// Patch the callers callsite with entry to compiled code if it exists. ++static void patch_callers_callsite(MacroAssembler *masm) { ++ Label L; ++ __ ld_ptr(AT, Rmethod, in_bytes(Method::code_offset())); ++ __ beq(AT, R0, L); ++ __ delayed()->nop(); ++ // Schedule the branch target address early. ++ // Call into the VM to patch the caller, then jump to compiled callee ++ // V0 isn't live so capture return address while we easily can ++ __ move(V0, RA); ++ ++ __ pushad(); ++#ifdef COMPILER2 ++ // C2 may leave the stack dirty if not in SSE2+ mode ++ __ empty_FPU_stack(); ++#endif ++ ++ // VM needs caller's callsite ++ // VM needs target method ++ ++ __ move(A0, Rmethod); ++ __ move(A1, V0); ++ // we should preserve the return address ++ __ move(TSR, SP); ++ __ move(AT, -(StackAlignmentInBytes)); // align the stack ++ __ andr(SP, SP, AT); ++ __ call(CAST_FROM_FN_PTR(address, SharedRuntime::fixup_callers_callsite), ++ relocInfo::runtime_call_type); ++ ++ __ delayed()->nop(); ++ __ move(SP, TSR); ++ __ popad(); ++ __ bind(L); ++} ++ ++static void gen_c2i_adapter(MacroAssembler *masm, ++ int total_args_passed, ++ int comp_args_on_stack, ++ const BasicType *sig_bt, ++ const VMRegPair *regs, ++ Label& skip_fixup) { ++ ++ // Before we get into the guts of the C2I adapter, see if we should be here ++ // at all. We've come from compiled code and are attempting to jump to the ++ // interpreter, which means the caller made a static call to get here ++ // (vcalls always get a compiled target if there is one). Check for a ++ // compiled target. If there is one, we need to patch the caller's call. ++ // However we will run interpreted if we come thru here. The next pass ++ // thru the call site will run compiled. 
If we ran compiled here then ++ // we can (theorectically) do endless i2c->c2i->i2c transitions during ++ // deopt/uncommon trap cycles. If we always go interpreted here then ++ // we can have at most one and don't need to play any tricks to keep ++ // from endlessly growing the stack. ++ // ++ // Actually if we detected that we had an i2c->c2i transition here we ++ // ought to be able to reset the world back to the state of the interpreted ++ // call and not bother building another interpreter arg area. We don't ++ // do that at this point. ++ ++ patch_callers_callsite(masm); ++ __ bind(skip_fixup); ++ ++#ifdef COMPILER2 ++ __ empty_FPU_stack(); ++#endif ++ //this is for native ? ++ // Since all args are passed on the stack, total_args_passed * interpreter_ ++ // stack_element_size is the ++ // space we need. ++ int extraspace = total_args_passed * Interpreter::stackElementSize; ++ ++ // stack is aligned, keep it that way ++ extraspace = round_to(extraspace, 2*wordSize); ++ ++ // Get return address ++ __ move(V0, RA); ++ // set senderSP value ++ //refer to interpreter_mips.cpp:generate_asm_entry ++ __ move(Rsender, SP); ++ __ addiu(SP, SP, -extraspace); ++ ++ // Now write the args into the outgoing interpreter space ++ for (int i = 0; i < total_args_passed; i++) { ++ if (sig_bt[i] == T_VOID) { ++ assert(i > 0 && (sig_bt[i-1] == T_LONG || sig_bt[i-1] == T_DOUBLE), "missing half"); ++ continue; ++ } ++ ++ // st_off points to lowest address on stack. ++ int st_off = ((total_args_passed - 1) - i) * Interpreter::stackElementSize; ++ // Say 4 args: ++ // i st_off ++ // 0 12 T_LONG ++ // 1 8 T_VOID ++ // 2 4 T_OBJECT ++ // 3 0 T_BOOL ++ VMReg r_1 = regs[i].first(); ++ VMReg r_2 = regs[i].second(); ++ if (!r_1->is_valid()) { ++ assert(!r_2->is_valid(), ""); ++ continue; ++ } ++ if (r_1->is_stack()) { ++ // memory to memory use fpu stack top ++ int ld_off = r_1->reg2stack() * VMRegImpl::stack_slot_size + extraspace; ++ if (!r_2->is_valid()) { ++ __ ld_ptr(AT, SP, ld_off); ++ __ st_ptr(AT, SP, st_off); ++ ++ } else { ++ ++ ++ int next_off = st_off - Interpreter::stackElementSize; ++ __ ld_ptr(AT, SP, ld_off); ++ __ st_ptr(AT, SP, st_off); ++ ++ // Ref to is_Register condition ++ if(sig_bt[i] == T_LONG || sig_bt[i] == T_DOUBLE) ++ __ st_ptr(AT, SP, st_off - 8); ++ } ++ } else if (r_1->is_Register()) { ++ Register r = r_1->as_Register(); ++ if (!r_2->is_valid()) { ++ __ sd(r, SP, st_off); ++ } else { ++ //FIXME, mips will not enter here ++ // long/double in gpr ++ __ sd(r, SP, st_off); ++ // In [java/util/zip/ZipFile.java] ++ // ++ // private static native long open(String name, int mode, long lastModified); ++ // private static native int getTotal(long jzfile); ++ // ++ // We need to transfer T_LONG paramenters from a compiled method to a native method. 
++ // It's a complex process: ++ // ++ // Caller -> lir_static_call -> gen_resolve_stub ++ // -> -- resolve_static_call_C ++ // `- gen_c2i_adapter() [*] ++ // | ++ // `- AdapterHandlerLibrary::get_create_apapter_index ++ // -> generate_native_entry ++ // -> InterpreterRuntime::SignatureHandlerGenerator::pass_long [**] ++ // ++ // In [**], T_Long parameter is stored in stack as: ++ // ++ // (high) ++ // | | ++ // ----------- ++ // | 8 bytes | ++ // | (void) | ++ // ----------- ++ // | 8 bytes | ++ // | (long) | ++ // ----------- ++ // | | ++ // (low) ++ // ++ // However, the sequence is reversed here: ++ // ++ // (high) ++ // | | ++ // ----------- ++ // | 8 bytes | ++ // | (long) | ++ // ----------- ++ // | 8 bytes | ++ // | (void) | ++ // ----------- ++ // | | ++ // (low) ++ // ++ // So I stored another 8 bytes in the T_VOID slot. It then can be accessed from generate_native_entry(). ++ // ++ if (sig_bt[i] == T_LONG) ++ __ sd(r, SP, st_off - 8); ++ } ++ } else if (r_1->is_FloatRegister()) { ++ assert(sig_bt[i] == T_FLOAT || sig_bt[i] == T_DOUBLE, "Must be a float register"); ++ ++ FloatRegister fr = r_1->as_FloatRegister(); ++ if (sig_bt[i] == T_FLOAT) ++ __ swc1(fr, SP, st_off); ++ else { ++ __ sdc1(fr, SP, st_off); ++ __ sdc1(fr, SP, st_off - 8); // T_DOUBLE needs two slots ++ } ++ } ++ } ++ ++ // Schedule the branch target address early. ++ __ ld_ptr(AT, Rmethod, in_bytes(Method::interpreter_entry_offset()) ); ++ // And repush original return address ++ __ move(RA, V0); ++ __ jr (AT); ++ __ delayed()->nop(); ++} ++ ++void SharedRuntime::gen_i2c_adapter(MacroAssembler *masm, ++ int total_args_passed, ++ int comp_args_on_stack, ++ const BasicType *sig_bt, ++ const VMRegPair *regs) { ++ ++ // Generate an I2C adapter: adjust the I-frame to make space for the C-frame ++ // layout. Lesp was saved by the calling I-frame and will be restored on ++ // return. Meanwhile, outgoing arg space is all owned by the callee ++ // C-frame, so we can mangle it at will. After adjusting the frame size, ++ // hoist register arguments and repack other args according to the compiled ++ // code convention. Finally, end in a jump to the compiled code. The entry ++ // point address is the start of the buffer. ++ ++ // We will only enter here from an interpreted frame and never from after ++ // passing thru a c2i. Azul allowed this but we do not. If we lose the ++ // race and use a c2i we will remain interpreted for the race loser(s). ++ // This removes all sorts of headaches on the mips side and also eliminates ++ // the possibility of having c2i -> i2c -> c2i -> ... endless transitions. ++ ++ ++ __ move(T9, SP); ++ ++ // Cut-out for having no stack args. Since up to 2 int/oop args are passed ++ // in registers, we will occasionally have no stack args. ++ int comp_words_on_stack = 0; ++ if (comp_args_on_stack) { ++ // Sig words on the stack are greater-than VMRegImpl::stack0. Those in ++ // registers are below. By subtracting stack0, we either get a negative ++ // number (all values in registers) or the maximum stack slot accessed. ++ // int comp_args_on_stack = VMRegImpl::reg2stack(max_arg); ++ // Convert 4-byte stack slots to words. 
++ comp_words_on_stack = round_to(comp_args_on_stack*4, wordSize)>>LogBytesPerWord; ++ // Round up to miminum stack alignment, in wordSize ++ comp_words_on_stack = round_to(comp_words_on_stack, 2); ++ __ daddiu(SP, SP, -comp_words_on_stack * wordSize); ++ } ++ ++ // Align the outgoing SP ++ __ move(AT, -(StackAlignmentInBytes)); ++ __ andr(SP, SP, AT); ++ // push the return address on the stack (note that pushing, rather ++ // than storing it, yields the correct frame alignment for the callee) ++ // Put saved SP in another register ++ const Register saved_sp = V0; ++ __ move(saved_sp, T9); ++ ++ ++ // Will jump to the compiled code just as if compiled code was doing it. ++ // Pre-load the register-jump target early, to schedule it better. ++ __ ld(T9, Rmethod, in_bytes(Method::from_compiled_offset())); ++ ++ // Now generate the shuffle code. Pick up all register args and move the ++ // rest through the floating point stack top. ++ for (int i = 0; i < total_args_passed; i++) { ++ if (sig_bt[i] == T_VOID) { ++ // Longs and doubles are passed in native word order, but misaligned ++ // in the 32-bit build. ++ assert(i > 0 && (sig_bt[i-1] == T_LONG || sig_bt[i-1] == T_DOUBLE), "missing half"); ++ continue; ++ } ++ ++ // Pick up 0, 1 or 2 words from SP+offset. ++ ++ //assert(!regs[i].second()->is_valid() || regs[i].first()->next() == regs[i].second(), "scrambled load targets?"); ++ // Load in argument order going down. ++ int ld_off = (total_args_passed -1 - i)*Interpreter::stackElementSize; ++ // Point to interpreter value (vs. tag) ++ int next_off = ld_off - Interpreter::stackElementSize; ++ VMReg r_1 = regs[i].first(); ++ VMReg r_2 = regs[i].second(); ++ if (!r_1->is_valid()) { ++ assert(!r_2->is_valid(), ""); ++ continue; ++ } ++ if (r_1->is_stack()) { ++ // Convert stack slot to an SP offset (+ wordSize to ++ // account for return address ) ++ // NOTICE HERE!!!! I sub a wordSize here ++ int st_off = regs[i].first()->reg2stack()*VMRegImpl::stack_slot_size; ++ //+ wordSize; ++ ++ if (!r_2->is_valid()) { ++ __ ld(AT, saved_sp, ld_off); ++ __ sd(AT, SP, st_off); ++ } else { ++ // Interpreter local[n] == MSW, local[n+1] == LSW however locals ++ // are accessed as negative so LSW is at LOW address ++ ++ // ld_off is MSW so get LSW ++ // st_off is LSW (i.e. reg.first()) ++ ++ // [./org/eclipse/swt/graphics/GC.java] ++ // void drawImageXRender(Image srcImage, int srcX, int srcY, int srcWidth, int srcHeight, ++ // int destX, int destY, int destWidth, int destHeight, ++ // boolean simple, ++ // int imgWidth, int imgHeight, ++ // long maskPixmap, <-- Pass T_LONG in stack ++ // int maskType); ++ // Before this modification, Eclipse displays icons with solid black background. ++ // ++ __ ld(AT, saved_sp, ld_off); ++ if (sig_bt[i] == T_LONG || sig_bt[i] == T_DOUBLE) ++ __ ld(AT, saved_sp, ld_off - 8); ++ __ sd(AT, SP, st_off); ++ } ++ } else if (r_1->is_Register()) { // Register argument ++ Register r = r_1->as_Register(); ++ if (r_2->is_valid()) { ++ // Remember r_1 is low address (and LSB on mips) ++ // So r_2 gets loaded from high address regardless of the platform ++ assert(r_2->as_Register() == r_1->as_Register(), ""); ++ __ ld(r, saved_sp, ld_off); ++ ++ // ++ // For T_LONG type, the real layout is as below: ++ // ++ // (high) ++ // | | ++ // ----------- ++ // | 8 bytes | ++ // | (void) | ++ // ----------- ++ // | 8 bytes | ++ // | (long) | ++ // ----------- ++ // | | ++ // (low) ++ // ++ // We should load the low-8 bytes. 
++ // ++ if (sig_bt[i] == T_LONG) ++ __ ld(r, saved_sp, ld_off - 8); ++ } else { ++ __ lw(r, saved_sp, ld_off); ++ } ++ } else if (r_1->is_FloatRegister()) { // Float Register ++ assert(sig_bt[i] == T_FLOAT || sig_bt[i] == T_DOUBLE, "Must be a float register"); ++ ++ FloatRegister fr = r_1->as_FloatRegister(); ++ if (sig_bt[i] == T_FLOAT) ++ __ lwc1(fr, saved_sp, ld_off); ++ else { ++ __ ldc1(fr, saved_sp, ld_off); ++ __ ldc1(fr, saved_sp, ld_off - 8); ++ } ++ } ++ } ++ ++ // 6243940 We might end up in handle_wrong_method if ++ // the callee is deoptimized as we race thru here. If that ++ // happens we don't want to take a safepoint because the ++ // caller frame will look interpreted and arguments are now ++ // "compiled" so it is much better to make this transition ++ // invisible to the stack walking code. Unfortunately if ++ // we try and find the callee by normal means a safepoint ++ // is possible. So we stash the desired callee in the thread ++ // and the vm will find there should this case occur. ++#ifndef OPT_THREAD ++ Register thread = T8; ++ __ get_thread(thread); ++#else ++ Register thread = TREG; ++#endif ++ __ sd(Rmethod, thread, in_bytes(JavaThread::callee_target_offset())); ++ ++ // move methodOop to V0 in case we end up in an c2i adapter. ++ // the c2i adapters expect methodOop in V0 (c2) because c2's ++ // resolve stubs return the result (the method) in V0. ++ // I'd love to fix this. ++ __ move(V0, Rmethod); ++ __ jr(T9); ++ __ delayed()->nop(); ++} ++ ++// --------------------------------------------------------------- ++AdapterHandlerEntry* SharedRuntime::generate_i2c2i_adapters(MacroAssembler *masm, ++ int total_args_passed, ++ int comp_args_on_stack, ++ const BasicType *sig_bt, ++ const VMRegPair *regs, ++ AdapterFingerPrint* fingerprint) { ++ address i2c_entry = __ pc(); ++ ++ gen_i2c_adapter(masm, total_args_passed, comp_args_on_stack, sig_bt, regs); ++ ++ // ------------------------------------------------------------------------- ++ // Generate a C2I adapter. On entry we know G5 holds the methodOop. The ++ // args start out packed in the compiled layout. They need to be unpacked ++ // into the interpreter layout. This will almost always require some stack ++ // space. We grow the current (compiled) stack, then repack the args. We ++ // finally end in a jump to the generic interpreter entry point. On exit ++ // from the interpreter, the interpreter will restore our SP (lest the ++ // compiled code, which relys solely on SP and not FP, get sick). ++ ++ address c2i_unverified_entry = __ pc(); ++ Label skip_fixup; ++ { ++ Register holder = T1; ++ Register receiver = T0; ++ Register temp = T8; ++ address ic_miss = SharedRuntime::get_ic_miss_stub(); ++ ++ Label missed; ++ ++ //add for compressedoops ++ __ load_klass(temp, receiver); ++ ++ __ ld_ptr(AT, holder, CompiledICHolder::holder_klass_offset()); ++ __ ld_ptr(Rmethod, holder, CompiledICHolder::holder_metadata_offset()); ++ __ bne(AT, temp, missed); ++ __ delayed()->nop(); ++ // Method might have been compiled since the call site was patched to ++ // interpreted if that is the case treat it as a miss so we can get ++ // the call site corrected. 
++ __ ld_ptr(AT, Rmethod, in_bytes(Method::code_offset())); ++ __ beq(AT, R0, skip_fixup); ++ __ delayed()->nop(); ++ __ bind(missed); ++ ++ __ jmp(ic_miss, relocInfo::runtime_call_type); ++ __ delayed()->nop(); ++ } ++ ++ address c2i_entry = __ pc(); ++ ++ gen_c2i_adapter(masm, total_args_passed, comp_args_on_stack, sig_bt, regs, skip_fixup); ++ ++ __ flush(); ++ return AdapterHandlerLibrary::new_entry(fingerprint, i2c_entry, c2i_entry, c2i_unverified_entry); ++} ++ ++int SharedRuntime::c_calling_convention(const BasicType *sig_bt, ++ VMRegPair *regs, ++ VMRegPair *regs2, ++ int total_args_passed) { ++ assert(regs2 == NULL, "not needed on MIPS"); ++ // Return the number of VMReg stack_slots needed for the args. ++ // This value does not include an abi space (like register window ++ // save area). ++ ++ // We return the amount of VMReg stack slots we need to reserve for all ++ // the arguments NOT counting out_preserve_stack_slots. Since we always ++ // have space for storing at least 6 registers to memory we start with that. ++ // See int_stk_helper for a further discussion. ++ // We return the amount of VMRegImpl stack slots we need to reserve for all ++ // the arguments NOT counting out_preserve_stack_slots. ++ static const Register INT_ArgReg[Argument::n_register_parameters] = { ++ A0, A1, A2, A3, A4, A5, A6, A7 ++ }; ++ static const FloatRegister FP_ArgReg[Argument::n_float_register_parameters] = { ++ F12, F13, F14, F15, F16, F17, F18, F19 ++ }; ++ uint args = 0; ++ uint stk_args = 0; // inc by 2 each time ++ ++// Example: ++// n java.lang.UNIXProcess::forkAndExec ++// private native int forkAndExec(byte[] prog, ++// byte[] argBlock, int argc, ++// byte[] envBlock, int envc, ++// byte[] dir, ++// boolean redirectErrorStream, ++// FileDescriptor stdin_fd, ++// FileDescriptor stdout_fd, ++// FileDescriptor stderr_fd) ++// JNIEXPORT jint JNICALL ++// Java_java_lang_UNIXProcess_forkAndExec(JNIEnv *env, ++// jobject process, ++// jbyteArray prog, ++// jbyteArray argBlock, jint argc, ++// jbyteArray envBlock, jint envc, ++// jbyteArray dir, ++// jboolean redirectErrorStream, ++// jobject stdin_fd, ++// jobject stdout_fd, ++// jobject stderr_fd) ++// ++// ::c_calling_convention ++// 0: // env <-- a0 ++// 1: L // klass/obj <-- t0 => a1 ++// 2: [ // prog[] <-- a0 => a2 ++// 3: [ // argBlock[] <-- a1 => a3 ++// 4: I // argc <-- a2 => a4 ++// 5: [ // envBlock[] <-- a3 => a5 ++// 6: I // envc <-- a4 => a5 ++// 7: [ // dir[] <-- a5 => a7 ++// 8: Z // redirectErrorStream <-- a6 => sp[0] ++// 9: L // stdin fp[16] => sp[8] ++// 10: L // stdout fp[24] => sp[16] ++// 11: L // stderr fp[32] => sp[24] ++// ++ for (int i = 0; i < total_args_passed; i++) { ++ switch (sig_bt[i]) { ++ case T_VOID: // Halves of longs and doubles ++ assert(i != 0 && (sig_bt[i - 1] == T_LONG || sig_bt[i - 1] == T_DOUBLE), "expecting half"); ++ regs[i].set_bad(); ++ break; ++ case T_BOOLEAN: ++ case T_CHAR: ++ case T_BYTE: ++ case T_SHORT: ++ case T_INT: ++ if (args < Argument::n_register_parameters) { ++ regs[i].set1(INT_ArgReg[args++]->as_VMReg()); ++ } else { ++ regs[i].set1(VMRegImpl::stack2reg(stk_args)); ++ stk_args += 2; ++ } ++ break; ++ case T_LONG: ++ assert(sig_bt[i + 1] == T_VOID, "expecting half"); ++ // fall through ++ case T_OBJECT: ++ case T_ARRAY: ++ case T_ADDRESS: ++ case T_METADATA: ++ if (args < Argument::n_register_parameters) { ++ regs[i].set2(INT_ArgReg[args++]->as_VMReg()); ++ } else { ++ regs[i].set2(VMRegImpl::stack2reg(stk_args)); ++ stk_args += 2; ++ } ++ break; ++ case T_FLOAT: ++ if (args < 
Argument::n_float_register_parameters) { ++ regs[i].set1(FP_ArgReg[args++]->as_VMReg()); ++ } else { ++ regs[i].set1(VMRegImpl::stack2reg(stk_args)); ++ stk_args += 2; ++ } ++ break; ++ case T_DOUBLE: ++ assert(sig_bt[i + 1] == T_VOID, "expecting half"); ++ if (args < Argument::n_float_register_parameters) { ++ regs[i].set2(FP_ArgReg[args++]->as_VMReg()); ++ } else { ++ regs[i].set2(VMRegImpl::stack2reg(stk_args)); ++ stk_args += 2; ++ } ++ break; ++ default: ++ ShouldNotReachHere(); ++ break; ++ } ++ } ++ ++ return round_to(stk_args, 2); ++} ++ ++// --------------------------------------------------------------------------- ++void SharedRuntime::save_native_result(MacroAssembler *masm, BasicType ret_type, int frame_slots) { ++ // We always ignore the frame_slots arg and just use the space just below frame pointer ++ // which by this time is free to use ++ switch (ret_type) { ++ case T_FLOAT: ++ __ swc1(FSF, FP, -wordSize); ++ break; ++ case T_DOUBLE: ++ __ sdc1(FSF, FP, -wordSize ); ++ break; ++ case T_VOID: break; ++ case T_LONG: ++ __ sd(V0, FP, -wordSize); ++ break; ++ case T_OBJECT: ++ case T_ARRAY: ++ __ sd(V0, FP, -wordSize); ++ break; ++ default: { ++ __ sw(V0, FP, -wordSize); ++ } ++ } ++} ++ ++void SharedRuntime::restore_native_result(MacroAssembler *masm, BasicType ret_type, int frame_slots) { ++ // We always ignore the frame_slots arg and just use the space just below frame pointer ++ // which by this time is free to use ++ switch (ret_type) { ++ case T_FLOAT: ++ __ lwc1(FSF, FP, -wordSize); ++ break; ++ case T_DOUBLE: ++ __ ldc1(FSF, FP, -wordSize ); ++ break; ++ case T_LONG: ++ __ ld(V0, FP, -wordSize); ++ break; ++ case T_VOID: break; ++ case T_OBJECT: ++ case T_ARRAY: ++ __ ld(V0, FP, -wordSize); ++ break; ++ default: { ++ __ lw(V0, FP, -wordSize); ++ } ++ } ++} ++ ++static void save_args(MacroAssembler *masm, int arg_count, int first_arg, VMRegPair *args) { ++ for ( int i = first_arg ; i < arg_count ; i++ ) { ++ if (args[i].first()->is_Register()) { ++ __ push(args[i].first()->as_Register()); ++ } else if (args[i].first()->is_FloatRegister()) { ++ __ push(args[i].first()->as_FloatRegister()); ++ } ++ } ++} ++ ++static void restore_args(MacroAssembler *masm, int arg_count, int first_arg, VMRegPair *args) { ++ for ( int i = arg_count - 1 ; i >= first_arg ; i-- ) { ++ if (args[i].first()->is_Register()) { ++ __ pop(args[i].first()->as_Register()); ++ } else if (args[i].first()->is_FloatRegister()) { ++ __ pop(args[i].first()->as_FloatRegister()); ++ } ++ } ++} ++ ++// A simple move of integer like type ++static void simple_move32(MacroAssembler* masm, VMRegPair src, VMRegPair dst) { ++ if (src.first()->is_stack()) { ++ if (dst.first()->is_stack()) { ++ // stack to stack ++ __ lw(AT, FP, reg2offset_in(src.first())); ++ __ sd(AT, SP, reg2offset_out(dst.first())); ++ } else { ++ // stack to reg ++ __ lw(dst.first()->as_Register(), FP, reg2offset_in(src.first())); ++ } ++ } else if (dst.first()->is_stack()) { ++ // reg to stack ++ __ sd(src.first()->as_Register(), SP, reg2offset_out(dst.first())); ++ } else { ++ if (dst.first() != src.first()){ ++ __ move(dst.first()->as_Register(), src.first()->as_Register()); // fujie error:dst.first() ++ } ++ } ++} ++ ++// An oop arg. Must pass a handle not the oop itself ++static void object_move(MacroAssembler* masm, ++ OopMap* map, ++ int oop_handle_offset, ++ int framesize_in_slots, ++ VMRegPair src, ++ VMRegPair dst, ++ bool is_receiver, ++ int* receiver_offset) { ++ ++ // must pass a handle. 
First figure out the location we use as a handle ++ ++ //FIXME, for mips, dst can be register ++ if (src.first()->is_stack()) { ++ // Oop is already on the stack as an argument ++ Register rHandle = V0; ++ Label nil; ++ __ xorr(rHandle, rHandle, rHandle); ++ __ ld(AT, FP, reg2offset_in(src.first())); ++ __ beq(AT, R0, nil); ++ __ delayed()->nop(); ++ __ lea(rHandle, Address(FP, reg2offset_in(src.first()))); ++ __ bind(nil); ++ if(dst.first()->is_stack())__ sd( rHandle, SP, reg2offset_out(dst.first())); ++ else __ move( (dst.first())->as_Register(), rHandle); ++ //if dst is register ++ //FIXME, does mips need out preserve stack slots? ++ int offset_in_older_frame = src.first()->reg2stack() ++ + SharedRuntime::out_preserve_stack_slots(); ++ map->set_oop(VMRegImpl::stack2reg(offset_in_older_frame + framesize_in_slots)); ++ if (is_receiver) { ++ *receiver_offset = (offset_in_older_frame ++ + framesize_in_slots) * VMRegImpl::stack_slot_size; ++ } ++ } else { ++ // Oop is in a register; we must store it to the space we reserve ++ // on the stack for oop_handles ++ const Register rOop = src.first()->as_Register(); ++ assert( (rOop->encoding() >= A0->encoding()) && (rOop->encoding() <= T0->encoding()),"wrong register"); ++ const Register rHandle = V0; ++ //Important: refer to java_calling_convention ++ int oop_slot = (rOop->encoding() - A0->encoding()) * VMRegImpl::slots_per_word + oop_handle_offset; ++ int offset = oop_slot*VMRegImpl::stack_slot_size; ++ Label skip; ++ __ sd( rOop , SP, offset ); ++ map->set_oop(VMRegImpl::stack2reg(oop_slot)); ++ __ xorr( rHandle, rHandle, rHandle); ++ __ beq(rOop, R0, skip); ++ __ delayed()->nop(); ++ __ lea(rHandle, Address(SP, offset)); ++ __ bind(skip); ++ // Store the handle parameter ++ if(dst.first()->is_stack())__ sd( rHandle, SP, reg2offset_out(dst.first())); ++ else __ move((dst.first())->as_Register(), rHandle); ++ //if dst is register ++ ++ if (is_receiver) { ++ *receiver_offset = offset; ++ } ++ } ++} ++ ++// A float arg may have to do float reg to int reg conversion ++static void float_move(MacroAssembler* masm, VMRegPair src, VMRegPair dst) { ++ assert(!src.second()->is_valid() && !dst.second()->is_valid(), "bad float_move"); ++ ++ if (src.first()->is_stack()) { ++ if (dst.first()->is_stack()) { ++ __ lw(AT, FP, reg2offset_in(src.first())); ++ __ sw(AT, SP, reg2offset_out(dst.first())); ++ } ++ else ++ __ lwc1(dst.first()->as_FloatRegister(), FP, reg2offset_in(src.first())); ++ } else { ++ // reg to stack ++ if(dst.first()->is_stack()) ++ __ swc1(src.first()->as_FloatRegister(), SP, reg2offset_out(dst.first())); ++ else ++ __ mov_s(dst.first()->as_FloatRegister(), src.first()->as_FloatRegister()); ++ } ++} ++ ++// A long move ++static void long_move(MacroAssembler* masm, VMRegPair src, VMRegPair dst) { ++ ++ // The only legal possibility for a long_move VMRegPair is: ++ // 1: two stack slots (possibly unaligned) ++ // as neither the Java nor the C calling convention will use registers ++ // for longs.
++ ++ if (src.first()->is_stack()) { ++ assert(src.second()->is_stack() && dst.second()->is_stack(), "must be all stack"); ++ if( dst.first()->is_stack()){ ++ __ ld(AT, FP, reg2offset_in(src.first())); ++ __ sd(AT, SP, reg2offset_out(dst.first())); ++ } else { ++ __ ld( (dst.first())->as_Register() , FP, reg2offset_in(src.first())); ++ } ++ } else { ++ if( dst.first()->is_stack()){ ++ __ sd( (src.first())->as_Register(), SP, reg2offset_out(dst.first())); ++ } else { ++ __ move( (dst.first())->as_Register() , (src.first())->as_Register()); ++ } ++ } ++} ++ ++// A double move ++static void double_move(MacroAssembler* masm, VMRegPair src, VMRegPair dst) { ++ ++ // The only legal possibilities for a double_move VMRegPair are: ++ // The painful thing here is that like long_move a VMRegPair might be ++ ++ // Because of the calling convention we know that src is either ++ // 1: a single physical register (xmm registers only) ++ // 2: two stack slots (possibly unaligned) ++ // dst can only be a pair of stack slots. ++ ++ ++ if (src.first()->is_stack()) { ++ // source is all stack ++ if( dst.first()->is_stack()){ ++ __ ld(AT, FP, reg2offset_in(src.first())); ++ __ sd(AT, SP, reg2offset_out(dst.first())); ++ } else { ++ __ ldc1( (dst.first())->as_FloatRegister(), FP, reg2offset_in(src.first())); ++ } ++ ++ } else { ++ // reg to stack ++ // No worries about stack alignment ++ if( dst.first()->is_stack()){ ++ __ sdc1(src.first()->as_FloatRegister(), SP, reg2offset_out(dst.first())); ++ } ++ else ++ __ mov_d( dst.first()->as_FloatRegister(), src.first()->as_FloatRegister()); ++ ++ } ++} ++ ++static void verify_oop_args(MacroAssembler* masm, ++ methodHandle method, ++ const BasicType* sig_bt, ++ const VMRegPair* regs) { ++ Register temp_reg = T9; // not part of any compiled calling seq ++ if (VerifyOops) { ++ for (int i = 0; i < method->size_of_parameters(); i++) { ++ if (sig_bt[i] == T_OBJECT || ++ sig_bt[i] == T_ARRAY) { ++ VMReg r = regs[i].first(); ++ assert(r->is_valid(), "bad oop arg"); ++ if (r->is_stack()) { ++ __ ld(temp_reg, Address(SP, r->reg2stack() * VMRegImpl::stack_slot_size + wordSize)); ++ __ verify_oop(temp_reg); ++ } else { ++ __ verify_oop(r->as_Register()); ++ } ++ } ++ } ++ } ++} ++ ++static void gen_special_dispatch(MacroAssembler* masm, ++ methodHandle method, ++ const BasicType* sig_bt, ++ const VMRegPair* regs) { ++ verify_oop_args(masm, method, sig_bt, regs); ++ vmIntrinsics::ID iid = method->intrinsic_id(); ++ ++ // Now write the args into the outgoing interpreter space ++ bool has_receiver = false; ++ Register receiver_reg = noreg; ++ int member_arg_pos = -1; ++ Register member_reg = noreg; ++ int ref_kind = MethodHandles::signature_polymorphic_intrinsic_ref_kind(iid); ++ if (ref_kind != 0) { ++ member_arg_pos = method->size_of_parameters() - 1; // trailing MemberName argument ++ member_reg = S3; // known to be free at this point ++ has_receiver = MethodHandles::ref_kind_has_receiver(ref_kind); ++ } else if (iid == vmIntrinsics::_invokeBasic) { ++ has_receiver = true; ++ } else { ++ fatal("unexpected intrinsic id %d", iid); ++ } ++ ++ if (member_reg != noreg) { ++ // Load the member_arg into register, if necessary. 
++ SharedRuntime::check_member_name_argument_is_last_argument(method, sig_bt, regs); ++ VMReg r = regs[member_arg_pos].first(); ++ if (r->is_stack()) { ++ __ ld(member_reg, Address(SP, r->reg2stack() * VMRegImpl::stack_slot_size)); ++ } else { ++ // no data motion is needed ++ member_reg = r->as_Register(); ++ } ++ } ++ ++ if (has_receiver) { ++ // Make sure the receiver is loaded into a register. ++ assert(method->size_of_parameters() > 0, "oob"); ++ assert(sig_bt[0] == T_OBJECT, "receiver argument must be an object"); ++ VMReg r = regs[0].first(); ++ assert(r->is_valid(), "bad receiver arg"); ++ if (r->is_stack()) { ++ // Porting note: This assumes that compiled calling conventions always ++ // pass the receiver oop in a register. If this is not true on some ++ // platform, pick a temp and load the receiver from stack. ++ fatal("receiver always in a register"); ++ receiver_reg = SSR; // known to be free at this point ++ __ ld(receiver_reg, Address(SP, r->reg2stack() * VMRegImpl::stack_slot_size)); ++ } else { ++ // no data motion is needed ++ receiver_reg = r->as_Register(); ++ } ++ } ++ ++ // Figure out which address we are really jumping to: ++ MethodHandles::generate_method_handle_dispatch(masm, iid, ++ receiver_reg, member_reg, /*for_compiler_entry:*/ true); ++} ++ ++// --------------------------------------------------------------------------- ++// Generate a native wrapper for a given method. The method takes arguments ++// in the Java compiled code convention, marshals them to the native ++// convention (handlizes oops, etc), transitions to native, makes the call, ++// returns to java state (possibly blocking), unhandlizes any result and ++// returns. ++nmethod *SharedRuntime::generate_native_wrapper(MacroAssembler* masm, ++ const methodHandle& method, ++ int compile_id, ++ BasicType* in_sig_bt, ++ VMRegPair* in_regs, ++ BasicType ret_type, ++ address critical_entry) { ++ if (method->is_method_handle_intrinsic()) { ++ vmIntrinsics::ID iid = method->intrinsic_id(); ++ intptr_t start = (intptr_t)__ pc(); ++ int vep_offset = ((intptr_t)__ pc()) - start; ++ // Make enough room for patch_verified_entry ++ __ nop(); ++ __ nop(); ++ gen_special_dispatch(masm, ++ method, ++ in_sig_bt, ++ in_regs); ++ int frame_complete = ((intptr_t)__ pc()) - start; // not complete, period ++ __ flush(); ++ int stack_slots = SharedRuntime::out_preserve_stack_slots(); // no out slots at all, actually ++ return nmethod::new_native_nmethod(method, ++ compile_id, ++ masm->code(), ++ vep_offset, ++ frame_complete, ++ stack_slots / VMRegImpl::slots_per_word, ++ in_ByteSize(-1), ++ in_ByteSize(-1), ++ (OopMapSet*)NULL); ++ } ++ bool is_critical_native = true; ++ address native_func = critical_entry; ++ if (native_func == NULL) { ++ native_func = method->native_function(); ++ is_critical_native = false; ++ } ++ assert(native_func != NULL, "must have function"); ++ ++ // Native nmethod wrappers never take possesion of the oop arguments. ++ // So the caller will gc the arguments. The only thing we need an ++ // oopMap for is if the call is static ++ // ++ // An OopMap for lock (and class if static), and one for the VM call itself ++ OopMapSet *oop_maps = new OopMapSet(); ++ ++ // We have received a description of where all the java arg are located ++ // on entry to the wrapper. We need to convert these args to where ++ // the jni function will expect them. 
To figure out where they go ++ // we convert the java signature to a C signature by inserting ++ // the hidden arguments as arg[0] and possibly arg[1] (static method) ++ ++ const int total_in_args = method->size_of_parameters(); ++ int total_c_args = total_in_args; ++ if (!is_critical_native) { ++ total_c_args += 1; ++ if (method->is_static()) { ++ total_c_args++; ++ } ++ } else { ++ for (int i = 0; i < total_in_args; i++) { ++ if (in_sig_bt[i] == T_ARRAY) { ++ total_c_args++; ++ } ++ } ++ } ++ ++ BasicType* out_sig_bt = NEW_RESOURCE_ARRAY(BasicType, total_c_args); ++ VMRegPair* out_regs = NEW_RESOURCE_ARRAY(VMRegPair, total_c_args); ++ BasicType* in_elem_bt = NULL; ++ ++ int argc = 0; ++ if (!is_critical_native) { ++ out_sig_bt[argc++] = T_ADDRESS; ++ if (method->is_static()) { ++ out_sig_bt[argc++] = T_OBJECT; ++ } ++ ++ for (int i = 0; i < total_in_args ; i++ ) { ++ out_sig_bt[argc++] = in_sig_bt[i]; ++ } ++ } else { ++ Thread* THREAD = Thread::current(); ++ in_elem_bt = NEW_RESOURCE_ARRAY(BasicType, total_in_args); ++ SignatureStream ss(method->signature()); ++ for (int i = 0; i < total_in_args ; i++ ) { ++ if (in_sig_bt[i] == T_ARRAY) { ++ // Arrays are passed as int, elem* pair ++ out_sig_bt[argc++] = T_INT; ++ out_sig_bt[argc++] = T_ADDRESS; ++ Symbol* atype = ss.as_symbol(CHECK_NULL); ++ const char* at = atype->as_C_string(); ++ if (strlen(at) == 2) { ++ assert(at[0] == '[', "must be"); ++ switch (at[1]) { ++ case 'B': in_elem_bt[i] = T_BYTE; break; ++ case 'C': in_elem_bt[i] = T_CHAR; break; ++ case 'D': in_elem_bt[i] = T_DOUBLE; break; ++ case 'F': in_elem_bt[i] = T_FLOAT; break; ++ case 'I': in_elem_bt[i] = T_INT; break; ++ case 'J': in_elem_bt[i] = T_LONG; break; ++ case 'S': in_elem_bt[i] = T_SHORT; break; ++ case 'Z': in_elem_bt[i] = T_BOOLEAN; break; ++ default: ShouldNotReachHere(); ++ } ++ } ++ } else { ++ out_sig_bt[argc++] = in_sig_bt[i]; ++ in_elem_bt[i] = T_VOID; ++ } ++ if (in_sig_bt[i] != T_VOID) { ++ assert(in_sig_bt[i] == ss.type(), "must match"); ++ ss.next(); ++ } ++ } ++ } ++ ++ // Now figure out where the args must be stored and how much stack space ++ // they require (neglecting out_preserve_stack_slots but space for storing ++ // the 1st six register arguments). It's weird see int_stk_helper. ++ // ++ int out_arg_slots; ++ out_arg_slots = c_calling_convention(out_sig_bt, out_regs, NULL, total_c_args); ++ ++ // Compute framesize for the wrapper. We need to handlize all oops in ++ // registers. We must create space for them here that is disjoint from ++ // the windowed save area because we have no control over when we might ++ // flush the window again and overwrite values that gc has since modified. ++ // (The live window race) ++ // ++ // We always just allocate 6 word for storing down these object. This allow ++ // us to simply record the base and use the Ireg number to decide which ++ // slot to use. (Note that the reg number is the inbound number not the ++ // outbound number). ++ // We must shuffle args to match the native convention, and include var-args space. ++ ++ // Calculate the total number of stack slots we will need. ++ ++ // First count the abi requirement plus all of the outgoing args ++ int stack_slots = SharedRuntime::out_preserve_stack_slots() + out_arg_slots; ++ ++ // Now the space for the inbound oop handle area ++ int total_save_slots = 9 * VMRegImpl::slots_per_word; // 9 arguments passed in registers ++ if (is_critical_native) { ++ // Critical natives may have to call out so they need a save area ++ // for register arguments. 
++ int double_slots = 0; ++ int single_slots = 0; ++ for ( int i = 0; i < total_in_args; i++) { ++ if (in_regs[i].first()->is_Register()) { ++ const Register reg = in_regs[i].first()->as_Register(); ++ switch (in_sig_bt[i]) { ++ case T_BOOLEAN: ++ case T_BYTE: ++ case T_SHORT: ++ case T_CHAR: ++ case T_INT: single_slots++; break; ++ case T_ARRAY: ++ case T_LONG: double_slots++; break; ++ default: ShouldNotReachHere(); ++ } ++ } else if (in_regs[i].first()->is_FloatRegister()) { ++ switch (in_sig_bt[i]) { ++ case T_FLOAT: single_slots++; break; ++ case T_DOUBLE: double_slots++; break; ++ default: ShouldNotReachHere(); ++ } ++ } ++ } ++ total_save_slots = double_slots * 2 + single_slots; ++ // align the save area ++ if (double_slots != 0) { ++ stack_slots = round_to(stack_slots, 2); ++ } ++ } ++ ++ int oop_handle_offset = stack_slots; ++ stack_slots += total_save_slots; ++ ++ // Now any space we need for handlizing a klass if static method ++ ++ int klass_slot_offset = 0; ++ int klass_offset = -1; ++ int lock_slot_offset = 0; ++ bool is_static = false; ++ ++ if (method->is_static()) { ++ klass_slot_offset = stack_slots; ++ stack_slots += VMRegImpl::slots_per_word; ++ klass_offset = klass_slot_offset * VMRegImpl::stack_slot_size; ++ is_static = true; ++ } ++ ++ // Plus a lock if needed ++ ++ if (method->is_synchronized()) { ++ lock_slot_offset = stack_slots; ++ stack_slots += VMRegImpl::slots_per_word; ++ } ++ ++ // Now a place to save return value or as a temporary for any gpr -> fpr moves ++ // + 2 for return address (which we own) and saved fp ++ stack_slots += 2 + 9 * VMRegImpl::slots_per_word; // (T0, A0, A1, A2, A3, A4, A5, A6, A7) ++ ++ // Ok The space we have allocated will look like: ++ // ++ // ++ // FP-> | | ++ // |---------------------| ++ // | 2 slots for moves | ++ // |---------------------| ++ // | lock box (if sync) | ++ // |---------------------| <- lock_slot_offset ++ // | klass (if static) | ++ // |---------------------| <- klass_slot_offset ++ // | oopHandle area | ++ // |---------------------| <- oop_handle_offset ++ // | outbound memory | ++ // | based arguments | ++ // | | ++ // |---------------------| ++ // | vararg area | ++ // |---------------------| ++ // | | ++ // SP-> | out_preserved_slots | ++ // ++ // ++ ++ ++ // Now compute actual number of stack words we need rounding to make ++ // stack properly aligned. ++ stack_slots = round_to(stack_slots, StackAlignmentInSlots); ++ ++ int stack_size = stack_slots * VMRegImpl::stack_slot_size; ++ ++ intptr_t start = (intptr_t)__ pc(); ++ ++ ++ ++ // First thing make an ic check to see if we should even be here ++ address ic_miss = SharedRuntime::get_ic_miss_stub(); ++ ++ // We are free to use all registers as temps without saving them and ++ // restoring them except fp. fp is the only callee save register ++ // as far as the interpreter and the compiler(s) are concerned. 
++ ++ //refer to register_mips.hpp:IC_Klass ++ const Register ic_reg = T1; ++ const Register receiver = T0; ++ ++ Label hit; ++ Label exception_pending; ++ ++ __ verify_oop(receiver); ++ //add for compressedoops ++ __ load_klass(T9, receiver); ++ __ beq(T9, ic_reg, hit); ++ __ delayed()->nop(); ++ __ jmp(ic_miss, relocInfo::runtime_call_type); ++ __ delayed()->nop(); ++ __ bind(hit); ++ ++ int vep_offset = ((intptr_t)__ pc()) - start; ++ ++ // Make enough room for patch_verified_entry ++ __ nop(); ++ __ nop(); ++ ++ // Generate stack overflow check ++ if (UseStackBanging) { ++ __ bang_stack_with_offset((int)JavaThread::stack_shadow_zone_size()); ++ } ++ ++ // Generate a new frame for the wrapper. ++ // do mips need this ? ++#ifndef OPT_THREAD ++ __ get_thread(TREG); ++#endif ++ __ st_ptr(SP, TREG, in_bytes(JavaThread::last_Java_sp_offset())); ++ __ move(AT, -(StackAlignmentInBytes)); ++ __ andr(SP, SP, AT); ++ ++ __ enter(); ++ // -2 because return address is already present and so is saved fp ++ __ addiu(SP, SP, -1 * (stack_size - 2*wordSize)); ++ ++ // Frame is now completed as far a size and linkage. ++ ++ int frame_complete = ((intptr_t)__ pc()) - start; ++ ++ // Calculate the difference between sp and fp. We need to know it ++ // after the native call because on windows Java Natives will pop ++ // the arguments and it is painful to do sp relative addressing ++ // in a platform independent way. So after the call we switch to ++ // fp relative addressing. ++ //FIXME actually , the fp_adjustment may not be the right, because andr(sp, sp, at) may change ++ //the SP ++ int fp_adjustment = stack_size - 2*wordSize; ++ ++#ifdef COMPILER2 ++ // C2 may leave the stack dirty if not in SSE2+ mode ++ __ empty_FPU_stack(); ++#endif ++ ++ // Compute the fp offset for any slots used after the jni call ++ ++ int lock_slot_fp_offset = (lock_slot_offset*VMRegImpl::stack_slot_size) - fp_adjustment; ++ // We use TREG as a thread pointer because it is callee save and ++ // if we load it once it is usable thru the entire wrapper ++ const Register thread = TREG; ++ ++ // We use S4 as the oop handle for the receiver/klass ++ // It is callee save so it survives the call to native ++ ++ const Register oop_handle_reg = S4; ++ if (is_critical_native) { ++ Unimplemented(); ++ // check_needs_gc_for_critical_native(masm, stack_slots, total_c_args, total_in_args, ++ // oop_handle_offset, oop_maps, in_regs, in_sig_bt); ++ } ++ ++#ifndef OPT_THREAD ++ __ get_thread(thread); ++#endif ++ ++ // ++ // We immediately shuffle the arguments so that any vm call we have to ++ // make from here on out (sync slow path, jvmpi, etc.) we will have ++ // captured the oops from our caller and have a valid oopMap for ++ // them. ++ ++ // ----------------- ++ // The Grand Shuffle ++ // ++ // Natives require 1 or 2 extra arguments over the normal ones: the JNIEnv* ++ // and, if static, the class mirror instead of a receiver. This pretty much ++ // guarantees that register layout will not match (and mips doesn't use reg ++ // parms though amd does). Since the native abi doesn't use register args ++ // and the java conventions does we don't have to worry about collisions. ++ // All of our moved are reg->stack or stack->stack. ++ // We ignore the extra arguments during the shuffle and handle them at the ++ // last moment. The shuffle is described by the two calling convention ++ // vectors we have in our possession. We simply walk the java vector to ++ // get the source locations and the c vector to get the destinations. 
++ ++ int c_arg = method->is_static() ? 2 : 1 ; ++ ++ // Record sp-based slot for receiver on stack for non-static methods ++ int receiver_offset = -1; ++ ++ // This is a trick. We double the stack slots so we can claim ++ // the oops in the caller's frame. Since we are sure to have ++ // more args than the caller doubling is enough to make ++ // sure we can capture all the incoming oop args from the ++ // caller. ++ // ++ OopMap* map = new OopMap(stack_slots * 2, 0 /* arg_slots*/); ++ ++ // Mark location of fp (someday) ++ // map->set_callee_saved(VMRegImpl::stack2reg( stack_slots - 2), stack_slots * 2, 0, vmreg(fp)); ++ ++#ifdef ASSERT ++ bool reg_destroyed[RegisterImpl::number_of_registers]; ++ bool freg_destroyed[FloatRegisterImpl::number_of_registers]; ++ for ( int r = 0 ; r < RegisterImpl::number_of_registers ; r++ ) { ++ reg_destroyed[r] = false; ++ } ++ for ( int f = 0 ; f < FloatRegisterImpl::number_of_registers ; f++ ) { ++ freg_destroyed[f] = false; ++ } ++ ++#endif /* ASSERT */ ++ ++ // This may iterate in two different directions depending on the ++ // kind of native it is. The reason is that for regular JNI natives ++ // the incoming and outgoing registers are offset upwards and for ++ // critical natives they are offset down. ++ GrowableArray arg_order(2 * total_in_args); ++ VMRegPair tmp_vmreg; ++ tmp_vmreg.set2(T8->as_VMReg()); ++ ++ if (!is_critical_native) { ++ for (int i = total_in_args - 1, c_arg = total_c_args - 1; i >= 0; i--, c_arg--) { ++ arg_order.push(i); ++ arg_order.push(c_arg); ++ } ++ } else { ++ // Compute a valid move order, using tmp_vmreg to break any cycles ++ Unimplemented(); ++ // ComputeMoveOrder cmo(total_in_args, in_regs, total_c_args, out_regs, in_sig_bt, arg_order, tmp_vmreg); ++ } ++ ++ int temploc = -1; ++ for (int ai = 0; ai < arg_order.length(); ai += 2) { ++ int i = arg_order.at(ai); ++ int c_arg = arg_order.at(ai + 1); ++ __ block_comment(err_msg("move %d -> %d", i, c_arg)); ++ if (c_arg == -1) { ++ assert(is_critical_native, "should only be required for critical natives"); ++ // This arg needs to be moved to a temporary ++ __ move(tmp_vmreg.first()->as_Register(), in_regs[i].first()->as_Register()); ++ in_regs[i] = tmp_vmreg; ++ temploc = i; ++ continue; ++ } else if (i == -1) { ++ assert(is_critical_native, "should only be required for critical natives"); ++ // Read from the temporary location ++ assert(temploc != -1, "must be valid"); ++ i = temploc; ++ temploc = -1; ++ } ++#ifdef ASSERT ++ if (in_regs[i].first()->is_Register()) { ++ assert(!reg_destroyed[in_regs[i].first()->as_Register()->encoding()], "destroyed reg!"); ++ } else if (in_regs[i].first()->is_FloatRegister()) { ++ assert(!freg_destroyed[in_regs[i].first()->as_FloatRegister()->encoding()], "destroyed reg!"); ++ } ++ if (out_regs[c_arg].first()->is_Register()) { ++ reg_destroyed[out_regs[c_arg].first()->as_Register()->encoding()] = true; ++ } else if (out_regs[c_arg].first()->is_FloatRegister()) { ++ freg_destroyed[out_regs[c_arg].first()->as_FloatRegister()->encoding()] = true; ++ } ++#endif /* ASSERT */ ++ switch (in_sig_bt[i]) { ++ case T_ARRAY: ++ if (is_critical_native) { ++ Unimplemented(); ++ // unpack_array_argument(masm, in_regs[i], in_elem_bt[i], out_regs[c_arg + 1], out_regs[c_arg]); ++ c_arg++; ++#ifdef ASSERT ++ if (out_regs[c_arg].first()->is_Register()) { ++ reg_destroyed[out_regs[c_arg].first()->as_Register()->encoding()] = true; ++ } else if (out_regs[c_arg].first()->is_FloatRegister()) { ++ 
freg_destroyed[out_regs[c_arg].first()->as_FloatRegister()->encoding()] = true; ++ } ++#endif ++ break; ++ } ++ case T_OBJECT: ++ assert(!is_critical_native, "no oop arguments"); ++ object_move(masm, map, oop_handle_offset, stack_slots, in_regs[i], out_regs[c_arg], ++ ((i == 0) && (!is_static)), ++ &receiver_offset); ++ break; ++ case T_VOID: ++ break; ++ ++ case T_FLOAT: ++ float_move(masm, in_regs[i], out_regs[c_arg]); ++ break; ++ ++ case T_DOUBLE: ++ assert( i + 1 < total_in_args && ++ in_sig_bt[i + 1] == T_VOID && ++ out_sig_bt[c_arg+1] == T_VOID, "bad arg list"); ++ double_move(masm, in_regs[i], out_regs[c_arg]); ++ break; ++ ++ case T_LONG : ++ long_move(masm, in_regs[i], out_regs[c_arg]); ++ break; ++ ++ case T_ADDRESS: assert(false, "found T_ADDRESS in java args"); ++ ++ default: ++ simple_move32(masm, in_regs[i], out_regs[c_arg]); ++ } ++ } ++ ++ // point c_arg at the first arg that is already loaded in case we ++ // need to spill before we call out ++ c_arg = total_c_args - total_in_args; ++ // Pre-load a static method's oop. Used both by locking code and ++ // the normal JNI call code. ++ ++ __ move(oop_handle_reg, A1); ++ ++ if (method->is_static() && !is_critical_native) { ++ ++ // load opp into a register ++ int oop_index = __ oop_recorder()->find_index(JNIHandles::make_local( ++ (method->method_holder())->java_mirror())); ++ ++ ++ RelocationHolder rspec = oop_Relocation::spec(oop_index); ++ __ relocate(rspec); ++ __ patchable_set48(oop_handle_reg, (long)JNIHandles::make_local((method->method_holder())->java_mirror())); ++ // Now handlize the static class mirror it's known not-null. ++ __ sd( oop_handle_reg, SP, klass_offset); ++ map->set_oop(VMRegImpl::stack2reg(klass_slot_offset)); ++ ++ // Now get the handle ++ __ lea(oop_handle_reg, Address(SP, klass_offset)); ++ // store the klass handle as second argument ++ __ move(A1, oop_handle_reg); ++ // and protect the arg if we must spill ++ c_arg--; ++ } ++ ++ // Change state to native (we save the return address in the thread, since it might not ++ // be pushed on the stack when we do a a stack traversal). It is enough that the pc() ++ // points into the right code segment. It does not have to be the correct return pc. ++ // We use the same pc/oopMap repeatedly when we call out ++ ++ intptr_t the_pc = (intptr_t) __ pc(); ++ oop_maps->add_gc_map(the_pc - start, map); ++ ++ __ set_last_Java_frame(SP, noreg, NULL); ++ __ relocate(relocInfo::internal_pc_type); ++ { ++ intptr_t save_pc = (intptr_t)the_pc ; ++ __ patchable_set48(AT, save_pc); ++ } ++ __ sd(AT, thread, in_bytes(JavaThread::frame_anchor_offset() + JavaFrameAnchor::last_Java_pc_offset())); ++ ++ ++ // We have all of the arguments setup at this point. We must not touch any register ++ // argument registers at this point (what if we save/restore them there are no oop? 
++ { ++ SkipIfEqual skip_if(masm, &DTraceMethodProbes, 0); ++ save_args(masm, total_c_args, c_arg, out_regs); ++ int metadata_index = __ oop_recorder()->find_index(method()); ++ RelocationHolder rspec = metadata_Relocation::spec(metadata_index); ++ __ relocate(rspec); ++ __ patchable_set48(AT, (long)(method())); ++ ++ __ call_VM_leaf( ++ CAST_FROM_FN_PTR(address, SharedRuntime::dtrace_method_entry), ++ thread, AT); ++ ++ restore_args(masm, total_c_args, c_arg, out_regs); ++ } ++ ++ // These are register definitions we need for locking/unlocking ++ const Register swap_reg = T8; // Must use T8 for cmpxchg instruction ++ const Register obj_reg = T9; // Will contain the oop ++ //const Register lock_reg = T6; // Address of compiler lock object (BasicLock) ++ const Register lock_reg = c_rarg0; // Address of compiler lock object (BasicLock) ++ ++ ++ ++ Label slow_path_lock; ++ Label lock_done; ++ ++ // Lock a synchronized method ++ if (method->is_synchronized()) { ++ assert(!is_critical_native, "unhandled"); ++ ++ const int mark_word_offset = BasicLock::displaced_header_offset_in_bytes(); ++ ++ // Get the handle (the 2nd argument) ++ __ move(oop_handle_reg, A1); ++ ++ // Get address of the box ++ __ lea(lock_reg, Address(FP, lock_slot_fp_offset)); ++ ++ // Load the oop from the handle ++ __ ld(obj_reg, oop_handle_reg, 0); ++ ++ if (UseBiasedLocking) { ++ // Note that oop_handle_reg is trashed during this call ++ __ biased_locking_enter(lock_reg, obj_reg, swap_reg, A1, false, lock_done, &slow_path_lock); ++ } ++ ++ // Load immediate 1 into swap_reg %T8 ++ __ move(swap_reg, 1); ++ ++ __ ld(AT, obj_reg, 0); ++ __ orr(swap_reg, swap_reg, AT); ++ ++ __ sd(swap_reg, lock_reg, mark_word_offset); ++ __ cmpxchg(Address(obj_reg, 0), swap_reg, lock_reg, AT, true, false, lock_done); ++ // Test if the oopMark is an obvious stack pointer, i.e., ++ // 1) (mark & 3) == 0, and ++ // 2) sp <= mark < mark + os::pagesize() ++ // These 3 tests can be done by evaluating the following ++ // expression: ((mark - sp) & (3 - os::vm_page_size())), ++ // assuming both stack pointer and pagesize have their ++ // least significant 2 bits clear. ++ // NOTE: the oopMark is in swap_reg %T8 as the result of cmpxchg ++ ++ __ dsubu(swap_reg, swap_reg, SP); ++ __ move(AT, 3 - os::vm_page_size()); ++ __ andr(swap_reg , swap_reg, AT); ++ // Save the test result, for recursive case, the result is zero ++ __ sd(swap_reg, lock_reg, mark_word_offset); ++ __ bne(swap_reg, R0, slow_path_lock); ++ __ delayed()->nop(); ++ // Slow path will re-enter here ++ __ bind(lock_done); ++ ++ if (UseBiasedLocking) { ++ // Re-fetch oop_handle_reg as we trashed it above ++ __ move(A1, oop_handle_reg); ++ } ++ } ++ ++ ++ // Finally just about ready to make the JNI call ++ ++ ++ // get JNIEnv* which is first argument to native ++ if (!is_critical_native) { ++ __ addiu(A0, thread, in_bytes(JavaThread::jni_environment_offset())); ++ } ++ ++ // Example: Java_java_lang_ref_Finalizer_invokeFinalizeMethod(JNIEnv *env, jclass clazz, jobject ob) ++ // Load the second arguments into A1 ++ //__ ld(A1, SP , wordSize ); // klass ++ ++ // Now set thread in native ++ __ addiu(AT, R0, _thread_in_native); ++ if(os::is_MP()) { ++ __ sync(); // store release ++ } ++ __ sw(AT, thread, in_bytes(JavaThread::thread_state_offset())); ++ // do the call ++ __ call(native_func, relocInfo::runtime_call_type); ++ __ delayed()->nop(); ++ // WARNING - on Windows Java Natives use pascal calling convention and pop the ++ // arguments off of the stack. 
We could just re-adjust the stack pointer here ++ // and continue to do SP relative addressing but we instead switch to FP ++ // relative addressing. ++ ++ // Unpack native results. ++ switch (ret_type) { ++ case T_BOOLEAN: __ c2bool(V0); break; ++ case T_CHAR : __ andi(V0, V0, 0xFFFF); break; ++ case T_BYTE : __ sign_extend_byte (V0); break; ++ case T_SHORT : __ sign_extend_short(V0); break; ++ case T_INT : // nothing to do break; ++ case T_DOUBLE : ++ case T_FLOAT : ++ // Result is in st0 we'll save as needed ++ break; ++ case T_ARRAY: // Really a handle ++ case T_OBJECT: // Really a handle ++ break; // can't de-handlize until after safepoint check ++ case T_VOID: break; ++ case T_LONG: break; ++ default : ShouldNotReachHere(); ++ } ++ // Switch thread to "native transition" state before reading the synchronization state. ++ // This additional state is necessary because reading and testing the synchronization ++ // state is not atomic w.r.t. GC, as this scenario demonstrates: ++ // Java thread A, in _thread_in_native state, loads _not_synchronized and is preempted. ++ // VM thread changes sync state to synchronizing and suspends threads for GC. ++ // Thread A is resumed to finish this native method, but doesn't block here since it ++ // didn't see any synchronization is progress, and escapes. ++ __ addiu(AT, R0, _thread_in_native_trans); ++ if(os::is_MP()) { ++ __ sync(); // store release ++ } ++ __ sw(AT, thread, in_bytes(JavaThread::thread_state_offset())); ++ ++ if(os::is_MP()) { ++ if (UseMembar) { ++ // Force this write out before the read below ++ __ sync(); ++ } else { ++ // Write serialization page so VM thread can do a pseudo remote membar. ++ // We use the current thread pointer to calculate a thread specific ++ // offset to write to within the page. This minimizes bus traffic ++ // due to cache line collision. ++ __ serialize_memory(thread, A0); ++ } ++ } ++ ++ Label after_transition; ++ ++ // check for safepoint operation in progress and/or pending suspend requests ++ { ++ Label Continue; ++ Label slow_path; ++ ++ __ safepoint_poll_acquire(slow_path, thread); ++ __ lw(AT, thread, in_bytes(JavaThread::suspend_flags_offset())); ++ __ beq(AT, R0, Continue); ++ __ delayed()->nop(); ++ __ bind(slow_path); ++ ++ // Don't use call_VM as it will see a possible pending exception and forward it ++ // and never return here preventing us from clearing _last_native_pc down below. ++ // ++ save_native_result(masm, ret_type, stack_slots); ++ __ move(A0, thread); ++ __ addiu(SP, SP, -wordSize); ++ __ push(S2); ++ __ move(AT, -(StackAlignmentInBytes)); ++ __ move(S2, SP); // use S2 as a sender SP holder ++ __ andr(SP, SP, AT); // align stack as required by ABI ++ if (!is_critical_native) { ++ __ call(CAST_FROM_FN_PTR(address, JavaThread::check_special_condition_for_native_trans), relocInfo::runtime_call_type); ++ __ delayed()->nop(); ++ } else { ++ __ call(CAST_FROM_FN_PTR(address, JavaThread::check_special_condition_for_native_trans_and_transition), relocInfo::runtime_call_type); ++ __ delayed()->nop(); ++ } ++ __ move(SP, S2); // use S2 as a sender SP holder ++ __ pop(S2); ++ __ addiu(SP, SP, wordSize); ++ //add for compressedoops ++ __ reinit_heapbase(); ++ // Restore any method result value ++ restore_native_result(masm, ret_type, stack_slots); ++ ++ if (is_critical_native) { ++ // The call above performed the transition to thread_in_Java so ++ // skip the transition logic below. 
++ __ beq(R0, R0, after_transition); ++ __ delayed()->nop(); ++ } ++ ++ __ bind(Continue); ++ } ++ ++ // change thread state ++ __ addiu(AT, R0, _thread_in_Java); ++ if(os::is_MP()) { ++ __ sync(); // store release ++ } ++ __ sw(AT, thread, in_bytes(JavaThread::thread_state_offset())); ++ __ bind(after_transition); ++ Label reguard; ++ Label reguard_done; ++ __ lw(AT, thread, in_bytes(JavaThread::stack_guard_state_offset())); ++ __ addiu(AT, AT, -JavaThread::stack_guard_yellow_reserved_disabled); ++ __ beq(AT, R0, reguard); ++ __ delayed()->nop(); ++ // slow path reguard re-enters here ++ __ bind(reguard_done); ++ ++ // Handle possible exception (will unlock if necessary) ++ ++ // native result if any is live ++ ++ // Unlock ++ Label slow_path_unlock; ++ Label unlock_done; ++ if (method->is_synchronized()) { ++ ++ Label done; ++ ++ // Get locked oop from the handle we passed to jni ++ __ ld( obj_reg, oop_handle_reg, 0); ++ if (UseBiasedLocking) { ++ __ biased_locking_exit(obj_reg, T8, done); ++ ++ } ++ ++ // Simple recursive lock? ++ ++ __ ld(AT, FP, lock_slot_fp_offset); ++ __ beq(AT, R0, done); ++ __ delayed()->nop(); ++ // Must save FSF if if it is live now because cmpxchg must use it ++ if (ret_type != T_FLOAT && ret_type != T_DOUBLE && ret_type != T_VOID) { ++ save_native_result(masm, ret_type, stack_slots); ++ } ++ ++ // get old displaced header ++ __ ld (T8, FP, lock_slot_fp_offset); ++ // get address of the stack lock ++ __ addiu(c_rarg0, FP, lock_slot_fp_offset); ++ // Atomic swap old header if oop still contains the stack lock ++ __ cmpxchg(Address(obj_reg, 0), c_rarg0, T8, AT, false, false, unlock_done, &slow_path_unlock); ++ ++ // slow path re-enters here ++ __ bind(unlock_done); ++ if (ret_type != T_FLOAT && ret_type != T_DOUBLE && ret_type != T_VOID) { ++ restore_native_result(masm, ret_type, stack_slots); ++ } ++ ++ __ bind(done); ++ ++ } ++ { ++ SkipIfEqual skip_if(masm, &DTraceMethodProbes, 0); ++ // Tell dtrace about this method exit ++ save_native_result(masm, ret_type, stack_slots); ++ int metadata_index = __ oop_recorder()->find_index( (method())); ++ RelocationHolder rspec = metadata_Relocation::spec(metadata_index); ++ __ relocate(rspec); ++ __ patchable_set48(AT, (long)(method())); ++ ++ __ call_VM_leaf( ++ CAST_FROM_FN_PTR(address, SharedRuntime::dtrace_method_exit), ++ thread, AT); ++ restore_native_result(masm, ret_type, stack_slots); ++ } ++ ++ // We can finally stop using that last_Java_frame we setup ages ago ++ ++ __ reset_last_Java_frame(false); ++ ++ // Unpack oop result, e.g. JNIHandles::resolve value. ++ if (ret_type == T_OBJECT || ret_type == T_ARRAY) { ++ __ resolve_jobject(V0, thread, T9); ++ } ++ ++ if (CheckJNICalls) { ++ // clear_pending_jni_exception_check ++ __ sd(R0, thread, in_bytes(JavaThread::pending_jni_exception_check_fn_offset())); ++ } ++ ++ if (!is_critical_native) { ++ // reset handle block ++ __ ld(AT, thread, in_bytes(JavaThread::active_handles_offset())); ++ __ sw(R0, AT, JNIHandleBlock::top_offset_in_bytes()); ++ } ++ ++ if (!is_critical_native) { ++ // Any exception pending? ++ __ ld(AT, thread, in_bytes(Thread::pending_exception_offset())); ++ __ bne(AT, R0, exception_pending); ++ __ delayed()->nop(); ++ } ++ // no exception, we're almost done ++ ++ // check that only result value is on FPU stack ++ __ verify_FPU(ret_type == T_FLOAT || ret_type == T_DOUBLE ? 
1 : 0, "native_wrapper normal exit"); ++ ++ // Return ++#ifndef OPT_THREAD ++ __ get_thread(TREG); ++#endif ++ //__ ld_ptr(SP, TREG, in_bytes(JavaThread::last_Java_sp_offset())); ++ __ leave(); ++ ++ __ jr(RA); ++ __ delayed()->nop(); ++ // Unexpected paths are out of line and go here ++ // Slow path locking & unlocking ++ if (method->is_synchronized()) { ++ ++ // BEGIN Slow path lock ++ __ bind(slow_path_lock); ++ ++ // protect the args we've loaded ++ save_args(masm, total_c_args, c_arg, out_regs); ++ ++ // has last_Java_frame setup. No exceptions so do vanilla call not call_VM ++ // args are (oop obj, BasicLock* lock, JavaThread* thread) ++ ++ __ move(A0, obj_reg); ++ __ move(A1, lock_reg); ++ __ move(A2, thread); ++ __ addiu(SP, SP, - 3*wordSize); ++ ++ __ move(AT, -(StackAlignmentInBytes)); ++ __ move(S2, SP); // use S2 as a sender SP holder ++ __ andr(SP, SP, AT); // align stack as required by ABI ++ ++ __ call(CAST_FROM_FN_PTR(address, SharedRuntime::complete_monitor_locking_C), relocInfo::runtime_call_type); ++ __ delayed()->nop(); ++ __ move(SP, S2); ++ __ addiu(SP, SP, 3*wordSize); ++ ++ restore_args(masm, total_c_args, c_arg, out_regs); ++ ++#ifdef ASSERT ++ { Label L; ++ __ ld(AT, thread, in_bytes(Thread::pending_exception_offset())); ++ __ beq(AT, R0, L); ++ __ delayed()->nop(); ++ __ stop("no pending exception allowed on exit from monitorenter"); ++ __ bind(L); ++ } ++#endif ++ __ b(lock_done); ++ __ delayed()->nop(); ++ // END Slow path lock ++ ++ // BEGIN Slow path unlock ++ __ bind(slow_path_unlock); ++ ++ // Slow path unlock ++ ++ if (ret_type == T_FLOAT || ret_type == T_DOUBLE ) { ++ save_native_result(masm, ret_type, stack_slots); ++ } ++ // Save pending exception around call to VM (which contains an EXCEPTION_MARK) ++ ++ __ ld(AT, thread, in_bytes(Thread::pending_exception_offset())); ++ __ push(AT); ++ __ sd(R0, thread, in_bytes(Thread::pending_exception_offset())); ++ ++ __ move(AT, -(StackAlignmentInBytes)); ++ __ move(S2, SP); // use S2 as a sender SP holder ++ __ andr(SP, SP, AT); // align stack as required by ABI ++ ++ // should be a peal ++ // +wordSize because of the push above ++ __ addiu(A1, FP, lock_slot_fp_offset); ++ ++ __ move(A0, obj_reg); ++ __ move(A2, thread); ++ __ addiu(SP, SP, -2*wordSize); ++ __ call(CAST_FROM_FN_PTR(address, SharedRuntime::complete_monitor_unlocking_C), ++ relocInfo::runtime_call_type); ++ __ delayed()->nop(); ++ __ addiu(SP, SP, 2*wordSize); ++ __ move(SP, S2); ++ //add for compressedoops ++ __ reinit_heapbase(); ++#ifdef ASSERT ++ { ++ Label L; ++ __ ld( AT, thread, in_bytes(Thread::pending_exception_offset())); ++ __ beq(AT, R0, L); ++ __ delayed()->nop(); ++ __ stop("no pending exception allowed on exit complete_monitor_unlocking_C"); ++ __ bind(L); ++ } ++#endif /* ASSERT */ ++ ++ __ pop(AT); ++ __ sd(AT, thread, in_bytes(Thread::pending_exception_offset())); ++ if (ret_type == T_FLOAT || ret_type == T_DOUBLE ) { ++ restore_native_result(masm, ret_type, stack_slots); ++ } ++ __ b(unlock_done); ++ __ delayed()->nop(); ++ // END Slow path unlock ++ ++ } ++ ++ // SLOW PATH Reguard the stack if needed ++ ++ __ bind(reguard); ++ save_native_result(masm, ret_type, stack_slots); ++ __ call(CAST_FROM_FN_PTR(address, SharedRuntime::reguard_yellow_pages), ++ relocInfo::runtime_call_type); ++ __ delayed()->nop(); ++ //add for compressedoops ++ __ reinit_heapbase(); ++ restore_native_result(masm, ret_type, stack_slots); ++ __ b(reguard_done); ++ __ delayed()->nop(); ++ ++ // BEGIN EXCEPTION PROCESSING ++ if (!is_critical_native) { ++ // 
Forward the exception ++ __ bind(exception_pending); ++ ++ // remove possible return value from FPU register stack ++ __ empty_FPU_stack(); ++ ++ // pop our frame ++ //forward_exception_entry need return address on stack ++ __ move(SP, FP); ++ __ pop(FP); ++ ++ // and forward the exception ++ __ jmp(StubRoutines::forward_exception_entry(), relocInfo::runtime_call_type); ++ __ delayed()->nop(); ++ } ++ __ flush(); ++ ++ nmethod *nm = nmethod::new_native_nmethod(method, ++ compile_id, ++ masm->code(), ++ vep_offset, ++ frame_complete, ++ stack_slots / VMRegImpl::slots_per_word, ++ (is_static ? in_ByteSize(klass_offset) : in_ByteSize(receiver_offset)), ++ in_ByteSize(lock_slot_offset*VMRegImpl::stack_slot_size), ++ oop_maps); ++ ++ if (is_critical_native) { ++ nm->set_lazy_critical_native(true); ++ } ++ ++ return nm; ++ ++} ++ ++#ifdef HAVE_DTRACE_H ++// --------------------------------------------------------------------------- ++// Generate a dtrace nmethod for a given signature. The method takes arguments ++// in the Java compiled code convention, marshals them to the native ++// abi and then leaves nops at the position you would expect to call a native ++// function. When the probe is enabled the nops are replaced with a trap ++// instruction that dtrace inserts and the trace will cause a notification ++// to dtrace. ++// ++// The probes are only able to take primitive types and java/lang/String as ++// arguments. No other java types are allowed. Strings are converted to utf8 ++// strings so that from dtrace point of view java strings are converted to C ++// strings. There is an arbitrary fixed limit on the total space that a method ++// can use for converting the strings. (256 chars per string in the signature). ++// So any java string larger then this is truncated. ++ ++static int fp_offset[ConcreteRegisterImpl::number_of_registers] = { 0 }; ++static bool offsets_initialized = false; ++ ++static VMRegPair reg64_to_VMRegPair(Register r) { ++ VMRegPair ret; ++ if (wordSize == 8) { ++ ret.set2(r->as_VMReg()); ++ } else { ++ ret.set_pair(r->successor()->as_VMReg(), r->as_VMReg()); ++ } ++ return ret; ++} ++ ++ ++nmethod *SharedRuntime::generate_dtrace_nmethod(MacroAssembler *masm, ++ methodHandle method) { ++ ++ ++ // generate_dtrace_nmethod is guarded by a mutex so we are sure to ++ // be single threaded in this method. ++ assert(AdapterHandlerLibrary_lock->owned_by_self(), "must be"); ++ ++ // Fill in the signature array, for the calling-convention call. ++ int total_args_passed = method->size_of_parameters(); ++ ++ BasicType* in_sig_bt = NEW_RESOURCE_ARRAY(BasicType, total_args_passed); ++ VMRegPair *in_regs = NEW_RESOURCE_ARRAY(VMRegPair, total_args_passed); ++ ++ // The signature we are going to use for the trap that dtrace will see ++ // java/lang/String is converted. We drop "this" and any other object ++ // is converted to NULL. (A one-slot java/lang/Long object reference ++ // is converted to a two-slot long, which is why we double the allocation). 
++ BasicType* out_sig_bt = NEW_RESOURCE_ARRAY(BasicType, total_args_passed * 2); ++ VMRegPair* out_regs = NEW_RESOURCE_ARRAY(VMRegPair, total_args_passed * 2); ++ ++ int i=0; ++ int total_strings = 0; ++ int first_arg_to_pass = 0; ++ int total_c_args = 0; ++ ++ // Skip the receiver as dtrace doesn't want to see it ++ if( !method->is_static() ) { ++ in_sig_bt[i++] = T_OBJECT; ++ first_arg_to_pass = 1; ++ } ++ ++ SignatureStream ss(method->signature()); ++ for ( ; !ss.at_return_type(); ss.next()) { ++ BasicType bt = ss.type(); ++ in_sig_bt[i++] = bt; // Collect remaining bits of signature ++ out_sig_bt[total_c_args++] = bt; ++ if( bt == T_OBJECT) { ++ symbolOop s = ss.as_symbol_or_null(); ++ if (s == vmSymbols::java_lang_String()) { ++ total_strings++; ++ out_sig_bt[total_c_args-1] = T_ADDRESS; ++ } else if (s == vmSymbols::java_lang_Boolean() || ++ s == vmSymbols::java_lang_Byte()) { ++ out_sig_bt[total_c_args-1] = T_BYTE; ++ } else if (s == vmSymbols::java_lang_Character() || ++ s == vmSymbols::java_lang_Short()) { ++ out_sig_bt[total_c_args-1] = T_SHORT; ++ } else if (s == vmSymbols::java_lang_Integer() || ++ s == vmSymbols::java_lang_Float()) { ++ out_sig_bt[total_c_args-1] = T_INT; ++ } else if (s == vmSymbols::java_lang_Long() || ++ s == vmSymbols::java_lang_Double()) { ++ out_sig_bt[total_c_args-1] = T_LONG; ++ out_sig_bt[total_c_args++] = T_VOID; ++ } ++ } else if ( bt == T_LONG || bt == T_DOUBLE ) { ++ in_sig_bt[i++] = T_VOID; // Longs & doubles take 2 Java slots ++ // We convert double to long ++ out_sig_bt[total_c_args-1] = T_LONG; ++ out_sig_bt[total_c_args++] = T_VOID; ++ } else if ( bt == T_FLOAT) { ++ // We convert float to int ++ out_sig_bt[total_c_args-1] = T_INT; ++ } ++ } ++ ++ assert(i==total_args_passed, "validly parsed signature"); ++ ++ // Now get the compiled-Java layout as input arguments ++ int comp_args_on_stack; ++ comp_args_on_stack = SharedRuntime::java_calling_convention( ++ in_sig_bt, in_regs, total_args_passed, false); ++ ++ // We have received a description of where all the java arg are located ++ // on entry to the wrapper. We need to convert these args to where ++ // the a native (non-jni) function would expect them. To figure out ++ // where they go we convert the java signature to a C signature and remove ++ // T_VOID for any long/double we might have received. ++ ++ ++ // Now figure out where the args must be stored and how much stack space ++ // they require (neglecting out_preserve_stack_slots but space for storing ++ // the 1st six register arguments). It's weird see int_stk_helper. ++ ++ int out_arg_slots; ++ out_arg_slots = c_calling_convention(out_sig_bt, out_regs, NULL, total_c_args); ++ ++ // Calculate the total number of stack slots we will need. ++ ++ // First count the abi requirement plus all of the outgoing args ++ int stack_slots = SharedRuntime::out_preserve_stack_slots() + out_arg_slots; ++ ++ // Plus a temp for possible converion of float/double/long register args ++ ++ int conversion_temp = stack_slots; ++ stack_slots += 2; ++ ++ ++ // Now space for the string(s) we must convert ++ ++ int string_locs = stack_slots; ++ stack_slots += total_strings * ++ (max_dtrace_string_size / VMRegImpl::stack_slot_size); ++ ++ // Ok The space we have allocated will look like: ++ // ++ // ++ // FP-> | | ++ // |---------------------| ++ // | string[n] | ++ // |---------------------| <- string_locs[n] ++ // | string[n-1] | ++ // |---------------------| <- string_locs[n-1] ++ // | ... | ++ // | ... 
| ++ // |---------------------| <- string_locs[1] ++ // | string[0] | ++ // |---------------------| <- string_locs[0] ++ // | temp | ++ // |---------------------| <- conversion_temp ++ // | outbound memory | ++ // | based arguments | ++ // | | ++ // |---------------------| ++ // | | ++ // SP-> | out_preserved_slots | ++ // ++ // ++ ++ // Now compute actual number of stack words we need rounding to make ++ // stack properly aligned. ++ stack_slots = round_to(stack_slots, 4 * VMRegImpl::slots_per_word); ++ ++ int stack_size = stack_slots * VMRegImpl::stack_slot_size; ++ ++ intptr_t start = (intptr_t)__ pc(); ++ ++ // First thing make an ic check to see if we should even be here ++ ++ { ++ Label L; ++ const Register temp_reg = G3_scratch; ++ Address ic_miss(temp_reg, SharedRuntime::get_ic_miss_stub()); ++ __ verify_oop(O0); ++ __ ld_ptr(O0, oopDesc::klass_offset_in_bytes(), temp_reg); ++ __ cmp(temp_reg, G5_inline_cache_reg); ++ __ brx(Assembler::equal, true, Assembler::pt, L); ++ __ delayed()->nop(); ++ ++ __ jump_to(ic_miss, 0); ++ __ delayed()->nop(); ++ __ align(CodeEntryAlignment); ++ __ bind(L); ++ } ++ ++ int vep_offset = ((intptr_t)__ pc()) - start; ++ ++ // Make enough room for patch_verified_entry ++ __ nop(); ++ __ nop(); ++ ++ // Generate stack overflow check before creating frame ++ __ generate_stack_overflow_check(stack_size); ++ ++ // Generate a new frame for the wrapper. ++ __ save(SP, -stack_size, SP); ++ ++ // Frame is now completed as far a size and linkage. ++ ++ int frame_complete = ((intptr_t)__ pc()) - start; ++ ++#ifdef ASSERT ++ bool reg_destroyed[RegisterImpl::number_of_registers]; ++ bool freg_destroyed[FloatRegisterImpl::number_of_registers]; ++ for ( int r = 0 ; r < RegisterImpl::number_of_registers ; r++ ) { ++ reg_destroyed[r] = false; ++ } ++ for ( int f = 0 ; f < FloatRegisterImpl::number_of_registers ; f++ ) { ++ freg_destroyed[f] = false; ++ } ++ ++#endif /* ASSERT */ ++ ++ VMRegPair zero; ++ const Register g0 = G0; // without this we get a compiler warning (why??) 
++ zero.set2(g0->as_VMReg()); ++ ++ int c_arg, j_arg; ++ ++ Register conversion_off = noreg; ++ ++ for (j_arg = first_arg_to_pass, c_arg = 0 ; ++ j_arg < total_args_passed ; j_arg++, c_arg++ ) { ++ ++ VMRegPair src = in_regs[j_arg]; ++ VMRegPair dst = out_regs[c_arg]; ++ ++#ifdef ASSERT ++ if (src.first()->is_Register()) { ++ assert(!reg_destroyed[src.first()->as_Register()->encoding()], "ack!"); ++ } else if (src.first()->is_FloatRegister()) { ++ assert(!freg_destroyed[src.first()->as_FloatRegister()->encoding( ++ FloatRegisterImpl::S)], "ack!"); ++ } ++ if (dst.first()->is_Register()) { ++ reg_destroyed[dst.first()->as_Register()->encoding()] = true; ++ } else if (dst.first()->is_FloatRegister()) { ++ freg_destroyed[dst.first()->as_FloatRegister()->encoding( ++ FloatRegisterImpl::S)] = true; ++ } ++#endif /* ASSERT */ ++ ++ switch (in_sig_bt[j_arg]) { ++ case T_ARRAY: ++ case T_OBJECT: ++ { ++ if (out_sig_bt[c_arg] == T_BYTE || out_sig_bt[c_arg] == T_SHORT || ++ out_sig_bt[c_arg] == T_INT || out_sig_bt[c_arg] == T_LONG) { ++ // need to unbox a one-slot value ++ Register in_reg = L0; ++ Register tmp = L2; ++ if ( src.first()->is_reg() ) { ++ in_reg = src.first()->as_Register(); ++ } else { ++ assert(Assembler::is_simm13(reg2offset(src.first()) + STACK_BIAS), ++ "must be"); ++ __ ld_ptr(FP, reg2offset(src.first()) + STACK_BIAS, in_reg); ++ } ++ // If the final destination is an acceptable register ++ if ( dst.first()->is_reg() ) { ++ if ( dst.is_single_phys_reg() || out_sig_bt[c_arg] != T_LONG ) { ++ tmp = dst.first()->as_Register(); ++ } ++ } ++ ++ Label skipUnbox; ++ if ( wordSize == 4 && out_sig_bt[c_arg] == T_LONG ) { ++ __ mov(G0, tmp->successor()); ++ } ++ __ br_null(in_reg, true, Assembler::pn, skipUnbox); ++ __ delayed()->mov(G0, tmp); ++ ++ BasicType bt = out_sig_bt[c_arg]; ++ int box_offset = java_lang_boxing_object::value_offset_in_bytes(bt); ++ switch (bt) { ++ case T_BYTE: ++ __ ldub(in_reg, box_offset, tmp); break; ++ case T_SHORT: ++ __ lduh(in_reg, box_offset, tmp); break; ++ case T_INT: ++ __ ld(in_reg, box_offset, tmp); break; ++ case T_LONG: ++ __ ld_long(in_reg, box_offset, tmp); break; ++ default: ShouldNotReachHere(); ++ } ++ ++ __ bind(skipUnbox); ++ // If tmp wasn't final destination copy to final destination ++ if (tmp == L2) { ++ VMRegPair tmp_as_VM = reg64_to_VMRegPair(L2); ++ if (out_sig_bt[c_arg] == T_LONG) { ++ long_move(masm, tmp_as_VM, dst); ++ } else { ++ move32_64(masm, tmp_as_VM, out_regs[c_arg]); ++ } ++ } ++ if (out_sig_bt[c_arg] == T_LONG) { ++ assert(out_sig_bt[c_arg+1] == T_VOID, "must be"); ++ ++c_arg; // move over the T_VOID to keep the loop indices in sync ++ } ++ } else if (out_sig_bt[c_arg] == T_ADDRESS) { ++ Register s = ++ src.first()->is_reg() ? src.first()->as_Register() : L2; ++ Register d = ++ dst.first()->is_reg() ? dst.first()->as_Register() : L2; ++ ++ // We store the oop now so that the conversion pass can reach ++ // while in the inner frame. This will be the only store if ++ // the oop is NULL. 
++ if (s != L2) { ++ // src is register ++ if (d != L2) { ++ // dst is register ++ __ mov(s, d); ++ } else { ++ assert(Assembler::is_simm13(reg2offset(dst.first()) + ++ STACK_BIAS), "must be"); ++ __ st_ptr(s, SP, reg2offset(dst.first()) + STACK_BIAS); ++ } ++ } else { ++ // src not a register ++ assert(Assembler::is_simm13(reg2offset(src.first()) + ++ STACK_BIAS), "must be"); ++ __ ld_ptr(FP, reg2offset(src.first()) + STACK_BIAS, d); ++ if (d == L2) { ++ assert(Assembler::is_simm13(reg2offset(dst.first()) + ++ STACK_BIAS), "must be"); ++ __ st_ptr(d, SP, reg2offset(dst.first()) + STACK_BIAS); ++ } ++ } ++ } else if (out_sig_bt[c_arg] != T_VOID) { ++ // Convert the arg to NULL ++ if (dst.first()->is_reg()) { ++ __ mov(G0, dst.first()->as_Register()); ++ } else { ++ assert(Assembler::is_simm13(reg2offset(dst.first()) + ++ STACK_BIAS), "must be"); ++ __ st_ptr(G0, SP, reg2offset(dst.first()) + STACK_BIAS); ++ } ++ } ++ } ++ break; ++ case T_VOID: ++ break; ++ ++ case T_FLOAT: ++ if (src.first()->is_stack()) { ++ // Stack to stack/reg is simple ++ move32_64(masm, src, dst); ++ } else { ++ if (dst.first()->is_reg()) { ++ // freg -> reg ++ int off = ++ STACK_BIAS + conversion_temp * VMRegImpl::stack_slot_size; ++ Register d = dst.first()->as_Register(); ++ if (Assembler::is_simm13(off)) { ++ __ stf(FloatRegisterImpl::S, src.first()->as_FloatRegister(), ++ SP, off); ++ __ ld(SP, off, d); ++ } else { ++ if (conversion_off == noreg) { ++ __ set(off, L6); ++ conversion_off = L6; ++ } ++ __ stf(FloatRegisterImpl::S, src.first()->as_FloatRegister(), ++ SP, conversion_off); ++ __ ld(SP, conversion_off , d); ++ } ++ } else { ++ // freg -> mem ++ int off = STACK_BIAS + reg2offset(dst.first()); ++ if (Assembler::is_simm13(off)) { ++ __ stf(FloatRegisterImpl::S, src.first()->as_FloatRegister(), ++ SP, off); ++ } else { ++ if (conversion_off == noreg) { ++ __ set(off, L6); ++ conversion_off = L6; ++ } ++ __ stf(FloatRegisterImpl::S, src.first()->as_FloatRegister(), ++ SP, conversion_off); ++ } ++ } ++ } ++ break; ++ ++ case T_DOUBLE: ++ assert( j_arg + 1 < total_args_passed && ++ in_sig_bt[j_arg + 1] == T_VOID && ++ out_sig_bt[c_arg+1] == T_VOID, "bad arg list"); ++ if (src.first()->is_stack()) { ++ // Stack to stack/reg is simple ++ long_move(masm, src, dst); ++ } else { ++ Register d = dst.first()->is_reg() ? dst.first()->as_Register() : L2; ++ ++ // Destination could be an odd reg on 32bit in which case ++ // we can't load direct to the destination. 
++ ++ if (!d->is_even() && wordSize == 4) { ++ d = L2; ++ } ++ int off = STACK_BIAS + conversion_temp * VMRegImpl::stack_slot_size; ++ if (Assembler::is_simm13(off)) { ++ __ stf(FloatRegisterImpl::D, src.first()->as_FloatRegister(), ++ SP, off); ++ __ ld_long(SP, off, d); ++ } else { ++ if (conversion_off == noreg) { ++ __ set(off, L6); ++ conversion_off = L6; ++ } ++ __ stf(FloatRegisterImpl::D, src.first()->as_FloatRegister(), ++ SP, conversion_off); ++ __ ld_long(SP, conversion_off, d); ++ } ++ if (d == L2) { ++ long_move(masm, reg64_to_VMRegPair(L2), dst); ++ } ++ } ++ break; ++ ++ case T_LONG : ++ // 32bit can't do a split move of something like g1 -> O0, O1 ++ // so use a memory temp ++ if (src.is_single_phys_reg() && wordSize == 4) { ++ Register tmp = L2; ++ if (dst.first()->is_reg() && ++ (wordSize == 8 || dst.first()->as_Register()->is_even())) { ++ tmp = dst.first()->as_Register(); ++ } ++ ++ int off = STACK_BIAS + conversion_temp * VMRegImpl::stack_slot_size; ++ if (Assembler::is_simm13(off)) { ++ __ stx(src.first()->as_Register(), SP, off); ++ __ ld_long(SP, off, tmp); ++ } else { ++ if (conversion_off == noreg) { ++ __ set(off, L6); ++ conversion_off = L6; ++ } ++ __ stx(src.first()->as_Register(), SP, conversion_off); ++ __ ld_long(SP, conversion_off, tmp); ++ } ++ ++ if (tmp == L2) { ++ long_move(masm, reg64_to_VMRegPair(L2), dst); ++ } ++ } else { ++ long_move(masm, src, dst); ++ } ++ break; ++ ++ case T_ADDRESS: assert(false, "found T_ADDRESS in java args"); ++ ++ default: ++ move32_64(masm, src, dst); ++ } ++ } ++ ++ ++ // If we have any strings we must store any register based arg to the stack ++ // This includes any still live xmm registers too. ++ ++ if (total_strings > 0 ) { ++ ++ // protect all the arg registers ++ __ save_frame(0); ++ __ mov(G2_thread, L7_thread_cache); ++ const Register L2_string_off = L2; ++ ++ // Get first string offset ++ __ set(string_locs * VMRegImpl::stack_slot_size, L2_string_off); ++ ++ for (c_arg = 0 ; c_arg < total_c_args ; c_arg++ ) { ++ if (out_sig_bt[c_arg] == T_ADDRESS) { ++ ++ VMRegPair dst = out_regs[c_arg]; ++ const Register d = dst.first()->is_reg() ? ++ dst.first()->as_Register()->after_save() : noreg; ++ ++ // It's a string the oop and it was already copied to the out arg ++ // position ++ if (d != noreg) { ++ __ mov(d, O0); ++ } else { ++ assert(Assembler::is_simm13(reg2offset(dst.first()) + STACK_BIAS), ++ "must be"); ++ __ ld_ptr(FP, reg2offset(dst.first()) + STACK_BIAS, O0); ++ } ++ Label skip; ++ ++ __ br_null(O0, false, Assembler::pn, skip); ++ __ delayed()->addu(FP, L2_string_off, O1); ++ ++ if (d != noreg) { ++ __ mov(O1, d); ++ } else { ++ assert(Assembler::is_simm13(reg2offset(dst.first()) + STACK_BIAS), ++ "must be"); ++ __ st_ptr(O1, FP, reg2offset(dst.first()) + STACK_BIAS); ++ } ++ ++ __ call(CAST_FROM_FN_PTR(address, SharedRuntime::get_utf), ++ relocInfo::runtime_call_type); ++ __ delayed()->addu(L2_string_off, max_dtrace_string_size, L2_string_off); ++ ++ __ bind(skip); ++ ++ } ++ ++ } ++ __ mov(L7_thread_cache, G2_thread); ++ __ restore(); ++ ++ } ++ ++ ++ // Ok now we are done. 
Need to place the nop that dtrace wants in order to ++ // patch in the trap ++ ++ int patch_offset = ((intptr_t)__ pc()) - start; ++ ++ __ nop(); ++ ++ ++ // Return ++ ++ __ ret(); ++ __ delayed()->restore(); ++ ++ __ flush(); ++ ++ nmethod *nm = nmethod::new_dtrace_nmethod( ++ method, masm->code(), vep_offset, patch_offset, frame_complete, ++ stack_slots / VMRegImpl::slots_per_word); ++ return nm; ++ ++} ++ ++#endif // HAVE_DTRACE_H ++ ++// this function returns the adjust size (in number of words) to a c2i adapter ++// activation for use during deoptimization ++int Deoptimization::last_frame_adjust(int callee_parameters, int callee_locals) { ++ return (callee_locals - callee_parameters) * Interpreter::stackElementWords; ++} ++ ++// "Top of Stack" slots that may be unused by the calling convention but must ++// otherwise be preserved. ++// On Intel these are not necessary and the value can be zero. ++// On Sparc this describes the words reserved for storing a register window ++// when an interrupt occurs. ++uint SharedRuntime::out_preserve_stack_slots() { ++ return 0; ++} ++ ++//------------------------------generate_deopt_blob---------------------------- ++// Ought to generate an ideal graph & compile, but here's some SPARC ASM ++// instead. ++void SharedRuntime::generate_deopt_blob() { ++ // allocate space for the code ++ ResourceMark rm; ++ // setup code generation tools ++ //CodeBuffer buffer ("deopt_blob", 4000, 2048); ++ CodeBuffer buffer ("deopt_blob", 8000, 2048); ++ MacroAssembler* masm = new MacroAssembler( & buffer); ++ int frame_size_in_words; ++ OopMap* map = NULL; ++ // Account for the extra args we place on the stack ++ // by the time we call fetch_unroll_info ++ const int additional_words = 2; // deopt kind, thread ++ ++ OopMapSet *oop_maps = new OopMapSet(); ++ ++ address start = __ pc(); ++ Label cont; ++ // we use S3 for DeOpt reason register ++ Register reason = S3; ++ // use S6 for thread register ++ Register thread = TREG; ++ // use S7 for fetch_unroll_info returned UnrollBlock ++ Register unroll = S7; ++ // Prolog for non exception case! ++ // Correct the return address we were given. ++ //FIXME, return address is on the tos or Ra? ++ __ addiu(RA, RA, - (NativeCall::return_address_offset_long)); ++ // Save everything in sight. ++ map = RegisterSaver::save_live_registers(masm, additional_words, &frame_size_in_words); ++ // Normal deoptimization ++ __ move(reason, Deoptimization::Unpack_deopt); ++ __ b(cont); ++ __ delayed()->nop(); ++ ++ int reexecute_offset = __ pc() - start; ++ ++ // Reexecute case ++ // return address is the pc describes what bci to do re-execute at ++ ++ // No need to update map as each call to save_live_registers will produce identical oopmap ++ (void) RegisterSaver::save_live_registers(masm, additional_words, &frame_size_in_words); ++ __ move(reason, Deoptimization::Unpack_reexecute); ++ __ b(cont); ++ __ delayed()->nop(); ++ ++ int exception_offset = __ pc() - start; ++ // Prolog for exception case ++ ++ // all registers are dead at this entry point, except for V0 and ++ // V1 which contain the exception oop and exception pc ++ // respectively. Set them in TLS and fall thru to the ++ // unpack_with_exception_in_tls entry point. 
++ ++#ifndef OPT_THREAD ++ __ get_thread(thread); ++#endif ++ __ st_ptr(V1, thread, in_bytes(JavaThread::exception_pc_offset())); ++ __ st_ptr(V0, thread, in_bytes(JavaThread::exception_oop_offset())); ++ int exception_in_tls_offset = __ pc() - start; ++ // new implementation because exception oop is now passed in JavaThread ++ ++ // Prolog for exception case ++ // All registers must be preserved because they might be used by LinearScan ++ // Exceptiop oop and throwing PC are passed in JavaThread ++ // tos: stack at point of call to method that threw the exception (i.e. only ++ // args are on the stack, no return address) ++ ++ // Return address will be patched later with the throwing pc. The correct value is not ++ // available now because loading it from memory would destroy registers. ++ // Save everything in sight. ++ // No need to update map as each call to save_live_registers will produce identical oopmap ++ __ addiu(RA, RA, - (NativeCall::return_address_offset_long)); ++ (void) RegisterSaver::save_live_registers(masm, additional_words, &frame_size_in_words); ++ ++ // Now it is safe to overwrite any register ++ // store the correct deoptimization type ++ __ move(reason, Deoptimization::Unpack_exception); ++ // load throwing pc from JavaThread and patch it as the return address ++ // of the current frame. Then clear the field in JavaThread ++#ifndef OPT_THREAD ++ __ get_thread(thread); ++#endif ++ __ ld_ptr(V1, thread, in_bytes(JavaThread::exception_pc_offset())); ++ __ st_ptr(V1, SP, RegisterSaver::raOffset() * wordSize); //save ra ++ __ st_ptr(R0, thread, in_bytes(JavaThread::exception_pc_offset())); ++ ++ ++#ifdef ASSERT ++ // verify that there is really an exception oop in JavaThread ++ __ ld_ptr(AT, thread, in_bytes(JavaThread::exception_oop_offset())); ++ __ verify_oop(AT); ++ // verify that there is no pending exception ++ Label no_pending_exception; ++ __ ld_ptr(AT, thread, in_bytes(Thread::pending_exception_offset())); ++ __ beq(AT, R0, no_pending_exception); ++ __ delayed()->nop(); ++ __ stop("must not have pending exception here"); ++ __ bind(no_pending_exception); ++#endif ++ __ bind(cont); ++ // Compiled code leaves the floating point stack dirty, empty it. ++ __ empty_FPU_stack(); ++ ++ ++ // Call C code. Need thread and this frame, but NOT official VM entry ++ // crud. We cannot block on this call, no GC can happen. ++#ifndef OPT_THREAD ++ __ get_thread(thread); ++#endif ++ ++ __ move(A0, thread); ++ __ move(A1, reason); // exec_mode ++ __ addiu(SP, SP, -additional_words * wordSize); ++ ++ __ set_last_Java_frame(NOREG, NOREG, NULL); ++ ++ // Call fetch_unroll_info(). Need thread and this frame, but NOT official VM entry - cannot block on ++ // this call, no GC can happen. Call should capture return values. 
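A note for orientation while reading the three prologs above (Unpack_deopt, Unpack_reexecute, Unpack_exception) and the uncommon-trap blob further down: the unpack kind held in the `reason` register is what fetch_unroll_info()/unpack_frames() use to decide how the interpreter frames get rebuilt. The summary below is a hedged reading aid, not part of the patch; the enum name, member order and comments are from memory of deoptimization.hpp and are glosses, not quotes.

    // Hedged summary of the unpack kinds used by the blobs in this file.
    enum UnpackType {
      Unpack_deopt,          // normal deoptimization at a point in compiled code
      Unpack_exception,      // an exception is pending; oop/pc were stashed in the JavaThread
      Unpack_uncommon_trap,  // C2 uncommon trap (see generate_uncommon_trap_blob below)
      Unpack_reexecute       // re-execute the bytecode that triggered the deoptimization
    };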
++ ++ __ relocate(relocInfo::internal_pc_type); ++ { ++ intptr_t save_pc = (intptr_t)__ pc() + NativeMovConstReg::instruction_size + 28; ++ __ patchable_set48(AT, save_pc); ++ } ++ __ sd(AT, thread, in_bytes(JavaThread::frame_anchor_offset() + JavaFrameAnchor::last_Java_pc_offset())); ++ ++ __ call((address)Deoptimization::fetch_unroll_info); ++ //__ call(CAST_FROM_FN_PTR(address, Deoptimization::fetch_unroll_info), relocInfo::runtime_call_type); ++ __ delayed()->nop(); ++ oop_maps->add_gc_map(__ pc() - start, map); ++ __ addiu(SP, SP, additional_words * wordSize); ++#ifndef OPT_THREAD ++ __ get_thread(thread); ++#endif ++ __ reset_last_Java_frame(false); ++ ++ // Load UnrollBlock into S7 ++ __ move(unroll, V0); ++ ++ ++ // Move the unpack kind to a safe place in the UnrollBlock because ++ // we are very short of registers ++ ++ Address unpack_kind(unroll, Deoptimization::UnrollBlock::unpack_kind_offset_in_bytes()); ++ __ sw(reason, unpack_kind); ++ // save the unpack_kind value ++ // Retrieve the possible live values (return values) ++ // All callee save registers representing jvm state ++ // are now in the vframeArray. ++ ++ Label noException; ++ __ move(AT, Deoptimization::Unpack_exception); ++ __ bne(AT, reason, noException);// Was exception pending? ++ __ delayed()->nop(); ++ __ ld_ptr(V0, thread, in_bytes(JavaThread::exception_oop_offset())); ++ __ ld_ptr(V1, thread, in_bytes(JavaThread::exception_pc_offset())); ++ __ st_ptr(R0, thread, in_bytes(JavaThread::exception_pc_offset())); ++ __ st_ptr(R0, thread, in_bytes(JavaThread::exception_oop_offset())); ++ ++ __ verify_oop(V0); ++ ++ // Overwrite the result registers with the exception results. ++ __ st_ptr(V0, SP, RegisterSaver::v0Offset()*wordSize); ++ __ st_ptr(V1, SP, RegisterSaver::v1Offset()*wordSize); ++ ++ __ bind(noException); ++ ++ ++ // Stack is back to only having register save data on the stack. ++ // Now restore the result registers. Everything else is either dead or captured ++ // in the vframeArray. ++ ++ RegisterSaver::restore_result_registers(masm); ++ // All of the register save area has been popped of the stack. Only the ++ // return address remains. ++ // Pop all the frames we must move/replace. ++ // Frame picture (youngest to oldest) ++ // 1: self-frame (no frame link) ++ // 2: deopting frame (no frame link) ++ // 3: caller of deopting frame (could be compiled/interpreted). ++ // ++ // Note: by leaving the return address of self-frame on the stack ++ // and using the size of frame 2 to adjust the stack ++ // when we are done the return to frame 3 will still be on the stack. 
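The pointer arithmetic of the frame-pushing loop below is easy to lose among the delay slots, so here is a small host-side C++ model of the same loop as a reading aid only (not part of the patch). The UnrollBlock values are mocked, frame sizes are in bytes as in the real code, and the last_sp/sender_sp stores plus alignment are reduced to comments.

    #include <cstdint>
    #include <cstdio>
    #include <vector>

    int main() {
      const int64_t wordSize = 8;
      // Mocked UnrollBlock contents: two interpreter frames to rebuild.
      std::vector<int64_t> frame_sizes = {10 * wordSize, 14 * wordSize};
      int64_t caller_adjustment = 2 * wordSize;   // extra locals for the caller

      int64_t sp = 0;                 // SP as a byte offset, growing downwards
      int64_t sender_sp = sp;         // recorded before the adjustment, as in the asm
      sp -= caller_adjustment;
      for (size_t i = 0; i < frame_sizes.size(); i++) {
        int64_t body = frame_sizes[i] - 2 * wordSize;  // pc and fp are pushed by hand
        sp -= 2 * wordSize;                            // push2(pc, fp)
        int64_t fp = sp;                               // move(FP, SP)
        sp -= body;                                    // subu(SP, SP, T2): frame body
        // here the asm clears interpreter_frame_last_sp and stores sender_sp
        std::printf("frame %zu: fp=%lld sp=%lld sender_sp=%lld\n",
                    i, (long long)fp, (long long)sp, (long long)sender_sp);
        sender_sp = sp;                                // passed on to the next frame
      }
      return 0;
    }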
++ ++ // register for the sender's sp ++ Register sender_sp = Rsender; ++ // register for frame pcs ++ Register pcs = T0; ++ // register for frame sizes ++ Register sizes = T1; ++ // register for frame count ++ Register count = T3; ++ ++ // Pop deoptimized frame ++ __ lw(AT, unroll, Deoptimization::UnrollBlock::size_of_deoptimized_frame_offset_in_bytes()); ++ __ addu(SP, SP, AT); ++ // sp should be pointing at the return address to the caller (3) ++ ++ // Load array of frame pcs into pcs ++ __ ld_ptr(pcs, unroll, Deoptimization::UnrollBlock::frame_pcs_offset_in_bytes()); ++ __ addiu(SP, SP, wordSize); // trash the old pc ++ // Load array of frame sizes into T6 ++ __ ld_ptr(sizes, unroll, Deoptimization::UnrollBlock::frame_sizes_offset_in_bytes()); ++ ++ ++ ++ // Load count of frams into T3 ++ __ lw(count, unroll, Deoptimization::UnrollBlock::number_of_frames_offset_in_bytes()); ++ // Pick up the initial fp we should save ++ __ ld(FP, unroll, Deoptimization::UnrollBlock::initial_info_offset_in_bytes()); ++ // Now adjust the caller's stack to make up for the extra locals ++ // but record the original sp so that we can save it in the skeletal interpreter ++ // frame and the stack walking of interpreter_sender will get the unextended sp ++ // value and not the "real" sp value. ++ __ move(sender_sp, SP); ++ __ lw(AT, unroll, Deoptimization::UnrollBlock::caller_adjustment_offset_in_bytes()); ++ __ subu(SP, SP, AT); ++ ++ // Push interpreter frames in a loop ++ // ++ //Loop: ++ // 0x000000555bd82d18: lw t2, 0x0(t1) ; lw sizes[i] <--- error lw->ld ++ // 0x000000555bd82d1c: ld at, 0x0(t0) ; ld pcs[i] ++ // 0x000000555bd82d20: daddiu t2, t2, 0xfffffff0 ; t2 -= 16 ++ // 0x000000555bd82d24: daddiu sp, sp, 0xfffffff0 ++ // 0x000000555bd82d28: sd fp, 0x0(sp) ; push fp ++ // 0x000000555bd82d2c: sd at, 0x8(sp) ; push at ++ // 0x000000555bd82d30: daddu fp, sp, zero ; fp <- sp ++ // 0x000000555bd82d34: dsubu sp, sp, t2 ; sp -= t2 ++ // 0x000000555bd82d38: sd zero, 0xfffffff0(fp) ; __ sd(R0, FP, frame::interpreter_frame_last_sp_offset * wordSize); ++ // 0x000000555bd82d3c: sd s4, 0xfffffff8(fp) ; __ sd(sender_sp, FP, frame::interpreter_frame_sender_sp_offset * wordSize); ++ // 0x000000555bd82d40: daddu s4, sp, zero ; move(sender_sp, SP); ++ // 0x000000555bd82d44: daddiu t3, t3, 0xffffffff ; count -- ++ // 0x000000555bd82d48: daddiu t1, t1, 0x4 ; sizes += 4 ++ // 0x000000555bd82d4c: bne t3, zero, 0x000000555bd82d18 ++ // 0x000000555bd82d50: daddiu t0, t0, 0x4 ; <--- error t0 += 8 ++ // ++ // pcs[0] = frame_pcs[0] = deopt_sender.raw_pc(); regex.split ++ Label loop; ++ __ bind(loop); ++ __ ld(T2, sizes, 0); // Load frame size ++ __ ld_ptr(AT, pcs, 0); // save return address ++ __ addiu(T2, T2, -2*wordSize); // we'll push pc and fp, by hand ++ __ push2(AT, FP); ++ __ move(FP, SP); ++ __ subu(SP, SP, T2); // Prolog! 
++ // This value is corrected by layout_activation_impl ++ __ sd(R0, FP, frame::interpreter_frame_last_sp_offset * wordSize); ++ __ sd(sender_sp, FP, frame::interpreter_frame_sender_sp_offset * wordSize);// Make it walkable ++ __ move(sender_sp, SP); // pass to next frame ++ __ addiu(count, count, -1); // decrement counter ++ __ addiu(sizes, sizes, wordSize); // Bump array pointer (sizes) ++ __ bne(count, R0, loop); ++ __ delayed()->addiu(pcs, pcs, wordSize); // Bump array pointer (pcs) ++ __ ld(AT, pcs, 0); // frame_pcs[number_of_frames] = Interpreter::deopt_entry(vtos, 0); ++ // Re-push self-frame ++ __ push2(AT, FP); ++ __ move(FP, SP); ++ __ sd(R0, FP, frame::interpreter_frame_last_sp_offset * wordSize); ++ __ sd(sender_sp, FP, frame::interpreter_frame_sender_sp_offset * wordSize); ++ __ addiu(SP, SP, -(frame_size_in_words - 2 - additional_words) * wordSize); ++ ++ // Restore frame locals after moving the frame ++ __ sd(V0, SP, RegisterSaver::v0Offset() * wordSize); ++ __ sd(V1, SP, RegisterSaver::v1Offset() * wordSize); ++ __ sdc1(F0, SP, RegisterSaver::fpResultOffset()* wordSize);// Pop float stack and store in local ++ __ sdc1(F1, SP, (RegisterSaver::fpResultOffset() + 1) * wordSize); ++ ++ ++ // Call unpack_frames(). Need thread and this frame, but NOT official VM entry - cannot block on ++ // this call, no GC can happen. ++ __ move(A1, reason); // exec_mode ++#ifndef OPT_THREAD ++ __ get_thread(thread); ++#endif ++ __ move(A0, thread); // thread ++ __ addiu(SP, SP, (-additional_words) *wordSize); ++ ++ // set last_Java_sp, last_Java_fp ++ __ set_last_Java_frame(NOREG, FP, NULL); ++ ++ __ move(AT, -(StackAlignmentInBytes)); ++ __ andr(SP, SP, AT); // Fix stack alignment as required by ABI ++ ++ __ relocate(relocInfo::internal_pc_type); ++ { ++ intptr_t save_pc = (intptr_t)__ pc() + NativeMovConstReg::instruction_size + 28; ++ __ patchable_set48(AT, save_pc); ++ } ++ __ sd(AT, thread, in_bytes(JavaThread::frame_anchor_offset() + JavaFrameAnchor::last_Java_pc_offset())); ++ ++ __ call(CAST_FROM_FN_PTR(address, Deoptimization::unpack_frames), relocInfo::runtime_call_type); ++ __ delayed()->nop(); ++ // Revert SP alignment after call since we're going to do some SP relative addressing below ++ __ ld(SP, thread, in_bytes(JavaThread::last_Java_sp_offset())); ++ // Set an oopmap for the call site ++ oop_maps->add_gc_map(__ offset(), new OopMap( frame_size_in_words , 0)); ++ ++ __ push(V0); ++ ++#ifndef OPT_THREAD ++ __ get_thread(thread); ++#endif ++ __ reset_last_Java_frame(true); ++ ++ // Collect return values ++ __ ld(V0, SP, (RegisterSaver::v0Offset() + additional_words + 1) * wordSize); ++ __ ld(V1, SP, (RegisterSaver::v1Offset() + additional_words + 1) * wordSize); ++ __ ldc1(F0, SP, (RegisterSaver::fpResultOffset() + additional_words + 1) * wordSize);// Pop float stack and store in local ++ __ ldc1(F1, SP, (RegisterSaver::fpResultOffset() + additional_words + 2) * wordSize); ++ //FIXME, ++ // Clear floating point stack before returning to interpreter ++ __ empty_FPU_stack(); ++ //FIXME, we should consider about float and double ++ // Push a float or double return value if necessary. 
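For context on the call just made, a hedged reference (reproduced from memory of the shared runtime headers, not from this patch): unpack_frames is the entry that finalizes the skeletal frames pushed by the loop above, via layout_activation, which is why the result registers are stored before the call and reloaded from the save area afterwards.

    // Assumed declaration, for reference only:
    static BasicType unpack_frames(JavaThread* thread, int exec_mode);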
++ __ leave(); ++ ++ // Jump to interpreter ++ __ jr(RA); ++ __ delayed()->nop(); ++ ++ masm->flush(); ++ _deopt_blob = DeoptimizationBlob::create(&buffer, oop_maps, 0, exception_offset, reexecute_offset, frame_size_in_words); ++ _deopt_blob->set_unpack_with_exception_in_tls_offset(exception_in_tls_offset); ++} ++ ++#ifdef COMPILER2 ++ ++//------------------------------generate_uncommon_trap_blob-------------------- ++// Ought to generate an ideal graph & compile, but here's some SPARC ASM ++// instead. ++void SharedRuntime::generate_uncommon_trap_blob() { ++ // allocate space for the code ++ ResourceMark rm; ++ // setup code generation tools ++ CodeBuffer buffer ("uncommon_trap_blob", 512*80 , 512*40 ); ++ MacroAssembler* masm = new MacroAssembler(&buffer); ++ ++ enum frame_layout { ++ fp_off, fp_off2, ++ return_off, return_off2, ++ framesize ++ }; ++ assert(framesize % 4 == 0, "sp not 16-byte aligned"); ++ ++ address start = __ pc(); ++ ++ // Push self-frame. ++ __ daddiu(SP, SP, -framesize * BytesPerInt); ++ ++ __ sd(RA, SP, return_off * BytesPerInt); ++ __ sd(FP, SP, fp_off * BytesPerInt); ++ ++ __ daddiu(FP, SP, fp_off * BytesPerInt); ++ ++ // Clear the floating point exception stack ++ __ empty_FPU_stack(); ++ ++ Register thread = TREG; ++ ++#ifndef OPT_THREAD ++ __ get_thread(thread); ++#endif ++ // set last_Java_sp ++ __ set_last_Java_frame(NOREG, FP, NULL); ++ __ relocate(relocInfo::internal_pc_type); ++ { ++ long save_pc = (long)__ pc() + 56; ++ __ patchable_set48(AT, (long)save_pc); ++ __ sd(AT, thread, in_bytes(JavaThread::frame_anchor_offset() + JavaFrameAnchor::last_Java_pc_offset())); ++ } ++ // Call C code. Need thread but NOT official VM entry ++ // crud. We cannot block on this call, no GC can happen. Call should ++ // capture callee-saved registers as well as return values. ++ __ move(A0, thread); ++ // argument already in T0 ++ __ move(A1, T0); ++ __ addiu(A2, R0, Deoptimization::Unpack_uncommon_trap); ++ __ patchable_call((address)Deoptimization::uncommon_trap); ++ ++ // Set an oopmap for the call site ++ OopMapSet *oop_maps = new OopMapSet(); ++ OopMap* map = new OopMap( framesize, 0 ); ++ ++ //oop_maps->add_gc_map( __ offset(), true, map); ++ oop_maps->add_gc_map( __ offset(), map); ++ ++#ifndef OPT_THREAD ++ __ get_thread(thread); ++#endif ++ __ reset_last_Java_frame(false); ++ ++ // Load UnrollBlock into S7 ++ Register unroll = S7; ++ __ move(unroll, V0); ++ ++#ifdef ASSERT ++ { Label L; ++ __ ld_ptr(AT, unroll, Deoptimization::UnrollBlock::unpack_kind_offset_in_bytes()); ++ __ li(T9, Deoptimization::Unpack_uncommon_trap); ++ __ beq(AT, T9, L); ++ __ delayed()->nop(); ++ __ stop("SharedRuntime::generate_deopt_blob: expected Unpack_uncommon_trap"); ++ __ bind(L); ++ } ++#endif ++ ++ // Pop all the frames we must move/replace. ++ // ++ // Frame picture (youngest to oldest) ++ // 1: self-frame (no frame link) ++ // 2: deopting frame (no frame link) ++ // 3: possible-i2c-adapter-frame ++ // 4: caller of deopting frame (could be compiled/interpreted. 
If interpreted we will create an ++ // and c2i here) ++ ++ __ daddiu(SP, SP, framesize * BytesPerInt); ++ ++ // Pop deoptimized frame ++ __ lw(AT, unroll, Deoptimization::UnrollBlock::size_of_deoptimized_frame_offset_in_bytes()); ++ __ daddu(SP, SP, AT); ++ ++ // register for frame pcs ++ Register pcs = T8; ++ // register for frame sizes ++ Register sizes = T9; ++ // register for frame count ++ Register count = T3; ++ // register for the sender's sp ++ Register sender_sp = T1; ++ ++ // sp should be pointing at the return address to the caller (4) ++ // Load array of frame pcs ++ __ ld(pcs, unroll, Deoptimization::UnrollBlock::frame_pcs_offset_in_bytes()); ++ ++ // Load array of frame sizes ++ __ ld(sizes, unroll, Deoptimization::UnrollBlock::frame_sizes_offset_in_bytes()); ++ __ lwu(count, unroll, Deoptimization::UnrollBlock::number_of_frames_offset_in_bytes()); ++ ++ // Pick up the initial fp we should save ++ __ ld(FP, unroll, Deoptimization::UnrollBlock::initial_info_offset_in_bytes()); ++ // Now adjust the caller's stack to make up for the extra locals ++ // but record the original sp so that we can save it in the skeletal interpreter ++ // frame and the stack walking of interpreter_sender will get the unextended sp ++ // value and not the "real" sp value. ++ ++ __ move(sender_sp, SP); ++ __ lw(AT, unroll, Deoptimization::UnrollBlock::caller_adjustment_offset_in_bytes()); ++ __ dsubu(SP, SP, AT); ++ // Push interpreter frames in a loop ++ Label loop; ++ __ bind(loop); ++ __ ld(T2, sizes, 0); // Load frame size ++ __ ld(AT, pcs, 0); // save return address ++ __ daddiu(T2, T2, -2*wordSize); // we'll push pc and fp, by hand ++ __ push2(AT, FP); ++ __ move(FP, SP); ++ __ dsubu(SP, SP, T2); // Prolog! ++ // This value is corrected by layout_activation_impl ++ __ sd(R0, FP, frame::interpreter_frame_last_sp_offset * wordSize); ++ __ sd(sender_sp, FP, frame::interpreter_frame_sender_sp_offset * wordSize);// Make it walkable ++ __ move(sender_sp, SP); // pass to next frame ++ __ daddiu(count, count, -1); // decrement counter ++ __ daddiu(sizes, sizes, wordSize); // Bump array pointer (sizes) ++ __ addiu(pcs, pcs, wordSize); // Bump array pointer (pcs) ++ __ bne(count, R0, loop); ++ __ delayed()->nop(); // Bump array pointer (pcs) ++ ++ __ ld(RA, pcs, 0); ++ ++ // Re-push self-frame ++ // save old & set new FP ++ // save final return address ++ __ enter(); ++ ++ // Use FP because the frames look interpreted now ++ // Save "the_pc" since it cannot easily be retrieved using the last_java_SP after we aligned SP. ++ // Don't need the precise return PC here, just precise enough to point into this code blob. ++ address the_pc = __ pc(); ++ __ set_last_Java_frame(NOREG, FP, the_pc); ++ ++ __ move(AT, -(StackAlignmentInBytes)); ++ __ andr(SP, SP, AT); // Fix stack alignment as required by ABI ++ ++ // Call C code. Need thread but NOT official VM entry ++ // crud. We cannot block on this call, no GC can happen. Call should ++ // restore return values to their stack-slots with the new SP. ++ __ move(A0, thread); ++ __ addiu(A1, R0, Deoptimization::Unpack_uncommon_trap); ++ __ patchable_call((address)Deoptimization::unpack_frames); ++ // Set an oopmap for the call site ++ oop_maps->add_gc_map( __ offset(), new OopMap( framesize, 0 ) ); ++ ++ __ reset_last_Java_frame(true); ++ ++ // Pop self-frame. ++ __ leave(); // Epilog! 
++ ++ // Jump to interpreter ++ __ jr(RA); ++ __ delayed()->nop(); ++ // ------------- ++ // make sure all code is generated ++ masm->flush(); ++ ++ _uncommon_trap_blob = UncommonTrapBlob::create(&buffer, oop_maps, framesize / 2); ++} ++ ++#endif // COMPILER2 ++ ++//------------------------------generate_handler_blob------------------- ++// ++// Generate a special Compile2Runtime blob that saves all registers, and sets ++// up an OopMap and calls safepoint code to stop the compiled code for ++// a safepoint. ++// ++// This blob is jumped to (via a breakpoint and the signal handler) from a ++// safepoint in compiled code. ++ ++SafepointBlob* SharedRuntime::generate_handler_blob(address call_ptr, int pool_type) { ++ ++ // Account for thread arg in our frame ++ const int additional_words = 0; ++ int frame_size_in_words; ++ ++ assert (StubRoutines::forward_exception_entry() != NULL, "must be generated before"); ++ ++ ResourceMark rm; ++ OopMapSet *oop_maps = new OopMapSet(); ++ OopMap* map; ++ ++ // allocate space for the code ++ // setup code generation tools ++ CodeBuffer buffer ("handler_blob", 2048, 512); ++ MacroAssembler* masm = new MacroAssembler( &buffer); ++ ++ const Register thread = TREG; ++ address start = __ pc(); ++ address call_pc = NULL; ++ bool cause_return = (pool_type == POLL_AT_RETURN); ++ bool save_vectors = (pool_type == POLL_AT_VECTOR_LOOP); ++ ++#ifndef OPT_THREAD ++ __ get_thread(thread); ++#endif ++ ++ map = RegisterSaver::save_live_registers(masm, additional_words, &frame_size_in_words, save_vectors); ++ ++#ifndef OPT_THREAD ++ __ get_thread(thread); ++#endif ++ ++ // The following is basically a call_VM. However, we need the precise ++ // address of the call in order to generate an oopmap. Hence, we do all the ++ // work outselvs. ++ ++ __ set_last_Java_frame(NOREG, NOREG, NULL); ++ ++ if (!cause_return) { ++ // overwrite the return address pushed by save_live_registers ++ // Additionally, TSR is a callee-saved register so we can look at ++ // it later to determine if someone changed the return address for ++ // us! ++ __ ld_ptr(TSR, thread, in_bytes(JavaThread::saved_exception_pc_offset())); ++ __ st_ptr(TSR, SP, RegisterSaver::raOffset() * wordSize); ++ } ++ ++ // Do the call ++ __ move(A0, thread); ++ __ call(call_ptr); ++ __ delayed()->nop(); ++ ++ // Set an oopmap for the call site. This oopmap will map all ++ // oop-registers and debug-info registers as callee-saved. This ++ // will allow deoptimization at this safepoint to find all possible ++ // debug-info recordings, as well as let GC find all oops. ++ oop_maps->add_gc_map(__ offset(), map); ++ ++ Label noException; ++ ++ // Clear last_Java_sp again ++ __ reset_last_Java_frame(false); ++ ++ __ ld_ptr(AT, thread, in_bytes(Thread::pending_exception_offset())); ++ __ beq(AT, R0, noException); ++ __ delayed()->nop(); ++ ++ // Exception pending ++ ++ RegisterSaver::restore_live_registers(masm, save_vectors); ++ //forward_exception_entry need return address on the stack ++ __ push(RA); ++ __ patchable_jump((address)StubRoutines::forward_exception_entry()); ++ ++ // No exception case ++ __ bind(noException); ++ ++ Label no_adjust, bail; ++ if (SafepointMechanism::uses_thread_local_poll() && !cause_return) { ++ // If our stashed return pc was modified by the runtime we avoid touching it ++ __ ld_ptr(AT, SP, RegisterSaver::raOffset() * wordSize); ++ __ bne(AT, TSR, no_adjust); ++ __ delayed()->nop(); ++ ++#ifdef ASSERT ++ // Verify the correct encoding of the poll we're about to skip. 
++ // See NativeInstruction::is_safepoint_poll() ++ __ lwu(AT, TSR, 0); ++ __ dsrl(AT, AT, 16); ++ __ andi(AT, AT, 0xfc1f); ++ __ xori(AT, AT, 0x8c01); ++ __ bne(AT, R0, bail); ++ __ delayed()->nop(); ++#endif ++ // Adjust return pc forward to step over the safepoint poll instruction ++ __ addiu(RA, TSR, 4); // NativeInstruction::instruction_size=4 ++ __ st_ptr(RA, SP, RegisterSaver::raOffset() * wordSize); ++ } ++ ++ __ bind(no_adjust); ++ // Normal exit, register restoring and exit ++ RegisterSaver::restore_live_registers(masm, save_vectors); ++ __ jr(RA); ++ __ delayed()->nop(); ++ ++#ifdef ASSERT ++ __ bind(bail); ++ __ stop("Attempting to adjust pc to skip safepoint poll but the return point is not what we expected"); ++#endif ++ ++ // Make sure all code is generated ++ masm->flush(); ++ ++ // Fill-out other meta info ++ return SafepointBlob::create(&buffer, oop_maps, frame_size_in_words); ++} ++ ++// ++// generate_resolve_blob - call resolution (static/virtual/opt-virtual/ic-miss ++// ++// Generate a stub that calls into vm to find out the proper destination ++// of a java call. All the argument registers are live at this point ++// but since this is generic code we don't know what they are and the caller ++// must do any gc of the args. ++// ++RuntimeStub* SharedRuntime::generate_resolve_blob(address destination, const char* name) { ++ assert (StubRoutines::forward_exception_entry() != NULL, "must be generated before"); ++ ++ // allocate space for the code ++ ResourceMark rm; ++ ++ //CodeBuffer buffer(name, 1000, 512); ++ CodeBuffer buffer(name, 2000, 2048); ++ MacroAssembler* masm = new MacroAssembler(&buffer); ++ ++ int frame_size_words; ++ //we put the thread in A0 ++ ++ OopMapSet *oop_maps = new OopMapSet(); ++ OopMap* map = NULL; ++ ++ int start = __ offset(); ++ map = RegisterSaver::save_live_registers(masm, 0, &frame_size_words); ++ ++ ++ int frame_complete = __ offset(); ++ ++#ifndef OPT_THREAD ++ const Register thread = T8; ++ __ get_thread(thread); ++#else ++ const Register thread = TREG; ++#endif ++ ++ __ move(A0, thread); ++ __ set_last_Java_frame(noreg, FP, NULL); ++ //align the stack before invoke native ++ __ move(AT, -(StackAlignmentInBytes)); ++ __ andr(SP, SP, AT); ++ __ relocate(relocInfo::internal_pc_type); ++ { ++ intptr_t save_pc = (intptr_t)__ pc() + NativeMovConstReg::instruction_size + 24 + 1 * BytesPerInstWord; ++ __ patchable_set48(AT, save_pc); ++ } ++ __ sd(AT, thread, in_bytes(JavaThread::last_Java_pc_offset())); ++ ++ __ call(destination); ++ __ delayed()->nop(); ++ ++ // Set an oopmap for the call site. ++ // We need this not only for callee-saved registers, but also for volatile ++ // registers that the compiler might be keeping live across a safepoint. ++ oop_maps->add_gc_map( __ offset() - start, map); ++ // V0 contains the address we are going to jump to assuming no exception got installed ++#ifndef OPT_THREAD ++ __ get_thread(thread); ++#endif ++ __ ld_ptr(SP, thread, in_bytes(JavaThread::last_Java_sp_offset())); ++ // clear last_Java_sp ++ __ reset_last_Java_frame(true); ++ // check for pending exceptions ++ Label pending; ++ __ ld_ptr(AT, thread, in_bytes(Thread::pending_exception_offset())); ++ __ bne(AT, R0, pending); ++ __ delayed()->nop(); ++ // get the returned Method* ++ //FIXME, do mips need this ? 
++ __ get_vm_result_2(Rmethod, thread); // Refer to OpenJDK8 ++ __ st_ptr(Rmethod, SP, RegisterSaver::methodOffset() * wordSize); ++ __ st_ptr(V0, SP, RegisterSaver::v0Offset() * wordSize); ++ RegisterSaver::restore_live_registers(masm); ++ ++ // We are back the the original state on entry and ready to go the callee method. ++ __ jr(V0); ++ __ delayed()->nop(); ++ // Pending exception after the safepoint ++ ++ __ bind(pending); ++ ++ RegisterSaver::restore_live_registers(masm); ++ ++ // exception pending => remove activation and forward to exception handler ++ //forward_exception_entry need return address on the stack ++ __ push(RA); ++#ifndef OPT_THREAD ++ __ get_thread(thread); ++#endif ++ __ st_ptr(R0, thread, in_bytes(JavaThread::vm_result_offset())); ++ __ ld_ptr(V0, thread, in_bytes(Thread::pending_exception_offset())); ++ __ jmp(StubRoutines::forward_exception_entry(), relocInfo::runtime_call_type); ++ __ delayed()->nop(); ++ // ++ // make sure all code is generated ++ masm->flush(); ++ ++ RuntimeStub* tmp= RuntimeStub::new_runtime_stub(name, &buffer, frame_complete, frame_size_words, oop_maps, true); ++ return tmp; ++} ++ ++extern "C" int SpinPause() {return 0;} ++ ++ ++//------------------------------Montgomery multiplication------------------------ ++// ++ ++// Subtract 0:b from carry:a. Return carry. ++static unsigned long ++sub(unsigned long a[], unsigned long b[], unsigned long carry, long len) { ++ long borrow = 0, t = 0; ++ unsigned long tmp0, tmp1; ++ __asm__ __volatile__ ( ++ "0: \n" ++ "ld %[tmp0], 0(%[a]) \n" ++ "ld %[tmp1], 0(%[b]) \n" ++ "sltu %[t], %[tmp0], %[borrow] \n" ++ "dsubu %[tmp0], %[tmp0], %[borrow] \n" ++ "sltu %[borrow], %[tmp0], %[tmp1] \n" ++ "or %[borrow], %[borrow], %[t] \n" ++ "dsubu %[tmp0], %[tmp0], %[tmp1] \n" ++ "sd %[tmp0], 0(%[a]) \n" ++ "daddiu %[a], %[a], 8 \n" ++ "daddiu %[b], %[b], 8 \n" ++ "daddiu %[len], %[len], -1 \n" ++ "bgtz %[len], 0b \n" ++ "dsubu %[tmp0], %[carry], %[borrow] \n" ++ : [len]"+r"(len), [tmp0]"=&r"(tmp0), [tmp1]"=&r"(tmp1), [borrow]"+r"(borrow), [a]"+r"(a), [b]"+r"(b), [t]"+r"(t) ++ : [carry]"r"(carry) ++ : "memory" ++ ); ++ return tmp0; ++} ++ ++// Multiply (unsigned) Long A by Long B, accumulating the double- ++// length result into the accumulator formed of t0, t1, and t2. ++inline void MACC(unsigned long A, unsigned long B, unsigned long &t0, unsigned long &t1, unsigned long &t2) { ++ unsigned long hi, lo, carry = 0, t = 0; ++ __asm__ __volatile__( ++ "dmultu %[A], %[B] \n" ++ "mfhi %[hi] \n" ++ "mflo %[lo] \n" ++ "daddu %[t0], %[t0], %[lo] \n" ++ "sltu %[carry], %[t0], %[lo] \n" ++ "daddu %[t1], %[t1], %[carry] \n" ++ "sltu %[t], %[t1], %[carry] \n" ++ "daddu %[t1], %[t1], %[hi] \n" ++ "sltu %[carry], %[t1], %[hi] \n" ++ "or %[carry], %[carry], %[t] \n" ++ "daddu %[t2], %[t2], %[carry] \n" ++ : [hi]"=&r"(hi), [lo]"=&r"(lo), [t0]"+r"(t0), [t1]"+r"(t1), [t2]"+r"(t2), [carry]"+r"(carry), [t]"+r"(t) ++ : [A]"r"(A), [B]"r"(B) ++ : ++ ); ++} ++ ++// As above, but add twice the double-length result into the ++// accumulator. 
++inline void MACC2(unsigned long A, unsigned long B, unsigned long &t0, unsigned long &t1, unsigned long &t2) { ++ unsigned long hi, lo, carry = 0, t = 0; ++ __asm__ __volatile__( ++ "dmultu %[A], %[B] \n" ++ "mfhi %[hi] \n" ++ "mflo %[lo] \n" ++ "daddu %[t0], %[t0], %[lo] \n" ++ "sltu %[carry], %[t0], %[lo] \n" ++ "daddu %[t1], %[t1], %[carry] \n" ++ "sltu %[t], %[t1], %[carry] \n" ++ "daddu %[t1], %[t1], %[hi] \n" ++ "sltu %[carry], %[t1], %[hi] \n" ++ "or %[carry], %[carry], %[t] \n" ++ "daddu %[t2], %[t2], %[carry] \n" ++ "daddu %[t0], %[t0], %[lo] \n" ++ "sltu %[carry], %[t0], %[lo] \n" ++ "daddu %[t1], %[t1], %[carry] \n" ++ "sltu %[t], %[t1], %[carry] \n" ++ "daddu %[t1], %[t1], %[hi] \n" ++ "sltu %[carry], %[t1], %[hi] \n" ++ "or %[carry], %[carry], %[t] \n" ++ "daddu %[t2], %[t2], %[carry] \n" ++ : [hi]"=&r"(hi), [lo]"=&r"(lo), [t0]"+r"(t0), [t1]"+r"(t1), [t2]"+r"(t2), [carry]"+r"(carry), [t]"+r"(t) ++ : [A]"r"(A), [B]"r"(B) ++ : ++ ); ++} ++ ++// Fast Montgomery multiplication. The derivation of the algorithm is ++// in A Cryptographic Library for the Motorola DSP56000, ++// Dusse and Kaliski, Proc. EUROCRYPT 90, pp. 230-237. ++ ++static void __attribute__((noinline)) ++montgomery_multiply(unsigned long a[], unsigned long b[], unsigned long n[], ++ unsigned long m[], unsigned long inv, int len) { ++ unsigned long t0 = 0, t1 = 0, t2 = 0; // Triple-precision accumulator ++ int i; ++ ++ assert(inv * n[0] == -1UL, "broken inverse in Montgomery multiply"); ++ ++ for (i = 0; i < len; i++) { ++ int j; ++ for (j = 0; j < i; j++) { ++ MACC(a[j], b[i-j], t0, t1, t2); ++ MACC(m[j], n[i-j], t0, t1, t2); ++ } ++ MACC(a[i], b[0], t0, t1, t2); ++ m[i] = t0 * inv; ++ MACC(m[i], n[0], t0, t1, t2); ++ ++ assert(t0 == 0, "broken Montgomery multiply"); ++ ++ t0 = t1; t1 = t2; t2 = 0; ++ } ++ ++ for (i = len; i < 2*len; i++) { ++ int j; ++ for (j = i-len+1; j < len; j++) { ++ MACC(a[j], b[i-j], t0, t1, t2); ++ MACC(m[j], n[i-j], t0, t1, t2); ++ } ++ m[i-len] = t0; ++ t0 = t1; t1 = t2; t2 = 0; ++ } ++ ++ while (t0) ++ t0 = sub(m, n, t0, len); ++} ++ ++// Fast Montgomery squaring. This uses asymptotically 25% fewer ++// multiplies so it should be up to 25% faster than Montgomery ++// multiplication. However, its loop control is more complex and it ++// may actually run slower on some machines. 
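The dmultu/mfhi/mflo sequences in MACC and MACC2 above are just a 64x64-to-128-bit multiply folded into a three-word accumulator. Below is a portable sketch of what MACC computes (MACC2 is the same with the product added twice); it is not part of the patch and assumes a compiler that provides the unsigned __int128 extension.

    #include <cstdint>

    // t2:t1:t0 is the 192-bit accumulator used by the Montgomery loops.
    static inline void macc_portable(uint64_t a, uint64_t b,
                                     uint64_t &t0, uint64_t &t1, uint64_t &t2) {
      unsigned __int128 p = (unsigned __int128)a * b;        // full 128-bit product
      unsigned __int128 s = (unsigned __int128)t0 + (uint64_t)p;
      t0 = (uint64_t)s;                                      // low word
      unsigned __int128 c = (s >> 64) + (uint64_t)(p >> 64) + t1;
      t1 = (uint64_t)c;                                      // middle word
      t2 += (uint64_t)(c >> 64);                             // propagate the carry
    }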
++ ++static void __attribute__((noinline)) ++montgomery_square(unsigned long a[], unsigned long n[], ++ unsigned long m[], unsigned long inv, int len) { ++ unsigned long t0 = 0, t1 = 0, t2 = 0; // Triple-precision accumulator ++ int i; ++ ++ assert(inv * n[0] == -1UL, "broken inverse in Montgomery multiply"); ++ ++ for (i = 0; i < len; i++) { ++ int j; ++ int end = (i+1)/2; ++ for (j = 0; j < end; j++) { ++ MACC2(a[j], a[i-j], t0, t1, t2); ++ MACC(m[j], n[i-j], t0, t1, t2); ++ } ++ if ((i & 1) == 0) { ++ MACC(a[j], a[j], t0, t1, t2); ++ } ++ for (; j < i; j++) { ++ MACC(m[j], n[i-j], t0, t1, t2); ++ } ++ m[i] = t0 * inv; ++ MACC(m[i], n[0], t0, t1, t2); ++ ++ assert(t0 == 0, "broken Montgomery square"); ++ ++ t0 = t1; t1 = t2; t2 = 0; ++ } ++ ++ for (i = len; i < 2*len; i++) { ++ int start = i-len+1; ++ int end = start + (len - start)/2; ++ int j; ++ for (j = start; j < end; j++) { ++ MACC2(a[j], a[i-j], t0, t1, t2); ++ MACC(m[j], n[i-j], t0, t1, t2); ++ } ++ if ((i & 1) == 0) { ++ MACC(a[j], a[j], t0, t1, t2); ++ } ++ for (; j < len; j++) { ++ MACC(m[j], n[i-j], t0, t1, t2); ++ } ++ m[i-len] = t0; ++ t0 = t1; t1 = t2; t2 = 0; ++ } ++ ++ while (t0) ++ t0 = sub(m, n, t0, len); ++} ++ ++// Swap words in a longword. ++static unsigned long swap(unsigned long x) { ++ return (x << 32) | (x >> 32); ++} ++ ++// Copy len longwords from s to d, word-swapping as we go. The ++// destination array is reversed. ++static void reverse_words(unsigned long *s, unsigned long *d, int len) { ++ d += len; ++ while(len-- > 0) { ++ d--; ++ *d = swap(*s); ++ s++; ++ } ++} ++ ++// The threshold at which squaring is advantageous was determined ++// experimentally on an i7-3930K (Ivy Bridge) CPU @ 3.5GHz. ++// Doesn't seem to be relevant for MIPS64 so we use the same value. ++#define MONTGOMERY_SQUARING_THRESHOLD 64 ++ ++void SharedRuntime::montgomery_multiply(jint *a_ints, jint *b_ints, jint *n_ints, ++ jint len, jlong inv, ++ jint *m_ints) { ++ assert(len % 2 == 0, "array length in montgomery_multiply must be even"); ++ int longwords = len/2; ++ ++ // Make very sure we don't use so much space that the stack might ++ // overflow. 512 jints corresponds to an 16384-bit integer and ++ // will use here a total of 8k bytes of stack space. ++ int total_allocation = longwords * sizeof (unsigned long) * 4; ++ guarantee(total_allocation <= 8192, "must be"); ++ unsigned long *scratch = (unsigned long *)alloca(total_allocation); ++ ++ // Local scratch arrays ++ unsigned long ++ *a = scratch + 0 * longwords, ++ *b = scratch + 1 * longwords, ++ *n = scratch + 2 * longwords, ++ *m = scratch + 3 * longwords; ++ ++ reverse_words((unsigned long *)a_ints, a, longwords); ++ reverse_words((unsigned long *)b_ints, b, longwords); ++ reverse_words((unsigned long *)n_ints, n, longwords); ++ ++ ::montgomery_multiply(a, b, n, m, (unsigned long)inv, longwords); ++ ++ reverse_words(m, (unsigned long *)m_ints, longwords); ++} ++ ++void SharedRuntime::montgomery_square(jint *a_ints, jint *n_ints, ++ jint len, jlong inv, ++ jint *m_ints) { ++ assert(len % 2 == 0, "array length in montgomery_square must be even"); ++ int longwords = len/2; ++ ++ // Make very sure we don't use so much space that the stack might ++ // overflow. 512 jints corresponds to an 16384-bit integer and ++ // will use here a total of 6k bytes of stack space. 
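A quick worked example of the swap()/reverse_words() helpers defined above, since "word-swapping while reversing" is easy to misread; the values are illustrative only and the snippet is not part of the patch.

    unsigned long s[2] = {0x0000000100000002UL, 0x0000000300000004UL};
    unsigned long d[2];
    reverse_words(s, d, 2);
    // d[0] == 0x0000000400000003UL   (last source longword, 32-bit halves swapped)
    // d[1] == 0x0000000200000001UL   (first source longword, halves swapped)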
++ int total_allocation = longwords * sizeof (unsigned long) * 3; ++ guarantee(total_allocation <= 8192, "must be"); ++ unsigned long *scratch = (unsigned long *)alloca(total_allocation); ++ ++ // Local scratch arrays ++ unsigned long ++ *a = scratch + 0 * longwords, ++ *n = scratch + 1 * longwords, ++ *m = scratch + 2 * longwords; ++ ++ reverse_words((unsigned long *)a_ints, a, longwords); ++ reverse_words((unsigned long *)n_ints, n, longwords); ++ ++ if (len >= MONTGOMERY_SQUARING_THRESHOLD) { ++ ::montgomery_square(a, n, m, (unsigned long)inv, longwords); ++ } else { ++ ::montgomery_multiply(a, a, n, m, (unsigned long)inv, longwords); ++ } ++ ++ reverse_words(m, (unsigned long *)m_ints, longwords); ++} +diff --git a/src/hotspot/cpu/mips/stubGenerator_mips_64.cpp b/src/hotspot/cpu/mips/stubGenerator_mips_64.cpp +new file mode 100644 +index 0000000000..9fe2bc8377 +--- /dev/null ++++ b/src/hotspot/cpu/mips/stubGenerator_mips_64.cpp +@@ -0,0 +1,2162 @@ ++/* ++ * Copyright (c) 2003, 2013, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#include "precompiled.hpp" ++#include "asm/macroAssembler.hpp" ++#include "asm/macroAssembler.inline.hpp" ++#include "gc/shared/barrierSet.hpp" ++#include "gc/shared/barrierSetAssembler.hpp" ++#include "interpreter/interpreter.hpp" ++#include "nativeInst_mips.hpp" ++#include "oops/instanceOop.hpp" ++#include "oops/method.hpp" ++#include "oops/objArrayKlass.hpp" ++#include "oops/oop.inline.hpp" ++#include "prims/methodHandles.hpp" ++#include "runtime/frame.inline.hpp" ++#include "runtime/handles.inline.hpp" ++#include "runtime/sharedRuntime.hpp" ++#include "runtime/stubCodeGenerator.hpp" ++#include "runtime/stubRoutines.hpp" ++#include "runtime/thread.inline.hpp" ++#ifdef COMPILER2 ++#include "opto/runtime.hpp" ++#endif ++ ++// Declaration and definition of StubGenerator (no .hpp file). ++// For a more detailed description of the stub routine structure ++// see the comment in stubRoutines.hpp ++ ++#define __ _masm-> ++ ++#define T0 RT0 ++#define T1 RT1 ++#define T2 RT2 ++#define T3 RT3 ++#define T8 RT8 ++#define T9 RT9 ++ ++#define TIMES_OOP (UseCompressedOops ? 
Address::times_4 : Address::times_8) ++//#define a__ ((Assembler*)_masm)-> ++ ++//#ifdef PRODUCT ++//#define BLOCK_COMMENT(str) /* nothing */ ++//#else ++//#define BLOCK_COMMENT(str) __ block_comment(str) ++//#endif ++ ++//#define BIND(label) bind(label); BLOCK_COMMENT(#label ":") ++const int MXCSR_MASK = 0xFFC0; // Mask out any pending exceptions ++ ++// Stub Code definitions ++ ++class StubGenerator: public StubCodeGenerator { ++ private: ++ ++ // ABI mips n64 ++ // This fig is not MIPS ABI. It is call Java from C ABI. ++ // Call stubs are used to call Java from C ++ // ++ // [ return_from_Java ] ++ // [ argument word n-1 ] <--- sp ++ // ... ++ // [ argument word 0 ] ++ // ... ++ // -8 [ S6 ] ++ // -7 [ S5 ] ++ // -6 [ S4 ] ++ // -5 [ S3 ] ++ // -4 [ S1 ] ++ // -3 [ TSR(S2) ] ++ // -2 [ LVP(S7) ] ++ // -1 [ BCP(S1) ] ++ // 0 [ saved fp ] <--- fp_after_call ++ // 1 [ return address ] ++ // 2 [ ptr. to call wrapper ] <--- a0 (old sp -->)fp ++ // 3 [ result ] <--- a1 ++ // 4 [ result_type ] <--- a2 ++ // 5 [ method ] <--- a3 ++ // 6 [ entry_point ] <--- a4 ++ // 7 [ parameters ] <--- a5 ++ // 8 [ parameter_size ] <--- a6 ++ // 9 [ thread ] <--- a7 ++ ++ // ++ // n64 does not save paras in sp. ++ // ++ // [ return_from_Java ] ++ // [ argument word n-1 ] <--- sp ++ // ... ++ // [ argument word 0 ] ++ // ... ++ //-13 [ thread ] ++ //-12 [ result_type ] <--- a2 ++ //-11 [ result ] <--- a1 ++ //-10 [ ] ++ // -9 [ ptr. to call wrapper ] <--- a0 ++ // -8 [ S6 ] ++ // -7 [ S5 ] ++ // -6 [ S4 ] ++ // -5 [ S3 ] ++ // -4 [ S1 ] ++ // -3 [ TSR(S2) ] ++ // -2 [ LVP(S7) ] ++ // -1 [ BCP(S1) ] ++ // 0 [ saved fp ] <--- fp_after_call ++ // 1 [ return address ] ++ // 2 [ ] <--- old sp ++ // ++ // Find a right place in the call_stub for GP. ++ // GP will point to the starting point of Interpreter::dispatch_table(itos). ++ // It should be saved/restored before/after Java calls. ++ // ++ enum call_stub_layout { ++ RA_off = 1, ++ FP_off = 0, ++ BCP_off = -1, ++ LVP_off = -2, ++ TSR_off = -3, ++ S1_off = -4, ++ S3_off = -5, ++ S4_off = -6, ++ S5_off = -7, ++ S6_off = -8, ++ call_wrapper_off = -9, ++ result_off = -11, ++ result_type_off = -12, ++ thread_off = -13, ++ total_off = thread_off - 1, ++ GP_off = -14, ++ }; ++ ++ address generate_call_stub(address& return_address) { ++ ++ StubCodeMark mark(this, "StubRoutines", "call_stub"); ++ address start = __ pc(); ++ ++ // same as in generate_catch_exception()! 
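To connect the frame diagrams and the call_stub_layout enum above with the caller's side: the stub is entered from JavaCalls::call_helper through the CallStub function type declared in stubRoutines.hpp. The declaration below is reproduced from memory as a hedged reference, not part of the patch; under the n64 ABI the eight arguments arrive in A0..A7, matching the a0..a7 annotations in the diagram.

    typedef void (*CallStub)(
        address   link,               // A0: ptr. to call wrapper
        intptr_t* result,             // A1
        BasicType result_type,        // A2
        Method*   method,             // A3
        address   entry_point,        // A4
        intptr_t* parameters,         // A5
        int       size_of_parameters, // A6
        TRAPS                         // A7: thread
    );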
++ ++ // stub code ++ // save ra and fp ++ __ enter(); ++ // I think 14 is the max gap between argument and callee saved register ++ assert((int)frame::entry_frame_call_wrapper_offset == (int)call_wrapper_off, "adjust this code"); ++ __ daddiu(SP, SP, total_off * wordSize); ++ __ sd(BCP, FP, BCP_off * wordSize); ++ __ sd(LVP, FP, LVP_off * wordSize); ++ __ sd(TSR, FP, TSR_off * wordSize); ++ __ sd(S1, FP, S1_off * wordSize); ++ __ sd(S3, FP, S3_off * wordSize); ++ __ sd(S4, FP, S4_off * wordSize); ++ __ sd(S5, FP, S5_off * wordSize); ++ __ sd(S6, FP, S6_off * wordSize); ++ __ sd(A0, FP, call_wrapper_off * wordSize); ++ __ sd(A1, FP, result_off * wordSize); ++ __ sd(A2, FP, result_type_off * wordSize); ++ __ sd(A7, FP, thread_off * wordSize); ++ __ sd(GP, FP, GP_off * wordSize); ++ ++ __ set64(GP, (long)Interpreter::dispatch_table(itos)); ++ ++#ifdef OPT_THREAD ++ __ move(TREG, A7); ++#endif ++ //add for compressedoops ++ __ reinit_heapbase(); ++ ++#ifdef ASSERT ++ // make sure we have no pending exceptions ++ { ++ Label L; ++ __ ld(AT, A7, in_bytes(Thread::pending_exception_offset())); ++ __ beq(AT, R0, L); ++ __ delayed()->nop(); ++ /* FIXME: I do not know how to realize stop in mips arch, do it in the future */ ++ __ stop("StubRoutines::call_stub: entered with pending exception"); ++ __ bind(L); ++ } ++#endif ++ ++ // pass parameters if any ++ // A5: parameter ++ // A6: parameter_size ++ // T0: parameter_size_tmp(--) ++ // T2: offset(++) ++ // T3: tmp ++ Label parameters_done; ++ // judge if the parameter_size equals 0 ++ __ beq(A6, R0, parameters_done); ++ __ delayed()->nop(); ++ __ dsll(AT, A6, Interpreter::logStackElementSize); ++ __ dsubu(SP, SP, AT); ++ __ move(AT, -StackAlignmentInBytes); ++ __ andr(SP, SP , AT); ++ // Copy Java parameters in reverse order (receiver last) ++ // Note that the argument order is inverted in the process ++ Label loop; ++ __ move(T0, A6); ++ __ move(T2, R0); ++ __ bind(loop); ++ ++ // get parameter ++ __ dsll(T3, T0, LogBytesPerWord); ++ __ daddu(T3, T3, A5); ++ __ ld(AT, T3, -wordSize); ++ __ dsll(T3, T2, LogBytesPerWord); ++ __ daddu(T3, T3, SP); ++ __ sd(AT, T3, Interpreter::expr_offset_in_bytes(0)); ++ __ daddiu(T2, T2, 1); ++ __ daddiu(T0, T0, -1); ++ __ bne(T0, R0, loop); ++ __ delayed()->nop(); ++ // advance to next parameter ++ ++ // call Java function ++ __ bind(parameters_done); ++ ++ // receiver in V0, methodOop in Rmethod ++ ++ __ move(Rmethod, A3); ++ __ move(Rsender, SP); //set sender sp ++ __ jalr(A4); ++ __ delayed()->nop(); ++ return_address = __ pc(); ++ ++ Label common_return; ++ __ bind(common_return); ++ ++ // store result depending on type ++ // (everything that is not T_LONG, T_FLOAT or T_DOUBLE is treated as T_INT) ++ __ ld(T0, FP, result_off * wordSize); // result --> T0 ++ Label is_long, is_float, is_double, exit; ++ __ ld(T2, FP, result_type_off * wordSize); // result_type --> T2 ++ __ daddiu(T3, T2, (-1) * T_LONG); ++ __ beq(T3, R0, is_long); ++ __ delayed()->daddiu(T3, T2, (-1) * T_FLOAT); ++ __ beq(T3, R0, is_float); ++ __ delayed()->daddiu(T3, T2, (-1) * T_DOUBLE); ++ __ beq(T3, R0, is_double); ++ __ delayed()->nop(); ++ ++ // handle T_INT case ++ __ sd(V0, T0, 0 * wordSize); ++ __ bind(exit); ++ ++ // restore ++ __ ld(BCP, FP, BCP_off * wordSize); ++ __ ld(LVP, FP, LVP_off * wordSize); ++ __ ld(GP, FP, GP_off * wordSize); ++ __ ld(TSR, FP, TSR_off * wordSize); ++ ++ __ ld(S1, FP, S1_off * wordSize); ++ __ ld(S3, FP, S3_off * wordSize); ++ __ ld(S4, FP, S4_off * wordSize); ++ __ ld(S5, FP, S5_off * wordSize); ++ __ ld(S6, 
FP, S6_off * wordSize); ++ ++ __ leave(); ++ ++ // return ++ __ jr(RA); ++ __ delayed()->nop(); ++ ++ // handle return types different from T_INT ++ __ bind(is_long); ++ __ sd(V0, T0, 0 * wordSize); ++ __ b(exit); ++ __ delayed()->nop(); ++ ++ __ bind(is_float); ++ __ swc1(F0, T0, 0 * wordSize); ++ __ b(exit); ++ __ delayed()->nop(); ++ ++ __ bind(is_double); ++ __ sdc1(F0, T0, 0 * wordSize); ++ __ b(exit); ++ __ delayed()->nop(); ++ //FIXME, 1.6 mips version add operation of fpu here ++ StubRoutines::gs2::set_call_stub_compiled_return(__ pc()); ++ __ b(common_return); ++ __ delayed()->nop(); ++ return start; ++ } ++ ++ // Return point for a Java call if there's an exception thrown in ++ // Java code. The exception is caught and transformed into a ++ // pending exception stored in JavaThread that can be tested from ++ // within the VM. ++ // ++ // Note: Usually the parameters are removed by the callee. In case ++ // of an exception crossing an activation frame boundary, that is ++ // not the case if the callee is compiled code => need to setup the ++ // sp. ++ // ++ // V0: exception oop ++ ++ address generate_catch_exception() { ++ StubCodeMark mark(this, "StubRoutines", "catch_exception"); ++ address start = __ pc(); ++ ++ Register thread = TREG; ++ ++ // get thread directly ++#ifndef OPT_THREAD ++ __ ld(thread, FP, thread_off * wordSize); ++#endif ++ ++#ifdef ASSERT ++ // verify that threads correspond ++ { Label L; ++ __ get_thread(T8); ++ __ beq(T8, thread, L); ++ __ delayed()->nop(); ++ __ stop("StubRoutines::catch_exception: threads must correspond"); ++ __ bind(L); ++ } ++#endif ++ // set pending exception ++ __ verify_oop(V0); ++ __ sd(V0, thread, in_bytes(Thread::pending_exception_offset())); ++ __ li(AT, (long)__FILE__); ++ __ sd(AT, thread, in_bytes(Thread::exception_file_offset ())); ++ __ li(AT, (long)__LINE__); ++ __ sd(AT, thread, in_bytes(Thread::exception_line_offset ())); ++ ++ // complete return to VM ++ assert(StubRoutines::_call_stub_return_address != NULL, "_call_stub_return_address must have been generated before"); ++ __ jmp(StubRoutines::_call_stub_return_address, relocInfo::none); ++ __ delayed()->nop(); ++ ++ return start; ++ } ++ ++ // Continuation point for runtime calls returning with a pending ++ // exception. The pending exception check happened in the runtime ++ // or native call stub. The pending exception in Thread is ++ // converted into a Java-level exception. ++ // ++ // Contract with Java-level exception handlers: ++ // V0: exception ++ // V1: throwing pc ++ // ++ // NOTE: At entry of this stub, exception-pc must be on stack !! ++ ++ address generate_forward_exception() { ++ StubCodeMark mark(this, "StubRoutines", "forward exception"); ++ //Register thread = TREG; ++ Register thread = TREG; ++ address start = __ pc(); ++ ++ // Upon entry, the sp points to the return address returning into ++ // Java (interpreted or compiled) code; i.e., the return address ++ // throwing pc. ++ // ++ // Arguments pushed before the runtime call are still on the stack ++ // but the exception handler will reset the stack pointer -> ++ // ignore them. A potential result in registers can be ignored as ++ // well. 
++ ++#ifndef OPT_THREAD ++ __ get_thread(thread); ++#endif ++#ifdef ASSERT ++ // make sure this code is only executed if there is a pending exception ++ { ++ Label L; ++ __ ld(AT, thread, in_bytes(Thread::pending_exception_offset())); ++ __ bne(AT, R0, L); ++ __ delayed()->nop(); ++ __ stop("StubRoutines::forward exception: no pending exception (1)"); ++ __ bind(L); ++ } ++#endif ++ ++ // compute exception handler into T9 ++ __ ld(A1, SP, 0); ++ __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::exception_handler_for_return_address), thread, A1); ++ __ move(T9, V0); ++ __ pop(V1); ++ ++#ifndef OPT_THREAD ++ __ get_thread(thread); ++#endif ++ __ ld(V0, thread, in_bytes(Thread::pending_exception_offset())); ++ __ sd(R0, thread, in_bytes(Thread::pending_exception_offset())); ++ ++#ifdef ASSERT ++ // make sure exception is set ++ { ++ Label L; ++ __ bne(V0, R0, L); ++ __ delayed()->nop(); ++ __ stop("StubRoutines::forward exception: no pending exception (2)"); ++ __ bind(L); ++ } ++#endif ++ ++ // continue at exception handler (return address removed) ++ // V0: exception ++ // T9: exception handler ++ // V1: throwing pc ++ __ verify_oop(V0); ++ __ jr(T9); ++ __ delayed()->nop(); ++ ++ return start; ++ } ++ ++ // Non-destructive plausibility checks for oops ++ // ++ address generate_verify_oop() { ++ StubCodeMark mark(this, "StubRoutines", "verify_oop"); ++ address start = __ pc(); ++ __ reinit_heapbase(); ++ __ verify_oop_subroutine(); ++ address end = __ pc(); ++ return start; ++ } ++ ++ // ++ // Generate overlap test for array copy stubs ++ // ++ // Input: ++ // A0 - array1 ++ // A1 - array2 ++ // A2 - element count ++ // ++ ++ // use T9 as temp ++ void array_overlap_test(address no_overlap_target, int log2_elem_size) { ++ int elem_size = 1 << log2_elem_size; ++ Address::ScaleFactor sf = Address::times_1; ++ ++ switch (log2_elem_size) { ++ case 0: sf = Address::times_1; break; ++ case 1: sf = Address::times_2; break; ++ case 2: sf = Address::times_4; break; ++ case 3: sf = Address::times_8; break; ++ } ++ ++ __ dsll(AT, A2, sf); ++ __ daddu(AT, AT, A0); ++ __ daddiu(T9, AT, -elem_size); ++ __ dsubu(AT, A1, A0); ++ __ blez(AT, no_overlap_target); ++ __ delayed()->nop(); ++ __ dsubu(AT, A1, T9); ++ __ bgtz(AT, no_overlap_target); ++ __ delayed()->nop(); ++ ++ // If A0 = 0xf... and A1 = 0x0..., than goto no_overlap_target ++ Label L; ++ __ bgez(A0, L); ++ __ delayed()->nop(); ++ __ bgtz(A1, no_overlap_target); ++ __ delayed()->nop(); ++ __ bind(L); ++ ++ } ++ ++ // ++ // Generate stub for array fill. If "aligned" is true, the ++ // "to" address is assumed to be heapword aligned. 
++ // ++ // Arguments for generated stub: ++ // to: c_rarg0 ++ // value: c_rarg1 ++ // count: c_rarg2 treated as signed ++ // ++ address generate_fill(BasicType t, bool aligned, const char *name) { ++ __ align(CodeEntryAlignment); ++ StubCodeMark mark(this, "StubRoutines", name); ++ address start = __ pc(); ++ ++ const Register to = A0; // source array address ++ const Register value = A1; // value ++ const Register count = A2; // elements count ++ ++ const Register cnt_words = T8; // temp register ++ ++ __ enter(); ++ ++ Label L_fill_elements, L_exit1; ++ ++ int shift = -1; ++ switch (t) { ++ case T_BYTE: ++ shift = 0; ++ __ slti(AT, count, 8 >> shift); // Short arrays (< 8 bytes) fill by element ++ __ dins(value, value, 8, 8); // 8 bit -> 16 bit ++ __ dins(value, value, 16, 16); // 16 bit -> 32 bit ++ __ bne(AT, R0, L_fill_elements); ++ __ delayed()->nop(); ++ break; ++ case T_SHORT: ++ shift = 1; ++ __ slti(AT, count, 8 >> shift); // Short arrays (< 8 bytes) fill by element ++ __ dins(value, value, 16, 16); // 16 bit -> 32 bit ++ __ bne(AT, R0, L_fill_elements); ++ __ delayed()->nop(); ++ break; ++ case T_INT: ++ shift = 2; ++ __ slti(AT, count, 8 >> shift); // Short arrays (< 8 bytes) fill by element ++ __ bne(AT, R0, L_fill_elements); ++ __ delayed()->nop(); ++ break; ++ default: ShouldNotReachHere(); ++ } ++ ++ // Align source address at 8 bytes address boundary. ++ Label L_skip_align1, L_skip_align2, L_skip_align4; ++ if (!aligned) { ++ switch (t) { ++ case T_BYTE: ++ // One byte misalignment happens only for byte arrays. ++ __ andi(AT, to, 1); ++ __ beq(AT, R0, L_skip_align1); ++ __ delayed()->nop(); ++ __ sb(value, to, 0); ++ __ daddiu(to, to, 1); ++ __ addiu32(count, count, -1); ++ __ bind(L_skip_align1); ++ // Fallthrough ++ case T_SHORT: ++ // Two bytes misalignment happens only for byte and short (char) arrays. ++ __ andi(AT, to, 1 << 1); ++ __ beq(AT, R0, L_skip_align2); ++ __ delayed()->nop(); ++ __ sh(value, to, 0); ++ __ daddiu(to, to, 2); ++ __ addiu32(count, count, -(2 >> shift)); ++ __ bind(L_skip_align2); ++ // Fallthrough ++ case T_INT: ++ // Align to 8 bytes, we know we are 4 byte aligned to start. ++ __ andi(AT, to, 1 << 2); ++ __ beq(AT, R0, L_skip_align4); ++ __ delayed()->nop(); ++ __ sw(value, to, 0); ++ __ daddiu(to, to, 4); ++ __ addiu32(count, count, -(4 >> shift)); ++ __ bind(L_skip_align4); ++ break; ++ default: ShouldNotReachHere(); ++ } ++ } ++ ++ // ++ // Fill large chunks ++ // ++ __ srl(cnt_words, count, 3 - shift); // number of words ++ __ dinsu(value, value, 32, 32); // 32 bit -> 64 bit ++ __ sll(AT, cnt_words, 3 - shift); ++ __ subu32(count, count, AT); ++ ++ Label L_loop_begin, L_loop_not_64bytes_fill, L_loop_end; ++ __ addiu32(AT, cnt_words, -8); ++ __ bltz(AT, L_loop_not_64bytes_fill); ++ __ delayed()->nop(); ++ __ bind(L_loop_begin); ++ __ sd(value, to, 0); ++ __ sd(value, to, 8); ++ __ sd(value, to, 16); ++ __ sd(value, to, 24); ++ __ sd(value, to, 32); ++ __ sd(value, to, 40); ++ __ sd(value, to, 48); ++ __ sd(value, to, 56); ++ __ daddiu(to, to, 64); ++ __ addiu32(cnt_words, cnt_words, -8); ++ __ addiu32(AT, cnt_words, -8); ++ __ bgez(AT, L_loop_begin); ++ __ delayed()->nop(); ++ ++ __ bind(L_loop_not_64bytes_fill); ++ __ beq(cnt_words, R0, L_loop_end); ++ __ delayed()->nop(); ++ __ sd(value, to, 0); ++ __ daddiu(to, to, 8); ++ __ addiu32(cnt_words, cnt_words, -1); ++ __ b(L_loop_not_64bytes_fill); ++ __ delayed()->nop(); ++ __ bind(L_loop_end); ++ ++ // Remaining count is less than 8 bytes. Fill it by a single store. 
++ // Note that the total length is no less than 8 bytes. ++ if (t == T_BYTE || t == T_SHORT) { ++ Label L_exit1; ++ __ beq(count, R0, L_exit1); ++ __ delayed()->nop(); ++ __ sll(AT, count, shift); ++ __ daddu(to, to, AT); // points to the end ++ __ sd(value, to, -8); // overwrite some elements ++ __ bind(L_exit1); ++ __ leave(); ++ __ jr(RA); ++ __ delayed()->nop(); ++ } ++ ++ // Handle copies less than 8 bytes. ++ Label L_fill_2, L_fill_4, L_exit2; ++ __ bind(L_fill_elements); ++ switch (t) { ++ case T_BYTE: ++ __ andi(AT, count, 1); ++ __ beq(AT, R0, L_fill_2); ++ __ delayed()->nop(); ++ __ sb(value, to, 0); ++ __ daddiu(to, to, 1); ++ __ bind(L_fill_2); ++ __ andi(AT, count, 1 << 1); ++ __ beq(AT, R0, L_fill_4); ++ __ delayed()->nop(); ++ __ sh(value, to, 0); ++ __ daddiu(to, to, 2); ++ __ bind(L_fill_4); ++ __ andi(AT, count, 1 << 2); ++ __ beq(AT, R0, L_exit2); ++ __ delayed()->nop(); ++ __ sw(value, to, 0); ++ break; ++ case T_SHORT: ++ __ andi(AT, count, 1); ++ __ beq(AT, R0, L_fill_4); ++ __ delayed()->nop(); ++ __ sh(value, to, 0); ++ __ daddiu(to, to, 2); ++ __ bind(L_fill_4); ++ __ andi(AT, count, 1 << 1); ++ __ beq(AT, R0, L_exit2); ++ __ delayed()->nop(); ++ __ sw(value, to, 0); ++ break; ++ case T_INT: ++ __ beq(count, R0, L_exit2); ++ __ delayed()->nop(); ++ __ sw(value, to, 0); ++ break; ++ default: ShouldNotReachHere(); ++ } ++ __ bind(L_exit2); ++ __ leave(); ++ __ jr(RA); ++ __ delayed()->nop(); ++ return start; ++ } ++ ++ // Arguments: ++ // aligned - true => Input and output aligned on a HeapWord == 8-byte boundary ++ // ignored ++ // name - stub name string ++ // ++ // Inputs: ++ // c_rarg0 - source array address ++ // c_rarg1 - destination array address ++ // c_rarg2 - element count, treated as ssize_t, can be zero ++ // ++ // If 'from' and/or 'to' are aligned on 4-, 2-, or 1-byte boundaries, ++ // we let the hardware handle it. The one to eight bytes within words, ++ // dwords or qwords that span cache line boundaries will still be loaded ++ // and stored atomically. ++ // ++ // Side Effects: ++ // disjoint_byte_copy_entry is set to the no-overlap entry point ++ // used by generate_conjoint_byte_copy(). ++ // ++ address generate_disjoint_byte_copy(bool aligned, const char * name) { ++ StubCodeMark mark(this, "StubRoutines", name); ++ __ align(CodeEntryAlignment); ++ ++ ++ Register tmp1 = T0; ++ Register tmp2 = T1; ++ Register tmp3 = T3; ++ ++ address start = __ pc(); ++ ++ __ push(tmp1); ++ __ push(tmp2); ++ __ push(tmp3); ++ __ move(tmp1, A0); ++ __ move(tmp2, A1); ++ __ move(tmp3, A2); ++ ++ ++ Label l_1, l_2, l_3, l_4, l_5, l_6, l_7, l_8, l_9, l_10, l_11; ++ Label l_debug; ++ ++ __ daddiu(AT, tmp3, -9); //why the number is 9 ? ++ __ blez(AT, l_9); ++ __ delayed()->nop(); ++ ++ if (!aligned) { ++ __ xorr(AT, tmp1, tmp2); ++ __ andi(AT, AT, 1); ++ __ bne(AT, R0, l_9); // if arrays don't have the same alignment mod 2, do 1 element copy ++ __ delayed()->nop(); ++ ++ __ andi(AT, tmp1, 1); ++ __ beq(AT, R0, l_10); //copy 1 enlement if necessary to aligh to 2 bytes ++ __ delayed()->nop(); ++ ++ __ lb(AT, tmp1, 0); ++ __ daddiu(tmp1, tmp1, 1); ++ __ sb(AT, tmp2, 0); ++ __ daddiu(tmp2, tmp2, 1); ++ __ daddiu(tmp3, tmp3, -1); ++ __ bind(l_10); ++ ++ __ xorr(AT, tmp1, tmp2); ++ __ andi(AT, AT, 3); ++ __ bne(AT, R0, l_1); // if arrays don't have the same alignment mod 4, do 2 elements copy ++ __ delayed()->nop(); ++ ++ // At this point it is guaranteed that both, from and to have the same alignment mod 4. ++ ++ // Copy 2 elements if necessary to align to 4 bytes. 
++ __ andi(AT, tmp1, 3); ++ __ beq(AT, R0, l_2); ++ __ delayed()->nop(); ++ ++ __ lhu(AT, tmp1, 0); ++ __ daddiu(tmp1, tmp1, 2); ++ __ sh(AT, tmp2, 0); ++ __ daddiu(tmp2, tmp2, 2); ++ __ daddiu(tmp3, tmp3, -2); ++ __ bind(l_2); ++ ++ // At this point the positions of both, from and to, are at least 4 byte aligned. ++ ++ // Copy 4 elements at a time. ++ // Align to 8 bytes, but only if both, from and to, have same alignment mod 8. ++ __ xorr(AT, tmp1, tmp2); ++ __ andi(AT, AT, 7); ++ __ bne(AT, R0, l_6); // not same alignment mod 8 -> copy 2, either from or to will be unaligned ++ __ delayed()->nop(); ++ ++ // Copy a 4 elements if necessary to align to 8 bytes. ++ __ andi(AT, tmp1, 7); ++ __ beq(AT, R0, l_7); ++ __ delayed()->nop(); ++ ++ __ lw(AT, tmp1, 0); ++ __ daddiu(tmp3, tmp3, -4); ++ __ sw(AT, tmp2, 0); ++ { // FasterArrayCopy ++ __ daddiu(tmp1, tmp1, 4); ++ __ daddiu(tmp2, tmp2, 4); ++ } ++ } ++ ++ __ bind(l_7); ++ ++ // Copy 4 elements at a time; either the loads or the stores can ++ // be unaligned if aligned == false. ++ ++ { // FasterArrayCopy ++ __ daddiu(AT, tmp3, -7); ++ __ blez(AT, l_6); // copy 4 at a time if less than 4 elements remain ++ __ delayed()->nop(); ++ ++ __ bind(l_8); ++ // For Loongson, there is 128-bit memory access. TODO ++ __ ld(AT, tmp1, 0); ++ __ sd(AT, tmp2, 0); ++ __ daddiu(tmp1, tmp1, 8); ++ __ daddiu(tmp2, tmp2, 8); ++ __ daddiu(tmp3, tmp3, -8); ++ __ daddiu(AT, tmp3, -8); ++ __ bgez(AT, l_8); ++ __ delayed()->nop(); ++ } ++ __ bind(l_6); ++ ++ // copy 4 bytes at a time ++ { // FasterArrayCopy ++ __ daddiu(AT, tmp3, -3); ++ __ blez(AT, l_1); ++ __ delayed()->nop(); ++ ++ __ bind(l_3); ++ __ lw(AT, tmp1, 0); ++ __ sw(AT, tmp2, 0); ++ __ daddiu(tmp1, tmp1, 4); ++ __ daddiu(tmp2, tmp2, 4); ++ __ daddiu(tmp3, tmp3, -4); ++ __ daddiu(AT, tmp3, -4); ++ __ bgez(AT, l_3); ++ __ delayed()->nop(); ++ ++ } ++ ++ // do 2 bytes copy ++ __ bind(l_1); ++ { ++ __ daddiu(AT, tmp3, -1); ++ __ blez(AT, l_9); ++ __ delayed()->nop(); ++ ++ __ bind(l_5); ++ __ lhu(AT, tmp1, 0); ++ __ daddiu(tmp3, tmp3, -2); ++ __ sh(AT, tmp2, 0); ++ __ daddiu(tmp1, tmp1, 2); ++ __ daddiu(tmp2, tmp2, 2); ++ __ daddiu(AT, tmp3, -2); ++ __ bgez(AT, l_5); ++ __ delayed()->nop(); ++ } ++ ++ //do 1 element copy--byte ++ __ bind(l_9); ++ __ beq(R0, tmp3, l_4); ++ __ delayed()->nop(); ++ ++ { ++ __ bind(l_11); ++ __ lb(AT, tmp1, 0); ++ __ daddiu(tmp3, tmp3, -1); ++ __ sb(AT, tmp2, 0); ++ __ daddiu(tmp1, tmp1, 1); ++ __ daddiu(tmp2, tmp2, 1); ++ __ daddiu(AT, tmp3, -1); ++ __ bgez(AT, l_11); ++ __ delayed()->nop(); ++ } ++ ++ __ bind(l_4); ++ __ pop(tmp3); ++ __ pop(tmp2); ++ __ pop(tmp1); ++ ++ __ jr(RA); ++ __ delayed()->nop(); ++ ++ return start; ++ } ++ ++ // Arguments: ++ // aligned - true => Input and output aligned on a HeapWord == 8-byte boundary ++ // ignored ++ // name - stub name string ++ // ++ // Inputs: ++ // A0 - source array address ++ // A1 - destination array address ++ // A2 - element count, treated as ssize_t, can be zero ++ // ++ // If 'from' and/or 'to' are aligned on 4-, 2-, or 1-byte boundaries, ++ // we let the hardware handle it. The one to eight bytes within words, ++ // dwords or qwords that span cache line boundaries will still be loaded ++ // and stored atomically. 
++ // ++ address generate_conjoint_byte_copy(bool aligned, const char *name) { ++ __ align(CodeEntryAlignment); ++ StubCodeMark mark(this, "StubRoutines", name); ++ address start = __ pc(); ++ ++ Label l_copy_4_bytes_loop, l_copy_suffix, l_copy_suffix_loop, l_exit; ++ Label l_copy_byte, l_from_unaligned, l_unaligned, l_4_bytes_aligned; ++ ++ address nooverlap_target = aligned ? ++ StubRoutines::arrayof_jbyte_disjoint_arraycopy() : ++ StubRoutines::jbyte_disjoint_arraycopy(); ++ ++ array_overlap_test(nooverlap_target, 0); ++ ++ const Register from = A0; // source array address ++ const Register to = A1; // destination array address ++ const Register count = A2; // elements count ++ const Register end_from = T3; // source array end address ++ const Register end_to = T0; // destination array end address ++ const Register end_count = T1; // destination array end address ++ ++ __ push(end_from); ++ __ push(end_to); ++ __ push(end_count); ++ __ push(T8); ++ ++ // copy from high to low ++ __ move(end_count, count); ++ __ daddu(end_from, from, end_count); ++ __ daddu(end_to, to, end_count); ++ ++ // If end_from and end_to has differante alignment, unaligned copy is performed. ++ __ andi(AT, end_from, 3); ++ __ andi(T8, end_to, 3); ++ __ bne(AT, T8, l_copy_byte); ++ __ delayed()->nop(); ++ ++ // First deal with the unaligned data at the top. ++ __ bind(l_unaligned); ++ __ beq(end_count, R0, l_exit); ++ __ delayed()->nop(); ++ ++ __ andi(AT, end_from, 3); ++ __ bne(AT, R0, l_from_unaligned); ++ __ delayed()->nop(); ++ ++ __ andi(AT, end_to, 3); ++ __ beq(AT, R0, l_4_bytes_aligned); ++ __ delayed()->nop(); ++ ++ __ bind(l_from_unaligned); ++ __ lb(AT, end_from, -1); ++ __ sb(AT, end_to, -1); ++ __ daddiu(end_from, end_from, -1); ++ __ daddiu(end_to, end_to, -1); ++ __ daddiu(end_count, end_count, -1); ++ __ b(l_unaligned); ++ __ delayed()->nop(); ++ ++ // now end_to, end_from point to 4-byte aligned high-ends ++ // end_count contains byte count that is not copied. 
++ // copy 4 bytes at a time ++ __ bind(l_4_bytes_aligned); ++ ++ __ move(T8, end_count); ++ __ daddiu(AT, end_count, -3); ++ __ blez(AT, l_copy_suffix); ++ __ delayed()->nop(); ++ ++ //__ andi(T8, T8, 3); ++ __ lea(end_from, Address(end_from, -4)); ++ __ lea(end_to, Address(end_to, -4)); ++ ++ __ dsrl(end_count, end_count, 2); ++ __ align(16); ++ __ bind(l_copy_4_bytes_loop); //l_copy_4_bytes ++ __ lw(AT, end_from, 0); ++ __ sw(AT, end_to, 0); ++ __ addiu(end_from, end_from, -4); ++ __ addiu(end_to, end_to, -4); ++ __ addiu(end_count, end_count, -1); ++ __ bne(end_count, R0, l_copy_4_bytes_loop); ++ __ delayed()->nop(); ++ ++ __ b(l_copy_suffix); ++ __ delayed()->nop(); ++ // copy dwords aligned or not with repeat move ++ // l_copy_suffix ++ // copy suffix (0-3 bytes) ++ __ bind(l_copy_suffix); ++ __ andi(T8, T8, 3); ++ __ beq(T8, R0, l_exit); ++ __ delayed()->nop(); ++ __ addiu(end_from, end_from, 3); ++ __ addiu(end_to, end_to, 3); ++ __ bind(l_copy_suffix_loop); ++ __ lb(AT, end_from, 0); ++ __ sb(AT, end_to, 0); ++ __ addiu(end_from, end_from, -1); ++ __ addiu(end_to, end_to, -1); ++ __ addiu(T8, T8, -1); ++ __ bne(T8, R0, l_copy_suffix_loop); ++ __ delayed()->nop(); ++ ++ __ bind(l_copy_byte); ++ __ beq(end_count, R0, l_exit); ++ __ delayed()->nop(); ++ __ lb(AT, end_from, -1); ++ __ sb(AT, end_to, -1); ++ __ daddiu(end_from, end_from, -1); ++ __ daddiu(end_to, end_to, -1); ++ __ daddiu(end_count, end_count, -1); ++ __ b(l_copy_byte); ++ __ delayed()->nop(); ++ ++ __ bind(l_exit); ++ __ pop(T8); ++ __ pop(end_count); ++ __ pop(end_to); ++ __ pop(end_from); ++ __ jr(RA); ++ __ delayed()->nop(); ++ return start; ++ } ++ ++ // Generate stub for disjoint short copy. If "aligned" is true, the ++ // "from" and "to" addresses are assumed to be heapword aligned. ++ // ++ // Arguments for generated stub: ++ // from: A0 ++ // to: A1 ++ // elm.count: A2 treated as signed ++ // one element: 2 bytes ++ // ++ // Strategy for aligned==true: ++ // ++ // If length <= 9: ++ // 1. copy 1 elements at a time (l_5) ++ // ++ // If length > 9: ++ // 1. copy 4 elements at a time until less than 4 elements are left (l_7) ++ // 2. copy 2 elements at a time until less than 2 elements are left (l_6) ++ // 3. copy last element if one was left in step 2. (l_1) ++ // ++ // ++ // Strategy for aligned==false: ++ // ++ // If length <= 9: same as aligned==true case ++ // ++ // If length > 9: ++ // 1. continue with step 7. if the alignment of from and to mod 4 ++ // is different. ++ // 2. align from and to to 4 bytes by copying 1 element if necessary ++ // 3. at l_2 from and to are 4 byte aligned; continue with ++ // 6. if they cannot be aligned to 8 bytes because they have ++ // got different alignment mod 8. ++ // 4. at this point we know that both, from and to, have the same ++ // alignment mod 8, now copy one element if necessary to get ++ // 8 byte alignment of from and to. ++ // 5. copy 4 elements at a time until less than 4 elements are ++ // left; depending on step 3. all load/stores are aligned. ++ // 6. copy 2 elements at a time until less than 2 elements are ++ // left. (l_6) ++ // 7. copy 1 element at a time. (l_5) ++ // 8. copy last element if one was left in step 6. 
(l_1) ++ ++ address generate_disjoint_short_copy(bool aligned, const char * name) { ++ StubCodeMark mark(this, "StubRoutines", name); ++ __ align(CodeEntryAlignment); ++ ++ Register tmp1 = T0; ++ Register tmp2 = T1; ++ Register tmp3 = T3; ++ Register tmp4 = T8; ++ Register tmp5 = T9; ++ Register tmp6 = T2; ++ ++ address start = __ pc(); ++ ++ __ push(tmp1); ++ __ push(tmp2); ++ __ push(tmp3); ++ __ move(tmp1, A0); ++ __ move(tmp2, A1); ++ __ move(tmp3, A2); ++ ++ Label l_1, l_2, l_3, l_4, l_5, l_6, l_7, l_8, l_9, l_10, l_11, l_12, l_13, l_14; ++ Label l_debug; ++ // don't try anything fancy if arrays don't have many elements ++ __ daddiu(AT, tmp3, -23); ++ __ blez(AT, l_14); ++ __ delayed()->nop(); ++ // move push here ++ __ push(tmp4); ++ __ push(tmp5); ++ __ push(tmp6); ++ ++ if (!aligned) { ++ __ xorr(AT, A0, A1); ++ __ andi(AT, AT, 1); ++ __ bne(AT, R0, l_debug); // if arrays don't have the same alignment mod 2, can this happen? ++ __ delayed()->nop(); ++ ++ __ xorr(AT, A0, A1); ++ __ andi(AT, AT, 3); ++ __ bne(AT, R0, l_1); // if arrays don't have the same alignment mod 4, do 1 element copy ++ __ delayed()->nop(); ++ ++ // At this point it is guaranteed that both, from and to have the same alignment mod 4. ++ ++ // Copy 1 element if necessary to align to 4 bytes. ++ __ andi(AT, A0, 3); ++ __ beq(AT, R0, l_2); ++ __ delayed()->nop(); ++ ++ __ lhu(AT, tmp1, 0); ++ __ daddiu(tmp1, tmp1, 2); ++ __ sh(AT, tmp2, 0); ++ __ daddiu(tmp2, tmp2, 2); ++ __ daddiu(tmp3, tmp3, -1); ++ __ bind(l_2); ++ ++ // At this point the positions of both, from and to, are at least 4 byte aligned. ++ ++ // Copy 4 elements at a time. ++ // Align to 8 bytes, but only if both, from and to, have same alignment mod 8. ++ __ xorr(AT, tmp1, tmp2); ++ __ andi(AT, AT, 7); ++ __ bne(AT, R0, l_6); // not same alignment mod 8 -> copy 2, either from or to will be unaligned ++ __ delayed()->nop(); ++ ++ // Copy a 2-element word if necessary to align to 8 bytes. ++ __ andi(AT, tmp1, 7); ++ __ beq(AT, R0, l_7); ++ __ delayed()->nop(); ++ ++ __ lw(AT, tmp1, 0); ++ __ daddiu(tmp3, tmp3, -2); ++ __ sw(AT, tmp2, 0); ++ __ daddiu(tmp1, tmp1, 4); ++ __ daddiu(tmp2, tmp2, 4); ++ }// end of if (!aligned) ++ ++ __ bind(l_7); ++ // At this time the position of both, from and to, are at least 8 byte aligned. ++ // Copy 8 elemnets at a time. ++ // Align to 16 bytes, but only if both from and to have same alignment mod 8. 
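++    // If from and to do not share the same alignment mod 16, skip to l_9,
++    // which copies with plain 8-byte loads/stores and does not require
++    // 16-byte alignment.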
++ __ xorr(AT, tmp1, tmp2); ++ __ andi(AT, AT, 15); ++ __ bne(AT, R0, l_9); ++ __ delayed()->nop(); ++ ++ // Copy 4-element word if necessary to align to 16 bytes, ++ __ andi(AT, tmp1, 15); ++ __ beq(AT, R0, l_10); ++ __ delayed()->nop(); ++ ++ __ ld(AT, tmp1, 0); ++ __ daddiu(tmp3, tmp3, -4); ++ __ sd(AT, tmp2, 0); ++ __ daddiu(tmp1, tmp1, 8); ++ __ daddiu(tmp2, tmp2, 8); ++ ++ __ bind(l_10); ++ ++ // Copy 8 elements at a time; either the loads or the stores can ++ // be unalligned if aligned == false ++ ++ { // FasterArrayCopy ++ __ bind(l_11); ++ // For loongson the 128-bit memory access instruction is gslq/gssq ++ if (UseLEXT1) { ++ __ gslq(AT, tmp4, tmp1, 0); ++ __ gslq(tmp5, tmp6, tmp1, 16); ++ __ daddiu(tmp1, tmp1, 32); ++ __ daddiu(tmp2, tmp2, 32); ++ __ gssq(AT, tmp4, tmp2, -32); ++ __ gssq(tmp5, tmp6, tmp2, -16); ++ } else { ++ __ ld(AT, tmp1, 0); ++ __ ld(tmp4, tmp1, 8); ++ __ ld(tmp5, tmp1, 16); ++ __ ld(tmp6, tmp1, 24); ++ __ daddiu(tmp1, tmp1, 32); ++ __ sd(AT, tmp2, 0); ++ __ sd(tmp4, tmp2, 8); ++ __ sd(tmp5, tmp2, 16); ++ __ sd(tmp6, tmp2, 24); ++ __ daddiu(tmp2, tmp2, 32); ++ } ++ __ daddiu(tmp3, tmp3, -16); ++ __ daddiu(AT, tmp3, -16); ++ __ bgez(AT, l_11); ++ __ delayed()->nop(); ++ } ++ __ bind(l_9); ++ ++ // Copy 4 elements at a time; either the loads or the stores can ++ // be unaligned if aligned == false. ++ { // FasterArrayCopy ++ __ daddiu(AT, tmp3, -15);// loop unrolling 4 times, so if the elements should not be less than 16 ++ __ blez(AT, l_4); // copy 2 at a time if less than 16 elements remain ++ __ delayed()->nop(); ++ ++ __ bind(l_8); ++ __ ld(AT, tmp1, 0); ++ __ ld(tmp4, tmp1, 8); ++ __ ld(tmp5, tmp1, 16); ++ __ ld(tmp6, tmp1, 24); ++ __ sd(AT, tmp2, 0); ++ __ sd(tmp4, tmp2, 8); ++ __ sd(tmp5, tmp2,16); ++ __ daddiu(tmp1, tmp1, 32); ++ __ daddiu(tmp2, tmp2, 32); ++ __ daddiu(tmp3, tmp3, -16); ++ __ daddiu(AT, tmp3, -16); ++ __ bgez(AT, l_8); ++ __ delayed()->sd(tmp6, tmp2, -8); ++ } ++ __ bind(l_6); ++ ++ // copy 2 element at a time ++ { // FasterArrayCopy ++ __ daddiu(AT, tmp3, -7); ++ __ blez(AT, l_4); ++ __ delayed()->nop(); ++ ++ __ bind(l_3); ++ __ lw(AT, tmp1, 0); ++ __ lw(tmp4, tmp1, 4); ++ __ lw(tmp5, tmp1, 8); ++ __ lw(tmp6, tmp1, 12); ++ __ sw(AT, tmp2, 0); ++ __ sw(tmp4, tmp2, 4); ++ __ sw(tmp5, tmp2, 8); ++ __ daddiu(tmp1, tmp1, 16); ++ __ daddiu(tmp2, tmp2, 16); ++ __ daddiu(tmp3, tmp3, -8); ++ __ daddiu(AT, tmp3, -8); ++ __ bgez(AT, l_3); ++ __ delayed()->sw(tmp6, tmp2, -4); ++ } ++ ++ __ bind(l_1); ++ // do single element copy (8 bit), can this happen? 
++ { // FasterArrayCopy ++ __ daddiu(AT, tmp3, -3); ++ __ blez(AT, l_4); ++ __ delayed()->nop(); ++ ++ __ bind(l_5); ++ __ lhu(AT, tmp1, 0); ++ __ lhu(tmp4, tmp1, 2); ++ __ lhu(tmp5, tmp1, 4); ++ __ lhu(tmp6, tmp1, 6); ++ __ sh(AT, tmp2, 0); ++ __ sh(tmp4, tmp2, 2); ++ __ sh(tmp5, tmp2, 4); ++ __ daddiu(tmp1, tmp1, 8); ++ __ daddiu(tmp2, tmp2, 8); ++ __ daddiu(tmp3, tmp3, -4); ++ __ daddiu(AT, tmp3, -4); ++ __ bgez(AT, l_5); ++ __ delayed()->sh(tmp6, tmp2, -2); ++ } ++ // single element ++ __ bind(l_4); ++ ++ __ pop(tmp6); ++ __ pop(tmp5); ++ __ pop(tmp4); ++ ++ __ bind(l_14); ++ { // FasterArrayCopy ++ __ beq(R0, tmp3, l_13); ++ __ delayed()->nop(); ++ ++ __ bind(l_12); ++ __ lhu(AT, tmp1, 0); ++ __ sh(AT, tmp2, 0); ++ __ daddiu(tmp1, tmp1, 2); ++ __ daddiu(tmp2, tmp2, 2); ++ __ daddiu(tmp3, tmp3, -1); ++ __ daddiu(AT, tmp3, -1); ++ __ bgez(AT, l_12); ++ __ delayed()->nop(); ++ } ++ ++ __ bind(l_13); ++ __ pop(tmp3); ++ __ pop(tmp2); ++ __ pop(tmp1); ++ ++ __ jr(RA); ++ __ delayed()->nop(); ++ ++ __ bind(l_debug); ++ __ stop("generate_disjoint_short_copy should not reach here"); ++ return start; ++ } ++ ++ // Arguments: ++ // aligned - true => Input and output aligned on a HeapWord == 8-byte boundary ++ // ignored ++ // name - stub name string ++ // ++ // Inputs: ++ // c_rarg0 - source array address ++ // c_rarg1 - destination array address ++ // c_rarg2 - element count, treated as ssize_t, can be zero ++ // ++ // If 'from' and/or 'to' are aligned on 4- or 2-byte boundaries, we ++ // let the hardware handle it. The two or four words within dwords ++ // or qwords that span cache line boundaries will still be loaded ++ // and stored atomically. ++ // ++ address generate_conjoint_short_copy(bool aligned, const char *name) { ++ StubCodeMark mark(this, "StubRoutines", name); ++ __ align(CodeEntryAlignment); ++ address start = __ pc(); ++ ++ Label l_exit, l_copy_short, l_from_unaligned, l_unaligned, l_4_bytes_aligned; ++ ++ address nooverlap_target = aligned ? ++ StubRoutines::arrayof_jshort_disjoint_arraycopy() : ++ StubRoutines::jshort_disjoint_arraycopy(); ++ ++ array_overlap_test(nooverlap_target, 1); ++ ++ const Register from = A0; // source array address ++ const Register to = A1; // destination array address ++ const Register count = A2; // elements count ++ const Register end_from = T3; // source array end address ++ const Register end_to = T0; // destination array end address ++ const Register end_count = T1; // destination array end address ++ ++ __ push(end_from); ++ __ push(end_to); ++ __ push(end_count); ++ __ push(T8); ++ ++ // copy from high to low ++ __ move(end_count, count); ++ __ sll(AT, end_count, Address::times_2); ++ __ daddu(end_from, from, AT); ++ __ daddu(end_to, to, AT); ++ ++ // If end_from and end_to has differante alignment, unaligned copy is performed. ++ __ andi(AT, end_from, 3); ++ __ andi(T8, end_to, 3); ++ __ bne(AT, T8, l_copy_short); ++ __ delayed()->nop(); ++ ++ // First deal with the unaligned data at the top. ++ __ bind(l_unaligned); ++ __ beq(end_count, R0, l_exit); ++ __ delayed()->nop(); ++ ++ __ andi(AT, end_from, 3); ++ __ bne(AT, R0, l_from_unaligned); ++ __ delayed()->nop(); ++ ++ __ andi(AT, end_to, 3); ++ __ beq(AT, R0, l_4_bytes_aligned); ++ __ delayed()->nop(); ++ ++ // Copy 1 element if necessary to align to 4 bytes. 
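++    // The loop at l_from_unaligned moves one jshort at a time from the high
++    // end, then branches back to l_unaligned to re-check the remaining
++    // count and alignment.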
++ __ bind(l_from_unaligned); ++ __ lhu(AT, end_from, -2); ++ __ sh(AT, end_to, -2); ++ __ daddiu(end_from, end_from, -2); ++ __ daddiu(end_to, end_to, -2); ++ __ daddiu(end_count, end_count, -1); ++ __ b(l_unaligned); ++ __ delayed()->nop(); ++ ++ // now end_to, end_from point to 4-byte aligned high-ends ++ // end_count contains byte count that is not copied. ++ // copy 4 bytes at a time ++ __ bind(l_4_bytes_aligned); ++ ++ __ daddiu(AT, end_count, -1); ++ __ blez(AT, l_copy_short); ++ __ delayed()->nop(); ++ ++ __ lw(AT, end_from, -4); ++ __ sw(AT, end_to, -4); ++ __ addiu(end_from, end_from, -4); ++ __ addiu(end_to, end_to, -4); ++ __ addiu(end_count, end_count, -2); ++ __ b(l_4_bytes_aligned); ++ __ delayed()->nop(); ++ ++ // copy 1 element at a time ++ __ bind(l_copy_short); ++ __ beq(end_count, R0, l_exit); ++ __ delayed()->nop(); ++ __ lhu(AT, end_from, -2); ++ __ sh(AT, end_to, -2); ++ __ daddiu(end_from, end_from, -2); ++ __ daddiu(end_to, end_to, -2); ++ __ daddiu(end_count, end_count, -1); ++ __ b(l_copy_short); ++ __ delayed()->nop(); ++ ++ __ bind(l_exit); ++ __ pop(T8); ++ __ pop(end_count); ++ __ pop(end_to); ++ __ pop(end_from); ++ __ jr(RA); ++ __ delayed()->nop(); ++ return start; ++ } ++ ++ // Arguments: ++ // aligned - true => Input and output aligned on a HeapWord == 8-byte boundary ++ // ignored ++ // is_oop - true => oop array, so generate store check code ++ // name - stub name string ++ // ++ // Inputs: ++ // c_rarg0 - source array address ++ // c_rarg1 - destination array address ++ // c_rarg2 - element count, treated as ssize_t, can be zero ++ // ++ // If 'from' and/or 'to' are aligned on 4-byte boundaries, we let ++ // the hardware handle it. The two dwords within qwords that span ++ // cache line boundaries will still be loaded and stored atomicly. ++ // ++ // Side Effects: ++ // disjoint_int_copy_entry is set to the no-overlap entry point ++ // used by generate_conjoint_int_oop_copy(). 
++ // ++ address generate_disjoint_int_oop_copy(bool aligned, bool is_oop, const char *name, bool dest_uninitialized = false) { ++ Label l_3, l_4, l_5, l_6, l_7; ++ StubCodeMark mark(this, "StubRoutines", name); ++ ++ __ align(CodeEntryAlignment); ++ address start = __ pc(); ++ __ push(T3); ++ __ push(T0); ++ __ push(T1); ++ __ push(T8); ++ __ push(T9); ++ __ move(T1, A2); ++ __ move(T3, A0); ++ __ move(T0, A1); ++ ++ DecoratorSet decorators = IN_HEAP | IS_ARRAY | ARRAYCOPY_DISJOINT; ++ if (dest_uninitialized) { ++ decorators |= IS_DEST_UNINITIALIZED; ++ } ++ if (aligned) { ++ decorators |= ARRAYCOPY_ALIGNED; ++ } ++ ++ BarrierSetAssembler *bs = BarrierSet::barrier_set()->barrier_set_assembler(); ++ bs->arraycopy_prologue(_masm, decorators, is_oop, A1, A2); ++ ++ if(!aligned) { ++ __ xorr(AT, T3, T0); ++ __ andi(AT, AT, 7); ++ __ bne(AT, R0, l_5); // not same alignment mod 8 -> copy 1 element each time ++ __ delayed()->nop(); ++ ++ __ andi(AT, T3, 7); ++ __ beq(AT, R0, l_6); //copy 2 elements each time ++ __ delayed()->nop(); ++ ++ __ lw(AT, T3, 0); ++ __ daddiu(T1, T1, -1); ++ __ sw(AT, T0, 0); ++ __ daddiu(T3, T3, 4); ++ __ daddiu(T0, T0, 4); ++ } ++ ++ { ++ __ bind(l_6); ++ __ daddiu(AT, T1, -1); ++ __ blez(AT, l_5); ++ __ delayed()->nop(); ++ ++ __ bind(l_7); ++ __ ld(AT, T3, 0); ++ __ sd(AT, T0, 0); ++ __ daddiu(T3, T3, 8); ++ __ daddiu(T0, T0, 8); ++ __ daddiu(T1, T1, -2); ++ __ daddiu(AT, T1, -2); ++ __ bgez(AT, l_7); ++ __ delayed()->nop(); ++ } ++ ++ __ bind(l_5); ++ __ beq(T1, R0, l_4); ++ __ delayed()->nop(); ++ ++ __ align(16); ++ __ bind(l_3); ++ __ lw(AT, T3, 0); ++ __ sw(AT, T0, 0); ++ __ addiu(T3, T3, 4); ++ __ addiu(T0, T0, 4); ++ __ addiu(T1, T1, -1); ++ __ bne(T1, R0, l_3); ++ __ delayed()->nop(); ++ ++ // exit ++ __ bind(l_4); ++ bs->arraycopy_epilogue(_masm, decorators, is_oop, A1, A2, T1); ++ __ pop(T9); ++ __ pop(T8); ++ __ pop(T1); ++ __ pop(T0); ++ __ pop(T3); ++ __ jr(RA); ++ __ delayed()->nop(); ++ ++ return start; ++ } ++ ++ // Arguments: ++ // aligned - true => Input and output aligned on a HeapWord == 8-byte boundary ++ // ignored ++ // is_oop - true => oop array, so generate store check code ++ // name - stub name string ++ // ++ // Inputs: ++ // c_rarg0 - source array address ++ // c_rarg1 - destination array address ++ // c_rarg2 - element count, treated as ssize_t, can be zero ++ // ++ // If 'from' and/or 'to' are aligned on 4-byte boundaries, we let ++ // the hardware handle it. The two dwords within qwords that span ++ // cache line boundaries will still be loaded and stored atomicly. ++ // ++ address generate_conjoint_int_oop_copy(bool aligned, bool is_oop, const char *name, bool dest_uninitialized = false) { ++ Label l_2, l_4; ++ StubCodeMark mark(this, "StubRoutines", name); ++ __ align(CodeEntryAlignment); ++ address start = __ pc(); ++ address nooverlap_target; ++ ++ if (is_oop) { ++ nooverlap_target = aligned ? ++ StubRoutines::arrayof_oop_disjoint_arraycopy() : ++ StubRoutines::oop_disjoint_arraycopy(); ++ } else { ++ nooverlap_target = aligned ? 
++ StubRoutines::arrayof_jint_disjoint_arraycopy() : ++ StubRoutines::jint_disjoint_arraycopy(); ++ } ++ ++ array_overlap_test(nooverlap_target, 2); ++ ++ DecoratorSet decorators = IN_HEAP | IS_ARRAY; ++ if (dest_uninitialized) { ++ decorators |= IS_DEST_UNINITIALIZED; ++ } ++ if (aligned) { ++ decorators |= ARRAYCOPY_ALIGNED; ++ } ++ ++ BarrierSetAssembler *bs = BarrierSet::barrier_set()->barrier_set_assembler(); ++ // no registers are destroyed by this call ++ bs->arraycopy_prologue(_masm, decorators, is_oop, A1, A2); ++ ++ __ push(T3); ++ __ push(T0); ++ __ push(T1); ++ __ push(T8); ++ __ push(T9); ++ ++ __ move(T1, A2); ++ __ move(T3, A0); ++ __ move(T0, A1); ++ ++ // T3: source array address ++ // T0: destination array address ++ // T1: element count ++ ++ __ sll(AT, T1, Address::times_4); ++ __ addu(AT, T3, AT); ++ __ daddiu(T3, AT, -4); ++ __ sll(AT, T1, Address::times_4); ++ __ addu(AT, T0, AT); ++ __ daddiu(T0, AT, -4); ++ ++ __ beq(T1, R0, l_4); ++ __ delayed()->nop(); ++ ++ __ align(16); ++ __ bind(l_2); ++ __ lw(AT, T3, 0); ++ __ sw(AT, T0, 0); ++ __ addiu(T3, T3, -4); ++ __ addiu(T0, T0, -4); ++ __ addiu(T1, T1, -1); ++ __ bne(T1, R0, l_2); ++ __ delayed()->nop(); ++ ++ __ bind(l_4); ++ bs->arraycopy_epilogue(_masm, decorators, is_oop, A1, A2, T1); ++ __ pop(T9); ++ __ pop(T8); ++ __ pop(T1); ++ __ pop(T0); ++ __ pop(T3); ++ __ jr(RA); ++ __ delayed()->nop(); ++ ++ return start; ++ } ++ ++ // Arguments: ++ // aligned - true => Input and output aligned on a HeapWord == 8-byte boundary ++ // ignored ++ // is_oop - true => oop array, so generate store check code ++ // name - stub name string ++ // ++ // Inputs: ++ // c_rarg0 - source array address ++ // c_rarg1 - destination array address ++ // c_rarg2 - element count, treated as ssize_t, can be zero ++ // ++ // If 'from' and/or 'to' are aligned on 4-byte boundaries, we let ++ // the hardware handle it. The two dwords within qwords that span ++ // cache line boundaries will still be loaded and stored atomicly. ++ // ++ // Side Effects: ++ // disjoint_int_copy_entry is set to the no-overlap entry point ++ // used by generate_conjoint_int_oop_copy(). 
++ // ++ address generate_disjoint_long_oop_copy(bool aligned, bool is_oop, const char *name, bool dest_uninitialized = false) { ++ Label l_3, l_4; ++ StubCodeMark mark(this, "StubRoutines", name); ++ __ align(CodeEntryAlignment); ++ address start = __ pc(); ++ ++ DecoratorSet decorators = IN_HEAP | IS_ARRAY | ARRAYCOPY_DISJOINT; ++ if (dest_uninitialized) { ++ decorators |= IS_DEST_UNINITIALIZED; ++ } ++ if (aligned) { ++ decorators |= ARRAYCOPY_ALIGNED; ++ } ++ ++ BarrierSetAssembler *bs = BarrierSet::barrier_set()->barrier_set_assembler(); ++ bs->arraycopy_prologue(_masm, decorators, is_oop, A1, A2); ++ ++ __ push(T3); ++ __ push(T0); ++ __ push(T1); ++ __ push(T8); ++ __ push(T9); ++ ++ __ move(T1, A2); ++ __ move(T3, A0); ++ __ move(T0, A1); ++ ++ // T3: source array address ++ // T0: destination array address ++ // T1: element count ++ ++ __ beq(T1, R0, l_4); ++ __ delayed()->nop(); ++ ++ __ align(16); ++ __ bind(l_3); ++ __ ld(AT, T3, 0); ++ __ sd(AT, T0, 0); ++ __ addiu(T3, T3, 8); ++ __ addiu(T0, T0, 8); ++ __ addiu(T1, T1, -1); ++ __ bne(T1, R0, l_3); ++ __ delayed()->nop(); ++ ++ // exit ++ __ bind(l_4); ++ bs->arraycopy_epilogue(_masm, decorators, is_oop, A1, A2, T1); ++ __ pop(T9); ++ __ pop(T8); ++ __ pop(T1); ++ __ pop(T0); ++ __ pop(T3); ++ __ jr(RA); ++ __ delayed()->nop(); ++ return start; ++ } ++ ++ // Arguments: ++ // aligned - true => Input and output aligned on a HeapWord == 8-byte boundary ++ // ignored ++ // is_oop - true => oop array, so generate store check code ++ // name - stub name string ++ // ++ // Inputs: ++ // c_rarg0 - source array address ++ // c_rarg1 - destination array address ++ // c_rarg2 - element count, treated as ssize_t, can be zero ++ // ++ // If 'from' and/or 'to' are aligned on 4-byte boundaries, we let ++ // the hardware handle it. The two dwords within qwords that span ++ // cache line boundaries will still be loaded and stored atomicly. ++ // ++ address generate_conjoint_long_oop_copy(bool aligned, bool is_oop, const char *name, bool dest_uninitialized = false) { ++ Label l_2, l_4; ++ StubCodeMark mark(this, "StubRoutines", name); ++ __ align(CodeEntryAlignment); ++ address start = __ pc(); ++ address nooverlap_target; ++ ++ if (is_oop) { ++ nooverlap_target = aligned ? ++ StubRoutines::arrayof_oop_disjoint_arraycopy() : ++ StubRoutines::oop_disjoint_arraycopy(); ++ } else { ++ nooverlap_target = aligned ? 
++ StubRoutines::arrayof_jlong_disjoint_arraycopy() : ++ StubRoutines::jlong_disjoint_arraycopy(); ++ } ++ ++ array_overlap_test(nooverlap_target, 3); ++ ++ DecoratorSet decorators = IN_HEAP | IS_ARRAY | ARRAYCOPY_DISJOINT; ++ if (dest_uninitialized) { ++ decorators |= IS_DEST_UNINITIALIZED; ++ } ++ if (aligned) { ++ decorators |= ARRAYCOPY_ALIGNED; ++ } ++ ++ BarrierSetAssembler *bs = BarrierSet::barrier_set()->barrier_set_assembler(); ++ bs->arraycopy_prologue(_masm, decorators, is_oop, A1, A2); ++ ++ __ push(T3); ++ __ push(T0); ++ __ push(T1); ++ __ push(T8); ++ __ push(T9); ++ ++ __ move(T1, A2); ++ __ move(T3, A0); ++ __ move(T0, A1); ++ ++ __ sll(AT, T1, Address::times_8); ++ __ addu(AT, T3, AT); ++ __ daddiu(T3, AT, -8); ++ __ sll(AT, T1, Address::times_8); ++ __ addu(AT, T0, AT); ++ __ daddiu(T0, AT, -8); ++ ++ __ beq(T1, R0, l_4); ++ __ delayed()->nop(); ++ ++ __ align(16); ++ __ bind(l_2); ++ __ ld(AT, T3, 0); ++ __ sd(AT, T0, 0); ++ __ addiu(T3, T3, -8); ++ __ addiu(T0, T0, -8); ++ __ addiu(T1, T1, -1); ++ __ bne(T1, R0, l_2); ++ __ delayed()->nop(); ++ ++ // exit ++ __ bind(l_4); ++ bs->arraycopy_epilogue(_masm, decorators, is_oop, A1, A2, T1); ++ __ pop(T9); ++ __ pop(T8); ++ __ pop(T1); ++ __ pop(T0); ++ __ pop(T3); ++ __ jr(RA); ++ __ delayed()->nop(); ++ return start; ++ } ++ ++ //FIXME ++ address generate_disjoint_long_copy(bool aligned, const char *name) { ++ Label l_1, l_2; ++ StubCodeMark mark(this, "StubRoutines", name); ++ __ align(CodeEntryAlignment); ++ address start = __ pc(); ++ ++ __ move(T1, A2); ++ __ move(T3, A0); ++ __ move(T0, A1); ++ __ push(T3); ++ __ push(T0); ++ __ push(T1); ++ __ b(l_2); ++ __ delayed()->nop(); ++ __ align(16); ++ __ bind(l_1); ++ __ ld(AT, T3, 0); ++ __ sd (AT, T0, 0); ++ __ addiu(T3, T3, 8); ++ __ addiu(T0, T0, 8); ++ __ bind(l_2); ++ __ addiu(T1, T1, -1); ++ __ bgez(T1, l_1); ++ __ delayed()->nop(); ++ __ pop(T1); ++ __ pop(T0); ++ __ pop(T3); ++ __ jr(RA); ++ __ delayed()->nop(); ++ return start; ++ } ++ ++ ++ address generate_conjoint_long_copy(bool aligned, const char *name) { ++ Label l_1, l_2; ++ StubCodeMark mark(this, "StubRoutines", name); ++ __ align(CodeEntryAlignment); ++ address start = __ pc(); ++ address nooverlap_target = aligned ? 
++ StubRoutines::arrayof_jlong_disjoint_arraycopy() : ++ StubRoutines::jlong_disjoint_arraycopy(); ++ array_overlap_test(nooverlap_target, 3); ++ ++ __ push(T3); ++ __ push(T0); ++ __ push(T1); ++ ++ __ move(T1, A2); ++ __ move(T3, A0); ++ __ move(T0, A1); ++ __ sll(AT, T1, Address::times_8); ++ __ addu(AT, T3, AT); ++ __ daddiu(T3, AT, -8); ++ __ sll(AT, T1, Address::times_8); ++ __ addu(AT, T0, AT); ++ __ daddiu(T0, AT, -8); ++ ++ __ b(l_2); ++ __ delayed()->nop(); ++ __ align(16); ++ __ bind(l_1); ++ __ ld(AT, T3, 0); ++ __ sd (AT, T0, 0); ++ __ addiu(T3, T3, -8); ++ __ addiu(T0, T0,-8); ++ __ bind(l_2); ++ __ addiu(T1, T1, -1); ++ __ bgez(T1, l_1); ++ __ delayed()->nop(); ++ __ pop(T1); ++ __ pop(T0); ++ __ pop(T3); ++ __ jr(RA); ++ __ delayed()->nop(); ++ return start; ++ } ++ ++ void generate_arraycopy_stubs() { ++ if (UseCompressedOops) { ++ StubRoutines::_oop_disjoint_arraycopy = generate_disjoint_int_oop_copy(false, true, ++ "oop_disjoint_arraycopy"); ++ StubRoutines::_oop_arraycopy = generate_conjoint_int_oop_copy(false, true, ++ "oop_arraycopy"); ++ StubRoutines::_oop_disjoint_arraycopy_uninit = generate_disjoint_int_oop_copy(false, true, ++ "oop_disjoint_arraycopy_uninit", true); ++ StubRoutines::_oop_arraycopy_uninit = generate_conjoint_int_oop_copy(false, true, ++ "oop_arraycopy_uninit", true); ++ } else { ++ StubRoutines::_oop_disjoint_arraycopy = generate_disjoint_long_oop_copy(false, true, ++ "oop_disjoint_arraycopy"); ++ StubRoutines::_oop_arraycopy = generate_conjoint_long_oop_copy(false, true, ++ "oop_arraycopy"); ++ StubRoutines::_oop_disjoint_arraycopy_uninit = generate_disjoint_long_oop_copy(false, true, ++ "oop_disjoint_arraycopy_uninit", true); ++ StubRoutines::_oop_arraycopy_uninit = generate_conjoint_long_oop_copy(false, true, ++ "oop_arraycopy_uninit", true); ++ } ++ ++ StubRoutines::_jbyte_disjoint_arraycopy = generate_disjoint_byte_copy(false, "jbyte_disjoint_arraycopy"); ++ StubRoutines::_jshort_disjoint_arraycopy = generate_disjoint_short_copy(false, "jshort_disjoint_arraycopy"); ++ StubRoutines::_jint_disjoint_arraycopy = generate_disjoint_int_oop_copy(false, false, "jint_disjoint_arraycopy"); ++ StubRoutines::_jlong_disjoint_arraycopy = generate_disjoint_long_copy(false, "jlong_disjoint_arraycopy"); ++ ++ StubRoutines::_jbyte_arraycopy = generate_conjoint_byte_copy(false, "jbyte_arraycopy"); ++ StubRoutines::_jshort_arraycopy = generate_conjoint_short_copy(false, "jshort_arraycopy"); ++ StubRoutines::_jint_arraycopy = generate_conjoint_int_oop_copy(false, false, "jint_arraycopy"); ++ StubRoutines::_jlong_arraycopy = generate_conjoint_long_copy(false, "jlong_arraycopy"); ++ ++ // We don't generate specialized code for HeapWord-aligned source ++ // arrays, so just use the code we've already generated ++ StubRoutines::_arrayof_jbyte_disjoint_arraycopy = StubRoutines::_jbyte_disjoint_arraycopy; ++ StubRoutines::_arrayof_jbyte_arraycopy = StubRoutines::_jbyte_arraycopy; ++ ++ StubRoutines::_arrayof_jshort_disjoint_arraycopy = StubRoutines::_jshort_disjoint_arraycopy; ++ StubRoutines::_arrayof_jshort_arraycopy = StubRoutines::_jshort_arraycopy; ++ ++ StubRoutines::_arrayof_jint_disjoint_arraycopy = StubRoutines::_jint_disjoint_arraycopy; ++ StubRoutines::_arrayof_jint_arraycopy = StubRoutines::_jint_arraycopy; ++ ++ StubRoutines::_arrayof_jlong_disjoint_arraycopy = StubRoutines::_jlong_disjoint_arraycopy; ++ StubRoutines::_arrayof_jlong_arraycopy = StubRoutines::_jlong_arraycopy; ++ ++ StubRoutines::_arrayof_oop_disjoint_arraycopy = 
StubRoutines::_oop_disjoint_arraycopy; ++ StubRoutines::_arrayof_oop_arraycopy = StubRoutines::_oop_arraycopy; ++ ++ StubRoutines::_arrayof_oop_disjoint_arraycopy_uninit = StubRoutines::_oop_disjoint_arraycopy_uninit; ++ StubRoutines::_arrayof_oop_arraycopy_uninit = StubRoutines::_oop_arraycopy_uninit; ++ ++ StubRoutines::_jbyte_fill = generate_fill(T_BYTE, false, "jbyte_fill"); ++ StubRoutines::_jshort_fill = generate_fill(T_SHORT, false, "jshort_fill"); ++ StubRoutines::_jint_fill = generate_fill(T_INT, false, "jint_fill"); ++ StubRoutines::_arrayof_jbyte_fill = generate_fill(T_BYTE, true, "arrayof_jbyte_fill"); ++ StubRoutines::_arrayof_jshort_fill = generate_fill(T_SHORT, true, "arrayof_jshort_fill"); ++ StubRoutines::_arrayof_jint_fill = generate_fill(T_INT, true, "arrayof_jint_fill"); ++ } ++ ++ // add a function to implement SafeFetch32 and SafeFetchN ++ void generate_safefetch(const char* name, int size, address* entry, ++ address* fault_pc, address* continuation_pc) { ++ // safefetch signatures: ++ // int SafeFetch32(int* adr, int errValue); ++ // intptr_t SafeFetchN (intptr_t* adr, intptr_t errValue); ++ // ++ // arguments: ++ // A0 = adr ++ // A1 = errValue ++ // ++ // result: ++ // PPC_RET = *adr or errValue ++ ++ StubCodeMark mark(this, "StubRoutines", name); ++ ++ // Entry point, pc or function descriptor. ++ *entry = __ pc(); ++ ++ // Load *adr into A1, may fault. ++ *fault_pc = __ pc(); ++ switch (size) { ++ case 4: ++ // int32_t ++ __ lw(A1, A0, 0); ++ break; ++ case 8: ++ // int64_t ++ __ ld(A1, A0, 0); ++ break; ++ default: ++ ShouldNotReachHere(); ++ } ++ ++ // return errValue or *adr ++ *continuation_pc = __ pc(); ++ __ addu(V0,A1,R0); ++ __ jr(RA); ++ __ delayed()->nop(); ++ } ++ ++ ++#undef __ ++#define __ masm-> ++ ++ // Continuation point for throwing of implicit exceptions that are ++ // not handled in the current activation. Fabricates an exception ++ // oop and initiates normal exception dispatching in this ++ // frame. Since we need to preserve callee-saved values (currently ++ // only for C2, but done for C1 as well) we need a callee-saved oop ++ // map and therefore have to make these stubs into RuntimeStubs ++ // rather than BufferBlobs. If the compiler needs all registers to ++ // be preserved between the fault point and the exception handler ++ // then it must assume responsibility for that in ++ // AbstractCompiler::continuation_for_implicit_null_exception or ++ // continuation_for_implicit_division_by_zero_exception. All other ++ // implicit exceptions (e.g., NullPointerException or ++ // AbstractMethodError on entry) are either at call sites or ++ // otherwise assume that stack unwinding will be initiated, so ++ // caller saved registers were assumed volatile in the compiler. ++ address generate_throw_exception(const char* name, ++ address runtime_entry, ++ bool restore_saved_exception_pc) { ++ // Information about frame layout at time of blocking runtime call. ++ // Note that we only have to preserve callee-saved registers since ++ // the compilers are responsible for supplying a continuation point ++ // if they expect all registers to be preserved. 
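++    // Word offsets from SP, after the prologue below, of the slots used to
++    // save the thread pointer and the callee-saved registers S0..S7.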
++ enum layout { ++ thread_off, // last_java_sp ++ S7_off, // callee saved register sp + 1 ++ S6_off, // callee saved register sp + 2 ++ S5_off, // callee saved register sp + 3 ++ S4_off, // callee saved register sp + 4 ++ S3_off, // callee saved register sp + 5 ++ S2_off, // callee saved register sp + 6 ++ S1_off, // callee saved register sp + 7 ++ S0_off, // callee saved register sp + 8 ++ FP_off, ++ ret_address, ++ framesize ++ }; ++ ++ int insts_size = 2048; ++ int locs_size = 32; ++ ++ // CodeBuffer* code = new CodeBuffer(insts_size, locs_size, 0, 0, 0, false, ++ // NULL, NULL, NULL, false, NULL, name, false); ++ CodeBuffer code (name , insts_size, locs_size); ++ OopMapSet* oop_maps = new OopMapSet(); ++ MacroAssembler* masm = new MacroAssembler(&code); ++ ++ address start = __ pc(); ++ ++ // This is an inlined and slightly modified version of call_VM ++ // which has the ability to fetch the return PC out of ++ // thread-local storage and also sets up last_Java_sp slightly ++ // differently than the real call_VM ++#ifndef OPT_THREAD ++ Register java_thread = TREG; ++ __ get_thread(java_thread); ++#else ++ Register java_thread = TREG; ++#endif ++ if (restore_saved_exception_pc) { ++ __ ld(RA, java_thread, in_bytes(JavaThread::saved_exception_pc_offset())); ++ } ++ ++ __ enter(); // required for proper stackwalking of RuntimeStub frame ++ ++ __ addiu(SP, SP, (-1) * (framesize-2) * wordSize); // prolog ++ __ sd(S0, SP, S0_off * wordSize); ++ __ sd(S1, SP, S1_off * wordSize); ++ __ sd(S2, SP, S2_off * wordSize); ++ __ sd(S3, SP, S3_off * wordSize); ++ __ sd(S4, SP, S4_off * wordSize); ++ __ sd(S5, SP, S5_off * wordSize); ++ __ sd(S6, SP, S6_off * wordSize); ++ __ sd(S7, SP, S7_off * wordSize); ++ ++ int frame_complete = __ pc() - start; ++ // push java thread (becomes first argument of C function) ++ __ sd(java_thread, SP, thread_off * wordSize); ++ if (java_thread != A0) ++ __ move(A0, java_thread); ++ ++ // Set up last_Java_sp and last_Java_fp ++ __ set_last_Java_frame(java_thread, SP, FP, NULL); ++ // Align stack ++ __ set64(AT, -(StackAlignmentInBytes)); ++ __ andr(SP, SP, AT); ++ ++ __ relocate(relocInfo::internal_pc_type); ++ { ++ intptr_t save_pc = (intptr_t)__ pc() + NativeMovConstReg::instruction_size + 28; ++ __ patchable_set48(AT, save_pc); ++ } ++ __ sd(AT, java_thread, in_bytes(JavaThread::last_Java_pc_offset())); ++ ++ // Call runtime ++ __ call(runtime_entry); ++ __ delayed()->nop(); ++ // Generate oop map ++ OopMap* map = new OopMap(framesize, 0); ++ oop_maps->add_gc_map(__ offset(), map); ++ ++ // restore the thread (cannot use the pushed argument since arguments ++ // may be overwritten by C code generated by an optimizing compiler); ++ // however can use the register value directly if it is callee saved. ++#ifndef OPT_THREAD ++ __ get_thread(java_thread); ++#endif ++ ++ __ ld(SP, java_thread, in_bytes(JavaThread::last_Java_sp_offset())); ++ __ reset_last_Java_frame(java_thread, true); ++ ++ // Restore callee save registers. 
This must be done after resetting the Java frame ++ __ ld(S0, SP, S0_off * wordSize); ++ __ ld(S1, SP, S1_off * wordSize); ++ __ ld(S2, SP, S2_off * wordSize); ++ __ ld(S3, SP, S3_off * wordSize); ++ __ ld(S4, SP, S4_off * wordSize); ++ __ ld(S5, SP, S5_off * wordSize); ++ __ ld(S6, SP, S6_off * wordSize); ++ __ ld(S7, SP, S7_off * wordSize); ++ ++ // discard arguments ++ __ move(SP, FP); // epilog ++ __ pop(FP); ++ ++ // check for pending exceptions ++#ifdef ASSERT ++ Label L; ++ __ ld(AT, java_thread, in_bytes(Thread::pending_exception_offset())); ++ __ bne(AT, R0, L); ++ __ delayed()->nop(); ++ __ should_not_reach_here(); ++ __ bind(L); ++#endif //ASSERT ++ __ jmp(StubRoutines::forward_exception_entry(), relocInfo::runtime_call_type); ++ __ delayed()->nop(); ++ RuntimeStub* stub = RuntimeStub::new_runtime_stub(name, ++ &code, ++ frame_complete, ++ framesize, ++ oop_maps, false); ++ return stub->entry_point(); ++ } ++ ++ // Initialization ++ void generate_initial() { ++ // Generates all stubs and initializes the entry points ++ ++ //------------------------------------------------------------- ++ //----------------------------------------------------------- ++ // entry points that exist in all platforms ++ // Note: This is code that could be shared among different platforms - however the benefit seems to be smaller ++ // than the disadvantage of having a much more complicated generator structure. ++ // See also comment in stubRoutines.hpp. ++ StubRoutines::_forward_exception_entry = generate_forward_exception(); ++ StubRoutines::_call_stub_entry = generate_call_stub(StubRoutines::_call_stub_return_address); ++ // is referenced by megamorphic call ++ StubRoutines::_catch_exception_entry = generate_catch_exception(); ++ ++ StubRoutines::_throw_StackOverflowError_entry = ++ generate_throw_exception("StackOverflowError throw_exception", ++ CAST_FROM_FN_PTR(address, SharedRuntime::throw_StackOverflowError), ++ false); ++ StubRoutines::_throw_delayed_StackOverflowError_entry = ++ generate_throw_exception("delayed StackOverflowError throw_exception", ++ CAST_FROM_FN_PTR(address, SharedRuntime::throw_delayed_StackOverflowError), ++ false); ++ } ++ ++ void generate_all() { ++ // Generates all stubs and initializes the entry points ++ ++ // These entry points require SharedInfo::stack0 to be set up in ++ // non-core builds and need to be relocatable, so they each ++ // fabricate a RuntimeStub internally. ++ StubRoutines::_throw_AbstractMethodError_entry = generate_throw_exception("AbstractMethodError throw_exception", ++ CAST_FROM_FN_PTR(address, SharedRuntime::throw_AbstractMethodError), false); ++ ++ StubRoutines::_throw_IncompatibleClassChangeError_entry = generate_throw_exception("IncompatibleClassChangeError throw_exception", ++ CAST_FROM_FN_PTR(address, SharedRuntime:: throw_IncompatibleClassChangeError), false); ++ ++ StubRoutines::_throw_NullPointerException_at_call_entry = generate_throw_exception("NullPointerException at call throw_exception", ++ CAST_FROM_FN_PTR(address, SharedRuntime::throw_NullPointerException_at_call), false); ++ ++ // entry points that are platform specific ++ ++ // support for verify_oop (must happen after universe_init) ++ StubRoutines::_verify_oop_subroutine_entry = generate_verify_oop(); ++#ifndef CORE ++ // arraycopy stubs used by compilers ++ generate_arraycopy_stubs(); ++#endif ++ ++ // Safefetch stubs. 
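++    // The load in each stub may fault; the fault/continuation pcs recorded
++    // here let the signal handler resume the stub at a point that simply
++    // returns the caller-supplied errValue.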
++ generate_safefetch("SafeFetch32", sizeof(int), &StubRoutines::_safefetch32_entry, ++ &StubRoutines::_safefetch32_fault_pc, ++ &StubRoutines::_safefetch32_continuation_pc); ++ generate_safefetch("SafeFetchN", sizeof(intptr_t), &StubRoutines::_safefetchN_entry, ++ &StubRoutines::_safefetchN_fault_pc, ++ &StubRoutines::_safefetchN_continuation_pc); ++ ++#ifdef COMPILER2 ++ if (UseMontgomeryMultiplyIntrinsic) { ++ StubRoutines::_montgomeryMultiply ++ = CAST_FROM_FN_PTR(address, SharedRuntime::montgomery_multiply); ++ } ++ if (UseMontgomerySquareIntrinsic) { ++ StubRoutines::_montgomerySquare ++ = CAST_FROM_FN_PTR(address, SharedRuntime::montgomery_square); ++ } ++#endif ++ } ++ ++ public: ++ StubGenerator(CodeBuffer* code, bool all) : StubCodeGenerator(code) { ++ if (all) { ++ generate_all(); ++ } else { ++ generate_initial(); ++ } ++ } ++}; // end class declaration ++ ++void StubGenerator_generate(CodeBuffer* code, bool all) { ++ StubGenerator g(code, all); ++} +diff --git a/src/hotspot/cpu/mips/stubRoutines_mips.hpp b/src/hotspot/cpu/mips/stubRoutines_mips.hpp +new file mode 100644 +index 0000000000..920c08844e +--- /dev/null ++++ b/src/hotspot/cpu/mips/stubRoutines_mips.hpp +@@ -0,0 +1,59 @@ ++/* ++ * Copyright (c) 2003, 2013, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#ifndef CPU_MIPS_VM_STUBROUTINES_MIPS_64_HPP ++#define CPU_MIPS_VM_STUBROUTINES_MIPS_64_HPP ++ ++// This file holds the platform specific parts of the StubRoutines ++// definition. See stubRoutines.hpp for a description on how to ++// extend it. ++ ++static bool returns_to_call_stub(address return_pc){ ++ return return_pc == _call_stub_return_address||return_pc == gs2::get_call_stub_compiled_return(); ++} ++ ++enum platform_dependent_constants { ++ code_size1 = 20000, // simply increase if too small (assembler will crash if too small) ++ code_size2 = 40000 // simply increase if too small (assembler will crash if too small) ++}; ++ ++class gs2 { ++ friend class StubGenerator; ++ friend class VMStructs; ++ private: ++ // If we call compiled code directly from the call stub we will ++ // need to adjust the return back to the call stub to a specialized ++ // piece of code that can handle compiled results and cleaning the fpu ++ // stack. The variable holds that location. 
++ static address _call_stub_compiled_return; ++ ++public: ++ // Call back points for traps in compiled code ++ static address get_call_stub_compiled_return() { return _call_stub_compiled_return; } ++ static void set_call_stub_compiled_return(address ret){ _call_stub_compiled_return = ret; } ++ ++}; ++ ++#endif // CPU_MIPS_VM_STUBROUTINES_MIPS_64_HPP +diff --git a/src/hotspot/cpu/mips/stubRoutines_mips_64.cpp b/src/hotspot/cpu/mips/stubRoutines_mips_64.cpp +new file mode 100644 +index 0000000000..358d580d52 +--- /dev/null ++++ b/src/hotspot/cpu/mips/stubRoutines_mips_64.cpp +@@ -0,0 +1,35 @@ ++/* ++ * Copyright (c) 2003, 2013, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2021, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#include "precompiled.hpp" ++#include "runtime/deoptimization.hpp" ++#include "runtime/frame.inline.hpp" ++#include "runtime/stubRoutines.hpp" ++#include "runtime/thread.inline.hpp" ++ ++// a description of how to extend it, see the stubRoutines.hpp file. ++ ++//find the last fp value ++address StubRoutines::gs2::_call_stub_compiled_return = NULL; +diff --git a/src/hotspot/cpu/mips/templateInterpreterGenerator_mips.cpp b/src/hotspot/cpu/mips/templateInterpreterGenerator_mips.cpp +new file mode 100644 +index 0000000000..19e2f29c59 +--- /dev/null ++++ b/src/hotspot/cpu/mips/templateInterpreterGenerator_mips.cpp +@@ -0,0 +1,2149 @@ ++/* ++ * Copyright (c) 2003, 2013, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 
++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#include "precompiled.hpp" ++#include "asm/macroAssembler.hpp" ++#include "interpreter/bytecodeHistogram.hpp" ++#include "interpreter/interp_masm.hpp" ++#include "interpreter/interpreter.hpp" ++#include "interpreter/interpreterRuntime.hpp" ++#include "interpreter/templateInterpreterGenerator.hpp" ++#include "interpreter/templateTable.hpp" ++#include "oops/arrayOop.hpp" ++#include "oops/methodData.hpp" ++#include "oops/method.hpp" ++#include "oops/oop.inline.hpp" ++#include "prims/jvmtiExport.hpp" ++#include "prims/jvmtiThreadState.hpp" ++#include "runtime/arguments.hpp" ++#include "runtime/deoptimization.hpp" ++#include "runtime/frame.inline.hpp" ++#include "runtime/sharedRuntime.hpp" ++#include "runtime/stubRoutines.hpp" ++#include "runtime/synchronizer.hpp" ++#include "runtime/timer.hpp" ++#include "runtime/vframeArray.hpp" ++#include "utilities/debug.hpp" ++ ++#define __ _masm-> ++ ++#define T0 RT0 ++#define T1 RT1 ++#define T2 RT2 ++#define T3 RT3 ++#define T8 RT8 ++#define T9 RT9 ++ ++int TemplateInterpreter::InterpreterCodeSize = 500 * K; ++ ++#ifdef PRODUCT ++#define BLOCK_COMMENT(str) /* nothing */ ++#else ++#define BLOCK_COMMENT(str) __ block_comment(str) ++#endif ++ ++address TemplateInterpreterGenerator::generate_slow_signature_handler() { ++ address entry = __ pc(); ++ ++ // Rmethod: method ++ // LVP: pointer to locals ++ // A3: first stack arg ++ __ move(A3, SP); ++ __ daddiu(SP, SP, -10 * wordSize); ++ __ sd(RA, SP, 0); ++ __ call_VM(noreg, ++ CAST_FROM_FN_PTR(address, ++ InterpreterRuntime::slow_signature_handler), ++ Rmethod, LVP, A3); ++ ++ // V0: result handler ++ ++ // Stack layout: ++ // ... ++ // 10 stack arg0 <--- old sp ++ // 9 float/double identifiers ++ // 8 register arg7 ++ // ... ++ // 2 register arg1 ++ // 1 aligned slot ++ // SP: 0 return address ++ ++ // Do FP first so we can use T3 as temp ++ __ ld(T3, Address(SP, 9 * wordSize)); // float/double identifiers ++ ++ // A0 is for env. ++ // If the mothed is not static, A1 will be corrected in generate_native_entry. ++ for ( int i = 1; i < Argument::n_register_parameters; i++ ) { ++ Register reg = as_Register(i + A0->encoding()); ++ FloatRegister floatreg = as_FloatRegister(i + F12->encoding()); ++ Label isfloatordouble, isdouble, next; ++ ++ __ andi(AT, T3, 1 << (i*2)); // Float or Double? ++ __ bne(AT, R0, isfloatordouble); ++ __ delayed()->nop(); ++ ++ // Do Int register here ++ __ ld(reg, SP, (1 + i) * wordSize); ++ __ b (next); ++ __ delayed()->nop(); ++ ++ __ bind(isfloatordouble); ++ __ andi(AT, T3, 1 << ((i*2)+1)); // Double? 
++ __ bne(AT, R0, isdouble); ++ __ delayed()->nop(); ++ ++ // Do Float Here ++ __ lwc1(floatreg, SP, (1 + i) * wordSize); ++ __ b(next); ++ __ delayed()->nop(); ++ ++ // Do Double here ++ __ bind(isdouble); ++ __ ldc1(floatreg, SP, (1 + i) * wordSize); ++ ++ __ bind(next); ++ } ++ ++ __ ld(RA, SP, 0); ++ __ daddiu(SP, SP, 10 * wordSize); ++ __ jr(RA); ++ __ delayed()->nop(); ++ return entry; ++} ++ ++/** ++ * Method entry for static native methods: ++ * int java.util.zip.CRC32.update(int crc, int b) ++ */ ++address TemplateInterpreterGenerator::generate_CRC32_update_entry() { ++ if (UseCRC32Intrinsics) { ++ address entry = __ pc(); ++ Unimplemented(); ++ return entry; ++ } ++ return NULL; ++} ++ ++/** ++ * Method entry for static native methods: ++ * int java.util.zip.CRC32.updateBytes(int crc, byte[] b, int off, int len) ++ * int java.util.zip.CRC32.updateByteBuffer(int crc, long buf, int off, int len) ++ */ ++address TemplateInterpreterGenerator::generate_CRC32_updateBytes_entry(AbstractInterpreter::MethodKind kind) { ++ if (UseCRC32Intrinsics) { ++ address entry = __ pc(); ++ Unimplemented(); ++ return entry; ++ } ++ return NULL; ++} ++ ++/** ++* Method entry for static (non-native) methods: ++* int java.util.zip.CRC32C.updateBytes(int crc, byte[] b, int off, int end) ++* int java.util.zip.CRC32C.updateDirectByteBuffer(int crc, long address, int off, int end) ++*/ ++address TemplateInterpreterGenerator::generate_CRC32C_updateBytes_entry(AbstractInterpreter::MethodKind kind) { ++ if (UseCRC32CIntrinsics) { ++ address entry = __ pc(); ++ Unimplemented(); ++ return entry; ++ } ++ return NULL; ++} ++ ++// ++// Various method entries ++// ++ ++address TemplateInterpreterGenerator::generate_math_entry(AbstractInterpreter::MethodKind kind) { ++ if (!InlineIntrinsics) return NULL; // Generate a vanilla entry ++ ++ // These don't need a safepoint check because they aren't virtually ++ // callable. We won't enter these intrinsics from compiled code. ++ // If in the future we added an intrinsic which was virtually callable ++ // we'd have to worry about how to safepoint so that this code is used. 
++ ++ // mathematical functions inlined by compiler ++ // (interpreter must provide identical implementation ++ // in order to avoid monotonicity bugs when switching ++ // from interpreter to compiler in the middle of some ++ // computation) ++ // ++ // stack: ++ // [ arg ] <-- sp ++ // [ arg ] ++ // retaddr in ra ++ ++ address entry_point = NULL; ++ switch (kind) { ++ case Interpreter::java_lang_math_abs: ++ entry_point = __ pc(); ++ __ ldc1(F12, SP, 0); ++ __ abs_d(F0, F12); ++ __ move(SP, Rsender); ++ break; ++ case Interpreter::java_lang_math_sqrt: ++ entry_point = __ pc(); ++ __ ldc1(F12, SP, 0); ++ __ sqrt_d(F0, F12); ++ __ move(SP, Rsender); ++ break; ++ case Interpreter::java_lang_math_sin : ++ case Interpreter::java_lang_math_cos : ++ case Interpreter::java_lang_math_tan : ++ case Interpreter::java_lang_math_log : ++ case Interpreter::java_lang_math_log10 : ++ case Interpreter::java_lang_math_exp : ++ entry_point = __ pc(); ++ __ ldc1(F12, SP, 0); ++ __ move(SP, Rsender); ++ __ dmtc1(RA, F24); ++ __ dmtc1(SP, F25); ++ __ dins(SP, R0, 0, exact_log2(StackAlignmentInBytes)); ++ generate_transcendental_entry(kind, 1); ++ __ dmfc1(SP, F25); ++ __ dmfc1(RA, F24); ++ break; ++ case Interpreter::java_lang_math_pow : ++ entry_point = __ pc(); ++ __ ldc1(F12, SP, 2 * Interpreter::stackElementSize); ++ __ ldc1(F13, SP, 0); ++ __ move(SP, Rsender); ++ __ dmtc1(RA, F24); ++ __ dmtc1(SP, F25); ++ __ dins(SP, R0, 0, exact_log2(StackAlignmentInBytes)); ++ generate_transcendental_entry(kind, 2); ++ __ dmfc1(SP, F25); ++ __ dmfc1(RA, F24); ++ break; ++ case Interpreter::java_lang_math_fmaD : ++ if (UseFMA) { ++ entry_point = __ pc(); ++ __ ldc1(F12, SP, 4 * Interpreter::stackElementSize); ++ __ ldc1(F13, SP, 2 * Interpreter::stackElementSize); ++ __ ldc1(F14, SP, 0); ++ __ madd_d(F0, F14, F13, F12); ++ __ move(SP, Rsender); ++ } ++ break; ++ case Interpreter::java_lang_math_fmaF : ++ if (UseFMA) { ++ entry_point = __ pc(); ++ __ lwc1(F12, SP, 2 * Interpreter::stackElementSize); ++ __ lwc1(F13, SP, Interpreter::stackElementSize); ++ __ lwc1(F14, SP, 0); ++ __ madd_s(F0, F14, F13, F12); ++ __ move(SP, Rsender); ++ } ++ break; ++ default: ++ ; ++ } ++ if (entry_point) { ++ __ jr(RA); ++ __ delayed()->nop(); ++ } ++ ++ return entry_point; ++} ++ ++ // double trigonometrics and transcendentals ++ // static jdouble dsin(jdouble x); ++ // static jdouble dcos(jdouble x); ++ // static jdouble dtan(jdouble x); ++ // static jdouble dlog(jdouble x); ++ // static jdouble dlog10(jdouble x); ++ // static jdouble dexp(jdouble x); ++ // static jdouble dpow(jdouble x, jdouble y); ++ ++void TemplateInterpreterGenerator::generate_transcendental_entry(AbstractInterpreter::MethodKind kind, int fpargs) { ++ address fn; ++ switch (kind) { ++ case Interpreter::java_lang_math_sin : ++ if (StubRoutines::dsin() == NULL) { ++ fn = CAST_FROM_FN_PTR(address, SharedRuntime::dsin); ++ } else { ++ fn = CAST_FROM_FN_PTR(address, StubRoutines::dsin()); ++ } ++ break; ++ case Interpreter::java_lang_math_cos : ++ if (StubRoutines::dcos() == NULL) { ++ fn = CAST_FROM_FN_PTR(address, SharedRuntime::dcos); ++ } else { ++ fn = CAST_FROM_FN_PTR(address, StubRoutines::dcos()); ++ } ++ break; ++ case Interpreter::java_lang_math_tan : ++ if (StubRoutines::dtan() == NULL) { ++ fn = CAST_FROM_FN_PTR(address, SharedRuntime::dtan); ++ } else { ++ fn = CAST_FROM_FN_PTR(address, StubRoutines::dtan()); ++ } ++ break; ++ case Interpreter::java_lang_math_log : ++ if (StubRoutines::dlog() == NULL) { ++ fn = CAST_FROM_FN_PTR(address, 
SharedRuntime::dlog); ++ } else { ++ fn = CAST_FROM_FN_PTR(address, StubRoutines::dlog()); ++ } ++ break; ++ case Interpreter::java_lang_math_log10 : ++ if (StubRoutines::dlog10() == NULL) { ++ fn = CAST_FROM_FN_PTR(address, SharedRuntime::dlog10); ++ } else { ++ fn = CAST_FROM_FN_PTR(address, StubRoutines::dlog10()); ++ } ++ break; ++ case Interpreter::java_lang_math_exp : ++ if (StubRoutines::dexp() == NULL) { ++ fn = CAST_FROM_FN_PTR(address, SharedRuntime::dexp); ++ } else { ++ fn = CAST_FROM_FN_PTR(address, StubRoutines::dexp()); ++ } ++ break; ++ case Interpreter::java_lang_math_pow : ++ if (StubRoutines::dpow() == NULL) { ++ fn = CAST_FROM_FN_PTR(address, SharedRuntime::dpow); ++ } else { ++ fn = CAST_FROM_FN_PTR(address, StubRoutines::dpow()); ++ } ++ break; ++ default: ++ ShouldNotReachHere(); ++ fn = NULL; // unreachable ++ } ++ __ li(T9, fn); ++ __ jalr(T9); ++ __ delayed()->nop(); ++} ++ ++// Abstract method entry ++// Attempt to execute abstract method. Throw exception ++address TemplateInterpreterGenerator::generate_abstract_entry(void) { ++ ++ // Rmethod: methodOop ++ // V0: receiver (unused) ++ // Rsender : sender 's sp ++ address entry_point = __ pc(); ++ ++ // abstract method entry ++ // throw exception ++ // adjust stack to what a normal return would do ++ __ empty_expression_stack(); ++ __ restore_bcp(); ++ __ restore_locals(); ++ __ call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::throw_AbstractMethodErrorWithMethod), Rmethod); ++ // the call_VM checks for exception, so we should never return here. ++ __ should_not_reach_here(); ++ ++ return entry_point; ++} ++ ++ ++const int method_offset = frame::interpreter_frame_method_offset * wordSize; ++const int bci_offset = frame::interpreter_frame_bcp_offset * wordSize; ++const int locals_offset = frame::interpreter_frame_locals_offset * wordSize; ++ ++//----------------------------------------------------------------------------- ++ ++address TemplateInterpreterGenerator::generate_StackOverflowError_handler() { ++ address entry = __ pc(); ++ ++#ifdef ASSERT ++ { ++ Label L; ++ __ addiu(T1, FP, frame::interpreter_frame_monitor_block_top_offset * wordSize); ++ __ subu(T1, T1, SP); // T1 = maximal sp for current fp ++ __ bgez(T1, L); // check if frame is complete ++ __ delayed()->nop(); ++ __ stop("interpreter frame not set up"); ++ __ bind(L); ++ } ++#endif // ASSERT ++ // Restore bcp under the assumption that the current frame is still ++ // interpreted ++ // FIXME: please change the func restore_bcp ++ // S0 is the conventional register for bcp ++ __ restore_bcp(); ++ ++ // expression stack must be empty before entering the VM if an ++ // exception happened ++ __ empty_expression_stack(); ++ // throw exception ++ // FIXME: why do not pass parameter thread ? ++ __ call_VM(NOREG, CAST_FROM_FN_PTR(address, InterpreterRuntime::throw_StackOverflowError)); ++ return entry; ++} ++ ++address TemplateInterpreterGenerator::generate_ArrayIndexOutOfBounds_handler() { ++ address entry = __ pc(); ++ // expression stack must be empty before entering the VM if an ++ // exception happened ++ __ empty_expression_stack(); ++ // ??? 
convention: expect array in register A1 ++ __ call_VM(noreg, CAST_FROM_FN_PTR(address, ++ InterpreterRuntime::throw_ArrayIndexOutOfBoundsException), A1, A2); ++ return entry; ++} ++ ++address TemplateInterpreterGenerator::generate_ClassCastException_handler() { ++ address entry = __ pc(); ++ ++ // expression stack must be empty before entering the VM if an ++ // exception happened ++ __ empty_expression_stack(); ++ __ empty_FPU_stack(); ++ __ call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::throw_ClassCastException), FSR); ++ return entry; ++} ++ ++address TemplateInterpreterGenerator::generate_exception_handler_common( ++ const char* name, const char* message, bool pass_oop) { ++ assert(!pass_oop || message == NULL, "either oop or message but not both"); ++ address entry = __ pc(); ++ ++ // expression stack must be empty before entering the VM if an exception happened ++ __ empty_expression_stack(); ++ // setup parameters ++ __ li(A1, (long)name); ++ if (pass_oop) { ++ __ call_VM(V0, ++ CAST_FROM_FN_PTR(address, InterpreterRuntime::create_klass_exception), A1, FSR); ++ } else { ++ __ li(A2, (long)message); ++ __ call_VM(V0, ++ CAST_FROM_FN_PTR(address, InterpreterRuntime::create_exception), A1, A2); ++ } ++ // throw exception ++ __ jmp(Interpreter::throw_exception_entry(), relocInfo::none); ++ __ delayed()->nop(); ++ return entry; ++} ++ ++address TemplateInterpreterGenerator::generate_return_entry_for(TosState state, int step, size_t index_size) { ++ ++ address entry = __ pc(); ++ ++ // Restore stack bottom in case i2c adjusted stack ++ __ ld(SP, Address(FP, frame::interpreter_frame_last_sp_offset * wordSize)); ++ // and NULL it as marker that sp is now tos until next java call ++ __ sd(R0, FP, frame::interpreter_frame_last_sp_offset * wordSize); ++ ++ __ restore_bcp(); ++ __ restore_locals(); ++ ++ // mdp: T8 ++ // ret: FSR ++ // tmp: T9 ++ if (state == atos) { ++ Register mdp = T8; ++ Register tmp = T9; ++ __ profile_return_type(mdp, FSR, tmp); ++ } ++ ++ ++ const Register cache = T9; ++ const Register index = T3; ++ __ get_cache_and_index_at_bcp(cache, index, 1, index_size); ++ ++ const Register flags = cache; ++ __ dsll(AT, index, Address::times_ptr); ++ __ daddu(AT, cache, AT); ++ __ lw(flags, AT, in_bytes(ConstantPoolCache::base_offset() + ConstantPoolCacheEntry::flags_offset())); ++ __ andi(flags, flags, ConstantPoolCacheEntry::parameter_size_mask); ++ __ dsll(AT, flags, Interpreter::logStackElementSize); ++ __ daddu(SP, SP, AT); ++ ++ Register java_thread; ++#ifndef OPT_THREAD ++ java_thread = T9; ++ __ get_thread(java_thread); ++#else ++ java_thread = TREG; ++#endif ++ ++ __ check_and_handle_popframe(java_thread); ++ __ check_and_handle_earlyret(java_thread); ++ ++ __ dispatch_next(state, step); ++ ++ return entry; ++} ++ ++ ++address TemplateInterpreterGenerator::generate_deopt_entry_for(TosState state, ++ int step, ++ address continuation) { ++ address entry = __ pc(); ++ // NULL last_sp until next java call ++ __ sd(R0, FP, frame::interpreter_frame_last_sp_offset * wordSize); ++ __ restore_bcp(); ++ __ restore_locals(); ++ // handle exceptions ++ { ++ Label L; ++ const Register thread = TREG; ++#ifndef OPT_THREAD ++ __ get_thread(thread); ++#endif ++ __ ld(AT, thread, in_bytes(Thread::pending_exception_offset())); ++ __ beq(AT, R0, L); ++ __ delayed()->nop(); ++ __ call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::throw_pending_exception)); ++ __ should_not_reach_here(); ++ __ bind(L); ++ } ++ if (continuation == NULL) { ++ __ dispatch_next(state, 
step); ++ } else { ++ __ jump_to_entry(continuation); ++ __ delayed()->nop(); ++ } ++ return entry; ++} ++ ++int AbstractInterpreter::BasicType_as_index(BasicType type) { ++ int i = 0; ++ switch (type) { ++ case T_BOOLEAN: i = 0; break; ++ case T_CHAR : i = 1; break; ++ case T_BYTE : i = 2; break; ++ case T_SHORT : i = 3; break; ++ case T_INT : // fall through ++ case T_LONG : // fall through ++ case T_VOID : i = 4; break; ++ case T_FLOAT : i = 5; break; ++ case T_DOUBLE : i = 6; break; ++ case T_OBJECT : // fall through ++ case T_ARRAY : i = 7; break; ++ default : ShouldNotReachHere(); ++ } ++ assert(0 <= i && i < AbstractInterpreter::number_of_result_handlers, ++ "index out of bounds"); ++ return i; ++} ++ ++ ++address TemplateInterpreterGenerator::generate_result_handler_for( ++ BasicType type) { ++ address entry = __ pc(); ++ switch (type) { ++ case T_BOOLEAN: __ c2bool(V0); break; ++ case T_CHAR : __ andi(V0, V0, 0xFFFF); break; ++ case T_BYTE : __ sign_extend_byte (V0); break; ++ case T_SHORT : __ sign_extend_short(V0); break; ++ case T_INT : /* nothing to do */ break; ++ case T_FLOAT : /* nothing to do */ break; ++ case T_DOUBLE : /* nothing to do */ break; ++ case T_OBJECT : ++ { ++ __ ld(V0, FP, frame::interpreter_frame_oop_temp_offset * wordSize); ++ __ verify_oop(V0); // and verify it ++ } ++ break; ++ default : ShouldNotReachHere(); ++ } ++ __ jr(RA); // return from result handler ++ __ delayed()->nop(); ++ return entry; ++} ++ ++address TemplateInterpreterGenerator::generate_safept_entry_for( ++ TosState state, ++ address runtime_entry) { ++ address entry = __ pc(); ++ __ push(state); ++ __ call_VM(noreg, runtime_entry); ++ __ dispatch_via(vtos, Interpreter::_normal_table.table_for(vtos)); ++ return entry; ++} ++ ++ ++ ++// Helpers for commoning out cases in the various type of method entries. 
++// ++ ++ ++// increment invocation count & check for overflow ++// ++// Note: checking for negative value instead of overflow ++// so we have a 'sticky' overflow test ++// ++// prerequisites : method in T0, invocation counter in T3 ++void TemplateInterpreterGenerator::generate_counter_incr( ++ Label* overflow, ++ Label* profile_method, ++ Label* profile_method_continue) { ++ Label done; ++ const Address invocation_counter(FSR, in_bytes(MethodCounters::invocation_counter_offset()) ++ + in_bytes(InvocationCounter::counter_offset())); ++ const Address backedge_counter (FSR, in_bytes(MethodCounters::backedge_counter_offset()) ++ + in_bytes(InvocationCounter::counter_offset())); ++ ++ __ get_method_counters(Rmethod, FSR, done); ++ ++ if (ProfileInterpreter) { // %%% Merge this into methodDataOop ++ __ lw(T9, FSR, in_bytes(MethodCounters::interpreter_invocation_counter_offset())); ++ __ incrementl(T9, 1); ++ __ sw(T9, FSR, in_bytes(MethodCounters::interpreter_invocation_counter_offset())); ++ } ++ // Update standard invocation counters ++ __ lw(T3, invocation_counter); ++ __ increment(T3, InvocationCounter::count_increment); ++ __ sw(T3, invocation_counter); // save invocation count ++ ++ __ lw(FSR, backedge_counter); // load backedge counter ++ __ li(AT, InvocationCounter::count_mask_value); // mask out the status bits ++ __ andr(FSR, FSR, AT); ++ ++ __ daddu(T3, T3, FSR); // add both counters ++ ++ if (ProfileInterpreter && profile_method != NULL) { ++ // Test to see if we should create a method data oop ++ if (Assembler::is_simm16(InvocationCounter::InterpreterProfileLimit)) { ++ __ slti(AT, T3, InvocationCounter::InterpreterProfileLimit); ++ } else { ++ __ li(AT, (long)&InvocationCounter::InterpreterProfileLimit); ++ __ lw(AT, AT, 0); ++ __ slt(AT, T3, AT); ++ } ++ ++ __ bne_far(AT, R0, *profile_method_continue); ++ __ delayed()->nop(); ++ ++ // if no method data exists, go to profile_method ++ __ test_method_data_pointer(FSR, *profile_method); ++ } ++ ++ if (Assembler::is_simm16(CompileThreshold)) { ++ __ srl(AT, T3, InvocationCounter::count_shift); ++ __ slti(AT, AT, CompileThreshold); ++ } else { ++ __ li(AT, (long)&InvocationCounter::InterpreterInvocationLimit); ++ __ lw(AT, AT, 0); ++ __ slt(AT, T3, AT); ++ } ++ ++ __ beq_far(AT, R0, *overflow); ++ __ delayed()->nop(); ++ __ bind(done); ++} ++ ++void TemplateInterpreterGenerator::generate_counter_overflow(Label& do_continue) { ++ ++ // Asm interpreter on entry ++ // S7 - locals ++ // S0 - bcp ++ // Rmethod - method ++ // FP - interpreter frame ++ ++ // On return (i.e. jump to entry_point) ++ // Rmethod - method ++ // RA - return address of interpreter caller ++ // tos - the last parameter to Java method ++ // SP - sender_sp ++ ++ // the bcp is valid if and only if it's not null ++ __ call_VM(NOREG, CAST_FROM_FN_PTR(address, ++ InterpreterRuntime::frequency_counter_overflow), R0); ++ __ ld(Rmethod, FP, method_offset); ++ // Preserve invariant that S0/S7 contain bcp/locals of sender frame ++ __ b_far(do_continue); ++ __ delayed()->nop(); ++} ++ ++// See if we've got enough room on the stack for locals plus overhead. ++// The expression stack grows down incrementally, so the normal guard ++// page mechanism will work for that. ++// ++// NOTE: Since the additional locals are also always pushed (wasn't ++// obvious in generate_method_entry) so the guard should work for them ++// too. 
++// ++// Args: ++// T2: number of additional locals this frame needs (what we must check) ++// T0: Method* ++// ++void TemplateInterpreterGenerator::generate_stack_overflow_check(void) { ++ // see if we've got enough room on the stack for locals plus overhead. ++ // the expression stack grows down incrementally, so the normal guard ++ // page mechanism will work for that. ++ // ++ // Registers live on entry: ++ // ++ // T0: Method* ++ // T2: number of additional locals this frame needs (what we must check) ++ ++ // NOTE: since the additional locals are also always pushed (wasn't obvious in ++ // generate_method_entry) so the guard should work for them too. ++ // ++ ++ const int entry_size = frame::interpreter_frame_monitor_size() * wordSize; ++ ++ // total overhead size: entry_size + (saved fp thru expr stack bottom). ++ // be sure to change this if you add/subtract anything to/from the overhead area ++ const int overhead_size = -(frame::interpreter_frame_initial_sp_offset*wordSize) ++ + entry_size; ++ ++ const int page_size = os::vm_page_size(); ++ ++ Label after_frame_check; ++ ++ // see if the frame is greater than one page in size. If so, ++ // then we need to verify there is enough stack space remaining ++ // for the additional locals. ++ __ move(AT, (page_size - overhead_size) / Interpreter::stackElementSize); ++ __ slt(AT, AT, T2); ++ __ beq(AT, R0, after_frame_check); ++ __ delayed()->nop(); ++ ++ // compute sp as if this were going to be the last frame on ++ // the stack before the red zone ++#ifndef OPT_THREAD ++ Register thread = T1; ++ __ get_thread(thread); ++#else ++ Register thread = TREG; ++#endif ++ ++ // locals + overhead, in bytes ++ __ dsll(T3, T2, Interpreter::logStackElementSize); ++ __ daddiu(T3, T3, overhead_size); // locals * 4 + overhead_size --> T3 ++ ++#ifdef ASSERT ++ Label stack_base_okay, stack_size_okay; ++ // verify that thread stack base is non-zero ++ __ ld(AT, thread, in_bytes(Thread::stack_base_offset())); ++ __ bne(AT, R0, stack_base_okay); ++ __ delayed()->nop(); ++ __ stop("stack base is zero"); ++ __ bind(stack_base_okay); ++ // verify that thread stack size is non-zero ++ __ ld(AT, thread, in_bytes(Thread::stack_size_offset())); ++ __ bne(AT, R0, stack_size_okay); ++ __ delayed()->nop(); ++ __ stop("stack size is zero"); ++ __ bind(stack_size_okay); ++#endif ++ ++ // Add stack base to locals and subtract stack size ++ __ ld(AT, thread, in_bytes(Thread::stack_base_offset())); // stack_base --> AT ++ __ daddu(T3, T3, AT); // locals * 4 + overhead_size + stack_base--> T3 ++ __ ld(AT, thread, in_bytes(Thread::stack_size_offset())); // stack_size --> AT ++ __ dsubu(T3, T3, AT); // locals * 4 + overhead_size + stack_base - stack_size --> T3 ++ ++ // Use the bigger size for banging. ++ const int max_bang_size = (int)MAX2(JavaThread::stack_shadow_zone_size(), JavaThread::stack_guard_zone_size()); ++ ++ // add in the redzone and yellow size ++ __ move(AT, max_bang_size); ++ __ addu(T3, T3, AT); ++ ++ // check against the current stack bottom ++ __ slt(AT, T3, SP); ++ __ bne(AT, R0, after_frame_check); ++ __ delayed()->nop(); ++ ++ // Note: the restored frame is not necessarily interpreted. ++ // Use the shared runtime version of the StackOverflowError. 
++ __ move(SP, Rsender); ++ assert(StubRoutines::throw_StackOverflowError_entry() != NULL, "stub not yet generated"); ++ __ jmp(StubRoutines::throw_StackOverflowError_entry(), relocInfo::runtime_call_type); ++ __ delayed()->nop(); ++ ++ // all done with frame size check ++ __ bind(after_frame_check); ++} ++ ++// Allocate monitor and lock method (asm interpreter) ++// Rmethod - Method* ++void TemplateInterpreterGenerator::lock_method(void) { ++ // synchronize method ++ const int entry_size = frame::interpreter_frame_monitor_size() * wordSize; ++ ++#ifdef ASSERT ++ { Label L; ++ __ lw(T0, Rmethod, in_bytes(Method::access_flags_offset())); ++ __ andi(T0, T0, JVM_ACC_SYNCHRONIZED); ++ __ bne(T0, R0, L); ++ __ delayed()->nop(); ++ __ stop("method doesn't need synchronization"); ++ __ bind(L); ++ } ++#endif // ASSERT ++ // get synchronization object ++ { ++ Label done; ++ __ lw(T0, Rmethod, in_bytes(Method::access_flags_offset())); ++ __ andi(T2, T0, JVM_ACC_STATIC); ++ __ ld(T0, LVP, Interpreter::local_offset_in_bytes(0)); ++ __ beq(T2, R0, done); ++ __ delayed()->nop(); ++ __ load_mirror(T0, Rmethod, T9); ++ __ bind(done); ++ } ++ // add space for monitor & lock ++ __ daddiu(SP, SP, (-1) * entry_size); // add space for a monitor entry ++ __ sd(SP, FP, frame::interpreter_frame_monitor_block_top_offset * wordSize); ++ // set new monitor block top ++ __ sd(T0, SP, BasicObjectLock::obj_offset_in_bytes()); // store object ++ // FIXME: I do not know what lock_object will do and what it will need ++ __ move(c_rarg0, SP); // object address ++ __ lock_object(c_rarg0); ++} ++ ++// Generate a fixed interpreter frame. This is identical setup for ++// interpreted methods and for native methods hence the shared code. ++void TemplateInterpreterGenerator::generate_fixed_frame(bool native_call) { ++ ++ // [ local var m-1 ] <--- sp ++ // ... ++ // [ local var 0 ] ++ // [ argumnet word n-1 ] <--- T0(sender's sp) ++ // ... 
++ // [ argument word 0 ] <--- S7 ++ ++ // initialize fixed part of activation frame ++ // sender's sp in Rsender ++ int i = 0; ++ int frame_size = 10; ++#ifndef CORE ++ ++frame_size; ++#endif ++ __ daddiu(SP, SP, (-frame_size) * wordSize); ++ __ sd(RA, SP, (frame_size - 1) * wordSize); // save return address ++ __ sd(FP, SP, (frame_size - 2) * wordSize); // save sender's fp ++ __ daddiu(FP, SP, (frame_size - 2) * wordSize); ++ __ sd(Rsender, FP, (-++i) * wordSize); // save sender's sp ++ __ sd(R0, FP,(-++i) * wordSize); //save last_sp as null ++ __ sd(LVP, FP, (-++i) * wordSize); // save locals offset ++ __ ld(BCP, Rmethod, in_bytes(Method::const_offset())); // get constMethodOop ++ __ daddiu(BCP, BCP, in_bytes(ConstMethod::codes_offset())); // get codebase ++ __ sd(Rmethod, FP, (-++i) * wordSize); // save Method* ++ // Get mirror and store it in the frame as GC root for this Method* ++ __ load_mirror(T2, Rmethod, T9); ++ __ sd(T2, FP, (-++i) * wordSize); // Mirror ++#ifndef CORE ++ if (ProfileInterpreter) { ++ Label method_data_continue; ++ __ ld(AT, Rmethod, in_bytes(Method::method_data_offset())); ++ __ beq(AT, R0, method_data_continue); ++ __ delayed()->nop(); ++ __ daddiu(AT, AT, in_bytes(MethodData::data_offset())); ++ __ bind(method_data_continue); ++ __ sd(AT, FP, (-++i) * wordSize); ++ } else { ++ __ sd(R0, FP, (-++i) * wordSize); ++ } ++#endif // !CORE ++ ++ __ ld(T2, Rmethod, in_bytes(Method::const_offset())); ++ __ ld(T2, T2, in_bytes(ConstMethod::constants_offset())); ++ __ ld(T2, T2, ConstantPool::cache_offset_in_bytes()); ++ __ sd(T2, FP, (-++i) * wordSize); // set constant pool cache ++ if (native_call) { ++ __ sd(R0, FP, (-++i) * wordSize); // no bcp ++ } else { ++ __ sd(BCP, FP, (-++i) * wordSize); // set bcp ++ } ++ __ sd(SP, FP, (-++i) * wordSize); // reserve word for pointer to expression stack bottom ++ assert(i + 2 == frame_size, "i + 2 should be equal to frame_size"); ++} ++ ++// End of helpers ++ ++// Various method entries ++//------------------------------------------------------------------------------------------------------------------------ ++// ++// ++ ++// Method entry for java.lang.ref.Reference.get. ++address TemplateInterpreterGenerator::generate_Reference_get_entry(void) { ++ address entry = __ pc(); ++ Label slow_path; ++ __ b(slow_path); ++ __ delayed()->nop(); ++ ++ // generate a vanilla interpreter entry as the slow path ++ __ bind(slow_path); ++ __ jump_to_entry(Interpreter::entry_for_kind(Interpreter::zerolocals)); ++ __ delayed()->nop(); ++ return entry; ++} ++ ++// Interpreter stub for calling a native method. (asm interpreter) ++// This sets up a somewhat different looking stack for calling the ++// native method than the typical interpreter frame setup. 
++address TemplateInterpreterGenerator::generate_native_entry(bool synchronized) { ++ // determine code generation flags ++ bool inc_counter = UseCompiler || CountCompiledCalls || LogTouchedMethods; ++ // Rsender: sender's sp ++ // Rmethod: Method* ++ address entry_point = __ pc(); ++ ++#ifndef CORE ++ const Address invocation_counter(Rmethod,in_bytes(MethodCounters::invocation_counter_offset() + ++ InvocationCounter::counter_offset())); ++#endif ++ ++ // get parameter size (always needed) ++ // the size in the java stack ++ __ ld(V0, Rmethod, in_bytes(Method::const_offset())); ++ __ lhu(V0, V0, in_bytes(ConstMethod::size_of_parameters_offset())); ++ ++ // native calls don't need the stack size check since they have no expression stack ++ // and the arguments are already on the stack and we only add a handful of words ++ // to the stack ++ ++ // Rmethod: Method* ++ // V0: size of parameters ++ // Layout of frame at this point ++ // ++ // [ argument word n-1 ] <--- sp ++ // ... ++ // [ argument word 0 ] ++ ++ // for natives the size of locals is zero ++ ++ // compute beginning of parameters (S7) ++ __ dsll(LVP, V0, Address::times_8); ++ __ daddiu(LVP, LVP, (-1) * wordSize); ++ __ daddu(LVP, LVP, SP); ++ ++ ++ // add 2 zero-initialized slots for native calls ++ // 1 slot for native oop temp offset (setup via runtime) ++ // 1 slot for static native result handler3 (setup via runtime) ++ __ push2(R0, R0); ++ ++ // Layout of frame at this point ++ // [ method holder mirror ] <--- sp ++ // [ result type info ] ++ // [ argument word n-1 ] <--- T0 ++ // ... ++ // [ argument word 0 ] <--- LVP ++ ++ ++#ifndef CORE ++ if (inc_counter) __ lw(T3, invocation_counter); // (pre-)fetch invocation count ++#endif ++ ++ // initialize fixed part of activation frame ++ generate_fixed_frame(true); ++ // after this function, the layout of frame is as following ++ // ++ // [ monitor block top ] <--- sp ( the top monitor entry ) ++ // [ byte code pointer (0) ] (if native, bcp = 0) ++ // [ constant pool cache ] ++ // [ Mirror ] ++ // [ Method* ] ++ // [ locals offset ] ++ // [ sender's sp ] ++ // [ sender's fp ] ++ // [ return address ] <--- fp ++ // [ method holder mirror ] ++ // [ result type info ] ++ // [ argumnet word n-1 ] <--- sender's sp ++ // ... ++ // [ argument word 0 ] <--- S7 ++ ++ ++ // make sure method is native & not abstract ++#ifdef ASSERT ++ __ lw(T0, Rmethod, in_bytes(Method::access_flags_offset())); ++ { ++ Label L; ++ __ andi(AT, T0, JVM_ACC_NATIVE); ++ __ bne(AT, R0, L); ++ __ delayed()->nop(); ++ __ stop("tried to execute native method as non-native"); ++ __ bind(L); ++ } ++ { ++ Label L; ++ __ andi(AT, T0, JVM_ACC_ABSTRACT); ++ __ beq(AT, R0, L); ++ __ delayed()->nop(); ++ __ stop("tried to execute abstract method in interpreter"); ++ __ bind(L); ++ } ++#endif ++ ++ // Since at this point in the method invocation the exception handler ++ // would try to exit the monitor of synchronized methods which hasn't ++ // been entered yet, we set the thread local variable ++ // _do_not_unlock_if_synchronized to true. The remove_activation will ++ // check this flag. 
++ Register thread = TREG; ++#ifndef OPT_THREAD ++ __ get_thread(thread); ++#endif ++ __ move(AT, (int)true); ++ __ sb(AT, thread, in_bytes(JavaThread::do_not_unlock_if_synchronized_offset())); ++ ++#ifndef CORE ++ // increment invocation count & check for overflow ++ Label invocation_counter_overflow; ++ if (inc_counter) { ++ generate_counter_incr(&invocation_counter_overflow, NULL, NULL); ++ } ++ ++ Label continue_after_compile; ++ __ bind(continue_after_compile); ++#endif // CORE ++ ++ bang_stack_shadow_pages(true); ++ ++ // reset the _do_not_unlock_if_synchronized flag ++#ifndef OPT_THREAD ++ __ get_thread(thread); ++#endif ++ __ sb(R0, thread, in_bytes(JavaThread::do_not_unlock_if_synchronized_offset())); ++ ++ // check for synchronized methods ++ // Must happen AFTER invocation_counter check and stack overflow check, ++ // so method is not locked if overflows. ++ if (synchronized) { ++ lock_method(); ++ } else { ++ // no synchronization necessary ++#ifdef ASSERT ++ { ++ Label L; ++ __ lw(T0, Rmethod, in_bytes(Method::access_flags_offset())); ++ __ andi(AT, T0, JVM_ACC_SYNCHRONIZED); ++ __ beq(AT, R0, L); ++ __ delayed()->nop(); ++ __ stop("method needs synchronization"); ++ __ bind(L); ++ } ++#endif ++ } ++ ++ // after method_lock, the layout of frame is as following ++ // ++ // [ monitor entry ] <--- sp ++ // ... ++ // [ monitor entry ] ++ // [ monitor block top ] ( the top monitor entry ) ++ // [ byte code pointer (0) ] (if native, bcp = 0) ++ // [ constant pool cache ] ++ // [ Mirror ] ++ // [ Method* ] ++ // [ locals offset ] ++ // [ sender's sp ] ++ // [ sender's fp ] ++ // [ return address ] <--- fp ++ // [ method holder mirror ] ++ // [ result type info ] ++ // [ argumnet word n-1 ] <--- ( sender's sp ) ++ // ... ++ // [ argument word 0 ] <--- S7 ++ ++ // start execution ++#ifdef ASSERT ++ { ++ Label L; ++ __ ld(AT, FP, frame::interpreter_frame_monitor_block_top_offset * wordSize); ++ __ beq(AT, SP, L); ++ __ delayed()->nop(); ++ __ stop("broken stack frame setup in interpreter in asm"); ++ __ bind(L); ++ } ++#endif ++ ++ // jvmti/jvmpi support ++ __ notify_method_entry(); ++ ++ // work registers ++ const Register method = Rmethod; ++ const Register t = T8; ++ ++ __ get_method(method); ++ { ++ Label L, Lstatic; ++ __ ld(t,method,in_bytes(Method::const_offset())); ++ __ lhu(t, t, in_bytes(ConstMethod::size_of_parameters_offset())); ++ // MIPS n64 ABI: caller does not reserve space for the register auguments. ++ // A0 and A1(if needed) ++ __ lw(AT, Rmethod, in_bytes(Method::access_flags_offset())); ++ __ andi(AT, AT, JVM_ACC_STATIC); ++ __ beq(AT, R0, Lstatic); ++ __ delayed()->nop(); ++ __ daddiu(t, t, 1); ++ __ bind(Lstatic); ++ __ daddiu(t, t, -7); ++ __ blez(t, L); ++ __ delayed()->nop(); ++ __ dsll(t, t, Address::times_8); ++ __ dsubu(SP, SP, t); ++ __ bind(L); ++ } ++ __ move(AT, -(StackAlignmentInBytes)); ++ __ andr(SP, SP, AT); ++ __ move(AT, SP); ++ // [ ] <--- sp ++ // ... (size of parameters - 8 ) ++ // [ monitor entry ] ++ // ... ++ // [ monitor entry ] ++ // [ monitor block top ] ( the top monitor entry ) ++ // [ byte code pointer (0) ] (if native, bcp = 0) ++ // [ constant pool cache ] ++ // [ Mirror ] ++ // [ Method* ] ++ // [ locals offset ] ++ // [ sender's sp ] ++ // [ sender's fp ] ++ // [ return address ] <--- fp ++ // [ method holder mirror ] ++ // [ result type info ] ++ // [ argumnet word n-1 ] <--- ( sender's sp ) ++ // ... 
++ // [ argument word 0 ] <--- LVP ++ ++ // get signature handler ++ { ++ Label L; ++ __ ld(T9, method, in_bytes(Method::signature_handler_offset())); ++ __ bne(T9, R0, L); ++ __ delayed()->nop(); ++ __ call_VM(NOREG, CAST_FROM_FN_PTR(address, ++ InterpreterRuntime::prepare_native_call), method); ++ __ get_method(method); ++ __ ld(T9, method, in_bytes(Method::signature_handler_offset())); ++ __ bind(L); ++ } ++ ++ // call signature handler ++ // FIXME: when change codes in InterpreterRuntime, note this point ++ // from: begin of parameters ++ assert(InterpreterRuntime::SignatureHandlerGenerator::from() == LVP, "adjust this code"); ++ // to: current sp ++ assert(InterpreterRuntime::SignatureHandlerGenerator::to () == SP, "adjust this code"); ++ // temp: T3 ++ assert(InterpreterRuntime::SignatureHandlerGenerator::temp() == t , "adjust this code"); ++ ++ __ jalr(T9); ++ __ delayed()->nop(); ++ __ get_method(method); ++ ++ // ++ // if native function is static, and its second parameter has type length of double word, ++ // and first parameter has type length of word, we have to reserve one word ++ // for the first parameter, according to mips o32 abi. ++ // if native function is not static, and its third parameter has type length of double word, ++ // and second parameter has type length of word, we have to reserve one word for the second ++ // parameter. ++ // ++ ++ ++ // result handler is in V0 ++ // set result handler ++ __ sd(V0, FP, (frame::interpreter_frame_result_handler_offset)*wordSize); ++ ++#define FIRSTPARA_SHIFT_COUNT 5 ++#define SECONDPARA_SHIFT_COUNT 9 ++#define THIRDPARA_SHIFT_COUNT 13 ++#define PARA_MASK 0xf ++ ++ // pass mirror handle if static call ++ { ++ Label L; ++ __ lw(t, method, in_bytes(Method::access_flags_offset())); ++ __ andi(AT, t, JVM_ACC_STATIC); ++ __ beq(AT, R0, L); ++ __ delayed()->nop(); ++ ++ // get mirror ++ __ load_mirror(t, method, T9); ++ // copy mirror into activation frame ++ __ sd(t, FP, frame::interpreter_frame_oop_temp_offset * wordSize); ++ // pass handle to mirror ++ __ daddiu(t, FP, frame::interpreter_frame_oop_temp_offset * wordSize); ++ __ move(A1, t); ++ __ bind(L); ++ } ++ ++ // [ mthd holder mirror ptr ] <--- sp --------------------| (only for static method) ++ // [ ] | ++ // ... size of parameters(or +1) | ++ // [ monitor entry ] | ++ // ... | ++ // [ monitor entry ] | ++ // [ monitor block top ] ( the top monitor entry ) | ++ // [ byte code pointer (0) ] (if native, bcp = 0) | ++ // [ constant pool cache ] | ++ // [ Mirror ] | ++ // [ Method* ] | ++ // [ locals offset ] | ++ // [ sender's sp ] | ++ // [ sender's fp ] | ++ // [ return address ] <--- fp | ++ // [ method holder mirror ] <----------------------------| ++ // [ result type info ] ++ // [ argumnet word n-1 ] <--- ( sender's sp ) ++ // ... 
++ // [ argument word 0 ] <--- S7 ++ ++ // get native function entry point ++ { Label L; ++ __ ld(T9, method, in_bytes(Method::native_function_offset())); ++ __ li(V1, SharedRuntime::native_method_throw_unsatisfied_link_error_entry()); ++ __ bne(V1, T9, L); ++ __ delayed()->nop(); ++ __ call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::prepare_native_call), method); ++ __ get_method(method); ++ __ ld(T9, method, in_bytes(Method::native_function_offset())); ++ __ bind(L); ++ } ++ ++ // pass JNIEnv ++ // native function in T9 ++#ifndef OPT_THREAD ++ __ get_thread(thread); ++#endif ++ __ daddiu(t, thread, in_bytes(JavaThread::jni_environment_offset())); ++ __ move(A0, t); ++ // [ jni environment ] <--- sp ++ // [ mthd holder mirror ptr ] ---------------------------->| (only for static method) ++ // [ ] | ++ // ... size of parameters | ++ // [ monitor entry ] | ++ // ... | ++ // [ monitor entry ] | ++ // [ monitor block top ] ( the top monitor entry ) | ++ // [ byte code pointer (0) ] (if native, bcp = 0) | ++ // [ constant pool cache ] | ++ // [ Mirror ] | ++ // [ Method* ] | ++ // [ locals offset ] | ++ // [ sender's sp ] | ++ // [ sender's fp ] | ++ // [ return address ] <--- fp | ++ // [ method holder mirror ] <----------------------------| ++ // [ result type info ] ++ // [ argumnet word n-1 ] <--- ( sender's sp ) ++ // ... ++ // [ argument word 0 ] <--- S7 ++ ++ // set_last_Java_frame_before_call ++ __ sd(FP, thread, in_bytes(JavaThread::last_Java_fp_offset())); ++ // Change state to native (we save the return address in the thread, since it might not ++ // be pushed on the stack when we do a a stack traversal). It is enough that the pc() ++ // points into the right code segment. It does not have to be the correct return pc. ++ __ li(t, __ pc()); ++ __ sd(t, thread, in_bytes(JavaThread::last_Java_pc_offset())); ++ __ sd(SP, thread, in_bytes(JavaThread::last_Java_sp_offset())); ++ ++ // change thread state ++#ifdef ASSERT ++ { ++ Label L; ++ __ lw(t, thread, in_bytes(JavaThread::thread_state_offset())); ++ __ daddiu(t, t, (-1) * _thread_in_Java); ++ __ beq(t, R0, L); ++ __ delayed()->nop(); ++ __ stop("Wrong thread state in native stub"); ++ __ bind(L); ++ } ++#endif ++ ++ __ move(t, _thread_in_native); ++ if(os::is_MP()) { ++ __ sync(); // store release ++ } ++ __ sw(t, thread, in_bytes(JavaThread::thread_state_offset())); ++ ++ // call native method ++ __ jalr(T9); ++ __ delayed()->nop(); ++ // result potentially in V0 or F0 ++ ++ ++ // via _last_native_pc and not via _last_jave_sp ++ // NOTE: the order of theses push(es) is known to frame::interpreter_frame_result. ++ // If the order changes or anything else is added to the stack the code in ++ // interpreter_frame_result will have to be changed. ++ //FIXME, should modify here ++ // save return value to keep the value from being destroyed by other calls ++ __ push(dtos); ++ __ push(ltos); ++ ++ // change thread state ++#ifndef OPT_THREAD ++ __ get_thread(thread); ++#endif ++ __ move(t, _thread_in_native_trans); ++ if(os::is_MP()) { ++ __ sync(); // store release ++ } ++ __ sw(t, thread, in_bytes(JavaThread::thread_state_offset())); ++ ++ if(os::is_MP()) { ++ if (UseMembar) { ++ // Force this write out before the read below ++ __ sync(); ++ } else { ++ // Write serialization page so VM thread can do a pseudo remote membar. ++ // We use the current thread pointer to calculate a thread specific ++ // offset to write to within the page. This minimizes bus traffic ++ // due to cache line collision. 
++ __ serialize_memory(thread, A0); ++ } ++ } ++ ++ // check for safepoint operation in progress and/or pending suspend requests ++ { Label Continue; ++ ++ // Don't use call_VM as it will see a possible pending exception and forward it ++ // and never return here preventing us from clearing _last_native_pc down below. ++ // Also can't use call_VM_leaf either as it will check to see if BCP & LVP are ++ // preserved and correspond to the bcp/locals pointers. So we do a runtime call ++ // by hand. ++ // ++ Label slow_path; ++ ++ __ safepoint_poll_acquire(slow_path, thread); ++ __ lw(AT, thread, in_bytes(JavaThread::suspend_flags_offset())); ++ __ beq(AT, R0, Continue); ++ __ delayed()->nop(); ++ __ bind(slow_path); ++ __ move(A0, thread); ++ __ call(CAST_FROM_FN_PTR(address, JavaThread::check_special_condition_for_native_trans), ++ relocInfo::runtime_call_type); ++ __ delayed()->nop(); ++ ++#ifndef OPT_THREAD ++ __ get_thread(thread); ++#endif ++ //add for compressedoops ++ __ reinit_heapbase(); ++ __ bind(Continue); ++ } ++ ++ // change thread state ++ __ move(t, _thread_in_Java); ++ if(os::is_MP()) { ++ __ sync(); // store release ++ } ++ __ sw(t, thread, in_bytes(JavaThread::thread_state_offset())); ++ __ reset_last_Java_frame(thread, true); ++ ++ if (CheckJNICalls) { ++ // clear_pending_jni_exception_check ++ __ sd(R0, thread, in_bytes(JavaThread::pending_jni_exception_check_fn_offset())); ++ } ++ ++ // reset handle block ++ __ ld(t, thread, in_bytes(JavaThread::active_handles_offset())); ++ __ sw(R0, t, JNIHandleBlock::top_offset_in_bytes()); ++ ++ // If result was an oop then unbox and save it in the frame ++ { ++ Label no_oop; ++ //FIXME, addi only support 16-bit imeditate ++ __ ld(AT, FP, frame::interpreter_frame_result_handler_offset*wordSize); ++ __ li(T0, AbstractInterpreter::result_handler(T_OBJECT)); ++ __ bne(AT, T0, no_oop); ++ __ delayed()->nop(); ++ __ pop(ltos); ++ // Unbox oop result, e.g. JNIHandles::resolve value. ++ __ resolve_jobject(V0, thread, T9); ++ __ sd(V0, FP, (frame::interpreter_frame_oop_temp_offset)*wordSize); ++ // keep stack depth as expected by pushing oop which will eventually be discarded ++ __ push(ltos); ++ __ bind(no_oop); ++ } ++ { ++ Label no_reguard; ++ __ lw(t, thread, in_bytes(JavaThread::stack_guard_state_offset())); ++ __ move(AT, (u1)JavaThread::stack_guard_yellow_reserved_disabled); ++ __ bne(t, AT, no_reguard); ++ __ delayed()->nop(); ++ __ pushad(); ++ __ move(S5_heapbase, SP); ++ __ move(AT, -StackAlignmentInBytes); ++ __ andr(SP, SP, AT); ++ __ call(CAST_FROM_FN_PTR(address, SharedRuntime::reguard_yellow_pages), relocInfo::runtime_call_type); ++ __ delayed()->nop(); ++ __ move(SP, S5_heapbase); ++ __ popad(); ++ //add for compressedoops ++ __ reinit_heapbase(); ++ __ bind(no_reguard); ++ } ++ // restore BCP to have legal interpreter frame, ++ // i.e., bci == 0 <=> BCP == code_base() ++ // Can't call_VM until bcp is within reasonable. ++ __ get_method(method); // method is junk from thread_in_native to now. ++ __ ld(BCP, method, in_bytes(Method::const_offset())); ++ __ lea(BCP, Address(BCP, in_bytes(ConstMethod::codes_offset()))); ++ // handle exceptions (exception handling will handle unlocking!) ++ { ++ Label L; ++ __ ld(t, thread, in_bytes(Thread::pending_exception_offset())); ++ __ beq(t, R0, L); ++ __ delayed()->nop(); ++ // Note: At some point we may want to unify this with the code used in ++ // call_VM_base(); ++ // i.e., we should use the StubRoutines::forward_exception code. 
For now this ++ // doesn't work here because the sp is not correctly set at this point. ++ __ MacroAssembler::call_VM(noreg, ++ CAST_FROM_FN_PTR(address, ++ InterpreterRuntime::throw_pending_exception)); ++ __ should_not_reach_here(); ++ __ bind(L); ++ } ++ ++ // do unlocking if necessary ++ { ++ Label L; ++ __ lw(t, method, in_bytes(Method::access_flags_offset())); ++ __ andi(t, t, JVM_ACC_SYNCHRONIZED); ++ __ beq(t, R0, L); ++ // the code below should be shared with interpreter macro assembler implementation ++ { ++ Label unlock; ++ // BasicObjectLock will be first in list, ++ // since this is a synchronized method. However, need ++ // to check that the object has not been unlocked by ++ // an explicit monitorexit bytecode. ++ __ delayed()->daddiu(c_rarg0, FP, frame::interpreter_frame_initial_sp_offset * wordSize - (int)sizeof(BasicObjectLock)); ++ // address of first monitor ++ ++ __ ld(t, c_rarg0, BasicObjectLock::obj_offset_in_bytes()); ++ __ bne(t, R0, unlock); ++ __ delayed()->nop(); ++ ++ // Entry already unlocked, need to throw exception ++ __ MacroAssembler::call_VM(NOREG, CAST_FROM_FN_PTR(address, ++ InterpreterRuntime::throw_illegal_monitor_state_exception)); ++ __ should_not_reach_here(); ++ ++ __ bind(unlock); ++ __ unlock_object(c_rarg0); ++ } ++ __ bind(L); ++ } ++ ++ // jvmti/jvmpi support ++ // Note: This must happen _after_ handling/throwing any exceptions since ++ // the exception handler code notifies the runtime of method exits ++ // too. If this happens before, method entry/exit notifications are ++ // not properly paired (was bug - gri 11/22/99). ++ __ notify_method_exit(vtos, InterpreterMacroAssembler::NotifyJVMTI); ++ ++ // restore potential result in V0, ++ // call result handler to restore potential result in ST0 & handle result ++ ++ __ pop(ltos); ++ __ pop(dtos); ++ ++ __ ld(t, FP, (frame::interpreter_frame_result_handler_offset) * wordSize); ++ __ jalr(t); ++ __ delayed()->nop(); ++ ++ ++ // remove activation ++ __ ld(SP, FP, frame::interpreter_frame_sender_sp_offset * wordSize); // get sender sp ++ __ ld(RA, FP, frame::interpreter_frame_return_addr_offset * wordSize); // get return address ++ __ ld(FP, FP, frame::interpreter_frame_sender_fp_offset * wordSize); // restore sender's fp ++ __ jr(RA); ++ __ delayed()->nop(); ++ ++#ifndef CORE ++ if (inc_counter) { ++ // Handle overflow of counter and compile method ++ __ bind(invocation_counter_overflow); ++ generate_counter_overflow(continue_after_compile); ++ // entry_point is the beginning of this ++ // function and checks again for compiled code ++ } ++#endif ++ return entry_point; ++} ++ ++void TemplateInterpreterGenerator::bang_stack_shadow_pages(bool native_call) { ++ // Quick & dirty stack overflow checking: bang the stack & handle trap. ++ // Note that we do the banging after the frame is setup, since the exception ++ // handling code expects to find a valid interpreter frame on the stack. ++ // Doing the banging earlier fails if the caller frame is not an interpreter ++ // frame. ++ // (Also, the exception throwing code expects to unlock any synchronized ++ // method receiever, so do the banging after locking the receiver.) ++ ++ // Bang each page in the shadow zone. We can't assume it's been done for ++ // an interpreter frame with greater than a page of locals, so each page ++ // needs to be checked. Only true for non-native. 
++ if (UseStackBanging) { ++ const int page_size = os::vm_page_size(); ++ const int n_shadow_pages = ((int)JavaThread::stack_shadow_zone_size()) / page_size; ++ const int start_page = native_call ? n_shadow_pages : 1; ++ BLOCK_COMMENT("bang_stack_shadow_pages:"); ++ for (int pages = start_page; pages <= n_shadow_pages; pages++) { ++ __ bang_stack_with_offset(pages*page_size); ++ } ++ } ++} ++ ++// ++// Generic interpreted method entry to (asm) interpreter ++// ++// Layout of frame just at the entry ++// ++// [ argument word n-1 ] <--- sp ++// ... ++// [ argument word 0 ] ++// assume Method* in Rmethod before call this method. ++// prerequisites to the generated stub : the callee Method* in Rmethod ++// note you must save the caller bcp before call the generated stub ++// ++address TemplateInterpreterGenerator::generate_normal_entry(bool synchronized) { ++ // determine code generation flags ++ bool inc_counter = UseCompiler || CountCompiledCalls || LogTouchedMethods; ++ ++ // Rmethod: Method* ++ // Rsender: sender 's sp ++ address entry_point = __ pc(); ++ ++ const Address invocation_counter(Rmethod, ++ in_bytes(MethodCounters::invocation_counter_offset() + InvocationCounter::counter_offset())); ++ ++ // get parameter size (always needed) ++ __ ld(T3, Rmethod, in_bytes(Method::const_offset())); //T3 --> Rmethod._constMethod ++ __ lhu(V0, T3, in_bytes(ConstMethod::size_of_parameters_offset())); ++ ++ // Rmethod: Method* ++ // V0: size of parameters ++ // Rsender: sender 's sp ,could be different frome sp+ wordSize if we call via c2i ++ // get size of locals in words to T2 ++ __ lhu(T2, T3, in_bytes(ConstMethod::size_of_locals_offset())); ++ // T2 = no. of additional locals, locals include parameters ++ __ dsubu(T2, T2, V0); ++ ++ // see if we've got enough room on the stack for locals plus overhead. ++ // Layout of frame at this point ++ // ++ // [ argument word n-1 ] <--- sp ++ // ... ++ // [ argument word 0 ] ++ generate_stack_overflow_check(); ++ // after this function, the layout of frame does not change ++ ++ // compute beginning of parameters (LVP) ++ __ dsll(LVP, V0, LogBytesPerWord); ++ __ daddiu(LVP, LVP, (-1) * wordSize); ++ __ daddu(LVP, LVP, SP); ++ ++ // T2 - # of additional locals ++ // allocate space for locals ++ // explicitly initialize locals ++ { ++ Label exit, loop; ++ __ beq(T2, R0, exit); ++ __ delayed()->nop(); ++ ++ __ bind(loop); ++ __ daddiu(SP, SP, (-1) * wordSize); ++ __ daddiu(T2, T2, -1); // until everything initialized ++ __ bne(T2, R0, loop); ++ __ delayed()->sd(R0, SP, 0); // initialize local variables ++ ++ __ bind(exit); ++ } ++ ++ // ++ // [ local var m-1 ] <--- sp ++ // ... ++ // [ local var 0 ] ++ // [ argument word n-1 ] <--- T0? ++ // ... ++ // [ argument word 0 ] <--- LVP ++ ++ // initialize fixed part of activation frame ++ ++ generate_fixed_frame(false); ++ ++ ++ // after this function, the layout of frame is as following ++ // ++ // [ monitor block top ] <--- sp ( the top monitor entry ) ++ // [ byte code pointer ] (if native, bcp = 0) ++ // [ constant pool cache ] ++ // [ Method* ] ++ // [ locals offset ] ++ // [ sender's sp ] ++ // [ sender's fp ] <--- fp ++ // [ return address ] ++ // [ local var m-1 ] ++ // ... ++ // [ local var 0 ] ++ // [ argumnet word n-1 ] <--- ( sender's sp ) ++ // ... 
++ // [ argument word 0 ] <--- LVP ++ ++ ++ // make sure method is not native & not abstract ++#ifdef ASSERT ++ __ ld(AT, Rmethod, in_bytes(Method::access_flags_offset())); ++ { ++ Label L; ++ __ andi(T2, AT, JVM_ACC_NATIVE); ++ __ beq(T2, R0, L); ++ __ delayed()->nop(); ++ __ stop("tried to execute native method as non-native"); ++ __ bind(L); ++ } ++ { ++ Label L; ++ __ andi(T2, AT, JVM_ACC_ABSTRACT); ++ __ beq(T2, R0, L); ++ __ delayed()->nop(); ++ __ stop("tried to execute abstract method in interpreter"); ++ __ bind(L); ++ } ++#endif ++ ++ // Since at this point in the method invocation the exception handler ++ // would try to exit the monitor of synchronized methods which hasn't ++ // been entered yet, we set the thread local variable ++ // _do_not_unlock_if_synchronized to true. The remove_activation will ++ // check this flag. ++ ++#ifndef OPT_THREAD ++ Register thread = T8; ++ __ get_thread(thread); ++#else ++ Register thread = TREG; ++#endif ++ __ move(AT, (int)true); ++ __ sb(AT, thread, in_bytes(JavaThread::do_not_unlock_if_synchronized_offset())); ++ ++#ifndef CORE ++ ++ // mdp : T8 ++ // tmp1: T9 ++ // tmp2: T2 ++ __ profile_parameters_type(T8, T9, T2); ++ ++ // increment invocation count & check for overflow ++ Label invocation_counter_overflow; ++ Label profile_method; ++ Label profile_method_continue; ++ if (inc_counter) { ++ generate_counter_incr(&invocation_counter_overflow, ++ &profile_method, ++ &profile_method_continue); ++ if (ProfileInterpreter) { ++ __ bind(profile_method_continue); ++ } ++ } ++ ++ Label continue_after_compile; ++ __ bind(continue_after_compile); ++ ++#endif // CORE ++ ++ bang_stack_shadow_pages(false); ++ ++ // reset the _do_not_unlock_if_synchronized flag ++#ifndef OPT_THREAD ++ __ get_thread(thread); ++#endif ++ __ sb(R0, thread, in_bytes(JavaThread::do_not_unlock_if_synchronized_offset())); ++ ++ // check for synchronized methods ++ // Must happen AFTER invocation_counter check and stack overflow check, ++ // so method is not locked if overflows. ++ // ++ if (synchronized) { ++ // Allocate monitor and lock method ++ lock_method(); ++ } else { ++ // no synchronization necessary ++#ifdef ASSERT ++ { Label L; ++ __ lw(AT, Rmethod, in_bytes(Method::access_flags_offset())); ++ __ andi(T2, AT, JVM_ACC_SYNCHRONIZED); ++ __ beq(T2, R0, L); ++ __ delayed()->nop(); ++ __ stop("method needs synchronization"); ++ __ bind(L); ++ } ++#endif ++ } ++ ++ // layout of frame after lock_method ++ // [ monitor entry ] <--- sp ++ // ... ++ // [ monitor entry ] ++ // [ monitor block top ] ( the top monitor entry ) ++ // [ byte code pointer ] (if native, bcp = 0) ++ // [ constant pool cache ] ++ // [ Method* ] ++ // [ locals offset ] ++ // [ sender's sp ] ++ // [ sender's fp ] ++ // [ return address ] <--- fp ++ // [ local var m-1 ] ++ // ... ++ // [ local var 0 ] ++ // [ argumnet word n-1 ] <--- ( sender's sp ) ++ // ... 
++ // [ argument word 0 ] <--- LVP ++ ++ ++ // start execution ++#ifdef ASSERT ++ { ++ Label L; ++ __ ld(AT, FP, frame::interpreter_frame_monitor_block_top_offset * wordSize); ++ __ beq(AT, SP, L); ++ __ delayed()->nop(); ++ __ stop("broken stack frame setup in interpreter in native"); ++ __ bind(L); ++ } ++#endif ++ ++ // jvmti/jvmpi support ++ __ notify_method_entry(); ++ ++ __ dispatch_next(vtos); ++ ++ // invocation counter overflow ++ if (inc_counter) { ++ if (ProfileInterpreter) { ++ // We have decided to profile this method in the interpreter ++ __ bind(profile_method); ++ __ call_VM(noreg, CAST_FROM_FN_PTR(address, ++ InterpreterRuntime::profile_method)); ++ __ set_method_data_pointer_for_bcp(); ++ __ get_method(Rmethod); ++ __ b(profile_method_continue); ++ __ delayed()->nop(); ++ } ++ // Handle overflow of counter and compile method ++ __ bind(invocation_counter_overflow); ++ generate_counter_overflow(continue_after_compile); ++ } ++ ++ return entry_point; ++} ++ ++//----------------------------------------------------------------------------- ++// Exceptions ++ ++void TemplateInterpreterGenerator::generate_throw_exception() { ++ // Entry point in previous activation (i.e., if the caller was ++ // interpreted) ++ Interpreter::_rethrow_exception_entry = __ pc(); ++ // Restore sp to interpreter_frame_last_sp even though we are going ++ // to empty the expression stack for the exception processing. ++ __ sd(R0,FP, frame::interpreter_frame_last_sp_offset * wordSize); ++ ++ // V0: exception ++ // V1: return address/pc that threw exception ++ __ restore_bcp(); // BCP points to call/send ++ __ restore_locals(); ++ ++ //add for compressedoops ++ __ reinit_heapbase(); ++ // Entry point for exceptions thrown within interpreter code ++ Interpreter::_throw_exception_entry = __ pc(); ++ // expression stack is undefined here ++ // V0: exception ++ // BCP: exception bcp ++ __ verify_oop(V0); ++ ++ // expression stack must be empty before entering the VM in case of an exception ++ __ empty_expression_stack(); ++ // find exception handler address and preserve exception oop ++ __ move(A1, V0); ++ __ call_VM(V1, CAST_FROM_FN_PTR(address, InterpreterRuntime::exception_handler_for_exception), A1); ++ // V0: exception handler entry point ++ // V1: preserved exception oop ++ // S0: bcp for exception handler ++ __ push(V1); // push exception which is now the only value on the stack ++ __ jr(V0); // jump to exception handler (may be _remove_activation_entry!) ++ __ delayed()->nop(); ++ ++ // If the exception is not handled in the current frame the frame is removed and ++ // the exception is rethrown (i.e. exception continuation is _rethrow_exception). ++ // ++ // Note: At this point the bci is still the bxi for the instruction which caused ++ // the exception and the expression stack is empty. Thus, for any VM calls ++ // at this point, GC will find a legal oop map (with empty expression stack). ++ ++ // In current activation ++ // V0: exception ++ // BCP: exception bcp ++ ++ // ++ // JVMTI PopFrame support ++ // ++ ++ Interpreter::_remove_activation_preserving_args_entry = __ pc(); ++ __ empty_expression_stack(); ++ // Set the popframe_processing bit in pending_popframe_condition indicating that we are ++ // currently handling popframe, so that call_VMs that may happen later do not trigger new ++ // popframe handling cycles. 
++#ifndef OPT_THREAD ++ Register thread = T2; ++ __ get_thread(T2); ++#else ++ Register thread = TREG; ++#endif ++ __ lw(T3, thread, in_bytes(JavaThread::popframe_condition_offset())); ++ __ ori(T3, T3, JavaThread::popframe_processing_bit); ++ __ sw(T3, thread, in_bytes(JavaThread::popframe_condition_offset())); ++ ++#ifndef CORE ++ { ++ // Check to see whether we are returning to a deoptimized frame. ++ // (The PopFrame call ensures that the caller of the popped frame is ++ // either interpreted or compiled and deoptimizes it if compiled.) ++ // In this case, we can't call dispatch_next() after the frame is ++ // popped, but instead must save the incoming arguments and restore ++ // them after deoptimization has occurred. ++ // ++ // Note that we don't compare the return PC against the ++ // deoptimization blob's unpack entry because of the presence of ++ // adapter frames in C2. ++ Label caller_not_deoptimized; ++ __ ld(A0, FP, frame::return_addr_offset * wordSize); ++ __ super_call_VM_leaf(CAST_FROM_FN_PTR(address, InterpreterRuntime::interpreter_contains), A0); ++ __ bne(V0, R0, caller_not_deoptimized); ++ __ delayed()->nop(); ++ ++ // Compute size of arguments for saving when returning to deoptimized caller ++ __ get_method(A1); ++ __ verify_oop(A1); ++ __ ld( A1, A1, in_bytes(Method::const_offset())); ++ __ lhu(A1, A1, in_bytes(ConstMethod::size_of_parameters_offset())); ++ __ shl(A1, Interpreter::logStackElementSize); ++ __ restore_locals(); ++ __ dsubu(A2, LVP, A1); ++ __ daddiu(A2, A2, wordSize); ++ // Save these arguments ++#ifndef OPT_THREAD ++ __ get_thread(A0); ++#else ++ __ move(A0, TREG); ++#endif ++ __ super_call_VM_leaf(CAST_FROM_FN_PTR(address, Deoptimization::popframe_preserve_args), A0, A1, A2); ++ ++ __ remove_activation(vtos, T9, false, false, false); ++ ++ // Inform deoptimization that it is responsible for restoring these arguments ++#ifndef OPT_THREAD ++ __ get_thread(thread); ++#endif ++ __ move(AT, JavaThread::popframe_force_deopt_reexecution_bit); ++ __ sw(AT, thread, in_bytes(JavaThread::popframe_condition_offset())); ++ // Continue in deoptimization handler ++ __ jr(T9); ++ __ delayed()->nop(); ++ ++ __ bind(caller_not_deoptimized); ++ } ++#endif /* !CORE */ ++ ++ __ remove_activation(vtos, T3, ++ /* throw_monitor_exception */ false, ++ /* install_monitor_exception */ false, ++ /* notify_jvmdi */ false); ++ ++ // Clear the popframe condition flag ++ // Finish with popframe handling ++ // A previous I2C followed by a deoptimization might have moved the ++ // outgoing arguments further up the stack. PopFrame expects the ++ // mutations to those outgoing arguments to be preserved and other ++ // constraints basically require this frame to look exactly as ++ // though it had previously invoked an interpreted activation with ++ // no space between the top of the expression stack (current ++ // last_sp) and the top of stack. Rather than force deopt to ++ // maintain this kind of invariant all the time we call a small ++ // fixup routine to move the mutated arguments onto the top of our ++ // expression stack if necessary. 
++ __ move(T8, SP); ++ __ ld(A2, FP, frame::interpreter_frame_last_sp_offset * wordSize); ++#ifndef OPT_THREAD ++ __ get_thread(thread); ++#endif ++ // PC must point into interpreter here ++ __ set_last_Java_frame(thread, noreg, FP, __ pc()); ++ __ super_call_VM_leaf(CAST_FROM_FN_PTR(address, InterpreterRuntime::popframe_move_outgoing_args), thread, T8, A2); ++ __ reset_last_Java_frame(thread, true); ++ // Restore the last_sp and null it out ++ __ ld(SP, FP, frame::interpreter_frame_last_sp_offset * wordSize); ++ __ sd(R0, FP, frame::interpreter_frame_last_sp_offset * wordSize); ++ ++ ++ ++ __ move(AT, JavaThread::popframe_inactive); ++ __ sw(AT, thread, in_bytes(JavaThread::popframe_condition_offset())); ++ ++ // Finish with popframe handling ++ __ restore_bcp(); ++ __ restore_locals(); ++#ifndef CORE ++ // The method data pointer was incremented already during ++ // call profiling. We have to restore the mdp for the current bcp. ++ if (ProfileInterpreter) { ++ __ set_method_data_pointer_for_bcp(); ++ } ++#endif // !CORE ++ // Clear the popframe condition flag ++#ifndef OPT_THREAD ++ __ get_thread(thread); ++#endif ++ __ move(AT, JavaThread::popframe_inactive); ++ __ sw(AT, thread, in_bytes(JavaThread::popframe_condition_offset())); ++ ++#if INCLUDE_JVMTI ++ { ++ Label L_done; ++ ++ __ lbu(AT, BCP, 0); ++ __ daddiu(AT, AT, -1 * Bytecodes::_invokestatic); ++ __ bne(AT, R0, L_done); ++ __ delayed()->nop(); ++ ++ // The member name argument must be restored if _invokestatic is re-executed after a PopFrame call. ++ // Detect such a case in the InterpreterRuntime function and return the member name argument, or NULL. ++ ++ __ get_method(T9); ++ __ ld(T8, LVP, 0); ++ __ call_VM(T8, CAST_FROM_FN_PTR(address, InterpreterRuntime::member_name_arg_or_null), T8, T9, BCP); ++ ++ __ beq(T8, R0, L_done); ++ __ delayed()->nop(); ++ ++ __ sd(T8, SP, 0); ++ __ bind(L_done); ++ } ++#endif // INCLUDE_JVMTI ++ ++ __ dispatch_next(vtos); ++ // end of PopFrame support ++ ++ Interpreter::_remove_activation_entry = __ pc(); ++ ++ // preserve exception over this code sequence ++ __ pop(T0); ++#ifndef OPT_THREAD ++ __ get_thread(thread); ++#endif ++ __ sd(T0, thread, in_bytes(JavaThread::vm_result_offset())); ++ // remove the activation (without doing throws on illegalMonitorExceptions) ++ __ remove_activation(vtos, T3, false, true, false); ++ // restore exception ++ __ get_vm_result(T0, thread); ++ __ verify_oop(T0); ++ ++ // In between activations - previous activation type unknown yet ++ // compute continuation point - the continuation point expects ++ // the following registers set up: ++ // ++ // T0: exception ++ // T1: return address/pc that threw exception ++ // SP: expression stack of caller ++ // FP: fp of caller ++ __ push2(T0, T3); // save exception and return address ++ __ move(A1, T3); ++ __ super_call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::exception_handler_for_return_address), thread, A1); ++ __ move(T9, V0); // save exception handler ++ __ pop2(V0, V1); // restore return address and exception ++ ++ // Note that an "issuing PC" is actually the next PC after the call ++ __ jr(T9); // jump to exception handler of caller ++ __ delayed()->nop(); ++} ++ ++ ++// ++// JVMTI ForceEarlyReturn support ++// ++address TemplateInterpreterGenerator::generate_earlyret_entry_for(TosState state) { ++ address entry = __ pc(); ++ __ restore_bcp(); ++ __ restore_locals(); ++ __ empty_expression_stack(); ++ __ empty_FPU_stack(); ++ __ load_earlyret_value(state); ++ ++#ifndef OPT_THREAD ++ __ get_thread(TREG); 
++#endif
++  __ ld_ptr(T9, TREG, in_bytes(JavaThread::jvmti_thread_state_offset()));
++  const Address cond_addr(T9, in_bytes(JvmtiThreadState::earlyret_state_offset()));
++  // Clear the earlyret state
++  __ move(AT, JvmtiThreadState::earlyret_inactive);
++  __ sw(AT, cond_addr);
++  __ sync();
++
++
++  __ remove_activation(state, T0,
++                       false, /* throw_monitor_exception */
++                       false, /* install_monitor_exception */
++                       true); /* notify_jvmdi */
++  __ sync();
++  __ jr(T0);
++  __ delayed()->nop();
++  return entry;
++} // end of ForceEarlyReturn support
++
++
++//-----------------------------------------------------------------------------
++// Helper for vtos entry point generation
++
++void TemplateInterpreterGenerator::set_vtos_entry_points(Template* t,
++                                                         address& bep,
++                                                         address& cep,
++                                                         address& sep,
++                                                         address& aep,
++                                                         address& iep,
++                                                         address& lep,
++                                                         address& fep,
++                                                         address& dep,
++                                                         address& vep) {
++  assert(t->is_valid() && t->tos_in() == vtos, "illegal template");
++  Label L;
++  fep = __ pc(); __ push(ftos); __ b(L); __ delayed()->nop();
++  dep = __ pc(); __ push(dtos); __ b(L); __ delayed()->nop();
++  lep = __ pc(); __ push(ltos); __ b(L); __ delayed()->nop();
++  aep = __ pc(); __ push(atos); __ b(L); __ delayed()->nop();
++  bep = cep = sep =
++  iep = __ pc(); __ push(itos);
++  vep = __ pc();
++  __ bind(L);
++  generate_and_dispatch(t);
++}
++
++
++/*
++//-----------------------------------------------------------------------------
++// Generation of individual instructions
++
++// helpers for generate_and_dispatch
++
++
++InterpreterGenerator::InterpreterGenerator(StubQueue* code)
++ : TemplateInterpreterGenerator(code) {
++   generate_all(); // down here so it can be "virtual"
++}
++*/
++
++//-----------------------------------------------------------------------------
++
++// Non-product code
++#ifndef PRODUCT
++address TemplateInterpreterGenerator::generate_trace_code(TosState state) {
++  address entry = __ pc();
++
++  // prepare expression stack
++  __ push(state);       // save tosca
++
++  // tos & tos2
++  // trace_bytecode actually needs 4 arguments; the last two are tos & tos2.
++  // This works fine on x86, but the MIPS o32 calling convention stores A2-A3
++  // into the stack slots it assumes hold tos & tos2, so an error would occur
++  // whenever the expression stack holds fewer than 2 entries.
++ __ ld(A2, SP, 0); ++ __ ld(A3, SP, 1 * wordSize); ++ ++ // pass arguments & call tracer ++ __ call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::trace_bytecode), RA, A2, A3); ++ __ move(RA, V0); // make sure return address is not destroyed by pop(state) ++ ++ // restore expression stack ++ __ pop(state); // restore tosca ++ ++ // return ++ __ jr(RA); ++ __ delayed()->nop(); ++ ++ return entry; ++} ++ ++void TemplateInterpreterGenerator::count_bytecode() { ++ __ li(T8, (long)&BytecodeCounter::_counter_value); ++ __ lw(AT, T8, 0); ++ __ daddiu(AT, AT, 1); ++ __ sw(AT, T8, 0); ++} ++ ++void TemplateInterpreterGenerator::histogram_bytecode(Template* t) { ++ __ li(T8, (long)&BytecodeHistogram::_counters[t->bytecode()]); ++ __ lw(AT, T8, 0); ++ __ daddiu(AT, AT, 1); ++ __ sw(AT, T8, 0); ++} ++ ++void TemplateInterpreterGenerator::histogram_bytecode_pair(Template* t) { ++ __ li(T8, (long)&BytecodePairHistogram::_index); ++ __ lw(T9, T8, 0); ++ __ dsrl(T9, T9, BytecodePairHistogram::log2_number_of_codes); ++ __ li(T8, ((long)t->bytecode()) << BytecodePairHistogram::log2_number_of_codes); ++ __ orr(T9, T9, T8); ++ __ li(T8, (long)&BytecodePairHistogram::_index); ++ __ sw(T9, T8, 0); ++ __ dsll(T9, T9, 2); ++ __ li(T8, (long)BytecodePairHistogram::_counters); ++ __ daddu(T8, T8, T9); ++ __ lw(AT, T8, 0); ++ __ daddiu(AT, AT, 1); ++ __ sw(AT, T8, 0); ++} ++ ++ ++void TemplateInterpreterGenerator::trace_bytecode(Template* t) { ++ // Call a little run-time stub to avoid blow-up for each bytecode. ++ // The run-time runtime saves the right registers, depending on ++ // the tosca in-state for the given template. ++ ++ address entry = Interpreter::trace_code(t->tos_in()); ++ assert(entry != NULL, "entry must have been generated"); ++ __ call(entry, relocInfo::none); ++ __ delayed()->nop(); ++ //add for compressedoops ++ __ reinit_heapbase(); ++} ++ ++ ++void TemplateInterpreterGenerator::stop_interpreter_at() { ++ Label L; ++ __ li(T8, long(&BytecodeCounter::_counter_value)); ++ __ lw(T8, T8, 0); ++ __ move(AT, StopInterpreterAt); ++ __ bne(T8, AT, L); ++ __ delayed()->nop(); ++ __ brk(5); ++ __ delayed()->nop(); ++ __ bind(L); ++} ++#endif // !PRODUCT +diff --git a/src/hotspot/cpu/mips/templateTable_mips.hpp b/src/hotspot/cpu/mips/templateTable_mips.hpp +new file mode 100644 +index 0000000000..46a88aba26 +--- /dev/null ++++ b/src/hotspot/cpu/mips/templateTable_mips.hpp +@@ -0,0 +1,43 @@ ++/* ++ * Copyright (c) 2003, 2012, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2018, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 
++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#ifndef CPU_MIPS_VM_TEMPLATETABLE_MIPS_64_HPP ++#define CPU_MIPS_VM_TEMPLATETABLE_MIPS_64_HPP ++ ++ static void prepare_invoke(int byte_no, ++ Register method, // linked method (or i-klass) ++ Register index = noreg, // itable index, MethodType, etc. ++ Register recv = noreg, // if caller wants to see it ++ Register flags = noreg // if caller wants to test it ++ ); ++ static void invokevirtual_helper(Register index, Register recv, ++ Register flags); ++ static void volatile_barrier(); ++ ++ // Helpers ++ static void index_check(Register array, Register index); ++ static void index_check_without_pop(Register array, Register index); ++ ++#endif // CPU_MIPS_VM_TEMPLATETABLE_MIPS_64_HPP +diff --git a/src/hotspot/cpu/mips/templateTable_mips_64.cpp b/src/hotspot/cpu/mips/templateTable_mips_64.cpp +new file mode 100644 +index 0000000000..5265483830 +--- /dev/null ++++ b/src/hotspot/cpu/mips/templateTable_mips_64.cpp +@@ -0,0 +1,4688 @@ ++/* ++ * Copyright (c) 2003, 2013, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ * ++ */ ++ ++#include "precompiled.hpp" ++#include "asm/macroAssembler.hpp" ++#include "interpreter/interpreter.hpp" ++#include "interpreter/interpreterRuntime.hpp" ++#include "interpreter/interp_masm.hpp" ++#include "interpreter/templateTable.hpp" ++#include "memory/universe.hpp" ++#include "oops/methodData.hpp" ++#include "oops/objArrayKlass.hpp" ++#include "oops/oop.inline.hpp" ++#include "prims/methodHandles.hpp" ++#include "runtime/frame.inline.hpp" ++#include "runtime/sharedRuntime.hpp" ++#include "runtime/stubRoutines.hpp" ++#include "runtime/synchronizer.hpp" ++#include "utilities/macros.hpp" ++ ++ ++#ifndef CC_INTERP ++ ++#define __ _masm-> ++ ++#define T0 RT0 ++#define T1 RT1 ++#define T2 RT2 ++#define T3 RT3 ++#define T8 RT8 ++#define T9 RT9 ++ ++// Platform-dependent initialization ++ ++void TemplateTable::pd_initialize() { ++ // No mips specific initialization ++} ++ ++// Address computation: local variables ++ ++static inline Address iaddress(int n) { ++ return Address(LVP, Interpreter::local_offset_in_bytes(n)); ++} ++ ++static inline Address laddress(int n) { ++ return iaddress(n + 1); ++} ++ ++static inline Address faddress(int n) { ++ return iaddress(n); ++} ++ ++static inline Address daddress(int n) { ++ return laddress(n); ++} ++ ++static inline Address aaddress(int n) { ++ return iaddress(n); ++} ++static inline Address haddress(int n) { return iaddress(n + 0); } ++ ++ ++static inline Address at_sp() { return Address(SP, 0); } ++static inline Address at_sp_p1() { return Address(SP, 1 * wordSize); } ++static inline Address at_sp_p2() { return Address(SP, 2 * wordSize); } ++ ++// At top of Java expression stack which may be different than sp(). It ++// isn't for category 1 objects. ++static inline Address at_tos () { ++ Address tos = Address(SP, Interpreter::expr_offset_in_bytes(0)); ++ return tos; ++} ++ ++static inline Address at_tos_p1() { ++ return Address(SP, Interpreter::expr_offset_in_bytes(1)); ++} ++ ++static inline Address at_tos_p2() { ++ return Address(SP, Interpreter::expr_offset_in_bytes(2)); ++} ++ ++static inline Address at_tos_p3() { ++ return Address(SP, Interpreter::expr_offset_in_bytes(3)); ++} ++ ++// we use S0 as bcp, be sure you have bcp in S0 before you call any of the Template generator ++Address TemplateTable::at_bcp(int offset) { ++ assert(_desc->uses_bcp(), "inconsistent uses_bcp information"); ++ return Address(BCP, offset); ++} ++ ++// Miscelaneous helper routines ++// Store an oop (or NULL) at the address described by obj. 
++// If val == noreg this means store a NULL ++ ++static void do_oop_store(InterpreterMacroAssembler* _masm, ++ Address dst, ++ Register val, ++ DecoratorSet decorators = 0) { ++ assert(val == noreg || val == V0, "parameter is just for looks"); ++ __ store_heap_oop(dst, val, T9, T1, decorators); ++} ++ ++static void do_oop_load(InterpreterMacroAssembler* _masm, ++ Address src, ++ Register dst, ++ DecoratorSet decorators = 0) { ++ __ load_heap_oop(dst, src, T9, T1, decorators); ++} ++ ++// bytecode folding ++void TemplateTable::patch_bytecode(Bytecodes::Code bc, Register bc_reg, ++ Register tmp_reg, bool load_bc_into_bc_reg/*=true*/, ++ int byte_no) { ++ if (!RewriteBytecodes) return; ++ Label L_patch_done; ++ ++ switch (bc) { ++ case Bytecodes::_fast_aputfield: ++ case Bytecodes::_fast_bputfield: ++ case Bytecodes::_fast_zputfield: ++ case Bytecodes::_fast_cputfield: ++ case Bytecodes::_fast_dputfield: ++ case Bytecodes::_fast_fputfield: ++ case Bytecodes::_fast_iputfield: ++ case Bytecodes::_fast_lputfield: ++ case Bytecodes::_fast_sputfield: ++ { ++ // We skip bytecode quickening for putfield instructions when ++ // the put_code written to the constant pool cache is zero. ++ // This is required so that every execution of this instruction ++ // calls out to InterpreterRuntime::resolve_get_put to do ++ // additional, required work. ++ assert(byte_no == f1_byte || byte_no == f2_byte, "byte_no out of range"); ++ assert(load_bc_into_bc_reg, "we use bc_reg as temp"); ++ __ get_cache_and_index_and_bytecode_at_bcp(tmp_reg, bc_reg, tmp_reg, byte_no, 1); ++ __ daddiu(bc_reg, R0, bc); ++ __ beq(tmp_reg, R0, L_patch_done); ++ __ delayed()->nop(); ++ } ++ break; ++ default: ++ assert(byte_no == -1, "sanity"); ++ // the pair bytecodes have already done the load. 
++ if (load_bc_into_bc_reg) { ++ __ move(bc_reg, bc); ++ } ++ } ++ ++ if (JvmtiExport::can_post_breakpoint()) { ++ Label L_fast_patch; ++ // if a breakpoint is present we can't rewrite the stream directly ++ __ lbu(tmp_reg, at_bcp(0)); ++ __ move(AT, Bytecodes::_breakpoint); ++ __ bne(tmp_reg, AT, L_fast_patch); ++ __ delayed()->nop(); ++ ++ __ get_method(tmp_reg); ++ // Let breakpoint table handling rewrite to quicker bytecode ++ __ call_VM(NOREG, CAST_FROM_FN_PTR(address, ++ InterpreterRuntime::set_original_bytecode_at), tmp_reg, BCP, bc_reg); ++ ++ __ b(L_patch_done); ++ __ delayed()->nop(); ++ __ bind(L_fast_patch); ++ } ++ ++#ifdef ASSERT ++ Label L_okay; ++ __ lbu(tmp_reg, at_bcp(0)); ++ __ move(AT, (int)Bytecodes::java_code(bc)); ++ __ beq(tmp_reg, AT, L_okay); ++ __ delayed()->nop(); ++ __ beq(tmp_reg, bc_reg, L_patch_done); ++ __ delayed()->nop(); ++ __ stop("patching the wrong bytecode"); ++ __ bind(L_okay); ++#endif ++ ++ // patch bytecode ++ __ sb(bc_reg, at_bcp(0)); ++ __ bind(L_patch_done); ++} ++ ++ ++// Individual instructions ++ ++void TemplateTable::nop() { ++ transition(vtos, vtos); ++ // nothing to do ++} ++ ++void TemplateTable::shouldnotreachhere() { ++ transition(vtos, vtos); ++ __ stop("shouldnotreachhere bytecode"); ++} ++ ++void TemplateTable::aconst_null() { ++ transition(vtos, atos); ++ __ move(FSR, R0); ++} ++ ++void TemplateTable::iconst(int value) { ++ transition(vtos, itos); ++ if (value == 0) { ++ __ move(FSR, R0); ++ } else { ++ __ move(FSR, value); ++ } ++} ++ ++void TemplateTable::lconst(int value) { ++ transition(vtos, ltos); ++ if (value == 0) { ++ __ move(FSR, R0); ++ } else { ++ __ move(FSR, value); ++ } ++} ++ ++void TemplateTable::fconst(int value) { ++ transition(vtos, ftos); ++ switch( value ) { ++ case 0: __ mtc1(R0, FSF); return; ++ case 1: __ addiu(AT, R0, 1); break; ++ case 2: __ addiu(AT, R0, 2); break; ++ default: ShouldNotReachHere(); ++ } ++ __ mtc1(AT, FSF); ++ __ cvt_s_w(FSF, FSF); ++} ++ ++void TemplateTable::dconst(int value) { ++ transition(vtos, dtos); ++ switch( value ) { ++ case 0: __ dmtc1(R0, FSF); ++ return; ++ case 1: __ daddiu(AT, R0, 1); ++ __ dmtc1(AT, FSF); ++ __ cvt_d_w(FSF, FSF); ++ break; ++ default: ShouldNotReachHere(); ++ } ++} ++ ++void TemplateTable::bipush() { ++ transition(vtos, itos); ++ __ lb(FSR, at_bcp(1)); ++} ++ ++void TemplateTable::sipush() { ++ transition(vtos, itos); ++ __ lb(FSR, BCP, 1); ++ __ lbu(AT, BCP, 2); ++ __ dsll(FSR, FSR, 8); ++ __ orr(FSR, FSR, AT); ++} ++ ++// T1 : tags ++// T2 : index ++// T3 : cpool ++// T8 : tag ++void TemplateTable::ldc(bool wide) { ++ transition(vtos, vtos); ++ Label call_ldc, notFloat, notClass, notInt, Done; ++ // get index in cpool ++ if (wide) { ++ __ get_unsigned_2_byte_index_at_bcp(T2, 1); ++ } else { ++ __ lbu(T2, at_bcp(1)); ++ } ++ ++ __ get_cpool_and_tags(T3, T1); ++ ++ const int base_offset = ConstantPool::header_size() * wordSize; ++ const int tags_offset = Array::base_offset_in_bytes(); ++ ++ // get type ++ if (UseLEXT1 && Assembler::is_simm(sizeof(tags_offset), 8)) { ++ __ gslbx(T1, T1, T2, tags_offset); ++ } else { ++ __ daddu(AT, T1, T2); ++ __ lb(T1, AT, tags_offset); ++ } ++ if(os::is_MP()) { ++ __ sync(); // load acquire ++ } ++ //now T1 is the tag ++ ++ // unresolved class - get the resolved class ++ __ daddiu(AT, T1, - JVM_CONSTANT_UnresolvedClass); ++ __ beq(AT, R0, call_ldc); ++ __ delayed()->nop(); ++ ++ // unresolved class in error (resolution failed) - call into runtime ++ // so that the same error from first resolution attempt is thrown. 
++ __ daddiu(AT, T1, -JVM_CONSTANT_UnresolvedClassInError); ++ __ beq(AT, R0, call_ldc); ++ __ delayed()->nop(); ++ ++ // resolved class - need to call vm to get java mirror of the class ++ __ daddiu(AT, T1, - JVM_CONSTANT_Class); ++ __ bne(AT, R0, notClass); ++ __ delayed()->dsll(T2, T2, Address::times_8); ++ ++ __ bind(call_ldc); ++ __ move(A1, wide); ++ call_VM(FSR, CAST_FROM_FN_PTR(address, InterpreterRuntime::ldc), A1); ++ //__ push(atos); ++ __ daddiu(SP, SP, - Interpreter::stackElementSize); ++ __ b(Done); ++ __ delayed()->sd(FSR, SP, 0); // added for performance issue ++ ++ __ bind(notClass); ++ __ daddiu(AT, T1, -JVM_CONSTANT_Float); ++ __ bne(AT, R0, notFloat); ++ __ delayed()->nop(); ++ // ftos ++ if (UseLEXT1 && Assembler::is_simm(sizeof(base_offset), 8)) { ++ __ gslwxc1(FSF, T3, T2, base_offset); ++ } else { ++ __ daddu(AT, T3, T2); ++ __ lwc1(FSF, AT, base_offset); ++ } ++ //__ push_f(); ++ __ daddiu(SP, SP, - Interpreter::stackElementSize); ++ __ b(Done); ++ __ delayed()->swc1(FSF, SP, 0); ++ ++ __ bind(notFloat); ++ __ daddiu(AT, T1, -JVM_CONSTANT_Integer); ++ __ bne(AT, R0, notInt); ++ __ delayed()->nop(); ++ // itos ++ if (UseLEXT1 && Assembler::is_simm(sizeof(base_offset), 8)) { ++ __ gslwx(FSR, T3, T2, base_offset); ++ } else { ++ __ daddu(T0, T3, T2); ++ __ lw(FSR, T0, base_offset); ++ } ++ __ push(itos); ++ __ b(Done); ++ __ delayed()->nop(); ++ ++ // assume the tag is for condy; if not, the VM runtime will tell us ++ __ bind(notInt); ++ condy_helper(Done); ++ ++ __ bind(Done); ++} ++ ++void TemplateTable::condy_helper(Label& Done) { ++ const Register obj = FSR; ++ const Register off = SSR; ++ const Register flags = T3; ++ const Register rarg = A1; ++ __ move(rarg, (int)bytecode()); ++ __ call_VM(obj, CAST_FROM_FN_PTR(address, InterpreterRuntime::resolve_ldc), rarg); ++ __ get_vm_result_2(flags, TREG); ++ // VMr = obj = base address to find primitive value to push ++ // VMr2 = flags = (tos, off) using format of CPCE::_flags ++ __ andi(off, flags, ConstantPoolCacheEntry::field_index_mask); ++ __ daddu(obj, off, obj); ++ const Address field(obj, 0 * wordSize); ++ ++ // What sort of thing are we loading? 
++ __ dsrl(flags, flags, ConstantPoolCacheEntry::tos_state_shift); ++ ConstantPoolCacheEntry::verify_tos_state_shift(); ++ ++ switch (bytecode()) { ++ case Bytecodes::_ldc: ++ case Bytecodes::_ldc_w: ++ { ++ // tos in (itos, ftos, stos, btos, ctos, ztos) ++ Label notInt, notFloat, notShort, notByte, notChar, notBool; ++ __ daddiu(AT, flags, -itos); ++ __ bne(AT, R0, notInt); ++ __ delayed()->nop(); ++ // itos ++ __ ld(obj, field); ++ __ push(itos); ++ __ b(Done); ++ __ delayed()->nop(); ++ ++ __ bind(notInt); ++ __ daddiu(AT, flags, -ftos); ++ __ bne(AT, R0, notFloat); ++ __ delayed()->nop(); ++ // ftos ++ __ lwc1(FSF, field); ++ __ push(ftos); ++ __ b(Done); ++ __ delayed()->nop(); ++ ++ __ bind(notFloat); ++ __ daddiu(AT, flags, -stos); ++ __ bne(AT, R0, notShort); ++ __ delayed()->nop(); ++ // stos ++ __ lh(obj, field); ++ __ push(stos); ++ __ b(Done); ++ __ delayed()->nop(); ++ ++ __ bind(notShort); ++ __ daddiu(AT, flags, -btos); ++ __ bne(AT, R0, notByte); ++ __ delayed()->nop(); ++ // btos ++ __ lb(obj, field); ++ __ push(btos); ++ __ b(Done); ++ __ delayed()->nop(); ++ ++ __ bind(notByte); ++ __ daddiu(AT, flags, -ctos); ++ __ bne(AT, R0, notChar); ++ __ delayed()->nop(); ++ // ctos ++ __ lhu(obj, field); ++ __ push(ctos); ++ __ b(Done); ++ __ delayed()->nop(); ++ ++ __ bind(notChar); ++ __ daddiu(AT, flags, -ztos); ++ __ bne(AT, R0, notBool); ++ __ delayed()->nop(); ++ // ztos ++ __ lbu(obj, field); ++ __ push(ztos); ++ __ b(Done); ++ __ delayed()->nop(); ++ ++ __ bind(notBool); ++ break; ++ } ++ ++ case Bytecodes::_ldc2_w: ++ { ++ Label notLong, notDouble; ++ __ daddiu(AT, flags, -ltos); ++ __ bne(AT, R0, notLong); ++ __ delayed()->nop(); ++ // ltos ++ __ ld(obj, field); ++ __ push(ltos); ++ __ b(Done); ++ __ delayed()->nop(); ++ ++ __ bind(notLong); ++ __ daddiu(AT, flags, -dtos); ++ __ bne(AT, R0, notDouble); ++ __ delayed()->nop(); ++ // dtos ++ __ ldc1(FSF, field); ++ __ push(dtos); ++ __ b(Done); ++ __ delayed()->nop(); ++ ++ __ bind(notDouble); ++ break; ++ } ++ ++ default: ++ ShouldNotReachHere(); ++ } ++ ++ __ stop("bad ldc/condy"); ++} ++ ++// Fast path for caching oop constants. ++void TemplateTable::fast_aldc(bool wide) { ++ transition(vtos, atos); ++ ++ Register result = FSR; ++ Register tmp = SSR; ++ Register rarg = A1; ++ int index_size = wide ? sizeof(u2) : sizeof(u1); ++ ++ Label resolved; ++ ++ // We are resolved if the resolved reference cache entry contains a ++ // non-null object (String, MethodType, etc.) ++ assert_different_registers(result, tmp); ++ __ get_cache_index_at_bcp(tmp, 1, index_size); ++ __ load_resolved_reference_at_index(result, tmp, T9); ++ __ bne(result, R0, resolved); ++ __ delayed()->nop(); ++ ++ address entry = CAST_FROM_FN_PTR(address, InterpreterRuntime::resolve_ldc); ++ // first time invocation - must resolve first ++ int i = (int)bytecode(); ++ __ move(rarg, i); ++ __ call_VM(result, entry, rarg); ++ ++ __ bind(resolved); ++ ++ { // Check for the null sentinel. ++ // If we just called the VM, it already did the mapping for us, ++ // but it's harmless to retry. 
++    Label notNull;
++    __ set64(rarg, (long)Universe::the_null_sentinel_addr());
++    __ ld_ptr(tmp, Address(rarg));
++    __ bne(tmp, result, notNull);
++    __ delayed()->nop();
++    __ xorr(result, result, result);   // NULL object reference
++    __ bind(notNull);
++  }
++
++  if (VerifyOops) {
++    __ verify_oop(result);
++  }
++}
++
++
++// used registers: T2, T3, T1
++// T2 : index
++// T3 : cpool
++// T1 : tag
++void TemplateTable::ldc2_w() {
++  transition(vtos, vtos);
++  Label notDouble, notLong, Done;
++
++  // get index in cpool
++  __ get_unsigned_2_byte_index_at_bcp(T2, 1);
++
++  __ get_cpool_and_tags(T3, T1);
++
++  const int base_offset = ConstantPool::header_size() * wordSize;
++  const int tags_offset = Array<u1>::base_offset_in_bytes();
++
++  // get type in T1
++  if (UseLEXT1 && Assembler::is_simm(tags_offset, 8)) {
++    __ gslbx(T1, T1, T2, tags_offset);
++  } else {
++    __ daddu(AT, T1, T2);
++    __ lb(T1, AT, tags_offset);
++  }
++
++  __ daddiu(AT, T1, -JVM_CONSTANT_Double);
++  __ bne(AT, R0, notDouble);
++  __ delayed()->nop();
++
++  // dtos
++  __ dsll(T2, T2, Address::times_8);
++  if (UseLEXT1 && Assembler::is_simm(base_offset, 8)) {
++    __ gsldxc1(FSF, T3, T2, base_offset);
++  } else {
++    __ daddu(AT, T3, T2);
++    __ ldc1(FSF, AT, base_offset);
++  }
++  __ push(dtos);
++  __ b(Done);
++  __ delayed()->nop();
++
++  __ bind(notDouble);
++  __ daddiu(AT, T1, -JVM_CONSTANT_Long);
++  __ bne(AT, R0, notLong);
++  __ delayed()->nop();
++
++  // ltos
++  __ dsll(T2, T2, Address::times_8);
++  if (UseLEXT1 && Assembler::is_simm(base_offset, 8)) {
++    __ gsldx(FSR, T3, T2, base_offset);
++  } else {
++    __ daddu(AT, T3, T2);
++    __ ld(FSR, AT, base_offset);
++  }
++  __ push(ltos);
++  __ b(Done);
++  __ delayed()->nop();
++
++  __ bind(notLong);
++  condy_helper(Done);
++
++  __ bind(Done);
++}
++
++// We compute the actual local variable address here; the x86 port does not
++// need to, because it has a scaled-index memory addressing mode that we lack.
++void TemplateTable::locals_index(Register reg, int offset) {
++  __ lbu(reg, at_bcp(offset));
++  __ dsll(reg, reg, Address::times_8);
++  __ dsubu(reg, LVP, reg);
++}
++
++void TemplateTable::iload() {
++  iload_internal();
++}
++
++void TemplateTable::nofast_iload() {
++  iload_internal(may_not_rewrite);
++}
++
++// This method does bytecode folding for the two frequent pairs:
++//   iload iload    and    iload caload
++// used registers : T2, T3
++// T2 : bytecode
++// T3 : folded code
++void TemplateTable::iload_internal(RewriteControl rc) {
++  transition(vtos, itos);
++  if (RewriteFrequentPairs && rc == may_rewrite) {
++    Label rewrite, done;
++    // get the next bytecode in T2
++    __ lbu(T2, at_bcp(Bytecodes::length_for(Bytecodes::_iload)));
++    // if _iload, wait to rewrite to iload2. We only want to rewrite the
++    // last two iloads in a pair. Comparing against fast_iload means that
++    // the next bytecode is neither an iload nor a caload, and therefore
++    // an iload pair.
++    __ move(AT, Bytecodes::_iload);
++    __ beq(AT, T2, done);
++    __ delayed()->nop();
++
++    __ move(T3, Bytecodes::_fast_iload2);
++    __ move(AT, Bytecodes::_fast_iload);
++    __ beq(AT, T2, rewrite);
++    __ delayed()->nop();
++
++    // if _caload, rewrite to fast_icaload
++    __ move(T3, Bytecodes::_fast_icaload);
++    __ move(AT, Bytecodes::_caload);
++    __ beq(AT, T2, rewrite);
++    __ delayed()->nop();
++
++    // rewrite so iload doesn't check again.
++ __ move(T3, Bytecodes::_fast_iload); ++ ++ // rewrite ++ // T3 : fast bytecode ++ __ bind(rewrite); ++ patch_bytecode(Bytecodes::_iload, T3, T2, false); ++ __ bind(done); ++ } ++ ++ // Get the local value into tos ++ locals_index(T2); ++ __ lw(FSR, T2, 0); ++} ++ ++// used register T2 ++// T2 : index ++void TemplateTable::fast_iload2() { ++ transition(vtos, itos); ++ locals_index(T2); ++ __ lw(FSR, T2, 0); ++ __ push(itos); ++ locals_index(T2, 3); ++ __ lw(FSR, T2, 0); ++} ++ ++// used register T2 ++// T2 : index ++void TemplateTable::fast_iload() { ++ transition(vtos, itos); ++ locals_index(T2); ++ __ lw(FSR, T2, 0); ++} ++ ++// used register T2 ++// T2 : index ++void TemplateTable::lload() { ++ transition(vtos, ltos); ++ locals_index(T2); ++ __ ld(FSR, T2, -wordSize); ++} ++ ++// used register T2 ++// T2 : index ++void TemplateTable::fload() { ++ transition(vtos, ftos); ++ locals_index(T2); ++ __ lwc1(FSF, T2, 0); ++} ++ ++// used register T2 ++// T2 : index ++void TemplateTable::dload() { ++ transition(vtos, dtos); ++ locals_index(T2); ++ __ ldc1(FSF, T2, -wordSize); ++} ++ ++// used register T2 ++// T2 : index ++void TemplateTable::aload() { ++ transition(vtos, atos); ++ locals_index(T2); ++ __ ld(FSR, T2, 0); ++} ++ ++void TemplateTable::locals_index_wide(Register reg) { ++ __ get_unsigned_2_byte_index_at_bcp(reg, 2); ++ __ dsll(reg, reg, Address::times_8); ++ __ dsubu(reg, LVP, reg); ++} ++ ++// used register T2 ++// T2 : index ++void TemplateTable::wide_iload() { ++ transition(vtos, itos); ++ locals_index_wide(T2); ++ __ ld(FSR, T2, 0); ++} ++ ++// used register T2 ++// T2 : index ++void TemplateTable::wide_lload() { ++ transition(vtos, ltos); ++ locals_index_wide(T2); ++ __ ld(FSR, T2, -wordSize); ++} ++ ++// used register T2 ++// T2 : index ++void TemplateTable::wide_fload() { ++ transition(vtos, ftos); ++ locals_index_wide(T2); ++ __ lwc1(FSF, T2, 0); ++} ++ ++// used register T2 ++// T2 : index ++void TemplateTable::wide_dload() { ++ transition(vtos, dtos); ++ locals_index_wide(T2); ++ __ ldc1(FSF, T2, -wordSize); ++} ++ ++// used register T2 ++// T2 : index ++void TemplateTable::wide_aload() { ++ transition(vtos, atos); ++ locals_index_wide(T2); ++ __ ld(FSR, T2, 0); ++} ++ ++// we use A2 as the regiser for index, BE CAREFUL! 
++// we dont use our tge 29 now, for later optimization ++void TemplateTable::index_check(Register array, Register index) { ++ // Pop ptr into array ++ __ pop_ptr(array); ++ index_check_without_pop(array, index); ++} ++ ++void TemplateTable::index_check_without_pop(Register array, Register index) { ++ // destroys A2 ++ // check array ++ __ null_check(array, arrayOopDesc::length_offset_in_bytes()); ++ ++ // sign extend since tos (index) might contain garbage in upper bits ++ __ sll(index, index, 0); ++ ++ // check index ++ Label ok; ++ __ lw(AT, array, arrayOopDesc::length_offset_in_bytes()); ++#ifndef OPT_RANGECHECK ++ __ sltu(AT, index, AT); ++ __ bne(AT, R0, ok); ++ __ delayed()->nop(); ++ ++ //throw_ArrayIndexOutOfBoundsException assume abberrant index in A2 ++ if (A1 != array) __ move(A1, array); ++ if (A2 != index) __ move(A2, index); ++ __ jmp(Interpreter::_throw_ArrayIndexOutOfBoundsException_entry); ++ __ delayed()->nop(); ++ __ bind(ok); ++#else ++ __ lw(AT, array, arrayOopDesc::length_offset_in_bytes()); ++ __ move(A2, index); ++ __ tgeu(A2, AT, 29); ++#endif ++} ++ ++void TemplateTable::iaload() { ++ transition(itos, itos); ++ if(UseBoundCheckInstruction) { ++ __ pop(SSR); //SSR:array FSR: index ++ __ dsll(FSR, FSR, 2); ++ __ daddu(FSR, SSR, FSR); ++ __ addiu(FSR, FSR, arrayOopDesc::base_offset_in_bytes(T_INT)); ++ ++ __ lw(AT, SSR, arrayOopDesc::length_offset_in_bytes()); //bound ++ __ dsll(AT, AT, 2); ++ __ daddu(AT, SSR, AT); ++ __ addiu(AT, AT, arrayOopDesc::base_offset_in_bytes(T_INT)); ++ ++ __ warn("iaload Unimplemented yet"); ++ __ gslwle(FSR, FSR, AT); ++ } else { ++ index_check(SSR, FSR); ++ __ dsll(FSR, FSR, 2); ++ __ daddu(FSR, SSR, FSR); ++ __ access_load_at(T_INT, IN_HEAP | IS_ARRAY, FSR, Address(FSR, arrayOopDesc::base_offset_in_bytes(T_INT)), noreg, noreg); ++ } ++} ++ ++void TemplateTable::laload() { ++ transition(itos, ltos); ++ if(UseBoundCheckInstruction) { ++ __ pop(SSR); //SSR:array FSR: index ++ __ dsll(FSR, FSR, Address::times_8); ++ __ daddu(FSR, SSR, FSR); ++ __ addiu(FSR, FSR, arrayOopDesc::base_offset_in_bytes(T_LONG) + 0 * wordSize); ++ ++ __ lw(AT, SSR, arrayOopDesc::length_offset_in_bytes()); //bound ++ __ dsll(AT, AT, Address::times_8); ++ __ daddu(AT, SSR, AT); ++ __ addiu(AT, AT, arrayOopDesc::base_offset_in_bytes(T_LONG) + 0 * wordSize); ++ ++ __ warn("laload Unimplemented yet"); ++ __ gsldle(FSR, FSR, AT); ++ } else { ++ index_check(SSR, FSR); ++ __ dsll(AT, FSR, Address::times_8); ++ __ daddu(T9, SSR, AT); ++ __ access_load_at(T_LONG, IN_HEAP | IS_ARRAY, FSR, Address(T9, arrayOopDesc::base_offset_in_bytes(T_LONG)), noreg, noreg); ++ } ++} ++ ++void TemplateTable::faload() { ++ transition(itos, ftos); ++ if(UseBoundCheckInstruction) { ++ __ pop(SSR); //SSR:array FSR: index ++ __ shl(FSR, 2); ++ __ daddu(FSR, SSR, FSR); ++ __ addiu(FSR, FSR, arrayOopDesc::base_offset_in_bytes(T_FLOAT)); ++ ++ __ lw(AT, SSR, arrayOopDesc::length_offset_in_bytes()); //bound ++ __ shl(AT, 2); ++ __ daddu(AT, SSR, AT); ++ __ addiu(AT, AT, arrayOopDesc::base_offset_in_bytes(T_FLOAT)); ++ ++ __ warn("faload Unimplemented yet"); ++ __ gslwlec1(FSF, FSR, AT); ++ } else { ++ index_check(SSR, FSR); ++ __ shl(FSR, 2); ++ __ daddu(FSR, SSR, FSR); ++ __ access_load_at(T_FLOAT, IN_HEAP | IS_ARRAY, noreg, Address(FSR, arrayOopDesc::base_offset_in_bytes(T_FLOAT)), noreg, noreg); ++ } ++} ++ ++void TemplateTable::daload() { ++ transition(itos, dtos); ++ if(UseBoundCheckInstruction) { ++ __ pop(SSR); //SSR:array FSR: index ++ __ dsll(FSR, FSR, 3); ++ __ daddu(FSR, SSR, FSR); ++ 
__ addiu(FSR, FSR, arrayOopDesc::base_offset_in_bytes(T_DOUBLE) + 0 * wordSize); ++ ++ __ lw(AT, SSR, arrayOopDesc::length_offset_in_bytes()); //bound ++ __ dsll(AT, AT, 3); ++ __ daddu(AT, SSR, AT); ++ __ addiu(AT, AT, arrayOopDesc::base_offset_in_bytes(T_DOUBLE) + 0 * wordSize); ++ ++ __ warn("daload Unimplemented yet"); ++ __ gsldlec1(FSF, FSR, AT); ++ } else { ++ index_check(SSR, FSR); ++ __ dsll(AT, FSR, 3); ++ __ daddu(T9, SSR, AT); ++ __ access_load_at(T_DOUBLE, IN_HEAP | IS_ARRAY, noreg, Address(T9, arrayOopDesc::base_offset_in_bytes(T_DOUBLE)), noreg, noreg); ++ } ++} ++ ++void TemplateTable::aaload() { ++ transition(itos, atos); ++ index_check(SSR, FSR); ++ __ dsll(FSR, FSR, UseCompressedOops ? Address::times_4 : Address::times_8); ++ __ daddu(FSR, SSR, FSR); ++ //add for compressedoops ++ do_oop_load(_masm, ++ Address(FSR, arrayOopDesc::base_offset_in_bytes(T_OBJECT)), ++ FSR, ++ IS_ARRAY); ++} ++ ++void TemplateTable::baload() { ++ transition(itos, itos); ++ if(UseBoundCheckInstruction) { ++ __ pop(SSR); //SSR:array FSR:index ++ __ daddu(FSR, SSR, FSR); ++ __ addiu(FSR, FSR, arrayOopDesc::base_offset_in_bytes(T_BYTE)); //base ++ ++ __ lw(AT, SSR, arrayOopDesc::length_offset_in_bytes()); ++ __ daddu(AT, SSR, AT); ++ __ addiu(AT, AT, arrayOopDesc::base_offset_in_bytes(T_BYTE)); //bound ++ ++ __ warn("baload Unimplemented yet"); ++ __ gslble(FSR, FSR, AT); ++ } else { ++ index_check(SSR, FSR); ++ __ daddu(FSR, SSR, FSR); ++ __ access_load_at(T_BYTE, IN_HEAP | IS_ARRAY, FSR, Address(FSR, arrayOopDesc::base_offset_in_bytes(T_BYTE)), noreg, noreg); ++ } ++} ++ ++void TemplateTable::caload() { ++ transition(itos, itos); ++ index_check(SSR, FSR); ++ __ dsll(FSR, FSR, Address::times_2); ++ __ daddu(FSR, SSR, FSR); ++ __ access_load_at(T_CHAR, IN_HEAP | IS_ARRAY, FSR, Address(FSR, arrayOopDesc::base_offset_in_bytes(T_CHAR)), noreg, noreg); ++} ++ ++// iload followed by caload frequent pair ++// used register : T2 ++// T2 : index ++void TemplateTable::fast_icaload() { ++ transition(vtos, itos); ++ // load index out of locals ++ locals_index(T2); ++ __ lw(FSR, T2, 0); ++ index_check(SSR, FSR); ++ __ dsll(FSR, FSR, 1); ++ __ daddu(FSR, SSR, FSR); ++ __ access_load_at(T_CHAR, IN_HEAP | IS_ARRAY, FSR, Address(FSR, arrayOopDesc::base_offset_in_bytes(T_CHAR)), noreg, noreg); ++} ++ ++void TemplateTable::saload() { ++ transition(itos, itos); ++ if(UseBoundCheckInstruction) { ++ __ pop(SSR); //SSR:array FSR: index ++ __ dsll(FSR, FSR, Address::times_2); ++ __ daddu(FSR, SSR, FSR); ++ __ addiu(FSR, FSR, arrayOopDesc::base_offset_in_bytes(T_SHORT)); ++ ++ __ lw(AT, SSR, arrayOopDesc::length_offset_in_bytes()); //bound ++ __ dsll(AT, AT, Address::times_2); ++ __ daddu(AT, SSR, AT); ++ __ addiu(AT, AT, arrayOopDesc::base_offset_in_bytes(T_SHORT)); ++ ++ __ warn("saload Unimplemented yet"); ++ __ gslhle(FSR, FSR, AT); ++ } else { ++ index_check(SSR, FSR); ++ __ dsll(FSR, FSR, Address::times_2); ++ __ daddu(FSR, SSR, FSR); ++ __ access_load_at(T_SHORT, IN_HEAP | IS_ARRAY, FSR, Address(FSR, arrayOopDesc::base_offset_in_bytes(T_SHORT)), noreg, noreg); ++ } ++} ++ ++void TemplateTable::iload(int n) { ++ transition(vtos, itos); ++ __ lw(FSR, iaddress(n)); ++} ++ ++void TemplateTable::lload(int n) { ++ transition(vtos, ltos); ++ __ ld(FSR, laddress(n)); ++} ++ ++void TemplateTable::fload(int n) { ++ transition(vtos, ftos); ++ __ lwc1(FSF, faddress(n)); ++} ++ ++void TemplateTable::dload(int n) { ++ transition(vtos, dtos); ++ __ ldc1(FSF, laddress(n)); ++} ++ ++void TemplateTable::aload(int n) { ++ 
transition(vtos, atos); ++ __ ld(FSR, aaddress(n)); ++} ++ ++void TemplateTable::aload_0() { ++ aload_0_internal(); ++} ++ ++void TemplateTable::nofast_aload_0() { ++ aload_0_internal(may_not_rewrite); ++} ++ ++// used register : T2, T3 ++// T2 : bytecode ++// T3 : folded code ++void TemplateTable::aload_0_internal(RewriteControl rc) { ++ transition(vtos, atos); ++ // According to bytecode histograms, the pairs: ++ // ++ // _aload_0, _fast_igetfield ++ // _aload_0, _fast_agetfield ++ // _aload_0, _fast_fgetfield ++ // ++ // occur frequently. If RewriteFrequentPairs is set, the (slow) ++ // _aload_0 bytecode checks if the next bytecode is either ++ // _fast_igetfield, _fast_agetfield or _fast_fgetfield and then ++ // rewrites the current bytecode into a pair bytecode; otherwise it ++ // rewrites the current bytecode into _fast_aload_0 that doesn't do ++ // the pair check anymore. ++ // ++ // Note: If the next bytecode is _getfield, the rewrite must be ++ // delayed, otherwise we may miss an opportunity for a pair. ++ // ++ // Also rewrite frequent pairs ++ // aload_0, aload_1 ++ // aload_0, iload_1 ++ // These bytecodes with a small amount of code are most profitable ++ // to rewrite ++ if (RewriteFrequentPairs && rc == may_rewrite) { ++ Label rewrite, done; ++ // get the next bytecode in T2 ++ __ lbu(T2, at_bcp(Bytecodes::length_for(Bytecodes::_aload_0))); ++ ++ // do actual aload_0 ++ aload(0); ++ ++ // if _getfield then wait with rewrite ++ __ move(AT, Bytecodes::_getfield); ++ __ beq(AT, T2, done); ++ __ delayed()->nop(); ++ ++ // if _igetfield then reqrite to _fast_iaccess_0 ++ assert(Bytecodes::java_code(Bytecodes::_fast_iaccess_0) == ++ Bytecodes::_aload_0, ++ "fix bytecode definition"); ++ __ move(T3, Bytecodes::_fast_iaccess_0); ++ __ move(AT, Bytecodes::_fast_igetfield); ++ __ beq(AT, T2, rewrite); ++ __ delayed()->nop(); ++ ++ // if _agetfield then reqrite to _fast_aaccess_0 ++ assert(Bytecodes::java_code(Bytecodes::_fast_aaccess_0) == ++ Bytecodes::_aload_0, ++ "fix bytecode definition"); ++ __ move(T3, Bytecodes::_fast_aaccess_0); ++ __ move(AT, Bytecodes::_fast_agetfield); ++ __ beq(AT, T2, rewrite); ++ __ delayed()->nop(); ++ ++ // if _fgetfield then reqrite to _fast_faccess_0 ++ assert(Bytecodes::java_code(Bytecodes::_fast_faccess_0) == ++ Bytecodes::_aload_0, ++ "fix bytecode definition"); ++ __ move(T3, Bytecodes::_fast_faccess_0); ++ __ move(AT, Bytecodes::_fast_fgetfield); ++ __ beq(AT, T2, rewrite); ++ __ delayed()->nop(); ++ ++ // else rewrite to _fast_aload0 ++ assert(Bytecodes::java_code(Bytecodes::_fast_aload_0) == ++ Bytecodes::_aload_0, ++ "fix bytecode definition"); ++ __ move(T3, Bytecodes::_fast_aload_0); ++ ++ // rewrite ++ __ bind(rewrite); ++ patch_bytecode(Bytecodes::_aload_0, T3, T2, false); ++ ++ __ bind(done); ++ } else { ++ aload(0); ++ } ++} ++ ++void TemplateTable::istore() { ++ transition(itos, vtos); ++ locals_index(T2); ++ __ sw(FSR, T2, 0); ++} ++ ++void TemplateTable::lstore() { ++ transition(ltos, vtos); ++ locals_index(T2); ++ __ sd(FSR, T2, -wordSize); ++} ++ ++void TemplateTable::fstore() { ++ transition(ftos, vtos); ++ locals_index(T2); ++ __ swc1(FSF, T2, 0); ++} ++ ++void TemplateTable::dstore() { ++ transition(dtos, vtos); ++ locals_index(T2); ++ __ sdc1(FSF, T2, -wordSize); ++} ++ ++void TemplateTable::astore() { ++ transition(vtos, vtos); ++ __ pop_ptr(FSR); ++ locals_index(T2); ++ __ sd(FSR, T2, 0); ++} ++ ++void TemplateTable::wide_istore() { ++ transition(vtos, vtos); ++ __ pop_i(FSR); ++ locals_index_wide(T2); ++ __ sd(FSR, T2, 0); 
++} ++ ++void TemplateTable::wide_lstore() { ++ transition(vtos, vtos); ++ __ pop_l(FSR); ++ locals_index_wide(T2); ++ __ sd(FSR, T2, -wordSize); ++} ++ ++void TemplateTable::wide_fstore() { ++ wide_istore(); ++} ++ ++void TemplateTable::wide_dstore() { ++ wide_lstore(); ++} ++ ++void TemplateTable::wide_astore() { ++ transition(vtos, vtos); ++ __ pop_ptr(FSR); ++ locals_index_wide(T2); ++ __ sd(FSR, T2, 0); ++} ++ ++// used register : T2 ++void TemplateTable::iastore() { ++ transition(itos, vtos); ++ __ pop_i(SSR); // T2: array SSR: index ++ if(UseBoundCheckInstruction) { ++ __ pop_ptr(T2); ++ __ dsll(SSR, SSR, Address::times_4); ++ __ daddu(SSR, T2, SSR); ++ __ addiu(SSR, SSR, arrayOopDesc::base_offset_in_bytes(T_INT)); // base ++ ++ __ lw(AT, T2, arrayOopDesc::length_offset_in_bytes()); ++ __ dsll(AT, AT, Address::times_4); ++ __ daddu(AT, T2, AT); ++ __ addiu(AT, AT, arrayOopDesc::base_offset_in_bytes(T_INT)); //bound ++ ++ __ warn("iastore Unimplemented yet"); ++ __ gsswle(FSR, SSR, AT); ++ } else { ++ index_check(T2, SSR); // prefer index in SSR ++ __ dsll(SSR, SSR, Address::times_4); ++ __ daddu(T2, T2, SSR); ++ __ access_store_at(T_INT, IN_HEAP | IS_ARRAY, Address(T2, arrayOopDesc::base_offset_in_bytes(T_INT)), FSR, noreg, noreg); ++ } ++} ++ ++ ++ ++// used register T2, T3 ++void TemplateTable::lastore() { ++ transition(ltos, vtos); ++ __ pop_i (T2); ++ if(UseBoundCheckInstruction) { ++ __ pop_ptr(T3); ++ __ dsll(T2, T2, Address::times_8); ++ __ daddu(T2, T3, T2); ++ __ addiu(T2, T2, arrayOopDesc::base_offset_in_bytes(T_LONG) + 0 * wordSize); // base ++ ++ __ lw(AT, T3, arrayOopDesc::length_offset_in_bytes()); ++ __ dsll(AT, AT, Address::times_8); ++ __ daddu(AT, T3, AT); ++ __ addiu(AT, AT, arrayOopDesc::base_offset_in_bytes(T_LONG) + 0 * wordSize); //bound ++ ++ __ warn("lastore Unimplemented yet"); ++ __ gssdle(FSR, T2, AT); ++ } else { ++ index_check(T3, T2); ++ __ dsll(T2, T2, Address::times_8); ++ __ daddu(T3, T3, T2); ++ __ access_store_at(T_LONG, IN_HEAP | IS_ARRAY, Address(T3, arrayOopDesc::base_offset_in_bytes(T_LONG)), FSR, noreg, noreg); ++ } ++} ++ ++// used register T2 ++void TemplateTable::fastore() { ++ transition(ftos, vtos); ++ __ pop_i(SSR); ++ if(UseBoundCheckInstruction) { ++ __ pop_ptr(T2); ++ __ dsll(SSR, SSR, Address::times_4); ++ __ daddu(SSR, T2, SSR); ++ __ addiu(SSR, SSR, arrayOopDesc::base_offset_in_bytes(T_FLOAT)); // base ++ ++ __ lw(AT, T2, arrayOopDesc::length_offset_in_bytes()); ++ __ dsll(AT, AT, Address::times_4); ++ __ daddu(AT, T2, AT); ++ __ addiu(AT, AT, arrayOopDesc::base_offset_in_bytes(T_FLOAT)); //bound ++ ++ __ warn("fastore Unimplemented yet"); ++ __ gsswlec1(FSF, SSR, AT); ++ } else { ++ index_check(T2, SSR); ++ __ dsll(SSR, SSR, Address::times_4); ++ __ daddu(T2, T2, SSR); ++ __ access_store_at(T_FLOAT, IN_HEAP | IS_ARRAY, Address(T2, arrayOopDesc::base_offset_in_bytes(T_FLOAT)), noreg, noreg, noreg); ++ } ++} ++ ++// used register T2, T3 ++void TemplateTable::dastore() { ++ transition(dtos, vtos); ++ __ pop_i (T2); ++ if(UseBoundCheckInstruction) { ++ __ pop_ptr(T3); ++ __ dsll(T2, T2, Address::times_8); ++ __ daddu(T2, T3, T2); ++ __ addiu(T2, T2, arrayOopDesc::base_offset_in_bytes(T_DOUBLE) + 0 * wordSize); // base ++ ++ __ lw(AT, T3, arrayOopDesc::length_offset_in_bytes()); ++ __ dsll(AT, AT, Address::times_8); ++ __ daddu(AT, T3, AT); ++ __ addiu(AT, AT, arrayOopDesc::base_offset_in_bytes(T_DOUBLE) + 0 * wordSize); //bound ++ ++ __ warn("dastore Unimplemented yet"); ++ __ gssdlec1(FSF, T2, AT); ++ } else { ++ index_check(T3, 
T2); ++ __ dsll(T2, T2, Address::times_8); ++ __ daddu(T3, T3, T2); ++ __ access_store_at(T_DOUBLE, IN_HEAP | IS_ARRAY, Address(T3, arrayOopDesc::base_offset_in_bytes(T_DOUBLE)), noreg, noreg, noreg); ++ } ++} ++ ++// used register : T2, T3, T8 ++// T2 : array ++// T3 : subklass ++// T8 : supklass ++void TemplateTable::aastore() { ++ Label is_null, ok_is_subtype, done; ++ transition(vtos, vtos); ++ // stack: ..., array, index, value ++ __ ld(FSR, at_tos()); // Value ++ __ lw(SSR, at_tos_p1()); // Index ++ __ ld(T2, at_tos_p2()); // Array ++ ++ // index_check(T2, SSR); ++ index_check_without_pop(T2, SSR); ++ // do array store check - check for NULL value first ++ __ beq(FSR, R0, is_null); ++ __ delayed()->nop(); ++ ++ // Move subklass into T3 ++ //add for compressedoops ++ __ load_klass(T3, FSR); ++ // Move superklass into T8 ++ //add for compressedoops ++ __ load_klass(T8, T2); ++ __ ld(T8, Address(T8, ObjArrayKlass::element_klass_offset())); ++ // Compress array+index*4+12 into a single register. T2 ++ __ dsll(AT, SSR, UseCompressedOops? Address::times_4 : Address::times_8); ++ __ daddu(T2, T2, AT); ++ __ daddiu(T2, T2, arrayOopDesc::base_offset_in_bytes(T_OBJECT)); ++ ++ // Generate subtype check. ++ // Superklass in T8. Subklass in T3. ++ __ gen_subtype_check(T8, T3, ok_is_subtype); ++ // Come here on failure ++ // object is at FSR ++ __ jmp(Interpreter::_throw_ArrayStoreException_entry); ++ __ delayed()->nop(); ++ // Come here on success ++ __ bind(ok_is_subtype); ++ do_oop_store(_masm, Address(T2, 0), FSR, IS_ARRAY); ++ __ b(done); ++ __ delayed()->nop(); ++ ++ // Have a NULL in FSR, T2=array, SSR=index. Store NULL at ary[idx] ++ __ bind(is_null); ++ __ profile_null_seen(T9); ++ __ dsll(AT, SSR, UseCompressedOops? Address::times_4 : Address::times_8); ++ __ daddu(T2, T2, AT); ++ do_oop_store(_masm, Address(T2, arrayOopDesc::base_offset_in_bytes(T_OBJECT)), noreg, IS_ARRAY); ++ ++ __ bind(done); ++ __ daddiu(SP, SP, 3 * Interpreter::stackElementSize); ++} ++ ++void TemplateTable::bastore() { ++ transition(itos, vtos); ++ __ pop_i(SSR); ++ if(UseBoundCheckInstruction) { ++ guarantee(false, "unimplemented yet!"); ++ __ pop_ptr(T2); ++ __ daddu(SSR, T2, SSR); ++ __ addiu(SSR, SSR, arrayOopDesc::base_offset_in_bytes(T_BYTE)); // base ++ ++ __ lw(AT, T2, arrayOopDesc::length_offset_in_bytes()); ++ __ daddu(AT, T2, AT); ++ __ addiu(AT, AT, arrayOopDesc::base_offset_in_bytes(T_BYTE)); //bound ++ ++ __ warn("bastore Unimplemented yet"); ++ __ gssble(FSR, SSR, AT); ++ } else { ++ index_check(T2, SSR); ++ ++ // Need to check whether array is boolean or byte ++ // since both types share the bastore bytecode. 
++ __ load_klass(T9, T2); ++ __ lw(T9, T9, in_bytes(Klass::layout_helper_offset())); ++ ++ int diffbit = Klass::layout_helper_boolean_diffbit(); ++ __ move(AT, diffbit); ++ ++ Label L_skip; ++ __ andr(AT, T9, AT); ++ __ beq(AT, R0, L_skip); ++ __ delayed()->nop(); ++ __ andi(FSR, FSR, 0x1); ++ __ bind(L_skip); ++ ++ __ daddu(SSR, T2, SSR); ++ __ access_store_at(T_BYTE, IN_HEAP | IS_ARRAY, Address(SSR, arrayOopDesc::base_offset_in_bytes(T_BYTE)), FSR, noreg, noreg); ++ } ++} ++ ++void TemplateTable::castore() { ++ transition(itos, vtos); ++ __ pop_i(SSR); ++ if(UseBoundCheckInstruction) { ++ __ pop_ptr(T2); ++ __ dsll(SSR, SSR, Address::times_2); ++ __ daddu(SSR, T2, SSR); ++ __ addiu(SSR, SSR, arrayOopDesc::base_offset_in_bytes(T_CHAR)); // base ++ ++ __ lw(AT, T2, arrayOopDesc::length_offset_in_bytes()); ++ __ dsll(AT, AT, Address::times_2); ++ __ daddu(AT, T2, AT); ++ __ addiu(AT, AT, arrayOopDesc::base_offset_in_bytes(T_CHAR)); //bound ++ ++ __ warn("castore Unimplemented yet"); ++ __ gsshle(FSR, SSR, AT); ++ } else { ++ index_check(T2, SSR); ++ __ dsll(SSR, SSR, Address::times_2); ++ __ daddu(SSR, T2, SSR); ++ __ access_store_at(T_CHAR, IN_HEAP | IS_ARRAY, Address(SSR, arrayOopDesc::base_offset_in_bytes(T_CHAR)), FSR, noreg, noreg); ++ } ++} ++ ++void TemplateTable::sastore() { ++ castore(); ++} ++ ++void TemplateTable::istore(int n) { ++ transition(itos, vtos); ++ __ sw(FSR, iaddress(n)); ++} ++ ++void TemplateTable::lstore(int n) { ++ transition(ltos, vtos); ++ __ sd(FSR, laddress(n)); ++} ++ ++void TemplateTable::fstore(int n) { ++ transition(ftos, vtos); ++ __ swc1(FSF, faddress(n)); ++} ++ ++void TemplateTable::dstore(int n) { ++ transition(dtos, vtos); ++ __ sdc1(FSF, laddress(n)); ++} ++ ++void TemplateTable::astore(int n) { ++ transition(vtos, vtos); ++ __ pop_ptr(FSR); ++ __ sd(FSR, aaddress(n)); ++} ++ ++void TemplateTable::pop() { ++ transition(vtos, vtos); ++ __ daddiu(SP, SP, Interpreter::stackElementSize); ++} ++ ++void TemplateTable::pop2() { ++ transition(vtos, vtos); ++ __ daddiu(SP, SP, 2 * Interpreter::stackElementSize); ++} ++ ++void TemplateTable::dup() { ++ transition(vtos, vtos); ++ // stack: ..., a ++ __ load_ptr(0, FSR); ++ __ push_ptr(FSR); ++ // stack: ..., a, a ++} ++ ++// blows FSR ++void TemplateTable::dup_x1() { ++ transition(vtos, vtos); ++ // stack: ..., a, b ++ __ load_ptr(0, FSR); // load b ++ __ load_ptr(1, A5); // load a ++ __ store_ptr(1, FSR); // store b ++ __ store_ptr(0, A5); // store a ++ __ push_ptr(FSR); // push b ++ // stack: ..., b, a, b ++} ++ ++// blows FSR ++void TemplateTable::dup_x2() { ++ transition(vtos, vtos); ++ // stack: ..., a, b, c ++ __ load_ptr(0, FSR); // load c ++ __ load_ptr(2, A5); // load a ++ __ store_ptr(2, FSR); // store c in a ++ __ push_ptr(FSR); // push c ++ // stack: ..., c, b, c, c ++ __ load_ptr(2, FSR); // load b ++ __ store_ptr(2, A5); // store a in b ++ // stack: ..., c, a, c, c ++ __ store_ptr(1, FSR); // store b in c ++ // stack: ..., c, a, b, c ++} ++ ++// blows FSR ++void TemplateTable::dup2() { ++ transition(vtos, vtos); ++ // stack: ..., a, b ++ __ load_ptr(1, FSR); // load a ++ __ push_ptr(FSR); // push a ++ __ load_ptr(1, FSR); // load b ++ __ push_ptr(FSR); // push b ++ // stack: ..., a, b, a, b ++} ++ ++// blows FSR ++void TemplateTable::dup2_x1() { ++ transition(vtos, vtos); ++ // stack: ..., a, b, c ++ __ load_ptr(0, T2); // load c ++ __ load_ptr(1, FSR); // load b ++ __ push_ptr(FSR); // push b ++ __ push_ptr(T2); // push c ++ // stack: ..., a, b, c, b, c ++ __ store_ptr(3, T2); // store c in b ++ 
// stack: ..., a, c, c, b, c ++ __ load_ptr(4, T2); // load a ++ __ store_ptr(2, T2); // store a in 2nd c ++ // stack: ..., a, c, a, b, c ++ __ store_ptr(4, FSR); // store b in a ++ // stack: ..., b, c, a, b, c ++ ++ // stack: ..., b, c, a, b, c ++} ++ ++// blows FSR, SSR ++void TemplateTable::dup2_x2() { ++ transition(vtos, vtos); ++ // stack: ..., a, b, c, d ++ // stack: ..., a, b, c, d ++ __ load_ptr(0, T2); // load d ++ __ load_ptr(1, FSR); // load c ++ __ push_ptr(FSR); // push c ++ __ push_ptr(T2); // push d ++ // stack: ..., a, b, c, d, c, d ++ __ load_ptr(4, FSR); // load b ++ __ store_ptr(2, FSR); // store b in d ++ __ store_ptr(4, T2); // store d in b ++ // stack: ..., a, d, c, b, c, d ++ __ load_ptr(5, T2); // load a ++ __ load_ptr(3, FSR); // load c ++ __ store_ptr(3, T2); // store a in c ++ __ store_ptr(5, FSR); // store c in a ++ // stack: ..., c, d, a, b, c, d ++ ++ // stack: ..., c, d, a, b, c, d ++} ++ ++// blows FSR ++void TemplateTable::swap() { ++ transition(vtos, vtos); ++ // stack: ..., a, b ++ ++ __ load_ptr(1, A5); // load a ++ __ load_ptr(0, FSR); // load b ++ __ store_ptr(0, A5); // store a in b ++ __ store_ptr(1, FSR); // store b in a ++ ++ // stack: ..., b, a ++} ++ ++void TemplateTable::iop2(Operation op) { ++ transition(itos, itos); ++ ++ __ pop_i(SSR); ++ switch (op) { ++ case add : __ addu32(FSR, SSR, FSR); break; ++ case sub : __ subu32(FSR, SSR, FSR); break; ++ case mul : __ mul(FSR, SSR, FSR); break; ++ case _and : __ andr(FSR, SSR, FSR); break; ++ case _or : __ orr(FSR, SSR, FSR); break; ++ case _xor : __ xorr(FSR, SSR, FSR); break; ++ case shl : __ sllv(FSR, SSR, FSR); break; ++ case shr : __ srav(FSR, SSR, FSR); break; ++ case ushr : __ srlv(FSR, SSR, FSR); break; ++ default : ShouldNotReachHere(); ++ } ++} ++ ++// the result stored in FSR, SSR, ++// used registers : T2, T3 ++void TemplateTable::lop2(Operation op) { ++ transition(ltos, ltos); ++ __ pop_l(T2); ++ ++ switch (op) { ++ case add : __ daddu(FSR, T2, FSR); break; ++ case sub : __ dsubu(FSR, T2, FSR); break; ++ case _and: __ andr(FSR, T2, FSR); break; ++ case _or : __ orr(FSR, T2, FSR); break; ++ case _xor: __ xorr(FSR, T2, FSR); break; ++ default : ShouldNotReachHere(); ++ } ++} ++ ++// java require this bytecode could handle 0x80000000/-1, dont cause a overflow exception, ++// the result is 0x80000000 ++// the godson2 cpu do the same, so we need not handle this specially like x86 ++void TemplateTable::idiv() { ++ transition(itos, itos); ++ Label not_zero; ++ ++ __ bne(FSR, R0, not_zero); ++ __ delayed()->nop(); ++ __ jmp(Interpreter::_throw_ArithmeticException_entry); ++ __ delayed()->nop(); ++ __ bind(not_zero); ++ ++ __ pop_i(SSR); ++ if (UseLEXT1) { ++ __ gsdiv(FSR, SSR, FSR); ++ } else { ++ __ div(SSR, FSR); ++ __ mflo(FSR); ++ } ++} ++ ++void TemplateTable::irem() { ++ transition(itos, itos); ++ Label not_zero; ++ __ pop_i(SSR); ++ __ div(SSR, FSR); ++ ++ __ bne(FSR, R0, not_zero); ++ __ delayed()->nop(); ++ //__ brk(7); ++ __ jmp(Interpreter::_throw_ArithmeticException_entry); ++ __ delayed()->nop(); ++ ++ __ bind(not_zero); ++ __ mfhi(FSR); ++} ++ ++void TemplateTable::lmul() { ++ transition(ltos, ltos); ++ __ pop_l(T2); ++ if (UseLEXT1) { ++ __ gsdmult(FSR, T2, FSR); ++ } else { ++ __ dmult(T2, FSR); ++ __ mflo(FSR); ++ } ++} ++ ++// NOTE: i DONT use the Interpreter::_throw_ArithmeticException_entry ++void TemplateTable::ldiv() { ++ transition(ltos, ltos); ++ Label normal; ++ ++ __ bne(FSR, R0, normal); ++ __ delayed()->nop(); ++ ++ //__ brk(7); //generate FPE ++ __ 
jmp(Interpreter::_throw_ArithmeticException_entry); ++ __ delayed()->nop(); ++ ++ __ bind(normal); ++ __ pop_l(A2); ++ if (UseLEXT1) { ++ __ gsddiv(FSR, A2, FSR); ++ } else { ++ __ ddiv(A2, FSR); ++ __ mflo(FSR); ++ } ++} ++ ++// NOTE: i DONT use the Interpreter::_throw_ArithmeticException_entry ++void TemplateTable::lrem() { ++ transition(ltos, ltos); ++ Label normal; ++ ++ __ bne(FSR, R0, normal); ++ __ delayed()->nop(); ++ ++ __ jmp(Interpreter::_throw_ArithmeticException_entry); ++ __ delayed()->nop(); ++ ++ __ bind(normal); ++ __ pop_l (A2); ++ ++ if (UseLEXT1) { ++ __ gsdmod(FSR, A2, FSR); ++ } else { ++ __ ddiv(A2, FSR); ++ __ mfhi(FSR); ++ } ++} ++ ++// result in FSR ++// used registers : T0 ++void TemplateTable::lshl() { ++ transition(itos, ltos); ++ __ pop_l(T0); ++ __ dsllv(FSR, T0, FSR); ++} ++ ++// used registers : T0 ++void TemplateTable::lshr() { ++ transition(itos, ltos); ++ __ pop_l(T0); ++ __ dsrav(FSR, T0, FSR); ++} ++ ++// used registers : T0 ++void TemplateTable::lushr() { ++ transition(itos, ltos); ++ __ pop_l(T0); ++ __ dsrlv(FSR, T0, FSR); ++} ++ ++// result in FSF ++void TemplateTable::fop2(Operation op) { ++ transition(ftos, ftos); ++ switch (op) { ++ case add: ++ __ lwc1(FTF, at_sp()); ++ __ add_s(FSF, FTF, FSF); ++ break; ++ case sub: ++ __ lwc1(FTF, at_sp()); ++ __ sub_s(FSF, FTF, FSF); ++ break; ++ case mul: ++ __ lwc1(FTF, at_sp()); ++ __ mul_s(FSF, FTF, FSF); ++ break; ++ case div: ++ __ lwc1(FTF, at_sp()); ++ __ div_s(FSF, FTF, FSF); ++ break; ++ case rem: ++ __ mov_s(F13, FSF); ++ __ lwc1(F12, at_sp()); ++ __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::frem), 2); ++ break; ++ default : ShouldNotReachHere(); ++ } ++ ++ __ daddiu(SP, SP, 1 * wordSize); ++} ++ ++// result in SSF||FSF ++// i dont handle the strict flags ++void TemplateTable::dop2(Operation op) { ++ transition(dtos, dtos); ++ switch (op) { ++ case add: ++ __ ldc1(FTF, at_sp()); ++ __ add_d(FSF, FTF, FSF); ++ break; ++ case sub: ++ __ ldc1(FTF, at_sp()); ++ __ sub_d(FSF, FTF, FSF); ++ break; ++ case mul: ++ __ ldc1(FTF, at_sp()); ++ __ mul_d(FSF, FTF, FSF); ++ break; ++ case div: ++ __ ldc1(FTF, at_sp()); ++ __ div_d(FSF, FTF, FSF); ++ break; ++ case rem: ++ __ mov_d(F13, FSF); ++ __ ldc1(F12, at_sp()); ++ __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::drem), 2); ++ break; ++ default : ShouldNotReachHere(); ++ } ++ ++ __ daddiu(SP, SP, 2 * wordSize); ++} ++ ++void TemplateTable::ineg() { ++ transition(itos, itos); ++ __ subu32(FSR, R0, FSR); ++} ++ ++void TemplateTable::lneg() { ++ transition(ltos, ltos); ++ __ dsubu(FSR, R0, FSR); ++} ++ ++void TemplateTable::fneg() { ++ transition(ftos, ftos); ++ __ neg_s(FSF, FSF); ++} ++ ++void TemplateTable::dneg() { ++ transition(dtos, dtos); ++ __ neg_d(FSF, FSF); ++} ++ ++// used registers : T2 ++void TemplateTable::iinc() { ++ transition(vtos, vtos); ++ locals_index(T2); ++ __ lw(FSR, T2, 0); ++ __ lb(AT, at_bcp(2)); // get constant ++ __ daddu(FSR, FSR, AT); ++ __ sw(FSR, T2, 0); ++} ++ ++// used register : T2 ++void TemplateTable::wide_iinc() { ++ transition(vtos, vtos); ++ locals_index_wide(T2); ++ __ get_2_byte_integer_at_bcp(FSR, AT, 4); ++ __ hswap(FSR); ++ __ lw(AT, T2, 0); ++ __ daddu(FSR, AT, FSR); ++ __ sw(FSR, T2, 0); ++} ++ ++void TemplateTable::convert() { ++ // Checking ++#ifdef ASSERT ++ { ++ TosState tos_in = ilgl; ++ TosState tos_out = ilgl; ++ switch (bytecode()) { ++ case Bytecodes::_i2l: // fall through ++ case Bytecodes::_i2f: // fall through ++ case Bytecodes::_i2d: // fall through ++ case Bytecodes::_i2b: // 
fall through ++ case Bytecodes::_i2c: // fall through ++ case Bytecodes::_i2s: tos_in = itos; break; ++ case Bytecodes::_l2i: // fall through ++ case Bytecodes::_l2f: // fall through ++ case Bytecodes::_l2d: tos_in = ltos; break; ++ case Bytecodes::_f2i: // fall through ++ case Bytecodes::_f2l: // fall through ++ case Bytecodes::_f2d: tos_in = ftos; break; ++ case Bytecodes::_d2i: // fall through ++ case Bytecodes::_d2l: // fall through ++ case Bytecodes::_d2f: tos_in = dtos; break; ++ default : ShouldNotReachHere(); ++ } ++ switch (bytecode()) { ++ case Bytecodes::_l2i: // fall through ++ case Bytecodes::_f2i: // fall through ++ case Bytecodes::_d2i: // fall through ++ case Bytecodes::_i2b: // fall through ++ case Bytecodes::_i2c: // fall through ++ case Bytecodes::_i2s: tos_out = itos; break; ++ case Bytecodes::_i2l: // fall through ++ case Bytecodes::_f2l: // fall through ++ case Bytecodes::_d2l: tos_out = ltos; break; ++ case Bytecodes::_i2f: // fall through ++ case Bytecodes::_l2f: // fall through ++ case Bytecodes::_d2f: tos_out = ftos; break; ++ case Bytecodes::_i2d: // fall through ++ case Bytecodes::_l2d: // fall through ++ case Bytecodes::_f2d: tos_out = dtos; break; ++ default : ShouldNotReachHere(); ++ } ++ transition(tos_in, tos_out); ++ } ++#endif // ASSERT ++ ++ // Conversion ++ switch (bytecode()) { ++ case Bytecodes::_i2l: ++ __ sll(FSR, FSR, 0); ++ break; ++ case Bytecodes::_i2f: ++ __ mtc1(FSR, FSF); ++ __ cvt_s_w(FSF, FSF); ++ break; ++ case Bytecodes::_i2d: ++ __ mtc1(FSR, FSF); ++ __ cvt_d_w(FSF, FSF); ++ break; ++ case Bytecodes::_i2b: ++ __ seb(FSR, FSR); ++ break; ++ case Bytecodes::_i2c: ++ __ andi(FSR, FSR, 0xFFFF); // truncate upper 56 bits ++ break; ++ case Bytecodes::_i2s: ++ __ seh(FSR, FSR); ++ break; ++ case Bytecodes::_l2i: ++ __ sll(FSR, FSR, 0); ++ break; ++ case Bytecodes::_l2f: ++ __ dmtc1(FSR, FSF); ++ __ cvt_s_l(FSF, FSF); ++ break; ++ case Bytecodes::_l2d: ++ __ dmtc1(FSR, FSF); ++ __ cvt_d_l(FSF, FSF); ++ break; ++ case Bytecodes::_f2i: ++ { ++ Label L; ++ ++ __ trunc_w_s(F12, FSF); ++ __ move(AT, 0x7fffffff); ++ __ mfc1(FSR, F12); ++ __ c_un_s(FSF, FSF); //NaN? ++ __ movt(FSR, R0); ++ ++ __ bne(AT, FSR, L); ++ __ delayed()->lui(T9, 0x8000); ++ ++ __ mfc1(AT, FSF); ++ __ andr(AT, AT, T9); ++ ++ __ movn(FSR, T9, AT); ++ ++ __ bind(L); ++ } ++ break; ++ case Bytecodes::_f2l: ++ { ++ Label L; ++ ++ __ trunc_l_s(F12, FSF); ++ __ daddiu(AT, R0, -1); ++ __ dsrl(AT, AT, 1); ++ __ dmfc1(FSR, F12); ++ __ c_un_s(FSF, FSF); //NaN? ++ __ movt(FSR, R0); ++ ++ __ bne(AT, FSR, L); ++ __ delayed()->lui(T9, 0x8000); ++ ++ __ mfc1(AT, FSF); ++ __ andr(AT, AT, T9); ++ ++ __ dsll32(T9, T9, 0); ++ __ movn(FSR, T9, AT); ++ ++ __ bind(L); ++ } ++ break; ++ case Bytecodes::_f2d: ++ __ cvt_d_s(FSF, FSF); ++ break; ++ case Bytecodes::_d2i: ++ { ++ Label L; ++ ++ __ trunc_w_d(F12, FSF); ++ __ move(AT, 0x7fffffff); ++ __ mfc1(FSR, F12); ++ ++ __ bne(FSR, AT, L); ++ __ delayed()->mtc1(R0, F12); ++ ++ __ cvt_d_w(F12, F12); ++ __ c_ult_d(FSF, F12); ++ __ bc1f(L); ++ __ delayed()->addiu(T9, R0, -1); ++ ++ __ c_un_d(FSF, FSF); //NaN? ++ __ subu32(FSR, T9, AT); ++ __ movt(FSR, R0); ++ ++ __ bind(L); ++ } ++ break; ++ case Bytecodes::_d2l: ++ { ++ Label L; ++ ++ __ trunc_l_d(F12, FSF); ++ __ daddiu(AT, R0, -1); ++ __ dsrl(AT, AT, 1); ++ __ dmfc1(FSR, F12); ++ ++ __ bne(FSR, AT, L); ++ __ delayed()->mtc1(R0, F12); ++ ++ __ cvt_d_w(F12, F12); ++ __ c_ult_d(FSF, F12); ++ __ bc1f(L); ++ __ delayed()->daddiu(T9, R0, -1); ++ ++ __ c_un_d(FSF, FSF); //NaN? 
++ __ subu(FSR, T9, AT); ++ __ movt(FSR, R0); ++ ++ __ bind(L); ++ } ++ break; ++ case Bytecodes::_d2f: ++ __ cvt_s_d(FSF, FSF); ++ break; ++ default : ++ ShouldNotReachHere(); ++ } ++} ++ ++void TemplateTable::lcmp() { ++ transition(ltos, itos); ++ ++ __ pop(T0); ++ __ pop(R0); ++ ++ __ slt(AT, T0, FSR); ++ __ slt(FSR, FSR, T0); ++ __ subu(FSR, FSR, AT); ++} ++ ++void TemplateTable::float_cmp(bool is_float, int unordered_result) { ++ __ ori(FSR, R0, 1); ++ __ ori(AT, R0, 1); ++ ++ if (is_float) { ++ __ lwc1(FTF, at_sp()); ++ __ daddiu(SP, SP, 1 * wordSize); ++ if (unordered_result < 0) { ++ __ c_olt_s(FSF, FTF); ++ __ movf(FSR, R0); ++ __ c_ult_s(FTF, FSF); ++ } else { ++ __ c_ult_s(FSF, FTF); ++ __ movf(FSR, R0); ++ __ c_olt_s(FTF, FSF); ++ } ++ } else { ++ __ ldc1(FTF, at_sp()); ++ __ daddiu(SP, SP, 2 * wordSize); ++ if (unordered_result < 0) { ++ __ c_olt_d(FSF, FTF); ++ __ movf(FSR, R0); ++ __ c_ult_d(FTF, FSF); ++ } else { ++ __ c_ult_d(FSF, FTF); ++ __ movf(FSR, R0); ++ __ c_olt_d(FTF, FSF); ++ } ++ } ++ ++ __ movf(AT, R0); ++ __ subu(FSR, FSR, AT); ++} ++ ++ ++// used registers : T3, A7, Rnext ++// FSR : return bci, this is defined by the vm specification ++// T2 : MDO taken count ++// T3 : method ++// A7 : offset ++// Rnext : next bytecode, this is required by dispatch_base ++void TemplateTable::branch(bool is_jsr, bool is_wide) { ++ __ get_method(T3); ++ __ profile_taken_branch(A7, T2); // only C2 meaningful ++ ++ const ByteSize be_offset = MethodCounters::backedge_counter_offset() + ++ InvocationCounter::counter_offset(); ++ const ByteSize inv_offset = MethodCounters::invocation_counter_offset() + ++ InvocationCounter::counter_offset(); ++ ++ // Load up T4 with the branch displacement ++ if (!is_wide) { ++ __ lb(A7, BCP, 1); ++ __ lbu(AT, BCP, 2); ++ __ dsll(A7, A7, 8); ++ __ orr(A7, A7, AT); ++ } else { ++ __ get_4_byte_integer_at_bcp(A7, AT, 1); ++ __ swap(A7); ++ } ++ ++ // Handle all the JSR stuff here, then exit. ++ // It's much shorter and cleaner than intermingling with the non-JSR ++ // normal-branch stuff occuring below. 
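++  // (Recall: jsr pushes the bci of the bytecode that follows it as a returnAddress
++  //  and branches; the matching ret bytecode later reloads that bci from a local
++  //  variable.  The block below computes the return bci, pushes it and dispatches
++  //  at the branch target.)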
++ if (is_jsr) { ++ // Pre-load the next target bytecode into Rnext ++ __ daddu(AT, BCP, A7); ++ __ lbu(Rnext, AT, 0); ++ ++ // compute return address as bci in FSR ++ __ daddiu(FSR, BCP, (is_wide?5:3) - in_bytes(ConstMethod::codes_offset())); ++ __ ld(AT, T3, in_bytes(Method::const_offset())); ++ __ dsubu(FSR, FSR, AT); ++ // Adjust the bcp in BCP by the displacement in A7 ++ __ daddu(BCP, BCP, A7); ++ // jsr returns atos that is not an oop ++ // Push return address ++ __ push_i(FSR); ++ // jsr returns vtos ++ __ dispatch_only_noverify(vtos); ++ ++ return; ++ } ++ ++ // Normal (non-jsr) branch handling ++ ++ // Adjust the bcp in S0 by the displacement in T4 ++ __ daddu(BCP, BCP, A7); ++ ++ assert(UseLoopCounter || !UseOnStackReplacement, "on-stack-replacement requires loop counters"); ++ Label backedge_counter_overflow; ++ Label profile_method; ++ Label dispatch; ++ if (UseLoopCounter) { ++ // increment backedge counter for backward branches ++ // T3: method ++ // T4: target offset ++ // BCP: target bcp ++ // LVP: locals pointer ++ __ bgtz(A7, dispatch); // check if forward or backward branch ++ __ delayed()->nop(); ++ ++ // check if MethodCounters exists ++ Label has_counters; ++ __ ld(AT, T3, in_bytes(Method::method_counters_offset())); // use AT as MDO, TEMP ++ __ bne(AT, R0, has_counters); ++ __ delayed()->nop(); ++ __ push(T3); ++ __ call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::build_method_counters), ++ T3); ++ __ pop(T3); ++ __ ld(AT, T3, in_bytes(Method::method_counters_offset())); // use AT as MDO, TEMP ++ __ beq(AT, R0, dispatch); ++ __ delayed()->nop(); ++ __ bind(has_counters); ++ ++ if (TieredCompilation) { ++ Label no_mdo; ++ int increment = InvocationCounter::count_increment; ++ int mask = ((1 << Tier0BackedgeNotifyFreqLog) - 1) << InvocationCounter::count_shift; ++ if (ProfileInterpreter) { ++ // Are we profiling? 
++ __ ld(T0, Address(T3, in_bytes(Method::method_data_offset()))); ++ __ beq(T0, R0, no_mdo); ++ __ delayed()->nop(); ++ // Increment the MDO backedge counter ++ const Address mdo_backedge_counter(T0, in_bytes(MethodData::backedge_counter_offset()) + ++ in_bytes(InvocationCounter::counter_offset())); ++ __ increment_mask_and_jump(mdo_backedge_counter, increment, mask, ++ T1, false, Assembler::zero, &backedge_counter_overflow); ++ __ beq(R0, R0, dispatch); ++ __ delayed()->nop(); ++ } ++ __ bind(no_mdo); ++ // Increment backedge counter in MethodCounters* ++ __ ld(T0, Address(T3, Method::method_counters_offset())); ++ __ increment_mask_and_jump(Address(T0, be_offset), increment, mask, ++ T1, false, Assembler::zero, &backedge_counter_overflow); ++ if (!UseOnStackReplacement) { ++ __ bind(backedge_counter_overflow); ++ } ++ } else { ++ // increment back edge counter ++ __ ld(T1, T3, in_bytes(Method::method_counters_offset())); ++ __ lw(T0, T1, in_bytes(be_offset)); ++ __ increment(T0, InvocationCounter::count_increment); ++ __ sw(T0, T1, in_bytes(be_offset)); ++ ++ // load invocation counter ++ __ lw(T1, T1, in_bytes(inv_offset)); ++ // buffer bit added, mask no needed ++ ++ // dadd backedge counter & invocation counter ++ __ daddu(T1, T1, T0); ++ ++ if (ProfileInterpreter) { ++ // Test to see if we should create a method data oop ++ // T1 : backedge counter & invocation counter ++ if (Assembler::is_simm16(InvocationCounter::InterpreterProfileLimit)) { ++ __ slti(AT, T1, InvocationCounter::InterpreterProfileLimit); ++ } else { ++ __ li(AT, (long)&InvocationCounter::InterpreterProfileLimit); ++ __ lw(AT, AT, 0); ++ __ slt(AT, T1, AT); ++ } ++ ++ __ bne(AT, R0, dispatch); ++ __ delayed()->nop(); ++ ++ // if no method data exists, go to profile method ++ __ test_method_data_pointer(T1, profile_method); ++ ++ if (UseOnStackReplacement) { ++ if (Assembler::is_simm16(InvocationCounter::InterpreterBackwardBranchLimit)) { ++ __ slti(AT, T2, InvocationCounter::InterpreterBackwardBranchLimit); ++ } else { ++ __ li(AT, (long)&InvocationCounter::InterpreterBackwardBranchLimit); ++ __ lw(AT, AT, 0); ++ __ slt(AT, T2, AT); ++ } ++ ++ __ bne(AT, R0, dispatch); ++ __ delayed()->nop(); ++ ++ // When ProfileInterpreter is on, the backedge_count comes ++ // from the methodDataOop, which value does not get reset on ++ // the call to frequency_counter_overflow(). ++ // To avoid excessive calls to the overflow routine while ++ // the method is being compiled, dadd a second test to make ++ // sure the overflow function is called only once every ++ // overflow_frequency. ++ const int overflow_frequency = 1024; ++ __ andi(AT, T2, overflow_frequency-1); ++ __ beq(AT, R0, backedge_counter_overflow); ++ __ delayed()->nop(); ++ } ++ } else { ++ if (UseOnStackReplacement) { ++ // check for overflow against AT, which is the sum of the counters ++ __ li(AT, (long)&InvocationCounter::InterpreterBackwardBranchLimit); ++ __ lw(AT, AT, 0); ++ __ slt(AT, T1, AT); ++ __ beq(AT, R0, backedge_counter_overflow); ++ __ delayed()->nop(); ++ } ++ } ++ } ++ __ bind(dispatch); ++ } ++ ++ // Pre-load the next target bytecode into Rnext ++ __ lbu(Rnext, BCP, 0); ++ ++ // continue with the bytecode @ target ++ // FSR: return bci for jsr's, unused otherwise ++ // Rnext: target bytecode ++ // BCP: target bcp ++ __ dispatch_only(vtos, true); ++ ++ if (UseLoopCounter) { ++ if (ProfileInterpreter) { ++ // Out-of-line code to allocate method data oop. 
++ __ bind(profile_method); ++ __ call_VM(NOREG, CAST_FROM_FN_PTR(address, InterpreterRuntime::profile_method)); ++ __ set_method_data_pointer_for_bcp(); ++ __ b(dispatch); ++ __ delayed()->nop(); ++ } ++ ++ if (UseOnStackReplacement) { ++ // invocation counter overflow ++ __ bind(backedge_counter_overflow); ++ __ subu(A7, BCP, A7); // branch bcp ++ call_VM(NOREG, CAST_FROM_FN_PTR(address, ++ InterpreterRuntime::frequency_counter_overflow), A7); ++ ++ // V0: osr nmethod (osr ok) or NULL (osr not possible) ++ // V1: osr adapter frame return address ++ // LVP: locals pointer ++ // BCP: bcp ++ __ beq(V0, R0, dispatch); ++ __ delayed()->nop(); ++ // nmethod may have been invalidated (VM may block upon call_VM return) ++ __ lb(T3, V0, nmethod::state_offset()); ++ __ move(AT, nmethod::in_use); ++ __ bne(AT, T3, dispatch); ++ __ delayed()->nop(); ++ ++ // We have the address of an on stack replacement routine in rax. ++ // In preparation of invoking it, first we must migrate the locals ++ // and monitors from off the interpreter frame on the stack. ++ // Ensure to save the osr nmethod over the migration call, ++ // it will be preserved in Rnext. ++ __ move(Rnext, V0); ++ const Register thread = TREG; ++#ifndef OPT_THREAD ++ __ get_thread(thread); ++#endif ++ call_VM(noreg, CAST_FROM_FN_PTR(address, SharedRuntime::OSR_migration_begin)); ++ ++ // V0 is OSR buffer, move it to expected parameter location ++ // refer to osrBufferPointer in c1_LIRAssembler_mips.cpp ++ __ move(T0, V0); ++ ++ // pop the interpreter frame ++ __ ld(A7, Address(FP, frame::interpreter_frame_sender_sp_offset * wordSize)); ++ //FIXME, shall we keep the return address on the stack? ++ __ leave(); // remove frame anchor ++ __ move(LVP, RA); ++ __ move(SP, A7); ++ ++ __ move(AT, -(StackAlignmentInBytes)); ++ __ andr(SP , SP , AT); ++ ++ // push the (possibly adjusted) return address ++ //refer to osr_entry in c1_LIRAssembler_mips.cpp ++ __ ld(AT, Rnext, nmethod::osr_entry_point_offset()); ++ __ jr(AT); ++ __ delayed()->nop(); ++ } ++ } ++} ++ ++ ++void TemplateTable::if_0cmp(Condition cc) { ++ transition(itos, vtos); ++ // assume branch is more often taken than not (loops use backward branches) ++ Label not_taken; ++ switch(cc) { ++ case not_equal: ++ __ beq(FSR, R0, not_taken); ++ break; ++ case equal: ++ __ bne(FSR, R0, not_taken); ++ break; ++ case less: ++ __ bgez(FSR, not_taken); ++ break; ++ case less_equal: ++ __ bgtz(FSR, not_taken); ++ break; ++ case greater: ++ __ blez(FSR, not_taken); ++ break; ++ case greater_equal: ++ __ bltz(FSR, not_taken); ++ break; ++ } ++ __ delayed()->nop(); ++ ++ branch(false, false); ++ ++ __ bind(not_taken); ++ __ profile_not_taken_branch(FSR); ++} ++ ++void TemplateTable::if_icmp(Condition cc) { ++ transition(itos, vtos); ++ // assume branch is more often taken than not (loops use backward branches) ++ Label not_taken; ++ ++ __ pop_i(SSR); ++ switch(cc) { ++ case not_equal: ++ __ beq(SSR, FSR, not_taken); ++ break; ++ case equal: ++ __ bne(SSR, FSR, not_taken); ++ break; ++ case less: ++ __ slt(AT, SSR, FSR); ++ __ beq(AT, R0, not_taken); ++ break; ++ case less_equal: ++ __ slt(AT, FSR, SSR); ++ __ bne(AT, R0, not_taken); ++ break; ++ case greater: ++ __ slt(AT, FSR, SSR); ++ __ beq(AT, R0, not_taken); ++ break; ++ case greater_equal: ++ __ slt(AT, SSR, FSR); ++ __ bne(AT, R0, not_taken); ++ break; ++ } ++ __ delayed()->nop(); ++ ++ branch(false, false); ++ __ bind(not_taken); ++ __ profile_not_taken_branch(FSR); ++} ++ ++void TemplateTable::if_nullcmp(Condition cc) { ++ transition(atos, 
vtos); ++ // assume branch is more often taken than not (loops use backward branches) ++ Label not_taken; ++ switch(cc) { ++ case not_equal: ++ __ beq(FSR, R0, not_taken); ++ break; ++ case equal: ++ __ bne(FSR, R0, not_taken); ++ break; ++ default: ++ ShouldNotReachHere(); ++ } ++ __ delayed()->nop(); ++ ++ branch(false, false); ++ __ bind(not_taken); ++ __ profile_not_taken_branch(FSR); ++} ++ ++ ++void TemplateTable::if_acmp(Condition cc) { ++ transition(atos, vtos); ++ // assume branch is more often taken than not (loops use backward branches) ++ Label not_taken; ++ // __ lw(SSR, SP, 0); ++ __ pop_ptr(SSR); ++ switch(cc) { ++ case not_equal: ++ __ beq(SSR, FSR, not_taken); ++ break; ++ case equal: ++ __ bne(SSR, FSR, not_taken); ++ break; ++ default: ++ ShouldNotReachHere(); ++ } ++ __ delayed()->nop(); ++ ++ branch(false, false); ++ ++ __ bind(not_taken); ++ __ profile_not_taken_branch(FSR); ++} ++ ++// used registers : T1, T2, T3 ++// T1 : method ++// T2 : returb bci ++void TemplateTable::ret() { ++ transition(vtos, vtos); ++ ++ locals_index(T2); ++ __ ld(T2, T2, 0); ++ __ profile_ret(T2, T3); ++ ++ __ get_method(T1); ++ __ ld(BCP, T1, in_bytes(Method::const_offset())); ++ __ daddu(BCP, BCP, T2); ++ __ daddiu(BCP, BCP, in_bytes(ConstMethod::codes_offset())); ++ ++ __ dispatch_next(vtos, 0, true); ++} ++ ++// used registers : T1, T2, T3 ++// T1 : method ++// T2 : returb bci ++void TemplateTable::wide_ret() { ++ transition(vtos, vtos); ++ ++ locals_index_wide(T2); ++ __ ld(T2, T2, 0); // get return bci, compute return bcp ++ __ profile_ret(T2, T3); ++ ++ __ get_method(T1); ++ __ ld(BCP, T1, in_bytes(Method::const_offset())); ++ __ daddu(BCP, BCP, T2); ++ __ daddiu(BCP, BCP, in_bytes(ConstMethod::codes_offset())); ++ ++ __ dispatch_next(vtos, 0, true); ++} ++ ++// used register T2, T3, A7, Rnext ++// T2 : bytecode pointer ++// T3 : low ++// A7 : high ++// Rnext : dest bytecode, required by dispatch_base ++void TemplateTable::tableswitch() { ++ Label default_case, continue_execution; ++ transition(itos, vtos); ++ ++ // align BCP ++ __ daddiu(T2, BCP, BytesPerInt); ++ __ li(AT, -BytesPerInt); ++ __ andr(T2, T2, AT); ++ ++ // load lo & hi ++ __ lw(T3, T2, 1 * BytesPerInt); ++ __ swap(T3); ++ __ lw(A7, T2, 2 * BytesPerInt); ++ __ swap(A7); ++ ++ // check against lo & hi ++ __ slt(AT, FSR, T3); ++ __ bne(AT, R0, default_case); ++ __ delayed()->nop(); ++ ++ __ slt(AT, A7, FSR); ++ __ bne(AT, R0, default_case); ++ __ delayed()->nop(); ++ ++ // lookup dispatch offset, in A7 big endian ++ __ dsubu(FSR, FSR, T3); ++ __ dsll(AT, FSR, Address::times_4); ++ __ daddu(AT, T2, AT); ++ __ lw(A7, AT, 3 * BytesPerInt); ++ __ profile_switch_case(FSR, T9, T3); ++ ++ __ bind(continue_execution); ++ __ swap(A7); ++ __ daddu(BCP, BCP, A7); ++ __ lbu(Rnext, BCP, 0); ++ __ dispatch_only(vtos, true); ++ ++ // handle default ++ __ bind(default_case); ++ __ profile_switch_default(FSR); ++ __ lw(A7, T2, 0); ++ __ b(continue_execution); ++ __ delayed()->nop(); ++} ++ ++void TemplateTable::lookupswitch() { ++ transition(itos, itos); ++ __ stop("lookupswitch bytecode should have been rewritten"); ++} ++ ++// used registers : T2, T3, A7, Rnext ++// T2 : bytecode pointer ++// T3 : pair index ++// A7 : offset ++// Rnext : dest bytecode ++// the data after the opcode is the same as lookupswitch ++// see Rewriter::rewrite_method for more information ++void TemplateTable::fast_linearswitch() { ++ transition(itos, vtos); ++ Label loop_entry, loop, found, continue_execution; ++ ++ // swap FSR so we can avoid swapping the table 
entries ++ __ swap(FSR); ++ ++ // align BCP ++ __ daddiu(T2, BCP, BytesPerInt); ++ __ li(AT, -BytesPerInt); ++ __ andr(T2, T2, AT); ++ ++ // set counter ++ __ lw(T3, T2, BytesPerInt); ++ __ swap(T3); ++ __ b(loop_entry); ++ __ delayed()->nop(); ++ ++ // table search ++ __ bind(loop); ++ // get the entry value ++ __ dsll(AT, T3, Address::times_8); ++ __ daddu(AT, T2, AT); ++ __ lw(AT, AT, 2 * BytesPerInt); ++ ++ // found? ++ __ beq(FSR, AT, found); ++ __ delayed()->nop(); ++ ++ __ bind(loop_entry); ++ __ bgtz(T3, loop); ++ __ delayed()->daddiu(T3, T3, -1); ++ ++ // default case ++ __ profile_switch_default(FSR); ++ __ lw(A7, T2, 0); ++ __ b(continue_execution); ++ __ delayed()->nop(); ++ ++ // entry found -> get offset ++ __ bind(found); ++ __ dsll(AT, T3, Address::times_8); ++ __ daddu(AT, T2, AT); ++ __ lw(A7, AT, 3 * BytesPerInt); ++ __ profile_switch_case(T3, FSR, T2); ++ ++ // continue execution ++ __ bind(continue_execution); ++ __ swap(A7); ++ __ daddu(BCP, BCP, A7); ++ __ lbu(Rnext, BCP, 0); ++ __ dispatch_only(vtos, true); ++} ++ ++// used registers : T0, T1, T2, T3, A7, Rnext ++// T2 : pairs address(array) ++// Rnext : dest bytecode ++// the data after the opcode is the same as lookupswitch ++// see Rewriter::rewrite_method for more information ++void TemplateTable::fast_binaryswitch() { ++ transition(itos, vtos); ++ // Implementation using the following core algorithm: ++ // ++ // int binary_search(int key, LookupswitchPair* array, int n) { ++ // // Binary search according to "Methodik des Programmierens" by ++ // // Edsger W. Dijkstra and W.H.J. Feijen, Addison Wesley Germany 1985. ++ // int i = 0; ++ // int j = n; ++ // while (i+1 < j) { ++ // // invariant P: 0 <= i < j <= n and (a[i] <= key < a[j] or Q) ++ // // with Q: for all i: 0 <= i < n: key < a[i] ++ // // where a stands for the array and assuming that the (inexisting) ++ // // element a[n] is infinitely big. ++ // int h = (i + j) >> 1; ++ // // i < h < j ++ // if (key < array[h].fast_match()) { ++ // j = h; ++ // } else { ++ // i = h; ++ // } ++ // } ++ // // R: a[i] <= key < a[i+1] or Q ++ // // (i.e., if key is within array, i is the correct index) ++ // return i; ++ // } ++ ++ // register allocation ++ const Register array = T2; ++ const Register i = T3, j = A7; ++ const Register h = T1; ++ const Register temp = T0; ++ const Register key = FSR; ++ ++ // setup array ++ __ daddiu(array, BCP, 3*BytesPerInt); ++ __ li(AT, -BytesPerInt); ++ __ andr(array, array, AT); ++ ++ // initialize i & j ++ __ move(i, R0); ++ __ lw(j, array, - 1 * BytesPerInt); ++ // Convert j into native byteordering ++ __ swap(j); ++ ++ // and start ++ Label entry; ++ __ b(entry); ++ __ delayed()->nop(); ++ ++ // binary search loop ++ { ++ Label loop; ++ __ bind(loop); ++ // int h = (i + j) >> 1; ++ __ daddu(h, i, j); ++ __ dsrl(h, h, 1); ++ // if (key < array[h].fast_match()) { ++ // j = h; ++ // } else { ++ // i = h; ++ // } ++ // Convert array[h].match to native byte-ordering before compare ++ __ dsll(AT, h, Address::times_8); ++ __ daddu(AT, array, AT); ++ __ lw(temp, AT, 0 * BytesPerInt); ++ __ swap(temp); ++ ++ __ slt(AT, key, temp); ++ __ movz(i, h, AT); ++ __ movn(j, h, AT); ++ ++ // while (i+1 < j) ++ __ bind(entry); ++ __ daddiu(h, i, 1); ++ __ slt(AT, h, j); ++ __ bne(AT, R0, loop); ++ __ delayed()->nop(); ++ } ++ ++ // end of binary search, result index is i (must check again!) 
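++  // (The loop only establishes array[i].match <= key < array[i+1].match, or that
++  //  key is below every entry, so array[i].match must still be compared with key
++  //  before the offset is used.)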
++ Label default_case; ++ // Convert array[i].match to native byte-ordering before compare ++ __ dsll(AT, i, Address::times_8); ++ __ daddu(AT, array, AT); ++ __ lw(temp, AT, 0 * BytesPerInt); ++ __ swap(temp); ++ __ bne(key, temp, default_case); ++ __ delayed()->nop(); ++ ++ // entry found -> j = offset ++ __ dsll(AT, i, Address::times_8); ++ __ daddu(AT, array, AT); ++ __ lw(j, AT, 1 * BytesPerInt); ++ __ profile_switch_case(i, key, array); ++ __ swap(j); ++ ++ __ daddu(BCP, BCP, j); ++ __ lbu(Rnext, BCP, 0); ++ __ dispatch_only(vtos, true); ++ ++ // default case -> j = default offset ++ __ bind(default_case); ++ __ profile_switch_default(i); ++ __ lw(j, array, - 2 * BytesPerInt); ++ __ swap(j); ++ __ daddu(BCP, BCP, j); ++ __ lbu(Rnext, BCP, 0); ++ __ dispatch_only(vtos, true); ++} ++ ++void TemplateTable::_return(TosState state) { ++ transition(state, state); ++ assert(_desc->calls_vm(), ++ "inconsistent calls_vm information"); // call in remove_activation ++ ++ if (_desc->bytecode() == Bytecodes::_return_register_finalizer) { ++ assert(state == vtos, "only valid state"); ++ __ ld(T1, aaddress(0)); ++ __ load_klass(LVP, T1); ++ __ lw(LVP, LVP, in_bytes(Klass::access_flags_offset())); ++ __ move(AT, JVM_ACC_HAS_FINALIZER); ++ __ andr(AT, AT, LVP); ++ Label skip_register_finalizer; ++ __ beq(AT, R0, skip_register_finalizer); ++ __ delayed()->nop(); ++ __ call_VM(noreg, CAST_FROM_FN_PTR(address, ++ InterpreterRuntime::register_finalizer), T1); ++ __ bind(skip_register_finalizer); ++ } ++ ++ Register thread = TREG; ++#ifndef OPT_THREAD ++ __ get_thread(thread); ++#endif ++ ++ if (SafepointMechanism::uses_thread_local_poll() && _desc->bytecode() != Bytecodes::_return_register_finalizer) { ++ Label no_safepoint; ++ NOT_PRODUCT(__ block_comment("Thread-local Safepoint poll")); ++ __ lb(AT, thread, in_bytes(Thread::polling_page_offset())); ++ __ andi(AT, AT, SafepointMechanism::poll_bit()); ++ __ beq(AT, R0, no_safepoint); ++ __ delayed()->nop(); ++ __ push(state); ++ __ call_VM(noreg, CAST_FROM_FN_PTR(address, ++ InterpreterRuntime::at_safepoint)); ++ __ pop(state); ++ __ bind(no_safepoint); ++ } ++ ++ // Narrow result if state is itos but result type is smaller. ++ // Need to narrow in the return bytecode rather than in generate_return_entry ++ // since compiled code callers expect the result to already be narrowed. ++ if (state == itos) { ++ __ narrow(FSR); ++ } ++ ++ __ remove_activation(state, T9); ++ __ sync(); ++ ++ __ jr(T9); ++ __ delayed()->nop(); ++} ++ ++// ---------------------------------------------------------------------------- ++// Volatile variables demand their effects be made known to all CPU's ++// in order. Store buffers on most chips allow reads & writes to ++// reorder; the JMM's ReadAfterWrite.java test fails in -Xint mode ++// without some kind of memory barrier (i.e., it's not sufficient that ++// the interpreter does not reorder volatile references, the hardware ++// also must not reorder them). ++// ++// According to the new Java Memory Model (JMM): ++// (1) All volatiles are serialized wrt to each other. ALSO reads & ++// writes act as aquire & release, so: ++// (2) A read cannot let unrelated NON-volatile memory refs that ++// happen after the read float up to before the read. It's OK for ++// non-volatile memory refs that happen before the volatile read to ++// float down below it. ++// (3) Similar a volatile write cannot let unrelated NON-volatile ++// memory refs that happen BEFORE the write float down to after the ++// write. 
It's OK for non-volatile memory refs that happen after the ++// volatile write to float up before it. ++// ++// We only put in barriers around volatile refs (they are expensive), ++// not _between_ memory refs (that would require us to track the ++// flavor of the previous memory refs). Requirements (2) and (3) ++// require some barriers before volatile stores and after volatile ++// loads. These nearly cover requirement (1) but miss the ++// volatile-store-volatile-load case. This final case is placed after ++// volatile-stores although it could just as well go before ++// volatile-loads. ++void TemplateTable::volatile_barrier() { ++ if(os::is_MP()) __ sync(); ++} ++ ++// we dont shift left 2 bits in get_cache_and_index_at_bcp ++// for we always need shift the index we use it. the ConstantPoolCacheEntry ++// is 16-byte long, index is the index in ++// ConstantPoolCache, so cache + base_offset() + index * 16 is ++// the corresponding ConstantPoolCacheEntry ++// used registers : T2 ++// NOTE : the returned index need also shift left 4 to get the address! ++void TemplateTable::resolve_cache_and_index(int byte_no, ++ Register Rcache, ++ Register index, ++ size_t index_size) { ++ assert(byte_no == f1_byte || byte_no == f2_byte, "byte_no out of range"); ++ const Register temp = A1; ++ assert_different_registers(Rcache, index); ++ ++ Label resolved; ++ ++ Bytecodes::Code code = bytecode(); ++ switch (code) { ++ case Bytecodes::_nofast_getfield: code = Bytecodes::_getfield; break; ++ case Bytecodes::_nofast_putfield: code = Bytecodes::_putfield; break; ++ default: break; ++ } ++ ++ __ get_cache_and_index_and_bytecode_at_bcp(Rcache, index, temp, byte_no, 1, index_size); ++ // is resolved? ++ int i = (int)code; ++ __ addiu(temp, temp, -i); ++ __ beq(temp, R0, resolved); ++ __ delayed()->nop(); ++ ++ // resolve first time through ++ address entry = CAST_FROM_FN_PTR(address, InterpreterRuntime::resolve_from_cache); ++ ++ __ move(temp, i); ++ __ call_VM(NOREG, entry, temp); ++ ++ // Update registers with resolved info ++ __ get_cache_and_index_at_bcp(Rcache, index, 1, index_size); ++ __ bind(resolved); ++} ++ ++// The Rcache and index registers must be set before call ++void TemplateTable::load_field_cp_cache_entry(Register obj, ++ Register cache, ++ Register index, ++ Register off, ++ Register flags, ++ bool is_static = false) { ++ assert_different_registers(cache, index, flags, off); ++ ++ ByteSize cp_base_offset = ConstantPoolCache::base_offset(); ++ // Field offset ++ __ dsll(AT, index, Address::times_ptr); ++ __ daddu(AT, cache, AT); ++ __ ld(off, AT, in_bytes(cp_base_offset + ConstantPoolCacheEntry::f2_offset())); ++ // Flags ++ __ ld(flags, AT, in_bytes(cp_base_offset + ConstantPoolCacheEntry::flags_offset())); ++ ++ // klass overwrite register ++ if (is_static) { ++ __ ld(obj, AT, in_bytes(cp_base_offset + ConstantPoolCacheEntry::f1_offset())); ++ const int mirror_offset = in_bytes(Klass::java_mirror_offset()); ++ __ ld(obj, Address(obj, mirror_offset)); ++ ++ __ resolve_oop_handle(obj, T9); ++ } ++} ++ ++// get the method, itable_index and flags of the current invoke ++void TemplateTable::load_invoke_cp_cache_entry(int byte_no, ++ Register method, ++ Register itable_index, ++ Register flags, ++ bool is_invokevirtual, ++ bool is_invokevfinal, /*unused*/ ++ bool is_invokedynamic) { ++ // setup registers ++ const Register cache = T3; ++ const Register index = T1; ++ assert_different_registers(method, flags); ++ assert_different_registers(method, cache, index); ++ 
assert_different_registers(itable_index, flags); ++ assert_different_registers(itable_index, cache, index); ++ assert(is_invokevirtual == (byte_no == f2_byte), "is invokevirtual flag redundant"); ++ // determine constant pool cache field offsets ++ const int method_offset = in_bytes( ++ ConstantPoolCache::base_offset() + ++ ((byte_no == f2_byte) ++ ? ConstantPoolCacheEntry::f2_offset() ++ : ConstantPoolCacheEntry::f1_offset())); ++ const int flags_offset = in_bytes(ConstantPoolCache::base_offset() + ++ ConstantPoolCacheEntry::flags_offset()); ++ // access constant pool cache fields ++ const int index_offset = in_bytes(ConstantPoolCache::base_offset() + ++ ConstantPoolCacheEntry::f2_offset()); ++ ++ size_t index_size = (is_invokedynamic ? sizeof(u4): sizeof(u2)); ++ resolve_cache_and_index(byte_no, cache, index, index_size); ++ ++ //assert(wordSize == 8, "adjust code below"); ++ // note we shift 4 not 2, for we get is the true inde ++ // of ConstantPoolCacheEntry, not the shifted 2-bit index as x86 version ++ __ dsll(AT, index, Address::times_ptr); ++ __ daddu(AT, cache, AT); ++ __ ld(method, AT, method_offset); ++ ++ if (itable_index != NOREG) { ++ __ ld(itable_index, AT, index_offset); ++ } ++ __ ld(flags, AT, flags_offset); ++} ++ ++// The registers cache and index expected to be set before call. ++// Correct values of the cache and index registers are preserved. ++void TemplateTable::jvmti_post_field_access(Register cache, Register index, ++ bool is_static, bool has_tos) { ++ // do the JVMTI work here to avoid disturbing the register state below ++ // We use c_rarg registers here because we want to use the register used in ++ // the call to the VM ++ if (JvmtiExport::can_post_field_access()) { ++ // Check to see if a field access watch has been set before we ++ // take the time to call into the VM. ++ Label L1; ++ // kill FSR ++ Register tmp1 = T2; ++ Register tmp2 = T1; ++ Register tmp3 = T3; ++ assert_different_registers(cache, index, AT); ++ __ li(AT, (intptr_t)JvmtiExport::get_field_access_count_addr()); ++ __ lw(AT, AT, 0); ++ __ beq(AT, R0, L1); ++ __ delayed()->nop(); ++ ++ __ get_cache_and_index_at_bcp(tmp2, tmp3, 1); ++ ++ // cache entry pointer ++ __ daddiu(tmp2, tmp2, in_bytes(ConstantPoolCache::base_offset())); ++ __ shl(tmp3, LogBytesPerWord); ++ __ daddu(tmp2, tmp2, tmp3); ++ if (is_static) { ++ __ move(tmp1, R0); ++ } else { ++ __ ld(tmp1, SP, 0); ++ __ verify_oop(tmp1); ++ } ++ // tmp1: object pointer or NULL ++ // tmp2: cache entry pointer ++ // tmp3: jvalue object on the stack ++ __ call_VM(NOREG, CAST_FROM_FN_PTR(address, ++ InterpreterRuntime::post_field_access), ++ tmp1, tmp2, tmp3); ++ __ get_cache_and_index_at_bcp(cache, index, 1); ++ __ bind(L1); ++ } ++} ++ ++void TemplateTable::pop_and_check_object(Register r) { ++ __ pop_ptr(r); ++ __ null_check(r); // for field access must check obj. 
++ __ verify_oop(r); ++} ++ ++// used registers : T1, T2, T3, T1 ++// T1 : flags ++// T2 : off ++// T3 : obj ++// T1 : field address ++// The flags 31, 30, 29, 28 together build a 4 bit number 0 to 8 with the ++// following mapping to the TosState states: ++// btos: 0 ++// ctos: 1 ++// stos: 2 ++// itos: 3 ++// ltos: 4 ++// ftos: 5 ++// dtos: 6 ++// atos: 7 ++// vtos: 8 ++// see ConstantPoolCacheEntry::set_field for more info ++void TemplateTable::getfield_or_static(int byte_no, bool is_static, RewriteControl rc) { ++ transition(vtos, vtos); ++ ++ const Register cache = T3; ++ const Register index = T0; ++ ++ const Register obj = T3; ++ const Register off = T2; ++ const Register flags = T1; ++ ++ const Register scratch = T8; ++ ++ resolve_cache_and_index(byte_no, cache, index, sizeof(u2)); ++ jvmti_post_field_access(cache, index, is_static, false); ++ load_field_cp_cache_entry(obj, cache, index, off, flags, is_static); ++ ++ { ++ __ move(scratch, 1 << ConstantPoolCacheEntry::is_volatile_shift); ++ __ andr(scratch, scratch, flags); ++ ++ Label notVolatile; ++ __ beq(scratch, R0, notVolatile); ++ __ delayed()->nop(); ++ volatile_barrier(); ++ __ bind(notVolatile); ++ } ++ ++ if (!is_static) pop_and_check_object(obj); ++ __ daddu(index, obj, off); ++ ++ const Address field(index, 0); ++ ++ Label Done, notByte, notBool, notInt, notShort, notChar, ++ notLong, notFloat, notObj, notDouble; ++ ++ assert(btos == 0, "change code, btos != 0"); ++ __ dsrl(flags, flags, ConstantPoolCacheEntry::tos_state_shift); ++ __ andi(flags, flags, ConstantPoolCacheEntry::tos_state_mask); ++ __ bne(flags, R0, notByte); ++ __ delayed()->nop(); ++ ++ // btos ++ __ access_load_at(T_BYTE, IN_HEAP, FSR, field, noreg, noreg); ++ __ push(btos); ++ ++ // Rewrite bytecode to be faster ++ if (!is_static && rc == may_rewrite) { ++ patch_bytecode(Bytecodes::_fast_bgetfield, T3, T2); ++ } ++ __ b(Done); ++ __ delayed()->nop(); ++ ++ ++ __ bind(notByte); ++ __ move(AT, ztos); ++ __ bne(flags, AT, notBool); ++ __ delayed()->nop(); ++ ++ // ztos ++ __ access_load_at(T_BOOLEAN, IN_HEAP, FSR, field, noreg, noreg); ++ __ push(ztos); ++ ++ // Rewrite bytecode to be faster ++ if (!is_static && rc == may_rewrite) { ++ patch_bytecode(Bytecodes::_fast_bgetfield, T3, T2); ++ } ++ __ b(Done); ++ __ delayed()->nop(); ++ ++ ++ __ bind(notBool); ++ __ move(AT, itos); ++ __ bne(flags, AT, notInt); ++ __ delayed()->nop(); ++ ++ // itos ++ __ access_load_at(T_INT, IN_HEAP, FSR, field, noreg, noreg); ++ __ push(itos); ++ ++ // Rewrite bytecode to be faster ++ if (!is_static && rc == may_rewrite) { ++ patch_bytecode(Bytecodes::_fast_igetfield, T3, T2); ++ } ++ __ b(Done); ++ __ delayed()->nop(); ++ ++ __ bind(notInt); ++ __ move(AT, atos); ++ __ bne(flags, AT, notObj); ++ __ delayed()->nop(); ++ ++ // atos ++ //add for compressedoops ++ do_oop_load(_masm, Address(index, 0), FSR, IN_HEAP); ++ __ push(atos); ++ ++ if (!is_static && rc == may_rewrite) { ++ patch_bytecode(Bytecodes::_fast_agetfield, T3, T2); ++ } ++ __ b(Done); ++ __ delayed()->nop(); ++ ++ __ bind(notObj); ++ __ move(AT, ctos); ++ __ bne(flags, AT, notChar); ++ __ delayed()->nop(); ++ ++ // ctos ++ __ access_load_at(T_CHAR, IN_HEAP, FSR, field, noreg, noreg); ++ __ push(ctos); ++ ++ if (!is_static && rc == may_rewrite) { ++ patch_bytecode(Bytecodes::_fast_cgetfield, T3, T2); ++ } ++ __ b(Done); ++ __ delayed()->nop(); ++ ++ __ bind(notChar); ++ __ move(AT, stos); ++ __ bne(flags, AT, notShort); ++ __ delayed()->nop(); ++ ++ // stos ++ __ access_load_at(T_SHORT, IN_HEAP, FSR, field, 
noreg, noreg); ++ __ push(stos); ++ ++ if (!is_static && rc == may_rewrite) { ++ patch_bytecode(Bytecodes::_fast_sgetfield, T3, T2); ++ } ++ __ b(Done); ++ __ delayed()->nop(); ++ ++ __ bind(notShort); ++ __ move(AT, ltos); ++ __ bne(flags, AT, notLong); ++ __ delayed()->nop(); ++ ++ // FIXME : the load/store should be atomic, we have no simple method to do this in mips32 ++ // ltos ++ __ access_load_at(T_LONG, IN_HEAP | MO_RELAXED, FSR, field, noreg, noreg); ++ __ push(ltos); ++ ++ // Don't rewrite to _fast_lgetfield for potential volatile case. ++ __ b(Done); ++ __ delayed()->nop(); ++ ++ __ bind(notLong); ++ __ move(AT, ftos); ++ __ bne(flags, AT, notFloat); ++ __ delayed()->nop(); ++ ++ // ftos ++ __ access_load_at(T_FLOAT, IN_HEAP, noreg /* ftos */, field, noreg, noreg); ++ __ push(ftos); ++ ++ if (!is_static && rc == may_rewrite) { ++ patch_bytecode(Bytecodes::_fast_fgetfield, T3, T2); ++ } ++ __ b(Done); ++ __ delayed()->nop(); ++ ++ __ bind(notFloat); ++ __ move(AT, dtos); ++#ifdef ASSERT ++ __ bne(flags, AT, notDouble); ++ __ delayed()->nop(); ++#endif ++ ++ // dtos ++ __ access_load_at(T_DOUBLE, IN_HEAP, noreg /* dtos */, field, noreg, noreg); ++ __ push(dtos); ++ ++ if (!is_static && rc == may_rewrite) { ++ patch_bytecode(Bytecodes::_fast_dgetfield, T3, T2); ++ } ++ ++#ifdef ASSERT ++ __ b(Done); ++ __ delayed()->nop(); ++ __ bind(notDouble); ++ __ stop("Bad state"); ++#endif ++ ++ __ bind(Done); ++ ++ { ++ Label notVolatile; ++ __ beq(scratch, R0, notVolatile); ++ __ delayed()->nop(); ++ volatile_barrier(); ++ __ bind(notVolatile); ++ } ++} ++ ++ ++void TemplateTable::getfield(int byte_no) { ++ getfield_or_static(byte_no, false); ++} ++ ++void TemplateTable::nofast_getfield(int byte_no) { ++ getfield_or_static(byte_no, false, may_not_rewrite); ++} ++ ++void TemplateTable::getstatic(int byte_no) { ++ getfield_or_static(byte_no, true); ++} ++ ++// The registers cache and index expected to be set before call. ++// The function may destroy various registers, just not the cache and index registers. ++void TemplateTable::jvmti_post_field_mod(Register cache, Register index, bool is_static) { ++ transition(vtos, vtos); ++ ++ ByteSize cp_base_offset = ConstantPoolCache::base_offset(); ++ ++ if (JvmtiExport::can_post_field_modification()) { ++ // Check to see if a field modification watch has been set before ++ // we take the time to call into the VM. ++ Label L1; ++ //kill AT, T1, T2, T3, T9 ++ Register tmp1 = T2; ++ Register tmp2 = T1; ++ Register tmp3 = T3; ++ Register tmp4 = T9; ++ assert_different_registers(cache, index, tmp4); ++ ++ __ li(AT, JvmtiExport::get_field_modification_count_addr()); ++ __ lw(AT, AT, 0); ++ __ beq(AT, R0, L1); ++ __ delayed()->nop(); ++ ++ __ get_cache_and_index_at_bcp(tmp2, tmp4, 1); ++ ++ if (is_static) { ++ __ move(tmp1, R0); ++ } else { ++ // Life is harder. The stack holds the value on top, followed by ++ // the object. We don't know the size of the value, though; it ++ // could be one or two words depending on its type. As a result, ++ // we must find the type to determine where the object is. 
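++  // (long and double occupy two expression stack slots, all other values one,
++  //  so the receiver sits either one or two slots below the value on top.)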
++ Label two_word, valsize_known; ++ __ dsll(AT, tmp4, Address::times_8); ++ __ daddu(AT, tmp2, AT); ++ __ ld(tmp3, AT, in_bytes(cp_base_offset + ++ ConstantPoolCacheEntry::flags_offset())); ++ __ shr(tmp3, ConstantPoolCacheEntry::tos_state_shift); ++ ++ ConstantPoolCacheEntry::verify_tos_state_shift(); ++ __ move(tmp1, SP); ++ __ move(AT, ltos); ++ __ beq(tmp3, AT, two_word); ++ __ delayed()->nop(); ++ __ move(AT, dtos); ++ __ beq(tmp3, AT, two_word); ++ __ delayed()->nop(); ++ __ b(valsize_known); ++ __ delayed()->daddiu(tmp1, tmp1, Interpreter::expr_offset_in_bytes(1) ); ++ ++ __ bind(two_word); ++ __ daddiu(tmp1, tmp1, Interpreter::expr_offset_in_bytes(2)); ++ ++ __ bind(valsize_known); ++ // setup object pointer ++ __ ld(tmp1, tmp1, 0*wordSize); ++ } ++ // cache entry pointer ++ __ daddiu(tmp2, tmp2, in_bytes(cp_base_offset)); ++ __ shl(tmp4, LogBytesPerWord); ++ __ daddu(tmp2, tmp2, tmp4); ++ // object (tos) ++ __ move(tmp3, SP); ++ // tmp1: object pointer set up above (NULL if static) ++ // tmp2: cache entry pointer ++ // tmp3: jvalue object on the stack ++ __ call_VM(NOREG, ++ CAST_FROM_FN_PTR(address, ++ InterpreterRuntime::post_field_modification), ++ tmp1, tmp2, tmp3); ++ __ get_cache_and_index_at_bcp(cache, index, 1); ++ __ bind(L1); ++ } ++} ++ ++// used registers : T0, T1, T2, T3, T8 ++// T1 : flags ++// T2 : off ++// T3 : obj ++// T8 : volatile bit ++// see ConstantPoolCacheEntry::set_field for more info ++void TemplateTable::putfield_or_static(int byte_no, bool is_static, RewriteControl rc) { ++ transition(vtos, vtos); ++ ++ const Register cache = T3; ++ const Register index = T0; ++ const Register obj = T3; ++ const Register off = T2; ++ const Register flags = T1; ++ const Register bc = T3; ++ ++ const Register scratch = T8; ++ ++ resolve_cache_and_index(byte_no, cache, index, sizeof(u2)); ++ jvmti_post_field_mod(cache, index, is_static); ++ load_field_cp_cache_entry(obj, cache, index, off, flags, is_static); ++ ++ Label Done; ++ { ++ __ move(scratch, 1 << ConstantPoolCacheEntry::is_volatile_shift); ++ __ andr(scratch, scratch, flags); ++ ++ Label notVolatile; ++ __ beq(scratch, R0, notVolatile); ++ __ delayed()->nop(); ++ volatile_barrier(); ++ __ bind(notVolatile); ++ } ++ ++ ++ Label notByte, notBool, notInt, notShort, notChar, notLong, notFloat, notObj, notDouble; ++ ++ assert(btos == 0, "change code, btos != 0"); ++ ++ // btos ++ __ dsrl(flags, flags, ConstantPoolCacheEntry::tos_state_shift); ++ __ andi(flags, flags, ConstantPoolCacheEntry::tos_state_mask); ++ __ bne(flags, R0, notByte); ++ __ delayed()->nop(); ++ ++ __ pop(btos); ++ if (!is_static) { ++ pop_and_check_object(obj); ++ } ++ __ daddu(T9, obj, off); ++ __ access_store_at(T_BYTE, IN_HEAP, Address(T9), FSR, noreg, noreg); ++ ++ if (!is_static && rc == may_rewrite) { ++ patch_bytecode(Bytecodes::_fast_bputfield, bc, off, true, byte_no); ++ } ++ __ b(Done); ++ __ delayed()->nop(); ++ ++ // ztos ++ __ bind(notByte); ++ __ move(AT, ztos); ++ __ bne(flags, AT, notBool); ++ __ delayed()->nop(); ++ ++ __ pop(ztos); ++ if (!is_static) { ++ pop_and_check_object(obj); ++ } ++ __ daddu(T9, obj, off); ++ __ andi(FSR, FSR, 0x1); ++ __ access_store_at(T_BOOLEAN, IN_HEAP, Address(T9), FSR, noreg, noreg); ++ ++ if (!is_static && rc == may_rewrite) { ++ patch_bytecode(Bytecodes::_fast_zputfield, bc, off, true, byte_no); ++ } ++ __ b(Done); ++ __ delayed()->nop(); ++ ++ // itos ++ __ bind(notBool); ++ __ move(AT, itos); ++ __ bne(flags, AT, notInt); ++ __ delayed()->nop(); ++ ++ __ pop(itos); ++ if (!is_static) { ++ 
pop_and_check_object(obj); ++ } ++ __ daddu(T9, obj, off); ++ __ access_store_at(T_INT, IN_HEAP, Address(T9), FSR, noreg, noreg); ++ ++ if (!is_static && rc == may_rewrite) { ++ patch_bytecode(Bytecodes::_fast_iputfield, bc, off, true, byte_no); ++ } ++ __ b(Done); ++ __ delayed()->nop(); ++ ++ // atos ++ __ bind(notInt); ++ __ move(AT, atos); ++ __ bne(flags, AT, notObj); ++ __ delayed()->nop(); ++ ++ __ pop(atos); ++ if (!is_static) { ++ pop_and_check_object(obj); ++ } ++ ++ do_oop_store(_masm, Address(obj, off, Address::times_1, 0), FSR); ++ ++ if (!is_static && rc == may_rewrite) { ++ patch_bytecode(Bytecodes::_fast_aputfield, bc, off, true, byte_no); ++ } ++ __ b(Done); ++ __ delayed()->nop(); ++ ++ // ctos ++ __ bind(notObj); ++ __ move(AT, ctos); ++ __ bne(flags, AT, notChar); ++ __ delayed()->nop(); ++ ++ __ pop(ctos); ++ if (!is_static) { ++ pop_and_check_object(obj); ++ } ++ __ daddu(T9, obj, off); ++ __ access_store_at(T_CHAR, IN_HEAP, Address(T9), FSR, noreg, noreg); ++ if (!is_static && rc == may_rewrite) { ++ patch_bytecode(Bytecodes::_fast_cputfield, bc, off, true, byte_no); ++ } ++ __ b(Done); ++ __ delayed()->nop(); ++ ++ // stos ++ __ bind(notChar); ++ __ move(AT, stos); ++ __ bne(flags, AT, notShort); ++ __ delayed()->nop(); ++ ++ __ pop(stos); ++ if (!is_static) { ++ pop_and_check_object(obj); ++ } ++ __ daddu(T9, obj, off); ++ __ access_store_at(T_SHORT, IN_HEAP, Address(T9), FSR, noreg, noreg); ++ if (!is_static && rc == may_rewrite) { ++ patch_bytecode(Bytecodes::_fast_sputfield, bc, off, true, byte_no); ++ } ++ __ b(Done); ++ __ delayed()->nop(); ++ ++ // ltos ++ __ bind(notShort); ++ __ move(AT, ltos); ++ __ bne(flags, AT, notLong); ++ __ delayed()->nop(); ++ ++ __ pop(ltos); ++ if (!is_static) { ++ pop_and_check_object(obj); ++ } ++ __ daddu(T9, obj, off); ++ __ access_store_at(T_LONG, IN_HEAP, Address(T9), FSR, noreg, noreg); ++ if (!is_static && rc == may_rewrite) { ++ patch_bytecode(Bytecodes::_fast_lputfield, bc, off, true, byte_no); ++ } ++ __ b(Done); ++ __ delayed()->nop(); ++ ++ // ftos ++ __ bind(notLong); ++ __ move(AT, ftos); ++ __ bne(flags, AT, notFloat); ++ __ delayed()->nop(); ++ ++ __ pop(ftos); ++ if (!is_static) { ++ pop_and_check_object(obj); ++ } ++ __ daddu(T9, obj, off); ++ __ access_store_at(T_FLOAT, IN_HEAP, Address(T9), noreg, noreg, noreg); ++ if (!is_static && rc == may_rewrite) { ++ patch_bytecode(Bytecodes::_fast_fputfield, bc, off, true, byte_no); ++ } ++ __ b(Done); ++ __ delayed()->nop(); ++ ++ ++ // dtos ++ __ bind(notFloat); ++ __ move(AT, dtos); ++#ifdef ASSERT ++ __ bne(flags, AT, notDouble); ++ __ delayed()->nop(); ++#endif ++ ++ __ pop(dtos); ++ if (!is_static) { ++ pop_and_check_object(obj); ++ } ++ __ daddu(T9, obj, off); ++ __ access_store_at(T_DOUBLE, IN_HEAP, Address(T9), noreg, noreg, noreg); ++ if (!is_static && rc == may_rewrite) { ++ patch_bytecode(Bytecodes::_fast_dputfield, bc, off, true, byte_no); ++ } ++ ++#ifdef ASSERT ++ __ b(Done); ++ __ delayed()->nop(); ++ ++ __ bind(notDouble); ++ __ stop("Bad state"); ++#endif ++ ++ __ bind(Done); ++ ++ { ++ Label notVolatile; ++ __ beq(scratch, R0, notVolatile); ++ __ delayed()->nop(); ++ volatile_barrier(); ++ __ bind(notVolatile); ++ } ++} ++ ++void TemplateTable::putfield(int byte_no) { ++ putfield_or_static(byte_no, false); ++} ++ ++void TemplateTable::nofast_putfield(int byte_no) { ++ putfield_or_static(byte_no, false, may_not_rewrite); ++} ++ ++void TemplateTable::putstatic(int byte_no) { ++ putfield_or_static(byte_no, true); ++} ++ ++// used registers : T1, T2, T3 
++// T1 : cp_entry ++// T2 : obj ++// T3 : value pointer ++void TemplateTable::jvmti_post_fast_field_mod() { ++ if (JvmtiExport::can_post_field_modification()) { ++ // Check to see if a field modification watch has been set before ++ // we take the time to call into the VM. ++ Label L2; ++ //kill AT, T1, T2, T3, T9 ++ Register tmp1 = T2; ++ Register tmp2 = T1; ++ Register tmp3 = T3; ++ Register tmp4 = T9; ++ __ li(AT, JvmtiExport::get_field_modification_count_addr()); ++ __ lw(tmp3, AT, 0); ++ __ beq(tmp3, R0, L2); ++ __ delayed()->nop(); ++ __ pop_ptr(tmp1); ++ __ verify_oop(tmp1); ++ __ push_ptr(tmp1); ++ switch (bytecode()) { // load values into the jvalue object ++ case Bytecodes::_fast_aputfield: __ push_ptr(FSR); break; ++ case Bytecodes::_fast_bputfield: // fall through ++ case Bytecodes::_fast_zputfield: // fall through ++ case Bytecodes::_fast_sputfield: // fall through ++ case Bytecodes::_fast_cputfield: // fall through ++ case Bytecodes::_fast_iputfield: __ push_i(FSR); break; ++ case Bytecodes::_fast_dputfield: __ push_d(FSF); break; ++ case Bytecodes::_fast_fputfield: __ push_f(); break; ++ case Bytecodes::_fast_lputfield: __ push_l(FSR); break; ++ default: ShouldNotReachHere(); ++ } ++ __ move(tmp3, SP); ++ // access constant pool cache entry ++ __ get_cache_entry_pointer_at_bcp(tmp2, FSR, 1); ++ __ verify_oop(tmp1); ++ // tmp1: object pointer copied above ++ // tmp2: cache entry pointer ++ // tmp3: jvalue object on the stack ++ __ call_VM(NOREG, ++ CAST_FROM_FN_PTR(address, ++ InterpreterRuntime::post_field_modification), ++ tmp1, tmp2, tmp3); ++ ++ switch (bytecode()) { // restore tos values ++ case Bytecodes::_fast_aputfield: __ pop_ptr(FSR); break; ++ case Bytecodes::_fast_bputfield: // fall through ++ case Bytecodes::_fast_zputfield: // fall through ++ case Bytecodes::_fast_sputfield: // fall through ++ case Bytecodes::_fast_cputfield: // fall through ++ case Bytecodes::_fast_iputfield: __ pop_i(FSR); break; ++ case Bytecodes::_fast_dputfield: __ pop_d(); break; ++ case Bytecodes::_fast_fputfield: __ pop_f(); break; ++ case Bytecodes::_fast_lputfield: __ pop_l(FSR); break; ++ default: break; ++ } ++ __ bind(L2); ++ } ++} ++ ++// used registers : T2, T3, T1 ++// T2 : index & off & field address ++// T3 : cache & obj ++// T1 : flags ++void TemplateTable::fast_storefield(TosState state) { ++ transition(state, vtos); ++ ++ const Register scratch = T8; ++ ++ ByteSize base = ConstantPoolCache::base_offset(); ++ ++ jvmti_post_fast_field_mod(); ++ ++ // access constant pool cache ++ __ get_cache_and_index_at_bcp(T3, T2, 1); ++ ++ // Must prevent reordering of the following cp cache loads with bytecode load ++ __ sync(); ++ ++ // test for volatile with T1 ++ __ dsll(AT, T2, Address::times_8); ++ __ daddu(AT, T3, AT); ++ __ ld(T1, AT, in_bytes(base + ConstantPoolCacheEntry::flags_offset())); ++ ++ // replace index with field offset from cache entry ++ __ ld(T2, AT, in_bytes(base + ConstantPoolCacheEntry::f2_offset())); ++ ++ Label Done; ++ { ++ __ move(scratch, 1 << ConstantPoolCacheEntry::is_volatile_shift); ++ __ andr(scratch, scratch, T1); ++ ++ Label notVolatile; ++ __ beq(scratch, R0, notVolatile); ++ __ delayed()->nop(); ++ volatile_barrier(); ++ __ bind(notVolatile); ++ } ++ ++ // Get object from stack ++ pop_and_check_object(T3); ++ ++ if (bytecode() != Bytecodes::_fast_aputfield) { ++ // field address ++ __ daddu(T2, T3, T2); ++ } ++ ++ // access field ++ switch (bytecode()) { ++ case Bytecodes::_fast_zputfield: ++ __ andi(FSR, FSR, 0x1); // boolean is true if LSB is 1 ++ 
__ access_store_at(T_BOOLEAN, IN_HEAP, Address(T2), FSR, noreg, noreg); ++ break; ++ case Bytecodes::_fast_bputfield: ++ __ access_store_at(T_BYTE, IN_HEAP, Address(T2), FSR, noreg, noreg); ++ break; ++ case Bytecodes::_fast_sputfield: ++ __ access_store_at(T_SHORT, IN_HEAP, Address(T2), FSR, noreg, noreg); ++ break; ++ case Bytecodes::_fast_cputfield: ++ __ access_store_at(T_CHAR, IN_HEAP, Address(T2), FSR, noreg, noreg); ++ break; ++ case Bytecodes::_fast_iputfield: ++ __ access_store_at(T_INT, IN_HEAP, Address(T2), FSR, noreg, noreg); ++ break; ++ case Bytecodes::_fast_lputfield: ++ __ access_store_at(T_LONG, IN_HEAP, Address(T2), FSR, noreg, noreg); ++ break; ++ case Bytecodes::_fast_fputfield: ++ __ access_store_at(T_FLOAT, IN_HEAP, Address(T2), noreg, noreg, noreg); ++ break; ++ case Bytecodes::_fast_dputfield: ++ __ access_store_at(T_DOUBLE, IN_HEAP, Address(T2), noreg, noreg, noreg); ++ break; ++ case Bytecodes::_fast_aputfield: ++ do_oop_store(_masm, Address(T3, T2, Address::times_1, 0), FSR); ++ break; ++ default: ++ ShouldNotReachHere(); ++ } ++ ++ { ++ Label notVolatile; ++ __ beq(scratch, R0, notVolatile); ++ __ delayed()->nop(); ++ volatile_barrier(); ++ __ bind(notVolatile); ++ } ++} ++ ++// used registers : T2, T3, T1 ++// T3 : cp_entry & cache ++// T2 : index & offset ++void TemplateTable::fast_accessfield(TosState state) { ++ transition(atos, state); ++ ++ const Register scratch = T8; ++ ++ // do the JVMTI work here to avoid disturbing the register state below ++ if (JvmtiExport::can_post_field_access()) { ++ // Check to see if a field access watch has been set before we take ++ // the time to call into the VM. ++ Label L1; ++ __ li(AT, (intptr_t)JvmtiExport::get_field_access_count_addr()); ++ __ lw(T3, AT, 0); ++ __ beq(T3, R0, L1); ++ __ delayed()->nop(); ++ // access constant pool cache entry ++ __ get_cache_entry_pointer_at_bcp(T3, T1, 1); ++ __ move(TSR, FSR); ++ __ verify_oop(FSR); ++ // FSR: object pointer copied above ++ // T3: cache entry pointer ++ __ call_VM(NOREG, ++ CAST_FROM_FN_PTR(address, InterpreterRuntime::post_field_access), ++ FSR, T3); ++ __ move(FSR, TSR); ++ __ bind(L1); ++ } ++ ++ // access constant pool cache ++ __ get_cache_and_index_at_bcp(T3, T2, 1); ++ ++ // Must prevent reordering of the following cp cache loads with bytecode load ++ __ sync(); ++ ++ // replace index with field offset from cache entry ++ __ dsll(AT, T2, Address::times_8); ++ __ daddu(AT, T3, AT); ++ __ ld(T2, AT, in_bytes(ConstantPoolCache::base_offset() + ConstantPoolCacheEntry::f2_offset())); ++ ++ { ++ __ ld(AT, AT, in_bytes(ConstantPoolCache::base_offset() + ConstantPoolCacheEntry::flags_offset())); ++ __ move(scratch, 1 << ConstantPoolCacheEntry::is_volatile_shift); ++ __ andr(scratch, scratch, AT); ++ ++ Label notVolatile; ++ __ beq(scratch, R0, notVolatile); ++ __ delayed()->nop(); ++ volatile_barrier(); ++ __ bind(notVolatile); ++ } ++ ++ // FSR: object ++ __ verify_oop(FSR); ++ __ null_check(FSR); ++ // field addresses ++ __ daddu(FSR, FSR, T2); ++ ++ // access field ++ switch (bytecode()) { ++ case Bytecodes::_fast_bgetfield: ++ __ access_load_at(T_BYTE, IN_HEAP, FSR, Address(FSR), noreg, noreg); ++ break; ++ case Bytecodes::_fast_sgetfield: ++ __ access_load_at(T_SHORT, IN_HEAP, FSR, Address(FSR), noreg, noreg); ++ break; ++ case Bytecodes::_fast_cgetfield: ++ __ access_load_at(T_CHAR, IN_HEAP, FSR, Address(FSR), noreg, noreg); ++ break; ++ case Bytecodes::_fast_igetfield: ++ __ access_load_at(T_INT, IN_HEAP, FSR, Address(FSR), noreg, noreg); ++ break; ++ case 
Bytecodes::_fast_lgetfield: ++ __ stop("should not be rewritten"); ++ break; ++ case Bytecodes::_fast_fgetfield: ++ __ access_load_at(T_FLOAT, IN_HEAP, noreg, Address(FSR), noreg, noreg); ++ break; ++ case Bytecodes::_fast_dgetfield: ++ __ access_load_at(T_DOUBLE, IN_HEAP, noreg, Address(FSR), noreg, noreg); ++ break; ++ case Bytecodes::_fast_agetfield: ++ //add for compressedoops ++ do_oop_load(_masm, Address(FSR, 0), FSR, IN_HEAP); ++ __ verify_oop(FSR); ++ break; ++ default: ++ ShouldNotReachHere(); ++ } ++ ++ { ++ Label notVolatile; ++ __ beq(scratch, R0, notVolatile); ++ __ delayed()->nop(); ++ volatile_barrier(); ++ __ bind(notVolatile); ++ } ++} ++ ++// generator for _fast_iaccess_0, _fast_aaccess_0, _fast_faccess_0 ++// used registers : T1, T2, T3, T1 ++// T1 : obj & field address ++// T2 : off ++// T3 : cache ++// T1 : index ++void TemplateTable::fast_xaccess(TosState state) { ++ transition(vtos, state); ++ ++ const Register scratch = T8; ++ ++ // get receiver ++ __ ld(T1, aaddress(0)); ++ // access constant pool cache ++ __ get_cache_and_index_at_bcp(T3, T2, 2); ++ __ dsll(AT, T2, Address::times_8); ++ __ daddu(AT, T3, AT); ++ __ ld(T2, AT, in_bytes(ConstantPoolCache::base_offset() + ConstantPoolCacheEntry::f2_offset())); ++ ++ { ++ __ ld(AT, AT, in_bytes(ConstantPoolCache::base_offset() + ConstantPoolCacheEntry::flags_offset())); ++ __ move(scratch, 1 << ConstantPoolCacheEntry::is_volatile_shift); ++ __ andr(scratch, scratch, AT); ++ ++ Label notVolatile; ++ __ beq(scratch, R0, notVolatile); ++ __ delayed()->nop(); ++ volatile_barrier(); ++ __ bind(notVolatile); ++ } ++ ++ // make sure exception is reported in correct bcp range (getfield is ++ // next instruction) ++ __ daddiu(BCP, BCP, 1); ++ __ null_check(T1); ++ __ daddu(T1, T1, T2); ++ ++ if (state == itos) { ++ __ access_load_at(T_INT, IN_HEAP, FSR, Address(T1), noreg, noreg); ++ } else if (state == atos) { ++ do_oop_load(_masm, Address(T1, 0), FSR, IN_HEAP); ++ __ verify_oop(FSR); ++ } else if (state == ftos) { ++ __ access_load_at(T_FLOAT, IN_HEAP, noreg, Address(T1), noreg, noreg); ++ } else { ++ ShouldNotReachHere(); ++ } ++ __ daddiu(BCP, BCP, -1); ++ ++ { ++ Label notVolatile; ++ __ beq(scratch, R0, notVolatile); ++ __ delayed()->nop(); ++ volatile_barrier(); ++ __ bind(notVolatile); ++ } ++} ++ ++ ++ ++//----------------------------------------------------------------------------- ++// Calls ++ ++void TemplateTable::count_calls(Register method, Register temp) { ++ // implemented elsewhere ++ ShouldNotReachHere(); ++} ++ ++// method, index, recv, flags: T1, T2, T3, T1 ++// byte_no = 2 for _invokevirtual, 1 else ++// T0 : return address ++// get the method & index of the invoke, and push the return address of ++// the invoke(first word in the frame) ++// this address is where the return code jmp to. ++// NOTE : this method will set T3&T1 as recv&flags ++void TemplateTable::prepare_invoke(int byte_no, ++ Register method, // linked method (or i-klass) ++ Register index, // itable index, MethodType, etc. 
++ Register recv, // if caller wants to see it ++ Register flags // if caller wants to test it ++ ) { ++ // determine flags ++ const Bytecodes::Code code = bytecode(); ++ const bool is_invokeinterface = code == Bytecodes::_invokeinterface; ++ const bool is_invokedynamic = code == Bytecodes::_invokedynamic; ++ const bool is_invokehandle = code == Bytecodes::_invokehandle; ++ const bool is_invokevirtual = code == Bytecodes::_invokevirtual; ++ const bool is_invokespecial = code == Bytecodes::_invokespecial; ++ const bool load_receiver = (recv != noreg); ++ const bool save_flags = (flags != noreg); ++ assert(load_receiver == (code != Bytecodes::_invokestatic && code != Bytecodes::_invokedynamic),""); ++ assert(save_flags == (is_invokeinterface || is_invokevirtual), "need flags for vfinal"); ++ assert(flags == noreg || flags == T1, "error flags reg."); ++ assert(recv == noreg || recv == T3, "error recv reg."); ++ ++ // setup registers & access constant pool cache ++ if(recv == noreg) recv = T3; ++ if(flags == noreg) flags = T1; ++ assert_different_registers(method, index, recv, flags); ++ ++ // save 'interpreter return address' ++ __ save_bcp(); ++ ++ load_invoke_cp_cache_entry(byte_no, method, index, flags, is_invokevirtual, false, is_invokedynamic); ++ ++ if (is_invokedynamic || is_invokehandle) { ++ Label L_no_push; ++ __ move(AT, (1 << ConstantPoolCacheEntry::has_appendix_shift)); ++ __ andr(AT, AT, flags); ++ __ beq(AT, R0, L_no_push); ++ __ delayed()->nop(); ++ // Push the appendix as a trailing parameter. ++ // This must be done before we get the receiver, ++ // since the parameter_size includes it. ++ Register tmp = SSR; ++ __ push(tmp); ++ __ move(tmp, index); ++ assert(ConstantPoolCacheEntry::_indy_resolved_references_appendix_offset == 0, "appendix expected at index+0"); ++ __ load_resolved_reference_at_index(index, tmp, recv); ++ __ pop(tmp); ++ __ push(index); // push appendix (MethodType, CallSite, etc.) ++ __ bind(L_no_push); ++ } ++ ++ // load receiver if needed (after appendix is pushed so parameter size is correct) ++ // Note: no return address pushed yet ++ if (load_receiver) { ++ __ move(AT, ConstantPoolCacheEntry::parameter_size_mask); ++ __ andr(recv, flags, AT); ++ // Since we won't push RA on stack, no_return_pc_pushed_yet should be 0. 
++ const int no_return_pc_pushed_yet = 0; // argument slot correction before we push return address ++ const int receiver_is_at_end = -1; // back off one slot to get receiver ++ Address recv_addr = __ argument_address(recv, no_return_pc_pushed_yet + receiver_is_at_end); ++ __ ld(recv, recv_addr); ++ __ verify_oop(recv); ++ } ++ if(save_flags) { ++ __ move(BCP, flags); ++ } ++ ++ // compute return type ++ __ dsrl(flags, flags, ConstantPoolCacheEntry::tos_state_shift); ++ __ andi(flags, flags, 0xf); ++ ++ // Make sure we don't need to mask flags for tos_state_shift after the above shift ++ ConstantPoolCacheEntry::verify_tos_state_shift(); ++ // load return address ++ { ++ const address table = (address) Interpreter::invoke_return_entry_table_for(code); ++ __ li(AT, (long)table); ++ __ dsll(flags, flags, LogBytesPerWord); ++ __ daddu(AT, AT, flags); ++ __ ld(RA, AT, 0); ++ } ++ ++ if (save_flags) { ++ __ move(flags, BCP); ++ __ restore_bcp(); ++ } ++} ++ ++// used registers : T0, T3, T1, T2 ++// T3 : recv, this two register using convention is by prepare_invoke ++// T1 : flags, klass ++// Rmethod : method, index must be Rmethod ++void TemplateTable::invokevirtual_helper(Register index, ++ Register recv, ++ Register flags) { ++ ++ assert_different_registers(index, recv, flags, T2); ++ ++ // Test for an invoke of a final method ++ Label notFinal; ++ __ move(AT, (1 << ConstantPoolCacheEntry::is_vfinal_shift)); ++ __ andr(AT, flags, AT); ++ __ beq(AT, R0, notFinal); ++ __ delayed()->nop(); ++ ++ Register method = index; // method must be Rmethod ++ assert(method == Rmethod, "methodOop must be Rmethod for interpreter calling convention"); ++ ++ // do the call - the index is actually the method to call ++ // the index is indeed methodOop, for this is vfinal, ++ // see ConstantPoolCacheEntry::set_method for more info ++ ++ ++ // It's final, need a null check here! 
++ __ null_check(recv); ++ ++ // profile this call ++ __ profile_final_call(T2); ++ ++ // T2: tmp, used for mdp ++ // method: callee ++ // T9: tmp ++ // is_virtual: true ++ __ profile_arguments_type(T2, method, T9, true); ++ ++ __ jump_from_interpreted(method, T2); ++ ++ __ bind(notFinal); ++ ++ // get receiver klass ++ __ null_check(recv, oopDesc::klass_offset_in_bytes()); ++ __ load_klass(T2, recv); ++ ++ // profile this call ++ __ profile_virtual_call(T2, T0, T1); ++ ++ // get target methodOop & entry point ++ const int base = in_bytes(Klass::vtable_start_offset()); ++ assert(vtableEntry::size() * wordSize == wordSize, "adjust the scaling in the code below"); ++ __ dsll(AT, index, Address::times_ptr); ++ // T2: receiver ++ __ daddu(AT, T2, AT); ++ //this is a ualign read ++ __ ld(method, AT, base + vtableEntry::method_offset_in_bytes()); ++ __ profile_arguments_type(T2, method, T9, true); ++ __ jump_from_interpreted(method, T2); ++ ++} ++ ++void TemplateTable::invokevirtual(int byte_no) { ++ transition(vtos, vtos); ++ assert(byte_no == f2_byte, "use this argument"); ++ prepare_invoke(byte_no, Rmethod, NOREG, T3, T1); ++ // now recv & flags in T3, T1 ++ invokevirtual_helper(Rmethod, T3, T1); ++} ++ ++// T9 : entry ++// Rmethod : method ++void TemplateTable::invokespecial(int byte_no) { ++ transition(vtos, vtos); ++ assert(byte_no == f1_byte, "use this argument"); ++ prepare_invoke(byte_no, Rmethod, NOREG, T3); ++ // now recv & flags in T3, T1 ++ __ verify_oop(T3); ++ __ null_check(T3); ++ __ profile_call(T9); ++ ++ // T8: tmp, used for mdp ++ // Rmethod: callee ++ // T9: tmp ++ // is_virtual: false ++ __ profile_arguments_type(T8, Rmethod, T9, false); ++ ++ __ jump_from_interpreted(Rmethod, T9); ++ __ move(T0, T3); ++} ++ ++void TemplateTable::invokestatic(int byte_no) { ++ transition(vtos, vtos); ++ assert(byte_no == f1_byte, "use this argument"); ++ prepare_invoke(byte_no, Rmethod, NOREG); ++ ++ __ profile_call(T9); ++ ++ // T8: tmp, used for mdp ++ // Rmethod: callee ++ // T9: tmp ++ // is_virtual: false ++ __ profile_arguments_type(T8, Rmethod, T9, false); ++ ++ __ jump_from_interpreted(Rmethod, T9); ++} ++ ++// i have no idea what to do here, now. for future change. FIXME. ++void TemplateTable::fast_invokevfinal(int byte_no) { ++ transition(vtos, vtos); ++ assert(byte_no == f2_byte, "use this argument"); ++ __ stop("fast_invokevfinal not used on mips64"); ++} ++ ++// used registers : T0, T1, T2, T3, T1, A7 ++// T0 : itable, vtable, entry ++// T1 : interface ++// T3 : receiver ++// T1 : flags, klass ++// Rmethod : index, method, this is required by interpreter_entry ++void TemplateTable::invokeinterface(int byte_no) { ++ transition(vtos, vtos); ++ //this method will use T1-T4 and T0 ++ assert(byte_no == f1_byte, "use this argument"); ++ prepare_invoke(byte_no, T2, Rmethod, T3, T1); ++ // T2: reference klass (from f1) if interface method ++ // Rmethod: method (from f2) ++ // T3: receiver ++ // T1: flags ++ ++ // First check for Object case, then private interface method, ++ // then regular interface method. ++ ++ // Special case of invokeinterface called for virtual method of ++ // java.lang.Object. See cpCache.cpp for details. 
++ Label notObjectMethod; ++ __ move(AT, (1 << ConstantPoolCacheEntry::is_forced_virtual_shift)); ++ __ andr(AT, T1, AT); ++ __ beq(AT, R0, notObjectMethod); ++ __ delayed()->nop(); ++ ++ invokevirtual_helper(Rmethod, T3, T1); ++ // no return from above ++ __ bind(notObjectMethod); ++ ++ Label no_such_interface; // for receiver subtype check ++ Register recvKlass; // used for exception processing ++ ++ // Check for private method invocation - indicated by vfinal ++ Label notVFinal; ++ __ move(AT, (1 << ConstantPoolCacheEntry::is_vfinal_shift)); ++ __ andr(AT, T1, AT); ++ __ beq(AT, R0, notVFinal); ++ __ delayed()->nop(); ++ ++ // Get receiver klass into FSR - also a null check ++ __ null_check(T3, oopDesc::klass_offset_in_bytes()); ++ __ load_klass(FSR, T3); ++ ++ Label subtype; ++ __ check_klass_subtype(FSR, T2, T0, subtype); ++ // If we get here the typecheck failed ++ recvKlass = T1; ++ __ move(recvKlass, FSR); ++ __ b(no_such_interface); ++ __ delayed()->nop(); ++ ++ __ bind(subtype); ++ ++ // do the call - rbx is actually the method to call ++ ++ __ profile_final_call(T1); ++ __ profile_arguments_type(T1, Rmethod, T0, true); ++ ++ __ jump_from_interpreted(Rmethod, T1); ++ // no return from above ++ __ bind(notVFinal); ++ ++ // Get receiver klass into T1 - also a null check ++ __ restore_locals(); ++ __ null_check(T3, oopDesc::klass_offset_in_bytes()); ++ __ load_klass(T1, T3); ++ ++ Label no_such_method; ++ ++ // Preserve method for throw_AbstractMethodErrorVerbose. ++ __ move(T3, Rmethod); ++ // Receiver subtype check against REFC. ++ // Superklass in T2. Subklass in T1. ++ __ lookup_interface_method(// inputs: rec. class, interface, itable index ++ T1, T2, noreg, ++ // outputs: scan temp. reg, scan temp. reg ++ T0, FSR, ++ no_such_interface, ++ /*return_method=*/false); ++ ++ ++ // profile this call ++ __ restore_bcp(); ++ __ profile_virtual_call(T1, T0, FSR); ++ ++ // Get declaring interface class from method, and itable index ++ __ ld_ptr(T2, Rmethod, in_bytes(Method::const_offset())); ++ __ ld_ptr(T2, T2, in_bytes(ConstMethod::constants_offset())); ++ __ ld_ptr(T2, T2, ConstantPool::pool_holder_offset_in_bytes()); ++ __ lw(Rmethod, Rmethod, in_bytes(Method::itable_index_offset())); ++ __ addiu(Rmethod, Rmethod, (-1) * Method::itable_index_max); ++ __ subu32(Rmethod, R0, Rmethod); ++ ++ // Preserve recvKlass for throw_AbstractMethodErrorVerbose. ++ __ move(FSR, T1); ++ __ lookup_interface_method(// inputs: rec. class, interface, itable index ++ FSR, T2, Rmethod, ++ // outputs: method, scan temp. reg ++ Rmethod, T0, ++ no_such_interface); ++ ++ // Rmethod: Method* to call ++ // T3: receiver ++ // Check for abstract method error ++ // Note: This should be done more efficiently via a throw_abstract_method_error ++ // interpreter entry point and a conditional jump to it in case of a null ++ // method. ++ __ beq(Rmethod, R0, no_such_method); ++ __ delayed()->nop(); ++ ++ __ profile_called_method(Rmethod, T0, T1); ++ __ profile_arguments_type(T1, Rmethod, T0, true); ++ ++ // do the call ++ // T3: receiver ++ // Rmethod: Method* ++ __ jump_from_interpreted(Rmethod, T1); ++ __ should_not_reach_here(); ++ ++ // exception handling code follows... ++ // note: must restore interpreter registers to canonical ++ // state for exception handling to work correctly! ++ ++ __ bind(no_such_method); ++ // throw exception ++ __ pop(Rmethod); // pop return address (pushed by prepare_invoke) ++ __ restore_bcp(); ++ __ restore_locals(); ++ // Pass arguments for generating a verbose error message. 
++ recvKlass = A1; ++ Register method = A2; ++ if (recvKlass != T1) { __ move(recvKlass, T1); } ++ if (method != T3) { __ move(method, T3); } ++ __ call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::throw_AbstractMethodErrorVerbose), recvKlass, method); ++ // the call_VM checks for exception, so we should never return here. ++ __ should_not_reach_here(); ++ ++ __ bind(no_such_interface); ++ // throw exception ++ __ pop(Rmethod); // pop return address (pushed by prepare_invoke) ++ __ restore_bcp(); ++ __ restore_locals(); ++ // Pass arguments for generating a verbose error message. ++ if (recvKlass != T1) { __ move(recvKlass, T1); } ++ __ call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::throw_IncompatibleClassChangeErrorVerbose), recvKlass, T2); ++ // the call_VM checks for exception, so we should never return here. ++ __ should_not_reach_here(); ++} ++ ++ ++void TemplateTable::invokehandle(int byte_no) { ++ transition(vtos, vtos); ++ assert(byte_no == f1_byte, "use this argument"); ++ const Register T2_method = Rmethod; ++ const Register FSR_mtype = FSR; ++ const Register T3_recv = T3; ++ ++ prepare_invoke(byte_no, T2_method, FSR_mtype, T3_recv); ++ //??__ verify_method_ptr(T2_method); ++ __ verify_oop(T3_recv); ++ __ null_check(T3_recv); ++ ++ // T9: MethodType object (from cpool->resolved_references[f1], if necessary) ++ // T2_method: MH.invokeExact_MT method (from f2) ++ ++ // Note: T9 is already pushed (if necessary) by prepare_invoke ++ ++ // FIXME: profile the LambdaForm also ++ __ profile_final_call(T9); ++ ++ // T8: tmp, used for mdp ++ // T2_method: callee ++ // T9: tmp ++ // is_virtual: true ++ __ profile_arguments_type(T8, T2_method, T9, true); ++ ++ __ jump_from_interpreted(T2_method, T9); ++} ++ ++ void TemplateTable::invokedynamic(int byte_no) { ++ transition(vtos, vtos); ++ assert(byte_no == f1_byte, "use this argument"); ++ ++ //const Register Rmethod = T2; ++ const Register T2_callsite = T2; ++ ++ prepare_invoke(byte_no, Rmethod, T2_callsite); ++ ++ // T2: CallSite object (from cpool->resolved_references[f1]) ++ // Rmethod: MH.linkToCallSite method (from f2) ++ ++ // Note: T2_callsite is already pushed by prepare_invoke ++ // %%% should make a type profile for any invokedynamic that takes a ref argument ++ // profile this call ++ __ profile_call(T9); ++ ++ // T8: tmp, used for mdp ++ // Rmethod: callee ++ // T9: tmp ++ // is_virtual: false ++ __ profile_arguments_type(T8, Rmethod, T9, false); ++ ++ __ verify_oop(T2_callsite); ++ ++ __ jump_from_interpreted(Rmethod, T9); ++ } ++ ++//----------------------------------------------------------------------------- ++// Allocation ++// T1 : tags & buffer end & thread ++// T2 : object end ++// T3 : klass ++// T1 : object size ++// A1 : cpool ++// A2 : cp index ++// return object in FSR ++void TemplateTable::_new() { ++ transition(vtos, atos); ++ __ get_unsigned_2_byte_index_at_bcp(A2, 1); ++ ++ Label slow_case; ++ Label done; ++ Label initialize_header; ++ Label initialize_object; // including clearing the fields ++ Label allocate_shared; ++ ++ __ get_cpool_and_tags(A1, T1); ++ ++ // make sure the class we're about to instantiate has been resolved. 
++ // Note: slow_case does a pop of stack, which is why we loaded class/pushed above ++ const int tags_offset = Array::base_offset_in_bytes(); ++ if (UseLEXT1 && Assembler::is_simm(tags_offset, 8)) { ++ __ gslbx(AT, T1, A2, tags_offset); ++ } else { ++ __ daddu(T1, T1, A2); ++ __ lb(AT, T1, tags_offset); ++ } ++ if(os::is_MP()) { ++ __ sync(); // load acquire ++ } ++ __ daddiu(AT, AT, - (int)JVM_CONSTANT_Class); ++ __ bne(AT, R0, slow_case); ++ __ delayed()->nop(); ++ ++ // get InstanceKlass ++ __ load_resolved_klass_at_index(A1, A2, T3); ++ ++ // make sure klass is initialized & doesn't have finalizer ++ // make sure klass is fully initialized ++ __ lhu(T1, T3, in_bytes(InstanceKlass::init_state_offset())); ++ __ daddiu(AT, T1, - (int)InstanceKlass::fully_initialized); ++ __ bne(AT, R0, slow_case); ++ __ delayed()->nop(); ++ ++ // has_finalizer ++ __ lw(T0, T3, in_bytes(Klass::layout_helper_offset()) ); ++ __ andi(AT, T0, Klass::_lh_instance_slow_path_bit); ++ __ bne(AT, R0, slow_case); ++ __ delayed()->nop(); ++ ++ // Allocate the instance ++ // 1) Try to allocate in the TLAB ++ // 2) if fail and the object is large allocate in the shared Eden ++ // 3) if the above fails (or is not applicable), go to a slow case ++ // (creates a new TLAB, etc.) ++ ++ const bool allow_shared_alloc = ++ Universe::heap()->supports_inline_contig_alloc(); ++ ++#ifndef OPT_THREAD ++ const Register thread = T8; ++ if (UseTLAB || allow_shared_alloc) { ++ __ get_thread(thread); ++ } ++#else ++ const Register thread = TREG; ++#endif ++ ++ if (UseTLAB) { ++ // get tlab_top ++ __ ld(FSR, thread, in_bytes(JavaThread::tlab_top_offset())); ++ // get tlab_end ++ __ ld(AT, thread, in_bytes(JavaThread::tlab_end_offset())); ++ __ daddu(T2, FSR, T0); ++ __ slt(AT, AT, T2); ++ __ bne(AT, R0, allow_shared_alloc ? allocate_shared : slow_case); ++ __ delayed()->nop(); ++ __ sd(T2, thread, in_bytes(JavaThread::tlab_top_offset())); ++ ++ if (ZeroTLAB) { ++ // the fields have been already cleared ++ __ beq(R0, R0, initialize_header); ++ } else { ++ // initialize both the header and fields ++ __ beq(R0, R0, initialize_object); ++ } ++ __ delayed()->nop(); ++ } ++ ++ // Allocation in the shared Eden , if allowed ++ // T0 : instance size in words ++ if(allow_shared_alloc){ ++ __ bind(allocate_shared); ++ ++ Label done, retry; ++ Address heap_top(T1); ++ __ set64(T1, (long)Universe::heap()->top_addr()); ++ __ ld(FSR, heap_top); ++ ++ __ bind(retry); ++ __ set64(AT, (long)Universe::heap()->end_addr()); ++ __ ld(AT, AT, 0); ++ __ daddu(T2, FSR, T0); ++ __ slt(AT, AT, T2); ++ __ bne(AT, R0, slow_case); ++ __ delayed()->nop(); ++ ++ // Compare FSR with the top addr, and if still equal, store the new ++ // top addr in T2 at the address of the top addr pointer. Sets AT if was ++ // equal, and clears it otherwise. Use lock prefix for atomicity on MPs. ++ // ++ // FSR: object begin ++ // T2: object end ++ // T0: instance size in words ++ ++ // if someone beat us on the allocation, try again, otherwise continue ++ __ cmpxchg(heap_top, FSR, T2, AT, true, true, done, &retry); ++ ++ __ bind(done); ++ ++ __ incr_allocated_bytes(thread, T0, 0); ++ } ++ ++ if (UseTLAB || Universe::heap()->supports_inline_contig_alloc()) { ++ // The object is initialized before the header. If the object size is ++ // zero, go directly to the header initialization. 
++ __ bind(initialize_object); ++ __ set64(AT, - sizeof(oopDesc)); ++ __ daddu(T0, T0, AT); ++ __ beq(T0, R0, initialize_header); ++ __ delayed()->nop(); ++ ++ // initialize remaining object fields: T0 is a multiple of 2 ++ { ++ Label loop; ++ __ daddu(T1, FSR, T0); ++ __ daddiu(T1, T1, -oopSize); ++ ++ __ bind(loop); ++ __ sd(R0, T1, sizeof(oopDesc) + 0 * oopSize); ++ __ bne(T1, FSR, loop); //dont clear header ++ __ delayed()->daddiu(T1, T1, -oopSize); ++ } ++ ++ //klass in T3, ++ // initialize object header only. ++ __ bind(initialize_header); ++ if (UseBiasedLocking) { ++ __ ld(AT, T3, in_bytes(Klass::prototype_header_offset())); ++ __ sd(AT, FSR, oopDesc::mark_offset_in_bytes ()); ++ } else { ++ __ set64(AT, (long)markOopDesc::prototype()); ++ __ sd(AT, FSR, oopDesc::mark_offset_in_bytes()); ++ } ++ ++ __ store_klass_gap(FSR, R0); ++ __ store_klass(FSR, T3); ++ ++ { ++ SkipIfEqual skip_if(_masm, &DTraceAllocProbes, 0); ++ // Trigger dtrace event for fastpath ++ __ push(atos); ++ __ call_VM_leaf( ++ CAST_FROM_FN_PTR(address, SharedRuntime::dtrace_object_alloc), FSR); ++ __ pop(atos); ++ ++ } ++ __ b(done); ++ __ delayed()->nop(); ++ } ++ ++ // slow case ++ __ bind(slow_case); ++ __ get_constant_pool(A1); ++ __ get_unsigned_2_byte_index_at_bcp(A2, 1); ++ call_VM(FSR, CAST_FROM_FN_PTR(address, InterpreterRuntime::_new), A1, A2); ++ ++ // continue ++ __ bind(done); ++ __ sync(); ++} ++ ++void TemplateTable::newarray() { ++ transition(itos, atos); ++ __ lbu(A1, at_bcp(1)); ++ //type, count ++ call_VM(FSR, CAST_FROM_FN_PTR(address, InterpreterRuntime::newarray), A1, FSR); ++ __ sync(); ++} ++ ++void TemplateTable::anewarray() { ++ transition(itos, atos); ++ __ get_2_byte_integer_at_bcp(A2, AT, 1); ++ __ huswap(A2); ++ __ get_constant_pool(A1); ++ // cp, index, count ++ call_VM(FSR, CAST_FROM_FN_PTR(address, InterpreterRuntime::anewarray), A1, A2, FSR); ++ __ sync(); ++} ++ ++void TemplateTable::arraylength() { ++ transition(atos, itos); ++ __ null_check(FSR, arrayOopDesc::length_offset_in_bytes()); ++ __ lw(FSR, FSR, arrayOopDesc::length_offset_in_bytes()); ++} ++ ++// when invoke gen_subtype_check, super in T3, sub in T2, object in FSR(it's always) ++// T2 : sub klass ++// T3 : cpool ++// T3 : super klass ++void TemplateTable::checkcast() { ++ transition(atos, atos); ++ Label done, is_null, ok_is_subtype, quicked, resolved; ++ __ beq(FSR, R0, is_null); ++ __ delayed()->nop(); ++ ++ // Get cpool & tags index ++ __ get_cpool_and_tags(T3, T1); ++ __ get_2_byte_integer_at_bcp(T2, AT, 1); ++ __ huswap(T2); ++ ++ // See if bytecode has already been quicked ++ __ daddu(AT, T1, T2); ++ __ lb(AT, AT, Array::base_offset_in_bytes()); ++ if(os::is_MP()) { ++ __ sync(); // load acquire ++ } ++ __ daddiu(AT, AT, - (int)JVM_CONSTANT_Class); ++ __ beq(AT, R0, quicked); ++ __ delayed()->nop(); ++ ++ // In InterpreterRuntime::quicken_io_cc, lots of new classes may be loaded. ++ // Then, GC will move the object in V0 to another places in heap. ++ // Therefore, We should never save such an object in register. ++ // Instead, we should save it in the stack. It can be modified automatically by the GC thread. ++ // After GC, the object address in FSR is changed to a new place. 
++ // ++ __ push(atos); ++ const Register thread = TREG; ++#ifndef OPT_THREAD ++ __ get_thread(thread); ++#endif ++ call_VM(NOREG, CAST_FROM_FN_PTR(address, InterpreterRuntime::quicken_io_cc)); ++ __ get_vm_result_2(T3, thread); ++ __ pop_ptr(FSR); ++ __ b(resolved); ++ __ delayed()->nop(); ++ ++ // klass already in cp, get superklass in T3 ++ __ bind(quicked); ++ __ load_resolved_klass_at_index(T3, T2, T3); ++ ++ __ bind(resolved); ++ ++ // get subklass in T2 ++ //add for compressedoops ++ __ load_klass(T2, FSR); ++ // Superklass in T3. Subklass in T2. ++ __ gen_subtype_check(T3, T2, ok_is_subtype); ++ ++ // Come here on failure ++ // object is at FSR ++ __ jmp(Interpreter::_throw_ClassCastException_entry); ++ __ delayed()->nop(); ++ ++ // Come here on success ++ __ bind(ok_is_subtype); ++ ++ // Collect counts on whether this check-cast sees NULLs a lot or not. ++ if (ProfileInterpreter) { ++ __ b(done); ++ __ delayed()->nop(); ++ __ bind(is_null); ++ __ profile_null_seen(T3); ++ } else { ++ __ bind(is_null); ++ } ++ __ bind(done); ++} ++ ++// i use T3 as cpool, T1 as tags, T2 as index ++// object always in FSR, superklass in T3, subklass in T2 ++void TemplateTable::instanceof() { ++ transition(atos, itos); ++ Label done, is_null, ok_is_subtype, quicked, resolved; ++ ++ __ beq(FSR, R0, is_null); ++ __ delayed()->nop(); ++ ++ // Get cpool & tags index ++ __ get_cpool_and_tags(T3, T1); ++ // get index ++ __ get_2_byte_integer_at_bcp(T2, AT, 1); ++ __ huswap(T2); ++ ++ // See if bytecode has already been quicked ++ // quicked ++ __ daddu(AT, T1, T2); ++ __ lb(AT, AT, Array::base_offset_in_bytes()); ++ if(os::is_MP()) { ++ __ sync(); // load acquire ++ } ++ __ daddiu(AT, AT, - (int)JVM_CONSTANT_Class); ++ __ beq(AT, R0, quicked); ++ __ delayed()->nop(); ++ ++ __ push(atos); ++ const Register thread = TREG; ++#ifndef OPT_THREAD ++ __ get_thread(thread); ++#endif ++ call_VM(NOREG, CAST_FROM_FN_PTR(address, InterpreterRuntime::quicken_io_cc)); ++ __ get_vm_result_2(T3, thread); ++ __ pop_ptr(FSR); ++ __ b(resolved); ++ __ delayed()->nop(); ++ ++ // get superklass in T3, subklass in T2 ++ __ bind(quicked); ++ __ load_resolved_klass_at_index(T3, T2, T3); ++ ++ __ bind(resolved); ++ // get subklass in T2 ++ //add for compressedoops ++ __ load_klass(T2, FSR); ++ ++ // Superklass in T3. Subklass in T2. ++ __ gen_subtype_check(T3, T2, ok_is_subtype); ++ // Come here on failure ++ __ b(done); ++ __ delayed(); __ move(FSR, R0); ++ ++ // Come here on success ++ __ bind(ok_is_subtype); ++ __ move(FSR, 1); ++ ++ // Collect counts on whether this test sees NULLs a lot or not. ++ if (ProfileInterpreter) { ++ __ beq(R0, R0, done); ++ __ delayed()->nop(); ++ __ bind(is_null); ++ __ profile_null_seen(T3); ++ } else { ++ __ bind(is_null); // same as 'done' ++ } ++ __ bind(done); ++ // FSR = 0: obj == NULL or obj is not an instanceof the specified klass ++ // FSR = 1: obj != NULL and obj is an instanceof the specified klass ++} ++ ++//-------------------------------------------------------- ++//-------------------------------------------- ++// Breakpoints ++void TemplateTable::_breakpoint() { ++ // Note: We get here even if we are single stepping.. ++ // jbug inists on setting breakpoints at every bytecode ++ // even if we are in single step mode. 
++ ++ transition(vtos, vtos); ++ ++ // get the unpatched byte code ++ __ get_method(A1); ++ __ call_VM(NOREG, ++ CAST_FROM_FN_PTR(address, ++ InterpreterRuntime::get_original_bytecode_at), ++ A1, BCP); ++ __ move(Rnext, V0); // Rnext will be used in dispatch_only_normal ++ ++ // post the breakpoint event ++ __ get_method(A1); ++ __ call_VM(NOREG, CAST_FROM_FN_PTR(address, InterpreterRuntime::_breakpoint), A1, BCP); ++ ++ // complete the execution of original bytecode ++ __ dispatch_only_normal(vtos); ++} ++ ++//----------------------------------------------------------------------------- ++// Exceptions ++ ++void TemplateTable::athrow() { ++ transition(atos, vtos); ++ __ null_check(FSR); ++ __ jmp(Interpreter::throw_exception_entry()); ++ __ delayed()->nop(); ++} ++ ++//----------------------------------------------------------------------------- ++// Synchronization ++// ++// Note: monitorenter & exit are symmetric routines; which is reflected ++// in the assembly code structure as well ++// ++// Stack layout: ++// ++// [expressions ] <--- SP = expression stack top ++// .. ++// [expressions ] ++// [monitor entry] <--- monitor block top = expression stack bot ++// .. ++// [monitor entry] ++// [frame data ] <--- monitor block bot ++// ... ++// [return addr ] <--- FP ++ ++// we use T2 as monitor entry pointer, T3 as monitor top pointer, c_rarg0 as free slot pointer ++// object always in FSR ++void TemplateTable::monitorenter() { ++ transition(atos, vtos); ++ ++ // check for NULL object ++ __ null_check(FSR); ++ ++ const Address monitor_block_top(FP, frame::interpreter_frame_monitor_block_top_offset ++ * wordSize); ++ const int entry_size = (frame::interpreter_frame_monitor_size()* wordSize); ++ Label allocated; ++ ++ // initialize entry pointer ++ __ move(c_rarg0, R0); ++ ++ // find a free slot in the monitor block (result in c_rarg0) ++ { ++ Label entry, loop, exit; ++ __ ld(T2, monitor_block_top); ++ __ b(entry); ++ __ delayed()->daddiu(T3, FP, frame::interpreter_frame_initial_sp_offset * wordSize); ++ ++ // free slot? ++ __ bind(loop); ++ __ ld(AT, T2, BasicObjectLock::obj_offset_in_bytes()); ++ __ movz(c_rarg0, T2, AT); ++ ++ __ beq(FSR, AT, exit); ++ __ delayed()->nop(); ++ __ daddiu(T2, T2, entry_size); ++ ++ __ bind(entry); ++ __ bne(T3, T2, loop); ++ __ delayed()->nop(); ++ __ bind(exit); ++ } ++ ++ __ bne(c_rarg0, R0, allocated); ++ __ delayed()->nop(); ++ ++ // allocate one if there's no free slot ++ { ++ Label entry, loop; ++ // 1. compute new pointers // SP: old expression stack top ++ __ ld(c_rarg0, monitor_block_top); ++ __ daddiu(SP, SP, - entry_size); ++ __ daddiu(c_rarg0, c_rarg0, - entry_size); ++ __ sd(c_rarg0, monitor_block_top); ++ __ b(entry); ++ __ delayed(); __ move(T3, SP); ++ ++ // 2. move expression stack contents ++ __ bind(loop); ++ __ ld(AT, T3, entry_size); ++ __ sd(AT, T3, 0); ++ __ daddiu(T3, T3, wordSize); ++ __ bind(entry); ++ __ bne(T3, c_rarg0, loop); ++ __ delayed()->nop(); ++ } ++ ++ __ bind(allocated); ++ // Increment bcp to point to the next bytecode, ++ // so exception handling for async. exceptions work correctly. ++ // The object has already been poped from the stack, so the ++ // expression stack looks correct. ++ __ daddiu(BCP, BCP, 1); ++ __ sd(FSR, c_rarg0, BasicObjectLock::obj_offset_in_bytes()); ++ __ lock_object(c_rarg0); ++ // check to make sure this monitor doesn't cause stack overflow after locking ++ __ save_bcp(); // in case of exception ++ __ generate_stack_overflow_check(0); ++ // The bcp has already been incremented. 
Just need to dispatch to next instruction. ++ ++ __ dispatch_next(vtos); ++} ++ ++// T2 : top ++// c_rarg0 : entry ++void TemplateTable::monitorexit() { ++ transition(atos, vtos); ++ ++ __ null_check(FSR); ++ ++ const int entry_size =(frame::interpreter_frame_monitor_size()* wordSize); ++ Label found; ++ ++ // find matching slot ++ { ++ Label entry, loop; ++ __ ld(c_rarg0, FP, frame::interpreter_frame_monitor_block_top_offset * wordSize); ++ __ b(entry); ++ __ delayed()->daddiu(T2, FP, frame::interpreter_frame_initial_sp_offset * wordSize); ++ ++ __ bind(loop); ++ __ ld(AT, c_rarg0, BasicObjectLock::obj_offset_in_bytes()); ++ __ beq(FSR, AT, found); ++ __ delayed()->nop(); ++ __ daddiu(c_rarg0, c_rarg0, entry_size); ++ __ bind(entry); ++ __ bne(T2, c_rarg0, loop); ++ __ delayed()->nop(); ++ } ++ ++ // error handling. Unlocking was not block-structured ++ Label end; ++ __ call_VM(NOREG, CAST_FROM_FN_PTR(address, ++ InterpreterRuntime::throw_illegal_monitor_state_exception)); ++ __ should_not_reach_here(); ++ ++ // call run-time routine ++ // c_rarg0: points to monitor entry ++ __ bind(found); ++ __ move(TSR, FSR); ++ __ unlock_object(c_rarg0); ++ __ move(FSR, TSR); ++ __ bind(end); ++} ++ ++ ++// Wide instructions ++void TemplateTable::wide() { ++ transition(vtos, vtos); ++ __ lbu(Rnext, at_bcp(1)); ++ __ dsll(T9, Rnext, Address::times_8); ++ __ li(AT, (long)Interpreter::_wentry_point); ++ __ daddu(AT, T9, AT); ++ __ ld(T9, AT, 0); ++ __ jr(T9); ++ __ delayed()->nop(); ++} ++ ++ ++void TemplateTable::multianewarray() { ++ transition(vtos, atos); ++ // last dim is on top of stack; we want address of first one: ++ // first_addr = last_addr + (ndims - 1) * wordSize ++ __ lbu(A1, at_bcp(3)); // dimension ++ __ daddiu(A1, A1, -1); ++ __ dsll(A1, A1, Address::times_8); ++ __ daddu(A1, SP, A1); // now A1 pointer to the count array on the stack ++ call_VM(FSR, CAST_FROM_FN_PTR(address, InterpreterRuntime::multianewarray), A1); ++ __ lbu(AT, at_bcp(3)); ++ __ dsll(AT, AT, Address::times_8); ++ __ daddu(SP, SP, AT); ++ __ sync(); ++} ++#endif // !CC_INTERP +diff --git a/src/hotspot/cpu/mips/vmStructs_mips.hpp b/src/hotspot/cpu/mips/vmStructs_mips.hpp +new file mode 100644 +index 0000000000..6939914356 +--- /dev/null ++++ b/src/hotspot/cpu/mips/vmStructs_mips.hpp +@@ -0,0 +1,68 @@ ++/* ++ * Copyright (c) 2001, 2013, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2016, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ * ++ */ ++ ++#ifndef CPU_MIPS_VM_VMSTRUCTS_MIPS_HPP ++#define CPU_MIPS_VM_VMSTRUCTS_MIPS_HPP ++ ++// These are the CPU-specific fields, types and integer ++// constants required by the Serviceability Agent. This file is ++// referenced by vmStructs.cpp. ++ ++#define VM_STRUCTS_CPU(nonstatic_field, static_field, unchecked_nonstatic_field, volatile_nonstatic_field, nonproduct_nonstatic_field, c2_nonstatic_field, unchecked_c1_static_field, unchecked_c2_static_field) \ ++ \ ++ /******************************/ \ ++ /* JavaCallWrapper */ \ ++ /******************************/ \ ++ /******************************/ \ ++ /* JavaFrameAnchor */ \ ++ /******************************/ \ ++ volatile_nonstatic_field(JavaFrameAnchor, _last_Java_fp, intptr_t*) \ ++ \ ++ ++ /* NOTE that we do not use the last_entry() macro here; it is used */ ++ /* in vmStructs__.hpp's VM_STRUCTS_OS_CPU macro (and must */ ++ /* be present there) */ ++ ++ ++#define VM_TYPES_CPU(declare_type, declare_toplevel_type, declare_oop_type, declare_integer_type, declare_unsigned_integer_type, declare_c1_toplevel_type, declare_c2_type, declare_c2_toplevel_type) \ ++ ++ /* NOTE that we do not use the last_entry() macro here; it is used */ ++ /* in vmStructs__.hpp's VM_TYPES_OS_CPU macro (and must */ ++ /* be present there) */ ++ ++ ++#define VM_INT_CONSTANTS_CPU(declare_constant, declare_preprocessor_constant, declare_c1_constant, declare_c2_constant, declare_c2_preprocessor_constant) \ ++ ++ /* NOTE that we do not use the last_entry() macro here; it is used */ ++ /* in vmStructs__.hpp's VM_INT_CONSTANTS_OS_CPU macro (and must */ ++ /* be present there) */ ++ ++#define VM_LONG_CONSTANTS_CPU(declare_constant, declare_preprocessor_constant, declare_c1_constant, declare_c2_constant, declare_c2_preprocessor_constant) \ ++ ++ /* NOTE that we do not use the last_entry() macro here; it is used */ ++ /* in vmStructs__.hpp's VM_LONG_CONSTANTS_OS_CPU macro (and must */ ++ /* be present there) */ ++ ++#endif // CPU_MIPS_VM_VMSTRUCTS_MIPS_HPP +diff --git a/src/hotspot/cpu/mips/vm_version_ext_mips.cpp b/src/hotspot/cpu/mips/vm_version_ext_mips.cpp +new file mode 100644 +index 0000000000..ac2a43edce +--- /dev/null ++++ b/src/hotspot/cpu/mips/vm_version_ext_mips.cpp +@@ -0,0 +1,90 @@ ++/* ++ * Copyright (c) 2013, 2018, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2018, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ * ++ */ ++ ++#include "memory/allocation.inline.hpp" ++#include "runtime/os.inline.hpp" ++#include "vm_version_ext_mips.hpp" ++ ++// VM_Version_Ext statics ++int VM_Version_Ext::_no_of_threads = 0; ++int VM_Version_Ext::_no_of_cores = 0; ++int VM_Version_Ext::_no_of_sockets = 0; ++bool VM_Version_Ext::_initialized = false; ++char VM_Version_Ext::_cpu_name[CPU_TYPE_DESC_BUF_SIZE] = {0}; ++char VM_Version_Ext::_cpu_desc[CPU_DETAILED_DESC_BUF_SIZE] = {0}; ++ ++void VM_Version_Ext::initialize_cpu_information(void) { ++ // do nothing if cpu info has been initialized ++ if (_initialized) { ++ return; ++ } ++ ++ _no_of_cores = os::processor_count(); ++ _no_of_threads = _no_of_cores; ++ _no_of_sockets = _no_of_cores; ++ if (is_loongson()) { ++ snprintf(_cpu_name, CPU_TYPE_DESC_BUF_SIZE - 1, "Loongson MIPS"); ++ snprintf(_cpu_desc, CPU_DETAILED_DESC_BUF_SIZE, "Loongson MIPS %s", cpu_features()); ++ } else { ++ snprintf(_cpu_name, CPU_TYPE_DESC_BUF_SIZE - 1, "MIPS"); ++ snprintf(_cpu_desc, CPU_DETAILED_DESC_BUF_SIZE, "MIPS %s", cpu_features()); ++ } ++ _initialized = true; ++} ++ ++int VM_Version_Ext::number_of_threads(void) { ++ initialize_cpu_information(); ++ return _no_of_threads; ++} ++ ++int VM_Version_Ext::number_of_cores(void) { ++ initialize_cpu_information(); ++ return _no_of_cores; ++} ++ ++int VM_Version_Ext::number_of_sockets(void) { ++ initialize_cpu_information(); ++ return _no_of_sockets; ++} ++ ++const char* VM_Version_Ext::cpu_name(void) { ++ initialize_cpu_information(); ++ char* tmp = NEW_C_HEAP_ARRAY_RETURN_NULL(char, CPU_TYPE_DESC_BUF_SIZE, mtTracing); ++ if (NULL == tmp) { ++ return NULL; ++ } ++ strncpy(tmp, _cpu_name, CPU_TYPE_DESC_BUF_SIZE); ++ return tmp; ++} ++ ++const char* VM_Version_Ext::cpu_description(void) { ++ initialize_cpu_information(); ++ char* tmp = NEW_C_HEAP_ARRAY_RETURN_NULL(char, CPU_DETAILED_DESC_BUF_SIZE, mtTracing); ++ if (NULL == tmp) { ++ return NULL; ++ } ++ strncpy(tmp, _cpu_desc, CPU_DETAILED_DESC_BUF_SIZE); ++ return tmp; ++} +diff --git a/src/hotspot/cpu/mips/vm_version_ext_mips.hpp b/src/hotspot/cpu/mips/vm_version_ext_mips.hpp +new file mode 100644 +index 0000000000..ffdcff0677 +--- /dev/null ++++ b/src/hotspot/cpu/mips/vm_version_ext_mips.hpp +@@ -0,0 +1,54 @@ ++/* ++ * Copyright (c) 2016, 2018, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2018, 2019, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ * ++ */ ++ ++#ifndef CPU_MIPS_VM_VM_VERSION_EXT_MIPS_HPP ++#define CPU_MIPS_VM_VM_VERSION_EXT_MIPS_HPP ++ ++#include "runtime/vm_version.hpp" ++#include "utilities/macros.hpp" ++ ++class VM_Version_Ext : public VM_Version { ++ private: ++ static const size_t CPU_TYPE_DESC_BUF_SIZE = 256; ++ static const size_t CPU_DETAILED_DESC_BUF_SIZE = 4096; ++ ++ static int _no_of_threads; ++ static int _no_of_cores; ++ static int _no_of_sockets; ++ static bool _initialized; ++ static char _cpu_name[CPU_TYPE_DESC_BUF_SIZE]; ++ static char _cpu_desc[CPU_DETAILED_DESC_BUF_SIZE]; ++ ++ public: ++ static int number_of_threads(void); ++ static int number_of_cores(void); ++ static int number_of_sockets(void); ++ ++ static const char* cpu_name(void); ++ static const char* cpu_description(void); ++ static void initialize_cpu_information(void); ++}; ++ ++#endif // CPU_MIPS_VM_VM_VERSION_EXT_MIPS_HPP +diff --git a/src/hotspot/cpu/mips/vm_version_mips.cpp b/src/hotspot/cpu/mips/vm_version_mips.cpp +new file mode 100644 +index 0000000000..2e7b61390e +--- /dev/null ++++ b/src/hotspot/cpu/mips/vm_version_mips.cpp +@@ -0,0 +1,516 @@ ++/* ++ * Copyright (c) 1997, 2014, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ * ++ */ ++ ++#include "precompiled.hpp" ++#include "asm/macroAssembler.hpp" ++#include "asm/macroAssembler.inline.hpp" ++#include "memory/resourceArea.hpp" ++#include "runtime/java.hpp" ++#include "runtime/stubCodeGenerator.hpp" ++#include "runtime/vm_version.hpp" ++#ifdef TARGET_OS_FAMILY_linux ++# include "os_linux.inline.hpp" ++#endif ++ ++int VM_Version::_cpuFeatures; ++const char* VM_Version::_features_str = ""; ++VM_Version::CpuidInfo VM_Version::_cpuid_info = { 0, }; ++volatile bool VM_Version::_is_determine_cpucfg_supported_running = false; ++bool VM_Version::_is_cpucfg_instruction_supported = true; ++bool VM_Version::_cpu_info_is_initialized = false; ++ ++static BufferBlob* stub_blob; ++static const int stub_size = 600; ++ ++extern "C" { ++ typedef void (*get_cpu_info_stub_t)(void*); ++} ++static get_cpu_info_stub_t get_cpu_info_stub = NULL; ++ ++ ++class VM_Version_StubGenerator: public StubCodeGenerator { ++ public: ++ ++ VM_Version_StubGenerator(CodeBuffer *c) : StubCodeGenerator(c) {} ++ ++ address generate_get_cpu_info() { ++ assert(!VM_Version::cpu_info_is_initialized(), "VM_Version should not be initialized"); ++ StubCodeMark mark(this, "VM_Version", "get_cpu_info_stub"); ++# define __ _masm-> ++ ++ address start = __ pc(); ++ ++ __ enter(); ++ __ push(AT); ++ __ push(V0); ++ ++ __ li(AT, (long)0); ++ __ cpucfg(V0, AT); ++ __ lw(AT, A0, in_bytes(VM_Version::Loongson_Cpucfg_id0_offset())); ++ __ sw(V0, A0, in_bytes(VM_Version::Loongson_Cpucfg_id0_offset())); ++ ++ __ li(AT, 1); ++ __ cpucfg(V0, AT); ++ __ lw(AT, A0, in_bytes(VM_Version::Loongson_Cpucfg_id1_offset())); ++ __ sw(V0, A0, in_bytes(VM_Version::Loongson_Cpucfg_id1_offset())); ++ ++ __ li(AT, 2); ++ __ cpucfg(V0, AT); ++ __ lw(AT, A0, in_bytes(VM_Version::Loongson_Cpucfg_id2_offset())); ++ __ sw(V0, A0, in_bytes(VM_Version::Loongson_Cpucfg_id2_offset())); ++ ++ __ pop(V0); ++ __ pop(AT); ++ __ leave(); ++ __ jr(RA); ++ __ delayed()->nop(); ++# undef __ ++ ++ return start; ++ }; ++}; ++ ++uint32_t VM_Version::get_feature_flags_by_cpucfg() { ++ uint32_t result = 0; ++ if (_cpuid_info.cpucfg_info_id1.bits.MMI != 0) ++ result |= CPU_MMI; ++ if (_cpuid_info.cpucfg_info_id1.bits.MSA1 != 0) ++ result |= CPU_MSA1_0; ++ if (_cpuid_info.cpucfg_info_id1.bits.MSA2 != 0) ++ result |= CPU_MSA2_0; ++ if (_cpuid_info.cpucfg_info_id1.bits.CGP != 0) ++ result |= CPU_CGP; ++ if (_cpuid_info.cpucfg_info_id1.bits.LSX1 != 0) ++ result |= CPU_LSX1; ++ if (_cpuid_info.cpucfg_info_id1.bits.LSX2 != 0) ++ result |= CPU_LSX2; ++ if (_cpuid_info.cpucfg_info_id1.bits.LASX != 0) ++ result |= CPU_LASX; ++ if (_cpuid_info.cpucfg_info_id1.bits.LLSYNC != 0) ++ result |= CPU_LLSYNC; ++ if (_cpuid_info.cpucfg_info_id1.bits.TGTSYNC != 0) ++ result |= CPU_TGTSYNC; ++ if (_cpuid_info.cpucfg_info_id1.bits.MUALP != 0) ++ result |= CPU_MUALP; ++ if (_cpuid_info.cpucfg_info_id2.bits.LEXT1 != 0) ++ result |= CPU_LEXT1; ++ if (_cpuid_info.cpucfg_info_id2.bits.LEXT2 != 0) ++ result |= CPU_LEXT2; ++ if (_cpuid_info.cpucfg_info_id2.bits.LEXT3 != 0) ++ result |= CPU_LEXT3; ++ if (_cpuid_info.cpucfg_info_id2.bits.LAMO != 0) ++ result |= CPU_LAMO; ++ if (_cpuid_info.cpucfg_info_id2.bits.LPIXU != 0) ++ result |= CPU_LPIXU; ++ ++ result |= CPU_ULSYNC; ++ ++ return result; ++} ++ ++void read_cpu_info(const char *path, char *result) { ++ FILE *ptr; ++ char buf[1024]; ++ int i = 0; ++ if((ptr=fopen(path, "r")) != NULL) { ++ while(fgets(buf, 1024, ptr)!=NULL) { ++ strcat(result,buf); ++ i++; ++ if (i == 10) break; ++ } ++ fclose(ptr); ++ } else { ++ warning("Can't 
detect CPU info - cannot open %s", path); ++ } ++} ++ ++void strlwr(char *str) { ++ for (; *str!='\0'; str++) ++ *str = tolower(*str); ++} ++ ++int VM_Version::get_feature_flags_by_cpuinfo(int features) { ++ assert(!cpu_info_is_initialized(), "VM_Version should not be initialized"); ++ ++ char res[10240]; ++ int i; ++ memset(res, '\0', 10240 * sizeof(char)); ++ read_cpu_info("/proc/cpuinfo", res); ++ // res is converted to lower case ++ strlwr(res); ++ ++ if (strstr(res, "loongson")) { ++ // Loongson CPU ++ features |= CPU_LOONGSON; ++ ++ const struct Loongson_Cpuinfo loongson_cpuinfo[] = { ++ {L_3A1000, "3a1000"}, ++ {L_3B1500, "3b1500"}, ++ {L_3A2000, "3a2000"}, ++ {L_3B2000, "3b2000"}, ++ {L_3A3000, "3a3000"}, ++ {L_3B3000, "3b3000"}, ++ {L_2K1000, "2k1000"}, ++ {L_UNKNOWN, "unknown"} ++ }; ++ ++ // Loongson Family ++ int detected = 0; ++ for (i = 0; i <= L_UNKNOWN; i++) { ++ switch (i) { ++ // 3A1000 and 3B1500 may use an old kernel and further comparsion is needed ++ // test PRID REV in /proc/cpuinfo ++ // 3A1000: V0.5, model name: ICT Loongson-3A V0.5 FPU V0.1 ++ // 3B1500: V0.7, model name: ICT Loongson-3B V0.7 FPU V0.1 ++ case L_3A1000: ++ if (strstr(res, loongson_cpuinfo[i].match_str) || strstr(res, "loongson-3a v0.5")) { ++ features |= CPU_LOONGSON_GS464; ++ detected++; ++ //tty->print_cr("3A1000 platform"); ++ } ++ break; ++ case L_3B1500: ++ if (strstr(res, loongson_cpuinfo[i].match_str) || strstr(res, "loongson-3b v0.7")) { ++ features |= CPU_LOONGSON_GS464; ++ detected++; ++ //tty->print_cr("3B1500 platform"); ++ } ++ break; ++ case L_3A2000: ++ case L_3B2000: ++ case L_3A3000: ++ case L_3B3000: ++ if (strstr(res, loongson_cpuinfo[i].match_str)) { ++ features |= CPU_LOONGSON_GS464E; ++ detected++; ++ //tty->print_cr("3A2000/3A3000/3B2000/3B3000 platform"); ++ } ++ break; ++ case L_2K1000: ++ if (strstr(res, loongson_cpuinfo[i].match_str)) { ++ features |= CPU_LOONGSON_GS264; ++ detected++; ++ //tty->print_cr("2K1000 platform"); ++ } ++ break; ++ case L_UNKNOWN: ++ if (detected == 0) { ++ detected++; ++ //tty->print_cr("unknown Loongson platform"); ++ } ++ break; ++ default: ++ ShouldNotReachHere(); ++ } ++ } ++ assert (detected == 1, "one and only one of LOONGSON_CPU_FAMILY should be detected"); ++ } else { // not Loongson ++ // Not Loongson CPU ++ //tty->print_cr("MIPS platform"); ++ } ++ ++ if (features & CPU_LOONGSON_GS264) { ++ features |= CPU_LEXT1; ++ features |= CPU_LEXT2; ++ features |= CPU_TGTSYNC; ++ features |= CPU_ULSYNC; ++ features |= CPU_MSA1_0; ++ features |= CPU_LSX1; ++ } else if (features & CPU_LOONGSON_GS464) { ++ features |= CPU_LEXT1; ++ features |= CPU_LLSYNC; ++ features |= CPU_TGTSYNC; ++ } else if (features & CPU_LOONGSON_GS464E) { ++ features |= CPU_LEXT1; ++ features |= CPU_LEXT2; ++ features |= CPU_LEXT3; ++ features |= CPU_TGTSYNC; ++ features |= CPU_ULSYNC; ++ } else if (features & CPU_LOONGSON) { ++ // unknow loongson ++ features |= CPU_LLSYNC; ++ features |= CPU_TGTSYNC; ++ features |= CPU_ULSYNC; ++ } ++ VM_Version::_cpu_info_is_initialized = true; ++ ++ return features; ++} ++ ++void VM_Version::get_processor_features() { ++ ++ clean_cpuFeatures(); ++ ++ // test if cpucfg instruction is supported ++ VM_Version::_is_determine_cpucfg_supported_running = true; ++ __asm__ __volatile__( ++ ".insn \n\t" ++ ".word (0xc8080118)\n\t" // cpucfg zero, zero ++ : ++ : ++ : ++ ); ++ VM_Version::_is_determine_cpucfg_supported_running = false; ++ ++ if (supports_cpucfg()) { ++ get_cpu_info_stub(&_cpuid_info); ++ _cpuFeatures = get_feature_flags_by_cpucfg(); 
++ // Only Loongson CPUs support cpucfg ++ _cpuFeatures |= CPU_LOONGSON; ++ } else { ++ _cpuFeatures = get_feature_flags_by_cpuinfo(0); ++ } ++ ++ _supports_cx8 = true; ++ ++ if (UseG1GC && FLAG_IS_DEFAULT(MaxGCPauseMillis)) { ++ FLAG_SET_CMDLINE(uintx, MaxGCPauseMillis, 650); ++ } ++ ++#ifdef COMPILER2 ++ if (MaxVectorSize > 0) { ++ if (!is_power_of_2(MaxVectorSize)) { ++ warning("MaxVectorSize must be a power of 2"); ++ MaxVectorSize = 8; ++ } ++ if (MaxVectorSize > 0 && supports_ps()) { ++ MaxVectorSize = 8; ++ } else { ++ MaxVectorSize = 0; ++ } ++ } ++ // ++ // Vector optimization of MIPS works in most cases, but cannot pass hotspot/test/compiler/6340864/TestFloatVect.java. ++ // Vector optimization was closed by default. ++ // The reasons: ++ // 1. The kernel does not have emulation of PS instructions yet, so the emulation of PS instructions must be done in JVM, see JVM_handle_linux_signal. ++ // 2. It seems the gcc4.4.7 had some bug related to ucontext_t, which is used in signal handler to emulate PS instructions. ++ // ++ if (FLAG_IS_DEFAULT(MaxVectorSize)) { ++ MaxVectorSize = 0; ++ } ++ ++#endif ++ ++ if (needs_llsync() && needs_tgtsync() && !needs_ulsync()) { ++ if (FLAG_IS_DEFAULT(UseSyncLevel)) { ++ FLAG_SET_DEFAULT(UseSyncLevel, 1000); ++ } ++ } else if (!needs_llsync() && needs_tgtsync() && needs_ulsync()) { ++ if (FLAG_IS_DEFAULT(UseSyncLevel)) { ++ FLAG_SET_DEFAULT(UseSyncLevel, 2000); ++ } ++ } else if (!needs_llsync() && !needs_tgtsync() && needs_ulsync()) { ++ if (FLAG_IS_DEFAULT(UseSyncLevel)) { ++ FLAG_SET_DEFAULT(UseSyncLevel, 3000); ++ } ++ } else if (needs_llsync() && !needs_tgtsync() && needs_ulsync()) { ++ if (FLAG_IS_DEFAULT(UseSyncLevel)) { ++ FLAG_SET_DEFAULT(UseSyncLevel, 4000); ++ } ++ } else if (needs_llsync() && needs_tgtsync() && needs_ulsync()) { ++ if (FLAG_IS_DEFAULT(UseSyncLevel)) { ++ FLAG_SET_DEFAULT(UseSyncLevel, 10000); ++ } ++ } else { ++ assert(false, "Should Not Reach Here, what is the cpu type?"); ++ if (FLAG_IS_DEFAULT(UseSyncLevel)) { ++ FLAG_SET_DEFAULT(UseSyncLevel, 10000); ++ } ++ } ++ ++ if (supports_lext1()) { ++ if (FLAG_IS_DEFAULT(UseLEXT1)) { ++ FLAG_SET_DEFAULT(UseLEXT1, true); ++ } ++ } else if (UseLEXT1) { ++ warning("LEXT1 instructions are not available on this CPU"); ++ FLAG_SET_DEFAULT(UseLEXT1, false); ++ } ++ ++ if (supports_lext2()) { ++ if (FLAG_IS_DEFAULT(UseLEXT2)) { ++ FLAG_SET_DEFAULT(UseLEXT2, true); ++ } ++ } else if (UseLEXT2) { ++ warning("LEXT2 instructions are not available on this CPU"); ++ FLAG_SET_DEFAULT(UseLEXT2, false); ++ } ++ ++ if (supports_lext3()) { ++ if (FLAG_IS_DEFAULT(UseLEXT3)) { ++ FLAG_SET_DEFAULT(UseLEXT3, true); ++ } ++ } else if (UseLEXT3) { ++ warning("LEXT3 instructions are not available on this CPU"); ++ FLAG_SET_DEFAULT(UseLEXT3, false); ++ } ++ ++ if (UseLEXT2) { ++ if (FLAG_IS_DEFAULT(UseCountTrailingZerosInstructionMIPS64)) { ++ FLAG_SET_DEFAULT(UseCountTrailingZerosInstructionMIPS64, 1); ++ } ++ } else if (UseCountTrailingZerosInstructionMIPS64) { ++ if (!FLAG_IS_DEFAULT(UseCountTrailingZerosInstructionMIPS64)) ++ warning("ctz/dctz instructions are not available on this CPU"); ++ FLAG_SET_DEFAULT(UseCountTrailingZerosInstructionMIPS64, 0); ++ } ++ ++ if (TieredCompilation) { ++ if (!FLAG_IS_DEFAULT(TieredCompilation)) ++ warning("TieredCompilation not supported"); ++ FLAG_SET_DEFAULT(TieredCompilation, false); ++ } ++ ++ char buf[256]; ++ bool is_unknown_loongson_cpu = is_loongson() && !is_gs464() && !is_gs464e() && !is_gs264() && !supports_cpucfg(); ++ ++ // A note on the 
_features_string format: ++ // There are jtreg tests checking the _features_string for various properties. ++ // For some strange reason, these tests require the string to contain ++ // only _lowercase_ characters. Keep that in mind when being surprised ++ // about the unusual notation of features - and when adding new ones. ++ // Features may have one comma at the end. ++ // Furthermore, use one, and only one, separator space between features. ++ // Multiple spaces are considered separate tokens, messing up everything. ++ jio_snprintf(buf, sizeof(buf), "%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s, usesynclevel:%d", ++ (is_loongson() ? "mips-compatible loongson cpu" : "mips cpu"), ++ (is_gs464() ? ", gs464 (3a1000/3b1500)" : ""), ++ (is_gs464e() ? ", gs464e (3a2000/3a3000/3b2000/3b3000)" : ""), ++ (is_gs264() ? ", gs264 (2k1000)" : ""), ++ (is_unknown_loongson_cpu ? ", unknown loongson cpu" : ""), ++ (supports_dsp() ? ", dsp" : ""), ++ (supports_ps() ? ", ps" : ""), ++ (supports_3d() ? ", 3d" : ""), ++ (supports_mmi() ? ", mmi" : ""), ++ (supports_msa1_0() ? ", msa1_0" : ""), ++ (supports_msa2_0() ? ", msa2_0" : ""), ++ (supports_lsx1() ? ", lsx1" : ""), ++ (supports_lsx2() ? ", lsx2" : ""), ++ (supports_lasx() ? ", lasx" : ""), ++ (supports_lext1() ? ", lext1" : ""), ++ (supports_lext2() ? ", lext2" : ""), ++ (supports_lext3() ? ", lext3" : ""), ++ (supports_cgp() ? ", aes, crc, sha1, sha256, sha512" : ""), ++ (supports_lamo() ? ", lamo" : ""), ++ (supports_lpixu() ? ", lpixu" : ""), ++ (needs_llsync() ? ", llsync" : ""), ++ (needs_tgtsync() ? ", tgtsync": ""), ++ (needs_ulsync() ? ", ulsync": ""), ++ (supports_mualp() ? ", mualp" : ""), ++ UseSyncLevel); ++ _features_str = strdup(buf); ++ ++ if (FLAG_IS_DEFAULT(AllocatePrefetchStyle)) { ++ FLAG_SET_DEFAULT(AllocatePrefetchStyle, 1); ++ } ++ ++ if (FLAG_IS_DEFAULT(AllocatePrefetchLines)) { ++ FLAG_SET_DEFAULT(AllocatePrefetchLines, 1); ++ } ++ ++ if (FLAG_IS_DEFAULT(AllocatePrefetchStepSize)) { ++ FLAG_SET_DEFAULT(AllocatePrefetchStepSize, 64); ++ } ++ ++ if (FLAG_IS_DEFAULT(AllocatePrefetchDistance)) { ++ FLAG_SET_DEFAULT(AllocatePrefetchDistance, 64); ++ } ++ ++ if (FLAG_IS_DEFAULT(AllocateInstancePrefetchLines)) { ++ FLAG_SET_DEFAULT(AllocateInstancePrefetchLines, 1); ++ } ++ ++ if (UseSHA) { ++ warning("SHA instructions are not available on this CPU"); ++ FLAG_SET_DEFAULT(UseSHA, false); ++ } ++ ++ if (UseSHA1Intrinsics || UseSHA256Intrinsics || UseSHA512Intrinsics) { ++ warning("SHA intrinsics are not available on this CPU"); ++ FLAG_SET_DEFAULT(UseSHA1Intrinsics, false); ++ FLAG_SET_DEFAULT(UseSHA256Intrinsics, false); ++ FLAG_SET_DEFAULT(UseSHA512Intrinsics, false); ++ } ++ ++ if (UseAES) { ++ if (!FLAG_IS_DEFAULT(UseAES)) { ++ warning("AES instructions are not available on this CPU"); ++ FLAG_SET_DEFAULT(UseAES, false); ++ } ++ } ++ ++ if (UseCRC32Intrinsics) { ++ if (!FLAG_IS_DEFAULT(UseCRC32Intrinsics)) { ++ warning("CRC32Intrinsics instructions are not available on this CPU"); ++ FLAG_SET_DEFAULT(UseCRC32Intrinsics, false); ++ } ++ } ++ ++ if (UseCRC32CIntrinsics) { ++ if (!FLAG_IS_DEFAULT(UseCRC32Intrinsics)) { ++ warning("CRC32CIntrinsics instructions are not available on this CPU"); ++ FLAG_SET_DEFAULT(UseCRC32CIntrinsics, false); ++ } ++ } ++ ++ if (UseAESIntrinsics) { ++ if (!FLAG_IS_DEFAULT(UseAESIntrinsics)) { ++ warning("AES intrinsics are not available on this CPU"); ++ FLAG_SET_DEFAULT(UseAESIntrinsics, false); ++ } ++ } ++ ++#ifdef COMPILER2 ++ if (FLAG_IS_DEFAULT(UseMontgomeryMultiplyIntrinsic)) { ++ 
UseMontgomeryMultiplyIntrinsic = true; ++ } ++ if (FLAG_IS_DEFAULT(UseMontgomerySquareIntrinsic)) { ++ UseMontgomerySquareIntrinsic = true; ++ } ++#endif ++ ++ if (FLAG_IS_DEFAULT(UseFMA)) { ++ FLAG_SET_DEFAULT(UseFMA, true); ++ } ++ ++ UNSUPPORTED_OPTION(CriticalJNINatives); ++} ++ ++void VM_Version::initialize() { ++ ResourceMark rm; ++ // Making this stub must be FIRST use of assembler ++ ++ stub_blob = BufferBlob::create("get_cpu_info_stub", stub_size); ++ if (stub_blob == NULL) { ++ vm_exit_during_initialization("Unable to allocate get_cpu_info_stub"); ++ } ++ CodeBuffer c(stub_blob); ++ VM_Version_StubGenerator g(&c); ++ get_cpu_info_stub = CAST_TO_FN_PTR(get_cpu_info_stub_t, ++ g.generate_get_cpu_info()); ++ ++ get_processor_features(); ++} +diff --git a/src/hotspot/cpu/mips/vm_version_mips.hpp b/src/hotspot/cpu/mips/vm_version_mips.hpp +new file mode 100644 +index 0000000000..733a0af295 +--- /dev/null ++++ b/src/hotspot/cpu/mips/vm_version_mips.hpp +@@ -0,0 +1,221 @@ ++/* ++ * Copyright (c) 1997, 2013, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ * ++ */ ++ ++#ifndef CPU_MIPS_VM_VM_VERSION_MIPS_HPP ++#define CPU_MIPS_VM_VM_VERSION_MIPS_HPP ++ ++#include "runtime/abstract_vm_version.hpp" ++#include "runtime/globals_extension.hpp" ++#include "utilities/sizes.hpp" ++ ++class VM_Version: public Abstract_VM_Version { ++public: ++ ++ union Loongson_Cpucfg_Id1 { ++ uint32_t value; ++ struct { ++ uint32_t FP_CFG : 1, ++ FPREV : 3, ++ MMI : 1, ++ MSA1 : 1, ++ MSA2 : 1, ++ CGP : 1, ++ WRP : 1, ++ LSX1 : 1, ++ LSX2 : 1, ++ LASX : 1, ++ R6FXP : 1, ++ R6CRCP : 1, ++ R6FPP : 1, ++ CNT64 : 1, ++ LSLDR0 : 1, ++ LSPREF : 1, ++ LSPREFX : 1, ++ LSSYNCI : 1, ++ LSUCA : 1, ++ LLSYNC : 1, ++ TGTSYNC : 1, ++ LLEXC : 1, ++ SCRAND : 1, ++ MUALP : 1, ++ KMUALEn : 1, ++ ITLBT : 1, ++ LSUPERF : 1, ++ SFBP : 1, ++ CDMAP : 1, ++ : 1; ++ } bits; ++ }; ++ ++ union Loongson_Cpucfg_Id2 { ++ uint32_t value; ++ struct { ++ uint32_t LEXT1 : 1, ++ LEXT2 : 1, ++ LEXT3 : 1, ++ LSPW : 1, ++ LBT1 : 1, ++ LBT2 : 1, ++ LBT3 : 1, ++ LBTMMU : 1, ++ LPMP : 1, ++ LPMRev : 3, ++ LAMO : 1, ++ LPIXU : 1, ++ LPIXNU : 1, ++ LVZP : 1, ++ LVZRev : 3, ++ LGFTP : 1, ++ LGFTRev : 3, ++ LLFTP : 1, ++ LLFTRev : 3, ++ LCSRP : 1, ++ DISBLKLY : 1, ++ : 3; ++ } bits; ++ }; ++ ++protected: ++ ++ enum { ++ CPU_LOONGSON = (1 << 1), ++ CPU_LOONGSON_GS464 = (1 << 2), ++ CPU_LOONGSON_GS464E = (1 << 3), ++ CPU_LOONGSON_GS264 = (1 << 4), ++ CPU_MMI = (1 << 11), ++ CPU_MSA1_0 = (1 << 12), ++ CPU_MSA2_0 = (1 << 13), ++ CPU_CGP = (1 << 14), ++ CPU_LSX1 = (1 << 15), ++ CPU_LSX2 = (1 << 16), ++ CPU_LASX = (1 << 17), ++ CPU_LEXT1 = (1 << 18), ++ CPU_LEXT2 = (1 << 19), ++ CPU_LEXT3 = (1 << 20), ++ CPU_LAMO = (1 << 21), ++ CPU_LPIXU = (1 << 22), ++ CPU_LLSYNC = (1 << 23), ++ CPU_TGTSYNC = (1 << 24), ++ CPU_ULSYNC = (1 << 25), ++ CPU_MUALP = (1 << 26), ++ ++ //////////////////////add some other feature here////////////////// ++ } cpuFeatureFlags; ++ ++ enum Loongson_Family { ++ L_3A1000 = 0, ++ L_3B1500 = 1, ++ L_3A2000 = 2, ++ L_3B2000 = 3, ++ L_3A3000 = 4, ++ L_3B3000 = 5, ++ L_2K1000 = 6, ++ L_UNKNOWN = 7 ++ }; ++ ++ struct Loongson_Cpuinfo { ++ Loongson_Family id; ++ const char* const match_str; ++ }; ++ ++ static int _cpuFeatures; ++ static const char* _features_str; ++ static volatile bool _is_determine_cpucfg_supported_running; ++ static bool _is_cpucfg_instruction_supported; ++ static bool _cpu_info_is_initialized; ++ ++ struct CpuidInfo { ++ uint32_t cpucfg_info_id0; ++ Loongson_Cpucfg_Id1 cpucfg_info_id1; ++ Loongson_Cpucfg_Id2 cpucfg_info_id2; ++ uint32_t cpucfg_info_id3; ++ uint32_t cpucfg_info_id4; ++ uint32_t cpucfg_info_id5; ++ uint32_t cpucfg_info_id6; ++ uint32_t cpucfg_info_id8; ++ }; ++ ++ // The actual cpuid info block ++ static CpuidInfo _cpuid_info; ++ ++ static uint32_t get_feature_flags_by_cpucfg(); ++ static int get_feature_flags_by_cpuinfo(int features); ++ static void get_processor_features(); ++ ++public: ++ // Offsets for cpuid asm stub ++ static ByteSize Loongson_Cpucfg_id0_offset() { return byte_offset_of(CpuidInfo, cpucfg_info_id0); } ++ static ByteSize Loongson_Cpucfg_id1_offset() { return byte_offset_of(CpuidInfo, cpucfg_info_id1); } ++ static ByteSize Loongson_Cpucfg_id2_offset() { return byte_offset_of(CpuidInfo, cpucfg_info_id2); } ++ static ByteSize Loongson_Cpucfg_id3_offset() { return byte_offset_of(CpuidInfo, cpucfg_info_id3); } ++ static ByteSize Loongson_Cpucfg_id4_offset() { return byte_offset_of(CpuidInfo, cpucfg_info_id4); } ++ static ByteSize Loongson_Cpucfg_id5_offset() { return byte_offset_of(CpuidInfo, cpucfg_info_id5); } ++ static ByteSize 
Loongson_Cpucfg_id6_offset() { return byte_offset_of(CpuidInfo, cpucfg_info_id6); } ++ static ByteSize Loongson_Cpucfg_id8_offset() { return byte_offset_of(CpuidInfo, cpucfg_info_id8); } ++ ++ static bool is_determine_features_test_running() { return _is_determine_cpucfg_supported_running; } ++ ++ static void clean_cpuFeatures() { _cpuFeatures = 0; } ++ ++ // Initialization ++ static void initialize(); ++ ++ static bool cpu_info_is_initialized() { return _cpu_info_is_initialized; } ++ ++ static bool supports_cpucfg() { return _is_cpucfg_instruction_supported; } ++ static bool set_supports_cpucfg(bool value) { return _is_cpucfg_instruction_supported = value; } ++ ++ static bool is_loongson() { return _cpuFeatures & CPU_LOONGSON; } ++ static bool is_gs264() { return _cpuFeatures & CPU_LOONGSON_GS264; } ++ static bool is_gs464() { return _cpuFeatures & CPU_LOONGSON_GS464; } ++ static bool is_gs464e() { return _cpuFeatures & CPU_LOONGSON_GS464E; } ++ static bool supports_dsp() { return 0; /*not supported yet*/} ++ static bool supports_ps() { return 0; /*not supported yet*/} ++ static bool supports_3d() { return 0; /*not supported yet*/} ++ static bool supports_msa1_0() { return _cpuFeatures & CPU_MSA1_0; } ++ static bool supports_msa2_0() { return _cpuFeatures & CPU_MSA2_0; } ++ static bool supports_cgp() { return _cpuFeatures & CPU_CGP; } ++ static bool supports_mmi() { return _cpuFeatures & CPU_MMI; } ++ static bool supports_lsx1() { return _cpuFeatures & CPU_LSX1; } ++ static bool supports_lsx2() { return _cpuFeatures & CPU_LSX2; } ++ static bool supports_lasx() { return _cpuFeatures & CPU_LASX; } ++ static bool supports_lext1() { return _cpuFeatures & CPU_LEXT1; } ++ static bool supports_lext2() { return _cpuFeatures & CPU_LEXT2; } ++ static bool supports_lext3() { return _cpuFeatures & CPU_LEXT3; } ++ static bool supports_lamo() { return _cpuFeatures & CPU_LAMO; } ++ static bool supports_lpixu() { return _cpuFeatures & CPU_LPIXU; } ++ static bool needs_llsync() { return _cpuFeatures & CPU_LLSYNC; } ++ static bool needs_tgtsync() { return _cpuFeatures & CPU_TGTSYNC; } ++ static bool needs_ulsync() { return _cpuFeatures & CPU_ULSYNC; } ++ static bool supports_mualp() { return _cpuFeatures & CPU_MUALP; } ++ ++ //mips has no such instructions, use ll/sc instead ++ static bool supports_compare_and_exchange() { return false; } ++ ++ static const char* cpu_features() { return _features_str; } ++ ++}; ++ ++#endif // CPU_MIPS_VM_VM_VERSION_MIPS_HPP +diff --git a/src/hotspot/cpu/mips/vmreg_mips.cpp b/src/hotspot/cpu/mips/vmreg_mips.cpp +new file mode 100644 +index 0000000000..86bd74d430 +--- /dev/null ++++ b/src/hotspot/cpu/mips/vmreg_mips.cpp +@@ -0,0 +1,51 @@ ++/* ++ * Copyright (c) 2006, 2012, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). 
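// Aside, not part of the patch: the supports_*() accessors in vm_version_mips.hpp are
// simple bit tests against a feature word that the CPUCFG probing code fills in at VM
// startup. A standalone sketch of that pattern follows; the flag names and bit values
// here are arbitrary illustrations, not HotSpot's real constants.
#include <cstdint>
#include <cstdio>

enum CpuFeature : uint32_t {
  FEATURE_LSX  = 1u << 0,   // 128-bit SIMD
  FEATURE_LASX = 1u << 1,   // 256-bit SIMD
  FEATURE_LAMO = 1u << 2    // atomic memory operations
};

static uint32_t g_features = 0;

static void detect_features() {
  // In the real code this is derived from the CPUCFG instruction; hard-coded here.
  g_features = FEATURE_LSX | FEATURE_LAMO;
}

static bool supports_lsx()  { return (g_features & FEATURE_LSX)  != 0; }
static bool supports_lasx() { return (g_features & FEATURE_LASX) != 0; }
static bool supports_lamo() { return (g_features & FEATURE_LAMO) != 0; }

int main() {
  detect_features();
  std::printf("lsx=%d lasx=%d lamo=%d\n", supports_lsx(), supports_lasx(), supports_lamo());
  return 0;
}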
++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#include "precompiled.hpp" ++#include "asm/assembler.hpp" ++#include "code/vmreg.hpp" ++ ++ ++ ++void VMRegImpl::set_regName() { ++ Register reg = ::as_Register(0); ++ int i; ++ for (i = 0; i < ConcreteRegisterImpl::max_gpr ; ) { ++ regName[i++] = reg->name(); ++ regName[i++] = reg->name(); ++ reg = reg->successor(); ++ } ++ ++ FloatRegister freg = ::as_FloatRegister(0); ++ for ( ; i < ConcreteRegisterImpl::max_fpr ; ) { ++ regName[i++] = freg->name(); ++ regName[i++] = freg->name(); ++ freg = freg->successor(); ++ } ++ ++ for ( ; i < ConcreteRegisterImpl::number_of_registers ; i ++ ) { ++ regName[i] = "NON-GPR-FPR"; ++ } ++} +diff --git a/src/hotspot/cpu/mips/vmreg_mips.hpp b/src/hotspot/cpu/mips/vmreg_mips.hpp +new file mode 100644 +index 0000000000..8ccc8c513c +--- /dev/null ++++ b/src/hotspot/cpu/mips/vmreg_mips.hpp +@@ -0,0 +1,56 @@ ++/* ++ * Copyright (c) 2006, 2010, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ * ++ */ ++ ++#ifndef CPU_MIPS_VM_VMREG_MIPS_HPP ++#define CPU_MIPS_VM_VMREG_MIPS_HPP ++ ++inline Register as_Register() { ++ assert( is_Register(), "must be"); ++ return ::as_Register(value() >> 1); ++} ++ ++inline FloatRegister as_FloatRegister() { ++ assert( is_FloatRegister(), "must be" ); ++ assert( is_even(value()), "must be" ); ++ return ::as_FloatRegister((value() - ConcreteRegisterImpl::max_gpr) >> 1); ++} ++ ++inline bool is_Register() { ++ return (unsigned int) value() < (unsigned int) ConcreteRegisterImpl::max_gpr; ++} ++ ++inline bool is_FloatRegister() { ++ return value() >= ConcreteRegisterImpl::max_gpr && value() < ConcreteRegisterImpl::max_fpr; ++} ++ ++inline bool is_concrete() { ++ assert(is_reg(), "must be"); ++ if(is_Register()) return true; ++ if(is_FloatRegister()) return true; ++ assert(false, "what register?"); ++ return false; ++} ++ ++#endif // CPU_MIPS_VM_VMREG_MIPS_HPP +diff --git a/src/hotspot/cpu/mips/vmreg_mips.inline.hpp b/src/hotspot/cpu/mips/vmreg_mips.inline.hpp +new file mode 100644 +index 0000000000..12ad7361aa +--- /dev/null ++++ b/src/hotspot/cpu/mips/vmreg_mips.inline.hpp +@@ -0,0 +1,38 @@ ++/* ++ * Copyright (c) 2006, 2012, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#ifndef CPU_MIPS_VM_VMREG_MIPS_INLINE_HPP ++#define CPU_MIPS_VM_VMREG_MIPS_INLINE_HPP ++ ++inline VMReg RegisterImpl::as_VMReg() { ++ if( this==noreg ) return VMRegImpl::Bad(); ++ return VMRegImpl::as_VMReg(encoding() << 1 ); ++} ++ ++inline VMReg FloatRegisterImpl::as_VMReg() { ++ return VMRegImpl::as_VMReg((encoding() << 1) + ConcreteRegisterImpl::max_gpr); ++} ++ ++#endif // CPU_MIPS_VM_VMREG_MIPS_INLINE_HPP +diff --git a/src/hotspot/cpu/mips/vtableStubs_mips_64.cpp b/src/hotspot/cpu/mips/vtableStubs_mips_64.cpp +new file mode 100644 +index 0000000000..75c23e8088 +--- /dev/null ++++ b/src/hotspot/cpu/mips/vtableStubs_mips_64.cpp +@@ -0,0 +1,340 @@ ++/* ++ * Copyright (c) 2003, 2014, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. 
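// Aside, not part of the patch: in the vmreg code above every general-purpose and
// floating-point register occupies two consecutive VMReg slots, which is why
// as_VMReg() shifts the encoding left by one and as_Register() shifts back right.
// A rough sketch of that round-trip with plain integers standing in for the
// register types; the 32-register count is an assumption for illustration only.
#include <cassert>
#include <cstdio>

const int MAX_GPR_SLOTS = 32 * 2;   // 32 GPRs, two VMReg slots each (illustrative)

int  gpr_to_vmreg(int gpr_encoding) { return gpr_encoding << 1; }
int  fpr_to_vmreg(int fpr_encoding) { return (fpr_encoding << 1) + MAX_GPR_SLOTS; }
bool vmreg_is_gpr(int slot)         { return slot < MAX_GPR_SLOTS; }
int  vmreg_to_gpr(int slot)         { assert(vmreg_is_gpr(slot));  return slot >> 1; }
int  vmreg_to_fpr(int slot)         { assert(!vmreg_is_gpr(slot)); return (slot - MAX_GPR_SLOTS) >> 1; }

int main() {
  int slot = gpr_to_vmreg(22);   // e.g. the frame-pointer register's encoding
  std::printf("gpr 22 -> slot %d -> gpr %d\n", slot, vmreg_to_gpr(slot));
  int fslot = fpr_to_vmreg(3);
  std::printf("fpr 3 -> slot %d -> fpr %d\n", fslot, vmreg_to_fpr(fslot));
  return 0;
}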
++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#include "precompiled.hpp" ++#include "asm/macroAssembler.hpp" ++#include "code/vtableStubs.hpp" ++#include "interp_masm_mips.hpp" ++#include "memory/resourceArea.hpp" ++#include "oops/compiledICHolder.hpp" ++#include "oops/klass.inline.hpp" ++#include "oops/klassVtable.hpp" ++#include "runtime/sharedRuntime.hpp" ++#include "vmreg_mips.inline.hpp" ++#ifdef COMPILER2 ++#include "opto/runtime.hpp" ++#endif ++ ++ ++// machine-dependent part of VtableStubs: create VtableStub of correct size and ++// initialize its code ++ ++#define __ masm-> ++ ++#define T0 RT0 ++#define T1 RT1 ++#define T2 RT2 ++#define T3 RT3 ++#define T8 RT8 ++#define T9 RT9 ++ ++#ifndef PRODUCT ++extern "C" void bad_compiled_vtable_index(JavaThread* thread, oop receiver, int index); ++#endif ++ ++// used by compiler only; reciever in T0. ++// used registers : ++// Rmethod : receiver klass & method ++// NOTE: If this code is used by the C1, the receiver_location is always 0. ++// when reach here, receiver in T0, klass in T8 ++VtableStub* VtableStubs::create_vtable_stub(int vtable_index) { ++ // Read "A word on VtableStub sizing" in share/code/vtableStubs.hpp for details on stub sizing. ++ const int stub_code_length = code_size_limit(true); ++ VtableStub* s = new(stub_code_length) VtableStub(true, vtable_index); ++ // Can be NULL if there is no free space in the code cache. ++ if (s == NULL) { ++ return NULL; ++ } ++ ++ // Count unused bytes in instruction sequences of variable size. ++ // We add them to the computed buffer size in order to avoid ++ // overflow in subsequently generated stubs. ++ address start_pc; ++ int slop_bytes = 0; ++ int slop_delta = 0; ++ int load_const_maxLen = 6*BytesPerInstWord; // load_const generates 6 instructions. Assume that as max size for li ++ // No variance was detected in vtable stub sizes. Setting index_dependent_slop == 0 will unveil any deviation from this observation. 
++ const int index_dependent_slop = 0; ++ ++ ResourceMark rm; ++ CodeBuffer cb(s->entry_point(), stub_code_length); ++ MacroAssembler* masm = new MacroAssembler(&cb); ++ Register t1 = T8, t2 = Rmethod; ++#if (!defined(PRODUCT) && defined(COMPILER2)) ++ if (CountCompiledCalls) { ++ start_pc = __ pc(); ++ __ li(AT, SharedRuntime::nof_megamorphic_calls_addr()); ++ slop_delta = load_const_maxLen - (__ pc() - start_pc); ++ slop_bytes += slop_delta; ++ assert(slop_delta >= 0, "negative slop(%d) encountered, adjust code size estimate!", slop_delta); ++ __ lw(t1, AT , 0); ++ __ addiu(t1, t1, 1); ++ __ sw(t1, AT,0); ++ } ++#endif ++ ++ // get receiver (need to skip return address on top of stack) ++ //assert(receiver_location == T0->as_VMReg(), "receiver expected in T0"); ++ ++ // get receiver klass ++ address npe_addr = __ pc(); ++ //add for compressedoops ++ __ load_klass(t1, T0); ++ ++#ifndef PRODUCT ++ if (DebugVtables) { ++ Label L; ++ // check offset vs vtable length ++ __ lw(t2, t1, in_bytes(Klass::vtable_length_offset())); ++ assert(Assembler::is_simm16(vtable_index*vtableEntry::size()), "change this code"); ++ __ move(AT, vtable_index*vtableEntry::size()); ++ __ slt(AT, AT, t2); ++ __ bne(AT, R0, L); ++ __ delayed()->nop(); ++ __ move(A2, vtable_index); ++ __ move(A1, A0); ++ ++ // VTABLE TODO: find upper bound for call_VM length. ++ start_pc = __ pc(); ++ __ call_VM(noreg, CAST_FROM_FN_PTR(address, bad_compiled_vtable_index), A1, A2); ++ const ptrdiff_t estimate = 512; ++ const ptrdiff_t codesize = __ pc() - start_pc; ++ slop_delta = estimate - codesize; // call_VM varies in length, depending on data ++ assert(slop_delta >= 0, "vtable #%d: Code size estimate (%d) for DebugVtables too small, required: %d", vtable_index, (int)estimate, (int)codesize); ++ __ bind(L); ++ } ++#endif // PRODUCT ++ const Register method = Rmethod; ++ ++ // load methodOop and target address ++ start_pc = __ pc(); ++ // lookup_virtual_method generates 18 instructions (worst case) ++ __ lookup_virtual_method(t1, vtable_index, method); ++ slop_delta = 18*BytesPerInstWord - (int)(__ pc() - start_pc); ++ slop_bytes += slop_delta; ++ assert(slop_delta >= 0, "negative slop(%d) encountered, adjust code size estimate!", slop_delta); ++ ++#ifndef PRODUCT ++ if (DebugVtables) { ++ Label L; ++ __ beq(method, R0, L); ++ __ delayed()->nop(); ++ __ ld(AT, method,in_bytes(Method::from_compiled_offset())); ++ __ bne(AT, R0, L); ++ __ delayed()->nop(); ++ __ stop("Vtable entry is NULL"); ++ __ bind(L); ++ } ++#endif // PRODUCT ++ ++ // T8: receiver klass ++ // T0: receiver ++ // Rmethod: methodOop ++ // T9: entry ++ address ame_addr = __ pc(); ++ __ ld_ptr(T9, method,in_bytes(Method::from_compiled_offset())); ++ __ jr(T9); ++ __ delayed()->nop(); ++ masm->flush(); ++ slop_bytes += index_dependent_slop; // add'l slop for size variance due to large itable offsets ++ bookkeeping(masm, tty, s, npe_addr, ame_addr, true, vtable_index, slop_bytes, index_dependent_slop); ++ ++ return s; ++} ++ ++ ++// used registers : ++// T1 T2 ++// when reach here, the receiver in T0, klass in T1 ++VtableStub* VtableStubs::create_itable_stub(int itable_index) { ++ // Read "A word on VtableStub sizing" in share/code/vtableStubs.hpp for details on stub sizing. ++ const int stub_code_length = code_size_limit(false); ++ VtableStub* s = new(stub_code_length) VtableStub(false, itable_index); ++ // Can be NULL if there is no free space in the code cache. 
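// Aside, not part of the patch: stripped of slop accounting and debug checks, the
// vtable stub emitted above performs three logical steps - load the receiver's klass,
// index its vtable at a compile-time-constant slot, and jump to the selected method's
// compiled entry point. A plain-C++ sketch of that dispatch; the types and field
// names below are simplified stand-ins, not HotSpot's real layout.
#include <cstdio>

struct Method { void (*compiled_entry)(); };
struct Klass  { Method* vtable[8]; };      // fixed-size table just for illustration
struct Object { Klass* klass; };           // object header holds the klass pointer

// What the generated stub does for one hard-wired vtable_index.
void dispatch_virtual(Object* receiver, int vtable_index) {
  Klass*  k = receiver->klass;             // "load_klass(t1, T0)"
  Method* m = k->vtable[vtable_index];     // "lookup_virtual_method(...)"
  m->compiled_entry();                     // "jr T9" to Method::from_compiled
}

static void say_hello() { std::puts("hello from vtable slot 2"); }

int main() {
  Method m = { say_hello };
  Klass  k = {};
  k.vtable[2] = &m;
  Object obj = { &k };
  dispatch_virtual(&obj, 2);
  return 0;
}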
++ if (s == NULL) { ++ return NULL; ++ } ++ // Count unused bytes in instruction sequences of variable size. ++ // We add them to the computed buffer size in order to avoid ++ // overflow in subsequently generated stubs. ++ address start_pc; ++ int slop_bytes = 0; ++ int slop_delta = 0; ++ int load_const_maxLen = 6*BytesPerInstWord; // load_const generates 6 instructions. Assume that as max size for li ++ ++ ResourceMark rm; ++ CodeBuffer cb(s->entry_point(), stub_code_length); ++ MacroAssembler *masm = new MacroAssembler(&cb); ++ ++ // we T8,T9 as temparary register, they are free from register allocator ++ Register t1 = T8, t2 = T2; ++ // Entry arguments: ++ // T1: Interface ++ // T0: Receiver ++ ++#if (!defined(PRODUCT) && defined(COMPILER2)) ++ if (CountCompiledCalls) { ++ start_pc = __ pc(); ++ __ li(AT, SharedRuntime::nof_megamorphic_calls_addr()); ++ slop_delta = load_const_maxLen - (__ pc() - start_pc); ++ slop_bytes += slop_delta; ++ assert(slop_delta >= 0, "negative slop(%d) encountered, adjust code size estimate!", slop_delta); ++ __ lw(T8, AT, 0); ++ __ addiu(T8, T8,1); ++ __ sw(T8, AT, 0); ++ } ++#endif // PRODUCT ++ ++ const Register holder_klass_reg = T1; // declaring interface klass (DECC) ++ const Register resolved_klass_reg = Rmethod; // resolved interface klass (REFC) ++ ++ const Register icholder_reg = T1; ++ __ ld_ptr(resolved_klass_reg, icholder_reg, CompiledICHolder::holder_klass_offset()); ++ __ ld_ptr(holder_klass_reg, icholder_reg, CompiledICHolder::holder_metadata_offset()); ++ ++ Label L_no_such_interface; ++ ++ // get receiver klass (also an implicit null-check) ++ address npe_addr = __ pc(); ++ __ load_klass(t1, T0); ++ { ++ // x86 use lookup_interface_method, but lookup_interface_method does not work on MIPS. ++ // No dynamic code size variance here, so slop_bytes is not needed. ++ const int base = in_bytes(Klass::vtable_start_offset()); ++ assert(vtableEntry::size() * wordSize == 8, "adjust the scaling in the code below"); ++ assert(Assembler::is_simm16(base), "change this code"); ++ __ daddiu(t2, t1, base); ++ __ lw(AT, t1, in_bytes(Klass::vtable_length_offset())); ++ __ dsll(AT, AT, Address::times_8); ++ __ daddu(t2, t2, AT); ++ if (HeapWordsPerLong > 1) { ++ __ round_to(t2, BytesPerLong); ++ } ++ ++ Label hit, entry; ++ __ bind(entry); ++ ++ // Check that the entry is non-null. A null entry means that ++ // the receiver class doesn't implement the interface, and wasn't the ++ // same as when the caller was compiled. ++ __ ld_ptr(AT, t2, itableOffsetEntry::interface_offset_in_bytes()); ++ __ beq(AT, R0, L_no_such_interface); ++ __ delayed()->nop(); ++ ++ __ bne(AT, resolved_klass_reg, entry); ++ __ delayed()->addiu(t2, t2, itableOffsetEntry::size() * wordSize); ++ ++ } ++ ++ // add for compressedoops ++ __ load_klass(t1, T0); ++ // compute itable entry offset (in words) ++ const int base = in_bytes(Klass::vtable_start_offset()); ++ __ daddiu(t2, t1, base); ++ __ lw(AT, t1, in_bytes(Klass::vtable_length_offset())); ++ __ dsll(AT, AT, Address::times_8); ++ __ daddu(t2, t2, AT); ++ if (HeapWordsPerLong > 1) { ++ __ round_to(t2, BytesPerLong); ++ } ++ ++ Label hit, entry; ++ __ bind(entry); ++ ++ // Check that the entry is non-null. A null entry means that ++ // the receiver class doesn't implement the interface, and wasn't the ++ // same as when the caller was compiled. 
++ __ ld_ptr(AT, t2, itableOffsetEntry::interface_offset_in_bytes()); ++ __ beq(AT, R0, L_no_such_interface); ++ __ delayed()->nop(); ++ ++ __ bne(AT, holder_klass_reg, entry); ++ __ delayed()->addiu(t2, t2, itableOffsetEntry::size() * wordSize); ++ ++ // We found a hit, move offset into T9 ++ __ ld_ptr(t2, t2, itableOffsetEntry::offset_offset_in_bytes() - itableOffsetEntry::size() * wordSize); ++ ++ // Compute itableMethodEntry. ++ const int method_offset = (itableMethodEntry::size() * wordSize * itable_index) + ++ itableMethodEntry::method_offset_in_bytes(); ++ ++ // Get methodOop and entrypoint for compiler ++ const Register method = Rmethod; ++ __ dsll(AT, t2, Address::times_1); ++ __ addu(AT, AT, t1 ); ++ start_pc = __ pc(); ++ __ set64(t1, method_offset); ++ slop_delta = load_const_maxLen - (__ pc() - start_pc); ++ slop_bytes += slop_delta; ++ assert(slop_delta >= 0, "negative slop(%d) encountered, adjust code size estimate!", slop_delta); ++ __ addu(AT, AT, t1 ); ++ __ ld_ptr(method, AT, 0); ++ ++#ifdef ASSERT ++ if (DebugVtables) { ++ Label L1; ++ __ beq(method, R0, L1); ++ __ delayed()->nop(); ++ __ ld(AT, method,in_bytes(Method::from_compiled_offset())); ++ __ bne(AT, R0, L1); ++ __ delayed()->nop(); ++ __ stop("methodOop is null"); ++ __ bind(L1); ++ } ++#endif // ASSERT ++ ++ // Rmethod: methodOop ++ // T0: receiver ++ // T9: entry point ++ address ame_addr = __ pc(); ++ __ ld_ptr(T9, method,in_bytes(Method::from_compiled_offset())); ++ __ jr(T9); ++ __ delayed()->nop(); ++ ++ __ bind(L_no_such_interface); ++ // Handle IncompatibleClassChangeError in itable stubs. ++ // More detailed error message. ++ // We force resolving of the call site by jumping to the "handle ++ // wrong method" stub, and so let the interpreter runtime do all the ++ // dirty work. 
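// Aside, not part of the patch: the itable stub above is a linear scan - walk the
// receiver klass's list of (interface, offset) pairs until the requested interface is
// found, then use the recorded offset plus itable_index to pick the method; a null
// interface entry means the class does not implement the interface, which the real
// stub turns into the "handle wrong method" path. A simplified sketch with ordinary
// structs (the layout below is illustrative, not HotSpot's real itable layout):
#include <cstdio>

struct Method { const char* name; };

struct ItableOffsetEntry {
  const void* interface_klass;   // null marks the end of the list
  int         method_table_off;  // index of this interface's first method
};

struct Klass {
  ItableOffsetEntry offsets[4];
  Method            methods[8]; // all interface methods, grouped per interface
};

Method* lookup_interface_method(Klass* k, const void* interface_klass, int itable_index) {
  for (ItableOffsetEntry* e = k->offsets; ; ++e) {
    if (e->interface_klass == nullptr) return nullptr;          // no such interface
    if (e->interface_klass == interface_klass)
      return &k->methods[e->method_table_off + itable_index];   // hit
  }
}

int main() {
  static int ifaceA, ifaceB;   // addresses stand in for interface klass pointers
  Klass k = {{{&ifaceA, 0}, {&ifaceB, 3}, {nullptr, 0}, {nullptr, 0}},
             {{"A.m0"}, {"A.m1"}, {"A.m2"}, {"B.m0"}, {"B.m1"}, {"B.m2"}, {"B.m3"}, {"B.m4"}}};
  Method* m = lookup_interface_method(&k, &ifaceB, 1);
  std::printf("resolved %s\n", m ? m->name : "(IncompatibleClassChangeError)");
  return 0;
}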
++ start_pc = __ pc(); ++ __ set64(T9, (long)SharedRuntime::get_handle_wrong_method_stub()); ++ slop_delta = load_const_maxLen - (__ pc() - start_pc); ++ slop_bytes += slop_delta; ++ assert(slop_delta >= 0, "negative slop(%d) encountered, adjust code size estimate!", slop_delta); ++ __ jr(T9); ++ __ delayed()->nop(); ++ ++ masm->flush(); ++ bookkeeping(masm, tty, s, npe_addr, ame_addr, false, itable_index, slop_bytes, 0); ++ ++ return s; ++} ++ ++// NOTE : whenever you change the code above, dont forget to change the const here ++int VtableStub::pd_code_alignment() { ++ const unsigned int icache_line_size = wordSize; ++ return icache_line_size; ++} +diff --git a/src/hotspot/cpu/ppc/c1_LIRAssembler_ppc.cpp b/src/hotspot/cpu/ppc/c1_LIRAssembler_ppc.cpp +index 847f7d61d2..f570946090 100644 +--- a/src/hotspot/cpu/ppc/c1_LIRAssembler_ppc.cpp ++++ b/src/hotspot/cpu/ppc/c1_LIRAssembler_ppc.cpp +@@ -488,6 +488,9 @@ void LIR_Assembler::emit_opBranch(LIR_OpBranch* op) { + } + } + ++void LIR_Assembler::emit_opCmpBranch(LIR_OpCmpBranch* op) { ++ ShouldNotReachHere(); ++} + + void LIR_Assembler::emit_opConvert(LIR_OpConvert* op) { + Bytecodes::Code code = op->bytecode(); +@@ -1608,6 +1611,10 @@ void LIR_Assembler::cmove(LIR_Condition condition, LIR_Opr opr1, LIR_Opr opr2, L + __ bind(skip); + } + ++void LIR_Assembler::cmp_cmove(LIR_Condition condition, LIR_Opr left, LIR_Opr right, LIR_Opr src1, LIR_Opr src2, LIR_Opr result, BasicType type) { ++ ShouldNotReachHere(); ++} ++ + + void LIR_Assembler::arith_op(LIR_Code code, LIR_Opr left, LIR_Opr right, LIR_Opr dest, + CodeEmitInfo* info, bool pop_fpu_stack) { +diff --git a/src/hotspot/cpu/ppc/c1_LIRGenerator_ppc.cpp b/src/hotspot/cpu/ppc/c1_LIRGenerator_ppc.cpp +index d34ea45c0b..f6b6dbdee3 100644 +--- a/src/hotspot/cpu/ppc/c1_LIRGenerator_ppc.cpp ++++ b/src/hotspot/cpu/ppc/c1_LIRGenerator_ppc.cpp +@@ -273,21 +273,29 @@ void LIRGenerator::increment_counter(LIR_Address* addr, int step) { + __ move(temp, addr); + } + +- +-void LIRGenerator::cmp_mem_int(LIR_Condition condition, LIR_Opr base, int disp, int c, CodeEmitInfo* info) { ++template ++void LIRGenerator::cmp_mem_int_branch(LIR_Condition condition, LIR_Opr base, int disp, int c, T tgt, CodeEmitInfo* info) { + LIR_Opr tmp = FrameMap::R0_opr; + __ load(new LIR_Address(base, disp, T_INT), tmp, info); +- __ cmp(condition, tmp, c); ++ __ cmp_branch(condition, tmp, c, T_INT, tgt); + } + ++// Explicit instantiation for all supported types. ++template void LIRGenerator::cmp_mem_int_branch(LIR_Condition, LIR_Opr, int, int, Label*, CodeEmitInfo*); ++template void LIRGenerator::cmp_mem_int_branch(LIR_Condition, LIR_Opr, int, int, BlockBegin*, CodeEmitInfo*); ++template void LIRGenerator::cmp_mem_int_branch(LIR_Condition, LIR_Opr, int, int, CodeStub*, CodeEmitInfo*); + +-void LIRGenerator::cmp_reg_mem(LIR_Condition condition, LIR_Opr reg, LIR_Opr base, +- int disp, BasicType type, CodeEmitInfo* info) { ++template ++void LIRGenerator::cmp_reg_mem_branch(LIR_Condition condition, LIR_Opr reg, LIR_Opr base, int disp, BasicType type, T tgt, CodeEmitInfo* info) { + LIR_Opr tmp = FrameMap::R0_opr; + __ load(new LIR_Address(base, disp, type), tmp, info); +- __ cmp(condition, reg, tmp); ++ __ cmp_branch(condition, reg, tmp, type, tgt); + } + ++// Explicit instantiation for all supported types. 
++template void LIRGenerator::cmp_reg_mem_branch(LIR_Condition, LIR_Opr, LIR_Opr, int, BasicType, Label*, CodeEmitInfo*); ++template void LIRGenerator::cmp_reg_mem_branch(LIR_Condition, LIR_Opr, LIR_Opr, int, BasicType, BlockBegin*, CodeEmitInfo*); ++template void LIRGenerator::cmp_reg_mem_branch(LIR_Condition, LIR_Opr, LIR_Opr, int, BasicType, CodeStub*, CodeEmitInfo*); + + bool LIRGenerator::strength_reduce_multiply(LIR_Opr left, jint c, LIR_Opr result, LIR_Opr tmp) { + assert(left != result, "should be different registers"); +diff --git a/src/hotspot/cpu/ppc/c1_LIR_ppc.cpp b/src/hotspot/cpu/ppc/c1_LIR_ppc.cpp +index ef9b0833d3..c6b25bf10e 100644 +--- a/src/hotspot/cpu/ppc/c1_LIR_ppc.cpp ++++ b/src/hotspot/cpu/ppc/c1_LIR_ppc.cpp +@@ -62,3 +62,24 @@ void LIR_Address::verify() const { + #endif + } + #endif // PRODUCT ++ ++template ++void LIR_List::cmp_branch(LIR_Condition condition, LIR_Opr left, LIR_Opr right, BasicType type, T tgt, CodeEmitInfo* info) { ++ cmp(condition, left, right, info); ++ branch(condition, type, tgt); ++} ++ ++// Explicit instantiation for all supported types. ++template void LIR_List::cmp_branch(LIR_Condition, LIR_Opr, LIR_Opr, BasicType type, Label*, CodeEmitInfo*); ++template void LIR_List::cmp_branch(LIR_Condition, LIR_Opr, LIR_Opr, BasicType type, BlockBegin*, CodeEmitInfo*); ++template void LIR_List::cmp_branch(LIR_Condition, LIR_Opr, LIR_Opr, BasicType type, CodeStub*, CodeEmitInfo*); ++ ++void LIR_List::cmp_branch(LIR_Condition condition, LIR_Opr left, LIR_Opr right, BasicType type, BlockBegin* block, BlockBegin* unordered) { ++ cmp(condition, left, right); ++ branch(condition, type, block, unordered); ++} ++ ++void LIR_List::cmp_cmove(LIR_Condition condition, LIR_Opr left, LIR_Opr right, LIR_Opr src1, LIR_Opr src2, LIR_Opr dst, BasicType type) { ++ cmp(condition, left, right); ++ cmove(condition, src1, src2, dst, type); ++} +diff --git a/src/hotspot/cpu/s390/c1_LIRAssembler_s390.cpp b/src/hotspot/cpu/s390/c1_LIRAssembler_s390.cpp +index 897be2209e..0c27cc20f3 100644 +--- a/src/hotspot/cpu/s390/c1_LIRAssembler_s390.cpp ++++ b/src/hotspot/cpu/s390/c1_LIRAssembler_s390.cpp +@@ -379,6 +379,9 @@ void LIR_Assembler::emit_opBranch(LIR_OpBranch* op) { + } + } + ++void LIR_Assembler::emit_opCmpBranch(LIR_OpCmpBranch* op) { ++ ShouldNotReachHere(); ++} + + void LIR_Assembler::emit_opConvert(LIR_OpConvert* op) { + LIR_Opr src = op->in_opr(); +@@ -1503,6 +1506,10 @@ void LIR_Assembler::cmove(LIR_Condition condition, LIR_Opr opr1, LIR_Opr opr2, L + } + } + ++void LIR_Assembler::cmp_cmove(LIR_Condition condition, LIR_Opr left, LIR_Opr right, LIR_Opr src1, LIR_Opr src2, LIR_Opr result, BasicType type) { ++ ShouldNotReachHere(); ++} ++ + void LIR_Assembler::arith_op(LIR_Code code, LIR_Opr left, LIR_Opr right, LIR_Opr dest, + CodeEmitInfo* info, bool pop_fpu_stack) { + assert(info == NULL, "should never be used, idiv/irem and ldiv/lrem not handled by this method"); +diff --git a/src/hotspot/cpu/s390/c1_LIRGenerator_s390.cpp b/src/hotspot/cpu/s390/c1_LIRGenerator_s390.cpp +index ae297ac635..c786803e0f 100644 +--- a/src/hotspot/cpu/s390/c1_LIRGenerator_s390.cpp ++++ b/src/hotspot/cpu/s390/c1_LIRGenerator_s390.cpp +@@ -213,16 +213,29 @@ void LIRGenerator::increment_counter(LIR_Address* addr, int step) { + __ add((LIR_Opr)addr, LIR_OprFact::intConst(step), (LIR_Opr)addr); + } + +-void LIRGenerator::cmp_mem_int(LIR_Condition condition, LIR_Opr base, int disp, int c, CodeEmitInfo* info) { ++template ++void LIRGenerator::cmp_mem_int_branch(LIR_Condition condition, LIR_Opr base, 
int disp, int c, T tgt, CodeEmitInfo* info) { + LIR_Opr scratch = FrameMap::Z_R1_opr; + __ load(new LIR_Address(base, disp, T_INT), scratch, info); +- __ cmp(condition, scratch, c); ++ __ cmp_branch(condition, scratch, c, T_INT, tgt); + } + +-void LIRGenerator::cmp_reg_mem(LIR_Condition condition, LIR_Opr reg, LIR_Opr base, int disp, BasicType type, CodeEmitInfo* info) { ++// Explicit instantiation for all supported types. ++template void LIRGenerator::cmp_mem_int_branch(LIR_Condition, LIR_Opr, int, int, Label*, CodeEmitInfo*); ++template void LIRGenerator::cmp_mem_int_branch(LIR_Condition, LIR_Opr, int, int, BlockBegin*, CodeEmitInfo*); ++template void LIRGenerator::cmp_mem_int_branch(LIR_Condition, LIR_Opr, int, int, CodeStub*, CodeEmitInfo*); ++ ++template ++void LIRGenerator::cmp_reg_mem_branch(LIR_Condition condition, LIR_Opr reg, LIR_Opr base, int disp, BasicType type, T tgt, CodeEmitInfo* info) { + __ cmp_reg_mem(condition, reg, new LIR_Address(base, disp, type), info); ++ __ branch(condition, type, tgt); + } + ++// Explicit instantiation for all supported types. ++template void LIRGenerator::cmp_reg_mem_branch(LIR_Condition, LIR_Opr, LIR_Opr, int, BasicType, Label*, CodeEmitInfo*); ++template void LIRGenerator::cmp_reg_mem_branch(LIR_Condition, LIR_Opr, LIR_Opr, int, BasicType, BlockBegin*, CodeEmitInfo*); ++template void LIRGenerator::cmp_reg_mem_branch(LIR_Condition, LIR_Opr, LIR_Opr, int, BasicType, CodeStub*, CodeEmitInfo*); ++ + bool LIRGenerator::strength_reduce_multiply(LIR_Opr left, jint c, LIR_Opr result, LIR_Opr tmp) { + if (tmp->is_valid()) { + if (is_power_of_2(c + 1)) { +diff --git a/src/hotspot/cpu/s390/c1_LIR_s390.cpp b/src/hotspot/cpu/s390/c1_LIR_s390.cpp +index 9507ca0856..2116e9af2b 100644 +--- a/src/hotspot/cpu/s390/c1_LIR_s390.cpp ++++ b/src/hotspot/cpu/s390/c1_LIR_s390.cpp +@@ -56,3 +56,23 @@ void LIR_Address::verify() const { + } + #endif // PRODUCT + ++template ++void LIR_List::cmp_branch(LIR_Condition condition, LIR_Opr left, LIR_Opr right, BasicType type, T tgt, CodeEmitInfo* info) { ++ cmp(condition, left, right, info); ++ branch(condition, type, tgt); ++} ++ ++// Explicit instantiation for all supported types. 
++template void LIR_List::cmp_branch(LIR_Condition, LIR_Opr, LIR_Opr, BasicType type, Label*, CodeEmitInfo*); ++template void LIR_List::cmp_branch(LIR_Condition, LIR_Opr, LIR_Opr, BasicType type, BlockBegin*, CodeEmitInfo*); ++template void LIR_List::cmp_branch(LIR_Condition, LIR_Opr, LIR_Opr, BasicType type, CodeStub*, CodeEmitInfo*); ++ ++void LIR_List::cmp_branch(LIR_Condition condition, LIR_Opr left, LIR_Opr right, BasicType type, BlockBegin* block, BlockBegin* unordered) { ++ cmp(condition, left, right); ++ branch(condition, type, block, unordered); ++} ++ ++void LIR_List::cmp_cmove(LIR_Condition condition, LIR_Opr left, LIR_Opr right, LIR_Opr src1, LIR_Opr src2, LIR_Opr dst, BasicType type) { ++ cmp(condition, left, right); ++ cmove(condition, src1, src2, dst, type); ++} +diff --git a/src/hotspot/cpu/sparc/c1_LIRAssembler_sparc.cpp b/src/hotspot/cpu/sparc/c1_LIRAssembler_sparc.cpp +index e503159eb7..2e5609fec8 100644 +--- a/src/hotspot/cpu/sparc/c1_LIRAssembler_sparc.cpp ++++ b/src/hotspot/cpu/sparc/c1_LIRAssembler_sparc.cpp +@@ -599,6 +599,9 @@ void LIR_Assembler::emit_opBranch(LIR_OpBranch* op) { + // The peephole pass fills the delay slot + } + ++void LIR_Assembler::emit_opCmpBranch(LIR_OpCmpBranch* op) { ++ ShouldNotReachHere(); ++} + + void LIR_Assembler::emit_opConvert(LIR_OpConvert* op) { + Bytecodes::Code code = op->bytecode(); +@@ -1638,6 +1641,9 @@ void LIR_Assembler::cmove(LIR_Condition condition, LIR_Opr opr1, LIR_Opr opr2, L + __ bind(skip); + } + ++void LIR_Assembler::cmp_cmove(LIR_Condition condition, LIR_Opr left, LIR_Opr right, LIR_Opr src1, LIR_Opr src2, LIR_Opr result, BasicType type) { ++ ShouldNotReachHere(); ++} + + void LIR_Assembler::arith_op(LIR_Code code, LIR_Opr left, LIR_Opr right, LIR_Opr dest, CodeEmitInfo* info, bool pop_fpu_stack) { + assert(info == NULL, "unused on this code path"); +diff --git a/src/hotspot/cpu/sparc/c1_LIRGenerator_sparc.cpp b/src/hotspot/cpu/sparc/c1_LIRGenerator_sparc.cpp +index a09a159722..a02ffafc77 100644 +--- a/src/hotspot/cpu/sparc/c1_LIRGenerator_sparc.cpp ++++ b/src/hotspot/cpu/sparc/c1_LIRGenerator_sparc.cpp +@@ -267,19 +267,29 @@ void LIRGenerator::increment_counter(LIR_Address* addr, int step) { + __ move(temp, addr); + } + +-void LIRGenerator::cmp_mem_int(LIR_Condition condition, LIR_Opr base, int disp, int c, CodeEmitInfo* info) { ++template ++void LIRGenerator::cmp_mem_int_branch(LIR_Condition condition, LIR_Opr base, int disp, int c, T tgt, CodeEmitInfo* info) { + LIR_Opr o7opr = FrameMap::O7_opr; + __ load(new LIR_Address(base, disp, T_INT), o7opr, info); +- __ cmp(condition, o7opr, c); ++ __ cmp_branch(condition, o7opr, c, T_INT, tgt); + } + ++// Explicit instantiation for all supported types. 
++template void LIRGenerator::cmp_mem_int_branch(LIR_Condition, LIR_Opr, int, int, Label*, CodeEmitInfo*); ++template void LIRGenerator::cmp_mem_int_branch(LIR_Condition, LIR_Opr, int, int, BlockBegin*, CodeEmitInfo*); ++template void LIRGenerator::cmp_mem_int_branch(LIR_Condition, LIR_Opr, int, int, CodeStub*, CodeEmitInfo*); + +-void LIRGenerator::cmp_reg_mem(LIR_Condition condition, LIR_Opr reg, LIR_Opr base, int disp, BasicType type, CodeEmitInfo* info) { ++template ++void LIRGenerator::cmp_reg_mem_branch(LIR_Condition condition, LIR_Opr reg, LIR_Opr base, int disp, BasicType type, T tgt, CodeEmitInfo* info) { + LIR_Opr o7opr = FrameMap::O7_opr; + __ load(new LIR_Address(base, disp, type), o7opr, info); +- __ cmp(condition, reg, o7opr); ++ __ cmp_branch(condition, reg, o7opr, type, tgt); + } + ++// Explicit instantiation for all supported types. ++template void LIRGenerator::cmp_reg_mem_branch(LIR_Condition, LIR_Opr, LIR_Opr, int, BasicType, Label*, CodeEmitInfo*); ++template void LIRGenerator::cmp_reg_mem_branch(LIR_Condition, LIR_Opr, LIR_Opr, int, BasicType, BlockBegin*, CodeEmitInfo*); ++template void LIRGenerator::cmp_reg_mem_branch(LIR_Condition, LIR_Opr, LIR_Opr, int, BasicType, CodeStub*, CodeEmitInfo*); + + bool LIRGenerator::strength_reduce_multiply(LIR_Opr left, int c, LIR_Opr result, LIR_Opr tmp) { + assert(left != result, "should be different registers"); +diff --git a/src/hotspot/cpu/sparc/c1_LIR_sparc.cpp b/src/hotspot/cpu/sparc/c1_LIR_sparc.cpp +index c21d2c1d9a..9cebb387e2 100644 +--- a/src/hotspot/cpu/sparc/c1_LIR_sparc.cpp ++++ b/src/hotspot/cpu/sparc/c1_LIR_sparc.cpp +@@ -54,3 +54,24 @@ void LIR_Address::verify() const { + "wrong type for addresses"); + } + #endif // PRODUCT ++ ++template ++void LIR_List::cmp_branch(LIR_Condition condition, LIR_Opr left, LIR_Opr right, BasicType type, T tgt, CodeEmitInfo* info) { ++ cmp(condition, left, right, info); ++ branch(condition, type, tgt); ++} ++ ++// Explicit instantiation for all supported types. 
++template void LIR_List::cmp_branch(LIR_Condition, LIR_Opr, LIR_Opr, BasicType type, Label*, CodeEmitInfo*); ++template void LIR_List::cmp_branch(LIR_Condition, LIR_Opr, LIR_Opr, BasicType type, BlockBegin*, CodeEmitInfo*); ++template void LIR_List::cmp_branch(LIR_Condition, LIR_Opr, LIR_Opr, BasicType type, CodeStub*, CodeEmitInfo*); ++ ++void LIR_List::cmp_branch(LIR_Condition condition, LIR_Opr left, LIR_Opr right, BasicType type, BlockBegin* block, BlockBegin* unordered) { ++ cmp(condition, left, right); ++ branch(condition, type, block, unordered); ++} ++ ++void LIR_List::cmp_cmove(LIR_Condition condition, LIR_Opr left, LIR_Opr right, LIR_Opr src1, LIR_Opr src2, LIR_Opr dst, BasicType type) { ++ cmp(condition, left, right); ++ cmove(condition, src1, src2, dst, type); ++} +diff --git a/src/hotspot/cpu/x86/c1_LIRAssembler_x86.cpp b/src/hotspot/cpu/x86/c1_LIRAssembler_x86.cpp +index cee3140f4f..7b76eb0b9e 100644 +--- a/src/hotspot/cpu/x86/c1_LIRAssembler_x86.cpp ++++ b/src/hotspot/cpu/x86/c1_LIRAssembler_x86.cpp +@@ -1442,6 +1442,10 @@ void LIR_Assembler::emit_opBranch(LIR_OpBranch* op) { + } + } + ++void LIR_Assembler::emit_opCmpBranch(LIR_OpCmpBranch* op) { ++ ShouldNotReachHere(); ++} ++ + void LIR_Assembler::emit_opConvert(LIR_OpConvert* op) { + LIR_Opr src = op->in_opr(); + LIR_Opr dest = op->result_opr(); +@@ -2030,6 +2034,9 @@ void LIR_Assembler::cmove(LIR_Condition condition, LIR_Opr opr1, LIR_Opr opr2, L + } + } + ++void LIR_Assembler::cmp_cmove(LIR_Condition condition, LIR_Opr left, LIR_Opr right, LIR_Opr src1, LIR_Opr src2, LIR_Opr result, BasicType type) { ++ ShouldNotReachHere(); ++} + + void LIR_Assembler::arith_op(LIR_Code code, LIR_Opr left, LIR_Opr right, LIR_Opr dest, CodeEmitInfo* info, bool pop_fpu_stack) { + assert(info == NULL, "should never be used, idiv/irem and ldiv/lrem not handled by this method"); +diff --git a/src/hotspot/cpu/x86/c1_LIRGenerator_x86.cpp b/src/hotspot/cpu/x86/c1_LIRGenerator_x86.cpp +index 905708a9fa..1c6774e1d6 100644 +--- a/src/hotspot/cpu/x86/c1_LIRGenerator_x86.cpp ++++ b/src/hotspot/cpu/x86/c1_LIRGenerator_x86.cpp +@@ -255,15 +255,27 @@ void LIRGenerator::increment_counter(LIR_Address* addr, int step) { + __ add((LIR_Opr)addr, LIR_OprFact::intConst(step), (LIR_Opr)addr); + } + +-void LIRGenerator::cmp_mem_int(LIR_Condition condition, LIR_Opr base, int disp, int c, CodeEmitInfo* info) { ++template ++void LIRGenerator::cmp_mem_int_branch(LIR_Condition condition, LIR_Opr base, int disp, int c, T tgt, CodeEmitInfo* info) { + __ cmp_mem_int(condition, base, disp, c, info); ++ __ branch(condition, T_INT, tgt); + } + ++// Explicit instantiation for all supported types. ++template void LIRGenerator::cmp_mem_int_branch(LIR_Condition, LIR_Opr, int, int, Label*, CodeEmitInfo*); ++template void LIRGenerator::cmp_mem_int_branch(LIR_Condition, LIR_Opr, int, int, BlockBegin*, CodeEmitInfo*); ++template void LIRGenerator::cmp_mem_int_branch(LIR_Condition, LIR_Opr, int, int, CodeStub*, CodeEmitInfo*); + +-void LIRGenerator::cmp_reg_mem(LIR_Condition condition, LIR_Opr reg, LIR_Opr base, int disp, BasicType type, CodeEmitInfo* info) { ++template ++void LIRGenerator::cmp_reg_mem_branch(LIR_Condition condition, LIR_Opr reg, LIR_Opr base, int disp, BasicType type, T tgt, CodeEmitInfo* info) { + __ cmp_reg_mem(condition, reg, new LIR_Address(base, disp, type), info); ++ __ branch(condition, type, tgt); + } + ++// Explicit instantiation for all supported types. 
++template void LIRGenerator::cmp_reg_mem_branch(LIR_Condition, LIR_Opr, LIR_Opr, int, BasicType, Label*, CodeEmitInfo*); ++template void LIRGenerator::cmp_reg_mem_branch(LIR_Condition, LIR_Opr, LIR_Opr, int, BasicType, BlockBegin*, CodeEmitInfo*); ++template void LIRGenerator::cmp_reg_mem_branch(LIR_Condition, LIR_Opr, LIR_Opr, int, BasicType, CodeStub*, CodeEmitInfo*); + + bool LIRGenerator::strength_reduce_multiply(LIR_Opr left, jint c, LIR_Opr result, LIR_Opr tmp) { + if (tmp->is_valid() && c > 0 && c < max_jint) { +diff --git a/src/hotspot/cpu/x86/c1_LIR_x86.cpp b/src/hotspot/cpu/x86/c1_LIR_x86.cpp +index 92277ee063..20e283e302 100644 +--- a/src/hotspot/cpu/x86/c1_LIR_x86.cpp ++++ b/src/hotspot/cpu/x86/c1_LIR_x86.cpp +@@ -72,3 +72,24 @@ void LIR_Address::verify() const { + #endif + } + #endif // PRODUCT ++ ++template ++void LIR_List::cmp_branch(LIR_Condition condition, LIR_Opr left, LIR_Opr right, BasicType type, T tgt, CodeEmitInfo* info) { ++ cmp(condition, left, right, info); ++ branch(condition, type, tgt); ++} ++ ++// Explicit instantiation for all supported types. ++template void LIR_List::cmp_branch(LIR_Condition, LIR_Opr, LIR_Opr, BasicType type, Label*, CodeEmitInfo*); ++template void LIR_List::cmp_branch(LIR_Condition, LIR_Opr, LIR_Opr, BasicType type, BlockBegin*, CodeEmitInfo*); ++template void LIR_List::cmp_branch(LIR_Condition, LIR_Opr, LIR_Opr, BasicType type, CodeStub*, CodeEmitInfo*); ++ ++void LIR_List::cmp_branch(LIR_Condition condition, LIR_Opr left, LIR_Opr right, BasicType type, BlockBegin* block, BlockBegin* unordered) { ++ cmp(condition, left, right); ++ branch(condition, type, block, unordered); ++} ++ ++void LIR_List::cmp_cmove(LIR_Condition condition, LIR_Opr left, LIR_Opr right, LIR_Opr src1, LIR_Opr src2, LIR_Opr dst, BasicType type) { ++ cmp(condition, left, right); ++ cmove(condition, src1, src2, dst, type); ++} +diff --git a/src/hotspot/cpu/x86/gc/z/zBarrierSetAssembler_x86.cpp b/src/hotspot/cpu/x86/gc/z/zBarrierSetAssembler_x86.cpp +index 95d7e51501..8d7b623ee7 100644 +--- a/src/hotspot/cpu/x86/gc/z/zBarrierSetAssembler_x86.cpp ++++ b/src/hotspot/cpu/x86/gc/z/zBarrierSetAssembler_x86.cpp +@@ -263,7 +263,8 @@ void ZBarrierSetAssembler::try_resolve_jobject_in_native(MacroAssembler* masm, + #define __ ce->masm()-> + + void ZBarrierSetAssembler::generate_c1_load_barrier_test(LIR_Assembler* ce, +- LIR_Opr ref) const { ++ LIR_Opr ref, ++ LIR_Opr res) const { + __ testptr(ref->as_register(), address_bad_mask_from_thread(r15_thread)); + } + +diff --git a/src/hotspot/cpu/x86/gc/z/zBarrierSetAssembler_x86.hpp b/src/hotspot/cpu/x86/gc/z/zBarrierSetAssembler_x86.hpp +index 3687754e71..791e4ed43f 100644 +--- a/src/hotspot/cpu/x86/gc/z/zBarrierSetAssembler_x86.hpp ++++ b/src/hotspot/cpu/x86/gc/z/zBarrierSetAssembler_x86.hpp +@@ -77,7 +77,8 @@ public: + + #ifdef COMPILER1 + void generate_c1_load_barrier_test(LIR_Assembler* ce, +- LIR_Opr ref) const; ++ LIR_Opr ref, ++ LIR_Opr res) const; + + void generate_c1_load_barrier_stub(LIR_Assembler* ce, + ZLoadBarrierStubC1* stub) const; +diff --git a/src/hotspot/os/linux/os_linux.cpp b/src/hotspot/os/linux/os_linux.cpp +index 74945999e7..2b8ac3dd2a 100644 +--- a/src/hotspot/os/linux/os_linux.cpp ++++ b/src/hotspot/os/linux/os_linux.cpp +@@ -23,6 +23,12 @@ + * + */ + ++/* ++ * This file has been modified by Loongson Technology in 2021. These ++ * modifications are Copyright (c) 2021 Loongson Technology, and are made ++ * available on the same license terms set forth above. 
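// Aside, not part of the patch: the cmp_branch / cmp_mem_int_branch helpers added for
// the other ports are member templates parameterised on the branch-target type and
// defined out of line in c1_LIR_<arch>.cpp / c1_LIRGenerator_<arch>.cpp, so they must
// be explicitly instantiated for every target type used (Label*, BlockBegin*,
// CodeStub* in the diffs above). A minimal sketch of that define-in-cpp plus
// explicit-instantiation idiom with toy types:
#include <cstdio>

struct Label {};
struct BlockBegin {};

struct Emitter {
  template <typename T> void cmp_branch(int condition, T target);
};

// Out-of-line definition (in the real patch this lives in the per-arch .cpp file).
template <typename T>
void Emitter::cmp_branch(int condition, T target) {
  std::printf("emit compare (cond=%d) then branch to %p\n", condition, (void*)target);
}

// Explicit instantiations so other translation units can link against them.
template void Emitter::cmp_branch<Label*>(int, Label*);
template void Emitter::cmp_branch<BlockBegin*>(int, BlockBegin*);

int main() {
  Emitter e;
  Label l;
  BlockBegin b;
  e.cmp_branch(0, &l);
  e.cmp_branch(1, &b);
  return 0;
}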
++ */ ++ + // no precompiled headers + #include "jvm.h" + #include "classfile/classLoader.hpp" +@@ -3966,6 +3972,8 @@ size_t os::Linux::find_large_page_size() { + IA64_ONLY(256 * M) + PPC_ONLY(4 * M) + S390_ONLY(1 * M) ++ MIPS64_ONLY(4 * M) ++ LOONGARCH64_ONLY(4 * M); //In MIPS _large_page_size is seted 4*M. // TODO: LA + SPARC_ONLY(4 * M); + #endif // ZERO + +diff --git a/src/hotspot/os_cpu/linux_loongarch/assembler_linux_loongarch.cpp b/src/hotspot/os_cpu/linux_loongarch/assembler_linux_loongarch.cpp +new file mode 100644 +index 0000000000..30719a0340 +--- /dev/null ++++ b/src/hotspot/os_cpu/linux_loongarch/assembler_linux_loongarch.cpp +@@ -0,0 +1,24 @@ ++/* ++ * Copyright (c) 1999, 2010, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2021, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ +diff --git a/src/hotspot/os_cpu/linux_loongarch/atomic_linux_loongarch.hpp b/src/hotspot/os_cpu/linux_loongarch/atomic_linux_loongarch.hpp +new file mode 100644 +index 0000000000..8403e7838a +--- /dev/null ++++ b/src/hotspot/os_cpu/linux_loongarch/atomic_linux_loongarch.hpp +@@ -0,0 +1,160 @@ ++/* ++ * Copyright (c) 1999, 2013, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2023, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ * ++ */ ++ ++#ifndef OS_CPU_LINUX_LOONGARCH_ATOMIC_LINUX_LOONGARCH_HPP ++#define OS_CPU_LINUX_LOONGARCH_ATOMIC_LINUX_LOONGARCH_HPP ++ ++#include "runtime/vm_version.hpp" ++ ++// Implementation of class atomic ++ ++template ++struct Atomic::PlatformAdd ++ : Atomic::AddAndFetch > ++{ ++ template ++ D add_and_fetch(I add_value, D volatile* dest, atomic_memory_order order) const { ++ //Unimplemented(); ++ return __sync_add_and_fetch(dest, add_value); ++ } ++}; ++ ++template<> ++template ++inline T Atomic::PlatformXchg<4>::operator()(T exchange_value, ++ T volatile* dest, ++ atomic_memory_order order) const { ++ T __ret, __tmp; ++ ++ STATIC_ASSERT(4 == sizeof(T)); ++ __asm__ __volatile__ ( ++ "1: ll.w %[__ret], %[__dest] \n\t" ++ " move %[__tmp], %[__val] \n\t" ++ " sc.w %[__tmp], %[__dest] \n\t" ++ " beqz %[__tmp], 1b \n\t" ++ ++ : [__ret] "=&r" (__ret), [__tmp] "=&r" (__tmp) ++ : [__dest] "ZC" (*(volatile jint*)dest), [__val] "r" (exchange_value) ++ : "memory" ++ ); ++ ++ return __ret; ++} ++ ++template<> ++template ++inline T Atomic::PlatformXchg<8>::operator()(T exchange_value, ++ T volatile* dest, ++ atomic_memory_order order) const { ++ STATIC_ASSERT(8 == sizeof(T)); ++ T __ret; ++ jlong __tmp; ++ __asm__ __volatile__ ( ++ "1: ll.d %[__ret], %[__dest] \n\t" ++ " move %[__tmp], %[__val] \n\t" ++ " sc.d %[__tmp], %[__dest] \n\t" ++ " beqz %[__tmp], 1b \n\t" ++ ++ : [__ret] "=&r" (__ret), [__tmp] "=&r" (__tmp) ++ : [__dest] "ZC" (*(volatile intptr_t*)dest), [__val] "r" (exchange_value) ++ : "memory" ++ ); ++ ++ return __ret; ++} ++ ++#if 0 ++template<> ++template ++inline T Atomic::PlatformCmpxchg<1>::operator()(T exchange_value, ++ T volatile* dest, ++ T compare_value, ++ atomic_memory_order order) const { ++ STATIC_ASSERT(1 == sizeof(T)); ++} ++ ++#else ++// No direct support for cmpxchg of bytes; emulate using int. 
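// Aside, not part of the patch: the ll.w/sc.w (and ll.d/sc.d) loops in this header
// implement the usual load-linked/store-conditional retry pattern - reload the value,
// bail out if it no longer matches the expected one, otherwise attempt the
// conditional store and retry until it sticks. Functionally that is the
// compare-and-swap below; std::atomic is used here only to illustrate the contract
// the hand-written assembly provides, and on LoongArch the compiler lowers it to a
// very similar ll/sc loop.
#include <atomic>
#include <cstdio>

// Returns the value observed before the operation, like PlatformCmpxchg does.
int cmpxchg(std::atomic<int>& dest, int compare_value, int exchange_value) {
  int observed = compare_value;
  dest.compare_exchange_strong(observed, exchange_value, std::memory_order_seq_cst);
  return observed;   // == compare_value on success, the current value otherwise
}

int main() {
  std::atomic<int> v{41};
  std::printf("first attempt saw %d\n", cmpxchg(v, 41, 42));  // succeeds, prints 41
  std::printf("second attempt saw %d\n", cmpxchg(v, 41, 43)); // fails, prints 42
  std::printf("final value %d\n", v.load());                  // 42
  return 0;
}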
++template<> ++struct Atomic::PlatformCmpxchg<1> : Atomic::CmpxchgByteUsingInt {}; ++#endif ++ ++template<> ++template ++inline T Atomic::PlatformCmpxchg<4>::operator()(T exchange_value, ++ T volatile* dest, ++ T compare_value, ++ atomic_memory_order order) const { ++ STATIC_ASSERT(4 == sizeof(T)); ++ T __prev; ++ jint __cmp; ++ ++ __asm__ __volatile__ ( ++ "1: ll.w %[__prev], %[__dest] \n\t" ++ " bne %[__prev], %[__old], 2f \n\t" ++ " move %[__cmp], $r0 \n\t" ++ " move %[__cmp], %[__new] \n\t" ++ " sc.w %[__cmp], %[__dest] \n\t" ++ " beqz %[__cmp], 1b \n\t" ++ "2: \n\t" ++ " dbar 0x700 \n\t" ++ ++ : [__prev] "=&r" (__prev), [__cmp] "=&r" (__cmp) ++ : [__dest] "ZC" (*(volatile jint*)dest), [__old] "r" (compare_value), [__new] "r" (exchange_value) ++ : "memory" ++ ); ++ ++ return __prev; ++} ++ ++template<> ++template ++inline T Atomic::PlatformCmpxchg<8>::operator()(T exchange_value, ++ T volatile* dest, ++ T compare_value, ++ atomic_memory_order order) const { ++ STATIC_ASSERT(8 == sizeof(T)); ++ T __prev; ++ jlong __cmp; ++ ++ __asm__ __volatile__ ( ++ "1: ll.d %[__prev], %[__dest] \n\t" ++ " bne %[__prev], %[__old], 2f \n\t" ++ " move %[__cmp], $r0 \n\t" ++ " move %[__cmp], %[__new] \n\t" ++ " sc.d %[__cmp], %[__dest] \n\t" ++ " beqz %[__cmp], 1b \n\t" ++ "2: \n\t" ++ " dbar 0x700 \n\t" ++ ++ : [__prev] "=&r" (__prev), [__cmp] "=&r" (__cmp) ++ : [__dest] "ZC" (*(volatile jlong*)dest), [__old] "r" (compare_value), [__new] "r" (exchange_value) ++ : "memory" ++ ); ++ return __prev; ++} ++ ++ ++#endif // OS_CPU_LINUX_LOONGARCH_ATOMIC_LINUX_LOONGARCH_HPP +diff --git a/src/hotspot/os_cpu/linux_loongarch/bytes_linux_loongarch.inline.hpp b/src/hotspot/os_cpu/linux_loongarch/bytes_linux_loongarch.inline.hpp +new file mode 100644 +index 0000000000..c9f675baca +--- /dev/null ++++ b/src/hotspot/os_cpu/linux_loongarch/bytes_linux_loongarch.inline.hpp +@@ -0,0 +1,37 @@ ++/* ++ * Copyright (c) 1999, 2010, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#ifndef OS_CPU_LINUX_LOONGARCH_BYTES_LINUX_LOONGARCH_INLINE_HPP ++#define OS_CPU_LINUX_LOONGARCH_BYTES_LINUX_LOONGARCH_INLINE_HPP ++ ++#include ++ ++// Efficient swapping of data bytes from Java byte ++// ordering to native byte ordering and vice versa. 
++inline u2 Bytes::swap_u2(u2 x) { return bswap_16(x); } ++inline u4 Bytes::swap_u4(u4 x) { return bswap_32(x); } ++inline u8 Bytes::swap_u8(u8 x) { return bswap_64(x); } ++ ++#endif // OS_CPU_LINUX_LOONGARCH_BYTES_LINUX_LOONGARCH_INLINE_HPP +diff --git a/src/hotspot/os_cpu/linux_loongarch/copy_linux_loongarch.inline.hpp b/src/hotspot/os_cpu/linux_loongarch/copy_linux_loongarch.inline.hpp +new file mode 100644 +index 0000000000..826c1fe39a +--- /dev/null ++++ b/src/hotspot/os_cpu/linux_loongarch/copy_linux_loongarch.inline.hpp +@@ -0,0 +1,125 @@ ++/* ++ * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2021, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#ifndef OS_CPU_LINUX_LOONGARCH_COPY_LINUX_LOONGARCH_INLINE_HPP ++#define OS_CPU_LINUX_LOONGARCH_COPY_LINUX_LOONGARCH_INLINE_HPP ++ ++static void pd_conjoint_words(const HeapWord* from, HeapWord* to, size_t count) { ++ (void)memmove(to, from, count * HeapWordSize); ++} ++ ++static void pd_disjoint_words(const HeapWord* from, HeapWord* to, size_t count) { ++ switch (count) { ++ case 8: to[7] = from[7]; ++ case 7: to[6] = from[6]; ++ case 6: to[5] = from[5]; ++ case 5: to[4] = from[4]; ++ case 4: to[3] = from[3]; ++ case 3: to[2] = from[2]; ++ case 2: to[1] = from[1]; ++ case 1: to[0] = from[0]; ++ case 0: break; ++ default: ++ (void)memcpy(to, from, count * HeapWordSize); ++ break; ++ } ++} ++ ++static void pd_disjoint_words_atomic(const HeapWord* from, HeapWord* to, size_t count) { ++ switch (count) { ++ case 8: to[7] = from[7]; ++ case 7: to[6] = from[6]; ++ case 6: to[5] = from[5]; ++ case 5: to[4] = from[4]; ++ case 4: to[3] = from[3]; ++ case 3: to[2] = from[2]; ++ case 2: to[1] = from[1]; ++ case 1: to[0] = from[0]; ++ case 0: break; ++ default: ++ while (count-- > 0) { ++ *to++ = *from++; ++ } ++ break; ++ } ++} ++ ++static void pd_aligned_conjoint_words(const HeapWord* from, HeapWord* to, size_t count) { ++ pd_conjoint_words(from, to, count); ++} ++ ++static void pd_aligned_disjoint_words(const HeapWord* from, HeapWord* to, size_t count) { ++ pd_disjoint_words(from, to, count); ++} ++ ++static void pd_conjoint_bytes(const void* from, void* to, size_t count) { ++ (void)memmove(to, from, count); ++} ++ ++static void pd_conjoint_bytes_atomic(const void* from, void* to, size_t count) { ++ pd_conjoint_bytes(from, to, count); ++} ++ ++static void pd_conjoint_jshorts_atomic(const jshort* from, jshort* to, size_t count) { ++ copy_conjoint_atomic(from, to, count); ++} ++ ++static void 
pd_conjoint_jints_atomic(const jint* from, jint* to, size_t count) { ++ copy_conjoint_atomic(from, to, count); ++} ++ ++static void pd_conjoint_jlongs_atomic(const jlong* from, jlong* to, size_t count) { ++ copy_conjoint_atomic(from, to, count); ++} ++ ++static void pd_conjoint_oops_atomic(const oop* from, oop* to, size_t count) { ++ //assert(!UseCompressedOops, "foo!"); ++ assert(HeapWordSize == BytesPerOop, "heapwords and oops must be the same size"); ++ copy_conjoint_atomic(from, to, count); ++} ++ ++static void pd_arrayof_conjoint_bytes(const HeapWord* from, HeapWord* to, size_t count) { ++ pd_conjoint_bytes_atomic(from, to, count); ++} ++ ++static void pd_arrayof_conjoint_jshorts(const HeapWord* from, HeapWord* to, size_t count) { ++ pd_conjoint_jshorts_atomic((jshort*)from, (jshort*)to, count); ++} ++ ++static void pd_arrayof_conjoint_jints(const HeapWord* from, HeapWord* to, size_t count) { ++ pd_conjoint_jints_atomic((jint*)from, (jint*)to, count); ++} ++ ++static void pd_arrayof_conjoint_jlongs(const HeapWord* from, HeapWord* to, size_t count) { ++ pd_conjoint_jlongs_atomic((jlong*)from, (jlong*)to, count); ++} ++ ++static void pd_arrayof_conjoint_oops(const HeapWord* from, HeapWord* to, size_t count) { ++ //assert(!UseCompressedOops, "foo!"); ++ assert(BytesPerLong == BytesPerOop, "jlongs and oops must be the same size"); ++ pd_conjoint_oops_atomic((oop*)from, (oop*)to, count); ++} ++ ++#endif // OS_CPU_LINUX_LOONGARCH_COPY_LINUX_LOONGARCH_INLINE_HPP +diff --git a/src/hotspot/os_cpu/linux_loongarch/globals_linux_loongarch.hpp b/src/hotspot/os_cpu/linux_loongarch/globals_linux_loongarch.hpp +new file mode 100644 +index 0000000000..0b5247aa0b +--- /dev/null ++++ b/src/hotspot/os_cpu/linux_loongarch/globals_linux_loongarch.hpp +@@ -0,0 +1,43 @@ ++/* ++ * Copyright (c) 2000, 2013, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#ifndef OS_CPU_LINUX_LOONGARCH_GLOBALS_LINUX_LOONGARCH_HPP ++#define OS_CPU_LINUX_LOONGARCH_GLOBALS_LINUX_LOONGARCH_HPP ++ ++// Sets the default values for platform dependent flags used by the runtime system. 
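// Aside, not part of the patch: pd_disjoint_words above special-cases small copies
// with a fall-through switch (a Duff's-device-style unroll of up to eight word
// assignments) and only falls back to memcpy for larger counts. The same shape as a
// standalone function over plain 64-bit words:
#include <cstddef>
#include <cstdint>
#include <cstdio>
#include <cstring>

static void copy_words(const uint64_t* from, uint64_t* to, size_t count) {
  switch (count) {
  case 8: to[7] = from[7];   // intentional fall-through all the way down
  case 7: to[6] = from[6];
  case 6: to[5] = from[5];
  case 5: to[4] = from[4];
  case 4: to[3] = from[3];
  case 3: to[2] = from[2];
  case 2: to[1] = from[1];
  case 1: to[0] = from[0];
  case 0: break;
  default:
    std::memcpy(to, from, count * sizeof(uint64_t));
    break;
  }
}

int main() {
  uint64_t src[8] = {1, 2, 3, 4, 5, 6, 7, 8};
  uint64_t dst[8] = {};
  copy_words(src, dst, 5);
  for (int i = 0; i < 8; i++) std::printf("%llu ", (unsigned long long)dst[i]);
  std::printf("\n");   // 1 2 3 4 5 0 0 0
  return 0;
}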
++// (see globals.hpp) ++ ++define_pd_global(bool, DontYieldALot, false); ++define_pd_global(intx, ThreadStackSize, 2048); // 0 => use system default ++define_pd_global(intx, VMThreadStackSize, 2048); ++ ++define_pd_global(intx, CompilerThreadStackSize, 2048); ++ ++define_pd_global(uintx,JVMInvokeMethodSlack, 8192); ++ ++// Used on 64 bit platforms for UseCompressedOops base address ++define_pd_global(uintx,HeapBaseMinAddress, 2*G); ++ ++#endif // OS_CPU_LINUX_LOONGARCH_GLOBALS_LINUX_LOONGARCH_HPP +diff --git a/src/hotspot/os_cpu/linux_loongarch/linux_loongarch.s b/src/hotspot/os_cpu/linux_loongarch/linux_loongarch.s +new file mode 100644 +index 0000000000..ebd73af0c5 +--- /dev/null ++++ b/src/hotspot/os_cpu/linux_loongarch/linux_loongarch.s +@@ -0,0 +1,25 @@ ++# ++# Copyright (c) 2004, 2013, Oracle and/or its affiliates. All rights reserved. ++# Copyright (c) 2015, 2021, Loongson Technology. All rights reserved. ++# DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++# ++# This code is free software; you can redistribute it and/or modify it ++# under the terms of the GNU General Public License version 2 only, as ++# published by the Free Software Foundation. ++# ++# This code is distributed in the hope that it will be useful, but WITHOUT ++# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++# FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++# version 2 for more details (a copy is included in the LICENSE file that ++# accompanied this code). ++# ++# You should have received a copy of the GNU General Public License version ++# 2 along with this work; if not, write to the Free Software Foundation, ++# Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++# ++# Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++# or visit www.oracle.com if you need additional information or have any ++# questions. ++# ++ ++ +diff --git a/src/hotspot/os_cpu/linux_loongarch/orderAccess_linux_loongarch.hpp b/src/hotspot/os_cpu/linux_loongarch/orderAccess_linux_loongarch.hpp +new file mode 100644 +index 0000000000..5429a1055a +--- /dev/null ++++ b/src/hotspot/os_cpu/linux_loongarch/orderAccess_linux_loongarch.hpp +@@ -0,0 +1,51 @@ ++/* ++ * Copyright (c) 2003, 2013, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ * ++ */ ++ ++#ifndef OS_CPU_LINUX_LOONGARCH_ORDERACCESS_LINUX_LOONGARCH_HPP ++#define OS_CPU_LINUX_LOONGARCH_ORDERACCESS_LINUX_LOONGARCH_HPP ++ ++#include "runtime/os.hpp" ++ ++// Included in orderAccess.hpp header file. ++ ++// Implementation of class OrderAccess. ++#define inlasm_sync(v) if (os::is_ActiveCoresMP()) \ ++ __asm__ __volatile__ ("nop" : : : "memory"); \ ++ else \ ++ __asm__ __volatile__ ("dbar %0" : :"K"(v) : "memory"); ++ ++inline void OrderAccess::loadload() { inlasm_sync(0x15); } ++inline void OrderAccess::storestore() { inlasm_sync(0x1a); } ++inline void OrderAccess::loadstore() { inlasm_sync(0x16); } ++inline void OrderAccess::storeload() { inlasm_sync(0x19); } ++ ++inline void OrderAccess::acquire() { inlasm_sync(0x14); } ++inline void OrderAccess::release() { inlasm_sync(0x12); } ++inline void OrderAccess::fence() { inlasm_sync(0x10); } ++ ++ ++#undef inlasm_sync ++ ++#endif // OS_CPU_LINUX_LOONGARCH_ORDERACCESS_LINUX_LOONGARCH_HPP +diff --git a/src/hotspot/os_cpu/linux_loongarch/os_linux_loongarch.cpp b/src/hotspot/os_cpu/linux_loongarch/os_linux_loongarch.cpp +new file mode 100644 +index 0000000000..cf5fff0d04 +--- /dev/null ++++ b/src/hotspot/os_cpu/linux_loongarch/os_linux_loongarch.cpp +@@ -0,0 +1,710 @@ ++/* ++ * Copyright (c) 1999, 2014, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2018, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ * ++ */ ++ ++// no precompiled headers ++#include "asm/macroAssembler.hpp" ++#include "classfile/classLoader.hpp" ++#include "classfile/systemDictionary.hpp" ++#include "classfile/vmSymbols.hpp" ++#include "code/icBuffer.hpp" ++#include "code/vtableStubs.hpp" ++#include "interpreter/interpreter.hpp" ++#include "memory/allocation.inline.hpp" ++#include "os_share_linux.hpp" ++#include "prims/jniFastGetField.hpp" ++#include "prims/jvm_misc.hpp" ++#include "runtime/arguments.hpp" ++#include "runtime/extendedPC.hpp" ++#include "runtime/frame.inline.hpp" ++#include "runtime/interfaceSupport.inline.hpp" ++#include "runtime/java.hpp" ++#include "runtime/javaCalls.hpp" ++#include "runtime/mutexLocker.hpp" ++#include "runtime/osThread.hpp" ++#include "runtime/sharedRuntime.hpp" ++#include "runtime/stubRoutines.hpp" ++#include "runtime/thread.inline.hpp" ++#include "runtime/timer.hpp" ++#include "utilities/events.hpp" ++#include "utilities/vmError.hpp" ++#include "compiler/disassembler.hpp" ++ ++// put OS-includes here ++# include ++# include ++# include ++# include ++# include ++# include ++# include ++# include ++# include ++# include ++# include ++# include ++# include ++# include ++# include ++# include ++# include ++# include ++# include ++# include ++ ++#define REG_SP 3 ++#define REG_FP 22 ++ ++NOINLINE address os::current_stack_pointer() { ++ register void *sp __asm__ ("$r3"); ++ return (address) sp; ++} ++ ++char* os::non_memory_address_word() { ++ // Must never look like an address returned by reserve_memory, ++ // even in its subfields (as defined by the CPU immediate fields, ++ // if the CPU splits constants across multiple instructions). ++ ++ return (char*) -1; ++} ++ ++address os::Linux::ucontext_get_pc(const ucontext_t * uc) { ++ return (address)uc->uc_mcontext.__pc; ++} ++ ++void os::Linux::ucontext_set_pc(ucontext_t * uc, address pc) { ++ uc->uc_mcontext.__pc = (intptr_t)pc; ++} ++ ++intptr_t* os::Linux::ucontext_get_sp(const ucontext_t * uc) { ++ return (intptr_t*)uc->uc_mcontext.__gregs[REG_SP]; ++} ++ ++intptr_t* os::Linux::ucontext_get_fp(const ucontext_t * uc) { ++ return (intptr_t*)uc->uc_mcontext.__gregs[REG_FP]; ++} ++ ++// For Forte Analyzer AsyncGetCallTrace profiling support - thread ++// is currently interrupted by SIGPROF. ++// os::Solaris::fetch_frame_from_ucontext() tries to skip nested signal ++// frames. Currently we don't do that on Linux, so it's the same as ++// os::fetch_frame_from_context(). 
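++// On LoongArch the frame is rebuilt from the saved signal context: the PC is
++// read from uc_mcontext.__pc, the SP from general register $r3 and the FP
++// from $r22 (see the REG_SP/REG_FP definitions above).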
++ExtendedPC os::Linux::fetch_frame_from_ucontext(Thread* thread, ++ const ucontext_t* uc, intptr_t** ret_sp, intptr_t** ret_fp) { ++ ++ assert(thread != NULL, "just checking"); ++ assert(ret_sp != NULL, "just checking"); ++ assert(ret_fp != NULL, "just checking"); ++ ++ return os::fetch_frame_from_context(uc, ret_sp, ret_fp); ++} ++ ++ExtendedPC os::fetch_frame_from_context(const void* ucVoid, ++ intptr_t** ret_sp, intptr_t** ret_fp) { ++ ++ ExtendedPC epc; ++ ucontext_t* uc = (ucontext_t*)ucVoid; ++ ++ if (uc != NULL) { ++ epc = ExtendedPC(os::Linux::ucontext_get_pc(uc)); ++ if (ret_sp) *ret_sp = os::Linux::ucontext_get_sp(uc); ++ if (ret_fp) *ret_fp = os::Linux::ucontext_get_fp(uc); ++ } else { ++ // construct empty ExtendedPC for return value checking ++ epc = ExtendedPC(NULL); ++ if (ret_sp) *ret_sp = (intptr_t *)NULL; ++ if (ret_fp) *ret_fp = (intptr_t *)NULL; ++ } ++ ++ return epc; ++} ++ ++frame os::fetch_frame_from_context(const void* ucVoid) { ++ intptr_t* sp; ++ intptr_t* fp; ++ ExtendedPC epc = fetch_frame_from_context(ucVoid, &sp, &fp); ++ return frame(sp, fp, epc.pc()); ++} ++ ++bool os::Linux::get_frame_at_stack_banging_point(JavaThread* thread, ucontext_t* uc, frame* fr) { ++ address pc = (address) os::Linux::ucontext_get_pc(uc); ++ if (Interpreter::contains(pc)) { ++ // interpreter performs stack banging after the fixed frame header has ++ // been generated while the compilers perform it before. To maintain ++ // semantic consistency between interpreted and compiled frames, the ++ // method returns the Java sender of the current frame. ++ *fr = os::fetch_frame_from_context(uc); ++ if (!fr->is_first_java_frame()) { ++ assert(fr->safe_for_sender(thread), "Safety check"); ++ *fr = fr->java_sender(); ++ } ++ } else { ++ // more complex code with compiled code ++ assert(!Interpreter::contains(pc), "Interpreted methods should have been handled above"); ++ CodeBlob* cb = CodeCache::find_blob(pc); ++ if (cb == NULL || !cb->is_nmethod() || cb->is_frame_complete_at(pc)) { ++ // Not sure where the pc points to, fallback to default ++ // stack overflow handling ++ return false; ++ } else { ++ // In compiled code, the stack banging is performed before LR ++ // has been saved in the frame. RA is live, and SP and FP ++ // belong to the caller. ++ intptr_t* fp = os::Linux::ucontext_get_fp(uc); ++ intptr_t* sp = os::Linux::ucontext_get_sp(uc); ++ address pc = (address)(uc->uc_mcontext.__gregs[1]); ++ *fr = frame(sp, fp, pc); ++ if (!fr->is_java_frame()) { ++ assert(fr->safe_for_sender(thread), "Safety check"); ++ assert(!fr->is_first_frame(), "Safety check"); ++ *fr = fr->java_sender(); ++ } ++ } ++ } ++ assert(fr->is_java_frame(), "Safety check"); ++ return true; ++} ++ ++// By default, gcc always save frame pointer on stack. 
It may get ++// turned off by -fomit-frame-pointer, ++frame os::get_sender_for_C_frame(frame* fr) { ++ return frame(fr->sender_sp(), fr->link(), fr->sender_pc()); ++} ++ ++frame os::current_frame() { ++ intptr_t *fp = ((intptr_t **)__builtin_frame_address(0))[frame::native_frame_link_offset]; ++ frame myframe((intptr_t*)os::current_stack_pointer(), ++ (intptr_t*)fp, ++ CAST_FROM_FN_PTR(address, os::current_frame)); ++ if (os::is_first_C_frame(&myframe)) { ++ // stack is not walkable ++ return frame(); ++ } else { ++ return os::get_sender_for_C_frame(&myframe); ++ } ++} ++ ++extern "C" int ++JVM_handle_linux_signal(int sig, ++ siginfo_t* info, ++ void* ucVoid, ++ int abort_if_unrecognized) { ++#ifdef PRINT_SIGNAL_HANDLE ++ tty->print_cr("Signal: signo=%d, sicode=%d, sierrno=%d, siaddr=%lx", ++ info->si_signo, ++ info->si_code, ++ info->si_errno, ++ info->si_addr); ++#endif ++ ++ ucontext_t* uc = (ucontext_t*) ucVoid; ++ ++ Thread* t = Thread::current_or_null_safe(); ++ ++ SignalHandlerMark shm(t); ++ ++ // Note: it's not uncommon that JNI code uses signal/sigset to install ++ // then restore certain signal handler (e.g. to temporarily block SIGPIPE, ++ // or have a SIGILL handler when detecting CPU type). When that happens, ++ // JVM_handle_linux_signal() might be invoked with junk info/ucVoid. To ++ // avoid unnecessary crash when libjsig is not preloaded, try handle signals ++ // that do not require siginfo/ucontext first. ++ ++ if (sig == SIGPIPE/* || sig == SIGXFSZ*/) { ++ // allow chained handler to go first ++ if (os::Linux::chained_handler(sig, info, ucVoid)) { ++ return true; ++ } else { ++ if (PrintMiscellaneous && (WizardMode || Verbose)) { ++ warning("Ignoring SIGPIPE - see bug 4229104"); ++ } ++ return true; ++ } ++ } ++ ++#ifdef CAN_SHOW_REGISTERS_ON_ASSERT ++ if ((sig == SIGSEGV || sig == SIGBUS) && info != NULL && info->si_addr == g_assert_poison) { ++ handle_assert_poison_fault(ucVoid, info->si_addr); ++ return 1; ++ } ++#endif ++ ++ JavaThread* thread = NULL; ++ VMThread* vmthread = NULL; ++ if (os::Linux::signal_handlers_are_installed) { ++ if (t != NULL ){ ++ if(t->is_Java_thread()) { ++#ifdef PRINT_SIGNAL_HANDLE ++ tty->print_cr("this thread is a java thread"); ++#endif ++ thread = (JavaThread*)t; ++ } ++ else if(t->is_VM_thread()){ ++#ifdef PRINT_SIGNAL_HANDLE ++ tty->print_cr("this thread is a VM thread\n"); ++#endif ++ vmthread = (VMThread *)t; ++ } ++ } ++ } ++ ++ // Handle SafeFetch faults: ++ if (uc != NULL) { ++ address const pc = (address) os::Linux::ucontext_get_pc(uc); ++ if (pc && StubRoutines::is_safefetch_fault(pc)) { ++ os::Linux::ucontext_set_pc(uc, StubRoutines::continuation_for_safefetch_fault(pc)); ++ return 1; ++ } ++ } ++ ++ // decide if this trap can be handled by a stub ++ address stub = NULL; ++ address pc = NULL; ++ ++ pc = (address) os::Linux::ucontext_get_pc(uc); ++#ifdef PRINT_SIGNAL_HANDLE ++ tty->print_cr("pc=%lx", pc); ++ os::print_context(tty, uc); ++#endif ++ //%note os_trap_1 ++ if (info != NULL && uc != NULL && thread != NULL) { ++ pc = (address) os::Linux::ucontext_get_pc(uc); ++ ++ // Handle ALL stack overflow variations here ++ if (sig == SIGSEGV) { ++ address addr = (address) info->si_addr; ++#ifdef PRINT_SIGNAL_HANDLE ++ tty->print("handle all stack overflow variations: "); ++ /*tty->print("addr = %lx, stack base = %lx, stack top = %lx\n", ++ addr, ++ thread->stack_base(), ++ thread->stack_base() - thread->stack_size()); ++ */ ++#endif ++ ++ // check if fault address is within thread stack ++ if (thread->on_local_stack(addr)) { ++ 
// stack overflow ++#ifdef PRINT_SIGNAL_HANDLE ++ tty->print("stack exception check \n"); ++#endif ++ if (thread->in_stack_yellow_reserved_zone(addr)) { ++#ifdef PRINT_SIGNAL_HANDLE ++ tty->print("exception addr is in yellow zone\n"); ++#endif ++ if (thread->thread_state() == _thread_in_Java) { ++ if (thread->in_stack_reserved_zone(addr)) { ++ frame fr; ++ if (os::Linux::get_frame_at_stack_banging_point(thread, uc, &fr)) { ++ assert(fr.is_java_frame(), "Must be a Java frame"); ++ frame activation = ++ SharedRuntime::look_for_reserved_stack_annotated_method(thread, fr); ++ if (activation.sp() != NULL) { ++ thread->disable_stack_reserved_zone(); ++ if (activation.is_interpreted_frame()) { ++ thread->set_reserved_stack_activation((address)( ++ activation.fp() + frame::interpreter_frame_initial_sp_offset)); ++ } else { ++ thread->set_reserved_stack_activation((address)activation.unextended_sp()); ++ } ++ return 1; ++ } ++ } ++ } ++ // Throw a stack overflow exception. Guard pages will be reenabled ++ // while unwinding the stack. ++#ifdef PRINT_SIGNAL_HANDLE ++ tty->print("this thread is in java\n"); ++#endif ++ thread->disable_stack_yellow_reserved_zone(); ++ stub = SharedRuntime::continuation_for_implicit_exception(thread, pc, SharedRuntime::STACK_OVERFLOW); ++ } else { ++ // Thread was in the vm or native code. Return and try to finish. ++#ifdef PRINT_SIGNAL_HANDLE ++ tty->print("this thread is in vm or native codes and return\n"); ++#endif ++ thread->disable_stack_yellow_reserved_zone(); ++ return 1; ++ } ++ } else if (thread->in_stack_red_zone(addr)) { ++ // Fatal red zone violation. Disable the guard pages and fall through ++ // to handle_unexpected_exception way down below. ++#ifdef PRINT_SIGNAL_HANDLE ++ tty->print("exception addr is in red zone\n"); ++#endif ++ thread->disable_stack_red_zone(); ++ tty->print_raw_cr("An irrecoverable stack overflow has occurred."); ++ ++ // This is a likely cause, but hard to verify. Let's just print ++ // it as a hint. ++ tty->print_raw_cr("Please check if any of your loaded .so files has " ++ "enabled executable stack (see man page execstack(8))"); ++ } else { ++ // Accessing stack address below sp may cause SEGV if current ++ // thread has MAP_GROWSDOWN stack. This should only happen when ++ // current thread was created by user code with MAP_GROWSDOWN flag ++ // and then attached to VM. See notes in os_linux.cpp. ++#ifdef PRINT_SIGNAL_HANDLE ++ tty->print("exception addr is neither in yellow zone nor in the red one\n"); ++#endif ++ if (thread->osthread()->expanding_stack() == 0) { ++ thread->osthread()->set_expanding_stack(); ++ if (os::Linux::manually_expand_stack(thread, addr)) { ++ thread->osthread()->clear_expanding_stack(); ++ return 1; ++ } ++ thread->osthread()->clear_expanding_stack(); ++ } else { ++ fatal("recursive segv. expanding stack."); ++ } ++ } ++ } ++ } // sig == SIGSEGV ++ ++ if (thread->thread_state() == _thread_in_Java) { ++ // Java thread running in Java code => find exception handler if any ++ // a fault inside compiled code, the interpreter, or a stub ++#ifdef PRINT_SIGNAL_HANDLE ++ tty->print("java thread running in java code\n"); ++#endif ++ ++ // Handle signal from NativeJump::patch_verified_entry(). 
++ if (sig == SIGILL && nativeInstruction_at(pc)->is_sigill_zombie_not_entrant()) { ++#ifdef PRINT_SIGNAL_HANDLE ++ tty->print_cr("verified entry = %lx, sig=%d", nativeInstruction_at(pc), sig); ++#endif ++ stub = SharedRuntime::get_handle_wrong_method_stub(); ++ } else if (sig == SIGSEGV && os::is_poll_address((address)info->si_addr)) { ++#ifdef PRINT_SIGNAL_HANDLE ++ tty->print_cr("polling address = %lx, sig=%d", os::get_polling_page(), sig); ++#endif ++ stub = SharedRuntime::get_poll_stub(pc); ++ } else if (sig == SIGBUS /* && info->si_code == BUS_OBJERR */) { ++ // BugId 4454115: A read from a MappedByteBuffer can fault ++ // here if the underlying file has been truncated. ++ // Do not crash the VM in such a case. ++ CodeBlob* cb = CodeCache::find_blob_unsafe(pc); ++ CompiledMethod* nm = (cb != NULL) ? cb->as_compiled_method_or_null() : NULL; ++#ifdef PRINT_SIGNAL_HANDLE ++ tty->print("cb = %lx, nm = %lx\n", cb, nm); ++#endif ++ if (nm != NULL && nm->has_unsafe_access()) { ++ address next_pc = (address)((unsigned long)pc + sizeof(unsigned int)); ++ stub = SharedRuntime::handle_unsafe_access(thread, next_pc); ++ } ++ } else if (sig == SIGFPE /* && info->si_code == FPE_INTDIV */) { ++ // HACK: si_code does not work on linux 2.2.12-20!!! ++ int op = pc[0] & 0x3f; ++ int op1 = pc[3] & 0x3f; ++ //FIXME, Must port to LA code!! ++ switch (op) { ++ case 0x1e: //ddiv ++ case 0x1f: //ddivu ++ case 0x1a: //div ++ case 0x1b: //divu ++ case 0x34: //trap ++ // In LA, div_by_zero exception can only be triggered by explicit 'trap'. ++ stub = SharedRuntime::continuation_for_implicit_exception(thread, ++ pc, ++ SharedRuntime::IMPLICIT_DIVIDE_BY_ZERO); ++ break; ++ default: ++ // TODO: handle more cases if we are using other x86 instructions ++ // that can generate SIGFPE signal on linux. ++ tty->print_cr("unknown opcode 0x%X -0x%X with SIGFPE.", op, op1); ++ //fatal("please update this code."); ++ } ++ } else if (sig == SIGSEGV && ++ !MacroAssembler::needs_explicit_null_check((intptr_t)info->si_addr)) { ++#ifdef PRINT_SIGNAL_HANDLE ++ tty->print("continuation for implicit exception\n"); ++#endif ++ // Determination of interpreter/vtable stub/compiled code null exception ++ stub = SharedRuntime::continuation_for_implicit_exception(thread, pc, SharedRuntime::IMPLICIT_NULL); ++#ifdef PRINT_SIGNAL_HANDLE ++ tty->print_cr("continuation_for_implicit_exception stub: %lx", stub); ++#endif ++ } ++ } else if (thread->thread_state() == _thread_in_vm && ++ sig == SIGBUS && /* info->si_code == BUS_OBJERR && */ ++ thread->doing_unsafe_access()) { ++#ifdef PRINT_SIGNAL_HANDLE ++ tty->print_cr("SIGBUS in vm thread \n"); ++#endif ++ address next_pc = (address)((unsigned long)pc + sizeof(unsigned int)); ++ stub = SharedRuntime::handle_unsafe_access(thread, next_pc); ++ } ++ ++ // jni_fast_GetField can trap at certain pc's if a GC kicks in ++ // and the heap gets shrunk before the field access. ++ if ((sig == SIGSEGV) || (sig == SIGBUS)) { ++#ifdef PRINT_SIGNAL_HANDLE ++ tty->print("jni fast get trap: "); ++#endif ++ address addr = JNI_FastGetField::find_slowcase_pc(pc); ++ if (addr != (address)-1) { ++ stub = addr; ++ } ++#ifdef PRINT_SIGNAL_HANDLE ++ tty->print_cr("addr = %d, stub = %lx", addr, stub); ++#endif ++ } ++ ++ // Check to see if we caught the safepoint code in the ++ // process of write protecting the memory serialization page. ++ // It write enables the page immediately after protecting it ++ // so we can just return to retry the write. 
++ if ((sig == SIGSEGV) && ++ os::is_memory_serialize_page(thread, (address) info->si_addr)) { ++#ifdef PRINT_SIGNAL_HANDLE ++ tty->print("write protecting the memory serialiazation page\n"); ++#endif ++ // Block current thread until the memory serialize page permission restored. ++ os::block_on_serialize_page_trap(); ++ return true; ++ } ++ } ++ ++ if (stub != NULL) { ++#ifdef PRINT_SIGNAL_HANDLE ++ tty->print_cr("resolved stub=%lx\n",stub); ++#endif ++ // save all thread context in case we need to restore it ++ if (thread != NULL) thread->set_saved_exception_pc(pc); ++ ++ os::Linux::ucontext_set_pc(uc, stub); ++ return true; ++ } ++ ++ // signal-chaining ++ if (os::Linux::chained_handler(sig, info, ucVoid)) { ++#ifdef PRINT_SIGNAL_HANDLE ++ tty->print_cr("signal chaining\n"); ++#endif ++ return true; ++ } ++ ++ if (!abort_if_unrecognized) { ++#ifdef PRINT_SIGNAL_HANDLE ++ tty->print_cr("abort becauce of unrecognized\n"); ++#endif ++ // caller wants another chance, so give it to him ++ return false; ++ } ++ ++ if (pc == NULL && uc != NULL) { ++ pc = os::Linux::ucontext_get_pc(uc); ++ } ++ ++ // unmask current signal ++ sigset_t newset; ++ sigemptyset(&newset); ++ sigaddset(&newset, sig); ++ sigprocmask(SIG_UNBLOCK, &newset, NULL); ++#ifdef PRINT_SIGNAL_HANDLE ++ tty->print_cr("VMError in signal handler\n"); ++#endif ++ VMError::report_and_die(t, sig, pc, info, ucVoid); ++ ++ ShouldNotReachHere(); ++ return true; // Mute compiler ++} ++ ++void os::Linux::init_thread_fpu_state(void) { ++} ++ ++int os::Linux::get_fpu_control_word(void) { ++ return 0; // mute compiler ++} ++ ++void os::Linux::set_fpu_control_word(int fpu_control) { ++} ++ ++bool os::is_allocatable(size_t bytes) { ++ ++ if (bytes < 2 * G) { ++ return true; ++ } ++ ++ char* addr = reserve_memory(bytes, NULL); ++ ++ if (addr != NULL) { ++ release_memory(addr, bytes); ++ } ++ ++ return addr != NULL; ++} ++ ++//////////////////////////////////////////////////////////////////////////////// ++// thread stack ++ ++// Minimum usable stack sizes required to get to user code. Space for ++// HotSpot guard pages is added later. ++size_t os::Posix::_compiler_thread_min_stack_allowed = 48 * K; ++size_t os::Posix::_java_thread_min_stack_allowed = 40 * K; ++size_t os::Posix::_vm_internal_thread_min_stack_allowed = 64 * K; ++ ++// Return default stack size for thr_type ++size_t os::Posix::default_stack_size(os::ThreadType thr_type) { ++ // Default stack size (compiler thread needs larger stack) ++ size_t s = (thr_type == os::compiler_thread ? 
2 * M : 512 * K); ++ return s; ++} ++ ++///////////////////////////////////////////////////////////////////////////// ++// helper functions for fatal error handler ++void os::print_register_info(outputStream *st, const void *context) { ++ if (context == NULL) return; ++ ++ ucontext_t *uc = (ucontext_t*)context; ++ ++ st->print_cr("Register to memory mapping:"); ++ st->cr(); ++ // this is horrendously verbose but the layout of the registers in the ++ // // context does not match how we defined our abstract Register set, so ++ // // we can't just iterate through the gregs area ++ // ++ // // this is only for the "general purpose" registers ++ st->print("ZERO=" ); print_location(st, (intptr_t)uc->uc_mcontext.__gregs[0]); ++ st->print("RA=" ); print_location(st, (intptr_t)uc->uc_mcontext.__gregs[1]); ++ st->print("TP=" ); print_location(st, (intptr_t)uc->uc_mcontext.__gregs[2]); ++ st->print("SP=" ); print_location(st, (intptr_t)uc->uc_mcontext.__gregs[3]); ++ st->cr(); ++ st->print("A0=" ); print_location(st, (intptr_t)uc->uc_mcontext.__gregs[4]); ++ st->print("A1=" ); print_location(st, (intptr_t)uc->uc_mcontext.__gregs[5]); ++ st->print("A2=" ); print_location(st, (intptr_t)uc->uc_mcontext.__gregs[6]); ++ st->print("A3=" ); print_location(st, (intptr_t)uc->uc_mcontext.__gregs[7]); ++ st->cr(); ++ st->print("A4=" ); print_location(st, (intptr_t)uc->uc_mcontext.__gregs[8]); ++ st->print("A5=" ); print_location(st, (intptr_t)uc->uc_mcontext.__gregs[9]); ++ st->print("A6=" ); print_location(st, (intptr_t)uc->uc_mcontext.__gregs[10]); ++ st->print("A7=" ); print_location(st, (intptr_t)uc->uc_mcontext.__gregs[11]); ++ st->cr(); ++ st->print("T0=" ); print_location(st, (intptr_t)uc->uc_mcontext.__gregs[12]); ++ st->print("T1=" ); print_location(st, (intptr_t)uc->uc_mcontext.__gregs[13]); ++ st->print("T2=" ); print_location(st, (intptr_t)uc->uc_mcontext.__gregs[14]); ++ st->print("T3=" ); print_location(st, (intptr_t)uc->uc_mcontext.__gregs[15]); ++ st->cr(); ++ st->print("T4=" ); print_location(st, (intptr_t)uc->uc_mcontext.__gregs[16]); ++ st->print("T5=" ); print_location(st, (intptr_t)uc->uc_mcontext.__gregs[17]); ++ st->print("T6=" ); print_location(st, (intptr_t)uc->uc_mcontext.__gregs[18]); ++ st->print("T7=" ); print_location(st, (intptr_t)uc->uc_mcontext.__gregs[19]); ++ st->cr(); ++ st->print("T8=" ); print_location(st, (intptr_t)uc->uc_mcontext.__gregs[20]); ++ st->print("RX=" ); print_location(st, (intptr_t)uc->uc_mcontext.__gregs[21]); ++ st->print("FP=" ); print_location(st, (intptr_t)uc->uc_mcontext.__gregs[22]); ++ st->print("S0=" ); print_location(st, (intptr_t)uc->uc_mcontext.__gregs[23]); ++ st->cr(); ++ st->print("S1=" ); print_location(st, (intptr_t)uc->uc_mcontext.__gregs[24]); ++ st->print("S2=" ); print_location(st, (intptr_t)uc->uc_mcontext.__gregs[25]); ++ st->print("S3=" ); print_location(st, (intptr_t)uc->uc_mcontext.__gregs[26]); ++ st->print("S4=" ); print_location(st, (intptr_t)uc->uc_mcontext.__gregs[27]); ++ st->cr(); ++ st->print("S5=" ); print_location(st, (intptr_t)uc->uc_mcontext.__gregs[28]); ++ st->print("S6=" ); print_location(st, (intptr_t)uc->uc_mcontext.__gregs[29]); ++ st->print("S7=" ); print_location(st, (intptr_t)uc->uc_mcontext.__gregs[30]); ++ st->print("S8=" ); print_location(st, (intptr_t)uc->uc_mcontext.__gregs[31]); ++ st->cr(); ++ ++} ++ ++void os::print_context(outputStream *st, const void *context) { ++ if (context == NULL) return; ++ ++ const ucontext_t *uc = (const ucontext_t*)context; ++ st->print_cr("Registers:"); ++ st->print( "ZERO=" 
INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.__gregs[0]); ++ st->print(", RA=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.__gregs[1]); ++ st->print(", TP=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.__gregs[2]); ++ st->print(", SP=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.__gregs[3]); ++ st->cr(); ++ st->print( "A0=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.__gregs[4]); ++ st->print(", A1=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.__gregs[5]); ++ st->print(", A2=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.__gregs[6]); ++ st->print(", A3=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.__gregs[7]); ++ st->cr(); ++ st->print( "A4=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.__gregs[8]); ++ st->print(", A5=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.__gregs[9]); ++ st->print(", A6=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.__gregs[10]); ++ st->print(", A7=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.__gregs[11]); ++ st->cr(); ++ st->print( "T0=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.__gregs[12]); ++ st->print(", T1=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.__gregs[13]); ++ st->print(", T2=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.__gregs[14]); ++ st->print(", T3=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.__gregs[15]); ++ st->cr(); ++ st->print( "T4=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.__gregs[16]); ++ st->print(", T5=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.__gregs[17]); ++ st->print(", T6=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.__gregs[18]); ++ st->print(", T7=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.__gregs[19]); ++ st->cr(); ++ st->print( "T8=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.__gregs[20]); ++ st->print(", RX=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.__gregs[21]); ++ st->print(", FP=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.__gregs[22]); ++ st->print(", S0=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.__gregs[23]); ++ st->cr(); ++ st->print( "S1=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.__gregs[24]); ++ st->print(", S2=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.__gregs[25]); ++ st->print(", S3=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.__gregs[26]); ++ st->print(", S4=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.__gregs[27]); ++ st->cr(); ++ st->print( "S5=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.__gregs[28]); ++ st->print(", S6=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.__gregs[29]); ++ st->print(", S7=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.__gregs[30]); ++ st->print(", S8=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.__gregs[31]); ++ st->cr(); ++ st->cr(); ++ ++ intptr_t *sp = (intptr_t *)os::Linux::ucontext_get_sp(uc); ++ st->print_cr("Top of Stack: (sp=" PTR_FORMAT ")", p2i(sp)); ++ print_hex_dump(st, (address)(sp - 32), (address)(sp + 32), sizeof(intptr_t)); ++ st->cr(); ++ ++ // Note: it may be unsafe to inspect memory near pc. For example, pc may ++ // point to garbage if entry point in an nmethod is corrupted. Leave ++ // this at the end, and hope for the best. ++ address pc = os::Linux::ucontext_get_pc(uc); ++ st->print_cr("Instructions: (pc=" PTR_FORMAT ")", p2i(pc)); ++ print_hex_dump(st, pc - 64, pc + 64, sizeof(char)); ++ Disassembler::decode(pc - 80, pc + 80, st); ++} ++ ++void os::setup_fpu() { ++ // no use for LA ++} ++ ++#ifndef PRODUCT ++void os::verify_stack_alignment() { ++ assert(((intptr_t)os::current_stack_pointer() & (StackAlignmentInBytes-1)) == 0, "incorrect stack alignment"); ++} ++#endif ++ ++int os::extra_bang_size_in_bytes() { ++ // LA does not require the additional stack bang. 
++ return 0; ++} ++ ++bool os::is_ActiveCoresMP() { ++ return UseActiveCoresMP && _initial_active_processor_count == 1; ++} +diff --git a/src/hotspot/os_cpu/linux_loongarch/os_linux_loongarch.hpp b/src/hotspot/os_cpu/linux_loongarch/os_linux_loongarch.hpp +new file mode 100644 +index 0000000000..fa02f8ba2f +--- /dev/null ++++ b/src/hotspot/os_cpu/linux_loongarch/os_linux_loongarch.hpp +@@ -0,0 +1,38 @@ ++/* ++ * Copyright (c) 1999, 2013, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#ifndef OS_CPU_LINUX_LOONGARCH_OS_LINUX_LOONGARCH_HPP ++#define OS_CPU_LINUX_LOONGARCH_OS_LINUX_LOONGARCH_HPP ++ ++ static void setup_fpu(); ++ static bool is_allocatable(size_t bytes); ++ ++ // Used to register dynamic code cache area with the OS ++ // Note: Currently only used in 64 bit Windows implementations ++ static bool register_code_area(char *low, char *high) { return true; } ++ ++ static bool is_ActiveCoresMP(); ++ ++#endif // OS_CPU_LINUX_LOONGARCH_OS_LINUX_LOONGARCH_HPP +diff --git a/src/hotspot/os_cpu/linux_loongarch/prefetch_linux_loongarch.inline.hpp b/src/hotspot/os_cpu/linux_loongarch/prefetch_linux_loongarch.inline.hpp +new file mode 100644 +index 0000000000..cf3a596387 +--- /dev/null ++++ b/src/hotspot/os_cpu/linux_loongarch/prefetch_linux_loongarch.inline.hpp +@@ -0,0 +1,56 @@ ++/* ++ * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 
++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#ifndef OS_CPU_LINUX_LOONGARCH_PREFETCH_LINUX_LOONGARCH_INLINE_HPP ++#define OS_CPU_LINUX_LOONGARCH_PREFETCH_LINUX_LOONGARCH_INLINE_HPP ++ ++ ++inline void Prefetch::read (void *loc, intx interval) { ++// According to previous and present SPECjbb2015 score, ++// comment prefetch is better than if (interval >= 0) prefetch branch. ++// So choose comment prefetch as the base line. ++#if 0 ++ __asm__ __volatile__ ( ++ " preld 0, %[__loc] \n" ++ : ++ : [__loc] "m"( *((address)loc + interval) ) ++ : "memory" ++ ); ++#endif ++} ++ ++inline void Prefetch::write(void *loc, intx interval) { ++// Ditto ++#if 0 ++ __asm__ __volatile__ ( ++ " preld 8, %[__loc] \n" ++ : ++ : [__loc] "m"( *((address)loc + interval) ) ++ : "memory" ++ ); ++#endif ++} ++ ++#endif // OS_CPU_LINUX_LOONGARCH_PREFETCH_LINUX_LOONGARCH_INLINE_HPP +diff --git a/src/hotspot/os_cpu/linux_loongarch/thread_linux_loongarch.cpp b/src/hotspot/os_cpu/linux_loongarch/thread_linux_loongarch.cpp +new file mode 100644 +index 0000000000..a1a9f181bd +--- /dev/null ++++ b/src/hotspot/os_cpu/linux_loongarch/thread_linux_loongarch.cpp +@@ -0,0 +1,116 @@ ++/* ++ * Copyright (c) 2003, 2013, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ * ++ */ ++ ++#include "precompiled.hpp" ++#include "memory/metaspaceShared.hpp" ++#include "runtime/frame.inline.hpp" ++#include "runtime/thread.inline.hpp" ++#include "runtime/sharedRuntime.hpp" ++ ++void JavaThread::pd_initialize() ++{ ++ _anchor.clear(); ++} ++ ++frame JavaThread::pd_last_frame() { ++ assert(has_last_Java_frame(), "must have last_Java_sp() when suspended"); ++ if (_anchor.last_Java_pc() != NULL) { ++ return frame(_anchor.last_Java_sp(), _anchor.last_Java_fp(), _anchor.last_Java_pc()); ++ } else { ++ // This will pick up pc from sp ++ return frame(_anchor.last_Java_sp(), _anchor.last_Java_fp()); ++ } ++} ++ ++// For Forte Analyzer AsyncGetCallTrace profiling support - thread is ++// currently interrupted by SIGPROF ++bool JavaThread::pd_get_top_frame_for_signal_handler(frame* fr_addr, ++ void* ucontext, bool isInJava) { ++ ++ assert(Thread::current() == this, "caller must be current thread"); ++ return pd_get_top_frame(fr_addr, ucontext, isInJava); ++} ++ ++ ++bool JavaThread::pd_get_top_frame_for_profiling(frame* fr_addr, void* ucontext, bool isInJava) { ++ return pd_get_top_frame(fr_addr, ucontext, isInJava); ++} ++ ++bool JavaThread::pd_get_top_frame(frame* fr_addr, void* ucontext, bool isInJava) { ++ assert(this->is_Java_thread(), "must be JavaThread"); ++ JavaThread* jt = (JavaThread *)this; ++ ++ // If we have a last_Java_frame, then we should use it even if ++ // isInJava == true. It should be more reliable than ucontext info. ++ if (jt->has_last_Java_frame() && jt->frame_anchor()->walkable()) { ++ *fr_addr = jt->pd_last_frame(); ++ return true; ++ } ++ ++ // At this point, we don't have a last_Java_frame, so ++ // we try to glean some information out of the ucontext ++ // if we were running Java code when SIGPROF came in. ++ if (isInJava) { ++ ucontext_t* uc = (ucontext_t*) ucontext; ++ ++ intptr_t* ret_fp; ++ intptr_t* ret_sp; ++ ExtendedPC addr = os::Linux::fetch_frame_from_ucontext(this, uc, ++ &ret_sp, &ret_fp); ++ if (addr.pc() == NULL || ret_sp == NULL ) { ++ // ucontext wasn't useful ++ return false; ++ } ++ ++ if (MetaspaceShared::is_in_trampoline_frame(addr.pc())) { ++ // In the middle of a trampoline call. Bail out for safety. ++ // This happens rarely so shouldn't affect profiling. ++ return false; ++ } ++ ++ frame ret_frame(ret_sp, ret_fp, addr.pc()); ++ if (!ret_frame.safe_for_sender(jt)) { ++#ifdef COMPILER2 ++ // C2 and JVMCI use ebp as a general register see if NULL fp helps ++ frame ret_frame2(ret_sp, NULL, addr.pc()); ++ if (!ret_frame2.safe_for_sender(jt)) { ++ // nothing else to try if the frame isn't good ++ return false; ++ } ++ ret_frame = ret_frame2; ++#else ++ // nothing else to try if the frame isn't good ++ return false; ++#endif // COMPILER2_OR_JVMCI ++ } ++ *fr_addr = ret_frame; ++ return true; ++ } ++ ++ // nothing else to try ++ return false; ++} ++ ++void JavaThread::cache_global_variables() { } +diff --git a/src/hotspot/os_cpu/linux_loongarch/thread_linux_loongarch.hpp b/src/hotspot/os_cpu/linux_loongarch/thread_linux_loongarch.hpp +new file mode 100644 +index 0000000000..a3ac28ebd3 +--- /dev/null ++++ b/src/hotspot/os_cpu/linux_loongarch/thread_linux_loongarch.hpp +@@ -0,0 +1,66 @@ ++/* ++ * Copyright (c) 2000, 2013, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#ifndef OS_CPU_LINUX_LOONGARCH_VM_THREAD_LINUX_LOONGARCH_HPP ++#define OS_CPU_LINUX_LOONGARCH_VM_THREAD_LINUX_LOONGARCH_HPP ++ ++ private: ++ void pd_initialize(); ++ ++ frame pd_last_frame(); ++ ++ public: ++ // Mutators are highly dangerous.... ++ intptr_t* last_Java_fp() { return _anchor.last_Java_fp(); } ++ void set_last_Java_fp(intptr_t* fp) { _anchor.set_last_Java_fp(fp); } ++ ++ void set_base_of_stack_pointer(intptr_t* base_sp) { ++ } ++ ++ static ByteSize last_Java_fp_offset() { ++ return byte_offset_of(JavaThread, _anchor) + JavaFrameAnchor::last_Java_fp_offset(); ++ } ++ ++ intptr_t* base_of_stack_pointer() { ++ return NULL; ++ } ++ void record_base_of_stack_pointer() { ++ } ++ ++ bool pd_get_top_frame_for_signal_handler(frame* fr_addr, void* ucontext, ++ bool isInJava); ++ ++ bool pd_get_top_frame_for_profiling(frame* fr_addr, void* ucontext, bool isInJava); ++private: ++ bool pd_get_top_frame(frame* fr_addr, void* ucontext, bool isInJava); ++public: ++ ++ // These routines are only used on cpu architectures that ++ // have separate register stacks (Itanium). ++ static bool register_stack_overflow() { return false; } ++ static void enable_register_stack_guard() {} ++ static void disable_register_stack_guard() {} ++ ++#endif // OS_CPU_LINUX_LOONGARCH_VM_THREAD_LINUX_LOONGARCH_HPP +diff --git a/src/hotspot/os_cpu/linux_loongarch/vmStructs_linux_loongarch.hpp b/src/hotspot/os_cpu/linux_loongarch/vmStructs_linux_loongarch.hpp +new file mode 100644 +index 0000000000..a39cb79bb1 +--- /dev/null ++++ b/src/hotspot/os_cpu/linux_loongarch/vmStructs_linux_loongarch.hpp +@@ -0,0 +1,55 @@ ++/* ++ * Copyright (c) 2000, 2013, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). 
++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#ifndef OS_CPU_LINUX_LOONGARCH_VMSTRUCTS_LINUX_LOONGARCH_HPP ++#define OS_CPU_LINUX_LOONGARCH_VMSTRUCTS_LINUX_LOONGARCH_HPP ++ ++// These are the OS and CPU-specific fields, types and integer ++// constants required by the Serviceability Agent. This file is ++// referenced by vmStructs.cpp. ++ ++#define VM_STRUCTS_OS_CPU(nonstatic_field, static_field, unchecked_nonstatic_field, volatile_nonstatic_field, nonproduct_nonstatic_field, c2_nonstatic_field, unchecked_c1_static_field, unchecked_c2_static_field) \ ++ \ ++ /******************************/ \ ++ /* Threads (NOTE: incomplete) */ \ ++ /******************************/ \ ++ nonstatic_field(OSThread, _thread_id, pid_t) \ ++ nonstatic_field(OSThread, _pthread_id, pthread_t) ++ ++ ++#define VM_TYPES_OS_CPU(declare_type, declare_toplevel_type, declare_oop_type, declare_integer_type, declare_unsigned_integer_type, declare_c1_toplevel_type, declare_c2_type, declare_c2_toplevel_type) \ ++ \ ++ /**********************/ \ ++ /* Posix Thread IDs */ \ ++ /**********************/ \ ++ \ ++ declare_integer_type(pid_t) \ ++ declare_unsigned_integer_type(pthread_t) ++ ++#define VM_INT_CONSTANTS_OS_CPU(declare_constant, declare_preprocessor_constant, declare_c1_constant, declare_c2_constant, declare_c2_preprocessor_constant) ++ ++#define VM_LONG_CONSTANTS_OS_CPU(declare_constant, declare_preprocessor_constant, declare_c1_constant, declare_c2_constant, declare_c2_preprocessor_constant) ++ ++#endif // OS_CPU_LINUX_LOONGARCH_VMSTRUCTS_LINUX_LOONGARCH_HPP +diff --git a/src/hotspot/os_cpu/linux_loongarch/vm_version_linux_loongarch.cpp b/src/hotspot/os_cpu/linux_loongarch/vm_version_linux_loongarch.cpp +new file mode 100644 +index 0000000000..edc148ef91 +--- /dev/null ++++ b/src/hotspot/os_cpu/linux_loongarch/vm_version_linux_loongarch.cpp +@@ -0,0 +1,93 @@ ++/* ++ * Copyright (c) 2006, 2021, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2023, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ * ++ */ ++ ++#include "precompiled.hpp" ++#include "runtime/os.hpp" ++#include "runtime/vm_version.hpp" ++ ++#include ++#include ++ ++#ifndef HWCAP_LOONGARCH_LAM ++#define HWCAP_LOONGARCH_LAM (1 << 1) ++#endif ++ ++#ifndef HWCAP_LOONGARCH_UAL ++#define HWCAP_LOONGARCH_UAL (1 << 2) ++#endif ++ ++#ifndef HWCAP_LOONGARCH_LSX ++#define HWCAP_LOONGARCH_LSX (1 << 4) ++#endif ++ ++#ifndef HWCAP_LOONGARCH_LASX ++#define HWCAP_LOONGARCH_LASX (1 << 5) ++#endif ++ ++#ifndef HWCAP_LOONGARCH_COMPLEX ++#define HWCAP_LOONGARCH_COMPLEX (1 << 7) ++#endif ++ ++#ifndef HWCAP_LOONGARCH_CRYPTO ++#define HWCAP_LOONGARCH_CRYPTO (1 << 8) ++#endif ++ ++#ifndef HWCAP_LOONGARCH_LBT_X86 ++#define HWCAP_LOONGARCH_LBT_X86 (1 << 10) ++#endif ++ ++#ifndef HWCAP_LOONGARCH_LBT_ARM ++#define HWCAP_LOONGARCH_LBT_ARM (1 << 11) ++#endif ++ ++#ifndef HWCAP_LOONGARCH_LBT_MIPS ++#define HWCAP_LOONGARCH_LBT_MIPS (1 << 12) ++#endif ++ ++void VM_Version::get_os_cpu_info() { ++ ++ uint64_t auxv = getauxval(AT_HWCAP); ++ ++ STATIC_ASSERT(CPU_LAM == HWCAP_LOONGARCH_LAM); ++ STATIC_ASSERT(CPU_UAL == HWCAP_LOONGARCH_UAL); ++ STATIC_ASSERT(CPU_LSX == HWCAP_LOONGARCH_LSX); ++ STATIC_ASSERT(CPU_LASX == HWCAP_LOONGARCH_LASX); ++ STATIC_ASSERT(CPU_COMPLEX == HWCAP_LOONGARCH_COMPLEX); ++ STATIC_ASSERT(CPU_CRYPTO == HWCAP_LOONGARCH_CRYPTO); ++ STATIC_ASSERT(CPU_LBT_X86 == HWCAP_LOONGARCH_LBT_X86); ++ STATIC_ASSERT(CPU_LBT_ARM == HWCAP_LOONGARCH_LBT_ARM); ++ STATIC_ASSERT(CPU_LBT_MIPS == HWCAP_LOONGARCH_LBT_MIPS); ++ ++ _features = auxv & ( ++ HWCAP_LOONGARCH_LAM | ++ HWCAP_LOONGARCH_UAL | ++ HWCAP_LOONGARCH_LSX | ++ HWCAP_LOONGARCH_LASX | ++ HWCAP_LOONGARCH_COMPLEX | ++ HWCAP_LOONGARCH_CRYPTO | ++ HWCAP_LOONGARCH_LBT_X86 | ++ HWCAP_LOONGARCH_LBT_ARM | ++ HWCAP_LOONGARCH_LBT_MIPS); ++} +diff --git a/src/hotspot/os_cpu/linux_mips/assembler_linux_mips.cpp b/src/hotspot/os_cpu/linux_mips/assembler_linux_mips.cpp +new file mode 100644 +index 0000000000..30719a0340 +--- /dev/null ++++ b/src/hotspot/os_cpu/linux_mips/assembler_linux_mips.cpp +@@ -0,0 +1,24 @@ ++/* ++ * Copyright (c) 1999, 2010, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2021, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ +diff --git a/src/hotspot/os_cpu/linux_mips/atomic_linux_mips.hpp b/src/hotspot/os_cpu/linux_mips/atomic_linux_mips.hpp +new file mode 100644 +index 0000000000..cd7cecad63 +--- /dev/null ++++ b/src/hotspot/os_cpu/linux_mips/atomic_linux_mips.hpp +@@ -0,0 +1,191 @@ ++/* ++ * Copyright (c) 1999, 2013, Oracle and/or its affiliates. 
All rights reserved. ++ * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#ifndef OS_CPU_LINUX_MIPS_VM_ATOMIC_LINUX_MIPS_HPP ++#define OS_CPU_LINUX_MIPS_VM_ATOMIC_LINUX_MIPS_HPP ++ ++#include "runtime/vm_version.hpp" ++ ++// Implementation of class atomic ++ ++template ++struct Atomic::PlatformAdd ++ : Atomic::AddAndFetch > ++{ ++ template ++ D add_and_fetch(I add_value, D volatile* dest, atomic_memory_order order) const { ++ //Unimplemented(); ++ return __sync_add_and_fetch(dest, add_value); ++ } ++}; ++ ++template<> ++template ++inline T Atomic::PlatformXchg<4>::operator()(T exchange_value, ++ T volatile* dest, ++ atomic_memory_order order) const { ++ T __ret, __tmp; ++ ++ STATIC_ASSERT(4 == sizeof(T)); ++ __asm__ __volatile__ ( ++ " .set push\n\t" ++ " .set mips64\n\t" ++ " .set noreorder\n\t" ++ ++ "1: sync\n\t" ++ " ll %[__ret], %[__dest] \n\t" ++ " move %[__tmp], %[__val] \n\t" ++ " sc %[__tmp], %[__dest] \n\t" ++ " beqz %[__tmp], 1b \n\t" ++ " nop \n\t" ++ ++ " .set pop\n\t" ++ ++ : [__ret] "=&r" (__ret), [__tmp] "=&r" (__tmp) ++ : [__dest] "m" (*(volatile jint*)dest), [__val] "r" (exchange_value) ++ : "memory" ++ ); ++ ++ return __ret; ++} ++ ++template<> ++template ++inline T Atomic::PlatformXchg<8>::operator()(T exchange_value, ++ T volatile* dest, ++ atomic_memory_order order) const { ++ STATIC_ASSERT(8 == sizeof(T)); ++ T __ret; ++ jlong __tmp; ++ __asm__ __volatile__ ( ++ " .set push\n\t" ++ " .set mips64\n\t" ++ " .set noreorder\n\t" ++ ++ "1: sync\n\t" ++ " lld %[__ret], %[__dest] \n\t" ++ " move %[__tmp], %[__val] \n\t" ++ " scd %[__tmp], %[__dest] \n\t" ++ " beqz %[__tmp], 1b \n\t" ++ " nop \n\t" ++ ++ " .set pop\n\t" ++ ++ : [__ret] "=&r" (__ret), [__tmp] "=&r" (__tmp) ++ : [__dest] "m" (*(volatile intptr_t*)dest), [__val] "r" (exchange_value) ++ : "memory" ++ ); ++ return __ret; ++} ++ ++#if 0 ++template<> ++template ++inline T Atomic::PlatformCmpxchg<1>::operator()(T exchange_value, ++ T volatile* dest, ++ T compare_value, ++ atomic_memory_order order) const { ++ STATIC_ASSERT(1 == sizeof(T)); ++} ++ ++#else ++// No direct support for cmpxchg of bytes; emulate using int. 
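++// CmpxchgByteUsingInt (shared runtime code) emulates the byte compare-and-swap
++// with a 4-byte cmpxchg on the enclosing aligned word.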
++template<> ++struct Atomic::PlatformCmpxchg<1> : Atomic::CmpxchgByteUsingInt {}; ++#endif ++ ++template<> ++template ++inline T Atomic::PlatformCmpxchg<4>::operator()(T exchange_value, ++ T volatile* dest, ++ T compare_value, ++ atomic_memory_order order) const { ++ STATIC_ASSERT(4 == sizeof(T)); ++ T __prev; ++ jint __cmp; ++ ++ __asm__ __volatile__ ( ++ " .set push\n\t" ++ " .set mips64\n\t" ++ " .set noreorder\n\t" ++ ++ "1:sync \n\t" ++ " ll %[__prev], %[__dest] \n\t" ++ " bne %[__prev], %[__old], 2f \n\t" ++ " move %[__cmp], $0 \n\t" ++ " move %[__cmp], %[__new] \n\t" ++ " sc %[__cmp], %[__dest] \n\t" ++ " beqz %[__cmp], 1b \n\t" ++ " nop \n\t" ++ "2: \n\t" ++ " sync \n\t" ++ ++ " .set pop\n\t" ++ ++ : [__prev] "=&r" (__prev), [__cmp] "=&r" (__cmp) ++ : [__dest] "m" (*(volatile jint*)dest), [__old] "r" (compare_value), [__new] "r" (exchange_value) ++ : "memory" ++ ); ++ ++ return __prev; ++} ++ ++template<> ++template ++inline T Atomic::PlatformCmpxchg<8>::operator()(T exchange_value, ++ T volatile* dest, ++ T compare_value, ++ atomic_memory_order order) const { ++ STATIC_ASSERT(8 == sizeof(T)); ++ T __prev; ++ jlong __cmp; ++ ++ __asm__ __volatile__ ( ++ " .set push\n\t" ++ " .set mips64\n\t" ++ " .set noreorder\n\t" ++ ++ "1:sync \n\t" ++ " lld %[__prev], %[__dest] \n\t" ++ " bne %[__prev], %[__old], 2f \n\t" ++ " move %[__cmp], $0 \n\t" ++ " move %[__cmp], %[__new] \n\t" ++ " scd %[__cmp], %[__dest] \n\t" ++ " beqz %[__cmp], 1b \n\t" ++ " nop \n\t" ++ "2: \n\t" ++ " sync \n\t" ++ ++ " .set pop\n\t" ++ ++ : [__prev] "=&r" (__prev), [__cmp] "=&r" (__cmp) ++ : [__dest] "m" (*(volatile jlong*)dest), [__old] "r" (compare_value), [__new] "r" (exchange_value) ++ : "memory" ++ ); ++ return __prev; ++} ++ ++ ++#endif // OS_CPU_LINUX_MIPS_VM_ATOMIC_LINUX_MIPS_HPP +diff --git a/src/hotspot/os_cpu/linux_mips/bytes_linux_mips.inline.hpp b/src/hotspot/os_cpu/linux_mips/bytes_linux_mips.inline.hpp +new file mode 100644 +index 0000000000..5b5cd10aa5 +--- /dev/null ++++ b/src/hotspot/os_cpu/linux_mips/bytes_linux_mips.inline.hpp +@@ -0,0 +1,37 @@ ++/* ++ * Copyright (c) 1999, 2010, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2016, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#ifndef OS_CPU_LINUX_MIPS_VM_BYTES_LINUX_MIPS_INLINE_HPP ++#define OS_CPU_LINUX_MIPS_VM_BYTES_LINUX_MIPS_INLINE_HPP ++ ++#include ++ ++// Efficient swapping of data bytes from Java byte ++// ordering to native byte ordering and vice versa. 
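++// bswap_16/32/64 below are the glibc byte-swap macros declared in <byteswap.h>.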
++inline u2 Bytes::swap_u2(u2 x) { return bswap_16(x); } ++inline u4 Bytes::swap_u4(u4 x) { return bswap_32(x); } ++inline u8 Bytes::swap_u8(u8 x) { return bswap_64(x); } ++ ++#endif // OS_CPU_LINUX_MIPS_VM_BYTES_LINUX_MIPS_INLINE_HPP +diff --git a/src/hotspot/os_cpu/linux_mips/copy_linux_mips.inline.hpp b/src/hotspot/os_cpu/linux_mips/copy_linux_mips.inline.hpp +new file mode 100644 +index 0000000000..3fd6ef7b36 +--- /dev/null ++++ b/src/hotspot/os_cpu/linux_mips/copy_linux_mips.inline.hpp +@@ -0,0 +1,125 @@ ++/* ++ * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2016, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#ifndef OS_CPU_LINUX_MIPS_VM_COPY_LINUX_MIPS_INLINE_HPP ++#define OS_CPU_LINUX_MIPS_VM_COPY_LINUX_MIPS_INLINE_HPP ++ ++static void pd_conjoint_words(const HeapWord* from, HeapWord* to, size_t count) { ++ (void)memmove(to, from, count * HeapWordSize); ++} ++ ++static void pd_disjoint_words(const HeapWord* from, HeapWord* to, size_t count) { ++ switch (count) { ++ case 8: to[7] = from[7]; ++ case 7: to[6] = from[6]; ++ case 6: to[5] = from[5]; ++ case 5: to[4] = from[4]; ++ case 4: to[3] = from[3]; ++ case 3: to[2] = from[2]; ++ case 2: to[1] = from[1]; ++ case 1: to[0] = from[0]; ++ case 0: break; ++ default: ++ (void)memcpy(to, from, count * HeapWordSize); ++ break; ++ } ++} ++ ++static void pd_disjoint_words_atomic(const HeapWord* from, HeapWord* to, size_t count) { ++ switch (count) { ++ case 8: to[7] = from[7]; ++ case 7: to[6] = from[6]; ++ case 6: to[5] = from[5]; ++ case 5: to[4] = from[4]; ++ case 4: to[3] = from[3]; ++ case 3: to[2] = from[2]; ++ case 2: to[1] = from[1]; ++ case 1: to[0] = from[0]; ++ case 0: break; ++ default: ++ while (count-- > 0) { ++ *to++ = *from++; ++ } ++ break; ++ } ++} ++ ++static void pd_aligned_conjoint_words(const HeapWord* from, HeapWord* to, size_t count) { ++ pd_conjoint_words(from, to, count); ++} ++ ++static void pd_aligned_disjoint_words(const HeapWord* from, HeapWord* to, size_t count) { ++ pd_disjoint_words(from, to, count); ++} ++ ++static void pd_conjoint_bytes(const void* from, void* to, size_t count) { ++ (void)memmove(to, from, count); ++} ++ ++static void pd_conjoint_bytes_atomic(const void* from, void* to, size_t count) { ++ pd_conjoint_bytes(from, to, count); ++} ++ ++static void pd_conjoint_jshorts_atomic(const jshort* from, jshort* to, size_t count) { ++ copy_conjoint_atomic(from, to, count); ++} ++ ++static void pd_conjoint_jints_atomic(const jint* from, jint* to, 
size_t count) { ++ copy_conjoint_atomic(from, to, count); ++} ++ ++static void pd_conjoint_jlongs_atomic(const jlong* from, jlong* to, size_t count) { ++ copy_conjoint_atomic(from, to, count); ++} ++ ++static void pd_conjoint_oops_atomic(const oop* from, oop* to, size_t count) { ++ //assert(!UseCompressedOops, "foo!"); ++ assert(HeapWordSize == BytesPerOop, "heapwords and oops must be the same size"); ++ copy_conjoint_atomic(from, to, count); ++} ++ ++static void pd_arrayof_conjoint_bytes(const HeapWord* from, HeapWord* to, size_t count) { ++ pd_conjoint_bytes_atomic(from, to, count); ++} ++ ++static void pd_arrayof_conjoint_jshorts(const HeapWord* from, HeapWord* to, size_t count) { ++ pd_conjoint_jshorts_atomic((jshort*)from, (jshort*)to, count); ++} ++ ++static void pd_arrayof_conjoint_jints(const HeapWord* from, HeapWord* to, size_t count) { ++ pd_conjoint_jints_atomic((jint*)from, (jint*)to, count); ++} ++ ++static void pd_arrayof_conjoint_jlongs(const HeapWord* from, HeapWord* to, size_t count) { ++ pd_conjoint_jlongs_atomic((jlong*)from, (jlong*)to, count); ++} ++ ++static void pd_arrayof_conjoint_oops(const HeapWord* from, HeapWord* to, size_t count) { ++ //assert(!UseCompressedOops, "foo!"); ++ assert(BytesPerLong == BytesPerOop, "jlongs and oops must be the same size"); ++ pd_conjoint_oops_atomic((oop*)from, (oop*)to, count); ++} ++ ++#endif // OS_CPU_LINUX_MIPS_VM_COPY_LINUX_MIPS_INLINE_HPP +diff --git a/src/hotspot/os_cpu/linux_mips/globals_linux_mips.hpp b/src/hotspot/os_cpu/linux_mips/globals_linux_mips.hpp +new file mode 100644 +index 0000000000..f1599ac5f1 +--- /dev/null ++++ b/src/hotspot/os_cpu/linux_mips/globals_linux_mips.hpp +@@ -0,0 +1,51 @@ ++/* ++ * Copyright (c) 2000, 2013, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2018, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#ifndef OS_CPU_LINUX_MIPS_VM_GLOBALS_LINUX_MIPS_HPP ++#define OS_CPU_LINUX_MIPS_VM_GLOBALS_LINUX_MIPS_HPP ++ ++// Sets the default values for platform dependent flags used by the runtime system. ++// (see globals.hpp) ++ ++define_pd_global(bool, DontYieldALot, false); ++#ifdef MIPS64 ++define_pd_global(intx, ThreadStackSize, 1024); // 0 => use system default ++define_pd_global(intx, VMThreadStackSize, 1024); ++#else ++// ThreadStackSize 320 allows a couple of test cases to run while ++// keeping the number of threads that can be created high. System ++// default ThreadStackSize appears to be 512 which is too big. 
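The copy_linux_mips.inline.hpp helpers above follow the usual HotSpot split: 'conjoint' copies may have overlapping source and destination and therefore go through memmove (or an element-by-element atomic loop), while 'disjoint' copies never overlap and can be unrolled with deliberate switch fall-through or handed to memcpy. A tiny, hypothetical standalone demonstration of why the overlap distinction matters (not part of the patch):

#include <cstdio>
#include <cstring>

int main() {
  long buf[8] = {0, 1, 2, 3, 4, 5, 6, 7};
  // Shift elements 0..5 up by two slots: source and destination overlap,
  // so memmove (the conjoint case) is required; memcpy would be undefined.
  std::memmove(buf + 2, buf, 6 * sizeof(long));
  for (long v : buf) std::printf("%ld ", v);   // prints: 0 1 0 1 2 3 4 5
  std::printf("\n");
  return 0;
}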
++define_pd_global(intx, ThreadStackSize, 320); ++define_pd_global(intx, VMThreadStackSize, 512); ++#endif // MIPS64 ++ ++define_pd_global(intx, CompilerThreadStackSize, 0); ++ ++define_pd_global(uintx,JVMInvokeMethodSlack, 8192); ++ ++// Used on 64 bit platforms for UseCompressedOops base address ++define_pd_global(uintx,HeapBaseMinAddress, 2*G); ++ ++#endif // OS_CPU_LINUX_MIPS_VM_GLOBALS_LINUX_MIPS_HPP +diff --git a/src/hotspot/os_cpu/linux_mips/linux_mips.s b/src/hotspot/os_cpu/linux_mips/linux_mips.s +new file mode 100644 +index 0000000000..36c8d810c3 +--- /dev/null ++++ b/src/hotspot/os_cpu/linux_mips/linux_mips.s +@@ -0,0 +1,25 @@ ++# ++# Copyright (c) 2004, 2013, Oracle and/or its affiliates. All rights reserved. ++# Copyright (c) 2015, 2018, Loongson Technology. All rights reserved. ++# DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++# ++# This code is free software; you can redistribute it and/or modify it ++# under the terms of the GNU General Public License version 2 only, as ++# published by the Free Software Foundation. ++# ++# This code is distributed in the hope that it will be useful, but WITHOUT ++# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++# FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++# version 2 for more details (a copy is included in the LICENSE file that ++# accompanied this code). ++# ++# You should have received a copy of the GNU General Public License version ++# 2 along with this work; if not, write to the Free Software Foundation, ++# Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++# ++# Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++# or visit www.oracle.com if you need additional information or have any ++# questions. ++# ++ ++ +diff --git a/src/hotspot/os_cpu/linux_mips/orderAccess_linux_mips.hpp b/src/hotspot/os_cpu/linux_mips/orderAccess_linux_mips.hpp +new file mode 100644 +index 0000000000..bf9d679730 +--- /dev/null ++++ b/src/hotspot/os_cpu/linux_mips/orderAccess_linux_mips.hpp +@@ -0,0 +1,51 @@ ++/* ++ * Copyright (c) 2003, 2013, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#ifndef OS_CPU_LINUX_MIPS_VM_ORDERACCESS_LINUX_MIPS_HPP ++#define OS_CPU_LINUX_MIPS_VM_ORDERACCESS_LINUX_MIPS_HPP ++ ++#include "runtime/os.hpp" ++ ++// Included in orderAccess.hpp header file. ++ ++// Implementation of class OrderAccess. 
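In the OrderAccess implementation that follows, every barrier flavor (loadload, storestore, loadstore, storeload, acquire, release, fence) expands to the same inlasm_sync() macro: a full MIPS 'sync' instruction, weakened to a plain compiler barrier ('nop' with a memory clobber) when os::is_ActiveCoresMP() reports that UseActiveCoresMP is set and only one processor was active at startup. A rough portable sketch of those two cases, assuming nothing HotSpot-specific (the helper names are hypothetical):

#include <atomic>

// Hypothetical equivalents: the multi-core path of inlasm_sync() amounts to
// a full sequentially consistent fence; the single-core path keeps only the
// compiler-level ordering.
static inline void full_fence()     { std::atomic_thread_fence(std::memory_order_seq_cst); }
static inline void compiler_fence() { std::atomic_signal_fence(std::memory_order_seq_cst); }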
++#define inlasm_sync() if (os::is_ActiveCoresMP()) \ ++ __asm__ __volatile__ ("nop" : : : "memory"); \ ++ else \ ++ __asm__ __volatile__ ("sync" : : : "memory"); ++ ++inline void OrderAccess::loadload() { inlasm_sync(); } ++inline void OrderAccess::storestore() { inlasm_sync(); } ++inline void OrderAccess::loadstore() { inlasm_sync(); } ++inline void OrderAccess::storeload() { inlasm_sync(); } ++ ++inline void OrderAccess::acquire() { inlasm_sync(); } ++inline void OrderAccess::release() { inlasm_sync(); } ++inline void OrderAccess::fence() { inlasm_sync(); } ++ ++ ++#undef inlasm_sync ++ ++#endif // OS_CPU_LINUX_MIPS_VM_ORDERACCESS_LINUX_MIPS_HPP +diff --git a/src/hotspot/os_cpu/linux_mips/os_linux_mips.cpp b/src/hotspot/os_cpu/linux_mips/os_linux_mips.cpp +new file mode 100644 +index 0000000000..d035d8edbb +--- /dev/null ++++ b/src/hotspot/os_cpu/linux_mips/os_linux_mips.cpp +@@ -0,0 +1,1020 @@ ++/* ++ * Copyright (c) 1999, 2014, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2023, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ * ++ */ ++ ++// no precompiled headers ++#include "asm/macroAssembler.hpp" ++#include "classfile/classLoader.hpp" ++#include "classfile/systemDictionary.hpp" ++#include "classfile/vmSymbols.hpp" ++#include "code/icBuffer.hpp" ++#include "code/vtableStubs.hpp" ++#include "interpreter/interpreter.hpp" ++#include "memory/allocation.inline.hpp" ++#include "os_share_linux.hpp" ++#include "prims/jniFastGetField.hpp" ++#include "prims/jvm_misc.hpp" ++#include "runtime/arguments.hpp" ++#include "runtime/extendedPC.hpp" ++#include "runtime/frame.inline.hpp" ++#include "runtime/interfaceSupport.inline.hpp" ++#include "runtime/java.hpp" ++#include "runtime/javaCalls.hpp" ++#include "runtime/mutexLocker.hpp" ++#include "runtime/osThread.hpp" ++#include "runtime/sharedRuntime.hpp" ++#include "runtime/stubRoutines.hpp" ++#include "runtime/thread.inline.hpp" ++#include "runtime/timer.hpp" ++#include "utilities/events.hpp" ++#include "utilities/vmError.hpp" ++#include "compiler/disassembler.hpp" ++ ++// put OS-includes here ++# include ++# include ++# include ++# include ++# include ++# include ++# include ++# include ++# include ++# include ++# include ++# include ++# include ++# include ++# include ++# include ++# include ++# include ++# include ++# include ++ ++#define REG_SP 29 ++#define REG_FP 30 ++ ++address os::current_stack_pointer() { ++ register void *sp __asm__ ("$29"); ++ return (address) sp; ++} ++ ++char* os::non_memory_address_word() { ++ // Must never look like an address returned by reserve_memory, ++ // even in its subfields (as defined by the CPU immediate fields, ++ // if the CPU splits constants across multiple instructions). ++ ++ return (char*) -1; ++} ++ ++address os::Linux::ucontext_get_pc(const ucontext_t * uc) { ++ return (address)uc->uc_mcontext.pc; ++} ++ ++void os::Linux::ucontext_set_pc(ucontext_t * uc, address pc) { ++ uc->uc_mcontext.pc = (intptr_t)pc; ++} ++ ++intptr_t* os::Linux::ucontext_get_sp(const ucontext_t * uc) { ++ return (intptr_t*)uc->uc_mcontext.gregs[REG_SP]; ++} ++ ++intptr_t* os::Linux::ucontext_get_fp(const ucontext_t * uc) { ++ return (intptr_t*)uc->uc_mcontext.gregs[REG_FP]; ++} ++ ++// For Forte Analyzer AsyncGetCallTrace profiling support - thread ++// is currently interrupted by SIGPROF. ++// os::Solaris::fetch_frame_from_ucontext() tries to skip nested signal ++// frames. Currently we don't do that on Linux, so it's the same as ++// os::fetch_frame_from_context(). 
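The accessors above pull pc, sp and fp straight out of the MIPS ucontext: uc_mcontext.pc plus gregs[29] ($sp) and gregs[30] ($fp), per the REG_SP/REG_FP defines. A minimal, hypothetical SA_SIGINFO handler showing the same fields outside HotSpot — the names, the deliberate fault, and the MIPS/Linux field layout are assumptions for illustration only:

#include <signal.h>
#include <cstdio>
#include <ucontext.h>
#include <unistd.h>

static void crash_handler(int sig, siginfo_t* info, void* ucVoid) {
  ucontext_t* uc = static_cast<ucontext_t*>(ucVoid);
  // Same fields as os::Linux::ucontext_get_pc()/get_sp() above.
  std::fprintf(stderr, "sig=%d addr=%p pc=%#llx sp=%#llx\n",
               sig, info->si_addr,
               (unsigned long long)uc->uc_mcontext.pc,
               (unsigned long long)uc->uc_mcontext.gregs[29]);
  _exit(1);
}

int main() {
  struct sigaction sa = {};
  sa.sa_sigaction = crash_handler;
  sa.sa_flags = SA_SIGINFO;
  sigaction(SIGSEGV, &sa, nullptr);
  *(volatile int*)0 = 0;  // deliberately fault to exercise the handler
  return 0;
}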
++ExtendedPC os::Linux::fetch_frame_from_ucontext(Thread* thread, ++ const ucontext_t* uc, intptr_t** ret_sp, intptr_t** ret_fp) { ++ ++ assert(thread != NULL, "just checking"); ++ assert(ret_sp != NULL, "just checking"); ++ assert(ret_fp != NULL, "just checking"); ++ ++ return os::fetch_frame_from_context(uc, ret_sp, ret_fp); ++} ++ ++ExtendedPC os::fetch_frame_from_context(const void* ucVoid, ++ intptr_t** ret_sp, intptr_t** ret_fp) { ++ ++ ExtendedPC epc; ++ ucontext_t* uc = (ucontext_t*)ucVoid; ++ ++ if (uc != NULL) { ++ epc = ExtendedPC(os::Linux::ucontext_get_pc(uc)); ++ if (ret_sp) *ret_sp = os::Linux::ucontext_get_sp(uc); ++ if (ret_fp) *ret_fp = os::Linux::ucontext_get_fp(uc); ++ } else { ++ // construct empty ExtendedPC for return value checking ++ epc = ExtendedPC(NULL); ++ if (ret_sp) *ret_sp = (intptr_t *)NULL; ++ if (ret_fp) *ret_fp = (intptr_t *)NULL; ++ } ++ ++ return epc; ++} ++ ++frame os::fetch_frame_from_context(const void* ucVoid) { ++ intptr_t* sp; ++ intptr_t* fp; ++ ExtendedPC epc = fetch_frame_from_context(ucVoid, &sp, &fp); ++ return frame(sp, fp, epc.pc()); ++} ++ ++bool os::Linux::get_frame_at_stack_banging_point(JavaThread* thread, ucontext_t* uc, frame* fr) { ++ address pc = (address) os::Linux::ucontext_get_pc(uc); ++ if (Interpreter::contains(pc)) { ++ // interpreter performs stack banging after the fixed frame header has ++ // been generated while the compilers perform it before. To maintain ++ // semantic consistency between interpreted and compiled frames, the ++ // method returns the Java sender of the current frame. ++ *fr = os::fetch_frame_from_context(uc); ++ if (!fr->is_first_java_frame()) { ++ assert(fr->safe_for_sender(thread), "Safety check"); ++ *fr = fr->java_sender(); ++ } ++ } else { ++ // more complex code with compiled code ++ assert(!Interpreter::contains(pc), "Interpreted methods should have been handled above"); ++ CodeBlob* cb = CodeCache::find_blob(pc); ++ if (cb == NULL || !cb->is_nmethod() || cb->is_frame_complete_at(pc)) { ++ // Not sure where the pc points to, fallback to default ++ // stack overflow handling ++ return false; ++ } else { ++ // In compiled code, the stack banging is performed before LR ++ // has been saved in the frame. RA is live, and SP and FP ++ // belong to the caller. ++ intptr_t* fp = os::Linux::ucontext_get_fp(uc); ++ intptr_t* sp = os::Linux::ucontext_get_sp(uc); ++ address pc = (address)(uc->uc_mcontext.gregs[31]); ++ *fr = frame(sp, fp, pc); ++ if (!fr->is_java_frame()) { ++ assert(fr->safe_for_sender(thread), "Safety check"); ++ assert(!fr->is_first_frame(), "Safety check"); ++ *fr = fr->java_sender(); ++ } ++ } ++ } ++ assert(fr->is_java_frame(), "Safety check"); ++ return true; ++} ++ ++// By default, gcc always save frame pointer (%ebp/%rbp) on stack. 
It may get ++// turned off by -fomit-frame-pointer, ++frame os::get_sender_for_C_frame(frame* fr) { ++ return frame(fr->sender_sp(), fr->link(), fr->sender_pc()); ++} ++ ++//intptr_t* _get_previous_fp() { ++intptr_t* __attribute__((noinline)) os::get_previous_fp() { ++ int *pc; ++ intptr_t sp; ++ int *pc_limit = (int*)(void*)&os::get_previous_fp; ++ int insn; ++ ++ { ++ l_pc:; ++ pc = (int*)&&l_pc; ++ __asm__ __volatile__ ("move %0, $sp" : "=r" (sp)); ++ } ++ ++ do { ++ insn = *pc; ++ switch(bitfield(insn, 16, 16)) { ++ case 0x27bd: /* addiu $sp,$sp,-i */ ++ case 0x67bd: /* daddiu $sp,$sp,-i */ ++ assert ((short)bitfield(insn, 0, 16)<0, "bad frame"); ++ sp -= (short)bitfield(insn, 0, 16); ++ return (intptr_t*)sp; ++ } ++ --pc; ++ } while (pc>=pc_limit); // The initial value of pc may be equal to pc_limit, because of GCC optimization. ++ ++ ShouldNotReachHere(); ++ return NULL; // mute compiler ++} ++ ++ ++frame os::current_frame() { ++ intptr_t* fp = (intptr_t*)get_previous_fp(); ++ frame myframe((intptr_t*)os::current_stack_pointer(), ++ (intptr_t*)fp, ++ CAST_FROM_FN_PTR(address, os::current_frame)); ++ if (os::is_first_C_frame(&myframe)) { ++ // stack is not walkable ++ return frame(); ++ } else { ++ return os::get_sender_for_C_frame(&myframe); ++ } ++} ++ ++//x86 add 2 new assemble function here! ++extern "C" int ++JVM_handle_linux_signal(int sig, ++ siginfo_t* info, ++ void* ucVoid, ++ int abort_if_unrecognized) { ++#ifdef PRINT_SIGNAL_HANDLE ++ tty->print_cr("Signal: signo=%d, sicode=%d, sierrno=%d, siaddr=%lx", ++ info->si_signo, ++ info->si_code, ++ info->si_errno, ++ info->si_addr); ++#endif ++ ++ ucontext_t* uc = (ucontext_t*) ucVoid; ++ ++ Thread* t = Thread::current_or_null_safe(); ++ ++ SignalHandlerMark shm(t); ++ ++ // Note: it's not uncommon that JNI code uses signal/sigset to install ++ // then restore certain signal handler (e.g. to temporarily block SIGPIPE, ++ // or have a SIGILL handler when detecting CPU type). When that happens, ++ // JVM_handle_linux_signal() might be invoked with junk info/ucVoid. To ++ // avoid unnecessary crash when libjsig is not preloaded, try handle signals ++ // that do not require siginfo/ucontext first. 
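os::get_previous_fp() above scans backwards over its own prologue, decoding raw MIPS instruction words until it finds the addiu/daddiu $sp,$sp,-imm that allocated the frame, then undoes that adjustment; bitfield(insn, 16, 16) isolates the opcode/rs/rt half-word (0x27bd or 0x67bd when both registers are $sp) and the low 16 bits hold the signed stack adjustment. A small, hypothetical standalone decoder for that encoding (not part of the patch):

#include <cstdint>
#include <cstdio>

// Hypothetical helper mirroring HotSpot's bitfield(): extract 'len' bits
// starting at bit 'lo' of a 32-bit MIPS instruction word.
static uint32_t bits(uint32_t insn, int lo, int len) {
  return (insn >> lo) & ((1u << len) - 1u);
}

int main() {
  uint32_t insn = 0x67bdfff0;                   // daddiu $sp,$sp,-16
  uint32_t top  = bits(insn, 16, 16);           // 0x67bd: daddiu, rs = rt = $29
  int16_t  imm  = (int16_t)bits(insn, 0, 16);   // -16: the frame adjustment
  std::printf("top=%#x imm=%d\n", top, imm);
  return 0;
}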
++ ++ if (sig == SIGPIPE/* || sig == SIGXFSZ*/) { ++ // allow chained handler to go first ++ if (os::Linux::chained_handler(sig, info, ucVoid)) { ++ return true; ++ } else { ++ if (PrintMiscellaneous && (WizardMode || Verbose)) { ++ warning("Ignoring SIGPIPE - see bug 4229104"); ++ } ++ return true; ++ } ++ } ++ ++#ifdef CAN_SHOW_REGISTERS_ON_ASSERT ++ if ((sig == SIGSEGV || sig == SIGBUS) && info != NULL && info->si_addr == g_assert_poison) { ++ handle_assert_poison_fault(ucVoid, info->si_addr); ++ return 1; ++ } ++#endif ++ ++ JavaThread* thread = NULL; ++ VMThread* vmthread = NULL; ++ if (os::Linux::signal_handlers_are_installed) { ++ if (t != NULL ){ ++ if(t->is_Java_thread()) { ++#ifdef PRINT_SIGNAL_HANDLE ++ tty->print_cr("this thread is a java thread"); ++#endif ++ thread = (JavaThread*)t; ++ } ++ else if(t->is_VM_thread()){ ++#ifdef PRINT_SIGNAL_HANDLE ++ tty->print_cr("this thread is a VM thread\n"); ++#endif ++ vmthread = (VMThread *)t; ++ } ++ } ++ } ++ ++ // Handle SafeFetch faults: ++ if (uc != NULL) { ++ address const pc = (address) os::Linux::ucontext_get_pc(uc); ++ if (pc && StubRoutines::is_safefetch_fault(pc)) { ++ os::Linux::ucontext_set_pc(uc, StubRoutines::continuation_for_safefetch_fault(pc)); ++ return 1; ++ } ++ } ++ ++ // decide if this trap can be handled by a stub ++ address stub = NULL; ++ address pc = NULL; ++ ++ pc = (address) os::Linux::ucontext_get_pc(uc); ++#ifdef PRINT_SIGNAL_HANDLE ++ tty->print_cr("pc=%lx", pc); ++ os::print_context(tty, uc); ++#endif ++ //%note os_trap_1 ++ if (info != NULL && uc != NULL && thread != NULL) { ++ pc = (address) os::Linux::ucontext_get_pc(uc); ++ ++ // Handle ALL stack overflow variations here ++ if (sig == SIGSEGV) { ++ address addr = (address) info->si_addr; ++#ifdef PRINT_SIGNAL_HANDLE ++ tty->print("handle all stack overflow variations: "); ++ /*tty->print("addr = %lx, stack base = %lx, stack top = %lx\n", ++ addr, ++ thread->stack_base(), ++ thread->stack_base() - thread->stack_size()); ++ */ ++#endif ++ ++ // check if fault address is within thread stack ++ if (thread->on_local_stack(addr)) { ++ // stack overflow ++#ifdef PRINT_SIGNAL_HANDLE ++ tty->print("stack exception check \n"); ++#endif ++ if (thread->in_stack_yellow_reserved_zone(addr)) { ++#ifdef PRINT_SIGNAL_HANDLE ++ tty->print("exception addr is in yellow zone\n"); ++#endif ++ if (thread->thread_state() == _thread_in_Java) { ++ if (thread->in_stack_reserved_zone(addr)) { ++ frame fr; ++ if (os::Linux::get_frame_at_stack_banging_point(thread, uc, &fr)) { ++ assert(fr.is_java_frame(), "Must be a Java frame"); ++ frame activation = ++ SharedRuntime::look_for_reserved_stack_annotated_method(thread, fr); ++ if (activation.sp() != NULL) { ++ thread->disable_stack_reserved_zone(); ++ if (activation.is_interpreted_frame()) { ++ thread->set_reserved_stack_activation((address)( ++ activation.fp() + frame::interpreter_frame_initial_sp_offset)); ++ } else { ++ thread->set_reserved_stack_activation((address)activation.unextended_sp()); ++ } ++ return 1; ++ } ++ } ++ } ++ // Throw a stack overflow exception. Guard pages will be reenabled ++ // while unwinding the stack. ++#ifdef PRINT_SIGNAL_HANDLE ++ tty->print("this thread is in java\n"); ++#endif ++ thread->disable_stack_yellow_reserved_zone(); ++ stub = SharedRuntime::continuation_for_implicit_exception(thread, pc, SharedRuntime::STACK_OVERFLOW); ++ } else { ++ // Thread was in the vm or native code. Return and try to finish. 
++#ifdef PRINT_SIGNAL_HANDLE ++ tty->print("this thread is in vm or native codes and return\n"); ++#endif ++ thread->disable_stack_yellow_reserved_zone(); ++ return 1; ++ } ++ } else if (thread->in_stack_red_zone(addr)) { ++ // Fatal red zone violation. Disable the guard pages and fall through ++ // to handle_unexpected_exception way down below. ++#ifdef PRINT_SIGNAL_HANDLE ++ tty->print("exception addr is in red zone\n"); ++#endif ++ thread->disable_stack_red_zone(); ++ tty->print_raw_cr("An irrecoverable stack overflow has occurred."); ++ ++ // This is a likely cause, but hard to verify. Let's just print ++ // it as a hint. ++ tty->print_raw_cr("Please check if any of your loaded .so files has " ++ "enabled executable stack (see man page execstack(8))"); ++ } else { ++ // Accessing stack address below sp may cause SEGV if current ++ // thread has MAP_GROWSDOWN stack. This should only happen when ++ // current thread was created by user code with MAP_GROWSDOWN flag ++ // and then attached to VM. See notes in os_linux.cpp. ++#ifdef PRINT_SIGNAL_HANDLE ++ tty->print("exception addr is neither in yellow zone nor in the red one\n"); ++#endif ++ if (thread->osthread()->expanding_stack() == 0) { ++ thread->osthread()->set_expanding_stack(); ++ if (os::Linux::manually_expand_stack(thread, addr)) { ++ thread->osthread()->clear_expanding_stack(); ++ return 1; ++ } ++ thread->osthread()->clear_expanding_stack(); ++ } else { ++ fatal("recursive segv. expanding stack."); ++ } ++ } ++ } //addr < ++ } //sig == SIGSEGV ++ ++ if (thread->thread_state() == _thread_in_Java) { ++ // Java thread running in Java code => find exception handler if any ++ // a fault inside compiled code, the interpreter, or a stub ++#ifdef PRINT_SIGNAL_HANDLE ++ tty->print("java thread running in java code\n"); ++#endif ++ ++ // Handle signal from NativeJump::patch_verified_entry(). ++ if (sig == SIGILL && nativeInstruction_at(pc)->is_sigill_zombie_not_entrant()) { ++#ifdef PRINT_SIGNAL_HANDLE ++ tty->print_cr("verified entry = %lx, sig=%d", nativeInstruction_at(pc), sig); ++#endif ++ stub = SharedRuntime::get_handle_wrong_method_stub(); ++ } else if (sig == SIGSEGV && os::is_poll_address((address)info->si_addr)) { ++#ifdef PRINT_SIGNAL_HANDLE ++ tty->print_cr("polling address = %lx, sig=%d", os::get_polling_page(), sig); ++#endif ++ stub = SharedRuntime::get_poll_stub(pc); ++ } else if (sig == SIGBUS /* && info->si_code == BUS_OBJERR */) { ++ // BugId 4454115: A read from a MappedByteBuffer can fault ++ // here if the underlying file has been truncated. ++ // Do not crash the VM in such a case. ++ CodeBlob* cb = CodeCache::find_blob_unsafe(pc); ++ CompiledMethod* nm = (cb != NULL) ? cb->as_compiled_method_or_null() : NULL; ++#ifdef PRINT_SIGNAL_HANDLE ++ tty->print("cb = %lx, nm = %lx\n", cb, nm); ++#endif ++ if (nm != NULL && nm->has_unsafe_access()) { ++ address next_pc = (address)((unsigned long)pc + sizeof(unsigned int)); ++ stub = SharedRuntime::handle_unsafe_access(thread, next_pc); ++ } ++ } else if (sig == SIGFPE /* && info->si_code == FPE_INTDIV */) { ++ // HACK: si_code does not work on linux 2.2.12-20!!! ++ int op = pc[0] & 0x3f; ++ int op1 = pc[3] & 0x3f; ++ //FIXME, Must port to mips code!! ++ switch (op) { ++ case 0x1e: //ddiv ++ case 0x1f: //ddivu ++ case 0x1a: //div ++ case 0x1b: //divu ++ case 0x34: //trap ++ /* In MIPS, div_by_zero exception can only be triggered by explicit 'trap'. 
++ * Ref: [c1_LIRAssembler_mips.cpp] arithmetic_idiv() ++ */ ++ stub = SharedRuntime::continuation_for_implicit_exception(thread, ++ pc, ++ SharedRuntime::IMPLICIT_DIVIDE_BY_ZERO); ++ break; ++ default: ++ // TODO: handle more cases if we are using other x86 instructions ++ // that can generate SIGFPE signal on linux. ++ tty->print_cr("unknown opcode 0x%X -0x%X with SIGFPE.", op, op1); ++ //fatal("please update this code."); ++ } ++ } else if (sig == SIGSEGV && ++ !MacroAssembler::needs_explicit_null_check((intptr_t)info->si_addr)) { ++#ifdef PRINT_SIGNAL_HANDLE ++ tty->print("continuation for implicit exception\n"); ++#endif ++ // Determination of interpreter/vtable stub/compiled code null exception ++ stub = SharedRuntime::continuation_for_implicit_exception(thread, pc, SharedRuntime::IMPLICIT_NULL); ++#ifdef PRINT_SIGNAL_HANDLE ++ tty->print_cr("continuation_for_implicit_exception stub: %lx", stub); ++#endif ++ } else if (/*thread->thread_state() == _thread_in_Java && */sig == SIGILL) { ++ //Since kernel does not have emulation of PS instructions yet, the emulation must be handled here. ++ //The method is to trigger kernel emulation of float emulation. ++ int inst = *(int*)pc; ++ int ops = (inst >> 26) & 0x3f; ++ int ops_fmt = (inst >> 21) & 0x1f; ++ int op = inst & 0x3f; ++ if (ops == Assembler::cop1_op && ops_fmt == Assembler::ps_fmt) { ++ int ft, fs, fd; ++ ft = (inst >> 16) & 0x1f; ++ fs = (inst >> 11) & 0x1f; ++ fd = (inst >> 6) & 0x1f; ++ float ft_upper, ft_lower, fs_upper, fs_lower, fd_upper, fd_lower; ++ double ft_value, fs_value, fd_value; ++ ft_value = uc->uc_mcontext.fpregs.fp_r.fp_dregs[ft]; ++ fs_value = uc->uc_mcontext.fpregs.fp_r.fp_dregs[fs]; ++ __asm__ __volatile__ ( ++ "cvt.s.pl %0, %4\n\t" ++ "cvt.s.pu %1, %4\n\t" ++ "cvt.s.pl %2, %5\n\t" ++ "cvt.s.pu %3, %5\n\t" ++ : "=f" (fs_lower), "=f" (fs_upper), "=f" (ft_lower), "=f" (ft_upper) ++ : "f" (fs_value), "f" (ft_value) ++ ); ++ ++ switch (op) { ++ case Assembler::fadd_op: ++ __asm__ __volatile__ ( ++ "add.s %1, %3, %5\n\t" ++ "add.s %2, %4, %6\n\t" ++ "pll.ps %0, %1, %2\n\t" ++ : "=f" (fd_value), "=f" (fd_upper), "=f" (fd_lower) ++ : "f" (fs_upper), "f" (fs_lower), "f" (ft_upper), "f" (ft_lower) ++ ); ++ uc->uc_mcontext.fpregs.fp_r.fp_dregs[fd] = fd_value; ++ stub = pc + 4; ++ break; ++ case Assembler::fsub_op: ++ //fd = fs - ft ++ __asm__ __volatile__ ( ++ "sub.s %1, %3, %5\n\t" ++ "sub.s %2, %4, %6\n\t" ++ "pll.ps %0, %1, %2\n\t" ++ : "=f" (fd_value), "=f" (fd_upper), "=f" (fd_lower) ++ : "f" (fs_upper), "f" (fs_lower), "f" (ft_upper), "f" (ft_lower) ++ ); ++ uc->uc_mcontext.fpregs.fp_r.fp_dregs[fd] = fd_value; ++ stub = pc + 4; ++ break; ++ case Assembler::fmul_op: ++ __asm__ __volatile__ ( ++ "mul.s %1, %3, %5\n\t" ++ "mul.s %2, %4, %6\n\t" ++ "pll.ps %0, %1, %2\n\t" ++ : "=f" (fd_value), "=f" (fd_upper), "=f" (fd_lower) ++ : "f" (fs_upper), "f" (fs_lower), "f" (ft_upper), "f" (ft_lower) ++ ); ++ uc->uc_mcontext.fpregs.fp_r.fp_dregs[fd] = fd_value; ++ stub = pc + 4; ++ break; ++ default: ++ tty->print_cr("unknown cop1 opcode 0x%x with SIGILL.", op); ++ } ++ } else if (ops == Assembler::cop1x_op /*&& op == Assembler::nmadd_ps_op*/) { ++ // madd.ps is not used, the code below were not tested ++ int fr, ft, fs, fd; ++ float fr_upper, fr_lower, fs_upper, fs_lower, ft_upper, ft_lower, fd_upper, fd_lower; ++ double fr_value, ft_value, fs_value, fd_value; ++ switch (op) { ++ case Assembler::madd_ps_op: ++ // fd = (fs * ft) + fr ++ fr = (inst >> 21) & 0x1f; ++ ft = (inst >> 16) & 0x1f; ++ fs = (inst >> 11) & 0x1f; ++ fd = 
(inst >> 6) & 0x1f; ++ fr_value = uc->uc_mcontext.fpregs.fp_r.fp_dregs[fr]; ++ ft_value = uc->uc_mcontext.fpregs.fp_r.fp_dregs[ft]; ++ fs_value = uc->uc_mcontext.fpregs.fp_r.fp_dregs[fs]; ++ __asm__ __volatile__ ( ++ "cvt.s.pu %3, %9\n\t" ++ "cvt.s.pl %4, %9\n\t" ++ "cvt.s.pu %5, %10\n\t" ++ "cvt.s.pl %6, %10\n\t" ++ "cvt.s.pu %7, %11\n\t" ++ "cvt.s.pl %8, %11\n\t" ++ "madd.s %1, %3, %5, %7\n\t" ++ "madd.s %2, %4, %6, %8\n\t" ++ "pll.ps %0, %1, %2\n\t" ++ : "=f" (fd_value), "=f" (fd_upper), "=f" (fd_lower), "=f" (fr_upper), "=f" (fr_lower), "=f" (fs_upper), "=f" (fs_lower), "=f" (ft_upper), "=f" (ft_lower) ++ : "f" (fr_value)/*9*/, "f" (fs_value)/*10*/, "f" (ft_value)/*11*/ ++ ); ++ uc->uc_mcontext.fpregs.fp_r.fp_dregs[fd] = fd_value; ++ stub = pc + 4; ++ break; ++ default: ++ tty->print_cr("unknown cop1x opcode 0x%x with SIGILL.", op); ++ } ++ } ++ } //SIGILL ++ } else if (sig == SIGILL && VM_Version::is_determine_features_test_running()) { ++ // thread->thread_state() != _thread_in_Java ++ // SIGILL must be caused by VM_Version::determine_features(). ++ VM_Version::set_supports_cpucfg(false); ++ stub = pc + 4; // continue with next instruction. ++ } else if (thread->thread_state() == _thread_in_vm && ++ sig == SIGBUS && /* info->si_code == BUS_OBJERR && */ ++ thread->doing_unsafe_access()) { ++#ifdef PRINT_SIGNAL_HANDLE ++ tty->print_cr("SIGBUS in vm thread \n"); ++#endif ++ address next_pc = (address)((unsigned long)pc + sizeof(unsigned int)); ++ stub = SharedRuntime::handle_unsafe_access(thread, next_pc); ++ } ++ ++ // jni_fast_GetField can trap at certain pc's if a GC kicks in ++ // and the heap gets shrunk before the field access. ++ if ((sig == SIGSEGV) || (sig == SIGBUS)) { ++#ifdef PRINT_SIGNAL_HANDLE ++ tty->print("jni fast get trap: "); ++#endif ++ address addr = JNI_FastGetField::find_slowcase_pc(pc); ++ if (addr != (address)-1) { ++ stub = addr; ++ } ++#ifdef PRINT_SIGNAL_HANDLE ++ tty->print_cr("addr = %d, stub = %lx", addr, stub); ++#endif ++ } ++ ++ // Check to see if we caught the safepoint code in the ++ // process of write protecting the memory serialization page. ++ // It write enables the page immediately after protecting it ++ // so we can just return to retry the write. ++ if ((sig == SIGSEGV) && ++ os::is_memory_serialize_page(thread, (address) info->si_addr)) { ++#ifdef PRINT_SIGNAL_HANDLE ++ tty->print("write protecting the memory serialiazation page\n"); ++#endif ++ // Block current thread until the memory serialize page permission restored. ++ os::block_on_serialize_page_trap(); ++ return true; ++ } ++ } ++ ++ // Execution protection violation ++ // ++ // This should be kept as the last step in the triage. We don't ++ // have a dedicated trap number for a no-execute fault, so be ++ // conservative and allow other handlers the first shot. ++ // ++ // Note: We don't test that info->si_code == SEGV_ACCERR here. ++ // this si_code is so generic that it is almost meaningless; and ++ // the si_code for this condition may change in the future. ++ // Furthermore, a false-positive should be harmless. 
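The execution-protection branch below is an "unguard and retry" fallback: when the faulting address looks like VM code (or UnguardOnExecutionViolation is set above 1), the page is flipped to read/write/execute via os::protect_memory(..., MEM_PROT_RWX) and the handler resumes at the same pc so the access is retried. A minimal, hypothetical sketch of the underlying system call — the helper name is an assumption; HotSpot itself goes through os::protect_memory:

#include <stdint.h>
#include <sys/mman.h>
#include <unistd.h>

// Hypothetical helper: make the page containing fault_addr RWX, as the
// UnguardOnExecutionViolation path below does before retrying the access.
static bool unguard_page(void* fault_addr) {
  uintptr_t page_size = (uintptr_t)sysconf(_SC_PAGESIZE);
  void* page = (void*)((uintptr_t)fault_addr & ~(page_size - 1));
  return mprotect(page, page_size, PROT_READ | PROT_WRITE | PROT_EXEC) == 0;
}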
++ if (UnguardOnExecutionViolation > 0 && ++ //(sig == SIGSEGV || sig == SIGBUS) && ++ //uc->uc_mcontext.gregs[REG_TRAPNO] == trap_page_fault) { ++ (sig == SIGSEGV || sig == SIGBUS ++#ifdef OPT_RANGECHECK ++ || sig == SIGSYS ++#endif ++ ) && ++ //(uc->uc_mcontext.cause == 2 || uc->uc_mcontext.cause == 3)) { ++ (uc->uc_mcontext.hi1 == 2 || uc->uc_mcontext.hi1 == 3)) { ++#ifdef PRINT_SIGNAL_HANDLE ++ tty->print_cr("execution protection violation\n"); ++#endif ++ ++ int page_size = os::vm_page_size(); ++ address addr = (address) info->si_addr; ++ address pc = os::Linux::ucontext_get_pc(uc); ++ // Make sure the pc and the faulting address are sane. ++ // ++ // If an instruction spans a page boundary, and the page containing ++ // the beginning of the instruction is executable but the following ++ // page is not, the pc and the faulting address might be slightly ++ // different - we still want to unguard the 2nd page in this case. ++ // ++ // 15 bytes seems to be a (very) safe value for max instruction size. ++ bool pc_is_near_addr = ++ (pointer_delta((void*) addr, (void*) pc, sizeof(char)) < 15); ++Untested("Unimplemented yet"); ++ bool instr_spans_page_boundary = ++/* ++ (align_size_down((intptr_t) pc ^ (intptr_t) addr, ++ (intptr_t) page_size) > 0); ++*/ ++ (align_down((intptr_t) pc ^ (intptr_t) addr, ++ (intptr_t) page_size) > 0); ++ ++ if (pc == addr || (pc_is_near_addr && instr_spans_page_boundary)) { ++ static volatile address last_addr = ++ (address) os::non_memory_address_word(); ++ ++ // In conservative mode, don't unguard unless the address is in the VM ++ if (addr != last_addr && ++ (UnguardOnExecutionViolation > 1 || os::address_is_in_vm(addr))) { ++ ++ // Set memory to RWX and retry ++Untested("Unimplemented yet"); ++/* ++ address page_start = ++ (address) align_size_down((intptr_t) addr, (intptr_t) page_size); ++*/ ++ address page_start = align_down(addr, page_size); ++ bool res = os::protect_memory((char*) page_start, page_size, ++ os::MEM_PROT_RWX); ++ ++ if (PrintMiscellaneous && Verbose) { ++ char buf[256]; ++ jio_snprintf(buf, sizeof(buf), "Execution protection violation " ++ "at " INTPTR_FORMAT ++ ", unguarding " INTPTR_FORMAT ": %s, errno=%d", addr, ++ page_start, (res ? "success" : "failed"), errno); ++ tty->print_raw_cr(buf); ++ } ++ stub = pc; ++ ++ // Set last_addr so if we fault again at the same address, we don't end ++ // up in an endless loop. ++ // ++ // There are two potential complications here. Two threads trapping at ++ // the same address at the same time could cause one of the threads to ++ // think it already unguarded, and abort the VM. Likely very rare. ++ // ++ // The other race involves two threads alternately trapping at ++ // different addresses and failing to unguard the page, resulting in ++ // an endless loop. This condition is probably even more unlikely than ++ // the first. ++ // ++ // Although both cases could be avoided by using locks or thread local ++ // last_addr, these solutions are unnecessary complication: this ++ // handler is a best-effort safety net, not a complete solution. It is ++ // disabled by default and should only be used as a workaround in case ++ // we missed any no-execute-unsafe VM code. 
++ ++ last_addr = addr; ++ } ++ } ++ } ++ ++ if (stub != NULL) { ++#ifdef PRINT_SIGNAL_HANDLE ++ tty->print_cr("resolved stub=%lx\n",stub); ++#endif ++ // save all thread context in case we need to restore it ++ if (thread != NULL) thread->set_saved_exception_pc(pc); ++ ++ os::Linux::ucontext_set_pc(uc, stub); ++ return true; ++ } ++ ++ // signal-chaining ++ if (os::Linux::chained_handler(sig, info, ucVoid)) { ++#ifdef PRINT_SIGNAL_HANDLE ++ tty->print_cr("signal chaining\n"); ++#endif ++ return true; ++ } ++ ++ if (!abort_if_unrecognized) { ++#ifdef PRINT_SIGNAL_HANDLE ++ tty->print_cr("abort becauce of unrecognized\n"); ++#endif ++ // caller wants another chance, so give it to him ++ return false; ++ } ++ ++ if (pc == NULL && uc != NULL) { ++ pc = os::Linux::ucontext_get_pc(uc); ++ } ++ ++ // unmask current signal ++ sigset_t newset; ++ sigemptyset(&newset); ++ sigaddset(&newset, sig); ++ sigprocmask(SIG_UNBLOCK, &newset, NULL); ++#ifdef PRINT_SIGNAL_HANDLE ++ tty->print_cr("VMError in signal handler\n"); ++#endif ++ VMError::report_and_die(t, sig, pc, info, ucVoid); ++ ++ ShouldNotReachHere(); ++ return true; // Mute compiler ++} ++ ++// FCSR:...|24| 23 |22|21|... ++// ...|FS|FCC0|FO|FN|... ++void os::Linux::init_thread_fpu_state(void) { ++ if (SetFSFOFN == 999) ++ return; ++ int fs = (SetFSFOFN / 100)? 1:0; ++ int fo = ((SetFSFOFN % 100) / 10)? 1:0; ++ int fn = (SetFSFOFN % 10)? 1:0; ++ int mask = fs << 24 | fo << 22 | fn << 21; ++ ++ int fcsr = get_fpu_control_word(); ++ fcsr = fcsr | mask; ++ set_fpu_control_word(fcsr); ++ /* ++ if (fcsr != get_fpu_control_word()) ++ tty->print_cr(" fail to set to %lx, get_fpu_control_word:%lx", fcsr, get_fpu_control_word()); ++ */ ++} ++ ++int os::Linux::get_fpu_control_word(void) { ++ int fcsr; ++ __asm__ __volatile__ ( ++ ".set noat;" ++ "daddiu %0, $0, 0;" ++ "cfc1 %0, $31;" ++ : "=r" (fcsr) ++ ); ++ return fcsr; ++} ++ ++void os::Linux::set_fpu_control_word(int fpu_control) { ++ __asm__ __volatile__ ( ++ ".set noat;" ++ "ctc1 %0, $31;" ++ : ++ : "r" (fpu_control) ++ ); ++} ++ ++bool os::is_allocatable(size_t bytes) { ++ ++ if (bytes < 2 * G) { ++ return true; ++ } ++ ++ char* addr = reserve_memory(bytes, NULL); ++ ++ if (addr != NULL) { ++ release_memory(addr, bytes); ++ } ++ ++ return addr != NULL; ++} ++ ++//////////////////////////////////////////////////////////////////////////////// ++// thread stack ++ ++//size_t os::Linux::min_stack_allowed = 96 * K; ++size_t os::Posix::_compiler_thread_min_stack_allowed = 48 * K; ++size_t os::Posix::_java_thread_min_stack_allowed = 40 * K; ++size_t os::Posix::_vm_internal_thread_min_stack_allowed = 64 * K; ++ ++ ++/* ++// Test if pthread library can support variable thread stack size. LinuxThreads ++// in fixed stack mode allocates 2M fixed slot for each thread. LinuxThreads ++// in floating stack mode and NPTL support variable stack size. ++bool os::Linux::supports_variable_stack_size() { ++ if (os::Linux::is_NPTL()) { ++ // NPTL, yes ++ return true; ++ ++ } else { ++ // Note: We can't control default stack size when creating a thread. ++ // If we use non-default stack size (pthread_attr_setstacksize), both ++ // floating stack and non-floating stack LinuxThreads will return the ++ // same value. This makes it impossible to implement this function by ++ // detecting thread stack size directly. ++ // ++ // An alternative approach is to check %gs. Fixed-stack LinuxThreads ++ // do not use %gs, so its value is 0. 
Floating-stack LinuxThreads use ++ // %gs (either as LDT selector or GDT selector, depending on kernel) ++ // to access thread specific data. ++ // ++ // Note that %gs is a reserved glibc register since early 2001, so ++ // applications are not allowed to change its value (Ulrich Drepper from ++ // Redhat confirmed that all known offenders have been modified to use ++ // either %fs or TSD). In the worst case scenario, when VM is embedded in ++ // a native application that plays with %gs, we might see non-zero %gs ++ // even LinuxThreads is running in fixed stack mode. As the result, we'll ++ // return true and skip _thread_safety_check(), so we may not be able to ++ // detect stack-heap collisions. But otherwise it's harmless. ++ // ++ return false; ++ } ++} ++*/ ++ ++// Return default stack size for thr_type ++size_t os::Posix::default_stack_size(os::ThreadType thr_type) { ++ // Default stack size (compiler thread needs larger stack) ++ size_t s = (thr_type == os::compiler_thread ? 2 * M : 512 * K); ++ return s; ++} ++ ++///////////////////////////////////////////////////////////////////////////// ++// helper functions for fatal error handler ++void os::print_register_info(outputStream *st, const void *context) { ++ if (context == NULL) return; ++ ++ ucontext_t *uc = (ucontext_t*)context; ++ ++ st->print_cr("Register to memory mapping:"); ++ st->cr(); ++ // this is horrendously verbose but the layout of the registers in the ++ // // context does not match how we defined our abstract Register set, so ++ // // we can't just iterate through the gregs area ++ // ++ // // this is only for the "general purpose" registers ++ st->print("R0=" ); print_location(st, (intptr_t)uc->uc_mcontext.gregs[0]); ++ st->print("AT=" ); print_location(st, (intptr_t)uc->uc_mcontext.gregs[1]); ++ st->print("V0=" ); print_location(st, (intptr_t)uc->uc_mcontext.gregs[2]); ++ st->print("V1=" ); print_location(st, (intptr_t)uc->uc_mcontext.gregs[3]); ++ st->cr(); ++ st->print("A0=" ); print_location(st, (intptr_t)uc->uc_mcontext.gregs[4]); ++ st->print("A1=" ); print_location(st, (intptr_t)uc->uc_mcontext.gregs[5]); ++ st->print("A2=" ); print_location(st, (intptr_t)uc->uc_mcontext.gregs[6]); ++ st->print("A3=" ); print_location(st, (intptr_t)uc->uc_mcontext.gregs[7]); ++ st->cr(); ++ st->print("A4=" ); print_location(st, (intptr_t)uc->uc_mcontext.gregs[8]); ++ st->print("A5=" ); print_location(st, (intptr_t)uc->uc_mcontext.gregs[9]); ++ st->print("A6=" ); print_location(st, (intptr_t)uc->uc_mcontext.gregs[10]); ++ st->print("A7=" ); print_location(st, (intptr_t)uc->uc_mcontext.gregs[11]); ++ st->cr(); ++ st->print("T0=" ); print_location(st, (intptr_t)uc->uc_mcontext.gregs[12]); ++ st->print("T1=" ); print_location(st, (intptr_t)uc->uc_mcontext.gregs[13]); ++ st->print("T2=" ); print_location(st, (intptr_t)uc->uc_mcontext.gregs[14]); ++ st->print("T3=" ); print_location(st, (intptr_t)uc->uc_mcontext.gregs[15]); ++ st->cr(); ++ st->print("S0=" ); print_location(st, (intptr_t)uc->uc_mcontext.gregs[16]); ++ st->print("S1=" ); print_location(st, (intptr_t)uc->uc_mcontext.gregs[17]); ++ st->print("S2=" ); print_location(st, (intptr_t)uc->uc_mcontext.gregs[18]); ++ st->print("S3=" ); print_location(st, (intptr_t)uc->uc_mcontext.gregs[19]); ++ st->cr(); ++ st->print("S4=" ); print_location(st, (intptr_t)uc->uc_mcontext.gregs[20]); ++ st->print("S5=" ); print_location(st, (intptr_t)uc->uc_mcontext.gregs[21]); ++ st->print("S6=" ); print_location(st, (intptr_t)uc->uc_mcontext.gregs[22]); ++ st->print("S7=" ); 
print_location(st, (intptr_t)uc->uc_mcontext.gregs[23]); ++ st->cr(); ++ st->print("T8=" ); print_location(st, (intptr_t)uc->uc_mcontext.gregs[24]); ++ st->print("T9=" ); print_location(st, (intptr_t)uc->uc_mcontext.gregs[25]); ++ st->print("K0=" ); print_location(st, (intptr_t)uc->uc_mcontext.gregs[26]); ++ st->print("K1=" ); print_location(st, (intptr_t)uc->uc_mcontext.gregs[27]); ++ st->cr(); ++ st->print("GP=" ); print_location(st, (intptr_t)uc->uc_mcontext.gregs[28]); ++ st->print("SP=" ); print_location(st, (intptr_t)uc->uc_mcontext.gregs[29]); ++ st->print("FP=" ); print_location(st, (intptr_t)uc->uc_mcontext.gregs[30]); ++ st->print("RA=" ); print_location(st, (intptr_t)uc->uc_mcontext.gregs[31]); ++ st->cr(); ++ ++} ++ ++void os::print_context(outputStream *st, const void *context) { ++ if (context == NULL) return; ++ ++ const ucontext_t *uc = (const ucontext_t*)context; ++ st->print_cr("Registers:"); ++ st->print( "R0=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.gregs[0]); ++ st->print(", AT=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.gregs[1]); ++ st->print(", V0=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.gregs[2]); ++ st->print(", V1=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.gregs[3]); ++ st->cr(); ++ st->print( "A0=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.gregs[4]); ++ st->print(", A1=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.gregs[5]); ++ st->print(", A2=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.gregs[6]); ++ st->print(", A3=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.gregs[7]); ++ st->cr(); ++ st->print( "A4=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.gregs[8]); ++ st->print(", A5=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.gregs[9]); ++ st->print(", A6=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.gregs[10]); ++ st->print(", A7=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.gregs[11]); ++ st->cr(); ++ st->print( "T0=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.gregs[12]); ++ st->print(", T1=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.gregs[13]); ++ st->print(", T2=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.gregs[14]); ++ st->print(", T3=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.gregs[15]); ++ st->cr(); ++ st->print( "S0=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.gregs[16]); ++ st->print(", S1=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.gregs[17]); ++ st->print(", S2=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.gregs[18]); ++ st->print(", S3=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.gregs[19]); ++ st->cr(); ++ st->print( "S4=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.gregs[20]); ++ st->print(", S5=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.gregs[21]); ++ st->print(", S6=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.gregs[22]); ++ st->print(", S7=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.gregs[23]); ++ st->cr(); ++ st->print( "T8=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.gregs[24]); ++ st->print(", T9=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.gregs[25]); ++ st->print(", K0=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.gregs[26]); ++ st->print(", K1=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.gregs[27]); ++ st->cr(); ++ st->print( "GP=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.gregs[28]); ++ st->print(", SP=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.gregs[29]); ++ st->print(", FP=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.gregs[30]); ++ st->print(", RA=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.gregs[31]); ++ st->cr(); ++ st->cr(); ++ ++ intptr_t *sp = (intptr_t *)os::Linux::ucontext_get_sp(uc); ++ st->print_cr("Top of Stack: (sp=" PTR_FORMAT ")", p2i(sp)); ++ print_hex_dump(st, (address)(sp 
- 32), (address)(sp + 32), sizeof(intptr_t)); ++ st->cr(); ++ ++ // Note: it may be unsafe to inspect memory near pc. For example, pc may ++ // point to garbage if entry point in an nmethod is corrupted. Leave ++ // this at the end, and hope for the best. ++ address pc = os::Linux::ucontext_get_pc(uc); ++ st->print_cr("Instructions: (pc=" PTR_FORMAT ")", p2i(pc)); ++ print_hex_dump(st, pc - 64, pc + 64, sizeof(char)); ++ Disassembler::decode(pc - 80, pc + 80, st); ++} ++ ++void os::setup_fpu() { ++ /* ++ //no use for MIPS ++ int fcsr; ++ address fpu_cntrl = StubRoutines::addr_fpu_cntrl_wrd_std(); ++ __asm__ __volatile__ ( ++ ".set noat;" ++ "cfc1 %0, $31;" ++ "sw %0, 0(%1);" ++ : "=r" (fcsr) ++ : "r" (fpu_cntrl) ++ : "memory" ++ ); ++ printf("fpu_cntrl: %lx\n", fpu_cntrl); ++ */ ++} ++ ++#ifndef PRODUCT ++void os::verify_stack_alignment() { ++ assert(((intptr_t)os::current_stack_pointer() & (StackAlignmentInBytes-1)) == 0, "incorrect stack alignment"); ++} ++#endif ++ ++int os::extra_bang_size_in_bytes() { ++ // MIPS does not require the additional stack bang. ++ return 0; ++} ++ ++bool os::is_ActiveCoresMP() { ++ return UseActiveCoresMP && _initial_active_processor_count == 1; ++} +diff --git a/src/hotspot/os_cpu/linux_mips/os_linux_mips.hpp b/src/hotspot/os_cpu/linux_mips/os_linux_mips.hpp +new file mode 100644 +index 0000000000..c07d08156f +--- /dev/null ++++ b/src/hotspot/os_cpu/linux_mips/os_linux_mips.hpp +@@ -0,0 +1,39 @@ ++/* ++ * Copyright (c) 1999, 2013, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#ifndef OS_CPU_LINUX_MIPS_VM_OS_LINUX_MIPS_HPP ++#define OS_CPU_LINUX_MIPS_VM_OS_LINUX_MIPS_HPP ++ ++ static void setup_fpu(); ++ static bool is_allocatable(size_t bytes); ++ static intptr_t *get_previous_fp(); ++ ++ // Used to register dynamic code cache area with the OS ++ // Note: Currently only used in 64 bit Windows implementations ++ static bool register_code_area(char *low, char *high) { return true; } ++ ++ static bool is_ActiveCoresMP(); ++ ++#endif // OS_CPU_LINUX_MIPS_VM_OS_LINUX_MIPS_HPP +diff --git a/src/hotspot/os_cpu/linux_mips/prefetch_linux_mips.inline.hpp b/src/hotspot/os_cpu/linux_mips/prefetch_linux_mips.inline.hpp +new file mode 100644 +index 0000000000..93490345f0 +--- /dev/null ++++ b/src/hotspot/os_cpu/linux_mips/prefetch_linux_mips.inline.hpp +@@ -0,0 +1,58 @@ ++/* ++ * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved. 
++ * Copyright (c) 2015, 2021, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#ifndef OS_CPU_LINUX_MIPS_VM_PREFETCH_LINUX_MIPS_INLINE_HPP ++#define OS_CPU_LINUX_MIPS_VM_PREFETCH_LINUX_MIPS_INLINE_HPP ++ ++ ++inline void Prefetch::read (void *loc, intx interval) { ++ // 'pref' is implemented as NOP in Loongson 3A ++ __asm__ __volatile__ ( ++ " .set push\n" ++ " .set mips32\n" ++ " .set noreorder\n" ++ " pref 0, 0(%[__loc]) \n" ++ " .set pop\n" ++ : [__loc] "=&r"(loc) ++ : ++ : "memory" ++ ); ++} ++ ++inline void Prefetch::write(void *loc, intx interval) { ++ __asm__ __volatile__ ( ++ " .set push\n" ++ " .set mips32\n" ++ " .set noreorder\n" ++ " pref 1, 0(%[__loc]) \n" ++ " .set pop\n" ++ : [__loc] "=&r"(loc) ++ : ++ : "memory" ++ ); ++ ++} ++ ++#endif // OS_CPU_LINUX_MIPS_VM_PREFETCH_LINUX_MIPS_INLINE_HPP +diff --git a/src/hotspot/os_cpu/linux_mips/thread_linux_mips.cpp b/src/hotspot/os_cpu/linux_mips/thread_linux_mips.cpp +new file mode 100644 +index 0000000000..dbe8efe164 +--- /dev/null ++++ b/src/hotspot/os_cpu/linux_mips/thread_linux_mips.cpp +@@ -0,0 +1,117 @@ ++/* ++ * Copyright (c) 2003, 2013, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ * ++ */ ++ ++#include "precompiled.hpp" ++#include "compiler/compileBroker.hpp" ++#include "memory/metaspaceShared.hpp" ++#include "runtime/frame.inline.hpp" ++#include "runtime/thread.inline.hpp" ++#include "runtime/sharedRuntime.hpp" ++ ++void JavaThread::pd_initialize() ++{ ++ _anchor.clear(); ++} ++ ++frame JavaThread::pd_last_frame() { ++ assert(has_last_Java_frame(), "must have last_Java_sp() when suspended"); ++ if (_anchor.last_Java_pc() != NULL) { ++ return frame(_anchor.last_Java_sp(), _anchor.last_Java_fp(), _anchor.last_Java_pc()); ++ } else { ++ // This will pick up pc from sp ++ return frame(_anchor.last_Java_sp(), _anchor.last_Java_fp()); ++ } ++} ++ ++// For Forte Analyzer AsyncGetCallTrace profiling support - thread is ++// currently interrupted by SIGPROF ++bool JavaThread::pd_get_top_frame_for_signal_handler(frame* fr_addr, ++ void* ucontext, bool isInJava) { ++ ++ assert(Thread::current() == this, "caller must be current thread"); ++ return pd_get_top_frame(fr_addr, ucontext, isInJava); ++} ++ ++ ++bool JavaThread::pd_get_top_frame_for_profiling(frame* fr_addr, void* ucontext, bool isInJava) { ++ return pd_get_top_frame(fr_addr, ucontext, isInJava); ++} ++ ++bool JavaThread::pd_get_top_frame(frame* fr_addr, void* ucontext, bool isInJava) { ++ assert(this->is_Java_thread(), "must be JavaThread"); ++ JavaThread* jt = (JavaThread *)this; ++ ++ // If we have a last_Java_frame, then we should use it even if ++ // isInJava == true. It should be more reliable than ucontext info. ++ if (jt->has_last_Java_frame() && jt->frame_anchor()->walkable()) { ++ *fr_addr = jt->pd_last_frame(); ++ return true; ++ } ++ ++ // At this point, we don't have a last_Java_frame, so ++ // we try to glean some information out of the ucontext ++ // if we were running Java code when SIGPROF came in. ++ if (isInJava) { ++ ucontext_t* uc = (ucontext_t*) ucontext; ++ ++ intptr_t* ret_fp; ++ intptr_t* ret_sp; ++ ExtendedPC addr = os::Linux::fetch_frame_from_ucontext(this, uc, ++ &ret_sp, &ret_fp); ++ if (addr.pc() == NULL || ret_sp == NULL ) { ++ // ucontext wasn't useful ++ return false; ++ } ++ ++ if (MetaspaceShared::is_in_trampoline_frame(addr.pc())) { ++ // In the middle of a trampoline call. Bail out for safety. ++ // This happens rarely so shouldn't affect profiling. ++ return false; ++ } ++ ++ frame ret_frame(ret_sp, ret_fp, addr.pc()); ++ if (!ret_frame.safe_for_sender(jt)) { ++#ifdef COMPILER2 ++ // C2 and JVMCI use ebp as a general register see if NULL fp helps ++ frame ret_frame2(ret_sp, NULL, addr.pc()); ++ if (!ret_frame2.safe_for_sender(jt)) { ++ // nothing else to try if the frame isn't good ++ return false; ++ } ++ ret_frame = ret_frame2; ++#else ++ // nothing else to try if the frame isn't good ++ return false; ++#endif // COMPILER2_OR_JVMCI ++ } ++ *fr_addr = ret_frame; ++ return true; ++ } ++ ++ // nothing else to try ++ return false; ++} ++ ++void JavaThread::cache_global_variables() { } +diff --git a/src/hotspot/os_cpu/linux_mips/thread_linux_mips.hpp b/src/hotspot/os_cpu/linux_mips/thread_linux_mips.hpp +new file mode 100644 +index 0000000000..8b8dbe219c +--- /dev/null ++++ b/src/hotspot/os_cpu/linux_mips/thread_linux_mips.hpp +@@ -0,0 +1,66 @@ ++/* ++ * Copyright (c) 2000, 2013, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#ifndef OS_CPU_LINUX_MIPS_VM_THREAD_LINUX_MIPS_HPP ++#define OS_CPU_LINUX_MIPS_VM_THREAD_LINUX_MIPS_HPP ++ ++ private: ++ void pd_initialize(); ++ ++ frame pd_last_frame(); ++ ++ public: ++ // Mutators are highly dangerous.... ++ intptr_t* last_Java_fp() { return _anchor.last_Java_fp(); } ++ void set_last_Java_fp(intptr_t* fp) { _anchor.set_last_Java_fp(fp); } ++ ++ void set_base_of_stack_pointer(intptr_t* base_sp) { ++ } ++ ++ static ByteSize last_Java_fp_offset() { ++ return byte_offset_of(JavaThread, _anchor) + JavaFrameAnchor::last_Java_fp_offset(); ++ } ++ ++ intptr_t* base_of_stack_pointer() { ++ return NULL; ++ } ++ void record_base_of_stack_pointer() { ++ } ++ ++ bool pd_get_top_frame_for_signal_handler(frame* fr_addr, void* ucontext, ++ bool isInJava); ++ ++ bool pd_get_top_frame_for_profiling(frame* fr_addr, void* ucontext, bool isInJava); ++private: ++ bool pd_get_top_frame(frame* fr_addr, void* ucontext, bool isInJava); ++public: ++ ++ // These routines are only used on cpu architectures that ++ // have separate register stacks (Itanium). ++ static bool register_stack_overflow() { return false; } ++ static void enable_register_stack_guard() {} ++ static void disable_register_stack_guard() {} ++ ++#endif // OS_CPU_LINUX_MIPS_VM_THREAD_LINUX_MIPS_HPP +diff --git a/src/hotspot/os_cpu/linux_mips/vmStructs_linux_mips.hpp b/src/hotspot/os_cpu/linux_mips/vmStructs_linux_mips.hpp +new file mode 100644 +index 0000000000..b7454bf045 +--- /dev/null ++++ b/src/hotspot/os_cpu/linux_mips/vmStructs_linux_mips.hpp +@@ -0,0 +1,55 @@ ++/* ++ * Copyright (c) 2000, 2013, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2016, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 
++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#ifndef OS_CPU_LINUX_MIPS_VM_VMSTRUCTS_LINUX_MIPS_HPP ++#define OS_CPU_LINUX_MIPS_VM_VMSTRUCTS_LINUX_MIPS_HPP ++ ++// These are the OS and CPU-specific fields, types and integer ++// constants required by the Serviceability Agent. This file is ++// referenced by vmStructs.cpp. ++ ++#define VM_STRUCTS_OS_CPU(nonstatic_field, static_field, unchecked_nonstatic_field, volatile_nonstatic_field, nonproduct_nonstatic_field, c2_nonstatic_field, unchecked_c1_static_field, unchecked_c2_static_field) \ ++ \ ++ /******************************/ \ ++ /* Threads (NOTE: incomplete) */ \ ++ /******************************/ \ ++ nonstatic_field(OSThread, _thread_id, pid_t) \ ++ nonstatic_field(OSThread, _pthread_id, pthread_t) ++ ++ ++#define VM_TYPES_OS_CPU(declare_type, declare_toplevel_type, declare_oop_type, declare_integer_type, declare_unsigned_integer_type, declare_c1_toplevel_type, declare_c2_type, declare_c2_toplevel_type) \ ++ \ ++ /**********************/ \ ++ /* Posix Thread IDs */ \ ++ /**********************/ \ ++ \ ++ declare_integer_type(pid_t) \ ++ declare_unsigned_integer_type(pthread_t) ++ ++#define VM_INT_CONSTANTS_OS_CPU(declare_constant, declare_preprocessor_constant, declare_c1_constant, declare_c2_constant, declare_c2_preprocessor_constant) ++ ++#define VM_LONG_CONSTANTS_OS_CPU(declare_constant, declare_preprocessor_constant, declare_c1_constant, declare_c2_constant, declare_c2_preprocessor_constant) ++ ++#endif // OS_CPU_LINUX_MIPS_VM_VMSTRUCTS_LINUX_MIPS_HPP +diff --git a/src/hotspot/os_cpu/linux_mips/vm_version_linux_mips.cpp b/src/hotspot/os_cpu/linux_mips/vm_version_linux_mips.cpp +new file mode 100644 +index 0000000000..93e4bea04c +--- /dev/null ++++ b/src/hotspot/os_cpu/linux_mips/vm_version_linux_mips.cpp +@@ -0,0 +1,28 @@ ++/* ++ * Copyright (c) 2006, 2010, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2019, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ * ++ */ ++ ++#include "precompiled.hpp" ++#include "runtime/os.hpp" ++#include "runtime/vm_version.hpp" +diff --git a/src/hotspot/os_cpu/linux_x86/gc/z/zGlobals_linux_x86.hpp b/src/hotspot/os_cpu/linux_x86/gc/z/zGlobals_linux_x86.hpp +index 2b0fa83c1a..270e0bc180 100644 +--- a/src/hotspot/os_cpu/linux_x86/gc/z/zGlobals_linux_x86.hpp ++++ b/src/hotspot/os_cpu/linux_x86/gc/z/zGlobals_linux_x86.hpp +@@ -85,4 +85,6 @@ const uintptr_t ZPlatformAddressSpaceSize = ((uintptr_t)1 << ZPlatformAddres + + const size_t ZPlatformCacheLineSize = 64; + ++const bool ZPlatformLoadBarrierTestResultInRegister = false; ++ + #endif // OS_CPU_LINUX_X86_ZGLOBALS_LINUX_X86_HPP +diff --git a/src/hotspot/share/asm/codeBuffer.cpp b/src/hotspot/share/asm/codeBuffer.cpp +index 4912f88056..a420f7807b 100644 +--- a/src/hotspot/share/asm/codeBuffer.cpp ++++ b/src/hotspot/share/asm/codeBuffer.cpp +@@ -22,6 +22,12 @@ + * + */ + ++/* ++ * This file has been modified by Loongson Technology in 2023. These ++ * modifications are Copyright (c) 2018, 2023, Loongson Technology, and are made ++ * available on the same license terms set forth above. ++ */ ++ + #include "precompiled.hpp" + #include "asm/codeBuffer.hpp" + #include "compiler/disassembler.hpp" +@@ -351,6 +357,7 @@ void CodeSection::relocate(address at, RelocationHolder const& spec, int format) + assert(rtype == relocInfo::none || + rtype == relocInfo::runtime_call_type || + rtype == relocInfo::internal_word_type|| ++ NOT_ZERO(MIPS64_ONLY(rtype == relocInfo::internal_pc_type ||)) + rtype == relocInfo::section_word_type || + rtype == relocInfo::external_word_type, + "code needs relocation information"); +diff --git a/src/hotspot/share/c1/c1_Compiler.cpp b/src/hotspot/share/c1/c1_Compiler.cpp +index aff12954b3..caa93fc804 100644 +--- a/src/hotspot/share/c1/c1_Compiler.cpp ++++ b/src/hotspot/share/c1/c1_Compiler.cpp +@@ -44,6 +44,12 @@ + #include "utilities/bitMap.inline.hpp" + #include "utilities/macros.hpp" + ++/* ++ * This file has been modified by Loongson Technology in 2022, These ++ * modifications are Copyright (c) 2022, Loongson Technology, and are made ++ * available on the same license terms set forth above. 
++ */ ++ + + Compiler::Compiler() : AbstractCompiler(compiler_c1) { + } +@@ -211,7 +217,7 @@ bool Compiler::is_intrinsic_supported(const methodHandle& method) { + case vmIntrinsics::_updateCRC32: + case vmIntrinsics::_updateBytesCRC32: + case vmIntrinsics::_updateByteBufferCRC32: +-#if defined(SPARC) || defined(S390) || defined(PPC64) || defined(AARCH64) ++#if defined(SPARC) || defined(S390) || defined(PPC64) || defined(AARCH64) || defined(LOONGARCH64) + case vmIntrinsics::_updateBytesCRC32C: + case vmIntrinsics::_updateDirectByteBufferCRC32C: + #endif +diff --git a/src/hotspot/share/c1/c1_LIR.cpp b/src/hotspot/share/c1/c1_LIR.cpp +index e30d39f73d..7461b7449a 100644 +--- a/src/hotspot/share/c1/c1_LIR.cpp ++++ b/src/hotspot/share/c1/c1_LIR.cpp +@@ -250,6 +250,18 @@ void LIR_Op2::verify() const { + #endif + } + ++void LIR_Op4::verify() const { ++#ifdef ASSERT ++ switch (code()) { ++ case lir_cmp_cmove: ++ break; ++ ++ default: ++ assert(!result_opr()->is_register() || !result_opr()->is_oop_register(), ++ "can't produce oops from arith"); ++ } ++#endif ++} + + LIR_OpBranch::LIR_OpBranch(LIR_Condition cond, BasicType type, BlockBegin* block) + : LIR_Op(lir_branch, LIR_OprFact::illegalOpr, (CodeEmitInfo*)NULL) +@@ -308,6 +320,56 @@ void LIR_OpBranch::negate_cond() { + } + + ++LIR_OpCmpBranch::LIR_OpCmpBranch(LIR_Condition cond, LIR_Opr left, LIR_Opr right, CodeStub* stub, CodeEmitInfo* info) ++ : LIR_Op2(lir_cmp_branch, cond, left, right, info) ++ , _label(stub->entry()) ++ , _block(NULL) ++ , _ublock(NULL) ++ , _stub(stub) { ++} ++ ++LIR_OpCmpBranch::LIR_OpCmpBranch(LIR_Condition cond, LIR_Opr left, LIR_Opr right, BlockBegin* block, CodeEmitInfo* info) ++ : LIR_Op2(lir_cmp_branch, cond, left, right, info) ++ , _label(block->label()) ++ , _block(block) ++ , _ublock(NULL) ++ , _stub(NULL) { ++} ++ ++LIR_OpCmpBranch::LIR_OpCmpBranch(LIR_Condition cond, LIR_Opr left, LIR_Opr right, BlockBegin* block, BlockBegin* ublock, CodeEmitInfo* info) ++ : LIR_Op2(lir_cmp_float_branch, cond, left, right, info) ++ , _label(block->label()) ++ , _block(block) ++ , _ublock(ublock) ++ , _stub(NULL) { ++} ++ ++void LIR_OpCmpBranch::change_block(BlockBegin* b) { ++ assert(_block != NULL, "must have old block"); ++ assert(_block->label() == label(), "must be equal"); ++ ++ _block = b; ++ _label = b->label(); ++} ++ ++void LIR_OpCmpBranch::change_ublock(BlockBegin* b) { ++ assert(_ublock != NULL, "must have old block"); ++ ++ _ublock = b; ++} ++ ++void LIR_OpCmpBranch::negate_cond() { ++ switch (condition()) { ++ case lir_cond_equal: set_condition(lir_cond_notEqual); break; ++ case lir_cond_notEqual: set_condition(lir_cond_equal); break; ++ case lir_cond_less: set_condition(lir_cond_greaterEqual); break; ++ case lir_cond_lessEqual: set_condition(lir_cond_greater); break; ++ case lir_cond_greaterEqual: set_condition(lir_cond_less); break; ++ case lir_cond_greater: set_condition(lir_cond_lessEqual); break; ++ default: ShouldNotReachHere(); ++ } ++} ++ + LIR_OpTypeCheck::LIR_OpTypeCheck(LIR_Code code, LIR_Opr result, LIR_Opr object, ciKlass* klass, + LIR_Opr tmp1, LIR_Opr tmp2, LIR_Opr tmp3, + bool fast_check, CodeEmitInfo* info_for_exception, CodeEmitInfo* info_for_patch, +@@ -509,10 +571,7 @@ void LIR_OpVisitState::visit(LIR_Op* op) { + assert(opConvert->_info == NULL, "must be"); + if (opConvert->_opr->is_valid()) do_input(opConvert->_opr); + if (opConvert->_result->is_valid()) do_output(opConvert->_result); +-#ifdef PPC32 +- if (opConvert->_tmp1->is_valid()) do_temp(opConvert->_tmp1); +- if 
(opConvert->_tmp2->is_valid()) do_temp(opConvert->_tmp2); +-#endif ++ if (opConvert->_tmp->is_valid()) do_temp(opConvert->_tmp); + do_stub(opConvert->_stub); + + break; +@@ -611,6 +670,25 @@ void LIR_OpVisitState::visit(LIR_Op* op) { + break; + } + ++// LIR_OpCmpBranch; ++ case lir_cmp_branch: // may have info, input and result register always invalid ++ case lir_cmp_float_branch: // may have info, input and result register always invalid ++ { ++ assert(op->as_OpCmpBranch() != NULL, "must be"); ++ LIR_OpCmpBranch* opCmpBranch = (LIR_OpCmpBranch*)op; ++ assert(opCmpBranch->_tmp2->is_illegal() && opCmpBranch->_tmp3->is_illegal() && ++ opCmpBranch->_tmp4->is_illegal() && opCmpBranch->_tmp5->is_illegal(), "not used"); ++ ++ if (opCmpBranch->_info) do_info(opCmpBranch->_info); ++ if (opCmpBranch->_opr1->is_valid()) do_input(opCmpBranch->_opr1); ++ if (opCmpBranch->_opr2->is_valid()) do_input(opCmpBranch->_opr2); ++ if (opCmpBranch->_tmp1->is_valid()) do_temp(opCmpBranch->_tmp1); ++ if (opCmpBranch->_stub != NULL) opCmpBranch->stub()->visit(this); ++ assert(opCmpBranch->_result->is_illegal(), "not used"); ++ ++ break; ++ } ++ + // special handling for cmove: right input operand must not be equal + // to the result operand, otherwise the backend fails + case lir_cmove: +@@ -711,6 +789,29 @@ void LIR_OpVisitState::visit(LIR_Op* op) { + break; + } + ++// LIR_Op4 ++ // special handling for cmp cmove: src2(opr4) operand must not be equal ++ // to the result operand, otherwise the backend fails ++ case lir_cmp_cmove: ++ { ++ assert(op->as_Op4() != NULL, "must be"); ++ LIR_Op4* op4 = (LIR_Op4*)op; ++ ++ assert(op4->_info == NULL, "not used"); ++ assert(op4->_opr1->is_valid() && op4->_opr2->is_valid() && ++ op4->_opr3->is_valid() && op4->_opr4->is_valid() && ++ op4->_result->is_valid(), "used"); ++ ++ do_input(op4->_opr1); ++ do_input(op4->_opr2); ++ do_input(op4->_opr3); ++ do_input(op4->_opr4); ++ do_temp(op4->_opr4); ++ do_output(op4->_result); ++ ++ break; ++ } ++ + // LIR_OpJavaCall + case lir_static_call: + case lir_optvirtual_call: +@@ -1028,6 +1129,13 @@ void LIR_Op2::emit_code(LIR_Assembler* masm) { + masm->emit_op2(this); + } + ++void LIR_OpCmpBranch::emit_code(LIR_Assembler* masm) { ++ masm->emit_opCmpBranch(this); ++ if (stub()) { ++ masm->append_code_stub(stub()); ++ } ++} ++ + void LIR_OpAllocArray::emit_code(LIR_Assembler* masm) { + masm->emit_alloc_array(this); + masm->append_code_stub(stub()); +@@ -1048,6 +1156,10 @@ void LIR_Op3::emit_code(LIR_Assembler* masm) { + masm->emit_op3(this); + } + ++void LIR_Op4::emit_code(LIR_Assembler* masm) { ++ masm->emit_op4(this); ++} ++ + void LIR_OpLock::emit_code(LIR_Assembler* masm) { + masm->emit_lock(this); + if (stub()) { +@@ -1424,8 +1536,7 @@ void LIR_List::null_check(LIR_Opr opr, CodeEmitInfo* info, bool deoptimize_on_nu + if (deoptimize_on_null) { + // Emit an explicit null check and deoptimize if opr is null + CodeStub* deopt = new DeoptimizeStub(info, Deoptimization::Reason_null_check, Deoptimization::Action_none); +- cmp(lir_cond_equal, opr, LIR_OprFact::oopConst(NULL)); +- branch(lir_cond_equal, T_OBJECT, deopt); ++ cmp_branch(lir_cond_equal, opr, LIR_OprFact::oopConst(NULL), T_OBJECT, deopt); + } else { + // Emit an implicit null check + append(new LIR_Op1(lir_null_check, opr, info)); +@@ -1680,6 +1791,8 @@ const char * LIR_Op::name() const { + case lir_cmp_l2i: s = "cmp_l2i"; break; + case lir_ucmp_fd2i: s = "ucomp_fd2i"; break; + case lir_cmp_fd2i: s = "comp_fd2i"; break; ++ case lir_cmp_branch: s = "cmp_branch"; break; ++ case 
lir_cmp_float_branch: s = "cmp_fbranch"; break; + case lir_cmove: s = "cmove"; break; + case lir_add: s = "add"; break; + case lir_sub: s = "sub"; break; +@@ -1705,6 +1818,8 @@ const char * LIR_Op::name() const { + case lir_irem: s = "irem"; break; + case lir_fmad: s = "fmad"; break; + case lir_fmaf: s = "fmaf"; break; ++ // LIR_Op4 ++ case lir_cmp_cmove: s = "cmp_cmove"; break; + // LIR_OpJavaCall + case lir_static_call: s = "static"; break; + case lir_optvirtual_call: s = "optvirtual"; break; +@@ -1856,6 +1971,26 @@ void LIR_OpBranch::print_instr(outputStream* out) const { + } + } + ++// LIR_OpCmpBranch ++void LIR_OpCmpBranch::print_instr(outputStream* out) const { ++ print_condition(out, condition()); out->print(" "); ++ in_opr1()->print(out); out->print(" "); ++ in_opr2()->print(out); out->print(" "); ++ if (block() != NULL) { ++ out->print("[B%d] ", block()->block_id()); ++ } else if (stub() != NULL) { ++ out->print("["); ++ stub()->print_name(out); ++ out->print(": " INTPTR_FORMAT "]", p2i(stub())); ++ if (stub()->info() != NULL) out->print(" [bci:%d]", stub()->info()->stack()->bci()); ++ } else { ++ out->print("[label:" INTPTR_FORMAT "] ", p2i(label())); ++ } ++ if (ublock() != NULL) { ++ out->print("unordered: [B%d] ", ublock()->block_id()); ++ } ++} ++ + void LIR_Op::print_condition(outputStream* out, LIR_Condition cond) { + switch(cond) { + case lir_cond_equal: out->print("[EQ]"); break; +@@ -1876,12 +2011,9 @@ void LIR_OpConvert::print_instr(outputStream* out) const { + print_bytecode(out, bytecode()); + in_opr()->print(out); out->print(" "); + result_opr()->print(out); out->print(" "); +-#ifdef PPC32 +- if(tmp1()->is_valid()) { +- tmp1()->print(out); out->print(" "); +- tmp2()->print(out); out->print(" "); ++ if(tmp()->is_valid()) { ++ tmp()->print(out); out->print(" "); + } +-#endif + } + + void LIR_OpConvert::print_bytecode(outputStream* out, Bytecodes::Code code) { +@@ -1979,6 +2111,19 @@ void LIR_Op3::print_instr(outputStream* out) const { + } + + ++// LIR_Op4 ++void LIR_Op4::print_instr(outputStream* out) const { ++ if (code() == lir_cmp_cmove) { ++ print_condition(out, condition()); out->print(" "); ++ } ++ in_opr1()->print(out); out->print(" "); ++ in_opr2()->print(out); out->print(" "); ++ in_opr3()->print(out); out->print(" "); ++ in_opr4()->print(out); out->print(" "); ++ result_opr()->print(out); ++} ++ ++ + void LIR_OpLock::print_instr(outputStream* out) const { + hdr_opr()->print(out); out->print(" "); + obj_opr()->print(out); out->print(" "); +diff --git a/src/hotspot/share/c1/c1_LIR.hpp b/src/hotspot/share/c1/c1_LIR.hpp +index 3234ca018b..1f46e44c77 100644 +--- a/src/hotspot/share/c1/c1_LIR.hpp ++++ b/src/hotspot/share/c1/c1_LIR.hpp +@@ -864,9 +864,11 @@ class LIR_OpConvert; + class LIR_OpAllocObj; + class LIR_OpRoundFP; + class LIR_Op2; ++class LIR_OpCmpBranch; + class LIR_OpDelay; + class LIR_Op3; + class LIR_OpAllocArray; ++class LIR_Op4; + class LIR_OpCall; + class LIR_OpJavaCall; + class LIR_OpRTCall; +@@ -933,6 +935,8 @@ enum LIR_Code { + , lir_cmp_l2i + , lir_ucmp_fd2i + , lir_cmp_fd2i ++ , lir_cmp_branch ++ , lir_cmp_float_branch + , lir_cmove + , lir_add + , lir_sub +@@ -964,6 +968,9 @@ enum LIR_Code { + , lir_fmad + , lir_fmaf + , end_op3 ++ , begin_op4 ++ , lir_cmp_cmove ++ , end_op4 + , begin_opJavaCall + , lir_static_call + , lir_optvirtual_call +@@ -1128,12 +1135,14 @@ class LIR_Op: public CompilationResourceObj { + virtual LIR_OpAllocObj* as_OpAllocObj() { return NULL; } + virtual LIR_OpRoundFP* as_OpRoundFP() { return NULL; } + virtual 
LIR_OpBranch* as_OpBranch() { return NULL; } ++ virtual LIR_OpCmpBranch* as_OpCmpBranch() { return NULL; } + virtual LIR_OpRTCall* as_OpRTCall() { return NULL; } + virtual LIR_OpConvert* as_OpConvert() { return NULL; } + virtual LIR_Op0* as_Op0() { return NULL; } + virtual LIR_Op1* as_Op1() { return NULL; } + virtual LIR_Op2* as_Op2() { return NULL; } + virtual LIR_Op3* as_Op3() { return NULL; } ++ virtual LIR_Op4* as_Op4() { return NULL; } + virtual LIR_OpArrayCopy* as_OpArrayCopy() { return NULL; } + virtual LIR_OpUpdateCRC32* as_OpUpdateCRC32() { return NULL; } + virtual LIR_OpTypeCheck* as_OpTypeCheck() { return NULL; } +@@ -1463,15 +1472,18 @@ class LIR_OpConvert: public LIR_Op1 { + private: + Bytecodes::Code _bytecode; + ConversionStub* _stub; ++ LIR_Opr _tmp; + + public: +- LIR_OpConvert(Bytecodes::Code code, LIR_Opr opr, LIR_Opr result, ConversionStub* stub) ++ LIR_OpConvert(Bytecodes::Code code, LIR_Opr opr, LIR_Opr result, ConversionStub* stub, LIR_Opr tmp) + : LIR_Op1(lir_convert, opr, result) + , _stub(stub) +- , _bytecode(code) {} ++ , _bytecode(code) ++ , _tmp(tmp) {} + + Bytecodes::Code bytecode() const { return _bytecode; } + ConversionStub* stub() const { return _stub; } ++ LIR_Opr tmp() const { return _tmp; } + + virtual void emit_code(LIR_Assembler* masm); + virtual LIR_OpConvert* as_OpConvert() { return this; } +@@ -1626,7 +1638,7 @@ class LIR_Op2: public LIR_Op { + , _tmp3(LIR_OprFact::illegalOpr) + , _tmp4(LIR_OprFact::illegalOpr) + , _tmp5(LIR_OprFact::illegalOpr) { +- assert(code == lir_cmp || code == lir_assert, "code check"); ++ assert(code == lir_cmp || code == lir_cmp_branch || code == lir_cmp_float_branch || code == lir_assert, "code check"); + } + + LIR_Op2(LIR_Code code, LIR_Condition condition, LIR_Opr opr1, LIR_Opr opr2, LIR_Opr result, BasicType type) +@@ -1658,7 +1670,7 @@ class LIR_Op2: public LIR_Op { + , _tmp3(LIR_OprFact::illegalOpr) + , _tmp4(LIR_OprFact::illegalOpr) + , _tmp5(LIR_OprFact::illegalOpr) { +- assert(code != lir_cmp && is_in_range(code, begin_op2, end_op2), "code check"); ++ assert((code != lir_cmp && code != lir_cmp_branch && code != lir_cmp_float_branch) && is_in_range(code, begin_op2, end_op2), "code check"); + } + + LIR_Op2(LIR_Code code, LIR_Opr opr1, LIR_Opr opr2, LIR_Opr result, LIR_Opr tmp1, LIR_Opr tmp2 = LIR_OprFact::illegalOpr, +@@ -1674,7 +1686,7 @@ class LIR_Op2: public LIR_Op { + , _tmp3(tmp3) + , _tmp4(tmp4) + , _tmp5(tmp5) { +- assert(code != lir_cmp && is_in_range(code, begin_op2, end_op2), "code check"); ++ assert((code != lir_cmp && code != lir_cmp_branch && code != lir_cmp_float_branch) && is_in_range(code, begin_op2, end_op2), "code check"); + } + + LIR_Opr in_opr1() const { return _opr1; } +@@ -1686,10 +1698,12 @@ class LIR_Op2: public LIR_Op { + LIR_Opr tmp4_opr() const { return _tmp4; } + LIR_Opr tmp5_opr() const { return _tmp5; } + LIR_Condition condition() const { +- assert(code() == lir_cmp || code() == lir_cmove || code() == lir_assert, "only valid for cmp and cmove and assert"); return _condition; ++ assert(code() == lir_cmp || code() == lir_cmp_branch || code() == lir_cmp_float_branch || code() == lir_cmove || code() == lir_assert, "only valid for cmp and cmove and assert"); ++ return _condition; + } + void set_condition(LIR_Condition condition) { +- assert(code() == lir_cmp || code() == lir_cmove, "only valid for cmp and cmove"); _condition = condition; ++ assert(code() == lir_cmp || code() == lir_cmp_branch || code() == lir_cmp_float_branch || code() == lir_cmove, "only valid for cmp and cmove"); ++ 
_condition = condition; + } + + void set_fpu_stack_size(int size) { _fpu_stack_size = size; } +@@ -1703,6 +1717,43 @@ class LIR_Op2: public LIR_Op { + virtual void print_instr(outputStream* out) const PRODUCT_RETURN; + }; + ++class LIR_OpCmpBranch: public LIR_Op2 { ++ friend class LIR_OpVisitState; ++ ++ private: ++ Label* _label; ++ BlockBegin* _block; // if this is a branch to a block, this is the block ++ BlockBegin* _ublock; // if this is a float-branch, this is the unorderd block ++ CodeStub* _stub; // if this is a branch to a stub, this is the stub ++ ++ public: ++ LIR_OpCmpBranch(LIR_Condition cond, LIR_Opr left, LIR_Opr right, Label* lbl, CodeEmitInfo* info = NULL) ++ : LIR_Op2(lir_cmp_branch, cond, left, right, info) ++ , _label(lbl) ++ , _block(NULL) ++ , _ublock(NULL) ++ , _stub(NULL) { } ++ ++ LIR_OpCmpBranch(LIR_Condition cond, LIR_Opr left, LIR_Opr right, CodeStub* stub, CodeEmitInfo* info = NULL); ++ LIR_OpCmpBranch(LIR_Condition cond, LIR_Opr left, LIR_Opr right, BlockBegin* block, CodeEmitInfo* info = NULL); ++ ++ // for unordered comparisons ++ LIR_OpCmpBranch(LIR_Condition cond, LIR_Opr left, LIR_Opr right, BlockBegin* block, BlockBegin* ublock, CodeEmitInfo* info = NULL); ++ ++ Label* label() const { return _label; } ++ BlockBegin* block() const { return _block; } ++ BlockBegin* ublock() const { return _ublock; } ++ CodeStub* stub() const { return _stub; } ++ ++ void change_block(BlockBegin* b); ++ void change_ublock(BlockBegin* b); ++ void negate_cond(); ++ ++ virtual void emit_code(LIR_Assembler* masm); ++ virtual LIR_OpCmpBranch* as_OpCmpBranch() { return this; } ++ virtual void print_instr(outputStream* out) const PRODUCT_RETURN; ++}; ++ + class LIR_OpAllocArray : public LIR_Op { + friend class LIR_OpVisitState; + +@@ -1767,6 +1818,48 @@ class LIR_Op3: public LIR_Op { + }; + + ++class LIR_Op4: public LIR_Op { ++ friend class LIR_OpVisitState; ++ ++ private: ++ LIR_Opr _opr1; ++ LIR_Opr _opr2; ++ LIR_Opr _opr3; ++ LIR_Opr _opr4; ++ BasicType _type; ++ LIR_Condition _condition; ++ ++ void verify() const; ++ ++ public: ++ LIR_Op4(LIR_Code code, LIR_Condition condition, LIR_Opr opr1, LIR_Opr opr2, LIR_Opr opr3, LIR_Opr opr4, LIR_Opr result, BasicType type) ++ : LIR_Op(code, result, NULL) ++ , _opr1(opr1) ++ , _opr2(opr2) ++ , _opr3(opr3) ++ , _opr4(opr4) ++ , _type(type) ++ , _condition(condition) { ++ assert(is_in_range(code, begin_op4, end_op4), "code check"); ++ assert(type != T_ILLEGAL, "cmove should have type"); ++ } ++ LIR_Opr in_opr1() const { return _opr1; } ++ LIR_Opr in_opr2() const { return _opr2; } ++ LIR_Opr in_opr3() const { return _opr3; } ++ LIR_Opr in_opr4() const { return _opr4; } ++ BasicType type() const { return _type; } ++ LIR_Condition condition() const { ++ assert(code() == lir_cmp_cmove, "only valid for cmp cmove"); return _condition; ++ } ++ void set_condition(LIR_Condition condition) { ++ assert(code() == lir_cmp_cmove, "only valid for cmp cmove"); _condition = condition; ++ } ++ ++ virtual void emit_code(LIR_Assembler* masm); ++ virtual LIR_Op4* as_Op4() { return this; } ++ virtual void print_instr(outputStream* out) const PRODUCT_RETURN; ++}; ++ + //-------------------------------- + class LabelObj: public CompilationResourceObj { + private: +@@ -2115,7 +2208,9 @@ class LIR_List: public CompilationResourceObj { + + void safepoint(LIR_Opr tmp, CodeEmitInfo* info) { append(new LIR_Op1(lir_safepoint, tmp, info)); } + +- void convert(Bytecodes::Code code, LIR_Opr left, LIR_Opr dst, ConversionStub* stub = NULL/*, bool is_32bit = false*/) { 
append(new LIR_OpConvert(code, left, dst, stub)); } ++ void convert(Bytecodes::Code code, LIR_Opr left, LIR_Opr dst, ConversionStub* stub = NULL, LIR_Opr tmp = LIR_OprFact::illegalOpr) { ++ append(new LIR_OpConvert(code, left, dst, stub, tmp)); ++ } + + void logical_and (LIR_Opr left, LIR_Opr right, LIR_Opr dst) { append(new LIR_Op2(lir_logic_and, left, right, dst)); } + void logical_or (LIR_Opr left, LIR_Opr right, LIR_Opr dst) { append(new LIR_Op2(lir_logic_or, left, right, dst)); } +@@ -2146,6 +2241,15 @@ class LIR_List: public CompilationResourceObj { + cmp(condition, left, LIR_OprFact::intConst(right), info); + } + ++ // machine dependent ++ template ++ void cmp_branch(LIR_Condition condition, LIR_Opr left, LIR_Opr right, BasicType type, T tgt, CodeEmitInfo* info = NULL); ++ template ++ void cmp_branch(LIR_Condition condition, LIR_Opr left, int right, BasicType type, T tgt, CodeEmitInfo* info = NULL) { ++ cmp_branch(condition, left, LIR_OprFact::intConst(right), type, tgt, info); ++ } ++ void cmp_branch(LIR_Condition condition, LIR_Opr left, LIR_Opr right, BasicType type, BlockBegin* block, BlockBegin* unordered); ++ + void cmp_mem_int(LIR_Condition condition, LIR_Opr base, int disp, int c, CodeEmitInfo* info); + void cmp_reg_mem(LIR_Condition condition, LIR_Opr reg, LIR_Address* addr, CodeEmitInfo* info); + +@@ -2153,6 +2257,9 @@ class LIR_List: public CompilationResourceObj { + append(new LIR_Op2(lir_cmove, condition, src1, src2, dst, type)); + } + ++ // machine dependent ++ void cmp_cmove(LIR_Condition condition, LIR_Opr left, LIR_Opr right, LIR_Opr src1, LIR_Opr src2, LIR_Opr dst, BasicType type); ++ + void cas_long(LIR_Opr addr, LIR_Opr cmp_value, LIR_Opr new_value, + LIR_Opr t1, LIR_Opr t2, LIR_Opr result = LIR_OprFact::illegalOpr); + void cas_obj(LIR_Opr addr, LIR_Opr cmp_value, LIR_Opr new_value, +diff --git a/src/hotspot/share/c1/c1_LIRAssembler.cpp b/src/hotspot/share/c1/c1_LIRAssembler.cpp +index 160483d5f7..bec297ebd2 100644 +--- a/src/hotspot/share/c1/c1_LIRAssembler.cpp ++++ b/src/hotspot/share/c1/c1_LIRAssembler.cpp +@@ -777,6 +777,18 @@ void LIR_Assembler::emit_op2(LIR_Op2* op) { + } + + ++void LIR_Assembler::emit_op4(LIR_Op4* op) { ++ switch (op->code()) { ++ case lir_cmp_cmove: ++ cmp_cmove(op->condition(), op->in_opr1(), op->in_opr2(), op->in_opr3(), op->in_opr4(), op->result_opr(), op->type()); ++ break; ++ ++ default: ++ Unimplemented(); ++ break; ++ } ++} ++ + void LIR_Assembler::build_frame() { + _masm->build_frame(initial_frame_size_in_bytes(), bang_size_in_bytes()); + } +diff --git a/src/hotspot/share/c1/c1_LIRAssembler.hpp b/src/hotspot/share/c1/c1_LIRAssembler.hpp +index 44a5bcbe54..114b155f92 100644 +--- a/src/hotspot/share/c1/c1_LIRAssembler.hpp ++++ b/src/hotspot/share/c1/c1_LIRAssembler.hpp +@@ -190,7 +190,9 @@ class LIR_Assembler: public CompilationResourceObj { + void emit_op1(LIR_Op1* op); + void emit_op2(LIR_Op2* op); + void emit_op3(LIR_Op3* op); ++ void emit_op4(LIR_Op4* op); + void emit_opBranch(LIR_OpBranch* op); ++ void emit_opCmpBranch(LIR_OpCmpBranch* op); + void emit_opLabel(LIR_OpLabel* op); + void emit_arraycopy(LIR_OpArrayCopy* op); + void emit_updatecrc32(LIR_OpUpdateCRC32* op); +@@ -223,6 +225,7 @@ class LIR_Assembler: public CompilationResourceObj { + void comp_mem_op(LIR_Opr src, LIR_Opr result, BasicType type, CodeEmitInfo* info); // info set for null exceptions + void comp_fl2i(LIR_Code code, LIR_Opr left, LIR_Opr right, LIR_Opr result, LIR_Op2* op); + void cmove(LIR_Condition code, LIR_Opr left, LIR_Opr right, LIR_Opr result, 
BasicType type); ++ void cmp_cmove(LIR_Condition code, LIR_Opr left, LIR_Opr right, LIR_Opr src1, LIR_Opr src2, LIR_Opr result, BasicType type); + + void call( LIR_OpJavaCall* op, relocInfo::relocType rtype); + void ic_call( LIR_OpJavaCall* op); +diff --git a/src/hotspot/share/c1/c1_LIRGenerator.cpp b/src/hotspot/share/c1/c1_LIRGenerator.cpp +index f4b156d59b..fc35f02772 100644 +--- a/src/hotspot/share/c1/c1_LIRGenerator.cpp ++++ b/src/hotspot/share/c1/c1_LIRGenerator.cpp +@@ -479,13 +479,11 @@ void LIRGenerator::array_range_check(LIR_Opr array, LIR_Opr index, + CodeEmitInfo* null_check_info, CodeEmitInfo* range_check_info) { + CodeStub* stub = new RangeCheckStub(range_check_info, index, array); + if (index->is_constant()) { +- cmp_mem_int(lir_cond_belowEqual, array, arrayOopDesc::length_offset_in_bytes(), +- index->as_jint(), null_check_info); +- __ branch(lir_cond_belowEqual, T_INT, stub); // forward branch ++ cmp_mem_int_branch(lir_cond_belowEqual, array, arrayOopDesc::length_offset_in_bytes(), ++ index->as_jint(), stub, null_check_info); // forward branch + } else { +- cmp_reg_mem(lir_cond_aboveEqual, index, array, +- arrayOopDesc::length_offset_in_bytes(), T_INT, null_check_info); +- __ branch(lir_cond_aboveEqual, T_INT, stub); // forward branch ++ cmp_reg_mem_branch(lir_cond_aboveEqual, index, array, arrayOopDesc::length_offset_in_bytes(), ++ T_INT, stub, null_check_info); // forward branch + } + } + +@@ -493,12 +491,11 @@ void LIRGenerator::array_range_check(LIR_Opr array, LIR_Opr index, + void LIRGenerator::nio_range_check(LIR_Opr buffer, LIR_Opr index, LIR_Opr result, CodeEmitInfo* info) { + CodeStub* stub = new RangeCheckStub(info, index); + if (index->is_constant()) { +- cmp_mem_int(lir_cond_belowEqual, buffer, java_nio_Buffer::limit_offset(), index->as_jint(), info); +- __ branch(lir_cond_belowEqual, T_INT, stub); // forward branch ++ cmp_mem_int_branch(lir_cond_belowEqual, buffer, java_nio_Buffer::limit_offset(), ++ index->as_jint(), stub, info); // forward branch + } else { +- cmp_reg_mem(lir_cond_aboveEqual, index, buffer, +- java_nio_Buffer::limit_offset(), T_INT, info); +- __ branch(lir_cond_aboveEqual, T_INT, stub); // forward branch ++ cmp_reg_mem_branch(lir_cond_aboveEqual, index, buffer, java_nio_Buffer::limit_offset(), ++ T_INT, stub, info); // forward branch + } + __ move(index, result); + } +@@ -934,7 +931,7 @@ LIR_Opr LIRGenerator::force_to_spill(LIR_Opr value, BasicType t) { + return tmp; + } + +-void LIRGenerator::profile_branch(If* if_instr, If::Condition cond) { ++void LIRGenerator::profile_branch(If* if_instr, If::Condition cond, LIR_Opr left, LIR_Opr right) { + if (if_instr->should_profile()) { + ciMethod* method = if_instr->profiled_method(); + assert(method != NULL, "method should be set if branch is profiled"); +@@ -955,10 +952,17 @@ void LIRGenerator::profile_branch(If* if_instr, If::Condition cond) { + __ metadata2reg(md->constant_encoding(), md_reg); + + LIR_Opr data_offset_reg = new_pointer_register(); +- __ cmove(lir_cond(cond), +- LIR_OprFact::intptrConst(taken_count_offset), +- LIR_OprFact::intptrConst(not_taken_count_offset), +- data_offset_reg, as_BasicType(if_instr->x()->type())); ++ if (left == LIR_OprFact::illegalOpr && right == LIR_OprFact::illegalOpr) { ++ __ cmove(lir_cond(cond), ++ LIR_OprFact::intptrConst(taken_count_offset), ++ LIR_OprFact::intptrConst(not_taken_count_offset), ++ data_offset_reg, as_BasicType(if_instr->x()->type())); ++ } else { ++ __ cmp_cmove(lir_cond(cond), left, right, ++ LIR_OprFact::intptrConst(taken_count_offset), 
++ LIR_OprFact::intptrConst(not_taken_count_offset), ++ data_offset_reg, as_BasicType(if_instr->x()->type())); ++ } + + // MDO cells are intptr_t, so the data_reg width is arch-dependent. + LIR_Opr data_reg = new_pointer_register(); +@@ -1315,8 +1319,8 @@ void LIRGenerator::do_isPrimitive(Intrinsic* x) { + } + + __ move(new LIR_Address(rcvr.result(), java_lang_Class::klass_offset_in_bytes(), T_ADDRESS), temp, info); +- __ cmp(lir_cond_notEqual, temp, LIR_OprFact::metadataConst(0)); +- __ cmove(lir_cond_notEqual, LIR_OprFact::intConst(0), LIR_OprFact::intConst(1), result, T_BOOLEAN); ++ __ cmp_cmove(lir_cond_notEqual, temp, LIR_OprFact::metadataConst(0), ++ LIR_OprFact::intConst(0), LIR_OprFact::intConst(1), result, T_BOOLEAN); + } + + +@@ -1598,8 +1602,8 @@ void LIRGenerator::do_StoreIndexed(StoreIndexed* x) { + + if (GenerateRangeChecks && needs_range_check) { + if (use_length) { +- __ cmp(lir_cond_belowEqual, length.result(), index.result()); +- __ branch(lir_cond_belowEqual, T_INT, new RangeCheckStub(range_check_info, index.result(), array.result())); ++ CodeStub* stub = new RangeCheckStub(range_check_info, index.result(), array.result()); ++ __ cmp_branch(lir_cond_belowEqual, length.result(), index.result(), T_INT, stub); + } else { + array_range_check(array.result(), index.result(), null_check_info, range_check_info); + // range_check also does the null check +@@ -1777,12 +1781,9 @@ void LIRGenerator::do_NIOCheckIndex(Intrinsic* x) { + CodeEmitInfo* info = state_for(x); + CodeStub* stub = new RangeCheckStub(info, index.result()); + if (index.result()->is_constant()) { +- cmp_mem_int(lir_cond_belowEqual, buf.result(), java_nio_Buffer::limit_offset(), index.result()->as_jint(), info); +- __ branch(lir_cond_belowEqual, T_INT, stub); ++ cmp_mem_int_branch(lir_cond_belowEqual, buf.result(), java_nio_Buffer::limit_offset(), index.result()->as_jint(), stub, info); + } else { +- cmp_reg_mem(lir_cond_aboveEqual, index.result(), buf.result(), +- java_nio_Buffer::limit_offset(), T_INT, info); +- __ branch(lir_cond_aboveEqual, T_INT, stub); ++ cmp_reg_mem_branch(lir_cond_aboveEqual, index.result(), buf.result(), java_nio_Buffer::limit_offset(), T_INT, stub, info); + } + __ move(index.result(), result); + } else { +@@ -1860,8 +1861,8 @@ void LIRGenerator::do_LoadIndexed(LoadIndexed* x) { + } else if (use_length) { + // TODO: use a (modified) version of array_range_check that does not require a + // constant length to be loaded to a register +- __ cmp(lir_cond_belowEqual, length.result(), index.result()); +- __ branch(lir_cond_belowEqual, T_INT, new RangeCheckStub(range_check_info, index.result(), array.result())); ++ CodeStub* stub = new RangeCheckStub(range_check_info, index.result(), array.result()); ++ __ cmp_branch(lir_cond_belowEqual, length.result(), index.result(), T_INT, stub); + } else { + array_range_check(array.result(), index.result(), null_check_info, range_check_info); + // The range check performs the null check, so clear it out for the load +@@ -2234,19 +2235,14 @@ void LIRGenerator::do_SwitchRanges(SwitchRangeArray* x, LIR_Opr value, BlockBegi + int high_key = one_range->high_key(); + BlockBegin* dest = one_range->sux(); + if (low_key == high_key) { +- __ cmp(lir_cond_equal, value, low_key); +- __ branch(lir_cond_equal, T_INT, dest); ++ __ cmp_branch(lir_cond_equal, value, low_key, T_INT, dest); + } else if (high_key - low_key == 1) { +- __ cmp(lir_cond_equal, value, low_key); +- __ branch(lir_cond_equal, T_INT, dest); +- __ cmp(lir_cond_equal, value, high_key); +- __ 
branch(lir_cond_equal, T_INT, dest); ++ __ cmp_branch(lir_cond_equal, value, low_key, T_INT, dest); ++ __ cmp_branch(lir_cond_equal, value, high_key, T_INT, dest); + } else { + LabelObj* L = new LabelObj(); +- __ cmp(lir_cond_less, value, low_key); +- __ branch(lir_cond_less, T_INT, L->label()); +- __ cmp(lir_cond_lessEqual, value, high_key); +- __ branch(lir_cond_lessEqual, T_INT, dest); ++ __ cmp_branch(lir_cond_less, value, low_key, T_INT, L->label()); ++ __ cmp_branch(lir_cond_lessEqual, value, high_key, T_INT, dest); + __ branch_destination(L->label()); + } + } +@@ -2346,12 +2342,11 @@ void LIRGenerator::do_TableSwitch(TableSwitch* x) { + __ move(LIR_OprFact::intptrConst(default_count_offset), data_offset_reg); + for (int i = 0; i < len; i++) { + int count_offset = md->byte_offset_of_slot(data, MultiBranchData::case_count_offset(i)); +- __ cmp(lir_cond_equal, value, i + lo_key); + __ move(data_offset_reg, tmp_reg); +- __ cmove(lir_cond_equal, +- LIR_OprFact::intptrConst(count_offset), +- tmp_reg, +- data_offset_reg, T_INT); ++ __ cmp_cmove(lir_cond_equal, value, LIR_OprFact::intConst(i + lo_key), ++ LIR_OprFact::intptrConst(count_offset), ++ tmp_reg, ++ data_offset_reg, T_INT); + } + + LIR_Opr data_reg = new_pointer_register(); +@@ -2365,8 +2360,7 @@ void LIRGenerator::do_TableSwitch(TableSwitch* x) { + do_SwitchRanges(create_lookup_ranges(x), value, x->default_sux()); + } else { + for (int i = 0; i < len; i++) { +- __ cmp(lir_cond_equal, value, i + lo_key); +- __ branch(lir_cond_equal, T_INT, x->sux_at(i)); ++ __ cmp_branch(lir_cond_equal, value, i + lo_key, T_INT, x->sux_at(i)); + } + __ jump(x->default_sux()); + } +@@ -2404,12 +2398,11 @@ void LIRGenerator::do_LookupSwitch(LookupSwitch* x) { + __ move(LIR_OprFact::intptrConst(default_count_offset), data_offset_reg); + for (int i = 0; i < len; i++) { + int count_offset = md->byte_offset_of_slot(data, MultiBranchData::case_count_offset(i)); +- __ cmp(lir_cond_equal, value, x->key_at(i)); + __ move(data_offset_reg, tmp_reg); +- __ cmove(lir_cond_equal, +- LIR_OprFact::intptrConst(count_offset), +- tmp_reg, +- data_offset_reg, T_INT); ++ __ cmp_cmove(lir_cond_equal, value, LIR_OprFact::intConst(x->key_at(i)), ++ LIR_OprFact::intptrConst(count_offset), ++ tmp_reg, ++ data_offset_reg, T_INT); + } + + LIR_Opr data_reg = new_pointer_register(); +@@ -2424,8 +2417,7 @@ void LIRGenerator::do_LookupSwitch(LookupSwitch* x) { + } else { + int len = x->length(); + for (int i = 0; i < len; i++) { +- __ cmp(lir_cond_equal, value, x->key_at(i)); +- __ branch(lir_cond_equal, T_INT, x->sux_at(i)); ++ __ cmp_branch(lir_cond_equal, value, x->key_at(i), T_INT, x->sux_at(i)); + } + __ jump(x->default_sux()); + } +@@ -2935,8 +2927,8 @@ void LIRGenerator::do_IfOp(IfOp* x) { + f_val.dont_load_item(); + LIR_Opr reg = rlock_result(x); + +- __ cmp(lir_cond(x->cond()), left.result(), right.result()); +- __ cmove(lir_cond(x->cond()), t_val.result(), f_val.result(), reg, as_BasicType(x->x()->type())); ++ __ cmp_cmove(lir_cond(x->cond()), left.result(), right.result(), ++ t_val.result(), f_val.result(), reg, as_BasicType(x->x()->type())); + } + + #ifdef JFR_HAVE_INTRINSICS +@@ -2980,8 +2972,7 @@ void LIRGenerator::do_getEventWriter(Intrinsic* x) { + __ move(LIR_OprFact::oopConst(NULL), result); + LIR_Opr jobj = new_register(T_METADATA); + __ move_wide(jobj_addr, jobj); +- __ cmp(lir_cond_equal, jobj, LIR_OprFact::metadataConst(0)); +- __ branch(lir_cond_equal, T_OBJECT, L_end->label()); ++ __ cmp_branch(lir_cond_equal, jobj, LIR_OprFact::metadataConst(0), T_OBJECT, 
L_end->label()); + + access_load(IN_NATIVE, T_OBJECT, LIR_OprFact::address(new LIR_Address(jobj, T_OBJECT)), result); + +@@ -3286,21 +3277,24 @@ void LIRGenerator::do_ProfileInvoke(ProfileInvoke* x) { + + void LIRGenerator::increment_backedge_counter_conditionally(LIR_Condition cond, LIR_Opr left, LIR_Opr right, CodeEmitInfo* info, int left_bci, int right_bci, int bci) { + if (compilation()->count_backedges()) { ++ LIR_Opr step = new_register(T_INT); ++ LIR_Opr plus_one = LIR_OprFact::intConst(InvocationCounter::count_increment); ++ LIR_Opr zero = LIR_OprFact::intConst(0); + #if defined(X86) && !defined(_LP64) + // BEWARE! On 32-bit x86 cmp clobbers its left argument so we need a temp copy. + LIR_Opr left_copy = new_register(left->type()); + __ move(left, left_copy); + __ cmp(cond, left_copy, right); +-#else +- __ cmp(cond, left, right); +-#endif +- LIR_Opr step = new_register(T_INT); +- LIR_Opr plus_one = LIR_OprFact::intConst(InvocationCounter::count_increment); +- LIR_Opr zero = LIR_OprFact::intConst(0); + __ cmove(cond, + (left_bci < bci) ? plus_one : zero, + (right_bci < bci) ? plus_one : zero, + step, left->type()); ++#else ++ __ cmp_cmove(cond, left, right, ++ (left_bci < bci) ? plus_one : zero, ++ (right_bci < bci) ? plus_one : zero, ++ step, left->type()); ++#endif + increment_backedge_counter(info, step, bci); + } + } +@@ -3339,8 +3333,7 @@ void LIRGenerator::decrement_age(CodeEmitInfo* info) { + // DeoptimizeStub will reexecute from the current state in code info. + CodeStub* deopt = new DeoptimizeStub(info, Deoptimization::Reason_tenured, + Deoptimization::Action_make_not_entrant); +- __ cmp(lir_cond_lessEqual, result, LIR_OprFact::intConst(0)); +- __ branch(lir_cond_lessEqual, T_INT, deopt); ++ __ cmp_branch(lir_cond_lessEqual, result, LIR_OprFact::intConst(0), T_INT, deopt); + } + } + +@@ -3386,8 +3379,7 @@ void LIRGenerator::increment_event_counter_impl(CodeEmitInfo* info, + int freq = frequency << InvocationCounter::count_shift; + if (freq == 0) { + if (!step->is_constant()) { +- __ cmp(lir_cond_notEqual, step, LIR_OprFact::intConst(0)); +- __ branch(lir_cond_notEqual, T_ILLEGAL, overflow); ++ __ cmp_branch(lir_cond_notEqual, step, LIR_OprFact::intConst(0), T_ILLEGAL, overflow); + } else { + __ branch(lir_cond_always, T_ILLEGAL, overflow); + } +@@ -3395,12 +3387,11 @@ void LIRGenerator::increment_event_counter_impl(CodeEmitInfo* info, + LIR_Opr mask = load_immediate(freq, T_INT); + if (!step->is_constant()) { + // If step is 0, make sure the overflow check below always fails +- __ cmp(lir_cond_notEqual, step, LIR_OprFact::intConst(0)); +- __ cmove(lir_cond_notEqual, result, LIR_OprFact::intConst(InvocationCounter::count_increment), result, T_INT); ++ __ cmp_cmove(lir_cond_notEqual, step, LIR_OprFact::intConst(0), ++ result, LIR_OprFact::intConst(InvocationCounter::count_increment), result, T_INT); + } + __ logical_and(result, mask, result); +- __ cmp(lir_cond_equal, result, LIR_OprFact::intConst(0)); +- __ branch(lir_cond_equal, T_INT, overflow); ++ __ cmp_branch(lir_cond_equal, result, LIR_OprFact::intConst(0), T_INT, overflow); + } + __ branch_destination(overflow->continuation()); + } +@@ -3513,8 +3504,7 @@ void LIRGenerator::do_RangeCheckPredicate(RangeCheckPredicate *x) { + CodeEmitInfo *info = state_for(x, x->state()); + CodeStub* stub = new PredicateFailedStub(info); + +- __ cmp(lir_cond(cond), left, right); +- __ branch(lir_cond(cond), right->type(), stub); ++ __ cmp_branch(lir_cond(cond), left, right, right->type(), stub); + } + } + +@@ -3661,8 +3651,8 @@ LIR_Opr 
LIRGenerator::mask_boolean(LIR_Opr array, LIR_Opr value, CodeEmitInfo*& + __ move(new LIR_Address(klass, in_bytes(Klass::layout_helper_offset()), T_INT), layout); + int diffbit = Klass::layout_helper_boolean_diffbit(); + __ logical_and(layout, LIR_OprFact::intConst(diffbit), layout); +- __ cmp(lir_cond_notEqual, layout, LIR_OprFact::intConst(0)); +- __ cmove(lir_cond_notEqual, value_fixed, value, value_fixed, T_BYTE); ++ __ cmp_cmove(lir_cond_notEqual, layout, LIR_OprFact::intConst(0), ++ value_fixed, value, value_fixed, T_BYTE); + value = value_fixed; + return value; + } +diff --git a/src/hotspot/share/c1/c1_LIRGenerator.hpp b/src/hotspot/share/c1/c1_LIRGenerator.hpp +index 3ad325d759..f377b27859 100644 +--- a/src/hotspot/share/c1/c1_LIRGenerator.hpp ++++ b/src/hotspot/share/c1/c1_LIRGenerator.hpp +@@ -363,8 +363,10 @@ class LIRGenerator: public InstructionVisitor, public BlockClosure { + void new_instance (LIR_Opr dst, ciInstanceKlass* klass, bool is_unresolved, LIR_Opr scratch1, LIR_Opr scratch2, LIR_Opr scratch3, LIR_Opr scratch4, LIR_Opr klass_reg, CodeEmitInfo* info); + + // machine dependent +- void cmp_mem_int(LIR_Condition condition, LIR_Opr base, int disp, int c, CodeEmitInfo* info); +- void cmp_reg_mem(LIR_Condition condition, LIR_Opr reg, LIR_Opr base, int disp, BasicType type, CodeEmitInfo* info); ++ template ++ void cmp_mem_int_branch(LIR_Condition condition, LIR_Opr base, int disp, int c, T tgt, CodeEmitInfo* info); ++ template ++ void cmp_reg_mem_branch(LIR_Condition condition, LIR_Opr reg, LIR_Opr base, int disp, BasicType type, T tgt, CodeEmitInfo* info); + + void arraycopy_helper(Intrinsic* x, int* flags, ciArrayKlass** expected_type); + +@@ -391,7 +393,7 @@ class LIRGenerator: public InstructionVisitor, public BlockClosure { + + LIR_Opr safepoint_poll_register(); + +- void profile_branch(If* if_instr, If::Condition cond); ++ void profile_branch(If* if_instr, If::Condition cond, LIR_Opr left = LIR_OprFact::illegalOpr, LIR_Opr right = LIR_OprFact::illegalOpr); + void increment_event_counter_impl(CodeEmitInfo* info, + ciMethod *method, LIR_Opr step, int frequency, + int bci, bool backedge, bool notify); +diff --git a/src/hotspot/share/c1/c1_LinearScan.cpp b/src/hotspot/share/c1/c1_LinearScan.cpp +index c28055fd99..4e7df88102 100644 +--- a/src/hotspot/share/c1/c1_LinearScan.cpp ++++ b/src/hotspot/share/c1/c1_LinearScan.cpp +@@ -35,6 +35,12 @@ + #include "runtime/timerTrace.hpp" + #include "utilities/bitMap.inline.hpp" + ++/* ++ * This file has been modified by Loongson Technology in 2022, These ++ * modifications are Copyright (c) 2022, Loongson Technology, and are made ++ * available on the same license terms set forth above. 
++ */ ++ + #ifndef PRODUCT + + static LinearScanStatistic _stat_before_alloc; +@@ -1258,6 +1264,23 @@ void LinearScan::add_register_hints(LIR_Op* op) { + } + break; + } ++ case lir_cmp_cmove: { ++ assert(op->as_Op4() != NULL, "lir_cmp_cmove must be LIR_Op4"); ++ LIR_Op4* cmove = (LIR_Op4*)op; ++ ++ LIR_Opr move_from = cmove->in_opr3(); ++ LIR_Opr move_to = cmove->result_opr(); ++ ++ if (move_to->is_register() && move_from->is_register()) { ++ Interval* from = interval_at(reg_num(move_from)); ++ Interval* to = interval_at(reg_num(move_to)); ++ if (from != NULL && to != NULL) { ++ to->set_register_hint(from); ++ TRACE_LINEAR_SCAN(4, tty->print_cr("operation at op_id %d: added hint from interval %d to %d", cmove->id(), from->reg_num(), to->reg_num())); ++ } ++ } ++ break; ++ } + default: + break; + } +@@ -3342,7 +3365,9 @@ void LinearScan::verify_no_oops_in_fixed_intervals() { + check_live = (move->patch_code() == lir_patch_none); + } + LIR_OpBranch* branch = op->as_OpBranch(); +- if (branch != NULL && branch->stub() != NULL && branch->stub()->is_exception_throw_stub()) { ++ LIR_OpCmpBranch* cmp_branch = op->as_OpCmpBranch(); ++ if ((branch != NULL && branch->stub() != NULL && branch->stub()->is_exception_throw_stub()) || ++ (cmp_branch != NULL && cmp_branch->stub() != NULL && cmp_branch->stub()->is_exception_throw_stub())) { + // Don't bother checking the stub in this case since the + // exception stub will never return to normal control flow. + check_live = false; +@@ -6192,6 +6217,16 @@ void ControlFlowOptimizer::substitute_branch_target(BlockBegin* block, BlockBegi + assert(op->as_OpBranch() != NULL, "branch must be of type LIR_OpBranch"); + LIR_OpBranch* branch = (LIR_OpBranch*)op; + ++ if (branch->block() == target_from) { ++ branch->change_block(target_to); ++ } ++ if (branch->ublock() == target_from) { ++ branch->change_ublock(target_to); ++ } ++ } else if (op->code() == lir_cmp_branch || op->code() == lir_cmp_float_branch) { ++ assert(op->as_OpCmpBranch() != NULL, "branch must be of type LIR_OpCmpBranch"); ++ LIR_OpCmpBranch* branch = (LIR_OpCmpBranch*)op; ++ + if (branch->block() == target_from) { + branch->change_block(target_to); + } +@@ -6320,6 +6355,20 @@ void ControlFlowOptimizer::delete_unnecessary_jumps(BlockList* code) { + } + } + } ++ } else if (prev_op->code() == lir_cmp_branch || prev_op->code() == lir_cmp_float_branch) { ++ assert(prev_op->as_OpCmpBranch() != NULL, "branch must be of type LIR_OpCmpBranch"); ++ LIR_OpCmpBranch* prev_branch = (LIR_OpCmpBranch*)prev_op; ++ ++ if (prev_branch->stub() == NULL) { ++ if (prev_branch->block() == code->at(i + 1) && prev_branch->info() == NULL) { ++ TRACE_LINEAR_SCAN(3, tty->print_cr("Negating conditional branch and deleting unconditional branch at end of block B%d", block->block_id())); ++ ++ // eliminate a conditional branch to the immediate successor ++ prev_branch->change_block(last_branch->block()); ++ prev_branch->negate_cond(); ++ instructions->trunc_to(instructions->length() - 1); ++ } ++ } + } + } + } +@@ -6395,6 +6444,13 @@ void ControlFlowOptimizer::verify(BlockList* code) { + assert(op_branch->block() == NULL || code->find(op_branch->block()) != -1, "branch target not valid"); + assert(op_branch->ublock() == NULL || code->find(op_branch->ublock()) != -1, "branch target not valid"); + } ++ ++ LIR_OpCmpBranch* op_cmp_branch = instructions->at(j)->as_OpCmpBranch(); ++ ++ if (op_cmp_branch != NULL) { ++ assert(op_cmp_branch->block() == NULL || code->find(op_cmp_branch->block()) != -1, "branch target not valid"); ++ 
assert(op_cmp_branch->ublock() == NULL || code->find(op_cmp_branch->ublock()) != -1, "branch target not valid"); ++ } + } + + for (j = 0; j < block->number_of_sux() - 1; j++) { +@@ -6639,6 +6695,24 @@ void LinearScanStatistic::collect(LinearScan* allocator) { + break; + } + ++ case lir_cmp_branch: ++ case lir_cmp_float_branch: { ++ LIR_OpCmpBranch* branch = op->as_OpCmpBranch(); ++ if (branch->block() == NULL) { ++ inc_counter(counter_stub_branch); ++ } else { ++ inc_counter(counter_cond_branch); ++ } ++ inc_counter(counter_cmp); ++ break; ++ } ++ ++ case lir_cmp_cmove: { ++ inc_counter(counter_misc_inst); ++ inc_counter(counter_cmp); ++ break; ++ } ++ + case lir_neg: + case lir_add: + case lir_sub: +diff --git a/src/hotspot/share/code/nmethod.cpp b/src/hotspot/share/code/nmethod.cpp +index 747971af41..093831ac09 100644 +--- a/src/hotspot/share/code/nmethod.cpp ++++ b/src/hotspot/share/code/nmethod.cpp +@@ -22,6 +22,12 @@ + * + */ + ++/* ++ * This file has been modified by Loongson Technology in 2021. These ++ * modifications are Copyright (c) 2018, 2021, Loongson Technology, and are made ++ * available on the same license terms set forth above. ++ */ ++ + #include "precompiled.hpp" + #include "jvm.h" + #include "code/codeCache.hpp" +@@ -2155,7 +2161,8 @@ void nmethod::verify_scopes() { + //verify_interrupt_point(iter.addr()); + break; + case relocInfo::runtime_call_type: +- case relocInfo::runtime_call_w_cp_type: { ++ NOT_MIPS64(case relocInfo::runtime_call_w_cp_type:) ++ { + address destination = iter.reloc()->value(); + // Right now there is no way to find out which entries support + // an interrupt point. It would be nice if we had this +@@ -2392,7 +2399,8 @@ const char* nmethod::reloc_string_for(u_char* begin, u_char* end) { + return st.as_string(); + } + case relocInfo::runtime_call_type: +- case relocInfo::runtime_call_w_cp_type: { ++ NOT_MIPS64(case relocInfo::runtime_call_w_cp_type:) ++ { + stringStream st; + st.print("runtime_call"); + CallRelocation* r = (CallRelocation*)iter.reloc(); +diff --git a/src/hotspot/share/code/relocInfo.cpp b/src/hotspot/share/code/relocInfo.cpp +index a20de8dde6..c6f49cf7d6 100644 +--- a/src/hotspot/share/code/relocInfo.cpp ++++ b/src/hotspot/share/code/relocInfo.cpp +@@ -433,6 +433,7 @@ void virtual_call_Relocation::unpack_data() { + _cached_value = x0==0? 
NULL: address_from_scaled_offset(x0, point); + } + ++#ifndef MIPS64 + void runtime_call_w_cp_Relocation::pack_data_to(CodeSection * dest) { + short* p = pack_1_int_to((short *)dest->locs_end(), (jint)(_offset >> 2)); + dest->set_locs_end((relocInfo*) p); +@@ -441,6 +442,7 @@ void runtime_call_w_cp_Relocation::pack_data_to(CodeSection * dest) { + void runtime_call_w_cp_Relocation::unpack_data() { + _offset = unpack_1_int() << 2; + } ++#endif + + void static_stub_Relocation::pack_data_to(CodeSection* dest) { + short* p = (short*) dest->locs_end(); +@@ -910,7 +912,7 @@ void RelocIterator::print_current() { + break; + } + case relocInfo::runtime_call_type: +- case relocInfo::runtime_call_w_cp_type: ++ NOT_MIPS64(case relocInfo::runtime_call_w_cp_type:) + { + CallRelocation* r = (CallRelocation*) reloc(); + tty->print(" | [destination=" INTPTR_FORMAT "]", p2i(r->destination())); +diff --git a/src/hotspot/share/code/relocInfo.hpp b/src/hotspot/share/code/relocInfo.hpp +index 57931a1a6a..fb56fd3ab1 100644 +--- a/src/hotspot/share/code/relocInfo.hpp ++++ b/src/hotspot/share/code/relocInfo.hpp +@@ -269,7 +269,11 @@ class relocInfo { + poll_return_type = 11, // polling instruction for safepoints at return + metadata_type = 12, // metadata that used to be oops + trampoline_stub_type = 13, // stub-entry for trampoline ++#ifndef MIPS64 + runtime_call_w_cp_type = 14, // Runtime call which may load its target from the constant pool ++#else ++ internal_pc_type = 14, // tag for internal data ++#endif + data_prefix_tag = 15, // tag for a prefix (carries data arguments) + type_mask = 15 // A mask which selects only the above values + }; +@@ -304,13 +308,13 @@ class relocInfo { + visitor(static_call) \ + visitor(static_stub) \ + visitor(runtime_call) \ +- visitor(runtime_call_w_cp) \ ++ NOT_MIPS64(visitor(runtime_call_w_cp)) \ + visitor(external_word) \ + visitor(internal_word) \ + visitor(poll) \ + visitor(poll_return) \ +- visitor(section_word) \ + visitor(trampoline_stub) \ ++ NOT_MIPS64(visitor(section_word))MIPS64_ONLY(ZERO_ONLY(visitor(section_word))NOT_ZERO(visitor(internal_pc))) + + + public: +@@ -1174,6 +1178,15 @@ class runtime_call_Relocation : public CallRelocation { + }; + + ++#ifdef MIPS64 ++// to handle the set_last_java_frame pc ++class internal_pc_Relocation : public Relocation { ++ relocInfo::relocType type() { return relocInfo::internal_pc_type; } ++ public: ++ address pc() { return pd_get_address_from_code(); } ++ void fix_relocation_after_move(const CodeBuffer* src, CodeBuffer* dest); ++}; ++#else + class runtime_call_w_cp_Relocation : public CallRelocation { + relocInfo::relocType type() { return relocInfo::runtime_call_w_cp_type; } + +@@ -1202,6 +1215,7 @@ class runtime_call_w_cp_Relocation : public CallRelocation { + void pack_data_to(CodeSection * dest); + void unpack_data(); + }; ++#endif + + // Trampoline Relocations. + // A trampoline allows to encode a small branch in the code, even if there +diff --git a/src/hotspot/share/code/vtableStubs.cpp b/src/hotspot/share/code/vtableStubs.cpp +index 7892cc85b3..3637aefe10 100644 +--- a/src/hotspot/share/code/vtableStubs.cpp ++++ b/src/hotspot/share/code/vtableStubs.cpp +@@ -22,6 +22,12 @@ + * + */ + ++/* ++ * This file has been modified by Loongson Technology in 2021, These ++ * modifications are Copyright (c) 2019, 2021, Loongson Technology, and are made ++ * available on the same license terms set forth above. 
++ */ ++ + #include "precompiled.hpp" + #include "code/vtableStubs.hpp" + #include "compiler/compileBroker.hpp" +@@ -98,7 +104,11 @@ int VtableStubs::_itab_stub_size = 0; + + #if defined(PRODUCT) + // These values are good for the PRODUCT case (no tracing). ++#if defined MIPS64 || defined LOONGARCH64 ++ static const int first_vtableStub_size = 128; ++#else + static const int first_vtableStub_size = 64; ++#endif + static const int first_itableStub_size = 256; + #else + // These values are good for the non-PRODUCT case (when tracing can be switched on). +@@ -109,6 +119,7 @@ int VtableStubs::_itab_stub_size = 0; + // vtable itable + // aarch64: 460 324 + // arm: ? ? ++ // mips64: 728 328 + // ppc (linux, BE): 404 288 + // ppc (linux, LE): 356 276 + // ppc (AIX): 416 296 +diff --git a/src/hotspot/share/gc/g1/c1/g1BarrierSetC1.cpp b/src/hotspot/share/gc/g1/c1/g1BarrierSetC1.cpp +index 4289e5e5c4..9502463bd5 100644 +--- a/src/hotspot/share/gc/g1/c1/g1BarrierSetC1.cpp ++++ b/src/hotspot/share/gc/g1/c1/g1BarrierSetC1.cpp +@@ -74,7 +74,6 @@ void G1BarrierSetC1::pre_barrier(LIRAccess& access, LIR_Opr addr_opr, + // Read the marking-in-progress flag. + LIR_Opr flag_val = gen->new_register(T_INT); + __ load(mark_active_flag_addr, flag_val); +- __ cmp(lir_cond_notEqual, flag_val, LIR_OprFact::intConst(0)); + + LIR_PatchCode pre_val_patch_code = lir_patch_none; + +@@ -103,7 +102,7 @@ void G1BarrierSetC1::pre_barrier(LIRAccess& access, LIR_Opr addr_opr, + slow = new G1PreBarrierStub(pre_val); + } + +- __ branch(lir_cond_notEqual, T_INT, slow); ++ __ cmp_branch(lir_cond_notEqual, flag_val, LIR_OprFact::intConst(0), T_INT, slow); + __ branch_destination(slow->continuation()); + } + +@@ -168,10 +167,9 @@ void G1BarrierSetC1::post_barrier(LIRAccess& access, LIR_OprDesc* addr, LIR_OprD + } + assert(new_val->is_register(), "must be a register at this point"); + +- __ cmp(lir_cond_notEqual, xor_shift_res, LIR_OprFact::intptrConst(NULL_WORD)); +- + CodeStub* slow = new G1PostBarrierStub(addr, new_val); +- __ branch(lir_cond_notEqual, LP64_ONLY(T_LONG) NOT_LP64(T_INT), slow); ++ __ cmp_branch(lir_cond_notEqual, xor_shift_res, LIR_OprFact::intptrConst(NULL_WORD), ++ LP64_ONLY(T_LONG) NOT_LP64(T_INT), slow); + __ branch_destination(slow->continuation()); + } + +diff --git a/src/hotspot/share/gc/g1/g1FullGCMarker.inline.hpp b/src/hotspot/share/gc/g1/g1FullGCMarker.inline.hpp +index 98a2fe7f1c..b43a441066 100644 +--- a/src/hotspot/share/gc/g1/g1FullGCMarker.inline.hpp ++++ b/src/hotspot/share/gc/g1/g1FullGCMarker.inline.hpp +@@ -22,6 +22,12 @@ + * + */ + ++/* ++ * This file has been modified by Loongson Technology in 2022. These ++ * modifications are Copyright (c) 2022, Loongson Technology, and are made ++ * available on the same license terms set forth above. 
++ */ ++ + #ifndef SHARE_VM_GC_G1_G1MARKSTACK_INLINE_HPP + #define SHARE_VM_GC_G1_G1MARKSTACK_INLINE_HPP + +@@ -71,6 +77,7 @@ template inline void G1FullGCMarker::mark_and_push(T* p) { + _oop_stack.push(obj); + assert(_bitmap->is_marked(obj), "Must be marked now - map self"); + } else { ++ DEBUG_ONLY(OrderAccess::loadload()); + assert(_bitmap->is_marked(obj) || G1ArchiveAllocator::is_closed_archive_object(obj), + "Must be marked by other or closed archive object"); + } +diff --git a/src/hotspot/share/gc/parallel/psPromotionManager.inline.hpp b/src/hotspot/share/gc/parallel/psPromotionManager.inline.hpp +index 1ef900783d..b30456429d 100644 +--- a/src/hotspot/share/gc/parallel/psPromotionManager.inline.hpp ++++ b/src/hotspot/share/gc/parallel/psPromotionManager.inline.hpp +@@ -51,8 +51,9 @@ template + inline void PSPromotionManager::claim_or_forward_internal_depth(T* p) { + if (p != NULL) { // XXX: error if p != NULL here + oop o = RawAccess::oop_load(p); +- if (o->is_forwarded()) { +- o = o->forwardee(); ++ markOop m = o->mark_raw(); ++ if (m->is_marked()) { ++ o = (oop) m->decode_pointer(); + // Card mark + if (PSScavenge::is_obj_in_young(o)) { + PSScavenge::card_table()->inline_write_ref_field_gc(p, o); +@@ -282,13 +283,17 @@ inline void PSPromotionManager::copy_and_push_safe_barrier(T* p) { + assert(should_scavenge(p, true), "revisiting object?"); + + oop o = RawAccess::oop_load(p); +- oop new_obj = o->is_forwarded() +- ? o->forwardee() +- : copy_to_survivor_space(o); ++ oop new_obj; ++ markOop m = o->mark_raw(); ++ if (m->is_marked()) { ++ new_obj = (oop) m->decode_pointer(); ++ } else { ++ new_obj = copy_to_survivor_space(o); ++ } + + // This code must come after the CAS test, or it will print incorrect + // information. +- if (log_develop_is_enabled(Trace, gc, scavenge) && o->is_forwarded()) { ++ if (log_develop_is_enabled(Trace, gc, scavenge) && m->is_marked()) { + log_develop_trace(gc, scavenge)("{%s %s " PTR_FORMAT " -> " PTR_FORMAT " (%d)}", + "forwarding", + new_obj->klass()->internal_name(), p2i((void *)o), p2i((void *)new_obj), new_obj->size()); +diff --git a/src/hotspot/share/gc/parallel/psScavenge.inline.hpp b/src/hotspot/share/gc/parallel/psScavenge.inline.hpp +index 0c58fd4b3f..415990ff5f 100644 +--- a/src/hotspot/share/gc/parallel/psScavenge.inline.hpp ++++ b/src/hotspot/share/gc/parallel/psScavenge.inline.hpp +@@ -104,8 +104,9 @@ class PSScavengeFromCLDClosure: public OopClosure { + + oop o = *p; + oop new_obj; +- if (o->is_forwarded()) { +- new_obj = o->forwardee(); ++ markOop m = o->mark_raw(); ++ if (m->is_marked()) { ++ new_obj = (oop) m->decode_pointer(); + } else { + new_obj = _pm->copy_to_survivor_space(o); + } +diff --git a/src/hotspot/share/gc/shared/c1/barrierSetC1.cpp b/src/hotspot/share/gc/shared/c1/barrierSetC1.cpp +index 5241322a91..0ddabb4dae 100644 +--- a/src/hotspot/share/gc/shared/c1/barrierSetC1.cpp ++++ b/src/hotspot/share/gc/shared/c1/barrierSetC1.cpp +@@ -192,8 +192,7 @@ void BarrierSetC1::load_at_resolved(LIRAccess& access, LIR_Opr result) { + /* Normalize boolean value returned by unsafe operation, i.e., value != 0 ? value = true : value false. 
*/ + if (mask_boolean) { + LabelObj* equalZeroLabel = new LabelObj(); +- __ cmp(lir_cond_equal, result, 0); +- __ branch(lir_cond_equal, T_BOOLEAN, equalZeroLabel->label()); ++ __ cmp_branch(lir_cond_equal, result, 0, T_BOOLEAN, equalZeroLabel->label()); + __ move(LIR_OprFact::intConst(1), result); + __ branch_destination(equalZeroLabel->label()); + } +@@ -320,14 +319,12 @@ void BarrierSetC1::generate_referent_check(LIRAccess& access, LabelObj* cont) { + referent_off = gen->new_register(T_LONG); + __ move(LIR_OprFact::longConst(java_lang_ref_Reference::referent_offset), referent_off); + } +- __ cmp(lir_cond_notEqual, offset, referent_off); +- __ branch(lir_cond_notEqual, offset->type(), cont->label()); ++ __ cmp_branch(lir_cond_notEqual, offset, referent_off, offset->type(), cont->label()); + } + if (gen_source_check) { + // offset is a const and equals referent offset + // if (source == null) -> continue +- __ cmp(lir_cond_equal, base_reg, LIR_OprFact::oopConst(NULL)); +- __ branch(lir_cond_equal, T_OBJECT, cont->label()); ++ __ cmp_branch(lir_cond_equal, base_reg, LIR_OprFact::oopConst(NULL), T_OBJECT, cont->label()); + } + LIR_Opr src_klass = gen->new_register(T_METADATA); + if (gen_type_check) { +@@ -337,8 +334,7 @@ void BarrierSetC1::generate_referent_check(LIRAccess& access, LabelObj* cont) { + LIR_Address* reference_type_addr = new LIR_Address(src_klass, in_bytes(InstanceKlass::reference_type_offset()), T_BYTE); + LIR_Opr reference_type = gen->new_register(T_INT); + __ move(reference_type_addr, reference_type); +- __ cmp(lir_cond_equal, reference_type, LIR_OprFact::intConst(REF_NONE)); +- __ branch(lir_cond_equal, T_INT, cont->label()); ++ __ cmp_branch(lir_cond_equal, reference_type, LIR_OprFact::intConst(REF_NONE), T_INT, cont->label()); + } + } + } +diff --git a/src/hotspot/share/gc/shared/c1/cardTableBarrierSetC1.cpp b/src/hotspot/share/gc/shared/c1/cardTableBarrierSetC1.cpp +index 84815adea8..57e29f1295 100644 +--- a/src/hotspot/share/gc/shared/c1/cardTableBarrierSetC1.cpp ++++ b/src/hotspot/share/gc/shared/c1/cardTableBarrierSetC1.cpp +@@ -89,8 +89,7 @@ void CardTableBarrierSetC1::post_barrier(LIRAccess& access, LIR_OprDesc* addr, L + __ move(card_addr, cur_value); + + LabelObj* L_already_dirty = new LabelObj(); +- __ cmp(lir_cond_equal, cur_value, dirty); +- __ branch(lir_cond_equal, T_BYTE, L_already_dirty->label()); ++ __ cmp_branch(lir_cond_equal, cur_value, dirty, T_BYTE, L_already_dirty->label()); + __ move(dirty, card_addr); + __ branch_destination(L_already_dirty->label()); + } else { +diff --git a/src/hotspot/share/gc/shenandoah/c1/shenandoahBarrierSetC1.cpp b/src/hotspot/share/gc/shenandoah/c1/shenandoahBarrierSetC1.cpp +index f51d186484..506f0301fe 100644 +--- a/src/hotspot/share/gc/shenandoah/c1/shenandoahBarrierSetC1.cpp ++++ b/src/hotspot/share/gc/shenandoah/c1/shenandoahBarrierSetC1.cpp +@@ -73,7 +73,6 @@ void ShenandoahBarrierSetC1::pre_barrier(LIRGenerator* gen, CodeEmitInfo* info, + // Read the marking-in-progress flag. 
+ LIR_Opr flag_val = gen->new_register(T_INT); + __ load(mark_active_flag_addr, flag_val); +- __ cmp(lir_cond_notEqual, flag_val, LIR_OprFact::intConst(0)); + + LIR_PatchCode pre_val_patch_code = lir_patch_none; + +@@ -101,7 +100,7 @@ void ShenandoahBarrierSetC1::pre_barrier(LIRGenerator* gen, CodeEmitInfo* info, + slow = new ShenandoahPreBarrierStub(pre_val); + } + +- __ branch(lir_cond_notEqual, T_INT, slow); ++ __ cmp_branch(lir_cond_notEqual, flag_val, LIR_OprFact::intConst(0), T_INT, slow); + __ branch_destination(slow->continuation()); + } + +@@ -144,10 +143,9 @@ LIR_Opr ShenandoahBarrierSetC1::load_reference_barrier_impl(LIRGenerator* gen, L + __ logical_and(flag_val, mask_reg, masked_flag); + flag_val = masked_flag; + } +- __ cmp(lir_cond_notEqual, flag_val, LIR_OprFact::intConst(0)); + + CodeStub* slow = new ShenandoahLoadReferenceBarrierStub(obj, addr, result, tmp1, tmp2); +- __ branch(lir_cond_notEqual, T_INT, slow); ++ __ cmp_branch(lir_cond_notEqual, flag_val, LIR_OprFact::intConst(0), T_INT, slow); + __ branch_destination(slow->continuation()); + + return result; +diff --git a/src/hotspot/share/gc/z/c1/zBarrierSetC1.cpp b/src/hotspot/share/gc/z/c1/zBarrierSetC1.cpp +index 9f8ce74243..3c1862d826 100644 +--- a/src/hotspot/share/gc/z/c1/zBarrierSetC1.cpp ++++ b/src/hotspot/share/gc/z/c1/zBarrierSetC1.cpp +@@ -105,15 +105,20 @@ public: + + virtual void visit(LIR_OpVisitState* state) { + state->do_input(_opr); ++ if (_result->is_valid()) { ++ state->do_temp(_opr); ++ state->do_output(_result); ++ } + } + + virtual void emit_code(LIR_Assembler* ce) { +- ZBarrierSet::assembler()->generate_c1_load_barrier_test(ce, _opr); ++ ZBarrierSet::assembler()->generate_c1_load_barrier_test(ce, _opr, result_opr()); + } + + virtual void print_instr(outputStream* out) const { + _opr->print(out); + out->print(" "); ++ result_opr()->print(out); + } + + #ifndef PRODUCT +@@ -149,13 +154,21 @@ address ZBarrierSetC1::load_barrier_on_oop_field_preloaded_runtime_stub(Decorato + #endif + + void ZBarrierSetC1::load_barrier(LIRAccess& access, LIR_Opr result) const { ++ LIR_Op* op = new LIR_OpZLoadBarrierTest(result); ++ + // Fast path +- __ append(new LIR_OpZLoadBarrierTest(result)); ++ __ append(op); + + // Slow path + const address runtime_stub = load_barrier_on_oop_field_preloaded_runtime_stub(access.decorators()); + CodeStub* const stub = new ZLoadBarrierStubC1(access, result, runtime_stub); +- __ branch(lir_cond_notEqual, T_ADDRESS, stub); ++ if (ZPlatformLoadBarrierTestResultInRegister) { ++ LIR_Opr res = access.gen()->new_register(result->type()); ++ op->set_result_opr(res); ++ __ cmp_branch(lir_cond_notEqual, res, LIR_OprFact::intptrConst(NULL_WORD), T_ADDRESS, stub); ++ } else { ++ __ branch(lir_cond_notEqual, T_ADDRESS, stub); ++ } + __ branch_destination(stub->continuation()); + } + +diff --git a/src/hotspot/share/interpreter/interpreterRuntime.cpp b/src/hotspot/share/interpreter/interpreterRuntime.cpp +index 24e4c98175..a6b310290d 100644 +--- a/src/hotspot/share/interpreter/interpreterRuntime.cpp ++++ b/src/hotspot/share/interpreter/interpreterRuntime.cpp +@@ -22,6 +22,12 @@ + * + */ + ++/* ++ * This file has been modified by Loongson Technology in 2021, These ++ * modifications are Copyright (c) 2018, 2021, Loongson Technology, and are made ++ * available on the same license terms set forth above. 
++ */ ++ + #include "precompiled.hpp" + #include "classfile/javaClasses.inline.hpp" + #include "classfile/systemDictionary.hpp" +@@ -1506,7 +1512,7 @@ IRT_ENTRY(void, InterpreterRuntime::prepare_native_call(JavaThread* thread, Meth + // preparing the same method will be sure to see non-null entry & mirror. + IRT_END + +-#if defined(IA32) || defined(AMD64) || defined(ARM) ++#if defined(IA32) || defined(AMD64) || defined(ARM) || defined(MIPS64) || defined(LOONGARCH64) + IRT_LEAF(void, InterpreterRuntime::popframe_move_outgoing_args(JavaThread* thread, void* src_address, void* dest_address)) + if (src_address == dest_address) { + return; +diff --git a/src/hotspot/share/interpreter/interpreterRuntime.hpp b/src/hotspot/share/interpreter/interpreterRuntime.hpp +index 87e84c893f..3043fa634b 100644 +--- a/src/hotspot/share/interpreter/interpreterRuntime.hpp ++++ b/src/hotspot/share/interpreter/interpreterRuntime.hpp +@@ -22,6 +22,12 @@ + * + */ + ++/* ++ * This file has been modified by Loongson Technology in 2021, These ++ * modifications are Copyright (c) 2018, 2021, Loongson Technology, and are made ++ * available on the same license terms set forth above. ++ */ ++ + #ifndef SHARE_VM_INTERPRETER_INTERPRETERRUNTIME_HPP + #define SHARE_VM_INTERPRETER_INTERPRETERRUNTIME_HPP + +@@ -146,7 +152,7 @@ class InterpreterRuntime: AllStatic { + Method* method, + intptr_t* from, intptr_t* to); + +-#if defined(IA32) || defined(AMD64) || defined(ARM) ++#if defined(IA32) || defined(AMD64) || defined(ARM) || defined(MIPS64) || defined(LOONGARCH64) + // Popframe support (only needed on x86, AMD64 and ARM) + static void popframe_move_outgoing_args(JavaThread* thread, void* src_address, void* dest_address); + #endif +diff --git a/src/hotspot/share/interpreter/templateInterpreterGenerator.hpp b/src/hotspot/share/interpreter/templateInterpreterGenerator.hpp +index 965f6b0d10..07942993cd 100644 +--- a/src/hotspot/share/interpreter/templateInterpreterGenerator.hpp ++++ b/src/hotspot/share/interpreter/templateInterpreterGenerator.hpp +@@ -22,6 +22,12 @@ + * + */ + ++/* ++ * This file has been modified by Loongson Technology in 2021. These ++ * modifications are Copyright (c) 2021, Loongson Technology, and are made ++ * available on the same license terms set forth above. ++ */ ++ + #ifndef SHARE_VM_INTERPRETER_TEMPLATEINTERPRETERGENERATOR_HPP + #define SHARE_VM_INTERPRETER_TEMPLATEINTERPRETERGENERATOR_HPP + +@@ -114,9 +120,9 @@ class TemplateInterpreterGenerator: public AbstractInterpreterGenerator { + void restore_native_result(void); + #endif // SPARC + +-#ifdef AARCH64 ++#if defined(AARCH64) || defined(MIPS64) || defined(LOONGARCH64) + void generate_transcendental_entry(AbstractInterpreter::MethodKind kind, int fpargs); +-#endif // AARCH64 ++#endif // AARCH64 || MIPS64 || LOONGARCH64 + + #ifdef PPC + void lock_method(Register Rflags, Register Rscratch1, Register Rscratch2, bool flags_preloaded=false); +diff --git a/src/hotspot/share/jfr/utilities/jfrBigEndian.hpp b/src/hotspot/share/jfr/utilities/jfrBigEndian.hpp +index e01a242a57..0661f3b9d1 100644 +--- a/src/hotspot/share/jfr/utilities/jfrBigEndian.hpp ++++ b/src/hotspot/share/jfr/utilities/jfrBigEndian.hpp +@@ -22,6 +22,12 @@ + * + */ + ++/* ++ * This file has been modified by Loongson Technology in 2021, These ++ * modifications are Copyright (c) 2019, 2021, Loongson Technology, and are made ++ * available on the same license terms set forth above. 
++ */ ++ + #ifndef SHARE_VM_JFR_UTILITIES_JFRBIGENDIAN_HPP + #define SHARE_VM_JFR_UTILITIES_JFRBIGENDIAN_HPP + +@@ -102,7 +108,7 @@ inline T JfrBigEndian::read_unaligned(const address location) { + inline bool JfrBigEndian::platform_supports_unaligned_reads(void) { + #if defined(IA32) || defined(AMD64) || defined(PPC) || defined(S390) + return true; +-#elif defined(SPARC) || defined(ARM) || defined(AARCH64) ++#elif defined(SPARC) || defined(ARM) || defined(AARCH64) || defined(MIPS) || defined(LOONGARCH) + return false; + #else + #warning "Unconfigured platform" +diff --git a/src/hotspot/share/jvmci/vmStructs_jvmci.cpp b/src/hotspot/share/jvmci/vmStructs_jvmci.cpp +index 8927063330..b5bb5c2887 100644 +--- a/src/hotspot/share/jvmci/vmStructs_jvmci.cpp ++++ b/src/hotspot/share/jvmci/vmStructs_jvmci.cpp +@@ -22,6 +22,12 @@ + * + */ + ++/* ++ * This file has been modified by Loongson Technology in 2022, These ++ * modifications are Copyright (c) 2022, Loongson Technology, and are made ++ * available on the same license terms set forth above. ++ */ ++ + #include "precompiled.hpp" + #include "code/codeBlob.hpp" + #include "compiler/abstractCompiler.hpp" +@@ -715,6 +721,35 @@ + #endif + + ++#ifdef LOONGARCH64 ++ ++#define VM_STRUCTS_CPU(nonstatic_field, static_field, unchecked_nonstatic_field, volatile_nonstatic_field, nonproduct_nonstatic_field, c2_nonstatic_field, unchecked_c1_static_field, unchecked_c2_static_field) \ ++ volatile_nonstatic_field(JavaFrameAnchor, _last_Java_fp, intptr_t*) ++ ++#define VM_INT_CONSTANTS_CPU(declare_constant, declare_preprocessor_constant, declare_c1_constant, declare_c2_constant, declare_c2_preprocessor_constant) \ ++ declare_constant(VM_Version::CPU_LA32) \ ++ declare_constant(VM_Version::CPU_LA64) \ ++ declare_constant(VM_Version::CPU_LLEXC) \ ++ declare_constant(VM_Version::CPU_SCDLY) \ ++ declare_constant(VM_Version::CPU_LLDBAR) \ ++ declare_constant(VM_Version::CPU_LBT_X86) \ ++ declare_constant(VM_Version::CPU_LBT_ARM) \ ++ declare_constant(VM_Version::CPU_LBT_MIPS) \ ++ declare_constant(VM_Version::CPU_CCDMA) \ ++ declare_constant(VM_Version::CPU_COMPLEX) \ ++ declare_constant(VM_Version::CPU_FP) \ ++ declare_constant(VM_Version::CPU_CRYPTO) \ ++ declare_constant(VM_Version::CPU_LSX) \ ++ declare_constant(VM_Version::CPU_LASX) \ ++ declare_constant(VM_Version::CPU_LAM) \ ++ declare_constant(VM_Version::CPU_LLSYNC) \ ++ declare_constant(VM_Version::CPU_TGTSYNC) \ ++ declare_constant(VM_Version::CPU_ULSYNC) \ ++ declare_constant(VM_Version::CPU_UAL) ++ ++#endif ++ ++ + #ifdef X86 + + #define VM_STRUCTS_CPU(nonstatic_field, static_field, unchecked_nonstatic_field, volatile_nonstatic_field, nonproduct_nonstatic_field, c2_nonstatic_field, unchecked_c1_static_field, unchecked_c2_static_field) \ +diff --git a/src/hotspot/share/memory/metaspace.cpp b/src/hotspot/share/memory/metaspace.cpp +index 80958b0469..08d13a4189 100644 +--- a/src/hotspot/share/memory/metaspace.cpp ++++ b/src/hotspot/share/memory/metaspace.cpp +@@ -1082,12 +1082,12 @@ void Metaspace::allocate_metaspace_compressed_klass_ptrs(char* requested_addr, a + // Don't use large pages for the class space. 
+ bool large_pages = false; + +-#if !(defined(AARCH64) || defined(PPC64)) ++#if !(defined(AARCH64) || defined(PPC64) || defined(MIPS64) || defined(LOONGARCH64)) + ReservedSpace metaspace_rs = ReservedSpace(compressed_class_space_size(), + _reserve_alignment, + large_pages, + requested_addr); +-#else // AARCH64 || PPC64 ++#else // AARCH64 || PPC64 || MIPS64 || LOONGARCH64 + + ReservedSpace metaspace_rs; + +@@ -1113,7 +1113,8 @@ void Metaspace::allocate_metaspace_compressed_klass_ptrs(char* requested_addr, a + // below 32g to get a zerobased CCS. For simplicity we reuse the search + // strategy for AARCH64. + +- size_t increment = AARCH64_ONLY(4*)G; ++ // MIPS: Cannot mmap for 1G space at 4G position, and prepare for future optimization. ++ size_t increment = AARCH64_ONLY(4*)MIPS64_ONLY(4*)LOONGARCH64_ONLY(4*)G; + for (char *a = align_up(requested_addr, increment); + a < (char*)(1024*G); + a += increment) { +@@ -1144,7 +1145,7 @@ void Metaspace::allocate_metaspace_compressed_klass_ptrs(char* requested_addr, a + } + } + +-#endif // AARCH64 || PPC64 ++#endif // AARCH64 || PPC64 || MIPS64 || LOONGARCH64 + + if (!metaspace_rs.is_reserved()) { + #if INCLUDE_CDS +diff --git a/src/hotspot/share/oops/oop.inline.hpp b/src/hotspot/share/oops/oop.inline.hpp +index 6c631f5458..9865106720 100644 +--- a/src/hotspot/share/oops/oop.inline.hpp ++++ b/src/hotspot/share/oops/oop.inline.hpp +@@ -22,6 +22,12 @@ + * + */ + ++/* ++ * This file has been modified by Loongson Technology in 2022. These ++ * modifications are Copyright (c) 2022, Loongson Technology, and are made ++ * available on the same license terms set forth above. ++ */ ++ + #ifndef SHARE_VM_OOPS_OOP_INLINE_HPP + #define SHARE_VM_OOPS_OOP_INLINE_HPP + +@@ -389,7 +395,7 @@ oop oopDesc::forward_to_atomic(oop p, atomic_memory_order order) { + // forwarding pointer. + oldMark = curMark; + } +- return forwardee(); ++ return (oop)oldMark->decode_pointer(); + } + + // Note that the forwardee is not the same thing as the displaced_mark. +diff --git a/src/hotspot/share/opto/compile.hpp b/src/hotspot/share/opto/compile.hpp +index 569fbc6d69..c1f1b82ffa 100644 +--- a/src/hotspot/share/opto/compile.hpp ++++ b/src/hotspot/share/opto/compile.hpp +@@ -1204,7 +1204,7 @@ class Compile : public Phase { + bool in_scratch_emit_size() const { return _in_scratch_emit_size; } + + enum ScratchBufferBlob { +-#if defined(PPC64) ++#if defined(PPC64) || defined(MIPS64) || defined(LOONGARCH64) + MAX_inst_size = 2048, + #else + MAX_inst_size = 1024, +diff --git a/src/hotspot/share/opto/output.cpp b/src/hotspot/share/opto/output.cpp +index b6540e06a3..52d1fc9fb9 100644 +--- a/src/hotspot/share/opto/output.cpp ++++ b/src/hotspot/share/opto/output.cpp +@@ -22,6 +22,12 @@ + * + */ + ++/* ++ * This file has been modified by Loongson Technology in 2021. These ++ * modifications are Copyright (c) 2019, 2021, Loongson Technology, and are made ++ * available on the same license terms set forth above. ++ */ ++ + #include "precompiled.hpp" + #include "asm/assembler.inline.hpp" + #include "asm/macroAssembler.inline.hpp" +@@ -731,6 +737,27 @@ void Compile::Process_OopMap_Node(MachNode *mach, int current_offset) { + // Add the safepoint in the DebugInfoRecorder + if( !mach->is_MachCall() ) { + mcall = NULL; ++#if defined(MIPS) || defined(LOONGARCH) ++ // safepoint_pc_offset should point to tha last instruction in safePoint. ++ // In X86 and sparc, their safePoints only contain one instruction. ++ // However, we should add current_offset with the size of safePoint in MIPS. 
++ // 0x2d6ff22c: lw s2, 0x14(s2) ++ // last_pd->pc_offset()=308, pc_offset=304, bci=64 ++ // last_pd->pc_offset()=312, pc_offset=312, bci=64 ++ // src/hotspot/share/code/debugInfoRec.cpp:295, assert(last_pd->pc_offset() == pc_offset, "must be last pc") ++ // ++ // ;; Safepoint: ++ // ---> pc_offset=304 ++ // 0x2d6ff230: lui at, 0x2b7a ; OopMap{s2=Oop s5=Oop t4=Oop off=308} ++ // ;*goto ++ // ; - java.util.Hashtable::get@64 (line 353) ++ // ---> last_pd(308) ++ // 0x2d6ff234: lw at, 0xffffc100(at) ;*goto ++ // ; - java.util.Hashtable::get@64 (line 353) ++ // ; {poll} ++ // 0x2d6ff238: addiu s0, zero, 0x0 ++ safepoint_pc_offset += sfn->size(_regalloc) - 4; ++#endif + debug_info()->add_safepoint(safepoint_pc_offset, sfn->_oop_map); + } else { + mcall = mach->as_MachCall(); +@@ -1393,6 +1420,22 @@ void Compile::fill_buffer(CodeBuffer* cb, uint* blk_starts) { + DEBUG_ONLY(uint instr_offset = cb->insts_size()); + n->emit(*cb, _regalloc); + current_offset = cb->insts_size(); ++#if defined(MIPS) || defined(LOONGARCH) ++ if (!n->is_Proj() && (cb->insts()->end() != badAddress)) { ++ // For MIPS, the first instruction of the previous node (usually a instruction sequence) sometime ++ // is not the instruction which access memory. adjust is needed. previous_offset points to the ++ // instruction which access memory. Instruction size is 4. cb->insts_size() and ++ // cb->insts()->end() are the location of current instruction. ++ int adjust = 4; ++ NativeInstruction* inst = (NativeInstruction*) (cb->insts()->end() - 4); ++ if (inst->is_sync()) { ++ // a sync may be the last instruction, see store_B_immI_enc_sync ++ adjust += 4; ++ inst = (NativeInstruction*) (cb->insts()->end() - 8); ++ } ++ previous_offset = current_offset - adjust; ++ } ++#endif + + // Above we only verified that there is enough space in the instruction section. + // However, the instruction may emit stubs that cause code buffer expansion. +diff --git a/src/hotspot/share/opto/type.cpp b/src/hotspot/share/opto/type.cpp +index 7d767c47c9..23ec34e5e2 100644 +--- a/src/hotspot/share/opto/type.cpp ++++ b/src/hotspot/share/opto/type.cpp +@@ -22,6 +22,12 @@ + * + */ + ++/* ++ * This file has been modified by Loongson Technology in 2022, These ++ * modifications are Copyright (c) 2022, Loongson Technology, and are made ++ * available on the same license terms set forth above. 
++ */ ++ + #include "precompiled.hpp" + #include "ci/ciMethodData.hpp" + #include "ci/ciTypeFlow.hpp" +@@ -78,6 +84,12 @@ const Type::TypeInfo Type::_type_info[Type::lastype] = { + { Bad, T_ILLEGAL, "vectorx:", false, 0, relocInfo::none }, // VectorX + { Bad, T_ILLEGAL, "vectory:", false, 0, relocInfo::none }, // VectorY + { Bad, T_ILLEGAL, "vectorz:", false, 0, relocInfo::none }, // VectorZ ++#elif defined(LOONGARCH64) ++ { Bad, T_ILLEGAL, "vectors:", false, 0, relocInfo::none }, // VectorS ++ { Bad, T_ILLEGAL, "vectord:", false, 0, relocInfo::none }, // VectorD ++ { Bad, T_ILLEGAL, "vectorx:", false, Op_VecX, relocInfo::none }, // VectorX ++ { Bad, T_ILLEGAL, "vectory:", false, Op_VecY, relocInfo::none }, // VectorY ++ { Bad, T_ILLEGAL, "vectorz:", false, 0, relocInfo::none }, // VectorZ + #else // all other + { Bad, T_ILLEGAL, "vectors:", false, Op_VecS, relocInfo::none }, // VectorS + { Bad, T_ILLEGAL, "vectord:", false, Op_VecD, relocInfo::none }, // VectorD +diff --git a/src/hotspot/share/runtime/java.cpp b/src/hotspot/share/runtime/java.cpp +index 84123b29ec..77fbacf2d8 100644 +--- a/src/hotspot/share/runtime/java.cpp ++++ b/src/hotspot/share/runtime/java.cpp +@@ -68,6 +68,7 @@ + #include "runtime/thread.inline.hpp" + #include "runtime/timer.hpp" + #include "runtime/vmOperations.hpp" ++#include "runtime/vmThread.hpp" + #include "services/memTracker.hpp" + #include "utilities/dtrace.hpp" + #include "utilities/globalDefinitions.hpp" +diff --git a/src/hotspot/share/runtime/os.cpp b/src/hotspot/share/runtime/os.cpp +index e0f4a2af1f..09cc4b1ba5 100644 +--- a/src/hotspot/share/runtime/os.cpp ++++ b/src/hotspot/share/runtime/os.cpp +@@ -22,6 +22,12 @@ + * + */ + ++/* ++ * This file has been modified by Loongson Technology in 2022, These ++ * modifications are Copyright (c) 2019, 2022, Loongson Technology, and are made ++ * available on the same license terms set forth above. ++ */ ++ + #include "precompiled.hpp" + #include "jvm.h" + #include "classfile/classLoader.hpp" +@@ -1242,7 +1248,8 @@ bool os::is_first_C_frame(frame* fr) { + if ((uintptr_t)fr->sender_sp() == (uintptr_t)-1 || is_pointer_bad(fr->sender_sp())) return true; + + uintptr_t old_fp = (uintptr_t)fr->link_or_null(); +- if (old_fp == 0 || old_fp == (uintptr_t)-1 || old_fp == ufp || ++ // The check for old_fp and ufp is harmful on LoongArch and MIPS due to their special ABIs. ++ if (old_fp == 0 || old_fp == (uintptr_t)-1 NOT_LOONGARCH64_AND_MIPS64(|| old_fp == ufp) || + is_pointer_bad(fr->link_or_null())) return true; + + // stack grows downwards; if old_fp is below current fp or if the stack +diff --git a/src/hotspot/share/runtime/sharedRuntimeTrig.cpp b/src/hotspot/share/runtime/sharedRuntimeTrig.cpp +index e086f794cd..f480195775 100644 +--- a/src/hotspot/share/runtime/sharedRuntimeTrig.cpp ++++ b/src/hotspot/share/runtime/sharedRuntimeTrig.cpp +@@ -22,6 +22,13 @@ + * + */ + ++/* ++ * This file has been modified by Loongson Technology in 2021, These ++ * modifications are Copyright (c) 2015, 2021, Loongson Technology, and are made ++ * available on the same license terms set forth above. 
++ */ ++ ++ + #include "precompiled.hpp" + #include "jni.h" + #include "runtime/interfaceSupport.inline.hpp" +@@ -512,6 +519,14 @@ static int __ieee754_rem_pio2(double x, double *y) { + * sin(x) = x + (S1*x + (x *(r-y/2)+y)) + */ + ++#if defined(MIPS)|| defined(LOONGARCH) ++#undef S1 ++#undef S2 ++#undef S3 ++#undef S4 ++#undef S5 ++#undef S6 ++#endif + static const double + S1 = -1.66666666666666324348e-01, /* 0xBFC55555, 0x55555549 */ + S2 = 8.33333333332248946124e-03, /* 0x3F811111, 0x1110F8A6 */ +diff --git a/src/hotspot/share/utilities/globalDefinitions.hpp b/src/hotspot/share/utilities/globalDefinitions.hpp +index c758fc5743..a8c4638f6a 100644 +--- a/src/hotspot/share/utilities/globalDefinitions.hpp ++++ b/src/hotspot/share/utilities/globalDefinitions.hpp +@@ -1161,6 +1161,15 @@ inline int exact_log2_long(jlong x) { + return log2_long(x); + } + ++#if defined(MIPS64) || defined(LOONGARCH64) ++// returns integer round-up to the nearest multiple of s (s must be a power of two) ++inline intptr_t round_to(intptr_t x, uintx s) { ++ assert(is_power_of_2(s), "s must be a power of 2: " JLONG_FORMAT, x); ++ const uintx m = s - 1; ++ return mask_bits(x + m, ~m); ++} ++#endif ++ + inline bool is_odd (intx x) { return x & 1; } + inline bool is_even(intx x) { return !is_odd(x); } + +diff --git a/src/hotspot/share/utilities/macros.hpp b/src/hotspot/share/utilities/macros.hpp +index cf80253868..f611daf36d 100644 +--- a/src/hotspot/share/utilities/macros.hpp ++++ b/src/hotspot/share/utilities/macros.hpp +@@ -22,6 +22,12 @@ + * + */ + ++/* ++ * This file has been modified by Loongson Technology in 2021. These ++ * modifications are Copyright (c) 2018, 2021, Loongson Technology, and are made ++ * available on the same license terms set forth above. ++ */ ++ + #ifndef SHARE_VM_UTILITIES_MACROS_HPP + #define SHARE_VM_UTILITIES_MACROS_HPP + +@@ -531,6 +537,38 @@ + #define NOT_SPARC(code) code + #endif + ++#ifdef MIPS64 ++#ifndef MIPS ++#define MIPS ++#endif ++#define MIPS64_ONLY(code) code ++#define NOT_MIPS64(code) ++#else ++#undef MIPS ++#define MIPS64_ONLY(code) ++#define NOT_MIPS64(code) code ++#endif ++ ++#ifdef LOONGARCH64 ++#ifndef LOONGARCH ++#define LOONGARCH ++#endif ++#define LOONGARCH64_ONLY(code) code ++#define NOT_LOONGARCH64(code) ++#else ++#undef LOONGARCH ++#define LOONGARCH64_ONLY(code) ++#define NOT_LOONGARCH64(code) code ++#endif ++ ++#if defined(MIPS64) || defined(LOONGARCH64) ++#define LOONGARCH64_AND_MIPS64_ONLY(code) code ++#define NOT_LOONGARCH64_AND_MIPS64(code) ++#else ++#define LOONGARCH64_AND_MIPS64_ONLY(code) ++#define NOT_LOONGARCH64_AND_MIPS64(code) code ++#endif ++ + #if defined(PPC32) || defined(PPC64) + #ifndef PPC + #define PPC +@@ -623,16 +661,34 @@ + // OS_CPU_HEADER(vmStructs) --> vmStructs_linux_sparc.hpp + // + // basename.hpp / basename.inline.hpp ++#if defined(MIPS) && !defined(ZERO) ++#define CPU_HEADER_H(basename) XSTR(basename ## _mips.h) ++#define CPU_HEADER(basename) XSTR(basename ## _mips.hpp) ++#define CPU_HEADER_INLINE(basename) XSTR(basename ## _mips.inline.hpp) ++#elif defined(LOONGARCH) && !defined(ZERO) ++#define CPU_HEADER_H(basename) XSTR(basename ## _loongarch.h) ++#define CPU_HEADER(basename) XSTR(basename ## _loongarch.hpp) ++#define CPU_HEADER_INLINE(basename) XSTR(basename ## _loongarch.inline.hpp) ++#else + #define CPU_HEADER_H(basename) XSTR(CPU_HEADER_STEM(basename).h) + #define CPU_HEADER(basename) XSTR(CPU_HEADER_STEM(basename).hpp) + #define CPU_HEADER_INLINE(basename) XSTR(CPU_HEADER_STEM(basename).inline.hpp) ++#endif + // basename.hpp 
/ basename.inline.hpp + #define OS_HEADER_H(basename) XSTR(OS_HEADER_STEM(basename).h) + #define OS_HEADER(basename) XSTR(OS_HEADER_STEM(basename).hpp) + #define OS_HEADER_INLINE(basename) XSTR(OS_HEADER_STEM(basename).inline.hpp) + // basename.hpp / basename.inline.hpp ++#if defined(MIPS) && !defined(ZERO) ++#define OS_CPU_HEADER(basename) XSTR(basename ## _linux_mips.hpp) ++#define OS_CPU_HEADER_INLINE(basename) XSTR(basename ## _linux_mips.inline.hpp) ++#elif defined(LOONGARCH) && !defined(ZERO) ++#define OS_CPU_HEADER(basename) XSTR(basename ## _linux_loongarch.hpp) ++#define OS_CPU_HEADER_INLINE(basename) XSTR(basename ## _linux_loongarch.inline.hpp) ++#else + #define OS_CPU_HEADER(basename) XSTR(OS_CPU_HEADER_STEM(basename).hpp) + #define OS_CPU_HEADER_INLINE(basename) XSTR(OS_CPU_HEADER_STEM(basename).inline.hpp) ++#endif + // basename.hpp / basename.inline.hpp + #define COMPILER_HEADER(basename) XSTR(COMPILER_HEADER_STEM(basename).hpp) + #define COMPILER_HEADER_INLINE(basename) XSTR(COMPILER_HEADER_STEM(basename).inline.hpp) +diff --git a/src/jdk.hotspot.agent/linux/native/libsaproc/LinuxDebuggerLocal.c b/src/jdk.hotspot.agent/linux/native/libsaproc/LinuxDebuggerLocal.c +index 0d834302c5..6afafea095 100644 +--- a/src/jdk.hotspot.agent/linux/native/libsaproc/LinuxDebuggerLocal.c ++++ b/src/jdk.hotspot.agent/linux/native/libsaproc/LinuxDebuggerLocal.c +@@ -22,6 +22,13 @@ + * + */ + ++/* ++ * This file has been modified by Loongson Technology in 2022. These ++ * modifications are Copyright (c) 2021, 2022, Loongson Technology, and are made ++ * available on the same license terms set forth above. ++ * ++ */ ++ + #include + #include "libproc.h" + #include "proc_service.h" +@@ -54,10 +61,18 @@ + #include "sun_jvm_hotspot_debugger_ppc64_PPC64ThreadContext.h" + #endif + ++#if defined(mips64) || defined(mips64el) ++#include "sun_jvm_hotspot_debugger_mips64_MIPS64ThreadContext.h" ++#endif ++ + #ifdef aarch64 + #include "sun_jvm_hotspot_debugger_aarch64_AARCH64ThreadContext.h" + #endif + ++#ifdef loongarch64 ++#include "sun_jvm_hotspot_debugger_loongarch64_LOONGARCH64ThreadContext.h" ++#endif ++ + static jfieldID p_ps_prochandle_ID = 0; + static jfieldID threadList_ID = 0; + static jfieldID loadObjectList_ID = 0; +@@ -397,7 +412,7 @@ JNIEXPORT jbyteArray JNICALL Java_sun_jvm_hotspot_debugger_linux_LinuxDebuggerLo + return (err == PS_OK)? 
array : 0; + } + +-#if defined(i386) || defined(amd64) || defined(sparc) || defined(sparcv9) | defined(ppc64) || defined(ppc64le) || defined(aarch64) ++#if defined(i386) || defined(amd64) || defined(sparc) || defined(sparcv9) | defined(ppc64) || defined(ppc64le) || defined(aarch64) || defined(loongarch64) + JNIEXPORT jlongArray JNICALL Java_sun_jvm_hotspot_debugger_linux_LinuxDebuggerLocal_getThreadIntegerRegisterSet0 + (JNIEnv *env, jobject this_obj, jint lwp_id) { + +@@ -425,8 +440,14 @@ JNIEXPORT jlongArray JNICALL Java_sun_jvm_hotspot_debugger_linux_LinuxDebuggerLo + #if defined(sparc) || defined(sparcv9) + #define NPRGREG sun_jvm_hotspot_debugger_sparc_SPARCThreadContext_NPRGREG + #endif ++#ifdef loongarch64 ++#define NPRGREG sun_jvm_hotspot_debugger_loongarch64_LOONGARCH64ThreadContext_NPRGREG ++#endif + #if defined(ppc64) || defined(ppc64le) + #define NPRGREG sun_jvm_hotspot_debugger_ppc64_PPC64ThreadContext_NPRGREG ++#endif ++#if defined(mips64) || defined(mips64el) ++#define NPRGREG sun_jvm_hotspot_debugger_mips64_MIPS64ThreadContext_NPRGREG + #endif + + +@@ -534,6 +555,18 @@ JNIEXPORT jlongArray JNICALL Java_sun_jvm_hotspot_debugger_linux_LinuxDebuggerLo + } + #endif /* aarch64 */ + ++#if defined(loongarch64) ++ ++#define REG_INDEX(reg) sun_jvm_hotspot_debugger_loongarch64_LOONGARCH64ThreadContext_##reg ++ ++ { ++ int i; ++ for (i = 0; i < 31; i++) ++ regs[i] = gregs.regs[i]; ++ regs[REG_INDEX(PC)] = gregs.csr_era; ++ } ++#endif /* loongarch64 */ ++ + #if defined(ppc64) || defined(ppc64le) + #define REG_INDEX(reg) sun_jvm_hotspot_debugger_ppc64_PPC64ThreadContext_##reg + +@@ -574,6 +607,45 @@ JNIEXPORT jlongArray JNICALL Java_sun_jvm_hotspot_debugger_linux_LinuxDebuggerLo + + #endif + ++#if defined(mips64) || defined(mips64el) ++ ++#define REG_INDEX(reg) sun_jvm_hotspot_debugger_mips64_MIPS64ThreadContext_##reg ++ ++ regs[REG_INDEX(ZERO)] = gregs.regs[0]; ++ regs[REG_INDEX(AT)] = gregs.regs[1]; ++ regs[REG_INDEX(V0)] = gregs.regs[2]; ++ regs[REG_INDEX(V1)] = gregs.regs[3]; ++ regs[REG_INDEX(A0)] = gregs.regs[4]; ++ regs[REG_INDEX(A1)] = gregs.regs[5]; ++ regs[REG_INDEX(A2)] = gregs.regs[6]; ++ regs[REG_INDEX(A3)] = gregs.regs[7]; ++ regs[REG_INDEX(T0)] = gregs.regs[8]; ++ regs[REG_INDEX(T1)] = gregs.regs[9]; ++ regs[REG_INDEX(T2)] = gregs.regs[10]; ++ regs[REG_INDEX(T3)] = gregs.regs[11]; ++ regs[REG_INDEX(T4)] = gregs.regs[12]; ++ regs[REG_INDEX(T5)] = gregs.regs[13]; ++ regs[REG_INDEX(T6)] = gregs.regs[14]; ++ regs[REG_INDEX(T7)] = gregs.regs[15]; ++ regs[REG_INDEX(S0)] = gregs.regs[16]; ++ regs[REG_INDEX(S1)] = gregs.regs[17]; ++ regs[REG_INDEX(S2)] = gregs.regs[18]; ++ regs[REG_INDEX(S3)] = gregs.regs[19]; ++ regs[REG_INDEX(S4)] = gregs.regs[20]; ++ regs[REG_INDEX(S5)] = gregs.regs[21]; ++ regs[REG_INDEX(S6)] = gregs.regs[22]; ++ regs[REG_INDEX(S7)] = gregs.regs[23]; ++ regs[REG_INDEX(T8)] = gregs.regs[24]; ++ regs[REG_INDEX(T9)] = gregs.regs[25]; ++ regs[REG_INDEX(K0)] = gregs.regs[26]; ++ regs[REG_INDEX(K1)] = gregs.regs[27]; ++ regs[REG_INDEX(GP)] = gregs.regs[28]; ++ regs[REG_INDEX(SP)] = gregs.regs[29]; ++ regs[REG_INDEX(FP)] = gregs.regs[30]; ++ regs[REG_INDEX(S8)] = gregs.regs[30]; ++ regs[REG_INDEX(RA)] = gregs.regs[31]; ++#endif /* mips */ ++ + (*env)->ReleaseLongArrayElements(env, array, regs, JNI_COMMIT); + return array; + } +diff --git a/src/jdk.hotspot.agent/linux/native/libsaproc/libproc.h b/src/jdk.hotspot.agent/linux/native/libsaproc/libproc.h +index 8318e8e021..07064e76ee 100644 +--- a/src/jdk.hotspot.agent/linux/native/libsaproc/libproc.h ++++ 
b/src/jdk.hotspot.agent/linux/native/libsaproc/libproc.h +@@ -22,6 +22,13 @@ + * + */ + ++/* ++ * This file has been modified by Loongson Technology in 2022. These ++ * modifications are Copyright (c) 2022, Loongson Technology, and are made ++ * available on the same license terms set forth above. ++ * ++ */ ++ + #ifndef _LIBPROC_H_ + #define _LIBPROC_H_ + +@@ -37,13 +44,17 @@ + #include + #define user_regs_struct pt_regs + #endif +-#if defined(aarch64) || defined(arm64) ++#if defined(aarch64) || defined(arm64) || defined(loongarch64) + #include + #define user_regs_struct user_pt_regs + #elif defined(arm) + #include + #define user_regs_struct pt_regs + #endif ++#if defined(mips) || defined(mipsel) || defined(mips64) || defined(mips64el) ++#include ++#define user_regs_struct pt_regs ++#endif + + // This C bool type must be int for compatibility with Linux calls and + // it would be a mistake to equivalence it to C++ bool on many platforms +diff --git a/src/jdk.hotspot.agent/linux/native/libsaproc/ps_proc.c b/src/jdk.hotspot.agent/linux/native/libsaproc/ps_proc.c +index de5254d859..eefe55959c 100644 +--- a/src/jdk.hotspot.agent/linux/native/libsaproc/ps_proc.c ++++ b/src/jdk.hotspot.agent/linux/native/libsaproc/ps_proc.c +@@ -22,6 +22,12 @@ + * + */ + ++/* ++ * This file has been modified by Loongson Technology in 2022, These ++ * modifications are Copyright (c) 2022, Loongson Technology, and are made ++ * available on the same license terms set forth above. ++ */ ++ + #include + #include + #include +@@ -142,7 +148,7 @@ static bool process_get_lwp_regs(struct ps_prochandle* ph, pid_t pid, struct use + #define PTRACE_GETREGS_REQ PT_GETREGS + #endif + +-#ifdef PTRACE_GETREGS_REQ ++#if defined(PTRACE_GETREGS_REQ) && !defined(loongarch64) + if (ptrace_getregs(PTRACE_GETREGS_REQ, pid, user, NULL) < 0) { + print_debug("ptrace(PTRACE_GETREGS, ...) failed for lwp %d\n", pid); + return false; +diff --git a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/HotSpotAgent.java b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/HotSpotAgent.java +index 0f5f0119c7..1b2f11a065 100644 +--- a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/HotSpotAgent.java ++++ b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/HotSpotAgent.java +@@ -23,6 +23,12 @@ + * + */ + ++/* ++ * This file has been modified by Loongson Technology in 2021. These ++ * modifications are Copyright (c) 2018, 2021, Loongson Technology, and are made ++ * available on the same license terms set forth above. 
++ * ++ */ + package sun.jvm.hotspot; + + import java.rmi.RemoteException; +@@ -39,6 +45,8 @@ import sun.jvm.hotspot.debugger.MachineDescriptionAArch64; + import sun.jvm.hotspot.debugger.MachineDescriptionIntelX86; + import sun.jvm.hotspot.debugger.MachineDescriptionSPARC32Bit; + import sun.jvm.hotspot.debugger.MachineDescriptionSPARC64Bit; ++import sun.jvm.hotspot.debugger.MachineDescriptionMIPS64; ++import sun.jvm.hotspot.debugger.MachineDescriptionLOONGARCH64; + import sun.jvm.hotspot.debugger.NoSuchSymbolException; + import sun.jvm.hotspot.debugger.bsd.BsdDebuggerLocal; + import sun.jvm.hotspot.debugger.linux.LinuxDebuggerLocal; +@@ -598,6 +606,10 @@ public class HotSpotAgent { + } else { + machDesc = new MachineDescriptionSPARC32Bit(); + } ++ } else if (cpu.equals("mips64")) { ++ machDesc = new MachineDescriptionMIPS64(); ++ } else if (cpu.equals("loongarch64")) { ++ machDesc = new MachineDescriptionLOONGARCH64(); + } else { + try { + machDesc = (MachineDescription) +diff --git a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/MachineDescriptionLOONGARCH64.java b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/MachineDescriptionLOONGARCH64.java +new file mode 100644 +index 0000000000..99cea8c7f1 +--- /dev/null ++++ b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/MachineDescriptionLOONGARCH64.java +@@ -0,0 +1,41 @@ ++/* ++ * Copyright (c) 2000, 2008, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2018, 2021, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++package sun.jvm.hotspot.debugger; ++ ++public class MachineDescriptionLOONGARCH64 extends MachineDescriptionTwosComplement implements MachineDescription { ++ public long getAddressSize() { ++ return 8; ++ } ++ ++ ++ public boolean isBigEndian() { ++ return false; ++ } ++ ++ public boolean isLP64() { ++ return true; ++ } ++} +diff --git a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/MachineDescriptionMIPS64.java b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/MachineDescriptionMIPS64.java +new file mode 100644 +index 0000000000..1b49efd201 +--- /dev/null ++++ b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/MachineDescriptionMIPS64.java +@@ -0,0 +1,41 @@ ++/* ++ * Copyright (c) 2000, 2008, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2018, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++package sun.jvm.hotspot.debugger; ++ ++public class MachineDescriptionMIPS64 extends MachineDescriptionTwosComplement implements MachineDescription { ++ public long getAddressSize() { ++ return 8; ++ } ++ ++ ++ public boolean isBigEndian() { ++ return "big".equals(System.getProperty("sun.cpu.endian")); ++ } ++ ++ public boolean isLP64() { ++ return true; ++ } ++} +diff --git a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/linux/LinuxCDebugger.java b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/linux/LinuxCDebugger.java +index 5e5a6bb714..7d7f6424e6 100644 +--- a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/linux/LinuxCDebugger.java ++++ b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/linux/LinuxCDebugger.java +@@ -23,6 +23,12 @@ + * + */ + ++/* ++ * This file has been modified by Loongson Technology in 2022, These ++ * modifications are Copyright (c) 2019, 2022, Loongson Technology, and are made ++ * available on the same license terms set forth above. 
++ */ ++ + package sun.jvm.hotspot.debugger.linux; + + import java.io.*; +@@ -34,12 +40,16 @@ import sun.jvm.hotspot.debugger.x86.*; + import sun.jvm.hotspot.debugger.amd64.*; + import sun.jvm.hotspot.debugger.aarch64.*; + import sun.jvm.hotspot.debugger.sparc.*; ++import sun.jvm.hotspot.debugger.mips64.*; ++import sun.jvm.hotspot.debugger.loongarch64.*; + import sun.jvm.hotspot.debugger.ppc64.*; + import sun.jvm.hotspot.debugger.linux.x86.*; + import sun.jvm.hotspot.debugger.linux.amd64.*; + import sun.jvm.hotspot.debugger.linux.sparc.*; + import sun.jvm.hotspot.debugger.linux.ppc64.*; + import sun.jvm.hotspot.debugger.linux.aarch64.*; ++import sun.jvm.hotspot.debugger.linux.mips64.*; ++import sun.jvm.hotspot.debugger.linux.loongarch64.*; + import sun.jvm.hotspot.utilities.*; + + class LinuxCDebugger implements CDebugger { +@@ -102,7 +112,21 @@ class LinuxCDebugger implements CDebugger { + Address pc = context.getRegisterAsAddress(SPARCThreadContext.R_O7); + if (pc == null) return null; + return new LinuxSPARCCFrame(dbg, sp, pc, LinuxDebuggerLocal.getAddressSize()); +- } else if (cpu.equals("ppc64")) { ++ } else if (cpu.equals("mips64")) { ++ MIPS64ThreadContext context = (MIPS64ThreadContext) thread.getContext(); ++ Address sp = context.getRegisterAsAddress(MIPS64ThreadContext.SP); ++ if (sp == null) return null; ++ Address pc = context.getRegisterAsAddress(MIPS64ThreadContext.PC); ++ if (pc == null) return null; ++ return new LinuxMIPS64CFrame(dbg, sp, pc); ++ } else if (cpu.equals("loongarch64")) { ++ LOONGARCH64ThreadContext context = (LOONGARCH64ThreadContext) thread.getContext(); ++ Address fp = context.getRegisterAsAddress(LOONGARCH64ThreadContext.FP); ++ if (fp == null) return null; ++ Address pc = context.getRegisterAsAddress(LOONGARCH64ThreadContext.PC); ++ if (pc == null) return null; ++ return new LinuxLOONGARCH64CFrame(dbg, fp, pc); ++ } else if (cpu.equals("ppc64")) { + PPC64ThreadContext context = (PPC64ThreadContext) thread.getContext(); + Address sp = context.getRegisterAsAddress(PPC64ThreadContext.SP); + if (sp == null) return null; +diff --git a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/linux/LinuxThreadContextFactory.java b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/linux/LinuxThreadContextFactory.java +index 4b786eecc9..4ead33827c 100644 +--- a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/linux/LinuxThreadContextFactory.java ++++ b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/linux/LinuxThreadContextFactory.java +@@ -22,6 +22,12 @@ + * + */ + ++/* ++ * This file has been modified by Loongson Technology in 2021, These ++ * modifications are Copyright (c) 2019, 2021, Loongson Technology, and are made ++ * available on the same license terms set forth above. 
++ */ ++ + package sun.jvm.hotspot.debugger.linux; + + import java.lang.reflect.*; +@@ -30,6 +36,8 @@ import sun.jvm.hotspot.debugger.linux.amd64.*; + import sun.jvm.hotspot.debugger.linux.x86.*; + import sun.jvm.hotspot.debugger.linux.ppc64.*; + import sun.jvm.hotspot.debugger.linux.sparc.*; ++import sun.jvm.hotspot.debugger.linux.mips64.*; ++import sun.jvm.hotspot.debugger.linux.loongarch64.*; + + class LinuxThreadContextFactory { + static ThreadContext createThreadContext(LinuxDebugger dbg) { +@@ -40,7 +48,11 @@ class LinuxThreadContextFactory { + return new LinuxAMD64ThreadContext(dbg); + } else if (cpu.equals("sparc")) { + return new LinuxSPARCThreadContext(dbg); +- } else if (cpu.equals("ppc64")) { ++ } else if (cpu.equals("mips64")) { ++ return new LinuxMIPS64ThreadContext(dbg); ++ } else if (cpu.equals("loongarch64")) { ++ return new LinuxLOONGARCH64ThreadContext(dbg); ++ } else if (cpu.equals("ppc64")) { + return new LinuxPPC64ThreadContext(dbg); + } else { + try { +diff --git a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/linux/loongarch64/LinuxLOONGARCH64CFrame.java b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/linux/loongarch64/LinuxLOONGARCH64CFrame.java +new file mode 100644 +index 0000000000..0e6caee5a4 +--- /dev/null ++++ b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/linux/loongarch64/LinuxLOONGARCH64CFrame.java +@@ -0,0 +1,92 @@ ++/* ++ * Copyright (c) 2003, 2012, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++package sun.jvm.hotspot.debugger.linux.loongarch64; ++ ++import sun.jvm.hotspot.debugger.*; ++import sun.jvm.hotspot.debugger.linux.*; ++import sun.jvm.hotspot.debugger.cdbg.*; ++import sun.jvm.hotspot.debugger.cdbg.basic.*; ++import sun.jvm.hotspot.debugger.loongarch64.*; ++ ++final public class LinuxLOONGARCH64CFrame extends BasicCFrame { ++ // package/class internals only ++ public LinuxLOONGARCH64CFrame(LinuxDebugger dbg, Address fp, Address pc) { ++ super(dbg.getCDebugger()); ++ this.fp = fp; ++ this.pc = pc; ++ this.dbg = dbg; ++ } ++ ++ // override base class impl to avoid ELF parsing ++ public ClosestSymbol closestSymbolToPC() { ++ // try native lookup in debugger. 
++ return dbg.lookup(dbg.getAddressValue(pc())); ++ } ++ ++ public Address pc() { ++ return pc; ++ } ++ ++ public Address localVariableBase() { ++ return fp; ++ } ++ ++ public CFrame sender(ThreadProxy thread) { ++ LOONGARCH64ThreadContext context = (LOONGARCH64ThreadContext) thread.getContext(); ++ Address sp = context.getRegisterAsAddress(LOONGARCH64ThreadContext.SP); ++ Address nextFP; ++ Address nextPC; ++ ++ if ((fp == null) || fp.lessThan(sp)) { ++ return null; ++ } ++ ++ try { ++ nextFP = fp.getAddressAt(-2 * ADDRESS_SIZE); ++ } catch (Exception e) { ++ return null; ++ } ++ if (nextFP == null) { ++ return null; ++ } ++ ++ try { ++ nextPC = fp.getAddressAt(-1 * ADDRESS_SIZE); ++ } catch (Exception e) { ++ return null; ++ } ++ if (nextPC == null) { ++ return null; ++ } ++ ++ return new LinuxLOONGARCH64CFrame(dbg, nextFP, nextPC); ++ } ++ ++ private static final int ADDRESS_SIZE = 8; ++ private Address pc; ++ private Address fp; ++ private LinuxDebugger dbg; ++} +diff --git a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/linux/loongarch64/LinuxLOONGARCH64ThreadContext.java b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/linux/loongarch64/LinuxLOONGARCH64ThreadContext.java +new file mode 100644 +index 0000000000..604642598e +--- /dev/null ++++ b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/linux/loongarch64/LinuxLOONGARCH64ThreadContext.java +@@ -0,0 +1,47 @@ ++/* ++ * Copyright (c) 2003, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2021, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ * ++ */ ++ ++package sun.jvm.hotspot.debugger.linux.loongarch64; ++ ++import sun.jvm.hotspot.debugger.*; ++import sun.jvm.hotspot.debugger.loongarch64.*; ++import sun.jvm.hotspot.debugger.linux.*; ++ ++public class LinuxLOONGARCH64ThreadContext extends LOONGARCH64ThreadContext { ++ private LinuxDebugger debugger; ++ ++ public LinuxLOONGARCH64ThreadContext(LinuxDebugger debugger) { ++ super(); ++ this.debugger = debugger; ++ } ++ ++ public void setRegisterAsAddress(int index, Address value) { ++ setRegister(index, debugger.getAddressValue(value)); ++ } ++ ++ public Address getRegisterAsAddress(int index) { ++ return debugger.newAddress(getRegister(index)); ++ } ++} +diff --git a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/linux/mips64/LinuxMIPS64CFrame.java b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/linux/mips64/LinuxMIPS64CFrame.java +new file mode 100644 +index 0000000000..2e3eb564da +--- /dev/null ++++ b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/linux/mips64/LinuxMIPS64CFrame.java +@@ -0,0 +1,80 @@ ++/* ++ * Copyright (c) 2003, 2012, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2018, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++package sun.jvm.hotspot.debugger.linux.mips64; ++ ++import sun.jvm.hotspot.debugger.*; ++import sun.jvm.hotspot.debugger.linux.*; ++import sun.jvm.hotspot.debugger.cdbg.*; ++import sun.jvm.hotspot.debugger.cdbg.basic.*; ++import sun.jvm.hotspot.debugger.mips64.*; ++ ++final public class LinuxMIPS64CFrame extends BasicCFrame { ++ // package/class internals only ++ public LinuxMIPS64CFrame(LinuxDebugger dbg, Address ebp, Address pc) { ++ super(dbg.getCDebugger()); ++ this.ebp = ebp; ++ this.pc = pc; ++ this.dbg = dbg; ++ } ++ ++ // override base class impl to avoid ELF parsing ++ public ClosestSymbol closestSymbolToPC() { ++ // try native lookup in debugger. 
++ return dbg.lookup(dbg.getAddressValue(pc())); ++ } ++ ++ public Address pc() { ++ return pc; ++ } ++ ++ public Address localVariableBase() { ++ return ebp; ++ } ++ ++ public CFrame sender(ThreadProxy thread) { ++ MIPS64ThreadContext context = (MIPS64ThreadContext) thread.getContext(); ++ Address esp = context.getRegisterAsAddress(MIPS64ThreadContext.SP); ++ ++ if ( (ebp == null) || ebp.lessThan(esp) ) { ++ return null; ++ } ++ ++ Address nextEBP = ebp.getAddressAt( 0 * ADDRESS_SIZE); ++ if (nextEBP == null) { ++ return null; ++ } ++ Address nextPC = ebp.getAddressAt( 1 * ADDRESS_SIZE); ++ if (nextPC == null) { ++ return null; ++ } ++ return new LinuxMIPS64CFrame(dbg, nextEBP, nextPC); ++ } ++ ++ private static final int ADDRESS_SIZE = 4; ++ private Address pc; ++ private Address ebp; ++ private LinuxDebugger dbg; ++} +diff --git a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/linux/mips64/LinuxMIPS64ThreadContext.java b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/linux/mips64/LinuxMIPS64ThreadContext.java +new file mode 100644 +index 0000000000..98e0f3f0bc +--- /dev/null ++++ b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/linux/mips64/LinuxMIPS64ThreadContext.java +@@ -0,0 +1,47 @@ ++/* ++ * Copyright (c) 2003, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2018, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++package sun.jvm.hotspot.debugger.linux.mips64; ++ ++import sun.jvm.hotspot.debugger.*; ++import sun.jvm.hotspot.debugger.mips64.*; ++import sun.jvm.hotspot.debugger.linux.*; ++ ++public class LinuxMIPS64ThreadContext extends MIPS64ThreadContext { ++ private LinuxDebugger debugger; ++ ++ public LinuxMIPS64ThreadContext(LinuxDebugger debugger) { ++ super(); ++ this.debugger = debugger; ++ } ++ ++ public void setRegisterAsAddress(int index, Address value) { ++ setRegister(index, debugger.getAddressValue(value)); ++ } ++ ++ public Address getRegisterAsAddress(int index) { ++ return debugger.newAddress(getRegister(index)); ++ } ++} +diff --git a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/loongarch64/LOONGARCH64ThreadContext.java b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/loongarch64/LOONGARCH64ThreadContext.java +new file mode 100644 +index 0000000000..1de3cb1a47 +--- /dev/null ++++ b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/loongarch64/LOONGARCH64ThreadContext.java +@@ -0,0 +1,128 @@ ++/* ++ * Copyright (c) 2000, 2012, Oracle and/or its affiliates. 
All rights reserved. ++ * Copyright (c) 2015, 2021, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++package sun.jvm.hotspot.debugger.loongarch64; ++ ++import java.lang.annotation.Native; ++ ++import sun.jvm.hotspot.debugger.*; ++import sun.jvm.hotspot.debugger.cdbg.*; ++ ++/** Specifies the thread context on loongarch64 platforms; only a sub-portion ++ of the context is guaranteed to be present on all operating ++ systems. */ ++ ++public abstract class LOONGARCH64ThreadContext implements ThreadContext { ++ ++ // NOTE: the indices for the various registers must be maintained as ++ // listed across various operating systems. However, only a small ++ // subset of the registers' values are guaranteed to be present (and ++ // must be present for the SA's stack walking to work): EAX, EBX, ++ // ECX, EDX, ESI, EDI, EBP, ESP, and EIP. ++ ++ // One instance of the Native annotation is enough to trigger header generation ++ // for this file. 
++ @Native ++ public static final int ZERO = 0; ++ public static final int RA = 1; ++ public static final int TP = 2; ++ public static final int SP = 3; ++ public static final int A0 = 4; ++ public static final int A1 = 5; ++ public static final int A2 = 6; ++ public static final int A3 = 7; ++ public static final int A4 = 8; ++ public static final int A5 = 9; ++ public static final int A6 = 10; ++ public static final int A7 = 11; ++ public static final int T0 = 12; ++ public static final int T1 = 13; ++ public static final int T2 = 14; ++ public static final int T3 = 15; ++ public static final int T4 = 16; ++ public static final int T5 = 17; ++ public static final int T6 = 18; ++ public static final int T7 = 19; ++ public static final int T8 = 20; ++ public static final int RX = 21; ++ public static final int FP = 22; ++ public static final int S0 = 23; ++ public static final int S1 = 24; ++ public static final int S2 = 25; ++ public static final int S3 = 26; ++ public static final int S4 = 27; ++ public static final int S5 = 28; ++ public static final int S6 = 29; ++ public static final int S7 = 30; ++ public static final int S8 = 31; ++ public static final int PC = 32; ++ public static final int NPRGREG = 33; ++ ++ private static final String[] regNames = { ++ "ZERO", "RA", "TP", "SP", ++ "A0", "A1", "A2", "A3", ++ "A4", "A5", "A6", "A7", ++ "T0", "T1", "T2", "T3", ++ "T4", "T5", "T6", "T7", ++ "T8", "RX", "FP", "S0", ++ "S1", "S2", "S3", "S4", ++ "S5", "S6", "S7", "S8", ++ "PC" ++ }; ++ ++ private long[] data; ++ ++ public LOONGARCH64ThreadContext() { ++ data = new long[NPRGREG]; ++ } ++ ++ public int getNumRegisters() { ++ return NPRGREG; ++ } ++ ++ public String getRegisterName(int index) { ++ return regNames[index]; ++ } ++ ++ public void setRegister(int index, long value) { ++ data[index] = value; ++ } ++ ++ public long getRegister(int index) { ++ return data[index]; ++ } ++ ++ public CFrame getTopFrame(Debugger dbg) { ++ return null; ++ } ++ ++ /** This can't be implemented in this class since we would have to ++ tie the implementation to, for example, the debugging system */ ++ public abstract void setRegisterAsAddress(int index, Address value); ++ ++ /** This can't be implemented in this class since we would have to ++ tie the implementation to, for example, the debugging system */ ++ public abstract Address getRegisterAsAddress(int index); ++} +diff --git a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/mips64/MIPS64ThreadContext.java b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/mips64/MIPS64ThreadContext.java +new file mode 100644 +index 0000000000..d3479a65ea +--- /dev/null ++++ b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/mips64/MIPS64ThreadContext.java +@@ -0,0 +1,128 @@ ++/* ++ * Copyright (c) 2000, 2012, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2018, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). 
++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++package sun.jvm.hotspot.debugger.mips64; ++ ++import java.lang.annotation.Native; ++ ++import sun.jvm.hotspot.debugger.*; ++import sun.jvm.hotspot.debugger.cdbg.*; ++ ++/** Specifies the thread context on mips64 platforms; only a sub-portion ++ of the context is guaranteed to be present on all operating ++ systems. */ ++ ++public abstract class MIPS64ThreadContext implements ThreadContext { ++ ++ // NOTE: the indices for the various registers must be maintained as ++ // listed across various operating systems. However, only a small ++ // subset of the registers' values are guaranteed to be present (and ++ // must be present for the SA's stack walking to work): EAX, EBX, ++ // ECX, EDX, ESI, EDI, EBP, ESP, and EIP. ++ ++ // One instance of the Native annotation is enough to trigger header generation ++ // for this file. ++ @Native ++ public static final int ZERO = 0; ++ public static final int AT = 1; ++ public static final int V0 = 2; ++ public static final int V1 = 3; ++ public static final int A0 = 4; ++ public static final int A1 = 5; ++ public static final int A2 = 6; ++ public static final int A3 = 7; ++ public static final int T0 = 8; ++ public static final int T1 = 9; ++ public static final int T2 = 10; ++ public static final int T3 = 11; ++ public static final int T4 = 12; ++ public static final int T5 = 13; ++ public static final int T6 = 14; ++ public static final int T7 = 15; ++ public static final int S0 = 16; ++ public static final int S1 = 17; ++ public static final int S2 = 18; ++ public static final int S3 = 19; ++ public static final int S4 = 20; ++ public static final int S5 = 21; ++ public static final int S6 = 22; ++ public static final int S7 = 23; ++ public static final int T8 = 24; ++ public static final int T9 = 25; ++ public static final int K0 = 26; ++ public static final int K1 = 27; ++ public static final int GP = 28; ++ public static final int SP = 29; ++ public static final int FP = 30; ++ public static final int RA = 31; ++ public static final int PC = 32; ++ public static final int NPRGREG = 33; ++ ++ private static final String[] regNames = { ++ "ZERO", "AT", "V0", "V1", ++ "A0", "A1", "A2", "A3", ++ "T0", "T1", "T2", "T3", ++ "T4", "T5", "T6", "T7", ++ "S0", "S1", "S2", "S3", ++ "S4", "S5", "S6", "S7", ++ "T8", "T9", "K0", "K1", ++ "GP", "SP", "FP", "RA", ++ "PC" ++ }; ++ ++ private long[] data; ++ ++ public MIPS64ThreadContext() { ++ data = new long[NPRGREG]; ++ } ++ ++ public int getNumRegisters() { ++ return NPRGREG; ++ } ++ ++ public String getRegisterName(int index) { ++ return regNames[index]; ++ } ++ ++ public void setRegister(int index, long value) { ++ data[index] = value; ++ } ++ ++ public long getRegister(int index) { ++ return data[index]; ++ } ++ ++ public CFrame getTopFrame(Debugger dbg) { ++ return null; ++ } ++ ++ /** This can't be implemented in this class since we would have to ++ tie the implementation to, for example, the debugging system */ ++ public abstract void setRegisterAsAddress(int index, Address value); ++ ++ /** This can't be implemented in this class since we would have to ++ tie the implementation to, for example, 
the debugging system */ ++ public abstract Address getRegisterAsAddress(int index); ++} +diff --git a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/posix/elf/ELFHeader.java b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/posix/elf/ELFHeader.java +index 7113a3a497..de47531db7 100644 +--- a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/posix/elf/ELFHeader.java ++++ b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/posix/elf/ELFHeader.java +@@ -22,6 +22,12 @@ + * + */ + ++/* ++ * This file has been modified by Loongson Technology in 2021, These ++ * modifications are Copyright (c) 2019, 2021, Loongson Technology, and are made ++ * available on the same license terms set forth above. ++ */ ++ + package sun.jvm.hotspot.debugger.posix.elf; + + import java.io.FileInputStream; +@@ -63,6 +69,8 @@ public interface ELFHeader { + public static final int ARCH_i860 = 7; + /** MIPS architecture type. */ + public static final int ARCH_MIPS = 8; ++ /** LOONGARCH architecture type. */ ++ public static final int ARCH_LOONGARCH = 9; + + /** Returns a file type which is defined by the file type constants. */ + public short getFileType(); +diff --git a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/proc/ProcDebuggerLocal.java b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/proc/ProcDebuggerLocal.java +index 74e957d94b..46ece3611f 100644 +--- a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/proc/ProcDebuggerLocal.java ++++ b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/proc/ProcDebuggerLocal.java +@@ -32,11 +32,13 @@ import sun.jvm.hotspot.debugger.*; + import sun.jvm.hotspot.debugger.cdbg.*; + import sun.jvm.hotspot.debugger.proc.amd64.*; + import sun.jvm.hotspot.debugger.proc.aarch64.*; ++import sun.jvm.hotspot.debugger.proc.mips64.*; + import sun.jvm.hotspot.debugger.proc.sparc.*; + import sun.jvm.hotspot.debugger.proc.ppc64.*; + import sun.jvm.hotspot.debugger.proc.x86.*; + import sun.jvm.hotspot.debugger.ppc64.*; + import sun.jvm.hotspot.debugger.amd64.*; ++import sun.jvm.hotspot.debugger.mips64.*; + import sun.jvm.hotspot.debugger.aarch64.*; + import sun.jvm.hotspot.debugger.sparc.*; + import sun.jvm.hotspot.debugger.x86.*; +@@ -90,6 +92,10 @@ public class ProcDebuggerLocal extends DebuggerBase implements ProcDebugger { + threadFactory = new ProcAMD64ThreadFactory(this); + pcRegIndex = AMD64ThreadContext.RIP; + fpRegIndex = AMD64ThreadContext.RBP; ++ } else if (cpu.equals("mips64") || cpu.equals("mips64el")) { ++ threadFactory = new ProcMIPS64ThreadFactory(this); ++ pcRegIndex = MIPS64ThreadContext.PC; ++ fpRegIndex = MIPS64ThreadContext.FP; + } else if (cpu.equals("aarch64")) { + threadFactory = new ProcAARCH64ThreadFactory(this); + pcRegIndex = AARCH64ThreadContext.PC; +diff --git a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/proc/loongarch64/ProcLOONGARCH64Thread.java b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/proc/loongarch64/ProcLOONGARCH64Thread.java +new file mode 100644 +index 0000000000..1f60fa6cfb +--- /dev/null ++++ b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/proc/loongarch64/ProcLOONGARCH64Thread.java +@@ -0,0 +1,92 @@ ++/* ++ * Copyright (c) 2002, 2003, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2021, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++package sun.jvm.hotspot.debugger.proc.loongarch64; ++ ++import sun.jvm.hotspot.debugger.*; ++import sun.jvm.hotspot.debugger.loongarch64.*; ++import sun.jvm.hotspot.debugger.proc.*; ++import sun.jvm.hotspot.utilities.*; ++ ++public class ProcLOONGARCH64Thread implements ThreadProxy { ++ private ProcDebugger debugger; ++ private int id; ++ ++ public ProcLOONGARCH64Thread(ProcDebugger debugger, Address addr) { ++ this.debugger = debugger; ++ ++ // FIXME: the size here should be configurable. However, making it ++ // so would produce a dependency on the "types" package from the ++ // debugger package, which is not desired. ++ this.id = (int) addr.getCIntegerAt(0, 4, true); ++ } ++ ++ public ProcLOONGARCH64Thread(ProcDebugger debugger, long id) { ++ this.debugger = debugger; ++ this.id = (int) id; ++ } ++ ++ public ThreadContext getContext() throws IllegalThreadStateException { ++ ProcLOONGARCH64ThreadContext context = new ProcLOONGARCH64ThreadContext(debugger); ++ long[] regs = debugger.getThreadIntegerRegisterSet(id); ++ /* ++ _NGREG in reg.h is defined to be 19. Because we have included ++ debug registers LOONGARCH64ThreadContext.NPRGREG is 25. ++ */ ++ ++ if (Assert.ASSERTS_ENABLED) { ++ Assert.that(regs.length <= LOONGARCH64ThreadContext.NPRGREG, "size of register set is greater than " + LOONGARCH64ThreadContext.NPRGREG); ++ } ++ for (int i = 0; i < regs.length; i++) { ++ context.setRegister(i, regs[i]); ++ } ++ return context; ++ } ++ ++ public boolean canSetContext() throws DebuggerException { ++ return false; ++ } ++ ++ public void setContext(ThreadContext context) ++ throws IllegalThreadStateException, DebuggerException { ++ throw new DebuggerException("Unimplemented"); ++ } ++ ++ public String toString() { ++ return "t@" + id; ++ } ++ ++ public boolean equals(Object obj) { ++ if ((obj == null) || !(obj instanceof ProcLOONGARCH64Thread)) { ++ return false; ++ } ++ ++ return (((ProcLOONGARCH64Thread) obj).id == id); ++ } ++ ++ public int hashCode() { ++ return id; ++ } ++} +diff --git a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/proc/loongarch64/ProcLOONGARCH64ThreadContext.java b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/proc/loongarch64/ProcLOONGARCH64ThreadContext.java +new file mode 100644 +index 0000000000..ef5597ac4e +--- /dev/null ++++ b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/proc/loongarch64/ProcLOONGARCH64ThreadContext.java +@@ -0,0 +1,47 @@ ++/* ++ * Copyright (c) 2002, 2003, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2021, Loongson Technology. 
All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++package sun.jvm.hotspot.debugger.proc.loongarch64; ++ ++import sun.jvm.hotspot.debugger.*; ++import sun.jvm.hotspot.debugger.loongarch64.*; ++import sun.jvm.hotspot.debugger.proc.*; ++ ++public class ProcLOONGARCH64ThreadContext extends LOONGARCH64ThreadContext { ++ private ProcDebugger debugger; ++ ++ public ProcLOONGARCH64ThreadContext(ProcDebugger debugger) { ++ super(); ++ this.debugger = debugger; ++ } ++ ++ public void setRegisterAsAddress(int index, Address value) { ++ setRegister(index, debugger.getAddressValue(value)); ++ } ++ ++ public Address getRegisterAsAddress(int index) { ++ return debugger.newAddress(getRegister(index)); ++ } ++} +diff --git a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/proc/loongarch64/ProcLOONGARCH64ThreadFactory.java b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/proc/loongarch64/ProcLOONGARCH64ThreadFactory.java +new file mode 100644 +index 0000000000..abad1bb38b +--- /dev/null ++++ b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/proc/loongarch64/ProcLOONGARCH64ThreadFactory.java +@@ -0,0 +1,45 @@ ++/* ++ * Copyright (c) 2002, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2021, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ * ++ */ ++ ++package sun.jvm.hotspot.debugger.proc.loongarch64; ++ ++import sun.jvm.hotspot.debugger.*; ++import sun.jvm.hotspot.debugger.proc.*; ++ ++public class ProcLOONGARCH64ThreadFactory implements ProcThreadFactory { ++ private ProcDebugger debugger; ++ ++ public ProcLOONGARCH64ThreadFactory(ProcDebugger debugger) { ++ this.debugger = debugger; ++ } ++ ++ public ThreadProxy createThreadWrapper(Address threadIdentifierAddr) { ++ return new ProcLOONGARCH64Thread(debugger, threadIdentifierAddr); ++ } ++ ++ public ThreadProxy createThreadWrapper(long id) { ++ return new ProcLOONGARCH64Thread(debugger, id); ++ } ++} +diff --git a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/proc/mips64/ProcMIPS64Thread.java b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/proc/mips64/ProcMIPS64Thread.java +new file mode 100644 +index 0000000000..5c1e0be893 +--- /dev/null ++++ b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/proc/mips64/ProcMIPS64Thread.java +@@ -0,0 +1,92 @@ ++/* ++ * Copyright (c) 2002, 2003, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2018, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++package sun.jvm.hotspot.debugger.proc.mips64; ++ ++import sun.jvm.hotspot.debugger.*; ++import sun.jvm.hotspot.debugger.mips64.*; ++import sun.jvm.hotspot.debugger.proc.*; ++import sun.jvm.hotspot.utilities.*; ++ ++public class ProcMIPS64Thread implements ThreadProxy { ++ private ProcDebugger debugger; ++ private int id; ++ ++ public ProcMIPS64Thread(ProcDebugger debugger, Address addr) { ++ this.debugger = debugger; ++ ++ // FIXME: the size here should be configurable. However, making it ++ // so would produce a dependency on the "types" package from the ++ // debugger package, which is not desired. ++ this.id = (int) addr.getCIntegerAt(0, 4, true); ++ } ++ ++ public ProcMIPS64Thread(ProcDebugger debugger, long id) { ++ this.debugger = debugger; ++ this.id = (int) id; ++ } ++ ++ public ThreadContext getContext() throws IllegalThreadStateException { ++ ProcMIPS64ThreadContext context = new ProcMIPS64ThreadContext(debugger); ++ long[] regs = debugger.getThreadIntegerRegisterSet(id); ++ /* ++ _NGREG in reg.h is defined to be 19. Because we have included ++ debug registers MIPS64ThreadContext.NPRGREG is 25. 
++ */ ++ ++ if (Assert.ASSERTS_ENABLED) { ++ Assert.that(regs.length <= MIPS64ThreadContext.NPRGREG, "size of register set is greater than " + MIPS64ThreadContext.NPRGREG); ++ } ++ for (int i = 0; i < regs.length; i++) { ++ context.setRegister(i, regs[i]); ++ } ++ return context; ++ } ++ ++ public boolean canSetContext() throws DebuggerException { ++ return false; ++ } ++ ++ public void setContext(ThreadContext context) ++ throws IllegalThreadStateException, DebuggerException { ++ throw new DebuggerException("Unimplemented"); ++ } ++ ++ public String toString() { ++ return "t@" + id; ++ } ++ ++ public boolean equals(Object obj) { ++ if ((obj == null) || !(obj instanceof ProcMIPS64Thread)) { ++ return false; ++ } ++ ++ return (((ProcMIPS64Thread) obj).id == id); ++ } ++ ++ public int hashCode() { ++ return id; ++ } ++} +diff --git a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/proc/mips64/ProcMIPS64ThreadContext.java b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/proc/mips64/ProcMIPS64ThreadContext.java +new file mode 100644 +index 0000000000..d44223d768 +--- /dev/null ++++ b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/proc/mips64/ProcMIPS64ThreadContext.java +@@ -0,0 +1,47 @@ ++/* ++ * Copyright (c) 2002, 2003, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2018, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++package sun.jvm.hotspot.debugger.proc.mips64; ++ ++import sun.jvm.hotspot.debugger.*; ++import sun.jvm.hotspot.debugger.mips64.*; ++import sun.jvm.hotspot.debugger.proc.*; ++ ++public class ProcMIPS64ThreadContext extends MIPS64ThreadContext { ++ private ProcDebugger debugger; ++ ++ public ProcMIPS64ThreadContext(ProcDebugger debugger) { ++ super(); ++ this.debugger = debugger; ++ } ++ ++ public void setRegisterAsAddress(int index, Address value) { ++ setRegister(index, debugger.getAddressValue(value)); ++ } ++ ++ public Address getRegisterAsAddress(int index) { ++ return debugger.newAddress(getRegister(index)); ++ } ++} +diff --git a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/proc/mips64/ProcMIPS64ThreadFactory.java b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/proc/mips64/ProcMIPS64ThreadFactory.java +new file mode 100644 +index 0000000000..bad478fc5c +--- /dev/null ++++ b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/proc/mips64/ProcMIPS64ThreadFactory.java +@@ -0,0 +1,45 @@ ++/* ++ * Copyright (c) 2002, Oracle and/or its affiliates. All rights reserved. 
++ * Copyright (c) 2015, 2018, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++package sun.jvm.hotspot.debugger.proc.mips64; ++ ++import sun.jvm.hotspot.debugger.*; ++import sun.jvm.hotspot.debugger.proc.*; ++ ++public class ProcMIPS64ThreadFactory implements ProcThreadFactory { ++ private ProcDebugger debugger; ++ ++ public ProcMIPS64ThreadFactory(ProcDebugger debugger) { ++ this.debugger = debugger; ++ } ++ ++ public ThreadProxy createThreadWrapper(Address threadIdentifierAddr) { ++ return new ProcMIPS64Thread(debugger, threadIdentifierAddr); ++ } ++ ++ public ThreadProxy createThreadWrapper(long id) { ++ return new ProcMIPS64Thread(debugger, id); ++ } ++} +diff --git a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/remote/RemoteDebuggerClient.java b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/remote/RemoteDebuggerClient.java +index b6253f6d63..5eecb08a10 100644 +--- a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/remote/RemoteDebuggerClient.java ++++ b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/remote/RemoteDebuggerClient.java +@@ -22,6 +22,12 @@ + * + */ + ++/* ++ * This file has been modified by Loongson Technology in 2021, These ++ * modifications are Copyright (c) 2019, 2021, Loongson Technology, and are made ++ * available on the same license terms set forth above. ++ */ ++ + package sun.jvm.hotspot.debugger.remote; + + import java.rmi.*; +@@ -34,6 +40,8 @@ import sun.jvm.hotspot.debugger.remote.sparc.*; + import sun.jvm.hotspot.debugger.remote.x86.*; + import sun.jvm.hotspot.debugger.remote.amd64.*; + import sun.jvm.hotspot.debugger.remote.ppc64.*; ++import sun.jvm.hotspot.debugger.remote.mips64.*; ++import sun.jvm.hotspot.debugger.remote.loongarch64.*; + + /** An implementation of Debugger which wraps a + RemoteDebugger, providing remote debugging via RMI. 
+@@ -76,6 +84,16 @@ public class RemoteDebuggerClient extends DebuggerBase implements JVMDebugger { + cachePageSize = 4096; + cacheNumPages = parseCacheNumPagesProperty(cacheSize / cachePageSize); + unalignedAccessesOkay = true; ++ } else if (cpu.equals("mips64") || cpu.equals("mips64el")) { ++ threadFactory = new RemoteMIPS64ThreadFactory(this); ++ cachePageSize = 4096; ++ cacheNumPages = parseCacheNumPagesProperty(cacheSize / cachePageSize); ++ unalignedAccessesOkay = true; ++ } else if (cpu.equals("loongarch64")) { ++ threadFactory = new RemoteLOONGARCH64ThreadFactory(this); ++ cachePageSize = 4096; ++ cacheNumPages = parseCacheNumPagesProperty(cacheSize / cachePageSize); ++ unalignedAccessesOkay = true; + } else { + try { + Class tf = Class.forName("sun.jvm.hotspot.debugger.remote." + +diff --git a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/remote/loongarch64/RemoteLOONGARCH64Thread.java b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/remote/loongarch64/RemoteLOONGARCH64Thread.java +new file mode 100644 +index 0000000000..242dd279e1 +--- /dev/null ++++ b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/remote/loongarch64/RemoteLOONGARCH64Thread.java +@@ -0,0 +1,54 @@ ++/* ++ * Copyright (c) 2002, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2018, 2021, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++package sun.jvm.hotspot.debugger.remote.loongarch64; ++ ++import sun.jvm.hotspot.debugger.*; ++import sun.jvm.hotspot.debugger.loongarch64.*; ++import sun.jvm.hotspot.debugger.remote.*; ++import sun.jvm.hotspot.utilities.*; ++ ++public class RemoteLOONGARCH64Thread extends RemoteThread { ++ public RemoteLOONGARCH64Thread(RemoteDebuggerClient debugger, Address addr) { ++ super(debugger, addr); ++ } ++ ++ public RemoteLOONGARCH64Thread(RemoteDebuggerClient debugger, long id) { ++ super(debugger, id); ++ } ++ ++ public ThreadContext getContext() throws IllegalThreadStateException { ++ RemoteLOONGARCH64ThreadContext context = new RemoteLOONGARCH64ThreadContext(debugger); ++ long[] regs = (addr != null)? 
debugger.getThreadIntegerRegisterSet(addr) : ++ debugger.getThreadIntegerRegisterSet(id); ++ if (Assert.ASSERTS_ENABLED) { ++ Assert.that(regs.length == LOONGARCH64ThreadContext.NPRGREG, "size of register set must match"); ++ } ++ for (int i = 0; i < regs.length; i++) { ++ context.setRegister(i, regs[i]); ++ } ++ return context; ++ } ++} +diff --git a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/remote/loongarch64/RemoteLOONGARCH64ThreadContext.java b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/remote/loongarch64/RemoteLOONGARCH64ThreadContext.java +new file mode 100644 +index 0000000000..634d5ad049 +--- /dev/null ++++ b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/remote/loongarch64/RemoteLOONGARCH64ThreadContext.java +@@ -0,0 +1,51 @@ ++/* ++ * Copyright (c) 2002, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2018, 2021, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++package sun.jvm.hotspot.debugger.remote.loongarch64; ++ ++import sun.jvm.hotspot.debugger.*; ++import sun.jvm.hotspot.debugger.loongarch64.*; ++import sun.jvm.hotspot.debugger.remote.*; ++ ++public class RemoteLOONGARCH64ThreadContext extends LOONGARCH64ThreadContext { ++ private RemoteDebuggerClient debugger; ++ ++ public RemoteLOONGARCH64ThreadContext(RemoteDebuggerClient debugger) { ++ super(); ++ this.debugger = debugger; ++ } ++ ++ /** This can't be implemented in this class since we would have to ++ tie the implementation to, for example, the debugging system */ ++ public void setRegisterAsAddress(int index, Address value) { ++ setRegister(index, debugger.getAddressValue(value)); ++ } ++ ++ /** This can't be implemented in this class since we would have to ++ tie the implementation to, for example, the debugging system */ ++ public Address getRegisterAsAddress(int index) { ++ return debugger.newAddress(getRegister(index)); ++ } ++} +diff --git a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/remote/loongarch64/RemoteLOONGARCH64ThreadFactory.java b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/remote/loongarch64/RemoteLOONGARCH64ThreadFactory.java +new file mode 100644 +index 0000000000..4fb9cc7c06 +--- /dev/null ++++ b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/remote/loongarch64/RemoteLOONGARCH64ThreadFactory.java +@@ -0,0 +1,45 @@ ++/* ++ * Copyright (c) 2002, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2018, 2021, Loongson Technology. All rights reserved. 
++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++package sun.jvm.hotspot.debugger.remote.loongarch64; ++ ++import sun.jvm.hotspot.debugger.*; ++import sun.jvm.hotspot.debugger.remote.*; ++ ++public class RemoteLOONGARCH64ThreadFactory implements RemoteThreadFactory { ++ private RemoteDebuggerClient debugger; ++ ++ public RemoteLOONGARCH64ThreadFactory(RemoteDebuggerClient debugger) { ++ this.debugger = debugger; ++ } ++ ++ public ThreadProxy createThreadWrapper(Address threadIdentifierAddr) { ++ return new RemoteLOONGARCH64Thread(debugger, threadIdentifierAddr); ++ } ++ ++ public ThreadProxy createThreadWrapper(long id) { ++ return new RemoteLOONGARCH64Thread(debugger, id); ++ } ++} +diff --git a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/remote/mips64/RemoteMIPS64Thread.java b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/remote/mips64/RemoteMIPS64Thread.java +new file mode 100644 +index 0000000000..c2f7d841f2 +--- /dev/null ++++ b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/remote/mips64/RemoteMIPS64Thread.java +@@ -0,0 +1,54 @@ ++/* ++ * Copyright (c) 2002, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2018, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ * ++ */ ++ ++package sun.jvm.hotspot.debugger.remote.mips64; ++ ++import sun.jvm.hotspot.debugger.*; ++import sun.jvm.hotspot.debugger.mips64.*; ++import sun.jvm.hotspot.debugger.remote.*; ++import sun.jvm.hotspot.utilities.*; ++ ++public class RemoteMIPS64Thread extends RemoteThread { ++ public RemoteMIPS64Thread(RemoteDebuggerClient debugger, Address addr) { ++ super(debugger, addr); ++ } ++ ++ public RemoteMIPS64Thread(RemoteDebuggerClient debugger, long id) { ++ super(debugger, id); ++ } ++ ++ public ThreadContext getContext() throws IllegalThreadStateException { ++ RemoteMIPS64ThreadContext context = new RemoteMIPS64ThreadContext(debugger); ++ long[] regs = (addr != null)? debugger.getThreadIntegerRegisterSet(addr) : ++ debugger.getThreadIntegerRegisterSet(id); ++ if (Assert.ASSERTS_ENABLED) { ++ Assert.that(regs.length == MIPS64ThreadContext.NPRGREG, "size of register set must match"); ++ } ++ for (int i = 0; i < regs.length; i++) { ++ context.setRegister(i, regs[i]); ++ } ++ return context; ++ } ++} +diff --git a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/remote/mips64/RemoteMIPS64ThreadContext.java b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/remote/mips64/RemoteMIPS64ThreadContext.java +new file mode 100644 +index 0000000000..23646905d7 +--- /dev/null ++++ b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/remote/mips64/RemoteMIPS64ThreadContext.java +@@ -0,0 +1,51 @@ ++/* ++ * Copyright (c) 2002, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2018, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ * ++ */ ++ ++package sun.jvm.hotspot.debugger.remote.mips64; ++ ++import sun.jvm.hotspot.debugger.*; ++import sun.jvm.hotspot.debugger.mips64.*; ++import sun.jvm.hotspot.debugger.remote.*; ++ ++public class RemoteMIPS64ThreadContext extends MIPS64ThreadContext { ++ private RemoteDebuggerClient debugger; ++ ++ public RemoteMIPS64ThreadContext(RemoteDebuggerClient debugger) { ++ super(); ++ this.debugger = debugger; ++ } ++ ++ /** This can't be implemented in this class since we would have to ++ tie the implementation to, for example, the debugging system */ ++ public void setRegisterAsAddress(int index, Address value) { ++ setRegister(index, debugger.getAddressValue(value)); ++ } ++ ++ /** This can't be implemented in this class since we would have to ++ tie the implementation to, for example, the debugging system */ ++ public Address getRegisterAsAddress(int index) { ++ return debugger.newAddress(getRegister(index)); ++ } ++} +diff --git a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/remote/mips64/RemoteMIPS64ThreadFactory.java b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/remote/mips64/RemoteMIPS64ThreadFactory.java +new file mode 100644 +index 0000000000..b39b014490 +--- /dev/null ++++ b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/remote/mips64/RemoteMIPS64ThreadFactory.java +@@ -0,0 +1,45 @@ ++/* ++ * Copyright (c) 2002, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2018, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ * ++ */ ++ ++package sun.jvm.hotspot.debugger.remote.mips64; ++ ++import sun.jvm.hotspot.debugger.*; ++import sun.jvm.hotspot.debugger.remote.*; ++ ++public class RemoteMIPS64ThreadFactory implements RemoteThreadFactory { ++ private RemoteDebuggerClient debugger; ++ ++ public RemoteMIPS64ThreadFactory(RemoteDebuggerClient debugger) { ++ this.debugger = debugger; ++ } ++ ++ public ThreadProxy createThreadWrapper(Address threadIdentifierAddr) { ++ return new RemoteMIPS64Thread(debugger, threadIdentifierAddr); ++ } ++ ++ public ThreadProxy createThreadWrapper(long id) { ++ return new RemoteMIPS64Thread(debugger, id); ++ } ++} +diff --git a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/Threads.java b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/Threads.java +index 190062785a..04681fa0e7 100644 +--- a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/Threads.java ++++ b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/Threads.java +@@ -22,6 +22,12 @@ + * + */ + ++/* ++ * This file has been modified by Loongson Technology in 2021, These ++ * modifications are Copyright (c) 2019, 2021, Loongson Technology, and are made ++ * available on the same license terms set forth above. ++ */ ++ + package sun.jvm.hotspot.runtime; + + import java.util.*; +@@ -39,6 +45,8 @@ import sun.jvm.hotspot.runtime.linux_x86.LinuxX86JavaThreadPDAccess; + import sun.jvm.hotspot.runtime.linux_amd64.LinuxAMD64JavaThreadPDAccess; + import sun.jvm.hotspot.runtime.linux_aarch64.LinuxAARCH64JavaThreadPDAccess; + import sun.jvm.hotspot.runtime.linux_ppc64.LinuxPPC64JavaThreadPDAccess; ++import sun.jvm.hotspot.runtime.linux_mips64.LinuxMIPS64JavaThreadPDAccess; ++import sun.jvm.hotspot.runtime.linux_loongarch64.LinuxLOONGARCH64JavaThreadPDAccess; + import sun.jvm.hotspot.runtime.linux_sparc.LinuxSPARCJavaThreadPDAccess; + import sun.jvm.hotspot.runtime.bsd_x86.BsdX86JavaThreadPDAccess; + import sun.jvm.hotspot.runtime.bsd_amd64.BsdAMD64JavaThreadPDAccess; +@@ -99,6 +107,10 @@ public class Threads { + access = new LinuxPPC64JavaThreadPDAccess(); + } else if (cpu.equals("aarch64")) { + access = new LinuxAARCH64JavaThreadPDAccess(); ++ } else if (cpu.equals("mips64")) { ++ access = new LinuxMIPS64JavaThreadPDAccess(); ++ } else if (cpu.equals("loongarch64")) { ++ access = new LinuxLOONGARCH64JavaThreadPDAccess(); + } else { + try { + access = (JavaThreadPDAccess) +diff --git a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/linux_loongarch64/LinuxLOONGARCH64JavaThreadPDAccess.java b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/linux_loongarch64/LinuxLOONGARCH64JavaThreadPDAccess.java +new file mode 100644 +index 0000000000..ee1003e352 +--- /dev/null ++++ b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/linux_loongarch64/LinuxLOONGARCH64JavaThreadPDAccess.java +@@ -0,0 +1,133 @@ ++/* ++ * Copyright (c) 2014, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2018, 2021, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++package sun.jvm.hotspot.runtime.linux_loongarch64; ++ ++import java.io.*; ++import java.util.*; ++ ++import sun.jvm.hotspot.debugger.*; ++import sun.jvm.hotspot.debugger.loongarch64.*; ++import sun.jvm.hotspot.runtime.*; ++import sun.jvm.hotspot.runtime.loongarch64.*; ++import sun.jvm.hotspot.types.*; ++import sun.jvm.hotspot.utilities.*; ++ ++public class LinuxLOONGARCH64JavaThreadPDAccess implements JavaThreadPDAccess { ++ private static AddressField lastJavaFPField; ++ private static AddressField osThreadField; ++ ++ // Field from OSThread ++ private static CIntegerField osThreadThreadIDField; ++ ++ // This is currently unneeded but is being kept in case we change ++ // the currentFrameGuess algorithm ++ private static final long GUESS_SCAN_RANGE = 128 * 1024; ++ ++ static { ++ VM.registerVMInitializedObserver(new Observer() { ++ public void update(Observable o, Object data) { ++ initialize(VM.getVM().getTypeDataBase()); ++ } ++ }); ++ } ++ ++ private static synchronized void initialize(TypeDataBase db) { ++ Type type = db.lookupType("JavaThread"); ++ osThreadField = type.getAddressField("_osthread"); ++ ++ Type anchorType = db.lookupType("JavaFrameAnchor"); ++ lastJavaFPField = anchorType.getAddressField("_last_Java_fp"); ++ ++ Type osThreadType = db.lookupType("OSThread"); ++ osThreadThreadIDField = osThreadType.getCIntegerField("_thread_id"); ++ } ++ ++ public Address getLastJavaFP(Address addr) { ++ return lastJavaFPField.getValue(addr.addOffsetTo(sun.jvm.hotspot.runtime.JavaThread.getAnchorField().getOffset())); ++ } ++ ++ public Address getLastJavaPC(Address addr) { ++ return null; ++ } ++ ++ public Address getBaseOfStackPointer(Address addr) { ++ return null; ++ } ++ ++ public Frame getLastFramePD(JavaThread thread, Address addr) { ++ Address fp = thread.getLastJavaFP(); ++ if (fp == null) { ++ return null; // no information ++ } ++ return new LOONGARCH64Frame(thread.getLastJavaSP(), fp); ++ } ++ ++ public RegisterMap newRegisterMap(JavaThread thread, boolean updateMap) { ++ return new LOONGARCH64RegisterMap(thread, updateMap); ++ } ++ ++ public Frame getCurrentFrameGuess(JavaThread thread, Address addr) { ++ ThreadProxy t = getThreadProxy(addr); ++ LOONGARCH64ThreadContext context = (LOONGARCH64ThreadContext) t.getContext(); ++ LOONGARCH64CurrentFrameGuess guesser = new LOONGARCH64CurrentFrameGuess(context, thread); ++ if (!guesser.run(GUESS_SCAN_RANGE)) { ++ return null; ++ } ++ if (guesser.getPC() == null) { ++ return new LOONGARCH64Frame(guesser.getSP(), guesser.getFP()); ++ } else { ++ return new LOONGARCH64Frame(guesser.getSP(), guesser.getFP(), guesser.getPC()); ++ } ++ } ++ ++ public void printThreadIDOn(Address addr, PrintStream tty) { ++ tty.print(getThreadProxy(addr)); ++ } ++ ++ public void printInfoOn(Address threadAddr, PrintStream tty) { ++ tty.print("Thread id: "); ++ printThreadIDOn(threadAddr, tty); ++ // tty.println("\nPostJavaState: " + getPostJavaState(threadAddr)); ++ } ++ ++ public Address getLastSP(Address 
addr) { ++ ThreadProxy t = getThreadProxy(addr); ++ LOONGARCH64ThreadContext context = (LOONGARCH64ThreadContext) t.getContext(); ++ return context.getRegisterAsAddress(LOONGARCH64ThreadContext.SP); ++ } ++ ++ public ThreadProxy getThreadProxy(Address addr) { ++ // Addr is the address of the JavaThread. ++ // Fetch the OSThread (for now and for simplicity, not making a ++ // separate "OSThread" class in this package) ++ Address osThreadAddr = osThreadField.getValue(addr); ++ // Get the address of the _thread_id from the OSThread ++ Address threadIdAddr = osThreadAddr.addOffsetTo(osThreadThreadIDField.getOffset()); ++ ++ JVMDebugger debugger = VM.getVM().getDebugger(); ++ return debugger.getThreadForIdentifierAddress(threadIdAddr); ++ } ++} +diff --git a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/linux_mips64/LinuxMIPS64JavaThreadPDAccess.java b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/linux_mips64/LinuxMIPS64JavaThreadPDAccess.java +new file mode 100644 +index 0000000000..181f431b64 +--- /dev/null ++++ b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/linux_mips64/LinuxMIPS64JavaThreadPDAccess.java +@@ -0,0 +1,133 @@ ++/* ++ * Copyright (c) 2014, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2018, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ * ++ */ ++ ++package sun.jvm.hotspot.runtime.linux_mips64; ++ ++import java.io.*; ++import java.util.*; ++ ++import sun.jvm.hotspot.debugger.*; ++import sun.jvm.hotspot.debugger.mips64.*; ++import sun.jvm.hotspot.runtime.*; ++import sun.jvm.hotspot.runtime.mips64.*; ++import sun.jvm.hotspot.types.*; ++import sun.jvm.hotspot.utilities.*; ++ ++public class LinuxMIPS64JavaThreadPDAccess implements JavaThreadPDAccess { ++ private static AddressField osThreadField; ++ ++ // Field from OSThread ++ private static CIntegerField osThreadThreadIDField; ++ ++ // This is currently unneeded but is being kept in case we change ++ // the currentFrameGuess algorithm ++ private static final long GUESS_SCAN_RANGE = 128 * 1024; ++ ++ static { ++ VM.registerVMInitializedObserver(new Observer() { ++ public void update(Observable o, Object data) { ++ initialize(VM.getVM().getTypeDataBase()); ++ } ++ }); ++ } ++ ++ private static synchronized void initialize(TypeDataBase db) { ++ Type type = db.lookupType("JavaThread"); ++ osThreadField = type.getAddressField("_osthread"); ++ ++ Type osThreadType = db.lookupType("OSThread"); ++ osThreadThreadIDField = osThreadType.getCIntegerField("_thread_id"); ++ } ++ ++ public Address getLastJavaFP(Address addr) { ++ return null; ++ } ++ ++ public Address getLastJavaPC(Address addr) { ++ return null; ++ } ++ ++ public Address getBaseOfStackPointer(Address addr) { ++ return null; ++ } ++ ++ public Frame getLastFramePD(JavaThread thread, Address addr) { ++ Address fp = thread.getLastJavaFP(); ++ if (fp == null) { ++ return null; // no information ++ } ++ return new MIPS64Frame(thread.getLastJavaSP(), fp); ++ } ++ ++ public RegisterMap newRegisterMap(JavaThread thread, boolean updateMap) { ++ return new MIPS64RegisterMap(thread, updateMap); ++ } ++ ++ public Frame getCurrentFrameGuess(JavaThread thread, Address addr) { ++ ThreadProxy t = getThreadProxy(addr); ++ MIPS64ThreadContext context = (MIPS64ThreadContext) t.getContext(); ++ MIPS64CurrentFrameGuess guesser = new MIPS64CurrentFrameGuess(context, thread); ++ if (!guesser.run(GUESS_SCAN_RANGE)) { ++ return null; ++ } ++ if (guesser.getPC() == null) { ++ return new MIPS64Frame(guesser.getSP(), guesser.getFP()); ++ } else { ++ return new MIPS64Frame(guesser.getSP(), guesser.getFP(), guesser.getPC()); ++ } ++ } ++ ++ public void printThreadIDOn(Address addr, PrintStream tty) { ++ tty.print(getThreadProxy(addr)); ++ } ++ ++ public void printInfoOn(Address threadAddr, PrintStream tty) { ++ tty.print("Thread id: "); ++ printThreadIDOn(threadAddr, tty); ++ // tty.println("\nPostJavaState: " + getPostJavaState(threadAddr)); ++ } ++ ++ public Address getLastSP(Address addr) { ++ ThreadProxy t = getThreadProxy(addr); ++ MIPS64ThreadContext context = (MIPS64ThreadContext) t.getContext(); ++ return context.getRegisterAsAddress(MIPS64ThreadContext.SP); ++ } ++ ++ public Address getLastFP(Address addr) { ++ return getLastSP(addr).getAddressAt(0); ++ } ++ ++ public ThreadProxy getThreadProxy(Address addr) { ++ // Addr is the address of the JavaThread. 
++ // Fetch the OSThread (for now and for simplicity, not making a ++ // separate "OSThread" class in this package) ++ Address osThreadAddr = osThreadField.getValue(addr); ++ // Get the address of the _thread_id from the OSThread ++ Address threadIdAddr = osThreadAddr.addOffsetTo(osThreadThreadIDField.getOffset()); ++ ++ JVMDebugger debugger = VM.getVM().getDebugger(); ++ return debugger.getThreadForIdentifierAddress(threadIdAddr); ++ } ++} +diff --git a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/loongarch64/LOONGARCH64CurrentFrameGuess.java b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/loongarch64/LOONGARCH64CurrentFrameGuess.java +new file mode 100644 +index 0000000000..824270e132 +--- /dev/null ++++ b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/loongarch64/LOONGARCH64CurrentFrameGuess.java +@@ -0,0 +1,250 @@ ++/* ++ * Copyright (c) 2001, 2006, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2018, 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++package sun.jvm.hotspot.runtime.loongarch64; ++ ++import sun.jvm.hotspot.debugger.*; ++import sun.jvm.hotspot.debugger.loongarch64.*; ++import sun.jvm.hotspot.code.*; ++import sun.jvm.hotspot.interpreter.*; ++import sun.jvm.hotspot.runtime.*; ++ ++/**

Should be able to be used on all loongarch64 platforms we support ++ (Win32, Solaris/loongarch64, and soon Linux) to implement JavaThread's ++ "currentFrameGuess()" functionality. Input is an LOONGARCH64ThreadContext; ++ output is SP, FP, and PC for an LOONGARCH64Frame. Instantiation of the ++ LOONGARCH64Frame is left to the caller, since we may need to subclass ++ LOONGARCH64Frame to support signal handler frames on Unix platforms. ++ ++
Algorithm is to walk up the stack within a given range (say, ++ 512K at most) looking for a plausible PC and SP for a Java frame, ++ also considering those coming in from the context. If we find a PC ++ that belongs to the VM (i.e., in generated code like the ++ interpreter or CodeCache) then we try to find an associated EBP. ++ We repeat this until we either find a complete frame or run out of ++ stack to look at.
*/ ++ ++public class LOONGARCH64CurrentFrameGuess { ++ private LOONGARCH64ThreadContext context; ++ private JavaThread thread; ++ private Address spFound; ++ private Address fpFound; ++ private Address pcFound; ++ ++ private static final boolean DEBUG = System.getProperty("sun.jvm.hotspot.runtime.loongarch64.LOONGARCH64Frame.DEBUG") ++ != null; ++ ++ public LOONGARCH64CurrentFrameGuess(LOONGARCH64ThreadContext context, ++ JavaThread thread) { ++ this.context = context; ++ this.thread = thread; ++ } ++ ++ /** Returns false if not able to find a frame within a reasonable range. */ ++ public boolean run(long regionInBytesToSearch) { ++ Address sp = context.getRegisterAsAddress(LOONGARCH64ThreadContext.SP); ++ Address pc = context.getRegisterAsAddress(LOONGARCH64ThreadContext.PC); ++ Address fp = context.getRegisterAsAddress(LOONGARCH64ThreadContext.FP); ++ if (sp == null) { ++ // Bail out if no last java frame eithe ++ if (thread.getLastJavaSP() != null) { ++ setValues(thread.getLastJavaSP(), thread.getLastJavaFP(), null); ++ return true; ++ } ++ // Bail out ++ return false; ++ } ++ Address end = sp.addOffsetTo(regionInBytesToSearch); ++ VM vm = VM.getVM(); ++ ++ setValues(null, null, null); // Assume we're not going to find anything ++ ++ if (vm.isJavaPCDbg(pc)) { ++ if (vm.isClientCompiler()) { ++ // If the topmost frame is a Java frame, we are (pretty much) ++ // guaranteed to have a viable EBP. We should be more robust ++ // than this (we have the potential for losing entire threads' ++ // stack traces) but need to see how much work we really have ++ // to do here. Searching the stack for an (SP, FP) pair is ++ // hard since it's easy to misinterpret inter-frame stack ++ // pointers as base-of-frame pointers; we also don't know the ++ // sizes of C1 frames (not registered in the nmethod) so can't ++ // derive them from ESP. ++ ++ setValues(sp, fp, pc); ++ return true; ++ } else { ++ if (vm.getInterpreter().contains(pc)) { ++ if (DEBUG) { ++ System.out.println("CurrentFrameGuess: choosing interpreter frame: sp = " + ++ sp + ", fp = " + fp + ", pc = " + pc); ++ } ++ setValues(sp, fp, pc); ++ return true; ++ } ++ ++ // For the server compiler, EBP is not guaranteed to be valid ++ // for compiled code. In addition, an earlier attempt at a ++ // non-searching algorithm (see below) failed because the ++ // stack pointer from the thread context was pointing ++ // (considerably) beyond the ostensible end of the stack, into ++ // garbage; walking from the topmost frame back caused a crash. ++ // ++ // This algorithm takes the current PC as a given and tries to ++ // find the correct corresponding SP by walking up the stack ++ // and repeatedly performing stackwalks (very inefficient). ++ // ++ // FIXME: there is something wrong with stackwalking across ++ // adapter frames...this is likely to be the root cause of the ++ // failure with the simpler algorithm below. ++ ++ for (long offset = 0; ++ offset < regionInBytesToSearch; ++ offset += vm.getAddressSize()) { ++ try { ++ Address curSP = sp.addOffsetTo(offset); ++ Frame frame = new LOONGARCH64Frame(curSP, null, pc); ++ RegisterMap map = thread.newRegisterMap(false); ++ while (frame != null) { ++ if (frame.isEntryFrame() && frame.entryFrameIsFirst()) { ++ // We were able to traverse all the way to the ++ // bottommost Java frame. ++ // This sp looks good. Keep it. 
++ if (DEBUG) { ++ System.out.println("CurrentFrameGuess: Choosing sp = " + curSP + ", pc = " + pc); ++ } ++ setValues(curSP, null, pc); ++ return true; ++ } ++ frame = frame.sender(map); ++ } ++ } catch (Exception e) { ++ if (DEBUG) { ++ System.out.println("CurrentFrameGuess: Exception " + e + " at offset " + offset); ++ } ++ // Bad SP. Try another. ++ } ++ } ++ ++ // Were not able to find a plausible SP to go with this PC. ++ // Bail out. ++ return false; ++ ++ /* ++ // Original algorithm which does not work because SP was ++ // pointing beyond where it should have: ++ ++ // For the server compiler, EBP is not guaranteed to be valid ++ // for compiled code. We see whether the PC is in the ++ // interpreter and take care of that, otherwise we run code ++ // (unfortunately) duplicated from LOONGARCH64Frame.senderForCompiledFrame. ++ ++ CodeCache cc = vm.getCodeCache(); ++ if (cc.contains(pc)) { ++ CodeBlob cb = cc.findBlob(pc); ++ ++ // See if we can derive a frame pointer from SP and PC ++ // NOTE: This is the code duplicated from LOONGARCH64Frame ++ Address saved_fp = null; ++ int llink_offset = cb.getLinkOffset(); ++ if (llink_offset >= 0) { ++ // Restore base-pointer, since next frame might be an interpreter frame. ++ Address fp_addr = sp.addOffsetTo(VM.getVM().getAddressSize() * llink_offset); ++ saved_fp = fp_addr.getAddressAt(0); ++ } ++ ++ setValues(sp, saved_fp, pc); ++ return true; ++ } ++ */ ++ } ++ } else { ++ // If the current program counter was not known to us as a Java ++ // PC, we currently assume that we are in the run-time system ++ // and attempt to look to thread-local storage for saved ESP and ++ // EBP. Note that if these are null (because we were, in fact, ++ // in Java code, i.e., vtable stubs or similar, and the SA ++ // didn't have enough insight into the target VM to understand ++ // that) then we are going to lose the entire stack trace for ++ // the thread, which is sub-optimal. FIXME. ++ ++ if (DEBUG) { ++ System.out.println("CurrentFrameGuess: choosing last Java frame: sp = " + ++ thread.getLastJavaSP() + ", fp = " + thread.getLastJavaFP()); ++ } ++ if (thread.getLastJavaSP() == null) { ++ return false; // No known Java frames on stack ++ } ++ ++ // The runtime has a nasty habit of not saving fp in the frame ++ // anchor, leaving us to grovel about in the stack to find a ++ // plausible address. Fortunately, this only happens in ++ // compiled code; there we always have a valid PC, and we always ++ // push LR and FP onto the stack as a pair, with FP at the lower ++ // address. ++ pc = thread.getLastJavaPC(); ++ fp = thread.getLastJavaFP(); ++ sp = thread.getLastJavaSP(); ++ ++ if (fp == null) { ++ CodeCache cc = vm.getCodeCache(); ++ if (cc.contains(pc)) { ++ CodeBlob cb = cc.findBlob(pc); ++ if (DEBUG) { ++ System.out.println("FP is null. Found blob frame size " + cb.getFrameSize()); ++ } ++ // See if we can derive a frame pointer from SP and PC ++ long link_offset = cb.getFrameSize() - 2 * VM.getVM().getAddressSize(); ++ if (link_offset >= 0) { ++ fp = sp.addOffsetTo(link_offset); ++ } ++ } ++ } ++ ++ // We found a PC in the frame anchor. Check that it's plausible, and ++ // if it is, use it. 
++ if (vm.isJavaPCDbg(pc)) { ++ setValues(sp, fp, pc); ++ } else { ++ setValues(sp, fp, null); ++ } ++ ++ return true; ++ } ++ } ++ ++ public Address getSP() { return spFound; } ++ public Address getFP() { return fpFound; } ++ /** May be null if getting values from thread-local storage; take ++ care to call the correct LOONGARCH64Frame constructor to recover this if ++ necessary */ ++ public Address getPC() { return pcFound; } ++ ++ private void setValues(Address sp, Address fp, Address pc) { ++ spFound = sp; ++ fpFound = fp; ++ pcFound = pc; ++ } ++} +diff --git a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/loongarch64/LOONGARCH64Frame.java b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/loongarch64/LOONGARCH64Frame.java +new file mode 100644 +index 0000000000..058afc94d0 +--- /dev/null ++++ b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/loongarch64/LOONGARCH64Frame.java +@@ -0,0 +1,526 @@ ++/* ++ * Copyright (c) 2001, 2015, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2018, 2021, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++package sun.jvm.hotspot.runtime.loongarch64; ++ ++import java.util.*; ++import sun.jvm.hotspot.code.*; ++import sun.jvm.hotspot.compiler.*; ++import sun.jvm.hotspot.debugger.*; ++import sun.jvm.hotspot.oops.*; ++import sun.jvm.hotspot.runtime.*; ++import sun.jvm.hotspot.types.*; ++import sun.jvm.hotspot.utilities.*; ++ ++/** Specialization of and implementation of abstract methods of the ++ Frame class for the loongarch64 family of CPUs. 
*/ ++ ++public class LOONGARCH64Frame extends Frame { ++ private static final boolean DEBUG; ++ static { ++ DEBUG = System.getProperty("sun.jvm.hotspot.runtime.loongarch64.LOONGARCH64Frame.DEBUG") != null; ++ } ++ ++ // Java frames ++ private static final int JAVA_FRAME_LINK_OFFSET = 0; ++ private static final int JAVA_FRAME_RETURN_ADDR_OFFSET = 1; ++ private static final int JAVA_FRAME_SENDER_SP_OFFSET = 2; ++ ++ // Native frames ++ private static final int NATIVE_FRAME_LINK_OFFSET = -2; ++ private static final int NATIVE_FRAME_RETURN_ADDR_OFFSET = -1; ++ private static final int NATIVE_FRAME_SENDER_SP_OFFSET = 0; ++ ++ // Interpreter frames ++ private static final int INTERPRETER_FRAME_SENDER_SP_OFFSET = -1; ++ private static final int INTERPRETER_FRAME_LAST_SP_OFFSET = INTERPRETER_FRAME_SENDER_SP_OFFSET - 1; ++ private static final int INTERPRETER_FRAME_LOCALS_OFFSET = INTERPRETER_FRAME_LAST_SP_OFFSET - 1; ++ private static final int INTERPRETER_FRAME_METHOD_OFFSET = INTERPRETER_FRAME_LOCALS_OFFSET - 1; ++ private static final int INTERPRETER_FRAME_MIRROR_OFFSET = INTERPRETER_FRAME_METHOD_OFFSET - 1; ++ private static final int INTERPRETER_FRAME_MDX_OFFSET = INTERPRETER_FRAME_MIRROR_OFFSET - 1; ++ private static final int INTERPRETER_FRAME_CACHE_OFFSET = INTERPRETER_FRAME_MDX_OFFSET - 1; ++ private static final int INTERPRETER_FRAME_BCX_OFFSET = INTERPRETER_FRAME_CACHE_OFFSET - 1; ++ private static final int INTERPRETER_FRAME_INITIAL_SP_OFFSET = INTERPRETER_FRAME_BCX_OFFSET - 1; ++ private static final int INTERPRETER_FRAME_MONITOR_BLOCK_TOP_OFFSET = INTERPRETER_FRAME_INITIAL_SP_OFFSET; ++ private static final int INTERPRETER_FRAME_MONITOR_BLOCK_BOTTOM_OFFSET = INTERPRETER_FRAME_INITIAL_SP_OFFSET; ++ ++ // Entry frames ++ private static final int ENTRY_FRAME_CALL_WRAPPER_OFFSET = -9; ++ ++ private static VMReg fp = new VMReg(22 << 1); ++ ++ // an additional field beyond sp and pc: ++ Address raw_fp; // frame pointer ++ private Address raw_unextendedSP; ++ ++ private LOONGARCH64Frame() { ++ } ++ ++ private void adjustForDeopt() { ++ if ( pc != null) { ++ // Look for a deopt pc and if it is deopted convert to original pc ++ CodeBlob cb = VM.getVM().getCodeCache().findBlob(pc); ++ if (cb != null && cb.isJavaMethod()) { ++ NMethod nm = (NMethod) cb; ++ if (pc.equals(nm.deoptHandlerBegin())) { ++ if (Assert.ASSERTS_ENABLED) { ++ Assert.that(this.getUnextendedSP() != null, "null SP in Java frame"); ++ } ++ // adjust pc if frame is deoptimized. 
++ pc = this.getUnextendedSP().getAddressAt(nm.origPCOffset()); ++ deoptimized = true; ++ } ++ } ++ } ++ } ++ ++ public LOONGARCH64Frame(Address raw_sp, Address raw_fp, Address pc) { ++ this.raw_sp = raw_sp; ++ this.raw_unextendedSP = raw_sp; ++ this.raw_fp = raw_fp; ++ this.pc = pc; ++ adjustUnextendedSP(); ++ ++ // Frame must be fully constructed before this call ++ adjustForDeopt(); ++ ++ if (DEBUG) { ++ System.out.println("LOONGARCH64Frame(sp, fp, pc): " + this); ++ dumpStack(); ++ } ++ } ++ ++ public LOONGARCH64Frame(Address raw_sp, Address raw_fp) { ++ this.raw_sp = raw_sp; ++ this.raw_unextendedSP = raw_sp; ++ this.raw_fp = raw_fp; ++ this.pc = raw_fp.getAddressAt(1 * VM.getVM().getAddressSize()); ++ adjustUnextendedSP(); ++ ++ // Frame must be fully constructed before this call ++ adjustForDeopt(); ++ ++ if (DEBUG) { ++ System.out.println("LOONGARCH64Frame(sp, fp): " + this); ++ dumpStack(); ++ } ++ } ++ ++ public LOONGARCH64Frame(Address raw_sp, Address raw_unextendedSp, Address raw_fp, Address pc) { ++ this.raw_sp = raw_sp; ++ this.raw_unextendedSP = raw_unextendedSp; ++ this.raw_fp = raw_fp; ++ this.pc = pc; ++ adjustUnextendedSP(); ++ ++ // Frame must be fully constructed before this call ++ adjustForDeopt(); ++ ++ if (DEBUG) { ++ System.out.println("LOONGARCH64Frame(sp, unextendedSP, fp, pc): " + this); ++ dumpStack(); ++ } ++ ++ } ++ ++ public Object clone() { ++ LOONGARCH64Frame frame = new LOONGARCH64Frame(); ++ frame.raw_sp = raw_sp; ++ frame.raw_unextendedSP = raw_unextendedSP; ++ frame.raw_fp = raw_fp; ++ frame.pc = pc; ++ frame.deoptimized = deoptimized; ++ return frame; ++ } ++ ++ public boolean equals(Object arg) { ++ if (arg == null) { ++ return false; ++ } ++ ++ if (!(arg instanceof LOONGARCH64Frame)) { ++ return false; ++ } ++ ++ LOONGARCH64Frame other = (LOONGARCH64Frame) arg; ++ ++ return (AddressOps.equal(getSP(), other.getSP()) && ++ AddressOps.equal(getUnextendedSP(), other.getUnextendedSP()) && ++ AddressOps.equal(getFP(), other.getFP()) && ++ AddressOps.equal(getPC(), other.getPC())); ++ } ++ ++ public int hashCode() { ++ if (raw_sp == null) { ++ return 0; ++ } ++ ++ return raw_sp.hashCode(); ++ } ++ ++ public String toString() { ++ return "sp: " + (getSP() == null? "null" : getSP().toString()) + ++ ", unextendedSP: " + (getUnextendedSP() == null? "null" : getUnextendedSP().toString()) + ++ ", fp: " + (getFP() == null? "null" : getFP().toString()) + ++ ", pc: " + (pc == null? "null" : pc.toString()); ++ } ++ ++ // accessors for the instance variables ++ public Address getFP() { return raw_fp; } ++ public Address getSP() { return raw_sp; } ++ public Address getID() { return raw_sp; } ++ ++ // FIXME: not implemented yet (should be done for Solaris/LOONGARCH) ++ public boolean isSignalHandlerFrameDbg() { return false; } ++ public int getSignalNumberDbg() { return 0; } ++ public String getSignalNameDbg() { return null; } ++ ++ public boolean isInterpretedFrameValid() { ++ if (Assert.ASSERTS_ENABLED) { ++ Assert.that(isInterpretedFrame(), "Not an interpreted frame"); ++ } ++ ++ // These are reasonable sanity checks ++ if (getFP() == null || getFP().andWithMask(0x3) != null) { ++ return false; ++ } ++ ++ if (getSP() == null || getSP().andWithMask(0x3) != null) { ++ return false; ++ } ++ ++ if (getFP().addOffsetTo(INTERPRETER_FRAME_INITIAL_SP_OFFSET * VM.getVM().getAddressSize()).lessThan(getSP())) { ++ return false; ++ } ++ ++ // These are hacks to keep us out of trouble. 
++ // The problem with these is that they mask other problems ++ if (getFP().lessThanOrEqual(getSP())) { ++ // this attempts to deal with unsigned comparison above ++ return false; ++ } ++ ++ if (getFP().minus(getSP()) > 4096 * VM.getVM().getAddressSize()) { ++ // stack frames shouldn't be large. ++ return false; ++ } ++ ++ return true; ++ } ++ ++ // FIXME: not applicable in current system ++ // void patch_pc(Thread* thread, address pc); ++ ++ public Frame sender(RegisterMap regMap, CodeBlob cb) { ++ LOONGARCH64RegisterMap map = (LOONGARCH64RegisterMap) regMap; ++ ++ if (Assert.ASSERTS_ENABLED) { ++ Assert.that(map != null, "map must be set"); ++ } ++ ++ // Default is we done have to follow them. The sender_for_xxx will ++ // update it accordingly ++ map.setIncludeArgumentOops(false); ++ ++ if (isEntryFrame()) return senderForEntryFrame(map); ++ if (isInterpretedFrame()) return senderForInterpreterFrame(map); ++ ++ if(cb == null) { ++ cb = VM.getVM().getCodeCache().findBlob(getPC()); ++ } else { ++ if (Assert.ASSERTS_ENABLED) { ++ Assert.that(cb.equals(VM.getVM().getCodeCache().findBlob(getPC())), "Must be the same"); ++ } ++ } ++ ++ if (cb != null) { ++ return senderForCompiledFrame(map, cb); ++ } ++ ++ // Must be native-compiled frame, i.e. the marshaling code for native ++ // methods that exists in the core system. ++ return new LOONGARCH64Frame(getSenderSP(), getLink(), getSenderPC()); ++ } ++ ++ private Frame senderForEntryFrame(LOONGARCH64RegisterMap map) { ++ if (DEBUG) { ++ System.out.println("senderForEntryFrame"); ++ } ++ if (Assert.ASSERTS_ENABLED) { ++ Assert.that(map != null, "map must be set"); ++ } ++ // Java frame called from C; skip all C frames and return top C ++ // frame of that chunk as the sender ++ LOONGARCH64JavaCallWrapper jcw = (LOONGARCH64JavaCallWrapper) getEntryFrameCallWrapper(); ++ if (Assert.ASSERTS_ENABLED) { ++ Assert.that(!entryFrameIsFirst(), "next Java fp must be non zero"); ++ Assert.that(jcw.getLastJavaSP().greaterThan(getSP()), "must be above this frame on stack"); ++ } ++ LOONGARCH64Frame fr; ++ if (jcw.getLastJavaPC() != null) { ++ fr = new LOONGARCH64Frame(jcw.getLastJavaSP(), jcw.getLastJavaFP(), jcw.getLastJavaPC()); ++ } else { ++ fr = new LOONGARCH64Frame(jcw.getLastJavaSP(), jcw.getLastJavaFP()); ++ } ++ map.clear(); ++ if (Assert.ASSERTS_ENABLED) { ++ Assert.that(map.getIncludeArgumentOops(), "should be set by clear"); ++ } ++ return fr; ++ } ++ ++ //------------------------------------------------------------------------------ ++ // frame::adjust_unextended_sp ++ private void adjustUnextendedSP() { ++ // On loongarch, sites calling method handle intrinsics and lambda forms are treated ++ // as any other call site. Therefore, no special action is needed when we are ++ // returning to any of these call sites. ++ ++ CodeBlob cb = cb(); ++ NMethod senderNm = (cb == null) ? null : cb.asNMethodOrNull(); ++ if (senderNm != null) { ++ // If the sender PC is a deoptimization point, get the original PC. 
++ if (senderNm.isDeoptEntry(getPC()) || ++ senderNm.isDeoptMhEntry(getPC())) { ++ // DEBUG_ONLY(verifyDeoptriginalPc(senderNm, raw_unextendedSp)); ++ } ++ } ++ } ++ ++ private Frame senderForInterpreterFrame(LOONGARCH64RegisterMap map) { ++ if (DEBUG) { ++ System.out.println("senderForInterpreterFrame"); ++ } ++ Address unextendedSP = addressOfStackSlot(INTERPRETER_FRAME_SENDER_SP_OFFSET).getAddressAt(0); ++ Address sp = getSenderSP(); ++ // We do not need to update the callee-save register mapping because above ++ // us is either another interpreter frame or a converter-frame, but never ++ // directly a compiled frame. ++ // 11/24/04 SFG. With the removal of adapter frames this is no longer true. ++ // However c2 no longer uses callee save register for java calls so there ++ // are no callee register to find. ++ ++ if (map.getUpdateMap()) ++ updateMapWithSavedLink(map, addressOfStackSlot(JAVA_FRAME_LINK_OFFSET)); ++ ++ return new LOONGARCH64Frame(sp, unextendedSP, getLink(), getSenderPC()); ++ } ++ ++ private void updateMapWithSavedLink(RegisterMap map, Address savedFPAddr) { ++ map.setLocation(fp, savedFPAddr); ++ } ++ ++ private Frame senderForCompiledFrame(LOONGARCH64RegisterMap map, CodeBlob cb) { ++ if (DEBUG) { ++ System.out.println("senderForCompiledFrame"); ++ } ++ ++ // ++ // NOTE: some of this code is (unfortunately) duplicated in LOONGARCH64CurrentFrameGuess ++ // ++ ++ if (Assert.ASSERTS_ENABLED) { ++ Assert.that(map != null, "map must be set"); ++ } ++ ++ // frame owned by optimizing compiler ++ if (Assert.ASSERTS_ENABLED) { ++ Assert.that(cb.getFrameSize() >= 0, "must have non-zero frame size"); ++ } ++ Address senderSP = getUnextendedSP().addOffsetTo(cb.getFrameSize()); ++ ++ // On Intel the return_address is always the word on the stack ++ Address senderPC = senderSP.getAddressAt(-1 * VM.getVM().getAddressSize()); ++ ++ // This is the saved value of EBP which may or may not really be an FP. ++ // It is only an FP if the sender is an interpreter frame (or C1?). ++ Address savedFPAddr = senderSP.addOffsetTo(- JAVA_FRAME_SENDER_SP_OFFSET * VM.getVM().getAddressSize()); ++ ++ if (map.getUpdateMap()) { ++ // Tell GC to use argument oopmaps for some runtime stubs that need it. ++ // For C1, the runtime stub might not have oop maps, so set this flag ++ // outside of update_register_map. ++ map.setIncludeArgumentOops(cb.callerMustGCArguments()); ++ ++ if (cb.getOopMaps() != null) { ++ ImmutableOopMapSet.updateRegisterMap(this, cb, map, true); ++ } ++ ++ // Since the prolog does the save and restore of EBP there is no oopmap ++ // for it so we must fill in its location as if there was an oopmap entry ++ // since if our caller was compiled code there could be live jvm state in it. ++ updateMapWithSavedLink(map, savedFPAddr); ++ } ++ ++ return new LOONGARCH64Frame(senderSP, savedFPAddr.getAddressAt(0), senderPC); ++ } ++ ++ protected boolean hasSenderPD() { ++ // FIXME ++ // Check for null ebp? Need to do some tests. 
++ return true; ++ } ++ ++ public long frameSize() { ++ return (getSenderSP().minus(getSP()) / VM.getVM().getAddressSize()); ++ } ++ ++ public Address getLink() { ++ if (isJavaFrame()) ++ return addressOfStackSlot(JAVA_FRAME_LINK_OFFSET).getAddressAt(0); ++ return addressOfStackSlot(NATIVE_FRAME_LINK_OFFSET).getAddressAt(0); ++ } ++ ++ public Address getUnextendedSP() { return raw_unextendedSP; } ++ ++ // Return address: ++ public Address getSenderPCAddr() { ++ if (isJavaFrame()) ++ return addressOfStackSlot(JAVA_FRAME_RETURN_ADDR_OFFSET); ++ return addressOfStackSlot(NATIVE_FRAME_RETURN_ADDR_OFFSET); ++ } ++ ++ public Address getSenderPC() { return getSenderPCAddr().getAddressAt(0); } ++ ++ public Address getSenderSP() { ++ if (isJavaFrame()) ++ return addressOfStackSlot(JAVA_FRAME_SENDER_SP_OFFSET); ++ return addressOfStackSlot(NATIVE_FRAME_SENDER_SP_OFFSET); ++ } ++ ++ public Address addressOfInterpreterFrameLocals() { ++ return addressOfStackSlot(INTERPRETER_FRAME_LOCALS_OFFSET); ++ } ++ ++ private Address addressOfInterpreterFrameBCX() { ++ return addressOfStackSlot(INTERPRETER_FRAME_BCX_OFFSET); ++ } ++ ++ public int getInterpreterFrameBCI() { ++ // FIXME: this is not atomic with respect to GC and is unsuitable ++ // for use in a non-debugging, or reflective, system. Need to ++ // figure out how to express this. ++ Address bcp = addressOfInterpreterFrameBCX().getAddressAt(0); ++ Address methodHandle = addressOfInterpreterFrameMethod().getAddressAt(0); ++ Method method = (Method)Metadata.instantiateWrapperFor(methodHandle); ++ return bcpToBci(bcp, method); ++ } ++ ++ public Address addressOfInterpreterFrameMDX() { ++ return addressOfStackSlot(INTERPRETER_FRAME_MDX_OFFSET); ++ } ++ ++ // FIXME ++ //inline int frame::interpreter_frame_monitor_size() { ++ // return BasicObjectLock::size(); ++ //} ++ ++ // expression stack ++ // (the max_stack arguments are used by the GC; see class FrameClosure) ++ ++ public Address addressOfInterpreterFrameExpressionStack() { ++ Address monitorEnd = interpreterFrameMonitorEnd().address(); ++ return monitorEnd.addOffsetTo(-1 * VM.getVM().getAddressSize()); ++ } ++ ++ public int getInterpreterFrameExpressionStackDirection() { return -1; } ++ ++ // top of expression stack ++ public Address addressOfInterpreterFrameTOS() { ++ return getSP(); ++ } ++ ++ /** Expression stack from top down */ ++ public Address addressOfInterpreterFrameTOSAt(int slot) { ++ return addressOfInterpreterFrameTOS().addOffsetTo(slot * VM.getVM().getAddressSize()); ++ } ++ ++ public Address getInterpreterFrameSenderSP() { ++ if (Assert.ASSERTS_ENABLED) { ++ Assert.that(isInterpretedFrame(), "interpreted frame expected"); ++ } ++ return addressOfStackSlot(INTERPRETER_FRAME_SENDER_SP_OFFSET).getAddressAt(0); ++ } ++ ++ // Monitors ++ public BasicObjectLock interpreterFrameMonitorBegin() { ++ return new BasicObjectLock(addressOfStackSlot(INTERPRETER_FRAME_MONITOR_BLOCK_BOTTOM_OFFSET)); ++ } ++ ++ public BasicObjectLock interpreterFrameMonitorEnd() { ++ Address result = addressOfStackSlot(INTERPRETER_FRAME_MONITOR_BLOCK_TOP_OFFSET).getAddressAt(0); ++ if (Assert.ASSERTS_ENABLED) { ++ // make sure the pointer points inside the frame ++ Assert.that(AddressOps.gt(getFP(), result), "result must < than frame pointer"); ++ Assert.that(AddressOps.lte(getSP(), result), "result must >= than stack pointer"); ++ } ++ return new BasicObjectLock(result); ++ } ++ ++ public int interpreterFrameMonitorSize() { ++ return BasicObjectLock.size(); ++ } ++ ++ // Method ++ public Address 
addressOfInterpreterFrameMethod() { ++ return addressOfStackSlot(INTERPRETER_FRAME_METHOD_OFFSET); ++ } ++ ++ // Constant pool cache ++ public Address addressOfInterpreterFrameCPCache() { ++ return addressOfStackSlot(INTERPRETER_FRAME_CACHE_OFFSET); ++ } ++ ++ // Entry frames ++ public JavaCallWrapper getEntryFrameCallWrapper() { ++ return new LOONGARCH64JavaCallWrapper(addressOfStackSlot(ENTRY_FRAME_CALL_WRAPPER_OFFSET).getAddressAt(0)); ++ } ++ ++ protected Address addressOfSavedOopResult() { ++ // offset is 2 for compiler2 and 3 for compiler1 ++ return getSP().addOffsetTo((VM.getVM().isClientCompiler() ? 2 : 3) * ++ VM.getVM().getAddressSize()); ++ } ++ ++ protected Address addressOfSavedReceiver() { ++ return getSP().addOffsetTo(-4 * VM.getVM().getAddressSize()); ++ } ++ ++ private void dumpStack() { ++ if (getFP() != null) { ++ for (Address addr = getSP().addOffsetTo(-5 * VM.getVM().getAddressSize()); ++ AddressOps.lte(addr, getFP().addOffsetTo(5 * VM.getVM().getAddressSize())); ++ addr = addr.addOffsetTo(VM.getVM().getAddressSize())) { ++ System.out.println(addr + ": " + addr.getAddressAt(0)); ++ } ++ } else { ++ for (Address addr = getSP().addOffsetTo(-5 * VM.getVM().getAddressSize()); ++ AddressOps.lte(addr, getSP().addOffsetTo(20 * VM.getVM().getAddressSize())); ++ addr = addr.addOffsetTo(VM.getVM().getAddressSize())) { ++ System.out.println(addr + ": " + addr.getAddressAt(0)); ++ } ++ } ++ } ++} +diff --git a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/loongarch64/LOONGARCH64JavaCallWrapper.java b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/loongarch64/LOONGARCH64JavaCallWrapper.java +new file mode 100644 +index 0000000000..0625e10a41 +--- /dev/null ++++ b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/loongarch64/LOONGARCH64JavaCallWrapper.java +@@ -0,0 +1,57 @@ ++/* ++ * Copyright (c) 2001, 2002, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2018, 2021, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ * ++ */ ++ ++package sun.jvm.hotspot.runtime.loongarch64; ++ ++import java.util.*; ++import sun.jvm.hotspot.debugger.*; ++import sun.jvm.hotspot.types.*; ++import sun.jvm.hotspot.runtime.*; ++ ++public class LOONGARCH64JavaCallWrapper extends JavaCallWrapper { ++ private static AddressField lastJavaFPField; ++ ++ static { ++ VM.registerVMInitializedObserver(new Observer() { ++ public void update(Observable o, Object data) { ++ initialize(VM.getVM().getTypeDataBase()); ++ } ++ }); ++ } ++ ++ private static synchronized void initialize(TypeDataBase db) { ++ Type type = db.lookupType("JavaFrameAnchor"); ++ ++ lastJavaFPField = type.getAddressField("_last_Java_fp"); ++ } ++ ++ public LOONGARCH64JavaCallWrapper(Address addr) { ++ super(addr); ++ } ++ ++ public Address getLastJavaFP() { ++ return lastJavaFPField.getValue(addr.addOffsetTo(anchorField.getOffset())); ++ } ++} +diff --git a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/loongarch64/LOONGARCH64RegisterMap.java b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/loongarch64/LOONGARCH64RegisterMap.java +new file mode 100644 +index 0000000000..2cf904d388 +--- /dev/null ++++ b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/loongarch64/LOONGARCH64RegisterMap.java +@@ -0,0 +1,52 @@ ++/* ++ * Copyright (c) 2001, 2012, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2018, 2021, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ * ++ */ ++ ++package sun.jvm.hotspot.runtime.loongarch64; ++ ++import sun.jvm.hotspot.debugger.*; ++import sun.jvm.hotspot.runtime.*; ++ ++public class LOONGARCH64RegisterMap extends RegisterMap { ++ ++ /** This is the only public constructor */ ++ public LOONGARCH64RegisterMap(JavaThread thread, boolean updateMap) { ++ super(thread, updateMap); ++ } ++ ++ protected LOONGARCH64RegisterMap(RegisterMap map) { ++ super(map); ++ } ++ ++ public Object clone() { ++ LOONGARCH64RegisterMap retval = new LOONGARCH64RegisterMap(this); ++ return retval; ++ } ++ ++ // no PD state to clear or copy: ++ protected void clearPD() {} ++ protected void initializePD() {} ++ protected void initializeFromPD(RegisterMap map) {} ++ protected Address getLocationPD(VMReg reg) { return null; } ++} +diff --git a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/mips64/MIPS64CurrentFrameGuess.java b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/mips64/MIPS64CurrentFrameGuess.java +new file mode 100644 +index 0000000000..c11458abe2 +--- /dev/null ++++ b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/mips64/MIPS64CurrentFrameGuess.java +@@ -0,0 +1,217 @@ ++/* ++ * Copyright (c) 2001, 2006, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2018, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++package sun.jvm.hotspot.runtime.mips64; ++ ++import sun.jvm.hotspot.debugger.*; ++import sun.jvm.hotspot.debugger.mips64.*; ++import sun.jvm.hotspot.code.*; ++import sun.jvm.hotspot.interpreter.*; ++import sun.jvm.hotspot.runtime.*; ++ ++/**

Should be able to be used on all mips64 platforms we support
++ (Win32, Solaris/mips64, and soon Linux) to implement JavaThread's
++ "currentFrameGuess()" functionality. Input is an MIPS64ThreadContext;
++ output is SP, FP, and PC for an MIPS64Frame. Instantiation of the
++ MIPS64Frame is left to the caller, since we may need to subclass
++ MIPS64Frame to support signal handler frames on Unix platforms.
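For orientation (not part of the patch): the guesser is normally driven from the platform's JavaThreadPDAccess implementation, which turns a successful guess back into a frame. The sketch below follows the pattern used by the existing SA ports; the method signature and the GUESSER_SCAN_RANGE constant are assumptions made for illustration only.

// Assumed caller shape, modeled on existing SA ports; not added by this patch.
public Frame getCurrentFrameGuess(JavaThread thread, ThreadProxy t) {
    MIPS64ThreadContext context = (MIPS64ThreadContext) t.getContext();
    MIPS64CurrentFrameGuess guesser = new MIPS64CurrentFrameGuess(context, thread);
    if (!guesser.run(GUESSER_SCAN_RANGE)) {   // search-window size; the value is an assumption here
        return null;                          // no plausible frame found
    }
    if (guesser.getPC() == null) {
        // Values came from the frame anchor: use the two-argument constructor,
        // which recovers the PC from the stack itself.
        return new MIPS64Frame(guesser.getSP(), guesser.getFP());
    }
    return new MIPS64Frame(guesser.getSP(), guesser.getFP(), guesser.getPC());
}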

++ ++

Algorithm is to walk up the stack within a given range (say,
++ 512K at most) looking for a plausible PC and SP for a Java frame,
++ also considering those coming in from the context. If we find a PC
++ that belongs to the VM (i.e., in generated code like the
++ interpreter or CodeCache) then we try to find an associated EBP.
++ We repeat this until we either find a complete frame or run out of
++ stack to look at.
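Each sender() step taken during that walk is, for compiled frames, plain pointer arithmetic (see senderForCompiledFrame in LOONGARCH64Frame above; MIPS64Frame below does the same): the sender SP is the callee's unextended SP plus the code blob's frame size, the return PC is the word just below the sender SP, and the saved FP the word below that. A small self-contained sketch, with addresses modeled as plain longs rather than SA Address objects, follows; it is illustrative only and not part of the patch.

class CompiledSenderSketch {
    // senderSP = unextendedSP + frameSize; return PC lives at senderSP - wordSize;
    // saved FP lives at senderSP - 2 * wordSize (SENDER_SP_OFFSET words below senderSP).
    static long[] senderOf(long unextendedSp, long frameSizeBytes, long wordSize) {
        long senderSp     = unextendedSp + frameSizeBytes;
        long returnPcSlot = senderSp - 1 * wordSize;
        long savedFpSlot  = senderSp - 2 * wordSize;
        return new long[] { senderSp, returnPcSlot, savedFpSlot };
    }

    public static void main(String[] args) {
        long[] s = senderOf(0x7fff0000L, 64, 8);  // a 64-byte compiled frame on a 64-bit target
        System.out.printf("senderSP=0x%x, pcSlot=0x%x, fpSlot=0x%x%n", s[0], s[1], s[2]);
    }
}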

*/ ++ ++public class MIPS64CurrentFrameGuess { ++ private MIPS64ThreadContext context; ++ private JavaThread thread; ++ private Address spFound; ++ private Address fpFound; ++ private Address pcFound; ++ ++ private static final boolean DEBUG = System.getProperty("sun.jvm.hotspot.runtime.mips64.MIPS64Frame.DEBUG") ++ != null; ++ ++ public MIPS64CurrentFrameGuess(MIPS64ThreadContext context, ++ JavaThread thread) { ++ this.context = context; ++ this.thread = thread; ++ } ++ ++ /** Returns false if not able to find a frame within a reasonable range. */ ++ public boolean run(long regionInBytesToSearch) { ++ Address sp = context.getRegisterAsAddress(MIPS64ThreadContext.SP); ++ Address pc = context.getRegisterAsAddress(MIPS64ThreadContext.PC); ++ Address fp = context.getRegisterAsAddress(MIPS64ThreadContext.FP); ++ if (sp == null) { ++ // Bail out if no last java frame eithe ++ if (thread.getLastJavaSP() != null) { ++ setValues(thread.getLastJavaSP(), thread.getLastJavaFP(), null); ++ return true; ++ } ++ // Bail out ++ return false; ++ } ++ Address end = sp.addOffsetTo(regionInBytesToSearch); ++ VM vm = VM.getVM(); ++ ++ setValues(null, null, null); // Assume we're not going to find anything ++ ++ if (vm.isJavaPCDbg(pc)) { ++ if (vm.isClientCompiler()) { ++ // If the topmost frame is a Java frame, we are (pretty much) ++ // guaranteed to have a viable EBP. We should be more robust ++ // than this (we have the potential for losing entire threads' ++ // stack traces) but need to see how much work we really have ++ // to do here. Searching the stack for an (SP, FP) pair is ++ // hard since it's easy to misinterpret inter-frame stack ++ // pointers as base-of-frame pointers; we also don't know the ++ // sizes of C1 frames (not registered in the nmethod) so can't ++ // derive them from ESP. ++ ++ setValues(sp, fp, pc); ++ return true; ++ } else { ++ if (vm.getInterpreter().contains(pc)) { ++ if (DEBUG) { ++ System.out.println("CurrentFrameGuess: choosing interpreter frame: sp = " + ++ sp + ", fp = " + fp + ", pc = " + pc); ++ } ++ setValues(sp, fp, pc); ++ return true; ++ } ++ ++ // For the server compiler, EBP is not guaranteed to be valid ++ // for compiled code. In addition, an earlier attempt at a ++ // non-searching algorithm (see below) failed because the ++ // stack pointer from the thread context was pointing ++ // (considerably) beyond the ostensible end of the stack, into ++ // garbage; walking from the topmost frame back caused a crash. ++ // ++ // This algorithm takes the current PC as a given and tries to ++ // find the correct corresponding SP by walking up the stack ++ // and repeatedly performing stackwalks (very inefficient). ++ // ++ // FIXME: there is something wrong with stackwalking across ++ // adapter frames...this is likely to be the root cause of the ++ // failure with the simpler algorithm below. ++ ++ for (long offset = 0; ++ offset < regionInBytesToSearch; ++ offset += vm.getAddressSize()) { ++ try { ++ Address curSP = sp.addOffsetTo(offset); ++ Frame frame = new MIPS64Frame(curSP, null, pc); ++ RegisterMap map = thread.newRegisterMap(false); ++ while (frame != null) { ++ if (frame.isEntryFrame() && frame.entryFrameIsFirst()) { ++ // We were able to traverse all the way to the ++ // bottommost Java frame. ++ // This sp looks good. Keep it. 
++ if (DEBUG) { ++ System.out.println("CurrentFrameGuess: Choosing sp = " + curSP + ", pc = " + pc); ++ } ++ setValues(curSP, null, pc); ++ return true; ++ } ++ frame = frame.sender(map); ++ } ++ } catch (Exception e) { ++ if (DEBUG) { ++ System.out.println("CurrentFrameGuess: Exception " + e + " at offset " + offset); ++ } ++ // Bad SP. Try another. ++ } ++ } ++ ++ // Were not able to find a plausible SP to go with this PC. ++ // Bail out. ++ return false; ++ ++ /* ++ // Original algorithm which does not work because SP was ++ // pointing beyond where it should have: ++ ++ // For the server compiler, EBP is not guaranteed to be valid ++ // for compiled code. We see whether the PC is in the ++ // interpreter and take care of that, otherwise we run code ++ // (unfortunately) duplicated from MIPS64Frame.senderForCompiledFrame. ++ ++ CodeCache cc = vm.getCodeCache(); ++ if (cc.contains(pc)) { ++ CodeBlob cb = cc.findBlob(pc); ++ ++ // See if we can derive a frame pointer from SP and PC ++ // NOTE: This is the code duplicated from MIPS64Frame ++ Address saved_fp = null; ++ int llink_offset = cb.getLinkOffset(); ++ if (llink_offset >= 0) { ++ // Restore base-pointer, since next frame might be an interpreter frame. ++ Address fp_addr = sp.addOffsetTo(VM.getVM().getAddressSize() * llink_offset); ++ saved_fp = fp_addr.getAddressAt(0); ++ } ++ ++ setValues(sp, saved_fp, pc); ++ return true; ++ } ++ */ ++ } ++ } else { ++ // If the current program counter was not known to us as a Java ++ // PC, we currently assume that we are in the run-time system ++ // and attempt to look to thread-local storage for saved ESP and ++ // EBP. Note that if these are null (because we were, in fact, ++ // in Java code, i.e., vtable stubs or similar, and the SA ++ // didn't have enough insight into the target VM to understand ++ // that) then we are going to lose the entire stack trace for ++ // the thread, which is sub-optimal. FIXME. ++ ++ if (DEBUG) { ++ System.out.println("CurrentFrameGuess: choosing last Java frame: sp = " + ++ thread.getLastJavaSP() + ", fp = " + thread.getLastJavaFP()); ++ } ++ if (thread.getLastJavaSP() == null) { ++ return false; // No known Java frames on stack ++ } ++ setValues(thread.getLastJavaSP(), thread.getLastJavaFP(), null); ++ return true; ++ } ++ } ++ ++ public Address getSP() { return spFound; } ++ public Address getFP() { return fpFound; } ++ /** May be null if getting values from thread-local storage; take ++ care to call the correct MIPS64Frame constructor to recover this if ++ necessary */ ++ public Address getPC() { return pcFound; } ++ ++ private void setValues(Address sp, Address fp, Address pc) { ++ spFound = sp; ++ fpFound = fp; ++ pcFound = pc; ++ } ++} +diff --git a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/mips64/MIPS64Frame.java b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/mips64/MIPS64Frame.java +new file mode 100644 +index 0000000000..65d88016ea +--- /dev/null ++++ b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/mips64/MIPS64Frame.java +@@ -0,0 +1,537 @@ ++/* ++ * Copyright (c) 2001, 2015, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2018, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. 
++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++package sun.jvm.hotspot.runtime.mips64; ++ ++import java.util.*; ++import sun.jvm.hotspot.code.*; ++import sun.jvm.hotspot.compiler.*; ++import sun.jvm.hotspot.debugger.*; ++import sun.jvm.hotspot.oops.*; ++import sun.jvm.hotspot.runtime.*; ++import sun.jvm.hotspot.types.*; ++import sun.jvm.hotspot.utilities.*; ++ ++/** Specialization of and implementation of abstract methods of the ++ Frame class for the mips64 family of CPUs. */ ++ ++public class MIPS64Frame extends Frame { ++ private static final boolean DEBUG; ++ static { ++ DEBUG = System.getProperty("sun.jvm.hotspot.runtime.mips64.MIPS64Frame.DEBUG") != null; ++ } ++ ++ // All frames ++ private static final int LINK_OFFSET = 0; ++ private static final int RETURN_ADDR_OFFSET = 1; ++ private static final int SENDER_SP_OFFSET = 2; ++ ++ // Interpreter frames ++ private static final int INTERPRETER_FRAME_SENDER_SP_OFFSET = -1; ++ private static final int INTERPRETER_FRAME_LAST_SP_OFFSET = INTERPRETER_FRAME_SENDER_SP_OFFSET - 1; ++ private static final int INTERPRETER_FRAME_METHOD_OFFSET = INTERPRETER_FRAME_LAST_SP_OFFSET - 1; ++ private static int INTERPRETER_FRAME_MIRROR_OFFSET; ++ private static int INTERPRETER_FRAME_MDX_OFFSET; // Non-core builds only ++ private static int INTERPRETER_FRAME_CACHE_OFFSET; ++ private static int INTERPRETER_FRAME_LOCALS_OFFSET; ++ private static int INTERPRETER_FRAME_BCX_OFFSET; ++ private static int INTERPRETER_FRAME_INITIAL_SP_OFFSET; ++ private static int INTERPRETER_FRAME_MONITOR_BLOCK_TOP_OFFSET; ++ private static int INTERPRETER_FRAME_MONITOR_BLOCK_BOTTOM_OFFSET; ++ ++ // Entry frames ++ private static int ENTRY_FRAME_CALL_WRAPPER_OFFSET; ++ ++ private static VMReg rbp; ++ ++ static { ++ VM.registerVMInitializedObserver(new Observer() { ++ public void update(Observable o, Object data) { ++ initialize(VM.getVM().getTypeDataBase()); ++ } ++ }); ++ } ++ ++ private static synchronized void initialize(TypeDataBase db) { ++ INTERPRETER_FRAME_MIRROR_OFFSET = INTERPRETER_FRAME_METHOD_OFFSET - 1; ++ INTERPRETER_FRAME_MDX_OFFSET = INTERPRETER_FRAME_MIRROR_OFFSET - 1; ++ INTERPRETER_FRAME_CACHE_OFFSET = INTERPRETER_FRAME_MDX_OFFSET - 1; ++ INTERPRETER_FRAME_LOCALS_OFFSET = INTERPRETER_FRAME_CACHE_OFFSET - 1; ++ INTERPRETER_FRAME_BCX_OFFSET = INTERPRETER_FRAME_LOCALS_OFFSET - 1; ++ INTERPRETER_FRAME_INITIAL_SP_OFFSET = INTERPRETER_FRAME_BCX_OFFSET - 1; ++ INTERPRETER_FRAME_MONITOR_BLOCK_TOP_OFFSET = INTERPRETER_FRAME_INITIAL_SP_OFFSET; ++ INTERPRETER_FRAME_MONITOR_BLOCK_BOTTOM_OFFSET = INTERPRETER_FRAME_INITIAL_SP_OFFSET; ++ ++ ENTRY_FRAME_CALL_WRAPPER_OFFSET = db.lookupIntConstant("frame::entry_frame_call_wrapper_offset"); ++ if (VM.getVM().getAddressSize() == 4) { ++ rbp = new VMReg(5); ++ } else { ++ rbp = new VMReg(5 << 1); ++ } ++ } ++ ++ ++ // an additional field 
beyond sp and pc: ++ Address raw_fp; // frame pointer ++ private Address raw_unextendedSP; ++ ++ private MIPS64Frame() { ++ } ++ ++ private void adjustForDeopt() { ++ if ( pc != null) { ++ // Look for a deopt pc and if it is deopted convert to original pc ++ CodeBlob cb = VM.getVM().getCodeCache().findBlob(pc); ++ if (cb != null && cb.isJavaMethod()) { ++ NMethod nm = (NMethod) cb; ++ if (pc.equals(nm.deoptHandlerBegin())) { ++ if (Assert.ASSERTS_ENABLED) { ++ Assert.that(this.getUnextendedSP() != null, "null SP in Java frame"); ++ } ++ // adjust pc if frame is deoptimized. ++ pc = this.getUnextendedSP().getAddressAt(nm.origPCOffset()); ++ deoptimized = true; ++ } ++ } ++ } ++ } ++ ++ public MIPS64Frame(Address raw_sp, Address raw_fp, Address pc) { ++ this.raw_sp = raw_sp; ++ this.raw_unextendedSP = raw_sp; ++ this.raw_fp = raw_fp; ++ this.pc = pc; ++ adjustUnextendedSP(); ++ ++ // Frame must be fully constructed before this call ++ adjustForDeopt(); ++ ++ if (DEBUG) { ++ System.out.println("MIPS64Frame(sp, fp, pc): " + this); ++ dumpStack(); ++ } ++ } ++ ++ public MIPS64Frame(Address raw_sp, Address raw_fp) { ++ this.raw_sp = raw_sp; ++ this.raw_unextendedSP = raw_sp; ++ this.raw_fp = raw_fp; ++ this.pc = raw_sp.getAddressAt(-1 * VM.getVM().getAddressSize()); ++ adjustUnextendedSP(); ++ ++ // Frame must be fully constructed before this call ++ adjustForDeopt(); ++ ++ if (DEBUG) { ++ System.out.println("MIPS64Frame(sp, fp): " + this); ++ dumpStack(); ++ } ++ } ++ ++ public MIPS64Frame(Address raw_sp, Address raw_unextendedSp, Address raw_fp, Address pc) { ++ this.raw_sp = raw_sp; ++ this.raw_unextendedSP = raw_unextendedSp; ++ this.raw_fp = raw_fp; ++ this.pc = pc; ++ adjustUnextendedSP(); ++ ++ // Frame must be fully constructed before this call ++ adjustForDeopt(); ++ ++ if (DEBUG) { ++ System.out.println("MIPS64Frame(sp, unextendedSP, fp, pc): " + this); ++ dumpStack(); ++ } ++ ++ } ++ ++ public Object clone() { ++ MIPS64Frame frame = new MIPS64Frame(); ++ frame.raw_sp = raw_sp; ++ frame.raw_unextendedSP = raw_unextendedSP; ++ frame.raw_fp = raw_fp; ++ frame.pc = pc; ++ frame.deoptimized = deoptimized; ++ return frame; ++ } ++ ++ public boolean equals(Object arg) { ++ if (arg == null) { ++ return false; ++ } ++ ++ if (!(arg instanceof MIPS64Frame)) { ++ return false; ++ } ++ ++ MIPS64Frame other = (MIPS64Frame) arg; ++ ++ return (AddressOps.equal(getSP(), other.getSP()) && ++ AddressOps.equal(getUnextendedSP(), other.getUnextendedSP()) && ++ AddressOps.equal(getFP(), other.getFP()) && ++ AddressOps.equal(getPC(), other.getPC())); ++ } ++ ++ public int hashCode() { ++ if (raw_sp == null) { ++ return 0; ++ } ++ ++ return raw_sp.hashCode(); ++ } ++ ++ public String toString() { ++ return "sp: " + (getSP() == null? "null" : getSP().toString()) + ++ ", unextendedSP: " + (getUnextendedSP() == null? "null" : getUnextendedSP().toString()) + ++ ", fp: " + (getFP() == null? "null" : getFP().toString()) + ++ ", pc: " + (pc == null? 
"null" : pc.toString()); ++ } ++ ++ // accessors for the instance variables ++ public Address getFP() { return raw_fp; } ++ public Address getSP() { return raw_sp; } ++ public Address getID() { return raw_sp; } ++ ++ // FIXME: not implemented yet (should be done for Solaris/MIPS) ++ public boolean isSignalHandlerFrameDbg() { return false; } ++ public int getSignalNumberDbg() { return 0; } ++ public String getSignalNameDbg() { return null; } ++ ++ public boolean isInterpretedFrameValid() { ++ if (Assert.ASSERTS_ENABLED) { ++ Assert.that(isInterpretedFrame(), "Not an interpreted frame"); ++ } ++ ++ // These are reasonable sanity checks ++ if (getFP() == null || getFP().andWithMask(0x3) != null) { ++ return false; ++ } ++ ++ if (getSP() == null || getSP().andWithMask(0x3) != null) { ++ return false; ++ } ++ ++ if (getFP().addOffsetTo(INTERPRETER_FRAME_INITIAL_SP_OFFSET * VM.getVM().getAddressSize()).lessThan(getSP())) { ++ return false; ++ } ++ ++ // These are hacks to keep us out of trouble. ++ // The problem with these is that they mask other problems ++ if (getFP().lessThanOrEqual(getSP())) { ++ // this attempts to deal with unsigned comparison above ++ return false; ++ } ++ ++ if (getFP().minus(getSP()) > 4096 * VM.getVM().getAddressSize()) { ++ // stack frames shouldn't be large. ++ return false; ++ } ++ ++ return true; ++ } ++ ++ // FIXME: not applicable in current system ++ // void patch_pc(Thread* thread, address pc); ++ ++ public Frame sender(RegisterMap regMap, CodeBlob cb) { ++ MIPS64RegisterMap map = (MIPS64RegisterMap) regMap; ++ ++ if (Assert.ASSERTS_ENABLED) { ++ Assert.that(map != null, "map must be set"); ++ } ++ ++ // Default is we done have to follow them. The sender_for_xxx will ++ // update it accordingly ++ map.setIncludeArgumentOops(false); ++ ++ if (isEntryFrame()) return senderForEntryFrame(map); ++ if (isInterpretedFrame()) return senderForInterpreterFrame(map); ++ ++ if(cb == null) { ++ cb = VM.getVM().getCodeCache().findBlob(getPC()); ++ } else { ++ if (Assert.ASSERTS_ENABLED) { ++ Assert.that(cb.equals(VM.getVM().getCodeCache().findBlob(getPC())), "Must be the same"); ++ } ++ } ++ ++ if (cb != null) { ++ return senderForCompiledFrame(map, cb); ++ } ++ ++ // Must be native-compiled frame, i.e. the marshaling code for native ++ // methods that exists in the core system. 
++ return new MIPS64Frame(getSenderSP(), getLink(), getSenderPC()); ++ } ++ ++ private Frame senderForEntryFrame(MIPS64RegisterMap map) { ++ if (DEBUG) { ++ System.out.println("senderForEntryFrame"); ++ } ++ if (Assert.ASSERTS_ENABLED) { ++ Assert.that(map != null, "map must be set"); ++ } ++ // Java frame called from C; skip all C frames and return top C ++ // frame of that chunk as the sender ++ MIPS64JavaCallWrapper jcw = (MIPS64JavaCallWrapper) getEntryFrameCallWrapper(); ++ if (Assert.ASSERTS_ENABLED) { ++ Assert.that(!entryFrameIsFirst(), "next Java fp must be non zero"); ++ Assert.that(jcw.getLastJavaSP().greaterThan(getSP()), "must be above this frame on stack"); ++ } ++ MIPS64Frame fr; ++ if (jcw.getLastJavaPC() != null) { ++ fr = new MIPS64Frame(jcw.getLastJavaSP(), jcw.getLastJavaFP(), jcw.getLastJavaPC()); ++ } else { ++ fr = new MIPS64Frame(jcw.getLastJavaSP(), jcw.getLastJavaFP()); ++ } ++ map.clear(); ++ if (Assert.ASSERTS_ENABLED) { ++ Assert.that(map.getIncludeArgumentOops(), "should be set by clear"); ++ } ++ return fr; ++ } ++ ++ //------------------------------------------------------------------------------ ++ // frame::adjust_unextended_sp ++ private void adjustUnextendedSP() { ++ // On mips, sites calling method handle intrinsics and lambda forms are treated ++ // as any other call site. Therefore, no special action is needed when we are ++ // returning to any of these call sites. ++ ++ CodeBlob cb = cb(); ++ NMethod senderNm = (cb == null) ? null : cb.asNMethodOrNull(); ++ if (senderNm != null) { ++ // If the sender PC is a deoptimization point, get the original PC. ++ if (senderNm.isDeoptEntry(getPC()) || ++ senderNm.isDeoptMhEntry(getPC())) { ++ // DEBUG_ONLY(verifyDeoptriginalPc(senderNm, raw_unextendedSp)); ++ } ++ } ++ } ++ ++ private Frame senderForInterpreterFrame(MIPS64RegisterMap map) { ++ if (DEBUG) { ++ System.out.println("senderForInterpreterFrame"); ++ } ++ Address unextendedSP = addressOfStackSlot(INTERPRETER_FRAME_SENDER_SP_OFFSET).getAddressAt(0); ++ Address sp = addressOfStackSlot(SENDER_SP_OFFSET); ++ // We do not need to update the callee-save register mapping because above ++ // us is either another interpreter frame or a converter-frame, but never ++ // directly a compiled frame. ++ // 11/24/04 SFG. With the removal of adapter frames this is no longer true. ++ // However c2 no longer uses callee save register for java calls so there ++ // are no callee register to find. ++ ++ if (map.getUpdateMap()) ++ updateMapWithSavedLink(map, addressOfStackSlot(LINK_OFFSET)); ++ ++ return new MIPS64Frame(sp, unextendedSP, getLink(), getSenderPC()); ++ } ++ ++ private void updateMapWithSavedLink(RegisterMap map, Address savedFPAddr) { ++ map.setLocation(rbp, savedFPAddr); ++ } ++ ++ private Frame senderForCompiledFrame(MIPS64RegisterMap map, CodeBlob cb) { ++ if (DEBUG) { ++ System.out.println("senderForCompiledFrame"); ++ } ++ ++ // ++ // NOTE: some of this code is (unfortunately) duplicated in MIPS64CurrentFrameGuess ++ // ++ ++ if (Assert.ASSERTS_ENABLED) { ++ Assert.that(map != null, "map must be set"); ++ } ++ ++ // frame owned by optimizing compiler ++ if (Assert.ASSERTS_ENABLED) { ++ Assert.that(cb.getFrameSize() >= 0, "must have non-zero frame size"); ++ } ++ Address senderSP = getUnextendedSP().addOffsetTo(cb.getFrameSize()); ++ ++ // On Intel the return_address is always the word on the stack ++ Address senderPC = senderSP.getAddressAt(-1 * VM.getVM().getAddressSize()); ++ ++ // This is the saved value of EBP which may or may not really be an FP. 
++ // It is only an FP if the sender is an interpreter frame (or C1?). ++ Address savedFPAddr = senderSP.addOffsetTo(- SENDER_SP_OFFSET * VM.getVM().getAddressSize()); ++ ++ if (map.getUpdateMap()) { ++ // Tell GC to use argument oopmaps for some runtime stubs that need it. ++ // For C1, the runtime stub might not have oop maps, so set this flag ++ // outside of update_register_map. ++ map.setIncludeArgumentOops(cb.callerMustGCArguments()); ++ ++ if (cb.getOopMaps() != null) { ++ ImmutableOopMapSet.updateRegisterMap(this, cb, map, true); ++ } ++ ++ // Since the prolog does the save and restore of EBP there is no oopmap ++ // for it so we must fill in its location as if there was an oopmap entry ++ // since if our caller was compiled code there could be live jvm state in it. ++ updateMapWithSavedLink(map, savedFPAddr); ++ } ++ ++ return new MIPS64Frame(senderSP, savedFPAddr.getAddressAt(0), senderPC); ++ } ++ ++ protected boolean hasSenderPD() { ++ // FIXME ++ // Check for null ebp? Need to do some tests. ++ return true; ++ } ++ ++ public long frameSize() { ++ return (getSenderSP().minus(getSP()) / VM.getVM().getAddressSize()); ++ } ++ ++ public Address getLink() { ++ return addressOfStackSlot(LINK_OFFSET).getAddressAt(0); ++ } ++ ++ public Address getUnextendedSP() { return raw_unextendedSP; } ++ ++ // Return address: ++ public Address getSenderPCAddr() { return addressOfStackSlot(RETURN_ADDR_OFFSET); } ++ public Address getSenderPC() { return getSenderPCAddr().getAddressAt(0); } ++ ++ public Address getSenderSP() { return addressOfStackSlot(SENDER_SP_OFFSET); } ++ ++ public Address addressOfInterpreterFrameLocals() { ++ return addressOfStackSlot(INTERPRETER_FRAME_LOCALS_OFFSET); ++ } ++ ++ private Address addressOfInterpreterFrameBCX() { ++ return addressOfStackSlot(INTERPRETER_FRAME_BCX_OFFSET); ++ } ++ ++ public int getInterpreterFrameBCI() { ++ // FIXME: this is not atomic with respect to GC and is unsuitable ++ // for use in a non-debugging, or reflective, system. Need to ++ // figure out how to express this. 
++ Address bcp = addressOfInterpreterFrameBCX().getAddressAt(0); ++ Address methodHandle = addressOfInterpreterFrameMethod().getAddressAt(0); ++ Method method = (Method)Metadata.instantiateWrapperFor(methodHandle); ++ return bcpToBci(bcp, method); ++ } ++ ++ public Address addressOfInterpreterFrameMDX() { ++ return addressOfStackSlot(INTERPRETER_FRAME_MDX_OFFSET); ++ } ++ ++ // FIXME ++ //inline int frame::interpreter_frame_monitor_size() { ++ // return BasicObjectLock::size(); ++ //} ++ ++ // expression stack ++ // (the max_stack arguments are used by the GC; see class FrameClosure) ++ ++ public Address addressOfInterpreterFrameExpressionStack() { ++ Address monitorEnd = interpreterFrameMonitorEnd().address(); ++ return monitorEnd.addOffsetTo(-1 * VM.getVM().getAddressSize()); ++ } ++ ++ public int getInterpreterFrameExpressionStackDirection() { return -1; } ++ ++ // top of expression stack ++ public Address addressOfInterpreterFrameTOS() { ++ return getSP(); ++ } ++ ++ /** Expression stack from top down */ ++ public Address addressOfInterpreterFrameTOSAt(int slot) { ++ return addressOfInterpreterFrameTOS().addOffsetTo(slot * VM.getVM().getAddressSize()); ++ } ++ ++ public Address getInterpreterFrameSenderSP() { ++ if (Assert.ASSERTS_ENABLED) { ++ Assert.that(isInterpretedFrame(), "interpreted frame expected"); ++ } ++ return addressOfStackSlot(INTERPRETER_FRAME_SENDER_SP_OFFSET).getAddressAt(0); ++ } ++ ++ // Monitors ++ public BasicObjectLock interpreterFrameMonitorBegin() { ++ return new BasicObjectLock(addressOfStackSlot(INTERPRETER_FRAME_MONITOR_BLOCK_BOTTOM_OFFSET)); ++ } ++ ++ public BasicObjectLock interpreterFrameMonitorEnd() { ++ Address result = addressOfStackSlot(INTERPRETER_FRAME_MONITOR_BLOCK_TOP_OFFSET).getAddressAt(0); ++ if (Assert.ASSERTS_ENABLED) { ++ // make sure the pointer points inside the frame ++ Assert.that(AddressOps.gt(getFP(), result), "result must < than frame pointer"); ++ Assert.that(AddressOps.lte(getSP(), result), "result must >= than stack pointer"); ++ } ++ return new BasicObjectLock(result); ++ } ++ ++ public int interpreterFrameMonitorSize() { ++ return BasicObjectLock.size(); ++ } ++ ++ // Method ++ public Address addressOfInterpreterFrameMethod() { ++ return addressOfStackSlot(INTERPRETER_FRAME_METHOD_OFFSET); ++ } ++ ++ // Constant pool cache ++ public Address addressOfInterpreterFrameCPCache() { ++ return addressOfStackSlot(INTERPRETER_FRAME_CACHE_OFFSET); ++ } ++ ++ // Entry frames ++ public JavaCallWrapper getEntryFrameCallWrapper() { ++ return new MIPS64JavaCallWrapper(addressOfStackSlot(ENTRY_FRAME_CALL_WRAPPER_OFFSET).getAddressAt(0)); ++ } ++ ++ protected Address addressOfSavedOopResult() { ++ // offset is 2 for compiler2 and 3 for compiler1 ++ return getSP().addOffsetTo((VM.getVM().isClientCompiler() ? 
2 : 3) * ++ VM.getVM().getAddressSize()); ++ } ++ ++ protected Address addressOfSavedReceiver() { ++ return getSP().addOffsetTo(-4 * VM.getVM().getAddressSize()); ++ } ++ ++ private void dumpStack() { ++ if (getFP() != null) { ++ for (Address addr = getSP().addOffsetTo(-5 * VM.getVM().getAddressSize()); ++ AddressOps.lte(addr, getFP().addOffsetTo(5 * VM.getVM().getAddressSize())); ++ addr = addr.addOffsetTo(VM.getVM().getAddressSize())) { ++ System.out.println(addr + ": " + addr.getAddressAt(0)); ++ } ++ } else { ++ for (Address addr = getSP().addOffsetTo(-5 * VM.getVM().getAddressSize()); ++ AddressOps.lte(addr, getSP().addOffsetTo(20 * VM.getVM().getAddressSize())); ++ addr = addr.addOffsetTo(VM.getVM().getAddressSize())) { ++ System.out.println(addr + ": " + addr.getAddressAt(0)); ++ } ++ } ++ } ++} +diff --git a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/mips64/MIPS64JavaCallWrapper.java b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/mips64/MIPS64JavaCallWrapper.java +new file mode 100644 +index 0000000000..dfe3066af0 +--- /dev/null ++++ b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/mips64/MIPS64JavaCallWrapper.java +@@ -0,0 +1,57 @@ ++/* ++ * Copyright (c) 2001, 2002, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2018, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ * ++ */ ++ ++package sun.jvm.hotspot.runtime.mips64; ++ ++import java.util.*; ++import sun.jvm.hotspot.debugger.*; ++import sun.jvm.hotspot.types.*; ++import sun.jvm.hotspot.runtime.*; ++ ++public class MIPS64JavaCallWrapper extends JavaCallWrapper { ++ private static AddressField lastJavaFPField; ++ ++ static { ++ VM.registerVMInitializedObserver(new Observer() { ++ public void update(Observable o, Object data) { ++ initialize(VM.getVM().getTypeDataBase()); ++ } ++ }); ++ } ++ ++ private static synchronized void initialize(TypeDataBase db) { ++ Type type = db.lookupType("JavaFrameAnchor"); ++ ++ lastJavaFPField = type.getAddressField("_last_Java_fp"); ++ } ++ ++ public MIPS64JavaCallWrapper(Address addr) { ++ super(addr); ++ } ++ ++ public Address getLastJavaFP() { ++ return lastJavaFPField.getValue(addr.addOffsetTo(anchorField.getOffset())); ++ } ++} +diff --git a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/mips64/MIPS64RegisterMap.java b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/mips64/MIPS64RegisterMap.java +new file mode 100644 +index 0000000000..f2da760af4 +--- /dev/null ++++ b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/mips64/MIPS64RegisterMap.java +@@ -0,0 +1,52 @@ ++/* ++ * Copyright (c) 2001, 2012, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2018, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ * ++ */ ++ ++package sun.jvm.hotspot.runtime.mips64; ++ ++import sun.jvm.hotspot.debugger.*; ++import sun.jvm.hotspot.runtime.*; ++ ++public class MIPS64RegisterMap extends RegisterMap { ++ ++ /** This is the only public constructor */ ++ public MIPS64RegisterMap(JavaThread thread, boolean updateMap) { ++ super(thread, updateMap); ++ } ++ ++ protected MIPS64RegisterMap(RegisterMap map) { ++ super(map); ++ } ++ ++ public Object clone() { ++ MIPS64RegisterMap retval = new MIPS64RegisterMap(this); ++ return retval; ++ } ++ ++ // no PD state to clear or copy: ++ protected void clearPD() {} ++ protected void initializePD() {} ++ protected void initializeFromPD(RegisterMap map) {} ++ protected Address getLocationPD(VMReg reg) { return null; } ++} +diff --git a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/utilities/PlatformInfo.java b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/utilities/PlatformInfo.java +index 7d7a6107ca..06d79318d9 100644 +--- a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/utilities/PlatformInfo.java ++++ b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/utilities/PlatformInfo.java +@@ -22,6 +22,13 @@ + * + */ + ++/* ++ * This file has been modified by Loongson Technology in 2021. These ++ * modifications are Copyright (c) 2018, 2021, Loongson Technology, and are made ++ * available on the same license terms set forth above. ++ * ++ */ ++ + package sun.jvm.hotspot.utilities; + + /** Provides canonicalized OS and CPU information for the rest of the +@@ -54,7 +61,7 @@ public class PlatformInfo { + + public static boolean knownCPU(String cpu) { + final String[] KNOWN = +- new String[] {"i386", "x86", "x86_64", "amd64", "sparc", "sparcv9", "ppc64", "ppc64le", "aarch64"}; ++ new String[] {"i386", "x86", "x86_64", "amd64", "sparc", "sparcv9", "ppc64", "ppc64le", "aarch64", "mips64", "mips64el", "loongarch64"}; + + for(String s : KNOWN) { + if(s.equals(cpu)) +@@ -101,6 +108,12 @@ public class PlatformInfo { + if (cpu.equals("ppc64le")) + return "ppc64"; + ++ if (cpu.equals("mips64el")) ++ return "mips64"; ++ ++ if (cpu.equals("loongarch64")) ++ return "loongarch64"; ++ + return cpu; + + } +diff --git a/src/jdk.internal.vm.ci/share/classes/jdk.vm.ci.hotspot.loongarch64/src/jdk/vm/ci/hotspot/loongarch64/LoongArch64HotSpotJVMCIBackendFactory.java b/src/jdk.internal.vm.ci/share/classes/jdk.vm.ci.hotspot.loongarch64/src/jdk/vm/ci/hotspot/loongarch64/LoongArch64HotSpotJVMCIBackendFactory.java +new file mode 100644 +index 0000000000..0d3953ddff +--- /dev/null ++++ b/src/jdk.internal.vm.ci/share/classes/jdk.vm.ci.hotspot.loongarch64/src/jdk/vm/ci/hotspot/loongarch64/LoongArch64HotSpotJVMCIBackendFactory.java +@@ -0,0 +1,220 @@ ++/* ++ * Copyright (c) 2015, 2022, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). 
++ *
++ * You should have received a copy of the GNU General Public License version
++ * 2 along with this work; if not, write to the Free Software Foundation,
++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
++ *
++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
++ * or visit www.oracle.com if you need additional information or have any
++ * questions.
++ */
++package jdk.vm.ci.hotspot.loongarch64;
++
++import static java.util.Collections.emptyMap;
++import static jdk.vm.ci.common.InitTimer.timer;
++
++import java.util.EnumSet;
++import java.util.Map;
++
++import jdk.vm.ci.loongarch64.LoongArch64;
++import jdk.vm.ci.loongarch64.LoongArch64.CPUFeature;
++import jdk.vm.ci.code.Architecture;
++import jdk.vm.ci.code.RegisterConfig;
++import jdk.vm.ci.code.TargetDescription;
++import jdk.vm.ci.code.stack.StackIntrospection;
++import jdk.vm.ci.common.InitTimer;
++import jdk.vm.ci.hotspot.HotSpotCodeCacheProvider;
++import jdk.vm.ci.hotspot.HotSpotConstantReflectionProvider;
++import jdk.vm.ci.hotspot.HotSpotJVMCIBackendFactory;
++import jdk.vm.ci.hotspot.HotSpotJVMCIRuntime;
++import jdk.vm.ci.hotspot.HotSpotMetaAccessProvider;
++import jdk.vm.ci.hotspot.HotSpotStackIntrospection;
++import jdk.vm.ci.meta.ConstantReflectionProvider;
++import jdk.vm.ci.runtime.JVMCIBackend;
++
++public class LoongArch64HotSpotJVMCIBackendFactory implements HotSpotJVMCIBackendFactory {
++
++    protected EnumSet<LoongArch64.CPUFeature> computeFeatures(@SuppressWarnings("unused") LoongArch64HotSpotVMConfig config) {
++        // Configure the feature set using the HotSpot flag settings.
++        EnumSet<LoongArch64.CPUFeature> features = EnumSet.noneOf(LoongArch64.CPUFeature.class);
++
++        if ((config.vmVersionFeatures & config.loongarch64LA32) != 0) {
++            features.add(LoongArch64.CPUFeature.LA32);
++        }
++
++        if ((config.vmVersionFeatures & config.loongarch64LA64) != 0) {
++            features.add(LoongArch64.CPUFeature.LA64);
++        }
++
++        if ((config.vmVersionFeatures & config.loongarch64LLEXC) != 0) {
++            features.add(LoongArch64.CPUFeature.LLEXC);
++        }
++
++        if ((config.vmVersionFeatures & config.loongarch64SCDLY) != 0) {
++            features.add(LoongArch64.CPUFeature.SCDLY);
++        }
++
++        if ((config.vmVersionFeatures & config.loongarch64LLDBAR) != 0) {
++            features.add(LoongArch64.CPUFeature.LLDBAR);
++        }
++
++        if ((config.vmVersionFeatures & config.loongarch64LBT_X86) != 0) {
++            features.add(LoongArch64.CPUFeature.LBT_X86);
++        }
++
++        if ((config.vmVersionFeatures & config.loongarch64LBT_ARM) != 0) {
++            features.add(LoongArch64.CPUFeature.LBT_ARM);
++        }
++
++        if ((config.vmVersionFeatures & config.loongarch64LBT_MIPS) != 0) {
++            features.add(LoongArch64.CPUFeature.LBT_MIPS);
++        }
++
++        if ((config.vmVersionFeatures & config.loongarch64CCDMA) != 0) {
++            features.add(LoongArch64.CPUFeature.CCDMA);
++        }
++
++        if ((config.vmVersionFeatures & config.loongarch64COMPLEX) != 0) {
++            features.add(LoongArch64.CPUFeature.COMPLEX);
++        }
++
++        if ((config.vmVersionFeatures & config.loongarch64FP) != 0) {
++            features.add(LoongArch64.CPUFeature.FP);
++        }
++
++        if ((config.vmVersionFeatures & config.loongarch64CRYPTO) != 0) {
++            features.add(LoongArch64.CPUFeature.CRYPTO);
++        }
++
++        if ((config.vmVersionFeatures & config.loongarch64LSX) != 0) {
++            features.add(LoongArch64.CPUFeature.LSX);
++        }
++
++        if ((config.vmVersionFeatures & config.loongarch64LASX) != 0) {
++            features.add(LoongArch64.CPUFeature.LASX);
++        }
++
++        if ((config.vmVersionFeatures & config.loongarch64LAM) != 0) {
++            features.add(LoongArch64.CPUFeature.LAM);
++        }
++
++        if ((config.vmVersionFeatures & config.loongarch64LLSYNC) != 0) {
++            features.add(LoongArch64.CPUFeature.LLSYNC);
++        }
++
++        if ((config.vmVersionFeatures & config.loongarch64TGTSYNC) != 0) {
++            features.add(LoongArch64.CPUFeature.TGTSYNC);
++        }
++
++        if ((config.vmVersionFeatures & config.loongarch64ULSYNC) != 0) {
++            features.add(LoongArch64.CPUFeature.ULSYNC);
++        }
++
++        if ((config.vmVersionFeatures & config.loongarch64UAL) != 0) {
++            features.add(LoongArch64.CPUFeature.UAL);
++        }
++
++        return features;
++    }
++
++    protected EnumSet<LoongArch64.Flag> computeFlags(@SuppressWarnings("unused") LoongArch64HotSpotVMConfig config) {
++        EnumSet<LoongArch64.Flag> flags = EnumSet.noneOf(LoongArch64.Flag.class);
++
++        if (config.useLSX) {
++            flags.add(LoongArch64.Flag.useLSX);
++        }
++
++        if (config.useLASX) {
++            flags.add(LoongArch64.Flag.useLASX);
++        }
++
++        return flags;
++    }
++
++    protected TargetDescription createTarget(LoongArch64HotSpotVMConfig config) {
++        final int stackFrameAlignment = 16;
++        final int implicitNullCheckLimit = 4096;
++        final boolean inlineObjects = true;
++        Architecture arch = new LoongArch64(computeFeatures(config), computeFlags(config));
++        return new TargetDescription(arch, true, stackFrameAlignment, implicitNullCheckLimit, inlineObjects);
++    }
++
++    protected HotSpotConstantReflectionProvider createConstantReflection(HotSpotJVMCIRuntime runtime) {
++        return new HotSpotConstantReflectionProvider(runtime);
++    }
++
++    protected RegisterConfig createRegisterConfig(LoongArch64HotSpotVMConfig config, TargetDescription target) {
++        return new LoongArch64HotSpotRegisterConfig(target, config.useCompressedOops);
++    }
++
++    protected HotSpotCodeCacheProvider createCodeCache(HotSpotJVMCIRuntime runtime, TargetDescription target, RegisterConfig regConfig) {
++        return new HotSpotCodeCacheProvider(runtime, runtime.getConfig(), target, regConfig);
++    }
++
++    protected HotSpotMetaAccessProvider createMetaAccess(HotSpotJVMCIRuntime runtime) {
++        return new HotSpotMetaAccessProvider(runtime);
++    }
++
++    @Override
++    public String getArchitecture() {
++        return "loongarch64";
++    }
++
++    @Override
++    public String toString() {
++        return "JVMCIBackend:" + getArchitecture();
++    }
++
++    @Override
++    @SuppressWarnings("try")
++    public JVMCIBackend createJVMCIBackend(HotSpotJVMCIRuntime runtime, JVMCIBackend host) {
++
++        assert host == null;
++        LoongArch64HotSpotVMConfig config = new LoongArch64HotSpotVMConfig(runtime.getConfigStore());
++        TargetDescription target = createTarget(config);
++
++        RegisterConfig regConfig;
++        HotSpotCodeCacheProvider codeCache;
++        ConstantReflectionProvider constantReflection;
++        HotSpotMetaAccessProvider metaAccess;
++        StackIntrospection stackIntrospection;
++        try (InitTimer t = timer("create providers")) {
++            try (InitTimer rt = timer("create MetaAccess provider")) {
++                metaAccess = createMetaAccess(runtime);
++            }
++            try (InitTimer rt = timer("create RegisterConfig")) {
++                regConfig = createRegisterConfig(config, target);
++            }
++            try (InitTimer rt = timer("create CodeCache provider")) {
++                codeCache = createCodeCache(runtime, target, regConfig);
++            }
++            try (InitTimer rt = timer("create ConstantReflection provider")) {
++                constantReflection = createConstantReflection(runtime);
++            }
++            try (InitTimer rt = timer("create StackIntrospection provider")) {
++                stackIntrospection = new HotSpotStackIntrospection(runtime);
++            }
++        }
++        try (InitTimer rt = timer("instantiate backend")) {
++            return createBackend(metaAccess, codeCache, constantReflection, stackIntrospection);
++        }
++    }
++
++    protected JVMCIBackend
createBackend(HotSpotMetaAccessProvider metaAccess, HotSpotCodeCacheProvider codeCache, ConstantReflectionProvider constantReflection, ++ StackIntrospection stackIntrospection) { ++ return new JVMCIBackend(metaAccess, codeCache, constantReflection, stackIntrospection); ++ } ++} +diff --git a/src/jdk.internal.vm.ci/share/classes/jdk.vm.ci.hotspot.loongarch64/src/jdk/vm/ci/hotspot/loongarch64/LoongArch64HotSpotRegisterConfig.java b/src/jdk.internal.vm.ci/share/classes/jdk.vm.ci.hotspot.loongarch64/src/jdk/vm/ci/hotspot/loongarch64/LoongArch64HotSpotRegisterConfig.java +new file mode 100644 +index 0000000000..2ee6a4b847 +--- /dev/null ++++ b/src/jdk.internal.vm.ci/share/classes/jdk.vm.ci.hotspot.loongarch64/src/jdk/vm/ci/hotspot/loongarch64/LoongArch64HotSpotRegisterConfig.java +@@ -0,0 +1,297 @@ ++/* ++ * Copyright (c) 2015, 2022, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ */ ++package jdk.vm.ci.hotspot.loongarch64; ++ ++import static jdk.vm.ci.loongarch64.LoongArch64.ra; ++import static jdk.vm.ci.loongarch64.LoongArch64.a0; ++import static jdk.vm.ci.loongarch64.LoongArch64.a1; ++import static jdk.vm.ci.loongarch64.LoongArch64.a2; ++import static jdk.vm.ci.loongarch64.LoongArch64.a3; ++import static jdk.vm.ci.loongarch64.LoongArch64.a4; ++import static jdk.vm.ci.loongarch64.LoongArch64.a5; ++import static jdk.vm.ci.loongarch64.LoongArch64.a6; ++import static jdk.vm.ci.loongarch64.LoongArch64.a7; ++import static jdk.vm.ci.loongarch64.LoongArch64.SCR1; ++import static jdk.vm.ci.loongarch64.LoongArch64.SCR2; ++import static jdk.vm.ci.loongarch64.LoongArch64.t0; ++import static jdk.vm.ci.loongarch64.LoongArch64.v0; ++import static jdk.vm.ci.loongarch64.LoongArch64.s5; ++import static jdk.vm.ci.loongarch64.LoongArch64.s6; ++import static jdk.vm.ci.loongarch64.LoongArch64.sp; ++import static jdk.vm.ci.loongarch64.LoongArch64.fp; ++import static jdk.vm.ci.loongarch64.LoongArch64.tp; ++import static jdk.vm.ci.loongarch64.LoongArch64.rx; ++import static jdk.vm.ci.loongarch64.LoongArch64.f0; ++import static jdk.vm.ci.loongarch64.LoongArch64.f1; ++import static jdk.vm.ci.loongarch64.LoongArch64.f2; ++import static jdk.vm.ci.loongarch64.LoongArch64.f3; ++import static jdk.vm.ci.loongarch64.LoongArch64.f4; ++import static jdk.vm.ci.loongarch64.LoongArch64.f5; ++import static jdk.vm.ci.loongarch64.LoongArch64.f6; ++import static jdk.vm.ci.loongarch64.LoongArch64.f7; ++import static jdk.vm.ci.loongarch64.LoongArch64.fv0; ++import static jdk.vm.ci.loongarch64.LoongArch64.zero; ++ ++import java.util.ArrayList; ++import java.util.HashSet; ++import java.util.List; ++import java.util.Set; ++ ++import jdk.vm.ci.loongarch64.LoongArch64; ++import jdk.vm.ci.code.Architecture; ++import jdk.vm.ci.code.CallingConvention; ++import jdk.vm.ci.code.CallingConvention.Type; ++import jdk.vm.ci.code.Register; ++import jdk.vm.ci.code.RegisterArray; ++import jdk.vm.ci.code.RegisterAttributes; ++import jdk.vm.ci.code.RegisterConfig; ++import jdk.vm.ci.code.StackSlot; ++import jdk.vm.ci.code.TargetDescription; ++import jdk.vm.ci.code.ValueKindFactory; ++import jdk.vm.ci.common.JVMCIError; ++import jdk.vm.ci.hotspot.HotSpotCallingConventionType; ++import jdk.vm.ci.meta.AllocatableValue; ++import jdk.vm.ci.meta.JavaKind; ++import jdk.vm.ci.meta.JavaType; ++import jdk.vm.ci.meta.PlatformKind; ++import jdk.vm.ci.meta.Value; ++import jdk.vm.ci.meta.ValueKind; ++ ++public class LoongArch64HotSpotRegisterConfig implements RegisterConfig { ++ ++ private final TargetDescription target; ++ ++ private final RegisterArray allocatable; ++ ++ /** ++ * The caller saved registers always include all parameter registers. 
++ */
++    private final RegisterArray callerSaved;
++
++    private final boolean allAllocatableAreCallerSaved;
++
++    private final RegisterAttributes[] attributesMap;
++
++    @Override
++    public RegisterArray getAllocatableRegisters() {
++        return allocatable;
++    }
++
++    @Override
++    public RegisterArray filterAllocatableRegisters(PlatformKind kind, RegisterArray registers) {
++        ArrayList<Register> list = new ArrayList<>();
++        for (Register reg : registers) {
++            if (target.arch.canStoreValue(reg.getRegisterCategory(), kind)) {
++                list.add(reg);
++            }
++        }
++
++        return new RegisterArray(list);
++    }
++
++    @Override
++    public RegisterAttributes[] getAttributesMap() {
++        return attributesMap.clone();
++    }
++
++    private final RegisterArray javaGeneralParameterRegisters = new RegisterArray(t0, a0, a1, a2, a3, a4, a5, a6, a7);
++    private final RegisterArray nativeGeneralParameterRegisters = new RegisterArray(a0, a1, a2, a3, a4, a5, a6, a7);
++    private final RegisterArray floatParameterRegisters = new RegisterArray(f0, f1, f2, f3, f4, f5, f6, f7);
++
++    public static final Register heapBaseRegister = s5;
++    public static final Register TREG = s6;
++
++    private static final RegisterArray reservedRegisters = new RegisterArray(fp, ra, zero, sp, tp, rx, SCR1, SCR2, TREG);
++
++    private static RegisterArray initAllocatable(Architecture arch, boolean reserveForHeapBase) {
++        RegisterArray allRegisters = arch.getAvailableValueRegisters();
++        Register[] registers = new Register[allRegisters.size() - reservedRegisters.size() - (reserveForHeapBase ? 1 : 0)];
++        List<Register> reservedRegistersList = reservedRegisters.asList();
++
++        int idx = 0;
++        for (Register reg : allRegisters) {
++            if (reservedRegistersList.contains(reg)) {
++                // skip reserved registers
++                continue;
++            }
++            if (reserveForHeapBase && reg.equals(heapBaseRegister)) {
++                // skip heap base register
++                continue;
++            }
++
++            registers[idx++] = reg;
++        }
++
++        assert idx == registers.length;
++        return new RegisterArray(registers);
++    }
++
++    public LoongArch64HotSpotRegisterConfig(TargetDescription target, boolean useCompressedOops) {
++        this(target, initAllocatable(target.arch, useCompressedOops));
++        assert callerSaved.size() >= allocatable.size();
++    }
++
++    public LoongArch64HotSpotRegisterConfig(TargetDescription target, RegisterArray allocatable) {
++        this.target = target;
++
++        this.allocatable = allocatable;
++        Set<Register> callerSaveSet = new HashSet<>();
++        allocatable.addTo(callerSaveSet);
++        floatParameterRegisters.addTo(callerSaveSet);
++        javaGeneralParameterRegisters.addTo(callerSaveSet);
++        nativeGeneralParameterRegisters.addTo(callerSaveSet);
++        callerSaved = new RegisterArray(callerSaveSet);
++
++        allAllocatableAreCallerSaved = true;
++        attributesMap = RegisterAttributes.createMap(this, LoongArch64.allRegisters);
++    }
++
++    @Override
++    public RegisterArray getCallerSaveRegisters() {
++        return callerSaved;
++    }
++
++    @Override
++    public RegisterArray getCalleeSaveRegisters() {
++        return null;
++    }
++
++    @Override
++    public boolean areAllAllocatableRegistersCallerSaved() {
++        return allAllocatableAreCallerSaved;
++    }
++
++    @Override
++    public CallingConvention getCallingConvention(Type type, JavaType returnType, JavaType[] parameterTypes, ValueKindFactory<?> valueKindFactory) {
++        HotSpotCallingConventionType hotspotType = (HotSpotCallingConventionType) type;
++        if (type == HotSpotCallingConventionType.NativeCall) {
++            return callingConvention(nativeGeneralParameterRegisters, returnType, parameterTypes, hotspotType, valueKindFactory);
++        }
++        // On x64, parameter locations are the same whether viewed
++        // from the caller or callee perspective
++        return callingConvention(javaGeneralParameterRegisters, returnType, parameterTypes, hotspotType, valueKindFactory);
++    }
++
++    @Override
++    public RegisterArray getCallingConventionRegisters(Type type, JavaKind kind) {
++        HotSpotCallingConventionType hotspotType = (HotSpotCallingConventionType) type;
++        switch (kind) {
++            case Boolean:
++            case Byte:
++            case Short:
++            case Char:
++            case Int:
++            case Long:
++            case Object:
++                return hotspotType == HotSpotCallingConventionType.NativeCall ? nativeGeneralParameterRegisters : javaGeneralParameterRegisters;
++            case Float:
++            case Double:
++                return floatParameterRegisters;
++            default:
++                throw JVMCIError.shouldNotReachHere();
++        }
++    }
++
++    private CallingConvention callingConvention(RegisterArray generalParameterRegisters, JavaType returnType, JavaType[] parameterTypes, HotSpotCallingConventionType type,
++                    ValueKindFactory<?> valueKindFactory) {
++        AllocatableValue[] locations = new AllocatableValue[parameterTypes.length];
++
++        int currentGeneral = 0;
++        int currentFloat = 0;
++        int currentStackOffset = 0;
++
++        for (int i = 0; i < parameterTypes.length; i++) {
++            final JavaKind kind = parameterTypes[i].getJavaKind().getStackKind();
++
++            switch (kind) {
++                case Byte:
++                case Boolean:
++                case Short:
++                case Char:
++                case Int:
++                case Long:
++                case Object:
++                    if (currentGeneral < generalParameterRegisters.size()) {
++                        Register register = generalParameterRegisters.get(currentGeneral++);
++                        locations[i] = register.asValue(valueKindFactory.getValueKind(kind));
++                    }
++                    break;
++                case Float:
++                case Double:
++                    if (currentFloat < floatParameterRegisters.size()) {
++                        Register register = floatParameterRegisters.get(currentFloat++);
++                        locations[i] = register.asValue(valueKindFactory.getValueKind(kind));
++                    } else if (currentGeneral < generalParameterRegisters.size()) {
++                        Register register = generalParameterRegisters.get(currentGeneral++);
++                        locations[i] = register.asValue(valueKindFactory.getValueKind(kind));
++                    }
++                    break;
++                default:
++                    throw JVMCIError.shouldNotReachHere();
++            }
++
++            if (locations[i] == null) {
++                ValueKind<?> valueKind = valueKindFactory.getValueKind(kind);
++                locations[i] = StackSlot.get(valueKind, currentStackOffset, !type.out);
++                currentStackOffset += Math.max(valueKind.getPlatformKind().getSizeInBytes(), target.wordSize);
++            }
++        }
++
++        JavaKind returnKind = returnType == null ? JavaKind.Void : returnType.getJavaKind();
++        AllocatableValue returnLocation = returnKind == JavaKind.Void ?
Value.ILLEGAL : getReturnRegister(returnKind).asValue(valueKindFactory.getValueKind(returnKind.getStackKind())); ++ return new CallingConvention(currentStackOffset, returnLocation, locations); ++ } ++ ++ @Override ++ public Register getReturnRegister(JavaKind kind) { ++ switch (kind) { ++ case Boolean: ++ case Byte: ++ case Char: ++ case Short: ++ case Int: ++ case Long: ++ case Object: ++ return v0; ++ case Float: ++ case Double: ++ return fv0; ++ case Void: ++ case Illegal: ++ return null; ++ default: ++ throw new UnsupportedOperationException("no return register for type " + kind); ++ } ++ } ++ ++ @Override ++ public Register getFrameRegister() { ++ return sp; ++ } ++ ++ @Override ++ public String toString() { ++ return String.format("Allocatable: " + getAllocatableRegisters() + "%n" + "CallerSave: " + getCallerSaveRegisters() + "%n"); ++ } ++} +diff --git a/src/jdk.internal.vm.ci/share/classes/jdk.vm.ci.hotspot.loongarch64/src/jdk/vm/ci/hotspot/loongarch64/LoongArch64HotSpotVMConfig.java b/src/jdk.internal.vm.ci/share/classes/jdk.vm.ci.hotspot.loongarch64/src/jdk/vm/ci/hotspot/loongarch64/LoongArch64HotSpotVMConfig.java +new file mode 100644 +index 0000000000..c8605976a0 +--- /dev/null ++++ b/src/jdk.internal.vm.ci/share/classes/jdk.vm.ci.hotspot.loongarch64/src/jdk/vm/ci/hotspot/loongarch64/LoongArch64HotSpotVMConfig.java +@@ -0,0 +1,77 @@ ++/* ++ * Copyright (c) 2016, 2022, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ */ ++package jdk.vm.ci.hotspot.loongarch64; ++ ++import jdk.vm.ci.hotspot.HotSpotVMConfigAccess; ++import jdk.vm.ci.hotspot.HotSpotVMConfigStore; ++import jdk.vm.ci.services.Services; ++ ++/** ++ * Used to access native configuration details. ++ * ++ * All non-static, public fields in this class are so that they can be compiled as constants. ++ */ ++class LoongArch64HotSpotVMConfig extends HotSpotVMConfigAccess { ++ ++ LoongArch64HotSpotVMConfig(HotSpotVMConfigStore config) { ++ super(config); ++ } ++ ++ final boolean useCompressedOops = getFlag("UseCompressedOops", Boolean.class); ++ ++ // CPU Capabilities ++ ++ /* ++ * These flags are set based on the corresponding command line flags. ++ */ ++ final boolean useLSX = getFlag("UseLSX", Boolean.class); ++ final boolean useLASX = getFlag("UseLASX", Boolean.class); ++ ++ final long vmVersionFeatures = getFieldValue("Abstract_VM_Version::_features", Long.class, "uint64_t"); ++ ++ /* ++ * These flags are set if the corresponding support is in the hardware. 
++ */ ++ // Checkstyle: stop ++ final long loongarch64LA32 = getConstant("VM_Version::CPU_LA32", Long.class); ++ final long loongarch64LA64 = getConstant("VM_Version::CPU_LA64", Long.class); ++ final long loongarch64LLEXC = getConstant("VM_Version::CPU_LLEXC", Long.class); ++ final long loongarch64SCDLY = getConstant("VM_Version::CPU_SCDLY", Long.class); ++ final long loongarch64LLDBAR = getConstant("VM_Version::CPU_LLDBAR", Long.class); ++ final long loongarch64LBT_X86 = getConstant("VM_Version::CPU_LBT_X86", Long.class); ++ final long loongarch64LBT_ARM = getConstant("VM_Version::CPU_LBT_ARM", Long.class); ++ final long loongarch64LBT_MIPS = getConstant("VM_Version::CPU_LBT_MIPS", Long.class); ++ final long loongarch64CCDMA = getConstant("VM_Version::CPU_CCDMA", Long.class); ++ final long loongarch64COMPLEX = getConstant("VM_Version::CPU_COMPLEX", Long.class); ++ final long loongarch64FP = getConstant("VM_Version::CPU_FP", Long.class); ++ final long loongarch64CRYPTO = getConstant("VM_Version::CPU_CRYPTO", Long.class); ++ final long loongarch64LSX = getConstant("VM_Version::CPU_LSX", Long.class); ++ final long loongarch64LASX = getConstant("VM_Version::CPU_LASX", Long.class); ++ final long loongarch64LAM = getConstant("VM_Version::CPU_LAM", Long.class); ++ final long loongarch64LLSYNC = getConstant("VM_Version::CPU_LLSYNC", Long.class); ++ final long loongarch64TGTSYNC = getConstant("VM_Version::CPU_TGTSYNC", Long.class); ++ final long loongarch64ULSYNC = getConstant("VM_Version::CPU_ULSYNC", Long.class); ++ final long loongarch64UAL = getConstant("VM_Version::CPU_UAL", Long.class); ++ // Checkstyle: resume ++} +diff --git a/src/jdk.internal.vm.ci/share/classes/jdk.vm.ci.hotspot.loongarch64/src/jdk/vm/ci/hotspot/loongarch64/package-info.java b/src/jdk.internal.vm.ci/share/classes/jdk.vm.ci.hotspot.loongarch64/src/jdk/vm/ci/hotspot/loongarch64/package-info.java +new file mode 100644 +index 0000000000..1048ea9d64 +--- /dev/null ++++ b/src/jdk.internal.vm.ci/share/classes/jdk.vm.ci.hotspot.loongarch64/src/jdk/vm/ci/hotspot/loongarch64/package-info.java +@@ -0,0 +1,28 @@ ++/* ++ * Copyright (c) 2018, 2022, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ */ ++ ++/** ++ * The LoongArch64 HotSpot specific portions of the JVMCI API. 
++ */ ++package jdk.vm.ci.hotspot.loongarch64; +diff --git a/src/jdk.internal.vm.ci/share/classes/jdk.vm.ci.loongarch64/src/jdk/vm/ci/loongarch64/LoongArch64.java b/src/jdk.internal.vm.ci/share/classes/jdk.vm.ci.loongarch64/src/jdk/vm/ci/loongarch64/LoongArch64.java +new file mode 100644 +index 0000000000..1bb12e7a5f +--- /dev/null ++++ b/src/jdk.internal.vm.ci/share/classes/jdk.vm.ci.loongarch64/src/jdk/vm/ci/loongarch64/LoongArch64.java +@@ -0,0 +1,247 @@ ++/* ++ * Copyright (c) 2015, 2022, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ */ ++package jdk.vm.ci.loongarch64; ++ ++import java.nio.ByteOrder; ++import java.util.EnumSet; ++ ++import jdk.vm.ci.code.Architecture; ++import jdk.vm.ci.code.Register; ++import jdk.vm.ci.code.Register.RegisterCategory; ++import jdk.vm.ci.code.RegisterArray; ++import jdk.vm.ci.meta.JavaKind; ++import jdk.vm.ci.meta.PlatformKind; ++ ++/** ++ * Represents the LoongArch64 architecture. 
++ */ ++public class LoongArch64 extends Architecture { ++ ++ public static final RegisterCategory CPU = new RegisterCategory("CPU"); ++ ++ // General purpose CPU registers ++ public static final Register zero = new Register(0, 0, "r0", CPU); ++ public static final Register ra = new Register(1, 1, "r1", CPU); ++ public static final Register tp = new Register(2, 2, "r2", CPU); ++ public static final Register sp = new Register(3, 3, "r3", CPU); ++ public static final Register a0 = new Register(4, 4, "r4", CPU); ++ public static final Register a1 = new Register(5, 5, "r5", CPU); ++ public static final Register a2 = new Register(6, 6, "r6", CPU); ++ public static final Register a3 = new Register(7, 7, "r7", CPU); ++ public static final Register a4 = new Register(8, 8, "r8", CPU); ++ public static final Register a5 = new Register(9, 9, "r9", CPU); ++ public static final Register a6 = new Register(10, 10, "r10", CPU); ++ public static final Register a7 = new Register(11, 11, "r11", CPU); ++ public static final Register t0 = new Register(12, 12, "r12", CPU); ++ public static final Register t1 = new Register(13, 13, "r13", CPU); ++ public static final Register t2 = new Register(14, 14, "r14", CPU); ++ public static final Register t3 = new Register(15, 15, "r15", CPU); ++ public static final Register t4 = new Register(16, 16, "r16", CPU); ++ public static final Register t5 = new Register(17, 17, "r17", CPU); ++ public static final Register t6 = new Register(18, 18, "r18", CPU); ++ public static final Register t7 = new Register(19, 19, "r19", CPU); ++ public static final Register t8 = new Register(20, 20, "r20", CPU); ++ public static final Register rx = new Register(21, 21, "r21", CPU); ++ public static final Register fp = new Register(22, 22, "r22", CPU); ++ public static final Register s0 = new Register(23, 23, "r23", CPU); ++ public static final Register s1 = new Register(24, 24, "r24", CPU); ++ public static final Register s2 = new Register(25, 25, "r25", CPU); ++ public static final Register s3 = new Register(26, 26, "r26", CPU); ++ public static final Register s4 = new Register(27, 27, "r27", CPU); ++ public static final Register s5 = new Register(28, 28, "r28", CPU); ++ public static final Register s6 = new Register(29, 29, "r29", CPU); ++ public static final Register s7 = new Register(30, 30, "r30", CPU); ++ public static final Register s8 = new Register(31, 31, "r31", CPU); ++ ++ public static final Register SCR1 = t7; ++ public static final Register SCR2 = t4; ++ public static final Register v0 = a0; ++ ++ // @formatter:off ++ public static final RegisterArray cpuRegisters = new RegisterArray( ++ zero, ra, tp, sp, a0, a1, a2, a3, ++ a4, a5, a6, a7, t0, t1, t2, t3, ++ t4, t5, t6, t7, t8, rx, fp, s0, ++ s1, s2, s3, s4, s5, s6, s7, s8 ++ ); ++ // @formatter:on ++ ++ public static final RegisterCategory SIMD = new RegisterCategory("SIMD"); ++ ++ // Simd registers ++ public static final Register f0 = new Register(32, 0, "f0", SIMD); ++ public static final Register f1 = new Register(33, 1, "f1", SIMD); ++ public static final Register f2 = new Register(34, 2, "f2", SIMD); ++ public static final Register f3 = new Register(35, 3, "f3", SIMD); ++ public static final Register f4 = new Register(36, 4, "f4", SIMD); ++ public static final Register f5 = new Register(37, 5, "f5", SIMD); ++ public static final Register f6 = new Register(38, 6, "f6", SIMD); ++ public static final Register f7 = new Register(39, 7, "f7", SIMD); ++ public static final Register f8 = new Register(40, 8, "f8", SIMD); ++ public 
static final Register f9 = new Register(41, 9, "f9", SIMD); ++ public static final Register f10 = new Register(42, 10, "f10", SIMD); ++ public static final Register f11 = new Register(43, 11, "f11", SIMD); ++ public static final Register f12 = new Register(44, 12, "f12", SIMD); ++ public static final Register f13 = new Register(45, 13, "f13", SIMD); ++ public static final Register f14 = new Register(46, 14, "f14", SIMD); ++ public static final Register f15 = new Register(47, 15, "f15", SIMD); ++ public static final Register f16 = new Register(48, 16, "f16", SIMD); ++ public static final Register f17 = new Register(49, 17, "f17", SIMD); ++ public static final Register f18 = new Register(50, 18, "f18", SIMD); ++ public static final Register f19 = new Register(51, 19, "f19", SIMD); ++ public static final Register f20 = new Register(52, 20, "f20", SIMD); ++ public static final Register f21 = new Register(53, 21, "f21", SIMD); ++ public static final Register f22 = new Register(54, 22, "f22", SIMD); ++ public static final Register f23 = new Register(55, 23, "f23", SIMD); ++ public static final Register f24 = new Register(56, 24, "f24", SIMD); ++ public static final Register f25 = new Register(57, 25, "f25", SIMD); ++ public static final Register f26 = new Register(58, 26, "f26", SIMD); ++ public static final Register f27 = new Register(59, 27, "f27", SIMD); ++ public static final Register f28 = new Register(60, 28, "f28", SIMD); ++ public static final Register f29 = new Register(61, 29, "f29", SIMD); ++ public static final Register f30 = new Register(62, 30, "f30", SIMD); ++ public static final Register f31 = new Register(63, 31, "f31", SIMD); ++ ++ public static final Register fv0 = f0; ++ ++ // @formatter:off ++ public static final RegisterArray simdRegisters = new RegisterArray( ++ f0, f1, f2, f3, f4, f5, f6, f7, ++ f8, f9, f10, f11, f12, f13, f14, f15, ++ f16, f17, f18, f19, f20, f21, f22, f23, ++ f24, f25, f26, f27, f28, f29, f30, f31 ++ ); ++ // @formatter:on ++ ++ // @formatter:off ++ public static final RegisterArray allRegisters = new RegisterArray( ++ zero, ra, tp, sp, a0, a1, a2, a3, ++ a4, a5, a6, a7, t0, t1, t2, t3, ++ t4, t5, t6, t7, t8, rx, fp, s0, ++ s1, s2, s3, s4, s5, s6, s7, s8, ++ ++ f0, f1, f2, f3, f4, f5, f6, f7, ++ f8, f9, f10, f11, f12, f13, f14, f15, ++ f16, f17, f18, f19, f20, f21, f22, f23, ++ f24, f25, f26, f27, f28, f29, f30, f31 ++ ); ++ // @formatter:on ++ ++ /** ++ * Basic set of CPU features mirroring what is returned from the cpuid instruction. See: ++ * {@code VM_Version::cpuFeatureFlags}. ++ */ ++ public enum CPUFeature { ++ LA32, ++ LA64, ++ LLEXC, ++ SCDLY, ++ LLDBAR, ++ LBT_X86, ++ LBT_ARM, ++ LBT_MIPS, ++ CCDMA, ++ COMPLEX, ++ FP, ++ CRYPTO, ++ LSX, ++ LASX, ++ LAM, ++ LLSYNC, ++ TGTSYNC, ++ ULSYNC, ++ UAL ++ } ++ ++ private final EnumSet features; ++ ++ /** ++ * Set of flags to control code emission. 
++ */
++    public enum Flag {
++        useLSX,
++        useLASX
++    }
++
++    private final EnumSet<Flag> flags;
++
++    public LoongArch64(EnumSet<CPUFeature> features, EnumSet<Flag> flags) {
++        super("loongarch64", LoongArch64Kind.QWORD, ByteOrder.LITTLE_ENDIAN, true, allRegisters, 0, 0, 0);
++        this.features = features;
++        this.flags = flags;
++    }
++
++    public EnumSet<CPUFeature> getFeatures() {
++        return features;
++    }
++
++    public EnumSet<Flag> getFlags() {
++        return flags;
++    }
++
++    @Override
++    public PlatformKind getPlatformKind(JavaKind javaKind) {
++        switch (javaKind) {
++            case Boolean:
++            case Byte:
++                return LoongArch64Kind.BYTE;
++            case Short:
++            case Char:
++                return LoongArch64Kind.WORD;
++            case Int:
++                return LoongArch64Kind.DWORD;
++            case Long:
++            case Object:
++                return LoongArch64Kind.QWORD;
++            case Float:
++                return LoongArch64Kind.SINGLE;
++            case Double:
++                return LoongArch64Kind.DOUBLE;
++            default:
++                return null;
++        }
++    }
++
++    @Override
++    public boolean canStoreValue(RegisterCategory category, PlatformKind platformKind) {
++        LoongArch64Kind kind = (LoongArch64Kind) platformKind;
++        if (kind.isInteger()) {
++            return category.equals(CPU);
++        } else if (kind.isSIMD()) {
++            return category.equals(SIMD);
++        }
++        return false;
++    }
++
++    @Override
++    public LoongArch64Kind getLargestStorableKind(RegisterCategory category) {
++        if (category.equals(CPU)) {
++            return LoongArch64Kind.QWORD;
++        } else if (category.equals(SIMD)) {
++            return LoongArch64Kind.V256_QWORD;
++        } else {
++            return null;
++        }
++    }
++}
+diff --git a/src/jdk.internal.vm.ci/share/classes/jdk.vm.ci.loongarch64/src/jdk/vm/ci/loongarch64/LoongArch64Kind.java b/src/jdk.internal.vm.ci/share/classes/jdk.vm.ci.loongarch64/src/jdk/vm/ci/loongarch64/LoongArch64Kind.java
+new file mode 100644
+index 0000000000..84b7f2027f
+--- /dev/null
++++ b/src/jdk.internal.vm.ci/share/classes/jdk.vm.ci.loongarch64/src/jdk/vm/ci/loongarch64/LoongArch64Kind.java
+@@ -0,0 +1,163 @@
++/*
++ * Copyright (c) 2015, 2022, Oracle and/or its affiliates. All rights reserved.
++ * Copyright (c) 2022, Loongson Technology. All rights reserved.
++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
++ *
++ * This code is free software; you can redistribute it and/or modify it
++ * under the terms of the GNU General Public License version 2 only, as
++ * published by the Free Software Foundation.
++ *
++ * This code is distributed in the hope that it will be useful, but WITHOUT
++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
++ * version 2 for more details (a copy is included in the LICENSE file that
++ * accompanied this code).
++ *
++ * You should have received a copy of the GNU General Public License version
++ * 2 along with this work; if not, write to the Free Software Foundation,
++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
++ *
++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
++ * or visit www.oracle.com if you need additional information or have any
++ * questions.
++ */ ++package jdk.vm.ci.loongarch64; ++ ++import jdk.vm.ci.meta.PlatformKind; ++ ++public enum LoongArch64Kind implements PlatformKind { ++ ++ // scalar ++ BYTE(1), ++ WORD(2), ++ DWORD(4), ++ QWORD(8), ++ UBYTE(1), ++ UWORD(2), ++ UDWORD(4), ++ SINGLE(4), ++ DOUBLE(8), ++ ++ // SIMD ++ V128_BYTE(16, BYTE), ++ V128_WORD(16, WORD), ++ V128_DWORD(16, DWORD), ++ V128_QWORD(16, QWORD), ++ V128_SINGLE(16, SINGLE), ++ V128_DOUBLE(16, DOUBLE), ++ V256_BYTE(32, BYTE), ++ V256_WORD(32, WORD), ++ V256_DWORD(32, DWORD), ++ V256_QWORD(32, QWORD), ++ V256_SINGLE(32, SINGLE), ++ V256_DOUBLE(32, DOUBLE); ++ ++ private final int size; ++ private final int vectorLength; ++ ++ private final LoongArch64Kind scalar; ++ private final EnumKey key = new EnumKey<>(this); ++ ++ LoongArch64Kind(int size) { ++ this.size = size; ++ this.scalar = this; ++ this.vectorLength = 1; ++ } ++ ++ LoongArch64Kind(int size, LoongArch64Kind scalar) { ++ this.size = size; ++ this.scalar = scalar; ++ ++ assert size % scalar.size == 0; ++ this.vectorLength = size / scalar.size; ++ } ++ ++ public LoongArch64Kind getScalar() { ++ return scalar; ++ } ++ ++ @Override ++ public int getSizeInBytes() { ++ return size; ++ } ++ ++ @Override ++ public int getVectorLength() { ++ return vectorLength; ++ } ++ ++ @Override ++ public Key getKey() { ++ return key; ++ } ++ ++ public boolean isInteger() { ++ switch (this) { ++ case BYTE: ++ case WORD: ++ case DWORD: ++ case QWORD: ++ case UBYTE: ++ case UWORD: ++ case UDWORD: ++ return true; ++ default: ++ return false; ++ } ++ } ++ ++ public boolean isSIMD() { ++ switch (this) { ++ case SINGLE: ++ case DOUBLE: ++ case V128_BYTE: ++ case V128_WORD: ++ case V128_DWORD: ++ case V128_QWORD: ++ case V128_SINGLE: ++ case V128_DOUBLE: ++ case V256_BYTE: ++ case V256_WORD: ++ case V256_DWORD: ++ case V256_QWORD: ++ case V256_SINGLE: ++ case V256_DOUBLE: ++ return true; ++ default: ++ return false; ++ } ++ } ++ ++ @Override ++ public char getTypeChar() { ++ switch (this) { ++ case BYTE: ++ return 'b'; ++ case WORD: ++ return 'w'; ++ case DWORD: ++ return 'd'; ++ case QWORD: ++ return 'q'; ++ case SINGLE: ++ return 'S'; ++ case DOUBLE: ++ return 'D'; ++ case V128_BYTE: ++ case V128_WORD: ++ case V128_DWORD: ++ case V128_QWORD: ++ case V128_SINGLE: ++ case V128_DOUBLE: ++ case V256_BYTE: ++ case V256_WORD: ++ case V256_DWORD: ++ case V256_QWORD: ++ case V256_SINGLE: ++ case V256_DOUBLE: ++ return 'v'; ++ default: ++ return '-'; ++ } ++ } ++} +diff --git a/src/jdk.internal.vm.ci/share/classes/jdk.vm.ci.loongarch64/src/jdk/vm/ci/loongarch64/package-info.java b/src/jdk.internal.vm.ci/share/classes/jdk.vm.ci.loongarch64/src/jdk/vm/ci/loongarch64/package-info.java +new file mode 100644 +index 0000000000..9d020833ea +--- /dev/null ++++ b/src/jdk.internal.vm.ci/share/classes/jdk.vm.ci.loongarch64/src/jdk/vm/ci/loongarch64/package-info.java +@@ -0,0 +1,28 @@ ++/* ++ * Copyright (c) 2018, 2022, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ */ ++ ++/** ++ * The LoongArch64 platform independent portions of the JVMCI API. ++ */ ++package jdk.vm.ci.loongarch64; +diff --git a/src/jdk.internal.vm.ci/share/classes/module-info.java b/src/jdk.internal.vm.ci/share/classes/module-info.java +index fed310d386..661f106d30 100644 +--- a/src/jdk.internal.vm.ci/share/classes/module-info.java ++++ b/src/jdk.internal.vm.ci/share/classes/module-info.java +@@ -23,6 +23,12 @@ + * questions. + */ + ++/* ++ * This file has been modified by Loongson Technology in 2022, These ++ * modifications are Copyright (c) 2022, Loongson Technology, and are made ++ * available on the same license terms set forth above. ++ */ ++ + module jdk.internal.vm.ci { + exports jdk.vm.ci.services to jdk.internal.vm.compiler; + exports jdk.vm.ci.runtime to +@@ -37,6 +43,7 @@ module jdk.internal.vm.ci { + + provides jdk.vm.ci.hotspot.HotSpotJVMCIBackendFactory with + jdk.vm.ci.hotspot.aarch64.AArch64HotSpotJVMCIBackendFactory, ++ jdk.vm.ci.hotspot.loongarch64.LoongArch64HotSpotJVMCIBackendFactory, + jdk.vm.ci.hotspot.amd64.AMD64HotSpotJVMCIBackendFactory, + jdk.vm.ci.hotspot.sparc.SPARCHotSpotJVMCIBackendFactory; + } +diff --git a/src/utils/hsdis/Makefile b/src/utils/hsdis/Makefile +index 2514a895da..08fbe3b953 100644 +--- a/src/utils/hsdis/Makefile ++++ b/src/utils/hsdis/Makefile +@@ -94,6 +94,9 @@ CC = gcc + endif + CFLAGS += -O + DLDFLAGS += -shared ++ifeq ($(ARCH), mips64) ++DLDFLAGS += -Wl,-z,noexecstack ++endif + LDFLAGS += -ldl + OUTFLAGS += -o $@ + else +diff --git a/test/hotspot/jtreg/compiler/cpuflags/TestAESIntrinsicsOnSupportedConfig.java b/test/hotspot/jtreg/compiler/cpuflags/TestAESIntrinsicsOnSupportedConfig.java +index ac17e567b0..9b004a2033 100644 +--- a/test/hotspot/jtreg/compiler/cpuflags/TestAESIntrinsicsOnSupportedConfig.java ++++ b/test/hotspot/jtreg/compiler/cpuflags/TestAESIntrinsicsOnSupportedConfig.java +@@ -21,12 +21,18 @@ + * questions. + */ + ++/* ++ * This file has been modified by Loongson Technology in 2021, These ++ * modifications are Copyright (c) 2021, Loongson Technology, and are made ++ * available on the same license terms set forth above. ++ */ ++ + /* + * @test + * @library /test/lib / + * @modules java.base/jdk.internal.misc + * java.management +- * @requires vm.cpu.features ~= ".*aes.*" & !vm.graal.enabled ++ * @requires (vm.cpu.features ~= ".*aes.*" | os.arch == "loongarch64") & !vm.graal.enabled + * @build sun.hotspot.WhiteBox + * @run driver ClassFileInstaller sun.hotspot.WhiteBox + * sun.hotspot.WhiteBox$WhiteBoxPermission +diff --git a/test/hotspot/jtreg/compiler/cpuflags/TestAESIntrinsicsOnUnsupportedConfig.java b/test/hotspot/jtreg/compiler/cpuflags/TestAESIntrinsicsOnUnsupportedConfig.java +index 60b2d03321..981a239979 100644 +--- a/test/hotspot/jtreg/compiler/cpuflags/TestAESIntrinsicsOnUnsupportedConfig.java ++++ b/test/hotspot/jtreg/compiler/cpuflags/TestAESIntrinsicsOnUnsupportedConfig.java +@@ -21,6 +21,12 @@ + * questions. 
+ */ + ++/* ++ * This file has been modified by Loongson Technology in 2021, These ++ * modifications are Copyright (c) 2021, Loongson Technology, and are made ++ * available on the same license terms set forth above. ++ */ ++ + /* + * @test + * @library /test/lib / +@@ -28,7 +34,7 @@ + * java.management + * + * @build sun.hotspot.WhiteBox +- * @requires !(vm.cpu.features ~= ".*aes.*") ++ * @requires !(vm.cpu.features ~= ".*aes.*" | os.arch == "loongarch64") + * @requires vm.compiler1.enabled | !vm.graal.enabled + * @run driver ClassFileInstaller sun.hotspot.WhiteBox + * sun.hotspot.WhiteBox$WhiteBoxPermission +diff --git a/test/hotspot/jtreg/compiler/intrinsics/sha/cli/testcases/GenericTestCaseForOtherCPU.java b/test/hotspot/jtreg/compiler/intrinsics/sha/cli/testcases/GenericTestCaseForOtherCPU.java +index faa9fdbae6..a635f03d24 100644 +--- a/test/hotspot/jtreg/compiler/intrinsics/sha/cli/testcases/GenericTestCaseForOtherCPU.java ++++ b/test/hotspot/jtreg/compiler/intrinsics/sha/cli/testcases/GenericTestCaseForOtherCPU.java +@@ -21,6 +21,12 @@ + * questions. + */ + ++/* ++ * This file has been modified by Loongson Technology in 2021, These ++ * modifications are Copyright (c) 2021, Loongson Technology, and are made ++ * available on the same license terms set forth above. ++ */ ++ + package compiler.intrinsics.sha.cli.testcases; + + import compiler.intrinsics.sha.cli.SHAOptionsBase; +@@ -32,19 +38,20 @@ import jdk.test.lib.cli.predicate.OrPredicate; + + /** + * Generic test case for SHA-related options targeted to any CPU except +- * AArch64, PPC, S390x, SPARC and X86. ++ * AArch64, PPC, S390x, SPARC, LoongArch64 and X86. + */ + public class GenericTestCaseForOtherCPU extends + SHAOptionsBase.TestCase { + public GenericTestCaseForOtherCPU(String optionName) { +- // Execute the test case on any CPU except AArch64, PPC, S390x, SPARC and X86. ++ // Execute the test case on any CPU except AArch64, PPC, S390x, SPARC, LoongArch64 and X86. 
+ super(optionName, new NotPredicate( + new OrPredicate(Platform::isAArch64, + new OrPredicate(Platform::isS390x, + new OrPredicate(Platform::isSparc, + new OrPredicate(Platform::isPPC, ++ new OrPredicate(Platform::isLoongArch64, + new OrPredicate(Platform::isX64, +- Platform::isX86))))))); ++ Platform::isX86)))))))); + } + + @Override +diff --git a/test/hotspot/jtreg/compiler/jvmci/jdk.vm.ci.code.test/src/jdk/vm/ci/code/test/CodeInstallationTest.java b/test/hotspot/jtreg/compiler/jvmci/jdk.vm.ci.code.test/src/jdk/vm/ci/code/test/CodeInstallationTest.java +index 62d0e99155..c3fa3fb93e 100644 +--- a/test/hotspot/jtreg/compiler/jvmci/jdk.vm.ci.code.test/src/jdk/vm/ci/code/test/CodeInstallationTest.java ++++ b/test/hotspot/jtreg/compiler/jvmci/jdk.vm.ci.code.test/src/jdk/vm/ci/code/test/CodeInstallationTest.java +@@ -29,6 +29,7 @@ import jdk.vm.ci.code.InstalledCode; + import jdk.vm.ci.code.TargetDescription; + import jdk.vm.ci.code.test.amd64.AMD64TestAssembler; + import jdk.vm.ci.code.test.sparc.SPARCTestAssembler; ++import jdk.vm.ci.code.test.loongarch64.LoongArch64TestAssembler; + import jdk.vm.ci.hotspot.HotSpotCompiledCode; + import jdk.vm.ci.hotspot.HotSpotJVMCIRuntime; + import jdk.vm.ci.hotspot.HotSpotResolvedJavaMethod; +@@ -37,6 +38,7 @@ import jdk.vm.ci.meta.MetaAccessProvider; + import jdk.vm.ci.runtime.JVMCI; + import jdk.vm.ci.runtime.JVMCIBackend; + import jdk.vm.ci.sparc.SPARC; ++import jdk.vm.ci.loongarch64.LoongArch64; + import org.junit.Assert; + + import java.lang.reflect.Method; +@@ -72,6 +74,8 @@ public class CodeInstallationTest { + return new AMD64TestAssembler(codeCache, config); + } else if (arch instanceof SPARC) { + return new SPARCTestAssembler(codeCache, config); ++ } else if (arch instanceof LoongArch64) { ++ return new LoongArch64TestAssembler(codeCache, config); + } else { + Assert.fail("unsupported architecture"); + return null; +diff --git a/test/hotspot/jtreg/compiler/jvmci/jdk.vm.ci.code.test/src/jdk/vm/ci/code/test/DataPatchTest.java b/test/hotspot/jtreg/compiler/jvmci/jdk.vm.ci.code.test/src/jdk/vm/ci/code/test/DataPatchTest.java +index 8afc7d7b98..520d7707a2 100644 +--- a/test/hotspot/jtreg/compiler/jvmci/jdk.vm.ci.code.test/src/jdk/vm/ci/code/test/DataPatchTest.java ++++ b/test/hotspot/jtreg/compiler/jvmci/jdk.vm.ci.code.test/src/jdk/vm/ci/code/test/DataPatchTest.java +@@ -23,7 +23,7 @@ + + /** + * @test +- * @requires vm.jvmci & (vm.simpleArch == "x64" | vm.simpleArch == "sparcv9") ++ * @requires vm.jvmci & (vm.simpleArch == "x64" | vm.simpleArch == "sparcv9" | vm.simpleArch == "loongarch64") + * @library / + * @modules jdk.internal.vm.ci/jdk.vm.ci.hotspot + * jdk.internal.vm.ci/jdk.vm.ci.meta +@@ -32,7 +32,8 @@ + * jdk.internal.vm.ci/jdk.vm.ci.runtime + * jdk.internal.vm.ci/jdk.vm.ci.amd64 + * jdk.internal.vm.ci/jdk.vm.ci.sparc +- * @compile CodeInstallationTest.java DebugInfoTest.java TestAssembler.java TestHotSpotVMConfig.java amd64/AMD64TestAssembler.java sparc/SPARCTestAssembler.java ++ * jdk.internal.vm.ci/jdk.vm.ci.loongarch64 ++ * @compile CodeInstallationTest.java DebugInfoTest.java TestAssembler.java TestHotSpotVMConfig.java amd64/AMD64TestAssembler.java sparc/SPARCTestAssembler.java loongarch64/LoongArch64TestAssembler.java + * @run junit/othervm -XX:+UnlockExperimentalVMOptions -XX:+EnableJVMCI -Djvmci.Compiler=null jdk.vm.ci.code.test.DataPatchTest + */ + +diff --git a/test/hotspot/jtreg/compiler/jvmci/jdk.vm.ci.code.test/src/jdk/vm/ci/code/test/InterpreterFrameSizeTest.java 
b/test/hotspot/jtreg/compiler/jvmci/jdk.vm.ci.code.test/src/jdk/vm/ci/code/test/InterpreterFrameSizeTest.java +index 75d0748da5..a6826e2ffe 100644 +--- a/test/hotspot/jtreg/compiler/jvmci/jdk.vm.ci.code.test/src/jdk/vm/ci/code/test/InterpreterFrameSizeTest.java ++++ b/test/hotspot/jtreg/compiler/jvmci/jdk.vm.ci.code.test/src/jdk/vm/ci/code/test/InterpreterFrameSizeTest.java +@@ -23,7 +23,7 @@ + + /** + * @test +- * @requires vm.jvmci & (vm.simpleArch == "x64" | vm.simpleArch == "sparcv9") ++ * @requires vm.jvmci & (vm.simpleArch == "x64" | vm.simpleArch == "sparcv9" | vm.simpleArch == "loongarch64") + * @modules jdk.internal.vm.ci/jdk.vm.ci.hotspot + * jdk.internal.vm.ci/jdk.vm.ci.code + * jdk.internal.vm.ci/jdk.vm.ci.code.site +@@ -32,7 +32,8 @@ + * jdk.internal.vm.ci/jdk.vm.ci.common + * jdk.internal.vm.ci/jdk.vm.ci.amd64 + * jdk.internal.vm.ci/jdk.vm.ci.sparc +- * @compile CodeInstallationTest.java TestAssembler.java TestHotSpotVMConfig.java amd64/AMD64TestAssembler.java sparc/SPARCTestAssembler.java ++ * jdk.internal.vm.ci/jdk.vm.ci.loongarch64 ++ * @compile CodeInstallationTest.java TestAssembler.java TestHotSpotVMConfig.java amd64/AMD64TestAssembler.java sparc/SPARCTestAssembler.java loongarch64/LoongArch64TestAssembler.java + * @run junit/othervm -XX:+UnlockExperimentalVMOptions -XX:+EnableJVMCI -Djvmci.Compiler=null jdk.vm.ci.code.test.InterpreterFrameSizeTest + */ + +diff --git a/test/hotspot/jtreg/compiler/jvmci/jdk.vm.ci.code.test/src/jdk/vm/ci/code/test/MaxOopMapStackOffsetTest.java b/test/hotspot/jtreg/compiler/jvmci/jdk.vm.ci.code.test/src/jdk/vm/ci/code/test/MaxOopMapStackOffsetTest.java +index a67fa2c1df..59cce6454d 100644 +--- a/test/hotspot/jtreg/compiler/jvmci/jdk.vm.ci.code.test/src/jdk/vm/ci/code/test/MaxOopMapStackOffsetTest.java ++++ b/test/hotspot/jtreg/compiler/jvmci/jdk.vm.ci.code.test/src/jdk/vm/ci/code/test/MaxOopMapStackOffsetTest.java +@@ -23,7 +23,7 @@ + + /** + * @test +- * @requires vm.jvmci & (vm.simpleArch == "x64" | vm.simpleArch == "sparcv9") ++ * @requires vm.jvmci & (vm.simpleArch == "x64" | vm.simpleArch == "sparcv9" | vm.simpleArch == "loongarch64") + * @library / + * @modules jdk.internal.vm.ci/jdk.vm.ci.hotspot + * jdk.internal.vm.ci/jdk.vm.ci.meta +@@ -33,7 +33,8 @@ + * jdk.internal.vm.ci/jdk.vm.ci.runtime + * jdk.internal.vm.ci/jdk.vm.ci.amd64 + * jdk.internal.vm.ci/jdk.vm.ci.sparc +- * @compile CodeInstallationTest.java DebugInfoTest.java TestAssembler.java TestHotSpotVMConfig.java amd64/AMD64TestAssembler.java sparc/SPARCTestAssembler.java ++ * jdk.internal.vm.ci/jdk.vm.ci.loongarch64 ++ * @compile CodeInstallationTest.java DebugInfoTest.java TestAssembler.java TestHotSpotVMConfig.java amd64/AMD64TestAssembler.java sparc/SPARCTestAssembler.java loongarch64/LoongArch64TestAssembler.java + * @run junit/othervm -XX:+UnlockExperimentalVMOptions -XX:+EnableJVMCI -Djvmci.Compiler=null jdk.vm.ci.code.test.MaxOopMapStackOffsetTest + */ + +diff --git a/test/hotspot/jtreg/compiler/jvmci/jdk.vm.ci.code.test/src/jdk/vm/ci/code/test/NativeCallTest.java b/test/hotspot/jtreg/compiler/jvmci/jdk.vm.ci.code.test/src/jdk/vm/ci/code/test/NativeCallTest.java +index d9e1f24c30..259218b305 100644 +--- a/test/hotspot/jtreg/compiler/jvmci/jdk.vm.ci.code.test/src/jdk/vm/ci/code/test/NativeCallTest.java ++++ b/test/hotspot/jtreg/compiler/jvmci/jdk.vm.ci.code.test/src/jdk/vm/ci/code/test/NativeCallTest.java +@@ -23,7 +23,7 @@ + + /** + * @test +- * @requires vm.jvmci & (vm.simpleArch == "x64" | vm.simpleArch == "sparcv9") ++ * @requires vm.jvmci & (vm.simpleArch == "x64" 
| vm.simpleArch == "sparcv9" | vm.simpleArch == "loongarch64") + * @library /test/lib / + * @modules jdk.internal.vm.ci/jdk.vm.ci.hotspot + * jdk.internal.vm.ci/jdk.vm.ci.code +@@ -33,7 +33,8 @@ + * jdk.internal.vm.ci/jdk.vm.ci.common + * jdk.internal.vm.ci/jdk.vm.ci.amd64 + * jdk.internal.vm.ci/jdk.vm.ci.sparc +- * @compile CodeInstallationTest.java TestHotSpotVMConfig.java NativeCallTest.java TestAssembler.java sparc/SPARCTestAssembler.java amd64/AMD64TestAssembler.java ++ * jdk.internal.vm.ci/jdk.vm.ci.loongarch64 ++ * @compile CodeInstallationTest.java TestHotSpotVMConfig.java NativeCallTest.java TestAssembler.java sparc/SPARCTestAssembler.java amd64/AMD64TestAssembler.java loongarch64/LoongArch64TestAssembler.java + * @run junit/othervm/native -XX:+UnlockExperimentalVMOptions -XX:+EnableJVMCI -Xbootclasspath/a:. jdk.vm.ci.code.test.NativeCallTest + */ + package jdk.vm.ci.code.test; +diff --git a/test/hotspot/jtreg/compiler/jvmci/jdk.vm.ci.code.test/src/jdk/vm/ci/code/test/SimpleCodeInstallationTest.java b/test/hotspot/jtreg/compiler/jvmci/jdk.vm.ci.code.test/src/jdk/vm/ci/code/test/SimpleCodeInstallationTest.java +index 9b92114055..00d0f53cdb 100644 +--- a/test/hotspot/jtreg/compiler/jvmci/jdk.vm.ci.code.test/src/jdk/vm/ci/code/test/SimpleCodeInstallationTest.java ++++ b/test/hotspot/jtreg/compiler/jvmci/jdk.vm.ci.code.test/src/jdk/vm/ci/code/test/SimpleCodeInstallationTest.java +@@ -23,7 +23,7 @@ + + /** + * @test +- * @requires vm.jvmci & (vm.simpleArch == "x64" | vm.simpleArch == "sparcv9") ++ * @requires vm.jvmci & (vm.simpleArch == "x64" | vm.simpleArch == "sparcv9" | vm.simpleArch == "loongarch64") + * @library / + * @modules jdk.internal.vm.ci/jdk.vm.ci.hotspot + * jdk.internal.vm.ci/jdk.vm.ci.meta +@@ -32,7 +32,8 @@ + * jdk.internal.vm.ci/jdk.vm.ci.runtime + * jdk.internal.vm.ci/jdk.vm.ci.amd64 + * jdk.internal.vm.ci/jdk.vm.ci.sparc +- * @compile CodeInstallationTest.java DebugInfoTest.java TestAssembler.java TestHotSpotVMConfig.java amd64/AMD64TestAssembler.java sparc/SPARCTestAssembler.java ++ * jdk.internal.vm.ci/jdk.vm.ci.loongarch64 ++ * @compile CodeInstallationTest.java DebugInfoTest.java TestAssembler.java TestHotSpotVMConfig.java amd64/AMD64TestAssembler.java sparc/SPARCTestAssembler.java loongarch64/LoongArch64TestAssembler.java + * @run junit/othervm -XX:+UnlockExperimentalVMOptions -XX:+EnableJVMCI -Djvmci.Compiler=null jdk.vm.ci.code.test.SimpleCodeInstallationTest + */ + +diff --git a/test/hotspot/jtreg/compiler/jvmci/jdk.vm.ci.code.test/src/jdk/vm/ci/code/test/SimpleDebugInfoTest.java b/test/hotspot/jtreg/compiler/jvmci/jdk.vm.ci.code.test/src/jdk/vm/ci/code/test/SimpleDebugInfoTest.java +index 5b2204868c..ecfcb1cf01 100644 +--- a/test/hotspot/jtreg/compiler/jvmci/jdk.vm.ci.code.test/src/jdk/vm/ci/code/test/SimpleDebugInfoTest.java ++++ b/test/hotspot/jtreg/compiler/jvmci/jdk.vm.ci.code.test/src/jdk/vm/ci/code/test/SimpleDebugInfoTest.java +@@ -23,7 +23,7 @@ + + /** + * @test +- * @requires vm.jvmci & (vm.simpleArch == "x64" | vm.simpleArch == "sparcv9") ++ * @requires vm.jvmci & (vm.simpleArch == "x64" | vm.simpleArch == "sparcv9" | vm.simpleArch == "loongarch64") + * @library / + * @modules jdk.internal.vm.ci/jdk.vm.ci.hotspot + * jdk.internal.vm.ci/jdk.vm.ci.meta +@@ -32,7 +32,8 @@ + * jdk.internal.vm.ci/jdk.vm.ci.runtime + * jdk.internal.vm.ci/jdk.vm.ci.amd64 + * jdk.internal.vm.ci/jdk.vm.ci.sparc +- * @compile CodeInstallationTest.java DebugInfoTest.java TestAssembler.java TestHotSpotVMConfig.java amd64/AMD64TestAssembler.java sparc/SPARCTestAssembler.java 
++ * jdk.internal.vm.ci/jdk.vm.ci.loongarch64 ++ * @compile CodeInstallationTest.java DebugInfoTest.java TestAssembler.java TestHotSpotVMConfig.java amd64/AMD64TestAssembler.java sparc/SPARCTestAssembler.java loongarch64/LoongArch64TestAssembler.java + * @run junit/othervm -XX:+UnlockExperimentalVMOptions -XX:+EnableJVMCI -Djvmci.Compiler=null jdk.vm.ci.code.test.SimpleDebugInfoTest + */ + +diff --git a/test/hotspot/jtreg/compiler/jvmci/jdk.vm.ci.code.test/src/jdk/vm/ci/code/test/VirtualObjectDebugInfoTest.java b/test/hotspot/jtreg/compiler/jvmci/jdk.vm.ci.code.test/src/jdk/vm/ci/code/test/VirtualObjectDebugInfoTest.java +index a10e90acda..5b1a58c74b 100644 +--- a/test/hotspot/jtreg/compiler/jvmci/jdk.vm.ci.code.test/src/jdk/vm/ci/code/test/VirtualObjectDebugInfoTest.java ++++ b/test/hotspot/jtreg/compiler/jvmci/jdk.vm.ci.code.test/src/jdk/vm/ci/code/test/VirtualObjectDebugInfoTest.java +@@ -23,7 +23,7 @@ + + /** + * @test +- * @requires vm.jvmci & (vm.simpleArch == "x64" | vm.simpleArch == "sparcv9") ++ * @requires vm.jvmci & (vm.simpleArch == "x64" | vm.simpleArch == "sparcv9" | vm.simpleArch == "loongarch64") + * @library / + * @modules jdk.internal.vm.ci/jdk.vm.ci.hotspot + * jdk.internal.vm.ci/jdk.vm.ci.meta +@@ -32,7 +32,8 @@ + * jdk.internal.vm.ci/jdk.vm.ci.runtime + * jdk.internal.vm.ci/jdk.vm.ci.amd64 + * jdk.internal.vm.ci/jdk.vm.ci.sparc +- * @compile CodeInstallationTest.java DebugInfoTest.java TestAssembler.java TestHotSpotVMConfig.java amd64/AMD64TestAssembler.java sparc/SPARCTestAssembler.java ++ * jdk.internal.vm.ci/jdk.vm.ci.loongarch64 ++ * @compile CodeInstallationTest.java DebugInfoTest.java TestAssembler.java TestHotSpotVMConfig.java amd64/AMD64TestAssembler.java sparc/SPARCTestAssembler.java loongarch64/LoongArch64TestAssembler.java + * @run junit/othervm -XX:+UnlockExperimentalVMOptions -XX:+EnableJVMCI -Djvmci.Compiler=null jdk.vm.ci.code.test.VirtualObjectDebugInfoTest + */ + +diff --git a/test/hotspot/jtreg/compiler/jvmci/jdk.vm.ci.code.test/src/jdk/vm/ci/code/test/loongarch64/LoongArch64TestAssembler.java b/test/hotspot/jtreg/compiler/jvmci/jdk.vm.ci.code.test/src/jdk/vm/ci/code/test/loongarch64/LoongArch64TestAssembler.java +new file mode 100644 +index 0000000000..4c76868453 +--- /dev/null ++++ b/test/hotspot/jtreg/compiler/jvmci/jdk.vm.ci.code.test/src/jdk/vm/ci/code/test/loongarch64/LoongArch64TestAssembler.java +@@ -0,0 +1,568 @@ ++/* ++ * Copyright (c) 2020, 2022, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 
++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ */ ++ ++package jdk.vm.ci.code.test.loongarch64; ++ ++import jdk.vm.ci.loongarch64.LoongArch64; ++import jdk.vm.ci.loongarch64.LoongArch64Kind; ++import jdk.vm.ci.code.CallingConvention; ++import jdk.vm.ci.code.CodeCacheProvider; ++import jdk.vm.ci.code.DebugInfo; ++import jdk.vm.ci.code.Register; ++import jdk.vm.ci.code.RegisterArray; ++import jdk.vm.ci.code.RegisterValue; ++import jdk.vm.ci.code.StackSlot; ++import jdk.vm.ci.code.site.ConstantReference; ++import jdk.vm.ci.code.site.DataSectionReference; ++import jdk.vm.ci.code.test.TestAssembler; ++import jdk.vm.ci.code.test.TestHotSpotVMConfig; ++import jdk.vm.ci.hotspot.HotSpotCallingConventionType; ++import jdk.vm.ci.hotspot.HotSpotConstant; ++import jdk.vm.ci.hotspot.HotSpotForeignCallTarget; ++import jdk.vm.ci.meta.AllocatableValue; ++import jdk.vm.ci.meta.JavaKind; ++import jdk.vm.ci.meta.VMConstant; ++ ++public class LoongArch64TestAssembler extends TestAssembler { ++ ++ private static final Register scratchRegister = LoongArch64.SCR1; ++ private static final Register doubleScratch = LoongArch64.f23; ++ private static final RegisterArray nativeGeneralParameterRegisters = new RegisterArray(LoongArch64.a0, ++ LoongArch64.a1, LoongArch64.a2, ++ LoongArch64.a3, LoongArch64.a4, ++ LoongArch64.a5, LoongArch64.a6, ++ LoongArch64.a7); ++ private static final RegisterArray floatParameterRegisters = new RegisterArray(LoongArch64.f0, ++ LoongArch64.f1, LoongArch64.f2, ++ LoongArch64.f3, LoongArch64.f4, ++ LoongArch64.f5, LoongArch64.f6, ++ LoongArch64.f7); ++ private static int currentGeneral = 0; ++ private static int currentFloat = 0; ++ public LoongArch64TestAssembler(CodeCacheProvider codeCache, TestHotSpotVMConfig config) { ++ super(codeCache, config, ++ 16 /* initialFrameSize */, 16 /* stackAlignment */, ++ LoongArch64Kind.UDWORD /* narrowOopKind */, ++ /* registers */ ++ LoongArch64.a0, LoongArch64.a1, LoongArch64.a2, LoongArch64.a3, ++ LoongArch64.a4, LoongArch64.a5, LoongArch64.a6, LoongArch64.a7); ++ } ++ ++ private static int low(int x, int l) { ++ assert l < 32; ++ return (x >> 0) & ((1 << l)-1); ++ } ++ ++ private static int low16(int x) { ++ return low(x, 16); ++ } ++ ++ private void emitNop() { ++ code.emitInt(0x3400000); ++ } ++ ++ private void emitPcaddu12i(Register rj, int si20) { ++ // pcaddu12i ++ code.emitInt((0b0001110 << 25) ++ | (low(si20, 20) << 5) ++ | rj.encoding); ++ } ++ ++ private void emitAdd(Register rd, Register rj, Register rk) { ++ // add_d ++ code.emitInt((0b00000000000100001 << 15) ++ | (rk.encoding << 10) ++ | (rj.encoding << 5) ++ | rd.encoding); ++ } ++ ++ private void emitAdd(Register rd, Register rj, int si12) { ++ // addi_d ++ code.emitInt((0b0000001011 << 22) ++ | (low(si12, 12) << 10) ++ | (rj.encoding << 5) ++ | rd.encoding); ++ } ++ ++ private void emitSub(Register rd, Register rj, Register rk) { ++ // sub_d ++ code.emitInt((0b00000000000100011 << 15) ++ | (rk.encoding << 10) ++ | (rj.encoding << 5) ++ | rd.encoding); ++ } ++ ++ private void emitShiftLeft(Register rd, Register rj, int shift) { ++ // slli_d ++ code.emitInt((0b00000000010000 << 18) ++ | (low(( (0b01 << 6) | shift ), 8) << 10) ++ | (rj.encoding << 5) ++ | rd.encoding); ++ } ++ ++ private void emitLu12i_w(Register rj, int imm20) { ++ // lu12i_w ++ code.emitInt((0b0001010 << 25) ++ | (low(imm20, 20)<<5) ++ | rj.encoding); ++ } ++ ++ private void 
emitOri(Register rd, Register rj, int ui12) { ++ // ori ++ code.emitInt((0b0000001110 << 22) ++ | (low(ui12, 12) << 10) ++ | (rj.encoding << 5) ++ | rd.encoding); ++ } ++ ++ private void emitLu32i_d(Register rj, int imm20) { ++ // lu32i_d ++ code.emitInt((0b0001011 << 25) ++ | (low(imm20, 20)<<5) ++ | rj.encoding); ++ } ++ ++ private void emitLu52i_d(Register rd, Register rj, int imm12) { ++ // lu52i_d ++ code.emitInt((0b0000001100 << 22) ++ | (low(imm12, 12) << 10) ++ | (rj.encoding << 5) ++ | rd.encoding); ++ } ++ ++ private void emitLoadImmediate(Register rd, int imm32) { ++ emitLu12i_w(rd, (imm32 >> 12) & 0xfffff); ++ emitOri(rd, rd, imm32 & 0xfff); ++ } ++ ++ private void emitLi52(Register rj, long imm) { ++ emitLu12i_w(rj, (int) ((imm >> 12) & 0xfffff)); ++ emitOri(rj, rj, (int) (imm & 0xfff)); ++ emitLu32i_d(rj, (int) ((imm >> 32) & 0xfffff)); ++ } ++ ++ private void emitLi64(Register rj, long imm) { ++ emitLu12i_w(rj, (int) ((imm >> 12) & 0xfffff)); ++ emitOri(rj, rj, (int) (imm & 0xfff)); ++ emitLu32i_d(rj, (int) ((imm >> 32) & 0xfffff)); ++ emitLu52i_d(rj, rj, (int) ((imm >> 52) & 0xfff)); ++ } ++ ++ private void emitOr(Register rd, Register rj, Register rk) { ++ // orr ++ code.emitInt((0b00000000000101010 << 15) ++ | (rk.encoding << 10) ++ | (rj.encoding << 5) ++ | rd.encoding); ++ } ++ ++ private void emitMove(Register rd, Register rs) { ++ // move ++ emitOr(rd, rs, LoongArch64.zero); ++ } ++ ++ private void emitMovfr2gr(Register rd, LoongArch64Kind kind, Register rj) { ++ // movfr2gr_s/movfr2gr_d ++ int opc = 0; ++ switch (kind) { ++ case SINGLE: opc = 0b0000000100010100101101; break; ++ case DOUBLE: opc = 0b0000000100010100101110; break; ++ default: throw new IllegalArgumentException(); ++ } ++ code.emitInt((opc << 10) ++ | (rj.encoding << 5) ++ | rd.encoding); ++ } ++ ++ private void emitLoadRegister(Register rd, LoongArch64Kind kind, Register rj, int offset) { ++ // load ++ assert offset >= 0; ++ int opc = 0; ++ switch (kind) { ++ case BYTE: opc = 0b0010100000; break; ++ case WORD: opc = 0b0010100001; break; ++ case DWORD: opc = 0b0010100010; break; ++ case QWORD: opc = 0b0010100011; break; ++ case UDWORD: opc = 0b0010101010; break; ++ case SINGLE: opc = 0b0010101100; break; ++ case DOUBLE: opc = 0b0010101110; break; ++ default: throw new IllegalArgumentException(); ++ } ++ code.emitInt((opc << 22) ++ | (low(offset, 12) << 10) ++ | (rj.encoding << 5) ++ | rd.encoding); ++ } ++ ++ private void emitStoreRegister(Register rd, LoongArch64Kind kind, Register rj, int offset) { ++ // store ++ assert offset >= 0; ++ int opc = 0; ++ switch (kind) { ++ case BYTE: opc = 0b0010100100; break; ++ case WORD: opc = 0b0010100101; break; ++ case DWORD: opc = 0b0010100110; break; ++ case QWORD: opc = 0b0010100111; break; ++ case SINGLE: opc = 0b0010101101; break; ++ case DOUBLE: opc = 0b0010101111; break; ++ default: throw new IllegalArgumentException(); ++ } ++ code.emitInt((opc << 22) ++ | (low(offset, 12) << 10) ++ | (rj.encoding << 5) ++ | rd.encoding); ++ } ++ ++ private void emitJirl(Register rd, Register rj, int offs) { ++ // jirl ++ code.emitInt((0b010011 << 26) ++ | (low16(offs >> 2) << 10) ++ | (rj.encoding << 5) ++ | rd.encoding); ++ } ++ ++ @Override ++ public void emitGrowStack(int size) { ++ assert size % 16 == 0; ++ if (size > -4096 && size < 0) { ++ emitAdd(LoongArch64.sp, LoongArch64.sp, -size); ++ } else if (size == 0) { ++ // No-op ++ } else if (size < 4096) { ++ emitAdd(LoongArch64.sp, LoongArch64.sp, -size); ++ } else if (size < 65535) { ++ 
emitLoadImmediate(scratchRegister, size); ++ emitSub(LoongArch64.sp, LoongArch64.sp, scratchRegister); ++ } else { ++ throw new IllegalArgumentException(); ++ } ++ } ++ ++ @Override ++ public void emitPrologue() { ++ // Must be patchable by NativeJump::patch_verified_entry ++ emitNop(); ++ emitGrowStack(32); ++ emitStoreRegister(LoongArch64.ra, LoongArch64Kind.QWORD, LoongArch64.sp, 24); ++ emitStoreRegister(LoongArch64.fp, LoongArch64Kind.QWORD, LoongArch64.sp, 16); ++ emitGrowStack(-16); ++ emitMove(LoongArch64.fp, LoongArch64.sp); ++ setDeoptRescueSlot(newStackSlot(LoongArch64Kind.QWORD)); ++ } ++ ++ @Override ++ public void emitEpilogue() { ++ recordMark(config.MARKID_DEOPT_HANDLER_ENTRY); ++ recordCall(new HotSpotForeignCallTarget(config.handleDeoptStub), 4*4, true, null); ++ emitCall(0xdeaddeaddeadL); ++ } ++ ++ @Override ++ public void emitCallPrologue(CallingConvention cc, Object... prim) { ++ emitGrowStack(cc.getStackSize()); ++ frameSize += cc.getStackSize(); ++ AllocatableValue[] args = cc.getArguments(); ++ for (int i = 0; i < args.length; i++) { ++ emitLoad(args[i], prim[i]); ++ } ++ currentGeneral = 0; ++ currentFloat = 0; ++ } ++ ++ @Override ++ public void emitCallEpilogue(CallingConvention cc) { ++ emitGrowStack(-cc.getStackSize()); ++ frameSize -= cc.getStackSize(); ++ } ++ ++ @Override ++ public void emitCall(long addr) { ++ // long call (absolute) ++ // lu12i_w(T4, split_low20(value >> 12)); ++ // lu32i_d(T4, split_low20(value >> 32)); ++ // jirl(RA, T4, split_low12(value)); ++ emitLu12i_w(LoongArch64.t4, (int) ((addr >> 12) & 0xfffff)); ++ emitLu32i_d(LoongArch64.t4, (int) ((addr >> 32) & 0xfffff)); ++ emitJirl(LoongArch64.ra, LoongArch64.t4, (int) (addr & 0xfff)); ++ } ++ ++ @Override ++ public void emitLoad(AllocatableValue av, Object prim) { ++ if (av instanceof RegisterValue) { ++ Register reg = ((RegisterValue) av).getRegister(); ++ if (prim instanceof Float) { ++ if (currentFloat < floatParameterRegisters.size()) { ++ currentFloat++; ++ emitLoadFloat(reg, (Float) prim); ++ } else if (currentGeneral < nativeGeneralParameterRegisters.size()) { ++ currentGeneral++; ++ emitLoadFloat(doubleScratch, (Float) prim); ++ emitMovfr2gr(reg, LoongArch64Kind.SINGLE, doubleScratch); ++ } ++ } else if (prim instanceof Double) { ++ if (currentFloat < floatParameterRegisters.size()) { ++ currentFloat++; ++ emitLoadDouble(reg, (Double) prim); ++ } else if (currentGeneral < nativeGeneralParameterRegisters.size()) { ++ currentGeneral++; ++ emitLoadDouble(doubleScratch, (Double) prim); ++ emitMovfr2gr(reg, LoongArch64Kind.DOUBLE, doubleScratch); ++ } ++ } else if (prim instanceof Integer) { ++ emitLoadInt(reg, (Integer) prim); ++ } else if (prim instanceof Long) { ++ emitLoadLong(reg, (Long) prim); ++ } ++ } else if (av instanceof StackSlot) { ++ StackSlot slot = (StackSlot) av; ++ if (prim instanceof Float) { ++ emitFloatToStack(slot, emitLoadFloat(doubleScratch, (Float) prim)); ++ } else if (prim instanceof Double) { ++ emitDoubleToStack(slot, emitLoadDouble(doubleScratch, (Double) prim)); ++ } else if (prim instanceof Integer) { ++ emitIntToStack(slot, emitLoadInt(scratchRegister, (Integer) prim)); ++ } else if (prim instanceof Long) { ++ emitLongToStack(slot, emitLoadLong(scratchRegister, (Long) prim)); ++ } else { ++ assert false : "Unimplemented"; ++ } ++ } else { ++ throw new IllegalArgumentException("Unknown value " + av); ++ } ++ } ++ ++ @Override ++ public Register emitLoadPointer(HotSpotConstant c) { ++ recordDataPatchInCode(new ConstantReference((VMConstant) c)); ++ ++ 
Register ret = newRegister(); ++ // need to match patchable_li52 instruction sequence ++ // lu12i_ori_lu32i ++ emitLi52(ret, 0xdeaddead); ++ return ret; ++ } ++ ++ @Override ++ public Register emitLoadPointer(Register b, int offset) { ++ Register ret = newRegister(); ++ emitLoadRegister(ret, LoongArch64Kind.QWORD, b, offset); ++ return ret; ++ } ++ ++ @Override ++ public Register emitLoadNarrowPointer(DataSectionReference ref) { ++ recordDataPatchInCode(ref); ++ ++ Register ret = newRegister(); ++ emitPcaddu12i(ret, 0xdead >> 12); ++ emitAdd(ret, ret, 0xdead & 0xfff); ++ emitLoadRegister(ret, LoongArch64Kind.UDWORD, ret, 0); ++ return ret; ++ } ++ ++ @Override ++ public Register emitLoadPointer(DataSectionReference ref) { ++ recordDataPatchInCode(ref); ++ ++ Register ret = newRegister(); ++ emitPcaddu12i(ret, 0xdead >> 12); ++ emitAdd(ret, ret, 0xdead & 0xfff); ++ emitLoadRegister(ret, LoongArch64Kind.QWORD, ret, 0); ++ return ret; ++ } ++ ++ private Register emitLoadDouble(Register reg, double c) { ++ DataSectionReference ref = new DataSectionReference(); ++ ref.setOffset(data.position()); ++ data.emitDouble(c); ++ ++ recordDataPatchInCode(ref); ++ emitPcaddu12i(scratchRegister, 0xdead >> 12); ++ emitAdd(scratchRegister, scratchRegister, 0xdead & 0xfff); ++ emitLoadRegister(reg, LoongArch64Kind.DOUBLE, scratchRegister, 0); ++ return reg; ++ } ++ ++ private Register emitLoadFloat(Register reg, float c) { ++ DataSectionReference ref = new DataSectionReference(); ++ ref.setOffset(data.position()); ++ data.emitFloat(c); ++ ++ recordDataPatchInCode(ref); ++ emitPcaddu12i(scratchRegister, 0xdead >> 12); ++ emitAdd(scratchRegister, scratchRegister, 0xdead & 0xfff); ++ emitLoadRegister(reg, LoongArch64Kind.SINGLE, scratchRegister, 0); ++ return reg; ++ } ++ ++ @Override ++ public Register emitLoadFloat(float c) { ++ Register ret = LoongArch64.fv0; ++ return emitLoadFloat(ret, c); ++ } ++ ++ private Register emitLoadLong(Register reg, long c) { ++ emitLi64(reg, c); ++ return reg; ++ } ++ ++ @Override ++ public Register emitLoadLong(long c) { ++ Register ret = newRegister(); ++ return emitLoadLong(ret, c); ++ } ++ ++ private Register emitLoadInt(Register reg, int c) { ++ emitLoadImmediate(reg, c); ++ return reg; ++ } ++ ++ @Override ++ public Register emitLoadInt(int c) { ++ Register ret = newRegister(); ++ return emitLoadInt(ret, c); ++ } ++ ++ @Override ++ public Register emitIntArg0() { ++ return codeCache.getRegisterConfig() ++ .getCallingConventionRegisters(HotSpotCallingConventionType.JavaCall, JavaKind.Int) ++ .get(0); ++ } ++ ++ @Override ++ public Register emitIntArg1() { ++ return codeCache.getRegisterConfig() ++ .getCallingConventionRegisters(HotSpotCallingConventionType.JavaCall, JavaKind.Int) ++ .get(1); ++ } ++ ++ @Override ++ public Register emitIntAdd(Register a, Register b) { ++ emitAdd(a, a, b); ++ return a; ++ } ++ ++ @Override ++ public void emitTrap(DebugInfo info) { ++ // Dereference null pointer ++ emitMove(scratchRegister, LoongArch64.zero); ++ recordImplicitException(info); ++ emitLoadRegister(LoongArch64.zero, LoongArch64Kind.QWORD, scratchRegister, 0); ++ } ++ ++ @Override ++ public void emitIntRet(Register a) { ++ emitMove(LoongArch64.v0, a); ++ emitMove(LoongArch64.sp, LoongArch64.fp); ++ emitLoadRegister(LoongArch64.ra, LoongArch64Kind.QWORD, LoongArch64.sp, 8); ++ emitLoadRegister(LoongArch64.fp, LoongArch64Kind.QWORD, LoongArch64.sp, 0); ++ emitGrowStack(-16); ++ emitJirl(LoongArch64.zero, LoongArch64.ra, 0); ++ } ++ ++ @Override ++ public void emitFloatRet(Register 
a) { ++ assert a == LoongArch64.fv0 : "Unimplemented move " + a; ++ emitMove(LoongArch64.sp, LoongArch64.fp); ++ emitLoadRegister(LoongArch64.ra, LoongArch64Kind.QWORD, LoongArch64.sp, 8); ++ emitLoadRegister(LoongArch64.fp, LoongArch64Kind.QWORD, LoongArch64.sp, 0); ++ emitGrowStack(-16); ++ emitJirl(LoongArch64.zero, LoongArch64.ra, 0); ++ } ++ ++ @Override ++ public void emitPointerRet(Register a) { ++ emitIntRet(a); ++ } ++ ++ @Override ++ public StackSlot emitPointerToStack(Register a) { ++ return emitLongToStack(a); ++ } ++ ++ @Override ++ public StackSlot emitNarrowPointerToStack(Register a) { ++ return emitIntToStack(a); ++ } ++ ++ @Override ++ public Register emitUncompressPointer(Register compressed, long base, int shift) { ++ if (shift > 0) { ++ emitShiftLeft(compressed, compressed, shift); ++ } ++ ++ if (base != 0) { ++ emitLoadLong(scratchRegister, base); ++ emitAdd(compressed, compressed, scratchRegister); ++ } ++ ++ return compressed; ++ } ++ ++ private StackSlot emitDoubleToStack(StackSlot slot, Register a) { ++ emitStoreRegister(a, LoongArch64Kind.DOUBLE, LoongArch64.sp, slot.getOffset(frameSize)); ++ return slot; ++ } ++ ++ @Override ++ public StackSlot emitDoubleToStack(Register a) { ++ StackSlot ret = newStackSlot(LoongArch64Kind.DOUBLE); ++ return emitDoubleToStack(ret, a); ++ } ++ ++ private StackSlot emitFloatToStack(StackSlot slot, Register a) { ++ emitStoreRegister(a, LoongArch64Kind.SINGLE, LoongArch64.sp, slot.getOffset(frameSize)); ++ return slot; ++ } ++ ++ @Override ++ public StackSlot emitFloatToStack(Register a) { ++ StackSlot ret = newStackSlot(LoongArch64Kind.SINGLE); ++ return emitFloatToStack(ret, a); ++ } ++ ++ private StackSlot emitIntToStack(StackSlot slot, Register a) { ++ emitStoreRegister(a, LoongArch64Kind.DWORD, LoongArch64.sp, slot.getOffset(frameSize)); ++ return slot; ++ } ++ ++ @Override ++ public StackSlot emitIntToStack(Register a) { ++ StackSlot ret = newStackSlot(LoongArch64Kind.DWORD); ++ return emitIntToStack(ret, a); ++ } ++ ++ private StackSlot emitLongToStack(StackSlot slot, Register a) { ++ emitStoreRegister(a, LoongArch64Kind.QWORD, LoongArch64.sp, slot.getOffset(frameSize)); ++ return slot; ++ } ++ ++ @Override ++ public StackSlot emitLongToStack(Register a) { ++ StackSlot ret = newStackSlot(LoongArch64Kind.QWORD); ++ return emitLongToStack(ret, a); ++ } ++ ++} +diff --git a/test/hotspot/jtreg/compiler/runtime/criticalnatives/argumentcorruption/CheckLongArgs.java b/test/hotspot/jtreg/compiler/runtime/criticalnatives/argumentcorruption/CheckLongArgs.java +index acb86812d2..664ea11d0d 100644 +--- a/test/hotspot/jtreg/compiler/runtime/criticalnatives/argumentcorruption/CheckLongArgs.java ++++ b/test/hotspot/jtreg/compiler/runtime/criticalnatives/argumentcorruption/CheckLongArgs.java +@@ -21,10 +21,17 @@ + * questions. + */ + ++/* ++ * This file has been modified by Loongson Technology in 2021, These ++ * modifications are Copyright (c) 2021, Loongson Technology, and are made ++ * available on the same license terms set forth above. 
++ */ ++ + + /* @test + * @bug 8167409 + * @requires (os.arch != "aarch64") & (os.arch != "arm") ++ * @requires (os.arch != "mips64el") & (os.arch != "loongarch64") + * @run main/othervm/native -Xcomp -XX:+CriticalJNINatives compiler.runtime.criticalnatives.argumentcorruption.CheckLongArgs + */ + package compiler.runtime.criticalnatives.argumentcorruption; +diff --git a/test/hotspot/jtreg/compiler/runtime/criticalnatives/lookup/LookUp.java b/test/hotspot/jtreg/compiler/runtime/criticalnatives/lookup/LookUp.java +index eab36f9311..ee5ab2f6dd 100644 +--- a/test/hotspot/jtreg/compiler/runtime/criticalnatives/lookup/LookUp.java ++++ b/test/hotspot/jtreg/compiler/runtime/criticalnatives/lookup/LookUp.java +@@ -21,10 +21,17 @@ + * questions. + */ + ++/* ++ * This file has been modified by Loongson Technology in 2021, These ++ * modifications are Copyright (c) 2021, Loongson Technology, and are made ++ * available on the same license terms set forth above. ++ */ ++ + + /* @test + * @bug 8167408 + * @requires (os.arch != "aarch64") & (os.arch != "arm") ++ * @requires (os.arch != "mips64el") & (os.arch != "loongarch64") + * @run main/othervm/native -Xcomp -XX:+CriticalJNINatives compiler.runtime.criticalnatives.lookup.LookUp + */ + package compiler.runtime.criticalnatives.lookup; +diff --git a/test/hotspot/jtreg/compiler/testlibrary/sha/predicate/IntrinsicPredicates.java b/test/hotspot/jtreg/compiler/testlibrary/sha/predicate/IntrinsicPredicates.java +index 7774dabcb5..c1cb6e00f3 100644 +--- a/test/hotspot/jtreg/compiler/testlibrary/sha/predicate/IntrinsicPredicates.java ++++ b/test/hotspot/jtreg/compiler/testlibrary/sha/predicate/IntrinsicPredicates.java +@@ -21,6 +21,12 @@ + * questions. + */ + ++/* ++ * This file has been modified by Loongson Technology in 2021, These ++ * modifications are Copyright (c) 2021, Loongson Technology, and are made ++ * available on the same license terms set forth above. ++ */ ++ + package compiler.testlibrary.sha.predicate; + + import jdk.test.lib.Platform; +@@ -63,10 +69,12 @@ public class IntrinsicPredicates { + = new OrPredicate(new CPUSpecificPredicate("aarch64.*", new String[] { "sha1" }, null), + new OrPredicate(new CPUSpecificPredicate("s390.*", new String[] { "sha1" }, null), + new OrPredicate(new CPUSpecificPredicate("sparc.*", new String[] { "sha1" }, null), ++ // Basic instructions are used to implement SHA1 Intrinsics on LA, so "sha1" feature is not needed. ++ new OrPredicate(new CPUSpecificPredicate("loongarch64.*", null, null), + // x86 variants + new OrPredicate(new CPUSpecificPredicate("amd64.*", new String[] { "sha" }, null), + new OrPredicate(new CPUSpecificPredicate("i386.*", new String[] { "sha" }, null), +- new CPUSpecificPredicate("x86.*", new String[] { "sha" }, null)))))); ++ new CPUSpecificPredicate("x86.*", new String[] { "sha" }, null))))))); + + public static final BooleanSupplier SHA256_INSTRUCTION_AVAILABLE + = new OrPredicate(new CPUSpecificPredicate("aarch64.*", new String[] { "sha256" }, null), +@@ -74,12 +82,14 @@ public class IntrinsicPredicates { + new OrPredicate(new CPUSpecificPredicate("sparc.*", new String[] { "sha256" }, null), + new OrPredicate(new CPUSpecificPredicate("ppc64.*", new String[] { "sha" }, null), + new OrPredicate(new CPUSpecificPredicate("ppc64le.*", new String[] { "sha" }, null), ++ // Basic instructions are used to implement SHA256 Intrinsics on LA, so "sha256" feature is not needed. 
++ new OrPredicate(new CPUSpecificPredicate("loongarch64.*", null, null), + // x86 variants + new OrPredicate(new CPUSpecificPredicate("amd64.*", new String[] { "sha" }, null), + new OrPredicate(new CPUSpecificPredicate("i386.*", new String[] { "sha" }, null), + new OrPredicate(new CPUSpecificPredicate("x86.*", new String[] { "sha" }, null), + new OrPredicate(new CPUSpecificPredicate("amd64.*", new String[] { "avx2", "bmi2" }, null), +- new CPUSpecificPredicate("x86_64", new String[] { "avx2", "bmi2" }, null)))))))))); ++ new CPUSpecificPredicate("x86_64", new String[] { "avx2", "bmi2" }, null))))))))))); + + public static final BooleanSupplier SHA512_INSTRUCTION_AVAILABLE + = new OrPredicate(new CPUSpecificPredicate("aarch64.*", new String[] { "sha512" }, null), +diff --git a/test/hotspot/jtreg/runtime/ReservedStack/ReservedStackTest.java b/test/hotspot/jtreg/runtime/ReservedStack/ReservedStackTest.java +index 127bb6abcd..c9277604ae 100644 +--- a/test/hotspot/jtreg/runtime/ReservedStack/ReservedStackTest.java ++++ b/test/hotspot/jtreg/runtime/ReservedStack/ReservedStackTest.java +@@ -21,6 +21,12 @@ + * questions. + */ + ++/* ++ * This file has been modified by Loongson Technology in 2021, These ++ * modifications are Copyright (c) 2021, Loongson Technology, and are made ++ * available on the same license terms set forth above. ++ */ ++ + /* + * @test ReservedStackTest + * +@@ -239,7 +245,7 @@ public class ReservedStackTest { + return Platform.isAix() || + (Platform.isLinux() && + (Platform.isPPC() || Platform.isS390x() || Platform.isX64() || +- Platform.isX86())) || ++ Platform.isX86() || Platform.isMIPS() || Platform.isLoongArch64())) || + Platform.isOSX() || + Platform.isSolaris(); + } +diff --git a/test/hotspot/jtreg/testlibrary_tests/TestMutuallyExclusivePlatformPredicates.java b/test/hotspot/jtreg/testlibrary_tests/TestMutuallyExclusivePlatformPredicates.java +index 77458554b7..05aee6b84c 100644 +--- a/test/hotspot/jtreg/testlibrary_tests/TestMutuallyExclusivePlatformPredicates.java ++++ b/test/hotspot/jtreg/testlibrary_tests/TestMutuallyExclusivePlatformPredicates.java +@@ -45,7 +45,7 @@ import java.util.Set; + */ + public class TestMutuallyExclusivePlatformPredicates { + private static enum MethodGroup { +- ARCH("isAArch64", "isARM", "isPPC", "isS390x", "isSparc", "isX64", "isX86"), ++ ARCH("isAArch64", "isARM", "isPPC", "isS390x", "isSparc", "isX64", "isX86", "isMIPS", "isLoongArch64"), + BITNESS("is32bit", "is64bit"), + OS("isAix", "isLinux", "isOSX", "isSolaris", "isWindows"), + VM_TYPE("isClient", "isServer", "isGraal", "isMinimal", "isZero", "isEmbedded"), +diff --git a/test/jdk/jdk/jfr/event/os/TestCPUInformation.java b/test/jdk/jdk/jfr/event/os/TestCPUInformation.java +index 7990c49a1f..025048c6b0 100644 +--- a/test/jdk/jdk/jfr/event/os/TestCPUInformation.java ++++ b/test/jdk/jdk/jfr/event/os/TestCPUInformation.java +@@ -23,6 +23,12 @@ + * questions. + */ + ++/* ++ * This file has been modified by Loongson Technology in 2021, These ++ * modifications are Copyright (c) 2021, Loongson Technology, and are made ++ * available on the same license terms set forth above. 
++ */ ++ + package jdk.jfr.event.os; + + import java.util.List; +@@ -54,8 +60,8 @@ public class TestCPUInformation { + Events.assertField(event, "hwThreads").atLeast(1); + Events.assertField(event, "cores").atLeast(1); + Events.assertField(event, "sockets").atLeast(1); +- Events.assertField(event, "cpu").containsAny("Intel", "AMD", "Unknown x86", "sparc", "ARM", "PPC", "PowerPC", "AArch64", "s390"); +- Events.assertField(event, "description").containsAny("Intel", "AMD", "Unknown x86", "SPARC", "ARM", "PPC", "PowerPC", "AArch64", "s390"); ++ Events.assertField(event, "cpu").containsAny("Intel", "AMD", "Unknown x86", "sparc", "ARM", "PPC", "PowerPC", "AArch64", "s390", "MIPS", "LoongArch"); ++ Events.assertField(event, "description").containsAny("Intel", "AMD", "Unknown x86", "SPARC", "ARM", "PPC", "PowerPC", "AArch64", "s390", "MIPS", "LoongArch"); + } + } + } +diff --git a/test/jdk/sun/security/pkcs11/PKCS11Test.java b/test/jdk/sun/security/pkcs11/PKCS11Test.java +index 0c74c5bdad..421f37e390 100644 +--- a/test/jdk/sun/security/pkcs11/PKCS11Test.java ++++ b/test/jdk/sun/security/pkcs11/PKCS11Test.java +@@ -21,6 +21,12 @@ + * questions. + */ + ++/* ++ * This file has been modified by Loongson Technology in 2022, These ++ * modifications are Copyright (c) 2021, 2022, Loongson Technology, and are made ++ * available on the same license terms set forth above. ++ */ ++ + // common infrastructure for SunPKCS11 tests + + import java.io.BufferedReader; +@@ -747,6 +753,9 @@ public abstract class PKCS11Test { + "/usr/lib64/" }); + osMap.put("Linux-ppc64-64", new String[] { "/usr/lib64/" }); + osMap.put("Linux-ppc64le-64", new String[] { "/usr/lib64/" }); ++ osMap.put("Linux-mips64el-64", new String[]{"/usr/lib64/"}); ++ osMap.put("Linux-loongarch64-64", new String[]{"/usr/lib/loongarch64-linux-gnu/", ++ "/usr/lib64/" }); + osMap.put("Linux-s390x-64", new String[] { "/usr/lib64/" }); + osMap.put("Windows-x86-32", new String[] {}); + osMap.put("Windows-amd64-64", new String[] {}); +diff --git a/test/lib/jdk/test/lib/Platform.java b/test/lib/jdk/test/lib/Platform.java +index f4ee0546c7..a600d15b61 100644 +--- a/test/lib/jdk/test/lib/Platform.java ++++ b/test/lib/jdk/test/lib/Platform.java +@@ -21,6 +21,12 @@ + * questions. + */ + ++/* ++ * This file has been modified by Loongson Technology in 2022, These ++ * modifications are Copyright (c) 2019, 2022, Loongson Technology, and are made ++ * available on the same license terms set forth above. ++ */ ++ + package jdk.test.lib; + + import java.io.FileNotFoundException; +@@ -226,6 +232,14 @@ public class Platform { + return isArch("(i386)|(x86(?!_64))"); + } + ++ public static boolean isLoongArch64() { ++ return isArch("loongarch64"); ++ } ++ ++ public static boolean isMIPS() { ++ return isArch("mips.*"); ++ } ++ + public static String getOsArch() { + return osArch; + } +diff --git a/test/micro/org/openjdk/bench/java/lang/RotateBenchmark.java b/test/micro/org/openjdk/bench/java/lang/RotateBenchmark.java +new file mode 100644 +index 0000000000..81fd956a4e +--- /dev/null ++++ b/test/micro/org/openjdk/bench/java/lang/RotateBenchmark.java +@@ -0,0 +1,87 @@ ++// ++// Copyright (c) 2003, 2020, Oracle and/or its affiliates. All rights reserved. ++// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++// ++// This code is free software; you can redistribute it and/or modify it ++// under the terms of the GNU General Public License version 2 only, as ++// published by the Free Software Foundation. 
++// ++// This code is distributed in the hope that it will be useful, but WITHOUT ++// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++// version 2 for more details (a copy is included in the LICENSE file that ++// accompanied this code). ++// ++// You should have received a copy of the GNU General Public License version ++// 2 along with this work; if not, write to the Free Software Foundation, ++// Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++// ++// Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++// or visit www.oracle.com if you need additional information or have any ++// questions. ++// ++// ++package org.openjdk.bench.java.lang; ++ ++import java.util.Random; ++import java.util.concurrent.TimeUnit; ++import org.openjdk.jmh.annotations.*; ++import org.openjdk.jmh.infra.Blackhole; ++ ++@OutputTimeUnit(TimeUnit.MILLISECONDS) ++@State(Scope.Thread) ++@BenchmarkMode(Mode.Throughput) ++public class RotateBenchmark { ++ ++ @Param({"1024"}) ++ public int TESTSIZE; ++ ++ @Param({"20"}) ++ public int SHIFT; ++ ++ public long [] larr; ++ public int [] iarr; ++ ++ public long [] lres; ++ public int [] ires; ++ ++ ++ @Setup(Level.Trial) ++ public void BmSetup() { ++ Random r = new Random(1024); ++ larr = new long[TESTSIZE]; ++ iarr = new int[TESTSIZE]; ++ lres = new long[TESTSIZE]; ++ ires = new int[TESTSIZE]; ++ ++ for (int i = 0; i < TESTSIZE; i++) { ++ larr[i] = r.nextLong(); ++ } ++ ++ for (int i = 0; i < TESTSIZE; i++) { ++ iarr[i] = r.nextInt(); ++ } ++ } ++ ++ @Benchmark ++ public void testRotateLeftI() { ++ for (int i = 0; i < TESTSIZE; i++) ++ ires[i] = Integer.rotateLeft(iarr[i], SHIFT); ++ } ++ @Benchmark ++ public void testRotateRightI() { ++ for (int i = 0; i < TESTSIZE; i++) ++ ires[i] = Integer.rotateRight(iarr[i], SHIFT); ++ } ++ @Benchmark ++ public void testRotateLeftL() { ++ for (int i = 0; i < TESTSIZE; i++) ++ lres[i] = Long.rotateLeft(larr[i], SHIFT); ++ } ++ @Benchmark ++ public void testRotateRightL() { ++ for (int i = 0; i < TESTSIZE; i++) ++ lres[i] = Long.rotateRight(larr[i], SHIFT); ++ } ++ ++} +diff --git a/test/micro/org/openjdk/bench/vm/compiler/MacroLogicOpt.java b/test/micro/org/openjdk/bench/vm/compiler/MacroLogicOpt.java +new file mode 100644 +index 0000000000..58400cadf6 +--- /dev/null ++++ b/test/micro/org/openjdk/bench/vm/compiler/MacroLogicOpt.java +@@ -0,0 +1,125 @@ ++/* ++ * Copyright (c) 2020, Oracle and/or its affiliates. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 
++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ */ ++package org.openjdk.bench.vm.compiler; ++ ++import org.openjdk.jmh.annotations.*; ++import org.openjdk.jmh.infra.*; ++ ++import java.util.concurrent.TimeUnit; ++import java.util.Random; ++ ++@BenchmarkMode(Mode.Throughput) ++@OutputTimeUnit(TimeUnit.SECONDS) ++@State(Scope.Thread) ++public class MacroLogicOpt { ++ @Param({"64","128","256","512","1024","2048","4096"}) private int VECLEN; ++ ++ private int [] ai = new int[VECLEN]; ++ private int [] bi = new int[VECLEN]; ++ private int [] ci = new int[VECLEN]; ++ private int [] ri = new int[VECLEN]; ++ ++ private long [] al = new long[VECLEN]; ++ private long [] bl = new long[VECLEN]; ++ private long [] cl = new long[VECLEN]; ++ private long [] dl = new long[VECLEN]; ++ private long [] el = new long[VECLEN]; ++ private long [] fl = new long[VECLEN]; ++ private long [] rl = new long[VECLEN]; ++ ++ private Random r = new Random(); ++ ++ @Setup ++ public void init() { ++ ai = new int[VECLEN]; ++ bi = new int[VECLEN]; ++ ci = new int[VECLEN]; ++ ri = new int[VECLEN]; ++ ++ al = new long[VECLEN]; ++ bl = new long[VECLEN]; ++ cl = new long[VECLEN]; ++ dl = new long[VECLEN]; ++ el = new long[VECLEN]; ++ fl = new long[VECLEN]; ++ rl = new long[VECLEN]; ++ for (int i=0; imax_locals() + method->max_stack() + stub_code; ++ const int method_stack = (method->max_locals() + method->max_stack()) * ++ Interpreter::stackElementWords; ++ return overhead_size + method_stack + stub_code; ++} ++ ++void AbstractInterpreter::layout_activation(Method* method, ++ int tempcount, ++ int popframe_extra_args, ++ int moncount, ++ int caller_actual_parameters, ++ int callee_param_count, ++ int callee_locals, ++ frame* caller, ++ frame* interpreter_frame, ++ bool is_top_frame, ++ bool is_bottom_frame) { ++ // Note: This calculation must exactly parallel the frame setup ++ // in AbstractInterpreterGenerator::generate_method_entry. ++ // If interpreter_frame!=NULL, set up the method, locals, and monitors. ++ // The frame interpreter_frame, if not NULL, is guaranteed to be the ++ // right size, as determined by a previous call to this method. 
++ // It is also guaranteed to be walkable even though it is in a skeletal state ++ ++ // fixed size of an interpreter frame: ++ ++ int max_locals = method->max_locals() * Interpreter::stackElementWords; ++ int extra_locals = (method->max_locals() - method->size_of_parameters()) * Interpreter::stackElementWords; ++ ++#ifdef ASSERT ++ assert(caller->sp() == interpreter_frame->sender_sp(), "Frame not properly walkable(2)"); ++#endif ++ ++ interpreter_frame->interpreter_frame_set_method(method); ++ // NOTE the difference in using sender_sp and interpreter_frame_sender_sp ++ // interpreter_frame_sender_sp is the original sp of the caller (the unextended_sp) ++ // and sender_sp is fp+8 ++ intptr_t* locals = interpreter_frame->sender_sp() + max_locals - 1; ++ ++#ifdef ASSERT ++ if (caller->is_interpreted_frame()) { ++ assert(locals < caller->fp() + frame::interpreter_frame_initial_sp_offset, "bad placement"); ++ } ++#endif ++ ++ interpreter_frame->interpreter_frame_set_locals(locals); ++ BasicObjectLock* montop = interpreter_frame->interpreter_frame_monitor_begin(); ++ BasicObjectLock* monbot = montop - moncount; ++ interpreter_frame->interpreter_frame_set_monitor_end(montop - moncount); ++ ++ //set last sp; ++ intptr_t* esp = (intptr_t*) monbot - tempcount*Interpreter::stackElementWords - ++ popframe_extra_args; ++ interpreter_frame->interpreter_frame_set_last_sp(esp); ++ // All frames but the initial interpreter frame we fill in have a ++ // value for sender_sp that allows walking the stack but isn't ++ // truly correct. Correct the value here. ++ // ++ if (extra_locals != 0 && ++ interpreter_frame->sender_sp() == interpreter_frame->interpreter_frame_sender_sp() ) { ++ interpreter_frame->set_interpreter_frame_sender_sp(caller->sp() + extra_locals); ++ } ++ *interpreter_frame->interpreter_frame_cache_addr() = method->constants()->cache(); ++ *interpreter_frame->interpreter_frame_mirror_addr() = method->method_holder()->java_mirror(); ++} ++ +diff --git a/src/hotspot/cpu/loongarch/assembler_loongarch.cpp b/src/hotspot/cpu/loongarch/assembler_loongarch.cpp +new file mode 100644 +index 00000000000..e6e62cccad0 +--- /dev/null ++++ b/src/hotspot/cpu/loongarch/assembler_loongarch.cpp +@@ -0,0 +1,849 @@ ++/* ++ * Copyright (c) 1997, 2014, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2021, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ * ++ */ ++ ++#include "precompiled.hpp" ++#include "asm/assembler.hpp" ++#include "asm/assembler.inline.hpp" ++#include "gc/shared/cardTableBarrierSet.hpp" ++#include "gc/shared/collectedHeap.inline.hpp" ++#include "interpreter/interpreter.hpp" ++#include "memory/resourceArea.hpp" ++#include "prims/methodHandles.hpp" ++#include "runtime/biasedLocking.hpp" ++#include "runtime/objectMonitor.hpp" ++#include "runtime/os.hpp" ++#include "runtime/sharedRuntime.hpp" ++#include "runtime/stubRoutines.hpp" ++#include "utilities/macros.hpp" ++ ++#ifdef PRODUCT ++#define BLOCK_COMMENT(str) /* nothing */ ++#define STOP(error) stop(error) ++#else ++#define BLOCK_COMMENT(str) block_comment(str) ++#define STOP(error) block_comment(error); stop(error) ++#endif ++ ++#define BIND(label) bind(label); BLOCK_COMMENT(#label ":") ++// Implementation of AddressLiteral ++ ++AddressLiteral::AddressLiteral(address target, relocInfo::relocType rtype) { ++ _is_lval = false; ++ _target = target; ++ _rspec = rspec_from_rtype(rtype, target); ++} ++ ++// Implementation of Address ++ ++Address Address::make_array(ArrayAddress adr) { ++ AddressLiteral base = adr.base(); ++ Address index = adr.index(); ++ assert(index._disp == 0, "must not have disp"); // maybe it can? ++ Address array(index._base, index._index, index._scale, (intptr_t) base.target()); ++ array._rspec = base._rspec; ++ return array; ++} ++ ++// exceedingly dangerous constructor ++Address::Address(address loc, RelocationHolder spec) { ++ _base = noreg; ++ _index = noreg; ++ _scale = no_scale; ++ _disp = (intptr_t) loc; ++ _rspec = spec; ++} ++ ++ ++int Assembler::is_int_mask(int x) { ++ int xx = x; ++ int count = 0; ++ ++ while (x != 0) { ++ x &= (x - 1); ++ count++; ++ } ++ ++ if ((1<> 12)); ++ if (split_low12(disp)) ++ ori(AT, AT, split_low12(disp)); ++ ++ if (scale == 0) { ++ add_d(AT, AT, index); ++ } else { ++ alsl_d(AT, index, AT, scale - 1); ++ } ++ ldx_b(dst, base, AT); ++ } ++ } else { ++ if (is_simm(disp, 12)) { ++ ld_b(dst, base, disp); ++ } else { ++ lu12i_w(AT, split_low20(disp >> 12)); ++ if (split_low12(disp)) ++ ori(AT, AT, split_low12(disp)); ++ ldx_b(dst, base, AT); ++ } ++ } ++} ++ ++void Assembler::ld_bu(Register rd, Address src) { ++ Register dst = rd; ++ Register base = src.base(); ++ Register index = src.index(); ++ ++ int scale = src.scale(); ++ int disp = src.disp(); ++ ++ if (index != noreg) { ++ if (is_simm(disp, 12)) { ++ if (scale == 0) { ++ if (disp == 0) { ++ ldx_bu(dst, base, index); ++ } else { ++ add_d(AT, base, index); ++ ld_bu(dst, AT, disp); ++ } ++ } else { ++ alsl_d(AT, index, base, scale - 1); ++ ld_bu(dst, AT, disp); ++ } ++ } else { ++ lu12i_w(AT, split_low20(disp >> 12)); ++ if (split_low12(disp)) ++ ori(AT, AT, split_low12(disp)); ++ ++ if (scale == 0) { ++ add_d(AT, AT, index); ++ } else { ++ alsl_d(AT, index, AT, scale - 1); ++ } ++ ldx_bu(dst, base, AT); ++ } ++ } else { ++ if (is_simm(disp, 12)) { ++ ld_bu(dst, base, disp); ++ } else { ++ lu12i_w(AT, split_low20(disp >> 12)); ++ if (split_low12(disp)) ++ ori(AT, AT, split_low12(disp)); ++ ldx_bu(dst, base, AT); ++ } ++ } ++} ++ ++void Assembler::ld_d(Register rd, Address src){ ++ Register dst = rd; ++ Register base = src.base(); ++ Register index = src.index(); ++ ++ int scale = src.scale(); ++ int disp = src.disp(); ++ ++ if (index != noreg) { ++ if (is_simm(disp, 12)) { ++ if (scale == 0) { ++ if (disp == 0) { ++ ldx_d(dst, base, index); ++ } else { ++ add_d(AT, base, index); ++ ld_d(dst, AT, disp); ++ } ++ } else { ++ alsl_d(AT, index, base, scale - 1); ++ 
ld_d(dst, AT, disp); ++ } ++ } else if (is_simm(disp, 16) && !(disp & 3)) { ++ if (scale == 0) { ++ add_d(AT, base, index); ++ } else { ++ alsl_d(AT, index, base, scale - 1); ++ } ++ ldptr_d(dst, AT, disp); ++ } else { ++ lu12i_w(AT, split_low20(disp >> 12)); ++ if (split_low12(disp)) ++ ori(AT, AT, split_low12(disp)); ++ ++ if (scale == 0) { ++ add_d(AT, AT, index); ++ } else { ++ alsl_d(AT, index, AT, scale - 1); ++ } ++ ldx_d(dst, base, AT); ++ } ++ } else { ++ if (is_simm(disp, 12)) { ++ ld_d(dst, base, disp); ++ } else if (is_simm(disp, 16) && !(disp & 3)) { ++ ldptr_d(dst, base, disp); ++ } else { ++ lu12i_w(AT, split_low20(disp >> 12)); ++ if (split_low12(disp)) ++ ori(AT, AT, split_low12(disp)); ++ ldx_d(dst, base, AT); ++ } ++ } ++} ++ ++void Assembler::ld_h(Register rd, Address src){ ++ Register dst = rd; ++ Register base = src.base(); ++ Register index = src.index(); ++ ++ int scale = src.scale(); ++ int disp = src.disp(); ++ ++ if (index != noreg) { ++ if (is_simm(disp, 12)) { ++ if (scale == 0) { ++ if (disp == 0) { ++ ldx_h(dst, base, index); ++ } else { ++ add_d(AT, base, index); ++ ld_h(dst, AT, disp); ++ } ++ } else { ++ alsl_d(AT, index, base, scale - 1); ++ ld_h(dst, AT, disp); ++ } ++ } else { ++ lu12i_w(AT, split_low20(disp >> 12)); ++ if (split_low12(disp)) ++ ori(AT, AT, split_low12(disp)); ++ ++ if (scale == 0) { ++ add_d(AT, AT, index); ++ } else { ++ alsl_d(AT, index, AT, scale - 1); ++ } ++ ldx_h(dst, base, AT); ++ } ++ } else { ++ if (is_simm(disp, 12)) { ++ ld_h(dst, base, disp); ++ } else { ++ lu12i_w(AT, split_low20(disp >> 12)); ++ if (split_low12(disp)) ++ ori(AT, AT, split_low12(disp)); ++ ldx_h(dst, base, AT); ++ } ++ } ++} ++ ++void Assembler::ld_hu(Register rd, Address src){ ++ Register dst = rd; ++ Register base = src.base(); ++ Register index = src.index(); ++ ++ int scale = src.scale(); ++ int disp = src.disp(); ++ ++ if (index != noreg) { ++ if (is_simm(disp, 12)) { ++ if (scale == 0) { ++ if (disp == 0) { ++ ldx_hu(dst, base, index); ++ } else { ++ add_d(AT, base, index); ++ ld_hu(dst, AT, disp); ++ } ++ } else { ++ alsl_d(AT, index, base, scale - 1); ++ ld_hu(dst, AT, disp); ++ } ++ } else { ++ lu12i_w(AT, split_low20(disp >> 12)); ++ if (split_low12(disp)) ++ ori(AT, AT, split_low12(disp)); ++ ++ if (scale == 0) { ++ add_d(AT, AT, index); ++ } else { ++ alsl_d(AT, index, AT, scale - 1); ++ } ++ ldx_hu(dst, base, AT); ++ } ++ } else { ++ if (is_simm(disp, 12)) { ++ ld_hu(dst, base, disp); ++ } else { ++ lu12i_w(AT, split_low20(disp >> 12)); ++ if (split_low12(disp)) ++ ori(AT, AT, split_low12(disp)); ++ ldx_hu(dst, base, AT); ++ } ++ } ++} ++ ++void Assembler::ll_w(Register rd, Address src){ ++ assert(src.index() == NOREG, "index is unimplemented"); ++ ll_w(rd, src.base(), src.disp()); ++} ++ ++void Assembler::ll_d(Register rd, Address src){ ++ assert(src.index() == NOREG, "index is unimplemented"); ++ ll_d(rd, src.base(), src.disp()); ++} ++ ++void Assembler::ld_w(Register rd, Address src){ ++ Register dst = rd; ++ Register base = src.base(); ++ Register index = src.index(); ++ ++ int scale = src.scale(); ++ int disp = src.disp(); ++ ++ if (index != noreg) { ++ if (is_simm(disp, 12)) { ++ if (scale == 0) { ++ if (disp == 0) { ++ ldx_w(dst, base, index); ++ } else { ++ add_d(AT, base, index); ++ ld_w(dst, AT, disp); ++ } ++ } else { ++ alsl_d(AT, index, base, scale - 1); ++ ld_w(dst, AT, disp); ++ } ++ } else if (is_simm(disp, 16) && !(disp & 3)) { ++ if (scale == 0) { ++ add_d(AT, base, index); ++ } else { ++ alsl_d(AT, index, base, scale - 1); 
++ } ++ ldptr_w(dst, AT, disp); ++ } else { ++ lu12i_w(AT, split_low20(disp >> 12)); ++ if (split_low12(disp)) ++ ori(AT, AT, split_low12(disp)); ++ ++ if (scale == 0) { ++ add_d(AT, AT, index); ++ } else { ++ alsl_d(AT, index, AT, scale - 1); ++ } ++ ldx_w(dst, base, AT); ++ } ++ } else { ++ if (is_simm(disp, 12)) { ++ ld_w(dst, base, disp); ++ } else if (is_simm(disp, 16) && !(disp & 3)) { ++ ldptr_w(dst, base, disp); ++ } else { ++ lu12i_w(AT, split_low20(disp >> 12)); ++ if (split_low12(disp)) ++ ori(AT, AT, split_low12(disp)); ++ ldx_w(dst, base, AT); ++ } ++ } ++} ++ ++void Assembler::ld_wu(Register rd, Address src){ ++ Register dst = rd; ++ Register base = src.base(); ++ Register index = src.index(); ++ ++ int scale = src.scale(); ++ int disp = src.disp(); ++ ++ if (index != noreg) { ++ if (is_simm(disp, 12)) { ++ if (scale == 0) { ++ if (disp == 0) { ++ ldx_wu(dst, base, index); ++ } else { ++ add_d(AT, base, index); ++ ld_wu(dst, AT, disp); ++ } ++ } else { ++ alsl_d(AT, index, base, scale - 1); ++ ld_wu(dst, AT, disp); ++ } ++ } else { ++ lu12i_w(AT, split_low20(disp >> 12)); ++ if (split_low12(disp)) ++ ori(AT, AT, split_low12(disp)); ++ ++ if (scale == 0) { ++ add_d(AT, AT, index); ++ } else { ++ alsl_d(AT, index, AT, scale - 1); ++ } ++ ldx_wu(dst, base, AT); ++ } ++ } else { ++ if (is_simm(disp, 12)) { ++ ld_wu(dst, base, disp); ++ } else { ++ lu12i_w(AT, split_low20(disp >> 12)); ++ if (split_low12(disp)) ++ ori(AT, AT, split_low12(disp)); ++ ldx_wu(dst, base, AT); ++ } ++ } ++} ++ ++void Assembler::st_b(Register rd, Address dst) { ++ Register src = rd; ++ Register base = dst.base(); ++ Register index = dst.index(); ++ ++ int scale = dst.scale(); ++ int disp = dst.disp(); ++ ++ if (index != noreg) { ++ assert_different_registers(src, AT); ++ if (is_simm(disp, 12)) { ++ if (scale == 0) { ++ if (disp == 0) { ++ stx_b(src, base, index); ++ } else { ++ add_d(AT, base, index); ++ st_b(src, AT, disp); ++ } ++ } else { ++ alsl_d(AT, index, base, scale - 1); ++ st_b(src, AT, disp); ++ } ++ } else { ++ lu12i_w(AT, split_low20(disp >> 12)); ++ if (split_low12(disp)) ++ ori(AT, AT, split_low12(disp)); ++ ++ if (scale == 0) { ++ add_d(AT, AT, index); ++ } else { ++ alsl_d(AT, index, AT, scale - 1); ++ } ++ stx_b(src, base, AT); ++ } ++ } else { ++ if (is_simm(disp, 12)) { ++ st_b(src, base, disp); ++ } else { ++ assert_different_registers(src, AT); ++ lu12i_w(AT, split_low20(disp >> 12)); ++ if (split_low12(disp)) ++ ori(AT, AT, split_low12(disp)); ++ stx_b(src, base, AT); ++ } ++ } ++} ++ ++void Assembler::sc_w(Register rd, Address dst) { ++ assert(dst.index() == NOREG, "index is unimplemented"); ++ sc_w(rd, dst.base(), dst.disp()); ++} ++ ++void Assembler::sc_d(Register rd, Address dst) { ++ assert(dst.index() == NOREG, "index is unimplemented"); ++ sc_d(rd, dst.base(), dst.disp()); ++} ++ ++void Assembler::st_d(Register rd, Address dst) { ++ Register src = rd; ++ Register base = dst.base(); ++ Register index = dst.index(); ++ ++ int scale = dst.scale(); ++ int disp = dst.disp(); ++ ++ if (index != noreg) { ++ assert_different_registers(src, AT); ++ if (is_simm(disp, 12)) { ++ if (scale == 0) { ++ if (disp == 0) { ++ stx_d(src, base, index); ++ } else { ++ add_d(AT, base, index); ++ st_d(src, AT, disp); ++ } ++ } else { ++ alsl_d(AT, index, base, scale - 1); ++ st_d(src, AT, disp); ++ } ++ } else if (is_simm(disp, 16) && !(disp & 3)) { ++ if (scale == 0) { ++ add_d(AT, base, index); ++ } else { ++ alsl_d(AT, index, base, scale - 1); ++ } ++ stptr_d(src, AT, disp); ++ } else { ++ 
lu12i_w(AT, split_low20(disp >> 12)); ++ if (split_low12(disp)) ++ ori(AT, AT, split_low12(disp)); ++ ++ if (scale == 0) { ++ add_d(AT, AT, index); ++ } else { ++ alsl_d(AT, index, AT, scale - 1); ++ } ++ stx_d(src, base, AT); ++ } ++ } else { ++ if (is_simm(disp, 12)) { ++ st_d(src, base, disp); ++ } else if (is_simm(disp, 16) && !(disp & 3)) { ++ stptr_d(src, base, disp); ++ } else { ++ assert_different_registers(src, AT); ++ lu12i_w(AT, split_low20(disp >> 12)); ++ if (split_low12(disp)) ++ ori(AT, AT, split_low12(disp)); ++ stx_d(src, base, AT); ++ } ++ } ++} ++ ++void Assembler::st_h(Register rd, Address dst) { ++ Register src = rd; ++ Register base = dst.base(); ++ Register index = dst.index(); ++ ++ int scale = dst.scale(); ++ int disp = dst.disp(); ++ ++ if (index != noreg) { ++ assert_different_registers(src, AT); ++ if (is_simm(disp, 12)) { ++ if (scale == 0) { ++ if (disp == 0) { ++ stx_h(src, base, index); ++ } else { ++ add_d(AT, base, index); ++ st_h(src, AT, disp); ++ } ++ } else { ++ alsl_d(AT, index, base, scale - 1); ++ st_h(src, AT, disp); ++ } ++ } else { ++ lu12i_w(AT, split_low20(disp >> 12)); ++ if (split_low12(disp)) ++ ori(AT, AT, split_low12(disp)); ++ ++ if (scale == 0) { ++ add_d(AT, AT, index); ++ } else { ++ alsl_d(AT, index, AT, scale - 1); ++ } ++ stx_h(src, base, AT); ++ } ++ } else { ++ if (is_simm(disp, 12)) { ++ st_h(src, base, disp); ++ } else { ++ assert_different_registers(src, AT); ++ lu12i_w(AT, split_low20(disp >> 12)); ++ if (split_low12(disp)) ++ ori(AT, AT, split_low12(disp)); ++ stx_h(src, base, AT); ++ } ++ } ++} ++ ++void Assembler::st_w(Register rd, Address dst) { ++ Register src = rd; ++ Register base = dst.base(); ++ Register index = dst.index(); ++ ++ int scale = dst.scale(); ++ int disp = dst.disp(); ++ ++ if (index != noreg) { ++ assert_different_registers(src, AT); ++ if (is_simm(disp, 12)) { ++ if (scale == 0) { ++ if (disp == 0) { ++ stx_w(src, base, index); ++ } else { ++ add_d(AT, base, index); ++ st_w(src, AT, disp); ++ } ++ } else { ++ alsl_d(AT, index, base, scale - 1); ++ st_w(src, AT, disp); ++ } ++ } else if (is_simm(disp, 16) && !(disp & 3)) { ++ if (scale == 0) { ++ add_d(AT, base, index); ++ } else { ++ alsl_d(AT, index, base, scale - 1); ++ } ++ stptr_w(src, AT, disp); ++ } else { ++ lu12i_w(AT, split_low20(disp >> 12)); ++ if (split_low12(disp)) ++ ori(AT, AT, split_low12(disp)); ++ ++ if (scale == 0) { ++ add_d(AT, AT, index); ++ } else { ++ alsl_d(AT, index, AT, scale - 1); ++ } ++ stx_w(src, base, AT); ++ } ++ } else { ++ if (is_simm(disp, 12)) { ++ st_w(src, base, disp); ++ } else if (is_simm(disp, 16) && !(disp & 3)) { ++ stptr_w(src, base, disp); ++ } else { ++ assert_different_registers(src, AT); ++ lu12i_w(AT, split_low20(disp >> 12)); ++ if (split_low12(disp)) ++ ori(AT, AT, split_low12(disp)); ++ stx_w(src, base, AT); ++ } ++ } ++} ++ ++void Assembler::fld_s(FloatRegister fd, Address src) { ++ Register base = src.base(); ++ Register index = src.index(); ++ ++ int scale = src.scale(); ++ int disp = src.disp(); ++ ++ if (index != noreg) { ++ if (is_simm(disp, 12)) { ++ if (scale == 0) { ++ if (disp == 0) { ++ fldx_s(fd, base, index); ++ } else { ++ add_d(AT, base, index); ++ fld_s(fd, AT, disp); ++ } ++ } else { ++ alsl_d(AT, index, base, scale - 1); ++ fld_s(fd, AT, disp); ++ } ++ } else { ++ lu12i_w(AT, split_low20(disp >> 12)); ++ if (split_low12(disp)) ++ ori(AT, AT, split_low12(disp)); ++ ++ if (scale == 0) { ++ add_d(AT, AT, index); ++ } else { ++ alsl_d(AT, index, AT, scale - 1); ++ } ++ fldx_s(fd, base, 
AT); ++ } ++ } else { ++ if (is_simm(disp, 12)) { ++ fld_s(fd, base, disp); ++ } else { ++ lu12i_w(AT, split_low20(disp >> 12)); ++ if (split_low12(disp)) ++ ori(AT, AT, split_low12(disp)); ++ fldx_s(fd, base, AT); ++ } ++ } ++} ++ ++void Assembler::fld_d(FloatRegister fd, Address src) { ++ Register base = src.base(); ++ Register index = src.index(); ++ ++ int scale = src.scale(); ++ int disp = src.disp(); ++ ++ if (index != noreg) { ++ if (is_simm(disp, 12)) { ++ if (scale == 0) { ++ if (disp == 0) { ++ fldx_d(fd, base, index); ++ } else { ++ add_d(AT, base, index); ++ fld_d(fd, AT, disp); ++ } ++ } else { ++ alsl_d(AT, index, base, scale - 1); ++ fld_d(fd, AT, disp); ++ } ++ } else { ++ lu12i_w(AT, split_low20(disp >> 12)); ++ if (split_low12(disp)) ++ ori(AT, AT, split_low12(disp)); ++ ++ if (scale == 0) { ++ add_d(AT, AT, index); ++ } else { ++ alsl_d(AT, index, AT, scale - 1); ++ } ++ fldx_d(fd, base, AT); ++ } ++ } else { ++ if (is_simm(disp, 12)) { ++ fld_d(fd, base, disp); ++ } else { ++ lu12i_w(AT, split_low20(disp >> 12)); ++ if (split_low12(disp)) ++ ori(AT, AT, split_low12(disp)); ++ fldx_d(fd, base, AT); ++ } ++ } ++} ++ ++void Assembler::fst_s(FloatRegister fd, Address dst) { ++ Register base = dst.base(); ++ Register index = dst.index(); ++ ++ int scale = dst.scale(); ++ int disp = dst.disp(); ++ ++ if (index != noreg) { ++ if (is_simm(disp, 12)) { ++ if (scale == 0) { ++ if (disp == 0) { ++ fstx_s(fd, base, index); ++ } else { ++ add_d(AT, base, index); ++ fst_s(fd, AT, disp); ++ } ++ } else { ++ alsl_d(AT, index, base, scale - 1); ++ fst_s(fd, AT, disp); ++ } ++ } else { ++ lu12i_w(AT, split_low20(disp >> 12)); ++ if (split_low12(disp)) ++ ori(AT, AT, split_low12(disp)); ++ ++ if (scale == 0) { ++ add_d(AT, AT, index); ++ } else { ++ alsl_d(AT, index, AT, scale - 1); ++ } ++ fstx_s(fd, base, AT); ++ } ++ } else { ++ if (is_simm(disp, 12)) { ++ fst_s(fd, base, disp); ++ } else { ++ lu12i_w(AT, split_low20(disp >> 12)); ++ if (split_low12(disp)) ++ ori(AT, AT, split_low12(disp)); ++ fstx_s(fd, base, AT); ++ } ++ } ++} ++ ++void Assembler::fst_d(FloatRegister fd, Address dst) { ++ Register base = dst.base(); ++ Register index = dst.index(); ++ ++ int scale = dst.scale(); ++ int disp = dst.disp(); ++ ++ if (index != noreg) { ++ if (is_simm(disp, 12)) { ++ if (scale == 0) { ++ if (disp == 0) { ++ fstx_d(fd, base, index); ++ } else { ++ add_d(AT, base, index); ++ fst_d(fd, AT, disp); ++ } ++ } else { ++ alsl_d(AT, index, base, scale - 1); ++ fst_d(fd, AT, disp); ++ } ++ } else { ++ lu12i_w(AT, split_low20(disp >> 12)); ++ if (split_low12(disp)) ++ ori(AT, AT, split_low12(disp)); ++ ++ if (scale == 0) { ++ add_d(AT, AT, index); ++ } else { ++ alsl_d(AT, index, AT, scale - 1); ++ } ++ fstx_d(fd, base, AT); ++ } ++ } else { ++ if (is_simm(disp, 12)) { ++ fst_d(fd, base, disp); ++ } else { ++ lu12i_w(AT, split_low20(disp >> 12)); ++ if (split_low12(disp)) ++ ori(AT, AT, split_low12(disp)); ++ fstx_d(fd, base, AT); ++ } ++ } ++} +diff --git a/src/hotspot/cpu/loongarch/assembler_loongarch.hpp b/src/hotspot/cpu/loongarch/assembler_loongarch.hpp +new file mode 100644 +index 00000000000..5eae8b9995c +--- /dev/null ++++ b/src/hotspot/cpu/loongarch/assembler_loongarch.hpp +@@ -0,0 +1,2831 @@ ++/* ++ * Copyright (c) 1997, 2013, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2021, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#ifndef CPU_LOONGARCH_ASSEMBLER_LOONGARCH_HPP ++#define CPU_LOONGARCH_ASSEMBLER_LOONGARCH_HPP ++ ++#include "asm/register.hpp" ++#include "runtime/vm_version.hpp" ++ ++class BiasedLockingCounters; ++ ++ ++// Note: A register location is represented via a Register, not ++// via an address for efficiency & simplicity reasons. ++ ++class ArrayAddress; ++ ++class Address { ++ public: ++ enum ScaleFactor { ++ no_scale = 0, ++ times_2 = 1, ++ times_4 = 2, ++ times_8 = 3, ++ times_ptr = times_8 ++ }; ++ static ScaleFactor times(int size) { ++ assert(size >= 1 && size <= 8 && is_power_of_2(size), "bad scale size"); ++ if (size == 8) return times_8; ++ if (size == 4) return times_4; ++ if (size == 2) return times_2; ++ return no_scale; ++ } ++ ++ private: ++ Register _base; ++ Register _index; ++ ScaleFactor _scale; ++ int _disp; ++ RelocationHolder _rspec; ++ ++ // Easily misused constructors make them private ++ Address(address loc, RelocationHolder spec); ++ Address(int disp, address loc, relocInfo::relocType rtype); ++ Address(int disp, address loc, RelocationHolder spec); ++ ++ public: ++ ++ // creation ++ Address() ++ : _base(noreg), ++ _index(noreg), ++ _scale(no_scale), ++ _disp(0) { ++ } ++ ++ // No default displacement otherwise Register can be implicitly ++ // converted to 0(Register) which is quite a different animal. ++ ++ Address(Register base, int disp = 0) ++ : _base(base), ++ _index(noreg), ++ _scale(no_scale), ++ _disp(disp) { ++ assert_different_registers(_base, AT); ++ } ++ ++ Address(Register base, Register index, ScaleFactor scale, int disp = 0) ++ : _base (base), ++ _index(index), ++ _scale(scale), ++ _disp (disp) { ++ assert_different_registers(_base, _index, AT); ++ } ++ ++ // The following overloads are used in connection with the ++ // ByteSize type (see sizes.hpp). They simplify the use of ++ // ByteSize'd arguments in assembly code. 
++ ++ Address(Register base, ByteSize disp) ++ : Address(base, in_bytes(disp)) {} ++ ++ Address(Register base, Register index, ScaleFactor scale, ByteSize disp) ++ : Address(base, index, scale, in_bytes(disp)) {} ++ ++ // accessors ++ bool uses(Register reg) const { return _base == reg || _index == reg; } ++ Register base() const { return _base; } ++ Register index() const { return _index; } ++ ScaleFactor scale() const { return _scale; } ++ int disp() const { return _disp; } ++ ++ static Address make_array(ArrayAddress); ++ ++ friend class Assembler; ++ friend class MacroAssembler; ++ friend class LIR_Assembler; // base/index/scale/disp ++}; ++ ++// Calling convention ++class Argument { ++ public: ++ enum { ++ n_register_parameters = 8, // 8 integer registers used to pass parameters ++ n_float_register_parameters = 8 // 8 float registers used to pass parameters ++ }; ++}; ++ ++// ++// AddressLiteral has been split out from Address because operands of this type ++// need to be treated specially on 32bit vs. 64bit platforms. By splitting it out ++// the few instructions that need to deal with address literals are unique and the ++// MacroAssembler does not have to implement every instruction in the Assembler ++// in order to search for address literals that may need special handling depending ++// on the instruction and the platform. As small step on the way to merging i486/amd64 ++// directories. ++// ++class AddressLiteral { ++ friend class ArrayAddress; ++ RelocationHolder _rspec; ++ // Typically we use AddressLiterals we want to use their rval ++ // However in some situations we want the lval (effect address) of the item. ++ // We provide a special factory for making those lvals. ++ bool _is_lval; ++ ++ // If the target is far we'll need to load the ea of this to ++ // a register to reach it. Otherwise if near we can do rip ++ // relative addressing. ++ ++ address _target; ++ ++ protected: ++ // creation ++ AddressLiteral() ++ : _is_lval(false), ++ _target(NULL) ++ {} ++ ++ public: ++ ++ ++ AddressLiteral(address target, relocInfo::relocType rtype); ++ ++ AddressLiteral(address target, RelocationHolder const& rspec) ++ : _rspec(rspec), ++ _is_lval(false), ++ _target(target) ++ {} ++ ++ AddressLiteral addr() { ++ AddressLiteral ret = *this; ++ ret._is_lval = true; ++ return ret; ++ } ++ ++ ++ private: ++ ++ address target() { return _target; } ++ bool is_lval() { return _is_lval; } ++ ++ relocInfo::relocType reloc() const { return _rspec.type(); } ++ const RelocationHolder& rspec() const { return _rspec; } ++ ++ friend class Assembler; ++ friend class MacroAssembler; ++ friend class Address; ++ friend class LIR_Assembler; ++ RelocationHolder rspec_from_rtype(relocInfo::relocType rtype, address addr) { ++ switch (rtype) { ++ case relocInfo::external_word_type: ++ return external_word_Relocation::spec(addr); ++ case relocInfo::internal_word_type: ++ return internal_word_Relocation::spec(addr); ++ case relocInfo::opt_virtual_call_type: ++ return opt_virtual_call_Relocation::spec(); ++ case relocInfo::static_call_type: ++ return static_call_Relocation::spec(); ++ case relocInfo::runtime_call_type: ++ return runtime_call_Relocation::spec(); ++ case relocInfo::poll_type: ++ case relocInfo::poll_return_type: ++ return Relocation::spec_simple(rtype); ++ case relocInfo::none: ++ case relocInfo::oop_type: ++ // Oops are a special case. Normally they would be their own section ++ // but in cases like icBuffer they are literals in the code stream that ++ // we don't have a section for. 
We use none so that we get a literal address ++ // which is always patchable. ++ return RelocationHolder(); ++ default: ++ ShouldNotReachHere(); ++ return RelocationHolder(); ++ } ++ } ++ ++}; ++ ++// Convience classes ++class RuntimeAddress: public AddressLiteral { ++ ++ public: ++ ++ RuntimeAddress(address target) : AddressLiteral(target, relocInfo::runtime_call_type) {} ++ ++}; ++ ++class OopAddress: public AddressLiteral { ++ ++ public: ++ ++ OopAddress(address target) : AddressLiteral(target, relocInfo::oop_type){} ++ ++}; ++ ++class ExternalAddress: public AddressLiteral { ++ ++ public: ++ ++ ExternalAddress(address target) : AddressLiteral(target, relocInfo::external_word_type){} ++ ++}; ++ ++class InternalAddress: public AddressLiteral { ++ ++ public: ++ ++ InternalAddress(address target) : AddressLiteral(target, relocInfo::internal_word_type) {} ++ ++}; ++ ++// x86 can do array addressing as a single operation since disp can be an absolute ++// address amd64 can't. We create a class that expresses the concept but does extra ++// magic on amd64 to get the final result ++ ++class ArrayAddress { ++ private: ++ ++ AddressLiteral _base; ++ Address _index; ++ ++ public: ++ ++ ArrayAddress() {}; ++ ArrayAddress(AddressLiteral base, Address index): _base(base), _index(index) {}; ++ AddressLiteral base() { return _base; } ++ Address index() { return _index; } ++ ++}; ++ ++// The LoongArch Assembler: Pure assembler doing NO optimizations on the instruction ++// level ; i.e., what you write is what you get. The Assembler is generating code into ++// a CodeBuffer. ++ ++class Assembler : public AbstractAssembler { ++ friend class AbstractAssembler; // for the non-virtual hack ++ friend class LIR_Assembler; // as_Address() ++ friend class StubGenerator; ++ ++ public: ++ // 22-bit opcode, highest 22 bits: bits[31...10] ++ enum ops22 { ++ clo_w_op = 0b0000000000000000000100, ++ clz_w_op = 0b0000000000000000000101, ++ cto_w_op = 0b0000000000000000000110, ++ ctz_w_op = 0b0000000000000000000111, ++ clo_d_op = 0b0000000000000000001000, ++ clz_d_op = 0b0000000000000000001001, ++ cto_d_op = 0b0000000000000000001010, ++ ctz_d_op = 0b0000000000000000001011, ++ revb_2h_op = 0b0000000000000000001100, ++ revb_4h_op = 0b0000000000000000001101, ++ revb_2w_op = 0b0000000000000000001110, ++ revb_d_op = 0b0000000000000000001111, ++ revh_2w_op = 0b0000000000000000010000, ++ revh_d_op = 0b0000000000000000010001, ++ bitrev_4b_op = 0b0000000000000000010010, ++ bitrev_8b_op = 0b0000000000000000010011, ++ bitrev_w_op = 0b0000000000000000010100, ++ bitrev_d_op = 0b0000000000000000010101, ++ ext_w_h_op = 0b0000000000000000010110, ++ ext_w_b_op = 0b0000000000000000010111, ++ rdtimel_w_op = 0b0000000000000000011000, ++ rdtimeh_w_op = 0b0000000000000000011001, ++ rdtime_d_op = 0b0000000000000000011010, ++ cpucfg_op = 0b0000000000000000011011, ++ fabs_s_op = 0b0000000100010100000001, ++ fabs_d_op = 0b0000000100010100000010, ++ fneg_s_op = 0b0000000100010100000101, ++ fneg_d_op = 0b0000000100010100000110, ++ flogb_s_op = 0b0000000100010100001001, ++ flogb_d_op = 0b0000000100010100001010, ++ fclass_s_op = 0b0000000100010100001101, ++ fclass_d_op = 0b0000000100010100001110, ++ fsqrt_s_op = 0b0000000100010100010001, ++ fsqrt_d_op = 0b0000000100010100010010, ++ frecip_s_op = 0b0000000100010100010101, ++ frecip_d_op = 0b0000000100010100010110, ++ frsqrt_s_op = 0b0000000100010100011001, ++ frsqrt_d_op = 0b0000000100010100011010, ++ fmov_s_op = 0b0000000100010100100101, ++ fmov_d_op = 0b0000000100010100100110, ++ movgr2fr_w_op = 
0b0000000100010100101001, ++ movgr2fr_d_op = 0b0000000100010100101010, ++ movgr2frh_w_op = 0b0000000100010100101011, ++ movfr2gr_s_op = 0b0000000100010100101101, ++ movfr2gr_d_op = 0b0000000100010100101110, ++ movfrh2gr_s_op = 0b0000000100010100101111, ++ movgr2fcsr_op = 0b0000000100010100110000, ++ movfcsr2gr_op = 0b0000000100010100110010, ++ movfr2cf_op = 0b0000000100010100110100, ++ movcf2fr_op = 0b0000000100010100110101, ++ movgr2cf_op = 0b0000000100010100110110, ++ movcf2gr_op = 0b0000000100010100110111, ++ fcvt_s_d_op = 0b0000000100011001000110, ++ fcvt_d_s_op = 0b0000000100011001001001, ++ ftintrm_w_s_op = 0b0000000100011010000001, ++ ftintrm_w_d_op = 0b0000000100011010000010, ++ ftintrm_l_s_op = 0b0000000100011010001001, ++ ftintrm_l_d_op = 0b0000000100011010001010, ++ ftintrp_w_s_op = 0b0000000100011010010001, ++ ftintrp_w_d_op = 0b0000000100011010010010, ++ ftintrp_l_s_op = 0b0000000100011010011001, ++ ftintrp_l_d_op = 0b0000000100011010011010, ++ ftintrz_w_s_op = 0b0000000100011010100001, ++ ftintrz_w_d_op = 0b0000000100011010100010, ++ ftintrz_l_s_op = 0b0000000100011010101001, ++ ftintrz_l_d_op = 0b0000000100011010101010, ++ ftintrne_w_s_op = 0b0000000100011010110001, ++ ftintrne_w_d_op = 0b0000000100011010110010, ++ ftintrne_l_s_op = 0b0000000100011010111001, ++ ftintrne_l_d_op = 0b0000000100011010111010, ++ ftint_w_s_op = 0b0000000100011011000001, ++ ftint_w_d_op = 0b0000000100011011000010, ++ ftint_l_s_op = 0b0000000100011011001001, ++ ftint_l_d_op = 0b0000000100011011001010, ++ ffint_s_w_op = 0b0000000100011101000100, ++ ffint_s_l_op = 0b0000000100011101000110, ++ ffint_d_w_op = 0b0000000100011101001000, ++ ffint_d_l_op = 0b0000000100011101001010, ++ frint_s_op = 0b0000000100011110010001, ++ frint_d_op = 0b0000000100011110010010, ++ iocsrrd_b_op = 0b0000011001001000000000, ++ iocsrrd_h_op = 0b0000011001001000000001, ++ iocsrrd_w_op = 0b0000011001001000000010, ++ iocsrrd_d_op = 0b0000011001001000000011, ++ iocsrwr_b_op = 0b0000011001001000000100, ++ iocsrwr_h_op = 0b0000011001001000000101, ++ iocsrwr_w_op = 0b0000011001001000000110, ++ iocsrwr_d_op = 0b0000011001001000000111, ++ vpcnt_b_op = 0b0111001010011100001000, ++ vpcnt_h_op = 0b0111001010011100001001, ++ vpcnt_w_op = 0b0111001010011100001010, ++ vpcnt_d_op = 0b0111001010011100001011, ++ vneg_b_op = 0b0111001010011100001100, ++ vneg_h_op = 0b0111001010011100001101, ++ vneg_w_op = 0b0111001010011100001110, ++ vneg_d_op = 0b0111001010011100001111, ++ vfclass_s_op = 0b0111001010011100110101, ++ vfclass_d_op = 0b0111001010011100110110, ++ vfsqrt_s_op = 0b0111001010011100111001, ++ vfsqrt_d_op = 0b0111001010011100111010, ++ vfrint_s_op = 0b0111001010011101001101, ++ vfrint_d_op = 0b0111001010011101001110, ++ vfrintrm_s_op = 0b0111001010011101010001, ++ vfrintrm_d_op = 0b0111001010011101010010, ++ vfrintrp_s_op = 0b0111001010011101010101, ++ vfrintrp_d_op = 0b0111001010011101010110, ++ vfrintrz_s_op = 0b0111001010011101011001, ++ vfrintrz_d_op = 0b0111001010011101011010, ++ vfrintrne_s_op = 0b0111001010011101011101, ++ vfrintrne_d_op = 0b0111001010011101011110, ++ vfcvtl_s_h_op = 0b0111001010011101111010, ++ vfcvth_s_h_op = 0b0111001010011101111011, ++ vfcvtl_d_s_op = 0b0111001010011101111100, ++ vfcvth_d_s_op = 0b0111001010011101111101, ++ vffint_s_w_op = 0b0111001010011110000000, ++ vffint_s_wu_op = 0b0111001010011110000001, ++ vffint_d_l_op = 0b0111001010011110000010, ++ vffint_d_lu_op = 0b0111001010011110000011, ++ vffintl_d_w_op = 0b0111001010011110000100, ++ vffinth_d_w_op = 0b0111001010011110000101, ++ vftint_w_s_op 
= 0b0111001010011110001100, ++ vftint_l_d_op = 0b0111001010011110001101, ++ vftintrm_w_s_op = 0b0111001010011110001110, ++ vftintrm_l_d_op = 0b0111001010011110001111, ++ vftintrp_w_s_op = 0b0111001010011110010000, ++ vftintrp_l_d_op = 0b0111001010011110010001, ++ vftintrz_w_s_op = 0b0111001010011110010010, ++ vftintrz_l_d_op = 0b0111001010011110010011, ++ vftintrne_w_s_op = 0b0111001010011110010100, ++ vftintrne_l_d_op = 0b0111001010011110010101, ++ vftint_wu_s = 0b0111001010011110010110, ++ vftint_lu_d = 0b0111001010011110010111, ++ vftintrz_wu_f = 0b0111001010011110011100, ++ vftintrz_lu_d = 0b0111001010011110011101, ++ vftintl_l_s_op = 0b0111001010011110100000, ++ vftinth_l_s_op = 0b0111001010011110100001, ++ vftintrml_l_s_op = 0b0111001010011110100010, ++ vftintrmh_l_s_op = 0b0111001010011110100011, ++ vftintrpl_l_s_op = 0b0111001010011110100100, ++ vftintrph_l_s_op = 0b0111001010011110100101, ++ vftintrzl_l_s_op = 0b0111001010011110100110, ++ vftintrzh_l_s_op = 0b0111001010011110100111, ++ vftintrnel_l_s_op = 0b0111001010011110101000, ++ vftintrneh_l_s_op = 0b0111001010011110101001, ++ vreplgr2vr_b_op = 0b0111001010011111000000, ++ vreplgr2vr_h_op = 0b0111001010011111000001, ++ vreplgr2vr_w_op = 0b0111001010011111000010, ++ vreplgr2vr_d_op = 0b0111001010011111000011, ++ xvpcnt_b_op = 0b0111011010011100001000, ++ xvpcnt_h_op = 0b0111011010011100001001, ++ xvpcnt_w_op = 0b0111011010011100001010, ++ xvpcnt_d_op = 0b0111011010011100001011, ++ xvneg_b_op = 0b0111011010011100001100, ++ xvneg_h_op = 0b0111011010011100001101, ++ xvneg_w_op = 0b0111011010011100001110, ++ xvneg_d_op = 0b0111011010011100001111, ++ xvfclass_s_op = 0b0111011010011100110101, ++ xvfclass_d_op = 0b0111011010011100110110, ++ xvfsqrt_s_op = 0b0111011010011100111001, ++ xvfsqrt_d_op = 0b0111011010011100111010, ++ xvfrint_s_op = 0b0111011010011101001101, ++ xvfrint_d_op = 0b0111011010011101001110, ++ xvfrintrm_s_op = 0b0111011010011101010001, ++ xvfrintrm_d_op = 0b0111011010011101010010, ++ xvfrintrp_s_op = 0b0111011010011101010101, ++ xvfrintrp_d_op = 0b0111011010011101010110, ++ xvfrintrz_s_op = 0b0111011010011101011001, ++ xvfrintrz_d_op = 0b0111011010011101011010, ++ xvfrintrne_s_op = 0b0111011010011101011101, ++ xvfrintrne_d_op = 0b0111011010011101011110, ++ xvfcvtl_s_h_op = 0b0111011010011101111010, ++ xvfcvth_s_h_op = 0b0111011010011101111011, ++ xvfcvtl_d_s_op = 0b0111011010011101111100, ++ xvfcvth_d_s_op = 0b0111011010011101111101, ++ xvffint_s_w_op = 0b0111011010011110000000, ++ xvffint_s_wu_op = 0b0111011010011110000001, ++ xvffint_d_l_op = 0b0111011010011110000010, ++ xvffint_d_lu_op = 0b0111011010011110000011, ++ xvffintl_d_w_op = 0b0111011010011110000100, ++ xvffinth_d_w_op = 0b0111011010011110000101, ++ xvftint_w_s_op = 0b0111011010011110001100, ++ xvftint_l_d_op = 0b0111011010011110001101, ++ xvftintrm_w_s_op = 0b0111011010011110001110, ++ xvftintrm_l_d_op = 0b0111011010011110001111, ++ xvftintrp_w_s_op = 0b0111011010011110010000, ++ xvftintrp_l_d_op = 0b0111011010011110010001, ++ xvftintrz_w_s_op = 0b0111011010011110010010, ++ xvftintrz_l_d_op = 0b0111011010011110010011, ++ xvftintrne_w_s_op = 0b0111011010011110010100, ++ xvftintrne_l_d_op = 0b0111011010011110010101, ++ xvftint_wu_s = 0b0111011010011110010110, ++ xvftint_lu_d = 0b0111011010011110010111, ++ xvftintrz_wu_f = 0b0111011010011110011100, ++ xvftintrz_lu_d = 0b0111011010011110011101, ++ xvftintl_l_s_op = 0b0111011010011110100000, ++ xvftinth_l_s_op = 0b0111011010011110100001, ++ xvftintrml_l_s_op = 0b0111011010011110100010, ++ xvftintrmh_l_s_op 
= 0b0111011010011110100011, ++ xvftintrpl_l_s_op = 0b0111011010011110100100, ++ xvftintrph_l_s_op = 0b0111011010011110100101, ++ xvftintrzl_l_s_op = 0b0111011010011110100110, ++ xvftintrzh_l_s_op = 0b0111011010011110100111, ++ xvftintrnel_l_s_op = 0b0111011010011110101000, ++ xvftintrneh_l_s_op = 0b0111011010011110101001, ++ xvreplgr2vr_b_op = 0b0111011010011111000000, ++ xvreplgr2vr_h_op = 0b0111011010011111000001, ++ xvreplgr2vr_w_op = 0b0111011010011111000010, ++ xvreplgr2vr_d_op = 0b0111011010011111000011, ++ vext2xv_h_b_op = 0b0111011010011111000100, ++ vext2xv_w_b_op = 0b0111011010011111000101, ++ vext2xv_d_b_op = 0b0111011010011111000110, ++ vext2xv_w_h_op = 0b0111011010011111000111, ++ vext2xv_d_h_op = 0b0111011010011111001000, ++ vext2xv_d_w_op = 0b0111011010011111001001, ++ vext2xv_hu_bu_op = 0b0111011010011111001010, ++ vext2xv_wu_bu_op = 0b0111011010011111001011, ++ vext2xv_du_bu_op = 0b0111011010011111001100, ++ vext2xv_wu_hu_op = 0b0111011010011111001101, ++ vext2xv_du_hu_op = 0b0111011010011111001110, ++ vext2xv_du_wu_op = 0b0111011010011111001111, ++ xvreplve0_b_op = 0b0111011100000111000000, ++ xvreplve0_h_op = 0b0111011100000111100000, ++ xvreplve0_w_op = 0b0111011100000111110000, ++ xvreplve0_d_op = 0b0111011100000111111000, ++ xvreplve0_q_op = 0b0111011100000111111100, ++ ++ unknow_ops22 = 0b1111111111111111111111 ++ }; ++ ++ // 21-bit opcode, highest 21 bits: bits[31...11] ++ enum ops21 { ++ vinsgr2vr_d_op = 0b011100101110101111110, ++ vpickve2gr_d_op = 0b011100101110111111110, ++ vpickve2gr_du_op = 0b011100101111001111110, ++ vreplvei_d_op = 0b011100101111011111110, ++ ++ unknow_ops21 = 0b111111111111111111111 ++ }; ++ ++ // 20-bit opcode, highest 20 bits: bits[31...12] ++ enum ops20 { ++ vinsgr2vr_w_op = 0b01110010111010111110, ++ vpickve2gr_w_op = 0b01110010111011111110, ++ vpickve2gr_wu_op = 0b01110010111100111110, ++ vreplvei_w_op = 0b01110010111101111110, ++ xvinsgr2vr_d_op = 0b01110110111010111110, ++ xvpickve2gr_d_op = 0b01110110111011111110, ++ xvpickve2gr_du_op = 0b01110110111100111110, ++ xvinsve0_d_op = 0b01110110111111111110, ++ xvpickve_d_op = 0b01110111000000111110, ++ ++ unknow_ops20 = 0b11111111111111111111 ++ }; ++ ++ // 19-bit opcode, highest 19 bits: bits[31...13] ++ enum ops19 { ++ vrotri_b_op = 0b0111001010100000001, ++ vinsgr2vr_h_op = 0b0111001011101011110, ++ vpickve2gr_h_op = 0b0111001011101111110, ++ vpickve2gr_hu_op = 0b0111001011110011110, ++ vreplvei_h_op = 0b0111001011110111110, ++ vbitclri_b_op = 0b0111001100010000001, ++ vbitseti_b_op = 0b0111001100010100001, ++ vbitrevi_b_op = 0b0111001100011000001, ++ vslli_b_op = 0b0111001100101100001, ++ vsrli_b_op = 0b0111001100110000001, ++ vsrai_b_op = 0b0111001100110100001, ++ xvrotri_b_op = 0b0111011010100000001, ++ xvinsgr2vr_w_op = 0b0111011011101011110, ++ xvpickve2gr_w_op = 0b0111011011101111110, ++ xvpickve2gr_wu_op = 0b0111011011110011110, ++ xvinsve0_w_op = 0b0111011011111111110, ++ xvpickve_w_op = 0b0111011100000011110, ++ xvbitclri_b_op = 0b0111011100010000001, ++ xvbitseti_b_op = 0b0111011100010100001, ++ xvbitrevi_b_op = 0b0111011100011000001, ++ xvslli_b_op = 0b0111011100101100001, ++ xvsrli_b_op = 0b0111011100110000001, ++ xvsrai_b_op = 0b0111011100110100001, ++ ++ unknow_ops19 = 0b1111111111111111111 ++ }; ++ ++ // 18-bit opcode, highest 18 bits: bits[31...14] ++ enum ops18 { ++ vrotri_h_op = 0b011100101010000001, ++ vinsgr2vr_b_op = 0b011100101110101110, ++ vpickve2gr_b_op = 0b011100101110111110, ++ vpickve2gr_bu_op = 0b011100101111001110, ++ vreplvei_b_op = 0b011100101111011110, 
++ vbitclri_h_op = 0b011100110001000001, ++ vbitseti_h_op = 0b011100110001010001, ++ vbitrevi_h_op = 0b011100110001100001, ++ vslli_h_op = 0b011100110010110001, ++ vsrli_h_op = 0b011100110011000001, ++ vsrai_h_op = 0b011100110011010001, ++ vsrlni_b_h_op = 0b011100110100000001, ++ xvrotri_h_op = 0b011101101010000001, ++ xvbitclri_h_op = 0b011101110001000001, ++ xvbitseti_h_op = 0b011101110001010001, ++ xvbitrevi_h_op = 0b011101110001100001, ++ xvslli_h_op = 0b011101110010110001, ++ xvsrli_h_op = 0b011101110011000001, ++ xvsrai_h_op = 0b011101110011010001, ++ ++ unknow_ops18 = 0b111111111111111111 ++ }; ++ ++ // 17-bit opcode, highest 17 bits: bits[31...15] ++ enum ops17 { ++ asrtle_d_op = 0b00000000000000010, ++ asrtgt_d_op = 0b00000000000000011, ++ add_w_op = 0b00000000000100000, ++ add_d_op = 0b00000000000100001, ++ sub_w_op = 0b00000000000100010, ++ sub_d_op = 0b00000000000100011, ++ slt_op = 0b00000000000100100, ++ sltu_op = 0b00000000000100101, ++ maskeqz_op = 0b00000000000100110, ++ masknez_op = 0b00000000000100111, ++ nor_op = 0b00000000000101000, ++ and_op = 0b00000000000101001, ++ or_op = 0b00000000000101010, ++ xor_op = 0b00000000000101011, ++ orn_op = 0b00000000000101100, ++ andn_op = 0b00000000000101101, ++ sll_w_op = 0b00000000000101110, ++ srl_w_op = 0b00000000000101111, ++ sra_w_op = 0b00000000000110000, ++ sll_d_op = 0b00000000000110001, ++ srl_d_op = 0b00000000000110010, ++ sra_d_op = 0b00000000000110011, ++ rotr_w_op = 0b00000000000110110, ++ rotr_d_op = 0b00000000000110111, ++ mul_w_op = 0b00000000000111000, ++ mulh_w_op = 0b00000000000111001, ++ mulh_wu_op = 0b00000000000111010, ++ mul_d_op = 0b00000000000111011, ++ mulh_d_op = 0b00000000000111100, ++ mulh_du_op = 0b00000000000111101, ++ mulw_d_w_op = 0b00000000000111110, ++ mulw_d_wu_op = 0b00000000000111111, ++ div_w_op = 0b00000000001000000, ++ mod_w_op = 0b00000000001000001, ++ div_wu_op = 0b00000000001000010, ++ mod_wu_op = 0b00000000001000011, ++ div_d_op = 0b00000000001000100, ++ mod_d_op = 0b00000000001000101, ++ div_du_op = 0b00000000001000110, ++ mod_du_op = 0b00000000001000111, ++ crc_w_b_w_op = 0b00000000001001000, ++ crc_w_h_w_op = 0b00000000001001001, ++ crc_w_w_w_op = 0b00000000001001010, ++ crc_w_d_w_op = 0b00000000001001011, ++ crcc_w_b_w_op = 0b00000000001001100, ++ crcc_w_h_w_op = 0b00000000001001101, ++ crcc_w_w_w_op = 0b00000000001001110, ++ crcc_w_d_w_op = 0b00000000001001111, ++ break_op = 0b00000000001010100, ++ fadd_s_op = 0b00000001000000001, ++ fadd_d_op = 0b00000001000000010, ++ fsub_s_op = 0b00000001000000101, ++ fsub_d_op = 0b00000001000000110, ++ fmul_s_op = 0b00000001000001001, ++ fmul_d_op = 0b00000001000001010, ++ fdiv_s_op = 0b00000001000001101, ++ fdiv_d_op = 0b00000001000001110, ++ fmax_s_op = 0b00000001000010001, ++ fmax_d_op = 0b00000001000010010, ++ fmin_s_op = 0b00000001000010101, ++ fmin_d_op = 0b00000001000010110, ++ fmaxa_s_op = 0b00000001000011001, ++ fmaxa_d_op = 0b00000001000011010, ++ fmina_s_op = 0b00000001000011101, ++ fmina_d_op = 0b00000001000011110, ++ fscaleb_s_op = 0b00000001000100001, ++ fscaleb_d_op = 0b00000001000100010, ++ fcopysign_s_op = 0b00000001000100101, ++ fcopysign_d_op = 0b00000001000100110, ++ ldx_b_op = 0b00111000000000000, ++ ldx_h_op = 0b00111000000001000, ++ ldx_w_op = 0b00111000000010000, ++ ldx_d_op = 0b00111000000011000, ++ stx_b_op = 0b00111000000100000, ++ stx_h_op = 0b00111000000101000, ++ stx_w_op = 0b00111000000110000, ++ stx_d_op = 0b00111000000111000, ++ ldx_bu_op = 0b00111000001000000, ++ ldx_hu_op = 0b00111000001001000, ++ ldx_wu_op = 
0b00111000001010000, ++ fldx_s_op = 0b00111000001100000, ++ fldx_d_op = 0b00111000001101000, ++ fstx_s_op = 0b00111000001110000, ++ fstx_d_op = 0b00111000001111000, ++ vldx_op = 0b00111000010000000, ++ vstx_op = 0b00111000010001000, ++ xvldx_op = 0b00111000010010000, ++ xvstx_op = 0b00111000010011000, ++ amswap_w_op = 0b00111000011000000, ++ amswap_d_op = 0b00111000011000001, ++ amadd_w_op = 0b00111000011000010, ++ amadd_d_op = 0b00111000011000011, ++ amand_w_op = 0b00111000011000100, ++ amand_d_op = 0b00111000011000101, ++ amor_w_op = 0b00111000011000110, ++ amor_d_op = 0b00111000011000111, ++ amxor_w_op = 0b00111000011001000, ++ amxor_d_op = 0b00111000011001001, ++ ammax_w_op = 0b00111000011001010, ++ ammax_d_op = 0b00111000011001011, ++ ammin_w_op = 0b00111000011001100, ++ ammin_d_op = 0b00111000011001101, ++ ammax_wu_op = 0b00111000011001110, ++ ammax_du_op = 0b00111000011001111, ++ ammin_wu_op = 0b00111000011010000, ++ ammin_du_op = 0b00111000011010001, ++ amswap_db_w_op = 0b00111000011010010, ++ amswap_db_d_op = 0b00111000011010011, ++ amadd_db_w_op = 0b00111000011010100, ++ amadd_db_d_op = 0b00111000011010101, ++ amand_db_w_op = 0b00111000011010110, ++ amand_db_d_op = 0b00111000011010111, ++ amor_db_w_op = 0b00111000011011000, ++ amor_db_d_op = 0b00111000011011001, ++ amxor_db_w_op = 0b00111000011011010, ++ amxor_db_d_op = 0b00111000011011011, ++ ammax_db_w_op = 0b00111000011011100, ++ ammax_db_d_op = 0b00111000011011101, ++ ammin_db_w_op = 0b00111000011011110, ++ ammin_db_d_op = 0b00111000011011111, ++ ammax_db_wu_op = 0b00111000011100000, ++ ammax_db_du_op = 0b00111000011100001, ++ ammin_db_wu_op = 0b00111000011100010, ++ ammin_db_du_op = 0b00111000011100011, ++ dbar_op = 0b00111000011100100, ++ ibar_op = 0b00111000011100101, ++ fldgt_s_op = 0b00111000011101000, ++ fldgt_d_op = 0b00111000011101001, ++ fldle_s_op = 0b00111000011101010, ++ fldle_d_op = 0b00111000011101011, ++ fstgt_s_op = 0b00111000011101100, ++ fstgt_d_op = 0b00111000011101101, ++ fstle_s_op = 0b00111000011101110, ++ fstle_d_op = 0b00111000011101111, ++ ldgt_b_op = 0b00111000011110000, ++ ldgt_h_op = 0b00111000011110001, ++ ldgt_w_op = 0b00111000011110010, ++ ldgt_d_op = 0b00111000011110011, ++ ldle_b_op = 0b00111000011110100, ++ ldle_h_op = 0b00111000011110101, ++ ldle_w_op = 0b00111000011110110, ++ ldle_d_op = 0b00111000011110111, ++ stgt_b_op = 0b00111000011111000, ++ stgt_h_op = 0b00111000011111001, ++ stgt_w_op = 0b00111000011111010, ++ stgt_d_op = 0b00111000011111011, ++ stle_b_op = 0b00111000011111100, ++ stle_h_op = 0b00111000011111101, ++ stle_w_op = 0b00111000011111110, ++ stle_d_op = 0b00111000011111111, ++ vseq_b_op = 0b01110000000000000, ++ vseq_h_op = 0b01110000000000001, ++ vseq_w_op = 0b01110000000000010, ++ vseq_d_op = 0b01110000000000011, ++ vsle_b_op = 0b01110000000000100, ++ vsle_h_op = 0b01110000000000101, ++ vsle_w_op = 0b01110000000000110, ++ vsle_d_op = 0b01110000000000111, ++ vsle_bu_op = 0b01110000000001000, ++ vsle_hu_op = 0b01110000000001001, ++ vsle_wu_op = 0b01110000000001010, ++ vsle_du_op = 0b01110000000001011, ++ vslt_b_op = 0b01110000000001100, ++ vslt_h_op = 0b01110000000001101, ++ vslt_w_op = 0b01110000000001110, ++ vslt_d_op = 0b01110000000001111, ++ vslt_bu_op = 0b01110000000010000, ++ vslt_hu_op = 0b01110000000010001, ++ vslt_wu_op = 0b01110000000010010, ++ vslt_du_op = 0b01110000000010011, ++ vadd_b_op = 0b01110000000010100, ++ vadd_h_op = 0b01110000000010101, ++ vadd_w_op = 0b01110000000010110, ++ vadd_d_op = 0b01110000000010111, ++ vsub_b_op = 0b01110000000011000, ++ 
vsub_h_op = 0b01110000000011001, ++ vsub_w_op = 0b01110000000011010, ++ vsub_d_op = 0b01110000000011011, ++ vabsd_b_op = 0b01110000011000000, ++ vabsd_h_op = 0b01110000011000001, ++ vabsd_w_op = 0b01110000011000010, ++ vabsd_d_op = 0b01110000011000011, ++ vmax_b_op = 0b01110000011100000, ++ vmax_h_op = 0b01110000011100001, ++ vmax_w_op = 0b01110000011100010, ++ vmax_d_op = 0b01110000011100011, ++ vmin_b_op = 0b01110000011100100, ++ vmin_h_op = 0b01110000011100101, ++ vmin_w_op = 0b01110000011100110, ++ vmin_d_op = 0b01110000011100111, ++ vmul_b_op = 0b01110000100001000, ++ vmul_h_op = 0b01110000100001001, ++ vmul_w_op = 0b01110000100001010, ++ vmul_d_op = 0b01110000100001011, ++ vmuh_b_op = 0b01110000100001100, ++ vmuh_h_op = 0b01110000100001101, ++ vmuh_w_op = 0b01110000100001110, ++ vmuh_d_op = 0b01110000100001111, ++ vmuh_bu_op = 0b01110000100010000, ++ vmuh_hu_op = 0b01110000100010001, ++ vmuh_wu_op = 0b01110000100010010, ++ vmuh_du_op = 0b01110000100010011, ++ vmulwev_h_b_op = 0b01110000100100000, ++ vmulwev_w_h_op = 0b01110000100100001, ++ vmulwev_d_w_op = 0b01110000100100010, ++ vmulwev_q_d_op = 0b01110000100100011, ++ vmulwod_h_b_op = 0b01110000100100100, ++ vmulwod_w_h_op = 0b01110000100100101, ++ vmulwod_d_w_op = 0b01110000100100110, ++ vmulwod_q_d_op = 0b01110000100100111, ++ vmadd_b_op = 0b01110000101010000, ++ vmadd_h_op = 0b01110000101010001, ++ vmadd_w_op = 0b01110000101010010, ++ vmadd_d_op = 0b01110000101010011, ++ vmsub_b_op = 0b01110000101010100, ++ vmsub_h_op = 0b01110000101010101, ++ vmsub_w_op = 0b01110000101010110, ++ vmsub_d_op = 0b01110000101010111, ++ vsll_b_op = 0b01110000111010000, ++ vsll_h_op = 0b01110000111010001, ++ vsll_w_op = 0b01110000111010010, ++ vsll_d_op = 0b01110000111010011, ++ vsrl_b_op = 0b01110000111010100, ++ vsrl_h_op = 0b01110000111010101, ++ vsrl_w_op = 0b01110000111010110, ++ vsrl_d_op = 0b01110000111010111, ++ vsra_b_op = 0b01110000111011000, ++ vsra_h_op = 0b01110000111011001, ++ vsra_w_op = 0b01110000111011010, ++ vsra_d_op = 0b01110000111011011, ++ vrotr_b_op = 0b01110000111011100, ++ vrotr_h_op = 0b01110000111011101, ++ vrotr_w_op = 0b01110000111011110, ++ vrotr_d_op = 0b01110000111011111, ++ vbitclr_b_op = 0b01110001000011000, ++ vbitclr_h_op = 0b01110001000011001, ++ vbitclr_w_op = 0b01110001000011010, ++ vbitclr_d_op = 0b01110001000011011, ++ vbitset_b_op = 0b01110001000011100, ++ vbitset_h_op = 0b01110001000011101, ++ vbitset_w_op = 0b01110001000011110, ++ vbitset_d_op = 0b01110001000011111, ++ vbitrev_b_op = 0b01110001000100000, ++ vbitrev_h_op = 0b01110001000100001, ++ vbitrev_w_op = 0b01110001000100010, ++ vbitrev_d_op = 0b01110001000100011, ++ vand_v_op = 0b01110001001001100, ++ vor_v_op = 0b01110001001001101, ++ vxor_v_op = 0b01110001001001110, ++ vnor_v_op = 0b01110001001001111, ++ vandn_v_op = 0b01110001001010000, ++ vorn_v_op = 0b01110001001010001, ++ vadd_q_op = 0b01110001001011010, ++ vsub_q_op = 0b01110001001011011, ++ vfadd_s_op = 0b01110001001100001, ++ vfadd_d_op = 0b01110001001100010, ++ vfsub_s_op = 0b01110001001100101, ++ vfsub_d_op = 0b01110001001100110, ++ vfmul_s_op = 0b01110001001110001, ++ vfmul_d_op = 0b01110001001110010, ++ vfdiv_s_op = 0b01110001001110101, ++ vfdiv_d_op = 0b01110001001110110, ++ vfmax_s_op = 0b01110001001111001, ++ vfmax_d_op = 0b01110001001111010, ++ vfmin_s_op = 0b01110001001111101, ++ vfmin_d_op = 0b01110001001111110, ++ vfcvt_h_s_op = 0b01110001010001100, ++ vfcvt_s_d_op = 0b01110001010001101, ++ vffint_s_l_op = 0b01110001010010000, ++ vftint_w_d_op = 0b01110001010010011, ++ 
vftintrm_w_d_op = 0b01110001010010100, ++ vftintrp_w_d_op = 0b01110001010010101, ++ vftintrz_w_d_op = 0b01110001010010110, ++ vftintrne_w_d_op = 0b01110001010010111, ++ vshuf_h_op = 0b01110001011110101, ++ vshuf_w_op = 0b01110001011110110, ++ vshuf_d_op = 0b01110001011110111, ++ vslti_bu_op = 0b01110010100010000, ++ vslti_hu_op = 0b01110010100010001, ++ vslti_wu_op = 0b01110010100010010, ++ vslti_du_op = 0b01110010100010011, ++ vaddi_bu_op = 0b01110010100010100, ++ vaddi_hu_op = 0b01110010100010101, ++ vaddi_wu_op = 0b01110010100010110, ++ vaddi_du_op = 0b01110010100010111, ++ vsubi_bu_op = 0b01110010100011000, ++ vsubi_hu_op = 0b01110010100011001, ++ vsubi_wu_op = 0b01110010100011010, ++ vsubi_du_op = 0b01110010100011011, ++ vrotri_w_op = 0b01110010101000001, ++ vbitclri_w_op = 0b01110011000100001, ++ vbitseti_w_op = 0b01110011000101001, ++ vbitrevi_w_op = 0b01110011000110001, ++ vslli_w_op = 0b01110011001011001, ++ vsrli_w_op = 0b01110011001100001, ++ vsrai_w_op = 0b01110011001101001, ++ vsrlni_h_w_op = 0b01110011010000001, ++ xvseq_b_op = 0b01110100000000000, ++ xvseq_h_op = 0b01110100000000001, ++ xvseq_w_op = 0b01110100000000010, ++ xvseq_d_op = 0b01110100000000011, ++ xvsle_b_op = 0b01110100000000100, ++ xvsle_h_op = 0b01110100000000101, ++ xvsle_w_op = 0b01110100000000110, ++ xvsle_d_op = 0b01110100000000111, ++ xvsle_bu_op = 0b01110100000001000, ++ xvsle_hu_op = 0b01110100000001001, ++ xvsle_wu_op = 0b01110100000001010, ++ xvsle_du_op = 0b01110100000001011, ++ xvslt_b_op = 0b01110100000001100, ++ xvslt_h_op = 0b01110100000001101, ++ xvslt_w_op = 0b01110100000001110, ++ xvslt_d_op = 0b01110100000001111, ++ xvslt_bu_op = 0b01110100000010000, ++ xvslt_hu_op = 0b01110100000010001, ++ xvslt_wu_op = 0b01110100000010010, ++ xvslt_du_op = 0b01110100000010011, ++ xvadd_b_op = 0b01110100000010100, ++ xvadd_h_op = 0b01110100000010101, ++ xvadd_w_op = 0b01110100000010110, ++ xvadd_d_op = 0b01110100000010111, ++ xvsub_b_op = 0b01110100000011000, ++ xvsub_h_op = 0b01110100000011001, ++ xvsub_w_op = 0b01110100000011010, ++ xvsub_d_op = 0b01110100000011011, ++ xvabsd_b_op = 0b01110100011000000, ++ xvabsd_h_op = 0b01110100011000001, ++ xvabsd_w_op = 0b01110100011000010, ++ xvabsd_d_op = 0b01110100011000011, ++ xvmax_b_op = 0b01110100011100000, ++ xvmax_h_op = 0b01110100011100001, ++ xvmax_w_op = 0b01110100011100010, ++ xvmax_d_op = 0b01110100011100011, ++ xvmin_b_op = 0b01110100011100100, ++ xvmin_h_op = 0b01110100011100101, ++ xvmin_w_op = 0b01110100011100110, ++ xvmin_d_op = 0b01110100011100111, ++ xvmul_b_op = 0b01110100100001000, ++ xvmul_h_op = 0b01110100100001001, ++ xvmul_w_op = 0b01110100100001010, ++ xvmul_d_op = 0b01110100100001011, ++ xvmuh_b_op = 0b01110100100001100, ++ xvmuh_h_op = 0b01110100100001101, ++ xvmuh_w_op = 0b01110100100001110, ++ xvmuh_d_op = 0b01110100100001111, ++ xvmuh_bu_op = 0b01110100100010000, ++ xvmuh_hu_op = 0b01110100100010001, ++ xvmuh_wu_op = 0b01110100100010010, ++ xvmuh_du_op = 0b01110100100010011, ++ xvmulwev_h_b_op = 0b01110100100100000, ++ xvmulwev_w_h_op = 0b01110100100100001, ++ xvmulwev_d_w_op = 0b01110100100100010, ++ xvmulwev_q_d_op = 0b01110100100100011, ++ xvmulwod_h_b_op = 0b01110100100100100, ++ xvmulwod_w_h_op = 0b01110100100100101, ++ xvmulwod_d_w_op = 0b01110100100100110, ++ xvmulwod_q_d_op = 0b01110100100100111, ++ xvmadd_b_op = 0b01110100101010000, ++ xvmadd_h_op = 0b01110100101010001, ++ xvmadd_w_op = 0b01110100101010010, ++ xvmadd_d_op = 0b01110100101010011, ++ xvmsub_b_op = 0b01110100101010100, ++ xvmsub_h_op = 0b01110100101010101, ++ 
xvmsub_w_op = 0b01110100101010110, ++ xvmsub_d_op = 0b01110100101010111, ++ xvsll_b_op = 0b01110100111010000, ++ xvsll_h_op = 0b01110100111010001, ++ xvsll_w_op = 0b01110100111010010, ++ xvsll_d_op = 0b01110100111010011, ++ xvsrl_b_op = 0b01110100111010100, ++ xvsrl_h_op = 0b01110100111010101, ++ xvsrl_w_op = 0b01110100111010110, ++ xvsrl_d_op = 0b01110100111010111, ++ xvsra_b_op = 0b01110100111011000, ++ xvsra_h_op = 0b01110100111011001, ++ xvsra_w_op = 0b01110100111011010, ++ xvsra_d_op = 0b01110100111011011, ++ xvrotr_b_op = 0b01110100111011100, ++ xvrotr_h_op = 0b01110100111011101, ++ xvrotr_w_op = 0b01110100111011110, ++ xvrotr_d_op = 0b01110100111011111, ++ xvbitclr_b_op = 0b01110101000011000, ++ xvbitclr_h_op = 0b01110101000011001, ++ xvbitclr_w_op = 0b01110101000011010, ++ xvbitclr_d_op = 0b01110101000011011, ++ xvbitset_b_op = 0b01110101000011100, ++ xvbitset_h_op = 0b01110101000011101, ++ xvbitset_w_op = 0b01110101000011110, ++ xvbitset_d_op = 0b01110101000011111, ++ xvbitrev_b_op = 0b01110101000100000, ++ xvbitrev_h_op = 0b01110101000100001, ++ xvbitrev_w_op = 0b01110101000100010, ++ xvbitrev_d_op = 0b01110101000100011, ++ xvand_v_op = 0b01110101001001100, ++ xvor_v_op = 0b01110101001001101, ++ xvxor_v_op = 0b01110101001001110, ++ xvnor_v_op = 0b01110101001001111, ++ xvandn_v_op = 0b01110101001010000, ++ xvorn_v_op = 0b01110101001010001, ++ xvadd_q_op = 0b01110101001011010, ++ xvsub_q_op = 0b01110101001011011, ++ xvfadd_s_op = 0b01110101001100001, ++ xvfadd_d_op = 0b01110101001100010, ++ xvfsub_s_op = 0b01110101001100101, ++ xvfsub_d_op = 0b01110101001100110, ++ xvfmul_s_op = 0b01110101001110001, ++ xvfmul_d_op = 0b01110101001110010, ++ xvfdiv_s_op = 0b01110101001110101, ++ xvfdiv_d_op = 0b01110101001110110, ++ xvfmax_s_op = 0b01110101001111001, ++ xvfmax_d_op = 0b01110101001111010, ++ xvfmin_s_op = 0b01110101001111101, ++ xvfmin_d_op = 0b01110101001111110, ++ xvfcvt_h_s_op = 0b01110101010001100, ++ xvfcvt_s_d_op = 0b01110101010001101, ++ xvffint_s_l_op = 0b01110101010010000, ++ xvftint_w_d_op = 0b01110101010010011, ++ xvftintrm_w_d_op = 0b01110101010010100, ++ xvftintrp_w_d_op = 0b01110101010010101, ++ xvftintrz_w_d_op = 0b01110101010010110, ++ xvftintrne_w_d_op = 0b01110101010010111, ++ xvshuf_h_op = 0b01110101011110101, ++ xvshuf_w_op = 0b01110101011110110, ++ xvshuf_d_op = 0b01110101011110111, ++ xvperm_w_op = 0b01110101011111010, ++ xvslti_bu_op = 0b01110110100010000, ++ xvslti_hu_op = 0b01110110100010001, ++ xvslti_wu_op = 0b01110110100010010, ++ xvslti_du_op = 0b01110110100010011, ++ xvaddi_bu_op = 0b01110110100010100, ++ xvaddi_hu_op = 0b01110110100010101, ++ xvaddi_wu_op = 0b01110110100010110, ++ xvaddi_du_op = 0b01110110100010111, ++ xvsubi_bu_op = 0b01110110100011000, ++ xvsubi_hu_op = 0b01110110100011001, ++ xvsubi_wu_op = 0b01110110100011010, ++ xvsubi_du_op = 0b01110110100011011, ++ xvrotri_w_op = 0b01110110101000001, ++ xvbitclri_w_op = 0b01110111000100001, ++ xvbitseti_w_op = 0b01110111000101001, ++ xvbitrevi_w_op = 0b01110111000110001, ++ xvslli_w_op = 0b01110111001011001, ++ xvsrli_w_op = 0b01110111001100001, ++ xvsrai_w_op = 0b01110111001101001, ++ ++ unknow_ops17 = 0b11111111111111111 ++ }; ++ ++ // 16-bit opcode, highest 16 bits: bits[31...16] ++ enum ops16 { ++ vrotri_d_op = 0b0111001010100001, ++ vbitclri_d_op = 0b0111001100010001, ++ vbitseti_d_op = 0b0111001100010101, ++ vbitrevi_d_op = 0b0111001100011001, ++ vslli_d_op = 0b0111001100101101, ++ vsrli_d_op = 0b0111001100110001, ++ vsrai_d_op = 0b0111001100110101, ++ vsrlni_w_d_op = 0b0111001101000001, ++ 
xvrotri_d_op = 0b0111011010100001, ++ xvbitclri_d_op = 0b0111011100010001, ++ xvbitseti_d_op = 0b0111011100010101, ++ xvbitrevi_d_op = 0b0111011100011001, ++ xvslli_d_op = 0b0111011100101101, ++ xvsrli_d_op = 0b0111011100110001, ++ xvsrai_d_op = 0b0111011100110101, ++ ++ unknow_ops16 = 0b1111111111111111 ++ }; ++ ++ // 15-bit opcode, highest 15 bits: bits[31...17] ++ enum ops15 { ++ vsrlni_d_q_op = 0b011100110100001, ++ ++ unknow_ops15 = 0b111111111111111 ++ }; ++ ++ // 14-bit opcode, highest 14 bits: bits[31...18] ++ enum ops14 { ++ alsl_w_op = 0b00000000000001, ++ bytepick_w_op = 0b00000000000010, ++ bytepick_d_op = 0b00000000000011, ++ alsl_d_op = 0b00000000001011, ++ slli_op = 0b00000000010000, ++ srli_op = 0b00000000010001, ++ srai_op = 0b00000000010010, ++ rotri_op = 0b00000000010011, ++ lddir_op = 0b00000110010000, ++ ldpte_op = 0b00000110010001, ++ vshuf4i_b_op = 0b01110011100100, ++ vshuf4i_h_op = 0b01110011100101, ++ vshuf4i_w_op = 0b01110011100110, ++ vshuf4i_d_op = 0b01110011100111, ++ vandi_b_op = 0b01110011110100, ++ vori_b_op = 0b01110011110101, ++ vxori_b_op = 0b01110011110110, ++ vnori_b_op = 0b01110011110111, ++ vldi_op = 0b01110011111000, ++ vpermi_w_op = 0b01110011111001, ++ xvshuf4i_b_op = 0b01110111100100, ++ xvshuf4i_h_op = 0b01110111100101, ++ xvshuf4i_w_op = 0b01110111100110, ++ xvshuf4i_d_op = 0b01110111100111, ++ xvandi_b_op = 0b01110111110100, ++ xvori_b_op = 0b01110111110101, ++ xvxori_b_op = 0b01110111110110, ++ xvnori_b_op = 0b01110111110111, ++ xvldi_op = 0b01110111111000, ++ xvpermi_w_op = 0b01110111111001, ++ xvpermi_d_op = 0b01110111111010, ++ xvpermi_q_op = 0b01110111111011, ++ ++ unknow_ops14 = 0b11111111111111 ++ }; ++ ++ // 12-bit opcode, highest 12 bits: bits[31...20] ++ enum ops12 { ++ fmadd_s_op = 0b000010000001, ++ fmadd_d_op = 0b000010000010, ++ fmsub_s_op = 0b000010000101, ++ fmsub_d_op = 0b000010000110, ++ fnmadd_s_op = 0b000010001001, ++ fnmadd_d_op = 0b000010001010, ++ fnmsub_s_op = 0b000010001101, ++ fnmsub_d_op = 0b000010001110, ++ vfmadd_s_op = 0b000010010001, ++ vfmadd_d_op = 0b000010010010, ++ vfmsub_s_op = 0b000010010101, ++ vfmsub_d_op = 0b000010010110, ++ vfnmadd_s_op = 0b000010011001, ++ vfnmadd_d_op = 0b000010011010, ++ vfnmsub_s_op = 0b000010011101, ++ vfnmsub_d_op = 0b000010011110, ++ xvfmadd_s_op = 0b000010100001, ++ xvfmadd_d_op = 0b000010100010, ++ xvfmsub_s_op = 0b000010100101, ++ xvfmsub_d_op = 0b000010100110, ++ xvfnmadd_s_op = 0b000010101001, ++ xvfnmadd_d_op = 0b000010101010, ++ xvfnmsub_s_op = 0b000010101101, ++ xvfnmsub_d_op = 0b000010101110, ++ fcmp_cond_s_op = 0b000011000001, ++ fcmp_cond_d_op = 0b000011000010, ++ vfcmp_cond_s_op = 0b000011000101, ++ vfcmp_cond_d_op = 0b000011000110, ++ xvfcmp_cond_s_op = 0b000011001001, ++ xvfcmp_cond_d_op = 0b000011001010, ++ fsel_op = 0b000011010000, ++ vbitsel_v_op = 0b000011010001, ++ xvbitsel_v_op = 0b000011010010, ++ vshuf_b_op = 0b000011010101, ++ xvshuf_b_op = 0b000011010110, ++ ++ unknow_ops12 = 0b111111111111 ++ }; ++ ++ // 10-bit opcode, highest 10 bits: bits[31...22] ++ enum ops10 { ++ bstr_w_op = 0b0000000001, ++ bstrins_d_op = 0b0000000010, ++ bstrpick_d_op = 0b0000000011, ++ slti_op = 0b0000001000, ++ sltui_op = 0b0000001001, ++ addi_w_op = 0b0000001010, ++ addi_d_op = 0b0000001011, ++ lu52i_d_op = 0b0000001100, ++ andi_op = 0b0000001101, ++ ori_op = 0b0000001110, ++ xori_op = 0b0000001111, ++ ld_b_op = 0b0010100000, ++ ld_h_op = 0b0010100001, ++ ld_w_op = 0b0010100010, ++ ld_d_op = 0b0010100011, ++ st_b_op = 0b0010100100, ++ st_h_op = 0b0010100101, ++ st_w_op = 
0b0010100110, ++ st_d_op = 0b0010100111, ++ ld_bu_op = 0b0010101000, ++ ld_hu_op = 0b0010101001, ++ ld_wu_op = 0b0010101010, ++ preld_op = 0b0010101011, ++ fld_s_op = 0b0010101100, ++ fst_s_op = 0b0010101101, ++ fld_d_op = 0b0010101110, ++ fst_d_op = 0b0010101111, ++ vld_op = 0b0010110000, ++ vst_op = 0b0010110001, ++ xvld_op = 0b0010110010, ++ xvst_op = 0b0010110011, ++ ldl_w_op = 0b0010111000, ++ ldr_w_op = 0b0010111001, ++ ++ unknow_ops10 = 0b1111111111 ++ }; ++ ++ // 8-bit opcode, highest 8 bits: bits[31...22] ++ enum ops8 { ++ ll_w_op = 0b00100000, ++ sc_w_op = 0b00100001, ++ ll_d_op = 0b00100010, ++ sc_d_op = 0b00100011, ++ ldptr_w_op = 0b00100100, ++ stptr_w_op = 0b00100101, ++ ldptr_d_op = 0b00100110, ++ stptr_d_op = 0b00100111, ++ csr_op = 0b00000100, ++ ++ unknow_ops8 = 0b11111111 ++ }; ++ ++ // 7-bit opcode, highest 7 bits: bits[31...25] ++ enum ops7 { ++ lu12i_w_op = 0b0001010, ++ lu32i_d_op = 0b0001011, ++ pcaddi_op = 0b0001100, ++ pcalau12i_op = 0b0001101, ++ pcaddu12i_op = 0b0001110, ++ pcaddu18i_op = 0b0001111, ++ ++ unknow_ops7 = 0b1111111 ++ }; ++ ++ // 6-bit opcode, highest 6 bits: bits[31...25] ++ enum ops6 { ++ addu16i_d_op = 0b000100, ++ beqz_op = 0b010000, ++ bnez_op = 0b010001, ++ bccondz_op = 0b010010, ++ jirl_op = 0b010011, ++ b_op = 0b010100, ++ bl_op = 0b010101, ++ beq_op = 0b010110, ++ bne_op = 0b010111, ++ blt_op = 0b011000, ++ bge_op = 0b011001, ++ bltu_op = 0b011010, ++ bgeu_op = 0b011011, ++ ++ unknow_ops6 = 0b111111 ++ }; ++ ++ enum fcmp_cond { ++ fcmp_caf = 0x00, ++ fcmp_cun = 0x08, ++ fcmp_ceq = 0x04, ++ fcmp_cueq = 0x0c, ++ fcmp_clt = 0x02, ++ fcmp_cult = 0x0a, ++ fcmp_cle = 0x06, ++ fcmp_cule = 0x0e, ++ fcmp_cne = 0x10, ++ fcmp_cor = 0x14, ++ fcmp_cune = 0x18, ++ fcmp_saf = 0x01, ++ fcmp_sun = 0x09, ++ fcmp_seq = 0x05, ++ fcmp_sueq = 0x0d, ++ fcmp_slt = 0x03, ++ fcmp_sult = 0x0b, ++ fcmp_sle = 0x07, ++ fcmp_sule = 0x0f, ++ fcmp_sne = 0x11, ++ fcmp_sor = 0x15, ++ fcmp_sune = 0x19 ++ }; ++ ++ enum Condition { ++ zero , ++ notZero , ++ equal , ++ notEqual , ++ less , ++ lessEqual , ++ greater , ++ greaterEqual , ++ below , ++ belowEqual , ++ above , ++ aboveEqual ++ }; ++ ++ static const int LogInstructionSize = 2; ++ static const int InstructionSize = 1 << LogInstructionSize; ++ ++ enum WhichOperand { ++ // input to locate_operand, and format code for relocations ++ imm_operand = 0, // embedded 32-bit|64-bit immediate operand ++ disp32_operand = 1, // embedded 32-bit displacement or address ++ call32_operand = 2, // embedded 32-bit self-relative displacement ++ narrow_oop_operand = 3, // embedded 32-bit immediate narrow oop ++ _WhichOperand_limit = 4 ++ }; ++ ++ static int low (int x, int l) { return bitfield(x, 0, l); } ++ static int low16(int x) { return low(x, 16); } ++ static int low26(int x) { return low(x, 26); } ++ ++ static int high (int x, int l) { return bitfield(x, 32-l, l); } ++ static int high16(int x) { return high(x, 16); } ++ static int high6 (int x) { return high(x, 6); } ++ ++ ++ static ALWAYSINLINE void patch(address a, int length, uint32_t val) { ++ guarantee(val < (1ULL << length), "Field too big for insn"); ++ guarantee(length > 0, "length > 0"); ++ unsigned target = *(unsigned *)a; ++ target = (target >> length) << length; ++ target |= val; ++ *(unsigned *)a = target; ++ } ++ ++ protected: ++ // help methods for instruction ejection ++ ++ // 2R-type ++ // 31 10 9 5 4 0 ++ // | opcode | rj | rd | ++ static inline int insn_RR (int op, int rj, int rd) { return (op<<10) | (rj<<5) | rd; } ++ ++ // 3R-type ++ // 31 15 14 10 9 5 4 0 ++ // 
| opcode | rk | rj | rd | ++ static inline int insn_RRR (int op, int rk, int rj, int rd) { return (op<<15) | (rk<<10) | (rj<<5) | rd; } ++ ++ // 4R-type ++ // 31 20 19 15 14 10 9 5 4 0 ++ // | opcode | ra | rk | rj | rd | ++ static inline int insn_RRRR (int op, int ra, int rk, int rj, int rd) { return (op<<20) | (ra << 15) | (rk<<10) | (rj<<5) | rd; } ++ ++ // 2RI1-type ++ // 31 11 10 9 5 4 0 ++ // | opcode | I1 | vj | rd | ++ static inline int insn_I1RR (int op, int ui1, int vj, int rd) { assert(is_uimm(ui1, 1), "not a unsigned 1-bit int"); return (op<<11) | (low(ui1, 1)<<10) | (vj<<5) | rd; } ++ ++ // 2RI2-type ++ // 31 12 11 10 9 5 4 0 ++ // | opcode | I2 | vj | rd | ++ static inline int insn_I2RR (int op, int ui2, int vj, int rd) { assert(is_uimm(ui2, 2), "not a unsigned 2-bit int"); return (op<<12) | (low(ui2, 2)<<10) | (vj<<5) | rd; } ++ ++ // 2RI3-type ++ // 31 13 12 10 9 5 4 0 ++ // | opcode | I3 | vj | vd | ++ static inline int insn_I3RR (int op, int ui3, int vj, int vd) { assert(is_uimm(ui3, 3), "not a unsigned 3-bit int"); return (op<<13) | (low(ui3, 3)<<10) | (vj<<5) | vd; } ++ ++ // 2RI4-type ++ // 31 14 13 10 9 5 4 0 ++ // | opcode | I4 | vj | vd | ++ static inline int insn_I4RR (int op, int ui4, int vj, int vd) { assert(is_uimm(ui4, 4), "not a unsigned 4-bit int"); return (op<<14) | (low(ui4, 4)<<10) | (vj<<5) | vd; } ++ ++ // 2RI5-type ++ // 31 15 14 10 9 5 4 0 ++ // | opcode | I5 | vj | vd | ++ static inline int insn_I5RR (int op, int ui5, int vj, int vd) { assert(is_uimm(ui5, 5), "not a unsigned 5-bit int"); return (op<<15) | (low(ui5, 5)<<10) | (vj<<5) | vd; } ++ ++ // 2RI6-type ++ // 31 16 15 10 9 5 4 0 ++ // | opcode | I6 | vj | vd | ++ static inline int insn_I6RR (int op, int ui6, int vj, int vd) { assert(is_uimm(ui6, 6), "not a unsigned 6-bit int"); return (op<<16) | (low(ui6, 6)<<10) | (vj<<5) | vd; } ++ ++ // 2RI7-type ++ // 31 17 16 10 9 5 4 0 ++ // | opcode | I7 | vj | vd | ++ static inline int insn_I7RR (int op, int ui7, int vj, int vd) { assert(is_uimm(ui7, 7), "not a unsigned 7-bit int"); return (op<<17) | (low(ui7, 6)<<10) | (vj<<5) | vd; } ++ ++ // 2RI8-type ++ // 31 18 17 10 9 5 4 0 ++ // | opcode | I8 | rj | rd | ++ static inline int insn_I8RR (int op, int imm8, int rj, int rd) { /*assert(is_simm(imm8, 8), "not a signed 8-bit int");*/ return (op<<18) | (low(imm8, 8)<<10) | (rj<<5) | rd; } ++ ++ // 2RI12-type ++ // 31 22 21 10 9 5 4 0 ++ // | opcode | I12 | rj | rd | ++ static inline int insn_I12RR(int op, int imm12, int rj, int rd) { /* assert(is_simm(imm12, 12), "not a signed 12-bit int");*/ return (op<<22) | (low(imm12, 12)<<10) | (rj<<5) | rd; } ++ ++ // 2RI14-type ++ // 31 24 23 10 9 5 4 0 ++ // | opcode | I14 | rj | rd | ++ static inline int insn_I14RR(int op, int imm14, int rj, int rd) { assert(is_simm(imm14, 14), "not a signed 14-bit int"); return (op<<24) | (low(imm14, 14)<<10) | (rj<<5) | rd; } ++ ++ // 2RI16-type ++ // 31 26 25 10 9 5 4 0 ++ // | opcode | I16 | rj | rd | ++ static inline int insn_I16RR(int op, int imm16, int rj, int rd) { assert(is_simm16(imm16), "not a signed 16-bit int"); return (op<<26) | (low16(imm16)<<10) | (rj<<5) | rd; } ++ ++ // 1RI13-type (?) ++ // 31 18 17 5 4 0 ++ // | opcode | I13 | vd | ++ static inline int insn_I13R (int op, int imm13, int vd) { assert(is_simm(imm13, 13), "not a signed 13-bit int"); return (op<<18) | (low(imm13, 13)<<5) | vd; } ++ ++ // 1RI20-type (?) 
++ // 31 25 24 5 4 0 ++ // | opcode | I20 | rd | ++ static inline int insn_I20R (int op, int imm20, int rd) { assert(is_simm(imm20, 20), "not a signed 20-bit int"); return (op<<25) | (low(imm20, 20)<<5) | rd; } ++ ++ // 1RI21-type ++ // 31 26 25 10 9 5 4 0 ++ // | opcode | I21[15:0] | rj |I21[20:16]| ++ static inline int insn_IRI(int op, int imm21, int rj) { assert(is_simm(imm21, 21), "not a signed 21-bit int"); return (op << 26) | (low16(imm21) << 10) | (rj << 5) | low(imm21 >> 16, 5); } ++ ++ // I26-type ++ // 31 26 25 10 9 0 ++ // | opcode | I26[15:0] | I26[25:16] | ++ static inline int insn_I26(int op, int imm26) { assert(is_simm(imm26, 26), "not a signed 26-bit int"); return (op << 26) | (low16(imm26) << 10) | low(imm26 >> 16, 10); } ++ ++ // imm15 ++ // 31 15 14 0 ++ // | opcode | I15 | ++ static inline int insn_I15 (int op, int imm15) { assert(is_uimm(imm15, 15), "not a unsigned 15-bit int"); return (op<<15) | low(imm15, 15); } ++ ++ ++ // get the offset field of beq, bne, blt[u], bge[u] instruction ++ int offset16(address entry) { ++ assert(is_simm16((entry - pc()) / 4), "change this code"); ++ if (!is_simm16((entry - pc()) / 4)) { ++ tty->print_cr("!!! is_simm16: %lx", (entry - pc()) / 4); ++ } ++ return (entry - pc()) / 4; ++ } ++ ++ // get the offset field of beqz, bnez instruction ++ int offset21(address entry) { ++ assert(is_simm((int)(entry - pc()) / 4, 21), "change this code"); ++ if (!is_simm((int)(entry - pc()) / 4, 21)) { ++ tty->print_cr("!!! is_simm21: %lx", (entry - pc()) / 4); ++ } ++ return (entry - pc()) / 4; ++ } ++ ++ // get the offset field of b instruction ++ int offset26(address entry) { ++ assert(is_simm((int)(entry - pc()) / 4, 26), "change this code"); ++ if (!is_simm((int)(entry - pc()) / 4, 26)) { ++ tty->print_cr("!!! 
is_simm26: %lx", (entry - pc()) / 4); ++ } ++ return (entry - pc()) / 4; ++ } ++ ++public: ++ using AbstractAssembler::offset; ++ ++ //sign expand with the sign bit is h ++ static int expand(int x, int h) { return -(x & (1<<h)) | x; } ++ ++ static int split_low16(int x) { ++ return (x & 0xffff); ++ } ++ ++ // Convert 16-bit x to a sign-extended 16-bit integer ++ static int simm16(int x) { ++ assert(x == (x & 0xFFFF), "must be 16-bit only"); ++ return (x << 16) >> 16; ++ } ++ ++ static int split_high16(int x) { ++ return ( (x >> 16) + ((x & 0x8000) != 0) ) & 0xffff; ++ } ++ ++ static int split_low20(int x) { ++ return (x & 0xfffff); ++ } ++ ++ // Convert 20-bit x to a sign-extended 20-bit integer ++ static int simm20(int x) { ++ assert(x == (x & 0xFFFFF), "must be 20-bit only"); ++ return (x << 12) >> 12; ++ } ++ ++ static int split_low12(int x) { ++ return (x & 0xfff); ++ } ++ ++ static inline void split_simm32(jlong si32, jint& si12, jint& si20) { ++ si12 = ((jint)(si32 & 0xfff) << 20) >> 20; ++ si32 += (si32 & 0x800) << 1; ++ si20 = si32 >> 12; ++ } ++ ++ static inline void split_simm38(jlong si38, jint& si18, jint& si20) { ++ si18 = ((jint)(si38 & 0x3ffff) << 14) >> 14; ++ si38 += (si38 & 0x20000) << 1; ++ si20 = si38 >> 18; ++ } ++ ++ // Convert 12-bit x to a sign-extended 12-bit integer ++ static int simm12(int x) { ++ assert(x == (x & 0xFFF), "must be 12-bit only"); ++ return (x << 20) >> 20; ++ } ++ ++ // Convert 26-bit x to a sign-extended 26-bit integer ++ static int simm26(int x) { ++ assert(x == (x & 0x3FFFFFF), "must be 26-bit only"); ++ return (x << 6) >> 6; ++ } ++ ++ static intptr_t merge(intptr_t x0, intptr_t x12) { ++ //lu12i, ori ++ return (((x12 << 12) | x0) << 32) >> 32; ++ } ++ ++ static intptr_t merge(intptr_t x0, intptr_t x12, intptr_t x32) { ++ //lu32i, lu12i, ori ++ return (((x32 << 32) | (x12 << 12) | x0) << 12) >> 12; ++ } ++ ++ static intptr_t merge(intptr_t x0, intptr_t x12, intptr_t x32, intptr_t x52) { ++ //lu52i, lu32i, lu12i, ori ++ return (x52 << 52) | (x32 << 32) | (x12 << 12) | x0; ++ } ++ ++ // Test if x is within signed immediate range for nbits.
++ static bool is_simm (int x, unsigned int nbits) { ++ assert(0 < nbits && nbits < 32, "out of bounds"); ++ const int min = -( ((int)1) << nbits-1 ); ++ const int maxplus1 = ( ((int)1) << nbits-1 ); ++ return min <= x && x < maxplus1; ++ } ++ ++ static bool is_simm(jlong x, unsigned int nbits) { ++ assert(0 < nbits && nbits < 64, "out of bounds"); ++ const jlong min = -( ((jlong)1) << nbits-1 ); ++ const jlong maxplus1 = ( ((jlong)1) << nbits-1 ); ++ return min <= x && x < maxplus1; ++ } ++ ++ static bool is_simm16(int x) { return is_simm(x, 16); } ++ static bool is_simm16(long x) { return is_simm((jlong)x, (unsigned int)16); } ++ ++ // Test if x is within unsigned immediate range for nbits ++ static bool is_uimm(int x, unsigned int nbits) { ++ assert(0 < nbits && nbits < 32, "out of bounds"); ++ const int maxplus1 = ( ((int)1) << nbits ); ++ return 0 <= x && x < maxplus1; ++ } ++ ++ static bool is_uimm(jlong x, unsigned int nbits) { ++ assert(0 < nbits && nbits < 64, "out of bounds"); ++ const jlong maxplus1 = ( ((jlong)1) << nbits ); ++ return 0 <= x && x < maxplus1; ++ } ++ ++public: ++ ++ void flush() { ++ AbstractAssembler::flush(); ++ } ++ ++ inline void emit_int32(int x) { ++ AbstractAssembler::emit_int32(x); ++ } ++ ++ inline void emit_data(int x) { emit_int32(x); } ++ inline void emit_data(int x, relocInfo::relocType rtype) { ++ relocate(rtype); ++ emit_int32(x); ++ } ++ ++ inline void emit_data(int x, RelocationHolder const& rspec) { ++ relocate(rspec); ++ emit_int32(x); ++ } ++ ++ inline void emit_data64(jlong data, relocInfo::relocType rtype, int format = 0) { ++ if (rtype == relocInfo::none) { ++ emit_int64(data); ++ } else { ++ emit_data64(data, Relocation::spec_simple(rtype), format); ++ } ++ } ++ ++ inline void emit_data64(jlong data, RelocationHolder const& rspec, int format = 0) { ++ assert(inst_mark() != NULL, "must be inside InstructionMark"); ++ // Do not use AbstractAssembler::relocate, which is not intended for ++ // embedded words. Instead, relocate to the enclosing instruction. ++ code_section()->relocate(inst_mark(), rspec, format); ++ emit_int64(data); ++ } ++ ++ //---< calculate length of instruction >--- ++ // With LoongArch being a RISC architecture, this always is BytesPerInstWord ++ // instruction must start at passed address ++ static unsigned int instr_len(unsigned char *instr) { return BytesPerInstWord; } ++ ++ //---< longest instructions >--- ++ static unsigned int instr_maxlen() { return BytesPerInstWord; } ++ ++ ++ // Generic instructions ++ // Does 32bit or 64bit as needed for the platform. 
In some sense these ++ // belong in macro assembler but there is no need for both varieties to exist ++ ++ void clo_w (Register rd, Register rj) { emit_int32(insn_RR(clo_w_op, (int)rj->encoding(), (int)rd->encoding())); } ++ void clz_w (Register rd, Register rj) { emit_int32(insn_RR(clz_w_op, (int)rj->encoding(), (int)rd->encoding())); } ++ void cto_w (Register rd, Register rj) { emit_int32(insn_RR(cto_w_op, (int)rj->encoding(), (int)rd->encoding())); } ++ void ctz_w (Register rd, Register rj) { emit_int32(insn_RR(ctz_w_op, (int)rj->encoding(), (int)rd->encoding())); } ++ void clo_d (Register rd, Register rj) { emit_int32(insn_RR(clo_d_op, (int)rj->encoding(), (int)rd->encoding())); } ++ void clz_d (Register rd, Register rj) { emit_int32(insn_RR(clz_d_op, (int)rj->encoding(), (int)rd->encoding())); } ++ void cto_d (Register rd, Register rj) { emit_int32(insn_RR(cto_d_op, (int)rj->encoding(), (int)rd->encoding())); } ++ void ctz_d (Register rd, Register rj) { emit_int32(insn_RR(ctz_d_op, (int)rj->encoding(), (int)rd->encoding())); } ++ ++ void revb_2h(Register rd, Register rj) { emit_int32(insn_RR(revb_2h_op, (int)rj->encoding(), (int)rd->encoding())); } ++ void revb_4h(Register rd, Register rj) { emit_int32(insn_RR(revb_4h_op, (int)rj->encoding(), (int)rd->encoding())); } ++ void revb_2w(Register rd, Register rj) { emit_int32(insn_RR(revb_2w_op, (int)rj->encoding(), (int)rd->encoding())); } ++ void revb_d (Register rd, Register rj) { emit_int32(insn_RR( revb_d_op, (int)rj->encoding(), (int)rd->encoding())); } ++ void revh_2w(Register rd, Register rj) { emit_int32(insn_RR(revh_2w_op, (int)rj->encoding(), (int)rd->encoding())); } ++ void revh_d (Register rd, Register rj) { emit_int32(insn_RR( revh_d_op, (int)rj->encoding(), (int)rd->encoding())); } ++ ++ void bitrev_4b(Register rd, Register rj) { emit_int32(insn_RR(bitrev_4b_op, (int)rj->encoding(), (int)rd->encoding())); } ++ void bitrev_8b(Register rd, Register rj) { emit_int32(insn_RR(bitrev_8b_op, (int)rj->encoding(), (int)rd->encoding())); } ++ void bitrev_w (Register rd, Register rj) { emit_int32(insn_RR(bitrev_w_op, (int)rj->encoding(), (int)rd->encoding())); } ++ void bitrev_d (Register rd, Register rj) { emit_int32(insn_RR(bitrev_d_op, (int)rj->encoding(), (int)rd->encoding())); } ++ ++ void ext_w_h(Register rd, Register rj) { emit_int32(insn_RR(ext_w_h_op, (int)rj->encoding(), (int)rd->encoding())); } ++ void ext_w_b(Register rd, Register rj) { emit_int32(insn_RR(ext_w_b_op, (int)rj->encoding(), (int)rd->encoding())); } ++ ++ void rdtimel_w(Register rd, Register rj) { emit_int32(insn_RR(rdtimel_w_op, (int)rj->encoding(), (int)rd->encoding())); } ++ void rdtimeh_w(Register rd, Register rj) { emit_int32(insn_RR(rdtimeh_w_op, (int)rj->encoding(), (int)rd->encoding())); } ++ void rdtime_d(Register rd, Register rj) { emit_int32(insn_RR(rdtime_d_op, (int)rj->encoding(), (int)rd->encoding())); } ++ ++ void cpucfg(Register rd, Register rj) { emit_int32(insn_RR(cpucfg_op, (int)rj->encoding(), (int)rd->encoding())); } ++ ++ void asrtle_d (Register rj, Register rk) { emit_int32(insn_RRR(asrtle_d_op , (int)rk->encoding(), (int)rj->encoding(), 0)); } ++ void asrtgt_d (Register rj, Register rk) { emit_int32(insn_RRR(asrtgt_d_op , (int)rk->encoding(), (int)rj->encoding(), 0)); } ++ ++ void alsl_w(Register rd, Register rj, Register rk, int sa2) { assert(is_uimm(sa2, 2), "not a unsigned 2-bit int"); emit_int32(insn_I8RR(alsl_w_op, ( (0 << 7) | (sa2 << 5) | (int)rk->encoding() ), (int)rj->encoding(), (int)rd->encoding())); } ++ void alsl_wu(Register 
rd, Register rj, Register rk, int sa2) { assert(is_uimm(sa2, 2), "not a unsigned 2-bit int"); emit_int32(insn_I8RR(alsl_w_op, ( (1 << 7) | (sa2 << 5) | (int)rk->encoding() ), (int)rj->encoding(), (int)rd->encoding())); } ++ void bytepick_w(Register rd, Register rj, Register rk, int sa2) { assert(is_uimm(sa2, 2), "not a unsigned 2-bit int"); emit_int32(insn_I8RR(bytepick_w_op, ( (0 << 7) | (sa2 << 5) | (int)rk->encoding() ), (int)rj->encoding(), (int)rd->encoding())); } ++ void bytepick_d(Register rd, Register rj, Register rk, int sa3) { assert(is_uimm(sa3, 3), "not a unsigned 3-bit int"); emit_int32(insn_I8RR(bytepick_d_op, ( (sa3 << 5) | (int)rk->encoding() ), (int)rj->encoding(), (int)rd->encoding())); } ++ ++ void add_w (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(add_w_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } ++ void add_d (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(add_d_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } ++ void sub_w (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(sub_w_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } ++ void sub_d (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(sub_d_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } ++ void slt (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(slt_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } ++ void sltu (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(sltu_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } ++ ++ void maskeqz (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(maskeqz_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } ++ void masknez (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(masknez_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } ++ ++ void nor (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(nor_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } ++ void AND (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(and_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } ++ void OR (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(or_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } ++ void XOR (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(xor_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } ++ void orn (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(orn_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } ++ void andn(Register rd, Register rj, Register rk) { emit_int32(insn_RRR(andn_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } ++ ++ void sll_w (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(sll_w_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } ++ void srl_w (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(srl_w_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } ++ void sra_w (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(sra_w_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } ++ void sll_d (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(sll_d_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } ++ void srl_d (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(srl_d_op, 
(int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } ++ void sra_d (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(sra_d_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } ++ ++ void rotr_w (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(rotr_w_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } ++ void rotr_d (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(rotr_d_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } ++ ++ void mul_w (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(mul_w_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } ++ void mulh_w (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(mulh_w_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } ++ void mulh_wu (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(mulh_wu_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } ++ void mul_d (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(mul_d_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } ++ void mulh_d (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(mulh_d_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } ++ void mulh_du (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(mulh_du_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } ++ void mulw_d_w (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(mulw_d_w_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } ++ void mulw_d_wu (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(mulw_d_wu_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } ++ ++ void div_w (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(div_w_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } ++ void mod_w (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(mod_w_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } ++ void div_wu(Register rd, Register rj, Register rk) { emit_int32(insn_RRR(div_wu_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } ++ void mod_wu(Register rd, Register rj, Register rk) { emit_int32(insn_RRR(mod_wu_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } ++ void div_d (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(div_d_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } ++ void mod_d (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(mod_d_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } ++ void div_du(Register rd, Register rj, Register rk) { emit_int32(insn_RRR(div_du_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } ++ void mod_du(Register rd, Register rj, Register rk) { emit_int32(insn_RRR(mod_du_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } ++ ++ void crc_w_b_w (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(crc_w_b_w_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } ++ void crc_w_h_w (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(crc_w_h_w_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } ++ void crc_w_w_w (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(crc_w_w_w_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } ++ void crc_w_d_w (Register rd, Register rj, 
Register rk) { emit_int32(insn_RRR(crc_w_d_w_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } ++ void crcc_w_b_w (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(crcc_w_b_w_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } ++ void crcc_w_h_w (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(crcc_w_h_w_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } ++ void crcc_w_w_w (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(crcc_w_w_w_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } ++ void crcc_w_d_w (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(crcc_w_d_w_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } ++ ++ void brk(int code) { assert(is_uimm(code, 15), "not a unsigned 15-bit int"); emit_int32(insn_I15(break_op, code)); } ++ ++ void alsl_d(Register rd, Register rj, Register rk, int sa2) { assert(is_uimm(sa2, 2), "not a unsigned 2-bit int"); emit_int32(insn_I8RR(alsl_d_op, ( (sa2 << 5) | (int)rk->encoding() ), (int)rj->encoding(), (int)rd->encoding())); } ++ ++ void slli_w(Register rd, Register rj, int ui5) { assert(is_uimm(ui5, 5), "not a unsigned 5-bit int"); emit_int32(insn_I8RR(slli_op, ( (0b001 << 5) | ui5 ), (int)rj->encoding(), (int)rd->encoding())); } ++ void slli_d(Register rd, Register rj, int ui6) { assert(is_uimm(ui6, 6), "not a unsigned 6-bit int"); emit_int32(insn_I8RR(slli_op, ( (0b01 << 6) | ui6 ), (int)rj->encoding(), (int)rd->encoding())); } ++ void srli_w(Register rd, Register rj, int ui5) { assert(is_uimm(ui5, 5), "not a unsigned 5-bit int"); emit_int32(insn_I8RR(srli_op, ( (0b001 << 5) | ui5 ), (int)rj->encoding(), (int)rd->encoding())); } ++ void srli_d(Register rd, Register rj, int ui6) { assert(is_uimm(ui6, 6), "not a unsigned 6-bit int"); emit_int32(insn_I8RR(srli_op, ( (0b01 << 6) | ui6 ), (int)rj->encoding(), (int)rd->encoding())); } ++ void srai_w(Register rd, Register rj, int ui5) { assert(is_uimm(ui5, 5), "not a unsigned 5-bit int"); emit_int32(insn_I8RR(srai_op, ( (0b001 << 5) | ui5 ), (int)rj->encoding(), (int)rd->encoding())); } ++ void srai_d(Register rd, Register rj, int ui6) { assert(is_uimm(ui6, 6), "not a unsigned 6-bit int"); emit_int32(insn_I8RR(srai_op, ( (0b01 << 6) | ui6 ), (int)rj->encoding(), (int)rd->encoding())); } ++ void rotri_w(Register rd, Register rj, int ui5) { assert(is_uimm(ui5, 5), "not a unsigned 5-bit int"); emit_int32(insn_I8RR(rotri_op, ( (0b001 << 5) | ui5 ), (int)rj->encoding(), (int)rd->encoding())); } ++ void rotri_d(Register rd, Register rj, int ui6) { assert(is_uimm(ui6, 6), "not a unsigned 6-bit int"); emit_int32(insn_I8RR(rotri_op, ( (0b01 << 6) | ui6 ), (int)rj->encoding(), (int)rd->encoding())); } ++ ++ void bstrins_w (Register rd, Register rj, int msbw, int lsbw) { assert(is_uimm(msbw, 5) && is_uimm(lsbw, 5), "not a unsigned 5-bit int"); emit_int32(insn_I12RR(bstr_w_op, ( (1<<11) | (low(msbw, 5)<<6) | (0<<5) | low(lsbw, 5) ), (int)rj->encoding(), (int)rd->encoding())); } ++ void bstrpick_w (Register rd, Register rj, int msbw, int lsbw) { assert(is_uimm(msbw, 5) && is_uimm(lsbw, 5), "not a unsigned 5-bit int"); emit_int32(insn_I12RR(bstr_w_op, ( (1<<11) | (low(msbw, 5)<<6) | (1<<5) | low(lsbw, 5) ), (int)rj->encoding(), (int)rd->encoding())); } ++ void bstrins_d (Register rd, Register rj, int msbd, int lsbd) { assert(is_uimm(msbd, 6) && is_uimm(lsbd, 6), "not a unsigned 6-bit int"); emit_int32(insn_I12RR(bstrins_d_op, ( (low(msbd, 6)<<6) | low(lsbd, 6) ), 
(int)rj->encoding(), (int)rd->encoding())); } ++ void bstrpick_d (Register rd, Register rj, int msbd, int lsbd) { assert(is_uimm(msbd, 6) && is_uimm(lsbd, 6), "not a unsigned 6-bit int"); emit_int32(insn_I12RR(bstrpick_d_op, ( (low(msbd, 6)<<6) | low(lsbd, 6) ), (int)rj->encoding(), (int)rd->encoding())); } ++ ++ void fadd_s (FloatRegister fd, FloatRegister fj, FloatRegister fk) { emit_int32(insn_RRR(fadd_s_op, (int)fk->encoding(), (int)fj->encoding(), (int)fd->encoding())); } ++ void fadd_d (FloatRegister fd, FloatRegister fj, FloatRegister fk) { emit_int32(insn_RRR(fadd_d_op, (int)fk->encoding(), (int)fj->encoding(), (int)fd->encoding())); } ++ void fsub_s (FloatRegister fd, FloatRegister fj, FloatRegister fk) { emit_int32(insn_RRR(fsub_s_op, (int)fk->encoding(), (int)fj->encoding(), (int)fd->encoding())); } ++ void fsub_d (FloatRegister fd, FloatRegister fj, FloatRegister fk) { emit_int32(insn_RRR(fsub_d_op, (int)fk->encoding(), (int)fj->encoding(), (int)fd->encoding())); } ++ void fmul_s (FloatRegister fd, FloatRegister fj, FloatRegister fk) { emit_int32(insn_RRR(fmul_s_op, (int)fk->encoding(), (int)fj->encoding(), (int)fd->encoding())); } ++ void fmul_d (FloatRegister fd, FloatRegister fj, FloatRegister fk) { emit_int32(insn_RRR(fmul_d_op, (int)fk->encoding(), (int)fj->encoding(), (int)fd->encoding())); } ++ void fdiv_s (FloatRegister fd, FloatRegister fj, FloatRegister fk) { emit_int32(insn_RRR(fdiv_s_op, (int)fk->encoding(), (int)fj->encoding(), (int)fd->encoding())); } ++ void fdiv_d (FloatRegister fd, FloatRegister fj, FloatRegister fk) { emit_int32(insn_RRR(fdiv_d_op, (int)fk->encoding(), (int)fj->encoding(), (int)fd->encoding())); } ++ void fmax_s (FloatRegister fd, FloatRegister fj, FloatRegister fk) { emit_int32(insn_RRR(fmax_s_op, (int)fk->encoding(), (int)fj->encoding(), (int)fd->encoding())); } ++ void fmax_d (FloatRegister fd, FloatRegister fj, FloatRegister fk) { emit_int32(insn_RRR(fmax_d_op, (int)fk->encoding(), (int)fj->encoding(), (int)fd->encoding())); } ++ void fmin_s (FloatRegister fd, FloatRegister fj, FloatRegister fk) { emit_int32(insn_RRR(fmin_s_op, (int)fk->encoding(), (int)fj->encoding(), (int)fd->encoding())); } ++ void fmin_d (FloatRegister fd, FloatRegister fj, FloatRegister fk) { emit_int32(insn_RRR(fmin_d_op, (int)fk->encoding(), (int)fj->encoding(), (int)fd->encoding())); } ++ void fmaxa_s (FloatRegister fd, FloatRegister fj, FloatRegister fk) { emit_int32(insn_RRR(fmaxa_s_op, (int)fk->encoding(), (int)fj->encoding(), (int)fd->encoding())); } ++ void fmaxa_d (FloatRegister fd, FloatRegister fj, FloatRegister fk) { emit_int32(insn_RRR(fmaxa_d_op, (int)fk->encoding(), (int)fj->encoding(), (int)fd->encoding())); } ++ void fmina_s (FloatRegister fd, FloatRegister fj, FloatRegister fk) { emit_int32(insn_RRR(fmina_s_op, (int)fk->encoding(), (int)fj->encoding(), (int)fd->encoding())); } ++ void fmina_d (FloatRegister fd, FloatRegister fj, FloatRegister fk) { emit_int32(insn_RRR(fmina_d_op, (int)fk->encoding(), (int)fj->encoding(), (int)fd->encoding())); } ++ ++ void fscaleb_s (FloatRegister fd, FloatRegister fj, FloatRegister fk) { emit_int32(insn_RRR(fscaleb_s_op, (int)fk->encoding(), (int)fj->encoding(), (int)fd->encoding())); } ++ void fscaleb_d (FloatRegister fd, FloatRegister fj, FloatRegister fk) { emit_int32(insn_RRR(fscaleb_d_op, (int)fk->encoding(), (int)fj->encoding(), (int)fd->encoding())); } ++ void fcopysign_s (FloatRegister fd, FloatRegister fj, FloatRegister fk) { emit_int32(insn_RRR(fcopysign_s_op, (int)fk->encoding(), (int)fj->encoding(), 
(int)fd->encoding())); } ++ void fcopysign_d (FloatRegister fd, FloatRegister fj, FloatRegister fk) { emit_int32(insn_RRR(fcopysign_d_op, (int)fk->encoding(), (int)fj->encoding(), (int)fd->encoding())); } ++ ++ void fabs_s(FloatRegister fd, FloatRegister fj) { emit_int32(insn_RR(fabs_s_op, (int)fj->encoding(), (int)fd->encoding())); } ++ void fabs_d(FloatRegister fd, FloatRegister fj) { emit_int32(insn_RR(fabs_d_op, (int)fj->encoding(), (int)fd->encoding())); } ++ void fneg_s(FloatRegister fd, FloatRegister fj) { emit_int32(insn_RR(fneg_s_op, (int)fj->encoding(), (int)fd->encoding())); } ++ void fneg_d(FloatRegister fd, FloatRegister fj) { emit_int32(insn_RR(fneg_d_op, (int)fj->encoding(), (int)fd->encoding())); } ++ void flogb_s(FloatRegister fd, FloatRegister fj) { emit_int32(insn_RR(flogb_s_op, (int)fj->encoding(), (int)fd->encoding())); } ++ void flogb_d(FloatRegister fd, FloatRegister fj) { emit_int32(insn_RR(flogb_d_op, (int)fj->encoding(), (int)fd->encoding())); } ++ void fclass_s(FloatRegister fd, FloatRegister fj) { emit_int32(insn_RR(fclass_s_op, (int)fj->encoding(), (int)fd->encoding())); } ++ void fclass_d(FloatRegister fd, FloatRegister fj) { emit_int32(insn_RR(fclass_d_op, (int)fj->encoding(), (int)fd->encoding())); } ++ void fsqrt_s(FloatRegister fd, FloatRegister fj) { emit_int32(insn_RR(fsqrt_s_op, (int)fj->encoding(), (int)fd->encoding())); } ++ void fsqrt_d(FloatRegister fd, FloatRegister fj) { emit_int32(insn_RR(fsqrt_d_op, (int)fj->encoding(), (int)fd->encoding())); } ++ void frecip_s(FloatRegister fd, FloatRegister fj) { emit_int32(insn_RR(frecip_s_op, (int)fj->encoding(), (int)fd->encoding())); } ++ void frecip_d(FloatRegister fd, FloatRegister fj) { emit_int32(insn_RR(frecip_d_op, (int)fj->encoding(), (int)fd->encoding())); } ++ void frsqrt_s(FloatRegister fd, FloatRegister fj) { emit_int32(insn_RR(frsqrt_s_op, (int)fj->encoding(), (int)fd->encoding())); } ++ void frsqrt_d(FloatRegister fd, FloatRegister fj) { emit_int32(insn_RR(frsqrt_d_op, (int)fj->encoding(), (int)fd->encoding())); } ++ void fmov_s(FloatRegister fd, FloatRegister fj) { emit_int32(insn_RR(fmov_s_op, (int)fj->encoding(), (int)fd->encoding())); } ++ void fmov_d(FloatRegister fd, FloatRegister fj) { emit_int32(insn_RR(fmov_d_op, (int)fj->encoding(), (int)fd->encoding())); } ++ ++ void movgr2fr_w (FloatRegister fd, Register rj) { emit_int32(insn_RR(movgr2fr_w_op, (int)rj->encoding(), (int)fd->encoding())); } ++ void movgr2fr_d (FloatRegister fd, Register rj) { emit_int32(insn_RR(movgr2fr_d_op, (int)rj->encoding(), (int)fd->encoding())); } ++ void movgr2frh_w(FloatRegister fd, Register rj) { emit_int32(insn_RR(movgr2frh_w_op, (int)rj->encoding(), (int)fd->encoding())); } ++ void movfr2gr_s (Register rd, FloatRegister fj) { emit_int32(insn_RR(movfr2gr_s_op, (int)fj->encoding(), (int)rd->encoding())); } ++ void movfr2gr_d (Register rd, FloatRegister fj) { emit_int32(insn_RR(movfr2gr_d_op, (int)fj->encoding(), (int)rd->encoding())); } ++ void movfrh2gr_s(Register rd, FloatRegister fj) { emit_int32(insn_RR(movfrh2gr_s_op, (int)fj->encoding(), (int)rd->encoding())); } ++ void movgr2fcsr (int fcsr, Register rj) { assert(is_uimm(fcsr, 2), "not a unsigned 2-bit init: fcsr0-fcsr3"); emit_int32(insn_RR(movgr2fcsr_op, (int)rj->encoding(), fcsr)); } ++ void movfcsr2gr (Register rd, int fcsr) { assert(is_uimm(fcsr, 2), "not a unsigned 2-bit init: fcsr0-fcsr3"); emit_int32(insn_RR(movfcsr2gr_op, fcsr, (int)rd->encoding())); } ++ void movfr2cf (ConditionalFlagRegister cd, FloatRegister fj) { 
emit_int32(insn_RR(movfr2cf_op, (int)fj->encoding(), (int)cd->encoding())); } ++ void movcf2fr (FloatRegister fd, ConditionalFlagRegister cj) { emit_int32(insn_RR(movcf2fr_op, (int)cj->encoding(), (int)fd->encoding())); } ++ void movgr2cf (ConditionalFlagRegister cd, Register rj) { emit_int32(insn_RR(movgr2cf_op, (int)rj->encoding(), (int)cd->encoding())); } ++ void movcf2gr (Register rd, ConditionalFlagRegister cj) { emit_int32(insn_RR(movcf2gr_op, (int)cj->encoding(), (int)rd->encoding())); } ++ ++ void fcvt_s_d(FloatRegister fd, FloatRegister fj) { emit_int32(insn_RR(fcvt_s_d_op, (int)fj->encoding(), (int)fd->encoding())); } ++ void fcvt_d_s(FloatRegister fd, FloatRegister fj) { emit_int32(insn_RR(fcvt_d_s_op, (int)fj->encoding(), (int)fd->encoding())); } ++ ++ void ftintrm_w_s(FloatRegister fd, FloatRegister fj) { emit_int32(insn_RR(ftintrm_w_s_op, (int)fj->encoding(), (int)fd->encoding())); } ++ void ftintrm_w_d(FloatRegister fd, FloatRegister fj) { emit_int32(insn_RR(ftintrm_w_d_op, (int)fj->encoding(), (int)fd->encoding())); } ++ void ftintrm_l_s(FloatRegister fd, FloatRegister fj) { emit_int32(insn_RR(ftintrm_l_s_op, (int)fj->encoding(), (int)fd->encoding())); } ++ void ftintrm_l_d(FloatRegister fd, FloatRegister fj) { emit_int32(insn_RR(ftintrm_l_d_op, (int)fj->encoding(), (int)fd->encoding())); } ++ void ftintrp_w_s(FloatRegister fd, FloatRegister fj) { emit_int32(insn_RR(ftintrp_w_s_op, (int)fj->encoding(), (int)fd->encoding())); } ++ void ftintrp_w_d(FloatRegister fd, FloatRegister fj) { emit_int32(insn_RR(ftintrp_w_d_op, (int)fj->encoding(), (int)fd->encoding())); } ++ void ftintrp_l_s(FloatRegister fd, FloatRegister fj) { emit_int32(insn_RR(ftintrp_l_s_op, (int)fj->encoding(), (int)fd->encoding())); } ++ void ftintrp_l_d(FloatRegister fd, FloatRegister fj) { emit_int32(insn_RR(ftintrp_l_d_op, (int)fj->encoding(), (int)fd->encoding())); } ++ void ftintrz_w_s(FloatRegister fd, FloatRegister fj) { emit_int32(insn_RR(ftintrz_w_s_op, (int)fj->encoding(), (int)fd->encoding())); } ++ void ftintrz_w_d(FloatRegister fd, FloatRegister fj) { emit_int32(insn_RR(ftintrz_w_d_op, (int)fj->encoding(), (int)fd->encoding())); } ++ void ftintrz_l_s(FloatRegister fd, FloatRegister fj) { emit_int32(insn_RR(ftintrz_l_s_op, (int)fj->encoding(), (int)fd->encoding())); } ++ void ftintrz_l_d(FloatRegister fd, FloatRegister fj) { emit_int32(insn_RR(ftintrz_l_d_op, (int)fj->encoding(), (int)fd->encoding())); } ++ void ftintrne_w_s(FloatRegister fd, FloatRegister fj) { emit_int32(insn_RR(ftintrne_w_s_op, (int)fj->encoding(), (int)fd->encoding())); } ++ void ftintrne_w_d(FloatRegister fd, FloatRegister fj) { emit_int32(insn_RR(ftintrne_w_d_op, (int)fj->encoding(), (int)fd->encoding())); } ++ void ftintrne_l_s(FloatRegister fd, FloatRegister fj) { emit_int32(insn_RR(ftintrne_l_s_op, (int)fj->encoding(), (int)fd->encoding())); } ++ void ftintrne_l_d(FloatRegister fd, FloatRegister fj) { emit_int32(insn_RR(ftintrne_l_d_op, (int)fj->encoding(), (int)fd->encoding())); } ++ void ftint_w_s(FloatRegister fd, FloatRegister fj) { emit_int32(insn_RR(ftint_w_s_op, (int)fj->encoding(), (int)fd->encoding())); } ++ void ftint_w_d(FloatRegister fd, FloatRegister fj) { emit_int32(insn_RR(ftint_w_d_op, (int)fj->encoding(), (int)fd->encoding())); } ++ void ftint_l_s(FloatRegister fd, FloatRegister fj) { emit_int32(insn_RR(ftint_l_s_op, (int)fj->encoding(), (int)fd->encoding())); } ++ void ftint_l_d(FloatRegister fd, FloatRegister fj) { emit_int32(insn_RR(ftint_l_d_op, (int)fj->encoding(), (int)fd->encoding())); } ++ void 
ffint_s_w(FloatRegister fd, FloatRegister fj) { emit_int32(insn_RR(ffint_s_w_op, (int)fj->encoding(), (int)fd->encoding())); } ++ void ffint_s_l(FloatRegister fd, FloatRegister fj) { emit_int32(insn_RR(ffint_s_l_op, (int)fj->encoding(), (int)fd->encoding())); } ++ void ffint_d_w(FloatRegister fd, FloatRegister fj) { emit_int32(insn_RR(ffint_d_w_op, (int)fj->encoding(), (int)fd->encoding())); } ++ void ffint_d_l(FloatRegister fd, FloatRegister fj) { emit_int32(insn_RR(ffint_d_l_op, (int)fj->encoding(), (int)fd->encoding())); } ++ void frint_s(FloatRegister fd, FloatRegister fj) { emit_int32(insn_RR(frint_s_op, (int)fj->encoding(), (int)fd->encoding())); } ++ void frint_d(FloatRegister fd, FloatRegister fj) { emit_int32(insn_RR(frint_d_op, (int)fj->encoding(), (int)fd->encoding())); } ++ ++ void slti (Register rd, Register rj, int si12) { assert(is_simm(si12, 12), "not a signed 12-bit int"); emit_int32(insn_I12RR(slti_op, si12, (int)rj->encoding(), (int)rd->encoding())); } ++ void sltui (Register rd, Register rj, int si12) { assert(is_simm(si12, 12), "not a signed 12-bit int"); emit_int32(insn_I12RR(sltui_op, si12, (int)rj->encoding(), (int)rd->encoding())); } ++ void addi_w(Register rd, Register rj, int si12) { assert(is_simm(si12, 12), "not a signed 12-bit int"); emit_int32(insn_I12RR(addi_w_op, si12, (int)rj->encoding(), (int)rd->encoding())); } ++ void addi_d(Register rd, Register rj, int si12) { assert(is_simm(si12, 12), "not a signed 12-bit int"); emit_int32(insn_I12RR(addi_d_op, si12, (int)rj->encoding(), (int)rd->encoding())); } ++ void lu52i_d(Register rd, Register rj, int si12) { /*assert(is_simm(si12, 12), "not a signed 12-bit int");*/ emit_int32(insn_I12RR(lu52i_d_op, simm12(si12), (int)rj->encoding(), (int)rd->encoding())); } ++ void andi (Register rd, Register rj, int ui12) { assert(is_uimm(ui12, 12), "not a unsigned 12-bit int"); emit_int32(insn_I12RR(andi_op, ui12, (int)rj->encoding(), (int)rd->encoding())); } ++ void ori (Register rd, Register rj, int ui12) { assert(is_uimm(ui12, 12), "not a unsigned 12-bit int"); emit_int32(insn_I12RR(ori_op, ui12, (int)rj->encoding(), (int)rd->encoding())); } ++ void xori (Register rd, Register rj, int ui12) { assert(is_uimm(ui12, 12), "not a unsigned 12-bit int"); emit_int32(insn_I12RR(xori_op, ui12, (int)rj->encoding(), (int)rd->encoding())); } ++ ++ void fmadd_s (FloatRegister fd, FloatRegister fj, FloatRegister fk, FloatRegister fa) { emit_int32(insn_RRRR(fmadd_s_op , (int)fa->encoding(), (int)fk->encoding(), (int)fj->encoding(), (int)fd->encoding())); } ++ void fmadd_d (FloatRegister fd, FloatRegister fj, FloatRegister fk, FloatRegister fa) { emit_int32(insn_RRRR(fmadd_d_op , (int)fa->encoding(), (int)fk->encoding(), (int)fj->encoding(), (int)fd->encoding())); } ++ void fmsub_s (FloatRegister fd, FloatRegister fj, FloatRegister fk, FloatRegister fa) { emit_int32(insn_RRRR(fmsub_s_op , (int)fa->encoding(), (int)fk->encoding(), (int)fj->encoding(), (int)fd->encoding())); } ++ void fmsub_d (FloatRegister fd, FloatRegister fj, FloatRegister fk, FloatRegister fa) { emit_int32(insn_RRRR(fmsub_d_op , (int)fa->encoding(), (int)fk->encoding(), (int)fj->encoding(), (int)fd->encoding())); } ++ void fnmadd_s (FloatRegister fd, FloatRegister fj, FloatRegister fk, FloatRegister fa) { emit_int32(insn_RRRR(fnmadd_s_op , (int)fa->encoding(), (int)fk->encoding(), (int)fj->encoding(), (int)fd->encoding())); } ++ void fnmadd_d (FloatRegister fd, FloatRegister fj, FloatRegister fk, FloatRegister fa) { emit_int32(insn_RRRR(fnmadd_d_op , 
(int)fa->encoding(), (int)fk->encoding(), (int)fj->encoding(), (int)fd->encoding())); } ++ void fnmsub_s (FloatRegister fd, FloatRegister fj, FloatRegister fk, FloatRegister fa) { emit_int32(insn_RRRR(fnmsub_s_op , (int)fa->encoding(), (int)fk->encoding(), (int)fj->encoding(), (int)fd->encoding())); } ++ void fnmsub_d (FloatRegister fd, FloatRegister fj, FloatRegister fk, FloatRegister fa) { emit_int32(insn_RRRR(fnmsub_d_op , (int)fa->encoding(), (int)fk->encoding(), (int)fj->encoding(), (int)fd->encoding())); } ++ ++ void fcmp_caf_s (ConditionalFlagRegister cd, FloatRegister fj, FloatRegister fk) { emit_int32(insn_RRRR(fcmp_cond_s_op, fcmp_caf, (int)fk->encoding(), (int)fj->encoding(), (int)cd->encoding())); } ++ void fcmp_cun_s (ConditionalFlagRegister cd, FloatRegister fj, FloatRegister fk) { emit_int32(insn_RRRR(fcmp_cond_s_op, fcmp_cun , (int)fk->encoding(), (int)fj->encoding(), (int)cd->encoding())); } ++ void fcmp_ceq_s (ConditionalFlagRegister cd, FloatRegister fj, FloatRegister fk) { emit_int32(insn_RRRR(fcmp_cond_s_op, fcmp_ceq , (int)fk->encoding(), (int)fj->encoding(), (int)cd->encoding())); } ++ void fcmp_cueq_s (ConditionalFlagRegister cd, FloatRegister fj, FloatRegister fk) { emit_int32(insn_RRRR(fcmp_cond_s_op, fcmp_cueq, (int)fk->encoding(), (int)fj->encoding(), (int)cd->encoding())); } ++ void fcmp_clt_s (ConditionalFlagRegister cd, FloatRegister fj, FloatRegister fk) { emit_int32(insn_RRRR(fcmp_cond_s_op, fcmp_clt , (int)fk->encoding(), (int)fj->encoding(), (int)cd->encoding())); } ++ void fcmp_cult_s (ConditionalFlagRegister cd, FloatRegister fj, FloatRegister fk) { emit_int32(insn_RRRR(fcmp_cond_s_op, fcmp_cult, (int)fk->encoding(), (int)fj->encoding(), (int)cd->encoding())); } ++ void fcmp_cle_s (ConditionalFlagRegister cd, FloatRegister fj, FloatRegister fk) { emit_int32(insn_RRRR(fcmp_cond_s_op, fcmp_cle , (int)fk->encoding(), (int)fj->encoding(), (int)cd->encoding())); } ++ void fcmp_cule_s (ConditionalFlagRegister cd, FloatRegister fj, FloatRegister fk) { emit_int32(insn_RRRR(fcmp_cond_s_op, fcmp_cule, (int)fk->encoding(), (int)fj->encoding(), (int)cd->encoding())); } ++ void fcmp_cne_s (ConditionalFlagRegister cd, FloatRegister fj, FloatRegister fk) { emit_int32(insn_RRRR(fcmp_cond_s_op, fcmp_cne , (int)fk->encoding(), (int)fj->encoding(), (int)cd->encoding())); } ++ void fcmp_cor_s (ConditionalFlagRegister cd, FloatRegister fj, FloatRegister fk) { emit_int32(insn_RRRR(fcmp_cond_s_op, fcmp_cor , (int)fk->encoding(), (int)fj->encoding(), (int)cd->encoding())); } ++ void fcmp_cune_s (ConditionalFlagRegister cd, FloatRegister fj, FloatRegister fk) { emit_int32(insn_RRRR(fcmp_cond_s_op, fcmp_cune, (int)fk->encoding(), (int)fj->encoding(), (int)cd->encoding())); } ++ void fcmp_saf_s (ConditionalFlagRegister cd, FloatRegister fj, FloatRegister fk) { emit_int32(insn_RRRR(fcmp_cond_s_op, fcmp_saf , (int)fk->encoding(), (int)fj->encoding(), (int)cd->encoding())); } ++ void fcmp_sun_s (ConditionalFlagRegister cd, FloatRegister fj, FloatRegister fk) { emit_int32(insn_RRRR(fcmp_cond_s_op, fcmp_sun , (int)fk->encoding(), (int)fj->encoding(), (int)cd->encoding())); } ++ void fcmp_seq_s (ConditionalFlagRegister cd, FloatRegister fj, FloatRegister fk) { emit_int32(insn_RRRR(fcmp_cond_s_op, fcmp_seq , (int)fk->encoding(), (int)fj->encoding(), (int)cd->encoding())); } ++ void fcmp_sueq_s (ConditionalFlagRegister cd, FloatRegister fj, FloatRegister fk) { emit_int32(insn_RRRR(fcmp_cond_s_op, fcmp_sueq, (int)fk->encoding(), (int)fj->encoding(), (int)cd->encoding())); } ++ void 
fcmp_slt_s (ConditionalFlagRegister cd, FloatRegister fj, FloatRegister fk) { emit_int32(insn_RRRR(fcmp_cond_s_op, fcmp_slt , (int)fk->encoding(), (int)fj->encoding(), (int)cd->encoding())); } ++ void fcmp_sult_s (ConditionalFlagRegister cd, FloatRegister fj, FloatRegister fk) { emit_int32(insn_RRRR(fcmp_cond_s_op, fcmp_sult, (int)fk->encoding(), (int)fj->encoding(), (int)cd->encoding())); } ++ void fcmp_sle_s (ConditionalFlagRegister cd, FloatRegister fj, FloatRegister fk) { emit_int32(insn_RRRR(fcmp_cond_s_op, fcmp_sle , (int)fk->encoding(), (int)fj->encoding(), (int)cd->encoding())); } ++ void fcmp_sule_s (ConditionalFlagRegister cd, FloatRegister fj, FloatRegister fk) { emit_int32(insn_RRRR(fcmp_cond_s_op, fcmp_sule, (int)fk->encoding(), (int)fj->encoding(), (int)cd->encoding())); } ++ void fcmp_sne_s (ConditionalFlagRegister cd, FloatRegister fj, FloatRegister fk) { emit_int32(insn_RRRR(fcmp_cond_s_op, fcmp_sne , (int)fk->encoding(), (int)fj->encoding(), (int)cd->encoding())); } ++ void fcmp_sor_s (ConditionalFlagRegister cd, FloatRegister fj, FloatRegister fk) { emit_int32(insn_RRRR(fcmp_cond_s_op, fcmp_sor , (int)fk->encoding(), (int)fj->encoding(), (int)cd->encoding())); } ++ void fcmp_sune_s (ConditionalFlagRegister cd, FloatRegister fj, FloatRegister fk) { emit_int32(insn_RRRR(fcmp_cond_s_op, fcmp_sune, (int)fk->encoding(), (int)fj->encoding(), (int)cd->encoding())); } ++ ++ void fcmp_caf_d (ConditionalFlagRegister cd, FloatRegister fj, FloatRegister fk) { emit_int32(insn_RRRR(fcmp_cond_d_op, fcmp_caf, (int)fk->encoding(), (int)fj->encoding(), (int)cd->encoding())); } ++ void fcmp_cun_d (ConditionalFlagRegister cd, FloatRegister fj, FloatRegister fk) { emit_int32(insn_RRRR(fcmp_cond_d_op, fcmp_cun , (int)fk->encoding(), (int)fj->encoding(), (int)cd->encoding())); } ++ void fcmp_ceq_d (ConditionalFlagRegister cd, FloatRegister fj, FloatRegister fk) { emit_int32(insn_RRRR(fcmp_cond_d_op, fcmp_ceq , (int)fk->encoding(), (int)fj->encoding(), (int)cd->encoding())); } ++ void fcmp_cueq_d (ConditionalFlagRegister cd, FloatRegister fj, FloatRegister fk) { emit_int32(insn_RRRR(fcmp_cond_d_op, fcmp_cueq, (int)fk->encoding(), (int)fj->encoding(), (int)cd->encoding())); } ++ void fcmp_clt_d (ConditionalFlagRegister cd, FloatRegister fj, FloatRegister fk) { emit_int32(insn_RRRR(fcmp_cond_d_op, fcmp_clt , (int)fk->encoding(), (int)fj->encoding(), (int)cd->encoding())); } ++ void fcmp_cult_d (ConditionalFlagRegister cd, FloatRegister fj, FloatRegister fk) { emit_int32(insn_RRRR(fcmp_cond_d_op, fcmp_cult, (int)fk->encoding(), (int)fj->encoding(), (int)cd->encoding())); } ++ void fcmp_cle_d (ConditionalFlagRegister cd, FloatRegister fj, FloatRegister fk) { emit_int32(insn_RRRR(fcmp_cond_d_op, fcmp_cle , (int)fk->encoding(), (int)fj->encoding(), (int)cd->encoding())); } ++ void fcmp_cule_d (ConditionalFlagRegister cd, FloatRegister fj, FloatRegister fk) { emit_int32(insn_RRRR(fcmp_cond_d_op, fcmp_cule, (int)fk->encoding(), (int)fj->encoding(), (int)cd->encoding())); } ++ void fcmp_cne_d (ConditionalFlagRegister cd, FloatRegister fj, FloatRegister fk) { emit_int32(insn_RRRR(fcmp_cond_d_op, fcmp_cne , (int)fk->encoding(), (int)fj->encoding(), (int)cd->encoding())); } ++ void fcmp_cor_d (ConditionalFlagRegister cd, FloatRegister fj, FloatRegister fk) { emit_int32(insn_RRRR(fcmp_cond_d_op, fcmp_cor , (int)fk->encoding(), (int)fj->encoding(), (int)cd->encoding())); } ++ void fcmp_cune_d (ConditionalFlagRegister cd, FloatRegister fj, FloatRegister fk) { emit_int32(insn_RRRR(fcmp_cond_d_op, fcmp_cune, 
(int)fk->encoding(), (int)fj->encoding(), (int)cd->encoding())); } ++ void fcmp_saf_d (ConditionalFlagRegister cd, FloatRegister fj, FloatRegister fk) { emit_int32(insn_RRRR(fcmp_cond_d_op, fcmp_saf , (int)fk->encoding(), (int)fj->encoding(), (int)cd->encoding())); } ++ void fcmp_sun_d (ConditionalFlagRegister cd, FloatRegister fj, FloatRegister fk) { emit_int32(insn_RRRR(fcmp_cond_d_op, fcmp_sun , (int)fk->encoding(), (int)fj->encoding(), (int)cd->encoding())); } ++ void fcmp_seq_d (ConditionalFlagRegister cd, FloatRegister fj, FloatRegister fk) { emit_int32(insn_RRRR(fcmp_cond_d_op, fcmp_seq , (int)fk->encoding(), (int)fj->encoding(), (int)cd->encoding())); } ++ void fcmp_sueq_d (ConditionalFlagRegister cd, FloatRegister fj, FloatRegister fk) { emit_int32(insn_RRRR(fcmp_cond_d_op, fcmp_sueq, (int)fk->encoding(), (int)fj->encoding(), (int)cd->encoding())); } ++ void fcmp_slt_d (ConditionalFlagRegister cd, FloatRegister fj, FloatRegister fk) { emit_int32(insn_RRRR(fcmp_cond_d_op, fcmp_slt , (int)fk->encoding(), (int)fj->encoding(), (int)cd->encoding())); } ++ void fcmp_sult_d (ConditionalFlagRegister cd, FloatRegister fj, FloatRegister fk) { emit_int32(insn_RRRR(fcmp_cond_d_op, fcmp_sult, (int)fk->encoding(), (int)fj->encoding(), (int)cd->encoding())); } ++ void fcmp_sle_d (ConditionalFlagRegister cd, FloatRegister fj, FloatRegister fk) { emit_int32(insn_RRRR(fcmp_cond_d_op, fcmp_sle , (int)fk->encoding(), (int)fj->encoding(), (int)cd->encoding())); } ++ void fcmp_sule_d (ConditionalFlagRegister cd, FloatRegister fj, FloatRegister fk) { emit_int32(insn_RRRR(fcmp_cond_d_op, fcmp_sule, (int)fk->encoding(), (int)fj->encoding(), (int)cd->encoding())); } ++ void fcmp_sne_d (ConditionalFlagRegister cd, FloatRegister fj, FloatRegister fk) { emit_int32(insn_RRRR(fcmp_cond_d_op, fcmp_sne , (int)fk->encoding(), (int)fj->encoding(), (int)cd->encoding())); } ++ void fcmp_sor_d (ConditionalFlagRegister cd, FloatRegister fj, FloatRegister fk) { emit_int32(insn_RRRR(fcmp_cond_d_op, fcmp_sor , (int)fk->encoding(), (int)fj->encoding(), (int)cd->encoding())); } ++ void fcmp_sune_d (ConditionalFlagRegister cd, FloatRegister fj, FloatRegister fk) { emit_int32(insn_RRRR(fcmp_cond_d_op, fcmp_sune, (int)fk->encoding(), (int)fj->encoding(), (int)cd->encoding())); } ++ ++ void fsel (FloatRegister fd, FloatRegister fj, FloatRegister fk, ConditionalFlagRegister ca) { emit_int32(insn_RRRR(fsel_op, (int)ca->encoding(), (int)fk->encoding(), (int)fj->encoding(), (int)fd->encoding())); } ++ ++ void addu16i_d(Register rj, Register rd, int si16) { assert(is_simm(si16, 16), "not a signed 16-bit int"); emit_int32(insn_I16RR(addu16i_d_op, si16, (int)rj->encoding(), (int)rd->encoding())); } ++ ++ void lu12i_w(Register rj, int si20) { /*assert(is_simm(si20, 20), "not a signed 20-bit int");*/ emit_int32(insn_I20R(lu12i_w_op, simm20(si20), (int)rj->encoding())); } ++ void lu32i_d(Register rj, int si20) { /*assert(is_simm(si20, 20), "not a signed 20-bit int");*/ emit_int32(insn_I20R(lu32i_d_op, simm20(si20), (int)rj->encoding())); } ++ void pcaddi(Register rj, int si20) { assert(is_simm(si20, 20), "not a signed 20-bit int"); emit_int32(insn_I20R(pcaddi_op, si20, (int)rj->encoding())); } ++ void pcalau12i(Register rj, int si20) { assert(is_simm(si20, 20), "not a signed 20-bit int"); emit_int32(insn_I20R(pcalau12i_op, si20, (int)rj->encoding())); } ++ void pcaddu12i(Register rj, int si20) { assert(is_simm(si20, 20), "not a signed 20-bit int"); emit_int32(insn_I20R(pcaddu12i_op, si20, (int)rj->encoding())); } ++ void 
pcaddu18i(Register rj, int si20) { assert(is_simm(si20, 20), "not a signed 20-bit int"); emit_int32(insn_I20R(pcaddu18i_op, si20, (int)rj->encoding())); } ++ ++ void ll_w (Register rd, Register rj, int si16) { assert(is_simm(si16, 16) && ((si16 & 0x3) == 0), "not a signed 16-bit int"); emit_int32(insn_I14RR(ll_w_op, si16>>2, (int)rj->encoding(), (int)rd->encoding())); } ++ void sc_w (Register rd, Register rj, int si16) { assert(is_simm(si16, 16) && ((si16 & 0x3) == 0), "not a signed 16-bit int"); emit_int32(insn_I14RR(sc_w_op, si16>>2, (int)rj->encoding(), (int)rd->encoding())); } ++ void ll_d (Register rd, Register rj, int si16) { assert(is_simm(si16, 16) && ((si16 & 0x3) == 0), "not a signed 16-bit int"); emit_int32(insn_I14RR(ll_d_op, si16>>2, (int)rj->encoding(), (int)rd->encoding())); } ++ void sc_d (Register rd, Register rj, int si16) { assert(is_simm(si16, 16) && ((si16 & 0x3) == 0), "not a signed 16-bit int"); emit_int32(insn_I14RR(sc_d_op, si16>>2, (int)rj->encoding(), (int)rd->encoding())); } ++ void ldptr_w (Register rd, Register rj, int si16) { assert(is_simm(si16, 16) && ((si16 & 0x3) == 0), "not a signed 16-bit int"); emit_int32(insn_I14RR(ldptr_w_op, si16>>2, (int)rj->encoding(), (int)rd->encoding())); } ++ void stptr_w (Register rd, Register rj, int si16) { assert(is_simm(si16, 16) && ((si16 & 0x3) == 0), "not a signed 16-bit int"); emit_int32(insn_I14RR(stptr_w_op, si16>>2, (int)rj->encoding(), (int)rd->encoding())); } ++ void ldptr_d (Register rd, Register rj, int si16) { assert(is_simm(si16, 16) && ((si16 & 0x3) == 0), "not a signed 16-bit int"); emit_int32(insn_I14RR(ldptr_d_op, si16>>2, (int)rj->encoding(), (int)rd->encoding())); } ++ void stptr_d (Register rd, Register rj, int si16) { assert(is_simm(si16, 16) && ((si16 & 0x3) == 0), "not a signed 16-bit int"); emit_int32(insn_I14RR(stptr_d_op, si16>>2, (int)rj->encoding(), (int)rd->encoding())); } ++ void csrrd (Register rd, int csr) { emit_int32(insn_I14RR(csr_op, csr, 0, (int)rd->encoding())); } ++ void csrwr (Register rd, int csr) { emit_int32(insn_I14RR(csr_op, csr, 1, (int)rd->encoding())); } ++ ++ void ld_b (Register rd, Register rj, int si12) { assert(is_simm(si12, 12), "not a signed 12-bit int"); emit_int32(insn_I12RR(ld_b_op, si12, (int)rj->encoding(), (int)rd->encoding())); } ++ void ld_h (Register rd, Register rj, int si12) { assert(is_simm(si12, 12), "not a signed 12-bit int"); emit_int32(insn_I12RR(ld_h_op, si12, (int)rj->encoding(), (int)rd->encoding())); } ++ void ld_w (Register rd, Register rj, int si12) { assert(is_simm(si12, 12), "not a signed 12-bit int"); emit_int32(insn_I12RR(ld_w_op, si12, (int)rj->encoding(), (int)rd->encoding())); } ++ void ld_d (Register rd, Register rj, int si12) { assert(is_simm(si12, 12), "not a signed 12-bit int"); emit_int32(insn_I12RR(ld_d_op, si12, (int)rj->encoding(), (int)rd->encoding())); } ++ void st_b (Register rd, Register rj, int si12) { assert(is_simm(si12, 12), "not a signed 12-bit int"); emit_int32(insn_I12RR(st_b_op, si12, (int)rj->encoding(), (int)rd->encoding())); } ++ void st_h (Register rd, Register rj, int si12) { assert(is_simm(si12, 12), "not a signed 12-bit int"); emit_int32(insn_I12RR(st_h_op, si12, (int)rj->encoding(), (int)rd->encoding())); } ++ void st_w (Register rd, Register rj, int si12) { assert(is_simm(si12, 12), "not a signed 12-bit int"); emit_int32(insn_I12RR(st_w_op, si12, (int)rj->encoding(), (int)rd->encoding())); } ++ void st_d (Register rd, Register rj, int si12) { assert(is_simm(si12, 12), "not a signed 12-bit int"); 
emit_int32(insn_I12RR(st_d_op, si12, (int)rj->encoding(), (int)rd->encoding())); } ++ void ld_bu (Register rd, Register rj, int si12) { assert(is_simm(si12, 12), "not a signed 12-bit int"); emit_int32(insn_I12RR(ld_bu_op, si12, (int)rj->encoding(), (int)rd->encoding())); } ++ void ld_hu (Register rd, Register rj, int si12) { assert(is_simm(si12, 12), "not a signed 12-bit int"); emit_int32(insn_I12RR(ld_hu_op, si12, (int)rj->encoding(), (int)rd->encoding())); } ++ void ld_wu (Register rd, Register rj, int si12) { assert(is_simm(si12, 12), "not a signed 12-bit int"); emit_int32(insn_I12RR(ld_wu_op, si12, (int)rj->encoding(), (int)rd->encoding())); } ++ void preld (int hint, Register rj, int si12) { assert(is_uimm(hint, 5), "not a unsigned 5-bit int"); assert(is_simm(si12, 12), "not a signed 12-bit int"); emit_int32(insn_I12RR(preld_op, si12, (int)rj->encoding(), hint)); } ++ void fld_s (FloatRegister fd, Register rj, int si12) { assert(is_simm(si12, 12), "not a signed 12-bit int"); emit_int32(insn_I12RR(fld_s_op, si12, (int)rj->encoding(), (int)fd->encoding())); } ++ void fst_s (FloatRegister fd, Register rj, int si12) { assert(is_simm(si12, 12), "not a signed 12-bit int"); emit_int32(insn_I12RR(fst_s_op, si12, (int)rj->encoding(), (int)fd->encoding())); } ++ void fld_d (FloatRegister fd, Register rj, int si12) { assert(is_simm(si12, 12), "not a signed 12-bit int"); emit_int32(insn_I12RR(fld_d_op, si12, (int)rj->encoding(), (int)fd->encoding())); } ++ void fst_d (FloatRegister fd, Register rj, int si12) { assert(is_simm(si12, 12), "not a signed 12-bit int"); emit_int32(insn_I12RR(fst_d_op, si12, (int)rj->encoding(), (int)fd->encoding())); } ++ void ldl_w (Register rd, Register rj, int si12) { assert(is_simm(si12, 12), "not a signed 12-bit int"); emit_int32(insn_I12RR(ldl_w_op, si12, (int)rj->encoding(), (int)rd->encoding())); } ++ void ldr_w (Register rd, Register rj, int si12) { assert(is_simm(si12, 12), "not a signed 12-bit int"); emit_int32(insn_I12RR(ldr_w_op, si12, (int)rj->encoding(), (int)rd->encoding())); } ++ ++ void ldx_b (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(ldx_b_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } ++ void ldx_h (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(ldx_h_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } ++ void ldx_w (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(ldx_w_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } ++ void ldx_d (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(ldx_d_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } ++ void stx_b (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(stx_b_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } ++ void stx_h (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(stx_h_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } ++ void stx_w (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(stx_w_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } ++ void stx_d (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(stx_d_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } ++ void ldx_bu (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(ldx_bu_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } ++ void ldx_hu (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(ldx_hu_op, (int)rk->encoding(), 
(int)rj->encoding(), (int)rd->encoding())); }
++ void ldx_wu (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(ldx_wu_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); }
++ void fldx_s (FloatRegister fd, Register rj, Register rk) { emit_int32(insn_RRR(fldx_s_op, (int)rk->encoding(), (int)rj->encoding(), (int)fd->encoding())); }
++ void fldx_d (FloatRegister fd, Register rj, Register rk) { emit_int32(insn_RRR(fldx_d_op, (int)rk->encoding(), (int)rj->encoding(), (int)fd->encoding())); }
++ void fstx_s (FloatRegister fd, Register rj, Register rk) { emit_int32(insn_RRR(fstx_s_op, (int)rk->encoding(), (int)rj->encoding(), (int)fd->encoding())); }
++ void fstx_d (FloatRegister fd, Register rj, Register rk) { emit_int32(insn_RRR(fstx_d_op, (int)rk->encoding(), (int)rj->encoding(), (int)fd->encoding())); }
++
++ void ld_b (Register rd, Address src);
++ void ld_bu (Register rd, Address src);
++ void ld_d (Register rd, Address src);
++ void ld_h (Register rd, Address src);
++ void ld_hu (Register rd, Address src);
++ void ll_w (Register rd, Address src);
++ void ll_d (Register rd, Address src);
++ void ld_wu (Register rd, Address src);
++ void ld_w (Register rd, Address src);
++ void st_b (Register rd, Address dst);
++ void st_d (Register rd, Address dst);
++ void st_w (Register rd, Address dst);
++ void sc_w (Register rd, Address dst);
++ void sc_d (Register rd, Address dst);
++ void st_h (Register rd, Address dst);
++ void fld_s (FloatRegister fd, Address src);
++ void fld_d (FloatRegister fd, Address src);
++ void fst_s (FloatRegister fd, Address dst);
++ void fst_d (FloatRegister fd, Address dst);
++
++ void amswap_w (Register rd, Register rk, Register rj) { assert_different_registers(rd, rj); assert_different_registers(rd, rk); emit_int32(insn_RRR(amswap_w_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); }
++ void amswap_d (Register rd, Register rk, Register rj) { assert_different_registers(rd, rj); assert_different_registers(rd, rk); emit_int32(insn_RRR(amswap_d_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); }
++ void amadd_w (Register rd, Register rk, Register rj) { assert_different_registers(rd, rj); assert_different_registers(rd, rk); emit_int32(insn_RRR(amadd_w_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); }
++ void amadd_d (Register rd, Register rk, Register rj) { assert_different_registers(rd, rj); assert_different_registers(rd, rk); emit_int32(insn_RRR(amadd_d_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); }
++ void amand_w (Register rd, Register rk, Register rj) { assert_different_registers(rd, rj); assert_different_registers(rd, rk); emit_int32(insn_RRR(amand_w_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); }
++ void amand_d (Register rd, Register rk, Register rj) { assert_different_registers(rd, rj); assert_different_registers(rd, rk); emit_int32(insn_RRR(amand_d_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); }
++ void amor_w (Register rd, Register rk, Register rj) { assert_different_registers(rd, rj); assert_different_registers(rd, rk); emit_int32(insn_RRR(amor_w_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); }
++ void amor_d (Register rd, Register rk, Register rj) { assert_different_registers(rd, rj); assert_different_registers(rd, rk); emit_int32(insn_RRR(amor_d_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); }
++ void amxor_w (Register rd, Register rk, Register rj) {
assert_different_registers(rd, rj); assert_different_registers(rd, rk); emit_int32(insn_RRR(amxor_w_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } ++ void amxor_d (Register rd, Register rk, Register rj) { assert_different_registers(rd, rj); assert_different_registers(rd, rk); emit_int32(insn_RRR(amxor_d_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } ++ void ammax_w (Register rd, Register rk, Register rj) { assert_different_registers(rd, rj); assert_different_registers(rd, rk); emit_int32(insn_RRR(ammax_w_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } ++ void ammax_d (Register rd, Register rk, Register rj) { assert_different_registers(rd, rj); assert_different_registers(rd, rk); emit_int32(insn_RRR(ammax_d_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } ++ void ammin_w (Register rd, Register rk, Register rj) { assert_different_registers(rd, rj); assert_different_registers(rd, rk); emit_int32(insn_RRR(ammin_w_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } ++ void ammin_d (Register rd, Register rk, Register rj) { assert_different_registers(rd, rj); assert_different_registers(rd, rk); emit_int32(insn_RRR(ammin_d_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } ++ void ammax_wu (Register rd, Register rk, Register rj) { assert_different_registers(rd, rj); assert_different_registers(rd, rk); emit_int32(insn_RRR(ammax_wu_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } ++ void ammax_du (Register rd, Register rk, Register rj) { assert_different_registers(rd, rj); assert_different_registers(rd, rk); emit_int32(insn_RRR(ammax_du_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } ++ void ammin_wu (Register rd, Register rk, Register rj) { assert_different_registers(rd, rj); assert_different_registers(rd, rk); emit_int32(insn_RRR(ammin_wu_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } ++ void ammin_du (Register rd, Register rk, Register rj) { assert_different_registers(rd, rj); assert_different_registers(rd, rk); emit_int32(insn_RRR(ammin_du_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } ++ void amswap_db_w(Register rd, Register rk, Register rj) { assert_different_registers(rd, rj); assert_different_registers(rd, rk); emit_int32(insn_RRR(amswap_db_w_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } ++ void amswap_db_d(Register rd, Register rk, Register rj) { assert_different_registers(rd, rj); assert_different_registers(rd, rk); emit_int32(insn_RRR(amswap_db_d_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } ++ void amadd_db_w (Register rd, Register rk, Register rj) { assert_different_registers(rd, rj); assert_different_registers(rd, rk); emit_int32(insn_RRR(amadd_db_w_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } ++ void amadd_db_d (Register rd, Register rk, Register rj) { assert_different_registers(rd, rj); assert_different_registers(rd, rk); emit_int32(insn_RRR(amadd_db_d_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } ++ void amand_db_w (Register rd, Register rk, Register rj) { assert_different_registers(rd, rj); assert_different_registers(rd, rk); emit_int32(insn_RRR(amand_db_w_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } ++ void amand_db_d (Register rd, Register rk, Register rj) { assert_different_registers(rd, rj); assert_different_registers(rd, rk); 
emit_int32(insn_RRR(amand_db_d_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } ++ void amor_db_w (Register rd, Register rk, Register rj) { assert_different_registers(rd, rj); assert_different_registers(rd, rk); emit_int32(insn_RRR(amor_db_w_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } ++ void amor_db_d (Register rd, Register rk, Register rj) { assert_different_registers(rd, rj); assert_different_registers(rd, rk); emit_int32(insn_RRR(amor_db_d_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } ++ void amxor_db_w (Register rd, Register rk, Register rj) { assert_different_registers(rd, rj); assert_different_registers(rd, rk); emit_int32(insn_RRR(amxor_db_w_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } ++ void amxor_db_d (Register rd, Register rk, Register rj) { assert_different_registers(rd, rj); assert_different_registers(rd, rk); emit_int32(insn_RRR(amxor_db_d_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } ++ void ammax_db_w (Register rd, Register rk, Register rj) { assert_different_registers(rd, rj); assert_different_registers(rd, rk); emit_int32(insn_RRR(ammax_db_w_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } ++ void ammax_db_d (Register rd, Register rk, Register rj) { assert_different_registers(rd, rj); assert_different_registers(rd, rk); emit_int32(insn_RRR(ammax_db_d_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } ++ void ammin_db_w (Register rd, Register rk, Register rj) { assert_different_registers(rd, rj); assert_different_registers(rd, rk); emit_int32(insn_RRR(ammin_db_w_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } ++ void ammin_db_d (Register rd, Register rk, Register rj) { assert_different_registers(rd, rj); assert_different_registers(rd, rk); emit_int32(insn_RRR(ammin_db_d_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } ++ void ammax_db_wu(Register rd, Register rk, Register rj) { assert_different_registers(rd, rj); assert_different_registers(rd, rk); emit_int32(insn_RRR(ammax_db_wu_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } ++ void ammax_db_du(Register rd, Register rk, Register rj) { assert_different_registers(rd, rj); assert_different_registers(rd, rk); emit_int32(insn_RRR(ammax_db_du_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } ++ void ammin_db_wu(Register rd, Register rk, Register rj) { assert_different_registers(rd, rj); assert_different_registers(rd, rk); emit_int32(insn_RRR(ammin_db_wu_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } ++ void ammin_db_du(Register rd, Register rk, Register rj) { assert_different_registers(rd, rj); assert_different_registers(rd, rk); emit_int32(insn_RRR(ammin_db_du_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } ++ ++ void dbar(int hint) { ++ assert(is_uimm(hint, 15), "not a unsigned 15-bit int"); ++ ++ if (os::is_ActiveCoresMP()) ++ andi(R0, R0, 0); ++ else ++ emit_int32(insn_I15(dbar_op, hint)); ++ } ++ void ibar(int hint) { assert(is_uimm(hint, 15), "not a unsigned 15-bit int"); emit_int32(insn_I15(ibar_op, hint)); } ++ ++ void fldgt_s (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(fldgt_s_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } ++ void fldgt_d (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(fldgt_d_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } 
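++ // Note (annotation added for clarity, not part of the upstream Loongson port sources): the
++ // fldgt/fldle/fstgt/fstle accessors around this point, and the ldgt/ldle/stgt/stle family
++ // that follows, encode LoongArch's bound-check memory accesses; per the reference manual the
++ // access is performed only when the check against rk passes (GT: rj > rk, LE: rj <= rk),
++ // otherwise a bound-check exception is raised. dbar/ibar above are the data/instruction
++ // barriers: dbar(0) is a full barrier, dbar() degrades to the no-op andi(R0, R0, 0) when
++ // os::is_ActiveCoresMP() is true, and membar() further below derives weaker dbar hints from
++ // the Membar_mask_bits constants.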
++ void fldle_s (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(fldle_s_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } ++ void fldle_d (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(fldle_d_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } ++ void fstgt_s (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(fstgt_s_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } ++ void fstgt_d (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(fstgt_d_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } ++ void fstle_s (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(fstle_s_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } ++ void fstle_d (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(fstle_d_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } ++ ++ void ldgt_b (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(ldgt_b_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } ++ void ldgt_h (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(ldgt_h_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } ++ void ldgt_w (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(ldgt_w_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } ++ void ldgt_d (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(ldgt_d_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } ++ void ldle_b (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(ldle_b_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } ++ void ldle_h (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(ldle_h_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } ++ void ldle_w (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(ldle_w_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } ++ void ldle_d (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(ldle_d_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } ++ void stgt_b (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(stgt_b_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } ++ void stgt_h (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(stgt_h_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } ++ void stgt_w (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(stgt_w_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } ++ void stgt_d (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(stgt_d_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } ++ void stle_b (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(stle_b_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } ++ void stle_h (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(stle_h_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } ++ void stle_w (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(stle_w_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } ++ void stle_d (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(stle_d_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } ++ ++ void beqz(Register rj, int offs) { assert(is_simm(offs, 21), "not a signed 21-bit int"); emit_int32(insn_IRI(beqz_op, offs, 
(int)rj->encoding())); } ++ void bnez(Register rj, int offs) { assert(is_simm(offs, 21), "not a signed 21-bit int"); emit_int32(insn_IRI(bnez_op, offs, (int)rj->encoding())); } ++ void bceqz(ConditionalFlagRegister cj, int offs) { assert(is_simm(offs, 21), "not a signed 21-bit int"); emit_int32(insn_IRI(bccondz_op, offs, ( (0b00<<3) | (int)cj->encoding()))); } ++ void bcnez(ConditionalFlagRegister cj, int offs) { assert(is_simm(offs, 21), "not a signed 21-bit int"); emit_int32(insn_IRI(bccondz_op, offs, ( (0b01<<3) | (int)cj->encoding()))); } ++ ++ void jirl(Register rd, Register rj, int offs) { assert(is_simm(offs, 18) && ((offs & 3) == 0), "not a signed 18-bit int"); emit_int32(insn_I16RR(jirl_op, offs >> 2, (int)rj->encoding(), (int)rd->encoding())); } ++ ++ void b(int offs) { assert(is_simm(offs, 26), "not a signed 26-bit int"); emit_int32(insn_I26(b_op, offs)); } ++ void bl(int offs) { assert(is_simm(offs, 26), "not a signed 26-bit int"); emit_int32(insn_I26(bl_op, offs)); } ++ ++ ++ void beq(Register rj, Register rd, int offs) { assert(is_simm(offs, 16), "not a signed 16-bit int"); emit_int32(insn_I16RR(beq_op, offs, (int)rj->encoding(), (int)rd->encoding())); } ++ void bne(Register rj, Register rd, int offs) { assert(is_simm(offs, 16), "not a signed 16-bit int"); emit_int32(insn_I16RR(bne_op, offs, (int)rj->encoding(), (int)rd->encoding())); } ++ void blt(Register rj, Register rd, int offs) { assert(is_simm(offs, 16), "not a signed 16-bit int"); emit_int32(insn_I16RR(blt_op, offs, (int)rj->encoding(), (int)rd->encoding())); } ++ void bge(Register rj, Register rd, int offs) { assert(is_simm(offs, 16), "not a signed 16-bit int"); emit_int32(insn_I16RR(bge_op, offs, (int)rj->encoding(), (int)rd->encoding())); } ++ void bltu(Register rj, Register rd, int offs) { assert(is_simm(offs, 16), "not a signed 16-bit int"); emit_int32(insn_I16RR(bltu_op, offs, (int)rj->encoding(), (int)rd->encoding())); } ++ void bgeu(Register rj, Register rd, int offs) { assert(is_simm(offs, 16), "not a signed 16-bit int"); emit_int32(insn_I16RR(bgeu_op, offs, (int)rj->encoding(), (int)rd->encoding())); } ++ ++ void beq (Register rj, Register rd, address entry) { beq (rj, rd, offset16(entry)); } ++ void bne (Register rj, Register rd, address entry) { bne (rj, rd, offset16(entry)); } ++ void blt (Register rj, Register rd, address entry) { blt (rj, rd, offset16(entry)); } ++ void bge (Register rj, Register rd, address entry) { bge (rj, rd, offset16(entry)); } ++ void bltu (Register rj, Register rd, address entry) { bltu (rj, rd, offset16(entry)); } ++ void bgeu (Register rj, Register rd, address entry) { bgeu (rj, rd, offset16(entry)); } ++ void beqz (Register rj, address entry) { beqz (rj, offset21(entry)); } ++ void bnez (Register rj, address entry) { bnez (rj, offset21(entry)); } ++ void b(address entry) { b(offset26(entry)); } ++ void bl(address entry) { bl(offset26(entry)); } ++ void bceqz(ConditionalFlagRegister cj, address entry) { bceqz(cj, offset21(entry)); } ++ void bcnez(ConditionalFlagRegister cj, address entry) { bcnez(cj, offset21(entry)); } ++ ++ void beq (Register rj, Register rd, Label& L) { beq (rj, rd, target(L)); } ++ void bne (Register rj, Register rd, Label& L) { bne (rj, rd, target(L)); } ++ void blt (Register rj, Register rd, Label& L) { blt (rj, rd, target(L)); } ++ void bge (Register rj, Register rd, Label& L) { bge (rj, rd, target(L)); } ++ void bltu (Register rj, Register rd, Label& L) { bltu (rj, rd, target(L)); } ++ void bgeu (Register rj, Register rd, Label& L) { bgeu (rj, rd, 
target(L)); } ++ void beqz (Register rj, Label& L) { beqz (rj, target(L)); } ++ void bnez (Register rj, Label& L) { bnez (rj, target(L)); } ++ void b(Label& L) { b(target(L)); } ++ void bl(Label& L) { bl(target(L)); } ++ void bceqz(ConditionalFlagRegister cj, Label& L) { bceqz(cj, target(L)); } ++ void bcnez(ConditionalFlagRegister cj, Label& L) { bcnez(cj, target(L)); } ++ ++ typedef enum { ++ // hint[4] ++ Completion = 0, ++ Ordering = (1 << 4), ++ ++ // The bitwise-not of the constants below corresponds to the hint bits, which makes them convenient to combine with OR. ++ // hint[3:2] and hint[1:0] ++ LoadLoad = ((1 << 3) | (1 << 1)), ++ LoadStore = ((1 << 3) | (1 << 0)), ++ StoreLoad = ((1 << 2) | (1 << 1)), ++ StoreStore = ((1 << 2) | (1 << 0)), ++ AnyAny = ((3 << 2) | (3 << 0)), ++ } Membar_mask_bits; ++ ++ // Serializes memory and blows flags ++ void membar(Membar_mask_bits hint) { ++ assert((hint & (3 << 0)) != 0, "membar mask unsupported!"); ++ assert((hint & (3 << 2)) != 0, "membar mask unsupported!"); ++ dbar(Ordering | (~hint & 0xf)); ++ } ++ ++ // LSX and LASX ++#define ASSERT_LSX assert(UseLSX, ""); ++#define ASSERT_LASX assert(UseLASX, ""); ++ ++ void vadd_b(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vadd_b_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vadd_h(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vadd_h_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vadd_w(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vadd_w_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vadd_d(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vadd_d_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vadd_q(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vadd_q_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void xvadd_b(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvadd_b_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvadd_h(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvadd_h_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvadd_w(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvadd_w_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvadd_d(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvadd_d_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvadd_q(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvadd_q_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void vsub_b(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vsub_b_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vsub_h(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vsub_h_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vsub_w(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vsub_w_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void 
vsub_d(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vsub_d_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vsub_q(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vsub_q_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void xvsub_b(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvsub_b_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvsub_h(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvsub_h_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvsub_w(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvsub_w_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvsub_d(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvsub_d_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvsub_q(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvsub_q_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void vaddi_bu(FloatRegister vd, FloatRegister vj, int ui5) { ASSERT_LSX emit_int32(insn_I5RR( vaddi_bu_op, ui5, (int)vj->encoding(), (int)vd->encoding())); } ++ void vaddi_hu(FloatRegister vd, FloatRegister vj, int ui5) { ASSERT_LSX emit_int32(insn_I5RR( vaddi_hu_op, ui5, (int)vj->encoding(), (int)vd->encoding())); } ++ void vaddi_wu(FloatRegister vd, FloatRegister vj, int ui5) { ASSERT_LSX emit_int32(insn_I5RR( vaddi_wu_op, ui5, (int)vj->encoding(), (int)vd->encoding())); } ++ void vaddi_du(FloatRegister vd, FloatRegister vj, int ui5) { ASSERT_LSX emit_int32(insn_I5RR( vaddi_du_op, ui5, (int)vj->encoding(), (int)vd->encoding())); } ++ void xvaddi_bu(FloatRegister xd, FloatRegister xj, int ui5) { ASSERT_LASX emit_int32(insn_I5RR(xvaddi_bu_op, ui5, (int)xj->encoding(), (int)xd->encoding())); } ++ void xvaddi_hu(FloatRegister xd, FloatRegister xj, int ui5) { ASSERT_LASX emit_int32(insn_I5RR(xvaddi_hu_op, ui5, (int)xj->encoding(), (int)xd->encoding())); } ++ void xvaddi_wu(FloatRegister xd, FloatRegister xj, int ui5) { ASSERT_LASX emit_int32(insn_I5RR(xvaddi_wu_op, ui5, (int)xj->encoding(), (int)xd->encoding())); } ++ void xvaddi_du(FloatRegister xd, FloatRegister xj, int ui5) { ASSERT_LASX emit_int32(insn_I5RR(xvaddi_du_op, ui5, (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void vsubi_bu(FloatRegister vd, FloatRegister vj, int ui5) { ASSERT_LSX emit_int32(insn_I5RR( vsubi_bu_op, ui5, (int)vj->encoding(), (int)vd->encoding())); } ++ void vsubi_hu(FloatRegister vd, FloatRegister vj, int ui5) { ASSERT_LSX emit_int32(insn_I5RR( vsubi_hu_op, ui5, (int)vj->encoding(), (int)vd->encoding())); } ++ void vsubi_wu(FloatRegister vd, FloatRegister vj, int ui5) { ASSERT_LSX emit_int32(insn_I5RR( vsubi_wu_op, ui5, (int)vj->encoding(), (int)vd->encoding())); } ++ void vsubi_du(FloatRegister vd, FloatRegister vj, int ui5) { ASSERT_LSX emit_int32(insn_I5RR( vsubi_du_op, ui5, (int)vj->encoding(), (int)vd->encoding())); } ++ void xvsubi_bu(FloatRegister xd, FloatRegister xj, int ui5) { ASSERT_LASX emit_int32(insn_I5RR(xvsubi_bu_op, ui5, (int)xj->encoding(), (int)xd->encoding())); } ++ void xvsubi_hu(FloatRegister xd, FloatRegister xj, int ui5) { ASSERT_LASX emit_int32(insn_I5RR(xvsubi_hu_op, ui5, (int)xj->encoding(), (int)xd->encoding())); } ++ void 
xvsubi_wu(FloatRegister xd, FloatRegister xj, int ui5) { ASSERT_LASX emit_int32(insn_I5RR(xvsubi_wu_op, ui5, (int)xj->encoding(), (int)xd->encoding())); } ++ void xvsubi_du(FloatRegister xd, FloatRegister xj, int ui5) { ASSERT_LASX emit_int32(insn_I5RR(xvsubi_du_op, ui5, (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void vneg_b(FloatRegister vd, FloatRegister vj) { ASSERT_LSX emit_int32(insn_RR( vneg_b_op, (int)vj->encoding(), (int)vd->encoding())); } ++ void vneg_h(FloatRegister vd, FloatRegister vj) { ASSERT_LSX emit_int32(insn_RR( vneg_h_op, (int)vj->encoding(), (int)vd->encoding())); } ++ void vneg_w(FloatRegister vd, FloatRegister vj) { ASSERT_LSX emit_int32(insn_RR( vneg_w_op, (int)vj->encoding(), (int)vd->encoding())); } ++ void vneg_d(FloatRegister vd, FloatRegister vj) { ASSERT_LSX emit_int32(insn_RR( vneg_d_op, (int)vj->encoding(), (int)vd->encoding())); } ++ void xvneg_b(FloatRegister xd, FloatRegister xj) { ASSERT_LASX emit_int32(insn_RR(xvneg_b_op, (int)xj->encoding(), (int)xd->encoding())); } ++ void xvneg_h(FloatRegister xd, FloatRegister xj) { ASSERT_LASX emit_int32(insn_RR(xvneg_h_op, (int)xj->encoding(), (int)xd->encoding())); } ++ void xvneg_w(FloatRegister xd, FloatRegister xj) { ASSERT_LASX emit_int32(insn_RR(xvneg_w_op, (int)xj->encoding(), (int)xd->encoding())); } ++ void xvneg_d(FloatRegister xd, FloatRegister xj) { ASSERT_LASX emit_int32(insn_RR(xvneg_d_op, (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void vabsd_b(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vabsd_b_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vabsd_h(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vabsd_h_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vabsd_w(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vabsd_w_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vabsd_d(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vabsd_d_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void xvabsd_b(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvabsd_b_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvabsd_h(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvabsd_h_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvabsd_w(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvabsd_w_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvabsd_d(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvabsd_d_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void vmax_b(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vmax_b_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vmax_h(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vmax_h_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vmax_w(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vmax_w_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vmax_d(FloatRegister vd, FloatRegister vj, FloatRegister vk) { 
ASSERT_LSX emit_int32(insn_RRR( vmax_d_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void xvmax_b(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvmax_b_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvmax_h(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvmax_h_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvmax_w(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvmax_w_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvmax_d(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvmax_d_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void vmin_b(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vmin_b_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vmin_h(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vmin_h_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vmin_w(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vmin_w_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vmin_d(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vmin_d_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void xvmin_b(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvmin_b_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvmin_h(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvmin_h_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvmin_w(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvmin_w_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvmin_d(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvmin_d_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void vmul_b(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vmul_b_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vmul_h(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vmul_h_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vmul_w(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vmul_w_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vmul_d(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vmul_d_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void xvmul_b(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvmul_b_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvmul_h(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvmul_h_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvmul_w(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvmul_w_op, (int)xk->encoding(), (int)xj->encoding(), 
(int)xd->encoding())); } ++ void xvmul_d(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvmul_d_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void vmuh_b(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vmuh_b_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vmuh_h(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vmuh_h_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vmuh_w(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vmuh_w_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vmuh_d(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vmuh_d_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void xvmuh_b(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvmuh_b_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvmuh_h(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvmuh_h_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvmuh_w(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvmuh_w_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvmuh_d(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvmuh_d_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void vmuh_bu(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vmuh_bu_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vmuh_hu(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vmuh_hu_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vmuh_wu(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vmuh_wu_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vmuh_du(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vmuh_du_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void xvmuh_bu(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvmuh_bu_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvmuh_hu(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvmuh_hu_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvmuh_wu(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvmuh_wu_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvmuh_du(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvmuh_du_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void vmulwev_h_b(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vmulwev_h_b_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vmulwev_w_h(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vmulwev_w_h_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void 
vmulwev_d_w(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vmulwev_d_w_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vmulwev_q_d(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vmulwev_q_d_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void xvmulwev_h_b(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvmulwev_h_b_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvmulwev_w_h(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvmulwev_w_h_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvmulwev_d_w(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvmulwev_d_w_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvmulwev_q_d(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvmulwev_q_d_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void vmulwod_h_b(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vmulwod_h_b_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vmulwod_w_h(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vmulwod_w_h_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vmulwod_d_w(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vmulwod_d_w_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vmulwod_q_d(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vmulwod_q_d_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void xvmulwod_h_b(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvmulwod_h_b_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvmulwod_w_h(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvmulwod_w_h_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvmulwod_d_w(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvmulwod_d_w_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvmulwod_q_d(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvmulwod_q_d_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void vmadd_b(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vmadd_b_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vmadd_h(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vmadd_h_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vmadd_w(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vmadd_w_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vmadd_d(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vmadd_d_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void xvmadd_b(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvmadd_b_op, 
(int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvmadd_h(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvmadd_h_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvmadd_w(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvmadd_w_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvmadd_d(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvmadd_d_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void vmsub_b(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vmsub_b_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vmsub_h(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vmsub_h_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vmsub_w(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vmsub_w_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vmsub_d(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vmsub_d_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void xvmsub_b(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvmsub_b_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvmsub_h(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvmsub_h_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvmsub_w(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvmsub_w_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvmsub_d(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvmsub_d_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void vext2xv_h_b(FloatRegister xd, FloatRegister xj) { ASSERT_LASX emit_int32(insn_RR(vext2xv_h_b_op, (int)xj->encoding(), (int)xd->encoding())); } ++ void vext2xv_w_b(FloatRegister xd, FloatRegister xj) { ASSERT_LASX emit_int32(insn_RR(vext2xv_w_b_op, (int)xj->encoding(), (int)xd->encoding())); } ++ void vext2xv_d_b(FloatRegister xd, FloatRegister xj) { ASSERT_LASX emit_int32(insn_RR(vext2xv_d_b_op, (int)xj->encoding(), (int)xd->encoding())); } ++ void vext2xv_w_h(FloatRegister xd, FloatRegister xj) { ASSERT_LASX emit_int32(insn_RR(vext2xv_w_h_op, (int)xj->encoding(), (int)xd->encoding())); } ++ void vext2xv_d_h(FloatRegister xd, FloatRegister xj) { ASSERT_LASX emit_int32(insn_RR(vext2xv_d_h_op, (int)xj->encoding(), (int)xd->encoding())); } ++ void vext2xv_d_w(FloatRegister xd, FloatRegister xj) { ASSERT_LASX emit_int32(insn_RR(vext2xv_d_w_op, (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void vext2xv_hu_bu(FloatRegister xd, FloatRegister xj) { ASSERT_LASX emit_int32(insn_RR(vext2xv_hu_bu_op, (int)xj->encoding(), (int)xd->encoding())); } ++ void vext2xv_wu_bu(FloatRegister xd, FloatRegister xj) { ASSERT_LASX emit_int32(insn_RR(vext2xv_wu_bu_op, (int)xj->encoding(), (int)xd->encoding())); } ++ void vext2xv_du_bu(FloatRegister xd, FloatRegister xj) { ASSERT_LASX emit_int32(insn_RR(vext2xv_du_bu_op, (int)xj->encoding(), (int)xd->encoding())); } ++ void vext2xv_wu_hu(FloatRegister xd, FloatRegister xj) { ASSERT_LASX 
emit_int32(insn_RR(vext2xv_wu_hu_op, (int)xj->encoding(), (int)xd->encoding())); } ++ void vext2xv_du_hu(FloatRegister xd, FloatRegister xj) { ASSERT_LASX emit_int32(insn_RR(vext2xv_du_hu_op, (int)xj->encoding(), (int)xd->encoding())); } ++ void vext2xv_du_wu(FloatRegister xd, FloatRegister xj) { ASSERT_LASX emit_int32(insn_RR(vext2xv_du_wu_op, (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void vldi(FloatRegister vd, int i13) { ASSERT_LSX emit_int32(insn_I13R( vldi_op, i13, (int)vd->encoding())); } ++ void xvldi(FloatRegister xd, int i13) { ASSERT_LASX emit_int32(insn_I13R(xvldi_op, i13, (int)xd->encoding())); } ++ ++ void vand_v(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vand_v_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void xvand_v(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvand_v_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void vor_v(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vor_v_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void xvor_v(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvor_v_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void vxor_v(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vxor_v_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void xvxor_v(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvxor_v_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void vnor_v(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vnor_v_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void xvnor_v(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvnor_v_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void vandn_v(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vandn_v_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void xvandn_v(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvandn_v_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void vorn_v(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vorn_v_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void xvorn_v(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvorn_v_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void vandi_b(FloatRegister vd, FloatRegister vj, int ui8) { ASSERT_LSX assert(is_uimm(ui8, 8), "not an unsigned 8-bit int"); emit_int32(insn_I8RR( vandi_b_op, ui8, (int)vj->encoding(), (int)vd->encoding())); } ++ void xvandi_b(FloatRegister xd, FloatRegister xj, int ui8) { ASSERT_LASX assert(is_uimm(ui8, 8), "not an unsigned 8-bit int"); emit_int32(insn_I8RR(xvandi_b_op, ui8, (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void vori_b(FloatRegister vd, FloatRegister vj, int ui8) { ASSERT_LSX assert(is_uimm(ui8, 8), "not an unsigned 8-bit int"); emit_int32(insn_I8RR( vori_b_op, ui8, (int)vj->encoding(), (int)vd->encoding())); } ++ void xvori_b(FloatRegister xd, FloatRegister xj, int ui8) { ASSERT_LASX 
assert(is_uimm(ui8, 8), "not an unsigned 8-bit int"); emit_int32(insn_I8RR(xvori_b_op, ui8, (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void vxori_b(FloatRegister vd, FloatRegister vj, int ui8) { ASSERT_LSX assert(is_uimm(ui8, 8), "not an unsigned 8-bit int"); emit_int32(insn_I8RR( vxori_b_op, ui8, (int)vj->encoding(), (int)vd->encoding())); } ++ void xvxori_b(FloatRegister xd, FloatRegister xj, int ui8) { ASSERT_LASX assert(is_uimm(ui8, 8), "not an unsigned 8-bit int"); emit_int32(insn_I8RR(xvxori_b_op, ui8, (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void vnori_b(FloatRegister vd, FloatRegister vj, int ui8) { ASSERT_LSX assert(is_uimm(ui8, 8), "not an unsigned 8-bit int"); emit_int32(insn_I8RR( vnori_b_op, ui8, (int)vj->encoding(), (int)vd->encoding())); } ++ void xvnori_b(FloatRegister xd, FloatRegister xj, int ui8) { ASSERT_LASX assert(is_uimm(ui8, 8), "not an unsigned 8-bit int"); emit_int32(insn_I8RR(xvnori_b_op, ui8, (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void vsll_b(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vsll_b_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vsll_h(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vsll_h_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vsll_w(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vsll_w_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vsll_d(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vsll_d_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void xvsll_b(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvsll_b_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvsll_h(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvsll_h_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvsll_w(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvsll_w_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvsll_d(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvsll_d_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void vslli_b(FloatRegister vd, FloatRegister vj, int ui3) { ASSERT_LSX emit_int32(insn_I3RR( vslli_b_op, ui3, (int)vj->encoding(), (int)vd->encoding())); } ++ void vslli_h(FloatRegister vd, FloatRegister vj, int ui4) { ASSERT_LSX emit_int32(insn_I4RR( vslli_h_op, ui4, (int)vj->encoding(), (int)vd->encoding())); } ++ void vslli_w(FloatRegister vd, FloatRegister vj, int ui5) { ASSERT_LSX emit_int32(insn_I5RR( vslli_w_op, ui5, (int)vj->encoding(), (int)vd->encoding())); } ++ void vslli_d(FloatRegister vd, FloatRegister vj, int ui6) { ASSERT_LSX emit_int32(insn_I6RR( vslli_d_op, ui6, (int)vj->encoding(), (int)vd->encoding())); } ++ void xvslli_b(FloatRegister xd, FloatRegister xj, int ui3) { ASSERT_LASX emit_int32(insn_I3RR(xvslli_b_op, ui3, (int)xj->encoding(), (int)xd->encoding())); } ++ void xvslli_h(FloatRegister xd, FloatRegister xj, int ui4) { ASSERT_LASX emit_int32(insn_I4RR(xvslli_h_op, ui4, (int)xj->encoding(), (int)xd->encoding())); } ++ void xvslli_w(FloatRegister xd, FloatRegister xj, int ui5) { ASSERT_LASX emit_int32(insn_I5RR(xvslli_w_op, ui5, (int)xj->encoding(), 
(int)xd->encoding())); } ++ void xvslli_d(FloatRegister xd, FloatRegister xj, int ui6) { ASSERT_LASX emit_int32(insn_I6RR(xvslli_d_op, ui6, (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void vsrl_b(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vsrl_b_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vsrl_h(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vsrl_h_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vsrl_w(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vsrl_w_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vsrl_d(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vsrl_d_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void xvsrl_b(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvsrl_b_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvsrl_h(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvsrl_h_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvsrl_w(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvsrl_w_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvsrl_d(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvsrl_d_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void vsrli_b(FloatRegister vd, FloatRegister vj, int ui3) { ASSERT_LSX emit_int32(insn_I3RR( vsrli_b_op, ui3, (int)vj->encoding(), (int)vd->encoding())); } ++ void vsrli_h(FloatRegister vd, FloatRegister vj, int ui4) { ASSERT_LSX emit_int32(insn_I4RR( vsrli_h_op, ui4, (int)vj->encoding(), (int)vd->encoding())); } ++ void vsrli_w(FloatRegister vd, FloatRegister vj, int ui5) { ASSERT_LSX emit_int32(insn_I5RR( vsrli_w_op, ui5, (int)vj->encoding(), (int)vd->encoding())); } ++ void vsrli_d(FloatRegister vd, FloatRegister vj, int ui6) { ASSERT_LSX emit_int32(insn_I6RR( vsrli_d_op, ui6, (int)vj->encoding(), (int)vd->encoding())); } ++ void xvsrli_b(FloatRegister xd, FloatRegister xj, int ui3) { ASSERT_LASX emit_int32(insn_I3RR(xvsrli_b_op, ui3, (int)xj->encoding(), (int)xd->encoding())); } ++ void xvsrli_h(FloatRegister xd, FloatRegister xj, int ui4) { ASSERT_LASX emit_int32(insn_I4RR(xvsrli_h_op, ui4, (int)xj->encoding(), (int)xd->encoding())); } ++ void xvsrli_w(FloatRegister xd, FloatRegister xj, int ui5) { ASSERT_LASX emit_int32(insn_I5RR(xvsrli_w_op, ui5, (int)xj->encoding(), (int)xd->encoding())); } ++ void xvsrli_d(FloatRegister xd, FloatRegister xj, int ui6) { ASSERT_LASX emit_int32(insn_I6RR(xvsrli_d_op, ui6, (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void vsra_b(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vsra_b_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vsra_h(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vsra_h_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vsra_w(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vsra_w_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vsra_d(FloatRegister vd, FloatRegister vj, FloatRegister vk) { 
ASSERT_LSX emit_int32(insn_RRR( vsra_d_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void xvsra_b(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvsra_b_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvsra_h(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvsra_h_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvsra_w(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvsra_w_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvsra_d(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvsra_d_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void vsrai_b(FloatRegister vd, FloatRegister vj, int ui3) { ASSERT_LSX emit_int32(insn_I3RR( vsrai_b_op, ui3, (int)vj->encoding(), (int)vd->encoding())); } ++ void vsrai_h(FloatRegister vd, FloatRegister vj, int ui4) { ASSERT_LSX emit_int32(insn_I4RR( vsrai_h_op, ui4, (int)vj->encoding(), (int)vd->encoding())); } ++ void vsrai_w(FloatRegister vd, FloatRegister vj, int ui5) { ASSERT_LSX emit_int32(insn_I5RR( vsrai_w_op, ui5, (int)vj->encoding(), (int)vd->encoding())); } ++ void vsrai_d(FloatRegister vd, FloatRegister vj, int ui6) { ASSERT_LSX emit_int32(insn_I6RR( vsrai_d_op, ui6, (int)vj->encoding(), (int)vd->encoding())); } ++ void xvsrai_b(FloatRegister xd, FloatRegister xj, int ui3) { ASSERT_LASX emit_int32(insn_I3RR(xvsrai_b_op, ui3, (int)xj->encoding(), (int)xd->encoding())); } ++ void xvsrai_h(FloatRegister xd, FloatRegister xj, int ui4) { ASSERT_LASX emit_int32(insn_I4RR(xvsrai_h_op, ui4, (int)xj->encoding(), (int)xd->encoding())); } ++ void xvsrai_w(FloatRegister xd, FloatRegister xj, int ui5) { ASSERT_LASX emit_int32(insn_I5RR(xvsrai_w_op, ui5, (int)xj->encoding(), (int)xd->encoding())); } ++ void xvsrai_d(FloatRegister xd, FloatRegister xj, int ui6) { ASSERT_LASX emit_int32(insn_I6RR(xvsrai_d_op, ui6, (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void vrotr_b(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vrotr_b_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vrotr_h(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vrotr_h_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vrotr_w(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vrotr_w_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vrotr_d(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vrotr_d_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void xvrotr_b(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvrotr_b_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvrotr_h(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvrotr_h_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvrotr_w(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvrotr_w_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvrotr_d(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvrotr_d_op, (int)xk->encoding(), 
(int)xj->encoding(), (int)xd->encoding())); } ++ ++ void vrotri_b(FloatRegister vd, FloatRegister vj, int ui3) { ASSERT_LSX emit_int32(insn_I3RR( vrotri_b_op, ui3, (int)vj->encoding(), (int)vd->encoding())); } ++ void vrotri_h(FloatRegister vd, FloatRegister vj, int ui4) { ASSERT_LSX emit_int32(insn_I4RR( vrotri_h_op, ui4, (int)vj->encoding(), (int)vd->encoding())); } ++ void vrotri_w(FloatRegister vd, FloatRegister vj, int ui5) { ASSERT_LSX emit_int32(insn_I5RR( vrotri_w_op, ui5, (int)vj->encoding(), (int)vd->encoding())); } ++ void vrotri_d(FloatRegister vd, FloatRegister vj, int ui6) { ASSERT_LSX emit_int32(insn_I6RR( vrotri_d_op, ui6, (int)vj->encoding(), (int)vd->encoding())); } ++ void xvrotri_b(FloatRegister xd, FloatRegister xj, int ui3) { ASSERT_LASX emit_int32(insn_I3RR(xvrotri_b_op, ui3, (int)xj->encoding(), (int)xd->encoding())); } ++ void xvrotri_h(FloatRegister xd, FloatRegister xj, int ui4) { ASSERT_LASX emit_int32(insn_I4RR(xvrotri_h_op, ui4, (int)xj->encoding(), (int)xd->encoding())); } ++ void xvrotri_w(FloatRegister xd, FloatRegister xj, int ui5) { ASSERT_LASX emit_int32(insn_I5RR(xvrotri_w_op, ui5, (int)xj->encoding(), (int)xd->encoding())); } ++ void xvrotri_d(FloatRegister xd, FloatRegister xj, int ui6) { ASSERT_LASX emit_int32(insn_I6RR(xvrotri_d_op, ui6, (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void vsrlni_b_h(FloatRegister vd, FloatRegister vj, int ui4) { ASSERT_LSX emit_int32(insn_I4RR( vsrlni_b_h_op, ui4, (int)vj->encoding(), (int)vd->encoding())); } ++ void vsrlni_h_w(FloatRegister vd, FloatRegister vj, int ui5) { ASSERT_LSX emit_int32(insn_I5RR( vsrlni_h_w_op, ui5, (int)vj->encoding(), (int)vd->encoding())); } ++ void vsrlni_w_d(FloatRegister vd, FloatRegister vj, int ui6) { ASSERT_LSX emit_int32(insn_I6RR( vsrlni_w_d_op, ui6, (int)vj->encoding(), (int)vd->encoding())); } ++ void vsrlni_d_q(FloatRegister vd, FloatRegister vj, int ui7) { ASSERT_LSX emit_int32(insn_I7RR( vsrlni_d_q_op, ui7, (int)vj->encoding(), (int)vd->encoding())); } ++ ++ void vpcnt_b(FloatRegister vd, FloatRegister vj) { ASSERT_LSX emit_int32(insn_RR( vpcnt_b_op, (int)vj->encoding(), (int)vd->encoding())); } ++ void vpcnt_h(FloatRegister vd, FloatRegister vj) { ASSERT_LSX emit_int32(insn_RR( vpcnt_h_op, (int)vj->encoding(), (int)vd->encoding())); } ++ void vpcnt_w(FloatRegister vd, FloatRegister vj) { ASSERT_LSX emit_int32(insn_RR( vpcnt_w_op, (int)vj->encoding(), (int)vd->encoding())); } ++ void vpcnt_d(FloatRegister vd, FloatRegister vj) { ASSERT_LSX emit_int32(insn_RR( vpcnt_d_op, (int)vj->encoding(), (int)vd->encoding())); } ++ void xvpcnt_b(FloatRegister xd, FloatRegister xj) { ASSERT_LASX emit_int32(insn_RR(xvpcnt_b_op, (int)xj->encoding(), (int)xd->encoding())); } ++ void xvpcnt_h(FloatRegister xd, FloatRegister xj) { ASSERT_LASX emit_int32(insn_RR(xvpcnt_h_op, (int)xj->encoding(), (int)xd->encoding())); } ++ void xvpcnt_w(FloatRegister xd, FloatRegister xj) { ASSERT_LASX emit_int32(insn_RR(xvpcnt_w_op, (int)xj->encoding(), (int)xd->encoding())); } ++ void xvpcnt_d(FloatRegister xd, FloatRegister xj) { ASSERT_LASX emit_int32(insn_RR(xvpcnt_d_op, (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void vbitclr_b(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vbitclr_b_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vbitclr_h(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vbitclr_h_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void 
vbitclr_w(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vbitclr_w_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vbitclr_d(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vbitclr_d_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void xvbitclr_b(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvbitclr_b_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvbitclr_h(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvbitclr_h_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvbitclr_w(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvbitclr_w_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvbitclr_d(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvbitclr_d_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void vbitclri_b(FloatRegister vd, FloatRegister vj, int ui3) { ASSERT_LSX emit_int32(insn_I3RR( vbitclri_b_op, ui3, (int)vj->encoding(), (int)vd->encoding())); } ++ void vbitclri_h(FloatRegister vd, FloatRegister vj, int ui4) { ASSERT_LSX emit_int32(insn_I4RR( vbitclri_h_op, ui4, (int)vj->encoding(), (int)vd->encoding())); } ++ void vbitclri_w(FloatRegister vd, FloatRegister vj, int ui5) { ASSERT_LSX emit_int32(insn_I5RR( vbitclri_w_op, ui5, (int)vj->encoding(), (int)vd->encoding())); } ++ void vbitclri_d(FloatRegister vd, FloatRegister vj, int ui6) { ASSERT_LSX emit_int32(insn_I6RR( vbitclri_d_op, ui6, (int)vj->encoding(), (int)vd->encoding())); } ++ void xvbitclri_b(FloatRegister xd, FloatRegister xj, int ui3) { ASSERT_LASX emit_int32(insn_I3RR(xvbitclri_b_op, ui3, (int)xj->encoding(), (int)xd->encoding())); } ++ void xvbitclri_h(FloatRegister xd, FloatRegister xj, int ui4) { ASSERT_LASX emit_int32(insn_I4RR(xvbitclri_h_op, ui4, (int)xj->encoding(), (int)xd->encoding())); } ++ void xvbitclri_w(FloatRegister xd, FloatRegister xj, int ui5) { ASSERT_LASX emit_int32(insn_I5RR(xvbitclri_w_op, ui5, (int)xj->encoding(), (int)xd->encoding())); } ++ void xvbitclri_d(FloatRegister xd, FloatRegister xj, int ui6) { ASSERT_LASX emit_int32(insn_I6RR(xvbitclri_d_op, ui6, (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void vbitset_b(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vbitset_b_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vbitset_h(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vbitset_h_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vbitset_w(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vbitset_w_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vbitset_d(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vbitset_d_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void xvbitset_b(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvbitset_b_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvbitset_h(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvbitset_h_op, (int)xk->encoding(), 
(int)xj->encoding(), (int)xd->encoding())); } ++ void xvbitset_w(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvbitset_w_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvbitset_d(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvbitset_d_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void vbitseti_b(FloatRegister vd, FloatRegister vj, int ui3) { ASSERT_LSX emit_int32(insn_I3RR( vbitseti_b_op, ui3, (int)vj->encoding(), (int)vd->encoding())); } ++ void vbitseti_h(FloatRegister vd, FloatRegister vj, int ui4) { ASSERT_LSX emit_int32(insn_I4RR( vbitseti_h_op, ui4, (int)vj->encoding(), (int)vd->encoding())); } ++ void vbitseti_w(FloatRegister vd, FloatRegister vj, int ui5) { ASSERT_LSX emit_int32(insn_I5RR( vbitseti_w_op, ui5, (int)vj->encoding(), (int)vd->encoding())); } ++ void vbitseti_d(FloatRegister vd, FloatRegister vj, int ui6) { ASSERT_LSX emit_int32(insn_I6RR( vbitseti_d_op, ui6, (int)vj->encoding(), (int)vd->encoding())); } ++ void xvbitseti_b(FloatRegister xd, FloatRegister xj, int ui3) { ASSERT_LASX emit_int32(insn_I3RR(xvbitseti_b_op, ui3, (int)xj->encoding(), (int)xd->encoding())); } ++ void xvbitseti_h(FloatRegister xd, FloatRegister xj, int ui4) { ASSERT_LASX emit_int32(insn_I4RR(xvbitseti_h_op, ui4, (int)xj->encoding(), (int)xd->encoding())); } ++ void xvbitseti_w(FloatRegister xd, FloatRegister xj, int ui5) { ASSERT_LASX emit_int32(insn_I5RR(xvbitseti_w_op, ui5, (int)xj->encoding(), (int)xd->encoding())); } ++ void xvbitseti_d(FloatRegister xd, FloatRegister xj, int ui6) { ASSERT_LASX emit_int32(insn_I6RR(xvbitseti_d_op, ui6, (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void vbitrev_b(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vbitrev_b_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vbitrev_h(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vbitrev_h_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vbitrev_w(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vbitrev_w_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vbitrev_d(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vbitrev_d_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void xvbitrev_b(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvbitrev_b_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvbitrev_h(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvbitrev_h_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvbitrev_w(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvbitrev_w_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvbitrev_d(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvbitrev_d_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void vbitrevi_b(FloatRegister vd, FloatRegister vj, int ui3) { ASSERT_LSX emit_int32(insn_I3RR( vbitrevi_b_op, ui3, (int)vj->encoding(), (int)vd->encoding())); } ++ void vbitrevi_h(FloatRegister vd, FloatRegister vj, int ui4) { ASSERT_LSX emit_int32(insn_I4RR( vbitrevi_h_op, ui4, 
(int)vj->encoding(), (int)vd->encoding())); } ++ void vbitrevi_w(FloatRegister vd, FloatRegister vj, int ui5) { ASSERT_LSX emit_int32(insn_I5RR( vbitrevi_w_op, ui5, (int)vj->encoding(), (int)vd->encoding())); } ++ void vbitrevi_d(FloatRegister vd, FloatRegister vj, int ui6) { ASSERT_LSX emit_int32(insn_I6RR( vbitrevi_d_op, ui6, (int)vj->encoding(), (int)vd->encoding())); } ++ void xvbitrevi_b(FloatRegister xd, FloatRegister xj, int ui3) { ASSERT_LASX emit_int32(insn_I3RR(xvbitrevi_b_op, ui3, (int)xj->encoding(), (int)xd->encoding())); } ++ void xvbitrevi_h(FloatRegister xd, FloatRegister xj, int ui4) { ASSERT_LASX emit_int32(insn_I4RR(xvbitrevi_h_op, ui4, (int)xj->encoding(), (int)xd->encoding())); } ++ void xvbitrevi_w(FloatRegister xd, FloatRegister xj, int ui5) { ASSERT_LASX emit_int32(insn_I5RR(xvbitrevi_w_op, ui5, (int)xj->encoding(), (int)xd->encoding())); } ++ void xvbitrevi_d(FloatRegister xd, FloatRegister xj, int ui6) { ASSERT_LASX emit_int32(insn_I6RR(xvbitrevi_d_op, ui6, (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void vfadd_s(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vfadd_s_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vfadd_d(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vfadd_d_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void xvfadd_s(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvfadd_s_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvfadd_d(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvfadd_d_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void vfsub_s(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vfsub_s_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vfsub_d(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vfsub_d_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void xvfsub_s(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvfsub_s_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvfsub_d(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvfsub_d_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void vfmul_s(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vfmul_s_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vfmul_d(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vfmul_d_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void xvfmul_s(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvfmul_s_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvfmul_d(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvfmul_d_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void vfdiv_s(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vfdiv_s_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vfdiv_d(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( 
vfdiv_d_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void xvfdiv_s(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvfdiv_s_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvfdiv_d(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvfdiv_d_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void vfmadd_s(FloatRegister vd, FloatRegister vj, FloatRegister vk, FloatRegister va) { ASSERT_LSX emit_int32(insn_RRRR( vfmadd_s_op, (int)va->encoding(), (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vfmadd_d(FloatRegister vd, FloatRegister vj, FloatRegister vk, FloatRegister va) { ASSERT_LSX emit_int32(insn_RRRR( vfmadd_d_op, (int)va->encoding(), (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void xvfmadd_s(FloatRegister xd, FloatRegister xj, FloatRegister xk, FloatRegister xa) { ASSERT_LASX emit_int32(insn_RRRR(xvfmadd_s_op, (int)xa->encoding(), (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvfmadd_d(FloatRegister xd, FloatRegister xj, FloatRegister xk, FloatRegister xa) { ASSERT_LASX emit_int32(insn_RRRR(xvfmadd_d_op, (int)xa->encoding(), (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void vfmsub_s(FloatRegister vd, FloatRegister vj, FloatRegister vk, FloatRegister va) { ASSERT_LSX emit_int32(insn_RRRR( vfmsub_s_op, (int)va->encoding(), (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vfmsub_d(FloatRegister vd, FloatRegister vj, FloatRegister vk, FloatRegister va) { ASSERT_LSX emit_int32(insn_RRRR( vfmsub_d_op, (int)va->encoding(), (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void xvfmsub_s(FloatRegister xd, FloatRegister xj, FloatRegister xk, FloatRegister xa) { ASSERT_LASX emit_int32(insn_RRRR(xvfmsub_s_op, (int)xa->encoding(), (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvfmsub_d(FloatRegister xd, FloatRegister xj, FloatRegister xk, FloatRegister xa) { ASSERT_LASX emit_int32(insn_RRRR(xvfmsub_d_op, (int)xa->encoding(), (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void vfnmadd_s(FloatRegister vd, FloatRegister vj, FloatRegister vk, FloatRegister va) { ASSERT_LSX emit_int32(insn_RRRR( vfnmadd_s_op, (int)va->encoding(), (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vfnmadd_d(FloatRegister vd, FloatRegister vj, FloatRegister vk, FloatRegister va) { ASSERT_LSX emit_int32(insn_RRRR( vfnmadd_d_op, (int)va->encoding(), (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void xvfnmadd_s(FloatRegister xd, FloatRegister xj, FloatRegister xk, FloatRegister xa) { ASSERT_LASX emit_int32(insn_RRRR(xvfnmadd_s_op, (int)xa->encoding(), (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvfnmadd_d(FloatRegister xd, FloatRegister xj, FloatRegister xk, FloatRegister xa) { ASSERT_LASX emit_int32(insn_RRRR(xvfnmadd_d_op, (int)xa->encoding(), (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void vfnmsub_s(FloatRegister vd, FloatRegister vj, FloatRegister vk, FloatRegister va) { ASSERT_LSX emit_int32(insn_RRRR( vfnmsub_s_op, (int)va->encoding(), (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vfnmsub_d(FloatRegister vd, FloatRegister vj, FloatRegister vk, FloatRegister va) { ASSERT_LSX emit_int32(insn_RRRR( vfnmsub_d_op, 
(int)va->encoding(), (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void xvfnmsub_s(FloatRegister xd, FloatRegister xj, FloatRegister xk, FloatRegister xa) { ASSERT_LASX emit_int32(insn_RRRR(xvfnmsub_s_op, (int)xa->encoding(), (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvfnmsub_d(FloatRegister xd, FloatRegister xj, FloatRegister xk, FloatRegister xa) { ASSERT_LASX emit_int32(insn_RRRR(xvfnmsub_d_op, (int)xa->encoding(), (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void vfmax_s(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vfmax_s_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vfmax_d(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vfmax_d_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void xvfmax_s(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvfmax_s_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvfmax_d(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvfmax_d_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void vfmin_s(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vfmin_s_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vfmin_d(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vfmin_d_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void xvfmin_s(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvfmin_s_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvfmin_d(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvfmin_d_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void vfclass_s(FloatRegister vd, FloatRegister vj) { ASSERT_LSX emit_int32(insn_RR( vfclass_s_op, (int)vj->encoding(), (int)vd->encoding())); } ++ void vfclass_d(FloatRegister vd, FloatRegister vj) { ASSERT_LSX emit_int32(insn_RR( vfclass_d_op, (int)vj->encoding(), (int)vd->encoding())); } ++ void xvfclass_s(FloatRegister xd, FloatRegister xj) { ASSERT_LASX emit_int32(insn_RR(xvfclass_s_op, (int)xj->encoding(), (int)xd->encoding())); } ++ void xvfclass_d(FloatRegister xd, FloatRegister xj) { ASSERT_LASX emit_int32(insn_RR(xvfclass_d_op, (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void vfsqrt_s(FloatRegister vd, FloatRegister vj) { ASSERT_LSX emit_int32(insn_RR( vfsqrt_s_op, (int)vj->encoding(), (int)vd->encoding())); } ++ void vfsqrt_d(FloatRegister vd, FloatRegister vj) { ASSERT_LSX emit_int32(insn_RR( vfsqrt_d_op, (int)vj->encoding(), (int)vd->encoding())); } ++ void xvfsqrt_s(FloatRegister xd, FloatRegister xj) { ASSERT_LASX emit_int32(insn_RR(xvfsqrt_s_op, (int)xj->encoding(), (int)xd->encoding())); } ++ void xvfsqrt_d(FloatRegister xd, FloatRegister xj) { ASSERT_LASX emit_int32(insn_RR(xvfsqrt_d_op, (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void vfcvtl_s_h(FloatRegister vd, FloatRegister rj) { ASSERT_LSX emit_int32(insn_RR( vfcvtl_s_h_op, (int)rj->encoding(), (int)vd->encoding())); } ++ void vfcvtl_d_s(FloatRegister vd, FloatRegister rj) { ASSERT_LSX emit_int32(insn_RR( vfcvtl_d_s_op, (int)rj->encoding(), (int)vd->encoding())); } ++ void xvfcvtl_s_h(FloatRegister xd, 
FloatRegister xj) { ASSERT_LASX emit_int32(insn_RR(xvfcvtl_s_h_op, (int)xj->encoding(), (int)xd->encoding())); } ++ void xvfcvtl_d_s(FloatRegister xd, FloatRegister xj) { ASSERT_LASX emit_int32(insn_RR(xvfcvtl_d_s_op, (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void vfcvth_s_h(FloatRegister vd, FloatRegister rj) { ASSERT_LSX emit_int32(insn_RR( vfcvth_s_h_op, (int)rj->encoding(), (int)vd->encoding())); } ++ void vfcvth_d_s(FloatRegister vd, FloatRegister rj) { ASSERT_LSX emit_int32(insn_RR( vfcvth_d_s_op, (int)rj->encoding(), (int)vd->encoding())); } ++ void xvfcvth_s_h(FloatRegister xd, FloatRegister xj) { ASSERT_LASX emit_int32(insn_RR(xvfcvth_s_h_op, (int)xj->encoding(), (int)xd->encoding())); } ++ void xvfcvth_d_s(FloatRegister xd, FloatRegister xj) { ASSERT_LASX emit_int32(insn_RR(xvfcvth_d_s_op, (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void vfcvt_h_s(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vfcvt_h_s_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vfcvt_s_d(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vfcvt_s_d_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void xvfcvt_h_s(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvfcvt_h_s_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvfcvt_s_d(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvfcvt_s_d_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void vfrintrne_s(FloatRegister vd, FloatRegister vj) { ASSERT_LSX emit_int32(insn_RR( vfrintrne_s_op, (int)vj->encoding(), (int)vd->encoding())); } ++ void vfrintrne_d(FloatRegister vd, FloatRegister vj) { ASSERT_LSX emit_int32(insn_RR( vfrintrne_d_op, (int)vj->encoding(), (int)vd->encoding())); } ++ void xvfrintrne_s(FloatRegister xd, FloatRegister xj) { ASSERT_LASX emit_int32(insn_RR(xvfrintrne_s_op, (int)xj->encoding(), (int)xd->encoding())); } ++ void xvfrintrne_d(FloatRegister xd, FloatRegister xj) { ASSERT_LASX emit_int32(insn_RR(xvfrintrne_d_op, (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void vfrintrz_s(FloatRegister vd, FloatRegister vj) { ASSERT_LSX emit_int32(insn_RR( vfrintrz_s_op, (int)vj->encoding(), (int)vd->encoding())); } ++ void vfrintrz_d(FloatRegister vd, FloatRegister vj) { ASSERT_LSX emit_int32(insn_RR( vfrintrz_d_op, (int)vj->encoding(), (int)vd->encoding())); } ++ void xvfrintrz_s(FloatRegister xd, FloatRegister xj) { ASSERT_LASX emit_int32(insn_RR(xvfrintrz_s_op, (int)xj->encoding(), (int)xd->encoding())); } ++ void xvfrintrz_d(FloatRegister xd, FloatRegister xj) { ASSERT_LASX emit_int32(insn_RR(xvfrintrz_d_op, (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void vfrintrp_s(FloatRegister vd, FloatRegister vj) { ASSERT_LSX emit_int32(insn_RR( vfrintrp_s_op, (int)vj->encoding(), (int)vd->encoding())); } ++ void vfrintrp_d(FloatRegister vd, FloatRegister vj) { ASSERT_LSX emit_int32(insn_RR( vfrintrp_d_op, (int)vj->encoding(), (int)vd->encoding())); } ++ void xvfrintrp_s(FloatRegister xd, FloatRegister xj) { ASSERT_LASX emit_int32(insn_RR(xvfrintrp_s_op, (int)xj->encoding(), (int)xd->encoding())); } ++ void xvfrintrp_d(FloatRegister xd, FloatRegister xj) { ASSERT_LASX emit_int32(insn_RR(xvfrintrp_d_op, (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void vfrintrm_s(FloatRegister vd, FloatRegister vj) { ASSERT_LSX emit_int32(insn_RR( vfrintrm_s_op, 
(int)vj->encoding(), (int)vd->encoding())); } ++ void vfrintrm_d(FloatRegister vd, FloatRegister vj) { ASSERT_LSX emit_int32(insn_RR( vfrintrm_d_op, (int)vj->encoding(), (int)vd->encoding())); } ++ void xvfrintrm_s(FloatRegister xd, FloatRegister xj) { ASSERT_LASX emit_int32(insn_RR(xvfrintrm_s_op, (int)xj->encoding(), (int)xd->encoding())); } ++ void xvfrintrm_d(FloatRegister xd, FloatRegister xj) { ASSERT_LASX emit_int32(insn_RR(xvfrintrm_d_op, (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void vfrint_s(FloatRegister vd, FloatRegister vj) { ASSERT_LSX emit_int32(insn_RR( vfrint_s_op, (int)vj->encoding(), (int)vd->encoding())); } ++ void vfrint_d(FloatRegister vd, FloatRegister vj) { ASSERT_LSX emit_int32(insn_RR( vfrint_d_op, (int)vj->encoding(), (int)vd->encoding())); } ++ void xvfrint_s(FloatRegister xd, FloatRegister xj) { ASSERT_LASX emit_int32(insn_RR(xvfrint_s_op, (int)xj->encoding(), (int)xd->encoding())); } ++ void xvfrint_d(FloatRegister xd, FloatRegister xj) { ASSERT_LASX emit_int32(insn_RR(xvfrint_d_op, (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void vftintrne_w_s(FloatRegister vd, FloatRegister rj) { ASSERT_LSX emit_int32(insn_RR( vftintrne_w_s_op, (int)rj->encoding(), (int)vd->encoding())); } ++ void vftintrne_l_d(FloatRegister vd, FloatRegister rj) { ASSERT_LSX emit_int32(insn_RR( vftintrne_l_d_op, (int)rj->encoding(), (int)vd->encoding())); } ++ void xvftintrne_w_s(FloatRegister xd, FloatRegister xj) { ASSERT_LASX emit_int32(insn_RR(xvftintrne_w_s_op, (int)xj->encoding(), (int)xd->encoding())); } ++ void xvftintrne_l_d(FloatRegister xd, FloatRegister xj) { ASSERT_LASX emit_int32(insn_RR(xvftintrne_l_d_op, (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void vftintrz_w_s(FloatRegister vd, FloatRegister rj) { ASSERT_LSX emit_int32(insn_RR( vftintrz_w_s_op, (int)rj->encoding(), (int)vd->encoding())); } ++ void vftintrz_l_d(FloatRegister vd, FloatRegister rj) { ASSERT_LSX emit_int32(insn_RR( vftintrz_l_d_op, (int)rj->encoding(), (int)vd->encoding())); } ++ void xvftintrz_w_s(FloatRegister xd, FloatRegister xj) { ASSERT_LASX emit_int32(insn_RR(xvftintrz_w_s_op, (int)xj->encoding(), (int)xd->encoding())); } ++ void xvftintrz_l_d(FloatRegister xd, FloatRegister xj) { ASSERT_LASX emit_int32(insn_RR(xvftintrz_l_d_op, (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void vftintrp_w_s(FloatRegister vd, FloatRegister rj) { ASSERT_LSX emit_int32(insn_RR( vftintrp_w_s_op, (int)rj->encoding(), (int)vd->encoding())); } ++ void vftintrp_l_d(FloatRegister vd, FloatRegister rj) { ASSERT_LSX emit_int32(insn_RR( vftintrp_l_d_op, (int)rj->encoding(), (int)vd->encoding())); } ++ void xvftintrp_w_s(FloatRegister xd, FloatRegister xj) { ASSERT_LASX emit_int32(insn_RR(xvftintrp_w_s_op, (int)xj->encoding(), (int)xd->encoding())); } ++ void xvftintrp_l_d(FloatRegister xd, FloatRegister xj) { ASSERT_LASX emit_int32(insn_RR(xvftintrp_l_d_op, (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void vftintrm_w_s(FloatRegister vd, FloatRegister rj) { ASSERT_LSX emit_int32(insn_RR( vftintrm_w_s_op, (int)rj->encoding(), (int)vd->encoding())); } ++ void vftintrm_l_d(FloatRegister vd, FloatRegister rj) { ASSERT_LSX emit_int32(insn_RR( vftintrm_l_d_op, (int)rj->encoding(), (int)vd->encoding())); } ++ void xvftintrm_w_s(FloatRegister xd, FloatRegister xj) { ASSERT_LASX emit_int32(insn_RR(xvftintrm_w_s_op, (int)xj->encoding(), (int)xd->encoding())); } ++ void xvftintrm_l_d(FloatRegister xd, FloatRegister xj) { ASSERT_LASX emit_int32(insn_RR(xvftintrm_l_d_op, (int)xj->encoding(), 
(int)xd->encoding())); } ++ ++ void vftint_w_s(FloatRegister vd, FloatRegister rj) { ASSERT_LSX emit_int32(insn_RR( vftint_w_s_op, (int)rj->encoding(), (int)vd->encoding())); } ++ void vftint_l_d(FloatRegister vd, FloatRegister rj) { ASSERT_LSX emit_int32(insn_RR( vftint_l_d_op, (int)rj->encoding(), (int)vd->encoding())); } ++ void xvftint_w_s(FloatRegister xd, FloatRegister xj) { ASSERT_LASX emit_int32(insn_RR(xvftint_w_s_op, (int)xj->encoding(), (int)xd->encoding())); } ++ void xvftint_l_d(FloatRegister xd, FloatRegister xj) { ASSERT_LASX emit_int32(insn_RR(xvftint_l_d_op, (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void vftintrne_w_d(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vftintrne_w_d_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void xvftintrne_w_d(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvftintrne_w_d_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void vftintrz_w_d(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vftintrz_w_d_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void xvftintrz_w_d(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvftintrz_w_d_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void vftintrp_w_d(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vftintrp_w_d_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void xvftintrp_w_d(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvftintrp_w_d_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void vftintrm_w_d(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vftintrm_w_d_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void xvftintrm_w_d(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvftintrm_w_d_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void vftint_w_d(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vftint_w_d_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void xvftint_w_d(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvftint_w_d_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void vftintrnel_l_s(FloatRegister vd, FloatRegister rj) { ASSERT_LSX emit_int32(insn_RR( vftintrnel_l_s_op, (int)rj->encoding(), (int)vd->encoding())); } ++ void xvftintrnel_l_s(FloatRegister xd, FloatRegister xj) { ASSERT_LASX emit_int32(insn_RR(xvftintrnel_l_s_op, (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void vftintrneh_l_s(FloatRegister vd, FloatRegister rj) { ASSERT_LSX emit_int32(insn_RR( vftintrneh_l_s_op, (int)rj->encoding(), (int)vd->encoding())); } ++ void xvftintrneh_l_s(FloatRegister xd, FloatRegister xj) { ASSERT_LASX emit_int32(insn_RR(xvftintrneh_l_s_op, (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void vftintrzl_l_s(FloatRegister vd, FloatRegister rj) { ASSERT_LSX emit_int32(insn_RR( vftintrzl_l_s_op, (int)rj->encoding(), (int)vd->encoding())); } ++ void xvftintrzl_l_s(FloatRegister xd, FloatRegister xj) { ASSERT_LASX emit_int32(insn_RR(xvftintrzl_l_s_op, (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void 
vftintrzh_l_s(FloatRegister vd, FloatRegister rj) { ASSERT_LSX emit_int32(insn_RR( vftintrzh_l_s_op, (int)rj->encoding(), (int)vd->encoding())); } ++ void xvftintrzh_l_s(FloatRegister xd, FloatRegister xj) { ASSERT_LASX emit_int32(insn_RR(xvftintrzh_l_s_op, (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void vftintrpl_l_s(FloatRegister vd, FloatRegister rj) { ASSERT_LSX emit_int32(insn_RR( vftintrpl_l_s_op, (int)rj->encoding(), (int)vd->encoding())); } ++ void xvftintrpl_l_s(FloatRegister xd, FloatRegister xj) { ASSERT_LASX emit_int32(insn_RR(xvftintrpl_l_s_op, (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void vftintrph_l_s(FloatRegister vd, FloatRegister rj) { ASSERT_LSX emit_int32(insn_RR( vftintrph_l_s_op, (int)rj->encoding(), (int)vd->encoding())); } ++ void xvftintrph_l_s(FloatRegister xd, FloatRegister xj) { ASSERT_LASX emit_int32(insn_RR(xvftintrph_l_s_op, (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void vftintrml_l_s(FloatRegister vd, FloatRegister rj) { ASSERT_LSX emit_int32(insn_RR( vftintrml_l_s_op, (int)rj->encoding(), (int)vd->encoding())); } ++ void xvftintrml_l_s(FloatRegister xd, FloatRegister xj) { ASSERT_LASX emit_int32(insn_RR(xvftintrml_l_s_op, (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void vftintrmh_l_s(FloatRegister vd, FloatRegister rj) { ASSERT_LSX emit_int32(insn_RR( vftintrmh_l_s_op, (int)rj->encoding(), (int)vd->encoding())); } ++ void xvftintrmh_l_s(FloatRegister xd, FloatRegister xj) { ASSERT_LASX emit_int32(insn_RR(xvftintrmh_l_s_op, (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void vftintl_l_s(FloatRegister vd, FloatRegister rj) { ASSERT_LSX emit_int32(insn_RR( vftintl_l_s_op, (int)rj->encoding(), (int)vd->encoding())); } ++ void xvftintl_l_s(FloatRegister xd, FloatRegister xj) { ASSERT_LASX emit_int32(insn_RR(xvftintl_l_s_op, (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void vftinth_l_s(FloatRegister vd, FloatRegister rj) { ASSERT_LSX emit_int32(insn_RR( vftinth_l_s_op, (int)rj->encoding(), (int)vd->encoding())); } ++ void xvftinth_l_s(FloatRegister xd, FloatRegister xj) { ASSERT_LASX emit_int32(insn_RR(xvftinth_l_s_op, (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void vffint_s_w(FloatRegister vd, FloatRegister rj) { ASSERT_LSX emit_int32(insn_RR( vffint_s_w_op, (int)rj->encoding(), (int)vd->encoding())); } ++ void vffint_d_l(FloatRegister vd, FloatRegister rj) { ASSERT_LSX emit_int32(insn_RR( vffint_d_l_op, (int)rj->encoding(), (int)vd->encoding())); } ++ void xvffint_s_w(FloatRegister xd, FloatRegister xj) { ASSERT_LASX emit_int32(insn_RR(xvffint_s_w_op, (int)xj->encoding(), (int)xd->encoding())); } ++ void xvffint_d_l(FloatRegister xd, FloatRegister xj) { ASSERT_LASX emit_int32(insn_RR(xvffint_d_l_op, (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void vffint_s_l(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vffint_s_l_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void xvffint_s_l(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvffint_s_l_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void vffintl_d_w(FloatRegister vd, FloatRegister rj) { ASSERT_LSX emit_int32(insn_RR( vffintl_d_w_op, (int)rj->encoding(), (int)vd->encoding())); } ++ void xvffintl_d_w(FloatRegister xd, FloatRegister xj) { ASSERT_LASX emit_int32(insn_RR(xvffintl_d_w_op, (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void vffinth_d_w(FloatRegister vd, FloatRegister rj) { ASSERT_LSX 
emit_int32(insn_RR( vffinth_d_w_op, (int)rj->encoding(), (int)vd->encoding())); } ++ void xvffinth_d_w(FloatRegister xd, FloatRegister xj) { ASSERT_LASX emit_int32(insn_RR(xvffinth_d_w_op, (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void vseq_b(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vseq_b_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vseq_h(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vseq_h_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vseq_w(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vseq_w_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vseq_d(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vseq_d_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void xvseq_b(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvseq_b_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvseq_h(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvseq_h_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvseq_w(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvseq_w_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvseq_d(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvseq_d_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void vsle_b(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vsle_b_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vsle_h(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vsle_h_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vsle_w(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vsle_w_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vsle_d(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vsle_d_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void xvsle_b(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvsle_b_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvsle_h(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvsle_h_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvsle_w(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvsle_w_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvsle_d(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvsle_d_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void vsle_bu(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vsle_bu_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vsle_hu(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vsle_hu_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vsle_wu(FloatRegister 
vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vsle_wu_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vsle_du(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vsle_du_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void xvsle_bu(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvsle_bu_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvsle_hu(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvsle_hu_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvsle_wu(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvsle_wu_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvsle_du(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvsle_du_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void vslt_b(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vslt_b_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vslt_h(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vslt_h_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vslt_w(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vslt_w_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vslt_d(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vslt_d_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void xvslt_b(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvslt_b_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvslt_h(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvslt_h_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvslt_w(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvslt_w_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvslt_d(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvslt_d_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void vslt_bu(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vslt_bu_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vslt_hu(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vslt_hu_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vslt_wu(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vslt_wu_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vslt_du(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vslt_du_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void xvslt_bu(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvslt_bu_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvslt_hu(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX 
emit_int32(insn_RRR(xvslt_hu_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvslt_wu(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvslt_wu_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvslt_du(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvslt_du_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void vslti_bu(FloatRegister vd, FloatRegister vj, int ui5) { ASSERT_LSX emit_int32(insn_I5RR( vslti_bu_op, ui5, (int)vj->encoding(), (int)vd->encoding())); } ++ void vslti_hu(FloatRegister vd, FloatRegister vj, int ui5) { ASSERT_LSX emit_int32(insn_I5RR( vslti_hu_op, ui5, (int)vj->encoding(), (int)vd->encoding())); } ++ void vslti_wu(FloatRegister vd, FloatRegister vj, int ui5) { ASSERT_LSX emit_int32(insn_I5RR( vslti_wu_op, ui5, (int)vj->encoding(), (int)vd->encoding())); } ++ void vslti_du(FloatRegister vd, FloatRegister vj, int ui5) { ASSERT_LSX emit_int32(insn_I5RR( vslti_du_op, ui5, (int)vj->encoding(), (int)vd->encoding())); } ++ void xvslti_bu(FloatRegister xd, FloatRegister xj, int ui5) { ASSERT_LASX emit_int32(insn_I5RR(xvslti_bu_op, ui5, (int)xj->encoding(), (int)xd->encoding())); } ++ void xvslti_hu(FloatRegister xd, FloatRegister xj, int ui5) { ASSERT_LASX emit_int32(insn_I5RR(xvslti_hu_op, ui5, (int)xj->encoding(), (int)xd->encoding())); } ++ void xvslti_wu(FloatRegister xd, FloatRegister xj, int ui5) { ASSERT_LASX emit_int32(insn_I5RR(xvslti_wu_op, ui5, (int)xj->encoding(), (int)xd->encoding())); } ++ void xvslti_du(FloatRegister xd, FloatRegister xj, int ui5) { ASSERT_LASX emit_int32(insn_I5RR(xvslti_du_op, ui5, (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void vfcmp_caf_s (FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRRR( vfcmp_cond_s_op, fcmp_caf , (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vfcmp_cun_s (FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRRR( vfcmp_cond_s_op, fcmp_cun , (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vfcmp_ceq_s (FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRRR( vfcmp_cond_s_op, fcmp_ceq , (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vfcmp_cueq_s (FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRRR( vfcmp_cond_s_op, fcmp_cueq, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vfcmp_clt_s (FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRRR( vfcmp_cond_s_op, fcmp_clt , (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vfcmp_cult_s (FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRRR( vfcmp_cond_s_op, fcmp_cult, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vfcmp_cle_s (FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRRR( vfcmp_cond_s_op, fcmp_cle , (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vfcmp_cule_s (FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRRR( vfcmp_cond_s_op, fcmp_cule, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vfcmp_cne_s (FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRRR( vfcmp_cond_s_op, fcmp_cne , 
(int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vfcmp_cor_s (FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRRR( vfcmp_cond_s_op, fcmp_cor , (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vfcmp_cune_s (FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRRR( vfcmp_cond_s_op, fcmp_cune, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vfcmp_saf_s (FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRRR( vfcmp_cond_s_op, fcmp_saf , (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vfcmp_sun_s (FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRRR( vfcmp_cond_s_op, fcmp_sun , (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vfcmp_seq_s (FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRRR( vfcmp_cond_s_op, fcmp_seq , (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vfcmp_sueq_s (FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRRR( vfcmp_cond_s_op, fcmp_sueq, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vfcmp_slt_s (FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRRR( vfcmp_cond_s_op, fcmp_slt , (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vfcmp_sult_s (FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRRR( vfcmp_cond_s_op, fcmp_sult, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vfcmp_sle_s (FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRRR( vfcmp_cond_s_op, fcmp_sle , (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vfcmp_sule_s (FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRRR( vfcmp_cond_s_op, fcmp_sule, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vfcmp_sne_s (FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRRR( vfcmp_cond_s_op, fcmp_sne , (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vfcmp_sor_s (FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRRR( vfcmp_cond_s_op, fcmp_sor , (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vfcmp_sune_s (FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRRR( vfcmp_cond_s_op, fcmp_sune, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ ++ void vfcmp_caf_d (FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRRR( vfcmp_cond_d_op, fcmp_caf , (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vfcmp_cun_d (FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRRR( vfcmp_cond_d_op, fcmp_cun , (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vfcmp_ceq_d (FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRRR( vfcmp_cond_d_op, fcmp_ceq , (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vfcmp_cueq_d (FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRRR( vfcmp_cond_d_op, fcmp_cueq, (int)vk->encoding(), (int)vj->encoding(), 
(int)vd->encoding())); } ++ void vfcmp_clt_d (FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRRR( vfcmp_cond_d_op, fcmp_clt , (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vfcmp_cult_d (FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRRR( vfcmp_cond_d_op, fcmp_cult, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vfcmp_cle_d (FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRRR( vfcmp_cond_d_op, fcmp_cle , (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vfcmp_cule_d (FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRRR( vfcmp_cond_d_op, fcmp_cule, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vfcmp_cne_d (FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRRR( vfcmp_cond_d_op, fcmp_cne , (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vfcmp_cor_d (FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRRR( vfcmp_cond_d_op, fcmp_cor , (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vfcmp_cune_d (FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRRR( vfcmp_cond_d_op, fcmp_cune, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vfcmp_saf_d (FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRRR( vfcmp_cond_d_op, fcmp_saf , (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vfcmp_sun_d (FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRRR( vfcmp_cond_d_op, fcmp_sun , (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vfcmp_seq_d (FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRRR( vfcmp_cond_d_op, fcmp_seq , (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vfcmp_sueq_d (FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRRR( vfcmp_cond_d_op, fcmp_sueq, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vfcmp_slt_d (FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRRR( vfcmp_cond_d_op, fcmp_slt , (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vfcmp_sult_d (FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRRR( vfcmp_cond_d_op, fcmp_sult, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vfcmp_sle_d (FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRRR( vfcmp_cond_d_op, fcmp_sle , (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vfcmp_sule_d (FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRRR( vfcmp_cond_d_op, fcmp_sule, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vfcmp_sne_d (FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRRR( vfcmp_cond_d_op, fcmp_sne , (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vfcmp_sor_d (FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRRR( vfcmp_cond_d_op, fcmp_sor , (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vfcmp_sune_d (FloatRegister 
vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRRR( vfcmp_cond_d_op, fcmp_sune, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ ++ void xvfcmp_caf_s (FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRRR(xvfcmp_cond_s_op, fcmp_caf , (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvfcmp_cun_s (FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRRR(xvfcmp_cond_s_op, fcmp_cun , (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvfcmp_ceq_s (FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRRR(xvfcmp_cond_s_op, fcmp_ceq , (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvfcmp_cueq_s (FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRRR(xvfcmp_cond_s_op, fcmp_cueq, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvfcmp_clt_s (FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRRR(xvfcmp_cond_s_op, fcmp_clt , (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvfcmp_cult_s (FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRRR(xvfcmp_cond_s_op, fcmp_cult, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvfcmp_cle_s (FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRRR(xvfcmp_cond_s_op, fcmp_cle , (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvfcmp_cule_s (FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRRR(xvfcmp_cond_s_op, fcmp_cule, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvfcmp_cne_s (FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRRR(xvfcmp_cond_s_op, fcmp_cne , (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvfcmp_cor_s (FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRRR(xvfcmp_cond_s_op, fcmp_cor , (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvfcmp_cune_s (FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRRR(xvfcmp_cond_s_op, fcmp_cune, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvfcmp_saf_s (FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRRR(xvfcmp_cond_s_op, fcmp_saf , (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvfcmp_sun_s (FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRRR(xvfcmp_cond_s_op, fcmp_sun , (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvfcmp_seq_s (FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRRR(xvfcmp_cond_s_op, fcmp_seq , (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvfcmp_sueq_s (FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRRR(xvfcmp_cond_s_op, fcmp_sueq, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvfcmp_slt_s (FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRRR(xvfcmp_cond_s_op, fcmp_slt , (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvfcmp_sult_s (FloatRegister xd, FloatRegister xj, 
FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRRR(xvfcmp_cond_s_op, fcmp_sult, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvfcmp_sle_s (FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRRR(xvfcmp_cond_s_op, fcmp_sle , (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvfcmp_sule_s (FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRRR(xvfcmp_cond_s_op, fcmp_sule, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvfcmp_sne_s (FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRRR(xvfcmp_cond_s_op, fcmp_sne , (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvfcmp_sor_s (FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRRR(xvfcmp_cond_s_op, fcmp_sor , (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvfcmp_sune_s (FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRRR(xvfcmp_cond_s_op, fcmp_sune, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void xvfcmp_caf_d (FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRRR(xvfcmp_cond_d_op, fcmp_caf , (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvfcmp_cun_d (FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRRR(xvfcmp_cond_d_op, fcmp_cun , (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvfcmp_ceq_d (FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRRR(xvfcmp_cond_d_op, fcmp_ceq , (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvfcmp_cueq_d (FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRRR(xvfcmp_cond_d_op, fcmp_cueq, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvfcmp_clt_d (FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRRR(xvfcmp_cond_d_op, fcmp_clt , (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvfcmp_cult_d (FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRRR(xvfcmp_cond_d_op, fcmp_cult, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvfcmp_cle_d (FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRRR(xvfcmp_cond_d_op, fcmp_cle , (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvfcmp_cule_d (FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRRR(xvfcmp_cond_d_op, fcmp_cule, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvfcmp_cne_d (FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRRR(xvfcmp_cond_d_op, fcmp_cne , (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvfcmp_cor_d (FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRRR(xvfcmp_cond_d_op, fcmp_cor , (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvfcmp_cune_d (FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRRR(xvfcmp_cond_d_op, fcmp_cune, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvfcmp_saf_d (FloatRegister xd, FloatRegister xj, FloatRegister xk) { 
ASSERT_LASX emit_int32(insn_RRRR(xvfcmp_cond_d_op, fcmp_saf , (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvfcmp_sun_d (FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRRR(xvfcmp_cond_d_op, fcmp_sun , (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvfcmp_seq_d (FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRRR(xvfcmp_cond_d_op, fcmp_seq , (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvfcmp_sueq_d (FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRRR(xvfcmp_cond_d_op, fcmp_sueq, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvfcmp_slt_d (FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRRR(xvfcmp_cond_d_op, fcmp_slt , (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvfcmp_sult_d (FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRRR(xvfcmp_cond_d_op, fcmp_sult, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvfcmp_sle_d (FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRRR(xvfcmp_cond_d_op, fcmp_sle , (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvfcmp_sule_d (FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRRR(xvfcmp_cond_d_op, fcmp_sule, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvfcmp_sne_d (FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRRR(xvfcmp_cond_d_op, fcmp_sne , (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvfcmp_sor_d (FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRRR(xvfcmp_cond_d_op, fcmp_sor , (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvfcmp_sune_d (FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRRR(xvfcmp_cond_d_op, fcmp_sune, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void vbitsel_v(FloatRegister vd, FloatRegister vj, FloatRegister vk, FloatRegister va) { ASSERT_LSX emit_int32(insn_RRRR( vbitsel_v_op, (int)va->encoding(), (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void xvbitsel_v(FloatRegister xd, FloatRegister xj, FloatRegister xk, FloatRegister xa) { ASSERT_LASX emit_int32(insn_RRRR(xvbitsel_v_op, (int)xa->encoding(), (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void vinsgr2vr_b(FloatRegister vd, Register rj, int ui4) { ASSERT_LSX emit_int32(insn_I4RR( vinsgr2vr_b_op, ui4, (int)rj->encoding(), (int)vd->encoding())); } ++ void vinsgr2vr_h(FloatRegister vd, Register rj, int ui3) { ASSERT_LSX emit_int32(insn_I3RR( vinsgr2vr_h_op, ui3, (int)rj->encoding(), (int)vd->encoding())); } ++ void vinsgr2vr_w(FloatRegister vd, Register rj, int ui2) { ASSERT_LSX emit_int32(insn_I2RR( vinsgr2vr_w_op, ui2, (int)rj->encoding(), (int)vd->encoding())); } ++ void vinsgr2vr_d(FloatRegister vd, Register rj, int ui1) { ASSERT_LSX emit_int32(insn_I1RR( vinsgr2vr_d_op, ui1, (int)rj->encoding(), (int)vd->encoding())); } ++ ++ void xvinsgr2vr_w(FloatRegister xd, Register rj, int ui3) { ASSERT_LASX emit_int32(insn_I3RR(xvinsgr2vr_w_op, ui3, (int)rj->encoding(), (int)xd->encoding())); } ++ void xvinsgr2vr_d(FloatRegister xd, Register rj, int ui2) { ASSERT_LASX 
emit_int32(insn_I2RR(xvinsgr2vr_d_op, ui2, (int)rj->encoding(), (int)xd->encoding())); } ++ ++ void vpickve2gr_b(Register rd, FloatRegister vj, int ui4) { ASSERT_LSX emit_int32(insn_I4RR( vpickve2gr_b_op, ui4, (int)vj->encoding(), (int)rd->encoding())); } ++ void vpickve2gr_h(Register rd, FloatRegister vj, int ui3) { ASSERT_LSX emit_int32(insn_I3RR( vpickve2gr_h_op, ui3, (int)vj->encoding(), (int)rd->encoding())); } ++ void vpickve2gr_w(Register rd, FloatRegister vj, int ui2) { ASSERT_LSX emit_int32(insn_I2RR( vpickve2gr_w_op, ui2, (int)vj->encoding(), (int)rd->encoding())); } ++ void vpickve2gr_d(Register rd, FloatRegister vj, int ui1) { ASSERT_LSX emit_int32(insn_I1RR( vpickve2gr_d_op, ui1, (int)vj->encoding(), (int)rd->encoding())); } ++ ++ void vpickve2gr_bu(Register rd, FloatRegister vj, int ui4) { ASSERT_LSX emit_int32(insn_I4RR( vpickve2gr_bu_op, ui4, (int)vj->encoding(), (int)rd->encoding())); } ++ void vpickve2gr_hu(Register rd, FloatRegister vj, int ui3) { ASSERT_LSX emit_int32(insn_I3RR( vpickve2gr_hu_op, ui3, (int)vj->encoding(), (int)rd->encoding())); } ++ void vpickve2gr_wu(Register rd, FloatRegister vj, int ui2) { ASSERT_LSX emit_int32(insn_I2RR( vpickve2gr_wu_op, ui2, (int)vj->encoding(), (int)rd->encoding())); } ++ void vpickve2gr_du(Register rd, FloatRegister vj, int ui1) { ASSERT_LSX emit_int32(insn_I1RR( vpickve2gr_du_op, ui1, (int)vj->encoding(), (int)rd->encoding())); } ++ ++ void xvpickve2gr_w(Register rd, FloatRegister xj, int ui3) { ASSERT_LASX emit_int32(insn_I3RR(xvpickve2gr_w_op, ui3, (int)xj->encoding(), (int)rd->encoding())); } ++ void xvpickve2gr_d(Register rd, FloatRegister xj, int ui2) { ASSERT_LASX emit_int32(insn_I2RR(xvpickve2gr_d_op, ui2, (int)xj->encoding(), (int)rd->encoding())); } ++ ++ void xvpickve2gr_wu(Register rd, FloatRegister xj, int ui3) { ASSERT_LASX emit_int32(insn_I3RR(xvpickve2gr_wu_op, ui3, (int)xj->encoding(), (int)rd->encoding())); } ++ void xvpickve2gr_du(Register rd, FloatRegister xj, int ui2) { ASSERT_LASX emit_int32(insn_I2RR(xvpickve2gr_du_op, ui2, (int)xj->encoding(), (int)rd->encoding())); } ++ ++ void vreplgr2vr_b(FloatRegister vd, Register rj) { ASSERT_LSX emit_int32(insn_RR( vreplgr2vr_b_op, (int)rj->encoding(), (int)vd->encoding())); } ++ void vreplgr2vr_h(FloatRegister vd, Register rj) { ASSERT_LSX emit_int32(insn_RR( vreplgr2vr_h_op, (int)rj->encoding(), (int)vd->encoding())); } ++ void vreplgr2vr_w(FloatRegister vd, Register rj) { ASSERT_LSX emit_int32(insn_RR( vreplgr2vr_w_op, (int)rj->encoding(), (int)vd->encoding())); } ++ void vreplgr2vr_d(FloatRegister vd, Register rj) { ASSERT_LSX emit_int32(insn_RR( vreplgr2vr_d_op, (int)rj->encoding(), (int)vd->encoding())); } ++ void xvreplgr2vr_b(FloatRegister xd, Register rj) { ASSERT_LASX emit_int32(insn_RR(xvreplgr2vr_b_op, (int)rj->encoding(), (int)xd->encoding())); } ++ void xvreplgr2vr_h(FloatRegister xd, Register rj) { ASSERT_LASX emit_int32(insn_RR(xvreplgr2vr_h_op, (int)rj->encoding(), (int)xd->encoding())); } ++ void xvreplgr2vr_w(FloatRegister xd, Register rj) { ASSERT_LASX emit_int32(insn_RR(xvreplgr2vr_w_op, (int)rj->encoding(), (int)xd->encoding())); } ++ void xvreplgr2vr_d(FloatRegister xd, Register rj) { ASSERT_LASX emit_int32(insn_RR(xvreplgr2vr_d_op, (int)rj->encoding(), (int)xd->encoding())); } ++ ++ void vreplvei_b(FloatRegister vd, FloatRegister vj, int ui4) { ASSERT_LSX emit_int32(insn_I4RR(vreplvei_b_op, ui4, (int)vj->encoding(), (int)vd->encoding())); } ++ void vreplvei_h(FloatRegister vd, FloatRegister vj, int ui3) { ASSERT_LSX 
emit_int32(insn_I3RR(vreplvei_h_op, ui3, (int)vj->encoding(), (int)vd->encoding())); } ++ void vreplvei_w(FloatRegister vd, FloatRegister vj, int ui2) { ASSERT_LSX emit_int32(insn_I2RR(vreplvei_w_op, ui2, (int)vj->encoding(), (int)vd->encoding())); } ++ void vreplvei_d(FloatRegister vd, FloatRegister vj, int ui1) { ASSERT_LSX emit_int32(insn_I1RR(vreplvei_d_op, ui1, (int)vj->encoding(), (int)vd->encoding())); } ++ ++ void xvreplve0_b(FloatRegister xd, FloatRegister xj) { ASSERT_LASX emit_int32(insn_RR(xvreplve0_b_op, (int)xj->encoding(), (int)xd->encoding())); } ++ void xvreplve0_h(FloatRegister xd, FloatRegister xj) { ASSERT_LASX emit_int32(insn_RR(xvreplve0_h_op, (int)xj->encoding(), (int)xd->encoding())); } ++ void xvreplve0_w(FloatRegister xd, FloatRegister xj) { ASSERT_LASX emit_int32(insn_RR(xvreplve0_w_op, (int)xj->encoding(), (int)xd->encoding())); } ++ void xvreplve0_d(FloatRegister xd, FloatRegister xj) { ASSERT_LASX emit_int32(insn_RR(xvreplve0_d_op, (int)xj->encoding(), (int)xd->encoding())); } ++ void xvreplve0_q(FloatRegister xd, FloatRegister xj) { ASSERT_LASX emit_int32(insn_RR(xvreplve0_q_op, (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void xvinsve0_w(FloatRegister xd, FloatRegister xj, int ui3) { ASSERT_LASX emit_int32(insn_I3RR(xvinsve0_w_op, ui3, (int)xj->encoding(), (int)xd->encoding())); } ++ void xvinsve0_d(FloatRegister xd, FloatRegister xj, int ui2) { ASSERT_LASX emit_int32(insn_I2RR(xvinsve0_d_op, ui2, (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void xvpickve_w(FloatRegister xd, FloatRegister xj, int ui3) { ASSERT_LASX emit_int32(insn_I3RR(xvpickve_w_op, ui3, (int)xj->encoding(), (int)xd->encoding())); } ++ void xvpickve_d(FloatRegister xd, FloatRegister xj, int ui2) { ASSERT_LASX emit_int32(insn_I2RR(xvpickve_d_op, ui2, (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void vshuf_b(FloatRegister vd, FloatRegister vj, FloatRegister vk, FloatRegister va) { ASSERT_LSX emit_int32(insn_RRRR( vshuf_b_op, (int)va->encoding(), (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void xvshuf_b(FloatRegister xd, FloatRegister xj, FloatRegister xk, FloatRegister xa) { ASSERT_LASX emit_int32(insn_RRRR(xvshuf_b_op, (int)xa->encoding(), (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void vshuf_h(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vshuf_h_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vshuf_w(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vshuf_w_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vshuf_d(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vshuf_d_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ ++ void xvshuf_h(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvshuf_h_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvshuf_w(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvshuf_w_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvshuf_d(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvshuf_d_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void xvperm_w(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvperm_w_op, (int)xk->encoding(), 
(int)xj->encoding(), (int)xd->encoding())); } ++ ++ void vshuf4i_b(FloatRegister vd, FloatRegister vj, int ui8) { ASSERT_LSX assert(is_uimm(ui8, 8), "not a unsigned 8-bit int"); emit_int32(insn_I8RR( vshuf4i_b_op, ui8, (int)vj->encoding(), (int)vd->encoding())); } ++ void vshuf4i_h(FloatRegister vd, FloatRegister vj, int ui8) { ASSERT_LSX assert(is_uimm(ui8, 8), "not a unsigned 8-bit int"); emit_int32(insn_I8RR( vshuf4i_h_op, ui8, (int)vj->encoding(), (int)vd->encoding())); } ++ void vshuf4i_w(FloatRegister vd, FloatRegister vj, int ui8) { ASSERT_LSX assert(is_uimm(ui8, 8), "not a unsigned 8-bit int"); emit_int32(insn_I8RR( vshuf4i_w_op, ui8, (int)vj->encoding(), (int)vd->encoding())); } ++ void xvshuf4i_b(FloatRegister xd, FloatRegister xj, int ui8) { ASSERT_LASX assert(is_uimm(ui8, 8), "not a unsigned 8-bit int"); emit_int32(insn_I8RR(xvshuf4i_b_op, ui8, (int)xj->encoding(), (int)xd->encoding())); } ++ void xvshuf4i_h(FloatRegister xd, FloatRegister xj, int ui8) { ASSERT_LASX assert(is_uimm(ui8, 8), "not a unsigned 8-bit int"); emit_int32(insn_I8RR(xvshuf4i_h_op, ui8, (int)xj->encoding(), (int)xd->encoding())); } ++ void xvshuf4i_w(FloatRegister xd, FloatRegister xj, int ui8) { ASSERT_LASX assert(is_uimm(ui8, 8), "not a unsigned 8-bit int"); emit_int32(insn_I8RR(xvshuf4i_w_op, ui8, (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void vshuf4i_d(FloatRegister vd, FloatRegister vj, int ui8) { ASSERT_LSX assert(is_uimm(ui8, 8), "not a unsigned 8-bit int"); emit_int32(insn_I8RR( vshuf4i_d_op, ui8, (int)vj->encoding(), (int)vd->encoding())); } ++ void xvshuf4i_d(FloatRegister xd, FloatRegister xj, int ui8) { ASSERT_LASX assert(is_uimm(ui8, 8), "not a unsigned 8-bit int"); emit_int32(insn_I8RR(xvshuf4i_d_op, ui8, (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void vpermi_w(FloatRegister vd, FloatRegister vj, int ui8) { ASSERT_LSX assert(is_uimm(ui8, 8), "not a unsigned 8-bit int"); emit_int32(insn_I8RR( vpermi_w_op, ui8, (int)vj->encoding(), (int)vd->encoding())); } ++ void xvpermi_w(FloatRegister xd, FloatRegister xj, int ui8) { ASSERT_LASX assert(is_uimm(ui8, 8), "not a unsigned 8-bit int"); emit_int32(insn_I8RR(xvpermi_w_op, ui8, (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void xvpermi_d(FloatRegister xd, FloatRegister xj, int ui8) { ASSERT_LASX assert(is_uimm(ui8, 8), "not a unsigned 8-bit int"); emit_int32(insn_I8RR(xvpermi_d_op, ui8, (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void xvpermi_q(FloatRegister xd, FloatRegister xj, int ui8) { ASSERT_LASX assert(is_uimm(ui8, 8), "not a unsigned 8-bit int"); emit_int32(insn_I8RR(xvpermi_q_op, ui8, (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void vld(FloatRegister vd, Register rj, int si12) { ASSERT_LSX assert(is_simm(si12, 12), "not a signed 12-bit int"); emit_int32(insn_I12RR( vld_op, si12, (int)rj->encoding(), (int)vd->encoding()));} ++ void xvld(FloatRegister xd, Register rj, int si12) { ASSERT_LASX assert(is_simm(si12, 12), "not a signed 12-bit int"); emit_int32(insn_I12RR(xvld_op, si12, (int)rj->encoding(), (int)xd->encoding()));} ++ ++ void vst(FloatRegister vd, Register rj, int si12) { ASSERT_LSX assert(is_simm(si12, 12), "not a signed 12-bit int"); emit_int32(insn_I12RR( vst_op, si12, (int)rj->encoding(), (int)vd->encoding()));} ++ void xvst(FloatRegister xd, Register rj, int si12) { ASSERT_LASX assert(is_simm(si12, 12), "not a signed 12-bit int"); emit_int32(insn_I12RR(xvst_op, si12, (int)rj->encoding(), (int)xd->encoding()));} ++ ++ void vldx(FloatRegister vd, Register rj, Register rk) { ASSERT_LSX 
emit_int32(insn_RRR( vldx_op, (int)rk->encoding(), (int)rj->encoding(), (int)vd->encoding())); }
++ void xvldx(FloatRegister xd, Register rj, Register rk) { ASSERT_LASX emit_int32(insn_RRR(xvldx_op, (int)rk->encoding(), (int)rj->encoding(), (int)xd->encoding())); }
++
++ void vstx(FloatRegister vd, Register rj, Register rk) { ASSERT_LSX emit_int32(insn_RRR( vstx_op, (int)rk->encoding(), (int)rj->encoding(), (int)vd->encoding())); }
++ void xvstx(FloatRegister xd, Register rj, Register rk) { ASSERT_LASX emit_int32(insn_RRR(xvstx_op, (int)rk->encoding(), (int)rj->encoding(), (int)xd->encoding())); }
++
++#undef ASSERT_LSX
++#undef ASSERT_LASX
++
++public:
++ // Creation
++ Assembler(CodeBuffer* code) : AbstractAssembler(code) {}
++
++ // Decoding
++ static address locate_operand(address inst, WhichOperand which);
++ static address locate_next_instruction(address inst);
++};
++
++#endif // CPU_LOONGARCH_ASSEMBLER_LOONGARCH_HPP
+diff --git a/src/hotspot/cpu/loongarch/assembler_loongarch.inline.hpp b/src/hotspot/cpu/loongarch/assembler_loongarch.inline.hpp
+new file mode 100644
+index 00000000000..9ca0cd45047
+--- /dev/null
++++ b/src/hotspot/cpu/loongarch/assembler_loongarch.inline.hpp
+@@ -0,0 +1,33 @@
++/*
++ * Copyright (c) 1997, 2010, Oracle and/or its affiliates. All rights reserved.
++ * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved.
++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
++ *
++ * This code is free software; you can redistribute it and/or modify it
++ * under the terms of the GNU General Public License version 2 only, as
++ * published by the Free Software Foundation.
++ *
++ * This code is distributed in the hope that it will be useful, but WITHOUT
++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
++ * version 2 for more details (a copy is included in the LICENSE file that
++ * accompanied this code).
++ *
++ * You should have received a copy of the GNU General Public License version
++ * 2 along with this work; if not, write to the Free Software Foundation,
++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
++ *
++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
++ * or visit www.oracle.com if you need additional information or have any
++ * questions.
++ *
++ */
++
++#ifndef CPU_LOONGARCH_ASSEMBLER_LOONGARCH_INLINE_HPP
++#define CPU_LOONGARCH_ASSEMBLER_LOONGARCH_INLINE_HPP
++
++#include "asm/assembler.inline.hpp"
++#include "asm/codeBuffer.hpp"
++#include "code/codeCache.hpp"
++
++#endif // CPU_LOONGARCH_ASSEMBLER_LOONGARCH_INLINE_HPP
+diff --git a/src/hotspot/cpu/loongarch/bytes_loongarch.hpp b/src/hotspot/cpu/loongarch/bytes_loongarch.hpp
+new file mode 100644
+index 00000000000..c15344eb390
+--- /dev/null
++++ b/src/hotspot/cpu/loongarch/bytes_loongarch.hpp
+@@ -0,0 +1,73 @@
++/*
++ * Copyright (c) 1997, 2010, Oracle and/or its affiliates. All rights reserved.
++ * Copyright (c) 2015, 2021, Loongson Technology. All rights reserved.
++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
++ *
++ * This code is free software; you can redistribute it and/or modify it
++ * under the terms of the GNU General Public License version 2 only, as
++ * published by the Free Software Foundation.
++ *
++ * This code is distributed in the hope that it will be useful, but WITHOUT
++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
++ * version 2 for more details (a copy is included in the LICENSE file that
++ * accompanied this code).
++ *
++ * You should have received a copy of the GNU General Public License version
++ * 2 along with this work; if not, write to the Free Software Foundation,
++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
++ *
++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
++ * or visit www.oracle.com if you need additional information or have any
++ * questions.
++ *
++ */
++
++#ifndef CPU_LOONGARCH_BYTES_LOONGARCH_HPP
++#define CPU_LOONGARCH_BYTES_LOONGARCH_HPP
++
++#include "memory/allocation.hpp"
++
++class Bytes: AllStatic {
++ public:
++ // Returns true if the byte ordering used by Java is different from the native byte ordering
++ // of the underlying machine. For example, this is true for Intel x86, but false for Solaris
++ // on Sparc.
++ // we use LoongArch, so return true
++ static inline bool is_Java_byte_ordering_different(){ return true; }
++
++
++ // Efficient reading and writing of unaligned unsigned data in platform-specific byte ordering
++ // (no special code is needed since LoongArch CPUs can access unaligned data)
++ static inline u2 get_native_u2(address p) { return *(u2*)p; }
++ static inline u4 get_native_u4(address p) { return *(u4*)p; }
++ static inline u8 get_native_u8(address p) { return *(u8*)p; }
++
++ static inline void put_native_u2(address p, u2 x) { *(u2*)p = x; }
++ static inline void put_native_u4(address p, u4 x) { *(u4*)p = x; }
++ static inline void put_native_u8(address p, u8 x) { *(u8*)p = x; }
++
++
++ // Efficient reading and writing of unaligned unsigned data in Java
++ // byte ordering (i.e. big-endian ordering). Byte-order reversal is
++ // needed since LoongArch64 CPUs use little-endian format.
++ static inline u2 get_Java_u2(address p) { return swap_u2(get_native_u2(p)); }
++ static inline u4 get_Java_u4(address p) { return swap_u4(get_native_u4(p)); }
++ static inline u8 get_Java_u8(address p) { return swap_u8(get_native_u8(p)); }
++
++ static inline void put_Java_u2(address p, u2 x) { put_native_u2(p, swap_u2(x)); }
++ static inline void put_Java_u4(address p, u4 x) { put_native_u4(p, swap_u4(x)); }
++ static inline void put_Java_u8(address p, u8 x) { put_native_u8(p, swap_u8(x)); }
++
++
++ // Efficient swapping of byte ordering
++ static inline u2 swap_u2(u2 x); // compiler-dependent implementation
++ static inline u4 swap_u4(u4 x); // compiler-dependent implementation
++ static inline u8 swap_u8(u8 x);
++};
++
++
++// The following header contains the implementations of swap_u2, swap_u4, and swap_u8[_base]
++#include OS_CPU_HEADER_INLINE(bytes)
++
++#endif // CPU_LOONGARCH_BYTES_LOONGARCH_HPP
+diff --git a/src/hotspot/cpu/loongarch/c1_CodeStubs_loongarch_64.cpp b/src/hotspot/cpu/loongarch/c1_CodeStubs_loongarch_64.cpp
+new file mode 100644
+index 00000000000..663a9aec2a9
+--- /dev/null
++++ b/src/hotspot/cpu/loongarch/c1_CodeStubs_loongarch_64.cpp
+@@ -0,0 +1,360 @@
++/*
++ * Copyright (c) 1999, 2021, Oracle and/or its affiliates. All rights reserved.
++ * Copyright (c) 2021, Loongson Technology. All rights reserved.
++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
++ *
++ * This code is free software; you can redistribute it and/or modify it
++ * under the terms of the GNU General Public License version 2 only, as
++ * published by the Free Software Foundation.
++ *
++ * This code is distributed in the hope that it will be useful, but WITHOUT
++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
++ * version 2 for more details (a copy is included in the LICENSE file that
++ * accompanied this code).
++ *
++ * You should have received a copy of the GNU General Public License version
++ * 2 along with this work; if not, write to the Free Software Foundation,
++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
++ *
++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
++ * or visit www.oracle.com if you need additional information or have any
++ * questions.
++ *
++ */
++
++#include "precompiled.hpp"
++#include "asm/macroAssembler.inline.hpp"
++#include "c1/c1_CodeStubs.hpp"
++#include "c1/c1_FrameMap.hpp"
++#include "c1/c1_LIRAssembler.hpp"
++#include "c1/c1_MacroAssembler.hpp"
++#include "c1/c1_Runtime1.hpp"
++#include "classfile/javaClasses.hpp"
++#include "nativeInst_loongarch.hpp"
++#include "runtime/sharedRuntime.hpp"
++#include "vmreg_loongarch.inline.hpp"
++
++#define A0 RA0
++#define A3 RA3
++
++#define __ ce->masm()->
++
++void C1SafepointPollStub::emit_code(LIR_Assembler* ce) {
++ __ bind(_entry);
++ InternalAddress safepoint_pc(ce->masm()->pc() - ce->masm()->offset() + safepoint_offset());
++ __ lea(SCR2, safepoint_pc);
++ __ st_ptr(SCR2, TREG, in_bytes(JavaThread::saved_exception_pc_offset()));
++
++ assert(SharedRuntime::polling_page_return_handler_blob() != NULL,
++ "polling page return stub not created yet");
++ address stub = SharedRuntime::polling_page_return_handler_blob()->entry_point();
++
++ __ jmp(stub, relocInfo::runtime_call_type);
++}
++
++void CounterOverflowStub::emit_code(LIR_Assembler* ce) {
++ __ bind(_entry);
++ Metadata *m = _method->as_constant_ptr()->as_metadata();
++ __ mov_metadata(SCR2, m);
++ ce->store_parameter(SCR2, 1);
++ ce->store_parameter(_bci, 0);
++ __ call(Runtime1::entry_for(Runtime1::counter_overflow_id), relocInfo::runtime_call_type);
++ ce->add_call_info_here(_info);
++ ce->verify_oop_map(_info);
++ __ b(_continuation);
++}
++
++RangeCheckStub::RangeCheckStub(CodeEmitInfo* info, LIR_Opr index, LIR_Opr array)
++ : _index(index), _array(array), _throw_index_out_of_bounds_exception(false) {
++ assert(info != NULL, "must have info");
++ _info = new CodeEmitInfo(info);
++}
++
++RangeCheckStub::RangeCheckStub(CodeEmitInfo* info, LIR_Opr index)
++ : _index(index), _array(NULL), _throw_index_out_of_bounds_exception(true) {
++ assert(info != NULL, "must have info");
++ _info = new CodeEmitInfo(info);
++}
++
++void RangeCheckStub::emit_code(LIR_Assembler* ce) {
++ __ bind(_entry);
++ if (_info->deoptimize_on_exception()) {
++ address a = Runtime1::entry_for(Runtime1::predicate_failed_trap_id);
++ __ call(a, relocInfo::runtime_call_type);
++ ce->add_call_info_here(_info);
++ ce->verify_oop_map(_info);
++ debug_only(__ should_not_reach_here());
++ return;
++ }
++
++ if (_index->is_cpu_register()) {
++ __ move(SCR1, _index->as_register());
++ } else {
++ __ li(SCR1, _index->as_jint());
++ }
++ Runtime1::StubID stub_id;
++ if (_throw_index_out_of_bounds_exception) {
++ stub_id = Runtime1::throw_index_exception_id;
++ } else {
++ assert(_array != NULL, "sanity");
++ __ move(SCR2, _array->as_pointer_register());
++ stub_id = Runtime1::throw_range_check_failed_id;
++ }
++ __ call(Runtime1::entry_for(stub_id), relocInfo::runtime_call_type);
++ ce->add_call_info_here(_info);
++ ce->verify_oop_map(_info);
++ debug_only(__ should_not_reach_here());
++}
++
++PredicateFailedStub::PredicateFailedStub(CodeEmitInfo* info) {
++ _info = new CodeEmitInfo(info);
++}
++
++void PredicateFailedStub::emit_code(LIR_Assembler* ce) {
++ __ bind(_entry);
++ address a = Runtime1::entry_for(Runtime1::predicate_failed_trap_id);
++ __ call(a, relocInfo::runtime_call_type);
++ ce->add_call_info_here(_info);
++ ce->verify_oop_map(_info);
++ debug_only(__ should_not_reach_here());
++}
++
++void DivByZeroStub::emit_code(LIR_Assembler* ce) {
++ if (_offset != -1) {
++ ce->compilation()->implicit_exception_table()->append(_offset, __ offset());
++ }
++ __ bind(_entry);
++ __ call(Runtime1::entry_for(Runtime1::throw_div0_exception_id), relocInfo::runtime_call_type);
++ ce->add_call_info_here(_info);
++ ce->verify_oop_map(_info);
++#ifdef ASSERT
++ __ should_not_reach_here();
++#endif
++}
++
++// Implementation of NewInstanceStub
++
++NewInstanceStub::NewInstanceStub(LIR_Opr klass_reg, LIR_Opr result, ciInstanceKlass* klass,
++ CodeEmitInfo* info, Runtime1::StubID stub_id) {
++ _result = result;
++ _klass = klass;
++ _klass_reg = klass_reg;
++ _info = new CodeEmitInfo(info);
++ assert(stub_id == Runtime1::new_instance_id ||
++ stub_id == Runtime1::fast_new_instance_id ||
++ stub_id == Runtime1::fast_new_instance_init_check_id,
++ "need new_instance id");
++ _stub_id = stub_id;
++}
++
++void NewInstanceStub::emit_code(LIR_Assembler* ce) {
++ assert(__ rsp_offset() == 0, "frame size should be fixed");
++ __ bind(_entry);
++ __ move(A3, _klass_reg->as_register());
++ __ call(Runtime1::entry_for(_stub_id), relocInfo::runtime_call_type);
++ ce->add_call_info_here(_info);
++ ce->verify_oop_map(_info);
++ assert(_result->as_register() == A0, "result must in A0");
++ __ b(_continuation);
++}
++
++// Implementation of NewTypeArrayStub
++
++NewTypeArrayStub::NewTypeArrayStub(LIR_Opr klass_reg, LIR_Opr length, LIR_Opr result,
++ CodeEmitInfo* info) {
++ _klass_reg = klass_reg;
++ _length = length;
++ _result = result;
++ _info = new CodeEmitInfo(info);
++}
++
++void NewTypeArrayStub::emit_code(LIR_Assembler* ce) {
++ assert(__ rsp_offset() == 0, "frame size should be fixed");
++ __ bind(_entry);
++ assert(_length->as_register() == S0, "length must in S0,");
++ assert(_klass_reg->as_register() == A3, "klass_reg must in A3");
++ __ call(Runtime1::entry_for(Runtime1::new_type_array_id), relocInfo::runtime_call_type);
++ ce->add_call_info_here(_info);
++ ce->verify_oop_map(_info);
++ assert(_result->as_register() == A0, "result must in A0");
++ __ b(_continuation);
++}
++
++// Implementation of NewObjectArrayStub
++
++NewObjectArrayStub::NewObjectArrayStub(LIR_Opr klass_reg, LIR_Opr length, LIR_Opr result,
++ CodeEmitInfo* info) {
++ _klass_reg = klass_reg;
++ _result = result;
++ _length = length;
++ _info = new CodeEmitInfo(info);
++}
++
++void NewObjectArrayStub::emit_code(LIR_Assembler* ce) {
++ assert(__ rsp_offset() == 0, "frame size should be fixed");
++ __ bind(_entry);
++ assert(_length->as_register() == S0, "length must in S0,");
++ assert(_klass_reg->as_register() == A3, "klass_reg must in A3");
++ __ call(Runtime1::entry_for(Runtime1::new_object_array_id), relocInfo::runtime_call_type);
++ ce->add_call_info_here(_info);
++ ce->verify_oop_map(_info);
++ assert(_result->as_register() == A0, "result must in A0");
++ __ b(_continuation);
++}
++
++// Implementation of MonitorAccessStubs
++
++MonitorEnterStub::MonitorEnterStub(LIR_Opr obj_reg, LIR_Opr lock_reg, CodeEmitInfo* info)
++ :
MonitorAccessStub(obj_reg, lock_reg) { ++ _info = new CodeEmitInfo(info); ++} ++ ++void MonitorEnterStub::emit_code(LIR_Assembler* ce) { ++ assert(__ rsp_offset() == 0, "frame size should be fixed"); ++ __ bind(_entry); ++ ce->store_parameter(_obj_reg->as_register(), 1); ++ ce->store_parameter(_lock_reg->as_register(), 0); ++ Runtime1::StubID enter_id; ++ if (ce->compilation()->has_fpu_code()) { ++ enter_id = Runtime1::monitorenter_id; ++ } else { ++ enter_id = Runtime1::monitorenter_nofpu_id; ++ } ++ __ call(Runtime1::entry_for(enter_id), relocInfo::runtime_call_type); ++ ce->add_call_info_here(_info); ++ ce->verify_oop_map(_info); ++ __ b(_continuation); ++} ++ ++void MonitorExitStub::emit_code(LIR_Assembler* ce) { ++ __ bind(_entry); ++ if (_compute_lock) { ++ // lock_reg was destroyed by fast unlocking attempt => recompute it ++ ce->monitor_address(_monitor_ix, _lock_reg); ++ } ++ ce->store_parameter(_lock_reg->as_register(), 0); ++ // note: non-blocking leaf routine => no call info needed ++ Runtime1::StubID exit_id; ++ if (ce->compilation()->has_fpu_code()) { ++ exit_id = Runtime1::monitorexit_id; ++ } else { ++ exit_id = Runtime1::monitorexit_nofpu_id; ++ } ++ __ lipc(RA, _continuation); ++ __ jmp(Runtime1::entry_for(exit_id), relocInfo::runtime_call_type); ++} ++ ++// Implementation of patching: ++// - Copy the code at given offset to an inlined buffer (first the bytes, then the number of bytes) ++// - Replace original code with a call to the stub ++// At Runtime: ++// - call to stub, jump to runtime ++// - in runtime: preserve all registers (especially objects, i.e., source and destination object) ++// - in runtime: after initializing class, restore original code, reexecute instruction ++ ++int PatchingStub::_patch_info_offset = -NativeGeneralJump::instruction_size; ++ ++void PatchingStub::align_patch_site(MacroAssembler* masm) { ++} ++ ++void PatchingStub::emit_code(LIR_Assembler* ce) { ++ assert(false, "LoongArch64 should not use C1 runtime patching"); ++} ++ ++void DeoptimizeStub::emit_code(LIR_Assembler* ce) { ++ __ bind(_entry); ++ ce->store_parameter(_trap_request, 0); ++ __ call(Runtime1::entry_for(Runtime1::deoptimize_id), relocInfo::runtime_call_type); ++ ce->add_call_info_here(_info); ++ DEBUG_ONLY(__ should_not_reach_here()); ++} ++ ++void ImplicitNullCheckStub::emit_code(LIR_Assembler* ce) { ++ address a; ++ if (_info->deoptimize_on_exception()) { ++ // Deoptimize, do not throw the exception, because it is probably wrong to do it here.
++ a = Runtime1::entry_for(Runtime1::predicate_failed_trap_id); ++ } else { ++ a = Runtime1::entry_for(Runtime1::throw_null_pointer_exception_id); ++ } ++ ++ ce->compilation()->implicit_exception_table()->append(_offset, __ offset()); ++ __ bind(_entry); ++ __ call(a, relocInfo::runtime_call_type); ++ ce->add_call_info_here(_info); ++ ce->verify_oop_map(_info); ++ debug_only(__ should_not_reach_here()); ++} ++ ++void SimpleExceptionStub::emit_code(LIR_Assembler* ce) { ++ assert(__ rsp_offset() == 0, "frame size should be fixed"); ++ ++ __ bind(_entry); ++ // pass the object in a scratch register because all other registers ++ // must be preserved ++ if (_obj->is_cpu_register()) { ++ __ move(SCR1, _obj->as_register()); ++ } ++ __ call(Runtime1::entry_for(_stub), relocInfo::runtime_call_type); ++ ce->add_call_info_here(_info); ++ debug_only(__ should_not_reach_here()); ++} ++ ++void ArrayCopyStub::emit_code(LIR_Assembler* ce) { ++ //---------------slow case: call to native----------------- ++ __ bind(_entry); ++ // Figure out where the args should go ++ // This should really convert the IntrinsicID to the Method* and signature ++ // but I don't know how to do that. ++ // ++ VMRegPair args[5]; ++ BasicType signature[5] = { T_OBJECT, T_INT, T_OBJECT, T_INT, T_INT}; ++ SharedRuntime::java_calling_convention(signature, args, 5); ++ ++ // push parameters ++ // (src, src_pos, dest, destPos, length) ++ Register r[5]; ++ r[0] = src()->as_register(); ++ r[1] = src_pos()->as_register(); ++ r[2] = dst()->as_register(); ++ r[3] = dst_pos()->as_register(); ++ r[4] = length()->as_register(); ++ ++ // next registers will get stored on the stack ++ for (int i = 0; i < 5 ; i++ ) { ++ VMReg r_1 = args[i].first(); ++ if (r_1->is_stack()) { ++ int st_off = r_1->reg2stack() * wordSize; ++ __ stptr_d (r[i], SP, st_off); ++ } else { ++ assert(r[i] == args[i].first()->as_Register(), "Wrong register for arg "); ++ } ++ } ++ ++ ce->align_call(lir_static_call); ++ ++ ce->emit_static_call_stub(); ++ if (ce->compilation()->bailed_out()) { ++ return; // CodeCache is full ++ } ++ AddressLiteral resolve(SharedRuntime::get_resolve_static_call_stub(), ++ relocInfo::static_call_type); ++ address call = __ trampoline_call(resolve); ++ if (call == NULL) { ++ ce->bailout("trampoline stub overflow"); ++ return; ++ } ++ ce->add_call_info_here(info()); ++ ++#ifndef PRODUCT ++ if (PrintC1Statistics) { ++ __ li(SCR2, (address)&Runtime1::_arraycopy_slowcase_cnt); ++ __ increment(Address(SCR2)); ++ } ++#endif ++ ++ __ b(_continuation); ++} ++ ++#undef __ +diff --git a/src/hotspot/cpu/loongarch/c1_Defs_loongarch.hpp b/src/hotspot/cpu/loongarch/c1_Defs_loongarch.hpp +new file mode 100644 +index 00000000000..1140e44431d +--- /dev/null ++++ b/src/hotspot/cpu/loongarch/c1_Defs_loongarch.hpp +@@ -0,0 +1,79 @@ ++/* ++ * Copyright (c) 2000, 2021, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2021, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#ifndef CPU_LOONGARCH_C1_DEFS_LOONGARCH_HPP ++#define CPU_LOONGARCH_C1_DEFS_LOONGARCH_HPP ++ ++// native word offsets from memory address (little endian) ++enum { ++ pd_lo_word_offset_in_bytes = 0, ++ pd_hi_word_offset_in_bytes = BytesPerWord ++}; ++ ++// explicit rounding operations are required to implement the strictFP mode ++enum { ++ pd_strict_fp_requires_explicit_rounding = false ++}; ++ ++// FIXME: There are no callee-saved ++ ++// registers ++enum { ++ pd_nof_cpu_regs_frame_map = RegisterImpl::number_of_registers, // number of registers used during code emission ++ pd_nof_fpu_regs_frame_map = FloatRegisterImpl::number_of_registers, // number of registers used during code emission ++ ++ pd_nof_caller_save_cpu_regs_frame_map = 15, // number of registers killed by calls ++ pd_nof_caller_save_fpu_regs_frame_map = 32, // number of registers killed by calls ++ ++ pd_first_callee_saved_reg = pd_nof_caller_save_cpu_regs_frame_map, ++ pd_last_callee_saved_reg = 21, ++ ++ pd_last_allocatable_cpu_reg = pd_nof_caller_save_cpu_regs_frame_map - 1, ++ ++ pd_nof_cpu_regs_reg_alloc = pd_nof_caller_save_cpu_regs_frame_map, // number of registers that are visible to register allocator ++ pd_nof_fpu_regs_reg_alloc = 32, // number of registers that are visible to register allocator ++ ++ pd_nof_cpu_regs_linearscan = 32, // number of registers visible to linear scan ++ pd_nof_fpu_regs_linearscan = pd_nof_fpu_regs_frame_map, // number of registers visible to linear scan ++ pd_nof_xmm_regs_linearscan = 0, // don't have vector registers ++ pd_first_cpu_reg = 0, ++ pd_last_cpu_reg = pd_nof_cpu_regs_reg_alloc - 1, ++ pd_first_byte_reg = 0, ++ pd_last_byte_reg = pd_nof_cpu_regs_reg_alloc - 1, ++ pd_first_fpu_reg = pd_nof_cpu_regs_frame_map, ++ pd_last_fpu_reg = pd_first_fpu_reg + 31, ++ ++ pd_first_callee_saved_fpu_reg = 24 + pd_first_fpu_reg, ++ pd_last_callee_saved_fpu_reg = 31 + pd_first_fpu_reg, ++}; ++ ++// Encoding of float value in debug info. This is true on x86 where ++// floats are extended to doubles when stored in the stack, false for ++// LoongArch64 where floats and doubles are stored in their native form. ++enum { ++ pd_float_saved_as_double = false ++}; ++ ++#endif // CPU_LOONGARCH_C1_DEFS_LOONGARCH_HPP +diff --git a/src/hotspot/cpu/loongarch/c1_FpuStackSim_loongarch.hpp b/src/hotspot/cpu/loongarch/c1_FpuStackSim_loongarch.hpp +new file mode 100644 +index 00000000000..047412d036a +--- /dev/null ++++ b/src/hotspot/cpu/loongarch/c1_FpuStackSim_loongarch.hpp +@@ -0,0 +1,32 @@ ++/* ++ * Copyright (c) 2005, 2019, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2021, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. 
++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#ifndef CPU_LOONGARCH_C1_FPUSTACKSIM_LOONGARCH_HPP ++#define CPU_LOONGARCH_C1_FPUSTACKSIM_LOONGARCH_HPP ++ ++// No FPU stack on LoongArch ++class FpuStackSim; ++ ++#endif // CPU_LOONGARCH_C1_FPUSTACKSIM_LOONGARCH_HPP +diff --git a/src/hotspot/cpu/loongarch/c1_FpuStackSim_loongarch_64.cpp b/src/hotspot/cpu/loongarch/c1_FpuStackSim_loongarch_64.cpp +new file mode 100644 +index 00000000000..1a89c437a83 +--- /dev/null ++++ b/src/hotspot/cpu/loongarch/c1_FpuStackSim_loongarch_64.cpp +@@ -0,0 +1,31 @@ ++/* ++ * Copyright (c) 2005, 2017, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2021, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++//-------------------------------------------------------- ++// FpuStackSim ++//-------------------------------------------------------- ++ ++// No FPU stack on LoongArch64 ++#include "precompiled.hpp" +diff --git a/src/hotspot/cpu/loongarch/c1_FrameMap_loongarch.hpp b/src/hotspot/cpu/loongarch/c1_FrameMap_loongarch.hpp +new file mode 100644 +index 00000000000..4f0cf053617 +--- /dev/null ++++ b/src/hotspot/cpu/loongarch/c1_FrameMap_loongarch.hpp +@@ -0,0 +1,143 @@ ++/* ++ * Copyright (c) 1999, 2019, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2021, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#ifndef CPU_LOONGARCH_C1_FRAMEMAP_LOONGARCH_HPP ++#define CPU_LOONGARCH_C1_FRAMEMAP_LOONGARCH_HPP ++ ++// On LoongArch64 the frame looks as follows: ++// ++// +-----------------------------+---------+----------------------------------------+----------------+----------- ++// | size_arguments-nof_reg_args | 2 words | size_locals-size_arguments+numreg_args | _size_monitors | spilling . ++// +-----------------------------+---------+----------------------------------------+----------------+----------- ++ ++ public: ++ static const int pd_c_runtime_reserved_arg_size; ++ ++ enum { ++ first_available_sp_in_frame = 0, ++ frame_pad_in_bytes = 16, ++ nof_reg_args = 8 ++ }; ++ ++ public: ++ static LIR_Opr receiver_opr; ++ ++ static LIR_Opr r0_opr; ++ static LIR_Opr ra_opr; ++ static LIR_Opr tp_opr; ++ static LIR_Opr sp_opr; ++ static LIR_Opr a0_opr; ++ static LIR_Opr a1_opr; ++ static LIR_Opr a2_opr; ++ static LIR_Opr a3_opr; ++ static LIR_Opr a4_opr; ++ static LIR_Opr a5_opr; ++ static LIR_Opr a6_opr; ++ static LIR_Opr a7_opr; ++ static LIR_Opr t0_opr; ++ static LIR_Opr t1_opr; ++ static LIR_Opr t2_opr; ++ static LIR_Opr t3_opr; ++ static LIR_Opr t4_opr; ++ static LIR_Opr t5_opr; ++ static LIR_Opr t6_opr; ++ static LIR_Opr t7_opr; ++ static LIR_Opr t8_opr; ++ static LIR_Opr rx_opr; ++ static LIR_Opr fp_opr; ++ static LIR_Opr s0_opr; ++ static LIR_Opr s1_opr; ++ static LIR_Opr s2_opr; ++ static LIR_Opr s3_opr; ++ static LIR_Opr s4_opr; ++ static LIR_Opr s5_opr; ++ static LIR_Opr s6_opr; ++ static LIR_Opr s7_opr; ++ static LIR_Opr s8_opr; ++ ++ static LIR_Opr ra_oop_opr; ++ static LIR_Opr a0_oop_opr; ++ static LIR_Opr a1_oop_opr; ++ static LIR_Opr a2_oop_opr; ++ static LIR_Opr a3_oop_opr; ++ static LIR_Opr a4_oop_opr; ++ static LIR_Opr a5_oop_opr; ++ static LIR_Opr a6_oop_opr; ++ static LIR_Opr a7_oop_opr; ++ static LIR_Opr t0_oop_opr; ++ static LIR_Opr t1_oop_opr; ++ static LIR_Opr t2_oop_opr; ++ static LIR_Opr t3_oop_opr; ++ static LIR_Opr t4_oop_opr; ++ static LIR_Opr t5_oop_opr; ++ static LIR_Opr t6_oop_opr; ++ static LIR_Opr t7_oop_opr; ++ static LIR_Opr t8_oop_opr; ++ static LIR_Opr fp_oop_opr; ++ static LIR_Opr s0_oop_opr; ++ static LIR_Opr s1_oop_opr; ++ static LIR_Opr s2_oop_opr; ++ static LIR_Opr s3_oop_opr; ++ static LIR_Opr s4_oop_opr; ++ static LIR_Opr s5_oop_opr; ++ static LIR_Opr s6_oop_opr; ++ static LIR_Opr s7_oop_opr; ++ static LIR_Opr s8_oop_opr; ++ ++ static LIR_Opr scr1_opr; ++ static LIR_Opr scr2_opr; ++ static LIR_Opr scr1_long_opr; ++ static LIR_Opr scr2_long_opr; ++ ++ static LIR_Opr a0_metadata_opr; ++ static LIR_Opr a1_metadata_opr; ++ static LIR_Opr a2_metadata_opr; ++ static LIR_Opr a3_metadata_opr; ++ static LIR_Opr a4_metadata_opr; ++ static LIR_Opr a5_metadata_opr; ++ ++ static LIR_Opr long0_opr; ++ static LIR_Opr long1_opr; ++ static LIR_Opr fpu0_float_opr; ++ static LIR_Opr fpu0_double_opr; ++ ++ static LIR_Opr as_long_opr(Register r) { ++ return LIR_OprFact::double_cpu(cpu_reg2rnr(r), cpu_reg2rnr(r)); ++ } ++ static 
LIR_Opr as_pointer_opr(Register r) { ++ return LIR_OprFact::double_cpu(cpu_reg2rnr(r), cpu_reg2rnr(r)); ++ } ++ ++ // VMReg name for spilled physical FPU stack slot n ++ static VMReg fpu_regname (int n); ++ ++ static bool is_caller_save_register(LIR_Opr opr) { return true; } ++ static bool is_caller_save_register(Register r) { return true; } ++ ++ static int nof_caller_save_cpu_regs() { return pd_nof_caller_save_cpu_regs_frame_map; } ++ static int last_cpu_reg() { return pd_last_cpu_reg; } ++ static int last_byte_reg() { return pd_last_byte_reg; } ++ ++#endif // CPU_LOONGARCH_C1_FRAMEMAP_LOONGARCH_HPP +diff --git a/src/hotspot/cpu/loongarch/c1_FrameMap_loongarch_64.cpp b/src/hotspot/cpu/loongarch/c1_FrameMap_loongarch_64.cpp +new file mode 100644 +index 00000000000..8d439fda060 +--- /dev/null ++++ b/src/hotspot/cpu/loongarch/c1_FrameMap_loongarch_64.cpp +@@ -0,0 +1,362 @@ ++/* ++ * Copyright (c) 1999, 2019, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2021, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#include "precompiled.hpp" ++#include "c1/c1_FrameMap.hpp" ++#include "c1/c1_LIR.hpp" ++#include "runtime/sharedRuntime.hpp" ++#include "vmreg_loongarch.inline.hpp" ++ ++#define A0 RA0 ++#define A1 RA1 ++#define A2 RA2 ++#define A3 RA3 ++#define A4 RA4 ++#define A5 RA5 ++#define A6 RA6 ++#define A7 RA7 ++#define T0 RT0 ++#define T1 RT1 ++#define T2 RT2 ++#define T3 RT3 ++#define T4 RT4 ++#define T5 RT5 ++#define T6 RT6 ++#define T7 RT7 ++#define T8 RT8 ++ ++LIR_Opr FrameMap::map_to_opr(BasicType type, VMRegPair* reg, bool) { ++ LIR_Opr opr = LIR_OprFact::illegalOpr; ++ VMReg r_1 = reg->first(); ++ VMReg r_2 = reg->second(); ++ if (r_1->is_stack()) { ++ // Convert stack slot to an SP offset ++ // The calling convention does not count the SharedRuntime::out_preserve_stack_slots() value ++ // so we must add it in here. 
++ int st_off = (r_1->reg2stack() + SharedRuntime::out_preserve_stack_slots()) * VMRegImpl::stack_slot_size; ++ opr = LIR_OprFact::address(new LIR_Address(sp_opr, st_off, type)); ++ } else if (r_1->is_Register()) { ++ Register reg = r_1->as_Register(); ++ if (r_2->is_Register() && (type == T_LONG || type == T_DOUBLE)) { ++ Register reg2 = r_2->as_Register(); ++ assert(reg2 == reg, "must be same register"); ++ opr = as_long_opr(reg); ++ } else if (is_reference_type(type)) { ++ opr = as_oop_opr(reg); ++ } else if (type == T_METADATA) { ++ opr = as_metadata_opr(reg); ++ } else if (type == T_ADDRESS) { ++ opr = as_address_opr(reg); ++ } else { ++ opr = as_opr(reg); ++ } ++ } else if (r_1->is_FloatRegister()) { ++ assert(type == T_DOUBLE || type == T_FLOAT, "wrong type"); ++ int num = r_1->as_FloatRegister()->encoding(); ++ if (type == T_FLOAT) { ++ opr = LIR_OprFact::single_fpu(num); ++ } else { ++ opr = LIR_OprFact::double_fpu(num); ++ } ++ } else { ++ ShouldNotReachHere(); ++ } ++ return opr; ++} ++ ++LIR_Opr FrameMap::r0_opr; ++LIR_Opr FrameMap::ra_opr; ++LIR_Opr FrameMap::tp_opr; ++LIR_Opr FrameMap::sp_opr; ++LIR_Opr FrameMap::a0_opr; ++LIR_Opr FrameMap::a1_opr; ++LIR_Opr FrameMap::a2_opr; ++LIR_Opr FrameMap::a3_opr; ++LIR_Opr FrameMap::a4_opr; ++LIR_Opr FrameMap::a5_opr; ++LIR_Opr FrameMap::a6_opr; ++LIR_Opr FrameMap::a7_opr; ++LIR_Opr FrameMap::t0_opr; ++LIR_Opr FrameMap::t1_opr; ++LIR_Opr FrameMap::t2_opr; ++LIR_Opr FrameMap::t3_opr; ++LIR_Opr FrameMap::t4_opr; ++LIR_Opr FrameMap::t5_opr; ++LIR_Opr FrameMap::t6_opr; ++LIR_Opr FrameMap::t7_opr; ++LIR_Opr FrameMap::t8_opr; ++LIR_Opr FrameMap::rx_opr; ++LIR_Opr FrameMap::fp_opr; ++LIR_Opr FrameMap::s0_opr; ++LIR_Opr FrameMap::s1_opr; ++LIR_Opr FrameMap::s2_opr; ++LIR_Opr FrameMap::s3_opr; ++LIR_Opr FrameMap::s4_opr; ++LIR_Opr FrameMap::s5_opr; ++LIR_Opr FrameMap::s6_opr; ++LIR_Opr FrameMap::s7_opr; ++LIR_Opr FrameMap::s8_opr; ++ ++LIR_Opr FrameMap::receiver_opr; ++ ++LIR_Opr FrameMap::ra_oop_opr; ++LIR_Opr FrameMap::a0_oop_opr; ++LIR_Opr FrameMap::a1_oop_opr; ++LIR_Opr FrameMap::a2_oop_opr; ++LIR_Opr FrameMap::a3_oop_opr; ++LIR_Opr FrameMap::a4_oop_opr; ++LIR_Opr FrameMap::a5_oop_opr; ++LIR_Opr FrameMap::a6_oop_opr; ++LIR_Opr FrameMap::a7_oop_opr; ++LIR_Opr FrameMap::t0_oop_opr; ++LIR_Opr FrameMap::t1_oop_opr; ++LIR_Opr FrameMap::t2_oop_opr; ++LIR_Opr FrameMap::t3_oop_opr; ++LIR_Opr FrameMap::t4_oop_opr; ++LIR_Opr FrameMap::t5_oop_opr; ++LIR_Opr FrameMap::t6_oop_opr; ++LIR_Opr FrameMap::t7_oop_opr; ++LIR_Opr FrameMap::t8_oop_opr; ++LIR_Opr FrameMap::fp_oop_opr; ++LIR_Opr FrameMap::s0_oop_opr; ++LIR_Opr FrameMap::s1_oop_opr; ++LIR_Opr FrameMap::s2_oop_opr; ++LIR_Opr FrameMap::s3_oop_opr; ++LIR_Opr FrameMap::s4_oop_opr; ++LIR_Opr FrameMap::s5_oop_opr; ++LIR_Opr FrameMap::s6_oop_opr; ++LIR_Opr FrameMap::s7_oop_opr; ++LIR_Opr FrameMap::s8_oop_opr; ++ ++LIR_Opr FrameMap::scr1_opr; ++LIR_Opr FrameMap::scr2_opr; ++LIR_Opr FrameMap::scr1_long_opr; ++LIR_Opr FrameMap::scr2_long_opr; ++ ++LIR_Opr FrameMap::a0_metadata_opr; ++LIR_Opr FrameMap::a1_metadata_opr; ++LIR_Opr FrameMap::a2_metadata_opr; ++LIR_Opr FrameMap::a3_metadata_opr; ++LIR_Opr FrameMap::a4_metadata_opr; ++LIR_Opr FrameMap::a5_metadata_opr; ++ ++LIR_Opr FrameMap::long0_opr; ++LIR_Opr FrameMap::long1_opr; ++LIR_Opr FrameMap::fpu0_float_opr; ++LIR_Opr FrameMap::fpu0_double_opr; ++ ++LIR_Opr FrameMap::_caller_save_cpu_regs[] = { 0 }; ++LIR_Opr FrameMap::_caller_save_fpu_regs[] = { 0 }; ++ ++//-------------------------------------------------------- ++// FrameMap 
++//-------------------------------------------------------- ++ ++void FrameMap::initialize() { ++ assert(!_init_done, "once"); ++ int i = 0; ++ ++ // caller save register ++ map_register(i, A0); a0_opr = LIR_OprFact::single_cpu(i); i++; ++ map_register(i, A1); a1_opr = LIR_OprFact::single_cpu(i); i++; ++ map_register(i, A2); a2_opr = LIR_OprFact::single_cpu(i); i++; ++ map_register(i, A3); a3_opr = LIR_OprFact::single_cpu(i); i++; ++ map_register(i, A4); a4_opr = LIR_OprFact::single_cpu(i); i++; ++ map_register(i, A5); a5_opr = LIR_OprFact::single_cpu(i); i++; ++ map_register(i, A6); a6_opr = LIR_OprFact::single_cpu(i); i++; ++ map_register(i, A7); a7_opr = LIR_OprFact::single_cpu(i); i++; ++ map_register(i, T0); t0_opr = LIR_OprFact::single_cpu(i); i++; ++ map_register(i, T1); t1_opr = LIR_OprFact::single_cpu(i); i++; ++ map_register(i, T2); t2_opr = LIR_OprFact::single_cpu(i); i++; ++ map_register(i, T3); t3_opr = LIR_OprFact::single_cpu(i); i++; ++ map_register(i, T5); t5_opr = LIR_OprFact::single_cpu(i); i++; ++ map_register(i, T6); t6_opr = LIR_OprFact::single_cpu(i); i++; ++ map_register(i, T8); t8_opr = LIR_OprFact::single_cpu(i); i++; ++ ++ // callee save register ++ map_register(i, S0); s0_opr = LIR_OprFact::single_cpu(i); i++; ++ map_register(i, S1); s1_opr = LIR_OprFact::single_cpu(i); i++; ++ map_register(i, S2); s2_opr = LIR_OprFact::single_cpu(i); i++; ++ map_register(i, S3); s3_opr = LIR_OprFact::single_cpu(i); i++; ++ map_register(i, S4); s4_opr = LIR_OprFact::single_cpu(i); i++; ++ map_register(i, S7); s7_opr = LIR_OprFact::single_cpu(i); i++; ++ map_register(i, S8); s8_opr = LIR_OprFact::single_cpu(i); i++; ++ ++ // special register ++ map_register(i, S5); s5_opr = LIR_OprFact::single_cpu(i); i++; // heapbase ++ map_register(i, S6); s6_opr = LIR_OprFact::single_cpu(i); i++; // thread ++ map_register(i, TP); tp_opr = LIR_OprFact::single_cpu(i); i++; // tp ++ map_register(i, FP); fp_opr = LIR_OprFact::single_cpu(i); i++; // fp ++ map_register(i, RA); ra_opr = LIR_OprFact::single_cpu(i); i++; // ra ++ map_register(i, SP); sp_opr = LIR_OprFact::single_cpu(i); i++; // sp ++ ++ // tmp register ++ map_register(i, T7); t7_opr = LIR_OprFact::single_cpu(i); i++; // scr1 ++ map_register(i, T4); t4_opr = LIR_OprFact::single_cpu(i); i++; // scr2 ++ ++ scr1_opr = t7_opr; ++ scr2_opr = t4_opr; ++ scr1_long_opr = LIR_OprFact::double_cpu(t7_opr->cpu_regnr(), t7_opr->cpu_regnr()); ++ scr2_long_opr = LIR_OprFact::double_cpu(t4_opr->cpu_regnr(), t4_opr->cpu_regnr()); ++ ++ long0_opr = LIR_OprFact::double_cpu(a0_opr->cpu_regnr(), a0_opr->cpu_regnr()); ++ long1_opr = LIR_OprFact::double_cpu(a1_opr->cpu_regnr(), a1_opr->cpu_regnr()); ++ ++ fpu0_float_opr = LIR_OprFact::single_fpu(0); ++ fpu0_double_opr = LIR_OprFact::double_fpu(0); ++ ++ // scr1, scr2 not included ++ _caller_save_cpu_regs[0] = a0_opr; ++ _caller_save_cpu_regs[1] = a1_opr; ++ _caller_save_cpu_regs[2] = a2_opr; ++ _caller_save_cpu_regs[3] = a3_opr; ++ _caller_save_cpu_regs[4] = a4_opr; ++ _caller_save_cpu_regs[5] = a5_opr; ++ _caller_save_cpu_regs[6] = a6_opr; ++ _caller_save_cpu_regs[7] = a7_opr; ++ _caller_save_cpu_regs[8] = t0_opr; ++ _caller_save_cpu_regs[9] = t1_opr; ++ _caller_save_cpu_regs[10] = t2_opr; ++ _caller_save_cpu_regs[11] = t3_opr; ++ _caller_save_cpu_regs[12] = t5_opr; ++ _caller_save_cpu_regs[13] = t6_opr; ++ _caller_save_cpu_regs[14] = t8_opr; ++ ++ for (int i = 0; i < 8; i++) { ++ _caller_save_fpu_regs[i] = LIR_OprFact::single_fpu(i); ++ } ++ ++ _init_done = true; ++ ++ ra_oop_opr = as_oop_opr(RA); ++ 
a0_oop_opr = as_oop_opr(A0); ++ a1_oop_opr = as_oop_opr(A1); ++ a2_oop_opr = as_oop_opr(A2); ++ a3_oop_opr = as_oop_opr(A3); ++ a4_oop_opr = as_oop_opr(A4); ++ a5_oop_opr = as_oop_opr(A5); ++ a6_oop_opr = as_oop_opr(A6); ++ a7_oop_opr = as_oop_opr(A7); ++ t0_oop_opr = as_oop_opr(T0); ++ t1_oop_opr = as_oop_opr(T1); ++ t2_oop_opr = as_oop_opr(T2); ++ t3_oop_opr = as_oop_opr(T3); ++ t4_oop_opr = as_oop_opr(T4); ++ t5_oop_opr = as_oop_opr(T5); ++ t6_oop_opr = as_oop_opr(T6); ++ t7_oop_opr = as_oop_opr(T7); ++ t8_oop_opr = as_oop_opr(T8); ++ fp_oop_opr = as_oop_opr(FP); ++ s0_oop_opr = as_oop_opr(S0); ++ s1_oop_opr = as_oop_opr(S1); ++ s2_oop_opr = as_oop_opr(S2); ++ s3_oop_opr = as_oop_opr(S3); ++ s4_oop_opr = as_oop_opr(S4); ++ s5_oop_opr = as_oop_opr(S5); ++ s6_oop_opr = as_oop_opr(S6); ++ s7_oop_opr = as_oop_opr(S7); ++ s8_oop_opr = as_oop_opr(S8); ++ ++ a0_metadata_opr = as_metadata_opr(A0); ++ a1_metadata_opr = as_metadata_opr(A1); ++ a2_metadata_opr = as_metadata_opr(A2); ++ a3_metadata_opr = as_metadata_opr(A3); ++ a4_metadata_opr = as_metadata_opr(A4); ++ a5_metadata_opr = as_metadata_opr(A5); ++ ++ sp_opr = as_pointer_opr(SP); ++ fp_opr = as_pointer_opr(FP); ++ ++ VMRegPair regs; ++ BasicType sig_bt = T_OBJECT; ++ SharedRuntime::java_calling_convention(&sig_bt, ®s, 1); ++ receiver_opr = as_oop_opr(regs.first()->as_Register()); ++ ++ for (int i = 0; i < nof_caller_save_fpu_regs; i++) { ++ _caller_save_fpu_regs[i] = LIR_OprFact::single_fpu(i); ++ } ++} ++ ++Address FrameMap::make_new_address(ByteSize sp_offset) const { ++ // for sp, based address use this: ++ // return Address(sp, in_bytes(sp_offset) - (framesize() - 2) * 4); ++ return Address(SP, in_bytes(sp_offset)); ++} ++ ++// ----------------mapping----------------------- ++// all mapping is based on fp addressing, except for simple leaf methods where we access ++// the locals sp based (and no frame is built) ++ ++// Frame for simple leaf methods (quick entries) ++// ++// +----------+ ++// | ret addr | <- TOS ++// +----------+ ++// | args | ++// | ...... | ++ ++// Frame for standard methods ++// ++// | .........| <- TOS ++// | locals | ++// +----------+ ++// | old fp, | <- RFP ++// +----------+ ++// | ret addr | ++// +----------+ ++// | args | ++// | .........| ++ ++// For OopMaps, map a local variable or spill index to an VMRegImpl name. ++// This is the offset from sp() in the frame of the slot for the index, ++// skewed by VMRegImpl::stack0 to indicate a stack location (vs.a register.) ++// ++// framesize + ++// stack0 stack0 0 <- VMReg ++// | | | ++// ...........|..............|.............| ++// 0 1 2 3 x x 4 5 6 ... | <- local indices ++// ^ ^ sp() ( x x indicate link ++// | | and return addr) ++// arguments non-argument locals ++ ++VMReg FrameMap::fpu_regname(int n) { ++ // Return the OptoReg name for the fpu stack slot "n" ++ // A spilled fpu stack slot comprises to two single-word OptoReg's. 
++ return as_FloatRegister(n)->as_VMReg(); ++} ++ ++LIR_Opr FrameMap::stack_pointer() { ++ return FrameMap::sp_opr; ++} ++ ++// JSR 292 ++LIR_Opr FrameMap::method_handle_invoke_SP_save_opr() { ++ return LIR_OprFact::illegalOpr; // Not needed on LoongArch64 ++} ++ ++bool FrameMap::validate_frame() { ++ return true; ++} +diff --git a/src/hotspot/cpu/loongarch/c1_LIRAssembler_loongarch.hpp b/src/hotspot/cpu/loongarch/c1_LIRAssembler_loongarch.hpp +new file mode 100644 +index 00000000000..baadeebb243 +--- /dev/null ++++ b/src/hotspot/cpu/loongarch/c1_LIRAssembler_loongarch.hpp +@@ -0,0 +1,84 @@ ++/* ++ * Copyright (c) 2000, 2021, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2021, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#ifndef CPU_LOONGARCH_C1_LIRASSEMBLER_LOONGARCH_HPP ++#define CPU_LOONGARCH_C1_LIRASSEMBLER_LOONGARCH_HPP ++ ++// ArrayCopyStub needs access to bailout ++friend class ArrayCopyStub; ++ ++ private: ++ int array_element_size(BasicType type) const; ++ ++ void arith_fpu_implementation(LIR_Code code, int left_index, int right_index, ++ int dest_index, bool pop_fpu_stack); ++ ++ // helper functions which checks for overflow and sets bailout if it ++ // occurs. Always returns a valid embeddable pointer but in the ++ // bailout case the pointer won't be to unique storage. ++ address float_constant(float f); ++ address double_constant(double d); ++ ++ address int_constant(jlong n); ++ ++ bool is_literal_address(LIR_Address* addr); ++ ++ // Ensure we have a valid Address (base+offset) to a stack-slot. 
++ Address stack_slot_address(int index, uint shift, int adjust = 0); ++ ++ // Record the type of the receiver in ReceiverTypeData ++ void type_profile_helper(Register mdo, ciMethodData *md, ciProfileData *data, ++ Register recv, Label* update_done); ++ void add_debug_info_for_branch(address adr, CodeEmitInfo* info); ++ ++ void casw(Register addr, Register newval, Register cmpval, bool sign); ++ void casl(Register addr, Register newval, Register cmpval); ++ ++ void poll_for_safepoint(relocInfo::relocType rtype, CodeEmitInfo* info = NULL); ++ ++ static const int max_tableswitches = 20; ++ struct tableswitch switches[max_tableswitches]; ++ int tableswitch_count; ++ ++ void init() { tableswitch_count = 0; } ++ ++ void deoptimize_trap(CodeEmitInfo *info); ++ ++ void emit_cmp_branch(LIR_OpBranch* op); ++ ++ enum { ++ // call stub: CompiledStaticCall::to_interp_stub_size() + ++ // CompiledStaticCall::to_trampoline_stub_size() ++ _call_stub_size = 13 * NativeInstruction::nop_instruction_size, ++ _exception_handler_size = DEBUG_ONLY(1*K) NOT_DEBUG(175), ++ _deopt_handler_size = 7 * NativeInstruction::nop_instruction_size ++ }; ++ ++public: ++ void store_parameter(Register r, int offset_from_sp_in_words); ++ void store_parameter(jint c, int offset_from_sp_in_words); ++ void store_parameter(jobject c, int offset_from_sp_in_words); ++ ++#endif // CPU_LOONGARCH_C1_LIRASSEMBLER_LOONGARCH_HPP +diff --git a/src/hotspot/cpu/loongarch/c1_LIRAssembler_loongarch_64.cpp b/src/hotspot/cpu/loongarch/c1_LIRAssembler_loongarch_64.cpp +new file mode 100644 +index 00000000000..2ddf19a6e5a +--- /dev/null ++++ b/src/hotspot/cpu/loongarch/c1_LIRAssembler_loongarch_64.cpp +@@ -0,0 +1,3384 @@ ++/* ++ * Copyright (c) 2000, 2021, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2021, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ * ++ */ ++ ++#include "precompiled.hpp" ++#include "asm/macroAssembler.inline.hpp" ++#include "asm/assembler.hpp" ++#include "c1/c1_CodeStubs.hpp" ++#include "c1/c1_Compilation.hpp" ++#include "c1/c1_LIRAssembler.hpp" ++#include "c1/c1_MacroAssembler.hpp" ++#include "c1/c1_Runtime1.hpp" ++#include "c1/c1_ValueStack.hpp" ++#include "ci/ciArrayKlass.hpp" ++#include "ci/ciInstance.hpp" ++#include "code/compiledIC.hpp" ++#include "gc/shared/collectedHeap.hpp" ++#include "gc/shared/gc_globals.hpp" ++#include "nativeInst_loongarch.hpp" ++#include "oops/objArrayKlass.hpp" ++#include "runtime/frame.inline.hpp" ++#include "runtime/sharedRuntime.hpp" ++#include "runtime/stubRoutines.hpp" ++#include "utilities/powerOfTwo.hpp" ++#include "vmreg_loongarch.inline.hpp" ++ ++#define A0 RA0 ++#define A1 RA1 ++#define A2 RA2 ++#define A3 RA3 ++#define A4 RA4 ++#define A5 RA5 ++#define A6 RA6 ++#define A7 RA7 ++#define T0 RT0 ++#define T1 RT1 ++#define T2 RT2 ++#define T3 RT3 ++#define T5 RT5 ++#define T6 RT6 ++#define T7 RT7 ++#define T8 RT8 ++ ++#ifndef PRODUCT ++#define COMMENT(x) do { __ block_comment(x); } while (0) ++#else ++#define COMMENT(x) ++#endif ++ ++NEEDS_CLEANUP // remove this definitions? ++ ++#define __ _masm-> ++ ++static void select_different_registers(Register preserve, Register extra, ++ Register &tmp1, Register &tmp2) { ++ if (tmp1 == preserve) { ++ assert_different_registers(tmp1, tmp2, extra); ++ tmp1 = extra; ++ } else if (tmp2 == preserve) { ++ assert_different_registers(tmp1, tmp2, extra); ++ tmp2 = extra; ++ } ++ assert_different_registers(preserve, tmp1, tmp2); ++} ++ ++static void select_different_registers(Register preserve, Register extra, ++ Register &tmp1, Register &tmp2, ++ Register &tmp3) { ++ if (tmp1 == preserve) { ++ assert_different_registers(tmp1, tmp2, tmp3, extra); ++ tmp1 = extra; ++ } else if (tmp2 == preserve) { ++ assert_different_registers(tmp1, tmp2, tmp3, extra); ++ tmp2 = extra; ++ } else if (tmp3 == preserve) { ++ assert_different_registers(tmp1, tmp2, tmp3, extra); ++ tmp3 = extra; ++ } ++ assert_different_registers(preserve, tmp1, tmp2, tmp3); ++} ++ ++bool LIR_Assembler::is_small_constant(LIR_Opr opr) { Unimplemented(); return false; } ++ ++LIR_Opr LIR_Assembler::receiverOpr() { ++ return FrameMap::receiver_opr; ++} ++ ++LIR_Opr LIR_Assembler::osrBufferPointer() { ++ return FrameMap::as_pointer_opr(receiverOpr()->as_register()); ++} ++ ++//--------------fpu register translations----------------------- ++ ++address LIR_Assembler::float_constant(float f) { ++ address const_addr = __ float_constant(f); ++ if (const_addr == NULL) { ++ bailout("const section overflow"); ++ return __ code()->consts()->start(); ++ } else { ++ return const_addr; ++ } ++} ++ ++address LIR_Assembler::double_constant(double d) { ++ address const_addr = __ double_constant(d); ++ if (const_addr == NULL) { ++ bailout("const section overflow"); ++ return __ code()->consts()->start(); ++ } else { ++ return const_addr; ++ } ++} ++ ++void LIR_Assembler::breakpoint() { Unimplemented(); } ++ ++void LIR_Assembler::push(LIR_Opr opr) { Unimplemented(); } ++ ++void LIR_Assembler::pop(LIR_Opr opr) { Unimplemented(); } ++ ++bool LIR_Assembler::is_literal_address(LIR_Address* addr) { Unimplemented(); return false; } ++ ++static Register as_reg(LIR_Opr op) { ++ return op->is_double_cpu() ? 
op->as_register_lo() : op->as_register(); ++} ++ ++static jlong as_long(LIR_Opr data) { ++ jlong result; ++ switch (data->type()) { ++ case T_INT: ++ result = (data->as_jint()); ++ break; ++ case T_LONG: ++ result = (data->as_jlong()); ++ break; ++ default: ++ ShouldNotReachHere(); ++ result = 0; // unreachable ++ } ++ return result; ++} ++ ++Address LIR_Assembler::as_Address(LIR_Address* addr) { ++ Register base = addr->base()->as_pointer_register(); ++ LIR_Opr opr = addr->index(); ++ if (opr->is_cpu_register()) { ++ Register index; ++ if (opr->is_single_cpu()) ++ index = opr->as_register(); ++ else ++ index = opr->as_register_lo(); ++ assert(addr->disp() == 0, "must be"); ++ return Address(base, index, Address::ScaleFactor(addr->scale())); ++ } else { ++ assert(addr->scale() == 0, "must be"); ++ return Address(base, addr->disp()); ++ } ++ return Address(); ++} ++ ++Address LIR_Assembler::as_Address_hi(LIR_Address* addr) { ++ ShouldNotReachHere(); ++ return Address(); ++} ++ ++Address LIR_Assembler::as_Address_lo(LIR_Address* addr) { ++ return as_Address(addr); // Ouch ++ // FIXME: This needs to be much more clever. See x86. ++} ++ ++// Ensure a valid Address (base + offset) to a stack-slot. If stack access is ++// not encodable as a base + (immediate) offset, generate an explicit address ++// calculation to hold the address in a temporary register. ++Address LIR_Assembler::stack_slot_address(int index, uint size, int adjust) { ++ precond(size == 4 || size == 8); ++ Address addr = frame_map()->address_for_slot(index, adjust); ++ precond(addr.index() == noreg); ++ precond(addr.base() == SP); ++ precond(addr.disp() > 0); ++ uint mask = size - 1; ++ assert((addr.disp() & mask) == 0, "scaled offsets only"); ++ return addr; ++} ++ ++void LIR_Assembler::osr_entry() { ++ offsets()->set_value(CodeOffsets::OSR_Entry, code_offset()); ++ BlockBegin* osr_entry = compilation()->hir()->osr_entry(); ++ ValueStack* entry_state = osr_entry->state(); ++ int number_of_locks = entry_state->locks_size(); ++ ++ // we jump here if osr happens with the interpreter ++ // state set up to continue at the beginning of the ++ // loop that triggered osr - in particular, we have ++ // the following registers setup: ++ // ++ // A2: osr buffer ++ // ++ ++ // build frame ++ ciMethod* m = compilation()->method(); ++ __ build_frame(initial_frame_size_in_bytes(), bang_size_in_bytes()); ++ ++ // OSR buffer is ++ // ++ // locals[nlocals-1..0] ++ // monitors[0..number_of_locks] ++ // ++ // locals is a direct copy of the interpreter frame so in the osr buffer ++ // so first slot in the local array is the last local from the interpreter ++ // and last slot is local[0] (receiver) from the interpreter ++ // ++ // Similarly with locks. The first lock slot in the osr buffer is the nth lock ++ // from the interpreter frame, the nth lock slot in the osr buffer is 0th lock ++ // in the interpreter frame (the method lock if a sync method) ++ ++ // Initialize monitors in the compiled activation. ++ // A2: pointer to osr buffer ++ // ++ // All other registers are dead at this point and the locals will be ++ // copied into place by code emitted in the IR. 
++ ++ Register OSR_buf = osrBufferPointer()->as_pointer_register(); ++ { ++ assert(frame::interpreter_frame_monitor_size() == BasicObjectLock::size(), "adjust code below"); ++ int monitor_offset = BytesPerWord * method()->max_locals() + (2 * BytesPerWord) * (number_of_locks - 1); ++ // SharedRuntime::OSR_migration_begin() packs BasicObjectLocks in ++ // the OSR buffer using 2 word entries: first the lock and then ++ // the oop. ++ for (int i = 0; i < number_of_locks; i++) { ++ int slot_offset = monitor_offset - ((i * 2) * BytesPerWord); ++#ifdef ASSERT ++ // verify the interpreter's monitor has a non-null object ++ { ++ Label L; ++ __ ld_ptr(SCR1, Address(OSR_buf, slot_offset + 1 * BytesPerWord)); ++ __ bnez(SCR1, L); ++ __ stop("locked object is NULL"); ++ __ bind(L); ++ } ++#endif ++ __ ld_ptr(S0, Address(OSR_buf, slot_offset + 0)); ++ __ st_ptr(S0, frame_map()->address_for_monitor_lock(i)); ++ __ ld_ptr(S0, Address(OSR_buf, slot_offset + 1*BytesPerWord)); ++ __ st_ptr(S0, frame_map()->address_for_monitor_object(i)); ++ } ++ } ++} ++ ++// inline cache check; done before the frame is built. ++int LIR_Assembler::check_icache() { ++ Register receiver = FrameMap::receiver_opr->as_register(); ++ Register ic_klass = IC_Klass; ++ int start_offset = __ offset(); ++ Label dont; ++ ++ __ verify_oop(receiver); ++ ++ // explicit NULL check not needed since load from [klass_offset] causes a trap ++ // check against inline cache ++ assert(!MacroAssembler::needs_explicit_null_check(oopDesc::klass_offset_in_bytes()), ++ "must add explicit null check"); ++ ++ __ load_klass(SCR2, receiver); ++ __ beq(SCR2, ic_klass, dont); ++ ++ // if icache check fails, then jump to runtime routine ++ // Note: RECEIVER must still contain the receiver! ++ __ jmp(SharedRuntime::get_ic_miss_stub(), relocInfo::runtime_call_type); ++ ++ // We align the verified entry point unless the method body ++ // (including its inline cache check) will fit in a single 64-byte ++ // icache line. ++ if (!method()->is_accessor() || __ offset() - start_offset > 4 * 4) { ++ // force alignment after the cache check. 
++    __ align(CodeEntryAlignment);
++  }
++
++  __ bind(dont);
++  return start_offset;
++}
++
++void LIR_Assembler::clinit_barrier(ciMethod* method) {
++  assert(VM_Version::supports_fast_class_init_checks(), "sanity");
++  assert(!method->holder()->is_not_initialized(), "initialization should have been started");
++  Label L_skip_barrier;
++
++  __ mov_metadata(SCR2, method->holder()->constant_encoding());
++  __ clinit_barrier(SCR2, SCR1, &L_skip_barrier /*L_fast_path*/);
++  __ jmp(SharedRuntime::get_handle_wrong_method_stub(), relocInfo::runtime_call_type);
++  __ bind(L_skip_barrier);
++}
++
++void LIR_Assembler::jobject2reg(jobject o, Register reg) {
++  if (o == NULL) {
++    __ move(reg, R0);
++  } else {
++    int oop_index = __ oop_recorder()->find_index(o);
++    RelocationHolder rspec = oop_Relocation::spec(oop_index);
++    __ relocate(rspec);
++    __ patchable_li52(reg, (long)o);
++  }
++}
++
++void LIR_Assembler::deoptimize_trap(CodeEmitInfo *info) {
++  address target = NULL;
++
++  switch (patching_id(info)) {
++    case PatchingStub::access_field_id:
++      target = Runtime1::entry_for(Runtime1::access_field_patching_id);
++      break;
++    case PatchingStub::load_klass_id:
++      target = Runtime1::entry_for(Runtime1::load_klass_patching_id);
++      break;
++    case PatchingStub::load_mirror_id:
++      target = Runtime1::entry_for(Runtime1::load_mirror_patching_id);
++      break;
++    case PatchingStub::load_appendix_id:
++      target = Runtime1::entry_for(Runtime1::load_appendix_patching_id);
++      break;
++    default: ShouldNotReachHere();
++  }
++
++  __ call(target, relocInfo::runtime_call_type);
++  add_call_info_here(info);
++}
++
++void LIR_Assembler::jobject2reg_with_patching(Register reg, CodeEmitInfo *info) {
++  deoptimize_trap(info);
++}
++
++// This specifies the rsp decrement needed to build the frame
++int LIR_Assembler::initial_frame_size_in_bytes() const {
++  // if rounding, must let FrameMap know!
++  return in_bytes(frame_map()->framesize_in_bytes());
++}
++
++int LIR_Assembler::emit_exception_handler() {
++  // if the last instruction is a call (typically to do a throw which
++  // is coming at the end after block reordering) the return address
++  // must still point into the code area in order to avoid assertion
++  // failures when searching for the corresponding bci => add a nop
++  // (was bug 5/14/1999 - gri)
++  __ nop();
++
++  // generate code for exception handler
++  address handler_base = __ start_a_stub(exception_handler_size());
++  if (handler_base == NULL) {
++    // not enough space left for the handler
++    bailout("exception handler overflow");
++    return -1;
++  }
++
++  int offset = code_offset();
++
++  // the exception oop and pc are in A0, and A1
++  // no other registers need to be preserved, so invalidate them
++  __ invalidate_registers(false, true, true, true, true, true);
++
++  // check that there is really an exception
++  __ verify_not_null_oop(A0);
++
++  // search an exception handler (A0: exception oop, A1: throwing pc)
++  __ call(Runtime1::entry_for(Runtime1::handle_exception_from_callee_id), relocInfo::runtime_call_type);
++  __ should_not_reach_here();
++  guarantee(code_offset() - offset <= exception_handler_size(), "overflow");
++  __ end_a_stub();
++
++  return offset;
++}
++
++// Emit the code to remove the frame from the stack in the exception unwind path.
++int LIR_Assembler::emit_unwind_handler() {
++#ifndef PRODUCT
++  if (CommentedAssembly) {
++    _masm->block_comment("Unwind handler");
++  }
++#endif
++
++  int offset = code_offset();
++
++  // Fetch the exception from TLS and clear out exception related thread state
++  __ ld_ptr(A0, Address(TREG, JavaThread::exception_oop_offset()));
++  __ st_ptr(R0, Address(TREG, JavaThread::exception_oop_offset()));
++  __ st_ptr(R0, Address(TREG, JavaThread::exception_pc_offset()));
++
++  __ bind(_unwind_handler_entry);
++  __ verify_not_null_oop(V0);
++  if (method()->is_synchronized() || compilation()->env()->dtrace_method_probes()) {
++    __ move(S0, V0); // Preserve the exception
++  }
++
++  // Perform needed unlocking
++  MonitorExitStub* stub = NULL;
++  if (method()->is_synchronized()) {
++    monitor_address(0, FrameMap::a0_opr);
++    stub = new MonitorExitStub(FrameMap::a0_opr, true, 0);
++    __ unlock_object(A5, A4, A0, *stub->entry());
++    __ bind(*stub->continuation());
++  }
++
++  if (compilation()->env()->dtrace_method_probes()) {
++    __ mov_metadata(A1, method()->constant_encoding());
++    __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::dtrace_method_exit), TREG, A1);
++  }
++
++  if (method()->is_synchronized() || compilation()->env()->dtrace_method_probes()) {
++    __ move(A0, S0); // Restore the exception
++  }
++
++  // remove the activation and dispatch to the unwind handler
++  __ block_comment("remove_frame and dispatch to the unwind handler");
++  __ remove_frame(initial_frame_size_in_bytes());
++  __ jmp(Runtime1::entry_for(Runtime1::unwind_exception_id), relocInfo::runtime_call_type);
++
++  // Emit the slow path assembly
++  if (stub != NULL) {
++    stub->emit_code(this);
++  }
++
++  return offset;
++}
++
++int LIR_Assembler::emit_deopt_handler() {
++  // if the last instruction is a call (typically to do a throw which
++  // is coming at the end after block reordering) the return address
++  // must still point into the code area in order to avoid assertion
++  // failures when searching for the corresponding bci => add a nop
++  // (was bug 5/14/1999 - gri)
++  __ nop();
++
++  // generate code for exception handler
++  address handler_base = __ start_a_stub(deopt_handler_size());
++  if (handler_base == NULL) {
++    // not enough space left for the handler
++    bailout("deopt handler overflow");
++    return -1;
++  }
++
++  int offset = code_offset();
++
++  __ call(SharedRuntime::deopt_blob()->unpack(), relocInfo::runtime_call_type);
++  guarantee(code_offset() - offset <= deopt_handler_size(), "overflow");
++  __ end_a_stub();
++
++  return offset;
++}
++
++void LIR_Assembler::add_debug_info_for_branch(address adr, CodeEmitInfo* info) {
++  _masm->code_section()->relocate(adr, relocInfo::poll_type);
++  int pc_offset = code_offset();
++  flush_debug_info(pc_offset);
++  info->record_debug_info(compilation()->debug_info_recorder(), pc_offset);
++  if (info->exception_handlers() != NULL) {
++    compilation()->add_exception_handlers_for_pco(pc_offset, info->exception_handlers());
++  }
++}
++
++void LIR_Assembler::return_op(LIR_Opr result, C1SafepointPollStub* code_stub) {
++  assert(result->is_illegal() || !result->is_single_cpu() || result->as_register() == V0,
++         "word returns are in V0,");
++
++  // Pop the stack before the safepoint code
++  __ remove_frame(initial_frame_size_in_bytes());
++
++  if (StackReservedPages > 0 && compilation()->has_reserved_stack_access()) {
++    __ reserved_stack_check();
++  }
++
++  code_stub->set_safepoint_offset(__ offset());
++  __ relocate(relocInfo::poll_return_type);
++  __ safepoint_poll(*code_stub->entry(), TREG, true /* at_return */, false /* acquire */, true /* in_nmethod */);
++
++  __ jr(RA);
++}
++
++int LIR_Assembler::safepoint_poll(LIR_Opr tmp, CodeEmitInfo* info) {
++  guarantee(info != NULL, "Shouldn't be NULL");
++  __ ld_ptr(SCR1, Address(TREG, JavaThread::polling_page_offset()));
++  add_debug_info_for_branch(info); // This isn't just debug info: it's the oop map
++  __ relocate(relocInfo::poll_type);
++  __ ld_w(SCR1, SCR1, 0);
++  return __ offset();
++}
++
++void LIR_Assembler::move_regs(Register from_reg, Register to_reg) {
++  __ move(to_reg, from_reg);
++}
++
++void LIR_Assembler::swap_reg(Register a, Register b) { Unimplemented(); }
++
++void LIR_Assembler::const2reg(LIR_Opr src, LIR_Opr dest, LIR_PatchCode patch_code, CodeEmitInfo* info) {
++  assert(src->is_constant(), "should not call otherwise");
++  assert(dest->is_register(), "should not call otherwise");
++  LIR_Const* c = src->as_constant_ptr();
++
++  switch (c->type()) {
++    case T_INT:
++      assert(patch_code == lir_patch_none, "no patching handled here");
++      __ li(dest->as_register(), c->as_jint());
++      break;
++    case T_ADDRESS:
++      assert(patch_code == lir_patch_none, "no patching handled here");
++      __ li(dest->as_register(), c->as_jint());
++      break;
++    case T_LONG:
++      assert(patch_code == lir_patch_none, "no patching handled here");
++      __ li(dest->as_register_lo(), (intptr_t)c->as_jlong());
++      break;
++    case T_OBJECT:
++      if (patch_code == lir_patch_none) {
++        jobject2reg(c->as_jobject(), dest->as_register());
++      } else {
++        jobject2reg_with_patching(dest->as_register(), info);
++      }
++      break;
++    case T_METADATA:
++      if (patch_code != lir_patch_none) {
++        klass2reg_with_patching(dest->as_register(), info);
++      } else {
++        __ mov_metadata(dest->as_register(), c->as_metadata());
++      }
++      break;
++    case T_FLOAT:
++      __ lea(SCR1, InternalAddress(float_constant(c->as_jfloat())));
++      __ fld_s(dest->as_float_reg(), SCR1, 0);
++      break;
++    case T_DOUBLE:
++      __ lea(SCR1, InternalAddress(double_constant(c->as_jdouble())));
++      __ fld_d(dest->as_double_reg(), SCR1, 0);
++      break;
++    default:
++      ShouldNotReachHere();
++  }
++}
++
++void LIR_Assembler::const2stack(LIR_Opr src, LIR_Opr dest) {
++  LIR_Const* c = src->as_constant_ptr();
++  switch (c->type()) {
++    case T_OBJECT:
++      if (!c->as_jobject())
++        __ st_ptr(R0, frame_map()->address_for_slot(dest->single_stack_ix()));
++      else {
++        const2reg(src, FrameMap::scr1_opr, lir_patch_none, NULL);
++        reg2stack(FrameMap::scr1_opr, dest, c->type(), false);
++      }
++      break;
++    case T_ADDRESS:
++      const2reg(src, FrameMap::scr1_opr, lir_patch_none, NULL);
++      reg2stack(FrameMap::scr1_opr, dest, c->type(), false);
++    case T_INT:
++    case T_FLOAT:
++      if (c->as_jint_bits() == 0)
++        __ st_w(R0, frame_map()->address_for_slot(dest->single_stack_ix()));
++      else {
++        __ li(SCR2, c->as_jint_bits());
++        __ st_w(SCR2, frame_map()->address_for_slot(dest->single_stack_ix()));
++      }
++      break;
++    case T_LONG:
++    case T_DOUBLE:
++      if (c->as_jlong_bits() == 0)
++        __ st_ptr(R0, frame_map()->address_for_slot(dest->double_stack_ix(),
++                                                    lo_word_offset_in_bytes));
++      else {
++        __ li(SCR2, (intptr_t)c->as_jlong_bits());
++        __ st_ptr(SCR2, frame_map()->address_for_slot(dest->double_stack_ix(),
++                                                      lo_word_offset_in_bytes));
++      }
++      break;
++    default:
++      ShouldNotReachHere();
++  }
++}
++
++void LIR_Assembler::const2mem(LIR_Opr src, LIR_Opr dest, BasicType type,
++                              CodeEmitInfo* info, bool wide) {
++  assert(src->is_constant(), "should not call otherwise");
++  LIR_Const* c = src->as_constant_ptr();
++
LIR_Address* to_addr = dest->as_address_ptr(); ++ ++ void (Assembler::* insn)(Register Rt, Address adr); ++ ++ switch (type) { ++ case T_ADDRESS: ++ assert(c->as_jint() == 0, "should be"); ++ insn = &Assembler::st_d; ++ break; ++ case T_LONG: ++ assert(c->as_jlong() == 0, "should be"); ++ insn = &Assembler::st_d; ++ break; ++ case T_INT: ++ assert(c->as_jint() == 0, "should be"); ++ insn = &Assembler::st_w; ++ break; ++ case T_OBJECT: ++ case T_ARRAY: ++ assert(c->as_jobject() == 0, "should be"); ++ if (UseCompressedOops && !wide) { ++ insn = &Assembler::st_w; ++ } else { ++ insn = &Assembler::st_d; ++ } ++ break; ++ case T_CHAR: ++ case T_SHORT: ++ assert(c->as_jint() == 0, "should be"); ++ insn = &Assembler::st_h; ++ break; ++ case T_BOOLEAN: ++ case T_BYTE: ++ assert(c->as_jint() == 0, "should be"); ++ insn = &Assembler::st_b; ++ break; ++ default: ++ ShouldNotReachHere(); ++ insn = &Assembler::st_d; // unreachable ++ } ++ ++ if (info) add_debug_info_for_null_check_here(info); ++ (_masm->*insn)(R0, as_Address(to_addr)); ++} ++ ++void LIR_Assembler::reg2reg(LIR_Opr src, LIR_Opr dest) { ++ assert(src->is_register(), "should not call otherwise"); ++ assert(dest->is_register(), "should not call otherwise"); ++ ++ // move between cpu-registers ++ if (dest->is_single_cpu()) { ++ if (src->type() == T_LONG) { ++ // Can do LONG -> OBJECT ++ move_regs(src->as_register_lo(), dest->as_register()); ++ return; ++ } ++ assert(src->is_single_cpu(), "must match"); ++ if (src->type() == T_OBJECT) { ++ __ verify_oop(src->as_register()); ++ } ++ move_regs(src->as_register(), dest->as_register()); ++ } else if (dest->is_double_cpu()) { ++ if (is_reference_type(src->type())) { ++ // Surprising to me but we can see move of a long to t_object ++ __ verify_oop(src->as_register()); ++ move_regs(src->as_register(), dest->as_register_lo()); ++ return; ++ } ++ assert(src->is_double_cpu(), "must match"); ++ Register f_lo = src->as_register_lo(); ++ Register f_hi = src->as_register_hi(); ++ Register t_lo = dest->as_register_lo(); ++ Register t_hi = dest->as_register_hi(); ++ assert(f_hi == f_lo, "must be same"); ++ assert(t_hi == t_lo, "must be same"); ++ move_regs(f_lo, t_lo); ++ } else if (dest->is_single_fpu()) { ++ __ fmov_s(dest->as_float_reg(), src->as_float_reg()); ++ } else if (dest->is_double_fpu()) { ++ __ fmov_d(dest->as_double_reg(), src->as_double_reg()); ++ } else { ++ ShouldNotReachHere(); ++ } ++} ++ ++void LIR_Assembler::reg2stack(LIR_Opr src, LIR_Opr dest, BasicType type, bool pop_fpu_stack) { ++ precond(src->is_register() && dest->is_stack()); ++ ++ uint const c_sz32 = sizeof(uint32_t); ++ uint const c_sz64 = sizeof(uint64_t); ++ ++ if (src->is_single_cpu()) { ++ int index = dest->single_stack_ix(); ++ if (is_reference_type(type)) { ++ __ st_ptr(src->as_register(), stack_slot_address(index, c_sz64)); ++ __ verify_oop(src->as_register()); ++ } else if (type == T_METADATA || type == T_DOUBLE || type == T_ADDRESS) { ++ __ st_ptr(src->as_register(), stack_slot_address(index, c_sz64)); ++ } else { ++ __ st_w(src->as_register(), stack_slot_address(index, c_sz32)); ++ } ++ } else if (src->is_double_cpu()) { ++ int index = dest->double_stack_ix(); ++ Address dest_addr_LO = stack_slot_address(index, c_sz64, lo_word_offset_in_bytes); ++ __ st_ptr(src->as_register_lo(), dest_addr_LO); ++ } else if (src->is_single_fpu()) { ++ int index = dest->single_stack_ix(); ++ __ fst_s(src->as_float_reg(), stack_slot_address(index, c_sz32)); ++ } else if (src->is_double_fpu()) { ++ int index = dest->double_stack_ix(); ++ 
__ fst_d(src->as_double_reg(), stack_slot_address(index, c_sz64)); ++ } else { ++ ShouldNotReachHere(); ++ } ++} ++ ++void LIR_Assembler::reg2mem(LIR_Opr src, LIR_Opr dest, BasicType type, LIR_PatchCode patch_code, ++ CodeEmitInfo* info, bool pop_fpu_stack, bool wide, bool /* unaligned */) { ++ LIR_Address* to_addr = dest->as_address_ptr(); ++ PatchingStub* patch = NULL; ++ Register compressed_src = SCR2; ++ ++ if (patch_code != lir_patch_none) { ++ deoptimize_trap(info); ++ return; ++ } ++ ++ if (is_reference_type(type)) { ++ __ verify_oop(src->as_register()); ++ ++ if (UseCompressedOops && !wide) { ++ __ encode_heap_oop(compressed_src, src->as_register()); ++ } else { ++ compressed_src = src->as_register(); ++ } ++ } ++ ++ int null_check_here = code_offset(); ++ switch (type) { ++ case T_FLOAT: ++ __ fst_s(src->as_float_reg(), as_Address(to_addr)); ++ break; ++ case T_DOUBLE: ++ __ fst_d(src->as_double_reg(), as_Address(to_addr)); ++ break; ++ case T_ARRAY: // fall through ++ case T_OBJECT: // fall through ++ if (UseCompressedOops && !wide) { ++ __ st_w(compressed_src, as_Address(to_addr)); ++ } else { ++ __ st_ptr(compressed_src, as_Address(to_addr)); ++ } ++ break; ++ case T_METADATA: ++ // We get here to store a method pointer to the stack to pass to ++ // a dtrace runtime call. This can't work on 64 bit with ++ // compressed klass ptrs: T_METADATA can be a compressed klass ++ // ptr or a 64 bit method pointer. ++ ShouldNotReachHere(); ++ __ st_ptr(src->as_register(), as_Address(to_addr)); ++ break; ++ case T_ADDRESS: ++ __ st_ptr(src->as_register(), as_Address(to_addr)); ++ break; ++ case T_INT: ++ __ st_w(src->as_register(), as_Address(to_addr)); ++ break; ++ case T_LONG: ++ __ st_ptr(src->as_register_lo(), as_Address_lo(to_addr)); ++ break; ++ case T_BYTE: // fall through ++ case T_BOOLEAN: ++ __ st_b(src->as_register(), as_Address(to_addr)); ++ break; ++ case T_CHAR: // fall through ++ case T_SHORT: ++ __ st_h(src->as_register(), as_Address(to_addr)); ++ break; ++ default: ++ ShouldNotReachHere(); ++ } ++ if (info != NULL) { ++ add_debug_info_for_null_check(null_check_here, info); ++ } ++} ++ ++void LIR_Assembler::stack2reg(LIR_Opr src, LIR_Opr dest, BasicType type) { ++ precond(src->is_stack() && dest->is_register()); ++ ++ uint const c_sz32 = sizeof(uint32_t); ++ uint const c_sz64 = sizeof(uint64_t); ++ ++ if (dest->is_single_cpu()) { ++ int index = src->single_stack_ix(); ++ if (is_reference_type(type)) { ++ __ ld_ptr(dest->as_register(), stack_slot_address(index, c_sz64)); ++ __ verify_oop(dest->as_register()); ++ } else if (type == T_METADATA || type == T_ADDRESS) { ++ __ ld_ptr(dest->as_register(), stack_slot_address(index, c_sz64)); ++ } else { ++ __ ld_w(dest->as_register(), stack_slot_address(index, c_sz32)); ++ } ++ } else if (dest->is_double_cpu()) { ++ int index = src->double_stack_ix(); ++ Address src_addr_LO = stack_slot_address(index, c_sz64, lo_word_offset_in_bytes); ++ __ ld_ptr(dest->as_register_lo(), src_addr_LO); ++ } else if (dest->is_single_fpu()) { ++ int index = src->single_stack_ix(); ++ __ fld_s(dest->as_float_reg(), stack_slot_address(index, c_sz32)); ++ } else if (dest->is_double_fpu()) { ++ int index = src->double_stack_ix(); ++ __ fld_d(dest->as_double_reg(), stack_slot_address(index, c_sz64)); ++ } else { ++ ShouldNotReachHere(); ++ } ++} ++ ++void LIR_Assembler::klass2reg_with_patching(Register reg, CodeEmitInfo* info) { ++ address target = NULL; ++ ++ switch (patching_id(info)) { ++ case PatchingStub::access_field_id: ++ target = 
Runtime1::entry_for(Runtime1::access_field_patching_id); ++ break; ++ case PatchingStub::load_klass_id: ++ target = Runtime1::entry_for(Runtime1::load_klass_patching_id); ++ break; ++ case PatchingStub::load_mirror_id: ++ target = Runtime1::entry_for(Runtime1::load_mirror_patching_id); ++ break; ++ case PatchingStub::load_appendix_id: ++ target = Runtime1::entry_for(Runtime1::load_appendix_patching_id); ++ break; ++ default: ShouldNotReachHere(); ++ } ++ ++ __ call(target, relocInfo::runtime_call_type); ++ add_call_info_here(info); ++} ++ ++void LIR_Assembler::stack2stack(LIR_Opr src, LIR_Opr dest, BasicType type) { ++ LIR_Opr temp; ++ ++ if (type == T_LONG || type == T_DOUBLE) ++ temp = FrameMap::scr1_long_opr; ++ else ++ temp = FrameMap::scr1_opr; ++ ++ stack2reg(src, temp, src->type()); ++ reg2stack(temp, dest, dest->type(), false); ++} ++ ++void LIR_Assembler::mem2reg(LIR_Opr src, LIR_Opr dest, BasicType type, LIR_PatchCode patch_code, ++ CodeEmitInfo* info, bool wide, bool /* unaligned */) { ++ LIR_Address* addr = src->as_address_ptr(); ++ LIR_Address* from_addr = src->as_address_ptr(); ++ ++ if (addr->base()->type() == T_OBJECT) { ++ __ verify_oop(addr->base()->as_pointer_register()); ++ } ++ ++ if (patch_code != lir_patch_none) { ++ deoptimize_trap(info); ++ return; ++ } ++ ++ if (info != NULL) { ++ add_debug_info_for_null_check_here(info); ++ } ++ int null_check_here = code_offset(); ++ switch (type) { ++ case T_FLOAT: ++ __ fld_s(dest->as_float_reg(), as_Address(from_addr)); ++ break; ++ case T_DOUBLE: ++ __ fld_d(dest->as_double_reg(), as_Address(from_addr)); ++ break; ++ case T_ARRAY: // fall through ++ case T_OBJECT: // fall through ++ if (UseCompressedOops && !wide) { ++ __ ld_wu(dest->as_register(), as_Address(from_addr)); ++ } else { ++ __ ld_ptr(dest->as_register(), as_Address(from_addr)); ++ } ++ break; ++ case T_METADATA: ++ // We get here to store a method pointer to the stack to pass to ++ // a dtrace runtime call. This can't work on 64 bit with ++ // compressed klass ptrs: T_METADATA can be a compressed klass ++ // ptr or a 64 bit method pointer. 
++ ShouldNotReachHere(); ++ __ ld_ptr(dest->as_register(), as_Address(from_addr)); ++ break; ++ case T_ADDRESS: ++ __ ld_ptr(dest->as_register(), as_Address(from_addr)); ++ break; ++ case T_INT: ++ __ ld_w(dest->as_register(), as_Address(from_addr)); ++ break; ++ case T_LONG: ++ __ ld_ptr(dest->as_register_lo(), as_Address_lo(from_addr)); ++ break; ++ case T_BYTE: ++ __ ld_b(dest->as_register(), as_Address(from_addr)); ++ break; ++ case T_BOOLEAN: ++ __ ld_bu(dest->as_register(), as_Address(from_addr)); ++ break; ++ case T_CHAR: ++ __ ld_hu(dest->as_register(), as_Address(from_addr)); ++ break; ++ case T_SHORT: ++ __ ld_h(dest->as_register(), as_Address(from_addr)); ++ break; ++ default: ++ ShouldNotReachHere(); ++ } ++ ++ if (is_reference_type(type)) { ++ if (UseCompressedOops && !wide) { ++ __ decode_heap_oop(dest->as_register()); ++ } ++ ++ if (!UseZGC) { ++ // Load barrier has not yet been applied, so ZGC can't verify the oop here ++ __ verify_oop(dest->as_register()); ++ } ++ } ++} ++ ++int LIR_Assembler::array_element_size(BasicType type) const { ++ int elem_size = type2aelembytes(type); ++ return exact_log2(elem_size); ++} ++ ++void LIR_Assembler::emit_op3(LIR_Op3* op) { ++ switch (op->code()) { ++ case lir_idiv: ++ case lir_irem: ++ arithmetic_idiv(op->code(), op->in_opr1(), op->in_opr2(), op->in_opr3(), ++ op->result_opr(), op->info()); ++ break; ++ case lir_fmad: ++ __ fmadd_d(op->result_opr()->as_double_reg(), op->in_opr1()->as_double_reg(), ++ op->in_opr2()->as_double_reg(), op->in_opr3()->as_double_reg()); ++ break; ++ case lir_fmaf: ++ __ fmadd_s(op->result_opr()->as_float_reg(), op->in_opr1()->as_float_reg(), ++ op->in_opr2()->as_float_reg(), op->in_opr3()->as_float_reg()); ++ break; ++ default: ++ ShouldNotReachHere(); ++ break; ++ } ++} ++ ++void LIR_Assembler::emit_opBranch(LIR_OpBranch* op) { ++#ifdef ASSERT ++ assert(op->block() == NULL || op->block()->label() == op->label(), "wrong label"); ++ if (op->block() != NULL) _branch_target_blocks.append(op->block()); ++#endif ++ ++ if (op->cond() == lir_cond_always) { ++ if (op->info() != NULL) ++ add_debug_info_for_branch(op->info()); ++ ++ __ b_far(*(op->label())); ++ } else { ++ emit_cmp_branch(op); ++ } ++} ++ ++void LIR_Assembler::emit_cmp_branch(LIR_OpBranch* op) { ++#ifdef ASSERT ++ if (op->ublock() != NULL) _branch_target_blocks.append(op->ublock()); ++#endif ++ ++ if (op->info() != NULL) { ++ assert(op->in_opr1()->is_address() || op->in_opr2()->is_address(), ++ "shouldn't be codeemitinfo for non-address operands"); ++ add_debug_info_for_null_check_here(op->info()); // exception possible ++ } ++ ++ Label& L = *(op->label()); ++ Assembler::Condition acond; ++ LIR_Opr opr1 = op->in_opr1(); ++ LIR_Opr opr2 = op->in_opr2(); ++ assert(op->condition() != lir_cond_always, "must be"); ++ ++ if (op->code() == lir_cond_float_branch) { ++ bool is_unordered = (op->ublock() == op->block()); ++ if (opr1->is_single_fpu()) { ++ FloatRegister reg1 = opr1->as_float_reg(); ++ assert(opr2->is_single_fpu(), "expect single float register"); ++ FloatRegister reg2 = opr2->as_float_reg(); ++ switch(op->condition()) { ++ case lir_cond_equal: ++ if (is_unordered) ++ __ fcmp_cueq_s(FCC0, reg1, reg2); ++ else ++ __ fcmp_ceq_s(FCC0, reg1, reg2); ++ break; ++ case lir_cond_notEqual: ++ if (is_unordered) ++ __ fcmp_cune_s(FCC0, reg1, reg2); ++ else ++ __ fcmp_cne_s(FCC0, reg1, reg2); ++ break; ++ case lir_cond_less: ++ if (is_unordered) ++ __ fcmp_cult_s(FCC0, reg1, reg2); ++ else ++ __ fcmp_clt_s(FCC0, reg1, reg2); ++ break; ++ case 
lir_cond_lessEqual: ++ if (is_unordered) ++ __ fcmp_cule_s(FCC0, reg1, reg2); ++ else ++ __ fcmp_cle_s(FCC0, reg1, reg2); ++ break; ++ case lir_cond_greaterEqual: ++ if (is_unordered) ++ __ fcmp_cule_s(FCC0, reg2, reg1); ++ else ++ __ fcmp_cle_s(FCC0, reg2, reg1); ++ break; ++ case lir_cond_greater: ++ if (is_unordered) ++ __ fcmp_cult_s(FCC0, reg2, reg1); ++ else ++ __ fcmp_clt_s(FCC0, reg2, reg1); ++ break; ++ default: ++ ShouldNotReachHere(); ++ } ++ } else if (opr1->is_double_fpu()) { ++ FloatRegister reg1 = opr1->as_double_reg(); ++ assert(opr2->is_double_fpu(), "expect double float register"); ++ FloatRegister reg2 = opr2->as_double_reg(); ++ switch(op->condition()) { ++ case lir_cond_equal: ++ if (is_unordered) ++ __ fcmp_cueq_d(FCC0, reg1, reg2); ++ else ++ __ fcmp_ceq_d(FCC0, reg1, reg2); ++ break; ++ case lir_cond_notEqual: ++ if (is_unordered) ++ __ fcmp_cune_d(FCC0, reg1, reg2); ++ else ++ __ fcmp_cne_d(FCC0, reg1, reg2); ++ break; ++ case lir_cond_less: ++ if (is_unordered) ++ __ fcmp_cult_d(FCC0, reg1, reg2); ++ else ++ __ fcmp_clt_d(FCC0, reg1, reg2); ++ break; ++ case lir_cond_lessEqual: ++ if (is_unordered) ++ __ fcmp_cule_d(FCC0, reg1, reg2); ++ else ++ __ fcmp_cle_d(FCC0, reg1, reg2); ++ break; ++ case lir_cond_greaterEqual: ++ if (is_unordered) ++ __ fcmp_cule_d(FCC0, reg2, reg1); ++ else ++ __ fcmp_cle_d(FCC0, reg2, reg1); ++ break; ++ case lir_cond_greater: ++ if (is_unordered) ++ __ fcmp_cult_d(FCC0, reg2, reg1); ++ else ++ __ fcmp_clt_d(FCC0, reg2, reg1); ++ break; ++ default: ++ ShouldNotReachHere(); ++ } ++ } else { ++ ShouldNotReachHere(); ++ } ++ __ bcnez(FCC0, L); ++ } else { ++ if (opr1->is_constant() && opr2->is_single_cpu()) { ++ // tableswitch ++ Unimplemented(); ++ } else if (opr1->is_single_cpu() || opr1->is_double_cpu()) { ++ Register reg1 = as_reg(opr1); ++ Register reg2 = noreg; ++ jlong imm2 = 0; ++ if (opr2->is_single_cpu()) { ++ // cpu register - cpu register ++ reg2 = opr2->as_register(); ++ } else if (opr2->is_double_cpu()) { ++ // cpu register - cpu register ++ reg2 = opr2->as_register_lo(); ++ } else if (opr2->is_constant()) { ++ switch(opr2->type()) { ++ case T_INT: ++ case T_ADDRESS: ++ imm2 = opr2->as_constant_ptr()->as_jint(); ++ break; ++ case T_LONG: ++ imm2 = opr2->as_constant_ptr()->as_jlong(); ++ break; ++ case T_METADATA: ++ imm2 = (intptr_t)opr2->as_constant_ptr()->as_metadata(); ++ break; ++ case T_OBJECT: ++ case T_ARRAY: ++ if (opr2->as_constant_ptr()->as_jobject() != NULL) { ++ reg2 = SCR1; ++ jobject2reg(opr2->as_constant_ptr()->as_jobject(), reg2); ++ } else { ++ reg2 = R0; ++ } ++ break; ++ default: ++ ShouldNotReachHere(); ++ break; ++ } ++ } else { ++ ShouldNotReachHere(); ++ } ++ if (reg2 == noreg) { ++ if (imm2 == 0) { ++ reg2 = R0; ++ } else { ++ reg2 = SCR1; ++ __ li(reg2, imm2); ++ } ++ } ++ switch (op->condition()) { ++ case lir_cond_equal: ++ __ beq_far(reg1, reg2, L); break; ++ case lir_cond_notEqual: ++ __ bne_far(reg1, reg2, L); break; ++ case lir_cond_less: ++ __ blt_far(reg1, reg2, L, true); break; ++ case lir_cond_lessEqual: ++ __ bge_far(reg2, reg1, L, true); break; ++ case lir_cond_greaterEqual: ++ __ bge_far(reg1, reg2, L, true); break; ++ case lir_cond_greater: ++ __ blt_far(reg2, reg1, L, true); break; ++ case lir_cond_belowEqual: ++ __ bge_far(reg2, reg1, L, false); break; ++ case lir_cond_aboveEqual: ++ __ bge_far(reg1, reg2, L, false); break; ++ default: ++ ShouldNotReachHere(); ++ } ++ } ++ } ++} ++ ++void LIR_Assembler::emit_opConvert(LIR_OpConvert* op) { ++ LIR_Opr src = op->in_opr(); ++ LIR_Opr dest = 
op->result_opr(); ++ LIR_Opr tmp = op->tmp(); ++ ++ switch (op->bytecode()) { ++ case Bytecodes::_i2f: ++ __ movgr2fr_w(dest->as_float_reg(), src->as_register()); ++ __ ffint_s_w(dest->as_float_reg(), dest->as_float_reg()); ++ break; ++ case Bytecodes::_i2d: ++ __ movgr2fr_w(dest->as_double_reg(), src->as_register()); ++ __ ffint_d_w(dest->as_double_reg(), dest->as_double_reg()); ++ break; ++ case Bytecodes::_l2d: ++ __ movgr2fr_d(dest->as_double_reg(), src->as_register_lo()); ++ __ ffint_d_l(dest->as_double_reg(), dest->as_double_reg()); ++ break; ++ case Bytecodes::_l2f: ++ __ movgr2fr_d(dest->as_float_reg(), src->as_register_lo()); ++ __ ffint_s_l(dest->as_float_reg(), dest->as_float_reg()); ++ break; ++ case Bytecodes::_f2d: ++ __ fcvt_d_s(dest->as_double_reg(), src->as_float_reg()); ++ break; ++ case Bytecodes::_d2f: ++ __ fcvt_s_d(dest->as_float_reg(), src->as_double_reg()); ++ break; ++ case Bytecodes::_i2c: ++ __ bstrpick_w(dest->as_register(), src->as_register(), 15, 0); ++ break; ++ case Bytecodes::_i2l: ++ _masm->block_comment("FIXME: This could be a no-op"); ++ __ slli_w(dest->as_register_lo(), src->as_register(), 0); ++ break; ++ case Bytecodes::_i2s: ++ __ ext_w_h(dest->as_register(), src->as_register()); ++ break; ++ case Bytecodes::_i2b: ++ __ ext_w_b(dest->as_register(), src->as_register()); ++ break; ++ case Bytecodes::_l2i: ++ __ slli_w(dest->as_register(), src->as_register_lo(), 0); ++ break; ++ case Bytecodes::_d2l: ++ __ ftintrz_l_d(tmp->as_double_reg(), src->as_double_reg()); ++ __ movfr2gr_d(dest->as_register_lo(), tmp->as_double_reg()); ++ break; ++ case Bytecodes::_f2i: ++ __ ftintrz_w_s(tmp->as_float_reg(), src->as_float_reg()); ++ __ movfr2gr_s(dest->as_register(), tmp->as_float_reg()); ++ break; ++ case Bytecodes::_f2l: ++ __ ftintrz_l_s(tmp->as_float_reg(), src->as_float_reg()); ++ __ movfr2gr_d(dest->as_register_lo(), tmp->as_float_reg()); ++ break; ++ case Bytecodes::_d2i: ++ __ ftintrz_w_d(tmp->as_double_reg(), src->as_double_reg()); ++ __ movfr2gr_s(dest->as_register(), tmp->as_double_reg()); ++ break; ++ default: ShouldNotReachHere(); ++ } ++} ++ ++void LIR_Assembler::emit_alloc_obj(LIR_OpAllocObj* op) { ++ if (op->init_check()) { ++ __ ld_bu(SCR1, Address(op->klass()->as_register(), InstanceKlass::init_state_offset())); ++ __ li(SCR2, InstanceKlass::fully_initialized); ++ add_debug_info_for_null_check_here(op->stub()->info()); ++ __ bne_far(SCR1, SCR2, *op->stub()->entry()); ++ } ++ __ allocate_object(op->obj()->as_register(), op->tmp1()->as_register(), ++ op->tmp2()->as_register(), op->header_size(), ++ op->object_size(), op->klass()->as_register(), ++ *op->stub()->entry()); ++ __ bind(*op->stub()->continuation()); ++} ++ ++void LIR_Assembler::emit_alloc_array(LIR_OpAllocArray* op) { ++ Register len = op->len()->as_register(); ++ if (UseSlowPath || ++ (!UseFastNewObjectArray && is_reference_type(op->type())) || ++ (!UseFastNewTypeArray && !is_reference_type(op->type()))) { ++ __ b(*op->stub()->entry()); ++ } else { ++ Register tmp1 = op->tmp1()->as_register(); ++ Register tmp2 = op->tmp2()->as_register(); ++ Register tmp3 = op->tmp3()->as_register(); ++ if (len == tmp1) { ++ tmp1 = tmp3; ++ } else if (len == tmp2) { ++ tmp2 = tmp3; ++ } else if (len == tmp3) { ++ // everything is ok ++ } else { ++ __ move(tmp3, len); ++ } ++ __ allocate_array(op->obj()->as_register(), len, tmp1, tmp2, ++ arrayOopDesc::header_size(op->type()), ++ array_element_size(op->type()), ++ op->klass()->as_register(), ++ *op->stub()->entry()); ++ } ++ __ 
bind(*op->stub()->continuation()); ++} ++ ++void LIR_Assembler::type_profile_helper(Register mdo, ciMethodData *md, ciProfileData *data, ++ Register recv, Label* update_done) { ++ for (uint i = 0; i < ReceiverTypeData::row_limit(); i++) { ++ Label next_test; ++ // See if the receiver is receiver[n]. ++ __ lea(SCR2, Address(mdo, md->byte_offset_of_slot(data, ReceiverTypeData::receiver_offset(i)))); ++ __ ld_ptr(SCR1, Address(SCR2)); ++ __ bne(recv, SCR1, next_test); ++ Address data_addr(mdo, md->byte_offset_of_slot(data, ReceiverTypeData::receiver_count_offset(i))); ++ __ ld_ptr(SCR2, data_addr); ++ __ addi_d(SCR2, SCR2, DataLayout::counter_increment); ++ __ st_ptr(SCR2, data_addr); ++ __ b(*update_done); ++ __ bind(next_test); ++ } ++ ++ // Didn't find receiver; find next empty slot and fill it in ++ for (uint i = 0; i < ReceiverTypeData::row_limit(); i++) { ++ Label next_test; ++ __ lea(SCR2, Address(mdo, md->byte_offset_of_slot(data, ReceiverTypeData::receiver_offset(i)))); ++ Address recv_addr(SCR2); ++ __ ld_ptr(SCR1, recv_addr); ++ __ bnez(SCR1, next_test); ++ __ st_ptr(recv, recv_addr); ++ __ li(SCR1, DataLayout::counter_increment); ++ __ lea(SCR2, Address(mdo, md->byte_offset_of_slot(data, ReceiverTypeData::receiver_count_offset(i)))); ++ __ st_ptr(SCR1, Address(SCR2)); ++ __ b(*update_done); ++ __ bind(next_test); ++ } ++} ++ ++void LIR_Assembler::emit_typecheck_helper(LIR_OpTypeCheck *op, Label* success, ++ Label* failure, Label* obj_is_null) { ++ // we always need a stub for the failure case. ++ CodeStub* stub = op->stub(); ++ Register obj = op->object()->as_register(); ++ Register k_RInfo = op->tmp1()->as_register(); ++ Register klass_RInfo = op->tmp2()->as_register(); ++ Register dst = op->result_opr()->as_register(); ++ ciKlass* k = op->klass(); ++ Register Rtmp1 = noreg; ++ ++ // check if it needs to be profiled ++ ciMethodData* md; ++ ciProfileData* data; ++ ++ const bool should_profile = op->should_profile(); ++ ++ if (should_profile) { ++ ciMethod* method = op->profiled_method(); ++ assert(method != NULL, "Should have method"); ++ int bci = op->profiled_bci(); ++ md = method->method_data_or_null(); ++ assert(md != NULL, "Sanity"); ++ data = md->bci_to_data(bci); ++ assert(data != NULL, "need data for type check"); ++ assert(data->is_ReceiverTypeData(), "need ReceiverTypeData for type check"); ++ } ++ ++ Label profile_cast_success, profile_cast_failure; ++ Label *success_target = should_profile ? &profile_cast_success : success; ++ Label *failure_target = should_profile ? 
&profile_cast_failure : failure; ++ ++ if (obj == k_RInfo) { ++ k_RInfo = dst; ++ } else if (obj == klass_RInfo) { ++ klass_RInfo = dst; ++ } ++ if (k->is_loaded() && !UseCompressedClassPointers) { ++ select_different_registers(obj, dst, k_RInfo, klass_RInfo); ++ } else { ++ Rtmp1 = op->tmp3()->as_register(); ++ select_different_registers(obj, dst, k_RInfo, klass_RInfo, Rtmp1); ++ } ++ ++ assert_different_registers(obj, k_RInfo, klass_RInfo); ++ ++ if (should_profile) { ++ Label not_null; ++ __ bnez(obj, not_null); ++ // Object is null; update MDO and exit ++ Register mdo = klass_RInfo; ++ __ mov_metadata(mdo, md->constant_encoding()); ++ Address data_addr = Address(mdo, md->byte_offset_of_slot(data, DataLayout::flags_offset())); ++ __ ld_bu(SCR2, data_addr); ++ __ ori(SCR2, SCR2, BitData::null_seen_byte_constant()); ++ __ st_b(SCR2, data_addr); ++ __ b(*obj_is_null); ++ __ bind(not_null); ++ } else { ++ __ beqz(obj, *obj_is_null); ++ } ++ ++ if (!k->is_loaded()) { ++ klass2reg_with_patching(k_RInfo, op->info_for_patch()); ++ } else { ++ __ mov_metadata(k_RInfo, k->constant_encoding()); ++ } ++ __ verify_oop(obj); ++ ++ if (op->fast_check()) { ++ // get object class ++ // not a safepoint as obj null check happens earlier ++ __ load_klass(SCR2, obj); ++ __ bne_far(SCR2, k_RInfo, *failure_target); ++ // successful cast, fall through to profile or jump ++ } else { ++ // get object class ++ // not a safepoint as obj null check happens earlier ++ __ load_klass(klass_RInfo, obj); ++ if (k->is_loaded()) { ++ // See if we get an immediate positive hit ++ __ ld_ptr(SCR1, Address(klass_RInfo, int64_t(k->super_check_offset()))); ++ if ((juint)in_bytes(Klass::secondary_super_cache_offset()) != k->super_check_offset()) { ++ __ bne_far(k_RInfo, SCR1, *failure_target); ++ // successful cast, fall through to profile or jump ++ } else { ++ // See if we get an immediate positive hit ++ __ beq_far(k_RInfo, SCR1, *success_target); ++ // check for self ++ __ beq_far(klass_RInfo, k_RInfo, *success_target); ++ ++ __ addi_d(SP, SP, -2 * wordSize); ++ __ st_ptr(k_RInfo, Address(SP, 0 * wordSize)); ++ __ st_ptr(klass_RInfo, Address(SP, 1 * wordSize)); ++ __ call(Runtime1::entry_for(Runtime1::slow_subtype_check_id), relocInfo::runtime_call_type); ++ __ ld_ptr(klass_RInfo, Address(SP, 0 * wordSize)); ++ __ addi_d(SP, SP, 2 * wordSize); ++ // result is a boolean ++ __ beqz(klass_RInfo, *failure_target); ++ // successful cast, fall through to profile or jump ++ } ++ } else { ++ // perform the fast part of the checking logic ++ __ check_klass_subtype_fast_path(klass_RInfo, k_RInfo, Rtmp1, success_target, failure_target, NULL); ++ // call out-of-line instance of __ check_klass_subtype_slow_path(...): ++ __ addi_d(SP, SP, -2 * wordSize); ++ __ st_ptr(k_RInfo, Address(SP, 0 * wordSize)); ++ __ st_ptr(klass_RInfo, Address(SP, 1 * wordSize)); ++ __ call(Runtime1::entry_for(Runtime1::slow_subtype_check_id), relocInfo::runtime_call_type); ++ __ ld_ptr(k_RInfo, Address(SP, 0 * wordSize)); ++ __ ld_ptr(klass_RInfo, Address(SP, 1 * wordSize)); ++ __ addi_d(SP, SP, 2 * wordSize); ++ // result is a boolean ++ __ beqz(k_RInfo, *failure_target); ++ // successful cast, fall through to profile or jump ++ } ++ } ++ if (should_profile) { ++ Register mdo = klass_RInfo, recv = k_RInfo; ++ __ bind(profile_cast_success); ++ __ mov_metadata(mdo, md->constant_encoding()); ++ __ load_klass(recv, obj); ++ Label update_done; ++ type_profile_helper(mdo, md, data, recv, success); ++ __ b(*success); ++ ++ __ bind(profile_cast_failure); ++ __ 
mov_metadata(mdo, md->constant_encoding()); ++ Address counter_addr = Address(mdo, md->byte_offset_of_slot(data, CounterData::count_offset())); ++ __ ld_ptr(SCR2, counter_addr); ++ __ addi_d(SCR2, SCR2, -DataLayout::counter_increment); ++ __ st_ptr(SCR2, counter_addr); ++ __ b(*failure); ++ } ++ __ b(*success); ++} ++ ++void LIR_Assembler::emit_opTypeCheck(LIR_OpTypeCheck* op) { ++ const bool should_profile = op->should_profile(); ++ ++ LIR_Code code = op->code(); ++ if (code == lir_store_check) { ++ Register value = op->object()->as_register(); ++ Register array = op->array()->as_register(); ++ Register k_RInfo = op->tmp1()->as_register(); ++ Register klass_RInfo = op->tmp2()->as_register(); ++ Register Rtmp1 = op->tmp3()->as_register(); ++ CodeStub* stub = op->stub(); ++ ++ // check if it needs to be profiled ++ ciMethodData* md; ++ ciProfileData* data; ++ ++ if (should_profile) { ++ ciMethod* method = op->profiled_method(); ++ assert(method != NULL, "Should have method"); ++ int bci = op->profiled_bci(); ++ md = method->method_data_or_null(); ++ assert(md != NULL, "Sanity"); ++ data = md->bci_to_data(bci); ++ assert(data != NULL, "need data for type check"); ++ assert(data->is_ReceiverTypeData(), "need ReceiverTypeData for type check"); ++ } ++ Label profile_cast_success, profile_cast_failure, done; ++ Label *success_target = should_profile ? &profile_cast_success : &done; ++ Label *failure_target = should_profile ? &profile_cast_failure : stub->entry(); ++ ++ if (should_profile) { ++ Label not_null; ++ __ bnez(value, not_null); ++ // Object is null; update MDO and exit ++ Register mdo = klass_RInfo; ++ __ mov_metadata(mdo, md->constant_encoding()); ++ Address data_addr = Address(mdo, md->byte_offset_of_slot(data, DataLayout::flags_offset())); ++ __ ld_bu(SCR2, data_addr); ++ __ ori(SCR2, SCR2, BitData::null_seen_byte_constant()); ++ __ st_b(SCR2, data_addr); ++ __ b(done); ++ __ bind(not_null); ++ } else { ++ __ beqz(value, done); ++ } ++ ++ add_debug_info_for_null_check_here(op->info_for_exception()); ++ __ load_klass(k_RInfo, array); ++ __ load_klass(klass_RInfo, value); ++ ++ // get instance klass (it's already uncompressed) ++ __ ld_ptr(k_RInfo, Address(k_RInfo, ObjArrayKlass::element_klass_offset())); ++ // perform the fast part of the checking logic ++ __ check_klass_subtype_fast_path(klass_RInfo, k_RInfo, Rtmp1, success_target, failure_target, NULL); ++ // call out-of-line instance of __ check_klass_subtype_slow_path(...): ++ __ addi_d(SP, SP, -2 * wordSize); ++ __ st_ptr(k_RInfo, Address(SP, 0 * wordSize)); ++ __ st_ptr(klass_RInfo, Address(SP, 1 * wordSize)); ++ __ call(Runtime1::entry_for(Runtime1::slow_subtype_check_id), relocInfo::runtime_call_type); ++ __ ld_ptr(k_RInfo, Address(SP, 0 * wordSize)); ++ __ ld_ptr(klass_RInfo, Address(SP, 1 * wordSize)); ++ __ addi_d(SP, SP, 2 * wordSize); ++ // result is a boolean ++ __ beqz(k_RInfo, *failure_target); ++ // fall through to the success case ++ ++ if (should_profile) { ++ Register mdo = klass_RInfo, recv = k_RInfo; ++ __ bind(profile_cast_success); ++ __ mov_metadata(mdo, md->constant_encoding()); ++ __ load_klass(recv, value); ++ Label update_done; ++ type_profile_helper(mdo, md, data, recv, &done); ++ __ b(done); ++ ++ __ bind(profile_cast_failure); ++ __ mov_metadata(mdo, md->constant_encoding()); ++ Address counter_addr(mdo, md->byte_offset_of_slot(data, CounterData::count_offset())); ++ __ lea(SCR2, counter_addr); ++ __ ld_ptr(SCR1, Address(SCR2)); ++ __ addi_d(SCR1, SCR1, -DataLayout::counter_increment); ++ __ 
st_ptr(SCR1, Address(SCR2)); ++ __ b(*stub->entry()); ++ } ++ ++ __ bind(done); ++ } else if (code == lir_checkcast) { ++ Register obj = op->object()->as_register(); ++ Register dst = op->result_opr()->as_register(); ++ Label success; ++ emit_typecheck_helper(op, &success, op->stub()->entry(), &success); ++ __ bind(success); ++ if (dst != obj) { ++ __ move(dst, obj); ++ } ++ } else if (code == lir_instanceof) { ++ Register obj = op->object()->as_register(); ++ Register dst = op->result_opr()->as_register(); ++ Label success, failure, done; ++ emit_typecheck_helper(op, &success, &failure, &failure); ++ __ bind(failure); ++ __ move(dst, R0); ++ __ b(done); ++ __ bind(success); ++ __ li(dst, 1); ++ __ bind(done); ++ } else { ++ ShouldNotReachHere(); ++ } ++} ++ ++void LIR_Assembler::casw(Register addr, Register newval, Register cmpval, bool sign) { ++ __ cmpxchg32(Address(addr, 0), cmpval, newval, SCR1, sign, ++ /* retold */ false, /* barrier */ true, /* weak */ false, /* exchage */ false); ++} ++ ++void LIR_Assembler::casl(Register addr, Register newval, Register cmpval) { ++ __ cmpxchg(Address(addr, 0), cmpval, newval, SCR1, ++ /* retold */ false, /* barrier */ true, /* weak */ false, /* exchage */ false); ++} ++ ++void LIR_Assembler::emit_compare_and_swap(LIR_OpCompareAndSwap* op) { ++ assert(VM_Version::supports_cx8(), "wrong machine"); ++ Register addr; ++ if (op->addr()->is_register()) { ++ addr = as_reg(op->addr()); ++ } else { ++ assert(op->addr()->is_address(), "what else?"); ++ LIR_Address* addr_ptr = op->addr()->as_address_ptr(); ++ assert(addr_ptr->disp() == 0, "need 0 disp"); ++ assert(addr_ptr->index() == LIR_OprDesc::illegalOpr(), "need 0 index"); ++ addr = as_reg(addr_ptr->base()); ++ } ++ Register newval = as_reg(op->new_value()); ++ Register cmpval = as_reg(op->cmp_value()); ++ ++ if (op->code() == lir_cas_obj) { ++ if (UseCompressedOops) { ++ Register t1 = op->tmp1()->as_register(); ++ assert(op->tmp1()->is_valid(), "must be"); ++ __ encode_heap_oop(t1, cmpval); ++ cmpval = t1; ++ __ encode_heap_oop(SCR2, newval); ++ newval = SCR2; ++ casw(addr, newval, cmpval, false); ++ } else { ++ casl(addr, newval, cmpval); ++ } ++ } else if (op->code() == lir_cas_int) { ++ casw(addr, newval, cmpval, true); ++ } else { ++ casl(addr, newval, cmpval); ++ } ++} ++ ++void LIR_Assembler::cmove(LIR_Condition condition, LIR_Opr src1, LIR_Opr src2, LIR_Opr result, BasicType type, ++ LIR_Opr left, LIR_Opr right) { ++ assert(result->is_single_cpu() || result->is_double_cpu(), "expect single register for result"); ++ assert(left->is_single_cpu() || left->is_double_cpu(), "must be"); ++ Register regd = (result->type() == T_LONG) ? 
result->as_register_lo() : result->as_register(); ++ Register regl = as_reg(left); ++ Register regr = noreg; ++ Register reg1 = noreg; ++ Register reg2 = noreg; ++ jlong immr = 0; ++ ++ // comparison operands ++ if (right->is_single_cpu()) { ++ // cpu register - cpu register ++ regr = right->as_register(); ++ } else if (right->is_double_cpu()) { ++ // cpu register - cpu register ++ regr = right->as_register_lo(); ++ } else if (right->is_constant()) { ++ switch(right->type()) { ++ case T_INT: ++ case T_ADDRESS: ++ immr = right->as_constant_ptr()->as_jint(); ++ break; ++ case T_LONG: ++ immr = right->as_constant_ptr()->as_jlong(); ++ break; ++ case T_METADATA: ++ immr = (intptr_t)right->as_constant_ptr()->as_metadata(); ++ break; ++ case T_OBJECT: ++ case T_ARRAY: ++ if (right->as_constant_ptr()->as_jobject() != NULL) { ++ regr = SCR1; ++ jobject2reg(right->as_constant_ptr()->as_jobject(), regr); ++ } else { ++ immr = 0; ++ } ++ break; ++ default: ++ ShouldNotReachHere(); ++ break; ++ } ++ } else { ++ ShouldNotReachHere(); ++ } ++ ++ if (regr == noreg) { ++ switch (condition) { ++ case lir_cond_equal: ++ case lir_cond_notEqual: ++ if (!Assembler::is_simm(-immr, 12)) { ++ regr = SCR1; ++ __ li(regr, immr); ++ } ++ break; ++ default: ++ if (!Assembler::is_simm(immr, 12)) { ++ regr = SCR1; ++ __ li(regr, immr); ++ } ++ } ++ } ++ ++ // special cases ++ if (src1->is_constant() && src2->is_constant()) { ++ jlong val1 = 0, val2 = 0; ++ if (src1->type() == T_INT && src2->type() == T_INT) { ++ val1 = src1->as_jint(); ++ val2 = src2->as_jint(); ++ } else if (src1->type() == T_LONG && src2->type() == T_LONG) { ++ val1 = src1->as_jlong(); ++ val2 = src2->as_jlong(); ++ } ++ if (val1 == 0 && val2 == 1) { ++ if (regr == noreg) { ++ switch (condition) { ++ case lir_cond_equal: ++ if (immr == 0) { ++ __ sltu(regd, R0, regl); ++ } else { ++ __ addi_d(SCR1, regl, -immr); ++ __ li(regd, 1); ++ __ maskeqz(regd, regd, SCR1); ++ } ++ break; ++ case lir_cond_notEqual: ++ if (immr == 0) { ++ __ sltu(regd, R0, regl); ++ __ xori(regd, regd, 1); ++ } else { ++ __ addi_d(SCR1, regl, -immr); ++ __ li(regd, 1); ++ __ masknez(regd, regd, SCR1); ++ } ++ break; ++ case lir_cond_less: ++ __ slti(regd, regl, immr); ++ __ xori(regd, regd, 1); ++ break; ++ case lir_cond_lessEqual: ++ if (immr == 0) { ++ __ slt(regd, R0, regl); ++ } else { ++ __ li(SCR1, immr); ++ __ slt(regd, SCR1, regl); ++ } ++ break; ++ case lir_cond_greater: ++ if (immr == 0) { ++ __ slt(regd, R0, regl); ++ } else { ++ __ li(SCR1, immr); ++ __ slt(regd, SCR1, regl); ++ } ++ __ xori(regd, regd, 1); ++ break; ++ case lir_cond_greaterEqual: ++ __ slti(regd, regl, immr); ++ break; ++ case lir_cond_belowEqual: ++ if (immr == 0) { ++ __ sltu(regd, R0, regl); ++ } else { ++ __ li(SCR1, immr); ++ __ sltu(regd, SCR1, regl); ++ } ++ break; ++ case lir_cond_aboveEqual: ++ __ sltui(regd, regl, immr); ++ break; ++ default: ++ ShouldNotReachHere(); ++ } ++ } else { ++ switch (condition) { ++ case lir_cond_equal: ++ __ sub_d(SCR1, regl, regr); ++ __ li(regd, 1); ++ __ maskeqz(regd, regd, SCR1); ++ break; ++ case lir_cond_notEqual: ++ __ sub_d(SCR1, regl, regr); ++ __ li(regd, 1); ++ __ masknez(regd, regd, SCR1); ++ break; ++ case lir_cond_less: ++ __ slt(regd, regl, regr); ++ __ xori(regd, regd, 1); ++ break; ++ case lir_cond_lessEqual: ++ __ slt(regd, regr, regl); ++ break; ++ case lir_cond_greater: ++ __ slt(regd, regr, regl); ++ __ xori(regd, regd, 1); ++ break; ++ case lir_cond_greaterEqual: ++ __ slt(regd, regl, regr); ++ break; ++ case lir_cond_belowEqual: ++ __ 
sltu(regd, regr, regl); ++ break; ++ case lir_cond_aboveEqual: ++ __ sltu(regd, regl, regr); ++ break; ++ default: ++ ShouldNotReachHere(); ++ } ++ } ++ return; ++ } else if (val1 == 1 && val2 == 0) { ++ if (regr == noreg) { ++ switch (condition) { ++ case lir_cond_equal: ++ if (immr == 0) { ++ __ sltu(regd, R0, regl); ++ __ xori(regd, regd, 1); ++ } else { ++ __ addi_d(SCR1, regl, -immr); ++ __ li(regd, 1); ++ __ masknez(regd, regd, SCR1); ++ } ++ break; ++ case lir_cond_notEqual: ++ if (immr == 0) { ++ __ sltu(regd, R0, regl); ++ } else { ++ __ addi_d(SCR1, regl, -immr); ++ __ li(regd, 1); ++ __ maskeqz(regd, regd, SCR1); ++ } ++ break; ++ case lir_cond_less: ++ __ slti(regd, regl, immr); ++ break; ++ case lir_cond_lessEqual: ++ if (immr == 0) { ++ __ slt(regd, R0, regl); ++ } else { ++ __ li(SCR1, immr); ++ __ slt(regd, SCR1, regl); ++ } ++ __ xori(regd, regd, 1); ++ break; ++ case lir_cond_greater: ++ if (immr == 0) { ++ __ slt(regd, R0, regl); ++ } else { ++ __ li(SCR1, immr); ++ __ slt(regd, SCR1, regl); ++ } ++ break; ++ case lir_cond_greaterEqual: ++ __ slti(regd, regl, immr); ++ __ xori(regd, regd, 1); ++ break; ++ case lir_cond_belowEqual: ++ if (immr == 0) { ++ __ sltu(regd, R0, regl); ++ } else { ++ __ li(SCR1, immr); ++ __ sltu(regd, SCR1, regl); ++ } ++ __ xori(regd, regd, 1); ++ break; ++ case lir_cond_aboveEqual: ++ __ sltui(regd, regl, immr); ++ __ xori(regd, regd, 1); ++ break; ++ default: ++ ShouldNotReachHere(); ++ } ++ } else { ++ switch (condition) { ++ case lir_cond_equal: ++ __ sub_d(SCR1, regl, regr); ++ __ li(regd, 1); ++ __ masknez(regd, regd, SCR1); ++ break; ++ case lir_cond_notEqual: ++ __ sub_d(SCR1, regl, regr); ++ __ li(regd, 1); ++ __ maskeqz(regd, regd, SCR1); ++ break; ++ case lir_cond_less: ++ __ slt(regd, regl, regr); ++ break; ++ case lir_cond_lessEqual: ++ __ slt(regd, regr, regl); ++ __ xori(regd, regd, 1); ++ break; ++ case lir_cond_greater: ++ __ slt(regd, regr, regl); ++ break; ++ case lir_cond_greaterEqual: ++ __ slt(regd, regl, regr); ++ __ xori(regd, regd, 1); ++ break; ++ case lir_cond_belowEqual: ++ __ sltu(regd, regr, regl); ++ __ xori(regd, regd, 1); ++ break; ++ case lir_cond_aboveEqual: ++ __ sltu(regd, regl, regr); ++ __ xori(regd, regd, 1); ++ break; ++ default: ++ ShouldNotReachHere(); ++ } ++ } ++ return; ++ } ++ } ++ ++ // cmp ++ if (regr == noreg) { ++ switch (condition) { ++ case lir_cond_equal: ++ __ addi_d(SCR2, regl, -immr); ++ break; ++ case lir_cond_notEqual: ++ __ addi_d(SCR2, regl, -immr); ++ break; ++ case lir_cond_less: ++ __ slti(SCR2, regl, immr); ++ break; ++ case lir_cond_lessEqual: ++ __ li(SCR1, immr); ++ __ slt(SCR2, SCR1, regl); ++ break; ++ case lir_cond_greater: ++ __ li(SCR1, immr); ++ __ slt(SCR2, SCR1, regl); ++ break; ++ case lir_cond_greaterEqual: ++ __ slti(SCR2, regl, immr); ++ break; ++ case lir_cond_belowEqual: ++ __ li(SCR1, immr); ++ __ sltu(SCR2, SCR1, regl); ++ break; ++ case lir_cond_aboveEqual: ++ __ sltui(SCR2, regl, immr); ++ break; ++ default: ++ ShouldNotReachHere(); ++ } ++ } else { ++ switch (condition) { ++ case lir_cond_equal: ++ __ sub_d(SCR2, regl, regr); ++ break; ++ case lir_cond_notEqual: ++ __ sub_d(SCR2, regl, regr); ++ break; ++ case lir_cond_less: ++ __ slt(SCR2, regl, regr); ++ break; ++ case lir_cond_lessEqual: ++ __ slt(SCR2, regr, regl); ++ break; ++ case lir_cond_greater: ++ __ slt(SCR2, regr, regl); ++ break; ++ case lir_cond_greaterEqual: ++ __ slt(SCR2, regl, regr); ++ break; ++ case lir_cond_belowEqual: ++ __ sltu(SCR2, regr, regl); ++ break; ++ case lir_cond_aboveEqual: 
++ __ sltu(SCR2, regl, regr); ++ break; ++ default: ++ ShouldNotReachHere(); ++ } ++ } ++ ++ // value operands ++ if (src1->is_stack()) { ++ stack2reg(src1, result, result->type()); ++ reg1 = regd; ++ } else if (src1->is_constant()) { ++ const2reg(src1, result, lir_patch_none, NULL); ++ reg1 = regd; ++ } else { ++ reg1 = (src1->type() == T_LONG) ? src1->as_register_lo() : src1->as_register(); ++ } ++ ++ if (src2->is_stack()) { ++ stack2reg(src2, FrameMap::scr1_opr, result->type()); ++ reg2 = SCR1; ++ } else if (src2->is_constant()) { ++ LIR_Opr tmp = src2->type() == T_LONG ? FrameMap::scr1_long_opr : FrameMap::scr1_opr; ++ const2reg(src2, tmp, lir_patch_none, NULL); ++ reg2 = SCR1; ++ } else { ++ reg2 = (src2->type() == T_LONG) ? src2->as_register_lo() : src2->as_register(); ++ } ++ ++ // cmove ++ switch (condition) { ++ case lir_cond_equal: ++ __ masknez(regd, reg1, SCR2); ++ __ maskeqz(SCR2, reg2, SCR2); ++ break; ++ case lir_cond_notEqual: ++ __ maskeqz(regd, reg1, SCR2); ++ __ masknez(SCR2, reg2, SCR2); ++ break; ++ case lir_cond_less: ++ __ maskeqz(regd, reg1, SCR2); ++ __ masknez(SCR2, reg2, SCR2); ++ break; ++ case lir_cond_lessEqual: ++ __ masknez(regd, reg1, SCR2); ++ __ maskeqz(SCR2, reg2, SCR2); ++ break; ++ case lir_cond_greater: ++ __ maskeqz(regd, reg1, SCR2); ++ __ masknez(SCR2, reg2, SCR2); ++ break; ++ case lir_cond_greaterEqual: ++ __ masknez(regd, reg1, SCR2); ++ __ maskeqz(SCR2, reg2, SCR2); ++ break; ++ case lir_cond_belowEqual: ++ __ masknez(regd, reg1, SCR2); ++ __ maskeqz(SCR2, reg2, SCR2); ++ break; ++ case lir_cond_aboveEqual: ++ __ masknez(regd, reg1, SCR2); ++ __ maskeqz(SCR2, reg2, SCR2); ++ break; ++ default: ++ ShouldNotReachHere(); ++ } ++ ++ __ OR(regd, regd, SCR2); ++} ++ ++void LIR_Assembler::arith_op(LIR_Code code, LIR_Opr left, LIR_Opr right, LIR_Opr dest, ++ CodeEmitInfo* info, bool pop_fpu_stack) { ++ assert(info == NULL, "should never be used, idiv/irem and ldiv/lrem not handled by this method"); ++ ++ if (left->is_single_cpu()) { ++ Register lreg = left->as_register(); ++ Register dreg = as_reg(dest); ++ ++ if (right->is_single_cpu()) { ++ // cpu register - cpu register ++ assert(left->type() == T_INT && right->type() == T_INT && dest->type() == T_INT, "should be"); ++ Register rreg = right->as_register(); ++ switch (code) { ++ case lir_add: __ add_w (dest->as_register(), lreg, rreg); break; ++ case lir_sub: __ sub_w (dest->as_register(), lreg, rreg); break; ++ case lir_mul: __ mul_w (dest->as_register(), lreg, rreg); break; ++ default: ShouldNotReachHere(); ++ } ++ } else if (right->is_double_cpu()) { ++ Register rreg = right->as_register_lo(); ++ // single_cpu + double_cpu: can happen with obj+long ++ assert(code == lir_add || code == lir_sub, "mismatched arithmetic op"); ++ switch (code) { ++ case lir_add: __ add_d(dreg, lreg, rreg); break; ++ case lir_sub: __ sub_d(dreg, lreg, rreg); break; ++ default: ShouldNotReachHere(); ++ } ++ } else if (right->is_constant()) { ++ // cpu register - constant ++ jlong c; ++ ++ // FIXME: This is fugly: we really need to factor all this logic. 
++ switch(right->type()) { ++ case T_LONG: ++ c = right->as_constant_ptr()->as_jlong(); ++ break; ++ case T_INT: ++ case T_ADDRESS: ++ c = right->as_constant_ptr()->as_jint(); ++ break; ++ default: ++ ShouldNotReachHere(); ++ c = 0; // unreachable ++ break; ++ } ++ ++ assert(code == lir_add || code == lir_sub, "mismatched arithmetic op"); ++ if (c == 0 && dreg == lreg) { ++ COMMENT("effective nop elided"); ++ return; ++ } ++ ++ switch(left->type()) { ++ case T_INT: ++ switch (code) { ++ case lir_add: __ addi_w(dreg, lreg, c); break; ++ case lir_sub: __ addi_w(dreg, lreg, -c); break; ++ default: ShouldNotReachHere(); ++ } ++ break; ++ case T_OBJECT: ++ case T_ADDRESS: ++ switch (code) { ++ case lir_add: __ addi_d(dreg, lreg, c); break; ++ case lir_sub: __ addi_d(dreg, lreg, -c); break; ++ default: ShouldNotReachHere(); ++ } ++ break; ++ default: ++ ShouldNotReachHere(); ++ } ++ } else { ++ ShouldNotReachHere(); ++ } ++ } else if (left->is_double_cpu()) { ++ Register lreg_lo = left->as_register_lo(); ++ ++ if (right->is_double_cpu()) { ++ // cpu register - cpu register ++ Register rreg_lo = right->as_register_lo(); ++ switch (code) { ++ case lir_add: __ add_d(dest->as_register_lo(), lreg_lo, rreg_lo); break; ++ case lir_sub: __ sub_d(dest->as_register_lo(), lreg_lo, rreg_lo); break; ++ case lir_mul: __ mul_d(dest->as_register_lo(), lreg_lo, rreg_lo); break; ++ case lir_div: __ div_d(dest->as_register_lo(), lreg_lo, rreg_lo); break; ++ case lir_rem: __ mod_d(dest->as_register_lo(), lreg_lo, rreg_lo); break; ++ default: ShouldNotReachHere(); ++ } ++ ++ } else if (right->is_constant()) { ++ jlong c = right->as_constant_ptr()->as_jlong(); ++ Register dreg = as_reg(dest); ++ switch (code) { ++ case lir_add: ++ case lir_sub: ++ if (c == 0 && dreg == lreg_lo) { ++ COMMENT("effective nop elided"); ++ return; ++ } ++ code == lir_add ? 
__ addi_d(dreg, lreg_lo, c) : __ addi_d(dreg, lreg_lo, -c); ++ break; ++ case lir_div: ++ assert(c > 0 && is_power_of_2(c), "divisor must be power-of-2 constant"); ++ if (c == 1) { ++ // move lreg_lo to dreg if divisor is 1 ++ __ move(dreg, lreg_lo); ++ } else { ++ unsigned int shift = log2i_exact(c); ++ // use scr1 as intermediate result register ++ __ srai_d(SCR1, lreg_lo, 63); ++ __ srli_d(SCR1, SCR1, 64 - shift); ++ __ add_d(SCR1, lreg_lo, SCR1); ++ __ srai_d(dreg, SCR1, shift); ++ } ++ break; ++ case lir_rem: ++ assert(c > 0 && is_power_of_2(c), "divisor must be power-of-2 constant"); ++ if (c == 1) { ++ // move 0 to dreg if divisor is 1 ++ __ move(dreg, R0); ++ } else { ++ // use scr1/2 as intermediate result register ++ __ sub_d(SCR1, R0, lreg_lo); ++ __ slt(SCR2, SCR1, R0); ++ __ andi(dreg, lreg_lo, c - 1); ++ __ andi(SCR1, SCR1, c - 1); ++ __ sub_d(SCR1, R0, SCR1); ++ __ maskeqz(dreg, dreg, SCR2); ++ __ masknez(SCR1, SCR1, SCR2); ++ __ OR(dreg, dreg, SCR1); ++ } ++ break; ++ default: ++ ShouldNotReachHere(); ++ } ++ } else { ++ ShouldNotReachHere(); ++ } ++ } else if (left->is_single_fpu()) { ++ assert(right->is_single_fpu(), "right hand side of float arithmetics needs to be float register"); ++ switch (code) { ++ case lir_add: __ fadd_s (dest->as_float_reg(), left->as_float_reg(), right->as_float_reg()); break; ++ case lir_sub: __ fsub_s (dest->as_float_reg(), left->as_float_reg(), right->as_float_reg()); break; ++ case lir_mul: __ fmul_s (dest->as_float_reg(), left->as_float_reg(), right->as_float_reg()); break; ++ case lir_div: __ fdiv_s (dest->as_float_reg(), left->as_float_reg(), right->as_float_reg()); break; ++ default: ShouldNotReachHere(); ++ } ++ } else if (left->is_double_fpu()) { ++ if (right->is_double_fpu()) { ++ // fpu register - fpu register ++ switch (code) { ++ case lir_add: __ fadd_d (dest->as_double_reg(), left->as_double_reg(), right->as_double_reg()); break; ++ case lir_sub: __ fsub_d (dest->as_double_reg(), left->as_double_reg(), right->as_double_reg()); break; ++ case lir_mul: __ fmul_d (dest->as_double_reg(), left->as_double_reg(), right->as_double_reg()); break; ++ case lir_div: __ fdiv_d (dest->as_double_reg(), left->as_double_reg(), right->as_double_reg()); break; ++ default: ShouldNotReachHere(); ++ } ++ } else { ++ if (right->is_constant()) { ++ ShouldNotReachHere(); ++ } ++ ShouldNotReachHere(); ++ } ++ } else if (left->is_single_stack() || left->is_address()) { ++ assert(left == dest, "left and dest must be equal"); ++ ShouldNotReachHere(); ++ } else { ++ ShouldNotReachHere(); ++ } ++} ++ ++void LIR_Assembler::arith_fpu_implementation(LIR_Code code, int left_index, int right_index, ++ int dest_index, bool pop_fpu_stack) { ++ Unimplemented(); ++} ++ ++void LIR_Assembler::intrinsic_op(LIR_Code code, LIR_Opr value, LIR_Opr unused, LIR_Opr dest, LIR_Op* op) { ++ switch(code) { ++ case lir_abs : __ fabs_d(dest->as_double_reg(), value->as_double_reg()); break; ++ case lir_sqrt: __ fsqrt_d(dest->as_double_reg(), value->as_double_reg()); break; ++ default : ShouldNotReachHere(); ++ } ++} ++ ++void LIR_Assembler::logic_op(LIR_Code code, LIR_Opr left, LIR_Opr right, LIR_Opr dst) { ++ assert(left->is_single_cpu() || left->is_double_cpu(), "expect single or double register"); ++ Register Rleft = left->is_single_cpu() ? 
left->as_register() : left->as_register_lo(); ++ ++ if (dst->is_single_cpu()) { ++ Register Rdst = dst->as_register(); ++ if (right->is_constant()) { ++ switch (code) { ++ case lir_logic_and: ++ if (Assembler::is_uimm(right->as_jint(), 12)) { ++ __ andi(Rdst, Rleft, right->as_jint()); ++ } else { ++ __ li(AT, right->as_jint()); ++ __ AND(Rdst, Rleft, AT); ++ } ++ break; ++ case lir_logic_or: __ ori(Rdst, Rleft, right->as_jint()); break; ++ case lir_logic_xor: __ xori(Rdst, Rleft, right->as_jint()); break; ++ default: ShouldNotReachHere(); break; ++ } ++ } else { ++ Register Rright = right->is_single_cpu() ? right->as_register() : right->as_register_lo(); ++ switch (code) { ++ case lir_logic_and: __ AND(Rdst, Rleft, Rright); break; ++ case lir_logic_or: __ OR(Rdst, Rleft, Rright); break; ++ case lir_logic_xor: __ XOR(Rdst, Rleft, Rright); break; ++ default: ShouldNotReachHere(); break; ++ } ++ } ++ } else { ++ Register Rdst = dst->as_register_lo(); ++ if (right->is_constant()) { ++ switch (code) { ++ case lir_logic_and: ++ if (Assembler::is_uimm(right->as_jlong(), 12)) { ++ __ andi(Rdst, Rleft, right->as_jlong()); ++ } else { ++ // We can guarantee that transform from HIR LogicOp is in range of ++ // uimm(12), but the common code directly generates LIR LogicAnd, ++ // and the right-operand is mask with all ones in the high bits. ++ __ li(AT, right->as_jlong()); ++ __ AND(Rdst, Rleft, AT); ++ } ++ break; ++ case lir_logic_or: __ ori(Rdst, Rleft, right->as_jlong()); break; ++ case lir_logic_xor: __ xori(Rdst, Rleft, right->as_jlong()); break; ++ default: ShouldNotReachHere(); break; ++ } ++ } else { ++ Register Rright = right->is_single_cpu() ? right->as_register() : right->as_register_lo(); ++ switch (code) { ++ case lir_logic_and: __ AND(Rdst, Rleft, Rright); break; ++ case lir_logic_or: __ OR(Rdst, Rleft, Rright); break; ++ case lir_logic_xor: __ XOR(Rdst, Rleft, Rright); break; ++ default: ShouldNotReachHere(); break; ++ } ++ } ++ } ++} ++ ++void LIR_Assembler::arithmetic_idiv(LIR_Code code, LIR_Opr left, LIR_Opr right, ++ LIR_Opr illegal, LIR_Opr result, CodeEmitInfo* info) { ++ // opcode check ++ assert((code == lir_idiv) || (code == lir_irem), "opcode must be idiv or irem"); ++ bool is_irem = (code == lir_irem); ++ ++ // operand check ++ assert(left->is_single_cpu(), "left must be register"); ++ assert(right->is_single_cpu() || right->is_constant(), "right must be register or constant"); ++ assert(result->is_single_cpu(), "result must be register"); ++ Register lreg = left->as_register(); ++ Register dreg = result->as_register(); ++ ++ // power-of-2 constant check and codegen ++ if (right->is_constant()) { ++ int c = right->as_constant_ptr()->as_jint(); ++ assert(c > 0 && is_power_of_2(c), "divisor must be power-of-2 constant"); ++ if (is_irem) { ++ if (c == 1) { ++ // move 0 to dreg if divisor is 1 ++ __ move(dreg, R0); ++ } else { ++ // use scr1/2 as intermediate result register ++ __ sub_w(SCR1, R0, lreg); ++ __ slt(SCR2, SCR1, R0); ++ __ andi(dreg, lreg, c - 1); ++ __ andi(SCR1, SCR1, c - 1); ++ __ sub_w(SCR1, R0, SCR1); ++ __ maskeqz(dreg, dreg, SCR2); ++ __ masknez(SCR1, SCR1, SCR2); ++ __ OR(dreg, dreg, SCR1); ++ } ++ } else { ++ if (c == 1) { ++ // move lreg to dreg if divisor is 1 ++ __ move(dreg, lreg); ++ } else { ++ unsigned int shift = exact_log2(c); ++ // use scr1 as intermediate result register ++ __ srai_w(SCR1, lreg, 31); ++ __ srli_w(SCR1, SCR1, 32 - shift); ++ __ add_w(SCR1, lreg, SCR1); ++ __ srai_w(dreg, SCR1, shift); ++ } ++ } ++ } else { ++ Register rreg = 
right->as_register(); ++ if (is_irem) ++ __ mod_w(dreg, lreg, rreg); ++ else ++ __ div_w(dreg, lreg, rreg); ++ } ++} ++ ++void LIR_Assembler::comp_op(LIR_Condition condition, LIR_Opr opr1, LIR_Opr opr2, LIR_Op2* op) { ++ Unimplemented(); ++} ++ ++void LIR_Assembler::comp_fl2i(LIR_Code code, LIR_Opr left, LIR_Opr right, LIR_Opr dst, LIR_Op2* op){ ++ if (code == lir_cmp_fd2i || code == lir_ucmp_fd2i) { ++ bool is_unordered_less = (code == lir_ucmp_fd2i); ++ if (left->is_single_fpu()) { ++ if (is_unordered_less) { ++ __ fcmp_clt_s(FCC0, right->as_float_reg(), left->as_float_reg()); ++ __ fcmp_cult_s(FCC1, left->as_float_reg(), right->as_float_reg()); ++ } else { ++ __ fcmp_cult_s(FCC0, right->as_float_reg(), left->as_float_reg()); ++ __ fcmp_clt_s(FCC1, left->as_float_reg(), right->as_float_reg()); ++ } ++ } else if (left->is_double_fpu()) { ++ if (is_unordered_less) { ++ __ fcmp_clt_d(FCC0, right->as_double_reg(), left->as_double_reg()); ++ __ fcmp_cult_d(FCC1, left->as_double_reg(), right->as_double_reg()); ++ } else { ++ __ fcmp_cult_d(FCC0, right->as_double_reg(), left->as_double_reg()); ++ __ fcmp_clt_d(FCC1, left->as_double_reg(), right->as_double_reg()); ++ } ++ } else { ++ ShouldNotReachHere(); ++ } ++ __ movcf2gr(dst->as_register(), FCC0); ++ __ movcf2gr(SCR1, FCC1); ++ __ sub_d(dst->as_register(), dst->as_register(), SCR1); ++ } else if (code == lir_cmp_l2i) { ++ __ slt(SCR1, left->as_register_lo(), right->as_register_lo()); ++ __ slt(dst->as_register(), right->as_register_lo(), left->as_register_lo()); ++ __ sub_d(dst->as_register(), dst->as_register(), SCR1); ++ } else { ++ ShouldNotReachHere(); ++ } ++} ++ ++void LIR_Assembler::align_call(LIR_Code code) {} ++ ++void LIR_Assembler::call(LIR_OpJavaCall* op, relocInfo::relocType rtype) { ++ address call = __ trampoline_call(AddressLiteral(op->addr(), rtype)); ++ if (call == NULL) { ++ bailout("trampoline stub overflow"); ++ return; ++ } ++ add_call_info(code_offset(), op->info()); ++} ++ ++void LIR_Assembler::ic_call(LIR_OpJavaCall* op) { ++ address call = __ ic_call(op->addr()); ++ if (call == NULL) { ++ bailout("trampoline stub overflow"); ++ return; ++ } ++ add_call_info(code_offset(), op->info()); ++} ++ ++void LIR_Assembler::emit_static_call_stub() { ++ address call_pc = __ pc(); ++ address stub = __ start_a_stub(call_stub_size()); ++ if (stub == NULL) { ++ bailout("static call stub overflow"); ++ return; ++ } ++ ++ int start = __ offset(); ++ ++ __ relocate(static_stub_Relocation::spec(call_pc)); ++ ++ // Code stream for loading method may be changed. ++ __ ibar(0); ++ ++ // Rmethod contains Method*, it should be relocated for GC ++ // static stub relocation also tags the Method* in the code-stream. ++ __ mov_metadata(Rmethod, NULL); ++ // This is recognized as unresolved by relocs/nativeInst/ic code ++ __ patchable_jump(__ pc()); ++ ++ assert(__ offset() - start + CompiledStaticCall::to_trampoline_stub_size() <= call_stub_size(), ++ "stub too big"); ++ __ end_a_stub(); ++} ++ ++void LIR_Assembler::throw_op(LIR_Opr exceptionPC, LIR_Opr exceptionOop, CodeEmitInfo* info) { ++ assert(exceptionOop->as_register() == A0, "must match"); ++ assert(exceptionPC->as_register() == A1, "must match"); ++ ++ // exception object is not added to oop map by LinearScan ++ // (LinearScan assumes that no oops are in fixed registers) ++ info->add_register_oop(exceptionOop); ++ Runtime1::StubID unwind_id; ++ ++ // get current pc information ++ // pc is only needed if the method has an exception handler, the unwind code does not need it. 
++ if (compilation()->debug_info_recorder()->last_pc_offset() == __ offset()) { ++ // As no instructions have been generated yet for this LIR node it's ++ // possible that an oop map already exists for the current offset. ++ // In that case insert an dummy NOP here to ensure all oop map PCs ++ // are unique. See JDK-8237483. ++ __ nop(); ++ } ++ Label L; ++ int pc_for_athrow_offset = __ offset(); ++ __ bind(L); ++ __ lipc(exceptionPC->as_register(), L); ++ add_call_info(pc_for_athrow_offset, info); // for exception handler ++ ++ __ verify_not_null_oop(A0); ++ // search an exception handler (A0: exception oop, A1: throwing pc) ++ if (compilation()->has_fpu_code()) { ++ unwind_id = Runtime1::handle_exception_id; ++ } else { ++ unwind_id = Runtime1::handle_exception_nofpu_id; ++ } ++ __ call(Runtime1::entry_for(unwind_id), relocInfo::runtime_call_type); ++ ++ // FIXME: enough room for two byte trap ???? ++ __ nop(); ++} ++ ++void LIR_Assembler::unwind_op(LIR_Opr exceptionOop) { ++ assert(exceptionOop->as_register() == A0, "must match"); ++ __ b(_unwind_handler_entry); ++} ++ ++void LIR_Assembler::shift_op(LIR_Code code, LIR_Opr left, LIR_Opr count, LIR_Opr dest, LIR_Opr tmp) { ++ Register lreg = left->is_single_cpu() ? left->as_register() : left->as_register_lo(); ++ Register dreg = dest->is_single_cpu() ? dest->as_register() : dest->as_register_lo(); ++ ++ switch (left->type()) { ++ case T_INT: { ++ switch (code) { ++ case lir_shl: __ sll_w(dreg, lreg, count->as_register()); break; ++ case lir_shr: __ sra_w(dreg, lreg, count->as_register()); break; ++ case lir_ushr: __ srl_w(dreg, lreg, count->as_register()); break; ++ default: ShouldNotReachHere(); break; ++ } ++ break; ++ case T_LONG: ++ case T_ADDRESS: ++ case T_OBJECT: ++ switch (code) { ++ case lir_shl: __ sll_d(dreg, lreg, count->as_register()); break; ++ case lir_shr: __ sra_d(dreg, lreg, count->as_register()); break; ++ case lir_ushr: __ srl_d(dreg, lreg, count->as_register()); break; ++ default: ShouldNotReachHere(); break; ++ } ++ break; ++ default: ++ ShouldNotReachHere(); ++ break; ++ } ++ } ++} ++ ++void LIR_Assembler::shift_op(LIR_Code code, LIR_Opr left, jint count, LIR_Opr dest) { ++ Register dreg = dest->is_single_cpu() ? dest->as_register() : dest->as_register_lo(); ++ Register lreg = left->is_single_cpu() ? 
left->as_register() : left->as_register_lo(); ++ ++ switch (left->type()) { ++ case T_INT: { ++ switch (code) { ++ case lir_shl: __ slli_w(dreg, lreg, count); break; ++ case lir_shr: __ srai_w(dreg, lreg, count); break; ++ case lir_ushr: __ srli_w(dreg, lreg, count); break; ++ default: ShouldNotReachHere(); break; ++ } ++ break; ++ case T_LONG: ++ case T_ADDRESS: ++ case T_OBJECT: ++ switch (code) { ++ case lir_shl: __ slli_d(dreg, lreg, count); break; ++ case lir_shr: __ srai_d(dreg, lreg, count); break; ++ case lir_ushr: __ srli_d(dreg, lreg, count); break; ++ default: ShouldNotReachHere(); break; ++ } ++ break; ++ default: ++ ShouldNotReachHere(); ++ break; ++ } ++ } ++} ++ ++void LIR_Assembler::store_parameter(Register r, int offset_from_sp_in_words) { ++ assert(offset_from_sp_in_words >= 0, "invalid offset from sp"); ++ int offset_from_sp_in_bytes = offset_from_sp_in_words * BytesPerWord; ++ assert(offset_from_sp_in_bytes < frame_map()->reserved_argument_area_size(), "invalid offset"); ++ __ st_ptr(r, Address(SP, offset_from_sp_in_bytes)); ++} ++ ++void LIR_Assembler::store_parameter(jint c, int offset_from_sp_in_words) { ++ assert(offset_from_sp_in_words >= 0, "invalid offset from sp"); ++ int offset_from_sp_in_bytes = offset_from_sp_in_words * BytesPerWord; ++ assert(offset_from_sp_in_bytes < frame_map()->reserved_argument_area_size(), "invalid offset"); ++ __ li(SCR2, c); ++ __ st_ptr(SCR2, Address(SP, offset_from_sp_in_bytes)); ++} ++ ++void LIR_Assembler::store_parameter(jobject o, int offset_from_sp_in_words) { ++ ShouldNotReachHere(); ++} ++ ++// This code replaces a call to arraycopy; no exception may ++// be thrown in this code, they must be thrown in the System.arraycopy ++// activation frame; we could save some checks if this would not be the case ++void LIR_Assembler::emit_arraycopy(LIR_OpArrayCopy* op) { ++ Register j_rarg0 = T0; ++ Register j_rarg1 = A0; ++ Register j_rarg2 = A1; ++ Register j_rarg3 = A2; ++ Register j_rarg4 = A3; ++ ++ ciArrayKlass* default_type = op->expected_type(); ++ Register src = op->src()->as_register(); ++ Register dst = op->dst()->as_register(); ++ Register src_pos = op->src_pos()->as_register(); ++ Register dst_pos = op->dst_pos()->as_register(); ++ Register length = op->length()->as_register(); ++ Register tmp = op->tmp()->as_register(); ++ ++ CodeStub* stub = op->stub(); ++ int flags = op->flags(); ++ BasicType basic_type = default_type != NULL ? 
default_type->element_type()->basic_type() : T_ILLEGAL; ++ if (is_reference_type(basic_type)) ++ basic_type = T_OBJECT; ++ ++ // if we don't know anything, just go through the generic arraycopy ++ if (default_type == NULL) { ++ Label done; ++ assert(src == T0 && src_pos == A0, "mismatch in calling convention"); ++ ++ // Save the arguments in case the generic arraycopy fails and we ++ // have to fall back to the JNI stub ++ __ st_ptr(dst, Address(SP, 0 * BytesPerWord)); ++ __ st_ptr(dst_pos, Address(SP, 1 * BytesPerWord)); ++ __ st_ptr(length, Address(SP, 2 * BytesPerWord)); ++ __ st_ptr(src_pos, Address(SP, 3 * BytesPerWord)); ++ __ st_ptr(src, Address(SP, 4 * BytesPerWord)); ++ ++ address copyfunc_addr = StubRoutines::generic_arraycopy(); ++ assert(copyfunc_addr != NULL, "generic arraycopy stub required"); ++ ++ // The arguments are in java calling convention so we shift them ++ // to C convention ++ assert_different_registers(A4, j_rarg0, j_rarg1, j_rarg2, j_rarg3); ++ __ move(A4, j_rarg4); ++ assert_different_registers(A3, j_rarg0, j_rarg1, j_rarg2); ++ __ move(A3, j_rarg3); ++ assert_different_registers(A2, j_rarg0, j_rarg1); ++ __ move(A2, j_rarg2); ++ assert_different_registers(A1, j_rarg0); ++ __ move(A1, j_rarg1); ++ __ move(A0, j_rarg0); ++#ifndef PRODUCT ++ if (PrintC1Statistics) { ++ __ li(SCR2, (address)&Runtime1::_generic_arraycopystub_cnt); ++ __ increment(SCR2, 1); ++ } ++#endif ++ __ call(copyfunc_addr, relocInfo::runtime_call_type); ++ ++ __ beqz(A0, *stub->continuation()); ++ __ move(tmp, A0); ++ ++ // Reload values from the stack so they are where the stub ++ // expects them. ++ __ ld_ptr(dst, Address(SP, 0 * BytesPerWord)); ++ __ ld_ptr(dst_pos, Address(SP, 1 * BytesPerWord)); ++ __ ld_ptr(length, Address(SP, 2 * BytesPerWord)); ++ __ ld_ptr(src_pos, Address(SP, 3 * BytesPerWord)); ++ __ ld_ptr(src, Address(SP, 4 * BytesPerWord)); ++ ++ // tmp is -1^K where K == partial copied count ++ __ nor(SCR1, tmp, R0); ++ // adjust length down and src/end pos up by partial copied count ++ __ sub_w(length, length, SCR1); ++ __ add_w(src_pos, src_pos, SCR1); ++ __ add_w(dst_pos, dst_pos, SCR1); ++ __ b(*stub->entry()); ++ ++ __ bind(*stub->continuation()); ++ return; ++ } ++ ++ assert(default_type != NULL && default_type->is_array_klass() && default_type->is_loaded(), ++ "must be true at this point"); ++ ++ int elem_size = type2aelembytes(basic_type); ++ Address::ScaleFactor scale = Address::times(elem_size); ++ ++ Address src_length_addr = Address(src, arrayOopDesc::length_offset_in_bytes()); ++ Address dst_length_addr = Address(dst, arrayOopDesc::length_offset_in_bytes()); ++ Address src_klass_addr = Address(src, oopDesc::klass_offset_in_bytes()); ++ Address dst_klass_addr = Address(dst, oopDesc::klass_offset_in_bytes()); ++ ++ // test for NULL ++ if (flags & LIR_OpArrayCopy::src_null_check) { ++ __ beqz(src, *stub->entry()); ++ } ++ if (flags & LIR_OpArrayCopy::dst_null_check) { ++ __ beqz(dst, *stub->entry()); ++ } ++ ++ // If the compiler was not able to prove that exact type of the source or the destination ++ // of the arraycopy is an array type, check at runtime if the source or the destination is ++ // an instance type. 
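The generic-arraycopy call above relies on the stub's return convention: 0 means the whole copy succeeded, otherwise the stub returns -1 ^ K (the bitwise NOT of K), where K is the number of elements it did manage to copy. That is why the code takes nor(SCR1, tmp, R0) and then advances the positions and shrinks the length before branching to the slow path. A small sketch of that bookkeeping (CopyState and adjust_after_partial_copy are illustrative names, not HotSpot types):

    #include <cassert>

    struct CopyState { int src_pos, dst_pos, length; };

    // stub_return is 0 on full success, otherwise ~K with K = elements copied.
    void adjust_after_partial_copy(CopyState& s, long stub_return) {
      if (stub_return == 0) return;        // nothing left to do
      long copied = ~stub_return;          // recover K
      s.length  -= (int)copied;
      s.src_pos += (int)copied;
      s.dst_pos += (int)copied;            // the slow path retries the remainder
    }

    int main() {
      CopyState s{0, 0, 10};
      adjust_after_partial_copy(s, ~4L);   // stub managed 4 of 10 elements
      assert(s.src_pos == 4 && s.dst_pos == 4 && s.length == 6);
      return 0;
    }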
++ if (flags & LIR_OpArrayCopy::type_check) { ++ if (!(flags & LIR_OpArrayCopy::LIR_OpArrayCopy::dst_objarray)) { ++ __ load_klass(tmp, dst); ++ __ ld_w(SCR1, Address(tmp, in_bytes(Klass::layout_helper_offset()))); ++ __ li(SCR2, Klass::_lh_neutral_value); ++ __ bge_far(SCR1, SCR2, *stub->entry(), true); ++ } ++ ++ if (!(flags & LIR_OpArrayCopy::LIR_OpArrayCopy::src_objarray)) { ++ __ load_klass(tmp, src); ++ __ ld_w(SCR1, Address(tmp, in_bytes(Klass::layout_helper_offset()))); ++ __ li(SCR2, Klass::_lh_neutral_value); ++ __ bge_far(SCR1, SCR2, *stub->entry(), true); ++ } ++ } ++ ++ // check if negative ++ if (flags & LIR_OpArrayCopy::src_pos_positive_check) { ++ __ blt_far(src_pos, R0, *stub->entry(), true); ++ } ++ if (flags & LIR_OpArrayCopy::dst_pos_positive_check) { ++ __ blt_far(dst_pos, R0, *stub->entry(), true); ++ } ++ ++ if (flags & LIR_OpArrayCopy::length_positive_check) { ++ __ blt_far(length, R0, *stub->entry(), true); ++ } ++ ++ if (flags & LIR_OpArrayCopy::src_range_check) { ++ __ add_w(tmp, src_pos, length); ++ __ ld_wu(SCR1, src_length_addr); ++ __ blt_far(SCR1, tmp, *stub->entry(), false); ++ } ++ if (flags & LIR_OpArrayCopy::dst_range_check) { ++ __ add_w(tmp, dst_pos, length); ++ __ ld_wu(SCR1, dst_length_addr); ++ __ blt_far(SCR1, tmp, *stub->entry(), false); ++ } ++ ++ if (flags & LIR_OpArrayCopy::type_check) { ++ // We don't know the array types are compatible ++ if (basic_type != T_OBJECT) { ++ // Simple test for basic type arrays ++ if (UseCompressedClassPointers) { ++ __ ld_wu(tmp, src_klass_addr); ++ __ ld_wu(SCR1, dst_klass_addr); ++ } else { ++ __ ld_ptr(tmp, src_klass_addr); ++ __ ld_ptr(SCR1, dst_klass_addr); ++ } ++ __ bne_far(tmp, SCR1, *stub->entry()); ++ } else { ++ // For object arrays, if src is a sub class of dst then we can ++ // safely do the copy. ++ Label cont, slow; ++ ++ __ addi_d(SP, SP, -2 * wordSize); ++ __ st_ptr(dst, Address(SP, 0 * wordSize)); ++ __ st_ptr(src, Address(SP, 1 * wordSize)); ++ ++ __ load_klass(src, src); ++ __ load_klass(dst, dst); ++ ++ __ check_klass_subtype_fast_path(src, dst, tmp, &cont, &slow, NULL); ++ ++ __ addi_d(SP, SP, -2 * wordSize); ++ __ st_ptr(dst, Address(SP, 0 * wordSize)); ++ __ st_ptr(src, Address(SP, 1 * wordSize)); ++ __ call(Runtime1::entry_for(Runtime1::slow_subtype_check_id), relocInfo::runtime_call_type); ++ __ ld_ptr(dst, Address(SP, 0 * wordSize)); ++ __ ld_ptr(src, Address(SP, 1 * wordSize)); ++ __ addi_d(SP, SP, 2 * wordSize); ++ ++ __ bnez(dst, cont); ++ ++ __ bind(slow); ++ __ ld_ptr(dst, Address(SP, 0 * wordSize)); ++ __ ld_ptr(src, Address(SP, 1 * wordSize)); ++ __ addi_d(SP, SP, 2 * wordSize); ++ ++ address copyfunc_addr = StubRoutines::checkcast_arraycopy(); ++ if (copyfunc_addr != NULL) { // use stub if available ++ // src is not a sub class of dst so we have to do a ++ // per-element check. ++ ++ int mask = LIR_OpArrayCopy::src_objarray|LIR_OpArrayCopy::dst_objarray; ++ if ((flags & mask) != mask) { ++ // Check that at least both of them object arrays. 
++ assert(flags & mask, "one of the two should be known to be an object array"); ++ ++ if (!(flags & LIR_OpArrayCopy::src_objarray)) { ++ __ load_klass(tmp, src); ++ } else if (!(flags & LIR_OpArrayCopy::dst_objarray)) { ++ __ load_klass(tmp, dst); ++ } ++ int lh_offset = in_bytes(Klass::layout_helper_offset()); ++ Address klass_lh_addr(tmp, lh_offset); ++ jint objArray_lh = Klass::array_layout_helper(T_OBJECT); ++ __ ld_w(SCR1, klass_lh_addr); ++ __ li(SCR2, objArray_lh); ++ __ XOR(SCR1, SCR1, SCR2); ++ __ bnez(SCR1, *stub->entry()); ++ } ++ ++ // Spill because stubs can use any register they like and it's ++ // easier to restore just those that we care about. ++ __ st_ptr(dst, Address(SP, 0 * BytesPerWord)); ++ __ st_ptr(dst_pos, Address(SP, 1 * BytesPerWord)); ++ __ st_ptr(length, Address(SP, 2 * BytesPerWord)); ++ __ st_ptr(src_pos, Address(SP, 3 * BytesPerWord)); ++ __ st_ptr(src, Address(SP, 4 * BytesPerWord)); ++ ++ __ lea(A0, Address(src, src_pos, scale)); ++ __ addi_d(A0, A0, arrayOopDesc::base_offset_in_bytes(basic_type)); ++ assert_different_registers(A0, dst, dst_pos, length); ++ __ load_klass(A4, dst); ++ assert_different_registers(A4, dst, dst_pos, length); ++ __ lea(A1, Address(dst, dst_pos, scale)); ++ __ addi_d(A1, A1, arrayOopDesc::base_offset_in_bytes(basic_type)); ++ assert_different_registers(A1, length); ++ __ bstrpick_d(A2, length, 31, 0); ++ __ ld_ptr(A4, Address(A4, ObjArrayKlass::element_klass_offset())); ++ __ ld_w(A3, Address(A4, Klass::super_check_offset_offset())); ++ __ call(copyfunc_addr, relocInfo::runtime_call_type); ++ ++#ifndef PRODUCT ++ if (PrintC1Statistics) { ++ Label failed; ++ __ bnez(A0, failed); ++ __ li(SCR2, (address)&Runtime1::_arraycopy_checkcast_cnt); ++ __ increment(SCR2, 1); ++ __ bind(failed); ++ } ++#endif ++ ++ __ beqz(A0, *stub->continuation()); ++ ++#ifndef PRODUCT ++ if (PrintC1Statistics) { ++ __ li(SCR2, (address)&Runtime1::_arraycopy_checkcast_attempt_cnt); ++ __ increment(SCR2, 1); ++ } ++#endif ++ assert_different_registers(dst, dst_pos, length, src_pos, src, tmp, SCR1); ++ __ move(tmp, A0); ++ ++ // Restore previously spilled arguments ++ __ ld_ptr(dst, Address(SP, 0 * BytesPerWord)); ++ __ ld_ptr(dst_pos, Address(SP, 1 * BytesPerWord)); ++ __ ld_ptr(length, Address(SP, 2 * BytesPerWord)); ++ __ ld_ptr(src_pos, Address(SP, 3 * BytesPerWord)); ++ __ ld_ptr(src, Address(SP, 4 * BytesPerWord)); ++ ++ // return value is -1^K where K is partial copied count ++ __ nor(SCR1, tmp, R0); ++ // adjust length down and src/end pos up by partial copied count ++ __ sub_w(length, length, SCR1); ++ __ add_w(src_pos, src_pos, SCR1); ++ __ add_w(dst_pos, dst_pos, SCR1); ++ } ++ ++ __ b(*stub->entry()); ++ ++ __ bind(cont); ++ __ ld_ptr(dst, Address(SP, 0 * wordSize)); ++ __ ld_ptr(src, Address(SP, 1 * wordSize)); ++ __ addi_d(SP, SP, 2 * wordSize); ++ } ++ } ++ ++#ifdef ASSERT ++ if (basic_type != T_OBJECT || !(flags & LIR_OpArrayCopy::type_check)) { ++ // Sanity check the known type with the incoming class. For the ++ // primitive case the types must match exactly with src.klass and ++ // dst.klass each exactly matching the default type. For the ++ // object array case, if no type check is needed then either the ++ // dst type is exactly the expected type and the src type is a ++ // subtype which we can't check or src is the same array as dst ++ // but not necessarily exactly of type default_type. 
++ Label known_ok, halt; ++ __ mov_metadata(tmp, default_type->constant_encoding()); ++ if (UseCompressedClassPointers) { ++ __ encode_klass_not_null(tmp); ++ } ++ ++ if (basic_type != T_OBJECT) { ++ ++ if (UseCompressedClassPointers) { ++ __ ld_wu(SCR1, dst_klass_addr); ++ } else { ++ __ ld_ptr(SCR1, dst_klass_addr); ++ } ++ __ bne(tmp, SCR1, halt); ++ if (UseCompressedClassPointers) { ++ __ ld_wu(SCR1, src_klass_addr); ++ } else { ++ __ ld_ptr(SCR1, src_klass_addr); ++ } ++ __ beq(tmp, SCR1, known_ok); ++ } else { ++ if (UseCompressedClassPointers) { ++ __ ld_wu(SCR1, dst_klass_addr); ++ } else { ++ __ ld_ptr(SCR1, dst_klass_addr); ++ } ++ __ beq(tmp, SCR1, known_ok); ++ __ beq(src, dst, known_ok); ++ } ++ __ bind(halt); ++ __ stop("incorrect type information in arraycopy"); ++ __ bind(known_ok); ++ } ++#endif ++ ++#ifndef PRODUCT ++ if (PrintC1Statistics) { ++ __ li(SCR2, Runtime1::arraycopy_count_address(basic_type)); ++ __ increment(SCR2, 1); ++ } ++#endif ++ ++ __ lea(A0, Address(src, src_pos, scale)); ++ __ addi_d(A0, A0, arrayOopDesc::base_offset_in_bytes(basic_type)); ++ assert_different_registers(A0, dst, dst_pos, length); ++ __ lea(A1, Address(dst, dst_pos, scale)); ++ __ addi_d(A1, A1, arrayOopDesc::base_offset_in_bytes(basic_type)); ++ assert_different_registers(A1, length); ++ __ bstrpick_d(A2, length, 31, 0); ++ ++ bool disjoint = (flags & LIR_OpArrayCopy::overlapping) == 0; ++ bool aligned = (flags & LIR_OpArrayCopy::unaligned) == 0; ++ const char *name; ++ address entry = StubRoutines::select_arraycopy_function(basic_type, aligned, disjoint, name, false); ++ ++ CodeBlob *cb = CodeCache::find_blob(entry); ++ if (cb) { ++ __ call(entry, relocInfo::runtime_call_type); ++ } else { ++ __ call_VM_leaf(entry, 3); ++ } ++ ++ __ bind(*stub->continuation()); ++} ++ ++void LIR_Assembler::emit_lock(LIR_OpLock* op) { ++ Register obj = op->obj_opr()->as_register(); // may not be an oop ++ Register hdr = op->hdr_opr()->as_register(); ++ Register lock = op->lock_opr()->as_register(); ++ if (!UseFastLocking) { ++ __ b(*op->stub()->entry()); ++ } else if (op->code() == lir_lock) { ++ Register scratch = noreg; ++ if (UseBiasedLocking) { ++ scratch = op->scratch_opr()->as_register(); ++ } ++ assert(BasicLock::displaced_header_offset_in_bytes() == 0, ++ "lock_reg must point to the displaced header"); ++ // add debug info for NullPointerException only if one is possible ++ int null_check_offset = __ lock_object(hdr, obj, lock, scratch, *op->stub()->entry()); ++ if (op->info() != NULL) { ++ add_debug_info_for_null_check(null_check_offset, op->info()); ++ } ++ // done ++ } else if (op->code() == lir_unlock) { ++ assert(BasicLock::displaced_header_offset_in_bytes() == 0, ++ "lock_reg must point to the displaced header"); ++ __ unlock_object(hdr, obj, lock, *op->stub()->entry()); ++ } else { ++ Unimplemented(); ++ } ++ __ bind(*op->stub()->continuation()); ++} ++ ++void LIR_Assembler::emit_load_klass(LIR_OpLoadKlass* op) { ++ Register obj = op->obj()->as_pointer_register(); ++ Register result = op->result_opr()->as_pointer_register(); ++ ++ CodeEmitInfo* info = op->info(); ++ if (info != NULL) { ++ add_debug_info_for_null_check_here(info); ++ } ++ ++ if (UseCompressedClassPointers) { ++ __ ld_wu(result, obj, oopDesc::klass_offset_in_bytes()); ++ __ decode_klass_not_null(result); ++ } else { ++ __ ld_ptr(result, obj, oopDesc::klass_offset_in_bytes()); ++ } ++} ++ ++void LIR_Assembler::emit_profile_call(LIR_OpProfileCall* op) { ++ ciMethod* method = op->profiled_method(); ++ ciMethod* callee = 
op->profiled_callee(); ++ int bci = op->profiled_bci(); ++ ++ // Update counter for all call types ++ ciMethodData* md = method->method_data_or_null(); ++ assert(md != NULL, "Sanity"); ++ ciProfileData* data = md->bci_to_data(bci); ++ assert(data != NULL && data->is_CounterData(), "need CounterData for calls"); ++ assert(op->mdo()->is_single_cpu(), "mdo must be allocated"); ++ Register mdo = op->mdo()->as_register(); ++ __ mov_metadata(mdo, md->constant_encoding()); ++ Address counter_addr(mdo, md->byte_offset_of_slot(data, CounterData::count_offset())); ++ // Perform additional virtual call profiling for invokevirtual and ++ // invokeinterface bytecodes ++ if (op->should_profile_receiver_type()) { ++ assert(op->recv()->is_single_cpu(), "recv must be allocated"); ++ Register recv = op->recv()->as_register(); ++ assert_different_registers(mdo, recv); ++ assert(data->is_VirtualCallData(), "need VirtualCallData for virtual calls"); ++ ciKlass* known_klass = op->known_holder(); ++ if (C1OptimizeVirtualCallProfiling && known_klass != NULL) { ++ // We know the type that will be seen at this call site; we can ++ // statically update the MethodData* rather than needing to do ++ // dynamic tests on the receiver type ++ ++ // NOTE: we should probably put a lock around this search to ++ // avoid collisions by concurrent compilations ++ ciVirtualCallData* vc_data = (ciVirtualCallData*) data; ++ uint i; ++ for (i = 0; i < VirtualCallData::row_limit(); i++) { ++ ciKlass* receiver = vc_data->receiver(i); ++ if (known_klass->equals(receiver)) { ++ Address data_addr(mdo, md->byte_offset_of_slot(data, VirtualCallData::receiver_count_offset(i))); ++ __ ld_ptr(SCR2, data_addr); ++ __ addi_d(SCR2, SCR2, DataLayout::counter_increment); ++ __ st_ptr(SCR2, data_addr); ++ return; ++ } ++ } ++ ++ // Receiver type not found in profile data; select an empty slot ++ ++ // Note that this is less efficient than it should be because it ++ // always does a write to the receiver part of the ++ // VirtualCallData rather than just the first time ++ for (i = 0; i < VirtualCallData::row_limit(); i++) { ++ ciKlass* receiver = vc_data->receiver(i); ++ if (receiver == NULL) { ++ Address recv_addr(mdo, md->byte_offset_of_slot(data, VirtualCallData::receiver_offset(i))); ++ __ mov_metadata(SCR2, known_klass->constant_encoding()); ++ __ lea(SCR1, recv_addr); ++ __ st_ptr(SCR2, SCR1, 0); ++ Address data_addr(mdo, md->byte_offset_of_slot(data, VirtualCallData::receiver_count_offset(i))); ++ __ ld_ptr(SCR2, data_addr); ++ __ addi_d(SCR2, SCR1, DataLayout::counter_increment); ++ __ st_ptr(SCR2, data_addr); ++ return; ++ } ++ } ++ } else { ++ __ load_klass(recv, recv); ++ Label update_done; ++ type_profile_helper(mdo, md, data, recv, &update_done); ++ // Receiver did not match any saved receiver and there is no empty row for it. ++ // Increment total counter to indicate polymorphic case. 
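The receiver handling in emit_profile_call above boils down to a tiny fixed-size cache per call site: find a row that already holds this receiver klass and bump its counter, otherwise claim the first empty row, and if every row is taken fall back to the plain call counter so the site is recorded as polymorphic. An illustrative model (the struct below is not the real MethodData layout, and row_limit is just a stand-in for VirtualCallData::row_limit()):

    #include <cassert>
    #include <cstddef>

    struct CallProfile {
      static const size_t row_limit = 2;          // assumed small, as in practice
      const void* receiver[row_limit] = {};
      long        receiver_count[row_limit] = {};
      long        total_count = 0;
    };

    void profile_receiver(CallProfile& p, const void* klass) {
      for (size_t i = 0; i < CallProfile::row_limit; i++)
        if (p.receiver[i] == klass) { p.receiver_count[i]++; return; }
      for (size_t i = 0; i < CallProfile::row_limit; i++)
        if (p.receiver[i] == nullptr) { p.receiver[i] = klass; p.receiver_count[i]++; return; }
      p.total_count++;   // no row matched and none free: polymorphic case
    }

    int main() {
      CallProfile p;
      int a, b, c;
      profile_receiver(p, &a); profile_receiver(p, &a); profile_receiver(p, &b);
      profile_receiver(p, &c);                     // third distinct receiver type
      assert(p.receiver_count[0] == 2 && p.total_count == 1);
      return 0;
    }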
++ __ ld_ptr(SCR2, counter_addr); ++ __ addi_d(SCR2, SCR2, DataLayout::counter_increment); ++ __ st_ptr(SCR2, counter_addr); ++ ++ __ bind(update_done); ++ } ++ } else { ++ // Static call ++ __ ld_ptr(SCR2, counter_addr); ++ __ addi_d(SCR2, SCR2, DataLayout::counter_increment); ++ __ st_ptr(SCR2, counter_addr); ++ } ++} ++ ++void LIR_Assembler::emit_delay(LIR_OpDelay*) { ++ Unimplemented(); ++} ++ ++void LIR_Assembler::monitor_address(int monitor_no, LIR_Opr dst) { ++ __ lea(dst->as_register(), frame_map()->address_for_monitor_lock(monitor_no)); ++} ++ ++void LIR_Assembler::emit_updatecrc32(LIR_OpUpdateCRC32* op) { ++ assert(op->crc()->is_single_cpu(), "crc must be register"); ++ assert(op->val()->is_single_cpu(), "byte value must be register"); ++ assert(op->result_opr()->is_single_cpu(), "result must be register"); ++ Register crc = op->crc()->as_register(); ++ Register val = op->val()->as_register(); ++ Register res = op->result_opr()->as_register(); ++ ++ assert_different_registers(val, crc, res); ++ __ li(res, StubRoutines::crc_table_addr()); ++ __ nor(crc, crc, R0); // ~crc ++ __ update_byte_crc32(crc, val, res); ++ __ nor(res, crc, R0); // ~crc ++} ++ ++void LIR_Assembler::emit_profile_type(LIR_OpProfileType* op) { ++ COMMENT("emit_profile_type {"); ++ Register obj = op->obj()->as_register(); ++ Register tmp = op->tmp()->as_pointer_register(); ++ Address mdo_addr = as_Address(op->mdp()->as_address_ptr()); ++ ciKlass* exact_klass = op->exact_klass(); ++ intptr_t current_klass = op->current_klass(); ++ bool not_null = op->not_null(); ++ bool no_conflict = op->no_conflict(); ++ ++ Label update, next, none; ++ ++ bool do_null = !not_null; ++ bool exact_klass_set = exact_klass != NULL && ciTypeEntries::valid_ciklass(current_klass) == exact_klass; ++ bool do_update = !TypeEntries::is_type_unknown(current_klass) && !exact_klass_set; ++ ++ assert(do_null || do_update, "why are we here?"); ++ assert(!TypeEntries::was_null_seen(current_klass) || do_update, "why are we here?"); ++ assert(mdo_addr.base() != SCR1, "wrong register"); ++ ++ __ verify_oop(obj); ++ ++ if (tmp != obj) { ++ __ move(tmp, obj); ++ } ++ if (do_null) { ++ __ bnez(tmp, update); ++ if (!TypeEntries::was_null_seen(current_klass)) { ++ __ ld_ptr(SCR2, mdo_addr); ++ __ ori(SCR2, SCR2, TypeEntries::null_seen); ++ __ st_ptr(SCR2, mdo_addr); ++ } ++ if (do_update) { ++#ifndef ASSERT ++ __ b(next); ++ } ++#else ++ __ b(next); ++ } ++ } else { ++ __ bnez(tmp, update); ++ __ stop("unexpected null obj"); ++#endif ++ } ++ ++ __ bind(update); ++ ++ if (do_update) { ++#ifdef ASSERT ++ if (exact_klass != NULL) { ++ Label ok; ++ __ load_klass(tmp, tmp); ++ __ mov_metadata(SCR1, exact_klass->constant_encoding()); ++ __ XOR(SCR1, tmp, SCR1); ++ __ beqz(SCR1, ok); ++ __ stop("exact klass and actual klass differ"); ++ __ bind(ok); ++ } ++#endif ++ if (!no_conflict) { ++ if (exact_klass == NULL || TypeEntries::is_type_none(current_klass)) { ++ if (exact_klass != NULL) { ++ __ mov_metadata(tmp, exact_klass->constant_encoding()); ++ } else { ++ __ load_klass(tmp, tmp); ++ } ++ ++ __ ld_ptr(SCR2, mdo_addr); ++ __ XOR(tmp, tmp, SCR2); ++ assert(TypeEntries::type_klass_mask == -4, "must be"); ++ __ bstrpick_d(SCR1, tmp, 63, 2); ++ // klass seen before, nothing to do. The unknown bit may have been ++ // set already but no need to check. ++ __ beqz(SCR1, next); ++ ++ __ andi(SCR1, tmp, TypeEntries::type_unknown); ++ __ bnez(SCR1, next); // already unknown. Nothing to do anymore. 
++ ++ if (TypeEntries::is_type_none(current_klass)) { ++ __ beqz(SCR2, none); ++ __ li(SCR1, (u1)TypeEntries::null_seen); ++ __ beq(SCR2, SCR1, none); ++ // There is a chance that the checks above (re-reading profiling ++ // data from memory) fail if another thread has just set the ++ // profiling to this obj's klass ++ membar_acquire(); ++ __ ld_ptr(SCR2, mdo_addr); ++ __ XOR(tmp, tmp, SCR2); ++ assert(TypeEntries::type_klass_mask == -4, "must be"); ++ __ bstrpick_d(SCR1, tmp, 63, 2); ++ __ beqz(SCR1, next); ++ } ++ } else { ++ assert(ciTypeEntries::valid_ciklass(current_klass) != NULL && ++ ciTypeEntries::valid_ciklass(current_klass) != exact_klass, "conflict only"); ++ ++ __ ld_ptr(tmp, mdo_addr); ++ __ andi(SCR2, tmp, TypeEntries::type_unknown); ++ __ bnez(SCR2, next); // already unknown. Nothing to do anymore. ++ } ++ ++ // different than before. Cannot keep accurate profile. ++ __ ld_ptr(SCR2, mdo_addr); ++ __ ori(SCR2, SCR2, TypeEntries::type_unknown); ++ __ st_ptr(SCR2, mdo_addr); ++ ++ if (TypeEntries::is_type_none(current_klass)) { ++ __ b(next); ++ ++ __ bind(none); ++ // first time here. Set profile type. ++ __ st_ptr(tmp, mdo_addr); ++ } ++ } else { ++ // There's a single possible klass at this profile point ++ assert(exact_klass != NULL, "should be"); ++ if (TypeEntries::is_type_none(current_klass)) { ++ __ mov_metadata(tmp, exact_klass->constant_encoding()); ++ __ ld_ptr(SCR2, mdo_addr); ++ __ XOR(tmp, tmp, SCR2); ++ assert(TypeEntries::type_klass_mask == -4, "must be"); ++ __ bstrpick_d(SCR1, tmp, 63, 2); ++ __ beqz(SCR1, next); ++#ifdef ASSERT ++ { ++ Label ok; ++ __ ld_ptr(SCR1, mdo_addr); ++ __ beqz(SCR1, ok); ++ __ li(SCR2, (u1)TypeEntries::null_seen); ++ __ beq(SCR1, SCR2, ok); ++ // may have been set by another thread ++ membar_acquire(); ++ __ mov_metadata(SCR1, exact_klass->constant_encoding()); ++ __ ld_ptr(SCR2, mdo_addr); ++ __ XOR(SCR2, SCR1, SCR2); ++ assert(TypeEntries::type_mask == -2, "must be"); ++ __ bstrpick_d(SCR2, SCR2, 63, 1); ++ __ beqz(SCR2, ok); ++ ++ __ stop("unexpected profiling mismatch"); ++ __ bind(ok); ++ } ++#endif ++ // first time here. Set profile type. ++ __ st_ptr(tmp, mdo_addr); ++ } else { ++ assert(ciTypeEntries::valid_ciklass(current_klass) != NULL && ++ ciTypeEntries::valid_ciklass(current_klass) != exact_klass, "inconsistent"); ++ ++ __ ld_ptr(tmp, mdo_addr); ++ __ andi(SCR1, tmp, TypeEntries::type_unknown); ++ __ bnez(SCR1, next); // already unknown. Nothing to do anymore. ++ ++ __ ori(tmp, tmp, TypeEntries::type_unknown); ++ __ st_ptr(tmp, mdo_addr); ++ // FIXME: Write barrier needed here? 
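The flag juggling in emit_profile_type above treats a single profile cell as a klass pointer with two low flag bits, "null seen" and "type unknown": once the unknown bit is set nothing more is recorded, the first klass observed is stored as-is, and a conflicting klass only sets the unknown bit. A high-level sketch of that state machine (the constants and update_cell below are assumptions that mirror the masks visible in the code, not the real TypeEntries definitions):

    #include <cassert>
    #include <cstdint>

    constexpr intptr_t NULL_SEEN    = 1;
    constexpr intptr_t TYPE_UNKNOWN = 2;
    constexpr intptr_t KLASS_MASK   = ~intptr_t(3);   // matches type_klass_mask == -4

    void update_cell(intptr_t& cell, intptr_t klass) {
      if (cell & TYPE_UNKNOWN) return;                          // already polymorphic
      if ((cell & KLASS_MASK) == 0) { cell |= klass; return; }  // first type seen
      if ((cell & KLASS_MASK) == klass) return;                 // same type again
      cell |= TYPE_UNKNOWN;                                     // conflict: give up precision
    }

    int main() {
      intptr_t cell = 0;
      update_cell(cell, 0x1000);  assert((cell & KLASS_MASK) == 0x1000);
      update_cell(cell, 0x1000);  assert(!(cell & TYPE_UNKNOWN));
      update_cell(cell, 0x2000);  assert(cell & TYPE_UNKNOWN);
      return 0;
    }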
++ } ++ } ++ ++ __ bind(next); ++ } ++ COMMENT("} emit_profile_type"); ++} ++ ++void LIR_Assembler::align_backward_branch_target() {} ++ ++void LIR_Assembler::negate(LIR_Opr left, LIR_Opr dest, LIR_Opr tmp) { ++ // tmp must be unused ++ assert(tmp->is_illegal(), "wasting a register if tmp is allocated"); ++ ++ if (left->is_single_cpu()) { ++ assert(dest->is_single_cpu(), "expect single result reg"); ++ __ sub_w(dest->as_register(), R0, left->as_register()); ++ } else if (left->is_double_cpu()) { ++ assert(dest->is_double_cpu(), "expect double result reg"); ++ __ sub_d(dest->as_register_lo(), R0, left->as_register_lo()); ++ } else if (left->is_single_fpu()) { ++ assert(dest->is_single_fpu(), "expect single float result reg"); ++ __ fneg_s(dest->as_float_reg(), left->as_float_reg()); ++ } else { ++ assert(left->is_double_fpu(), "expect double float operand reg"); ++ assert(dest->is_double_fpu(), "expect double float result reg"); ++ __ fneg_d(dest->as_double_reg(), left->as_double_reg()); ++ } ++} ++ ++void LIR_Assembler::leal(LIR_Opr addr, LIR_Opr dest, LIR_PatchCode patch_code, ++ CodeEmitInfo* info) { ++ if (patch_code != lir_patch_none) { ++ deoptimize_trap(info); ++ return; ++ } ++ ++ __ lea(dest->as_register_lo(), as_Address(addr->as_address_ptr())); ++} ++ ++void LIR_Assembler::rt_call(LIR_Opr result, address dest, const LIR_OprList* args, ++ LIR_Opr tmp, CodeEmitInfo* info) { ++ assert(!tmp->is_valid(), "don't need temporary"); ++ __ call(dest, relocInfo::runtime_call_type); ++ if (info != NULL) { ++ add_call_info_here(info); ++ } ++} ++ ++void LIR_Assembler::volatile_move_op(LIR_Opr src, LIR_Opr dest, BasicType type, ++ CodeEmitInfo* info) { ++ if (dest->is_address() || src->is_address()) { ++ move_op(src, dest, type, lir_patch_none, info, ++ /*pop_fpu_stack*/false, /*unaligned*/false, /*wide*/false); ++ } else { ++ ShouldNotReachHere(); ++ } ++} ++ ++#ifdef ASSERT ++// emit run-time assertion ++void LIR_Assembler::emit_assert(LIR_OpAssert* op) { ++ assert(op->code() == lir_assert, "must be"); ++ Label ok; ++ ++ if (op->in_opr1()->is_valid()) { ++ assert(op->in_opr2()->is_valid(), "both operands must be valid"); ++ assert(op->in_opr1()->is_cpu_register() || op->in_opr2()->is_cpu_register(), "must be"); ++ Register reg1 = as_reg(op->in_opr1()); ++ Register reg2 = as_reg(op->in_opr2()); ++ switch (op->condition()) { ++ case lir_cond_equal: __ beq(reg1, reg2, ok); break; ++ case lir_cond_notEqual: __ bne(reg1, reg2, ok); break; ++ case lir_cond_less: __ blt(reg1, reg2, ok); break; ++ case lir_cond_lessEqual: __ bge(reg2, reg1, ok); break; ++ case lir_cond_greaterEqual: __ bge(reg1, reg2, ok); break; ++ case lir_cond_greater: __ blt(reg2, reg1, ok); break; ++ case lir_cond_belowEqual: __ bgeu(reg2, reg1, ok); break; ++ case lir_cond_aboveEqual: __ bgeu(reg1, reg2, ok); break; ++ default: ShouldNotReachHere(); ++ } ++ } else { ++ assert(op->in_opr2()->is_illegal(), "both operands must be illegal"); ++ assert(op->condition() == lir_cond_always, "no other conditions allowed"); ++ } ++ if (op->halt()) { ++ const char* str = __ code_string(op->msg()); ++ __ stop(str); ++ } else { ++ breakpoint(); ++ } ++ __ bind(ok); ++} ++#endif ++ ++#ifndef PRODUCT ++#define COMMENT(x) do { __ block_comment(x); } while (0) ++#else ++#define COMMENT(x) ++#endif ++ ++void LIR_Assembler::membar() { ++ COMMENT("membar"); ++ __ membar(Assembler::AnyAny); ++} ++ ++void LIR_Assembler::membar_acquire() { ++ __ membar(Assembler::Membar_mask_bits(Assembler::LoadLoad | Assembler::LoadStore)); ++} ++ ++void 
LIR_Assembler::membar_release() { ++ __ membar(Assembler::Membar_mask_bits(Assembler::LoadStore|Assembler::StoreStore)); ++} ++ ++void LIR_Assembler::membar_loadload() { ++ __ membar(Assembler::LoadLoad); ++} ++ ++void LIR_Assembler::membar_storestore() { ++ __ membar(MacroAssembler::StoreStore); ++} ++ ++void LIR_Assembler::membar_loadstore() { ++ __ membar(MacroAssembler::LoadStore); ++} ++ ++void LIR_Assembler::membar_storeload() { ++ __ membar(MacroAssembler::StoreLoad); ++} ++ ++void LIR_Assembler::on_spin_wait() { ++ Unimplemented(); ++} ++ ++void LIR_Assembler::get_thread(LIR_Opr result_reg) { ++ __ move(result_reg->as_register(), TREG); ++} ++ ++void LIR_Assembler::peephole(LIR_List *lir) { ++} ++ ++void LIR_Assembler::atomic_op(LIR_Code code, LIR_Opr src, LIR_Opr data, ++ LIR_Opr dest, LIR_Opr tmp_op) { ++ Address addr = as_Address(src->as_address_ptr()); ++ BasicType type = src->type(); ++ Register dst = as_reg(dest); ++ Register tmp = as_reg(tmp_op); ++ bool is_oop = is_reference_type(type); ++ ++ if (Assembler::is_simm(addr.disp(), 12)) { ++ __ addi_d(tmp, addr.base(), addr.disp()); ++ } else { ++ __ li(tmp, addr.disp()); ++ __ add_d(tmp, addr.base(), tmp); ++ } ++ if (addr.index() != noreg) { ++ if (addr.scale() != Address::no_scale) ++ __ alsl_d(tmp, addr.index(), tmp, addr.scale() - 1); ++ else ++ __ add_d(tmp, tmp, addr.index()); ++ } ++ ++ switch(type) { ++ case T_INT: ++ break; ++ case T_LONG: ++ break; ++ case T_OBJECT: ++ case T_ARRAY: ++ if (UseCompressedOops) { ++ // unsigned int ++ } else { ++ // long ++ } ++ break; ++ default: ++ ShouldNotReachHere(); ++ } ++ ++ if (code == lir_xadd) { ++ Register inc = noreg; ++ if (data->is_constant()) { ++ inc = SCR1; ++ __ li(inc, as_long(data)); ++ } else { ++ inc = as_reg(data); ++ } ++ switch(type) { ++ case T_INT: ++ __ amadd_db_w(dst, inc, tmp); ++ break; ++ case T_LONG: ++ __ amadd_db_d(dst, inc, tmp); ++ break; ++ case T_OBJECT: ++ case T_ARRAY: ++ if (UseCompressedOops) { ++ __ amadd_db_w(dst, inc, tmp); ++ __ lu32i_d(dst, 0); ++ } else { ++ __ amadd_db_d(dst, inc, tmp); ++ } ++ break; ++ default: ++ ShouldNotReachHere(); ++ } ++ } else if (code == lir_xchg) { ++ Register obj = as_reg(data); ++ if (is_oop && UseCompressedOops) { ++ __ encode_heap_oop(SCR2, obj); ++ obj = SCR2; ++ } ++ switch(type) { ++ case T_INT: ++ __ amswap_db_w(dst, obj, tmp); ++ break; ++ case T_LONG: ++ __ amswap_db_d(dst, obj, tmp); ++ break; ++ case T_OBJECT: ++ case T_ARRAY: ++ if (UseCompressedOops) { ++ __ amswap_db_w(dst, obj, tmp); ++ __ lu32i_d(dst, 0); ++ } else { ++ __ amswap_db_d(dst, obj, tmp); ++ } ++ break; ++ default: ++ ShouldNotReachHere(); ++ } ++ if (is_oop && UseCompressedOops) { ++ __ decode_heap_oop(dst); ++ } ++ } else { ++ ShouldNotReachHere(); ++ } ++} ++ ++#undef __ +diff --git a/src/hotspot/cpu/loongarch/c1_LIRGenerator_loongarch_64.cpp b/src/hotspot/cpu/loongarch/c1_LIRGenerator_loongarch_64.cpp +new file mode 100644 +index 00000000000..fedcc547d48 +--- /dev/null ++++ b/src/hotspot/cpu/loongarch/c1_LIRGenerator_loongarch_64.cpp +@@ -0,0 +1,1384 @@ ++/* ++ * Copyright (c) 2005, 2021, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2021, 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. 
++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#include "precompiled.hpp" ++#include "asm/macroAssembler.inline.hpp" ++#include "c1/c1_Compilation.hpp" ++#include "c1/c1_FrameMap.hpp" ++#include "c1/c1_Instruction.hpp" ++#include "c1/c1_LIRAssembler.hpp" ++#include "c1/c1_LIRGenerator.hpp" ++#include "c1/c1_Runtime1.hpp" ++#include "c1/c1_ValueStack.hpp" ++#include "ci/ciArray.hpp" ++#include "ci/ciObjArrayKlass.hpp" ++#include "ci/ciTypeArrayKlass.hpp" ++#include "runtime/sharedRuntime.hpp" ++#include "runtime/stubRoutines.hpp" ++#include "utilities/powerOfTwo.hpp" ++#include "vmreg_loongarch.inline.hpp" ++ ++#ifdef ASSERT ++#define __ gen()->lir(__FILE__, __LINE__)-> ++#else ++#define __ gen()->lir()-> ++#endif ++ ++// Item will be loaded into a byte register; Intel only ++void LIRItem::load_byte_item() { ++ load_item(); ++} ++ ++void LIRItem::load_nonconstant() { ++ LIR_Opr r = value()->operand(); ++ if (r->is_constant()) { ++ _result = r; ++ } else { ++ load_item(); ++ } ++} ++ ++//-------------------------------------------------------------- ++// LIRGenerator ++//-------------------------------------------------------------- ++ ++LIR_Opr LIRGenerator::exceptionOopOpr() { return FrameMap::a0_oop_opr; } ++LIR_Opr LIRGenerator::exceptionPcOpr() { return FrameMap::a1_opr; } ++LIR_Opr LIRGenerator::divInOpr() { Unimplemented(); return LIR_OprFact::illegalOpr; } ++LIR_Opr LIRGenerator::divOutOpr() { Unimplemented(); return LIR_OprFact::illegalOpr; } ++LIR_Opr LIRGenerator::remOutOpr() { Unimplemented(); return LIR_OprFact::illegalOpr; } ++LIR_Opr LIRGenerator::shiftCountOpr() { Unimplemented(); return LIR_OprFact::illegalOpr; } ++LIR_Opr LIRGenerator::syncLockOpr() { return new_register(T_INT); } ++LIR_Opr LIRGenerator::syncTempOpr() { return FrameMap::a0_opr; } ++LIR_Opr LIRGenerator::getThreadTemp() { return LIR_OprFact::illegalOpr; } ++ ++LIR_Opr LIRGenerator::result_register_for(ValueType* type, bool callee) { ++ LIR_Opr opr; ++ switch (type->tag()) { ++ case intTag: opr = FrameMap::a0_opr; break; ++ case objectTag: opr = FrameMap::a0_oop_opr; break; ++ case longTag: opr = FrameMap::long0_opr; break; ++ case floatTag: opr = FrameMap::fpu0_float_opr; break; ++ case doubleTag: opr = FrameMap::fpu0_double_opr; break; ++ case addressTag: ++ default: ShouldNotReachHere(); return LIR_OprFact::illegalOpr; ++ } ++ ++ assert(opr->type_field() == as_OprType(as_BasicType(type)), "type mismatch"); ++ return opr; ++} ++ ++LIR_Opr LIRGenerator::rlock_byte(BasicType type) { ++ LIR_Opr reg = new_register(T_INT); ++ set_vreg_flag(reg, LIRGenerator::byte_reg); ++ return reg; ++} ++ ++//--------- loading items into registers -------------------------------- ++ ++bool LIRGenerator::can_store_as_constant(Value v, BasicType type) const { ++ if (v->type()->as_IntConstant() != NULL) { ++ return 
v->type()->as_IntConstant()->value() == 0L; ++ } else if (v->type()->as_LongConstant() != NULL) { ++ return v->type()->as_LongConstant()->value() == 0L; ++ } else if (v->type()->as_ObjectConstant() != NULL) { ++ return v->type()->as_ObjectConstant()->value()->is_null_object(); ++ } else { ++ return false; ++ } ++} ++ ++bool LIRGenerator::can_inline_as_constant(Value v) const { ++ // FIXME: Just a guess ++ if (v->type()->as_IntConstant() != NULL) { ++ return Assembler::is_simm(v->type()->as_IntConstant()->value(), 12); ++ } else if (v->type()->as_LongConstant() != NULL) { ++ return v->type()->as_LongConstant()->value() == 0L; ++ } else if (v->type()->as_ObjectConstant() != NULL) { ++ return v->type()->as_ObjectConstant()->value()->is_null_object(); ++ } else { ++ return false; ++ } ++} ++ ++bool LIRGenerator::can_inline_as_constant(LIR_Const* c) const { return false; } ++ ++LIR_Opr LIRGenerator::safepoint_poll_register() { ++ return LIR_OprFact::illegalOpr; ++} ++ ++LIR_Address* LIRGenerator::generate_address(LIR_Opr base, LIR_Opr index, ++ int shift, int disp, BasicType type) { ++ assert(base->is_register(), "must be"); ++ intx large_disp = disp; ++ ++ // accumulate fixed displacements ++ if (index->is_constant()) { ++ LIR_Const *constant = index->as_constant_ptr(); ++ if (constant->type() == T_INT) { ++ large_disp += index->as_jint() << shift; ++ } else { ++ assert(constant->type() == T_LONG, "should be"); ++ jlong c = index->as_jlong() << shift; ++ if ((jlong)((jint)c) == c) { ++ large_disp += c; ++ index = LIR_OprFact::illegalOpr; ++ } else { ++ LIR_Opr tmp = new_register(T_LONG); ++ __ move(index, tmp); ++ index = tmp; ++ // apply shift and displacement below ++ } ++ } ++ } ++ ++ if (index->is_register()) { ++ // apply the shift and accumulate the displacement ++ if (shift > 0) { ++ LIR_Opr tmp = new_pointer_register(); ++ __ shift_left(index, shift, tmp); ++ index = tmp; ++ } ++ if (large_disp != 0) { ++ LIR_Opr tmp = new_pointer_register(); ++ if (Assembler::is_simm(large_disp, 12)) { ++ __ add(index, LIR_OprFact::intptrConst(large_disp), tmp); ++ index = tmp; ++ } else { ++ __ move(LIR_OprFact::intptrConst(large_disp), tmp); ++ __ add(tmp, index, tmp); ++ index = tmp; ++ } ++ large_disp = 0; ++ } ++ } else if (large_disp != 0 && !Assembler::is_simm(large_disp, 12)) { ++ // index is illegal so replace it with the displacement loaded into a register ++ index = new_pointer_register(); ++ __ move(LIR_OprFact::intptrConst(large_disp), index); ++ large_disp = 0; ++ } ++ ++ // at this point we either have base + index or base + displacement ++ if (large_disp == 0 && index->is_register()) { ++ return new LIR_Address(base, index, type); ++ } else { ++ assert(Assembler::is_simm(large_disp, 12), "must be"); ++ return new LIR_Address(base, large_disp, type); ++ } ++} ++ ++LIR_Address* LIRGenerator::emit_array_address(LIR_Opr array_opr, LIR_Opr index_opr, BasicType type) { ++ int offset_in_bytes = arrayOopDesc::base_offset_in_bytes(type); ++ int elem_size = type2aelembytes(type); ++ int shift = exact_log2(elem_size); ++ ++ LIR_Address* addr; ++ if (index_opr->is_constant()) { ++ addr = new LIR_Address(array_opr, offset_in_bytes + (intx)(index_opr->as_jint()) * elem_size, type); ++ } else { ++ if (offset_in_bytes) { ++ LIR_Opr tmp = new_pointer_register(); ++ __ add(array_opr, LIR_OprFact::intConst(offset_in_bytes), tmp); ++ array_opr = tmp; ++ offset_in_bytes = 0; ++ } ++ addr = new LIR_Address(array_opr, index_opr, LIR_Address::scale(type), offset_in_bytes, type); ++ } ++ return addr; ++} ++ 
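generate_address and emit_array_address above fold a constant displacement into the address only when it fits LoongArch's signed 12-bit immediate field; anything larger is materialized into a register first. A quick sketch of that range test (fits_simm is a hypothetical stand-in for Assembler::is_simm, assuming two's-complement integers):

    #include <cassert>
    #include <cstdint>

    // true iff x is representable in `bits` two's-complement bits.
    static bool fits_simm(int64_t x, unsigned bits) {
      int64_t lo = -(int64_t(1) << (bits - 1));
      int64_t hi =  (int64_t(1) << (bits - 1)) - 1;
      return x >= lo && x <= hi;
    }

    int main() {
      assert(fits_simm(2047, 12));     // largest positive 12-bit immediate
      assert(fits_simm(-2048, 12));    // smallest negative
      assert(!fits_simm(2048, 12));    // too big: load into a scratch register instead
      return 0;
    }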
++LIR_Opr LIRGenerator::load_immediate(int x, BasicType type) { ++ LIR_Opr r; ++ if (type == T_LONG) { ++ r = LIR_OprFact::longConst(x); ++ if (!Assembler::is_simm(x, 12)) { ++ LIR_Opr tmp = new_register(type); ++ __ move(r, tmp); ++ return tmp; ++ } ++ } else if (type == T_INT) { ++ r = LIR_OprFact::intConst(x); ++ if (!Assembler::is_simm(x, 12)) { ++ // This is all rather nasty. We don't know whether our constant ++ // is required for a logical or an arithmetic operation, wo we ++ // don't know what the range of valid values is!! ++ LIR_Opr tmp = new_register(type); ++ __ move(r, tmp); ++ return tmp; ++ } ++ } else { ++ ShouldNotReachHere(); ++ r = NULL; // unreachable ++ } ++ return r; ++} ++ ++void LIRGenerator::increment_counter(address counter, BasicType type, int step) { ++ LIR_Opr pointer = new_pointer_register(); ++ __ move(LIR_OprFact::intptrConst(counter), pointer); ++ LIR_Address* addr = new LIR_Address(pointer, type); ++ increment_counter(addr, step); ++} ++ ++void LIRGenerator::increment_counter(LIR_Address* addr, int step) { ++ LIR_Opr imm = NULL; ++ switch(addr->type()) { ++ case T_INT: ++ imm = LIR_OprFact::intConst(step); ++ break; ++ case T_LONG: ++ imm = LIR_OprFact::longConst(step); ++ break; ++ default: ++ ShouldNotReachHere(); ++ } ++ LIR_Opr reg = new_register(addr->type()); ++ __ load(addr, reg); ++ __ add(reg, imm, reg); ++ __ store(reg, addr); ++} ++ ++void LIRGenerator::cmp_mem_int(LIR_Condition condition, LIR_Opr base, int disp, int c, CodeEmitInfo* info) { ++ LIR_Opr reg = new_register(T_INT); ++ __ load(generate_address(base, disp, T_INT), reg, info); ++ __ cmp(condition, reg, LIR_OprFact::intConst(c)); ++} ++ ++void LIRGenerator::cmp_reg_mem(LIR_Condition condition, LIR_Opr reg, LIR_Opr base, int disp, BasicType type, CodeEmitInfo* info) { ++ LIR_Opr reg1 = new_register(T_INT); ++ __ load(generate_address(base, disp, type), reg1, info); ++ __ cmp(condition, reg, reg1); ++} ++ ++bool LIRGenerator::strength_reduce_multiply(LIR_Opr left, jint c, LIR_Opr result, LIR_Opr tmp) { ++ if (is_power_of_2(c - 1)) { ++ __ shift_left(left, exact_log2(c - 1), tmp); ++ __ add(tmp, left, result); ++ return true; ++ } else if (is_power_of_2(c + 1)) { ++ __ shift_left(left, exact_log2(c + 1), tmp); ++ __ sub(tmp, left, result); ++ return true; ++ } else { ++ return false; ++ } ++} ++ ++void LIRGenerator::store_stack_parameter (LIR_Opr item, ByteSize offset_from_sp) { ++ BasicType type = item->type(); ++ __ store(item, new LIR_Address(FrameMap::sp_opr, in_bytes(offset_from_sp), type)); ++} ++ ++void LIRGenerator::array_store_check(LIR_Opr value, LIR_Opr array, CodeEmitInfo* store_check_info, ++ ciMethod* profiled_method, int profiled_bci) { ++ LIR_Opr tmp1 = new_register(objectType); ++ LIR_Opr tmp2 = new_register(objectType); ++ LIR_Opr tmp3 = new_register(objectType); ++ __ store_check(value, array, tmp1, tmp2, tmp3, store_check_info, profiled_method, profiled_bci); ++} ++ ++//---------------------------------------------------------------------- ++// visitor functions ++//---------------------------------------------------------------------- ++ ++void LIRGenerator::do_MonitorEnter(MonitorEnter* x) { ++ assert(x->is_pinned(),""); ++ LIRItem obj(x->obj(), this); ++ obj.load_item(); ++ ++ set_no_result(x); ++ ++ // "lock" stores the address of the monitor stack slot, so this is not an oop ++ LIR_Opr lock = new_register(T_INT); ++ // Need a scratch register for biased locking ++ LIR_Opr scratch = LIR_OprFact::illegalOpr; ++ if (UseBiasedLocking) { ++ scratch = 
new_register(T_INT); ++ } ++ ++ CodeEmitInfo* info_for_exception = NULL; ++ if (x->needs_null_check()) { ++ info_for_exception = state_for(x); ++ } ++ // this CodeEmitInfo must not have the xhandlers because here the ++ // object is already locked (xhandlers expect object to be unlocked) ++ CodeEmitInfo* info = state_for(x, x->state(), true); ++ monitor_enter(obj.result(), lock, syncTempOpr(), scratch, ++ x->monitor_no(), info_for_exception, info); ++} ++ ++void LIRGenerator::do_MonitorExit(MonitorExit* x) { ++ assert(x->is_pinned(),""); ++ ++ LIRItem obj(x->obj(), this); ++ obj.dont_load_item(); ++ ++ LIR_Opr lock = new_register(T_INT); ++ LIR_Opr obj_temp = new_register(T_INT); ++ set_no_result(x); ++ monitor_exit(obj_temp, lock, syncTempOpr(), LIR_OprFact::illegalOpr, x->monitor_no()); ++} ++ ++void LIRGenerator::do_NegateOp(NegateOp* x) { ++ LIRItem from(x->x(), this); ++ from.load_item(); ++ LIR_Opr result = rlock_result(x); ++ __ negate (from.result(), result); ++} ++ ++// for _fadd, _fmul, _fsub, _fdiv, _frem ++// _dadd, _dmul, _dsub, _ddiv, _drem ++void LIRGenerator::do_ArithmeticOp_FPU(ArithmeticOp* x) { ++ if (x->op() == Bytecodes::_frem || x->op() == Bytecodes::_drem) { ++ // float remainder is implemented as a direct call into the runtime ++ LIRItem right(x->x(), this); ++ LIRItem left(x->y(), this); ++ ++ BasicTypeList signature(2); ++ if (x->op() == Bytecodes::_frem) { ++ signature.append(T_FLOAT); ++ signature.append(T_FLOAT); ++ } else { ++ signature.append(T_DOUBLE); ++ signature.append(T_DOUBLE); ++ } ++ CallingConvention* cc = frame_map()->c_calling_convention(&signature); ++ ++ const LIR_Opr result_reg = result_register_for(x->type()); ++ left.load_item_force(cc->at(1)); ++ right.load_item(); ++ ++ __ move(right.result(), cc->at(0)); ++ ++ address entry; ++ if (x->op() == Bytecodes::_frem) { ++ entry = CAST_FROM_FN_PTR(address, SharedRuntime::frem); ++ } else { ++ entry = CAST_FROM_FN_PTR(address, SharedRuntime::drem); ++ } ++ ++ LIR_Opr result = rlock_result(x); ++ __ call_runtime_leaf(entry, getThreadTemp(), result_reg, cc->args()); ++ __ move(result_reg, result); ++ return; ++ } ++ ++ LIRItem left(x->x(), this); ++ LIRItem right(x->y(), this); ++ LIRItem* left_arg = &left; ++ LIRItem* right_arg = &right; ++ ++ // Always load right hand side. 
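The _frem/_drem case above becomes an outright runtime call because there is no floating-point remainder instruction to emit, and Java's % on floats keeps the sign of the dividend, i.e. it behaves like C's fmod rather than the IEEE round-to-nearest remainder (the exact SharedRuntime::frem/drem implementation is assumed here to be fmod-equivalent). For example:

    #include <cmath>
    #include <cstdio>

    int main() {
      std::printf("%f\n", std::fmod( 5.5, 2.0));   //  1.5
      std::printf("%f\n", std::fmod(-5.5, 2.0));   // -1.5, sign follows the dividend
      return 0;
    }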
++ right.load_item(); ++ ++ if (!left.is_register()) ++ left.load_item(); ++ ++ LIR_Opr reg = rlock(x); ++ ++ arithmetic_op_fpu(x->op(), reg, left.result(), right.result()); ++ ++ set_result(x, round_item(reg)); ++} ++ ++// for _ladd, _lmul, _lsub, _ldiv, _lrem ++void LIRGenerator::do_ArithmeticOp_Long(ArithmeticOp* x) { ++ // missing test if instr is commutative and if we should swap ++ LIRItem left(x->x(), this); ++ LIRItem right(x->y(), this); ++ ++ if (x->op() == Bytecodes::_ldiv || x->op() == Bytecodes::_lrem) { ++ left.load_item(); ++ bool need_zero_check = true; ++ if (right.is_constant()) { ++ jlong c = right.get_jlong_constant(); ++ // no need to do div-by-zero check if the divisor is a non-zero constant ++ if (c != 0) need_zero_check = false; ++ // do not load right if the divisor is a power-of-2 constant ++ if (c > 0 && is_power_of_2(c) && Assembler::is_uimm(c - 1, 12)) { ++ right.dont_load_item(); ++ } else { ++ right.load_item(); ++ } ++ } else { ++ right.load_item(); ++ } ++ if (need_zero_check) { ++ CodeEmitInfo* info = state_for(x); ++ __ cmp(lir_cond_equal, right.result(), LIR_OprFact::longConst(0)); ++ __ branch(lir_cond_equal, new DivByZeroStub(info)); ++ } ++ ++ rlock_result(x); ++ switch (x->op()) { ++ case Bytecodes::_lrem: ++ __ rem (left.result(), right.result(), x->operand()); ++ break; ++ case Bytecodes::_ldiv: ++ __ div (left.result(), right.result(), x->operand()); ++ break; ++ default: ++ ShouldNotReachHere(); ++ break; ++ } ++ } else { ++ assert(x->op() == Bytecodes::_lmul || x->op() == Bytecodes::_ladd || x->op() == Bytecodes::_lsub, ++ "expect lmul, ladd or lsub"); ++ // add, sub, mul ++ left.load_item(); ++ if (!right.is_register()) { ++ if (x->op() == Bytecodes::_lmul || !right.is_constant() || ++ (x->op() == Bytecodes::_ladd && !Assembler::is_simm(right.get_jlong_constant(), 12)) || ++ (x->op() == Bytecodes::_lsub && !Assembler::is_simm(-right.get_jlong_constant(), 12))) { ++ right.load_item(); ++ } else { // add, sub ++ assert(x->op() == Bytecodes::_ladd || x->op() == Bytecodes::_lsub, "expect ladd or lsub"); ++ // don't load constants to save register ++ right.load_nonconstant(); ++ } ++ } ++ rlock_result(x); ++ arithmetic_op_long(x->op(), x->operand(), left.result(), right.result(), NULL); ++ } ++} ++ ++// for: _iadd, _imul, _isub, _idiv, _irem ++void LIRGenerator::do_ArithmeticOp_Int(ArithmeticOp* x) { ++ // Test if instr is commutative and if we should swap ++ LIRItem left(x->x(), this); ++ LIRItem right(x->y(), this); ++ LIRItem* left_arg = &left; ++ LIRItem* right_arg = &right; ++ if (x->is_commutative() && left.is_stack() && right.is_register()) { ++ // swap them if left is real stack (or cached) and right is real register(not cached) ++ left_arg = &right; ++ right_arg = &left; ++ } ++ ++ left_arg->load_item(); ++ ++ // do not need to load right, as we can handle stack and constants ++ if (x->op() == Bytecodes::_idiv || x->op() == Bytecodes::_irem) { ++ rlock_result(x); ++ bool need_zero_check = true; ++ if (right.is_constant()) { ++ jint c = right.get_jint_constant(); ++ // no need to do div-by-zero check if the divisor is a non-zero constant ++ if (c != 0) need_zero_check = false; ++ // do not load right if the divisor is a power-of-2 constant ++ if (c > 0 && is_power_of_2(c) && Assembler::is_uimm(c - 1, 12)) { ++ right_arg->dont_load_item(); ++ } else { ++ right_arg->load_item(); ++ } ++ } else { ++ right_arg->load_item(); ++ } ++ if (need_zero_check) { ++ CodeEmitInfo* info = state_for(x); ++ __ cmp(lir_cond_equal, right_arg->result(), 
LIR_OprFact::longConst(0)); ++ __ branch(lir_cond_equal, new DivByZeroStub(info)); ++ } ++ ++ LIR_Opr ill = LIR_OprFact::illegalOpr; ++ if (x->op() == Bytecodes::_irem) { ++ __ irem(left_arg->result(), right_arg->result(), x->operand(), ill, NULL); ++ } else if (x->op() == Bytecodes::_idiv) { ++ __ idiv(left_arg->result(), right_arg->result(), x->operand(), ill, NULL); ++ } ++ } else if (x->op() == Bytecodes::_iadd || x->op() == Bytecodes::_isub) { ++ if (right.is_constant() && ++ ((x->op() == Bytecodes::_iadd && Assembler::is_simm(right.get_jint_constant(), 12)) || ++ (x->op() == Bytecodes::_isub && Assembler::is_simm(-right.get_jint_constant(), 12)))) { ++ right.load_nonconstant(); ++ } else { ++ right.load_item(); ++ } ++ rlock_result(x); ++ arithmetic_op_int(x->op(), x->operand(), left_arg->result(), right_arg->result(), LIR_OprFact::illegalOpr); ++ } else { ++ assert (x->op() == Bytecodes::_imul, "expect imul"); ++ if (right.is_constant()) { ++ jint c = right.get_jint_constant(); ++ if (c > 0 && c < max_jint && (is_power_of_2(c) || is_power_of_2(c - 1) || is_power_of_2(c + 1))) { ++ right_arg->dont_load_item(); ++ } else { ++ // Cannot use constant op. ++ right_arg->load_item(); ++ } ++ } else { ++ right.load_item(); ++ } ++ rlock_result(x); ++ arithmetic_op_int(x->op(), x->operand(), left_arg->result(), right_arg->result(), new_register(T_INT)); ++ } ++} ++ ++void LIRGenerator::do_ArithmeticOp(ArithmeticOp* x) { ++ // when an operand with use count 1 is the left operand, then it is ++ // likely that no move for 2-operand-LIR-form is necessary ++ if (x->is_commutative() && x->y()->as_Constant() == NULL && x->x()->use_count() > x->y()->use_count()) { ++ x->swap_operands(); ++ } ++ ++ ValueTag tag = x->type()->tag(); ++ assert(x->x()->type()->tag() == tag && x->y()->type()->tag() == tag, "wrong parameters"); ++ switch (tag) { ++ case floatTag: ++ case doubleTag: do_ArithmeticOp_FPU(x); return; ++ case longTag: do_ArithmeticOp_Long(x); return; ++ case intTag: do_ArithmeticOp_Int(x); return; ++ default: ShouldNotReachHere(); return; ++ } ++} ++ ++// _ishl, _lshl, _ishr, _lshr, _iushr, _lushr ++void LIRGenerator::do_ShiftOp(ShiftOp* x) { ++ LIRItem left(x->x(), this); ++ LIRItem right(x->y(), this); ++ ++ left.load_item(); ++ ++ rlock_result(x); ++ if (right.is_constant()) { ++ right.dont_load_item(); ++ int c; ++ switch (x->op()) { ++ case Bytecodes::_ishl: ++ c = right.get_jint_constant() & 0x1f; ++ __ shift_left(left.result(), c, x->operand()); ++ break; ++ case Bytecodes::_ishr: ++ c = right.get_jint_constant() & 0x1f; ++ __ shift_right(left.result(), c, x->operand()); ++ break; ++ case Bytecodes::_iushr: ++ c = right.get_jint_constant() & 0x1f; ++ __ unsigned_shift_right(left.result(), c, x->operand()); ++ break; ++ case Bytecodes::_lshl: ++ c = right.get_jint_constant() & 0x3f; ++ __ shift_left(left.result(), c, x->operand()); ++ break; ++ case Bytecodes::_lshr: ++ c = right.get_jint_constant() & 0x3f; ++ __ shift_right(left.result(), c, x->operand()); ++ break; ++ case Bytecodes::_lushr: ++ c = right.get_jint_constant() & 0x3f; ++ __ unsigned_shift_right(left.result(), c, x->operand()); ++ break; ++ default: ++ ShouldNotReachHere(); ++ } ++ } else { ++ right.load_item(); ++ LIR_Opr tmp = new_register(T_INT); ++ switch (x->op()) { ++ case Bytecodes::_ishl: ++ __ logical_and(right.result(), LIR_OprFact::intConst(0x1f), tmp); ++ __ shift_left(left.result(), tmp, x->operand(), tmp); ++ break; ++ case Bytecodes::_ishr: ++ __ logical_and(right.result(), LIR_OprFact::intConst(0x1f), tmp); 
++ __ shift_right(left.result(), tmp, x->operand(), tmp); ++ break; ++ case Bytecodes::_iushr: ++ __ logical_and(right.result(), LIR_OprFact::intConst(0x1f), tmp); ++ __ unsigned_shift_right(left.result(), tmp, x->operand(), tmp); ++ break; ++ case Bytecodes::_lshl: ++ __ logical_and(right.result(), LIR_OprFact::intConst(0x3f), tmp); ++ __ shift_left(left.result(), tmp, x->operand(), tmp); ++ break; ++ case Bytecodes::_lshr: ++ __ logical_and(right.result(), LIR_OprFact::intConst(0x3f), tmp); ++ __ shift_right(left.result(), tmp, x->operand(), tmp); ++ break; ++ case Bytecodes::_lushr: ++ __ logical_and(right.result(), LIR_OprFact::intConst(0x3f), tmp); ++ __ unsigned_shift_right(left.result(), tmp, x->operand(), tmp); ++ break; ++ default: ++ ShouldNotReachHere(); ++ } ++ } ++} ++ ++// _iand, _land, _ior, _lor, _ixor, _lxor ++void LIRGenerator::do_LogicOp(LogicOp* x) { ++ LIRItem left(x->x(), this); ++ LIRItem right(x->y(), this); ++ ++ left.load_item(); ++ ++ rlock_result(x); ++ if (right.is_constant() ++ && ((right.type()->tag() == intTag ++ && Assembler::is_uimm(right.get_jint_constant(), 12)) ++ || (right.type()->tag() == longTag ++ && Assembler::is_uimm(right.get_jlong_constant(), 12)))) { ++ right.dont_load_item(); ++ } else { ++ right.load_item(); ++ } ++ switch (x->op()) { ++ case Bytecodes::_iand: ++ case Bytecodes::_land: ++ __ logical_and(left.result(), right.result(), x->operand()); break; ++ case Bytecodes::_ior: ++ case Bytecodes::_lor: ++ __ logical_or (left.result(), right.result(), x->operand()); break; ++ case Bytecodes::_ixor: ++ case Bytecodes::_lxor: ++ __ logical_xor(left.result(), right.result(), x->operand()); break; ++ default: Unimplemented(); ++ } ++} ++ ++// _lcmp, _fcmpl, _fcmpg, _dcmpl, _dcmpg ++void LIRGenerator::do_CompareOp(CompareOp* x) { ++ LIRItem left(x->x(), this); ++ LIRItem right(x->y(), this); ++ ValueTag tag = x->x()->type()->tag(); ++ if (tag == longTag) { ++ left.set_destroys_register(); ++ } ++ left.load_item(); ++ right.load_item(); ++ LIR_Opr reg = rlock_result(x); ++ ++ if (x->x()->type()->is_float_kind()) { ++ Bytecodes::Code code = x->op(); ++ __ fcmp2int(left.result(), right.result(), reg, ++ (code == Bytecodes::_fcmpl || code == Bytecodes::_dcmpl)); ++ } else if (x->x()->type()->tag() == longTag) { ++ __ lcmp2int(left.result(), right.result(), reg); ++ } else { ++ Unimplemented(); ++ } ++} ++ ++LIR_Opr LIRGenerator::atomic_cmpxchg(BasicType type, LIR_Opr addr, ++ LIRItem& cmp_value, LIRItem& new_value) { ++ LIR_Opr ill = LIR_OprFact::illegalOpr; // for convenience ++ new_value.load_item(); ++ cmp_value.load_item(); ++ LIR_Opr result = new_register(T_INT); ++ if (is_reference_type(type)) { ++ __ cas_obj(addr, cmp_value.result(), new_value.result(), ++ new_register(T_INT), new_register(T_INT), result); ++ } else if (type == T_INT) { ++ __ cas_int(addr->as_address_ptr()->base(), cmp_value.result(), ++ new_value.result(), ill, ill); ++ } else if (type == T_LONG) { ++ __ cas_long(addr->as_address_ptr()->base(), cmp_value.result(), ++ new_value.result(), ill, ill); ++ } else { ++ ShouldNotReachHere(); ++ Unimplemented(); ++ } ++ __ move(FrameMap::scr1_opr, result); ++ return result; ++} ++ ++LIR_Opr LIRGenerator::atomic_xchg(BasicType type, LIR_Opr addr, LIRItem& value) { ++ bool is_oop = is_reference_type(type); ++ LIR_Opr result = new_register(type); ++ value.load_item(); ++ assert(type == T_INT || is_oop || type == T_LONG, "unexpected type"); ++ LIR_Opr tmp = new_register(T_INT); ++ __ xchg(addr, value.result(), result, tmp); ++ return 
result; ++} ++ ++LIR_Opr LIRGenerator::atomic_add(BasicType type, LIR_Opr addr, LIRItem& value) { ++ LIR_Opr result = new_register(type); ++ value.load_item(); ++ assert(type == T_INT || type == T_LONG, "unexpected type"); ++ LIR_Opr tmp = new_register(T_INT); ++ __ xadd(addr, value.result(), result, tmp); ++ return result; ++} ++ ++void LIRGenerator::do_MathIntrinsic(Intrinsic* x) { ++ assert(x->number_of_arguments() == 1 || (x->number_of_arguments() == 2 && x->id() == vmIntrinsics::_dpow), ++ "wrong type"); ++ if (x->id() == vmIntrinsics::_dexp || x->id() == vmIntrinsics::_dlog || ++ x->id() == vmIntrinsics::_dpow || x->id() == vmIntrinsics::_dcos || ++ x->id() == vmIntrinsics::_dsin || x->id() == vmIntrinsics::_dtan || ++ x->id() == vmIntrinsics::_dlog10) { ++ do_LibmIntrinsic(x); ++ return; ++ } ++ switch (x->id()) { ++ case vmIntrinsics::_dabs: ++ case vmIntrinsics::_dsqrt: { ++ assert(x->number_of_arguments() == 1, "wrong type"); ++ LIRItem value(x->argument_at(0), this); ++ value.load_item(); ++ LIR_Opr dst = rlock_result(x); ++ ++ switch (x->id()) { ++ case vmIntrinsics::_dsqrt: ++ __ sqrt(value.result(), dst, LIR_OprFact::illegalOpr); ++ break; ++ case vmIntrinsics::_dabs: ++ __ abs(value.result(), dst, LIR_OprFact::illegalOpr); ++ break; ++ default: ++ ShouldNotReachHere(); ++ } ++ break; ++ } ++ default: ++ ShouldNotReachHere(); ++ } ++} ++ ++void LIRGenerator::do_LibmIntrinsic(Intrinsic* x) { ++ LIRItem value(x->argument_at(0), this); ++ value.set_destroys_register(); ++ ++ LIR_Opr calc_result = rlock_result(x); ++ LIR_Opr result_reg = result_register_for(x->type()); ++ ++ CallingConvention* cc = NULL; ++ ++ if (x->id() == vmIntrinsics::_dpow) { ++ LIRItem value1(x->argument_at(1), this); ++ ++ value1.set_destroys_register(); ++ ++ BasicTypeList signature(2); ++ signature.append(T_DOUBLE); ++ signature.append(T_DOUBLE); ++ cc = frame_map()->c_calling_convention(&signature); ++ value.load_item_force(cc->at(0)); ++ value1.load_item_force(cc->at(1)); ++ } else { ++ BasicTypeList signature(1); ++ signature.append(T_DOUBLE); ++ cc = frame_map()->c_calling_convention(&signature); ++ value.load_item_force(cc->at(0)); ++ } ++ ++ switch (x->id()) { ++ case vmIntrinsics::_dexp: ++ if (StubRoutines::dexp() != NULL) { ++ __ call_runtime_leaf(StubRoutines::dexp(), getThreadTemp(), result_reg, cc->args()); ++ } else { ++ __ call_runtime_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::dexp), getThreadTemp(), result_reg, cc->args()); ++ } ++ break; ++ case vmIntrinsics::_dlog: ++ if (StubRoutines::dlog() != NULL) { ++ __ call_runtime_leaf(StubRoutines::dlog(), getThreadTemp(), result_reg, cc->args()); ++ } else { ++ __ call_runtime_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::dlog), getThreadTemp(), result_reg, cc->args()); ++ } ++ break; ++ case vmIntrinsics::_dlog10: ++ if (StubRoutines::dlog10() != NULL) { ++ __ call_runtime_leaf(StubRoutines::dlog10(), getThreadTemp(), result_reg, cc->args()); ++ } else { ++ __ call_runtime_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::dlog10), getThreadTemp(), result_reg, cc->args()); ++ } ++ break; ++ case vmIntrinsics::_dpow: ++ if (StubRoutines::dpow() != NULL) { ++ __ call_runtime_leaf(StubRoutines::dpow(), getThreadTemp(), result_reg, cc->args()); ++ } else { ++ __ call_runtime_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::dpow), getThreadTemp(), result_reg, cc->args()); ++ } ++ break; ++ case vmIntrinsics::_dsin: ++ if (StubRoutines::dsin() != NULL) { ++ __ call_runtime_leaf(StubRoutines::dsin(), getThreadTemp(), result_reg, cc->args()); ++ } else { 
++ __ call_runtime_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::dsin), getThreadTemp(), result_reg, cc->args()); ++ } ++ break; ++ case vmIntrinsics::_dcos: ++ if (StubRoutines::dcos() != NULL) { ++ __ call_runtime_leaf(StubRoutines::dcos(), getThreadTemp(), result_reg, cc->args()); ++ } else { ++ __ call_runtime_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::dcos), getThreadTemp(), result_reg, cc->args()); ++ } ++ break; ++ case vmIntrinsics::_dtan: ++ if (StubRoutines::dtan() != NULL) { ++ __ call_runtime_leaf(StubRoutines::dtan(), getThreadTemp(), result_reg, cc->args()); ++ } else { ++ __ call_runtime_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::dtan), getThreadTemp(), result_reg, cc->args()); ++ } ++ break; ++ default: ShouldNotReachHere(); ++ } ++ __ move(result_reg, calc_result); ++} ++ ++void LIRGenerator::do_ArrayCopy(Intrinsic* x) { ++ Register j_rarg0 = RT0; ++ Register j_rarg1 = RA0; ++ Register j_rarg2 = RA1; ++ Register j_rarg3 = RA2; ++ Register j_rarg4 = RA3; ++ Register j_rarg5 = RA4; ++ ++ assert(x->number_of_arguments() == 5, "wrong type"); ++ ++ // Make all state_for calls early since they can emit code ++ CodeEmitInfo* info = state_for(x, x->state()); ++ ++ LIRItem src(x->argument_at(0), this); ++ LIRItem src_pos(x->argument_at(1), this); ++ LIRItem dst(x->argument_at(2), this); ++ LIRItem dst_pos(x->argument_at(3), this); ++ LIRItem length(x->argument_at(4), this); ++ ++ // operands for arraycopy must use fixed registers, otherwise ++ // LinearScan will fail allocation (because arraycopy always needs a ++ // call) ++ ++ // The java calling convention will give us enough registers ++ // so that on the stub side the args will be perfect already. ++ // On the other slow/special case side we call C and the arg ++ // positions are not similar enough to pick one as the best. 
++ // Also because the java calling convention is a "shifted" version ++ // of the C convention we can process the java args trivially into C ++ // args without worry of overwriting during the xfer ++ ++ src.load_item_force (FrameMap::as_oop_opr(j_rarg0)); ++ src_pos.load_item_force (FrameMap::as_opr(j_rarg1)); ++ dst.load_item_force (FrameMap::as_oop_opr(j_rarg2)); ++ dst_pos.load_item_force (FrameMap::as_opr(j_rarg3)); ++ length.load_item_force (FrameMap::as_opr(j_rarg4)); ++ ++ LIR_Opr tmp = FrameMap::as_opr(j_rarg5); ++ ++ set_no_result(x); ++ ++ int flags; ++ ciArrayKlass* expected_type; ++ arraycopy_helper(x, &flags, &expected_type); ++ ++ __ arraycopy(src.result(), src_pos.result(), dst.result(), dst_pos.result(), ++ length.result(), tmp, expected_type, flags, info); // does add_safepoint ++} ++ ++void LIRGenerator::do_update_CRC32(Intrinsic* x) { ++ assert(UseCRC32Intrinsics, "why are we here?"); ++ // Make all state_for calls early since they can emit code ++ LIR_Opr result = rlock_result(x); ++ int flags = 0; ++ switch (x->id()) { ++ case vmIntrinsics::_updateCRC32: { ++ LIRItem crc(x->argument_at(0), this); ++ LIRItem val(x->argument_at(1), this); ++ // val is destroyed by update_crc32 ++ val.set_destroys_register(); ++ crc.load_item(); ++ val.load_item(); ++ __ update_crc32(crc.result(), val.result(), result); ++ break; ++ } ++ case vmIntrinsics::_updateBytesCRC32: ++ case vmIntrinsics::_updateByteBufferCRC32: { ++ bool is_updateBytes = (x->id() == vmIntrinsics::_updateBytesCRC32); ++ ++ LIRItem crc(x->argument_at(0), this); ++ LIRItem buf(x->argument_at(1), this); ++ LIRItem off(x->argument_at(2), this); ++ LIRItem len(x->argument_at(3), this); ++ buf.load_item(); ++ off.load_nonconstant(); ++ ++ LIR_Opr index = off.result(); ++ int offset = is_updateBytes ? arrayOopDesc::base_offset_in_bytes(T_BYTE) : 0; ++ if(off.result()->is_constant()) { ++ index = LIR_OprFact::illegalOpr; ++ offset += off.result()->as_jint(); ++ } ++ LIR_Opr base_op = buf.result(); ++ ++ if (index->is_valid()) { ++ LIR_Opr tmp = new_register(T_LONG); ++ __ convert(Bytecodes::_i2l, index, tmp); ++ index = tmp; ++ } ++ ++ if (offset) { ++ LIR_Opr tmp = new_pointer_register(); ++ __ add(base_op, LIR_OprFact::intConst(offset), tmp); ++ base_op = tmp; ++ offset = 0; ++ } ++ ++ LIR_Address* a = new LIR_Address(base_op, index, offset, T_BYTE); ++ BasicTypeList signature(3); ++ signature.append(T_INT); ++ signature.append(T_ADDRESS); ++ signature.append(T_INT); ++ CallingConvention* cc = frame_map()->c_calling_convention(&signature); ++ const LIR_Opr result_reg = result_register_for(x->type()); ++ ++ LIR_Opr addr = new_pointer_register(); ++ __ leal(LIR_OprFact::address(a), addr); ++ ++ crc.load_item_force(cc->at(0)); ++ __ move(addr, cc->at(1)); ++ len.load_item_force(cc->at(2)); ++ ++ __ call_runtime_leaf(StubRoutines::updateBytesCRC32(), getThreadTemp(), result_reg, cc->args()); ++ __ move(result_reg, result); ++ ++ break; ++ } ++ default: { ++ ShouldNotReachHere(); ++ } ++ } ++} ++ ++void LIRGenerator::do_update_CRC32C(Intrinsic* x) { ++ assert(UseCRC32CIntrinsics, "why are we here?"); ++ // Make all state_for calls early since they can emit code ++ LIR_Opr result = rlock_result(x); ++ int flags = 0; ++ switch (x->id()) { ++ case vmIntrinsics::_updateBytesCRC32C: ++ case vmIntrinsics::_updateDirectByteBufferCRC32C: { ++ bool is_updateBytes = (x->id() == vmIntrinsics::_updateBytesCRC32C); ++ int offset = is_updateBytes ? 
arrayOopDesc::base_offset_in_bytes(T_BYTE) : 0; ++ ++ LIRItem crc(x->argument_at(0), this); ++ LIRItem buf(x->argument_at(1), this); ++ LIRItem off(x->argument_at(2), this); ++ LIRItem end(x->argument_at(3), this); ++ ++ buf.load_item(); ++ off.load_nonconstant(); ++ end.load_nonconstant(); ++ ++ // len = end - off ++ LIR_Opr len = end.result(); ++ LIR_Opr tmpA = new_register(T_INT); ++ LIR_Opr tmpB = new_register(T_INT); ++ __ move(end.result(), tmpA); ++ __ move(off.result(), tmpB); ++ __ sub(tmpA, tmpB, tmpA); ++ len = tmpA; ++ ++ LIR_Opr index = off.result(); ++ if(off.result()->is_constant()) { ++ index = LIR_OprFact::illegalOpr; ++ offset += off.result()->as_jint(); ++ } ++ LIR_Opr base_op = buf.result(); ++ ++ if (index->is_valid()) { ++ LIR_Opr tmp = new_register(T_LONG); ++ __ convert(Bytecodes::_i2l, index, tmp); ++ index = tmp; ++ } ++ ++ if (offset) { ++ LIR_Opr tmp = new_pointer_register(); ++ __ add(base_op, LIR_OprFact::intConst(offset), tmp); ++ base_op = tmp; ++ offset = 0; ++ } ++ ++ LIR_Address* a = new LIR_Address(base_op, index, offset, T_BYTE); ++ BasicTypeList signature(3); ++ signature.append(T_INT); ++ signature.append(T_ADDRESS); ++ signature.append(T_INT); ++ CallingConvention* cc = frame_map()->c_calling_convention(&signature); ++ const LIR_Opr result_reg = result_register_for(x->type()); ++ ++ LIR_Opr addr = new_pointer_register(); ++ __ leal(LIR_OprFact::address(a), addr); ++ ++ crc.load_item_force(cc->at(0)); ++ __ move(addr, cc->at(1)); ++ __ move(len, cc->at(2)); ++ ++ __ call_runtime_leaf(StubRoutines::updateBytesCRC32C(), getThreadTemp(), result_reg, cc->args()); ++ __ move(result_reg, result); ++ ++ break; ++ } ++ default: { ++ ShouldNotReachHere(); ++ } ++ } ++} ++ ++void LIRGenerator::do_FmaIntrinsic(Intrinsic* x) { ++ assert(x->number_of_arguments() == 3, "wrong type"); ++ assert(UseFMA, "Needs FMA instructions support."); ++ LIRItem value(x->argument_at(0), this); ++ LIRItem value1(x->argument_at(1), this); ++ LIRItem value2(x->argument_at(2), this); ++ ++ value.load_item(); ++ value1.load_item(); ++ value2.load_item(); ++ ++ LIR_Opr calc_input = value.result(); ++ LIR_Opr calc_input1 = value1.result(); ++ LIR_Opr calc_input2 = value2.result(); ++ LIR_Opr calc_result = rlock_result(x); ++ ++ switch (x->id()) { ++ case vmIntrinsics::_fmaD: ++ __ fmad(calc_input, calc_input1, calc_input2, calc_result); ++ break; ++ case vmIntrinsics::_fmaF: ++ __ fmaf(calc_input, calc_input1, calc_input2, calc_result); ++ break; ++ default: ++ ShouldNotReachHere(); ++ } ++} ++ ++void LIRGenerator::do_vectorizedMismatch(Intrinsic* x) { ++ fatal("vectorizedMismatch intrinsic is not implemented on this platform"); ++} ++ ++// _i2l, _i2f, _i2d, _l2i, _l2f, _l2d, _f2i, _f2l, _f2d, _d2i, _d2l, _d2f ++// _i2b, _i2c, _i2s ++void LIRGenerator::do_Convert(Convert* x) { ++ LIRItem value(x->value(), this); ++ value.load_item(); ++ LIR_Opr input = value.result(); ++ LIR_Opr result = rlock(x); ++ ++ // arguments of lir_convert ++ LIR_Opr conv_input = input; ++ LIR_Opr conv_result = result; ++ ++ switch (x->op()) { ++ case Bytecodes::_f2i: ++ case Bytecodes::_f2l: ++ __ convert(x->op(), conv_input, conv_result, NULL, new_register(T_FLOAT)); ++ break; ++ case Bytecodes::_d2i: ++ case Bytecodes::_d2l: ++ __ convert(x->op(), conv_input, conv_result, NULL, new_register(T_DOUBLE)); ++ break; ++ default: ++ __ convert(x->op(), conv_input, conv_result); ++ break; ++ } ++ ++ assert(result->is_virtual(), "result must be virtual register"); ++ set_result(x, result); ++} ++ ++void 
LIRGenerator::do_NewInstance(NewInstance* x) { ++#ifndef PRODUCT ++ if (PrintNotLoaded && !x->klass()->is_loaded()) { ++ tty->print_cr(" ###class not loaded at new bci %d", x->printable_bci()); ++ } ++#endif ++ CodeEmitInfo* info = state_for(x, x->state()); ++ LIR_Opr reg = result_register_for(x->type()); ++ new_instance(reg, x->klass(), x->is_unresolved(), ++ FrameMap::t0_oop_opr, ++ FrameMap::t1_oop_opr, ++ FrameMap::a4_oop_opr, ++ LIR_OprFact::illegalOpr, ++ FrameMap::a3_metadata_opr, info); ++ LIR_Opr result = rlock_result(x); ++ __ move(reg, result); ++} ++ ++void LIRGenerator::do_NewTypeArray(NewTypeArray* x) { ++ CodeEmitInfo* info = state_for(x, x->state()); ++ ++ LIRItem length(x->length(), this); ++ length.load_item_force(FrameMap::s0_opr); ++ ++ LIR_Opr reg = result_register_for(x->type()); ++ LIR_Opr tmp1 = FrameMap::t0_oop_opr; ++ LIR_Opr tmp2 = FrameMap::t1_oop_opr; ++ LIR_Opr tmp3 = FrameMap::a5_oop_opr; ++ LIR_Opr tmp4 = reg; ++ LIR_Opr klass_reg = FrameMap::a3_metadata_opr; ++ LIR_Opr len = length.result(); ++ BasicType elem_type = x->elt_type(); ++ ++ __ metadata2reg(ciTypeArrayKlass::make(elem_type)->constant_encoding(), klass_reg); ++ ++ CodeStub* slow_path = new NewTypeArrayStub(klass_reg, len, reg, info); ++ __ allocate_array(reg, len, tmp1, tmp2, tmp3, tmp4, elem_type, klass_reg, slow_path); ++ ++ LIR_Opr result = rlock_result(x); ++ __ move(reg, result); ++} ++ ++void LIRGenerator::do_NewObjectArray(NewObjectArray* x) { ++ LIRItem length(x->length(), this); ++ // in case of patching (i.e., object class is not yet loaded), we need to reexecute the instruction ++ // and therefore provide the state before the parameters have been consumed ++ CodeEmitInfo* patching_info = NULL; ++ if (!x->klass()->is_loaded() || PatchALot) { ++ patching_info = state_for(x, x->state_before()); ++ } ++ ++ CodeEmitInfo* info = state_for(x, x->state()); ++ ++ LIR_Opr reg = result_register_for(x->type()); ++ LIR_Opr tmp1 = FrameMap::t0_oop_opr; ++ LIR_Opr tmp2 = FrameMap::t1_oop_opr; ++ LIR_Opr tmp3 = FrameMap::a5_oop_opr; ++ LIR_Opr tmp4 = reg; ++ LIR_Opr klass_reg = FrameMap::a3_metadata_opr; ++ ++ length.load_item_force(FrameMap::s0_opr); ++ LIR_Opr len = length.result(); ++ ++ CodeStub* slow_path = new NewObjectArrayStub(klass_reg, len, reg, info); ++ ciKlass* obj = (ciKlass*) ciObjArrayKlass::make(x->klass()); ++ if (obj == ciEnv::unloaded_ciobjarrayklass()) { ++ BAILOUT("encountered unloaded_ciobjarrayklass due to out of memory error"); ++ } ++ klass2reg_with_patching(klass_reg, obj, patching_info); ++ __ allocate_array(reg, len, tmp1, tmp2, tmp3, tmp4, T_OBJECT, klass_reg, slow_path); ++ ++ LIR_Opr result = rlock_result(x); ++ __ move(reg, result); ++} ++ ++void LIRGenerator::do_NewMultiArray(NewMultiArray* x) { ++ Values* dims = x->dims(); ++ int i = dims->length(); ++ LIRItemList* items = new LIRItemList(i, i, NULL); ++ while (i-- > 0) { ++ LIRItem* size = new LIRItem(dims->at(i), this); ++ items->at_put(i, size); ++ } ++ ++ // Evaluate state_for early since it may emit code. ++ CodeEmitInfo* patching_info = NULL; ++ if (!x->klass()->is_loaded() || PatchALot) { ++ patching_info = state_for(x, x->state_before()); ++ ++ // Cannot re-use same xhandlers for multiple CodeEmitInfos, so ++ // clone all handlers (NOTE: Usually this is handled transparently ++ // by the CodeEmitInfo cloning logic in CodeStub constructors but ++ // is done explicitly here because a stub isn't being used). 
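// The new_instance / allocate_array sequences above pin their operands to fixed
// registers and attach a slow-path CodeStub; on the fast path the object is
// bump-allocated out of the thread-local allocation buffer. A minimal standalone
// sketch of that bump-and-check shape (struct and function names are illustrative):
#include <cstddef>
#include <cstdio>

struct Tlab {
  char* top;   // next free byte
  char* end;   // end of the buffer
};

// Returns the object start, or nullptr to signal "take the slow path".
static void* tlab_bump_allocate(Tlab& t, size_t size_in_bytes) {
  if (size_in_bytes > static_cast<size_t>(t.end - t.top))
    return nullptr;            // does not fit: refill the TLAB or allocate elsewhere
  void* obj = t.top;
  t.top += size_in_bytes;      // bump the pointer; thread-local, so no atomics
  return obj;
}

int main() {
  alignas(8) static char buffer[64];
  Tlab t{buffer, buffer + sizeof buffer};
  void* a = tlab_bump_allocate(t, 24);
  void* b = tlab_bump_allocate(t, 48);   // 48 > 40 bytes remaining -> slow path
  std::printf("a=%p b=%p\n", a, b);
  return 0;
}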
++ x->set_exception_handlers(new XHandlers(x->exception_handlers())); ++ } ++ CodeEmitInfo* info = state_for(x, x->state()); ++ ++ i = dims->length(); ++ while (i-- > 0) { ++ LIRItem* size = items->at(i); ++ size->load_item(); ++ ++ store_stack_parameter(size->result(), in_ByteSize(i*4)); ++ } ++ ++ LIR_Opr klass_reg = FrameMap::a0_metadata_opr; ++ klass2reg_with_patching(klass_reg, x->klass(), patching_info); ++ ++ LIR_Opr rank = FrameMap::s0_opr; ++ __ move(LIR_OprFact::intConst(x->rank()), rank); ++ LIR_Opr varargs = FrameMap::a2_opr; ++ __ move(FrameMap::sp_opr, varargs); ++ LIR_OprList* args = new LIR_OprList(3); ++ args->append(klass_reg); ++ args->append(rank); ++ args->append(varargs); ++ LIR_Opr reg = result_register_for(x->type()); ++ __ call_runtime(Runtime1::entry_for(Runtime1::new_multi_array_id), ++ LIR_OprFact::illegalOpr, ++ reg, args, info); ++ ++ LIR_Opr result = rlock_result(x); ++ __ move(reg, result); ++} ++ ++void LIRGenerator::do_BlockBegin(BlockBegin* x) { ++ // nothing to do for now ++} ++ ++void LIRGenerator::do_CheckCast(CheckCast* x) { ++ LIRItem obj(x->obj(), this); ++ ++ CodeEmitInfo* patching_info = NULL; ++ if (!x->klass()->is_loaded() || ++ (PatchALot && !x->is_incompatible_class_change_check() && ++ !x->is_invokespecial_receiver_check())) { ++ // must do this before locking the destination register as an oop register, ++ // and before the obj is loaded (the latter is for deoptimization) ++ patching_info = state_for(x, x->state_before()); ++ } ++ obj.load_item(); ++ ++ // info for exceptions ++ CodeEmitInfo* info_for_exception = ++ (x->needs_exception_state() ? state_for(x) : ++ state_for(x, x->state_before(), true /*ignore_xhandler*/)); ++ ++ CodeStub* stub; ++ if (x->is_incompatible_class_change_check()) { ++ assert(patching_info == NULL, "can't patch this"); ++ stub = new SimpleExceptionStub(Runtime1::throw_incompatible_class_change_error_id, ++ LIR_OprFact::illegalOpr, info_for_exception); ++ } else if (x->is_invokespecial_receiver_check()) { ++ assert(patching_info == NULL, "can't patch this"); ++ stub = new DeoptimizeStub(info_for_exception, ++ Deoptimization::Reason_class_check, ++ Deoptimization::Action_none); ++ } else { ++ stub = new SimpleExceptionStub(Runtime1::throw_class_cast_exception_id, ++ obj.result(), info_for_exception); ++ } ++ LIR_Opr reg = rlock_result(x); ++ LIR_Opr tmp3 = LIR_OprFact::illegalOpr; ++ if (!x->klass()->is_loaded() || UseCompressedClassPointers) { ++ tmp3 = new_register(objectType); ++ } ++ __ checkcast(reg, obj.result(), x->klass(), ++ new_register(objectType), new_register(objectType), tmp3, ++ x->direct_compare(), info_for_exception, patching_info, stub, ++ x->profiled_method(), x->profiled_bci()); ++} ++ ++void LIRGenerator::do_InstanceOf(InstanceOf* x) { ++ LIRItem obj(x->obj(), this); ++ ++ // result and test object may not be in same register ++ LIR_Opr reg = rlock_result(x); ++ CodeEmitInfo* patching_info = NULL; ++ if ((!x->klass()->is_loaded() || PatchALot)) { ++ // must do this before locking the destination register as an oop register ++ patching_info = state_for(x, x->state_before()); ++ } ++ obj.load_item(); ++ LIR_Opr tmp3 = LIR_OprFact::illegalOpr; ++ if (!x->klass()->is_loaded() || UseCompressedClassPointers) { ++ tmp3 = new_register(objectType); ++ } ++ __ instanceof(reg, obj.result(), x->klass(), ++ new_register(objectType), new_register(objectType), tmp3, ++ x->direct_compare(), patching_info, x->profiled_method(), x->profiled_bci()); ++} ++ ++void LIRGenerator::do_If(If* x) { ++ 
assert(x->number_of_sux() == 2, "inconsistency"); ++ ValueTag tag = x->x()->type()->tag(); ++ bool is_safepoint = x->is_safepoint(); ++ ++ If::Condition cond = x->cond(); ++ ++ LIRItem xitem(x->x(), this); ++ LIRItem yitem(x->y(), this); ++ LIRItem* xin = &xitem; ++ LIRItem* yin = &yitem; ++ ++ if (tag == longTag) { ++ // for longs, only conditions "eql", "neq", "lss", "geq" are valid; ++ // mirror for other conditions ++ if (cond == If::gtr || cond == If::leq) { ++ cond = Instruction::mirror(cond); ++ xin = &yitem; ++ yin = &xitem; ++ } ++ xin->set_destroys_register(); ++ } ++ xin->load_item(); ++ ++ if (tag == longTag) { ++ if (yin->is_constant() && yin->get_jlong_constant() == 0) { ++ yin->dont_load_item(); ++ } else { ++ yin->load_item(); ++ } ++ } else if (tag == intTag) { ++ if (yin->is_constant() && yin->get_jint_constant() == 0) { ++ yin->dont_load_item(); ++ } else { ++ yin->load_item(); ++ } ++ } else { ++ yin->load_item(); ++ } ++ ++ set_no_result(x); ++ ++ LIR_Opr left = xin->result(); ++ LIR_Opr right = yin->result(); ++ ++ // add safepoint before generating condition code so it can be recomputed ++ if (x->is_safepoint()) { ++ // increment backedge counter if needed ++ increment_backedge_counter_conditionally(lir_cond(cond), left, right, state_for(x, x->state_before()), ++ x->tsux()->bci(), x->fsux()->bci(), x->profiled_bci()); ++ __ safepoint(LIR_OprFact::illegalOpr, state_for(x, x->state_before())); ++ } ++ ++ __ cmp(lir_cond(cond), left, right); ++ // Generate branch profiling. Profiling code doesn't kill flags. ++ profile_branch(x, cond); ++ move_to_phi(x->state()); ++ if (x->x()->type()->is_float_kind()) { ++ __ branch(lir_cond(cond), x->tsux(), x->usux()); ++ } else { ++ __ branch(lir_cond(cond), x->tsux()); ++ } ++ assert(x->default_sux() == x->fsux(), "wrong destination above"); ++ __ jump(x->default_sux()); ++} ++ ++LIR_Opr LIRGenerator::getThreadPointer() { ++ return FrameMap::as_pointer_opr(TREG); ++} ++ ++void LIRGenerator::trace_block_entry(BlockBegin* block) { Unimplemented(); } ++ ++void LIRGenerator::volatile_field_store(LIR_Opr value, LIR_Address* address, ++ CodeEmitInfo* info) { ++ __ volatile_store_mem_reg(value, address, info); ++} ++ ++void LIRGenerator::volatile_field_load(LIR_Address* address, LIR_Opr result, ++ CodeEmitInfo* info) { ++ // 8179954: We need to make sure that the code generated for ++ // volatile accesses forms a sequentially-consistent set of ++ // operations when combined with STLR and LDAR. Without a leading ++ // membar it's possible for a simple Dekker test to fail if loads ++ // use LD;DMB but stores use STLR. This can happen if C2 compiles ++ // the stores in one method and C1 compiles the loads in another. ++ if (!CompilerConfig::is_c1_only_no_jvmci()) { ++ __ membar(); ++ } ++ __ volatile_load_mem_reg(address, result, info); ++} +diff --git a/src/hotspot/cpu/loongarch/c1_LIR_loongarch_64.cpp b/src/hotspot/cpu/loongarch/c1_LIR_loongarch_64.cpp +new file mode 100644 +index 00000000000..01e8c9f270e +--- /dev/null ++++ b/src/hotspot/cpu/loongarch/c1_LIR_loongarch_64.cpp +@@ -0,0 +1,57 @@ ++/* ++ * Copyright (c) 2016, 2021, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2021, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. 
++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#include "precompiled.hpp" ++#include "asm/register.hpp" ++#include "c1/c1_LIR.hpp" ++ ++FloatRegister LIR_OprDesc::as_float_reg() const { ++ return as_FloatRegister(fpu_regnr()); ++} ++ ++FloatRegister LIR_OprDesc::as_double_reg() const { ++ return as_FloatRegister(fpu_regnrLo()); ++} ++ ++// Reg2 unused. ++LIR_Opr LIR_OprFact::double_fpu(int reg1, int reg2) { ++ assert(as_FloatRegister(reg2) == fnoreg, "Not used on this platform"); ++ return (LIR_Opr)(intptr_t)((reg1 << LIR_OprDesc::reg1_shift) | ++ (reg1 << LIR_OprDesc::reg2_shift) | ++ LIR_OprDesc::double_type | ++ LIR_OprDesc::fpu_register | ++ LIR_OprDesc::double_size); ++} ++ ++#ifndef PRODUCT ++void LIR_Address::verify() const { ++ assert(base()->is_cpu_register(), "wrong base operand"); ++ assert(index()->is_illegal() || index()->is_double_cpu() || ++ index()->is_single_cpu(), "wrong index operand"); ++ assert(base()->type() == T_ADDRESS || base()->type() == T_OBJECT || ++ base()->type() == T_LONG || base()->type() == T_METADATA, ++ "wrong type for addresses"); ++} ++#endif // PRODUCT +diff --git a/src/hotspot/cpu/loongarch/c1_LinearScan_loongarch.hpp b/src/hotspot/cpu/loongarch/c1_LinearScan_loongarch.hpp +new file mode 100644 +index 00000000000..f15dacafeba +--- /dev/null ++++ b/src/hotspot/cpu/loongarch/c1_LinearScan_loongarch.hpp +@@ -0,0 +1,70 @@ ++/* ++ * Copyright (c) 2005, 2021, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2021, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ * ++ */ ++ ++#ifndef CPU_LOONGARCH_C1_LINEARSCAN_LOONGARCH_HPP ++#define CPU_LOONGARCH_C1_LINEARSCAN_LOONGARCH_HPP ++ ++inline bool LinearScan::is_processed_reg_num(int reg_num) { ++ return reg_num <= FrameMap::last_cpu_reg() || reg_num >= pd_nof_cpu_regs_frame_map; ++} ++ ++inline int LinearScan::num_physical_regs(BasicType type) { ++ return 1; ++} ++ ++inline bool LinearScan::requires_adjacent_regs(BasicType type) { ++ return false; ++} ++ ++inline bool LinearScan::is_caller_save(int assigned_reg) { ++ assert(assigned_reg >= 0 && assigned_reg < nof_regs, "should call this only for registers"); ++ if (assigned_reg < pd_first_callee_saved_reg) ++ return true; ++ if (assigned_reg > pd_last_callee_saved_reg && assigned_reg < pd_first_callee_saved_fpu_reg) ++ return true; ++ if (assigned_reg > pd_last_callee_saved_fpu_reg && assigned_reg < pd_last_fpu_reg) ++ return true; ++ return false; ++} ++ ++inline void LinearScan::pd_add_temps(LIR_Op* op) {} ++ ++// Implementation of LinearScanWalker ++inline bool LinearScanWalker::pd_init_regs_for_alloc(Interval* cur) { ++ if (allocator()->gen()->is_vreg_flag_set(cur->reg_num(), LIRGenerator::callee_saved)) { ++ assert(cur->type() != T_FLOAT && cur->type() != T_DOUBLE, "cpu regs only"); ++ _first_reg = pd_first_callee_saved_reg; ++ _last_reg = pd_last_callee_saved_reg; ++ return true; ++ } else if (cur->type() == T_INT || cur->type() == T_LONG || cur->type() == T_OBJECT || ++ cur->type() == T_ADDRESS || cur->type() == T_METADATA) { ++ _first_reg = pd_first_cpu_reg; ++ _last_reg = pd_last_allocatable_cpu_reg; ++ return true; ++ } ++ return false; ++} ++ ++#endif // CPU_LOONGARCH_C1_LINEARSCAN_LOONGARCH_HPP +diff --git a/src/hotspot/cpu/loongarch/c1_LinearScan_loongarch_64.cpp b/src/hotspot/cpu/loongarch/c1_LinearScan_loongarch_64.cpp +new file mode 100644 +index 00000000000..219b2e3671c +--- /dev/null ++++ b/src/hotspot/cpu/loongarch/c1_LinearScan_loongarch_64.cpp +@@ -0,0 +1,33 @@ ++/* ++ * Copyright (c) 2005, 2011, Oracle and/or its affiliates. All rights reserved. All rights reserved. ++ * Copyright (c) 2021, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ * ++ */ ++ ++#include "precompiled.hpp" ++#include "c1/c1_Instruction.hpp" ++#include "c1/c1_LinearScan.hpp" ++#include "utilities/bitMap.inline.hpp" ++ ++void LinearScan::allocate_fpu_stack() { ++ // No FPU stack on LoongArch64 ++} +diff --git a/src/hotspot/cpu/loongarch/c1_MacroAssembler_loongarch.hpp b/src/hotspot/cpu/loongarch/c1_MacroAssembler_loongarch.hpp +new file mode 100644 +index 00000000000..38ff4c58369 +--- /dev/null ++++ b/src/hotspot/cpu/loongarch/c1_MacroAssembler_loongarch.hpp +@@ -0,0 +1,112 @@ ++/* ++ * Copyright (c) 1999, 2021, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2021, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#ifndef CPU_LOONGARCH_C1_MACROASSEMBLER_LOONGARCH_HPP ++#define CPU_LOONGARCH_C1_MACROASSEMBLER_LOONGARCH_HPP ++ ++using MacroAssembler::build_frame; ++using MacroAssembler::null_check; ++ ++// C1_MacroAssembler contains high-level macros for C1 ++ ++ private: ++ int _rsp_offset; // track rsp changes ++ // initialization ++ void pd_init() { _rsp_offset = 0; } ++ ++ public: ++ void try_allocate( ++ Register obj, // result: pointer to object after successful allocation ++ Register var_size_in_bytes, // object size in bytes if unknown at compile time; invalid otherwise ++ int con_size_in_bytes, // object size in bytes if known at compile time ++ Register t1, // temp register ++ Register t2, // temp register ++ Label& slow_case // continuation point if fast allocation fails ++ ); ++ ++ void initialize_header(Register obj, Register klass, Register len, Register t1, Register t2); ++ void initialize_body(Register obj, Register len_in_bytes, int hdr_size_in_bytes, Register t1, Register t2); ++ ++ // locking ++ // hdr : must be A0, contents destroyed ++ // obj : must point to the object to lock, contents preserved ++ // disp_hdr: must point to the displaced header location, contents preserved ++ // scratch : scratch register, contents destroyed ++ // returns code offset at which to add null check debug information ++ int lock_object (Register swap, Register obj, Register disp_hdr, Register scratch, Label& slow_case); ++ ++ // unlocking ++ // hdr : contents destroyed ++ // obj : must point to the object to lock, contents preserved ++ // disp_hdr: must be A0 & must point to the displaced header location, contents destroyed ++ void unlock_object(Register swap, Register obj, Register lock, Label& slow_case); ++ ++ void initialize_object( ++ Register obj, // result: pointer to object after successful allocation ++ Register klass, // 
object klass ++ Register var_size_in_bytes, // object size in bytes if unknown at compile time; invalid otherwise ++ int con_size_in_bytes, // object size in bytes if known at compile time ++ Register t1, // temp register ++ Register t2, // temp register ++ bool is_tlab_allocated // the object was allocated in a TLAB; relevant for the implementation of ZeroTLAB ++ ); ++ ++ // allocation of fixed-size objects ++ // (can also be used to allocate fixed-size arrays, by setting ++ // hdr_size correctly and storing the array length afterwards) ++ // obj : will contain pointer to allocated object ++ // t1, t2 : scratch registers - contents destroyed ++ // header_size: size of object header in words ++ // object_size: total size of object in words ++ // slow_case : exit to slow case implementation if fast allocation fails ++ void allocate_object(Register obj, Register t1, Register t2, int header_size, ++ int object_size, Register klass, Label& slow_case); ++ ++ enum { ++ max_array_allocation_length = 0x00FFFFFF ++ }; ++ ++ // allocation of arrays ++ // obj : will contain pointer to allocated object ++ // len : array length in number of elements ++ // t : scratch register - contents destroyed ++ // header_size: size of object header in words ++ // f : element scale factor ++ // slow_case : exit to slow case implementation if fast allocation fails ++ void allocate_array(Register obj, Register len, Register t, Register t2, int header_size, ++ int f, Register klass, Label& slow_case); ++ ++ int rsp_offset() const { return _rsp_offset; } ++ void set_rsp_offset(int n) { _rsp_offset = n; } ++ ++ void invalidate_registers(bool inv_a0, bool inv_s0, bool inv_a2, bool inv_a3, ++ bool inv_a4, bool inv_a5) PRODUCT_RETURN; ++ ++ // This platform only uses signal-based null checks. The Label is not needed. ++ void null_check(Register r, Label *Lnull = NULL) { MacroAssembler::null_check(r); } ++ ++ void load_parameter(int offset_in_words, Register reg); ++ ++#endif // CPU_LOONGARCH_C1_MACROASSEMBLER_LOONGARCH_HPP +diff --git a/src/hotspot/cpu/loongarch/c1_MacroAssembler_loongarch_64.cpp b/src/hotspot/cpu/loongarch/c1_MacroAssembler_loongarch_64.cpp +new file mode 100644 +index 00000000000..56c6281d415 +--- /dev/null ++++ b/src/hotspot/cpu/loongarch/c1_MacroAssembler_loongarch_64.cpp +@@ -0,0 +1,365 @@ ++/* ++ * Copyright (c) 1999, 2021, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2021, 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ * ++ */ ++ ++#include "precompiled.hpp" ++#include "c1/c1_MacroAssembler.hpp" ++#include "c1/c1_Runtime1.hpp" ++#include "gc/shared/barrierSetAssembler.hpp" ++#include "gc/shared/collectedHeap.hpp" ++#include "gc/shared/tlab_globals.hpp" ++#include "interpreter/interpreter.hpp" ++#include "oops/arrayOop.hpp" ++#include "oops/markWord.hpp" ++#include "runtime/basicLock.hpp" ++#include "runtime/biasedLocking.hpp" ++#include "runtime/os.hpp" ++#include "runtime/sharedRuntime.hpp" ++#include "runtime/stubRoutines.hpp" ++ ++#define A0 RA0 ++#define A1 RA1 ++#define A2 RA2 ++#define A3 RA3 ++#define A4 RA4 ++#define A5 RA5 ++#define A6 RA6 ++#define A7 RA7 ++#define T0 RT0 ++#define T1 RT1 ++ ++int C1_MacroAssembler::lock_object(Register hdr, Register obj, Register disp_hdr, Register scratch, Label& slow_case) { ++ const int aligned_mask = BytesPerWord -1; ++ const int hdr_offset = oopDesc::mark_offset_in_bytes(); ++ assert(hdr != obj && hdr != disp_hdr && obj != disp_hdr, "registers must be different"); ++ int null_check_offset = -1; ++ Label done; ++ ++ verify_oop(obj); ++ ++ // save object being locked into the BasicObjectLock ++ st_ptr(obj, Address(disp_hdr, BasicObjectLock::obj_offset_in_bytes())); ++ ++ null_check_offset = offset(); ++ ++ if (DiagnoseSyncOnValueBasedClasses != 0) { ++ load_klass(hdr, obj); ++ ld_w(hdr, Address(hdr, Klass::access_flags_offset())); ++ li(SCR1, JVM_ACC_IS_VALUE_BASED_CLASS); ++ andr(SCR1, hdr, SCR1); ++ bnez(SCR1, slow_case); ++ } ++ ++ if (UseBiasedLocking) { ++ assert(scratch != noreg, "should have scratch register at this point"); ++ biased_locking_enter(disp_hdr, obj, hdr, scratch, false, done, &slow_case); ++ } ++ ++ // Load object header ++ ld_ptr(hdr, Address(obj, hdr_offset)); ++ // and mark it as unlocked ++ ori(hdr, hdr, markWord::unlocked_value); ++ // save unlocked object header into the displaced header location on the stack ++ st_ptr(hdr, Address(disp_hdr, 0)); ++ // test if object header is still the same (i.e. 
unlocked), and if so, store the ++ // displaced header address in the object header - if it is not the same, get the ++ // object header instead ++ lea(SCR2, Address(obj, hdr_offset)); ++ cmpxchg(Address(SCR2, 0), hdr, disp_hdr, SCR1, true, false, done); ++ // if the object header was the same, we're done ++ // if the object header was not the same, it is now in the hdr register ++ // => test if it is a stack pointer into the same stack (recursive locking), i.e.: ++ // ++ // 1) (hdr & aligned_mask) == 0 ++ // 2) sp <= hdr ++ // 3) hdr <= sp + page_size ++ // ++ // these 3 tests can be done by evaluating the following expression: ++ // ++ // (hdr - sp) & (aligned_mask - page_size) ++ // ++ // assuming both the stack pointer and page_size have their least ++ // significant 2 bits cleared and page_size is a power of 2 ++ sub_d(hdr, hdr, SP); ++ li(SCR1, aligned_mask - os::vm_page_size()); ++ andr(hdr, hdr, SCR1); ++ // for recursive locking, the result is zero => save it in the displaced header ++ // location (NULL in the displaced hdr location indicates recursive locking) ++ st_ptr(hdr, Address(disp_hdr, 0)); ++ // otherwise we don't care about the result and handle locking via runtime call ++ bnez(hdr, slow_case); ++ // done ++ bind(done); ++ if (PrintBiasedLockingStatistics) { ++ atomic_inc32((address)BiasedLocking::fast_path_entry_count_addr(), 1, SCR1, SCR2); ++ } ++ return null_check_offset; ++} ++ ++void C1_MacroAssembler::unlock_object(Register hdr, Register obj, Register disp_hdr, Label& slow_case) { ++ const int aligned_mask = BytesPerWord -1; ++ const int hdr_offset = oopDesc::mark_offset_in_bytes(); ++ assert(hdr != obj && hdr != disp_hdr && obj != disp_hdr, "registers must be different"); ++ Label done; ++ ++ if (UseBiasedLocking) { ++ // load object ++ ld_ptr(obj, Address(disp_hdr, BasicObjectLock::obj_offset_in_bytes())); ++ biased_locking_exit(obj, hdr, done); ++ } ++ ++ // load displaced header ++ ld_ptr(hdr, Address(disp_hdr, 0)); ++ // if the loaded hdr is NULL we had recursive locking ++ // if we had recursive locking, we are done ++ beqz(hdr, done); ++ if (!UseBiasedLocking) { ++ // load object ++ ld_ptr(obj, Address(disp_hdr, BasicObjectLock::obj_offset_in_bytes())); ++ } ++ verify_oop(obj); ++ // test if object header is pointing to the displaced header, and if so, restore ++ // the displaced header in the object - if the object header is not pointing to ++ // the displaced header, get the object header instead ++ // if the object header was not pointing to the displaced header, ++ // we do unlocking via runtime call ++ if (hdr_offset) { ++ lea(SCR1, Address(obj, hdr_offset)); ++ cmpxchg(Address(SCR1, 0), disp_hdr, hdr, SCR2, false, false, done, &slow_case); ++ } else { ++ cmpxchg(Address(obj, 0), disp_hdr, hdr, SCR2, false, false, done, &slow_case); ++ } ++ // done ++ bind(done); ++} ++ ++// Defines obj, preserves var_size_in_bytes ++void C1_MacroAssembler::try_allocate(Register obj, Register var_size_in_bytes, ++ int con_size_in_bytes, Register t1, Register t2, ++ Label& slow_case) { ++ if (UseTLAB) { ++ tlab_allocate(obj, var_size_in_bytes, con_size_in_bytes, t1, t2, slow_case); ++ } else { ++ eden_allocate(obj, var_size_in_bytes, con_size_in_bytes, t1, slow_case); ++ } ++} ++ ++void C1_MacroAssembler::initialize_header(Register obj, Register klass, Register len, ++ Register t1, Register t2) { ++ assert_different_registers(obj, klass, len); ++ if (UseBiasedLocking && !len->is_valid()) { ++ assert_different_registers(obj, klass, len, t1, t2); ++ ld_ptr(t1, 
Address(klass, Klass::prototype_header_offset())); ++ } else { ++ // This assumes that all prototype bits fit in an int32_t ++ li(t1, (int32_t)(intptr_t)markWord::prototype().value()); ++ } ++ st_ptr(t1, Address(obj, oopDesc::mark_offset_in_bytes())); ++ ++ if (UseCompressedClassPointers) { // Take care not to kill klass ++ encode_klass_not_null(t1, klass); ++ st_w(t1, Address(obj, oopDesc::klass_offset_in_bytes())); ++ } else { ++ st_ptr(klass, Address(obj, oopDesc::klass_offset_in_bytes())); ++ } ++ ++ if (len->is_valid()) { ++ st_w(len, Address(obj, arrayOopDesc::length_offset_in_bytes())); ++ } else if (UseCompressedClassPointers) { ++ store_klass_gap(obj, R0); ++ } ++} ++ ++// preserves obj, destroys len_in_bytes ++// ++// Scratch registers: t1 = T0, t2 = T1 ++// ++void C1_MacroAssembler::initialize_body(Register obj, Register len_in_bytes, ++ int hdr_size_in_bytes, Register t1, Register t2) { ++ assert(hdr_size_in_bytes >= 0, "header size must be positive or 0"); ++ assert(t1 == T0 && t2 == T1, "must be"); ++ Label done; ++ ++ // len_in_bytes is positive and ptr sized ++ addi_d(len_in_bytes, len_in_bytes, -hdr_size_in_bytes); ++ beqz(len_in_bytes, done); ++ ++ // zero_words() takes ptr in t1 and count in bytes in t2 ++ lea(t1, Address(obj, hdr_size_in_bytes)); ++ addi_d(t2, len_in_bytes, -BytesPerWord); ++ ++ Label loop; ++ bind(loop); ++ stx_d(R0, t1, t2); ++ addi_d(t2, t2, -BytesPerWord); ++ bge(t2, R0, loop); ++ ++ bind(done); ++} ++ ++void C1_MacroAssembler::allocate_object(Register obj, Register t1, Register t2, int header_size, ++ int object_size, Register klass, Label& slow_case) { ++ assert_different_registers(obj, t1, t2); ++ assert(header_size >= 0 && object_size >= header_size, "illegal sizes"); ++ ++ try_allocate(obj, noreg, object_size * BytesPerWord, t1, t2, slow_case); ++ ++ initialize_object(obj, klass, noreg, object_size * HeapWordSize, t1, t2, UseTLAB); ++} ++ ++// Scratch registers: t1 = T0, t2 = T1 ++void C1_MacroAssembler::initialize_object(Register obj, Register klass, Register var_size_in_bytes, ++ int con_size_in_bytes, Register t1, Register t2, ++ bool is_tlab_allocated) { ++ assert((con_size_in_bytes & MinObjAlignmentInBytesMask) == 0, ++ "con_size_in_bytes is not multiple of alignment"); ++ const int hdr_size_in_bytes = instanceOopDesc::header_size() * HeapWordSize; ++ ++ initialize_header(obj, klass, noreg, t1, t2); ++ ++ if (!(UseTLAB && ZeroTLAB && is_tlab_allocated)) { ++ // clear rest of allocated space ++ const Register index = t2; ++ if (var_size_in_bytes != noreg) { ++ move(index, var_size_in_bytes); ++ initialize_body(obj, index, hdr_size_in_bytes, t1, t2); ++ } else if (con_size_in_bytes > hdr_size_in_bytes) { ++ con_size_in_bytes -= hdr_size_in_bytes; ++ lea(t1, Address(obj, hdr_size_in_bytes)); ++ Label loop; ++ li(SCR1, con_size_in_bytes - BytesPerWord); ++ bind(loop); ++ stx_d(R0, t1, SCR1); ++ addi_d(SCR1, SCR1, -BytesPerWord); ++ bge(SCR1, R0, loop); ++ } ++ } ++ ++ membar(StoreStore); ++ ++ if (CURRENT_ENV->dtrace_alloc_probes()) { ++ assert(obj == A0, "must be"); ++ call(Runtime1::entry_for(Runtime1::dtrace_object_alloc_id), relocInfo::runtime_call_type); ++ } ++ ++ verify_oop(obj); ++} ++ ++void C1_MacroAssembler::allocate_array(Register obj, Register len, Register t1, Register t2, ++ int header_size, int f, Register klass, Label& slow_case) { ++ assert_different_registers(obj, len, t1, t2, klass); ++ ++ // determine alignment mask ++ assert(!(BytesPerWord & 1), "must be a multiple of 2 for masking code to work"); ++ ++ // check for 
negative or excessive length ++ li(SCR1, (int32_t)max_array_allocation_length); ++ bge_far(len, SCR1, slow_case, false); ++ ++ const Register arr_size = t2; // okay to be the same ++ // align object end ++ li(arr_size, (int32_t)header_size * BytesPerWord + MinObjAlignmentInBytesMask); ++ slli_w(SCR1, len, f); ++ add_d(arr_size, arr_size, SCR1); ++ bstrins_d(arr_size, R0, exact_log2(MinObjAlignmentInBytesMask + 1) - 1, 0); ++ ++ try_allocate(obj, arr_size, 0, t1, t2, slow_case); ++ ++ initialize_header(obj, klass, len, t1, t2); ++ ++ // clear rest of allocated space ++ initialize_body(obj, arr_size, header_size * BytesPerWord, t1, t2); ++ ++ membar(StoreStore); ++ ++ if (CURRENT_ENV->dtrace_alloc_probes()) { ++ assert(obj == A0, "must be"); ++ call(Runtime1::entry_for(Runtime1::dtrace_object_alloc_id), relocInfo::runtime_call_type); ++ } ++ ++ verify_oop(obj); ++} ++ ++void C1_MacroAssembler::build_frame(int framesize, int bang_size_in_bytes) { ++ assert(bang_size_in_bytes >= framesize, "stack bang size incorrect"); ++ // Make sure there is enough stack space for this method's activation. ++ // Note that we do this before creating a frame. ++ generate_stack_overflow_check(bang_size_in_bytes); ++ MacroAssembler::build_frame(framesize); ++ ++ // Insert nmethod entry barrier into frame. ++ BarrierSetAssembler* bs = BarrierSet::barrier_set()->barrier_set_assembler(); ++ bs->nmethod_entry_barrier(this); ++} ++ ++void C1_MacroAssembler::remove_frame(int framesize) { ++ MacroAssembler::remove_frame(framesize); ++} ++ ++void C1_MacroAssembler::verified_entry(bool breakAtEntry) { ++ // If we have to make this method not-entrant we'll overwrite its ++ // first instruction with a jump. For this action to be legal we ++ // must ensure that this first instruction is a b, bl, nop, break. ++ // Make it a NOP. ++ nop(); ++} ++ ++void C1_MacroAssembler::load_parameter(int offset_in_words, Register reg) { ++ // FP + -2: link ++ // + -1: return address ++ // + 0: argument with offset 0 ++ // + 1: argument with offset 1 ++ // + 2: ... ++ ++ ld_ptr(reg, Address(FP, offset_in_words * BytesPerWord)); ++} ++ ++#ifndef PRODUCT ++void C1_MacroAssembler::verify_stack_oop(int stack_offset) { ++ if (!VerifyOops) return; ++ verify_oop_addr(Address(SP, stack_offset), "oop"); ++} ++ ++void C1_MacroAssembler::verify_not_null_oop(Register r) { ++ if (!VerifyOops) return; ++ Label not_null; ++ bnez(r, not_null); ++ stop("non-null oop required"); ++ bind(not_null); ++ verify_oop(r); ++} ++ ++void C1_MacroAssembler::invalidate_registers(bool inv_a0, bool inv_s0, bool inv_a2, ++ bool inv_a3, bool inv_a4, bool inv_a5) { ++#ifdef ASSERT ++ static int nn; ++ if (inv_a0) li(A0, 0xDEAD); ++ if (inv_s0) li(S0, 0xDEAD); ++ if (inv_a2) li(A2, nn++); ++ if (inv_a3) li(A3, 0xDEAD); ++ if (inv_a4) li(A4, 0xDEAD); ++ if (inv_a5) li(A5, 0xDEAD); ++#endif ++} ++#endif // ifndef PRODUCT +diff --git a/src/hotspot/cpu/loongarch/c1_Runtime1_loongarch_64.cpp b/src/hotspot/cpu/loongarch/c1_Runtime1_loongarch_64.cpp +new file mode 100644 +index 00000000000..87da18e294a +--- /dev/null ++++ b/src/hotspot/cpu/loongarch/c1_Runtime1_loongarch_64.cpp +@@ -0,0 +1,1148 @@ ++/* ++ * Copyright (c) 1999, 2021, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2021, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#include "precompiled.hpp" ++#include "asm/assembler.hpp" ++#include "c1/c1_CodeStubs.hpp" ++#include "c1/c1_Defs.hpp" ++#include "c1/c1_MacroAssembler.hpp" ++#include "c1/c1_Runtime1.hpp" ++#include "compiler/disassembler.hpp" ++#include "compiler/oopMap.hpp" ++#include "gc/shared/cardTable.hpp" ++#include "gc/shared/cardTableBarrierSet.hpp" ++#include "gc/shared/collectedHeap.hpp" ++#include "gc/shared/tlab_globals.hpp" ++#include "interpreter/interpreter.hpp" ++#include "memory/universe.hpp" ++#include "nativeInst_loongarch.hpp" ++#include "oops/compiledICHolder.hpp" ++#include "oops/oop.inline.hpp" ++#include "prims/jvmtiExport.hpp" ++#include "register_loongarch.hpp" ++#include "runtime/sharedRuntime.hpp" ++#include "runtime/signature.hpp" ++#include "runtime/stubRoutines.hpp" ++#include "runtime/vframe.hpp" ++#include "runtime/vframeArray.hpp" ++#include "utilities/powerOfTwo.hpp" ++#include "vmreg_loongarch.inline.hpp" ++ ++#define A0 RA0 ++#define A1 RA1 ++#define A2 RA2 ++#define A3 RA3 ++#define A4 RA4 ++#define A5 RA5 ++#define A6 RA6 ++#define A7 RA7 ++#define T0 RT0 ++#define T1 RT1 ++#define T2 RT2 ++#define T3 RT3 ++#define T5 RT5 ++#define T6 RT6 ++#define T8 RT8 ++ ++// Implementation of StubAssembler ++ ++int StubAssembler::call_RT(Register oop_result1, Register metadata_result, address entry, int args_size) { ++ // setup registers ++ assert(!(oop_result1->is_valid() || metadata_result->is_valid()) || oop_result1 != metadata_result, ++ "registers must be different"); ++ assert(oop_result1 != TREG && metadata_result != TREG, "registers must be different"); ++ assert(args_size >= 0, "illegal args_size"); ++ bool align_stack = false; ++ ++ move(A0, TREG); ++ set_num_rt_args(0); // Nothing on stack ++ ++ Label retaddr; ++ set_last_Java_frame(SP, FP, retaddr); ++ ++ // do the call ++ call(entry, relocInfo::runtime_call_type); ++ bind(retaddr); ++ int call_offset = offset(); ++ // verify callee-saved register ++#ifdef ASSERT ++ { Label L; ++ get_thread(SCR1); ++ beq(TREG, SCR1, L); ++ stop("StubAssembler::call_RT: TREG not callee saved?"); ++ bind(L); ++ } ++#endif ++ reset_last_Java_frame(true); ++ ++ // check for pending exceptions ++ { Label L; ++ // check for pending exceptions (java_thread is set upon return) ++ ld_ptr(SCR1, Address(TREG, in_bytes(Thread::pending_exception_offset()))); ++ beqz(SCR1, L); ++ // exception pending => remove activation and forward to exception handler ++ // make sure that the vm_results are cleared ++ if (oop_result1->is_valid()) { ++ st_ptr(R0, Address(TREG, JavaThread::vm_result_offset())); ++ } ++ if 
(metadata_result->is_valid()) { ++ st_ptr(R0, Address(TREG, JavaThread::vm_result_2_offset())); ++ } ++ if (frame_size() == no_frame_size) { ++ leave(); ++ jmp(StubRoutines::forward_exception_entry(), relocInfo::runtime_call_type); ++ } else if (_stub_id == Runtime1::forward_exception_id) { ++ should_not_reach_here(); ++ } else { ++ jmp(Runtime1::entry_for(Runtime1::forward_exception_id), relocInfo::runtime_call_type); ++ } ++ bind(L); ++ } ++ // get oop results if there are any and reset the values in the thread ++ if (oop_result1->is_valid()) { ++ get_vm_result(oop_result1, TREG); ++ } ++ if (metadata_result->is_valid()) { ++ get_vm_result_2(metadata_result, TREG); ++ } ++ return call_offset; ++} ++ ++int StubAssembler::call_RT(Register oop_result1, Register metadata_result, ++ address entry, Register arg1) { ++ move(A1, arg1); ++ return call_RT(oop_result1, metadata_result, entry, 1); ++} ++ ++int StubAssembler::call_RT(Register oop_result1, Register metadata_result, ++ address entry, Register arg1, Register arg2) { ++ if (A1 == arg2) { ++ if (A2 == arg1) { ++ move(SCR1, arg1); ++ move(arg1, arg2); ++ move(arg2, SCR1); ++ } else { ++ move(A2, arg2); ++ move(A1, arg1); ++ } ++ } else { ++ move(A1, arg1); ++ move(A2, arg2); ++ } ++ return call_RT(oop_result1, metadata_result, entry, 2); ++} ++ ++int StubAssembler::call_RT(Register oop_result1, Register metadata_result, ++ address entry, Register arg1, Register arg2, Register arg3) { ++ // if there is any conflict use the stack ++ if (arg1 == A2 || arg1 == A3 || ++ arg2 == A1 || arg2 == A3 || ++ arg3 == A1 || arg3 == A2) { ++ addi_d(SP, SP, -4 * wordSize); ++ st_ptr(arg1, Address(SP, 0 * wordSize)); ++ st_ptr(arg2, Address(SP, 1 * wordSize)); ++ st_ptr(arg3, Address(SP, 2 * wordSize)); ++ ld_ptr(arg1, Address(SP, 0 * wordSize)); ++ ld_ptr(arg2, Address(SP, 1 * wordSize)); ++ ld_ptr(arg3, Address(SP, 2 * wordSize)); ++ addi_d(SP, SP, 4 * wordSize); ++ } else { ++ move(A1, arg1); ++ move(A2, arg2); ++ move(A3, arg3); ++ } ++ return call_RT(oop_result1, metadata_result, entry, 3); ++} ++ ++enum return_state_t { ++ does_not_return, requires_return ++}; ++ ++// Implementation of StubFrame ++ ++class StubFrame: public StackObj { ++ private: ++ StubAssembler* _sasm; ++ bool _return_state; ++ ++ public: ++ StubFrame(StubAssembler* sasm, const char* name, bool must_gc_arguments, ++ return_state_t return_state=requires_return); ++ void load_argument(int offset_in_words, Register reg); ++ ++ ~StubFrame(); ++};; ++ ++void StubAssembler::prologue(const char* name, bool must_gc_arguments) { ++ set_info(name, must_gc_arguments); ++ enter(); ++} ++ ++void StubAssembler::epilogue() { ++ leave(); ++ jr(RA); ++} ++ ++#define __ _sasm-> ++ ++StubFrame::StubFrame(StubAssembler* sasm, const char* name, bool must_gc_arguments, ++ return_state_t return_state) { ++ _sasm = sasm; ++ _return_state = return_state; ++ __ prologue(name, must_gc_arguments); ++} ++ ++// load parameters that were stored with LIR_Assembler::store_parameter ++// Note: offsets for store_parameter and load_argument must match ++void StubFrame::load_argument(int offset_in_words, Register reg) { ++ __ load_parameter(offset_in_words, reg); ++} ++ ++StubFrame::~StubFrame() { ++ if (_return_state == requires_return) { ++ __ epilogue(); ++ } else { ++ __ should_not_reach_here(); ++ } ++} ++ ++#undef __ ++ ++// Implementation of Runtime1 ++ ++#define __ sasm-> ++ ++const int float_regs_as_doubles_size_in_slots = pd_nof_fpu_regs_frame_map * 2; ++ ++// Stack layout for saving/restoring all the 
registers needed during a runtime ++// call (this includes deoptimization) ++// Note: note that users of this frame may well have arguments to some runtime ++// while these values are on the stack. These positions neglect those arguments ++// but the code in save_live_registers will take the argument count into ++// account. ++// ++ ++enum reg_save_layout { ++ reg_save_frame_size = 32 /* float */ + 30 /* integer, except zr, tp */ ++}; ++ ++// Save off registers which might be killed by calls into the runtime. ++// Tries to smart of about FP registers. In particular we separate ++// saving and describing the FPU registers for deoptimization since we ++// have to save the FPU registers twice if we describe them. The ++// deopt blob is the only thing which needs to describe FPU registers. ++// In all other cases it should be sufficient to simply save their ++// current value. ++ ++static int cpu_reg_save_offsets[FrameMap::nof_cpu_regs]; ++static int fpu_reg_save_offsets[FrameMap::nof_fpu_regs]; ++static int reg_save_size_in_words; ++static int frame_size_in_bytes = -1; ++ ++static OopMap* generate_oop_map(StubAssembler* sasm, bool save_fpu_registers) { ++ int frame_size_in_bytes = reg_save_frame_size * BytesPerWord; ++ sasm->set_frame_size(frame_size_in_bytes / BytesPerWord); ++ int frame_size_in_slots = frame_size_in_bytes / VMRegImpl::stack_slot_size; ++ OopMap* oop_map = new OopMap(frame_size_in_slots, 0); ++ ++ for (int i = A0->encoding(); i <= T8->encoding(); i++) { ++ Register r = as_Register(i); ++ if (i != SCR1->encoding() && i != SCR2->encoding()) { ++ int sp_offset = cpu_reg_save_offsets[i]; ++ oop_map->set_callee_saved(VMRegImpl::stack2reg(sp_offset), r->as_VMReg()); ++ } ++ } ++ ++ if (save_fpu_registers) { ++ for (int i = 0; i < FrameMap::nof_fpu_regs; i++) { ++ FloatRegister r = as_FloatRegister(i); ++ int sp_offset = fpu_reg_save_offsets[i]; ++ oop_map->set_callee_saved(VMRegImpl::stack2reg(sp_offset), r->as_VMReg()); ++ } ++ } ++ ++ return oop_map; ++} ++ ++static OopMap* save_live_registers(StubAssembler* sasm, ++ bool save_fpu_registers = true) { ++ __ block_comment("save_live_registers"); ++ ++ // integer registers except zr & ra & tp & sp ++ __ addi_d(SP, SP, -(32 - 4 + 32) * wordSize); ++ ++ for (int i = 4; i < 32; i++) ++ __ st_ptr(as_Register(i), Address(SP, (32 + i - 4) * wordSize)); ++ ++ if (save_fpu_registers) { ++ for (int i = 0; i < 32; i++) ++ __ fst_d(as_FloatRegister(i), Address(SP, i * wordSize)); ++ } ++ ++ return generate_oop_map(sasm, save_fpu_registers); ++} ++ ++static void restore_live_registers(StubAssembler* sasm, bool restore_fpu_registers = true) { ++ if (restore_fpu_registers) { ++ for (int i = 0; i < 32; i ++) ++ __ fld_d(as_FloatRegister(i), Address(SP, i * wordSize)); ++ } ++ ++ for (int i = 4; i < 32; i++) ++ __ ld_ptr(as_Register(i), Address(SP, (32 + i - 4) * wordSize)); ++ ++ __ addi_d(SP, SP, (32 - 4 + 32) * wordSize); ++} ++ ++static void restore_live_registers_except_a0(StubAssembler* sasm, bool restore_fpu_registers = true) { ++ if (restore_fpu_registers) { ++ for (int i = 0; i < 32; i ++) ++ __ fld_d(as_FloatRegister(i), Address(SP, i * wordSize)); ++ } ++ ++ for (int i = 5; i < 32; i++) ++ __ ld_ptr(as_Register(i), Address(SP, (32 + i - 4) * wordSize)); ++ ++ __ addi_d(SP, SP, (32 - 4 + 32) * wordSize); ++} ++ ++void Runtime1::initialize_pd() { ++ int sp_offset = 0; ++ int i; ++ ++ // all float registers are saved explicitly ++ assert(FrameMap::nof_fpu_regs == 32, "double registers not handled here"); ++ for (i = 0; i < 
FrameMap::nof_fpu_regs; i++) { ++ fpu_reg_save_offsets[i] = sp_offset; ++ sp_offset += 2; // SP offsets are in halfwords ++ } ++ ++ for (i = 4; i < FrameMap::nof_cpu_regs; i++) { ++ Register r = as_Register(i); ++ cpu_reg_save_offsets[i] = sp_offset; ++ sp_offset += 2; // SP offsets are in halfwords ++ } ++} ++ ++// target: the entry point of the method that creates and posts the exception oop ++// has_argument: true if the exception needs arguments (passed in SCR1 and SCR2) ++ ++OopMapSet* Runtime1::generate_exception_throw(StubAssembler* sasm, address target, ++ bool has_argument) { ++ // make a frame and preserve the caller's caller-save registers ++ OopMap* oop_map = save_live_registers(sasm); ++ int call_offset; ++ if (!has_argument) { ++ call_offset = __ call_RT(noreg, noreg, target); ++ } else { ++ __ move(A1, SCR1); ++ __ move(A2, SCR2); ++ call_offset = __ call_RT(noreg, noreg, target); ++ } ++ OopMapSet* oop_maps = new OopMapSet(); ++ oop_maps->add_gc_map(call_offset, oop_map); ++ return oop_maps; ++} ++ ++OopMapSet* Runtime1::generate_handle_exception(StubID id, StubAssembler *sasm) { ++ __ block_comment("generate_handle_exception"); ++ ++ // incoming parameters ++ const Register exception_oop = A0; ++ const Register exception_pc = A1; ++ // other registers used in this stub ++ ++ // Save registers, if required. ++ OopMapSet* oop_maps = new OopMapSet(); ++ OopMap* oop_map = NULL; ++ switch (id) { ++ case forward_exception_id: ++ // We're handling an exception in the context of a compiled frame. ++ // The registers have been saved in the standard places. Perform ++ // an exception lookup in the caller and dispatch to the handler ++ // if found. Otherwise unwind and dispatch to the callers ++ // exception handler. ++ oop_map = generate_oop_map(sasm, 1 /*thread*/); ++ ++ // load and clear pending exception oop into A0 ++ __ ld_ptr(exception_oop, Address(TREG, Thread::pending_exception_offset())); ++ __ st_ptr(R0, Address(TREG, Thread::pending_exception_offset())); ++ ++ // load issuing PC (the return address for this stub) into A1 ++ __ ld_ptr(exception_pc, Address(FP, frame::return_addr_offset * BytesPerWord)); ++ ++ // make sure that the vm_results are cleared (may be unnecessary) ++ __ st_ptr(R0, Address(TREG, JavaThread::vm_result_offset())); ++ __ st_ptr(R0, Address(TREG, JavaThread::vm_result_2_offset())); ++ break; ++ case handle_exception_nofpu_id: ++ case handle_exception_id: ++ // At this point all registers MAY be live. ++ oop_map = save_live_registers(sasm, id != handle_exception_nofpu_id); ++ break; ++ case handle_exception_from_callee_id: { ++ // At this point all registers except exception oop (A0) and ++ // exception pc (RA) are dead. 
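++      // Because those registers are already dead, no register save area is set
++      // up for this id: the frame described below is only the two words pushed
++      // by the stub prologue (saved FP and the return address), and the OopMap
++      // built here covers just those slots.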
++ const int frame_size = 2 /*fp, return address*/; ++ oop_map = new OopMap(frame_size * VMRegImpl::slots_per_word, 0); ++ sasm->set_frame_size(frame_size); ++ break; ++ } ++ default: ShouldNotReachHere(); ++ } ++ ++ // verify that only A0 and A1 are valid at this time ++ __ invalidate_registers(false, true, true, true, true, true); ++ // verify that A0 contains a valid exception ++ __ verify_not_null_oop(exception_oop); ++ ++#ifdef ASSERT ++ // check that fields in JavaThread for exception oop and issuing pc are ++ // empty before writing to them ++ Label oop_empty; ++ __ ld_ptr(SCR1, Address(TREG, JavaThread::exception_oop_offset())); ++ __ beqz(SCR1, oop_empty); ++ __ stop("exception oop already set"); ++ __ bind(oop_empty); ++ ++ Label pc_empty; ++ __ ld_ptr(SCR1, Address(TREG, JavaThread::exception_pc_offset())); ++ __ beqz(SCR1, pc_empty); ++ __ stop("exception pc already set"); ++ __ bind(pc_empty); ++#endif ++ ++ // save exception oop and issuing pc into JavaThread ++ // (exception handler will load it from here) ++ __ st_ptr(exception_oop, Address(TREG, JavaThread::exception_oop_offset())); ++ __ st_ptr(exception_pc, Address(TREG, JavaThread::exception_pc_offset())); ++ ++ // patch throwing pc into return address (has bci & oop map) ++ __ st_ptr(exception_pc, Address(FP, frame::return_addr_offset * BytesPerWord)); ++ ++ // compute the exception handler. ++ // the exception oop and the throwing pc are read from the fields in JavaThread ++ int call_offset = __ call_RT(noreg, noreg, CAST_FROM_FN_PTR(address, exception_handler_for_pc)); ++ oop_maps->add_gc_map(call_offset, oop_map); ++ ++ // A0: handler address ++ // will be the deopt blob if nmethod was deoptimized while we looked up ++ // handler regardless of whether handler existed in the nmethod. ++ ++ // only A0 is valid at this time, all other registers have been destroyed by the runtime call ++ __ invalidate_registers(false, true, true, true, true, true); ++ ++ // patch the return address, this stub will directly return to the exception handler ++ __ st_ptr(A0, Address(FP, frame::return_addr_offset * BytesPerWord)); ++ ++ switch (id) { ++ case forward_exception_id: ++ case handle_exception_nofpu_id: ++ case handle_exception_id: ++ // Restore the registers that were saved at the beginning. ++ restore_live_registers(sasm, id != handle_exception_nofpu_id); ++ break; ++ case handle_exception_from_callee_id: ++ break; ++ default: ShouldNotReachHere(); ++ } ++ ++ return oop_maps; ++} ++ ++void Runtime1::generate_unwind_exception(StubAssembler *sasm) { ++ // incoming parameters ++ const Register exception_oop = A0; ++ // callee-saved copy of exception_oop during runtime call ++ const Register exception_oop_callee_saved = S0; ++ // other registers used in this stub ++ const Register exception_pc = A1; ++ const Register handler_addr = A3; ++ ++ // verify that only A0, is valid at this time ++ __ invalidate_registers(false, true, true, true, true, true); ++ ++#ifdef ASSERT ++ // check that fields in JavaThread for exception oop and issuing pc are empty ++ Label oop_empty; ++ __ ld_ptr(SCR1, Address(TREG, JavaThread::exception_oop_offset())); ++ __ beqz(SCR1, oop_empty); ++ __ stop("exception oop must be empty"); ++ __ bind(oop_empty); ++ ++ Label pc_empty; ++ __ ld_ptr(SCR1, Address(TREG, JavaThread::exception_pc_offset())); ++ __ beqz(SCR1, pc_empty); ++ __ stop("exception pc must be empty"); ++ __ bind(pc_empty); ++#endif ++ ++ // Save our return address because ++ // exception_handler_for_return_address will destroy it. 
We also ++ // save exception_oop ++ __ addi_d(SP, SP, -2 * wordSize); ++ __ st_ptr(RA, Address(SP, 0 * wordSize)); ++ __ st_ptr(exception_oop, Address(SP, 1 * wordSize)); ++ ++ // search the exception handler address of the caller (using the return address) ++ __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::exception_handler_for_return_address), TREG, RA); ++ // V0: exception handler address of the caller ++ ++ // Only V0 is valid at this time; all other registers have been ++ // destroyed by the call. ++ __ invalidate_registers(false, true, true, true, false, true); ++ ++ // move result of call into correct register ++ __ move(handler_addr, A0); ++ ++ // get throwing pc (= return address). ++ // RA has been destroyed by the call ++ __ ld_ptr(RA, Address(SP, 0 * wordSize)); ++ __ ld_ptr(exception_oop, Address(SP, 1 * wordSize)); ++ __ addi_d(SP, SP, 2 * wordSize); ++ __ move(A1, RA); ++ ++ __ verify_not_null_oop(exception_oop); ++ ++ // continue at exception handler (return address removed) ++ // note: do *not* remove arguments when unwinding the ++ // activation since the caller assumes having ++ // all arguments on the stack when entering the ++ // runtime to determine the exception handler ++ // (GC happens at call site with arguments!) ++ // A0: exception oop ++ // A1: throwing pc ++ // A3: exception handler ++ __ jr(handler_addr); ++} ++ ++OopMapSet* Runtime1::generate_patching(StubAssembler* sasm, address target) { ++ // use the maximum number of runtime-arguments here because it is difficult to ++ // distinguish each RT-Call. ++ // Note: This number affects also the RT-Call in generate_handle_exception because ++ // the oop-map is shared for all calls. ++ DeoptimizationBlob* deopt_blob = SharedRuntime::deopt_blob(); ++ assert(deopt_blob != NULL, "deoptimization blob must have been created"); ++ ++ OopMap* oop_map = save_live_registers(sasm); ++ ++ __ move(A0, TREG); ++ Label retaddr; ++ __ set_last_Java_frame(SP, FP, retaddr); ++ // do the call ++ __ call(target, relocInfo::runtime_call_type); ++ __ bind(retaddr); ++ OopMapSet* oop_maps = new OopMapSet(); ++ oop_maps->add_gc_map(__ offset(), oop_map); ++ // verify callee-saved register ++#ifdef ASSERT ++ { Label L; ++ __ get_thread(SCR1); ++ __ beq(TREG, SCR1, L); ++ __ stop("StubAssembler::call_RT: rthread not callee saved?"); ++ __ bind(L); ++ } ++#endif ++ ++ __ reset_last_Java_frame(true); ++ ++#ifdef ASSERT ++ // check that fields in JavaThread for exception oop and issuing pc are empty ++ Label oop_empty; ++ __ ld_ptr(SCR1, Address(TREG, Thread::pending_exception_offset())); ++ __ beqz(SCR1, oop_empty); ++ __ stop("exception oop must be empty"); ++ __ bind(oop_empty); ++ ++ Label pc_empty; ++ __ ld_ptr(SCR1, Address(TREG, JavaThread::exception_pc_offset())); ++ __ beqz(SCR1, pc_empty); ++ __ stop("exception pc must be empty"); ++ __ bind(pc_empty); ++#endif ++ ++ // Runtime will return true if the nmethod has been deoptimized, this is the ++ // expected scenario and anything else is an error. Note that we maintain a ++ // check on the result purely as a defensive measure. ++ Label no_deopt; ++ __ beqz(A0, no_deopt); // Have we deoptimized? ++ ++ // Perform a re-execute. The proper return address is already on the stack, ++ // we just need to restore registers, pop all of our frame but the return ++ // address and jump to the deopt blob. 
++ restore_live_registers(sasm); ++ __ leave(); ++ __ jmp(deopt_blob->unpack_with_reexecution(), relocInfo::runtime_call_type); ++ ++ __ bind(no_deopt); ++ __ stop("deopt not performed"); ++ ++ return oop_maps; ++} ++ ++OopMapSet* Runtime1::generate_code_for(StubID id, StubAssembler* sasm) { ++ // for better readability ++ const bool must_gc_arguments = true; ++ const bool dont_gc_arguments = false; ++ ++ // default value; overwritten for some optimized stubs that are called ++ // from methods that do not use the fpu ++ bool save_fpu_registers = true; ++ ++ // stub code & info for the different stubs ++ OopMapSet* oop_maps = NULL; ++ OopMap* oop_map = NULL; ++ switch (id) { ++ { ++ case forward_exception_id: ++ { ++ oop_maps = generate_handle_exception(id, sasm); ++ __ leave(); ++ __ jr(RA); ++ } ++ break; ++ ++ case throw_div0_exception_id: ++ { ++ StubFrame f(sasm, "throw_div0_exception", dont_gc_arguments, does_not_return); ++ oop_maps = generate_exception_throw(sasm, CAST_FROM_FN_PTR(address, throw_div0_exception), false); ++ } ++ break; ++ ++ case throw_null_pointer_exception_id: ++ { ++ StubFrame f(sasm, "throw_null_pointer_exception", dont_gc_arguments, does_not_return); ++ oop_maps = generate_exception_throw(sasm, CAST_FROM_FN_PTR(address, throw_null_pointer_exception), false); ++ } ++ break; ++ ++ case new_instance_id: ++ case fast_new_instance_id: ++ case fast_new_instance_init_check_id: ++ { ++ Register klass = A3; // Incoming ++ Register obj = A0; // Result ++ ++ if (id == new_instance_id) { ++ __ set_info("new_instance", dont_gc_arguments); ++ } else if (id == fast_new_instance_id) { ++ __ set_info("fast new_instance", dont_gc_arguments); ++ } else { ++ assert(id == fast_new_instance_init_check_id, "bad StubID"); ++ __ set_info("fast new_instance init check", dont_gc_arguments); ++ } ++ ++ // If TLAB is disabled, see if there is support for inlining contiguous ++ // allocations. ++ // Otherwise, just go to the slow path. 
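++        // In outline, the inline fast path below does (sketch only; the real code
++        // keeps obj in A0 and the size/temporaries in S0/T0/T1):
++        //   if (init check requested) require klass->init_state == fully_initialized;
++        //   obj_size = klass->layout_helper();        // instance size in bytes
++        //   obj = eden_allocate(obj_size)             // or branch to slow_path
++        //   initialize_object(obj, klass, obj_size);  // install header, zero the body
++        //   return obj in A0 via RA, without building a frame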
++ if ((id == fast_new_instance_id || id == fast_new_instance_init_check_id) && ++ !UseTLAB && Universe::heap()->supports_inline_contig_alloc()) { ++ Label slow_path; ++ Register obj_size = S0; ++ Register t1 = T0; ++ Register t2 = T1; ++ assert_different_registers(klass, obj, obj_size, t1, t2); ++ ++ __ addi_d(SP, SP, -2 * wordSize); ++ __ st_ptr(S0, Address(SP, 0)); ++ ++ if (id == fast_new_instance_init_check_id) { ++ // make sure the klass is initialized ++ __ ld_bu(SCR1, Address(klass, InstanceKlass::init_state_offset())); ++ __ li(SCR2, InstanceKlass::fully_initialized); ++ __ bne_far(SCR1, SCR2, slow_path); ++ } ++ ++#ifdef ASSERT ++ // assert object can be fast path allocated ++ { ++ Label ok, not_ok; ++ __ ld_w(obj_size, Address(klass, Klass::layout_helper_offset())); ++ __ bge(R0, obj_size, not_ok); // make sure it's an instance (LH > 0) ++ __ andi(SCR1, obj_size, Klass::_lh_instance_slow_path_bit); ++ __ beqz(SCR1, ok); ++ __ bind(not_ok); ++ __ stop("assert(can be fast path allocated)"); ++ __ should_not_reach_here(); ++ __ bind(ok); ++ } ++#endif // ASSERT ++ ++ // get the instance size (size is postive so movl is fine for 64bit) ++ __ ld_w(obj_size, Address(klass, Klass::layout_helper_offset())); ++ ++ __ eden_allocate(obj, obj_size, 0, t1, slow_path); ++ ++ __ initialize_object(obj, klass, obj_size, 0, t1, t2, /* is_tlab_allocated */ false); ++ __ verify_oop(obj); ++ __ ld_ptr(S0, Address(SP, 0)); ++ __ addi_d(SP, SP, 2 * wordSize); ++ __ jr(RA); ++ ++ __ bind(slow_path); ++ __ ld_ptr(S0, Address(SP, 0)); ++ __ addi_d(SP, SP, 2 * wordSize); ++ } ++ ++ __ enter(); ++ OopMap* map = save_live_registers(sasm); ++ int call_offset = __ call_RT(obj, noreg, CAST_FROM_FN_PTR(address, new_instance), klass); ++ oop_maps = new OopMapSet(); ++ oop_maps->add_gc_map(call_offset, map); ++ restore_live_registers_except_a0(sasm); ++ __ verify_oop(obj); ++ __ leave(); ++ __ jr(RA); ++ ++ // A0,: new instance ++ } ++ ++ break; ++ ++ case counter_overflow_id: ++ { ++ Register bci = A0, method = A1; ++ __ enter(); ++ OopMap* map = save_live_registers(sasm); ++ // Retrieve bci ++ __ ld_w(bci, Address(FP, 0 * BytesPerWord)); ++ // And a pointer to the Method* ++ __ ld_d(method, Address(FP, 1 * BytesPerWord)); ++ int call_offset = __ call_RT(noreg, noreg, CAST_FROM_FN_PTR(address, counter_overflow), bci, method); ++ oop_maps = new OopMapSet(); ++ oop_maps->add_gc_map(call_offset, map); ++ restore_live_registers(sasm); ++ __ leave(); ++ __ jr(RA); ++ } ++ break; ++ ++ case new_type_array_id: ++ case new_object_array_id: ++ { ++ Register length = S0; // Incoming ++ Register klass = A3; // Incoming ++ Register obj = A0; // Result ++ ++ if (id == new_type_array_id) { ++ __ set_info("new_type_array", dont_gc_arguments); ++ } else { ++ __ set_info("new_object_array", dont_gc_arguments); ++ } ++ ++#ifdef ASSERT ++ // assert object type is really an array of the proper kind ++ { ++ Label ok; ++ Register t0 = obj; ++ __ ld_w(t0, Address(klass, Klass::layout_helper_offset())); ++ __ srai_w(t0, t0, Klass::_lh_array_tag_shift); ++ int tag = ((id == new_type_array_id) ++ ? Klass::_lh_array_tag_type_value ++ : Klass::_lh_array_tag_obj_value); ++ __ li(SCR1, tag); ++ __ beq(t0, SCR1, ok); ++ __ stop("assert(is an array klass)"); ++ __ should_not_reach_here(); ++ __ bind(ok); ++ } ++#endif // ASSERT ++ ++ // If TLAB is disabled, see if there is support for inlining contiguous ++ // allocations. ++ // Otherwise, just go to the slow path. 
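++        // Sketch of the size computation used by the fast path below (both the
++        // element-size shift and the header size come out of layout_helper):
++        //   arr_size = align_up(header_size(klass) + (length << log2_element_size(klass)),
++        //                       MinObjAlignmentInBytes);
++        //   obj = eden_allocate(arr_size)             // or branch to slow_path
++        //   initialize_header(obj, klass, length); zero the body; result stays in A0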
++ if (!UseTLAB && Universe::heap()->supports_inline_contig_alloc()) { ++ Register arr_size = A5; ++ Register t1 = T0; ++ Register t2 = T1; ++ Label slow_path; ++ assert_different_registers(length, klass, obj, arr_size, t1, t2); ++ ++ // check that array length is small enough for fast path. ++ __ li(SCR1, C1_MacroAssembler::max_array_allocation_length); ++ __ blt_far(SCR1, length, slow_path, false); ++ ++ // get the allocation size: round_up(hdr + length << (layout_helper & 0x1F)) ++ // since size is positive ldrw does right thing on 64bit ++ __ ld_w(t1, Address(klass, Klass::layout_helper_offset())); ++ // since size is positive movw does right thing on 64bit ++ __ move(arr_size, length); ++ __ sll_w(arr_size, length, t1); ++ __ bstrpick_d(t1, t1, Klass::_lh_header_size_shift + ++ exact_log2(Klass::_lh_header_size_mask + 1) - 1, ++ Klass::_lh_header_size_shift); ++ __ add_d(arr_size, arr_size, t1); ++ __ addi_d(arr_size, arr_size, MinObjAlignmentInBytesMask); // align up ++ __ bstrins_d(arr_size, R0, exact_log2(MinObjAlignmentInBytesMask + 1) - 1, 0); ++ ++ __ eden_allocate(obj, arr_size, 0, t1, slow_path); // preserves arr_size ++ ++ __ initialize_header(obj, klass, length, t1, t2); ++ __ ld_bu(t1, Address(klass, in_bytes(Klass::layout_helper_offset()) + (Klass::_lh_header_size_shift / BitsPerByte))); ++ assert(Klass::_lh_header_size_shift % BitsPerByte == 0, "bytewise"); ++ assert(Klass::_lh_header_size_mask <= 0xFF, "bytewise"); ++ __ andi(t1, t1, Klass::_lh_header_size_mask); ++ __ sub_d(arr_size, arr_size, t1); // body length ++ __ add_d(t1, t1, obj); // body start ++ __ initialize_body(t1, arr_size, 0, t1, t2); ++ __ membar(Assembler::StoreStore); ++ __ verify_oop(obj); ++ ++ __ jr(RA); ++ ++ __ bind(slow_path); ++ } ++ ++ __ enter(); ++ OopMap* map = save_live_registers(sasm); ++ int call_offset; ++ if (id == new_type_array_id) { ++ call_offset = __ call_RT(obj, noreg, CAST_FROM_FN_PTR(address, new_type_array), klass, length); ++ } else { ++ call_offset = __ call_RT(obj, noreg, CAST_FROM_FN_PTR(address, new_object_array), klass, length); ++ } ++ ++ oop_maps = new OopMapSet(); ++ oop_maps->add_gc_map(call_offset, map); ++ restore_live_registers_except_a0(sasm); ++ ++ __ verify_oop(obj); ++ __ leave(); ++ __ jr(RA); ++ ++ // A0: new array ++ } ++ break; ++ ++ case new_multi_array_id: ++ { ++ StubFrame f(sasm, "new_multi_array", dont_gc_arguments); ++ // A0,: klass ++ // S0,: rank ++ // A2: address of 1st dimension ++ OopMap* map = save_live_registers(sasm); ++ __ move(A1, A0); ++ __ move(A3, A2); ++ __ move(A2, S0); ++ int call_offset = __ call_RT(A0, noreg, CAST_FROM_FN_PTR(address, new_multi_array), A1, A2, A3); ++ ++ oop_maps = new OopMapSet(); ++ oop_maps->add_gc_map(call_offset, map); ++ restore_live_registers_except_a0(sasm); ++ ++ // A0,: new multi array ++ __ verify_oop(A0); ++ } ++ break; ++ ++ case register_finalizer_id: ++ { ++ __ set_info("register_finalizer", dont_gc_arguments); ++ ++ // This is called via call_runtime so the arguments ++ // will be place in C abi locations ++ ++ __ verify_oop(A0); ++ ++ // load the klass and check the has finalizer flag ++ Label register_finalizer; ++ Register t = A5; ++ __ load_klass(t, A0); ++ __ ld_w(t, Address(t, Klass::access_flags_offset())); ++ __ li(SCR1, JVM_ACC_HAS_FINALIZER); ++ __ andr(SCR1, t, SCR1); ++ __ bnez(SCR1, register_finalizer); ++ __ jr(RA); ++ ++ __ bind(register_finalizer); ++ __ enter(); ++ OopMap* oop_map = save_live_registers(sasm); ++ int call_offset = __ call_RT(noreg, noreg, CAST_FROM_FN_PTR(address, 
SharedRuntime::register_finalizer), A0); ++ oop_maps = new OopMapSet(); ++ oop_maps->add_gc_map(call_offset, oop_map); ++ ++ // Now restore all the live registers ++ restore_live_registers(sasm); ++ ++ __ leave(); ++ __ jr(RA); ++ } ++ break; ++ ++ case throw_class_cast_exception_id: ++ { ++ StubFrame f(sasm, "throw_class_cast_exception", dont_gc_arguments, does_not_return); ++ oop_maps = generate_exception_throw(sasm, CAST_FROM_FN_PTR(address, throw_class_cast_exception), true); ++ } ++ break; ++ ++ case throw_incompatible_class_change_error_id: ++ { ++ StubFrame f(sasm, "throw_incompatible_class_cast_exception", dont_gc_arguments, does_not_return); ++ oop_maps = generate_exception_throw(sasm, CAST_FROM_FN_PTR(address, throw_incompatible_class_change_error), false); ++ } ++ break; ++ ++ case slow_subtype_check_id: ++ { ++ // Typical calling sequence: ++ // __ push(klass_RInfo); // object klass or other subclass ++ // __ push(sup_k_RInfo); // array element klass or other superclass ++ // __ bl(slow_subtype_check); ++ // Note that the subclass is pushed first, and is therefore deepest. ++ enum layout { ++ a0_off, a0_off_hi, ++ a2_off, a2_off_hi, ++ a4_off, a4_off_hi, ++ a5_off, a5_off_hi, ++ sup_k_off, sup_k_off_hi, ++ klass_off, klass_off_hi, ++ framesize, ++ result_off = sup_k_off ++ }; ++ ++ __ set_info("slow_subtype_check", dont_gc_arguments); ++ __ addi_d(SP, SP, -4 * wordSize); ++ __ st_ptr(A0, Address(SP, a0_off * VMRegImpl::stack_slot_size)); ++ __ st_ptr(A2, Address(SP, a2_off * VMRegImpl::stack_slot_size)); ++ __ st_ptr(A4, Address(SP, a4_off * VMRegImpl::stack_slot_size)); ++ __ st_ptr(A5, Address(SP, a5_off * VMRegImpl::stack_slot_size)); ++ ++ // This is called by pushing args and not with C abi ++ __ ld_ptr(A4, Address(SP, klass_off * VMRegImpl::stack_slot_size)); // subclass ++ __ ld_ptr(A0, Address(SP, sup_k_off * VMRegImpl::stack_slot_size)); // superclass ++ ++ Label miss; ++ __ check_klass_subtype_slow_path(A4, A0, A2, A5, NULL, &miss); ++ ++ // fallthrough on success: ++ __ li(SCR1, 1); ++ __ st_ptr(SCR1, Address(SP, result_off * VMRegImpl::stack_slot_size)); // result ++ __ ld_ptr(A0, Address(SP, a0_off * VMRegImpl::stack_slot_size)); ++ __ ld_ptr(A2, Address(SP, a2_off * VMRegImpl::stack_slot_size)); ++ __ ld_ptr(A4, Address(SP, a4_off * VMRegImpl::stack_slot_size)); ++ __ ld_ptr(A5, Address(SP, a5_off * VMRegImpl::stack_slot_size)); ++ __ addi_d(SP, SP, 4 * wordSize); ++ __ jr(RA); ++ ++ __ bind(miss); ++ __ st_ptr(R0, Address(SP, result_off * VMRegImpl::stack_slot_size)); // result ++ __ ld_ptr(A0, Address(SP, a0_off * VMRegImpl::stack_slot_size)); ++ __ ld_ptr(A2, Address(SP, a2_off * VMRegImpl::stack_slot_size)); ++ __ ld_ptr(A4, Address(SP, a4_off * VMRegImpl::stack_slot_size)); ++ __ ld_ptr(A5, Address(SP, a5_off * VMRegImpl::stack_slot_size)); ++ __ addi_d(SP, SP, 4 * wordSize); ++ __ jr(RA); ++ } ++ break; ++ ++ case monitorenter_nofpu_id: ++ save_fpu_registers = false; ++ // fall through ++ case monitorenter_id: ++ { ++ StubFrame f(sasm, "monitorenter", dont_gc_arguments); ++ OopMap* map = save_live_registers(sasm, save_fpu_registers); ++ ++ // Called with store_parameter and not C abi ++ ++ f.load_argument(1, A0); // A0,: object ++ f.load_argument(0, A1); // A1,: lock address ++ ++ int call_offset = __ call_RT(noreg, noreg, CAST_FROM_FN_PTR(address, monitorenter), A0, A1); ++ ++ oop_maps = new OopMapSet(); ++ oop_maps->add_gc_map(call_offset, map); ++ restore_live_registers(sasm, save_fpu_registers); ++ } ++ break; ++ ++ case monitorexit_nofpu_id: ++ 
save_fpu_registers = false; ++ // fall through ++ case monitorexit_id: ++ { ++ StubFrame f(sasm, "monitorexit", dont_gc_arguments); ++ OopMap* map = save_live_registers(sasm, save_fpu_registers); ++ ++ // Called with store_parameter and not C abi ++ ++ f.load_argument(0, A0); // A0,: lock address ++ ++ // note: really a leaf routine but must setup last java sp ++ // => use call_RT for now (speed can be improved by ++ // doing last java sp setup manually) ++ int call_offset = __ call_RT(noreg, noreg, CAST_FROM_FN_PTR(address, monitorexit), A0); ++ ++ oop_maps = new OopMapSet(); ++ oop_maps->add_gc_map(call_offset, map); ++ restore_live_registers(sasm, save_fpu_registers); ++ } ++ break; ++ ++ case deoptimize_id: ++ { ++ StubFrame f(sasm, "deoptimize", dont_gc_arguments, does_not_return); ++ OopMap* oop_map = save_live_registers(sasm); ++ f.load_argument(0, A1); ++ int call_offset = __ call_RT(noreg, noreg, CAST_FROM_FN_PTR(address, deoptimize), A1); ++ ++ oop_maps = new OopMapSet(); ++ oop_maps->add_gc_map(call_offset, oop_map); ++ restore_live_registers(sasm); ++ DeoptimizationBlob* deopt_blob = SharedRuntime::deopt_blob(); ++ assert(deopt_blob != NULL, "deoptimization blob must have been created"); ++ __ leave(); ++ __ jmp(deopt_blob->unpack_with_reexecution(), relocInfo::runtime_call_type); ++ } ++ break; ++ ++ case throw_range_check_failed_id: ++ { ++ StubFrame f(sasm, "range_check_failed", dont_gc_arguments, does_not_return); ++ oop_maps = generate_exception_throw(sasm, CAST_FROM_FN_PTR(address, throw_range_check_exception), true); ++ } ++ break; ++ ++ case unwind_exception_id: ++ { ++ __ set_info("unwind_exception", dont_gc_arguments); ++ // note: no stubframe since we are about to leave the current ++ // activation and we are calling a leaf VM function only. 
++ generate_unwind_exception(sasm); ++ } ++ break; ++ ++ case access_field_patching_id: ++ { ++ StubFrame f(sasm, "access_field_patching", dont_gc_arguments, does_not_return); ++ // we should set up register map ++ oop_maps = generate_patching(sasm, CAST_FROM_FN_PTR(address, access_field_patching)); ++ } ++ break; ++ ++ case load_klass_patching_id: ++ { ++ StubFrame f(sasm, "load_klass_patching", dont_gc_arguments, does_not_return); ++ // we should set up register map ++ oop_maps = generate_patching(sasm, CAST_FROM_FN_PTR(address, move_klass_patching)); ++ } ++ break; ++ ++ case load_mirror_patching_id: ++ { ++ StubFrame f(sasm, "load_mirror_patching", dont_gc_arguments, does_not_return); ++ // we should set up register map ++ oop_maps = generate_patching(sasm, CAST_FROM_FN_PTR(address, move_mirror_patching)); ++ } ++ break; ++ ++ case load_appendix_patching_id: ++ { ++ StubFrame f(sasm, "load_appendix_patching", dont_gc_arguments, does_not_return); ++ // we should set up register map ++ oop_maps = generate_patching(sasm, CAST_FROM_FN_PTR(address, move_appendix_patching)); ++ } ++ break; ++ ++ case handle_exception_nofpu_id: ++ case handle_exception_id: ++ { ++ StubFrame f(sasm, "handle_exception", dont_gc_arguments); ++ oop_maps = generate_handle_exception(id, sasm); ++ } ++ break; ++ ++ case handle_exception_from_callee_id: ++ { ++ StubFrame f(sasm, "handle_exception_from_callee", dont_gc_arguments); ++ oop_maps = generate_handle_exception(id, sasm); ++ } ++ break; ++ ++ case throw_index_exception_id: ++ { ++ StubFrame f(sasm, "index_range_check_failed", dont_gc_arguments, does_not_return); ++ oop_maps = generate_exception_throw(sasm, CAST_FROM_FN_PTR(address, throw_index_exception), true); ++ } ++ break; ++ ++ case throw_array_store_exception_id: ++ { ++ StubFrame f(sasm, "throw_array_store_exception", dont_gc_arguments, does_not_return); ++ // tos + 0: link ++ // + 1: return address ++ oop_maps = generate_exception_throw(sasm, CAST_FROM_FN_PTR(address, throw_array_store_exception), true); ++ } ++ break; ++ ++ case predicate_failed_trap_id: ++ { ++ StubFrame f(sasm, "predicate_failed_trap", dont_gc_arguments, does_not_return); ++ ++ OopMap* map = save_live_registers(sasm); ++ ++ int call_offset = __ call_RT(noreg, noreg, CAST_FROM_FN_PTR(address, predicate_failed_trap)); ++ oop_maps = new OopMapSet(); ++ oop_maps->add_gc_map(call_offset, map); ++ restore_live_registers(sasm); ++ __ leave(); ++ DeoptimizationBlob* deopt_blob = SharedRuntime::deopt_blob(); ++ assert(deopt_blob != NULL, "deoptimization blob must have been created"); ++ ++ __ jmp(deopt_blob->unpack_with_reexecution(), relocInfo::runtime_call_type); ++ } ++ break; ++ ++ case dtrace_object_alloc_id: ++ { ++ // A0: object ++ StubFrame f(sasm, "dtrace_object_alloc", dont_gc_arguments); ++ save_live_registers(sasm); ++ ++ __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::dtrace_object_alloc), A0); ++ ++ restore_live_registers(sasm); ++ } ++ break; ++ ++ default: ++ { ++ StubFrame f(sasm, "unimplemented entry", dont_gc_arguments, does_not_return); ++ __ li(A0, (int)id); ++ __ call_RT(noreg, noreg, CAST_FROM_FN_PTR(address, unimplemented_entry), A0); ++ } ++ break; ++ } ++ } ++ return oop_maps; ++} ++ ++#undef __ ++ ++const char *Runtime1::pd_name_for_address(address entry) { ++ Unimplemented(); ++ return 0; ++} +diff --git a/src/hotspot/cpu/loongarch/c1_globals_loongarch.hpp b/src/hotspot/cpu/loongarch/c1_globals_loongarch.hpp +new file mode 100644 +index 00000000000..ce84af28c9b +--- /dev/null ++++ 
b/src/hotspot/cpu/loongarch/c1_globals_loongarch.hpp +@@ -0,0 +1,65 @@ ++/* ++ * Copyright (c) 2000, 2021, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2021, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#ifndef CPU_LOONGARCH_C1_GLOBALS_LOONGARCH_HPP ++#define CPU_LOONGARCH_C1_GLOBALS_LOONGARCH_HPP ++ ++#include "utilities/globalDefinitions.hpp" ++#include "utilities/macros.hpp" ++ ++// Sets the default values for platform dependent flags used by the client compiler. ++// (see c1_globals.hpp) ++ ++#ifndef COMPILER2 ++define_pd_global(bool, BackgroundCompilation, true ); ++define_pd_global(bool, InlineIntrinsics, true ); ++define_pd_global(bool, PreferInterpreterNativeStubs, false); ++define_pd_global(bool, ProfileTraps, false); ++define_pd_global(bool, UseOnStackReplacement, true ); ++define_pd_global(bool, TieredCompilation, false); ++define_pd_global(intx, CompileThreshold, 1500 ); ++ ++define_pd_global(intx, OnStackReplacePercentage, 933 ); ++define_pd_global(intx, NewSizeThreadIncrease, 4*K ); ++define_pd_global(intx, InitialCodeCacheSize, 160*K); ++define_pd_global(intx, ReservedCodeCacheSize, 32*M ); ++define_pd_global(intx, NonProfiledCodeHeapSize, 13*M ); ++define_pd_global(intx, ProfiledCodeHeapSize, 14*M ); ++define_pd_global(intx, NonNMethodCodeHeapSize, 5*M ); ++define_pd_global(bool, ProfileInterpreter, false); ++define_pd_global(intx, CodeCacheExpansionSize, 32*K ); ++define_pd_global(uintx, CodeCacheMinBlockLength, 1); ++define_pd_global(uintx, CodeCacheMinimumUseSpace, 400*K); ++define_pd_global(bool, NeverActAsServerClassMachine, true ); ++define_pd_global(uint64_t,MaxRAM, 1ULL*G); ++define_pd_global(bool, CICompileOSR, true ); ++#endif // !COMPILER2 ++define_pd_global(bool, UseTypeProfile, false); ++ ++define_pd_global(bool, OptimizeSinglePrecision, true ); ++define_pd_global(bool, CSEArrayLength, false); ++define_pd_global(bool, TwoOperandLIRForm, false ); ++ ++#endif // CPU_LOONGARCH_C1_GLOBALS_LOONGARCH_HPP +diff --git a/src/hotspot/cpu/loongarch/c2_MacroAssembler_loongarch.cpp b/src/hotspot/cpu/loongarch/c2_MacroAssembler_loongarch.cpp +new file mode 100644 +index 00000000000..ef520a39ff3 +--- /dev/null ++++ b/src/hotspot/cpu/loongarch/c2_MacroAssembler_loongarch.cpp +@@ -0,0 +1,1872 @@ ++/* ++ * Copyright (c) 2020, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2021, 2023, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#include "precompiled.hpp" ++#include "asm/assembler.hpp" ++#include "asm/assembler.inline.hpp" ++#include "opto/c2_MacroAssembler.hpp" ++#include "opto/intrinsicnode.hpp" ++#include "opto/subnode.hpp" ++#include "runtime/biasedLocking.hpp" ++#include "runtime/objectMonitor.hpp" ++#include "runtime/stubRoutines.hpp" ++ ++#define A0 RA0 ++#define A1 RA1 ++#define A2 RA2 ++#define A3 RA3 ++#define A4 RA4 ++#define A5 RA5 ++#define A6 RA6 ++#define A7 RA7 ++#define T0 RT0 ++#define T1 RT1 ++#define T2 RT2 ++#define T3 RT3 ++#define T4 RT4 ++#define T5 RT5 ++#define T6 RT6 ++#define T7 RT7 ++#define T8 RT8 ++ ++// Fast_Lock and Fast_Unlock used by C2 ++ ++// Because the transitions from emitted code to the runtime ++// monitorenter/exit helper stubs are so slow it's critical that ++// we inline both the stack-locking fast-path and the inflated fast path. ++// ++// See also: cmpFastLock and cmpFastUnlock. ++// ++// What follows is a specialized inline transliteration of the code ++// in slow_enter() and slow_exit(). If we're concerned about I$ bloat ++// another option would be to emit TrySlowEnter and TrySlowExit methods ++// at startup-time. These methods would accept arguments as ++// (Obj, Self, box, Scratch) and return success-failure ++// indications in the icc.ZFlag. Fast_Lock and Fast_Unlock would simply ++// marshal the arguments and emit calls to TrySlowEnter and TrySlowExit. ++// In practice, however, the # of lock sites is bounded and is usually small. ++// Besides the call overhead, TrySlowEnter and TrySlowExit might suffer ++// if the processor uses simple bimodal branch predictors keyed by EIP ++// Since the helper routines would be called from multiple synchronization ++// sites. ++// ++// An even better approach would be write "MonitorEnter()" and "MonitorExit()" ++// in java - using j.u.c and unsafe - and just bind the lock and unlock sites ++// to those specialized methods. That'd give us a mostly platform-independent ++// implementation that the JITs could optimize and inline at their pleasure. ++// Done correctly, the only time we'd need to cross to native could would be ++// to park() or unpark() threads. We'd also need a few more unsafe operators ++// to (a) prevent compiler-JIT reordering of non-volatile accesses, and ++// (b) explicit barriers or fence operations. ++// ++// TODO: ++// ++// * Arrange for C2 to pass "Self" into Fast_Lock and Fast_Unlock in one of the registers (scr). ++// This avoids manifesting the Self pointer in the Fast_Lock and Fast_Unlock terminals. 
++// Given TLAB allocation, Self is usually manifested in a register, so passing it into ++// the lock operators would typically be faster than reifying Self. ++// ++// * Ideally I'd define the primitives as: ++// fast_lock (nax Obj, nax box, res, tmp, nax scr) where tmp and scr are KILLED. ++// fast_unlock (nax Obj, box, res, nax tmp) where tmp are KILLED ++// Unfortunately ADLC bugs prevent us from expressing the ideal form. ++// Instead, we're stuck with a rather awkward and brittle register assignments below. ++// Furthermore the register assignments are overconstrained, possibly resulting in ++// sub-optimal code near the synchronization site. ++// ++// * Eliminate the sp-proximity tests and just use "== Self" tests instead. ++// Alternately, use a better sp-proximity test. ++// ++// * Currently ObjectMonitor._Owner can hold either an sp value or a (THREAD *) value. ++// Either one is sufficient to uniquely identify a thread. ++// TODO: eliminate use of sp in _owner and use get_thread(tr) instead. ++// ++// * Intrinsify notify() and notifyAll() for the common cases where the ++// object is locked by the calling thread but the waitlist is empty. ++// avoid the expensive JNI call to JVM_Notify() and JVM_NotifyAll(). ++// ++// * use jccb and jmpb instead of jcc and jmp to improve code density. ++// But beware of excessive branch density on AMD Opterons. ++// ++// * Both Fast_Lock and Fast_Unlock set the ICC.ZF to indicate success ++// or failure of the fast-path. If the fast-path fails then we pass ++// control to the slow-path, typically in C. In Fast_Lock and ++// Fast_Unlock we often branch to DONE_LABEL, just to find that C2 ++// will emit a conditional branch immediately after the node. ++// So we have branches to branches and lots of ICC.ZF games. ++// Instead, it might be better to have C2 pass a "FailureLabel" ++// into Fast_Lock and Fast_Unlock. In the case of success, control ++// will drop through the node. ICC.ZF is undefined at exit. 
++// In the case of failure, the node will branch directly to the ++// FailureLabel ++ ++// obj: object to lock ++// box: on-stack box address (displaced header location) ++// tmp: tmp -- KILLED ++// scr: tmp -- KILLED ++void C2_MacroAssembler::fast_lock(Register objReg, Register boxReg, Register resReg, ++ Register tmpReg, Register scrReg) { ++ Label IsInflated, DONE, DONE_SET; ++ ++ // Ensure the register assignents are disjoint ++ guarantee(objReg != boxReg, ""); ++ guarantee(objReg != tmpReg, ""); ++ guarantee(objReg != scrReg, ""); ++ guarantee(boxReg != tmpReg, ""); ++ guarantee(boxReg != scrReg, ""); ++ ++ block_comment("FastLock"); ++ ++ if (PrintBiasedLockingStatistics) { ++ atomic_inc32((address)BiasedLocking::total_entry_count_addr(), 1, tmpReg, scrReg); ++ } ++ ++ // Possible cases that we'll encounter in fast_lock ++ // ------------------------------------------------ ++ // * Inflated ++ // -- unlocked ++ // -- Locked ++ // = by self ++ // = by other ++ // * biased ++ // -- by Self ++ // -- by other ++ // * neutral ++ // * stack-locked ++ // -- by self ++ // = sp-proximity test hits ++ // = sp-proximity test generates false-negative ++ // -- by other ++ // ++ ++ if (DiagnoseSyncOnValueBasedClasses != 0) { ++ load_klass(tmpReg, objReg); ++ ld_w(tmpReg, Address(tmpReg, Klass::access_flags_offset())); ++ li(AT, JVM_ACC_IS_VALUE_BASED_CLASS); ++ andr(AT, tmpReg, AT); ++ sltui(scrReg, AT, 1); ++ beqz(scrReg, DONE_SET); ++ } ++ ++ // TODO: optimize away redundant LDs of obj->mark and improve the markword triage ++ // order to reduce the number of conditional branches in the most common cases. ++ // Beware -- there's a subtle invariant that fetch of the markword ++ // at [FETCH], below, will never observe a biased encoding (*101b). ++ // If this invariant is not held we risk exclusion (safety) failure. ++ if (UseBiasedLocking && !UseOptoBiasInlining) { ++ Label succ, fail; ++ biased_locking_enter(boxReg, objReg, tmpReg, scrReg, false, succ, NULL); ++ b(fail); ++ bind(succ); ++ li(resReg, 1); ++ b(DONE); ++ bind(fail); ++ } ++ ++ ld_d(tmpReg, Address(objReg, 0)); //Fetch the markword of the object. ++ andi(AT, tmpReg, markWord::monitor_value); ++ bnez(AT, IsInflated); // inflated vs stack-locked|neutral|bias ++ ++ // Attempt stack-locking ... ++ ori(tmpReg, tmpReg, markWord::unlocked_value); ++ st_d(tmpReg, Address(boxReg, 0)); // Anticipate successful CAS ++ ++ if (PrintBiasedLockingStatistics) { ++ Label SUCC, FAIL; ++ cmpxchg(Address(objReg, 0), tmpReg, boxReg, scrReg, true, false, SUCC, &FAIL); // Updates tmpReg ++ bind(SUCC); ++ atomic_inc32((address)BiasedLocking::fast_path_entry_count_addr(), 1, AT, scrReg); ++ li(resReg, 1); ++ b(DONE); ++ bind(FAIL); ++ } else { ++ // If cmpxchg is succ, then scrReg = 1 ++ cmpxchg(Address(objReg, 0), tmpReg, boxReg, scrReg, true, false, DONE_SET); // Updates tmpReg ++ } ++ ++ // Recursive locking ++ // The object is stack-locked: markword contains stack pointer to BasicLock. ++ // Locked by current thread if difference with current SP is less than one page. ++ sub_d(tmpReg, tmpReg, SP); ++ li(AT, 7 - os::vm_page_size()); ++ andr(tmpReg, tmpReg, AT); ++ st_d(tmpReg, Address(boxReg, 0)); ++ ++ if (PrintBiasedLockingStatistics) { ++ Label L; ++ // tmpReg == 0 => BiasedLocking::_fast_path_entry_count++ ++ bnez(tmpReg, L); ++ atomic_inc32((address)BiasedLocking::fast_path_entry_count_addr(), 1, AT, scrReg); ++ bind(L); ++ } ++ ++ sltui(resReg, tmpReg, 1); // resReg = (tmpReg == 0) ? 
1 : 0 ++ b(DONE); ++ ++ bind(IsInflated); ++ // The object's monitor m is unlocked iff m->owner == NULL, ++ // otherwise m->owner may contain a thread or a stack address. ++ ++ // TODO: someday avoid the ST-before-CAS penalty by ++ // relocating (deferring) the following ST. ++ // We should also think about trying a CAS without having ++ // fetched _owner. If the CAS is successful we may ++ // avoid an RTO->RTS upgrade on the $line. ++ // Without cast to int32_t a movptr will destroy r10 which is typically obj ++ li(AT, (int32_t)intptr_t(markWord::unused_mark().value())); ++ st_d(AT, Address(boxReg, 0)); ++ ++ ld_d(AT, Address(tmpReg, ObjectMonitor::owner_offset_in_bytes() - 2)); ++ // if (m->owner != 0) => AT = 0, goto slow path. ++ move(scrReg, R0); ++ bnez(AT, DONE_SET); ++ ++#ifndef OPT_THREAD ++ get_thread(TREG); ++#endif ++ // It's inflated and appears unlocked ++ addi_d(tmpReg, tmpReg, ObjectMonitor::owner_offset_in_bytes() - 2); ++ cmpxchg(Address(tmpReg, 0), R0, TREG, scrReg, false, false); ++ // Intentional fall-through into DONE ... ++ ++ bind(DONE_SET); ++ move(resReg, scrReg); ++ ++ // DONE is a hot target - we'd really like to place it at the ++ // start of cache line by padding with NOPs. ++ // See the AMD and Intel software optimization manuals for the ++ // most efficient "long" NOP encodings. ++ // Unfortunately none of our alignment mechanisms suffice. ++ bind(DONE); ++ // At DONE the resReg is set as follows ... ++ // Fast_Unlock uses the same protocol. ++ // resReg == 1 -> Success ++ // resREg == 0 -> Failure - force control through the slow-path ++} ++ ++// obj: object to unlock ++// box: box address (displaced header location), killed. ++// tmp: killed tmp; cannot be obj nor box. ++// ++// Some commentary on balanced locking: ++// ++// Fast_Lock and Fast_Unlock are emitted only for provably balanced lock sites. ++// Methods that don't have provably balanced locking are forced to run in the ++// interpreter - such methods won't be compiled to use fast_lock and fast_unlock. ++// The interpreter provides two properties: ++// I1: At return-time the interpreter automatically and quietly unlocks any ++// objects acquired the current activation (frame). Recall that the ++// interpreter maintains an on-stack list of locks currently held by ++// a frame. ++// I2: If a method attempts to unlock an object that is not held by the ++// the frame the interpreter throws IMSX. ++// ++// Lets say A(), which has provably balanced locking, acquires O and then calls B(). ++// B() doesn't have provably balanced locking so it runs in the interpreter. ++// Control returns to A() and A() unlocks O. By I1 and I2, above, we know that O ++// is still locked by A(). ++// ++// The only other source of unbalanced locking would be JNI. The "Java Native Interface: ++// Programmer's Guide and Specification" claims that an object locked by jni_monitorenter ++// should not be unlocked by "normal" java-level locking and vice-versa. The specification ++// doesn't specify what will occur if a program engages in such mixed-mode locking, however. 
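++// In outline, the fast-path unlock that follows is (sketch of the code below):
++//   if (box->displaced_header == 0)           succeed;      // recursive stack-lock
++//   else if (obj->mark is not a monitor)      CAS obj->mark from box back to the
++//                                             displaced header (stack-locked case);
++//   else /* inflated */ if (owner == Self && recursions == 0 &&
++//                           cxq == NULL && EntryList == NULL)
++//       release-store owner = NULL and succeed;
++// resReg is set to 1 on success and 0 when control must go to the slow path,
++// mirroring the protocol documented at the end of fast_lock above.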
++ ++void C2_MacroAssembler::fast_unlock(Register objReg, Register boxReg, Register resReg, ++ Register tmpReg, Register scrReg) { ++ Label DONE, DONE_SET, Stacked, Inflated; ++ ++ guarantee(objReg != boxReg, ""); ++ guarantee(objReg != tmpReg, ""); ++ guarantee(objReg != scrReg, ""); ++ guarantee(boxReg != tmpReg, ""); ++ guarantee(boxReg != scrReg, ""); ++ ++ block_comment("FastUnlock"); ++ ++ // Critically, the biased locking test must have precedence over ++ // and appear before the (box->dhw == 0) recursive stack-lock test. ++ if (UseBiasedLocking && !UseOptoBiasInlining) { ++ Label succ, fail; ++ biased_locking_exit(objReg, tmpReg, succ); ++ b(fail); ++ bind(succ); ++ li(resReg, 1); ++ b(DONE); ++ bind(fail); ++ } ++ ++ ld_d(tmpReg, Address(boxReg, 0)); // Examine the displaced header ++ sltui(AT, tmpReg, 1); ++ beqz(tmpReg, DONE_SET); // 0 indicates recursive stack-lock ++ ++ ld_d(tmpReg, Address(objReg, 0)); // Examine the object's markword ++ andi(AT, tmpReg, markWord::monitor_value); ++ beqz(AT, Stacked); // Inflated? ++ ++ bind(Inflated); ++ // It's inflated. ++ // Despite our balanced locking property we still check that m->_owner == Self ++ // as java routines or native JNI code called by this thread might ++ // have released the lock. ++ // Refer to the comments in synchronizer.cpp for how we might encode extra ++ // state in _succ so we can avoid fetching EntryList|cxq. ++ // ++ // I'd like to add more cases in fast_lock() and fast_unlock() -- ++ // such as recursive enter and exit -- but we have to be wary of ++ // I$ bloat, T$ effects and BP$ effects. ++ // ++ // If there's no contention try a 1-0 exit. That is, exit without ++ // a costly MEMBAR or CAS. See synchronizer.cpp for details on how ++ // we detect and recover from the race that the 1-0 exit admits. ++ // ++ // Conceptually Fast_Unlock() must execute a STST|LDST "release" barrier ++ // before it STs null into _owner, releasing the lock. Updates ++ // to data protected by the critical section must be visible before ++ // we drop the lock (and thus before any other thread could acquire ++ // the lock and observe the fields protected by the lock). 
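++  // Below, that release ordering is provided by membar(LoadStore|StoreStore)
++  // issued immediately before the st_d that clears _owner.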
++#ifndef OPT_THREAD ++ get_thread(TREG); ++#endif ++ ++ // It's inflated ++ ld_d(scrReg, Address(tmpReg, ObjectMonitor::owner_offset_in_bytes() - 2)); ++ xorr(scrReg, scrReg, TREG); ++ ++ ld_d(AT, Address(tmpReg, ObjectMonitor::recursions_offset_in_bytes() - 2)); ++ orr(scrReg, scrReg, AT); ++ ++ move(AT, R0); ++ bnez(scrReg, DONE_SET); ++ ++ ld_d(scrReg, Address(tmpReg, ObjectMonitor::cxq_offset_in_bytes() - 2)); ++ ld_d(AT, Address(tmpReg, ObjectMonitor::EntryList_offset_in_bytes() - 2)); ++ orr(scrReg, scrReg, AT); ++ ++ move(AT, R0); ++ bnez(scrReg, DONE_SET); ++ ++ membar(Assembler::Membar_mask_bits(LoadStore|StoreStore)); ++ st_d(R0, Address(tmpReg, ObjectMonitor::owner_offset_in_bytes() - 2)); ++ li(resReg, 1); ++ b(DONE); ++ ++ bind(Stacked); ++ ld_d(tmpReg, Address(boxReg, 0)); ++ cmpxchg(Address(objReg, 0), boxReg, tmpReg, AT, false, false); ++ ++ bind(DONE_SET); ++ move(resReg, AT); ++ ++ bind(DONE); ++} ++ ++void C2_MacroAssembler::beq_long(Register rs, Register rt, Label& L) { ++ Label not_taken; ++ ++ bne(rs, rt, not_taken); ++ ++ jmp_far(L); ++ ++ bind(not_taken); ++} ++ ++void C2_MacroAssembler::bne_long(Register rs, Register rt, Label& L) { ++ Label not_taken; ++ ++ beq(rs, rt, not_taken); ++ ++ jmp_far(L); ++ ++ bind(not_taken); ++} ++ ++void C2_MacroAssembler::blt_long(Register rs, Register rt, Label& L, bool is_signed) { ++ Label not_taken; ++ if (is_signed) { ++ bge(rs, rt, not_taken); ++ } else { ++ bgeu(rs, rt, not_taken); ++ } ++ jmp_far(L); ++ bind(not_taken); ++} ++ ++void C2_MacroAssembler::bge_long(Register rs, Register rt, Label& L, bool is_signed) { ++ Label not_taken; ++ if (is_signed) { ++ blt(rs, rt, not_taken); ++ } else { ++ bltu(rs, rt, not_taken); ++ } ++ jmp_far(L); ++ bind(not_taken); ++} ++ ++void C2_MacroAssembler::bc1t_long(Label& L) { ++ Label not_taken; ++ ++ bceqz(FCC0, not_taken); ++ ++ jmp_far(L); ++ ++ bind(not_taken); ++} ++ ++void C2_MacroAssembler::bc1f_long(Label& L) { ++ Label not_taken; ++ ++ bcnez(FCC0, not_taken); ++ ++ jmp_far(L); ++ ++ bind(not_taken); ++} ++ ++typedef void (MacroAssembler::* load_chr_insn)(Register rd, Address adr); ++ ++void C2_MacroAssembler::string_indexof(Register haystack, Register needle, ++ Register haystack_len, Register needle_len, ++ Register result, int ae) ++{ ++ assert(ae != StrIntrinsicNode::LU, "Invalid encoding"); ++ ++ Label LINEARSEARCH, LINEARSTUB, DONE, NOMATCH; ++ ++ bool isLL = ae == StrIntrinsicNode::LL; ++ ++ bool needle_isL = ae == StrIntrinsicNode::LL || ae == StrIntrinsicNode::UL; ++ bool haystack_isL = ae == StrIntrinsicNode::LL || ae == StrIntrinsicNode::LU; ++ ++ int needle_chr_size = needle_isL ? 1 : 2; ++ int haystack_chr_size = haystack_isL ? 1 : 2; ++ ++ Address::ScaleFactor needle_chr_shift = needle_isL ? Address::no_scale ++ : Address::times_2; ++ Address::ScaleFactor haystack_chr_shift = haystack_isL ? Address::no_scale ++ : Address::times_2; ++ ++ load_chr_insn needle_load_1chr = needle_isL ? (load_chr_insn)&MacroAssembler::ld_bu ++ : (load_chr_insn)&MacroAssembler::ld_hu; ++ load_chr_insn haystack_load_1chr = haystack_isL ? (load_chr_insn)&MacroAssembler::ld_bu ++ : (load_chr_insn)&MacroAssembler::ld_hu; ++ ++ // Note, inline_string_indexOf() generates checks: ++ // if (pattern.count > src.count) return -1; ++ // if (pattern.count == 0) return 0; ++ ++ // We have two strings, a source string in haystack, haystack_len and a pattern string ++ // in needle, needle_len. Find the first occurrence of pattern in source or return -1. 
++ ++ // For larger pattern and source we use a simplified Boyer Moore algorithm. ++ // With a small pattern and source we use linear scan. ++ ++ // needle_len >= 8 && needle_len < 256 && needle_len < haystack_len/4, use bmh algorithm. ++ ++ // needle_len < 8, use linear scan ++ li(AT, 8); ++ blt(needle_len, AT, LINEARSEARCH); ++ ++ // needle_len >= 256, use linear scan ++ li(AT, 256); ++ bge(needle_len, AT, LINEARSTUB); ++ ++ // needle_len >= haystack_len/4, use linear scan ++ srli_d(AT, haystack_len, 2); ++ bge(needle_len, AT, LINEARSTUB); ++ ++ // Boyer-Moore-Horspool introduction: ++ // The Boyer Moore alogorithm is based on the description here:- ++ // ++ // http://en.wikipedia.org/wiki/Boyer%E2%80%93Moore_string_search_algorithm ++ // ++ // This describes and algorithm with 2 shift rules. The 'Bad Character' rule ++ // and the 'Good Suffix' rule. ++ // ++ // These rules are essentially heuristics for how far we can shift the ++ // pattern along the search string. ++ // ++ // The implementation here uses the 'Bad Character' rule only because of the ++ // complexity of initialisation for the 'Good Suffix' rule. ++ // ++ // This is also known as the Boyer-Moore-Horspool algorithm: ++ // ++ // http://en.wikipedia.org/wiki/Boyer-Moore-Horspool_algorithm ++ // ++ // #define ASIZE 256 ++ // ++ // int bm(unsigned char *pattern, int m, unsigned char *src, int n) { ++ // int i, j; ++ // unsigned c; ++ // unsigned char bc[ASIZE]; ++ // ++ // /* Preprocessing */ ++ // for (i = 0; i < ASIZE; ++i) ++ // bc[i] = m; ++ // for (i = 0; i < m - 1; ) { ++ // c = pattern[i]; ++ // ++i; ++ // // c < 256 for Latin1 string, so, no need for branch ++ // #ifdef PATTERN_STRING_IS_LATIN1 ++ // bc[c] = m - i; ++ // #else ++ // if (c < ASIZE) bc[c] = m - i; ++ // #endif ++ // } ++ // ++ // /* Searching */ ++ // j = 0; ++ // while (j <= n - m) { ++ // c = src[i+j]; ++ // if (pattern[m-1] == c) ++ // int k; ++ // for (k = m - 2; k >= 0 && pattern[k] == src[k + j]; --k); ++ // if (k < 0) return j; ++ // // c < 256 for Latin1 string, so, no need for branch ++ // #ifdef SOURCE_STRING_IS_LATIN1_AND_PATTERN_STRING_IS_LATIN1 ++ // // LL case: (c< 256) always true. Remove branch ++ // j += bc[pattern[j+m-1]]; ++ // #endif ++ // #ifdef SOURCE_STRING_IS_UTF_AND_PATTERN_STRING_IS_UTF ++ // // UU case: need if (c if not. ++ // if (c < ASIZE) ++ // j += bc[pattern[j+m-1]]; ++ // else ++ // j += m ++ // #endif ++ // } ++ // return -1; ++ // } ++ ++ Label BCLOOP, BCSKIP, BMLOOPSTR2, BMLOOPSTR1, BMSKIP, BMADV, BMMATCH, ++ BMLOOPSTR1_LASTCMP, BMLOOPSTR1_CMP, BMLOOPSTR1_AFTER_LOAD; ++ ++ Register haystack_end = haystack_len; ++ Register result_tmp = result; ++ ++ Register nlen_tmp = T0; // needle len tmp ++ Register skipch = T1; ++ Register last_byte = T2; ++ Register last_dword = T3; ++ Register orig_haystack = T4; ++ Register ch1 = T5; ++ Register ch2 = T6; ++ ++ RegSet spilled_regs = RegSet::range(T0, T6); ++ ++ push(spilled_regs); ++ ++ // pattern length is >=8, so, we can read at least 1 register for cases when ++ // UTF->Latin1 conversion is not needed(8 LL or 4UU) and half register for ++ // UL case. We'll re-read last character in inner pre-loop code to have ++ // single outer pre-loop load ++ const int first_step = isLL ? 
7 : 3; ++ ++ const int ASIZE = 256; ++ ++ addi_d(SP, SP, -ASIZE); ++ ++ // init BC offset table with default value: needle_len ++ // ++ // for (i = 0; i < ASIZE; ++i) ++ // bc[i] = m; ++ if (UseLASX) { ++ xvreplgr2vr_b(fscratch, needle_len); ++ ++ for (int i = 0; i < ASIZE; i += 32) { ++ xvst(fscratch, SP, i); ++ } ++ } else if (UseLSX) { ++ vreplgr2vr_b(fscratch, needle_len); ++ ++ for (int i = 0; i < ASIZE; i += 16) { ++ vst(fscratch, SP, i); ++ } ++ } else { ++ move(AT, needle_len); ++ bstrins_d(AT, AT, 15, 8); ++ bstrins_d(AT, AT, 31, 16); ++ bstrins_d(AT, AT, 63, 32); ++ ++ for (int i = 0; i < ASIZE; i += 8) { ++ st_d(AT, SP, i); ++ } ++ } ++ ++ sub_d(nlen_tmp, haystack_len, needle_len); ++ lea(haystack_end, Address(haystack, nlen_tmp, haystack_chr_shift, 0)); ++ addi_d(ch2, needle_len, -1); // bc offset init value ++ move(nlen_tmp, needle); ++ ++ // for (i = 0; i < m - 1; ) { ++ // c = pattern[i]; ++ // ++i; ++ // // c < 256 for Latin1 string, so, no need for branch ++ // #ifdef PATTERN_STRING_IS_LATIN1 ++ // bc[c] = m - i; ++ // #else ++ // if (c < ASIZE) bc[c] = m - i; ++ // #endif ++ // } ++ bind(BCLOOP); ++ (this->*needle_load_1chr)(ch1, Address(nlen_tmp)); ++ addi_d(nlen_tmp, nlen_tmp, needle_chr_size); ++ if (!needle_isL) { ++ // ae == StrIntrinsicNode::UU ++ li(AT, 256u); ++ bgeu(ch1, AT, BCSKIP); // GE for UTF ++ } ++ stx_b(ch2, SP, ch1); // store skip offset to BC offset table ++ ++ bind(BCSKIP); ++ addi_d(ch2, ch2, -1); // for next pattern element, skip distance -1 ++ blt(R0, ch2, BCLOOP); ++ ++ if (needle_isL == haystack_isL) { ++ // load last 8 pattern bytes (8LL/4UU symbols) ++ ld_d(last_dword, Address(needle, needle_len, needle_chr_shift, -wordSize)); ++ addi_d(nlen_tmp, needle_len, -1); // m - 1, index of the last element in pattern ++ move(orig_haystack, haystack); ++ bstrpick_d(last_byte, last_dword, 63, 64 - 8 * needle_chr_size); // UU/LL: pattern[m-1] ++ } else { ++ // UL: from UTF-16(source) search Latin1(pattern) ++ // load last 4 bytes(4 symbols) ++ ld_wu(last_byte, Address(needle, needle_len, Address::no_scale, -wordSize / 2)); ++ addi_d(nlen_tmp, needle_len, -1); // m - 1, index of the last element in pattern ++ move(orig_haystack, haystack); ++ // convert Latin1 to UTF. eg: 0x0000abcd -> 0x0a0b0c0d ++ bstrpick_d(last_dword, last_byte, 7, 0); ++ srli_d(last_byte, last_byte, 8); ++ bstrins_d(last_dword, last_byte, 23, 16); ++ srli_d(last_byte, last_byte, 8); ++ bstrins_d(last_dword, last_byte, 39, 32); ++ srli_d(last_byte, last_byte, 8); // last_byte: 0x0000000a ++ bstrins_d(last_dword, last_byte, 55, 48); // last_dword: 0x0a0b0c0d ++ } ++ ++ // i = m - 1; ++ // skipch = j + i; ++ // if (skipch == pattern[m - 1] ++ // for (k = m - 2; k >= 0 && pattern[k] == src[k + j]; --k); ++ // else ++ // move j with bad char offset table ++ bind(BMLOOPSTR2); ++ // compare pattern to source string backward ++ (this->*haystack_load_1chr)(skipch, Address(haystack, nlen_tmp, haystack_chr_shift, 0)); ++ addi_d(nlen_tmp, nlen_tmp, -first_step); // nlen_tmp is positive here, because needle_len >= 8 ++ bne(last_byte, skipch, BMSKIP); // if not equal, skipch is bad char ++ ld_d(ch2, Address(haystack, nlen_tmp, haystack_chr_shift, 0)); // load 8 bytes from source string ++ move(ch1, last_dword); ++ if (isLL) { ++ b(BMLOOPSTR1_AFTER_LOAD); ++ } else { ++ addi_d(nlen_tmp, nlen_tmp, -1); // no need to branch for UU/UL case. 
cnt1 >= 8 ++ b(BMLOOPSTR1_CMP); ++ } ++ ++ bind(BMLOOPSTR1); ++ (this->*needle_load_1chr)(ch1, Address(needle, nlen_tmp, needle_chr_shift, 0)); ++ (this->*haystack_load_1chr)(ch2, Address(haystack, nlen_tmp, haystack_chr_shift, 0)); ++ ++ bind(BMLOOPSTR1_AFTER_LOAD); ++ addi_d(nlen_tmp, nlen_tmp, -1); ++ blt(nlen_tmp, R0, BMLOOPSTR1_LASTCMP); ++ ++ bind(BMLOOPSTR1_CMP); ++ beq(ch1, ch2, BMLOOPSTR1); ++ ++ bind(BMSKIP); ++ if (!isLL) { ++ // if we've met UTF symbol while searching Latin1 pattern, then we can ++ // skip needle_len symbols ++ if (needle_isL != haystack_isL) { ++ move(result_tmp, needle_len); ++ } else { ++ li(result_tmp, 1); ++ } ++ li(AT, 256u); ++ bgeu(skipch, AT, BMADV); // GE for UTF ++ } ++ ldx_bu(result_tmp, SP, skipch); // load skip offset ++ ++ bind(BMADV); ++ addi_d(nlen_tmp, needle_len, -1); ++ // move haystack after bad char skip offset ++ lea(haystack, Address(haystack, result_tmp, haystack_chr_shift, 0)); ++ bge(haystack_end, haystack, BMLOOPSTR2); ++ addi_d(SP, SP, ASIZE); ++ b(NOMATCH); ++ ++ bind(BMLOOPSTR1_LASTCMP); ++ bne(ch1, ch2, BMSKIP); ++ ++ bind(BMMATCH); ++ sub_d(result, haystack, orig_haystack); ++ if (!haystack_isL) { ++ srli_d(result, result, 1); ++ } ++ addi_d(SP, SP, ASIZE); ++ pop(spilled_regs); ++ b(DONE); ++ ++ bind(LINEARSTUB); ++ li(AT, 16); // small patterns still should be handled by simple algorithm ++ blt(needle_len, AT, LINEARSEARCH); ++ move(result, R0); ++ address stub; ++ if (isLL) { ++ stub = StubRoutines::la::string_indexof_linear_ll(); ++ assert(stub != NULL, "string_indexof_linear_ll stub has not been generated"); ++ } else if (needle_isL) { ++ stub = StubRoutines::la::string_indexof_linear_ul(); ++ assert(stub != NULL, "string_indexof_linear_ul stub has not been generated"); ++ } else { ++ stub = StubRoutines::la::string_indexof_linear_uu(); ++ assert(stub != NULL, "string_indexof_linear_uu stub has not been generated"); ++ } ++ trampoline_call(RuntimeAddress(stub)); ++ b(DONE); ++ ++ bind(NOMATCH); ++ li(result, -1); ++ pop(spilled_regs); ++ b(DONE); ++ ++ bind(LINEARSEARCH); ++ string_indexof_linearscan(haystack, needle, haystack_len, needle_len, -1, result, ae); ++ ++ bind(DONE); ++} ++ ++void C2_MacroAssembler::string_indexof_linearscan(Register haystack, Register needle, ++ Register haystack_len, Register needle_len, ++ int needle_con_cnt, Register result, int ae) ++{ ++ // Note: ++ // needle_con_cnt > 0 means needle_len register is invalid, needle length is constant ++ // for UU/LL: needle_con_cnt[1, 4], UL: needle_con_cnt = 1 ++ assert(needle_con_cnt <= 4, "Invalid needle constant count"); ++ assert(ae != StrIntrinsicNode::LU, "Invalid encoding"); ++ ++ Register hlen_neg = haystack_len; ++ Register nlen_neg = needle_len; ++ Register result_tmp = result; ++ ++ Register nlen_tmp = A0, hlen_tmp = A1; ++ Register first = A2, ch1 = A3, ch2 = AT; ++ ++ RegSet spilled_regs = RegSet::range(A0, A3); ++ ++ push(spilled_regs); ++ ++ bool isLL = ae == StrIntrinsicNode::LL; ++ ++ bool needle_isL = ae == StrIntrinsicNode::LL || ae == StrIntrinsicNode::UL; ++ bool haystack_isL = ae == StrIntrinsicNode::LL || ae == StrIntrinsicNode::LU; ++ int needle_chr_shift = needle_isL ? 0 : 1; ++ int haystack_chr_shift = haystack_isL ? 0 : 1; ++ int needle_chr_size = needle_isL ? 1 : 2; ++ int haystack_chr_size = haystack_isL ? 1 : 2; ++ ++ load_chr_insn needle_load_1chr = needle_isL ? (load_chr_insn)&MacroAssembler::ld_bu ++ : (load_chr_insn)&MacroAssembler::ld_hu; ++ load_chr_insn haystack_load_1chr = haystack_isL ? 
(load_chr_insn)&MacroAssembler::ld_bu ++ : (load_chr_insn)&MacroAssembler::ld_hu; ++ load_chr_insn load_2chr = isLL ? (load_chr_insn)&MacroAssembler::ld_hu ++ : (load_chr_insn)&MacroAssembler::ld_wu; ++ load_chr_insn load_4chr = isLL ? (load_chr_insn)&MacroAssembler::ld_wu ++ : (load_chr_insn)&MacroAssembler::ld_d; ++ ++ Label DO1, DO2, DO3, MATCH, NOMATCH, DONE; ++ ++ if (needle_con_cnt == -1) { ++ Label DOSHORT, FIRST_LOOP, STR2_NEXT, STR1_LOOP, STR1_NEXT; ++ ++ li(AT, needle_isL == haystack_isL ? 4 : 2); // UU/LL:4, UL:2 ++ blt(needle_len, AT, DOSHORT); ++ ++ sub_d(result_tmp, haystack_len, needle_len); ++ ++ (this->*needle_load_1chr)(first, Address(needle)); ++ if (!haystack_isL) slli_d(result_tmp, result_tmp, haystack_chr_shift); ++ add_d(haystack, haystack, result_tmp); ++ sub_d(hlen_neg, R0, result_tmp); ++ if (!needle_isL) slli_d(needle_len, needle_len, needle_chr_shift); ++ add_d(needle, needle, needle_len); ++ sub_d(nlen_neg, R0, needle_len); ++ ++ bind(FIRST_LOOP); ++ (this->*haystack_load_1chr)(ch2, Address(haystack, hlen_neg, Address::no_scale, 0)); ++ beq(first, ch2, STR1_LOOP); ++ ++ bind(STR2_NEXT); ++ addi_d(hlen_neg, hlen_neg, haystack_chr_size); ++ bge(R0, hlen_neg, FIRST_LOOP); ++ b(NOMATCH); ++ ++ bind(STR1_LOOP); ++ addi_d(nlen_tmp, nlen_neg, needle_chr_size); ++ addi_d(hlen_tmp, hlen_neg, haystack_chr_size); ++ bge(nlen_tmp, R0, MATCH); ++ ++ bind(STR1_NEXT); ++ (this->*needle_load_1chr)(ch1, Address(needle, nlen_tmp, Address::no_scale, 0)); ++ (this->*haystack_load_1chr)(ch2, Address(haystack, hlen_tmp, Address::no_scale, 0)); ++ bne(ch1, ch2, STR2_NEXT); ++ addi_d(nlen_tmp, nlen_tmp, needle_chr_size); ++ addi_d(hlen_tmp, hlen_tmp, haystack_chr_size); ++ blt(nlen_tmp, R0, STR1_NEXT); ++ b(MATCH); ++ ++ bind(DOSHORT); ++ if (needle_isL == haystack_isL) { ++ li(AT, 2); ++ blt(needle_len, AT, DO1); // needle_len == 1 ++ blt(AT, needle_len, DO3); // needle_len == 3 ++ // if needle_len == 2 then goto DO2 ++ } ++ } ++ ++ if (needle_con_cnt == 4) { ++ Label CH1_LOOP; ++ (this->*load_4chr)(ch1, Address(needle)); ++ addi_d(result_tmp, haystack_len, -4); ++ if (!haystack_isL) slli_d(result_tmp, result_tmp, haystack_chr_shift); ++ add_d(haystack, haystack, result_tmp); ++ sub_d(hlen_neg, R0, result_tmp); ++ ++ bind(CH1_LOOP); ++ (this->*load_4chr)(ch2, Address(haystack, hlen_neg, Address::no_scale, 0)); ++ beq(ch1, ch2, MATCH); ++ addi_d(hlen_neg, hlen_neg, haystack_chr_size); ++ bge(R0, hlen_neg, CH1_LOOP); ++ b(NOMATCH); ++ } ++ ++ if ((needle_con_cnt == -1 && needle_isL == haystack_isL) || needle_con_cnt == 2) { ++ Label CH1_LOOP; ++ bind(DO2); ++ (this->*load_2chr)(ch1, Address(needle)); ++ addi_d(result_tmp, haystack_len, -2); ++ if (!haystack_isL) slli_d(result_tmp, result_tmp, haystack_chr_shift); ++ add_d(haystack, haystack, result_tmp); ++ sub_d(hlen_neg, R0, result_tmp); ++ ++ bind(CH1_LOOP); ++ (this->*load_2chr)(ch2, Address(haystack, hlen_neg, Address::no_scale, 0)); ++ beq(ch1, ch2, MATCH); ++ addi_d(hlen_neg, hlen_neg, haystack_chr_size); ++ bge(R0, hlen_neg, CH1_LOOP); ++ b(NOMATCH); ++ } ++ ++ if ((needle_con_cnt == -1 && needle_isL == haystack_isL) || needle_con_cnt == 3) { ++ Label FIRST_LOOP, STR2_NEXT, STR1_LOOP; ++ ++ bind(DO3); ++ (this->*load_2chr)(first, Address(needle)); ++ (this->*needle_load_1chr)(ch1, Address(needle, 2 * needle_chr_size)); ++ addi_d(result_tmp, haystack_len, -3); ++ if (!haystack_isL) slli_d(result_tmp, result_tmp, haystack_chr_shift); ++ add_d(haystack, haystack, result_tmp); ++ sub_d(hlen_neg, R0, result_tmp); ++ ++ 
bind(FIRST_LOOP); ++ (this->*load_2chr)(ch2, Address(haystack, hlen_neg, Address::no_scale, 0)); ++ beq(first, ch2, STR1_LOOP); ++ ++ bind(STR2_NEXT); ++ addi_d(hlen_neg, hlen_neg, haystack_chr_size); ++ bge(R0, hlen_neg, FIRST_LOOP); ++ b(NOMATCH); ++ ++ bind(STR1_LOOP); ++ (this->*haystack_load_1chr)(ch2, Address(haystack, hlen_neg, Address::no_scale, 2 * haystack_chr_size)); ++ bne(ch1, ch2, STR2_NEXT); ++ b(MATCH); ++ } ++ ++ if (needle_con_cnt == -1 || needle_con_cnt == 1) { ++ Label CH1_LOOP, HAS_ZERO, DO1_SHORT, DO1_LOOP; ++ Register mask01 = nlen_tmp; ++ Register mask7f = hlen_tmp; ++ Register masked = first; ++ ++ bind(DO1); ++ (this->*needle_load_1chr)(ch1, Address(needle)); ++ li(AT, 8); ++ blt(haystack_len, AT, DO1_SHORT); ++ ++ addi_d(result_tmp, haystack_len, -8 / haystack_chr_size); ++ if (!haystack_isL) slli_d(result_tmp, result_tmp, haystack_chr_shift); ++ add_d(haystack, haystack, result_tmp); ++ sub_d(hlen_neg, R0, result_tmp); ++ ++ if (haystack_isL) bstrins_d(ch1, ch1, 15, 8); ++ bstrins_d(ch1, ch1, 31, 16); ++ bstrins_d(ch1, ch1, 63, 32); ++ ++ li(mask01, haystack_isL ? 0x0101010101010101 : 0x0001000100010001); ++ li(mask7f, haystack_isL ? 0x7f7f7f7f7f7f7f7f : 0x7fff7fff7fff7fff); ++ ++ bind(CH1_LOOP); ++ ldx_d(ch2, haystack, hlen_neg); ++ xorr(ch2, ch1, ch2); ++ sub_d(masked, ch2, mask01); ++ orr(ch2, ch2, mask7f); ++ andn(masked, masked, ch2); ++ bnez(masked, HAS_ZERO); ++ addi_d(hlen_neg, hlen_neg, 8); ++ blt(hlen_neg, R0, CH1_LOOP); ++ ++ li(AT, 8); ++ bge(hlen_neg, AT, NOMATCH); ++ move(hlen_neg, R0); ++ b(CH1_LOOP); ++ ++ bind(HAS_ZERO); ++ ctz_d(masked, masked); ++ srli_d(masked, masked, 3); ++ add_d(hlen_neg, hlen_neg, masked); ++ b(MATCH); ++ ++ bind(DO1_SHORT); ++ addi_d(result_tmp, haystack_len, -1); ++ if (!haystack_isL) slli_d(result_tmp, result_tmp, haystack_chr_shift); ++ add_d(haystack, haystack, result_tmp); ++ sub_d(hlen_neg, R0, result_tmp); ++ ++ bind(DO1_LOOP); ++ (this->*haystack_load_1chr)(ch2, Address(haystack, hlen_neg, Address::no_scale, 0)); ++ beq(ch1, ch2, MATCH); ++ addi_d(hlen_neg, hlen_neg, haystack_chr_size); ++ bge(R0, hlen_neg, DO1_LOOP); ++ } ++ ++ bind(NOMATCH); ++ li(result, -1); ++ b(DONE); ++ ++ bind(MATCH); ++ add_d(result, result_tmp, hlen_neg); ++ if (!haystack_isL) srai_d(result, result, haystack_chr_shift); ++ ++ bind(DONE); ++ pop(spilled_regs); ++} ++ ++void C2_MacroAssembler::string_indexof_char(Register str1, Register cnt1, ++ Register ch, Register result, ++ Register tmp1, Register tmp2, ++ Register tmp3) ++{ ++ Label CH1_LOOP, HAS_ZERO, DO1_SHORT, DO1_LOOP, NOMATCH, DONE; ++ ++ beqz(cnt1, NOMATCH); ++ ++ move(result, R0); ++ ori(tmp1, R0, 4); ++ blt(cnt1, tmp1, DO1_LOOP); ++ ++ // UTF-16 char occupies 16 bits ++ // ch -> chchchch ++ bstrins_d(ch, ch, 31, 16); ++ bstrins_d(ch, ch, 63, 32); ++ ++ li(tmp2, 0x0001000100010001); ++ li(tmp3, 0x7fff7fff7fff7fff); ++ ++ bind(CH1_LOOP); ++ ld_d(AT, str1, 0); ++ xorr(AT, ch, AT); ++ sub_d(tmp1, AT, tmp2); ++ orr(AT, AT, tmp3); ++ andn(tmp1, tmp1, AT); ++ bnez(tmp1, HAS_ZERO); ++ addi_d(str1, str1, 8); ++ addi_d(result, result, 4); ++ ++ // meet the end of string ++ beq(cnt1, result, NOMATCH); ++ ++ addi_d(tmp1, result, 4); ++ bge(tmp1, cnt1, DO1_SHORT); ++ b(CH1_LOOP); ++ ++ bind(HAS_ZERO); ++ ctz_d(tmp1, tmp1); ++ srli_d(tmp1, tmp1, 4); ++ add_d(result, result, tmp1); ++ b(DONE); ++ ++ // restore ch ++ bind(DO1_SHORT); ++ bstrpick_d(ch, ch, 15, 0); ++ ++ bind(DO1_LOOP); ++ ld_hu(tmp1, str1, 0); ++ beq(ch, tmp1, DONE); ++ addi_d(str1, str1, 2); ++ addi_d(result, result, 1); ++ 
blt(result, cnt1, DO1_LOOP); ++ ++ bind(NOMATCH); ++ addi_d(result, R0, -1); ++ ++ bind(DONE); ++} ++ ++void C2_MacroAssembler::stringL_indexof_char(Register str1, Register cnt1, ++ Register ch, Register result, ++ Register tmp1, Register tmp2, ++ Register tmp3) ++{ ++ Label CH1_LOOP, HAS_ZERO, DO1_SHORT, DO1_LOOP, NOMATCH, DONE; ++ ++ beqz(cnt1, NOMATCH); ++ ++ move(result, R0); ++ ori(tmp1, R0, 8); ++ blt(cnt1, tmp1, DO1_LOOP); ++ ++ // Latin-1 char occupies 8 bits ++ // ch -> chchchchchchchch ++ bstrins_d(ch, ch, 15, 8); ++ bstrins_d(ch, ch, 31, 16); ++ bstrins_d(ch, ch, 63, 32); ++ ++ li(tmp2, 0x0101010101010101); ++ li(tmp3, 0x7f7f7f7f7f7f7f7f); ++ ++ bind(CH1_LOOP); ++ ld_d(AT, str1, 0); ++ xorr(AT, ch, AT); ++ sub_d(tmp1, AT, tmp2); ++ orr(AT, AT, tmp3); ++ andn(tmp1, tmp1, AT); ++ bnez(tmp1, HAS_ZERO); ++ addi_d(str1, str1, 8); ++ addi_d(result, result, 8); ++ ++ // meet the end of string ++ beq(cnt1, result, NOMATCH); ++ ++ addi_d(tmp1, result, 8); ++ bge(tmp1, cnt1, DO1_SHORT); ++ b(CH1_LOOP); ++ ++ bind(HAS_ZERO); ++ ctz_d(tmp1, tmp1); ++ srli_d(tmp1, tmp1, 3); ++ add_d(result, result, tmp1); ++ b(DONE); ++ ++ // restore ch ++ bind(DO1_SHORT); ++ bstrpick_d(ch, ch, 7, 0); ++ ++ bind(DO1_LOOP); ++ ld_bu(tmp1, str1, 0); ++ beq(ch, tmp1, DONE); ++ addi_d(str1, str1, 1); ++ addi_d(result, result, 1); ++ blt(result, cnt1, DO1_LOOP); ++ ++ bind(NOMATCH); ++ addi_d(result, R0, -1); ++ ++ bind(DONE); ++} ++ ++// Compare strings, used for char[] and byte[]. ++void C2_MacroAssembler::string_compare(Register str1, Register str2, ++ Register cnt1, Register cnt2, Register result, ++ int ae, Register tmp1, Register tmp2) { ++ Label L, Loop, LoopEnd, HaveResult, Done; ++ ++ bool isLL = ae == StrIntrinsicNode::LL; ++ bool isLU = ae == StrIntrinsicNode::LU; ++ bool isUL = ae == StrIntrinsicNode::UL; ++ ++ bool str1_isL = isLL || isLU; ++ bool str2_isL = isLL || isUL; ++ ++ int charsInWord = isLL ? wordSize : wordSize/2; ++ ++ if (!str1_isL) srli_w(cnt1, cnt1, 1); ++ if (!str2_isL) srli_w(cnt2, cnt2, 1); ++ ++ // compute the difference of lengths (in result) ++ sub_d(result, cnt1, cnt2); // result holds the difference of two lengths ++ ++ // compute the shorter length (in cnt1) ++ ori(AT, R0, charsInWord); ++ bge(cnt2, cnt1, Loop); ++ move(cnt1, cnt2); ++ ++ // Now the shorter length is in cnt1 and cnt2 can be used as a tmp register ++ // ++ // For example: ++ // If isLL == true and cnt1 > 8, we load 8 bytes from str1 and str2. (Suppose A1 and B1 are different) ++ // tmp1: A7 A6 A5 A4 A3 A2 A1 A0 ++ // tmp2: B7 B6 B5 B4 B3 B2 B1 B0 ++ // ++ // Then Use xor to find the difference between tmp1 and tmp2, right shift. ++ // tmp1: 00 A7 A6 A5 A4 A3 A2 A1 ++ // tmp2: 00 B7 B6 B5 B4 B3 B2 B1 ++ // ++ // Fetch 0 to 7 bits of tmp1 and tmp2, subtract to get the result. ++ // Other types are similar to isLL. 
++ bind(Loop); ++ blt(cnt1, AT, LoopEnd); ++ if (isLL) { ++ ld_d(tmp1, str1, 0); ++ ld_d(tmp2, str2, 0); ++ beq(tmp1, tmp2, L); ++ xorr(cnt2, tmp1, tmp2); ++ ctz_d(cnt2, cnt2); ++ andi(cnt2, cnt2, 0x38); ++ srl_d(tmp1, tmp1, cnt2); ++ srl_d(tmp2, tmp2, cnt2); ++ bstrpick_d(tmp1, tmp1, 7, 0); ++ bstrpick_d(tmp2, tmp2, 7, 0); ++ sub_d(result, tmp1, tmp2); ++ b(Done); ++ bind(L); ++ addi_d(str1, str1, 8); ++ addi_d(str2, str2, 8); ++ addi_d(cnt1, cnt1, -charsInWord); ++ b(Loop); ++ } else if (isLU) { ++ ld_wu(cnt2, str1, 0); ++ andr(tmp1, R0, R0); ++ bstrins_d(tmp1, cnt2, 7, 0); ++ srli_d(cnt2, cnt2, 8); ++ bstrins_d(tmp1, cnt2, 23, 16); ++ srli_d(cnt2, cnt2, 8); ++ bstrins_d(tmp1, cnt2, 39, 32); ++ srli_d(cnt2, cnt2, 8); ++ bstrins_d(tmp1, cnt2, 55, 48); ++ ld_d(tmp2, str2, 0); ++ beq(tmp1, tmp2, L); ++ xorr(cnt2, tmp1, tmp2); ++ ctz_d(cnt2, cnt2); ++ andi(cnt2, cnt2, 0x30); ++ srl_d(tmp1, tmp1, cnt2); ++ srl_d(tmp2, tmp2, cnt2); ++ bstrpick_d(tmp1, tmp1, 15, 0); ++ bstrpick_d(tmp2, tmp2, 15, 0); ++ sub_d(result, tmp1, tmp2); ++ b(Done); ++ bind(L); ++ addi_d(str1, str1, 4); ++ addi_d(str2, str2, 8); ++ addi_d(cnt1, cnt1, -charsInWord); ++ b(Loop); ++ } else if (isUL) { ++ ld_wu(cnt2, str2, 0); ++ andr(tmp2, R0, R0); ++ bstrins_d(tmp2, cnt2, 7, 0); ++ srli_d(cnt2, cnt2, 8); ++ bstrins_d(tmp2, cnt2, 23, 16); ++ srli_d(cnt2, cnt2, 8); ++ bstrins_d(tmp2, cnt2, 39, 32); ++ srli_d(cnt2, cnt2, 8); ++ bstrins_d(tmp2, cnt2, 55, 48); ++ ld_d(tmp1, str1, 0); ++ beq(tmp1, tmp2, L); ++ xorr(cnt2, tmp1, tmp2); ++ ctz_d(cnt2, cnt2); ++ andi(cnt2, cnt2, 0x30); ++ srl_d(tmp1, tmp1, cnt2); ++ srl_d(tmp2, tmp2, cnt2); ++ bstrpick_d(tmp1, tmp1, 15, 0); ++ bstrpick_d(tmp2, tmp2, 15, 0); ++ sub_d(result, tmp1, tmp2); ++ b(Done); ++ bind(L); ++ addi_d(str1, str1, 8); ++ addi_d(str2, str2, 4); ++ addi_d(cnt1, cnt1, -charsInWord); ++ b(Loop); ++ } else { // isUU ++ ld_d(tmp1, str1, 0); ++ ld_d(tmp2, str2, 0); ++ beq(tmp1, tmp2, L); ++ xorr(cnt2, tmp1, tmp2); ++ ctz_d(cnt2, cnt2); ++ andi(cnt2, cnt2, 0x30); ++ srl_d(tmp1, tmp1, cnt2); ++ srl_d(tmp2, tmp2, cnt2); ++ bstrpick_d(tmp1, tmp1, 15, 0); ++ bstrpick_d(tmp2, tmp2, 15, 0); ++ sub_d(result, tmp1, tmp2); ++ b(Done); ++ bind(L); ++ addi_d(str1, str1, 8); ++ addi_d(str2, str2, 8); ++ addi_d(cnt1, cnt1, -charsInWord); ++ b(Loop); ++ } ++ ++ bind(LoopEnd); ++ beqz(cnt1, Done); ++ if (str1_isL) { ++ ld_bu(tmp1, str1, 0); ++ } else { ++ ld_hu(tmp1, str1, 0); ++ } ++ ++ // compare current character ++ if (str2_isL) { ++ ld_bu(tmp2, str2, 0); ++ } else { ++ ld_hu(tmp2, str2, 0); ++ } ++ bne(tmp1, tmp2, HaveResult); ++ addi_d(str1, str1, str1_isL ? 1 : 2); ++ addi_d(str2, str2, str2_isL ? 1 : 2); ++ addi_d(cnt1, cnt1, -1); ++ b(LoopEnd); ++ ++ bind(HaveResult); ++ sub_d(result, tmp1, tmp2); ++ ++ bind(Done); ++} ++ ++// Compare char[] or byte[] arrays or substrings. ++void C2_MacroAssembler::arrays_equals(Register str1, Register str2, ++ Register cnt, Register tmp1, Register tmp2, Register result, ++ bool is_char) { ++ Label Loop, LoopEnd, True, False; ++ ++ addi_d(result, R0, 1); ++ beq(str1, str2, True); // same char[] ? ++ beqz(cnt, True); ++ ++ addi_d(AT, R0, is_char ? wordSize/2 : wordSize); ++ bind(Loop); ++ blt(cnt, AT, LoopEnd); ++ ld_d(tmp1, str1, 0); ++ ld_d(tmp2, str2, 0); ++ bne(tmp1, tmp2, False); ++ addi_d(str1, str1, 8); ++ addi_d(str2, str2, 8); ++ addi_d(cnt, cnt, is_char ? 
-wordSize/2 : -wordSize); ++ b(Loop); ++ ++ bind(LoopEnd); ++ beqz(cnt, True); ++ // compare current character ++ if (is_char) { ++ ld_hu(tmp1, str1, 0); ++ ld_hu(tmp2, str2, 0); ++ } else { ++ ld_bu(tmp1, str1, 0); ++ ld_bu(tmp2, str2, 0); ++ } ++ bne(tmp1, tmp2, False); ++ addi_d(str1, str1, is_char ? 2 : 1); ++ addi_d(str2, str2, is_char ? 2 : 1); ++ addi_d(cnt, cnt, -1); ++ b(LoopEnd); ++ ++ bind(False); ++ addi_d(result, R0, 0); ++ ++ bind(True); ++} ++ ++void C2_MacroAssembler::loadstore(Register reg, Register base, int disp, int type) { ++ switch (type) { ++ case STORE_BYTE: st_b (reg, base, disp); break; ++ case STORE_CHAR: ++ case STORE_SHORT: st_h (reg, base, disp); break; ++ case STORE_INT: st_w (reg, base, disp); break; ++ case STORE_LONG: st_d (reg, base, disp); break; ++ case LOAD_BYTE: ld_b (reg, base, disp); break; ++ case LOAD_U_BYTE: ld_bu(reg, base, disp); break; ++ case LOAD_SHORT: ld_h (reg, base, disp); break; ++ case LOAD_U_SHORT: ld_hu(reg, base, disp); break; ++ case LOAD_INT: ld_w (reg, base, disp); break; ++ case LOAD_U_INT: ld_wu(reg, base, disp); break; ++ case LOAD_LONG: ld_d (reg, base, disp); break; ++ case LOAD_LINKED_LONG: ++ ll_d(reg, base, disp); ++ break; ++ default: ++ ShouldNotReachHere(); ++ } ++} ++ ++void C2_MacroAssembler::loadstore(Register reg, Register base, Register disp, int type) { ++ switch (type) { ++ case STORE_BYTE: stx_b (reg, base, disp); break; ++ case STORE_CHAR: ++ case STORE_SHORT: stx_h (reg, base, disp); break; ++ case STORE_INT: stx_w (reg, base, disp); break; ++ case STORE_LONG: stx_d (reg, base, disp); break; ++ case LOAD_BYTE: ldx_b (reg, base, disp); break; ++ case LOAD_U_BYTE: ldx_bu(reg, base, disp); break; ++ case LOAD_SHORT: ldx_h (reg, base, disp); break; ++ case LOAD_U_SHORT: ldx_hu(reg, base, disp); break; ++ case LOAD_INT: ldx_w (reg, base, disp); break; ++ case LOAD_U_INT: ldx_wu(reg, base, disp); break; ++ case LOAD_LONG: ldx_d (reg, base, disp); break; ++ case LOAD_LINKED_LONG: ++ add_d(AT, base, disp); ++ ll_d(reg, AT, 0); ++ break; ++ default: ++ ShouldNotReachHere(); ++ } ++} ++ ++void C2_MacroAssembler::loadstore(FloatRegister reg, Register base, int disp, int type) { ++ switch (type) { ++ case STORE_FLOAT: fst_s(reg, base, disp); break; ++ case STORE_DOUBLE: fst_d(reg, base, disp); break; ++ case STORE_VECTORX: vst (reg, base, disp); break; ++ case STORE_VECTORY: xvst (reg, base, disp); break; ++ case LOAD_FLOAT: fld_s(reg, base, disp); break; ++ case LOAD_DOUBLE: fld_d(reg, base, disp); break; ++ case LOAD_VECTORX: vld (reg, base, disp); break; ++ case LOAD_VECTORY: xvld (reg, base, disp); break; ++ default: ++ ShouldNotReachHere(); ++ } ++} ++ ++void C2_MacroAssembler::loadstore(FloatRegister reg, Register base, Register disp, int type) { ++ switch (type) { ++ case STORE_FLOAT: fstx_s(reg, base, disp); break; ++ case STORE_DOUBLE: fstx_d(reg, base, disp); break; ++ case STORE_VECTORX: vstx (reg, base, disp); break; ++ case STORE_VECTORY: xvstx (reg, base, disp); break; ++ case LOAD_FLOAT: fldx_s(reg, base, disp); break; ++ case LOAD_DOUBLE: fldx_d(reg, base, disp); break; ++ case LOAD_VECTORX: vldx (reg, base, disp); break; ++ case LOAD_VECTORY: xvldx (reg, base, disp); break; ++ default: ++ ShouldNotReachHere(); ++ } ++} ++ ++void C2_MacroAssembler::reduce_ins_v(FloatRegister vec1, FloatRegister vec2, FloatRegister vec3, BasicType type, int opcode) { ++ switch (type) { ++ case T_BYTE: ++ switch (opcode) { ++ case Op_AddReductionVI: vadd_b(vec1, vec2, vec3); break; ++ case Op_MulReductionVI: vmul_b(vec1, 
vec2, vec3); break; ++ case Op_MaxReductionV: vmax_b(vec1, vec2, vec3); break; ++ case Op_MinReductionV: vmin_b(vec1, vec2, vec3); break; ++ case Op_AndReductionV: vand_v(vec1, vec2, vec3); break; ++ case Op_OrReductionV: vor_v(vec1, vec2, vec3); break; ++ case Op_XorReductionV: vxor_v(vec1, vec2, vec3); break; ++ default: ++ ShouldNotReachHere(); ++ } ++ break; ++ case T_SHORT: ++ switch (opcode) { ++ case Op_AddReductionVI: vadd_h(vec1, vec2, vec3); break; ++ case Op_MulReductionVI: vmul_h(vec1, vec2, vec3); break; ++ case Op_MaxReductionV: vmax_h(vec1, vec2, vec3); break; ++ case Op_MinReductionV: vmin_h(vec1, vec2, vec3); break; ++ case Op_AndReductionV: vand_v(vec1, vec2, vec3); break; ++ case Op_OrReductionV: vor_v(vec1, vec2, vec3); break; ++ case Op_XorReductionV: vxor_v(vec1, vec2, vec3); break; ++ default: ++ ShouldNotReachHere(); ++ } ++ break; ++ case T_INT: ++ switch (opcode) { ++ case Op_AddReductionVI: vadd_w(vec1, vec2, vec3); break; ++ case Op_MulReductionVI: vmul_w(vec1, vec2, vec3); break; ++ case Op_MaxReductionV: vmax_w(vec1, vec2, vec3); break; ++ case Op_MinReductionV: vmin_w(vec1, vec2, vec3); break; ++ case Op_AndReductionV: vand_v(vec1, vec2, vec3); break; ++ case Op_OrReductionV: vor_v(vec1, vec2, vec3); break; ++ case Op_XorReductionV: vxor_v(vec1, vec2, vec3); break; ++ default: ++ ShouldNotReachHere(); ++ } ++ break; ++ case T_LONG: ++ switch (opcode) { ++ case Op_AddReductionVL: vadd_d(vec1, vec2, vec3); break; ++ case Op_MulReductionVL: vmul_d(vec1, vec2, vec3); break; ++ case Op_MaxReductionV: vmax_d(vec1, vec2, vec3); break; ++ case Op_MinReductionV: vmin_d(vec1, vec2, vec3); break; ++ case Op_AndReductionV: vand_v(vec1, vec2, vec3); break; ++ case Op_OrReductionV: vor_v(vec1, vec2, vec3); break; ++ case Op_XorReductionV: vxor_v(vec1, vec2, vec3); break; ++ default: ++ ShouldNotReachHere(); ++ } ++ break; ++ default: ++ ShouldNotReachHere(); ++ } ++} ++ ++void C2_MacroAssembler::reduce_ins_r(Register reg1, Register reg2, Register reg3, BasicType type, int opcode) { ++ switch (type) { ++ case T_BYTE: ++ case T_SHORT: ++ case T_INT: ++ switch (opcode) { ++ case Op_AddReductionVI: add_w(reg1, reg2, reg3); break; ++ case Op_MulReductionVI: mul_w(reg1, reg2, reg3); break; ++ case Op_AndReductionV: andr(reg1, reg2, reg3); break; ++ case Op_OrReductionV: orr(reg1, reg2, reg3); break; ++ case Op_XorReductionV: xorr(reg1, reg2, reg3); break; ++ default: ++ ShouldNotReachHere(); ++ } ++ break; ++ case T_LONG: ++ switch (opcode) { ++ case Op_AddReductionVL: add_d(reg1, reg2, reg3); break; ++ case Op_MulReductionVL: mul_d(reg1, reg2, reg3); break; ++ case Op_AndReductionV: andr(reg1, reg2, reg3); break; ++ case Op_OrReductionV: orr(reg1, reg2, reg3); break; ++ case Op_XorReductionV: xorr(reg1, reg2, reg3); break; ++ default: ++ ShouldNotReachHere(); ++ } ++ break; ++ default: ++ ShouldNotReachHere(); ++ } ++} ++ ++void C2_MacroAssembler::reduce_ins_f(FloatRegister reg1, FloatRegister reg2, FloatRegister reg3, BasicType type, int opcode) { ++ switch (type) { ++ case T_FLOAT: ++ switch (opcode) { ++ case Op_AddReductionVF: fadd_s(reg1, reg2, reg3); break; ++ case Op_MulReductionVF: fmul_s(reg1, reg2, reg3); break; ++ default: ++ ShouldNotReachHere(); ++ } ++ break; ++ case T_DOUBLE: ++ switch (opcode) { ++ case Op_AddReductionVD: fadd_d(reg1, reg2, reg3); break; ++ case Op_MulReductionVD: fmul_d(reg1, reg2, reg3); break; ++ default: ++ ShouldNotReachHere(); ++ } ++ break; ++ default: ++ ShouldNotReachHere(); ++ } ++} ++ ++void C2_MacroAssembler::reduce(Register dst, 
Register src, FloatRegister vsrc, FloatRegister tmp1, FloatRegister tmp2, BasicType type, int opcode, int vector_size) { ++ if (vector_size == 32) { ++ xvpermi_d(tmp1, vsrc, 0b00001110); ++ reduce_ins_v(tmp1, vsrc, tmp1, type, opcode); ++ vpermi_w(tmp2, tmp1, 0b00001110); ++ reduce_ins_v(tmp1, tmp2, tmp1, type, opcode); ++ } else if (vector_size == 16) { ++ vpermi_w(tmp1, vsrc, 0b00001110); ++ reduce_ins_v(tmp1, vsrc, tmp1, type, opcode); ++ } else { ++ ShouldNotReachHere(); ++ } ++ ++ if (type != T_LONG) { ++ vshuf4i_w(tmp2, tmp1, 0b00000001); ++ reduce_ins_v(tmp1, tmp2, tmp1, type, opcode); ++ if (type != T_INT) { ++ vshuf4i_h(tmp2, tmp1, 0b00000001); ++ reduce_ins_v(tmp1, tmp2, tmp1, type, opcode); ++ if (type != T_SHORT) { ++ vshuf4i_b(tmp2, tmp1, 0b00000001); ++ reduce_ins_v(tmp1, tmp2, tmp1, type, opcode); ++ } ++ } ++ } ++ ++ switch (type) { ++ case T_BYTE: vpickve2gr_b(dst, tmp1, 0); break; ++ case T_SHORT: vpickve2gr_h(dst, tmp1, 0); break; ++ case T_INT: vpickve2gr_w(dst, tmp1, 0); break; ++ case T_LONG: vpickve2gr_d(dst, tmp1, 0); break; ++ default: ++ ShouldNotReachHere(); ++ } ++ if (opcode == Op_MaxReductionV) { ++ slt(AT, dst, src); ++ masknez(dst, dst, AT); ++ maskeqz(AT, src, AT); ++ orr(dst, dst, AT); ++ } else if (opcode == Op_MinReductionV) { ++ slt(AT, src, dst); ++ masknez(dst, dst, AT); ++ maskeqz(AT, src, AT); ++ orr(dst, dst, AT); ++ } else { ++ reduce_ins_r(dst, dst, src, type, opcode); ++ } ++ switch (type) { ++ case T_BYTE: ext_w_b(dst, dst); break; ++ case T_SHORT: ext_w_h(dst, dst); break; ++ default: ++ break; ++ } ++} ++ ++void C2_MacroAssembler::reduce(FloatRegister dst, FloatRegister src, FloatRegister vsrc, FloatRegister tmp, BasicType type, int opcode, int vector_size) { ++ if (vector_size == 32) { ++ switch (type) { ++ case T_FLOAT: ++ reduce_ins_f(dst, vsrc, src, type, opcode); ++ xvpickve_w(tmp, vsrc, 1); ++ reduce_ins_f(dst, tmp, dst, type, opcode); ++ xvpickve_w(tmp, vsrc, 2); ++ reduce_ins_f(dst, tmp, dst, type, opcode); ++ xvpickve_w(tmp, vsrc, 3); ++ reduce_ins_f(dst, tmp, dst, type, opcode); ++ xvpickve_w(tmp, vsrc, 4); ++ reduce_ins_f(dst, tmp, dst, type, opcode); ++ xvpickve_w(tmp, vsrc, 5); ++ reduce_ins_f(dst, tmp, dst, type, opcode); ++ xvpickve_w(tmp, vsrc, 6); ++ reduce_ins_f(dst, tmp, dst, type, opcode); ++ xvpickve_w(tmp, vsrc, 7); ++ reduce_ins_f(dst, tmp, dst, type, opcode); ++ break; ++ case T_DOUBLE: ++ reduce_ins_f(dst, vsrc, src, type, opcode); ++ xvpickve_d(tmp, vsrc, 1); ++ reduce_ins_f(dst, tmp, dst, type, opcode); ++ xvpickve_d(tmp, vsrc, 2); ++ reduce_ins_f(dst, tmp, dst, type, opcode); ++ xvpickve_d(tmp, vsrc, 3); ++ reduce_ins_f(dst, tmp, dst, type, opcode); ++ break; ++ default: ++ ShouldNotReachHere(); ++ } ++ } else if (vector_size == 16) { ++ switch (type) { ++ case T_FLOAT: ++ reduce_ins_f(dst, vsrc, src, type, opcode); ++ vpermi_w(tmp, vsrc, 0b00000001); ++ reduce_ins_f(dst, tmp, dst, type, opcode); ++ vpermi_w(tmp, vsrc, 0b00000010); ++ reduce_ins_f(dst, tmp, dst, type, opcode); ++ vpermi_w(tmp, vsrc, 0b00000011); ++ reduce_ins_f(dst, tmp, dst, type, opcode); ++ break; ++ case T_DOUBLE: ++ reduce_ins_f(dst, vsrc, src, type, opcode); ++ vpermi_w(tmp, vsrc, 0b00001110); ++ reduce_ins_f(dst, tmp, dst, type, opcode); ++ break; ++ default: ++ ShouldNotReachHere(); ++ } ++ } else { ++ ShouldNotReachHere(); ++ } ++} ++ ++void C2_MacroAssembler::vector_compare(FloatRegister dst, FloatRegister src1, FloatRegister src2, BasicType bt, int cond, int vector_size) { ++ if (vector_size == 32) { ++ if (bt == T_BYTE) { ++ switch 
(cond) { ++ case BoolTest::ne: xvseq_b (dst, src1, src2); xvxori_b(dst, dst, 0xff); break; ++ case BoolTest::eq: xvseq_b (dst, src1, src2); break; ++ case BoolTest::ge: xvsle_b (dst, src2, src1); break; ++ case BoolTest::gt: xvslt_b (dst, src2, src1); break; ++ case BoolTest::le: xvsle_b (dst, src1, src2); break; ++ case BoolTest::lt: xvslt_b (dst, src1, src2); break; ++ case BoolTest::uge: xvsle_bu(dst, src2, src1); break; ++ case BoolTest::ugt: xvslt_bu(dst, src2, src1); break; ++ case BoolTest::ule: xvsle_bu(dst, src1, src2); break; ++ case BoolTest::ult: xvslt_bu(dst, src1, src2); break; ++ default: ++ ShouldNotReachHere(); ++ } ++ } else if (bt == T_SHORT) { ++ switch (cond) { ++ case BoolTest::ne: xvseq_h (dst, src1, src2); xvxori_b(dst, dst, 0xff); break; ++ case BoolTest::eq: xvseq_h (dst, src1, src2); break; ++ case BoolTest::ge: xvsle_h (dst, src2, src1); break; ++ case BoolTest::gt: xvslt_h (dst, src2, src1); break; ++ case BoolTest::le: xvsle_h (dst, src1, src2); break; ++ case BoolTest::lt: xvslt_h (dst, src1, src2); break; ++ case BoolTest::uge: xvsle_hu(dst, src2, src1); break; ++ case BoolTest::ugt: xvslt_hu(dst, src2, src1); break; ++ case BoolTest::ule: xvsle_hu(dst, src1, src2); break; ++ case BoolTest::ult: xvslt_hu(dst, src1, src2); break; ++ default: ++ ShouldNotReachHere(); ++ } ++ } else if (bt == T_INT) { ++ switch (cond) { ++ case BoolTest::ne: xvseq_w (dst, src1, src2); xvxori_b(dst, dst, 0xff); break; ++ case BoolTest::eq: xvseq_w (dst, src1, src2); break; ++ case BoolTest::ge: xvsle_w (dst, src2, src1); break; ++ case BoolTest::gt: xvslt_w (dst, src2, src1); break; ++ case BoolTest::le: xvsle_w (dst, src1, src2); break; ++ case BoolTest::lt: xvslt_w (dst, src1, src2); break; ++ case BoolTest::uge: xvsle_wu(dst, src2, src1); break; ++ case BoolTest::ugt: xvslt_wu(dst, src2, src1); break; ++ case BoolTest::ule: xvsle_wu(dst, src1, src2); break; ++ case BoolTest::ult: xvslt_wu(dst, src1, src2); break; ++ default: ++ ShouldNotReachHere(); ++ } ++ } else if (bt == T_LONG) { ++ switch (cond) { ++ case BoolTest::ne: xvseq_d (dst, src1, src2); xvxori_b(dst, dst, 0xff); break; ++ case BoolTest::eq: xvseq_d (dst, src1, src2); break; ++ case BoolTest::ge: xvsle_d (dst, src2, src1); break; ++ case BoolTest::gt: xvslt_d (dst, src2, src1); break; ++ case BoolTest::le: xvsle_d (dst, src1, src2); break; ++ case BoolTest::lt: xvslt_d (dst, src1, src2); break; ++ case BoolTest::uge: xvsle_du(dst, src2, src1); break; ++ case BoolTest::ugt: xvslt_du(dst, src2, src1); break; ++ case BoolTest::ule: xvsle_du(dst, src1, src2); break; ++ case BoolTest::ult: xvslt_du(dst, src1, src2); break; ++ default: ++ ShouldNotReachHere(); ++ } ++ } else if (bt == T_FLOAT) { ++ switch (cond) { ++ case BoolTest::ne: xvfcmp_cune_s(dst, src1, src2); break; ++ case BoolTest::eq: xvfcmp_ceq_s (dst, src1, src2); break; ++ case BoolTest::ge: xvfcmp_cle_s (dst, src2, src1); break; ++ case BoolTest::gt: xvfcmp_clt_s (dst, src2, src1); break; ++ case BoolTest::le: xvfcmp_cule_s(dst, src1, src2); break; ++ case BoolTest::lt: xvfcmp_cult_s(dst, src1, src2); break; ++ default: ++ ShouldNotReachHere(); ++ } ++ } else if (bt == T_DOUBLE) { ++ switch (cond) { ++ case BoolTest::ne: xvfcmp_cune_d(dst, src1, src2); break; ++ case BoolTest::eq: xvfcmp_ceq_d (dst, src1, src2); break; ++ case BoolTest::ge: xvfcmp_cle_d (dst, src2, src1); break; ++ case BoolTest::gt: xvfcmp_clt_d (dst, src2, src1); break; ++ case BoolTest::le: xvfcmp_cule_d(dst, src1, src2); break; ++ case BoolTest::lt: xvfcmp_cult_d(dst, src1, src2); 
break; ++ default: ++ ShouldNotReachHere(); ++ } ++ } else { ++ ShouldNotReachHere(); ++ } ++ } else if (vector_size == 16) { ++ if (bt == T_BYTE) { ++ switch (cond) { ++ case BoolTest::ne: vseq_b (dst, src1, src2); vxori_b(dst, dst, 0xff); break; ++ case BoolTest::eq: vseq_b (dst, src1, src2); break; ++ case BoolTest::ge: vsle_b (dst, src2, src1); break; ++ case BoolTest::gt: vslt_b (dst, src2, src1); break; ++ case BoolTest::le: vsle_b (dst, src1, src2); break; ++ case BoolTest::lt: vslt_b (dst, src1, src2); break; ++ case BoolTest::uge: vsle_bu(dst, src2, src1); break; ++ case BoolTest::ugt: vslt_bu(dst, src2, src1); break; ++ case BoolTest::ule: vsle_bu(dst, src1, src2); break; ++ case BoolTest::ult: vslt_bu(dst, src1, src2); break; ++ default: ++ ShouldNotReachHere(); ++ } ++ } else if (bt == T_SHORT) { ++ switch (cond) { ++ case BoolTest::ne: vseq_h (dst, src1, src2); vxori_b(dst, dst, 0xff); break; ++ case BoolTest::eq: vseq_h (dst, src1, src2); break; ++ case BoolTest::ge: vsle_h (dst, src2, src1); break; ++ case BoolTest::gt: vslt_h (dst, src2, src1); break; ++ case BoolTest::le: vsle_h (dst, src1, src2); break; ++ case BoolTest::lt: vslt_h (dst, src1, src2); break; ++ case BoolTest::uge: vsle_hu(dst, src2, src1); break; ++ case BoolTest::ugt: vslt_hu(dst, src2, src1); break; ++ case BoolTest::ule: vsle_hu(dst, src1, src2); break; ++ case BoolTest::ult: vslt_hu(dst, src1, src2); break; ++ default: ++ ShouldNotReachHere(); ++ } ++ } else if (bt == T_INT) { ++ switch (cond) { ++ case BoolTest::ne: vseq_w (dst, src1, src2); vxori_b(dst, dst, 0xff); break; ++ case BoolTest::eq: vseq_w (dst, src1, src2); break; ++ case BoolTest::ge: vsle_w (dst, src2, src1); break; ++ case BoolTest::gt: vslt_w (dst, src2, src1); break; ++ case BoolTest::le: vsle_w (dst, src1, src2); break; ++ case BoolTest::lt: vslt_w (dst, src1, src2); break; ++ case BoolTest::uge: vsle_wu(dst, src2, src1); break; ++ case BoolTest::ugt: vslt_wu(dst, src2, src1); break; ++ case BoolTest::ule: vsle_wu(dst, src1, src2); break; ++ case BoolTest::ult: vslt_wu(dst, src1, src2); break; ++ default: ++ ShouldNotReachHere(); ++ } ++ } else if (bt == T_LONG) { ++ switch (cond) { ++ case BoolTest::ne: vseq_d (dst, src1, src2); vxori_b(dst, dst, 0xff); break; ++ case BoolTest::eq: vseq_d (dst, src1, src2); break; ++ case BoolTest::ge: vsle_d (dst, src2, src1); break; ++ case BoolTest::gt: vslt_d (dst, src2, src1); break; ++ case BoolTest::le: vsle_d (dst, src1, src2); break; ++ case BoolTest::lt: vslt_d (dst, src1, src2); break; ++ case BoolTest::uge: vsle_du(dst, src2, src1); break; ++ case BoolTest::ugt: vslt_du(dst, src2, src1); break; ++ case BoolTest::ule: vsle_du(dst, src1, src2); break; ++ case BoolTest::ult: vslt_du(dst, src1, src2); break; ++ default: ++ ShouldNotReachHere(); ++ } ++ } else if (bt == T_FLOAT) { ++ switch (cond) { ++ case BoolTest::ne: vfcmp_cune_s(dst, src1, src2); break; ++ case BoolTest::eq: vfcmp_ceq_s (dst, src1, src2); break; ++ case BoolTest::ge: vfcmp_cle_s (dst, src2, src1); break; ++ case BoolTest::gt: vfcmp_clt_s (dst, src2, src1); break; ++ case BoolTest::le: vfcmp_cule_s(dst, src1, src2); break; ++ case BoolTest::lt: vfcmp_cult_s(dst, src1, src2); break; ++ default: ++ ShouldNotReachHere(); ++ } ++ } else if (bt == T_DOUBLE) { ++ switch (cond) { ++ case BoolTest::ne: vfcmp_cune_d(dst, src1, src2); break; ++ case BoolTest::eq: vfcmp_ceq_d (dst, src1, src2); break; ++ case BoolTest::ge: vfcmp_cle_d (dst, src2, src1); break; ++ case BoolTest::gt: vfcmp_clt_d (dst, src2, src1); break; ++ case 
BoolTest::le: vfcmp_cule_d(dst, src1, src2); break; ++ case BoolTest::lt: vfcmp_cult_d(dst, src1, src2); break; ++ default: ++ ShouldNotReachHere(); ++ } ++ } else { ++ ShouldNotReachHere(); ++ } ++ } else { ++ ShouldNotReachHere(); ++ } ++} ++ ++void C2_MacroAssembler::cmp_branch_short(int flag, Register op1, Register op2, Label& L, bool is_signed) { ++ ++ switch(flag) { ++ case 0x01: //equal ++ beq(op1, op2, L); ++ break; ++ case 0x02: //not_equal ++ bne(op1, op2, L); ++ break; ++ case 0x03: //above ++ if (is_signed) ++ blt(op2, op1, L); ++ else ++ bltu(op2, op1, L); ++ break; ++ case 0x04: //above_equal ++ if (is_signed) ++ bge(op1, op2, L); ++ else ++ bgeu(op1, op2, L); ++ break; ++ case 0x05: //below ++ if (is_signed) ++ blt(op1, op2, L); ++ else ++ bltu(op1, op2, L); ++ break; ++ case 0x06: //below_equal ++ if (is_signed) ++ bge(op2, op1, L); ++ else ++ bgeu(op2, op1, L); ++ break; ++ default: ++ Unimplemented(); ++ } ++} ++ ++void C2_MacroAssembler::cmp_branch_long(int flag, Register op1, Register op2, Label* L, bool is_signed) { ++ switch(flag) { ++ case 0x01: //equal ++ beq_long(op1, op2, *L); ++ break; ++ case 0x02: //not_equal ++ bne_long(op1, op2, *L); ++ break; ++ case 0x03: //above ++ if (is_signed) ++ blt_long(op2, op1, *L, true /* signed */); ++ else ++ blt_long(op2, op1, *L, false); ++ break; ++ case 0x04: //above_equal ++ if (is_signed) ++ bge_long(op1, op2, *L, true /* signed */); ++ else ++ bge_long(op1, op2, *L, false); ++ break; ++ case 0x05: //below ++ if (is_signed) ++ blt_long(op1, op2, *L, true /* signed */); ++ else ++ blt_long(op1, op2, *L, false); ++ break; ++ case 0x06: //below_equal ++ if (is_signed) ++ bge_long(op2, op1, *L, true /* signed */); ++ else ++ bge_long(op2, op1, *L, false); ++ break; ++ default: ++ Unimplemented(); ++ } ++} ++ ++void C2_MacroAssembler::cmp_branchEqNe_off21(int flag, Register op1, Label& L) { ++ switch(flag) { ++ case 0x01: //equal ++ beqz(op1, L); ++ break; ++ case 0x02: //not_equal ++ bnez(op1, L); ++ break; ++ default: ++ Unimplemented(); ++ } ++} +diff --git a/src/hotspot/cpu/loongarch/c2_MacroAssembler_loongarch.hpp b/src/hotspot/cpu/loongarch/c2_MacroAssembler_loongarch.hpp +new file mode 100644 +index 00000000000..2babade2e22 +--- /dev/null ++++ b/src/hotspot/cpu/loongarch/c2_MacroAssembler_loongarch.hpp +@@ -0,0 +1,144 @@ ++/* ++ * Copyright (c) 2020, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2021, 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ * ++ */ ++ ++#ifndef CPU_LOONGARCH_C2_MACROASSEMBLER_LOONGARCH_HPP ++#define CPU_LOONGARCH_C2_MACROASSEMBLER_LOONGARCH_HPP ++ ++// C2_MacroAssembler contains high-level macros for C2 ++ ++public: ++ ++ void cmp_branch_short(int flag, Register op1, Register op2, Label& L, bool is_signed); ++ void cmp_branch_long(int flag, Register op1, Register op2, Label* L, bool is_signed); ++ void cmp_branchEqNe_off21(int flag, Register op1, Label& L); ++ void fast_lock(Register obj, Register box, Register res, Register tmp, Register scr); ++ void fast_unlock(Register obj, Register box, Register res, Register tmp, Register scr); ++ ++ // For C2 to support long branches ++ void beq_long (Register rs, Register rt, Label& L); ++ void bne_long (Register rs, Register rt, Label& L); ++ void blt_long (Register rs, Register rt, Label& L, bool is_signed); ++ void bge_long (Register rs, Register rt, Label& L, bool is_signed); ++ void bc1t_long (Label& L); ++ void bc1f_long (Label& L); ++ ++ // Compare strings. ++ void string_compare(Register str1, Register str2, ++ Register cnt1, Register cnt2, Register result, ++ int ae, Register tmp1, Register tmp2); ++ ++ // Find index of char in Latin-1 string ++ void stringL_indexof_char(Register str1, Register cnt1, ++ Register ch, Register result, ++ Register tmp1, Register tmp2, ++ Register tmp3); ++ ++ // Find index of char in UTF-16 string ++ void string_indexof_char(Register str1, Register cnt1, ++ Register ch, Register result, ++ Register tmp1, Register tmp2, ++ Register tmp3); ++ ++ void string_indexof(Register haystack, Register needle, ++ Register haystack_len, Register needle_len, ++ Register result, int ae); ++ ++ void string_indexof_linearscan(Register haystack, Register needle, ++ Register haystack_len, Register needle_len, ++ int needle_con_cnt, Register result, int ae); ++ ++ // Compare char[] or byte[] arrays. 
++ void arrays_equals(Register str1, Register str2, ++ Register cnt, Register tmp1, Register tmp2, Register result, ++ bool is_char); ++ ++ // Memory Data Type ++ #define INT_TYPE 0x100 ++ #define FLOAT_TYPE 0x200 ++ #define SIGNED_TYPE 0x10 ++ #define UNSIGNED_TYPE 0x20 ++ ++ typedef enum { ++ LOAD_BYTE = INT_TYPE | SIGNED_TYPE | 0x1, ++ LOAD_CHAR = INT_TYPE | SIGNED_TYPE | 0x2, ++ LOAD_SHORT = INT_TYPE | SIGNED_TYPE | 0x3, ++ LOAD_INT = INT_TYPE | SIGNED_TYPE | 0x4, ++ LOAD_LONG = INT_TYPE | SIGNED_TYPE | 0x5, ++ STORE_BYTE = INT_TYPE | SIGNED_TYPE | 0x6, ++ STORE_CHAR = INT_TYPE | SIGNED_TYPE | 0x7, ++ STORE_SHORT = INT_TYPE | SIGNED_TYPE | 0x8, ++ STORE_INT = INT_TYPE | SIGNED_TYPE | 0x9, ++ STORE_LONG = INT_TYPE | SIGNED_TYPE | 0xa, ++ LOAD_LINKED_LONG = INT_TYPE | SIGNED_TYPE | 0xb, ++ ++ LOAD_U_BYTE = INT_TYPE | UNSIGNED_TYPE | 0x1, ++ LOAD_U_SHORT = INT_TYPE | UNSIGNED_TYPE | 0x2, ++ LOAD_U_INT = INT_TYPE | UNSIGNED_TYPE | 0x3, ++ ++ LOAD_FLOAT = FLOAT_TYPE | SIGNED_TYPE | 0x1, ++ LOAD_DOUBLE = FLOAT_TYPE | SIGNED_TYPE | 0x2, ++ LOAD_VECTORX = FLOAT_TYPE | SIGNED_TYPE | 0x3, ++ LOAD_VECTORY = FLOAT_TYPE | SIGNED_TYPE | 0x4, ++ STORE_FLOAT = FLOAT_TYPE | SIGNED_TYPE | 0x5, ++ STORE_DOUBLE = FLOAT_TYPE | SIGNED_TYPE | 0x6, ++ STORE_VECTORX = FLOAT_TYPE | SIGNED_TYPE | 0x7, ++ STORE_VECTORY = FLOAT_TYPE | SIGNED_TYPE | 0x8 ++ } CMLoadStoreDataType; ++ ++ void loadstore_enc(Register reg, int base, int index, int scale, int disp, int type) { ++ assert((type & INT_TYPE), "must be General reg type"); ++ loadstore_t(reg, base, index, scale, disp, type); ++ } ++ ++ void loadstore_enc(FloatRegister reg, int base, int index, int scale, int disp, int type) { ++ assert((type & FLOAT_TYPE), "must be Float reg type"); ++ loadstore_t(reg, base, index, scale, disp, type); ++ } ++ ++ void reduce(Register dst, Register src, FloatRegister vsrc, FloatRegister tmp1, FloatRegister tmp2, BasicType type, int opcode, int vector_size); ++ void reduce(FloatRegister dst, FloatRegister src, FloatRegister vsrc, FloatRegister tmp, BasicType type, int opcode, int vector_size); ++ ++ void vector_compare(FloatRegister dst, FloatRegister src1, FloatRegister src2, BasicType type, int cond, int vector_size); ++ ++private: ++ ++ template <typename T> ++ void loadstore_t(T reg, int base, int index, int scale, int disp, int type) { ++ if (index != -1) { ++ assert(((scale==0)&&(disp==0)), "only support base+index"); ++ loadstore(reg, as_Register(base), as_Register(index), type); ++ } else { ++ loadstore(reg, as_Register(base), disp, type); ++ } ++ } ++ void loadstore(Register reg, Register base, int disp, int type); ++ void loadstore(Register reg, Register base, Register disp, int type); ++ void loadstore(FloatRegister reg, Register base, int disp, int type); ++ void loadstore(FloatRegister reg, Register base, Register disp, int type); ++ ++ void reduce_ins_v(FloatRegister vec1, FloatRegister vec2, FloatRegister vec3, BasicType type, int opcode); ++ void reduce_ins_r(Register reg1, Register reg2, Register reg3, BasicType type, int opcode); ++ void reduce_ins_f(FloatRegister reg1, FloatRegister reg2, FloatRegister reg3, BasicType type, int opcode); ++#endif // CPU_LOONGARCH_C2_MACROASSEMBLER_LOONGARCH_HPP +diff --git a/src/hotspot/cpu/loongarch/c2_globals_loongarch.hpp b/src/hotspot/cpu/loongarch/c2_globals_loongarch.hpp +new file mode 100644 +index 00000000000..0e40e2ca7cf +--- /dev/null ++++ b/src/hotspot/cpu/loongarch/c2_globals_loongarch.hpp +@@ -0,0 +1,90 @@ ++/* ++ * Copyright (c) 2000, 2013, Oracle and/or its affiliates.
All rights reserved. ++ * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#ifndef CPU_LOONGARCH_C2_GLOBALS_LOONGARCH_HPP ++#define CPU_LOONGARCH_C2_GLOBALS_LOONGARCH_HPP ++ ++#include "utilities/globalDefinitions.hpp" ++#include "utilities/macros.hpp" ++ ++// Sets the default values for platform dependent flags used by the server compiler. ++// (see c2_globals.hpp). Alpha-sorted. ++define_pd_global(bool, BackgroundCompilation, true); ++define_pd_global(bool, UseTLAB, true); ++define_pd_global(bool, ResizeTLAB, true); ++define_pd_global(bool, CICompileOSR, true); ++define_pd_global(bool, InlineIntrinsics, true); ++define_pd_global(bool, PreferInterpreterNativeStubs, false); ++define_pd_global(bool, ProfileTraps, true); ++define_pd_global(bool, UseOnStackReplacement, true); ++define_pd_global(bool, ProfileInterpreter, true); ++define_pd_global(bool, TieredCompilation, true); ++define_pd_global(intx, CompileThreshold, 10000); ++define_pd_global(intx, BackEdgeThreshold, 100000); ++ ++define_pd_global(intx, OnStackReplacePercentage, 140); ++define_pd_global(intx, ConditionalMoveLimit, 3); ++define_pd_global(intx, FLOATPRESSURE, 31); ++define_pd_global(intx, FreqInlineSize, 325); ++define_pd_global(intx, MinJumpTableSize, 10); ++define_pd_global(intx, INTPRESSURE, 23); ++define_pd_global(intx, InteriorEntryAlignment, 16); ++define_pd_global(intx, NewSizeThreadIncrease, ScaleForWordSize(4*K)); ++define_pd_global(intx, LoopUnrollLimit, 60); ++define_pd_global(intx, LoopPercentProfileLimit, 10); ++// InitialCodeCacheSize derived from specjbb2000 run. ++define_pd_global(intx, InitialCodeCacheSize, 2496*K); // Integral multiple of CodeCacheExpansionSize ++define_pd_global(intx, CodeCacheExpansionSize, 64*K); ++ ++// Ergonomics related flags ++define_pd_global(uint64_t,MaxRAM, 128ULL*G); ++define_pd_global(intx, RegisterCostAreaRatio, 16000); ++ ++// Peephole and CISC spilling both break the graph, and so makes the ++// scheduler sick. 
++define_pd_global(bool, OptoPeephole, false); ++define_pd_global(bool, UseCISCSpill, false); ++define_pd_global(bool, OptoScheduling, false); ++define_pd_global(bool, OptoBundling, false); ++define_pd_global(bool, OptoRegScheduling, false); ++define_pd_global(bool, SuperWordLoopUnrollAnalysis, true); ++define_pd_global(bool, IdealizeClearArrayNode, true); ++ ++define_pd_global(intx, ReservedCodeCacheSize, 48*M); ++define_pd_global(intx, NonProfiledCodeHeapSize, 21*M); ++define_pd_global(intx, ProfiledCodeHeapSize, 22*M); ++define_pd_global(intx, NonNMethodCodeHeapSize, 5*M ); ++define_pd_global(uintx, CodeCacheMinBlockLength, 4); ++define_pd_global(uintx, CodeCacheMinimumUseSpace, 400*K); ++ ++define_pd_global(bool, TrapBasedRangeChecks, false); ++ ++// Heap related flags ++define_pd_global(uintx,MetaspaceSize, ScaleForWordSize(16*M)); ++ ++// Ergonomics related flags ++define_pd_global(bool, NeverActAsServerClassMachine, false); ++ ++#endif // CPU_LOONGARCH_C2_GLOBALS_LOONGARCH_HPP +diff --git a/src/hotspot/cpu/loongarch/c2_init_loongarch.cpp b/src/hotspot/cpu/loongarch/c2_init_loongarch.cpp +new file mode 100644 +index 00000000000..ec78b942d40 +--- /dev/null ++++ b/src/hotspot/cpu/loongarch/c2_init_loongarch.cpp +@@ -0,0 +1,37 @@ ++/* ++ * Copyright (c) 2000, 2010, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#include "precompiled.hpp" ++#include "opto/compile.hpp" ++#include "opto/node.hpp" ++ ++// processor dependent initialization for LoongArch ++ ++extern void reg_mask_init(); ++ ++void Compile::pd_compiler2_init() { ++ guarantee(CodeEntryAlignment >= InteriorEntryAlignment, "" ); ++ reg_mask_init(); ++} +diff --git a/src/hotspot/cpu/loongarch/c2_safepointPollStubTable_loongarch.cpp b/src/hotspot/cpu/loongarch/c2_safepointPollStubTable_loongarch.cpp +new file mode 100644 +index 00000000000..866858b26b8 +--- /dev/null ++++ b/src/hotspot/cpu/loongarch/c2_safepointPollStubTable_loongarch.cpp +@@ -0,0 +1,50 @@ ++/* ++ * Copyright (c) 2020, 2021, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2021, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. 
++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#include "precompiled.hpp" ++#include "asm/macroAssembler.hpp" ++#include "opto/compile.hpp" ++#include "opto/node.hpp" ++#include "opto/output.hpp" ++#include "runtime/sharedRuntime.hpp" ++ ++#define __ masm. ++void C2SafepointPollStubTable::emit_stub_impl(MacroAssembler& masm, C2SafepointPollStub* entry) const { ++ assert(SharedRuntime::polling_page_return_handler_blob() != NULL, ++ "polling page return stub not created yet"); ++ address stub = SharedRuntime::polling_page_return_handler_blob()->entry_point(); ++ ++ Register thread = TREG; ++#ifndef OPT_THREAD ++ get_thread(thread); ++#endif ++ ++ __ bind(entry->_stub_label); ++ InternalAddress safepoint_pc(masm.pc() - masm.offset() + entry->_safepoint_offset); ++ __ lea(AT, safepoint_pc); ++ __ st_d(AT, Address(thread, JavaThread::saved_exception_pc_offset())); ++ __ jmp(stub, relocInfo::runtime_call_type); ++} ++#undef __ +diff --git a/src/hotspot/cpu/loongarch/codeBuffer_loongarch.hpp b/src/hotspot/cpu/loongarch/codeBuffer_loongarch.hpp +new file mode 100644 +index 00000000000..653d95806bf +--- /dev/null ++++ b/src/hotspot/cpu/loongarch/codeBuffer_loongarch.hpp +@@ -0,0 +1,35 @@ ++/* ++ * Copyright (c) 2002, 2010, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2021, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ * ++ */ ++ ++#ifndef CPU_LOONGARCH_CODEBUFFER_LOONGARCH_HPP ++#define CPU_LOONGARCH_CODEBUFFER_LOONGARCH_HPP ++ ++private: ++ void pd_initialize() {} ++ ++public: ++ void flush_bundle(bool start_new_bundle) {} ++ ++#endif // CPU_LOONGARCH_CODEBUFFER_LOONGARCH_HPP +diff --git a/src/hotspot/cpu/loongarch/compiledIC_loongarch.cpp b/src/hotspot/cpu/loongarch/compiledIC_loongarch.cpp +new file mode 100644 +index 00000000000..b3f70a1665b +--- /dev/null ++++ b/src/hotspot/cpu/loongarch/compiledIC_loongarch.cpp +@@ -0,0 +1,144 @@ ++/* ++ * Copyright (c) 1997, 2014, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#include "precompiled.hpp" ++#include "asm/macroAssembler.inline.hpp" ++#include "code/compiledIC.hpp" ++#include "code/icBuffer.hpp" ++#include "code/nmethod.hpp" ++#include "memory/resourceArea.hpp" ++#include "runtime/mutexLocker.hpp" ++#include "runtime/safepoint.hpp" ++ ++// ---------------------------------------------------------------------------- ++ ++#define __ _masm. ++address CompiledStaticCall::emit_to_interp_stub(CodeBuffer &cbuf, address mark) { ++ precond(cbuf.stubs()->start() != badAddress); ++ precond(cbuf.stubs()->end() != badAddress); ++ ++ if (mark == NULL) { ++ mark = cbuf.insts_mark(); // get mark within main instrs section ++ } ++ ++ // Note that the code buffer's insts_mark is always relative to insts. ++ // That's why we must use the macroassembler to generate a stub. ++ MacroAssembler _masm(&cbuf); ++ ++ address base = __ start_a_stub(CompiledStaticCall::to_interp_stub_size()); ++ if (base == NULL) return NULL; // CodeBuffer::expand failed ++ // static stub relocation stores the instruction address of the call ++ ++ __ relocate(static_stub_Relocation::spec(mark), 0); ++ ++ // Code stream for loading method may be changed. ++ __ ibar(0); ++ ++ // Rmethod contains Method*, it should be relocated for GC ++ // static stub relocation also tags the Method* in the code-stream. ++ __ mov_metadata(Rmethod, NULL); ++ // This is recognized as unresolved by relocs/nativeInst/ic code ++ ++ cbuf.set_insts_mark(); ++ __ patchable_jump(__ pc()); ++ // Update current stubs pointer and restore code_end. 
++ __ end_a_stub(); ++ return base; ++} ++#undef __ ++ ++int CompiledStaticCall::to_interp_stub_size() { ++ return NativeInstruction::nop_instruction_size + NativeMovConstReg::instruction_size + NativeGeneralJump::instruction_size; ++} ++ ++int CompiledStaticCall::to_trampoline_stub_size() { ++ return NativeInstruction::nop_instruction_size + NativeCallTrampolineStub::instruction_size; ++} ++ ++// Relocation entries for call stub, compiled java to interpreter. ++int CompiledStaticCall::reloc_to_interp_stub() { ++ return 16; ++} ++ ++void CompiledDirectStaticCall::set_to_interpreted(const methodHandle& callee, address entry) { ++ address stub = find_stub(); ++ guarantee(stub != NULL, "stub not found"); ++ ++ if (TraceICs) { ++ ResourceMark rm; ++ tty->print_cr("CompiledDirectStaticCall@" INTPTR_FORMAT ": set_to_interpreted %s", ++ p2i(instruction_address()), ++ callee->name_and_sig_as_C_string()); ++ } ++ ++ // Creation also verifies the object. ++ NativeMovConstReg* method_holder = nativeMovConstReg_at(stub + NativeInstruction::nop_instruction_size); ++ NativeGeneralJump* jump = nativeGeneralJump_at(method_holder->next_instruction_address()); ++ verify_mt_safe(callee, entry, method_holder, jump); ++ ++ // Update stub. ++ method_holder->set_data((intptr_t)callee()); ++ jump->set_jump_destination(entry); ++ ++ // Update jump to call. ++ set_destination_mt_safe(stub); ++} ++ ++void CompiledDirectStaticCall::set_stub_to_clean(static_stub_Relocation* static_stub) { ++ assert (CompiledIC_lock->is_locked() || SafepointSynchronize::is_at_safepoint(), "mt unsafe call"); ++ // Reset stub. ++ address stub = static_stub->addr(); ++ assert(stub != NULL, "stub not found"); ++ // Creation also verifies the object. ++ NativeMovConstReg* method_holder = nativeMovConstReg_at(stub + NativeInstruction::nop_instruction_size); ++ NativeGeneralJump* jump = nativeGeneralJump_at(method_holder->next_instruction_address()); ++ method_holder->set_data(0); ++ jump->set_jump_destination(jump->instruction_address()); ++} ++ ++//----------------------------------------------------------------------------- ++// Non-product mode code ++#ifndef PRODUCT ++ ++void CompiledDirectStaticCall::verify() { ++ // Verify call. ++ _call->verify(); ++ if (os::is_MP()) { ++ _call->verify_alignment(); ++ } ++ ++ // Verify stub. ++ address stub = find_stub(); ++ assert(stub != NULL, "no stub found for static call"); ++ // Creation also verifies the object. ++ NativeMovConstReg* method_holder = nativeMovConstReg_at(stub + NativeInstruction::nop_instruction_size); ++ NativeGeneralJump* jump = nativeGeneralJump_at(method_holder->next_instruction_address()); ++ ++ ++ // Verify state. ++ assert(is_clean() || is_call_to_compiled() || is_call_to_interpreted(), "sanity check"); ++} ++ ++#endif // !PRODUCT +diff --git a/src/hotspot/cpu/loongarch/copy_loongarch.hpp b/src/hotspot/cpu/loongarch/copy_loongarch.hpp +new file mode 100644 +index 00000000000..54b847a7369 +--- /dev/null ++++ b/src/hotspot/cpu/loongarch/copy_loongarch.hpp +@@ -0,0 +1,77 @@ ++/* ++ * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2021, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. 
++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#ifndef CPU_LOONGARCH_COPY_LOONGARCH_HPP ++#define CPU_LOONGARCH_COPY_LOONGARCH_HPP ++ ++// Inline functions for memory copy and fill. ++ ++// Contains inline asm implementations ++#include OS_CPU_HEADER_INLINE(copy) ++ ++// Template for atomic, element-wise copy. ++template <typename T> ++static void copy_conjoint_atomic(const T* from, T* to, size_t count) { ++ if (from > to) { ++ while (count-- > 0) { ++ // Copy forwards ++ *to++ = *from++; ++ } ++ } else { ++ from += count - 1; ++ to += count - 1; ++ while (count-- > 0) { ++ // Copy backwards ++ *to-- = *from--; ++ } ++ } ++} ++ ++ ++static void pd_fill_to_words(HeapWord* tohw, size_t count, juint value) { ++ julong* to = (julong*) tohw; ++ julong v = ((julong) value << 32) | value; ++ while (count-- > 0) { ++ *to++ = v; ++ } ++} ++ ++static void pd_fill_to_aligned_words(HeapWord* tohw, size_t count, juint value) { ++ pd_fill_to_words(tohw, count, value); ++} ++ ++static void pd_fill_to_bytes(void* to, size_t count, jubyte value) { ++ (void)memset(to, value, count); ++} ++ ++static void pd_zero_to_words(HeapWord* tohw, size_t count) { ++ pd_fill_to_words(tohw, count, 0); ++} ++ ++static void pd_zero_to_bytes(void* to, size_t count) { ++ (void)memset(to, 0, count); ++} ++ ++#endif //CPU_LOONGARCH_COPY_LOONGARCH_HPP +diff --git a/src/hotspot/cpu/loongarch/disassembler_loongarch.hpp b/src/hotspot/cpu/loongarch/disassembler_loongarch.hpp +new file mode 100644 +index 00000000000..fd176c8f056 +--- /dev/null ++++ b/src/hotspot/cpu/loongarch/disassembler_loongarch.hpp +@@ -0,0 +1,57 @@ ++/* ++ * Copyright (c) 1997, 2010, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2021, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation.
++ * ++ */ ++ ++#ifndef CPU_LOONGARCH_DISASSEMBLER_LOONGARCH_HPP ++#define CPU_LOONGARCH_DISASSEMBLER_LOONGARCH_HPP ++ ++ static int pd_instruction_alignment() { ++ return sizeof(int); ++ } ++ ++ static const char* pd_cpu_opts() { ++ return "gpr-names=64"; ++ } ++ ++ // Returns address of n-th instruction preceding addr, ++ // NULL if no preceding instruction can be found. ++ // With LoongArch being a RISC architecture, this always is BytesPerInstWord ++ // It might be beneficial to check "is_readable" as we do on ppc and s390. ++ static address find_prev_instr(address addr, int n_instr) { ++ return addr - BytesPerInstWord*n_instr; ++ } ++ ++ // special-case instruction decoding. ++ // There may be cases where the binutils disassembler doesn't do ++ // the perfect job. In those cases, decode_instruction0 may kick in ++ // and do it right. ++ // If nothing had to be done, just return "here", otherwise return "here + instr_len(here)" ++ static address decode_instruction0(address here, outputStream* st, address virtual_begin = NULL) { ++ return here; ++ } ++ ++ // platform-specific instruction annotations (like value of loaded constants) ++ static void annotate(address pc, outputStream* st) { }; ++ ++#endif // CPU_LOONGARCH_DISASSEMBLER_LOONGARCH_HPP +diff --git a/src/hotspot/cpu/loongarch/foreign_globals_loongarch.cpp b/src/hotspot/cpu/loongarch/foreign_globals_loongarch.cpp +new file mode 100644 +index 00000000000..fb4647c2723 +--- /dev/null ++++ b/src/hotspot/cpu/loongarch/foreign_globals_loongarch.cpp +@@ -0,0 +1,42 @@ ++/* ++ * Copyright (c) 2020, Red Hat, Inc. All rights reserved. ++ * Copyright (c) 2021, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ */ ++ ++#include "precompiled.hpp" ++#include "prims/foreign_globals.hpp" ++#include "utilities/debug.hpp" ++ ++const ABIDescriptor ForeignGlobals::parse_abi_descriptor_impl(jobject jabi) const { ++ Unimplemented(); ++ return {}; ++} ++ ++const BufferLayout ForeignGlobals::parse_buffer_layout_impl(jobject jlayout) const { ++ Unimplemented(); ++ return {}; ++} ++ ++const CallRegs ForeignGlobals::parse_call_regs_impl(jobject jconv) const { ++ ShouldNotCallThis(); ++ return {}; ++} +diff --git a/src/hotspot/cpu/loongarch/foreign_globals_loongarch.hpp b/src/hotspot/cpu/loongarch/foreign_globals_loongarch.hpp +new file mode 100644 +index 00000000000..680a27363ec +--- /dev/null ++++ b/src/hotspot/cpu/loongarch/foreign_globals_loongarch.hpp +@@ -0,0 +1,31 @@ ++/* ++ * Copyright (c) 2020, Red Hat, Inc. All rights reserved. 
++ * Copyright (c) 2021, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ */ ++ ++#ifndef CPU_LOONGARCH_FOREIGN_GLOBALS_LOONGARCH_HPP ++#define CPU_LOONGARCH_FOREIGN_GLOBALS_LOONGARCH_HPP ++ ++class BufferLayout {}; ++class ABIDescriptor {}; ++ ++#endif // CPU_LOONGARCH_FOREIGN_GLOBALS_LOONGARCH_HPP +diff --git a/src/hotspot/cpu/loongarch/frame_loongarch.cpp b/src/hotspot/cpu/loongarch/frame_loongarch.cpp +new file mode 100644 +index 00000000000..1aba8e4dd27 +--- /dev/null ++++ b/src/hotspot/cpu/loongarch/frame_loongarch.cpp +@@ -0,0 +1,668 @@ ++/* ++ * Copyright (c) 1997, 2014, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2021, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ * ++ */ ++ ++#include "precompiled.hpp" ++#include "compiler/oopMap.hpp" ++#include "interpreter/interpreter.hpp" ++#include "gc/shared/collectedHeap.hpp" ++#include "memory/resourceArea.hpp" ++#include "oops/markWord.hpp" ++#include "oops/method.hpp" ++#include "oops/oop.inline.hpp" ++#include "prims/methodHandles.hpp" ++#include "runtime/frame.inline.hpp" ++#include "runtime/handles.inline.hpp" ++#include "runtime/javaCalls.hpp" ++#include "runtime/monitorChunk.hpp" ++#include "runtime/signature.hpp" ++#include "runtime/stackWatermarkSet.hpp" ++#include "runtime/stubCodeGenerator.hpp" ++#include "runtime/stubRoutines.hpp" ++#include "vmreg_loongarch.inline.hpp" ++ ++#ifdef ASSERT ++void RegisterMap::check_location_valid() { ++} ++#endif ++ ++ ++// Profiling/safepoint support ++ ++bool frame::safe_for_sender(JavaThread *thread) { ++ address sp = (address)_sp; ++ address fp = (address)_fp; ++ address unextended_sp = (address)_unextended_sp; ++ ++ // consider stack guards when trying to determine "safe" stack pointers ++ // sp must be within the usable part of the stack (not in guards) ++ if (!thread->is_in_usable_stack(sp)) { ++ return false; ++ } ++ ++ // unextended sp must be within the stack and above or equal sp ++ if (!thread->is_in_stack_range_incl(unextended_sp, sp)) { ++ return false; ++ } ++ ++ // an fp must be within the stack and above (but not equal) sp ++ // second evaluation on fp+ is added to handle situation where fp is -1 ++ bool fp_safe = thread->is_in_stack_range_excl(fp, sp) && ++ thread->is_in_full_stack_checked(fp + (return_addr_offset * sizeof(void*))); ++ ++ // We know sp/unextended_sp are safe only fp is questionable here ++ ++ // If the current frame is known to the code cache then we can attempt to ++ // construct the sender and do some validation of it. This goes a long way ++ // toward eliminating issues when we get in frame construction code ++ ++ if (_cb != NULL ) { ++ ++ // First check if frame is complete and tester is reliable ++ // Unfortunately we can only check frame complete for runtime stubs and nmethod ++ // other generic buffer blobs are more problematic so we just assume they are ++ // ok. adapter blobs never have a frame complete and are never ok. ++ ++ if (!_cb->is_frame_complete_at(_pc)) { ++ if (_cb->is_compiled() || _cb->is_adapter_blob() || _cb->is_runtime_stub()) { ++ return false; ++ } ++ } ++ ++ // Could just be some random pointer within the codeBlob ++ if (!_cb->code_contains(_pc)) { ++ return false; ++ } ++ ++ // Entry frame checks ++ if (is_entry_frame()) { ++ // an entry frame must have a valid fp. ++ return fp_safe && is_entry_frame_valid(thread); ++ } ++ ++ intptr_t* sender_sp = NULL; ++ intptr_t* sender_unextended_sp = NULL; ++ address sender_pc = NULL; ++ intptr_t* saved_fp = NULL; ++ ++ if (is_interpreted_frame()) { ++ // fp must be safe ++ if (!fp_safe) { ++ return false; ++ } ++ ++ sender_pc = (address) this->fp()[return_addr_offset]; ++ // for interpreted frames, the value below is the sender "raw" sp, ++ // which can be different from the sender unextended sp (the sp seen ++ // by the sender) because of current frame local variables ++ sender_sp = (intptr_t*) addr_at(sender_sp_offset); ++ sender_unextended_sp = (intptr_t*) this->fp()[interpreter_frame_sender_sp_offset]; ++ saved_fp = (intptr_t*) this->fp()[link_offset]; ++ ++ } else { ++ // must be some sort of compiled/runtime frame ++ // fp does not have to be safe (although it could be check for c1?) 
++ ++ // check for a valid frame_size, otherwise we are unlikely to get a valid sender_pc ++ if (_cb->frame_size() <= 0) { ++ return false; ++ } ++ ++ sender_sp = _unextended_sp + _cb->frame_size(); ++ // Is sender_sp safe? ++ if (!thread->is_in_full_stack_checked((address)sender_sp)) { ++ return false; ++ } ++ sender_unextended_sp = sender_sp; ++ // On LA the return_address is always the word on the stack ++ sender_pc = (address) *(sender_sp - 1); ++ // Note: frame::sender_sp_offset is only valid for compiled frame ++ saved_fp = (intptr_t*) *(sender_sp - 2); ++ } ++ ++ ++ // If the potential sender is the interpreter then we can do some more checking ++ if (Interpreter::contains(sender_pc)) { ++ ++ // FP is always saved in a recognizable place in any code we generate. However ++ // only if the sender is interpreted/call_stub (c1 too?) are we certain that the saved FP ++ // is really a frame pointer. ++ ++ if (!thread->is_in_stack_range_excl((address)saved_fp, (address)sender_sp)) { ++ return false; ++ } ++ ++ // construct the potential sender ++ ++ frame sender(sender_sp, sender_unextended_sp, saved_fp, sender_pc); ++ ++ return sender.is_interpreted_frame_valid(thread); ++ ++ } ++ ++ // We must always be able to find a recognizable pc ++ CodeBlob* sender_blob = CodeCache::find_blob_unsafe(sender_pc); ++ if (sender_pc == NULL || sender_blob == NULL) { ++ return false; ++ } ++ ++ // Could be a zombie method ++ if (sender_blob->is_zombie() || sender_blob->is_unloaded()) { ++ return false; ++ } ++ ++ // Could just be some random pointer within the codeBlob ++ if (!sender_blob->code_contains(sender_pc)) { ++ return false; ++ } ++ ++ // We should never be able to see an adapter if the current frame is something from code cache ++ if (sender_blob->is_adapter_blob()) { ++ return false; ++ } ++ ++ // Could be the call_stub ++ if (StubRoutines::returns_to_call_stub(sender_pc)) { ++ if (!thread->is_in_stack_range_excl((address)saved_fp, (address)sender_sp)) { ++ return false; ++ } ++ ++ // construct the potential sender ++ ++ frame sender(sender_sp, sender_unextended_sp, saved_fp, sender_pc); ++ ++ // Validate the JavaCallWrapper an entry frame must have ++ address jcw = (address)sender.entry_frame_call_wrapper(); ++ ++ return thread->is_in_stack_range_excl(jcw, (address)sender.fp()); ++ } ++ ++ CompiledMethod* nm = sender_blob->as_compiled_method_or_null(); ++ if (nm != NULL) { ++ if (nm->is_deopt_mh_entry(sender_pc) || nm->is_deopt_entry(sender_pc) || ++ nm->method()->is_method_handle_intrinsic()) { ++ return false; ++ } ++ } ++ ++ // If the frame size is 0 something (or less) is bad because every nmethod has a non-zero frame size ++ // because the return address counts against the callee's frame. ++ ++ if (sender_blob->frame_size() <= 0) { ++ assert(!sender_blob->is_compiled(), "should count return address at least"); ++ return false; ++ } ++ ++ // We should never be able to see anything here except an nmethod. If something in the ++ // code cache (current frame) is called by an entity within the code cache that entity ++ // should not be anything but the call stub (already covered), the interpreter (already covered) ++ // or an nmethod. ++ ++ if (!sender_blob->is_compiled()) { ++ return false; ++ } ++ ++ // Could put some more validation for the potential non-interpreted sender ++ // frame we'd create by calling sender if I could think of any. Wait for next crash in forte... 
++ ++ // One idea is seeing if the sender_pc we have is one that we'd expect to call to current cb ++ ++ // We've validated the potential sender that would be created ++ return true; ++ } ++ ++ // Must be native-compiled frame. Since sender will try and use fp to find ++ // linkages it must be safe ++ ++ if (!fp_safe) { ++ return false; ++ } ++ ++ // Will the pc we fetch be non-zero (which we'll find at the oldest frame) ++ ++ if ( (address) this->fp()[return_addr_offset] == NULL) return false; ++ ++ ++ // could try and do some more potential verification of native frame if we could think of some... ++ ++ return true; ++ ++} ++ ++void frame::patch_pc(Thread* thread, address pc) { ++ assert(_cb == CodeCache::find_blob(pc), "unexpected pc"); ++ address* pc_addr = &(((address*) sp())[-1]); ++ if (TracePcPatching) { ++ tty->print_cr("patch_pc at address " INTPTR_FORMAT " [" INTPTR_FORMAT " -> " INTPTR_FORMAT "]", ++ p2i(pc_addr), p2i(*pc_addr), p2i(pc)); ++ } ++ // Either the return address is the original one or we are going to ++ // patch in the same address that's already there. ++ assert(_pc == *pc_addr || pc == *pc_addr, "must be"); ++ *pc_addr = pc; ++ address original_pc = CompiledMethod::get_deopt_original_pc(this); ++ if (original_pc != NULL) { ++ assert(original_pc == _pc, "expected original PC to be stored before patching"); ++ _deopt_state = is_deoptimized; ++ // leave _pc as is ++ } else { ++ _deopt_state = not_deoptimized; ++ _pc = pc; ++ } ++} ++ ++bool frame::is_interpreted_frame() const { ++ return Interpreter::contains(pc()); ++} ++ ++int frame::frame_size(RegisterMap* map) const { ++ frame sender = this->sender(map); ++ return sender.sp() - sp(); ++} ++ ++intptr_t* frame::entry_frame_argument_at(int offset) const { ++ // convert offset to index to deal with tsi ++ int index = (Interpreter::expr_offset_in_bytes(offset)/wordSize); ++ // Entry frame's arguments are always in relation to unextended_sp() ++ return &unextended_sp()[index]; ++} ++ ++// sender_sp ++intptr_t* frame::interpreter_frame_sender_sp() const { ++ assert(is_interpreted_frame(), "interpreted frame expected"); ++ return (intptr_t*) at(interpreter_frame_sender_sp_offset); ++} ++ ++void frame::set_interpreter_frame_sender_sp(intptr_t* sender_sp) { ++ assert(is_interpreted_frame(), "interpreted frame expected"); ++ ptr_at_put(interpreter_frame_sender_sp_offset, (intptr_t) sender_sp); ++} ++ ++ ++// monitor elements ++ ++BasicObjectLock* frame::interpreter_frame_monitor_begin() const { ++ return (BasicObjectLock*) addr_at(interpreter_frame_monitor_block_bottom_offset); ++} ++ ++BasicObjectLock* frame::interpreter_frame_monitor_end() const { ++ BasicObjectLock* result = (BasicObjectLock*) *addr_at(interpreter_frame_monitor_block_top_offset); ++ // make sure the pointer points inside the frame ++ assert((intptr_t) fp() > (intptr_t) result, "result must < than frame pointer"); ++ assert((intptr_t) sp() <= (intptr_t) result, "result must >= than stack pointer"); ++ return result; ++} ++ ++void frame::interpreter_frame_set_monitor_end(BasicObjectLock* value) { ++ *((BasicObjectLock**)addr_at(interpreter_frame_monitor_block_top_offset)) = value; ++} ++ ++// Used by template based interpreter deoptimization ++void frame::interpreter_frame_set_last_sp(intptr_t* sp) { ++ *((intptr_t**)addr_at(interpreter_frame_last_sp_offset)) = sp; ++} ++ ++frame frame::sender_for_entry_frame(RegisterMap* map) const { ++ assert(map != NULL, "map must be set"); ++ // Java frame called from C; skip all C frames and return top C ++ // frame 
of that chunk as the sender ++ JavaFrameAnchor* jfa = entry_frame_call_wrapper()->anchor(); ++ assert(!entry_frame_is_first(), "next Java fp must be non zero"); ++ assert(jfa->last_Java_sp() > sp(), "must be above this frame on stack"); ++ map->clear(); ++ assert(map->include_argument_oops(), "should be set by clear"); ++ if (jfa->last_Java_pc() != NULL ) { ++ frame fr(jfa->last_Java_sp(), jfa->last_Java_fp(), jfa->last_Java_pc()); ++ return fr; ++ } ++ frame fr(jfa->last_Java_sp(), jfa->last_Java_fp()); ++ return fr; ++} ++ ++OptimizedEntryBlob::FrameData* OptimizedEntryBlob::frame_data_for_frame(const frame& frame) const { ++ ShouldNotCallThis(); ++ return nullptr; ++} ++ ++bool frame::optimized_entry_frame_is_first() const { ++ ShouldNotCallThis(); ++ return false; ++} ++ ++frame frame::sender_for_optimized_entry_frame(RegisterMap* map) const { ++ ShouldNotCallThis(); ++ return {}; ++} ++ ++ ++//------------------------------------------------------------------------------ ++// frame::verify_deopt_original_pc ++// ++// Verifies the calculated original PC of a deoptimization PC for the ++// given unextended SP. The unextended SP might also be the saved SP ++// for MethodHandle call sites. ++#ifdef ASSERT ++void frame::verify_deopt_original_pc(CompiledMethod* nm, intptr_t* unextended_sp) { ++ frame fr; ++ ++ // This is ugly but it's better than to change {get,set}_original_pc ++ // to take an SP value as argument. And it's only a debugging ++ // method anyway. ++ fr._unextended_sp = unextended_sp; ++ ++ address original_pc = nm->get_original_pc(&fr); ++ assert(nm->insts_contains(original_pc), ++ "original PC must be in the main code section of the the compiled method (or must be immediately following it)"); ++} ++#endif ++ ++//------------------------------------------------------------------------------ ++// frame::adjust_unextended_sp ++void frame::adjust_unextended_sp() { ++ // On LoongArch, sites calling method handle intrinsics and lambda forms are treated ++ // as any other call site. Therefore, no special action is needed when we are ++ // returning to any of these call sites. ++ ++ if (_cb != NULL) { ++ CompiledMethod* sender_cm = _cb->as_compiled_method_or_null(); ++ if (sender_cm != NULL) { ++ // If the sender PC is a deoptimization point, get the original PC. ++ if (sender_cm->is_deopt_entry(_pc) || ++ sender_cm->is_deopt_mh_entry(_pc)) { ++ DEBUG_ONLY(verify_deopt_original_pc(sender_cm, _unextended_sp)); ++ } ++ } ++ } ++} ++ ++//------------------------------------------------------------------------------ ++// frame::update_map_with_saved_link ++void frame::update_map_with_saved_link(RegisterMap* map, intptr_t** link_addr) { ++ // The interpreter and compiler(s) always save fp in a known ++ // location on entry. We must record where that location is ++ // so that if fp was live on callout from c2 we can find ++ // the saved copy no matter what it called. ++ ++ // Since the interpreter always saves fp if we record where it is then ++ // we don't have to always save fp on entry and exit to c2 compiled ++ // code, on entry will be enough. ++ map->set_location(FP->as_VMReg(), (address) link_addr); ++ // this is weird "H" ought to be at a higher address however the ++ // oopMaps seems to have the "H" regs at the same address and the ++ // vanilla register. 
++ // XXXX make this go away ++ if (true) { ++ map->set_location(FP->as_VMReg()->next(), (address) link_addr); ++ } ++} ++ ++ ++//------------------------------------------------------------------------------ ++// frame::sender_for_interpreter_frame ++frame frame::sender_for_interpreter_frame(RegisterMap* map) const { ++ // sp is the raw sp from the sender after adapter or interpreter extension ++ intptr_t* sender_sp = this->sender_sp(); ++ ++ // This is the sp before any possible extension (adapter/locals). ++ intptr_t* unextended_sp = interpreter_frame_sender_sp(); ++ ++ // The interpreter and compiler(s) always save FP in a known ++ // location on entry. We must record where that location is ++ // so this if FP was live on callout from c2 we can find ++ // the saved copy no matter what it called. ++ ++ // Since the interpreter always saves FP if we record where it is then ++ // we don't have to always save FP on entry and exit to c2 compiled ++ // code, on entry will be enough. ++#ifdef COMPILER2_OR_JVMCI ++ if (map->update_map()) { ++ update_map_with_saved_link(map, (intptr_t**) addr_at(link_offset)); ++ } ++#endif // COMPILER2_OR_JVMCI ++ return frame(sender_sp, unextended_sp, link(), sender_pc()); ++} ++ ++ ++//------------------------------------------------------------------------------ ++// frame::sender_for_compiled_frame ++frame frame::sender_for_compiled_frame(RegisterMap* map) const { ++ assert(map != NULL, "map must be set"); ++ ++ // frame owned by optimizing compiler ++ assert(_cb->frame_size() >= 0, "must have non-zero frame size"); ++ ++ intptr_t* sender_sp = unextended_sp() + _cb->frame_size(); ++ intptr_t* unextended_sp = sender_sp; ++ ++ // On Loongson the return_address is always the word on the stack ++ // the fp in compiler points to sender fp, but in interpreter, fp points to return address, ++ // so getting sender for compiled frame is not same as interpreter frame. ++ // we hard code here temporarily ++ // spark ++ address sender_pc = (address) *(sender_sp - 1); ++ ++ intptr_t** saved_fp_addr = (intptr_t**) (sender_sp - 2); ++ ++ if (map->update_map()) { ++ // Tell GC to use argument oopmaps for some runtime stubs that need it. ++ // For C1, the runtime stub might not have oop maps, so set this flag ++ // outside of update_register_map. ++ map->set_include_argument_oops(_cb->caller_must_gc_arguments(map->thread())); ++ if (_cb->oop_maps() != NULL) { ++ OopMapSet::update_register_map(this, map); ++ } ++ ++ // Since the prolog does the save and restore of epb there is no oopmap ++ // for it so we must fill in its location as if there was an oopmap entry ++ // since if our caller was compiled code there could be live jvm state in it. ++ update_map_with_saved_link(map, saved_fp_addr); ++ } ++ assert(sender_sp != sp(), "must have changed"); ++ return frame(sender_sp, unextended_sp, *saved_fp_addr, sender_pc); ++} ++ ++//------------------------------------------------------------------------------ ++// frame::sender_raw ++frame frame::sender_raw(RegisterMap* map) const { ++ // Default is we done have to follow them. The sender_for_xxx will ++ // update it accordingly ++ map->set_include_argument_oops(false); ++ ++ if (is_entry_frame()) ++ return sender_for_entry_frame(map); ++ if (is_interpreted_frame()) ++ return sender_for_interpreter_frame(map); ++ assert(_cb == CodeCache::find_blob(pc()),"Must be the same"); ++ ++ // This test looks odd: why is it not is_compiled_frame() ? That's ++ // because stubs also have OOP maps. 
++ if (_cb != NULL) { ++ return sender_for_compiled_frame(map); ++ } ++ ++ // Must be native-compiled frame, i.e. the marshaling code for native ++ // methods that exists in the core system. ++ return frame(sender_sp(), link(), sender_pc()); ++} ++ ++frame frame::sender(RegisterMap* map) const { ++ frame result = sender_raw(map); ++ ++ if (map->process_frames()) { ++ StackWatermarkSet::on_iteration(map->thread(), result); ++ } ++ ++ return result; ++} ++ ++bool frame::is_interpreted_frame_valid(JavaThread* thread) const { ++ assert(is_interpreted_frame(), "Not an interpreted frame"); ++ // These are reasonable sanity checks ++ if (fp() == 0 || (intptr_t(fp()) & (wordSize-1)) != 0) { ++ return false; ++ } ++ if (sp() == 0 || (intptr_t(sp()) & (wordSize-1)) != 0) { ++ return false; ++ } ++ if (fp() + interpreter_frame_initial_sp_offset < sp()) { ++ return false; ++ } ++ // These are hacks to keep us out of trouble. ++ // The problem with these is that they mask other problems ++ if (fp() <= sp()) { // this attempts to deal with unsigned comparison above ++ return false; ++ } ++ ++ // do some validation of frame elements ++ ++ // first the method ++ ++ Method* m = safe_interpreter_frame_method(); ++ ++ // validate the method we'd find in this potential sender ++ if (!Method::is_valid_method(m)) return false; ++ ++ // stack frames shouldn't be much larger than max_stack elements ++ ++ //if (fp() - sp() > 1024 + m->max_stack()*Interpreter::stackElementSize()) { ++ if (fp() - sp() > 4096) { // stack frames shouldn't be large. ++ return false; ++ } ++ ++ // validate bci/bcp ++ ++ address bcp = interpreter_frame_bcp(); ++ if (m->validate_bci_from_bcp(bcp) < 0) { ++ return false; ++ } ++ ++ // validate ConstantPoolCache* ++ ConstantPoolCache* cp = *interpreter_frame_cache_addr(); ++ if (MetaspaceObj::is_valid(cp) == false) return false; ++ ++ // validate locals ++ ++ address locals = (address) *interpreter_frame_locals_addr(); ++ return thread->is_in_stack_range_incl(locals, (address)fp()); ++} ++ ++BasicType frame::interpreter_frame_result(oop* oop_result, jvalue* value_result) { ++ assert(is_interpreted_frame(), "interpreted frame expected"); ++ Method* method = interpreter_frame_method(); ++ BasicType type = method->result_type(); ++ ++ intptr_t* tos_addr; ++ if (method->is_native()) { ++ // Prior to calling into the runtime to report the method_exit the possible ++ // return value is pushed to the native stack. If the result is a jfloat/jdouble ++ // then ST0 is saved. See the note in generate_native_result ++ tos_addr = (intptr_t*)sp(); ++ if (type == T_FLOAT || type == T_DOUBLE) { ++ tos_addr += 2; ++ } ++ } else { ++ tos_addr = (intptr_t*)interpreter_frame_tos_address(); ++ } ++ ++ switch (type) { ++ case T_OBJECT : ++ case T_ARRAY : { ++ oop obj; ++ if (method->is_native()) { ++ obj = cast_to_oop(at(interpreter_frame_oop_temp_offset)); ++ } else { ++ oop* obj_p = (oop*)tos_addr; ++ obj = (obj_p == NULL) ? 
(oop)NULL : *obj_p; ++ } ++ assert(Universe::is_in_heap_or_null(obj), "sanity check"); ++ *oop_result = obj; ++ break; ++ } ++ case T_BOOLEAN : value_result->z = *(jboolean*)tos_addr; break; ++ case T_BYTE : value_result->b = *(jbyte*)tos_addr; break; ++ case T_CHAR : value_result->c = *(jchar*)tos_addr; break; ++ case T_SHORT : value_result->s = *(jshort*)tos_addr; break; ++ case T_INT : value_result->i = *(jint*)tos_addr; break; ++ case T_LONG : value_result->j = *(jlong*)tos_addr; break; ++ case T_FLOAT : value_result->f = *(jfloat*)tos_addr; break; ++ case T_DOUBLE : value_result->d = *(jdouble*)tos_addr; break; ++ case T_VOID : /* Nothing to do */ break; ++ default : ShouldNotReachHere(); ++ } ++ ++ return type; ++} ++ ++ ++intptr_t* frame::interpreter_frame_tos_at(jint offset) const { ++ int index = (Interpreter::expr_offset_in_bytes(offset)/wordSize); ++ return &interpreter_frame_tos_address()[index]; ++} ++ ++#ifndef PRODUCT ++ ++#define DESCRIBE_FP_OFFSET(name) \ ++ values.describe(frame_no, fp() + frame::name##_offset, #name) ++ ++void frame::describe_pd(FrameValues& values, int frame_no) { ++ if (is_interpreted_frame()) { ++ DESCRIBE_FP_OFFSET(interpreter_frame_sender_sp); ++ DESCRIBE_FP_OFFSET(interpreter_frame_last_sp); ++ DESCRIBE_FP_OFFSET(interpreter_frame_method); ++ DESCRIBE_FP_OFFSET(interpreter_frame_mirror); ++ DESCRIBE_FP_OFFSET(interpreter_frame_mdp); ++ DESCRIBE_FP_OFFSET(interpreter_frame_cache); ++ DESCRIBE_FP_OFFSET(interpreter_frame_locals); ++ DESCRIBE_FP_OFFSET(interpreter_frame_bcp); ++ DESCRIBE_FP_OFFSET(interpreter_frame_initial_sp); ++ } ++} ++#endif ++ ++intptr_t *frame::initial_deoptimization_info() { ++ // used to reset the saved FP ++ return fp(); ++} ++ ++intptr_t* frame::real_fp() const { ++ if (_cb != NULL) { ++ // use the frame size if valid ++ int size = _cb->frame_size(); ++ if (size > 0) { ++ return unextended_sp() + size; ++ } ++ } ++ // else rely on fp() ++ assert(! is_compiled_frame(), "unknown compiled frame size"); ++ return fp(); ++} ++ ++#ifndef PRODUCT ++// This is a generic constructor which is only used by pns() in debug.cpp. ++frame::frame(void* sp, void* fp, void* pc) { ++ init((intptr_t*)sp, (intptr_t*)fp, (address)pc); ++} ++ ++void frame::pd_ps() {} ++#endif +diff --git a/src/hotspot/cpu/loongarch/frame_loongarch.hpp b/src/hotspot/cpu/loongarch/frame_loongarch.hpp +new file mode 100644 +index 00000000000..738b5306366 +--- /dev/null ++++ b/src/hotspot/cpu/loongarch/frame_loongarch.hpp +@@ -0,0 +1,162 @@ ++/* ++ * Copyright (c) 1997, 2013, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2021, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 
++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#ifndef CPU_LOONGARCH_FRAME_LOONGARCH_HPP ++#define CPU_LOONGARCH_FRAME_LOONGARCH_HPP ++ ++#include "runtime/synchronizer.hpp" ++ ++// A frame represents a physical stack frame (an activation). Frames can be ++// C or Java frames, and the Java frames can be interpreted or compiled. ++// In contrast, vframes represent source-level activations, so that one physical frame ++// can correspond to multiple source level frames because of inlining. ++// A frame is comprised of {pc, fp, sp} ++// ------------------------------ Asm interpreter ---------------------------------------- ++// Layout of asm interpreter frame: ++// Low ++// [expression stack ] * <- sp ++// [monitors ] \ ++// ... | monitor block size ++// [monitors ] / ++// [monitor block size ] ++// [byte code index/pointr] = bcx() bcx_offset ++// [pointer to locals ] = locals() locals_offset ++// [constant pool cache ] = cache() cache_offset ++// [methodData ] = mdp() mdx_offset ++// [Method ] = method() method_offset ++// [last sp ] = last_sp() last_sp_offset ++// [old stack pointer ] (sender_sp) sender_sp_offset ++// [old frame pointer ] = link() ++// [return pc ] ++// [oop temp ] <- fp (only for native calls) ++// [locals and parameters ] ++// High <- sender sp ++// ------------------------------ Asm interpreter ---------------------------------------- ++// ++// ------------------------------ Native (C frame) --------------------------------------- ++// Layout of C frame: ++// High ++// | ++// - <----- fp <- sender sp ++// fp -8 | [ra] = sender_pc() ++// fp-16 | [fp (sender)] = link() ++// | [...] ++// | ++// - <----- sp ++// | ++// v ++// Low ++// ------------------------------ Native (C frame) --------------------------------------- ++ ++ public: ++ enum { ++ pc_return_offset = 0, ++ ++ link_offset = -2, ++ return_addr_offset = -1, ++ sender_sp_offset = 0, ++ ++ // Interpreter frames ++ interpreter_frame_result_handler_offset = 1, // for native calls only ++ interpreter_frame_oop_temp_offset = 0, // for native calls only ++ ++ interpreter_frame_sender_sp_offset = -3, ++ // outgoing sp before a call to an invoked method ++ interpreter_frame_last_sp_offset = interpreter_frame_sender_sp_offset - 1, ++ interpreter_frame_locals_offset = interpreter_frame_last_sp_offset - 1, ++ interpreter_frame_method_offset = interpreter_frame_locals_offset - 1, ++ interpreter_frame_mirror_offset = interpreter_frame_method_offset - 1, ++ interpreter_frame_mdp_offset = interpreter_frame_mirror_offset - 1, ++ interpreter_frame_cache_offset = interpreter_frame_mdp_offset - 1, ++ interpreter_frame_bcp_offset = interpreter_frame_cache_offset - 1, ++ interpreter_frame_initial_sp_offset = interpreter_frame_bcp_offset - 1, ++ ++ interpreter_frame_monitor_block_top_offset = interpreter_frame_initial_sp_offset, ++ interpreter_frame_monitor_block_bottom_offset = interpreter_frame_initial_sp_offset, ++ ++ // Entry frames ++ entry_frame_call_wrapper_offset = -11, ++ }; ++ ++ intptr_t ptr_at(int offset) const { ++ return *ptr_at_addr(offset); ++ } ++ ++ void ptr_at_put(int offset, intptr_t value) { ++ *ptr_at_addr(offset) = value; ++ } ++ ++ private: ++ // an additional field beyond _sp and _pc: ++ intptr_t* _fp; // frame pointer ++ // The interpreter and adapters will extend the frame of the caller. 
++ // Since oopMaps are based on the sp of the caller before extension ++ // we need to know that value. However in order to compute the address ++ // of the return address we need the real "raw" sp. Since sparc already ++ // uses sp() to mean "raw" sp and unextended_sp() to mean the caller's ++ // original sp we use that convention. ++ ++ intptr_t* _unextended_sp; ++ void adjust_unextended_sp(); ++ ++ intptr_t* ptr_at_addr(int offset) const { ++ return (intptr_t*) addr_at(offset); ++ } ++#ifdef ASSERT ++ // Used in frame::sender_for_{interpreter,compiled}_frame ++ static void verify_deopt_original_pc(CompiledMethod* nm, intptr_t* unextended_sp); ++#endif ++ ++ public: ++ // Constructors ++ ++ frame(intptr_t* sp, intptr_t* fp, address pc); ++ ++ frame(intptr_t* sp, intptr_t* unextended_sp, intptr_t* fp, address pc); ++ ++ frame(intptr_t* sp, intptr_t* fp); ++ ++ void init(intptr_t* sp, intptr_t* fp, address pc); ++ ++ // accessors for the instance variables ++ intptr_t* fp() const { return _fp; } ++ ++ inline address* sender_pc_addr() const; ++ ++ // expression stack tos if we are nested in a java call ++ intptr_t* interpreter_frame_last_sp() const; ++ ++ // helper to update a map with callee-saved FP ++ static void update_map_with_saved_link(RegisterMap* map, intptr_t** link_addr); ++ ++ // deoptimization support ++ void interpreter_frame_set_last_sp(intptr_t* sp); ++ ++ static jint interpreter_frame_expression_stack_direction() { return -1; } ++ ++ // returns the sending frame, without applying any barriers ++ frame sender_raw(RegisterMap* map) const; ++ ++#endif // CPU_LOONGARCH_FRAME_LOONGARCH_HPP +diff --git a/src/hotspot/cpu/loongarch/frame_loongarch.inline.hpp b/src/hotspot/cpu/loongarch/frame_loongarch.inline.hpp +new file mode 100644 +index 00000000000..8b0e0502701 +--- /dev/null ++++ b/src/hotspot/cpu/loongarch/frame_loongarch.inline.hpp +@@ -0,0 +1,245 @@ ++/* ++ * Copyright (c) 1997, 2013, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ * ++ */ ++ ++#ifndef CPU_LOONGARCH_FRAME_LOONGARCH_INLINE_HPP ++#define CPU_LOONGARCH_FRAME_LOONGARCH_INLINE_HPP ++ ++#include "code/codeCache.hpp" ++#include "code/vmreg.inline.hpp" ++ ++// Inline functions for Loongson frames: ++ ++// Constructors: ++ ++inline frame::frame() { ++ _pc = NULL; ++ _sp = NULL; ++ _unextended_sp = NULL; ++ _fp = NULL; ++ _cb = NULL; ++ _deopt_state = unknown; ++} ++ ++inline void frame::init(intptr_t* sp, intptr_t* fp, address pc) { ++ _sp = sp; ++ _unextended_sp = sp; ++ _fp = fp; ++ _pc = pc; ++ assert(pc != NULL, "no pc?"); ++ _cb = CodeCache::find_blob(pc); ++ adjust_unextended_sp(); ++ ++ address original_pc = CompiledMethod::get_deopt_original_pc(this); ++ if (original_pc != NULL) { ++ _pc = original_pc; ++ _deopt_state = is_deoptimized; ++ } else { ++ _deopt_state = not_deoptimized; ++ } ++} ++ ++inline frame::frame(intptr_t* sp, intptr_t* fp, address pc) { ++ init(sp, fp, pc); ++} ++ ++inline frame::frame(intptr_t* sp, intptr_t* unextended_sp, intptr_t* fp, address pc) { ++ _sp = sp; ++ _unextended_sp = unextended_sp; ++ _fp = fp; ++ _pc = pc; ++ assert(pc != NULL, "no pc?"); ++ _cb = CodeCache::find_blob(pc); ++ adjust_unextended_sp(); ++ ++ address original_pc = CompiledMethod::get_deopt_original_pc(this); ++ if (original_pc != NULL) { ++ _pc = original_pc; ++ _deopt_state = is_deoptimized; ++ } else { ++ _deopt_state = not_deoptimized; ++ } ++} ++ ++inline frame::frame(intptr_t* sp, intptr_t* fp) { ++ _sp = sp; ++ _unextended_sp = sp; ++ _fp = fp; ++ _pc = (address)(sp[-1]); ++ ++ // Here's a sticky one. This constructor can be called via AsyncGetCallTrace ++ // when last_Java_sp is non-null but the pc fetched is junk. If we are truly ++ // unlucky the junk value could be to a zombied method and we'll die on the ++ // find_blob call. This is also why we can have no asserts on the validity ++ // of the pc we find here. AsyncGetCallTrace -> pd_get_top_frame_for_signal_handler ++ // -> pd_last_frame should use a specialized version of pd_last_frame which could ++ // call a specilaized frame constructor instead of this one. ++ // Then we could use the assert below. However this assert is of somewhat dubious ++ // value. ++ // assert(_pc != NULL, "no pc?"); ++ ++ _cb = CodeCache::find_blob(_pc); ++ adjust_unextended_sp(); ++ address original_pc = CompiledMethod::get_deopt_original_pc(this); ++ if (original_pc != NULL) { ++ _pc = original_pc; ++ _deopt_state = is_deoptimized; ++ } else { ++ _deopt_state = not_deoptimized; ++ } ++} ++ ++// Accessors ++ ++inline bool frame::equal(frame other) const { ++ bool ret = sp() == other.sp() ++ && unextended_sp() == other.unextended_sp() ++ && fp() == other.fp() ++ && pc() == other.pc(); ++ assert(!ret || ret && cb() == other.cb() && _deopt_state == other._deopt_state, "inconsistent construction"); ++ return ret; ++} ++ ++// Return unique id for this frame. The id must have a value where we can distinguish ++// identity and younger/older relationship. NULL represents an invalid (incomparable) ++// frame. 
++inline intptr_t* frame::id(void) const { return unextended_sp(); } ++ ++// Relationals on frames based ++// Return true if the frame is younger (more recent activation) than the frame represented by id ++inline bool frame::is_younger(intptr_t* id) const { assert(this->id() != NULL && id != NULL, "NULL frame id"); ++ return this->id() < id ; } ++ ++// Return true if the frame is older (less recent activation) than the frame represented by id ++inline bool frame::is_older(intptr_t* id) const { assert(this->id() != NULL && id != NULL, "NULL frame id"); ++ return this->id() > id ; } ++ ++ ++ ++inline intptr_t* frame::link() const { ++ return (intptr_t*) *(intptr_t **)addr_at(link_offset); ++} ++ ++inline intptr_t* frame::link_or_null() const { ++ intptr_t** ptr = (intptr_t **)addr_at(link_offset); ++ return os::is_readable_pointer(ptr) ? *ptr : NULL; ++} ++ ++inline intptr_t* frame::unextended_sp() const { return _unextended_sp; } ++ ++// Return address: ++ ++inline address* frame::sender_pc_addr() const { ++ return (address*) addr_at(return_addr_offset); ++} ++ ++inline address frame::sender_pc() const { return *sender_pc_addr(); } ++ ++inline intptr_t* frame::sender_sp() const { ++ return addr_at(sender_sp_offset); ++} ++ ++inline intptr_t** frame::interpreter_frame_locals_addr() const { ++ return (intptr_t**)addr_at(interpreter_frame_locals_offset); ++} ++ ++inline intptr_t* frame::interpreter_frame_last_sp() const { ++ return *(intptr_t**)addr_at(interpreter_frame_last_sp_offset); ++} ++ ++inline intptr_t* frame::interpreter_frame_bcp_addr() const { ++ return (intptr_t*)addr_at(interpreter_frame_bcp_offset); ++} ++ ++ ++inline intptr_t* frame::interpreter_frame_mdp_addr() const { ++ return (intptr_t*)addr_at(interpreter_frame_mdp_offset); ++} ++ ++ ++ ++// Constant pool cache ++ ++inline ConstantPoolCache** frame::interpreter_frame_cache_addr() const { ++ return (ConstantPoolCache**)addr_at(interpreter_frame_cache_offset); ++} ++ ++// Method ++ ++inline Method** frame::interpreter_frame_method_addr() const { ++ return (Method**)addr_at(interpreter_frame_method_offset); ++} ++ ++// Mirror ++ ++inline oop* frame::interpreter_frame_mirror_addr() const { ++ return (oop*)addr_at(interpreter_frame_mirror_offset); ++} ++ ++// top of expression stack ++inline intptr_t* frame::interpreter_frame_tos_address() const { ++ intptr_t* last_sp = interpreter_frame_last_sp(); ++ if (last_sp == NULL ) { ++ return sp(); ++ } else { ++ // sp() may have been extended by an adapter ++ assert(last_sp <= (intptr_t*)interpreter_frame_monitor_end(), "bad tos"); ++ return last_sp; ++ } ++} ++ ++inline oop* frame::interpreter_frame_temp_oop_addr() const { ++ return (oop *)(fp() + interpreter_frame_oop_temp_offset); ++} ++ ++inline int frame::interpreter_frame_monitor_size() { ++ return BasicObjectLock::size(); ++} ++ ++ ++// expression stack ++// (the max_stack arguments are used by the GC; see class FrameClosure) ++ ++inline intptr_t* frame::interpreter_frame_expression_stack() const { ++ intptr_t* monitor_end = (intptr_t*) interpreter_frame_monitor_end(); ++ return monitor_end-1; ++} ++ ++// Entry frames ++ ++inline JavaCallWrapper** frame::entry_frame_call_wrapper_addr() const { ++ return (JavaCallWrapper**)addr_at(entry_frame_call_wrapper_offset); ++} ++ ++// Compiled frames ++ ++inline oop frame::saved_oop_result(RegisterMap* map) const { ++ return *((oop*) map->location(V0->as_VMReg())); ++} ++ ++inline void frame::set_saved_oop_result(RegisterMap* map, oop obj) { ++ *((oop*) map->location(V0->as_VMReg())) = 
obj; ++} ++ ++#endif // CPU_LOONGARCH_FRAME_LOONGARCH_INLINE_HPP +diff --git a/src/hotspot/cpu/loongarch/gc/g1/g1BarrierSetAssembler_loongarch.cpp b/src/hotspot/cpu/loongarch/gc/g1/g1BarrierSetAssembler_loongarch.cpp +new file mode 100644 +index 00000000000..e129264506b +--- /dev/null ++++ b/src/hotspot/cpu/loongarch/gc/g1/g1BarrierSetAssembler_loongarch.cpp +@@ -0,0 +1,532 @@ ++/* ++ * Copyright (c) 2018, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2021, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#include "precompiled.hpp" ++#include "asm/macroAssembler.inline.hpp" ++#include "gc/g1/g1BarrierSet.hpp" ++#include "gc/g1/g1BarrierSetAssembler.hpp" ++#include "gc/g1/g1BarrierSetRuntime.hpp" ++#include "gc/g1/g1CardTable.hpp" ++#include "gc/g1/g1ThreadLocalData.hpp" ++#include "gc/g1/heapRegion.hpp" ++#include "interpreter/interp_masm.hpp" ++#include "runtime/sharedRuntime.hpp" ++#include "utilities/macros.hpp" ++#ifdef COMPILER1 ++#include "c1/c1_LIRAssembler.hpp" ++#include "c1/c1_MacroAssembler.hpp" ++#include "gc/g1/c1/g1BarrierSetC1.hpp" ++#endif ++ ++#define __ masm-> ++ ++#define A0 RA0 ++#define A1 RA1 ++#define A2 RA2 ++#define A3 RA3 ++#define A4 RA4 ++#define A5 RA5 ++#define A6 RA6 ++#define A7 RA7 ++ ++void G1BarrierSetAssembler::gen_write_ref_array_pre_barrier(MacroAssembler* masm, DecoratorSet decorators, ++ Register addr, Register count, RegSet saved_regs) { ++ bool dest_uninitialized = (decorators & IS_DEST_UNINITIALIZED) != 0; ++ ++ if (!dest_uninitialized) { ++#ifndef OPT_THREAD ++ Register thread = T9; ++ __ get_thread(thread); ++#else ++ Register thread = TREG; ++#endif ++ ++ Label filtered; ++ Address in_progress(thread, in_bytes(G1ThreadLocalData::satb_mark_queue_active_offset())); ++ // Is marking active? 
++ if (in_bytes(SATBMarkQueue::byte_width_of_active()) == 4) { ++ __ ld_w(AT, in_progress); ++ } else { ++ assert(in_bytes(SATBMarkQueue::byte_width_of_active()) == 1, "Assumption"); ++ __ ld_b(AT, in_progress); ++ } ++ ++ __ beqz(AT, filtered); ++ ++ __ push(saved_regs); ++ if (count == A0) { ++ if (addr == A1) { ++ __ move(AT, A0); ++ __ move(A0, A1); ++ __ move(A1, AT); ++ } else { ++ __ move(A1, count); ++ __ move(A0, addr); ++ } ++ } else { ++ __ move(A0, addr); ++ __ move(A1, count); ++ } ++ if (UseCompressedOops) { ++ __ call_VM_leaf(CAST_FROM_FN_PTR(address, G1BarrierSetRuntime::write_ref_array_pre_narrow_oop_entry), 2); ++ } else { ++ __ call_VM_leaf(CAST_FROM_FN_PTR(address, G1BarrierSetRuntime::write_ref_array_pre_oop_entry), 2); ++ } ++ __ pop(saved_regs); ++ ++ __ bind(filtered); ++ } ++} ++ ++void G1BarrierSetAssembler::gen_write_ref_array_post_barrier(MacroAssembler* masm, DecoratorSet decorators, ++ Register addr, Register count, Register tmp, RegSet saved_regs) { ++ __ push(saved_regs); ++ if (count == A0) { ++ assert_different_registers(A1, addr); ++ __ move(A1, count); ++ __ move(A0, addr); ++ } else { ++ assert_different_registers(A0, count); ++ __ move(A0, addr); ++ __ move(A1, count); ++ } ++ __ call_VM_leaf(CAST_FROM_FN_PTR(address, G1BarrierSetRuntime::write_ref_array_post_entry), 2); ++ __ pop(saved_regs); ++} ++ ++void G1BarrierSetAssembler::load_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type, ++ Register dst, Address src, Register tmp1, Register tmp_thread) { ++ bool on_oop = is_reference_type(type); ++ bool on_weak = (decorators & ON_WEAK_OOP_REF) != 0; ++ bool on_phantom = (decorators & ON_PHANTOM_OOP_REF) != 0; ++ bool on_reference = on_weak || on_phantom; ++ ModRefBarrierSetAssembler::load_at(masm, decorators, type, dst, src, tmp1, tmp_thread); ++ if (on_oop && on_reference) { ++ const Register thread = TREG; ++#ifndef OPT_THREAD ++ __ get_thread(thread); ++#endif ++ // RA is live. It must be saved around calls. ++ __ enter(); // barrier may call runtime ++ // Generate the G1 pre-barrier code to log the value of ++ // the referent field in an SATB buffer. ++ g1_write_barrier_pre(masm /* masm */, ++ noreg /* obj */, ++ dst /* pre_val */, ++ thread /* thread */, ++ tmp1 /* tmp */, ++ true /* tosca_live */, ++ true /* expand_call */); ++ __ leave(); ++ } ++} ++ ++void G1BarrierSetAssembler::g1_write_barrier_pre(MacroAssembler* masm, ++ Register obj, ++ Register pre_val, ++ Register thread, ++ Register tmp, ++ bool tosca_live, ++ bool expand_call) { ++ // If expand_call is true then we expand the call_VM_leaf macro ++ // directly to skip generating the check by ++ // InterpreterMacroAssembler::call_VM_leaf_base that checks _last_sp. ++ ++ assert(thread == TREG, "must be"); ++ ++ Label done; ++ Label runtime; ++ ++ assert(pre_val != noreg, "check this code"); ++ ++ if (obj != noreg) { ++ assert_different_registers(obj, pre_val, tmp); ++ assert(pre_val != V0, "check this code"); ++ } ++ ++ Address in_progress(thread, in_bytes(G1ThreadLocalData::satb_mark_queue_active_offset())); ++ Address index(thread, in_bytes(G1ThreadLocalData::satb_mark_queue_index_offset())); ++ Address buffer(thread, in_bytes(G1ThreadLocalData::satb_mark_queue_buffer_offset())); ++ ++ // Is marking active? 
++ if (in_bytes(SATBMarkQueue::byte_width_of_active()) == 4) { ++ __ ld_w(AT, in_progress); ++ } else { ++ assert(in_bytes(SATBMarkQueue::byte_width_of_active()) == 1, "Assumption"); ++ __ ld_b(AT, in_progress); ++ } ++ __ beqz(AT, done); ++ ++ // Do we need to load the previous value? ++ if (obj != noreg) { ++ __ load_heap_oop(pre_val, Address(obj, 0), tmp); ++ } ++ ++ // Is the previous value null? ++ __ beqz(pre_val, done); ++ ++ // Can we store original value in the thread's buffer? ++ // Is index == 0? ++ // (The index field is typed as size_t.) ++ ++ __ ld_d(tmp, index); ++ __ beqz(tmp, runtime); ++ ++ __ addi_d(tmp, tmp, -1 * wordSize); ++ __ st_d(tmp, index); ++ __ ld_d(AT, buffer); ++ ++ // Record the previous value ++ __ stx_d(pre_val, tmp, AT); ++ __ b(done); ++ ++ __ bind(runtime); ++ // save the live input values ++ if (tosca_live) __ push(V0); ++ ++ if (obj != noreg && obj != V0) __ push(obj); ++ ++ if (pre_val != V0) __ push(pre_val); ++ ++ // Calling the runtime using the regular call_VM_leaf mechanism generates ++ // code (generated by InterpreterMacroAssember::call_VM_leaf_base) ++ // that checks that the *(ebp+frame::interpreter_frame_last_sp) == NULL. ++ // ++ // If we care generating the pre-barrier without a frame (e.g. in the ++ // intrinsified Reference.get() routine) then ebp might be pointing to ++ // the caller frame and so this check will most likely fail at runtime. ++ // ++ // Expanding the call directly bypasses the generation of the check. ++ // So when we do not have have a full interpreter frame on the stack ++ // expand_call should be passed true. ++ ++ if (expand_call) { ++ assert(pre_val != A1, "smashed arg"); ++ if (thread != A1) __ move(A1, thread); ++ if (pre_val != A0) __ move(A0, pre_val); ++ __ super_call_VM_leaf(CAST_FROM_FN_PTR(address, G1BarrierSetRuntime::write_ref_field_pre_entry), pre_val, thread); ++ } else { ++ __ call_VM_leaf(CAST_FROM_FN_PTR(address, G1BarrierSetRuntime::write_ref_field_pre_entry), pre_val, thread); ++ } ++ ++ // save the live input values ++ if (pre_val != V0) ++ __ pop(pre_val); ++ ++ if (obj != noreg && obj != V0) ++ __ pop(obj); ++ ++ if (tosca_live) __ pop(V0); ++ ++ __ bind(done); ++} ++ ++void G1BarrierSetAssembler::g1_write_barrier_post(MacroAssembler* masm, ++ Register store_addr, ++ Register new_val, ++ Register thread, ++ Register tmp, ++ Register tmp2) { ++ assert_different_registers(tmp, tmp2, AT); ++ assert(thread == TREG, "must be"); ++ ++ Address queue_index(thread, in_bytes(G1ThreadLocalData::dirty_card_queue_index_offset())); ++ Address buffer(thread, in_bytes(G1ThreadLocalData::dirty_card_queue_buffer_offset())); ++ ++ CardTableBarrierSet* ct = barrier_set_cast(BarrierSet::barrier_set()); ++ assert(sizeof(*ct->card_table()->byte_map_base()) == sizeof(jbyte), "adjust this code"); ++ ++ Label done; ++ Label runtime; ++ ++ // Does store cross heap regions? ++ __ xorr(AT, store_addr, new_val); ++ __ srli_d(AT, AT, HeapRegion::LogOfHRGrainBytes); ++ __ beqz(AT, done); ++ ++ // crosses regions, storing NULL? ++ __ beqz(new_val, done); ++ ++ // storing region crossing non-NULL, is card already dirty? ++ const Register card_addr = tmp; ++ const Register cardtable = tmp2; ++ ++ __ move(card_addr, store_addr); ++ __ srli_d(card_addr, card_addr, CardTable::card_shift); ++ // Do not use ExternalAddress to load 'byte_map_base', since 'byte_map_base' is NOT ++ // a valid address and therefore is not properly handled by the relocation code. 
++ __ li(cardtable, (intptr_t)ct->card_table()->byte_map_base()); ++ __ add_d(card_addr, card_addr, cardtable); ++ ++ __ ld_bu(AT, card_addr, 0); ++ __ addi_d(AT, AT, -1 * (int)G1CardTable::g1_young_card_val()); ++ __ beqz(AT, done); ++ ++ assert((int)CardTable::dirty_card_val() == 0, "must be 0"); ++ ++ __ membar(__ StoreLoad); ++ __ ld_bu(AT, card_addr, 0); ++ __ beqz(AT, done); ++ ++ // storing a region crossing, non-NULL oop, card is clean. ++ // dirty card and log. ++ __ st_b(R0, card_addr, 0); ++ ++ __ ld_d(AT, queue_index); ++ __ beqz(AT, runtime); ++ __ addi_d(AT, AT, -1 * wordSize); ++ __ st_d(AT, queue_index); ++ __ ld_d(tmp2, buffer); ++ __ ld_d(AT, queue_index); ++ __ stx_d(card_addr, tmp2, AT); ++ __ b(done); ++ ++ __ bind(runtime); ++ // save the live input values ++ __ push(store_addr); ++ __ push(new_val); ++ __ call_VM_leaf(CAST_FROM_FN_PTR(address, G1BarrierSetRuntime::write_ref_field_post_entry), card_addr, TREG); ++ __ pop(new_val); ++ __ pop(store_addr); ++ ++ __ bind(done); ++} ++ ++void G1BarrierSetAssembler::oop_store_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type, ++ Address dst, Register val, Register tmp1, Register tmp2) { ++ bool in_heap = (decorators & IN_HEAP) != 0; ++ bool as_normal = (decorators & AS_NORMAL) != 0; ++ assert((decorators & IS_DEST_UNINITIALIZED) == 0, "unsupported"); ++ ++ bool needs_pre_barrier = as_normal; ++ bool needs_post_barrier = val != noreg && in_heap; ++ ++ Register tmp3 = RT3; ++ Register rthread = TREG; ++ // flatten object address if needed ++ // We do it regardless of precise because we need the registers ++ if (dst.index() == noreg && dst.disp() == 0) { ++ if (dst.base() != tmp3) { ++ __ move(tmp3, dst.base()); ++ } ++ } else { ++ __ lea(tmp3, dst); ++ } ++ ++ if (needs_pre_barrier) { ++ g1_write_barrier_pre(masm /*masm*/, ++ tmp3 /* obj */, ++ tmp2 /* pre_val */, ++ rthread /* thread */, ++ tmp1 /* tmp */, ++ val != noreg /* tosca_live */, ++ false /* expand_call */); ++ } ++ if (val == noreg) { ++ BarrierSetAssembler::store_at(masm, decorators, type, Address(tmp3, 0), val, noreg, noreg); ++ } else { ++ Register new_val = val; ++ if (needs_post_barrier) { ++ // G1 barrier needs uncompressed oop for region cross check. ++ if (UseCompressedOops) { ++ new_val = tmp2; ++ __ move(new_val, val); ++ } ++ } ++ BarrierSetAssembler::store_at(masm, decorators, type, Address(tmp3, 0), val, noreg, noreg); ++ if (needs_post_barrier) { ++ g1_write_barrier_post(masm /*masm*/, ++ tmp3 /* store_adr */, ++ new_val /* new_val */, ++ rthread /* thread */, ++ tmp1 /* tmp */, ++ tmp2 /* tmp2 */); ++ } ++ } ++} ++ ++#ifdef COMPILER1 ++ ++#undef __ ++#define __ ce->masm()-> ++ ++void G1BarrierSetAssembler::gen_pre_barrier_stub(LIR_Assembler* ce, G1PreBarrierStub* stub) { ++ G1BarrierSetC1* bs = (G1BarrierSetC1*)BarrierSet::barrier_set()->barrier_set_c1(); ++ // At this point we know that marking is in progress. ++ // If do_load() is true then we have to emit the ++ // load of the previous value; otherwise it has already ++ // been loaded into _pre_val. 
++ ++ __ bind(*stub->entry()); ++ ++ assert(stub->pre_val()->is_register(), "Precondition."); ++ ++ Register pre_val_reg = stub->pre_val()->as_register(); ++ ++ if (stub->do_load()) { ++ ce->mem2reg(stub->addr(), stub->pre_val(), T_OBJECT, stub->patch_code(), stub->info(), false /*wide*/, false /*unaligned*/); ++ } ++ __ beqz(pre_val_reg, *stub->continuation()); ++ ce->store_parameter(stub->pre_val()->as_register(), 0); ++ __ call(bs->pre_barrier_c1_runtime_code_blob()->code_begin(), relocInfo::runtime_call_type); ++ __ b(*stub->continuation()); ++} ++ ++void G1BarrierSetAssembler::gen_post_barrier_stub(LIR_Assembler* ce, G1PostBarrierStub* stub) { ++ G1BarrierSetC1* bs = (G1BarrierSetC1*)BarrierSet::barrier_set()->barrier_set_c1(); ++ __ bind(*stub->entry()); ++ assert(stub->addr()->is_register(), "Precondition."); ++ assert(stub->new_val()->is_register(), "Precondition."); ++ Register new_val_reg = stub->new_val()->as_register(); ++ __ beqz(new_val_reg, *stub->continuation()); ++ ce->store_parameter(stub->addr()->as_pointer_register(), 0); ++ __ call(bs->post_barrier_c1_runtime_code_blob()->code_begin(), relocInfo::runtime_call_type); ++ __ b(*stub->continuation()); ++} ++ ++#undef __ ++ ++#define __ sasm-> ++ ++void G1BarrierSetAssembler::generate_c1_pre_barrier_runtime_stub(StubAssembler* sasm) { ++ __ prologue("g1_pre_barrier", false); ++ ++ // arg0 : previous value of memory ++ ++ BarrierSet* bs = BarrierSet::barrier_set(); ++ ++ const Register pre_val = A0; ++ const Register thread = TREG; ++ const Register tmp = SCR2; ++ ++ Address in_progress(thread, in_bytes(G1ThreadLocalData::satb_mark_queue_active_offset())); ++ Address queue_index(thread, in_bytes(G1ThreadLocalData::satb_mark_queue_index_offset())); ++ Address buffer(thread, in_bytes(G1ThreadLocalData::satb_mark_queue_buffer_offset())); ++ ++ Label done; ++ Label runtime; ++ ++ // Is marking still active? ++ if (in_bytes(SATBMarkQueue::byte_width_of_active()) == 4) { ++ __ ld_w(tmp, in_progress); ++ } else { ++ assert(in_bytes(SATBMarkQueue::byte_width_of_active()) == 1, "Assumption"); ++ __ ld_b(tmp, in_progress); ++ } ++ __ beqz(tmp, done); ++ ++ // Can we store original value in the thread's buffer? ++ __ ld_ptr(tmp, queue_index); ++ __ beqz(tmp, runtime); ++ ++ __ addi_d(tmp, tmp, -wordSize); ++ __ st_ptr(tmp, queue_index); ++ __ ld_ptr(SCR1, buffer); ++ __ add_d(tmp, tmp, SCR1); ++ __ load_parameter(0, SCR1); ++ __ st_ptr(SCR1, Address(tmp, 0)); ++ __ b(done); ++ ++ __ bind(runtime); ++ __ push_call_clobbered_registers(); ++ __ load_parameter(0, pre_val); ++ __ call_VM_leaf(CAST_FROM_FN_PTR(address, G1BarrierSetRuntime::write_ref_field_pre_entry), pre_val, thread); ++ __ pop_call_clobbered_registers(); ++ __ bind(done); ++ ++ __ epilogue(); ++} ++ ++void G1BarrierSetAssembler::generate_c1_post_barrier_runtime_stub(StubAssembler* sasm) { ++ __ prologue("g1_post_barrier", false); ++ ++ // arg0: store_address, not use? ++ Address store_addr(FP, 2 * BytesPerWord); ++ ++ BarrierSet* bs = BarrierSet::barrier_set(); ++ CardTableBarrierSet* ctbs = barrier_set_cast(bs); ++ CardTable* ct = ctbs->card_table(); ++ ++ Label done; ++ Label runtime; ++ ++ // At this point we know new_value is non-NULL and the new_value crosses regions. 
++ // Must check to see if card is already dirty ++ ++ const Register thread = TREG; ++ ++ Address queue_index(thread, in_bytes(G1ThreadLocalData::dirty_card_queue_index_offset())); ++ Address buffer(thread, in_bytes(G1ThreadLocalData::dirty_card_queue_buffer_offset())); ++ ++ const Register card_offset = SCR2; ++ // RA is free here, so we can use it to hold the byte_map_base. ++ const Register byte_map_base = RA; ++ ++ assert_different_registers(card_offset, byte_map_base, SCR1); ++ ++ __ load_parameter(0, card_offset); ++ __ srli_d(card_offset, card_offset, CardTable::card_shift); ++ __ load_byte_map_base(byte_map_base); ++ __ ldx_bu(SCR1, byte_map_base, card_offset); ++ __ addi_d(SCR1, SCR1, -(int)G1CardTable::g1_young_card_val()); ++ __ beqz(SCR1, done); ++ ++ assert((int)CardTable::dirty_card_val() == 0, "must be 0"); ++ ++ __ membar(Assembler::StoreLoad); ++ __ ldx_bu(SCR1, byte_map_base, card_offset); ++ __ beqz(SCR1, done); ++ ++ // storing region crossing non-NULL, card is clean. ++ // dirty card and log. ++ __ stx_b(R0, byte_map_base, card_offset); ++ ++ // Convert card offset into an address in card_addr ++ Register card_addr = card_offset; ++ __ add_d(card_addr, byte_map_base, card_addr); ++ ++ __ ld_ptr(SCR1, queue_index); ++ __ beqz(SCR1, runtime); ++ __ addi_d(SCR1, SCR1, -wordSize); ++ __ st_ptr(SCR1, queue_index); ++ ++ // Reuse RA to hold buffer_addr ++ const Register buffer_addr = RA; ++ ++ __ ld_ptr(buffer_addr, buffer); ++ __ stx_d(card_addr, buffer_addr, SCR1); ++ __ b(done); ++ ++ __ bind(runtime); ++ __ push_call_clobbered_registers(); ++ __ call_VM_leaf(CAST_FROM_FN_PTR(address, G1BarrierSetRuntime::write_ref_field_post_entry), card_addr, thread); ++ __ pop_call_clobbered_registers(); ++ __ bind(done); ++ __ epilogue(); ++} ++ ++#undef __ ++ ++#endif // COMPILER1 +diff --git a/src/hotspot/cpu/loongarch/gc/g1/g1BarrierSetAssembler_loongarch.hpp b/src/hotspot/cpu/loongarch/gc/g1/g1BarrierSetAssembler_loongarch.hpp +new file mode 100644 +index 00000000000..745046ac0cc +--- /dev/null ++++ b/src/hotspot/cpu/loongarch/gc/g1/g1BarrierSetAssembler_loongarch.hpp +@@ -0,0 +1,71 @@ ++/* ++ * Copyright (c) 2018, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ * ++ */ ++ ++#ifndef CPU_LOONGARCH_GC_G1_G1BARRIERSETASSEMBLER_LOONGARCH_HPP ++#define CPU_LOONGARCH_GC_G1_G1BARRIERSETASSEMBLER_LOONGARCH_HPP ++ ++#include "asm/macroAssembler.hpp" ++#include "gc/shared/modRefBarrierSetAssembler.hpp" ++ ++class LIR_Assembler; ++class StubAssembler; ++class G1PreBarrierStub; ++class G1PostBarrierStub; ++ ++class G1BarrierSetAssembler: public ModRefBarrierSetAssembler { ++ protected: ++ virtual void gen_write_ref_array_pre_barrier(MacroAssembler* masm, DecoratorSet decorators, Register addr, Register count, RegSet saved_regs); ++ virtual void gen_write_ref_array_post_barrier(MacroAssembler* masm, DecoratorSet decorators, Register addr, Register count, Register tmp, RegSet saved_regs); ++ ++ void g1_write_barrier_pre(MacroAssembler* masm, ++ Register obj, ++ Register pre_val, ++ Register thread, ++ Register tmp, ++ bool tosca_live, ++ bool expand_call); ++ ++ void g1_write_barrier_post(MacroAssembler* masm, ++ Register store_addr, ++ Register new_val, ++ Register thread, ++ Register tmp, ++ Register tmp2); ++ ++ virtual void oop_store_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type, ++ Address dst, Register val, Register tmp1, Register tmp2); ++ ++ public: ++ void gen_pre_barrier_stub(LIR_Assembler* ce, G1PreBarrierStub* stub); ++ void gen_post_barrier_stub(LIR_Assembler* ce, G1PostBarrierStub* stub); ++ ++ void generate_c1_pre_barrier_runtime_stub(StubAssembler* sasm); ++ void generate_c1_post_barrier_runtime_stub(StubAssembler* sasm); ++ ++ virtual void load_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type, ++ Register dst, Address src, Register tmp1, Register tmp_thread); ++}; ++ ++#endif // CPU_LOONGARCH_GC_G1_G1BARRIERSETASSEMBLER_LOONGARCH_HPP +diff --git a/src/hotspot/cpu/loongarch/gc/g1/g1Globals_loongarch.hpp b/src/hotspot/cpu/loongarch/gc/g1/g1Globals_loongarch.hpp +new file mode 100644 +index 00000000000..44b7ff1485f +--- /dev/null ++++ b/src/hotspot/cpu/loongarch/gc/g1/g1Globals_loongarch.hpp +@@ -0,0 +1,30 @@ ++/* ++ * Copyright (c) 2021, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2021, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ */ ++ ++#ifndef CPU_LOONGARCH_GC_G1_G1GLOBALS_LOONGARCH_HPP ++#define CPU_LOONGARCH_GC_G1_G1GLOBALS_LOONGARCH_HPP ++ ++const size_t G1MergeHeapRootsPrefetchCacheSize = 8; ++ ++#endif // CPU_LOONGARCH_GC_G1_G1GLOBALS_LOONGARCH_HPP +diff --git a/src/hotspot/cpu/loongarch/gc/shared/barrierSetAssembler_loongarch.cpp b/src/hotspot/cpu/loongarch/gc/shared/barrierSetAssembler_loongarch.cpp +new file mode 100644 +index 00000000000..4706559a837 +--- /dev/null ++++ b/src/hotspot/cpu/loongarch/gc/shared/barrierSetAssembler_loongarch.cpp +@@ -0,0 +1,320 @@ ++/* ++ * Copyright (c) 2018, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2018, 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#include "precompiled.hpp" ++#include "classfile/classLoaderData.hpp" ++#include "gc/shared/barrierSet.hpp" ++#include "gc/shared/barrierSetAssembler.hpp" ++#include "gc/shared/barrierSetNMethod.hpp" ++#include "gc/shared/collectedHeap.hpp" ++#include "interpreter/interp_masm.hpp" ++#include "runtime/jniHandles.hpp" ++#include "runtime/sharedRuntime.hpp" ++#include "runtime/stubRoutines.hpp" ++#include "runtime/thread.hpp" ++ ++#define __ masm-> ++ ++void BarrierSetAssembler::load_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type, ++ Register dst, Address src, Register tmp1, Register tmp_thread) { ++ // RA is live. It must be saved around calls. 
++ ++ bool in_heap = (decorators & IN_HEAP) != 0; ++ bool in_native = (decorators & IN_NATIVE) != 0; ++ bool is_not_null = (decorators & IS_NOT_NULL) != 0; ++ ++ switch (type) { ++ case T_OBJECT: ++ case T_ARRAY: { ++ if (in_heap) { ++ if (UseCompressedOops) { ++ __ ld_wu(dst, src); ++ if (is_not_null) { ++ __ decode_heap_oop_not_null(dst); ++ } else { ++ __ decode_heap_oop(dst); ++ } ++ } else ++ { ++ __ ld_ptr(dst, src); ++ } ++ } else { ++ assert(in_native, "why else?"); ++ __ ld_ptr(dst, src); ++ } ++ break; ++ } ++ case T_BOOLEAN: __ ld_bu (dst, src); break; ++ case T_BYTE: __ ld_b (dst, src); break; ++ case T_CHAR: __ ld_hu (dst, src); break; ++ case T_SHORT: __ ld_h (dst, src); break; ++ case T_INT: __ ld_w (dst, src); break; ++ case T_LONG: __ ld_d (dst, src); break; ++ case T_ADDRESS: __ ld_ptr(dst, src); break; ++ case T_FLOAT: ++ assert(dst == noreg, "only to ftos"); ++ __ fld_s(FSF, src); ++ break; ++ case T_DOUBLE: ++ assert(dst == noreg, "only to dtos"); ++ __ fld_d(FSF, src); ++ break; ++ default: Unimplemented(); ++ } ++} ++ ++void BarrierSetAssembler::store_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type, ++ Address dst, Register val, Register tmp1, Register tmp2) { ++ bool in_heap = (decorators & IN_HEAP) != 0; ++ bool in_native = (decorators & IN_NATIVE) != 0; ++ bool is_not_null = (decorators & IS_NOT_NULL) != 0; ++ ++ switch (type) { ++ case T_OBJECT: ++ case T_ARRAY: { ++ if (in_heap) { ++ if (val == noreg) { ++ assert(!is_not_null, "inconsistent access"); ++ if (UseCompressedOops) { ++ __ st_w(R0, dst); ++ } else { ++ __ st_d(R0, dst); ++ } ++ } else { ++ if (UseCompressedOops) { ++ assert(!dst.uses(val), "not enough registers"); ++ if (is_not_null) { ++ __ encode_heap_oop_not_null(val); ++ } else { ++ __ encode_heap_oop(val); ++ } ++ __ st_w(val, dst); ++ } else ++ { ++ __ st_ptr(val, dst); ++ } ++ } ++ } else { ++ assert(in_native, "why else?"); ++ assert(val != noreg, "not supported"); ++ __ st_ptr(val, dst); ++ } ++ break; ++ } ++ case T_BOOLEAN: ++ __ andi(val, val, 0x1); // boolean is true if LSB is 1 ++ __ st_b(val, dst); ++ break; ++ case T_BYTE: ++ __ st_b(val, dst); ++ break; ++ case T_SHORT: ++ __ st_h(val, dst); ++ break; ++ case T_CHAR: ++ __ st_h(val, dst); ++ break; ++ case T_INT: ++ __ st_w(val, dst); ++ break; ++ case T_LONG: ++ __ st_d(val, dst); ++ break; ++ case T_FLOAT: ++ assert(val == noreg, "only tos"); ++ __ fst_s(FSF, dst); ++ break; ++ case T_DOUBLE: ++ assert(val == noreg, "only tos"); ++ __ fst_d(FSF, dst); ++ break; ++ case T_ADDRESS: ++ __ st_ptr(val, dst); ++ break; ++ default: Unimplemented(); ++ } ++} ++ ++void BarrierSetAssembler::obj_equals(MacroAssembler* masm, ++ Register obj1, Address obj2) { ++ Unimplemented(); ++} ++ ++void BarrierSetAssembler::obj_equals(MacroAssembler* masm, ++ Register obj1, Register obj2) { ++ Unimplemented(); ++} ++ ++void BarrierSetAssembler::try_resolve_jobject_in_native(MacroAssembler* masm, Register jni_env, ++ Register obj, Register tmp, Label& slowpath) { ++ __ clear_jweak_tag(obj); ++ __ ld_ptr(obj, Address(obj, 0)); ++} ++ ++// Defines obj, preserves var_size_in_bytes, okay for t2 == var_size_in_bytes. 
++void BarrierSetAssembler::tlab_allocate(MacroAssembler* masm, Register obj, ++ Register var_size_in_bytes, ++ int con_size_in_bytes, ++ Register t1, ++ Register t2, ++ Label& slow_case) { ++ assert_different_registers(obj, t2); ++ assert_different_registers(obj, var_size_in_bytes); ++ Register end = t2; ++ ++ // verify_tlab(); ++ ++ __ ld_ptr(obj, Address(TREG, JavaThread::tlab_top_offset())); ++ if (var_size_in_bytes == noreg) { ++ __ lea(end, Address(obj, con_size_in_bytes)); ++ } else { ++ __ lea(end, Address(obj, var_size_in_bytes, Address::no_scale, 0)); ++ } ++ __ ld_ptr(SCR1, Address(TREG, JavaThread::tlab_end_offset())); ++ __ blt_far(SCR1, end, slow_case, false); ++ ++ // update the tlab top pointer ++ __ st_ptr(end, Address(TREG, JavaThread::tlab_top_offset())); ++ ++ // recover var_size_in_bytes if necessary ++ if (var_size_in_bytes == end) { ++ __ sub_d(var_size_in_bytes, var_size_in_bytes, obj); ++ } ++ // verify_tlab(); ++} ++ ++// Defines obj, preserves var_size_in_bytes ++void BarrierSetAssembler::eden_allocate(MacroAssembler* masm, Register obj, ++ Register var_size_in_bytes, ++ int con_size_in_bytes, ++ Register t1, ++ Label& slow_case) { ++ assert_different_registers(obj, var_size_in_bytes, t1); ++ if (!Universe::heap()->supports_inline_contig_alloc()) { ++ __ b_far(slow_case); ++ } else { ++ Register end = t1; ++ Register heap_end = SCR2; ++ Label retry; ++ __ bind(retry); ++ ++ __ li(SCR1, (address)Universe::heap()->end_addr()); ++ __ ld_d(heap_end, SCR1, 0); ++ ++ // Get the current top of the heap ++ __ li(SCR1, (address) Universe::heap()->top_addr()); ++ __ ll_d(obj, SCR1, 0); ++ ++ // Adjust it my the size of our new object ++ if (var_size_in_bytes == noreg) ++ __ addi_d(end, obj, con_size_in_bytes); ++ else ++ __ add_d(end, obj, var_size_in_bytes); ++ ++ // if end < obj then we wrapped around high memory ++ __ blt_far(end, obj, slow_case, false); ++ __ blt_far(heap_end, end, slow_case, false); ++ ++ // If heap top hasn't been changed by some other thread, update it. ++ __ sc_d(end, SCR1, 0); ++ __ beqz(end, retry); ++ ++ incr_allocated_bytes(masm, var_size_in_bytes, con_size_in_bytes, t1); ++ } ++} ++ ++void BarrierSetAssembler::incr_allocated_bytes(MacroAssembler* masm, ++ Register var_size_in_bytes, ++ int con_size_in_bytes, ++ Register t1) { ++ assert(t1->is_valid(), "need temp reg"); ++ ++ __ ld_ptr(t1, Address(TREG, in_bytes(JavaThread::allocated_bytes_offset()))); ++ if (var_size_in_bytes->is_valid()) ++ __ add_d(t1, t1, var_size_in_bytes); ++ else ++ __ addi_d(t1, t1, con_size_in_bytes); ++ __ st_ptr(t1, Address(TREG, in_bytes(JavaThread::allocated_bytes_offset()))); ++} ++ ++void BarrierSetAssembler::nmethod_entry_barrier(MacroAssembler* masm) { ++ BarrierSetNMethod* bs_nm = BarrierSet::barrier_set()->barrier_set_nmethod(); ++ ++ if (bs_nm == NULL) { ++ return; ++ } ++ ++ Label skip, guard; ++ Address thread_disarmed_addr(TREG, in_bytes(bs_nm->thread_disarmed_offset())); ++ ++ __ lipc(SCR1, guard); ++ __ ld_w(SCR1, SCR1, 0); ++ ++ // Subsequent loads of oops must occur after load of guard value. ++ // BarrierSetNMethod::disarm sets guard with release semantics. ++ __ membar(__ LoadLoad); ++ __ ld_w(SCR2, thread_disarmed_addr); ++ __ beq(SCR1, SCR2, skip); ++ ++ __ call_long(StubRoutines::la::method_entry_barrier()); ++ __ b(skip); ++ ++ __ bind(guard); ++ __ emit_int32(0); // nmethod guard value. Skipped over in common case. 
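Taken together, the sequence above embeds a guard word after the entry code, loads it, and compares it against the thread-local "disarmed" value; only on a mismatch does execution divert into the method_entry_barrier stub. A rough C++ rendering of that handshake, with every name invented for illustration:

    #include <atomic>

    struct NMethodLike { std::atomic<int> guard; };   // the emit_int32(0) slot above
    struct ThreadLike  { int disarmed_value; };       // thread_disarmed_addr above

    static void entry_barrier_slow_path(NMethodLike*) { /* placeholder for the stub */ }

    void on_nmethod_entry(NMethodLike* nm, ThreadLike* self) {
      // ld.w of the guard; acquire ordering plays the role of the LoadLoad membar.
      int g = nm->guard.load(std::memory_order_acquire);
      if (g != self->disarmed_value) {
        entry_barrier_slow_path(nm);   // re-checks, processes the nmethod, then disarms
      }
    }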
++ ++ __ bind(skip); ++} ++ ++void BarrierSetAssembler::c2i_entry_barrier(MacroAssembler* masm) { ++ BarrierSetNMethod* bs = BarrierSet::barrier_set()->barrier_set_nmethod(); ++ if (bs == NULL) { ++ return; ++ } ++ ++ Label bad_call; ++ __ beqz(Rmethod, bad_call); ++ ++ // Pointer chase to the method holder to find out if the method is concurrently unloading. ++ Label method_live; ++ __ load_method_holder_cld(SCR2, Rmethod); ++ ++ // Is it a strong CLD? ++ __ ld_w(SCR1, Address(SCR2, ClassLoaderData::keep_alive_offset())); ++ __ bnez(SCR1, method_live); ++ ++ // Is it a weak but alive CLD? ++ __ push2(RT2, RT8); ++ __ ld_ptr(RT8, Address(SCR2, ClassLoaderData::holder_offset())); ++ __ resolve_weak_handle(RT8, RT2); // Assembler occupies SCR1. ++ __ move(SCR1, RT8); ++ __ pop2(RT2, RT8); ++ __ bnez(SCR1, method_live); ++ ++ __ bind(bad_call); ++ ++ __ jmp(SharedRuntime::get_handle_wrong_method_stub(), relocInfo::runtime_call_type); ++ __ bind(method_live); ++} ++ +diff --git a/src/hotspot/cpu/loongarch/gc/shared/barrierSetAssembler_loongarch.hpp b/src/hotspot/cpu/loongarch/gc/shared/barrierSetAssembler_loongarch.hpp +new file mode 100644 +index 00000000000..f87c2061132 +--- /dev/null ++++ b/src/hotspot/cpu/loongarch/gc/shared/barrierSetAssembler_loongarch.hpp +@@ -0,0 +1,93 @@ ++/* ++ * Copyright (c) 2018, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2018, 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ *
++ */
++
++#ifndef CPU_LOONGARCH_GC_SHARED_BARRIERSETASSEMBLER_LOONGARCH_HPP
++#define CPU_LOONGARCH_GC_SHARED_BARRIERSETASSEMBLER_LOONGARCH_HPP
++
++#include "asm/macroAssembler.hpp"
++#include "gc/shared/barrierSet.hpp"
++#include "gc/shared/barrierSetNMethod.hpp"
++#include "memory/allocation.hpp"
++#include "oops/access.hpp"
++
++class InterpreterMacroAssembler;
++
++class BarrierSetAssembler: public CHeapObj<mtGC> {
++private:
++  void incr_allocated_bytes(MacroAssembler* masm,
++                            Register var_size_in_bytes,
++                            int con_size_in_bytes,
++                            Register t1);
++
++public:
++  virtual void arraycopy_prologue(MacroAssembler* masm, DecoratorSet decorators, bool is_oop,
++                                  Register src, Register dst, Register count, RegSet saved_regs) {}
++  virtual void arraycopy_epilogue(MacroAssembler* masm, DecoratorSet decorators, bool is_oop,
++                                  Register dst, Register count, Register scratch, RegSet saved_regs) {}
++
++  virtual void load_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type,
++                       Register dst, Address src, Register tmp1, Register tmp_thread);
++  virtual void store_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type,
++                        Address dst, Register val, Register tmp1, Register tmp2);
++
++
++  virtual void obj_equals(MacroAssembler* masm,
++                          Register obj1, Register obj2);
++  virtual void obj_equals(MacroAssembler* masm,
++                          Register obj1, Address obj2);
++
++  virtual void resolve(MacroAssembler* masm, DecoratorSet decorators, Register obj) {
++    // Default implementation does not need to do anything.
++  }
++
++  // Support for jniFastGetField to try resolving a jobject/jweak in native
++  virtual void try_resolve_jobject_in_native(MacroAssembler* masm, Register jni_env,
++                                             Register obj, Register tmp, Label& slowpath);
++
++  virtual void tlab_allocate(MacroAssembler* masm,
++    Register obj,                // result: pointer to object after successful allocation
++    Register var_size_in_bytes,  // object size in bytes if unknown at compile time; invalid otherwise
++    int con_size_in_bytes,       // object size in bytes if known at compile time
++    Register t1,                 // temp register
++    Register t2,                 // temp register
++    Label& slow_case             // continuation point if fast allocation fails
++  );
++
++  void eden_allocate(MacroAssembler* masm,
++    Register obj,                // result: pointer to object after successful allocation
++    Register var_size_in_bytes,  // object size in bytes if unknown at compile time; invalid otherwise
++    int con_size_in_bytes,       // object size in bytes if known at compile time
++    Register t1,                 // temp register
++    Label& slow_case             // continuation point if fast allocation fails
++  );
++
++  virtual void barrier_stubs_init() {}
++
++  virtual void nmethod_entry_barrier(MacroAssembler* masm);
++  virtual void c2i_entry_barrier(MacroAssembler* masm);
++
++};
++
++#endif // CPU_LOONGARCH_GC_SHARED_BARRIERSETASSEMBLER_LOONGARCH_HPP
+diff --git a/src/hotspot/cpu/loongarch/gc/shared/barrierSetNMethod_loongarch.cpp b/src/hotspot/cpu/loongarch/gc/shared/barrierSetNMethod_loongarch.cpp
+new file mode 100644
+index 00000000000..65cee60f69d
+--- /dev/null
++++ b/src/hotspot/cpu/loongarch/gc/shared/barrierSetNMethod_loongarch.cpp
+@@ -0,0 +1,157 @@
++/*
++ * Copyright (c) 2018, Oracle and/or its affiliates. All rights reserved.
++ * Copyright (c) 2019, 2021, Loongson Technology. All rights reserved.
++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
++ *
++ * This code is free software; you can redistribute it and/or modify it
++ * under the terms of the GNU General Public License version 2 only, as
++ * published by the Free Software Foundation.
++ *
++ * This code is distributed in the hope that it will be useful, but WITHOUT
++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
++ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
++ * version 2 for more details (a copy is included in the LICENSE file that
++ * accompanied this code).
++ *
++ * You should have received a copy of the GNU General Public License version
++ * 2 along with this work; if not, write to the Free Software Foundation,
++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
++ *
++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
++ * or visit www.oracle.com if you need additional information or have any
++ * questions.
++ *
++ */
++
++#include "precompiled.hpp"
++#include "code/codeCache.hpp"
++#include "code/nativeInst.hpp"
++#include "gc/shared/barrierSetNMethod.hpp"
++#include "logging/log.hpp"
++#include "memory/resourceArea.hpp"
++#include "runtime/sharedRuntime.hpp"
++#include "runtime/registerMap.hpp"
++#include "runtime/thread.hpp"
++#include "utilities/align.hpp"
++#include "utilities/debug.hpp"
++
++class NativeNMethodBarrier: public NativeInstruction {
++  address instruction_address() const { return addr_at(0); }
++
++  int *guard_addr() {
++    return reinterpret_cast<int*>(instruction_address() + 9 * 4);
++  }
++
++public:
++  int get_value() {
++    return Atomic::load_acquire(guard_addr());
++  }
++
++  void set_value(int value) {
++    Atomic::release_store(guard_addr(), value);
++  }
++
++  void verify() const;
++};
++
++// Store the instruction bitmask, bits and name for checking the barrier.
++struct CheckInsn {
++  uint32_t mask;
++  uint32_t bits;
++  const char *name;
++};
++
++static const struct CheckInsn barrierInsn[] = {
++  { 0xfe000000, 0x18000000, "pcaddi"},
++  { 0xffc00000, 0x28800000, "ld.w"},
++  { 0xffff8000, 0x38720000, "dbar"},
++  { 0xffc00000, 0x28800000, "ld.w"},
++  { 0xfc000000, 0x58000000, "beq"},
++  { 0xfe000000, 0x14000000, "lu12i.w"},
++  { 0xfe000000, 0x16000000, "lu32i.d"},
++  { 0xfc000000, 0x4c000000, "jirl"},
++  { 0xfc000000, 0x50000000, "b"}
++};
++
++// The encodings must match the instructions emitted by
++// BarrierSetAssembler::nmethod_entry_barrier. The matching ignores the specific
++// register numbers and immediate values in the encoding.
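In other words, each table entry pins down only the opcode bits of one 32-bit word; a generic matcher over such a table looks roughly like this (illustrative only, the names below are not part of the patch):

    #include <cstdint>
    #include <cstdio>

    struct InsnPattern {
      uint32_t    mask;   // bits that must match
      uint32_t    bits;   // required value of those bits
      const char* name;
    };

    // Returns true when every word of code[] matches the corresponding pattern,
    // ignoring whatever the mask leaves free (registers, immediates).
    bool matches_sequence(const uint32_t* code, const InsnPattern* pat, int n) {
      for (int i = 0; i < n; i++) {
        if ((code[i] & pat[i].mask) != pat[i].bits) {
          std::fprintf(stderr, "word %d is not a %s instruction\n", i, pat[i].name);
          return false;
        }
      }
      return true;
    }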
++void NativeNMethodBarrier::verify() const {
++  intptr_t addr = (intptr_t) instruction_address();
++  for(unsigned int i = 0; i < sizeof(barrierInsn)/sizeof(struct CheckInsn); i++ ) {
++    uint32_t inst = *((uint32_t*) addr);
++    if ((inst & barrierInsn[i].mask) != barrierInsn[i].bits) {
++      tty->print_cr("Addr: " INTPTR_FORMAT " Code: 0x%x", addr, inst);
++      fatal("not an %s instruction.", barrierInsn[i].name);
++    }
++    addr +=4;
++  }
++}
++
++void BarrierSetNMethod::deoptimize(nmethod* nm, address* return_address_ptr) {
++
++  typedef struct {
++    intptr_t *sp; intptr_t *fp; address ra; address pc;
++  } frame_pointers_t;
++
++  frame_pointers_t *new_frame = (frame_pointers_t *)(return_address_ptr - 5);
++
++  JavaThread *thread = JavaThread::current();
++  RegisterMap reg_map(thread, false);
++  frame frame = thread->last_frame();
++
++  assert(frame.is_compiled_frame() || frame.is_native_frame(), "must be");
++  assert(frame.cb() == nm, "must be");
++  frame = frame.sender(&reg_map);
++
++  LogTarget(Trace, nmethod, barrier) out;
++  if (out.is_enabled()) {
++    ResourceMark mark;
++    log_trace(nmethod, barrier)("deoptimize(nmethod: %s(%p), return_addr: %p, osr: %d, thread: %p(%s), making rsp: %p) -> %p",
++                                nm->method()->name_and_sig_as_C_string(),
++                                nm, *(address *) return_address_ptr, nm->is_osr_method(), thread,
++                                thread->get_thread_name(), frame.sp(), nm->verified_entry_point());
++  }
++
++  new_frame->sp = frame.sp();
++  new_frame->fp = frame.fp();
++  new_frame->ra = frame.pc();
++  new_frame->pc = SharedRuntime::get_handle_wrong_method_stub();
++}
++
++// This is the offset of the entry barrier from where the frame is completed.
++// If any code changes between the end of the verified entry where the entry
++// barrier resides, and the completion of the frame, then
++// NativeNMethodCmpBarrier::verify() will immediately complain when it does
++// not find the expected native instruction at this offset, which needs updating.
++// Note that this offset is invariant of PreserveFramePointer.
++
++static const int entry_barrier_offset = -4 * 10;
++
++static NativeNMethodBarrier* native_nmethod_barrier(nmethod* nm) {
++  address barrier_address = nm->code_begin() + nm->frame_complete_offset() + entry_barrier_offset;
++  NativeNMethodBarrier* barrier = reinterpret_cast<NativeNMethodBarrier*>(barrier_address);
++  debug_only(barrier->verify());
++  return barrier;
++}
++
++void BarrierSetNMethod::disarm(nmethod* nm) {
++  if (!supports_entry_barrier(nm)) {
++    return;
++  }
++
++  // Disarms the nmethod guard emitted by BarrierSetAssembler::nmethod_entry_barrier.
++  // Symmetric "LD.W; DBAR" is in the nmethod barrier.
++  NativeNMethodBarrier* barrier = native_nmethod_barrier(nm);
++
++  barrier->set_value(disarmed_value());
++}
++
++bool BarrierSetNMethod::is_armed(nmethod* nm) {
++  if (!supports_entry_barrier(nm)) {
++    return false;
++  }
++
++  NativeNMethodBarrier* barrier = native_nmethod_barrier(nm);
++  return barrier->get_value() != disarmed_value();
++}
+diff --git a/src/hotspot/cpu/loongarch/gc/shared/cardTableBarrierSetAssembler_loongarch.cpp b/src/hotspot/cpu/loongarch/gc/shared/cardTableBarrierSetAssembler_loongarch.cpp
+new file mode 100644
+index 00000000000..96dc9562a02
+--- /dev/null
++++ b/src/hotspot/cpu/loongarch/gc/shared/cardTableBarrierSetAssembler_loongarch.cpp
+@@ -0,0 +1,119 @@
++/*
++ * Copyright (c) 2018, Oracle and/or its affiliates. All rights reserved.
++ * Copyright (c) 2018, 2022, Loongson Technology. All rights reserved.
++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
++ *
++ * This code is free software; you can redistribute it and/or modify it
++ * under the terms of the GNU General Public License version 2 only, as
++ * published by the Free Software Foundation.
++ *
++ * This code is distributed in the hope that it will be useful, but WITHOUT
++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
++ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
++ * version 2 for more details (a copy is included in the LICENSE file that
++ * accompanied this code).
++ *
++ * You should have received a copy of the GNU General Public License version
++ * 2 along with this work; if not, write to the Free Software Foundation,
++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
++ *
++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
++ * or visit www.oracle.com if you need additional information or have any
++ * questions.
++ *
++ */
++
++#include "precompiled.hpp"
++#include "asm/macroAssembler.inline.hpp"
++#include "gc/shared/barrierSet.hpp"
++#include "gc/shared/cardTable.hpp"
++#include "gc/shared/cardTableBarrierSet.hpp"
++#include "gc/shared/cardTableBarrierSetAssembler.hpp"
++
++#define __ masm->
++
++#define T4 RT4
++
++#ifdef PRODUCT
++#define BLOCK_COMMENT(str) /* nothing */
++#else
++#define BLOCK_COMMENT(str) __ block_comment(str)
++#endif
++
++#define BIND(label) bind(label); BLOCK_COMMENT(#label ":")
++
++#define TIMES_OOP (UseCompressedOops ? Address::times_4 : Address::times_8)
++
++void CardTableBarrierSetAssembler::gen_write_ref_array_post_barrier(MacroAssembler* masm, DecoratorSet decorators,
++                                                                    Register addr, Register count, Register tmp,
++                                                                    RegSet saved_regs) {
++  BarrierSet *bs = BarrierSet::barrier_set();
++  CardTableBarrierSet* ctbs = barrier_set_cast<CardTableBarrierSet*>(bs);
++  CardTable* ct = ctbs->card_table();
++  assert(sizeof(*ct->byte_map_base()) == sizeof(jbyte), "adjust this code");
++  intptr_t disp = (intptr_t) ct->byte_map_base();
++
++  Label L_loop, L_done;
++  const Register end = count;
++  assert_different_registers(addr, end);
++
++  __ beq(count, R0, L_done); // zero count - nothing to do
++
++  __ li(tmp, disp);
++
++  __ lea(end, Address(addr, count, TIMES_OOP, 0)); // end == addr+count*oop_size
++  __ addi_d(end, end, -BytesPerHeapOop); // end - 1 to make inclusive
++  __ shr(addr, CardTable::card_shift);
++  __ shr(end, CardTable::card_shift);
++  __ sub_d(end, end, addr); // end --> cards count
++
++  __ add_d(addr, addr, tmp);
++
++  __ BIND(L_loop);
++  __ stx_b(R0, addr, count);
++  __ addi_d(count, count, -1);
++  __ bge(count, R0, L_loop);
++
++  __ BIND(L_done);
++}
++
++// Does a store check for the oop in register obj.
++void CardTableBarrierSetAssembler::store_check(MacroAssembler* masm, Register obj, Register tmp) { ++ assert_different_registers(obj, tmp, SCR1); ++ ++ __ shr(obj, CardTable::card_shift); ++ ++ __ load_byte_map_base(tmp); ++ ++ assert(CardTable::dirty_card_val() == 0, "must be"); ++ ++ if (UseCondCardMark) { ++ Label L_already_dirty; ++ __ ldx_b(SCR1, obj, tmp); ++ __ beqz(SCR1, L_already_dirty); ++ __ stx_b(R0, obj, tmp); ++ __ bind(L_already_dirty); ++ } else { ++ __ stx_b(R0, obj, tmp); ++ } ++} ++ ++void CardTableBarrierSetAssembler::oop_store_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type, ++ Address dst, Register val, Register tmp1, Register tmp2) { ++ bool in_heap = (decorators & IN_HEAP) != 0; ++ bool is_array = (decorators & IS_ARRAY) != 0; ++ bool on_anonymous = (decorators & ON_UNKNOWN_OOP_REF) != 0; ++ bool precise = is_array || on_anonymous; ++ ++ bool needs_post_barrier = val != noreg && in_heap; ++ BarrierSetAssembler::store_at(masm, decorators, type, dst, val, noreg, noreg); ++ if (needs_post_barrier) { ++ // flatten object address if needed ++ if (!precise || (dst.index() == noreg && dst.disp() == 0)) { ++ store_check(masm, dst.base(), tmp1); ++ } else { ++ __ lea(tmp1, dst); ++ store_check(masm, tmp1, tmp2); ++ } ++ } ++} +diff --git a/src/hotspot/cpu/loongarch/gc/shared/cardTableBarrierSetAssembler_loongarch.hpp b/src/hotspot/cpu/loongarch/gc/shared/cardTableBarrierSetAssembler_loongarch.hpp +new file mode 100644 +index 00000000000..7d628ca6a85 +--- /dev/null ++++ b/src/hotspot/cpu/loongarch/gc/shared/cardTableBarrierSetAssembler_loongarch.hpp +@@ -0,0 +1,44 @@ ++/* ++ * Copyright (c) 2018, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2018, 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ * ++ */ ++ ++#ifndef CPU_LOONGARCH_GC_SHARED_CARDTABLEBARRIERSETASSEMBLER_LOONGARCH_HPP ++#define CPU_LOONGARCH_GC_SHARED_CARDTABLEBARRIERSETASSEMBLER_LOONGARCH_HPP ++ ++#include "asm/macroAssembler.hpp" ++#include "gc/shared/modRefBarrierSetAssembler.hpp" ++ ++class CardTableBarrierSetAssembler: public ModRefBarrierSetAssembler { ++protected: ++ void store_check(MacroAssembler* masm, Register obj, Register tmp); ++ ++ virtual void gen_write_ref_array_post_barrier(MacroAssembler* masm, DecoratorSet decorators, ++ Register addr, Register count, Register tmp, ++ RegSet saved_regs); ++ ++ virtual void oop_store_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type, ++ Address dst, Register val, Register tmp1, Register tmp2); ++}; ++ ++#endif // CPU_LOONGARCH_GC_SHARED_CARDTABLEBARRIERSETASSEMBLER_LOONGARCH_HPP +diff --git a/src/hotspot/cpu/loongarch/gc/shared/modRefBarrierSetAssembler_loongarch.cpp b/src/hotspot/cpu/loongarch/gc/shared/modRefBarrierSetAssembler_loongarch.cpp +new file mode 100644 +index 00000000000..71c3cb49386 +--- /dev/null ++++ b/src/hotspot/cpu/loongarch/gc/shared/modRefBarrierSetAssembler_loongarch.cpp +@@ -0,0 +1,53 @@ ++/* ++ * Copyright (c) 2018, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2018, 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ * ++ */ ++ ++#include "precompiled.hpp" ++#include "asm/macroAssembler.inline.hpp" ++#include "gc/shared/modRefBarrierSetAssembler.hpp" ++ ++#define __ masm-> ++ ++void ModRefBarrierSetAssembler::arraycopy_prologue(MacroAssembler* masm, DecoratorSet decorators, bool is_oop, ++ Register src, Register dst, Register count, RegSet saved_regs) { ++ if (is_oop) { ++ gen_write_ref_array_pre_barrier(masm, decorators, dst, count, saved_regs); ++ } ++} ++ ++void ModRefBarrierSetAssembler::arraycopy_epilogue(MacroAssembler* masm, DecoratorSet decorators, bool is_oop, ++ Register dst, Register count, Register scratch, RegSet saved_regs) { ++ if (is_oop) { ++ gen_write_ref_array_post_barrier(masm, decorators, dst, count, scratch, saved_regs); ++ } ++} ++ ++void ModRefBarrierSetAssembler::store_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type, ++ Address dst, Register val, Register tmp1, Register tmp2) { ++ if (type == T_OBJECT || type == T_ARRAY) { ++ oop_store_at(masm, decorators, type, dst, val, tmp1, tmp2); ++ } else { ++ BarrierSetAssembler::store_at(masm, decorators, type, dst, val, tmp1, tmp2); ++ } ++} +diff --git a/src/hotspot/cpu/loongarch/gc/shared/modRefBarrierSetAssembler_loongarch.hpp b/src/hotspot/cpu/loongarch/gc/shared/modRefBarrierSetAssembler_loongarch.hpp +new file mode 100644 +index 00000000000..086bdebb506 +--- /dev/null ++++ b/src/hotspot/cpu/loongarch/gc/shared/modRefBarrierSetAssembler_loongarch.hpp +@@ -0,0 +1,54 @@ ++/* ++ * Copyright (c) 2018, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2018, 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#ifndef CPU_LOONGARCH_GC_SHARED_MODREFBARRIERSETASSEMBLER_LOONGARCH_HPP ++#define CPU_LOONGARCH_GC_SHARED_MODREFBARRIERSETASSEMBLER_LOONGARCH_HPP ++ ++#include "asm/macroAssembler.hpp" ++#include "gc/shared/barrierSetAssembler.hpp" ++ ++// The ModRefBarrierSetAssembler filters away accesses on BasicTypes other ++// than T_OBJECT/T_ARRAY (oops). The oop accesses call one of the protected ++// accesses, which are overridden in the concrete BarrierSetAssembler. 
++ ++class ModRefBarrierSetAssembler: public BarrierSetAssembler { ++protected: ++ virtual void gen_write_ref_array_pre_barrier(MacroAssembler* masm, DecoratorSet decorators, ++ Register addr, Register count, RegSet saved_regs) {} ++ virtual void gen_write_ref_array_post_barrier(MacroAssembler* masm, DecoratorSet decorators, ++ Register addr, Register count, Register tmp, RegSet saved_regs) {} ++ virtual void oop_store_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type, ++ Address dst, Register val, Register tmp1, Register tmp2) = 0; ++public: ++ virtual void arraycopy_prologue(MacroAssembler* masm, DecoratorSet decorators, bool is_oop, ++ Register src, Register dst, Register count, RegSet saved_regs); ++ virtual void arraycopy_epilogue(MacroAssembler* masm, DecoratorSet decorators, bool is_oop, ++ Register dst, Register count, Register scratch, RegSet saved_regs); ++ ++ virtual void store_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type, ++ Address dst, Register val, Register tmp1, Register tmp2); ++}; ++ ++#endif // CPU_LOONGARCH_GC_SHARED_MODREFBARRIERSETASSEMBLER_LOONGARCH_HPP +diff --git a/src/hotspot/cpu/loongarch/gc/shenandoah/c1/shenandoahBarrierSetC1_loongarch.cpp b/src/hotspot/cpu/loongarch/gc/shenandoah/c1/shenandoahBarrierSetC1_loongarch.cpp +new file mode 100644 +index 00000000000..f82a2500d41 +--- /dev/null ++++ b/src/hotspot/cpu/loongarch/gc/shenandoah/c1/shenandoahBarrierSetC1_loongarch.cpp +@@ -0,0 +1,130 @@ ++/* ++ * Copyright (c) 2018, 2021, Red Hat, Inc. All rights reserved. ++ * Copyright (c) 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ * ++ */ ++ ++#include "precompiled.hpp" ++#include "c1/c1_LIRAssembler.hpp" ++#include "c1/c1_MacroAssembler.hpp" ++#include "gc/shared/gc_globals.hpp" ++#include "gc/shenandoah/shenandoahBarrierSet.hpp" ++#include "gc/shenandoah/shenandoahBarrierSetAssembler.hpp" ++#include "gc/shenandoah/c1/shenandoahBarrierSetC1.hpp" ++ ++#define __ masm->masm()-> ++ ++void LIR_OpShenandoahCompareAndSwap::emit_code(LIR_Assembler* masm) { ++ Register addr = _addr->as_register_lo(); ++ Register newval = _new_value->as_register(); ++ Register cmpval = _cmp_value->as_register(); ++ Register result = result_opr()->as_register(); ++ ++ ShenandoahBarrierSet::assembler()->iu_barrier(masm->masm(), newval, SCR2); ++ ++ if (UseCompressedOops) { ++ Register tmp1 = _tmp1->as_register(); ++ Register tmp2 = _tmp2->as_register(); ++ ++ __ encode_heap_oop(tmp1, cmpval); ++ cmpval = tmp1; ++ __ encode_heap_oop(tmp2, newval); ++ newval = tmp2; ++ } ++ ++ ShenandoahBarrierSet::assembler()->cmpxchg_oop(masm->masm(), addr, cmpval, newval, /*acquire*/ true, /*is_cae*/ false, result); ++ ++ if (CompilerConfig::is_c1_only_no_jvmci()) { ++ // The membar here is necessary to prevent reordering between the ++ // release store in the CAS above and a subsequent volatile load. ++ // However for tiered compilation C1 inserts a full barrier before ++ // volatile loads which means we don't need an additional barrier ++ // here (see LIRGenerator::volatile_field_load()). ++ __ membar(__ AnyAny); ++ } ++} ++ ++#undef __ ++ ++#ifdef ASSERT ++#define __ gen->lir(__FILE__, __LINE__)-> ++#else ++#define __ gen->lir()-> ++#endif ++ ++LIR_Opr ShenandoahBarrierSetC1::atomic_cmpxchg_at_resolved(LIRAccess& access, LIRItem& cmp_value, LIRItem& new_value) { ++ if (access.is_oop()) { ++ LIRGenerator *gen = access.gen(); ++ if (ShenandoahSATBBarrier) { ++ pre_barrier(gen, access.access_emit_info(), access.decorators(), access.resolved_addr(), ++ LIR_OprFact::illegalOpr /* pre_val */); ++ } ++ if (ShenandoahCASBarrier) { ++ cmp_value.load_item(); ++ new_value.load_item(); ++ ++ LIR_Opr t1 = LIR_OprFact::illegalOpr; ++ LIR_Opr t2 = LIR_OprFact::illegalOpr; ++ LIR_Opr addr = access.resolved_addr()->as_address_ptr()->base(); ++ LIR_Opr result = gen->new_register(T_INT); ++ ++ if (UseCompressedOops) { ++ t1 = gen->new_register(T_OBJECT); ++ t2 = gen->new_register(T_OBJECT); ++ } ++ ++ __ append(new LIR_OpShenandoahCompareAndSwap(addr, cmp_value.result(), new_value.result(), t1, t2, result)); ++ return result; ++ } ++ } ++ return BarrierSetC1::atomic_cmpxchg_at_resolved(access, cmp_value, new_value); ++} ++ ++LIR_Opr ShenandoahBarrierSetC1::atomic_xchg_at_resolved(LIRAccess& access, LIRItem& value) { ++ LIRGenerator* gen = access.gen(); ++ BasicType type = access.type(); ++ ++ LIR_Opr result = gen->new_register(type); ++ value.load_item(); ++ LIR_Opr value_opr = value.result(); ++ ++ if (access.is_oop()) { ++ value_opr = iu_barrier(access.gen(), value_opr, access.access_emit_info(), access.decorators()); ++ } ++ ++ assert(type == T_INT || is_reference_type(type) || type == T_LONG, "unexpected type"); ++ LIR_Opr tmp = gen->new_register(T_INT); ++ __ xchg(access.resolved_addr(), value_opr, result, tmp); ++ ++ if (access.is_oop()) { ++ result = load_reference_barrier(access.gen(), result, LIR_OprFact::addressConst(0), access.decorators()); ++ LIR_Opr tmp = gen->new_register(type); ++ __ move(result, tmp); ++ result = tmp; ++ if (ShenandoahSATBBarrier) { ++ pre_barrier(access.gen(), access.access_emit_info(), access.decorators(), LIR_OprFact::illegalOpr, ++ 
result /* pre_val */); ++ } ++ } ++ ++ return result; ++} +diff --git a/src/hotspot/cpu/loongarch/gc/shenandoah/shenandoahBarrierSetAssembler_loongarch.cpp b/src/hotspot/cpu/loongarch/gc/shenandoah/shenandoahBarrierSetAssembler_loongarch.cpp +new file mode 100644 +index 00000000000..7cf552e283a +--- /dev/null ++++ b/src/hotspot/cpu/loongarch/gc/shenandoah/shenandoahBarrierSetAssembler_loongarch.cpp +@@ -0,0 +1,784 @@ ++/* ++ * Copyright (c) 2018, 2021, Red Hat, Inc. All rights reserved. ++ * Copyright (c) 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#include "precompiled.hpp" ++#include "gc/shenandoah/shenandoahBarrierSet.hpp" ++#include "gc/shenandoah/shenandoahBarrierSetAssembler.hpp" ++#include "gc/shenandoah/shenandoahForwarding.hpp" ++#include "gc/shenandoah/shenandoahHeap.inline.hpp" ++#include "gc/shenandoah/shenandoahHeapRegion.hpp" ++#include "gc/shenandoah/shenandoahRuntime.hpp" ++#include "gc/shenandoah/shenandoahThreadLocalData.hpp" ++#include "gc/shenandoah/heuristics/shenandoahHeuristics.hpp" ++#include "interpreter/interpreter.hpp" ++#include "interpreter/interp_masm.hpp" ++#include "runtime/sharedRuntime.hpp" ++#include "runtime/thread.hpp" ++#ifdef COMPILER1 ++#include "c1/c1_LIRAssembler.hpp" ++#include "c1/c1_MacroAssembler.hpp" ++#include "gc/shenandoah/c1/shenandoahBarrierSetC1.hpp" ++#endif ++ ++#define __ masm-> ++ ++#define A0 RA0 ++#define A1 RA1 ++#define A3 RA3 ++ ++void ShenandoahBarrierSetAssembler::arraycopy_prologue(MacroAssembler* masm, DecoratorSet decorators, bool is_oop, ++ Register src, Register dst, Register count, RegSet saved_regs) { ++ if (is_oop) { ++ bool dest_uninitialized = (decorators & IS_DEST_UNINITIALIZED) != 0; ++ if ((ShenandoahSATBBarrier && !dest_uninitialized) || ShenandoahIUBarrier || ShenandoahLoadRefBarrier) { ++ Label done; ++ ++ // Avoid calling runtime if count == 0 ++ __ beqz(count, done); ++ ++ // Is GC active? 
++ Address gc_state(TREG, in_bytes(ShenandoahThreadLocalData::gc_state_offset())); ++ __ ld_b(SCR1, gc_state); ++ if (ShenandoahSATBBarrier && dest_uninitialized) { ++ __ andi(SCR1, SCR1, ShenandoahHeap::HAS_FORWARDED); ++ __ beqz(SCR1, done); ++ } else { ++ __ andi(SCR1, SCR1, ShenandoahHeap::HAS_FORWARDED | ShenandoahHeap::MARKING); ++ __ beqz(SCR1, done); ++ } ++ ++ __ push(saved_regs); ++ if (UseCompressedOops) { ++ __ call_VM_leaf(CAST_FROM_FN_PTR(address, ShenandoahRuntime::arraycopy_barrier_narrow_oop_entry), src, dst, count); ++ } else { ++ __ call_VM_leaf(CAST_FROM_FN_PTR(address, ShenandoahRuntime::arraycopy_barrier_oop_entry), src, dst, count); ++ } ++ __ pop(saved_regs); ++ __ bind(done); ++ } ++ } ++} ++ ++void ShenandoahBarrierSetAssembler::shenandoah_write_barrier_pre(MacroAssembler* masm, ++ Register obj, ++ Register pre_val, ++ Register thread, ++ Register tmp, ++ bool tosca_live, ++ bool expand_call) { ++ if (ShenandoahSATBBarrier) { ++ satb_write_barrier_pre(masm, obj, pre_val, thread, tmp, tosca_live, expand_call); ++ } ++} ++ ++void ShenandoahBarrierSetAssembler::satb_write_barrier_pre(MacroAssembler* masm, ++ Register obj, ++ Register pre_val, ++ Register thread, ++ Register tmp, ++ bool tosca_live, ++ bool expand_call) { ++ // If expand_call is true then we expand the call_VM_leaf macro ++ // directly to skip generating the check by ++ // InterpreterMacroAssembler::call_VM_leaf_base that checks _last_sp. ++ ++ assert(thread == TREG, "must be"); ++ ++ Label done; ++ Label runtime; ++ ++ assert_different_registers(obj, pre_val, tmp, SCR1); ++ assert(pre_val != noreg && tmp != noreg, "expecting a register"); ++ ++ Address in_progress(thread, in_bytes(ShenandoahThreadLocalData::satb_mark_queue_active_offset())); ++ Address index(thread, in_bytes(ShenandoahThreadLocalData::satb_mark_queue_index_offset())); ++ Address buffer(thread, in_bytes(ShenandoahThreadLocalData::satb_mark_queue_buffer_offset())); ++ ++ // Is marking active? ++ if (in_bytes(SATBMarkQueue::byte_width_of_active()) == 4) { ++ __ ld_w(tmp, in_progress); ++ } else { ++ assert(in_bytes(SATBMarkQueue::byte_width_of_active()) == 1, "Assumption"); ++ __ ld_b(tmp, in_progress); ++ } ++ __ beqz(tmp, done); ++ ++ // Do we need to load the previous value? ++ if (obj != noreg) { ++ __ load_heap_oop(pre_val, Address(obj, 0), noreg, noreg, AS_RAW); ++ } ++ ++ // Is the previous value null? ++ __ beqz(pre_val, done); ++ ++ // Can we store original value in the thread's buffer? ++ // Is index == 0? ++ // (The index field is typed as size_t.) ++ ++ __ ld_d(tmp, index); // tmp := *index_adr ++ __ beqz(tmp, runtime); // tmp == 0? ++ // If yes, goto runtime ++ ++ __ addi_d(tmp, tmp, -wordSize); // tmp := tmp - wordSize ++ __ st_d(tmp, index); // *index_adr := tmp ++ __ ld_d(SCR1, buffer); ++ __ add_d(tmp, tmp, SCR1); // tmp := tmp + *buffer_adr ++ ++ // Record the previous value ++ __ st_d(pre_val, Address(tmp, 0)); ++ __ b(done); ++ ++ __ bind(runtime); ++ // save the live input values ++ RegSet saved = RegSet::of(pre_val); ++ if (tosca_live) saved += RegSet::of(V0); ++ if (obj != noreg) saved += RegSet::of(obj); ++ ++ __ push(saved); ++ ++ // Calling the runtime using the regular call_VM_leaf mechanism generates ++ // code (generated by InterpreterMacroAssember::call_VM_leaf_base) ++ // that checks that the *(rfp+frame::interpreter_frame_last_sp) == NULL. ++ // ++ // If we care generating the pre-barrier without a frame (e.g. 
in the ++ // intrinsified Reference.get() routine) then ebp might be pointing to ++ // the caller frame and so this check will most likely fail at runtime. ++ // ++ // Expanding the call directly bypasses the generation of the check. ++ // So when we do not have have a full interpreter frame on the stack ++ // expand_call should be passed true. ++ ++ if (expand_call) { ++ assert(pre_val != A1, "smashed arg"); ++ __ super_call_VM_leaf(CAST_FROM_FN_PTR(address, ShenandoahRuntime::write_ref_field_pre_entry), pre_val, thread); ++ } else { ++ __ call_VM_leaf(CAST_FROM_FN_PTR(address, ShenandoahRuntime::write_ref_field_pre_entry), pre_val, thread); ++ } ++ ++ __ pop(saved); ++ ++ __ bind(done); ++} ++ ++void ShenandoahBarrierSetAssembler::resolve_forward_pointer(MacroAssembler* masm, Register dst, Register tmp) { ++ assert(ShenandoahLoadRefBarrier || ShenandoahCASBarrier, "Should be enabled"); ++ Label is_null; ++ __ beqz(dst, is_null); ++ resolve_forward_pointer_not_null(masm, dst, tmp); ++ __ bind(is_null); ++} ++ ++// IMPORTANT: This must preserve all registers, even SCR1 and SCR2, except those explicitely ++// passed in. ++void ShenandoahBarrierSetAssembler::resolve_forward_pointer_not_null(MacroAssembler* masm, Register dst, Register tmp) { ++ assert(ShenandoahLoadRefBarrier || ShenandoahCASBarrier, "Should be enabled"); ++ // The below loads the mark word, checks if the lowest two bits are ++ // set, and if so, clear the lowest two bits and copy the result ++ // to dst. Otherwise it leaves dst alone. ++ // Implementing this is surprisingly awkward. I do it here by: ++ // - Inverting the mark word ++ // - Test lowest two bits == 0 ++ // - If so, set the lowest two bits ++ // - Invert the result back, and copy to dst ++ ++ Register scr = RA; ++ bool borrow_reg = (tmp == noreg); ++ if (borrow_reg) { ++ // No free registers available. Make one useful. ++ tmp = SCR1; ++ if (tmp == dst) { ++ tmp = SCR2; ++ } ++ __ push(tmp); ++ } ++ ++ assert_different_registers(tmp, scr, dst); ++ ++ Label done; ++ __ movgr2fr_d(fscratch, scr); ++ __ ld_d(tmp, dst, oopDesc::mark_offset_in_bytes()); ++ __ nor(tmp, tmp, R0); ++ __ andi(scr, tmp, markWord::lock_mask_in_place); ++ __ bnez(scr, done); ++ __ ori(tmp, tmp, markWord::marked_value); ++ __ nor(dst, tmp, R0); ++ __ bind(done); ++ __ movfr2gr_d(scr, fscratch); ++ ++ if (borrow_reg) { ++ __ pop(tmp); ++ } ++} ++ ++void ShenandoahBarrierSetAssembler::load_reference_barrier(MacroAssembler* masm, Register dst, Address load_addr, DecoratorSet decorators) { ++ assert(ShenandoahLoadRefBarrier, "Should be enabled"); ++ assert_different_registers(load_addr.base(), load_addr.index(), SCR1, SCR2); ++ ++ bool is_strong = ShenandoahBarrierSet::is_strong_access(decorators); ++ bool is_weak = ShenandoahBarrierSet::is_weak_access(decorators); ++ bool is_phantom = ShenandoahBarrierSet::is_phantom_access(decorators); ++ bool is_native = ShenandoahBarrierSet::is_native_access(decorators); ++ bool is_narrow = UseCompressedOops && !is_native; ++ ++ Label heap_stable, not_cset; ++ __ enter(); ++ __ bstrins_d(SP, R0, 3, 0); ++ Address gc_state(TREG, in_bytes(ShenandoahThreadLocalData::gc_state_offset())); ++ Register tmp = (dst == SCR1) ? 
SCR2 : SCR1; ++ ++ // Check for heap stability ++ if (is_strong) { ++ __ ld_b(tmp, gc_state); ++ __ andi(tmp, tmp, ShenandoahHeap::HAS_FORWARDED); ++ __ beqz(tmp, heap_stable); ++ } else { ++ Label lrb; ++ __ ld_b(tmp, gc_state); ++ __ andi(tmp, tmp, ShenandoahHeap::WEAK_ROOTS); ++ __ bnez(tmp, lrb); ++ ++ __ ld_b(tmp, gc_state); ++ __ andi(tmp, tmp, ShenandoahHeap::HAS_FORWARDED); ++ __ beqz(tmp, heap_stable); ++ __ bind(lrb); ++ } ++ ++ // use A1 for load address ++ Register result_dst = dst; ++ if (dst == A1) { ++ __ move(tmp, dst); ++ dst = tmp; ++ } ++ ++ // Save A0 and A1, unless it is an output register ++ __ push2(A0, A1); ++ __ lea(A1, load_addr); ++ __ move(A0, dst); ++ ++ // Test for in-cset ++ if (is_strong) { ++ __ li(SCR2, ShenandoahHeap::in_cset_fast_test_addr()); ++ __ srli_d(SCR1, A0, ShenandoahHeapRegion::region_size_bytes_shift_jint()); ++ __ ldx_b(SCR2, SCR2, SCR1); ++ __ beqz(SCR2, not_cset); ++ } ++ ++ __ push_call_clobbered_registers_except(RegSet::of(V0)); ++ if (is_strong) { ++ if (is_narrow) { ++ __ li(RA, CAST_FROM_FN_PTR(address, ShenandoahRuntime::load_reference_barrier_strong_narrow)); ++ } else { ++ __ li(RA, CAST_FROM_FN_PTR(address, ShenandoahRuntime::load_reference_barrier_strong)); ++ } ++ } else if (is_weak) { ++ if (is_narrow) { ++ __ li(RA, CAST_FROM_FN_PTR(address, ShenandoahRuntime::load_reference_barrier_weak_narrow)); ++ } else { ++ __ li(RA, CAST_FROM_FN_PTR(address, ShenandoahRuntime::load_reference_barrier_weak)); ++ } ++ } else { ++ assert(is_phantom, "only remaining strength"); ++ assert(!is_narrow, "phantom access cannot be narrow"); ++ __ li(RA, CAST_FROM_FN_PTR(address, ShenandoahRuntime::load_reference_barrier_phantom)); ++ } ++ __ jalr(RA); ++ __ pop_call_clobbered_registers_except(RegSet::of(V0)); ++ ++ __ bind(not_cset); ++ ++ __ move(result_dst, A0); ++ if (result_dst == A0) ++ __ pop2(R0, A1); ++ else ++ __ pop2(A0, A1); ++ ++ __ bind(heap_stable); ++ __ leave(); ++} ++ ++void ShenandoahBarrierSetAssembler::iu_barrier(MacroAssembler* masm, Register dst, Register tmp) { ++ if (ShenandoahIUBarrier) { ++ __ push_call_clobbered_registers(); ++ satb_write_barrier_pre(masm, noreg, dst, TREG, tmp, true, false); ++ __ pop_call_clobbered_registers(); ++ } ++} ++ ++// ++// Arguments: ++// ++// Inputs: ++// src: oop location to load from, might be clobbered ++// ++// Output: ++// dst: oop loaded from src location ++// ++// Kill: ++// SCR1 (scratch reg) ++// ++// Alias: ++// dst: SCR1 (might use SCR1 as temporary output register to avoid clobbering src) ++// ++void ShenandoahBarrierSetAssembler::load_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type, ++ Register dst, Address src, Register tmp1, Register tmp_thread) { ++ // 1: non-reference load, no additional barrier is needed ++ if (!is_reference_type(type)) { ++ BarrierSetAssembler::load_at(masm, decorators, type, dst, src, tmp1, tmp_thread); ++ return; ++ } ++ ++ // 2: load a reference from src location and apply LRB if needed ++ if (ShenandoahBarrierSet::need_load_reference_barrier(decorators, type)) { ++ Register result_dst = dst; ++ ++ // Preserve src location for LRB ++ if (dst == src.base() || dst == src.index() || dst == SCR1) { ++ dst = SCR2; ++ } ++ assert_different_registers(dst, src.base(), src.index()); ++ ++ BarrierSetAssembler::load_at(masm, decorators, type, dst, src, tmp1, tmp_thread); ++ ++ load_reference_barrier(masm, dst, src, decorators); ++ ++ if (dst != result_dst) { ++ __ move(result_dst, dst); ++ dst = result_dst; ++ } ++ } else { ++ 
BarrierSetAssembler::load_at(masm, decorators, type, dst, src, tmp1, tmp_thread); ++ } ++ ++ // 3: apply keep-alive barrier if needed ++ if (ShenandoahBarrierSet::need_keep_alive_barrier(decorators, type)) { ++ __ enter(); ++ __ push_call_clobbered_registers(); ++ satb_write_barrier_pre(masm /* masm */, ++ noreg /* obj */, ++ dst /* pre_val */, ++ TREG /* thread */, ++ tmp1 /* tmp */, ++ true /* tosca_live */, ++ true /* expand_call */); ++ __ pop_call_clobbered_registers(); ++ __ leave(); ++ } ++} ++ ++void ShenandoahBarrierSetAssembler::store_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type, ++ Address dst, Register val, Register tmp1, Register tmp2) { ++ bool on_oop = is_reference_type(type); ++ if (!on_oop) { ++ BarrierSetAssembler::store_at(masm, decorators, type, dst, val, tmp1, tmp2); ++ return; ++ } ++ ++ // flatten object address if needed ++ if (dst.index() == noreg && dst.disp() == 0) { ++ if (dst.base() != A3) { ++ __ move(A3, dst.base()); ++ } ++ } else { ++ __ lea(A3, dst); ++ } ++ ++ shenandoah_write_barrier_pre(masm, ++ A3 /* obj */, ++ tmp2 /* pre_val */, ++ TREG /* thread */, ++ tmp1 /* tmp */, ++ val != noreg /* tosca_live */, ++ false /* expand_call */); ++ ++ if (val == noreg) { ++ BarrierSetAssembler::store_at(masm, decorators, type, Address(A3, 0), noreg, noreg, noreg); ++ } else { ++ iu_barrier(masm, val, tmp1); ++ BarrierSetAssembler::store_at(masm, decorators, type, Address(A3, 0), val, noreg, noreg); ++ } ++} ++ ++void ShenandoahBarrierSetAssembler::try_resolve_jobject_in_native(MacroAssembler* masm, Register jni_env, ++ Register obj, Register tmp, Label& slowpath) { ++ Label done; ++ // Resolve jobject ++ BarrierSetAssembler::try_resolve_jobject_in_native(masm, jni_env, obj, tmp, slowpath); ++ ++ // Check for null. ++ __ beqz(obj, done); ++ ++ assert(obj != SCR1, "need SCR1"); ++ Address gc_state(jni_env, ShenandoahThreadLocalData::gc_state_offset() - JavaThread::jni_environment_offset()); ++ __ lea(SCR1, gc_state); ++ __ ld_b(SCR1, SCR1, 0); ++ ++ // Check for heap in evacuation phase ++ __ andi(SCR1, SCR1, ShenandoahHeap::EVACUATION); ++ __ bnez(SCR1, slowpath); ++ ++ __ bind(done); ++} ++ ++// Special Shenandoah CAS implementation that handles false negatives due ++// to concurrent evacuation. The service is more complex than a ++// traditional CAS operation because the CAS operation is intended to ++// succeed if the reference at addr exactly matches expected or if the ++// reference at addr holds a pointer to a from-space object that has ++// been relocated to the location named by expected. There are two ++// races that must be addressed: ++// a) A parallel thread may mutate the contents of addr so that it points ++// to a different object. In this case, the CAS operation should fail. ++// b) A parallel thread may heal the contents of addr, replacing a ++// from-space pointer held in addr with the to-space pointer ++// representing the new location of the object. ++// Upon entry to cmpxchg_oop, it is assured that new_val equals NULL ++// or it refers to an object that is not being evacuated out of ++// from-space, or it refers to the to-space version of an object that ++// is being evacuated out of from-space. ++// ++// By default the value held in the result register following execution ++// of the generated code sequence is 0 to indicate failure of CAS, ++// non-zero to indicate success. If is_cae, the result is the value most ++// recently fetched from addr rather than a boolean success indicator. 
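// A conceptual sketch, not code from this patch: the four-step retry protocol
// that cmpxchg_oop() implements below, restated over std::atomic<uintptr_t>.
// The Obj type and resolve_forward() helper are invented for illustration; in
// the generated code the equivalent work is done by resolve_forward_pointer().
#include <atomic>
#include <cstdint>

struct Obj { Obj* forwardee; };          // toy object: forwardee == this unless evacuated

static uintptr_t resolve_forward(uintptr_t p) {
  return p ? reinterpret_cast<uintptr_t>(reinterpret_cast<Obj*>(p)->forwardee) : p;
}

// Compare-and-exchange semantics: returns the witnessed value; callers that
// want plain CAS semantics test the return value against 'expected' instead.
uintptr_t shenandoah_cas_sketch(std::atomic<uintptr_t>& slot,
                                uintptr_t expected, uintptr_t new_val) {
  for (;;) {
    // Step 1: ordinary CAS fast path.
    uintptr_t witness = expected;
    if (slot.compare_exchange_strong(witness, new_val)) {
      return witness;                    // witness == expected: success
    }
    // Step 2: possible false negative - the slot may hold the from-space alias.
    if (resolve_forward(witness) != expected) {
      return witness;                    // genuinely different object: fail
    }
    // Step 3: retry with the from-space pointer as the expected value.
    uintptr_t from_space = witness;
    if (slot.compare_exchange_strong(from_space, new_val)) {
      return expected;                   // report the to-space value on success
    }
    // Step 4: another thread may have healed the slot meanwhile; start over.
  }
}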
++// ++// Clobbers SCR1, SCR2 ++void ShenandoahBarrierSetAssembler::cmpxchg_oop(MacroAssembler* masm, ++ Register mem, ++ Register expected, ++ Register new_val, ++ bool acquire, bool is_cae, ++ Register result) { ++ Register tmp1 = SCR2; ++ Register tmp2 = SCR1; ++ bool is_narrow = UseCompressedOops; ++ ++ assert_different_registers(mem, expected, tmp1, tmp2); ++ assert_different_registers(mem, new_val, tmp1, tmp2); ++ ++ Address addr(mem); ++ Label step4, done_succ, done_fail, done; ++ ++ // There are two ways to reach this label. Initial entry into the ++ // cmpxchg_oop code expansion starts at step1 (which is equivalent ++ // to label step4). Additionally, in the rare case that four steps ++ // are required to perform the requested operation, the fourth step ++ // is the same as the first. On a second pass through step 1, ++ // control may flow through step 2 on its way to failure. It will ++ // not flow from step 2 to step 3 since we are assured that the ++ // memory at addr no longer holds a from-space pointer. ++ // ++ // The comments that immediately follow the step4 label apply only ++ // to the case in which control reaches this label by branch from ++ // step 3. ++ ++ __ bind (step4); ++ ++ // Step 4. CAS has failed because the value most recently fetched ++ // from addr is no longer the from-space pointer held in tmp2. If a ++ // different thread replaced the in-memory value with its equivalent ++ // to-space pointer, then CAS may still be able to succeed. The ++ // value held in the expected register has not changed. ++ // ++ // It is extremely rare we reach this point. For this reason, the ++ // implementation opts for smaller rather than potentially faster ++ // code. Ultimately, smaller code for this rare case most likely ++ // delivers higher overall throughput by enabling improved icache ++ // performance. ++ ++ // Step 1. Fast-path. ++ // ++ // Try to CAS with given arguments. If successful, then we are done. ++ // ++ // No label required for step 1. ++ ++ if (is_narrow) { ++ __ cmpxchg32(addr, expected, new_val, tmp2, false /* sign */, false /* retold */, ++ acquire /* barrier */, false /* weak */, true /* exchange */); ++ } else { ++ __ cmpxchg(addr, expected, new_val, tmp2, false /* retold */, acquire /* barrier */, ++ false /* weak */, true /* exchange */); ++ } ++ // tmp2 holds value fetched. ++ ++ // If expected equals null but tmp2 does not equal null, the ++ // following branches to done to report failure of CAS. If both ++ // expected and tmp2 equal null, the following branches to done to ++ // report success of CAS. There's no need for a special test of ++ // expected equal to null. ++ ++ __ beq(tmp2, expected, done_succ); ++ // if CAS failed, fall through to step 2 ++ ++ // Step 2. CAS has failed because the value held at addr does not ++ // match expected. This may be a false negative because the value fetched ++ // from addr (now held in tmp2) may be a from-space pointer to the ++ // original copy of same object referenced by to-space pointer expected. ++ // ++ // To resolve this, it suffices to find the forward pointer associated ++ // with fetched value. If this matches expected, retry CAS with new ++ // parameters. If this mismatches, then we have a legitimate ++ // failure, and we're done. ++ // ++ // No need for step2 label. 
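// The resolve_forward_pointer() call in step 2 below ultimately runs the
// invert / test / set / invert mark-word trick from
// resolve_forward_pointer_not_null() earlier in this file. A self-contained
// C++ restatement of that trick, assuming the usual HotSpot markWord
// constants (low two bits, value 0b11 when an object is forwarded);
// illustration only, not code from this patch.
#include <cstdint>

constexpr uint64_t kLockMask    = 0x3;   // markWord::lock_mask_in_place
constexpr uint64_t kMarkedValue = 0x3;   // markWord::marked_value

// Returns the forwardee encoded in 'mark' if the object is forwarded,
// otherwise returns 'obj' unchanged.
inline uint64_t resolve_forwardee_sketch(uint64_t obj, uint64_t mark) {
  uint64_t inv = ~mark;                  // nor tmp, tmp, R0
  if ((inv & kLockMask) == 0) {          // original low bits were 0b11: forwarded
    return ~(inv | kMarkedValue);        // == mark & ~0b11: the forwardee address
  }
  return obj;                            // not forwarded: leave the object alone
}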
++ ++ // overwrite tmp1 with from-space pointer fetched from memory ++ __ move(tmp1, tmp2); ++ ++ if (is_narrow) { ++ // Decode tmp1 in order to resolve its forward pointer ++ __ decode_heap_oop(tmp1); ++ } ++ resolve_forward_pointer(masm, tmp1); ++ if (is_narrow) { ++ // Encode tmp1 to compare against expected. ++ __ encode_heap_oop(tmp1); ++ } ++ ++ // Does forwarded value of fetched from-space pointer match original ++ // value of expected? If tmp1 holds null, this comparison will fail ++ // because we know from step1 that expected is not null. There is ++ // no need for a separate test for tmp1 (the value originally held ++ // in memory) equal to null. ++ ++ // If not, then the failure was legitimate and we're done. ++ // Branching to done with NE condition denotes failure. ++ __ bne(tmp1, expected, done_fail); ++ ++ // Fall through to step 3. No need for step3 label. ++ ++ // Step 3. We've confirmed that the value originally held in memory ++ // (now held in tmp2) pointed to from-space version of original ++ // expected value. Try the CAS again with the from-space expected ++ // value. If it now succeeds, we're good. ++ // ++ // Note: tmp2 holds encoded from-space pointer that matches to-space ++ // object residing at expected. tmp2 is the new "expected". ++ ++ // Note that macro implementation of __cmpxchg cannot use same register ++ // tmp2 for result and expected since it overwrites result before it ++ // compares result with expected. ++ if (is_narrow) { ++ __ cmpxchg32(addr, tmp2, new_val, tmp1, false /* sign */, false /* retold */, ++ acquire /* barrier */, false /* weak */, false /* exchange */); ++ } else { ++ __ cmpxchg(addr, tmp2, new_val, tmp1, false /* retold */, acquire /* barrier */, ++ false /* weak */, false /* exchange */); ++ } ++ // tmp1 set iff success, tmp2 holds value fetched. ++ ++ // If fetched value did not equal the new expected, this could ++ // still be a false negative because some other thread may have ++ // newly overwritten the memory value with its to-space equivalent. ++ __ beqz(tmp1, step4); ++ ++ if (is_cae) { ++ // We're falling through to done to indicate success. ++ __ move(tmp2, expected); ++ } ++ ++ __ bind(done_succ); ++ if (!is_cae) { ++ __ li(tmp2, 1L); ++ } ++ __ b(done); ++ ++ __ bind(done_fail); ++ if (!is_cae) { ++ __ li(tmp2, 0L); ++ } ++ ++ __ bind(done); ++ __ move(result, tmp2); ++} ++ ++#undef __ ++ ++#ifdef COMPILER1 ++ ++#define __ ce->masm()-> ++ ++void ShenandoahBarrierSetAssembler::gen_pre_barrier_stub(LIR_Assembler* ce, ShenandoahPreBarrierStub* stub) { ++ ShenandoahBarrierSetC1* bs = (ShenandoahBarrierSetC1*)BarrierSet::barrier_set()->barrier_set_c1(); ++ // At this point we know that marking is in progress. ++ // If do_load() is true then we have to emit the ++ // load of the previous value; otherwise it has already ++ // been loaded into _pre_val. 
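// What the pre-barrier stub below boils down to, written as ordinary C++ (an
// illustration, not patch code): while concurrent marking is active, the
// reference value about to be overwritten must be recorded so the
// snapshot-at-the-beginning invariant holds. enqueue_previous_value() is an
// invented name standing in for the SATB queue / write_ref_field_pre_entry
// machinery.
static void enqueue_previous_value(void* prev) { (void)prev; /* record prev for the marker */ }

inline void satb_pre_barrier_sketch(bool marking_active, void** field) {
  if (!marking_active) {
    return;                              // barrier is a no-op outside marking
  }
  void* previous = *field;               // the reference being overwritten
  if (previous != nullptr) {
    enqueue_previous_value(previous);    // keep the old target reachable this cycle
  }
}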
++ ++ __ bind(*stub->entry()); ++ ++ assert(stub->pre_val()->is_register(), "Precondition."); ++ ++ Register pre_val_reg = stub->pre_val()->as_register(); ++ ++ if (stub->do_load()) { ++ ce->mem2reg(stub->addr(), stub->pre_val(), T_OBJECT, stub->patch_code(), stub->info(), false /*wide*/, false /*unaligned*/); ++ } ++ __ beqz(pre_val_reg, *stub->continuation()); ++ ce->store_parameter(stub->pre_val()->as_register(), 0); ++ __ call(bs->pre_barrier_c1_runtime_code_blob()->code_begin(), relocInfo::runtime_call_type); ++ __ b(*stub->continuation()); ++} ++ ++void ShenandoahBarrierSetAssembler::gen_load_reference_barrier_stub(LIR_Assembler* ce, ShenandoahLoadReferenceBarrierStub* stub) { ++ ShenandoahBarrierSetC1* bs = (ShenandoahBarrierSetC1*)BarrierSet::barrier_set()->barrier_set_c1(); ++ __ bind(*stub->entry()); ++ ++ DecoratorSet decorators = stub->decorators(); ++ bool is_strong = ShenandoahBarrierSet::is_strong_access(decorators); ++ bool is_weak = ShenandoahBarrierSet::is_weak_access(decorators); ++ bool is_phantom = ShenandoahBarrierSet::is_phantom_access(decorators); ++ bool is_native = ShenandoahBarrierSet::is_native_access(decorators); ++ ++ Register obj = stub->obj()->as_register(); ++ Register res = stub->result()->as_register(); ++ Register addr = stub->addr()->as_pointer_register(); ++ Register tmp1 = stub->tmp1()->as_register(); ++ Register tmp2 = stub->tmp2()->as_register(); ++ ++ assert(res == V0, "result must arrive in V0"); ++ ++ if (res != obj) { ++ __ move(res, obj); ++ } ++ ++ if (is_strong) { ++ // Check for object in cset. ++ __ li(tmp2, ShenandoahHeap::in_cset_fast_test_addr()); ++ __ srli_d(tmp1, res, ShenandoahHeapRegion::region_size_bytes_shift_jint()); ++ __ ldx_b(tmp2, tmp2, tmp1); ++ __ beqz(tmp2, *stub->continuation()); ++ } ++ ++ ce->store_parameter(res, 0); ++ ce->store_parameter(addr, 1); ++ if (is_strong) { ++ if (is_native) { ++ __ call(bs->load_reference_barrier_strong_native_rt_code_blob()->code_begin(), relocInfo::runtime_call_type); ++ } else { ++ __ call(bs->load_reference_barrier_strong_rt_code_blob()->code_begin(), relocInfo::runtime_call_type); ++ } ++ } else if (is_weak) { ++ __ call(bs->load_reference_barrier_weak_rt_code_blob()->code_begin(), relocInfo::runtime_call_type); ++ } else { ++ assert(is_phantom, "only remaining strength"); ++ __ call(bs->load_reference_barrier_phantom_rt_code_blob()->code_begin(), relocInfo::runtime_call_type); ++ } ++ ++ __ b(*stub->continuation()); ++} ++ ++#undef __ ++ ++#define __ sasm-> ++ ++void ShenandoahBarrierSetAssembler::generate_c1_pre_barrier_runtime_stub(StubAssembler* sasm) { ++ __ prologue("shenandoah_pre_barrier", false); ++ ++ // arg0 : previous value of memory ++ ++ BarrierSet* bs = BarrierSet::barrier_set(); ++ ++ const Register pre_val = A0; ++ const Register thread = TREG; ++ const Register tmp = SCR1; ++ ++ Address queue_index(thread, in_bytes(ShenandoahThreadLocalData::satb_mark_queue_index_offset())); ++ Address buffer(thread, in_bytes(ShenandoahThreadLocalData::satb_mark_queue_buffer_offset())); ++ ++ Label done; ++ Label runtime; ++ ++ // Is marking still active? ++ Address gc_state(thread, in_bytes(ShenandoahThreadLocalData::gc_state_offset())); ++ __ ld_b(tmp, gc_state); ++ __ andi(tmp, tmp, ShenandoahHeap::MARKING); ++ __ beqz(tmp, done); ++ ++ // Can we store original value in the thread's buffer? 
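// The buffer test that follows, restated in C++ so the index arithmetic is
// easier to follow. A sketch with invented types, not patch code; the real
// queue fields live in ShenandoahThreadLocalData and the slow path is
// ShenandoahRuntime::write_ref_field_pre_entry.
#include <cstddef>

struct SatbQueueSketch {
  void**  buffer;                        // satb_mark_queue_buffer
  size_t  index;                         // satb_mark_queue_index, in bytes, counts down

  void runtime_pre_barrier(void* pre_val) { (void)pre_val; /* hand off to the VM */ }

  void record(void* pre_val) {
    if (index == 0) {                    // buffer exhausted: take the runtime call
      runtime_pre_barrier(pre_val);
      return;
    }
    index -= sizeof(void*);              // addi_d tmp, tmp, -wordSize
    buffer[index / sizeof(void*)] = pre_val;   // st_d SCR2, tmp, 0 with tmp = buffer + index
  }
};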
++ __ ld_d(tmp, queue_index); ++ __ beqz(tmp, runtime); ++ ++ __ addi_d(tmp, tmp, -wordSize); ++ __ st_d(tmp, queue_index); ++ __ ld_d(SCR2, buffer); ++ __ add_d(tmp, tmp, SCR2); ++ __ load_parameter(0, SCR2); ++ __ st_d(SCR2, tmp, 0); ++ __ b(done); ++ ++ __ bind(runtime); ++ __ push_call_clobbered_registers(); ++ __ load_parameter(0, pre_val); ++ __ call_VM_leaf(CAST_FROM_FN_PTR(address, ShenandoahRuntime::write_ref_field_pre_entry), pre_val, thread); ++ __ pop_call_clobbered_registers(); ++ __ bind(done); ++ ++ __ epilogue(); ++} ++ ++void ShenandoahBarrierSetAssembler::generate_c1_load_reference_barrier_runtime_stub(StubAssembler* sasm, DecoratorSet decorators) { ++ __ prologue("shenandoah_load_reference_barrier", false); ++ __ bstrins_d(SP, R0, 3, 0); ++ // arg0 : object to be resolved ++ ++ __ push_call_clobbered_registers_except(RegSet::of(V0)); ++ __ load_parameter(0, A0); ++ __ load_parameter(1, A1); ++ ++ bool is_strong = ShenandoahBarrierSet::is_strong_access(decorators); ++ bool is_weak = ShenandoahBarrierSet::is_weak_access(decorators); ++ bool is_phantom = ShenandoahBarrierSet::is_phantom_access(decorators); ++ bool is_native = ShenandoahBarrierSet::is_native_access(decorators); ++ if (is_strong) { ++ if (is_native) { ++ __ li(RA, CAST_FROM_FN_PTR(address, ShenandoahRuntime::load_reference_barrier_strong)); ++ } else { ++ if (UseCompressedOops) { ++ __ li(RA, CAST_FROM_FN_PTR(address, ShenandoahRuntime::load_reference_barrier_strong_narrow)); ++ } else { ++ __ li(RA, CAST_FROM_FN_PTR(address, ShenandoahRuntime::load_reference_barrier_strong)); ++ } ++ } ++ } else if (is_weak) { ++ assert(!is_native, "weak must not be called off-heap"); ++ if (UseCompressedOops) { ++ __ li(RA, CAST_FROM_FN_PTR(address, ShenandoahRuntime::load_reference_barrier_weak_narrow)); ++ } else { ++ __ li(RA, CAST_FROM_FN_PTR(address, ShenandoahRuntime::load_reference_barrier_weak)); ++ } ++ } else { ++ assert(is_phantom, "only remaining strength"); ++ assert(is_native, "phantom must only be called off-heap"); ++ __ li(RA, CAST_FROM_FN_PTR(address, ShenandoahRuntime::load_reference_barrier_phantom)); ++ } ++ __ jalr(RA); ++ __ pop_call_clobbered_registers_except(RegSet::of(V0)); ++ ++ __ epilogue(); ++} ++ ++#undef __ ++ ++#endif // COMPILER1 +diff --git a/src/hotspot/cpu/loongarch/gc/shenandoah/shenandoahBarrierSetAssembler_loongarch.hpp b/src/hotspot/cpu/loongarch/gc/shenandoah/shenandoahBarrierSetAssembler_loongarch.hpp +new file mode 100644 +index 00000000000..e2c862f6c5c +--- /dev/null ++++ b/src/hotspot/cpu/loongarch/gc/shenandoah/shenandoahBarrierSetAssembler_loongarch.hpp +@@ -0,0 +1,85 @@ ++/* ++ * Copyright (c) 2018, 2021, Red Hat, Inc. All rights reserved. ++ * Copyright (c) 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). 
++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#ifndef CPU_LOONGARCH_GC_SHENANDOAH_SHENANDOAHBARRIERSETASSEMBLER_LOONGARCH_HPP ++#define CPU_LOONGARCH_GC_SHENANDOAH_SHENANDOAHBARRIERSETASSEMBLER_LOONGARCH_HPP ++ ++#include "asm/macroAssembler.hpp" ++#include "gc/shared/barrierSetAssembler.hpp" ++#include "gc/shenandoah/shenandoahBarrierSet.hpp" ++#ifdef COMPILER1 ++class LIR_Assembler; ++class ShenandoahPreBarrierStub; ++class ShenandoahLoadReferenceBarrierStub; ++class StubAssembler; ++#endif ++class StubCodeGenerator; ++ ++class ShenandoahBarrierSetAssembler: public BarrierSetAssembler { ++private: ++ ++ void satb_write_barrier_pre(MacroAssembler* masm, ++ Register obj, ++ Register pre_val, ++ Register thread, ++ Register tmp, ++ bool tosca_live, ++ bool expand_call); ++ void shenandoah_write_barrier_pre(MacroAssembler* masm, ++ Register obj, ++ Register pre_val, ++ Register thread, ++ Register tmp, ++ bool tosca_live, ++ bool expand_call); ++ ++ void resolve_forward_pointer(MacroAssembler* masm, Register dst, Register tmp = noreg); ++ void resolve_forward_pointer_not_null(MacroAssembler* masm, Register dst, Register tmp = noreg); ++ void load_reference_barrier(MacroAssembler* masm, Register dst, Address load_addr, DecoratorSet decorators); ++ ++public: ++ ++ void iu_barrier(MacroAssembler* masm, Register dst, Register tmp); ++ ++#ifdef COMPILER1 ++ void gen_pre_barrier_stub(LIR_Assembler* ce, ShenandoahPreBarrierStub* stub); ++ void gen_load_reference_barrier_stub(LIR_Assembler* ce, ShenandoahLoadReferenceBarrierStub* stub); ++ void generate_c1_pre_barrier_runtime_stub(StubAssembler* sasm); ++ void generate_c1_load_reference_barrier_runtime_stub(StubAssembler* sasm, DecoratorSet decorators); ++#endif ++ ++ virtual void arraycopy_prologue(MacroAssembler* masm, DecoratorSet decorators, bool is_oop, ++ Register src, Register dst, Register count, RegSet saved_regs); ++ virtual void load_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type, ++ Register dst, Address src, Register tmp1, Register tmp_thread); ++ virtual void store_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type, ++ Address dst, Register val, Register tmp1, Register tmp2); ++ virtual void try_resolve_jobject_in_native(MacroAssembler* masm, Register jni_env, ++ Register obj, Register tmp, Label& slowpath); ++ void cmpxchg_oop(MacroAssembler* masm, Register mem, Register expected, Register new_val, ++ bool acquire, bool is_cae, Register result); ++}; ++ ++#endif // CPU_LOONGARCH_GC_SHENANDOAH_SHENANDOAHBARRIERSETASSEMBLER_LOONGARCH_HPP +diff --git a/src/hotspot/cpu/loongarch/gc/shenandoah/shenandoah_loongarch_64.ad b/src/hotspot/cpu/loongarch/gc/shenandoah/shenandoah_loongarch_64.ad +new file mode 100644 +index 00000000000..04f67d23157 +--- /dev/null ++++ b/src/hotspot/cpu/loongarch/gc/shenandoah/shenandoah_loongarch_64.ad +@@ -0,0 +1,222 @@ ++// ++// Copyright (c) 2018, Red Hat, Inc. All rights reserved. ++// Copyright (c) 2022, Loongson Technology. All rights reserved. ++// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
++// ++// This code is free software; you can redistribute it and/or modify it ++// under the terms of the GNU General Public License version 2 only, as ++// published by the Free Software Foundation. ++// ++// This code is distributed in the hope that it will be useful, but WITHOUT ++// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++// version 2 for more details (a copy is included in the LICENSE file that ++// accompanied this code). ++// ++// You should have received a copy of the GNU General Public License version ++// 2 along with this work; if not, write to the Free Software Foundation, ++// Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++// ++// Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++// or visit www.oracle.com if you need additional information or have any ++// questions. ++// ++// ++ ++source_hpp %{ ++#include "gc/shenandoah/shenandoahBarrierSet.hpp" ++#include "gc/shenandoah/shenandoahBarrierSetAssembler.hpp" ++%} ++ ++encode %{ ++ enc_class loongarch_enc_cmpxchg_oop_shenandoah(memory mem, mRegP oldval, mRegP newval, mRegI res) %{ ++ MacroAssembler _masm(&cbuf); ++ guarantee($mem$$index == -1 && $mem$$disp == 0, "impossible encoding"); ++ ShenandoahBarrierSet::assembler()->cmpxchg_oop(&_masm, $mem$$Register, $oldval$$Register, $newval$$Register, ++ /*acquire*/ false, /*is_cae*/ false, $res$$Register); ++ %} ++ ++ enc_class loongarch_enc_cmpxchg_acq_oop_shenandoah(memory mem, mRegP oldval, mRegP newval, mRegI res) %{ ++ MacroAssembler _masm(&cbuf); ++ guarantee($mem$$index == -1 && $mem$$disp == 0, "impossible encoding"); ++ ShenandoahBarrierSet::assembler()->cmpxchg_oop(&_masm, $mem$$Register, $oldval$$Register, $newval$$Register, ++ /*acquire*/ true, /*is_cae*/ false, $res$$Register); ++ %} ++%} ++ ++instruct compareAndSwapP_shenandoah(mRegI res, indirect mem, mRegP oldval, mRegP newval) %{ ++ match(Set res (ShenandoahCompareAndSwapP mem (Binary oldval newval))); ++ ++ format %{ ++ "cmpxchg_shenandoah $mem, $oldval, $newval\t# (ptr) if $mem == $oldval then $mem <-- $newval" ++ %} ++ ++ ins_encode(loongarch_enc_cmpxchg_oop_shenandoah(mem, oldval, newval, res)); ++ ++ ins_pipe(pipe_slow); ++%} ++ ++instruct compareAndSwapN_shenandoah(mRegI res, indirect mem, mRegN oldval, mRegN newval) %{ ++ match(Set res (ShenandoahCompareAndSwapN mem (Binary oldval newval))); ++ ++ format %{ ++ "cmpxchgw_shenandoah $mem, $oldval, $newval\t# (ptr) if $mem == $oldval then $mem <-- $newval" ++ %} ++ ++ ins_encode %{ ++ ShenandoahBarrierSet::assembler()->cmpxchg_oop(&_masm, $mem$$Register, $oldval$$Register, $newval$$Register, ++ /*acquire*/ false, /*is_cae*/ false, $res$$Register); ++ %} ++ ++ ins_pipe(pipe_slow); ++%} ++ ++instruct compareAndSwapPAcq_shenandoah(mRegI res, indirect mem, mRegP oldval, mRegP newval) %{ ++ match(Set res (ShenandoahCompareAndSwapP mem (Binary oldval newval))); ++ ++ format %{ ++ "cmpxchg_acq_shenandoah_oop $mem, $oldval, $newval\t# (ptr) if $mem == $oldval then $mem <-- $newval" ++ %} ++ ++ ins_encode(loongarch_enc_cmpxchg_acq_oop_shenandoah(mem, oldval, newval, res)); ++ ++ ins_pipe(pipe_slow); ++%} ++ ++instruct compareAndSwapNAcq_shenandoah(mRegI res, indirect mem, mRegN oldval, mRegN newval) %{ ++ match(Set res (ShenandoahCompareAndSwapN mem (Binary oldval newval))); ++ ++ format %{ ++ "cmpxchgw_acq_shenandoah_narrow_oop $mem, $oldval, $newval\t# (ptr) if $mem == $oldval then $mem <-- $newval" ++ %} ++ ++ ins_encode %{ 
++ ShenandoahBarrierSet::assembler()->cmpxchg_oop(&_masm, $mem$$Register, $oldval$$Register, $newval$$Register, ++ /*acquire*/ true, /*is_cae*/ false, $res$$Register); ++ %} ++ ++ ins_pipe(pipe_slow); ++%} ++ ++instruct compareAndExchangeN_shenandoah(mRegN res, indirect mem, mRegN oldval, mRegN newval) %{ ++ match(Set res (ShenandoahCompareAndExchangeN mem (Binary oldval newval))); ++ ++ format %{ ++ "cmpxchgw_shenandoah $res = $mem, $oldval, $newval\t# (narrow oop, weak) if $mem == $oldval then $mem <-- $newval" ++ %} ++ ++ ins_encode %{ ++ ShenandoahBarrierSet::assembler()->cmpxchg_oop(&_masm, $mem$$Register, $oldval$$Register, $newval$$Register, ++ /*acquire*/ false, /*is_cae*/ true, $res$$Register); ++ %} ++ ++ ins_pipe(pipe_slow); ++%} ++ ++instruct compareAndExchangeP_shenandoah(mRegP res, indirect mem, mRegP oldval, mRegP newval) %{ ++ match(Set res (ShenandoahCompareAndExchangeP mem (Binary oldval newval))); ++ ++ format %{ ++ "cmpxchg_shenandoah $mem, $oldval, $newval\t# (ptr) if $mem == $oldval then $mem <-- $newval" ++ %} ++ ++ ins_encode %{ ++ ShenandoahBarrierSet::assembler()->cmpxchg_oop(&_masm, $mem$$Register, $oldval$$Register, $newval$$Register, ++ /*acquire*/ false, /*is_cae*/ true, $res$$Register); ++ %} ++ ++ ins_pipe(pipe_slow); ++%} ++ ++instruct compareAndExchangeNAcq_shenandoah(mRegN res, indirect mem, mRegN oldval, mRegN newval) %{ ++ match(Set res (ShenandoahCompareAndExchangeN mem (Binary oldval newval))); ++ ++ format %{ ++ "cmpxchgw_acq_shenandoah $res = $mem, $oldval, $newval\t# (narrow oop, weak) if $mem == $oldval then $mem <-- $newval" ++ %} ++ ++ ins_encode %{ ++ ShenandoahBarrierSet::assembler()->cmpxchg_oop(&_masm, $mem$$Register, $oldval$$Register, $newval$$Register, ++ /*acquire*/ true, /*is_cae*/ true, $res$$Register); ++ %} ++ ++ ins_pipe(pipe_slow); ++%} ++ ++instruct compareAndExchangePAcq_shenandoah(mRegP res, indirect mem, mRegP oldval, mRegP newval) %{ ++ match(Set res (ShenandoahCompareAndExchangeP mem (Binary oldval newval))); ++ ++ format %{ ++ "cmpxchg_acq_shenandoah $mem, $oldval, $newval\t# (ptr) if $mem == $oldval then $mem <-- $newval" ++ %} ++ ++ ins_encode %{ ++ ShenandoahBarrierSet::assembler()->cmpxchg_oop(&_masm, $mem$$Register, $oldval$$Register, $newval$$Register, ++ /*acquire*/ true, /*is_cae*/ true, $res$$Register); ++ %} ++ ++ ins_pipe(pipe_slow); ++%} ++ ++instruct weakCompareAndSwapN_shenandoah(mRegI res, indirect mem, mRegN oldval, mRegN newval) %{ ++ match(Set res (ShenandoahWeakCompareAndSwapN mem (Binary oldval newval))); ++ ++ format %{ ++ "cmpxchgw_shenandoah $res = $mem, $oldval, $newval\t# (narrow oop, weak) if $mem == $oldval then $mem <-- $newval" ++ %} ++ ins_encode %{ ++ // Weak is not currently supported by ShenandoahBarrierSet::cmpxchg_oop ++ ShenandoahBarrierSet::assembler()->cmpxchg_oop(&_masm, $mem$$Register, $oldval$$Register, $newval$$Register, ++ /*acquire*/ false, /*is_cae*/ false, $res$$Register); ++ %} ++ ++ ins_pipe(pipe_slow); ++%} ++ ++instruct weakCompareAndSwapP_shenandoah(mRegI res, indirect mem, mRegP oldval, mRegP newval) %{ ++ match(Set res (ShenandoahWeakCompareAndSwapP mem (Binary oldval newval))); ++ ++ format %{ ++ "cmpxchg_shenandoah $res = $mem, $oldval, $newval\t# (ptr, weak) if $mem == $oldval then $mem <-- $newval" ++ %} ++ ++ ins_encode %{ ++ // Weak is not currently supported by ShenandoahBarrierSet::cmpxchg_oop ++ ShenandoahBarrierSet::assembler()->cmpxchg_oop(&_masm, $mem$$Register, $oldval$$Register, $newval$$Register, ++ /*acquire*/ false, /*is_cae*/ false, $res$$Register); ++ %} 
++ ++ ins_pipe(pipe_slow); ++%} ++ ++instruct weakCompareAndSwapNAcq_shenandoah(mRegI res, indirect mem, mRegN oldval, mRegN newval) %{ ++ match(Set res (ShenandoahWeakCompareAndSwapN mem (Binary oldval newval))); ++ ++ format %{ ++ "cmpxchgw_acq_shenandoah $res = $mem, $oldval, $newval\t# (narrow oop, weak) if $mem == $oldval then $mem <-- $newval" ++ %} ++ ++ ins_encode %{ ++ // Weak is not currently supported by ShenandoahBarrierSet::cmpxchg_oop ++ ShenandoahBarrierSet::assembler()->cmpxchg_oop(&_masm, $mem$$Register, $oldval$$Register, $newval$$Register, ++ /*acquire*/ true, /*is_cae*/ false, $res$$Register); ++ %} ++ ++ ins_pipe(pipe_slow); ++%} ++ ++instruct weakCompareAndSwapPAcq_shenandoah(mRegI res, indirect mem, mRegP oldval, mRegP newval) %{ ++ match(Set res (ShenandoahWeakCompareAndSwapP mem (Binary oldval newval))); ++ ++ format %{ ++ "cmpxchg_acq_shenandoah $res = $mem, $oldval, $newval\t# (ptr, weak) if $mem == $oldval then $mem <-- $newval" ++ %} ++ ++ ins_encode %{ ++ // Weak is not currently supported by ShenandoahBarrierSet::cmpxchg_oop ++ ShenandoahBarrierSet::assembler()->cmpxchg_oop(&_masm, $mem$$Register, $oldval$$Register, $newval$$Register, ++ /*acquire*/ true, /*is_cae*/ false, $res$$Register); ++ %} ++ ++ ins_pipe(pipe_slow); ++%} +diff --git a/src/hotspot/cpu/loongarch/gc/z/zBarrierSetAssembler_loongarch.cpp b/src/hotspot/cpu/loongarch/gc/z/zBarrierSetAssembler_loongarch.cpp +new file mode 100644 +index 00000000000..372d80cf11b +--- /dev/null ++++ b/src/hotspot/cpu/loongarch/gc/z/zBarrierSetAssembler_loongarch.cpp +@@ -0,0 +1,462 @@ ++/* ++ * Copyright (c) 2019, 2021, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2021, 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ */ ++ ++#include "precompiled.hpp" ++#include "asm/macroAssembler.inline.hpp" ++#include "code/codeBlob.hpp" ++#include "code/vmreg.inline.hpp" ++#include "gc/z/zBarrier.inline.hpp" ++#include "gc/z/zBarrierSet.hpp" ++#include "gc/z/zBarrierSetAssembler.hpp" ++#include "gc/z/zBarrierSetRuntime.hpp" ++#include "gc/z/zThreadLocalData.hpp" ++#include "memory/resourceArea.hpp" ++#include "runtime/sharedRuntime.hpp" ++#include "utilities/macros.hpp" ++#ifdef COMPILER1 ++#include "c1/c1_LIRAssembler.hpp" ++#include "c1/c1_MacroAssembler.hpp" ++#include "gc/z/c1/zBarrierSetC1.hpp" ++#endif // COMPILER1 ++#ifdef COMPILER2 ++#include "gc/z/c2/zBarrierSetC2.hpp" ++#endif // COMPILER2 ++ ++#ifdef PRODUCT ++#define BLOCK_COMMENT(str) /* nothing */ ++#else ++#define BLOCK_COMMENT(str) __ block_comment(str) ++#endif ++ ++#undef __ ++#define __ masm-> ++ ++#define A0 RA0 ++#define A1 RA1 ++#define T4 RT4 ++ ++void ZBarrierSetAssembler::load_at(MacroAssembler* masm, ++ DecoratorSet decorators, ++ BasicType type, ++ Register dst, ++ Address src, ++ Register tmp1, ++ Register tmp_thread) { ++ if (!ZBarrierSet::barrier_needed(decorators, type)) { ++ // Barrier not needed ++ BarrierSetAssembler::load_at(masm, decorators, type, dst, src, tmp1, tmp_thread); ++ return; ++ } ++ ++ // Allocate scratch register ++ Register scratch = tmp1; ++ ++ assert_different_registers(dst, scratch, SCR1); ++ ++ Label done; ++ ++ // ++ // Fast Path ++ // ++ ++ // Load address ++ __ lea(scratch, src); ++ ++ // Load oop at address ++ __ ld_ptr(dst, scratch, 0); ++ ++ // Test address bad mask ++ __ ld_ptr(SCR1, address_bad_mask_from_thread(TREG)); ++ __ andr(SCR1, dst, SCR1); ++ __ beqz(SCR1, done); ++ ++ // ++ // Slow path ++ // ++ __ enter(); ++ ++ if (dst != V0) { ++ __ push(V0); ++ } ++ __ push_call_clobbered_registers_except(RegSet::of(V0)); ++ ++ if (dst != A0) { ++ __ move(A0, dst); ++ } ++ __ move(A1, scratch); ++ __ MacroAssembler::call_VM_leaf_base(ZBarrierSetRuntime::load_barrier_on_oop_field_preloaded_addr(decorators), 2); ++ ++ __ pop_call_clobbered_registers_except(RegSet::of(V0)); ++ ++ // Make sure dst has the return value. ++ if (dst != V0) { ++ __ move(dst, V0); ++ __ pop(V0); ++ } ++ __ leave(); ++ ++ __ bind(done); ++} ++ ++#ifdef ASSERT ++ ++void ZBarrierSetAssembler::store_at(MacroAssembler* masm, ++ DecoratorSet decorators, ++ BasicType type, ++ Address dst, ++ Register val, ++ Register tmp1, ++ Register tmp2) { ++ // Verify value ++ if (is_reference_type(type)) { ++ // Note that src could be noreg, which means we ++ // are storing null and can skip verification. ++ if (val != noreg) { ++ Label done; ++ ++ // tmp1 and tmp2 are often set to noreg. ++ ++ __ ld_ptr(AT, address_bad_mask_from_thread(TREG)); ++ __ andr(AT, val, AT); ++ __ beqz(AT, done); ++ __ stop("Verify oop store failed"); ++ __ should_not_reach_here(); ++ __ bind(done); ++ } ++ } ++ ++ // Store value ++ BarrierSetAssembler::store_at(masm, decorators, type, dst, val, tmp1, tmp2); ++} ++ ++#endif // ASSERT ++ ++void ZBarrierSetAssembler::arraycopy_prologue(MacroAssembler* masm, ++ DecoratorSet decorators, ++ bool is_oop, ++ Register src, ++ Register dst, ++ Register count, ++ RegSet saved_regs) { ++ if (!is_oop) { ++ // Barrier not needed ++ return; ++ } ++ ++ BLOCK_COMMENT("ZBarrierSetAssembler::arraycopy_prologue {"); ++ ++ __ push(saved_regs); ++ ++ if (count == A0) { ++ if (src == A1) { ++ // exactly backwards!! 
++ __ move(AT, A0); ++ __ move(A0, A1); ++ __ move(A1, AT); ++ } else { ++ __ move(A1, count); ++ __ move(A0, src); ++ } ++ } else { ++ __ move(A0, src); ++ __ move(A1, count); ++ } ++ ++ __ call_VM_leaf(ZBarrierSetRuntime::load_barrier_on_oop_array_addr(), 2); ++ ++ __ pop(saved_regs); ++ ++ BLOCK_COMMENT("} ZBarrierSetAssembler::arraycopy_prologue"); ++} ++ ++void ZBarrierSetAssembler::try_resolve_jobject_in_native(MacroAssembler* masm, ++ Register jni_env, ++ Register robj, ++ Register tmp, ++ Label& slowpath) { ++ BLOCK_COMMENT("ZBarrierSetAssembler::try_resolve_jobject_in_native {"); ++ ++ assert_different_registers(jni_env, robj, tmp); ++ ++ // Resolve jobject ++ BarrierSetAssembler::try_resolve_jobject_in_native(masm, jni_env, robj, tmp, slowpath); ++ ++ // The Address offset is too large to direct load - -784. Our range is +127, -128. ++ __ li(tmp, (int64_t)(in_bytes(ZThreadLocalData::address_bad_mask_offset()) - ++ in_bytes(JavaThread::jni_environment_offset()))); ++ ++ // Load address bad mask ++ __ ldx_d(tmp, jni_env, tmp); ++ ++ // Check address bad mask ++ __ andr(AT, robj, tmp); ++ __ bnez(AT, slowpath); ++ ++ BLOCK_COMMENT("} ZBarrierSetAssembler::try_resolve_jobject_in_native"); ++} ++ ++#ifdef COMPILER1 ++ ++#undef __ ++#define __ ce->masm()-> ++ ++void ZBarrierSetAssembler::generate_c1_load_barrier_test(LIR_Assembler* ce, ++ LIR_Opr ref) const { ++ assert_different_registers(SCR1, TREG, ref->as_register()); ++ __ ld_d(SCR1, address_bad_mask_from_thread(TREG)); ++ __ andr(SCR1, SCR1, ref->as_register()); ++} ++ ++void ZBarrierSetAssembler::generate_c1_load_barrier_stub(LIR_Assembler* ce, ++ ZLoadBarrierStubC1* stub) const { ++ // Stub entry ++ __ bind(*stub->entry()); ++ ++ Register ref = stub->ref()->as_register(); ++ Register ref_addr = noreg; ++ Register tmp = noreg; ++ ++ if (stub->tmp()->is_valid()) { ++ // Load address into tmp register ++ ce->leal(stub->ref_addr(), stub->tmp()); ++ ref_addr = tmp = stub->tmp()->as_pointer_register(); ++ } else { ++ // Address already in register ++ ref_addr = stub->ref_addr()->as_address_ptr()->base()->as_pointer_register(); ++ } ++ ++ assert_different_registers(ref, ref_addr, noreg); ++ ++ // Save V0 unless it is the result or tmp register ++ // Set up SP to accomodate parameters and maybe V0. 
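// Frame shape set up by the addi_d/st_d pair below (a sketch; the parameter
// offsets assume LIR_Assembler::store_parameter(x, i) writes word i of the
// outgoing area, i.e. SP + 8 * i, as on the other ports):
//
//   SP + 24 : unused padding, keeps SP 16-byte aligned   (32-byte frame only)
//   SP + 16 : saved V0                                    (32-byte frame only)
//   SP +  8 : parameter 1 = ref_addr
//   SP +  0 : parameter 0 = ref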
++ if (ref != V0 && tmp != V0) { ++ __ addi_d(SP, SP, -32); ++ __ st_d(V0, SP, 16); ++ } else { ++ __ addi_d(SP, SP, -16); ++ } ++ ++ // Setup arguments and call runtime stub ++ ce->store_parameter(ref_addr, 1); ++ ce->store_parameter(ref, 0); ++ ++ __ call(stub->runtime_stub(), relocInfo::runtime_call_type); ++ ++ // Verify result ++ __ verify_oop(V0, "Bad oop"); ++ ++ // Move result into place ++ if (ref != V0) { ++ __ move(ref, V0); ++ } ++ ++ // Restore V0 unless it is the result or tmp register ++ if (ref != V0 && tmp != V0) { ++ __ ld_d(V0, SP, 16); ++ __ addi_d(SP, SP, 32); ++ } else { ++ __ addi_d(SP, SP, 16); ++ } ++ ++ // Stub exit ++ __ b(*stub->continuation()); ++} ++ ++#undef __ ++#define __ sasm-> ++ ++void ZBarrierSetAssembler::generate_c1_load_barrier_runtime_stub(StubAssembler* sasm, ++ DecoratorSet decorators) const { ++ __ prologue("zgc_load_barrier stub", false); ++ ++ __ push_call_clobbered_registers_except(RegSet::of(V0)); ++ ++ // Setup arguments ++ __ load_parameter(0, A0); ++ __ load_parameter(1, A1); ++ ++ __ call_VM_leaf(ZBarrierSetRuntime::load_barrier_on_oop_field_preloaded_addr(decorators), 2); ++ ++ __ pop_call_clobbered_registers_except(RegSet::of(V0)); ++ ++ __ epilogue(); ++} ++#endif // COMPILER1 ++ ++#ifdef COMPILER2 ++ ++OptoReg::Name ZBarrierSetAssembler::refine_register(const Node* node, OptoReg::Name opto_reg) { ++ if (!OptoReg::is_reg(opto_reg)) { ++ return OptoReg::Bad; ++ } ++ ++ const VMReg vm_reg = OptoReg::as_VMReg(opto_reg); ++ if (vm_reg->is_FloatRegister()) { ++ return opto_reg & ~1; ++ } ++ ++ return opto_reg; ++} ++ ++#undef __ ++#define __ _masm-> ++ ++class ZSaveLiveRegisters { ++private: ++ MacroAssembler* const _masm; ++ RegSet _gp_regs; ++ FloatRegSet _fp_regs; ++ FloatRegSet _lsx_vp_regs; ++ FloatRegSet _lasx_vp_regs; ++ ++public: ++ void initialize(ZLoadBarrierStubC2* stub) { ++ // Record registers that needs to be saved/restored ++ RegMaskIterator rmi(stub->live()); ++ while (rmi.has_next()) { ++ const OptoReg::Name opto_reg = rmi.next(); ++ if (OptoReg::is_reg(opto_reg)) { ++ const VMReg vm_reg = OptoReg::as_VMReg(opto_reg); ++ if (vm_reg->is_Register()) { ++ _gp_regs += RegSet::of(vm_reg->as_Register()); ++ } else if (vm_reg->is_FloatRegister()) { ++ if (UseLASX && vm_reg->next(7)) ++ _lasx_vp_regs += FloatRegSet::of(vm_reg->as_FloatRegister()); ++ else if (UseLSX && vm_reg->next(3)) ++ _lsx_vp_regs += FloatRegSet::of(vm_reg->as_FloatRegister()); ++ else ++ _fp_regs += FloatRegSet::of(vm_reg->as_FloatRegister()); ++ } else { ++ fatal("Unknown register type"); ++ } ++ } ++ } ++ ++ // Remove C-ABI SOE registers, scratch regs and _ref register that will be updated ++ _gp_regs -= RegSet::range(S0, S7) + RegSet::of(SP, SCR1, SCR2, stub->ref()); ++ } ++ ++ ZSaveLiveRegisters(MacroAssembler* masm, ZLoadBarrierStubC2* stub) : ++ _masm(masm), ++ _gp_regs(), ++ _fp_regs(), ++ _lsx_vp_regs(), ++ _lasx_vp_regs() { ++ ++ // Figure out what registers to save/restore ++ initialize(stub); ++ ++ // Save registers ++ __ push(_gp_regs); ++ __ push_fpu(_fp_regs); ++ __ push_vp(_lsx_vp_regs /* UseLSX */); ++ __ push_vp(_lasx_vp_regs /* UseLASX */); ++ } ++ ++ ~ZSaveLiveRegisters() { ++ // Restore registers ++ __ pop_vp(_lasx_vp_regs /* UseLASX */); ++ __ pop_vp(_lsx_vp_regs /* UseLSX */); ++ __ pop_fpu(_fp_regs); ++ __ pop(_gp_regs); ++ } ++}; ++ ++#undef __ ++#define __ _masm-> ++ ++class ZSetupArguments { ++private: ++ MacroAssembler* const _masm; ++ const Register _ref; ++ const Address _ref_addr; ++ ++public: ++ ZSetupArguments(MacroAssembler* 
masm, ZLoadBarrierStubC2* stub) : ++ _masm(masm), ++ _ref(stub->ref()), ++ _ref_addr(stub->ref_addr()) { ++ ++ // Setup arguments ++ if (_ref_addr.base() == noreg) { ++ // No self healing ++ if (_ref != A0) { ++ __ move(A0, _ref); ++ } ++ __ move(A1, 0); ++ } else { ++ // Self healing ++ if (_ref == A0) { ++ // _ref is already at correct place ++ __ lea(A1, _ref_addr); ++ } else if (_ref != A1) { ++ // _ref is in wrong place, but not in A1, so fix it first ++ __ lea(A1, _ref_addr); ++ __ move(A0, _ref); ++ } else if (_ref_addr.base() != A0 && _ref_addr.index() != A0) { ++ assert(_ref == A1, "Mov ref first, vacating A0"); ++ __ move(A0, _ref); ++ __ lea(A1, _ref_addr); ++ } else { ++ assert(_ref == A1, "Need to vacate A1 and _ref_addr is using A0"); ++ if (_ref_addr.base() == A0 || _ref_addr.index() == A0) { ++ __ move(T4, A1); ++ __ lea(A1, _ref_addr); ++ __ move(A0, T4); ++ } else { ++ ShouldNotReachHere(); ++ } ++ } ++ } ++ } ++ ++ ~ZSetupArguments() { ++ // Transfer result ++ if (_ref != V0) { ++ __ move(_ref, V0); ++ } ++ } ++}; ++ ++#undef __ ++#define __ masm-> ++ ++void ZBarrierSetAssembler::generate_c2_load_barrier_stub(MacroAssembler* masm, ZLoadBarrierStubC2* stub) const { ++ BLOCK_COMMENT("ZLoadBarrierStubC2"); ++ ++ // Stub entry ++ __ bind(*stub->entry()); ++ ++ { ++ ZSaveLiveRegisters save_live_registers(masm, stub); ++ ZSetupArguments setup_arguments(masm, stub); ++ __ call_VM_leaf(stub->slow_path(), 2); ++ } ++ // Stub exit ++ __ b(*stub->continuation()); ++} ++ ++#undef __ ++ ++#endif // COMPILER2 +diff --git a/src/hotspot/cpu/loongarch/gc/z/zBarrierSetAssembler_loongarch.hpp b/src/hotspot/cpu/loongarch/gc/z/zBarrierSetAssembler_loongarch.hpp +new file mode 100644 +index 00000000000..6a96d6fdd60 +--- /dev/null ++++ b/src/hotspot/cpu/loongarch/gc/z/zBarrierSetAssembler_loongarch.hpp +@@ -0,0 +1,101 @@ ++/* ++ * Copyright (c) 2019, 2021, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2021, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ */ ++ ++#ifndef CPU_LOONGARCH_GC_Z_ZBARRIERSETASSEMBLER_LOONGARCH_HPP ++#define CPU_LOONGARCH_GC_Z_ZBARRIERSETASSEMBLER_LOONGARCH_HPP ++ ++#include "code/vmreg.hpp" ++#include "oops/accessDecorators.hpp" ++#ifdef COMPILER2 ++#include "opto/optoreg.hpp" ++#endif // COMPILER2 ++ ++#ifdef COMPILER1 ++class LIR_Assembler; ++class LIR_OprDesc; ++typedef LIR_OprDesc* LIR_Opr; ++class StubAssembler; ++class ZLoadBarrierStubC1; ++#endif // COMPILER1 ++ ++#ifdef COMPILER2 ++class Node; ++class ZLoadBarrierStubC2; ++#endif // COMPILER2 ++ ++class ZBarrierSetAssembler : public ZBarrierSetAssemblerBase { ++public: ++ virtual void load_at(MacroAssembler* masm, ++ DecoratorSet decorators, ++ BasicType type, ++ Register dst, ++ Address src, ++ Register tmp1, ++ Register tmp_thread); ++ ++#ifdef ASSERT ++ virtual void store_at(MacroAssembler* masm, ++ DecoratorSet decorators, ++ BasicType type, ++ Address dst, ++ Register val, ++ Register tmp1, ++ Register tmp2); ++#endif // ASSERT ++ ++ virtual void arraycopy_prologue(MacroAssembler* masm, ++ DecoratorSet decorators, ++ bool is_oop, ++ Register src, ++ Register dst, ++ Register count, ++ RegSet saved_regs); ++ ++ virtual void try_resolve_jobject_in_native(MacroAssembler* masm, ++ Register jni_env, ++ Register robj, ++ Register tmp, ++ Label& slowpath); ++ ++#ifdef COMPILER1 ++ void generate_c1_load_barrier_test(LIR_Assembler* ce, ++ LIR_Opr ref) const; ++ ++ void generate_c1_load_barrier_stub(LIR_Assembler* ce, ++ ZLoadBarrierStubC1* stub) const; ++ ++ void generate_c1_load_barrier_runtime_stub(StubAssembler* sasm, ++ DecoratorSet decorators) const; ++#endif // COMPILER1 ++ ++#ifdef COMPILER2 ++ OptoReg::Name refine_register(const Node* node, ++ OptoReg::Name opto_reg); ++ ++ void generate_c2_load_barrier_stub(MacroAssembler* masm, ++ ZLoadBarrierStubC2* stub) const; ++#endif // COMPILER2 ++}; ++ ++#endif // CPU_LOONGARCH_GC_Z_ZBARRIERSETASSEMBLER_LOONGARCH_HPP +diff --git a/src/hotspot/cpu/loongarch/gc/z/zGlobals_loongarch.cpp b/src/hotspot/cpu/loongarch/gc/z/zGlobals_loongarch.cpp +new file mode 100644 +index 00000000000..85db1cf1854 +--- /dev/null ++++ b/src/hotspot/cpu/loongarch/gc/z/zGlobals_loongarch.cpp +@@ -0,0 +1,211 @@ ++/* ++ * Copyright (c) 2017, 2021, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2021, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ */ ++ ++#include "precompiled.hpp" ++#include "gc/shared/gcLogPrecious.hpp" ++#include "gc/shared/gc_globals.hpp" ++#include "gc/z/zGlobals.hpp" ++#include "runtime/globals.hpp" ++#include "runtime/os.hpp" ++#include "utilities/globalDefinitions.hpp" ++#include "utilities/powerOfTwo.hpp" ++ ++#ifdef LINUX ++#include ++#endif // LINUX ++ ++// ++// The heap can have three different layouts, depending on the max heap size. ++// ++// Address Space & Pointer Layout 1 ++// -------------------------------- ++// ++// +--------------------------------+ 0x00007FFFFFFFFFFF (127TB) ++// . . ++// . . ++// . . ++// +--------------------------------+ 0x0000014000000000 (20TB) ++// | Remapped View | ++// +--------------------------------+ 0x0000010000000000 (16TB) ++// . . ++// +--------------------------------+ 0x00000c0000000000 (12TB) ++// | Marked1 View | ++// +--------------------------------+ 0x0000080000000000 (8TB) ++// | Marked0 View | ++// +--------------------------------+ 0x0000040000000000 (4TB) ++// . . ++// +--------------------------------+ 0x0000000000000000 ++// ++// 6 4 4 4 4 ++// 3 6 5 2 1 0 ++// +--------------------+----+-----------------------------------------------+ ++// |00000000 00000000 00|1111|11 11111111 11111111 11111111 11111111 11111111| ++// +--------------------+----+-----------------------------------------------+ ++// | | | ++// | | * 41-0 Object Offset (42-bits, 4TB address space) ++// | | ++// | * 45-42 Metadata Bits (4-bits) 0001 = Marked0 (Address view 4-8TB) ++// | 0010 = Marked1 (Address view 8-12TB) ++// | 0100 = Remapped (Address view 16-20TB) ++// | 1000 = Finalizable (Address view N/A) ++// | ++// * 63-46 Fixed (18-bits, always zero) ++// ++// ++// Address Space & Pointer Layout 2 ++// -------------------------------- ++// ++// +--------------------------------+ 0x00007FFFFFFFFFFF (127TB) ++// . . ++// . . ++// . . ++// +--------------------------------+ 0x0000280000000000 (40TB) ++// | Remapped View | ++// +--------------------------------+ 0x0000200000000000 (32TB) ++// . . ++// +--------------------------------+ 0x0000180000000000 (24TB) ++// | Marked1 View | ++// +--------------------------------+ 0x0000100000000000 (16TB) ++// | Marked0 View | ++// +--------------------------------+ 0x0000080000000000 (8TB) ++// . . ++// +--------------------------------+ 0x0000000000000000 ++// ++// 6 4 4 4 4 ++// 3 7 6 3 2 0 ++// +------------------+-----+------------------------------------------------+ ++// |00000000 00000000 0|1111|111 11111111 11111111 11111111 11111111 11111111| ++// +-------------------+----+------------------------------------------------+ ++// | | | ++// | | * 42-0 Object Offset (43-bits, 8TB address space) ++// | | ++// | * 46-43 Metadata Bits (4-bits) 0001 = Marked0 (Address view 8-16TB) ++// | 0010 = Marked1 (Address view 16-24TB) ++// | 0100 = Remapped (Address view 32-40TB) ++// | 1000 = Finalizable (Address view N/A) ++// | ++// * 63-47 Fixed (17-bits, always zero) ++// ++// ++// Address Space & Pointer Layout 3 ++// -------------------------------- ++// ++// +--------------------------------+ 0x00007FFFFFFFFFFF (127TB) ++// . . ++// . . ++// . . ++// +--------------------------------+ 0x0000500000000000 (80TB) ++// | Remapped View | ++// +--------------------------------+ 0x0000400000000000 (64TB) ++// . . 
++// +--------------------------------+ 0x0000300000000000 (48TB) ++// | Marked1 View | ++// +--------------------------------+ 0x0000200000000000 (32TB) ++// | Marked0 View | ++// +--------------------------------+ 0x0000100000000000 (16TB) ++// . . ++// +--------------------------------+ 0x0000000000000000 ++// ++// 6 4 4 4 4 ++// 3 8 7 4 3 0 ++// +------------------+----+-------------------------------------------------+ ++// |00000000 00000000 |1111|1111 11111111 11111111 11111111 11111111 11111111| ++// +------------------+----+-------------------------------------------------+ ++// | | | ++// | | * 43-0 Object Offset (44-bits, 16TB address space) ++// | | ++// | * 47-44 Metadata Bits (4-bits) 0001 = Marked0 (Address view 16-32TB) ++// | 0010 = Marked1 (Address view 32-48TB) ++// | 0100 = Remapped (Address view 64-80TB) ++// | 1000 = Finalizable (Address view N/A) ++// | ++// * 63-48 Fixed (16-bits, always zero) ++// ++ ++// Default value if probing is not implemented for a certain platform: 128TB ++static const size_t DEFAULT_MAX_ADDRESS_BIT = 47; ++// Minimum value returned, if probing fails: 64GB ++static const size_t MINIMUM_MAX_ADDRESS_BIT = 36; ++ ++static size_t probe_valid_max_address_bit() { ++#ifdef LINUX ++ size_t max_address_bit = 0; ++ const size_t page_size = os::vm_page_size(); ++ for (size_t i = DEFAULT_MAX_ADDRESS_BIT; i > MINIMUM_MAX_ADDRESS_BIT; --i) { ++ const uintptr_t base_addr = ((uintptr_t) 1U) << i; ++ if (msync((void*)base_addr, page_size, MS_ASYNC) == 0) { ++ // msync suceeded, the address is valid, and maybe even already mapped. ++ max_address_bit = i; ++ break; ++ } ++ if (errno != ENOMEM) { ++ // Some error occured. This should never happen, but msync ++ // has some undefined behavior, hence ignore this bit. ++#ifdef ASSERT ++ fatal("Received '%s' while probing the address space for the highest valid bit", os::errno_name(errno)); ++#else // ASSERT ++ log_warning_p(gc)("Received '%s' while probing the address space for the highest valid bit", os::errno_name(errno)); ++#endif // ASSERT ++ continue; ++ } ++ // Since msync failed with ENOMEM, the page might not be mapped. ++ // Try to map it, to see if the address is valid. 
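// The same msync-then-mmap probing idea as a stand-alone program, handy for
// checking what a given kernel/CPU combination would report (for example on a
// LoongArch box with fewer usable virtual-address bits). An independent
// sketch, not the patch's code; the 47/36 bounds mirror the
// DEFAULT/MINIMUM_MAX_ADDRESS_BIT constants above.
#include <cerrno>
#include <cstdint>
#include <cstdio>
#include <sys/mman.h>
#include <unistd.h>

int main() {
  const size_t page = static_cast<size_t>(sysconf(_SC_PAGESIZE));
  for (size_t bit = 47; bit > 36; --bit) {
    void* const want = reinterpret_cast<void*>(uintptr_t(1) << bit);
    if (msync(want, page, MS_ASYNC) == 0) {      // already mapped: bit is valid
      std::printf("highest valid address bit: %zu\n", bit);
      return 0;
    }
    if (errno != ENOMEM) {
      continue;                                  // unexpected error: skip this bit
    }
    void* const got = mmap(want, page, PROT_NONE,
                           MAP_PRIVATE | MAP_ANONYMOUS | MAP_NORESERVE, -1, 0);
    if (got != MAP_FAILED) {
      const bool hit = (got == want);
      munmap(got, page);
      if (hit) {                                 // kernel honoured the hint: bit is valid
        std::printf("highest valid address bit: %zu\n", bit);
        return 0;
      }
    }
  }
  std::printf("probe failed below bit 36\n");
  return 1;
}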
++ void* const result_addr = mmap((void*) base_addr, page_size, PROT_NONE, MAP_PRIVATE|MAP_ANONYMOUS|MAP_NORESERVE, -1, 0); ++ if (result_addr != MAP_FAILED) { ++ munmap(result_addr, page_size); ++ } ++ if ((uintptr_t) result_addr == base_addr) { ++ // address is valid ++ max_address_bit = i; ++ break; ++ } ++ } ++ if (max_address_bit == 0) { ++ // probing failed, allocate a very high page and take that bit as the maximum ++ const uintptr_t high_addr = ((uintptr_t) 1U) << DEFAULT_MAX_ADDRESS_BIT; ++ void* const result_addr = mmap((void*) high_addr, page_size, PROT_NONE, MAP_PRIVATE|MAP_ANONYMOUS|MAP_NORESERVE, -1, 0); ++ if (result_addr != MAP_FAILED) { ++ max_address_bit = BitsPerSize_t - count_leading_zeros((size_t) result_addr) - 1; ++ munmap(result_addr, page_size); ++ } ++ } ++ log_info_p(gc, init)("Probing address space for the highest valid bit: " SIZE_FORMAT, max_address_bit); ++ return MAX2(max_address_bit, MINIMUM_MAX_ADDRESS_BIT); ++#else // LINUX ++ return DEFAULT_MAX_ADDRESS_BIT; ++#endif // LINUX ++} ++ ++size_t ZPlatformAddressOffsetBits() { ++ const static size_t valid_max_address_offset_bits = probe_valid_max_address_bit() + 1; ++ const size_t max_address_offset_bits = valid_max_address_offset_bits - 3; ++ const size_t min_address_offset_bits = max_address_offset_bits - 2; ++ const size_t address_offset = round_up_power_of_2(MaxHeapSize * ZVirtualToPhysicalRatio); ++ const size_t address_offset_bits = log2i_exact(address_offset); ++ return clamp(address_offset_bits, min_address_offset_bits, max_address_offset_bits); ++} ++ ++size_t ZPlatformAddressMetadataShift() { ++ return ZPlatformAddressOffsetBits(); ++} +diff --git a/src/hotspot/cpu/loongarch/gc/z/zGlobals_loongarch.hpp b/src/hotspot/cpu/loongarch/gc/z/zGlobals_loongarch.hpp +new file mode 100644 +index 00000000000..542fd267434 +--- /dev/null ++++ b/src/hotspot/cpu/loongarch/gc/z/zGlobals_loongarch.hpp +@@ -0,0 +1,35 @@ ++/* ++ * Copyright (c) 2015, 2021, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2021, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ */ ++ ++#ifndef CPU_LOONGARCH_GC_Z_ZGLOBALS_LOONGARCH_HPP ++#define CPU_LOONGARCH_GC_Z_ZGLOBALS_LOONGARCH_HPP ++ ++const size_t ZPlatformGranuleSizeShift = 21; // 2MB ++const size_t ZPlatformHeapViews = 3; ++const size_t ZPlatformCacheLineSize = 64; ++ ++size_t ZPlatformAddressOffsetBits(); ++size_t ZPlatformAddressMetadataShift(); ++ ++#endif // CPU_LOONGARCH_GC_Z_ZGLOBALS_LOONGARCH_HPP +diff --git a/src/hotspot/cpu/loongarch/gc/z/z_loongarch_64.ad b/src/hotspot/cpu/loongarch/gc/z/z_loongarch_64.ad +new file mode 100644 +index 00000000000..59656e75376 +--- /dev/null ++++ b/src/hotspot/cpu/loongarch/gc/z/z_loongarch_64.ad +@@ -0,0 +1,273 @@ ++// ++// Copyright (c) 2019, 2021, Oracle and/or its affiliates. All rights reserved. ++// Copyright (c) 2021, Loongson Technology. All rights reserved. ++// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++// ++// This code is free software; you can redistribute it and/or modify it ++// under the terms of the GNU General Public License version 2 only, as ++// published by the Free Software Foundation. ++// ++// This code is distributed in the hope that it will be useful, but WITHOUT ++// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++// version 2 for more details (a copy is included in the LICENSE file that ++// accompanied this code). ++// ++// You should have received a copy of the GNU General Public License version ++// 2 along with this work; if not, write to the Free Software Foundation, ++// Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++// ++// Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++// or visit www.oracle.com if you need additional information or have any ++// questions. ++// ++ ++source_hpp %{ ++ ++#include "gc/shared/gc_globals.hpp" ++#include "gc/z/c2/zBarrierSetC2.hpp" ++#include "gc/z/zThreadLocalData.hpp" ++ ++%} ++ ++source %{ ++ ++static void z_load_barrier(MacroAssembler& _masm, const MachNode* node, Address ref_addr, Register ref, Register tmp, uint8_t barrier_data) { ++ if (barrier_data == ZLoadBarrierElided) { ++ return; ++ } ++ Register thread = TREG; ++#ifndef OPT_THREAD ++ __ get_thread(thread); ++#endif ++ ZLoadBarrierStubC2* const stub = ZLoadBarrierStubC2::create(node, ref_addr, ref, tmp, barrier_data); ++ __ ld_ptr(tmp, Address(thread, ZThreadLocalData::address_bad_mask_offset())); ++ __ andr(tmp, tmp, ref); ++ __ bnez(tmp, *stub->entry()); ++ __ bind(*stub->continuation()); ++} ++ ++static void z_load_barrier_slow_path(MacroAssembler& _masm, const MachNode* node, Address ref_addr, Register ref, Register tmp) { ++ ZLoadBarrierStubC2* const stub = ZLoadBarrierStubC2::create(node, ref_addr, ref, tmp, ZLoadBarrierStrong); ++ __ b(*stub->entry()); ++ __ bind(*stub->continuation()); ++} ++ ++static void z_compare_and_swap(MacroAssembler& _masm, const MachNode* node, ++ Register res, Register mem, Register oldval, Register newval, ++ Register tmp, bool weak, bool acquire) { ++ // z-specific load barrier requires strong CAS operations. ++ // Weak CAS operations are thus only emitted if the barrier is elided. 
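// The shape of the code this helper emits, restated over std::atomic as a
// conceptual sketch (not patch code). zgc_bad_mask and heal_reference() are
// invented stand-ins for the per-thread address bad mask and the slow-path
// load barrier that remaps/relocates the stale reference in place.
#include <atomic>
#include <cstdint>

static uintptr_t zgc_bad_mask = 0;               // per-thread in the real VM

static void heal_reference(std::atomic<uintptr_t>& slot, uintptr_t stale) {
  (void)slot; (void)stale;                       // the slow-path barrier would fix the slot here
}

bool z_cas_sketch(std::atomic<uintptr_t>& slot, uintptr_t oldval, uintptr_t newval) {
  uintptr_t witness = oldval;
  if (slot.compare_exchange_strong(witness, newval)) {
    return true;                                 // fast path: no barrier work needed
  }
  if ((witness & zgc_bad_mask) == 0) {
    return false;                                // the slot really holds a different value
  }
  heal_reference(slot, witness);                 // stale colored pointer: heal, then retry once
  witness = oldval;
  return slot.compare_exchange_strong(witness, newval);
}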
++ Register thread = TREG; ++#ifndef OPT_THREAD ++ __ get_thread(thread); ++#endif ++ Address addr(mem); ++ if (node->barrier_data() == ZLoadBarrierElided) { ++ __ cmpxchg(addr, oldval, newval, tmp, false /* retold */, acquire /* barrier */, ++ weak /* weak */, false /* exchange */); ++ __ move(res, tmp); ++ } else { ++ __ move(tmp, oldval); ++ __ cmpxchg(addr, tmp, newval, AT, true /* retold */, acquire /* barrier */, ++ false /* weak */, false /* exchange */); ++ __ move(res, AT); ++ ++ Label good; ++ __ ld_ptr(AT, Address(thread, ZThreadLocalData::address_bad_mask_offset())); ++ __ andr(AT, AT, tmp); ++ __ beqz(AT, good); ++ z_load_barrier_slow_path(_masm, node, addr, tmp, res /* used as tmp */); ++ __ cmpxchg(addr, oldval, newval, tmp, false /* retold */, acquire /* barrier */, weak /* weak */, false /* exchange */); ++ __ move(res, tmp); ++ __ bind(good); ++ } ++} ++ ++static void z_compare_and_exchange(MacroAssembler& _masm, const MachNode* node, ++ Register res, Register mem, Register oldval, Register newval, Register tmp, ++ bool weak, bool acquire) { ++ // z-specific load barrier requires strong CAS operations. ++ // Weak CAS operations are thus only emitted if the barrier is elided. ++ Register thread = TREG; ++#ifndef OPT_THREAD ++ __ get_thread(thread); ++#endif ++ Address addr(mem); ++ __ cmpxchg(addr, oldval, newval, res, false /* retold */, acquire /* barrier */, ++ weak && node->barrier_data() == ZLoadBarrierElided /* weak */, true /* exchange */); ++ if (node->barrier_data() != ZLoadBarrierElided) { ++ Label good; ++ __ ld_ptr(tmp, Address(thread, ZThreadLocalData::address_bad_mask_offset())); ++ __ andr(tmp, tmp, res); ++ __ beqz(tmp, good); ++ z_load_barrier_slow_path(_masm, node, addr, res /* ref */, tmp); ++ __ cmpxchg(addr, oldval, newval, res, false /* retold */, acquire /* barrier */, weak /* weak */, true /* exchange */); ++ __ bind(good); ++ } ++} ++ ++%} ++ ++// Load Pointer ++instruct zLoadP(mRegP dst, memory mem, mRegP tmp, FlagsReg cr) ++%{ ++ match(Set dst (LoadP mem)); ++ effect(TEMP_DEF dst, TEMP tmp, KILL cr); ++ ins_cost(125);//must be equal loadP in loongarch_64.ad ++ ++ predicate(UseZGC && n->as_Load()->barrier_data() != 0); ++ ++ format %{ "zLoadP $dst, $mem" %} ++ ++ ins_encode %{ ++ Address ref_addr = Address(as_Register($mem$$base), as_Register($mem$$index), Address::no_scale, $mem$$disp); ++ __ block_comment("zLoadP"); ++ __ ld_ptr($dst$$Register, ref_addr); ++ z_load_barrier(_masm, this, ref_addr, $dst$$Register, $tmp$$Register, barrier_data()); ++ %} ++ ++ ins_pipe(pipe_slow); ++%} ++ ++instruct zCompareAndSwapP(mRegI res, mRegP mem, mRegP oldval, mRegP newval, mRegP tmp, FlagsReg cr) %{ ++ match(Set res (CompareAndSwapP mem (Binary oldval newval))); ++ effect(KILL cr, TEMP_DEF res, TEMP tmp); ++ ++ predicate((UseZGC && n->as_LoadStore()->barrier_data() == ZLoadBarrierStrong) ++ && (((CompareAndSwapNode*)n)->order() != MemNode::acquire && ((CompareAndSwapNode*) n)->order() != MemNode::seqcst)); ++ ins_cost(3 * MEMORY_REF_COST);//must be equal compareAndSwapP in loongarch_64.ad ++ ++ format %{ "CMPXCHG $res, $mem, $oldval, $newval; as bool; ptr" %} ++ ins_encode %{ ++ __ block_comment("zCompareAndSwapP"); ++ z_compare_and_swap(_masm, this, ++ $res$$Register, $mem$$Register, $oldval$$Register, $newval$$Register, ++ $tmp$$Register, false /* weak */, false /* acquire */); ++ %} ++ ++ ins_pipe(pipe_slow); ++%} ++ ++instruct zCompareAndSwapP_acq(mRegI res, mRegP mem, mRegP oldval, mRegP newval, mRegP tmp, FlagsReg cr) %{ ++ match(Set res 
(CompareAndSwapP mem (Binary oldval newval))); ++ effect(KILL cr, TEMP_DEF res, TEMP tmp); ++ ++ predicate((UseZGC && n->as_LoadStore()->barrier_data() == ZLoadBarrierStrong)); ++ ins_cost(4 * MEMORY_REF_COST);//must be larger than zCompareAndSwapP ++ ++ format %{ "CMPXCHG acq $res, $mem, $oldval, $newval; as bool; ptr" %} ++ ins_encode %{ ++ __ block_comment("zCompareAndSwapP_acq"); ++ z_compare_and_swap(_masm, this, ++ $res$$Register, $mem$$Register, $oldval$$Register, $newval$$Register, ++ $tmp$$Register, false /* weak */, true /* acquire */); ++ %} ++ ++ ins_pipe(pipe_slow); ++%} ++ ++instruct zCompareAndSwapPWeak(mRegI res, mRegP mem, mRegP oldval, mRegP newval, mRegP tmp, FlagsReg cr) %{ ++ match(Set res (WeakCompareAndSwapP mem (Binary oldval newval))); ++ effect(KILL cr, TEMP_DEF res, TEMP tmp); ++ ++ predicate((UseZGC && n->as_LoadStore()->barrier_data() == ZLoadBarrierStrong) ++ && ((CompareAndSwapNode*)n)->order() != MemNode::acquire && ((CompareAndSwapNode*) n)->order() != MemNode::seqcst); ++ ++ ins_cost(MEMORY_REF_COST);//must be equal weakCompareAndSwapP in loongarch_64.ad ++ ++ format %{ "weak CMPXCHG $res, $mem, $oldval, $newval; as bool; ptr" %} ++ ins_encode %{ ++ __ block_comment("zCompareAndSwapPWeak"); ++ z_compare_and_swap(_masm, this, ++ $res$$Register, $mem$$Register, $oldval$$Register, $newval$$Register, ++ $tmp$$Register, true /* weak */, false /* acquire */); ++ %} ++ ++ ins_pipe(pipe_slow); ++%} ++ ++instruct zCompareAndSwapPWeak_acq(mRegI res, mRegP mem, mRegP oldval, mRegP newval, mRegP tmp, FlagsReg cr) %{ ++ match(Set res (WeakCompareAndSwapP mem (Binary oldval newval))); ++ effect(KILL cr, TEMP_DEF res, TEMP tmp); ++ ++ predicate((UseZGC && n->as_LoadStore()->barrier_data() == ZLoadBarrierStrong)); ++ ins_cost(2* MEMORY_REF_COST);//must be equal weakCompareAndSwapP_acq in loongarch_64.ad ++ ++ format %{ "weak CMPXCHG acq $res, $mem, $oldval, $newval; as bool; ptr" %} ++ ins_encode %{ ++ __ block_comment("zCompareAndSwapPWeak_acq"); ++ z_compare_and_swap(_masm, this, ++ $res$$Register, $mem$$Register, $oldval$$Register, $newval$$Register, ++ $tmp$$Register, true /* weak */, true /* acquire */); ++ %} ++ ++ ins_pipe(pipe_slow); ++%} ++ ++instruct zCompareAndExchangeP(mRegP res, mRegP mem, mRegP oldval, mRegP newval, mRegP tmp, FlagsReg cr) %{ ++ match(Set res (CompareAndExchangeP mem (Binary oldval newval))); ++ effect(TEMP_DEF res, TEMP tmp, KILL cr); ++ ins_cost(2* MEMORY_REF_COST);//must be equal compareAndExchangeP in loongarch_64.ad ++ ++ predicate((UseZGC && n->as_LoadStore()->barrier_data() == ZLoadBarrierStrong) ++ && ( ++ ((CompareAndSwapNode*)n)->order() != MemNode::acquire ++ && ((CompareAndSwapNode*)n)->order() != MemNode::seqcst ++ )); ++ ++ format %{ "CMPXCHG $res, $mem, $oldval, $newval; as ptr; ptr" %} ++ ins_encode %{ ++ __ block_comment("zCompareAndExchangeP"); ++ z_compare_and_exchange(_masm, this, ++ $res$$Register, $mem$$Register, $oldval$$Register, $newval$$Register, $tmp$$Register, ++ false /* weak */, false /* acquire */); ++ %} ++ ++ ins_pipe(pipe_slow); ++%} ++ ++instruct zCompareAndExchangeP_acq(mRegP res, mRegP mem, mRegP oldval, mRegP newval, mRegP tmp, FlagsReg cr) %{ ++ match(Set res (CompareAndExchangeP mem (Binary oldval newval))); ++ effect(TEMP_DEF res, TEMP tmp, KILL cr); ++ ++ predicate((UseZGC && n->as_LoadStore()->barrier_data() == ZLoadBarrierStrong) ++ && ( ++ ((CompareAndSwapNode*)n)->order() == MemNode::acquire ++ || ((CompareAndSwapNode*)n)->order() == MemNode::seqcst ++ )); ++ ++ format %{ "CMPXCHG acq $res, $mem, 
$oldval, $newval; as ptr; ptr" %} ++ ins_encode %{ ++ __ block_comment("zCompareAndExchangeP_acq"); ++ z_compare_and_exchange(_masm, this, ++ $res$$Register, $mem$$Register, $oldval$$Register, $newval$$Register, $tmp$$Register, ++ false /* weak */, true /* acquire */); ++ %} ++ ++ ins_pipe(pipe_slow); ++%} ++ ++instruct zGetAndSetP(mRegP mem, mRegP newv, mRegP prev, mRegP tmp, FlagsReg cr) %{ ++ match(Set prev (GetAndSetP mem newv)); ++ effect(TEMP_DEF prev, TEMP tmp, KILL cr); ++ ++ predicate(UseZGC && n->as_LoadStore()->barrier_data() != 0); ++ ++ format %{ "GetAndSetP $prev, $mem, $newv" %} ++ ins_encode %{ ++ Register prev = $prev$$Register; ++ Register newv = $newv$$Register; ++ Register addr = $mem$$Register; ++ __ block_comment("zGetAndSetP"); ++ if (prev == newv || prev == addr) { ++ __ amswap_db_d(AT, newv, addr); ++ __ move(prev, AT); ++ } else { ++ __ amswap_db_d(prev, newv, addr); ++ } ++ z_load_barrier(_masm, this, Address(noreg, 0), prev, $tmp$$Register, barrier_data()); ++ %} ++ ++ ins_pipe(pipe_slow); ++%} +diff --git a/src/hotspot/cpu/loongarch/globalDefinitions_loongarch.hpp b/src/hotspot/cpu/loongarch/globalDefinitions_loongarch.hpp +new file mode 100644 +index 00000000000..363cd6e2092 +--- /dev/null ++++ b/src/hotspot/cpu/loongarch/globalDefinitions_loongarch.hpp +@@ -0,0 +1,55 @@ ++/* ++ * Copyright (c) 1999, 2013, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2021, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#ifndef CPU_LOONGARCH_GLOBALDEFINITIONS_LOONGARCH_HPP ++#define CPU_LOONGARCH_GLOBALDEFINITIONS_LOONGARCH_HPP ++// Size of LoongArch Instructions ++const int BytesPerInstWord = 4; ++ ++const int StackAlignmentInBytes = (2*wordSize); ++ ++// Indicates whether the C calling conventions require that ++// 32-bit integer argument values are properly extended to 64 bits. ++// If set, SharedRuntime::c_calling_convention() must adapt ++// signatures accordingly. ++const bool CCallingConventionRequiresIntsAsLongs = false; ++ ++#define SUPPORTS_NATIVE_CX8 ++ ++// FIXME: LA ++// This makes the games we play when patching difficult, so when we ++// come across an access that needs patching we deoptimize. There are ++// ways we can avoid this, but these would slow down C1-compiled code ++// in the default case. We could revisit this decision if we get any ++// evidence that it's worth doing. 
++#define DEOPTIMIZE_WHEN_PATCHING ++ ++#define SUPPORT_RESERVED_STACK_AREA ++ ++#define PREFERRED_METASPACE_ALIGNMENT ++ ++#define COMPRESSED_CLASS_POINTERS_DEPENDS_ON_COMPRESSED_OOPS false ++ ++#endif // CPU_LOONGARCH_GLOBALDEFINITIONS_LOONGARCH_HPP +diff --git a/src/hotspot/cpu/loongarch/globals_loongarch.hpp b/src/hotspot/cpu/loongarch/globals_loongarch.hpp +new file mode 100644 +index 00000000000..2358ca31596 +--- /dev/null ++++ b/src/hotspot/cpu/loongarch/globals_loongarch.hpp +@@ -0,0 +1,106 @@ ++/* ++ * Copyright (c) 2000, 2013, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#ifndef CPU_LOONGARCH_GLOBALS_LOONGARCH_HPP ++#define CPU_LOONGARCH_GLOBALS_LOONGARCH_HPP ++ ++#include "utilities/globalDefinitions.hpp" ++#include "utilities/macros.hpp" ++ ++// Sets the default values for platform dependent flags used by the runtime system. ++// (see globals.hpp) ++ ++define_pd_global(bool, ShareVtableStubs, true); ++define_pd_global(bool, NeedsDeoptSuspend, false); // only register window machines need this ++ ++define_pd_global(bool, ImplicitNullChecks, true); // Generate code for implicit null checks ++define_pd_global(bool, TrapBasedNullChecks, false); // Not needed on x86. ++define_pd_global(bool, UncommonNullCast, true); // Uncommon-trap NULLs passed to check cast ++ ++define_pd_global(uintx, CodeCacheSegmentSize, 64 COMPILER1_AND_COMPILER2_PRESENT(+64)); // Tiered compilation has large code-entry alignment. 
++define_pd_global(intx, CodeEntryAlignment, 16); ++define_pd_global(intx, OptoLoopAlignment, 16); ++define_pd_global(intx, InlineFrequencyCount, 100); ++define_pd_global(intx, InlineSmallCode, 2000); ++ ++#define DEFAULT_STACK_YELLOW_PAGES (2) ++#define DEFAULT_STACK_RED_PAGES (1) ++#define DEFAULT_STACK_SHADOW_PAGES (20 DEBUG_ONLY(+4)) ++#define DEFAULT_STACK_RESERVED_PAGES (1) ++ ++#define MIN_STACK_YELLOW_PAGES DEFAULT_STACK_YELLOW_PAGES ++#define MIN_STACK_RED_PAGES DEFAULT_STACK_RED_PAGES ++#define MIN_STACK_SHADOW_PAGES DEFAULT_STACK_SHADOW_PAGES ++#define MIN_STACK_RESERVED_PAGES (0) ++define_pd_global(intx, StackReservedPages, DEFAULT_STACK_RESERVED_PAGES); ++ ++define_pd_global(intx, StackYellowPages, 2); ++define_pd_global(intx, StackRedPages, 1); ++define_pd_global(intx, StackShadowPages, DEFAULT_STACK_SHADOW_PAGES); ++ ++define_pd_global(bool, RewriteBytecodes, true); ++define_pd_global(bool, RewriteFrequentPairs, true); ++ ++define_pd_global(uintx, TypeProfileLevel, 111); ++ ++define_pd_global(bool, CompactStrings, true); ++ ++define_pd_global(bool, PreserveFramePointer, false); ++ ++define_pd_global(intx, InitArrayShortSize, 8*BytesPerLong); ++ ++// Only c2 cares about this at the moment ++define_pd_global(intx, AllocatePrefetchStyle, 2); ++define_pd_global(intx, AllocatePrefetchDistance, -1); ++ ++#define ARCH_FLAGS(develop, \ ++ product, \ ++ notproduct, \ ++ range, \ ++ constraint) \ ++ \ ++ product(bool, UseCodeCacheAllocOpt, true, \ ++ "Allocate code cache within 32-bit memory address space") \ ++ \ ++ product(bool, UseLSX, false, \ ++ "Use LSX 128-bit vector instructions") \ ++ \ ++ product(bool, UseLASX, false, \ ++ "Use LASX 256-bit vector instructions") \ ++ \ ++ product(bool, UseBarriersForVolatile, false, \ ++ "Use memory barriers to implement volatile accesses") \ ++ \ ++ product(bool, UseCRC32, false, \ ++ "Use CRC32 instructions for CRC32 computation") \ ++ \ ++ product(bool, UseActiveCoresMP, false, \ ++ "Eliminate barriers for single active cpu") \ ++ \ ++ product(bool, TraceTraps, false, "Trace all traps the signal handler") ++ ++// end of ARCH_FLAGS ++ ++#endif // CPU_LOONGARCH_GLOBALS_LOONGARCH_HPP +diff --git a/src/hotspot/cpu/loongarch/icBuffer_loongarch.cpp b/src/hotspot/cpu/loongarch/icBuffer_loongarch.cpp +new file mode 100644 +index 00000000000..a8f08bbf422 +--- /dev/null ++++ b/src/hotspot/cpu/loongarch/icBuffer_loongarch.cpp +@@ -0,0 +1,100 @@ ++/* ++ * Copyright (c) 1997, 2012, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2021, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 
++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#include "precompiled.hpp" ++#include "asm/macroAssembler.hpp" ++#include "asm/macroAssembler.inline.hpp" ++#include "code/icBuffer.hpp" ++#include "gc/shared/collectedHeap.inline.hpp" ++#include "interpreter/bytecodes.hpp" ++#include "memory/resourceArea.hpp" ++#include "nativeInst_loongarch.hpp" ++#include "oops/oop.inline.hpp" ++ ++#define A0 RA0 ++#define A1 RA1 ++#define A2 RA2 ++#define A3 RA3 ++#define A4 RA4 ++#define A5 RA5 ++#define A6 RA6 ++#define A7 RA7 ++#define T0 RT0 ++#define T1 RT1 ++#define T2 RT2 ++#define T3 RT3 ++#define T4 RT4 ++#define T5 RT5 ++#define T6 RT6 ++#define T7 RT7 ++#define T8 RT8 ++ ++int InlineCacheBuffer::ic_stub_code_size() { ++ return NativeMovConstReg::instruction_size + ++ NativeGeneralJump::instruction_size + ++ 1; ++ // so that code_end can be set in CodeBuffer ++ // 64bit 15 = 6 + 8 bytes + 1 byte ++ // 32bit 7 = 2 + 4 bytes + 1 byte ++} ++ ++ ++// we use T1 as cached oop(klass) now. this is the target of virtual call, ++// when reach here, the receiver in T0 ++// refer to shareRuntime_loongarch.cpp,gen_i2c2i_adapters ++void InlineCacheBuffer::assemble_ic_buffer_code(address code_begin, void* cached_value, ++ address entry_point) { ++ ResourceMark rm; ++ CodeBuffer code(code_begin, ic_stub_code_size()); ++ MacroAssembler* masm = new MacroAssembler(&code); ++ // note: even though the code contains an embedded oop, we do not need reloc info ++ // because ++ // (1) the oop is old (i.e., doesn't matter for scavenges) ++ // (2) these ICStubs are removed *before* a GC happens, so the roots disappear ++ // assert(cached_oop == NULL || cached_oop->is_perm(), "must be perm oop"); ++#define __ masm-> ++ __ patchable_li52(T1, (long)cached_value); ++ // TODO: confirm reloc ++ __ jmp(entry_point, relocInfo::runtime_call_type); ++ __ flush(); ++#undef __ ++} ++ ++ ++address InlineCacheBuffer::ic_buffer_entry_point(address code_begin) { ++ NativeMovConstReg* move = nativeMovConstReg_at(code_begin); // creation also verifies the object ++ NativeGeneralJump* jump = nativeGeneralJump_at(move->next_instruction_address()); ++ return jump->jump_destination(); ++} ++ ++ ++void* InlineCacheBuffer::ic_buffer_cached_value(address code_begin) { ++ // creation also verifies the object ++ NativeMovConstReg* move = nativeMovConstReg_at(code_begin); ++ // Verifies the jump ++ NativeGeneralJump* jump = nativeGeneralJump_at(move->next_instruction_address()); ++ void* o= (void*)move->data(); ++ return o; ++} +diff --git a/src/hotspot/cpu/loongarch/icache_loongarch.cpp b/src/hotspot/cpu/loongarch/icache_loongarch.cpp +new file mode 100644 +index 00000000000..d577e41f59c +--- /dev/null ++++ b/src/hotspot/cpu/loongarch/icache_loongarch.cpp +@@ -0,0 +1,42 @@ ++/* ++ * Copyright (c) 1997, 2010, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2021, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#include "precompiled.hpp" ++#include "asm/macroAssembler.hpp" ++#include "runtime/icache.hpp" ++ ++void ICacheStubGenerator::generate_icache_flush(ICache::flush_icache_stub_t* flush_icache_stub) ++{ ++#define __ _masm-> ++ StubCodeMark mark(this, "ICache", "flush_icache_stub"); ++ address start = __ pc(); ++ ++ __ ibar(0); ++ __ ori(V0, RA2, 0); ++ __ jr(RA); ++ ++ *flush_icache_stub = (ICache::flush_icache_stub_t)start; ++#undef __ ++} +diff --git a/src/hotspot/cpu/loongarch/icache_loongarch.hpp b/src/hotspot/cpu/loongarch/icache_loongarch.hpp +new file mode 100644 +index 00000000000..3a180549fc6 +--- /dev/null ++++ b/src/hotspot/cpu/loongarch/icache_loongarch.hpp +@@ -0,0 +1,41 @@ ++/* ++ * Copyright (c) 1997, 2010, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#ifndef CPU_LOONGARCH_ICACHE_LOONGARCH_HPP ++#define CPU_LOONGARCH_ICACHE_LOONGARCH_HPP ++ ++// Interface for updating the instruction cache. Whenever the VM modifies ++// code, part of the processor instruction cache potentially has to be flushed. ++ ++class ICache : public AbstractICache { ++ public: ++ enum { ++ stub_size = 3 * BytesPerInstWord, // Size of the icache flush stub in bytes ++ line_size = 32, // flush instruction affects a dword ++ log2_line_size = 5 // log2(line_size) ++ }; ++}; ++ ++#endif // CPU_LOONGARCH_ICACHE_LOONGARCH_HPP +diff --git a/src/hotspot/cpu/loongarch/interp_masm_loongarch.hpp b/src/hotspot/cpu/loongarch/interp_masm_loongarch.hpp +new file mode 100644 +index 00000000000..7dea5deb183 +--- /dev/null ++++ b/src/hotspot/cpu/loongarch/interp_masm_loongarch.hpp +@@ -0,0 +1,270 @@ ++/* ++ * Copyright (c) 2003, 2013, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2021, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#ifndef CPU_LOONGARCH_INTERP_MASM_LOONGARCH_64_HPP ++#define CPU_LOONGARCH_INTERP_MASM_LOONGARCH_64_HPP ++ ++#include "asm/assembler.hpp" ++#include "asm/macroAssembler.hpp" ++#include "asm/macroAssembler.inline.hpp" ++#include "interpreter/invocationCounter.hpp" ++#include "runtime/frame.hpp" ++ ++// This file specializes the assember with interpreter-specific macros ++ ++typedef ByteSize (*OffsetFunction)(uint); ++ ++class InterpreterMacroAssembler: public MacroAssembler { ++ private: ++ ++ Register _locals_register; // register that contains the pointer to the locals ++ Register _bcp_register; // register that contains the bcp ++ ++ protected: ++ // Interpreter specific version of call_VM_base ++ virtual void call_VM_leaf_base(address entry_point, ++ int number_of_arguments); ++ ++ virtual void call_VM_base(Register oop_result, ++ Register java_thread, ++ Register last_java_sp, ++ address entry_point, ++ int number_of_arguments, ++ bool check_exceptions); ++ ++ // base routine for all dispatches ++ void dispatch_base(TosState state, address* table, bool verifyoop = true, bool generate_poll = false); ++ ++ public: ++ void jump_to_entry(address entry); ++ // narrow int return value ++ void narrow(Register result); ++ ++ InterpreterMacroAssembler(CodeBuffer* code) : MacroAssembler(code), _locals_register(LVP), _bcp_register(BCP) {} ++ ++ void get_2_byte_integer_at_bcp(Register reg, Register tmp, int offset); ++ void get_4_byte_integer_at_bcp(Register reg, int offset); ++ ++ virtual void check_and_handle_popframe(Register java_thread); ++ virtual void check_and_handle_earlyret(Register java_thread); ++ ++ void load_earlyret_value(TosState state); ++ ++ // Interpreter-specific registers ++ void save_bcp() { ++ st_d(BCP, FP, frame::interpreter_frame_bcp_offset * wordSize); ++ } ++ ++ void restore_bcp() { ++ ld_d(BCP, FP, frame::interpreter_frame_bcp_offset * wordSize); ++ } ++ ++ void restore_locals() { ++ ld_d(LVP, FP, frame::interpreter_frame_locals_offset * wordSize); ++ } ++ ++ // Helpers for runtime call arguments/results ++ void get_method(Register reg) { ++ ld_d(reg, FP, frame::interpreter_frame_method_offset * wordSize); ++ } ++ ++ void get_const(Register reg){ ++ get_method(reg); ++ ld_d(reg, reg, in_bytes(Method::const_offset())); ++ } ++ ++ void get_constant_pool(Register reg) { ++ get_const(reg); ++ ld_d(reg, reg, in_bytes(ConstMethod::constants_offset())); ++ } ++ ++ void get_constant_pool_cache(Register reg) { ++ get_constant_pool(reg); ++ ld_d(reg, reg, ConstantPool::cache_offset_in_bytes()); ++ } ++ ++ void get_cpool_and_tags(Register 
cpool, Register tags) { ++ get_constant_pool(cpool); ++ ld_d(tags, cpool, ConstantPool::tags_offset_in_bytes()); ++ } ++ ++ void get_unsigned_2_byte_index_at_bcp(Register reg, int bcp_offset); ++ void get_cache_and_index_at_bcp(Register cache, Register index, int bcp_offset, size_t index_size = sizeof(u2)); ++ void get_cache_and_index_and_bytecode_at_bcp(Register cache, Register index, Register bytecode, int byte_no, int bcp_offset, size_t index_size = sizeof(u2)); ++ void get_cache_entry_pointer_at_bcp(Register cache, Register tmp, int bcp_offset, size_t index_size = sizeof(u2)); ++ void get_cache_index_at_bcp(Register index, int bcp_offset, size_t index_size = sizeof(u2)); ++ void get_method_counters(Register method, Register mcs, Label& skip); ++ ++ // load cpool->resolved_references(index); ++ void load_resolved_reference_at_index(Register result, Register index, Register tmp); ++ ++ // load cpool->resolved_klass_at(index) ++ void load_resolved_klass_at_index(Register cpool, // the constant pool (corrupted on return) ++ Register index, // the constant pool index (corrupted on return) ++ Register klass); // contains the Klass on return ++ ++ void load_resolved_method_at_index(int byte_no, ++ Register method, ++ Register cache, ++ Register index); ++ ++ void pop_ptr( Register r = FSR); ++ void pop_i( Register r = FSR); ++ void pop_l( Register r = FSR); ++ void pop_f(FloatRegister r = FSF); ++ void pop_d(FloatRegister r = FSF); ++ ++ void push_ptr( Register r = FSR); ++ void push_i( Register r = FSR); ++ void push_l( Register r = FSR); ++ void push_f(FloatRegister r = FSF); ++ void push_d(FloatRegister r = FSF); ++ ++ void pop(Register r ) { ((MacroAssembler*)this)->pop(r); } ++ ++ void push(Register r ) { ((MacroAssembler*)this)->push(r); } ++ ++ void pop(TosState state); // transition vtos -> state ++ void push(TosState state); // transition state -> vtos ++ ++ void empty_expression_stack() { ++ ld_d(SP, FP, frame::interpreter_frame_monitor_block_top_offset * wordSize); ++ // NULL last_sp until next java call ++ st_d(R0, FP, frame::interpreter_frame_last_sp_offset * wordSize); ++ } ++ ++ // Super call_VM calls - correspond to MacroAssembler::call_VM(_leaf) calls ++ void load_ptr(int n, Register val); ++ void store_ptr(int n, Register val); ++ ++ // Generate a subtype check: branch to ok_is_subtype if sub_klass is ++ // a subtype of super_klass. ++ //void gen_subtype_check( Register sub_klass, Label &ok_is_subtype ); ++ void gen_subtype_check( Register Rsup_klass, Register sub_klass, Label &ok_is_subtype ); ++ ++ // Dispatching ++ void dispatch_prolog(TosState state, int step = 0); ++ void dispatch_epilog(TosState state, int step = 0); ++ void dispatch_only(TosState state, bool generate_poll = false); ++ void dispatch_only_normal(TosState state); ++ void dispatch_only_noverify(TosState state); ++ void dispatch_next(TosState state, int step = 0, bool generate_poll = false); ++ void dispatch_via (TosState state, address* table); ++ ++ // jump to an invoked target ++ void prepare_to_jump_from_interpreted(); ++ void jump_from_interpreted(Register method, Register temp); ++ ++ ++ // Returning from interpreted functions ++ // ++ // Removes the current activation (incl. unlocking of monitors) ++ // and sets up the return address. This code is also used for ++ // exception unwindwing. In that case, we do not want to throw ++ // IllegalMonitorStateExceptions, since that might get us into an ++ // infinite rethrow exception loop. ++ // Additionally this code is used for popFrame and earlyReturn. 
++ // In popFrame case we want to skip throwing an exception, ++ // installing an exception, and notifying jvmdi. ++ // In earlyReturn case we only want to skip throwing an exception ++ // and installing an exception. ++ void remove_activation(TosState state, Register ret_addr, ++ bool throw_monitor_exception = true, ++ bool install_monitor_exception = true, ++ bool notify_jvmdi = true); ++ ++ // Object locking ++ void lock_object (Register lock_reg); ++ void unlock_object(Register lock_reg); ++ ++ // Interpreter profiling operations ++ void set_method_data_pointer_for_bcp(); ++ void test_method_data_pointer(Register mdp, Label& zero_continue); ++ void verify_method_data_pointer(); ++ ++ void set_mdp_data_at(Register mdp_in, int constant, Register value); ++ void increment_mdp_data_at(Address data, bool decrement = false); ++ void increment_mdp_data_at(Register mdp_in, int constant, ++ bool decrement = false); ++ void increment_mdp_data_at(Register mdp_in, Register reg, int constant, ++ bool decrement = false); ++ void increment_mask_and_jump(Address counter_addr, ++ int increment, Address mask, ++ Register scratch, bool preloaded, ++ Condition cond, Label* where); ++ void set_mdp_flag_at(Register mdp_in, int flag_constant); ++ void test_mdp_data_at(Register mdp_in, int offset, Register value, ++ Register test_value_out, ++ Label& not_equal_continue); ++ ++ void record_klass_in_profile(Register receiver, Register mdp, ++ Register reg2, bool is_virtual_call); ++ void record_klass_in_profile_helper(Register receiver, Register mdp, ++ Register reg2, int start_row, ++ Label& done, bool is_virtual_call); ++ ++ void record_item_in_profile_helper(Register item, Register mdp, ++ Register reg2, int start_row, Label& done, int total_rows, ++ OffsetFunction item_offset_fn, OffsetFunction item_count_offset_fn, ++ int non_profiled_offset); ++ void update_mdp_by_offset(Register mdp_in, int offset_of_offset); ++ void update_mdp_by_offset(Register mdp_in, Register reg, int offset_of_disp); ++ void update_mdp_by_constant(Register mdp_in, int constant); ++ void update_mdp_for_ret(Register return_bci); ++ ++ void profile_taken_branch(Register mdp, Register bumped_count); ++ void profile_not_taken_branch(Register mdp); ++ void profile_call(Register mdp); ++ void profile_final_call(Register mdp); ++ void profile_virtual_call(Register receiver, Register mdp, ++ Register scratch2, ++ bool receiver_can_be_null = false); ++ void profile_ret(Register return_bci, Register mdp); ++ void profile_null_seen(Register mdp); ++ void profile_typecheck(Register mdp, Register klass, Register scratch); ++ void profile_typecheck_failed(Register mdp); ++ void profile_switch_default(Register mdp); ++ void profile_switch_case(Register index_in_scratch, Register mdp, ++ Register scratch2); ++ ++ // Debugging ++ // only if +VerifyOops && state == atos ++ void verify_oop(Register reg, TosState state = atos); ++ // only if +VerifyFPU && (state == ftos || state == dtos) ++ void verify_FPU(int stack_depth, TosState state = ftos); ++ ++ void profile_obj_type(Register obj, const Address& mdo_addr); ++ void profile_arguments_type(Register mdp, Register callee, Register tmp, bool is_virtual); ++ void profile_return_type(Register mdp, Register ret, Register tmp); ++ void profile_parameters_type(Register mdp, Register tmp1, Register tmp2); ++ ++ typedef enum { NotifyJVMTI, SkipNotifyJVMTI } NotifyMethodExitMode; ++ ++ // support for jvmti/dtrace ++ void notify_method_entry(); ++ void notify_method_exit(TosState state, NotifyMethodExitMode 
mode); ++}; ++ ++#endif // CPU_LOONGARCH_INTERP_MASM_LOONGARCH_64_HPP +diff --git a/src/hotspot/cpu/loongarch/interp_masm_loongarch_64.cpp b/src/hotspot/cpu/loongarch/interp_masm_loongarch_64.cpp +new file mode 100644 +index 00000000000..fa65d10765c +--- /dev/null ++++ b/src/hotspot/cpu/loongarch/interp_masm_loongarch_64.cpp +@@ -0,0 +1,2040 @@ ++/* ++ * Copyright (c) 2003, 2013, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2021, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#include "precompiled.hpp" ++#include "gc/shared/barrierSet.hpp" ++#include "gc/shared/barrierSetAssembler.hpp" ++#include "interp_masm_loongarch.hpp" ++#include "interpreter/interpreter.hpp" ++#include "interpreter/interpreterRuntime.hpp" ++#include "oops/arrayOop.hpp" ++#include "oops/markWord.hpp" ++#include "oops/methodData.hpp" ++#include "oops/method.hpp" ++#include "prims/jvmtiExport.hpp" ++#include "prims/jvmtiThreadState.hpp" ++#include "runtime/basicLock.hpp" ++#include "runtime/biasedLocking.hpp" ++#include "runtime/frame.inline.hpp" ++#include "runtime/safepointMechanism.hpp" ++#include "runtime/sharedRuntime.hpp" ++#include "runtime/thread.inline.hpp" ++ ++#define A0 RA0 ++#define A1 RA1 ++#define A2 RA2 ++#define A3 RA3 ++#define A4 RA4 ++#define A5 RA5 ++#define A6 RA6 ++#define A7 RA7 ++#define T0 RT0 ++#define T1 RT1 ++#define T2 RT2 ++#define T3 RT3 ++#define T4 RT4 ++#define T5 RT5 ++#define T6 RT6 ++#define T7 RT7 ++#define T8 RT8 ++ ++// Implementation of InterpreterMacroAssembler ++ ++void InterpreterMacroAssembler::get_2_byte_integer_at_bcp(Register reg, Register tmp, int offset) { ++ if (UseUnalignedAccesses) { ++ ld_hu(reg, BCP, offset); ++ } else { ++ ld_bu(reg, BCP, offset); ++ ld_bu(tmp, BCP, offset + 1); ++ bstrins_d(reg, tmp, 15, 8); ++ } ++} ++ ++void InterpreterMacroAssembler::get_4_byte_integer_at_bcp(Register reg, int offset) { ++ if (UseUnalignedAccesses) { ++ ld_wu(reg, BCP, offset); ++ } else { ++ ldr_w(reg, BCP, offset); ++ ldl_w(reg, BCP, offset + 3); ++ lu32i_d(reg, 0); ++ } ++} ++ ++void InterpreterMacroAssembler::jump_to_entry(address entry) { ++ assert(entry, "Entry must have been generated by now"); ++ jmp(entry); ++} ++ ++void InterpreterMacroAssembler::call_VM_leaf_base(address entry_point, ++ int number_of_arguments) { ++ // interpreter specific ++ // ++ // Note: No need to save/restore bcp & locals pointer ++ // since these are callee saved registers and no blocking/ ++ // GC can happen in leaf calls. 
++ // Further Note: DO NOT save/restore bcp/locals. If a caller has ++ // already saved them so that it can use BCP/LVP as temporaries ++ // then a save/restore here will DESTROY the copy the caller ++ // saved! There used to be a save_bcp() that only happened in ++ // the ASSERT path (no restore_bcp). Which caused bizarre failures ++ // when jvm built with ASSERTs. ++#ifdef ASSERT ++ save_bcp(); ++ { ++ Label L; ++ ld_d(AT,FP,frame::interpreter_frame_last_sp_offset * wordSize); ++ beq(AT,R0,L); ++ stop("InterpreterMacroAssembler::call_VM_leaf_base: last_sp != NULL"); ++ bind(L); ++ } ++#endif ++ // super call ++ MacroAssembler::call_VM_leaf_base(entry_point, number_of_arguments); ++ // interpreter specific ++ // Used to ASSERT that BCP/LVP were equal to frame's bcp/locals ++ // but since they may not have been saved (and we don't want to ++ // save them here (see note above) the assert is invalid. ++} ++ ++void InterpreterMacroAssembler::call_VM_base(Register oop_result, ++ Register java_thread, ++ Register last_java_sp, ++ address entry_point, ++ int number_of_arguments, ++ bool check_exceptions) { ++ // interpreter specific ++ // ++ // Note: Could avoid restoring locals ptr (callee saved) - however doesn't ++ // really make a difference for these runtime calls, since they are ++ // slow anyway. Btw., bcp must be saved/restored since it may change ++ // due to GC. ++ assert(java_thread == noreg , "not expecting a precomputed java thread"); ++ save_bcp(); ++#ifdef ASSERT ++ { ++ Label L; ++ ld_d(AT, FP, frame::interpreter_frame_last_sp_offset * wordSize); ++ beq(AT, R0, L); ++ stop("InterpreterMacroAssembler::call_VM_base: last_sp != NULL"); ++ bind(L); ++ } ++#endif /* ASSERT */ ++ // super call ++ MacroAssembler::call_VM_base(oop_result, java_thread, last_java_sp, ++ entry_point, number_of_arguments, ++ check_exceptions); ++ // interpreter specific ++ restore_bcp(); ++ restore_locals(); ++} ++ ++ ++void InterpreterMacroAssembler::check_and_handle_popframe(Register java_thread) { ++ if (JvmtiExport::can_pop_frame()) { ++ Label L; ++ // Initiate popframe handling only if it is not already being ++ // processed. If the flag has the popframe_processing bit set, it ++ // means that this code is called *during* popframe handling - we ++ // don't want to reenter. ++ // This method is only called just after the call into the vm in ++ // call_VM_base, so the arg registers are available. 
++ // Not clear if any other register is available, so load AT twice ++ assert(AT != java_thread, "check"); ++ ld_w(AT, java_thread, in_bytes(JavaThread::popframe_condition_offset())); ++ andi(AT, AT, JavaThread::popframe_pending_bit); ++ beq(AT, R0, L); ++ ++ ld_w(AT, java_thread, in_bytes(JavaThread::popframe_condition_offset())); ++ andi(AT, AT, JavaThread::popframe_processing_bit); ++ bne(AT, R0, L); ++ call_VM_leaf(CAST_FROM_FN_PTR(address, Interpreter::remove_activation_preserving_args_entry)); ++ jr(V0); ++ bind(L); ++ } ++} ++ ++ ++void InterpreterMacroAssembler::load_earlyret_value(TosState state) { ++#ifndef OPT_THREAD ++ Register thread = T8; ++ get_thread(thread); ++#else ++ Register thread = TREG; ++#endif ++ ld_ptr(T8, thread, in_bytes(JavaThread::jvmti_thread_state_offset())); ++ const Address tos_addr (T8, in_bytes(JvmtiThreadState::earlyret_tos_offset())); ++ const Address oop_addr (T8, in_bytes(JvmtiThreadState::earlyret_oop_offset())); ++ const Address val_addr (T8, in_bytes(JvmtiThreadState::earlyret_value_offset())); ++ //V0, oop_addr,V1,val_addr ++ switch (state) { ++ case atos: ++ ld_ptr(V0, oop_addr); ++ st_ptr(R0, oop_addr); ++ verify_oop(V0, state); ++ break; ++ case ltos: ++ ld_ptr(V0, val_addr); // fall through ++ break; ++ case btos: // fall through ++ case ztos: // fall through ++ case ctos: // fall through ++ case stos: // fall through ++ case itos: ++ ld_w(V0, val_addr); ++ break; ++ case ftos: ++ fld_s(F0, T8, in_bytes(JvmtiThreadState::earlyret_value_offset())); ++ break; ++ case dtos: ++ fld_d(F0, T8, in_bytes(JvmtiThreadState::earlyret_value_offset())); ++ break; ++ case vtos: /* nothing to do */ break; ++ default : ShouldNotReachHere(); ++ } ++ // Clean up tos value in the thread object ++ li(AT, (int)ilgl); ++ st_w(AT, tos_addr); ++ st_w(R0, T8, in_bytes(JvmtiThreadState::earlyret_value_offset())); ++} ++ ++ ++void InterpreterMacroAssembler::check_and_handle_earlyret(Register java_thread) { ++ if (JvmtiExport::can_force_early_return()) { ++ assert(java_thread != AT, "check"); ++ ++ Label L; ++ ld_ptr(AT, java_thread, in_bytes(JavaThread::jvmti_thread_state_offset())); ++ beqz(AT, L); ++ ++ // Initiate earlyret handling only if it is not already being processed. ++ // If the flag has the earlyret_processing bit set, it means that this code ++ // is called *during* earlyret handling - we don't want to reenter. ++ ld_w(AT, AT, in_bytes(JvmtiThreadState::earlyret_state_offset())); ++ addi_w(AT, AT, -JvmtiThreadState::earlyret_pending); ++ bnez(AT, L); ++ ++ // Call Interpreter::remove_activation_early_entry() to get the address of the ++ // same-named entrypoint in the generated interpreter code. 
++ ld_ptr(A0, java_thread, in_bytes(JavaThread::jvmti_thread_state_offset())); ++ ld_w(A0, A0, in_bytes(JvmtiThreadState::earlyret_tos_offset())); ++ call_VM_leaf(CAST_FROM_FN_PTR(address, Interpreter::remove_activation_early_entry), A0); ++ jr(A0); ++ bind(L); ++ } ++} ++ ++ ++void InterpreterMacroAssembler::get_unsigned_2_byte_index_at_bcp(Register reg, ++ int bcp_offset) { ++ assert(bcp_offset >= 0, "bcp is still pointing to start of bytecode"); ++ ld_bu(AT, BCP, bcp_offset); ++ ld_bu(reg, BCP, bcp_offset + 1); ++ bstrins_w(reg, AT, 15, 8); ++} ++ ++ ++void InterpreterMacroAssembler::get_cache_index_at_bcp(Register index, ++ int bcp_offset, ++ size_t index_size) { ++ assert(bcp_offset > 0, "bcp is still pointing to start of bytecode"); ++ if (index_size == sizeof(u2)) { ++ get_2_byte_integer_at_bcp(index, AT, bcp_offset); ++ } else if (index_size == sizeof(u4)) { ++ get_4_byte_integer_at_bcp(index, bcp_offset); ++ // Check if the secondary index definition is still ~x, otherwise ++ // we have to change the following assembler code to calculate the ++ // plain index. ++ assert(ConstantPool::decode_invokedynamic_index(~123) == 123, "else change next line"); ++ nor(index, index, R0); ++ slli_w(index, index, 0); ++ } else if (index_size == sizeof(u1)) { ++ ld_bu(index, BCP, bcp_offset); ++ } else { ++ ShouldNotReachHere(); ++ } ++} ++ ++ ++void InterpreterMacroAssembler::get_cache_and_index_at_bcp(Register cache, ++ Register index, ++ int bcp_offset, ++ size_t index_size) { ++ assert_different_registers(cache, index); ++ get_cache_index_at_bcp(index, bcp_offset, index_size); ++ ld_d(cache, FP, frame::interpreter_frame_cache_offset * wordSize); ++ assert(sizeof(ConstantPoolCacheEntry) == 4 * wordSize, "adjust code below"); ++ assert(exact_log2(in_words(ConstantPoolCacheEntry::size())) == 2, "else change next line"); ++ shl(index, 2); ++} ++ ++ ++void InterpreterMacroAssembler::get_cache_and_index_and_bytecode_at_bcp(Register cache, ++ Register index, ++ Register bytecode, ++ int byte_no, ++ int bcp_offset, ++ size_t index_size) { ++ get_cache_and_index_at_bcp(cache, index, bcp_offset, index_size); ++ // We use a 32-bit load here since the layout of 64-bit words on ++ // little-endian machines allow us that. 
++ alsl_d(AT, index, cache, Address::times_ptr - 1); ++ ld_w(bytecode, AT, in_bytes(ConstantPoolCache::base_offset() + ConstantPoolCacheEntry::indices_offset())); ++ if(os::is_MP()) { ++ membar(Assembler::Membar_mask_bits(LoadLoad|LoadStore)); ++ } ++ ++ const int shift_count = (1 + byte_no) * BitsPerByte; ++ assert((byte_no == TemplateTable::f1_byte && shift_count == ConstantPoolCacheEntry::bytecode_1_shift) || ++ (byte_no == TemplateTable::f2_byte && shift_count == ConstantPoolCacheEntry::bytecode_2_shift), ++ "correct shift count"); ++ srli_d(bytecode, bytecode, shift_count); ++ assert(ConstantPoolCacheEntry::bytecode_1_mask == ConstantPoolCacheEntry::bytecode_2_mask, "common mask"); ++ li(AT, ConstantPoolCacheEntry::bytecode_1_mask); ++ andr(bytecode, bytecode, AT); ++} ++ ++void InterpreterMacroAssembler::get_cache_entry_pointer_at_bcp(Register cache, ++ Register tmp, ++ int bcp_offset, ++ size_t index_size) { ++ assert(bcp_offset > 0, "bcp is still pointing to start of bytecode"); ++ assert(cache != tmp, "must use different register"); ++ get_cache_index_at_bcp(tmp, bcp_offset, index_size); ++ assert(sizeof(ConstantPoolCacheEntry) == 4 * wordSize, "adjust code below"); ++ // convert from field index to ConstantPoolCacheEntry index ++ // and from word offset to byte offset ++ assert(exact_log2(in_bytes(ConstantPoolCacheEntry::size_in_bytes())) == 2 + LogBytesPerWord, "else change next line"); ++ shl(tmp, 2 + LogBytesPerWord); ++ ld_d(cache, FP, frame::interpreter_frame_cache_offset * wordSize); ++ // skip past the header ++ addi_d(cache, cache, in_bytes(ConstantPoolCache::base_offset())); ++ add_d(cache, cache, tmp); ++} ++ ++void InterpreterMacroAssembler::get_method_counters(Register method, ++ Register mcs, Label& skip) { ++ Label has_counters; ++ ld_d(mcs, method, in_bytes(Method::method_counters_offset())); ++ bne(mcs, R0, has_counters); ++ call_VM(noreg, CAST_FROM_FN_PTR(address, ++ InterpreterRuntime::build_method_counters), method); ++ ld_d(mcs, method, in_bytes(Method::method_counters_offset())); ++ beq(mcs, R0, skip); // No MethodCounters allocated, OutOfMemory ++ bind(has_counters); ++} ++ ++// Load object from cpool->resolved_references(index) ++void InterpreterMacroAssembler::load_resolved_reference_at_index( ++ Register result, Register index, Register tmp) { ++ assert_different_registers(result, index); ++ // convert from field index to resolved_references() index and from ++ // word index to byte offset. 
Since this is a java object, it can be compressed ++ shl(index, LogBytesPerHeapOop); ++ ++ get_constant_pool(result); ++ // load pointer for resolved_references[] objArray ++ ld_d(result, result, ConstantPool::cache_offset_in_bytes()); ++ ld_d(result, result, ConstantPoolCache::resolved_references_offset_in_bytes()); ++ resolve_oop_handle(result, tmp); ++ // Add in the index ++ add_d(result, result, index); ++ load_heap_oop(result, Address(result, arrayOopDesc::base_offset_in_bytes(T_OBJECT)), tmp); ++} ++ ++// load cpool->resolved_klass_at(index) ++void InterpreterMacroAssembler::load_resolved_klass_at_index(Register cpool, ++ Register index, Register klass) { ++ alsl_d(AT, index, cpool, Address::times_ptr - 1); ++ ld_h(index, AT, sizeof(ConstantPool)); ++ Register resolved_klasses = cpool; ++ ld_ptr(resolved_klasses, Address(cpool, ConstantPool::resolved_klasses_offset_in_bytes())); ++ alsl_d(AT, index, resolved_klasses, Address::times_ptr - 1); ++ ld_d(klass, AT, Array::base_offset_in_bytes()); ++} ++ ++void InterpreterMacroAssembler::load_resolved_method_at_index(int byte_no, ++ Register method, ++ Register cache, ++ Register index) { ++ const int method_offset = in_bytes( ++ ConstantPoolCache::base_offset() + ++ ((byte_no == TemplateTable::f2_byte) ++ ? ConstantPoolCacheEntry::f2_offset() ++ : ConstantPoolCacheEntry::f1_offset())); ++ ++ ld_d(method, Address(cache, index, Address::times_ptr, method_offset)); // get f1 Method* ++} ++ ++// Resets LVP to locals. Register sub_klass cannot be any of the above. ++void InterpreterMacroAssembler::gen_subtype_check( Register Rsup_klass, Register Rsub_klass, Label &ok_is_subtype ) { ++ ++ assert( Rsub_klass != Rsup_klass, "Rsup_klass holds superklass" ); ++ assert( Rsub_klass != T1, "T1 holds 2ndary super array length" ); ++ assert( Rsub_klass != T0, "T0 holds 2ndary super array scan ptr" ); ++ // Profile the not-null value's klass. ++ // Here T4 and T1 are used as temporary registers. ++ profile_typecheck(T4, Rsub_klass, T1); // blows T4, reloads T1 ++ ++ // Do the check. ++ check_klass_subtype(Rsub_klass, Rsup_klass, T1, ok_is_subtype); // blows T1 ++ ++ // Profile the failure of the check. ++ profile_typecheck_failed(T4); // blows T4 ++ ++} ++ ++ ++ ++// Java Expression Stack ++ ++void InterpreterMacroAssembler::pop_ptr(Register r) { ++ ld_d(r, SP, 0); ++ addi_d(SP, SP, Interpreter::stackElementSize); ++} ++ ++void InterpreterMacroAssembler::pop_i(Register r) { ++ ld_w(r, SP, 0); ++ addi_d(SP, SP, Interpreter::stackElementSize); ++} ++ ++void InterpreterMacroAssembler::pop_l(Register r) { ++ ld_d(r, SP, 0); ++ addi_d(SP, SP, 2 * Interpreter::stackElementSize); ++} ++ ++void InterpreterMacroAssembler::pop_f(FloatRegister r) { ++ fld_s(r, SP, 0); ++ addi_d(SP, SP, Interpreter::stackElementSize); ++} ++ ++void InterpreterMacroAssembler::pop_d(FloatRegister r) { ++ fld_d(r, SP, 0); ++ addi_d(SP, SP, 2 * Interpreter::stackElementSize); ++} ++ ++void InterpreterMacroAssembler::push_ptr(Register r) { ++ addi_d(SP, SP, - Interpreter::stackElementSize); ++ st_d(r, SP, 0); ++} ++ ++void InterpreterMacroAssembler::push_i(Register r) { ++ // For compatibility reason, don't change to sw. 
++ addi_d(SP, SP, - Interpreter::stackElementSize); ++ st_d(r, SP, 0); ++} ++ ++void InterpreterMacroAssembler::push_l(Register r) { ++ addi_d(SP, SP, -2 * Interpreter::stackElementSize); ++ st_d(r, SP, 0); ++ st_d(R0, SP, Interpreter::stackElementSize); ++} ++ ++void InterpreterMacroAssembler::push_f(FloatRegister r) { ++ addi_d(SP, SP, - Interpreter::stackElementSize); ++ fst_s(r, SP, 0); ++} ++ ++void InterpreterMacroAssembler::push_d(FloatRegister r) { ++ addi_d(SP, SP, -2 * Interpreter::stackElementSize); ++ fst_d(r, SP, 0); ++ st_d(R0, SP, Interpreter::stackElementSize); ++} ++ ++void InterpreterMacroAssembler::pop(TosState state) { ++ switch (state) { ++ case atos: pop_ptr(); break; ++ case btos: ++ case ztos: ++ case ctos: ++ case stos: ++ case itos: pop_i(); break; ++ case ltos: pop_l(); break; ++ case ftos: pop_f(); break; ++ case dtos: pop_d(); break; ++ case vtos: /* nothing to do */ break; ++ default: ShouldNotReachHere(); ++ } ++ verify_oop(FSR, state); ++} ++ ++//FSR=V0,SSR=V1 ++void InterpreterMacroAssembler::push(TosState state) { ++ verify_oop(FSR, state); ++ switch (state) { ++ case atos: push_ptr(); break; ++ case btos: ++ case ztos: ++ case ctos: ++ case stos: ++ case itos: push_i(); break; ++ case ltos: push_l(); break; ++ case ftos: push_f(); break; ++ case dtos: push_d(); break; ++ case vtos: /* nothing to do */ break; ++ default : ShouldNotReachHere(); ++ } ++} ++ ++void InterpreterMacroAssembler::load_ptr(int n, Register val) { ++ ld_d(val, SP, Interpreter::expr_offset_in_bytes(n)); ++} ++ ++void InterpreterMacroAssembler::store_ptr(int n, Register val) { ++ st_d(val, SP, Interpreter::expr_offset_in_bytes(n)); ++} ++ ++// Jump to from_interpreted entry of a call unless single stepping is possible ++// in this thread in which case we must call the i2i entry ++void InterpreterMacroAssembler::jump_from_interpreted(Register method, Register temp) { ++ // record last_sp ++ move(Rsender, SP); ++ st_d(SP, FP, frame::interpreter_frame_last_sp_offset * wordSize); ++ ++ if (JvmtiExport::can_post_interpreter_events()) { ++ Label run_compiled_code; ++ // JVMTI events, such as single-stepping, are implemented partly by avoiding running ++ // compiled code in threads for which the event is enabled. Check here for ++ // interp_only_mode if these events CAN be enabled. ++#ifndef OPT_THREAD ++ Register thread = temp; ++ get_thread(temp); ++#else ++ Register thread = TREG; ++#endif ++ // interp_only is an int, on little endian it is sufficient to test the byte only ++ // Is a cmpl faster? ++ ld_w(AT, thread, in_bytes(JavaThread::interp_only_mode_offset())); ++ beq(AT, R0, run_compiled_code); ++ ld_d(AT, method, in_bytes(Method::interpreter_entry_offset())); ++ jr(AT); ++ bind(run_compiled_code); ++ } ++ ++ ld_d(AT, method, in_bytes(Method::from_interpreted_offset())); ++ jr(AT); ++} ++ ++ ++// The following two routines provide a hook so that an implementation ++// can schedule the dispatch in two parts. LoongArch64 does not do this. ++void InterpreterMacroAssembler::dispatch_prolog(TosState state, int step) { ++ // Nothing LoongArch64 specific to be done here ++} ++ ++void InterpreterMacroAssembler::dispatch_epilog(TosState state, int step) { ++ dispatch_next(state, step); ++} ++ ++// assume the next bytecode in T8. 
++void InterpreterMacroAssembler::dispatch_base(TosState state, ++ address* table, ++ bool verifyoop, ++ bool generate_poll) { ++ Register thread = TREG; ++#ifndef OPT_THREAD ++ get_thread(thread); ++#endif ++ ++ if (VerifyActivationFrameSize) { ++ Label L; ++ ++ sub_d(T2, FP, SP); ++ int min_frame_size = (frame::sender_sp_offset - ++ frame::interpreter_frame_initial_sp_offset) * wordSize; ++ addi_d(T2, T2, -min_frame_size); ++ bge(T2, R0, L); ++ stop("broken stack frame"); ++ bind(L); ++ } ++ // FIXME: I do not know which register should pass to verify_oop ++ if (verifyoop) verify_oop(FSR, state); ++ ++ Label safepoint; ++ address* const safepoint_table = Interpreter::safept_table(state); ++ bool needs_thread_local_poll = generate_poll && table != safepoint_table; ++ ++ if (needs_thread_local_poll) { ++ NOT_PRODUCT(block_comment("Thread-local Safepoint poll")); ++ ld_d(T3, thread, in_bytes(JavaThread::polling_word_offset())); ++ andi(T3, T3, SafepointMechanism::poll_bit()); ++ bne(T3, R0, safepoint); ++ } ++ ++ if((long)table >= (long)Interpreter::dispatch_table(btos) && ++ (long)table <= (long)Interpreter::dispatch_table(vtos)) { ++ int table_size = (long)Interpreter::dispatch_table(itos) - ++ (long)Interpreter::dispatch_table(stos); ++ int table_offset = ((int)state - (int)itos) * table_size; ++ ++ // S8 points to the starting address of Interpreter::dispatch_table(itos). ++ // See StubGenerator::generate_call_stub(address& return_address) for the initialization of S8. ++ if (table_offset != 0) { ++ if (is_simm(table_offset, 12)) { ++ alsl_d(T3, Rnext, S8, LogBytesPerWord - 1); ++ ld_d(T3, T3, table_offset); ++ } else { ++ li(T2, table_offset); ++ alsl_d(T3, Rnext, S8, LogBytesPerWord - 1); ++ ldx_d(T3, T2, T3); ++ } ++ } else { ++ alsl_d(T2, Rnext, S8, LogBytesPerWord - 1); ++ ld_d(T3, T2, 0); ++ } ++ } else { ++ li(T3, (long)table); ++ alsl_d(T2, Rnext, T3, LogBytesPerWord - 1); ++ ld_d(T3, T2, 0); ++ } ++ jr(T3); ++ ++ if (needs_thread_local_poll) { ++ bind(safepoint); ++ li(T3, (long)safepoint_table); ++ alsl_d(T2, Rnext, T3, LogBytesPerWord - 1); ++ ld_d(T3, T2, 0); ++ jr(T3); ++ } ++} ++ ++void InterpreterMacroAssembler::dispatch_only(TosState state, bool generate_poll) { ++ dispatch_base(state, Interpreter::dispatch_table(state), true, generate_poll); ++} ++ ++void InterpreterMacroAssembler::dispatch_only_normal(TosState state) { ++ dispatch_base(state, Interpreter::normal_table(state)); ++} ++ ++void InterpreterMacroAssembler::dispatch_only_noverify(TosState state) { ++ dispatch_base(state, Interpreter::normal_table(state), false); ++} ++ ++ ++void InterpreterMacroAssembler::dispatch_next(TosState state, int step, bool generate_poll) { ++ // load next bytecode ++ ld_bu(Rnext, BCP, step); ++ increment(BCP, step); ++ dispatch_base(state, Interpreter::dispatch_table(state), true, generate_poll); ++} ++ ++void InterpreterMacroAssembler::dispatch_via(TosState state, address* table) { ++ // load current bytecode ++ ld_bu(Rnext, BCP, 0); ++ dispatch_base(state, table); ++} ++ ++// remove activation ++// ++// Apply stack watermark barrier. ++// Unlock the receiver if this is a synchronized method. ++// Unlock any Java monitors from syncronized blocks. ++// Remove the activation from the stack. 
++// ++// If there are locked Java monitors ++// If throw_monitor_exception ++// throws IllegalMonitorStateException ++// Else if install_monitor_exception ++// installs IllegalMonitorStateException ++// Else ++// no error processing ++// used registers : T1, T2, T3, T8 ++// T1 : thread, method access flags ++// T2 : monitor entry pointer ++// T3 : method, monitor top ++// T8 : unlock flag ++void InterpreterMacroAssembler::remove_activation( ++ TosState state, ++ Register ret_addr, ++ bool throw_monitor_exception, ++ bool install_monitor_exception, ++ bool notify_jvmdi) { ++ // Note: Registers V0, V1 and F0, F1 may be in use for the result ++ // check if synchronized method ++ Label unlocked, unlock, no_unlock; ++ ++#ifndef OPT_THREAD ++ Register thread = T1; ++ get_thread(thread); ++#else ++ Register thread = TREG; ++#endif ++ ++ // The below poll is for the stack watermark barrier. It allows fixing up frames lazily, ++ // that would normally not be safe to use. Such bad returns into unsafe territory of ++ // the stack, will call InterpreterRuntime::at_unwind. ++ Label slow_path; ++ Label fast_path; ++ safepoint_poll(slow_path, thread, true /* at_return */, false /* acquire */, false /* in_nmethod */); ++ b(fast_path); ++ bind(slow_path); ++ push(state); ++ Label L; ++ address the_pc = pc(); ++ bind(L); ++ set_last_Java_frame(thread, SP, FP, L); ++ super_call_VM_leaf(CAST_FROM_FN_PTR(address, InterpreterRuntime::at_unwind), thread); ++ reset_last_Java_frame(true); ++ pop(state); ++ bind(fast_path); ++ ++ // get the value of _do_not_unlock_if_synchronized into T8 ++ ld_b(T8, thread, in_bytes(JavaThread::do_not_unlock_if_synchronized_offset())); ++ // reset the flag ++ st_b(R0, thread, in_bytes(JavaThread::do_not_unlock_if_synchronized_offset())); ++ // get method access flags ++ ld_d(T3, FP, frame::interpreter_frame_method_offset * wordSize); ++ ld_w(T1, T3, in_bytes(Method::access_flags_offset())); ++ andi(T1, T1, JVM_ACC_SYNCHRONIZED); ++ beq(T1, R0, unlocked); ++ ++ // Don't unlock anything if the _do_not_unlock_if_synchronized flag is set. ++ bne(T8, R0, no_unlock); ++ // unlock monitor ++ push(state); // save result ++ ++ // BasicObjectLock will be first in list, since this is a ++ // synchronized method. However, need to check that the object has ++ // not been unlocked by an explicit monitorexit bytecode. ++ addi_d(c_rarg0, FP, frame::interpreter_frame_initial_sp_offset * wordSize ++ - (int)sizeof(BasicObjectLock)); ++ // address of first monitor ++ ld_d(T1, c_rarg0, BasicObjectLock::obj_offset_in_bytes()); ++ bne(T1, R0, unlock); ++ pop(state); ++ if (throw_monitor_exception) { ++ // Entry already unlocked, need to throw exception ++ call_VM(NOREG, CAST_FROM_FN_PTR(address, ++ InterpreterRuntime::throw_illegal_monitor_state_exception)); ++ should_not_reach_here(); ++ } else { ++ // Monitor already unlocked during a stack unroll. If requested, ++ // install an illegal_monitor_state_exception. Continue with ++ // stack unrolling. 
++ if (install_monitor_exception) { ++ call_VM(NOREG, CAST_FROM_FN_PTR(address, ++ InterpreterRuntime::new_illegal_monitor_state_exception)); ++ } ++ b(unlocked); ++ } ++ ++ bind(unlock); ++ unlock_object(c_rarg0); ++ pop(state); ++ ++ // Check that for block-structured locking (i.e., that all locked ++ // objects has been unlocked) ++ bind(unlocked); ++ ++ // V0, V1: Might contain return value ++ ++ // Check that all monitors are unlocked ++ { ++ Label loop, exception, entry, restart; ++ const int entry_size = frame::interpreter_frame_monitor_size() * wordSize; ++ const Address monitor_block_top(FP, ++ frame::interpreter_frame_monitor_block_top_offset * wordSize); ++ ++ bind(restart); ++ // points to current entry, starting with top-most entry ++ ld_d(c_rarg0, monitor_block_top); ++ // points to word before bottom of monitor block ++ addi_d(T3, FP, frame::interpreter_frame_initial_sp_offset * wordSize); ++ b(entry); ++ ++ // Entry already locked, need to throw exception ++ bind(exception); ++ ++ if (throw_monitor_exception) { ++ // Throw exception ++ MacroAssembler::call_VM(NOREG, CAST_FROM_FN_PTR(address, ++ InterpreterRuntime::throw_illegal_monitor_state_exception)); ++ should_not_reach_here(); ++ } else { ++ // Stack unrolling. Unlock object and install illegal_monitor_exception ++ // Unlock does not block, so don't have to worry about the frame ++ // We don't have to preserve c_rarg0, since we are going to ++ // throw an exception ++ ++ push(state); ++ unlock_object(c_rarg0); ++ pop(state); ++ ++ if (install_monitor_exception) { ++ call_VM(NOREG, CAST_FROM_FN_PTR(address, ++ InterpreterRuntime::new_illegal_monitor_state_exception)); ++ } ++ ++ b(restart); ++ } ++ ++ bind(loop); ++ ld_d(T1, c_rarg0, BasicObjectLock::obj_offset_in_bytes()); ++ bne(T1, R0, exception);// check if current entry is used ++ ++ addi_d(c_rarg0, c_rarg0, entry_size);// otherwise advance to next entry ++ bind(entry); ++ bne(c_rarg0, T3, loop); // check if bottom reached ++ } ++ ++ bind(no_unlock); ++ ++ // jvmpi support (jvmdi does not generate MethodExit on exception / popFrame) ++ if (notify_jvmdi) { ++ notify_method_exit(state, NotifyJVMTI); // preserve TOSCA ++ } else { ++ notify_method_exit(state, SkipNotifyJVMTI); // preserve TOSCA ++ } ++ ++ // remove activation ++ ld_d(TSR, FP, frame::interpreter_frame_sender_sp_offset * wordSize); ++ if (StackReservedPages > 0) { ++ // testing if reserved zone needs to be re-enabled ++ Label no_reserved_zone_enabling; ++ ++ ld_d(AT, Address(thread, JavaThread::reserved_stack_activation_offset())); ++ sub_d(AT, TSR, AT); ++ bge(R0, AT, no_reserved_zone_enabling); ++ ++ call_VM_leaf( ++ CAST_FROM_FN_PTR(address, SharedRuntime::enable_stack_reserved_zone), thread); ++ call_VM(noreg, CAST_FROM_FN_PTR(address, ++ InterpreterRuntime::throw_delayed_StackOverflowError)); ++ should_not_reach_here(); ++ ++ bind(no_reserved_zone_enabling); ++ } ++ ld_d(ret_addr, FP, frame::return_addr_offset * wordSize); ++ ld_d(FP, FP, frame::link_offset * wordSize); ++ move(SP, TSR); // set sp to sender sp ++} ++ ++// Lock object ++// ++// Args: ++// c_rarg0: BasicObjectLock to be used for locking ++// ++// Kills: ++// T1 ++// T2 ++void InterpreterMacroAssembler::lock_object(Register lock_reg) { ++ assert(lock_reg == c_rarg0, "The argument is only for looks. 
It must be c_rarg0"); ++ ++ if (UseHeavyMonitors) { ++ call_VM(NOREG, CAST_FROM_FN_PTR(address, InterpreterRuntime::monitorenter), lock_reg); ++ } else { ++ Label done, slow_case; ++ const Register tmp_reg = T2; ++ const Register scr_reg = T1; ++ const int obj_offset = BasicObjectLock::obj_offset_in_bytes(); ++ const int lock_offset = BasicObjectLock::lock_offset_in_bytes (); ++ const int mark_offset = lock_offset + BasicLock::displaced_header_offset_in_bytes(); ++ ++ // Load object pointer into scr_reg ++ ld_d(scr_reg, lock_reg, obj_offset); ++ ++ if (DiagnoseSyncOnValueBasedClasses != 0) { ++ load_klass(tmp_reg, scr_reg); ++ ld_w(tmp_reg, Address(tmp_reg, Klass::access_flags_offset())); ++ li(AT, JVM_ACC_IS_VALUE_BASED_CLASS); ++ andr(AT, AT, tmp_reg); ++ bnez(AT, slow_case); ++ } ++ ++ if (UseBiasedLocking) { ++ // Note: we use noreg for the temporary register since it's hard ++ // to come up with a free register on all incoming code paths ++ biased_locking_enter(lock_reg, scr_reg, tmp_reg, noreg, false, done, &slow_case); ++ } ++ ++ // Load (object->mark() | 1) into tmp_reg ++ ld_d(AT, scr_reg, 0); ++ ori(tmp_reg, AT, 1); ++ ++ // Save (object->mark() | 1) into BasicLock's displaced header ++ st_d(tmp_reg, lock_reg, mark_offset); ++ ++ assert(lock_offset == 0, "displached header must be first word in BasicObjectLock"); ++ ++ if (PrintBiasedLockingStatistics) { ++ Label succ, fail; ++ cmpxchg(Address(scr_reg, 0), tmp_reg, lock_reg, AT, true, false, succ, &fail); ++ bind(succ); ++ atomic_inc32((address)BiasedLocking::fast_path_entry_count_addr(), 1, AT, scr_reg); ++ b(done); ++ bind(fail); ++ } else { ++ cmpxchg(Address(scr_reg, 0), tmp_reg, lock_reg, AT, true, false, done); ++ } ++ ++ // Test if the oopMark is an obvious stack pointer, i.e., ++ // 1) (mark & 3) == 0, and ++ // 2) SP <= mark < SP + os::pagesize() ++ // ++ // These 3 tests can be done by evaluating the following ++ // expression: ((mark - sp) & (3 - os::vm_page_size())), ++ // assuming both stack pointer and pagesize have their ++ // least significant 2 bits clear. ++ // NOTE: the oopMark is in tmp_reg as the result of cmpxchg ++ sub_d(tmp_reg, tmp_reg, SP); ++ li(AT, 7 - os::vm_page_size()); ++ andr(tmp_reg, tmp_reg, AT); ++ // Save the test result, for recursive case, the result is zero ++ st_d(tmp_reg, lock_reg, mark_offset); ++ if (PrintBiasedLockingStatistics) { ++ bnez(tmp_reg, slow_case); ++ atomic_inc32((address)BiasedLocking::fast_path_entry_count_addr(), 1, AT, scr_reg); ++ } ++ beqz(tmp_reg, done); ++ ++ bind(slow_case); ++ // Call the runtime routine for slow case ++ call_VM(NOREG, CAST_FROM_FN_PTR(address, InterpreterRuntime::monitorenter), lock_reg); ++ ++ bind(done); ++ } ++} ++ ++// Unlocks an object. Used in monitorexit bytecode and ++// remove_activation. Throws an IllegalMonitorException if object is ++// not locked by current thread. ++// ++// Args: ++// c_rarg0: BasicObjectLock for lock ++// ++// Kills: ++// T1 ++// T2 ++// T3 ++// Throw an IllegalMonitorException if object is not locked by current thread ++void InterpreterMacroAssembler::unlock_object(Register lock_reg) { ++ assert(lock_reg == c_rarg0, "The argument is only for looks. 
It must be c_rarg0"); ++ ++ if (UseHeavyMonitors) { ++ call_VM_leaf(CAST_FROM_FN_PTR(address, InterpreterRuntime::monitorexit), lock_reg); ++ } else { ++ Label done; ++ const Register tmp_reg = T1; ++ const Register scr_reg = T2; ++ const Register hdr_reg = T3; ++ ++ save_bcp(); // Save in case of exception ++ ++ // Convert from BasicObjectLock structure to object and BasicLock structure ++ // Store the BasicLock address into tmp_reg ++ addi_d(tmp_reg, lock_reg, BasicObjectLock::lock_offset_in_bytes()); ++ ++ // Load oop into scr_reg ++ ld_d(scr_reg, lock_reg, BasicObjectLock::obj_offset_in_bytes()); ++ // free entry ++ st_d(R0, lock_reg, BasicObjectLock::obj_offset_in_bytes()); ++ if (UseBiasedLocking) { ++ biased_locking_exit(scr_reg, hdr_reg, done); ++ } ++ ++ // Load the old header from BasicLock structure ++ ld_d(hdr_reg, tmp_reg, BasicLock::displaced_header_offset_in_bytes()); ++ // zero for recursive case ++ beqz(hdr_reg, done); ++ ++ // Atomic swap back the old header ++ cmpxchg(Address(scr_reg, 0), tmp_reg, hdr_reg, AT, false, false, done); ++ ++ // Call the runtime routine for slow case. ++ st_d(scr_reg, lock_reg, BasicObjectLock::obj_offset_in_bytes()); // restore obj ++ call_VM_leaf(CAST_FROM_FN_PTR(address, InterpreterRuntime::monitorexit), lock_reg); ++ ++ bind(done); ++ ++ restore_bcp(); ++ } ++} ++ ++void InterpreterMacroAssembler::test_method_data_pointer(Register mdp, ++ Label& zero_continue) { ++ assert(ProfileInterpreter, "must be profiling interpreter"); ++ ld_d(mdp, Address(FP, frame::interpreter_frame_mdp_offset * wordSize)); ++ beq(mdp, R0, zero_continue); ++} ++ ++// Set the method data pointer for the current bcp. ++void InterpreterMacroAssembler::set_method_data_pointer_for_bcp() { ++ assert(ProfileInterpreter, "must be profiling interpreter"); ++ Label set_mdp; ++ ++ // V0 and T0 will be used as two temporary registers. ++ push2(V0, T0); ++ ++ get_method(T0); ++ // Test MDO to avoid the call if it is NULL. ++ ld_d(V0, T0, in_bytes(Method::method_data_offset())); ++ beq(V0, R0, set_mdp); ++ ++ // method: T0 ++ // bcp: BCP --> S0 ++ call_VM_leaf(CAST_FROM_FN_PTR(address, InterpreterRuntime::bcp_to_di), T0, BCP); ++ // mdi: V0 ++ // mdo is guaranteed to be non-zero here, we checked for it before the call. ++ get_method(T0); ++ ld_d(T0, T0, in_bytes(Method::method_data_offset())); ++ addi_d(T0, T0, in_bytes(MethodData::data_offset())); ++ add_d(V0, T0, V0); ++ bind(set_mdp); ++ st_d(V0, FP, frame::interpreter_frame_mdp_offset * wordSize); ++ pop2(V0, T0); ++} ++ ++void InterpreterMacroAssembler::verify_method_data_pointer() { ++ assert(ProfileInterpreter, "must be profiling interpreter"); ++#ifdef ASSERT ++ Label verify_continue; ++ Register method = T5; ++ Register mdp = T6; ++ Register tmp = A0; ++ push(method); ++ push(mdp); ++ push(tmp); ++ test_method_data_pointer(mdp, verify_continue); // If mdp is zero, continue ++ get_method(method); ++ ++ // If the mdp is valid, it will point to a DataLayout header which is ++ // consistent with the bcp. The converse is highly probable also. 
++ ld_hu(tmp, mdp, in_bytes(DataLayout::bci_offset())); ++ ld_d(AT, method, in_bytes(Method::const_offset())); ++ add_d(tmp, tmp, AT); ++ addi_d(tmp, tmp, in_bytes(ConstMethod::codes_offset())); ++ beq(tmp, BCP, verify_continue); ++ call_VM_leaf(CAST_FROM_FN_PTR(address, InterpreterRuntime::verify_mdp), method, BCP, mdp); ++ bind(verify_continue); ++ pop(tmp); ++ pop(mdp); ++ pop(method); ++#endif // ASSERT ++} ++ ++ ++void InterpreterMacroAssembler::set_mdp_data_at(Register mdp_in, ++ int constant, ++ Register value) { ++ assert(ProfileInterpreter, "must be profiling interpreter"); ++ Address data(mdp_in, constant); ++ st_d(value, data); ++} ++ ++ ++void InterpreterMacroAssembler::increment_mdp_data_at(Register mdp_in, ++ int constant, ++ bool decrement) { ++ // Counter address ++ Address data(mdp_in, constant); ++ ++ increment_mdp_data_at(data, decrement); ++} ++ ++void InterpreterMacroAssembler::increment_mdp_data_at(Address data, ++ bool decrement) { ++ assert(ProfileInterpreter, "must be profiling interpreter"); ++ // %%% this does 64bit counters at best it is wasting space ++ // at worst it is a rare bug when counters overflow ++ Register tmp = S0; ++ push(tmp); ++ if (decrement) { ++ assert(DataLayout::counter_increment == 1, "flow-free idiom only works with 1"); ++ // Decrement the register. ++ ld_d(AT, data); ++ sltu(tmp, R0, AT); ++ sub_d(AT, AT, tmp); ++ st_d(AT, data); ++ } else { ++ assert(DataLayout::counter_increment == 1, "flow-free idiom only works with 1"); ++ // Increment the register. ++ ld_d(AT, data); ++ addi_d(tmp, AT, DataLayout::counter_increment); ++ sltu(tmp, R0, tmp); ++ add_d(AT, AT, tmp); ++ st_d(AT, data); ++ } ++ pop(tmp); ++} ++ ++ ++void InterpreterMacroAssembler::increment_mdp_data_at(Register mdp_in, ++ Register reg, ++ int constant, ++ bool decrement) { ++ Register tmp = S0; ++ push(tmp); ++ if (decrement) { ++ assert(Assembler::is_simm(constant, 12), "constant is not a simm12 !"); ++ assert(DataLayout::counter_increment == 1, "flow-free idiom only works with 1"); ++ // Decrement the register. ++ add_d(tmp, mdp_in, reg); ++ ld_d(AT, tmp, constant); ++ sltu(tmp, R0, AT); ++ sub_d(AT, AT, tmp); ++ add_d(tmp, mdp_in, reg); ++ st_d(AT, tmp, constant); ++ } else { ++ assert(Assembler::is_simm(constant, 12), "constant is not a simm12 !"); ++ assert(DataLayout::counter_increment == 1, "flow-free idiom only works with 1"); ++ // Increment the register. ++ add_d(tmp, mdp_in, reg); ++ ld_d(AT, tmp, constant); ++ addi_d(tmp, AT, DataLayout::counter_increment); ++ sltu(tmp, R0, tmp); ++ add_d(AT, AT, tmp); ++ add_d(tmp, mdp_in, reg); ++ st_d(AT, tmp, constant); ++ } ++ pop(tmp); ++} ++ ++void InterpreterMacroAssembler::set_mdp_flag_at(Register mdp_in, ++ int flag_byte_constant) { ++ assert(ProfileInterpreter, "must be profiling interpreter"); ++ int header_offset = in_bytes(DataLayout::header_offset()); ++ int header_bits = DataLayout::flag_mask_to_header_mask(flag_byte_constant); ++ // Set the flag ++ ld_w(AT, Address(mdp_in, header_offset)); ++ if(Assembler::is_simm(header_bits, 12)) { ++ ori(AT, AT, header_bits); ++ } else { ++ push(T8); ++ // T8 is used as a temporary register. 
++ li(T8, header_bits); ++ orr(AT, AT, T8); ++ pop(T8); ++ } ++ st_w(AT, Address(mdp_in, header_offset)); ++} ++ ++ ++void InterpreterMacroAssembler::test_mdp_data_at(Register mdp_in, ++ int offset, ++ Register value, ++ Register test_value_out, ++ Label& not_equal_continue) { ++ assert(ProfileInterpreter, "must be profiling interpreter"); ++ if (test_value_out == noreg) { ++ ld_d(AT, Address(mdp_in, offset)); ++ bne(AT, value, not_equal_continue); ++ } else { ++ // Put the test value into a register, so caller can use it: ++ ld_d(test_value_out, Address(mdp_in, offset)); ++ bne(value, test_value_out, not_equal_continue); ++ } ++} ++ ++ ++void InterpreterMacroAssembler::update_mdp_by_offset(Register mdp_in, ++ int offset_of_disp) { ++ assert(ProfileInterpreter, "must be profiling interpreter"); ++ assert(Assembler::is_simm(offset_of_disp, 12), "offset is not an simm12"); ++ ld_d(AT, mdp_in, offset_of_disp); ++ add_d(mdp_in, mdp_in, AT); ++ st_d(mdp_in, Address(FP, frame::interpreter_frame_mdp_offset * wordSize)); ++} ++ ++ ++void InterpreterMacroAssembler::update_mdp_by_offset(Register mdp_in, ++ Register reg, ++ int offset_of_disp) { ++ assert(ProfileInterpreter, "must be profiling interpreter"); ++ add_d(AT, reg, mdp_in); ++ assert(Assembler::is_simm(offset_of_disp, 12), "offset is not an simm12"); ++ ld_d(AT, AT, offset_of_disp); ++ add_d(mdp_in, mdp_in, AT); ++ st_d(mdp_in, Address(FP, frame::interpreter_frame_mdp_offset * wordSize)); ++} ++ ++ ++void InterpreterMacroAssembler::update_mdp_by_constant(Register mdp_in, ++ int constant) { ++ assert(ProfileInterpreter, "must be profiling interpreter"); ++ if(Assembler::is_simm(constant, 12)) { ++ addi_d(mdp_in, mdp_in, constant); ++ } else { ++ li(AT, constant); ++ add_d(mdp_in, mdp_in, AT); ++ } ++ st_d(mdp_in, Address(FP, frame::interpreter_frame_mdp_offset * wordSize)); ++} ++ ++ ++void InterpreterMacroAssembler::update_mdp_for_ret(Register return_bci) { ++ assert(ProfileInterpreter, "must be profiling interpreter"); ++ push(return_bci); // save/restore across call_VM ++ call_VM(noreg, ++ CAST_FROM_FN_PTR(address, InterpreterRuntime::update_mdp_for_ret), ++ return_bci); ++ pop(return_bci); ++} ++ ++ ++void InterpreterMacroAssembler::profile_taken_branch(Register mdp, ++ Register bumped_count) { ++ if (ProfileInterpreter) { ++ Label profile_continue; ++ ++ // If no method data exists, go to profile_continue. ++ // Otherwise, assign to mdp ++ test_method_data_pointer(mdp, profile_continue); ++ ++ // We are taking a branch. Increment the taken count. ++ // We inline increment_mdp_data_at to return bumped_count in a register ++ //increment_mdp_data_at(mdp, in_bytes(JumpData::taken_offset())); ++ ld_d(bumped_count, mdp, in_bytes(JumpData::taken_offset())); ++ assert(DataLayout::counter_increment == 1, "flow-free idiom only works with 1"); ++ addi_d(AT, bumped_count, DataLayout::counter_increment); ++ sltu(AT, R0, AT); ++ add_d(bumped_count, bumped_count, AT); ++ st_d(bumped_count, mdp, in_bytes(JumpData::taken_offset())); // Store back out ++ // The method data pointer needs to be updated to reflect the new target. ++ update_mdp_by_offset(mdp, in_bytes(JumpData::displacement_offset())); ++ bind(profile_continue); ++ } ++} ++ ++ ++void InterpreterMacroAssembler::profile_not_taken_branch(Register mdp) { ++ if (ProfileInterpreter) { ++ Label profile_continue; ++ ++ // If no method data exists, go to profile_continue. ++ test_method_data_pointer(mdp, profile_continue); ++ ++ // We are taking a branch. Increment the not taken count. 
++ increment_mdp_data_at(mdp, in_bytes(BranchData::not_taken_offset())); ++ ++ // The method data pointer needs to be updated to correspond to ++ // the next bytecode ++ update_mdp_by_constant(mdp, in_bytes(BranchData::branch_data_size())); ++ bind(profile_continue); ++ } ++} ++ ++ ++void InterpreterMacroAssembler::profile_call(Register mdp) { ++ if (ProfileInterpreter) { ++ Label profile_continue; ++ ++ // If no method data exists, go to profile_continue. ++ test_method_data_pointer(mdp, profile_continue); ++ ++ // We are making a call. Increment the count. ++ increment_mdp_data_at(mdp, in_bytes(CounterData::count_offset())); ++ ++ // The method data pointer needs to be updated to reflect the new target. ++ update_mdp_by_constant(mdp, in_bytes(CounterData::counter_data_size())); ++ bind(profile_continue); ++ } ++} ++ ++ ++void InterpreterMacroAssembler::profile_final_call(Register mdp) { ++ if (ProfileInterpreter) { ++ Label profile_continue; ++ ++ // If no method data exists, go to profile_continue. ++ test_method_data_pointer(mdp, profile_continue); ++ ++ // We are making a call. Increment the count. ++ increment_mdp_data_at(mdp, in_bytes(CounterData::count_offset())); ++ ++ // The method data pointer needs to be updated to reflect the new target. ++ update_mdp_by_constant(mdp, ++ in_bytes(VirtualCallData:: ++ virtual_call_data_size())); ++ bind(profile_continue); ++ } ++} ++ ++ ++void InterpreterMacroAssembler::profile_virtual_call(Register receiver, ++ Register mdp, ++ Register reg2, ++ bool receiver_can_be_null) { ++ if (ProfileInterpreter) { ++ Label profile_continue; ++ ++ // If no method data exists, go to profile_continue. ++ test_method_data_pointer(mdp, profile_continue); ++ ++ Label skip_receiver_profile; ++ if (receiver_can_be_null) { ++ Label not_null; ++ bnez(receiver, not_null); ++ // We are making a call. Increment the count. ++ increment_mdp_data_at(mdp, in_bytes(CounterData::count_offset())); ++ b(skip_receiver_profile); ++ bind(not_null); ++ } ++ ++ // Record the receiver type. ++ record_klass_in_profile(receiver, mdp, reg2, true); ++ bind(skip_receiver_profile); ++ ++ // The method data pointer needs to be updated to reflect the new target. ++ update_mdp_by_constant(mdp, ++ in_bytes(VirtualCallData:: ++ virtual_call_data_size())); ++ bind(profile_continue); ++ } ++} ++ ++// This routine creates a state machine for updating the multi-row ++// type profile at a virtual call site (or other type-sensitive bytecode). ++// The machine visits each row (of receiver/count) until the receiver type ++// is found, or until it runs out of rows. At the same time, it remembers ++// the location of the first empty row. (An empty row records null for its ++// receiver, and can be allocated for a newly-observed receiver type.) ++// Because there are two degrees of freedom in the state, a simple linear ++// search will not work; it must be a decision tree. Hence this helper ++// function is recursive, to generate the required tree structured code. ++// It's the interpreter, so we are trading off code space for speed. ++// See below for example code. 
++void InterpreterMacroAssembler::record_klass_in_profile_helper( ++ Register receiver, Register mdp, ++ Register reg2, int start_row, ++ Label& done, bool is_virtual_call) { ++ if (TypeProfileWidth == 0) { ++ if (is_virtual_call) { ++ increment_mdp_data_at(mdp, in_bytes(CounterData::count_offset())); ++ } ++#if INCLUDE_JVMCI ++ else if (EnableJVMCI) { ++ increment_mdp_data_at(mdp, in_bytes(ReceiverTypeData::nonprofiled_receiver_count_offset())); ++ } ++#endif // INCLUDE_JVMCI ++ } else { ++ int non_profiled_offset = -1; ++ if (is_virtual_call) { ++ non_profiled_offset = in_bytes(CounterData::count_offset()); ++ } ++#if INCLUDE_JVMCI ++ else if (EnableJVMCI) { ++ non_profiled_offset = in_bytes(ReceiverTypeData::nonprofiled_receiver_count_offset()); ++ } ++#endif // INCLUDE_JVMCI ++ ++ record_item_in_profile_helper(receiver, mdp, reg2, 0, done, TypeProfileWidth, ++ &VirtualCallData::receiver_offset, &VirtualCallData::receiver_count_offset, non_profiled_offset); ++ } ++} ++ ++void InterpreterMacroAssembler::record_item_in_profile_helper(Register item, Register mdp, ++ Register reg2, int start_row, Label& done, int total_rows, ++ OffsetFunction item_offset_fn, OffsetFunction item_count_offset_fn, ++ int non_profiled_offset) { ++ int last_row = total_rows - 1; ++ assert(start_row <= last_row, "must be work left to do"); ++ // Test this row for both the item and for null. ++ // Take any of three different outcomes: ++ // 1. found item => increment count and goto done ++ // 2. found null => keep looking for case 1, maybe allocate this cell ++ // 3. found something else => keep looking for cases 1 and 2 ++ // Case 3 is handled by a recursive call. ++ for (int row = start_row; row <= last_row; row++) { ++ Label next_test; ++ bool test_for_null_also = (row == start_row); ++ ++ // See if the receiver is item[n]. ++ int item_offset = in_bytes(item_offset_fn(row)); ++ test_mdp_data_at(mdp, item_offset, item, ++ (test_for_null_also ? reg2 : noreg), ++ next_test); ++ // (Reg2 now contains the item from the CallData.) ++ ++ // The receiver is item[n]. Increment count[n]. ++ int count_offset = in_bytes(item_count_offset_fn(row)); ++ increment_mdp_data_at(mdp, count_offset); ++ b(done); ++ bind(next_test); ++ ++ if (test_for_null_also) { ++ Label found_null; ++ // Failed the equality check on item[n]... Test for null. ++ if (start_row == last_row) { ++ // The only thing left to do is handle the null case. ++ if (non_profiled_offset >= 0) { ++ beqz(reg2, found_null); ++ // Item did not match any saved item and there is no empty row for it. ++ // Increment total counter to indicate polymorphic case. ++ increment_mdp_data_at(mdp, non_profiled_offset); ++ b(done); ++ bind(found_null); ++ } else { ++ bnez(reg2, done); ++ } ++ break; ++ } ++ // Since null is rare, make it be the branch-taken case. ++ beqz(reg2, found_null); ++ ++ // Put all the "Case 3" tests here. ++ record_item_in_profile_helper(item, mdp, reg2, start_row + 1, done, total_rows, ++ item_offset_fn, item_count_offset_fn, non_profiled_offset); ++ ++ // Found a null. Keep searching for a matching item, ++ // but remember that this is an empty (unused) slot. ++ bind(found_null); ++ } ++ } ++ ++ // In the fall-through case, we found no matching item, but we ++ // observed the item[start_row] is NULL. ++ ++ // Fill in the item field and increment the count. 
++ int item_offset = in_bytes(item_offset_fn(start_row)); ++ set_mdp_data_at(mdp, item_offset, item); ++ int count_offset = in_bytes(item_count_offset_fn(start_row)); ++ li(reg2, DataLayout::counter_increment); ++ set_mdp_data_at(mdp, count_offset, reg2); ++ if (start_row > 0) { ++ b(done); ++ } ++} ++ ++// Example state machine code for three profile rows: ++// // main copy of decision tree, rooted at row[1] ++// if (row[0].rec == rec) { row[0].incr(); goto done; } ++// if (row[0].rec != NULL) { ++// // inner copy of decision tree, rooted at row[1] ++// if (row[1].rec == rec) { row[1].incr(); goto done; } ++// if (row[1].rec != NULL) { ++// // degenerate decision tree, rooted at row[2] ++// if (row[2].rec == rec) { row[2].incr(); goto done; } ++// if (row[2].rec != NULL) { goto done; } // overflow ++// row[2].init(rec); goto done; ++// } else { ++// // remember row[1] is empty ++// if (row[2].rec == rec) { row[2].incr(); goto done; } ++// row[1].init(rec); goto done; ++// } ++// } else { ++// // remember row[0] is empty ++// if (row[1].rec == rec) { row[1].incr(); goto done; } ++// if (row[2].rec == rec) { row[2].incr(); goto done; } ++// row[0].init(rec); goto done; ++// } ++// done: ++ ++void InterpreterMacroAssembler::record_klass_in_profile(Register receiver, ++ Register mdp, Register reg2, ++ bool is_virtual_call) { ++ assert(ProfileInterpreter, "must be profiling"); ++ Label done; ++ ++ record_klass_in_profile_helper(receiver, mdp, reg2, 0, done, is_virtual_call); ++ ++ bind (done); ++} ++ ++void InterpreterMacroAssembler::profile_ret(Register return_bci, ++ Register mdp) { ++ if (ProfileInterpreter) { ++ Label profile_continue; ++ uint row; ++ ++ // If no method data exists, go to profile_continue. ++ test_method_data_pointer(mdp, profile_continue); ++ ++ // Update the total ret count. ++ increment_mdp_data_at(mdp, in_bytes(CounterData::count_offset())); ++ ++ for (row = 0; row < RetData::row_limit(); row++) { ++ Label next_test; ++ ++ // See if return_bci is equal to bci[n]: ++ test_mdp_data_at(mdp, ++ in_bytes(RetData::bci_offset(row)), ++ return_bci, noreg, ++ next_test); ++ ++ // return_bci is equal to bci[n]. Increment the count. ++ increment_mdp_data_at(mdp, in_bytes(RetData::bci_count_offset(row))); ++ ++ // The method data pointer needs to be updated to reflect the new target. ++ update_mdp_by_offset(mdp, ++ in_bytes(RetData::bci_displacement_offset(row))); ++ b(profile_continue); ++ bind(next_test); ++ } ++ ++ update_mdp_for_ret(return_bci); ++ ++ bind(profile_continue); ++ } ++} ++ ++ ++void InterpreterMacroAssembler::profile_null_seen(Register mdp) { ++ if (ProfileInterpreter) { ++ Label profile_continue; ++ ++ // If no method data exists, go to profile_continue. ++ test_method_data_pointer(mdp, profile_continue); ++ ++ set_mdp_flag_at(mdp, BitData::null_seen_byte_constant()); ++ ++ // The method data pointer needs to be updated. ++ int mdp_delta = in_bytes(BitData::bit_data_size()); ++ if (TypeProfileCasts) { ++ mdp_delta = in_bytes(VirtualCallData::virtual_call_data_size()); ++ } ++ update_mdp_by_constant(mdp, mdp_delta); ++ ++ bind(profile_continue); ++ } ++} ++ ++ ++void InterpreterMacroAssembler::profile_typecheck_failed(Register mdp) { ++ if (ProfileInterpreter && TypeProfileCasts) { ++ Label profile_continue; ++ ++ // If no method data exists, go to profile_continue. ++ test_method_data_pointer(mdp, profile_continue); ++ ++ int count_offset = in_bytes(CounterData::count_offset()); ++ // Back up the address, since we have already bumped the mdp. 
++ count_offset -= in_bytes(VirtualCallData::virtual_call_data_size()); ++ ++ // *Decrement* the counter. We expect to see zero or small negatives. ++ increment_mdp_data_at(mdp, count_offset, true); ++ ++ bind (profile_continue); ++ } ++} ++ ++ ++void InterpreterMacroAssembler::profile_typecheck(Register mdp, Register klass, Register reg2) { ++ if (ProfileInterpreter) { ++ Label profile_continue; ++ ++ // If no method data exists, go to profile_continue. ++ test_method_data_pointer(mdp, profile_continue); ++ ++ // The method data pointer needs to be updated. ++ int mdp_delta = in_bytes(BitData::bit_data_size()); ++ if (TypeProfileCasts) { ++ mdp_delta = in_bytes(VirtualCallData::virtual_call_data_size()); ++ ++ // Record the object type. ++ record_klass_in_profile(klass, mdp, reg2, false); ++ } ++ update_mdp_by_constant(mdp, mdp_delta); ++ ++ bind(profile_continue); ++ } ++} ++ ++ ++void InterpreterMacroAssembler::profile_switch_default(Register mdp) { ++ if (ProfileInterpreter) { ++ Label profile_continue; ++ ++ // If no method data exists, go to profile_continue. ++ test_method_data_pointer(mdp, profile_continue); ++ ++ // Update the default case count ++ increment_mdp_data_at(mdp, ++ in_bytes(MultiBranchData::default_count_offset())); ++ ++ // The method data pointer needs to be updated. ++ update_mdp_by_offset(mdp, ++ in_bytes(MultiBranchData:: ++ default_displacement_offset())); ++ ++ bind(profile_continue); ++ } ++} ++ ++ ++void InterpreterMacroAssembler::profile_switch_case(Register index, ++ Register mdp, ++ Register reg2) { ++ if (ProfileInterpreter) { ++ Label profile_continue; ++ ++ // If no method data exists, go to profile_continue. ++ test_method_data_pointer(mdp, profile_continue); ++ ++ // Build the base (index * per_case_size_in_bytes()) + ++ // case_array_offset_in_bytes() ++ li(reg2, in_bytes(MultiBranchData::per_case_size())); ++ mul_d(index, index, reg2); ++ addi_d(index, index, in_bytes(MultiBranchData::case_array_offset())); ++ ++ // Update the case count ++ increment_mdp_data_at(mdp, ++ index, ++ in_bytes(MultiBranchData::relative_count_offset())); ++ ++ // The method data pointer needs to be updated. ++ update_mdp_by_offset(mdp, ++ index, ++ in_bytes(MultiBranchData:: ++ relative_displacement_offset())); ++ ++ bind(profile_continue); ++ } ++} ++ ++ ++void InterpreterMacroAssembler::narrow(Register result) { ++ // Get method->_constMethod->_result_type ++ ld_d(T4, FP, frame::interpreter_frame_method_offset * wordSize); ++ ld_d(T4, T4, in_bytes(Method::const_offset())); ++ ld_bu(T4, T4, in_bytes(ConstMethod::result_type_offset())); ++ ++ Label done, notBool, notByte, notChar; ++ ++ // common case first ++ addi_d(AT, T4, -T_INT); ++ beq(AT, R0, done); ++ ++ // mask integer result to narrower return type. 
++ addi_d(AT, T4, -T_BOOLEAN); ++ bne(AT, R0, notBool); ++ andi(result, result, 0x1); ++ beq(R0, R0, done); ++ ++ bind(notBool); ++ addi_d(AT, T4, -T_BYTE); ++ bne(AT, R0, notByte); ++ ext_w_b(result, result); ++ beq(R0, R0, done); ++ ++ bind(notByte); ++ addi_d(AT, T4, -T_CHAR); ++ bne(AT, R0, notChar); ++ bstrpick_d(result, result, 15, 0); ++ beq(R0, R0, done); ++ ++ bind(notChar); ++ ext_w_h(result, result); ++ ++ // Nothing to do for T_INT ++ bind(done); ++} ++ ++ ++void InterpreterMacroAssembler::profile_obj_type(Register obj, const Address& mdo_addr) { ++ Label update, next, none; ++ ++ verify_oop(obj); ++ ++ if (mdo_addr.index() != noreg) { ++ guarantee(T0 != mdo_addr.base(), "The base register will be corrupted !"); ++ guarantee(T0 != mdo_addr.index(), "The index register will be corrupted !"); ++ push(T0); ++ alsl_d(T0, mdo_addr.index(), mdo_addr.base(), mdo_addr.scale() - 1); ++ } ++ ++ bnez(obj, update); ++ ++ if (mdo_addr.index() == noreg) { ++ ld_d(AT, mdo_addr); ++ } else { ++ ld_d(AT, T0, mdo_addr.disp()); ++ } ++ ori(AT, AT, TypeEntries::null_seen); ++ if (mdo_addr.index() == noreg) { ++ st_d(AT, mdo_addr); ++ } else { ++ st_d(AT, T0, mdo_addr.disp()); ++ } ++ ++ b(next); ++ ++ bind(update); ++ load_klass(obj, obj); ++ ++ if (mdo_addr.index() == noreg) { ++ ld_d(AT, mdo_addr); ++ } else { ++ ld_d(AT, T0, mdo_addr.disp()); ++ } ++ xorr(obj, obj, AT); ++ ++ assert(TypeEntries::type_klass_mask == -4, "must be"); ++ bstrpick_d(AT, obj, 63, 2); ++ beqz(AT, next); ++ ++ andi(AT, obj, TypeEntries::type_unknown); ++ bnez(AT, next); ++ ++ if (mdo_addr.index() == noreg) { ++ ld_d(AT, mdo_addr); ++ } else { ++ ld_d(AT, T0, mdo_addr.disp()); ++ } ++ beqz(AT, none); ++ ++ addi_d(AT, AT, -(TypeEntries::null_seen)); ++ beqz(AT, none); ++ ++ // There is a chance that the checks above (re-reading profiling ++ // data from memory) fail if another thread has just set the ++ // profiling to this obj's klass ++ if (mdo_addr.index() == noreg) { ++ ld_d(AT, mdo_addr); ++ } else { ++ ld_d(AT, T0, mdo_addr.disp()); ++ } ++ xorr(obj, obj, AT); ++ assert(TypeEntries::type_klass_mask == -4, "must be"); ++ bstrpick_d(AT, obj, 63, 2); ++ beqz(AT, next); ++ ++ // different than before. Cannot keep accurate profile. ++ if (mdo_addr.index() == noreg) { ++ ld_d(AT, mdo_addr); ++ } else { ++ ld_d(AT, T0, mdo_addr.disp()); ++ } ++ ori(AT, AT, TypeEntries::type_unknown); ++ if (mdo_addr.index() == noreg) { ++ st_d(AT, mdo_addr); ++ } else { ++ st_d(AT, T0, mdo_addr.disp()); ++ } ++ b(next); ++ ++ bind(none); ++ // first time here. Set profile type. ++ if (mdo_addr.index() == noreg) { ++ st_d(obj, mdo_addr); ++ } else { ++ st_d(obj, T0, mdo_addr.disp()); ++ } ++ ++ bind(next); ++ if (mdo_addr.index() != noreg) { ++ pop(T0); ++ } ++} ++ ++void InterpreterMacroAssembler::profile_arguments_type(Register mdp, Register callee, Register tmp, bool is_virtual) { ++ if (!ProfileInterpreter) { ++ return; ++ } ++ ++ if (MethodData::profile_arguments() || MethodData::profile_return()) { ++ Label profile_continue; ++ ++ test_method_data_pointer(mdp, profile_continue); ++ ++ int off_to_start = is_virtual ? in_bytes(VirtualCallData::virtual_call_data_size()) : in_bytes(CounterData::counter_data_size()); ++ ++ ld_b(AT, mdp, in_bytes(DataLayout::tag_offset()) - off_to_start); ++ li(tmp, is_virtual ? 
DataLayout::virtual_call_type_data_tag : DataLayout::call_type_data_tag); ++ bne(tmp, AT, profile_continue); ++ ++ ++ if (MethodData::profile_arguments()) { ++ Label done; ++ int off_to_args = in_bytes(TypeEntriesAtCall::args_data_offset()); ++ if (Assembler::is_simm(off_to_args, 12)) { ++ addi_d(mdp, mdp, off_to_args); ++ } else { ++ li(AT, off_to_args); ++ add_d(mdp, mdp, AT); ++ } ++ ++ ++ for (int i = 0; i < TypeProfileArgsLimit; i++) { ++ if (i > 0 || MethodData::profile_return()) { ++ // If return value type is profiled we may have no argument to profile ++ ld_d(tmp, mdp, in_bytes(TypeEntriesAtCall::cell_count_offset())-off_to_args); ++ ++ if (Assembler::is_simm(-1 * i * TypeStackSlotEntries::per_arg_count(), 12)) { ++ addi_w(tmp, tmp, -1 * i * TypeStackSlotEntries::per_arg_count()); ++ } else { ++ li(AT, i*TypeStackSlotEntries::per_arg_count()); ++ sub_w(tmp, tmp, AT); ++ } ++ ++ li(AT, TypeStackSlotEntries::per_arg_count()); ++ blt(tmp, AT, done); ++ } ++ ld_d(tmp, callee, in_bytes(Method::const_offset())); ++ ++ ld_hu(tmp, tmp, in_bytes(ConstMethod::size_of_parameters_offset())); ++ ++ // stack offset o (zero based) from the start of the argument ++ // list, for n arguments translates into offset n - o - 1 from ++ // the end of the argument list ++ ld_d(AT, mdp, in_bytes(TypeEntriesAtCall::stack_slot_offset(i))-off_to_args); ++ sub_d(tmp, tmp, AT); ++ ++ addi_w(tmp, tmp, -1); ++ ++ Address arg_addr = argument_address(tmp); ++ ld_d(tmp, arg_addr); ++ ++ Address mdo_arg_addr(mdp, in_bytes(TypeEntriesAtCall::argument_type_offset(i))-off_to_args); ++ profile_obj_type(tmp, mdo_arg_addr); ++ ++ int to_add = in_bytes(TypeStackSlotEntries::per_arg_size()); ++ if (Assembler::is_simm(to_add, 12)) { ++ addi_d(mdp, mdp, to_add); ++ } else { ++ li(AT, to_add); ++ add_d(mdp, mdp, AT); ++ } ++ ++ off_to_args += to_add; ++ } ++ ++ if (MethodData::profile_return()) { ++ ld_d(tmp, mdp, in_bytes(TypeEntriesAtCall::cell_count_offset())-off_to_args); ++ ++ int tmp_arg_counts = TypeProfileArgsLimit*TypeStackSlotEntries::per_arg_count(); ++ if (Assembler::is_simm(-1 * tmp_arg_counts, 12)) { ++ addi_w(tmp, tmp, -1 * tmp_arg_counts); ++ } else { ++ li(AT, tmp_arg_counts); ++ sub_w(mdp, mdp, AT); ++ } ++ } ++ ++ bind(done); ++ ++ if (MethodData::profile_return()) { ++ // We're right after the type profile for the last ++ // argument. tmp is the number of cells left in the ++ // CallTypeData/VirtualCallTypeData to reach its end. Non null ++ // if there's a return to profile. 
++ assert(ReturnTypeEntry::static_cell_count() < TypeStackSlotEntries::per_arg_count(), "can't move past ret type"); ++ slli_w(tmp, tmp, exact_log2(DataLayout::cell_size)); ++ add_d(mdp, mdp, tmp); ++ } ++ st_d(mdp, FP, frame::interpreter_frame_mdp_offset * wordSize); ++ } else { ++ assert(MethodData::profile_return(), "either profile call args or call ret"); ++ update_mdp_by_constant(mdp, in_bytes(TypeEntriesAtCall::return_only_size())); ++ } ++ ++ // mdp points right after the end of the ++ // CallTypeData/VirtualCallTypeData, right after the cells for the ++ // return value type if there's one ++ ++ bind(profile_continue); ++ } ++} ++ ++void InterpreterMacroAssembler::profile_return_type(Register mdp, Register ret, Register tmp) { ++ assert_different_registers(mdp, ret, tmp, _bcp_register); ++ if (ProfileInterpreter && MethodData::profile_return()) { ++ Label profile_continue, done; ++ ++ test_method_data_pointer(mdp, profile_continue); ++ ++ if (MethodData::profile_return_jsr292_only()) { ++ assert(Method::intrinsic_id_size_in_bytes() == 2, "assuming Method::_intrinsic_id is u2"); ++ ++ // If we don't profile all invoke bytecodes we must make sure ++ // it's a bytecode we indeed profile. We can't go back to the ++ // beginning of the ProfileData we intend to update to check its ++ // type because we're right after it and we don't know its ++ // length ++ Label do_profile; ++ ld_b(tmp, _bcp_register, 0); ++ addi_d(AT, tmp, -1 * Bytecodes::_invokedynamic); ++ beqz(AT, do_profile); ++ addi_d(AT, tmp, -1 * Bytecodes::_invokehandle); ++ beqz(AT, do_profile); ++ ++ get_method(tmp); ++ ld_hu(tmp, tmp, Method::intrinsic_id_offset_in_bytes()); ++ li(AT, static_cast<int>(vmIntrinsics::_compiledLambdaForm)); ++ bne(tmp, AT, profile_continue); ++ ++ bind(do_profile); ++ } ++ ++ Address mdo_ret_addr(mdp, -in_bytes(ReturnTypeEntry::size())); ++ add_d(tmp, ret, R0); ++ profile_obj_type(tmp, mdo_ret_addr); ++ ++ bind(profile_continue); ++ } ++} ++ ++void InterpreterMacroAssembler::profile_parameters_type(Register mdp, Register tmp1, Register tmp2) { ++ guarantee(T4 == tmp1, "You are required to use T4 as the index register for LoongArch !"); ++ ++ if (ProfileInterpreter && MethodData::profile_parameters()) { ++ Label profile_continue, done; ++ ++ test_method_data_pointer(mdp, profile_continue); ++ ++ // Load the offset of the area within the MDO used for ++ // parameters. If it's negative we're not profiling any parameters ++ ld_w(tmp1, mdp, in_bytes(MethodData::parameters_type_data_di_offset()) - in_bytes(MethodData::data_offset())); ++ blt(tmp1, R0, profile_continue); ++ ++ // Compute a pointer to the area for parameters from the offset ++ // and move the pointer to the slot for the last ++ // parameters. Collect profiling from last parameter down. 
++ // mdo start + parameters offset + array length - 1 ++ add_d(mdp, mdp, tmp1); ++ ld_d(tmp1, mdp, in_bytes(ArrayData::array_len_offset())); ++ decrement(tmp1, TypeStackSlotEntries::per_arg_count()); ++ ++ ++ Label loop; ++ bind(loop); ++ ++ int off_base = in_bytes(ParametersTypeData::stack_slot_offset(0)); ++ int type_base = in_bytes(ParametersTypeData::type_offset(0)); ++ Address::ScaleFactor per_arg_scale = Address::times(DataLayout::cell_size); ++ Address arg_type(mdp, tmp1, per_arg_scale, type_base); ++ ++ // load offset on the stack from the slot for this parameter ++ alsl_d(AT, tmp1, mdp, per_arg_scale - 1); ++ ld_d(tmp2, AT, off_base); ++ ++ sub_d(tmp2, R0, tmp2); ++ ++ // read the parameter from the local area ++ alsl_d(AT, tmp2, _locals_register, Interpreter::logStackElementSize - 1); ++ ld_d(tmp2, AT, 0); ++ ++ // profile the parameter ++ profile_obj_type(tmp2, arg_type); ++ ++ // go to next parameter ++ decrement(tmp1, TypeStackSlotEntries::per_arg_count()); ++ blt(R0, tmp1, loop); ++ ++ bind(profile_continue); ++ } ++} ++ ++void InterpreterMacroAssembler::verify_oop(Register reg, TosState state) { ++ if (state == atos) { ++ MacroAssembler::verify_oop(reg); ++ } ++} ++ ++void InterpreterMacroAssembler::verify_FPU(int stack_depth, TosState state) { ++} ++ ++void InterpreterMacroAssembler::notify_method_entry() { ++ // Whenever JVMTI is interp_only_mode, method entry/exit events are sent to ++ // track stack depth. If it is possible to enter interp_only_mode we add ++ // the code to check if the event should be sent. ++ Register tempreg = T0; ++#ifndef OPT_THREAD ++ Register thread = T8; ++ get_thread(thread); ++#else ++ Register thread = TREG; ++#endif ++ if (JvmtiExport::can_post_interpreter_events()) { ++ Label L; ++ ld_w(tempreg, thread, in_bytes(JavaThread::interp_only_mode_offset())); ++ beq(tempreg, R0, L); ++ call_VM(noreg, CAST_FROM_FN_PTR(address, ++ InterpreterRuntime::post_method_entry)); ++ bind(L); ++ } ++ ++ { ++ SkipIfEqual skip_if(this, &DTraceMethodProbes, 0); ++ get_method(S3); ++ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::dtrace_method_entry), ++ //Rthread, ++ thread, ++ //Rmethod); ++ S3); ++ } ++} ++ ++void InterpreterMacroAssembler::notify_method_exit( ++ TosState state, NotifyMethodExitMode mode) { ++ Register tempreg = T0; ++#ifndef OPT_THREAD ++ Register thread = T8; ++ get_thread(thread); ++#else ++ Register thread = TREG; ++#endif ++ // Whenever JVMTI is interp_only_mode, method entry/exit events are sent to ++ // track stack depth. If it is possible to enter interp_only_mode we add ++ // the code to check if the event should be sent. ++ if (mode == NotifyJVMTI && JvmtiExport::can_post_interpreter_events()) { ++ Label skip; ++ // Note: frame::interpreter_frame_result has a dependency on how the ++ // method result is saved across the call to post_method_exit. If this ++ // is changed then the interpreter_frame_result implementation will ++ // need to be updated too. ++ ++ // template interpreter will leave it on the top of the stack. 
++ push(state); ++ ld_w(tempreg, thread, in_bytes(JavaThread::interp_only_mode_offset())); ++ beq(tempreg, R0, skip); ++ call_VM(noreg, ++ CAST_FROM_FN_PTR(address, InterpreterRuntime::post_method_exit)); ++ bind(skip); ++ pop(state); ++ } ++ ++ { ++ // Dtrace notification ++ SkipIfEqual skip_if(this, &DTraceMethodProbes, 0); ++ push(state); ++ get_method(S3); ++ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::dtrace_method_exit), ++ //Rthread, Rmethod); ++ thread, S3); ++ pop(state); ++ } ++} ++ ++// Jump if ((*counter_addr += increment) & mask) satisfies the condition. ++void InterpreterMacroAssembler::increment_mask_and_jump(Address counter_addr, ++ int increment, Address mask, ++ Register scratch, bool preloaded, ++ Condition cond, Label* where) { ++ assert_different_registers(scratch, AT); ++ ++ if (!preloaded) { ++ ld_w(scratch, counter_addr); ++ } ++ addi_w(scratch, scratch, increment); ++ st_w(scratch, counter_addr); ++ ++ ld_w(AT, mask); ++ andr(scratch, scratch, AT); ++ ++ if (cond == Assembler::zero) { ++ beq(scratch, R0, *where); ++ } else { ++ unimplemented(); ++ } ++} +diff --git a/src/hotspot/cpu/loongarch/interpreterRT_loongarch.hpp b/src/hotspot/cpu/loongarch/interpreterRT_loongarch.hpp +new file mode 100644 +index 00000000000..d53d951a160 +--- /dev/null ++++ b/src/hotspot/cpu/loongarch/interpreterRT_loongarch.hpp +@@ -0,0 +1,62 @@ ++/* ++ * Copyright (c) 1998, 2010, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#ifndef CPU_LOONGARCH_INTERPRETERRT_LOONGARCH_HPP ++#define CPU_LOONGARCH_INTERPRETERRT_LOONGARCH_HPP ++ ++// This is included in the middle of class Interpreter. ++// Do not include files here. 
++ ++// native method calls ++ ++class SignatureHandlerGenerator: public NativeSignatureIterator { ++ private: ++ MacroAssembler* _masm; ++ unsigned int _num_fp_args; ++ unsigned int _num_int_args; ++ int _stack_offset; ++ ++ void move(int from_offset, int to_offset); ++ void box(int from_offset, int to_offset); ++ void pass_int(); ++ void pass_long(); ++ void pass_object(); ++ void pass_float(); ++ void pass_double(); ++ ++ public: ++ // Creation ++ SignatureHandlerGenerator(const methodHandle& method, CodeBuffer* buffer); ++ ++ // Code generation ++ void generate(uint64_t fingerprint); ++ ++ // Code generation support ++ static Register from(); ++ static Register to(); ++ static Register temp(); ++}; ++ ++#endif // CPU_LOONGARCH_INTERPRETERRT_LOONGARCH_HPP +diff --git a/src/hotspot/cpu/loongarch/interpreterRT_loongarch_64.cpp b/src/hotspot/cpu/loongarch/interpreterRT_loongarch_64.cpp +new file mode 100644 +index 00000000000..85a199e760d +--- /dev/null ++++ b/src/hotspot/cpu/loongarch/interpreterRT_loongarch_64.cpp +@@ -0,0 +1,282 @@ ++/* ++ * Copyright (c) 2003, 2012, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2021, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#include "precompiled.hpp" ++#include "interpreter/interp_masm.hpp" ++#include "interpreter/interpreter.hpp" ++#include "interpreter/interpreterRuntime.hpp" ++#include "memory/allocation.inline.hpp" ++#include "memory/universe.hpp" ++#include "oops/method.hpp" ++#include "oops/oop.inline.hpp" ++#include "runtime/handles.inline.hpp" ++#include "runtime/icache.hpp" ++#include "runtime/interfaceSupport.inline.hpp" ++#include "runtime/signature.hpp" ++ ++#define __ _masm-> ++ ++#define A0 RA0 ++#define A1 RA1 ++#define A2 RA2 ++#define A3 RA3 ++#define A4 RA4 ++#define A5 RA5 ++#define A6 RA6 ++#define A7 RA7 ++#define T0 RT0 ++#define T1 RT1 ++#define T2 RT2 ++#define T3 RT3 ++#define T4 RT4 ++#define T5 RT5 ++#define T6 RT6 ++#define T7 RT7 ++#define T8 RT8 ++ ++// Implementation of SignatureHandlerGenerator ++InterpreterRuntime::SignatureHandlerGenerator::SignatureHandlerGenerator( ++ const methodHandle& method, CodeBuffer* buffer) : NativeSignatureIterator(method) { ++ _masm = new MacroAssembler(buffer); ++ _num_int_args = (method->is_static() ? 
1 : 0); ++ _num_fp_args = 0; ++ _stack_offset = 0; ++} ++ ++void InterpreterRuntime::SignatureHandlerGenerator::move(int from_offset, int to_offset) { ++ __ ld_d(temp(), from(), Interpreter::local_offset_in_bytes(from_offset)); ++ __ st_d(temp(), to(), to_offset * longSize); ++} ++ ++void InterpreterRuntime::SignatureHandlerGenerator::box(int from_offset, int to_offset) { ++ __ addi_d(temp(), from(),Interpreter::local_offset_in_bytes(from_offset) ); ++ __ ld_w(AT, from(), Interpreter::local_offset_in_bytes(from_offset) ); ++ ++ __ maskeqz(temp(), temp(), AT); ++ __ st_w(temp(), to(), to_offset * wordSize); ++} ++ ++void InterpreterRuntime::SignatureHandlerGenerator::generate(uint64_t fingerprint) { ++ // generate code to handle arguments ++ iterate(fingerprint); ++ // return result handler ++ __ li(V0, AbstractInterpreter::result_handler(method()->result_type())); ++ // return ++ __ jr(RA); ++ ++ __ flush(); ++} ++ ++void InterpreterRuntime::SignatureHandlerGenerator::pass_int() { ++ if (_num_int_args < Argument::n_register_parameters - 1) { ++ __ ld_w(as_Register(++_num_int_args + RA0->encoding()), from(), Interpreter::local_offset_in_bytes(offset())); ++ } else { ++ __ ld_w(AT, from(), Interpreter::local_offset_in_bytes(offset())); ++ __ st_w(AT, to(), _stack_offset); ++ _stack_offset += wordSize; ++ } ++} ++ ++// the jvm specifies that long type takes 2 stack spaces, so in do_long(), _offset += 2. ++void InterpreterRuntime::SignatureHandlerGenerator::pass_long() { ++ if (_num_int_args < Argument::n_register_parameters - 1) { ++ __ ld_d(as_Register(++_num_int_args + RA0->encoding()), from(), Interpreter::local_offset_in_bytes(offset() + 1)); ++ } else { ++ __ ld_d(AT, from(), Interpreter::local_offset_in_bytes(offset() + 1)); ++ __ st_d(AT, to(), _stack_offset); ++ _stack_offset += wordSize; ++ } ++} ++ ++void InterpreterRuntime::SignatureHandlerGenerator::pass_object() { ++ if (_num_int_args < Argument::n_register_parameters - 1) { ++ Register reg = as_Register(++_num_int_args + RA0->encoding()); ++ if (_num_int_args == 1) { ++ assert(offset() == 0, "argument register 1 can only be (non-null) receiver"); ++ __ addi_d(reg, from(), Interpreter::local_offset_in_bytes(offset())); ++ } else { ++ __ ld_d(reg, from(), Interpreter::local_offset_in_bytes(offset())); ++ __ addi_d(AT, from(), Interpreter::local_offset_in_bytes(offset())); ++ __ maskeqz(reg, AT, reg); ++ } ++ } else { ++ __ ld_d(temp(), from(), Interpreter::local_offset_in_bytes(offset())); ++ __ addi_d(AT, from(), Interpreter::local_offset_in_bytes(offset())); ++ __ maskeqz(temp(), AT, temp()); ++ __ st_d(temp(), to(), _stack_offset); ++ _stack_offset += wordSize; ++ } ++} ++ ++void InterpreterRuntime::SignatureHandlerGenerator::pass_float() { ++ if (_num_fp_args < Argument::n_float_register_parameters) { ++ __ fld_s(as_FloatRegister(_num_fp_args++), from(), Interpreter::local_offset_in_bytes(offset())); ++ } else if (_num_int_args < Argument::n_register_parameters - 1) { ++ __ ld_w(as_Register(++_num_int_args + RA0->encoding()), from(), Interpreter::local_offset_in_bytes(offset())); ++ } else { ++ __ ld_w(AT, from(), Interpreter::local_offset_in_bytes(offset())); ++ __ st_w(AT, to(), _stack_offset); ++ _stack_offset += wordSize; ++ } ++} ++ ++// the jvm specifies that double type takes 2 stack spaces, so in do_double(), _offset += 2. 
++void InterpreterRuntime::SignatureHandlerGenerator::pass_double() { ++ if (_num_fp_args < Argument::n_float_register_parameters) { ++ __ fld_d(as_FloatRegister(_num_fp_args++), from(), Interpreter::local_offset_in_bytes(offset() + 1)); ++ } else if (_num_int_args < Argument::n_register_parameters - 1) { ++ __ ld_d(as_Register(++_num_int_args + RA0->encoding()), from(), Interpreter::local_offset_in_bytes(offset() + 1)); ++ } else { ++ __ ld_d(AT, from(), Interpreter::local_offset_in_bytes(offset() + 1)); ++ __ st_d(AT, to(), _stack_offset); ++ _stack_offset += wordSize; ++ } ++} ++ ++ ++Register InterpreterRuntime::SignatureHandlerGenerator::from() { return LVP; } ++Register InterpreterRuntime::SignatureHandlerGenerator::to() { return SP; } ++Register InterpreterRuntime::SignatureHandlerGenerator::temp() { return T8; } ++ ++// Implementation of SignatureHandlerLibrary ++ ++void SignatureHandlerLibrary::pd_set_handler(address handler) {} ++ ++ ++class SlowSignatureHandler ++ : public NativeSignatureIterator { ++ private: ++ address _from; ++ intptr_t* _to; ++ intptr_t* _int_args; ++ intptr_t* _fp_args; ++ intptr_t* _fp_identifiers; ++ unsigned int _num_int_args; ++ unsigned int _num_fp_args; ++ ++ virtual void pass_int() ++ { ++ jint from_obj = *(jint *)(_from+Interpreter::local_offset_in_bytes(0)); ++ _from -= Interpreter::stackElementSize; ++ ++ if (_num_int_args < Argument::n_register_parameters - 1) { ++ *_int_args++ = from_obj; ++ _num_int_args++; ++ } else { ++ *_to++ = from_obj; ++ } ++ } ++ ++ virtual void pass_long() ++ { ++ intptr_t from_obj = *(intptr_t*)(_from+Interpreter::local_offset_in_bytes(1)); ++ _from -= 2 * Interpreter::stackElementSize; ++ ++ if (_num_int_args < Argument::n_register_parameters - 1) { ++ *_int_args++ = from_obj; ++ _num_int_args++; ++ } else { ++ *_to++ = from_obj; ++ } ++ } ++ ++ virtual void pass_object() ++ { ++ intptr_t *from_addr = (intptr_t*)(_from + Interpreter::local_offset_in_bytes(0)); ++ _from -= Interpreter::stackElementSize; ++ ++ if (_num_int_args < Argument::n_register_parameters - 1) { ++ *_int_args++ = (*from_addr == 0) ? NULL : (intptr_t) from_addr; ++ _num_int_args++; ++ } else { ++ *_to++ = (*from_addr == 0) ? NULL : (intptr_t) from_addr; ++ } ++ } ++ ++ virtual void pass_float() ++ { ++ jint from_obj = *(jint *)(_from+Interpreter::local_offset_in_bytes(0)); ++ _from -= Interpreter::stackElementSize; ++ ++ if (_num_fp_args < Argument::n_float_register_parameters) { ++ *_fp_args++ = from_obj; ++ _num_fp_args++; ++ } else if (_num_int_args < Argument::n_register_parameters - 1) { ++ *_int_args++ = from_obj; ++ _num_int_args++; ++ } else { ++ *_to++ = from_obj; ++ } ++ } ++ ++ virtual void pass_double() ++ { ++ intptr_t from_obj = *(intptr_t*)(_from+Interpreter::local_offset_in_bytes(1)); ++ _from -= 2*Interpreter::stackElementSize; ++ ++ if (_num_fp_args < Argument::n_float_register_parameters) { ++ *_fp_args++ = from_obj; ++ *_fp_identifiers |= (1 << _num_fp_args); // mark as double ++ _num_fp_args++; ++ } else if (_num_int_args < Argument::n_register_parameters - 1) { ++ *_int_args++ = from_obj; ++ _num_int_args++; ++ } else { ++ *_to++ = from_obj; ++ } ++ } ++ ++ public: ++ SlowSignatureHandler(methodHandle method, address from, intptr_t* to) ++ : NativeSignatureIterator(method) ++ { ++ _from = from; ++ _to = to; ++ ++ // see TemplateInterpreterGenerator::generate_slow_signature_handler() ++ _int_args = to - (method->is_static() ? 
15 : 16); ++ _fp_args = to - 8; ++ _fp_identifiers = to - 9; ++ *(int*) _fp_identifiers = 0; ++ _num_int_args = (method->is_static() ? 1 : 0); ++ _num_fp_args = 0; ++ } ++}; ++ ++ ++JRT_ENTRY(address, ++ InterpreterRuntime::slow_signature_handler(JavaThread* current, ++ Method* method, ++ intptr_t* from, ++ intptr_t* to)) ++ methodHandle m(current, (Method*)method); ++ assert(m->is_native(), "sanity check"); ++ ++ // handle arguments ++ SlowSignatureHandler(m, (address)from, to).iterate(UCONST64(-1)); ++ ++ // return result handler ++ return Interpreter::result_handler(m->result_type()); ++JRT_END +diff --git a/src/hotspot/cpu/loongarch/javaFrameAnchor_loongarch.hpp b/src/hotspot/cpu/loongarch/javaFrameAnchor_loongarch.hpp +new file mode 100644 +index 00000000000..048107c2425 +--- /dev/null ++++ b/src/hotspot/cpu/loongarch/javaFrameAnchor_loongarch.hpp +@@ -0,0 +1,87 @@ ++/* ++ * Copyright (c) 2002, 2010, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#ifndef CPU_LOONGARCH_JAVAFRAMEANCHOR_LOONGARCH_HPP ++#define CPU_LOONGARCH_JAVAFRAMEANCHOR_LOONGARCH_HPP ++ ++private: ++ ++ // FP value associated with _last_Java_sp: ++ intptr_t* volatile _last_Java_fp; // pointer is volatile not what it points to ++ ++public: ++ // Each arch must define reset, save, restore ++ // These are used by objects that only care about: ++ // 1 - initializing a new state (thread creation, javaCalls) ++ // 2 - saving a current state (javaCalls) ++ // 3 - restoring an old state (javaCalls) ++ ++ void clear(void) { ++ // clearing _last_Java_sp must be first ++ _last_Java_sp = NULL; ++ // fence? 
++ _last_Java_fp = NULL; ++ _last_Java_pc = NULL; ++ } ++ ++ void copy(JavaFrameAnchor* src) { ++ // In order to make sure the transition state is valid for "this" ++ // We must clear _last_Java_sp before copying the rest of the new data ++ // ++ // Hack Alert: Temporary bugfix for 4717480/4721647 ++ // To act like previous version (pd_cache_state) don't NULL _last_Java_sp ++ // unless the value is changing ++ // ++ if (_last_Java_sp != src->_last_Java_sp) ++ _last_Java_sp = NULL; ++ ++ _last_Java_fp = src->_last_Java_fp; ++ _last_Java_pc = src->_last_Java_pc; ++ // Must be last so profiler will always see valid frame if has_last_frame() is true ++ _last_Java_sp = src->_last_Java_sp; ++ } ++ ++ // Always walkable ++ bool walkable(void) { return true; } ++ // Never any thing to do since we are always walkable and can find address of return addresses ++ void make_walkable() { } ++ ++ intptr_t* last_Java_sp(void) const { return _last_Java_sp; } ++ ++ address last_Java_pc(void) { return _last_Java_pc; } ++ ++private: ++ ++ static ByteSize last_Java_fp_offset() { return byte_offset_of(JavaFrameAnchor, _last_Java_fp); } ++ ++public: ++ ++ void set_last_Java_sp(intptr_t* sp) { _last_Java_sp = sp; } ++ ++ intptr_t* last_Java_fp(void) { return _last_Java_fp; } ++ // Assert (last_Java_sp == NULL || fp == NULL) ++ void set_last_Java_fp(intptr_t* fp) { _last_Java_fp = fp; } ++ ++#endif // CPU_LOONGARCH_JAVAFRAMEANCHOR_LOONGARCH_HPP +diff --git a/src/hotspot/cpu/loongarch/jniFastGetField_loongarch_64.cpp b/src/hotspot/cpu/loongarch/jniFastGetField_loongarch_64.cpp +new file mode 100644 +index 00000000000..5b08280921c +--- /dev/null ++++ b/src/hotspot/cpu/loongarch/jniFastGetField_loongarch_64.cpp +@@ -0,0 +1,197 @@ ++/* ++ * Copyright (c) 2004, 2010, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2021, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ * ++ */ ++ ++#include "precompiled.hpp" ++#include "asm/macroAssembler.hpp" ++#include "code/codeBlob.hpp" ++#include "gc/shared/barrierSet.hpp" ++#include "gc/shared/barrierSetAssembler.hpp" ++#include "memory/resourceArea.hpp" ++#include "prims/jniFastGetField.hpp" ++#include "prims/jvm_misc.hpp" ++#include "prims/jvmtiExport.hpp" ++#include "runtime/safepoint.hpp" ++ ++#define __ masm-> ++ ++#define A0 RA0 ++#define A1 RA1 ++#define A2 RA2 ++#define A3 RA3 ++#define A4 RA4 ++#define A5 RA5 ++#define A6 RA6 ++#define A7 RA7 ++#define T0 RT0 ++#define T1 RT1 ++#define T2 RT2 ++#define T3 RT3 ++#define T4 RT4 ++#define T5 RT5 ++#define T6 RT6 ++#define T7 RT7 ++#define T8 RT8 ++ ++#define BUFFER_SIZE 30*wordSize ++ ++// Instead of issuing membar for LoadLoad barrier, we create address dependency ++// between loads, which is more efficient than membar. ++ ++address JNI_FastGetField::generate_fast_get_int_field0(BasicType type) { ++ const char *name = NULL; ++ switch (type) { ++ case T_BOOLEAN: name = "jni_fast_GetBooleanField"; break; ++ case T_BYTE: name = "jni_fast_GetByteField"; break; ++ case T_CHAR: name = "jni_fast_GetCharField"; break; ++ case T_SHORT: name = "jni_fast_GetShortField"; break; ++ case T_INT: name = "jni_fast_GetIntField"; break; ++ case T_LONG: name = "jni_fast_GetLongField"; break; ++ case T_FLOAT: name = "jni_fast_GetFloatField"; break; ++ case T_DOUBLE: name = "jni_fast_GetDoubleField"; break; ++ default: ShouldNotReachHere(); ++ } ++ ResourceMark rm; ++ BufferBlob* blob = BufferBlob::create(name, BUFFER_SIZE); ++ CodeBuffer cbuf(blob); ++ MacroAssembler* masm = new MacroAssembler(&cbuf); ++ address fast_entry = __ pc(); ++ Label slow; ++ ++ const Register env = A0; ++ const Register obj = A1; ++ const Register fid = A2; ++ const Register tmp1 = AT; ++ const Register tmp2 = T4; ++ const Register obj_addr = T0; ++ const Register field_val = T0; ++ const Register field_addr = T0; ++ const Register counter_addr = T2; ++ const Register counter_prev_val = T1; ++ ++ __ li(counter_addr, SafepointSynchronize::safepoint_counter_addr()); ++ __ ld_w(counter_prev_val, counter_addr, 0); ++ ++ // Parameters(A0~A3) should not be modified, since they will be used in slow path ++ __ andi(tmp1, counter_prev_val, 1); ++ __ bnez(tmp1, slow); ++ ++ if (JvmtiExport::can_post_field_access()) { ++ // Check to see if a field access watch has been set before we ++ // take the fast path. ++ __ li(tmp2, JvmtiExport::get_field_access_count_addr()); ++ // address dependency ++ __ XOR(tmp1, counter_prev_val, counter_prev_val); ++ __ ldx_w(tmp1, tmp2, tmp1); ++ __ bnez(tmp1, slow); ++ } ++ ++ __ move(obj_addr, obj); ++ // Both obj_addr and tmp2 are clobbered by try_resolve_jobject_in_native. 
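For readers less familiar with HotSpot's speculative JNI fast accessors, the control flow emitted by this generator (continued below) can be summarized in plain C++. This is only an illustrative sketch and not part of the patch: slow_get_int and field_addr are made-up helpers, and the seq_cst fence is a conservative stand-in for the address-dependency ordering described in the comment at the top of this file.

    #include <atomic>
    #include <cstdint>

    typedef int32_t jint;

    // Hypothetical helpers: the real code tail-calls the libjvm slow-path stub
    // and derives the field address from the jobject and the encoded jfieldID.
    jint  slow_get_int(void* obj, void* fid);
    jint* field_addr(void* obj, void* fid);

    jint fast_get_int(const std::atomic<int>* safepoint_counter, void* obj, void* fid) {
      int c0 = safepoint_counter->load(std::memory_order_acquire);
      if (c0 & 1) {
        return slow_get_int(obj, fid);     // odd counter: a safepoint is in progress
      }
      jint value = *field_addr(obj, fid);  // speculative field load
      // The generated code orders the counter re-read after the field load with an
      // address dependency (XOR of the loaded value with itself used as a load
      // index); portable C++ has to use a fence instead.
      std::atomic_thread_fence(std::memory_order_seq_cst);
      int c1 = safepoint_counter->load(std::memory_order_relaxed);
      if (c0 != c1) {
        return slow_get_int(obj, fid);     // counter moved: value may be stale, redo slowly
      }
      return value;
    }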
++ BarrierSetAssembler* bs = BarrierSet::barrier_set()->barrier_set_assembler(); ++ bs->try_resolve_jobject_in_native(masm, env, obj_addr, tmp2, slow); ++ ++ __ srli_d(tmp1, fid, 2); // offset ++ __ add_d(field_addr, obj_addr, tmp1); ++ // address dependency ++ __ XOR(tmp1, counter_prev_val, counter_prev_val); ++ ++ assert(count < LIST_CAPACITY, "LIST_CAPACITY too small"); ++ speculative_load_pclist[count] = __ pc(); ++ switch (type) { ++ case T_BOOLEAN: __ ldx_bu (field_val, field_addr, tmp1); break; ++ case T_BYTE: __ ldx_b (field_val, field_addr, tmp1); break; ++ case T_CHAR: __ ldx_hu (field_val, field_addr, tmp1); break; ++ case T_SHORT: __ ldx_h (field_val, field_addr, tmp1); break; ++ case T_INT: __ ldx_w (field_val, field_addr, tmp1); break; ++ case T_LONG: __ ldx_d (field_val, field_addr, tmp1); break; ++ case T_FLOAT: __ ldx_wu (field_val, field_addr, tmp1); break; ++ case T_DOUBLE: __ ldx_d (field_val, field_addr, tmp1); break; ++ default: ShouldNotReachHere(); ++ } ++ ++ // address dependency ++ __ XOR(tmp1, field_val, field_val); ++ __ ldx_w(tmp1, counter_addr, tmp1); ++ __ bne(counter_prev_val, tmp1, slow); ++ ++ switch (type) { ++ case T_FLOAT: __ movgr2fr_w(F0, field_val); break; ++ case T_DOUBLE: __ movgr2fr_d(F0, field_val); break; ++ default: __ move(V0, field_val); break; ++ } ++ ++ __ jr(RA); ++ ++ slowcase_entry_pclist[count++] = __ pc(); ++ __ bind (slow); ++ address slow_case_addr = NULL; ++ switch (type) { ++ case T_BOOLEAN: slow_case_addr = jni_GetBooleanField_addr(); break; ++ case T_BYTE: slow_case_addr = jni_GetByteField_addr(); break; ++ case T_CHAR: slow_case_addr = jni_GetCharField_addr(); break; ++ case T_SHORT: slow_case_addr = jni_GetShortField_addr(); break; ++ case T_INT: slow_case_addr = jni_GetIntField_addr(); break; ++ case T_LONG: slow_case_addr = jni_GetLongField_addr(); break; ++ case T_FLOAT: slow_case_addr = jni_GetFloatField_addr(); break; ++ case T_DOUBLE: slow_case_addr = jni_GetDoubleField_addr(); break; ++ default: ShouldNotReachHere(); ++ } ++ __ jmp(slow_case_addr); ++ ++ __ flush (); ++ return fast_entry; ++} ++ ++address JNI_FastGetField::generate_fast_get_boolean_field() { ++ return generate_fast_get_int_field0(T_BOOLEAN); ++} ++ ++address JNI_FastGetField::generate_fast_get_byte_field() { ++ return generate_fast_get_int_field0(T_BYTE); ++} ++ ++address JNI_FastGetField::generate_fast_get_char_field() { ++ return generate_fast_get_int_field0(T_CHAR); ++} ++ ++address JNI_FastGetField::generate_fast_get_short_field() { ++ return generate_fast_get_int_field0(T_SHORT); ++} ++ ++address JNI_FastGetField::generate_fast_get_int_field() { ++ return generate_fast_get_int_field0(T_INT); ++} ++ ++address JNI_FastGetField::generate_fast_get_long_field() { ++ return generate_fast_get_int_field0(T_LONG); ++} ++ ++address JNI_FastGetField::generate_fast_get_float_field() { ++ return generate_fast_get_int_field0(T_FLOAT); ++} ++ ++address JNI_FastGetField::generate_fast_get_double_field() { ++ return generate_fast_get_int_field0(T_DOUBLE); ++} +diff --git a/src/hotspot/cpu/loongarch/jniTypes_loongarch.hpp b/src/hotspot/cpu/loongarch/jniTypes_loongarch.hpp +new file mode 100644 +index 00000000000..0ee04e042b5 +--- /dev/null ++++ b/src/hotspot/cpu/loongarch/jniTypes_loongarch.hpp +@@ -0,0 +1,143 @@ ++/* ++ * Copyright (c) 1998, 2010, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2021, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
++ *
++ * This code is free software; you can redistribute it and/or modify it
++ * under the terms of the GNU General Public License version 2 only, as
++ * published by the Free Software Foundation.
++ *
++ * This code is distributed in the hope that it will be useful, but WITHOUT
++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
++ * version 2 for more details (a copy is included in the LICENSE file that
++ * accompanied this code).
++ *
++ * You should have received a copy of the GNU General Public License version
++ * 2 along with this work; if not, write to the Free Software Foundation,
++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
++ *
++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
++ * or visit www.oracle.com if you need additional information or have any
++ * questions.
++ *
++ */
++
++#ifndef CPU_LOONGARCH_JNITYPES_LOONGARCH_HPP
++#define CPU_LOONGARCH_JNITYPES_LOONGARCH_HPP
++
++#include "jni.h"
++#include "memory/allocation.hpp"
++#include "oops/oop.hpp"
++
++// This file holds platform-dependent routines used to write primitive jni
++// types to the array of arguments passed into JavaCalls::call
++
++class JNITypes : AllStatic {
++  // These functions write a java primitive type (in native format)
++  // to a java stack slot array to be passed as an argument to JavaCalls:calls.
++  // I.e., they are functionally 'push' operations if they have a 'pos'
++  // formal parameter. Note that jlong's and jdouble's are written
++  // _in reverse_ of the order in which they appear in the interpreter
++  // stack. This is because call stubs (see stubGenerator_sparc.cpp)
++  // reverse the argument list constructed by JavaCallArguments (see
++  // javaCalls.hpp).
++
++private:
++
++  // 32bit Helper routines.
++  static inline void put_int2r(jint *from, intptr_t *to) { *(jint *)(to++) = from[1];
++                                                           *(jint *)(to  ) = from[0]; }
++  static inline void put_int2r(jint *from, intptr_t *to, int& pos) { put_int2r(from, to + pos); pos += 2; }
++
++public:
++  // In LoongArch64, sizeof(intptr_t) is 8 bytes, and each unit in JavaCallArguments::_value_buffer[]
++  // is 8 bytes.
++  // If we only write the low 4 bytes with (jint *), the high 4 bytes will be left with uncertain values.
++  // Then, in JavaCallArguments::parameters(), the whole 8 bytes of a T_INT parameter is loaded.
++  // This error occurs in ReflectInvoke.java:
++  // the parameter of DD(int) should be 4 instead of 0x550000004.
++  //
++  // See: [runtime/javaCalls.hpp]
++
++  static inline void put_int(jint  from, intptr_t *to)           { *(intptr_t *)(to + 0    ) =  from; }
++  static inline void put_int(jint  from, intptr_t *to, int& pos) { *(intptr_t *)(to + pos++) =  from; }
++  static inline void put_int(jint *from, intptr_t *to, int& pos) { *(intptr_t *)(to + pos++) = *from; }
++
++  // Longs are stored in native format in one JavaCallArgument slot at
++  // *(to).
++  // In theory, *(to + 1) is an empty slot. But, for several Java2D testing programs (TestBorderLayout, SwingTest),
++  // *(to + 1) must contain a copy of the long value. Otherwise the value will be corrupted.
++  static inline void put_long(jlong from, intptr_t *to) {
++    *(jlong*) (to + 1) = from;
++    *(jlong*) (to)     = from;
++  }
++
++  // A long parameter occupies two slots.
++  // It must fit the layout rule in methodHandle.
++  //
++  // See: [runtime/reflection.cpp] Reflection::invoke()
++  //      assert(java_args.size_of_parameters() == method->size_of_parameters(), "just checking");
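The effect described in the put_int comment above is easy to reproduce on any little-endian LP64 host. The snippet below is only an illustration of why the port writes the full 8-byte slot; it is not part of the patch, and the 0x55... filler value is made up to stand for stale buffer contents.

    #include <cstdint>
    #include <cstdio>

    int main() {
      int64_t slot = 0x5500000000LL;   // stale contents of an 8-byte argument slot
      *(int32_t*)&slot = 4;            // writing only the low 4 bytes leaves the high half alone
      std::printf("%#llx\n", (unsigned long long)slot);  // prints 0x5500000004 on little-endian
      slot = 4;                        // writing the whole slot, as the LoongArch64 put_int does
      std::printf("%#llx\n", (unsigned long long)slot);  // prints 0x4
      return 0;
    }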
++
++  static inline void put_long(jlong from, intptr_t *to, int& pos) {
++    *(jlong*) (to + 1 + pos) = from;
++    *(jlong*) (to + pos)     = from;
++    pos += 2;
++  }
++
++  static inline void put_long(jlong *from, intptr_t *to, int& pos) {
++    *(jlong*) (to + 1 + pos) = *from;
++    *(jlong*) (to + pos)     = *from;
++    pos += 2;
++  }
++
++  // Oops are stored in native format in one JavaCallArgument slot at *to.
++  static inline void put_obj(const Handle& from_handle, intptr_t *to, int& pos) { *(to + pos++) = (intptr_t)from_handle.raw_value(); }
++  static inline void put_obj(jobject from_handle, intptr_t *to, int& pos) { *(to + pos++) = (intptr_t)from_handle; }
++
++  // Floats are stored in native format in one JavaCallArgument slot at *to.
++  static inline void put_float(jfloat  from, intptr_t *to)           { *(jfloat *)(to + 0    ) =  from; }
++  static inline void put_float(jfloat  from, intptr_t *to, int& pos) { *(jfloat *)(to + pos++) =  from; }
++  static inline void put_float(jfloat *from, intptr_t *to, int& pos) { *(jfloat *)(to + pos++) = *from; }
++
++#undef _JNI_SLOT_OFFSET
++#define _JNI_SLOT_OFFSET 0
++
++  // Doubles are stored in native format in one JavaCallArgument slot at
++  // *(to).
++  // In theory, *(to + 1) is an empty slot. But, for several Java2D testing programs (TestBorderLayout, SwingTest),
++  // *(to + 1) must contain a copy of the double value. Otherwise the value will be corrupted.
++  static inline void put_double(jdouble from, intptr_t *to) {
++    *(jdouble*) (to + 1) = from;
++    *(jdouble*) (to)     = from;
++  }
++
++  // A double parameter occupies two slots.
++  // It must fit the layout rule in methodHandle.
++  //
++  // See: [runtime/reflection.cpp] Reflection::invoke()
++  //      assert(java_args.size_of_parameters() == method->size_of_parameters(), "just checking");
++
++  static inline void put_double(jdouble from, intptr_t *to, int& pos) {
++    *(jdouble*) (to + 1 + pos) = from;
++    *(jdouble*) (to + pos)     = from;
++    pos += 2;
++  }
++
++  static inline void put_double(jdouble *from, intptr_t *to, int& pos) {
++    *(jdouble*) (to + 1 + pos) = *from;
++    *(jdouble*) (to + pos)     = *from;
++    pos += 2;
++  }
++
++  // The get_xxx routines, on the other hand, actually _do_ fetch
++  // java primitive types from the interpreter stack.
++  static inline jint    get_int   (intptr_t *from) { return *(jint *)    from; }
++  static inline jlong   get_long  (intptr_t *from) { return *(jlong *)  (from + _JNI_SLOT_OFFSET); }
++  static inline oop     get_obj   (intptr_t *from) { return *(oop *)     from; }
++  static inline jfloat  get_float (intptr_t *from) { return *(jfloat *)  from; }
++  static inline jdouble get_double(intptr_t *from) { return *(jdouble *)(from + _JNI_SLOT_OFFSET); }
++#undef _JNI_SLOT_OFFSET
++};
++
++#endif // CPU_LOONGARCH_JNITYPES_LOONGARCH_HPP
+diff --git a/src/hotspot/cpu/loongarch/jvmciCodeInstaller_loongarch.cpp b/src/hotspot/cpu/loongarch/jvmciCodeInstaller_loongarch.cpp
+new file mode 100644
+index 00000000000..eb75830ec9c
+--- /dev/null
++++ b/src/hotspot/cpu/loongarch/jvmciCodeInstaller_loongarch.cpp
+@@ -0,0 +1,202 @@
++/*
++ * Copyright (c) 2015, 2022, Oracle and/or its affiliates. All rights reserved.
++ * Copyright (c) 2022, Loongson Technology. All rights reserved.
++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ */ ++ ++#include "precompiled.hpp" ++#include "asm/macroAssembler.hpp" ++#include "jvmci/jvmci.hpp" ++#include "jvmci/jvmciCodeInstaller.hpp" ++#include "jvmci/jvmciRuntime.hpp" ++#include "jvmci/jvmciCompilerToVM.hpp" ++#include "jvmci/jvmciJavaClasses.hpp" ++#include "oops/oop.inline.hpp" ++#include "runtime/handles.inline.hpp" ++#include "runtime/jniHandles.hpp" ++#include "runtime/sharedRuntime.hpp" ++#include "vmreg_loongarch.inline.hpp" ++ ++jint CodeInstaller::pd_next_offset(NativeInstruction* inst, jint pc_offset, JVMCIObject method, JVMCI_TRAPS) { ++ address pc = (address) inst; ++ if (inst->is_int_branch() || inst->is_float_branch()) { ++ return pc_offset + NativeInstruction::nop_instruction_size; ++ } else if (inst->is_call()) { ++ return pc_offset + NativeCall::instruction_size; ++ } else if (inst->is_far_call()) { ++ return pc_offset + NativeFarCall::instruction_size; ++ } else if (inst->is_jump()) { ++ return pc_offset + NativeGeneralJump::instruction_size; ++ } else if (inst->is_lu12iw_lu32id()) { ++ // match LoongArch64TestAssembler.java emitCall ++ // lu12i_w; lu32i_d; jirl ++ return pc_offset + 3 * NativeInstruction::nop_instruction_size; ++ } else { ++ JVMCI_ERROR_0("unsupported type of instruction for call site"); ++ } ++ return 0; ++} ++ ++void CodeInstaller::pd_patch_OopConstant(int pc_offset, JVMCIObject constant, JVMCI_TRAPS) { ++ address pc = _instructions->start() + pc_offset; ++ Handle obj = jvmci_env()->asConstant(constant, JVMCI_CHECK); ++ jobject value = JNIHandles::make_local(obj()); ++ if (jvmci_env()->get_HotSpotObjectConstantImpl_compressed(constant)) { ++ NativeMovConstReg* move = nativeMovConstReg_at(pc); ++ move->set_data((intptr_t)(CompressedOops::encode(cast_to_oop(cast_from_oop
(obj()))))); ++ int oop_index = _oop_recorder->find_index(value); ++ RelocationHolder rspec = oop_Relocation::spec(oop_index); ++ _instructions->relocate(pc, rspec, Assembler::narrow_oop_operand); ++ } else { ++ NativeMovConstReg* move = nativeMovConstReg_at(pc); ++ move->set_data((intptr_t)(cast_from_oop
(obj()))); ++ int oop_index = _oop_recorder->find_index(value); ++ RelocationHolder rspec = oop_Relocation::spec(oop_index); ++ _instructions->relocate(pc, rspec); ++ } ++} ++ ++void CodeInstaller::pd_patch_MetaspaceConstant(int pc_offset, JVMCIObject constant, JVMCI_TRAPS) { ++ address pc = _instructions->start() + pc_offset; ++ if (jvmci_env()->get_HotSpotMetaspaceConstantImpl_compressed(constant)) { ++ NativeMovConstReg* move = nativeMovConstReg_at(pc); ++ narrowKlass narrowOop = record_narrow_metadata_reference(_instructions, pc, constant, JVMCI_CHECK); ++ move->set_data((intptr_t) narrowOop); ++ JVMCI_event_3("relocating (narrow metaspace constant) at " PTR_FORMAT "/0x%x", p2i(pc), narrowOop); ++ } else { ++ NativeMovConstReg* move = nativeMovConstReg_at(pc); ++ void* reference = record_metadata_reference(_instructions, pc, constant, JVMCI_CHECK); ++ move->set_data((intptr_t) reference); ++ JVMCI_event_3("relocating (metaspace constant) at " PTR_FORMAT "/" PTR_FORMAT, p2i(pc), p2i(reference)); ++ } ++} ++ ++void CodeInstaller::pd_patch_DataSectionReference(int pc_offset, int data_offset, JVMCI_TRAPS) { ++ address pc = _instructions->start() + pc_offset; ++ NativeInstruction* inst = nativeInstruction_at(pc); ++ if (inst->is_pcaddu12i_add()) { ++ address dest = _constants->start() + data_offset; ++ _instructions->relocate(pc, section_word_Relocation::spec((address) dest, CodeBuffer::SECT_CONSTS)); ++ JVMCI_event_3("relocating at " PTR_FORMAT " (+%d) with destination at %d", p2i(pc), pc_offset, data_offset); ++ } else { ++ JVMCI_ERROR("unknown load or move instruction at " PTR_FORMAT, p2i(pc)); ++ } ++} ++ ++void CodeInstaller::pd_relocate_ForeignCall(NativeInstruction* inst, jlong foreign_call_destination, JVMCI_TRAPS) { ++ address pc = (address) inst; ++ if (inst->is_call()) { ++ NativeCall* call = nativeCall_at(pc); ++ call->set_destination((address) foreign_call_destination); ++ _instructions->relocate(call->instruction_address(), runtime_call_Relocation::spec()); ++ } else if (inst->is_far_call()) { ++ NativeFarCall* call = nativeFarCall_at(pc); ++ call->set_destination((address) foreign_call_destination); ++ _instructions->relocate(call->instruction_address(), runtime_call_Relocation::spec()); ++ } else if (inst->is_jump()) { ++ NativeGeneralJump* jump = nativeGeneralJump_at(pc); ++ jump->set_jump_destination((address) foreign_call_destination); ++ _instructions->relocate(jump->instruction_address(), runtime_call_Relocation::spec()); ++ } else if (inst->is_lu12iw_lu32id()) { ++ // match emitCall of LoongArch64TestAssembler.java ++ // lu12i_w; lu32i_d; jirl ++ MacroAssembler::pd_patch_instruction((address)inst, (address)foreign_call_destination); ++ } else { ++ JVMCI_ERROR("unknown call or jump instruction at " PTR_FORMAT, p2i(pc)); ++ } ++ JVMCI_event_3("relocating (foreign call) at " PTR_FORMAT, p2i(inst)); ++} ++ ++void CodeInstaller::pd_relocate_JavaMethod(CodeBuffer &cbuf, JVMCIObject hotspot_method, jint pc_offset, JVMCI_TRAPS) { ++#ifdef ASSERT ++ Method* method = NULL; ++ // we need to check, this might also be an unresolved method ++ if (JVMCIENV->isa_HotSpotResolvedJavaMethodImpl(hotspot_method)) { ++ method = JVMCIENV->asMethod(hotspot_method); ++ } ++#endif ++ switch (_next_call_type) { ++ case INLINE_INVOKE: ++ break; ++ case INVOKEVIRTUAL: ++ case INVOKEINTERFACE: { ++ assert(!method->is_static(), "cannot call static method with invokeinterface"); ++ NativeCall* call = nativeCall_at(_instructions->start() + pc_offset); ++ 
_instructions->relocate(call->instruction_address(), virtual_call_Relocation::spec(_invoke_mark_pc)); ++ call->trampoline_jump(cbuf, SharedRuntime::get_resolve_virtual_call_stub()); ++ break; ++ } ++ case INVOKESTATIC: { ++ assert(method->is_static(), "cannot call non-static method with invokestatic"); ++ NativeCall* call = nativeCall_at(_instructions->start() + pc_offset); ++ _instructions->relocate(call->instruction_address(), relocInfo::static_call_type); ++ call->trampoline_jump(cbuf, SharedRuntime::get_resolve_static_call_stub()); ++ break; ++ } ++ case INVOKESPECIAL: { ++ assert(!method->is_static(), "cannot call static method with invokespecial"); ++ NativeCall* call = nativeCall_at(_instructions->start() + pc_offset); ++ _instructions->relocate(call->instruction_address(), relocInfo::opt_virtual_call_type); ++ call->trampoline_jump(cbuf, SharedRuntime::get_resolve_opt_virtual_call_stub()); ++ break; ++ } ++ default: ++ JVMCI_ERROR("invalid _next_call_type value"); ++ break; ++ } ++} ++ ++void CodeInstaller::pd_relocate_poll(address pc, jint mark, JVMCI_TRAPS) { ++ switch (mark) { ++ case POLL_NEAR: ++ JVMCI_ERROR("unimplemented"); ++ break; ++ case POLL_FAR: ++ _instructions->relocate(pc, relocInfo::poll_type); ++ break; ++ case POLL_RETURN_NEAR: ++ JVMCI_ERROR("unimplemented"); ++ break; ++ case POLL_RETURN_FAR: ++ _instructions->relocate(pc, relocInfo::poll_return_type); ++ break; ++ default: ++ JVMCI_ERROR("invalid mark value"); ++ break; ++ } ++} ++ ++// convert JVMCI register indices (as used in oop maps) to HotSpot registers ++VMReg CodeInstaller::get_hotspot_reg(jint jvmci_reg, JVMCI_TRAPS) { ++ if (jvmci_reg < RegisterImpl::number_of_registers) { ++ return as_Register(jvmci_reg)->as_VMReg(); ++ } else { ++ jint floatRegisterNumber = jvmci_reg - RegisterImpl::number_of_registers; ++ if (floatRegisterNumber >= 0 && floatRegisterNumber < FloatRegisterImpl::number_of_registers) { ++ return as_FloatRegister(floatRegisterNumber)->as_VMReg(); ++ } ++ JVMCI_ERROR_NULL("invalid register number: %d", jvmci_reg); ++ } ++} ++ ++bool CodeInstaller::is_general_purpose_reg(VMReg hotspotRegister) { ++ return !hotspotRegister->is_FloatRegister(); ++} +diff --git a/src/hotspot/cpu/loongarch/loongarch.ad b/src/hotspot/cpu/loongarch/loongarch.ad +new file mode 100644 +index 00000000000..80dff0c7626 +--- /dev/null ++++ b/src/hotspot/cpu/loongarch/loongarch.ad +@@ -0,0 +1,25 @@ ++// ++// Copyright (c) 2011, 2012, Oracle and/or its affiliates. All rights reserved. ++// Copyright (c) 2015, 2021, Loongson Technology. All rights reserved. ++// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++// ++// This code is free software; you can redistribute it and/or modify it ++// under the terms of the GNU General Public License version 2 only, as ++// published by the Free Software Foundation. ++// ++// This code is distributed in the hope that it will be useful, but WITHOUT ++// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++// version 2 for more details (a copy is included in the LICENSE file that ++// accompanied this code). ++// ++// You should have received a copy of the GNU General Public License version ++// 2 along with this work; if not, write to the Free Software Foundation, ++// Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 
++// ++// Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++// or visit www.oracle.com if you need additional information or have any ++// questions. ++// ++// ++ +diff --git a/src/hotspot/cpu/loongarch/loongarch_64.ad b/src/hotspot/cpu/loongarch/loongarch_64.ad +new file mode 100644 +index 00000000000..f1bb1c2f6cb +--- /dev/null ++++ b/src/hotspot/cpu/loongarch/loongarch_64.ad +@@ -0,0 +1,15678 @@ ++// ++// Copyright (c) 2003, 2013, Oracle and/or its affiliates. All rights reserved. ++// Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. ++// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++// ++// This code is free software; you can redistribute it and/or modify it ++// under the terms of the GNU General Public License version 2 only, as ++// published by the Free Software Foundation. ++// ++// This code is distributed in the hope that it will be useful, but WITHOUT ++// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++// version 2 for more details (a copy is included in the LICENSE file that ++// accompanied this code). ++// ++// You should have received a copy of the GNU General Public License version ++// 2 along with this work; if not, write to the Free Software Foundation, ++// Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++// ++// Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++// or visit www.oracle.com if you need additional information or have any ++// questions. ++// ++// ++ ++// GodSon3 Architecture Description File ++ ++//----------REGISTER DEFINITION BLOCK------------------------------------------ ++// This information is used by the matcher and the register allocator to ++// describe individual registers and classes of registers within the target ++// archtecture. 
++ ++// format: ++// reg_def name (call convention, c-call convention, ideal type, encoding); ++// call convention : ++// NS = No-Save ++// SOC = Save-On-Call ++// SOE = Save-On-Entry ++// AS = Always-Save ++// ideal type : ++// see opto/opcodes.hpp for more info ++// reg_class name (reg, ...); ++// alloc_class name (reg, ...); ++register %{ ++ ++// General Registers ++// Integer Registers ++ reg_def R0 ( NS, NS, Op_RegI, 0, R0->as_VMReg()); ++ reg_def R0_H ( NS, NS, Op_RegI, 0, R0->as_VMReg()->next()); ++ reg_def RA ( NS, NS, Op_RegI, 1, RA->as_VMReg()); ++ reg_def RA_H ( NS, NS, Op_RegI, 1, RA->as_VMReg()->next()); ++ reg_def TP ( NS, NS, Op_RegI, 2, TP->as_VMReg()); ++ reg_def TP_H ( NS, NS, Op_RegI, 2, TP->as_VMReg()->next()); ++ reg_def SP ( NS, NS, Op_RegI, 3, SP->as_VMReg()); ++ reg_def SP_H ( NS, NS, Op_RegI, 3, SP->as_VMReg()->next()); ++ reg_def A0 (SOC, SOC, Op_RegI, 4, A0->as_VMReg()); ++ reg_def A0_H (SOC, SOC, Op_RegI, 4, A0->as_VMReg()->next()); ++ reg_def A1 (SOC, SOC, Op_RegI, 5, A1->as_VMReg()); ++ reg_def A1_H (SOC, SOC, Op_RegI, 5, A1->as_VMReg()->next()); ++ reg_def A2 (SOC, SOC, Op_RegI, 6, A2->as_VMReg()); ++ reg_def A2_H (SOC, SOC, Op_RegI, 6, A2->as_VMReg()->next()); ++ reg_def A3 (SOC, SOC, Op_RegI, 7, A3->as_VMReg()); ++ reg_def A3_H (SOC, SOC, Op_RegI, 7, A3->as_VMReg()->next()); ++ reg_def A4 (SOC, SOC, Op_RegI, 8, A4->as_VMReg()); ++ reg_def A4_H (SOC, SOC, Op_RegI, 8, A4->as_VMReg()->next()); ++ reg_def A5 (SOC, SOC, Op_RegI, 9, A5->as_VMReg()); ++ reg_def A5_H (SOC, SOC, Op_RegI, 9, A5->as_VMReg()->next()); ++ reg_def A6 (SOC, SOC, Op_RegI, 10, A6->as_VMReg()); ++ reg_def A6_H (SOC, SOC, Op_RegI, 10, A6->as_VMReg()->next()); ++ reg_def A7 (SOC, SOC, Op_RegI, 11, A7->as_VMReg()); ++ reg_def A7_H (SOC, SOC, Op_RegI, 11, A7->as_VMReg()->next()); ++ reg_def T0 (SOC, SOC, Op_RegI, 12, T0->as_VMReg()); ++ reg_def T0_H (SOC, SOC, Op_RegI, 12, T0->as_VMReg()->next()); ++ reg_def T1 (SOC, SOC, Op_RegI, 13, T1->as_VMReg()); ++ reg_def T1_H (SOC, SOC, Op_RegI, 13, T1->as_VMReg()->next()); ++ reg_def T2 (SOC, SOC, Op_RegI, 14, T2->as_VMReg()); ++ reg_def T2_H (SOC, SOC, Op_RegI, 14, T2->as_VMReg()->next()); ++ reg_def T3 (SOC, SOC, Op_RegI, 15, T3->as_VMReg()); ++ reg_def T3_H (SOC, SOC, Op_RegI, 15, T3->as_VMReg()->next()); ++ reg_def T4 (SOC, SOC, Op_RegI, 16, T4->as_VMReg()); ++ reg_def T4_H (SOC, SOC, Op_RegI, 16, T4->as_VMReg()->next()); ++ reg_def T5 (SOC, SOC, Op_RegI, 17, T5->as_VMReg()); ++ reg_def T5_H (SOC, SOC, Op_RegI, 17, T5->as_VMReg()->next()); ++ reg_def T6 (SOC, SOC, Op_RegI, 18, T6->as_VMReg()); ++ reg_def T6_H (SOC, SOC, Op_RegI, 18, T6->as_VMReg()->next()); ++ reg_def T7 (SOC, SOC, Op_RegI, 19, T7->as_VMReg()); ++ reg_def T7_H (SOC, SOC, Op_RegI, 19, T7->as_VMReg()->next()); ++ reg_def T8 (SOC, SOC, Op_RegI, 20, T8->as_VMReg()); ++ reg_def T8_H (SOC, SOC, Op_RegI, 20, T8->as_VMReg()->next()); ++ reg_def RX ( NS, NS, Op_RegI, 21, RX->as_VMReg()); ++ reg_def RX_H ( NS, NS, Op_RegI, 21, RX->as_VMReg()->next()); ++ reg_def FP ( NS, NS, Op_RegI, 22, FP->as_VMReg()); ++ reg_def FP_H ( NS, NS, Op_RegI, 22, FP->as_VMReg()->next()); ++ reg_def S0 (SOC, SOE, Op_RegI, 23, S0->as_VMReg()); ++ reg_def S0_H (SOC, SOE, Op_RegI, 23, S0->as_VMReg()->next()); ++ reg_def S1 (SOC, SOE, Op_RegI, 24, S1->as_VMReg()); ++ reg_def S1_H (SOC, SOE, Op_RegI, 24, S1->as_VMReg()->next()); ++ reg_def S2 (SOC, SOE, Op_RegI, 25, S2->as_VMReg()); ++ reg_def S2_H (SOC, SOE, Op_RegI, 25, S2->as_VMReg()->next()); ++ reg_def S3 (SOC, SOE, Op_RegI, 26, S3->as_VMReg()); ++ reg_def S3_H 
(SOC, SOE, Op_RegI, 26, S3->as_VMReg()->next()); ++ reg_def S4 (SOC, SOE, Op_RegI, 27, S4->as_VMReg()); ++ reg_def S4_H (SOC, SOE, Op_RegI, 27, S4->as_VMReg()->next()); ++ reg_def S5 (SOC, SOE, Op_RegI, 28, S5->as_VMReg()); ++ reg_def S5_H (SOC, SOE, Op_RegI, 28, S5->as_VMReg()->next()); ++ reg_def S6 (SOC, SOE, Op_RegI, 29, S6->as_VMReg()); ++ reg_def S6_H (SOC, SOE, Op_RegI, 29, S6->as_VMReg()->next()); ++ reg_def S7 (SOC, SOE, Op_RegI, 30, S7->as_VMReg()); ++ reg_def S7_H (SOC, SOE, Op_RegI, 30, S7->as_VMReg()->next()); ++ reg_def S8 (SOC, SOE, Op_RegI, 31, S8->as_VMReg()); ++ reg_def S8_H (SOC, SOE, Op_RegI, 31, S8->as_VMReg()->next()); ++ ++ ++// Floating/Vector registers. ++ reg_def F0 ( SOC, SOC, Op_RegF, 0, F0->as_VMReg() ); ++ reg_def F0_H ( SOC, SOC, Op_RegF, 0, F0->as_VMReg()->next() ); ++ reg_def F0_J ( SOC, SOC, Op_RegF, 0, F0->as_VMReg()->next(2) ); ++ reg_def F0_K ( SOC, SOC, Op_RegF, 0, F0->as_VMReg()->next(3) ); ++ reg_def F0_L ( SOC, SOC, Op_RegF, 0, F0->as_VMReg()->next(4) ); ++ reg_def F0_M ( SOC, SOC, Op_RegF, 0, F0->as_VMReg()->next(5) ); ++ reg_def F0_N ( SOC, SOC, Op_RegF, 0, F0->as_VMReg()->next(6) ); ++ reg_def F0_O ( SOC, SOC, Op_RegF, 0, F0->as_VMReg()->next(7) ); ++ ++ reg_def F1 ( SOC, SOC, Op_RegF, 1, F1->as_VMReg() ); ++ reg_def F1_H ( SOC, SOC, Op_RegF, 1, F1->as_VMReg()->next() ); ++ reg_def F1_J ( SOC, SOC, Op_RegF, 1, F1->as_VMReg()->next(2) ); ++ reg_def F1_K ( SOC, SOC, Op_RegF, 1, F1->as_VMReg()->next(3) ); ++ reg_def F1_L ( SOC, SOC, Op_RegF, 1, F1->as_VMReg()->next(4) ); ++ reg_def F1_M ( SOC, SOC, Op_RegF, 1, F1->as_VMReg()->next(5) ); ++ reg_def F1_N ( SOC, SOC, Op_RegF, 1, F1->as_VMReg()->next(6) ); ++ reg_def F1_O ( SOC, SOC, Op_RegF, 1, F1->as_VMReg()->next(7) ); ++ ++ reg_def F2 ( SOC, SOC, Op_RegF, 2, F2->as_VMReg() ); ++ reg_def F2_H ( SOC, SOC, Op_RegF, 2, F2->as_VMReg()->next() ); ++ reg_def F2_J ( SOC, SOC, Op_RegF, 2, F2->as_VMReg()->next(2) ); ++ reg_def F2_K ( SOC, SOC, Op_RegF, 2, F2->as_VMReg()->next(3) ); ++ reg_def F2_L ( SOC, SOC, Op_RegF, 2, F2->as_VMReg()->next(4) ); ++ reg_def F2_M ( SOC, SOC, Op_RegF, 2, F2->as_VMReg()->next(5) ); ++ reg_def F2_N ( SOC, SOC, Op_RegF, 2, F2->as_VMReg()->next(6) ); ++ reg_def F2_O ( SOC, SOC, Op_RegF, 2, F2->as_VMReg()->next(7) ); ++ ++ reg_def F3 ( SOC, SOC, Op_RegF, 3, F3->as_VMReg() ); ++ reg_def F3_H ( SOC, SOC, Op_RegF, 3, F3->as_VMReg()->next() ); ++ reg_def F3_J ( SOC, SOC, Op_RegF, 3, F3->as_VMReg()->next(2) ); ++ reg_def F3_K ( SOC, SOC, Op_RegF, 3, F3->as_VMReg()->next(3) ); ++ reg_def F3_L ( SOC, SOC, Op_RegF, 3, F3->as_VMReg()->next(4) ); ++ reg_def F3_M ( SOC, SOC, Op_RegF, 3, F3->as_VMReg()->next(5) ); ++ reg_def F3_N ( SOC, SOC, Op_RegF, 3, F3->as_VMReg()->next(6) ); ++ reg_def F3_O ( SOC, SOC, Op_RegF, 3, F3->as_VMReg()->next(7) ); ++ ++ reg_def F4 ( SOC, SOC, Op_RegF, 4, F4->as_VMReg() ); ++ reg_def F4_H ( SOC, SOC, Op_RegF, 4, F4->as_VMReg()->next() ); ++ reg_def F4_J ( SOC, SOC, Op_RegF, 4, F4->as_VMReg()->next(2) ); ++ reg_def F4_K ( SOC, SOC, Op_RegF, 4, F4->as_VMReg()->next(3) ); ++ reg_def F4_L ( SOC, SOC, Op_RegF, 4, F4->as_VMReg()->next(4) ); ++ reg_def F4_M ( SOC, SOC, Op_RegF, 4, F4->as_VMReg()->next(5) ); ++ reg_def F4_N ( SOC, SOC, Op_RegF, 4, F4->as_VMReg()->next(6) ); ++ reg_def F4_O ( SOC, SOC, Op_RegF, 4, F4->as_VMReg()->next(7) ); ++ ++ reg_def F5 ( SOC, SOC, Op_RegF, 5, F5->as_VMReg() ); ++ reg_def F5_H ( SOC, SOC, Op_RegF, 5, F5->as_VMReg()->next() ); ++ reg_def F5_J ( SOC, SOC, Op_RegF, 5, F5->as_VMReg()->next(2) ); ++ reg_def F5_K ( SOC, SOC, Op_RegF, 5, 
F5->as_VMReg()->next(3) ); ++ reg_def F5_L ( SOC, SOC, Op_RegF, 5, F5->as_VMReg()->next(4) ); ++ reg_def F5_M ( SOC, SOC, Op_RegF, 5, F5->as_VMReg()->next(5) ); ++ reg_def F5_N ( SOC, SOC, Op_RegF, 5, F5->as_VMReg()->next(6) ); ++ reg_def F5_O ( SOC, SOC, Op_RegF, 5, F5->as_VMReg()->next(7) ); ++ ++ reg_def F6 ( SOC, SOC, Op_RegF, 6, F6->as_VMReg() ); ++ reg_def F6_H ( SOC, SOC, Op_RegF, 6, F6->as_VMReg()->next() ); ++ reg_def F6_J ( SOC, SOC, Op_RegF, 6, F6->as_VMReg()->next(2) ); ++ reg_def F6_K ( SOC, SOC, Op_RegF, 6, F6->as_VMReg()->next(3) ); ++ reg_def F6_L ( SOC, SOC, Op_RegF, 6, F6->as_VMReg()->next(4) ); ++ reg_def F6_M ( SOC, SOC, Op_RegF, 6, F6->as_VMReg()->next(5) ); ++ reg_def F6_N ( SOC, SOC, Op_RegF, 6, F6->as_VMReg()->next(6) ); ++ reg_def F6_O ( SOC, SOC, Op_RegF, 6, F6->as_VMReg()->next(7) ); ++ ++ reg_def F7 ( SOC, SOC, Op_RegF, 7, F7->as_VMReg() ); ++ reg_def F7_H ( SOC, SOC, Op_RegF, 7, F7->as_VMReg()->next() ); ++ reg_def F7_J ( SOC, SOC, Op_RegF, 7, F7->as_VMReg()->next(2) ); ++ reg_def F7_K ( SOC, SOC, Op_RegF, 7, F7->as_VMReg()->next(3) ); ++ reg_def F7_L ( SOC, SOC, Op_RegF, 7, F7->as_VMReg()->next(4) ); ++ reg_def F7_M ( SOC, SOC, Op_RegF, 7, F7->as_VMReg()->next(5) ); ++ reg_def F7_N ( SOC, SOC, Op_RegF, 7, F7->as_VMReg()->next(6) ); ++ reg_def F7_O ( SOC, SOC, Op_RegF, 7, F7->as_VMReg()->next(7) ); ++ ++ reg_def F8 ( SOC, SOC, Op_RegF, 8, F8->as_VMReg() ); ++ reg_def F8_H ( SOC, SOC, Op_RegF, 8, F8->as_VMReg()->next() ); ++ reg_def F8_J ( SOC, SOC, Op_RegF, 8, F8->as_VMReg()->next(2) ); ++ reg_def F8_K ( SOC, SOC, Op_RegF, 8, F8->as_VMReg()->next(3) ); ++ reg_def F8_L ( SOC, SOC, Op_RegF, 8, F8->as_VMReg()->next(4) ); ++ reg_def F8_M ( SOC, SOC, Op_RegF, 8, F8->as_VMReg()->next(5) ); ++ reg_def F8_N ( SOC, SOC, Op_RegF, 8, F8->as_VMReg()->next(6) ); ++ reg_def F8_O ( SOC, SOC, Op_RegF, 8, F8->as_VMReg()->next(7) ); ++ ++ reg_def F9 ( SOC, SOC, Op_RegF, 9, F9->as_VMReg() ); ++ reg_def F9_H ( SOC, SOC, Op_RegF, 9, F9->as_VMReg()->next() ); ++ reg_def F9_J ( SOC, SOC, Op_RegF, 9, F9->as_VMReg()->next(2) ); ++ reg_def F9_K ( SOC, SOC, Op_RegF, 9, F9->as_VMReg()->next(3) ); ++ reg_def F9_L ( SOC, SOC, Op_RegF, 9, F9->as_VMReg()->next(4) ); ++ reg_def F9_M ( SOC, SOC, Op_RegF, 9, F9->as_VMReg()->next(5) ); ++ reg_def F9_N ( SOC, SOC, Op_RegF, 9, F9->as_VMReg()->next(6) ); ++ reg_def F9_O ( SOC, SOC, Op_RegF, 9, F9->as_VMReg()->next(7) ); ++ ++ reg_def F10 ( SOC, SOC, Op_RegF, 10, F10->as_VMReg() ); ++ reg_def F10_H ( SOC, SOC, Op_RegF, 10, F10->as_VMReg()->next() ); ++ reg_def F10_J ( SOC, SOC, Op_RegF, 10, F10->as_VMReg()->next(2) ); ++ reg_def F10_K ( SOC, SOC, Op_RegF, 10, F10->as_VMReg()->next(3) ); ++ reg_def F10_L ( SOC, SOC, Op_RegF, 10, F10->as_VMReg()->next(4) ); ++ reg_def F10_M ( SOC, SOC, Op_RegF, 10, F10->as_VMReg()->next(5) ); ++ reg_def F10_N ( SOC, SOC, Op_RegF, 10, F10->as_VMReg()->next(6) ); ++ reg_def F10_O ( SOC, SOC, Op_RegF, 10, F10->as_VMReg()->next(7) ); ++ ++ reg_def F11 ( SOC, SOC, Op_RegF, 11, F11->as_VMReg() ); ++ reg_def F11_H ( SOC, SOC, Op_RegF, 11, F11->as_VMReg()->next() ); ++ reg_def F11_J ( SOC, SOC, Op_RegF, 11, F11->as_VMReg()->next(2) ); ++ reg_def F11_K ( SOC, SOC, Op_RegF, 11, F11->as_VMReg()->next(3) ); ++ reg_def F11_L ( SOC, SOC, Op_RegF, 11, F11->as_VMReg()->next(4) ); ++ reg_def F11_M ( SOC, SOC, Op_RegF, 11, F11->as_VMReg()->next(5) ); ++ reg_def F11_N ( SOC, SOC, Op_RegF, 11, F11->as_VMReg()->next(6) ); ++ reg_def F11_O ( SOC, SOC, Op_RegF, 11, F11->as_VMReg()->next(7) ); ++ ++ reg_def F12 ( SOC, SOC, Op_RegF, 12, 
F12->as_VMReg() ); ++ reg_def F12_H ( SOC, SOC, Op_RegF, 12, F12->as_VMReg()->next() ); ++ reg_def F12_J ( SOC, SOC, Op_RegF, 12, F12->as_VMReg()->next(2) ); ++ reg_def F12_K ( SOC, SOC, Op_RegF, 12, F12->as_VMReg()->next(3) ); ++ reg_def F12_L ( SOC, SOC, Op_RegF, 12, F12->as_VMReg()->next(4) ); ++ reg_def F12_M ( SOC, SOC, Op_RegF, 12, F12->as_VMReg()->next(5) ); ++ reg_def F12_N ( SOC, SOC, Op_RegF, 12, F12->as_VMReg()->next(6) ); ++ reg_def F12_O ( SOC, SOC, Op_RegF, 12, F12->as_VMReg()->next(7) ); ++ ++ reg_def F13 ( SOC, SOC, Op_RegF, 13, F13->as_VMReg() ); ++ reg_def F13_H ( SOC, SOC, Op_RegF, 13, F13->as_VMReg()->next() ); ++ reg_def F13_J ( SOC, SOC, Op_RegF, 13, F13->as_VMReg()->next(2) ); ++ reg_def F13_K ( SOC, SOC, Op_RegF, 13, F13->as_VMReg()->next(3) ); ++ reg_def F13_L ( SOC, SOC, Op_RegF, 13, F13->as_VMReg()->next(4) ); ++ reg_def F13_M ( SOC, SOC, Op_RegF, 13, F13->as_VMReg()->next(5) ); ++ reg_def F13_N ( SOC, SOC, Op_RegF, 13, F13->as_VMReg()->next(6) ); ++ reg_def F13_O ( SOC, SOC, Op_RegF, 13, F13->as_VMReg()->next(7) ); ++ ++ reg_def F14 ( SOC, SOC, Op_RegF, 14, F14->as_VMReg() ); ++ reg_def F14_H ( SOC, SOC, Op_RegF, 14, F14->as_VMReg()->next() ); ++ reg_def F14_J ( SOC, SOC, Op_RegF, 14, F14->as_VMReg()->next(2) ); ++ reg_def F14_K ( SOC, SOC, Op_RegF, 14, F14->as_VMReg()->next(3) ); ++ reg_def F14_L ( SOC, SOC, Op_RegF, 14, F14->as_VMReg()->next(4) ); ++ reg_def F14_M ( SOC, SOC, Op_RegF, 14, F14->as_VMReg()->next(5) ); ++ reg_def F14_N ( SOC, SOC, Op_RegF, 14, F14->as_VMReg()->next(6) ); ++ reg_def F14_O ( SOC, SOC, Op_RegF, 14, F14->as_VMReg()->next(7) ); ++ ++ reg_def F15 ( SOC, SOC, Op_RegF, 15, F15->as_VMReg() ); ++ reg_def F15_H ( SOC, SOC, Op_RegF, 15, F15->as_VMReg()->next() ); ++ reg_def F15_J ( SOC, SOC, Op_RegF, 15, F15->as_VMReg()->next(2) ); ++ reg_def F15_K ( SOC, SOC, Op_RegF, 15, F15->as_VMReg()->next(3) ); ++ reg_def F15_L ( SOC, SOC, Op_RegF, 15, F15->as_VMReg()->next(4) ); ++ reg_def F15_M ( SOC, SOC, Op_RegF, 15, F15->as_VMReg()->next(5) ); ++ reg_def F15_N ( SOC, SOC, Op_RegF, 15, F15->as_VMReg()->next(6) ); ++ reg_def F15_O ( SOC, SOC, Op_RegF, 15, F15->as_VMReg()->next(7) ); ++ ++ reg_def F16 ( SOC, SOC, Op_RegF, 16, F16->as_VMReg() ); ++ reg_def F16_H ( SOC, SOC, Op_RegF, 16, F16->as_VMReg()->next() ); ++ reg_def F16_J ( SOC, SOC, Op_RegF, 16, F16->as_VMReg()->next(2) ); ++ reg_def F16_K ( SOC, SOC, Op_RegF, 16, F16->as_VMReg()->next(3) ); ++ reg_def F16_L ( SOC, SOC, Op_RegF, 16, F16->as_VMReg()->next(4) ); ++ reg_def F16_M ( SOC, SOC, Op_RegF, 16, F16->as_VMReg()->next(5) ); ++ reg_def F16_N ( SOC, SOC, Op_RegF, 16, F16->as_VMReg()->next(6) ); ++ reg_def F16_O ( SOC, SOC, Op_RegF, 16, F16->as_VMReg()->next(7) ); ++ ++ reg_def F17 ( SOC, SOC, Op_RegF, 17, F17->as_VMReg() ); ++ reg_def F17_H ( SOC, SOC, Op_RegF, 17, F17->as_VMReg()->next() ); ++ reg_def F17_J ( SOC, SOC, Op_RegF, 17, F17->as_VMReg()->next(2) ); ++ reg_def F17_K ( SOC, SOC, Op_RegF, 17, F17->as_VMReg()->next(3) ); ++ reg_def F17_L ( SOC, SOC, Op_RegF, 17, F17->as_VMReg()->next(4) ); ++ reg_def F17_M ( SOC, SOC, Op_RegF, 17, F17->as_VMReg()->next(5) ); ++ reg_def F17_N ( SOC, SOC, Op_RegF, 17, F17->as_VMReg()->next(6) ); ++ reg_def F17_O ( SOC, SOC, Op_RegF, 17, F17->as_VMReg()->next(7) ); ++ ++ reg_def F18 ( SOC, SOC, Op_RegF, 18, F18->as_VMReg() ); ++ reg_def F18_H ( SOC, SOC, Op_RegF, 18, F18->as_VMReg()->next() ); ++ reg_def F18_J ( SOC, SOC, Op_RegF, 18, F18->as_VMReg()->next(2) ); ++ reg_def F18_K ( SOC, SOC, Op_RegF, 18, F18->as_VMReg()->next(3) ); ++ reg_def F18_L ( 
SOC, SOC, Op_RegF, 18, F18->as_VMReg()->next(4) ); ++ reg_def F18_M ( SOC, SOC, Op_RegF, 18, F18->as_VMReg()->next(5) ); ++ reg_def F18_N ( SOC, SOC, Op_RegF, 18, F18->as_VMReg()->next(6) ); ++ reg_def F18_O ( SOC, SOC, Op_RegF, 18, F18->as_VMReg()->next(7) ); ++ ++ reg_def F19 ( SOC, SOC, Op_RegF, 19, F19->as_VMReg() ); ++ reg_def F19_H ( SOC, SOC, Op_RegF, 19, F19->as_VMReg()->next() ); ++ reg_def F19_J ( SOC, SOC, Op_RegF, 19, F19->as_VMReg()->next(2) ); ++ reg_def F19_K ( SOC, SOC, Op_RegF, 19, F19->as_VMReg()->next(3) ); ++ reg_def F19_L ( SOC, SOC, Op_RegF, 19, F19->as_VMReg()->next(4) ); ++ reg_def F19_M ( SOC, SOC, Op_RegF, 19, F19->as_VMReg()->next(5) ); ++ reg_def F19_N ( SOC, SOC, Op_RegF, 19, F19->as_VMReg()->next(6) ); ++ reg_def F19_O ( SOC, SOC, Op_RegF, 19, F19->as_VMReg()->next(7) ); ++ ++ reg_def F20 ( SOC, SOC, Op_RegF, 20, F20->as_VMReg() ); ++ reg_def F20_H ( SOC, SOC, Op_RegF, 20, F20->as_VMReg()->next() ); ++ reg_def F20_J ( SOC, SOC, Op_RegF, 20, F20->as_VMReg()->next(2) ); ++ reg_def F20_K ( SOC, SOC, Op_RegF, 20, F20->as_VMReg()->next(3) ); ++ reg_def F20_L ( SOC, SOC, Op_RegF, 20, F20->as_VMReg()->next(4) ); ++ reg_def F20_M ( SOC, SOC, Op_RegF, 20, F20->as_VMReg()->next(5) ); ++ reg_def F20_N ( SOC, SOC, Op_RegF, 20, F20->as_VMReg()->next(6) ); ++ reg_def F20_O ( SOC, SOC, Op_RegF, 20, F20->as_VMReg()->next(7) ); ++ ++ reg_def F21 ( SOC, SOC, Op_RegF, 21, F21->as_VMReg() ); ++ reg_def F21_H ( SOC, SOC, Op_RegF, 21, F21->as_VMReg()->next() ); ++ reg_def F21_J ( SOC, SOC, Op_RegF, 21, F21->as_VMReg()->next(2) ); ++ reg_def F21_K ( SOC, SOC, Op_RegF, 21, F21->as_VMReg()->next(3) ); ++ reg_def F21_L ( SOC, SOC, Op_RegF, 21, F21->as_VMReg()->next(4) ); ++ reg_def F21_M ( SOC, SOC, Op_RegF, 21, F21->as_VMReg()->next(5) ); ++ reg_def F21_N ( SOC, SOC, Op_RegF, 21, F21->as_VMReg()->next(6) ); ++ reg_def F21_O ( SOC, SOC, Op_RegF, 21, F21->as_VMReg()->next(7) ); ++ ++ reg_def F22 ( SOC, SOC, Op_RegF, 22, F22->as_VMReg() ); ++ reg_def F22_H ( SOC, SOC, Op_RegF, 22, F22->as_VMReg()->next() ); ++ reg_def F22_J ( SOC, SOC, Op_RegF, 22, F22->as_VMReg()->next(2) ); ++ reg_def F22_K ( SOC, SOC, Op_RegF, 22, F22->as_VMReg()->next(3) ); ++ reg_def F22_L ( SOC, SOC, Op_RegF, 22, F22->as_VMReg()->next(4) ); ++ reg_def F22_M ( SOC, SOC, Op_RegF, 22, F22->as_VMReg()->next(5) ); ++ reg_def F22_N ( SOC, SOC, Op_RegF, 22, F22->as_VMReg()->next(6) ); ++ reg_def F22_O ( SOC, SOC, Op_RegF, 22, F22->as_VMReg()->next(7) ); ++ ++ reg_def F23 ( SOC, SOC, Op_RegF, 23, F23->as_VMReg() ); ++ reg_def F23_H ( SOC, SOC, Op_RegF, 23, F23->as_VMReg()->next() ); ++ reg_def F23_J ( SOC, SOC, Op_RegF, 23, F23->as_VMReg()->next(2) ); ++ reg_def F23_K ( SOC, SOC, Op_RegF, 23, F23->as_VMReg()->next(3) ); ++ reg_def F23_L ( SOC, SOC, Op_RegF, 23, F23->as_VMReg()->next(4) ); ++ reg_def F23_M ( SOC, SOC, Op_RegF, 23, F23->as_VMReg()->next(5) ); ++ reg_def F23_N ( SOC, SOC, Op_RegF, 23, F23->as_VMReg()->next(6) ); ++ reg_def F23_O ( SOC, SOC, Op_RegF, 23, F23->as_VMReg()->next(7) ); ++ ++ reg_def F24 ( SOC, SOE, Op_RegF, 24, F24->as_VMReg() ); ++ reg_def F24_H ( SOC, SOE, Op_RegF, 24, F24->as_VMReg()->next() ); ++ reg_def F24_J ( SOC, SOC, Op_RegF, 24, F24->as_VMReg()->next(2) ); ++ reg_def F24_K ( SOC, SOC, Op_RegF, 24, F24->as_VMReg()->next(3) ); ++ reg_def F24_L ( SOC, SOC, Op_RegF, 24, F24->as_VMReg()->next(4) ); ++ reg_def F24_M ( SOC, SOC, Op_RegF, 24, F24->as_VMReg()->next(5) ); ++ reg_def F24_N ( SOC, SOC, Op_RegF, 24, F24->as_VMReg()->next(6) ); ++ reg_def F24_O ( SOC, SOC, Op_RegF, 24, 
F24->as_VMReg()->next(7) ); ++ ++ reg_def F25 ( SOC, SOE, Op_RegF, 25, F25->as_VMReg() ); ++ reg_def F25_H ( SOC, SOE, Op_RegF, 25, F25->as_VMReg()->next() ); ++ reg_def F25_J ( SOC, SOC, Op_RegF, 25, F25->as_VMReg()->next(2) ); ++ reg_def F25_K ( SOC, SOC, Op_RegF, 25, F25->as_VMReg()->next(3) ); ++ reg_def F25_L ( SOC, SOC, Op_RegF, 25, F25->as_VMReg()->next(4) ); ++ reg_def F25_M ( SOC, SOC, Op_RegF, 25, F25->as_VMReg()->next(5) ); ++ reg_def F25_N ( SOC, SOC, Op_RegF, 25, F25->as_VMReg()->next(6) ); ++ reg_def F25_O ( SOC, SOC, Op_RegF, 25, F25->as_VMReg()->next(7) ); ++ ++ reg_def F26 ( SOC, SOE, Op_RegF, 26, F26->as_VMReg() ); ++ reg_def F26_H ( SOC, SOE, Op_RegF, 26, F26->as_VMReg()->next() ); ++ reg_def F26_J ( SOC, SOC, Op_RegF, 26, F26->as_VMReg()->next(2) ); ++ reg_def F26_K ( SOC, SOC, Op_RegF, 26, F26->as_VMReg()->next(3) ); ++ reg_def F26_L ( SOC, SOC, Op_RegF, 26, F26->as_VMReg()->next(4) ); ++ reg_def F26_M ( SOC, SOC, Op_RegF, 26, F26->as_VMReg()->next(5) ); ++ reg_def F26_N ( SOC, SOC, Op_RegF, 26, F26->as_VMReg()->next(6) ); ++ reg_def F26_O ( SOC, SOC, Op_RegF, 26, F26->as_VMReg()->next(7) ); ++ ++ reg_def F27 ( SOC, SOE, Op_RegF, 27, F27->as_VMReg() ); ++ reg_def F27_H ( SOC, SOE, Op_RegF, 27, F27->as_VMReg()->next() ); ++ reg_def F27_J ( SOC, SOC, Op_RegF, 27, F27->as_VMReg()->next(2) ); ++ reg_def F27_K ( SOC, SOC, Op_RegF, 27, F27->as_VMReg()->next(3) ); ++ reg_def F27_L ( SOC, SOC, Op_RegF, 27, F27->as_VMReg()->next(4) ); ++ reg_def F27_M ( SOC, SOC, Op_RegF, 27, F27->as_VMReg()->next(5) ); ++ reg_def F27_N ( SOC, SOC, Op_RegF, 27, F27->as_VMReg()->next(6) ); ++ reg_def F27_O ( SOC, SOC, Op_RegF, 27, F27->as_VMReg()->next(7) ); ++ ++ reg_def F28 ( SOC, SOE, Op_RegF, 28, F28->as_VMReg() ); ++ reg_def F28_H ( SOC, SOE, Op_RegF, 28, F28->as_VMReg()->next() ); ++ reg_def F28_J ( SOC, SOC, Op_RegF, 28, F28->as_VMReg()->next(2) ); ++ reg_def F28_K ( SOC, SOC, Op_RegF, 28, F28->as_VMReg()->next(3) ); ++ reg_def F28_L ( SOC, SOC, Op_RegF, 28, F28->as_VMReg()->next(4) ); ++ reg_def F28_M ( SOC, SOC, Op_RegF, 28, F28->as_VMReg()->next(5) ); ++ reg_def F28_N ( SOC, SOC, Op_RegF, 28, F28->as_VMReg()->next(6) ); ++ reg_def F28_O ( SOC, SOC, Op_RegF, 28, F28->as_VMReg()->next(7) ); ++ ++ reg_def F29 ( SOC, SOE, Op_RegF, 29, F29->as_VMReg() ); ++ reg_def F29_H ( SOC, SOE, Op_RegF, 29, F29->as_VMReg()->next() ); ++ reg_def F29_J ( SOC, SOC, Op_RegF, 29, F29->as_VMReg()->next(2) ); ++ reg_def F29_K ( SOC, SOC, Op_RegF, 29, F29->as_VMReg()->next(3) ); ++ reg_def F29_L ( SOC, SOC, Op_RegF, 29, F29->as_VMReg()->next(4) ); ++ reg_def F29_M ( SOC, SOC, Op_RegF, 29, F29->as_VMReg()->next(5) ); ++ reg_def F29_N ( SOC, SOC, Op_RegF, 29, F29->as_VMReg()->next(6) ); ++ reg_def F29_O ( SOC, SOC, Op_RegF, 29, F29->as_VMReg()->next(7) ); ++ ++ reg_def F30 ( SOC, SOE, Op_RegF, 30, F30->as_VMReg() ); ++ reg_def F30_H ( SOC, SOE, Op_RegF, 30, F30->as_VMReg()->next() ); ++ reg_def F30_J ( SOC, SOC, Op_RegF, 30, F30->as_VMReg()->next(2) ); ++ reg_def F30_K ( SOC, SOC, Op_RegF, 30, F30->as_VMReg()->next(3) ); ++ reg_def F30_L ( SOC, SOC, Op_RegF, 30, F30->as_VMReg()->next(4) ); ++ reg_def F30_M ( SOC, SOC, Op_RegF, 30, F30->as_VMReg()->next(5) ); ++ reg_def F30_N ( SOC, SOC, Op_RegF, 30, F30->as_VMReg()->next(6) ); ++ reg_def F30_O ( SOC, SOC, Op_RegF, 30, F30->as_VMReg()->next(7) ); ++ ++ reg_def F31 ( SOC, SOE, Op_RegF, 31, F31->as_VMReg() ); ++ reg_def F31_H ( SOC, SOE, Op_RegF, 31, F31->as_VMReg()->next() ); ++ reg_def F31_J ( SOC, SOC, Op_RegF, 31, F31->as_VMReg()->next(2) ); ++ reg_def F31_K ( 
SOC, SOC, Op_RegF, 31, F31->as_VMReg()->next(3) ); ++ reg_def F31_L ( SOC, SOC, Op_RegF, 31, F31->as_VMReg()->next(4) ); ++ reg_def F31_M ( SOC, SOC, Op_RegF, 31, F31->as_VMReg()->next(5) ); ++ reg_def F31_N ( SOC, SOC, Op_RegF, 31, F31->as_VMReg()->next(6) ); ++ reg_def F31_O ( SOC, SOC, Op_RegF, 31, F31->as_VMReg()->next(7) ); ++ ++ ++// ---------------------------- ++// Special Registers ++//S6 is used for get_thread(S6) ++//S5 is uesd for heapbase of compressed oop ++alloc_class chunk0( ++ S7, S7_H, ++ S0, S0_H, ++ S1, S1_H, ++ S2, S2_H, ++ S4, S4_H, ++ S5, S5_H, ++ S6, S6_H, ++ S3, S3_H, ++ T2, T2_H, ++ T3, T3_H, ++ T8, T8_H, ++ T4, T4_H, ++ T1, T1_H, // inline_cache_reg ++ T6, T6_H, ++ A7, A7_H, ++ A6, A6_H, ++ A5, A5_H, ++ A4, A4_H, ++ T5, T5_H, ++ A3, A3_H, ++ A2, A2_H, ++ A1, A1_H, ++ A0, A0_H, ++ T0, T0_H, ++ S8, S8_H ++ RA, RA_H, ++ SP, SP_H, // stack_pointer ++ FP, FP_H, // frame_pointer ++ ++ // non-allocatable registers ++ T7, T7_H, ++ TP, TP_H, ++ RX, RX_H, ++ R0, R0_H, ++ ); ++ ++// F23 is scratch reg ++alloc_class chunk1( F0, F0_H, F0_J, F0_K, F0_L, F0_M, F0_N, F0_O, ++ F1, F1_H, F1_J, F1_K, F1_L, F1_M, F1_N, F1_O, ++ F2, F2_H, F2_J, F2_K, F2_L, F2_M, F2_N, F2_O, ++ F3, F3_H, F3_J, F3_K, F3_L, F3_M, F3_N, F3_O, ++ F4, F4_H, F4_J, F4_K, F4_L, F4_M, F4_N, F4_O, ++ F5, F5_H, F5_J, F5_K, F5_L, F5_M, F5_N, F5_O, ++ F6, F6_H, F6_J, F6_K, F6_L, F6_M, F6_N, F6_O, ++ F7, F7_H, F7_J, F7_K, F7_L, F7_M, F7_N, F7_O, ++ F8, F8_H, F8_J, F8_K, F8_L, F8_M, F8_N, F8_O, ++ F9, F9_H, F9_J, F9_K, F9_L, F9_M, F9_N, F9_O, ++ F10, F10_H, F10_J, F10_K, F10_L, F10_M, F10_N, F10_O, ++ F11, F11_H, F11_J, F11_K, F11_L, F11_M, F11_N, F11_O, ++ F12, F12_H, F12_J, F12_K, F12_L, F12_M, F12_N, F12_O, ++ F13, F13_H, F13_J, F13_K, F13_L, F13_M, F13_N, F13_O, ++ F14, F14_H, F14_J, F14_K, F14_L, F14_M, F14_N, F14_O, ++ F15, F15_H, F15_J, F15_K, F15_L, F15_M, F15_N, F15_O, ++ F16, F16_H, F16_J, F16_K, F16_L, F16_M, F16_N, F16_O, ++ F17, F17_H, F17_J, F17_K, F17_L, F17_M, F17_N, F17_O, ++ F18, F18_H, F18_J, F18_K, F18_L, F18_M, F18_N, F18_O, ++ F19, F19_H, F19_J, F19_K, F19_L, F19_M, F19_N, F19_O, ++ F20, F20_H, F20_J, F20_K, F20_L, F20_M, F20_N, F20_O, ++ F21, F21_H, F21_J, F21_K, F21_L, F21_M, F21_N, F21_O, ++ F22, F22_H, F22_J, F22_K, F22_L, F22_M, F22_N, F22_O, ++ F24, F24_H, F24_J, F24_K, F24_L, F24_M, F24_N, F24_O, ++ F25, F25_H, F25_J, F25_K, F25_L, F25_M, F25_N, F25_O, ++ F26, F26_H, F26_J, F26_K, F26_L, F26_M, F26_N, F26_O, ++ F27, F27_H, F27_J, F27_K, F27_L, F27_M, F27_N, F27_O, ++ F28, F28_H, F28_J, F28_K, F28_L, F28_M, F28_N, F28_O, ++ F29, F29_H, F29_J, F29_K, F29_L, F29_M, F29_N, F29_O, ++ F30, F30_H, F30_J, F30_K, F30_L, F30_M, F30_N, F30_O, ++ F31, F31_H, F31_J, F31_K, F31_L, F31_M, F31_N, F31_O, ++ ++ // non-allocatable registers ++ F23, F23_H, F23_J, F23_K, F23_L, F23_M, F23_N, F23_O, ++ ); ++ ++reg_class s_reg( S0, S1, S2, S3, S4, S5, S6, S7 ); ++reg_class s0_reg( S0 ); ++reg_class s1_reg( S1 ); ++reg_class s2_reg( S2 ); ++reg_class s3_reg( S3 ); ++reg_class s4_reg( S4 ); ++reg_class s5_reg( S5 ); ++reg_class s6_reg( S6 ); ++reg_class s7_reg( S7 ); ++ ++reg_class t_reg( T0, T1, T2, T3, T8, T4 ); ++reg_class t0_reg( T0 ); ++reg_class t1_reg( T1 ); ++reg_class t2_reg( T2 ); ++reg_class t3_reg( T3 ); ++reg_class t8_reg( T8 ); ++reg_class t4_reg( T4 ); ++ ++reg_class a_reg( A0, A1, A2, A3, A4, A5, A6, A7 ); ++reg_class a0_reg( A0 ); ++reg_class a1_reg( A1 ); ++reg_class a2_reg( A2 ); ++reg_class a3_reg( A3 ); ++reg_class a4_reg( A4 ); ++reg_class a5_reg( A5 ); ++reg_class a6_reg( A6 ); ++reg_class 
a7_reg( A7 ); ++ ++// TODO: LA ++//reg_class v0_reg( A0 ); ++//reg_class v1_reg( A1 ); ++ ++reg_class sp_reg( SP, SP_H ); ++reg_class fp_reg( FP, FP_H ); ++ ++reg_class v0_long_reg( A0, A0_H ); ++reg_class v1_long_reg( A1, A1_H ); ++reg_class a0_long_reg( A0, A0_H ); ++reg_class a1_long_reg( A1, A1_H ); ++reg_class a2_long_reg( A2, A2_H ); ++reg_class a3_long_reg( A3, A3_H ); ++reg_class a4_long_reg( A4, A4_H ); ++reg_class a5_long_reg( A5, A5_H ); ++reg_class a6_long_reg( A6, A6_H ); ++reg_class a7_long_reg( A7, A7_H ); ++reg_class t0_long_reg( T0, T0_H ); ++reg_class t1_long_reg( T1, T1_H ); ++reg_class t2_long_reg( T2, T2_H ); ++reg_class t3_long_reg( T3, T3_H ); ++reg_class t8_long_reg( T8, T8_H ); ++reg_class t4_long_reg( T4, T4_H ); ++reg_class s0_long_reg( S0, S0_H ); ++reg_class s1_long_reg( S1, S1_H ); ++reg_class s2_long_reg( S2, S2_H ); ++reg_class s3_long_reg( S3, S3_H ); ++reg_class s4_long_reg( S4, S4_H ); ++reg_class s5_long_reg( S5, S5_H ); ++reg_class s6_long_reg( S6, S6_H ); ++reg_class s7_long_reg( S7, S7_H ); ++ ++reg_class all_reg32( ++ S8, ++ S7, ++ S5, /* S5_heapbase */ ++ /* S6, S6 TREG */ ++ S4, ++ S3, ++ S2, ++ S1, ++ S0, ++ T8, ++ /* T7, AT */ ++ T6, ++ T5, ++ /* T4, jarl T4 */ ++ T3, ++ T2, ++ T1, ++ T0, ++ A7, ++ A6, ++ A5, ++ A4, ++ A3, ++ A2, ++ A1, ++ A0, ++ FP ); ++ ++reg_class int_reg %{ ++ return _ANY_REG32_mask; ++%} ++ ++reg_class no_Ax_int_reg( S7, S0, S1, S2, S4, S3, T8, T2, T3, T1, T6, T5, T0 ); ++ ++reg_class p_reg %{ ++ return _PTR_REG_mask; ++%} ++ ++reg_class no_T8_p_reg( ++ S7, S7_H, ++ S0, S0_H, ++ S1, S1_H, ++ S2, S2_H, ++ S4, S4_H, ++ S3, S3_H, ++ T2, T2_H, ++ T3, T3_H, ++ T1, T1_H, ++ A7, A7_H, ++ A6, A6_H, ++ A5, A5_H, ++ A4, A4_H, ++ A3, A3_H, ++ A2, A2_H, ++ A1, A1_H, ++ A0, A0_H, ++ T0, T0_H ++ ); ++ ++reg_class no_Ax_p_reg( ++ S7, S7_H, ++ S0, S0_H, ++ S1, S1_H, ++ S2, S2_H, ++ S4, S4_H, ++ S3, S3_H, ++ T2, T2_H, ++ T3, T3_H, ++ T1, T1_H, ++ T0, T0_H ++ ); ++ ++reg_class all_reg( ++ S8, S8_H, ++ S7, S7_H, ++ /* S6, S6_H, S6 TREG */ ++ S5, S5_H, /* S5_heapbase */ ++ S4, S4_H, ++ S3, S3_H, ++ S2, S2_H, ++ S1, S1_H, ++ S0, S0_H, ++ T8, T8_H, ++ /* T7, T7_H, AT */ ++ T6, T6_H, ++ T5, T5_H, ++ /* T4, T4_H, jalr T4 */ ++ T3, T3_H, ++ T2, T2_H, ++ T1, T1_H, ++ T0, T0_H, ++ A7, A7_H, ++ A6, A6_H, ++ A5, A5_H, ++ A4, A4_H, ++ A3, A3_H, ++ A2, A2_H, ++ A1, A1_H, ++ A0, A0_H, ++ FP, FP_H ++ ); ++ ++ ++reg_class long_reg %{ ++ return _ANY_REG_mask; ++%} ++ ++// Floating point registers. 
++// F31 are not used as temporary registers in D2I ++reg_class flt_reg( F0, F1, F2, F3, F4, F5, F6, F7, F8, F9, F10, F11, F12, F13, F14, F15, F16, F17, F18, F19, F20, F21, F22, F24, F25, F26, F27, F28, F29, F30, F31); ++reg_class dbl_reg( F0, F0_H, ++ F1, F1_H, ++ F2, F2_H, ++ F3, F3_H, ++ F4, F4_H, ++ F5, F5_H, ++ F6, F6_H, ++ F7, F7_H, ++ F8, F8_H, ++ F9, F9_H, ++ F10, F10_H, ++ F11, F11_H, ++ F12, F12_H, ++ F13, F13_H, ++ F14, F14_H, ++ F15, F15_H, ++ F16, F16_H, ++ F17, F17_H, ++ F18, F18_H, ++ F19, F19_H, ++ F20, F20_H, ++ F21, F21_H, ++ F22, F22_H, ++ F24, F24_H, ++ F25, F25_H, ++ F26, F26_H, ++ F27, F27_H, ++ F28, F28_H, ++ F29, F29_H, ++ F30, F30_H, ++ F31, F31_H); ++ ++// Class for all 128bit vector registers ++reg_class vectorx_reg( F0, F0_H, F0_J, F0_K, ++ F1, F1_H, F1_J, F1_K, ++ F2, F2_H, F2_J, F2_K, ++ F3, F3_H, F3_J, F3_K, ++ F4, F4_H, F4_J, F4_K, ++ F5, F5_H, F5_J, F5_K, ++ F6, F6_H, F6_J, F6_K, ++ F7, F7_H, F7_J, F7_K, ++ F8, F8_H, F8_J, F8_K, ++ F9, F9_H, F9_J, F9_K, ++ F10, F10_H, F10_J, F10_K, ++ F11, F11_H, F11_J, F11_K, ++ F12, F12_H, F12_J, F12_K, ++ F13, F13_H, F13_J, F13_K, ++ F14, F14_H, F14_J, F14_K, ++ F15, F15_H, F15_J, F15_K, ++ F16, F16_H, F16_J, F16_K, ++ F17, F17_H, F17_J, F17_K, ++ F18, F18_H, F18_J, F18_K, ++ F19, F19_H, F19_J, F19_K, ++ F20, F20_H, F20_J, F20_K, ++ F21, F21_H, F21_J, F21_K, ++ F22, F22_H, F22_J, F22_K, ++ F24, F24_H, F24_J, F24_K, ++ F25, F25_H, F25_J, F25_K, ++ F26, F26_H, F26_J, F26_K, ++ F27, F27_H, F27_J, F27_K, ++ F28, F28_H, F28_J, F28_K, ++ F29, F29_H, F29_J, F29_K, ++ F30, F30_H, F30_J, F30_K, ++ F31, F31_H, F31_J, F31_K); ++ ++// Class for all 256bit vector registers ++reg_class vectory_reg( F0, F0_H, F0_J, F0_K, F0_L, F0_M, F0_N, F0_O, ++ F1, F1_H, F1_J, F1_K, F1_L, F1_M, F1_N, F1_O, ++ F2, F2_H, F2_J, F2_K, F2_L, F2_M, F2_N, F2_O, ++ F3, F3_H, F3_J, F3_K, F3_L, F3_M, F3_N, F3_O, ++ F4, F4_H, F4_J, F4_K, F4_L, F4_M, F4_N, F4_O, ++ F5, F5_H, F5_J, F5_K, F5_L, F5_M, F5_N, F5_O, ++ F6, F6_H, F6_J, F6_K, F6_L, F6_M, F6_N, F6_O, ++ F7, F7_H, F7_J, F7_K, F7_L, F7_M, F7_N, F7_O, ++ F8, F8_H, F8_J, F8_K, F8_L, F8_M, F8_N, F8_O, ++ F9, F9_H, F9_J, F9_K, F9_L, F9_M, F9_N, F9_O, ++ F10, F10_H, F10_J, F10_K, F10_L, F10_M, F10_N, F10_O, ++ F11, F11_H, F11_J, F11_K, F11_L, F11_M, F11_N, F11_O, ++ F12, F12_H, F12_J, F12_K, F12_L, F12_M, F12_N, F12_O, ++ F13, F13_H, F13_J, F13_K, F13_L, F13_M, F13_N, F13_O, ++ F14, F14_H, F14_J, F14_K, F14_L, F14_M, F14_N, F14_O, ++ F15, F15_H, F15_J, F15_K, F15_L, F15_M, F15_N, F15_O, ++ F16, F16_H, F16_J, F16_K, F16_L, F16_M, F16_N, F16_O, ++ F17, F17_H, F17_J, F17_K, F17_L, F17_M, F17_N, F17_O, ++ F18, F18_H, F18_J, F18_K, F18_L, F18_M, F18_N, F18_O, ++ F19, F19_H, F19_J, F19_K, F19_L, F19_M, F19_N, F19_O, ++ F20, F20_H, F20_J, F20_K, F20_L, F20_M, F20_N, F20_O, ++ F21, F21_H, F21_J, F21_K, F21_L, F21_M, F21_N, F21_O, ++ F22, F22_H, F22_J, F22_K, F22_L, F22_M, F22_N, F22_O, ++ F24, F24_H, F24_J, F24_K, F24_L, F24_M, F24_N, F24_O, ++ F25, F25_H, F25_J, F25_K, F25_L, F25_M, F25_N, F25_O, ++ F26, F26_H, F26_J, F26_K, F26_L, F26_M, F26_N, F26_O, ++ F27, F27_H, F27_J, F27_K, F27_L, F27_M, F27_N, F27_O, ++ F28, F28_H, F28_J, F28_K, F28_L, F28_M, F28_N, F28_O, ++ F29, F29_H, F29_J, F29_K, F29_L, F29_M, F29_N, F29_O, ++ F30, F30_H, F30_J, F30_K, F30_L, F30_M, F30_N, F30_O, ++ F31, F31_H, F31_J, F31_K, F31_L, F31_M, F31_N, F31_O); ++ ++// TODO: LA ++//reg_class flt_arg0( F0 ); ++//reg_class dbl_arg0( F0, F0_H ); ++//reg_class dbl_arg1( F1, F1_H ); ++ ++%} ++ ++//----------DEFINITION 
BLOCK--------------------------------------------------- ++// Define name --> value mappings to inform the ADLC of an integer valued name ++// Current support includes integer values in the range [0, 0x7FFFFFFF] ++// Format: ++// int_def <name> ( <int_value>, <expression> ); ++// Generated Code in ad_<arch>.hpp ++// #define <name> (<expression>) ++// // value == <int_value> ++// Generated code in ad_<arch>.cpp adlc_verification() ++// assert( <name> == <int_value>, "Expect (<expression>) to equal <int_value>"); ++// ++definitions %{
++ int_def DEFAULT_COST ( 100, 100); ++ int_def HUGE_COST (1000000, 1000000); ++ ++ // Memory refs are twice as expensive as run-of-the-mill. ++ int_def MEMORY_REF_COST ( 200, DEFAULT_COST * 2); ++ ++ // Branches are even more expensive. ++ int_def BRANCH_COST ( 300, DEFAULT_COST * 3); ++ // we use the jr instruction to construct calls, so they are more expensive ++ int_def CALL_COST ( 500, DEFAULT_COST * 5); ++/* ++ int_def EQUAL ( 1, 1 ); ++ int_def NOT_EQUAL ( 2, 2 ); ++ int_def GREATER ( 3, 3 ); ++ int_def GREATER_EQUAL ( 4, 4 ); ++ int_def LESS ( 5, 5 ); ++ int_def LESS_EQUAL ( 6, 6 ); ++*/ ++%} ++ ++ ++
++//----------SOURCE BLOCK------------------------------------------------------- ++// This is a block of C++ code which provides values, functions, and ++// definitions necessary in the rest of the architecture description ++ ++source_hpp %{ ++// Header information of the source block. ++// Method declarations/definitions which are used outside ++// the ad-scope can conveniently be defined here. ++// ++// To keep related declarations/definitions/uses close together, ++// we switch between source %{ }% and source_hpp %{ }% freely as needed. ++
++#include "asm/macroAssembler.hpp" ++#include "gc/shared/barrierSetAssembler.hpp" ++#include "gc/shared/cardTable.hpp" ++#include "gc/shared/cardTableBarrierSet.hpp" ++#include "gc/shared/collectedHeap.hpp" ++#include "opto/addnode.hpp" ++#include "opto/convertnode.hpp" ++#include "runtime/objectMonitor.hpp" ++ ++extern RegMask _ANY_REG32_mask; ++extern RegMask _ANY_REG_mask; ++extern RegMask _PTR_REG_mask; ++
++class CallStubImpl { ++ ++ //-------------------------------------------------------------- ++ //---< Used for optimization in Compile::shorten_branches >--- ++ //-------------------------------------------------------------- ++ ++ public: ++ // Size of call trampoline stub. ++ static uint size_call_trampoline() { ++ return 0; // no call trampolines on this platform ++ } ++ ++ // number of relocations needed by a call trampoline stub ++ static uint reloc_call_trampoline() { ++ return 0; // no call trampolines on this platform ++ } ++}; ++
++class HandlerImpl { ++ ++ public: ++ ++ static int emit_exception_handler(CodeBuffer &cbuf); ++ static int emit_deopt_handler(CodeBuffer& cbuf); ++ ++ static uint size_exception_handler() { ++ // NativeCall instruction size is the same as NativeJump. ++ // exception handler starts out as a jump and can be patched to ++ // a call by deoptimization. (4932387) ++ // Note that this value is also credited (in output.cpp) to ++ // the size of the code section.
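++    // mask_bits(size + m, ~m) with m == 16 - 1 rounds size up to the next multiple of
++    // 16: for example, a raw size of 12 bytes becomes 16, and 16 stays 16.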
++ int size = NativeFarCall::instruction_size; ++ const uintx m = 16 - 1; ++ return mask_bits(size + m, ~m); ++ //return round_to(size, 16); ++ } ++ ++ static uint size_deopt_handler() { ++ int size = NativeFarCall::instruction_size; ++ const uintx m = 16 - 1; ++ return mask_bits(size + m, ~m); ++ //return round_to(size, 16); ++ } ++}; ++ ++inline uint vector_length(const Node* n) { ++ const TypeVect* vt = n->bottom_type()->is_vect(); ++ return vt->length(); ++} ++ ++inline uint vector_length(const MachNode* use, MachOper* opnd) { ++ uint def_idx = use->operand_index(opnd); ++ Node* def = use->in(def_idx); ++ return def->bottom_type()->is_vect()->length(); ++} ++ ++inline uint vector_length_in_bytes(const Node* n) { ++ const TypeVect* vt = n->bottom_type()->is_vect(); ++ return vt->length_in_bytes(); ++} ++ ++inline uint vector_length_in_bytes(const MachNode* use, MachOper* opnd) { ++ uint def_idx = use->operand_index(opnd); ++ Node* def = use->in(def_idx); ++ return def->bottom_type()->is_vect()->length_in_bytes(); ++} ++ ++inline BasicType vector_element_basic_type(const Node *n) { ++ return n->bottom_type()->is_vect()->element_basic_type(); ++} ++ ++inline BasicType vector_element_basic_type(const MachNode *use, MachOper* opnd) { ++ uint def_idx = use->operand_index(opnd); ++ Node* def = use->in(def_idx); ++ return def->bottom_type()->is_vect()->element_basic_type(); ++} ++ ++class Node::PD { ++public: ++ enum NodeFlags { ++ _last_flag = Node::_last_flag ++ }; ++}; ++ ++bool is_CAS(int opcode); ++bool use_AMO(int opcode); ++ ++bool unnecessary_acquire(const Node *barrier); ++bool unnecessary_release(const Node *barrier); ++bool unnecessary_volatile(const Node *barrier); ++bool needs_releasing_store(const Node *store); ++ ++%} // end source_hpp ++ ++source %{ ++ ++#define NO_INDEX 0 ++#define RELOC_IMM64 Assembler::imm_operand ++#define RELOC_DISP32 Assembler::disp32_operand ++ ++#define V0_num A0_num ++#define V0_H_num A0_H_num ++ ++#define __ _masm. ++ ++#define A0 RA0 ++#define A1 RA1 ++#define A2 RA2 ++#define A3 RA3 ++#define A4 RA4 ++#define A5 RA5 ++#define A6 RA6 ++#define A7 RA7 ++#define T0 RT0 ++#define T1 RT1 ++#define T2 RT2 ++#define T3 RT3 ++#define T4 RT4 ++#define T5 RT5 ++#define T6 RT6 ++#define T7 RT7 ++#define T8 RT8 ++ ++RegMask _ANY_REG32_mask; ++RegMask _ANY_REG_mask; ++RegMask _PTR_REG_mask; ++ ++void reg_mask_init() { ++ _ANY_REG32_mask = _ALL_REG32_mask; ++ _ANY_REG_mask = _ALL_REG_mask; ++ _PTR_REG_mask = _ALL_REG_mask; ++ ++ if (UseCompressedOops && (CompressedOops::ptrs_base() != NULL)) { ++ _ANY_REG32_mask.Remove(OptoReg::as_OptoReg(r28->as_VMReg())); ++ _ANY_REG_mask.SUBTRACT(_S5_LONG_REG_mask); ++ _PTR_REG_mask.SUBTRACT(_S5_LONG_REG_mask); ++ } ++ ++ // FP(r22) is not allocatable when PreserveFramePointer is on ++ if (PreserveFramePointer) { ++ _ANY_REG32_mask.Remove(OptoReg::as_OptoReg(r22->as_VMReg())); ++ _ANY_REG_mask.SUBTRACT(_FP_REG_mask); ++ _PTR_REG_mask.SUBTRACT(_FP_REG_mask); ++ } ++} ++ ++void PhaseOutput::pd_perform_mach_node_analysis() { ++} ++ ++int MachNode::pd_alignment_required() const { ++ return 1; ++} ++ ++int MachNode::compute_padding(int current_offset) const { ++ return 0; ++} ++ ++// Emit exception handler code. ++// Stuff framesize into a register and call a VM stub routine. ++int HandlerImpl::emit_exception_handler(CodeBuffer& cbuf) { ++ // Note that the code buffer's insts_mark is always relative to insts. ++ // That's why we must use the macroassembler to generate a handler. 
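++  // The handler is emitted into a separate stub (start_a_stub/end_a_stub below) as a
++  // relocated, patchable jump to the entry point of OptoRuntime::exception_blob(); the
++  // assert at the end checks that it fits in the size_exception_handler() budget.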
++ C2_MacroAssembler _masm(&cbuf); ++ address base = __ start_a_stub(size_exception_handler()); ++ if (base == NULL) { ++ ciEnv::current()->record_failure("CodeCache is full"); ++ return 0; // CodeBuffer::expand failed ++ } ++ ++ int offset = __ offset(); ++ ++ __ block_comment("; emit_exception_handler"); ++ ++ cbuf.set_insts_mark(); ++ __ relocate(relocInfo::runtime_call_type); ++ __ patchable_jump((address)OptoRuntime::exception_blob()->entry_point()); ++ assert(__ offset() - offset <= (int) size_exception_handler(), "overflow"); ++ __ end_a_stub(); ++ return offset; ++} ++ ++// Emit deopt handler code. ++int HandlerImpl::emit_deopt_handler(CodeBuffer& cbuf) { ++ // Note that the code buffer's insts_mark is always relative to insts. ++ // That's why we must use the macroassembler to generate a handler. ++ C2_MacroAssembler _masm(&cbuf); ++ address base = __ start_a_stub(size_deopt_handler()); ++ if (base == NULL) { ++ ciEnv::current()->record_failure("CodeCache is full"); ++ return 0; // CodeBuffer::expand failed ++ } ++ ++ int offset = __ offset(); ++ ++ __ block_comment("; emit_deopt_handler"); ++ ++ cbuf.set_insts_mark(); ++ __ relocate(relocInfo::runtime_call_type); ++ __ patchable_call(SharedRuntime::deopt_blob()->unpack()); ++ assert(__ offset() - offset <= (int) size_deopt_handler(), "overflow"); ++ __ end_a_stub(); ++ return offset; ++} ++ ++ ++const bool Matcher::match_rule_supported(int opcode) { ++ if (!has_match_rule(opcode)) ++ return false; ++ ++ switch (opcode) { ++ case Op_RoundDoubleMode: ++ if (!UseLSX) ++ return false; ++ case Op_PopCountI: ++ case Op_PopCountL: ++ return UsePopCountInstruction; ++ default: ++ break; ++ } ++ ++ return true; // Per default match rules are supported. ++} ++ ++const bool Matcher::match_rule_supported_vector(int opcode, int vlen, BasicType bt) { ++ if (!match_rule_supported(opcode) || !vector_size_supported(bt, vlen)) ++ return false; ++ ++ switch (opcode) { ++ case Op_RotateRightV: ++ case Op_RotateLeftV: ++ if (bt != T_INT && bt != T_LONG) { ++ return false; ++ } ++ break; ++ case Op_MaxReductionV: ++ case Op_MinReductionV: ++ if (bt == T_FLOAT || bt == T_DOUBLE) { ++ return false; ++ } ++ break; ++ case Op_VectorCastB2X: ++ case Op_VectorLoadMask: ++ case Op_VectorStoreMask: ++ case Op_VectorLoadShuffle: ++ case Op_VectorRearrange: ++ if (vlen < 16) ++ return false; ++ break; ++ case Op_VectorCastS2X: ++ if (vlen < 8) ++ return false; ++ break; ++ case Op_VectorCastI2X: ++ case Op_VectorCastF2X: ++ if (vlen < 4) ++ return false; ++ break; ++ default: ++ break; ++ } ++ ++ return true; ++} ++ ++// Vector calling convention not yet implemented. ++const bool Matcher::supports_vector_calling_convention(void) { ++ return false; ++} ++ ++OptoRegPair Matcher::vector_return_value(uint ideal_reg) { ++ Unimplemented(); ++ return OptoRegPair(0, 0); ++} ++ ++const int Matcher::float_pressure(int default_pressure_threshold) { ++ Unimplemented(); ++ return default_pressure_threshold; ++} ++ ++bool Matcher::is_short_branch_offset(int rule, int br_size, int offset) { ++ const int safety_zone = 3 * BytesPerInstWord; ++ int offs = offset - br_size + 4; ++ // To be conservative on LoongArch ++ // branch node should be end with: ++ // branch inst ++ offs = (offs < 0 ? 
offs - safety_zone : offs + safety_zone) >> 2; ++ switch (rule) { ++ case jmpDir_long_rule: ++ case jmpDir_short_rule: ++ return Assembler::is_simm(offs, 26); ++ case jmpCon_flags_long_rule: ++ case jmpCon_flags_short_rule: ++ case branchConP_0_long_rule: ++ case branchConP_0_short_rule: ++ case branchConN2P_0_long_rule: ++ case branchConN2P_0_short_rule: ++ case cmpN_null_branch_long_rule: ++ case cmpN_null_branch_short_rule: ++ case branchConF_reg_reg_long_rule: ++ case branchConF_reg_reg_short_rule: ++ case branchConD_reg_reg_long_rule: ++ case branchConD_reg_reg_short_rule: ++ return Assembler::is_simm(offs, 21); ++ default: ++ return Assembler::is_simm(offs, 16); ++ } ++ return false; ++} ++ ++MachOper* Matcher::pd_specialize_generic_vector_operand(MachOper* generic_opnd, uint ideal_reg, bool is_temp) { ++ ShouldNotReachHere(); // generic vector operands not supported ++ return NULL; ++} ++ ++bool Matcher::is_generic_reg2reg_move(MachNode* m) { ++ ShouldNotReachHere(); // generic vector operands not supported ++ return false; ++} ++ ++bool Matcher::is_generic_vector(MachOper* opnd) { ++ ShouldNotReachHere(); // generic vector operands not supported ++ return false; ++} ++ ++const RegMask* Matcher::predicate_reg_mask(void) { ++ return NULL; ++} ++ ++const TypeVect* Matcher::predicate_reg_type(const Type* elemTy, int length) { ++ return NULL; ++} ++ ++const int Matcher::scalable_vector_reg_size(const BasicType bt) { ++ return -1; ++} ++ ++// Vector ideal reg ++const uint Matcher::vector_ideal_reg(int size) { ++ assert(MaxVectorSize == 16 || MaxVectorSize == 32, ""); ++ switch(size) { ++ case 16: return Op_VecX; ++ case 32: return Op_VecY; ++ } ++ ShouldNotReachHere(); ++ return 0; ++} ++ ++// Should the matcher clone input 'm' of node 'n'? ++bool Matcher::pd_clone_node(Node* n, Node* m, Matcher::MStack& mstack) { ++ return false; ++} ++ ++// Should the Matcher clone shifts on addressing modes, expecting them ++// to be subsumed into complex addressing expressions or compute them ++// into registers? ++bool Matcher::pd_clone_address_expressions(AddPNode* m, Matcher::MStack& mstack, VectorSet& address_visited) { ++ return clone_base_plus_offset_address(m, mstack, address_visited); ++} ++ ++// Max vector size in bytes. 0 if not supported. ++const int Matcher::vector_width_in_bytes(BasicType bt) { ++ return (int)MaxVectorSize; ++} ++ ++// Limits on vector size (number of elements) loaded into vector. ++const int Matcher::max_vector_size(const BasicType bt) { ++ assert(is_java_primitive(bt), "only primitive type vectors"); ++ return vector_width_in_bytes(bt)/type2aelembytes(bt); ++} ++ ++const int Matcher::min_vector_size(const BasicType bt) { ++ int max_size = max_vector_size(bt); ++ int size = 0; ++ ++ if (UseLSX) size = 16; ++ size = size / type2aelembytes(bt); ++ return MIN2(size,max_size); ++} ++ ++// Register for DIVI projection of divmodI ++RegMask Matcher::divI_proj_mask() { ++ ShouldNotReachHere(); ++ return RegMask(); ++} ++ ++// Register for MODI projection of divmodI ++RegMask Matcher::modI_proj_mask() { ++ ShouldNotReachHere(); ++ return RegMask(); ++} ++ ++// Register for DIVL projection of divmodL ++RegMask Matcher::divL_proj_mask() { ++ ShouldNotReachHere(); ++ return RegMask(); ++} ++ ++// Return whether or not this register is ever used as an argument. This ++// function is used on startup to build the trampoline stubs in generateOptoStub. 
++// Registers not mentioned will be killed by the VM call in the trampoline, and ++// arguments in those registers not be available to the callee. ++bool Matcher::can_be_java_arg( int reg ) { ++ // Refer to: [sharedRuntime_loongarch_64.cpp] SharedRuntime::java_calling_convention() ++ if ( reg == T0_num || reg == T0_H_num ++ || reg == A0_num || reg == A0_H_num ++ || reg == A1_num || reg == A1_H_num ++ || reg == A2_num || reg == A2_H_num ++ || reg == A3_num || reg == A3_H_num ++ || reg == A4_num || reg == A4_H_num ++ || reg == A5_num || reg == A5_H_num ++ || reg == A6_num || reg == A6_H_num ++ || reg == A7_num || reg == A7_H_num ) ++ return true; ++ ++ if ( reg == F0_num || reg == F0_H_num ++ || reg == F1_num || reg == F1_H_num ++ || reg == F2_num || reg == F2_H_num ++ || reg == F3_num || reg == F3_H_num ++ || reg == F4_num || reg == F4_H_num ++ || reg == F5_num || reg == F5_H_num ++ || reg == F6_num || reg == F6_H_num ++ || reg == F7_num || reg == F7_H_num ) ++ return true; ++ ++ return false; ++} ++ ++bool Matcher::is_spillable_arg( int reg ) { ++ return can_be_java_arg(reg); ++} ++ ++bool Matcher::use_asm_for_ldiv_by_con( jlong divisor ) { ++ return false; ++} ++ ++// Register for MODL projection of divmodL ++RegMask Matcher::modL_proj_mask() { ++ ShouldNotReachHere(); ++ return RegMask(); ++} ++ ++const RegMask Matcher::method_handle_invoke_SP_save_mask() { ++ return FP_REG_mask(); ++} ++ ++int CallStaticJavaDirectNode::compute_padding(int current_offset) const { ++ const uintx m = alignment_required() - 1; ++ return mask_bits(current_offset + m, ~m) - current_offset; ++} ++ ++int CallDynamicJavaDirectNode::compute_padding(int current_offset) const { ++ const uintx m = alignment_required() - 1; ++ return mask_bits(current_offset + m, ~m) - current_offset; ++} ++ ++int CallLeafNoFPDirectNode::compute_padding(int current_offset) const { ++ const uintx m = alignment_required() - 1; ++ return mask_bits(current_offset + m, ~m) - current_offset; ++} ++ ++int CallLeafDirectNode::compute_padding(int current_offset) const { ++ const uintx m = alignment_required() - 1; ++ return mask_bits(current_offset + m, ~m) - current_offset; ++} ++ ++int CallRuntimeDirectNode::compute_padding(int current_offset) const { ++ const uintx m = alignment_required() - 1; ++ return mask_bits(current_offset + m, ~m) - current_offset; ++} ++ ++#ifndef PRODUCT ++void MachBreakpointNode::format( PhaseRegAlloc *, outputStream* st ) const { ++ st->print("BRK"); ++} ++#endif ++ ++void MachBreakpointNode::emit(CodeBuffer &cbuf, PhaseRegAlloc* ra_) const { ++ C2_MacroAssembler _masm(&cbuf); ++ __ brk(5); ++} ++ ++uint MachBreakpointNode::size(PhaseRegAlloc* ra_) const { ++ return MachNode::size(ra_); ++} ++ ++ ++ ++// !!!!! Special hack to get all type of calls to specify the byte offset ++// from the start of the call to the point where the return address ++// will point. 
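++// As encoded below, a static call site is a single bl, so its return address lies
++// NativeCall::instruction_size bytes past the start of the call; a dynamic call first
++// materializes the IC Klass with a constant load (NativeMovConstReg:
++// lu12i_w/ori/lu32i_d/lu52i_d) before the bl, hence the larger offset.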
++int MachCallStaticJavaNode::ret_addr_offset() { ++ // bl ++ return NativeCall::instruction_size; ++} ++ ++int MachCallDynamicJavaNode::ret_addr_offset() { ++ // lu12i_w IC_Klass, ++ // ori IC_Klass, ++ // lu32i_d IC_Klass ++ // lu52i_d IC_Klass ++ ++ // bl ++ return NativeMovConstReg::instruction_size + NativeCall::instruction_size; ++} ++ ++//============================================================================= ++ ++// Figure out which register class each belongs in: rc_int, rc_float, rc_stack ++enum RC { rc_bad, rc_int, rc_float, rc_stack }; ++static enum RC rc_class( OptoReg::Name reg ) { ++ if( !OptoReg::is_valid(reg) ) return rc_bad; ++ if (OptoReg::is_stack(reg)) return rc_stack; ++ VMReg r = OptoReg::as_VMReg(reg); ++ if (r->is_Register()) return rc_int; ++ assert(r->is_FloatRegister(), "must be"); ++ return rc_float; ++} ++ ++// Helper methods for MachSpillCopyNode::implementation(). ++static int vec_mov_helper(CodeBuffer *cbuf, bool do_size, int src_lo, int dst_lo, ++ int src_hi, int dst_hi, uint ireg, outputStream* st) { ++ int size = 0; ++ if (cbuf) { ++ MacroAssembler _masm(cbuf); ++ int offset = __ offset(); ++ switch (ireg) { ++ case Op_VecX: ++ __ vori_b(as_FloatRegister(Matcher::_regEncode[dst_lo]), as_FloatRegister(Matcher::_regEncode[src_lo]), 0); ++ break; ++ case Op_VecY: ++ __ xvori_b(as_FloatRegister(Matcher::_regEncode[dst_lo]), as_FloatRegister(Matcher::_regEncode[src_lo]), 0); ++ break; ++ default: ++ ShouldNotReachHere(); ++ } ++#ifndef PRODUCT ++ } else if (!do_size) { ++ switch (ireg) { ++ case Op_VecX: ++ st->print("vori.b %s, %s, 0\t# spill", Matcher::regName[dst_lo], Matcher::regName[src_lo]); ++ break; ++ case Op_VecY: ++ st->print("xvori.b %s, %s, 0\t# spill", Matcher::regName[dst_lo], Matcher::regName[src_lo]); ++ break; ++ default: ++ ShouldNotReachHere(); ++ } ++#endif ++ } ++ size += 4; ++ return size; ++} ++ ++static int vec_spill_helper(CodeBuffer *cbuf, bool do_size, bool is_load, ++ int stack_offset, int reg, uint ireg, outputStream* st) { ++ int size = 0; ++ if (cbuf) { ++ MacroAssembler _masm(cbuf); ++ int offset = __ offset(); ++ if (is_load) { ++ switch (ireg) { ++ case Op_VecX: ++ __ vld(as_FloatRegister(Matcher::_regEncode[reg]), SP, stack_offset); ++ break; ++ case Op_VecY: ++ __ xvld(as_FloatRegister(Matcher::_regEncode[reg]), SP, stack_offset); ++ break; ++ default: ++ ShouldNotReachHere(); ++ } ++ } else { // store ++ switch (ireg) { ++ case Op_VecX: ++ __ vst(as_FloatRegister(Matcher::_regEncode[reg]), SP, stack_offset); ++ break; ++ case Op_VecY: ++ __ xvst(as_FloatRegister(Matcher::_regEncode[reg]), SP, stack_offset); ++ break; ++ default: ++ ShouldNotReachHere(); ++ } ++ } ++#ifndef PRODUCT ++ } else if (!do_size) { ++ if (is_load) { ++ switch (ireg) { ++ case Op_VecX: ++ st->print("vld %s, [SP + %d]\t# spill", Matcher::regName[reg], stack_offset); ++ break; ++ case Op_VecY: ++ st->print("xvld %s, [SP + %d]\t# spill", Matcher::regName[reg], stack_offset); ++ break; ++ default: ++ ShouldNotReachHere(); ++ } ++ } else { // store ++ switch (ireg) { ++ case Op_VecX: ++ st->print("vst %s, [SP + %d]\t# spill", Matcher::regName[reg], stack_offset); ++ break; ++ case Op_VecY: ++ st->print("xvst %s, [SP + %d]\t# spill", Matcher::regName[reg], stack_offset); ++ break; ++ default: ++ ShouldNotReachHere(); ++ } ++ } ++#endif ++ } ++ size += 4; ++ return size; ++} ++ ++static int vec_stack_to_stack_helper(CodeBuffer *cbuf, int src_offset, ++ int dst_offset, uint ireg, outputStream* st) { ++ int size = 0; ++ if (cbuf) { ++ MacroAssembler 
_masm(cbuf); ++ switch (ireg) { ++ case Op_VecX: ++ __ vld(F23, SP, src_offset); ++ __ vst(F23, SP, dst_offset); ++ break; ++ case Op_VecY: ++ __ xvld(F23, SP, src_offset); ++ __ xvst(F23, SP, dst_offset); ++ break; ++ default: ++ ShouldNotReachHere(); ++ } ++#ifndef PRODUCT ++ } else { ++ switch (ireg) { ++ case Op_VecX: ++ st->print("vld f23, %d(sp)\n\t" ++ "vst f23, %d(sp)\t# 128-bit mem-mem spill", ++ src_offset, dst_offset); ++ break; ++ case Op_VecY: ++ st->print("xvld f23, %d(sp)\n\t" ++ "xvst f23, %d(sp)\t# 256-bit mem-mem spill", ++ src_offset, dst_offset); ++ break; ++ default: ++ ShouldNotReachHere(); ++ } ++#endif ++ } ++ size += 8; ++ return size; ++} ++ ++uint MachSpillCopyNode::implementation( CodeBuffer *cbuf, PhaseRegAlloc *ra_, bool do_size, outputStream* st ) const { ++ // Get registers to move ++ OptoReg::Name src_second = ra_->get_reg_second(in(1)); ++ OptoReg::Name src_first = ra_->get_reg_first(in(1)); ++ OptoReg::Name dst_second = ra_->get_reg_second(this ); ++ OptoReg::Name dst_first = ra_->get_reg_first(this ); ++ ++ enum RC src_second_rc = rc_class(src_second); ++ enum RC src_first_rc = rc_class(src_first); ++ enum RC dst_second_rc = rc_class(dst_second); ++ enum RC dst_first_rc = rc_class(dst_first); ++ ++ assert(OptoReg::is_valid(src_first) && OptoReg::is_valid(dst_first), "must move at least 1 register" ); ++ ++ // Generate spill code! ++ ++ if( src_first == dst_first && src_second == dst_second ) ++ return 0; // Self copy, no move ++ ++ if (bottom_type()->isa_vect() != NULL) { ++ uint ireg = ideal_reg(); ++ assert((src_first_rc != rc_int && dst_first_rc != rc_int), "sanity"); ++ if (src_first_rc == rc_stack && dst_first_rc == rc_stack) { ++ // mem -> mem ++ int src_offset = ra_->reg2offset(src_first); ++ int dst_offset = ra_->reg2offset(dst_first); ++ vec_stack_to_stack_helper(cbuf, src_offset, dst_offset, ireg, st); ++ } else if (src_first_rc == rc_float && dst_first_rc == rc_float) { ++ vec_mov_helper(cbuf, do_size, src_first, dst_first, src_second, dst_second, ireg, st); ++ } else if (src_first_rc == rc_float && dst_first_rc == rc_stack) { ++ int stack_offset = ra_->reg2offset(dst_first); ++ vec_spill_helper(cbuf, do_size, false, stack_offset, src_first, ireg, st); ++ } else if (src_first_rc == rc_stack && dst_first_rc == rc_float) { ++ int stack_offset = ra_->reg2offset(src_first); ++ vec_spill_helper(cbuf, do_size, true, stack_offset, dst_first, ireg, st); ++ } else { ++ ShouldNotReachHere(); ++ } ++ return 0; ++ } ++ ++ if (src_first_rc == rc_stack) { ++ // mem -> ++ if (dst_first_rc == rc_stack) { ++ // mem -> mem ++ assert(src_second != dst_first, "overlap"); ++ if ((src_first & 1) == 0 && src_first + 1 == src_second && ++ (dst_first & 1) == 0 && dst_first + 1 == dst_second) { ++ // 64-bit ++ int src_offset = ra_->reg2offset(src_first); ++ int dst_offset = ra_->reg2offset(dst_first); ++ if (cbuf) { ++ C2_MacroAssembler _masm(cbuf); ++ __ ld_d(AT, Address(SP, src_offset)); ++ __ st_d(AT, Address(SP, dst_offset)); ++#ifndef PRODUCT ++ } else { ++ st->print("\tld_d AT, [SP + #%d]\t# 64-bit mem-mem spill 1\n\t" ++ "st_d AT, [SP + #%d]", ++ src_offset, dst_offset); ++#endif ++ } ++ } else { ++ // 32-bit ++ assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform"); ++ assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform"); ++ // No pushl/popl, so: ++ int src_offset = ra_->reg2offset(src_first); ++ int dst_offset = ra_->reg2offset(dst_first); ++ if (cbuf) { ++ C2_MacroAssembler _masm(cbuf); ++ __ ld_w(AT, 
Address(SP, src_offset)); ++ __ st_w(AT, Address(SP, dst_offset)); ++#ifndef PRODUCT ++ } else { ++ st->print("\tld_w AT, [SP + #%d] spill 2\n\t" ++ "st_w AT, [SP + #%d]\n\t", ++ src_offset, dst_offset); ++#endif ++ } ++ } ++ return 0; ++ } else if (dst_first_rc == rc_int) { ++ // mem -> gpr ++ if ((src_first & 1) == 0 && src_first + 1 == src_second && ++ (dst_first & 1) == 0 && dst_first + 1 == dst_second) { ++ // 64-bit ++ int offset = ra_->reg2offset(src_first); ++ if (cbuf) { ++ C2_MacroAssembler _masm(cbuf); ++ __ ld_d(as_Register(Matcher::_regEncode[dst_first]), Address(SP, offset)); ++#ifndef PRODUCT ++ } else { ++ st->print("\tld_d %s, [SP + #%d]\t# spill 3", ++ Matcher::regName[dst_first], ++ offset); ++#endif ++ } ++ } else { ++ // 32-bit ++ assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform"); ++ assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform"); ++ int offset = ra_->reg2offset(src_first); ++ if (cbuf) { ++ C2_MacroAssembler _masm(cbuf); ++ if (this->ideal_reg() == Op_RegI) ++ __ ld_w(as_Register(Matcher::_regEncode[dst_first]), Address(SP, offset)); ++ else { ++ if (Assembler::is_simm(offset, 12)) { ++ __ ld_wu(as_Register(Matcher::_regEncode[dst_first]), Address(SP, offset)); ++ } else { ++ __ li(AT, offset); ++ __ ldx_wu(as_Register(Matcher::_regEncode[dst_first]), SP, AT); ++ } ++ } ++#ifndef PRODUCT ++ } else { ++ if (this->ideal_reg() == Op_RegI) ++ st->print("\tld_w %s, [SP + #%d]\t# spill 4", ++ Matcher::regName[dst_first], ++ offset); ++ else ++ st->print("\tld_wu %s, [SP + #%d]\t# spill 5", ++ Matcher::regName[dst_first], ++ offset); ++#endif ++ } ++ } ++ return 0; ++ } else if (dst_first_rc == rc_float) { ++ // mem-> xmm ++ if ((src_first & 1) == 0 && src_first + 1 == src_second && ++ (dst_first & 1) == 0 && dst_first + 1 == dst_second) { ++ // 64-bit ++ int offset = ra_->reg2offset(src_first); ++ if (cbuf) { ++ C2_MacroAssembler _masm(cbuf); ++ if (Assembler::is_simm(offset, 12)) { ++ __ fld_d( as_FloatRegister(Matcher::_regEncode[dst_first]), Address(SP, offset)); ++ } else { ++ __ li(AT, offset); ++ __ fldx_d( as_FloatRegister(Matcher::_regEncode[dst_first]), SP, AT); ++ } ++#ifndef PRODUCT ++ } else { ++ st->print("\tfld_d %s, [SP + #%d]\t# spill 6", ++ Matcher::regName[dst_first], ++ offset); ++#endif ++ } ++ } else { ++ // 32-bit ++ assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform"); ++ assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform"); ++ int offset = ra_->reg2offset(src_first); ++ if (cbuf) { ++ C2_MacroAssembler _masm(cbuf); ++ __ fld_s( as_FloatRegister(Matcher::_regEncode[dst_first]), Address(SP, offset)); ++#ifndef PRODUCT ++ } else { ++ st->print("\tfld_s %s, [SP + #%d]\t# spill 7", ++ Matcher::regName[dst_first], ++ offset); ++#endif ++ } ++ } ++ } ++ return 0; ++ } else if (src_first_rc == rc_int) { ++ // gpr -> ++ if (dst_first_rc == rc_stack) { ++ // gpr -> mem ++ if ((src_first & 1) == 0 && src_first + 1 == src_second && ++ (dst_first & 1) == 0 && dst_first + 1 == dst_second) { ++ // 64-bit ++ int offset = ra_->reg2offset(dst_first); ++ if (cbuf) { ++ C2_MacroAssembler _masm(cbuf); ++ __ st_d(as_Register(Matcher::_regEncode[src_first]), Address(SP, offset)); ++#ifndef PRODUCT ++ } else { ++ st->print("\tst_d %s, [SP + #%d] # spill 8", ++ Matcher::regName[src_first], ++ offset); ++#endif ++ } ++ } else { ++ // 32-bit ++ assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform"); ++ assert(!((dst_first & 1) == 0 && 
dst_first + 1 == dst_second), "no transform"); ++ int offset = ra_->reg2offset(dst_first); ++ if (cbuf) { ++ C2_MacroAssembler _masm(cbuf); ++ __ st_w(as_Register(Matcher::_regEncode[src_first]), Address(SP, offset)); ++#ifndef PRODUCT ++ } else { ++ st->print("\tst_w %s, [SP + #%d]\t# spill 9", ++ Matcher::regName[src_first], offset); ++#endif ++ } ++ } ++ return 0; ++ } else if (dst_first_rc == rc_int) { ++ // gpr -> gpr ++ if ((src_first & 1) == 0 && src_first + 1 == src_second && ++ (dst_first & 1) == 0 && dst_first + 1 == dst_second) { ++ // 64-bit ++ if (cbuf) { ++ C2_MacroAssembler _masm(cbuf); ++ __ move(as_Register(Matcher::_regEncode[dst_first]), ++ as_Register(Matcher::_regEncode[src_first])); ++#ifndef PRODUCT ++ } else { ++ st->print("\tmove(64bit) %s <-- %s\t# spill 10", ++ Matcher::regName[dst_first], ++ Matcher::regName[src_first]); ++#endif ++ } ++ return 0; ++ } else { ++ // 32-bit ++ assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform"); ++ assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform"); ++ if (cbuf) { ++ C2_MacroAssembler _masm(cbuf); ++ if (this->ideal_reg() == Op_RegI) ++ __ move_u32(as_Register(Matcher::_regEncode[dst_first]), as_Register(Matcher::_regEncode[src_first])); ++ else ++ __ add_d(as_Register(Matcher::_regEncode[dst_first]), as_Register(Matcher::_regEncode[src_first]), R0); ++#ifndef PRODUCT ++ } else { ++ st->print("\n\t"); ++ st->print("move(32-bit) %s <-- %s\t# spill 11", ++ Matcher::regName[dst_first], ++ Matcher::regName[src_first]); ++#endif ++ } ++ return 0; ++ } ++ } else if (dst_first_rc == rc_float) { ++ // gpr -> xmm ++ if ((src_first & 1) == 0 && src_first + 1 == src_second && ++ (dst_first & 1) == 0 && dst_first + 1 == dst_second) { ++ // 64-bit ++ if (cbuf) { ++ C2_MacroAssembler _masm(cbuf); ++ __ movgr2fr_d(as_FloatRegister(Matcher::_regEncode[dst_first]), as_Register(Matcher::_regEncode[src_first])); ++#ifndef PRODUCT ++ } else { ++ st->print("\n\t"); ++ st->print("movgr2fr_d %s, %s\t# spill 12", ++ Matcher::regName[dst_first], ++ Matcher::regName[src_first]); ++#endif ++ } ++ } else { ++ // 32-bit ++ assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform"); ++ assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform"); ++ if (cbuf) { ++ C2_MacroAssembler _masm(cbuf); ++ __ movgr2fr_w(as_FloatRegister(Matcher::_regEncode[dst_first]), as_Register(Matcher::_regEncode[src_first])); ++#ifndef PRODUCT ++ } else { ++ st->print("\n\t"); ++ st->print("movgr2fr_w %s, %s\t# spill 13", ++ Matcher::regName[dst_first], ++ Matcher::regName[src_first]); ++#endif ++ } ++ } ++ return 0; ++ } ++ } else if (src_first_rc == rc_float) { ++ // xmm -> ++ if (dst_first_rc == rc_stack) { ++ // xmm -> mem ++ if ((src_first & 1) == 0 && src_first + 1 == src_second && ++ (dst_first & 1) == 0 && dst_first + 1 == dst_second) { ++ // 64-bit ++ int offset = ra_->reg2offset(dst_first); ++ if (cbuf) { ++ C2_MacroAssembler _masm(cbuf); ++ if (Assembler::is_simm(offset, 12)) { ++ __ fst_d( as_FloatRegister(Matcher::_regEncode[src_first]), Address(SP, offset) ); ++ } ++ else { ++ __ li(AT, offset); ++ __ fstx_d( as_FloatRegister(Matcher::_regEncode[src_first]), SP, AT); ++ } ++#ifndef PRODUCT ++ } else { ++ st->print("\n\t"); ++ st->print("fst_d %s, [SP + #%d]\t# spill 14", ++ Matcher::regName[src_first], ++ offset); ++#endif ++ } ++ } else { ++ // 32-bit ++ assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform"); ++ assert(!((dst_first & 1) == 0 && 
dst_first + 1 == dst_second), "no transform"); ++ int offset = ra_->reg2offset(dst_first); ++ if (cbuf) { ++ C2_MacroAssembler _masm(cbuf); ++ __ fst_s(as_FloatRegister(Matcher::_regEncode[src_first]), Address(SP, offset)); ++#ifndef PRODUCT ++ } else { ++ st->print("\n\t"); ++ st->print("fst_s %s, [SP + #%d]\t# spill 15", ++ Matcher::regName[src_first], ++ offset); ++#endif ++ } ++ } ++ return 0; ++ } else if (dst_first_rc == rc_int) { ++ // xmm -> gpr ++ if ((src_first & 1) == 0 && src_first + 1 == src_second && ++ (dst_first & 1) == 0 && dst_first + 1 == dst_second) { ++ // 64-bit ++ if (cbuf) { ++ C2_MacroAssembler _masm(cbuf); ++ __ movfr2gr_d( as_Register(Matcher::_regEncode[dst_first]), as_FloatRegister(Matcher::_regEncode[src_first])); ++#ifndef PRODUCT ++ } else { ++ st->print("\n\t"); ++ st->print("movfr2gr_d %s, %s\t# spill 16", ++ Matcher::regName[dst_first], ++ Matcher::regName[src_first]); ++#endif ++ } ++ } else { ++ // 32-bit ++ assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform"); ++ assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform"); ++ if (cbuf) { ++ C2_MacroAssembler _masm(cbuf); ++ __ movfr2gr_s( as_Register(Matcher::_regEncode[dst_first]), as_FloatRegister(Matcher::_regEncode[src_first])); ++#ifndef PRODUCT ++ } else { ++ st->print("\n\t"); ++ st->print("movfr2gr_s %s, %s\t# spill 17", ++ Matcher::regName[dst_first], ++ Matcher::regName[src_first]); ++#endif ++ } ++ } ++ return 0; ++ } else if (dst_first_rc == rc_float) { ++ // xmm -> xmm ++ if ((src_first & 1) == 0 && src_first + 1 == src_second && ++ (dst_first & 1) == 0 && dst_first + 1 == dst_second) { ++ // 64-bit ++ if (cbuf) { ++ C2_MacroAssembler _masm(cbuf); ++ __ fmov_d( as_FloatRegister(Matcher::_regEncode[dst_first]), as_FloatRegister(Matcher::_regEncode[src_first])); ++#ifndef PRODUCT ++ } else { ++ st->print("\n\t"); ++ st->print("fmov_d %s <-- %s\t# spill 18", ++ Matcher::regName[dst_first], ++ Matcher::regName[src_first]); ++#endif ++ } ++ } else { ++ // 32-bit ++ assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform"); ++ assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform"); ++ if (cbuf) { ++ C2_MacroAssembler _masm(cbuf); ++ __ fmov_s( as_FloatRegister(Matcher::_regEncode[dst_first]), as_FloatRegister(Matcher::_regEncode[src_first])); ++#ifndef PRODUCT ++ } else { ++ st->print("\n\t"); ++ st->print("fmov_s %s <-- %s\t# spill 19", ++ Matcher::regName[dst_first], ++ Matcher::regName[src_first]); ++#endif ++ } ++ } ++ return 0; ++ } ++ } ++ ++ assert(0," foo "); ++ Unimplemented(); ++ return 0; ++} ++ ++#ifndef PRODUCT ++void MachSpillCopyNode::format( PhaseRegAlloc *ra_, outputStream* st ) const { ++ implementation( NULL, ra_, false, st ); ++} ++#endif ++ ++void MachSpillCopyNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const { ++ implementation( &cbuf, ra_, false, NULL ); ++} ++ ++uint MachSpillCopyNode::size(PhaseRegAlloc *ra_) const { ++ return MachNode::size(ra_); ++} ++ ++//============================================================================= ++#ifndef PRODUCT ++void MachEpilogNode::format( PhaseRegAlloc *ra_, outputStream* st ) const { ++ Compile *C = ra_->C; ++ int framesize = C->output()->frame_size_in_bytes(); ++ ++ assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned"); ++ ++ st->print_cr("ld_d RA, SP, %d # Restore RA @ MachEpilogNode", -wordSize); ++ st->print("\t"); ++ st->print_cr("ld_d FP, SP, %d # Restore FP @ MachEpilogNode", -wordSize*2); ++ 
st->print("\t"); ++ if (Assembler::is_simm(framesize, 12)) { ++ st->print_cr("addi_d SP, SP, %d # Rlease stack @ MachEpilogNode", framesize); ++ } else { ++ st->print_cr("li AT, %d # Rlease stack @ MachEpilogNode", framesize); ++ st->print_cr("add_d SP, SP, AT # Rlease stack @ MachEpilogNode"); ++ } ++ if( do_polling() && C->is_method_compilation() ) { ++ st->print("\t"); ++ st->print_cr("ld_d AT, poll_offset[thread] #polling_word_offset\n\t" ++ "ld_w AT, [AT]\t" ++ "# Safepoint: poll for GC"); ++ } ++} ++#endif ++ ++void MachEpilogNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const { ++ Compile *C = ra_->C; ++ C2_MacroAssembler _masm(&cbuf); ++ int framesize = C->output()->frame_size_in_bytes(); ++ ++ assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned"); ++ ++ __ remove_frame(framesize); ++ ++ if (StackReservedPages > 0 && C->has_reserved_stack_access()) { ++ __ reserved_stack_check(); ++ } ++ ++ Register thread = TREG; ++#ifndef OPT_THREAD ++ __ get_thread(thread); ++#endif ++ ++ if( do_polling() && C->is_method_compilation() ) { ++ Label dummy_label; ++ Label* code_stub = &dummy_label; ++ if (!C->output()->in_scratch_emit_size()) { ++ code_stub = &C->output()->safepoint_poll_table()->add_safepoint(__ offset()); ++ } ++ __ relocate(relocInfo::poll_return_type); ++ __ safepoint_poll(*code_stub, thread, true /* at_return */, false /* acquire */, true /* in_nmethod */); ++ } ++} ++ ++uint MachEpilogNode::size(PhaseRegAlloc *ra_) const { ++ return MachNode::size(ra_); // too many variables; just compute it the hard way ++} ++ ++int MachEpilogNode::reloc() const { ++ return 0; // a large enough number ++} ++ ++const Pipeline * MachEpilogNode::pipeline() const { ++ return MachNode::pipeline_class(); ++} ++ ++//============================================================================= ++ ++#ifndef PRODUCT ++void BoxLockNode::format( PhaseRegAlloc *ra_, outputStream* st ) const { ++ int offset = ra_->reg2offset(in_RegMask(0).find_first_elem()); ++ int reg = ra_->get_reg_first(this); ++ st->print("ADDI_D %s, SP, %d @BoxLockNode",Matcher::regName[reg],offset); ++} ++#endif ++ ++ ++uint BoxLockNode::size(PhaseRegAlloc *ra_) const { ++ int offset = ra_->reg2offset(in_RegMask(0).find_first_elem()); ++ ++ if (Assembler::is_simm(offset, 12)) ++ return 4; ++ else ++ return 3 * 4; ++} ++ ++void BoxLockNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const { ++ C2_MacroAssembler _masm(&cbuf); ++ int offset = ra_->reg2offset(in_RegMask(0).find_first_elem()); ++ int reg = ra_->get_encode(this); ++ ++ if (Assembler::is_simm(offset, 12)) { ++ __ addi_d(as_Register(reg), SP, offset); ++ } else { ++ __ lu12i_w(AT, Assembler::split_low20(offset >> 12)); ++ __ ori(AT, AT, Assembler::split_low12(offset)); ++ __ add_d(as_Register(reg), SP, AT); ++ } ++} ++ ++int MachCallRuntimeNode::ret_addr_offset() { ++ // pcaddu18i ++ // jirl ++ return NativeFarCall::instruction_size; ++} ++ ++int MachCallNativeNode::ret_addr_offset() { ++ Unimplemented(); ++ return -1; ++} ++ ++//============================================================================= ++#ifndef PRODUCT ++void MachNopNode::format( PhaseRegAlloc *, outputStream* st ) const { ++ st->print("NOP \t# %d bytes pad for loops and calls", 4 * _count); ++} ++#endif ++ ++void MachNopNode::emit(CodeBuffer &cbuf, PhaseRegAlloc * ) const { ++ C2_MacroAssembler _masm(&cbuf); ++ int i = 0; ++ for(i = 0; i < _count; i++) ++ __ nop(); ++} ++ ++uint MachNopNode::size(PhaseRegAlloc *) const { ++ return 4 * _count; ++} ++const Pipeline* 
MachNopNode::pipeline() const { ++ return MachNode::pipeline_class(); ++} ++ ++//============================================================================= ++ ++//============================================================================= ++#ifndef PRODUCT ++void MachUEPNode::format( PhaseRegAlloc *ra_, outputStream* st ) const { ++ st->print_cr("load_klass(T4, T0)"); ++ st->print_cr("\tbeq(T4, iCache, L)"); ++ st->print_cr("\tjmp(SharedRuntime::get_ic_miss_stub(), relocInfo::runtime_call_type)"); ++ st->print_cr(" L:"); ++} ++#endif ++ ++
++void MachUEPNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const { ++ C2_MacroAssembler _masm(&cbuf); ++ int ic_reg = Matcher::inline_cache_reg_encode(); ++ Label L; ++ Register receiver = T0; ++ Register iCache = as_Register(ic_reg); ++ ++ __ load_klass(T4, receiver); ++ __ beq(T4, iCache, L); ++ __ jmp((address)SharedRuntime::get_ic_miss_stub(), relocInfo::runtime_call_type); ++ __ bind(L); ++} ++ ++uint MachUEPNode::size(PhaseRegAlloc *ra_) const { ++ return MachNode::size(ra_); ++} ++ ++ ++
++//============================================================================= ++ ++const RegMask& MachConstantBaseNode::_out_RegMask = P_REG_mask(); ++ ++int ConstantTable::calculate_table_base_offset() const { ++ return 0; // absolute addressing, no offset ++} ++ ++bool MachConstantBaseNode::requires_postalloc_expand() const { return false; } ++void MachConstantBaseNode::postalloc_expand(GrowableArray <Node *> *nodes, PhaseRegAlloc *ra_) { ++ ShouldNotReachHere(); ++} ++
++void MachConstantBaseNode::emit(CodeBuffer& cbuf, PhaseRegAlloc* ra_) const { ++ Compile* C = ra_->C; ++ ConstantTable& constant_table = C->output()->constant_table(); ++ C2_MacroAssembler _masm(&cbuf); ++ ++ Register Rtoc = as_Register(ra_->get_encode(this)); ++ CodeSection* consts_section = cbuf.consts(); ++ int consts_size = consts_section->align_at_start(consts_section->size()); ++ assert(constant_table.size() == consts_size, "must be equal"); ++ ++ if (consts_section->size()) { ++ assert((CodeBuffer::SECT_CONSTS + 1) == CodeBuffer::SECT_INSTS, ++ "insts must immediately follow consts"); ++ // Materialize the constant table base. ++ address baseaddr = cbuf.insts()->start() - consts_size + -(constant_table.table_base_offset()); ++ jint offs = (baseaddr - __ pc()) >> 2; ++ guarantee(Assembler::is_simm(offs, 20), "Not signed 20-bit offset"); ++ __ pcaddi(Rtoc, offs); ++ } ++} ++
++uint MachConstantBaseNode::size(PhaseRegAlloc* ra_) const { ++ // pcaddi ++ return 1 * BytesPerInstWord; ++} ++ ++#ifndef PRODUCT ++void MachConstantBaseNode::format(PhaseRegAlloc* ra_, outputStream* st) const { ++ Register r = as_Register(ra_->get_encode(this)); ++ st->print("pcaddi %s, &constanttable (constant table base) @ MachConstantBaseNode", r->name()); ++} ++#endif ++ ++
++//============================================================================= ++#ifndef PRODUCT ++void MachPrologNode::format( PhaseRegAlloc *ra_, outputStream* st ) const { ++ Compile* C = ra_->C; ++ ++ int framesize = C->output()->frame_size_in_bytes(); ++ int bangsize = C->output()->bang_size_in_bytes(); ++ assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned"); ++ ++ // Calls to C2R adapters often do not accept exceptional returns. ++ // We require that their callers must bang for them. But be careful, because ++ // some VM calls (such as call site linkage) can use several kilobytes of ++ // stack. But the stack safety zone should account for that. ++ // See bugs 4446381, 4468289, 4497237.
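++  // The text printed below sketches the prologue this node emits: an optional stack
++  // bang, stores of RA and FP just below the incoming SP, an optional FP set-up when
++  // PreserveFramePointer is on, the SP adjustment by framesize, and (for nmethods with
++  // an entry barrier) the guard-value check against the thread's disarmed value.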
++ if (C->output()->need_stack_bang(bangsize)) { ++ st->print_cr("# stack bang"); st->print("\t"); ++ } ++ st->print("st_d RA, %d(SP) @ MachPrologNode\n\t", -wordSize); ++ st->print("st_d FP, %d(SP) @ MachPrologNode\n\t", -wordSize*2); ++ if (PreserveFramePointer) { ++ if (Assembler::is_simm((framesize - wordSize * 2), 12)) { ++ st->print("addi_d FP, SP, %d \n\t", framesize); ++ } else { ++ st->print("li AT, %d \n\t", framesize); ++ st->print("add_d FP, AT \n\t"); ++ } ++ } ++ st->print("addi_d SP, SP, -%d \t",framesize); ++ if (C->stub_function() == NULL && BarrierSet::barrier_set()->barrier_set_nmethod() != NULL) { ++ st->print("\n\t"); ++ st->print("ld_d T1, guard, 0\n\t"); ++ st->print("membar LoadLoad\n\t"); ++ st->print("ld_d T2, TREG, thread_disarmed_offset\n\t"); ++ st->print("beq T1, T2, skip\n\t"); ++ st->print("\n\t"); ++ st->print("jalr #nmethod_entry_barrier_stub\n\t"); ++ st->print("b skip\n\t"); ++ st->print("guard: int\n\t"); ++ st->print("\n\t"); ++ st->print("skip:\n\t"); ++ } ++} ++#endif ++ ++ ++void MachPrologNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const { ++ Compile* C = ra_->C; ++ C2_MacroAssembler _masm(&cbuf); ++ ++ int framesize = C->output()->frame_size_in_bytes(); ++ int bangsize = C->output()->bang_size_in_bytes(); ++ ++ assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned"); ++ ++#ifdef ASSERT ++ address start = __ pc(); ++#endif ++ ++ if (C->clinit_barrier_on_entry()) { ++ assert(!C->method()->holder()->is_not_initialized(), "initialization should have been started"); ++ ++ Label L_skip_barrier; ++ ++ __ mov_metadata(T4, C->method()->holder()->constant_encoding()); ++ __ clinit_barrier(T4, AT, &L_skip_barrier); ++ __ jmp((address)SharedRuntime::get_handle_wrong_method_stub(), relocInfo::runtime_call_type); ++ __ bind(L_skip_barrier); ++ } ++ ++ if (C->output()->need_stack_bang(bangsize)) { ++ __ generate_stack_overflow_check(bangsize); ++ } ++ ++ __ build_frame(framesize); ++ ++ assert((__ pc() - start) >= 1 * BytesPerInstWord, "No enough room for patch_verified_entry"); ++ ++ if (C->stub_function() == NULL) { ++ BarrierSetAssembler* bs = BarrierSet::barrier_set()->barrier_set_assembler(); ++ bs->nmethod_entry_barrier(&_masm); ++ } ++ ++ C->output()->set_frame_complete(cbuf.insts_size()); ++ if (C->has_mach_constant_base_node()) { ++ // NOTE: We set the table base offset here because users might be ++ // emitted before MachConstantBaseNode. 
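++    // calculate_table_base_offset() above always returns 0 on this port, so the table
++    // base coincides with the start of the constants section and is reached
++    // PC-relatively via the single pcaddi in MachConstantBaseNode::emit().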
++ ConstantTable& constant_table = C->output()->constant_table(); ++ constant_table.set_table_base_offset(constant_table.calculate_table_base_offset()); ++ } ++} ++ ++ ++uint MachPrologNode::size(PhaseRegAlloc *ra_) const { ++ return MachNode::size(ra_); // too many variables; just compute it the hard way ++} ++ ++int MachPrologNode::reloc() const { ++ return 0; // a large enough number ++} ++ ++bool is_CAS(int opcode) ++{ ++ switch(opcode) { ++ // We handle these ++ case Op_CompareAndSwapI: ++ case Op_CompareAndSwapL: ++ case Op_CompareAndSwapP: ++ case Op_CompareAndSwapN: ++ case Op_ShenandoahCompareAndSwapP: ++ case Op_ShenandoahCompareAndSwapN: ++ case Op_ShenandoahWeakCompareAndSwapP: ++ case Op_ShenandoahWeakCompareAndSwapN: ++ case Op_ShenandoahCompareAndExchangeP: ++ case Op_ShenandoahCompareAndExchangeN: ++ case Op_GetAndSetI: ++ case Op_GetAndSetL: ++ case Op_GetAndSetP: ++ case Op_GetAndSetN: ++ case Op_GetAndAddI: ++ case Op_GetAndAddL: ++ return true; ++ default: ++ return false; ++ } ++} ++ ++bool use_AMO(int opcode) ++{ ++ switch(opcode) { ++ // We handle these ++ case Op_StoreI: ++ case Op_StoreL: ++ case Op_StoreP: ++ case Op_StoreN: ++ case Op_StoreNKlass: ++ return true; ++ default: ++ return false; ++ } ++} ++ ++bool unnecessary_acquire(const Node *barrier) ++{ ++ assert(barrier->is_MemBar(), "expecting a membar"); ++ ++ if (UseBarriersForVolatile) { ++ // we need to plant a dbar ++ return false; ++ } ++ ++ MemBarNode* mb = barrier->as_MemBar(); ++ ++ if (mb->trailing_load_store()) { ++ Node* load_store = mb->in(MemBarNode::Precedent); ++ assert(load_store->is_LoadStore(), "unexpected graph shape"); ++ return is_CAS(load_store->Opcode()); ++ } ++ ++ return false; ++} ++ ++bool unnecessary_release(const Node *n) ++{ ++ assert((n->is_MemBar() && n->Opcode() == Op_MemBarRelease), "expecting a release membar"); ++ ++ if (UseBarriersForVolatile) { ++ // we need to plant a dbar ++ return false; ++ } ++ ++ MemBarNode *barrier = n->as_MemBar(); ++ ++ if (!barrier->leading()) { ++ return false; ++ } else { ++ Node* trailing = barrier->trailing_membar(); ++ MemBarNode* trailing_mb = trailing->as_MemBar(); ++ assert(trailing_mb->trailing(), "Not a trailing membar?"); ++ assert(trailing_mb->leading_membar() == n, "inconsistent leading/trailing membars"); ++ ++ Node* mem = trailing_mb->in(MemBarNode::Precedent); ++ if (mem->is_Store()) { ++ assert(mem->as_Store()->is_release(), ""); ++ assert(trailing_mb->Opcode() == Op_MemBarVolatile, ""); ++ return use_AMO(mem->Opcode()); ++ } else { ++ assert(mem->is_LoadStore(), ""); ++ assert(trailing_mb->Opcode() == Op_MemBarAcquire, ""); ++ return is_CAS(mem->Opcode()); ++ } ++ } ++ ++ return false; ++} ++ ++bool unnecessary_volatile(const Node *n) ++{ ++ // assert n->is_MemBar(); ++ if (UseBarriersForVolatile) { ++ // we need to plant a dbar ++ return false; ++ } ++ ++ MemBarNode *mbvol = n->as_MemBar(); ++ ++ bool release = false; ++ if (mbvol->trailing_store()) { ++ Node* mem = mbvol->in(MemBarNode::Precedent); ++ release = use_AMO(mem->Opcode()); ++ } ++ ++ assert(!release || (mbvol->in(MemBarNode::Precedent)->is_Store() && mbvol->in(MemBarNode::Precedent)->as_Store()->is_release()), ""); ++#ifdef ASSERT ++ if (release) { ++ Node* leading = mbvol->leading_membar(); ++ assert(leading->Opcode() == Op_MemBarRelease, ""); ++ assert(leading->as_MemBar()->leading_store(), ""); ++ assert(leading->as_MemBar()->trailing_membar() == mbvol, ""); ++ } ++#endif ++ ++ return release; ++} ++ ++bool needs_releasing_store(const Node *n) ++{ ++ // assert 
n->is_Store(); ++ if (UseBarriersForVolatile) { ++ // we use a normal store and dbar combination ++ return false; ++ } ++ ++ StoreNode *st = n->as_Store(); ++ ++ return st->trailing_membar() != NULL; ++} ++ ++%} ++ ++//----------ENCODING BLOCK----------------------------------------------------- ++// This block specifies the encoding classes used by the compiler to output ++// byte streams. Encoding classes generate functions which are called by ++// Machine Instruction Nodes in order to generate the bit encoding of the ++// instruction. Operands specify their base encoding interface with the ++// interface keyword. There are currently supported four interfaces, ++// REG_INTER, CONST_INTER, MEMORY_INTER, & COND_INTER. REG_INTER causes an ++// operand to generate a function which returns its register number when ++// queried. CONST_INTER causes an operand to generate a function which ++// returns the value of the constant when queried. MEMORY_INTER causes an ++// operand to generate four functions which return the Base Register, the ++// Index Register, the Scale Value, and the Offset Value of the operand when ++// queried. COND_INTER causes an operand to generate six functions which ++// return the encoding code (ie - encoding bits for the instruction) ++// associated with each basic boolean condition for a conditional instruction. ++// Instructions specify two basic values for encoding. They use the ++// ins_encode keyword to specify their encoding class (which must be one of ++// the class names specified in the encoding block), and they use the ++// opcode keyword to specify, in order, their primary, secondary, and ++// tertiary opcode. Only the opcode sections which a particular instruction ++// needs for encoding need to be specified. ++encode %{ ++ ++ enc_class Java_To_Runtime (method meth) %{ // CALL Java_To_Runtime, Java_To_Runtime_Leaf ++ C2_MacroAssembler _masm(&cbuf); ++ // This is the instruction starting address for relocation info. ++ __ block_comment("Java_To_Runtime"); ++ cbuf.set_insts_mark(); ++ __ relocate(relocInfo::runtime_call_type); ++ __ patchable_call((address)$meth$$method); ++ %} ++ ++ enc_class Java_Static_Call (method meth) %{ // JAVA STATIC CALL ++ // CALL to fixup routine. Fixup routine uses ScopeDesc info to determine ++ // who we intended to call. ++ C2_MacroAssembler _masm(&cbuf); ++ address addr = (address)$meth$$method; ++ address call; ++ __ block_comment("Java_Static_Call"); ++ ++ if ( !_method ) { ++ // A call to a runtime wrapper, e.g. new, new_typeArray_Java, uncommon_trap. ++ call = __ trampoline_call(AddressLiteral(addr, relocInfo::runtime_call_type), &cbuf); ++ if (call == NULL) { ++ ciEnv::current()->record_failure("CodeCache is full"); ++ return; ++ } ++ } else { ++ int method_index = resolved_method_index(cbuf); ++ RelocationHolder rspec = _optimized_virtual ? 
opt_virtual_call_Relocation::spec(method_index) ++ : static_call_Relocation::spec(method_index); ++ call = __ trampoline_call(AddressLiteral(addr, rspec), &cbuf); ++ if (call == NULL) { ++ ciEnv::current()->record_failure("CodeCache is full"); ++ return; ++ } ++ // Emit stub for static call ++ address stub = CompiledStaticCall::emit_to_interp_stub(cbuf); ++ if (stub == NULL) { ++ ciEnv::current()->record_failure("CodeCache is full"); ++ return; ++ } ++ } ++ %} ++ ++ ++ // ++ // [Ref: LIR_Assembler::ic_call() ] ++ // ++ enc_class Java_Dynamic_Call (method meth) %{ // JAVA DYNAMIC CALL ++ C2_MacroAssembler _masm(&cbuf); ++ __ block_comment("Java_Dynamic_Call"); ++ address call = __ ic_call((address)$meth$$method, resolved_method_index(cbuf)); ++ if (call == NULL) { ++ ciEnv::current()->record_failure("CodeCache is full"); ++ return; ++ } ++ %} ++ ++ ++ enc_class enc_PartialSubtypeCheck(mRegP result, mRegP sub, mRegP super, mRegI tmp) %{ ++ Register result = $result$$Register; ++ Register sub = $sub$$Register; ++ Register super = $super$$Register; ++ Register length = $tmp$$Register; ++ Register tmp = T4; ++ Label miss; ++ ++ // result may be the same as sub ++ // 47c B40: # B21 B41 <- B20 Freq: 0.155379 ++ // 47c partialSubtypeCheck result=S1, sub=S1, super=S3, length=S0 ++ // 4bc mov S2, NULL #@loadConP ++ // 4c0 beq S1, S2, B21 #@branchConP P=0.999999 C=-1.000000 ++ // ++ C2_MacroAssembler _masm(&cbuf); ++ Label done; ++ __ check_klass_subtype_slow_path(sub, super, length, tmp, ++ NULL, &miss, ++ /*set_cond_codes:*/ true); ++ // Refer to X86_64's RDI ++ __ move(result, 0); ++ __ b(done); ++ ++ __ bind(miss); ++ __ li(result, 1); ++ __ bind(done); ++ %} ++ ++%} ++ ++ ++//---------LOONGARCH FRAME-------------------------------------------------------------- ++// Definition of frame structure and management information. ++// ++// S T A C K L A Y O U T Allocators stack-slot number ++// | (to get allocators register number ++// G Owned by | | v add SharedInfo::stack0) ++// r CALLER | | ++// o | +--------+ pad to even-align allocators stack-slot ++// w V | pad0 | numbers; owned by CALLER ++// t -----------+--------+----> Matcher::_in_arg_limit, unaligned ++// h ^ | in | 5 ++// | | args | 4 Holes in incoming args owned by SELF ++// | | old | | 3 ++// | | SP-+--------+----> Matcher::_old_SP, even aligned ++// v | | ret | 3 return address ++// Owned by +--------+ ++// Self | pad2 | 2 pad to align old SP ++// | +--------+ 1 ++// | | locks | 0 ++// | +--------+----> SharedInfo::stack0, even aligned ++// | | pad1 | 11 pad to align new SP ++// | +--------+ ++// | | | 10 ++// | | spills | 9 spills ++// V | | 8 (pad0 slot for callee) ++// -----------+--------+----> Matcher::_out_arg_limit, unaligned ++// ^ | out | 7 ++// | | args | 6 Holes in outgoing args owned by CALLEE ++// Owned by new | | ++// Callee SP-+--------+----> Matcher::_new_SP, even aligned ++// | | ++// ++// Note 1: Only region 8-11 is determined by the allocator. Region 0-5 is ++// known from SELF's arguments and the Java calling convention. ++// Region 6-7 is determined per call site. ++// Note 2: If the calling convention leaves holes in the incoming argument ++// area, those holes are owned by SELF. Holes in the outgoing area ++// are owned by the CALLEE. Holes should not be nessecary in the ++// incoming area, as the Java calling convention is completely under ++// the control of the AD file. Doubles can be sorted and packed to ++// avoid holes. Holes in the outgoing arguments may be nessecary for ++// varargs C calling conventions. 
++// Note 3: Region 0-3 is even aligned, with pad2 as needed. Region 3-5 is ++// even aligned with pad0 as needed. ++// Region 6 is even aligned. Region 6-7 is NOT even aligned; ++// region 6-11 is even aligned; it may be padded out more so that ++// the region from SP to FP meets the minimum stack alignment. ++// Note 4: For I2C adapters, the incoming FP may not meet the minimum stack ++// alignment. Region 11, pad1, may be dynamically extended so that ++// SP meets the minimum alignment. ++ ++ ++frame %{ ++ // These two registers define part of the calling convention ++ // between compiled code and the interpreter. ++ // SEE StartI2CNode::calling_convention & StartC2INode::calling_convention & StartOSRNode::calling_convention ++ // for more information. ++ ++ inline_cache_reg(T1); // Inline Cache Register ++ ++ // Optional: name the operand used by cisc-spilling to access [stack_pointer + offset] ++ cisc_spilling_operand_name(indOffset32); ++ ++ // Number of stack slots consumed by locking an object ++ // generate Compile::sync_stack_slots ++ sync_stack_slots(2); ++ ++ frame_pointer(SP); ++ ++ // Interpreter stores its frame pointer in a register which is ++ // stored to the stack by I2CAdaptors. ++ // I2CAdaptors convert from interpreted java to compiled java. ++ ++ interpreter_frame_pointer(FP); ++ ++ // generate Matcher::stack_alignment ++ stack_alignment(StackAlignmentInBytes); //wordSize = sizeof(char*); ++ ++ // Number of outgoing stack slots killed above the out_preserve_stack_slots ++ // for calls to C. Supports the var-args backing area for register parms. ++ varargs_C_out_slots_killed(0); ++ ++ // The after-PROLOG location of the return address. Location of ++ // return address specifies a type (REG or STACK) and a number ++ // representing the register number (i.e. - use a register name) or ++ // stack slot. ++ // Ret Addr is on stack in slot 0 if no locks or verification or alignment. ++ // Otherwise, it is above the locks and verification slot and alignment word ++ //return_addr(STACK -1+ round_to(1+VerifyStackAtCalls+Compile::current()->sync()*Compile::current()->sync_stack_slots(),WordsPerLong)); ++ return_addr(REG RA); ++ ++ // Location of C & interpreter return values ++ // register(s) contain(s) return value for Op_StartI2C and Op_StartOSR. ++ // SEE Matcher::match. ++ c_return_value %{ ++ assert( ideal_reg >= Op_RegI && ideal_reg <= Op_RegL, "only return normal values" ); ++ /* -- , -- , Op_RegN, Op_RegI, Op_RegP, Op_RegF, Op_RegD, Op_RegL */ ++ static int lo[Op_RegL+1] = { 0, 0, V0_num, V0_num, V0_num, F0_num, F0_num, V0_num }; ++ static int hi[Op_RegL+1] = { 0, 0, OptoReg::Bad, OptoReg::Bad, V0_H_num, OptoReg::Bad, F0_H_num, V0_H_num }; ++ return OptoRegPair(hi[ideal_reg],lo[ideal_reg]); ++ %} ++ ++ // Location of return values ++ // register(s) contain(s) return value for Op_StartC2I and Op_Start. ++ // SEE Matcher::match. 
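++  // Same register assignment as c_return_value above: integer, pointer and long
++  // results come back in A0 (V0_num aliases A0_num via the #define in the source
++  // block), float and double results in F0.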
++ ++ return_value %{ ++ assert( ideal_reg >= Op_RegI && ideal_reg <= Op_RegL, "only return normal values" ); ++ /* -- , -- , Op_RegN, Op_RegI, Op_RegP, Op_RegF, Op_RegD, Op_RegL */ ++ static int lo[Op_RegL+1] = { 0, 0, V0_num, V0_num, V0_num, F0_num, F0_num, V0_num }; ++ static int hi[Op_RegL+1] = { 0, 0, OptoReg::Bad, OptoReg::Bad, V0_H_num, OptoReg::Bad, F0_H_num, V0_H_num}; ++ return OptoRegPair(hi[ideal_reg],lo[ideal_reg]); ++ %} ++ ++%} ++ ++//----------ATTRIBUTES--------------------------------------------------------- ++//----------Operand Attributes------------------------------------------------- ++op_attrib op_cost(0); // Required cost attribute ++ ++//----------Instruction Attributes--------------------------------------------- ++ins_attrib ins_cost(100); // Required cost attribute ++ins_attrib ins_size(32); // Required size attribute (in bits) ++ins_attrib ins_pc_relative(0); // Required PC Relative flag ++ins_attrib ins_short_branch(0); // Required flag: is this instruction a ++ // non-matching short branch variant of some ++ // long branch? ++ins_attrib ins_alignment(4); // Required alignment attribute (must be a power of 2) ++ // specifies the alignment that some part of the instruction (not ++ // necessarily the start) requires. If > 1, a compute_padding() ++ // function must be provided for the instruction ++ ++//----------OPERANDS----------------------------------------------------------- ++// Operand definitions must precede instruction definitions for correct parsing ++// in the ADLC because operands constitute user defined types which are used in ++// instruction definitions. ++ ++// Vectors ++ ++operand vecX() %{ ++ constraint(ALLOC_IN_RC(vectorx_reg)); ++ match(VecX); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand vecY() %{ ++ constraint(ALLOC_IN_RC(vectory_reg)); ++ match(VecY); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++// Flags register, used as output of compare instructions ++operand FlagsReg() %{ ++ constraint(ALLOC_IN_RC(t0_reg)); ++ match(RegFlags); ++ ++ format %{ "T0" %} ++ interface(REG_INTER); ++%} ++ ++//----------Simple Operands---------------------------------------------------- ++// TODO: Should we need to define some more special immediate number ? 
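++// For illustration only (not an instruct rule defined in this file): a hypothetical
++// pattern such as
++//   instruct addI_reg_imm(mRegI dst, mRegI src, immI12 imm) %{
++//     match(Set dst (AddI src imm));
++//     ...
++//   %}
++// would only match AddI nodes whose constant input satisfies the immI12 predicate
++// below, i.e. fits in a signed 12-bit immediate.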
++// Immediate Operands ++// Integer Immediate ++operand immI() %{ ++ match(ConI); ++ ++ op_cost(20); ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++operand immIU1() %{ ++ predicate((0 <= n->get_int()) && (n->get_int() <= 1)); ++ match(ConI); ++ ++ op_cost(5); ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++operand immIU2() %{ ++ predicate((0 <= n->get_int()) && (n->get_int() <= 3)); ++ match(ConI); ++ ++ op_cost(5); ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++operand immIU3() %{ ++ predicate((0 <= n->get_int()) && (n->get_int() <= 7)); ++ match(ConI); ++ ++ op_cost(5); ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++operand immIU4() %{ ++ predicate((0 <= n->get_int()) && (n->get_int() <= 15)); ++ match(ConI); ++ ++ op_cost(5); ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++operand immIU5() %{ ++ predicate((0 <= n->get_int()) && (n->get_int() <= 31)); ++ match(ConI); ++ ++ op_cost(5); ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++operand immIU6() %{ ++ predicate((0 <= n->get_int()) && (n->get_int() <= 63)); ++ match(ConI); ++ ++ op_cost(5); ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++operand immIU8() %{ ++ predicate((0 <= n->get_int()) && (n->get_int() <= 255)); ++ match(ConI); ++ ++ op_cost(5); ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++operand immI10() %{ ++ predicate((-512 <= n->get_int()) && (n->get_int() <= 511)); ++ match(ConI); ++ ++ op_cost(5); ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++operand immI12() %{ ++ predicate((-2048 <= n->get_int()) && (n->get_int() <= 2047)); ++ match(ConI); ++ ++ op_cost(5); ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++operand immI_M65536() %{ ++ predicate(n->get_int() == -65536); ++ match(ConI); ++ ++ op_cost(5); ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++// Constant for decrement ++operand immI_M1() %{ ++ predicate(n->get_int() == -1); ++ match(ConI); ++ ++ op_cost(0); ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++// Constant for zero ++operand immI_0() %{ ++ predicate(n->get_int() == 0); ++ match(ConI); ++ ++ op_cost(0); ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++operand immI_1() %{ ++ predicate(n->get_int() == 1); ++ match(ConI); ++ ++ op_cost(0); ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++operand immI_2() %{ ++ predicate(n->get_int() == 2); ++ match(ConI); ++ ++ op_cost(0); ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++operand immI_16() %{ ++ predicate(n->get_int() == 16); ++ match(ConI); ++ ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++operand immI_24() %{ ++ predicate(n->get_int() == 24); ++ match(ConI); ++ ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++// Constant for long shifts ++operand immI_32() %{ ++ predicate(n->get_int() == 32); ++ match(ConI); ++ ++ op_cost(0); ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++// Constant for byte-wide masking ++operand immI_255() %{ ++ predicate(n->get_int() == 255); ++ match(ConI); ++ ++ op_cost(0); ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++operand immI_65535() %{ ++ predicate(n->get_int() == 65535); ++ match(ConI); ++ ++ op_cost(5); ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++operand immI_MaxI() %{ ++ predicate(n->get_int() == 2147483647); ++ match(ConI); ++ ++ op_cost(0); ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++operand immI_M2047_2048() %{ ++ predicate((-2047 <= n->get_int()) && (n->get_int() <= 2048)); ++ match(ConI); ++ ++ op_cost(10); ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++// Valid scale values for addressing modes 
++operand immI_0_3() %{ ++ predicate(0 <= n->get_int() && (n->get_int() <= 3)); ++ match(ConI); ++ ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++operand immI_0_31() %{ ++ predicate(n->get_int() >= 0 && n->get_int() <= 31); ++ match(ConI); ++ ++ op_cost(0); ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++operand immI_0_4095() %{ ++ predicate(n->get_int() >= 0 && n->get_int() <= 4095); ++ match(ConI); ++ op_cost(0); ++ ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++operand immI_1_4() %{ ++ predicate(1 <= n->get_int() && (n->get_int() <= 4)); ++ match(ConI); ++ ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++operand immI_32_63() %{ ++ predicate(n->get_int() >= 32 && n->get_int() <= 63); ++ match(ConI); ++ op_cost(0); ++ ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++operand immI_M128_255() %{ ++ predicate((-128 <= n->get_int()) && (n->get_int() <= 255)); ++ match(ConI); ++ ++ op_cost(5); ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++// Operand for non-negtive integer mask ++operand immI_nonneg_mask() %{ ++ predicate((n->get_int() >= 0) && (Assembler::is_int_mask(n->get_int()) != -1)); ++ match(ConI); ++ ++ op_cost(0); ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++// Long Immediate ++operand immL() %{ ++ match(ConL); ++ ++ op_cost(20); ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++operand immLU5() %{ ++ predicate((0 <= n->get_long()) && (n->get_long() <= 31)); ++ match(ConL); ++ ++ op_cost(5); ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++operand immL10() %{ ++ predicate((-512 <= n->get_long()) && (n->get_long() <= 511)); ++ match(ConL); ++ ++ op_cost(5); ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++operand immL12() %{ ++ predicate((-2048 <= n->get_long()) && (n->get_long() <= 2047)); ++ match(ConL); ++ ++ op_cost(10); ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++// Long Immediate 32-bit signed ++operand immL32() ++%{ ++ predicate(n->get_long() == (int)n->get_long()); ++ match(ConL); ++ ++ op_cost(15); ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++// bit 3..6 zero ++operand immL_M121() %{ ++ predicate(n->get_long() == -121L); ++ match(ConL); ++ op_cost(0); ++ ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++// bit 0..2 zero ++operand immL_M8() %{ ++ predicate(n->get_long() == -8L); ++ match(ConL); ++ op_cost(0); ++ ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++// bit 1..2 zero ++operand immL_M7() %{ ++ predicate(n->get_long() == -7L); ++ match(ConL); ++ op_cost(0); ++ ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++// bit 2 zero ++operand immL_M5() %{ ++ predicate(n->get_long() == -5L); ++ match(ConL); ++ op_cost(0); ++ ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++// bit 0..1 zero ++operand immL_M4() %{ ++ predicate(n->get_long() == -4L); ++ match(ConL); ++ op_cost(0); ++ ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++// Long Immediate zero ++operand immL_0() %{ ++ predicate(n->get_long() == 0L); ++ match(ConL); ++ op_cost(0); ++ ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++operand immL_7() %{ ++ predicate(n->get_long() == 7L); ++ match(ConL); ++ op_cost(0); ++ ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++operand immL_MaxUI() %{ ++ predicate(n->get_long() == 0xFFFFFFFFL); ++ match(ConL); ++ op_cost(20); ++ ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++operand immL_M2047_2048() %{ ++ predicate((-2047 <= n->get_long()) && (n->get_long() <= 2048)); ++ match(ConL); ++ ++ op_cost(10); ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++operand immL_0_4095() %{ ++ 
predicate(n->get_long() >= 0 && n->get_long() <= 4095); ++ match(ConL); ++ op_cost(0); ++ ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++// Operand for non-negtive long mask ++operand immL_nonneg_mask() %{ ++ predicate((n->get_long() >= 0) && (Assembler::is_jlong_mask(n->get_long()) != -1)); ++ match(ConL); ++ ++ op_cost(0); ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++// Pointer Immediate ++operand immP() %{ ++ match(ConP); ++ ++ op_cost(10); ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++// NULL Pointer Immediate ++operand immP_0() %{ ++ predicate(n->get_ptr() == 0); ++ match(ConP); ++ op_cost(0); ++ ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++// Pointer Immediate ++operand immP_no_oop_cheap() %{ ++ predicate(!n->bottom_type()->isa_oop_ptr()); ++ match(ConP); ++ ++ op_cost(5); ++ // formats are generated automatically for constants and base registers ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++// Pointer Immediate ++operand immN() %{ ++ match(ConN); ++ ++ op_cost(10); ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++// NULL Pointer Immediate ++operand immN_0() %{ ++ predicate(n->get_narrowcon() == 0); ++ match(ConN); ++ ++ op_cost(5); ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++operand immNKlass() %{ ++ match(ConNKlass); ++ ++ op_cost(10); ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++// Single-precision floating-point immediate ++operand immF() %{ ++ match(ConF); ++ ++ op_cost(20); ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++// Single-precision floating-point zero ++operand immF_0() %{ ++ predicate(jint_cast(n->getf()) == 0); ++ match(ConF); ++ ++ op_cost(5); ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++// Double-precision floating-point immediate ++operand immD() %{ ++ match(ConD); ++ ++ op_cost(20); ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++// Double-precision floating-point zero ++operand immD_0() %{ ++ predicate(jlong_cast(n->getd()) == 0); ++ match(ConD); ++ ++ op_cost(5); ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++// Register Operands ++// Integer Register ++operand mRegI() %{ ++ constraint(ALLOC_IN_RC(int_reg)); ++ match(RegI); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand no_Ax_mRegI() %{ ++ constraint(ALLOC_IN_RC(no_Ax_int_reg)); ++ match(RegI); ++ match(mRegI); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand mS0RegI() %{ ++ constraint(ALLOC_IN_RC(s0_reg)); ++ match(RegI); ++ match(mRegI); ++ ++ format %{ "S0" %} ++ interface(REG_INTER); ++%} ++ ++operand mS1RegI() %{ ++ constraint(ALLOC_IN_RC(s1_reg)); ++ match(RegI); ++ match(mRegI); ++ ++ format %{ "S1" %} ++ interface(REG_INTER); ++%} ++ ++operand mS3RegI() %{ ++ constraint(ALLOC_IN_RC(s3_reg)); ++ match(RegI); ++ match(mRegI); ++ ++ format %{ "S3" %} ++ interface(REG_INTER); ++%} ++ ++operand mS4RegI() %{ ++ constraint(ALLOC_IN_RC(s4_reg)); ++ match(RegI); ++ match(mRegI); ++ ++ format %{ "S4" %} ++ interface(REG_INTER); ++%} ++ ++operand mS5RegI() %{ ++ constraint(ALLOC_IN_RC(s5_reg)); ++ match(RegI); ++ match(mRegI); ++ ++ format %{ "S5" %} ++ interface(REG_INTER); ++%} ++ ++operand mS6RegI() %{ ++ constraint(ALLOC_IN_RC(s6_reg)); ++ match(RegI); ++ match(mRegI); ++ ++ format %{ "S6" %} ++ interface(REG_INTER); ++%} ++ ++operand mS7RegI() %{ ++ constraint(ALLOC_IN_RC(s7_reg)); ++ match(RegI); ++ match(mRegI); ++ ++ format %{ "S7" %} ++ interface(REG_INTER); ++%} ++ ++ ++operand mT0RegI() %{ ++ constraint(ALLOC_IN_RC(t0_reg)); ++ match(RegI); ++ match(mRegI); ++ ++ format %{ "T0" %} ++ 
interface(REG_INTER); ++%} ++ ++operand mT1RegI() %{ ++ constraint(ALLOC_IN_RC(t1_reg)); ++ match(RegI); ++ match(mRegI); ++ ++ format %{ "T1" %} ++ interface(REG_INTER); ++%} ++ ++operand mT2RegI() %{ ++ constraint(ALLOC_IN_RC(t2_reg)); ++ match(RegI); ++ match(mRegI); ++ ++ format %{ "T2" %} ++ interface(REG_INTER); ++%} ++ ++operand mT3RegI() %{ ++ constraint(ALLOC_IN_RC(t3_reg)); ++ match(RegI); ++ match(mRegI); ++ ++ format %{ "T3" %} ++ interface(REG_INTER); ++%} ++ ++operand mT8RegI() %{ ++ constraint(ALLOC_IN_RC(t8_reg)); ++ match(RegI); ++ match(mRegI); ++ ++ format %{ "T8" %} ++ interface(REG_INTER); ++%} ++ ++operand mT4RegI() %{ ++ constraint(ALLOC_IN_RC(t4_reg)); ++ match(RegI); ++ match(mRegI); ++ ++ format %{ "T4" %} ++ interface(REG_INTER); ++%} ++ ++operand mA0RegI() %{ ++ constraint(ALLOC_IN_RC(a0_reg)); ++ match(RegI); ++ match(mRegI); ++ ++ format %{ "A0" %} ++ interface(REG_INTER); ++%} ++ ++operand mA1RegI() %{ ++ constraint(ALLOC_IN_RC(a1_reg)); ++ match(RegI); ++ match(mRegI); ++ ++ format %{ "A1" %} ++ interface(REG_INTER); ++%} ++ ++operand mA2RegI() %{ ++ constraint(ALLOC_IN_RC(a2_reg)); ++ match(RegI); ++ match(mRegI); ++ ++ format %{ "A2" %} ++ interface(REG_INTER); ++%} ++ ++operand mA3RegI() %{ ++ constraint(ALLOC_IN_RC(a3_reg)); ++ match(RegI); ++ match(mRegI); ++ ++ format %{ "A3" %} ++ interface(REG_INTER); ++%} ++ ++operand mA4RegI() %{ ++ constraint(ALLOC_IN_RC(a4_reg)); ++ match(RegI); ++ match(mRegI); ++ ++ format %{ "A4" %} ++ interface(REG_INTER); ++%} ++ ++operand mA5RegI() %{ ++ constraint(ALLOC_IN_RC(a5_reg)); ++ match(RegI); ++ match(mRegI); ++ ++ format %{ "A5" %} ++ interface(REG_INTER); ++%} ++ ++operand mA6RegI() %{ ++ constraint(ALLOC_IN_RC(a6_reg)); ++ match(RegI); ++ match(mRegI); ++ ++ format %{ "A6" %} ++ interface(REG_INTER); ++%} ++ ++operand mA7RegI() %{ ++ constraint(ALLOC_IN_RC(a7_reg)); ++ match(RegI); ++ match(mRegI); ++ ++ format %{ "A7" %} ++ interface(REG_INTER); ++%} ++ ++operand mRegN() %{ ++ constraint(ALLOC_IN_RC(int_reg)); ++ match(RegN); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand t0_RegN() %{ ++ constraint(ALLOC_IN_RC(t0_reg)); ++ match(RegN); ++ match(mRegN); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand t1_RegN() %{ ++ constraint(ALLOC_IN_RC(t1_reg)); ++ match(RegN); ++ match(mRegN); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand t3_RegN() %{ ++ constraint(ALLOC_IN_RC(t3_reg)); ++ match(RegN); ++ match(mRegN); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand t8_RegN() %{ ++ constraint(ALLOC_IN_RC(t8_reg)); ++ match(RegN); ++ match(mRegN); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand a0_RegN() %{ ++ constraint(ALLOC_IN_RC(a0_reg)); ++ match(RegN); ++ match(mRegN); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand a1_RegN() %{ ++ constraint(ALLOC_IN_RC(a1_reg)); ++ match(RegN); ++ match(mRegN); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand a2_RegN() %{ ++ constraint(ALLOC_IN_RC(a2_reg)); ++ match(RegN); ++ match(mRegN); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand a3_RegN() %{ ++ constraint(ALLOC_IN_RC(a3_reg)); ++ match(RegN); ++ match(mRegN); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand a4_RegN() %{ ++ constraint(ALLOC_IN_RC(a4_reg)); ++ match(RegN); ++ match(mRegN); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand a5_RegN() %{ ++ constraint(ALLOC_IN_RC(a5_reg)); ++ match(RegN); ++ match(mRegN); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand a6_RegN() 
%{ ++ constraint(ALLOC_IN_RC(a6_reg)); ++ match(RegN); ++ match(mRegN); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand a7_RegN() %{ ++ constraint(ALLOC_IN_RC(a7_reg)); ++ match(RegN); ++ match(mRegN); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand s0_RegN() %{ ++ constraint(ALLOC_IN_RC(s0_reg)); ++ match(RegN); ++ match(mRegN); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand s1_RegN() %{ ++ constraint(ALLOC_IN_RC(s1_reg)); ++ match(RegN); ++ match(mRegN); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand s2_RegN() %{ ++ constraint(ALLOC_IN_RC(s2_reg)); ++ match(RegN); ++ match(mRegN); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand s3_RegN() %{ ++ constraint(ALLOC_IN_RC(s3_reg)); ++ match(RegN); ++ match(mRegN); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand s4_RegN() %{ ++ constraint(ALLOC_IN_RC(s4_reg)); ++ match(RegN); ++ match(mRegN); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand s5_RegN() %{ ++ constraint(ALLOC_IN_RC(s5_reg)); ++ match(RegN); ++ match(mRegN); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand s6_RegN() %{ ++ constraint(ALLOC_IN_RC(s6_reg)); ++ match(RegN); ++ match(mRegN); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand s7_RegN() %{ ++ constraint(ALLOC_IN_RC(s7_reg)); ++ match(RegN); ++ match(mRegN); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++// Pointer Register ++operand mRegP() %{ ++ constraint(ALLOC_IN_RC(p_reg)); ++ match(RegP); ++ match(a0_RegP); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand no_T8_mRegP() %{ ++ constraint(ALLOC_IN_RC(no_T8_p_reg)); ++ match(RegP); ++ match(mRegP); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand no_Ax_mRegP() %{ ++ constraint(ALLOC_IN_RC(no_Ax_p_reg)); ++ match(RegP); ++ match(mRegP); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand s1_RegP() ++%{ ++ constraint(ALLOC_IN_RC(s1_long_reg)); ++ match(RegP); ++ match(mRegP); ++ match(no_T8_mRegP); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand s3_RegP() ++%{ ++ constraint(ALLOC_IN_RC(s3_long_reg)); ++ match(RegP); ++ match(mRegP); ++ match(no_T8_mRegP); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand s4_RegP() ++%{ ++ constraint(ALLOC_IN_RC(s4_long_reg)); ++ match(RegP); ++ match(mRegP); ++ match(no_T8_mRegP); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand s5_RegP() ++%{ ++ constraint(ALLOC_IN_RC(s5_long_reg)); ++ match(RegP); ++ match(mRegP); ++ match(no_T8_mRegP); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand s6_RegP() ++%{ ++ constraint(ALLOC_IN_RC(s6_long_reg)); ++ match(RegP); ++ match(mRegP); ++ match(no_T8_mRegP); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand s7_RegP() ++%{ ++ constraint(ALLOC_IN_RC(s7_long_reg)); ++ match(RegP); ++ match(mRegP); ++ match(no_T8_mRegP); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand t0_RegP() ++%{ ++ constraint(ALLOC_IN_RC(t0_long_reg)); ++ match(RegP); ++ match(mRegP); ++ match(no_T8_mRegP); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand t1_RegP() ++%{ ++ constraint(ALLOC_IN_RC(t1_long_reg)); ++ match(RegP); ++ match(mRegP); ++ match(no_T8_mRegP); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand t2_RegP() ++%{ ++ constraint(ALLOC_IN_RC(t2_long_reg)); ++ match(RegP); ++ match(mRegP); ++ match(no_T8_mRegP); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand t3_RegP() ++%{ ++ constraint(ALLOC_IN_RC(t3_long_reg)); ++ match(RegP); ++ 
match(mRegP); ++ match(no_T8_mRegP); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand t8_RegP() ++%{ ++ constraint(ALLOC_IN_RC(t8_long_reg)); ++ match(RegP); ++ match(mRegP); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand a0_RegP() ++%{ ++ constraint(ALLOC_IN_RC(a0_long_reg)); ++ match(RegP); ++ match(mRegP); ++ match(no_T8_mRegP); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand a1_RegP() ++%{ ++ constraint(ALLOC_IN_RC(a1_long_reg)); ++ match(RegP); ++ match(mRegP); ++ match(no_T8_mRegP); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand a2_RegP() ++%{ ++ constraint(ALLOC_IN_RC(a2_long_reg)); ++ match(RegP); ++ match(mRegP); ++ match(no_T8_mRegP); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand a3_RegP() ++%{ ++ constraint(ALLOC_IN_RC(a3_long_reg)); ++ match(RegP); ++ match(mRegP); ++ match(no_T8_mRegP); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand a4_RegP() ++%{ ++ constraint(ALLOC_IN_RC(a4_long_reg)); ++ match(RegP); ++ match(mRegP); ++ match(no_T8_mRegP); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++ ++operand a5_RegP() ++%{ ++ constraint(ALLOC_IN_RC(a5_long_reg)); ++ match(RegP); ++ match(mRegP); ++ match(no_T8_mRegP); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand a6_RegP() ++%{ ++ constraint(ALLOC_IN_RC(a6_long_reg)); ++ match(RegP); ++ match(mRegP); ++ match(no_T8_mRegP); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand a7_RegP() ++%{ ++ constraint(ALLOC_IN_RC(a7_long_reg)); ++ match(RegP); ++ match(mRegP); ++ match(no_T8_mRegP); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand v0_RegP() ++%{ ++ constraint(ALLOC_IN_RC(v0_long_reg)); ++ match(RegP); ++ match(mRegP); ++ match(no_T8_mRegP); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand v1_RegP() ++%{ ++ constraint(ALLOC_IN_RC(v1_long_reg)); ++ match(RegP); ++ match(mRegP); ++ match(no_T8_mRegP); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand mRegL() %{ ++ constraint(ALLOC_IN_RC(long_reg)); ++ match(RegL); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand mRegI2L(mRegI reg) %{ ++ match(ConvI2L reg); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand mRegL2I(mRegL reg) %{ ++ match(ConvL2I reg); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand v0RegL() %{ ++ constraint(ALLOC_IN_RC(v0_long_reg)); ++ match(RegL); ++ match(mRegL); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand v1RegL() %{ ++ constraint(ALLOC_IN_RC(v1_long_reg)); ++ match(RegL); ++ match(mRegL); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand a0RegL() %{ ++ constraint(ALLOC_IN_RC(a0_long_reg)); ++ match(RegL); ++ match(mRegL); ++ ++ format %{ "A0" %} ++ interface(REG_INTER); ++%} ++ ++operand a1RegL() %{ ++ constraint(ALLOC_IN_RC(a1_long_reg)); ++ match(RegL); ++ match(mRegL); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand a2RegL() %{ ++ constraint(ALLOC_IN_RC(a2_long_reg)); ++ match(RegL); ++ match(mRegL); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand a3RegL() %{ ++ constraint(ALLOC_IN_RC(a3_long_reg)); ++ match(RegL); ++ match(mRegL); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand t0RegL() %{ ++ constraint(ALLOC_IN_RC(t0_long_reg)); ++ match(RegL); ++ match(mRegL); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand t1RegL() %{ ++ constraint(ALLOC_IN_RC(t1_long_reg)); ++ match(RegL); ++ match(mRegL); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand t3RegL() %{ ++ 
constraint(ALLOC_IN_RC(t3_long_reg)); ++ match(RegL); ++ match(mRegL); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand t8RegL() %{ ++ constraint(ALLOC_IN_RC(t8_long_reg)); ++ match(RegL); ++ match(mRegL); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand a4RegL() %{ ++ constraint(ALLOC_IN_RC(a4_long_reg)); ++ match(RegL); ++ match(mRegL); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand a5RegL() %{ ++ constraint(ALLOC_IN_RC(a5_long_reg)); ++ match(RegL); ++ match(mRegL); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand a6RegL() %{ ++ constraint(ALLOC_IN_RC(a6_long_reg)); ++ match(RegL); ++ match(mRegL); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand a7RegL() %{ ++ constraint(ALLOC_IN_RC(a7_long_reg)); ++ match(RegL); ++ match(mRegL); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand s0RegL() %{ ++ constraint(ALLOC_IN_RC(s0_long_reg)); ++ match(RegL); ++ match(mRegL); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand s1RegL() %{ ++ constraint(ALLOC_IN_RC(s1_long_reg)); ++ match(RegL); ++ match(mRegL); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand s3RegL() %{ ++ constraint(ALLOC_IN_RC(s3_long_reg)); ++ match(RegL); ++ match(mRegL); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand s4RegL() %{ ++ constraint(ALLOC_IN_RC(s4_long_reg)); ++ match(RegL); ++ match(mRegL); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand s7RegL() %{ ++ constraint(ALLOC_IN_RC(s7_long_reg)); ++ match(RegL); ++ match(mRegL); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++// Floating register operands ++operand regF() %{ ++ constraint(ALLOC_IN_RC(flt_reg)); ++ match(RegF); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++//Double Precision Floating register operands ++operand regD() %{ ++ constraint(ALLOC_IN_RC(dbl_reg)); ++ match(RegD); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++//----------Memory Operands---------------------------------------------------- ++// Indirect Memory Operand ++operand indirect(mRegP reg) %{ ++ constraint(ALLOC_IN_RC(p_reg)); ++ match(reg); ++ ++ format %{ "[$reg] @ indirect" %} ++ interface(MEMORY_INTER) %{ ++ base($reg); ++ index(0xffffffff); /* NO_INDEX */ ++ scale(0x0); ++ disp(0x0); ++ %} ++%} ++ ++// Indirect Memory Plus Short Offset Operand ++operand indOffset12(mRegP reg, immL12 off) ++%{ ++ constraint(ALLOC_IN_RC(p_reg)); ++ match(AddP reg off); ++ ++ op_cost(10); ++ format %{ "[$reg + $off (12-bit)] @ indOffset12" %} ++ interface(MEMORY_INTER) %{ ++ base($reg); ++ index(0xffffffff); /* NO_INDEX */ ++ scale(0x0); ++ disp($off); ++ %} ++%} ++ ++operand indOffset12I2L(mRegP reg, immI12 off) ++%{ ++ constraint(ALLOC_IN_RC(p_reg)); ++ match(AddP reg (ConvI2L off)); ++ ++ op_cost(10); ++ format %{ "[$reg + $off (12-bit)] @ indOffset12I2L" %} ++ interface(MEMORY_INTER) %{ ++ base($reg); ++ index(0x0); /* NO_INDEX */ ++ scale(0x0); ++ disp($off); ++ %} ++%} ++// Indirect Memory Plus Index Register ++operand indIndex(mRegP addr, mRegL index) %{ ++ constraint(ALLOC_IN_RC(p_reg)); ++ match(AddP addr index); ++ ++ op_cost(20); ++ format %{"[$addr + $index] @ indIndex" %} ++ interface(MEMORY_INTER) %{ ++ base($addr); ++ index($index); ++ scale(0x0); ++ disp(0x0); ++ %} ++%} ++ ++operand indIndexI2L(mRegP reg, mRegI ireg) ++%{ ++ constraint(ALLOC_IN_RC(ptr_reg)); ++ match(AddP reg (ConvI2L ireg)); ++ op_cost(10); ++ format %{ "[$reg + $ireg] @ indIndexI2L" %} ++ interface(MEMORY_INTER) %{ ++ base($reg); ++ index($ireg); ++ scale(0x0); ++ 
disp(0x0); ++ %} ++%} ++ ++// Indirect Memory Operand ++operand indirectNarrow(mRegN reg) ++%{ ++ predicate(CompressedOops::shift() == 0); ++ constraint(ALLOC_IN_RC(p_reg)); ++ op_cost(10); ++ match(DecodeN reg); ++ ++ format %{ "[$reg] @ indirectNarrow" %} ++ interface(MEMORY_INTER) %{ ++ base($reg); ++ index(0xffffffff); ++ scale(0x0); ++ disp(0x0); ++ %} ++%} ++ ++// Indirect Memory Plus Short Offset Operand ++operand indOffset12Narrow(mRegN reg, immL12 off) ++%{ ++ predicate(CompressedOops::shift() == 0); ++ constraint(ALLOC_IN_RC(p_reg)); ++ op_cost(10); ++ match(AddP (DecodeN reg) off); ++ ++ format %{ "[$reg + $off (12-bit)] @ indOffset12Narrow" %} ++ interface(MEMORY_INTER) %{ ++ base($reg); ++ index(0xffffffff); ++ scale(0x0); ++ disp($off); ++ %} ++%} ++ ++//----------Conditional Branch Operands---------------------------------------- ++// Comparison Op - This is the operation of the comparison, and is limited to ++// the following set of codes: ++// L (<), LE (<=), G (>), GE (>=), E (==), NE (!=) ++// ++// Other attributes of the comparison, such as unsignedness, are specified ++// by the comparison instruction that sets a condition code flags register. ++// That result is represented by a flags operand whose subtype is appropriate ++// to the unsignedness (etc.) of the comparison. ++// ++// Later, the instruction which matches both the Comparison Op (a Bool) and ++// the flags (produced by the Cmp) specifies the coding of the comparison op ++// by matching a specific subtype of Bool operand below, such as cmpOp. ++ ++// Comparision Code ++operand cmpOp() %{ ++ match(Bool); ++ ++ format %{ "" %} ++ interface(COND_INTER) %{ ++ equal(0x01); ++ not_equal(0x02); ++ greater(0x03); ++ greater_equal(0x04); ++ less(0x05); ++ less_equal(0x06); ++ overflow(0x7); ++ no_overflow(0x8); ++ %} ++%} ++ ++operand cmpOpEqNe() %{ ++ match(Bool); ++ predicate(n->as_Bool()->_test._test == BoolTest::ne ++ || n->as_Bool()->_test._test == BoolTest::eq); ++ ++ format %{ "" %} ++ interface(COND_INTER) %{ ++ equal(0x01); ++ not_equal(0x02); ++ greater(0x03); ++ greater_equal(0x04); ++ less(0x05); ++ less_equal(0x06); ++ overflow(0x7); ++ no_overflow(0x8); ++ %} ++%} ++ ++//----------Special Memory Operands-------------------------------------------- ++// Stack Slot Operand - This operand is used for loading and storing temporary ++// values on the stack where a match requires a value to ++// flow through memory. 
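++// Each stackSlot operand below is SP-relative: base(0x1d) names SP in this
++// port's register encoding, and disp($reg) carries the frame offset that the
++// matcher assigns, since these operands have no match rule of their own.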
++operand stackSlotP(sRegP reg) %{ ++ constraint(ALLOC_IN_RC(stack_slots)); ++ // No match rule because this operand is only generated in matching ++ op_cost(50); ++ format %{ "[$reg]" %} ++ interface(MEMORY_INTER) %{ ++ base(0x1d); // SP ++ index(0xffffffff); // No Index ++ scale(0x0); // No Scale ++ disp($reg); // Stack Offset ++ %} ++%} ++ ++operand stackSlotI(sRegI reg) %{ ++ constraint(ALLOC_IN_RC(stack_slots)); ++ // No match rule because this operand is only generated in matching ++ op_cost(50); ++ format %{ "[$reg]" %} ++ interface(MEMORY_INTER) %{ ++ base(0x1d); // SP ++ index(0xffffffff); // No Index ++ scale(0x0); // No Scale ++ disp($reg); // Stack Offset ++ %} ++%} ++ ++operand stackSlotF(sRegF reg) %{ ++ constraint(ALLOC_IN_RC(stack_slots)); ++ // No match rule because this operand is only generated in matching ++ op_cost(50); ++ format %{ "[$reg]" %} ++ interface(MEMORY_INTER) %{ ++ base(0x1d); // SP ++ index(0xffffffff); // No Index ++ scale(0x0); // No Scale ++ disp($reg); // Stack Offset ++ %} ++%} ++ ++operand stackSlotD(sRegD reg) %{ ++ constraint(ALLOC_IN_RC(stack_slots)); ++ // No match rule because this operand is only generated in matching ++ op_cost(50); ++ format %{ "[$reg]" %} ++ interface(MEMORY_INTER) %{ ++ base(0x1d); // SP ++ index(0xffffffff); // No Index ++ scale(0x0); // No Scale ++ disp($reg); // Stack Offset ++ %} ++%} ++ ++operand stackSlotL(sRegL reg) %{ ++ constraint(ALLOC_IN_RC(stack_slots)); ++ // No match rule because this operand is only generated in matching ++ op_cost(50); ++ format %{ "[$reg]" %} ++ interface(MEMORY_INTER) %{ ++ base(0x1d); // SP ++ index(0xffffffff); // No Index ++ scale(0x0); // No Scale ++ disp($reg); // Stack Offset ++ %} ++%} ++ ++ ++//------------------------OPERAND CLASSES-------------------------------------- ++opclass memory( indirect, indOffset12, indOffset12I2L, indIndex, indIndexI2L, ++ indirectNarrow, indOffset12Narrow); ++opclass memory_loadRange(indOffset12, indirect); ++ ++opclass mRegLorI2L(mRegI2L, mRegL); ++opclass mRegIorL2I( mRegI, mRegL2I); ++ ++//----------PIPELINE----------------------------------------------------------- ++// Rules which define the behavior of the target architectures pipeline. 
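++// The model below assumes a 4-wide decode (D1..D4), two integer ALUs, two
++// FPUs, one memory port and one branch unit, matching the resources()
++// declaration inside the pipeline block.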
++ ++pipeline %{ ++ ++ //----------ATTRIBUTES--------------------------------------------------------- ++ attributes %{ ++ fixed_size_instructions; // Fixed size instructions ++ max_instructions_per_bundle = 1; // 1 instruction per bundle ++ max_bundles_per_cycle = 4; // Up to 4 bundles per cycle ++ bundle_unit_size=4; ++ instruction_unit_size = 4; // An instruction is 4 bytes long ++ instruction_fetch_unit_size = 16; // The processor fetches one line ++ instruction_fetch_units = 1; // of 16 bytes ++ ++ // List of nop instructions ++ nops( MachNop ); ++ %} ++ ++ //----------RESOURCES---------------------------------------------------------- ++ // Resources are the functional units available to the machine ++ ++ resources(D1, D2, D3, D4, DECODE = D1 | D2 | D3| D4, ALU1, ALU2, ALU = ALU1 | ALU2, FPU1, FPU2, FPU = FPU1 | FPU2, MEM, BR); ++ ++ //----------PIPELINE DESCRIPTION----------------------------------------------- ++ // Pipeline Description specifies the stages in the machine's pipeline ++ ++ // IF: fetch ++ // ID: decode ++ // RD: read ++ // CA: caculate ++ // WB: write back ++ // CM: commit ++ ++ pipe_desc(IF, ID, RD, CA, WB, CM); ++ ++ ++ //----------PIPELINE CLASSES--------------------------------------------------- ++ // Pipeline Classes describe the stages in which input and output are ++ // referenced by the hardware pipeline. ++ ++ //No.1 Integer ALU reg-reg operation : dst <-- reg1 op reg2 ++ pipe_class ialu_regI_regI(mRegI dst, mRegI src1, mRegI src2) %{ ++ single_instruction; ++ src1 : RD(read); ++ src2 : RD(read); ++ dst : WB(write)+1; ++ DECODE : ID; ++ ALU : CA; ++ %} ++ ++ //No.19 Integer mult operation : dst <-- reg1 mult reg2 ++ pipe_class ialu_mult(mRegI dst, mRegI src1, mRegI src2) %{ ++ src1 : RD(read); ++ src2 : RD(read); ++ dst : WB(write)+5; ++ DECODE : ID; ++ ALU2 : CA; ++ %} ++ ++ pipe_class mulL_reg_reg(mRegL dst, mRegL src1, mRegL src2) %{ ++ src1 : RD(read); ++ src2 : RD(read); ++ dst : WB(write)+10; ++ DECODE : ID; ++ ALU2 : CA; ++ %} ++ ++ //No.19 Integer div operation : dst <-- reg1 div reg2 ++ pipe_class ialu_div(mRegI dst, mRegI src1, mRegI src2) %{ ++ src1 : RD(read); ++ src2 : RD(read); ++ dst : WB(write)+10; ++ DECODE : ID; ++ ALU2 : CA; ++ %} ++ ++ //No.19 Integer mod operation : dst <-- reg1 mod reg2 ++ pipe_class ialu_mod(mRegI dst, mRegI src1, mRegI src2) %{ ++ instruction_count(2); ++ src1 : RD(read); ++ src2 : RD(read); ++ dst : WB(write)+10; ++ DECODE : ID; ++ ALU2 : CA; ++ %} ++ ++ //No.15 Long ALU reg-reg operation : dst <-- reg1 op reg2 ++ pipe_class ialu_regL_regL(mRegL dst, mRegL src1, mRegL src2) %{ ++ instruction_count(2); ++ src1 : RD(read); ++ src2 : RD(read); ++ dst : WB(write); ++ DECODE : ID; ++ ALU : CA; ++ %} ++ ++ //No.18 Long ALU reg-imm16 operation : dst <-- reg1 op imm16 ++ pipe_class ialu_regL_imm16(mRegL dst, mRegL src) %{ ++ instruction_count(2); ++ src : RD(read); ++ dst : WB(write); ++ DECODE : ID; ++ ALU : CA; ++ %} ++ ++ //no.16 load Long from memory : ++ pipe_class ialu_loadL(mRegL dst, memory mem) %{ ++ instruction_count(2); ++ mem : RD(read); ++ dst : WB(write)+5; ++ DECODE : ID; ++ MEM : RD; ++ %} ++ ++ //No.17 Store Long to Memory : ++ pipe_class ialu_storeL(mRegL src, memory mem) %{ ++ instruction_count(2); ++ mem : RD(read); ++ src : RD(read); ++ DECODE : ID; ++ MEM : RD; ++ %} ++ ++ //No.2 Integer ALU reg-imm16 operation : dst <-- reg1 op imm16 ++ pipe_class ialu_regI_imm16(mRegI dst, mRegI src) %{ ++ single_instruction; ++ src : RD(read); ++ dst : WB(write); ++ DECODE : ID; ++ ALU : CA; ++ %} ++ ++ //No.3 
Integer move operation : dst <-- reg ++ pipe_class ialu_regI_mov(mRegI dst, mRegI src) %{ ++ src : RD(read); ++ dst : WB(write); ++ DECODE : ID; ++ ALU : CA; ++ %} ++ ++ //No.4 No instructions : do nothing ++ pipe_class empty( ) %{ ++ instruction_count(0); ++ %} ++ ++ //No.5 UnConditional branch : ++ pipe_class pipe_jump( label labl ) %{ ++ multiple_bundles; ++ DECODE : ID; ++ BR : RD; ++ %} ++ ++ //No.6 ALU Conditional branch : ++ pipe_class pipe_alu_branch(mRegI src1, mRegI src2, label labl ) %{ ++ multiple_bundles; ++ src1 : RD(read); ++ src2 : RD(read); ++ DECODE : ID; ++ BR : RD; ++ %} ++ ++ //no.7 load integer from memory : ++ pipe_class ialu_loadI(mRegI dst, memory mem) %{ ++ mem : RD(read); ++ dst : WB(write)+3; ++ DECODE : ID; ++ MEM : RD; ++ %} ++ ++ //No.8 Store Integer to Memory : ++ pipe_class ialu_storeI(mRegI src, memory mem) %{ ++ mem : RD(read); ++ src : RD(read); ++ DECODE : ID; ++ MEM : RD; ++ %} ++ ++ ++ //No.10 Floating FPU reg-reg operation : dst <-- reg1 op reg2 ++ pipe_class fpu_regF_regF(regF dst, regF src1, regF src2) %{ ++ src1 : RD(read); ++ src2 : RD(read); ++ dst : WB(write); ++ DECODE : ID; ++ FPU : CA; ++ %} ++ ++ //No.22 Floating div operation : dst <-- reg1 div reg2 ++ pipe_class fpu_div(regF dst, regF src1, regF src2) %{ ++ src1 : RD(read); ++ src2 : RD(read); ++ dst : WB(write); ++ DECODE : ID; ++ FPU2 : CA; ++ %} ++ ++ pipe_class fcvt_I2D(regD dst, mRegI src) %{ ++ src : RD(read); ++ dst : WB(write); ++ DECODE : ID; ++ FPU1 : CA; ++ %} ++ ++ pipe_class fcvt_D2I(mRegI dst, regD src) %{ ++ src : RD(read); ++ dst : WB(write); ++ DECODE : ID; ++ FPU1 : CA; ++ %} ++ ++ pipe_class pipe_mfc1(mRegI dst, regD src) %{ ++ src : RD(read); ++ dst : WB(write); ++ DECODE : ID; ++ MEM : RD; ++ %} ++ ++ pipe_class pipe_mtc1(regD dst, mRegI src) %{ ++ src : RD(read); ++ dst : WB(write); ++ DECODE : ID; ++ MEM : RD(5); ++ %} ++ ++ //No.23 Floating sqrt operation : dst <-- reg1 sqrt reg2 ++ pipe_class fpu_sqrt(regF dst, regF src1, regF src2) %{ ++ multiple_bundles; ++ src1 : RD(read); ++ src2 : RD(read); ++ dst : WB(write); ++ DECODE : ID; ++ FPU2 : CA; ++ %} ++ ++ //No.11 Load Floating from Memory : ++ pipe_class fpu_loadF(regF dst, memory mem) %{ ++ instruction_count(1); ++ mem : RD(read); ++ dst : WB(write)+3; ++ DECODE : ID; ++ MEM : RD; ++ %} ++ ++ //No.12 Store Floating to Memory : ++ pipe_class fpu_storeF(regF src, memory mem) %{ ++ instruction_count(1); ++ mem : RD(read); ++ src : RD(read); ++ DECODE : ID; ++ MEM : RD; ++ %} ++ ++ //No.13 FPU Conditional branch : ++ pipe_class pipe_fpu_branch(regF src1, regF src2, label labl ) %{ ++ multiple_bundles; ++ src1 : RD(read); ++ src2 : RD(read); ++ DECODE : ID; ++ BR : RD; ++ %} ++ ++//No.14 Floating FPU reg operation : dst <-- op reg ++ pipe_class fpu1_regF(regF dst, regF src) %{ ++ src : RD(read); ++ dst : WB(write); ++ DECODE : ID; ++ FPU : CA; ++ %} ++ ++ pipe_class long_memory_op() %{ ++ instruction_count(10); multiple_bundles; force_serialization; ++ fixed_latency(30); ++ %} ++ ++ pipe_class simple_call() %{ ++ instruction_count(10); multiple_bundles; force_serialization; ++ fixed_latency(200); ++ BR : RD; ++ %} ++ ++ pipe_class call() %{ ++ instruction_count(10); multiple_bundles; force_serialization; ++ fixed_latency(200); ++ %} ++ ++ //FIXME: ++ //No.9 Piple slow : for multi-instructions ++ pipe_class pipe_slow( ) %{ ++ instruction_count(20); ++ force_serialization; ++ multiple_bundles; ++ fixed_latency(50); ++ %} ++ ++%} ++ ++ ++ ++//----------INSTRUCTIONS------------------------------------------------------- 
++// ++// match -- States which machine-independent subtree may be replaced ++// by this instruction. ++// ins_cost -- The estimated cost of this instruction is used by instruction ++// selection to identify a minimum cost tree of machine ++// instructions that matches a tree of machine-independent ++// instructions. ++// format -- A string providing the disassembly for this instruction. ++// The value of an instruction's operand may be inserted ++// by referring to it with a '$' prefix. ++// opcode -- Three instruction opcodes may be provided. These are referred ++// to within an encode class as $primary, $secondary, and $tertiary ++// respectively. The primary opcode is commonly used to ++// indicate the type of machine instruction, while secondary ++// and tertiary are often used for prefix options or addressing ++// modes. ++// ins_encode -- A list of encode classes with parameters. The encode class ++// name must have been defined in an 'enc_class' specification ++// in the encode section of the architecture description. ++ ++ ++// Load Integer ++instruct loadI(mRegI dst, memory mem) %{ ++ match(Set dst (LoadI mem)); ++ ++ ins_cost(125); ++ format %{ "ld_w $dst, $mem #@loadI" %} ++ ins_encode %{ ++ __ loadstore_enc($dst$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, C2_MacroAssembler::LOAD_INT); ++ %} ++ ins_pipe( ialu_loadI ); ++%} ++ ++instruct loadI_convI2L(mRegL dst, memory mem) %{ ++ match(Set dst (ConvI2L (LoadI mem))); ++ ++ ins_cost(125); ++ format %{ "ld_w $dst, $mem #@loadI_convI2L" %} ++ ins_encode %{ ++ __ loadstore_enc($dst$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, C2_MacroAssembler::LOAD_INT); ++ %} ++ ins_pipe( ialu_loadI ); ++%} ++ ++// Load Integer (32 bit signed) to Byte (8 bit signed) ++instruct loadI2B(mRegI dst, memory mem, immI_24 twentyfour) %{ ++ match(Set dst (RShiftI (LShiftI (LoadI mem) twentyfour) twentyfour)); ++ ++ ins_cost(125); ++ format %{ "ld_b $dst, $mem\t# int -> byte #@loadI2B" %} ++ ins_encode %{ ++ __ loadstore_enc($dst$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, C2_MacroAssembler::LOAD_BYTE); ++ %} ++ ins_pipe(ialu_loadI); ++%} ++ ++// Load Integer (32 bit signed) to Unsigned Byte (8 bit UNsigned) ++instruct loadI2UB(mRegI dst, memory mem, immI_255 mask) %{ ++ match(Set dst (AndI (LoadI mem) mask)); ++ ++ ins_cost(125); ++ format %{ "ld_bu $dst, $mem\t# int -> ubyte #@loadI2UB" %} ++ ins_encode %{ ++ __ loadstore_enc($dst$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, C2_MacroAssembler::LOAD_U_BYTE); ++ %} ++ ins_pipe(ialu_loadI); ++%} ++ ++// Load Integer (32 bit signed) to Short (16 bit signed) ++instruct loadI2S(mRegI dst, memory mem, immI_16 sixteen) %{ ++ match(Set dst (RShiftI (LShiftI (LoadI mem) sixteen) sixteen)); ++ ++ ins_cost(125); ++ format %{ "ld_h $dst, $mem\t# int -> short #@loadI2S" %} ++ ins_encode %{ ++ __ loadstore_enc($dst$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, C2_MacroAssembler::LOAD_SHORT); ++ %} ++ ins_pipe(ialu_loadI); ++%} ++ ++// Load Integer (32 bit signed) to Unsigned Short/Char (16 bit UNsigned) ++instruct loadI2US(mRegI dst, memory mem, immI_65535 mask) %{ ++ match(Set dst (AndI (LoadI mem) mask)); ++ ++ ins_cost(125); ++ format %{ "ld_hu $dst, $mem\t# int -> ushort/char #@loadI2US" %} ++ ins_encode %{ ++ __ loadstore_enc($dst$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, C2_MacroAssembler::LOAD_U_SHORT); ++ %} ++ ins_pipe(ialu_loadI); ++%} ++ ++// Load Long. 
++instruct loadL(mRegL dst, memory mem) %{ ++// predicate(!((LoadLNode*)n)->require_atomic_access()); ++ match(Set dst (LoadL mem)); ++ ++ ins_cost(250); ++ format %{ "ld_d $dst, $mem #@loadL" %} ++ ins_encode %{ ++ __ loadstore_enc($dst$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, C2_MacroAssembler::LOAD_LONG); ++ %} ++ ins_pipe( ialu_loadL ); ++%} ++ ++// Load Long - UNaligned ++instruct loadL_unaligned(mRegL dst, memory mem) %{ ++ match(Set dst (LoadL_unaligned mem)); ++ ++ // FIXME: Need more effective ldl/ldr ++ ins_cost(450); ++ format %{ "ld_d $dst, $mem #@loadL_unaligned\n\t" %} ++ ins_encode %{ ++ __ loadstore_enc($dst$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, C2_MacroAssembler::LOAD_LONG); ++ %} ++ ins_pipe( ialu_loadL ); ++%} ++ ++// Store Long ++instruct storeL_reg(memory mem, mRegL src) %{ ++ match(Set mem (StoreL mem src)); ++ predicate(!needs_releasing_store(n)); ++ ++ ins_cost(200); ++ format %{ "st_d $mem, $src #@storeL_reg\n" %} ++ ins_encode %{ ++ __ loadstore_enc($src$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, C2_MacroAssembler::STORE_LONG); ++ %} ++ ins_pipe( ialu_storeL ); ++%} ++ ++instruct storeL_reg_volatile(indirect mem, mRegL src) %{ ++ match(Set mem (StoreL mem src)); ++ ++ ins_cost(205); ++ format %{ "amswap_db_d R0, $src, $mem #@storeL_reg\n" %} ++ ins_encode %{ ++ __ amswap_db_d(R0, $src$$Register, as_Register($mem$$base)); ++ %} ++ ins_pipe( ialu_storeL ); ++%} ++ ++instruct storeL_immL_0(memory mem, immL_0 zero) %{ ++ match(Set mem (StoreL mem zero)); ++ predicate(!needs_releasing_store(n)); ++ ++ ins_cost(180); ++ format %{ "st_d zero, $mem #@storeL_immL_0" %} ++ ins_encode %{ ++ __ loadstore_enc(R0, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, C2_MacroAssembler::STORE_LONG); ++ %} ++ ins_pipe( ialu_storeL ); ++%} ++ ++instruct storeL_immL_0_volatile(indirect mem, immL_0 zero) %{ ++ match(Set mem (StoreL mem zero)); ++ ++ ins_cost(185); ++ format %{ "amswap_db_d AT, R0, $mem #@storeL_immL_0" %} ++ ins_encode %{ ++ __ amswap_db_d(AT, R0, as_Register($mem$$base)); ++ %} ++ ins_pipe( ialu_storeL ); ++%} ++ ++// Load Compressed Pointer ++instruct loadN(mRegN dst, memory mem) ++%{ ++ match(Set dst (LoadN mem)); ++ ++ ins_cost(125); // XXX ++ format %{ "ld_wu $dst, $mem\t# compressed ptr @ loadN" %} ++ ins_encode %{ ++ relocInfo::relocType disp_reloc = $mem->disp_reloc(); ++ assert(disp_reloc == relocInfo::none, "cannot have disp"); ++ __ loadstore_enc($dst$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, C2_MacroAssembler::LOAD_U_INT); ++ %} ++ ins_pipe( ialu_loadI ); // XXX ++%} ++ ++instruct loadN2P(mRegP dst, memory mem) ++%{ ++ match(Set dst (DecodeN (LoadN mem))); ++ predicate(CompressedOops::base() == NULL && CompressedOops::shift() == 0); ++ ++ ins_cost(125); // XXX ++ format %{ "ld_wu $dst, $mem\t# @ loadN2P" %} ++ ins_encode %{ ++ relocInfo::relocType disp_reloc = $mem->disp_reloc(); ++ assert(disp_reloc == relocInfo::none, "cannot have disp"); ++ __ loadstore_enc($dst$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, C2_MacroAssembler::LOAD_U_INT); ++ %} ++ ins_pipe( ialu_loadI ); // XXX ++%} ++ ++// Load Pointer ++instruct loadP(mRegP dst, memory mem) %{ ++ match(Set dst (LoadP mem)); ++ predicate(n->as_Load()->barrier_data() == 0); ++ ++ ins_cost(125); ++ format %{ "ld_d $dst, $mem #@loadP" %} ++ ins_encode %{ ++ relocInfo::relocType disp_reloc = $mem->disp_reloc(); ++ assert(disp_reloc == relocInfo::none, "cannot have disp"); ++ __ block_comment("loadP"); ++ __ 
loadstore_enc($dst$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, C2_MacroAssembler::LOAD_LONG); ++ %} ++ ins_pipe( ialu_loadI ); ++%} ++ ++// Load Klass Pointer ++instruct loadKlass(mRegP dst, memory mem) %{ ++ match(Set dst (LoadKlass mem)); ++ ++ ins_cost(125); ++ format %{ "MOV $dst,$mem @ loadKlass" %} ++ ins_encode %{ ++ relocInfo::relocType disp_reloc = $mem->disp_reloc(); ++ assert(disp_reloc == relocInfo::none, "cannot have disp"); ++ __ loadstore_enc($dst$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, C2_MacroAssembler::LOAD_LONG); ++ %} ++ ins_pipe( ialu_loadI ); ++%} ++ ++// Load narrow Klass Pointer ++instruct loadNKlass(mRegN dst, memory mem) ++%{ ++ match(Set dst (LoadNKlass mem)); ++ ++ ins_cost(125); // XXX ++ format %{ "ld_wu $dst, $mem\t# compressed klass ptr @ loadNKlass" %} ++ ins_encode %{ ++ relocInfo::relocType disp_reloc = $mem->disp_reloc(); ++ assert(disp_reloc == relocInfo::none, "cannot have disp"); ++ __ loadstore_enc($dst$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, C2_MacroAssembler::LOAD_U_INT); ++ %} ++ ins_pipe( ialu_loadI ); // XXX ++%} ++ ++instruct loadN2PKlass(mRegP dst, memory mem) ++%{ ++ match(Set dst (DecodeNKlass (LoadNKlass mem))); ++ predicate(CompressedKlassPointers::base() == NULL && CompressedKlassPointers::shift() == 0); ++ ++ ins_cost(125); // XXX ++ format %{ "ld_wu $dst, $mem\t# compressed klass ptr @ loadN2PKlass" %} ++ ins_encode %{ ++ relocInfo::relocType disp_reloc = $mem->disp_reloc(); ++ assert(disp_reloc == relocInfo::none, "cannot have disp"); ++ __ loadstore_enc($dst$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, C2_MacroAssembler::LOAD_U_INT); ++ %} ++ ins_pipe( ialu_loadI ); // XXX ++%} ++ ++// Load Constant ++instruct loadConI(mRegI dst, immI src) %{ ++ match(Set dst src); ++ ++ ins_cost(120); ++ format %{ "mov $dst, $src #@loadConI" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ int value = $src$$constant; ++ __ li(dst, value); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++ ++instruct loadConL(mRegL dst, immL src) %{ ++ match(Set dst src); ++ ins_cost(120); ++ format %{ "li $dst, $src @ loadConL" %} ++ ins_encode %{ ++ __ li($dst$$Register, $src$$constant); ++ %} ++ ins_pipe(ialu_regL_regL); ++%} ++ ++// Load Range ++instruct loadRange(mRegI dst, memory_loadRange mem) %{ ++ match(Set dst (LoadRange mem)); ++ ++ ins_cost(125); ++ format %{ "MOV $dst,$mem @ loadRange" %} ++ ins_encode %{ ++ __ loadstore_enc($dst$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, C2_MacroAssembler::LOAD_INT); ++ %} ++ ins_pipe( ialu_loadI ); ++%} ++ ++ ++instruct storeP(memory mem, mRegP src ) %{ ++ match(Set mem (StoreP mem src)); ++ predicate(!needs_releasing_store(n)); ++ ++ ins_cost(125); ++ format %{ "st_d $src, $mem #@storeP" %} ++ ins_encode %{ ++ __ loadstore_enc($src$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, C2_MacroAssembler::STORE_LONG); ++ %} ++ ins_pipe( ialu_storeI ); ++%} ++ ++instruct storeP_volatile(indirect mem, mRegP src ) %{ ++ match(Set mem (StoreP mem src)); ++ ++ ins_cost(130); ++ format %{ "amswap_db_d R0, $src, $mem #@storeP" %} ++ ins_encode %{ ++ __ amswap_db_d(R0, $src$$Register, as_Register($mem$$base)); ++ %} ++ ins_pipe( ialu_storeI ); ++%} ++ ++// Store NULL Pointer, mark word, or other simple pointer constant. 
++instruct storeImmP_immP_0(memory mem, immP_0 zero) %{ ++ match(Set mem (StoreP mem zero)); ++ predicate(!needs_releasing_store(n)); ++ ++ ins_cost(125); ++ format %{ "mov $mem, $zero #@storeImmP_0" %} ++ ins_encode %{ ++ __ loadstore_enc(R0, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, C2_MacroAssembler::STORE_LONG); ++ %} ++ ins_pipe( ialu_storeI ); ++%} ++ ++instruct storeImmP_immP_0_volatile(indirect mem, immP_0 zero) %{ ++ match(Set mem (StoreP mem zero)); ++ ++ ins_cost(130); ++ format %{ "amswap_db_d AT, R0, $mem #@storeImmP_0" %} ++ ins_encode %{ ++ __ amswap_db_d(AT, R0, as_Register($mem$$base)); ++ %} ++ ins_pipe( ialu_storeI ); ++%} ++ ++// Store Compressed Pointer ++instruct storeN(memory mem, mRegN src) ++%{ ++ match(Set mem (StoreN mem src)); ++ predicate(!needs_releasing_store(n)); ++ ++ ins_cost(125); // XXX ++ format %{ "st_w $mem, $src\t# compressed ptr @ storeN" %} ++ ins_encode %{ ++ __ loadstore_enc($src$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, C2_MacroAssembler::STORE_INT); ++ %} ++ ins_pipe( ialu_storeI ); ++%} ++ ++instruct storeN_volatile(indirect mem, mRegN src) ++%{ ++ match(Set mem (StoreN mem src)); ++ ++ ins_cost(130); // XXX ++ format %{ "amswap_db_w R0, $src, $mem # compressed ptr @ storeN" %} ++ ins_encode %{ ++ __ amswap_db_w(R0, $src$$Register, as_Register($mem$$base)); ++ %} ++ ins_pipe( ialu_storeI ); ++%} ++ ++instruct storeP2N(memory mem, mRegP src) ++%{ ++ match(Set mem (StoreN mem (EncodeP src))); ++ predicate(CompressedOops::base() == NULL && CompressedOops::shift() == 0 && !needs_releasing_store(n)); ++ ++ ins_cost(125); // XXX ++ format %{ "st_w $mem, $src\t# @ storeP2N" %} ++ ins_encode %{ ++ __ loadstore_enc($src$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, C2_MacroAssembler::STORE_INT); ++ %} ++ ins_pipe( ialu_storeI ); ++%} ++ ++instruct storeP2N_volatile(indirect mem, mRegP src) ++%{ ++ match(Set mem (StoreN mem (EncodeP src))); ++ predicate(CompressedOops::base() == NULL && CompressedOops::shift() == 0); ++ ++ ins_cost(130); // XXX ++ format %{ "amswap_db_w R0, $src, $mem # @ storeP2N" %} ++ ins_encode %{ ++ __ amswap_db_w(R0, $src$$Register, as_Register($mem$$base)); ++ %} ++ ins_pipe( ialu_storeI ); ++%} ++ ++instruct storeNKlass(memory mem, mRegN src) ++%{ ++ match(Set mem (StoreNKlass mem src)); ++ predicate(!needs_releasing_store(n)); ++ ++ ins_cost(125); // XXX ++ format %{ "st_w $mem, $src\t# compressed klass ptr @ storeNKlass" %} ++ ins_encode %{ ++ __ loadstore_enc($src$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, C2_MacroAssembler::STORE_INT); ++ %} ++ ins_pipe( ialu_storeI ); ++%} ++ ++instruct storeNKlass_volatile(indirect mem, mRegN src) ++%{ ++ match(Set mem (StoreNKlass mem src)); ++ ++ ins_cost(130); ++ format %{ "amswap_db_w R0, $src, $mem # compressed klass ptr @ storeNKlass" %} ++ ins_encode %{ ++ __ amswap_db_w(R0, $src$$Register, as_Register($mem$$base)); ++ %} ++ ins_pipe( ialu_storeI ); ++%} ++ ++instruct storeP2NKlass(memory mem, mRegP src) ++%{ ++ match(Set mem (StoreNKlass mem (EncodePKlass src))); ++ predicate(CompressedKlassPointers::base() == NULL && CompressedKlassPointers::shift() == 0 && !needs_releasing_store(n)); ++ ++ ins_cost(125); // XXX ++ format %{ "st_w $mem, $src\t# @ storeP2NKlass" %} ++ ins_encode %{ ++ __ loadstore_enc($src$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, C2_MacroAssembler::STORE_INT); ++ %} ++ ins_pipe( ialu_storeI ); ++%} ++ ++instruct storeP2NKlass_volatile(indirect mem, mRegP src) ++%{ ++ match(Set mem 
(StoreNKlass mem (EncodePKlass src))); ++ predicate(CompressedKlassPointers::base() == NULL && CompressedKlassPointers::shift() == 0); ++ ++ ins_cost(130); ++ format %{ "amswap_db_w R0, $src, $mem # @ storeP2NKlass" %} ++ ins_encode %{ ++ __ amswap_db_w(R0, $src$$Register, as_Register($mem$$base)); ++ %} ++ ins_pipe( ialu_storeI ); ++%} ++ ++instruct storeImmN_immN_0(memory mem, immN_0 zero) ++%{ ++ match(Set mem (StoreN mem zero)); ++ predicate(!needs_releasing_store(n)); ++ ++ ins_cost(125); // XXX ++ format %{ "storeN0 zero, $mem\t# compressed ptr" %} ++ ins_encode %{ ++ __ loadstore_enc(R0, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, C2_MacroAssembler::STORE_INT); ++ %} ++ ins_pipe( ialu_storeI ); ++%} ++ ++instruct storeImmN_immN_0_volatile(indirect mem, immN_0 zero) ++%{ ++ match(Set mem (StoreN mem zero)); ++ ++ ins_cost(130); // XXX ++ format %{ "amswap_db_w AT, R0, $mem # compressed ptr" %} ++ ins_encode %{ ++ __ amswap_db_w(AT, R0, as_Register($mem$$base)); ++ %} ++ ins_pipe( ialu_storeI ); ++%} ++ ++// Store Byte ++instruct storeB_immB_0(memory mem, immI_0 zero) %{ ++ match(Set mem (StoreB mem zero)); ++ ++ format %{ "mov $mem, zero #@storeB_immB_0" %} ++ ins_encode %{ ++ __ loadstore_enc(R0, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, C2_MacroAssembler::STORE_BYTE); ++ %} ++ ins_pipe( ialu_storeI ); ++%} ++ ++instruct storeB(memory mem, mRegIorL2I src) %{ ++ match(Set mem (StoreB mem src)); ++ ++ ins_cost(125); ++ format %{ "st_b $src, $mem #@storeB" %} ++ ins_encode %{ ++ __ loadstore_enc($src$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, C2_MacroAssembler::STORE_BYTE); ++ %} ++ ins_pipe( ialu_storeI ); ++%} ++ ++// Load Byte (8bit signed) ++instruct loadB(mRegI dst, memory mem) %{ ++ match(Set dst (LoadB mem)); ++ ++ ins_cost(125); ++ format %{ "ld_b $dst, $mem #@loadB" %} ++ ins_encode %{ ++ __ loadstore_enc($dst$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, C2_MacroAssembler::LOAD_BYTE); ++ %} ++ ins_pipe( ialu_loadI ); ++%} ++ ++instruct loadB_convI2L(mRegL dst, memory mem) %{ ++ match(Set dst (ConvI2L (LoadB mem))); ++ ++ ins_cost(125); ++ format %{ "ld_b $dst, $mem #@loadB_convI2L" %} ++ ins_encode %{ ++ __ loadstore_enc($dst$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, C2_MacroAssembler::LOAD_BYTE); ++ %} ++ ins_pipe( ialu_loadI ); ++%} ++ ++// Load Byte (8bit UNsigned) ++instruct loadUB(mRegI dst, memory mem) %{ ++ match(Set dst (LoadUB mem)); ++ ++ ins_cost(125); ++ format %{ "ld_bu $dst, $mem #@loadUB" %} ++ ins_encode %{ ++ __ loadstore_enc($dst$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, C2_MacroAssembler::LOAD_U_BYTE); ++ %} ++ ins_pipe( ialu_loadI ); ++%} ++ ++instruct loadUB_convI2L(mRegL dst, memory mem) %{ ++ match(Set dst (ConvI2L (LoadUB mem))); ++ ++ ins_cost(125); ++ format %{ "ld_bu $dst, $mem #@loadUB_convI2L" %} ++ ins_encode %{ ++ __ loadstore_enc($dst$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, C2_MacroAssembler::LOAD_U_BYTE); ++ %} ++ ins_pipe( ialu_loadI ); ++%} ++ ++// Load Short (16bit signed) ++instruct loadS(mRegI dst, memory mem) %{ ++ match(Set dst (LoadS mem)); ++ ++ ins_cost(125); ++ format %{ "ld_h $dst, $mem #@loadS" %} ++ ins_encode %{ ++ __ loadstore_enc($dst$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, C2_MacroAssembler::LOAD_SHORT); ++ %} ++ ins_pipe( ialu_loadI ); ++%} ++ ++// Load Short (16 bit signed) to Byte (8 bit signed) ++instruct loadS2B(mRegI dst, memory mem, immI_24 twentyfour) %{ ++ match(Set dst (RShiftI (LShiftI 
(LoadS mem) twentyfour) twentyfour)); ++ ++ ins_cost(125); ++ format %{ "ld_b $dst, $mem\t# short -> byte #@loadS2B" %} ++ ins_encode %{ ++ __ loadstore_enc($dst$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, C2_MacroAssembler::LOAD_BYTE); ++ %} ++ ins_pipe(ialu_loadI); ++%} ++ ++instruct loadS_convI2L(mRegL dst, memory mem) %{ ++ match(Set dst (ConvI2L (LoadS mem))); ++ ++ ins_cost(125); ++ format %{ "ld_h $dst, $mem #@loadS_convI2L" %} ++ ins_encode %{ ++ __ loadstore_enc($dst$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, C2_MacroAssembler::LOAD_SHORT); ++ %} ++ ins_pipe( ialu_loadI ); ++%} ++ ++// Store Integer Immediate ++instruct storeI_immI_0(memory mem, immI_0 zero) %{ ++ match(Set mem (StoreI mem zero)); ++ predicate(!needs_releasing_store(n)); ++ ++ ins_cost(120); ++ format %{ "mov $mem, zero #@storeI_immI_0" %} ++ ins_encode %{ ++ __ loadstore_enc(R0, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, C2_MacroAssembler::STORE_INT); ++ %} ++ ins_pipe( ialu_storeI ); ++%} ++ ++instruct storeI_immI_0_volatile(indirect mem, immI_0 zero) %{ ++ match(Set mem (StoreI mem zero)); ++ ++ ins_cost(125); ++ format %{ "amswap_db_w AT, R0, $mem #@storeI_immI_0" %} ++ ins_encode %{ ++ __ amswap_db_w(AT, R0, as_Register($mem$$base)); ++ %} ++ ins_pipe( ialu_storeI ); ++%} ++ ++// Store Integer ++instruct storeI(memory mem, mRegIorL2I src) %{ ++ match(Set mem (StoreI mem src)); ++ predicate(!needs_releasing_store(n)); ++ ++ ins_cost(125); ++ format %{ "st_w $mem, $src #@storeI" %} ++ ins_encode %{ ++ __ loadstore_enc($src$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, C2_MacroAssembler::STORE_INT); ++ %} ++ ins_pipe( ialu_storeI ); ++%} ++ ++instruct storeI_volatile(indirect mem, mRegIorL2I src) %{ ++ match(Set mem (StoreI mem src)); ++ ++ ins_cost(130); ++ format %{ "amswap_db_w R0, $src, $mem #@storeI" %} ++ ins_encode %{ ++ __ amswap_db_w(R0, $src$$Register, as_Register($mem$$base)); ++ %} ++ ins_pipe( ialu_storeI ); ++%} ++ ++// Load Float ++instruct loadF(regF dst, memory mem) %{ ++ match(Set dst (LoadF mem)); ++ ++ ins_cost(150); ++ format %{ "loadF $dst, $mem #@loadF" %} ++ ins_encode %{ ++ __ loadstore_enc($dst$$FloatRegister, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, C2_MacroAssembler::LOAD_FLOAT); ++ %} ++ ins_pipe( ialu_loadI ); ++%} ++ ++instruct loadConP_general(mRegP dst, immP src) %{ ++ match(Set dst src); ++ ++ ins_cost(120); ++ format %{ "li $dst, $src #@loadConP_general" %} ++ ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ long* value = (long*)$src$$constant; ++ ++ if ($src->constant_reloc() == relocInfo::metadata_type){ ++ __ mov_metadata(dst, (Metadata*)value); ++ } else if($src->constant_reloc() == relocInfo::oop_type){ ++ __ movoop(dst, (jobject)value, /*immediate*/true); ++ } else if ($src->constant_reloc() == relocInfo::none) { ++ __ li(dst, (long)value); ++ } ++ %} ++ ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++instruct loadConP_no_oop_cheap(mRegP dst, immP_no_oop_cheap src) %{ ++ match(Set dst src); ++ ++ ins_cost(80); ++ format %{ "li $dst, $src @ loadConP_no_oop_cheap" %} ++ ++ ins_encode %{ ++ if ($src->constant_reloc() == relocInfo::metadata_type) { ++ __ mov_metadata($dst$$Register, (Metadata*)$src$$constant); ++ } else { ++ __ li($dst$$Register, $src$$constant); ++ } ++ %} ++ ++ ins_pipe(ialu_regI_regI); ++%} ++ ++instruct loadConP_immP_0(mRegP dst, immP_0 src) ++%{ ++ match(Set dst src); ++ ++ ins_cost(50); ++ format %{ "mov $dst, R0\t# ptr" %} ++ ins_encode %{ ++ Register dst_reg = $dst$$Register; ++ __ 
add_d(dst_reg, R0, R0); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++instruct loadConN_immN_0(mRegN dst, immN_0 src) %{ ++ match(Set dst src); ++ format %{ "move $dst, R0\t# compressed NULL ptr" %} ++ ins_encode %{ ++ __ move($dst$$Register, R0); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++instruct loadConN(mRegN dst, immN src) %{ ++ match(Set dst src); ++ ++ ins_cost(125); ++ format %{ "li $dst, $src\t# compressed ptr @ loadConN" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ __ set_narrow_oop(dst, (jobject)$src$$constant); ++ %} ++ ins_pipe( ialu_regI_regI ); // XXX ++%} ++ ++instruct loadConNKlass(mRegN dst, immNKlass src) %{ ++ match(Set dst src); ++ ++ ins_cost(125); ++ format %{ "li $dst, $src\t# compressed klass ptr @ loadConNKlass" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ __ set_narrow_klass(dst, (Klass*)$src$$constant); ++ %} ++ ins_pipe( ialu_regI_regI ); // XXX ++%} ++ ++//FIXME ++// Tail Call; Jump from runtime stub to Java code. ++// Also known as an 'interprocedural jump'. ++// Target of jump will eventually return to caller. ++// TailJump below removes the return address. ++instruct TailCalljmpInd(mRegP jump_target, mRegP method_ptr) %{ ++ match(TailCall jump_target method_ptr); ++ ins_cost(300); ++ format %{ "JMP $jump_target \t# @TailCalljmpInd" %} ++ ++ ins_encode %{ ++ Register target = $jump_target$$Register; ++ Register ptr = $method_ptr$$Register; ++ ++ // RA will be used in generate_forward_exception() ++ __ push(RA); ++ ++ __ move(S3, ptr); ++ __ jr(target); ++ %} ++ ++ ins_pipe( pipe_jump ); ++%} ++ ++// Create exception oop: created by stack-crawling runtime code. ++// Created exception is now available to this handler, and is setup ++// just prior to jumping to this handler. No code emitted. ++instruct CreateException( a0_RegP ex_oop ) ++%{ ++ match(Set ex_oop (CreateEx)); ++ ++ // use the following format syntax ++ format %{ "# exception oop is in A0; no code emitted @CreateException" %} ++ ins_encode %{ ++ // X86 leaves this function empty ++ __ block_comment("CreateException is empty in LA"); ++ %} ++ ins_pipe( empty ); ++// ins_pipe( pipe_jump ); ++%} ++ ++ ++/* The mechanism of exception handling is clear now. ++ ++- Common try/catch: ++ [stubGenerator_loongarch.cpp] generate_forward_exception() ++ |- V0, V1 are created ++ |- T4 <= SharedRuntime::exception_handler_for_return_address ++ `- jr T4 ++ `- the caller's exception_handler ++ `- jr OptoRuntime::exception_blob ++ `- here ++- Rethrow(e.g. 'unwind'): ++ * The callee: ++ |- an exception is triggered during execution ++ `- exits the callee method through RethrowException node ++ |- The callee pushes exception_oop(T0) and exception_pc(RA) ++ `- The callee jumps to OptoRuntime::rethrow_stub() ++ * In OptoRuntime::rethrow_stub: ++ |- The VM calls _rethrow_Java to determine the return address in the caller method ++ `- exits the stub with tailjmpInd ++ |- pops exception_oop(V0) and exception_pc(V1) ++ `- jumps to the return address(usually an exception_handler) ++ * The caller: ++ `- continues processing the exception_blob with V0/V1 ++*/ ++ ++// Rethrow exception: ++// The exception oop will come in the first argument position. ++// Then JUMP (not call) to the rethrow stub code. 
++instruct RethrowException() ++%{ ++ match(Rethrow); ++ ++ // use the following format syntax ++ format %{ "JMP rethrow_stub #@RethrowException" %} ++ ins_encode %{ ++ __ block_comment("@ RethrowException"); ++ ++ cbuf.set_insts_mark(); ++ cbuf.relocate(cbuf.insts_mark(), runtime_call_Relocation::spec()); ++ ++ // call OptoRuntime::rethrow_stub to get the exception handler in parent method ++ __ patchable_jump((address)OptoRuntime::rethrow_stub()); ++ %} ++ ins_pipe( pipe_jump ); ++%} ++ ++// ============================================================================ ++// Branch Instructions --- long offset versions ++ ++// Jump Direct ++instruct jmpDir_long(label labl) %{ ++ match(Goto); ++ effect(USE labl); ++ ++ ins_cost(300); ++ format %{ "JMP $labl #@jmpDir_long" %} ++ ++ ins_encode %{ ++ Label* L = $labl$$label; ++ __ jmp_far(*L); ++ %} ++ ++ ins_pipe( pipe_jump ); ++ //ins_pc_relative(1); ++%} ++ ++// Jump Direct Conditional - Label defines a relative address from Jcc+1 ++instruct jmpLoopEnd_long(cmpOp cop, mRegI src1, mRegI src2, label labl) %{ ++ match(CountedLoopEnd cop (CmpI src1 src2)); ++ effect(USE labl); ++ ++ ins_cost(300); ++ format %{ "J$cop $src1, $src2, $labl\t# Loop end @ jmpLoopEnd_long" %} ++ ins_encode %{ ++ Register op1 = $src1$$Register; ++ Register op2 = $src2$$Register; ++ Label* L = $labl$$label; ++ int flag = $cop$$cmpcode; ++ ++ __ cmp_branch_long(flag, op1, op2, L, true /* signed */); ++ %} ++ ins_pipe( pipe_jump ); ++ ins_pc_relative(1); ++%} ++ ++instruct jmpLoopEnd_reg_immI_long(cmpOp cop, mRegI src1, immI src2, label labl) %{ ++ match(CountedLoopEnd cop (CmpI src1 src2)); ++ effect(USE labl); ++ ++ ins_cost(300); ++ format %{ "J$cop $src1, $src2, $labl\t# Loop end @ jmpLoopEnd_reg_immI_long" %} ++ ins_encode %{ ++ Register op1 = $src1$$Register; ++ Label* L = $labl$$label; ++ int flag = $cop$$cmpcode; ++ int val = $src2$$constant; ++ ++ if (val == 0) { ++ __ cmp_branch_long(flag, op1, R0, L, true /* signed */); ++ } else { ++ __ li(AT, val); ++ __ cmp_branch_long(flag, op1, AT, L, true /* signed */); ++ } ++ %} ++ ins_pipe( pipe_jump ); ++ ins_pc_relative(1); ++%} ++ ++ ++// This match pattern is created for StoreIConditional since I cannot match IfNode without a RegFlags! 
++instruct jmpCon_flags_long(cmpOpEqNe cop, FlagsReg cr, label labl) %{ ++ match(If cop cr); ++ effect(USE labl); ++ ++ ins_cost(300); ++ format %{ "J$cop $labl #LoongArch uses T0 as equivalent to eflag @jmpCon_flags_long" %} ++ ++ ins_encode %{ ++ Label* L = $labl$$label; ++ switch($cop$$cmpcode) { ++ case 0x01: //equal ++ __ bne_long($cr$$Register, R0, *L); ++ break; ++ case 0x02: //not equal ++ __ beq_long($cr$$Register, R0, *L); ++ break; ++ default: ++ Unimplemented(); ++ } ++ %} ++ ++ ins_pipe( pipe_jump ); ++ ins_pc_relative(1); ++%} ++ ++// Conditional jumps ++instruct branchConP_0_long(cmpOpEqNe cmp, mRegP op1, immP_0 zero, label labl) %{ ++ match(If cmp (CmpP op1 zero)); ++ effect(USE labl); ++ ++ ins_cost(180); ++ format %{ "b$cmp $op1, R0, $labl #@branchConP_0_long" %} ++ ++ ins_encode %{ ++ Register op1 = $op1$$Register; ++ Label* L = $labl$$label; ++ int flag = $cmp$$cmpcode; ++ ++ __ cmp_branch_long(flag, op1, R0, L, true /* signed */); ++ %} ++ ++ ins_pc_relative(1); ++ ins_pipe( pipe_alu_branch ); ++%} ++ ++instruct branchConN2P_0_long(cmpOpEqNe cmp, mRegN op1, immP_0 zero, label labl) %{ ++ match(If cmp (CmpP (DecodeN op1) zero)); ++ predicate(CompressedOops::base() == NULL && CompressedOops::shift() == 0); ++ effect(USE labl); ++ ++ ins_cost(180); ++ format %{ "b$cmp $op1, R0, $labl #@branchConN2P_0_long" %} ++ ++ ins_encode %{ ++ Register op1 = $op1$$Register; ++ Label* L = $labl$$label; ++ int flag = $cmp$$cmpcode; ++ ++ __ cmp_branch_long(flag, op1, R0, L, true /* signed */); ++ %} ++ ++ ins_pc_relative(1); ++ ins_pipe( pipe_alu_branch ); ++%} ++ ++ ++instruct branchConP_long(cmpOp cmp, mRegP op1, mRegP op2, label labl) %{ ++ match(If cmp (CmpP op1 op2)); ++// predicate(can_branch_register(_kids[0]->_leaf, _kids[1]->_leaf)); ++ effect(USE labl); ++ ++ ins_cost(200); ++ format %{ "b$cmp $op1, $op2, $labl #@branchConP_long" %} ++ ++ ins_encode %{ ++ Register op1 = $op1$$Register; ++ Register op2 = $op2$$Register; ++ Label* L = $labl$$label; ++ int flag = $cmp$$cmpcode; ++ ++ __ cmp_branch_long(flag, op1, op2, L, false /* unsigned */); ++ %} ++ ++ ins_pc_relative(1); ++ ins_pipe( pipe_alu_branch ); ++%} ++ ++instruct cmpN_null_branch_long(cmpOpEqNe cmp, mRegN op1, immN_0 null, label labl) %{ ++ match(If cmp (CmpN op1 null)); ++ effect(USE labl); ++ ++ ins_cost(180); ++ format %{ "CMP $op1,0\t! compressed ptr\n\t" ++ "BP$cmp $labl @ cmpN_null_branch_long" %} ++ ins_encode %{ ++ Register op1 = $op1$$Register; ++ Label* L = $labl$$label; ++ int flag = $cmp$$cmpcode; ++ ++ __ cmp_branch_long(flag, op1, R0, L, true /* signed */); ++ %} ++//TODO: pipe_branchP or create pipe_branchN LEE ++ ins_pc_relative(1); ++ ins_pipe( pipe_alu_branch ); ++%} ++ ++instruct cmpN_reg_branch_long(cmpOp cmp, mRegN op1, mRegN op2, label labl) %{ ++ match(If cmp (CmpN op1 op2)); ++ effect(USE labl); ++ ++ ins_cost(180); ++ format %{ "CMP $op1,$op2\t! 
compressed ptr\n\t" ++ "BP$cmp $labl @ cmpN_reg_branch_long" %} ++ ins_encode %{ ++ Register op1 = $op1$$Register; ++ Register op2 = $op2$$Register; ++ Label* L = $labl$$label; ++ int flag = $cmp$$cmpcode; ++ ++ __ cmp_branch_long(flag, op1, op2, L, false /* unsigned */); ++ %} ++ ins_pc_relative(1); ++ ins_pipe( pipe_alu_branch ); ++%} ++ ++instruct branchConIU_reg_reg_long(cmpOp cmp, mRegI src1, mRegI src2, label labl) %{ ++ match( If cmp (CmpU src1 src2) ); ++ effect(USE labl); ++ format %{ "BR$cmp $src1, $src2, $labl #@branchConIU_reg_reg_long" %} ++ ++ ins_encode %{ ++ Register op1 = $src1$$Register; ++ Register op2 = $src2$$Register; ++ Label* L = $labl$$label; ++ int flag = $cmp$$cmpcode; ++ ++ __ cmp_branch_long(flag, op1, op2, L, false /* unsigned */); ++ %} ++ ++ ins_pc_relative(1); ++ ins_pipe( pipe_alu_branch ); ++%} ++ ++ ++instruct branchConIU_reg_imm_long(cmpOp cmp, mRegI src1, immI src2, label labl) %{ ++ match( If cmp (CmpU src1 src2) ); ++ effect(USE labl); ++ format %{ "BR$cmp $src1, $src2, $labl #@branchConIU_reg_imm_long" %} ++ ++ ins_encode %{ ++ Register op1 = $src1$$Register; ++ int val = $src2$$constant; ++ Label* L = $labl$$label; ++ int flag = $cmp$$cmpcode; ++ ++ if (val == 0) { ++ __ cmp_branch_long(flag, op1, R0, L, false /* unsigned */); ++ } else { ++ __ li(AT, val); ++ __ cmp_branch_long(flag, op1, AT, L, false /* unsigned */); ++ } ++ %} ++ ++ ins_pc_relative(1); ++ ins_pipe( pipe_alu_branch ); ++%} ++ ++instruct branchConI_reg_reg_long(cmpOp cmp, mRegI src1, mRegI src2, label labl) %{ ++ match( If cmp (CmpI src1 src2) ); ++ effect(USE labl); ++ format %{ "BR$cmp $src1, $src2, $labl #@branchConI_reg_reg_long" %} ++ ++ ins_encode %{ ++ Register op1 = $src1$$Register; ++ Register op2 = $src2$$Register; ++ Label* L = $labl$$label; ++ int flag = $cmp$$cmpcode; ++ ++ __ cmp_branch_long(flag, op1, op2, L, true /* signed */); ++ %} ++ ++ ins_pc_relative(1); ++ ins_pipe( pipe_alu_branch ); ++%} ++ ++instruct branchConI_reg_imm_long(cmpOp cmp, mRegI src1, immI src2, label labl) %{ ++ match( If cmp (CmpI src1 src2) ); ++ effect(USE labl); ++ ins_cost(200); ++ format %{ "BR$cmp $src1, $src2, $labl #@branchConI_reg_imm_long" %} ++ ++ ins_encode %{ ++ Register op1 = $src1$$Register; ++ int val = $src2$$constant; ++ Label* L = $labl$$label; ++ int flag = $cmp$$cmpcode; ++ ++ if (val == 0) { ++ __ cmp_branch_long(flag, op1, R0, L, true /* signed */); ++ } else { ++ __ li(AT, val); ++ __ cmp_branch_long(flag, op1, AT, L, true /* signed */); ++ } ++ %} ++ ++ ins_pc_relative(1); ++ ins_pipe( pipe_alu_branch ); ++%} ++ ++instruct branchConL_regL_regL_long(cmpOp cmp, mRegLorI2L src1, mRegLorI2L src2, label labl) %{ ++ match( If cmp (CmpL src1 src2) ); ++ effect(USE labl); ++ format %{ "BR$cmp $src1, $src2, $labl #@branchConL_regL_regL_long" %} ++ ins_cost(250); ++ ++ ins_encode %{ ++ Register op1 = as_Register($src1$$reg); ++ Register op2 = as_Register($src2$$reg); ++ ++ Label* target = $labl$$label; ++ int flag = $cmp$$cmpcode; ++ ++ __ cmp_branch_long(flag, op1, op2, target, true /* signed */); ++ %} ++ ++ ++ ins_pc_relative(1); ++ ins_pipe( pipe_alu_branch ); ++%} ++ ++instruct branchConUL_regL_regL_long(cmpOp cmp, mRegLorI2L src1, mRegLorI2L src2, label labl) %{ ++ match(If cmp (CmpUL src1 src2)); ++ effect(USE labl); ++ format %{ "BR$cmp $src1, $src2, $labl #@branchConUL_regL_regL_long" %} ++ ins_cost(250); ++ ++ ins_encode %{ ++ Register op1 = as_Register($src1$$reg); ++ Register op2 = as_Register($src2$$reg); ++ Label* target = $labl$$label; ++ int flag = 
$cmp$$cmpcode; ++ ++ __ cmp_branch_long(flag, op1, op2, target, false /* unsigned */); ++ %} ++ ++ ins_pc_relative(1); ++ ins_pipe(pipe_alu_branch); ++%} ++ ++instruct branchConL_regL_immL_long(cmpOp cmp, mRegL src1, immL src2, label labl) %{ ++ match( If cmp (CmpL src1 src2) ); ++ effect(USE labl); ++ format %{ "BR$cmp $src1, $src2, $labl #@branchConL_regL_immL_long" %} ++ ins_cost(180); ++ ++ ins_encode %{ ++ Register op1 = as_Register($src1$$reg); ++ Label* target = $labl$$label; ++ int flag = $cmp$$cmpcode; ++ long val = $src2$$constant; ++ ++ if (val == 0) { ++ __ cmp_branch_long(flag, op1, R0, target, true /* signed */); ++ } else { ++ __ li(AT, val); ++ __ cmp_branch_long(flag, op1, AT, target, true /* signed */); ++ } ++ %} ++ ++ ins_pc_relative(1); ++ ins_pipe( pipe_alu_branch ); ++%} ++ ++instruct branchConUL_regL_immL_long(cmpOp cmp, mRegL src1, immL src2, label labl) %{ ++ match(If cmp (CmpUL src1 src2)); ++ effect(USE labl); ++ format %{ "BR$cmp $src1, $src2, $labl #@branchConUL_regL_immL_long" %} ++ ins_cost(180); ++ ++ ins_encode %{ ++ Register op1 = as_Register($src1$$reg); ++ long val = $src2$$constant; ++ Label* target = $labl$$label; ++ int flag = $cmp$$cmpcode; ++ ++ if (val == 0) { ++ __ cmp_branch_long(flag, op1, R0, target, false /* unsigned */); ++ } else { ++ __ li(AT, val); ++ __ cmp_branch_long(flag, op1, AT, target, false /* unsigned */); ++ } ++ %} ++ ++ ins_pc_relative(1); ++ ins_pipe(pipe_alu_branch); ++%} ++ ++//FIXME ++instruct branchConF_reg_reg_long(cmpOp cmp, regF src1, regF src2, label labl) %{ ++ match( If cmp (CmpF src1 src2) ); ++ effect(USE labl); ++ format %{ "BR$cmp $src1, $src2, $labl #@branchConF_reg_reg_long" %} ++ ++ ins_encode %{ ++ FloatRegister reg_op1 = $src1$$FloatRegister; ++ FloatRegister reg_op2 = $src2$$FloatRegister; ++ Label* L = $labl$$label; ++ int flag = $cmp$$cmpcode; ++ ++ switch(flag) { ++ case 0x01: //equal ++ __ fcmp_ceq_s(FCC0, reg_op1, reg_op2); ++ __ bc1t_long(*L); ++ break; ++ case 0x02: //not_equal ++ __ fcmp_ceq_s(FCC0, reg_op1, reg_op2); ++ __ bc1f_long(*L); ++ break; ++ case 0x03: //greater ++ __ fcmp_cule_s(FCC0, reg_op1, reg_op2); ++ __ bc1f_long(*L); ++ break; ++ case 0x04: //greater_equal ++ __ fcmp_cult_s(FCC0, reg_op1, reg_op2); ++ __ bc1f_long(*L); ++ break; ++ case 0x05: //less ++ __ fcmp_cult_s(FCC0, reg_op1, reg_op2); ++ __ bc1t_long(*L); ++ break; ++ case 0x06: //less_equal ++ __ fcmp_cule_s(FCC0, reg_op1, reg_op2); ++ __ bc1t_long(*L); ++ break; ++ default: ++ Unimplemented(); ++ } ++ %} ++ ++ ins_pc_relative(1); ++ ins_pipe(pipe_slow); ++%} ++ ++instruct branchConD_reg_reg_long(cmpOp cmp, regD src1, regD src2, label labl) %{ ++ match( If cmp (CmpD src1 src2) ); ++ effect(USE labl); ++ format %{ "BR$cmp $src1, $src2, $labl #@branchConD_reg_reg_long" %} ++ ++ ins_encode %{ ++ FloatRegister reg_op1 = $src1$$FloatRegister; ++ FloatRegister reg_op2 = $src2$$FloatRegister; ++ Label* L = $labl$$label; ++ int flag = $cmp$$cmpcode; ++ ++ switch(flag) { ++ case 0x01: //equal ++ __ fcmp_ceq_d(FCC0, reg_op1, reg_op2); ++ __ bc1t_long(*L); ++ break; ++ case 0x02: //not_equal ++ // c_ueq_d cannot distinguish NaN from equal. Double.isNaN(Double) is implemented by 'f != f', so the use of c_ueq_d causes bugs.
++ __ fcmp_ceq_d(FCC0, reg_op1, reg_op2); ++ __ bc1f_long(*L); ++ break; ++ case 0x03: //greater ++ __ fcmp_cule_d(FCC0, reg_op1, reg_op2); ++ __ bc1f_long(*L); ++ break; ++ case 0x04: //greater_equal ++ __ fcmp_cult_d(FCC0, reg_op1, reg_op2); ++ __ bc1f_long(*L); ++ break; ++ case 0x05: //less ++ __ fcmp_cult_d(FCC0, reg_op1, reg_op2); ++ __ bc1t_long(*L); ++ break; ++ case 0x06: //less_equal ++ __ fcmp_cule_d(FCC0, reg_op1, reg_op2); ++ __ bc1t_long(*L); ++ break; ++ default: ++ Unimplemented(); ++ } ++ %} ++ ++ ins_pc_relative(1); ++ ins_pipe(pipe_slow); ++%} ++ ++ ++// ============================================================================ ++// Branch Instructions -- short offset versions ++ ++// Jump Direct ++instruct jmpDir_short(label labl) %{ ++ match(Goto); ++ effect(USE labl); ++ ++ ins_cost(300); ++ format %{ "JMP $labl #@jmpDir_short" %} ++ ++ ins_encode %{ ++ Label &L = *($labl$$label); ++ if(&L) ++ __ b(L); ++ else ++ __ b(int(0)); ++ %} ++ ++ ins_pipe( pipe_jump ); ++ ins_pc_relative(1); ++ ins_short_branch(1); ++%} ++ ++// Jump Direct Conditional - Label defines a relative address from Jcc+1 ++instruct jmpLoopEnd_short(cmpOp cop, mRegI src1, mRegI src2, label labl) %{ ++ match(CountedLoopEnd cop (CmpI src1 src2)); ++ effect(USE labl); ++ ++ ins_cost(300); ++ format %{ "J$cop $src1, $src2, $labl\t# Loop end @ jmpLoopEnd_short" %} ++ ins_encode %{ ++ Register op1 = $src1$$Register; ++ Register op2 = $src2$$Register; ++ Label &L = *($labl$$label); ++ int flag = $cop$$cmpcode; ++ ++ __ cmp_branch_short(flag, op1, op2, L, true /* signed */); ++ %} ++ ins_pipe( pipe_jump ); ++ ins_pc_relative(1); ++ ins_short_branch(1); ++%} ++ ++instruct jmpLoopEnd_reg_immI_short(cmpOp cop, mRegI src1, immI src2, label labl) %{ ++ match(CountedLoopEnd cop (CmpI src1 src2)); ++ effect(USE labl); ++ ++ ins_cost(300); ++ format %{ "J$cop $src1, $src2, $labl\t# Loop end @ jmpLoopEnd_reg_immI_short" %} ++ ins_encode %{ ++ Register op1 = $src1$$Register; ++ Label &L = *($labl$$label); ++ int flag = $cop$$cmpcode; ++ int val = $src2$$constant; ++ ++ if (val == 0) { ++ __ cmp_branch_short(flag, op1, R0, L, true /* signed */); ++ } else { ++ __ li(AT, val); ++ __ cmp_branch_short(flag, op1, AT, L, true /* signed */); ++ } ++ %} ++ ins_pipe( pipe_jump ); ++ ins_pc_relative(1); ++ ins_short_branch(1); ++%} ++ ++ ++// This match pattern is created for StoreIConditional since I cannot match IfNode without a RegFlags! 
++instruct jmpCon_flags_short(cmpOpEqNe cop, FlagsReg cr, label labl) %{ ++ match(If cop cr); ++ effect(USE labl); ++ ++ ins_cost(300); ++ format %{ "J$cop $labl #LoongArch uses T0 as equivalent to eflag @jmpCon_flags_short" %} ++ ++ ins_encode %{ ++ Label &L = *($labl$$label); ++ switch($cop$$cmpcode) { ++ case 0x01: //equal ++ if (&L) ++ __ bnez($cr$$Register, L); ++ else ++ __ bnez($cr$$Register, (int)0); ++ break; ++ case 0x02: //not equal ++ if (&L) ++ __ beqz($cr$$Register, L); ++ else ++ __ beqz($cr$$Register, (int)0); ++ break; ++ default: ++ Unimplemented(); ++ } ++ %} ++ ++ ins_pipe( pipe_jump ); ++ ins_pc_relative(1); ++ ins_short_branch(1); ++%} ++ ++// Conditional jumps ++instruct branchConP_0_short(cmpOpEqNe cmp, mRegP op1, immP_0 zero, label labl) %{ ++ match(If cmp (CmpP op1 zero)); ++ effect(USE labl); ++ ++ ins_cost(180); ++ format %{ "b$cmp $op1, R0, $labl #@branchConP_0_short" %} ++ ++ ins_encode %{ ++ Register op1 = $op1$$Register; ++ Label &L = *($labl$$label); ++ int flag = $cmp$$cmpcode; ++ ++ __ cmp_branchEqNe_off21(flag, op1, L); ++ %} ++ ++ ins_pc_relative(1); ++ ins_pipe( pipe_alu_branch ); ++ ins_short_branch(1); ++%} ++ ++instruct branchConN2P_0_short(cmpOpEqNe cmp, mRegN op1, immP_0 zero, label labl) %{ ++ match(If cmp (CmpP (DecodeN op1) zero)); ++ predicate(CompressedOops::base() == NULL && CompressedOops::shift() == 0); ++ effect(USE labl); ++ ++ ins_cost(180); ++ format %{ "b$cmp $op1, R0, $labl #@branchConN2P_0_short" %} ++ ++ ins_encode %{ ++ Register op1 = $op1$$Register; ++ Label &L = *($labl$$label); ++ int flag = $cmp$$cmpcode; ++ ++ __ cmp_branchEqNe_off21(flag, op1, L); ++ %} ++ ++ ins_pc_relative(1); ++ ins_pipe( pipe_alu_branch ); ++ ins_short_branch(1); ++%} ++ ++ ++instruct branchConP_short(cmpOp cmp, mRegP op1, mRegP op2, label labl) %{ ++ match(If cmp (CmpP op1 op2)); ++// predicate(can_branch_register(_kids[0]->_leaf, _kids[1]->_leaf)); ++ effect(USE labl); ++ ++ ins_cost(200); ++ format %{ "b$cmp $op1, $op2, $labl #@branchConP_short" %} ++ ++ ins_encode %{ ++ Register op1 = $op1$$Register; ++ Register op2 = $op2$$Register; ++ Label &L = *($labl$$label); ++ int flag = $cmp$$cmpcode; ++ ++ __ cmp_branch_short(flag, op1, op2, L, false /* unsigned */); ++ %} ++ ++ ins_pc_relative(1); ++ ins_pipe( pipe_alu_branch ); ++ ins_short_branch(1); ++%} ++ ++instruct cmpN_null_branch_short(cmpOp cmp, mRegN op1, immN_0 null, label labl) %{ ++ match(If cmp (CmpN op1 null)); ++ effect(USE labl); ++ ++ ins_cost(180); ++ format %{ "CMP $op1,0\t! compressed ptr\n\t" ++ "BP$cmp $labl @ cmpN_null_branch_short" %} ++ ins_encode %{ ++ Register op1 = $op1$$Register; ++ Label &L = *($labl$$label); ++ int flag = $cmp$$cmpcode; ++ ++ __ cmp_branchEqNe_off21(flag, op1, L); ++ %} ++//TODO: pipe_branchP or create pipe_branchN LEE ++ ins_pc_relative(1); ++ ins_pipe( pipe_alu_branch ); ++ ins_short_branch(1); ++%} ++ ++instruct cmpN_reg_branch_short(cmpOp cmp, mRegN op1, mRegN op2, label labl) %{ ++ match(If cmp (CmpN op1 op2)); ++ effect(USE labl); ++ ++ ins_cost(180); ++ format %{ "CMP $op1,$op2\t! 
compressed ptr\n\t" ++ "BP$cmp $labl @ cmpN_reg_branch_short" %} ++ ins_encode %{ ++ Register op1 = $op1$$Register; ++ Register op2 = $op2$$Register; ++ Label &L = *($labl$$label); ++ int flag = $cmp$$cmpcode; ++ ++ __ cmp_branch_short(flag, op1, op2, L, false /* unsigned */); ++ %} ++ ins_pc_relative(1); ++ ins_pipe( pipe_alu_branch ); ++ ins_short_branch(1); ++%} ++ ++instruct branchConIU_reg_reg_short(cmpOp cmp, mRegI src1, mRegI src2, label labl) %{ ++ match( If cmp (CmpU src1 src2) ); ++ effect(USE labl); ++ format %{ "BR$cmp $src1, $src2, $labl #@branchConIU_reg_reg_short" %} ++ ++ ins_encode %{ ++ Register op1 = $src1$$Register; ++ Register op2 = $src2$$Register; ++ Label &L = *($labl$$label); ++ int flag = $cmp$$cmpcode; ++ ++ __ cmp_branch_short(flag, op1, op2, L, false /* unsigned */); ++ %} ++ ++ ins_pc_relative(1); ++ ins_pipe( pipe_alu_branch ); ++ ins_short_branch(1); ++%} ++ ++ ++instruct branchConIU_reg_imm_short(cmpOp cmp, mRegI src1, immI src2, label labl) %{ ++ match( If cmp (CmpU src1 src2) ); ++ effect(USE labl); ++ format %{ "BR$cmp $src1, $src2, $labl #@branchConIU_reg_imm_short" %} ++ ++ ins_encode %{ ++ Register op1 = $src1$$Register; ++ int val = $src2$$constant; ++ Label &L = *($labl$$label); ++ int flag = $cmp$$cmpcode; ++ ++ if (val == 0) { ++ __ cmp_branch_short(flag, op1, R0, L, false /* unsigned */); ++ } else { ++ __ li(AT, val); ++ __ cmp_branch_short(flag, op1, AT, L, false /* unsigned */); ++ } ++ %} ++ ++ ins_pc_relative(1); ++ ins_pipe( pipe_alu_branch ); ++ ins_short_branch(1); ++%} ++ ++instruct branchConI_reg_reg_short(cmpOp cmp, mRegI src1, mRegI src2, label labl) %{ ++ match( If cmp (CmpI src1 src2) ); ++ effect(USE labl); ++ format %{ "BR$cmp $src1, $src2, $labl #@branchConI_reg_reg_short" %} ++ ++ ins_encode %{ ++ Register op1 = $src1$$Register; ++ Register op2 = $src2$$Register; ++ Label &L = *($labl$$label); ++ int flag = $cmp$$cmpcode; ++ ++ __ cmp_branch_short(flag, op1, op2, L, true /* signed */); ++ %} ++ ++ ins_pc_relative(1); ++ ins_pipe( pipe_alu_branch ); ++ ins_short_branch(1); ++%} ++ ++instruct branchConI_reg_imm_short(cmpOp cmp, mRegI src1, immI src2, label labl) %{ ++ match( If cmp (CmpI src1 src2) ); ++ effect(USE labl); ++ ins_cost(200); ++ format %{ "BR$cmp $src1, $src2, $labl #@branchConI_reg_imm_short" %} ++ ++ ins_encode %{ ++ Register op1 = $src1$$Register; ++ int val = $src2$$constant; ++ Label &L = *($labl$$label); ++ int flag = $cmp$$cmpcode; ++ ++ if (val == 0) { ++ __ cmp_branch_short(flag, op1, R0, L, true /* signed */); ++ } else { ++ __ li(AT, val); ++ __ cmp_branch_short(flag, op1, AT, L, true /* signed */); ++ } ++ %} ++ ++ ins_pc_relative(1); ++ ins_pipe( pipe_alu_branch ); ++ ins_short_branch(1); ++%} ++ ++instruct branchConL_regL_regL_short(cmpOp cmp, mRegLorI2L src1, mRegLorI2L src2, label labl) %{ ++ match( If cmp (CmpL src1 src2) ); ++ effect(USE labl); ++ format %{ "BR$cmp $src1, $src2, $labl #@branchConL_regL_regL_short" %} ++ ins_cost(250); ++ ++ ins_encode %{ ++ Register op1 = as_Register($src1$$reg); ++ Register op2 = as_Register($src2$$reg); ++ ++ Label &target = *($labl$$label); ++ int flag = $cmp$$cmpcode; ++ ++ __ cmp_branch_short(flag, op1, op2, target, true /* signed */); ++ %} ++ ++ ++ ins_pc_relative(1); ++ ins_pipe( pipe_alu_branch ); ++ ins_short_branch(1); ++%} ++ ++instruct branchConUL_regL_regL_short(cmpOp cmp, mRegLorI2L src1, mRegLorI2L src2, label labl) %{ ++ match(If cmp (CmpUL src1 src2)); ++ effect(USE labl); ++ format %{ "BR$cmp $src1, $src2, $labl #@branchConUL_regL_regL_short" %} 
++ ins_cost(250); ++ ++ ins_encode %{ ++ Register op1 = as_Register($src1$$reg); ++ Register op2 = as_Register($src2$$reg); ++ Label& target = *($labl$$label); ++ int flag = $cmp$$cmpcode; ++ ++ __ cmp_branch_short(flag, op1, op2, target, false /* unsigned */); ++ %} ++ ++ ins_pc_relative(1); ++ ins_pipe(pipe_alu_branch); ++ ins_short_branch(1); ++%} ++ ++instruct branchConL_regL_immL_short(cmpOp cmp, mRegL src1, immL src2, label labl) %{ ++ match( If cmp (CmpL src1 src2) ); ++ effect(USE labl); ++ format %{ "BR$cmp $src1, $src2, $labl #@branchConL_regL_immL_short" %} ++ ins_cost(180); ++ ++ ins_encode %{ ++ Register op1 = as_Register($src1$$reg); ++ Label &target = *($labl$$label); ++ int flag = $cmp$$cmpcode; ++ long val = $src2$$constant; ++ ++ if (val == 0) { ++ __ cmp_branch_short(flag, op1, R0, target, true /* signed */); ++ } else { ++ __ li(AT, val); ++ __ cmp_branch_short(flag, op1, AT, target, true /* signed */); ++ } ++ %} ++ ++ ins_pc_relative(1); ++ ins_pipe( pipe_alu_branch ); ++ ins_short_branch(1); ++%} ++ ++instruct branchConUL_regL_immL_short(cmpOp cmp, mRegL src1, immL src2, label labl) %{ ++ match(If cmp (CmpUL src1 src2)); ++ effect(USE labl); ++ format %{ "BR$cmp $src1, $src2, $labl #@branchConUL_regL_immL_short" %} ++ ins_cost(180); ++ ++ ins_encode %{ ++ Register op1 = as_Register($src1$$reg); ++ long val = $src2$$constant; ++ Label& target = *($labl$$label); ++ int flag = $cmp$$cmpcode; ++ ++ if (val == 0) { ++ __ cmp_branch_short(flag, op1, R0, target, false /* unsigned */); ++ } else { ++ __ li(AT, val); ++ __ cmp_branch_short(flag, op1, AT, target, false /* unsigned */); ++ } ++ %} ++ ++ ins_pc_relative(1); ++ ins_pipe(pipe_alu_branch); ++ ins_short_branch(1); ++%} ++ ++//FIXME ++instruct branchConF_reg_reg_short(cmpOp cmp, regF src1, regF src2, label labl) %{ ++ match( If cmp (CmpF src1 src2) ); ++ effect(USE labl); ++ format %{ "BR$cmp $src1, $src2, $labl #@branchConF_reg_reg_short" %} ++ ++ ins_encode %{ ++ FloatRegister reg_op1 = $src1$$FloatRegister; ++ FloatRegister reg_op2 = $src2$$FloatRegister; ++ Label &L = *($labl$$label); ++ int flag = $cmp$$cmpcode; ++ ++ switch(flag) { ++ case 0x01: //equal ++ __ fcmp_ceq_s(FCC0, reg_op1, reg_op2); ++ if (&L) ++ __ bcnez(FCC0, L); ++ else ++ __ bcnez(FCC0, (int)0); ++ break; ++ case 0x02: //not_equal ++ __ fcmp_ceq_s(FCC0, reg_op1, reg_op2); ++ if (&L) ++ __ bceqz(FCC0, L); ++ else ++ __ bceqz(FCC0, (int)0); ++ break; ++ case 0x03: //greater ++ __ fcmp_cule_s(FCC0, reg_op1, reg_op2); ++ if(&L) ++ __ bceqz(FCC0, L); ++ else ++ __ bceqz(FCC0, (int)0); ++ break; ++ case 0x04: //greater_equal ++ __ fcmp_cult_s(FCC0, reg_op1, reg_op2); ++ if(&L) ++ __ bceqz(FCC0, L); ++ else ++ __ bceqz(FCC0, (int)0); ++ break; ++ case 0x05: //less ++ __ fcmp_cult_s(FCC0, reg_op1, reg_op2); ++ if(&L) ++ __ bcnez(FCC0, L); ++ else ++ __ bcnez(FCC0, (int)0); ++ break; ++ case 0x06: //less_equal ++ __ fcmp_cule_s(FCC0, reg_op1, reg_op2); ++ if(&L) ++ __ bcnez(FCC0, L); ++ else ++ __ bcnez(FCC0, (int)0); ++ break; ++ default: ++ Unimplemented(); ++ } ++ %} ++ ++ ins_pc_relative(1); ++ ins_pipe(pipe_fpu_branch); ++ ins_short_branch(1); ++%} ++ ++instruct branchConD_reg_reg_short(cmpOp cmp, regD src1, regD src2, label labl) %{ ++ match( If cmp (CmpD src1 src2) ); ++ effect(USE labl); ++ format %{ "BR$cmp $src1, $src2, $labl #@branchConD_reg_reg_short" %} ++ ++ ins_encode %{ ++ FloatRegister reg_op1 = $src1$$FloatRegister; ++ FloatRegister reg_op2 = $src2$$FloatRegister; ++ Label &L = *($labl$$label); ++ int flag = $cmp$$cmpcode; ++ ++ switch(flag) 
{ ++ case 0x01: //equal ++ __ fcmp_ceq_d(FCC0, reg_op1, reg_op2); ++ if (&L) ++ __ bcnez(FCC0, L); ++ else ++ __ bcnez(FCC0, (int)0); ++ break; ++ case 0x02: //not_equal ++ // c_ueq_d cannot distinguish NaN from equal. Double.isNaN(Double) is implemented by 'f != f', so the use of c_ueq_d causes bugs. ++ __ fcmp_ceq_d(FCC0, reg_op1, reg_op2); ++ if (&L) ++ __ bceqz(FCC0, L); ++ else ++ __ bceqz(FCC0, (int)0); ++ break; ++ case 0x03: //greater ++ __ fcmp_cule_d(FCC0, reg_op1, reg_op2); ++ if(&L) ++ __ bceqz(FCC0, L); ++ else ++ __ bceqz(FCC0, (int)0); ++ break; ++ case 0x04: //greater_equal ++ __ fcmp_cult_d(FCC0, reg_op1, reg_op2); ++ if(&L) ++ __ bceqz(FCC0, L); ++ else ++ __ bceqz(FCC0, (int)0); ++ break; ++ case 0x05: //less ++ __ fcmp_cult_d(FCC0, reg_op1, reg_op2); ++ if(&L) ++ __ bcnez(FCC0, L); ++ else ++ __ bcnez(FCC0, (int)0); ++ break; ++ case 0x06: //less_equal ++ __ fcmp_cule_d(FCC0, reg_op1, reg_op2); ++ if(&L) ++ __ bcnez(FCC0, L); ++ else ++ __ bcnez(FCC0, (int)0); ++ break; ++ default: ++ Unimplemented(); ++ } ++ %} ++ ++ ins_pc_relative(1); ++ ins_pipe(pipe_fpu_branch); ++ ins_short_branch(1); ++%} ++ ++// =================== End of branch instructions ========================== ++ ++// Call Runtime Instruction ++instruct CallRuntimeDirect(method meth) %{ ++ match(CallRuntime ); ++ effect(USE meth); ++ ++ ins_cost(300); ++ format %{ "CALL,runtime #@CallRuntimeDirect" %} ++ ins_encode( Java_To_Runtime( meth ) ); ++ ins_pipe( pipe_slow ); ++ ins_alignment(4); ++%} ++ ++ ++ ++//------------------------MemBar Instructions------------------------------- ++//Memory barrier flavors ++ ++instruct unnecessary_membar_acquire() %{ ++ predicate(unnecessary_acquire(n)); ++ match(MemBarAcquire); ++ ins_cost(0); ++ ++ format %{ "membar_acquire (elided)" %} ++ ++ ins_encode %{ ++ __ block_comment("membar_acquire (elided)"); ++ %} ++ ++ ins_pipe(empty); ++%} ++ ++instruct membar_acquire() %{ ++ match(MemBarAcquire); ++ ins_cost(400); ++ ++ format %{ "MEMBAR-acquire @ membar_acquire" %} ++ ins_encode %{ ++ __ membar(Assembler::Membar_mask_bits(__ LoadLoad|__ LoadStore)); ++ %} ++ ins_pipe(empty); ++%} ++ ++instruct load_fence() %{ ++ match(LoadFence); ++ ins_cost(400); ++ ++ format %{ "MEMBAR @ load_fence" %} ++ ins_encode %{ ++ __ membar(Assembler::Membar_mask_bits(__ LoadLoad|__ LoadStore)); ++ %} ++ ins_pipe(pipe_slow); ++%} ++ ++instruct membar_acquire_lock() ++%{ ++ match(MemBarAcquireLock); ++ ins_cost(0); ++ ++ size(0); ++ format %{ "MEMBAR-acquire (acquire as part of CAS in prior FastLock so empty encoding) @ membar_acquire_lock" %} ++ ins_encode(); ++ ins_pipe(empty); ++%} ++ ++instruct unnecessary_membar_release() %{ ++ predicate(unnecessary_release(n)); ++ match(MemBarRelease); ++ ins_cost(0); ++ ++ format %{ "membar_release (elided)" %} ++ ++ ins_encode %{ ++ __ block_comment("membar_release (elided)"); ++ %} ++ ins_pipe(pipe_slow); ++%} ++ ++instruct membar_release() %{ ++ match(MemBarRelease); ++ ins_cost(400); ++ ++ format %{ "MEMBAR-release @ membar_release" %} ++ ++ ins_encode %{ ++ // Attention: DO NOT DELETE THIS GUY! 
++ __ membar(Assembler::Membar_mask_bits(__ LoadStore|__ StoreStore)); ++ %} ++ ++ ins_pipe(pipe_slow); ++%} ++ ++instruct store_fence() %{ ++ match(StoreFence); ++ ins_cost(400); ++ ++ format %{ "MEMBAR @ store_fence" %} ++ ++ ins_encode %{ ++ __ membar(Assembler::Membar_mask_bits(__ LoadStore|__ StoreStore)); ++ %} ++ ++ ins_pipe(pipe_slow); ++%} ++ ++instruct membar_release_lock() ++%{ ++ match(MemBarReleaseLock); ++ ins_cost(0); ++ ++ size(0); ++ format %{ "MEMBAR-release-lock (release in FastUnlock so empty) @ membar_release_lock" %} ++ ins_encode(); ++ ins_pipe(empty); ++%} ++ ++instruct unnecessary_membar_volatile() %{ ++ predicate(unnecessary_volatile(n)); ++ match(MemBarVolatile); ++ ins_cost(0); ++ ++ format %{ "membar_volatile (elided)" %} ++ ++ ins_encode %{ ++ __ block_comment("membar_volatile (elided)"); ++ %} ++ ++ ins_pipe(pipe_slow); ++%} ++ ++instruct membar_volatile() %{ ++ match(MemBarVolatile); ++ ins_cost(400); ++ ++ format %{ "MEMBAR-volatile" %} ++ ins_encode %{ ++ if( !os::is_MP() ) return; // Not needed on single CPU ++ __ membar(__ StoreLoad); ++ ++ %} ++ ins_pipe(pipe_slow); ++%} ++ ++instruct membar_storestore() %{ ++ match(MemBarStoreStore); ++ match(StoreStoreFence); ++ ++ ins_cost(400); ++ format %{ "MEMBAR-storestore @ membar_storestore" %} ++ ins_encode %{ ++ __ membar(__ StoreStore); ++ %} ++ ins_pipe(empty); ++%} ++ ++//----------Move Instructions-------------------------------------------------- ++instruct castX2P(mRegP dst, mRegL src) %{ ++ match(Set dst (CastX2P src)); ++ format %{ "castX2P $dst, $src @ castX2P" %} ++ ins_encode %{ ++ Register src = $src$$Register; ++ Register dst = $dst$$Register; ++ ++ if(src != dst) ++ __ move(dst, src); ++ %} ++ ins_cost(10); ++ ins_pipe( ialu_regI_mov ); ++%} ++ ++instruct castP2X(mRegL dst, mRegP src ) %{ ++ match(Set dst (CastP2X src)); ++ ++ format %{ "mov $dst, $src\t #@castP2X" %} ++ ins_encode %{ ++ Register src = $src$$Register; ++ Register dst = $dst$$Register; ++ ++ if(src != dst) ++ __ move(dst, src); ++ %} ++ ins_pipe( ialu_regI_mov ); ++%} ++ ++instruct MoveF2I_reg_reg(mRegI dst, regF src) %{ ++ match(Set dst (MoveF2I src)); ++ effect(DEF dst, USE src); ++ ins_cost(85); ++ format %{ "MoveF2I $dst, $src @ MoveF2I_reg_reg" %} ++ ins_encode %{ ++ Register dst = as_Register($dst$$reg); ++ FloatRegister src = as_FloatRegister($src$$reg); ++ ++ __ movfr2gr_s(dst, src); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct MoveI2F_reg_reg(regF dst, mRegI src) %{ ++ match(Set dst (MoveI2F src)); ++ effect(DEF dst, USE src); ++ ins_cost(85); ++ format %{ "MoveI2F $dst, $src @ MoveI2F_reg_reg" %} ++ ins_encode %{ ++ Register src = as_Register($src$$reg); ++ FloatRegister dst = as_FloatRegister($dst$$reg); ++ ++ __ movgr2fr_w(dst, src); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct MoveD2L_reg_reg(mRegL dst, regD src) %{ ++ match(Set dst (MoveD2L src)); ++ effect(DEF dst, USE src); ++ ins_cost(85); ++ format %{ "MoveD2L $dst, $src @ MoveD2L_reg_reg" %} ++ ins_encode %{ ++ Register dst = as_Register($dst$$reg); ++ FloatRegister src = as_FloatRegister($src$$reg); ++ ++ __ movfr2gr_d(dst, src); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct MoveL2D_reg_reg(regD dst, mRegL src) %{ ++ match(Set dst (MoveL2D src)); ++ effect(DEF dst, USE src); ++ ins_cost(85); ++ format %{ "MoveL2D $dst, $src @ MoveL2D_reg_reg" %} ++ ins_encode %{ ++ FloatRegister dst = as_FloatRegister($dst$$reg); ++ Register src = as_Register($src$$reg); ++ ++ __ movgr2fr_d(dst, src); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ 
++//----------Conditional Move--------------------------------------------------- ++// Conditional move ++instruct cmovI_cmpI_reg_reg(mRegI dst, mRegI src1, mRegI src2, cmpOp cop ) %{ ++ match(Set dst (CMoveI (Binary cop (CmpI src1 src2)) (Binary src1 src2))); ++ ins_cost(50); ++ format %{ ++ "CMP$cop $src1, $src2\t @cmovI_cmpI_reg_reg\n" ++ "\tCMOV $dst,$src1, $src2 \t @cmovI_cmpI_reg_reg" ++ %} ++ ++ ins_encode %{ ++ Register op1 = $src1$$Register; ++ Register op2 = $src2$$Register; ++ Register dst = $dst$$Register; ++ int flag = $cop$$cmpcode; ++ ++ __ cmp_cmov(op1, op2, dst, op1, op2, (MacroAssembler::CMCompare) flag, true); ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct cmovI_cmpI_reg_reg2(mRegI dst, mRegI src1, mRegI src2, cmpOp cop ) %{ ++ match(Set dst (CMoveI (Binary cop (CmpI src1 src2)) (Binary src2 src1))); ++ ins_cost(50); ++ format %{ ++ "CMP$cop $src1, $src2\t @cmovI_cmpI_reg_reg2\n" ++ "\tCMOV $dst,$src2, $src1 \t @cmovI_cmpI_reg_reg2" ++ %} ++ ++ ins_encode %{ ++ Register op1 = $src1$$Register; ++ Register op2 = $src2$$Register; ++ Register dst = $dst$$Register; ++ int flag = $cop$$cmpcode; ++ ++ __ cmp_cmov(op1, op2, dst, op2, op1, (MacroAssembler::CMCompare) flag, true); ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct cmovI_cmpI_dst_reg(mRegI dst, mRegI src, mRegI tmp1, mRegI tmp2, cmpOp cop ) %{ ++ match(Set dst (CMoveI (Binary cop (CmpI tmp1 tmp2)) (Binary dst src))); ++ ins_cost(80); ++ format %{ ++ "CMP$cop $tmp1, $tmp2\t @cmovI_cmpI_dst_reg\n" ++ "\tCMOV $dst,$src \t @cmovI_cmpI_dst_reg" ++ %} ++ ++ ins_encode %{ ++ Register op1 = $tmp1$$Register; ++ Register op2 = $tmp2$$Register; ++ Register dst = $dst$$Register; ++ Register src = $src$$Register; ++ int flag = $cop$$cmpcode; ++ ++ __ cmp_cmov(op1, op2, dst, src, (MacroAssembler::CMCompare) flag, true /* is_signed */); ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct cmovI_cmpP_reg_reg(mRegI dst, mRegI src, mRegP tmp1, mRegP tmp2, cmpOp cop ) %{ ++ match(Set dst (CMoveI (Binary cop (CmpP tmp1 tmp2)) (Binary dst src))); ++ ins_cost(80); ++ format %{ ++ "CMPU$cop $tmp1,$tmp2\t @cmovI_cmpP_reg_reg\n\t" ++ "CMOV $dst,$src\t @cmovI_cmpP_reg_reg" ++ %} ++ ins_encode %{ ++ Register op1 = $tmp1$$Register; ++ Register op2 = $tmp2$$Register; ++ Register dst = $dst$$Register; ++ Register src = $src$$Register; ++ int flag = $cop$$cmpcode; ++ ++ __ cmp_cmov(op1, op2, dst, src, (MacroAssembler::CMCompare) flag, false /* is_signed */); ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++ ++instruct cmovI_cmpN_reg_reg(mRegI dst, mRegI src, mRegN tmp1, mRegN tmp2, cmpOp cop ) %{ ++ match(Set dst (CMoveI (Binary cop (CmpN tmp1 tmp2)) (Binary dst src))); ++ ins_cost(80); ++ format %{ ++ "CMPU$cop $tmp1,$tmp2\t @cmovI_cmpN_reg_reg\n\t" ++ "CMOV $dst,$src\t @cmovI_cmpN_reg_reg" ++ %} ++ ins_encode %{ ++ Register op1 = $tmp1$$Register; ++ Register op2 = $tmp2$$Register; ++ Register dst = $dst$$Register; ++ Register src = $src$$Register; ++ int flag = $cop$$cmpcode; ++ ++ __ cmp_cmov(op1, op2, dst, src, (MacroAssembler::CMCompare) flag, false /* is_signed */); ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct cmovP_cmpU_reg_reg(mRegP dst, mRegP src, mRegI tmp1, mRegI tmp2, cmpOp cop ) %{ ++ match(Set dst (CMoveP (Binary cop (CmpU tmp1 tmp2)) (Binary dst src))); ++ ins_cost(80); ++ format %{ ++ "CMPU$cop $tmp1,$tmp2\t @cmovP_cmpU_reg_reg\n\t" ++ "CMOV $dst,$src\t @cmovP_cmpU_reg_reg" ++ %} ++ ins_encode %{ ++ Register op1 = $tmp1$$Register; ++ Register op2 = $tmp2$$Register; ++ Register dst = $dst$$Register; ++ Register src 
= $src$$Register; ++ int flag = $cop$$cmpcode; ++ ++ __ cmp_cmov(op1, op2, dst, src, (MacroAssembler::CMCompare) flag, false /* is_signed */); ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct cmovP_cmpF_reg_reg(mRegP dst, mRegP src, regF tmp1, regF tmp2, cmpOp cop, regD tmp3, regD tmp4) %{ ++ match(Set dst (CMoveP (Binary cop (CmpF tmp1 tmp2)) (Binary dst src))); ++ effect(TEMP tmp3, TEMP tmp4); ++ ins_cost(80); ++ format %{ ++ "CMP$cop $tmp1, $tmp2\t @cmovP_cmpF_reg_reg\n" ++ "\tCMOV $dst,$src \t @cmovP_cmpF_reg_reg" ++ %} ++ ++ ins_encode %{ ++ FloatRegister reg_op1 = $tmp1$$FloatRegister; ++ FloatRegister reg_op2 = $tmp2$$FloatRegister; ++ FloatRegister tmp1 = $tmp3$$FloatRegister; ++ FloatRegister tmp2 = $tmp4$$FloatRegister; ++ Register dst = $dst$$Register; ++ Register src = $src$$Register; ++ int flag = $cop$$cmpcode; ++ ++ __ cmp_cmov(reg_op1, reg_op2, dst, src, tmp1, tmp2, (MacroAssembler::CMCompare) flag, true /* is_float */); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct cmovP_cmpN_reg_reg(mRegP dst, mRegP src, mRegN tmp1, mRegN tmp2, cmpOp cop ) %{ ++ match(Set dst (CMoveP (Binary cop (CmpN tmp1 tmp2)) (Binary dst src))); ++ ins_cost(80); ++ format %{ ++ "CMPU$cop $tmp1,$tmp2\t @cmovP_cmpN_reg_reg\n\t" ++ "CMOV $dst,$src\t @cmovP_cmpN_reg_reg" ++ %} ++ ins_encode %{ ++ Register op1 = $tmp1$$Register; ++ Register op2 = $tmp2$$Register; ++ Register dst = $dst$$Register; ++ Register src = $src$$Register; ++ int flag = $cop$$cmpcode; ++ ++ __ cmp_cmov(op1, op2, dst, src, (MacroAssembler::CMCompare) flag, false /* is_signed */); ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct cmovN_cmpP_reg_reg(mRegN dst, mRegN src, mRegP tmp1, mRegP tmp2, cmpOp cop ) %{ ++ match(Set dst (CMoveN (Binary cop (CmpP tmp1 tmp2)) (Binary dst src))); ++ ins_cost(80); ++ format %{ ++ "CMPU$cop $tmp1,$tmp2\t @cmovN_cmpP_reg_reg\n\t" ++ "CMOV $dst,$src\t @cmovN_cmpP_reg_reg" ++ %} ++ ins_encode %{ ++ Register op1 = $tmp1$$Register; ++ Register op2 = $tmp2$$Register; ++ Register dst = $dst$$Register; ++ Register src = $src$$Register; ++ int flag = $cop$$cmpcode; ++ ++ __ cmp_cmov(op1, op2, dst, src, (MacroAssembler::CMCompare) flag, false /* is_signed */); ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct cmovP_cmpD_reg_reg(mRegP dst, mRegP src, regD tmp1, regD tmp2, cmpOp cop, regD tmp3, regD tmp4) %{ ++ match(Set dst (CMoveP (Binary cop (CmpD tmp1 tmp2)) (Binary dst src))); ++ effect(TEMP tmp3, TEMP tmp4); ++ ins_cost(80); ++ format %{ ++ "CMP$cop $tmp1, $tmp2\t @cmovP_cmpD_reg_reg\n" ++ "\tCMOV $dst,$src \t @cmovP_cmpD_reg_reg" ++ %} ++ ins_encode %{ ++ FloatRegister reg_op1 = as_FloatRegister($tmp1$$reg); ++ FloatRegister reg_op2 = as_FloatRegister($tmp2$$reg); ++ FloatRegister tmp1 = $tmp3$$FloatRegister; ++ FloatRegister tmp2 = $tmp4$$FloatRegister; ++ Register dst = as_Register($dst$$reg); ++ Register src = as_Register($src$$reg); ++ int flag = $cop$$cmpcode; ++ ++ __ cmp_cmov(reg_op1, reg_op2, dst, src, tmp1, tmp2, (MacroAssembler::CMCompare) flag, false /* is_float */); ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct cmovN_cmpN_reg_reg(mRegN dst, mRegN src, mRegN tmp1, mRegN tmp2, cmpOp cop ) %{ ++ match(Set dst (CMoveN (Binary cop (CmpN tmp1 tmp2)) (Binary dst src))); ++ ins_cost(80); ++ format %{ ++ "CMPU$cop $tmp1,$tmp2\t @cmovN_cmpN_reg_reg\n\t" ++ "CMOV $dst,$src\t @cmovN_cmpN_reg_reg" ++ %} ++ ins_encode %{ ++ Register op1 = $tmp1$$Register; ++ Register op2 = $tmp2$$Register; ++ Register dst = $dst$$Register; ++ Register src = $src$$Register; ++ int flag = 
$cop$$cmpcode; ++ ++ __ cmp_cmov(op1, op2, dst, src, (MacroAssembler::CMCompare) flag, false /* is_signed */); ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct cmovI_cmpU_reg_reg(mRegI dst, mRegI src, mRegI tmp1, mRegI tmp2, cmpOp cop ) %{ ++ match(Set dst (CMoveI (Binary cop (CmpU tmp1 tmp2)) (Binary dst src))); ++ ins_cost(80); ++ format %{ ++ "CMPU$cop $tmp1,$tmp2\t @cmovI_cmpU_reg_reg\n\t" ++ "CMOV $dst,$src\t @cmovI_cmpU_reg_reg" ++ %} ++ ins_encode %{ ++ Register op1 = $tmp1$$Register; ++ Register op2 = $tmp2$$Register; ++ Register dst = $dst$$Register; ++ Register src = $src$$Register; ++ int flag = $cop$$cmpcode; ++ ++ __ cmp_cmov(op1, op2, dst, src, (MacroAssembler::CMCompare) flag, false /* is_signed */); ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct cmovI_cmpL_reg_reg(mRegI dst, mRegIorL2I src, mRegLorI2L tmp1, mRegLorI2L tmp2, cmpOp cop ) %{ ++ match(Set dst (CMoveI (Binary cop (CmpL tmp1 tmp2)) (Binary dst src))); ++ ins_cost(80); ++ format %{ ++ "CMP$cop $tmp1, $tmp2\t @cmovI_cmpL_reg_reg\n" ++ "\tCMOV $dst,$src \t @cmovI_cmpL_reg_reg" ++ %} ++ ins_encode %{ ++ Register opr1 = as_Register($tmp1$$reg); ++ Register opr2 = as_Register($tmp2$$reg); ++ Register dst = $dst$$Register; ++ Register src = $src$$Register; ++ int flag = $cop$$cmpcode; ++ ++ __ cmp_cmov(opr1, opr2, dst, src, (MacroAssembler::CMCompare) flag, true /* is_signed */); ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct cmovI_cmpUL_reg_reg(mRegI dst, mRegIorL2I src, mRegLorI2L tmp1, mRegLorI2L tmp2, cmpOp cop) %{ ++ match(Set dst (CMoveI (Binary cop (CmpUL tmp1 tmp2)) (Binary dst src))); ++ ins_cost(80); ++ format %{ ++ "CMP$cop $tmp1, $tmp2\t @cmovI_cmpUL_reg_reg\n" ++ "\tCMOV $dst,$src \t @cmovI_cmpUL_reg_reg" ++ %} ++ ins_encode %{ ++ Register opr1 = as_Register($tmp1$$reg); ++ Register opr2 = as_Register($tmp2$$reg); ++ Register dst = $dst$$Register; ++ Register src = $src$$Register; ++ int flag = $cop$$cmpcode; ++ ++ __ cmp_cmov(opr1, opr2, dst, src, (MacroAssembler::CMCompare) flag, false /* is_signed */); ++ %} ++ ++ ins_pipe(pipe_slow); ++%} ++ ++instruct cmovP_cmpL_reg_reg(mRegP dst, mRegP src, mRegLorI2L tmp1, mRegLorI2L tmp2, cmpOp cop ) %{ ++ match(Set dst (CMoveP (Binary cop (CmpL tmp1 tmp2)) (Binary dst src))); ++ ins_cost(80); ++ format %{ ++ "CMP$cop $tmp1, $tmp2\t @cmovP_cmpL_reg_reg\n" ++ "\tCMOV $dst,$src \t @cmovP_cmpL_reg_reg" ++ %} ++ ins_encode %{ ++ Register opr1 = as_Register($tmp1$$reg); ++ Register opr2 = as_Register($tmp2$$reg); ++ Register dst = $dst$$Register; ++ Register src = $src$$Register; ++ int flag = $cop$$cmpcode; ++ ++ __ cmp_cmov(opr1, opr2, dst, src, (MacroAssembler::CMCompare) flag, true /* is_signed */); ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct cmovP_cmpUL_reg_reg(mRegP dst, mRegP src, mRegLorI2L tmp1, mRegLorI2L tmp2, cmpOp cop) %{ ++ match(Set dst (CMoveP (Binary cop (CmpUL tmp1 tmp2)) (Binary dst src))); ++ ins_cost(80); ++ format %{ ++ "CMP$cop $tmp1, $tmp2\t @cmovP_cmpUL_reg_reg\n" ++ "\tCMOV $dst,$src \t @cmovP_cmpUL_reg_reg" ++ %} ++ ins_encode %{ ++ Register opr1 = as_Register($tmp1$$reg); ++ Register opr2 = as_Register($tmp2$$reg); ++ Register dst = $dst$$Register; ++ Register src = $src$$Register; ++ int flag = $cop$$cmpcode; ++ ++ __ cmp_cmov(opr1, opr2, dst, src, (MacroAssembler::CMCompare) flag, false /* is_signed */); ++ %} ++ ++ ins_pipe(pipe_slow); ++%} ++ ++instruct cmovI_cmpD_reg_reg(mRegI dst, mRegI src, regD tmp1, regD tmp2, cmpOp cop, regD tmp3, regD tmp4) %{ ++ match(Set dst (CMoveI (Binary cop (CmpD tmp1 tmp2)) 
(Binary dst src))); ++ effect(TEMP tmp3, TEMP tmp4); ++ ins_cost(80); ++ format %{ ++ "CMP$cop $tmp1, $tmp2\t @cmovI_cmpD_reg_reg\n" ++ "\tCMOV $dst,$src \t @cmovI_cmpD_reg_reg" ++ %} ++ ins_encode %{ ++ FloatRegister reg_op1 = as_FloatRegister($tmp1$$reg); ++ FloatRegister reg_op2 = as_FloatRegister($tmp2$$reg); ++ FloatRegister tmp1 = $tmp3$$FloatRegister; ++ FloatRegister tmp2 = $tmp4$$FloatRegister; ++ Register dst = as_Register($dst$$reg); ++ Register src = as_Register($src$$reg); ++ int flag = $cop$$cmpcode; ++ ++ __ cmp_cmov(reg_op1, reg_op2, dst, src, tmp1, tmp2, (MacroAssembler::CMCompare) flag, false /* is_float */); ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct cmovP_cmpP_reg_reg(mRegP dst, mRegP src, mRegP tmp1, mRegP tmp2, cmpOp cop ) %{ ++ match(Set dst (CMoveP (Binary cop (CmpP tmp1 tmp2)) (Binary dst src))); ++ ins_cost(80); ++ format %{ ++ "CMPU$cop $tmp1,$tmp2\t @cmovP_cmpP_reg_reg\n\t" ++ "CMOV $dst,$src\t @cmovP_cmpP_reg_reg" ++ %} ++ ins_encode %{ ++ Register op1 = $tmp1$$Register; ++ Register op2 = $tmp2$$Register; ++ Register dst = $dst$$Register; ++ Register src = $src$$Register; ++ int flag = $cop$$cmpcode; ++ ++ __ cmp_cmov(op1, op2, dst, src, (MacroAssembler::CMCompare) flag, false /* is_signed */); ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct cmovP_cmpI_reg_reg(mRegP dst, mRegP src, mRegI tmp1, mRegI tmp2, cmpOp cop ) %{ ++ match(Set dst (CMoveP (Binary cop (CmpI tmp1 tmp2)) (Binary dst src))); ++ ins_cost(80); ++ format %{ ++ "CMP$cop $tmp1,$tmp2\t @cmovP_cmpI_reg_reg\n\t" ++ "CMOV $dst,$src\t @cmovP_cmpI_reg_reg" ++ %} ++ ins_encode %{ ++ Register op1 = $tmp1$$Register; ++ Register op2 = $tmp2$$Register; ++ Register dst = $dst$$Register; ++ Register src = $src$$Register; ++ int flag = $cop$$cmpcode; ++ ++ __ cmp_cmov(op1, op2, dst, src, (MacroAssembler::CMCompare) flag, true /* is_signed */); ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct cmovL_cmpP_reg_reg(mRegL dst, mRegL src, mRegP tmp1, mRegP tmp2, cmpOp cop ) %{ ++ match(Set dst (CMoveL (Binary cop (CmpP tmp1 tmp2)) (Binary dst src))); ++ ins_cost(80); ++ format %{ ++ "CMPU$cop $tmp1,$tmp2\t @cmovL_cmpP_reg_reg\n\t" ++ "CMOV $dst,$src\t @cmovL_cmpP_reg_reg" ++ %} ++ ins_encode %{ ++ Register op1 = $tmp1$$Register; ++ Register op2 = $tmp2$$Register; ++ Register dst = $dst$$Register; ++ Register src = $src$$Register; ++ int flag = $cop$$cmpcode; ++ Label L; ++ ++ __ cmp_cmov(op1, op2, dst, src, (MacroAssembler::CMCompare) flag, false /* is_signed */); ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct cmovN_cmpU_reg_reg(mRegN dst, mRegN src, mRegI tmp1, mRegI tmp2, cmpOp cop ) %{ ++ match(Set dst (CMoveN (Binary cop (CmpU tmp1 tmp2)) (Binary dst src))); ++ ins_cost(80); ++ format %{ ++ "CMPU$cop $tmp1,$tmp2\t @cmovN_cmpU_reg_reg\n\t" ++ "CMOV $dst,$src\t @cmovN_cmpU_reg_reg" ++ %} ++ ins_encode %{ ++ Register op1 = $tmp1$$Register; ++ Register op2 = $tmp2$$Register; ++ Register dst = $dst$$Register; ++ Register src = $src$$Register; ++ int flag = $cop$$cmpcode; ++ ++ __ cmp_cmov(op1, op2, dst, src, (MacroAssembler::CMCompare) flag, false /* is_signed */); ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct cmovN_cmpL_reg_reg(mRegN dst, mRegN src, mRegL tmp1, mRegL tmp2, cmpOp cop) %{ ++ match(Set dst (CMoveN (Binary cop (CmpL tmp1 tmp2)) (Binary dst src))); ++ ins_cost(80); ++ format %{ ++ "CMP$cop $tmp1, $tmp2\t @cmovN_cmpL_reg_reg\n" ++ "\tCMOV $dst,$src \t @cmovN_cmpL_reg_reg" ++ %} ++ ins_encode %{ ++ Register opr1 = as_Register($tmp1$$reg); ++ Register opr2 = 
as_Register($tmp2$$reg); ++ Register dst = $dst$$Register; ++ Register src = $src$$Register; ++ int flag = $cop$$cmpcode; ++ ++ __ cmp_cmov(opr1, opr2, dst, src, (MacroAssembler::CMCompare) flag, true /* is_signed */); ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct cmovN_cmpUL_reg_reg(mRegN dst, mRegN src, mRegL tmp1, mRegL tmp2, cmpOp cop) %{ ++ match(Set dst (CMoveN (Binary cop (CmpUL tmp1 tmp2)) (Binary dst src))); ++ ins_cost(80); ++ format %{ ++ "CMP$cop $tmp1, $tmp2\t @cmovN_cmpUL_reg_reg\n" ++ "\tCMOV $dst,$src \t @cmovN_cmpUL_reg_reg" ++ %} ++ ins_encode %{ ++ Register opr1 = as_Register($tmp1$$reg); ++ Register opr2 = as_Register($tmp2$$reg); ++ Register dst = $dst$$Register; ++ Register src = $src$$Register; ++ int flag = $cop$$cmpcode; ++ ++ __ cmp_cmov(opr1, opr2, dst, src, (MacroAssembler::CMCompare) flag, false /* is_signed */); ++ %} ++ ++ ins_pipe(pipe_slow); ++%} ++ ++instruct cmovN_cmpI_reg_reg(mRegN dst, mRegN src, mRegI tmp1, mRegI tmp2, cmpOp cop ) %{ ++ match(Set dst (CMoveN (Binary cop (CmpI tmp1 tmp2)) (Binary dst src))); ++ ins_cost(80); ++ format %{ ++ "CMP$cop $tmp1,$tmp2\t @cmovN_cmpI_reg_reg\n\t" ++ "CMOV $dst,$src\t @cmovN_cmpI_reg_reg" ++ %} ++ ins_encode %{ ++ Register op1 = $tmp1$$Register; ++ Register op2 = $tmp2$$Register; ++ Register dst = $dst$$Register; ++ Register src = $src$$Register; ++ int flag = $cop$$cmpcode; ++ ++ __ cmp_cmov(op1, op2, dst, src, (MacroAssembler::CMCompare) flag, true /* is_signed */); ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct cmovL_cmpU_reg_reg(mRegL dst, mRegL src, mRegI tmp1, mRegI tmp2, cmpOp cop ) %{ ++ match(Set dst (CMoveL (Binary cop (CmpU tmp1 tmp2)) (Binary dst src))); ++ ins_cost(80); ++ format %{ ++ "CMPU$cop $tmp1,$tmp2\t @cmovL_cmpU_reg_reg\n\t" ++ "CMOV $dst,$src\t @cmovL_cmpU_reg_reg" ++ %} ++ ins_encode %{ ++ Register op1 = $tmp1$$Register; ++ Register op2 = $tmp2$$Register; ++ Register dst = $dst$$Register; ++ Register src = $src$$Register; ++ int flag = $cop$$cmpcode; ++ ++ __ cmp_cmov(op1, op2, dst, src, (MacroAssembler::CMCompare) flag, false /* is_signed */); ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct cmovL_cmpF_reg_reg(mRegL dst, mRegL src, regF tmp1, regF tmp2, cmpOp cop, regD tmp3, regD tmp4) %{ ++ match(Set dst (CMoveL (Binary cop (CmpF tmp1 tmp2)) (Binary dst src))); ++ effect(TEMP tmp3, TEMP tmp4); ++ ins_cost(80); ++ format %{ ++ "CMP$cop $tmp1, $tmp2\t @cmovL_cmpF_reg_reg\n" ++ "\tCMOV $dst,$src \t @cmovL_cmpF_reg_reg" ++ %} ++ ++ ins_encode %{ ++ FloatRegister reg_op1 = $tmp1$$FloatRegister; ++ FloatRegister reg_op2 = $tmp2$$FloatRegister; ++ FloatRegister tmp1 = $tmp3$$FloatRegister; ++ FloatRegister tmp2 = $tmp4$$FloatRegister; ++ Register dst = $dst$$Register; ++ Register src = $src$$Register; ++ int flag = $cop$$cmpcode; ++ ++ __ cmp_cmov(reg_op1, reg_op2, dst, src, tmp1, tmp2, (MacroAssembler::CMCompare) flag, true /* is_float */); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct cmovL_cmpI_reg_reg(mRegL dst, mRegL src, mRegI tmp1, mRegI tmp2, cmpOp cop ) %{ ++ match(Set dst (CMoveL (Binary cop (CmpI tmp1 tmp2)) (Binary dst src))); ++ ins_cost(80); ++ format %{ ++ "CMP$cop $tmp1, $tmp2\t @cmovL_cmpI_reg_reg\n" ++ "\tCMOV $dst,$src \t @cmovL_cmpI_reg_reg" ++ %} ++ ++ ins_encode %{ ++ Register op1 = $tmp1$$Register; ++ Register op2 = $tmp2$$Register; ++ Register dst = as_Register($dst$$reg); ++ Register src = as_Register($src$$reg); ++ int flag = $cop$$cmpcode; ++ ++ __ cmp_cmov(op1, op2, dst, src, (MacroAssembler::CMCompare) flag, true /* is_signed */); ++ %} ++ 
++ ins_pipe( pipe_slow ); ++%} ++ ++instruct cmovL_cmpL_reg_reg(mRegL dst, mRegL src1, mRegL src2, cmpOp cop ) %{ ++ match(Set dst (CMoveL (Binary cop (CmpL src1 src2)) (Binary src1 src2))); ++ ins_cost(50); ++ format %{ ++ "CMP$cop $src1, $src2\t @cmovL_cmpL_reg_reg\n" ++ "\tCMOV $dst,$src1, $src2 \t @cmovL_cmpL_reg_reg" ++ %} ++ ++ ins_encode %{ ++ Register op1 = $src1$$Register; ++ Register op2 = $src2$$Register; ++ Register dst = $dst$$Register; ++ int flag = $cop$$cmpcode; ++ ++ __ cmp_cmov(op1, op2, dst, op1, op2, (MacroAssembler::CMCompare) flag, true); ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct cmovL_cmpUL_reg_reg(mRegL dst, mRegL src1, mRegL src2, cmpOp cop) %{ ++ match(Set dst (CMoveL (Binary cop (CmpUL src1 src2)) (Binary src1 src2))); ++ ins_cost(50); ++ format %{ ++ "CMP$cop $src1, $src2\t @cmovL_cmpUL_reg_reg\n" ++ "\tCMOV $dst,$src1, $src2 \t @cmovL_cmpUL_reg_reg" ++ %} ++ ++ ins_encode %{ ++ Register op1 = $src1$$Register; ++ Register op2 = $src2$$Register; ++ Register dst = $dst$$Register; ++ int flag = $cop$$cmpcode; ++ ++ __ cmp_cmov(op1, op2, dst, op1, op2, (MacroAssembler::CMCompare) flag, false /* is_signed */); ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct cmovL_cmpL_reg_reg2(mRegL dst, mRegL src1, mRegL src2, cmpOp cop ) %{ ++ match(Set dst (CMoveL (Binary cop (CmpL src1 src2)) (Binary src2 src1))); ++ ins_cost(50); ++ format %{ ++ "CMP$cop $src1, $src2\t @cmovL_cmpL_reg_reg2\n" ++ "\tCMOV $dst,$src2, $src1 \t @cmovL_cmpL_reg_reg2" ++ %} ++ ++ ins_encode %{ ++ Register op1 = $src1$$Register; ++ Register op2 = $src2$$Register; ++ Register dst = $dst$$Register; ++ int flag = $cop$$cmpcode; ++ ++ __ cmp_cmov(op1, op2, dst, op2, op1, (MacroAssembler::CMCompare) flag, true); ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct cmovL_cmpUL_reg_reg2(mRegL dst, mRegL src1, mRegL src2, cmpOp cop) %{ ++ match(Set dst (CMoveL (Binary cop (CmpUL src1 src2)) (Binary src2 src1))); ++ ins_cost(50); ++ format %{ ++ "CMP$cop $src1, $src2\t @cmovL_cmpUL_reg_reg2\n" ++ "\tCMOV $dst,$src2, $src1 \t @cmovL_cmpUL_reg_reg2" ++ %} ++ ++ ins_encode %{ ++ Register op1 = $src1$$Register; ++ Register op2 = $src2$$Register; ++ Register dst = $dst$$Register; ++ int flag = $cop$$cmpcode; ++ ++ __ cmp_cmov(op1, op2, dst, op2, op1, (MacroAssembler::CMCompare) flag, false /* is_signed */); ++ %} ++ ++ ins_pipe(pipe_slow); ++%} ++ ++instruct cmovL_cmpL_dst_reg(mRegL dst, mRegL src, mRegL tmp1, mRegL tmp2, cmpOp cop ) %{ ++ match(Set dst (CMoveL (Binary cop (CmpL tmp1 tmp2)) (Binary dst src))); ++ ins_cost(80); ++ format %{ ++ "CMP$cop $tmp1, $tmp2\t @cmovL_cmpL_dst_reg\n" ++ "\tCMOV $dst,$src \t @cmovL_cmpL_dst_reg" ++ %} ++ ins_encode %{ ++ Register opr1 = as_Register($tmp1$$reg); ++ Register opr2 = as_Register($tmp2$$reg); ++ Register dst = as_Register($dst$$reg); ++ Register src = as_Register($src$$reg); ++ int flag = $cop$$cmpcode; ++ ++ __ cmp_cmov(opr1, opr2, dst, src, (MacroAssembler::CMCompare) flag, true /* is_signed */); ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct cmovL_cmpUL_dst_reg(mRegL dst, mRegL src, mRegL tmp1, mRegL tmp2, cmpOp cop) %{ ++ match(Set dst (CMoveL (Binary cop (CmpUL tmp1 tmp2)) (Binary dst src))); ++ ins_cost(80); ++ format %{ ++ "CMP$cop $tmp1, $tmp2\t @cmovL_cmpUL_dst_reg\n" ++ "\tCMOV $dst,$src \t @cmovL_cmpUL_dst_reg" ++ %} ++ ins_encode %{ ++ Register opr1 = as_Register($tmp1$$reg); ++ Register opr2 = as_Register($tmp2$$reg); ++ Register dst = as_Register($dst$$reg); ++ Register src = as_Register($src$$reg); ++ int flag = $cop$$cmpcode; 
++ ++ __ cmp_cmov(opr1, opr2, dst, src, (MacroAssembler::CMCompare) flag, false /* is_signed */); ++ %} ++ ++ ins_pipe(pipe_slow); ++%} ++ ++instruct cmovL_cmpN_reg_reg(mRegL dst, mRegL src, mRegN tmp1, mRegN tmp2, cmpOp cop ) %{ ++ match(Set dst (CMoveL (Binary cop (CmpN tmp1 tmp2)) (Binary dst src))); ++ ins_cost(80); ++ format %{ ++ "CMPU$cop $tmp1,$tmp2\t @cmovL_cmpN_reg_reg\n\t" ++ "CMOV $dst,$src\t @cmovL_cmpN_reg_reg" ++ %} ++ ins_encode %{ ++ Register op1 = $tmp1$$Register; ++ Register op2 = $tmp2$$Register; ++ Register dst = $dst$$Register; ++ Register src = $src$$Register; ++ int flag = $cop$$cmpcode; ++ ++ __ cmp_cmov(op1, op2, dst, src, (MacroAssembler::CMCompare) flag, false /* is_signed */); ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++ ++instruct cmovL_cmpD_reg_reg(mRegL dst, mRegL src, regD tmp1, regD tmp2, cmpOp cop, regD tmp3, regD tmp4) %{ ++ match(Set dst (CMoveL (Binary cop (CmpD tmp1 tmp2)) (Binary dst src))); ++ effect(TEMP tmp3, TEMP tmp4); ++ ins_cost(80); ++ format %{ ++ "CMP$cop $tmp1, $tmp2\t @cmovL_cmpD_reg_reg\n" ++ "\tCMOV $dst,$src \t @cmovL_cmpD_reg_reg" ++ %} ++ ins_encode %{ ++ FloatRegister reg_op1 = as_FloatRegister($tmp1$$reg); ++ FloatRegister reg_op2 = as_FloatRegister($tmp2$$reg); ++ FloatRegister tmp1 = $tmp3$$FloatRegister; ++ FloatRegister tmp2 = $tmp4$$FloatRegister; ++ Register dst = as_Register($dst$$reg); ++ Register src = as_Register($src$$reg); ++ int flag = $cop$$cmpcode; ++ ++ __ cmp_cmov(reg_op1, reg_op2, dst, src, tmp1, tmp2, (MacroAssembler::CMCompare) flag, false /* is_float */); ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct cmovD_cmpD_reg_reg(regD dst, regD src, regD tmp1, regD tmp2, cmpOp cop ) %{ ++ match(Set dst (CMoveD (Binary cop (CmpD tmp1 tmp2)) (Binary dst src))); ++ ins_cost(200); ++ format %{ ++ "CMP$cop $tmp1, $tmp2\t @cmovD_cmpD_reg_reg\n" ++ "\tCMOV $dst,$src \t @cmovD_cmpD_reg_reg" ++ %} ++ ins_encode %{ ++ FloatRegister reg_op1 = as_FloatRegister($tmp1$$reg); ++ FloatRegister reg_op2 = as_FloatRegister($tmp2$$reg); ++ FloatRegister dst = as_FloatRegister($dst$$reg); ++ FloatRegister src = as_FloatRegister($src$$reg); ++ int flag = $cop$$cmpcode; ++ ++ __ cmp_cmov(reg_op1, reg_op2, dst, src, (MacroAssembler::CMCompare) flag, false /* is_float */); ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct cmovF_cmpI_reg_reg(regF dst, regF src, mRegI tmp1, mRegI tmp2, cmpOp cop, regF tmp3, regF tmp4) %{ ++ match(Set dst (CMoveF (Binary cop (CmpI tmp1 tmp2)) (Binary dst src))); ++ effect(TEMP tmp3, TEMP tmp4); ++ ins_cost(200); ++ format %{ ++ "CMP$cop $tmp1, $tmp2\t @cmovF_cmpI_reg_reg\n" ++ "\tCMOV $dst, $src \t @cmovF_cmpI_reg_reg" ++ %} ++ ++ ins_encode %{ ++ Register op1 = $tmp1$$Register; ++ Register op2 = $tmp2$$Register; ++ FloatRegister dst = as_FloatRegister($dst$$reg); ++ FloatRegister src = as_FloatRegister($src$$reg); ++ FloatRegister tmp1 = as_FloatRegister($tmp3$$reg); ++ FloatRegister tmp2 = as_FloatRegister($tmp4$$reg); ++ int flag = $cop$$cmpcode; ++ ++ __ cmp_cmov(op1, op2, dst, src, tmp1, tmp2, (MacroAssembler::CMCompare) flag); ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct cmovD_cmpI_reg_reg(regD dst, regD src, mRegI tmp1, mRegI tmp2, cmpOp cop, regF tmp3, regF tmp4) %{ ++ match(Set dst (CMoveD (Binary cop (CmpI tmp1 tmp2)) (Binary dst src))); ++ effect(TEMP tmp3, TEMP tmp4); ++ ins_cost(200); ++ format %{ ++ "CMP$cop $tmp1, $tmp2\t @cmovD_cmpI_reg_reg\n" ++ "\tCMOV $dst, $src \t @cmovD_cmpI_reg_reg" ++ %} ++ ++ ins_encode %{ ++ Register op1 = $tmp1$$Register; ++ Register op2 = 
$tmp2$$Register; ++ FloatRegister dst = as_FloatRegister($dst$$reg); ++ FloatRegister src = as_FloatRegister($src$$reg); ++ FloatRegister tmp1 = as_FloatRegister($tmp3$$reg); ++ FloatRegister tmp2 = as_FloatRegister($tmp4$$reg); ++ int flag = $cop$$cmpcode; ++ ++ __ cmp_cmov(op1, op2, dst, src, tmp1, tmp2, (MacroAssembler::CMCompare) flag); ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct cmovD_cmpP_reg_reg(regD dst, regD src, mRegP tmp1, mRegP tmp2, cmpOp cop, regF tmp3, regF tmp4) %{ ++ match(Set dst (CMoveD (Binary cop (CmpP tmp1 tmp2)) (Binary dst src))); ++ effect(TEMP tmp3, TEMP tmp4); ++ ins_cost(200); ++ format %{ ++ "CMP$cop $tmp1, $tmp2\t @cmovD_cmpP_reg_reg\n" ++ "\tCMOV $dst, $src \t @cmovD_cmpP_reg_reg" ++ %} ++ ++ ins_encode %{ ++ Register op1 = $tmp1$$Register; ++ Register op2 = $tmp2$$Register; ++ FloatRegister dst = as_FloatRegister($dst$$reg); ++ FloatRegister src = as_FloatRegister($src$$reg); ++ FloatRegister tmp1 = as_FloatRegister($tmp3$$reg); ++ FloatRegister tmp2 = as_FloatRegister($tmp4$$reg); ++ int flag = $cop$$cmpcode; ++ ++ // Use signed comparison here, because the most significant bit of the ++ // user-space virtual address must be 0. ++ __ cmp_cmov(op1, op2, dst, src, tmp1, tmp2, (MacroAssembler::CMCompare) flag); ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++//FIXME ++instruct cmovI_cmpF_reg_reg(mRegI dst, mRegI src, regF tmp1, regF tmp2, cmpOp cop, regD tmp3, regD tmp4) %{ ++ match(Set dst (CMoveI (Binary cop (CmpF tmp1 tmp2)) (Binary dst src))); ++ effect(TEMP tmp3, TEMP tmp4); ++ ins_cost(80); ++ format %{ ++ "CMP$cop $tmp1, $tmp2\t @cmovI_cmpF_reg_reg\n" ++ "\tCMOV $dst,$src \t @cmovI_cmpF_reg_reg" ++ %} ++ ++ ins_encode %{ ++ FloatRegister reg_op1 = $tmp1$$FloatRegister; ++ FloatRegister reg_op2 = $tmp2$$FloatRegister; ++ FloatRegister tmp1 = $tmp3$$FloatRegister; ++ FloatRegister tmp2 = $tmp4$$FloatRegister; ++ Register dst = $dst$$Register; ++ Register src = $src$$Register; ++ int flag = $cop$$cmpcode; ++ ++ __ cmp_cmov(reg_op1, reg_op2, dst, src, tmp1, tmp2, (MacroAssembler::CMCompare) flag, true /* is_float */); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct cmovF_cmpF_reg_reg(regF dst, regF src, regF tmp1, regF tmp2, cmpOp cop ) %{ ++ match(Set dst (CMoveF (Binary cop (CmpF tmp1 tmp2)) (Binary dst src))); ++ ins_cost(200); ++ format %{ ++ "CMP$cop $tmp1, $tmp2\t @cmovF_cmpF_reg_reg\n" ++ "\tCMOV $dst,$src \t @cmovF_cmpF_reg_reg" ++ %} ++ ++ ins_encode %{ ++ FloatRegister reg_op1 = $tmp1$$FloatRegister; ++ FloatRegister reg_op2 = $tmp2$$FloatRegister; ++ FloatRegister dst = $dst$$FloatRegister; ++ FloatRegister src = $src$$FloatRegister; ++ int flag = $cop$$cmpcode; ++ ++ __ cmp_cmov(reg_op1, reg_op2, dst, src, (MacroAssembler::CMCompare) flag, true /* is_float */); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++// Manifest a CmpL result in an integer register. Very painful. ++// This is the test to avoid. 
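Editor's note (not part of the patch): the cmpL3/cmpF3/cmpD3 patterns below materialize a three-way comparison result (-1, 0 or +1, with NaN mapped to -1) in a general register by doing two set-on-less-than style comparisons and subtracting the flags. As a hedged illustration only, with the helper name cmp3 invented for the example, the integer variant computes the equivalent of:

    // Illustrative C++ sketch of what the slt/slt/sub_d sequence in
    // cmpL3_reg_reg computes; editor's example, not emitted code.
    #include <cstdint>
    #include <cassert>

    int cmp3(int64_t a, int64_t b) {
        int lt = (a < b) ? 1 : 0;   // slt AT,  opr1, opr2
        int gt = (b < a) ? 1 : 0;   // slt dst, opr2, opr1
        return gt - lt;             // sub_d dst, dst, AT -> -1, 0 or +1
    }

    int main() {
        assert(cmp3(1, 2) == -1);
        assert(cmp3(2, 2) == 0);
        assert(cmp3(3, 2) == 1);
        return 0;
    }

The floating-point variants follow the same shape, but derive the two flags from fcmp_clt (strictly less) and fcmp_cult (unordered or less), which is what makes a NaN operand fall into the -1 case.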
++instruct cmpL3_reg_zero(mRegI dst, mRegL src1, immL_0 zero) %{ ++ match(Set dst (CmpL3 src1 zero)); ++ ins_cost(1000); ++ format %{ "cmpL3 $dst, $src1, zero @ cmpL3_reg_zero" %} ++ ins_encode %{ ++ Register opr1 = as_Register($src1$$reg); ++ Register dst = as_Register($dst$$reg); ++ __ slt(AT, opr1, R0); ++ __ slt(dst, R0, opr1); ++ __ sub_d(dst, dst, AT); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct cmpL3_reg_reg(mRegI dst, mRegL src1, mRegL src2) %{ ++ match(Set dst (CmpL3 src1 src2)); ++ ins_cost(1000); ++ format %{ "cmpL3 $dst, $src1, $src2 @ cmpL3_reg_reg" %} ++ ins_encode %{ ++ Register opr1 = as_Register($src1$$reg); ++ Register opr2 = as_Register($src2$$reg); ++ Register dst = as_Register($dst$$reg); ++ ++ __ slt(AT, opr1, opr2); ++ __ slt(dst, opr2, opr1); ++ __ sub_d(dst, dst, AT); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++// ++// less_result = -1 ++// greater_result = 1 ++// equal_result = 0 ++// nan_result = -1 ++// ++instruct cmpF3_reg_reg(mRegI dst, regF src1, regF src2) %{ ++ match(Set dst (CmpF3 src1 src2)); ++ ins_cost(1000); ++ format %{ "cmpF3 $dst, $src1, $src2 @ cmpF3_reg_reg" %} ++ ins_encode %{ ++ FloatRegister src1 = as_FloatRegister($src1$$reg); ++ FloatRegister src2 = as_FloatRegister($src2$$reg); ++ Register dst = as_Register($dst$$reg); ++ ++ __ fcmp_clt_s(FCC0, src2, src1); ++ __ fcmp_cult_s(FCC1, src1, src2); ++ __ movcf2gr(dst, FCC0); ++ __ movcf2gr(AT, FCC1); ++ __ sub_d(dst, dst, AT); ++ ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct cmpD3_reg_reg(mRegI dst, regD src1, regD src2) %{ ++ match(Set dst (CmpD3 src1 src2)); ++ ins_cost(1000); ++ format %{ "cmpD3 $dst, $src1, $src2 @ cmpD3_reg_reg" %} ++ ins_encode %{ ++ FloatRegister src1 = as_FloatRegister($src1$$reg); ++ FloatRegister src2 = as_FloatRegister($src2$$reg); ++ Register dst = as_Register($dst$$reg); ++ ++ __ fcmp_clt_d(FCC0, src2, src1); ++ __ fcmp_cult_d(FCC1, src1, src2); ++ __ movcf2gr(dst, FCC0); ++ __ movcf2gr(AT, FCC1); ++ __ sub_d(dst, dst, AT); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct clear_array(t8RegL cnt, t3_RegP base, Universe dummy) %{ ++ match(Set dummy (ClearArray cnt base)); ++ effect(USE_KILL cnt, USE_KILL base); ++ format %{ "CLEAR_ARRAY base = $base, cnt = $cnt # Clear doublewords" %} ++ ins_encode %{ ++ //Assume cnt is the number of doublewords in an array to be cleared, ++ //and base points to the starting address of the array. ++ Register base = $base$$Register; ++ Register cnt = $cnt$$Register; ++ Label Loop, done; ++ ++ __ beq(cnt, R0, done); ++ ++ __ bind(Loop); ++ __ st_d(R0, base, 0); ++ __ addi_d(cnt, cnt, -1); ++ __ addi_d(base, base, wordSize); ++ __ bne(cnt, R0, Loop); ++ ++ __ bind(done); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct clear_array_imm(immL cnt, t3_RegP base, Universe dummy) %{ ++ match(Set dummy (ClearArray cnt base)); ++ effect(USE_KILL base); ++ format %{ "CLEAR_ARRAY base = $base, cnt = $cnt # Clear doublewords" %} ++ ins_encode %{ ++ //Assume cnt is the number of doublewords in an array to be cleared, ++ //and base points to the starting address of the array.
++ Register base = $base$$Register; ++ long cnt = $cnt$$constant; ++ Label Loop, done; ++ ++ int tmp = cnt % 8; ++ int i = 0; ++ for (; i < tmp; i++) { ++ __ st_d(R0, base, i * 8); ++ } ++ if (cnt - tmp) { ++ __ li(AT, cnt); ++ __ alsl_d(AT, AT, base, 2); ++ __ addi_d(base, base, i * 8); ++ __ bind(Loop); ++ __ st_d(R0, base, 0); ++ __ st_d(R0, base, 8); ++ __ st_d(R0, base, 16); ++ __ st_d(R0, base, 24); ++ __ st_d(R0, base, 32); ++ __ st_d(R0, base, 40); ++ __ st_d(R0, base, 48); ++ __ st_d(R0, base, 56); ++ __ addi_d(base, base, 64); ++ __ blt(base, AT, Loop); ++ } ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct string_compareL(a4_RegP str1, mA5RegI cnt1, a6_RegP str2, mA7RegI cnt2, no_Ax_mRegI result, t8RegL tmp1, t3RegL tmp2) %{ ++ predicate(((StrCompNode*)n)->encoding() == StrIntrinsicNode::LL); ++ match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2))); ++ effect(USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL tmp1, KILL tmp2); ++ ++ format %{ "String Compare byte[] $str1[len: $cnt1], $str2[len: $cnt2] tmp1:$tmp1, tmp2:$tmp2 -> $result @ string_compareL" %} ++ ins_encode %{ ++ __ string_compare($str1$$Register, $str2$$Register, ++ $cnt1$$Register, $cnt2$$Register, $result$$Register, ++ StrIntrinsicNode::LL, $tmp1$$Register, $tmp2$$Register); ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct string_compareU(a4_RegP str1, mA5RegI cnt1, a6_RegP str2, mA7RegI cnt2, no_Ax_mRegI result, t8RegL tmp1, t3RegL tmp2) %{ ++ predicate(((StrCompNode*)n)->encoding() == StrIntrinsicNode::UU); ++ match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2))); ++ effect(USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL tmp1, KILL tmp2); ++ ++ format %{ "String Compare char[] $str1[len: $cnt1], $str2[len: $cnt2] tmp1:$tmp1, tmp2:$tmp2 -> $result @ string_compareU" %} ++ ins_encode %{ ++ __ string_compare($str1$$Register, $str2$$Register, ++ $cnt1$$Register, $cnt2$$Register, $result$$Register, ++ StrIntrinsicNode::UU, $tmp1$$Register, $tmp2$$Register); ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct string_compareLU(a4_RegP str1, mA5RegI cnt1, a6_RegP str2, mA7RegI cnt2, no_Ax_mRegI result, t8RegL tmp1, t3RegL tmp2) %{ ++ predicate(((StrCompNode*)n)->encoding() == StrIntrinsicNode::LU); ++ match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2))); ++ effect(USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL tmp1, KILL tmp2); ++ ++ format %{ "String Compare byte[] $str1[len: $cnt1], $str2[len: $cnt2] tmp1:$tmp1, tmp2:$tmp2 -> $result @ string_compareLU" %} ++ ins_encode %{ ++ __ string_compare($str1$$Register, $str2$$Register, ++ $cnt1$$Register, $cnt2$$Register, $result$$Register, ++ StrIntrinsicNode::LU, $tmp1$$Register, $tmp2$$Register); ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct string_compareUL(a4_RegP str1, mA5RegI cnt1, a6_RegP str2, mA7RegI cnt2, no_Ax_mRegI result, t8RegL tmp1, t3RegL tmp2) %{ ++ predicate(((StrCompNode*)n)->encoding() == StrIntrinsicNode::UL); ++ match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2))); ++ effect(USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL tmp1, KILL tmp2); ++ ++ format %{ "String Compare byte[] $str1[len: $cnt1], $str2[len: $cnt2] tmp1:$tmp1 tmp2:$tmp2 -> $result @ string_compareUL" %} ++ ins_encode %{ ++ __ string_compare($str1$$Register, $str2$$Register, ++ $cnt1$$Register, $cnt2$$Register, $result$$Register, ++ StrIntrinsicNode::UL, $tmp1$$Register, $tmp2$$Register); ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct 
string_indexofUU(a4_RegP str1, mA5RegI cnt1, a6_RegP str2, mA7RegI cnt2, ++ mT8RegI result) ++%{ ++ predicate(((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UU); ++ match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 cnt2))); ++ effect(USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2); ++ ++ format %{ "String IndexOf $str1,$cnt1,$str2,$cnt2 -> $result (UU)" %} ++ ins_encode %{ ++ __ string_indexof($str1$$Register, $str2$$Register, ++ $cnt1$$Register, $cnt2$$Register, ++ $result$$Register, StrIntrinsicNode::UU); ++ %} ++ ins_pipe(pipe_slow); ++%} ++ ++instruct string_indexofLL(a4_RegP str1, mA5RegI cnt1, a6_RegP str2, mA7RegI cnt2, ++ mT8RegI result) ++%{ ++ predicate(((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::LL); ++ match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 cnt2))); ++ effect(USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2); ++ ++ format %{ "String IndexOf $str1,$cnt1,$str2,$cnt2 -> $result (LL)" %} ++ ins_encode %{ ++ __ string_indexof($str1$$Register, $str2$$Register, ++ $cnt1$$Register, $cnt2$$Register, ++ $result$$Register, StrIntrinsicNode::LL); ++ %} ++ ins_pipe(pipe_slow); ++%} ++ ++instruct string_indexofUL(a4_RegP str1, mA5RegI cnt1, a6_RegP str2, mA7RegI cnt2, ++ mT8RegI result) ++%{ ++ predicate(((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UL); ++ match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 cnt2))); ++ effect(USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2); ++ format %{ "String IndexOf $str1,$cnt1,$str2,$cnt2 -> $result (UL)" %} ++ ++ ins_encode %{ ++ __ string_indexof($str1$$Register, $str2$$Register, ++ $cnt1$$Register, $cnt2$$Register, ++ $result$$Register, StrIntrinsicNode::UL); ++ %} ++ ins_pipe(pipe_slow); ++%} ++ ++instruct string_indexof_conUU(a4_RegP str1, mA5RegI cnt1, a6_RegP str2, immI_1_4 int_cnt2, ++ mT8RegI result) ++%{ ++ predicate(((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UU); ++ match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 int_cnt2))); ++ effect(USE_KILL str1, USE_KILL str2, USE_KILL cnt1); ++ ++ format %{ "String IndexOf $str1,$cnt1,$str2,$int_cnt2 -> $result (UU)" %} ++ ++ ins_encode %{ ++ int icnt2 = (int)$int_cnt2$$constant; ++ __ string_indexof_linearscan($str1$$Register, $str2$$Register, ++ $cnt1$$Register, noreg, ++ icnt2, $result$$Register, StrIntrinsicNode::UU); ++ %} ++ ins_pipe(pipe_slow); ++%} ++ ++instruct string_indexof_conLL(a4_RegP str1, mA5RegI cnt1, a6_RegP str2, immI_1_4 int_cnt2, ++ mT8RegI result) ++%{ ++ predicate(((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::LL); ++ match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 int_cnt2))); ++ effect(USE_KILL str1, USE_KILL str2, USE_KILL cnt1); ++ ++ format %{ "String IndexOf $str1,$cnt1,$str2,$int_cnt2 -> $result (LL)" %} ++ ins_encode %{ ++ int icnt2 = (int)$int_cnt2$$constant; ++ __ string_indexof_linearscan($str1$$Register, $str2$$Register, ++ $cnt1$$Register, noreg, ++ icnt2, $result$$Register, StrIntrinsicNode::LL); ++ %} ++ ins_pipe(pipe_slow); ++%} ++ ++instruct string_indexof_conUL(a4_RegP str1, mA5RegI cnt1, a6_RegP str2, immI_1 int_cnt2, ++ mT8RegI result) ++%{ ++ predicate(((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UL); ++ match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 int_cnt2))); ++ effect(USE_KILL str1, USE_KILL str2, USE_KILL cnt1); ++ ++ format %{ "String IndexOf $str1,$cnt1,$str2,$int_cnt2 -> $result (UL)" %} ++ ins_encode %{ ++ int icnt2 = (int)$int_cnt2$$constant; ++ __ 
string_indexof_linearscan($str1$$Register, $str2$$Register, ++ $cnt1$$Register, noreg, ++ icnt2, $result$$Register, StrIntrinsicNode::UL); ++ %} ++ ins_pipe(pipe_slow); ++%} ++ ++instruct string_indexof_char(a4_RegP str1, mA5RegI cnt1, mA6RegI ch, no_Ax_mRegI result, mRegL tmp1, mRegL tmp2, mRegL tmp3) %{ ++ predicate(((StrIndexOfCharNode*)n)->encoding() == StrIntrinsicNode::U); ++ match(Set result (StrIndexOfChar (Binary str1 cnt1) ch)); ++ effect(USE_KILL str1, USE_KILL cnt1, USE_KILL ch, TEMP_DEF result, TEMP tmp1, TEMP tmp2, TEMP tmp3); ++ ++ format %{ "StringUTF16 IndexOf char[] $str1, len:$cnt1, char:$ch, res:$result, tmp1:$tmp1, tmp2:$tmp2, tmp3:$tmp3 -> $result @ string_indexof_char" %} ++ ++ ins_encode %{ ++ __ string_indexof_char($str1$$Register, $cnt1$$Register, $ch$$Register, ++ $result$$Register, $tmp1$$Register, $tmp2$$Register, ++ $tmp3$$Register); ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct stringL_indexof_char(a4_RegP str1, mA5RegI cnt1, mA6RegI ch, no_Ax_mRegI result, mRegL tmp1, mRegL tmp2, mRegL tmp3) %{ ++ predicate(((StrIndexOfCharNode*)n)->encoding() == StrIntrinsicNode::L); ++ match(Set result (StrIndexOfChar (Binary str1 cnt1) ch)); ++ effect(USE_KILL str1, USE_KILL cnt1, USE_KILL ch, TEMP_DEF result, TEMP tmp1, TEMP tmp2, TEMP tmp3); ++ ++ format %{ "StringLatin1 IndexOf char[] $str1, len:$cnt1, char:$ch, res:$result, tmp1:$tmp1, tmp2:$tmp2, tmp3:$tmp3 -> $result @ stringL_indexof_char" %} ++ ++ ins_encode %{ ++ __ stringL_indexof_char($str1$$Register, $cnt1$$Register, $ch$$Register, ++ $result$$Register, $tmp1$$Register, $tmp2$$Register, ++ $tmp3$$Register); ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct has_negatives(a4_RegP ary1, mA5RegI len, no_Ax_mRegI result) %{ ++ match(Set result (HasNegatives ary1 len)); ++ effect(USE_KILL ary1, USE_KILL len); ++ format %{ "has negatives byte[] ary1:$ary1, len:$len -> $result @ has_negatives" %} ++ ++ ins_encode %{ ++ __ has_negatives($ary1$$Register, $len$$Register, $result$$Register); ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++// fast char[] to byte[] compression ++instruct string_compress(a4_RegP src, a5_RegP dst, mA6RegI len, no_Ax_mRegI result, ++ mRegL tmp1, mRegL tmp2, mRegL tmp3) ++%{ ++ match(Set result (StrCompressedCopy src (Binary dst len))); ++ effect(USE_KILL src, USE_KILL dst, USE_KILL len, TEMP_DEF result, ++ TEMP tmp1, TEMP tmp2, TEMP tmp3); ++ ++ format %{ "String Compress $src,$dst -> $result @ string_compress " %} ++ ins_encode %{ ++ __ char_array_compress($src$$Register, $dst$$Register, $len$$Register, ++ $result$$Register, $tmp1$$Register, ++ $tmp2$$Register, $tmp3$$Register); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++// byte[] to char[] inflation ++instruct string_inflate(Universe dummy, a4_RegP src, a5_RegP dst, mA6RegI len, ++ mRegL tmp1, mRegL tmp2) ++%{ ++ match(Set dummy (StrInflatedCopy src (Binary dst len))); ++ effect(USE_KILL src, USE_KILL dst, USE_KILL len, TEMP tmp1, TEMP tmp2); ++ ++ format %{ "String Inflate $src,$dst @ string_inflate " %} ++ ins_encode %{ ++ __ byte_array_inflate($src$$Register, $dst$$Register, $len$$Register, ++ $tmp1$$Register, $tmp2$$Register); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++// intrinsic optimization ++instruct string_equals(a4_RegP str1, a5_RegP str2, mA6RegI cnt, no_Ax_mRegI result, t8RegL tmp1, t3RegL tmp2) %{ ++ match(Set result (StrEquals (Binary str1 str2) cnt)); ++ effect(USE_KILL str1, USE_KILL str2, USE_KILL cnt, KILL tmp1, KILL tmp2); ++ ++ format %{ "String Equal $str1, $str2, len:$cnt, tmp1:$tmp1, tmp2:$tmp2 -> $result @ 
string_equals" %} ++ ins_encode %{ ++ __ arrays_equals($str1$$Register, $str2$$Register, ++ $cnt$$Register, $tmp1$$Register, $tmp2$$Register, $result$$Register, ++ false/* byte */); ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++// encode char[] to byte[] in ISO_8859_1 ++instruct encode_iso_array(a4_RegP src, a5_RegP dst, mA6RegI len, no_Ax_mRegI result, ++ mRegL tmp1, mRegL tmp2, mRegL tmp3) ++%{ ++ predicate(!((EncodeISOArrayNode*)n)->is_ascii()); ++ match(Set result (EncodeISOArray src (Binary dst len))); ++ effect(USE_KILL src, USE_KILL dst, USE_KILL len, TEMP_DEF result, ++ TEMP tmp1, TEMP tmp2, TEMP tmp3); ++ ++ format %{ "Encode ISO array $src,$dst,$len -> $result @ encode_iso_array" %} ++ ins_encode %{ ++ __ encode_iso_array($src$$Register, $dst$$Register, $len$$Register, ++ $result$$Register, $tmp1$$Register, ++ $tmp2$$Register, $tmp3$$Register, false); ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++// encode char[] to byte[] in ASCII ++instruct encode_ascii_array(a4_RegP src, a5_RegP dst, mA6RegI len, no_Ax_mRegI result, ++ mRegL tmp1, mRegL tmp2, mRegL tmp3) ++%{ ++ predicate(((EncodeISOArrayNode*)n)->is_ascii()); ++ match(Set result (EncodeISOArray src (Binary dst len))); ++ effect(USE_KILL src, USE_KILL dst, USE_KILL len, TEMP_DEF result, ++ TEMP tmp1, TEMP tmp2, TEMP tmp3); ++ ++ format %{ "Encode ASCII array $src,$dst,$len -> $result @ encode_ascii_array" %} ++ ins_encode %{ ++ __ encode_iso_array($src$$Register, $dst$$Register, $len$$Register, ++ $result$$Register, $tmp1$$Register, ++ $tmp2$$Register, $tmp3$$Register, true); ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++//----------Arithmetic Instructions------------------------------------------- ++//----------Addition Instructions--------------------------------------------- ++instruct addI_Reg_Reg(mRegI dst, mRegIorL2I src1, mRegIorL2I src2) %{ ++ match(Set dst (AddI src1 src2)); ++ ++ format %{ "add $dst, $src1, $src2 #@addI_Reg_Reg" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src1 = $src1$$Register; ++ Register src2 = $src2$$Register; ++ __ add_w(dst, src1, src2); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++instruct addI_Reg_imm(mRegI dst, mRegIorL2I src1, immI12 src2) %{ ++ match(Set dst (AddI src1 src2)); ++ ++ format %{ "add $dst, $src1, $src2 #@addI_Reg_imm12" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src1 = $src1$$Register; ++ int imm = $src2$$constant; ++ ++ __ addi_w(dst, src1, imm); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++instruct addI_salI_Reg_Reg_immI_1_4(mRegI dst, mRegI src1, mRegI src2, immI_1_4 shift) %{ ++ match(Set dst (AddI src1 (LShiftI src2 shift))); ++ ++ format %{ "alsl $dst, $src1, $src2, $shift #@addI_salI_Reg_Reg_immI_1_4" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src1 = $src1$$Register; ++ Register src2 = $src2$$Register; ++ int sh = $shift$$constant; ++ __ alsl_w(dst, src2, src1, sh - 1); ++ %} ++ ins_pipe(ialu_regI_regI); ++%} ++ ++instruct addP_reg_reg(mRegP dst, mRegP src1, mRegLorI2L src2) %{ ++ match(Set dst (AddP src1 src2)); ++ ++ format %{ "ADD $dst, $src1, $src2 #@addP_reg_reg" %} ++ ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src1 = $src1$$Register; ++ Register src2 = $src2$$Register; ++ __ add_d(dst, src1, src2); ++ %} ++ ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++instruct addP_reg_reg_M8(mRegP dst, mRegP src1, mRegLorI2L src2, immL_M8 M8) %{ ++ match(Set dst (AddP src1 (AndL src2 M8))); ++ format %{ "dadd $dst, $src1, $src2 #@addP_reg_reg_M8" %} ++ ++ ins_encode %{ ++ Register dst = 
$dst$$Register; ++ Register src1 = $src1$$Register; ++ Register src2 = $src2$$Register; ++ __ bstrins_d(src2, R0, 2, 0); ++ __ add_d(dst, src1, src2); ++ %} ++ ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++instruct addP_reg_imm12(mRegP dst, mRegP src1, immL12 src2) %{ ++ match(Set dst (AddP src1 src2)); ++ ++ format %{ "ADD $dst, $src1, $src2 #@addP_reg_imm12" %} ++ ins_encode %{ ++ Register src1 = $src1$$Register; ++ long src2 = $src2$$constant; ++ Register dst = $dst$$Register; ++ ++ __ addi_d(dst, src1, src2); ++ %} ++ ins_pipe( ialu_regI_imm16 ); ++%} ++ ++instruct addP_salL_Reg_RegI2L_immI_1_4(mRegP dst, mRegP src1, mRegI src2, immI_1_4 shift) %{ ++ match(Set dst (AddP src1 (LShiftL (ConvI2L src2) shift))); ++ ++ format %{ "alsl $dst, $src1, $src2, $shift #@addP_salL_Reg_RegI2L_immI_1_4" %} ++ ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src1 = $src1$$Register; ++ Register src2 = $src2$$Register; ++ int sh = $shift$$constant; ++ __ alsl_d(dst, src2, src1, sh - 1); ++ %} ++ ++ ins_pipe(ialu_regI_regI); ++%} ++ ++// Add Long Register with Register ++instruct addL_Reg_Reg(mRegL dst, mRegLorI2L src1, mRegLorI2L src2) %{ ++ match(Set dst (AddL src1 src2)); ++ ins_cost(200); ++ format %{ "ADD $dst, $src1, $src2 #@addL_Reg_Reg\t" %} ++ ++ ins_encode %{ ++ Register dst_reg = as_Register($dst$$reg); ++ Register src1_reg = as_Register($src1$$reg); ++ Register src2_reg = as_Register($src2$$reg); ++ ++ __ add_d(dst_reg, src1_reg, src2_reg); ++ %} ++ ++ ins_pipe( ialu_regL_regL ); ++%} ++ ++instruct addL_Reg_imm(mRegL dst, mRegLorI2L src1, immL12 src2) ++%{ ++ match(Set dst (AddL src1 src2)); ++ ++ format %{ "ADD $dst, $src1, $src2 #@addL_Reg_imm " %} ++ ins_encode %{ ++ Register dst_reg = as_Register($dst$$reg); ++ Register src1_reg = as_Register($src1$$reg); ++ int src2_imm = $src2$$constant; ++ ++ __ addi_d(dst_reg, src1_reg, src2_imm); ++ %} ++ ++ ins_pipe( ialu_regL_regL ); ++%} ++ ++//----------Abs Instructions------------------------------------------- ++ ++// Integer Absolute Instructions ++instruct absI_rReg(mRegI dst, mRegI src) ++%{ ++ match(Set dst (AbsI src)); ++ effect(TEMP dst); ++ format %{ "AbsI $dst, $src" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src = $src$$Register; ++ ++ __ srai_w(AT, src, 31); ++ __ xorr(dst, src, AT); ++ __ sub_w(dst, dst, AT); ++ %} ++ ++ ins_pipe(ialu_regI_regI); ++%} ++ ++// Long Absolute Instructions ++instruct absL_rReg(mRegL dst, mRegLorI2L src) ++%{ ++ match(Set dst (AbsL src)); ++ effect(TEMP dst); ++ format %{ "AbsL $dst, $src" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src = $src$$Register; ++ ++ __ srai_d(AT, src, 63); ++ __ xorr(dst, src, AT); ++ __ sub_d(dst, dst, AT); ++ %} ++ ++ ins_pipe(ialu_regL_regL); ++%} ++ ++//----------Subtraction Instructions------------------------------------------- ++// Integer Subtraction Instructions ++instruct subI_Reg_Reg(mRegI dst, mRegIorL2I src1, mRegIorL2I src2) %{ ++ match(Set dst (SubI src1 src2)); ++ ins_cost(100); ++ ++ format %{ "sub $dst, $src1, $src2 #@subI_Reg_Reg" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src1 = $src1$$Register; ++ Register src2 = $src2$$Register; ++ __ sub_w(dst, src1, src2); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++instruct subI_Reg_immI_M2047_2048(mRegI dst, mRegIorL2I src1, immI_M2047_2048 src2) %{ ++ match(Set dst (SubI src1 src2)); ++ ins_cost(80); ++ ++ format %{ "sub $dst, $src1, $src2 #@subI_Reg_immI_M2047_2048" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src1 = 
$src1$$Register; ++ __ addi_w(dst, src1, -1 * $src2$$constant); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++instruct negI_Reg(mRegI dst, immI_0 zero, mRegIorL2I src) %{ ++ match(Set dst (SubI zero src)); ++ ins_cost(80); ++ ++ format %{ "neg $dst, $src #@negI_Reg" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src = $src$$Register; ++ __ sub_w(dst, R0, src); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++instruct negL_Reg(mRegL dst, immL_0 zero, mRegLorI2L src) %{ ++ match(Set dst (SubL zero src)); ++ ins_cost(80); ++ ++ format %{ "neg $dst, $src #@negL_Reg" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src = $src$$Register; ++ __ sub_d(dst, R0, src); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++instruct subL_Reg_immL_M2047_2048(mRegL dst, mRegL src1, immL_M2047_2048 src2) %{ ++ match(Set dst (SubL src1 src2)); ++ ins_cost(80); ++ ++ format %{ "sub $dst, $src1, $src2 #@subL_Reg_immL_M2047_2048" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src1 = $src1$$Register; ++ __ addi_d(dst, src1, -1 * $src2$$constant); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++// Subtract Long Register with Register. ++instruct subL_Reg_Reg(mRegL dst, mRegLorI2L src1, mRegLorI2L src2) %{ ++ match(Set dst (SubL src1 src2)); ++ ins_cost(100); ++ format %{ "SubL $dst, $src1, $src2 @ subL_Reg_Reg" %} ++ ins_encode %{ ++ Register dst = as_Register($dst$$reg); ++ Register src1 = as_Register($src1$$reg); ++ Register src2 = as_Register($src2$$reg); ++ ++ __ sub_d(dst, src1, src2); ++ %} ++ ins_pipe( ialu_regL_regL ); ++%} ++ ++// Integer MOD with Register ++instruct modI_Reg_Reg(mRegI dst, mRegIorL2I src1, mRegIorL2I src2) %{ ++ match(Set dst (ModI src1 src2)); ++ ins_cost(300); ++ format %{ "modi $dst, $src1, $src2 @ modI_Reg_Reg" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src1 = $src1$$Register; ++ Register src2 = $src2$$Register; ++ ++ __ mod_w(dst, src1, src2); ++ %} ++ ++ //ins_pipe( ialu_mod ); ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++instruct modL_reg_reg(mRegL dst, mRegLorI2L src1, mRegLorI2L src2) %{ ++ match(Set dst (ModL src1 src2)); ++ format %{ "modL $dst, $src1, $src2 @modL_reg_reg" %} ++ ++ ins_encode %{ ++ Register dst = as_Register($dst$$reg); ++ Register op1 = as_Register($src1$$reg); ++ Register op2 = as_Register($src2$$reg); ++ ++ __ mod_d(dst, op1, op2); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct mulI_Reg_Reg(mRegI dst, mRegI src1, mRegI src2) %{ ++ match(Set dst (MulI src1 src2)); ++ ++ ins_cost(300); ++ format %{ "mul $dst, $src1, $src2 @ mulI_Reg_Reg" %} ++ ins_encode %{ ++ Register src1 = $src1$$Register; ++ Register src2 = $src2$$Register; ++ Register dst = $dst$$Register; ++ ++ __ mul_w(dst, src1, src2); ++ %} ++ ins_pipe( ialu_mult ); ++%} ++ ++instruct divI_Reg_Reg(mRegI dst, mRegI src1, mRegI src2) %{ ++ match(Set dst (DivI src1 src2)); ++ ++ ins_cost(300); ++ format %{ "div $dst, $src1, $src2 @ divI_Reg_Reg" %} ++ ins_encode %{ ++ Register src1 = $src1$$Register; ++ Register src2 = $src2$$Register; ++ Register dst = $dst$$Register; ++ ++ __ div_w(dst, src1, src2); ++ ++ %} ++ ins_pipe( ialu_mod ); ++%} ++ ++instruct divF_Reg_Reg(regF dst, regF src1, regF src2) %{ ++ match(Set dst (DivF src1 src2)); ++ ++ ins_cost(300); ++ format %{ "divF $dst, $src1, $src2 @ divF_Reg_Reg" %} ++ ins_encode %{ ++ FloatRegister src1 = $src1$$FloatRegister; ++ FloatRegister src2 = $src2$$FloatRegister; ++ FloatRegister dst = $dst$$FloatRegister; ++ ++ __ fdiv_s(dst, src1, src2); ++ %} ++ ins_pipe( pipe_slow ); ++%} 
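Editor's note (not part of the patch): the AbsI/AbsL patterns a little further up use the classic branch-free absolute-value idiom: an arithmetic right shift by the sign-bit position produces 0 for non-negative inputs and all-ones (-1) for negative ones, and xor-then-subtract with that mask negates exactly the negative case. A minimal C++ sketch of the same computation follows; the function name abs32 is invented for the example and it assumes signed right shift is arithmetic, as it is on the target:

    // Illustrative sketch of the srai_w/xorr/sub_w sequence in absI_rReg.
    #include <cstdint>
    #include <cassert>

    int32_t abs32(int32_t x) {
        int32_t m = x >> 31;    // srai_w AT, src, 31: 0 or -1 (all ones)
        return (x ^ m) - m;     // xorr dst, src, AT; sub_w dst, dst, AT
    }

    int main() {
        assert(abs32(5) == 5);
        assert(abs32(-7) == 7);
        assert(abs32(0) == 0);
        return 0;
    }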
++ ++instruct divD_Reg_Reg(regD dst, regD src1, regD src2) %{ ++ match(Set dst (DivD src1 src2)); ++ ++ ins_cost(300); ++ format %{ "divD $dst, $src1, $src2 @ divD_Reg_Reg" %} ++ ins_encode %{ ++ FloatRegister src1 = $src1$$FloatRegister; ++ FloatRegister src2 = $src2$$FloatRegister; ++ FloatRegister dst = $dst$$FloatRegister; ++ ++ __ fdiv_d(dst, src1, src2); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct mulL_reg_reg(mRegL dst, mRegLorI2L src1, mRegLorI2L src2) %{ ++ match(Set dst (MulL src1 src2)); ++ format %{ "mulL $dst, $src1, $src2 @mulL_reg_reg" %} ++ ins_encode %{ ++ Register dst = as_Register($dst$$reg); ++ Register op1 = as_Register($src1$$reg); ++ Register op2 = as_Register($src2$$reg); ++ ++ __ mul_d(dst, op1, op2); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct mulHiL_reg_reg(mRegL dst, mRegL src1, mRegL src2) %{ ++ match(Set dst (MulHiL src1 src2)); ++ format %{ "mulHiL $dst, $src1, $src2 @mulL_reg_reg" %} ++ ins_encode %{ ++ Register dst = as_Register($dst$$reg); ++ Register op1 = as_Register($src1$$reg); ++ Register op2 = as_Register($src2$$reg); ++ ++ __ mulh_d(dst, op1, op2); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct divL_reg_reg(mRegL dst, mRegL src1, mRegL src2) %{ ++ match(Set dst (DivL src1 src2)); ++ format %{ "divL $dst, $src1, $src2 @divL_reg_reg" %} ++ ++ ins_encode %{ ++ Register dst = as_Register($dst$$reg); ++ Register op1 = as_Register($src1$$reg); ++ Register op2 = as_Register($src2$$reg); ++ ++ __ div_d(dst, op1, op2); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct addF_reg_reg(regF dst, regF src1, regF src2) %{ ++ match(Set dst (AddF src1 src2)); ++ format %{ "AddF $dst, $src1, $src2 @addF_reg_reg" %} ++ ins_encode %{ ++ FloatRegister src1 = as_FloatRegister($src1$$reg); ++ FloatRegister src2 = as_FloatRegister($src2$$reg); ++ FloatRegister dst = as_FloatRegister($dst$$reg); ++ ++ __ fadd_s(dst, src1, src2); ++ %} ++ ins_pipe( fpu_regF_regF ); ++%} ++ ++instruct subF_reg_reg(regF dst, regF src1, regF src2) %{ ++ match(Set dst (SubF src1 src2)); ++ format %{ "SubF $dst, $src1, $src2 @subF_reg_reg" %} ++ ins_encode %{ ++ FloatRegister src1 = as_FloatRegister($src1$$reg); ++ FloatRegister src2 = as_FloatRegister($src2$$reg); ++ FloatRegister dst = as_FloatRegister($dst$$reg); ++ ++ __ fsub_s(dst, src1, src2); ++ %} ++ ins_pipe( fpu_regF_regF ); ++%} ++instruct addD_reg_reg(regD dst, regD src1, regD src2) %{ ++ match(Set dst (AddD src1 src2)); ++ format %{ "AddD $dst, $src1, $src2 @addD_reg_reg" %} ++ ins_encode %{ ++ FloatRegister src1 = as_FloatRegister($src1$$reg); ++ FloatRegister src2 = as_FloatRegister($src2$$reg); ++ FloatRegister dst = as_FloatRegister($dst$$reg); ++ ++ __ fadd_d(dst, src1, src2); ++ %} ++ ins_pipe( fpu_regF_regF ); ++%} ++ ++instruct subD_reg_reg(regD dst, regD src1, regD src2) %{ ++ match(Set dst (SubD src1 src2)); ++ format %{ "SubD $dst, $src1, $src2 @subD_reg_reg" %} ++ ins_encode %{ ++ FloatRegister src1 = as_FloatRegister($src1$$reg); ++ FloatRegister src2 = as_FloatRegister($src2$$reg); ++ FloatRegister dst = as_FloatRegister($dst$$reg); ++ ++ __ fsub_d(dst, src1, src2); ++ %} ++ ins_pipe( fpu_regF_regF ); ++%} ++ ++instruct negF_reg(regF dst, regF src) %{ ++ match(Set dst (NegF src)); ++ format %{ "negF $dst, $src @negF_reg" %} ++ ins_encode %{ ++ FloatRegister src = as_FloatRegister($src$$reg); ++ FloatRegister dst = as_FloatRegister($dst$$reg); ++ ++ __ fneg_s(dst, src); ++ %} ++ ins_pipe( fpu_regF_regF ); ++%} ++ ++instruct negD_reg(regD dst, regD src) %{ ++ match(Set dst (NegD src)); ++ format %{ "negD 
$dst, $src @negD_reg" %} ++ ins_encode %{ ++ FloatRegister src = as_FloatRegister($src$$reg); ++ FloatRegister dst = as_FloatRegister($dst$$reg); ++ ++ __ fneg_d(dst, src); ++ %} ++ ins_pipe( fpu_regF_regF ); ++%} ++ ++ ++instruct mulF_reg_reg(regF dst, regF src1, regF src2) %{ ++ match(Set dst (MulF src1 src2)); ++ format %{ "MULF $dst, $src1, $src2 @mulF_reg_reg" %} ++ ins_encode %{ ++ FloatRegister src1 = $src1$$FloatRegister; ++ FloatRegister src2 = $src2$$FloatRegister; ++ FloatRegister dst = $dst$$FloatRegister; ++ ++ __ fmul_s(dst, src1, src2); ++ %} ++ ins_pipe( fpu_regF_regF ); ++%} ++ ++// Mul two double precision floating piont number ++instruct mulD_reg_reg(regD dst, regD src1, regD src2) %{ ++ match(Set dst (MulD src1 src2)); ++ format %{ "MULD $dst, $src1, $src2 @mulD_reg_reg" %} ++ ins_encode %{ ++ FloatRegister src1 = $src1$$FloatRegister; ++ FloatRegister src2 = $src2$$FloatRegister; ++ FloatRegister dst = $dst$$FloatRegister; ++ ++ __ fmul_d(dst, src1, src2); ++ %} ++ ins_pipe( fpu_regF_regF ); ++%} ++ ++instruct absF_reg(regF dst, regF src) %{ ++ match(Set dst (AbsF src)); ++ ins_cost(100); ++ format %{ "absF $dst, $src @absF_reg" %} ++ ins_encode %{ ++ FloatRegister src = as_FloatRegister($src$$reg); ++ FloatRegister dst = as_FloatRegister($dst$$reg); ++ ++ __ fabs_s(dst, src); ++ %} ++ ins_pipe( fpu_regF_regF ); ++%} ++ ++ ++// intrinsics for math_native. ++// AbsD SqrtD CosD SinD TanD LogD Log10D ++ ++instruct absD_reg(regD dst, regD src) %{ ++ match(Set dst (AbsD src)); ++ ins_cost(100); ++ format %{ "absD $dst, $src @absD_reg" %} ++ ins_encode %{ ++ FloatRegister src = as_FloatRegister($src$$reg); ++ FloatRegister dst = as_FloatRegister($dst$$reg); ++ ++ __ fabs_d(dst, src); ++ %} ++ ins_pipe( fpu_regF_regF ); ++%} ++ ++instruct sqrtD_reg(regD dst, regD src) %{ ++ match(Set dst (SqrtD src)); ++ ins_cost(100); ++ format %{ "SqrtD $dst, $src @sqrtD_reg" %} ++ ins_encode %{ ++ FloatRegister src = as_FloatRegister($src$$reg); ++ FloatRegister dst = as_FloatRegister($dst$$reg); ++ ++ __ fsqrt_d(dst, src); ++ %} ++ ins_pipe( fpu_regF_regF ); ++%} ++ ++instruct sqrtF_reg(regF dst, regF src) %{ ++ match(Set dst (ConvD2F (SqrtD (ConvF2D src)))); ++ ins_cost(100); ++ format %{ "SqrtF $dst, $src @sqrtF_reg" %} ++ ins_encode %{ ++ FloatRegister src = as_FloatRegister($src$$reg); ++ FloatRegister dst = as_FloatRegister($dst$$reg); ++ ++ __ fsqrt_s(dst, src); ++ %} ++ ins_pipe( fpu_regF_regF ); ++%} ++ ++// src1 * src2 + src3 ++instruct maddF_reg_reg(regF dst, regF src1, regF src2, regF src3) %{ ++ predicate(UseFMA); ++ match(Set dst (FmaF src3 (Binary src1 src2))); ++ ++ format %{ "fmadd_s $dst, $src1, $src2, $src3" %} ++ ++ ins_encode %{ ++ __ fmadd_s(as_FloatRegister($dst$$reg), as_FloatRegister($src1$$reg), ++ as_FloatRegister($src2$$reg), as_FloatRegister($src3$$reg)); ++ %} ++ ++ ins_pipe(fpu_regF_regF); ++%} ++ ++// src1 * src2 + src3 ++instruct maddD_reg_reg(regD dst, regD src1, regD src2, regD src3) %{ ++ predicate(UseFMA); ++ match(Set dst (FmaD src3 (Binary src1 src2))); ++ ++ format %{ "fmadd_d $dst, $src1, $src2, $src3" %} ++ ++ ins_encode %{ ++ __ fmadd_d(as_FloatRegister($dst$$reg), as_FloatRegister($src1$$reg), ++ as_FloatRegister($src2$$reg), as_FloatRegister($src3$$reg)); ++ %} ++ ++ ins_pipe(fpu_regF_regF); ++%} ++ ++// src1 * src2 - src3 ++instruct msubF_reg_reg(regF dst, regF src1, regF src2, regF src3, immF_0 zero) %{ ++ predicate(UseFMA); ++ match(Set dst (FmaF (NegF src3) (Binary src1 src2))); ++ ++ format %{ "fmsub_s $dst, $src1, $src2, $src3" %} ++ ++ 
ins_encode %{ ++ __ fmsub_s(as_FloatRegister($dst$$reg), as_FloatRegister($src1$$reg), ++ as_FloatRegister($src2$$reg), as_FloatRegister($src3$$reg)); ++ %} ++ ++ ins_pipe(fpu_regF_regF); ++%} ++ ++// src1 * src2 - src3 ++instruct msubD_reg_reg(regD dst, regD src1, regD src2, regD src3, immD_0 zero) %{ ++ predicate(UseFMA); ++ match(Set dst (FmaD (NegD src3) (Binary src1 src2))); ++ ++ format %{ "fmsub_d $dst, $src1, $src2, $src3" %} ++ ++ ins_encode %{ ++ __ fmsub_d(as_FloatRegister($dst$$reg), as_FloatRegister($src1$$reg), ++ as_FloatRegister($src2$$reg), as_FloatRegister($src3$$reg)); ++ %} ++ ++ ins_pipe(fpu_regF_regF); ++%} ++ ++// -src1 * src2 - src3 ++instruct mnaddF_reg_reg(regF dst, regF src1, regF src2, regF src3, immF_0 zero) %{ ++ predicate(UseFMA); ++ match(Set dst (FmaF (NegF src3) (Binary (NegF src1) src2))); ++ match(Set dst (FmaF (NegF src3) (Binary src1 (NegF src2)))); ++ ++ format %{ "fnmadds $dst, $src1, $src2, $src3" %} ++ ++ ins_encode %{ ++ __ fnmadd_s(as_FloatRegister($dst$$reg), as_FloatRegister($src1$$reg), ++ as_FloatRegister($src2$$reg), as_FloatRegister($src3$$reg)); ++ %} ++ ++ ins_pipe(fpu_regF_regF); ++%} ++ ++// -src1 * src2 - src3 ++instruct mnaddD_reg_reg(regD dst, regD src1, regD src2, regD src3, immD_0 zero) %{ ++ predicate(UseFMA); ++ match(Set dst (FmaD (NegD src3) (Binary (NegD src1) src2))); ++ match(Set dst (FmaD (NegD src3) (Binary src1 (NegD src2)))); ++ ++ format %{ "fnmaddd $dst, $src1, $src2, $src3" %} ++ ++ ins_encode %{ ++ __ fnmadd_d(as_FloatRegister($dst$$reg), as_FloatRegister($src1$$reg), ++ as_FloatRegister($src2$$reg), as_FloatRegister($src3$$reg)); ++ %} ++ ++ ins_pipe(fpu_regF_regF); ++%} ++ ++// -src1 * src2 + src3 ++instruct mnsubF_reg_reg(regF dst, regF src1, regF src2, regF src3) %{ ++ predicate(UseFMA); ++ match(Set dst (FmaF src3 (Binary (NegF src1) src2))); ++ match(Set dst (FmaF src3 (Binary src1 (NegF src2)))); ++ ++ format %{ "fnmsubs $dst, $src1, $src2, $src3" %} ++ ++ ins_encode %{ ++ __ fnmsub_s(as_FloatRegister($dst$$reg), as_FloatRegister($src1$$reg), ++ as_FloatRegister($src2$$reg), as_FloatRegister($src3$$reg)); ++ %} ++ ++ ins_pipe(fpu_regF_regF); ++%} ++ ++// -src1 * src2 + src3 ++instruct mnsubD_reg_reg(regD dst, regD src1, regD src2, regD src3) %{ ++ predicate(UseFMA); ++ match(Set dst (FmaD src3 (Binary (NegD src1) src2))); ++ match(Set dst (FmaD src3 (Binary src1 (NegD src2)))); ++ ++ format %{ "fnmsubd $dst, $src1, $src2, $src3" %} ++ ++ ins_encode %{ ++ __ fnmsub_d(as_FloatRegister($dst$$reg), as_FloatRegister($src1$$reg), ++ as_FloatRegister($src2$$reg), as_FloatRegister($src3$$reg)); ++ %} ++ ++ ins_pipe(fpu_regF_regF); ++%} ++ ++instruct copySignF_reg(regF dst, regF src1, regF src2) %{ ++ match(Set dst (CopySignF src1 src2)); ++ effect(TEMP_DEF dst, USE src1, USE src2); ++ ++ format %{ "fcopysign_s $dst $src1 $src2 @ copySignF_reg" %} ++ ++ ins_encode %{ ++ __ fcopysign_s($dst$$FloatRegister, ++ $src1$$FloatRegister, ++ $src2$$FloatRegister); ++ %} ++ ++ ins_pipe( fpu_regF_regF ); ++%} ++ ++instruct copySignD_reg(regD dst, regD src1, regD src2, immD_0 zero) %{ ++ match(Set dst (CopySignD src1 (Binary src2 zero))); ++ effect(TEMP_DEF dst, USE src1, USE src2); ++ ++ format %{ "fcopysign_d $dst $src1 $src2 @ copySignD_reg" %} ++ ++ ins_encode %{ ++ __ fcopysign_d($dst$$FloatRegister, ++ $src1$$FloatRegister, ++ $src2$$FloatRegister); ++ %} ++ ++ ins_pipe( fpu_regF_regF ); ++%} ++ ++//----------------------------------Logical Instructions---------------------- ++//__________________________________Integer 
Logical Instructions------------- ++ ++//And Instuctions ++// And Register with Immediate ++instruct andI_Reg_imm_0_4095(mRegI dst, mRegI src1, immI_0_4095 src2) %{ ++ match(Set dst (AndI src1 src2)); ++ ins_cost(60); ++ ++ format %{ "and $dst, $src1, $src2 #@andI_Reg_imm_0_4095" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src = $src1$$Register; ++ int val = $src2$$constant; ++ ++ __ andi(dst, src, val); ++ ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++instruct andI_Reg_immI_nonneg_mask(mRegI dst, mRegI src1, immI_nonneg_mask mask) %{ ++ match(Set dst (AndI src1 mask)); ++ ins_cost(60); ++ ++ format %{ "and $dst, $src1, $mask #@andI_Reg_immI_nonneg_mask" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src = $src1$$Register; ++ int size = Assembler::is_int_mask($mask$$constant); ++ ++ __ bstrpick_w(dst, src, size-1, 0); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++instruct andL_Reg_immL_nonneg_mask(mRegL dst, mRegL src1, immL_nonneg_mask mask) %{ ++ match(Set dst (AndL src1 mask)); ++ ins_cost(60); ++ ++ format %{ "and $dst, $src1, $mask #@andL_Reg_immL_nonneg_mask" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src = $src1$$Register; ++ int size = Assembler::is_jlong_mask($mask$$constant); ++ ++ __ bstrpick_d(dst, src, size-1, 0); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++instruct xorI_Reg_imm_0_4095(mRegI dst, mRegI src1, immI_0_4095 src2) %{ ++ match(Set dst (XorI src1 src2)); ++ ins_cost(60); ++ ++ format %{ "xori $dst, $src1, $src2 #@xorI_Reg_imm_0_4095" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src = $src1$$Register; ++ int val = $src2$$constant; ++ ++ __ xori(dst, src, val); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++instruct xorI_Reg_immI_M1(mRegI dst, mRegIorL2I src1, immI_M1 M1) %{ ++ match(Set dst (XorI src1 M1)); ++ ins_cost(60); ++ ++ format %{ "xor $dst, $src1, $M1 #@xorI_Reg_immI_M1" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src = $src1$$Register; ++ ++ __ orn(dst, R0, src); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++instruct xorL_Reg_imm_0_4095(mRegL dst, mRegL src1, immL_0_4095 src2) %{ ++ match(Set dst (XorL src1 src2)); ++ ins_cost(60); ++ ++ format %{ "xori $dst, $src1, $src2 #@xorL_Reg_imm_0_4095" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src = $src1$$Register; ++ int val = $src2$$constant; ++ ++ __ xori(dst, src, val); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++ ++instruct lbu_and_lmask(mRegI dst, memory mem, immI_255 mask) %{ ++ match(Set dst (AndI mask (LoadB mem))); ++ ins_cost(60); ++ ++ format %{ "lhu $dst, $mem #@lbu_and_lmask" %} ++ ins_encode %{ ++ __ loadstore_enc($dst$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, C2_MacroAssembler::LOAD_U_BYTE); ++ %} ++ ins_pipe( ialu_loadI ); ++%} ++ ++instruct lbu_and_rmask(mRegI dst, memory mem, immI_255 mask) %{ ++ match(Set dst (AndI (LoadB mem) mask)); ++ ins_cost(60); ++ ++ format %{ "lhu $dst, $mem #@lbu_and_rmask" %} ++ ins_encode %{ ++ __ loadstore_enc($dst$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, C2_MacroAssembler::LOAD_U_BYTE); ++ %} ++ ins_pipe( ialu_loadI ); ++%} ++ ++instruct andI_Reg_Reg(mRegI dst, mRegI src1, mRegI src2) %{ ++ match(Set dst (AndI src1 src2)); ++ ++ format %{ "and $dst, $src1, $src2 #@andI_Reg_Reg" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src1 = $src1$$Register; ++ Register src2 = $src2$$Register; ++ ++ __ andr(dst, src1, src2); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++instruct 
andnI_Reg_nReg(mRegI dst, mRegI src1, mRegI src2, immI_M1 M1) %{ ++ match(Set dst (AndI src1 (XorI src2 M1))); ++ ++ format %{ "andn $dst, $src1, $src2 #@andnI_Reg_nReg" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src1 = $src1$$Register; ++ Register src2 = $src2$$Register; ++ ++ __ andn(dst, src1, src2); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++instruct ornI_Reg_nReg(mRegI dst, mRegI src1, mRegI src2, immI_M1 M1) %{ ++ match(Set dst (OrI src1 (XorI src2 M1))); ++ ++ format %{ "orn $dst, $src1, $src2 #@ornI_Reg_nReg" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src1 = $src1$$Register; ++ Register src2 = $src2$$Register; ++ ++ __ orn(dst, src1, src2); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++instruct andnI_nReg_Reg(mRegI dst, mRegI src1, mRegI src2, immI_M1 M1) %{ ++ match(Set dst (AndI (XorI src1 M1) src2)); ++ ++ format %{ "andn $dst, $src2, $src1 #@andnI_nReg_Reg" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src1 = $src1$$Register; ++ Register src2 = $src2$$Register; ++ ++ __ andn(dst, src2, src1); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++instruct ornI_nReg_Reg(mRegI dst, mRegI src1, mRegI src2, immI_M1 M1) %{ ++ match(Set dst (OrI (XorI src1 M1) src2)); ++ ++ format %{ "orn $dst, $src2, $src1 #@ornI_nReg_Reg" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src1 = $src1$$Register; ++ Register src2 = $src2$$Register; ++ ++ __ orn(dst, src2, src1); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++// And Long Register with Register ++instruct andL_Reg_Reg(mRegL dst, mRegL src1, mRegLorI2L src2) %{ ++ match(Set dst (AndL src1 src2)); ++ format %{ "AND $dst, $src1, $src2 @ andL_Reg_Reg\n\t" %} ++ ins_encode %{ ++ Register dst_reg = as_Register($dst$$reg); ++ Register src1_reg = as_Register($src1$$reg); ++ Register src2_reg = as_Register($src2$$reg); ++ ++ __ andr(dst_reg, src1_reg, src2_reg); ++ %} ++ ins_pipe( ialu_regL_regL ); ++%} ++ ++instruct andL_Reg_imm_0_4095(mRegL dst, mRegL src1, immL_0_4095 src2) %{ ++ match(Set dst (AndL src1 src2)); ++ ins_cost(60); ++ ++ format %{ "and $dst, $src1, $src2 #@andL_Reg_imm_0_4095" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src = $src1$$Register; ++ long val = $src2$$constant; ++ ++ __ andi(dst, src, val); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++instruct andL2I_Reg_imm_0_4095(mRegI dst, mRegL src1, immL_0_4095 src2) %{ ++ match(Set dst (ConvL2I (AndL src1 src2))); ++ ins_cost(60); ++ ++ format %{ "and $dst, $src1, $src2 #@andL2I_Reg_imm_0_4095" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src = $src1$$Register; ++ long val = $src2$$constant; ++ ++ __ andi(dst, src, val); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++ ++instruct andL_Reg_immL_M8(mRegL dst, immL_M8 M8) %{ ++ match(Set dst (AndL dst M8)); ++ ins_cost(60); ++ ++ format %{ "and $dst, $dst, $M8 #@andL_Reg_immL_M8" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ ++ __ bstrins_d(dst, R0, 2, 0); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++instruct andL_Reg_immL_M5(mRegL dst, immL_M5 M5) %{ ++ match(Set dst (AndL dst M5)); ++ ins_cost(60); ++ ++ format %{ "and $dst, $dst, $M5 #@andL_Reg_immL_M5" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ ++ __ bstrins_d(dst, R0, 2, 2); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++instruct andL_Reg_immL_M7(mRegL dst, immL_M7 M7) %{ ++ match(Set dst (AndL dst M7)); ++ ins_cost(60); ++ ++ format %{ "and $dst, $dst, $M7 #@andL_Reg_immL_M7" %} ++ ins_encode %{ ++ Register dst = 
$dst$$Register; ++ ++ __ bstrins_d(dst, R0, 2, 1); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++instruct andL_Reg_immL_M4(mRegL dst, immL_M4 M4) %{ ++ match(Set dst (AndL dst M4)); ++ ins_cost(60); ++ ++ format %{ "and $dst, $dst, $M4 #@andL_Reg_immL_M4" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ ++ __ bstrins_d(dst, R0, 1, 0); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++instruct andL_Reg_immL_M121(mRegL dst, immL_M121 M121) %{ ++ match(Set dst (AndL dst M121)); ++ ins_cost(60); ++ ++ format %{ "and $dst, $dst, $M121 #@andL_Reg_immL_M121" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ ++ __ bstrins_d(dst, R0, 6, 3); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++// Or Long Register with Register ++instruct orL_Reg_Reg(mRegL dst, mRegLorI2L src1, mRegLorI2L src2) %{ ++ match(Set dst (OrL src1 src2)); ++ format %{ "OR $dst, $src1, $src2 @ orL_Reg_Reg\t" %} ++ ins_encode %{ ++ Register dst_reg = $dst$$Register; ++ Register src1_reg = $src1$$Register; ++ Register src2_reg = $src2$$Register; ++ ++ __ orr(dst_reg, src1_reg, src2_reg); ++ %} ++ ins_pipe( ialu_regL_regL ); ++%} ++ ++instruct orL_Reg_P2XReg(mRegL dst, mRegP src1, mRegLorI2L src2) %{ ++ match(Set dst (OrL (CastP2X src1) src2)); ++ format %{ "OR $dst, $src1, $src2 @ orL_Reg_P2XReg\t" %} ++ ins_encode %{ ++ Register dst_reg = $dst$$Register; ++ Register src1_reg = $src1$$Register; ++ Register src2_reg = $src2$$Register; ++ ++ __ orr(dst_reg, src1_reg, src2_reg); ++ %} ++ ins_pipe( ialu_regL_regL ); ++%} ++ ++// Xor Long Register with Register ++instruct xorL_Reg_Reg(mRegL dst, mRegL src1, mRegL src2) %{ ++ match(Set dst (XorL src1 src2)); ++ format %{ "XOR $dst, $src1, $src2 @ xorL_Reg_Reg\t" %} ++ ins_encode %{ ++ Register dst_reg = as_Register($dst$$reg); ++ Register src1_reg = as_Register($src1$$reg); ++ Register src2_reg = as_Register($src2$$reg); ++ ++ __ xorr(dst_reg, src1_reg, src2_reg); ++ %} ++ ins_pipe( ialu_regL_regL ); ++%} ++ ++// Shift Left by 5-bit immediate ++instruct salI_Reg_imm(mRegI dst, mRegIorL2I src, immIU5 shift) %{ ++ match(Set dst (LShiftI src shift)); ++ ++ format %{ "SHL $dst, $src, $shift #@salI_Reg_imm" %} ++ ins_encode %{ ++ Register src = $src$$Register; ++ Register dst = $dst$$Register; ++ int shamt = $shift$$constant; ++ ++ __ slli_w(dst, src, shamt); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++instruct salI_Reg_imm_and_M65536(mRegI dst, mRegI src, immI_16 shift, immI_M65536 mask) %{ ++ match(Set dst (AndI (LShiftI src shift) mask)); ++ ++ format %{ "SHL $dst, $src, $shift #@salI_Reg_imm_and_M65536" %} ++ ins_encode %{ ++ Register src = $src$$Register; ++ Register dst = $dst$$Register; ++ ++ __ slli_w(dst, src, 16); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++instruct land7_2_s(mRegI dst, mRegL src, immL_7 seven, immI_16 sixteen) ++%{ ++ match(Set dst (RShiftI (LShiftI (ConvL2I (AndL src seven)) sixteen) sixteen)); ++ ++ format %{ "andi $dst, $src, 7\t# @land7_2_s" %} ++ ins_encode %{ ++ Register src = $src$$Register; ++ Register dst = $dst$$Register; ++ ++ __ andi(dst, src, 7); ++ %} ++ ins_pipe(ialu_regI_regI); ++%} ++ ++// Logical Shift Right by 16, followed by Arithmetic Shift Left by 16. ++// This idiom is used by the compiler the i2s bytecode. 
++instruct i2s(mRegI dst, mRegI src, immI_16 sixteen) ++%{ ++ match(Set dst (RShiftI (LShiftI src sixteen) sixteen)); ++ ++ format %{ "i2s $dst, $src\t# @i2s" %} ++ ins_encode %{ ++ Register src = $src$$Register; ++ Register dst = $dst$$Register; ++ ++ __ ext_w_h(dst, src); ++ %} ++ ins_pipe(ialu_regI_regI); ++%} ++ ++// Logical Shift Right by 24, followed by Arithmetic Shift Left by 24. ++// This idiom is used by the compiler for the i2b bytecode. ++instruct i2b(mRegI dst, mRegI src, immI_24 twentyfour) ++%{ ++ match(Set dst (RShiftI (LShiftI src twentyfour) twentyfour)); ++ ++ format %{ "i2b $dst, $src\t# @i2b" %} ++ ins_encode %{ ++ Register src = $src$$Register; ++ Register dst = $dst$$Register; ++ ++ __ ext_w_b(dst, src); ++ %} ++ ins_pipe(ialu_regI_regI); ++%} ++ ++ ++instruct salI_RegL2I_imm(mRegI dst, mRegL src, immIU5 shift) %{ ++ match(Set dst (LShiftI (ConvL2I src) shift)); ++ ++ format %{ "SHL $dst, $src, $shift #@salI_RegL2I_imm" %} ++ ins_encode %{ ++ Register src = $src$$Register; ++ Register dst = $dst$$Register; ++ int shamt = $shift$$constant; ++ ++ __ slli_w(dst, src, shamt); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++// Shift Left by 8-bit immediate ++instruct salI_Reg_Reg(mRegI dst, mRegIorL2I src, mRegI shift) %{ ++ match(Set dst (LShiftI src shift)); ++ ++ format %{ "SHL $dst, $src, $shift #@salI_Reg_Reg" %} ++ ins_encode %{ ++ Register src = $src$$Register; ++ Register dst = $dst$$Register; ++ Register shamt = $shift$$Register; ++ __ sll_w(dst, src, shamt); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++ ++// Shift Left Long 6-bit immI ++instruct salL_Reg_imm(mRegL dst, mRegLorI2L src, immIU6 shift) %{ ++ match(Set dst (LShiftL src shift)); ++ ins_cost(100); ++ format %{ "salL $dst, $src, $shift @ salL_Reg_imm" %} ++ ins_encode %{ ++ Register src_reg = as_Register($src$$reg); ++ Register dst_reg = as_Register($dst$$reg); ++ int shamt = $shift$$constant; ++ ++ __ slli_d(dst_reg, src_reg, shamt); ++ %} ++ ins_pipe( ialu_regL_regL ); ++%} ++ ++// Shift Left Long ++instruct salL_Reg_Reg(mRegL dst, mRegLorI2L src, mRegI shift) %{ ++ match(Set dst (LShiftL src shift)); ++ ins_cost(100); ++ format %{ "salL $dst, $src, $shift @ salL_Reg_Reg" %} ++ ins_encode %{ ++ Register src_reg = as_Register($src$$reg); ++ Register dst_reg = as_Register($dst$$reg); ++ ++ __ sll_d(dst_reg, src_reg, $shift$$Register); ++ %} ++ ins_pipe( ialu_regL_regL ); ++%} ++ ++// Shift Right Long 6-bit ++instruct sarL_Reg_imm(mRegL dst, mRegLorI2L src, immIU6 shift) %{ ++ match(Set dst (RShiftL src shift)); ++ ins_cost(100); ++ format %{ "sarL $dst, $src, $shift @ sarL_Reg_imm" %} ++ ins_encode %{ ++ Register src_reg = as_Register($src$$reg); ++ Register dst_reg = as_Register($dst$$reg); ++ int shamt = $shift$$constant; ++ ++ __ srai_d(dst_reg, src_reg, shamt); ++ %} ++ ins_pipe( ialu_regL_regL ); ++%} ++ ++instruct sarL2I_Reg_immI_32_63(mRegI dst, mRegLorI2L src, immI_32_63 shift) %{ ++ match(Set dst (ConvL2I (RShiftL src shift))); ++ ins_cost(100); ++ format %{ "sarL $dst, $src, $shift @ sarL2I_Reg_immI_32_63" %} ++ ins_encode %{ ++ Register src_reg = as_Register($src$$reg); ++ Register dst_reg = as_Register($dst$$reg); ++ int shamt = $shift$$constant; ++ ++ __ srai_d(dst_reg, src_reg, shamt); ++ %} ++ ins_pipe( ialu_regL_regL ); ++%} ++ ++// Shift Right Long arithmetically ++instruct sarL_Reg_Reg(mRegL dst, mRegLorI2L src, mRegI shift) %{ ++ match(Set dst (RShiftL src shift)); ++ ins_cost(100); ++ format %{ "sarL $dst, $src, $shift @ sarL_Reg_Reg" %} ++ ins_encode %{ ++ Register src_reg = 
as_Register($src$$reg); ++ Register dst_reg = as_Register($dst$$reg); ++ ++ __ sra_d(dst_reg, src_reg, $shift$$Register); ++ %} ++ ins_pipe( ialu_regL_regL ); ++%} ++ ++// Shift Right Long logically ++instruct slrL_Reg_Reg(mRegL dst, mRegL src, mRegI shift) %{ ++ match(Set dst (URShiftL src shift)); ++ ins_cost(100); ++ format %{ "slrL $dst, $src, $shift @ slrL_Reg_Reg" %} ++ ins_encode %{ ++ Register src_reg = as_Register($src$$reg); ++ Register dst_reg = as_Register($dst$$reg); ++ ++ __ srl_d(dst_reg, src_reg, $shift$$Register); ++ %} ++ ins_pipe( ialu_regL_regL ); ++%} ++ ++instruct slrL_Reg_immI_0_31(mRegL dst, mRegLorI2L src, immI_0_31 shift) %{ ++ match(Set dst (URShiftL src shift)); ++ ins_cost(80); ++ format %{ "slrL $dst, $src, $shift @ slrL_Reg_immI_0_31" %} ++ ins_encode %{ ++ Register src_reg = as_Register($src$$reg); ++ Register dst_reg = as_Register($dst$$reg); ++ int shamt = $shift$$constant; ++ ++ __ srli_d(dst_reg, src_reg, shamt); ++ %} ++ ins_pipe( ialu_regL_regL ); ++%} ++ ++instruct slrL_Reg_immI_0_31_and_max_int(mRegI dst, mRegLorI2L src, immI_0_31 shift, immI_MaxI max_int) %{ ++ match(Set dst (AndI (ConvL2I (URShiftL src shift)) max_int)); ++ ins_cost(80); ++ format %{ "bstrpick_d $dst, $src, $shift+30, shift @ slrL_Reg_immI_0_31_and_max_int" %} ++ ins_encode %{ ++ Register src_reg = as_Register($src$$reg); ++ Register dst_reg = as_Register($dst$$reg); ++ int shamt = $shift$$constant; ++ ++ __ bstrpick_d(dst_reg, src_reg, shamt+30, shamt); ++ %} ++ ins_pipe( ialu_regL_regL ); ++%} ++ ++instruct slrL_P2XReg_immI_0_31(mRegL dst, mRegP src, immI_0_31 shift) %{ ++ match(Set dst (URShiftL (CastP2X src) shift)); ++ ins_cost(80); ++ format %{ "slrL $dst, $src, $shift @ slrL_P2XReg_immI_0_31" %} ++ ins_encode %{ ++ Register src_reg = as_Register($src$$reg); ++ Register dst_reg = as_Register($dst$$reg); ++ int shamt = $shift$$constant; ++ ++ __ srli_d(dst_reg, src_reg, shamt); ++ %} ++ ins_pipe( ialu_regL_regL ); ++%} ++ ++instruct slrL_Reg_immI_32_63(mRegL dst, mRegLorI2L src, immI_32_63 shift) %{ ++ match(Set dst (URShiftL src shift)); ++ ins_cost(80); ++ format %{ "slrL $dst, $src, $shift @ slrL_Reg_immI_32_63" %} ++ ins_encode %{ ++ Register src_reg = as_Register($src$$reg); ++ Register dst_reg = as_Register($dst$$reg); ++ int shamt = $shift$$constant; ++ ++ __ srli_d(dst_reg, src_reg, shamt); ++ %} ++ ins_pipe( ialu_regL_regL ); ++%} ++ ++instruct slrL_Reg_immI_convL2I(mRegI dst, mRegLorI2L src, immI_32_63 shift) %{ ++ match(Set dst (ConvL2I (URShiftL src shift))); ++ predicate(n->in(1)->in(2)->get_int() > 32); ++ ins_cost(80); ++ format %{ "slrL $dst, $src, $shift @ slrL_Reg_immI_convL2I" %} ++ ins_encode %{ ++ Register src_reg = as_Register($src$$reg); ++ Register dst_reg = as_Register($dst$$reg); ++ int shamt = $shift$$constant; ++ ++ __ srli_d(dst_reg, src_reg, shamt); ++ %} ++ ins_pipe( ialu_regL_regL ); ++%} ++ ++instruct slrL_P2XReg_immI_32_63(mRegL dst, mRegP src, immI_32_63 shift) %{ ++ match(Set dst (URShiftL (CastP2X src) shift)); ++ ins_cost(80); ++ format %{ "slrL $dst, $src, $shift @ slrL_P2XReg_immI_32_63" %} ++ ins_encode %{ ++ Register src_reg = as_Register($src$$reg); ++ Register dst_reg = as_Register($dst$$reg); ++ int shamt = $shift$$constant; ++ ++ __ srli_d(dst_reg, src_reg, shamt); ++ %} ++ ins_pipe( ialu_regL_regL ); ++%} ++ ++// Xor Instructions ++// Xor Register with Register ++instruct xorI_Reg_Reg(mRegI dst, mRegI src1, mRegI src2) %{ ++ match(Set dst (XorI src1 src2)); ++ ++ format %{ "XOR $dst, $src1, $src2 #@xorI_Reg_Reg" %} ++ ++ 
ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src1 = $src1$$Register; ++ Register src2 = $src2$$Register; ++ __ xorr(dst, src1, src2); ++ %} ++ ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++// Or Instructions ++instruct orI_Reg_imm(mRegI dst, mRegI src1, immI_0_4095 src2) %{ ++ match(Set dst (OrI src1 src2)); ++ ++ format %{ "OR $dst, $src1, $src2 #@orI_Reg_imm" %} ++ ins_encode %{ ++ __ ori($dst$$Register, $src1$$Register, $src2$$constant); ++ %} ++ ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++// Or Register with Register ++instruct orI_Reg_Reg(mRegI dst, mRegI src1, mRegI src2) %{ ++ match(Set dst (OrI src1 src2)); ++ ++ format %{ "OR $dst, $src1, $src2 #@orI_Reg_Reg" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src1 = $src1$$Register; ++ Register src2 = $src2$$Register; ++ __ orr(dst, src1, src2); ++ %} ++ ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++instruct rotI_shr_logical_Reg(mRegI dst, mRegI src, immI_0_31 rshift, immI_0_31 lshift, immI_1 one) %{ ++ match(Set dst (OrI (URShiftI src rshift) (LShiftI (AndI src one) lshift))); ++ predicate(32 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()))); ++ ++ format %{ "rotri_w $dst, $src, 1 ...\n\t" ++ "srli_w $dst, $dst, ($rshift-1) @ rotI_shr_logical_Reg" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src = $src$$Register; ++ int rshift = $rshift$$constant; ++ ++ __ rotri_w(dst, src, 1); ++ if (rshift - 1) { ++ __ srli_w(dst, dst, rshift - 1); ++ } ++ %} ++ ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++instruct orI_Reg_castP2X(mRegL dst, mRegL src1, mRegP src2) %{ ++ match(Set dst (OrI src1 (CastP2X src2))); ++ ++ format %{ "OR $dst, $src1, $src2 #@orI_Reg_castP2X" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src1 = $src1$$Register; ++ Register src2 = $src2$$Register; ++ __ orr(dst, src1, src2); ++ %} ++ ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++// Logical Shift Right by 5-bit immediate ++instruct shr_logical_Reg_imm(mRegI dst, mRegI src, immIU5 shift) %{ ++ match(Set dst (URShiftI src shift)); ++ //effect(KILL cr); ++ ++ format %{ "SRLI_W $dst, $src, $shift #@shr_logical_Reg_imm" %} ++ ins_encode %{ ++ Register src = $src$$Register; ++ Register dst = $dst$$Register; ++ int shift = $shift$$constant; ++ ++ __ srli_w(dst, src, shift); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++instruct shr_logical_Reg_imm_nonneg_mask(mRegI dst, mRegI src, immI_0_31 shift, immI_nonneg_mask mask) %{ ++ match(Set dst (AndI (URShiftI src shift) mask)); ++ ++ format %{ "bstrpick_w $dst, $src, $shift+one-bits($mask)-1, shift #@shr_logical_Reg_imm_nonneg_mask" %} ++ ins_encode %{ ++ Register src = $src$$Register; ++ Register dst = $dst$$Register; ++ int pos = $shift$$constant; ++ int size = Assembler::is_int_mask($mask$$constant); ++ ++ __ bstrpick_w(dst, src, pos+size-1, pos); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++instruct rolI_Reg_immI_0_31(mRegI dst, mRegI src, immI_0_31 lshift, immI_0_31 rshift) ++%{ ++ predicate(0 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()) & 0x1f)); ++ match(Set dst (OrI (LShiftI src lshift) (URShiftI src rshift))); ++ ++ ins_cost(100); ++ format %{ "rotri_w $dst, $src, $rshift #@rolI_Reg_immI_0_31" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src = $src$$Register; ++ int sa = $rshift$$constant; ++ ++ __ rotri_w(dst, src, sa); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++instruct rolL_Reg_immI_0_31(mRegL dst, mRegLorI2L src, immI_32_63 lshift, immI_0_31 rshift) ++%{ ++ predicate(0 == ((n->in(1)->in(2)->get_int() + 
n->in(2)->in(2)->get_int()) & 0x3f)); ++ match(Set dst (OrL (LShiftL src lshift) (URShiftL src rshift))); ++ ++ ins_cost(100); ++ format %{ "rotri_d $dst, $src, $rshift #@rolL_Reg_immI_0_31" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src = $src$$Register; ++ int sa = $rshift$$constant; ++ ++ __ rotri_d(dst, src, sa); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++instruct rolL_Reg_immI_32_63(mRegL dst, mRegLorI2L src, immI_0_31 lshift, immI_32_63 rshift) ++%{ ++ predicate(0 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()) & 0x3f)); ++ match(Set dst (OrL (LShiftL src lshift) (URShiftL src rshift))); ++ ++ ins_cost(100); ++ format %{ "rotri_d $dst, $src, $rshift #@rolL_Reg_immI_32_63" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src = $src$$Register; ++ int sa = $rshift$$constant; ++ ++ __ rotri_d(dst, src, sa); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++instruct rorI_Reg_immI_0_31(mRegI dst, mRegI src, immI_0_31 rshift, immI_0_31 lshift) ++%{ ++ predicate(0 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()) & 0x1f)); ++ match(Set dst (OrI (URShiftI src rshift) (LShiftI src lshift))); ++ ++ ins_cost(100); ++ format %{ "rotri_w $dst, $src, $rshift #@rorI_Reg_immI_0_31" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src = $src$$Register; ++ int sa = $rshift$$constant; ++ ++ __ rotri_w(dst, src, sa); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++instruct rorL_Reg_immI_0_31(mRegL dst, mRegLorI2L src, immI_0_31 rshift, immI_32_63 lshift) ++%{ ++ predicate(0 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()) & 0x3f)); ++ match(Set dst (OrL (URShiftL src rshift) (LShiftL src lshift))); ++ ++ ins_cost(100); ++ format %{ "rotri_d $dst, $src, $rshift #@rorL_Reg_immI_0_31" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src = $src$$Register; ++ int sa = $rshift$$constant; ++ ++ __ rotri_d(dst, src, sa); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++instruct rorL_Reg_immI_32_63(mRegL dst, mRegLorI2L src, immI_32_63 rshift, immI_0_31 lshift) ++%{ ++ predicate(0 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()) & 0x3f)); ++ match(Set dst (OrL (URShiftL src rshift) (LShiftL src lshift))); ++ ++ ins_cost(100); ++ format %{ "rotri_d $dst, $src, $rshift #@rorL_Reg_immI_32_63" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src = $src$$Register; ++ int sa = $rshift$$constant; ++ ++ __ rotri_d(dst, src, sa); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++// Rotate Shift Left ++instruct rolI_reg(mRegI dst, mRegI src, mRegI shift) ++%{ ++ match(Set dst (RotateLeft src shift)); ++ ++ format %{ "rotl_w $dst, $src, $shift @ rolI_reg" %} ++ ++ ins_encode %{ ++ __ sub_w(AT, R0, $shift$$Register); ++ __ rotr_w($dst$$Register, $src$$Register, AT); ++ %} ++ ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++instruct rolL_reg(mRegL dst, mRegL src, mRegI shift) ++%{ ++ match(Set dst (RotateLeft src shift)); ++ ++ format %{ "rotl_d $dst, $src, $shift @ rolL_reg" %} ++ ++ ins_encode %{ ++ __ sub_d(AT, R0, $shift$$Register); ++ __ rotr_d($dst$$Register, $src$$Register, AT); ++ %} ++ ++ ins_pipe( ialu_regL_regL ); ++%} ++ ++// Rotate Shift Right ++instruct rorI_imm(mRegI dst, mRegI src, immI shift) ++%{ ++ match(Set dst (RotateRight src shift)); ++ ++ format %{ "rotri_w $dst, $src, $shift @ rorI_imm" %} ++ ++ ins_encode %{ ++ __ rotri_w($dst$$Register, $src$$Register, $shift$$constant/* & 0x1f*/); ++ %} ++ ++ ins_pipe( ialu_regI_imm16 ); ++%} ++ ++instruct rorI_reg(mRegI dst, mRegI src, 
mRegI shift) ++%{ ++ match(Set dst (RotateRight src shift)); ++ ++ format %{ "rotr_w $dst, $src, $shift @ rorI_reg" %} ++ ++ ins_encode %{ ++ __ rotr_w($dst$$Register, $src$$Register, $shift$$Register); ++ %} ++ ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++instruct rorL_imm(mRegL dst, mRegL src, immI shift) ++%{ ++ match(Set dst (RotateRight src shift)); ++ ++ format %{ "rotri_d $dst, $src, $shift @ rorL_imm" %} ++ ++ ins_encode %{ ++ __ rotri_d($dst$$Register, $src$$Register, $shift$$constant/* & 0x3f*/); ++ %} ++ ++ ins_pipe( ialu_regL_imm16 ); ++%} ++ ++instruct rorL_reg(mRegL dst, mRegL src, mRegI shift) ++%{ ++ match(Set dst (RotateRight src shift)); ++ ++ format %{ "rotr_d $dst, $src, $shift @ rorL_reg" %} ++ ++ ins_encode %{ ++ __ rotr_d($dst$$Register, $src$$Register, $shift$$Register); ++ %} ++ ++ ins_pipe( ialu_regL_regL ); ++%} ++ ++// Logical Shift Right ++instruct shr_logical_Reg_Reg(mRegI dst, mRegI src, mRegI shift) %{ ++ match(Set dst (URShiftI src shift)); ++ ++ format %{ "SRL_W $dst, $src, $shift #@shr_logical_Reg_Reg" %} ++ ins_encode %{ ++ Register src = $src$$Register; ++ Register dst = $dst$$Register; ++ Register shift = $shift$$Register; ++ __ srl_w(dst, src, shift); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++ ++instruct shr_arith_Reg_imm(mRegI dst, mRegI src, immIU5 shift) %{ ++ match(Set dst (RShiftI src shift)); ++ // effect(KILL cr); ++ ++ format %{ "SRAI_W $dst, $src, $shift #@shr_arith_Reg_imm" %} ++ ins_encode %{ ++ Register src = $src$$Register; ++ Register dst = $dst$$Register; ++ int shift = $shift$$constant; ++ __ srai_w(dst, src, shift); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++instruct shr_arith_Reg_Reg(mRegI dst, mRegI src, mRegI shift) %{ ++ match(Set dst (RShiftI src shift)); ++ // effect(KILL cr); ++ ++ format %{ "SRA_W $dst, $src, $shift #@shr_arith_Reg_Reg" %} ++ ins_encode %{ ++ Register src = $src$$Register; ++ Register dst = $dst$$Register; ++ Register shift = $shift$$Register; ++ __ sra_w(dst, src, shift); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++//----------Convert Int to Boolean--------------------------------------------- ++ ++instruct convI2B(mRegI dst, mRegI src) %{ ++ match(Set dst (Conv2B src)); ++ ++ ins_cost(100); ++ format %{ "convI2B $dst, $src @ convI2B" %} ++ ins_encode %{ ++ Register dst = as_Register($dst$$reg); ++ Register src = as_Register($src$$reg); ++ ++ if (dst != src) { ++ __ addi_d(dst, R0, 1); ++ __ maskeqz(dst, dst, src); ++ } else { ++ __ move(AT, src); ++ __ addi_d(dst, R0, 1); ++ __ maskeqz(dst, dst, AT); ++ } ++ %} ++ ++ ins_pipe( ialu_regL_regL ); ++%} ++ ++instruct convI2L_reg( mRegL dst, mRegI src) %{ ++ match(Set dst (ConvI2L src)); ++ ++ ins_cost(100); ++ format %{ "SLLI_W $dst, $src @ convI2L_reg\t" %} ++ ++ ins_encode %{ ++ Register dst = as_Register($dst$$reg); ++ Register src = as_Register($src$$reg); ++ ++ if(dst != src) __ slli_w(dst, src, 0); ++ %} ++ ins_pipe( ialu_regL_regL ); ++%} ++ ++instruct convL2I_reg( mRegI dst, mRegLorI2L src ) %{ ++ match(Set dst (ConvL2I src)); ++ ++ format %{ "MOV $dst, $src @ convL2I_reg" %} ++ ins_encode %{ ++ Register dst = as_Register($dst$$reg); ++ Register src = as_Register($src$$reg); ++ ++ __ slli_w(dst, src, 0); ++ %} ++ ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++instruct convL2D_reg( regD dst, mRegL src ) %{ ++ match(Set dst (ConvL2D src)); ++ format %{ "convL2D $dst, $src @ convL2D_reg" %} ++ ins_encode %{ ++ Register src = as_Register($src$$reg); ++ FloatRegister dst = as_FloatRegister($dst$$reg); ++ ++ __ movgr2fr_d(dst, src); ++ __ ffint_d_l(dst, 
dst); ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++ ++// Convert double to int. ++// If the double is NaN, stuff a zero in instead. ++instruct convD2I_reg_reg(mRegI dst, regD src, regD tmp) %{ ++ match(Set dst (ConvD2I src)); ++ effect(USE src, TEMP tmp); ++ ++ format %{ "convd2i $dst, $src, using $tmp as TEMP @ convD2I_reg_reg" %} ++ ++ ins_encode %{ ++ __ ftintrz_w_d($tmp$$FloatRegister, $src$$FloatRegister); ++ __ movfr2gr_s($dst$$Register, $tmp$$FloatRegister); ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct convD2L_reg_reg(mRegL dst, regD src, regD tmp) %{ ++ match(Set dst (ConvD2L src)); ++ effect(USE src, TEMP tmp); ++ ++ format %{ "convd2l $dst, $src, using $tmp as TEMP @ convD2L_reg_reg" %} ++ ++ ins_encode %{ ++ __ ftintrz_l_d($tmp$$FloatRegister, $src$$FloatRegister); ++ __ movfr2gr_d($dst$$Register, $tmp$$FloatRegister); ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++// Convert float to int. ++// If the float is NaN, stuff a zero in instead. ++instruct convF2I_reg_reg(mRegI dst, regF src, regF tmp) %{ ++ match(Set dst (ConvF2I src)); ++ effect(USE src, TEMP tmp); ++ ++ format %{ "convf2i $dst, $src, using $tmp as TEMP @ convF2I_reg_reg" %} ++ ++ ins_encode %{ ++ __ ftintrz_w_s($tmp$$FloatRegister, $src$$FloatRegister); ++ __ movfr2gr_s($dst$$Register, $tmp$$FloatRegister); ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct convF2L_reg_reg(mRegL dst, regF src, regF tmp) %{ ++ match(Set dst (ConvF2L src)); ++ effect(USE src, TEMP tmp); ++ ++ format %{ "convf2l $dst, $src, using $tmp as TEMP @ convF2L_reg_reg" %} ++ ++ ins_encode %{ ++ __ ftintrz_l_s($tmp$$FloatRegister, $src$$FloatRegister); ++ __ movfr2gr_d($dst$$Register, $tmp$$FloatRegister); ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++ ++instruct convL2F_reg( regF dst, mRegL src ) %{ ++ match(Set dst (ConvL2F src)); ++ format %{ "convl2f $dst, $src @ convL2F_reg" %} ++ ins_encode %{ ++ FloatRegister dst = $dst$$FloatRegister; ++ Register src = as_Register($src$$reg); ++ Label L; ++ ++ __ movgr2fr_d(dst, src); ++ __ ffint_s_l(dst, dst); ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct convI2F_reg( regF dst, mRegI src ) %{ ++ match(Set dst (ConvI2F src)); ++ format %{ "convi2f $dst, $src @ convI2F_reg" %} ++ ins_encode %{ ++ Register src = $src$$Register; ++ FloatRegister dst = $dst$$FloatRegister; ++ ++ __ movgr2fr_w(dst, src); ++ __ ffint_s_w(dst, dst); ++ %} ++ ++ ins_pipe( fpu_regF_regF ); ++%} ++ ++instruct roundD(regD dst, regD src, immI rmode) %{ ++ predicate(UseLSX); ++ match(Set dst (RoundDoubleMode src rmode)); ++ format %{ "frint $dst, $src, $rmode\t# @roundD" %} ++ ins_encode %{ ++ switch ($rmode$$constant) { ++ case RoundDoubleModeNode::rmode_rint: __ vfrintrne_d($dst$$FloatRegister, $src$$FloatRegister); break; ++ case RoundDoubleModeNode::rmode_floor: __ vfrintrm_d($dst$$FloatRegister, $src$$FloatRegister); break; ++ case RoundDoubleModeNode::rmode_ceil: __ vfrintrp_d($dst$$FloatRegister, $src$$FloatRegister); break; ++ } ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct cmpLTMask_immI_0( mRegI dst, mRegI p, immI_0 zero ) %{ ++ match(Set dst (CmpLTMask p zero)); ++ ins_cost(100); ++ ++ format %{ "srai_w $dst, $p, 31 @ cmpLTMask_immI_0" %} ++ ins_encode %{ ++ Register src = $p$$Register; ++ Register dst = $dst$$Register; ++ ++ __ srai_w(dst, src, 31); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++ ++instruct cmpLTMask( mRegI dst, mRegI p, mRegI q ) %{ ++ match(Set dst (CmpLTMask p q)); ++ ins_cost(400); ++ ++ format %{ "cmpLTMask $dst, $p, $q @ cmpLTMask" %} ++ ins_encode %{ ++ Register p = $p$$Register; 
++ Register q = $q$$Register; ++ Register dst = $dst$$Register; ++ ++ __ slt(dst, p, q); ++ __ sub_d(dst, R0, dst); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct convP2B(mRegI dst, mRegP src) %{ ++ match(Set dst (Conv2B src)); ++ ++ ins_cost(100); ++ format %{ "convP2B $dst, $src @ convP2B" %} ++ ins_encode %{ ++ Register dst = as_Register($dst$$reg); ++ Register src = as_Register($src$$reg); ++ ++ if (dst != src) { ++ __ addi_d(dst, R0, 1); ++ __ maskeqz(dst, dst, src); ++ } else { ++ __ move(AT, src); ++ __ addi_d(dst, R0, 1); ++ __ maskeqz(dst, dst, AT); ++ } ++ %} ++ ++ ins_pipe( ialu_regL_regL ); ++%} ++ ++ ++instruct convI2D_reg_reg(regD dst, mRegI src) %{ ++ match(Set dst (ConvI2D src)); ++ format %{ "conI2D $dst, $src @convI2D_reg" %} ++ ins_encode %{ ++ Register src = $src$$Register; ++ FloatRegister dst = $dst$$FloatRegister; ++ __ movgr2fr_w(dst ,src); ++ __ ffint_d_w(dst, dst); ++ %} ++ ins_pipe( fpu_regF_regF ); ++%} ++ ++instruct convF2D_reg_reg(regD dst, regF src) %{ ++ match(Set dst (ConvF2D src)); ++ format %{ "convF2D $dst, $src\t# @convF2D_reg_reg" %} ++ ins_encode %{ ++ FloatRegister dst = $dst$$FloatRegister; ++ FloatRegister src = $src$$FloatRegister; ++ ++ __ fcvt_d_s(dst, src); ++ %} ++ ins_pipe( fpu_regF_regF ); ++%} ++ ++instruct convD2F_reg_reg(regF dst, regD src) %{ ++ match(Set dst (ConvD2F src)); ++ format %{ "convD2F $dst, $src\t# @convD2F_reg_reg" %} ++ ins_encode %{ ++ FloatRegister dst = $dst$$FloatRegister; ++ FloatRegister src = $src$$FloatRegister; ++ ++ __ fcvt_s_d(dst, src); ++ %} ++ ins_pipe( fpu_regF_regF ); ++%} ++ ++ ++// Convert oop pointer into compressed form ++instruct encodeHeapOop(mRegN dst, mRegP src) %{ ++ predicate(n->bottom_type()->make_ptr()->ptr() != TypePtr::NotNull); ++ match(Set dst (EncodeP src)); ++ format %{ "encode_heap_oop $dst,$src" %} ++ ins_encode %{ ++ Register src = $src$$Register; ++ Register dst = $dst$$Register; ++ ++ __ encode_heap_oop(dst, src); ++ %} ++ ins_pipe( ialu_regL_regL ); ++%} ++ ++instruct encodeHeapOop_not_null(mRegN dst, mRegP src) %{ ++ predicate(n->bottom_type()->make_ptr()->ptr() == TypePtr::NotNull); ++ match(Set dst (EncodeP src)); ++ format %{ "encode_heap_oop_not_null $dst,$src @ encodeHeapOop_not_null" %} ++ ins_encode %{ ++ __ encode_heap_oop_not_null($dst$$Register, $src$$Register); ++ %} ++ ins_pipe( ialu_regL_regL ); ++%} ++ ++instruct decodeHeapOop(mRegP dst, mRegN src) %{ ++ predicate(n->bottom_type()->is_ptr()->ptr() != TypePtr::NotNull && ++ n->bottom_type()->is_ptr()->ptr() != TypePtr::Constant); ++ match(Set dst (DecodeN src)); ++ format %{ "decode_heap_oop $dst,$src @ decodeHeapOop" %} ++ ins_encode %{ ++ Register s = $src$$Register; ++ Register d = $dst$$Register; ++ ++ __ decode_heap_oop(d, s); ++ %} ++ ins_pipe( ialu_regL_regL ); ++%} ++ ++instruct decodeHeapOop_not_null(mRegP dst, mRegN src) %{ ++ predicate(n->bottom_type()->is_ptr()->ptr() == TypePtr::NotNull || ++ n->bottom_type()->is_ptr()->ptr() == TypePtr::Constant); ++ match(Set dst (DecodeN src)); ++ format %{ "decode_heap_oop_not_null $dst,$src @ decodeHeapOop_not_null" %} ++ ins_encode %{ ++ Register s = $src$$Register; ++ Register d = $dst$$Register; ++ if (s != d) { ++ __ decode_heap_oop_not_null(d, s); ++ } else { ++ __ decode_heap_oop_not_null(d); ++ } ++ %} ++ ins_pipe( ialu_regL_regL ); ++%} ++ ++instruct encodeKlass_not_null(mRegN dst, mRegP src) %{ ++ match(Set dst (EncodePKlass src)); ++ format %{ "encode_heap_oop_not_null $dst,$src @ encodeKlass_not_null" %} ++ ins_encode %{ ++ __ 
encode_klass_not_null($dst$$Register, $src$$Register); ++ %} ++ ins_pipe( ialu_regL_regL ); ++%} ++ ++instruct decodeKlass_not_null(mRegP dst, mRegN src) %{ ++ match(Set dst (DecodeNKlass src)); ++ format %{ "decode_heap_klass_not_null $dst,$src" %} ++ ins_encode %{ ++ Register s = $src$$Register; ++ Register d = $dst$$Register; ++ if (s != d) { ++ __ decode_klass_not_null(d, s); ++ } else { ++ __ decode_klass_not_null(d); ++ } ++ %} ++ ins_pipe( ialu_regL_regL ); ++%} ++ ++//FIXME ++instruct tlsLoadP(mRegP dst) %{ ++ match(Set dst (ThreadLocal)); ++ ++ ins_cost(0); ++ format %{ " get_thread in $dst #@tlsLoadP" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++#ifdef OPT_THREAD ++ __ move(dst, TREG); ++#else ++ __ get_thread(dst); ++#endif ++ %} ++ ++ ins_pipe( ialu_loadI ); ++%} ++ ++ ++instruct checkCastPP( mRegP dst ) %{ ++ match(Set dst (CheckCastPP dst)); ++ ++ format %{ "#checkcastPP of $dst (empty encoding) #@checkCastPP" %} ++ ins_encode( /*empty encoding*/ ); ++ ins_pipe( empty ); ++%} ++ ++instruct castPP(mRegP dst) ++%{ ++ match(Set dst (CastPP dst)); ++ ++ size(0); ++ format %{ "# castPP of $dst" %} ++ ins_encode(/* empty encoding */); ++ ins_pipe(empty); ++%} ++ ++instruct castII( mRegI dst ) %{ ++ match(Set dst (CastII dst)); ++ format %{ "#castII of $dst empty encoding" %} ++ ins_encode( /*empty encoding*/ ); ++ ins_cost(0); ++ ins_pipe( empty ); ++%} ++ ++instruct castLL(mRegL dst) ++%{ ++ match(Set dst (CastLL dst)); ++ ++ size(0); ++ format %{ "# castLL of $dst" %} ++ ins_encode(/* empty encoding */); ++ ins_cost(0); ++ ins_pipe(empty); ++%} ++ ++instruct castFF(regF dst) %{ ++ match(Set dst (CastFF dst)); ++ size(0); ++ format %{ "# castFF of $dst" %} ++ ins_encode(/*empty*/); ++ ins_pipe(empty); ++%} ++ ++instruct castDD(regD dst) %{ ++ match(Set dst (CastDD dst)); ++ size(0); ++ format %{ "# castDD of $dst" %} ++ ins_encode(/*empty*/); ++ ins_pipe(empty); ++%} ++ ++instruct castVVX(vecX dst) %{ ++ match(Set dst (CastVV dst)); ++ size(0); ++ format %{ "# castVV of $dst" %} ++ ins_encode(/*empty*/); ++ ins_pipe(empty); ++%} ++ ++instruct castVVY(vecY dst) %{ ++ match(Set dst (CastVV dst)); ++ size(0); ++ format %{ "# castVV of $dst" %} ++ ins_encode(/*empty*/); ++ ins_pipe(empty); ++%} ++ ++// Return Instruction ++// Remove the return address & jump to it. ++instruct Ret() %{ ++ match(Return); ++ format %{ "RET #@Ret" %} ++ ++ ins_encode %{ ++ __ jr(RA); ++ %} ++ ++ ins_pipe( pipe_jump ); ++%} ++ ++ ++ ++// Tail Jump; remove the return address; jump to target. ++// TailCall above leaves the return address around. ++// TailJump is used in only one place, the rethrow_Java stub (fancy_jump=2). ++// ex_oop (Exception Oop) is needed in %o0 at the jump. As there would be a ++// "restore" before this instruction (in Epilogue), we need to materialize it ++// in %i0. 
++//FIXME ++instruct tailjmpInd(no_Ax_mRegP jump_target, mRegP ex_oop) %{ ++ match( TailJump jump_target ex_oop ); ++ ins_cost(200); ++ format %{ "Jmp $jump_target ; ex_oop = $ex_oop #@tailjmpInd" %} ++ ins_encode %{ ++ Register target = $jump_target$$Register; ++ ++ // V0, V1 are indicated in: ++ // [stubGenerator_loongarch.cpp] generate_forward_exception() ++ // [runtime_loongarch.cpp] OptoRuntime::generate_exception_blob() ++ // ++ Register oop = $ex_oop$$Register; ++ Register exception_oop = V0; ++ Register exception_pc = V1; ++ ++ __ move(exception_pc, RA); ++ __ move(exception_oop, oop); ++ ++ __ jr(target); ++ %} ++ ins_pipe( pipe_jump ); ++%} ++ ++// ============================================================================ ++// Procedure Call/Return Instructions ++// Call Java Static Instruction ++// Note: If this code changes, the corresponding ret_addr_offset() and ++// compute_padding() functions will have to be adjusted. ++instruct CallStaticJavaDirect(method meth) %{ ++ match(CallStaticJava); ++ effect(USE meth); ++ ++ ins_cost(300); ++ format %{ "CALL,static #@CallStaticJavaDirect " %} ++ ins_encode( Java_Static_Call( meth ) ); ++ ins_pipe( pipe_slow ); ++ ins_pc_relative(1); ++ ins_alignment(4); ++%} ++ ++// Call Java Dynamic Instruction ++// Note: If this code changes, the corresponding ret_addr_offset() and ++// compute_padding() functions will have to be adjusted. ++instruct CallDynamicJavaDirect(method meth) %{ ++ match(CallDynamicJava); ++ effect(USE meth); ++ ++ ins_cost(300); ++ format %{"MOV IC_Klass, #Universe::non_oop_word()\n\t" ++ "CallDynamic @ CallDynamicJavaDirect" %} ++ ins_encode( Java_Dynamic_Call( meth ) ); ++ ins_pipe( pipe_slow ); ++ ins_pc_relative(1); ++ ins_alignment(4); ++%} ++ ++instruct CallLeafNoFPDirect(method meth) %{ ++ match(CallLeafNoFP); ++ effect(USE meth); ++ ++ ins_cost(300); ++ format %{ "CALL_LEAF_NOFP,runtime " %} ++ ins_encode(Java_To_Runtime(meth)); ++ ins_pipe( pipe_slow ); ++ ins_pc_relative(1); ++ ins_alignment(4); ++%} ++ ++// Prefetch instructions for allocation. 
++ ++instruct prefetchAlloc(memory mem) %{ ++ match(PrefetchAllocation mem); ++ ins_cost(125); ++ format %{ "preld $mem\t# Prefetch allocation @ prefetchAlloc" %} ++ ins_encode %{ ++ int base = $mem$$base; ++ int index = $mem$$index; ++ int scale = $mem$$scale; ++ int disp = $mem$$disp; ++ ++ if (index != -1) { ++ if (scale == 0) { ++ __ add_d(AT, as_Register(base), as_Register(index)); ++ } else { ++ __ alsl_d(AT, as_Register(index), as_Register(base), scale - 1); ++ } ++ ++ if (Assembler::is_simm(disp, 12)) { ++ __ preld(8, AT, disp); ++ } else { ++ __ li(T4, disp); ++ __ add_d(AT, AT, T4); ++ __ preld(8, AT, 0); ++ } ++ } else { ++ if (Assembler::is_simm(disp, 12)) { ++ __ preld(8, as_Register(base), disp); ++ } else { ++ __ li(T4, disp); ++ __ add_d(AT, as_Register(base), T4); ++ __ preld(8, AT, 0); ++ } ++ } ++ %} ++ ins_pipe(pipe_slow); ++%} ++ ++// Call runtime without safepoint ++instruct CallLeafDirect(method meth) %{ ++ match(CallLeaf); ++ effect(USE meth); ++ ++ ins_cost(300); ++ format %{ "CALL_LEAF,runtime #@CallLeafDirect " %} ++ ins_encode(Java_To_Runtime(meth)); ++ ins_pipe( pipe_slow ); ++ ins_pc_relative(1); ++ ins_alignment(4); ++%} ++ ++// Load Char (16bit unsigned) ++instruct loadUS(mRegI dst, memory mem) %{ ++ match(Set dst (LoadUS mem)); ++ ++ ins_cost(125); ++ format %{ "loadUS $dst,$mem @ loadC" %} ++ ins_encode %{ ++ __ loadstore_enc($dst$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, C2_MacroAssembler::LOAD_U_SHORT); ++ %} ++ ins_pipe( ialu_loadI ); ++%} ++ ++instruct loadUS_convI2L(mRegL dst, memory mem) %{ ++ match(Set dst (ConvI2L (LoadUS mem))); ++ ++ ins_cost(125); ++ format %{ "loadUS $dst,$mem @ loadUS_convI2L" %} ++ ins_encode %{ ++ __ loadstore_enc($dst$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, C2_MacroAssembler::LOAD_U_SHORT); ++ %} ++ ins_pipe( ialu_loadI ); ++%} ++ ++// Store Char (16bit unsigned) ++instruct storeC(memory mem, mRegIorL2I src) %{ ++ match(Set mem (StoreC mem src)); ++ ++ ins_cost(125); ++ format %{ "storeC $src, $mem @ storeC" %} ++ ins_encode %{ ++ __ loadstore_enc($src$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, C2_MacroAssembler::STORE_CHAR); ++ %} ++ ins_pipe( ialu_loadI ); ++%} ++ ++instruct storeC_0(memory mem, immI_0 zero) %{ ++ match(Set mem (StoreC mem zero)); ++ ++ ins_cost(125); ++ format %{ "storeC $zero, $mem @ storeC_0" %} ++ ins_encode %{ ++ __ loadstore_enc(R0, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, C2_MacroAssembler::STORE_SHORT); ++ %} ++ ins_pipe( ialu_loadI ); ++%} ++ ++ ++instruct loadConF_immF_0(regF dst, immF_0 zero) %{ ++ match(Set dst zero); ++ ins_cost(100); ++ ++ format %{ "mov $dst, zero @ loadConF_immF_0\n"%} ++ ins_encode %{ ++ FloatRegister dst = $dst$$FloatRegister; ++ ++ __ movgr2fr_w(dst, R0); ++ %} ++ ins_pipe( fpu_loadF ); ++%} ++ ++ ++instruct loadConF(regF dst, immF src) %{ ++ match(Set dst src); ++ ins_cost(125); ++ ++ format %{ "fld_s $dst, $constantoffset[$constanttablebase] # load FLOAT $src from table @ loadConF" %} ++ ins_encode %{ ++ int con_offset = $constantoffset($src); ++ ++ if (Assembler::is_simm(con_offset, 12)) { ++ __ fld_s($dst$$FloatRegister, $constanttablebase, con_offset); ++ } else { ++ __ li(AT, con_offset); ++ __ fldx_s($dst$$FloatRegister, $constanttablebase, AT); ++ } ++ %} ++ ins_pipe( fpu_loadF ); ++%} ++ ++ ++instruct loadConD_immD_0(regD dst, immD_0 zero) %{ ++ match(Set dst zero); ++ ins_cost(100); ++ ++ format %{ "mov $dst, zero @ loadConD_immD_0"%} ++ ins_encode %{ ++ FloatRegister dst = 
as_FloatRegister($dst$$reg); ++ ++ __ movgr2fr_d(dst, R0); ++ %} ++ ins_pipe( fpu_loadF ); ++%} ++ ++instruct loadConD(regD dst, immD src) %{ ++ match(Set dst src); ++ ins_cost(125); ++ ++ format %{ "fld_d $dst, $constantoffset[$constanttablebase] # load DOUBLE $src from table @ loadConD" %} ++ ins_encode %{ ++ int con_offset = $constantoffset($src); ++ ++ if (Assembler::is_simm(con_offset, 12)) { ++ __ fld_d($dst$$FloatRegister, $constanttablebase, con_offset); ++ } else { ++ __ li(AT, con_offset); ++ __ fldx_d($dst$$FloatRegister, $constanttablebase, AT); ++ } ++ %} ++ ins_pipe( fpu_loadF ); ++%} ++ ++// Store register Float value (it is faster than store from FPU register) ++instruct storeF_reg( memory mem, regF src) %{ ++ match(Set mem (StoreF mem src)); ++ ++ ins_cost(50); ++ format %{ "store $mem, $src\t# store float @ storeF_reg" %} ++ ins_encode %{ ++ __ loadstore_enc($src$$FloatRegister, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, C2_MacroAssembler::STORE_FLOAT); ++ %} ++ ins_pipe( fpu_storeF ); ++%} ++ ++instruct storeF_immF_0( memory mem, immF_0 zero) %{ ++ match(Set mem (StoreF mem zero)); ++ ++ ins_cost(40); ++ format %{ "store $mem, zero\t# store float @ storeF_immF_0" %} ++ ins_encode %{ ++ __ loadstore_enc(R0, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, C2_MacroAssembler::STORE_INT); ++ %} ++ ins_pipe( ialu_storeI ); ++%} ++ ++// Load Double ++instruct loadD(regD dst, memory mem) %{ ++ match(Set dst (LoadD mem)); ++ ++ ins_cost(150); ++ format %{ "loadD $dst, $mem #@loadD" %} ++ ins_encode %{ ++ __ loadstore_enc($dst$$FloatRegister, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, C2_MacroAssembler::LOAD_DOUBLE); ++ %} ++ ins_pipe( ialu_loadI ); ++%} ++ ++// Load Double - UNaligned ++instruct loadD_unaligned(regD dst, memory mem ) %{ ++ match(Set dst (LoadD_unaligned mem)); ++ ins_cost(250); ++ // FIXME: Need more effective ldl/ldr ++ format %{ "loadD_unaligned $dst, $mem #@loadD_unaligned" %} ++ ins_encode %{ ++ __ loadstore_enc($dst$$FloatRegister, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, C2_MacroAssembler::LOAD_DOUBLE); ++ %} ++ ins_pipe( ialu_loadI ); ++%} ++ ++instruct storeD_reg( memory mem, regD src) %{ ++ match(Set mem (StoreD mem src)); ++ ++ ins_cost(50); ++ format %{ "store $mem, $src\t# store float @ storeD_reg" %} ++ ins_encode %{ ++ __ loadstore_enc($src$$FloatRegister, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, C2_MacroAssembler::STORE_DOUBLE); ++ %} ++ ins_pipe( fpu_storeF ); ++%} ++ ++instruct storeD_immD_0( memory mem, immD_0 zero) %{ ++ match(Set mem (StoreD mem zero)); ++ ++ ins_cost(40); ++ format %{ "store $mem, zero\t# store float @ storeD_immD_0" %} ++ ins_encode %{ ++ __ loadstore_enc(R0, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, C2_MacroAssembler::STORE_LONG); ++ %} ++ ins_pipe( ialu_storeI ); ++%} ++ ++instruct loadSSI(mRegI dst, stackSlotI src) ++%{ ++ match(Set dst src); ++ ++ ins_cost(125); ++ format %{ "ld_w $dst, $src\t# int stk @ loadSSI" %} ++ ins_encode %{ ++ guarantee( Assembler::is_simm($src$$disp, 12), "disp too long (loadSSI) !"); ++ __ ld_w($dst$$Register, SP, $src$$disp); ++ %} ++ ins_pipe(ialu_loadI); ++%} ++ ++instruct storeSSI(stackSlotI dst, mRegI src) ++%{ ++ match(Set dst src); ++ ++ ins_cost(100); ++ format %{ "st_w $dst, $src\t# int stk @ storeSSI" %} ++ ins_encode %{ ++ guarantee( Assembler::is_simm($dst$$disp, 12), "disp too long (storeSSI) !"); ++ __ st_w($src$$Register, SP, $dst$$disp); ++ %} ++ ins_pipe(ialu_storeI); ++%} ++ ++instruct loadSSL(mRegL dst, stackSlotL src) ++%{ ++ 
match(Set dst src); ++ ++ ins_cost(125); ++ format %{ "ld_d $dst, $src\t# long stk @ loadSSL" %} ++ ins_encode %{ ++ guarantee( Assembler::is_simm($src$$disp, 12), "disp too long (loadSSL) !"); ++ __ ld_d($dst$$Register, SP, $src$$disp); ++ %} ++ ins_pipe(ialu_loadI); ++%} ++ ++instruct storeSSL(stackSlotL dst, mRegL src) ++%{ ++ match(Set dst src); ++ ++ ins_cost(100); ++ format %{ "st_d $dst, $src\t# long stk @ storeSSL" %} ++ ins_encode %{ ++ guarantee( Assembler::is_simm($dst$$disp, 12), "disp too long (storeSSL) !"); ++ __ st_d($src$$Register, SP, $dst$$disp); ++ %} ++ ins_pipe(ialu_storeI); ++%} ++ ++instruct loadSSP(mRegP dst, stackSlotP src) ++%{ ++ match(Set dst src); ++ ++ ins_cost(125); ++ format %{ "ld_d $dst, $src\t# ptr stk @ loadSSP" %} ++ ins_encode %{ ++ guarantee( Assembler::is_simm($src$$disp, 12), "disp too long (loadSSP) !"); ++ __ ld_d($dst$$Register, SP, $src$$disp); ++ %} ++ ins_pipe(ialu_loadI); ++%} ++ ++instruct storeSSP(stackSlotP dst, mRegP src) ++%{ ++ match(Set dst src); ++ ++ ins_cost(100); ++ format %{ "st_d $dst, $src\t# ptr stk @ storeSSP" %} ++ ins_encode %{ ++ guarantee( Assembler::is_simm($dst$$disp, 12), "disp too long (storeSSP) !"); ++ __ st_d($src$$Register, SP, $dst$$disp); ++ %} ++ ins_pipe(ialu_storeI); ++%} ++ ++instruct loadSSF(regF dst, stackSlotF src) ++%{ ++ match(Set dst src); ++ ++ ins_cost(125); ++ format %{ "fld_s $dst, $src\t# float stk @ loadSSF" %} ++ ins_encode %{ ++ guarantee( Assembler::is_simm($src$$disp, 12), "disp too long (loadSSF) !"); ++ __ fld_s($dst$$FloatRegister, SP, $src$$disp); ++ %} ++ ins_pipe(ialu_loadI); ++%} ++ ++instruct storeSSF(stackSlotF dst, regF src) ++%{ ++ match(Set dst src); ++ ++ ins_cost(100); ++ format %{ "fst_s $dst, $src\t# float stk @ storeSSF" %} ++ ins_encode %{ ++ guarantee( Assembler::is_simm($dst$$disp, 12), "disp too long (storeSSF) !"); ++ __ fst_s($src$$FloatRegister, SP, $dst$$disp); ++ %} ++ ins_pipe(fpu_storeF); ++%} ++ ++// Use the same format since predicate() can not be used here. 
++instruct loadSSD(regD dst, stackSlotD src) ++%{ ++ match(Set dst src); ++ ++ ins_cost(125); ++ format %{ "fld_d $dst, $src\t# double stk @ loadSSD" %} ++ ins_encode %{ ++ guarantee( Assembler::is_simm($src$$disp, 12), "disp too long (loadSSD) !"); ++ __ fld_d($dst$$FloatRegister, SP, $src$$disp); ++ %} ++ ins_pipe(ialu_loadI); ++%} ++ ++instruct storeSSD(stackSlotD dst, regD src) ++%{ ++ match(Set dst src); ++ ++ ins_cost(100); ++ format %{ "fst_d $dst, $src\t# double stk @ storeSSD" %} ++ ins_encode %{ ++ guarantee( Assembler::is_simm($dst$$disp, 12), "disp too long (storeSSD) !"); ++ __ fst_d($src$$FloatRegister, SP, $dst$$disp); ++ %} ++ ins_pipe(fpu_storeF); ++%} ++ ++instruct cmpFastLock(FlagsReg cr, mRegP object, mRegP box, mRegI tmp, mRegI scr) %{ ++ match(Set cr (FastLock object box)); ++ effect(TEMP tmp, TEMP scr); ++ ins_cost(300); ++ format %{ "FASTLOCK $cr <-- $object, $box, $tmp, $scr #@ cmpFastLock" %} ++ ins_encode %{ ++ __ fast_lock($object$$Register, $box$$Register, $cr$$Register, $tmp$$Register, $scr$$Register); ++ %} ++ ++ ins_pipe( pipe_slow ); ++ ins_pc_relative(1); ++%} ++ ++instruct cmpFastUnlock(FlagsReg cr, mRegP object, mRegP box, mRegI tmp, mRegI scr) %{ ++ match(Set cr (FastUnlock object box)); ++ effect(TEMP tmp, TEMP scr); ++ ins_cost(300); ++ format %{ "FASTUNLOCK $cr <-- $object, $box, $tmp #@cmpFastUnlock" %} ++ ins_encode %{ ++ __ fast_unlock($object$$Register, $box$$Register, $cr$$Register, $tmp$$Register, $scr$$Register); ++ %} ++ ++ ins_pipe( pipe_slow ); ++ ins_pc_relative(1); ++%} ++ ++// Store CMS card-mark Immediate 0 ++instruct storeImmCM(memory mem, immI_0 zero) %{ ++ match(Set mem (StoreCM mem zero)); ++ ++ ins_cost(150); ++ format %{ "StoreCM MEMBAR loadstore\n\t" ++ "st_b $mem, zero\t! CMS card-mark imm0" %} ++ ins_encode %{ ++ __ membar(__ StoreStore); ++ __ loadstore_enc(R0, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, C2_MacroAssembler::STORE_BYTE); ++ %} ++ ins_pipe( ialu_storeI ); ++%} ++ ++// Die now ++instruct ShouldNotReachHere( ) ++%{ ++ match(Halt); ++ ins_cost(300); ++ ++ // Use the following format syntax ++ format %{ "stop; #@ShouldNotReachHere" %} ++ ins_encode %{ ++ if (is_reachable()) { ++ __ stop(_halt_reason); ++ } ++ %} ++ ++ ins_pipe( pipe_jump ); ++%} ++ ++instruct leaP12Narrow(mRegP dst, indOffset12Narrow mem) ++%{ ++ predicate(CompressedOops::shift() == 0); ++ match(Set dst mem); ++ ++ ins_cost(110); ++ format %{ "leaq $dst, $mem\t# ptr off12narrow @ leaP12Narrow" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register base = as_Register($mem$$base); ++ int disp = $mem$$disp; ++ ++ __ addi_d(dst, base, disp); ++ %} ++ ins_pipe( ialu_regI_imm16 ); ++%} ++ ++instruct leaPIdxScale(mRegP dst, mRegP reg, mRegLorI2L lreg, immI_0_3 scale) ++%{ ++ match(Set dst (AddP reg (LShiftL lreg scale))); ++ ++ ins_cost(110); ++ format %{ "leaq $dst, [$reg + $lreg << $scale]\t# @ leaPIdxScale" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register base = $reg$$Register; ++ Register index = $lreg$$Register; ++ int scale = $scale$$constant; ++ ++ if (scale == 0) { ++ __ add_d($dst$$Register, $reg$$Register, index); ++ } else { ++ __ alsl_d(dst, index, base, scale - 1); ++ } ++ %} ++ ++ ins_pipe( ialu_regI_imm16 ); ++%} ++ ++ ++// ============================================================================ ++// The 2nd slow-half of a subtype check. Scan the subklass's 2ndary superklass ++// array for an instance of the superklass. 
Set a hidden internal cache on a ++// hit (cache is checked with exposed code in gen_subtype_check()). Return ++// NZ for a miss or zero for a hit. The encoding ALSO sets flags. ++instruct partialSubtypeCheck( mRegP result, no_T8_mRegP sub, no_T8_mRegP super, mT8RegI tmp ) %{ ++ match(Set result (PartialSubtypeCheck sub super)); ++ effect(KILL tmp); ++ ins_cost(1100); // slightly larger than the next version ++ format %{ "partialSubtypeCheck result=$result, sub=$sub, super=$super, tmp=$tmp " %} ++ ++ ins_encode( enc_PartialSubtypeCheck(result, sub, super, tmp) ); ++ ins_pipe( pipe_slow ); ++%} ++ ++// Conditional-store of the updated heap-top. ++// Used during allocation of the shared heap. ++ ++instruct storePConditional(memory heap_top_ptr, mRegP oldval, mRegP newval, FlagsReg cr) %{ ++ match(Set cr (StorePConditional heap_top_ptr (Binary oldval newval))); ++ ++ format %{ "move AT, $newval\n\t" ++ "sc_d $heap_top_ptr, AT\t# (ptr) @storePConditional \n\t" ++ "move $cr, AT\n" %} ++ ins_encode%{ ++ Register oldval = $oldval$$Register; ++ Register newval = $newval$$Register; ++ Address addr(as_Register($heap_top_ptr$$base), $heap_top_ptr$$disp); ++ ++ int index = $heap_top_ptr$$index; ++ int scale = $heap_top_ptr$$scale; ++ int disp = $heap_top_ptr$$disp; ++ ++ guarantee(Assembler::is_simm(disp, 12), ""); ++ ++ if (index != -1) { ++ __ stop("in storePConditional: index != -1"); ++ } else { ++ __ move(AT, newval); ++ __ sc_d(AT, addr); ++ __ move($cr$$Register, AT); ++ } ++ %} ++ ins_pipe(long_memory_op); ++%} ++ ++// Conditional-store of an int value. ++// AT flag is set on success, reset otherwise. ++instruct storeIConditional(memory mem, mRegI oldval, mRegI newval, FlagsReg cr) %{ ++ match(Set cr (StoreIConditional mem (Binary oldval newval))); ++ format %{ "CMPXCHG $newval, $mem, $oldval \t# @storeIConditional" %} ++ ++ ins_encode %{ ++ Register oldval = $oldval$$Register; ++ Register newval = $newval$$Register; ++ Register cr = $cr$$Register; ++ Address addr(as_Register($mem$$base), $mem$$disp); ++ ++ int index = $mem$$index; ++ int scale = $mem$$scale; ++ int disp = $mem$$disp; ++ ++ guarantee(Assembler::is_simm(disp, 12), ""); ++ ++ if (index != -1) { ++ __ stop("in storeIConditional: index != -1"); ++ } else { ++ if (cr != addr.base() && cr != oldval && cr != newval) { ++ __ cmpxchg32(addr, oldval, newval, cr, true, false, true); ++ } else { ++ __ cmpxchg32(addr, oldval, newval, AT, true, false, true); ++ __ move(cr, AT); ++ } ++ } ++ %} ++ ++ ins_pipe(long_memory_op); ++%} ++ ++// Conditional-store of a long value. ++// ZF flag is set on success, reset otherwise. Implemented with a CMPXCHG. 
++instruct storeLConditional(memory mem, mRegL oldval, mRegL newval, FlagsReg cr) ++%{ ++ match(Set cr (StoreLConditional mem (Binary oldval newval))); ++ ++ format %{ "cmpxchg $mem, $newval\t# If $oldval == $mem then store $newval into $mem" %} ++ ins_encode%{ ++ Register oldval = $oldval$$Register; ++ Register newval = $newval$$Register; ++ Register cr = $cr$$Register; ++ Address addr(as_Register($mem$$base), $mem$$disp); ++ ++ int index = $mem$$index; ++ int scale = $mem$$scale; ++ int disp = $mem$$disp; ++ ++ guarantee(Assembler::is_simm(disp, 12), ""); ++ ++ if (index != -1) { ++ __ stop("in storeLConditional: index != -1"); ++ } else { ++ if (cr != addr.base() && cr != oldval && cr != newval) { ++ __ cmpxchg(addr, oldval, newval, cr, false, true); ++ } else { ++ __ cmpxchg(addr, oldval, newval, AT, false, true); ++ __ move(cr, AT); ++ } ++ } ++ %} ++ ins_pipe(long_memory_op); ++%} ++ ++// Implement LoadPLocked. Must be ordered against changes of the memory location ++// by storePConditional. ++instruct loadPLocked(mRegP dst, memory mem) %{ ++ match(Set dst (LoadPLocked mem)); ++ ins_cost(MEMORY_REF_COST); ++ ++ format %{ "ll_d $dst, $mem #@loadPLocked\n\t" %} ++ size(12); ++ ins_encode %{ ++ relocInfo::relocType disp_reloc = $mem->disp_reloc(); ++ assert(disp_reloc == relocInfo::none, "cannot have disp"); ++ __ loadstore_enc($dst$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, C2_MacroAssembler::LOAD_LINKED_LONG); ++ %} ++ ins_pipe( ialu_loadI ); ++%} ++ ++ ++instruct compareAndSwapI(mRegI res, mRegP mem_ptr, mRegI oldval, mRegI newval) %{ ++ match(Set res (CompareAndSwapI mem_ptr (Binary oldval newval))); ++ ins_cost(3 * MEMORY_REF_COST); ++ format %{ "CMPXCHG $newval, [$mem_ptr], $oldval @ compareAndSwapI" %} ++ ins_encode %{ ++ Register newval = $newval$$Register; ++ Register oldval = $oldval$$Register; ++ Register res = $res$$Register; ++ Address addr($mem_ptr$$Register, 0); ++ ++ if (res != addr.base() && res != oldval && res != newval) { ++ __ cmpxchg32(addr, oldval, newval, res, true, false, true); ++ } else { ++ __ cmpxchg32(addr, oldval, newval, AT, true, false, true); ++ __ move(res, AT); ++ } ++ %} ++ ins_pipe(long_memory_op); ++%} ++ ++instruct compareAndSwapL(mRegI res, mRegP mem_ptr, mRegL oldval, mRegL newval) %{ ++ predicate(VM_Version::supports_cx8()); ++ match(Set res (CompareAndSwapL mem_ptr (Binary oldval newval))); ++ ins_cost(3 * MEMORY_REF_COST); ++ format %{ "CMPXCHG $newval, [$mem_ptr], $oldval @ compareAndSwapL" %} ++ ins_encode %{ ++ Register newval = $newval$$Register; ++ Register oldval = $oldval$$Register; ++ Register res = $res$$Register; ++ Address addr($mem_ptr$$Register, 0); ++ ++ if (res != addr.base() && res != oldval && res != newval) { ++ __ cmpxchg(addr, oldval, newval, res, false, true); ++ } else { ++ __ cmpxchg(addr, oldval, newval, AT, false, true); ++ __ move(res, AT); ++ } ++ %} ++ ins_pipe(long_memory_op); ++%} ++ ++instruct compareAndSwapP(mRegI res, mRegP mem_ptr, mRegP oldval, mRegP newval) %{ ++ match(Set res (CompareAndSwapP mem_ptr (Binary oldval newval))); ++ predicate(n->as_LoadStore()->barrier_data() == 0); ++ ins_cost(3 * MEMORY_REF_COST); ++ format %{ "CMPXCHG $newval, [$mem_ptr], $oldval @ compareAndSwapP" %} ++ ins_encode %{ ++ Register newval = $newval$$Register; ++ Register oldval = $oldval$$Register; ++ Register res = $res$$Register; ++ Address addr($mem_ptr$$Register, 0); ++ ++ if (res != addr.base() && res != oldval && res != newval) { ++ __ cmpxchg(addr, oldval, newval, res, false, true); ++ } else { ++ __ 
cmpxchg(addr, oldval, newval, AT, false, true); ++ __ move(res, AT); ++ } ++ %} ++ ins_pipe(long_memory_op); ++%} ++ ++instruct compareAndSwapN(mRegI res, mRegP mem_ptr, mRegN oldval, mRegN newval) %{ ++ match(Set res (CompareAndSwapN mem_ptr (Binary oldval newval))); ++ ins_cost(3 * MEMORY_REF_COST); ++ format %{ "CMPXCHG $newval, [$mem_ptr], $oldval @ compareAndSwapN" %} ++ ins_encode %{ ++ Register newval = $newval$$Register; ++ Register oldval = $oldval$$Register; ++ Register res = $res$$Register; ++ Address addr($mem_ptr$$Register, 0); ++ ++ if (res != addr.base() && res != oldval && res != newval) { ++ __ cmpxchg32(addr, oldval, newval, res, false, false, true); ++ } else { ++ __ cmpxchg32(addr, oldval, newval, AT, false, false, true); ++ __ move(res, AT); ++ } ++ %} ++ ins_pipe(long_memory_op); ++%} ++ ++instruct get_and_setI(indirect mem, mRegI newv, mRegI prev) %{ ++ match(Set prev (GetAndSetI mem newv)); ++ ins_cost(2 * MEMORY_REF_COST); ++ format %{ "amswap_db_w $prev, $newv, [$mem]" %} ++ ins_encode %{ ++ Register prev = $prev$$Register; ++ Register newv = $newv$$Register; ++ Register addr = as_Register($mem$$base); ++ if (prev == newv || prev == addr) { ++ __ amswap_db_w(AT, newv, addr); ++ __ move(prev, AT); ++ } else { ++ __ amswap_db_w(prev, newv, addr); ++ } ++ %} ++ ins_pipe(pipe_slow); ++%} ++ ++instruct get_and_setL(indirect mem, mRegL newv, mRegL prev) %{ ++ match(Set prev (GetAndSetL mem newv)); ++ ins_cost(2 * MEMORY_REF_COST); ++ format %{ "amswap_db_d $prev, $newv, [$mem]" %} ++ ins_encode %{ ++ Register prev = $prev$$Register; ++ Register newv = $newv$$Register; ++ Register addr = as_Register($mem$$base); ++ if (prev == newv || prev == addr) { ++ __ amswap_db_d(AT, newv, addr); ++ __ move(prev, AT); ++ } else { ++ __ amswap_db_d(prev, newv, addr); ++ } ++ %} ++ ins_pipe(pipe_slow); ++%} ++ ++instruct get_and_setN(indirect mem, mRegN newv, mRegN prev) %{ ++ match(Set prev (GetAndSetN mem newv)); ++ ins_cost(2 * MEMORY_REF_COST); ++ format %{ "amswap_db_w $prev, $newv, [$mem]" %} ++ ins_encode %{ ++ Register prev = $prev$$Register; ++ Register newv = $newv$$Register; ++ Register addr = as_Register($mem$$base); ++ __ amswap_db_w(AT, newv, addr); ++ __ bstrpick_d(prev, AT, 31, 0); ++ %} ++ ins_pipe(pipe_slow); ++%} ++ ++instruct get_and_setP(indirect mem, mRegP newv, mRegP prev) %{ ++ match(Set prev (GetAndSetP mem newv)); ++ ins_cost(2 * MEMORY_REF_COST); ++ format %{ "amswap_db_d $prev, $newv, [$mem]" %} ++ ins_encode %{ ++ Register prev = $prev$$Register; ++ Register newv = $newv$$Register; ++ Register addr = as_Register($mem$$base); ++ if (prev == newv || prev == addr) { ++ __ amswap_db_d(AT, newv, addr); ++ __ move(prev, AT); ++ } else { ++ __ amswap_db_d(prev, newv, addr); ++ } ++ %} ++ ins_pipe(pipe_slow); ++%} ++ ++instruct get_and_addL(indirect mem, mRegL newval, mRegL incr) %{ ++ match(Set newval (GetAndAddL mem incr)); ++ ins_cost(2 * MEMORY_REF_COST + 1); ++ format %{ "amadd_db_d $newval, [$mem], $incr" %} ++ ins_encode %{ ++ Register newv = $newval$$Register; ++ Register incr = $incr$$Register; ++ Register addr = as_Register($mem$$base); ++ if (newv == incr || newv == addr) { ++ __ amadd_db_d(AT, incr, addr); ++ __ move(newv, AT); ++ } else { ++ __ amadd_db_d(newv, incr, addr); ++ } ++ %} ++ ins_pipe(pipe_slow); ++%} ++ ++instruct get_and_addL_no_res(indirect mem, Universe dummy, mRegL incr) %{ ++ predicate(n->as_LoadStore()->result_not_used()); ++ match(Set dummy (GetAndAddL mem incr)); ++ ins_cost(2 * MEMORY_REF_COST); ++ format %{ "amadd_db_d [$mem], 
$incr" %} ++ ins_encode %{ ++ __ amadd_db_d(R0, $incr$$Register, as_Register($mem$$base)); ++ %} ++ ins_pipe(pipe_slow); ++%} ++ ++instruct get_and_addI(indirect mem, mRegI newval, mRegIorL2I incr) %{ ++ match(Set newval (GetAndAddI mem incr)); ++ ins_cost(2 * MEMORY_REF_COST + 1); ++ format %{ "amadd_db_w $newval, [$mem], $incr" %} ++ ins_encode %{ ++ Register newv = $newval$$Register; ++ Register incr = $incr$$Register; ++ Register addr = as_Register($mem$$base); ++ if (newv == incr || newv == addr) { ++ __ amadd_db_w(AT, incr, addr); ++ __ move(newv, AT); ++ } else { ++ __ amadd_db_w(newv, incr, addr); ++ } ++ %} ++ ins_pipe(pipe_slow); ++%} ++ ++instruct get_and_addI_no_res(indirect mem, Universe dummy, mRegIorL2I incr) %{ ++ predicate(n->as_LoadStore()->result_not_used()); ++ match(Set dummy (GetAndAddI mem incr)); ++ ins_cost(2 * MEMORY_REF_COST); ++ format %{ "amadd_db_w [$mem], $incr" %} ++ ins_encode %{ ++ __ amadd_db_w(R0, $incr$$Register, as_Register($mem$$base)); ++ %} ++ ins_pipe(pipe_slow); ++%} ++ ++instruct compareAndExchangeI(mRegI res, indirect mem, mRegI oldval, mRegI newval) %{ ++ ++ match(Set res (CompareAndExchangeI mem (Binary oldval newval))); ++ ins_cost(2 * MEMORY_REF_COST); ++ effect(TEMP_DEF res); ++ format %{ ++ "cmpxchg32 $res = $mem, $oldval, $newval\t# if $mem == $oldval then $mem <-- $newval @compareAndExchangeI" ++ %} ++ ins_encode %{ ++ Register newval = $newval$$Register; ++ Register oldval = $oldval$$Register; ++ Register res = $res$$Register; ++ Address addr(as_Register($mem$$base)); ++ __ cmpxchg32(addr, oldval, newval, res, true /* sign */, false /* retold */, true /* barrier */, false /* weak */, true /* exchange */); ++ %} ++ ins_pipe(pipe_slow); ++%} ++ ++instruct compareAndExchangeL(mRegL res, indirect mem, mRegL oldval, mRegL newval) %{ ++ ++ match(Set res (CompareAndExchangeL mem (Binary oldval newval))); ++ ins_cost(2 * MEMORY_REF_COST); ++ effect(TEMP_DEF res); ++ format %{ ++ "cmpxchg $res = $mem, $oldval, $newval\t# if $mem == $oldval then $mem <-- $newval @compareAndExchangeL" ++ %} ++ ins_encode %{ ++ Register newval = $newval$$Register; ++ Register oldval = $oldval$$Register; ++ Register res = $res$$Register; ++ Address addr(as_Register($mem$$base)); ++ __ cmpxchg(addr, oldval, newval, res, false /* retold */, true /* barrier */, false /* weak */, true /* exchange */); ++ %} ++ ins_pipe(pipe_slow); ++%} ++ ++instruct compareAndExchangeP(mRegP res, indirect mem, mRegP oldval, mRegP newval) %{ ++ predicate(n->as_LoadStore()->barrier_data() == 0); ++ match(Set res (CompareAndExchangeP mem (Binary oldval newval))); ++ ins_cost(2 * MEMORY_REF_COST); ++ effect(TEMP_DEF res); ++ format %{ ++ "cmpxchg $res = $mem, $oldval, $newval\t# if $mem == $oldval then $mem <-- $newval @compareAndExchangeP" ++ %} ++ ins_encode %{ ++ Register newval = $newval$$Register; ++ Register oldval = $oldval$$Register; ++ Register res = $res$$Register; ++ Address addr(as_Register($mem$$base)); ++ __ cmpxchg(addr, oldval, newval, res, false /* retold */, true /* barrier */, false /* weak */, true /* exchange */); ++ %} ++ ins_pipe(pipe_slow); ++%} ++ ++instruct compareAndExchangeN(mRegN res, indirect mem, mRegN oldval, mRegN newval) %{ ++ ++ match(Set res (CompareAndExchangeN mem (Binary oldval newval))); ++ ins_cost(2 * MEMORY_REF_COST); ++ effect(TEMP_DEF res); ++ format %{ ++ "cmpxchg32 $res = $mem, $oldval, $newval\t# if $mem == $oldval then $mem <-- $newval @compareAndExchangeN" ++ %} ++ ins_encode %{ ++ Register newval = $newval$$Register; ++ Register oldval = 
$oldval$$Register; ++ Register res = $res$$Register; ++ Address addr(as_Register($mem$$base)); ++ __ cmpxchg32(addr, oldval, newval, res, false /* sign */, false /* retold */, true /* barrier */, false /* weak */, true /* exchange */); ++ %} ++ ins_pipe(pipe_slow); ++%} ++ ++instruct weakCompareAndSwapI(mRegI res, indirect mem, mRegI oldval, mRegI newval) %{ ++ ++ match(Set res (WeakCompareAndSwapI mem (Binary oldval newval))); ++ ins_cost(2 * MEMORY_REF_COST); ++ format %{ ++ "cmpxchg32 $res = $mem, $oldval, $newval\t# if $mem == $oldval then $mem <-- $newval @weakCompareAndSwapI" ++ %} ++ ins_encode %{ ++ Register newval = $newval$$Register; ++ Register oldval = $oldval$$Register; ++ Register res = $res$$Register; ++ Address addr(as_Register($mem$$base)); ++ if (res != addr.base() && res != oldval && res != newval) { ++ __ cmpxchg32(addr, oldval, newval, res, true /* sign */, false /* retold */, true /* barrier */, true /* weak */, false /* exchange */); ++ } else { ++ __ cmpxchg32(addr, oldval, newval, AT, true /* sign */, false /* retold */, true /* barrier */, true /* weak */, false /* exchange */); ++ __ move(res, AT); ++ } ++ %} ++ ins_pipe(pipe_slow); ++%} ++ ++instruct weakCompareAndSwapL(mRegI res, indirect mem, mRegL oldval, mRegL newval) %{ ++ ++ match(Set res (WeakCompareAndSwapL mem (Binary oldval newval))); ++ ins_cost(2 * MEMORY_REF_COST); ++ format %{ ++ "cmpxchg $res = $mem, $oldval, $newval\t# if $mem == $oldval then $mem <-- $newval @WeakCompareAndSwapL" ++ %} ++ ins_encode %{ ++ Register newval = $newval$$Register; ++ Register oldval = $oldval$$Register; ++ Register res = $res$$Register; ++ Address addr(as_Register($mem$$base)); ++ if (res != addr.base() && res != oldval && res != newval) { ++ __ cmpxchg(addr, oldval, newval, res, false /* retold */, true /* barrier */, true /* weak */, false /* exchange */); ++ } else { ++ __ cmpxchg(addr, oldval, newval, AT, false /* retold */, true /* barrier */, true /* weak */, false /* exchange */); ++ __ move(res, AT); ++ } ++ %} ++ ins_pipe(pipe_slow); ++%} ++ ++instruct weakCompareAndSwapP(mRegI res, indirect mem, mRegP oldval, mRegP newval) %{ ++ predicate((((CompareAndSwapNode*)n)->order() != MemNode::acquire && ((CompareAndSwapNode*)n)->order() != MemNode::seqcst) && n->as_LoadStore()->barrier_data() == 0); ++ match(Set res (WeakCompareAndSwapP mem (Binary oldval newval))); ++ ins_cost(MEMORY_REF_COST); ++ format %{ ++ "cmpxchg $res = $mem, $oldval, $newval\t# if $mem == $oldval then $mem <-- $newval @weakCompareAndSwapP" ++ %} ++ ins_encode %{ ++ Register newval = $newval$$Register; ++ Register oldval = $oldval$$Register; ++ Register res = $res$$Register; ++ Address addr(as_Register($mem$$base)); ++ if (res != addr.base() && res != oldval && res != newval) { ++ __ cmpxchg(addr, oldval, newval, res, false /* retold */, false /* barrier */, true /* weak */, false /* exchange */); ++ } else { ++ __ cmpxchg(addr, oldval, newval, AT, false /* retold */, false /* barrier */, true /* weak */, false /* exchange */); ++ __ move(res, AT); ++ } ++ %} ++ ins_pipe(pipe_slow); ++%} ++ ++instruct weakCompareAndSwapP_acq(mRegI res, indirect mem, mRegP oldval, mRegP newval) %{ ++ predicate(n->as_LoadStore()->barrier_data() == 0); ++ match(Set res (WeakCompareAndSwapP mem (Binary oldval newval))); ++ ins_cost(2 * MEMORY_REF_COST); ++ format %{ ++ "cmpxchg $res = $mem, $oldval, $newval\t# if $mem == $oldval then $mem <-- $newval @weakCompareAndSwapP" ++ %} ++ ins_encode %{ ++ Register newval = $newval$$Register; ++ Register oldval = 
$oldval$$Register; ++ Register res = $res$$Register; ++ Address addr(as_Register($mem$$base)); ++ if (res != addr.base() && res != oldval && res != newval) { ++ __ cmpxchg(addr, oldval, newval, res, false /* retold */, true /* barrier */, true /* weak */, false /* exchange */); ++ } else { ++ __ cmpxchg(addr, oldval, newval, AT, false /* retold */, true /* barrier */, true /* weak */, false /* exchange */); ++ __ move(res, AT); ++ } ++ %} ++ ins_pipe(pipe_slow); ++%} ++ ++instruct weakCompareAndSwapN(mRegI res, indirect mem, mRegN oldval, mRegN newval) %{ ++ ++ match(Set res (WeakCompareAndSwapN mem (Binary oldval newval))); ++ ins_cost(2 * MEMORY_REF_COST); ++ format %{ ++ "cmpxchg32 $res = $mem, $oldval, $newval\t# if $mem == $oldval then $mem <-- $newval @weakCompareAndSwapN" ++ %} ++ ins_encode %{ ++ Register newval = $newval$$Register; ++ Register oldval = $oldval$$Register; ++ Register res = $res$$Register; ++ Address addr(as_Register($mem$$base)); ++ if (res != addr.base() && res != oldval && res != newval) { ++ __ cmpxchg32(addr, oldval, newval, res, false /* sign */, false /* retold */, true /* barrier */, true /* weak */, false /* exchange */); ++ } else { ++ __ cmpxchg32(addr, oldval, newval, AT, false /* sign */, false /* retold */, true /* barrier */, true /* weak */, false /* exchange */); ++ __ move(res, AT); ++ } ++ %} ++ ins_pipe(pipe_slow); ++%} ++ ++//----------Max and Min-------------------------------------------------------- ++ ++// Min Register with Register (generic version) ++instruct minI_Reg_Reg(mRegI dst, mRegI src) %{ ++ match(Set dst (MinI dst src)); ++ //effect(KILL flags); ++ ins_cost(80); ++ ++ format %{ "MIN $dst, $src @minI_Reg_Reg" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src = $src$$Register; ++ ++ __ slt(AT, src, dst); ++ __ masknez(dst, dst, AT); ++ __ maskeqz(AT, src, AT); ++ __ OR(dst, dst, AT); ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++// Max Register with Register (generic version) ++instruct maxI_Reg_Reg(mRegI dst, mRegI src) %{ ++ match(Set dst (MaxI dst src)); ++ ins_cost(80); ++ ++ format %{ "MAX $dst, $src @maxI_Reg_Reg" %} ++ ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src = $src$$Register; ++ ++ __ slt(AT, dst, src); ++ __ masknez(dst, dst, AT); ++ __ maskeqz(AT, src, AT); ++ __ OR(dst, dst, AT); ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct maxI_Reg_zero(mRegI dst, immI_0 zero) %{ ++ match(Set dst (MaxI dst zero)); ++ ins_cost(50); ++ ++ format %{ "MAX $dst, 0 @maxI_Reg_zero" %} ++ ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ ++ __ slt(AT, dst, R0); ++ __ masknez(dst, dst, AT); ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct zerox_long_reg_reg(mRegL dst, mRegL src, immL_MaxUI mask) ++%{ ++ match(Set dst (AndL src mask)); ++ ++ format %{ "movl $dst, $src\t# zero-extend long @ zerox_long_reg_reg" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src = $src$$Register; ++ ++ __ bstrpick_d(dst, src, 31, 0); ++ %} ++ ins_pipe(ialu_regI_regI); ++%} ++ ++instruct combine_i2l(mRegL dst, mRegI src1, immL_MaxUI mask, mRegI src2, immI_32 shift32) ++%{ ++ match(Set dst (OrL (AndL (ConvI2L src1) mask) (LShiftL (ConvI2L src2) shift32))); ++ ++ format %{ "combine_i2l $dst, $src2(H), $src1(L) @ combine_i2l" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src1 = $src1$$Register; ++ Register src2 = $src2$$Register; ++ ++ if (src1 == dst) { ++ __ bstrins_d(dst, src2, 63, 32); ++ } else if (src2 == dst) { ++ __ slli_d(dst, dst, 32); ++ __ bstrins_d(dst, 
src1, 31, 0); ++ } else { ++ __ bstrpick_d(dst, src1, 31, 0); ++ __ bstrins_d(dst, src2, 63, 32); ++ } ++ %} ++ ins_pipe(ialu_regI_regI); ++%} ++ ++// Zero-extend convert int to long ++instruct convI2L_reg_reg_zex(mRegL dst, mRegI src, immL_MaxUI mask) ++%{ ++ match(Set dst (AndL (ConvI2L src) mask)); ++ ++ format %{ "movl $dst, $src\t# i2l zero-extend @ convI2L_reg_reg_zex" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src = $src$$Register; ++ ++ __ bstrpick_d(dst, src, 31, 0); ++ %} ++ ins_pipe(ialu_regI_regI); ++%} ++ ++instruct convL2I2L_reg_reg_zex(mRegL dst, mRegL src, immL_MaxUI mask) ++%{ ++ match(Set dst (AndL (ConvI2L (ConvL2I src)) mask)); ++ ++ format %{ "movl $dst, $src\t# i2l zero-extend @ convL2I2L_reg_reg_zex" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src = $src$$Register; ++ ++ __ bstrpick_d(dst, src, 31, 0); ++ %} ++ ins_pipe(ialu_regI_regI); ++%} ++ ++// Match loading integer and casting it to unsigned int in long register. ++// LoadI + ConvI2L + AndL 0xffffffff. ++instruct loadUI2L_rmask(mRegL dst, memory mem, immL_MaxUI mask) %{ ++ match(Set dst (AndL (ConvI2L (LoadI mem)) mask)); ++ ++ format %{ "ld_wu $dst, $mem \t// zero-extend to long @ loadUI2L_rmask" %} ++ ins_encode %{ ++ relocInfo::relocType disp_reloc = $mem->disp_reloc(); ++ assert(disp_reloc == relocInfo::none, "cannot have disp"); ++ __ loadstore_enc($dst$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, C2_MacroAssembler::LOAD_U_INT); ++ %} ++ ins_pipe(ialu_loadI); ++%} ++ ++instruct loadUI2L_lmask(mRegL dst, memory mem, immL_MaxUI mask) %{ ++ match(Set dst (AndL mask (ConvI2L (LoadI mem)))); ++ ++ format %{ "ld_wu $dst, $mem \t// zero-extend to long @ loadUI2L_lmask" %} ++ ins_encode %{ ++ relocInfo::relocType disp_reloc = $mem->disp_reloc(); ++ assert(disp_reloc == relocInfo::none, "cannot have disp"); ++ __ loadstore_enc($dst$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, C2_MacroAssembler::LOAD_U_INT); ++ %} ++ ins_pipe(ialu_loadI); ++%} ++ ++ ++// ============================================================================ ++// Safepoint Instruction ++instruct safePoint_poll_tls(mRegP poll) %{ ++ match(SafePoint poll); ++ effect(USE poll); ++ ++ ins_cost(125); ++ format %{ "ld_w AT, [$poll]\t" ++ "Safepoint @ [$poll] : poll for GC" %} ++ size(4); ++ ins_encode %{ ++ Register poll_reg = $poll$$Register; ++ ++ __ block_comment("Safepoint:"); ++ __ relocate(relocInfo::poll_type); ++ address pre_pc = __ pc(); ++ __ ld_w(AT, poll_reg, 0); ++ assert(nativeInstruction_at(pre_pc)->is_safepoint_poll(), "must emit ld_w AT, [$poll]"); ++ %} ++ ++ ins_pipe( ialu_storeI ); ++%} ++ ++//----------Arithmetic Conversion Instructions--------------------------------- ++ ++instruct roundFloat_nop(regF dst) ++%{ ++ match(Set dst (RoundFloat dst)); ++ ++ ins_cost(0); ++ ins_encode(); ++ ins_pipe(empty); ++%} ++ ++instruct roundDouble_nop(regD dst) ++%{ ++ match(Set dst (RoundDouble dst)); ++ ++ ins_cost(0); ++ ins_encode(); ++ ins_pipe(empty); ++%} ++ ++//----------BSWAP Instructions------------------------------------------------- ++instruct bytes_reverse_int(mRegI dst, mRegIorL2I src) %{ ++ match(Set dst (ReverseBytesI src)); ++ ++ format %{ "RevB_I $dst, $src" %} ++ ins_encode %{ ++ __ bswap_w($dst$$Register, $src$$Register); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++instruct bytes_reverse_long(mRegL dst, mRegL src) %{ ++ match(Set dst (ReverseBytesL src)); ++ ++ format %{ "RevB_L $dst, $src" %} ++ ins_encode %{ ++ __ revb_d($dst$$Register, 
$src$$Register); ++ %} ++ ins_pipe( ialu_regL_regL ); ++%} ++ ++instruct bytes_reverse_unsigned_short(mRegI dst, mRegIorL2I src) %{ ++ match(Set dst (ReverseBytesUS src)); ++ ++ format %{ "RevB_US $dst, $src" %} ++ ins_encode %{ ++ __ bswap_hu($dst$$Register, $src$$Register); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++instruct bytes_reverse_short(mRegI dst, mRegIorL2I src) %{ ++ match(Set dst (ReverseBytesS src)); ++ ++ format %{ "RevB_S $dst, $src" %} ++ ins_encode %{ ++ __ bswap_h($dst$$Register, $src$$Register); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++//---------- Zeros Count Instructions ------------------------------------------ ++// CountLeadingZerosINode CountTrailingZerosINode ++instruct countLeadingZerosI(mRegI dst, mRegIorL2I src) %{ ++ match(Set dst (CountLeadingZerosI src)); ++ ++ format %{ "clz_w $dst, $src\t# count leading zeros (int)" %} ++ ins_encode %{ ++ __ clz_w($dst$$Register, $src$$Register); ++ %} ++ ins_pipe( ialu_regL_regL ); ++%} ++ ++instruct countLeadingZerosL(mRegI dst, mRegL src) %{ ++ match(Set dst (CountLeadingZerosL src)); ++ ++ format %{ "clz_d $dst, $src\t# count leading zeros (long)" %} ++ ins_encode %{ ++ __ clz_d($dst$$Register, $src$$Register); ++ %} ++ ins_pipe( ialu_regL_regL ); ++%} ++ ++instruct countTrailingZerosI(mRegI dst, mRegIorL2I src) %{ ++ match(Set dst (CountTrailingZerosI src)); ++ ++ format %{ "ctz_w $dst, $src\t# count trailing zeros (int)" %} ++ ins_encode %{ ++ __ ctz_w($dst$$Register, $src$$Register); ++ %} ++ ins_pipe( ialu_regL_regL ); ++%} ++ ++instruct countTrailingZerosL(mRegI dst, mRegL src) %{ ++ match(Set dst (CountTrailingZerosL src)); ++ ++ format %{ "ctz_d $dst, $src\t# count trailing zeros (long)" %} ++ ins_encode %{ ++ __ ctz_d($dst$$Register, $src$$Register); ++ %} ++ ins_pipe( ialu_regL_regL ); ++%} ++ ++// --------------- Population Count Instructions ------------------------------ ++// ++instruct popCountI(mRegI dst, mRegIorL2I src) %{ ++ predicate(UsePopCountInstruction); ++ match(Set dst (PopCountI src)); ++ ++ format %{ "vinsgr2vr_w fscratch, $src, 0\n\t" ++ "vpcnt_w fscratch, fscratch\n\t" ++ "vpickve2gr_wu $dst, fscratch, 0\n\t# @popCountI" %} ++ ++ ins_encode %{ ++ __ vinsgr2vr_w(fscratch, $src$$Register, 0); ++ __ vpcnt_w(fscratch, fscratch); ++ __ vpickve2gr_wu($dst$$Register, fscratch, 0); ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct popCountI_mem(mRegI dst, memory mem) %{ ++ predicate(UsePopCountInstruction); ++ match(Set dst (PopCountI (LoadI mem))); ++ ++ format %{ "fld_s fscratch, $mem, 0\n\t" ++ "vpcnt_w fscratch, fscratch\n\t" ++ "vpickve2gr_wu $dst, fscratch, 0\n\t# @popCountI_mem" %} ++ ++ ins_encode %{ ++ __ loadstore_enc(fscratch, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, C2_MacroAssembler::LOAD_FLOAT); ++ __ vpcnt_w(fscratch, fscratch); ++ __ vpickve2gr_wu($dst$$Register, fscratch, 0); ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++// Note: Long.bitCount(long) returns an int. 
++instruct popCountL(mRegI dst, mRegL src) %{ ++ predicate(UsePopCountInstruction); ++ match(Set dst (PopCountL src)); ++ ++ format %{ "vinsgr2vr_d fscratch, $src, 0\n\t" ++ "vpcnt_d fscratch, fscratch\n\t" ++ "vpickve2gr_wu $dst, fscratch, 0\n\t# @popCountL" %} ++ ++ ins_encode %{ ++ __ vinsgr2vr_d(fscratch, $src$$Register, 0); ++ __ vpcnt_d(fscratch, fscratch); ++ __ vpickve2gr_wu($dst$$Register, fscratch, 0); ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct popCountL_mem(mRegI dst, memory mem) %{ ++ predicate(UsePopCountInstruction); ++ match(Set dst (PopCountL (LoadL mem))); ++ ++ format %{ "fld_d fscratch, $mem, 0\n\t" ++ "vpcnt_d fscratch, fscratch\n\t" ++ "vpickve2gr_wu $dst, fscratch, 0\n\t# @popCountL_mem" %} ++ ++ ins_encode %{ ++ __ loadstore_enc(fscratch, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, C2_MacroAssembler::LOAD_DOUBLE); ++ __ vpcnt_d(fscratch, fscratch); ++ __ vpickve2gr_wu($dst$$Register, fscratch, 0); ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++// ====================VECTOR INSTRUCTIONS===================================== ++ ++// --------------------------------- Load ------------------------------------- ++ ++instruct loadV16(vecX dst, memory mem) %{ ++ predicate(n->as_LoadVector()->memory_size() == 16); ++ match(Set dst (LoadVector mem)); ++ format %{ "vload $dst, $mem\t# @loadV16" %} ++ ins_encode %{ ++ __ loadstore_enc($dst$$FloatRegister, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, C2_MacroAssembler::LOAD_VECTORX); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct loadV32(vecY dst, memory mem) %{ ++ predicate(n->as_LoadVector()->memory_size() == 32); ++ match(Set dst (LoadVector mem)); ++ format %{ "xvload $dst, $mem\t# @loadV32" %} ++ ins_encode %{ ++ __ loadstore_enc($dst$$FloatRegister, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, C2_MacroAssembler::LOAD_VECTORY); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++// --------------------------------- Store ------------------------------------ ++ ++instruct storeV16(memory mem, vecX src) %{ ++ predicate(n->as_StoreVector()->memory_size() == 16); ++ match(Set mem (StoreVector mem src)); ++ format %{ "vstore $src, $mem\t# @storeV16" %} ++ ins_encode %{ ++ __ loadstore_enc($src$$FloatRegister, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, C2_MacroAssembler::STORE_VECTORX); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct storeV32(memory mem, vecY src) %{ ++ predicate(n->as_StoreVector()->memory_size() == 32); ++ match(Set mem (StoreVector mem src)); ++ format %{ "xvstore $src, $mem\t# @storeV32" %} ++ ins_encode %{ ++ __ loadstore_enc($src$$FloatRegister, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, C2_MacroAssembler::STORE_VECTORY); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++// ------------------------------- Replicate ---------------------------------- ++ ++instruct repl16B(vecX dst, mRegI src) %{ ++ predicate(vector_length(n) == 16); ++ match(Set dst (ReplicateB src)); ++ format %{ "vreplgr2vr.b $dst, $src\t# @repl16B" %} ++ ins_encode %{ ++ __ vreplgr2vr_b($dst$$FloatRegister, $src$$Register); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct repl16B_imm(vecX dst, immI_M128_255 imm) %{ ++ predicate(vector_length(n) == 16); ++ match(Set dst (ReplicateB imm)); ++ format %{ "vldi $dst, $imm\t# @repl16B_imm" %} ++ ins_encode %{ ++ __ vldi($dst$$FloatRegister, ($imm$$constant & 0xff)); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct repl8S(vecX dst, mRegI src) %{ ++ predicate(vector_length(n) == 8); ++ match(Set dst (ReplicateS src)); ++ format %{ "vreplgr2vr.h $dst, $src\t# 
@repl8S" %} ++ ins_encode %{ ++ __ vreplgr2vr_h($dst$$FloatRegister, $src$$Register); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct repl8S_imm(vecX dst, immI10 imm) %{ ++ predicate(vector_length(n) == 8); ++ match(Set dst (ReplicateS imm)); ++ format %{ "vldi $dst, $imm\t# @repl8S_imm" %} ++ ins_encode %{ ++ __ vldi($dst$$FloatRegister, (0b001 << 10 ) | ($imm$$constant & 0x3ff)); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct repl4I(vecX dst, mRegI src) %{ ++ predicate(vector_length(n) == 4); ++ match(Set dst (ReplicateI src)); ++ format %{ "vreplgr2vr.w $dst, $src\t# @repl4I" %} ++ ins_encode %{ ++ __ vreplgr2vr_w($dst$$FloatRegister, $src$$Register); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct repl4I_imm(vecX dst, immI10 imm) %{ ++ predicate(vector_length(n) == 4); ++ match(Set dst (ReplicateI imm)); ++ format %{ "vldi $dst, $imm\t# @repl4I_imm" %} ++ ins_encode %{ ++ __ vldi($dst$$FloatRegister, (0b010 << 10 ) | ($imm$$constant & 0x3ff)); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct repl2L(vecX dst, mRegL src) %{ ++ predicate(vector_length(n) == 2); ++ match(Set dst (ReplicateL src)); ++ format %{ "vreplgr2vr.d $dst, $src\t# @repl2L" %} ++ ins_encode %{ ++ __ vreplgr2vr_d($dst$$FloatRegister, $src$$Register); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct repl2L_imm(vecX dst, immL10 imm) %{ ++ predicate(vector_length(n) == 2); ++ match(Set dst (ReplicateL imm)); ++ format %{ "vldi $dst, $imm\t# @repl2L_imm" %} ++ ins_encode %{ ++ __ vldi($dst$$FloatRegister, (0b011 << 10 ) | ($imm$$constant & 0x3ff)); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct repl4F(vecX dst, regF src) %{ ++ predicate(vector_length(n) == 4); ++ match(Set dst (ReplicateF src)); ++ format %{ "vreplvei.w $dst, $src, 0\t# @repl4F" %} ++ ins_encode %{ ++ __ vreplvei_w($dst$$FloatRegister, $src$$FloatRegister, 0); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct repl2D(vecX dst, regD src) %{ ++ predicate(vector_length(n) == 2); ++ match(Set dst (ReplicateD src)); ++ format %{ "vreplvei.d $dst, $src, 0\t# @repl2D" %} ++ ins_encode %{ ++ __ vreplvei_d($dst$$FloatRegister, $src$$FloatRegister, 0); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct repl32B(vecY dst, mRegI src) %{ ++ predicate(vector_length(n) == 32); ++ match(Set dst (ReplicateB src)); ++ format %{ "xvreplgr2vr.b $dst, $src\t# @repl32B" %} ++ ins_encode %{ ++ __ xvreplgr2vr_b($dst$$FloatRegister, $src$$Register); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct repl32B_imm(vecY dst, immI_M128_255 imm) %{ ++ predicate(vector_length(n) == 32); ++ match(Set dst (ReplicateB imm)); ++ format %{ "xvldi $dst, $imm\t# @repl32B_imm" %} ++ ins_encode %{ ++ __ xvldi($dst$$FloatRegister, ($imm$$constant & 0xff)); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct repl16S(vecY dst, mRegI src) %{ ++ predicate(vector_length(n) == 16); ++ match(Set dst (ReplicateS src)); ++ format %{ "xvreplgr2vr.h $dst, $src\t# @repl16S" %} ++ ins_encode %{ ++ __ xvreplgr2vr_h($dst$$FloatRegister, $src$$Register); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct repl16S_imm(vecY dst, immI10 imm) %{ ++ predicate(vector_length(n) == 16); ++ match(Set dst (ReplicateS imm)); ++ format %{ "xvldi $dst, $imm\t# @repl16S_imm" %} ++ ins_encode %{ ++ __ xvldi($dst$$FloatRegister, (0b001 << 10 ) | ($imm$$constant & 0x3ff)); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct repl8I(vecY dst, mRegI src) %{ ++ predicate(vector_length(n) == 8); ++ match(Set dst (ReplicateI src)); ++ format %{ "xvreplgr2vr.w $dst, $src\t# @repl8I" %} ++ ins_encode %{ ++ __ 
xvreplgr2vr_w($dst$$FloatRegister, $src$$Register); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct repl8I_imm(vecY dst, immI10 imm) %{ ++ predicate(vector_length(n) == 8); ++ match(Set dst (ReplicateI imm)); ++ format %{ "xvldi $dst, $imm\t# @repl8I_imm" %} ++ ins_encode %{ ++ __ xvldi($dst$$FloatRegister, (0b010 << 10 ) | ($imm$$constant & 0x3ff)); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct repl4L(vecY dst, mRegL src) %{ ++ predicate(vector_length(n) == 4); ++ match(Set dst (ReplicateL src)); ++ format %{ "xvreplgr2vr.d $dst, $src\t# @repl4L" %} ++ ins_encode %{ ++ __ xvreplgr2vr_d($dst$$FloatRegister, $src$$Register); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct repl4L_imm(vecY dst, immL10 imm) %{ ++ predicate(vector_length(n) == 4); ++ match(Set dst (ReplicateL imm)); ++ format %{ "xvldi $dst, $imm\t# @repl4L_imm" %} ++ ins_encode %{ ++ __ xvldi($dst$$FloatRegister, (0b011 << 10 ) | ($imm$$constant & 0x3ff)); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct repl8F(vecY dst, regF src) %{ ++ predicate(vector_length(n) == 8); ++ match(Set dst (ReplicateF src)); ++ format %{ "xvreplve0.w $dst, $src\t# @repl8F" %} ++ ins_encode %{ ++ __ xvreplve0_w($dst$$FloatRegister, $src$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct repl4D(vecY dst, regD src) %{ ++ predicate(vector_length(n) == 4); ++ match(Set dst (ReplicateD src)); ++ format %{ "xvreplve0.d $dst, $src\t# @repl4D" %} ++ ins_encode %{ ++ __ xvreplve0_d($dst$$FloatRegister, $src$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++// --------------------------------- ADD -------------------------------------- ++ ++instruct add16B(vecX dst, vecX src1, vecX src2) %{ ++ predicate(vector_length(n) == 16); ++ match(Set dst (AddVB src1 src2)); ++ format %{ "vadd.b $dst, $src1, $src2\t# @add16B" %} ++ ins_encode %{ ++ __ vadd_b($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct add16B_imm(vecX dst, vecX src, immIU5 imm) %{ ++ predicate(vector_length(n) == 16); ++ match(Set dst (AddVB src (ReplicateB imm))); ++ format %{ "vaddi.bu $dst, $src, $imm\t# @add16B_imm" %} ++ ins_encode %{ ++ __ vaddi_bu($dst$$FloatRegister, $src$$FloatRegister, $imm$$constant); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct add8S(vecX dst, vecX src1, vecX src2) %{ ++ predicate(vector_length(n) == 8); ++ match(Set dst (AddVS src1 src2)); ++ format %{ "vadd.h $dst, $src1, $src2\t# @add8S" %} ++ ins_encode %{ ++ __ vadd_h($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct add8S_imm(vecX dst, vecX src, immIU5 imm) %{ ++ predicate(vector_length(n) == 8); ++ match(Set dst (AddVS src (ReplicateS imm))); ++ format %{ "vaddi.hu $dst, $src, $imm\t# @add8S_imm" %} ++ ins_encode %{ ++ __ vaddi_hu($dst$$FloatRegister, $src$$FloatRegister, $imm$$constant); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct add4I(vecX dst, vecX src1, vecX src2) %{ ++ predicate(vector_length(n) == 4); ++ match(Set dst (AddVI src1 src2)); ++ format %{ "vadd.w $dst, $src1, src2\t# @add4I" %} ++ ins_encode %{ ++ __ vadd_w($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct add4I_imm(vecX dst, vecX src, immIU5 imm) %{ ++ predicate(vector_length(n) == 4); ++ match(Set dst (AddVI src (ReplicateI imm))); ++ format %{ "vaddi.wu $dst, $src, $imm\t# @add4I_imm" %} ++ ins_encode %{ ++ __ vaddi_wu($dst$$FloatRegister, $src$$FloatRegister, $imm$$constant); ++ %} ++ 
ins_pipe( pipe_slow ); ++%} ++ ++instruct add2L(vecX dst, vecX src1, vecX src2) %{ ++ predicate(vector_length(n) == 2); ++ match(Set dst (AddVL src1 src2)); ++ format %{ "vadd.d $dst, $src1, $src2\t# @add2L" %} ++ ins_encode %{ ++ __ vadd_d($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct add2L_imm(vecX dst, vecX src, immLU5 imm) %{ ++ predicate(vector_length(n) == 2); ++ match(Set dst (AddVL src (ReplicateL imm))); ++ format %{ "vaddi.du $dst, $src, $imm\t# @add2L_imm" %} ++ ins_encode %{ ++ __ vaddi_du($dst$$FloatRegister, $src$$FloatRegister, $imm$$constant); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct add4F(vecX dst, vecX src1, vecX src2) %{ ++ predicate(vector_length(n) == 4); ++ match(Set dst (AddVF src1 src2)); ++ format %{ "vfadd.s $dst, $src1, $src2\t# @add4F" %} ++ ins_encode %{ ++ __ vfadd_s($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct add2D(vecX dst, vecX src1, vecX src2) %{ ++ predicate(vector_length(n) == 2); ++ match(Set dst (AddVD src1 src2)); ++ format %{ "vfadd.d $dst, $src1, $src2\t# @add2D" %} ++ ins_encode %{ ++ __ vfadd_d($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct add32B(vecY dst, vecY src1, vecY src2) %{ ++ predicate(vector_length(n) == 32); ++ match(Set dst (AddVB src1 src2)); ++ format %{ "xvadd.b $dst, $src1, $src2\t# @add32B" %} ++ ins_encode %{ ++ __ xvadd_b($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct add32B_imm(vecY dst, vecY src, immIU5 imm) %{ ++ predicate(vector_length(n) == 32); ++ match(Set dst (AddVB src (ReplicateB imm))); ++ format %{ "xvaddi.bu $dst, $src, $imm\t# @add32B_imm" %} ++ ins_encode %{ ++ __ xvaddi_bu($dst$$FloatRegister, $src$$FloatRegister, $imm$$constant); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct add16S(vecY dst, vecY src1, vecY src2) %{ ++ predicate(vector_length(n) == 16); ++ match(Set dst (AddVS src1 src2)); ++ format %{ "xvadd.h $dst, $src1, $src2\t# @add16S" %} ++ ins_encode %{ ++ __ xvadd_h($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct add16S_imm(vecY dst, vecY src, immIU5 imm) %{ ++ predicate(vector_length(n) == 16); ++ match(Set dst (AddVS src (ReplicateS imm))); ++ format %{ "xvaddi.hu $dst, $src, $imm\t# @add16S_imm" %} ++ ins_encode %{ ++ __ xvaddi_hu($dst$$FloatRegister, $src$$FloatRegister, $imm$$constant); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct add8I(vecY dst, vecY src1, vecY src2) %{ ++ predicate(vector_length(n) == 8); ++ match(Set dst (AddVI src1 src2)); ++ format %{ "xvadd.wu $dst, $src1, $src2\t# @add8I" %} ++ ins_encode %{ ++ __ xvadd_w($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct add8I_imm(vecY dst, vecY src, immIU5 imm) %{ ++ predicate(vector_length(n) == 8); ++ match(Set dst (AddVI src (ReplicateI imm))); ++ format %{ "xvaddi.wu $dst, $src, $imm\t# @add8I_imm" %} ++ ins_encode %{ ++ __ xvaddi_wu($dst$$FloatRegister, $src$$FloatRegister, $imm$$constant); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct add4L(vecY dst, vecY src1, vecY src2) %{ ++ predicate(vector_length(n) == 4); ++ match(Set dst (AddVL src1 src2)); ++ format %{ "xvadd.d $dst, $src1, $src2\t# @add4L" %} ++ ins_encode %{ ++ __ xvadd_d($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); 
++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct add4L_imm(vecY dst, vecY src, immLU5 imm) %{ ++ predicate(vector_length(n) == 4); ++ match(Set dst (AddVL src (ReplicateL imm))); ++ format %{ "xvaddi.du $dst, $src, $imm\t# @add4L_imm" %} ++ ins_encode %{ ++ __ xvaddi_du($dst$$FloatRegister, $src$$FloatRegister, $imm$$constant); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct add8F(vecY dst, vecY src1, vecY src2) %{ ++ predicate(vector_length(n) == 8); ++ match(Set dst (AddVF src1 src2)); ++ format %{ "xvfadd.s $dst, $src1, $src2\t# @add8F" %} ++ ins_encode %{ ++ __ xvfadd_s($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct add4D(vecY dst, vecY src1, vecY src2) %{ ++ predicate(vector_length(n) == 4); ++ match(Set dst (AddVD src1 src2)); ++ format %{ "xvfadd.d $dst, $src1, $src2\t# @add4D" %} ++ ins_encode %{ ++ __ xvfadd_d($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++// --------------------------------- SUB -------------------------------------- ++ ++instruct sub16B(vecX dst, vecX src1, vecX src2) %{ ++ predicate(vector_length(n) == 16); ++ match(Set dst (SubVB src1 src2)); ++ format %{ "vsub.b $dst, $src1, $src2\t# @sub16B" %} ++ ins_encode %{ ++ __ vsub_b($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct sub16B_imm(vecX dst, vecX src, immIU5 imm) %{ ++ predicate(vector_length(n) == 16); ++ match(Set dst (SubVB src (ReplicateB imm))); ++ format %{ "vsubi.bu $dst, $src, $imm\t# @sub16B_imm" %} ++ ins_encode %{ ++ __ vsubi_bu($dst$$FloatRegister, $src$$FloatRegister, $imm$$constant); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct sub8S(vecX dst, vecX src1, vecX src2) %{ ++ predicate(vector_length(n) == 8); ++ match(Set dst (SubVS src1 src2)); ++ format %{ "vsub.h $dst, $src1, $src2\t# @sub8S" %} ++ ins_encode %{ ++ __ vsub_h($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct sub8S_imm(vecX dst, vecX src, immIU5 imm) %{ ++ predicate(vector_length(n) == 8); ++ match(Set dst (SubVS src (ReplicateS imm))); ++ format %{ "vsubi.hu $dst, $src, $imm\t# @sub8S_imm" %} ++ ins_encode %{ ++ __ vsubi_hu($dst$$FloatRegister, $src$$FloatRegister, $imm$$constant); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct sub4I(vecX dst, vecX src1, vecX src2) %{ ++ predicate(vector_length(n) == 4); ++ match(Set dst (SubVI src1 src2)); ++ format %{ "vsub.w $dst, $src1, src2\t# @sub4I" %} ++ ins_encode %{ ++ __ vsub_w($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct sub4I_imm(vecX dst, vecX src, immIU5 imm) %{ ++ predicate(vector_length(n) == 4); ++ match(Set dst (SubVI src (ReplicateI imm))); ++ format %{ "vsubi.wu $dst, $src, $imm\t# @sub4I_imm" %} ++ ins_encode %{ ++ __ vsubi_wu($dst$$FloatRegister, $src$$FloatRegister, $imm$$constant); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct sub2L(vecX dst, vecX src1, vecX src2) %{ ++ predicate(vector_length(n) == 2); ++ match(Set dst (SubVL src1 src2)); ++ format %{ "vsub.d $dst, $src1, $src2\t# @sub2L" %} ++ ins_encode %{ ++ __ vsub_d($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct sub2L_imm(vecX dst, vecX src, immLU5 imm) %{ ++ predicate(vector_length(n) == 2); ++ match(Set dst (SubVL src (ReplicateL imm))); ++ format %{ "vsubi.du $dst, $src, $imm\t# @sub2L_imm" %} 
++ ins_encode %{ ++ __ vsubi_du($dst$$FloatRegister, $src$$FloatRegister, $imm$$constant); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct sub4F(vecX dst, vecX src1, vecX src2) %{ ++ predicate(vector_length(n) == 4); ++ match(Set dst (SubVF src1 src2)); ++ format %{ "vfsub.s $dst, $src1, $src2\t# @sub4F" %} ++ ins_encode %{ ++ __ vfsub_s($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct sub2D(vecX dst, vecX src1, vecX src2) %{ ++ predicate(vector_length(n) == 2); ++ match(Set dst (SubVD src1 src2)); ++ format %{ "vfsub.d $dst, $src1, $src2\t# @sub2D" %} ++ ins_encode %{ ++ __ vfsub_d($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct sub32B(vecY dst, vecY src1, vecY src2) %{ ++ predicate(vector_length(n) == 32); ++ match(Set dst (SubVB src1 src2)); ++ format %{ "xvsub.b $dst, $src1, $src2\t# @sub32B" %} ++ ins_encode %{ ++ __ xvsub_b($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct sub32B_imm(vecY dst, vecY src, immIU5 imm) %{ ++ predicate(vector_length(n) == 32); ++ match(Set dst (SubVB src (ReplicateB imm))); ++ format %{ "xvsubi.bu $dst, $src, $imm\t# @sub32B_imm" %} ++ ins_encode %{ ++ __ xvsubi_bu($dst$$FloatRegister, $src$$FloatRegister, $imm$$constant); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct sub16S(vecY dst, vecY src1, vecY src2) %{ ++ predicate(vector_length(n) == 16); ++ match(Set dst (SubVS src1 src2)); ++ format %{ "xvsub.h $dst, $src1, $src2\t# @sub16S" %} ++ ins_encode %{ ++ __ xvsub_h($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct sub16S_imm(vecY dst, vecY src, immIU5 imm) %{ ++ predicate(vector_length(n) == 16); ++ match(Set dst (SubVS src (ReplicateS imm))); ++ format %{ "xvsubi.hu $dst, $src, $imm\t# @sub16S_imm" %} ++ ins_encode %{ ++ __ xvsubi_hu($dst$$FloatRegister, $src$$FloatRegister, $imm$$constant); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct sub8I(vecY dst, vecY src1, vecY src2) %{ ++ predicate(vector_length(n) == 8); ++ match(Set dst (SubVI src1 src2)); ++ format %{ "xvsub.w $dst, $src1, $src2\t# @sub8I" %} ++ ins_encode %{ ++ __ xvsub_w($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct sub8I_imm(vecY dst, vecY src, immIU5 imm) %{ ++ predicate(vector_length(n) == 8); ++ match(Set dst (SubVI src (ReplicateI imm))); ++ format %{ "xvsubi.wu $dst, $src, $imm\t# @sub8I_imm" %} ++ ins_encode %{ ++ __ xvsubi_wu($dst$$FloatRegister, $src$$FloatRegister, $imm$$constant); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct sub4L(vecY dst, vecY src1, vecY src2) %{ ++ predicate(vector_length(n) == 4); ++ match(Set dst (SubVL src1 src2)); ++ format %{ "xvsub.d $dst, $src1, $src2\t# @sub4L" %} ++ ins_encode %{ ++ __ xvsub_d($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct sub4L_imm(vecY dst, vecY src, immLU5 imm) %{ ++ predicate(vector_length(n) == 4); ++ match(Set dst (SubVL src (ReplicateL imm))); ++ format %{ "xvsubi.du $dst, $src, $imm\t# @sub4L_imm" %} ++ ins_encode %{ ++ __ xvsubi_du($dst$$FloatRegister, $src$$FloatRegister, $imm$$constant); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct sub8F(vecY dst, vecY src1, vecY src2) %{ ++ predicate(vector_length(n) == 8); ++ match(Set dst (SubVF src1 src2)); ++ format %{ "xvfsub.s $dst, $src1, $src2\t# 
@sub8F" %} ++ ins_encode %{ ++ __ xvfsub_s($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct sub4D(vecY dst, vecY src1, vecY src2) %{ ++ predicate(vector_length(n) == 4); ++ match(Set dst (SubVD src1 src2)); ++ format %{ "xvfsub.d $dst,$src1,$src2\t# @sub4D" %} ++ ins_encode %{ ++ __ xvfsub_d($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++// --------------------------------- MUL -------------------------------------- ++instruct mul16B(vecX dst, vecX src1, vecX src2) %{ ++ predicate(vector_length(n) == 16); ++ match(Set dst (MulVB src1 src2)); ++ format %{ "vmul.b $dst, $src1, $src2\t# @mul16B" %} ++ ins_encode %{ ++ __ vmul_b($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct mul8S(vecX dst, vecX src1, vecX src2) %{ ++ predicate(vector_length(n) == 8); ++ match(Set dst (MulVS src1 src2)); ++ format %{ "vmul.h $dst, $src1, $src2\t# @mul8S" %} ++ ins_encode %{ ++ __ vmul_h($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct mul4I(vecX dst, vecX src1, vecX src2) %{ ++ predicate(vector_length(n) == 4); ++ match(Set dst (MulVI src1 src2)); ++ format %{ "vmul.w $dst, $src1, $src2\t# @mul4I" %} ++ ins_encode %{ ++ __ vmul_w($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct mul2L(vecX dst, vecX src1, vecX src2) %{ ++ predicate(vector_length(n) == 2); ++ match(Set dst (MulVL src1 src2)); ++ format %{ "vmul.d $dst, $src1, $src2\t# @mul2L" %} ++ ins_encode %{ ++ __ vmul_d($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct mul4F(vecX dst, vecX src1, vecX src2) %{ ++ predicate(vector_length(n) == 4); ++ match(Set dst (MulVF src1 src2)); ++ format %{ "vfmul.s $dst, $src1, $src2\t# @mul4F" %} ++ ins_encode %{ ++ __ vfmul_s($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct mul2D(vecX dst, vecX src1, vecX src2) %{ ++ predicate(vector_length(n) == 2); ++ match(Set dst (MulVD src1 src2)); ++ format %{ "vfmul.d $dst, $src1, $src2\t# @mul2D" %} ++ ins_encode %{ ++ __ vfmul_d($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct mul32B(vecY dst, vecY src1, vecY src2) %{ ++ predicate(vector_length(n) == 32); ++ match(Set dst (MulVB src1 src2)); ++ format %{ "xvmul.b $dst, $src1, $src2\t# @mul32B" %} ++ ins_encode %{ ++ __ xvmul_b($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct mul16S(vecY dst, vecY src1, vecY src2) %{ ++ predicate(vector_length(n) == 16); ++ match(Set dst (MulVS src1 src2)); ++ format %{ "xvmul.h $dst, $src1, $src2\t# @mul16S" %} ++ ins_encode %{ ++ __ xvmul_h($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct mul8I(vecY dst, vecY src1, vecY src2) %{ ++ predicate(vector_length(n) == 8); ++ match(Set dst (MulVI src1 src2)); ++ format %{ "xvmul.w $dst, $src1, $src2\t# @mul8I" %} ++ ins_encode %{ ++ __ xvmul_w($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct mul4L(vecY dst, vecY src1, vecY src2) %{ ++ predicate(vector_length(n) == 4); ++ match(Set dst (MulVL src1 src2)); ++ format %{ 
"xvmul.d $dst, $src1, $src2\t# @mul4L" %} ++ ins_encode %{ ++ __ xvmul_d($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct mul8F(vecY dst, vecY src1, vecY src2) %{ ++ predicate(vector_length(n) == 8); ++ match(Set dst (MulVF src1 src2)); ++ format %{ "xvfmul.s $dst, $src1, $src2\t# @mul8F" %} ++ ins_encode %{ ++ __ xvfmul_s($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct mul4D(vecY dst, vecY src1, vecY src2) %{ ++ predicate(vector_length(n) == 4); ++ match(Set dst (MulVD src1 src2)); ++ format %{ "xvfmul.d $dst, $src1, $src2\t# @mul4D" %} ++ ins_encode %{ ++ __ xvfmul_d($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++// --------------------------------- DIV -------------------------------------- ++instruct div4F(vecX dst, vecX src1, vecX src2) %{ ++ predicate(vector_length(n) == 4); ++ match(Set dst (DivVF src1 src2)); ++ format %{ "vfdiv.s $dst, $src1, $src2\t# @div4F" %} ++ ins_encode %{ ++ __ vfdiv_s($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct div2D(vecX dst, vecX src1, vecX src2) %{ ++ predicate(vector_length(n) == 2); ++ match(Set dst (DivVD src1 src2)); ++ format %{ "vfdiv.d $dst, $src1, $src2\t# @div2D" %} ++ ins_encode %{ ++ __ vfdiv_d($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct div8F(vecY dst, vecY src1, vecY src2) %{ ++ predicate(vector_length(n) == 8); ++ match(Set dst (DivVF src1 src2)); ++ format %{ "xvfdiv.s $dst, $src1, $src2\t# @div8F" %} ++ ins_encode %{ ++ __ xvfdiv_s($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct div4D(vecY dst, vecY src1, vecY src2) %{ ++ predicate(vector_length(n) == 4); ++ match(Set dst (DivVD src1 src2)); ++ format %{ "xvfdiv.d $dst, $src1, $src2\t# @div4D" %} ++ ins_encode %{ ++ __ xvfdiv_d($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++// --------------------------------- ABS -------------------------------------- ++ ++instruct abs16B(vecX dst, vecX src) %{ ++ predicate(vector_length(n) == 16); ++ match(Set dst (AbsVB src)); ++ effect(TEMP_DEF dst); ++ format %{ "vabs $dst, $src\t# @abs16B" %} ++ ins_encode %{ ++ __ vxor_v($dst$$FloatRegister, $dst$$FloatRegister, $dst$$FloatRegister); ++ __ vabsd_b($dst$$FloatRegister, $src$$FloatRegister, $dst$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct abs8S(vecX dst, vecX src) %{ ++ predicate(vector_length(n) == 8); ++ match(Set dst (AbsVS src)); ++ effect(TEMP_DEF dst); ++ format %{ "vabs $dst, $src\t# @abs8S" %} ++ ins_encode %{ ++ __ vxor_v($dst$$FloatRegister, $dst$$FloatRegister, $dst$$FloatRegister); ++ __ vabsd_h($dst$$FloatRegister, $src$$FloatRegister, $dst$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct abs4I(vecX dst, vecX src) %{ ++ predicate(vector_length(n) == 4); ++ match(Set dst (AbsVI src)); ++ effect(TEMP_DEF dst); ++ format %{ "vabs $dst, $src\t# @abs4I" %} ++ ins_encode %{ ++ __ vxor_v($dst$$FloatRegister, $dst$$FloatRegister, $dst$$FloatRegister); ++ __ vabsd_w($dst$$FloatRegister, $src$$FloatRegister, $dst$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct abs2L(vecX dst, vecX src) %{ ++ predicate(vector_length(n) == 2); ++ match(Set dst (AbsVL src)); ++ 
effect(TEMP_DEF dst); ++ format %{ "vabs $dst, $src\t# @abs2L" %} ++ ins_encode %{ ++ __ vxor_v($dst$$FloatRegister, $dst$$FloatRegister, $dst$$FloatRegister); ++ __ vabsd_d($dst$$FloatRegister, $src$$FloatRegister, $dst$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct abs4F(vecX dst, vecX src) %{ ++ predicate(vector_length(n) == 4); ++ match(Set dst (AbsVF src)); ++ format %{ "vbitclri.w $dst, $src\t# @abs4F" %} ++ ins_encode %{ ++ __ vbitclri_w($dst$$FloatRegister, $src$$FloatRegister, 0x1f); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct abs2D(vecX dst, vecX src) %{ ++ predicate(vector_length(n) == 2); ++ match(Set dst (AbsVD src)); ++ format %{ "vbitclri.d $dst, $src\t# @abs2D" %} ++ ins_encode %{ ++ __ vbitclri_d($dst$$FloatRegister, $src$$FloatRegister, 0x3f); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct abs32B(vecY dst, vecY src) %{ ++ predicate(vector_length(n) == 32); ++ match(Set dst (AbsVB src)); ++ effect(TEMP_DEF dst); ++ format %{ "xvabs $dst, $src\t# @abs32B" %} ++ ins_encode %{ ++ __ xvxor_v($dst$$FloatRegister, $dst$$FloatRegister, $dst$$FloatRegister); ++ __ xvabsd_b($dst$$FloatRegister, $src$$FloatRegister, $dst$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct abs16S(vecY dst, vecY src) %{ ++ predicate(vector_length(n) == 16); ++ match(Set dst (AbsVS src)); ++ effect(TEMP_DEF dst); ++ format %{ "xvabs $dst, $src\t# @abs16S" %} ++ ins_encode %{ ++ __ xvxor_v($dst$$FloatRegister, $dst$$FloatRegister, $dst$$FloatRegister); ++ __ xvabsd_h($dst$$FloatRegister, $src$$FloatRegister, $dst$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct abs8I(vecY dst, vecY src) %{ ++ predicate(vector_length(n) == 8); ++ match(Set dst (AbsVI src)); ++ effect(TEMP_DEF dst); ++ format %{ "xvabs $dst, $src\t# @abs8I" %} ++ ins_encode %{ ++ __ xvxor_v($dst$$FloatRegister, $dst$$FloatRegister, $dst$$FloatRegister); ++ __ xvabsd_w($dst$$FloatRegister, $src$$FloatRegister, $dst$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct abs4L(vecY dst, vecY src) %{ ++ predicate(vector_length(n) == 4); ++ match(Set dst (AbsVL src)); ++ effect(TEMP_DEF dst); ++ format %{ "xvabs $dst, $src\t# @abs4L" %} ++ ins_encode %{ ++ __ xvxor_v($dst$$FloatRegister, $dst$$FloatRegister, $dst$$FloatRegister); ++ __ xvabsd_d($dst$$FloatRegister, $src$$FloatRegister, $dst$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct abs8F(vecY dst, vecY src) %{ ++ predicate(vector_length(n) == 8); ++ match(Set dst (AbsVF src)); ++ format %{ "xvbitclri.w $dst, $src\t# @abs8F" %} ++ ins_encode %{ ++ __ xvbitclri_w($dst$$FloatRegister, $src$$FloatRegister, 0x1f); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct abs4D(vecY dst, vecY src) %{ ++ predicate(vector_length(n) == 4); ++ match(Set dst (AbsVD src)); ++ format %{ "xvbitclri.d $dst, $src\t# @abs4D" %} ++ ins_encode %{ ++ __ xvbitclri_d($dst$$FloatRegister, $src$$FloatRegister, 0x3f); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++// --------------------------------- ABS DIFF --------------------------------- ++ ++instruct absd4I(vecX dst, vecX src1, vecX src2) %{ ++ predicate(vector_length(n) == 4); ++ match(Set dst (AbsVI (SubVI src1 src2))); ++ format %{ "vabsd.w $dst, $src1, $src2\t# @absd4I" %} ++ ins_encode %{ ++ __ vabsd_w($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct absd2L(vecX dst, vecX src1, vecX src2) %{ ++ predicate(vector_length(n) == 2); ++ match(Set dst (AbsVL (SubVL src1 src2))); ++ format %{ "vabsd.d 
$dst, $src1, $src2\t# @absd2L" %} ++ ins_encode %{ ++ __ vabsd_d($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct absd8I(vecY dst, vecY src1, vecY src2) %{ ++ predicate(vector_length(n) == 8); ++ match(Set dst (AbsVI (SubVI src1 src2))); ++ format %{ "xvabsd.w $dst, $src1, $src2\t# @absd8I" %} ++ ins_encode %{ ++ __ xvabsd_w($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct absd4L(vecY dst, vecY src1, vecY src2) %{ ++ predicate(vector_length(n) == 4); ++ match(Set dst (AbsVL (SubVL src1 src2))); ++ format %{ "xvabsd.d $dst, $src1, $src2\t# @absd4L" %} ++ ins_encode %{ ++ __ xvabsd_d($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++// --------------------------------- MAX -------------------------------------- ++ ++instruct max16B(vecX dst, vecX src1, vecX src2) %{ ++ predicate(vector_length(n) == 16 && vector_element_basic_type(n) == T_BYTE); ++ match(Set dst (MaxV src1 src2)); ++ format %{ "vmax.b $dst, $src1, $src2\t# @max16B" %} ++ ins_encode %{ ++ __ vmax_b($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct max8S(vecX dst, vecX src1, vecX src2) %{ ++ predicate(vector_length(n) == 8 && vector_element_basic_type(n) == T_SHORT); ++ match(Set dst (MaxV src1 src2)); ++ format %{ "vmax.h $dst, $src1, $src2\t# @max8S" %} ++ ins_encode %{ ++ __ vmax_h($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct max4I(vecX dst, vecX src1, vecX src2) %{ ++ predicate(vector_length(n) == 4 && vector_element_basic_type(n) == T_INT); ++ match(Set dst (MaxV src1 src2)); ++ format %{ "vmax.w $dst, $src1, $src2\t# @max4I" %} ++ ins_encode %{ ++ __ vmax_w($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct max2L(vecX dst, vecX src1, vecX src2) %{ ++ predicate(vector_length(n) == 2 && vector_element_basic_type(n) == T_LONG); ++ match(Set dst (MaxV src1 src2)); ++ format %{ "vmax.d $dst, $src1, $src2\t# @max2L" %} ++ ins_encode %{ ++ __ vmax_d($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct max4F(vecX dst, vecX src1, vecX src2, vecX tmp1, vecX tmp2) %{ ++ predicate(vector_length(n) == 4 && vector_element_basic_type(n) == T_FLOAT); ++ match(Set dst (MaxV src1 src2)); ++ effect(TEMP_DEF dst, TEMP tmp1, TEMP tmp2); ++ format %{ "vfmax $dst, $src1, $src2\t# TEMP($tmp1, $tmp2) @max4F" %} ++ ins_encode %{ ++ __ vfmax_s($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); ++ __ vxor_v($tmp1$$FloatRegister, $tmp1$$FloatRegister, $tmp1$$FloatRegister); ++ __ vfdiv_s($tmp1$$FloatRegister, $tmp1$$FloatRegister, $tmp1$$FloatRegister); ++ __ vfcmp_cun_s($tmp2$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); ++ __ vbitsel_v($dst$$FloatRegister, $dst$$FloatRegister, $tmp1$$FloatRegister, $tmp2$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct max2D(vecX dst, vecX src1, vecX src2, vecX tmp1, vecX tmp2) %{ ++ predicate(vector_length(n) == 2 && vector_element_basic_type(n) == T_DOUBLE); ++ match(Set dst (MaxV src1 src2)); ++ effect(TEMP_DEF dst, TEMP tmp1, TEMP tmp2); ++ format %{ "vfmax $dst, $src1, $src2\t# TEMP($tmp1, $tmp2) @max2D" %} ++ ins_encode %{ ++ __ vfmax_d($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); ++ __ 
vxor_v($tmp1$$FloatRegister, $tmp1$$FloatRegister, $tmp1$$FloatRegister); ++ __ vfdiv_d($tmp1$$FloatRegister, $tmp1$$FloatRegister, $tmp1$$FloatRegister); ++ __ vfcmp_cun_d($tmp2$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); ++ __ vbitsel_v($dst$$FloatRegister, $dst$$FloatRegister, $tmp1$$FloatRegister, $tmp2$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct max32B(vecY dst, vecY src1, vecY src2) %{ ++ predicate(vector_length(n) == 32 && vector_element_basic_type(n) == T_BYTE); ++ match(Set dst (MaxV src1 src2)); ++ format %{ "xvmax.b $dst, $src1, $src2\t# @max32B" %} ++ ins_encode %{ ++ __ xvmax_b($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct max16S(vecY dst, vecY src1, vecY src2) %{ ++ predicate(vector_length(n) == 16 && vector_element_basic_type(n) == T_SHORT); ++ match(Set dst (MaxV src1 src2)); ++ format %{ "xvmax.h $dst, $src1, $src2\t# @max16S" %} ++ ins_encode %{ ++ __ xvmax_h($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct max8I(vecY dst, vecY src1, vecY src2) %{ ++ predicate(vector_length(n) == 8 && vector_element_basic_type(n) == T_INT); ++ match(Set dst (MaxV src1 src2)); ++ format %{ "xvmax.w $dst, $src1, $src2\t# @max8I" %} ++ ins_encode %{ ++ __ xvmax_w($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct max4L(vecY dst, vecY src1, vecY src2) %{ ++ predicate(vector_length(n) == 4 && vector_element_basic_type(n) == T_LONG); ++ match(Set dst (MaxV src1 src2)); ++ format %{ "xvmax.d $dst, $src1, $src2\t# @max4L" %} ++ ins_encode %{ ++ __ xvmax_d($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct max8F(vecY dst, vecY src1, vecY src2, vecY tmp1, vecY tmp2) %{ ++ predicate(vector_length(n) == 8 && vector_element_basic_type(n) == T_FLOAT); ++ match(Set dst (MaxV src1 src2)); ++ effect(TEMP_DEF dst, TEMP tmp1, TEMP tmp2); ++ format %{ "xvfmax $dst, $src1, $src2\t# TEMP($tmp1, $tmp2) @max8F" %} ++ ins_encode %{ ++ __ xvfmax_s($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); ++ __ xvxor_v($tmp1$$FloatRegister, $tmp1$$FloatRegister, $tmp1$$FloatRegister); ++ __ xvfdiv_s($tmp1$$FloatRegister, $tmp1$$FloatRegister, $tmp1$$FloatRegister); ++ __ xvfcmp_cun_s($tmp2$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); ++ __ xvbitsel_v($dst$$FloatRegister, $dst$$FloatRegister, $tmp1$$FloatRegister, $tmp2$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct max4D(vecY dst, vecY src1, vecY src2, vecY tmp1, vecY tmp2) %{ ++ predicate(vector_length(n) == 4 && vector_element_basic_type(n) == T_DOUBLE); ++ match(Set dst (MaxV src1 src2)); ++ effect(TEMP_DEF dst, TEMP tmp1, TEMP tmp2); ++ format %{ "xvfmax $dst, $src1, $src2\t# TEMP($tmp1, $tmp2) @max4D" %} ++ ins_encode %{ ++ __ xvfmax_d($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); ++ __ xvxor_v($tmp1$$FloatRegister, $tmp1$$FloatRegister, $tmp1$$FloatRegister); ++ __ xvfdiv_d($tmp1$$FloatRegister, $tmp1$$FloatRegister, $tmp1$$FloatRegister); ++ __ xvfcmp_cun_d($tmp2$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); ++ __ xvbitsel_v($dst$$FloatRegister, $dst$$FloatRegister, $tmp1$$FloatRegister, $tmp2$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++// --------------------------------- MIN -------------------------------------- ++ ++instruct min16B(vecX dst, vecX src1, 
vecX src2) %{ ++ predicate(vector_length(n) == 16 && vector_element_basic_type(n) == T_BYTE); ++ match(Set dst (MinV src1 src2)); ++ format %{ "vmin.b $dst, $src1, $src2\t# @min16B" %} ++ ins_encode %{ ++ __ vmin_b($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct min8S(vecX dst, vecX src1, vecX src2) %{ ++ predicate(vector_length(n) == 8 && vector_element_basic_type(n) == T_SHORT); ++ match(Set dst (MinV src1 src2)); ++ format %{ "vmin.h $dst, $src1, $src2\t# @min8S" %} ++ ins_encode %{ ++ __ vmin_h($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct min4I(vecX dst, vecX src1, vecX src2) %{ ++ predicate(vector_length(n) == 4 && vector_element_basic_type(n) == T_INT); ++ match(Set dst (MinV src1 src2)); ++ format %{ "vmin.w $dst, $src1, $src2\t# @min4I" %} ++ ins_encode %{ ++ __ vmin_w($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct min2L(vecX dst, vecX src1, vecX src2) %{ ++ predicate(vector_length(n) == 2 && vector_element_basic_type(n) == T_LONG); ++ match(Set dst (MinV src1 src2)); ++ format %{ "vmin.d $dst, $src1, $src2\t# @min2L" %} ++ ins_encode %{ ++ __ vmin_d($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct min4F(vecX dst, vecX src1, vecX src2, vecX tmp1, vecX tmp2) %{ ++ predicate(vector_length(n) == 4 && vector_element_basic_type(n) == T_FLOAT); ++ match(Set dst (MinV src1 src2)); ++ effect(TEMP_DEF dst, TEMP tmp1, TEMP tmp2); ++ format %{ "vfmin $dst, $src1, $src2\t# TEMP($tmp1, $tmp2) @min4F" %} ++ ins_encode %{ ++ __ vfmin_s($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); ++ __ vxor_v($tmp1$$FloatRegister, $tmp1$$FloatRegister, $tmp1$$FloatRegister); ++ __ vfdiv_s($tmp1$$FloatRegister, $tmp1$$FloatRegister, $tmp1$$FloatRegister); ++ __ vfcmp_cun_s($tmp2$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); ++ __ vbitsel_v($dst$$FloatRegister, $dst$$FloatRegister, $tmp1$$FloatRegister, $tmp2$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct min2D(vecX dst, vecX src1, vecX src2, vecX tmp1, vecX tmp2) %{ ++ predicate(vector_length(n) == 2 && vector_element_basic_type(n) == T_DOUBLE); ++ match(Set dst (MinV src1 src2)); ++ effect(TEMP_DEF dst, TEMP tmp1, TEMP tmp2); ++ format %{ "vfmin $dst, $src1, $src2\t# TEMP($tmp1, $tmp2) @min2D" %} ++ ins_encode %{ ++ __ vfmin_d($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); ++ __ vxor_v($tmp1$$FloatRegister, $tmp1$$FloatRegister, $tmp1$$FloatRegister); ++ __ vfdiv_d($tmp1$$FloatRegister, $tmp1$$FloatRegister, $tmp1$$FloatRegister); ++ __ vfcmp_cun_d($tmp2$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); ++ __ vbitsel_v($dst$$FloatRegister, $dst$$FloatRegister, $tmp1$$FloatRegister, $tmp2$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct min32B(vecY dst, vecY src1, vecY src2) %{ ++ predicate(vector_length(n) == 32 && vector_element_basic_type(n) == T_BYTE); ++ match(Set dst (MinV src1 src2)); ++ format %{ "xvmin.b $dst, $src1, $src2\t# @min32B" %} ++ ins_encode %{ ++ __ xvmin_b($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct min16S(vecY dst, vecY src1, vecY src2) %{ ++ predicate(vector_length(n) == 16 && vector_element_basic_type(n) == T_SHORT); ++ match(Set dst (MinV src1 src2)); ++ format %{ "xvmin.h $dst, $src1, 
$src2\t# @min16S" %} ++ ins_encode %{ ++ __ xvmin_h($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct min8I(vecY dst, vecY src1, vecY src2) %{ ++ predicate(vector_length(n) == 8 && vector_element_basic_type(n) == T_INT); ++ match(Set dst (MinV src1 src2)); ++ format %{ "xvmin.w $dst, $src1, $src2\t# @min8I" %} ++ ins_encode %{ ++ __ xvmin_w($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct min4L(vecY dst, vecY src1, vecY src2) %{ ++ predicate(vector_length(n) == 4 && vector_element_basic_type(n) == T_LONG); ++ match(Set dst (MinV src1 src2)); ++ format %{ "xvmin.d $dst, $src1, $src2\t# @min4L" %} ++ ins_encode %{ ++ __ xvmin_d($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct min8F(vecY dst, vecY src1, vecY src2, vecY tmp1, vecY tmp2) %{ ++ predicate(vector_length(n) == 8 && vector_element_basic_type(n) == T_FLOAT); ++ match(Set dst (MinV src1 src2)); ++ effect(TEMP_DEF dst, TEMP tmp1, TEMP tmp2); ++ format %{ "xvfmin $dst, $src1, $src2\t# TEMP($tmp1, $tmp2) @min8F" %} ++ ins_encode %{ ++ __ xvfmin_s($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); ++ __ xvxor_v($tmp1$$FloatRegister, $tmp1$$FloatRegister, $tmp1$$FloatRegister); ++ __ xvfdiv_s($tmp1$$FloatRegister, $tmp1$$FloatRegister, $tmp1$$FloatRegister); ++ __ xvfcmp_cun_s($tmp2$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); ++ __ xvbitsel_v($dst$$FloatRegister, $dst$$FloatRegister, $tmp1$$FloatRegister, $tmp2$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct min4D(vecY dst, vecY src1, vecY src2, vecY tmp1, vecY tmp2) %{ ++ predicate(vector_length(n) == 4 && vector_element_basic_type(n) == T_DOUBLE); ++ match(Set dst (MinV src1 src2)); ++ effect(TEMP_DEF dst, TEMP tmp1, TEMP tmp2); ++ format %{ "xvfmin $dst, $src1, $src2\t# TEMP($tmp1, $tmp2) @min4D" %} ++ ins_encode %{ ++ __ xvfmin_d($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); ++ __ xvxor_v($tmp1$$FloatRegister, $tmp1$$FloatRegister, $tmp1$$FloatRegister); ++ __ xvfdiv_d($tmp1$$FloatRegister, $tmp1$$FloatRegister, $tmp1$$FloatRegister); ++ __ xvfcmp_cun_d($tmp2$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); ++ __ xvbitsel_v($dst$$FloatRegister, $dst$$FloatRegister, $tmp1$$FloatRegister, $tmp2$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++// --------------------------------- NEG -------------------------------------- ++ ++instruct neg4I(vecX dst, vecX src) %{ ++ predicate(vector_length(n) == 4); ++ match(Set dst (NegVI src)); ++ format %{ "vneg.w $dst, $src\t# @neg4I" %} ++ ins_encode %{ ++ DEBUG_ONLY(Unimplemented()); // unverified ++ __ vneg_w($dst$$FloatRegister, $src$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct neg4F(vecX dst, vecX src) %{ ++ predicate(vector_length(n) == 4); ++ match(Set dst (NegVF src)); ++ format %{ "vbitrevi.w $dst, $src\t# @neg4F" %} ++ ins_encode %{ ++ __ vbitrevi_w($dst$$FloatRegister, $src$$FloatRegister, 0x1f); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct neg2D(vecX dst, vecX src) %{ ++ predicate(vector_length(n) == 2); ++ match(Set dst (NegVD src)); ++ format %{ "vbitrevi.d $dst, $src\t# @neg2D" %} ++ ins_encode %{ ++ __ vbitrevi_d($dst$$FloatRegister, $src$$FloatRegister, 0x3f); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct neg8I(vecY dst, vecY src) %{ ++ predicate(vector_length(n) == 8); ++ match(Set dst (NegVI src)); ++ 
format %{ "xvneg.w $dst, $src\t# @neg8I" %} ++ ins_encode %{ ++ DEBUG_ONLY(Unimplemented()); // unverified ++ __ xvneg_w($dst$$FloatRegister, $src$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct neg8F(vecY dst, vecY src) %{ ++ predicate(vector_length(n) == 8); ++ match(Set dst (NegVF src)); ++ format %{ "xvbitrevi.w $dst, $src\t# @neg8F" %} ++ ins_encode %{ ++ __ xvbitrevi_w($dst$$FloatRegister, $src$$FloatRegister, 0x1f); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct neg4D(vecY dst, vecY src) %{ ++ predicate(vector_length(n) == 4); ++ match(Set dst (NegVD src)); ++ format %{ "xvbitrevi.d $dst, $src\t# @neg4D" %} ++ ins_encode %{ ++ __ xvbitrevi_d($dst$$FloatRegister, $src$$FloatRegister, 0x3f); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++// --------------------------------- SQRT ------------------------------------- ++ ++instruct sqrt4F(vecX dst, vecX src) %{ ++ predicate(vector_length(n) == 4); ++ match(Set dst (SqrtVF src)); ++ format %{ "vfsqrt.s $dst, $src\t# @sqrt4F" %} ++ ins_encode %{ ++ __ vfsqrt_s($dst$$FloatRegister, $src$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct sqrt2D(vecX dst, vecX src) %{ ++ predicate(vector_length(n) == 2); ++ match(Set dst (SqrtVD src)); ++ format %{ "vfsqrt.d $dst, $src\t# @sqrt2D" %} ++ ins_encode %{ ++ __ vfsqrt_d($dst$$FloatRegister, $src$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct sqrt8F(vecY dst, vecY src) %{ ++ predicate(vector_length(n) == 8); ++ match(Set dst (SqrtVF src)); ++ format %{ "xvfsqrt.s $dst, $src\t# @sqrt8F" %} ++ ins_encode %{ ++ __ xvfsqrt_s($dst$$FloatRegister, $src$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct sqrt4D(vecY dst, vecY src) %{ ++ predicate(vector_length(n) == 4); ++ match(Set dst (SqrtVD src)); ++ format %{ "xvfsqrt.d $dst, $src\t# @sqrt4D" %} ++ ins_encode %{ ++ __ xvfsqrt_d($dst$$FloatRegister, $src$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++// --------------------------------- MADD ------------------------------------- ++ ++instruct madd16B(vecX dst, vecX src1, vecX src2) %{ ++ predicate(vector_length(n) == 16); ++ match(Set dst (AddVB dst (MulVB src1 src2))); ++ format %{ "vmadd.b $dst, $src1, $src2\t# @madd16B" %} ++ ins_encode %{ ++ __ vmadd_b($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct madd8S(vecX dst, vecX src1, vecX src2) %{ ++ predicate(vector_length(n) == 8); ++ match(Set dst (AddVS dst (MulVS src1 src2))); ++ format %{ "vmadd.h $dst, $src1, $src2\t# @madd8S" %} ++ ins_encode %{ ++ __ vmadd_h($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct madd4I(vecX dst, vecX src1, vecX src2) %{ ++ predicate(vector_length(n) == 4); ++ match(Set dst (AddVI dst (MulVI src1 src2))); ++ format %{ "vmadd $dst, $src1, $src2\t# @madd4I" %} ++ ins_encode %{ ++ __ vmadd_w($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct madd2L(vecX dst, vecX src1, vecX src2) %{ ++ predicate(vector_length(n) == 2); ++ match(Set dst (AddVL dst (MulVL src1 src2))); ++ format %{ "vmadd.d $dst, $src1, $src2\t# @madd2L" %} ++ ins_encode %{ ++ __ vmadd_d($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++// src1 * src2 + src3 ++instruct madd4F(vecX dst, vecX src1, vecX src2, vecX src3) %{ ++ predicate(UseFMA && vector_length(n) == 4); ++ match(Set dst (FmaVF src3 (Binary src1 src2))); 
++ format %{ "vfmadd.s $dst, $src1, $src2, $src3\t# @madd4F" %} ++ ins_encode %{ ++ __ vfmadd_s($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister, $src3$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++// src1 * src2 + src3 ++instruct madd2D(vecX dst, vecX src1, vecX src2, vecX src3) %{ ++ predicate(UseFMA && vector_length(n) == 2); ++ match(Set dst (FmaVD src3 (Binary src1 src2))); ++ format %{ "vfmadd.d $dst, $src1, $src2, $src3\t# @madd2D" %} ++ ins_encode %{ ++ __ vfmadd_d($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister, $src3$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct madd32B(vecY dst, vecY src1, vecY src2) %{ ++ predicate(vector_length(n) == 32); ++ match(Set dst (AddVB dst (MulVB src1 src2))); ++ format %{ "xvmadd.b $dst, $src1, $src2\t# @madd32B" %} ++ ins_encode %{ ++ __ xvmadd_b($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct madd16S(vecY dst, vecY src1, vecY src2) %{ ++ predicate(vector_length(n) == 16); ++ match(Set dst (AddVS dst (MulVS src1 src2))); ++ format %{ "xvmadd.h $dst, $src1, $src2\t# @madd16S" %} ++ ins_encode %{ ++ __ xvmadd_h($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct madd8I(vecY dst, vecY src1, vecY src2) %{ ++ predicate(vector_length(n) == 8); ++ match(Set dst (AddVI dst (MulVI src1 src2))); ++ format %{ "xvmadd.w $dst, $src1, $src2\t# @madd8I" %} ++ ins_encode %{ ++ __ xvmadd_w($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct madd4L(vecY dst, vecY src1, vecY src2) %{ ++ predicate(vector_length(n) == 4); ++ match(Set dst (AddVL dst (MulVL src1 src2))); ++ format %{ "xvmadd.d $dst, $src1, $src2\t# @madd4L" %} ++ ins_encode %{ ++ __ xvmadd_d($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++// src1 * src2 + src3 ++instruct madd8F(vecY dst, vecY src1, vecY src2, vecY src3) %{ ++ predicate(UseFMA && vector_length(n) == 8); ++ match(Set dst (FmaVF src3 (Binary src1 src2))); ++ format %{ "xvfmadd.s $dst, $src1, $src2, $src3\t# @madd8F" %} ++ ins_encode %{ ++ __ xvfmadd_s($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister, $src3$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++// src1 * src2 + src3 ++instruct madd4D(vecY dst, vecY src1, vecY src2, vecY src3) %{ ++ predicate(UseFMA && vector_length(n) == 4); ++ match(Set dst (FmaVD src3 (Binary src1 src2))); ++ format %{ "xvfmadd.d $dst, $src1, $src2, $src3\t# @madd4D" %} ++ ins_encode %{ ++ __ xvfmadd_d($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister, $src3$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++// --------------------------------- MSUB ------------------------------------- ++ ++instruct msub16B(vecX dst, vecX src1, vecX src2) %{ ++ predicate(vector_length(n) == 16); ++ match(Set dst (SubVB dst (MulVB src1 src2))); ++ format %{ "vmsub.b $dst, $src1, $src2\t# @msub16B" %} ++ ins_encode %{ ++ __ vmsub_b($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct msub8S(vecX dst, vecX src1, vecX src2) %{ ++ predicate(vector_length(n) == 8); ++ match(Set dst (SubVS dst (MulVS src1 src2))); ++ format %{ "vmsub.h $dst, $src1, $src2\t# @msub8S" %} ++ ins_encode %{ ++ __ vmsub_h($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ 
++instruct msub4I(vecX dst, vecX src1, vecX src2) %{ ++ predicate(vector_length(n) == 4); ++ match(Set dst (SubVI dst (MulVI src1 src2))); ++ format %{ "vmsub.w $dst, $src1, $src2\t# @msub4I" %} ++ ins_encode %{ ++ __ vmsub_w($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct msub2L(vecX dst, vecX src1, vecX src2) %{ ++ predicate(vector_length(n) == 2); ++ match(Set dst (SubVL dst (MulVL src1 src2))); ++ format %{ "vmsub.d $dst, $src1, $src2\t# @msub2L" %} ++ ins_encode %{ ++ __ vmsub_d($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++// src1 * src2 - src3 ++instruct msub4F(vecX dst, vecX src1, vecX src2, vecX src3) %{ ++ predicate(UseFMA && vector_length(n) == 4); ++ match(Set dst (FmaVF (NegVF src3) (Binary src1 src2))); ++ format %{ "vfmsub.s $dst, $src1, $src2, $src3\t# @msub4F" %} ++ ins_encode %{ ++ __ vfmsub_s($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister, $src3$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++// src1 * src2 - src3 ++instruct msub2D(vecX dst, vecX src1, vecX src2, vecX src3) %{ ++ predicate(UseFMA && vector_length(n) == 2); ++ match(Set dst (FmaVD (NegVD src3) (Binary src1 src2))); ++ format %{ "vfmsub.d $dst, $src1, $src2, $src3\t# @msub2D" %} ++ ins_encode %{ ++ __ vfmsub_d($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister, $src3$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct msub32B(vecY dst, vecY src1, vecY src2) %{ ++ predicate(vector_length(n) == 32); ++ match(Set dst (SubVB dst (MulVB src1 src2))); ++ format %{ "xvmsub.b $dst, $src1, $src2\t# @msub32B" %} ++ ins_encode %{ ++ __ xvmsub_b($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct msub16S(vecY dst, vecY src1, vecY src2) %{ ++ predicate(vector_length(n) == 16); ++ match(Set dst (SubVS dst (MulVS src1 src2))); ++ format %{ "xvmsub.h $dst, $src1, $src2\t# @msub16S" %} ++ ins_encode %{ ++ __ xvmsub_h($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct msub8I(vecY dst, vecY src1, vecY src2) %{ ++ predicate(vector_length(n) == 8); ++ match(Set dst (SubVI dst (MulVI src1 src2))); ++ format %{ "xvmsub.w $dst, $src1, $src2\t# @msub8I" %} ++ ins_encode %{ ++ __ xvmsub_w($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct msub4L(vecY dst, vecY src1, vecY src2) %{ ++ predicate(vector_length(n) == 4); ++ match(Set dst (SubVL dst (MulVL src1 src2))); ++ format %{ "xvmsub.d $dst, $src1, $src2\t# @msub4L" %} ++ ins_encode %{ ++ __ xvmsub_d($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++// src1 * src2 - src3 ++instruct msub8F(vecY dst, vecY src1, vecY src2, vecY src3) %{ ++ predicate(UseFMA && vector_length(n) == 8); ++ match(Set dst (FmaVF (NegVF src3) (Binary src1 src2))); ++ format %{ "xvfmsub.s $dst, $src1, $src2, $src3\t# @msub8F" %} ++ ins_encode %{ ++ __ xvfmsub_s($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister, $src3$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++// src1 * src2 - src3 ++instruct msub4D(vecY dst, vecY src1, vecY src2, vecY src3) %{ ++ predicate(UseFMA && vector_length(n) == 4); ++ match(Set dst (FmaVD (NegVD src3) (Binary src1 src2))); ++ format %{ "xvfmsub.d $dst, $src1, $src2, $src3\t# @msub4D" %} ++ ins_encode %{ ++ __ 
xvfmsub_d($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister, $src3$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++// --------------------------------- FNMADD ----------------------------------- ++ ++// -src1 * src2 - src3 ++instruct nmadd4F(vecX dst, vecX src1, vecX src2, vecX src3) %{ ++ predicate(UseFMA && vector_length(n) == 4); ++ match(Set dst (FmaVF (NegVF src3) (Binary (NegVF src1) src2))); ++ match(Set dst (FmaVF (NegVF src3) (Binary src1 (NegVF src2)))); ++ format %{ "vfnmadd.s $dst, $src1, $src2, $src3\t# @nmadd4F" %} ++ ins_encode %{ ++ __ vfnmadd_s($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister, $src3$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++// -src1 * src2 - src3 ++instruct nmadd2D(vecX dst, vecX src1, vecX src2, vecX src3) %{ ++ predicate(UseFMA && vector_length(n) == 2); ++ match(Set dst (FmaVD (NegVD src3) (Binary (NegVD src1) src2))); ++ match(Set dst (FmaVD (NegVD src3) (Binary src1 (NegVD src2)))); ++ format %{ "vfnmadd.d $dst, $src1, $src2, $src3\t# @nmadd2D" %} ++ ins_encode %{ ++ __ vfnmadd_d($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister, $src3$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++// -src1 * src2 - src3 ++instruct nmadd8F(vecY dst, vecY src1, vecY src2, vecY src3) %{ ++ predicate(UseFMA && vector_length(n) == 8); ++ match(Set dst (FmaVF (NegVF src3) (Binary (NegVF src1) src2))); ++ match(Set dst (FmaVF (NegVF src3) (Binary src1 (NegVF src2)))); ++ format %{ "xvfnmadd.s $dst, $src1, $src2, $src3\t# @nmadd8F" %} ++ ins_encode %{ ++ __ xvfnmadd_s($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister, $src3$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++// -src1 * src2 - src3 ++instruct nmadd4D(vecY dst, vecY src1, vecY src2, vecY src3) %{ ++ predicate(UseFMA && vector_length(n) == 4); ++ match(Set dst (FmaVD (NegVD src3) (Binary (NegVD src1) src2))); ++ match(Set dst (FmaVD (NegVD src3) (Binary src1 (NegVD src2)))); ++ format %{ "xvfnmadd.d $dst, $src1, $src2, $src3\t# @nmadd4D" %} ++ ins_encode %{ ++ __ xvfnmadd_d($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister, $src3$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++// --------------------------------- FNMSUB ----------------------------------- ++ ++// -src1 * src2 + src3 ++instruct nmsub4F(vecX dst, vecX src1, vecX src2, vecX src3) %{ ++ predicate(UseFMA && vector_length(n) == 4); ++ match(Set dst (FmaVF src3 (Binary (NegVF src1) src2))); ++ match(Set dst (FmaVF src3 (Binary src1 (NegVF src2)))); ++ format %{ "vfnmsub.s $dst, $src1, $src2, $src3\t# @nmsub4F" %} ++ ins_encode %{ ++ __ vfnmsub_s($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister, $src3$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++// -src1 * src2 + src3 ++instruct nmsub2D(vecX dst, vecX src1, vecX src2, vecX src3) %{ ++ predicate(UseFMA && vector_length(n) == 2); ++ match(Set dst (FmaVD src3 (Binary (NegVD src1) src2))); ++ match(Set dst (FmaVD src3 (Binary src1 (NegVD src2)))); ++ format %{ "vfnmsub.d $dst, $src1, $src2, $src3\t# @nmsub2D" %} ++ ins_encode %{ ++ __ vfnmsub_d($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister, $src3$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++// -src1 * src2 + src3 ++instruct nmsub8F(vecY dst, vecY src1, vecY src2, vecY src3) %{ ++ predicate(UseFMA && vector_length(n) == 8); ++ match(Set dst (FmaVF src3 (Binary (NegVF src1) src2))); ++ match(Set dst (FmaVF src3 (Binary src1 (NegVF src2)))); ++ format %{ "xvfnmsub.s $dst, 
$src1, $src2, $src3\t# @nmsub8F" %} ++ ins_encode %{ ++ __ xvfnmsub_s($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister, $src3$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++// -src1 * src2 + src3 ++instruct nmsub4D(vecY dst, vecY src1, vecY src2, vecY src3) %{ ++ predicate(UseFMA && vector_length(n) == 4); ++ match(Set dst (FmaVD src3 (Binary (NegVD src1) src2))); ++ match(Set dst (FmaVD src3 (Binary src1 (NegVD src2)))); ++ format %{ "xvfnmsub.d $dst, $src1, $src2, $src3\t# @nmsub4D" %} ++ ins_encode %{ ++ __ xvfnmsub_d($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister, $src3$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++// --------------- Vector Multiply-Add Shorts into Integer -------------------- ++ ++instruct muladd8Sto4I(vecX dst, vecX src1, vecX src2, vecX tmp) %{ ++ predicate(vector_length(n->in(1)) == 8 && vector_element_basic_type(n->in(1)) == T_SHORT); ++ match(Set dst (MulAddVS2VI src1 src2)); ++ effect(TEMP_DEF dst, TEMP tmp); ++ format %{ "muladdvs2vi $dst, $src1, $src2\t# TEMP($tmp) @muladd8Sto4I" %} ++ ins_encode %{ ++ DEBUG_ONLY(Unimplemented()); // unverified ++ __ vmulwev_w_h($tmp$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); ++ __ vmulwod_w_h($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); ++ __ vadd_w($dst$$FloatRegister, $tmp$$FloatRegister, $dst$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct muladd16Sto8I(vecY dst, vecY src1, vecY src2, vecY tmp) %{ ++ predicate(vector_length(n->in(1)) == 16 && vector_element_basic_type(n->in(1)) == T_SHORT); ++ match(Set dst (MulAddVS2VI src1 src2)); ++ effect(TEMP_DEF dst, TEMP tmp); ++ format %{ "muladdvs2vi $dst, $src1, $src2\t# TEMP($tmp) @muladd16Sto8I" %} ++ ins_encode %{ ++ DEBUG_ONLY(Unimplemented()); // unverified ++ __ xvmulwev_w_h($tmp$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); ++ __ xvmulwod_w_h($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); ++ __ xvadd_w($dst$$FloatRegister, $tmp$$FloatRegister, $dst$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++// ------------------------------ Shift --------------------------------------- ++ ++instruct shiftcntX(vecX dst, mRegI cnt) %{ ++ predicate(vector_length_in_bytes(n) == 16); ++ match(Set dst (LShiftCntV cnt)); ++ match(Set dst (RShiftCntV cnt)); ++ format %{ "vreplgr2vr.b $dst, $cnt\t# @shiftcntX" %} ++ ins_encode %{ ++ __ vreplgr2vr_b($dst$$FloatRegister, $cnt$$Register); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct shiftcntY(vecY dst, mRegI cnt) %{ ++ predicate(vector_length_in_bytes(n) == 32); ++ match(Set dst (LShiftCntV cnt)); ++ match(Set dst (RShiftCntV cnt)); ++ format %{ "xvreplgr2vr.b $dst, $cnt\t# @shiftcntY" %} ++ ins_encode %{ ++ __ xvreplgr2vr_b($dst$$FloatRegister, $cnt$$Register); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++// ------------------------------ LeftShift ----------------------------------- ++ ++instruct sll16B(vecX dst, vecX src, vecX shift, vecX tmp) %{ ++ predicate(vector_length(n) == 16); ++ match(Set dst (LShiftVB src shift)); ++ effect(TEMP_DEF dst, TEMP tmp); ++ format %{ "vsll $dst, $src, $shift\t# TEMP($tmp) @sll16B" %} ++ ins_encode %{ ++ __ vsll_b($tmp$$FloatRegister, $src$$FloatRegister, $shift$$FloatRegister); ++ __ vslti_bu($dst$$FloatRegister, $shift$$FloatRegister, 0x8); ++ __ vand_v($dst$$FloatRegister, $dst$$FloatRegister, $tmp$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct sll16B_imm(vecX dst, vecX src, immI shift) %{ ++ predicate(vector_length(n) == 
16); ++ match(Set dst (LShiftVB src (LShiftCntV shift))); ++ format %{ "vslli.b $dst, $src, $shift\t# @sll16B_imm" %} ++ ins_encode %{ ++ if ($shift$$constant >= 8) { ++ __ vxor_v($dst$$FloatRegister, $dst$$FloatRegister, $dst$$FloatRegister); ++ } else { ++ __ vslli_b($dst$$FloatRegister, $src$$FloatRegister, $shift$$constant); ++ } ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct sll8S(vecX dst, vecX src, vecX shift, vecX tmp) %{ ++ predicate(vector_length(n) == 8); ++ match(Set dst (LShiftVS src shift)); ++ effect(TEMP_DEF dst, TEMP tmp); ++ format %{ "vsll $dst, $src, $shift\t# TEMP($tmp) @sll8S" %} ++ ins_encode %{ ++ __ vsll_h($tmp$$FloatRegister, $src$$FloatRegister, $shift$$FloatRegister); ++ __ vslti_bu($dst$$FloatRegister, $shift$$FloatRegister, 0x10); ++ __ vand_v($dst$$FloatRegister, $dst$$FloatRegister, $tmp$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct sll8S_imm(vecX dst, vecX src, immI shift) %{ ++ predicate(vector_length(n) == 8); ++ match(Set dst (LShiftVS src (LShiftCntV shift))); ++ format %{ "vslli.h $dst, $src, $shift\t# @sll8S_imm" %} ++ ins_encode %{ ++ if ($shift$$constant >= 16) { ++ __ vxor_v($dst$$FloatRegister, $dst$$FloatRegister, $dst$$FloatRegister); ++ } else { ++ __ vslli_h($dst$$FloatRegister, $src$$FloatRegister, $shift$$constant); ++ } ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct sll4I(vecX dst, vecX src, vecX shift) %{ ++ predicate(vector_length(n) == 4); ++ match(Set dst (LShiftVI src shift)); ++ format %{ "vsll.w $dst, $src, $shift\t# @sll4I" %} ++ ins_encode %{ ++ __ vsll_w($dst$$FloatRegister, $src$$FloatRegister, $shift$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct sll4I_imm(vecX dst, vecX src, immI shift) %{ ++ predicate(vector_length(n) == 4); ++ match(Set dst (LShiftVI src (LShiftCntV shift))); ++ format %{ "vslli.w $dst, $src, $shift\t# @sll4I_imm" %} ++ ins_encode %{ ++ __ vslli_w($dst$$FloatRegister, $src$$FloatRegister, $shift$$constant); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct sll2L(vecX dst, vecX src, vecX shift) %{ ++ predicate(vector_length(n) == 2); ++ match(Set dst (LShiftVL src shift)); ++ format %{ "vsll.d $dst, $src, $shift\t# @sll2L" %} ++ ins_encode %{ ++ __ vsll_d($dst$$FloatRegister, $src$$FloatRegister, $shift$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct sll2L_imm(vecX dst, vecX src, immI shift) %{ ++ predicate(vector_length(n) == 2); ++ match(Set dst (LShiftVL src (LShiftCntV shift))); ++ format %{ "vslli.d $dst, $src, $shift\t# @sll2L_imm" %} ++ ins_encode %{ ++ __ vslli_d($dst$$FloatRegister, $src$$FloatRegister, $shift$$constant); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct sll32B(vecY dst, vecY src, vecY shift, vecY tmp) %{ ++ predicate(vector_length(n) == 32); ++ match(Set dst (LShiftVB src shift)); ++ effect(TEMP_DEF dst, TEMP tmp); ++ format %{ "xvsll $dst, $src, $shift\t# TEMP($tmp) @sll32B" %} ++ ins_encode %{ ++ __ xvsll_b($tmp$$FloatRegister, $src$$FloatRegister, $shift$$FloatRegister); ++ __ xvslti_bu($dst$$FloatRegister, $shift$$FloatRegister, 0x8); ++ __ xvand_v($dst$$FloatRegister, $dst$$FloatRegister, $tmp$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct sll32B_imm(vecY dst, vecY src, immI shift) %{ ++ predicate(vector_length(n) == 32); ++ match(Set dst (LShiftVB src (LShiftCntV shift))); ++ format %{ "xvslli.b $dst, $src, $shift\t# @sll32B_imm" %} ++ ins_encode %{ ++ if ($shift$$constant >= 8) { ++ __ xvxor_v($dst$$FloatRegister, $dst$$FloatRegister, $dst$$FloatRegister); ++ } else { ++ __ 
xvslli_b($dst$$FloatRegister, $src$$FloatRegister, $shift$$constant); ++ } ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct sll16S(vecY dst, vecY src, vecY shift, vecY tmp) %{ ++ predicate(vector_length(n) == 16); ++ match(Set dst (LShiftVS src shift)); ++ effect(TEMP_DEF dst, TEMP tmp); ++ format %{ "xvsll $dst, $src, $shift\t# TEMP($tmp) @sll16S" %} ++ ins_encode %{ ++ __ xvsll_h($tmp$$FloatRegister, $src$$FloatRegister, $shift$$FloatRegister); ++ __ xvslti_bu($dst$$FloatRegister, $shift$$FloatRegister, 0x10); ++ __ xvand_v($dst$$FloatRegister, $dst$$FloatRegister, $tmp$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct sll16S_imm(vecY dst, vecY src, immI shift) %{ ++ predicate(vector_length(n) == 16); ++ match(Set dst (LShiftVS src (LShiftCntV shift))); ++ format %{ "xvslli.h $dst, $src, $shift\t# @sll16S_imm" %} ++ ins_encode %{ ++ if ($shift$$constant >= 16) { ++ __ xvxor_v($dst$$FloatRegister, $dst$$FloatRegister, $dst$$FloatRegister); ++ } else { ++ __ xvslli_h($dst$$FloatRegister, $src$$FloatRegister, $shift$$constant); ++ } ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct sll8I(vecY dst, vecY src, vecY shift) %{ ++ predicate(vector_length(n) == 8); ++ match(Set dst (LShiftVI src shift)); ++ format %{ "xvsll.w $dst, $src, $shift\t# @sll8I" %} ++ ins_encode %{ ++ __ xvsll_w($dst$$FloatRegister, $src$$FloatRegister, $shift$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct sll8I_imm(vecY dst, vecY src, immI shift) %{ ++ predicate(vector_length(n) == 8); ++ match(Set dst (LShiftVI src (LShiftCntV shift))); ++ format %{ "xvslli.w $dst, $src, $shift\t# @sll8I_imm" %} ++ ins_encode %{ ++ __ xvslli_w($dst$$FloatRegister, $src$$FloatRegister, $shift$$constant); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct sll4L(vecY dst, vecY src, vecY shift) %{ ++ predicate(vector_length(n) == 4); ++ match(Set dst (LShiftVL src shift)); ++ format %{ "xvsll.d $dst, $src, $shift\t# @sll4L" %} ++ ins_encode %{ ++ __ xvsll_d($dst$$FloatRegister, $src$$FloatRegister, $shift$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct sll4L_imm(vecY dst, vecY src, immI shift) %{ ++ predicate(vector_length(n) == 4); ++ match(Set dst (LShiftVL src (LShiftCntV shift))); ++ format %{ "xvslli.d $dst, $src, $shift\t# @sll4L_imm" %} ++ ins_encode %{ ++ __ xvslli_d($dst$$FloatRegister, $src$$FloatRegister, $shift$$constant); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++// ----------------------- LogicalRightShift ---------------------------------- ++ ++instruct srl16B(vecX dst, vecX src, vecX shift, vecX tmp) %{ ++ predicate(vector_length(n) == 16); ++ match(Set dst (URShiftVB src shift)); ++ effect(TEMP_DEF dst, TEMP tmp); ++ format %{ "vsrl $dst, $src, $shift\t# TEMP($tmp) @srl16B" %} ++ ins_encode %{ ++ __ vsrl_b($tmp$$FloatRegister, $src$$FloatRegister, $shift$$FloatRegister); ++ __ vslti_bu($dst$$FloatRegister, $shift$$FloatRegister, 0x8); ++ __ vand_v($dst$$FloatRegister, $dst$$FloatRegister, $tmp$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct srl16B_imm(vecX dst, vecX src, immI shift) %{ ++ predicate(vector_length(n) == 16); ++ match(Set dst (URShiftVB src (RShiftCntV shift))); ++ format %{ "vsrli.b $dst, $src, $shift\t# @srl16B_imm" %} ++ ins_encode %{ ++ if ($shift$$constant >= 8) { ++ __ vxor_v($dst$$FloatRegister, $dst$$FloatRegister, $dst$$FloatRegister); ++ } else { ++ __ vsrli_b($dst$$FloatRegister, $src$$FloatRegister, $shift$$constant); ++ } ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct srl8S(vecX dst, vecX src, vecX shift, vecX 
tmp) %{ ++ predicate(vector_length(n) == 8); ++ match(Set dst (URShiftVS src shift)); ++ effect(TEMP_DEF dst, TEMP tmp); ++ format %{ "vsrl $dst, $src, $shift\t# TEMP($tmp) @srl8S" %} ++ ins_encode %{ ++ __ vsrl_h($tmp$$FloatRegister, $src$$FloatRegister, $shift$$FloatRegister); ++ __ vslti_bu($dst$$FloatRegister, $shift$$FloatRegister, 0x10); ++ __ vand_v($dst$$FloatRegister, $dst$$FloatRegister, $tmp$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct srl8S_imm(vecX dst, vecX src, immI shift) %{ ++ predicate(vector_length(n) == 8); ++ match(Set dst (URShiftVS src (RShiftCntV shift))); ++ format %{ "vsrli.h $dst, $src, $shift\t# @srl8S_imm" %} ++ ins_encode %{ ++ if ($shift$$constant >= 16) { ++ __ vxor_v($dst$$FloatRegister, $dst$$FloatRegister, $dst$$FloatRegister); ++ } else { ++ __ vsrli_h($dst$$FloatRegister, $src$$FloatRegister, $shift$$constant); ++ } ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct srl4I(vecX dst, vecX src, vecX shift) %{ ++ predicate(vector_length(n) == 4); ++ match(Set dst (URShiftVI src shift)); ++ format %{ "vsrl.w $dst, $src, $shift\t# @srl4I" %} ++ ins_encode %{ ++ __ vsrl_w($dst$$FloatRegister, $src$$FloatRegister, $shift$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct srl4I_imm(vecX dst, vecX src, immI shift) %{ ++ predicate(vector_length(n) == 4); ++ match(Set dst (URShiftVI src (RShiftCntV shift))); ++ format %{ "vsrli.w $dst, $src, $shift\t# @srl4I_imm" %} ++ ins_encode %{ ++ __ vsrli_w($dst$$FloatRegister, $src$$FloatRegister, $shift$$constant); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct srl2L(vecX dst, vecX src, vecX shift) %{ ++ predicate(vector_length(n) == 2); ++ match(Set dst (URShiftVL src shift)); ++ format %{ "vsrl.d $dst, $src, $shift\t# @srl2L" %} ++ ins_encode %{ ++ __ vsrl_d($dst$$FloatRegister, $src$$FloatRegister, $shift$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct srl2L_imm(vecX dst, vecX src, immI shift) %{ ++ predicate(vector_length(n) == 2); ++ match(Set dst (URShiftVL src (RShiftCntV shift))); ++ format %{ "vsrli.d $dst, $src, $shift\t# @srl2L_imm" %} ++ ins_encode %{ ++ __ vsrli_d($dst$$FloatRegister, $src$$FloatRegister, $shift$$constant); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct srl32B(vecY dst, vecY src, vecY shift, vecY tmp) %{ ++ predicate(vector_length(n) == 32); ++ match(Set dst (URShiftVB src shift)); ++ effect(TEMP_DEF dst, TEMP tmp); ++ format %{ "xvsrl $dst, $src, $shift\t# TEMP($tmp) @srl32B" %} ++ ins_encode %{ ++ __ xvsrl_b($tmp$$FloatRegister, $src$$FloatRegister, $shift$$FloatRegister); ++ __ xvslti_bu($dst$$FloatRegister, $shift$$FloatRegister, 0x8); ++ __ xvand_v($dst$$FloatRegister, $dst$$FloatRegister, $tmp$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct srl32B_imm(vecY dst, vecY src, immI shift) %{ ++ predicate(vector_length(n) == 32); ++ match(Set dst (URShiftVB src (RShiftCntV shift))); ++ format %{ "xvsrli.b $dst, $src, $shift\t# @srl32B_imm" %} ++ ins_encode %{ ++ if ($shift$$constant >= 8) { ++ __ xvxor_v($dst$$FloatRegister, $dst$$FloatRegister, $dst$$FloatRegister); ++ } else { ++ __ xvsrli_b($dst$$FloatRegister, $src$$FloatRegister, $shift$$constant); ++ } ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct srl16S(vecY dst, vecY src, vecY shift, vecY tmp) %{ ++ predicate(vector_length(n) == 16); ++ match(Set dst (URShiftVS src shift)); ++ effect(TEMP_DEF dst, TEMP tmp); ++ format %{ "xvsrl $dst, $src, $shift\t# TEMP($tmp) @srl16S" %} ++ ins_encode %{ ++ __ xvsrl_h($tmp$$FloatRegister, $src$$FloatRegister, 
$shift$$FloatRegister); ++ __ xvslti_bu($dst$$FloatRegister, $shift$$FloatRegister, 0x10); ++ __ xvand_v($dst$$FloatRegister, $dst$$FloatRegister, $tmp$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct srl16S_imm(vecY dst, vecY src, immI shift) %{ ++ predicate(vector_length(n) == 16); ++ match(Set dst (URShiftVS src (RShiftCntV shift))); ++ format %{ "xvsrli.h $dst, $src, $shift\t# @srl16S_imm" %} ++ ins_encode %{ ++ if ($shift$$constant >= 16) { ++ __ xvxor_v($dst$$FloatRegister, $dst$$FloatRegister, $dst$$FloatRegister); ++ } else { ++ __ xvsrli_h($dst$$FloatRegister, $src$$FloatRegister, $shift$$constant); ++ } ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct srl8I(vecY dst, vecY src, vecY shift) %{ ++ predicate(vector_length(n) == 8); ++ match(Set dst (URShiftVI src shift)); ++ format %{ "xvsrl.w $dst, $src, $shift\t# @srl8I" %} ++ ins_encode %{ ++ __ xvsrl_w($dst$$FloatRegister, $src$$FloatRegister, $shift$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct srl8I_imm(vecY dst, vecY src, immI shift) %{ ++ predicate(vector_length(n) == 8); ++ match(Set dst (URShiftVI src (RShiftCntV shift))); ++ format %{ "xvsrli.w $dst, $src, $shift\t# @srl8I_imm" %} ++ ins_encode %{ ++ __ xvsrli_w($dst$$FloatRegister, $src$$FloatRegister, $shift$$constant); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct srl4L(vecY dst, vecY src, vecY shift) %{ ++ predicate(vector_length(n) == 4); ++ match(Set dst (URShiftVL src shift)); ++ format %{ "xvsrl.d $dst, $src, $shift\t# @srl4L" %} ++ ins_encode %{ ++ __ xvsrl_d($dst$$FloatRegister, $src$$FloatRegister, $shift$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct srl4L_imm(vecY dst, vecY src, immI shift) %{ ++ predicate(vector_length(n) == 4); ++ match(Set dst (URShiftVL src (RShiftCntV shift))); ++ format %{ "xvsrli.d $dst, $src, $shift\t# @srl4L_imm" %} ++ ins_encode %{ ++ __ xvsrli_d($dst$$FloatRegister, $src$$FloatRegister, $shift$$constant); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++// ------------------------- ArithmeticRightShift ----------------------------- ++ ++instruct sra16B(vecX dst, vecX src, vecX shift, vecX tmp) %{ ++ predicate(vector_length(n) == 16); ++ match(Set dst (RShiftVB src shift)); ++ effect(TEMP tmp); ++ format %{ "vsra $dst, $src, $shift\t# TEMP($tmp) @sra16B" %} ++ ins_encode %{ ++ __ vslti_bu($tmp$$FloatRegister, $shift$$FloatRegister, 0x8); ++ __ vorn_v($tmp$$FloatRegister, $shift$$FloatRegister, $tmp$$FloatRegister); ++ __ vsra_b($dst$$FloatRegister, $src$$FloatRegister, $tmp$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct sra16B_imm(vecX dst, vecX src, immI shift) %{ ++ predicate(vector_length(n) == 16); ++ match(Set dst (RShiftVB src (RShiftCntV shift))); ++ format %{ "vsrai.b $dst, $src, $shift\t# @sra16B_imm" %} ++ ins_encode %{ ++ if ($shift$$constant >= 8) { ++ __ vsrai_b($dst$$FloatRegister, $src$$FloatRegister, 7); ++ } else { ++ __ vsrai_b($dst$$FloatRegister, $src$$FloatRegister, $shift$$constant); ++ } ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct sra8S(vecX dst, vecX src, vecX shift, vecX tmp) %{ ++ predicate(vector_length(n) == 8); ++ match(Set dst (RShiftVS src shift)); ++ effect(TEMP tmp); ++ format %{ "vsra $dst, $src, $shift\t# TEMP($tmp) @sra8S" %} ++ ins_encode %{ ++ __ vslti_bu($tmp$$FloatRegister, $shift$$FloatRegister, 0x10); ++ __ vorn_v($tmp$$FloatRegister, $shift$$FloatRegister, $tmp$$FloatRegister); ++ __ vsra_h($dst$$FloatRegister, $src$$FloatRegister, $tmp$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct 
sra8S_imm(vecX dst, vecX src, immI shift) %{ ++ predicate(vector_length(n) == 8); ++ match(Set dst (RShiftVS src (RShiftCntV shift))); ++ format %{ "vsrai.h $dst, $src, $shift\t# @sra8S_imm" %} ++ ins_encode %{ ++ if ($shift$$constant >= 16) { ++ __ vsrai_h($dst$$FloatRegister, $src$$FloatRegister, 15); ++ } else { ++ __ vsrai_h($dst$$FloatRegister, $src$$FloatRegister, $shift$$constant); ++ } ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct sra4I(vecX dst, vecX src, vecX shift) %{ ++ predicate(vector_length(n) == 4); ++ match(Set dst (RShiftVI src shift)); ++ format %{ "vsra.w $dst, $src, $shift\t# @sra4I" %} ++ ins_encode %{ ++ __ vsra_w($dst$$FloatRegister, $src$$FloatRegister, $shift$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct sra4I_imm(vecX dst, vecX src, immI shift) %{ ++ predicate(vector_length(n) == 4); ++ match(Set dst (RShiftVI src (RShiftCntV shift))); ++ format %{ "vsrai.w $dst, $src, $shift\t# @sra4I_imm" %} ++ ins_encode %{ ++ __ vsrai_w($dst$$FloatRegister, $src$$FloatRegister, $shift$$constant); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct sra2L(vecX dst, vecX src, vecX shift) %{ ++ predicate(vector_length(n) == 2); ++ match(Set dst (RShiftVL src shift)); ++ format %{ "vsra.d $dst, $src, $shift\t# @sra2L" %} ++ ins_encode %{ ++ __ vsra_d($dst$$FloatRegister, $src$$FloatRegister, $shift$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct sra2L_imm(vecX dst, vecX src, immI shift) %{ ++ predicate(vector_length(n) == 2); ++ match(Set dst (RShiftVL src (RShiftCntV shift))); ++ format %{ "vsrai.d $dst, $src, $shift\t# @sra2L_imm" %} ++ ins_encode %{ ++ __ vsrai_d($dst$$FloatRegister, $src$$FloatRegister, $shift$$constant); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct sra32B(vecY dst, vecY src, vecY shift, vecY tmp) %{ ++ predicate(vector_length(n) == 32); ++ match(Set dst (RShiftVB src shift)); ++ effect(TEMP tmp); ++ format %{ "xvsra $dst, $src, $shift\t# TEMP($tmp) @sra32B" %} ++ ins_encode %{ ++ __ xvslti_bu($tmp$$FloatRegister, $shift$$FloatRegister, 0x8); ++ __ xvorn_v($tmp$$FloatRegister, $shift$$FloatRegister, $tmp$$FloatRegister); ++ __ xvsra_b($dst$$FloatRegister, $src$$FloatRegister, $tmp$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct sra32B_imm(vecY dst, vecY src, immI shift) %{ ++ predicate(vector_length(n) == 32); ++ match(Set dst (RShiftVB src (RShiftCntV shift))); ++ format %{ "xvsrai.b $dst, $src, $shift\t# @sra32B_imm" %} ++ ins_encode %{ ++ if ($shift$$constant >= 8) { ++ __ xvsrai_b($dst$$FloatRegister, $src$$FloatRegister, 7); ++ } else { ++ __ xvsrai_b($dst$$FloatRegister, $src$$FloatRegister, $shift$$constant); ++ } ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct sra16S(vecY dst, vecY src, vecY shift, vecY tmp) %{ ++ predicate(vector_length(n) == 16); ++ match(Set dst (RShiftVS src shift)); ++ effect(TEMP tmp); ++ format %{ "xvsra $dst, $src, $shift\t# TEMP($tmp) @sra16S" %} ++ ins_encode %{ ++ __ xvslti_bu($tmp$$FloatRegister, $shift$$FloatRegister, 0x10); ++ __ xvorn_v($tmp$$FloatRegister, $shift$$FloatRegister, $tmp$$FloatRegister); ++ __ xvsra_h($dst$$FloatRegister, $src$$FloatRegister, $tmp$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct sra16S_imm(vecY dst, vecY src, immI shift) %{ ++ predicate(vector_length(n) == 16); ++ match(Set dst (RShiftVS src (RShiftCntV shift))); ++ format %{ "xvsrai.h $dst, $src, $shift\t# @sra16S_imm" %} ++ ins_encode %{ ++ if ($shift$$constant >= 16) { ++ __ xvsrai_h($dst$$FloatRegister, $src$$FloatRegister, 15); ++ } else { ++ 
__ xvsrai_h($dst$$FloatRegister, $src$$FloatRegister, $shift$$constant); ++ } ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct sra8I(vecY dst, vecY src, vecY shift) %{ ++ predicate(vector_length(n) == 8); ++ match(Set dst (RShiftVI src shift)); ++ format %{ "xvsra.w $dst, $src, $shift\t# @sra8I" %} ++ ins_encode %{ ++ __ xvsra_w($dst$$FloatRegister, $src$$FloatRegister, $shift$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct sra8I_imm(vecY dst, vecY src, immI shift) %{ ++ predicate(vector_length(n) == 8); ++ match(Set dst (RShiftVI src (RShiftCntV shift))); ++ format %{ "xvsrai.w $dst, $src, $shift\t# @sra8I_imm" %} ++ ins_encode %{ ++ __ xvsrai_w($dst$$FloatRegister, $src$$FloatRegister, $shift$$constant); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct sra4L(vecY dst, vecY src, vecY shift) %{ ++ predicate(vector_length(n) == 4); ++ match(Set dst (RShiftVL src shift)); ++ format %{ "xvsra.d $dst, $src, $shift\t# @sra4L" %} ++ ins_encode %{ ++ __ xvsra_d($dst$$FloatRegister, $src$$FloatRegister, $shift$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct sra4L_imm(vecY dst, vecY src, immI shift) %{ ++ predicate(vector_length(n) == 4); ++ match(Set dst (RShiftVL src (RShiftCntV shift))); ++ format %{ "xvsrai.d $dst, $src, $shift\t# @sra4L_imm" %} ++ ins_encode %{ ++ __ xvsrai_d($dst$$FloatRegister, $src$$FloatRegister, $shift$$constant); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++// ----------------------------- RotateRightV --------------------------------- ++ ++instruct rotr4I(vecX dst, vecX src, vecX shift) %{ ++ predicate(vector_length(n) == 4); ++ match(Set dst (RotateRightV src shift)); ++ format %{ "vrotr.w $dst, $src, $shift\t# @rotr4I" %} ++ ins_encode %{ ++ __ vrotr_w($dst$$FloatRegister, $src$$FloatRegister, $shift$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct rotr4I_imm(vecX dst, vecX src, immI shift) %{ ++ predicate(vector_length(n) == 4); ++ match(Set dst (RotateRightV src shift)); ++ format %{ "vrotri.w $dst, $src, $shift\t# @rotr4I_imm" %} ++ ins_encode %{ ++ __ vrotri_w($dst$$FloatRegister, $src$$FloatRegister, $shift$$constant); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct rotr2L(vecX dst, vecX src, vecX shift) %{ ++ predicate(vector_length(n) == 2); ++ match(Set dst (RotateRightV src shift)); ++ format %{ "vrotr.d $dst, $src, $shift\t# @rotr2L" %} ++ ins_encode %{ ++ __ vrotr_d($dst$$FloatRegister, $src$$FloatRegister, $shift$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct rotr2L_imm(vecX dst, vecX src, immI shift) %{ ++ predicate(vector_length(n) == 2); ++ match(Set dst (RotateRightV src shift)); ++ format %{ "vrotri.d $dst, $src, $shift\t# @rotr2L_imm" %} ++ ins_encode %{ ++ __ vrotri_d($dst$$FloatRegister, $src$$FloatRegister, $shift$$constant); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct rotr8I(vecY dst, vecY src, vecY shift) %{ ++ predicate(vector_length(n) == 8); ++ match(Set dst (RotateRightV src shift)); ++ format %{ "xvrotr.w $dst, $src, $shift\t# @rotr8I" %} ++ ins_encode %{ ++ __ xvrotr_w($dst$$FloatRegister, $src$$FloatRegister, $shift$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct rotr8I_imm(vecY dst, vecY src, immI shift) %{ ++ predicate(vector_length(n) == 8); ++ match(Set dst (RotateRightV src shift)); ++ format %{ "xvrotri.w $dst, $src, $shift\t# @rotr8I_imm" %} ++ ins_encode %{ ++ __ xvrotri_w($dst$$FloatRegister, $src$$FloatRegister, $shift$$constant); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct rotr4L(vecY dst, vecY src, vecY shift) 
%{
++  predicate(vector_length(n) == 4);
++  match(Set dst (RotateRightV src shift));
++  format %{ "xvrotr.d $dst, $src, $shift\t# @rotr4L" %}
++  ins_encode %{
++    __ xvrotr_d($dst$$FloatRegister, $src$$FloatRegister, $shift$$FloatRegister);
++  %}
++  ins_pipe( pipe_slow );
++%}
++
++instruct rotr4L_imm(vecY dst, vecY src, immI shift) %{
++  predicate(vector_length(n) == 4);
++  match(Set dst (RotateRightV src shift));
++  format %{ "xvrotri.d $dst, $src, $shift\t# @rotr4L_imm" %}
++  ins_encode %{
++    __ xvrotri_d($dst$$FloatRegister, $src$$FloatRegister, $shift$$constant);
++  %}
++  ins_pipe( pipe_slow );
++%}
++
++// ------------------------------ RotateLeftV ---------------------------------
++
++instruct rotl4I(vecX dst, vecX src, vecX shift, vecX tmp) %{
++  predicate(vector_length(n) == 4);
++  match(Set dst (RotateLeftV src shift));
++  effect(TEMP tmp);
++  format %{ "vrotl $dst, $src, $shift\t# TEMP($tmp) @rotl4I" %}
++  ins_encode %{
++    __ vneg_w($tmp$$FloatRegister, $shift$$FloatRegister);
++    __ vrotr_w($dst$$FloatRegister, $src$$FloatRegister, $tmp$$FloatRegister);
++  %}
++  ins_pipe( pipe_slow );
++%}
++
++instruct rotl4I_imm(vecX dst, vecX src, immI shift) %{
++  predicate(vector_length(n) == 4);
++  match(Set dst (RotateLeftV src shift));
++  format %{ "vrotli $dst, $src, $shift\t# @rotl4I_imm" %}
++  ins_encode %{
++    __ vrotri_w($dst$$FloatRegister, $src$$FloatRegister, (-$shift$$constant) & 0x1f);
++  %}
++  ins_pipe( pipe_slow );
++%}
++
++instruct rotl2L(vecX dst, vecX src, vecX shift, vecX tmp) %{
++  predicate(vector_length(n) == 2);
++  match(Set dst (RotateLeftV src shift));
++  effect(TEMP tmp);
++  format %{ "vrotl $dst, $src, $shift\t# TEMP($tmp) @rotl2L" %}
++  ins_encode %{
++    __ vneg_d($tmp$$FloatRegister, $shift$$FloatRegister);
++    __ vrotr_d($dst$$FloatRegister, $src$$FloatRegister, $tmp$$FloatRegister);
++  %}
++  ins_pipe( pipe_slow );
++%}
++
++instruct rotl2L_imm(vecX dst, vecX src, immI shift) %{
++  predicate(vector_length(n) == 2);
++  match(Set dst (RotateLeftV src shift));
++  format %{ "vrotli $dst, $src, $shift\t# @rotl2L_imm" %}
++  ins_encode %{
++    __ vrotri_d($dst$$FloatRegister, $src$$FloatRegister, (-$shift$$constant) & 0x3f);
++  %}
++  ins_pipe( pipe_slow );
++%}
++
++instruct rotl8I(vecY dst, vecY src, vecY shift, vecY tmp) %{
++  predicate(vector_length(n) == 8);
++  match(Set dst (RotateLeftV src shift));
++  effect(TEMP tmp);
++  format %{ "xvrotl $dst, $src, $shift\t# TEMP($tmp) @rotl8I" %}
++  ins_encode %{
++    __ xvneg_w($tmp$$FloatRegister, $shift$$FloatRegister);
++    __ xvrotr_w($dst$$FloatRegister, $src$$FloatRegister, $tmp$$FloatRegister);
++  %}
++  ins_pipe( pipe_slow );
++%}
++
++instruct rotl8I_imm(vecY dst, vecY src, immI shift) %{
++  predicate(vector_length(n) == 8);
++  match(Set dst (RotateLeftV src shift));
++  format %{ "xvrotli $dst, $src, $shift\t# @rotl8I_imm" %}
++  ins_encode %{
++    __ xvrotri_w($dst$$FloatRegister, $src$$FloatRegister, (-$shift$$constant) & 0x1f);
++  %}
++  ins_pipe( pipe_slow );
++%}
++
++instruct rotl4L(vecY dst, vecY src, vecY shift, vecY tmp) %{
++  predicate(vector_length(n) == 4);
++  match(Set dst (RotateLeftV src shift));
++  effect(TEMP tmp);
++  format %{ "xvrotl $dst, $src, $shift\t# TEMP($tmp) @rotl4L" %}
++  ins_encode %{
++    __ xvneg_d($tmp$$FloatRegister, $shift$$FloatRegister);
++    __ xvrotr_d($dst$$FloatRegister, $src$$FloatRegister, $tmp$$FloatRegister);
++  %}
++  ins_pipe( pipe_slow );
++%}
++
++instruct rotl4L_imm(vecY dst, vecY src, immI shift) %{
++  predicate(vector_length(n) == 4);
++  match(Set dst (RotateLeftV src shift));
++ 
format %{ "xvrotli $dst, $src, $shift\t# @rotl4L_imm" %} ++ ins_encode %{ ++ __ xvrotri_d($dst$$FloatRegister, $src$$FloatRegister, (-$shift$$constant) & 0x3f); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++// --------------------------------- AND -------------------------------------- ++ ++instruct andV16(vecX dst, vecX src1, vecX src2) %{ ++ predicate(vector_length_in_bytes(n) == 16); ++ match(Set dst (AndV src1 src2)); ++ format %{ "vand.v $dst, $src1, $src2\t# @andV16" %} ++ ins_encode %{ ++ __ vand_v($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct and16B_imm(vecX dst, vecX src, immIU8 imm) %{ ++ predicate(vector_length(n) == 16); ++ match(Set dst (AndV src (ReplicateB imm))); ++ format %{ "vandi.b $dst, $src, $imm\t# @and16B_imm" %} ++ ins_encode %{ ++ __ vandi_b($dst$$FloatRegister, $src$$FloatRegister, $imm$$constant); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct andV32(vecY dst, vecY src1, vecY src2) %{ ++ predicate(vector_length_in_bytes(n) == 32); ++ match(Set dst (AndV src1 src2)); ++ format %{ "xvand.v $dst, $src1, $src2\t# @andV32" %} ++ ins_encode %{ ++ __ xvand_v($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct and32B_imm(vecY dst, vecY src, immIU8 imm) %{ ++ predicate(vector_length(n) == 32); ++ match(Set dst (AndV src (ReplicateB imm))); ++ format %{ "xvandi.b $dst, $src, $imm\t# @and32B_imm" %} ++ ins_encode %{ ++ __ xvandi_b($dst$$FloatRegister, $src$$FloatRegister, $imm$$constant); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++// --------------------------------- OR --------------------------------------- ++ ++instruct orV16(vecX dst, vecX src1, vecX src2) %{ ++ predicate(vector_length_in_bytes(n) == 16); ++ match(Set dst (OrV src1 src2)); ++ format %{ "vor.v $dst, $src1, $src2\t# @orV16" %} ++ ins_encode %{ ++ __ vor_v($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct or16B_imm(vecX dst, vecX src, immIU8 imm) %{ ++ predicate(vector_length(n) == 16); ++ match(Set dst (OrV src (ReplicateB imm))); ++ format %{ "vori.b $dst, $src, $imm\t# @or16B_imm" %} ++ ins_encode %{ ++ __ vori_b($dst$$FloatRegister, $src$$FloatRegister, $imm$$constant); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct orV32(vecY dst, vecY src1, vecY src2) %{ ++ predicate(vector_length_in_bytes(n) == 32); ++ match(Set dst (OrV src1 src2)); ++ format %{ "xvor.v $dst, $src1, $src2\t# @orV32" %} ++ ins_encode %{ ++ __ xvor_v($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct or32B_imm(vecY dst, vecY src, immIU8 imm) %{ ++ predicate(vector_length(n) == 32); ++ match(Set dst (OrV src (ReplicateB imm))); ++ format %{ "xvori.b $dst, $src, $imm\t# @or32B_imm" %} ++ ins_encode %{ ++ __ xvori_b($dst$$FloatRegister, $src$$FloatRegister, $imm$$constant); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++// --------------------------------- XOR -------------------------------------- ++ ++instruct xorV16(vecX dst, vecX src1, vecX src2) %{ ++ predicate(vector_length_in_bytes(n) == 16); ++ match(Set dst (XorV src1 src2)); ++ format %{ "vxor.v $dst, $src1, $src2\t# @xorV16" %} ++ ins_encode %{ ++ __ vxor_v($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct xor16B_imm(vecX dst, vecX src, immIU8 imm) %{ ++ predicate(vector_length(n) == 16); ++ match(Set dst (XorV src (ReplicateB imm))); ++ format %{ 
"vxori.b $dst, $src, $imm\t# @xor16B_imm" %} ++ ins_encode %{ ++ __ vxori_b($dst$$FloatRegister, $src$$FloatRegister, $imm$$constant); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct xorV32(vecY dst, vecY src1, vecY src2) %{ ++ predicate(vector_length_in_bytes(n) == 32); ++ match(Set dst (XorV src1 src2)); ++ format %{ "xvxor.v $dst, $src1, $src2\t# @xorV32" %} ++ ins_encode %{ ++ __ xvxor_v($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct xor32B_imm(vecX dst, vecX src, immIU8 imm) %{ ++ predicate(vector_length(n) == 32); ++ match(Set dst (XorV src (ReplicateB imm))); ++ format %{ "xvxori.b $dst, $src, $imm\t# @xor32B_imm" %} ++ ins_encode %{ ++ __ xvxori_b($dst$$FloatRegister, $src$$FloatRegister, $imm$$constant); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++// --------------------------------- NOR -------------------------------------- ++ ++instruct norV16(vecX dst, vecX src1, vecX src2, immI_M1 m1) %{ ++ predicate(vector_length_in_bytes(n) == 16); ++ match(Set dst (XorV (OrV src1 src2) (ReplicateB m1))); ++ match(Set dst (XorV (OrV src1 src2) (ReplicateS m1))); ++ match(Set dst (XorV (OrV src1 src2) (ReplicateI m1))); ++ format %{ "vnor.v $dst, $src1, $src2\t# @norV16" %} ++ ins_encode %{ ++ __ vnor_v($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct nor16B_imm(vecX dst, vecX src, immIU8 imm, immI_M1 m1) %{ ++ predicate(vector_length(n) == 16); ++ match(Set dst (XorV (OrV src (ReplicateB imm)) (ReplicateB m1))); ++ format %{ "vnori.b $dst, $src, $imm\t# @nor16B_imm" %} ++ ins_encode %{ ++ __ vnori_b($dst$$FloatRegister, $src$$FloatRegister, $imm$$constant); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct norV32(vecY dst, vecY src1, vecY src2, immI_M1 m1) %{ ++ predicate(vector_length(n) == 32); ++ match(Set dst (XorV (OrV src1 src2) (ReplicateB m1))); ++ match(Set dst (XorV (OrV src1 src2) (ReplicateS m1))); ++ match(Set dst (XorV (OrV src1 src2) (ReplicateI m1))); ++ format %{ "xvnor.v $dst, $src1, $src2\t# @norV32" %} ++ ins_encode %{ ++ __ xvnor_v($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct nor32B_imm(vecY dst, vecY src, immIU8 imm, immI_M1 m1) %{ ++ predicate(vector_length_in_bytes(n) == 32); ++ match(Set dst (XorV (OrV src (ReplicateB imm)) (ReplicateB m1))); ++ format %{ "xvnori.b $dst, $src, $imm\t# @nor32B_imm" %} ++ ins_encode %{ ++ __ xvnori_b($dst$$FloatRegister, $src$$FloatRegister, $imm$$constant); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++// --------------------------------- ANDN ------------------------------------- ++ ++instruct andnV16(vecX dst, vecX src1, vecX src2, immI_M1 m1) %{ ++ predicate(vector_length_in_bytes(n) == 16); ++ match(Set dst (AndV src2 (XorV src1 (ReplicateB m1)))); ++ match(Set dst (AndV src2 (XorV src1 (ReplicateS m1)))); ++ match(Set dst (AndV src2 (XorV src1 (ReplicateI m1)))); ++ format %{ "vandn.v $dst, $src1, $src2\t# @andnV16" %} ++ ins_encode %{ ++ __ vandn_v($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct andnV32(vecY dst, vecY src1, vecY src2, immI_M1 m1) %{ ++ predicate(vector_length_in_bytes(n) == 32); ++ match(Set dst (AndV src2 (XorV src1 (ReplicateB m1)))); ++ match(Set dst (AndV src2 (XorV src1 (ReplicateS m1)))); ++ match(Set dst (AndV src2 (XorV src1 (ReplicateI m1)))); ++ format %{ "xvandn.v $dst, $src1, $src2\t# @andnV32" %} ++ ins_encode %{ ++ __ 
xvandn_v($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++// --------------------------------- ORN -------------------------------------- ++ ++instruct ornV16(vecX dst, vecX src1, vecX src2, immI_M1 m1) %{ ++ predicate(vector_length_in_bytes(n) == 16); ++ match(Set dst (OrV src1 (XorV src2 (ReplicateB m1)))); ++ match(Set dst (OrV src1 (XorV src2 (ReplicateS m1)))); ++ match(Set dst (OrV src1 (XorV src2 (ReplicateI m1)))); ++ format %{ "vorn.v $dst, $src1, $src2\t# @ornV16" %} ++ ins_encode %{ ++ __ vorn_v($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct ornV32(vecY dst, vecY src1, vecY src2, immI_M1 m1) %{ ++ predicate(vector_length_in_bytes(n) == 32); ++ match(Set dst (OrV src1 (XorV src2 (ReplicateB m1)))); ++ match(Set dst (OrV src1 (XorV src2 (ReplicateS m1)))); ++ match(Set dst (OrV src1 (XorV src2 (ReplicateI m1)))); ++ format %{ "xvorn.v $dst, $src1, $src2\t# @ornV32" %} ++ ins_encode %{ ++ __ xvorn_v($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++// ----------------------------- Reduction Add -------------------------------- ++ ++instruct reduce_add16B(mRegI dst, mRegI src, vecX vsrc, vecX tmp1, vecX tmp2) %{ ++ predicate(vector_length(n->in(2)) == 16 && vector_element_basic_type(n->in(2)) == T_BYTE); ++ match(Set dst (AddReductionVI src vsrc)); ++ effect(TEMP_DEF dst, TEMP tmp1, TEMP tmp2); ++ format %{ "reduce $dst, $src, $vsrc\t# TEMP($tmp1, $tmp2) @reduce_add16B" %} ++ ins_encode %{ ++ __ reduce($dst$$Register, $src$$Register, $vsrc$$FloatRegister, $tmp1$$FloatRegister, $tmp2$$FloatRegister, T_BYTE, this->ideal_Opcode(), 16); ++ %} ++ ins_pipe(pipe_slow); ++%} ++ ++instruct reduce_add8S(mRegI dst, mRegI src, vecX vsrc, vecX tmp1, vecX tmp2) %{ ++ predicate(vector_length(n->in(2)) == 8 && vector_element_basic_type(n->in(2)) == T_SHORT); ++ match(Set dst (AddReductionVI src vsrc)); ++ effect(TEMP_DEF dst, TEMP tmp1, TEMP tmp2); ++ format %{ "reduce $dst, $src, $vsrc\t# TEMP($tmp1, $tmp2) @reduce_add8S" %} ++ ins_encode %{ ++ __ reduce($dst$$Register, $src$$Register, $vsrc$$FloatRegister, $tmp1$$FloatRegister, $tmp2$$FloatRegister, T_SHORT, this->ideal_Opcode(), 16); ++ %} ++ ins_pipe(pipe_slow); ++%} ++ ++instruct reduce_add4I(mRegI dst, mRegI src, vecX vsrc, vecX tmp1, vecX tmp2) %{ ++ predicate(vector_length(n->in(2)) == 4 && vector_element_basic_type(n->in(2)) == T_INT); ++ match(Set dst (AddReductionVI src vsrc)); ++ effect(TEMP_DEF dst, TEMP tmp1, TEMP tmp2); ++ format %{ "reduce $dst, $src, $vsrc\t# TEMP($tmp1, $tmp2) @reduce_add4I" %} ++ ins_encode %{ ++ __ reduce($dst$$Register, $src$$Register, $vsrc$$FloatRegister, $tmp1$$FloatRegister, $tmp2$$FloatRegister, T_INT, this->ideal_Opcode(), 16); ++ %} ++ ins_pipe(pipe_slow); ++%} ++ ++instruct reduce_add2L(mRegL dst, mRegL src, vecX vsrc, vecX tmp) %{ ++ predicate(vector_length(n->in(2)) == 2 && vector_element_basic_type(n->in(2)) == T_LONG); ++ match(Set dst (AddReductionVL src vsrc)); ++ effect(TEMP_DEF dst, TEMP tmp); ++ format %{ "reduce $dst, $src, $vsrc\t# TEMP($tmp) @reduce_add2L" %} ++ ins_encode %{ ++ __ reduce($dst$$Register, $src$$Register, $vsrc$$FloatRegister, $tmp$$FloatRegister, FNOREG, T_LONG, this->ideal_Opcode(), 16); ++ %} ++ ins_pipe(pipe_slow); ++%} ++ ++instruct reduce_add4F(regF dst, regF src, vecX vsrc, vecX tmp) %{ ++ predicate(vector_length(n->in(2)) == 4 && vector_element_basic_type(n->in(2)) == T_FLOAT); ++ 
match(Set dst (AddReductionVF src vsrc)); ++ effect(TEMP_DEF dst, TEMP tmp); ++ format %{ "reduce $dst, $src, $vsrc\t# TEMP($tmp) @reduce_add4F" %} ++ ins_encode %{ ++ __ reduce($dst$$FloatRegister, $src$$FloatRegister, $vsrc$$FloatRegister, $tmp$$FloatRegister, T_FLOAT, this->ideal_Opcode(), 16); ++ %} ++ ins_pipe(pipe_slow); ++%} ++ ++instruct reduce_add2D(regD dst, regD src, vecX vsrc, vecX tmp) %{ ++ predicate(vector_length(n->in(2)) == 2 && vector_element_basic_type(n->in(2)) == T_DOUBLE); ++ match(Set dst (AddReductionVD src vsrc)); ++ effect(TEMP_DEF dst, TEMP tmp); ++ format %{ "reduce $dst, $src, $vsrc\t# TEMP($tmp) @reduce_add2D" %} ++ ins_encode %{ ++ __ reduce($dst$$FloatRegister, $src$$FloatRegister, $vsrc$$FloatRegister, $tmp$$FloatRegister, T_DOUBLE, this->ideal_Opcode(), 16); ++ %} ++ ins_pipe(pipe_slow); ++%} ++ ++instruct reduce_add32B(mRegI dst, mRegI src, vecY vsrc, vecY tmp1, vecY tmp2) %{ ++ predicate(vector_length(n->in(2)) == 32 && vector_element_basic_type(n->in(2)) == T_BYTE); ++ match(Set dst (AddReductionVI src vsrc)); ++ effect(TEMP_DEF dst, TEMP tmp1, TEMP tmp2); ++ format %{ "reduce $dst, $src, $vsrc\t# TEMP($tmp1, $tmp2) @reduce_add32B" %} ++ ins_encode %{ ++ __ reduce($dst$$Register, $src$$Register, $vsrc$$FloatRegister, $tmp1$$FloatRegister, $tmp2$$FloatRegister, T_BYTE, this->ideal_Opcode(), 32); ++ %} ++ ins_pipe(pipe_slow); ++%} ++ ++instruct reduce_add16S(mRegI dst, mRegI src, vecY vsrc, vecY tmp1, vecY tmp2) %{ ++ predicate(vector_length(n->in(2)) == 16 && vector_element_basic_type(n->in(2)) == T_SHORT); ++ match(Set dst (AddReductionVI src vsrc)); ++ effect(TEMP_DEF dst, TEMP tmp1, TEMP tmp2); ++ format %{ "reduce $dst, $src, $vsrc\t# TEMP($tmp1, $tmp2) @reduce_add16S" %} ++ ins_encode %{ ++ __ reduce($dst$$Register, $src$$Register, $vsrc$$FloatRegister, $tmp1$$FloatRegister, $tmp2$$FloatRegister, T_SHORT, this->ideal_Opcode(), 32); ++ %} ++ ins_pipe(pipe_slow); ++%} ++ ++instruct reduce_add8I(mRegI dst, mRegI src, vecY vsrc, vecY tmp1, vecY tmp2) %{ ++ predicate(vector_length(n->in(2)) == 8 && vector_element_basic_type(n->in(2)) == T_INT); ++ match(Set dst (AddReductionVI src vsrc)); ++ effect(TEMP_DEF dst, TEMP tmp1, TEMP tmp2); ++ format %{ "reduce $dst, $src, $vsrc\t# TEMP($tmp1, $tmp2) @reduce_add8I" %} ++ ins_encode %{ ++ __ reduce($dst$$Register, $src$$Register, $vsrc$$FloatRegister, $tmp1$$FloatRegister, $tmp2$$FloatRegister, T_INT, this->ideal_Opcode(), 32); ++ %} ++ ins_pipe(pipe_slow); ++%} ++ ++instruct reduce_add4L(mRegL dst, mRegL src, vecY vsrc, vecY tmp1, vecY tmp2) %{ ++ predicate(vector_length(n->in(2)) == 4 && vector_element_basic_type(n->in(2)) == T_LONG); ++ match(Set dst (AddReductionVL src vsrc)); ++ effect(TEMP_DEF dst, TEMP tmp1, TEMP tmp2); ++ format %{ "reduce $dst, $src, $vsrc\t# TEMP($tmp1, $tmp2) @reduce_add4L" %} ++ ins_encode %{ ++ __ reduce($dst$$Register, $src$$Register, $vsrc$$FloatRegister, $tmp1$$FloatRegister, $tmp2$$FloatRegister, T_LONG, this->ideal_Opcode(), 32); ++ %} ++ ins_pipe(pipe_slow); ++%} ++ ++instruct reduce_add8F(regF dst, regF src, vecY vsrc, vecY tmp) %{ ++ predicate(vector_length(n->in(2)) == 8 && vector_element_basic_type(n->in(2)) == T_FLOAT); ++ match(Set dst (AddReductionVF src vsrc)); ++ effect(TEMP_DEF dst, TEMP tmp); ++ format %{ "reduce $dst, $src, $vsrc\t# TEMP($tmp) @reduce_add8F" %} ++ ins_encode %{ ++ __ reduce($dst$$FloatRegister, $src$$FloatRegister, $vsrc$$FloatRegister, $tmp$$FloatRegister, T_FLOAT, this->ideal_Opcode(), 32); ++ %} ++ ins_pipe(pipe_slow); ++%} ++ ++instruct 
reduce_add4D(regD dst, regD src, vecY vsrc, vecY tmp) %{ ++ predicate(vector_length(n->in(2)) == 4 && vector_element_basic_type(n->in(2)) == T_DOUBLE); ++ match(Set dst (AddReductionVD src vsrc)); ++ effect(TEMP_DEF dst, TEMP tmp); ++ format %{ "reduce $dst, $src, $vsrc\t# TEMP($tmp) @reduce_add4D" %} ++ ins_encode %{ ++ __ reduce($dst$$FloatRegister, $src$$FloatRegister, $vsrc$$FloatRegister, $tmp$$FloatRegister, T_DOUBLE, this->ideal_Opcode(), 32); ++ %} ++ ins_pipe(pipe_slow); ++%} ++ ++// ----------------------------- Reduction Mul -------------------------------- ++ ++instruct reduce_mul16B(mRegI dst, mRegI src, vecX vsrc, vecX tmp1, vecX tmp2) %{ ++ predicate(vector_length(n->in(2)) == 16 && vector_element_basic_type(n->in(2)) == T_BYTE); ++ match(Set dst (MulReductionVI src vsrc)); ++ effect(TEMP_DEF dst, TEMP tmp1, TEMP tmp2); ++ format %{ "reduce $dst, $src, $vsrc\t# TEMP($tmp1, $tmp2) @reduce_mul16B" %} ++ ins_encode %{ ++ __ reduce($dst$$Register, $src$$Register, $vsrc$$FloatRegister, $tmp1$$FloatRegister, $tmp2$$FloatRegister, T_BYTE, this->ideal_Opcode(), 16); ++ %} ++ ins_pipe(pipe_slow); ++%} ++ ++instruct reduce_mul8S(mRegI dst, mRegI src, vecX vsrc, vecX tmp1, vecX tmp2) %{ ++ predicate(vector_length(n->in(2)) == 8 && vector_element_basic_type(n->in(2)) == T_SHORT); ++ match(Set dst (MulReductionVI src vsrc)); ++ effect(TEMP_DEF dst, TEMP tmp1, TEMP tmp2); ++ format %{ "reduce $dst, $src, $vsrc\t# TEMP($tmp1, $tmp2) @reduce_mul8S" %} ++ ins_encode %{ ++ __ reduce($dst$$Register, $src$$Register, $vsrc$$FloatRegister, $tmp1$$FloatRegister, $tmp2$$FloatRegister, T_SHORT, this->ideal_Opcode(), 16); ++ %} ++ ins_pipe(pipe_slow); ++%} ++ ++instruct reduce_mul4I(mRegI dst, mRegI src, vecX vsrc, vecX tmp1, vecX tmp2) %{ ++ predicate(vector_length(n->in(2)) == 4 && vector_element_basic_type(n->in(2)) == T_INT); ++ match(Set dst (MulReductionVI src vsrc)); ++ effect(TEMP_DEF dst, TEMP tmp1, TEMP tmp2); ++ format %{ "reduce $dst, $src, $vsrc\t# TEMP($tmp1, $tmp2) @reduce_mul4I" %} ++ ins_encode %{ ++ __ reduce($dst$$Register, $src$$Register, $vsrc$$FloatRegister, $tmp1$$FloatRegister, $tmp2$$FloatRegister, T_INT, this->ideal_Opcode(), 16); ++ %} ++ ins_pipe(pipe_slow); ++%} ++ ++instruct reduce_mul2L(mRegL dst, mRegL src, vecX vsrc, vecX tmp) %{ ++ predicate(vector_length(n->in(2)) == 2 && vector_element_basic_type(n->in(2)) == T_LONG); ++ match(Set dst (MulReductionVL src vsrc)); ++ effect(TEMP_DEF dst, TEMP tmp); ++ format %{ "reduce $dst, $src, $vsrc\t# TEMP($tmp) @reduce_mul2L" %} ++ ins_encode %{ ++ __ reduce($dst$$Register, $src$$Register, $vsrc$$FloatRegister, $tmp$$FloatRegister, FNOREG, T_LONG, this->ideal_Opcode(), 16); ++ %} ++ ins_pipe(pipe_slow); ++%} ++ ++instruct reduce_mul4F(regF dst, regF src, vecX vsrc, vecX tmp) %{ ++ predicate(vector_length(n->in(2)) == 4 && vector_element_basic_type(n->in(2)) == T_FLOAT); ++ match(Set dst (MulReductionVF src vsrc)); ++ effect(TEMP_DEF dst, TEMP tmp); ++ format %{ "reduce $dst, $src, $vsrc\t# TEMP($tmp) @reduce_mul4F" %} ++ ins_encode %{ ++ __ reduce($dst$$FloatRegister, $src$$FloatRegister, $vsrc$$FloatRegister, $tmp$$FloatRegister, T_FLOAT, this->ideal_Opcode(), 16); ++ %} ++ ins_pipe(pipe_slow); ++%} ++ ++instruct reduce_mul2D(regD dst, regD src, vecX vsrc, vecX tmp) %{ ++ predicate(vector_length(n->in(2)) == 2 && vector_element_basic_type(n->in(2)) == T_DOUBLE); ++ match(Set dst (MulReductionVD src vsrc)); ++ effect(TEMP_DEF dst, TEMP tmp); ++ format %{ "reduce $dst, $src, $vsrc\t# TEMP($tmp) @reduce_mul2D" %} ++ ins_encode %{ 
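++    // Note: reduce() here and in the surrounding rules is the port's shared
++    // macro-assembler reduction helper (defined with the other LoongArch
++    // MacroAssembler routines, not in this hunk). Judging from these call sites,
++    // it folds every lane of $vsrc into the scalar $dst, seeded with $src, with
++    // the ideal opcode selecting add/mul/min/max/and/or/xor and the final
++    // argument giving the vector width in bytes (16 for LSX, 32 for LASX).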
++ __ reduce($dst$$FloatRegister, $src$$FloatRegister, $vsrc$$FloatRegister, $tmp$$FloatRegister, T_DOUBLE, this->ideal_Opcode(), 16); ++ %} ++ ins_pipe(pipe_slow); ++%} ++ ++instruct reduce_mul32B(mRegI dst, mRegI src, vecY vsrc, vecY tmp1, vecY tmp2) %{ ++ predicate(vector_length(n->in(2)) == 32 && vector_element_basic_type(n->in(2)) == T_BYTE); ++ match(Set dst (MulReductionVI src vsrc)); ++ effect(TEMP_DEF dst, TEMP tmp1, TEMP tmp2); ++ format %{ "reduce $dst, $src, $vsrc\t# TEMP($tmp1, $tmp2) @reduce_mul32B" %} ++ ins_encode %{ ++ __ reduce($dst$$Register, $src$$Register, $vsrc$$FloatRegister, $tmp1$$FloatRegister, $tmp2$$FloatRegister, T_BYTE, this->ideal_Opcode(), 32); ++ %} ++ ins_pipe(pipe_slow); ++%} ++ ++instruct reduce_mul16S(mRegI dst, mRegI src, vecY vsrc, vecY tmp1, vecY tmp2) %{ ++ predicate(vector_length(n->in(2)) == 16 && vector_element_basic_type(n->in(2)) == T_SHORT); ++ match(Set dst (MulReductionVI src vsrc)); ++ effect(TEMP_DEF dst, TEMP tmp1, TEMP tmp2); ++ format %{ "reduce $dst, $src, $vsrc\t# TEMP($tmp1, $tmp2) @reduce_mul16S" %} ++ ins_encode %{ ++ __ reduce($dst$$Register, $src$$Register, $vsrc$$FloatRegister, $tmp1$$FloatRegister, $tmp2$$FloatRegister, T_SHORT, this->ideal_Opcode(), 32); ++ %} ++ ins_pipe(pipe_slow); ++%} ++ ++instruct reduce_mul8I(mRegI dst, mRegI src, vecY vsrc, vecY tmp1, vecY tmp2) %{ ++ predicate(vector_length(n->in(2)) == 8 && vector_element_basic_type(n->in(2)) == T_INT); ++ match(Set dst (MulReductionVI src vsrc)); ++ effect(TEMP_DEF dst, TEMP tmp1, TEMP tmp2); ++ format %{ "reduce $dst, $src, $vsrc\t# TEMP($tmp1, $tmp2) @reduce_mul8I" %} ++ ins_encode %{ ++ __ reduce($dst$$Register, $src$$Register, $vsrc$$FloatRegister, $tmp1$$FloatRegister, $tmp2$$FloatRegister, T_INT, this->ideal_Opcode(), 32); ++ %} ++ ins_pipe(pipe_slow); ++%} ++ ++instruct reduce_mul4L(mRegL dst, mRegL src, vecY vsrc, vecY tmp1, vecY tmp2) %{ ++ predicate(vector_length(n->in(2)) == 4 && vector_element_basic_type(n->in(2)) == T_LONG); ++ match(Set dst (MulReductionVL src vsrc)); ++ effect(TEMP_DEF dst, TEMP tmp1, TEMP tmp2); ++ format %{ "reduce $dst, $src, $vsrc\t# TEMP($tmp1, $tmp2) @reduce_mul4L" %} ++ ins_encode %{ ++ __ reduce($dst$$Register, $src$$Register, $vsrc$$FloatRegister, $tmp1$$FloatRegister, $tmp2$$FloatRegister, T_LONG, this->ideal_Opcode(), 32); ++ %} ++ ins_pipe(pipe_slow); ++%} ++ ++instruct reduce_mul8F(regF dst, regF src, vecY vsrc, vecY tmp) %{ ++ predicate(vector_length(n->in(2)) == 8 && vector_element_basic_type(n->in(2)) == T_FLOAT); ++ match(Set dst (MulReductionVF src vsrc)); ++ effect(TEMP_DEF dst, TEMP tmp); ++ format %{ "reduce $dst, $src, $vsrc\t# TEMP($tmp) @reduce_mul8F" %} ++ ins_encode %{ ++ __ reduce($dst$$FloatRegister, $src$$FloatRegister, $vsrc$$FloatRegister, $tmp$$FloatRegister, T_FLOAT, this->ideal_Opcode(), 32); ++ %} ++ ins_pipe(pipe_slow); ++%} ++ ++instruct reduce_mul4D(regD dst, regD src, vecY vsrc, vecY tmp) %{ ++ predicate(vector_length(n->in(2)) == 4 && vector_element_basic_type(n->in(2)) == T_DOUBLE); ++ match(Set dst (MulReductionVD src vsrc)); ++ effect(TEMP_DEF dst, TEMP tmp); ++ format %{ "reduce $dst, $src, $vsrc\t# TEMP($tmp) @reduce_mul4D" %} ++ ins_encode %{ ++ __ reduce($dst$$FloatRegister, $src$$FloatRegister, $vsrc$$FloatRegister, $tmp$$FloatRegister, T_DOUBLE, this->ideal_Opcode(), 32); ++ %} ++ ins_pipe(pipe_slow); ++%} ++ ++// ----------------------------- Reduction Max -------------------------------- ++ ++instruct reduce_max16B(mRegI dst, mRegI src, vecX vsrc, vecX tmp1, vecX tmp2) %{ ++ 
predicate(vector_length(n->in(2)) == 16 && vector_element_basic_type(n->in(2)) == T_BYTE); ++ match(Set dst (MaxReductionV src vsrc)); ++ effect(TEMP_DEF dst, TEMP tmp1, TEMP tmp2); ++ format %{ "reduce $dst, $src, $vsrc\t# TEMP($tmp1, $tmp2) @reduce_max16B" %} ++ ins_encode %{ ++ __ reduce($dst$$Register, $src$$Register, $vsrc$$FloatRegister, $tmp1$$FloatRegister, $tmp2$$FloatRegister, T_BYTE, this->ideal_Opcode(), 16); ++ %} ++ ins_pipe(pipe_slow); ++%} ++ ++instruct reduce_max8S(mRegI dst, mRegI src, vecX vsrc, vecX tmp1, vecX tmp2) %{ ++ predicate(vector_length(n->in(2)) == 8 && vector_element_basic_type(n->in(2)) == T_SHORT); ++ match(Set dst (MaxReductionV src vsrc)); ++ effect(TEMP_DEF dst, TEMP tmp1, TEMP tmp2); ++ format %{ "reduce $dst, $src, $vsrc\t# TEMP($tmp1, $tmp2) @reduce_max8S" %} ++ ins_encode %{ ++ __ reduce($dst$$Register, $src$$Register, $vsrc$$FloatRegister, $tmp1$$FloatRegister, $tmp2$$FloatRegister, T_SHORT, this->ideal_Opcode(), 16); ++ %} ++ ins_pipe(pipe_slow); ++%} ++ ++instruct reduce_max4I(mRegI dst, mRegI src, vecX vsrc, vecX tmp1, vecX tmp2) %{ ++ predicate(vector_length(n->in(2)) == 4 && vector_element_basic_type(n->in(2)) == T_INT); ++ match(Set dst (MaxReductionV src vsrc)); ++ effect(TEMP_DEF dst, TEMP tmp1, TEMP tmp2); ++ format %{ "reduce $dst, $src, $vsrc\t# TEMP($tmp1, $tmp2) @reduce_max4I" %} ++ ins_encode %{ ++ __ reduce($dst$$Register, $src$$Register, $vsrc$$FloatRegister, $tmp1$$FloatRegister, $tmp2$$FloatRegister, T_INT, this->ideal_Opcode(), 16); ++ %} ++ ins_pipe(pipe_slow); ++%} ++ ++instruct reduce_max2L(mRegL dst, mRegL src, vecX vsrc, vecX tmp) %{ ++ predicate(vector_length(n->in(2)) == 2 && vector_element_basic_type(n->in(2)) == T_LONG); ++ match(Set dst (MaxReductionV src vsrc)); ++ effect(TEMP_DEF dst, TEMP tmp); ++ format %{ "reduce $dst, $src, $vsrc\t# TEMP($tmp) @reduce_max2L" %} ++ ins_encode %{ ++ __ reduce($dst$$Register, $src$$Register, $vsrc$$FloatRegister, $tmp$$FloatRegister, FNOREG, T_LONG, this->ideal_Opcode(), 16); ++ %} ++ ins_pipe(pipe_slow); ++%} ++ ++instruct reduce_max32B(mRegI dst, mRegI src, vecY vsrc, vecY tmp1, vecY tmp2) %{ ++ predicate(vector_length(n->in(2)) == 32 && vector_element_basic_type(n->in(2)) == T_BYTE); ++ match(Set dst (MaxReductionV src vsrc)); ++ effect(TEMP_DEF dst, TEMP tmp1, TEMP tmp2); ++ format %{ "reduce $dst, $src, $vsrc\t# TEMP($tmp1, $tmp2) @reduce_max32B" %} ++ ins_encode %{ ++ __ reduce($dst$$Register, $src$$Register, $vsrc$$FloatRegister, $tmp1$$FloatRegister, $tmp2$$FloatRegister, T_BYTE, this->ideal_Opcode(), 32); ++ %} ++ ins_pipe(pipe_slow); ++%} ++ ++instruct reduce_max16S(mRegI dst, mRegI src, vecY vsrc, vecY tmp1, vecY tmp2) %{ ++ predicate(vector_length(n->in(2)) == 16 && vector_element_basic_type(n->in(2)) == T_SHORT); ++ match(Set dst (MaxReductionV src vsrc)); ++ effect(TEMP_DEF dst, TEMP tmp1, TEMP tmp2); ++ format %{ "reduce $dst, $src, $vsrc\t# TEMP($tmp1, $tmp2) @reduce_max16S" %} ++ ins_encode %{ ++ __ reduce($dst$$Register, $src$$Register, $vsrc$$FloatRegister, $tmp1$$FloatRegister, $tmp2$$FloatRegister, T_SHORT, this->ideal_Opcode(), 32); ++ %} ++ ins_pipe(pipe_slow); ++%} ++ ++instruct reduce_max8I(mRegI dst, mRegI src, vecY vsrc, vecY tmp1, vecY tmp2) %{ ++ predicate(vector_length(n->in(2)) == 8 && vector_element_basic_type(n->in(2)) == T_INT); ++ match(Set dst (MaxReductionV src vsrc)); ++ effect(TEMP_DEF dst, TEMP tmp1, TEMP tmp2); ++ format %{ "reduce $dst, $src, $vsrc\t# TEMP($tmp1, $tmp2) @reduce_max8I" %} ++ ins_encode %{ ++ __ reduce($dst$$Register, 
$src$$Register, $vsrc$$FloatRegister, $tmp1$$FloatRegister, $tmp2$$FloatRegister, T_INT, this->ideal_Opcode(), 32); ++ %} ++ ins_pipe(pipe_slow); ++%} ++ ++instruct reduce_max4L(mRegL dst, mRegL src, vecY vsrc, vecY tmp1, vecY tmp2) %{ ++ predicate(vector_length(n->in(2)) == 4 && vector_element_basic_type(n->in(2)) == T_LONG); ++ match(Set dst (MaxReductionV src vsrc)); ++ effect(TEMP_DEF dst, TEMP tmp1, TEMP tmp2); ++ format %{ "reduce $dst, $src, $vsrc\t# TEMP($tmp1, $tmp2) @reduce_max4L" %} ++ ins_encode %{ ++ __ reduce($dst$$Register, $src$$Register, $vsrc$$FloatRegister, $tmp1$$FloatRegister, $tmp2$$FloatRegister, T_LONG, this->ideal_Opcode(), 32); ++ %} ++ ins_pipe(pipe_slow); ++%} ++ ++// ----------------------------- Reduction Min -------------------------------- ++ ++instruct reduce_min16B(mRegI dst, mRegI src, vecX vsrc, vecX tmp1, vecX tmp2) %{ ++ predicate(vector_length(n->in(2)) == 16 && vector_element_basic_type(n->in(2)) == T_BYTE); ++ match(Set dst (MinReductionV src vsrc)); ++ effect(TEMP_DEF dst, TEMP tmp1, TEMP tmp2); ++ format %{ "reduce $dst, $src, $vsrc\t# TEMP($tmp1, $tmp2) @reduce_min16B" %} ++ ins_encode %{ ++ __ reduce($dst$$Register, $src$$Register, $vsrc$$FloatRegister, $tmp1$$FloatRegister, $tmp2$$FloatRegister, T_BYTE, this->ideal_Opcode(), 16); ++ %} ++ ins_pipe(pipe_slow); ++%} ++ ++instruct reduce_min8S(mRegI dst, mRegI src, vecX vsrc, vecX tmp1, vecX tmp2) %{ ++ predicate(vector_length(n->in(2)) == 8 && vector_element_basic_type(n->in(2)) == T_SHORT); ++ match(Set dst (MinReductionV src vsrc)); ++ effect(TEMP_DEF dst, TEMP tmp1, TEMP tmp2); ++ format %{ "reduce $dst, $src, $vsrc\t# TEMP($tmp1, $tmp2) @reduce_min8S" %} ++ ins_encode %{ ++ __ reduce($dst$$Register, $src$$Register, $vsrc$$FloatRegister, $tmp1$$FloatRegister, $tmp2$$FloatRegister, T_SHORT, this->ideal_Opcode(), 16); ++ %} ++ ins_pipe(pipe_slow); ++%} ++ ++instruct reduce_min4I(mRegI dst, mRegI src, vecX vsrc, vecX tmp1, vecX tmp2) %{ ++ predicate(vector_length(n->in(2)) == 4 && vector_element_basic_type(n->in(2)) == T_INT); ++ match(Set dst (MinReductionV src vsrc)); ++ effect(TEMP_DEF dst, TEMP tmp1, TEMP tmp2); ++ format %{ "reduce $dst, $src, $vsrc\t# TEMP($tmp1, $tmp2) @reduce_min4I" %} ++ ins_encode %{ ++ __ reduce($dst$$Register, $src$$Register, $vsrc$$FloatRegister, $tmp1$$FloatRegister, $tmp2$$FloatRegister, T_INT, this->ideal_Opcode(), 16); ++ %} ++ ins_pipe(pipe_slow); ++%} ++ ++instruct reduce_min2L(mRegL dst, mRegL src, vecX vsrc, vecX tmp) %{ ++ predicate(vector_length(n->in(2)) == 2 && vector_element_basic_type(n->in(2)) == T_LONG); ++ match(Set dst (MinReductionV src vsrc)); ++ effect(TEMP_DEF dst, TEMP tmp); ++ format %{ "reduce $dst, $src, $vsrc\t# TEMP($tmp) @reduce_min2L" %} ++ ins_encode %{ ++ __ reduce($dst$$Register, $src$$Register, $vsrc$$FloatRegister, $tmp$$FloatRegister, FNOREG, T_LONG, this->ideal_Opcode(), 16); ++ %} ++ ins_pipe(pipe_slow); ++%} ++ ++instruct reduce_min32B(mRegI dst, mRegI src, vecY vsrc, vecY tmp1, vecY tmp2) %{ ++ predicate(vector_length(n->in(2)) == 32 && vector_element_basic_type(n->in(2)) == T_BYTE); ++ match(Set dst (MinReductionV src vsrc)); ++ effect(TEMP_DEF dst, TEMP tmp1, TEMP tmp2); ++ format %{ "reduce $dst, $src, $vsrc\t# TEMP($tmp1, $tmp2) @reduce_min32B" %} ++ ins_encode %{ ++ __ reduce($dst$$Register, $src$$Register, $vsrc$$FloatRegister, $tmp1$$FloatRegister, $tmp2$$FloatRegister, T_BYTE, this->ideal_Opcode(), 32); ++ %} ++ ins_pipe(pipe_slow); ++%} ++ ++instruct reduce_min16S(mRegI dst, mRegI src, vecY vsrc, vecY tmp1, vecY tmp2) 
%{ ++ predicate(vector_length(n->in(2)) == 16 && vector_element_basic_type(n->in(2)) == T_SHORT); ++ match(Set dst (MinReductionV src vsrc)); ++ effect(TEMP_DEF dst, TEMP tmp1, TEMP tmp2); ++ format %{ "reduce $dst, $src, $vsrc\t# TEMP($tmp1, $tmp2) @reduce_min16S" %} ++ ins_encode %{ ++ __ reduce($dst$$Register, $src$$Register, $vsrc$$FloatRegister, $tmp1$$FloatRegister, $tmp2$$FloatRegister, T_SHORT, this->ideal_Opcode(), 32); ++ %} ++ ins_pipe(pipe_slow); ++%} ++ ++instruct reduce_min8I(mRegI dst, mRegI src, vecY vsrc, vecY tmp1, vecY tmp2) %{ ++ predicate(vector_length(n->in(2)) == 8 && vector_element_basic_type(n->in(2)) == T_INT); ++ match(Set dst (MinReductionV src vsrc)); ++ effect(TEMP_DEF dst, TEMP tmp1, TEMP tmp2); ++ format %{ "reduce $dst, $src, $vsrc\t# TEMP($tmp1, $tmp2) @reduce_min8I" %} ++ ins_encode %{ ++ __ reduce($dst$$Register, $src$$Register, $vsrc$$FloatRegister, $tmp1$$FloatRegister, $tmp2$$FloatRegister, T_INT, this->ideal_Opcode(), 32); ++ %} ++ ins_pipe(pipe_slow); ++%} ++ ++instruct reduce_min4L(mRegL dst, mRegL src, vecY vsrc, vecY tmp1, vecY tmp2) %{ ++ predicate(vector_length(n->in(2)) == 4 && vector_element_basic_type(n->in(2)) == T_LONG); ++ match(Set dst (MinReductionV src vsrc)); ++ effect(TEMP_DEF dst, TEMP tmp1, TEMP tmp2); ++ format %{ "reduce $dst, $src, $vsrc\t# TEMP($tmp1, $tmp2) @reduce_min4L" %} ++ ins_encode %{ ++ __ reduce($dst$$Register, $src$$Register, $vsrc$$FloatRegister, $tmp1$$FloatRegister, $tmp2$$FloatRegister, T_LONG, this->ideal_Opcode(), 32); ++ %} ++ ins_pipe(pipe_slow); ++%} ++ ++// ----------------------------- Reduction And -------------------------------- ++ ++instruct reduce_and16B(mRegI dst, mRegI src, vecX vsrc, vecX tmp1, vecX tmp2) %{ ++ predicate(vector_length(n->in(2)) == 16 && vector_element_basic_type(n->in(2)) == T_BYTE); ++ match(Set dst (AndReductionV src vsrc)); ++ effect(TEMP_DEF dst, TEMP tmp1, TEMP tmp2); ++ format %{ "reduce $dst, $src, $vsrc\t# TEMP($tmp1, $tmp2) @reduce_and16B" %} ++ ins_encode %{ ++ __ reduce($dst$$Register, $src$$Register, $vsrc$$FloatRegister, $tmp1$$FloatRegister, $tmp2$$FloatRegister, T_BYTE, this->ideal_Opcode(), 16); ++ %} ++ ins_pipe(pipe_slow); ++%} ++ ++instruct reduce_and8S(mRegI dst, mRegI src, vecX vsrc, vecX tmp1, vecX tmp2) %{ ++ predicate(vector_length(n->in(2)) == 8 && vector_element_basic_type(n->in(2)) == T_SHORT); ++ match(Set dst (AndReductionV src vsrc)); ++ effect(TEMP_DEF dst, TEMP tmp1, TEMP tmp2); ++ format %{ "reduce $dst, $src, $vsrc\t# TEMP($tmp1, $tmp2) @reduce_and8S" %} ++ ins_encode %{ ++ __ reduce($dst$$Register, $src$$Register, $vsrc$$FloatRegister, $tmp1$$FloatRegister, $tmp2$$FloatRegister, T_SHORT, this->ideal_Opcode(), 16); ++ %} ++ ins_pipe(pipe_slow); ++%} ++ ++instruct reduce_and4I(mRegI dst, mRegI src, vecX vsrc, vecX tmp1, vecX tmp2) %{ ++ predicate(vector_length(n->in(2)) == 4 && vector_element_basic_type(n->in(2)) == T_INT); ++ match(Set dst (AndReductionV src vsrc)); ++ effect(TEMP_DEF dst, TEMP tmp1, TEMP tmp2); ++ format %{ "reduce $dst, $src, $vsrc\t# TEMP($tmp1, $tmp2) @reduce_and4I" %} ++ ins_encode %{ ++ __ reduce($dst$$Register, $src$$Register, $vsrc$$FloatRegister, $tmp1$$FloatRegister, $tmp2$$FloatRegister, T_INT, this->ideal_Opcode(), 16); ++ %} ++ ins_pipe(pipe_slow); ++%} ++ ++instruct reduce_and2L(mRegL dst, mRegL src, vecX vsrc, vecX tmp) %{ ++ predicate(vector_length(n->in(2)) == 2 && vector_element_basic_type(n->in(2)) == T_LONG); ++ match(Set dst (AndReductionV src vsrc)); ++ effect(TEMP_DEF dst, TEMP tmp); ++ format %{ "reduce 
$dst, $src, $vsrc\t# TEMP($tmp) @reduce_and2L" %} ++ ins_encode %{ ++ __ reduce($dst$$Register, $src$$Register, $vsrc$$FloatRegister, $tmp$$FloatRegister, FNOREG, T_LONG, this->ideal_Opcode(), 16); ++ %} ++ ins_pipe(pipe_slow); ++%} ++ ++instruct reduce_and32B(mRegI dst, mRegI src, vecY vsrc, vecY tmp1, vecY tmp2) %{ ++ predicate(vector_length(n->in(2)) == 32 && vector_element_basic_type(n->in(2)) == T_BYTE); ++ match(Set dst (AndReductionV src vsrc)); ++ effect(TEMP_DEF dst, TEMP tmp1, TEMP tmp2); ++ format %{ "reduce $dst, $src, $vsrc\t# TEMP($tmp1, $tmp2) @reduce_and32B" %} ++ ins_encode %{ ++ __ reduce($dst$$Register, $src$$Register, $vsrc$$FloatRegister, $tmp1$$FloatRegister, $tmp2$$FloatRegister, T_BYTE, this->ideal_Opcode(), 32); ++ %} ++ ins_pipe(pipe_slow); ++%} ++ ++instruct reduce_and16S(mRegI dst, mRegI src, vecY vsrc, vecY tmp1, vecY tmp2) %{ ++ predicate(vector_length(n->in(2)) == 16 && vector_element_basic_type(n->in(2)) == T_SHORT); ++ match(Set dst (AndReductionV src vsrc)); ++ effect(TEMP_DEF dst, TEMP tmp1, TEMP tmp2); ++ format %{ "reduce $dst, $src, $vsrc\t# TEMP($tmp1, $tmp2) @reduce_and16S" %} ++ ins_encode %{ ++ __ reduce($dst$$Register, $src$$Register, $vsrc$$FloatRegister, $tmp1$$FloatRegister, $tmp2$$FloatRegister, T_SHORT, this->ideal_Opcode(), 32); ++ %} ++ ins_pipe(pipe_slow); ++%} ++ ++instruct reduce_and8I(mRegI dst, mRegI src, vecY vsrc, vecY tmp1, vecY tmp2) %{ ++ predicate(vector_length(n->in(2)) == 8 && vector_element_basic_type(n->in(2)) == T_INT); ++ match(Set dst (AndReductionV src vsrc)); ++ effect(TEMP_DEF dst, TEMP tmp1, TEMP tmp2); ++ format %{ "reduce $dst, $src, $vsrc\t# TEMP($tmp1, $tmp2) @reduce_and8I" %} ++ ins_encode %{ ++ __ reduce($dst$$Register, $src$$Register, $vsrc$$FloatRegister, $tmp1$$FloatRegister, $tmp2$$FloatRegister, T_INT, this->ideal_Opcode(), 32); ++ %} ++ ins_pipe(pipe_slow); ++%} ++ ++instruct reduce_and4L(mRegL dst, mRegL src, vecY vsrc, vecY tmp1, vecY tmp2) %{ ++ predicate(vector_length(n->in(2)) == 4 && vector_element_basic_type(n->in(2)) == T_LONG); ++ match(Set dst (AndReductionV src vsrc)); ++ effect(TEMP_DEF dst, TEMP tmp1, TEMP tmp2); ++ format %{ "reduce $dst, $src, $vsrc\t# TEMP($tmp1, $tmp2) @reduce_and4L" %} ++ ins_encode %{ ++ __ reduce($dst$$Register, $src$$Register, $vsrc$$FloatRegister, $tmp1$$FloatRegister, $tmp2$$FloatRegister, T_LONG, this->ideal_Opcode(), 32); ++ %} ++ ins_pipe(pipe_slow); ++%} ++ ++// ----------------------------- Reduction Or --------------------------------- ++ ++instruct reduce_or16B(mRegI dst, mRegI src, vecX vsrc, vecX tmp1, vecX tmp2) %{ ++ predicate(vector_length(n->in(2)) == 16 && vector_element_basic_type(n->in(2)) == T_BYTE); ++ match(Set dst (OrReductionV src vsrc)); ++ effect(TEMP_DEF dst, TEMP tmp1, TEMP tmp2); ++ format %{ "reduce $dst, $src, $vsrc\t# TEMP($tmp1, $tmp2) @reduce_or16B" %} ++ ins_encode %{ ++ __ reduce($dst$$Register, $src$$Register, $vsrc$$FloatRegister, $tmp1$$FloatRegister, $tmp2$$FloatRegister, T_BYTE, this->ideal_Opcode(), 16); ++ %} ++ ins_pipe(pipe_slow); ++%} ++ ++instruct reduce_or8S(mRegI dst, mRegI src, vecX vsrc, vecX tmp1, vecX tmp2) %{ ++ predicate(vector_length(n->in(2)) == 8 && vector_element_basic_type(n->in(2)) == T_SHORT); ++ match(Set dst (OrReductionV src vsrc)); ++ effect(TEMP_DEF dst, TEMP tmp1, TEMP tmp2); ++ format %{ "reduce $dst, $src, $vsrc\t# TEMP($tmp1, $tmp2) @reduce_or8S" %} ++ ins_encode %{ ++ __ reduce($dst$$Register, $src$$Register, $vsrc$$FloatRegister, $tmp1$$FloatRegister, $tmp2$$FloatRegister, T_SHORT, 
this->ideal_Opcode(), 16); ++ %} ++ ins_pipe(pipe_slow); ++%} ++ ++instruct reduce_or4I(mRegI dst, mRegI src, vecX vsrc, vecX tmp1, vecX tmp2) %{ ++ predicate(vector_length(n->in(2)) == 4 && vector_element_basic_type(n->in(2)) == T_INT); ++ match(Set dst (OrReductionV src vsrc)); ++ effect(TEMP_DEF dst, TEMP tmp1, TEMP tmp2); ++ format %{ "reduce $dst, $src, $vsrc\t# TEMP($tmp1, $tmp2) @reduce_or4I" %} ++ ins_encode %{ ++ __ reduce($dst$$Register, $src$$Register, $vsrc$$FloatRegister, $tmp1$$FloatRegister, $tmp2$$FloatRegister, T_INT, this->ideal_Opcode(), 16); ++ %} ++ ins_pipe(pipe_slow); ++%} ++ ++instruct reduce_or2L(mRegL dst, mRegL src, vecX vsrc, vecX tmp) %{ ++ predicate(vector_length(n->in(2)) == 2 && vector_element_basic_type(n->in(2)) == T_LONG); ++ match(Set dst (OrReductionV src vsrc)); ++ effect(TEMP_DEF dst, TEMP tmp); ++ format %{ "reduce $dst, $src, $vsrc\t# TEMP($tmp) @reduce_or2L" %} ++ ins_encode %{ ++ __ reduce($dst$$Register, $src$$Register, $vsrc$$FloatRegister, $tmp$$FloatRegister, FNOREG, T_LONG, this->ideal_Opcode(), 16); ++ %} ++ ins_pipe(pipe_slow); ++%} ++ ++instruct reduce_or32B(mRegI dst, mRegI src, vecY vsrc, vecY tmp1, vecY tmp2) %{ ++ predicate(vector_length(n->in(2)) == 32 && vector_element_basic_type(n->in(2)) == T_BYTE); ++ match(Set dst (OrReductionV src vsrc)); ++ effect(TEMP_DEF dst, TEMP tmp1, TEMP tmp2); ++ format %{ "reduce $dst, $src, $vsrc\t# TEMP($tmp1, $tmp2) @reduce_or32B" %} ++ ins_encode %{ ++ __ reduce($dst$$Register, $src$$Register, $vsrc$$FloatRegister, $tmp1$$FloatRegister, $tmp2$$FloatRegister, T_BYTE, this->ideal_Opcode(), 32); ++ %} ++ ins_pipe(pipe_slow); ++%} ++ ++instruct reduce_or16S(mRegI dst, mRegI src, vecY vsrc, vecY tmp1, vecY tmp2) %{ ++ predicate(vector_length(n->in(2)) == 16 && vector_element_basic_type(n->in(2)) == T_SHORT); ++ match(Set dst (OrReductionV src vsrc)); ++ effect(TEMP_DEF dst, TEMP tmp1, TEMP tmp2); ++ format %{ "reduce $dst, $src, $vsrc\t# TEMP($tmp1, $tmp2) @reduce_or16S" %} ++ ins_encode %{ ++ __ reduce($dst$$Register, $src$$Register, $vsrc$$FloatRegister, $tmp1$$FloatRegister, $tmp2$$FloatRegister, T_SHORT, this->ideal_Opcode(), 32); ++ %} ++ ins_pipe(pipe_slow); ++%} ++ ++instruct reduce_or8I(mRegI dst, mRegI src, vecY vsrc, vecY tmp1, vecY tmp2) %{ ++ predicate(vector_length(n->in(2)) == 8 && vector_element_basic_type(n->in(2)) == T_INT); ++ match(Set dst (OrReductionV src vsrc)); ++ effect(TEMP_DEF dst, TEMP tmp1, TEMP tmp2); ++ format %{ "reduce $dst, $src, $vsrc\t# TEMP($tmp1, $tmp2) @reduce_or8I" %} ++ ins_encode %{ ++ __ reduce($dst$$Register, $src$$Register, $vsrc$$FloatRegister, $tmp1$$FloatRegister, $tmp2$$FloatRegister, T_INT, this->ideal_Opcode(), 32); ++ %} ++ ins_pipe(pipe_slow); ++%} ++ ++instruct reduce_or4L(mRegL dst, mRegL src, vecY vsrc, vecY tmp1, vecY tmp2) %{ ++ predicate(vector_length(n->in(2)) == 4 && vector_element_basic_type(n->in(2)) == T_LONG); ++ match(Set dst (OrReductionV src vsrc)); ++ effect(TEMP_DEF dst, TEMP tmp1, TEMP tmp2); ++ format %{ "reduce $dst, $src, $vsrc\t# TEMP($tmp1, $tmp2) @reduce_or4L" %} ++ ins_encode %{ ++ __ reduce($dst$$Register, $src$$Register, $vsrc$$FloatRegister, $tmp1$$FloatRegister, $tmp2$$FloatRegister, T_LONG, this->ideal_Opcode(), 32); ++ %} ++ ins_pipe(pipe_slow); ++%} ++ ++// ----------------------------- Reduction Xor -------------------------------- ++ ++instruct reduce_xor16B(mRegI dst, mRegI src, vecX vsrc, vecX tmp1, vecX tmp2) %{ ++ predicate(vector_length(n->in(2)) == 16 && vector_element_basic_type(n->in(2)) == T_BYTE); ++ match(Set 
dst (XorReductionV src vsrc)); ++ effect(TEMP_DEF dst, TEMP tmp1, TEMP tmp2); ++ format %{ "reduce $dst, $src, $vsrc\t# TEMP($tmp1, $tmp2) @reduce_xor16B" %} ++ ins_encode %{ ++ __ reduce($dst$$Register, $src$$Register, $vsrc$$FloatRegister, $tmp1$$FloatRegister, $tmp2$$FloatRegister, T_BYTE, this->ideal_Opcode(), 16); ++ %} ++ ins_pipe(pipe_slow); ++%} ++ ++instruct reduce_xor8S(mRegI dst, mRegI src, vecX vsrc, vecX tmp1, vecX tmp2) %{ ++ predicate(vector_length(n->in(2)) == 8 && vector_element_basic_type(n->in(2)) == T_SHORT); ++ match(Set dst (XorReductionV src vsrc)); ++ effect(TEMP_DEF dst, TEMP tmp1, TEMP tmp2); ++ format %{ "reduce $dst, $src, $vsrc\t# TEMP($tmp1, $tmp2) @reduce_xor8S" %} ++ ins_encode %{ ++ __ reduce($dst$$Register, $src$$Register, $vsrc$$FloatRegister, $tmp1$$FloatRegister, $tmp2$$FloatRegister, T_SHORT, this->ideal_Opcode(), 16); ++ %} ++ ins_pipe(pipe_slow); ++%} ++ ++instruct reduce_xor4I(mRegI dst, mRegI src, vecX vsrc, vecX tmp1, vecX tmp2) %{ ++ predicate(vector_length(n->in(2)) == 4 && vector_element_basic_type(n->in(2)) == T_INT); ++ match(Set dst (XorReductionV src vsrc)); ++ effect(TEMP_DEF dst, TEMP tmp1, TEMP tmp2); ++ format %{ "reduce $dst, $src, $vsrc\t# TEMP($tmp1, $tmp2) @reduce_xor4I" %} ++ ins_encode %{ ++ __ reduce($dst$$Register, $src$$Register, $vsrc$$FloatRegister, $tmp1$$FloatRegister, $tmp2$$FloatRegister, T_INT, this->ideal_Opcode(), 16); ++ %} ++ ins_pipe(pipe_slow); ++%} ++ ++instruct reduce_xor2L(mRegL dst, mRegL src, vecX vsrc, vecX tmp) %{ ++ predicate(vector_length(n->in(2)) == 2 && vector_element_basic_type(n->in(2)) == T_LONG); ++ match(Set dst (XorReductionV src vsrc)); ++ effect(TEMP_DEF dst, TEMP tmp); ++ format %{ "reduce $dst, $src, $vsrc\t# TEMP($tmp) @reduce_xor2L" %} ++ ins_encode %{ ++ __ reduce($dst$$Register, $src$$Register, $vsrc$$FloatRegister, $tmp$$FloatRegister, FNOREG, T_LONG, this->ideal_Opcode(), 16); ++ %} ++ ins_pipe(pipe_slow); ++%} ++ ++instruct reduce_xor32B(mRegI dst, mRegI src, vecY vsrc, vecY tmp1, vecY tmp2) %{ ++ predicate(vector_length(n->in(2)) == 32 && vector_element_basic_type(n->in(2)) == T_BYTE); ++ match(Set dst (XorReductionV src vsrc)); ++ effect(TEMP_DEF dst, TEMP tmp1, TEMP tmp2); ++ format %{ "reduce $dst, $src, $vsrc\t# TEMP($tmp1, $tmp2) @reduce_xor32B" %} ++ ins_encode %{ ++ __ reduce($dst$$Register, $src$$Register, $vsrc$$FloatRegister, $tmp1$$FloatRegister, $tmp2$$FloatRegister, T_BYTE, this->ideal_Opcode(), 32); ++ %} ++ ins_pipe(pipe_slow); ++%} ++ ++instruct reduce_xor16S(mRegI dst, mRegI src, vecY vsrc, vecY tmp1, vecY tmp2) %{ ++ predicate(vector_length(n->in(2)) == 16 && vector_element_basic_type(n->in(2)) == T_SHORT); ++ match(Set dst (XorReductionV src vsrc)); ++ effect(TEMP_DEF dst, TEMP tmp1, TEMP tmp2); ++ format %{ "reduce $dst, $src, $vsrc\t# TEMP($tmp1, $tmp2) @reduce_xor16S" %} ++ ins_encode %{ ++ __ reduce($dst$$Register, $src$$Register, $vsrc$$FloatRegister, $tmp1$$FloatRegister, $tmp2$$FloatRegister, T_SHORT, this->ideal_Opcode(), 32); ++ %} ++ ins_pipe(pipe_slow); ++%} ++ ++instruct reduce_xor8I(mRegI dst, mRegI src, vecY vsrc, vecY tmp1, vecY tmp2) %{ ++ predicate(vector_length(n->in(2)) == 8 && vector_element_basic_type(n->in(2)) == T_INT); ++ match(Set dst (XorReductionV src vsrc)); ++ effect(TEMP_DEF dst, TEMP tmp1, TEMP tmp2); ++ format %{ "reduce $dst, $src, $vsrc\t# TEMP($tmp1, $tmp2) @reduce_xor8I" %} ++ ins_encode %{ ++ __ reduce($dst$$Register, $src$$Register, $vsrc$$FloatRegister, $tmp1$$FloatRegister, $tmp2$$FloatRegister, T_INT, this->ideal_Opcode(), 
32); ++ %} ++ ins_pipe(pipe_slow); ++%} ++ ++instruct reduce_xor4L(mRegL dst, mRegL src, vecY vsrc, vecY tmp1, vecY tmp2) %{ ++ predicate(vector_length(n->in(2)) == 4 && vector_element_basic_type(n->in(2)) == T_LONG); ++ match(Set dst (XorReductionV src vsrc)); ++ effect(TEMP_DEF dst, TEMP tmp1, TEMP tmp2); ++ format %{ "reduce $dst, $src, $vsrc\t# TEMP($tmp1, $tmp2) @reduce_xor4L" %} ++ ins_encode %{ ++ __ reduce($dst$$Register, $src$$Register, $vsrc$$FloatRegister, $tmp1$$FloatRegister, $tmp2$$FloatRegister, T_LONG, this->ideal_Opcode(), 32); ++ %} ++ ins_pipe(pipe_slow); ++%} ++ ++// ------------------------------ RoundDoubleModeV ---------------------------- ++ ++instruct round2D(vecX dst, vecX src, immI rmode) %{ ++ predicate(vector_length(n) == 2); ++ match(Set dst (RoundDoubleModeV src rmode)); ++ format %{ "vfrint $dst, $src, $rmode\t# @round2D" %} ++ ins_encode %{ ++ switch ($rmode$$constant) { ++ case RoundDoubleModeNode::rmode_rint: __ vfrintrne_d($dst$$FloatRegister, $src$$FloatRegister); break; ++ case RoundDoubleModeNode::rmode_floor: __ vfrintrm_d($dst$$FloatRegister, $src$$FloatRegister); break; ++ case RoundDoubleModeNode::rmode_ceil: __ vfrintrp_d($dst$$FloatRegister, $src$$FloatRegister); break; ++ } ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct round4D(vecY dst, vecY src, immI rmode) %{ ++ predicate(vector_length(n) == 4); ++ match(Set dst (RoundDoubleModeV src rmode)); ++ format %{ "xvfrint $dst, $src, $rmode\t# @round4D" %} ++ ins_encode %{ ++ switch ($rmode$$constant) { ++ case RoundDoubleModeNode::rmode_rint: __ xvfrintrne_d($dst$$FloatRegister, $src$$FloatRegister); break; ++ case RoundDoubleModeNode::rmode_floor: __ xvfrintrm_d($dst$$FloatRegister, $src$$FloatRegister); break; ++ case RoundDoubleModeNode::rmode_ceil: __ xvfrintrp_d($dst$$FloatRegister, $src$$FloatRegister); break; ++ } ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++// ---------------------------- Vector Cast B2X ------------------------------- ++ ++instruct cvt16Bto16S(vecY dst, vecX src) %{ ++ predicate(vector_length(n) == 16 && vector_element_basic_type(n) == T_SHORT); ++ match(Set dst (VectorCastB2X src)); ++ format %{ "vext2xv.h.b $dst, $src\t# @cvt16Bto16S" %} ++ ins_encode %{ ++ __ vext2xv_h_b($dst$$FloatRegister, $src$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++// --------------------------- Vector Cast S2X -------------------------------- ++ ++instruct cvt16Sto16B(vecX dst, vecY src) %{ ++ predicate(vector_length(n) == 16 && vector_element_basic_type(n) == T_BYTE); ++ match(Set dst (VectorCastS2X src)); ++ effect(TEMP_DEF dst); ++ format %{ "vconvert $dst, $src\t# @cvt16Sto16B" %} ++ ins_encode %{ ++ __ xvpermi_q($dst$$FloatRegister, $src$$FloatRegister, 0x01); ++ __ vsrlni_b_h($dst$$FloatRegister, $src$$FloatRegister, 0); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct cvt8Sto8I(vecY dst, vecX src) %{ ++ predicate(vector_length(n) == 8 && vector_element_basic_type(n) == T_INT); ++ match(Set dst (VectorCastS2X src)); ++ format %{ "vext2xv.w.h $dst, $src\t# @cvt8Sto8I" %} ++ ins_encode %{ ++ __ vext2xv_w_h($dst$$FloatRegister, $src$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct cvt8Sto8F(vecY dst, vecX src) %{ ++ predicate(vector_length(n) == 8 && vector_element_basic_type(n) == T_FLOAT); ++ match(Set dst (VectorCastS2X src)); ++ format %{ "vconvert $dst, $src\t# @cvt8Sto8F" %} ++ ins_encode %{ ++ __ vext2xv_w_h($dst$$FloatRegister, $src$$FloatRegister); ++ __ xvffint_s_w($dst$$FloatRegister, $dst$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} 
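Note on the reduce_* patterns above: they all funnel into a single MacroAssembler::reduce() helper, selected by ideal_Opcode() and the element type. At the node level a C2 vector reduction folds the incoming scalar operand into every lane of the vector operand under the matched operation (add/max/min/and/or/xor). A scalar sketch of that contract, in plain C++ with illustrative names only (not HotSpot code and not part of the patch):

    #include <algorithm>
    #include <cstdio>

    // Scalar model of a C2 MaxReductionV/MinReductionV/AndReductionV/... node:
    // fold the scalar input 'src' into every lane of 'vsrc'.
    template <typename T, typename Op>
    T reduce(T src, const T* vsrc, int lanes, Op op) {
      T acc = src;
      for (int i = 0; i < lanes; i++) acc = op(acc, vsrc[i]);
      return acc;
    }

    int main() {
      int v[4] = {3, -7, 42, 5};
      int mx  = reduce(0,  v, 4, [](int a, int b) { return std::max(a, b); });
      int all = reduce(-1, v, 4, [](int a, int b) { return a & b; });
      printf("max=%d and=%d\n", mx, all);  // max=42 and=0
      return 0;
    }

The vecX forms pass 16 (bytes) and the vecY forms pass 32 as the last argument, which is how the helper distinguishes the 128-bit LSX path from the 256-bit LASX path.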
++ ++// --------------------------- Vector Cast I2X -------------------------------- ++ ++instruct cvt8Ito8S(vecX dst, vecY src) %{ ++ predicate(vector_length(n) == 8 && vector_element_basic_type(n) == T_SHORT); ++ match(Set dst (VectorCastI2X src)); ++ effect(TEMP_DEF dst); ++ format %{ "vconvert $dst, $src\t# @cvt8Ito8S" %} ++ ins_encode %{ ++ __ xvpermi_q($dst$$FloatRegister, $src$$FloatRegister, 0x01); ++ __ vsrlni_h_w($dst$$FloatRegister, $src$$FloatRegister, 0); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct cvt4Ito4F(vecX dst, vecX src) %{ ++ predicate(vector_length(n) == 4 && vector_element_basic_type(n) == T_FLOAT); ++ match(Set dst (VectorCastI2X src)); ++ format %{ "vffint.s.w $dst, $src\t# @cvt4Ito4F" %} ++ ins_encode %{ ++ __ vffint_s_w($dst$$FloatRegister, $src$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct cvt4Ito4L(vecY dst, vecX src) %{ ++ predicate(vector_length(n) == 4 && vector_element_basic_type(n) == T_LONG); ++ match(Set dst (VectorCastI2X src)); ++ format %{ "vext2xv.d.w $dst, $src\t# @cvt4Ito4L" %} ++ ins_encode %{ ++ __ vext2xv_d_w($dst$$FloatRegister, $src$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct cvt8Ito8F(vecY dst, vecY src) %{ ++ predicate(vector_length(n) == 8 && vector_element_basic_type(n) == T_FLOAT); ++ match(Set dst (VectorCastI2X src)); ++ format %{ "xvffint.s.w $dst, $src\t# @cvt8Ito8F" %} ++ ins_encode %{ ++ __ xvffint_s_w($dst$$FloatRegister, $src$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct cvt4Ito4D(vecY dst, vecX src) %{ ++ predicate(vector_length(n) == 4 && vector_element_basic_type(n) == T_DOUBLE); ++ match(Set dst (VectorCastI2X src)); ++ format %{ "vconvert $dst, $src\t# @cvt4Ito4D" %} ++ ins_encode %{ ++ __ vext2xv_d_w($dst$$FloatRegister, $src$$FloatRegister); ++ __ xvffint_d_l($dst$$FloatRegister, $dst$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++// ----------------------------- Vector Cast L2X ------------------------------ ++ ++instruct cvt4Lto4I(vecX dst, vecY src) %{ ++ predicate(vector_length(n) == 4 && vector_element_basic_type(n) == T_INT); ++ match(Set dst (VectorCastL2X src)); ++ effect(TEMP_DEF dst); ++ format %{ "vconvert $dst, $src\t# @cvt4Lto4I" %} ++ ins_encode %{ ++ __ xvpermi_q($dst$$FloatRegister, $src$$FloatRegister, 0x01); ++ __ vsrlni_w_d($dst$$FloatRegister, $src$$FloatRegister, 0); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct cvt4Lto4F(vecX dst, vecY src, vecY tmp) %{ ++ predicate(vector_length(n) == 4 && vector_element_basic_type(n) == T_FLOAT); ++ match(Set dst (VectorCastL2X src)); ++ effect(TEMP tmp); ++ format %{ "vconvert $dst, $src\t# TEMP($tmp) @cvt4Lto4F" %} ++ ins_encode %{ ++ __ xvpermi_q($tmp$$FloatRegister, $src$$FloatRegister, 0x01); ++ __ xvffint_s_l($dst$$FloatRegister, $tmp$$FloatRegister, $src$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct cvt2Lto2D(vecX dst, vecX src) %{ ++ predicate(vector_length(n) == 2 && vector_element_basic_type(n) == T_DOUBLE); ++ match(Set dst (VectorCastL2X src)); ++ format %{ "vffint.d.l $dst, $src\t# @cvt2Lto2D" %} ++ ins_encode %{ ++ __ vffint_d_l($dst$$FloatRegister, $src$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct cvt4Lto4D(vecY dst, vecY src) %{ ++ predicate(vector_length(n) == 4 && vector_element_basic_type(n) == T_DOUBLE); ++ match(Set dst (VectorCastL2X src)); ++ format %{ "xvffint.d.l $dst, $src\t# @cvt4Lto4D" %} ++ ins_encode %{ ++ __ xvffint_d_l($dst$$FloatRegister, $src$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ 
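The VectorCast patterns in this block come in two shapes: widening casts use a single vext2xv.* sign-extension (optionally followed by vffint/xvffint for an integer-to-float conversion), while narrowing casts move the upper 128-bit half down with xvpermi_q and then truncate lane pairs with a vsrlni.* whose shift count is 0. A per-lane sketch of the two shapes, in plain C++ with illustrative names (not HotSpot code):

    #include <cstdint>
    #include <cstdio>

    // Widening: sign-extend each lane (what vext2xv.w.h / vext2xv.d.w do).
    static void widen_s2i(const int16_t* src, int32_t* dst, int n) {
      for (int i = 0; i < n; i++) dst[i] = src[i];
    }

    // Narrowing: keep only the low half of each lane
    // (the xvpermi_q + vsrlni.h.w/vsrlni.w.d sequence with shift 0).
    static void narrow_i2s(const int32_t* src, int16_t* dst, int n) {
      for (int i = 0; i < n; i++) dst[i] = (int16_t)src[i];
    }

    int main() {
      int32_t i[4] = {1, -1, 70000, -70000};
      int16_t s[4];
      narrow_i2s(i, s, 4);
      printf("%d %d %d %d\n", s[0], s[1], s[2], s[3]);  // 1 -1 4464 -4464
      return 0;
    }

Java's narrowing conversions for integer vectors are plain truncation, which is why a logical shift-right-narrow by 0 bits is sufficient here.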
++// ----------------------------- Vector Cast F2X ------------------------------ ++ ++instruct cvt8Fto8S(vecX dst, vecY src, vecY tmp) %{ ++ predicate(vector_length(n) == 8 && vector_element_basic_type(n) == T_SHORT); ++ match(Set dst (VectorCastF2X src)); ++ effect(TEMP_DEF dst, TEMP tmp); ++ format %{ "vconvert $dst, $src\t# TEMP($tmp) @cvt8Fto8S" %} ++ ins_encode %{ ++ __ vftintrz_w_s($tmp$$FloatRegister, $src$$FloatRegister); ++ __ xvpermi_q($dst$$FloatRegister, $tmp$$FloatRegister, 0x01); ++ __ vsrlni_h_w($dst$$FloatRegister, $tmp$$FloatRegister, 0); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct cvt4Fto4I(vecX dst, vecX src) %{ ++ predicate(vector_length(n) == 4 && vector_element_basic_type(n) == T_INT); ++ match(Set dst (VectorCastF2X src)); ++ format %{ "vftint.w.s $dst, $src\t# @cvt2Fto2I" %} ++ ins_encode %{ ++ __ vftintrz_w_s($dst$$FloatRegister, $src$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct cvt8Fto8I(vecY dst, vecY src) %{ ++ predicate(vector_length(n) == 8 && vector_element_basic_type(n) == T_INT); ++ match(Set dst (VectorCastF2X src)); ++ format %{ "xvftint.w.s $dst, $src\t# @cvt4Fto4I" %} ++ ins_encode %{ ++ __ xvftintrz_w_s($dst$$FloatRegister, $src$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct cvt4Fto4L(vecY dst, vecX src) %{ ++ predicate(vector_length(n) == 4 && vector_element_basic_type(n) == T_LONG); ++ match(Set dst (VectorCastF2X src)); ++ effect(TEMP_DEF dst); ++ format %{ "vconvert $dst, $src\t# @cvt4Fto4L" %} ++ ins_encode %{ ++ __ xvpermi_d($dst$$FloatRegister, $src$$FloatRegister, 0b01010000); ++ __ xvftintrzl_l_s($dst$$FloatRegister, $dst$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct cvt4Fto4D(vecY dst, vecX src) %{ ++ predicate(vector_length(n) == 4 && vector_element_basic_type(n) == T_DOUBLE); ++ match(Set dst (VectorCastF2X src)); ++ format %{ "vconvert $dst, $src\t# @cvt4Fto4D" %} ++ ins_encode %{ ++ __ xvpermi_d($dst$$FloatRegister, $src$$FloatRegister, 0b01010000); ++ __ xvfcvtl_d_s($dst$$FloatRegister, $dst$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++// ---------------------------- Vector Cast D2X ------------------------------- ++ ++instruct cvt4Dto4I(vecX dst, vecY src, vecY tmp) %{ ++ predicate(vector_length(n) == 4 && vector_element_basic_type(n) == T_INT); ++ match(Set dst (VectorCastD2X src)); ++ effect(TEMP tmp); ++ format %{ "vconvert $dst, $src\t# TEMP($tmp) @cvt4Dto4I" %} ++ ins_encode %{ ++ __ xvftintrz_w_d($tmp$$FloatRegister, $src$$FloatRegister, $src$$FloatRegister); ++ __ xvpermi_d($dst$$FloatRegister, $tmp$$FloatRegister, 0b11011000); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct cvt4Dto4F(vecX dst, vecY src, vecY tmp) %{ ++ predicate(vector_length(n) == 4 && vector_element_basic_type(n) == T_FLOAT); ++ match(Set dst (VectorCastD2X src)); ++ effect(TEMP tmp); ++ format %{ "vconvert $dst, $src\t# TEMP($tmp) @cvt4Dto4F" %} ++ ins_encode %{ ++ __ xvfcvt_s_d($tmp$$FloatRegister, $src$$FloatRegister, $src$$FloatRegister); ++ __ xvpermi_d($dst$$FloatRegister, $tmp$$FloatRegister, 0b11011000); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct cvt2Dto2L(vecX dst, vecX src) %{ ++ predicate(vector_length(n) == 2 && vector_element_basic_type(n) == T_LONG); ++ match(Set dst (VectorCastD2X src)); ++ format %{ "vftint.l.d $dst, $src\t# @cvt2Dto2L" %} ++ ins_encode %{ ++ __ vftintrz_l_d($dst$$FloatRegister, $src$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct cvt4Dto4L(vecY dst, vecY src) %{ ++ predicate(vector_length(n) == 4 && 
vector_element_basic_type(n) == T_LONG); ++ match(Set dst (VectorCastD2X src)); ++ format %{ "xvftint.l.d $dst, $src\t# @cvt4Dto4L" %} ++ ins_encode %{ ++ __ xvftintrz_l_d($dst$$FloatRegister, $src$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++// ------------------------------ VectorReinterpret --------------------------- ++ ++instruct reinterpretX(vecX dst) ++%{ ++ predicate(vector_length_in_bytes(n) == 16 && vector_length_in_bytes(n->in(1)) == 16); ++ match(Set dst (VectorReinterpret dst)); ++ format %{ "vreinterpret $dst\t# @reinterpretX" %} ++ ins_encode %{ ++ // empty ++ %} ++ ins_pipe(empty); ++%} ++ ++instruct reinterpretY(vecY dst) ++%{ ++ predicate(vector_length_in_bytes(n) == 32 && vector_length_in_bytes(n->in(1)) == 32); ++ match(Set dst (VectorReinterpret dst)); ++ format %{ "xvreinterpret $dst\t# @reinterpretY" %} ++ ins_encode %{ ++ // empty ++ %} ++ ins_pipe(empty); ++%} ++ ++instruct reinterpretX2Y(vecY dst, vecX src) ++%{ ++ predicate(vector_length_in_bytes(n) == 32 && vector_length_in_bytes(n->in(1)) == 16); ++ match(Set dst (VectorReinterpret src)); ++ format %{ "vreinterpret $dst, $src\t# @reinterpretX2Y" %} ++ ins_encode %{ ++ // The higher 128-bits of the "dst" register must be cleared to zero. ++ if ($dst$$FloatRegister == $src$$FloatRegister) { ++ __ xvinsgr2vr_d($dst$$FloatRegister, R0, 2); ++ __ xvinsgr2vr_d($dst$$FloatRegister, R0, 3); ++ } else { ++ __ xvxor_v($dst$$FloatRegister, $dst$$FloatRegister, $dst$$FloatRegister); ++ __ xvpermi_q($dst$$FloatRegister, $src$$FloatRegister, 0b00110000); ++ } ++ %} ++ ins_pipe(pipe_slow); ++%} ++ ++instruct reinterpretY2X(vecX dst, vecY src) ++%{ ++ predicate(vector_length_in_bytes(n) == 16 && vector_length_in_bytes(n->in(1)) == 32); ++ match(Set dst (VectorReinterpret src)); ++ format %{ "vreinterpret $dst, $src\t# @reinterpretY2X" %} ++ ins_encode %{ ++ if ($dst$$FloatRegister == $src$$FloatRegister) { ++ // empty ++ } else { ++ __ vori_b($dst$$FloatRegister, $src$$FloatRegister, 0); ++ } ++ %} ++ ins_pipe(pipe_slow); ++%} ++ ++// ------------------------------ VectorInsert -------------------------------- ++ ++instruct insert16B(vecX dst, mRegI val, immIU4 idx) %{ ++ predicate(vector_length(n) == 16 && vector_element_basic_type(n) == T_BYTE); ++ match(Set dst (VectorInsert (Binary dst val) idx)); ++ format %{ "vinsgr2vr.b $dst, $val, $idx\t# @insert16B" %} ++ ins_encode %{ ++ __ vinsgr2vr_b($dst$$FloatRegister, $val$$Register, $idx$$constant); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct insert8S(vecX dst, mRegI val, immIU3 idx) %{ ++ predicate(vector_length(n) == 8 && vector_element_basic_type(n) == T_SHORT); ++ match(Set dst (VectorInsert (Binary dst val) idx)); ++ format %{ "vinsgr2vr.h $dst, $val, $idx\t# @insert8S" %} ++ ins_encode %{ ++ __ vinsgr2vr_h($dst$$FloatRegister, $val$$Register, $idx$$constant); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct insert4I(vecX dst, mRegI val, immIU2 idx) %{ ++ predicate(vector_length(n) == 4 && vector_element_basic_type(n) == T_INT); ++ match(Set dst (VectorInsert (Binary dst val) idx)); ++ format %{ "vinsgr2vr.w $dst, $val, $idx\t# @insert4I" %} ++ ins_encode %{ ++ __ vinsgr2vr_w($dst$$FloatRegister, $val$$Register, $idx$$constant); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct insert2L(vecX dst, mRegL val, immIU1 idx) %{ ++ predicate(vector_length(n) == 2 && vector_element_basic_type(n) == T_LONG); ++ match(Set dst (VectorInsert (Binary dst val) idx)); ++ format %{ "vinsgr2vr.d $dst, $val, $idx\t# @insert2L" %} ++ ins_encode %{ ++ __ 
vinsgr2vr_d($dst$$FloatRegister, $val$$Register, $idx$$constant); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct insert4F(vecX dst, regF val, immIU2 idx, mRegI tmp) %{ ++ predicate(vector_length(n) == 4 && vector_element_basic_type(n) == T_FLOAT); ++ match(Set dst (VectorInsert (Binary dst val) idx)); ++ effect(TEMP tmp); ++ format %{ "vinsert $dst, $val, $idx\t# TEMP($tmp) @insert4F" %} ++ ins_encode %{ ++ __ vpickve2gr_w($tmp$$Register, $val$$FloatRegister, 0); ++ __ vinsgr2vr_w($dst$$FloatRegister, $tmp$$Register, $idx$$constant); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct insert2D(vecX dst, regD val, immIU1 idx, mRegI tmp) %{ ++ predicate(vector_length(n) == 2 && vector_element_basic_type(n) == T_DOUBLE); ++ match(Set dst (VectorInsert (Binary dst val) idx)); ++ effect(TEMP tmp); ++ format %{ "vinsert $dst, $val, $idx\t# TEMP($tmp) @insert2D" %} ++ ins_encode %{ ++ __ vpickve2gr_d($tmp$$Register, $val$$FloatRegister, 0); ++ __ vinsgr2vr_d($dst$$FloatRegister, $tmp$$Register, $idx$$constant); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct insert32B(vecY dst, mRegI val, immIU5 idx) %{ ++ predicate(vector_length(n) == 32 && vector_element_basic_type(n) == T_BYTE); ++ match(Set dst (VectorInsert (Binary dst val) idx)); ++ format %{ "xvinsert $dst, $val, $idx\t# @insert32B" %} ++ ins_encode %{ ++ int idx = $idx$$constant; ++ int msbw, lsbw; ++ switch (idx % 4) { ++ case 0: msbw = 7, lsbw = 0; break; ++ case 1: msbw = 15, lsbw = 8; break; ++ case 2: msbw = 23, lsbw = 16; break; ++ case 3: msbw = 31, lsbw = 24; break; ++ default: ++ ShouldNotReachHere(); ++ } ++ __ xvpickve2gr_w(SCR1, $dst$$FloatRegister, idx >> 2); ++ __ bstrins_w(SCR1, $val$$Register, msbw, lsbw); ++ __ xvinsgr2vr_w($dst$$FloatRegister, SCR1, idx >> 2); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct insert16S(vecY dst, mRegI val, immIU4 idx) %{ ++ predicate(vector_length(n) == 16 && vector_element_basic_type(n) == T_SHORT); ++ match(Set dst (VectorInsert (Binary dst val) idx)); ++ format %{ "xvinsert $dst, $val, $idx\t# @insert16S" %} ++ ins_encode %{ ++ int idx = $idx$$constant; ++ int msbw = (idx % 2) ? 31 : 15; ++ int lsbw = (idx % 2) ? 
16 : 0; ++ __ xvpickve2gr_w(SCR1, $dst$$FloatRegister, idx >> 1); ++ __ bstrins_w(SCR1, $val$$Register, msbw, lsbw); ++ __ xvinsgr2vr_w($dst$$FloatRegister, SCR1, idx >> 1); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct insert8I(vecY dst, mRegI val, immIU3 idx) %{ ++ predicate(vector_length(n) == 8 && vector_element_basic_type(n) == T_INT); ++ match(Set dst (VectorInsert (Binary dst val) idx)); ++ format %{ "vinsgr2vr.w $dst, $val, $idx\t# @insert8I" %} ++ ins_encode %{ ++ __ xvinsgr2vr_w($dst$$FloatRegister, $val$$Register, $idx$$constant); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct insert4L(vecY dst, mRegL val, immIU2 idx) %{ ++ predicate(vector_length(n) == 4 && vector_element_basic_type(n) == T_LONG); ++ match(Set dst (VectorInsert (Binary dst val) idx)); ++ format %{ "vinsgr2vr.d $dst, $val, $idx\t# @insert4L" %} ++ ins_encode %{ ++ __ xvinsgr2vr_d($dst$$FloatRegister, $val$$Register, $idx$$constant); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct insert8F(vecY dst, regF val, immIU3 idx) %{ ++ predicate(vector_length(n) == 8 && vector_element_basic_type(n) == T_FLOAT); ++ match(Set dst (VectorInsert (Binary dst val) idx)); ++ format %{ "xvinsve0.w $dst, $val, $idx\t# @insert8F" %} ++ ins_encode %{ ++ __ xvinsve0_w($dst$$FloatRegister, $val$$FloatRegister, $idx$$constant); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct insert4D(vecY dst, regD val, immIU2 idx) %{ ++ predicate(vector_length(n) == 4 && vector_element_basic_type(n) == T_DOUBLE); ++ match(Set dst (VectorInsert (Binary dst val) idx)); ++ format %{ "xvinsve0.d $dst, $val, $idx\t# @insert4D" %} ++ ins_encode %{ ++ __ xvinsve0_d($dst$$FloatRegister, $val$$FloatRegister, $idx$$constant); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++// -------------------------------- Vector Blend ------------------------------ ++ ++instruct blendV16(vecX dst, vecX src1, vecX src2, vecX mask) ++%{ ++ predicate(vector_length_in_bytes(n) == 16); ++ match(Set dst (VectorBlend (Binary src1 src2) mask)); ++ format %{ "vbitsel.v $dst, $src1, $src2, $mask\t# @blendV16" %} ++ ins_encode %{ ++ __ vbitsel_v($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister, $mask$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct blendV32(vecY dst, vecY src1, vecY src2, vecY mask) ++%{ ++ predicate(vector_length_in_bytes(n) == 32); ++ match(Set dst (VectorBlend (Binary src1 src2) mask)); ++ format %{ "xvbitsel.v $dst, $src1, $src2, $mask\t# @blendV32" %} ++ ins_encode %{ ++ __ xvbitsel_v($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister, $mask$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++// -------------------------------- LoadMask ---------------------------------- ++ ++instruct loadmask16B(vecX dst, vecX src) %{ ++ predicate(vector_length(n) == 16 && vector_element_basic_type(n) == T_BYTE); ++ match(Set dst (VectorLoadMask src)); ++ format %{ "vneg.b $dst, $src\t# @loadmask16B" %} ++ ins_encode %{ ++ __ vneg_b($dst$$FloatRegister, $src$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct loadmask32B(vecY dst, vecY src) %{ ++ predicate(vector_length(n) == 32 && vector_element_basic_type(n) == T_BYTE); ++ match(Set dst (VectorLoadMask src)); ++ format %{ "xvneg.b $dst, $src\t# @loadmask32B" %} ++ ins_encode %{ ++ __ xvneg_b($dst$$FloatRegister, $src$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct loadmask16S(vecY dst, vecX src) %{ ++ predicate(vector_length(n) == 16 && vector_element_basic_type(n) == T_SHORT); ++ match(Set dst (VectorLoadMask src)); ++ format %{ 
"vloadmask $dst, $src\t# @loadmask16S" %} ++ ins_encode %{ ++ __ vext2xv_h_b($dst$$FloatRegister, $src$$FloatRegister); ++ __ xvneg_h($dst$$FloatRegister, $dst$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++//-------------------------------- StoreMask ---------------------------------- ++ ++instruct storemask16B(vecX dst, vecX src, immI_1 size) %{ ++ predicate(vector_length(n) == 16); ++ match(Set dst (VectorStoreMask src size)); ++ format %{ "vneg.b $dst, $src\t# @storemask16B" %} ++ ins_encode %{ ++ __ vneg_b($dst$$FloatRegister, $src$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct storemask32B(vecY dst, vecY src, immI_1 size) %{ ++ predicate(vector_length(n) == 32); ++ match(Set dst (VectorStoreMask src size)); ++ format %{ "xvneg.b $dst, $src\t# @storemask32B" %} ++ ins_encode %{ ++ __ xvneg_b($dst$$FloatRegister, $src$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct storemask16S(vecX dst, vecY src, immI_2 size, vecX tmp) %{ ++ predicate(vector_length(n) == 16); ++ match(Set dst (VectorStoreMask src size)); ++ effect(TEMP tmp); ++ format %{ "vstoremask $dst, $src\t# TEMP($tmp) @storemask16S" %} ++ ins_encode %{ ++ __ xvpermi_d($tmp$$FloatRegister, $src$$FloatRegister, 0b00001110); ++ __ vsrlni_b_h($tmp$$FloatRegister, $src$$FloatRegister, 0); ++ __ vneg_b($dst$$FloatRegister, $tmp$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++// ----------------------------- VectorTest ----------------------------------- ++ ++instruct anytrue_in_maskV16(mRegI dst, vecX src1, vecX src2, vecX tmp) ++%{ ++ predicate(static_cast(n)->get_predicate() == BoolTest::ne); ++ match(Set dst (VectorTest src1 src2)); ++ effect(TEMP tmp); ++ format %{ "vtest $dst, $src1, $src2(not used)\t# TEMP($tmp) @anytrue_in_maskV16" %} ++ ins_encode %{ ++ // No need to use src2, src2 is all ones. ++ __ vpermi_w($tmp$$FloatRegister, $src1$$FloatRegister, 0b00001110); ++ __ vor_v($tmp$$FloatRegister, $src1$$FloatRegister, $tmp$$FloatRegister); ++ __ vpickve2gr_d($dst$$Register, $tmp$$FloatRegister, 0); ++ __ sltu($dst$$Register, R0, $dst$$Register); ++ %} ++ ins_pipe(pipe_slow); ++%} ++ ++instruct alltrue_in_maskV16(mRegI dst, vecX src1, vecX src2, vecX tmp) ++%{ ++ predicate(static_cast(n)->get_predicate() == BoolTest::overflow); ++ match(Set dst (VectorTest src1 src2)); ++ effect(TEMP tmp); ++ format %{ "vtest $dst, $src1, $src2(not used)\t# TEMP($tmp) @alltrue_in_maskV16" %} ++ ins_encode %{ ++ // No need to use src2, src2 is all ones. ++ __ vpermi_w($tmp$$FloatRegister, $src1$$FloatRegister, 0b00001110); ++ __ vand_v($tmp$$FloatRegister, $src1$$FloatRegister, $tmp$$FloatRegister); ++ __ vpickve2gr_d($dst$$Register, $tmp$$FloatRegister, 0); ++ __ sltui($dst$$Register, $dst$$Register, -1); ++ __ xori($dst$$Register, $dst$$Register, 1); ++ %} ++ ins_pipe(pipe_slow); ++%} ++ ++instruct anytrue_in_maskV32(mRegI dst, vecY src1, vecY src2, vecY tmp1, vecY tmp2) ++%{ ++ predicate(static_cast(n)->get_predicate() == BoolTest::ne); ++ match(Set dst (VectorTest src1 src2)); ++ effect(TEMP tmp1, TEMP tmp2); ++ format %{ "xvtest $dst, $src1, $src2(not used)\t# TEMP($tmp1, $tmp2) @anytrue_in_maskV32" %} ++ ins_encode %{ ++ // No need to use src2, src2 is all ones. 
++ __ xvpermi_d($tmp1$$FloatRegister, $src1$$FloatRegister, 0b00001110); ++ __ vor_v($tmp1$$FloatRegister, $src1$$FloatRegister, $tmp1$$FloatRegister); ++ __ vpermi_w($tmp2$$FloatRegister, $tmp1$$FloatRegister, 0b00001110); ++ __ vor_v($tmp1$$FloatRegister, $tmp2$$FloatRegister, $tmp1$$FloatRegister); ++ __ vpickve2gr_d($dst$$Register, $tmp1$$FloatRegister, 0); ++ __ sltu($dst$$Register, R0, $dst$$Register); ++ %} ++ ins_pipe(pipe_slow); ++%} ++ ++instruct alltrue_in_maskV32(mRegI dst, vecY src1, vecY src2, vecY tmp1, vecY tmp2) ++%{ ++ predicate(static_cast<const VectorTestNode*>(n)->get_predicate() == BoolTest::overflow); ++ match(Set dst (VectorTest src1 src2)); ++ effect(TEMP tmp1, TEMP tmp2); ++ format %{ "xvtest $dst, $src1, $src2(not used)\t# TEMP($tmp1, $tmp2) @alltrue_in_maskV32" %} ++ ins_encode %{ ++ // No need to use src2, src2 is all ones. ++ __ xvpermi_d($tmp1$$FloatRegister, $src1$$FloatRegister, 0b00001110); ++ __ vand_v($tmp1$$FloatRegister, $src1$$FloatRegister, $tmp1$$FloatRegister); ++ __ vpermi_w($tmp2$$FloatRegister, $tmp1$$FloatRegister, 0b00001110); ++ __ vand_v($tmp1$$FloatRegister, $tmp2$$FloatRegister, $tmp1$$FloatRegister); ++ __ vpickve2gr_d($dst$$Register, $tmp1$$FloatRegister, 0); ++ __ sltui($dst$$Register, $dst$$Register, -1); ++ __ xori($dst$$Register, $dst$$Register, 1); ++ %} ++ ins_pipe(pipe_slow); ++%} ++ ++// ----------------------------- Vector comparison ---------------------------- ++ ++instruct cmpV16(vecX dst, vecX src1, vecX src2, immI cond) ++%{ ++ predicate(vector_length_in_bytes(n) == 16); ++ match(Set dst (VectorMaskCmp (Binary src1 src2) cond)); ++ format %{ "vcompare $dst, $src1, $src2, $cond\t# @cmpV16" %} ++ ins_encode %{ ++ BasicType bt = vector_element_basic_type(this); ++ __ vector_compare($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister, bt, $cond$$constant, 16); ++ %} ++ ins_pipe(pipe_slow); ++%} ++ ++instruct cmpV32(vecY dst, vecY src1, vecY src2, immI cond) ++%{ ++ predicate(vector_length_in_bytes(n) == 32); ++ match(Set dst (VectorMaskCmp (Binary src1 src2) cond)); ++ format %{ "xvcompare $dst, $src1, $src2, $cond\t# @cmpV32" %} ++ ins_encode %{ ++ BasicType bt = vector_element_basic_type(this); ++ __ vector_compare($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister, bt, $cond$$constant, 32); ++ %} ++ ins_pipe(pipe_slow); ++%} ++ ++// ---------------------------- LOAD_IOTA_INDICES ----------------------------- ++ ++instruct loadcon16B(vecX dst, immI_0 src) %{ ++ predicate(vector_length(n) == 16 && vector_element_basic_type(n) == T_BYTE); ++ match(Set dst (VectorLoadConst src)); ++ format %{ "vld_con $dst, CONSTANT_MEMORY\t# @loadcon16B" %} ++ ins_encode %{ ++ __ li(AT, (long)StubRoutines::la::vector_iota_indices()); ++ __ vld($dst$$FloatRegister, AT, (int)0); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct loadcon32B(vecY dst, immI_0 src) %{ ++ predicate(vector_length(n) == 32 && vector_element_basic_type(n) == T_BYTE); ++ match(Set dst (VectorLoadConst src)); ++ format %{ "xvld_con $dst, CONSTANT_MEMORY\t# @loadcon32B" %} ++ ins_encode %{ ++ __ li(AT, (long)StubRoutines::la::vector_iota_indices()); ++ __ xvld($dst$$FloatRegister, AT, (int)0); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++// ---------------------------- LOAD_SHUFFLE ---------------------------------- ++ ++instruct loadShuffle16B(vecX dst) %{ ++ predicate(vector_length(n) == 16 && vector_element_basic_type(n) == T_BYTE); ++ match(Set dst (VectorLoadShuffle dst)); ++ format %{ "vld_shuffle $dst\t# @loadShuffle16B" %} ++ ins_encode %{ ++ // empty ++ %} ++ 
ins_pipe( pipe_slow ); ++%} ++ ++instruct loadShuffle32B(vecY dst) %{ ++ predicate(vector_length(n) == 32 && vector_element_basic_type(n) == T_BYTE); ++ match(Set dst (VectorLoadShuffle dst)); ++ format %{ "xvld_shuffle $dst\t# @loadShuffle32B" %} ++ ins_encode %{ ++ // empty ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct loadShuffle16S(vecY dst, vecX src) %{ ++ predicate(vector_length(n) == 16 && vector_element_basic_type(n) == T_SHORT); ++ match(Set dst (VectorLoadShuffle src)); ++ format %{ "vext2xv.hu.bu $dst, $src\t# @loadShuffle16S" %} ++ ins_encode %{ ++ __ vext2xv_hu_bu($dst$$FloatRegister, $src$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++// ---------------------------- Rearrange ------------------------------------- ++ ++instruct rearrange16B(vecX dst, vecX src, vecX shuffle) %{ ++ predicate(vector_length(n) == 16 && vector_element_basic_type(n) == T_BYTE); ++ match(Set dst (VectorRearrange src shuffle)); ++ format %{ "vshuf.b $dst, $src, $shuffle\t# @rearrange16B" %} ++ ins_encode %{ ++ __ vshuf_b($dst$$FloatRegister, $src$$FloatRegister, $src$$FloatRegister, $shuffle$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct rearrange32B(vecY dst, vecY src, vecY shuffle, vecY tmp) %{ ++ predicate(vector_length(n) == 32 && vector_element_basic_type(n) == T_BYTE); ++ match(Set dst (VectorRearrange src shuffle)); ++ effect(TEMP_DEF dst, TEMP tmp); ++ format %{ "xvrearrange $dst, $src, $shuffle\t# TEMP($tmp) @rearrange32B" %} ++ ins_encode %{ ++ __ xvpermi_q($tmp$$FloatRegister, $src$$FloatRegister, 0x00); ++ __ xvpermi_q($dst$$FloatRegister, $src$$FloatRegister, 0x11); ++ __ xvshuf_b($dst$$FloatRegister, $dst$$FloatRegister, $tmp$$FloatRegister, $shuffle$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct rearrange16S(vecY dst, vecY src, vecY tmp1, vecY tmp2) %{ ++ predicate(vector_length(n) == 16 && vector_element_basic_type(n) == T_SHORT); ++ match(Set dst (VectorRearrange src dst)); ++ effect(TEMP tmp1, TEMP tmp2); ++ format %{ "xvrearrange $dst, $src, $dst\t# TEMP($tmp1, $tmp2) @rearrange16S" %} ++ ins_encode %{ ++ __ xvpermi_q($tmp1$$FloatRegister, $src$$FloatRegister, 0x00); ++ __ xvpermi_q($tmp2$$FloatRegister, $src$$FloatRegister, 0x11); ++ __ xvshuf_h($dst$$FloatRegister, $tmp2$$FloatRegister, $tmp1$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++// ---------------------------- PopCount -------------------------------------- ++ ++instruct popcount4I(vecX dst, vecX src) %{ ++ predicate(UsePopCountInstruction && n->as_Vector()->length() == 4); ++ match(Set dst (PopCountVI src)); ++ format %{ "vpcnt.w $dst, $src\t# @popcount4I" %} ++ ins_encode %{ ++ __ vpcnt_w($dst$$FloatRegister, $src$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct popcount8I(vecY dst, vecY src) %{ ++ predicate(UsePopCountInstruction && n->as_Vector()->length() == 8); ++ match(Set dst (PopCountVI src)); ++ format %{ "xvpcnt.w $dst, $src\t# @popcount8I" %} ++ ins_encode %{ ++ __ xvpcnt_w($dst$$FloatRegister, $src$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++ ++//----------PEEPHOLE RULES----------------------------------------------------- ++// These must follow all instruction definitions as they use the names ++// defined in the instructions definitions. ++// ++// peepmatch ( root_instr_name [preceeding_instruction]* ); ++// ++// peepconstraint %{ ++// (instruction_number.operand_name relational_op instruction_number.operand_name ++// [, ...] 
); ++// // instruction numbers are zero-based using left to right order in peepmatch ++// ++// peepreplace ( instr_name ( [instruction_number.operand_name]* ) ); ++// // provide an instruction_number.operand_name for each operand that appears ++// // in the replacement instruction's match rule ++// ++// ---------VM FLAGS--------------------------------------------------------- ++// ++// All peephole optimizations can be turned off using -XX:-OptoPeephole ++// ++// Each peephole rule is given an identifying number starting with zero and ++// increasing by one in the order seen by the parser. An individual peephole ++// can be enabled, and all others disabled, by using -XX:OptoPeepholeAt=# ++// on the command-line. ++// ++// ---------CURRENT LIMITATIONS---------------------------------------------- ++// ++// Only match adjacent instructions in same basic block ++// Only equality constraints ++// Only constraints between operands, not (0.dest_reg == EAX_enc) ++// Only one replacement instruction ++// ++// ---------EXAMPLE---------------------------------------------------------- ++// ++// // pertinent parts of existing instructions in architecture description ++// instruct movI(eRegI dst, eRegI src) %{ ++// match(Set dst (CopyI src)); ++// %} ++// ++// instruct incI_eReg(eRegI dst, immI_1 src, eFlagsReg cr) %{ ++// match(Set dst (AddI dst src)); ++// effect(KILL cr); ++// %} ++// ++// // Change (inc mov) to lea ++// peephole %{ ++// // increment preceeded by register-register move ++// peepmatch ( incI_eReg movI ); ++// // require that the destination register of the increment ++// // match the destination register of the move ++// peepconstraint ( 0.dst == 1.dst ); ++// // construct a replacement instruction that sets ++// // the destination to ( move's source register + one ) ++// peepreplace ( leaI_eReg_immI( 0.dst 1.src 0.src ) ); ++// %} ++// ++// Implementation no longer uses movX instructions since ++// machine-independent system no longer uses CopyX nodes. ++// ++// peephole %{ ++// peepmatch ( incI_eReg movI ); ++// peepconstraint ( 0.dst == 1.dst ); ++// peepreplace ( leaI_eReg_immI( 0.dst 1.src 0.src ) ); ++// %} ++// ++// peephole %{ ++// peepmatch ( decI_eReg movI ); ++// peepconstraint ( 0.dst == 1.dst ); ++// peepreplace ( leaI_eReg_immI( 0.dst 1.src 0.src ) ); ++// %} ++// ++// peephole %{ ++// peepmatch ( addI_eReg_imm movI ); ++// peepconstraint ( 0.dst == 1.dst ); ++// peepreplace ( leaI_eReg_immI( 0.dst 1.src 0.src ) ); ++// %} ++// ++// peephole %{ ++// peepmatch ( addP_eReg_imm movP ); ++// peepconstraint ( 0.dst == 1.dst ); ++// peepreplace ( leaP_eReg_immI( 0.dst 1.src 0.src ) ); ++// %} ++ ++// // Change load of spilled value to only a spill ++// instruct storeI(memory mem, eRegI src) %{ ++// match(Set mem (StoreI mem src)); ++// %} ++// ++// instruct loadI(eRegI dst, memory mem) %{ ++// match(Set dst (LoadI mem)); ++// %} ++// ++//peephole %{ ++// peepmatch ( loadI storeI ); ++// peepconstraint ( 1.src == 0.dst, 1.mem == 0.mem ); ++// peepreplace ( storeI( 1.mem 1.mem 1.src ) ); ++//%} ++ ++//----------SMARTSPILL RULES--------------------------------------------------- ++// These must follow all instruction definitions as they use the names ++// defined in the instructions definitions. 
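One last note on this vector file before the macro assembler sources: the rearrange patterns defined above rely on vshuf.b / xvshuf.b, a byte-table permute in which every destination lane selects one source lane by the index stored in the shuffle vector. For the in-range indices C2 generates for VectorRearrange, the per-lane behaviour reduces to the following (plain C++, illustrative names only, not part of the patch):

    #include <cstdint>
    #include <cstdio>

    // Scalar model of VectorRearrange on byte lanes:
    // dst[i] = src[shuffle[i]], with shuffle[i] in [0, lanes).
    static void rearrange_bytes(const uint8_t* src, const uint8_t* shuffle,
                                uint8_t* dst, int lanes) {
      for (int i = 0; i < lanes; i++) dst[i] = src[shuffle[i]];
    }

    int main() {
      uint8_t src[4]     = {10, 20, 30, 40};
      uint8_t shuffle[4] = {3, 3, 0, 1};
      uint8_t dst[4];
      rearrange_bytes(src, shuffle, dst, 4);
      printf("%d %d %d %d\n", dst[0], dst[1], dst[2], dst[3]);  // 40 40 10 20
      return 0;
    }

The 32-byte variants need the extra xvpermi_q moves because xvshuf.b permutes each 128-bit half independently, so both halves of the source have to be made visible to each half before the shuffle.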
++ +diff --git a/src/hotspot/cpu/loongarch/macroAssembler_loongarch.cpp b/src/hotspot/cpu/loongarch/macroAssembler_loongarch.cpp +new file mode 100644 +index 00000000000..a7062552f76 +--- /dev/null ++++ b/src/hotspot/cpu/loongarch/macroAssembler_loongarch.cpp +@@ -0,0 +1,3827 @@ ++/* ++ * Copyright (c) 1997, 2014, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2017, 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#include "precompiled.hpp" ++#include "jvm.h" ++#include "asm/assembler.hpp" ++#include "asm/assembler.inline.hpp" ++#include "asm/macroAssembler.inline.hpp" ++#include "compiler/disassembler.hpp" ++#include "gc/shared/barrierSet.hpp" ++#include "gc/shared/barrierSetAssembler.hpp" ++#include "gc/shared/collectedHeap.inline.hpp" ++#include "interpreter/bytecodeHistogram.hpp" ++#include "interpreter/interpreter.hpp" ++#include "memory/resourceArea.hpp" ++#include "memory/universe.hpp" ++#include "nativeInst_loongarch.hpp" ++#include "oops/compressedOops.inline.hpp" ++#include "oops/klass.inline.hpp" ++#include "prims/methodHandles.hpp" ++#include "runtime/biasedLocking.hpp" ++#include "runtime/interfaceSupport.inline.hpp" ++#include "runtime/jniHandles.inline.hpp" ++#include "runtime/objectMonitor.hpp" ++#include "runtime/os.hpp" ++#include "runtime/safepoint.hpp" ++#include "runtime/safepointMechanism.hpp" ++#include "runtime/sharedRuntime.hpp" ++#include "runtime/stubRoutines.hpp" ++#include "utilities/macros.hpp" ++ ++#ifdef COMPILER2 ++#include "opto/compile.hpp" ++#include "opto/output.hpp" ++#endif ++ ++#if INCLUDE_ZGC ++#include "gc/z/zThreadLocalData.hpp" ++#endif ++ ++#define A0 RA0 ++#define A1 RA1 ++#define A2 RA2 ++#define A3 RA3 ++#define A4 RA4 ++#define A5 RA5 ++#define A6 RA6 ++#define A7 RA7 ++#define T0 RT0 ++#define T1 RT1 ++#define T2 RT2 ++#define T3 RT3 ++#define T4 RT4 ++#define T5 RT5 ++#define T6 RT6 ++#define T7 RT7 ++#define T8 RT8 ++ ++// Implementation of MacroAssembler ++ ++intptr_t MacroAssembler::i[32] = {0}; ++float MacroAssembler::f[32] = {0.0}; ++ ++void MacroAssembler::print(outputStream *s) { ++ unsigned int k; ++ for(k=0; k<32; k++) { ++ s->print_cr("i%d = 0x%.16lx", k, i[k]); ++ } ++ s->cr(); ++ ++ for(k=0; k<32; k++) { ++ s->print_cr("f%d = %f", k, f[k]); ++ } ++ s->cr(); ++} ++ ++int MacroAssembler::i_offset(unsigned int k) { return (intptr_t)&((MacroAssembler*)0)->i[k]; } ++int MacroAssembler::f_offset(unsigned int k) { return (intptr_t)&((MacroAssembler*)0)->f[k]; } ++ ++void MacroAssembler::save_registers(MacroAssembler *masm) { ++#define __ masm->
++ for(int k=0; k<32; k++) { ++ __ st_w (as_Register(k), A0, i_offset(k)); ++ } ++ ++ for(int k=0; k<32; k++) { ++ __ fst_s (as_FloatRegister(k), A0, f_offset(k)); ++ } ++#undef __ ++} ++ ++void MacroAssembler::restore_registers(MacroAssembler *masm) { ++#define __ masm-> ++ for(int k=0; k<32; k++) { ++ __ ld_w (as_Register(k), A0, i_offset(k)); ++ } ++ ++ for(int k=0; k<32; k++) { ++ __ fld_s (as_FloatRegister(k), A0, f_offset(k)); ++ } ++#undef __ ++} ++ ++ ++void MacroAssembler::pd_patch_instruction(address branch, address target, const char* file, int line) { ++ jint& stub_inst = *(jint*)branch; ++ jint *pc = (jint *)branch; ++ ++ if (high(stub_inst, 7) == pcaddu18i_op) { ++ // far: ++ // pcaddu18i reg, si20 ++ // jirl r0, reg, si18 ++ ++ assert(high(pc[1], 6) == jirl_op, "Not a branch label patch"); ++ jlong offs = target - branch; ++ CodeBuffer cb(branch, 2 * BytesPerInstWord); ++ MacroAssembler masm(&cb); ++ if (reachable_from_branch_short(offs)) { ++ // convert far to short ++#define __ masm. ++ __ b(target); ++ __ nop(); ++#undef __ ++ } else { ++ masm.patchable_jump_far(R0, offs); ++ } ++ return; ++ } else if (high(stub_inst, 7) == pcaddi_op) { ++ // see MacroAssembler::set_last_Java_frame: ++ // pcaddi reg, si20 ++ ++ jint offs = (target - branch) >> 2; ++ guarantee(is_simm(offs, 20), "Not signed 20-bit offset"); ++ CodeBuffer cb(branch, 1 * BytesPerInstWord); ++ MacroAssembler masm(&cb); ++ masm.pcaddi(as_Register(low(stub_inst, 5)), offs); ++ return; ++ } else if (high(stub_inst, 7) == pcaddu12i_op) { ++ // pc-relative ++ jlong offs = target - branch; ++ guarantee(is_simm(offs, 32), "Not signed 32-bit offset"); ++ jint si12, si20; ++ jint& stub_instNext = *(jint*)(branch+4); ++ split_simm32(offs, si12, si20); ++ CodeBuffer cb(branch, 2 * BytesPerInstWord); ++ MacroAssembler masm(&cb); ++ masm.pcaddu12i(as_Register(low(stub_inst, 5)), si20); ++ masm.addi_d(as_Register(low((stub_instNext), 5)), as_Register(low((stub_instNext) >> 5, 5)), si12); ++ return; ++ } else if (high(stub_inst, 7) == lu12i_w_op) { ++ // long call (absolute) ++ CodeBuffer cb(branch, 3 * BytesPerInstWord); ++ MacroAssembler masm(&cb); ++ masm.call_long(target); ++ return; ++ } ++ ++ stub_inst = patched_branch(target - branch, stub_inst, 0); ++} ++ ++bool MacroAssembler::reachable_from_branch_short(jlong offs) { ++ if (ForceUnreachable) { ++ return false; ++ } ++ return is_simm(offs >> 2, 26); ++} ++ ++void MacroAssembler::patchable_jump_far(Register ra, jlong offs) { ++ jint si18, si20; ++ guarantee(is_simm(offs, 38), "Not signed 38-bit offset"); ++ split_simm38(offs, si18, si20); ++ pcaddu18i(T4, si20); ++ jirl(ra, T4, si18); ++} ++ ++void MacroAssembler::patchable_jump(address target, bool force_patchable) { ++ assert(ReservedCodeCacheSize < 4*G, "branch out of range"); ++ assert(CodeCache::find_blob(target) != NULL, ++ "destination of jump not found in code cache"); ++ if (force_patchable || patchable_branches()) { ++ jlong offs = target - pc(); ++ if (reachable_from_branch_short(offs)) { // Short jump ++ b(offset26(target)); ++ nop(); ++ } else { // Far jump ++ patchable_jump_far(R0, offs); ++ } ++ } else { // Real short jump ++ b(offset26(target)); ++ } ++} ++ ++void MacroAssembler::patchable_call(address target, address call_site) { ++ jlong offs = target - (call_site ? 
call_site : pc()); ++ if (reachable_from_branch_short(offs - BytesPerInstWord)) { // Short call ++ nop(); ++ bl((offs - BytesPerInstWord) >> 2); ++ } else { // Far call ++ patchable_jump_far(RA, offs); ++ } ++} ++ ++// Maybe emit a call via a trampoline. If the code cache is small ++// trampolines won't be emitted. ++ ++address MacroAssembler::trampoline_call(AddressLiteral entry, CodeBuffer *cbuf) { ++ assert(JavaThread::current()->is_Compiler_thread(), "just checking"); ++ assert(entry.rspec().type() == relocInfo::runtime_call_type ++ || entry.rspec().type() == relocInfo::opt_virtual_call_type ++ || entry.rspec().type() == relocInfo::static_call_type ++ || entry.rspec().type() == relocInfo::virtual_call_type, "wrong reloc type"); ++ ++ // We need a trampoline if branches are far. ++ if (far_branches()) { ++ bool in_scratch_emit_size = false; ++#ifdef COMPILER2 ++ // We don't want to emit a trampoline if C2 is generating dummy ++ // code during its branch shortening phase. ++ CompileTask* task = ciEnv::current()->task(); ++ in_scratch_emit_size = ++ (task != NULL && is_c2_compile(task->comp_level()) && ++ Compile::current()->output()->in_scratch_emit_size()); ++#endif ++ if (!in_scratch_emit_size) { ++ address stub = emit_trampoline_stub(offset(), entry.target()); ++ if (stub == NULL) { ++ postcond(pc() == badAddress); ++ return NULL; // CodeCache is full ++ } ++ } ++ } ++ ++ if (cbuf) cbuf->set_insts_mark(); ++ relocate(entry.rspec()); ++ if (!far_branches()) { ++ bl(entry.target()); ++ } else { ++ bl(pc()); ++ } ++ // just need to return a non-null address ++ postcond(pc() != badAddress); ++ return pc(); ++} ++ ++// Emit a trampoline stub for a call to a target which is too far away. ++// ++// code sequences: ++// ++// call-site: ++// branch-and-link to or ++// ++// Related trampoline stub for this call site in the stub section: ++// load the call target from the constant pool ++// branch (RA still points to the call site above) ++ ++address MacroAssembler::emit_trampoline_stub(int insts_call_instruction_offset, ++ address dest) { ++ // Start the stub ++ address stub = start_a_stub(NativeInstruction::nop_instruction_size ++ + NativeCallTrampolineStub::instruction_size); ++ if (stub == NULL) { ++ return NULL; // CodeBuffer::expand failed ++ } ++ ++ // Create a trampoline stub relocation which relates this trampoline stub ++ // with the call instruction at insts_call_instruction_offset in the ++ // instructions code-section. 
++ align(wordSize); ++ relocate(trampoline_stub_Relocation::spec(code()->insts()->start() ++ + insts_call_instruction_offset)); ++ const int stub_start_offset = offset(); ++ ++ // Now, create the trampoline stub's code: ++ // - load the call ++ // - call ++ pcaddi(T4, 0); ++ ld_d(T4, T4, 16); ++ jr(T4); ++ nop(); //align ++ assert(offset() - stub_start_offset == NativeCallTrampolineStub::data_offset, ++ "should be"); ++ emit_int64((int64_t)dest); ++ ++ const address stub_start_addr = addr_at(stub_start_offset); ++ ++ NativeInstruction* ni = nativeInstruction_at(stub_start_addr); ++ assert(ni->is_NativeCallTrampolineStub_at(), "doesn't look like a trampoline"); ++ ++ end_a_stub(); ++ return stub_start_addr; ++} ++ ++void MacroAssembler::beq_far(Register rs, Register rt, address entry) { ++ if (is_simm16((entry - pc()) >> 2)) { // Short jump ++ beq(rs, rt, offset16(entry)); ++ } else { // Far jump ++ Label not_jump; ++ bne(rs, rt, not_jump); ++ b_far(entry); ++ bind(not_jump); ++ } ++} ++ ++void MacroAssembler::beq_far(Register rs, Register rt, Label& L) { ++ if (L.is_bound()) { ++ beq_far(rs, rt, target(L)); ++ } else { ++ Label not_jump; ++ bne(rs, rt, not_jump); ++ b_far(L); ++ bind(not_jump); ++ } ++} ++ ++void MacroAssembler::bne_far(Register rs, Register rt, address entry) { ++ if (is_simm16((entry - pc()) >> 2)) { // Short jump ++ bne(rs, rt, offset16(entry)); ++ } else { // Far jump ++ Label not_jump; ++ beq(rs, rt, not_jump); ++ b_far(entry); ++ bind(not_jump); ++ } ++} ++ ++void MacroAssembler::bne_far(Register rs, Register rt, Label& L) { ++ if (L.is_bound()) { ++ bne_far(rs, rt, target(L)); ++ } else { ++ Label not_jump; ++ beq(rs, rt, not_jump); ++ b_far(L); ++ bind(not_jump); ++ } ++} ++ ++void MacroAssembler::blt_far(Register rs, Register rt, address entry, bool is_signed) { ++ if (is_simm16((entry - pc()) >> 2)) { // Short jump ++ if (is_signed) { ++ blt(rs, rt, offset16(entry)); ++ } else { ++ bltu(rs, rt, offset16(entry)); ++ } ++ } else { // Far jump ++ Label not_jump; ++ if (is_signed) { ++ bge(rs, rt, not_jump); ++ } else { ++ bgeu(rs, rt, not_jump); ++ } ++ b_far(entry); ++ bind(not_jump); ++ } ++} ++ ++void MacroAssembler::blt_far(Register rs, Register rt, Label& L, bool is_signed) { ++ if (L.is_bound()) { ++ blt_far(rs, rt, target(L), is_signed); ++ } else { ++ Label not_jump; ++ if (is_signed) { ++ bge(rs, rt, not_jump); ++ } else { ++ bgeu(rs, rt, not_jump); ++ } ++ b_far(L); ++ bind(not_jump); ++ } ++} ++ ++void MacroAssembler::bge_far(Register rs, Register rt, address entry, bool is_signed) { ++ if (is_simm16((entry - pc()) >> 2)) { // Short jump ++ if (is_signed) { ++ bge(rs, rt, offset16(entry)); ++ } else { ++ bgeu(rs, rt, offset16(entry)); ++ } ++ } else { // Far jump ++ Label not_jump; ++ if (is_signed) { ++ blt(rs, rt, not_jump); ++ } else { ++ bltu(rs, rt, not_jump); ++ } ++ b_far(entry); ++ bind(not_jump); ++ } ++} ++ ++void MacroAssembler::bge_far(Register rs, Register rt, Label& L, bool is_signed) { ++ if (L.is_bound()) { ++ bge_far(rs, rt, target(L), is_signed); ++ } else { ++ Label not_jump; ++ if (is_signed) { ++ blt(rs, rt, not_jump); ++ } else { ++ bltu(rs, rt, not_jump); ++ } ++ b_far(L); ++ bind(not_jump); ++ } ++} ++ ++void MacroAssembler::b_far(Label& L) { ++ if (L.is_bound()) { ++ b_far(target(L)); ++ } else { ++ L.add_patch_at(code(), locator()); ++ if (ForceUnreachable) { ++ patchable_jump_far(R0, 0); ++ } else { ++ b(0); ++ } ++ } ++} ++ ++void MacroAssembler::b_far(address entry) { ++ jlong offs = entry - pc(); ++ if 
(reachable_from_branch_short(offs)) { // Short jump ++ b(offset26(entry)); ++ } else { // Far jump ++ patchable_jump_far(R0, offs); ++ } ++} ++ ++void MacroAssembler::ld_ptr(Register rt, Register base, Register offset) { ++ ldx_d(rt, base, offset); ++} ++ ++void MacroAssembler::st_ptr(Register rt, Register base, Register offset) { ++ stx_d(rt, base, offset); ++} ++ ++Address MacroAssembler::as_Address(AddressLiteral adr) { ++ return Address(adr.target(), adr.rspec()); ++} ++ ++Address MacroAssembler::as_Address(ArrayAddress adr) { ++ return Address::make_array(adr); ++} ++ ++// tmp_reg1 and tmp_reg2 should be saved outside of atomic_inc32 (caller saved). ++void MacroAssembler::atomic_inc32(address counter_addr, int inc, Register tmp_reg1, Register tmp_reg2) { ++ li(tmp_reg1, inc); ++ li(tmp_reg2, counter_addr); ++ amadd_w(R0, tmp_reg1, tmp_reg2); ++} ++ ++// Writes to stack successive pages until offset reached to check for ++// stack overflow + shadow pages. This clobbers tmp. ++void MacroAssembler::bang_stack_size(Register size, Register tmp) { ++ assert_different_registers(tmp, size, AT); ++ move(tmp, SP); ++ // Bang stack for total size given plus shadow page size. ++ // Bang one page at a time because large size can bang beyond yellow and ++ // red zones. ++ Label loop; ++ li(AT, os::vm_page_size()); ++ bind(loop); ++ sub_d(tmp, tmp, AT); ++ sub_d(size, size, AT); ++ st_d(size, tmp, 0); ++ blt(R0, size, loop); ++ ++ // Bang down shadow pages too. ++ // At this point, (tmp-0) is the last address touched, so don't ++ // touch it again. (It was touched as (tmp-pagesize) but then tmp ++ // was post-decremented.) Skip this address by starting at i=1, and ++ // touch a few more pages below. N.B. It is important to touch all ++ // the way down to and including i=StackShadowPages. ++ for (int i = 0; i < (int)(StackOverflow::stack_shadow_zone_size() / os::vm_page_size()) - 1; i++) { ++ // this could be any sized move but this is can be a debugging crumb ++ // so the bigger the better. ++ sub_d(tmp, tmp, AT); ++ st_d(size, tmp, 0); ++ } ++} ++ ++void MacroAssembler::reserved_stack_check() { ++ Register thread = TREG; ++#ifndef OPT_THREAD ++ get_thread(thread); ++#endif ++ // testing if reserved zone needs to be enabled ++ Label no_reserved_zone_enabling; ++ ++ ld_d(AT, Address(thread, JavaThread::reserved_stack_activation_offset())); ++ sub_d(AT, SP, AT); ++ blt(AT, R0, no_reserved_zone_enabling); ++ ++ enter(); // RA and FP are live. ++ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::enable_stack_reserved_zone), thread); ++ leave(); ++ ++ // We have already removed our own frame. ++ // throw_delayed_StackOverflowError will think that it's been ++ // called by our caller. 
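Expressed as plain C++, the banging loop emitted by bang_stack_size() above looks like the sketch below. This is illustrative only and not part of the patch; page and shadow_bytes stand in for os::vm_page_size() and StackOverflow::stack_shadow_zone_size().

#include <cstdint>

// Sketch only: touch every page of the requested size, then the shadow pages.
static void bang_stack_sketch(volatile char* sp, intptr_t size,
                              intptr_t page, intptr_t shadow_bytes) {
  volatile char* tmp = sp;
  do {                                    // one page at a time, so a large
    tmp  -= page;                         // request cannot skip past the
    size -= page;                         // yellow/red guard zones
    *(volatile intptr_t*)tmp = size;      // the store is the "bang"
  } while (size > 0);
  for (intptr_t i = 0; i < shadow_bytes / page - 1; i++) {
    tmp -= page;                          // then touch the shadow pages below
    *(volatile intptr_t*)tmp = size;
  }
}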
++ li(AT, (long)StubRoutines::throw_delayed_StackOverflowError_entry()); ++ jr(AT); ++ should_not_reach_here(); ++ ++ bind(no_reserved_zone_enabling); ++} ++ ++void MacroAssembler::biased_locking_enter(Register lock_reg, ++ Register obj_reg, ++ Register swap_reg, ++ Register tmp_reg, ++ bool swap_reg_contains_mark, ++ Label& done, ++ Label* slow_case, ++ BiasedLockingCounters* counters) { ++ assert(UseBiasedLocking, "why call this otherwise?"); ++ bool need_tmp_reg = false; ++ if (tmp_reg == noreg) { ++ need_tmp_reg = true; ++ tmp_reg = T4; ++ } ++ assert_different_registers(lock_reg, obj_reg, swap_reg, tmp_reg, AT); ++ assert(markWord::age_shift == markWord::lock_bits + markWord::biased_lock_bits, "biased locking makes assumptions about bit layout"); ++ Address mark_addr (obj_reg, oopDesc::mark_offset_in_bytes()); ++ Address saved_mark_addr(lock_reg, 0); ++ ++ // Biased locking ++ // See whether the lock is currently biased toward our thread and ++ // whether the epoch is still valid ++ // Note that the runtime guarantees sufficient alignment of JavaThread ++ // pointers to allow age to be placed into low bits ++ // First check to see whether biasing is even enabled for this object ++ Label cas_label; ++ if (!swap_reg_contains_mark) { ++ ld_ptr(swap_reg, mark_addr); ++ } ++ ++ if (need_tmp_reg) { ++ push(tmp_reg); ++ } ++ move(tmp_reg, swap_reg); ++ andi(tmp_reg, tmp_reg, markWord::biased_lock_mask_in_place); ++ addi_d(AT, R0, markWord::biased_lock_pattern); ++ sub_d(AT, AT, tmp_reg); ++ if (need_tmp_reg) { ++ pop(tmp_reg); ++ } ++ ++ bne(AT, R0, cas_label); ++ ++ ++ // The bias pattern is present in the object's header. Need to check ++ // whether the bias owner and the epoch are both still current. ++ // Note that because there is no current thread register on LA we ++ // need to store off the mark word we read out of the object to ++ // avoid reloading it and needing to recheck invariants below. This ++ // store is unfortunate but it makes the overall code shorter and ++ // simpler. ++ st_ptr(swap_reg, saved_mark_addr); ++ if (need_tmp_reg) { ++ push(tmp_reg); ++ } ++ load_prototype_header(tmp_reg, obj_reg); ++ xorr(tmp_reg, tmp_reg, swap_reg); ++#ifndef OPT_THREAD ++ get_thread(swap_reg); ++ xorr(swap_reg, swap_reg, tmp_reg); ++#else ++ xorr(swap_reg, TREG, tmp_reg); ++#endif ++ ++ li(AT, ~((int) markWord::age_mask_in_place)); ++ andr(swap_reg, swap_reg, AT); ++ ++ if (PrintBiasedLockingStatistics) { ++ Label L; ++ bne(swap_reg, R0, L); ++ push(tmp_reg); ++ push(A0); ++ atomic_inc32((address)BiasedLocking::biased_lock_entry_count_addr(), 1, A0, tmp_reg); ++ pop(A0); ++ pop(tmp_reg); ++ bind(L); ++ } ++ if (need_tmp_reg) { ++ pop(tmp_reg); ++ } ++ beq(swap_reg, R0, done); ++ Label try_revoke_bias; ++ Label try_rebias; ++ ++ // At this point we know that the header has the bias pattern and ++ // that we are not the bias owner in the current epoch. We need to ++ // figure out more details about the state of the header in order to ++ // know what operations can be legally performed on the object's ++ // header. ++ ++ // If the low three bits in the xor result aren't clear, that means ++ // the prototype header is no longer biased and we have to revoke ++ // the bias on this object. ++ ++ li(AT, markWord::biased_lock_mask_in_place); ++ andr(AT, swap_reg, AT); ++ bne(AT, R0, try_revoke_bias); ++ // Biasing is still enabled for this data type. 
See whether the ++ // epoch of the current bias is still valid, meaning that the epoch ++ // bits of the mark word are equal to the epoch bits of the ++ // prototype header. (Note that the prototype header's epoch bits ++ // only change at a safepoint.) If not, attempt to rebias the object ++ // toward the current thread. Note that we must be absolutely sure ++ // that the current epoch is invalid in order to do this because ++ // otherwise the manipulations it performs on the mark word are ++ // illegal. ++ ++ li(AT, markWord::epoch_mask_in_place); ++ andr(AT,swap_reg, AT); ++ bne(AT, R0, try_rebias); ++ // The epoch of the current bias is still valid but we know nothing ++ // about the owner; it might be set or it might be clear. Try to ++ // acquire the bias of the object using an atomic operation. If this ++ // fails we will go in to the runtime to revoke the object's bias. ++ // Note that we first construct the presumed unbiased header so we ++ // don't accidentally blow away another thread's valid bias. ++ ++ ld_ptr(swap_reg, saved_mark_addr); ++ ++ li(AT, markWord::biased_lock_mask_in_place | markWord::age_mask_in_place | markWord::epoch_mask_in_place); ++ andr(swap_reg, swap_reg, AT); ++ ++ if (need_tmp_reg) { ++ push(tmp_reg); ++ } ++#ifndef OPT_THREAD ++ get_thread(tmp_reg); ++ orr(tmp_reg, tmp_reg, swap_reg); ++#else ++ orr(tmp_reg, TREG, swap_reg); ++#endif ++ cmpxchg(Address(obj_reg, 0), swap_reg, tmp_reg, AT, false, false); ++ if (need_tmp_reg) { ++ pop(tmp_reg); ++ } ++ // If the biasing toward our thread failed, this means that ++ // another thread succeeded in biasing it toward itself and we ++ // need to revoke that bias. The revocation will occur in the ++ // interpreter runtime in the slow case. ++ if (PrintBiasedLockingStatistics) { ++ Label L; ++ bne(AT, R0, L); ++ push(tmp_reg); ++ push(A0); ++ atomic_inc32((address)BiasedLocking::anonymously_biased_lock_entry_count_addr(), 1, A0, tmp_reg); ++ pop(A0); ++ pop(tmp_reg); ++ bind(L); ++ } ++ if (slow_case != NULL) { ++ beq_far(AT, R0, *slow_case); ++ } ++ b(done); ++ ++ bind(try_rebias); ++ // At this point we know the epoch has expired, meaning that the ++ // current "bias owner", if any, is actually invalid. Under these ++ // circumstances _only_, we are allowed to use the current header's ++ // value as the comparison value when doing the cas to acquire the ++ // bias in the current epoch. In other words, we allow transfer of ++ // the bias from one thread to another directly in this situation. ++ // ++ // FIXME: due to a lack of registers we currently blow away the age ++ // bits in this situation. Should attempt to preserve them. ++ if (need_tmp_reg) { ++ push(tmp_reg); ++ } ++ load_prototype_header(tmp_reg, obj_reg); ++#ifndef OPT_THREAD ++ get_thread(swap_reg); ++ orr(tmp_reg, tmp_reg, swap_reg); ++#else ++ orr(tmp_reg, tmp_reg, TREG); ++#endif ++ ld_ptr(swap_reg, saved_mark_addr); ++ ++ cmpxchg(Address(obj_reg, 0), swap_reg, tmp_reg, AT, false, false); ++ if (need_tmp_reg) { ++ pop(tmp_reg); ++ } ++ // If the biasing toward our thread failed, then another thread ++ // succeeded in biasing it toward itself and we need to revoke that ++ // bias. The revocation will occur in the runtime in the slow case. 
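The CAS at the heart of both the acquire and rebias paths above follows the same pattern: build the header we expect to find, OR in the current thread, and let one compare-and-swap decide the race. A minimal sketch, not part of the patch, with header_mask standing in for the combined biased_lock/age/epoch mask bits used in the code:

#include <atomic>
#include <cstdint>

// Sketch only: attempt to bias the object's mark word toward this thread.
static bool try_acquire_bias(std::atomic<uintptr_t>* mark_word,
                             uintptr_t observed_mark,
                             uintptr_t thread,
                             uintptr_t header_mask) {
  // Presumed unbiased header: keep bias pattern, age and epoch, clear the owner.
  uintptr_t expected = observed_mark & header_mask;
  uintptr_t biased   = expected | thread;      // bias toward the current thread
  return mark_word->compare_exchange_strong(expected, biased);
}

If the exchange fails, another thread won the bias, and the caller falls into the slow path that revokes it in the runtime, exactly as the surrounding comments describe.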
++ if (PrintBiasedLockingStatistics) { ++ Label L; ++ bne(AT, R0, L); ++ push(AT); ++ push(tmp_reg); ++ atomic_inc32((address)BiasedLocking::rebiased_lock_entry_count_addr(), 1, AT, tmp_reg); ++ pop(tmp_reg); ++ pop(AT); ++ bind(L); ++ } ++ if (slow_case != NULL) { ++ beq_far(AT, R0, *slow_case); ++ } ++ ++ b(done); ++ bind(try_revoke_bias); ++ // The prototype mark in the klass doesn't have the bias bit set any ++ // more, indicating that objects of this data type are not supposed ++ // to be biased any more. We are going to try to reset the mark of ++ // this object to the prototype value and fall through to the ++ // CAS-based locking scheme. Note that if our CAS fails, it means ++ // that another thread raced us for the privilege of revoking the ++ // bias of this particular object, so it's okay to continue in the ++ // normal locking code. ++ // ++ // FIXME: due to a lack of registers we currently blow away the age ++ // bits in this situation. Should attempt to preserve them. ++ ld_ptr(swap_reg, saved_mark_addr); ++ ++ if (need_tmp_reg) { ++ push(tmp_reg); ++ } ++ load_prototype_header(tmp_reg, obj_reg); ++ cmpxchg(Address(obj_reg, 0), swap_reg, tmp_reg, AT, false, false); ++ if (need_tmp_reg) { ++ pop(tmp_reg); ++ } ++ // Fall through to the normal CAS-based lock, because no matter what ++ // the result of the above CAS, some thread must have succeeded in ++ // removing the bias bit from the object's header. ++ if (PrintBiasedLockingStatistics) { ++ Label L; ++ bne(AT, R0, L); ++ push(AT); ++ push(tmp_reg); ++ atomic_inc32((address)BiasedLocking::revoked_lock_entry_count_addr(), 1, AT, tmp_reg); ++ pop(tmp_reg); ++ pop(AT); ++ bind(L); ++ } ++ ++ bind(cas_label); ++} ++ ++void MacroAssembler::biased_locking_exit(Register obj_reg, Register temp_reg, Label& done) { ++ assert(UseBiasedLocking, "why call this otherwise?"); ++ ++ // Check for biased locking unlock case, which is a no-op ++ // Note: we do not have to check the thread ID for two reasons. ++ // First, the interpreter checks for IllegalMonitorStateException at ++ // a higher level. Second, if the bias was revoked while we held the ++ // lock, the object could not be rebiased toward another thread, so ++ // the bias bit would be clear. ++ ld_d(temp_reg, Address(obj_reg, oopDesc::mark_offset_in_bytes())); ++ andi(temp_reg, temp_reg, markWord::biased_lock_mask_in_place); ++ addi_d(AT, R0, markWord::biased_lock_pattern); ++ ++ beq(AT, temp_reg, done); ++} ++ ++// the stack pointer adjustment is needed. 
see InterpreterMacroAssembler::super_call_VM_leaf ++// this method will handle the stack problem, you need not to preserve the stack space for the argument now ++void MacroAssembler::call_VM_leaf_base(address entry_point, int number_of_arguments) { ++ assert(number_of_arguments <= 4, "just check"); ++ assert(StackAlignmentInBytes == 16, "must be"); ++ move(AT, SP); ++ bstrins_d(SP, R0, 3, 0); ++ addi_d(SP, SP, -(StackAlignmentInBytes)); ++ st_d(AT, SP, 0); ++ call(entry_point, relocInfo::runtime_call_type); ++ ld_d(SP, SP, 0); ++} ++ ++ ++void MacroAssembler::jmp(address entry) { ++ jlong offs = entry - pc(); ++ if (reachable_from_branch_short(offs)) { // Short jump ++ b(offset26(entry)); ++ } else { // Far jump ++ patchable_jump_far(R0, offs); ++ } ++} ++ ++void MacroAssembler::jmp(address entry, relocInfo::relocType rtype) { ++ switch (rtype) { ++ case relocInfo::none: ++ jmp(entry); ++ break; ++ default: ++ { ++ InstructionMark im(this); ++ relocate(rtype); ++ patchable_jump(entry); ++ } ++ break; ++ } ++} ++ ++void MacroAssembler::jmp_far(Label& L) { ++ if (L.is_bound()) { ++ assert(target(L) != NULL, "jmp most probably wrong"); ++ patchable_jump(target(L), true /* force patchable */); ++ } else { ++ L.add_patch_at(code(), locator()); ++ patchable_jump_far(R0, 0); ++ } ++} ++ ++// Move an oop into a register. immediate is true if we want ++// immediate instructions and nmethod entry barriers are not enabled. ++// i.e. we are not going to patch this instruction while the code is being ++// executed by another thread. ++void MacroAssembler::movoop(Register dst, jobject obj, bool immediate) { ++ int oop_index; ++ if (obj == NULL) { ++ oop_index = oop_recorder()->allocate_oop_index(obj); ++ } else { ++#ifdef ASSERT ++ { ++ ThreadInVMfromUnknown tiv; ++ assert(Universe::heap()->is_in(JNIHandles::resolve(obj)), "should be real oop"); ++ } ++#endif ++ oop_index = oop_recorder()->find_index(obj); ++ } ++ RelocationHolder rspec = oop_Relocation::spec(oop_index); ++ ++ // nmethod entry barrier necessitate using the constant pool. They have to be ++ // ordered with respected to oop accesses. ++ // Using immediate literals would necessitate ISBs. 
++ if (BarrierSet::barrier_set()->barrier_set_nmethod() != NULL || !immediate) { ++ address dummy = address(uintptr_t(pc()) & -wordSize); // A nearby aligned address ++ relocate(rspec); ++ patchable_li52(dst, (long)dummy); ++ } else { ++ relocate(rspec); ++ patchable_li52(dst, (long)obj); ++ } ++} ++ ++void MacroAssembler::mov_metadata(Address dst, Metadata* obj) { ++ int oop_index; ++ if (obj) { ++ oop_index = oop_recorder()->find_index(obj); ++ } else { ++ oop_index = oop_recorder()->allocate_metadata_index(obj); ++ } ++ relocate(metadata_Relocation::spec(oop_index)); ++ patchable_li52(AT, (long)obj); ++ st_d(AT, dst); ++} ++ ++void MacroAssembler::mov_metadata(Register dst, Metadata* obj) { ++ int oop_index; ++ if (obj) { ++ oop_index = oop_recorder()->find_index(obj); ++ } else { ++ oop_index = oop_recorder()->allocate_metadata_index(obj); ++ } ++ relocate(metadata_Relocation::spec(oop_index)); ++ patchable_li52(dst, (long)obj); ++} ++ ++void MacroAssembler::call(address entry) { ++ jlong offs = entry - pc(); ++ if (reachable_from_branch_short(offs)) { // Short call (pc-rel) ++ bl(offset26(entry)); ++ } else if (is_simm(offs, 38)) { // Far call (pc-rel) ++ patchable_jump_far(RA, offs); ++ } else { // Long call (absolute) ++ call_long(entry); ++ } ++} ++ ++void MacroAssembler::call(address entry, relocInfo::relocType rtype) { ++ switch (rtype) { ++ case relocInfo::none: ++ call(entry); ++ break; ++ case relocInfo::runtime_call_type: ++ if (!is_simm(entry - pc(), 38)) { ++ call_long(entry); ++ break; ++ } ++ // fallthrough ++ default: ++ { ++ InstructionMark im(this); ++ relocate(rtype); ++ patchable_call(entry); ++ } ++ break; ++ } ++} ++ ++void MacroAssembler::call(address entry, RelocationHolder& rh){ ++ switch (rh.type()) { ++ case relocInfo::none: ++ call(entry); ++ break; ++ case relocInfo::runtime_call_type: ++ if (!is_simm(entry - pc(), 38)) { ++ call_long(entry); ++ break; ++ } ++ // fallthrough ++ default: ++ { ++ InstructionMark im(this); ++ relocate(rh); ++ patchable_call(entry); ++ } ++ break; ++ } ++} ++ ++void MacroAssembler::call_long(address entry) { ++ jlong value = (jlong)entry; ++ lu12i_w(T4, split_low20(value >> 12)); ++ lu32i_d(T4, split_low20(value >> 32)); ++ jirl(RA, T4, split_low12(value)); ++} ++ ++address MacroAssembler::ic_call(address entry, jint method_index) { ++ RelocationHolder rh = virtual_call_Relocation::spec(pc(), method_index); ++ patchable_li52(IC_Klass, (long)Universe::non_oop_word()); ++ assert(entry != NULL, "call most probably wrong"); ++ InstructionMark im(this); ++ return trampoline_call(AddressLiteral(entry, rh)); ++} ++ ++void MacroAssembler::c2bool(Register r) { ++ sltu(r, R0, r); ++} ++ ++#ifndef PRODUCT ++extern "C" void findpc(intptr_t x); ++#endif ++ ++void MacroAssembler::debug(char* msg/*, RegistersForDebugging* regs*/) { ++ if ( ShowMessageBoxOnError ) { ++ JavaThreadState saved_state = JavaThread::current()->thread_state(); ++ JavaThread::current()->set_thread_state(_thread_in_vm); ++ { ++ // In order to get locks work, we need to fake a in_VM state ++ ttyLocker ttyl; ++ ::tty->print_cr("EXECUTION STOPPED: %s\n", msg); ++ if (CountBytecodes || TraceBytecodes || StopInterpreterAt) { ++ BytecodeCounter::print(); ++ } ++ ++ } ++ ThreadStateTransition::transition(JavaThread::current(), _thread_in_vm, saved_state); ++ } ++ else ++ ::tty->print_cr("=============== DEBUG MESSAGE: %s ================\n", msg); ++} ++ ++ ++void MacroAssembler::stop(const char* msg) { ++#ifndef PRODUCT ++ block_comment(msg); ++#endif ++ csrrd(R0, 0); ++ 
emit_int64((uintptr_t)msg); ++} ++ ++void MacroAssembler::increment(Register reg, int imm) { ++ if (!imm) return; ++ if (is_simm(imm, 12)) { ++ addi_d(reg, reg, imm); ++ } else { ++ li(AT, imm); ++ add_d(reg, reg, AT); ++ } ++} ++ ++void MacroAssembler::decrement(Register reg, int imm) { ++ increment(reg, -imm); ++} ++ ++void MacroAssembler::increment(Address addr, int imm) { ++ if (!imm) return; ++ assert(is_simm(imm, 12), "must be"); ++ ld_ptr(AT, addr); ++ addi_d(AT, AT, imm); ++ st_ptr(AT, addr); ++} ++ ++void MacroAssembler::decrement(Address addr, int imm) { ++ increment(addr, -imm); ++} ++ ++void MacroAssembler::call_VM(Register oop_result, ++ address entry_point, ++ bool check_exceptions) { ++ call_VM_helper(oop_result, entry_point, 0, check_exceptions); ++} ++ ++void MacroAssembler::call_VM(Register oop_result, ++ address entry_point, ++ Register arg_1, ++ bool check_exceptions) { ++ if (arg_1!=A1) move(A1, arg_1); ++ call_VM_helper(oop_result, entry_point, 1, check_exceptions); ++} ++ ++void MacroAssembler::call_VM(Register oop_result, ++ address entry_point, ++ Register arg_1, ++ Register arg_2, ++ bool check_exceptions) { ++ if (arg_1!=A1) move(A1, arg_1); ++ if (arg_2!=A2) move(A2, arg_2); ++ assert(arg_2 != A1, "smashed argument"); ++ call_VM_helper(oop_result, entry_point, 2, check_exceptions); ++} ++ ++void MacroAssembler::call_VM(Register oop_result, ++ address entry_point, ++ Register arg_1, ++ Register arg_2, ++ Register arg_3, ++ bool check_exceptions) { ++ if (arg_1!=A1) move(A1, arg_1); ++ if (arg_2!=A2) move(A2, arg_2); assert(arg_2 != A1, "smashed argument"); ++ if (arg_3!=A3) move(A3, arg_3); assert(arg_3 != A1 && arg_3 != A2, "smashed argument"); ++ call_VM_helper(oop_result, entry_point, 3, check_exceptions); ++} ++ ++void MacroAssembler::call_VM(Register oop_result, ++ Register last_java_sp, ++ address entry_point, ++ int number_of_arguments, ++ bool check_exceptions) { ++ call_VM_base(oop_result, NOREG, last_java_sp, entry_point, number_of_arguments, check_exceptions); ++} ++ ++void MacroAssembler::call_VM(Register oop_result, ++ Register last_java_sp, ++ address entry_point, ++ Register arg_1, ++ bool check_exceptions) { ++ if (arg_1 != A1) move(A1, arg_1); ++ call_VM(oop_result, last_java_sp, entry_point, 1, check_exceptions); ++} ++ ++void MacroAssembler::call_VM(Register oop_result, ++ Register last_java_sp, ++ address entry_point, ++ Register arg_1, ++ Register arg_2, ++ bool check_exceptions) { ++ if (arg_1 != A1) move(A1, arg_1); ++ if (arg_2 != A2) move(A2, arg_2); assert(arg_2 != A1, "smashed argument"); ++ call_VM(oop_result, last_java_sp, entry_point, 2, check_exceptions); ++} ++ ++void MacroAssembler::call_VM(Register oop_result, ++ Register last_java_sp, ++ address entry_point, ++ Register arg_1, ++ Register arg_2, ++ Register arg_3, ++ bool check_exceptions) { ++ if (arg_1 != A1) move(A1, arg_1); ++ if (arg_2 != A2) move(A2, arg_2); assert(arg_2 != A1, "smashed argument"); ++ if (arg_3 != A3) move(A3, arg_3); assert(arg_3 != A1 && arg_3 != A2, "smashed argument"); ++ call_VM(oop_result, last_java_sp, entry_point, 3, check_exceptions); ++} ++ ++void MacroAssembler::call_VM_base(Register oop_result, ++ Register java_thread, ++ Register last_java_sp, ++ address entry_point, ++ int number_of_arguments, ++ bool check_exceptions) { ++ // determine java_thread register ++ if (!java_thread->is_valid()) { ++#ifndef OPT_THREAD ++ java_thread = T2; ++ get_thread(java_thread); ++#else ++ java_thread = TREG; ++#endif ++ } ++ // determine last_java_sp register 
++ if (!last_java_sp->is_valid()) { ++ last_java_sp = SP; ++ } ++ // debugging support ++ assert(number_of_arguments >= 0 , "cannot have negative number of arguments"); ++ assert(number_of_arguments <= 4 , "cannot have negative number of arguments"); ++ assert(java_thread != oop_result , "cannot use the same register for java_thread & oop_result"); ++ assert(java_thread != last_java_sp, "cannot use the same register for java_thread & last_java_sp"); ++ ++ assert(last_java_sp != FP, "this code doesn't work for last_java_sp == fp, which currently can't portably work anyway since C2 doesn't save fp"); ++ ++ // set last Java frame before call ++ Label before_call; ++ bind(before_call); ++ set_last_Java_frame(java_thread, last_java_sp, FP, before_call); ++ ++ // do the call ++ move(A0, java_thread); ++ call(entry_point, relocInfo::runtime_call_type); ++ ++ // restore the thread (cannot use the pushed argument since arguments ++ // may be overwritten by C code generated by an optimizing compiler); ++ // however can use the register value directly if it is callee saved. ++#ifndef OPT_THREAD ++ get_thread(java_thread); ++#else ++#ifdef ASSERT ++ { ++ Label L; ++ get_thread(AT); ++ beq(java_thread, AT, L); ++ stop("MacroAssembler::call_VM_base: TREG not callee saved?"); ++ bind(L); ++ } ++#endif ++#endif ++ ++ // discard thread and arguments ++ ld_ptr(SP, java_thread, in_bytes(JavaThread::last_Java_sp_offset())); ++ // reset last Java frame ++ reset_last_Java_frame(java_thread, false); ++ ++ check_and_handle_popframe(java_thread); ++ check_and_handle_earlyret(java_thread); ++ if (check_exceptions) { ++ // check for pending exceptions (java_thread is set upon return) ++ Label L; ++ ld_d(AT, java_thread, in_bytes(Thread::pending_exception_offset())); ++ beq(AT, R0, L); ++ lipc(AT, before_call); ++ push(AT); ++ jmp(StubRoutines::forward_exception_entry(), relocInfo::runtime_call_type); ++ bind(L); ++ } ++ ++ // get oop result if there is one and reset the value in the thread ++ if (oop_result->is_valid()) { ++ ld_d(oop_result, java_thread, in_bytes(JavaThread::vm_result_offset())); ++ st_d(R0, java_thread, in_bytes(JavaThread::vm_result_offset())); ++ verify_oop(oop_result); ++ } ++} ++ ++void MacroAssembler::call_VM_helper(Register oop_result, address entry_point, int number_of_arguments, bool check_exceptions) { ++ move(V0, SP); ++ //we also reserve space for java_thread here ++ assert(StackAlignmentInBytes == 16, "must be"); ++ bstrins_d(SP, R0, 3, 0); ++ call_VM_base(oop_result, NOREG, V0, entry_point, number_of_arguments, check_exceptions); ++} ++ ++void MacroAssembler::call_VM_leaf(address entry_point, int number_of_arguments) { ++ call_VM_leaf_base(entry_point, number_of_arguments); ++} ++ ++void MacroAssembler::call_VM_leaf(address entry_point, Register arg_0) { ++ if (arg_0 != A0) move(A0, arg_0); ++ call_VM_leaf(entry_point, 1); ++} ++ ++void MacroAssembler::call_VM_leaf(address entry_point, Register arg_0, Register arg_1) { ++ if (arg_0 != A0) move(A0, arg_0); ++ if (arg_1 != A1) move(A1, arg_1); assert(arg_1 != A0, "smashed argument"); ++ call_VM_leaf(entry_point, 2); ++} ++ ++void MacroAssembler::call_VM_leaf(address entry_point, Register arg_0, Register arg_1, Register arg_2) { ++ if (arg_0 != A0) move(A0, arg_0); ++ if (arg_1 != A1) move(A1, arg_1); assert(arg_1 != A0, "smashed argument"); ++ if (arg_2 != A2) move(A2, arg_2); assert(arg_2 != A0 && arg_2 != A1, "smashed argument"); ++ call_VM_leaf(entry_point, 3); ++} ++ ++void MacroAssembler::super_call_VM_leaf(address entry_point) { ++ 
MacroAssembler::call_VM_leaf_base(entry_point, 0); ++} ++ ++void MacroAssembler::super_call_VM_leaf(address entry_point, ++ Register arg_1) { ++ if (arg_1 != A0) move(A0, arg_1); ++ MacroAssembler::call_VM_leaf_base(entry_point, 1); ++} ++ ++void MacroAssembler::super_call_VM_leaf(address entry_point, ++ Register arg_1, ++ Register arg_2) { ++ if (arg_1 != A0) move(A0, arg_1); ++ if (arg_2 != A1) move(A1, arg_2); assert(arg_2 != A0, "smashed argument"); ++ MacroAssembler::call_VM_leaf_base(entry_point, 2); ++} ++ ++void MacroAssembler::super_call_VM_leaf(address entry_point, ++ Register arg_1, ++ Register arg_2, ++ Register arg_3) { ++ if (arg_1 != A0) move(A0, arg_1); ++ if (arg_2 != A1) move(A1, arg_2); assert(arg_2 != A0, "smashed argument"); ++ if (arg_3 != A2) move(A2, arg_3); assert(arg_3 != A0 && arg_3 != A1, "smashed argument"); ++ MacroAssembler::call_VM_leaf_base(entry_point, 3); ++} ++ ++void MacroAssembler::check_and_handle_earlyret(Register java_thread) { ++} ++ ++void MacroAssembler::check_and_handle_popframe(Register java_thread) { ++} ++ ++void MacroAssembler::null_check(Register reg, int offset) { ++ if (needs_explicit_null_check(offset)) { ++ // provoke OS NULL exception if reg = NULL by ++ // accessing M[reg] w/o changing any (non-CC) registers ++ // NOTE: cmpl is plenty here to provoke a segv ++ ld_w(AT, reg, 0); ++ } else { ++ // nothing to do, (later) access of M[reg + offset] ++ // will provoke OS NULL exception if reg = NULL ++ } ++} ++ ++void MacroAssembler::enter() { ++ push2(RA, FP); ++ addi_d(FP, SP, 2 * wordSize); ++} ++ ++void MacroAssembler::leave() { ++ addi_d(SP, FP, -2 * wordSize); ++ pop2(RA, FP); ++} ++ ++void MacroAssembler::build_frame(int framesize) { ++ assert(framesize >= 2 * wordSize, "framesize must include space for FP/RA"); ++ assert(framesize % (2 * wordSize) == 0, "must preserve 2 * wordSize alignment"); ++ if (Assembler::is_simm(-framesize, 12)) { ++ addi_d(SP, SP, -framesize); ++ st_ptr(FP, Address(SP, framesize - 2 * wordSize)); ++ st_ptr(RA, Address(SP, framesize - 1 * wordSize)); ++ if (PreserveFramePointer) ++ addi_d(FP, SP, framesize); ++ } else { ++ addi_d(SP, SP, -2 * wordSize); ++ st_ptr(FP, Address(SP, 0 * wordSize)); ++ st_ptr(RA, Address(SP, 1 * wordSize)); ++ if (PreserveFramePointer) ++ addi_d(FP, SP, 2 * wordSize); ++ li(SCR1, framesize - 2 * wordSize); ++ sub_d(SP, SP, SCR1); ++ } ++ verify_cross_modify_fence_not_required(); ++} ++ ++void MacroAssembler::remove_frame(int framesize) { ++ assert(framesize >= 2 * wordSize, "framesize must include space for FP/RA"); ++ assert(framesize % (2*wordSize) == 0, "must preserve 2*wordSize alignment"); ++ if (Assembler::is_simm(framesize, 12)) { ++ ld_ptr(FP, Address(SP, framesize - 2 * wordSize)); ++ ld_ptr(RA, Address(SP, framesize - 1 * wordSize)); ++ addi_d(SP, SP, framesize); ++ } else { ++ li(SCR1, framesize - 2 * wordSize); ++ add_d(SP, SP, SCR1); ++ ld_ptr(FP, Address(SP, 0 * wordSize)); ++ ld_ptr(RA, Address(SP, 1 * wordSize)); ++ addi_d(SP, SP, 2 * wordSize); ++ } ++} ++ ++void MacroAssembler::unimplemented(const char* what) { ++ const char* buf = NULL; ++ { ++ ResourceMark rm; ++ stringStream ss; ++ ss.print("unimplemented: %s", what); ++ buf = code_string(ss.as_string()); ++ } ++ stop(buf); ++} ++ ++void MacroAssembler::get_thread(Register thread) { ++#ifdef MINIMIZE_RAM_USAGE ++ Register tmp; ++ ++ if (thread == AT) ++ tmp = T4; ++ else ++ tmp = AT; ++ ++ move(thread, SP); ++ shr(thread, PAGE_SHIFT); ++ ++ push(tmp); ++ li(tmp, ((1UL << (SP_BITLENGTH - PAGE_SHIFT)) - 1)); ++ 
andr(thread, thread, tmp); ++ shl(thread, Address::times_ptr); // sizeof(Thread *) ++ li(tmp, (long)ThreadLocalStorage::sp_map_addr()); ++ add_d(tmp, tmp, thread); ++ ld_ptr(thread, tmp, 0); ++ pop(tmp); ++#else ++ if (thread != V0) { ++ push(V0); ++ } ++ push_call_clobbered_registers_except(RegSet::of(V0)); ++ ++ push(S5); ++ move(S5, SP); ++ assert(StackAlignmentInBytes == 16, "must be"); ++ bstrins_d(SP, R0, 3, 0); ++ // TODO: confirm reloc ++ call(CAST_FROM_FN_PTR(address, Thread::current), relocInfo::runtime_call_type); ++ move(SP, S5); ++ pop(S5); ++ ++ pop_call_clobbered_registers_except(RegSet::of(V0)); ++ if (thread != V0) { ++ move(thread, V0); ++ pop(V0); ++ } ++#endif // MINIMIZE_RAM_USAGE ++} ++ ++void MacroAssembler::reset_last_Java_frame(Register java_thread, bool clear_fp) { ++ // determine java_thread register ++ if (!java_thread->is_valid()) { ++#ifndef OPT_THREAD ++ java_thread = T1; ++ get_thread(java_thread); ++#else ++ java_thread = TREG; ++#endif ++ } ++ // we must set sp to zero to clear frame ++ st_ptr(R0, java_thread, in_bytes(JavaThread::last_Java_sp_offset())); ++ // must clear fp, so that compiled frames are not confused; it is possible ++ // that we need it only for debugging ++ if(clear_fp) { ++ st_ptr(R0, java_thread, in_bytes(JavaThread::last_Java_fp_offset())); ++ } ++ ++ // Always clear the pc because it could have been set by make_walkable() ++ st_ptr(R0, java_thread, in_bytes(JavaThread::last_Java_pc_offset())); ++} ++ ++void MacroAssembler::reset_last_Java_frame(bool clear_fp) { ++ Register thread = TREG; ++#ifndef OPT_THREAD ++ get_thread(thread); ++#endif ++ // we must set sp to zero to clear frame ++ st_d(R0, thread, in_bytes(JavaThread::last_Java_sp_offset())); ++ // must clear fp, so that compiled frames are not confused; it is ++ // possible that we need it only for debugging ++ if (clear_fp) { ++ st_d(R0, thread, in_bytes(JavaThread::last_Java_fp_offset())); ++ } ++ ++ // Always clear the pc because it could have been set by make_walkable() ++ st_d(R0, thread, in_bytes(JavaThread::last_Java_pc_offset())); ++} ++ ++void MacroAssembler::safepoint_poll(Label& slow_path, Register thread_reg, bool at_return, bool acquire, bool in_nmethod) { ++ if (acquire) { ++ ld_d(AT, thread_reg, in_bytes(JavaThread::polling_word_offset())); ++ membar(Assembler::Membar_mask_bits(LoadLoad|LoadStore)); ++ } else { ++ ld_d(AT, thread_reg, in_bytes(JavaThread::polling_word_offset())); ++ } ++ if (at_return) { ++ // Note that when in_nmethod is set, the stack pointer is incremented before the poll. Therefore, ++ // we may safely use the sp instead to perform the stack watermark check. ++ blt_far(AT, in_nmethod ? SP : FP, slow_path, false /* signed */); ++ } else { ++ andi(AT, AT, SafepointMechanism::poll_bit()); ++ bnez(AT, slow_path); ++ } ++} ++ ++// Calls to C land ++// ++// When entering C land, the fp, & sp of the last Java frame have to be recorded ++// in the (thread-local) JavaThread object. When leaving C land, the last Java fp ++// has to be reset to 0. This is required to allow proper stack traversal. 
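A minimal sketch, not part of the patch, of the thread-local anchor that set_last_Java_frame() and reset_last_Java_frame() below maintain; the struct is a simplified stand-in for HotSpot's JavaFrameAnchor, not its real layout:

#include <cstdint>

struct frame_anchor_sketch {
  intptr_t* last_Java_sp;   // non-zero only while the thread is out in C land
  intptr_t* last_Java_fp;   // optional; cleared so compiled frames are not confused
  void*     last_Java_pc;   // lets the stack walker find the last Java frame
};

static void record_last_Java_frame(frame_anchor_sketch* a,
                                   intptr_t* sp, intptr_t* fp, void* pc) {
  a->last_Java_fp = fp;
  a->last_Java_pc = pc;
  a->last_Java_sp = sp;        // stored last, as in the patch: a set sp marks the anchor live
}

static void clear_last_Java_frame(frame_anchor_sketch* a, bool clear_fp) {
  a->last_Java_sp = nullptr;   // cleared first: a zero sp invalidates the frame
  if (clear_fp) a->last_Java_fp = nullptr;
  a->last_Java_pc = nullptr;   // always cleared; it may have been set by make_walkable()
}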
++void MacroAssembler::set_last_Java_frame(Register java_thread, ++ Register last_java_sp, ++ Register last_java_fp, ++ Label& last_java_pc) { ++ // determine java_thread register ++ if (!java_thread->is_valid()) { ++#ifndef OPT_THREAD ++ java_thread = T2; ++ get_thread(java_thread); ++#else ++ java_thread = TREG; ++#endif ++ } ++ ++ // determine last_java_sp register ++ if (!last_java_sp->is_valid()) { ++ last_java_sp = SP; ++ } ++ ++ // last_java_fp is optional ++ if (last_java_fp->is_valid()) { ++ st_ptr(last_java_fp, java_thread, in_bytes(JavaThread::last_Java_fp_offset())); ++ } ++ ++ // last_java_pc ++ lipc(AT, last_java_pc); ++ st_ptr(AT, java_thread, in_bytes(JavaThread::frame_anchor_offset() + ++ JavaFrameAnchor::last_Java_pc_offset())); ++ ++ st_ptr(last_java_sp, java_thread, in_bytes(JavaThread::last_Java_sp_offset())); ++} ++ ++void MacroAssembler::set_last_Java_frame(Register last_java_sp, ++ Register last_java_fp, ++ Label& last_java_pc) { ++ set_last_Java_frame(NOREG, last_java_sp, last_java_fp, last_java_pc); ++} ++ ++void MacroAssembler::set_last_Java_frame(Register last_java_sp, ++ Register last_java_fp, ++ Register last_java_pc) { ++#ifndef OPT_THREAD ++ Register java_thread = T2; ++ get_thread(java_thread); ++#else ++ Register java_thread = TREG; ++#endif ++ ++ // determine last_java_sp register ++ if (!last_java_sp->is_valid()) { ++ last_java_sp = SP; ++ } ++ ++ // last_java_fp is optional ++ if (last_java_fp->is_valid()) { ++ st_ptr(last_java_fp, java_thread, in_bytes(JavaThread::last_Java_fp_offset())); ++ } ++ ++ // last_java_pc is optional ++ if (last_java_pc->is_valid()) { ++ st_ptr(last_java_pc, java_thread, in_bytes(JavaThread::frame_anchor_offset() + ++ JavaFrameAnchor::last_Java_pc_offset())); ++ } ++ ++ st_ptr(last_java_sp, java_thread, in_bytes(JavaThread::last_Java_sp_offset())); ++} ++ ++// Defines obj, preserves var_size_in_bytes, okay for t2 == var_size_in_bytes. 
++void MacroAssembler::tlab_allocate(Register obj, ++ Register var_size_in_bytes, ++ int con_size_in_bytes, ++ Register t1, ++ Register t2, ++ Label& slow_case) { ++ BarrierSetAssembler *bs = BarrierSet::barrier_set()->barrier_set_assembler(); ++ bs->tlab_allocate(this, obj, var_size_in_bytes, con_size_in_bytes, t1, t2, slow_case); ++} ++ ++// Defines obj, preserves var_size_in_bytes ++void MacroAssembler::eden_allocate(Register obj, ++ Register var_size_in_bytes, ++ int con_size_in_bytes, ++ Register t1, ++ Label& slow_case) { ++ BarrierSetAssembler *bs = BarrierSet::barrier_set()->barrier_set_assembler(); ++ bs->eden_allocate(this, obj, var_size_in_bytes, con_size_in_bytes, t1, slow_case); ++} ++ ++ ++void MacroAssembler::incr_allocated_bytes(Register thread, ++ Register var_size_in_bytes, ++ int con_size_in_bytes, ++ Register t1) { ++ if (!thread->is_valid()) { ++#ifndef OPT_THREAD ++ assert(t1->is_valid(), "need temp reg"); ++ thread = t1; ++ get_thread(thread); ++#else ++ thread = TREG; ++#endif ++ } ++ ++ ld_ptr(AT, thread, in_bytes(JavaThread::allocated_bytes_offset())); ++ if (var_size_in_bytes->is_valid()) { ++ add_d(AT, AT, var_size_in_bytes); ++ } else { ++ addi_d(AT, AT, con_size_in_bytes); ++ } ++ st_ptr(AT, thread, in_bytes(JavaThread::allocated_bytes_offset())); ++} ++ ++void MacroAssembler::li(Register rd, jlong value) { ++ jlong hi12 = bitfield(value, 52, 12); ++ jlong lo52 = bitfield(value, 0, 52); ++ ++ if (hi12 != 0 && lo52 == 0) { ++ lu52i_d(rd, R0, hi12); ++ } else { ++ jlong hi20 = bitfield(value, 32, 20); ++ jlong lo20 = bitfield(value, 12, 20); ++ jlong lo12 = bitfield(value, 0, 12); ++ ++ if (lo20 == 0) { ++ ori(rd, R0, lo12); ++ } else if (bitfield(simm12(lo12), 12, 20) == lo20) { ++ addi_w(rd, R0, simm12(lo12)); ++ } else { ++ lu12i_w(rd, lo20); ++ if (lo12 != 0) ++ ori(rd, rd, lo12); ++ } ++ if (hi20 != bitfield(simm20(lo20), 20, 20)) ++ lu32i_d(rd, hi20); ++ if (hi12 != bitfield(simm20(hi20), 20, 12)) ++ lu52i_d(rd, rd, hi12); ++ } ++} ++ ++void MacroAssembler::patchable_li52(Register rd, jlong value) { ++ int count = 0; ++ ++ if (value <= max_jint && value >= min_jint) { ++ if (is_simm(value, 12)) { ++ addi_d(rd, R0, value); ++ count++; ++ } else if (is_uimm(value, 12)) { ++ ori(rd, R0, value); ++ count++; ++ } else { ++ lu12i_w(rd, split_low20(value >> 12)); ++ count++; ++ if (split_low12(value)) { ++ ori(rd, rd, split_low12(value)); ++ count++; ++ } ++ } ++ } else if (is_simm(value, 52)) { ++ lu12i_w(rd, split_low20(value >> 12)); ++ count++; ++ if (split_low12(value)) { ++ ori(rd, rd, split_low12(value)); ++ count++; ++ } ++ lu32i_d(rd, split_low20(value >> 32)); ++ count++; ++ } else { ++ tty->print_cr("value = 0x%lx", value); ++ guarantee(false, "Not supported yet !"); ++ } ++ ++ while (count < 3) { ++ nop(); ++ count++; ++ } ++} ++ ++void MacroAssembler::lipc(Register rd, Label& L) { ++ if (L.is_bound()) { ++ jint offs = (target(L) - pc()) >> 2; ++ guarantee(is_simm(offs, 20), "Not signed 20-bit offset"); ++ pcaddi(rd, offs); ++ } else { ++ InstructionMark im(this); ++ L.add_patch_at(code(), locator()); ++ pcaddi(rd, 0); ++ } ++} ++ ++void MacroAssembler::set_narrow_klass(Register dst, Klass* k) { ++ assert(UseCompressedClassPointers, "should only be used for compressed header"); ++ assert(oop_recorder() != NULL, "this assembler needs an OopRecorder"); ++ ++ int klass_index = oop_recorder()->find_index(k); ++ RelocationHolder rspec = metadata_Relocation::spec(klass_index); ++ long narrowKlass = (long)CompressedKlassPointers::encode(k); ++ ++ 
relocate(rspec, Assembler::narrow_oop_operand); ++ patchable_li52(dst, narrowKlass); ++} ++ ++void MacroAssembler::set_narrow_oop(Register dst, jobject obj) { ++ assert(UseCompressedOops, "should only be used for compressed header"); ++ assert(oop_recorder() != NULL, "this assembler needs an OopRecorder"); ++ ++ int oop_index = oop_recorder()->find_index(obj); ++ RelocationHolder rspec = oop_Relocation::spec(oop_index); ++ ++ relocate(rspec, Assembler::narrow_oop_operand); ++ patchable_li52(dst, oop_index); ++} ++ ++// ((OopHandle)result).resolve(); ++void MacroAssembler::resolve_oop_handle(Register result, Register tmp) { ++ // OopHandle::resolve is an indirection. ++ access_load_at(T_OBJECT, IN_NATIVE, result, Address(result, 0), tmp, NOREG); ++} ++ ++// ((WeakHandle)result).resolve(); ++void MacroAssembler::resolve_weak_handle(Register rresult, Register rtmp) { ++ assert_different_registers(rresult, rtmp); ++ Label resolved; ++ ++ // A null weak handle resolves to null. ++ beqz(rresult, resolved); ++ ++ // Only 64 bit platforms support GCs that require a tmp register ++ // Only IN_HEAP loads require a thread_tmp register ++ // WeakHandle::resolve is an indirection like jweak. ++ access_load_at(T_OBJECT, IN_NATIVE | ON_PHANTOM_OOP_REF, ++ rresult, Address(rresult), rtmp, /*tmp_thread*/noreg); ++ bind(resolved); ++} ++ ++void MacroAssembler::load_mirror(Register mirror, Register method, Register tmp) { ++ // get mirror ++ const int mirror_offset = in_bytes(Klass::java_mirror_offset()); ++ ld_ptr(mirror, method, in_bytes(Method::const_offset())); ++ ld_ptr(mirror, mirror, in_bytes(ConstMethod::constants_offset())); ++ ld_ptr(mirror, mirror, ConstantPool::pool_holder_offset_in_bytes()); ++ ld_ptr(mirror, mirror, mirror_offset); ++ resolve_oop_handle(mirror, tmp); ++} ++ ++void MacroAssembler::verify_oop(Register reg, const char* s) { ++ if (!VerifyOops) return; ++ ++ const char * b = NULL; ++ stringStream ss; ++ ss.print("verify_oop: %s: %s", reg->name(), s); ++ b = code_string(ss.as_string()); ++ ++ addi_d(SP, SP, -6 * wordSize); ++ st_ptr(SCR1, Address(SP, 0 * wordSize)); ++ st_ptr(SCR2, Address(SP, 1 * wordSize)); ++ st_ptr(RA, Address(SP, 2 * wordSize)); ++ st_ptr(A0, Address(SP, 3 * wordSize)); ++ st_ptr(A1, Address(SP, 4 * wordSize)); ++ ++ move(A1, reg); ++ patchable_li52(A0, (uintptr_t)(address)b); // Fixed size instructions ++ li(SCR2, StubRoutines::verify_oop_subroutine_entry_address()); ++ ld_ptr(SCR2, Address(SCR2)); ++ jalr(SCR2); ++ ++ ld_ptr(SCR1, Address(SP, 0 * wordSize)); ++ ld_ptr(SCR2, Address(SP, 1 * wordSize)); ++ ld_ptr(RA, Address(SP, 2 * wordSize)); ++ ld_ptr(A0, Address(SP, 3 * wordSize)); ++ ld_ptr(A1, Address(SP, 4 * wordSize)); ++ addi_d(SP, SP, 6 * wordSize); ++} ++ ++void MacroAssembler::verify_oop_addr(Address addr, const char* s) { ++ if (!VerifyOops) return; ++ ++ const char* b = NULL; ++ { ++ ResourceMark rm; ++ stringStream ss; ++ ss.print("verify_oop_addr: %s", s); ++ b = code_string(ss.as_string()); ++ } ++ ++ addi_d(SP, SP, -6 * wordSize); ++ st_ptr(SCR1, Address(SP, 0 * wordSize)); ++ st_ptr(SCR2, Address(SP, 1 * wordSize)); ++ st_ptr(RA, Address(SP, 2 * wordSize)); ++ st_ptr(A0, Address(SP, 3 * wordSize)); ++ st_ptr(A1, Address(SP, 4 * wordSize)); ++ ++ patchable_li52(A0, (uintptr_t)(address)b); // Fixed size instructions ++ // addr may contain sp so we will have to adjust it based on the ++ // pushes that we just did. 
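The "Fixed size instructions" note above refers to patchable_li52(), defined earlier in this hunk: it always emits exactly three instructions, padding with nops, so the sequence can later be patched in place. A minimal sketch, not part of the patch, of how a constant with up to 52 significant bits is split across the lu12i_w/ori/lu32i_d immediate fields:

#include <cstdint>

struct li52_fields {
  uint32_t lo12;   // ori      rd, rd, lo12   (bits  0..11)
  uint32_t lo20;   // lu12i_w  rd, lo20       (bits 12..31)
  uint32_t hi20;   // lu32i_d  rd, hi20       (bits 32..51)
};

// Sketch only: field extraction; instruction emission is left to the assembler.
static li52_fields split_li52(int64_t value) {
  li52_fields f;
  f.lo12 = (uint64_t)value         & 0xfffu;
  f.lo20 = ((uint64_t)value >> 12) & 0xfffffu;
  f.hi20 = ((uint64_t)value >> 32) & 0xfffffu;
  return f;        // values beyond 52 significant bits are rejected by the patch
}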
++ if (addr.uses(SP)) { ++ lea(A1, addr); ++ ld_ptr(A1, Address(A1, 6 * wordSize)); ++ } else { ++ ld_ptr(A1, addr); ++ } ++ ++ // call indirectly to solve generation ordering problem ++ li(SCR2, StubRoutines::verify_oop_subroutine_entry_address()); ++ ld_ptr(SCR2, Address(SCR2)); ++ jalr(SCR2); ++ ++ ld_ptr(SCR1, Address(SP, 0 * wordSize)); ++ ld_ptr(SCR2, Address(SP, 1 * wordSize)); ++ ld_ptr(RA, Address(SP, 2 * wordSize)); ++ ld_ptr(A0, Address(SP, 3 * wordSize)); ++ ld_ptr(A1, Address(SP, 4 * wordSize)); ++ addi_d(SP, SP, 6 * wordSize); ++} ++ ++// used registers : SCR1, SCR2 ++void MacroAssembler::verify_oop_subroutine() { ++ // RA: ra ++ // A0: char* error message ++ // A1: oop object to verify ++ Label exit, error; ++ // increment counter ++ li(SCR2, (long)StubRoutines::verify_oop_count_addr()); ++ ld_w(SCR1, SCR2, 0); ++ addi_d(SCR1, SCR1, 1); ++ st_w(SCR1, SCR2, 0); ++ ++ // make sure object is 'reasonable' ++ beqz(A1, exit); // if obj is NULL it is ok ++ ++#if INCLUDE_ZGC ++ if (UseZGC) { ++ // Check if mask is good. ++ // verifies that ZAddressBadMask & A1 == 0 ++ ld_ptr(AT, Address(TREG, ZThreadLocalData::address_bad_mask_offset())); ++ andr(AT, A1, AT); ++ bnez(AT, error); ++ } ++#endif ++ ++ // Check if the oop is in the right area of memory ++ // const int oop_mask = Universe::verify_oop_mask(); ++ // const int oop_bits = Universe::verify_oop_bits(); ++ const uintptr_t oop_mask = Universe::verify_oop_mask(); ++ const uintptr_t oop_bits = Universe::verify_oop_bits(); ++ li(SCR1, oop_mask); ++ andr(SCR2, A1, SCR1); ++ li(SCR1, oop_bits); ++ bne(SCR2, SCR1, error); ++ ++ // make sure klass is 'reasonable' ++ // add for compressedoops ++ load_klass(SCR2, A1); ++ beqz(SCR2, error); // if klass is NULL it is broken ++ // return if everything seems ok ++ bind(exit); ++ ++ jr(RA); ++ ++ // handle errors ++ bind(error); ++ push_call_clobbered_registers(); ++ call(CAST_FROM_FN_PTR(address, MacroAssembler::debug), relocInfo::runtime_call_type); ++ pop_call_clobbered_registers(); ++ jr(RA); ++} ++ ++void MacroAssembler::verify_tlab(Register t1, Register t2) { ++#ifdef ASSERT ++ assert_different_registers(t1, t2, AT); ++ if (UseTLAB && VerifyOops) { ++ Label next, ok; ++ ++ get_thread(t1); ++ ++ ld_ptr(t2, t1, in_bytes(JavaThread::tlab_top_offset())); ++ ld_ptr(AT, t1, in_bytes(JavaThread::tlab_start_offset())); ++ bgeu(t2, AT, next); ++ ++ stop("assert(top >= start)"); ++ ++ bind(next); ++ ld_ptr(AT, t1, in_bytes(JavaThread::tlab_end_offset())); ++ bgeu(AT, t2, ok); ++ ++ stop("assert(top <= end)"); ++ ++ bind(ok); ++ ++ } ++#endif ++} ++ ++RegisterOrConstant MacroAssembler::delayed_value_impl(intptr_t* delayed_value_addr, ++ Register tmp, ++ int offset) { ++ //TODO: LA ++ guarantee(0, "LA not implemented yet"); ++ return RegisterOrConstant(tmp); ++} ++ ++void MacroAssembler::bswap_h(Register dst, Register src) { ++ revb_2h(dst, src); ++ ext_w_h(dst, dst); // sign extension of the lower 16 bits ++} ++ ++void MacroAssembler::bswap_hu(Register dst, Register src) { ++ revb_2h(dst, src); ++ bstrpick_d(dst, dst, 15, 0); // zero extension of the lower 16 bits ++} ++ ++void MacroAssembler::bswap_w(Register dst, Register src) { ++ revb_2w(dst, src); ++ slli_w(dst, dst, 0); // keep sign, clear upper bits ++} ++ ++void MacroAssembler::cmpxchg(Address addr, Register oldval, Register newval, ++ Register resflag, bool retold, bool barrier, ++ bool weak, bool exchange) { ++ assert(oldval != resflag, "oldval != resflag"); ++ assert(newval != resflag, "newval != resflag"); ++ assert(addr.base() != 
resflag, "addr.base() != resflag"); ++ Label again, succ, fail; ++ ++ bind(again); ++ ll_d(resflag, addr); ++ bne(resflag, oldval, fail); ++ move(resflag, newval); ++ sc_d(resflag, addr); ++ if (weak) { ++ b(succ); ++ } else { ++ beqz(resflag, again); ++ } ++ if (exchange) { ++ move(resflag, oldval); ++ } ++ b(succ); ++ ++ bind(fail); ++ if (barrier) ++ dbar(0x700); ++ if (retold && oldval != R0) ++ move(oldval, resflag); ++ if (!exchange) { ++ move(resflag, R0); ++ } ++ bind(succ); ++} ++ ++void MacroAssembler::cmpxchg(Address addr, Register oldval, Register newval, ++ Register tmp, bool retold, bool barrier, Label& succ, Label* fail) { ++ assert(oldval != tmp, "oldval != tmp"); ++ assert(newval != tmp, "newval != tmp"); ++ Label again, neq; ++ ++ bind(again); ++ ll_d(tmp, addr); ++ bne(tmp, oldval, neq); ++ move(tmp, newval); ++ sc_d(tmp, addr); ++ beqz(tmp, again); ++ b(succ); ++ ++ bind(neq); ++ if (barrier) ++ dbar(0x700); ++ if (retold && oldval != R0) ++ move(oldval, tmp); ++ if (fail) ++ b(*fail); ++} ++ ++void MacroAssembler::cmpxchg32(Address addr, Register oldval, Register newval, ++ Register resflag, bool sign, bool retold, bool barrier, ++ bool weak, bool exchange) { ++ assert(oldval != resflag, "oldval != resflag"); ++ assert(newval != resflag, "newval != resflag"); ++ assert(addr.base() != resflag, "addr.base() != resflag"); ++ Label again, succ, fail; ++ ++ bind(again); ++ ll_w(resflag, addr); ++ if (!sign) ++ lu32i_d(resflag, 0); ++ bne(resflag, oldval, fail); ++ move(resflag, newval); ++ sc_w(resflag, addr); ++ if (weak) { ++ b(succ); ++ } else { ++ beqz(resflag, again); ++ } ++ if (exchange) { ++ move(resflag, oldval); ++ } ++ b(succ); ++ ++ bind(fail); ++ if (barrier) ++ dbar(0x700); ++ if (retold && oldval != R0) ++ move(oldval, resflag); ++ if (!exchange) { ++ move(resflag, R0); ++ } ++ bind(succ); ++} ++ ++void MacroAssembler::cmpxchg32(Address addr, Register oldval, Register newval, Register tmp, ++ bool sign, bool retold, bool barrier, Label& succ, Label* fail) { ++ assert(oldval != tmp, "oldval != tmp"); ++ assert(newval != tmp, "newval != tmp"); ++ Label again, neq; ++ ++ bind(again); ++ ll_w(tmp, addr); ++ if (!sign) ++ lu32i_d(tmp, 0); ++ bne(tmp, oldval, neq); ++ move(tmp, newval); ++ sc_w(tmp, addr); ++ beqz(tmp, again); ++ b(succ); ++ ++ bind(neq); ++ if (barrier) ++ dbar(0x700); ++ if (retold && oldval != R0) ++ move(oldval, tmp); ++ if (fail) ++ b(*fail); ++} ++ ++// be sure the three register is different ++void MacroAssembler::rem_s(FloatRegister fd, FloatRegister fs, FloatRegister ft, FloatRegister tmp) { ++ //TODO: LA ++ guarantee(0, "LA not implemented yet"); ++} ++ ++// be sure the three register is different ++void MacroAssembler::rem_d(FloatRegister fd, FloatRegister fs, FloatRegister ft, FloatRegister tmp) { ++ //TODO: LA ++ guarantee(0, "LA not implemented yet"); ++} ++ ++void MacroAssembler::align(int modulus) { ++ while (offset() % modulus != 0) nop(); ++} ++ ++ ++void MacroAssembler::verify_FPU(int stack_depth, const char* s) { ++ //Unimplemented(); ++} ++ ++static RegSet caller_saved_regset = RegSet::range(A0, A7) + RegSet::range(T0, T8) + RegSet::of(FP, RA) - RegSet::of(SCR1, SCR2); ++static FloatRegSet caller_saved_fpu_regset = FloatRegSet::range(F0, F23); ++ ++void MacroAssembler::push_call_clobbered_registers_except(RegSet exclude) { ++ push(caller_saved_regset - exclude); ++ push_fpu(caller_saved_fpu_regset); ++} ++ ++void MacroAssembler::pop_call_clobbered_registers_except(RegSet exclude) { ++ pop_fpu(caller_saved_fpu_regset); ++ 
pop(caller_saved_regset - exclude); ++} ++ ++void MacroAssembler::push2(Register reg1, Register reg2) { ++ addi_d(SP, SP, -16); ++ st_d(reg1, SP, 8); ++ st_d(reg2, SP, 0); ++} ++ ++void MacroAssembler::pop2(Register reg1, Register reg2) { ++ ld_d(reg1, SP, 8); ++ ld_d(reg2, SP, 0); ++ addi_d(SP, SP, 16); ++} ++ ++void MacroAssembler::push(unsigned int bitset) { ++ unsigned char regs[31]; ++ int count = 0; ++ ++ bitset >>= 1; ++ for (int reg = 1; reg < 31; reg++) { ++ if (1 & bitset) ++ regs[count++] = reg; ++ bitset >>= 1; ++ } ++ ++ addi_d(SP, SP, -align_up(count, 2) * wordSize); ++ for (int i = 0; i < count; i ++) ++ st_d(as_Register(regs[i]), SP, i * wordSize); ++} ++ ++void MacroAssembler::pop(unsigned int bitset) { ++ unsigned char regs[31]; ++ int count = 0; ++ ++ bitset >>= 1; ++ for (int reg = 1; reg < 31; reg++) { ++ if (1 & bitset) ++ regs[count++] = reg; ++ bitset >>= 1; ++ } ++ ++ for (int i = 0; i < count; i ++) ++ ld_d(as_Register(regs[i]), SP, i * wordSize); ++ addi_d(SP, SP, align_up(count, 2) * wordSize); ++} ++ ++void MacroAssembler::push_fpu(unsigned int bitset) { ++ unsigned char regs[32]; ++ int count = 0; ++ ++ if (bitset == 0) ++ return; ++ ++ for (int reg = 0; reg <= 31; reg++) { ++ if (1 & bitset) ++ regs[count++] = reg; ++ bitset >>= 1; ++ } ++ ++ addi_d(SP, SP, -align_up(count, 2) * wordSize); ++ for (int i = 0; i < count; i++) ++ fst_d(as_FloatRegister(regs[i]), SP, i * wordSize); ++} ++ ++void MacroAssembler::pop_fpu(unsigned int bitset) { ++ unsigned char regs[32]; ++ int count = 0; ++ ++ if (bitset == 0) ++ return; ++ ++ for (int reg = 0; reg <= 31; reg++) { ++ if (1 & bitset) ++ regs[count++] = reg; ++ bitset >>= 1; ++ } ++ ++ for (int i = 0; i < count; i++) ++ fld_d(as_FloatRegister(regs[i]), SP, i * wordSize); ++ addi_d(SP, SP, align_up(count, 2) * wordSize); ++} ++ ++static int vpr_offset(int off) { ++ int slots_per_vpr = 0; ++ ++ if (UseLASX) ++ slots_per_vpr = FloatRegisterImpl::slots_per_lasx_register; ++ else if (UseLSX) ++ slots_per_vpr = FloatRegisterImpl::slots_per_lsx_register; ++ ++ return off * slots_per_vpr * VMRegImpl::stack_slot_size; ++} ++ ++void MacroAssembler::push_vp(unsigned int bitset) { ++ unsigned char regs[32]; ++ int count = 0; ++ ++ if (bitset == 0) ++ return; ++ ++ for (int reg = 0; reg <= 31; reg++) { ++ if (1 & bitset) ++ regs[count++] = reg; ++ bitset >>= 1; ++ } ++ ++ addi_d(SP, SP, vpr_offset(-align_up(count, 2))); ++ ++ for (int i = 0; i < count; i++) { ++ int off = vpr_offset(i); ++ if (UseLASX) ++ xvst(as_FloatRegister(regs[i]), SP, off); ++ else if (UseLSX) ++ vst(as_FloatRegister(regs[i]), SP, off); ++ } ++} ++ ++void MacroAssembler::pop_vp(unsigned int bitset) { ++ unsigned char regs[32]; ++ int count = 0; ++ ++ if (bitset == 0) ++ return; ++ ++ for (int reg = 0; reg <= 31; reg++) { ++ if (1 & bitset) ++ regs[count++] = reg; ++ bitset >>= 1; ++ } ++ ++ for (int i = 0; i < count; i++) { ++ int off = vpr_offset(i); ++ if (UseLASX) ++ xvld(as_FloatRegister(regs[i]), SP, off); ++ else if (UseLSX) ++ vld(as_FloatRegister(regs[i]), SP, off); ++ } ++ ++ addi_d(SP, SP, vpr_offset(align_up(count, 2))); ++} ++ ++void MacroAssembler::load_method_holder(Register holder, Register method) { ++ ld_d(holder, Address(method, Method::const_offset())); // ConstMethod* ++ ld_d(holder, Address(holder, ConstMethod::constants_offset())); // ConstantPool* ++ ld_d(holder, Address(holder, ConstantPool::pool_holder_offset_in_bytes())); // InstanceKlass* ++} ++ ++void MacroAssembler::load_method_holder_cld(Register rresult, Register rmethod) { ++ 
load_method_holder(rresult, rmethod); ++ ld_ptr(rresult, Address(rresult, InstanceKlass::class_loader_data_offset())); ++} ++ ++// for UseCompressedOops Option ++void MacroAssembler::load_klass(Register dst, Register src) { ++ if(UseCompressedClassPointers){ ++ ld_wu(dst, Address(src, oopDesc::klass_offset_in_bytes())); ++ decode_klass_not_null(dst); ++ } else { ++ ld_d(dst, src, oopDesc::klass_offset_in_bytes()); ++ } ++} ++ ++void MacroAssembler::store_klass(Register dst, Register src) { ++ if(UseCompressedClassPointers){ ++ encode_klass_not_null(src); ++ st_w(src, dst, oopDesc::klass_offset_in_bytes()); ++ } else { ++ st_d(src, dst, oopDesc::klass_offset_in_bytes()); ++ } ++} ++ ++void MacroAssembler::load_prototype_header(Register dst, Register src) { ++ load_klass(dst, src); ++ ld_d(dst, Address(dst, Klass::prototype_header_offset())); ++} ++ ++void MacroAssembler::store_klass_gap(Register dst, Register src) { ++ if (UseCompressedClassPointers) { ++ st_w(src, dst, oopDesc::klass_gap_offset_in_bytes()); ++ } ++} ++ ++void MacroAssembler::access_load_at(BasicType type, DecoratorSet decorators, Register dst, Address src, ++ Register tmp1, Register thread_tmp) { ++ BarrierSetAssembler* bs = BarrierSet::barrier_set()->barrier_set_assembler(); ++ decorators = AccessInternal::decorator_fixup(decorators); ++ bool as_raw = (decorators & AS_RAW) != 0; ++ if (as_raw) { ++ bs->BarrierSetAssembler::load_at(this, decorators, type, dst, src, tmp1, thread_tmp); ++ } else { ++ bs->load_at(this, decorators, type, dst, src, tmp1, thread_tmp); ++ } ++} ++ ++void MacroAssembler::access_store_at(BasicType type, DecoratorSet decorators, Address dst, Register src, ++ Register tmp1, Register tmp2) { ++ BarrierSetAssembler* bs = BarrierSet::barrier_set()->barrier_set_assembler(); ++ decorators = AccessInternal::decorator_fixup(decorators); ++ bool as_raw = (decorators & AS_RAW) != 0; ++ if (as_raw) { ++ bs->BarrierSetAssembler::store_at(this, decorators, type, dst, src, tmp1, tmp2); ++ } else { ++ bs->store_at(this, decorators, type, dst, src, tmp1, tmp2); ++ } ++} ++ ++void MacroAssembler::load_heap_oop(Register dst, Address src, Register tmp1, ++ Register thread_tmp, DecoratorSet decorators) { ++ access_load_at(T_OBJECT, IN_HEAP | decorators, dst, src, tmp1, thread_tmp); ++} ++ ++// Doesn't do verfication, generates fixed size code ++void MacroAssembler::load_heap_oop_not_null(Register dst, Address src, Register tmp1, ++ Register thread_tmp, DecoratorSet decorators) { ++ access_load_at(T_OBJECT, IN_HEAP | IS_NOT_NULL | decorators, dst, src, tmp1, thread_tmp); ++} ++ ++void MacroAssembler::store_heap_oop(Address dst, Register src, Register tmp1, ++ Register tmp2, DecoratorSet decorators) { ++ access_store_at(T_OBJECT, IN_HEAP | decorators, dst, src, tmp1, tmp2); ++} ++ ++// Used for storing NULLs. ++void MacroAssembler::store_heap_oop_null(Address dst) { ++ access_store_at(T_OBJECT, IN_HEAP, dst, noreg, noreg, noreg); ++} ++ ++#ifdef ASSERT ++void MacroAssembler::verify_heapbase(const char* msg) { ++ assert (UseCompressedOops || UseCompressedClassPointers, "should be compressed"); ++ assert (Universe::heap() != NULL, "java heap should be initialized"); ++} ++#endif ++ ++// Algorithm must match oop.inline.hpp encode_heap_oop. 
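Before the routines themselves, a minimal sketch (not part of the patch) of the compression arithmetic they implement; base and shift stand in for CompressedOops::base() and CompressedOops::shift(), and the null-preserving maskeqz steps are left out:

#include <cstdint>

// Sketch only: compress a heap pointer into a 32-bit narrow oop.
static uint32_t encode_heap_oop_sketch(uintptr_t oop, uintptr_t base, unsigned shift) {
  if (oop == 0) return 0;                        // null stays null
  return (uint32_t)((oop - base) >> shift);      // subtract base, then shift right
}

// Sketch only: the exact inverse, as emitted by the decode routines.
static uintptr_t decode_heap_oop_sketch(uint32_t narrow, uintptr_t base, unsigned shift) {
  if (narrow == 0) return 0;                     // null stays null
  return base + ((uintptr_t)narrow << shift);    // shift left, then add base
}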
++void MacroAssembler::encode_heap_oop(Register r) { ++#ifdef ASSERT ++ verify_heapbase("MacroAssembler::encode_heap_oop:heap base corrupted?"); ++#endif ++ verify_oop(r, "broken oop in encode_heap_oop"); ++ if (CompressedOops::base() == NULL) { ++ if (CompressedOops::shift() != 0) { ++ assert(LogMinObjAlignmentInBytes == CompressedOops::shift(), "decode alg wrong"); ++ shr(r, LogMinObjAlignmentInBytes); ++ } ++ return; ++ } ++ ++ sub_d(AT, r, S5_heapbase); ++ maskeqz(r, AT, r); ++ if (CompressedOops::shift() != 0) { ++ assert(LogMinObjAlignmentInBytes == CompressedOops::shift(), "decode alg wrong"); ++ shr(r, LogMinObjAlignmentInBytes); ++ } ++} ++ ++void MacroAssembler::encode_heap_oop(Register dst, Register src) { ++#ifdef ASSERT ++ verify_heapbase("MacroAssembler::encode_heap_oop:heap base corrupted?"); ++#endif ++ verify_oop(src, "broken oop in encode_heap_oop"); ++ if (CompressedOops::base() == NULL) { ++ if (CompressedOops::shift() != 0) { ++ assert(LogMinObjAlignmentInBytes == CompressedOops::shift(), "decode alg wrong"); ++ srli_d(dst, src, LogMinObjAlignmentInBytes); ++ } else { ++ if (dst != src) { ++ move(dst, src); ++ } ++ } ++ return; ++ } ++ ++ sub_d(AT, src, S5_heapbase); ++ maskeqz(dst, AT, src); ++ if (CompressedOops::shift() != 0) { ++ assert(LogMinObjAlignmentInBytes == CompressedOops::shift(), "decode alg wrong"); ++ shr(dst, LogMinObjAlignmentInBytes); ++ } ++} ++ ++void MacroAssembler::encode_heap_oop_not_null(Register r) { ++ assert (UseCompressedOops, "should be compressed"); ++#ifdef ASSERT ++ if (CheckCompressedOops) { ++ Label ok; ++ bne(r, R0, ok); ++ stop("null oop passed to encode_heap_oop_not_null"); ++ bind(ok); ++ } ++#endif ++ verify_oop(r, "broken oop in encode_heap_oop_not_null"); ++ if (CompressedOops::base() != NULL) { ++ sub_d(r, r, S5_heapbase); ++ } ++ if (CompressedOops::shift() != 0) { ++ assert (LogMinObjAlignmentInBytes == CompressedOops::shift(), "decode alg wrong"); ++ shr(r, LogMinObjAlignmentInBytes); ++ } ++ ++} ++ ++void MacroAssembler::encode_heap_oop_not_null(Register dst, Register src) { ++ assert (UseCompressedOops, "should be compressed"); ++#ifdef ASSERT ++ if (CheckCompressedOops) { ++ Label ok; ++ bne(src, R0, ok); ++ stop("null oop passed to encode_heap_oop_not_null2"); ++ bind(ok); ++ } ++#endif ++ verify_oop(src, "broken oop in encode_heap_oop_not_null2"); ++ if (CompressedOops::base() == NULL) { ++ if (CompressedOops::shift() != 0) { ++ assert(LogMinObjAlignmentInBytes == CompressedOops::shift(), "decode alg wrong"); ++ srli_d(dst, src, LogMinObjAlignmentInBytes); ++ } else { ++ if (dst != src) { ++ move(dst, src); ++ } ++ } ++ return; ++ } ++ ++ sub_d(dst, src, S5_heapbase); ++ if (CompressedOops::shift() != 0) { ++ assert(LogMinObjAlignmentInBytes == CompressedOops::shift(), "decode alg wrong"); ++ shr(dst, LogMinObjAlignmentInBytes); ++ } ++} ++ ++void MacroAssembler::decode_heap_oop(Register r) { ++#ifdef ASSERT ++ verify_heapbase("MacroAssembler::decode_heap_oop corrupted?"); ++#endif ++ if (CompressedOops::base() == NULL) { ++ if (CompressedOops::shift() != 0) { ++ assert(LogMinObjAlignmentInBytes == CompressedOops::shift(), "decode alg wrong"); ++ shl(r, LogMinObjAlignmentInBytes); ++ } ++ return; ++ } ++ ++ move(AT, r); ++ if (CompressedOops::shift() != 0) { ++ assert(LogMinObjAlignmentInBytes == CompressedOops::shift(), "decode alg wrong"); ++ if (LogMinObjAlignmentInBytes <= 4) { ++ alsl_d(r, r, S5_heapbase, LogMinObjAlignmentInBytes - 1); ++ } else { ++ shl(r, LogMinObjAlignmentInBytes); ++ add_d(r, r, S5_heapbase); 
++ } ++ } else { ++ add_d(r, r, S5_heapbase); ++ } ++ maskeqz(r, r, AT); ++ verify_oop(r, "broken oop in decode_heap_oop"); ++} ++ ++void MacroAssembler::decode_heap_oop(Register dst, Register src) { ++#ifdef ASSERT ++ verify_heapbase("MacroAssembler::decode_heap_oop corrupted?"); ++#endif ++ if (CompressedOops::base() == NULL) { ++ if (CompressedOops::shift() != 0) { ++ assert(LogMinObjAlignmentInBytes == CompressedOops::shift(), "decode alg wrong"); ++ slli_d(dst, src, LogMinObjAlignmentInBytes); ++ } else { ++ if (dst != src) { ++ move(dst, src); ++ } ++ } ++ return; ++ } ++ ++ Register cond; ++ if (dst == src) { ++ cond = AT; ++ move(cond, src); ++ } else { ++ cond = src; ++ } ++ if (CompressedOops::shift() != 0) { ++ assert(LogMinObjAlignmentInBytes == CompressedOops::shift(), "decode alg wrong"); ++ if (LogMinObjAlignmentInBytes <= 4) { ++ alsl_d(dst, src, S5_heapbase, LogMinObjAlignmentInBytes - 1); ++ } else { ++ slli_d(dst, src, LogMinObjAlignmentInBytes); ++ add_d(dst, dst, S5_heapbase); ++ } ++ } else { ++ add_d(dst, src, S5_heapbase); ++ } ++ maskeqz(dst, dst, cond); ++ verify_oop(dst, "broken oop in decode_heap_oop"); ++} ++ ++void MacroAssembler::decode_heap_oop_not_null(Register r) { ++ // Note: it will change flags ++ assert(UseCompressedOops, "should only be used for compressed headers"); ++ assert(Universe::heap() != NULL, "java heap should be initialized"); ++ // Cannot assert, unverified entry point counts instructions (see .ad file) ++ // vtableStubs also counts instructions in pd_code_size_limit. ++ // Also do not verify_oop as this is called by verify_oop. ++ if (CompressedOops::shift() != 0) { ++ assert(LogMinObjAlignmentInBytes == CompressedOops::shift(), "decode alg wrong"); ++ if (CompressedOops::base() != NULL) { ++ if (LogMinObjAlignmentInBytes <= 4) { ++ alsl_d(r, r, S5_heapbase, LogMinObjAlignmentInBytes - 1); ++ } else { ++ shl(r, LogMinObjAlignmentInBytes); ++ add_d(r, r, S5_heapbase); ++ } ++ } else { ++ shl(r, LogMinObjAlignmentInBytes); ++ } ++ } else { ++ assert(CompressedOops::base() == NULL, "sanity"); ++ } ++} ++ ++void MacroAssembler::decode_heap_oop_not_null(Register dst, Register src) { ++ assert(UseCompressedOops, "should only be used for compressed headers"); ++ assert(Universe::heap() != NULL, "java heap should be initialized"); ++ // Cannot assert, unverified entry point counts instructions (see .ad file) ++ // vtableStubs also counts instructions in pd_code_size_limit. ++ // Also do not verify_oop as this is called by verify_oop. 
++ if (CompressedOops::shift() != 0) { ++ assert(LogMinObjAlignmentInBytes == CompressedOops::shift(), "decode alg wrong"); ++ if (CompressedOops::base() != NULL) { ++ if (LogMinObjAlignmentInBytes <= 4) { ++ alsl_d(dst, src, S5_heapbase, LogMinObjAlignmentInBytes - 1); ++ } else { ++ slli_d(dst, src, LogMinObjAlignmentInBytes); ++ add_d(dst, dst, S5_heapbase); ++ } ++ } else { ++ slli_d(dst, src, LogMinObjAlignmentInBytes); ++ } ++ } else { ++ assert (CompressedOops::base() == NULL, "sanity"); ++ if (dst != src) { ++ move(dst, src); ++ } ++ } ++} ++ ++void MacroAssembler::encode_klass_not_null(Register r) { ++ if (CompressedKlassPointers::base() != NULL) { ++ if (((uint64_t)CompressedKlassPointers::base() & 0xffffffff) == 0 ++ && CompressedKlassPointers::shift() == 0) { ++ bstrpick_d(r, r, 31, 0); ++ return; ++ } ++ assert(r != AT, "Encoding a klass in AT"); ++ li(AT, (int64_t)CompressedKlassPointers::base()); ++ sub_d(r, r, AT); ++ } ++ if (CompressedKlassPointers::shift() != 0) { ++ assert (LogKlassAlignmentInBytes == CompressedKlassPointers::shift(), "decode alg wrong"); ++ shr(r, LogKlassAlignmentInBytes); ++ } ++} ++ ++void MacroAssembler::encode_klass_not_null(Register dst, Register src) { ++ if (dst == src) { ++ encode_klass_not_null(src); ++ } else { ++ if (CompressedKlassPointers::base() != NULL) { ++ if (((uint64_t)CompressedKlassPointers::base() & 0xffffffff) == 0 ++ && CompressedKlassPointers::shift() == 0) { ++ bstrpick_d(dst, src, 31, 0); ++ return; ++ } ++ li(dst, (int64_t)CompressedKlassPointers::base()); ++ sub_d(dst, src, dst); ++ if (CompressedKlassPointers::shift() != 0) { ++ assert (LogKlassAlignmentInBytes == CompressedKlassPointers::shift(), "decode alg wrong"); ++ shr(dst, LogKlassAlignmentInBytes); ++ } ++ } else { ++ if (CompressedKlassPointers::shift() != 0) { ++ assert (LogKlassAlignmentInBytes == CompressedKlassPointers::shift(), "decode alg wrong"); ++ srli_d(dst, src, LogKlassAlignmentInBytes); ++ } else { ++ move(dst, src); ++ } ++ } ++ } ++} ++ ++void MacroAssembler::decode_klass_not_null(Register r) { ++ assert(UseCompressedClassPointers, "should only be used for compressed headers"); ++ assert(r != AT, "Decoding a klass in AT"); ++ // Cannot assert, unverified entry point counts instructions (see .ad file) ++ // vtableStubs also counts instructions in pd_code_size_limit. ++ // Also do not verify_oop as this is called by verify_oop. 
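The encode/decode pairs above implement the usual compressed-oops mapping: subtract the heap base kept in S5_heapbase, shift right by LogMinObjAlignmentInBytes, and invert that on decode, with maskeqz/masknez keeping NULL mapped to NULL without a branch (maskeqz(rd, rj, rk) yields rj when rk is non-zero and 0 otherwise; masknez is the complement, and the cmp_cmov helpers further down OR the two results to build a branchless select). A plain-C sketch of the computation, with hypothetical names, not taken from the patch:

    // Sketch of the narrow-oop mapping the routines above emit.
    // base == CompressedOops::base() (held in S5_heapbase), shift == CompressedOops::shift().
    static inline uint32_t encode_oop(uint64_t oop, uint64_t base, int shift) {
      if (oop == 0) return 0;                       // NULL stays NULL (maskeqz in the asm)
      return (uint32_t)((oop - base) >> shift);
    }
    static inline uint64_t decode_oop(uint32_t narrow, uint64_t base, int shift) {
      if (narrow == 0) return 0;                    // NULL stays NULL (maskeqz in the asm)
      return base + ((uint64_t)narrow << shift);
    }

The *_not_null variants skip the NULL handling entirely, and the klass variants use CompressedKlassPointers::base()/shift() with AT as scratch instead of S5_heapbase.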
++ if (CompressedKlassPointers::base() != NULL) { ++ if (CompressedKlassPointers::shift() == 0) { ++ if (((uint64_t)CompressedKlassPointers::base() & 0xffffffff) == 0) { ++ lu32i_d(r, (uint64_t)CompressedKlassPointers::base() >> 32); ++ } else { ++ li(AT, (int64_t)CompressedKlassPointers::base()); ++ add_d(r, r, AT); ++ } ++ } else { ++ assert(LogKlassAlignmentInBytes == CompressedKlassPointers::shift(), "decode alg wrong"); ++ assert(LogKlassAlignmentInBytes == Address::times_8, "klass not aligned on 64bits?"); ++ li(AT, (int64_t)CompressedKlassPointers::base()); ++ alsl_d(r, r, AT, Address::times_8 - 1); ++ } ++ } else { ++ if (CompressedKlassPointers::shift() != 0) { ++ assert(LogKlassAlignmentInBytes == CompressedKlassPointers::shift(), "decode alg wrong"); ++ shl(r, LogKlassAlignmentInBytes); ++ } ++ } ++} ++ ++void MacroAssembler::decode_klass_not_null(Register dst, Register src) { ++ assert(UseCompressedClassPointers, "should only be used for compressed headers"); ++ if (dst == src) { ++ decode_klass_not_null(dst); ++ } else { ++ // Cannot assert, unverified entry point counts instructions (see .ad file) ++ // vtableStubs also counts instructions in pd_code_size_limit. ++ // Also do not verify_oop as this is called by verify_oop. ++ if (CompressedKlassPointers::base() != NULL) { ++ if (CompressedKlassPointers::shift() == 0) { ++ if (((uint64_t)CompressedKlassPointers::base() & 0xffffffff) == 0) { ++ move(dst, src); ++ lu32i_d(dst, (uint64_t)CompressedKlassPointers::base() >> 32); ++ } else { ++ li(dst, (int64_t)CompressedKlassPointers::base()); ++ add_d(dst, dst, src); ++ } ++ } else { ++ assert(LogKlassAlignmentInBytes == CompressedKlassPointers::shift(), "decode alg wrong"); ++ assert(LogKlassAlignmentInBytes == Address::times_8, "klass not aligned on 64bits?"); ++ li(dst, (int64_t)CompressedKlassPointers::base()); ++ alsl_d(dst, src, dst, Address::times_8 - 1); ++ } ++ } else { ++ if (CompressedKlassPointers::shift() != 0) { ++ assert(LogKlassAlignmentInBytes == CompressedKlassPointers::shift(), "decode alg wrong"); ++ slli_d(dst, src, LogKlassAlignmentInBytes); ++ } else { ++ move(dst, src); ++ } ++ } ++ } ++} ++ ++void MacroAssembler::reinit_heapbase() { ++ if (UseCompressedOops) { ++ if (Universe::heap() != NULL) { ++ if (CompressedOops::base() == NULL) { ++ move(S5_heapbase, R0); ++ } else { ++ li(S5_heapbase, (int64_t)CompressedOops::ptrs_base()); ++ } ++ } else { ++ li(S5_heapbase, (intptr_t)CompressedOops::ptrs_base_addr()); ++ ld_d(S5_heapbase, S5_heapbase, 0); ++ } ++ } ++} ++ ++void MacroAssembler::check_klass_subtype(Register sub_klass, ++ Register super_klass, ++ Register temp_reg, ++ Label& L_success) { ++//implement ind gen_subtype_check ++ Label L_failure; ++ check_klass_subtype_fast_path(sub_klass, super_klass, temp_reg, &L_success, &L_failure, NULL); ++ check_klass_subtype_slow_path(sub_klass, super_klass, temp_reg, noreg, &L_success, NULL); ++ bind(L_failure); ++} ++ ++void MacroAssembler::check_klass_subtype_fast_path(Register sub_klass, ++ Register super_klass, ++ Register temp_reg, ++ Label* L_success, ++ Label* L_failure, ++ Label* L_slow_path, ++ RegisterOrConstant super_check_offset) { ++ assert_different_registers(sub_klass, super_klass, temp_reg); ++ bool must_load_sco = (super_check_offset.constant_or_zero() == -1); ++ if (super_check_offset.is_register()) { ++ assert_different_registers(sub_klass, super_klass, ++ super_check_offset.as_register()); ++ } else if (must_load_sco) { ++ assert(temp_reg != noreg, "supply either a temp or a register offset"); 
++ } ++ ++ Label L_fallthrough; ++ int label_nulls = 0; ++ if (L_success == NULL) { L_success = &L_fallthrough; label_nulls++; } ++ if (L_failure == NULL) { L_failure = &L_fallthrough; label_nulls++; } ++ if (L_slow_path == NULL) { L_slow_path = &L_fallthrough; label_nulls++; } ++ assert(label_nulls <= 1, "at most one NULL in the batch"); ++ ++ int sc_offset = in_bytes(Klass::secondary_super_cache_offset()); ++ int sco_offset = in_bytes(Klass::super_check_offset_offset()); ++ // If the pointers are equal, we are done (e.g., String[] elements). ++ // This self-check enables sharing of secondary supertype arrays among ++ // non-primary types such as array-of-interface. Otherwise, each such ++ // type would need its own customized SSA. ++ // We move this check to the front of the fast path because many ++ // type checks are in fact trivially successful in this manner, ++ // so we get a nicely predicted branch right at the start of the check. ++ beq(sub_klass, super_klass, *L_success); ++ // Check the supertype display: ++ if (must_load_sco) { ++ ld_wu(temp_reg, super_klass, sco_offset); ++ super_check_offset = RegisterOrConstant(temp_reg); ++ } ++ add_d(AT, sub_klass, super_check_offset.register_or_noreg()); ++ ld_d(AT, AT, super_check_offset.constant_or_zero()); ++ ++ // This check has worked decisively for primary supers. ++ // Secondary supers are sought in the super_cache ('super_cache_addr'). ++ // (Secondary supers are interfaces and very deeply nested subtypes.) ++ // This works in the same check above because of a tricky aliasing ++ // between the super_cache and the primary super display elements. ++ // (The 'super_check_addr' can address either, as the case requires.) ++ // Note that the cache is updated below if it does not help us find ++ // what we need immediately. ++ // So if it was a primary super, we can just fail immediately. ++ // Otherwise, it's the slow path for us (no success at this point). ++ ++ if (super_check_offset.is_register()) { ++ beq(super_klass, AT, *L_success); ++ addi_d(AT, super_check_offset.as_register(), -sc_offset); ++ if (L_failure == &L_fallthrough) { ++ beq(AT, R0, *L_slow_path); ++ } else { ++ bne_far(AT, R0, *L_failure); ++ b(*L_slow_path); ++ } ++ } else if (super_check_offset.as_constant() == sc_offset) { ++ // Need a slow path; fast failure is impossible. ++ if (L_slow_path == &L_fallthrough) { ++ beq(super_klass, AT, *L_success); ++ } else { ++ bne(super_klass, AT, *L_slow_path); ++ b(*L_success); ++ } ++ } else { ++ // No slow path; it's a fast decision. 
++ if (L_failure == &L_fallthrough) { ++ beq(super_klass, AT, *L_success); ++ } else { ++ bne_far(super_klass, AT, *L_failure); ++ b(*L_success); ++ } ++ } ++ ++ bind(L_fallthrough); ++} ++ ++void MacroAssembler::check_klass_subtype_slow_path(Register sub_klass, ++ Register super_klass, ++ Register temp_reg, ++ Register temp2_reg, ++ Label* L_success, ++ Label* L_failure, ++ bool set_cond_codes) { ++ if (temp2_reg == noreg) ++ temp2_reg = TSR; ++ assert_different_registers(sub_klass, super_klass, temp_reg, temp2_reg); ++#define IS_A_TEMP(reg) ((reg) == temp_reg || (reg) == temp2_reg) ++ ++ Label L_fallthrough; ++ int label_nulls = 0; ++ if (L_success == NULL) { L_success = &L_fallthrough; label_nulls++; } ++ if (L_failure == NULL) { L_failure = &L_fallthrough; label_nulls++; } ++ assert(label_nulls <= 1, "at most one NULL in the batch"); ++ ++ // a couple of useful fields in sub_klass: ++ int ss_offset = in_bytes(Klass::secondary_supers_offset()); ++ int sc_offset = in_bytes(Klass::secondary_super_cache_offset()); ++ Address secondary_supers_addr(sub_klass, ss_offset); ++ Address super_cache_addr( sub_klass, sc_offset); ++ ++ // Do a linear scan of the secondary super-klass chain. ++ // This code is rarely used, so simplicity is a virtue here. ++ // The repne_scan instruction uses fixed registers, which we must spill. ++ // Don't worry too much about pre-existing connections with the input regs. ++ ++#ifndef PRODUCT ++ int* pst_counter = &SharedRuntime::_partial_subtype_ctr; ++ ExternalAddress pst_counter_addr((address) pst_counter); ++#endif //PRODUCT ++ ++ // We will consult the secondary-super array. ++ ld_d(temp_reg, secondary_supers_addr); ++ // Load the array length. ++ ld_w(temp2_reg, Address(temp_reg, Array::length_offset_in_bytes())); ++ // Skip to start of data. ++ addi_d(temp_reg, temp_reg, Array::base_offset_in_bytes()); ++ ++ Label Loop, subtype; ++ bind(Loop); ++ beq(temp2_reg, R0, *L_failure); ++ ld_d(AT, temp_reg, 0); ++ addi_d(temp_reg, temp_reg, 1 * wordSize); ++ beq(AT, super_klass, subtype); ++ addi_d(temp2_reg, temp2_reg, -1); ++ b(Loop); ++ ++ bind(subtype); ++ st_d(super_klass, super_cache_addr); ++ if (L_success != &L_fallthrough) { ++ b(*L_success); ++ } ++ ++ // Success. Cache the super we found and proceed in triumph. 
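The scan just emitted is a straight linear search of the sub-klass's secondary-supers array, counted down by its length, with the secondary-super cache updated on a hit before branching to L_success. A rough C equivalent (signature and layout simplified for illustration, not the real HotSpot API):

    // Sketch of the loop generated by check_klass_subtype_slow_path().
    static bool is_subtype_slow(Klass** secondary_supers, int len,
                                Klass* super, Klass** secondary_super_cache) {
      for (int i = 0; i < len; i++) {               // beq(temp2_reg, R0, *L_failure) once len hits 0
        if (secondary_supers[i] == super) {
          *secondary_super_cache = super;           // st_d(super_klass, super_cache_addr)
          return true;                              // b(*L_success)
        }
      }
      return false;                                 // falls through to L_failure
    }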
++#undef IS_A_TEMP ++ ++ bind(L_fallthrough); ++} ++ ++void MacroAssembler::clinit_barrier(Register klass, Register scratch, Label* L_fast_path, Label* L_slow_path) { ++ Register rthread = TREG; ++#ifndef OPT_THREAD ++ get_thread(rthread); ++#endif ++ ++ assert(L_fast_path != NULL || L_slow_path != NULL, "at least one is required"); ++ assert_different_registers(klass, rthread, scratch); ++ ++ Label L_fallthrough; ++ if (L_fast_path == NULL) { ++ L_fast_path = &L_fallthrough; ++ } else if (L_slow_path == NULL) { ++ L_slow_path = &L_fallthrough; ++ } ++ ++ // Fast path check: class is fully initialized ++ ld_b(scratch, Address(klass, InstanceKlass::init_state_offset())); ++ addi_d(scratch, scratch, -InstanceKlass::fully_initialized); ++ beqz(scratch, *L_fast_path); ++ ++ // Fast path check: current thread is initializer thread ++ ld_d(scratch, Address(klass, InstanceKlass::init_thread_offset())); ++ if (L_slow_path == &L_fallthrough) { ++ beq(rthread, scratch, *L_fast_path); ++ bind(*L_slow_path); ++ } else if (L_fast_path == &L_fallthrough) { ++ bne(rthread, scratch, *L_slow_path); ++ bind(*L_fast_path); ++ } else { ++ Unimplemented(); ++ } ++} ++ ++void MacroAssembler::get_vm_result(Register oop_result, Register java_thread) { ++ ld_d(oop_result, Address(java_thread, JavaThread::vm_result_offset())); ++ st_d(R0, Address(java_thread, JavaThread::vm_result_offset())); ++ verify_oop(oop_result, "broken oop in call_VM_base"); ++} ++ ++void MacroAssembler::get_vm_result_2(Register metadata_result, Register java_thread) { ++ ld_d(metadata_result, Address(java_thread, JavaThread::vm_result_2_offset())); ++ st_d(R0, Address(java_thread, JavaThread::vm_result_2_offset())); ++} ++ ++Address MacroAssembler::argument_address(RegisterOrConstant arg_slot, ++ int extra_slot_offset) { ++ // cf. TemplateTable::prepare_invoke(), if (load_receiver). ++ int stackElementSize = Interpreter::stackElementSize; ++ int offset = Interpreter::expr_offset_in_bytes(extra_slot_offset+0); ++#ifdef ASSERT ++ int offset1 = Interpreter::expr_offset_in_bytes(extra_slot_offset+1); ++ assert(offset1 - offset == stackElementSize, "correct arithmetic"); ++#endif ++ Register scale_reg = NOREG; ++ Address::ScaleFactor scale_factor = Address::no_scale; ++ if (arg_slot.is_constant()) { ++ offset += arg_slot.as_constant() * stackElementSize; ++ } else { ++ scale_reg = arg_slot.as_register(); ++ scale_factor = Address::times_8; ++ } ++ // We don't push RA on stack in prepare_invoke. ++ // offset += wordSize; // return PC is on stack ++ if(scale_reg==NOREG) return Address(SP, offset); ++ else { ++ alsl_d(scale_reg, scale_reg, SP, scale_factor - 1); ++ return Address(scale_reg, offset); ++ } ++} ++ ++SkipIfEqual::~SkipIfEqual() { ++ _masm->bind(_label); ++} ++ ++void MacroAssembler::load_sized_value(Register dst, Address src, size_t size_in_bytes, bool is_signed, Register dst2) { ++ switch (size_in_bytes) { ++ case 8: ld_d(dst, src); break; ++ case 4: ld_w(dst, src); break; ++ case 2: is_signed ? ld_h(dst, src) : ld_hu(dst, src); break; ++ case 1: is_signed ? ld_b( dst, src) : ld_bu( dst, src); break; ++ default: ShouldNotReachHere(); ++ } ++} ++ ++void MacroAssembler::store_sized_value(Address dst, Register src, size_t size_in_bytes, Register src2) { ++ switch (size_in_bytes) { ++ case 8: st_d(src, dst); break; ++ case 4: st_w(src, dst); break; ++ case 2: st_h(src, dst); break; ++ case 1: st_b(src, dst); break; ++ default: ShouldNotReachHere(); ++ } ++} ++ ++// Look up the method for a megamorphic invokeinterface call. 
++// The target method is determined by . ++// The receiver klass is in recv_klass. ++// On success, the result will be in method_result, and execution falls through. ++// On failure, execution transfers to the given label. ++void MacroAssembler::lookup_interface_method(Register recv_klass, ++ Register intf_klass, ++ RegisterOrConstant itable_index, ++ Register method_result, ++ Register scan_temp, ++ Label& L_no_such_interface, ++ bool return_method) { ++ assert_different_registers(recv_klass, intf_klass, scan_temp, AT); ++ assert_different_registers(method_result, intf_klass, scan_temp, AT); ++ assert(recv_klass != method_result || !return_method, ++ "recv_klass can be destroyed when method isn't needed"); ++ ++ assert(itable_index.is_constant() || itable_index.as_register() == method_result, ++ "caller must use same register for non-constant itable index as for method"); ++ ++ // Compute start of first itableOffsetEntry (which is at the end of the vtable) ++ int vtable_base = in_bytes(Klass::vtable_start_offset()); ++ int itentry_off = itableMethodEntry::method_offset_in_bytes(); ++ int scan_step = itableOffsetEntry::size() * wordSize; ++ int vte_size = vtableEntry::size() * wordSize; ++ Address::ScaleFactor times_vte_scale = Address::times_ptr; ++ assert(vte_size == wordSize, "else adjust times_vte_scale"); ++ ++ ld_w(scan_temp, Address(recv_klass, Klass::vtable_length_offset())); ++ ++ // %%% Could store the aligned, prescaled offset in the klassoop. ++ alsl_d(scan_temp, scan_temp, recv_klass, times_vte_scale - 1); ++ addi_d(scan_temp, scan_temp, vtable_base); ++ ++ if (return_method) { ++ // Adjust recv_klass by scaled itable_index, so we can free itable_index. ++ if (itable_index.is_constant()) { ++ li(AT, (itable_index.as_constant() * itableMethodEntry::size() * wordSize) + itentry_off); ++ add_d(recv_klass, recv_klass, AT); ++ } else { ++ assert(itableMethodEntry::size() * wordSize == wordSize, "adjust the scaling in the code below"); ++ alsl_d(AT, itable_index.as_register(), recv_klass, (int)Address::times_ptr - 1); ++ addi_d(recv_klass, AT, itentry_off); ++ } ++ } ++ ++ Label search, found_method; ++ ++ ld_d(method_result, Address(scan_temp, itableOffsetEntry::interface_offset_in_bytes())); ++ beq(intf_klass, method_result, found_method); ++ ++ bind(search); ++ // Check that the previous entry is non-null. A null entry means that ++ // the receiver class doesn't implement the interface, and wasn't the ++ // same as when the caller was compiled. ++ beqz(method_result, L_no_such_interface); ++ addi_d(scan_temp, scan_temp, scan_step); ++ ld_d(method_result, Address(scan_temp, itableOffsetEntry::interface_offset_in_bytes())); ++ bne(intf_klass, method_result, search); ++ ++ bind(found_method); ++ if (return_method) { ++ // Got a hit. 
++ ld_wu(scan_temp, Address(scan_temp, itableOffsetEntry::offset_offset_in_bytes())); ++ ldx_d(method_result, recv_klass, scan_temp); ++ } ++} ++ ++// virtual method calling ++void MacroAssembler::lookup_virtual_method(Register recv_klass, ++ RegisterOrConstant vtable_index, ++ Register method_result) { ++ const int base = in_bytes(Klass::vtable_start_offset()); ++ assert(vtableEntry::size() * wordSize == wordSize, "else adjust the scaling in the code below"); ++ ++ if (vtable_index.is_constant()) { ++ li(AT, vtable_index.as_constant()); ++ alsl_d(AT, AT, recv_klass, Address::times_ptr - 1); ++ } else { ++ alsl_d(AT, vtable_index.as_register(), recv_klass, Address::times_ptr - 1); ++ } ++ ++ ld_d(method_result, AT, base + vtableEntry::method_offset_in_bytes()); ++} ++ ++void MacroAssembler::load_byte_map_base(Register reg) { ++ CardTable::CardValue* byte_map_base = ++ ((CardTableBarrierSet*)(BarrierSet::barrier_set()))->card_table()->byte_map_base(); ++ ++ // Strictly speaking the byte_map_base isn't an address at all, and it might ++ // even be negative. It is thus materialised as a constant. ++ li(reg, (uint64_t)byte_map_base); ++} ++ ++void MacroAssembler::clear_jweak_tag(Register possibly_jweak) { ++ const int32_t inverted_jweak_mask = ~static_cast(JNIHandles::weak_tag_mask); ++ STATIC_ASSERT(inverted_jweak_mask == -2); // otherwise check this code ++ // The inverted mask is sign-extended ++ li(AT, inverted_jweak_mask); ++ andr(possibly_jweak, AT, possibly_jweak); ++} ++ ++void MacroAssembler::resolve_jobject(Register value, ++ Register thread, ++ Register tmp) { ++ assert_different_registers(value, thread, tmp); ++ Label done, not_weak; ++ beq(value, R0, done); // Use NULL as-is. ++ li(AT, JNIHandles::weak_tag_mask); // Test for jweak tag. ++ andr(AT, value, AT); ++ beq(AT, R0, not_weak); ++ // Resolve jweak. ++ access_load_at(T_OBJECT, IN_NATIVE | ON_PHANTOM_OOP_REF, ++ value, Address(value, -JNIHandles::weak_tag_value), tmp, thread); ++ verify_oop(value); ++ b(done); ++ bind(not_weak); ++ // Resolve (untagged) jobject. 
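lookup_interface_method() above performs the megamorphic invokeinterface lookup: it walks the itableOffsetEntry records that follow the vtable until it finds the requested interface (a NULL entry terminates the list and means the receiver does not implement it), then indexes into that interface's method block. In simplified C, with the entry layout reduced to just what the generated code touches (a sketch, not the VM's real itableOffsetEntry/itableMethodEntry types):

    struct OffsetEntry { Klass* interface; uint32_t offset; };
    static Method* itable_lookup(Klass* recv, Klass* intf, int itable_index,
                                 OffsetEntry* first_entry) {
      for (OffsetEntry* e = first_entry; ; e++) {
        if (e->interface == NULL) return NULL;      // L_no_such_interface
        if (e->interface == intf) {                 // found_method
          Method** methods = (Method**)((char*)recv + e->offset);
          return methods[itable_index];             // ldx_d(method_result, recv_klass, scan_temp)
        }
      }
    }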
++ access_load_at(T_OBJECT, IN_NATIVE, value, Address(value, 0), tmp, thread); ++ verify_oop(value); ++ bind(done); ++} ++ ++void MacroAssembler::lea(Register rd, Address src) { ++ Register dst = rd; ++ Register base = src.base(); ++ Register index = src.index(); ++ ++ int scale = src.scale(); ++ int disp = src.disp(); ++ ++ if (index == noreg) { ++ if (is_simm(disp, 12)) { ++ addi_d(dst, base, disp); ++ } else { ++ lu12i_w(AT, split_low20(disp >> 12)); ++ if (split_low12(disp)) ++ ori(AT, AT, split_low12(disp)); ++ add_d(dst, base, AT); ++ } ++ } else { ++ if (scale == 0) { ++ if (is_simm(disp, 12)) { ++ add_d(AT, base, index); ++ addi_d(dst, AT, disp); ++ } else { ++ lu12i_w(AT, split_low20(disp >> 12)); ++ if (split_low12(disp)) ++ ori(AT, AT, split_low12(disp)); ++ add_d(AT, base, AT); ++ add_d(dst, AT, index); ++ } ++ } else { ++ if (is_simm(disp, 12)) { ++ alsl_d(AT, index, base, scale - 1); ++ addi_d(dst, AT, disp); ++ } else { ++ lu12i_w(AT, split_low20(disp >> 12)); ++ if (split_low12(disp)) ++ ori(AT, AT, split_low12(disp)); ++ add_d(AT, AT, base); ++ alsl_d(dst, index, AT, scale - 1); ++ } ++ } ++ } ++} ++ ++void MacroAssembler::lea(Register dst, AddressLiteral adr) { ++ code_section()->relocate(pc(), adr.rspec()); ++ pcaddi(dst, (adr.target() - pc()) >> 2); ++} ++ ++int MacroAssembler::patched_branch(int dest_pos, int inst, int inst_pos) { ++ int v = (dest_pos - inst_pos) >> 2; ++ switch(high(inst, 6)) { ++ case beq_op: ++ case bne_op: ++ case blt_op: ++ case bge_op: ++ case bltu_op: ++ case bgeu_op: ++ assert(is_simm16(v), "must be simm16"); ++#ifndef PRODUCT ++ if(!is_simm16(v)) ++ { ++ tty->print_cr("must be simm16"); ++ tty->print_cr("Inst: %x", inst); ++ } ++#endif ++ ++ inst &= 0xfc0003ff; ++ inst |= ((v & 0xffff) << 10); ++ break; ++ case beqz_op: ++ case bnez_op: ++ case bccondz_op: ++ assert(is_simm(v, 21), "must be simm21"); ++#ifndef PRODUCT ++ if(!is_simm(v, 21)) ++ { ++ tty->print_cr("must be simm21"); ++ tty->print_cr("Inst: %x", inst); ++ } ++#endif ++ ++ inst &= 0xfc0003e0; ++ inst |= ( ((v & 0xffff) << 10) | ((v >> 16) & 0x1f) ); ++ break; ++ case b_op: ++ case bl_op: ++ assert(is_simm(v, 26), "must be simm26"); ++#ifndef PRODUCT ++ if(!is_simm(v, 26)) ++ { ++ tty->print_cr("must be simm26"); ++ tty->print_cr("Inst: %x", inst); ++ } ++#endif ++ ++ inst &= 0xfc000000; ++ inst |= ( ((v & 0xffff) << 10) | ((v >> 16) & 0x3ff) ); ++ break; ++ default: ++ ShouldNotReachHere(); ++ break; ++ } ++ return inst; ++} ++ ++void MacroAssembler::cmp_cmov(Register op1, ++ Register op2, ++ Register dst, ++ Register src1, ++ Register src2, ++ CMCompare cmp, ++ bool is_signed) { ++ switch (cmp) { ++ case EQ: ++ sub_d(AT, op1, op2); ++ if (dst == src2) { ++ masknez(dst, src2, AT); ++ maskeqz(AT, src1, AT); ++ } else { ++ maskeqz(dst, src1, AT); ++ masknez(AT, src2, AT); ++ } ++ break; ++ ++ case NE: ++ sub_d(AT, op1, op2); ++ if (dst == src2) { ++ maskeqz(dst, src2, AT); ++ masknez(AT, src1, AT); ++ } else { ++ masknez(dst, src1, AT); ++ maskeqz(AT, src2, AT); ++ } ++ break; ++ ++ case GT: ++ if (is_signed) { ++ slt(AT, op2, op1); ++ } else { ++ sltu(AT, op2, op1); ++ } ++ if(dst == src2) { ++ maskeqz(dst, src2, AT); ++ masknez(AT, src1, AT); ++ } else { ++ masknez(dst, src1, AT); ++ maskeqz(AT, src2, AT); ++ } ++ break; ++ case GE: ++ if (is_signed) { ++ slt(AT, op1, op2); ++ } else { ++ sltu(AT, op1, op2); ++ } ++ if(dst == src2) { ++ masknez(dst, src2, AT); ++ maskeqz(AT, src1, AT); ++ } else { ++ maskeqz(dst, src1, AT); ++ masknez(AT, src2, AT); ++ } ++ break; ++ ++ case LT: 
++ if (is_signed) { ++ slt(AT, op1, op2); ++ } else { ++ sltu(AT, op1, op2); ++ } ++ if(dst == src2) { ++ maskeqz(dst, src2, AT); ++ masknez(AT, src1, AT); ++ } else { ++ masknez(dst, src1, AT); ++ maskeqz(AT, src2, AT); ++ } ++ break; ++ case LE: ++ if (is_signed) { ++ slt(AT, op2, op1); ++ } else { ++ sltu(AT, op2, op1); ++ } ++ if(dst == src2) { ++ masknez(dst, src2, AT); ++ maskeqz(AT, src1, AT); ++ } else { ++ maskeqz(dst, src1, AT); ++ masknez(AT, src2, AT); ++ } ++ break; ++ default: ++ Unimplemented(); ++ } ++ OR(dst, dst, AT); ++} ++ ++void MacroAssembler::cmp_cmov(Register op1, ++ Register op2, ++ Register dst, ++ Register src, ++ CMCompare cmp, ++ bool is_signed) { ++ switch (cmp) { ++ case EQ: ++ sub_d(AT, op1, op2); ++ maskeqz(dst, dst, AT); ++ masknez(AT, src, AT); ++ break; ++ ++ case NE: ++ sub_d(AT, op1, op2); ++ masknez(dst, dst, AT); ++ maskeqz(AT, src, AT); ++ break; ++ ++ case GT: ++ if (is_signed) { ++ slt(AT, op2, op1); ++ } else { ++ sltu(AT, op2, op1); ++ } ++ masknez(dst, dst, AT); ++ maskeqz(AT, src, AT); ++ break; ++ ++ case GE: ++ if (is_signed) { ++ slt(AT, op1, op2); ++ } else { ++ sltu(AT, op1, op2); ++ } ++ maskeqz(dst, dst, AT); ++ masknez(AT, src, AT); ++ break; ++ ++ case LT: ++ if (is_signed) { ++ slt(AT, op1, op2); ++ } else { ++ sltu(AT, op1, op2); ++ } ++ masknez(dst, dst, AT); ++ maskeqz(AT, src, AT); ++ break; ++ ++ case LE: ++ if (is_signed) { ++ slt(AT, op2, op1); ++ } else { ++ sltu(AT, op2, op1); ++ } ++ maskeqz(dst, dst, AT); ++ masknez(AT, src, AT); ++ break; ++ ++ default: ++ Unimplemented(); ++ } ++ OR(dst, dst, AT); ++} ++ ++ ++void MacroAssembler::cmp_cmov(FloatRegister op1, ++ FloatRegister op2, ++ Register dst, ++ Register src, ++ FloatRegister tmp1, ++ FloatRegister tmp2, ++ CMCompare cmp, ++ bool is_float) { ++ movgr2fr_d(tmp1, dst); ++ movgr2fr_d(tmp2, src); ++ ++ switch(cmp) { ++ case EQ: ++ if (is_float) { ++ fcmp_ceq_s(FCC0, op1, op2); ++ } else { ++ fcmp_ceq_d(FCC0, op1, op2); ++ } ++ fsel(tmp1, tmp1, tmp2, FCC0); ++ break; ++ ++ case NE: ++ if (is_float) { ++ fcmp_ceq_s(FCC0, op1, op2); ++ } else { ++ fcmp_ceq_d(FCC0, op1, op2); ++ } ++ fsel(tmp1, tmp2, tmp1, FCC0); ++ break; ++ ++ case GT: ++ if (is_float) { ++ fcmp_cule_s(FCC0, op1, op2); ++ } else { ++ fcmp_cule_d(FCC0, op1, op2); ++ } ++ fsel(tmp1, tmp2, tmp1, FCC0); ++ break; ++ ++ case GE: ++ if (is_float) { ++ fcmp_cult_s(FCC0, op1, op2); ++ } else { ++ fcmp_cult_d(FCC0, op1, op2); ++ } ++ fsel(tmp1, tmp2, tmp1, FCC0); ++ break; ++ ++ case LT: ++ if (is_float) { ++ fcmp_cult_s(FCC0, op1, op2); ++ } else { ++ fcmp_cult_d(FCC0, op1, op2); ++ } ++ fsel(tmp1, tmp1, tmp2, FCC0); ++ break; ++ ++ case LE: ++ if (is_float) { ++ fcmp_cule_s(FCC0, op1, op2); ++ } else { ++ fcmp_cule_d(FCC0, op1, op2); ++ } ++ fsel(tmp1, tmp1, tmp2, FCC0); ++ break; ++ ++ default: ++ Unimplemented(); ++ } ++ ++ movfr2gr_d(dst, tmp1); ++} ++ ++void MacroAssembler::cmp_cmov(FloatRegister op1, ++ FloatRegister op2, ++ FloatRegister dst, ++ FloatRegister src, ++ CMCompare cmp, ++ bool is_float) { ++ switch(cmp) { ++ case EQ: ++ if (!is_float) { ++ fcmp_ceq_d(FCC0, op1, op2); ++ } else { ++ fcmp_ceq_s(FCC0, op1, op2); ++ } ++ fsel(dst, dst, src, FCC0); ++ break; ++ ++ case NE: ++ if (!is_float) { ++ fcmp_ceq_d(FCC0, op1, op2); ++ } else { ++ fcmp_ceq_s(FCC0, op1, op2); ++ } ++ fsel(dst, src, dst, FCC0); ++ break; ++ ++ case GT: ++ if (!is_float) { ++ fcmp_cule_d(FCC0, op1, op2); ++ } else { ++ fcmp_cule_s(FCC0, op1, op2); ++ } ++ fsel(dst, src, dst, FCC0); ++ break; ++ ++ case GE: ++ if (!is_float) { ++ 
fcmp_cult_d(FCC0, op1, op2); ++ } else { ++ fcmp_cult_s(FCC0, op1, op2); ++ } ++ fsel(dst, src, dst, FCC0); ++ break; ++ ++ case LT: ++ if (!is_float) { ++ fcmp_cult_d(FCC0, op1, op2); ++ } else { ++ fcmp_cult_s(FCC0, op1, op2); ++ } ++ fsel(dst, dst, src, FCC0); ++ break; ++ ++ case LE: ++ if (!is_float) { ++ fcmp_cule_d(FCC0, op1, op2); ++ } else { ++ fcmp_cule_s(FCC0, op1, op2); ++ } ++ fsel(dst, dst, src, FCC0); ++ break; ++ ++ default: ++ Unimplemented(); ++ } ++} ++ ++void MacroAssembler::cmp_cmov(Register op1, ++ Register op2, ++ FloatRegister dst, ++ FloatRegister src, ++ FloatRegister tmp1, ++ FloatRegister tmp2, ++ CMCompare cmp) { ++ movgr2fr_w(tmp1, R0); ++ ++ switch (cmp) { ++ case EQ: ++ sub_d(AT, op1, op2); ++ movgr2fr_w(tmp2, AT); ++ fcmp_ceq_s(FCC0, tmp1, tmp2); ++ fsel(dst, dst, src, FCC0); ++ break; ++ ++ case NE: ++ sub_d(AT, op1, op2); ++ movgr2fr_w(tmp2, AT); ++ fcmp_ceq_s(FCC0, tmp1, tmp2); ++ fsel(dst, src, dst, FCC0); ++ break; ++ ++ case GT: ++ slt(AT, op2, op1); ++ movgr2fr_w(tmp2, AT); ++ fcmp_ceq_s(FCC0, tmp1, tmp2); ++ fsel(dst, src, dst, FCC0); ++ break; ++ ++ case GE: ++ slt(AT, op1, op2); ++ movgr2fr_w(tmp2, AT); ++ fcmp_ceq_s(FCC0, tmp1, tmp2); ++ fsel(dst, dst, src, FCC0); ++ break; ++ ++ case LT: ++ slt(AT, op1, op2); ++ movgr2fr_w(tmp2, AT); ++ fcmp_ceq_s(FCC0, tmp1, tmp2); ++ fsel(dst, src, dst, FCC0); ++ break; ++ ++ case LE: ++ slt(AT, op2, op1); ++ movgr2fr_w(tmp2, AT); ++ fcmp_ceq_s(FCC0, tmp1, tmp2); ++ fsel(dst, dst, src, FCC0); ++ break; ++ ++ default: ++ Unimplemented(); ++ } ++} ++ ++void MacroAssembler::membar(Membar_mask_bits hint){ ++ address prev = pc() - NativeInstruction::sync_instruction_size; ++ address last = code()->last_insn(); ++ if (last != NULL && ((NativeInstruction*)last)->is_sync() && prev == last) { ++ code()->set_last_insn(NULL); ++ NativeMembar *membar = (NativeMembar*)prev; ++ // merged membar ++ // e.g. LoadLoad and LoadLoad|LoadStore to LoadLoad|LoadStore ++ membar->set_hint(membar->get_hint() & (~hint & 0xF)); ++ block_comment("merged membar"); ++ } else { ++ code()->set_last_insn(pc()); ++ Assembler::membar(hint); ++ } ++} ++ ++/** ++ * Emits code to update CRC-32 with a byte value according to constants in table ++ * ++ * @param [in,out]crc Register containing the crc. ++ * @param [in]val Register containing the byte to fold into the CRC. ++ * @param [in]table Register containing the table of crc constants. 
++ * ++ * uint32_t crc; ++ * val = crc_table[(val ^ crc) & 0xFF]; ++ * crc = val ^ (crc >> 8); ++**/ ++void MacroAssembler::update_byte_crc32(Register crc, Register val, Register table) { ++ xorr(val, val, crc); ++ andi(val, val, 0xff); ++ ld_w(val, Address(table, val, Address::times_4, 0)); ++ srli_w(crc, crc, 8); ++ xorr(crc, val, crc); ++} ++ ++/** ++ * @param crc register containing existing CRC (32-bit) ++ * @param buf register pointing to input byte buffer (byte*) ++ * @param len register containing number of bytes ++ * @param tmp scratch register ++**/ ++void MacroAssembler::kernel_crc32(Register crc, Register buf, Register len, Register tmp) { ++ Label CRC_by64_loop, CRC_by4_loop, CRC_by1_loop, CRC_less64, CRC_by64_pre, CRC_by32_loop, CRC_less32, L_exit; ++ assert_different_registers(crc, buf, len, tmp); ++ ++ nor(crc, crc, R0); ++ ++ addi_d(len, len, -64); ++ bge(len, R0, CRC_by64_loop); ++ addi_d(len, len, 64-4); ++ bge(len, R0, CRC_by4_loop); ++ addi_d(len, len, 4); ++ blt(R0, len, CRC_by1_loop); ++ b(L_exit); ++ ++ bind(CRC_by64_loop); ++ ld_d(tmp, buf, 0); ++ crc_w_d_w(crc, tmp, crc); ++ ld_d(tmp, buf, 8); ++ crc_w_d_w(crc, tmp, crc); ++ ld_d(tmp, buf, 16); ++ crc_w_d_w(crc, tmp, crc); ++ ld_d(tmp, buf, 24); ++ crc_w_d_w(crc, tmp, crc); ++ ld_d(tmp, buf, 32); ++ crc_w_d_w(crc, tmp, crc); ++ ld_d(tmp, buf, 40); ++ crc_w_d_w(crc, tmp, crc); ++ ld_d(tmp, buf, 48); ++ crc_w_d_w(crc, tmp, crc); ++ ld_d(tmp, buf, 56); ++ crc_w_d_w(crc, tmp, crc); ++ addi_d(buf, buf, 64); ++ addi_d(len, len, -64); ++ bge(len, R0, CRC_by64_loop); ++ addi_d(len, len, 64-4); ++ bge(len, R0, CRC_by4_loop); ++ addi_d(len, len, 4); ++ blt(R0, len, CRC_by1_loop); ++ b(L_exit); ++ ++ bind(CRC_by4_loop); ++ ld_w(tmp, buf, 0); ++ crc_w_w_w(crc, tmp, crc); ++ addi_d(buf, buf, 4); ++ addi_d(len, len, -4); ++ bge(len, R0, CRC_by4_loop); ++ addi_d(len, len, 4); ++ bge(R0, len, L_exit); ++ ++ bind(CRC_by1_loop); ++ ld_b(tmp, buf, 0); ++ crc_w_b_w(crc, tmp, crc); ++ addi_d(buf, buf, 1); ++ addi_d(len, len, -1); ++ blt(R0, len, CRC_by1_loop); ++ ++ bind(L_exit); ++ nor(crc, crc, R0); ++} ++ ++/** ++ * @param crc register containing existing CRC (32-bit) ++ * @param buf register pointing to input byte buffer (byte*) ++ * @param len register containing number of bytes ++ * @param tmp scratch register ++**/ ++void MacroAssembler::kernel_crc32c(Register crc, Register buf, Register len, Register tmp) { ++ Label CRC_by64_loop, CRC_by4_loop, CRC_by1_loop, CRC_less64, CRC_by64_pre, CRC_by32_loop, CRC_less32, L_exit; ++ assert_different_registers(crc, buf, len, tmp); ++ ++ addi_d(len, len, -64); ++ bge(len, R0, CRC_by64_loop); ++ addi_d(len, len, 64-4); ++ bge(len, R0, CRC_by4_loop); ++ addi_d(len, len, 4); ++ blt(R0, len, CRC_by1_loop); ++ b(L_exit); ++ ++ bind(CRC_by64_loop); ++ ld_d(tmp, buf, 0); ++ crcc_w_d_w(crc, tmp, crc); ++ ld_d(tmp, buf, 8); ++ crcc_w_d_w(crc, tmp, crc); ++ ld_d(tmp, buf, 16); ++ crcc_w_d_w(crc, tmp, crc); ++ ld_d(tmp, buf, 24); ++ crcc_w_d_w(crc, tmp, crc); ++ ld_d(tmp, buf, 32); ++ crcc_w_d_w(crc, tmp, crc); ++ ld_d(tmp, buf, 40); ++ crcc_w_d_w(crc, tmp, crc); ++ ld_d(tmp, buf, 48); ++ crcc_w_d_w(crc, tmp, crc); ++ ld_d(tmp, buf, 56); ++ crcc_w_d_w(crc, tmp, crc); ++ addi_d(buf, buf, 64); ++ addi_d(len, len, -64); ++ bge(len, R0, CRC_by64_loop); ++ addi_d(len, len, 64-4); ++ bge(len, R0, CRC_by4_loop); ++ addi_d(len, len, 4); ++ blt(R0, len, CRC_by1_loop); ++ b(L_exit); ++ ++ bind(CRC_by4_loop); ++ ld_w(tmp, buf, 0); ++ crcc_w_w_w(crc, tmp, crc); ++ addi_d(buf, buf, 4); ++ addi_d(len, len, -4); ++ 
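For reference, the byte-at-a-time recurrence documented above (val = crc_table[(val ^ crc) & 0xFF]; crc = val ^ (crc >> 8)) is what update_byte_crc32() emits with a table load; kernel_crc32() and kernel_crc32c() avoid the table by using the LoongArch crc.w.*.w / crcc.w.*.w instructions over 8-, 4- and 1-byte chunks. A C sketch of the table-driven step only (illustrative):

    // Byte-wise CRC-32 update matching the comment above.
    static inline uint32_t crc32_update_byte(uint32_t crc, uint8_t val,
                                             const uint32_t* crc_table) {
      uint8_t idx = (uint8_t)(val ^ crc);           // (val ^ crc) & 0xFF
      return crc_table[idx] ^ (crc >> 8);           // val ^ (crc >> 8)
    }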
bge(len, R0, CRC_by4_loop); ++ addi_d(len, len, 4); ++ bge(R0, len, L_exit); ++ ++ bind(CRC_by1_loop); ++ ld_b(tmp, buf, 0); ++ crcc_w_b_w(crc, tmp, crc); ++ addi_d(buf, buf, 1); ++ addi_d(len, len, -1); ++ blt(R0, len, CRC_by1_loop); ++ ++ bind(L_exit); ++} ++ ++// This method checks if provided byte array contains byte with highest bit set. ++void MacroAssembler::has_negatives(Register ary1, Register len, Register result) { ++ Label Loop, End, Nega, Done; ++ ++ orr(result, R0, R0); ++ bge(R0, len, Done); ++ ++ li(AT, 0x8080808080808080); ++ ++ addi_d(len, len, -8); ++ blt(len, R0, End); ++ ++ bind(Loop); ++ ld_d(result, ary1, 0); ++ andr(result, result, AT); ++ bnez(result, Nega); ++ beqz(len, Done); ++ addi_d(len, len, -8); ++ addi_d(ary1, ary1, 8); ++ bge(len, R0, Loop); ++ ++ bind(End); ++ ld_d(result, ary1, 0); ++ slli_d(len, len, 3); ++ sub_d(len, R0, len); ++ sll_d(result, result, len); ++ andr(result, result, AT); ++ beqz(result, Done); ++ ++ bind(Nega); ++ ori(result, R0, 1); ++ ++ bind(Done); ++} ++ ++// Compress char[] to byte[]. len must be positive int. ++// jtreg: TestStringIntrinsicRangeChecks.java ++void MacroAssembler::char_array_compress(Register src, Register dst, ++ Register len, Register result, ++ Register tmp1, Register tmp2, ++ Register tmp3) { ++ Label Loop, Done, Once, Fail; ++ ++ move(result, len); ++ bge(R0, result, Done); ++ ++ srli_w(AT, len, 2); ++ andi(len, len, 3); ++ ++ li(tmp3, 0xff00ff00ff00ff00); ++ ++ bind(Loop); ++ beqz(AT, Once); ++ ld_d(tmp1, src, 0); ++ andr(tmp2, tmp3, tmp1); // not latin-1, stop here ++ bnez(tmp2, Fail); ++ ++ // 0x00a100b200c300d4 -> 0x00000000a1b2c3d4 ++ srli_d(tmp2, tmp1, 8); ++ orr(tmp2, tmp2, tmp1); // 0x00a1a1b2b2c3c3d4 ++ bstrpick_d(tmp1, tmp2, 47, 32); // 0x0000a1b2 ++ slli_d(tmp1, tmp1, 16); // 0xa1b20000 ++ bstrins_d(tmp1, tmp2, 15, 0); // 0xa1b2c3d4 ++ ++ st_w(tmp1, dst, 0); ++ addi_w(AT, AT, -1); ++ addi_d(dst, dst, 4); ++ addi_d(src, src, 8); ++ b(Loop); ++ ++ bind(Once); ++ beqz(len, Done); ++ ld_d(AT, src, 0); ++ ++ bstrpick_d(tmp1, AT, 15, 0); ++ andr(tmp2, tmp3, tmp1); ++ bnez(tmp2, Fail); ++ st_b(tmp1, dst, 0); ++ addi_w(len, len, -1); ++ ++ beqz(len, Done); ++ bstrpick_d(tmp1, AT, 31, 16); ++ andr(tmp2, tmp3, tmp1); ++ bnez(tmp2, Fail); ++ st_b(tmp1, dst, 1); ++ addi_w(len, len, -1); ++ ++ beqz(len, Done); ++ bstrpick_d(tmp1, AT, 47, 32); ++ andr(tmp2, tmp3, tmp1); ++ bnez(tmp2, Fail); ++ st_b(tmp1, dst, 2); ++ b(Done); ++ ++ bind(Fail); ++ move(result, R0); ++ ++ bind(Done); ++} ++ ++// Inflate byte[] to char[]. len must be positive int. 
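char_array_compress() above, and the byte_array_inflate() that follows, move between UTF-16 char[] data and latin-1 byte[] data four elements at a time; the inline hex comments (0x00a100b200c300d4 -> 0x00000000a1b2c3d4) describe the packing done with srli/or/bstrpick/bstrins once the 0xff00ff00ff00ff00 mask has confirmed every char fits in one byte. A scalar C sketch of that pack step (illustrative only):

    // Pack four UTF-16 code units (loaded as one little-endian 64-bit word)
    // into four bytes, failing if any unit needs more than 8 bits.
    static inline bool pack4_latin1(uint64_t four_chars, uint32_t* out) {
      if (four_chars & 0xff00ff00ff00ff00ULL) return false;   // bnez(tmp2, Fail)
      uint32_t packed = 0;
      for (int i = 0; i < 4; i++)
        packed |= (uint32_t)((four_chars >> (16 * i)) & 0xff) << (8 * i);
      *out = packed;                                           // st_w(tmp1, dst, 0)
      return true;
    }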
++// jtreg:test/jdk/sun/nio/cs/FindDecoderBugs.java ++void MacroAssembler::byte_array_inflate(Register src, Register dst, Register len, ++ Register tmp1, Register tmp2) { ++ Label Loop, Once, Done; ++ ++ bge(R0, len, Done); ++ ++ srli_w(AT, len, 2); ++ andi(len, len, 3); ++ ++ bind(Loop); ++ beqz(AT, Once); ++ ld_wu(tmp1, src, 0); ++ ++ // 0x00000000a1b2c3d4 -> 0x00a100b200c300d4 ++ bstrpick_d(tmp2, tmp1, 7, 0); ++ srli_d(tmp1, tmp1, 8); ++ bstrins_d(tmp2, tmp1, 23, 16); ++ srli_d(tmp1, tmp1, 8); ++ bstrins_d(tmp2, tmp1, 39, 32); ++ srli_d(tmp1, tmp1, 8); ++ bstrins_d(tmp2, tmp1, 55, 48); ++ ++ st_d(tmp2, dst, 0); ++ addi_w(AT, AT, -1); ++ addi_d(dst, dst, 8); ++ addi_d(src, src, 4); ++ b(Loop); ++ ++ bind(Once); ++ beqz(len, Done); ++ ld_wu(tmp1, src, 0); ++ ++ bstrpick_d(tmp2, tmp1, 7, 0); ++ st_h(tmp2, dst, 0); ++ addi_w(len, len, -1); ++ ++ beqz(len, Done); ++ bstrpick_d(tmp2, tmp1, 15, 8); ++ st_h(tmp2, dst, 2); ++ addi_w(len, len, -1); ++ ++ beqz(len, Done); ++ bstrpick_d(tmp2, tmp1, 23, 16); ++ st_h(tmp2, dst, 4); ++ ++ bind(Done); ++} ++ ++// Intrinsic for ++// ++// - java.lang.StringCoding::implEncodeISOArray ++// - java.lang.StringCoding::implEncodeAsciiArray ++// ++// This version always returns the number of characters copied. ++void MacroAssembler::encode_iso_array(Register src, Register dst, ++ Register len, Register result, ++ Register tmp1, Register tmp2, ++ Register tmp3, bool ascii) { ++ Label Loop, Done, Once; ++ ++ move(result, R0); // init in case of bad value ++ bge(R0, len, Done); ++ ++ srai_w(AT, len, 2); ++ ++ li(tmp3, ascii ? 0xff80ff80ff80ff80 : 0xff00ff00ff00ff00); ++ ++ bind(Loop); ++ beqz(AT, Once); ++ ld_d(tmp1, src, 0); ++ andr(tmp2, tmp3, tmp1); // not latin-1, stop here ++ bnez(tmp2, Once); ++ ++ // 0x00a100b200c300d4 -> 0x00000000a1b2c3d4 ++ srli_d(tmp2, tmp1, 8); ++ orr(tmp2, tmp2, tmp1); // 0x00a1a1b2b2c3c3d4 ++ bstrpick_d(tmp1, tmp2, 47, 32); // 0x0000a1b2 ++ slli_d(tmp1, tmp1, 16); // 0xa1b20000 ++ bstrins_d(tmp1, tmp2, 15, 0); // 0xa1b2c3d4 ++ ++ stx_w(tmp1, dst, result); ++ addi_w(AT, AT, -1); ++ addi_d(src, src, 8); ++ addi_w(result, result, 4); ++ b(Loop); ++ ++ bind(Once); ++ beq(len, result, Done); ++ ld_hu(tmp1, src, 0); ++ andr(tmp2, tmp3, tmp1); // not latin-1, stop here ++ bnez(tmp2, Done); ++ stx_b(tmp1, dst, result); ++ addi_d(src, src, 2); ++ addi_w(result, result, 1); ++ b(Once); ++ ++ bind(Done); ++} ++ ++// Code for BigInteger::mulAdd intrinsic ++// out = A0 ++// in = A1 ++// offset = A2 (already out.length-offset) ++// len = A3 ++// k = A4 ++// ++// pseudo code from java implementation: ++// long kLong = k & LONG_MASK; ++// carry = 0; ++// offset = out.length-offset - 1; ++// for (int j = len - 1; j >= 0; j--) { ++// product = (in[j] & LONG_MASK) * kLong + (out[offset] & LONG_MASK) + carry; ++// out[offset--] = (int)product; ++// carry = product >>> 32; ++// } ++// return (int)carry; ++void MacroAssembler::mul_add(Register out, Register in, Register offset, ++ Register len, Register k) { ++ Label L_tail_loop, L_unroll, L_end; ++ ++ move(SCR2, out); ++ move(out, R0); // should clear out ++ bge(R0, len, L_end); ++ ++ alsl_d(offset, offset, SCR2, LogBytesPerInt - 1); ++ alsl_d(in, len, in, LogBytesPerInt - 1); ++ ++ const int unroll = 16; ++ li(SCR2, unroll); ++ blt(len, SCR2, L_tail_loop); ++ ++ bind(L_unroll); ++ ++ addi_d(in, in, -unroll * BytesPerInt); ++ addi_d(offset, offset, -unroll * BytesPerInt); ++ ++ for (int i = unroll - 1; i >= 0; i--) { ++ ld_wu(SCR1, in, i * BytesPerInt); ++ mulw_d_wu(SCR1, SCR1, k); ++ add_d(out, out, 
SCR1); // out as scratch ++ ld_wu(SCR1, offset, i * BytesPerInt); ++ add_d(SCR1, SCR1, out); ++ st_w(SCR1, offset, i * BytesPerInt); ++ srli_d(out, SCR1, 32); // keep carry ++ } ++ ++ sub_w(len, len, SCR2); ++ bge(len, SCR2, L_unroll); ++ ++ bge(R0, len, L_end); // check tail ++ ++ bind(L_tail_loop); ++ ++ addi_d(in, in, -BytesPerInt); ++ ld_wu(SCR1, in, 0); ++ mulw_d_wu(SCR1, SCR1, k); ++ add_d(out, out, SCR1); // out as scratch ++ ++ addi_d(offset, offset, -BytesPerInt); ++ ld_wu(SCR1, offset, 0); ++ add_d(SCR1, SCR1, out); ++ st_w(SCR1, offset, 0); ++ ++ srli_d(out, SCR1, 32); // keep carry ++ ++ addi_w(len, len, -1); ++ blt(R0, len, L_tail_loop); ++ ++ bind(L_end); ++} ++ ++#ifndef PRODUCT ++void MacroAssembler::verify_cross_modify_fence_not_required() { ++ if (VerifyCrossModifyFence) { ++ // Check if thread needs a cross modify fence. ++ ld_bu(SCR1, Address(TREG, in_bytes(JavaThread::requires_cross_modify_fence_offset()))); ++ Label fence_not_required; ++ beqz(SCR1, fence_not_required); ++ // If it does then fail. ++ move(A0, TREG); ++ call(CAST_FROM_FN_PTR(address, JavaThread::verify_cross_modify_fence_failure)); ++ bind(fence_not_required); ++ } ++} ++#endif +diff --git a/src/hotspot/cpu/loongarch/macroAssembler_loongarch.hpp b/src/hotspot/cpu/loongarch/macroAssembler_loongarch.hpp +new file mode 100644 +index 00000000000..c24d8a4712a +--- /dev/null ++++ b/src/hotspot/cpu/loongarch/macroAssembler_loongarch.hpp +@@ -0,0 +1,754 @@ ++/* ++ * Copyright (c) 1997, 2013, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#ifndef CPU_LOONGARCH_MACROASSEMBLER_LOONGARCH_HPP ++#define CPU_LOONGARCH_MACROASSEMBLER_LOONGARCH_HPP ++ ++#include "asm/assembler.hpp" ++#include "runtime/rtmLocking.hpp" ++#include "utilities/macros.hpp" ++ ++// MacroAssembler extends Assembler by frequently used macros. ++// ++// Instructions for which a 'better' code sequence exists depending ++// on arguments should also go in here. ++ ++class MacroAssembler: public Assembler { ++ friend class LIR_Assembler; ++ friend class Runtime1; // as_Address() ++ ++ public: ++ // Compare code ++ typedef enum { ++ EQ = 0x01, ++ NE = 0x02, ++ GT = 0x03, ++ GE = 0x04, ++ LT = 0x05, ++ LE = 0x06 ++ } CMCompare; ++ ++ public: ++ // Support for VM calls ++ // ++ // This is the base routine called by the different versions of call_VM_leaf. 
The interpreter ++ // may customize this version by overriding it for its purposes (e.g., to save/restore ++ // additional registers when doing a VM call). ++ #define VIRTUAL virtual ++ ++ VIRTUAL void call_VM_leaf_base( ++ address entry_point, // the entry point ++ int number_of_arguments // the number of arguments to pop after the call ++ ); ++ ++ protected: ++ // This is the base routine called by the different versions of call_VM. The interpreter ++ // may customize this version by overriding it for its purposes (e.g., to save/restore ++ // additional registers when doing a VM call). ++ // ++ // If no java_thread register is specified (noreg) than TREG will be used instead. call_VM_base ++ // returns the register which contains the thread upon return. If a thread register has been ++ // specified, the return value will correspond to that register. If no last_java_sp is specified ++ // (noreg) than sp will be used instead. ++ VIRTUAL void call_VM_base( // returns the register containing the thread upon return ++ Register oop_result, // where an oop-result ends up if any; use noreg otherwise ++ Register java_thread, // the thread if computed before ; use noreg otherwise ++ Register last_java_sp, // to set up last_Java_frame in stubs; use noreg otherwise ++ address entry_point, // the entry point ++ int number_of_arguments, // the number of arguments (w/o thread) to pop after the call ++ bool check_exceptions // whether to check for pending exceptions after return ++ ); ++ ++ void call_VM_helper(Register oop_result, address entry_point, int number_of_arguments, bool check_exceptions = true); ++ ++ // helpers for FPU flag access ++ // tmp is a temporary register, if none is available use noreg ++ ++ public: ++ MacroAssembler(CodeBuffer* code) : Assembler(code) {} ++ ++ // These routines should emit JVMTI PopFrame and ForceEarlyReturn handling code. ++ // The implementation is only non-empty for the InterpreterMacroAssembler, ++ // as only the interpreter handles PopFrame and ForceEarlyReturn requests. ++ virtual void check_and_handle_popframe(Register java_thread); ++ virtual void check_and_handle_earlyret(Register java_thread); ++ ++ Address as_Address(AddressLiteral adr); ++ Address as_Address(ArrayAddress adr); ++ ++ static intptr_t i[32]; ++ static float f[32]; ++ static void print(outputStream *s); ++ ++ static int i_offset(unsigned int k); ++ static int f_offset(unsigned int k); ++ ++ static void save_registers(MacroAssembler *masm); ++ static void restore_registers(MacroAssembler *masm); ++ ++ // Support for NULL-checks ++ // ++ // Generates code that causes a NULL OS exception if the content of reg is NULL. ++ // If the accessed location is M[reg + offset] and the offset is known, provide the ++ // offset. No explicit code generation is needed if the offset is within a certain ++ // range (0 <= offset <= page_size). ++ ++ void null_check(Register reg, int offset = -1); ++ static bool needs_explicit_null_check(intptr_t offset); ++ static bool uses_implicit_null_check(void* address); ++ ++ // Required platform-specific helpers for Label::patch_instructions. ++ // They _shadow_ the declarations in AbstractAssembler, which are undefined. 
++ static void pd_patch_instruction(address branch, address target, const char* file = NULL, int line = 0); ++ ++ address emit_trampoline_stub(int insts_call_instruction_offset, address target); ++ ++ // Support for inc/dec with optimal instruction selection depending on value ++ // void incrementl(Register reg, int value = 1); ++ // void decrementl(Register reg, int value = 1); ++ ++ ++ // Alignment ++ void align(int modulus); ++ ++ ++ // Stack frame creation/removal ++ void enter(); ++ void leave(); ++ ++ // Frame creation and destruction shared between JITs. ++ void build_frame(int framesize); ++ void remove_frame(int framesize); ++ ++ // Support for getting the JavaThread pointer (i.e.; a reference to thread-local information) ++ // The pointer will be loaded into the thread register. ++ void get_thread(Register thread); ++ ++ ++ // Support for VM calls ++ // ++ // It is imperative that all calls into the VM are handled via the call_VM macros. ++ // They make sure that the stack linkage is setup correctly. call_VM's correspond ++ // to ENTRY/ENTRY_X entry points while call_VM_leaf's correspond to LEAF entry points. ++ ++ ++ void call_VM(Register oop_result, ++ address entry_point, ++ bool check_exceptions = true); ++ void call_VM(Register oop_result, ++ address entry_point, ++ Register arg_1, ++ bool check_exceptions = true); ++ void call_VM(Register oop_result, ++ address entry_point, ++ Register arg_1, Register arg_2, ++ bool check_exceptions = true); ++ void call_VM(Register oop_result, ++ address entry_point, ++ Register arg_1, Register arg_2, Register arg_3, ++ bool check_exceptions = true); ++ ++ // Overloadings with last_Java_sp ++ void call_VM(Register oop_result, ++ Register last_java_sp, ++ address entry_point, ++ int number_of_arguments = 0, ++ bool check_exceptions = true); ++ void call_VM(Register oop_result, ++ Register last_java_sp, ++ address entry_point, ++ Register arg_1, bool ++ check_exceptions = true); ++ void call_VM(Register oop_result, ++ Register last_java_sp, ++ address entry_point, ++ Register arg_1, Register arg_2, ++ bool check_exceptions = true); ++ void call_VM(Register oop_result, ++ Register last_java_sp, ++ address entry_point, ++ Register arg_1, Register arg_2, Register arg_3, ++ bool check_exceptions = true); ++ ++ void get_vm_result (Register oop_result, Register thread); ++ void get_vm_result_2(Register metadata_result, Register thread); ++ void call_VM_leaf(address entry_point, ++ int number_of_arguments = 0); ++ void call_VM_leaf(address entry_point, ++ Register arg_1); ++ void call_VM_leaf(address entry_point, ++ Register arg_1, Register arg_2); ++ void call_VM_leaf(address entry_point, ++ Register arg_1, Register arg_2, Register arg_3); ++ ++ // Super call_VM calls - correspond to MacroAssembler::call_VM(_leaf) calls ++ void super_call_VM_leaf(address entry_point); ++ void super_call_VM_leaf(address entry_point, Register arg_1); ++ void super_call_VM_leaf(address entry_point, Register arg_1, Register arg_2); ++ void super_call_VM_leaf(address entry_point, Register arg_1, Register arg_2, Register arg_3); ++ ++ // last Java Frame (fills frame anchor) ++ void set_last_Java_frame(Register thread, ++ Register last_java_sp, ++ Register last_java_fp, ++ Label& last_java_pc); ++ ++ // thread in the default location (S6) ++ void set_last_Java_frame(Register last_java_sp, ++ Register last_java_fp, ++ Label& last_java_pc); ++ ++ void set_last_Java_frame(Register last_java_sp, ++ Register last_java_fp, ++ Register last_java_pc); ++ ++ void 
reset_last_Java_frame(Register thread, bool clear_fp); ++ ++ // thread in the default location (S6) ++ void reset_last_Java_frame(bool clear_fp); ++ ++ // jobjects ++ void clear_jweak_tag(Register possibly_jweak); ++ void resolve_jobject(Register value, Register thread, Register tmp); ++ ++ // C 'boolean' to Java boolean: x == 0 ? 0 : 1 ++ void c2bool(Register x); ++ ++ void resolve_weak_handle(Register result, Register tmp); ++ void resolve_oop_handle(Register result, Register tmp); ++ void load_mirror(Register dst, Register method, Register tmp); ++ ++ void load_method_holder_cld(Register rresult, Register rmethod); ++ void load_method_holder(Register holder, Register method); ++ ++ // oop manipulations ++ void load_klass(Register dst, Register src); ++ void store_klass(Register dst, Register src); ++ ++ void access_load_at(BasicType type, DecoratorSet decorators, Register dst, Address src, ++ Register tmp1, Register thread_tmp); ++ void access_store_at(BasicType type, DecoratorSet decorators, Address dst, Register src, ++ Register tmp1, Register tmp2); ++ ++ void load_heap_oop(Register dst, Address src, Register tmp1, ++ Register thread_tmp = noreg, DecoratorSet decorators = 0); ++ void load_heap_oop_not_null(Register dst, Address src, Register tmp1 = noreg, ++ Register thread_tmp = noreg, DecoratorSet decorators = 0); ++ void store_heap_oop(Address dst, Register src, Register tmp1 = noreg, ++ Register tmp2 = noreg, DecoratorSet decorators = 0); ++ ++ // Used for storing NULL. All other oop constants should be ++ // stored using routines that take a jobject. ++ void store_heap_oop_null(Address dst); ++ ++ void load_prototype_header(Register dst, Register src); ++ ++ void store_klass_gap(Register dst, Register src); ++ ++ void encode_heap_oop(Register r); ++ void encode_heap_oop(Register dst, Register src); ++ void decode_heap_oop(Register r); ++ void decode_heap_oop(Register dst, Register src); ++ void encode_heap_oop_not_null(Register r); ++ void decode_heap_oop_not_null(Register r); ++ void encode_heap_oop_not_null(Register dst, Register src); ++ void decode_heap_oop_not_null(Register dst, Register src); ++ ++ void encode_klass_not_null(Register r); ++ void decode_klass_not_null(Register r); ++ void encode_klass_not_null(Register dst, Register src); ++ void decode_klass_not_null(Register dst, Register src); ++ ++ // if heap base register is used - reinit it with the correct value ++ void reinit_heapbase(); ++ ++ DEBUG_ONLY(void verify_heapbase(const char* msg);) ++ ++ void set_narrow_klass(Register dst, Klass* k); ++ void set_narrow_oop(Register dst, jobject obj); ++ ++ // Sign extension ++ void sign_extend_short(Register reg) { ext_w_h(reg, reg); } ++ void sign_extend_byte(Register reg) { ext_w_b(reg, reg); } ++ void rem_s(FloatRegister fd, FloatRegister fs, FloatRegister ft, FloatRegister tmp); ++ void rem_d(FloatRegister fd, FloatRegister fs, FloatRegister ft, FloatRegister tmp); ++ ++ // allocation ++ void eden_allocate( ++ Register obj, // result: pointer to object after successful allocation ++ Register var_size_in_bytes, // object size in bytes if unknown at compile time; invalid otherwise ++ int con_size_in_bytes, // object size in bytes if known at compile time ++ Register t1, // temp register ++ Label& slow_case // continuation point if fast allocation fails ++ ); ++ void tlab_allocate( ++ Register obj, // result: pointer to object after successful allocation ++ Register var_size_in_bytes, // object size in bytes if unknown at compile time; invalid otherwise ++ int 
con_size_in_bytes, // object size in bytes if known at compile time ++ Register t1, // temp register ++ Register t2, // temp register ++ Label& slow_case // continuation point if fast allocation fails ++ ); ++ void incr_allocated_bytes(Register thread, ++ Register var_size_in_bytes, int con_size_in_bytes, ++ Register t1 = noreg); ++ // interface method calling ++ void lookup_interface_method(Register recv_klass, ++ Register intf_klass, ++ RegisterOrConstant itable_index, ++ Register method_result, ++ Register scan_temp, ++ Label& no_such_interface, ++ bool return_method = true); ++ ++ // virtual method calling ++ void lookup_virtual_method(Register recv_klass, ++ RegisterOrConstant vtable_index, ++ Register method_result); ++ ++ // Test sub_klass against super_klass, with fast and slow paths. ++ ++ // The fast path produces a tri-state answer: yes / no / maybe-slow. ++ // One of the three labels can be NULL, meaning take the fall-through. ++ // If super_check_offset is -1, the value is loaded up from super_klass. ++ // No registers are killed, except temp_reg. ++ void check_klass_subtype_fast_path(Register sub_klass, ++ Register super_klass, ++ Register temp_reg, ++ Label* L_success, ++ Label* L_failure, ++ Label* L_slow_path, ++ RegisterOrConstant super_check_offset = RegisterOrConstant(-1)); ++ ++ // The rest of the type check; must be wired to a corresponding fast path. ++ // It does not repeat the fast path logic, so don't use it standalone. ++ // The temp_reg and temp2_reg can be noreg, if no temps are available. ++ // Updates the sub's secondary super cache as necessary. ++ // If set_cond_codes, condition codes will be Z on success, NZ on failure. ++ void check_klass_subtype_slow_path(Register sub_klass, ++ Register super_klass, ++ Register temp_reg, ++ Register temp2_reg, ++ Label* L_success, ++ Label* L_failure, ++ bool set_cond_codes = false); ++ ++ // Simplified, combined version, good for typical uses. ++ // Falls through on failure. ++ void check_klass_subtype(Register sub_klass, ++ Register super_klass, ++ Register temp_reg, ++ Label& L_success); ++ ++ void clinit_barrier(Register klass, ++ Register scratch, ++ Label* L_fast_path = NULL, ++ Label* L_slow_path = NULL); ++ ++ ++ // Debugging ++ ++ // only if +VerifyOops ++ void verify_oop(Register reg, const char* s = "broken oop"); ++ void verify_oop_addr(Address addr, const char * s = "broken oop addr"); ++ void verify_oop_subroutine(); ++ // TODO: verify method and klass metadata (compare against vptr?) 
++ void _verify_method_ptr(Register reg, const char * msg, const char * file, int line) {} ++ void _verify_klass_ptr(Register reg, const char * msg, const char * file, int line){} ++ ++ #define verify_method_ptr(reg) _verify_method_ptr(reg, "broken method " #reg, __FILE__, __LINE__) ++ #define verify_klass_ptr(reg) _verify_klass_ptr(reg, "broken klass " #reg, __FILE__, __LINE__) ++ ++ // only if +VerifyFPU ++ void verify_FPU(int stack_depth, const char* s = "illegal FPU state"); ++ ++ // prints msg, dumps registers and stops execution ++ void stop(const char* msg); ++ ++ static void debug(char* msg/*, RegistersForDebugging* regs*/); ++ static void debug64(char* msg, int64_t pc, int64_t regs[]); ++ ++ void untested() { stop("untested"); } ++ ++ void unimplemented(const char* what = ""); ++ ++ void should_not_reach_here() { stop("should not reach here"); } ++ ++ void print_CPU_state(); ++ ++ // Stack overflow checking ++ void bang_stack_with_offset(int offset) { ++ // stack grows down, caller passes positive offset ++ assert(offset > 0, "must bang with negative offset"); ++ if (offset <= 2048) { ++ st_w(RA0, SP, -offset); ++ } else if (offset <= 32768 && !(offset & 3)) { ++ stptr_w(RA0, SP, -offset); ++ } else { ++ li(AT, offset); ++ sub_d(AT, SP, AT); ++ st_w(RA0, AT, 0); ++ } ++ } ++ ++ // Writes to stack successive pages until offset reached to check for ++ // stack overflow + shadow pages. Also, clobbers tmp ++ void bang_stack_size(Register size, Register tmp); ++ ++ // Check for reserved stack access in method being exited (for JIT) ++ void reserved_stack_check(); ++ ++ virtual RegisterOrConstant delayed_value_impl(intptr_t* delayed_value_addr, ++ Register tmp, ++ int offset); ++ ++ void safepoint_poll(Label& slow_path, Register thread_reg, bool at_return, bool acquire, bool in_nmethod); ++ ++ //void verify_tlab(); ++ void verify_tlab(Register t1, Register t2); ++ ++ // Biased locking support ++ // lock_reg and obj_reg must be loaded up with the appropriate values. ++ // tmp_reg is optional. If it is supplied (i.e., != noreg) it will ++ // be killed; if not supplied, push/pop will be used internally to ++ // allocate a temporary (inefficient, avoid if possible). ++ // Optional slow case is for implementations (interpreter and C1) which branch to ++ // slow case directly. Leaves condition codes set for C2's Fast_Lock node. ++ // Returns offset of first potentially-faulting instruction for null ++ // check info (currently consumed only by C1). If ++ // swap_reg_contains_mark is true then returns -1 as it is assumed ++ // the calling code has already passed any potential faults. ++ void biased_locking_enter(Register lock_reg, Register obj_reg, ++ Register swap_reg, Register tmp_reg, ++ bool swap_reg_contains_mark, ++ Label& done, Label* slow_case = NULL, ++ BiasedLockingCounters* counters = NULL); ++ void biased_locking_exit (Register obj_reg, Register temp_reg, Label& done); ++ ++ // the follow two might use AT register, be sure you have no meanful data in AT before you call them ++ void increment(Register reg, int imm); ++ void decrement(Register reg, int imm); ++ void increment(Address addr, int imm = 1); ++ void decrement(Address addr, int imm = 1); ++ void shl(Register reg, int sa) { slli_d(reg, reg, sa); } ++ void shr(Register reg, int sa) { srli_d(reg, reg, sa); } ++ void sar(Register reg, int sa) { srai_d(reg, reg, sa); } ++ // Helper functions for statistics gathering. 
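// Note on the immediate ranges used by bang_stack_with_offset() above: st.w
// takes a 12-bit signed offset, so -offset is encodable whenever offset <= 2048,
// while stptr.w takes a 14-bit signed offset scaled by 4, hence the 32768 limit
// and the 4-byte alignment test; anything larger goes through li/sub_d.
// The sketch below shows how a caller typically walks the shadow pages with
// this helper. It is illustrative only (not part of this patch); the 16 KiB
// page size and the free-standing function are assumptions for the example.
//
//   static void bang_shadow_pages(MacroAssembler* masm, int shadow_zone_bytes) {
//     const int page_size = 16 * 1024;            // assumed base page size
//     for (int off = page_size; off <= shadow_zone_bytes; off += page_size) {
//       masm->bang_stack_with_offset(off);        // touches SP - off
//     }
//   }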
++ void atomic_inc32(address counter_addr, int inc, Register tmp_reg1, Register tmp_reg2); ++ ++ // Calls ++ void call(address entry); ++ void call(address entry, relocInfo::relocType rtype); ++ void call(address entry, RelocationHolder& rh); ++ void call_long(address entry); ++ ++ address trampoline_call(AddressLiteral entry, CodeBuffer *cbuf = NULL); ++ ++ static const unsigned long branch_range = NOT_DEBUG(128 * M) DEBUG_ONLY(2 * M); ++ ++ static bool far_branches() { ++ if (ForceUnreachable) { ++ return true; ++ } else { ++ return ReservedCodeCacheSize > branch_range; ++ } ++ } ++ ++ // Emit the CompiledIC call idiom ++ address ic_call(address entry, jint method_index = 0); ++ ++ // Jumps ++ void jmp(address entry); ++ void jmp(address entry, relocInfo::relocType rtype); ++ void jmp_far(Label& L); // patchable ++ ++ /* branches may exceed 16-bit offset */ ++ void b_far(address entry); ++ void b_far(Label& L); ++ ++ void bne_far (Register rs, Register rt, address entry); ++ void bne_far (Register rs, Register rt, Label& L); ++ ++ void beq_far (Register rs, Register rt, address entry); ++ void beq_far (Register rs, Register rt, Label& L); ++ ++ void blt_far (Register rs, Register rt, address entry, bool is_signed); ++ void blt_far (Register rs, Register rt, Label& L, bool is_signed); ++ ++ void bge_far (Register rs, Register rt, address entry, bool is_signed); ++ void bge_far (Register rs, Register rt, Label& L, bool is_signed); ++ ++ static bool patchable_branches() { ++ const unsigned long branch_range = NOT_DEBUG(128 * M) DEBUG_ONLY(2 * M); ++ return ReservedCodeCacheSize > branch_range; ++ } ++ ++ static bool reachable_from_branch_short(jlong offs); ++ ++ void patchable_jump_far(Register ra, jlong offs); ++ void patchable_jump(address target, bool force_patchable = false); ++ void patchable_call(address target, address call_size = 0); ++ ++ // Floating ++ void generate_dsin_dcos(bool isCos, address npio2_hw, address two_over_pi, ++ address pio2, address dsin_coef, address dcos_coef); ++ ++ // Data ++ ++ // Load and store values by size and signed-ness ++ void load_sized_value(Register dst, Address src, size_t size_in_bytes, bool is_signed, Register dst2 = noreg); ++ void store_sized_value(Address dst, Register src, size_t size_in_bytes, Register src2 = noreg); ++ ++ // ld_ptr will perform lw for 32 bit VMs and ld for 64 bit VMs ++ inline void ld_ptr(Register rt, Address a) { ++ ld_d(rt, a); ++ } ++ ++ inline void ld_ptr(Register rt, Register base, int offset16) { ++ ld_d(rt, base, offset16); ++ } ++ ++ // st_ptr will perform sw for 32 bit VMs and sd for 64 bit VMs ++ inline void st_ptr(Register rt, Address a) { ++ st_d(rt, a); ++ } ++ ++ inline void st_ptr(Register rt, Register base, int offset16) { ++ st_d(rt, base, offset16); ++ } ++ ++ void ld_ptr(Register rt, Register base, Register offset); ++ void st_ptr(Register rt, Register base, Register offset); ++ ++ // swap the two byte of the low 16-bit halfword ++ void bswap_h(Register dst, Register src); ++ void bswap_hu(Register dst, Register src); ++ ++ // convert big endian integer to little endian integer ++ void bswap_w(Register dst, Register src); ++ ++ void cmpxchg(Address addr, Register oldval, Register newval, Register resflag, ++ bool retold, bool barrier, bool weak = false, bool exchange = false); ++ void cmpxchg(Address addr, Register oldval, Register newval, Register tmp, ++ bool retold, bool barrier, Label& succ, Label* fail = nullptr); ++ void cmpxchg32(Address addr, Register oldval, Register newval, Register resflag, ++ 
bool sign, bool retold, bool barrier, bool weak = false, bool exchange = false); ++ void cmpxchg32(Address addr, Register oldval, Register newval, Register tmp, ++ bool sign, bool retold, bool barrier, Label& succ, Label* fail = nullptr); ++ ++ void extend_sign(Register rh, Register rl) { /*stop("extend_sign");*/ guarantee(0, "LA not implemented yet");} ++ void neg(Register reg) { /*dsubu(reg, R0, reg);*/ guarantee(0, "LA not implemented yet");} ++ void push (Register reg) { addi_d(SP, SP, -8); st_d (reg, SP, 0); } ++ void push (FloatRegister reg) { addi_d(SP, SP, -8); fst_d (reg, SP, 0); } ++ void pop (Register reg) { ld_d (reg, SP, 0); addi_d(SP, SP, 8); } ++ void pop (FloatRegister reg) { fld_d (reg, SP, 0); addi_d(SP, SP, 8); } ++ void pop () { addi_d(SP, SP, 8); } ++ void pop2 () { addi_d(SP, SP, 16); } ++ void push2(Register reg1, Register reg2); ++ void pop2 (Register reg1, Register reg2); ++ // Push and pop everything that might be clobbered by a native ++ // runtime call except SCR1 and SCR2. (They are always scratch, ++ // so we don't have to protect them.) Only save the lower 64 bits ++ // of each vector register. Additional registers can be excluded ++ // in a passed RegSet. ++ void push_call_clobbered_registers_except(RegSet exclude); ++ void pop_call_clobbered_registers_except(RegSet exclude); ++ ++ void push_call_clobbered_registers() { ++ push_call_clobbered_registers_except(RegSet()); ++ } ++ void pop_call_clobbered_registers() { ++ pop_call_clobbered_registers_except(RegSet()); ++ } ++ void push(RegSet regs) { if (regs.bits()) push(regs.bits()); } ++ void pop(RegSet regs) { if (regs.bits()) pop(regs.bits()); } ++ void push_fpu(FloatRegSet regs) { if (regs.bits()) push_fpu(regs.bits()); } ++ void pop_fpu(FloatRegSet regs) { if (regs.bits()) pop_fpu(regs.bits()); } ++ void push_vp(FloatRegSet regs) { if (regs.bits()) push_vp(regs.bits()); } ++ void pop_vp(FloatRegSet regs) { if (regs.bits()) pop_vp(regs.bits()); } ++ ++ void li(Register rd, jlong value); ++ void li(Register rd, address addr) { li(rd, (long)addr); } ++ void patchable_li52(Register rd, jlong value); ++ void lipc(Register rd, Label& L); ++ ++ void move(Register rd, Register rs) { orr(rd, rs, R0); } ++ void move_u32(Register rd, Register rs) { add_w(rd, rs, R0); } ++ void mov_metadata(Register dst, Metadata* obj); ++ void mov_metadata(Address dst, Metadata* obj); ++ ++ // Load the base of the cardtable byte map into reg. 
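// Usage sketch for the call-clobbered save/restore pair declared above
// (illustrative only, not part of this patch): bracket a call into the
// runtime while keeping one live value out of the save set. RegSet::of() is
// assumed to come from the companion register definitions, and RA0 stands in
// for whatever register holds the live value.
//
//   push_call_clobbered_registers_except(RegSet::of(RA0));
//   // ... emit the runtime call here ...
//   pop_call_clobbered_registers_except(RegSet::of(RA0));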
++ void load_byte_map_base(Register reg); ++ ++ // method handles (JSR 292) ++ Address argument_address(RegisterOrConstant arg_slot, int extra_slot_offset = 0); ++ ++ ++ // LA added: ++ void jr (Register reg) { jirl(R0, reg, 0); } ++ void jalr(Register reg) { jirl(RA, reg, 0); } ++ void nop () { andi(R0, R0, 0); } ++ void andr(Register rd, Register rj, Register rk) { AND(rd, rj, rk); } ++ void xorr(Register rd, Register rj, Register rk) { XOR(rd, rj, rk); } ++ void orr (Register rd, Register rj, Register rk) { OR(rd, rj, rk); } ++ void lea (Register rd, Address src); ++ void lea(Register dst, AddressLiteral adr); ++ static int patched_branch(int dest_pos, int inst, int inst_pos); ++ ++ // Conditional move ++ void cmp_cmov(Register op1, ++ Register op2, ++ Register dst, ++ Register src1, ++ Register src2, ++ CMCompare cmp = EQ, ++ bool is_signed = true); ++ void cmp_cmov(Register op1, ++ Register op2, ++ Register dst, ++ Register src, ++ CMCompare cmp = EQ, ++ bool is_signed = true); ++ void cmp_cmov(FloatRegister op1, ++ FloatRegister op2, ++ Register dst, ++ Register src, ++ FloatRegister tmp1, ++ FloatRegister tmp2, ++ CMCompare cmp = EQ, ++ bool is_float = true); ++ void cmp_cmov(FloatRegister op1, ++ FloatRegister op2, ++ FloatRegister dst, ++ FloatRegister src, ++ CMCompare cmp = EQ, ++ bool is_float = true); ++ void cmp_cmov(Register op1, ++ Register op2, ++ FloatRegister dst, ++ FloatRegister src, ++ FloatRegister tmp1, ++ FloatRegister tmp2, ++ CMCompare cmp = EQ); ++ ++ void membar(Membar_mask_bits hint); ++ ++ void bind(Label& L) { ++ Assembler::bind(L); ++ code()->clear_last_insn(); ++ } ++ ++ // CRC32 code for java.util.zip.CRC32::update() instrinsic. ++ void update_byte_crc32(Register crc, Register val, Register table); ++ ++ // CRC32 code for java.util.zip.CRC32::updateBytes() instrinsic. ++ void kernel_crc32(Register crc, Register buf, Register len, Register tmp); ++ ++ // CRC32C code for java.util.zip.CRC32C::updateBytes() instrinsic. ++ void kernel_crc32c(Register crc, Register buf, Register len, Register tmp); ++ ++ // Code for java.lang.StringCoding::hasNegatives() instrinsic. ++ void has_negatives(Register ary1, Register len, Register result); ++ ++ // Code for java.lang.StringUTF16::compress intrinsic. ++ void char_array_compress(Register src, Register dst, Register len, ++ Register result, Register tmp1, ++ Register tmp2, Register tmp3); ++ ++ // Code for java.lang.StringLatin1::inflate intrinsic. ++ void byte_array_inflate(Register src, Register dst, Register len, ++ Register tmp1, Register tmp2); ++ ++ // Encode UTF16 to ISO_8859_1 or ASCII. ++ // Return len on success or position of first mismatch. ++ void encode_iso_array(Register src, Register dst, ++ Register len, Register result, ++ Register tmp1, Register tmp2, ++ Register tmp3, bool ascii); ++ ++ // Code for java.math.BigInteger::mulAdd intrinsic. ++ void mul_add(Register out, Register in, Register offset, ++ Register len, Register k); ++ ++ void movoop(Register dst, jobject obj, bool immediate = false); ++ ++#undef VIRTUAL ++ ++private: ++ void push(unsigned int bitset); ++ void pop(unsigned int bitset); ++ void push_fpu(unsigned int bitset); ++ void pop_fpu(unsigned int bitset); ++ void push_vp(unsigned int bitset); ++ void pop_vp(unsigned int bitset); ++ ++ // Check the current thread doesn't need a cross modify fence. 
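// For reference, the byte-at-a-time step that update_byte_crc32() above emits
// in assembly is the classic table-driven CRC-32 update; written as plain C++
// (illustration only, using <cstdint> fixed-width types) it is:
//
//   static inline uint32_t crc32_step(uint32_t crc, uint8_t val,
//                                     const uint32_t* table) {
//     return (crc >> 8) ^ table[(crc ^ val) & 0xff];
//   }
//
// The initial and final bit inversions required by java.util.zip.CRC32 are
// handled around the loop by the intrinsic's callers, not in this step.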
++ void verify_cross_modify_fence_not_required() PRODUCT_RETURN; ++ void generate_kernel_sin(FloatRegister x, bool iyIsOne, address dsin_coef); ++ void generate_kernel_cos(FloatRegister x, address dcos_coef); ++ void generate__ieee754_rem_pio2(address npio2_hw, address two_over_pi, address pio2); ++ void generate__kernel_rem_pio2(address two_over_pi, address pio2); ++}; ++ ++/** ++ * class SkipIfEqual: ++ * ++ * Instantiating this class will result in assembly code being output that will ++ * jump around any code emitted between the creation of the instance and it's ++ * automatic destruction at the end of a scope block, depending on the value of ++ * the flag passed to the constructor, which will be checked at run-time. ++ */ ++class SkipIfEqual { ++private: ++ MacroAssembler* _masm; ++ Label _label; ++ ++public: ++ inline SkipIfEqual(MacroAssembler* masm, const bool* flag_addr, bool value) ++ : _masm(masm) { ++ _masm->li(AT, (address)flag_addr); ++ _masm->ld_b(AT, AT, 0); ++ if (value) { ++ _masm->bne(AT, R0, _label); ++ } else { ++ _masm->beq(AT, R0, _label); ++ } ++ } ++ ++ ~SkipIfEqual(); ++}; ++ ++#ifdef ASSERT ++inline bool AbstractAssembler::pd_check_instruction_mark() { return true; } ++#endif ++ ++struct tableswitch { ++ Register _reg; ++ int _insn_index; jint _first_key; jint _last_key; ++ Label _after; ++ Label _branches; ++}; ++ ++#endif // CPU_LOONGARCH_MACROASSEMBLER_LOONGARCH_HPP +diff --git a/src/hotspot/cpu/loongarch/macroAssembler_loongarch.inline.hpp b/src/hotspot/cpu/loongarch/macroAssembler_loongarch.inline.hpp +new file mode 100644 +index 00000000000..49302590c37 +--- /dev/null ++++ b/src/hotspot/cpu/loongarch/macroAssembler_loongarch.inline.hpp +@@ -0,0 +1,34 @@ ++/* ++ * Copyright (c) 1997, 2013, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2017, 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ * ++ */ ++ ++#ifndef CPU_LOONGARCH_MACROASSEMBLER_LOONGARCH_INLINE_HPP ++#define CPU_LOONGARCH_MACROASSEMBLER_LOONGARCH_INLINE_HPP ++ ++#include "asm/assembler.inline.hpp" ++#include "asm/macroAssembler.hpp" ++#include "asm/codeBuffer.hpp" ++#include "code/codeCache.hpp" ++ ++#endif // CPU_LOONGARCH_MACROASSEMBLER_LOONGARCH_INLINE_HPP +diff --git a/src/hotspot/cpu/loongarch/macroAssembler_loongarch_trig.cpp b/src/hotspot/cpu/loongarch/macroAssembler_loongarch_trig.cpp +new file mode 100644 +index 00000000000..63b5b0da7e7 +--- /dev/null ++++ b/src/hotspot/cpu/loongarch/macroAssembler_loongarch_trig.cpp +@@ -0,0 +1,1633 @@ ++/* Copyright (c) 2018, 2020, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2018, Cavium. All rights reserved. (By BELLSOFT) ++ * Copyright (c) 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#include "precompiled.hpp" ++#include "asm/assembler.hpp" ++#include "asm/assembler.inline.hpp" ++#include "macroAssembler_loongarch.hpp" ++ ++#define A0 RA0 ++#define A1 RA1 ++#define A2 RA2 ++#define A3 RA3 ++#define A4 RA4 ++#define A5 RA5 ++#define A6 RA6 ++#define A7 RA7 ++#define T0 RT0 ++#define T1 RT1 ++#define T2 RT2 ++#define T3 RT3 ++#define T4 RT4 ++#define T5 RT5 ++#define T6 RT6 ++#define T7 RT7 ++#define T8 RT8 ++ ++// The following code is a optimized version of fdlibm sin/cos implementation ++// (C code is in share/runtime/sharedRuntimeTrig.cpp) adapted for LOONGARCH64. ++ ++// Please refer to sin/cos approximation via polynomial and ++// trigonometric argument reduction techniques to the following literature: ++// ++// [1] Muller, Jean-Michel, Nicolas Brisebarre, Florent De Dinechin, ++// Claude-Pierre Jeannerod, Vincent Lefevre, Guillaume Melquiond, ++// Nathalie Revol, Damien Stehlé, and Serge Torres: ++// Handbook of floating-point arithmetic. ++// Springer Science & Business Media, 2009. ++// [2] K. C. Ng ++// Argument Reduction for Huge Arguments: Good to the Last Bit ++// July 13, 1992, SunPro ++// ++// HOW TO READ THIS CODE: ++// This code consists of several functions. Each function has following header: ++// 1) Description ++// 2) C-pseudo code with differences from fdlibm marked by comments starting ++// with "NOTE". 
Check unmodified fdlibm code in ++// share/runtime/SharedRuntimeTrig.cpp ++// 3) Brief textual description of changes between fdlibm and current ++// implementation along with optimization notes (if applicable) ++// 4) Assumptions, input and output ++// 5) (Optional) additional notes about intrinsic implementation ++// Each function is separated in blocks which follow the pseudo-code structure ++// ++// HIGH-LEVEL ALGORITHM DESCRIPTION: ++// - entry point: generate_dsin_dcos(...); ++// - check corner cases: NaN, INF, tiny argument. ++// - check if |x| < Pi/4. Then approximate sin/cos via polynomial (kernel_sin/kernel_cos) ++// -- else proceed to argument reduction routine (__ieee754_rem_pio2) and ++// use reduced argument to get result via kernel_sin/kernel_cos ++// ++// HIGH-LEVEL CHANGES BETWEEN INTRINSICS AND FDLIBM: ++// 1) two_over_pi table fdlibm representation is int[], while intrinsic version ++// has these int values converted to double representation to load converted ++// double values directly (see stubRoutines_aarch4::_two_over_pi) ++// 2) Several loops are unrolled and vectorized: see comments in code after ++// labels: SKIP_F_LOAD, RECOMP_FOR1_CHECK, RECOMP_FOR2 ++// 3) fdlibm npio2_hw table now has "prefix" with constants used in ++// calculation. These constants are loaded from npio2_hw table instead of ++// constructing it in code (see stubRoutines_loongarch64.cpp) ++// 4) Polynomial coefficients for sin and cos are moved to table sin_coef ++// and cos_coef to use the same optimization as in 3). It allows to load most of ++// required constants via single instruction ++// ++// ++// ++///* __ieee754_rem_pio2(x,y) ++// * ++// * returns the remainder of x rem pi/2 in y[0]+y[1] (i.e. like x div pi/2) ++// * x is input argument, y[] is hi and low parts of reduced argument (x) ++// * uses __kernel_rem_pio2() ++// */ ++// // use tables(see stubRoutines_loongarch64.cpp): two_over_pi and modified npio2_hw ++// ++// BEGIN __ieee754_rem_pio2 PSEUDO CODE ++// ++//static int __ieee754_rem_pio2(double x, double *y) { ++// double z,w,t,r,fn; ++// double tx[3]; ++// int e0,i,j,nx,n,ix,hx,i0; ++// ++// i0 = ((*(int*)&two24A)>>30)^1; /* high word index */ ++// hx = *(i0+(int*)&x); /* high word of x */ ++// ix = hx&0x7fffffff; ++// if(ix<0x4002d97c) { /* |x| < 3pi/4, special case with n=+-1 */ ++// if(hx>0) { ++// z = x - pio2_1; ++// if(ix!=0x3ff921fb) { /* 33+53 bit pi is good enough */ ++// y[0] = z - pio2_1t; ++// y[1] = (z-y[0])-pio2_1t; ++// } else { /* near pi/2, use 33+33+53 bit pi */ ++// z -= pio2_2; ++// y[0] = z - pio2_2t; ++// y[1] = (z-y[0])-pio2_2t; ++// } ++// return 1; ++// } else { /* negative x */ ++// z = x + pio2_1; ++// if(ix!=0x3ff921fb) { /* 33+53 bit pi is good enough */ ++// y[0] = z + pio2_1t; ++// y[1] = (z-y[0])+pio2_1t; ++// } else { /* near pi/2, use 33+33+53 bit pi */ ++// z += pio2_2; ++// y[0] = z + pio2_2t; ++// y[1] = (z-y[0])+pio2_2t; ++// } ++// return -1; ++// } ++// } ++// if(ix<=0x413921fb) { /* |x| ~<= 2^19*(pi/2), medium size */ ++// t = fabsd(x); ++// n = (int) (t*invpio2+half); ++// fn = (double)n; ++// r = t-fn*pio2_1; ++// w = fn*pio2_1t; /* 1st round good to 85 bit */ ++// // NOTE: y[0] = r-w; is moved from if/else below to be before "if" ++// y[0] = r-w; ++// if(n<32&&ix!=npio2_hw[n-1]) { ++// // y[0] = r-w; /* quick check no cancellation */ // NOTE: moved earlier ++// } else { ++// j = ix>>20; ++// // y[0] = r-w; // NOTE: moved earlier ++// i = j-(((*(i0+(int*)&y[0]))>>20)&0x7ff); ++// if(i>16) { /* 2nd iteration needed, good to 118 */ ++// 
t = r; ++// w = fn*pio2_2; ++// r = t-w; ++// w = fn*pio2_2t-((t-r)-w); ++// y[0] = r-w; ++// i = j-(((*(i0+(int*)&y[0]))>>20)&0x7ff); ++// if(i>49) { /* 3rd iteration need, 151 bits acc */ ++// t = r; /* will cover all possible cases */ ++// w = fn*pio2_3; ++// r = t-w; ++// w = fn*pio2_3t-((t-r)-w); ++// y[0] = r-w; ++// } ++// } ++// } ++// y[1] = (r-y[0])-w; ++// if(hx<0) {y[0] = -y[0]; y[1] = -y[1]; return -n;} ++// else return n; ++// } ++// /* ++// * all other (large) arguments ++// */ ++// // NOTE: this check is removed, because it was checked in dsin/dcos ++// // if(ix>=0x7ff00000) { /* x is inf or NaN */ ++// // y[0]=y[1]=x-x; return 0; ++// // } ++// /* set z = scalbn(|x|,ilogb(x)-23) */ ++// *(1-i0+(int*)&z) = *(1-i0+(int*)&x); ++// e0 = (ix>>20)-1046; /* e0 = ilogb(z)-23; */ ++// *(i0+(int*)&z) = ix - (e0<<20); ++// ++// // NOTE: "for" loop below in unrolled. See comments in asm code ++// for(i=0;i<2;i++) { ++// tx[i] = (double)((int)(z)); ++// z = (z-tx[i])*two24A; ++// } ++// ++// tx[2] = z; ++// nx = 3; ++// ++// // NOTE: while(tx[nx-1]==zeroA) nx--; is unrolled. See comments in asm code ++// while(tx[nx-1]==zeroA) nx--; /* skip zero term */ ++// ++// n = __kernel_rem_pio2(tx,y,e0,nx,2,two_over_pi); ++// if(hx<0) {y[0] = -y[0]; y[1] = -y[1]; return -n;} ++// return n; ++//} ++// ++// END __ieee754_rem_pio2 PSEUDO CODE ++// ++// Changes between fdlibm and intrinsic for __ieee754_rem_pio2: ++// 1. INF/NaN check for huge argument is removed in comparison with fdlibm ++// code, because this check is already done in dcos/dsin code ++// 2. Most constants are now loaded from table instead of direct initialization ++// 3. Two loops are unrolled ++// Assumptions: ++// 1. Assume |X| >= PI/4 ++// 2. Assume SCR1 = 0x3fe921fb00000000 (~ PI/4) ++// 3. Assume ix = A3 ++// Input and output: ++// 1. Input: X = A0 ++// 2. Return n in A2, y[0] == y0 == FA4, y[1] == y1 == FA5 ++// NOTE: general purpose register names match local variable names in C code ++// NOTE: fpu registers are actively reused. See comments in code about their usage ++void MacroAssembler::generate__ieee754_rem_pio2(address npio2_hw, address two_over_pi, address pio2) { ++ const int64_t PIO2_1t = 0x3DD0B4611A626331ULL; ++ const int64_t PIO2_2 = 0x3DD0B4611A600000ULL; ++ const int64_t PIO2_2t = 0x3BA3198A2E037073ULL; ++ Label X_IS_NEGATIVE, X_IS_MEDIUM_OR_LARGE, X_IS_POSITIVE_LONG_PI, LARGE_ELSE, ++ REDUCTION_DONE, X_IS_MEDIUM_BRANCH_DONE, X_IS_LARGE, NX_SET, ++ X_IS_NEGATIVE_LONG_PI; ++ Register X = A0, n = A2, ix = A3, jv = A4, tmp5 = A5, jx = A6, ++ tmp3 = A7, iqBase = T0, ih = T1, i = T2; ++ FloatRegister v0 = FA0, v1 = FA1, v2 = FA2, v3 = FA3, v4 = FA4, v5 = FA5, v6 = FA6, v7 = FA7, ++ vt = FT1, v24 = FT8, v26 = FT10, v27 = FT11, v28 = FT12, v29 = FT13, v31 = FT15; ++ ++ push2(S0, S1); ++ ++ // initializing constants first ++ li(SCR1, 0x3ff921fb54400000); // PIO2_1 ++ li(SCR2, 0x4002d97c); // 3*PI/4 high word ++ movgr2fr_d(v1, SCR1); // v1 = PIO2_1 ++ bge(ix, SCR2, X_IS_MEDIUM_OR_LARGE); ++ ++ block_comment("if(ix<0x4002d97c) {... 
/* |x| ~< 3pi/4 */ "); { ++ blt(X, R0, X_IS_NEGATIVE); ++ ++ block_comment("if(hx>0) {"); { ++ fsub_d(v2, v0, v1); // v2 = z = x - pio2_1 ++ srli_d(SCR1, SCR1, 32); ++ li(n, 1); ++ beq(ix, SCR1, X_IS_POSITIVE_LONG_PI); ++ ++ block_comment("case: hx > 0 && ix!=0x3ff921fb {"); { /* 33+53 bit pi is good enough */ ++ li(SCR2, PIO2_1t); ++ movgr2fr_d(v27, SCR2); ++ fsub_d(v4, v2, v27); // v4 = y[0] = z - pio2_1t; ++ fsub_d(v5, v2, v4); ++ fsub_d(v5, v5, v27); // v5 = y[1] = (z-y[0])-pio2_1t ++ b(REDUCTION_DONE); ++ } ++ ++ block_comment("case: hx > 0 &*& ix==0x3ff921fb {"); { /* near pi/2, use 33+33+53 bit pi */ ++ bind(X_IS_POSITIVE_LONG_PI); ++ li(SCR1, PIO2_2); ++ li(SCR2, PIO2_2t); ++ movgr2fr_d(v27, SCR1); ++ movgr2fr_d(v6, SCR2); ++ fsub_d(v2, v2, v27); // z-= pio2_2 ++ fsub_d(v4, v2, v6); // y[0] = z - pio2_2t ++ fsub_d(v5, v2, v4); ++ fsub_d(v5, v5, v6); // v5 = (z - y[0]) - pio2_2t ++ b(REDUCTION_DONE); ++ } ++ } ++ ++ block_comment("case: hx <= 0)"); { ++ bind(X_IS_NEGATIVE); ++ fadd_d(v2, v0, v1); // v2 = z = x + pio2_1 ++ srli_d(SCR1, SCR1, 32); ++ li(n, -1); ++ beq(ix, SCR1, X_IS_NEGATIVE_LONG_PI); ++ ++ block_comment("case: hx <= 0 && ix!=0x3ff921fb) {"); { /* 33+53 bit pi is good enough */ ++ li(SCR2, PIO2_1t); ++ movgr2fr_d(v27, SCR2); ++ fadd_d(v4, v2, v27); // v4 = y[0] = z + pio2_1t; ++ fsub_d(v5, v2, v4); ++ fadd_d(v5, v5, v27); // v5 = y[1] = (z-y[0]) + pio2_1t ++ b(REDUCTION_DONE); ++ } ++ ++ block_comment("case: hx <= 0 && ix==0x3ff921fb"); { /* near pi/2, use 33+33+53 bit pi */ ++ bind(X_IS_NEGATIVE_LONG_PI); ++ li(SCR1, PIO2_2); ++ li(SCR2, PIO2_2t); ++ movgr2fr_d(v27, SCR1); ++ movgr2fr_d(v6, SCR2); ++ fadd_d(v2, v2, v27); // z += pio2_2 ++ fadd_d(v4, v2, v6); // y[0] = z + pio2_2t ++ fsub_d(v5, v2, v4); ++ fadd_d(v5, v5, v6); // v5 = (z - y[0]) + pio2_2t ++ b(REDUCTION_DONE); ++ } ++ } ++ } ++ bind(X_IS_MEDIUM_OR_LARGE); ++ li(SCR1, 0x413921fb); ++ blt(SCR1, ix, X_IS_LARGE); // ix < = 0x413921fb ? ++ ++ block_comment("|x| ~<= 2^19*(pi/2), medium size"); { ++ li(ih, npio2_hw); ++ fld_d(v4, ih, 0); ++ fld_d(v5, ih, 8); ++ fld_d(v6, ih, 16); ++ fld_d(v7, ih, 24); ++ fabs_d(v31, v0); // v31 = t = |x| ++ addi_d(ih, ih, 64); ++ fmadd_d(v2, v31, v5, v4); // v2 = t * invpio2 + half (invpio2 = 53 bits of 2/pi, half = 0.5) ++ ftintrz_w_d(vt, v2); // n = (int) v2 ++ movfr2gr_s(n, vt); ++ vfrintrz_d(v2, v2); ++ fnmsub_d(v3, v2, v6, v31); // v3 = r = t - fn * pio2_1 ++ fmul_d(v26, v2, v7); // v26 = w = fn * pio2_1t ++ fsub_d(v4, v3, v26); // y[0] = r - w. Calculated before branch ++ li(SCR1, 32); ++ blt(SCR1, n, LARGE_ELSE); ++ addi_w(tmp5, n, -1); // tmp5 = n - 1 ++ alsl_d(tmp5, tmp5, ih, 2 - 1); ++ ld_w(jv, tmp5, 0); ++ bne(ix, jv, X_IS_MEDIUM_BRANCH_DONE); ++ ++ block_comment("else block for if(n<32&&ix!=npio2_hw[n-1])"); { ++ bind(LARGE_ELSE); ++ movfr2gr_d(jx, v4); ++ srli_d(tmp5, ix, 20); // j = ix >> 20 ++ slli_d(jx, jx, 1); ++ srli_d(tmp3, jx, 32 + 20 + 1); // r7 = j-(((*(i0+(int*)&y[0]))>>20)&0x7ff); ++ sub_d(tmp3, tmp5, tmp3); ++ ++ block_comment("if(i>16)"); { ++ li(SCR1, 16); ++ bge(SCR1, tmp3, X_IS_MEDIUM_BRANCH_DONE); ++ // i > 16. 
2nd iteration needed ++ fld_d(v6, ih, -32); ++ fld_d(v7, ih, -24); ++ fmov_d(v28, v3); // t = r ++ fmul_d(v29, v2, v6); // w = v29 = fn * pio2_2 ++ fsub_d(v3, v28, v29); // r = t - w ++ fsub_d(v31, v28, v3); // v31 = (t - r) ++ fsub_d(v31, v29, v31); // v31 = w - (t - r) = - ((t - r) - w) ++ fmadd_d(v26, v2, v7, v31); // v26 = w = fn*pio2_2t - ((t - r) - w) ++ fsub_d(v4, v3, v26); // y[0] = r - w ++ movfr2gr_d(jx, v4); ++ slli_d(jx, jx, 1); ++ srli_d(tmp3, jx, 32 + 20 + 1); // r7 = j-(((*(i0+(int*)&y[0]))>>20)&0x7ff); ++ sub_d(tmp3, tmp5, tmp3); ++ ++ block_comment("if(i>49)"); { ++ li(SCR1, 49); ++ bge(SCR1, tmp3, X_IS_MEDIUM_BRANCH_DONE); ++ // 3rd iteration need, 151 bits acc ++ fld_d(v6, ih, -16); ++ fld_d(v7, ih, -8); ++ fmov_d(v28, v3); // save "r" ++ fmul_d(v29, v2, v6); // v29 = fn * pio2_3 ++ fsub_d(v3, v28, v29); // r = r - w ++ fsub_d(v31, v28, v3); // v31 = (t - r) ++ fsub_d(v31, v29, v31); // v31 = w - (t - r) = - ((t - r) - w) ++ fmadd_d(v26, v2, v7, v31); // v26 = w = fn*pio2_3t - ((t - r) - w) ++ fsub_d(v4, v3, v26); // y[0] = r - w ++ } ++ } ++ } ++ block_comment("medium x tail"); { ++ bind(X_IS_MEDIUM_BRANCH_DONE); ++ fsub_d(v5, v3, v4); // v5 = y[1] = (r - y[0]) ++ fsub_d(v5, v5, v26); // v5 = y[1] = (r - y[0]) - w ++ blt(R0, X, REDUCTION_DONE); ++ fneg_d(v4, v4); ++ sub_w(n, R0, n); ++ fneg_d(v5, v5); ++ b(REDUCTION_DONE); ++ } ++ } ++ ++ block_comment("all other (large) arguments"); { ++ bind(X_IS_LARGE); ++ srli_d(SCR1, ix, 20); // ix >> 20 ++ li(tmp5, 0x4170000000000000); ++ addi_w(SCR1, SCR1, -1046); // e0 ++ movgr2fr_d(v24, tmp5); // init two24A value ++ slli_w(jv, SCR1, 20); // ix - (e0<<20) ++ sub_w(jv, ix, jv); ++ slli_d(jv, jv, 32); ++ addi_w(SCR2, SCR1, -3); ++ bstrins_d(jv, X, 31, 0); // jv = z ++ li(i, 24); ++ movgr2fr_d(v26, jv); // v26 = z ++ ++ block_comment("unrolled for(i=0;i<2;i++) {tx[i] = (double)((int)(z));z = (z-tx[i])*two24A;}"); { ++ // tx[0,1,2] = v6,v7,v26 ++ vfrintrz_d(v6, v26); // v6 = (double)((int)v26) ++ div_w(jv, SCR2, i); // jv = (e0 - 3)/24 ++ fsub_d(v26, v26, v6); ++ addi_d(SP, SP, -560); ++ fmul_d(v26, v26, v24); ++ vfrintrz_d(v7, v26); // v7 = (double)((int)v26) ++ li(jx, 2); // calculate jx as nx - 1, which is initially 2. Not a part of unrolled loop ++ fsub_d(v26, v26, v7); ++ } ++ ++ block_comment("nx calculation with unrolled while(tx[nx-1]==zeroA) nx--;"); { ++ vxor_v(vt, vt, vt); ++ fcmp_cne_d(FCC0, v26, vt); // if NE then jx == 2. else it's 1 or 0 ++ addi_d(iqBase, SP, 480); // base of iq[] ++ fmul_d(v3, v26, v24); ++ bcnez(FCC0, NX_SET); ++ fcmp_cne_d(FCC0, v7, vt); // v7 == 0 => jx = 0. Else jx = 1 ++ movcf2gr(jx, FCC0); ++ } ++ bind(NX_SET); ++ generate__kernel_rem_pio2(two_over_pi, pio2); ++ // now we have y[0] = v4, y[1] = v5 and n = r2 ++ bge(X, R0, REDUCTION_DONE); ++ fneg_d(v4, v4); ++ fneg_d(v5, v5); ++ sub_w(n, R0, n); ++ } ++ bind(REDUCTION_DONE); ++ ++ pop2(S0, S1); ++} ++ ++///* ++// * __kernel_rem_pio2(x,y,e0,nx,prec,ipio2) ++// * double x[],y[]; int e0,nx,prec; int ipio2[]; ++// * ++// * __kernel_rem_pio2 return the last three digits of N with ++// * y = x - N*pi/2 ++// * so that |y| < pi/2. ++// * ++// * The method is to compute the integer (mod 8) and fraction parts of ++// * (2/pi)*x without doing the full multiplication. In general we ++// * skip the part of the product that are known to be a huge integer ( ++// * more accurately, = 0 mod 8 ). Thus the number of operations are ++// * independent of the exponent of the input. 
++// * ++// * NOTE: 2/pi int representation is converted to double ++// * // (2/pi) is represented by an array of 24-bit integers in ipio2[]. ++// * ++// * Input parameters: ++// * x[] The input value (must be positive) is broken into nx ++// * pieces of 24-bit integers in double precision format. ++// * x[i] will be the i-th 24 bit of x. The scaled exponent ++// * of x[0] is given in input parameter e0 (i.e., x[0]*2^e0 ++// * match x's up to 24 bits. ++// * ++// * Example of breaking a double positive z into x[0]+x[1]+x[2]: ++// * e0 = ilogb(z)-23 ++// * z = scalbn(z,-e0) ++// * for i = 0,1,2 ++// * x[i] = floor(z) ++// * z = (z-x[i])*2**24 ++// * ++// * ++// * y[] ouput result in an array of double precision numbers. ++// * The dimension of y[] is: ++// * 24-bit precision 1 ++// * 53-bit precision 2 ++// * 64-bit precision 2 ++// * 113-bit precision 3 ++// * The actual value is the sum of them. Thus for 113-bit ++// * precsion, one may have to do something like: ++// * ++// * long double t,w,r_head, r_tail; ++// * t = (long double)y[2] + (long double)y[1]; ++// * w = (long double)y[0]; ++// * r_head = t+w; ++// * r_tail = w - (r_head - t); ++// * ++// * e0 The exponent of x[0] ++// * ++// * nx dimension of x[] ++// * ++// * prec an interger indicating the precision: ++// * 0 24 bits (single) ++// * 1 53 bits (double) ++// * 2 64 bits (extended) ++// * 3 113 bits (quad) ++// * ++// * NOTE: ipio2[] array below is converted to double representation ++// * //ipio2[] ++// * // integer array, contains the (24*i)-th to (24*i+23)-th ++// * // bit of 2/pi after binary point. The corresponding ++// * // floating value is ++// * ++// * ipio2[i] * 2^(-24(i+1)). ++// * ++// * Here is the description of some local variables: ++// * ++// * jk jk+1 is the initial number of terms of ipio2[] needed ++// * in the computation. The recommended value is 2,3,4, ++// * 6 for single, double, extended,and quad. ++// * ++// * jz local integer variable indicating the number of ++// * terms of ipio2[] used. ++// * ++// * jx nx - 1 ++// * ++// * jv index for pointing to the suitable ipio2[] for the ++// * computation. In general, we want ++// * ( 2^e0*x[0] * ipio2[jv-1]*2^(-24jv) )/8 ++// * is an integer. Thus ++// * e0-3-24*jv >= 0 or (e0-3)/24 >= jv ++// * Hence jv = max(0,(e0-3)/24). ++// * ++// * jp jp+1 is the number of terms in PIo2[] needed, jp = jk. ++// * ++// * q[] double array with integral value, representing the ++// * 24-bits chunk of the product of x and 2/pi. ++// * ++// * q0 the corresponding exponent of q[0]. Note that the ++// * exponent for q[i] would be q0-24*i. ++// * ++// * PIo2[] double precision array, obtained by cutting pi/2 ++// * into 24 bits chunks. ++// * ++// * f[] ipio2[] in floating point ++// * ++// * iq[] integer array by breaking up q[] in 24-bits chunk. ++// * ++// * fq[] final product of x*(2/pi) in fq[0],..,fq[jk] ++// * ++// * ih integer. If >0 it indicates q[] is >= 0.5, hence ++// * it also indicates the *sign* of the result. ++// * ++// */ ++// ++// Use PIo2 table(see stubRoutines_loongarch64.cpp) ++// ++// BEGIN __kernel_rem_pio2 PSEUDO CODE ++// ++//static int __kernel_rem_pio2(double *x, double *y, int e0, int nx, int prec, /* NOTE: converted to double */ const double *ipio2 // const int *ipio2) { ++// int jz,jx,jv,jp,jk,carry,n,iq[20],i,j,k,m,q0,ih; ++// double z,fw,f[20],fq[20],q[20]; ++// ++// /* initialize jk*/ ++// // jk = init_jk[prec]; // NOTE: prec==2 for double. jk is always 4. 
++// jp = jk; // NOTE: always 4 ++// ++// /* determine jx,jv,q0, note that 3>q0 */ ++// jx = nx-1; ++// jv = (e0-3)/24; if(jv<0) jv=0; ++// q0 = e0-24*(jv+1); ++// ++// /* set up f[0] to f[jx+jk] where f[jx+jk] = ipio2[jv+jk] */ ++// j = jv-jx; m = jx+jk; ++// ++// // NOTE: split into two for-loops: one with zeroB and one with ipio2[j]. It ++// // allows the use of wider loads/stores ++// for(i=0;i<=m;i++,j++) f[i] = (j<0)? zeroB : /* NOTE: converted to double */ ipio2[j]; //(double) ipio2[j]; ++// ++// // NOTE: unrolled and vectorized "for". See comments in asm code ++// /* compute q[0],q[1],...q[jk] */ ++// for (i=0;i<=jk;i++) { ++// for(j=0,fw=0.0;j<=jx;j++) fw += x[j]*f[jx+i-j]; q[i] = fw; ++// } ++// ++// jz = jk; ++//recompute: ++// /* distill q[] into iq[] reversingly */ ++// for(i=0,j=jz,z=q[jz];j>0;i++,j--) { ++// fw = (double)((int)(twon24* z)); ++// iq[i] = (int)(z-two24B*fw); ++// z = q[j-1]+fw; ++// } ++// ++// /* compute n */ ++// z = scalbnA(z,q0); /* actual value of z */ ++// z -= 8.0*floor(z*0.125); /* trim off integer >= 8 */ ++// n = (int) z; ++// z -= (double)n; ++// ih = 0; ++// if(q0>0) { /* need iq[jz-1] to determine n */ ++// i = (iq[jz-1]>>(24-q0)); n += i; ++// iq[jz-1] -= i<<(24-q0); ++// ih = iq[jz-1]>>(23-q0); ++// } ++// else if(q0==0) ih = iq[jz-1]>>23; ++// else if(z>=0.5) ih=2; ++// ++// if(ih>0) { /* q > 0.5 */ ++// n += 1; carry = 0; ++// for(i=0;i0) { /* rare case: chance is 1 in 12 */ ++// switch(q0) { ++// case 1: ++// iq[jz-1] &= 0x7fffff; break; ++// case 2: ++// iq[jz-1] &= 0x3fffff; break; ++// } ++// } ++// if(ih==2) { ++// z = one - z; ++// if(carry!=0) z -= scalbnA(one,q0); ++// } ++// } ++// ++// /* check if recomputation is needed */ ++// if(z==zeroB) { ++// j = 0; ++// for (i=jz-1;i>=jk;i--) j |= iq[i]; ++// if(j==0) { /* need recomputation */ ++// for(k=1;iq[jk-k]==0;k++); /* k = no. of terms needed */ ++// ++// for(i=jz+1;i<=jz+k;i++) { /* add q[jz+1] to q[jz+k] */ ++// f[jx+i] = /* NOTE: converted to double */ ipio2[jv+i]; //(double) ipio2[jv+i]; ++// for(j=0,fw=0.0;j<=jx;j++) fw += x[j]*f[jx+i-j]; ++// q[i] = fw; ++// } ++// jz += k; ++// goto recompute; ++// } ++// } ++// ++// /* chop off zero terms */ ++// if(z==0.0) { ++// jz -= 1; q0 -= 24; ++// while(iq[jz]==0) { jz--; q0-=24;} ++// } else { /* break z into 24-bit if necessary */ ++// z = scalbnA(z,-q0); ++// if(z>=two24B) { ++// fw = (double)((int)(twon24*z)); ++// iq[jz] = (int)(z-two24B*fw); ++// jz += 1; q0 += 24; ++// iq[jz] = (int) fw; ++// } else iq[jz] = (int) z ; ++// } ++// ++// /* convert integer "bit" chunk to floating-point value */ ++// fw = scalbnA(one,q0); ++// for(i=jz;i>=0;i--) { ++// q[i] = fw*(double)iq[i]; fw*=twon24; ++// } ++// ++// /* compute PIo2[0,...,jp]*q[jz,...,0] */ ++// for(i=jz;i>=0;i--) { ++// for(fw=0.0,k=0;k<=jp&&k<=jz-i;k++) fw += PIo2[k]*q[i+k]; ++// fq[jz-i] = fw; ++// } ++// ++// // NOTE: switch below is eliminated, because prec is always 2 for doubles ++// /* compress fq[] into y[] */ ++// //switch(prec) { ++// //case 0: ++// // fw = 0.0; ++// // for (i=jz;i>=0;i--) fw += fq[i]; ++// // y[0] = (ih==0)? fw: -fw; ++// // break; ++// //case 1: ++// //case 2: ++// fw = 0.0; ++// for (i=jz;i>=0;i--) fw += fq[i]; ++// y[0] = (ih==0)? fw: -fw; ++// fw = fq[0]-fw; ++// for (i=1;i<=jz;i++) fw += fq[i]; ++// y[1] = (ih==0)? 
fw: -fw; ++// // break; ++// //case 3: /* painful */ ++// // for (i=jz;i>0;i--) { ++// // fw = fq[i-1]+fq[i]; ++// // fq[i] += fq[i-1]-fw; ++// // fq[i-1] = fw; ++// // } ++// // for (i=jz;i>1;i--) { ++// // fw = fq[i-1]+fq[i]; ++// // fq[i] += fq[i-1]-fw; ++// // fq[i-1] = fw; ++// // } ++// // for (fw=0.0,i=jz;i>=2;i--) fw += fq[i]; ++// // if(ih==0) { ++// // y[0] = fq[0]; y[1] = fq[1]; y[2] = fw; ++// // } else { ++// // y[0] = -fq[0]; y[1] = -fq[1]; y[2] = -fw; ++// // } ++// //} ++// return n&7; ++//} ++// ++// END __kernel_rem_pio2 PSEUDO CODE ++// ++// Changes between fdlibm and intrinsic: ++// 1. One loop is unrolled and vectorized (see comments in code) ++// 2. One loop is split into 2 loops (see comments in code) ++// 3. Non-double code is removed(last switch). Sevaral variables became ++// constants because of that (see comments in code) ++// 4. Use of jx, which is nx-1 instead of nx ++// Assumptions: ++// 1. Assume |X| >= PI/4 ++// Input and output: ++// 1. Input: X = A0, jx == nx - 1 == A6, e0 == SCR1 ++// 2. Return n in A2, y[0] == y0 == FA4, y[1] == y1 == FA5 ++// NOTE: general purpose register names match local variable names in C code ++// NOTE: fpu registers are actively reused. See comments in code about their usage ++void MacroAssembler::generate__kernel_rem_pio2(address two_over_pi, address pio2) { ++ Label Q_DONE, JX_IS_0, JX_IS_2, COMP_INNER_LOOP, RECOMP_FOR2, Q0_ZERO_CMP_LT, ++ RECOMP_CHECK_DONE_NOT_ZERO, Q0_ZERO_CMP_DONE, COMP_FOR, Q0_ZERO_CMP_EQ, ++ INIT_F_ZERO, RECOMPUTE, IH_FOR_INCREMENT, IH_FOR_STORE, RECOMP_CHECK_DONE, ++ Z_IS_LESS_THAN_TWO24B, Z_IS_ZERO, FW_Y1_NO_NEGATION, ++ RECOMP_FW_UPDATED, Z_ZERO_CHECK_DONE, FW_FOR1, IH_AFTER_SWITCH, IH_HANDLED, ++ CONVERTION_FOR, FW_Y0_NO_NEGATION, FW_FOR1_DONE, FW_FOR2, FW_FOR2_DONE, ++ IH_FOR, SKIP_F_LOAD, RECOMP_FOR1, RECOMP_FIRST_FOR, INIT_F_COPY, ++ RECOMP_FOR1_CHECK; ++ Register tmp2 = A1, n = A2, jv = A4, tmp5 = A5, jx = A6, ++ tmp3 = A7, iqBase = T0, ih = T1, i = T2, tmp1 = T3, ++ jz = S0, j = T5, twoOverPiBase = T6, tmp4 = S1, qBase = T8; ++ FloatRegister v0 = FA0, v1 = FA1, v2 = FA2, v3 = FA3, v4 = FA4, v5 = FA5, v6 = FA6, v7 = FA7, ++ vt = FT1, v17 = FT2, v18 = FT3, v19 = FT4, v20 = FT5, v21 = FT6, v22 = FT7, v24 = FT8, ++ v25 = FT9, v26 = FT10, v27 = FT11, v28 = FT12, v29 = FT13, v30 = FT14, v31 = FT15; ++ // jp = jk == init_jk[prec] = init_jk[2] == {2,3,4,6}[2] == 4 ++ // jx = nx - 1 ++ li(twoOverPiBase, two_over_pi); ++ slti(SCR2, jv, 0); ++ addi_w(tmp4, jx, 4); // tmp4 = m = jx + jk = jx + 4. jx is in {0,1,2} so m is in [4,5,6] ++ masknez(jv, jv, SCR2); ++ if (UseLASX) ++ xvxor_v(v26, v26, v26); ++ else ++ vxor_v(v26, v26, v26); ++ addi_w(tmp5, jv, 1); // jv+1 ++ sub_w(j, jv, jx); ++ addi_d(qBase, SP, 320); // base of q[] ++ mul_w(SCR2, i, tmp5); // q0 = e0-24*(jv+1) ++ sub_w(SCR1, SCR1, SCR2); ++ // use double f[20], fq[20], q[20], iq[20] on stack, which is ++ // (20 + 20 + 20) x 8 + 20 x 4 = 560 bytes. From lower to upper addresses it ++ // will contain f[20], fq[20], q[20], iq[20] ++ // now initialize f[20] indexes 0..m (inclusive) ++ // for(i=0;i<=m;i++,j++) f[i] = (j<0)? zeroB : /* NOTE: converted to double */ ipio2[j]; // (double) ipio2[j]; ++ move(tmp5, SP); ++ ++ block_comment("for(i=0;i<=m;i++,j++) f[i] = (j<0)? 
zeroB : /* NOTE: converted to double */ ipio2[j]; // (double) ipio2[j];"); { ++ xorr(i, i, i); ++ bge(j, R0, INIT_F_COPY); ++ bind(INIT_F_ZERO); ++ if (UseLASX) { ++ xvst(v26, tmp5, 0); ++ } else { ++ vst(v26, tmp5, 0); ++ vst(v26, tmp5, 16); ++ } ++ addi_d(tmp5, tmp5, 32); ++ addi_w(i, i, 4); ++ addi_w(j, j, 4); ++ blt(j, R0, INIT_F_ZERO); ++ sub_w(i, i, j); ++ move(j, R0); ++ bind(INIT_F_COPY); ++ alsl_d(tmp1, j, twoOverPiBase, 3 - 1); // ipio2[j] start address ++ if (UseLASX) { ++ xvld(v18, tmp1, 0); ++ xvld(v19, tmp1, 32); ++ } else { ++ vld(v18, tmp1, 0); ++ vld(v19, tmp1, 16); ++ vld(v20, tmp1, 32); ++ vld(v21, tmp1, 48); ++ } ++ alsl_d(tmp5, i, SP, 3 - 1); ++ if (UseLASX) { ++ xvst(v18, tmp5, 0); ++ xvst(v19, tmp5, 32); ++ } else { ++ vst(v18, tmp5, 0); ++ vst(v19, tmp5, 16); ++ vst(v20, tmp5, 32); ++ vst(v21, tmp5, 48); ++ } ++ } ++ // v18..v21 can actually contain f[0..7] ++ beqz(i, SKIP_F_LOAD); // i == 0 => f[i] == f[0] => already loaded ++ if (UseLASX) { ++ xvld(v18, SP, 0); // load f[0..7] ++ xvld(v19, SP, 32); ++ } else { ++ vld(v18, SP, 0); // load f[0..7] ++ vld(v19, SP, 16); ++ vld(v20, SP, 32); ++ vld(v21, SP, 48); ++ } ++ bind(SKIP_F_LOAD); ++ // calculate 2^q0 and 2^-q0, which we'll need further. ++ // q0 is exponent. So, calculate biased exponent(q0+1023) ++ sub_w(tmp4, R0, SCR1); ++ addi_w(tmp5, SCR1, 1023); ++ addi_w(tmp4, tmp4, 1023); ++ // Unroll following for(s) depending on jx in [0,1,2] ++ // for (i=0;i<=jk;i++) { ++ // for(j=0,fw=0.0;j<=jx;j++) fw += x[j]*f[jx+i-j]; q[i] = fw; ++ // } ++ // Unrolling for jx == 0 case: ++ // q[0] = x[0] * f[0] ++ // q[1] = x[0] * f[1] ++ // q[2] = x[0] * f[2] ++ // q[3] = x[0] * f[3] ++ // q[4] = x[0] * f[4] ++ // ++ // Vectorization for unrolled jx == 0 case: ++ // {q[0], q[1]} = {f[0], f[1]} * x[0] ++ // {q[2], q[3]} = {f[2], f[3]} * x[0] ++ // q[4] = f[4] * x[0] ++ // ++ // Unrolling for jx == 1 case: ++ // q[0] = x[0] * f[1] + x[1] * f[0] ++ // q[1] = x[0] * f[2] + x[1] * f[1] ++ // q[2] = x[0] * f[3] + x[1] * f[2] ++ // q[3] = x[0] * f[4] + x[1] * f[3] ++ // q[4] = x[0] * f[5] + x[1] * f[4] ++ // ++ // Vectorization for unrolled jx == 1 case: ++ // {q[0], q[1]} = {f[0], f[1]} * x[1] ++ // {q[2], q[3]} = {f[2], f[3]} * x[1] ++ // q[4] = f[4] * x[1] ++ // {q[0], q[1]} += {f[1], f[2]} * x[0] ++ // {q[2], q[3]} += {f[3], f[4]} * x[0] ++ // q[4] += f[5] * x[0] ++ // ++ // Unrolling for jx == 2 case: ++ // q[0] = x[0] * f[2] + x[1] * f[1] + x[2] * f[0] ++ // q[1] = x[0] * f[3] + x[1] * f[2] + x[2] * f[1] ++ // q[2] = x[0] * f[4] + x[1] * f[3] + x[2] * f[2] ++ // q[3] = x[0] * f[5] + x[1] * f[4] + x[2] * f[3] ++ // q[4] = x[0] * f[6] + x[1] * f[5] + x[2] * f[4] ++ // ++ // Vectorization for unrolled jx == 2 case: ++ // {q[0], q[1]} = {f[0], f[1]} * x[2] ++ // {q[2], q[3]} = {f[2], f[3]} * x[2] ++ // q[4] = f[4] * x[2] ++ // {q[0], q[1]} += {f[1], f[2]} * x[1] ++ // {q[2], q[3]} += {f[3], f[4]} * x[1] ++ // q[4] += f[5] * x[1] ++ // {q[0], q[1]} += {f[2], f[3]} * x[0] ++ // {q[2], q[3]} += {f[4], f[5]} * x[0] ++ // q[4] += f[6] * x[0] ++ block_comment("unrolled and vectorized computation of q[0]..q[jk]"); { ++ li(SCR2, 1); ++ slli_d(tmp5, tmp5, 52); // now it's 2^q0 double value ++ slli_d(tmp4, tmp4, 52); // now it's 2^-q0 double value ++ if (UseLASX) ++ xvpermi_d(v6, v6, 0); ++ else ++ vreplvei_d(v6, v6, 0); ++ blt(jx, SCR2, JX_IS_0); ++ addi_d(i, SP, 8); ++ if (UseLASX) { ++ xvld(v26, i, 0); // load f[1..4] ++ xvpermi_d(v3, v3, 0); ++ xvpermi_d(v7, v7, 0); ++ xvpermi_d(v20, v19, 85); ++ xvpermi_d(v21, v19, 170); ++ } else { ++ 
vld(v26, i, 0); // load f[1..4] ++ vld(v27, i, 16); ++ vreplvei_d(v3, v3, 0); ++ vreplvei_d(v7, v7, 0); ++ vreplvei_d(vt, v20, 1); ++ vreplvei_d(v21, v21, 0); ++ } ++ blt(SCR2, jx, JX_IS_2); ++ // jx == 1 ++ if (UseLASX) { ++ xvfmul_d(v28, v18, v7); // f[0,3] * x[1] ++ fmul_d(v30, v19, v7); // f[4] * x[1] ++ xvfmadd_d(v28, v26, v6, v28); ++ fmadd_d(v30, v6, v20, v30); // v30 += f[5] * x[0] ++ } else { ++ vfmul_d(v28, v18, v7); // f[0,1] * x[1] ++ vfmul_d(v29, v19, v7); // f[2,3] * x[1] ++ fmul_d(v30, v20, v7); // f[4] * x[1] ++ vfmadd_d(v28, v26, v6, v28); ++ vfmadd_d(v29, v27, v6, v29); ++ fmadd_d(v30, v6, vt, v30); // v30 += f[5] * x[0] ++ } ++ b(Q_DONE); ++ bind(JX_IS_2); ++ if (UseLASX) { ++ xvfmul_d(v28, v18, v3); // f[0,3] * x[2] ++ fmul_d(v30, v19, v3); // f[4] * x[2] ++ xvfmadd_d(v28, v26, v7, v28); ++ fmadd_d(v30, v7, v20, v30); // v30 += f[5] * x[1] ++ xvpermi_q(v18, v19, 3); ++ xvfmadd_d(v28, v18, v6, v28); ++ } else { ++ vfmul_d(v28, v18, v3); // f[0,1] * x[2] ++ vfmul_d(v29, v19, v3); // f[2,3] * x[2] ++ fmul_d(v30, v20, v3); // f[4] * x[2] ++ vfmadd_d(v28, v26, v7, v28); ++ vfmadd_d(v29, v27, v7, v29); ++ fmadd_d(v30, v7, vt, v30); // v30 += f[5] * x[1] ++ vfmadd_d(v28, v19, v6, v28); ++ vfmadd_d(v29, v20, v6, v29); ++ } ++ fmadd_d(v30, v6, v21, v30); // v30 += f[6] * x[0] ++ b(Q_DONE); ++ bind(JX_IS_0); ++ if (UseLASX) { ++ xvfmul_d(v28, v18, v6); // f[0,1] * x[0] ++ fmul_d(v30, v19, v6); // f[4] * x[0] ++ } else { ++ vfmul_d(v28, v18, v6); // f[0,1] * x[0] ++ vfmul_d(v29, v19, v6); // f[2,3] * x[0] ++ fmul_d(v30, v20, v6); // f[4] * x[0] ++ } ++ bind(Q_DONE); ++ if (UseLASX) { ++ xvst(v28, qBase, 0); // save calculated q[0]...q[jk] ++ } else { ++ vst(v28, qBase, 0); // save calculated q[0]...q[jk] ++ vst(v29, qBase, 16); ++ } ++ fst_d(v30, qBase, 32); ++ } ++ li(i, 0x3E70000000000000); ++ li(jz, 4); ++ movgr2fr_d(v17, i); // v17 = twon24 ++ movgr2fr_d(v30, tmp5); // 2^q0 ++ vldi(v21, -960); // 0.125 (0x3fc0000000000000) ++ vldi(v20, -992); // 8.0 (0x4020000000000000) ++ movgr2fr_d(v22, tmp4); // 2^-q0 ++ ++ block_comment("recompute loop"); { ++ bind(RECOMPUTE); ++ // for(i=0,j=jz,z=q[jz];j>0;i++,j--) { ++ // fw = (double)((int)(twon24* z)); ++ // iq[i] = (int)(z-two24A*fw); ++ // z = q[j-1]+fw; ++ // } ++ block_comment("distill q[] into iq[] reversingly"); { ++ xorr(i, i, i); ++ move(j, jz); ++ alsl_d(tmp2, jz, qBase, 3 - 1); // q[jz] address ++ fld_d(v18, tmp2, 0); // z = q[j] and moving address to q[j-1] ++ addi_d(tmp2, tmp2, -8); ++ bind(RECOMP_FIRST_FOR); ++ fld_d(v27, tmp2, 0); ++ addi_d(tmp2, tmp2, -8); ++ fmul_d(v29, v17, v18); // twon24*z ++ vfrintrz_d(v29, v29); // (double)(int) ++ fnmsub_d(v28, v24, v29, v18); // v28 = z-two24A*fw ++ ftintrz_w_d(vt, v28); // (int)(z-two24A*fw) ++ alsl_d(SCR2, i, iqBase, 2 - 1); ++ fst_s(vt, SCR2, 0); ++ fadd_d(v18, v27, v29); ++ addi_w(i, i, 1); ++ addi_w(j, j, -1); ++ blt(R0, j, RECOMP_FIRST_FOR); ++ } ++ // compute n ++ fmul_d(v18, v18, v30); ++ fmul_d(v2, v18, v21); ++ vfrintrm_d(v2, v2); // v2 = floor(v2) == rounding towards -inf ++ fnmsub_d(v18, v2, v20, v18); // z -= 8.0*floor(z*0.125); ++ li(ih, 2); ++ vfrintrz_d(v2, v18); // v2 = (double)((int)z) ++ ftintrz_w_d(vt, v18); // n = (int) z; ++ movfr2gr_s(n, vt); ++ fsub_d(v18, v18, v2); // z -= (double)n; ++ ++ block_comment("q0-dependent initialization"); { ++ blt(SCR1, R0, Q0_ZERO_CMP_LT); // if (q0 > 0) ++ addi_w(j, jz, -1); // j = jz - 1 ++ alsl_d(SCR2, j, iqBase, 2 - 1); ++ ld_w(tmp2, SCR2, 0); // tmp2 = iq[jz-1] ++ beq(SCR1, R0, Q0_ZERO_CMP_EQ); ++ li(tmp4, 24); ++ 
sub_w(tmp4, tmp4, SCR1); // == 24 - q0 ++ srl_w(i, tmp2, tmp4); // i = iq[jz-1] >> (24-q0) ++ sll_w(tmp5, i, tmp4); ++ sub_w(tmp2, tmp2, tmp5); // iq[jz-1] -= i<<(24-q0); ++ alsl_d(SCR2, j, iqBase, 2 - 1); ++ st_w(tmp2, SCR2, 0); // store iq[jz-1] ++ addi_w(SCR2, tmp4, -1); // == 23 - q0 ++ add_w(n, n, i); // n+=i ++ srl_w(ih, tmp2, SCR2); // ih = iq[jz-1] >> (23-q0) ++ b(Q0_ZERO_CMP_DONE); ++ bind(Q0_ZERO_CMP_EQ); ++ srli_d(ih, tmp2, 23); // ih = iq[z-1] >> 23 ++ b(Q0_ZERO_CMP_DONE); ++ bind(Q0_ZERO_CMP_LT); ++ vldi(v4, -928); // 0.5 (0x3fe0000000000000) ++ fcmp_clt_d(FCC0, v18, v4); ++ movcf2gr(SCR2, FCC0); ++ masknez(ih, ih, SCR2); // if (z<0.5) ih = 0 ++ } ++ bind(Q0_ZERO_CMP_DONE); ++ bge(R0, ih, IH_HANDLED); ++ ++ block_comment("if(ih>) {"); { ++ // use rscratch2 as carry ++ ++ block_comment("for(i=0;i0) {"); { ++ bge(R0, SCR1, IH_AFTER_SWITCH); ++ // tmp3 still has iq[jz-1] value. no need to reload ++ // now, zero high tmp3 bits (rscratch1 number of bits) ++ li(j, 0xffffffff); ++ addi_w(i, jz, -1); // set i to jz-1 ++ srl_d(j, j, SCR1); ++ srli_w(tmp1, j, 8); ++ andr(tmp3, tmp3, tmp1); // we have 24-bit-based constants ++ alsl_d(tmp1, i, iqBase, 2 - 1); ++ st_w(tmp3, tmp1, 0); // save iq[jz-1] ++ } ++ bind(IH_AFTER_SWITCH); ++ li(tmp1, 2); ++ bne(ih, tmp1, IH_HANDLED); ++ ++ block_comment("if(ih==2) {"); { ++ vldi(v25, -912); // 1.0 (0x3ff0000000000000) ++ fsub_d(v18, v25, v18); // z = one - z; ++ beqz(SCR2, IH_HANDLED); ++ fsub_d(v18, v18, v30); // z -= scalbnA(one,q0); ++ } ++ } ++ bind(IH_HANDLED); ++ // check if recomputation is needed ++ vxor_v(vt, vt, vt); ++ fcmp_cne_d(FCC0, v18, vt); ++ bcnez(FCC0, RECOMP_CHECK_DONE_NOT_ZERO); ++ ++ block_comment("if(z==zeroB) {"); { ++ ++ block_comment("for (i=jz-1;i>=jk;i--) j |= iq[i];"); { ++ addi_w(i, jz, -1); ++ xorr(j, j, j); ++ b(RECOMP_FOR1_CHECK); ++ bind(RECOMP_FOR1); ++ alsl_d(tmp1, i, iqBase, 2 - 1); ++ ld_w(tmp1, tmp1, 0); ++ orr(j, j, tmp1); ++ addi_w(i, i, -1); ++ bind(RECOMP_FOR1_CHECK); ++ li(SCR2, 4); ++ bge(i, SCR2, RECOMP_FOR1); ++ } ++ bnez(j, RECOMP_CHECK_DONE); ++ ++ block_comment("if(j==0) {"); { ++ // for(k=1;iq[jk-k]==0;k++); // let's unroll it. jk == 4. So, read ++ // iq[3], iq[2], iq[1], iq[0] until non-zero value ++ ld_d(tmp1, iqBase, 0); // iq[0..3] ++ ld_d(tmp3, iqBase, 8); ++ li(j, 2); ++ masknez(tmp1, tmp1, tmp3); // set register for further consideration ++ orr(tmp1, tmp1, tmp3); ++ masknez(j, j, tmp3); // set initial k. Use j as k ++ srli_d(SCR2, tmp1, 32); ++ sltu(SCR2, R0, SCR2); ++ addi_w(i, jz, 1); ++ add_w(j, j, SCR2); ++ ++ block_comment("for(i=jz+1;i<=jz+k;i++) {...}"); { ++ add_w(jz, i, j); // i = jz+1, j = k-1. 
j+i = jz+k (which is a new jz) ++ bind(RECOMP_FOR2); ++ add_w(tmp1, jv, i); ++ alsl_d(SCR2, tmp1, twoOverPiBase, 3 - 1); ++ fld_d(v29, SCR2, 0); ++ add_w(tmp2, jx, i); ++ alsl_d(SCR2, tmp2, SP, 3 - 1); ++ fst_d(v29, SCR2, 0); ++ // f[jx+i] = /* NOTE: converted to double */ ipio2[jv+i]; //(double) ipio2[jv+i]; ++ // since jx = 0, 1 or 2 we can unroll it: ++ // for(j=0,fw=0.0;j<=jx;j++) fw += x[j]*f[jx+i-j]; ++ // f[jx+i-j] == (for first iteration) f[jx+i], which is already v29 ++ alsl_d(tmp2, tmp2, SP, 3 - 1); // address of f[jx+i] ++ fld_d(v4, tmp2, -16); // load f[jx+i-2] and f[jx+i-1] ++ fld_d(v5, tmp2, -8); ++ fmul_d(v26, v6, v29); // initial fw ++ beqz(jx, RECOMP_FW_UPDATED); ++ fmadd_d(v26, v7, v5, v26); ++ li(SCR2, 1); ++ beq(jx, SCR2, RECOMP_FW_UPDATED); ++ fmadd_d(v26, v3, v4, v26); ++ bind(RECOMP_FW_UPDATED); ++ alsl_d(SCR2, i, qBase, 3 - 1); ++ fst_d(v26, SCR2, 0); // q[i] = fw; ++ addi_w(i, i, 1); ++ bge(jz, i, RECOMP_FOR2); // jz here is "old jz" + k ++ } ++ b(RECOMPUTE); ++ } ++ } ++ } ++ bind(RECOMP_CHECK_DONE); ++ // chop off zero terms ++ vxor_v(vt, vt, vt); ++ fcmp_ceq_d(FCC0, v18, vt); ++ bcnez(FCC0, Z_IS_ZERO); ++ ++ block_comment("else block of if(z==0.0) {"); { ++ bind(RECOMP_CHECK_DONE_NOT_ZERO); ++ fmul_d(v18, v18, v22); ++ fcmp_clt_d(FCC0, v18, v24); // v24 is stil two24A ++ bcnez(FCC0, Z_IS_LESS_THAN_TWO24B); ++ fmul_d(v1, v18, v17); // twon24*z ++ vfrintrz_d(v1, v1); // v1 = (double)(int)(v1) ++ fnmsub_d(v2, v24, v1, v18); ++ ftintrz_w_d(vt, v1); // (int)fw ++ movfr2gr_s(tmp3, vt); ++ ftintrz_w_d(vt, v2); // double to int ++ movfr2gr_s(tmp2, vt); ++ alsl_d(SCR2, jz, iqBase, 2 - 1); ++ st_w(tmp2, SCR2, 0); ++ addi_w(SCR1, SCR1, 24); ++ addi_w(jz, jz, 1); ++ st_w(tmp3, SCR2, 0); // iq[jz] = (int) fw ++ b(Z_ZERO_CHECK_DONE); ++ bind(Z_IS_LESS_THAN_TWO24B); ++ ftintrz_w_d(vt, v18); // (int)z ++ movfr2gr_s(tmp3, vt); ++ alsl_d(SCR2, jz, iqBase, 2 - 1); ++ st_w(tmp3, SCR2, 0); // iq[jz] = (int) z ++ b(Z_ZERO_CHECK_DONE); ++ } ++ ++ block_comment("if(z==0.0) {"); { ++ bind(Z_IS_ZERO); ++ addi_w(jz, jz, -1); ++ alsl_d(SCR2, jz, iqBase, 2 - 1); ++ ld_w(tmp1, SCR2, 0); ++ addi_w(SCR1, SCR1, -24); ++ beqz(tmp1, Z_IS_ZERO); ++ } ++ bind(Z_ZERO_CHECK_DONE); ++ // convert integer "bit" chunk to floating-point value ++ // v17 = twon24 ++ // update v30, which was scalbnA(1.0, ); ++ addi_w(tmp2, SCR1, 1023); // biased exponent ++ slli_d(tmp2, tmp2, 52); // put at correct position ++ move(i, jz); ++ movgr2fr_d(v30, tmp2); ++ ++ block_comment("for(i=jz;i>=0;i--) {q[i] = fw*(double)iq[i]; fw*=twon24;}"); { ++ bind(CONVERTION_FOR); ++ alsl_d(SCR2, i, iqBase, 2 - 1); ++ fld_s(v31, SCR2, 0); ++ vffintl_d_w(v31, v31); ++ fmul_d(v31, v31, v30); ++ alsl_d(SCR2, i, qBase, 3 - 1); ++ fst_d(v31, SCR2, 0); ++ fmul_d(v30, v30, v17); ++ addi_w(i, i, -1); ++ bge(i, R0, CONVERTION_FOR); ++ } ++ addi_d(SCR2, SP, 160); // base for fq ++ // reusing twoOverPiBase ++ li(twoOverPiBase, pio2); ++ ++ block_comment("compute PIo2[0,...,jp]*q[jz,...,0]. 
for(i=jz;i>=0;i--) {...}"); { ++ move(i, jz); ++ move(tmp2, R0); // tmp2 will keep jz - i == 0 at start ++ bind(COMP_FOR); ++ // for(fw=0.0,k=0;k<=jp&&k<=jz-i;k++) fw += PIo2[k]*q[i+k]; ++ vxor_v(v30, v30, v30); ++ alsl_d(tmp5, i, qBase, 3 - 1); // address of q[i+k] for k==0 ++ li(tmp3, 4); ++ slti(tmp4, tmp2, 5); ++ alsl_d(tmp1, i, qBase, 3 - 1); // used as q[i] address ++ masknez(tmp3, tmp3, tmp4); // min(jz - i, jp); ++ maskeqz(tmp4, tmp2, tmp4); ++ orr(tmp3, tmp3, tmp4); ++ move(tmp4, R0); // used as k ++ ++ block_comment("for(fw=0.0,k=0;k<=jp&&k<=jz-i;k++) fw += PIo2[k]*q[i+k];"); { ++ bind(COMP_INNER_LOOP); ++ alsl_d(tmp5, tmp4, tmp1, 3 - 1); ++ fld_d(v18, tmp5, 0); // q[i+k] ++ alsl_d(tmp5, tmp4, twoOverPiBase, 3 - 1); ++ fld_d(v19, tmp5, 0); // PIo2[k] ++ fmadd_d(v30, v18, v19, v30); // fw += PIo2[k]*q[i+k]; ++ addi_w(tmp4, tmp4, 1); // k++ ++ bge(tmp3, tmp4, COMP_INNER_LOOP); ++ } ++ alsl_d(tmp5, tmp2, SCR2, 3 - 1); ++ fst_d(v30, tmp5, 0); // fq[jz-i] ++ addi_d(tmp2, tmp2, 1); ++ addi_w(i, i, -1); ++ bge(i, R0, COMP_FOR); ++ } ++ ++ block_comment("switch(prec) {...}. case 2:"); { ++ // compress fq into y[] ++ // remember prec == 2 ++ ++ block_comment("for (i=jz;i>=0;i--) fw += fq[i];"); { ++ vxor_v(v4, v4, v4); ++ move(i, jz); ++ bind(FW_FOR1); ++ alsl_d(tmp5, i, SCR2, 3 - 1); ++ fld_d(v1, tmp5, 0); ++ addi_w(i, i, -1); ++ fadd_d(v4, v4, v1); ++ bge(i, R0, FW_FOR1); ++ } ++ bind(FW_FOR1_DONE); ++ // v1 contains fq[0]. so, keep it so far ++ fsub_d(v5, v1, v4); // fw = fq[0] - fw ++ beqz(ih, FW_Y0_NO_NEGATION); ++ fneg_d(v4, v4); ++ bind(FW_Y0_NO_NEGATION); ++ ++ block_comment("for (i=1;i<=jz;i++) fw += fq[i];"); { ++ li(i, 1); ++ blt(jz, i, FW_FOR2_DONE); ++ bind(FW_FOR2); ++ alsl_d(tmp5, i, SCR2, 3 - 1); ++ fld_d(v1, tmp5, 0); ++ addi_w(i, i, 1); ++ fadd_d(v5, v5, v1); ++ bge(jz, i, FW_FOR2); ++ } ++ bind(FW_FOR2_DONE); ++ beqz(ih, FW_Y1_NO_NEGATION); ++ fneg_d(v5, v5); ++ bind(FW_Y1_NO_NEGATION); ++ addi_d(SP, SP, 560); ++ } ++} ++ ++///* __kernel_sin( x, y, iy) ++// * kernel sin function on [-pi/4, pi/4], pi/4 ~ 0.7854 ++// * Input x is assumed to be bounded by ~pi/4 in magnitude. ++// * Input y is the tail of x. ++// * Input iy indicates whether y is 0. (if iy=0, y assume to be 0). ++// * ++// * Algorithm ++// * 1. Since sin(-x) = -sin(x), we need only to consider positive x. ++// * 2. if x < 2^-27 (hx<0x3e400000 0), return x with inexact if x!=0. ++// * 3. sin(x) is approximated by a polynomial of degree 13 on ++// * [0,pi/4] ++// * 3 13 ++// * sin(x) ~ x + S1*x + ... + S6*x ++// * where ++// * ++// * |sin(x) 2 4 6 8 10 12 | -58 ++// * |----- - (1+S1*x +S2*x +S3*x +S4*x +S5*x +S6*x )| <= 2 ++// * | x | ++// * ++// * 4. 
sin(x+y) = sin(x) + sin'(x')*y ++// * ~ sin(x) + (1-x*x/2)*y ++// * For better accuracy, let ++// * 3 2 2 2 2 ++// * r = x *(S2+x *(S3+x *(S4+x *(S5+x *S6)))) ++// * then 3 2 ++// * sin(x) = x + (S1*x + (x *(r-y/2)+y)) ++// */ ++//static const double ++//S1 = -1.66666666666666324348e-01, /* 0xBFC55555, 0x55555549 */ ++//S2 = 8.33333333332248946124e-03, /* 0x3F811111, 0x1110F8A6 */ ++//S3 = -1.98412698298579493134e-04, /* 0xBF2A01A0, 0x19C161D5 */ ++//S4 = 2.75573137070700676789e-06, /* 0x3EC71DE3, 0x57B1FE7D */ ++//S5 = -2.50507602534068634195e-08, /* 0xBE5AE5E6, 0x8A2B9CEB */ ++//S6 = 1.58969099521155010221e-10; /* 0x3DE5D93A, 0x5ACFD57C */ ++// ++// NOTE: S1..S6 were moved into a table: StubRoutines::la::_dsin_coef ++// ++// BEGIN __kernel_sin PSEUDO CODE ++// ++//static double __kernel_sin(double x, double y, bool iy) ++//{ ++// double z,r,v; ++// ++// // NOTE: not needed. moved to dsin/dcos ++// //int ix; ++// //ix = high(x)&0x7fffffff; /* high word of x */ ++// ++// // NOTE: moved to dsin/dcos ++// //if(ix<0x3e400000) /* |x| < 2**-27 */ ++// // {if((int)x==0) return x;} /* generate inexact */ ++// ++// z = x*x; ++// v = z*x; ++// r = S2+z*(S3+z*(S4+z*(S5+z*S6))); ++// if(iy==0) return x+v*(S1+z*r); ++// else return x-((z*(half*y-v*r)-y)-v*S1); ++//} ++// ++// END __kernel_sin PSEUDO CODE ++// ++// Changes between fdlibm and intrinsic: ++// 1. Removed |x| < 2**-27 check, because if was done earlier in dsin/dcos ++// 2. Constants are now loaded from table dsin_coef ++// 3. C code parameter "int iy" was modified to "bool iyIsOne", because ++// iy is always 0 or 1. Also, iyIsOne branch was moved into ++// generation phase instead of taking it during code execution ++// Input ans output: ++// 1. Input for generated function: X argument = x ++// 2. Input for generator: x = register to read argument from, iyIsOne ++// = flag to use low argument low part or not, dsin_coef = coefficients ++// table address ++// 3. Return sin(x) value in FA0 ++void MacroAssembler::generate_kernel_sin(FloatRegister x, bool iyIsOne, address dsin_coef) { ++ FloatRegister y = FA5, z = FA6, v = FA7, r = FT0, s1 = FT1, s2 = FT2, ++ s3 = FT3, s4 = FT4, s5 = FT5, s6 = FT6, half = FT7; ++ li(SCR2, dsin_coef); ++ fld_d(s5, SCR2, 32); ++ fld_d(s6, SCR2, 40); ++ fmul_d(z, x, x); // z = x*x; ++ fld_d(s1, SCR2, 0); ++ fld_d(s2, SCR2, 8); ++ fld_d(s3, SCR2, 16); ++ fld_d(s4, SCR2, 24); ++ fmul_d(v, z, x); // v = z*x; ++ ++ block_comment("calculate r = S2+z*(S3+z*(S4+z*(S5+z*S6)))"); { ++ fmadd_d(r, z, s6, s5); ++ // initialize "half" in current block to utilize 2nd FPU. However, it's ++ // not a part of this block ++ vldi(half, -928); // 0.5 (0x3fe0000000000000) ++ fmadd_d(r, z, r, s4); ++ fmadd_d(r, z, r, s3); ++ fmadd_d(r, z, r, s2); ++ } ++ ++ if (!iyIsOne) { ++ // return x+v*(S1+z*r); ++ fmadd_d(s1, z, r, s1); ++ fmadd_d(FA0, v, s1, x); ++ } else { ++ // return x-((z*(half*y-v*r)-y)-v*S1); ++ fmul_d(s6, half, y); // half*y ++ fnmsub_d(s6, v, r, s6); // half*y-v*r ++ fnmsub_d(s6, z, s6, y); // y - z*(half*y-v*r) = - (z*(half*y-v*r)-y) ++ fmadd_d(s6, v, s1, s6); // - (z*(half*y-v*r)-y) + v*S1 == -((z*(half*y-v*r)-y)-v*S1) ++ fadd_d(FA0, x, s6); ++ } ++} ++ ++///* ++// * __kernel_cos( x, y ) ++// * kernel cos function on [-pi/4, pi/4], pi/4 ~ 0.785398164 ++// * Input x is assumed to be bounded by ~pi/4 in magnitude. ++// * Input y is the tail of x. ++// * ++// * Algorithm ++// * 1. Since cos(-x) = cos(x), we need only to consider positive x. ++// * 2. if x < 2^-27 (hx<0x3e400000 0), return 1 with inexact if x!=0. ++// * 3. 
cos(x) is approximated by a polynomial of degree 14 on ++// * [0,pi/4] ++// * 4 14 ++// * cos(x) ~ 1 - x*x/2 + C1*x + ... + C6*x ++// * where the remez error is ++// * ++// * | 2 4 6 8 10 12 14 | -58 ++// * |cos(x)-(1-.5*x +C1*x +C2*x +C3*x +C4*x +C5*x +C6*x )| <= 2 ++// * | | ++// * ++// * 4 6 8 10 12 14 ++// * 4. let r = C1*x +C2*x +C3*x +C4*x +C5*x +C6*x , then ++// * cos(x) = 1 - x*x/2 + r ++// * since cos(x+y) ~ cos(x) - sin(x)*y ++// * ~ cos(x) - x*y, ++// * a correction term is necessary in cos(x) and hence ++// * cos(x+y) = 1 - (x*x/2 - (r - x*y)) ++// * For better accuracy when x > 0.3, let qx = |x|/4 with ++// * the last 32 bits mask off, and if x > 0.78125, let qx = 0.28125. ++// * Then ++// * cos(x+y) = (1-qx) - ((x*x/2-qx) - (r-x*y)). ++// * Note that 1-qx and (x*x/2-qx) is EXACT here, and the ++// * magnitude of the latter is at least a quarter of x*x/2, ++// * thus, reducing the rounding error in the subtraction. ++// */ ++// ++//static const double ++//C1 = 4.16666666666666019037e-02, /* 0x3FA55555, 0x5555554C */ ++//C2 = -1.38888888888741095749e-03, /* 0xBF56C16C, 0x16C15177 */ ++//C3 = 2.48015872894767294178e-05, /* 0x3EFA01A0, 0x19CB1590 */ ++//C4 = -2.75573143513906633035e-07, /* 0xBE927E4F, 0x809C52AD */ ++//C5 = 2.08757232129817482790e-09, /* 0x3E21EE9E, 0xBDB4B1C4 */ ++//C6 = -1.13596475577881948265e-11; /* 0xBDA8FAE9, 0xBE8838D4 */ ++// ++// NOTE: C1..C6 were moved into a table: StubRoutines::la::_dcos_coef ++// ++// BEGIN __kernel_cos PSEUDO CODE ++// ++//static double __kernel_cos(double x, double y) ++//{ ++// double a,h,z,r,qx=0; ++// ++// // NOTE: ix is already initialized in dsin/dcos. Reuse value from register ++// //int ix; ++// //ix = high(x)&0x7fffffff; /* ix = |x|'s high word*/ ++// ++// // NOTE: moved to dsin/dcos ++// //if(ix<0x3e400000) { /* if x < 2**27 */ ++// // if(((int)x)==0) return one; /* generate inexact */ ++// //} ++// ++// z = x*x; ++// r = z*(C1+z*(C2+z*(C3+z*(C4+z*(C5+z*C6))))); ++// if(ix < 0x3FD33333) /* if |x| < 0.3 */ ++// return one - (0.5*z - (z*r - x*y)); ++// else { ++// if(ix > 0x3fe90000) { /* x > 0.78125 */ ++// qx = 0.28125; ++// } else { ++// set_high(&qx, ix-0x00200000); /* x/4 */ ++// set_low(&qx, 0); ++// } ++// h = 0.5*z-qx; ++// a = one-qx; ++// return a - (h - (z*r-x*y)); ++// } ++//} ++// ++// END __kernel_cos PSEUDO CODE ++// ++// Changes between fdlibm and intrinsic: ++// 1. Removed |x| < 2**-27 check, because if was done earlier in dsin/dcos ++// 2. Constants are now loaded from table dcos_coef ++// Input and output: ++// 1. Input for generated function: X argument = x ++// 2. Input for generator: x = register to read argument from, dcos_coef ++// = coefficients table address ++// 3. 
Return cos(x) value in FA0 ++void MacroAssembler::generate_kernel_cos(FloatRegister x, address dcos_coef) { ++ Register ix = A3; ++ FloatRegister qx = FA1, h = FA2, a = FA3, y = FA5, z = FA6, r = FA7, C1 = FT0, ++ C2 = FT1, C3 = FT2, C4 = FT3, C5 = FT4, C6 = FT5, one = FT6, half = FT7; ++ Label IX_IS_LARGE, SET_QX_CONST, DONE, QX_SET; ++ li(SCR2, dcos_coef); ++ fld_d(C1, SCR2, 0); ++ fld_d(C2, SCR2, 8); ++ fld_d(C3, SCR2, 16); ++ fld_d(C4, SCR2, 24); ++ fld_d(C5, SCR2, 32); ++ fld_d(C6, SCR2, 40); ++ fmul_d(z, x, x); // z=x^2 ++ block_comment("calculate r = z*(C1+z*(C2+z*(C3+z*(C4+z*(C5+z*C6)))))"); { ++ fmadd_d(r, z, C6, C5); ++ vldi(half, -928); // 0.5 (0x3fe0000000000000) ++ fmadd_d(r, z, r, C4); ++ fmul_d(y, x, y); ++ fmadd_d(r, z, r, C3); ++ li(SCR1, 0x3FD33333); ++ fmadd_d(r, z, r, C2); ++ fmul_d(x, z, z); // x = z^2 ++ fmadd_d(r, z, r, C1); // r = C1+z(C2+z(C4+z(C5+z*C6))) ++ } ++ // need to multiply r by z to have "final" r value ++ vldi(one, -912); // 1.0 (0x3ff0000000000000) ++ bge(ix, SCR1, IX_IS_LARGE); ++ block_comment("if(ix < 0x3FD33333) return one - (0.5*z - (z*r - x*y))"); { ++ // return 1.0 - (0.5*z - (z*r - x*y)) = 1.0 - (0.5*z + (x*y - z*r)) ++ fnmsub_d(FA0, x, r, y); ++ fmadd_d(FA0, half, z, FA0); ++ fsub_d(FA0, one, FA0); ++ b(DONE); ++ } ++ block_comment("if(ix >= 0x3FD33333)"); { ++ bind(IX_IS_LARGE); ++ li(SCR2, 0x3FE90000); ++ blt(SCR2, ix, SET_QX_CONST); ++ block_comment("set_high(&qx, ix-0x00200000); set_low(&qx, 0);"); { ++ li(SCR2, 0x00200000); ++ sub_w(SCR2, ix, SCR2); ++ slli_d(SCR2, SCR2, 32); ++ movgr2fr_d(qx, SCR2); ++ } ++ b(QX_SET); ++ bind(SET_QX_CONST); ++ block_comment("if(ix > 0x3fe90000) qx = 0.28125;"); { ++ vldi(qx, -942); // 0.28125 (0x3fd2000000000000) ++ } ++ bind(QX_SET); ++ fmsub_d(C6, x, r, y); // z*r - xy ++ fmsub_d(h, half, z, qx); // h = 0.5*z - qx ++ fsub_d(a, one, qx); // a = 1-qx ++ fsub_d(C6, h, C6); // = h - (z*r - x*y) ++ fsub_d(FA0, a, C6); ++ } ++ bind(DONE); ++} ++ ++// generate_dsin_dcos creates stub for dsin and dcos ++// Generation is done via single call because dsin and dcos code is almost the ++// same(see C code below). These functions work as follows: ++// 1) handle corner cases: |x| ~< pi/4, x is NaN or INF, |x| < 2**-27 ++// 2) perform argument reduction if required ++// 3) call kernel_sin or kernel_cos which approximate sin/cos via polynomial ++// ++// BEGIN dsin/dcos PSEUDO CODE ++// ++//dsin_dcos(jdouble x, bool isCos) { ++// double y[2],z=0.0; ++// int n, ix; ++// ++// /* High word of x. */ ++// ix = high(x); ++// ++// /* |x| ~< pi/4 */ ++// ix &= 0x7fffffff; ++// if(ix <= 0x3fe921fb) return isCos ? __kernel_cos : __kernel_sin(x,z,0); ++// ++// /* sin/cos(Inf or NaN) is NaN */ ++// else if (ix>=0x7ff00000) return x-x; ++// else if (ix<0x3e400000) { /* if ix < 2**27 */ ++// if(((int)x)==0) return isCos ? one : x; /* generate inexact */ ++// } ++// /* argument reduction needed */ ++// else { ++// n = __ieee754_rem_pio2(x,y); ++// switch(n&3) { ++// case 0: return isCos ? __kernel_cos(y[0],y[1]) : __kernel_sin(y[0],y[1], true); ++// case 1: return isCos ? -__kernel_sin(y[0],y[1],true) : __kernel_cos(y[0],y[1]); ++// case 2: return isCos ? -__kernel_cos(y[0],y[1]) : -__kernel_sin(y[0],y[1], true); ++// default: ++// return isCos ? __kernel_sin(y[0],y[1],1) : -__kernel_cos(y[0],y[1]); ++// } ++// } ++//} ++// END dsin/dcos PSEUDO CODE ++// ++// Changes between fdlibm and intrinsic: ++// 1. Moved ix < 2**27 from kernel_sin/kernel_cos into dsin/dcos ++// 2. 
++// Final switch uses equivalent bit checks(tbz/tbnz) ++// Input and output: ++// 1. Input for generated function: X = A0 ++// 2. Input for generator: isCos = generate sin or cos, npio2_hw = address ++// of npio2_hw table, two_over_pi = address of two_over_pi table, ++// pio2 = address of pio2 table, dsin_coef = address of dsin_coef table, ++// dcos_coef = address of dcos_coef table ++// 3. Return result in FA0 ++// NOTE: general purpose register names match local variable names in C code ++void MacroAssembler::generate_dsin_dcos(bool isCos, address npio2_hw, ++ address two_over_pi, address pio2, ++ address dsin_coef, address dcos_coef) { ++ Label DONE, ARG_REDUCTION, TINY_X, RETURN_SIN, EARLY_CASE; ++ Register X = A0, absX = A1, n = A2, ix = A3; ++ FloatRegister y0 = FA4, y1 = FA5; ++ ++ block_comment("check |x| ~< pi/4, NaN, Inf and |x| < 2**-27 cases"); { ++ movfr2gr_d(X, FA0); ++ li(SCR2, 0x3e400000); ++ li(SCR1, 0x3fe921fb); // high word of pi/4. ++ bstrpick_d(absX, X, 62, 0); // absX ++ li(T0, 0x7ff0000000000000); ++ srli_d(ix, absX, 32); // set ix ++ blt(ix, SCR2, TINY_X); // handle tiny x (|x| < 2^-27) ++ bge(SCR1, ix, EARLY_CASE); // if(ix <= 0x3fe921fb) return ++ blt(absX, T0, ARG_REDUCTION); ++ // X is NaN or INF(i.e. 0x7FF* or 0xFFF*). Return NaN (mantissa != 0). ++ // Set last bit unconditionally to make it NaN ++ ori(T0, T0, 1); ++ movgr2fr_d(FA0, T0); ++ jr(RA); ++ } ++ block_comment("kernel_sin/kernel_cos: if(ix<0x3e400000) {}"); { ++ bind(TINY_X); ++ if (isCos) { ++ vldi(FA0, -912); // 1.0 (0x3ff0000000000000) ++ } ++ jr(RA); ++ } ++ bind(ARG_REDUCTION); /* argument reduction needed */ ++ block_comment("n = __ieee754_rem_pio2(x,y);"); { ++ generate__ieee754_rem_pio2(npio2_hw, two_over_pi, pio2); ++ } ++ block_comment("switch(n&3) {case ... }"); { ++ if (isCos) { ++ srli_w(T0, n, 1); ++ xorr(absX, n, T0); ++ andi(T0, n, 1); ++ bnez(T0, RETURN_SIN); ++ } else { ++ andi(T0, n, 1); ++ beqz(T0, RETURN_SIN); ++ } ++ generate_kernel_cos(y0, dcos_coef); ++ if (isCos) { ++ andi(T0, absX, 1); ++ beqz(T0, DONE); ++ } else { ++ andi(T0, n, 2); ++ beqz(T0, DONE); ++ } ++ fneg_d(FA0, FA0); ++ jr(RA); ++ bind(RETURN_SIN); ++ generate_kernel_sin(y0, true, dsin_coef); ++ if (isCos) { ++ andi(T0, absX, 1); ++ beqz(T0, DONE); ++ } else { ++ andi(T0, n, 2); ++ beqz(T0, DONE); ++ } ++ fneg_d(FA0, FA0); ++ jr(RA); ++ } ++ bind(EARLY_CASE); ++ vxor_v(y1, y1, y1); ++ if (isCos) { ++ generate_kernel_cos(FA0, dcos_coef); ++ } else { ++ generate_kernel_sin(FA0, false, dsin_coef); ++ } ++ bind(DONE); ++ jr(RA); ++} +diff --git a/src/hotspot/cpu/loongarch/matcher_loongarch.hpp b/src/hotspot/cpu/loongarch/matcher_loongarch.hpp +new file mode 100644 +index 00000000000..21a691b1faa +--- /dev/null ++++ b/src/hotspot/cpu/loongarch/matcher_loongarch.hpp +@@ -0,0 +1,145 @@ ++/* ++ * Copyright (c) 2021, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2021, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). 
++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#ifndef CPU_LOONGARCH_MATCHER_LOONGARCH_HPP ++#define CPU_LOONGARCH_MATCHER_LOONGARCH_HPP ++ ++ // Defined within class Matcher ++ ++ // false => size gets scaled to BytesPerLong, ok. ++ static const bool init_array_count_is_in_bytes = false; ++ ++ // Whether this platform implements the scalable vector feature ++ static const bool implements_scalable_vector = false; ++ ++ static const bool supports_scalable_vector() { ++ return false; ++ } ++ ++ // LoongArch doesn't support misaligned vectors store/load? FIXME ++ static constexpr bool misaligned_vectors_ok() { ++ return false; ++ } ++ ++ // Whether code generation need accurate ConvI2L types. ++ static const bool convi2l_type_required = true; ++ ++ // Does the CPU require late expand (see block.cpp for description of late expand)? ++ static const bool require_postalloc_expand = false; ++ ++ // Do we need to mask the count passed to shift instructions or does ++ // the cpu only look at the lower 5/6 bits anyway? ++ static const bool need_masked_shift_count = false; ++ ++ // No support for generic vector operands. ++ static const bool supports_generic_vector_operands = false; ++ ++ static constexpr bool isSimpleConstant64(jlong value) { ++ // Will one (StoreL ConL) be cheaper than two (StoreI ConI)?. ++ // Probably always true, even if a temp register is required. ++ return true; ++ } ++ ++ // No additional cost for CMOVL. ++ static constexpr int long_cmove_cost() { return 0; } ++ ++ // No CMOVF/CMOVD with SSE2 ++ static int float_cmove_cost() { return ConditionalMoveLimit; } ++ ++ static bool narrow_oop_use_complex_address() { ++ assert(UseCompressedOops, "only for compressed oops code"); ++ return false; ++ } ++ ++ static bool narrow_klass_use_complex_address() { ++ assert(UseCompressedClassPointers, "only for compressed klass code"); ++ return false; ++ } ++ ++ static bool const_oop_prefer_decode() { ++ // Prefer ConN+DecodeN over ConP. ++ return true; ++ } ++ ++ static bool const_klass_prefer_decode() { ++ // TODO: Either support matching DecodeNKlass (heap-based) in operand ++ // or condisider the following: ++ // Prefer ConNKlass+DecodeNKlass over ConP in simple compressed klass mode. ++ //return CompressedKlassPointers::base() == NULL; ++ return true; ++ } ++ ++ // Is it better to copy float constants, or load them directly from memory? ++ // Intel can load a float constant from a direct address, requiring no ++ // extra registers. Most RISCs will have to materialize an address into a ++ // register first, so they would do better to copy the constant from stack. ++ static const bool rematerialize_float_constants = false; ++ ++ // If CPU can load and store mis-aligned doubles directly then no fixup is ++ // needed. Else we split the double into 2 integer pieces and move it ++ // piece-by-piece. Only happens when passing doubles into C code as the ++ // Java calling convention forces doubles to be aligned. ++ static const bool misaligned_doubles_ok = false; ++ ++ // Advertise here if the CPU requires explicit rounding operations to implement strictfp mode. 
++ static const bool strict_fp_requires_explicit_rounding = false; ++ ++ // Are floats converted to double when stored to stack during ++ // deoptimization? ++ static constexpr bool float_in_double() { return false; } ++ ++ // Do ints take an entire long register or just half? ++ static const bool int_in_long = true; ++ ++ // Does the CPU support vector variable shift instructions? ++ static constexpr bool supports_vector_variable_shifts(void) { ++ return true; ++ } ++ ++ // Does the CPU support vector variable rotate instructions? ++ static constexpr bool supports_vector_variable_rotates(void) { ++ return true; ++ } ++ ++ // Does the CPU support vector unsigned comparison instructions? ++ static constexpr bool supports_vector_comparison_unsigned(int vlen, BasicType bt) { ++ return true; ++ } ++ ++ // Some microarchitectures have mask registers used on vectors ++ static const bool has_predicated_vectors(void) { ++ return false; ++ } ++ ++ // true means we have fast l2f conversion ++ // false means that conversion is done by runtime call ++ static constexpr bool convL2FSupported(void) { ++ return true; ++ } ++ ++ // Implements a variant of EncodeISOArrayNode that encodes ASCII only ++ static const bool supports_encode_ascii_array = true; ++ ++#endif // CPU_LOONGARCH_MATCHER_LOONGARCH_HPP +diff --git a/src/hotspot/cpu/loongarch/methodHandles_loongarch.cpp b/src/hotspot/cpu/loongarch/methodHandles_loongarch.cpp +new file mode 100644 +index 00000000000..31b3040c3a5 +--- /dev/null ++++ b/src/hotspot/cpu/loongarch/methodHandles_loongarch.cpp +@@ -0,0 +1,585 @@ ++/* ++ * Copyright (c) 1997, 2014, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2021, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ * ++ */ ++ ++#include "precompiled.hpp" ++#include "asm/macroAssembler.hpp" ++#include "classfile/javaClasses.inline.hpp" ++#include "classfile/vmClasses.hpp" ++#include "interpreter/interpreter.hpp" ++#include "interpreter/interpreterRuntime.hpp" ++#include "memory/allocation.inline.hpp" ++#include "prims/jvmtiExport.hpp" ++#include "prims/methodHandles.hpp" ++#include "runtime/frame.inline.hpp" ++#include "runtime/stubRoutines.hpp" ++#include "utilities/preserveException.hpp" ++ ++#define __ _masm-> ++ ++#define A0 RA0 ++#define A1 RA1 ++#define A2 RA2 ++#define A3 RA3 ++#define A4 RA4 ++#define A5 RA5 ++#define A6 RA6 ++#define A7 RA7 ++#define T0 RT0 ++#define T1 RT1 ++#define T2 RT2 ++#define T3 RT3 ++#define T4 RT4 ++#define T5 RT5 ++#define T8 RT8 ++ ++#ifdef PRODUCT ++#define BLOCK_COMMENT(str) // nothing ++#define STOP(error) stop(error) ++#else ++#define BLOCK_COMMENT(str) __ block_comment(str) ++#define STOP(error) block_comment(error); __ stop(error) ++#endif ++ ++#define BIND(label) bind(label); BLOCK_COMMENT(#label ":") ++ ++void MethodHandles::load_klass_from_Class(MacroAssembler* _masm, Register klass_reg) { ++ if (VerifyMethodHandles) ++ verify_klass(_masm, klass_reg, VM_CLASS_ID(java_lang_Class), ++ "MH argument is a Class"); ++ __ ld_d(klass_reg, Address(klass_reg, java_lang_Class::klass_offset())); ++} ++ ++#ifdef ASSERT ++static int check_nonzero(const char* xname, int x) { ++ assert(x != 0, "%s should be nonzero", xname); ++ return x; ++} ++#define NONZERO(x) check_nonzero(#x, x) ++#else //ASSERT ++#define NONZERO(x) (x) ++#endif //ASSERT ++ ++#ifdef ASSERT ++void MethodHandles::verify_klass(MacroAssembler* _masm, ++ Register obj_reg, vmClassID klass_id, ++ const char* error_message) { ++} ++ ++void MethodHandles::verify_ref_kind(MacroAssembler* _masm, int ref_kind, Register member_reg, Register temp) { ++ Label L; ++ BLOCK_COMMENT("verify_ref_kind {"); ++ __ ld_w(temp, Address(member_reg, NONZERO(java_lang_invoke_MemberName::flags_offset()))); ++ __ srai_w(temp, temp, java_lang_invoke_MemberName::MN_REFERENCE_KIND_SHIFT); ++ __ li(AT, java_lang_invoke_MemberName::MN_REFERENCE_KIND_MASK); ++ __ andr(temp, temp, AT); ++ __ li(AT, ref_kind); ++ __ beq(temp, AT, L); ++ { char* buf = NEW_C_HEAP_ARRAY(char, 100, mtInternal); ++ jio_snprintf(buf, 100, "verify_ref_kind expected %x", ref_kind); ++ if (ref_kind == JVM_REF_invokeVirtual || ++ ref_kind == JVM_REF_invokeSpecial) ++ // could do this for all ref_kinds, but would explode assembly code size ++ trace_method_handle(_masm, buf); ++ __ STOP(buf); ++ } ++ BLOCK_COMMENT("} verify_ref_kind"); ++ __ bind(L); ++} ++ ++#endif //ASSERT ++ ++void MethodHandles::jump_from_method_handle(MacroAssembler* _masm, Register method, Register temp, ++ bool for_compiler_entry) { ++ assert(method == Rmethod, "interpreter calling convention"); ++ ++ Label L_no_such_method; ++ __ beq(method, R0, L_no_such_method); ++ ++ __ verify_method_ptr(method); ++ ++ if (!for_compiler_entry && JvmtiExport::can_post_interpreter_events()) { ++ Label run_compiled_code; ++ // JVMTI events, such as single-stepping, are implemented partly by avoiding running ++ // compiled code in threads for which the event is enabled. Check here for ++ // interp_only_mode if these events CAN be enabled. ++ Register rthread = TREG; ++ // interp_only is an int, on little endian it is sufficient to test the byte only ++ // Is a cmpl faster? 
++ __ ld_bu(AT, rthread, in_bytes(JavaThread::interp_only_mode_offset())); ++ __ beq(AT, R0, run_compiled_code); ++ __ ld_d(T4, method, in_bytes(Method::interpreter_entry_offset())); ++ __ jr(T4); ++ __ BIND(run_compiled_code); ++ } ++ ++ const ByteSize entry_offset = for_compiler_entry ? Method::from_compiled_offset() : ++ Method::from_interpreted_offset(); ++ __ ld_d(T4, method, in_bytes(entry_offset)); ++ __ jr(T4); ++ ++ __ bind(L_no_such_method); ++ address wrong_method = StubRoutines::throw_AbstractMethodError_entry(); ++ __ jmp(wrong_method, relocInfo::runtime_call_type); ++} ++ ++void MethodHandles::jump_to_lambda_form(MacroAssembler* _masm, ++ Register recv, Register method_temp, ++ Register temp2, ++ bool for_compiler_entry) { ++ BLOCK_COMMENT("jump_to_lambda_form {"); ++ // This is the initial entry point of a lazy method handle. ++ // After type checking, it picks up the invoker from the LambdaForm. ++ assert_different_registers(recv, method_temp, temp2); ++ assert(recv != noreg, "required register"); ++ assert(method_temp == Rmethod, "required register for loading method"); ++ ++ //NOT_PRODUCT({ FlagSetting fs(TraceMethodHandles, true); trace_method_handle(_masm, "LZMH"); }); ++ ++ // Load the invoker, as MH -> MH.form -> LF.vmentry ++ __ verify_oop(recv); ++ __ load_heap_oop(method_temp, Address(recv, NONZERO(java_lang_invoke_MethodHandle::form_offset())), temp2); ++ __ verify_oop(method_temp); ++ __ load_heap_oop(method_temp, Address(method_temp, NONZERO(java_lang_invoke_LambdaForm::vmentry_offset())), temp2); ++ __ verify_oop(method_temp); ++ __ load_heap_oop(method_temp, Address(method_temp, NONZERO(java_lang_invoke_MemberName::method_offset())), temp2); ++ __ verify_oop(method_temp); ++ __ access_load_at(T_ADDRESS, IN_HEAP, method_temp, Address(method_temp, NONZERO(java_lang_invoke_ResolvedMethodName::vmtarget_offset())), noreg, noreg); ++ ++ if (VerifyMethodHandles && !for_compiler_entry) { ++ // make sure recv is already on stack ++ __ ld_d(temp2, Address(method_temp, Method::const_offset())); ++ __ load_sized_value(temp2, ++ Address(temp2, ConstMethod::size_of_parameters_offset()), ++ sizeof(u2), false); ++ // assert(sizeof(u2) == sizeof(Method::_size_of_parameters), ""); ++ Label L; ++ Address recv_addr = __ argument_address(temp2, -1); ++ __ ld_d(AT, recv_addr); ++ __ beq(recv, AT, L); ++ ++ recv_addr = __ argument_address(temp2, -1); ++ __ ld_d(V0, recv_addr); ++ __ STOP("receiver not on stack"); ++ __ BIND(L); ++ } ++ ++ jump_from_method_handle(_masm, method_temp, temp2, for_compiler_entry); ++ BLOCK_COMMENT("} jump_to_lambda_form"); ++} ++ ++ ++// Code generation ++address MethodHandles::generate_method_handle_interpreter_entry(MacroAssembler* _masm, ++ vmIntrinsics::ID iid) { ++ const bool not_for_compiler_entry = false; // this is the interpreter entry ++ assert(is_signature_polymorphic(iid), "expected invoke iid"); ++ if (iid == vmIntrinsics::_invokeGeneric || ++ iid == vmIntrinsics::_compiledLambdaForm) { ++ // Perhaps surprisingly, the symbolic references visible to Java are not directly used. ++ // They are linked to Java-generated adapters via MethodHandleNatives.linkMethod. ++ // They all allow an appendix argument. ++ __ stop("empty stubs make SG sick"); ++ return NULL; ++ } ++ ++ // No need in interpreter entry for linkToNative for now. ++ // Interpreter calls compiled entry through i2c. 
++ if (iid == vmIntrinsics::_linkToNative) { ++ __ stop("Should not reach here"); // empty stubs make SG sick ++ return NULL; ++ } ++ ++ // Rmethod: Method* ++ // T4: argument locator (parameter slot count, added to sp) ++ // S7: used as temp to hold mh or receiver ++ Register t4_argp = T4; // argument list ptr, live on error paths ++ Register s7_mh = S7; // MH receiver; dies quickly and is recycled ++ Register rm_method = Rmethod; // eventual target of this invocation ++ ++ // here's where control starts out: ++ __ align(CodeEntryAlignment); ++ address entry_point = __ pc(); ++ ++ if (VerifyMethodHandles) { ++ assert(Method::intrinsic_id_size_in_bytes() == 2, "assuming Method::_intrinsic_id is u2"); ++ ++ Label L; ++ BLOCK_COMMENT("verify_intrinsic_id {"); ++ __ ld_hu(AT, rm_method, Method::intrinsic_id_offset_in_bytes()); ++ guarantee(Assembler::is_simm(vmIntrinsics::as_int(iid), 12), "Oops, iid is not simm12! Change the instructions."); ++ __ addi_d(AT, AT, -1 * (int) iid); ++ __ beq(AT, R0, L); ++ if (iid == vmIntrinsics::_linkToVirtual || ++ iid == vmIntrinsics::_linkToSpecial) { ++ // could do this for all kinds, but would explode assembly code size ++ trace_method_handle(_masm, "bad Method*::intrinsic_id"); ++ } ++ __ STOP("bad Method*::intrinsic_id"); ++ __ bind(L); ++ BLOCK_COMMENT("} verify_intrinsic_id"); ++ } ++ ++ // First task: Find out how big the argument list is. ++ Address t4_first_arg_addr; ++ int ref_kind = signature_polymorphic_intrinsic_ref_kind(iid); ++ assert(ref_kind != 0 || iid == vmIntrinsics::_invokeBasic, "must be _invokeBasic or a linkTo intrinsic"); ++ if (ref_kind == 0 || MethodHandles::ref_kind_has_receiver(ref_kind)) { ++ __ ld_d(t4_argp, Address(rm_method, Method::const_offset())); ++ __ load_sized_value(t4_argp, ++ Address(t4_argp, ConstMethod::size_of_parameters_offset()), ++ sizeof(u2), false); ++ // assert(sizeof(u2) == sizeof(Method::_size_of_parameters), ""); ++ t4_first_arg_addr = __ argument_address(t4_argp, -1); ++ } else { ++ DEBUG_ONLY(t4_argp = noreg); ++ } ++ ++ if (!is_signature_polymorphic_static(iid)) { ++ __ ld_d(s7_mh, t4_first_arg_addr); ++ DEBUG_ONLY(t4_argp = noreg); ++ } ++ ++ // t4_first_arg_addr is live! ++ ++ trace_method_handle_interpreter_entry(_masm, iid); ++ ++ if (iid == vmIntrinsics::_invokeBasic) { ++ generate_method_handle_dispatch(_masm, iid, s7_mh, noreg, not_for_compiler_entry); ++ ++ } else { ++ // Adjust argument list by popping the trailing MemberName argument. ++ Register r_recv = noreg; ++ if (MethodHandles::ref_kind_has_receiver(ref_kind)) { ++ // Load the receiver (not the MH; the actual MemberName's receiver) up from the interpreter stack. 
++ __ ld_d(r_recv = T2, t4_first_arg_addr); ++ } ++ DEBUG_ONLY(t4_argp = noreg); ++ Register rm_member = rm_method; // MemberName ptr; incoming method ptr is dead now ++ __ pop(rm_member); // extract last argument ++ generate_method_handle_dispatch(_masm, iid, r_recv, rm_member, not_for_compiler_entry); ++ } ++ ++ return entry_point; ++} ++ ++void MethodHandles::generate_method_handle_dispatch(MacroAssembler* _masm, ++ vmIntrinsics::ID iid, ++ Register receiver_reg, ++ Register member_reg, ++ bool for_compiler_entry) { ++ assert(is_signature_polymorphic(iid), "expected invoke iid"); ++ Register rm_method = Rmethod; // eventual target of this invocation ++ // temps used in this code are not used in *either* compiled or interpreted calling sequences ++ Register j_rarg0 = T0; ++ Register j_rarg1 = A0; ++ Register j_rarg2 = A1; ++ Register j_rarg3 = A2; ++ Register j_rarg4 = A3; ++ Register j_rarg5 = A4; ++ ++ Register temp1 = T8; ++ Register temp2 = T4; ++ Register temp3 = T5; ++ if (for_compiler_entry) { ++ assert(receiver_reg == (iid == vmIntrinsics::_linkToStatic ? noreg : j_rarg0), "only valid assignment"); ++ assert_different_registers(temp1, j_rarg0, j_rarg1, j_rarg2, j_rarg3, j_rarg4, j_rarg5); ++ assert_different_registers(temp2, j_rarg0, j_rarg1, j_rarg2, j_rarg3, j_rarg4, j_rarg5); ++ assert_different_registers(temp3, j_rarg0, j_rarg1, j_rarg2, j_rarg3, j_rarg4, j_rarg5); ++ } ++ else { ++ assert_different_registers(temp1, temp2, temp3, saved_last_sp_register()); // don't trash lastSP ++ } ++ assert_different_registers(temp1, temp2, temp3, receiver_reg); ++ assert_different_registers(temp1, temp2, temp3, member_reg); ++ ++ if (iid == vmIntrinsics::_invokeBasic || iid == vmIntrinsics::_linkToNative) { ++ if (iid == vmIntrinsics::_linkToNative) { ++ assert(for_compiler_entry, "only compiler entry is supported"); ++ } ++ // indirect through MH.form.vmentry.vmtarget ++ jump_to_lambda_form(_masm, receiver_reg, rm_method, temp1, for_compiler_entry); ++ ++ } else { ++ // The method is a member invoker used by direct method handles. ++ if (VerifyMethodHandles) { ++ // make sure the trailing argument really is a MemberName (caller responsibility) ++ verify_klass(_masm, member_reg, VM_CLASS_ID(java_lang_invoke_MemberName), ++ "MemberName required for invokeVirtual etc."); ++ } ++ ++ Address member_clazz( member_reg, NONZERO(java_lang_invoke_MemberName::clazz_offset())); ++ Address member_vmindex( member_reg, NONZERO(java_lang_invoke_MemberName::vmindex_offset())); ++ Address member_vmtarget( member_reg, NONZERO(java_lang_invoke_MemberName::method_offset())); ++ Address vmtarget_method( rm_method, NONZERO(java_lang_invoke_ResolvedMethodName::vmtarget_offset())); ++ ++ Register temp1_recv_klass = temp1; ++ if (iid != vmIntrinsics::_linkToStatic) { ++ __ verify_oop(receiver_reg); ++ if (iid == vmIntrinsics::_linkToSpecial) { ++ // Don't actually load the klass; just null-check the receiver. ++ __ null_check(receiver_reg); ++ } else { ++ // load receiver klass itself ++ __ null_check(receiver_reg, oopDesc::klass_offset_in_bytes()); ++ __ load_klass(temp1_recv_klass, receiver_reg); ++ __ verify_klass_ptr(temp1_recv_klass); ++ } ++ BLOCK_COMMENT("check_receiver {"); ++ // The receiver for the MemberName must be in receiver_reg. ++ // Check the receiver against the MemberName.clazz ++ if (VerifyMethodHandles && iid == vmIntrinsics::_linkToSpecial) { ++ // Did not load it above... 
++ __ load_klass(temp1_recv_klass, receiver_reg); ++ __ verify_klass_ptr(temp1_recv_klass); ++ } ++ if (VerifyMethodHandles && iid != vmIntrinsics::_linkToInterface) { ++ Label L_ok; ++ Register temp2_defc = temp2; ++ __ load_heap_oop(temp2_defc, member_clazz, temp3); ++ load_klass_from_Class(_masm, temp2_defc); ++ __ verify_klass_ptr(temp2_defc); ++ __ check_klass_subtype(temp1_recv_klass, temp2_defc, temp3, L_ok); ++ // If we get here, the type check failed! ++ __ STOP("receiver class disagrees with MemberName.clazz"); ++ __ bind(L_ok); ++ } ++ BLOCK_COMMENT("} check_receiver"); ++ } ++ if (iid == vmIntrinsics::_linkToSpecial || ++ iid == vmIntrinsics::_linkToStatic) { ++ DEBUG_ONLY(temp1_recv_klass = noreg); // these guys didn't load the recv_klass ++ } ++ ++ // Live registers at this point: ++ // member_reg - MemberName that was the trailing argument ++ // temp1_recv_klass - klass of stacked receiver, if needed ++ ++ Label L_incompatible_class_change_error; ++ switch (iid) { ++ case vmIntrinsics::_linkToSpecial: ++ if (VerifyMethodHandles) { ++ verify_ref_kind(_masm, JVM_REF_invokeSpecial, member_reg, temp3); ++ } ++ __ load_heap_oop(rm_method, member_vmtarget, temp3); ++ __ access_load_at(T_ADDRESS, IN_HEAP, rm_method, vmtarget_method, noreg, noreg); ++ break; ++ ++ case vmIntrinsics::_linkToStatic: ++ if (VerifyMethodHandles) { ++ verify_ref_kind(_masm, JVM_REF_invokeStatic, member_reg, temp3); ++ } ++ __ load_heap_oop(rm_method, member_vmtarget, temp3); ++ __ access_load_at(T_ADDRESS, IN_HEAP, rm_method, vmtarget_method, noreg, noreg); ++ break; ++ ++ case vmIntrinsics::_linkToVirtual: ++ { ++ // same as TemplateTable::invokevirtual, ++ // minus the CP setup and profiling: ++ ++ if (VerifyMethodHandles) { ++ verify_ref_kind(_masm, JVM_REF_invokeVirtual, member_reg, temp3); ++ } ++ ++ // pick out the vtable index from the MemberName, and then we can discard it: ++ Register temp2_index = temp2; ++ __ access_load_at(T_ADDRESS, IN_HEAP, temp2_index, member_vmindex, noreg, noreg); ++ if (VerifyMethodHandles) { ++ Label L_index_ok; ++ __ blt(R0, temp2_index, L_index_ok); ++ __ STOP("no virtual index"); ++ __ BIND(L_index_ok); ++ } ++ ++ // Note: The verifier invariants allow us to ignore MemberName.clazz and vmtarget ++ // at this point. And VerifyMethodHandles has already checked clazz, if needed. 
++ ++ // get target Method* & entry point ++ __ lookup_virtual_method(temp1_recv_klass, temp2_index, rm_method); ++ break; ++ } ++ ++ case vmIntrinsics::_linkToInterface: ++ { ++ // same as TemplateTable::invokeinterface ++ // (minus the CP setup and profiling, with different argument motion) ++ if (VerifyMethodHandles) { ++ verify_ref_kind(_masm, JVM_REF_invokeInterface, member_reg, temp3); ++ } ++ ++ Register temp3_intf = temp3; ++ __ load_heap_oop(temp3_intf, member_clazz, temp2); ++ load_klass_from_Class(_masm, temp3_intf); ++ __ verify_klass_ptr(temp3_intf); ++ ++ Register rm_index = rm_method; ++ __ access_load_at(T_ADDRESS, IN_HEAP, rm_index, member_vmindex, noreg, noreg); ++ if (VerifyMethodHandles) { ++ Label L; ++ __ bge(rm_index, R0, L); ++ __ STOP("invalid vtable index for MH.invokeInterface"); ++ __ bind(L); ++ } ++ ++ // given intf, index, and recv klass, dispatch to the implementation method ++ __ lookup_interface_method(temp1_recv_klass, temp3_intf, ++ // note: next two args must be the same: ++ rm_index, rm_method, ++ temp2, ++ L_incompatible_class_change_error); ++ break; ++ } ++ ++ default: ++ fatal("unexpected intrinsic %d: %s", vmIntrinsics::as_int(iid), vmIntrinsics::name_at(iid)); ++ break; ++ } ++ ++ // Live at this point: ++ // rm_method ++ ++ // After figuring out which concrete method to call, jump into it. ++ // Note that this works in the interpreter with no data motion. ++ // But the compiled version will require that r_recv be shifted out. ++ __ verify_method_ptr(rm_method); ++ jump_from_method_handle(_masm, rm_method, temp1, for_compiler_entry); ++ ++ if (iid == vmIntrinsics::_linkToInterface) { ++ __ bind(L_incompatible_class_change_error); ++ address icce_entry= StubRoutines::throw_IncompatibleClassChangeError_entry(); ++ __ jmp(icce_entry, relocInfo::runtime_call_type); ++ } ++ } ++} ++ ++#ifndef PRODUCT ++void trace_method_handle_stub(const char* adaptername, ++ oop mh, ++ intptr_t* saved_regs, ++ intptr_t* entry_sp) { ++ // called as a leaf from native code: do not block the JVM! ++ bool has_mh = (strstr(adaptername, "/static") == NULL && ++ strstr(adaptername, "linkTo") == NULL); // static linkers don't have MH ++ const char* mh_reg_name = has_mh ? "s7_mh" : "s7"; ++ tty->print_cr("MH %s %s=" PTR_FORMAT " sp=" PTR_FORMAT, ++ adaptername, mh_reg_name, ++ p2i(mh), p2i(entry_sp)); ++ ++ if (Verbose) { ++ tty->print_cr("Registers:"); ++ const int saved_regs_count = RegisterImpl::number_of_registers; ++ for (int i = 0; i < saved_regs_count; i++) { ++ Register r = as_Register(i); ++ // The registers are stored in reverse order on the stack (by pusha). ++ tty->print("%3s=" PTR_FORMAT, r->name(), saved_regs[((saved_regs_count - 1) - i)]); ++ if ((i + 1) % 4 == 0) { ++ tty->cr(); ++ } else { ++ tty->print(", "); ++ } ++ } ++ tty->cr(); ++ ++ { ++ // dumping last frame with frame::describe ++ ++ JavaThread* p = JavaThread::active(); ++ ++ ResourceMark rm; ++ // may not be needed by safer and unexpensive here ++ PreserveExceptionMark pem(Thread::current()); ++ FrameValues values; ++ ++ // Note: We want to allow trace_method_handle from any call site. ++ // While trace_method_handle creates a frame, it may be entered ++ // without a PC on the stack top (e.g. not just after a call). ++ // Walking that frame could lead to failures due to that invalid PC. ++ // => carefully detect that frame when doing the stack walking ++ ++ // Current C frame ++ frame cur_frame = os::current_frame(); ++ ++ // Robust search of trace_calling_frame (independant of inlining). 
++ // Assumes saved_regs comes from a pusha in the trace_calling_frame. ++ assert(cur_frame.sp() < saved_regs, "registers not saved on stack ?"); ++ frame trace_calling_frame = os::get_sender_for_C_frame(&cur_frame); ++ while (trace_calling_frame.fp() < saved_regs) { ++ trace_calling_frame = os::get_sender_for_C_frame(&trace_calling_frame); ++ } ++ ++ // safely create a frame and call frame::describe ++ intptr_t *dump_sp = trace_calling_frame.sender_sp(); ++ intptr_t *dump_fp = trace_calling_frame.link(); ++ ++ bool walkable = has_mh; // whether the traced frame shoud be walkable ++ ++ if (walkable) { ++ // The previous definition of walkable may have to be refined ++ // if new call sites cause the next frame constructor to start ++ // failing. Alternatively, frame constructors could be ++ // modified to support the current or future non walkable ++ // frames (but this is more intrusive and is not considered as ++ // part of this RFE, which will instead use a simpler output). ++ frame dump_frame = frame(dump_sp, dump_fp); ++ dump_frame.describe(values, 1); ++ } else { ++ // Stack may not be walkable (invalid PC above FP): ++ // Add descriptions without building a Java frame to avoid issues ++ values.describe(-1, dump_fp, "fp for #1 "); ++ values.describe(-1, dump_sp, "sp for #1"); ++ } ++ values.describe(-1, entry_sp, "raw top of stack"); ++ ++ tty->print_cr("Stack layout:"); ++ values.print(p); ++ } ++ if (has_mh && oopDesc::is_oop(mh)) { ++ mh->print(); ++ if (java_lang_invoke_MethodHandle::is_instance(mh)) { ++ java_lang_invoke_MethodHandle::form(mh)->print(); ++ } ++ } ++ } ++} ++ ++// The stub wraps the arguments in a struct on the stack to avoid ++// dealing with the different calling conventions for passing 6 ++// arguments. ++struct MethodHandleStubArguments { ++ const char* adaptername; ++ oopDesc* mh; ++ intptr_t* saved_regs; ++ intptr_t* entry_sp; ++}; ++void trace_method_handle_stub_wrapper(MethodHandleStubArguments* args) { ++ trace_method_handle_stub(args->adaptername, ++ args->mh, ++ args->saved_regs, ++ args->entry_sp); ++} ++ ++void MethodHandles::trace_method_handle(MacroAssembler* _masm, const char* adaptername) { ++} ++#endif //PRODUCT +diff --git a/src/hotspot/cpu/loongarch/methodHandles_loongarch.hpp b/src/hotspot/cpu/loongarch/methodHandles_loongarch.hpp +new file mode 100644 +index 00000000000..a97520ea768 +--- /dev/null ++++ b/src/hotspot/cpu/loongarch/methodHandles_loongarch.hpp +@@ -0,0 +1,62 @@ ++/* ++ * Copyright (c) 2010, 2012, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2021, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 
++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++// Platform-specific definitions for method handles. ++// These definitions are inlined into class MethodHandles. ++ ++// Adapters ++enum /* platform_dependent_constants */ { ++ adapter_code_size = 32000 DEBUG_ONLY(+ 150000) ++}; ++ ++// Additional helper methods for MethodHandles code generation: ++public: ++ static void load_klass_from_Class(MacroAssembler* _masm, Register klass_reg); ++ ++ static void verify_klass(MacroAssembler* _masm, ++ Register obj, vmClassID klass_id, ++ const char* error_message = "wrong klass") NOT_DEBUG_RETURN; ++ ++ static void verify_method_handle(MacroAssembler* _masm, Register mh_reg) { ++ verify_klass(_masm, mh_reg, VM_CLASS_ID(MethodHandle_klass), ++ "reference is a MH"); ++ } ++ ++ static void verify_ref_kind(MacroAssembler* _masm, int ref_kind, Register member_reg, Register temp) NOT_DEBUG_RETURN; ++ ++ // Similar to InterpreterMacroAssembler::jump_from_interpreted. ++ // Takes care of special dispatch from single stepping too. ++ static void jump_from_method_handle(MacroAssembler* _masm, Register method, Register temp, ++ bool for_compiler_entry); ++ ++ static void jump_to_lambda_form(MacroAssembler* _masm, ++ Register recv, Register method_temp, ++ Register temp2, ++ bool for_compiler_entry); ++ ++ static Register saved_last_sp_register() { ++ // Should be in sharedRuntime, not here. ++ return R3; ++ } +diff --git a/src/hotspot/cpu/loongarch/nativeInst_loongarch.cpp b/src/hotspot/cpu/loongarch/nativeInst_loongarch.cpp +new file mode 100644 +index 00000000000..25ef0ecd224 +--- /dev/null ++++ b/src/hotspot/cpu/loongarch/nativeInst_loongarch.cpp +@@ -0,0 +1,529 @@ ++/* ++ * Copyright (c) 1997, 2014, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2021, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ * ++ */ ++ ++#include "precompiled.hpp" ++#include "asm/macroAssembler.hpp" ++#include "code/codeCache.hpp" ++#include "code/compiledIC.hpp" ++#include "memory/resourceArea.hpp" ++#include "nativeInst_loongarch.hpp" ++#include "oops/oop.inline.hpp" ++#include "runtime/handles.hpp" ++#include "runtime/safepoint.hpp" ++#include "runtime/sharedRuntime.hpp" ++#include "runtime/stubRoutines.hpp" ++#include "utilities/ostream.hpp" ++ ++#ifndef PRODUCT ++#include "compiler/disassembler.hpp" ++#endif ++ ++#include ++ ++#define A0 RA0 ++#define A1 RA1 ++#define A2 RA2 ++#define A3 RA3 ++#define A4 RA4 ++#define A5 RA5 ++#define A6 RA6 ++#define A7 RA7 ++#define T0 RT0 ++#define T1 RT1 ++#define T2 RT2 ++#define T3 RT3 ++#define T4 RT4 ++#define T5 RT5 ++#define T6 RT6 ++#define T7 RT7 ++#define T8 RT8 ++ ++void NativeInstruction::wrote(int offset) { ++ ICache::invalidate_word(addr_at(offset)); ++} ++ ++void NativeInstruction::set_long_at(int offset, long i) { ++ address addr = addr_at(offset); ++ *(long*)addr = i; ++ ICache::invalidate_range(addr, 8); ++} ++ ++bool NativeInstruction::is_int_branch() { ++ int op = Assembler::high(insn_word(), 6); ++ return op == Assembler::beqz_op || op == Assembler::bnez_op || ++ op == Assembler::beq_op || op == Assembler::bne_op || ++ op == Assembler::blt_op || op == Assembler::bge_op || ++ op == Assembler::bltu_op || op == Assembler::bgeu_op; ++} ++ ++bool NativeInstruction::is_float_branch() { ++ return Assembler::high(insn_word(), 6) == Assembler::bccondz_op; ++} ++ ++bool NativeInstruction::is_lu12iw_lu32id() const { ++ return Assembler::high(int_at(0), 7) == Assembler::lu12i_w_op && ++ Assembler::high(int_at(4), 7) == Assembler::lu32i_d_op; ++} ++ ++bool NativeInstruction::is_pcaddu12i_add() const { ++ return Assembler::high(int_at(0), 7) == Assembler::pcaddu12i_op && ++ Assembler::high(int_at(4), 10) == Assembler::addi_d_op; ++} ++ ++bool NativeCall::is_bl() const { ++ return Assembler::high(int_at(0), 6) == Assembler::bl_op; ++} ++ ++void NativeCall::verify() { ++ assert(is_bl(), "not a NativeCall"); ++} ++ ++address NativeCall::target_addr_for_bl(address orig_addr) const { ++ address addr = orig_addr ? orig_addr : addr_at(0); ++ ++ // bl ++ if (is_bl()) { ++ return addr + (Assembler::simm26(((int_at(0) & 0x3ff) << 16) | ++ ((int_at(0) >> 10) & 0xffff)) << 2); ++ } ++ ++ fatal("not a NativeCall"); ++ return NULL; ++} ++ ++address NativeCall::destination() const { ++ address addr = (address)this; ++ address destination = target_addr_for_bl(); ++ // Do we use a trampoline stub for this call? ++ // Trampoline stubs are located behind the main code. ++ if (destination > addr) { ++ // Filter out recursive method invocation (call to verified/unverified entry point). ++ CodeBlob* cb = CodeCache::find_blob_unsafe(addr); // Else we get assertion if nmethod is zombie. ++ assert(cb && cb->is_nmethod(), "sanity"); ++ nmethod *nm = (nmethod *)cb; ++ NativeInstruction* ni = nativeInstruction_at(destination); ++ if (nm->stub_contains(destination) && ni->is_NativeCallTrampolineStub_at()) { ++ // Yes we do, so get the destination from the trampoline stub. ++ const address trampoline_stub_addr = destination; ++ destination = nativeCallTrampolineStub_at(trampoline_stub_addr)->destination(); ++ } ++ } ++ return destination; ++} ++ ++// Similar to replace_mt_safe, but just changes the destination. The ++// important thing is that free-running threads are able to execute this ++// call instruction at all times. 
++// ++// Used in the runtime linkage of calls; see class CompiledIC. ++// ++// Add parameter assert_lock to switch off assertion ++// during code generation, where no patching lock is needed. ++void NativeCall::set_destination_mt_safe(address dest, bool assert_lock) { ++ assert(!assert_lock || ++ (Patching_lock->is_locked() || SafepointSynchronize::is_at_safepoint()) || ++ CompiledICLocker::is_safe(addr_at(0)), ++ "concurrent code patching"); ++ ++ ResourceMark rm; ++ address addr_call = addr_at(0); ++ bool reachable = MacroAssembler::reachable_from_branch_short(dest - addr_call); ++ assert(NativeCall::is_call_at(addr_call), "unexpected code at call site"); ++ ++ // Patch the call. ++ if (!reachable) { ++ address trampoline_stub_addr = get_trampoline(); ++ assert (trampoline_stub_addr != NULL, "we need a trampoline"); ++ guarantee(Assembler::is_simm((trampoline_stub_addr - addr_call) >> 2, 26), "cannot reach trampoline stub"); ++ ++ // Patch the constant in the call's trampoline stub. ++ NativeInstruction* ni = nativeInstruction_at(dest); ++ assert (! ni->is_NativeCallTrampolineStub_at(), "chained trampolines"); ++ nativeCallTrampolineStub_at(trampoline_stub_addr)->set_destination(dest); ++ dest = trampoline_stub_addr; ++ } ++ set_destination(dest); ++} ++ ++address NativeCall::get_trampoline() { ++ address call_addr = addr_at(0); ++ ++ CodeBlob *code = CodeCache::find_blob(call_addr); ++ assert(code != NULL, "Could not find the containing code blob"); ++ ++ address bl_destination ++ = nativeCall_at(call_addr)->target_addr_for_bl(); ++ NativeInstruction* ni = nativeInstruction_at(bl_destination); ++ if (code->contains(bl_destination) && ++ ni->is_NativeCallTrampolineStub_at()) ++ return bl_destination; ++ ++ if (code->is_nmethod()) { ++ return trampoline_stub_Relocation::get_trampoline_for(call_addr, (nmethod*)code); ++ } ++ ++ return NULL; ++} ++ ++void NativeCall::set_destination(address dest) { ++ address addr_call = addr_at(0); ++ CodeBuffer cb(addr_call, instruction_size); ++ MacroAssembler masm(&cb); ++ assert(is_call_at(addr_call), "unexpected call type"); ++ jlong offs = dest - addr_call; ++ masm.bl(offs >> 2); ++ ICache::invalidate_range(addr_call, instruction_size); ++} ++ ++// Generate a trampoline for a branch to dest. If there's no need for a ++// trampoline, simply patch the call directly to dest. ++address NativeCall::trampoline_jump(CodeBuffer &cbuf, address dest) { ++ MacroAssembler a(&cbuf); ++ address stub = NULL; ++ ++ if (a.far_branches() ++ && ! is_NativeCallTrampolineStub_at()) { ++ stub = a.emit_trampoline_stub(instruction_address() - cbuf.insts()->start(), dest); ++ } ++ ++ if (stub == NULL) { ++ // If we generated no stub, patch this call directly to dest. ++ // This will happen if we don't need far branches or if there ++ // already was a trampoline. ++ set_destination(dest); ++ } ++ ++ return stub; ++} ++ ++void NativeCall::print() { ++ tty->print_cr(PTR_FORMAT ": call " PTR_FORMAT, ++ p2i(instruction_address()), p2i(destination())); ++} ++ ++// Inserts a native call instruction at a given pc ++void NativeCall::insert(address code_pos, address entry) { ++ //TODO: LA ++ guarantee(0, "LA not implemented yet"); ++} ++ ++// MT-safe patching of a call instruction. ++// First patches first word of instruction to two jmp's that jmps to them ++// selfs (spinlock). Then patches the last byte, and then atomicly replaces ++// the jmp's with the first 4 byte of the new instruction. 
++void NativeCall::replace_mt_safe(address instr_addr, address code_buffer) { ++ Unimplemented(); ++} ++ ++bool NativeFarCall::is_short() const { ++ return Assembler::high(int_at(0), 10) == Assembler::andi_op && ++ Assembler::low(int_at(0), 22) == 0 && ++ Assembler::high(int_at(4), 6) == Assembler::bl_op; ++} ++ ++bool NativeFarCall::is_far() const { ++ return Assembler::high(int_at(0), 7) == Assembler::pcaddu18i_op && ++ Assembler::high(int_at(4), 6) == Assembler::jirl_op && ++ Assembler::low(int_at(4), 5) == RA->encoding(); ++} ++ ++address NativeFarCall::destination(address orig_addr) const { ++ address addr = orig_addr ? orig_addr : addr_at(0); ++ ++ if (is_short()) { ++ // short ++ return addr + BytesPerInstWord + ++ (Assembler::simm26(((int_at(4) & 0x3ff) << 16) | ++ ((int_at(4) >> 10) & 0xffff)) << 2); ++ } ++ ++ if (is_far()) { ++ // far ++ return addr + ((intptr_t)Assembler::simm20(int_at(0) >> 5 & 0xfffff) << 18) + ++ (Assembler::simm16(int_at(4) >> 10 & 0xffff) << 2); ++ } ++ ++ fatal("not a NativeFarCall"); ++ return NULL; ++} ++ ++void NativeFarCall::set_destination(address dest) { ++ address addr_call = addr_at(0); ++ CodeBuffer cb(addr_call, instruction_size); ++ MacroAssembler masm(&cb); ++ assert(is_far_call_at(addr_call), "unexpected call type"); ++ masm.patchable_call(dest, addr_call); ++ ICache::invalidate_range(addr_call, instruction_size); ++} ++ ++void NativeFarCall::verify() { ++ assert(is_short() || is_far(), "not a NativeFarcall"); ++} ++ ++//------------------------------------------------------------------- ++ ++bool NativeMovConstReg::is_lu12iw_ori_lu32id() const { ++ return Assembler::high(int_at(0), 7) == Assembler::lu12i_w_op && ++ Assembler::high(int_at(4), 10) == Assembler::ori_op && ++ Assembler::high(int_at(8), 7) == Assembler::lu32i_d_op; ++} ++ ++bool NativeMovConstReg::is_lu12iw_lu32id_nop() const { ++ return Assembler::high(int_at(0), 7) == Assembler::lu12i_w_op && ++ Assembler::high(int_at(4), 7) == Assembler::lu32i_d_op && ++ Assembler::high(int_at(8), 10) == Assembler::andi_op; ++} ++ ++bool NativeMovConstReg::is_lu12iw_2nop() const { ++ return Assembler::high(int_at(0), 7) == Assembler::lu12i_w_op && ++ Assembler::high(int_at(4), 10) == Assembler::andi_op && ++ Assembler::high(int_at(8), 10) == Assembler::andi_op; ++} ++ ++bool NativeMovConstReg::is_lu12iw_ori_nop() const { ++ return Assembler::high(int_at(0), 7) == Assembler::lu12i_w_op && ++ Assembler::high(int_at(4), 10) == Assembler::ori_op && ++ Assembler::high(int_at(8), 10) == Assembler::andi_op; ++} ++ ++bool NativeMovConstReg::is_ori_2nop() const { ++ return Assembler::high(int_at(0), 10) == Assembler::ori_op && ++ Assembler::high(int_at(4), 10) == Assembler::andi_op && ++ Assembler::high(int_at(8), 10) == Assembler::andi_op; ++} ++ ++bool NativeMovConstReg::is_addid_2nop() const { ++ return Assembler::high(int_at(0), 10) == Assembler::addi_d_op && ++ Assembler::high(int_at(4), 10) == Assembler::andi_op && ++ Assembler::high(int_at(8), 10) == Assembler::andi_op; ++} ++ ++void NativeMovConstReg::verify() { ++ assert(is_li52(), "not a mov reg, imm52"); ++} ++ ++void NativeMovConstReg::print() { ++ tty->print_cr(PTR_FORMAT ": mov reg, " INTPTR_FORMAT, ++ p2i(instruction_address()), data()); ++} ++ ++intptr_t NativeMovConstReg::data() const { ++ if (is_lu12iw_ori_lu32id()) { ++ return Assembler::merge((intptr_t)((int_at(4) >> 10) & 0xfff), ++ (intptr_t)((int_at(0) >> 5) & 0xfffff), ++ (intptr_t)((int_at(8) >> 5) & 0xfffff)); ++ } ++ ++ if (is_lu12iw_lu32id_nop()) { ++ return 
Assembler::merge((intptr_t)0, ++ (intptr_t)((int_at(0) >> 5) & 0xfffff), ++ (intptr_t)((int_at(4) >> 5) & 0xfffff)); ++ } ++ ++ if (is_lu12iw_2nop()) { ++ return Assembler::merge((intptr_t)0, ++ (intptr_t)((int_at(0) >> 5) & 0xfffff)); ++ } ++ ++ if (is_lu12iw_ori_nop()) { ++ return Assembler::merge((intptr_t)((int_at(4) >> 10) & 0xfff), ++ (intptr_t)((int_at(0) >> 5) & 0xfffff)); ++ } ++ ++ if (is_ori_2nop()) { ++ return (int_at(0) >> 10) & 0xfff; ++ } ++ ++ if (is_addid_2nop()) { ++ return Assembler::simm12((int_at(0) >> 10) & 0xfff); ++ } ++ ++#ifndef PRODUCT ++ Disassembler::decode(addr_at(0), addr_at(0) + 16, tty); ++#endif ++ fatal("not a mov reg, imm52"); ++ return 0; // unreachable ++} ++ ++void NativeMovConstReg::set_data(intptr_t x, intptr_t o) { ++ CodeBuffer cb(addr_at(0), instruction_size); ++ MacroAssembler masm(&cb); ++ masm.patchable_li52(as_Register(int_at(0) & 0x1f), x); ++ ICache::invalidate_range(addr_at(0), instruction_size); ++ ++ // Find and replace the oop/metadata corresponding to this ++ // instruction in oops section. ++ CodeBlob* blob = CodeCache::find_blob_unsafe(instruction_address()); ++ nmethod* nm = blob->as_nmethod_or_null(); ++ if (nm != NULL) { ++ o = o ? o : x; ++ RelocIterator iter(nm, instruction_address(), next_instruction_address()); ++ while (iter.next()) { ++ if (iter.type() == relocInfo::oop_type) { ++ oop* oop_addr = iter.oop_reloc()->oop_addr(); ++ *oop_addr = cast_to_oop(o); ++ break; ++ } else if (iter.type() == relocInfo::metadata_type) { ++ Metadata** metadata_addr = iter.metadata_reloc()->metadata_addr(); ++ *metadata_addr = (Metadata*)o; ++ break; ++ } ++ } ++ } ++} ++ ++//------------------------------------------------------------------- ++ ++int NativeMovRegMem::offset() const{ ++ //TODO: LA ++ guarantee(0, "LA not implemented yet"); ++ return 0; // mute compiler ++} ++ ++void NativeMovRegMem::set_offset(int x) { ++ //TODO: LA ++ guarantee(0, "LA not implemented yet"); ++} ++ ++void NativeMovRegMem::verify() { ++ //TODO: LA ++ guarantee(0, "LA not implemented yet"); ++} ++ ++ ++void NativeMovRegMem::print() { ++ //TODO: LA ++ guarantee(0, "LA not implemented yet"); ++} ++ ++bool NativeInstruction::is_sigill_zombie_not_entrant() { ++ return uint_at(0) == NativeIllegalInstruction::instruction_code; ++} ++ ++bool NativeInstruction::is_stop() { ++ return uint_at(0) == 0x04000000; // csrrd R0 0 ++} ++ ++void NativeIllegalInstruction::insert(address code_pos) { ++ *(juint*)code_pos = instruction_code; ++ ICache::invalidate_range(code_pos, instruction_size); ++} ++ ++void NativeJump::verify() { ++ assert(is_short() || is_far(), "not a general jump instruction"); ++} ++ ++bool NativeJump::is_short() { ++ return Assembler::high(insn_word(), 6) == Assembler::b_op; ++} ++ ++bool NativeJump::is_far() { ++ return Assembler::high(int_at(0), 7) == Assembler::pcaddu18i_op && ++ Assembler::high(int_at(4), 6) == Assembler::jirl_op && ++ Assembler::low(int_at(4), 5) == R0->encoding(); ++} ++ ++address NativeJump::jump_destination(address orig_addr) { ++ address addr = orig_addr ? orig_addr : addr_at(0); ++ address ret = (address)-1; ++ ++ // short ++ if (is_short()) { ++ ret = addr + (Assembler::simm26(((int_at(0) & 0x3ff) << 16) | ++ ((int_at(0) >> 10) & 0xffff)) << 2); ++ return ret == instruction_address() ? (address)-1 : ret; ++ } ++ ++ // far ++ if (is_far()) { ++ ret = addr + ((intptr_t)Assembler::simm20(int_at(0) >> 5 & 0xfffff) << 18) + ++ (Assembler::simm16(int_at(4) >> 10 & 0xffff) << 2); ++ return ret == instruction_address() ? 
(address)-1 : ret; ++ } ++ ++ fatal("not a jump"); ++ return NULL; ++} ++ ++void NativeJump::set_jump_destination(address dest) { ++ OrderAccess::fence(); ++ ++ CodeBuffer cb(addr_at(0), instruction_size); ++ MacroAssembler masm(&cb); ++ masm.patchable_jump(dest); ++ ICache::invalidate_range(addr_at(0), instruction_size); ++} ++ ++void NativeGeneralJump::insert_unconditional(address code_pos, address entry) { ++ //TODO: LA ++ guarantee(0, "LA not implemented yet"); ++} ++ ++// MT-safe patching of a long jump instruction. ++// First patches first word of instruction to two jmp's that jmps to them ++// selfs (spinlock). Then patches the last byte, and then atomicly replaces ++// the jmp's with the first 4 byte of the new instruction. ++void NativeGeneralJump::replace_mt_safe(address instr_addr, address code_buffer) { ++ //TODO: LA ++ guarantee(0, "LA not implemented yet"); ++} ++ ++// Must ensure atomicity ++void NativeJump::patch_verified_entry(address entry, address verified_entry, address dest) { ++ assert(dest == SharedRuntime::get_handle_wrong_method_stub(), "expected fixed destination of patch"); ++ jlong offs = dest - verified_entry; ++ ++ if (MacroAssembler::reachable_from_branch_short(offs)) { ++ CodeBuffer cb(verified_entry, 1 * BytesPerInstWord); ++ MacroAssembler masm(&cb); ++ masm.b(dest); ++ } else { ++ // We use an illegal instruction for marking a method as ++ // not_entrant or zombie ++ NativeIllegalInstruction::insert(verified_entry); ++ } ++ ICache::invalidate_range(verified_entry, 1 * BytesPerInstWord); ++} ++ ++bool NativeInstruction::is_safepoint_poll() { ++ // ++ // 390 li T2, 0x0000000000400000 #@loadConP ++ // 394 st_w [SP + #12], V1 # spill 9 ++ // 398 Safepoint @ [T2] : poll for GC @ safePoint_poll # spec.benchmarks.compress.Decompressor::decompress @ bci:224 L[0]=A6 L[1]=_ L[2]=sp + #28 L[3]=_ L[4]=V1 ++ // ++ // 0x000000ffe5815130: lu12i_w t2, 0x40 ++ // 0x000000ffe5815134: st_w v1, 0xc(sp) ; OopMap{a6=Oop off=920} ++ // ;*goto ++ // ; - spec.benchmarks.compress.Decompressor::decompress@224 (line 584) ++ // ++ // 0x000000ffe5815138: ld_w at, 0x0(t2) ;*goto <--- PC ++ // ; - spec.benchmarks.compress.Decompressor::decompress@224 (line 584) ++ // ++ ++ // Since there may be some spill instructions between the safePoint_poll and loadConP, ++ // we check the safepoint instruction like this. ++ return Assembler::high(insn_word(), 10) == Assembler::ld_w_op && ++ Assembler::low(insn_word(), 5) == AT->encoding(); ++} +diff --git a/src/hotspot/cpu/loongarch/nativeInst_loongarch.hpp b/src/hotspot/cpu/loongarch/nativeInst_loongarch.hpp +new file mode 100644 +index 00000000000..0ec8ebddf09 +--- /dev/null ++++ b/src/hotspot/cpu/loongarch/nativeInst_loongarch.hpp +@@ -0,0 +1,531 @@ ++/* ++ * Copyright (c) 1997, 2011, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2021, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). 
++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#ifndef CPU_LOONGARCH_NATIVEINST_LOONGARCH_HPP ++#define CPU_LOONGARCH_NATIVEINST_LOONGARCH_HPP ++ ++#include "asm/assembler.hpp" ++#include "asm/macroAssembler.hpp" ++#include "runtime/icache.hpp" ++#include "runtime/os.hpp" ++#include "runtime/safepointMechanism.hpp" ++ ++// We have interfaces for the following instructions: ++// - NativeInstruction ++// - - NativeCall ++// - - NativeMovConstReg ++// - - NativeMovConstRegPatching ++// - - NativeMovRegMem ++// - - NativeMovRegMemPatching ++// - - NativeIllegalOpCode ++// - - NativeGeneralJump ++// - - NativePushConst ++// - - NativeTstRegMem ++ ++// The base class for different kinds of native instruction abstractions. ++// Provides the primitive operations to manipulate code relative to this. ++ ++class NativeInstruction { ++ friend class Relocation; ++ ++ public: ++ enum loongarch_specific_constants { ++ nop_instruction_code = 0, ++ nop_instruction_size = 4, ++ sync_instruction_code = 0xf, ++ sync_instruction_size = 4 ++ }; ++ ++ bool is_nop() { guarantee(0, "LA not implemented yet"); return long_at(0) == nop_instruction_code; } ++ bool is_sync() { return Assembler::high(insn_word(), 17) == Assembler::dbar_op; } ++ inline bool is_call(); ++ inline bool is_far_call(); ++ inline bool is_illegal(); ++ bool is_jump(); ++ bool is_safepoint_poll(); ++ ++ // Helper func for jvmci ++ bool is_lu12iw_lu32id() const; ++ bool is_pcaddu12i_add() const; ++ ++ // LoongArch has no instruction to generate a illegal instrucion exception? ++ // But `break 11` is not illegal instruction for LoongArch. ++ static int illegal_instruction(); ++ ++ bool is_int_branch(); ++ bool is_float_branch(); ++ ++ inline bool is_NativeCallTrampolineStub_at(); ++ //We use an illegal instruction for marking a method as not_entrant or zombie. 
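The offset arithmetic behind the short and far forms handled by NativeFarCall::destination() and NativeJump::jump_destination() earlier in nativeInst_loongarch.cpp can be checked in isolation. The sketch below is a standalone model of that arithmetic, assuming the documented LoongArch field placement (b/bl: offs[15:0] in bits 25:10, offs[25:16] in bits 9:0; pcaddu18i: si20 in bits 24:5; jirl: offs16 in bits 25:10); the helper names and the encodings built in main() are illustrative and not part of the patch.

#include <cstdint>
#include <cstdio>

// Sign-extend the low `bits` bits of v.
static int64_t sext(uint64_t v, int bits) {
  return (int64_t)(v << (64 - bits)) >> (64 - bits);
}

// Short form: destination = address of the b/bl + (offs26 << 2).
static int64_t short_branch_dest(int64_t pc, uint32_t insn) {
  uint64_t offs26 = ((insn & 0x3ff) << 16) | ((insn >> 10) & 0xffff);
  return pc + (sext(offs26, 26) << 2);
}

// Far form: destination = address of the pcaddu18i + (si20 << 18) + (offs16 << 2).
static int64_t far_branch_dest(int64_t pc, uint32_t pcaddu18i, uint32_t jirl) {
  int64_t hi = sext((pcaddu18i >> 5) & 0xfffff, 20) << 18;
  int64_t lo = sext((jirl >> 10) & 0xffff, 16) << 2;
  return pc + hi + lo;
}

int main() {
  // bl with offs26 == -1 branches back by one instruction (0x54000000 is bl's major opcode).
  uint32_t bl = 0x54000000u | (0xffffu << 10) | 0x3ffu;
  printf("%lld\n", (long long)(short_branch_dest(0x1000, bl) - 0x1000)); // -4

  // pcaddu18i with si20 == 1 followed by jirl with offs16 == 4 reaches pc + 0x40010.
  uint32_t pcaddu18i = 0x1e000000u | (1u << 5);
  uint32_t jirl      = 0x4c000000u | (4u << 10) | (1u << 5);
  printf("%#llx\n", (unsigned long long)(far_branch_dest(0x1000, pcaddu18i, jirl) - 0x1000)); // 0x40010
  return 0;
}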
++ bool is_sigill_zombie_not_entrant(); ++ bool is_stop(); ++ ++ protected: ++ address addr_at(int offset) const { return address(this) + offset; } ++ address instruction_address() const { return addr_at(0); } ++ address next_instruction_address() const { return addr_at(BytesPerInstWord); } ++ address prev_instruction_address() const { return addr_at(-BytesPerInstWord); } ++ ++ s_char sbyte_at(int offset) const { return *(s_char*) addr_at(offset); } ++ u_char ubyte_at(int offset) const { return *(u_char*) addr_at(offset); } ++ ++ jint int_at(int offset) const { return *(jint*) addr_at(offset); } ++ juint uint_at(int offset) const { return *(juint*) addr_at(offset); } ++ ++ intptr_t ptr_at(int offset) const { return *(intptr_t*) addr_at(offset); } ++ ++ oop oop_at (int offset) const { return *(oop*) addr_at(offset); } ++ int long_at(int offset) const { return *(jint*)addr_at(offset); } ++ ++ ++ void set_char_at(int offset, char c) { *addr_at(offset) = (u_char)c; wrote(offset); } ++ void set_int_at(int offset, jint i) { *(jint*)addr_at(offset) = i; wrote(offset); } ++ void set_ptr_at (int offset, intptr_t ptr) { *(intptr_t*) addr_at(offset) = ptr; wrote(offset); } ++ void set_oop_at (int offset, oop o) { *(oop*) addr_at(offset) = o; wrote(offset); } ++ void set_long_at(int offset, long i); ++ ++ int insn_word() const { return long_at(0); } ++ ++ void wrote(int offset); ++ ++ public: ++ ++ // unit test stuff ++ static void test() {} // override for testing ++ ++ inline friend NativeInstruction* nativeInstruction_at(address address); ++}; ++ ++inline NativeInstruction* nativeInstruction_at(address address) { ++ NativeInstruction* inst = (NativeInstruction*)address; ++#ifdef ASSERT ++ //inst->verify(); ++#endif ++ return inst; ++} ++ ++class NativeCall; ++inline NativeCall* nativeCall_at(address address); ++ ++// The NativeCall is an abstraction for accessing/manipulating native call ++// instructions (used to manipulate inline caches, primitive & dll calls, etc.). ++class NativeCall: public NativeInstruction { ++ public: ++ enum loongarch_specific_constants { ++ instruction_offset = 0, ++ instruction_size = 1 * BytesPerInstWord, ++ return_address_offset = 1 * BytesPerInstWord, ++ displacement_offset = 0 ++ }; ++ ++ // We have only bl. ++ bool is_bl() const; ++ ++ address instruction_address() const { return addr_at(instruction_offset); } ++ ++ address next_instruction_address() const { ++ return addr_at(return_address_offset); ++ } ++ ++ address return_address() const { ++ return next_instruction_address(); ++ } ++ ++ address target_addr_for_bl(address orig_addr = 0) const; ++ address destination() const; ++ void set_destination(address dest); ++ ++ void verify_alignment() {} ++ void verify(); ++ void print(); ++ ++ // Creation ++ inline friend NativeCall* nativeCall_at(address address); ++ inline friend NativeCall* nativeCall_before(address return_address); ++ ++ static bool is_call_at(address instr) { ++ return nativeInstruction_at(instr)->is_call(); ++ } ++ ++ static bool is_call_before(address return_address) { ++ return is_call_at(return_address - return_address_offset); ++ } ++ ++ // MT-safe patching of a call instruction. ++ static void insert(address code_pos, address entry); ++ static void replace_mt_safe(address instr_addr, address code_buffer); ++ ++ // Similar to replace_mt_safe, but just changes the destination. The ++ // important thing is that free-running threads are able to execute ++ // this call instruction at all times. 
If the call is an immediate bl ++ // instruction we can simply rely on atomicity of 32-bit writes to ++ // make sure other threads will see no intermediate states. ++ ++ // We cannot rely on locks here, since the free-running threads must run at ++ // full speed. ++ // ++ // Used in the runtime linkage of calls; see class CompiledIC. ++ ++ // The parameter assert_lock disables the assertion during code generation. ++ void set_destination_mt_safe(address dest, bool assert_lock = true); ++ ++ address get_trampoline(); ++ address trampoline_jump(CodeBuffer &cbuf, address dest); ++}; ++ ++inline NativeCall* nativeCall_at(address address) { ++ NativeCall* call = (NativeCall*)(address - NativeCall::instruction_offset); ++#ifdef ASSERT ++ call->verify(); ++#endif ++ return call; ++} ++ ++inline NativeCall* nativeCall_before(address return_address) { ++ NativeCall* call = (NativeCall*)(return_address - NativeCall::return_address_offset); ++#ifdef ASSERT ++ call->verify(); ++#endif ++ return call; ++} ++ ++// The NativeFarCall is an abstraction for accessing/manipulating native ++// call-anywhere instructions. ++// Used to call native methods which may be loaded anywhere in the address ++// space, possibly out of reach of a call instruction. ++class NativeFarCall: public NativeInstruction { ++ public: ++ enum loongarch_specific_constants { ++ instruction_offset = 0, ++ instruction_size = 2 * BytesPerInstWord ++ }; ++ ++ address instruction_address() const { return addr_at(instruction_offset); } ++ ++ // We use MacroAssembler::patchable_call() for implementing a ++ // call-anywhere instruction. ++ bool is_short() const; ++ bool is_far() const; ++ ++ // Checks whether instr points at a NativeFarCall instruction. ++ static bool is_far_call_at(address address) { ++ return nativeInstruction_at(address)->is_far_call(); ++ } ++ ++ // Returns the NativeFarCall's destination. ++ address destination(address orig_addr = 0) const; ++ ++ // Sets the NativeFarCall's destination, not necessarily mt-safe. ++ // Used when relocating code. ++ void set_destination(address dest); ++ ++ void verify(); ++}; ++ ++// Instantiates a NativeFarCall object starting at the given instruction ++// address and returns the NativeFarCall object. ++inline NativeFarCall* nativeFarCall_at(address address) { ++ NativeFarCall* call = (NativeFarCall*)address; ++#ifdef ASSERT ++ call->verify(); ++#endif ++ return call; ++} ++ ++// An interface for accessing/manipulating native set_oop imm, reg instructions ++// (used to manipulate inlined data references, etc.). 
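The li52 idiom that the next class, NativeMovConstReg, recognizes builds a 52-bit constant from at most three instructions: lu12i.w supplies bits 31:12, ori fills bits 11:0, lu32i.d supplies bits 51:32, and nops pad the unused slots. A minimal sketch of the recombination for the full is_lu12iw_ori_lu32id() form follows, assuming the usual semantics of those three instructions; Assembler::merge() in NativeMovConstReg::data() presumably computes the same thing. The function name and sample value are illustrative only.

#include <cstdint>
#include <cstdio>

// Recombine the immediate fields of a lu12i.w / ori / lu32i.d sequence into
// the 52-bit constant it loads, mirroring what NativeMovConstReg::data()
// does for the is_lu12iw_ori_lu32id() pattern.
static int64_t li52(uint32_t lu12i_si20, uint32_t ori_ui12, uint32_t lu32i_si20) {
  // lu12i.w rd, si20    : rd = sign_extend(si20 << 12)   -> bits 31:12
  // ori     rd, rd, ui12: rd |= ui12                     -> bits 11:0
  // lu32i.d rd, si20    : rd[63:32] = sign_extend(si20)  -> bits 51:32 plus sign
  int64_t low32 = (uint32_t)((lu12i_si20 << 12) | (ori_ui12 & 0xfff));
  int64_t high  = (int64_t)(int32_t)(lu32i_si20 << 12) * (1LL << 20);
  return high | low32;
}

int main() {
  // 0x123456789a decomposes as lu12i si20 = 0x34567, ori ui12 = 0x89a, lu32i si20 = 0x12.
  printf("%#llx\n", (unsigned long long)li52(0x34567, 0x89a, 0x12)); // prints 0x123456789a
  return 0;
}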
++class NativeMovConstReg: public NativeInstruction { ++ public: ++ enum loongarch_specific_constants { ++ instruction_offset = 0, ++ instruction_size = 3 * BytesPerInstWord, ++ next_instruction_offset = 3 * BytesPerInstWord, ++ }; ++ ++ int insn_word() const { return long_at(instruction_offset); } ++ address instruction_address() const { return addr_at(0); } ++ address next_instruction_address() const { return addr_at(next_instruction_offset); } ++ intptr_t data() const; ++ void set_data(intptr_t x, intptr_t o = 0); ++ ++ bool is_li52() const { ++ return is_lu12iw_ori_lu32id() || ++ is_lu12iw_lu32id_nop() || ++ is_lu12iw_2nop() || ++ is_lu12iw_ori_nop() || ++ is_ori_2nop() || ++ is_addid_2nop(); ++ } ++ bool is_lu12iw_ori_lu32id() const; ++ bool is_lu12iw_lu32id_nop() const; ++ bool is_lu12iw_2nop() const; ++ bool is_lu12iw_ori_nop() const; ++ bool is_ori_2nop() const; ++ bool is_addid_2nop() const; ++ void verify(); ++ void print(); ++ ++ // unit test stuff ++ static void test() {} ++ ++ // Creation ++ inline friend NativeMovConstReg* nativeMovConstReg_at(address address); ++ inline friend NativeMovConstReg* nativeMovConstReg_before(address address); ++}; ++ ++inline NativeMovConstReg* nativeMovConstReg_at(address address) { ++ NativeMovConstReg* test = (NativeMovConstReg*)(address - NativeMovConstReg::instruction_offset); ++#ifdef ASSERT ++ test->verify(); ++#endif ++ return test; ++} ++ ++inline NativeMovConstReg* nativeMovConstReg_before(address address) { ++ NativeMovConstReg* test = (NativeMovConstReg*)(address - NativeMovConstReg::instruction_size - NativeMovConstReg::instruction_offset); ++#ifdef ASSERT ++ test->verify(); ++#endif ++ return test; ++} ++ ++class NativeMovConstRegPatching: public NativeMovConstReg { ++ private: ++ friend NativeMovConstRegPatching* nativeMovConstRegPatching_at(address address) { ++ NativeMovConstRegPatching* test = (NativeMovConstRegPatching*)(address - instruction_offset); ++ #ifdef ASSERT ++ test->verify(); ++ #endif ++ return test; ++ } ++}; ++ ++class NativeMovRegMem: public NativeInstruction { ++ public: ++ enum loongarch_specific_constants { ++ instruction_offset = 0, ++ instruction_size = 4, ++ hiword_offset = 4, ++ ldst_offset = 12, ++ immediate_size = 4, ++ ldst_size = 16 ++ }; ++ ++ address instruction_address() const { return addr_at(instruction_offset); } ++ ++ int num_bytes_to_end_of_patch() const { return instruction_offset + instruction_size; } ++ ++ int offset() const; ++ ++ void set_offset(int x); ++ ++ void add_offset_in_bytes(int add_offset) { set_offset ( ( offset() + add_offset ) ); } ++ ++ void verify(); ++ void print (); ++ ++ // unit test stuff ++ static void test() {} ++ ++ private: ++ inline friend NativeMovRegMem* nativeMovRegMem_at (address address); ++}; ++ ++inline NativeMovRegMem* nativeMovRegMem_at (address address) { ++ NativeMovRegMem* test = (NativeMovRegMem*)(address - NativeMovRegMem::instruction_offset); ++#ifdef ASSERT ++ test->verify(); ++#endif ++ return test; ++} ++ ++class NativeMovRegMemPatching: public NativeMovRegMem { ++ private: ++ friend NativeMovRegMemPatching* nativeMovRegMemPatching_at (address address) { ++ NativeMovRegMemPatching* test = (NativeMovRegMemPatching*)(address - instruction_offset); ++ #ifdef ASSERT ++ test->verify(); ++ #endif ++ return test; ++ } ++}; ++ ++ ++// Handles all kinds of jump on Loongson. 
++// short: ++// b offs26 ++// nop ++// ++// far: ++// pcaddu18i reg, si20 ++// jirl r0, reg, si18 ++// ++class NativeJump: public NativeInstruction { ++ public: ++ enum loongarch_specific_constants { ++ instruction_offset = 0, ++ instruction_size = 2 * BytesPerInstWord ++ }; ++ ++ bool is_short(); ++ bool is_far(); ++ ++ address instruction_address() const { return addr_at(instruction_offset); } ++ address jump_destination(address orig_addr = 0); ++ void set_jump_destination(address dest); ++ ++ // Creation ++ inline friend NativeJump* nativeJump_at(address address); ++ ++ // Insertion of native jump instruction ++ static void insert(address code_pos, address entry) { Unimplemented(); } ++ // MT-safe insertion of native jump at verified method entry ++ static void check_verified_entry_alignment(address entry, address verified_entry){} ++ static void patch_verified_entry(address entry, address verified_entry, address dest); ++ ++ void verify(); ++}; ++ ++inline NativeJump* nativeJump_at(address address) { ++ NativeJump* jump = (NativeJump*)(address - NativeJump::instruction_offset); ++ debug_only(jump->verify();) ++ return jump; ++} ++ ++class NativeGeneralJump: public NativeJump { ++ public: ++ // Creation ++ inline friend NativeGeneralJump* nativeGeneralJump_at(address address); ++ ++ // Insertion of native general jump instruction ++ static void insert_unconditional(address code_pos, address entry); ++ static void replace_mt_safe(address instr_addr, address code_buffer); ++}; ++ ++inline NativeGeneralJump* nativeGeneralJump_at(address address) { ++ NativeGeneralJump* jump = (NativeGeneralJump*)(address); ++ debug_only(jump->verify();) ++ return jump; ++} ++ ++class NativeIllegalInstruction: public NativeInstruction { ++public: ++ enum loongarch_specific_constants { ++ instruction_code = 0xbadc0de0, // TODO: LA ++ // Temporary LoongArch reserved instruction ++ instruction_size = 4, ++ instruction_offset = 0, ++ next_instruction_offset = 4 ++ }; ++ ++ // Insert illegal opcode as specific address ++ static void insert(address code_pos); ++}; ++ ++inline bool NativeInstruction::is_illegal() { return insn_word() == illegal_instruction(); } ++ ++inline bool NativeInstruction::is_call() { ++ NativeCall *call = (NativeCall*)instruction_address(); ++ return call->is_bl(); ++} ++ ++inline bool NativeInstruction::is_far_call() { ++ NativeFarCall *call = (NativeFarCall*)instruction_address(); ++ ++ // short ++ if (call->is_short()) { ++ return true; ++ } ++ ++ // far ++ if (call->is_far()) { ++ return true; ++ } ++ ++ return false; ++} ++ ++inline bool NativeInstruction::is_jump() ++{ ++ NativeGeneralJump *jump = (NativeGeneralJump*)instruction_address(); ++ ++ // short ++ if (jump->is_short()) { ++ return true; ++ } ++ ++ // far ++ if (jump->is_far()) { ++ return true; ++ } ++ ++ return false; ++} ++ ++// Call trampoline stubs. ++class NativeCallTrampolineStub : public NativeInstruction { ++ public: ++ ++ enum la_specific_constants { ++ instruction_size = 6 * 4, ++ instruction_offset = 0, ++ data_offset = 4 * 4, ++ next_instruction_offset = 6 * 4 ++ }; ++ ++ address destination() const { ++ return (address)ptr_at(data_offset); ++ } ++ ++ void set_destination(address new_destination) { ++ set_ptr_at(data_offset, (intptr_t)new_destination); ++ OrderAccess::fence(); ++ } ++}; ++ ++// Note: Other stubs must not begin with this pattern. 
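Taken together, the la_specific_constants of NativeCallTrampolineStub describe a 24-byte stub: a pcaddi/ld_d/jirl prefix (the pattern matched by is_NativeCallTrampolineStub_at() just below), one padding word, and an 8-byte destination slot at data_offset == 16 that destination() and set_destination() access directly. The struct below is only a layout model under those assumptions; the exact register operands and immediates are chosen by the MacroAssembler and are not spelled out here, and the struct and field names are illustrative.

#include <cstddef>
#include <cstdint>

// Layout implied by la_specific_constants: 6 words in total, with the 64-bit
// target stored after the code so it can be repointed by patching only the
// data slot, leaving the recognizable instruction prefix untouched.
struct TrampolineStubLayout {
  uint32_t pcaddi;  // forms a pc-relative address for reaching `dest` (si20 chosen by the emitter)
  uint32_t ld_d;    // loads the 64-bit destination into a scratch register
  uint32_t jirl;    // indirect jump; rd == r0, so no link (as checked below)
  uint32_t pad;     // keeps `dest` 8-byte aligned
  uint64_t dest;    // data_offset == 16; updated by set_destination()
};

static_assert(sizeof(TrampolineStubLayout) == 24, "instruction_size == 6 * 4");
static_assert(offsetof(TrampolineStubLayout, dest) == 16, "data_offset == 4 * 4");

int main() { return 0; }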
++inline bool NativeInstruction::is_NativeCallTrampolineStub_at() { ++ // pcaddi ++ // ld_d ++ // jirl ++ return Assembler::high(int_at(0), 7) == Assembler::pcaddi_op && ++ Assembler::high(int_at(4), 10) == Assembler::ld_d_op && ++ Assembler::high(int_at(8), 6) == Assembler::jirl_op && ++ Assembler::low(int_at(8), 5) == R0->encoding(); ++} ++ ++inline NativeCallTrampolineStub* nativeCallTrampolineStub_at(address addr) { ++ NativeInstruction* ni = nativeInstruction_at(addr); ++ assert(ni->is_NativeCallTrampolineStub_at(), "no call trampoline found"); ++ return (NativeCallTrampolineStub*)addr; ++} ++ ++class NativeMembar : public NativeInstruction { ++public: ++ unsigned int get_hint() { return Assembler::low(insn_word(), 4); } ++ void set_hint(int hint) { Assembler::patch(addr_at(0), 4, hint); } ++}; ++ ++#endif // CPU_LOONGARCH_NATIVEINST_LOONGARCH_HPP +diff --git a/src/hotspot/cpu/loongarch/registerMap_loongarch.hpp b/src/hotspot/cpu/loongarch/registerMap_loongarch.hpp +new file mode 100644 +index 00000000000..07aa5b22817 +--- /dev/null ++++ b/src/hotspot/cpu/loongarch/registerMap_loongarch.hpp +@@ -0,0 +1,61 @@ ++/* ++ * Copyright (c) 1998, 2010, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#ifndef CPU_LOONGARCH_REGISTERMAP_LOONGARCH_HPP ++#define CPU_LOONGARCH_REGISTERMAP_LOONGARCH_HPP ++ ++// machine-dependent implemention for register maps ++ friend class frame; ++ ++ private: ++#ifndef CORE ++ // This is the hook for finding a register in an "well-known" location, ++ // such as a register block of a predetermined format. ++ // Since there is none, we just return NULL. ++ // See registerMap_sparc.hpp for an example of grabbing registers ++ // from register save areas of a standard layout. 
++ address pd_location(VMReg reg) const {return NULL;} ++ address pd_location(VMReg base_reg, int slot_idx) const { ++ if (base_reg->is_FloatRegister()) { ++ assert(base_reg->is_concrete(), "must pass base reg"); ++ intptr_t offset_in_bytes = slot_idx * VMRegImpl::stack_slot_size; ++ address base_location = location(base_reg); ++ if (base_location != NULL) { ++ return base_location + offset_in_bytes; ++ } else { ++ return NULL; ++ } ++ } else { ++ return location(base_reg->next(slot_idx)); ++ } ++ } ++#endif ++ ++ // no PD state to clear or copy: ++ void pd_clear() {} ++ void pd_initialize() {} ++ void pd_initialize_from(const RegisterMap* map) {} ++ ++#endif // CPU_LOONGARCH_REGISTERMAP_LOONGARCH_HPP +diff --git a/src/hotspot/cpu/loongarch/register_definitions_loongarch.cpp b/src/hotspot/cpu/loongarch/register_definitions_loongarch.cpp +new file mode 100644 +index 00000000000..58f40b747c2 +--- /dev/null ++++ b/src/hotspot/cpu/loongarch/register_definitions_loongarch.cpp +@@ -0,0 +1,103 @@ ++/* ++ * Copyright (c) 2002, 2013, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2021, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ * ++ */ ++ ++#include "precompiled.hpp" ++#include "asm/assembler.hpp" ++#include "asm/register.hpp" ++#include "register_loongarch.hpp" ++#ifdef TARGET_ARCH_MODEL_loongarch_32 ++# include "interp_masm_loongarch_32.hpp" ++#endif ++#ifdef TARGET_ARCH_MODEL_loongarch_64 ++# include "interp_masm_loongarch_64.hpp" ++#endif ++ ++REGISTER_DEFINITION(Register, noreg); ++REGISTER_DEFINITION(Register, r0); ++REGISTER_DEFINITION(Register, r1); ++REGISTER_DEFINITION(Register, r2); ++REGISTER_DEFINITION(Register, r3); ++REGISTER_DEFINITION(Register, r4); ++REGISTER_DEFINITION(Register, r5); ++REGISTER_DEFINITION(Register, r6); ++REGISTER_DEFINITION(Register, r7); ++REGISTER_DEFINITION(Register, r8); ++REGISTER_DEFINITION(Register, r9); ++REGISTER_DEFINITION(Register, r10); ++REGISTER_DEFINITION(Register, r11); ++REGISTER_DEFINITION(Register, r12); ++REGISTER_DEFINITION(Register, r13); ++REGISTER_DEFINITION(Register, r14); ++REGISTER_DEFINITION(Register, r15); ++REGISTER_DEFINITION(Register, r16); ++REGISTER_DEFINITION(Register, r17); ++REGISTER_DEFINITION(Register, r18); ++REGISTER_DEFINITION(Register, r19); ++REGISTER_DEFINITION(Register, r20); ++REGISTER_DEFINITION(Register, r21); ++REGISTER_DEFINITION(Register, r22); ++REGISTER_DEFINITION(Register, r23); ++REGISTER_DEFINITION(Register, r24); ++REGISTER_DEFINITION(Register, r25); ++REGISTER_DEFINITION(Register, r26); ++REGISTER_DEFINITION(Register, r27); ++REGISTER_DEFINITION(Register, r28); ++REGISTER_DEFINITION(Register, r29); ++REGISTER_DEFINITION(Register, r30); ++REGISTER_DEFINITION(Register, r31); ++ ++REGISTER_DEFINITION(FloatRegister, fnoreg); ++REGISTER_DEFINITION(FloatRegister, f0); ++REGISTER_DEFINITION(FloatRegister, f1); ++REGISTER_DEFINITION(FloatRegister, f2); ++REGISTER_DEFINITION(FloatRegister, f3); ++REGISTER_DEFINITION(FloatRegister, f4); ++REGISTER_DEFINITION(FloatRegister, f5); ++REGISTER_DEFINITION(FloatRegister, f6); ++REGISTER_DEFINITION(FloatRegister, f7); ++REGISTER_DEFINITION(FloatRegister, f8); ++REGISTER_DEFINITION(FloatRegister, f9); ++REGISTER_DEFINITION(FloatRegister, f10); ++REGISTER_DEFINITION(FloatRegister, f11); ++REGISTER_DEFINITION(FloatRegister, f12); ++REGISTER_DEFINITION(FloatRegister, f13); ++REGISTER_DEFINITION(FloatRegister, f14); ++REGISTER_DEFINITION(FloatRegister, f15); ++REGISTER_DEFINITION(FloatRegister, f16); ++REGISTER_DEFINITION(FloatRegister, f17); ++REGISTER_DEFINITION(FloatRegister, f18); ++REGISTER_DEFINITION(FloatRegister, f19); ++REGISTER_DEFINITION(FloatRegister, f20); ++REGISTER_DEFINITION(FloatRegister, f21); ++REGISTER_DEFINITION(FloatRegister, f22); ++REGISTER_DEFINITION(FloatRegister, f23); ++REGISTER_DEFINITION(FloatRegister, f24); ++REGISTER_DEFINITION(FloatRegister, f25); ++REGISTER_DEFINITION(FloatRegister, f26); ++REGISTER_DEFINITION(FloatRegister, f27); ++REGISTER_DEFINITION(FloatRegister, f28); ++REGISTER_DEFINITION(FloatRegister, f29); ++REGISTER_DEFINITION(FloatRegister, f30); ++REGISTER_DEFINITION(FloatRegister, f31); +diff --git a/src/hotspot/cpu/loongarch/register_loongarch.cpp b/src/hotspot/cpu/loongarch/register_loongarch.cpp +new file mode 100644 +index 00000000000..54d90167a52 +--- /dev/null ++++ b/src/hotspot/cpu/loongarch/register_loongarch.cpp +@@ -0,0 +1,59 @@ ++/* ++ * Copyright (c) 2000, 2012, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#include "precompiled.hpp" ++#include "register_loongarch.hpp" ++ ++const int ConcreteRegisterImpl::max_gpr = RegisterImpl::number_of_registers * RegisterImpl::max_slots_per_register; ++const int ConcreteRegisterImpl::max_fpr = ConcreteRegisterImpl::max_gpr + ++ FloatRegisterImpl::number_of_registers * FloatRegisterImpl::max_slots_per_register; ++ ++ ++const char* RegisterImpl::name() const { ++ const char* names[number_of_registers] = { ++ "zero", "ra", "tp", "sp", "a0/v0", "a1/v1", "a2", "a3", ++ "a4", "a5", "a6", "a7", "t0", "t1", "t2", "t3", ++ "t4", "t5", "t6", "t7", "t8", "x", "fp", "s0", ++ "s1", "s2", "s3", "s4", "s5", "s6", "s7", "s8" ++ }; ++ return is_valid() ? names[encoding()] : "noreg"; ++} ++ ++const char* FloatRegisterImpl::name() const { ++ const char* names[number_of_registers] = { ++ "f0", "f1", "f2", "f3", "f4", "f5", "f6", "f7", ++ "f8", "f9", "f10", "f11", "f12", "f13", "f14", "f15", ++ "f16", "f17", "f18", "f19", "f20", "f21", "f22", "f23", ++ "f24", "f25", "f26", "f27", "f28", "f29", "f30", "f31", ++ }; ++ return is_valid() ? names[encoding()] : "fnoreg"; ++} ++ ++const char* ConditionalFlagRegisterImpl::name() const { ++ const char* names[number_of_registers] = { ++ "fcc0", "fcc1", "fcc2", "fcc3", "fcc4", "fcc5", "fcc6", "fcc7", ++ }; ++ return is_valid() ? names[encoding()] : "fccnoreg"; ++} +diff --git a/src/hotspot/cpu/loongarch/register_loongarch.hpp b/src/hotspot/cpu/loongarch/register_loongarch.hpp +new file mode 100644 +index 00000000000..85669f435c7 +--- /dev/null ++++ b/src/hotspot/cpu/loongarch/register_loongarch.hpp +@@ -0,0 +1,499 @@ ++/* ++ * Copyright (c) 2000, 2012, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 
++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#ifndef CPU_LOONGARCH_REGISTER_LOONGARCH_HPP ++#define CPU_LOONGARCH_REGISTER_LOONGARCH_HPP ++ ++#include "asm/register.hpp" ++#include "logging/log.hpp" ++#include "utilities/bitMap.hpp" ++#include "utilities/formatBuffer.hpp" ++#include "utilities/ticks.hpp" ++ ++class VMRegImpl; ++typedef VMRegImpl* VMReg; ++ ++// Use Register as shortcut ++class RegisterImpl; ++typedef RegisterImpl* Register; ++ ++inline Register as_Register(int encoding) { ++ return (Register)(intptr_t) encoding; ++} ++ ++class RegisterImpl: public AbstractRegisterImpl { ++ public: ++ enum { ++ number_of_registers = 32, ++ max_slots_per_register = 2 ++ }; ++ ++ // derived registers, offsets, and addresses ++ Register successor() const { return as_Register(encoding() + 1); } ++ ++ // construction ++ inline friend Register as_Register(int encoding); ++ ++ VMReg as_VMReg(); ++ ++ // accessors ++ int encoding() const { assert(is_valid(), "invalid register (%d)", (int)(intptr_t)this ); return (intptr_t)this; } ++ bool is_valid() const { return 0 <= (intptr_t)this && (intptr_t)this < number_of_registers; } ++ const char* name() const; ++}; ++ ++// The integer registers of the LoongArch architecture ++CONSTANT_REGISTER_DECLARATION(Register, noreg, (-1)); ++ ++ ++CONSTANT_REGISTER_DECLARATION(Register, r0, (0)); ++CONSTANT_REGISTER_DECLARATION(Register, r1, (1)); ++CONSTANT_REGISTER_DECLARATION(Register, r2, (2)); ++CONSTANT_REGISTER_DECLARATION(Register, r3, (3)); ++CONSTANT_REGISTER_DECLARATION(Register, r4, (4)); ++CONSTANT_REGISTER_DECLARATION(Register, r5, (5)); ++CONSTANT_REGISTER_DECLARATION(Register, r6, (6)); ++CONSTANT_REGISTER_DECLARATION(Register, r7, (7)); ++CONSTANT_REGISTER_DECLARATION(Register, r8, (8)); ++CONSTANT_REGISTER_DECLARATION(Register, r9, (9)); ++CONSTANT_REGISTER_DECLARATION(Register, r10, (10)); ++CONSTANT_REGISTER_DECLARATION(Register, r11, (11)); ++CONSTANT_REGISTER_DECLARATION(Register, r12, (12)); ++CONSTANT_REGISTER_DECLARATION(Register, r13, (13)); ++CONSTANT_REGISTER_DECLARATION(Register, r14, (14)); ++CONSTANT_REGISTER_DECLARATION(Register, r15, (15)); ++CONSTANT_REGISTER_DECLARATION(Register, r16, (16)); ++CONSTANT_REGISTER_DECLARATION(Register, r17, (17)); ++CONSTANT_REGISTER_DECLARATION(Register, r18, (18)); ++CONSTANT_REGISTER_DECLARATION(Register, r19, (19)); ++CONSTANT_REGISTER_DECLARATION(Register, r20, (20)); ++CONSTANT_REGISTER_DECLARATION(Register, r21, (21)); ++CONSTANT_REGISTER_DECLARATION(Register, r22, (22)); ++CONSTANT_REGISTER_DECLARATION(Register, r23, (23)); ++CONSTANT_REGISTER_DECLARATION(Register, r24, (24)); ++CONSTANT_REGISTER_DECLARATION(Register, r25, (25)); ++CONSTANT_REGISTER_DECLARATION(Register, r26, (26)); ++CONSTANT_REGISTER_DECLARATION(Register, r27, (27)); ++CONSTANT_REGISTER_DECLARATION(Register, r28, (28)); ++CONSTANT_REGISTER_DECLARATION(Register, r29, (29)); ++CONSTANT_REGISTER_DECLARATION(Register, r30, (30)); ++CONSTANT_REGISTER_DECLARATION(Register, r31, (31)); ++ ++#ifndef DONT_USE_REGISTER_DEFINES ++#define NOREG ((Register)(noreg_RegisterEnumValue)) ++ ++#define R0 ((Register)(r0_RegisterEnumValue)) ++#define R1 ((Register)(r1_RegisterEnumValue)) ++#define R2 ((Register)(r2_RegisterEnumValue)) ++#define R3 ((Register)(r3_RegisterEnumValue)) ++#define R4 ((Register)(r4_RegisterEnumValue)) ++#define R5 ((Register)(r5_RegisterEnumValue)) ++#define R6 
((Register)(r6_RegisterEnumValue)) ++#define R7 ((Register)(r7_RegisterEnumValue)) ++#define R8 ((Register)(r8_RegisterEnumValue)) ++#define R9 ((Register)(r9_RegisterEnumValue)) ++#define R10 ((Register)(r10_RegisterEnumValue)) ++#define R11 ((Register)(r11_RegisterEnumValue)) ++#define R12 ((Register)(r12_RegisterEnumValue)) ++#define R13 ((Register)(r13_RegisterEnumValue)) ++#define R14 ((Register)(r14_RegisterEnumValue)) ++#define R15 ((Register)(r15_RegisterEnumValue)) ++#define R16 ((Register)(r16_RegisterEnumValue)) ++#define R17 ((Register)(r17_RegisterEnumValue)) ++#define R18 ((Register)(r18_RegisterEnumValue)) ++#define R19 ((Register)(r19_RegisterEnumValue)) ++#define R20 ((Register)(r20_RegisterEnumValue)) ++#define R21 ((Register)(r21_RegisterEnumValue)) ++#define R22 ((Register)(r22_RegisterEnumValue)) ++#define R23 ((Register)(r23_RegisterEnumValue)) ++#define R24 ((Register)(r24_RegisterEnumValue)) ++#define R25 ((Register)(r25_RegisterEnumValue)) ++#define R26 ((Register)(r26_RegisterEnumValue)) ++#define R27 ((Register)(r27_RegisterEnumValue)) ++#define R28 ((Register)(r28_RegisterEnumValue)) ++#define R29 ((Register)(r29_RegisterEnumValue)) ++#define R30 ((Register)(r30_RegisterEnumValue)) ++#define R31 ((Register)(r31_RegisterEnumValue)) ++ ++ ++#define RA R1 ++#define TP R2 ++#define SP R3 ++#define RA0 R4 ++#define RA1 R5 ++#define RA2 R6 ++#define RA3 R7 ++#define RA4 R8 ++#define RA5 R9 ++#define RA6 R10 ++#define RA7 R11 ++#define RT0 R12 ++#define RT1 R13 ++#define RT2 R14 ++#define RT3 R15 ++#define RT4 R16 ++#define RT5 R17 ++#define RT6 R18 ++#define RT7 R19 ++#define RT8 R20 ++#define RX R21 ++#define FP R22 ++#define S0 R23 ++#define S1 R24 ++#define S2 R25 ++#define S3 R26 ++#define S4 R27 ++#define S5 R28 ++#define S6 R29 ++#define S7 R30 ++#define S8 R31 ++ ++#define c_rarg0 RT0 ++#define c_rarg1 RT1 ++#define Rmethod S3 ++#define Rsender S4 ++#define Rnext S1 ++ ++#define V0 RA0 ++#define V1 RA1 ++ ++#define SCR1 RT7 ++#define SCR2 RT4 ++ ++//for interpreter frame ++// bytecode pointer register ++#define BCP S0 ++// local variable pointer register ++#define LVP S7 ++// temperary callee saved register, we use this register to save the register maybe blowed cross call_VM ++// be sure to save and restore its value in call_stub ++#define TSR S2 ++ ++#define OPT_THREAD 1 ++ ++#define TREG S6 ++ ++#define S5_heapbase S5 ++ ++#define FSR V0 ++#define SSR T6 ++#define FSF FV0 ++ ++#define RECEIVER T0 ++#define IC_Klass T1 ++ ++#define SHIFT_count T3 ++ ++// ---------- Scratch Register ---------- ++#define AT RT7 ++#define fscratch F23 ++ ++#endif // DONT_USE_REGISTER_DEFINES ++ ++// Use FloatRegister as shortcut ++class FloatRegisterImpl; ++typedef FloatRegisterImpl* FloatRegister; ++ ++inline FloatRegister as_FloatRegister(int encoding) { ++ return (FloatRegister)(intptr_t) encoding; ++} ++ ++// The implementation of floating point registers for the LoongArch architecture ++class FloatRegisterImpl: public AbstractRegisterImpl { ++ public: ++ enum { ++ number_of_registers = 32, ++ save_slots_per_register = 2, ++ slots_per_lsx_register = 4, ++ slots_per_lasx_register = 8, ++ max_slots_per_register = 8 ++ }; ++ ++ // construction ++ inline friend FloatRegister as_FloatRegister(int encoding); ++ ++ VMReg as_VMReg(); ++ ++ // derived registers, offsets, and addresses ++ FloatRegister successor() const { return as_FloatRegister(encoding() + 1); } ++ ++ // accessors ++ int encoding() const { assert(is_valid(), "invalid register"); return (intptr_t)this; } ++ bool 
is_valid() const { return 0 <= (intptr_t)this && (intptr_t)this < number_of_registers; } ++ const char* name() const; ++ ++}; ++ ++CONSTANT_REGISTER_DECLARATION(FloatRegister, fnoreg , (-1)); ++ ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f0 , ( 0)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f1 , ( 1)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f2 , ( 2)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f3 , ( 3)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f4 , ( 4)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f5 , ( 5)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f6 , ( 6)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f7 , ( 7)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f8 , ( 8)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f9 , ( 9)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f10 , (10)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f11 , (11)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f12 , (12)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f13 , (13)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f14 , (14)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f15 , (15)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f16 , (16)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f17 , (17)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f18 , (18)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f19 , (19)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f20 , (20)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f21 , (21)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f22 , (22)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f23 , (23)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f24 , (24)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f25 , (25)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f26 , (26)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f27 , (27)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f28 , (28)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f29 , (29)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f30 , (30)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f31 , (31)); ++ ++#ifndef DONT_USE_REGISTER_DEFINES ++#define FNOREG ((FloatRegister)(fnoreg_FloatRegisterEnumValue)) ++#define F0 ((FloatRegister)( f0_FloatRegisterEnumValue)) ++#define F1 ((FloatRegister)( f1_FloatRegisterEnumValue)) ++#define F2 ((FloatRegister)( f2_FloatRegisterEnumValue)) ++#define F3 ((FloatRegister)( f3_FloatRegisterEnumValue)) ++#define F4 ((FloatRegister)( f4_FloatRegisterEnumValue)) ++#define F5 ((FloatRegister)( f5_FloatRegisterEnumValue)) ++#define F6 ((FloatRegister)( f6_FloatRegisterEnumValue)) ++#define F7 ((FloatRegister)( f7_FloatRegisterEnumValue)) ++#define F8 ((FloatRegister)( f8_FloatRegisterEnumValue)) ++#define F9 ((FloatRegister)( f9_FloatRegisterEnumValue)) ++#define F10 ((FloatRegister)( f10_FloatRegisterEnumValue)) ++#define F11 ((FloatRegister)( f11_FloatRegisterEnumValue)) ++#define F12 ((FloatRegister)( f12_FloatRegisterEnumValue)) ++#define F13 ((FloatRegister)( f13_FloatRegisterEnumValue)) ++#define F14 ((FloatRegister)( f14_FloatRegisterEnumValue)) ++#define F15 ((FloatRegister)( f15_FloatRegisterEnumValue)) ++#define F16 ((FloatRegister)( f16_FloatRegisterEnumValue)) ++#define F17 ((FloatRegister)( f17_FloatRegisterEnumValue)) ++#define F18 ((FloatRegister)( f18_FloatRegisterEnumValue)) ++#define F19 ((FloatRegister)( f19_FloatRegisterEnumValue)) ++#define F20 ((FloatRegister)( f20_FloatRegisterEnumValue)) ++#define F21 ((FloatRegister)( f21_FloatRegisterEnumValue)) ++#define F22 
((FloatRegister)( f22_FloatRegisterEnumValue)) ++#define F23 ((FloatRegister)( f23_FloatRegisterEnumValue)) ++#define F24 ((FloatRegister)( f24_FloatRegisterEnumValue)) ++#define F25 ((FloatRegister)( f25_FloatRegisterEnumValue)) ++#define F26 ((FloatRegister)( f26_FloatRegisterEnumValue)) ++#define F27 ((FloatRegister)( f27_FloatRegisterEnumValue)) ++#define F28 ((FloatRegister)( f28_FloatRegisterEnumValue)) ++#define F29 ((FloatRegister)( f29_FloatRegisterEnumValue)) ++#define F30 ((FloatRegister)( f30_FloatRegisterEnumValue)) ++#define F31 ((FloatRegister)( f31_FloatRegisterEnumValue)) ++ ++#define FA0 F0 ++#define FA1 F1 ++#define FA2 F2 ++#define FA3 F3 ++#define FA4 F4 ++#define FA5 F5 ++#define FA6 F6 ++#define FA7 F7 ++ ++#define FV0 F0 ++#define FV1 F1 ++ ++#define FT0 F8 ++#define FT1 F9 ++#define FT2 F10 ++#define FT3 F11 ++#define FT4 F12 ++#define FT5 F13 ++#define FT6 F14 ++#define FT7 F15 ++#define FT8 F16 ++#define FT9 F17 ++#define FT10 F18 ++#define FT11 F19 ++#define FT12 F20 ++#define FT13 F21 ++#define FT14 F22 ++#define FT15 F23 ++ ++#define FS0 F24 ++#define FS1 F25 ++#define FS2 F26 ++#define FS3 F27 ++#define FS4 F28 ++#define FS5 F29 ++#define FS6 F30 ++#define FS7 F31 ++ ++#endif // DONT_USE_REGISTER_DEFINES ++ ++// Use ConditionalFlagRegister as shortcut ++class ConditionalFlagRegisterImpl; ++typedef ConditionalFlagRegisterImpl* ConditionalFlagRegister; ++ ++inline ConditionalFlagRegister as_ConditionalFlagRegister(int encoding) { ++ return (ConditionalFlagRegister)(intptr_t) encoding; ++} ++ ++// The implementation of floating point registers for the LoongArch architecture ++class ConditionalFlagRegisterImpl: public AbstractRegisterImpl { ++ public: ++ enum { ++// conditionalflag_arg_base = 12, ++ number_of_registers = 8 ++ }; ++ ++ // construction ++ inline friend ConditionalFlagRegister as_ConditionalFlagRegister(int encoding); ++ ++ VMReg as_VMReg(); ++ ++ // derived registers, offsets, and addresses ++ ConditionalFlagRegister successor() const { return as_ConditionalFlagRegister(encoding() + 1); } ++ ++ // accessors ++ int encoding() const { assert(is_valid(), "invalid register"); return (intptr_t)this; } ++ bool is_valid() const { return 0 <= (intptr_t)this && (intptr_t)this < number_of_registers; } ++ const char* name() const; ++ ++}; ++ ++CONSTANT_REGISTER_DECLARATION(ConditionalFlagRegister, fccnoreg , (-1)); ++ ++CONSTANT_REGISTER_DECLARATION(ConditionalFlagRegister, fcc0 , ( 0)); ++CONSTANT_REGISTER_DECLARATION(ConditionalFlagRegister, fcc1 , ( 1)); ++CONSTANT_REGISTER_DECLARATION(ConditionalFlagRegister, fcc2 , ( 2)); ++CONSTANT_REGISTER_DECLARATION(ConditionalFlagRegister, fcc3 , ( 3)); ++CONSTANT_REGISTER_DECLARATION(ConditionalFlagRegister, fcc4 , ( 4)); ++CONSTANT_REGISTER_DECLARATION(ConditionalFlagRegister, fcc5 , ( 5)); ++CONSTANT_REGISTER_DECLARATION(ConditionalFlagRegister, fcc6 , ( 6)); ++CONSTANT_REGISTER_DECLARATION(ConditionalFlagRegister, fcc7 , ( 7)); ++ ++#ifndef DONT_USE_REGISTER_DEFINES ++#define FCCNOREG ((ConditionalFlagRegister)(fccnoreg_ConditionalFlagRegisterEnumValue)) ++#define FCC0 ((ConditionalFlagRegister)( fcc0_ConditionalFlagRegisterEnumValue)) ++#define FCC1 ((ConditionalFlagRegister)( fcc1_ConditionalFlagRegisterEnumValue)) ++#define FCC2 ((ConditionalFlagRegister)( fcc2_ConditionalFlagRegisterEnumValue)) ++#define FCC3 ((ConditionalFlagRegister)( fcc3_ConditionalFlagRegisterEnumValue)) ++#define FCC4 ((ConditionalFlagRegister)( fcc4_ConditionalFlagRegisterEnumValue)) ++#define FCC5 ((ConditionalFlagRegister)( 
fcc5_ConditionalFlagRegisterEnumValue)) ++#define FCC6 ((ConditionalFlagRegister)( fcc6_ConditionalFlagRegisterEnumValue)) ++#define FCC7 ((ConditionalFlagRegister)( fcc7_ConditionalFlagRegisterEnumValue)) ++ ++#endif // DONT_USE_REGISTER_DEFINES ++ ++// Need to know the total number of registers of all sorts for SharedInfo. ++// Define a class that exports it. ++class ConcreteRegisterImpl : public AbstractRegisterImpl { ++ public: ++ enum { ++ // A big enough number for C2: all the registers plus flags ++ // This number must be large enough to cover REG_COUNT (defined by c2) registers. ++ // There is no requirement that any ordering here matches any ordering c2 gives ++ // its optoregs. ++ number_of_registers = RegisterImpl::max_slots_per_register * RegisterImpl::number_of_registers + ++ FloatRegisterImpl::max_slots_per_register * FloatRegisterImpl::number_of_registers ++ }; ++ ++ static const int max_gpr; ++ static const int max_fpr; ++}; ++ ++// A set of registers ++template <class RegImpl> ++class AbstractRegSet { ++ uint32_t _bitset; ++ ++ AbstractRegSet(uint32_t bitset) : _bitset(bitset) { } ++ ++public: ++ ++ AbstractRegSet() : _bitset(0) { } ++ ++ AbstractRegSet(RegImpl r1) : _bitset(1 << r1->encoding()) { } ++ ++ AbstractRegSet operator+(const AbstractRegSet aSet) const { ++ AbstractRegSet result(_bitset | aSet._bitset); ++ return result; ++ } ++ ++ AbstractRegSet operator-(const AbstractRegSet aSet) const { ++ AbstractRegSet result(_bitset & ~aSet._bitset); ++ return result; ++ } ++ ++ AbstractRegSet &operator+=(const AbstractRegSet aSet) { ++ *this = *this + aSet; ++ return *this; ++ } ++ ++ AbstractRegSet &operator-=(const AbstractRegSet aSet) { ++ *this = *this - aSet; ++ return *this; ++ } ++ ++ static AbstractRegSet of(RegImpl r1) { ++ return AbstractRegSet(r1); ++ } ++ ++ static AbstractRegSet of(RegImpl r1, RegImpl r2) { ++ return of(r1) + r2; ++ } ++ ++ static AbstractRegSet of(RegImpl r1, RegImpl r2, RegImpl r3) { ++ return of(r1, r2) + r3; ++ } ++ ++ static AbstractRegSet of(RegImpl r1, RegImpl r2, RegImpl r3, RegImpl r4) { ++ return of(r1, r2, r3) + r4; ++ } ++ ++ static AbstractRegSet of(RegImpl r1, RegImpl r2, RegImpl r3, RegImpl r4, RegImpl r5) { ++ return of(r1, r2, r3, r4) + r5; ++ } ++ ++ static AbstractRegSet range(RegImpl start, RegImpl end) { ++ uint32_t bits = ~0; ++ bits <<= start->encoding(); ++ bits <<= 31 - end->encoding(); ++ bits >>= 31 - end->encoding(); ++ ++ return AbstractRegSet(bits); ++ } ++ ++ uint32_t bits() const { return _bitset; } ++}; ++ ++typedef AbstractRegSet<Register> RegSet; ++typedef AbstractRegSet<FloatRegister> FloatRegSet; ++ ++#endif //CPU_LOONGARCH_REGISTER_LOONGARCH_HPP +diff --git a/src/hotspot/cpu/loongarch/relocInfo_loongarch.cpp b/src/hotspot/cpu/loongarch/relocInfo_loongarch.cpp +new file mode 100644 +index 00000000000..079d581c91f +--- /dev/null ++++ b/src/hotspot/cpu/loongarch/relocInfo_loongarch.cpp +@@ -0,0 +1,132 @@ ++/* ++ * Copyright (c) 1998, 2013, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE.
See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#include "precompiled.hpp" ++#include "asm/macroAssembler.hpp" ++#include "code/relocInfo.hpp" ++#include "compiler/disassembler.hpp" ++#include "nativeInst_loongarch.hpp" ++#include "oops/compressedOops.inline.hpp" ++#include "oops/oop.hpp" ++#include "oops/oop.inline.hpp" ++#include "runtime/safepoint.hpp" ++ ++ ++void Relocation::pd_set_data_value(address x, intptr_t o, bool verify_only) { ++ x += o; ++ typedef Assembler::WhichOperand WhichOperand; ++ WhichOperand which = (WhichOperand) format(); // that is, disp32 or imm, call32, narrow oop ++ assert(which == Assembler::disp32_operand || ++ which == Assembler::narrow_oop_operand || ++ which == Assembler::imm_operand, "format unpacks ok"); ++ if (type() == relocInfo::internal_word_type || ++ type() == relocInfo::section_word_type) { ++ MacroAssembler::pd_patch_instruction(addr(), x); ++ } else if (which == Assembler::imm_operand) { ++ if (verify_only) { ++ assert(nativeMovConstReg_at(addr())->data() == (long)x, "instructions must match"); ++ } else { ++ nativeMovConstReg_at(addr())->set_data((intptr_t)(x)); ++ } ++ } else if (which == Assembler::narrow_oop_operand) { ++ // both compressed oops and compressed classes look the same ++ if (CompressedOops::is_in((void*)x)) { ++ if (verify_only) { ++ assert(nativeMovConstReg_at(addr())->data() == (long)CompressedOops::encode(cast_to_oop(x)), "instructions must match"); ++ } else { ++ nativeMovConstReg_at(addr())->set_data((intptr_t)(CompressedOops::encode(cast_to_oop(x))), (intptr_t)(x)); ++ } ++ } else { ++ if (verify_only) { ++ assert(nativeMovConstReg_at(addr())->data() == (long)CompressedKlassPointers::encode((Klass*)x), "instructions must match"); ++ } else { ++ nativeMovConstReg_at(addr())->set_data((intptr_t)(CompressedKlassPointers::encode((Klass*)x)), (intptr_t)(x)); ++ } ++ } ++ } else { ++ // Note: Use runtime_call_type relocations for call32_operand. ++ assert(0, "call32_operand not supported in LoongArch64"); ++ } ++} ++ ++ ++address Relocation::pd_call_destination(address orig_addr) { ++ NativeInstruction* ni = nativeInstruction_at(addr()); ++ if (ni->is_far_call()) { ++ return nativeFarCall_at(addr())->destination(orig_addr); ++ } else if (ni->is_call()) { ++ address trampoline = nativeCall_at(addr())->get_trampoline(); ++ if (trampoline) { ++ return nativeCallTrampolineStub_at(trampoline)->destination(); ++ } else { ++ address new_addr = nativeCall_at(addr())->target_addr_for_bl(orig_addr); ++ // If call is branch to self, don't try to relocate it, just leave it ++ // as branch to self. This happens during code generation if the code ++ // buffer expands. It will be relocated to the trampoline above once ++ // code generation is complete. ++ return (new_addr == orig_addr) ? 
addr() : new_addr; ++ } ++ } else if (ni->is_jump()) { ++ return nativeGeneralJump_at(addr())->jump_destination(orig_addr); ++ } else { ++ tty->print_cr("\nError!\ncall destination: " INTPTR_FORMAT, p2i(addr())); ++ Disassembler::decode(addr() - 10 * BytesPerInstWord, addr() + 10 * BytesPerInstWord, tty); ++ ShouldNotReachHere(); ++ return NULL; ++ } ++} ++ ++void Relocation::pd_set_call_destination(address x) { ++ NativeInstruction* ni = nativeInstruction_at(addr()); ++ if (ni->is_far_call()) { ++ nativeFarCall_at(addr())->set_destination(x); ++ } else if (ni->is_call()) { ++ address trampoline = nativeCall_at(addr())->get_trampoline(); ++ if (trampoline) { ++ nativeCall_at(addr())->set_destination_mt_safe(x, false); ++ } else { ++ nativeCall_at(addr())->set_destination(x); ++ } ++ } else if (ni->is_jump()) { ++ nativeGeneralJump_at(addr())->set_jump_destination(x); ++ } else { ++ ShouldNotReachHere(); ++ } ++} ++ ++address* Relocation::pd_address_in_code() { ++ return (address*)addr(); ++} ++ ++address Relocation::pd_get_address_from_code() { ++ NativeMovConstReg* ni = nativeMovConstReg_at(addr()); ++ return (address)ni->data(); ++} ++ ++void poll_Relocation::fix_relocation_after_move(const CodeBuffer* src, CodeBuffer* dest) { ++} ++ ++void metadata_Relocation::pd_fix_value(address x) { ++} +diff --git a/src/hotspot/cpu/loongarch/relocInfo_loongarch.hpp b/src/hotspot/cpu/loongarch/relocInfo_loongarch.hpp +new file mode 100644 +index 00000000000..c85ca4963f3 +--- /dev/null ++++ b/src/hotspot/cpu/loongarch/relocInfo_loongarch.hpp +@@ -0,0 +1,44 @@ ++/* ++ * Copyright (c) 1997, 2010, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#ifndef CPU_LOONGARCH_RELOCINFO_LOONGARCH_HPP ++#define CPU_LOONGARCH_RELOCINFO_LOONGARCH_HPP ++ ++ // machine-dependent parts of class relocInfo ++ private: ++ enum { ++ // Since LoongArch instructions are whole words, ++ // the two low-order offset bits can always be discarded. ++ offset_unit = 4, ++ ++ // imm_oop_operand vs. 
narrow_oop_operand ++ format_width = 2 ++ }; ++ ++ public: ++ ++ static bool mustIterateImmediateOopsInCode() { return false; } ++ ++#endif // CPU_LOONGARCH_RELOCINFO_LOONGARCH_HPP +diff --git a/src/hotspot/cpu/loongarch/runtime_loongarch_64.cpp b/src/hotspot/cpu/loongarch/runtime_loongarch_64.cpp +new file mode 100644 +index 00000000000..fae11f47e62 +--- /dev/null ++++ b/src/hotspot/cpu/loongarch/runtime_loongarch_64.cpp +@@ -0,0 +1,199 @@ ++/* ++ * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2021, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#include "precompiled.hpp" ++#ifdef COMPILER2 ++#include "asm/macroAssembler.hpp" ++#include "asm/macroAssembler.inline.hpp" ++#include "code/vmreg.hpp" ++#include "compiler/oopMap.hpp" ++#include "interpreter/interpreter.hpp" ++#include "opto/runtime.hpp" ++#include "runtime/interfaceSupport.inline.hpp" ++#include "runtime/sharedRuntime.hpp" ++#include "runtime/stubRoutines.hpp" ++#include "runtime/vframeArray.hpp" ++#include "utilities/globalDefinitions.hpp" ++#include "vmreg_loongarch.inline.hpp" ++#endif ++ ++#define __ masm-> ++ ++#define A0 RA0 ++#define A1 RA1 ++#define A2 RA2 ++#define A3 RA3 ++#define A4 RA4 ++#define A5 RA5 ++#define A6 RA6 ++#define A7 RA7 ++#define T0 RT0 ++#define T1 RT1 ++#define T2 RT2 ++#define T3 RT3 ++#define T4 RT4 ++#define T5 RT5 ++#define T6 RT6 ++#define T7 RT7 ++#define T8 RT8 ++ ++//-------------- generate_exception_blob ----------- ++// creates _exception_blob. ++// The exception blob is jumped to from a compiled method. ++// (see emit_exception_handler in sparc.ad file) ++// ++// Given an exception pc at a call we call into the runtime for the ++// handler in this method. This handler might merely restore state ++// (i.e. callee save registers) unwind the frame and jump to the ++// exception handler for the nmethod if there is no Java level handler ++// for the nmethod. ++// ++// This code is entered with a jump, and left with a jump. ++// ++// Arguments: ++// V0: exception oop ++// V1: exception pc ++// ++// Results: ++// A0: exception oop ++// A1: exception pc in caller or ??? 
++// jumps to: exception handler of caller ++// ++// Note: the exception pc MUST be at a call (precise debug information) ++// ++// [stubGenerator_loongarch_64.cpp] generate_forward_exception() ++// |- V0, V1 are created ++// |- T4 <= SharedRuntime::exception_handler_for_return_address ++// `- jr T4 ++// `- the caller's exception_handler ++// `- jr OptoRuntime::exception_blob ++// `- here ++// ++void OptoRuntime::generate_exception_blob() { ++ // Capture info about frame layout ++ enum layout { ++ fp_off, ++ return_off, // slot for return address ++ framesize ++ }; ++ ++ // allocate space for the code ++ ResourceMark rm; ++ // setup code generation tools ++ CodeBuffer buffer("exception_blob", 5120, 5120); ++ MacroAssembler* masm = new MacroAssembler(&buffer); ++ ++ address start = __ pc(); ++ ++ __ addi_d(SP, SP, -1 * framesize * wordSize); // Prolog! ++ ++ // this frame will be treated as the original caller method. ++ // So, the return pc should be filled with the original exception pc. ++ // ref: X86's implementation ++ __ st_d(V1, SP, return_off * wordSize); // return address ++ __ st_d(FP, SP, fp_off * wordSize); ++ ++ // Save callee saved registers. None for UseSSE=0, ++ // floats-only for UseSSE=1, and doubles for UseSSE=2. ++ ++ __ addi_d(FP, SP, framesize * wordSize); ++ ++ // Store exception in Thread object. We cannot pass any arguments to the ++ // handle_exception call, since we do not want to make any assumption ++ // about the size of the frame where the exception happened in. ++ Register thread = TREG; ++ ++#ifndef OPT_THREAD ++ __ get_thread(thread); ++#endif ++ ++ __ st_d(V0, Address(thread, JavaThread::exception_oop_offset())); ++ __ st_d(V1, Address(thread, JavaThread::exception_pc_offset())); ++ ++ // This call does all the hard work. It checks if an exception handler ++ // exists in the method. ++ // If so, it returns the handler address. ++ // If not, it prepares for stack-unwinding, restoring the callee-save ++ // registers of the frame being removed. ++ Label L; ++ address the_pc = __ pc(); ++ __ bind(L); ++ __ set_last_Java_frame(thread, NOREG, NOREG, L); ++ ++ assert(StackAlignmentInBytes == 16, "must be"); ++ __ bstrins_d(SP, R0, 3, 0); // Fix stack alignment as required by ABI ++ ++ __ move(A0, thread); ++ // TODO: confirm reloc ++ __ call((address)OptoRuntime::handle_exception_C, relocInfo::runtime_call_type); ++ ++ // Set an oopmap for the call site ++ OopMapSet *oop_maps = new OopMapSet(); ++ ++ oop_maps->add_gc_map(the_pc - start, new OopMap(framesize, 0)); ++ ++#ifndef OPT_THREAD ++ __ get_thread(thread); ++#endif ++ __ reset_last_Java_frame(thread, true); ++ ++ // Pop self-frame. ++ __ leave(); // Epilog! ++ ++ // V0: exception handler ++ ++ // We have a handler in V0, (could be deopt blob) ++ __ move(T4, V0); ++ ++#ifndef OPT_THREAD ++ __ get_thread(thread); ++#endif ++ // Get the exception ++ __ ld_d(A0, Address(thread, JavaThread::exception_oop_offset())); ++ // Get the exception pc in case we are deoptimized ++ __ ld_d(A1, Address(thread, JavaThread::exception_pc_offset())); ++#ifdef ASSERT ++ __ st_d(R0, Address(thread, JavaThread::exception_handler_pc_offset())); ++ __ st_d(R0, Address(thread, JavaThread::exception_pc_offset())); ++#endif ++ // Clear the exception oop so GC no longer processes it as a root. 
++ __ st_d(R0, Address(thread, JavaThread::exception_oop_offset())); ++ ++ // Fix seg fault when running: ++ // Eclipse + Plugin + Debug As ++ // This is the only condition where C2 calls SharedRuntime::generate_deopt_blob() ++ // ++ __ move(V0, A0); ++ __ move(V1, A1); ++ ++ // V0: exception oop ++ // T4: exception handler ++ // A1: exception pc ++ __ jr(T4); ++ ++ // make sure all code is generated ++ masm->flush(); ++ _exception_blob = ExceptionBlob::create(&buffer, oop_maps, framesize); ++} +diff --git a/src/hotspot/cpu/loongarch/sharedRuntime_loongarch_64.cpp b/src/hotspot/cpu/loongarch/sharedRuntime_loongarch_64.cpp +new file mode 100644 +index 00000000000..0b3ea4c42f3 +--- /dev/null ++++ b/src/hotspot/cpu/loongarch/sharedRuntime_loongarch_64.cpp +@@ -0,0 +1,3113 @@ ++/* ++ * Copyright (c) 2003, 2013, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2023, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ * ++ */ ++ ++#include "precompiled.hpp" ++#include "asm/macroAssembler.hpp" ++#include "asm/macroAssembler.inline.hpp" ++#include "code/debugInfoRec.hpp" ++#include "code/icBuffer.hpp" ++#include "code/nativeInst.hpp" ++#include "code/vtableStubs.hpp" ++#include "compiler/oopMap.hpp" ++#include "gc/shared/barrierSetAssembler.hpp" ++#include "interpreter/interpreter.hpp" ++#include "oops/compiledICHolder.hpp" ++#include "oops/klass.inline.hpp" ++#include "prims/methodHandles.hpp" ++#include "runtime/jniHandles.hpp" ++#include "runtime/sharedRuntime.hpp" ++#include "runtime/signature.hpp" ++#include "runtime/stubRoutines.hpp" ++#include "runtime/vframeArray.hpp" ++#include "vmreg_loongarch.inline.hpp" ++#ifdef COMPILER2 ++#include "opto/runtime.hpp" ++#endif ++#if INCLUDE_JVMCI ++#include "jvmci/jvmciJavaClasses.hpp" ++#endif ++ ++#include ++ ++#define __ masm-> ++ ++#define A0 RA0 ++#define A1 RA1 ++#define A2 RA2 ++#define A3 RA3 ++#define A4 RA4 ++#define A5 RA5 ++#define A6 RA6 ++#define A7 RA7 ++#define T0 RT0 ++#define T1 RT1 ++#define T2 RT2 ++#define T3 RT3 ++#define T4 RT4 ++#define T5 RT5 ++#define T6 RT6 ++#define T7 RT7 ++#define T8 RT8 ++ ++const int StackAlignmentInSlots = StackAlignmentInBytes / VMRegImpl::stack_slot_size; ++ ++class RegisterSaver { ++ // Capture info about frame layout ++ enum layout { ++ fpr0_off = 0, ++ fpr1_off, ++ fpr2_off, ++ fpr3_off, ++ fpr4_off, ++ fpr5_off, ++ fpr6_off, ++ fpr7_off, ++ fpr8_off, ++ fpr9_off, ++ fpr10_off, ++ fpr11_off, ++ fpr12_off, ++ fpr13_off, ++ fpr14_off, ++ fpr15_off, ++ fpr16_off, ++ fpr17_off, ++ fpr18_off, ++ fpr19_off, ++ fpr20_off, ++ fpr21_off, ++ fpr22_off, ++ fpr23_off, ++ fpr24_off, ++ fpr25_off, ++ fpr26_off, ++ fpr27_off, ++ fpr28_off, ++ fpr29_off, ++ fpr30_off, ++ fpr31_off, ++ a0_off, ++ a1_off, ++ a2_off, ++ a3_off, ++ a4_off, ++ a5_off, ++ a6_off, ++ a7_off, ++ t0_off, ++ t1_off, ++ t2_off, ++ t3_off, ++ t4_off, ++ t5_off, ++ t6_off, ++ t7_off, ++ t8_off, ++ s0_off, ++ s1_off, ++ s2_off, ++ s3_off, ++ s4_off, ++ s5_off, ++ s6_off, ++ s7_off, ++ s8_off, ++ fp_off, ++ ra_off, ++ fpr_size = fpr31_off - fpr0_off + 1, ++ gpr_size = ra_off - a0_off + 1, ++ }; ++ ++ const bool _save_vectors; ++ public: ++ RegisterSaver(bool save_vectors) : _save_vectors(save_vectors) {} ++ ++ OopMap* save_live_registers(MacroAssembler* masm, int additional_frame_words, int* total_frame_words); ++ void restore_live_registers(MacroAssembler* masm); ++ ++ int slots_save() { ++ int slots = gpr_size * VMRegImpl::slots_per_word; ++ ++ if (_save_vectors && UseLASX) ++ slots += FloatRegisterImpl::slots_per_lasx_register * fpr_size; ++ else if (_save_vectors && UseLSX) ++ slots += FloatRegisterImpl::slots_per_lsx_register * fpr_size; ++ else ++ slots += FloatRegisterImpl::save_slots_per_register * fpr_size; ++ ++ return slots; ++ } ++ ++ int gpr_offset(int off) { ++ int slots_per_fpr = FloatRegisterImpl::save_slots_per_register; ++ int slots_per_gpr = VMRegImpl::slots_per_word; ++ ++ if (_save_vectors && UseLASX) ++ slots_per_fpr = FloatRegisterImpl::slots_per_lasx_register; ++ else if (_save_vectors && UseLSX) ++ slots_per_fpr = FloatRegisterImpl::slots_per_lsx_register; ++ ++ return (fpr_size * slots_per_fpr + (off - a0_off) * slots_per_gpr) * VMRegImpl::stack_slot_size; ++ } ++ ++ int fpr_offset(int off) { ++ int slots_per_fpr = FloatRegisterImpl::save_slots_per_register; ++ ++ if (_save_vectors && UseLASX) ++ slots_per_fpr = FloatRegisterImpl::slots_per_lasx_register; ++ else if (_save_vectors && UseLSX) ++ slots_per_fpr = 
FloatRegisterImpl::slots_per_lsx_register; ++ ++ return off * slots_per_fpr * VMRegImpl::stack_slot_size; ++ } ++ ++ int ra_offset() { return gpr_offset(ra_off); } ++ int t5_offset() { return gpr_offset(t5_off); } ++ int s3_offset() { return gpr_offset(s3_off); } ++ int v0_offset() { return gpr_offset(a0_off); } ++ int v1_offset() { return gpr_offset(a1_off); } ++ ++ int fpr0_offset() { return fpr_offset(fpr0_off); } ++ int fpr1_offset() { return fpr_offset(fpr1_off); } ++ ++ // During deoptimization only the result register need to be restored ++ // all the other values have already been extracted. ++ void restore_result_registers(MacroAssembler* masm); ++}; ++ ++OopMap* RegisterSaver::save_live_registers(MacroAssembler* masm, int additional_frame_words, int* total_frame_words) { ++ // Always make the frame size 16-byte aligned ++ int frame_size_in_bytes = align_up(additional_frame_words * wordSize + slots_save() * VMRegImpl::stack_slot_size, StackAlignmentInBytes); ++ // OopMap frame size is in compiler stack slots (jint's) not bytes or words ++ int frame_size_in_slots = frame_size_in_bytes / VMRegImpl::stack_slot_size; ++ // The caller will allocate additional_frame_words ++ int additional_frame_slots = additional_frame_words * wordSize / VMRegImpl::stack_slot_size; ++ // CodeBlob frame size is in words. ++ int frame_size_in_words = frame_size_in_bytes / wordSize; ++ ++ *total_frame_words = frame_size_in_words; ++ ++ OopMapSet *oop_maps = new OopMapSet(); ++ OopMap* map = new OopMap(frame_size_in_slots, 0); ++ ++ // save registers ++ __ addi_d(SP, SP, -slots_save() * VMRegImpl::stack_slot_size); ++ ++ for (int i = 0; i < fpr_size; i++) { ++ FloatRegister fpr = as_FloatRegister(i); ++ int off = fpr_offset(i); ++ ++ if (_save_vectors && UseLASX) ++ __ xvst(fpr, SP, off); ++ else if (_save_vectors && UseLSX) ++ __ vst(fpr, SP, off); ++ else ++ __ fst_d(fpr, SP, off); ++ map->set_callee_saved(VMRegImpl::stack2reg(off / VMRegImpl::stack_slot_size + additional_frame_slots), fpr->as_VMReg()); ++ } ++ ++ for (int i = a0_off; i <= a7_off; i++) { ++ Register gpr = as_Register(A0->encoding() + (i - a0_off)); ++ int off = gpr_offset(i); ++ ++ __ st_d(gpr, SP, gpr_offset(i)); ++ map->set_callee_saved(VMRegImpl::stack2reg(off / VMRegImpl::stack_slot_size + additional_frame_slots), gpr->as_VMReg()); ++ } ++ ++ for (int i = t0_off; i <= t6_off; i++) { ++ Register gpr = as_Register(T0->encoding() + (i - t0_off)); ++ int off = gpr_offset(i); ++ ++ __ st_d(gpr, SP, gpr_offset(i)); ++ map->set_callee_saved(VMRegImpl::stack2reg(off / VMRegImpl::stack_slot_size + additional_frame_slots), gpr->as_VMReg()); ++ } ++ __ st_d(T8, SP, gpr_offset(t8_off)); ++ map->set_callee_saved(VMRegImpl::stack2reg(gpr_offset(t8_off) / VMRegImpl::stack_slot_size + additional_frame_slots), T8->as_VMReg()); ++ ++ for (int i = s0_off; i <= s8_off; i++) { ++ Register gpr = as_Register(S0->encoding() + (i - s0_off)); ++ int off = gpr_offset(i); ++ ++ __ st_d(gpr, SP, gpr_offset(i)); ++ map->set_callee_saved(VMRegImpl::stack2reg(off / VMRegImpl::stack_slot_size + additional_frame_slots), gpr->as_VMReg()); ++ } ++ ++ __ st_d(FP, SP, gpr_offset(fp_off)); ++ map->set_callee_saved(VMRegImpl::stack2reg(gpr_offset(fp_off) / VMRegImpl::stack_slot_size + additional_frame_slots), FP->as_VMReg()); ++ __ st_d(RA, SP, gpr_offset(ra_off)); ++ map->set_callee_saved(VMRegImpl::stack2reg(gpr_offset(ra_off) / VMRegImpl::stack_slot_size + additional_frame_slots), RA->as_VMReg()); ++ ++ __ addi_d(FP, SP, slots_save() * VMRegImpl::stack_slot_size); ++ ++ 
return map; ++} ++ ++ ++// Pop the current frame and restore all the registers that we ++// saved. ++void RegisterSaver::restore_live_registers(MacroAssembler* masm) { ++ for (int i = 0; i < fpr_size; i++) { ++ FloatRegister fpr = as_FloatRegister(i); ++ int off = fpr_offset(i); ++ ++ if (_save_vectors && UseLASX) ++ __ xvld(fpr, SP, off); ++ else if (_save_vectors && UseLSX) ++ __ vld(fpr, SP, off); ++ else ++ __ fld_d(fpr, SP, off); ++ } ++ ++ for (int i = a0_off; i <= a7_off; i++) { ++ Register gpr = as_Register(A0->encoding() + (i - a0_off)); ++ int off = gpr_offset(i); ++ ++ __ ld_d(gpr, SP, gpr_offset(i)); ++ } ++ ++ for (int i = t0_off; i <= t6_off; i++) { ++ Register gpr = as_Register(T0->encoding() + (i - t0_off)); ++ int off = gpr_offset(i); ++ ++ __ ld_d(gpr, SP, gpr_offset(i)); ++ } ++ __ ld_d(T8, SP, gpr_offset(t8_off)); ++ ++ for (int i = s0_off; i <= s8_off; i++) { ++ Register gpr = as_Register(S0->encoding() + (i - s0_off)); ++ int off = gpr_offset(i); ++ ++ __ ld_d(gpr, SP, gpr_offset(i)); ++ } ++ ++ __ ld_d(FP, SP, gpr_offset(fp_off)); ++ __ ld_d(RA, SP, gpr_offset(ra_off)); ++ ++ __ addi_d(SP, SP, slots_save() * VMRegImpl::stack_slot_size); ++} ++ ++// Pop the current frame and restore the registers that might be holding ++// a result. ++void RegisterSaver::restore_result_registers(MacroAssembler* masm) { ++ // Just restore result register. Only used by deoptimization. By ++ // now any callee save register that needs to be restore to a c2 ++ // caller of the deoptee has been extracted into the vframeArray ++ // and will be stuffed into the c2i adapter we create for later ++ // restoration so only result registers need to be restored here. ++ ++ __ ld_d(V0, SP, gpr_offset(a0_off)); ++ __ ld_d(V1, SP, gpr_offset(a1_off)); ++ ++ __ fld_d(F0, SP, fpr_offset(fpr0_off)); ++ __ fld_d(F1, SP, fpr_offset(fpr1_off)); ++ ++ __ addi_d(SP, SP, gpr_offset(ra_off)); ++} ++ ++// Is vector's size (in bytes) bigger than a size saved by default? ++// 8 bytes registers are saved by default using fld/fst instructions. ++bool SharedRuntime::is_wide_vector(int size) { ++ return size > 8; ++} ++ ++// The java_calling_convention describes stack locations as ideal slots on ++// a frame with no abi restrictions. Since we must observe abi restrictions ++// (like the placement of the register window) the slots must be biased by ++// the following value. ++ ++static int reg2offset_in(VMReg r) { ++ // This should really be in_preserve_stack_slots ++ return r->reg2stack() * VMRegImpl::stack_slot_size; ++} ++ ++static int reg2offset_out(VMReg r) { ++ return (r->reg2stack() + SharedRuntime::out_preserve_stack_slots()) * VMRegImpl::stack_slot_size; ++} ++ ++// --------------------------------------------------------------------------- ++// Read the array of BasicTypes from a signature, and compute where the ++// arguments should go. Values in the VMRegPair regs array refer to 4-byte ++// quantities. Values less than SharedInfo::stack0 are registers, those above ++// refer to 4-byte stack slots. All stack slots are based off of the stack pointer ++// as framesizes are fixed. ++// VMRegImpl::stack0 refers to the first slot 0(sp). ++// and VMRegImpl::stack0+1 refers to the memory word 4-byes higher. Register ++// up to RegisterImpl::number_of_registers) are the 32-bit ++// integer registers. ++ ++// Pass first five oop/int args in registers T0, A0 - A3. ++// Pass float/double/long args in stack. 
++// Doubles have precedence, so if you pass a mix of floats and doubles ++// the doubles will grab the registers before the floats will. ++ ++// Note: the INPUTS in sig_bt are in units of Java argument words, which are ++// either 32-bit or 64-bit depending on the build. The OUTPUTS are in 32-bit ++// units regardless of build. ++ ++ ++// --------------------------------------------------------------------------- ++// The compiled Java calling convention. ++// Pass first five oop/int args in registers T0, A0 - A3. ++// Pass float/double/long args in stack. ++// Doubles have precedence, so if you pass a mix of floats and doubles ++// the doubles will grab the registers before the floats will. ++ ++int SharedRuntime::java_calling_convention(const BasicType *sig_bt, ++ VMRegPair *regs, ++ int total_args_passed) { ++ ++ // Create the mapping between argument positions and registers. ++ static const Register INT_ArgReg[Argument::n_register_parameters + 1] = { ++ T0, A0, A1, A2, A3, A4, A5, A6, A7 ++ }; ++ static const FloatRegister FP_ArgReg[Argument::n_float_register_parameters] = { ++ FA0, FA1, FA2, FA3, FA4, FA5, FA6, FA7 ++ }; ++ ++ uint int_args = 0; ++ uint fp_args = 0; ++ uint stk_args = 0; // inc by 2 each time ++ ++ for (int i = 0; i < total_args_passed; i++) { ++ switch (sig_bt[i]) { ++ case T_VOID: ++ // halves of T_LONG or T_DOUBLE ++ assert(i != 0 && (sig_bt[i - 1] == T_LONG || sig_bt[i - 1] == T_DOUBLE), "expecting half"); ++ regs[i].set_bad(); ++ break; ++ case T_BOOLEAN: ++ case T_CHAR: ++ case T_BYTE: ++ case T_SHORT: ++ case T_INT: ++ if (int_args < Argument::n_register_parameters + 1) { ++ regs[i].set1(INT_ArgReg[int_args++]->as_VMReg()); ++ } else { ++ regs[i].set1(VMRegImpl::stack2reg(stk_args)); ++ stk_args += 2; ++ } ++ break; ++ case T_LONG: ++ assert(sig_bt[i + 1] == T_VOID, "expecting half"); ++ // fall through ++ case T_OBJECT: ++ case T_ARRAY: ++ case T_ADDRESS: ++ if (int_args < Argument::n_register_parameters + 1) { ++ regs[i].set2(INT_ArgReg[int_args++]->as_VMReg()); ++ } else { ++ regs[i].set2(VMRegImpl::stack2reg(stk_args)); ++ stk_args += 2; ++ } ++ break; ++ case T_FLOAT: ++ if (fp_args < Argument::n_float_register_parameters) { ++ regs[i].set1(FP_ArgReg[fp_args++]->as_VMReg()); ++ } else { ++ regs[i].set1(VMRegImpl::stack2reg(stk_args)); ++ stk_args += 2; ++ } ++ break; ++ case T_DOUBLE: ++ assert(sig_bt[i + 1] == T_VOID, "expecting half"); ++ if (fp_args < Argument::n_float_register_parameters) { ++ regs[i].set2(FP_ArgReg[fp_args++]->as_VMReg()); ++ } else { ++ regs[i].set2(VMRegImpl::stack2reg(stk_args)); ++ stk_args += 2; ++ } ++ break; ++ default: ++ ShouldNotReachHere(); ++ break; ++ } ++ } ++ ++ return align_up(stk_args, 2); ++} ++ ++// Patch the callers callsite with entry to compiled code if it exists. ++static void patch_callers_callsite(MacroAssembler *masm) { ++ Label L; ++ __ ld_ptr(AT, Rmethod, in_bytes(Method::code_offset())); ++ __ beq(AT, R0, L); ++ ++ // Schedule the branch target address early. 
++ // Call into the VM to patch the caller, then jump to compiled callee ++ // T5 isn't live so capture return address while we easily can ++ __ move(T5, RA); ++ ++ __ push_call_clobbered_registers(); ++ ++ // VM needs caller's callsite ++ // VM needs target method ++ ++ __ move(A0, Rmethod); ++ __ move(A1, T5); ++ // we should preserve the return address ++ __ move(TSR, SP); ++ assert(StackAlignmentInBytes == 16, "must be"); ++ __ bstrins_d(SP, R0, 3, 0); // align the stack ++ __ call(CAST_FROM_FN_PTR(address, SharedRuntime::fixup_callers_callsite), ++ relocInfo::runtime_call_type); ++ ++ __ move(SP, TSR); ++ __ pop_call_clobbered_registers(); ++ __ bind(L); ++} ++ ++static void gen_c2i_adapter(MacroAssembler *masm, ++ int total_args_passed, ++ int comp_args_on_stack, ++ const BasicType *sig_bt, ++ const VMRegPair *regs, ++ Label& skip_fixup) { ++ ++ // Before we get into the guts of the C2I adapter, see if we should be here ++ // at all. We've come from compiled code and are attempting to jump to the ++ // interpreter, which means the caller made a static call to get here ++ // (vcalls always get a compiled target if there is one). Check for a ++ // compiled target. If there is one, we need to patch the caller's call. ++ // However we will run interpreted if we come thru here. The next pass ++ // thru the call site will run compiled. If we ran compiled here then ++ // we can (theorectically) do endless i2c->c2i->i2c transitions during ++ // deopt/uncommon trap cycles. If we always go interpreted here then ++ // we can have at most one and don't need to play any tricks to keep ++ // from endlessly growing the stack. ++ // ++ // Actually if we detected that we had an i2c->c2i transition here we ++ // ought to be able to reset the world back to the state of the interpreted ++ // call and not bother building another interpreter arg area. We don't ++ // do that at this point. ++ ++ patch_callers_callsite(masm); ++ __ bind(skip_fixup); ++ ++ // Since all args are passed on the stack, total_args_passed * ++ // Interpreter::stackElementSize is the space we need. ++ int extraspace = total_args_passed * Interpreter::stackElementSize; ++ ++ // stack is aligned, keep it that way ++ extraspace = align_up(extraspace, 2*wordSize); ++ ++ // Get return address ++ __ move(T5, RA); ++ // set senderSP value ++ //refer to interpreter_loongarch.cpp:generate_asm_entry ++ __ move(Rsender, SP); ++ __ addi_d(SP, SP, -extraspace); ++ ++ // Now write the args into the outgoing interpreter space ++ for (int i = 0; i < total_args_passed; i++) { ++ if (sig_bt[i] == T_VOID) { ++ assert(i > 0 && (sig_bt[i-1] == T_LONG || sig_bt[i-1] == T_DOUBLE), "missing half"); ++ continue; ++ } ++ ++ // st_off points to lowest address on stack. 
++ int st_off = ((total_args_passed - 1) - i) * Interpreter::stackElementSize; ++ // Say 4 args: ++ // i st_off ++ // 0 12 T_LONG ++ // 1 8 T_VOID ++ // 2 4 T_OBJECT ++ // 3 0 T_BOOL ++ VMReg r_1 = regs[i].first(); ++ VMReg r_2 = regs[i].second(); ++ if (!r_1->is_valid()) { ++ assert(!r_2->is_valid(), ""); ++ continue; ++ } ++ if (r_1->is_stack()) { ++ // memory to memory use fpu stack top ++ int ld_off = r_1->reg2stack() * VMRegImpl::stack_slot_size + extraspace; ++ if (!r_2->is_valid()) { ++ __ ld_ptr(AT, Address(SP, ld_off)); ++ __ st_ptr(AT, Address(SP, st_off)); ++ ++ } else { ++ ++ ++ int next_off = st_off - Interpreter::stackElementSize; ++ __ ld_ptr(AT, Address(SP, ld_off)); ++ __ st_ptr(AT, Address(SP, st_off)); ++ ++ // Ref to is_Register condition ++ if(sig_bt[i] == T_LONG || sig_bt[i] == T_DOUBLE) ++ __ st_ptr(AT, SP, st_off - 8); ++ } ++ } else if (r_1->is_Register()) { ++ Register r = r_1->as_Register(); ++ if (!r_2->is_valid()) { ++ __ st_d(r, SP, st_off); ++ } else { ++ //FIXME, LA will not enter here ++ // long/double in gpr ++ __ st_d(r, SP, st_off); ++ // In [java/util/zip/ZipFile.java] ++ // ++ // private static native long open(String name, int mode, long lastModified); ++ // private static native int getTotal(long jzfile); ++ // ++ // We need to transfer T_LONG paramenters from a compiled method to a native method. ++ // It's a complex process: ++ // ++ // Caller -> lir_static_call -> gen_resolve_stub ++ // -> -- resolve_static_call_C ++ // `- gen_c2i_adapter() [*] ++ // | ++ // `- AdapterHandlerLibrary::get_create_apapter_index ++ // -> generate_native_entry ++ // -> InterpreterRuntime::SignatureHandlerGenerator::pass_long [**] ++ // ++ // In [**], T_Long parameter is stored in stack as: ++ // ++ // (high) ++ // | | ++ // ----------- ++ // | 8 bytes | ++ // | (void) | ++ // ----------- ++ // | 8 bytes | ++ // | (long) | ++ // ----------- ++ // | | ++ // (low) ++ // ++ // However, the sequence is reversed here: ++ // ++ // (high) ++ // | | ++ // ----------- ++ // | 8 bytes | ++ // | (long) | ++ // ----------- ++ // | 8 bytes | ++ // | (void) | ++ // ----------- ++ // | | ++ // (low) ++ // ++ // So I stored another 8 bytes in the T_VOID slot. It then can be accessed from generate_native_entry(). ++ // ++ if (sig_bt[i] == T_LONG) ++ __ st_d(r, SP, st_off - 8); ++ } ++ } else if (r_1->is_FloatRegister()) { ++ assert(sig_bt[i] == T_FLOAT || sig_bt[i] == T_DOUBLE, "Must be a float register"); ++ ++ FloatRegister fr = r_1->as_FloatRegister(); ++ if (sig_bt[i] == T_FLOAT) ++ __ fst_s(fr, SP, st_off); ++ else { ++ __ fst_d(fr, SP, st_off); ++ __ fst_d(fr, SP, st_off - 8); // T_DOUBLE needs two slots ++ } ++ } ++ } ++ ++ // Schedule the branch target address early. ++ __ ld_ptr(AT, Rmethod, in_bytes(Method::interpreter_entry_offset()) ); ++ // And repush original return address ++ __ move(RA, T5); ++ __ jr (AT); ++} ++ ++void SharedRuntime::gen_i2c_adapter(MacroAssembler *masm, ++ int total_args_passed, ++ int comp_args_on_stack, ++ const BasicType *sig_bt, ++ const VMRegPair *regs) { ++ ++ // Generate an I2C adapter: adjust the I-frame to make space for the C-frame ++ // layout. Lesp was saved by the calling I-frame and will be restored on ++ // return. Meanwhile, outgoing arg space is all owned by the callee ++ // C-frame, so we can mangle it at will. After adjusting the frame size, ++ // hoist register arguments and repack other args according to the compiled ++ // code convention. Finally, end in a jump to the compiled code. 
The entry ++ // point address is the start of the buffer. ++ ++ // We will only enter here from an interpreted frame and never from after ++ // passing thru a c2i. Azul allowed this but we do not. If we lose the ++ // race and use a c2i we will remain interpreted for the race loser(s). ++ // This removes all sorts of headaches on the LA side and also eliminates ++ // the possibility of having c2i -> i2c -> c2i -> ... endless transitions. ++ ++ __ move(T4, SP); ++ ++ // Cut-out for having no stack args. Since up to 2 int/oop args are passed ++ // in registers, we will occasionally have no stack args. ++ int comp_words_on_stack = 0; ++ if (comp_args_on_stack) { ++ // Sig words on the stack are greater-than VMRegImpl::stack0. Those in ++ // registers are below. By subtracting stack0, we either get a negative ++ // number (all values in registers) or the maximum stack slot accessed. ++ // int comp_args_on_stack = VMRegImpl::reg2stack(max_arg); ++ // Convert 4-byte stack slots to words. ++ // did LA need round? FIXME ++ comp_words_on_stack = align_up(comp_args_on_stack*4, wordSize)>>LogBytesPerWord; ++ // Round up to miminum stack alignment, in wordSize ++ comp_words_on_stack = align_up(comp_words_on_stack, 2); ++ __ addi_d(SP, SP, -comp_words_on_stack * wordSize); ++ } ++ ++ // Align the outgoing SP ++ assert(StackAlignmentInBytes == 16, "must be"); ++ __ bstrins_d(SP, R0, 3, 0); ++ // push the return address on the stack (note that pushing, rather ++ // than storing it, yields the correct frame alignment for the callee) ++ // Put saved SP in another register ++ const Register saved_sp = T5; ++ __ move(saved_sp, T4); ++ ++ ++ // Will jump to the compiled code just as if compiled code was doing it. ++ // Pre-load the register-jump target early, to schedule it better. ++ __ ld_d(T4, Rmethod, in_bytes(Method::from_compiled_offset())); ++ ++#if INCLUDE_JVMCI ++ if (EnableJVMCI) { ++ // check if this call should be routed towards a specific entry point ++ __ ld_d(AT, Address(TREG, in_bytes(JavaThread::jvmci_alternate_call_target_offset()))); ++ Label no_alternative_target; ++ __ beqz(AT, no_alternative_target); ++ __ move(T4, AT); ++ __ st_d(R0, Address(TREG, in_bytes(JavaThread::jvmci_alternate_call_target_offset()))); ++ __ bind(no_alternative_target); ++ } ++#endif // INCLUDE_JVMCI ++ ++ // Now generate the shuffle code. Pick up all register args and move the ++ // rest through the floating point stack top. ++ for (int i = 0; i < total_args_passed; i++) { ++ if (sig_bt[i] == T_VOID) { ++ // Longs and doubles are passed in native word order, but misaligned ++ // in the 32-bit build. ++ assert(i > 0 && (sig_bt[i-1] == T_LONG || sig_bt[i-1] == T_DOUBLE), "missing half"); ++ continue; ++ } ++ ++ // Pick up 0, 1 or 2 words from SP+offset. ++ ++ assert(!regs[i].second()->is_valid() || regs[i].first()->next() == regs[i].second(), "scrambled load targets?"); ++ // Load in argument order going down. ++ int ld_off = (total_args_passed -1 - i)*Interpreter::stackElementSize; ++ // Point to interpreter value (vs. tag) ++ int next_off = ld_off - Interpreter::stackElementSize; ++ VMReg r_1 = regs[i].first(); ++ VMReg r_2 = regs[i].second(); ++ if (!r_1->is_valid()) { ++ assert(!r_2->is_valid(), ""); ++ continue; ++ } ++ if (r_1->is_stack()) { ++ // Convert stack slot to an SP offset (+ wordSize to ++ // account for return address ) ++ // NOTICE HERE!!!! 
I sub a wordSize here ++ int st_off = regs[i].first()->reg2stack()*VMRegImpl::stack_slot_size; ++ //+ wordSize; ++ ++ if (!r_2->is_valid()) { ++ __ ld_d(AT, saved_sp, ld_off); ++ __ st_d(AT, SP, st_off); ++ } else { ++ // Interpreter local[n] == MSW, local[n+1] == LSW however locals ++ // are accessed as negative so LSW is at LOW address ++ ++ // ld_off is MSW so get LSW ++ // st_off is LSW (i.e. reg.first()) ++ ++ // [./org/eclipse/swt/graphics/GC.java] ++ // void drawImageXRender(Image srcImage, int srcX, int srcY, int srcWidth, int srcHeight, ++ // int destX, int destY, int destWidth, int destHeight, ++ // boolean simple, ++ // int imgWidth, int imgHeight, ++ // long maskPixmap, <-- Pass T_LONG in stack ++ // int maskType); ++ // Before this modification, Eclipse displays icons with solid black background. ++ // ++ __ ld_d(AT, saved_sp, ld_off); ++ if (sig_bt[i] == T_LONG || sig_bt[i] == T_DOUBLE) ++ __ ld_d(AT, saved_sp, ld_off - 8); ++ __ st_d(AT, SP, st_off); ++ } ++ } else if (r_1->is_Register()) { // Register argument ++ Register r = r_1->as_Register(); ++ if (r_2->is_valid()) { ++ // Remember r_1 is low address (and LSB on LA) ++ // So r_2 gets loaded from high address regardless of the platform ++ assert(r_2->as_Register() == r_1->as_Register(), ""); ++ __ ld_d(r, saved_sp, ld_off); ++ ++ // ++ // For T_LONG type, the real layout is as below: ++ // ++ // (high) ++ // | | ++ // ----------- ++ // | 8 bytes | ++ // | (void) | ++ // ----------- ++ // | 8 bytes | ++ // | (long) | ++ // ----------- ++ // | | ++ // (low) ++ // ++ // We should load the low-8 bytes. ++ // ++ if (sig_bt[i] == T_LONG) ++ __ ld_d(r, saved_sp, ld_off - 8); ++ } else { ++ __ ld_w(r, saved_sp, ld_off); ++ } ++ } else if (r_1->is_FloatRegister()) { // Float Register ++ assert(sig_bt[i] == T_FLOAT || sig_bt[i] == T_DOUBLE, "Must be a float register"); ++ ++ FloatRegister fr = r_1->as_FloatRegister(); ++ if (sig_bt[i] == T_FLOAT) ++ __ fld_s(fr, saved_sp, ld_off); ++ else { ++ __ fld_d(fr, saved_sp, ld_off); ++ __ fld_d(fr, saved_sp, ld_off - 8); ++ } ++ } ++ } ++ ++ // 6243940 We might end up in handle_wrong_method if ++ // the callee is deoptimized as we race thru here. If that ++ // happens we don't want to take a safepoint because the ++ // caller frame will look interpreted and arguments are now ++ // "compiled" so it is much better to make this transition ++ // invisible to the stack walking code. Unfortunately if ++ // we try and find the callee by normal means a safepoint ++ // is possible. So we stash the desired callee in the thread ++ // and the vm will find there should this case occur. ++#ifndef OPT_THREAD ++ Register thread = T8; ++ __ get_thread(thread); ++#else ++ Register thread = TREG; ++#endif ++ __ st_d(Rmethod, thread, in_bytes(JavaThread::callee_target_offset())); ++ ++ // move Method* to T5 in case we end up in an c2i adapter. ++ // the c2i adapters expect Method* in T5 (c2) because c2's ++ // resolve stubs return the result (the method) in T5. ++ // I'd love to fix this. 
++ __ move(T5, Rmethod); ++ __ jr(T4); ++} ++ ++// --------------------------------------------------------------- ++AdapterHandlerEntry* SharedRuntime::generate_i2c2i_adapters(MacroAssembler *masm, ++ int total_args_passed, ++ int comp_args_on_stack, ++ const BasicType *sig_bt, ++ const VMRegPair *regs, ++ AdapterFingerPrint* fingerprint) { ++ address i2c_entry = __ pc(); ++ ++ gen_i2c_adapter(masm, total_args_passed, comp_args_on_stack, sig_bt, regs); ++ ++ // ------------------------------------------------------------------------- ++ // Generate a C2I adapter. On entry we know G5 holds the Method*. The ++ // args start out packed in the compiled layout. They need to be unpacked ++ // into the interpreter layout. This will almost always require some stack ++ // space. We grow the current (compiled) stack, then repack the args. We ++ // finally end in a jump to the generic interpreter entry point. On exit ++ // from the interpreter, the interpreter will restore our SP (lest the ++ // compiled code, which relys solely on SP and not FP, get sick). ++ ++ address c2i_unverified_entry = __ pc(); ++ Label skip_fixup; ++ { ++ Register holder = T1; ++ Register receiver = T0; ++ Register temp = T8; ++ address ic_miss = SharedRuntime::get_ic_miss_stub(); ++ ++ Label missed; ++ ++ //add for compressedoops ++ __ load_klass(temp, receiver); ++ ++ __ ld_ptr(AT, holder, CompiledICHolder::holder_klass_offset()); ++ __ ld_ptr(Rmethod, holder, CompiledICHolder::holder_metadata_offset()); ++ __ bne(AT, temp, missed); ++ // Method might have been compiled since the call site was patched to ++ // interpreted if that is the case treat it as a miss so we can get ++ // the call site corrected. ++ __ ld_ptr(AT, Rmethod, in_bytes(Method::code_offset())); ++ __ beq(AT, R0, skip_fixup); ++ __ bind(missed); ++ ++ __ jmp(ic_miss, relocInfo::runtime_call_type); ++ } ++ address c2i_entry = __ pc(); ++ ++ // Class initialization barrier for static methods ++ address c2i_no_clinit_check_entry = NULL; ++ if (VM_Version::supports_fast_class_init_checks()) { ++ Label L_skip_barrier; ++ address handle_wrong_method = SharedRuntime::get_handle_wrong_method_stub(); ++ ++ { // Bypass the barrier for non-static methods ++ __ ld_w(AT, Address(Rmethod, Method::access_flags_offset())); ++ __ andi(AT, AT, JVM_ACC_STATIC); ++ __ beqz(AT, L_skip_barrier); // non-static ++ } ++ ++ __ load_method_holder(T4, Rmethod); ++ __ clinit_barrier(T4, AT, &L_skip_barrier); ++ __ jmp(handle_wrong_method, relocInfo::runtime_call_type); ++ ++ __ bind(L_skip_barrier); ++ c2i_no_clinit_check_entry = __ pc(); ++ } ++ ++ BarrierSetAssembler* bs = BarrierSet::barrier_set()->barrier_set_assembler(); ++ bs->c2i_entry_barrier(masm); ++ ++ gen_c2i_adapter(masm, total_args_passed, comp_args_on_stack, sig_bt, regs, skip_fixup); ++ ++ return AdapterHandlerLibrary::new_entry(fingerprint, i2c_entry, c2i_entry, c2i_unverified_entry, c2i_no_clinit_check_entry); ++} ++ ++int SharedRuntime::vector_calling_convention(VMRegPair *regs, ++ uint num_bits, ++ uint total_args_passed) { ++ Unimplemented(); ++ return 0; ++} ++ ++int SharedRuntime::c_calling_convention(const BasicType *sig_bt, ++ VMRegPair *regs, ++ VMRegPair *regs2, ++ int total_args_passed) { ++ assert(regs2 == NULL, "not needed on LA"); ++ // Return the number of VMReg stack_slots needed for the args. ++ // This value does not include an abi space (like register window ++ // save area). 
++ ++ // We return the amount of VMReg stack slots we need to reserve for all ++ // the arguments NOT counting out_preserve_stack_slots. Since we always ++ // have space for storing at least 6 registers to memory we start with that. ++ // See int_stk_helper for a further discussion. ++ // We return the amount of VMRegImpl stack slots we need to reserve for all ++ // the arguments NOT counting out_preserve_stack_slots. ++ static const Register INT_ArgReg[Argument::n_register_parameters] = { ++ A0, A1, A2, A3, A4, A5, A6, A7 ++ }; ++ static const FloatRegister FP_ArgReg[Argument::n_float_register_parameters] = { ++ FA0, FA1, FA2, FA3, FA4, FA5, FA6, FA7 ++ }; ++ uint int_args = 0; ++ uint fp_args = 0; ++ uint stk_args = 0; // inc by 2 each time ++ ++// Example: ++// n java.lang.UNIXProcess::forkAndExec ++// private native int forkAndExec(byte[] prog, ++// byte[] argBlock, int argc, ++// byte[] envBlock, int envc, ++// byte[] dir, ++// boolean redirectErrorStream, ++// FileDescriptor stdin_fd, ++// FileDescriptor stdout_fd, ++// FileDescriptor stderr_fd) ++// JNIEXPORT jint JNICALL ++// Java_java_lang_UNIXProcess_forkAndExec(JNIEnv *env, ++// jobject process, ++// jbyteArray prog, ++// jbyteArray argBlock, jint argc, ++// jbyteArray envBlock, jint envc, ++// jbyteArray dir, ++// jboolean redirectErrorStream, ++// jobject stdin_fd, ++// jobject stdout_fd, ++// jobject stderr_fd) ++// ++// ::c_calling_convention ++// 0: // env <-- a0 ++// 1: L // klass/obj <-- t0 => a1 ++// 2: [ // prog[] <-- a0 => a2 ++// 3: [ // argBlock[] <-- a1 => a3 ++// 4: I // argc <-- a2 => a4 ++// 5: [ // envBlock[] <-- a3 => a5 ++// 6: I // envc <-- a4 => a5 ++// 7: [ // dir[] <-- a5 => a7 ++// 8: Z // redirectErrorStream <-- a6 => sp[0] ++// 9: L // stdin <-- a7 => sp[8] ++// 10: L // stdout fp[16] => sp[16] ++// 11: L // stderr fp[24] => sp[24] ++// ++ for (int i = 0; i < total_args_passed; i++) { ++ switch (sig_bt[i]) { ++ case T_VOID: // Halves of longs and doubles ++ assert(i != 0 && (sig_bt[i - 1] == T_LONG || sig_bt[i - 1] == T_DOUBLE), "expecting half"); ++ regs[i].set_bad(); ++ break; ++ case T_BOOLEAN: ++ case T_CHAR: ++ case T_BYTE: ++ case T_SHORT: ++ case T_INT: ++ if (int_args < Argument::n_register_parameters) { ++ regs[i].set1(INT_ArgReg[int_args++]->as_VMReg()); ++ } else { ++ regs[i].set1(VMRegImpl::stack2reg(stk_args)); ++ stk_args += 2; ++ } ++ break; ++ case T_LONG: ++ assert(sig_bt[i + 1] == T_VOID, "expecting half"); ++ // fall through ++ case T_OBJECT: ++ case T_ARRAY: ++ case T_ADDRESS: ++ case T_METADATA: ++ if (int_args < Argument::n_register_parameters) { ++ regs[i].set2(INT_ArgReg[int_args++]->as_VMReg()); ++ } else { ++ regs[i].set2(VMRegImpl::stack2reg(stk_args)); ++ stk_args += 2; ++ } ++ break; ++ case T_FLOAT: ++ if (fp_args < Argument::n_float_register_parameters) { ++ regs[i].set1(FP_ArgReg[fp_args++]->as_VMReg()); ++ } else if (int_args < Argument::n_register_parameters) { ++ regs[i].set1(INT_ArgReg[int_args++]->as_VMReg()); ++ } else { ++ regs[i].set1(VMRegImpl::stack2reg(stk_args)); ++ stk_args += 2; ++ } ++ break; ++ case T_DOUBLE: ++ assert(sig_bt[i + 1] == T_VOID, "expecting half"); ++ if (fp_args < Argument::n_float_register_parameters) { ++ regs[i].set2(FP_ArgReg[fp_args++]->as_VMReg()); ++ } else if (int_args < Argument::n_register_parameters) { ++ regs[i].set2(INT_ArgReg[int_args++]->as_VMReg()); ++ } else { ++ regs[i].set2(VMRegImpl::stack2reg(stk_args)); ++ stk_args += 2; ++ } ++ break; ++ default: ++ ShouldNotReachHere(); ++ break; ++ } ++ } ++ ++ return 
align_up(stk_args, 2); ++} ++ ++// --------------------------------------------------------------------------- ++void SharedRuntime::save_native_result(MacroAssembler *masm, BasicType ret_type, int frame_slots) { ++ // We always ignore the frame_slots arg and just use the space just below frame pointer ++ // which by this time is free to use ++ switch (ret_type) { ++ case T_VOID: ++ break; ++ case T_FLOAT: ++ __ fst_s(FSF, FP, -3 * wordSize); ++ break; ++ case T_DOUBLE: ++ __ fst_d(FSF, FP, -3 * wordSize); ++ break; ++ case T_LONG: ++ case T_OBJECT: ++ case T_ARRAY: ++ __ st_d(V0, FP, -3 * wordSize); ++ break; ++ default: ++ __ st_w(V0, FP, -3 * wordSize); ++ } ++} ++ ++void SharedRuntime::restore_native_result(MacroAssembler *masm, BasicType ret_type, int frame_slots) { ++ // We always ignore the frame_slots arg and just use the space just below frame pointer ++ // which by this time is free to use ++ switch (ret_type) { ++ case T_VOID: ++ break; ++ case T_FLOAT: ++ __ fld_s(FSF, FP, -3 * wordSize); ++ break; ++ case T_DOUBLE: ++ __ fld_d(FSF, FP, -3 * wordSize); ++ break; ++ case T_LONG: ++ case T_OBJECT: ++ case T_ARRAY: ++ __ ld_d(V0, FP, -3 * wordSize); ++ break; ++ default: { ++ __ ld_w(V0, FP, -3 * wordSize); ++ } ++ } ++} ++ ++static void save_args(MacroAssembler *masm, int arg_count, int first_arg, VMRegPair *args) { ++ for ( int i = first_arg ; i < arg_count ; i++ ) { ++ if (args[i].first()->is_Register()) { ++ __ push(args[i].first()->as_Register()); ++ } else if (args[i].first()->is_FloatRegister()) { ++ __ push(args[i].first()->as_FloatRegister()); ++ } ++ } ++} ++ ++static void restore_args(MacroAssembler *masm, int arg_count, int first_arg, VMRegPair *args) { ++ for ( int i = arg_count - 1 ; i >= first_arg ; i-- ) { ++ if (args[i].first()->is_Register()) { ++ __ pop(args[i].first()->as_Register()); ++ } else if (args[i].first()->is_FloatRegister()) { ++ __ pop(args[i].first()->as_FloatRegister()); ++ } ++ } ++} ++ ++// A simple move of integer like type ++static void simple_move32(MacroAssembler* masm, VMRegPair src, VMRegPair dst) { ++ if (src.first()->is_stack()) { ++ if (dst.first()->is_stack()) { ++ // stack to stack ++ __ ld_w(AT, FP, reg2offset_in(src.first())); ++ __ st_d(AT, SP, reg2offset_out(dst.first())); ++ } else { ++ // stack to reg ++ __ ld_w(dst.first()->as_Register(), FP, reg2offset_in(src.first())); ++ } ++ } else if (dst.first()->is_stack()) { ++ // reg to stack ++ __ st_d(src.first()->as_Register(), SP, reg2offset_out(dst.first())); ++ } else { ++ if (dst.first() != src.first()){ ++ __ move(dst.first()->as_Register(), src.first()->as_Register()); ++ } ++ } ++} ++ ++// An oop arg. Must pass a handle not the oop itself ++static void object_move(MacroAssembler* masm, ++ OopMap* map, ++ int oop_handle_offset, ++ int framesize_in_slots, ++ VMRegPair src, ++ VMRegPair dst, ++ bool is_receiver, ++ int* receiver_offset) { ++ ++ // must pass a handle. 
First figure out the location we use as a handle ++ ++ if (src.first()->is_stack()) { ++ // Oop is already on the stack as an argument ++ Register rHandle = T5; ++ Label nil; ++ __ xorr(rHandle, rHandle, rHandle); ++ __ ld_d(AT, FP, reg2offset_in(src.first())); ++ __ beq(AT, R0, nil); ++ __ lea(rHandle, Address(FP, reg2offset_in(src.first()))); ++ __ bind(nil); ++ if(dst.first()->is_stack())__ st_d( rHandle, SP, reg2offset_out(dst.first())); ++ else __ move( (dst.first())->as_Register(), rHandle); ++ ++ int offset_in_older_frame = src.first()->reg2stack() + SharedRuntime::out_preserve_stack_slots(); ++ map->set_oop(VMRegImpl::stack2reg(offset_in_older_frame + framesize_in_slots)); ++ if (is_receiver) { ++ *receiver_offset = (offset_in_older_frame + framesize_in_slots) * VMRegImpl::stack_slot_size; ++ } ++ } else { ++ // Oop is in an a register we must store it to the space we reserve ++ // on the stack for oop_handles ++ const Register rOop = src.first()->as_Register(); ++ assert( (rOop->encoding() >= A0->encoding()) && (rOop->encoding() <= T0->encoding()),"wrong register"); ++ const Register rHandle = T5; ++ //Important: refer to java_calling_convertion ++ int oop_slot = (rOop->encoding() - A0->encoding()) * VMRegImpl::slots_per_word + oop_handle_offset; ++ int offset = oop_slot*VMRegImpl::stack_slot_size; ++ Label skip; ++ __ st_d( rOop , SP, offset ); ++ map->set_oop(VMRegImpl::stack2reg(oop_slot)); ++ __ xorr( rHandle, rHandle, rHandle); ++ __ beq(rOop, R0, skip); ++ __ lea(rHandle, Address(SP, offset)); ++ __ bind(skip); ++ // Store the handle parameter ++ if(dst.first()->is_stack())__ st_d( rHandle, SP, reg2offset_out(dst.first())); ++ else __ move((dst.first())->as_Register(), rHandle); ++ ++ if (is_receiver) { ++ *receiver_offset = offset; ++ } ++ } ++} ++ ++// A float arg may have to do float reg int reg conversion ++static void float_move(MacroAssembler* masm, VMRegPair src, VMRegPair dst) { ++ assert(!src.second()->is_valid() && !dst.second()->is_valid(), "bad float_move"); ++ if (src.first()->is_stack()) { ++ // stack to stack/reg ++ if (dst.first()->is_stack()) { ++ __ ld_w(AT, FP, reg2offset_in(src.first())); ++ __ st_w(AT, SP, reg2offset_out(dst.first())); ++ } else if (dst.first()->is_FloatRegister()) { ++ __ fld_s(dst.first()->as_FloatRegister(), FP, reg2offset_in(src.first())); ++ } else { ++ __ ld_w(dst.first()->as_Register(), FP, reg2offset_in(src.first())); ++ } ++ } else { ++ // reg to stack/reg ++ if(dst.first()->is_stack()) { ++ __ fst_s(src.first()->as_FloatRegister(), SP, reg2offset_out(dst.first())); ++ } else if (dst.first()->is_FloatRegister()) { ++ __ fmov_s(dst.first()->as_FloatRegister(), src.first()->as_FloatRegister()); ++ } else { ++ __ movfr2gr_s(dst.first()->as_Register(), src.first()->as_FloatRegister()); ++ } ++ } ++} ++ ++// A long move ++static void long_move(MacroAssembler* masm, VMRegPair src, VMRegPair dst) { ++ ++ // The only legal possibility for a long_move VMRegPair is: ++ // 1: two stack slots (possibly unaligned) ++ // as neither the java or C calling convention will use registers ++ // for longs. 
++ if (src.first()->is_stack()) { ++ assert(src.second()->is_stack() && dst.second()->is_stack(), "must be all stack"); ++ if( dst.first()->is_stack()){ ++ __ ld_d(AT, FP, reg2offset_in(src.first())); ++ __ st_d(AT, SP, reg2offset_out(dst.first())); ++ } else { ++ __ ld_d(dst.first()->as_Register(), FP, reg2offset_in(src.first())); ++ } ++ } else { ++ if( dst.first()->is_stack()){ ++ __ st_d(src.first()->as_Register(), SP, reg2offset_out(dst.first())); ++ } else { ++ __ move(dst.first()->as_Register(), src.first()->as_Register()); ++ } ++ } ++} ++ ++// A double move ++static void double_move(MacroAssembler* masm, VMRegPair src, VMRegPair dst) { ++ ++ // The only legal possibilities for a double_move VMRegPair are: ++ // The painful thing here is that like long_move a VMRegPair might be ++ ++ // Because of the calling convention we know that src is either ++ // 1: a single physical register (xmm registers only) ++ // 2: two stack slots (possibly unaligned) ++ // dst can only be a pair of stack slots. ++ ++ if (src.first()->is_stack()) { ++ // source is all stack ++ if( dst.first()->is_stack()){ ++ __ ld_d(AT, FP, reg2offset_in(src.first())); ++ __ st_d(AT, SP, reg2offset_out(dst.first())); ++ } else if (dst.first()->is_FloatRegister()) { ++ __ fld_d(dst.first()->as_FloatRegister(), FP, reg2offset_in(src.first())); ++ } else { ++ __ ld_d(dst.first()->as_Register(), FP, reg2offset_in(src.first())); ++ } ++ } else { ++ // reg to stack/reg ++ // No worries about stack alignment ++ if( dst.first()->is_stack()){ ++ __ fst_d(src.first()->as_FloatRegister(), SP, reg2offset_out(dst.first())); ++ } else if (dst.first()->is_FloatRegister()) { ++ __ fmov_d(dst.first()->as_FloatRegister(), src.first()->as_FloatRegister()); ++ } else { ++ __ movfr2gr_d(dst.first()->as_Register(), src.first()->as_FloatRegister()); ++ } ++ } ++} ++ ++static void verify_oop_args(MacroAssembler* masm, ++ methodHandle method, ++ const BasicType* sig_bt, ++ const VMRegPair* regs) { ++ Register temp_reg = T4; // not part of any compiled calling seq ++ if (VerifyOops) { ++ for (int i = 0; i < method->size_of_parameters(); i++) { ++ if (sig_bt[i] == T_OBJECT || ++ sig_bt[i] == T_ARRAY) { ++ VMReg r = regs[i].first(); ++ assert(r->is_valid(), "bad oop arg"); ++ if (r->is_stack()) { ++ __ ld_d(temp_reg, Address(SP, r->reg2stack() * VMRegImpl::stack_slot_size + wordSize)); ++ __ verify_oop(temp_reg); ++ } else { ++ __ verify_oop(r->as_Register()); ++ } ++ } ++ } ++ } ++} ++ ++static void gen_special_dispatch(MacroAssembler* masm, ++ methodHandle method, ++ const BasicType* sig_bt, ++ const VMRegPair* regs) { ++ verify_oop_args(masm, method, sig_bt, regs); ++ vmIntrinsics::ID iid = method->intrinsic_id(); ++ ++ // Now write the args into the outgoing interpreter space ++ bool has_receiver = false; ++ Register receiver_reg = noreg; ++ int member_arg_pos = -1; ++ Register member_reg = noreg; ++ int ref_kind = MethodHandles::signature_polymorphic_intrinsic_ref_kind(iid); ++ if (ref_kind != 0) { ++ member_arg_pos = method->size_of_parameters() - 1; // trailing MemberName argument ++ member_reg = S3; // known to be free at this point ++ has_receiver = MethodHandles::ref_kind_has_receiver(ref_kind); ++ } else if (iid == vmIntrinsics::_invokeBasic || iid == vmIntrinsics::_linkToNative) { ++ has_receiver = true; ++ } else { ++ fatal("unexpected intrinsic id %d", vmIntrinsics::as_int(iid)); ++ } ++ ++ if (member_reg != noreg) { ++ // Load the member_arg into register, if necessary. 
++ SharedRuntime::check_member_name_argument_is_last_argument(method, sig_bt, regs); ++ VMReg r = regs[member_arg_pos].first(); ++ if (r->is_stack()) { ++ __ ld_d(member_reg, Address(SP, r->reg2stack() * VMRegImpl::stack_slot_size)); ++ } else { ++ // no data motion is needed ++ member_reg = r->as_Register(); ++ } ++ } ++ ++ if (has_receiver) { ++ // Make sure the receiver is loaded into a register. ++ assert(method->size_of_parameters() > 0, "oob"); ++ assert(sig_bt[0] == T_OBJECT, "receiver argument must be an object"); ++ VMReg r = regs[0].first(); ++ assert(r->is_valid(), "bad receiver arg"); ++ if (r->is_stack()) { ++ // Porting note: This assumes that compiled calling conventions always ++ // pass the receiver oop in a register. If this is not true on some ++ // platform, pick a temp and load the receiver from stack. ++ fatal("receiver always in a register"); ++ receiver_reg = SSR; // known to be free at this point ++ __ ld_d(receiver_reg, Address(SP, r->reg2stack() * VMRegImpl::stack_slot_size)); ++ } else { ++ // no data motion is needed ++ receiver_reg = r->as_Register(); ++ } ++ } ++ ++ // Figure out which address we are really jumping to: ++ MethodHandles::generate_method_handle_dispatch(masm, iid, ++ receiver_reg, member_reg, /*for_compiler_entry:*/ true); ++} ++ ++// --------------------------------------------------------------------------- ++// Generate a native wrapper for a given method. The method takes arguments ++// in the Java compiled code convention, marshals them to the native ++// convention (handlizes oops, etc), transitions to native, makes the call, ++// returns to java state (possibly blocking), unhandlizes any result and ++// returns. ++nmethod *SharedRuntime::generate_native_wrapper(MacroAssembler* masm, ++ const methodHandle& method, ++ int compile_id, ++ BasicType* in_sig_bt, ++ VMRegPair* in_regs, ++ BasicType ret_type, ++ address critical_entry) { ++ if (method->is_method_handle_intrinsic()) { ++ vmIntrinsics::ID iid = method->intrinsic_id(); ++ intptr_t start = (intptr_t)__ pc(); ++ int vep_offset = ((intptr_t)__ pc()) - start; ++ gen_special_dispatch(masm, ++ method, ++ in_sig_bt, ++ in_regs); ++ assert(((intptr_t)__ pc() - start - vep_offset) >= 1 * BytesPerInstWord, ++ "valid size for make_non_entrant"); ++ int frame_complete = ((intptr_t)__ pc()) - start; // not complete, period ++ __ flush(); ++ int stack_slots = SharedRuntime::out_preserve_stack_slots(); // no out slots at all, actually ++ return nmethod::new_native_nmethod(method, ++ compile_id, ++ masm->code(), ++ vep_offset, ++ frame_complete, ++ stack_slots / VMRegImpl::slots_per_word, ++ in_ByteSize(-1), ++ in_ByteSize(-1), ++ (OopMapSet*)NULL); ++ } ++ ++ bool is_critical_native = true; ++ address native_func = critical_entry; ++ if (native_func == NULL) { ++ native_func = method->native_function(); ++ is_critical_native = false; ++ } ++ assert(native_func != NULL, "must have function"); ++ ++ // Native nmethod wrappers never take possesion of the oop arguments. ++ // So the caller will gc the arguments. The only thing we need an ++ // oopMap for is if the call is static ++ // ++ // An OopMap for lock (and class if static), and one for the VM call itself ++ OopMapSet *oop_maps = new OopMapSet(); ++ ++ // We have received a description of where all the java arg are located ++ // on entry to the wrapper. We need to convert these args to where ++ // the jni function will expect them. 
To figure out where they go ++ // we convert the java signature to a C signature by inserting ++ // the hidden arguments as arg[0] and possibly arg[1] (static method) ++ ++ const int total_in_args = method->size_of_parameters(); ++ int total_c_args = total_in_args; ++ if (!is_critical_native) { ++ total_c_args += 1; ++ if (method->is_static()) { ++ total_c_args++; ++ } ++ } else { ++ for (int i = 0; i < total_in_args; i++) { ++ if (in_sig_bt[i] == T_ARRAY) { ++ total_c_args++; ++ } ++ } ++ } ++ ++ BasicType* out_sig_bt = NEW_RESOURCE_ARRAY(BasicType, total_c_args); ++ VMRegPair* out_regs = NEW_RESOURCE_ARRAY(VMRegPair, total_c_args); ++ BasicType* in_elem_bt = NULL; ++ ++ int argc = 0; ++ if (!is_critical_native) { ++ out_sig_bt[argc++] = T_ADDRESS; ++ if (method->is_static()) { ++ out_sig_bt[argc++] = T_OBJECT; ++ } ++ ++ for (int i = 0; i < total_in_args ; i++ ) { ++ out_sig_bt[argc++] = in_sig_bt[i]; ++ } ++ } else { ++ in_elem_bt = NEW_RESOURCE_ARRAY(BasicType, total_in_args); ++ SignatureStream ss(method->signature()); ++ for (int i = 0; i < total_in_args ; i++ ) { ++ if (in_sig_bt[i] == T_ARRAY) { ++ // Arrays are passed as int, elem* pair ++ out_sig_bt[argc++] = T_INT; ++ out_sig_bt[argc++] = T_ADDRESS; ++ Symbol* atype = ss.as_symbol(); ++ const char* at = atype->as_C_string(); ++ if (strlen(at) == 2) { ++ assert(at[0] == '[', "must be"); ++ switch (at[1]) { ++ case 'B': in_elem_bt[i] = T_BYTE; break; ++ case 'C': in_elem_bt[i] = T_CHAR; break; ++ case 'D': in_elem_bt[i] = T_DOUBLE; break; ++ case 'F': in_elem_bt[i] = T_FLOAT; break; ++ case 'I': in_elem_bt[i] = T_INT; break; ++ case 'J': in_elem_bt[i] = T_LONG; break; ++ case 'S': in_elem_bt[i] = T_SHORT; break; ++ case 'Z': in_elem_bt[i] = T_BOOLEAN; break; ++ default: ShouldNotReachHere(); ++ } ++ } ++ } else { ++ out_sig_bt[argc++] = in_sig_bt[i]; ++ in_elem_bt[i] = T_VOID; ++ } ++ if (in_sig_bt[i] != T_VOID) { ++ assert(in_sig_bt[i] == ss.type(), "must match"); ++ ss.next(); ++ } ++ } ++ } ++ ++ // Now figure out where the args must be stored and how much stack space ++ // they require (neglecting out_preserve_stack_slots but space for storing ++ // the 1st six register arguments). It's weird see int_stk_helper. ++ // ++ int out_arg_slots; ++ out_arg_slots = c_calling_convention(out_sig_bt, out_regs, NULL, total_c_args); ++ ++ // Compute framesize for the wrapper. We need to handlize all oops in ++ // registers. We must create space for them here that is disjoint from ++ // the windowed save area because we have no control over when we might ++ // flush the window again and overwrite values that gc has since modified. ++ // (The live window race) ++ // ++ // We always just allocate 6 word for storing down these object. This allow ++ // us to simply record the base and use the Ireg number to decide which ++ // slot to use. (Note that the reg number is the inbound number not the ++ // outbound number). ++ // We must shuffle args to match the native convention, and include var-args space. ++ ++ // Calculate the total number of stack slots we will need. ++ ++ // First count the abi requirement plus all of the outgoing args ++ int stack_slots = SharedRuntime::out_preserve_stack_slots() + out_arg_slots; ++ ++ // Now the space for the inbound oop handle area ++ int total_save_slots = 9 * VMRegImpl::slots_per_word; // 9 arguments passed in registers ++ if (is_critical_native) { ++ // Critical natives may have to call out so they need a save area ++ // for register arguments. 
++ int double_slots = 0; ++ int single_slots = 0; ++ for ( int i = 0; i < total_in_args; i++) { ++ if (in_regs[i].first()->is_Register()) { ++ const Register reg = in_regs[i].first()->as_Register(); ++ switch (in_sig_bt[i]) { ++ case T_BOOLEAN: ++ case T_BYTE: ++ case T_SHORT: ++ case T_CHAR: ++ case T_INT: single_slots++; break; ++ case T_ARRAY: ++ case T_LONG: double_slots++; break; ++ default: ShouldNotReachHere(); ++ } ++ } else if (in_regs[i].first()->is_FloatRegister()) { ++ switch (in_sig_bt[i]) { ++ case T_FLOAT: single_slots++; break; ++ case T_DOUBLE: double_slots++; break; ++ default: ShouldNotReachHere(); ++ } ++ } ++ } ++ total_save_slots = double_slots * 2 + single_slots; ++ // align the save area ++ if (double_slots != 0) { ++ stack_slots = align_up(stack_slots, 2); ++ } ++ } ++ ++ int oop_handle_offset = stack_slots; ++ stack_slots += total_save_slots; ++ ++ // Now any space we need for handlizing a klass if static method ++ ++ int klass_slot_offset = 0; ++ int klass_offset = -1; ++ int lock_slot_offset = 0; ++ bool is_static = false; ++ ++ if (method->is_static()) { ++ klass_slot_offset = stack_slots; ++ stack_slots += VMRegImpl::slots_per_word; ++ klass_offset = klass_slot_offset * VMRegImpl::stack_slot_size; ++ is_static = true; ++ } ++ ++ // Plus a lock if needed ++ ++ if (method->is_synchronized()) { ++ lock_slot_offset = stack_slots; ++ stack_slots += VMRegImpl::slots_per_word; ++ } ++ ++ // Now a place to save return value or as a temporary for any gpr -> fpr moves ++ // + 2 for return address (which we own) and saved fp ++ stack_slots += 2 + 9 * VMRegImpl::slots_per_word; // (T0, A0, A1, A2, A3, A4, A5, A6, A7) ++ ++ // Ok The space we have allocated will look like: ++ // ++ // ++ // FP-> | | ++ // | 2 slots (ra) | ++ // | 2 slots (fp) | ++ // |---------------------| ++ // | 2 slots for moves | ++ // |---------------------| ++ // | lock box (if sync) | ++ // |---------------------| <- lock_slot_offset ++ // | klass (if static) | ++ // |---------------------| <- klass_slot_offset ++ // | oopHandle area | ++ // |---------------------| <- oop_handle_offset ++ // | outbound memory | ++ // | based arguments | ++ // | | ++ // |---------------------| ++ // | vararg area | ++ // |---------------------| ++ // | | ++ // SP-> | out_preserved_slots | ++ // ++ // ++ ++ ++ // Now compute actual number of stack words we need rounding to make ++ // stack properly aligned. ++ stack_slots = align_up(stack_slots, StackAlignmentInSlots); ++ ++ int stack_size = stack_slots * VMRegImpl::stack_slot_size; ++ ++ intptr_t start = (intptr_t)__ pc(); ++ ++ ++ ++ // First thing make an ic check to see if we should even be here ++ address ic_miss = SharedRuntime::get_ic_miss_stub(); ++ ++ // We are free to use all registers as temps without saving them and ++ // restoring them except fp. fp is the only callee save register ++ // as far as the interpreter and the compiler(s) are concerned. 
++ ++ //refer to register_loongarch.hpp:IC_Klass ++ const Register ic_reg = T1; ++ const Register receiver = T0; ++ ++ Label hit; ++ Label exception_pending; ++ ++ __ verify_oop(receiver); ++ //add for compressedoops ++ __ load_klass(T4, receiver); ++ __ beq(T4, ic_reg, hit); ++ __ jmp(ic_miss, relocInfo::runtime_call_type); ++ __ bind(hit); ++ ++ int vep_offset = ((intptr_t)__ pc()) - start; ++ ++ if (VM_Version::supports_fast_class_init_checks() && method->needs_clinit_barrier()) { ++ Label L_skip_barrier; ++ address handle_wrong_method = SharedRuntime::get_handle_wrong_method_stub(); ++ __ mov_metadata(T4, method->method_holder()); // InstanceKlass* ++ __ clinit_barrier(T4, AT, &L_skip_barrier); ++ __ jmp(handle_wrong_method, relocInfo::runtime_call_type); ++ ++ __ bind(L_skip_barrier); ++ } ++ ++#ifdef COMPILER1 ++ if (InlineObjectHash && method->intrinsic_id() == vmIntrinsics::_hashCode) { ++ // Object.hashCode can pull the hashCode from the header word ++ // instead of doing a full VM transition once it's been computed. ++ // Since hashCode is usually polymorphic at call sites we can't do ++ // this optimization at the call site without a lot of work. ++ Label slowCase; ++ Register receiver = T0; ++ Register result = V0; ++ __ ld_d ( result, receiver, oopDesc::mark_offset_in_bytes()); ++ // check if locked ++ __ andi(AT, result, markWord::unlocked_value); ++ __ beq(AT, R0, slowCase); ++ if (UseBiasedLocking) { ++ // Check if biased and fall through to runtime if so ++ __ andi (AT, result, markWord::biased_lock_bit_in_place); ++ __ bne(AT, R0, slowCase); ++ } ++ // get hash ++ __ li(AT, markWord::hash_mask_in_place); ++ __ andr (AT, result, AT); ++ // test if hashCode exists ++ __ beq (AT, R0, slowCase); ++ __ shr(result, markWord::hash_shift); ++ __ jr(RA); ++ __ bind (slowCase); ++ } ++#endif // COMPILER1 ++ ++ // Generate stack overflow check ++ __ bang_stack_with_offset((int)StackOverflow::stack_shadow_zone_size()); ++ ++ // The instruction at the verified entry point must be 4 bytes or longer ++ // because it can be patched on the fly by make_non_entrant. ++ if (((intptr_t)__ pc() - start - vep_offset) < 1 * BytesPerInstWord) { ++ __ nop(); ++ } ++ ++ // Generate a new frame for the wrapper. ++ // do LA need this ? ++#ifndef OPT_THREAD ++ __ get_thread(TREG); ++#endif ++ __ st_ptr(SP, TREG, in_bytes(JavaThread::last_Java_sp_offset())); ++ assert(StackAlignmentInBytes == 16, "must be"); ++ __ bstrins_d(SP, R0, 3, 0); ++ ++ __ enter(); ++ // -2 because return address is already present and so is saved fp ++ __ addi_d(SP, SP, -1 * (stack_size - 2*wordSize)); ++ ++ BarrierSetAssembler* bs = BarrierSet::barrier_set()->barrier_set_assembler(); ++ bs->nmethod_entry_barrier(masm); ++ ++ // Frame is now completed as far a size and linkage. ++ ++ int frame_complete = ((intptr_t)__ pc()) - start; ++ ++ // Calculate the difference between sp and fp. We need to know it ++ // after the native call because on windows Java Natives will pop ++ // the arguments and it is painful to do sp relative addressing ++ // in a platform independent way. So after the call we switch to ++ // fp relative addressing. 
++ //FIXME actually , the fp_adjustment may not be the right, because andr(sp, sp, at) may change ++ //the SP ++ int fp_adjustment = stack_size; ++ ++ // Compute the fp offset for any slots used after the jni call ++ ++ int lock_slot_fp_offset = (lock_slot_offset*VMRegImpl::stack_slot_size) - fp_adjustment; ++ // We use TREG as a thread pointer because it is callee save and ++ // if we load it once it is usable thru the entire wrapper ++ const Register thread = TREG; ++ ++ // We use S4 as the oop handle for the receiver/klass ++ // It is callee save so it survives the call to native ++ ++ const Register oop_handle_reg = S4; ++ ++#ifndef OPT_THREAD ++ __ get_thread(thread); ++#endif ++ ++ // ++ // We immediately shuffle the arguments so that any vm call we have to ++ // make from here on out (sync slow path, jvmpi, etc.) we will have ++ // captured the oops from our caller and have a valid oopMap for ++ // them. ++ ++ // ----------------- ++ // The Grand Shuffle ++ // ++ // Natives require 1 or 2 extra arguments over the normal ones: the JNIEnv* ++ // and, if static, the class mirror instead of a receiver. This pretty much ++ // guarantees that register layout will not match (and LA doesn't use reg ++ // parms though amd does). Since the native abi doesn't use register args ++ // and the java conventions does we don't have to worry about collisions. ++ // All of our moved are reg->stack or stack->stack. ++ // We ignore the extra arguments during the shuffle and handle them at the ++ // last moment. The shuffle is described by the two calling convention ++ // vectors we have in our possession. We simply walk the java vector to ++ // get the source locations and the c vector to get the destinations. ++ ++ int c_arg = method->is_static() ? 2 : 1 ; ++ ++ // Record sp-based slot for receiver on stack for non-static methods ++ int receiver_offset = -1; ++ ++ // This is a trick. We double the stack slots so we can claim ++ // the oops in the caller's frame. Since we are sure to have ++ // more args than the caller doubling is enough to make ++ // sure we can capture all the incoming oop args from the ++ // caller. ++ // ++ OopMap* map = new OopMap(stack_slots * 2, 0 /* arg_slots*/); ++ ++ // Mark location of fp (someday) ++ // map->set_callee_saved(VMRegImpl::stack2reg( stack_slots - 2), stack_slots * 2, 0, vmreg(fp)); ++ ++#ifdef ASSERT ++ bool reg_destroyed[RegisterImpl::number_of_registers]; ++ bool freg_destroyed[FloatRegisterImpl::number_of_registers]; ++ for ( int r = 0 ; r < RegisterImpl::number_of_registers ; r++ ) { ++ reg_destroyed[r] = false; ++ } ++ for ( int f = 0 ; f < FloatRegisterImpl::number_of_registers ; f++ ) { ++ freg_destroyed[f] = false; ++ } ++ ++#endif /* ASSERT */ ++ ++ // This may iterate in two different directions depending on the ++ // kind of native it is. The reason is that for regular JNI natives ++ // the incoming and outgoing registers are offset upwards and for ++ // critical natives they are offset down. 
++ GrowableArray arg_order(2 * total_in_args); ++ VMRegPair tmp_vmreg; ++ tmp_vmreg.set2(T8->as_VMReg()); ++ ++ if (!is_critical_native) { ++ for (int i = total_in_args - 1, c_arg = total_c_args - 1; i >= 0; i--, c_arg--) { ++ arg_order.push(i); ++ arg_order.push(c_arg); ++ } ++ } else { ++ // Compute a valid move order, using tmp_vmreg to break any cycles ++ Unimplemented(); ++ // ComputeMoveOrder cmo(total_in_args, in_regs, total_c_args, out_regs, in_sig_bt, arg_order, tmp_vmreg); ++ } ++ ++ int temploc = -1; ++ for (int ai = 0; ai < arg_order.length(); ai += 2) { ++ int i = arg_order.at(ai); ++ int c_arg = arg_order.at(ai + 1); ++ __ block_comment(err_msg("move %d -> %d", i, c_arg)); ++ if (c_arg == -1) { ++ assert(is_critical_native, "should only be required for critical natives"); ++ // This arg needs to be moved to a temporary ++ __ move(tmp_vmreg.first()->as_Register(), in_regs[i].first()->as_Register()); ++ in_regs[i] = tmp_vmreg; ++ temploc = i; ++ continue; ++ } else if (i == -1) { ++ assert(is_critical_native, "should only be required for critical natives"); ++ // Read from the temporary location ++ assert(temploc != -1, "must be valid"); ++ i = temploc; ++ temploc = -1; ++ } ++#ifdef ASSERT ++ if (in_regs[i].first()->is_Register()) { ++ assert(!reg_destroyed[in_regs[i].first()->as_Register()->encoding()], "destroyed reg!"); ++ } else if (in_regs[i].first()->is_FloatRegister()) { ++ assert(!freg_destroyed[in_regs[i].first()->as_FloatRegister()->encoding()], "destroyed reg!"); ++ } ++ if (out_regs[c_arg].first()->is_Register()) { ++ reg_destroyed[out_regs[c_arg].first()->as_Register()->encoding()] = true; ++ } else if (out_regs[c_arg].first()->is_FloatRegister()) { ++ freg_destroyed[out_regs[c_arg].first()->as_FloatRegister()->encoding()] = true; ++ } ++#endif /* ASSERT */ ++ switch (in_sig_bt[i]) { ++ case T_ARRAY: ++ if (is_critical_native) { ++ Unimplemented(); ++ // unpack_array_argument(masm, in_regs[i], in_elem_bt[i], out_regs[c_arg + 1], out_regs[c_arg]); ++ c_arg++; ++#ifdef ASSERT ++ if (out_regs[c_arg].first()->is_Register()) { ++ reg_destroyed[out_regs[c_arg].first()->as_Register()->encoding()] = true; ++ } else if (out_regs[c_arg].first()->is_FloatRegister()) { ++ freg_destroyed[out_regs[c_arg].first()->as_FloatRegister()->encoding()] = true; ++ } ++#endif ++ break; ++ } ++ case T_OBJECT: ++ assert(!is_critical_native, "no oop arguments"); ++ object_move(masm, map, oop_handle_offset, stack_slots, in_regs[i], out_regs[c_arg], ++ ((i == 0) && (!is_static)), ++ &receiver_offset); ++ break; ++ case T_VOID: ++ break; ++ ++ case T_FLOAT: ++ float_move(masm, in_regs[i], out_regs[c_arg]); ++ break; ++ ++ case T_DOUBLE: ++ assert( i + 1 < total_in_args && ++ in_sig_bt[i + 1] == T_VOID && ++ out_sig_bt[c_arg+1] == T_VOID, "bad arg list"); ++ double_move(masm, in_regs[i], out_regs[c_arg]); ++ break; ++ ++ case T_LONG : ++ long_move(masm, in_regs[i], out_regs[c_arg]); ++ break; ++ ++ case T_ADDRESS: assert(false, "found T_ADDRESS in java args"); ++ ++ default: ++ simple_move32(masm, in_regs[i], out_regs[c_arg]); ++ } ++ } ++ ++ // point c_arg at the first arg that is already loaded in case we ++ // need to spill before we call out ++ c_arg = total_c_args - total_in_args; ++ // Pre-load a static method's oop. Used both by locking code and ++ // the normal JNI call code. 
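For a regular (non-critical) native the move order simply walks the Java argument vector backwards, pairing each Java index with its C-side slot, which is shifted by the hidden JNIEnv*/mirror arguments. A small sketch of how that (i, c_arg) pairing could be produced (hypothetical helper, not HotSpot code):

#include <utility>
#include <vector>

// Pair every Java argument index with its destination index in the C
// signature, walking from the last argument down to the first, the same
// order the shuffle loop above uses for regular JNI natives.
std::vector<std::pair<int,int>> shuffle_order(int total_in_args, int total_c_args) {
    std::vector<std::pair<int,int>> order;
    for (int i = total_in_args - 1, c = total_c_args - 1; i >= 0; --i, --c)
        order.push_back({i, c});   // move Java arg i into C slot c
    return order;
}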
++ ++ __ move(oop_handle_reg, A1); ++ ++ if (method->is_static() && !is_critical_native) { ++ ++ // load oop into a register ++ __ movoop(oop_handle_reg, ++ JNIHandles::make_local((method->method_holder())->java_mirror()), ++ /*immediate*/true); ++ ++ // Now handlize the static class mirror it's known not-null. ++ __ st_d( oop_handle_reg, SP, klass_offset); ++ map->set_oop(VMRegImpl::stack2reg(klass_slot_offset)); ++ ++ // Now get the handle ++ __ lea(oop_handle_reg, Address(SP, klass_offset)); ++ // store the klass handle as second argument ++ __ move(A1, oop_handle_reg); ++ // and protect the arg if we must spill ++ c_arg--; ++ } ++ ++ // Change state to native (we save the return address in the thread, since it might not ++ // be pushed on the stack when we do a a stack traversal). It is enough that the pc() ++ // points into the right code segment. It does not have to be the correct return pc. ++ // We use the same pc/oopMap repeatedly when we call out ++ ++ Label native_return; ++ __ set_last_Java_frame(SP, noreg, native_return); ++ ++ // We have all of the arguments setup at this point. We must not touch any register ++ // argument registers at this point (what if we save/restore them there are no oop? ++ { ++ SkipIfEqual skip_if(masm, &DTraceMethodProbes, 0); ++ save_args(masm, total_c_args, c_arg, out_regs); ++ int metadata_index = __ oop_recorder()->find_index(method()); ++ RelocationHolder rspec = metadata_Relocation::spec(metadata_index); ++ __ relocate(rspec); ++ __ patchable_li52(AT, (long)(method())); ++ ++ __ call_VM_leaf( ++ CAST_FROM_FN_PTR(address, SharedRuntime::dtrace_method_entry), ++ thread, AT); ++ ++ restore_args(masm, total_c_args, c_arg, out_regs); ++ } ++ ++ // These are register definitions we need for locking/unlocking ++ const Register swap_reg = T8; // Must use T8 for cmpxchg instruction ++ const Register obj_reg = T4; // Will contain the oop ++ //const Register lock_reg = T6; // Address of compiler lock object (BasicLock) ++ const Register lock_reg = c_rarg0; // Address of compiler lock object (BasicLock) ++ ++ ++ ++ Label slow_path_lock; ++ Label lock_done; ++ ++ // Lock a synchronized method ++ if (method->is_synchronized()) { ++ assert(!is_critical_native, "unhandled"); ++ ++ const int mark_word_offset = BasicLock::displaced_header_offset_in_bytes(); ++ ++ // Get the handle (the 2nd argument) ++ __ move(oop_handle_reg, A1); ++ ++ // Get address of the box ++ __ lea(lock_reg, Address(FP, lock_slot_fp_offset)); ++ ++ // Load the oop from the handle ++ __ ld_d(obj_reg, oop_handle_reg, 0); ++ ++ if (UseBiasedLocking) { ++ // Note that oop_handle_reg is trashed during this call ++ __ biased_locking_enter(lock_reg, obj_reg, swap_reg, A1, false, lock_done, &slow_path_lock); ++ } ++ ++ // Load immediate 1 into swap_reg %T8 ++ __ li(swap_reg, 1); ++ ++ __ ld_d(AT, obj_reg, 0); ++ __ orr(swap_reg, swap_reg, AT); ++ ++ __ st_d(swap_reg, lock_reg, mark_word_offset); ++ __ cmpxchg(Address(obj_reg, 0), swap_reg, lock_reg, AT, true, false, lock_done); ++ // Test if the oopMark is an obvious stack pointer, i.e., ++ // 1) (mark & 3) == 0, and ++ // 2) sp <= mark < mark + os::pagesize() ++ // These 3 tests can be done by evaluating the following ++ // expression: ((mark - sp) & (3 - os::vm_page_size())), ++ // assuming both stack pointer and pagesize have their ++ // least significant 2 bits clear. 
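Spelled out in plain C++, the recursion test that the following instructions implement is roughly the predicate below: the mark returned by the cmpxchg is treated as a recursive stack lock when it is 4-byte aligned and lies within one page above SP. The 4 KiB page size is only an illustrative assumption; the real code reads os::vm_page_size():

#include <cstdint>

// Illustrative only: assumes a 4 KiB page and that both the stack pointer
// and the page size have their low two bits clear.
constexpr intptr_t kPageSize = 4096;

// True when the mark word is 4-byte aligned and points into the current
// page just above SP, i.e. it is a BasicLock already on this thread's stack.
bool is_recursive_stack_lock(intptr_t mark, intptr_t sp) {
    return ((mark - sp) & (3 - kPageSize)) == 0;
}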
++ // NOTE: the oopMark is in swap_reg %T8 as the result of cmpxchg ++ ++ __ sub_d(swap_reg, swap_reg, SP); ++ __ li(AT, 3 - os::vm_page_size()); ++ __ andr(swap_reg , swap_reg, AT); ++ // Save the test result, for recursive case, the result is zero ++ __ st_d(swap_reg, lock_reg, mark_word_offset); ++ __ bne(swap_reg, R0, slow_path_lock); ++ // Slow path will re-enter here ++ __ bind(lock_done); ++ ++ if (UseBiasedLocking) { ++ // Re-fetch oop_handle_reg as we trashed it above ++ __ move(A1, oop_handle_reg); ++ } ++ } ++ ++ ++ // Finally just about ready to make the JNI call ++ ++ ++ // get JNIEnv* which is first argument to native ++ if (!is_critical_native) { ++ __ addi_d(A0, thread, in_bytes(JavaThread::jni_environment_offset())); ++ ++ // Now set thread in native ++ __ addi_d(AT, R0, _thread_in_native); ++ if (os::is_MP()) { ++ __ membar(Assembler::Membar_mask_bits(__ LoadStore|__ StoreStore)); ++ } ++ __ st_w(AT, thread, in_bytes(JavaThread::thread_state_offset())); ++ } ++ // do the call ++ __ call(native_func, relocInfo::runtime_call_type); ++ __ bind(native_return); ++ ++ oop_maps->add_gc_map(((intptr_t)__ pc()) - start, map); ++ ++ // WARNING - on Windows Java Natives use pascal calling convention and pop the ++ // arguments off of the stack. We could just re-adjust the stack pointer here ++ // and continue to do SP relative addressing but we instead switch to FP ++ // relative addressing. ++ ++ // Unpack native results. ++ switch (ret_type) { ++ case T_BOOLEAN: __ c2bool(V0); break; ++ case T_CHAR : __ bstrpick_d(V0, V0, 15, 0); break; ++ case T_BYTE : __ sign_extend_byte (V0); break; ++ case T_SHORT : __ sign_extend_short(V0); break; ++ case T_INT : // nothing to do break; ++ case T_DOUBLE : ++ case T_FLOAT : ++ // Result is in st0 we'll save as needed ++ break; ++ case T_ARRAY: // Really a handle ++ case T_OBJECT: // Really a handle ++ break; // can't de-handlize until after safepoint check ++ case T_VOID: break; ++ case T_LONG: break; ++ default : ShouldNotReachHere(); ++ } ++ ++ Label after_transition; ++ ++ // If this is a critical native, check for a safepoint or suspend request after the call. ++ // If a safepoint is needed, transition to native, then to native_trans to handle ++ // safepoints like the native methods that are not critical natives. ++ if (is_critical_native) { ++ Label needs_safepoint; ++ __ safepoint_poll(needs_safepoint, thread, false /* at_return */, true /* acquire */, false /* in_nmethod */); ++ __ ld_w(AT, thread, in_bytes(JavaThread::suspend_flags_offset())); ++ __ beq(AT, R0, after_transition); ++ __ bind(needs_safepoint); ++ } ++ ++ // Switch thread to "native transition" state before reading the synchronization state. ++ // This additional state is necessary because reading and testing the synchronization ++ // state is not atomic w.r.t. GC, as this scenario demonstrates: ++ // Java thread A, in _thread_in_native state, loads _not_synchronized and is preempted. ++ // VM thread changes sync state to synchronizing and suspends threads for GC. ++ // Thread A is resumed to finish this native method, but doesn't block here since it ++ // didn't see any synchronization is progress, and escapes. 
++ __ addi_d(AT, R0, _thread_in_native_trans); ++ if (os::is_MP()) { ++ __ membar(Assembler::Membar_mask_bits(__ LoadStore|__ StoreStore)); ++ } ++ __ st_w(AT, thread, in_bytes(JavaThread::thread_state_offset())); ++ ++ if(os::is_MP()) __ membar(__ AnyAny); ++ ++ // check for safepoint operation in progress and/or pending suspend requests ++ { ++ Label Continue; ++ Label slow_path; ++ ++ // We need an acquire here to ensure that any subsequent load of the ++ // global SafepointSynchronize::_state flag is ordered after this load ++ // of the thread-local polling word. We don't want this poll to ++ // return false (i.e. not safepointing) and a later poll of the global ++ // SafepointSynchronize::_state spuriously to return true. ++ // ++ // This is to avoid a race when we're in a native->Java transition ++ // racing the code which wakes up from a safepoint. ++ ++ __ safepoint_poll(slow_path, thread, true /* at_return */, true /* acquire */, false /* in_nmethod */); ++ __ ld_w(AT, thread, in_bytes(JavaThread::suspend_flags_offset())); ++ __ beq(AT, R0, Continue); ++ __ bind(slow_path); ++ ++ // Don't use call_VM as it will see a possible pending exception and forward it ++ // and never return here preventing us from clearing _last_native_pc down below. ++ // ++ save_native_result(masm, ret_type, stack_slots); ++ __ move(A0, thread); ++ __ addi_d(SP, SP, -wordSize); ++ __ push(S2); ++ __ move(S2, SP); // use S2 as a sender SP holder ++ assert(StackAlignmentInBytes == 16, "must be"); ++ __ bstrins_d(SP, R0, 3, 0); // align stack as required by ABI ++ __ call(CAST_FROM_FN_PTR(address, JavaThread::check_special_condition_for_native_trans), relocInfo::runtime_call_type); ++ __ move(SP, S2); // use S2 as a sender SP holder ++ __ pop(S2); ++ __ addi_d(SP, SP, wordSize); ++ // Restore any method result value ++ restore_native_result(masm, ret_type, stack_slots); ++ ++ __ bind(Continue); ++ } ++ ++ // change thread state ++ __ addi_d(AT, R0, _thread_in_Java); ++ if (os::is_MP()) { ++ __ membar(Assembler::Membar_mask_bits(__ LoadStore|__ StoreStore)); ++ } ++ __ st_w(AT, thread, in_bytes(JavaThread::thread_state_offset())); ++ __ bind(after_transition); ++ Label reguard; ++ Label reguard_done; ++ __ ld_w(AT, thread, in_bytes(JavaThread::stack_guard_state_offset())); ++ __ addi_d(AT, AT, -StackOverflow::stack_guard_yellow_reserved_disabled); ++ __ beq(AT, R0, reguard); ++ // slow path reguard re-enters here ++ __ bind(reguard_done); ++ ++ // Handle possible exception (will unlock if necessary) ++ ++ // native result if any is live ++ ++ // Unlock ++ Label slow_path_unlock; ++ Label unlock_done; ++ if (method->is_synchronized()) { ++ ++ Label done; ++ ++ // Get locked oop from the handle we passed to jni ++ __ ld_d( obj_reg, oop_handle_reg, 0); ++ if (UseBiasedLocking) { ++ __ biased_locking_exit(obj_reg, T8, done); ++ ++ } ++ ++ // Simple recursive lock? 
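Around the native call the thread moves through _thread_in_native, _thread_in_native_trans and back to _thread_in_Java, polling for safepoints and suspend requests while in the transition state so the VM can stop it before it re-enters Java. A condensed sketch of that protocol; the types, the fence placement and the memory orders are an interpretation of the membars above, not HotSpot's actual API:

#include <atomic>

enum ThreadState { kInNative, kInNativeTrans, kInJava };

struct JavaThreadModel {
    std::atomic<int>  state;
    std::atomic<bool> safepoint_requested;
    std::atomic<int>  suspend_flags;
};

// Simplified shape of the native return path generated above.
template <typename F>
void return_from_native(JavaThreadModel& t, F block_for_safepoint) {
    t.state.store(kInNativeTrans, std::memory_order_release);
    std::atomic_thread_fence(std::memory_order_seq_cst);   // the AnyAny barrier

    // Acquire ordering so a later read of the global safepoint state cannot
    // be ordered before this poll of the thread-local word.
    if (t.safepoint_requested.load(std::memory_order_acquire) ||
        t.suspend_flags.load(std::memory_order_acquire) != 0) {
        block_for_safepoint();   // check_special_condition_for_native_trans
    }
    t.state.store(kInJava, std::memory_order_release);
}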
++ ++ __ ld_d(AT, FP, lock_slot_fp_offset); ++ __ beq(AT, R0, done); ++ // Must save FSF if if it is live now because cmpxchg must use it ++ if (ret_type != T_FLOAT && ret_type != T_DOUBLE && ret_type != T_VOID) { ++ save_native_result(masm, ret_type, stack_slots); ++ } ++ ++ // get old displaced header ++ __ ld_d (T8, FP, lock_slot_fp_offset); ++ // get address of the stack lock ++ __ addi_d (c_rarg0, FP, lock_slot_fp_offset); ++ // Atomic swap old header if oop still contains the stack lock ++ __ cmpxchg(Address(obj_reg, 0), c_rarg0, T8, AT, false, false, unlock_done, &slow_path_unlock); ++ ++ // slow path re-enters here ++ __ bind(unlock_done); ++ if (ret_type != T_FLOAT && ret_type != T_DOUBLE && ret_type != T_VOID) { ++ restore_native_result(masm, ret_type, stack_slots); ++ } ++ ++ __ bind(done); ++ ++ } ++ { ++ SkipIfEqual skip_if(masm, &DTraceMethodProbes, 0); ++ // Tell dtrace about this method exit ++ save_native_result(masm, ret_type, stack_slots); ++ int metadata_index = __ oop_recorder()->find_index( (method())); ++ RelocationHolder rspec = metadata_Relocation::spec(metadata_index); ++ __ relocate(rspec); ++ __ patchable_li52(AT, (long)(method())); ++ ++ __ call_VM_leaf( ++ CAST_FROM_FN_PTR(address, SharedRuntime::dtrace_method_exit), ++ thread, AT); ++ restore_native_result(masm, ret_type, stack_slots); ++ } ++ ++ // We can finally stop using that last_Java_frame we setup ages ago ++ ++ __ reset_last_Java_frame(false); ++ ++ // Unpack oop result, e.g. JNIHandles::resolve value. ++ if (is_reference_type(ret_type)) { ++ __ resolve_jobject(V0, thread, T4); ++ } ++ ++ if (CheckJNICalls) { ++ // clear_pending_jni_exception_check ++ __ st_d(R0, thread, in_bytes(JavaThread::pending_jni_exception_check_fn_offset())); ++ } ++ ++ if (!is_critical_native) { ++ // reset handle block ++ __ ld_d(AT, thread, in_bytes(JavaThread::active_handles_offset())); ++ __ st_w(R0, AT, JNIHandleBlock::top_offset_in_bytes()); ++ } ++ ++ if (!is_critical_native) { ++ // Any exception pending? ++ __ ld_d(AT, thread, in_bytes(Thread::pending_exception_offset())); ++ __ bne(AT, R0, exception_pending); ++ } ++ // no exception, we're almost done ++ ++ // check that only result value is on FPU stack ++ __ verify_FPU(ret_type == T_FLOAT || ret_type == T_DOUBLE ? 1 : 0, "native_wrapper normal exit"); ++ ++ // Return ++#ifndef OPT_THREAD ++ __ get_thread(TREG); ++#endif ++ //__ ld_ptr(SP, TREG, in_bytes(JavaThread::last_Java_sp_offset())); ++ __ leave(); ++ ++ __ jr(RA); ++ // Unexpected paths are out of line and go here ++ // Slow path locking & unlocking ++ if (method->is_synchronized()) { ++ ++ // BEGIN Slow path lock ++ __ bind(slow_path_lock); ++ ++ // protect the args we've loaded ++ save_args(masm, total_c_args, c_arg, out_regs); ++ ++ // has last_Java_frame setup. 
No exceptions so do vanilla call not call_VM ++ // args are (oop obj, BasicLock* lock, JavaThread* thread) ++ ++ __ move(A0, obj_reg); ++ __ move(A1, lock_reg); ++ __ move(A2, thread); ++ __ addi_d(SP, SP, - 3*wordSize); ++ ++ __ move(S2, SP); // use S2 as a sender SP holder ++ assert(StackAlignmentInBytes == 16, "must be"); ++ __ bstrins_d(SP, R0, 3, 0); // align stack as required by ABI ++ ++ __ call(CAST_FROM_FN_PTR(address, SharedRuntime::complete_monitor_locking_C), relocInfo::runtime_call_type); ++ __ move(SP, S2); ++ __ addi_d(SP, SP, 3*wordSize); ++ ++ restore_args(masm, total_c_args, c_arg, out_regs); ++ ++#ifdef ASSERT ++ { Label L; ++ __ ld_d(AT, thread, in_bytes(Thread::pending_exception_offset())); ++ __ beq(AT, R0, L); ++ __ stop("no pending exception allowed on exit from monitorenter"); ++ __ bind(L); ++ } ++#endif ++ __ b(lock_done); ++ // END Slow path lock ++ ++ // BEGIN Slow path unlock ++ __ bind(slow_path_unlock); ++ ++ // Slow path unlock ++ ++ if (ret_type == T_FLOAT || ret_type == T_DOUBLE ) { ++ save_native_result(masm, ret_type, stack_slots); ++ } ++ // Save pending exception around call to VM (which contains an EXCEPTION_MARK) ++ ++ __ ld_d(AT, thread, in_bytes(Thread::pending_exception_offset())); ++ __ push(AT); ++ __ st_d(R0, thread, in_bytes(Thread::pending_exception_offset())); ++ ++ __ move(S2, SP); // use S2 as a sender SP holder ++ assert(StackAlignmentInBytes == 16, "must be"); ++ __ bstrins_d(SP, R0, 3, 0); // align stack as required by ABI ++ ++ // should be a peal ++ // +wordSize because of the push above ++ __ addi_d(A1, FP, lock_slot_fp_offset); ++ ++ __ move(A0, obj_reg); ++ __ move(A2, thread); ++ __ addi_d(SP, SP, -2*wordSize); ++ __ call(CAST_FROM_FN_PTR(address, SharedRuntime::complete_monitor_unlocking_C), ++ relocInfo::runtime_call_type); ++ __ addi_d(SP, SP, 2*wordSize); ++ __ move(SP, S2); ++#ifdef ASSERT ++ { ++ Label L; ++ __ ld_d( AT, thread, in_bytes(Thread::pending_exception_offset())); ++ __ beq(AT, R0, L); ++ __ stop("no pending exception allowed on exit complete_monitor_unlocking_C"); ++ __ bind(L); ++ } ++#endif /* ASSERT */ ++ ++ __ pop(AT); ++ __ st_d(AT, thread, in_bytes(Thread::pending_exception_offset())); ++ if (ret_type == T_FLOAT || ret_type == T_DOUBLE ) { ++ restore_native_result(masm, ret_type, stack_slots); ++ } ++ __ b(unlock_done); ++ // END Slow path unlock ++ ++ } ++ ++ // SLOW PATH Reguard the stack if needed ++ ++ __ bind(reguard); ++ save_native_result(masm, ret_type, stack_slots); ++ __ call(CAST_FROM_FN_PTR(address, SharedRuntime::reguard_yellow_pages), ++ relocInfo::runtime_call_type); ++ restore_native_result(masm, ret_type, stack_slots); ++ __ b(reguard_done); ++ ++ // BEGIN EXCEPTION PROCESSING ++ if (!is_critical_native) { ++ // Forward the exception ++ __ bind(exception_pending); ++ ++ // pop our frame ++ //forward_exception_entry need return address on stack ++ __ addi_d(SP, FP, - 2 * wordSize); ++ __ pop(FP); ++ ++ // and forward the exception ++ __ jmp(StubRoutines::forward_exception_entry(), relocInfo::runtime_call_type); ++ } ++ __ flush(); ++ ++ nmethod *nm = nmethod::new_native_nmethod(method, ++ compile_id, ++ masm->code(), ++ vep_offset, ++ frame_complete, ++ stack_slots / VMRegImpl::slots_per_word, ++ (is_static ? 
in_ByteSize(klass_offset) : in_ByteSize(receiver_offset)), ++ in_ByteSize(lock_slot_offset*VMRegImpl::stack_slot_size), ++ oop_maps); ++ ++ return nm; ++} ++ ++// this function returns the adjust size (in number of words) to a c2i adapter ++// activation for use during deoptimization ++int Deoptimization::last_frame_adjust(int callee_parameters, int callee_locals) { ++ return (callee_locals - callee_parameters) * Interpreter::stackElementWords; ++} ++ ++// Number of stack slots between incoming argument block and the start of ++// a new frame. The PROLOG must add this many slots to the stack. The ++// EPILOG must remove this many slots. LA needs two slots for ++// return address and fp. ++// TODO think this is correct but check ++uint SharedRuntime::in_preserve_stack_slots() { ++ return 4; ++} ++ ++// "Top of Stack" slots that may be unused by the calling convention but must ++// otherwise be preserved. ++// On Intel these are not necessary and the value can be zero. ++// On Sparc this describes the words reserved for storing a register window ++// when an interrupt occurs. ++uint SharedRuntime::out_preserve_stack_slots() { ++ return 0; ++} ++ ++//------------------------------generate_deopt_blob---------------------------- ++// Ought to generate an ideal graph & compile, but here's some SPARC ASM ++// instead. ++void SharedRuntime::generate_deopt_blob() { ++ // allocate space for the code ++ ResourceMark rm; ++ // setup code generation tools ++ int pad = 0; ++#if INCLUDE_JVMCI ++ if (EnableJVMCI) { ++ pad += 512; // Increase the buffer size when compiling for JVMCI ++ } ++#endif ++ //CodeBuffer buffer ("deopt_blob", 4000, 2048); ++ CodeBuffer buffer ("deopt_blob", 8000+pad, 2048); // FIXME for debug ++ MacroAssembler* masm = new MacroAssembler( & buffer); ++ int frame_size_in_words; ++ OopMap* map = NULL; ++ // Account for the extra args we place on the stack ++ // by the time we call fetch_unroll_info ++ const int additional_words = 2; // deopt kind, thread ++ ++ OopMapSet *oop_maps = new OopMapSet(); ++ RegisterSaver reg_save(COMPILER2_OR_JVMCI != 0); ++ ++ address start = __ pc(); ++ Label cont; ++ // we use S3 for DeOpt reason register ++ Register reason = S3; ++ // use S6 for thread register ++ Register thread = TREG; ++ // use S7 for fetch_unroll_info returned UnrollBlock ++ Register unroll = S7; ++ // Prolog for non exception case! ++ ++ // We have been called from the deopt handler of the deoptee. ++ // ++ // deoptee: ++ // ... ++ // call X ++ // ... ++ // deopt_handler: call_deopt_stub ++ // cur. return pc --> ... ++ // ++ // So currently RA points behind the call in the deopt handler. ++ // We adjust it such that it points to the start of the deopt handler. ++ // The return_pc has been stored in the frame of the deoptee and ++ // will replace the address of the deopt_handler in the call ++ // to Deoptimization::fetch_unroll_info below. ++ ++ // HandlerImpl::size_deopt_handler() ++ __ addi_d(RA, RA, - NativeFarCall::instruction_size); ++ // Save everything in sight. 
++ map = reg_save.save_live_registers(masm, additional_words, &frame_size_in_words); ++ // Normal deoptimization ++ __ li(reason, Deoptimization::Unpack_deopt); ++ __ b(cont); ++ ++ int reexecute_offset = __ pc() - start; ++#if INCLUDE_JVMCI && !defined(COMPILER1) ++ if (EnableJVMCI && UseJVMCICompiler) { ++ // JVMCI does not use this kind of deoptimization ++ __ should_not_reach_here(); ++ } ++#endif ++ ++ // Reexecute case ++ // return address is the pc describes what bci to do re-execute at ++ ++ // No need to update map as each call to save_live_registers will produce identical oopmap ++ (void) reg_save.save_live_registers(masm, additional_words, &frame_size_in_words); ++ __ li(reason, Deoptimization::Unpack_reexecute); ++ __ b(cont); ++ ++#if INCLUDE_JVMCI ++ Label after_fetch_unroll_info_call; ++ int implicit_exception_uncommon_trap_offset = 0; ++ int uncommon_trap_offset = 0; ++ ++ if (EnableJVMCI) { ++ implicit_exception_uncommon_trap_offset = __ pc() - start; ++ ++ __ ld_d(RA, Address(TREG, in_bytes(JavaThread::jvmci_implicit_exception_pc_offset()))); ++ __ st_d(R0, Address(TREG, in_bytes(JavaThread::jvmci_implicit_exception_pc_offset()))); ++ ++ uncommon_trap_offset = __ pc() - start; ++ ++ // Save everything in sight. ++ (void) reg_save.save_live_registers(masm, additional_words, &frame_size_in_words); ++ __ addi_d(SP, SP, -additional_words * wordSize); ++ // fetch_unroll_info needs to call last_java_frame() ++ Label retaddr; ++ __ set_last_Java_frame(NOREG, NOREG, retaddr); ++ ++ __ ld_w(A1, Address(TREG, in_bytes(JavaThread::pending_deoptimization_offset()))); ++ __ li(AT, -1); ++ __ st_w(AT, Address(TREG, in_bytes(JavaThread::pending_deoptimization_offset()))); ++ ++ __ li(reason, (int32_t)Deoptimization::Unpack_reexecute); ++ __ move(A0, TREG); ++ __ move(A2, reason); // exec mode ++ __ call((address)Deoptimization::uncommon_trap, relocInfo::runtime_call_type); ++ __ bind(retaddr); ++ oop_maps->add_gc_map( __ pc()-start, map->deep_copy()); ++ __ addi_d(SP, SP, additional_words * wordSize); ++ ++ __ reset_last_Java_frame(false); ++ ++ __ b(after_fetch_unroll_info_call); ++ } // EnableJVMCI ++#endif // INCLUDE_JVMCI ++ ++ int exception_offset = __ pc() - start; ++ // Prolog for exception case ++ ++ // all registers are dead at this entry point, except for V0 and ++ // V1 which contain the exception oop and exception pc ++ // respectively. Set them in TLS and fall thru to the ++ // unpack_with_exception_in_tls entry point. ++ ++#ifndef OPT_THREAD ++ __ get_thread(thread); ++#endif ++ __ st_ptr(V1, thread, in_bytes(JavaThread::exception_pc_offset())); ++ __ st_ptr(V0, thread, in_bytes(JavaThread::exception_oop_offset())); ++ int exception_in_tls_offset = __ pc() - start; ++ // new implementation because exception oop is now passed in JavaThread ++ ++ // Prolog for exception case ++ // All registers must be preserved because they might be used by LinearScan ++ // Exceptiop oop and throwing PC are passed in JavaThread ++ // tos: stack at point of call to method that threw the exception (i.e. only ++ // args are on the stack, no return address) ++ ++ // Return address will be patched later with the throwing pc. The correct value is not ++ // available now because loading it from memory would destroy registers. ++ // Save everything in sight. 
++ // No need to update map as each call to save_live_registers will produce identical oopmap ++ (void) reg_save.save_live_registers(masm, additional_words, &frame_size_in_words); ++ ++ // Now it is safe to overwrite any register ++ // store the correct deoptimization type ++ __ li(reason, Deoptimization::Unpack_exception); ++ // load throwing pc from JavaThread and patch it as the return address ++ // of the current frame. Then clear the field in JavaThread ++#ifndef OPT_THREAD ++ __ get_thread(thread); ++#endif ++ __ ld_ptr(V1, thread, in_bytes(JavaThread::exception_pc_offset())); ++ __ st_ptr(V1, SP, reg_save.ra_offset()); //save ra ++ __ st_ptr(R0, thread, in_bytes(JavaThread::exception_pc_offset())); ++ ++ ++#ifdef ASSERT ++ // verify that there is really an exception oop in JavaThread ++ __ ld_ptr(AT, thread, in_bytes(JavaThread::exception_oop_offset())); ++ __ verify_oop(AT); ++ // verify that there is no pending exception ++ Label no_pending_exception; ++ __ ld_ptr(AT, thread, in_bytes(Thread::pending_exception_offset())); ++ __ beq(AT, R0, no_pending_exception); ++ __ stop("must not have pending exception here"); ++ __ bind(no_pending_exception); ++#endif ++ __ bind(cont); ++ ++ // Call C code. Need thread and this frame, but NOT official VM entry ++ // crud. We cannot block on this call, no GC can happen. ++#ifndef OPT_THREAD ++ __ get_thread(thread); ++#endif ++ ++ __ move(A0, thread); ++ __ move(A1, reason); // exec_mode ++ __ addi_d(SP, SP, -additional_words * wordSize); ++ ++ Label retaddr; ++ __ set_last_Java_frame(NOREG, NOREG, retaddr); ++ ++ // Call fetch_unroll_info(). Need thread and this frame, but NOT official VM entry - cannot block on ++ // this call, no GC can happen. Call should capture return values. ++ ++ // TODO: confirm reloc ++ __ call(CAST_FROM_FN_PTR(address, Deoptimization::fetch_unroll_info), relocInfo::runtime_call_type); ++ __ bind(retaddr); ++ oop_maps->add_gc_map(__ pc() - start, map); ++ __ addi_d(SP, SP, additional_words * wordSize); ++#ifndef OPT_THREAD ++ __ get_thread(thread); ++#endif ++ __ reset_last_Java_frame(false); ++ ++#if INCLUDE_JVMCI ++ if (EnableJVMCI) { ++ __ bind(after_fetch_unroll_info_call); ++ } ++#endif ++ ++ // Load UnrollBlock into S7 ++ __ move(unroll, V0); ++ ++ ++ // Move the unpack kind to a safe place in the UnrollBlock because ++ // we are very short of registers ++ ++ Address unpack_kind(unroll, Deoptimization::UnrollBlock::unpack_kind_offset_in_bytes()); ++ __ st_w(reason, unpack_kind); ++ // save the unpack_kind value ++ // Retrieve the possible live values (return values) ++ // All callee save registers representing jvm state ++ // are now in the vframeArray. ++ ++ Label noException; ++ __ li(AT, Deoptimization::Unpack_exception); ++ __ bne(AT, reason, noException);// Was exception pending? ++ __ ld_ptr(V0, thread, in_bytes(JavaThread::exception_oop_offset())); ++ __ ld_ptr(V1, thread, in_bytes(JavaThread::exception_pc_offset())); ++ __ st_ptr(R0, thread, in_bytes(JavaThread::exception_pc_offset())); ++ __ st_ptr(R0, thread, in_bytes(JavaThread::exception_oop_offset())); ++ ++ __ verify_oop(V0); ++ ++ // Overwrite the result registers with the exception results. ++ __ st_ptr(V0, SP, reg_save.v0_offset()); ++ __ st_ptr(V1, SP, reg_save.v1_offset()); ++ ++ __ bind(noException); ++ ++ ++ // Stack is back to only having register save data on the stack. ++ // Now restore the result registers. Everything else is either dead or captured ++ // in the vframeArray. 
++ ++ reg_save.restore_result_registers(masm); ++ // All of the register save area has been popped of the stack. Only the ++ // return address remains. ++ // Pop all the frames we must move/replace. ++ // Frame picture (youngest to oldest) ++ // 1: self-frame (no frame link) ++ // 2: deopting frame (no frame link) ++ // 3: caller of deopting frame (could be compiled/interpreted). ++ // ++ // Note: by leaving the return address of self-frame on the stack ++ // and using the size of frame 2 to adjust the stack ++ // when we are done the return to frame 3 will still be on the stack. ++ ++ // register for the sender's sp ++ Register sender_sp = Rsender; ++ // register for frame pcs ++ Register pcs = T0; ++ // register for frame sizes ++ Register sizes = T1; ++ // register for frame count ++ Register count = T3; ++ ++ // Pop deoptimized frame ++ __ ld_w(T8, unroll, Deoptimization::UnrollBlock::size_of_deoptimized_frame_offset_in_bytes()); ++ __ add_d(SP, SP, T8); ++ // sp should be pointing at the return address to the caller (3) ++ ++ // Load array of frame pcs into pcs ++ __ ld_ptr(pcs, unroll, Deoptimization::UnrollBlock::frame_pcs_offset_in_bytes()); ++ __ addi_d(SP, SP, wordSize); // trash the old pc ++ // Load array of frame sizes into T6 ++ __ ld_ptr(sizes, unroll, Deoptimization::UnrollBlock::frame_sizes_offset_in_bytes()); ++ ++#ifdef ASSERT ++ // Compilers generate code that bang the stack by as much as the ++ // interpreter would need. So this stack banging should never ++ // trigger a fault. Verify that it does not on non product builds. ++ __ ld_w(TSR, unroll, Deoptimization::UnrollBlock::total_frame_sizes_offset_in_bytes()); ++ __ bang_stack_size(TSR, T8); ++#endif ++ ++ // Load count of frams into T3 ++ __ ld_w(count, unroll, Deoptimization::UnrollBlock::number_of_frames_offset_in_bytes()); ++ // Pick up the initial fp we should save ++ __ ld_d(FP, unroll, Deoptimization::UnrollBlock::initial_info_offset_in_bytes()); ++ // Now adjust the caller's stack to make up for the extra locals ++ // but record the original sp so that we can save it in the skeletal interpreter ++ // frame and the stack walking of interpreter_sender will get the unextended sp ++ // value and not the "real" sp value. ++ __ move(sender_sp, SP); ++ __ ld_w(AT, unroll, Deoptimization::UnrollBlock::caller_adjustment_offset_in_bytes()); ++ __ sub_d(SP, SP, AT); ++ ++ Label loop; ++ __ bind(loop); ++ __ ld_d(T2, sizes, 0); // Load frame size ++ __ ld_ptr(AT, pcs, 0); // save return address ++ __ addi_d(T2, T2, -2 * wordSize); // we'll push pc and fp, by hand ++ __ push2(AT, FP); ++ __ addi_d(FP, SP, 2 * wordSize); ++ __ sub_d(SP, SP, T2); // Prolog! 
++ // This value is corrected by layout_activation_impl ++ __ st_d(R0, FP, frame::interpreter_frame_last_sp_offset * wordSize); ++ __ st_d(sender_sp, FP, frame::interpreter_frame_sender_sp_offset * wordSize);// Make it walkable ++ __ move(sender_sp, SP); // pass to next frame ++ __ addi_d(count, count, -1); // decrement counter ++ __ addi_d(sizes, sizes, wordSize); // Bump array pointer (sizes) ++ __ addi_d(pcs, pcs, wordSize); // Bump array pointer (pcs) ++ __ bne(count, R0, loop); ++ ++ // Re-push self-frame ++ __ ld_d(AT, pcs, 0); // frame_pcs[number_of_frames] = Interpreter::deopt_entry(vtos, 0); ++ __ push2(AT, FP); ++ __ addi_d(FP, SP, 2 * wordSize); ++ __ addi_d(SP, SP, -(frame_size_in_words - 2 - additional_words) * wordSize); ++ ++ // Restore frame locals after moving the frame ++ __ st_d(V0, SP, reg_save.v0_offset()); ++ __ st_d(V1, SP, reg_save.v1_offset()); ++ __ fst_d(F0, SP, reg_save.fpr0_offset()); ++ __ fst_d(F1, SP, reg_save.fpr1_offset()); ++ ++ // Call unpack_frames(). Need thread and this frame, but NOT official VM entry - cannot block on ++ // this call, no GC can happen. ++ __ move(A1, reason); // exec_mode ++#ifndef OPT_THREAD ++ __ get_thread(thread); ++#endif ++ __ move(A0, thread); // thread ++ __ addi_d(SP, SP, (-additional_words) *wordSize); ++ ++ // set last_Java_sp, last_Java_fp ++ Label L; ++ address the_pc = __ pc(); ++ __ bind(L); ++ __ set_last_Java_frame(NOREG, FP, L); ++ ++ assert(StackAlignmentInBytes == 16, "must be"); ++ __ bstrins_d(SP, R0, 3, 0); // Fix stack alignment as required by ABI ++ ++ __ call(CAST_FROM_FN_PTR(address, Deoptimization::unpack_frames), relocInfo::runtime_call_type); ++ // Revert SP alignment after call since we're going to do some SP relative addressing below ++ __ ld_d(SP, thread, in_bytes(JavaThread::last_Java_sp_offset())); ++ // Set an oopmap for the call site ++ oop_maps->add_gc_map(the_pc - start, new OopMap(frame_size_in_words, 0)); ++ ++ __ push(V0); ++ ++#ifndef OPT_THREAD ++ __ get_thread(thread); ++#endif ++ __ reset_last_Java_frame(true); ++ ++ // Collect return values ++ __ ld_d(V0, SP, reg_save.v0_offset() + (additional_words + 1) * wordSize); ++ __ ld_d(V1, SP, reg_save.v1_offset() + (additional_words + 1) * wordSize); ++ // Pop float stack and store in local ++ __ fld_d(F0, SP, reg_save.fpr0_offset() + (additional_words + 1) * wordSize); ++ __ fld_d(F1, SP, reg_save.fpr1_offset() + (additional_words + 1) * wordSize); ++ ++ // Push a float or double return value if necessary. ++ __ leave(); ++ ++ // Jump to interpreter ++ __ jr(RA); ++ ++ masm->flush(); ++ _deopt_blob = DeoptimizationBlob::create(&buffer, oop_maps, 0, exception_offset, reexecute_offset, frame_size_in_words); ++ _deopt_blob->set_unpack_with_exception_in_tls_offset(exception_in_tls_offset); ++#if INCLUDE_JVMCI ++ if (EnableJVMCI) { ++ _deopt_blob->set_uncommon_trap_offset(uncommon_trap_offset); ++ _deopt_blob->set_implicit_exception_uncommon_trap_offset(implicit_exception_uncommon_trap_offset); ++ } ++#endif ++} ++ ++#ifdef COMPILER2 ++ ++//------------------------------generate_uncommon_trap_blob-------------------- ++// Ought to generate an ideal graph & compile, but here's some SPARC ASM ++// instead. 
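Each iteration of the loop above carves one skeletal interpreter frame out of the stack, taking its size and return pc from the UnrollBlock arrays and pushing pc and fp by hand before reserving the rest of the frame. A host-side sketch of the same arithmetic (hypothetical types, 8-byte words assumed):

#include <cstdint>
#include <vector>

struct UnrollInfo {
    std::vector<intptr_t> frame_sizes;   // bytes per skeletal frame
    std::vector<intptr_t> frame_pcs;     // return pc for each frame
    intptr_t caller_adjustment;          // extra locals owed to the caller
};

// Returns the SP value at which each skeletal interpreter frame would end
// up, mirroring the push loop: first extend the caller by caller_adjustment,
// then for every frame push pc and fp (two words) and subtract the rest.
std::vector<intptr_t> unwind_sps(intptr_t sp, const UnrollInfo& u) {
    constexpr intptr_t kWord = 8;
    std::vector<intptr_t> sps;
    sp -= u.caller_adjustment;
    for (std::size_t i = 0; i < u.frame_sizes.size(); ++i) {
        sp -= 2 * kWord;                      // pc and fp, pushed by hand
        sp -= u.frame_sizes[i] - 2 * kWord;   // the remainder of the frame
        sps.push_back(sp);
    }
    return sps;
}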
++void SharedRuntime::generate_uncommon_trap_blob() { ++ // allocate space for the code ++ ResourceMark rm; ++ // setup code generation tools ++ CodeBuffer buffer ("uncommon_trap_blob", 512*80 , 512*40 ); ++ MacroAssembler* masm = new MacroAssembler(&buffer); ++ ++ enum frame_layout { ++ fp_off, fp_off2, ++ return_off, return_off2, ++ framesize ++ }; ++ assert(framesize % 4 == 0, "sp not 16-byte aligned"); ++ address start = __ pc(); ++ ++ // S8 be used in C2 ++ __ li(S8, (long)Interpreter::dispatch_table(itos)); ++ // Push self-frame. ++ __ addi_d(SP, SP, -framesize * BytesPerInt); ++ ++ __ st_d(RA, SP, return_off * BytesPerInt); ++ __ st_d(FP, SP, fp_off * BytesPerInt); ++ ++ __ addi_d(FP, SP, framesize * BytesPerInt); ++ ++ Register thread = TREG; ++ ++#ifndef OPT_THREAD ++ __ get_thread(thread); ++#endif ++ // set last_Java_sp ++ Label retaddr; ++ __ set_last_Java_frame(NOREG, FP, retaddr); ++ // Call C code. Need thread but NOT official VM entry ++ // crud. We cannot block on this call, no GC can happen. Call should ++ // capture callee-saved registers as well as return values. ++ __ move(A0, thread); ++ // argument already in T0 ++ __ move(A1, T0); ++ __ addi_d(A2, R0, Deoptimization::Unpack_uncommon_trap); ++ __ call((address)Deoptimization::uncommon_trap, relocInfo::runtime_call_type); ++ __ bind(retaddr); ++ ++ // Set an oopmap for the call site ++ OopMapSet *oop_maps = new OopMapSet(); ++ OopMap* map = new OopMap( framesize, 0 ); ++ ++ oop_maps->add_gc_map(__ pc() - start, map); ++ ++#ifndef OPT_THREAD ++ __ get_thread(thread); ++#endif ++ __ reset_last_Java_frame(false); ++ ++ // Load UnrollBlock into S7 ++ Register unroll = S7; ++ __ move(unroll, V0); ++ ++#ifdef ASSERT ++ { Label L; ++ __ ld_ptr(AT, unroll, Deoptimization::UnrollBlock::unpack_kind_offset_in_bytes()); ++ __ li(T4, Deoptimization::Unpack_uncommon_trap); ++ __ beq(AT, T4, L); ++ __ stop("SharedRuntime::generate_deopt_blob: expected Unpack_uncommon_trap"); ++ __ bind(L); ++ } ++#endif ++ ++ // Pop all the frames we must move/replace. ++ // ++ // Frame picture (youngest to oldest) ++ // 1: self-frame (no frame link) ++ // 2: deopting frame (no frame link) ++ // 3: possible-i2c-adapter-frame ++ // 4: caller of deopting frame (could be compiled/interpreted. If interpreted we will create an ++ // and c2i here) ++ ++ __ addi_d(SP, SP, framesize * BytesPerInt); ++ ++ // Pop deoptimized frame ++ __ ld_w(T8, unroll, Deoptimization::UnrollBlock::size_of_deoptimized_frame_offset_in_bytes()); ++ __ addi_d(T8, T8, -2 * wordSize); ++ __ add_d(SP, SP, T8); ++ __ ld_d(FP, SP, 0); ++ __ ld_d(RA, SP, wordSize); ++ __ addi_d(SP, SP, 2 * wordSize); ++ ++#ifdef ASSERT ++ // Compilers generate code that bang the stack by as much as the ++ // interpreter would need. So this stack banging should never ++ // trigger a fault. Verify that it does not on non product builds. 
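The uncommon-trap blob sizes its self-frame in 32-bit slots (BytesPerInt), so the earlier assert that framesize is a multiple of four is exactly what guarantees 16-byte stack alignment. The same invariant as a tiny standalone helper (illustrative only):

#include <cassert>

constexpr int kBytesPerInt = 4;

// Frame size in bytes for a frame measured in 32-bit slots; four slots
// per 16 bytes, so the slot count must be a multiple of four.
int self_frame_bytes(int framesize_in_ints) {
    assert(framesize_in_ints % 4 == 0 && "sp would not stay 16-byte aligned");
    return framesize_in_ints * kBytesPerInt;
}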
++ __ ld_w(TSR, unroll, Deoptimization::UnrollBlock::total_frame_sizes_offset_in_bytes()); ++ __ bang_stack_size(TSR, T8); ++#endif ++ ++ // register for frame pcs ++ Register pcs = T8; ++ // register for frame sizes ++ Register sizes = T4; ++ // register for frame count ++ Register count = T3; ++ // register for the sender's sp ++ Register sender_sp = T1; ++ ++ // sp should be pointing at the return address to the caller (4) ++ // Load array of frame pcs ++ __ ld_d(pcs, unroll, Deoptimization::UnrollBlock::frame_pcs_offset_in_bytes()); ++ ++ // Load array of frame sizes ++ __ ld_d(sizes, unroll, Deoptimization::UnrollBlock::frame_sizes_offset_in_bytes()); ++ __ ld_wu(count, unroll, Deoptimization::UnrollBlock::number_of_frames_offset_in_bytes()); ++ ++ // Now adjust the caller's stack to make up for the extra locals ++ // but record the original sp so that we can save it in the skeletal interpreter ++ // frame and the stack walking of interpreter_sender will get the unextended sp ++ // value and not the "real" sp value. ++ ++ __ move(sender_sp, SP); ++ __ ld_w(AT, unroll, Deoptimization::UnrollBlock::caller_adjustment_offset_in_bytes()); ++ __ sub_d(SP, SP, AT); ++ // Push interpreter frames in a loop ++ Label loop; ++ __ bind(loop); ++ __ ld_d(T2, sizes, 0); // Load frame size ++ __ ld_d(RA, pcs, 0); // save return address ++ __ addi_d(T2, T2, -2*wordSize); // we'll push pc and fp, by hand ++ __ enter(); ++ __ sub_d(SP, SP, T2); // Prolog! ++ // This value is corrected by layout_activation_impl ++ __ st_d(R0, FP, frame::interpreter_frame_last_sp_offset * wordSize); ++ __ st_d(sender_sp, FP, frame::interpreter_frame_sender_sp_offset * wordSize);// Make it walkable ++ __ move(sender_sp, SP); // pass to next frame ++ __ addi_d(count, count, -1); // decrement counter ++ __ addi_d(sizes, sizes, wordSize); // Bump array pointer (sizes) ++ __ addi_d(pcs, pcs, wordSize); // Bump array pointer (pcs) ++ __ bne(count, R0, loop); ++ ++ __ ld_d(RA, pcs, 0); ++ ++ // Re-push self-frame ++ // save old & set new FP ++ // save final return address ++ __ enter(); ++ ++ // Use FP because the frames look interpreted now ++ // Save "the_pc" since it cannot easily be retrieved using the last_java_SP after we aligned SP. ++ // Don't need the precise return PC here, just precise enough to point into this code blob. ++ Label L; ++ address the_pc = __ pc(); ++ __ bind(L); ++ __ set_last_Java_frame(NOREG, FP, L); ++ ++ assert(StackAlignmentInBytes == 16, "must be"); ++ __ bstrins_d(SP, R0, 3, 0); // Fix stack alignment as required by ABI ++ ++ // Call C code. Need thread but NOT official VM entry ++ // crud. We cannot block on this call, no GC can happen. Call should ++ // restore return values to their stack-slots with the new SP. ++ __ move(A0, thread); ++ __ li(A1, Deoptimization::Unpack_uncommon_trap); ++ __ call((address)Deoptimization::unpack_frames, relocInfo::runtime_call_type); ++ // Set an oopmap for the call site ++ oop_maps->add_gc_map(the_pc - start, new OopMap(framesize, 0)); ++ ++ __ reset_last_Java_frame(true); ++ ++ // Pop self-frame. ++ __ leave(); // Epilog! 
++ ++ // Jump to interpreter ++ __ jr(RA); ++ // ------------- ++ // make sure all code is generated ++ masm->flush(); ++ _uncommon_trap_blob = UncommonTrapBlob::create(&buffer, oop_maps, framesize / 2); ++} ++ ++#endif // COMPILER2 ++ ++//------------------------------generate_handler_blob------------------- ++// ++// Generate a special Compile2Runtime blob that saves all registers, and sets ++// up an OopMap and calls safepoint code to stop the compiled code for ++// a safepoint. ++// ++// This blob is jumped to (via a breakpoint and the signal handler) from a ++// safepoint in compiled code. ++ ++SafepointBlob* SharedRuntime::generate_handler_blob(address call_ptr, int poll_type) { ++ ++ // Account for thread arg in our frame ++ const int additional_words = 0; ++ int frame_size_in_words; ++ ++ assert (StubRoutines::forward_exception_entry() != NULL, "must be generated before"); ++ ++ ResourceMark rm; ++ OopMapSet *oop_maps = new OopMapSet(); ++ OopMap* map; ++ ++ // allocate space for the code ++ // setup code generation tools ++ CodeBuffer buffer ("handler_blob", 2048, 512); ++ MacroAssembler* masm = new MacroAssembler( &buffer); ++ ++ const Register thread = TREG; ++ address start = __ pc(); ++ bool cause_return = (poll_type == POLL_AT_RETURN); ++ RegisterSaver reg_save(poll_type == POLL_AT_VECTOR_LOOP /* save_vectors */); ++ ++#ifndef OPT_THREAD ++ __ get_thread(thread); ++#endif ++ ++ map = reg_save.save_live_registers(masm, additional_words, &frame_size_in_words); ++ ++#ifndef OPT_THREAD ++ __ get_thread(thread); ++#endif ++ ++ // The following is basically a call_VM. However, we need the precise ++ // address of the call in order to generate an oopmap. Hence, we do all the ++ // work outselvs. ++ ++ Label retaddr; ++ __ set_last_Java_frame(NOREG, NOREG, retaddr); ++ ++ if (!cause_return) { ++ // overwrite the return address pushed by save_live_registers ++ // Additionally, TSR is a callee-saved register so we can look at ++ // it later to determine if someone changed the return address for ++ // us! ++ __ ld_ptr(TSR, thread, in_bytes(JavaThread::saved_exception_pc_offset())); ++ __ st_ptr(TSR, SP, reg_save.ra_offset()); ++ } ++ ++ // Do the call ++ __ move(A0, thread); ++ // TODO: confirm reloc ++ __ call(call_ptr, relocInfo::runtime_call_type); ++ __ bind(retaddr); ++ ++ // Set an oopmap for the call site. This oopmap will map all ++ // oop-registers and debug-info registers as callee-saved. This ++ // will allow deoptimization at this safepoint to find all possible ++ // debug-info recordings, as well as let GC find all oops. ++ oop_maps->add_gc_map(__ pc() - start, map); ++ ++ Label noException; ++ ++ // Clear last_Java_sp again ++ __ reset_last_Java_frame(false); ++ ++ __ ld_ptr(AT, thread, in_bytes(Thread::pending_exception_offset())); ++ __ beq(AT, R0, noException); ++ ++ // Exception pending ++ ++ reg_save.restore_live_registers(masm); ++ //forward_exception_entry need return address on the stack ++ __ push(RA); ++ // TODO: confirm reloc ++ __ jmp((address)StubRoutines::forward_exception_entry(), relocInfo::runtime_call_type); ++ ++ // No exception case ++ __ bind(noException); ++ ++ Label no_adjust, bail; ++ if (!cause_return) { ++ // If our stashed return pc was modified by the runtime we avoid touching it ++ __ ld_ptr(AT, SP, reg_save.ra_offset()); ++ __ bne(AT, TSR, no_adjust); ++ ++#ifdef ASSERT ++ // Verify the correct encoding of the poll we're about to skip. 
++ // See NativeInstruction::is_safepoint_poll() ++ __ ld_wu(AT, TSR, 0); ++ __ push(T5); ++ __ li(T5, 0xffc0001f); ++ __ andr(AT, AT, T5); ++ __ li(T5, 0x28800013); ++ __ xorr(AT, AT, T5); ++ __ pop(T5); ++ __ bne(AT, R0, bail); ++#endif ++ // Adjust return pc forward to step over the safepoint poll instruction ++ __ addi_d(RA, TSR, 4); // NativeInstruction::instruction_size=4 ++ __ st_ptr(RA, SP, reg_save.ra_offset()); ++ } ++ ++ __ bind(no_adjust); ++ // Normal exit, register restoring and exit ++ reg_save.restore_live_registers(masm); ++ __ jr(RA); ++ ++#ifdef ASSERT ++ __ bind(bail); ++ __ stop("Attempting to adjust pc to skip safepoint poll but the return point is not what we expected"); ++#endif ++ ++ // Make sure all code is generated ++ masm->flush(); ++ // Fill-out other meta info ++ return SafepointBlob::create(&buffer, oop_maps, frame_size_in_words); ++} ++ ++// ++// generate_resolve_blob - call resolution (static/virtual/opt-virtual/ic-miss ++// ++// Generate a stub that calls into vm to find out the proper destination ++// of a java call. All the argument registers are live at this point ++// but since this is generic code we don't know what they are and the caller ++// must do any gc of the args. ++// ++RuntimeStub* SharedRuntime::generate_resolve_blob(address destination, const char* name) { ++ assert (StubRoutines::forward_exception_entry() != NULL, "must be generated before"); ++ ++ // allocate space for the code ++ ResourceMark rm; ++ ++ //CodeBuffer buffer(name, 1000, 512); ++ //FIXME. code_size ++ CodeBuffer buffer(name, 2000, 2048); ++ MacroAssembler* masm = new MacroAssembler(&buffer); ++ ++ int frame_size_words; ++ RegisterSaver reg_save(false /* save_vectors */); ++ //we put the thread in A0 ++ ++ OopMapSet *oop_maps = new OopMapSet(); ++ OopMap* map = NULL; ++ ++ address start = __ pc(); ++ map = reg_save.save_live_registers(masm, 0, &frame_size_words); ++ ++ ++ int frame_complete = __ offset(); ++#ifndef OPT_THREAD ++ const Register thread = T8; ++ __ get_thread(thread); ++#else ++ const Register thread = TREG; ++#endif ++ ++ __ move(A0, thread); ++ Label retaddr; ++ __ set_last_Java_frame(noreg, FP, retaddr); ++ // align the stack before invoke native ++ assert(StackAlignmentInBytes == 16, "must be"); ++ __ bstrins_d(SP, R0, 3, 0); ++ ++ // TODO: confirm reloc ++ __ call(destination, relocInfo::runtime_call_type); ++ __ bind(retaddr); ++ ++ // Set an oopmap for the call site. ++ // We need this not only for callee-saved registers, but also for volatile ++ // registers that the compiler might be keeping live across a safepoint. ++ oop_maps->add_gc_map(__ pc() - start, map); ++ // V0 contains the address we are going to jump to assuming no exception got installed ++#ifndef OPT_THREAD ++ __ get_thread(thread); ++#endif ++ __ ld_ptr(SP, thread, in_bytes(JavaThread::last_Java_sp_offset())); ++ // clear last_Java_sp ++ __ reset_last_Java_frame(true); ++ // check for pending exceptions ++ Label pending; ++ __ ld_ptr(AT, thread, in_bytes(Thread::pending_exception_offset())); ++ __ bne(AT, R0, pending); ++ // get the returned Method* ++ __ get_vm_result_2(Rmethod, thread); ++ __ st_ptr(Rmethod, SP, reg_save.s3_offset()); ++ __ st_ptr(V0, SP, reg_save.t5_offset()); ++ reg_save.restore_live_registers(masm); ++ ++ // We are back the the original state on entry and ready to go the callee method. 
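The ASSERT sequence above verifies that the word at the stashed pc really is the safepoint poll instruction before stepping the return pc past it: the instruction is masked with 0xffc0001f and compared against 0x28800013. The same check as a standalone helper, treating those constants simply as the match pattern used by the code above:

#include <cstdint>

// Keep the major opcode bits and the rd field of the instruction and
// compare them with the expected safepoint poll encoding.
bool looks_like_safepoint_poll(uint32_t insn) {
    constexpr uint32_t kMask    = 0xffc0001fu;
    constexpr uint32_t kPattern = 0x28800013u;
    return (insn & kMask) == kPattern;
}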
++ __ jr(T5); ++ // Pending exception after the safepoint ++ ++ __ bind(pending); ++ ++ reg_save.restore_live_registers(masm); ++ ++ // exception pending => remove activation and forward to exception handler ++ //forward_exception_entry need return address on the stack ++ __ push(RA); ++#ifndef OPT_THREAD ++ __ get_thread(thread); ++#endif ++ __ st_ptr(R0, thread, in_bytes(JavaThread::vm_result_offset())); ++ __ ld_ptr(V0, thread, in_bytes(Thread::pending_exception_offset())); ++ __ jmp(StubRoutines::forward_exception_entry(), relocInfo::runtime_call_type); ++ // ++ // make sure all code is generated ++ masm->flush(); ++ RuntimeStub* tmp= RuntimeStub::new_runtime_stub(name, &buffer, frame_complete, frame_size_words, oop_maps, true); ++ return tmp; ++} ++ ++extern "C" int SpinPause() {return 0;} ++ ++#ifdef COMPILER2 ++RuntimeStub* SharedRuntime::make_native_invoker(address call_target, ++ int shadow_space_bytes, ++ const GrowableArray& input_registers, ++ const GrowableArray& output_registers) { ++ Unimplemented(); ++ return nullptr; ++} ++#endif +diff --git a/src/hotspot/cpu/loongarch/stubGenerator_loongarch_64.cpp b/src/hotspot/cpu/loongarch/stubGenerator_loongarch_64.cpp +new file mode 100644 +index 00000000000..21bfc7d78cb +--- /dev/null ++++ b/src/hotspot/cpu/loongarch/stubGenerator_loongarch_64.cpp +@@ -0,0 +1,5176 @@ ++/* ++ * Copyright (c) 2003, 2013, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#include "precompiled.hpp" ++#include "asm/macroAssembler.hpp" ++#include "asm/macroAssembler.inline.hpp" ++#include "compiler/oopMap.hpp" ++#include "gc/shared/barrierSet.hpp" ++#include "gc/shared/barrierSetAssembler.hpp" ++#include "interpreter/interpreter.hpp" ++#include "nativeInst_loongarch.hpp" ++#include "oops/instanceOop.hpp" ++#include "oops/method.hpp" ++#include "oops/objArrayKlass.hpp" ++#include "oops/oop.inline.hpp" ++#include "prims/methodHandles.hpp" ++#include "runtime/frame.inline.hpp" ++#include "runtime/handles.inline.hpp" ++#include "runtime/sharedRuntime.hpp" ++#include "runtime/stubCodeGenerator.hpp" ++#include "runtime/stubRoutines.hpp" ++#include "runtime/thread.inline.hpp" ++#ifdef COMPILER2 ++#include "opto/runtime.hpp" ++#endif ++ ++// Declaration and definition of StubGenerator (no .hpp file). 
++// For a more detailed description of the stub routine structure ++// see the comment in stubRoutines.hpp ++ ++#define __ _masm-> ++ ++#define A0 RA0 ++#define A1 RA1 ++#define A2 RA2 ++#define A3 RA3 ++#define A4 RA4 ++#define A5 RA5 ++#define A6 RA6 ++#define A7 RA7 ++#define T0 RT0 ++#define T1 RT1 ++#define T2 RT2 ++#define T3 RT3 ++#define T4 RT4 ++#define T5 RT5 ++#define T6 RT6 ++#define T7 RT7 ++#define T8 RT8 ++ ++#define TIMES_OOP (UseCompressedOops ? Address::times_4 : Address::times_8) ++ ++//#ifdef PRODUCT ++//#define BLOCK_COMMENT(str) /* nothing */ ++//#else ++//#define BLOCK_COMMENT(str) __ block_comment(str) ++//#endif ++ ++//#define BIND(label) bind(label); BLOCK_COMMENT(#label ":") ++const int MXCSR_MASK = 0xFFC0; // Mask out any pending exceptions ++ ++// Stub Code definitions ++ ++class StubGenerator: public StubCodeGenerator { ++ private: ++ ++ // This fig is not LA ABI. It is call Java from C ABI. ++ // Call stubs are used to call Java from C ++ // ++ // [ return_from_Java ] ++ // [ argument word n-1 ] <--- sp ++ // ... ++ // [ argument word 0 ] ++ // ... ++ // -10 [ S6 ] ++ // -9 [ S5 ] ++ // -8 [ S4 ] ++ // -7 [ S3 ] ++ // -6 [ S1 ] ++ // -5 [ TSR(S2) ] ++ // -4 [ LVP(S7) ] ++ // -3 [ BCP(S0) ] ++ // -2 [ saved fp ] ++ // -1 [ return address ] ++ // 0 [ ptr. to call wrapper ] <--- a0 (old sp -->) fp ++ // 1 [ result ] <--- a1 ++ // 2 [ result_type ] <--- a2 ++ // 3 [ method ] <--- a3 ++ // 4 [ entry_point ] <--- a4 ++ // 5 [ parameters ] <--- a5 ++ // 6 [ parameter_size ] <--- a6 ++ // 7 [ thread ] <--- a7 ++ ++ // ++ // LA ABI does not save paras in sp. ++ // ++ // [ return_from_Java ] ++ // [ argument word n-1 ] <--- sp ++ // ... ++ // [ argument word 0 ] ++ //-24 [ ] ++ //-23 [ F31 ] ++ // ... ++ //-16 [ F24 ] ++ //-15 [ S8 ] ++ //-14 [ thread ] ++ //-13 [ result_type ] <--- a2 ++ //-12 [ result ] <--- a1 ++ //-11 [ ptr. to call wrapper ] <--- a0 ++ //-10 [ S6 ] ++ // -9 [ S5 ] ++ // -8 [ S4 ] ++ // -7 [ S3 ] ++ // -6 [ S1 ] ++ // -5 [ TSR(S2) ] ++ // -4 [ LVP(S7) ] ++ // -3 [ BCP(S0) ] ++ // -2 [ saved fp ] ++ // -1 [ return address ] ++ // 0 [ ] <--- old sp = fp_after_call ++ // ++ // Find a right place in the call_stub for S8. ++ // S8 will point to the starting point of Interpreter::dispatch_table(itos). ++ // It should be saved/restored before/after Java calls. ++ // ++ enum call_stub_layout { ++ RA_off = -1, ++ FP_off = -2, ++ BCP_off = -3, ++ LVP_off = -4, ++ TSR_off = -5, ++ S1_off = -6, ++ S3_off = -7, ++ S4_off = -8, ++ S5_off = -9, ++ S6_off = -10, ++ call_wrapper_off = -11, ++ result_off = -12, ++ result_type_off = -13, ++ thread_off = -14, ++ S8_off = -15, ++ F24_off = -16, ++ F25_off = -17, ++ F26_off = -18, ++ F27_off = -19, ++ F28_off = -20, ++ F29_off = -21, ++ F30_off = -22, ++ F31_off = -23, ++ total_off = -24, ++ }; ++ ++ address generate_call_stub(address& return_address) { ++ assert((int)frame::entry_frame_call_wrapper_offset == (int)call_wrapper_off, "adjust this code"); ++ StubCodeMark mark(this, "StubRoutines", "call_stub"); ++ address start = __ pc(); ++ ++ // same as in generate_catch_exception()! 
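++    // Illustrative note on the layout above (assumes LP64, wordSize == 8):
++    // each *_off slot lives at FP + off*wordSize; e.g. the JavaThread* passed
++    // in A7 is spilled at FP + thread_off*wordSize = FP - 112, and
++    // generate_catch_exception() below reloads it from the same slot.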
++ ++ // stub code ++ // save ra and fp ++ __ enter(); ++ // I think 14 is the max gap between argument and callee saved register ++ __ addi_d(SP, SP, total_off * wordSize); ++ __ st_d(BCP, FP, BCP_off * wordSize); ++ __ st_d(LVP, FP, LVP_off * wordSize); ++ __ st_d(TSR, FP, TSR_off * wordSize); ++ __ st_d(S1, FP, S1_off * wordSize); ++ __ st_d(S3, FP, S3_off * wordSize); ++ __ st_d(S4, FP, S4_off * wordSize); ++ __ st_d(S5, FP, S5_off * wordSize); ++ __ st_d(S6, FP, S6_off * wordSize); ++ __ st_d(A0, FP, call_wrapper_off * wordSize); ++ __ st_d(A1, FP, result_off * wordSize); ++ __ st_d(A2, FP, result_type_off * wordSize); ++ __ st_d(A7, FP, thread_off * wordSize); ++ __ st_d(S8, FP, S8_off * wordSize); ++ ++ __ fst_d(F24, FP, F24_off * wordSize); ++ __ fst_d(F25, FP, F25_off * wordSize); ++ __ fst_d(F26, FP, F26_off * wordSize); ++ __ fst_d(F27, FP, F27_off * wordSize); ++ __ fst_d(F28, FP, F28_off * wordSize); ++ __ fst_d(F29, FP, F29_off * wordSize); ++ __ fst_d(F30, FP, F30_off * wordSize); ++ __ fst_d(F31, FP, F31_off * wordSize); ++ ++ __ li(S8, (long)Interpreter::dispatch_table(itos)); ++ ++#ifdef OPT_THREAD ++ __ move(TREG, A7); ++#endif ++ //add for compressedoops ++ __ reinit_heapbase(); ++ ++#ifdef ASSERT ++ // make sure we have no pending exceptions ++ { ++ Label L; ++ __ ld_d(AT, A7, in_bytes(Thread::pending_exception_offset())); ++ __ beq(AT, R0, L); ++ /* FIXME: I do not know how to realize stop in LA, do it in the future */ ++ __ stop("StubRoutines::call_stub: entered with pending exception"); ++ __ bind(L); ++ } ++#endif ++ ++ // pass parameters if any ++ // A5: parameter ++ // A6: parameter_size ++ // T0: parameter_size_tmp(--) ++ // T2: offset(++) ++ // T3: tmp ++ Label parameters_done; ++ // judge if the parameter_size equals 0 ++ __ beq(A6, R0, parameters_done); ++ __ slli_d(AT, A6, Interpreter::logStackElementSize); ++ __ sub_d(SP, SP, AT); ++ assert(StackAlignmentInBytes == 16, "must be"); ++ __ bstrins_d(SP, R0, 3, 0); ++ // Copy Java parameters in reverse order (receiver last) ++ // Note that the argument order is inverted in the process ++ Label loop; ++ __ move(T0, A6); ++ __ move(T2, R0); ++ __ bind(loop); ++ ++ // get parameter ++ __ alsl_d(T3, T0, A5, LogBytesPerWord - 1); ++ __ ld_d(AT, T3, -wordSize); ++ __ alsl_d(T3, T2, SP, LogBytesPerWord - 1); ++ __ st_d(AT, T3, Interpreter::expr_offset_in_bytes(0)); ++ __ addi_d(T2, T2, 1); ++ __ addi_d(T0, T0, -1); ++ __ bne(T0, R0, loop); ++ // advance to next parameter ++ ++ // call Java function ++ __ bind(parameters_done); ++ ++ // receiver in V0, Method* in Rmethod ++ ++ __ move(Rmethod, A3); ++ __ move(Rsender, SP); //set sender sp ++ __ jalr(A4); ++ return_address = __ pc(); ++ ++ Label common_return; ++ __ bind(common_return); ++ ++ // store result depending on type ++ // (everything that is not T_LONG, T_FLOAT or T_DOUBLE is treated as T_INT) ++ __ ld_d(T0, FP, result_off * wordSize); // result --> T0 ++ Label is_long, is_float, is_double, exit; ++ __ ld_d(T2, FP, result_type_off * wordSize); // result_type --> T2 ++ __ addi_d(T3, T2, (-1) * T_LONG); ++ __ beq(T3, R0, is_long); ++ __ addi_d(T3, T2, (-1) * T_FLOAT); ++ __ beq(T3, R0, is_float); ++ __ addi_d(T3, T2, (-1) * T_DOUBLE); ++ __ beq(T3, R0, is_double); ++ ++ // handle T_INT case ++ __ st_d(V0, T0, 0 * wordSize); ++ __ bind(exit); ++ ++ // restore ++ __ ld_d(BCP, FP, BCP_off * wordSize); ++ __ ld_d(LVP, FP, LVP_off * wordSize); ++ __ ld_d(S8, FP, S8_off * wordSize); ++ __ ld_d(TSR, FP, TSR_off * wordSize); ++ ++ __ ld_d(S1, FP, S1_off * wordSize); ++ 
__ ld_d(S3, FP, S3_off * wordSize); ++ __ ld_d(S4, FP, S4_off * wordSize); ++ __ ld_d(S5, FP, S5_off * wordSize); ++ __ ld_d(S6, FP, S6_off * wordSize); ++ ++ __ fld_d(F24, FP, F24_off * wordSize); ++ __ fld_d(F25, FP, F25_off * wordSize); ++ __ fld_d(F26, FP, F26_off * wordSize); ++ __ fld_d(F27, FP, F27_off * wordSize); ++ __ fld_d(F28, FP, F28_off * wordSize); ++ __ fld_d(F29, FP, F29_off * wordSize); ++ __ fld_d(F30, FP, F30_off * wordSize); ++ __ fld_d(F31, FP, F31_off * wordSize); ++ ++ __ leave(); ++ ++ // return ++ __ jr(RA); ++ ++ // handle return types different from T_INT ++ __ bind(is_long); ++ __ st_d(V0, T0, 0 * wordSize); ++ __ b(exit); ++ ++ __ bind(is_float); ++ __ fst_s(FV0, T0, 0 * wordSize); ++ __ b(exit); ++ ++ __ bind(is_double); ++ __ fst_d(FV0, T0, 0 * wordSize); ++ __ b(exit); ++ StubRoutines::la::set_call_stub_compiled_return(__ pc()); ++ __ b(common_return); ++ return start; ++ } ++ ++ // Return point for a Java call if there's an exception thrown in ++ // Java code. The exception is caught and transformed into a ++ // pending exception stored in JavaThread that can be tested from ++ // within the VM. ++ // ++ // Note: Usually the parameters are removed by the callee. In case ++ // of an exception crossing an activation frame boundary, that is ++ // not the case if the callee is compiled code => need to setup the ++ // sp. ++ // ++ // V0: exception oop ++ ++ address generate_catch_exception() { ++ StubCodeMark mark(this, "StubRoutines", "catch_exception"); ++ address start = __ pc(); ++ ++ Register thread = TREG; ++ ++ // get thread directly ++#ifndef OPT_THREAD ++ __ ld_d(thread, FP, thread_off * wordSize); ++#endif ++ ++#ifdef ASSERT ++ // verify that threads correspond ++ { Label L; ++ __ get_thread(T8); ++ __ beq(T8, thread, L); ++ __ stop("StubRoutines::catch_exception: threads must correspond"); ++ __ bind(L); ++ } ++#endif ++ // set pending exception ++ __ verify_oop(V0); ++ __ st_d(V0, thread, in_bytes(Thread::pending_exception_offset())); ++ __ li(AT, (long)__FILE__); ++ __ st_d(AT, thread, in_bytes(Thread::exception_file_offset ())); ++ __ li(AT, (long)__LINE__); ++ __ st_d(AT, thread, in_bytes(Thread::exception_line_offset ())); ++ ++ // complete return to VM ++ assert(StubRoutines::_call_stub_return_address != NULL, "_call_stub_return_address must have been generated before"); ++ __ jmp(StubRoutines::_call_stub_return_address, relocInfo::none); ++ return start; ++ } ++ ++ // Continuation point for runtime calls returning with a pending ++ // exception. The pending exception check happened in the runtime ++ // or native call stub. The pending exception in Thread is ++ // converted into a Java-level exception. ++ // ++ // Contract with Java-level exception handlers: ++ // V0: exception ++ // V1: throwing pc ++ // ++ // NOTE: At entry of this stub, exception-pc must be on stack !! ++ ++ address generate_forward_exception() { ++ StubCodeMark mark(this, "StubRoutines", "forward exception"); ++ //Register thread = TREG; ++ Register thread = TREG; ++ address start = __ pc(); ++ ++ // Upon entry, the sp points to the return address returning into ++ // Java (interpreted or compiled) code; i.e., the return address ++ // throwing pc. ++ // ++ // Arguments pushed before the runtime call are still on the stack ++ // but the exception handler will reset the stack pointer -> ++ // ignore them. A potential result in registers can be ignored as ++ // well. 
++ ++#ifndef OPT_THREAD ++ __ get_thread(thread); ++#endif ++#ifdef ASSERT ++ // make sure this code is only executed if there is a pending exception ++ { ++ Label L; ++ __ ld_d(AT, thread, in_bytes(Thread::pending_exception_offset())); ++ __ bne(AT, R0, L); ++ __ stop("StubRoutines::forward exception: no pending exception (1)"); ++ __ bind(L); ++ } ++#endif ++ ++ // compute exception handler into T4 ++ __ ld_d(A1, SP, 0); ++ __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::exception_handler_for_return_address), thread, A1); ++ __ move(T4, V0); ++ __ pop(V1); ++ ++#ifndef OPT_THREAD ++ __ get_thread(thread); ++#endif ++ __ ld_d(V0, thread, in_bytes(Thread::pending_exception_offset())); ++ __ st_d(R0, thread, in_bytes(Thread::pending_exception_offset())); ++ ++#ifdef ASSERT ++ // make sure exception is set ++ { ++ Label L; ++ __ bne(V0, R0, L); ++ __ stop("StubRoutines::forward exception: no pending exception (2)"); ++ __ bind(L); ++ } ++#endif ++ ++ // continue at exception handler (return address removed) ++ // V0: exception ++ // T4: exception handler ++ // V1: throwing pc ++ __ verify_oop(V0); ++ __ jr(T4); ++ return start; ++ } ++ ++ // Non-destructive plausibility checks for oops ++ // ++ address generate_verify_oop() { ++ StubCodeMark mark(this, "StubRoutines", "verify_oop"); ++ address start = __ pc(); ++ __ verify_oop_subroutine(); ++ address end = __ pc(); ++ return start; ++ } ++ ++ // Generate indices for iota vector. ++ address generate_iota_indices(const char *stub_name) { ++ __ align(CodeEntryAlignment); ++ StubCodeMark mark(this, "StubRoutines", stub_name); ++ address start = __ pc(); ++ __ emit_data64(0x0706050403020100, relocInfo::none); ++ __ emit_data64(0x0F0E0D0C0B0A0908, relocInfo::none); ++ __ emit_data64(0x1716151413121110, relocInfo::none); ++ __ emit_data64(0x1F1E1D1C1B1A1918, relocInfo::none); ++ return start; ++ } ++ ++ // ++ // Generate stub for array fill. If "aligned" is true, the ++ // "to" address is assumed to be heapword aligned. 
++ // ++ // Arguments for generated stub: ++ // to: A0 ++ // value: A1 ++ // count: A2 treated as signed ++ // ++ address generate_fill(BasicType t, bool aligned, const char *name) { ++ __ align(CodeEntryAlignment); ++ StubCodeMark mark(this, "StubRoutines", name); ++ address start = __ pc(); ++ ++ const Register to = A0; // source array address ++ const Register value = A1; // value ++ const Register count = A2; // elements count ++ ++ const Register end = T5; // source array address end ++ const Register tmp = T8; // temp register ++ ++ Label L_fill_elements; ++ ++ int shift = -1; ++ switch (t) { ++ case T_BYTE: ++ shift = 0; ++ __ slti(AT, count, 9); // Short arrays (<= 8 bytes) fill by element ++ __ bstrins_d(value, value, 15, 8); // 8 bit -> 16 bit ++ __ bstrins_d(value, value, 31, 16); // 16 bit -> 32 bit ++ __ bstrins_d(value, value, 63, 32); // 32 bit -> 64 bit ++ __ bnez(AT, L_fill_elements); ++ break; ++ case T_SHORT: ++ shift = 1; ++ __ slti(AT, count, 5); // Short arrays (<= 8 bytes) fill by element ++ __ bstrins_d(value, value, 31, 16); // 16 bit -> 32 bit ++ __ bstrins_d(value, value, 63, 32); // 32 bit -> 64 bit ++ __ bnez(AT, L_fill_elements); ++ break; ++ case T_INT: ++ shift = 2; ++ __ slti(AT, count, 3); // Short arrays (<= 8 bytes) fill by element ++ __ bstrins_d(value, value, 63, 32); // 32 bit -> 64 bit ++ __ bnez(AT, L_fill_elements); ++ break; ++ default: ShouldNotReachHere(); ++ } ++ ++ switch (t) { ++ case T_BYTE: ++ __ add_d(end, to, count); ++ break; ++ case T_SHORT: ++ case T_INT: ++ __ alsl_d(end, count, to, shift-1); ++ break; ++ default: ShouldNotReachHere(); ++ } ++ if (!aligned) { ++ __ st_d(value, to, 0); ++ __ bstrins_d(to, R0, 2, 0); ++ __ addi_d(to, to, 8); ++ } ++ __ st_d(value, end, -8); ++ __ bstrins_d(end, R0, 2, 0); ++ ++ // ++ // Fill large chunks ++ // ++ Label L_loop_begin, L_not_64bytes_fill, L_loop_end, L_jtab1, L_jtab2; ++ __ addi_d(AT, to, 64); ++ __ blt(end, AT, L_not_64bytes_fill); ++ __ addi_d(to, to, 64); ++ __ bind(L_loop_begin); ++ __ st_d(value, to, -8); ++ __ st_d(value, to, -16); ++ __ st_d(value, to, -24); ++ __ st_d(value, to, -32); ++ __ st_d(value, to, -40); ++ __ st_d(value, to, -48); ++ __ st_d(value, to, -56); ++ __ st_d(value, to, -64); ++ __ addi_d(to, to, 64); ++ __ bge(end, to, L_loop_begin); ++ __ addi_d(to, to, -64); ++ __ beq(to, end, L_loop_end); ++ ++ __ bind(L_not_64bytes_fill); ++ // There are 0 - 7 words ++ __ lipc(AT, L_jtab1); ++ __ sub_d(tmp, end, to); ++ __ alsl_d(AT, tmp, AT, 1); ++ __ jr(AT); ++ ++ __ bind(L_jtab1); ++ // 0: ++ __ jr(RA); ++ __ nop(); ++ __ nop(); ++ __ nop(); ++ __ nop(); ++ __ nop(); ++ __ nop(); ++ __ nop(); ++ ++ // 1: ++ __ st_d(value, to, 0); ++ __ jr(RA); ++ __ nop(); ++ __ nop(); ++ __ nop(); ++ __ nop(); ++ __ nop(); ++ __ nop(); ++ ++ // 2: ++ __ st_d(value, to, 0); ++ __ st_d(value, to, 8); ++ __ jr(RA); ++ __ nop(); ++ __ nop(); ++ __ nop(); ++ __ nop(); ++ __ nop(); ++ ++ // 3: ++ __ st_d(value, to, 0); ++ __ st_d(value, to, 8); ++ __ st_d(value, to, 16); ++ __ jr(RA); ++ __ nop(); ++ __ nop(); ++ __ nop(); ++ __ nop(); ++ ++ // 4: ++ __ st_d(value, to, 0); ++ __ st_d(value, to, 8); ++ __ st_d(value, to, 16); ++ __ st_d(value, to, 24); ++ __ jr(RA); ++ __ nop(); ++ __ nop(); ++ __ nop(); ++ ++ // 5: ++ __ st_d(value, to, 0); ++ __ st_d(value, to, 8); ++ __ st_d(value, to, 16); ++ __ st_d(value, to, 24); ++ __ st_d(value, to, 32); ++ __ jr(RA); ++ __ nop(); ++ __ nop(); ++ ++ // 6: ++ __ st_d(value, to, 0); ++ __ st_d(value, to, 8); ++ __ st_d(value, to, 16); ++ __ st_d(value, 
to, 24); ++ __ st_d(value, to, 32); ++ __ st_d(value, to, 40); ++ __ jr(RA); ++ __ nop(); ++ ++ // 7: ++ __ st_d(value, to, 0); ++ __ st_d(value, to, 8); ++ __ st_d(value, to, 16); ++ __ st_d(value, to, 24); ++ __ st_d(value, to, 32); ++ __ st_d(value, to, 40); ++ __ st_d(value, to, 48); ++ ++ __ bind(L_loop_end); ++ __ jr(RA); ++ ++ // Short arrays (<= 8 bytes) ++ __ bind(L_fill_elements); ++ __ lipc(AT, L_jtab2); ++ __ slli_d(tmp, count, 4 + shift); ++ __ add_d(AT, AT, tmp); ++ __ jr(AT); ++ ++ __ bind(L_jtab2); ++ // 0: ++ __ jr(RA); ++ __ nop(); ++ __ nop(); ++ __ nop(); ++ ++ // 1: ++ __ st_b(value, to, 0); ++ __ jr(RA); ++ __ nop(); ++ __ nop(); ++ ++ // 2: ++ __ st_h(value, to, 0); ++ __ jr(RA); ++ __ nop(); ++ __ nop(); ++ ++ // 3: ++ __ st_h(value, to, 0); ++ __ st_b(value, to, 2); ++ __ jr(RA); ++ __ nop(); ++ ++ // 4: ++ __ st_w(value, to, 0); ++ __ jr(RA); ++ __ nop(); ++ __ nop(); ++ ++ // 5: ++ __ st_w(value, to, 0); ++ __ st_b(value, to, 4); ++ __ jr(RA); ++ __ nop(); ++ ++ // 6: ++ __ st_w(value, to, 0); ++ __ st_h(value, to, 4); ++ __ jr(RA); ++ __ nop(); ++ ++ // 7: ++ __ st_w(value, to, 0); ++ __ st_w(value, to, 3); ++ __ jr(RA); ++ __ nop(); ++ ++ // 8: ++ __ st_d(value, to, 0); ++ __ jr(RA); ++ return start; ++ } ++ ++ // ++ // Generate overlap test for array copy stubs ++ // ++ // Input: ++ // A0 - source array address ++ // A1 - destination array address ++ // A2 - element count ++ // ++ // Temp: ++ // AT - destination array address - source array address ++ // T4 - element count * element size ++ // ++ void array_overlap_test(address no_overlap_target, int log2_elem_size) { ++ __ slli_d(T4, A2, log2_elem_size); ++ __ sub_d(AT, A1, A0); ++ __ bgeu(AT, T4, no_overlap_target); ++ } ++ ++ // disjoint large copy ++ void generate_disjoint_large_copy(Label &entry, const char *name) { ++ __ align(CodeEntryAlignment); ++ StubCodeMark mark(this, "StubRoutines", name); ++ ++ { ++ UnsafeCopyMemoryMark ucmm(this, true, true); ++ Label loop, le32, le16, le8, lt8; ++ ++ __ bind(entry); ++ __ add_d(A3, A1, A2); ++ __ add_d(A2, A0, A2); ++ __ ld_d(A6, A0, 0); ++ __ ld_d(A7, A2, -8); ++ ++ __ andi(T1, A1, 7); ++ __ sub_d(T0, R0, T1); ++ __ addi_d(T0, T0, 8); ++ ++ __ add_d(A0, A0, T0); ++ __ add_d(A5, A1, T0); ++ ++ __ addi_d(A4, A2, -64); ++ __ bgeu(A0, A4, le32); ++ ++ __ bind(loop); ++ __ ld_d(T0, A0, 0); ++ __ ld_d(T1, A0, 8); ++ __ ld_d(T2, A0, 16); ++ __ ld_d(T3, A0, 24); ++ __ ld_d(T4, A0, 32); ++ __ ld_d(T5, A0, 40); ++ __ ld_d(T6, A0, 48); ++ __ ld_d(T7, A0, 56); ++ __ addi_d(A0, A0, 64); ++ __ st_d(T0, A5, 0); ++ __ st_d(T1, A5, 8); ++ __ st_d(T2, A5, 16); ++ __ st_d(T3, A5, 24); ++ __ st_d(T4, A5, 32); ++ __ st_d(T5, A5, 40); ++ __ st_d(T6, A5, 48); ++ __ st_d(T7, A5, 56); ++ __ addi_d(A5, A5, 64); ++ __ bltu(A0, A4, loop); ++ ++ __ bind(le32); ++ __ addi_d(A4, A2, -32); ++ __ bgeu(A0, A4, le16); ++ __ ld_d(T0, A0, 0); ++ __ ld_d(T1, A0, 8); ++ __ ld_d(T2, A0, 16); ++ __ ld_d(T3, A0, 24); ++ __ addi_d(A0, A0, 32); ++ __ st_d(T0, A5, 0); ++ __ st_d(T1, A5, 8); ++ __ st_d(T2, A5, 16); ++ __ st_d(T3, A5, 24); ++ __ addi_d(A5, A5, 32); ++ ++ __ bind(le16); ++ __ addi_d(A4, A2, -16); ++ __ bgeu(A0, A4, le8); ++ __ ld_d(T0, A0, 0); ++ __ ld_d(T1, A0, 8); ++ __ addi_d(A0, A0, 16); ++ __ st_d(T0, A5, 0); ++ __ st_d(T1, A5, 8); ++ __ addi_d(A5, A5, 16); ++ ++ __ bind(le8); ++ __ addi_d(A4, A2, -8); ++ __ bgeu(A0, A4, lt8); ++ __ ld_d(T0, A0, 0); ++ __ st_d(T0, A5, 0); ++ ++ __ bind(lt8); ++ __ st_d(A6, A1, 0); ++ __ st_d(A7, A3, -8); ++ } ++ ++ __ move(A0, R0); ++ __ jr(RA); ++ } 
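++
++  // Illustrative example for the large-copy scheme above (not exhaustive):
++  // for a 100-byte disjoint copy with (dst & 7) == 3, the stub pre-loads the
++  // first and last 8 source bytes (A6/A7), runs the aligned 64-byte main loop
++  // and the 32/16/8-byte tails starting at dst + 5 (= 8 - (dst & 7)), and the
++  // final unaligned st_d of A6 at dst[0..7] and of A7 at dst[92..99] covers
++  // the ragged head and tail. The LSX/LASX variants below follow the same
++  // pattern with 16- and 32-byte granules.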
++ ++ // disjoint large copy lsx ++ void generate_disjoint_large_copy_lsx(Label &entry, const char *name) { ++ __ align(CodeEntryAlignment); ++ StubCodeMark mark(this, "StubRoutines", name); ++ ++ { ++ UnsafeCopyMemoryMark ucmm(this, true, true); ++ Label loop, le64, le32, le16, lt16; ++ ++ __ bind(entry); ++ __ add_d(A3, A1, A2); ++ __ add_d(A2, A0, A2); ++ __ vld(F0, A0, 0); ++ __ vld(F1, A2, -16); ++ ++ __ andi(T1, A1, 15); ++ __ sub_d(T0, R0, T1); ++ __ addi_d(T0, T0, 16); ++ ++ __ add_d(A0, A0, T0); ++ __ add_d(A5, A1, T0); ++ ++ __ addi_d(A4, A2, -128); ++ __ bgeu(A0, A4, le64); ++ ++ __ bind(loop); ++ __ vld(FT0, A0, 0); ++ __ vld(FT1, A0, 16); ++ __ vld(FT2, A0, 32); ++ __ vld(FT3, A0, 48); ++ __ vld(FT4, A0, 64); ++ __ vld(FT5, A0, 80); ++ __ vld(FT6, A0, 96); ++ __ vld(FT7, A0, 112); ++ __ addi_d(A0, A0, 128); ++ __ vst(FT0, A5, 0); ++ __ vst(FT1, A5, 16); ++ __ vst(FT2, A5, 32); ++ __ vst(FT3, A5, 48); ++ __ vst(FT4, A5, 64); ++ __ vst(FT5, A5, 80); ++ __ vst(FT6, A5, 96); ++ __ vst(FT7, A5, 112); ++ __ addi_d(A5, A5, 128); ++ __ bltu(A0, A4, loop); ++ ++ __ bind(le64); ++ __ addi_d(A4, A2, -64); ++ __ bgeu(A0, A4, le32); ++ __ vld(FT0, A0, 0); ++ __ vld(FT1, A0, 16); ++ __ vld(FT2, A0, 32); ++ __ vld(FT3, A0, 48); ++ __ addi_d(A0, A0, 64); ++ __ vst(FT0, A5, 0); ++ __ vst(FT1, A5, 16); ++ __ vst(FT2, A5, 32); ++ __ vst(FT3, A5, 48); ++ __ addi_d(A5, A5, 64); ++ ++ __ bind(le32); ++ __ addi_d(A4, A2, -32); ++ __ bgeu(A0, A4, le16); ++ __ vld(FT0, A0, 0); ++ __ vld(FT1, A0, 16); ++ __ addi_d(A0, A0, 32); ++ __ vst(FT0, A5, 0); ++ __ vst(FT1, A5, 16); ++ __ addi_d(A5, A5, 32); ++ ++ __ bind(le16); ++ __ addi_d(A4, A2, -16); ++ __ bgeu(A0, A4, lt16); ++ __ vld(FT0, A0, 0); ++ __ vst(FT0, A5, 0); ++ ++ __ bind(lt16); ++ __ vst(F0, A1, 0); ++ __ vst(F1, A3, -16); ++ } ++ ++ __ move(A0, R0); ++ __ jr(RA); ++ } ++ ++ // disjoint large copy lasx ++ void generate_disjoint_large_copy_lasx(Label &entry, const char *name) { ++ __ align(CodeEntryAlignment); ++ StubCodeMark mark(this, "StubRoutines", name); ++ ++ { ++ UnsafeCopyMemoryMark ucmm(this, true, true); ++ Label loop, le128, le64, le32, lt32; ++ ++ __ bind(entry); ++ __ add_d(A3, A1, A2); ++ __ add_d(A2, A0, A2); ++ __ xvld(F0, A0, 0); ++ __ xvld(F1, A2, -32); ++ ++ __ andi(T1, A1, 31); ++ __ sub_d(T0, R0, T1); ++ __ addi_d(T0, T0, 32); ++ ++ __ add_d(A0, A0, T0); ++ __ add_d(A5, A1, T0); ++ ++ __ addi_d(A4, A2, -256); ++ __ bgeu(A0, A4, le128); ++ ++ __ bind(loop); ++ __ xvld(FT0, A0, 0); ++ __ xvld(FT1, A0, 32); ++ __ xvld(FT2, A0, 64); ++ __ xvld(FT3, A0, 96); ++ __ xvld(FT4, A0, 128); ++ __ xvld(FT5, A0, 160); ++ __ xvld(FT6, A0, 192); ++ __ xvld(FT7, A0, 224); ++ __ addi_d(A0, A0, 256); ++ __ xvst(FT0, A5, 0); ++ __ xvst(FT1, A5, 32); ++ __ xvst(FT2, A5, 64); ++ __ xvst(FT3, A5, 96); ++ __ xvst(FT4, A5, 128); ++ __ xvst(FT5, A5, 160); ++ __ xvst(FT6, A5, 192); ++ __ xvst(FT7, A5, 224); ++ __ addi_d(A5, A5, 256); ++ __ bltu(A0, A4, loop); ++ ++ __ bind(le128); ++ __ addi_d(A4, A2, -128); ++ __ bgeu(A0, A4, le64); ++ __ xvld(FT0, A0, 0); ++ __ xvld(FT1, A0, 32); ++ __ xvld(FT2, A0, 64); ++ __ xvld(FT3, A0, 96); ++ __ addi_d(A0, A0, 128); ++ __ xvst(FT0, A5, 0); ++ __ xvst(FT1, A5, 32); ++ __ xvst(FT2, A5, 64); ++ __ xvst(FT3, A5, 96); ++ __ addi_d(A5, A5, 128); ++ ++ __ bind(le64); ++ __ addi_d(A4, A2, -64); ++ __ bgeu(A0, A4, le32); ++ __ xvld(FT0, A0, 0); ++ __ xvld(FT1, A0, 32); ++ __ addi_d(A0, A0, 64); ++ __ xvst(FT0, A5, 0); ++ __ xvst(FT1, A5, 32); ++ __ addi_d(A5, A5, 64); ++ ++ __ bind(le32); ++ __ addi_d(A4, A2, -32); ++ 
__ bgeu(A0, A4, lt32); ++ __ xvld(FT0, A0, 0); ++ __ xvst(FT0, A5, 0); ++ ++ __ bind(lt32); ++ __ xvst(F0, A1, 0); ++ __ xvst(F1, A3, -32); ++ } ++ ++ __ move(A0, R0); ++ __ jr(RA); ++ } ++ ++ // conjoint large copy ++ void generate_conjoint_large_copy(Label &entry, const char *name) { ++ __ align(CodeEntryAlignment); ++ StubCodeMark mark(this, "StubRoutines", name); ++ ++ { ++ UnsafeCopyMemoryMark ucmm(this, true, true); ++ Label loop, le32, le16, le8, lt8; ++ ++ __ bind(entry); ++ __ add_d(A3, A1, A2); ++ __ add_d(A2, A0, A2); ++ __ ld_d(A6, A0, 0); ++ __ ld_d(A7, A2, -8); ++ ++ __ andi(T1, A3, 7); ++ __ sub_d(A2, A2, T1); ++ __ sub_d(A5, A3, T1); ++ ++ __ addi_d(A4, A0, 64); ++ __ bgeu(A4, A2, le32); ++ ++ __ bind(loop); ++ __ ld_d(T0, A2, -8); ++ __ ld_d(T1, A2, -16); ++ __ ld_d(T2, A2, -24); ++ __ ld_d(T3, A2, -32); ++ __ ld_d(T4, A2, -40); ++ __ ld_d(T5, A2, -48); ++ __ ld_d(T6, A2, -56); ++ __ ld_d(T7, A2, -64); ++ __ addi_d(A2, A2, -64); ++ __ st_d(T0, A5, -8); ++ __ st_d(T1, A5, -16); ++ __ st_d(T2, A5, -24); ++ __ st_d(T3, A5, -32); ++ __ st_d(T4, A5, -40); ++ __ st_d(T5, A5, -48); ++ __ st_d(T6, A5, -56); ++ __ st_d(T7, A5, -64); ++ __ addi_d(A5, A5, -64); ++ __ bltu(A4, A2, loop); ++ ++ __ bind(le32); ++ __ addi_d(A4, A0, 32); ++ __ bgeu(A4, A2, le16); ++ __ ld_d(T0, A2, -8); ++ __ ld_d(T1, A2, -16); ++ __ ld_d(T2, A2, -24); ++ __ ld_d(T3, A2, -32); ++ __ addi_d(A2, A2, -32); ++ __ st_d(T0, A5, -8); ++ __ st_d(T1, A5, -16); ++ __ st_d(T2, A5, -24); ++ __ st_d(T3, A5, -32); ++ __ addi_d(A5, A5, -32); ++ ++ __ bind(le16); ++ __ addi_d(A4, A0, 16); ++ __ bgeu(A4, A2, le8); ++ __ ld_d(T0, A2, -8); ++ __ ld_d(T1, A2, -16); ++ __ addi_d(A2, A2, -16); ++ __ st_d(T0, A5, -8); ++ __ st_d(T1, A5, -16); ++ __ addi_d(A5, A5, -16); ++ ++ __ bind(le8); ++ __ addi_d(A4, A0, 8); ++ __ bgeu(A4, A2, lt8); ++ __ ld_d(T0, A2, -8); ++ __ st_d(T0, A5, -8); ++ ++ __ bind(lt8); ++ __ st_d(A6, A1, 0); ++ __ st_d(A7, A3, -8); ++ } ++ ++ __ move(A0, R0); ++ __ jr(RA); ++ } ++ ++ // conjoint large copy lsx ++ void generate_conjoint_large_copy_lsx(Label &entry, const char *name) { ++ __ align(CodeEntryAlignment); ++ StubCodeMark mark(this, "StubRoutines", name); ++ ++ { ++ UnsafeCopyMemoryMark ucmm(this, true, true); ++ Label loop, le64, le32, le16, lt16; ++ ++ __ bind(entry); ++ __ add_d(A3, A1, A2); ++ __ add_d(A2, A0, A2); ++ __ vld(F0, A0, 0); ++ __ vld(F1, A2, -16); ++ ++ __ andi(T1, A3, 15); ++ __ sub_d(A2, A2, T1); ++ __ sub_d(A5, A3, T1); ++ ++ __ addi_d(A4, A0, 128); ++ __ bgeu(A4, A2, le64); ++ ++ __ bind(loop); ++ __ vld(FT0, A2, -16); ++ __ vld(FT1, A2, -32); ++ __ vld(FT2, A2, -48); ++ __ vld(FT3, A2, -64); ++ __ vld(FT4, A2, -80); ++ __ vld(FT5, A2, -96); ++ __ vld(FT6, A2, -112); ++ __ vld(FT7, A2, -128); ++ __ addi_d(A2, A2, -128); ++ __ vst(FT0, A5, -16); ++ __ vst(FT1, A5, -32); ++ __ vst(FT2, A5, -48); ++ __ vst(FT3, A5, -64); ++ __ vst(FT4, A5, -80); ++ __ vst(FT5, A5, -96); ++ __ vst(FT6, A5, -112); ++ __ vst(FT7, A5, -128); ++ __ addi_d(A5, A5, -128); ++ __ bltu(A4, A2, loop); ++ ++ __ bind(le64); ++ __ addi_d(A4, A0, 64); ++ __ bgeu(A4, A2, le32); ++ __ vld(FT0, A2, -16); ++ __ vld(FT1, A2, -32); ++ __ vld(FT2, A2, -48); ++ __ vld(FT3, A2, -64); ++ __ addi_d(A2, A2, -64); ++ __ vst(FT0, A5, -16); ++ __ vst(FT1, A5, -32); ++ __ vst(FT2, A5, -48); ++ __ vst(FT3, A5, -64); ++ __ addi_d(A5, A5, -64); ++ ++ __ bind(le32); ++ __ addi_d(A4, A0, 32); ++ __ bgeu(A4, A2, le16); ++ __ vld(FT0, A2, -16); ++ __ vld(FT1, A2, -32); ++ __ addi_d(A2, A2, -32); ++ __ vst(FT0, A5, -16); ++ __ vst(FT1, 
A5, -32); ++ __ addi_d(A5, A5, -32); ++ ++ __ bind(le16); ++ __ addi_d(A4, A0, 16); ++ __ bgeu(A4, A2, lt16); ++ __ vld(FT0, A2, -16); ++ __ vst(FT0, A5, -16); ++ ++ __ bind(lt16); ++ __ vst(F0, A1, 0); ++ __ vst(F1, A3, -16); ++ } ++ ++ __ move(A0, R0); ++ __ jr(RA); ++ } ++ ++ // conjoint large copy lasx ++ void generate_conjoint_large_copy_lasx(Label &entry, const char *name) { ++ __ align(CodeEntryAlignment); ++ StubCodeMark mark(this, "StubRoutines", name); ++ ++ { ++ UnsafeCopyMemoryMark ucmm(this, true, true); ++ Label loop, le128, le64, le32, lt32; ++ ++ __ bind(entry); ++ __ add_d(A3, A1, A2); ++ __ add_d(A2, A0, A2); ++ __ xvld(F0, A0, 0); ++ __ xvld(F1, A2, -32); ++ ++ __ andi(T1, A3, 31); ++ __ sub_d(A2, A2, T1); ++ __ sub_d(A5, A3, T1); ++ ++ __ addi_d(A4, A0, 256); ++ __ bgeu(A4, A2, le128); ++ ++ __ bind(loop); ++ __ xvld(FT0, A2, -32); ++ __ xvld(FT1, A2, -64); ++ __ xvld(FT2, A2, -96); ++ __ xvld(FT3, A2, -128); ++ __ xvld(FT4, A2, -160); ++ __ xvld(FT5, A2, -192); ++ __ xvld(FT6, A2, -224); ++ __ xvld(FT7, A2, -256); ++ __ addi_d(A2, A2, -256); ++ __ xvst(FT0, A5, -32); ++ __ xvst(FT1, A5, -64); ++ __ xvst(FT2, A5, -96); ++ __ xvst(FT3, A5, -128); ++ __ xvst(FT4, A5, -160); ++ __ xvst(FT5, A5, -192); ++ __ xvst(FT6, A5, -224); ++ __ xvst(FT7, A5, -256); ++ __ addi_d(A5, A5, -256); ++ __ bltu(A4, A2, loop); ++ ++ __ bind(le128); ++ __ addi_d(A4, A0, 128); ++ __ bgeu(A4, A2, le64); ++ __ xvld(FT0, A2, -32); ++ __ xvld(FT1, A2, -64); ++ __ xvld(FT2, A2, -96); ++ __ xvld(FT3, A2, -128); ++ __ addi_d(A2, A2, -128); ++ __ xvst(FT0, A5, -32); ++ __ xvst(FT1, A5, -64); ++ __ xvst(FT2, A5, -96); ++ __ xvst(FT3, A5, -128); ++ __ addi_d(A5, A5, -128); ++ ++ __ bind(le64); ++ __ addi_d(A4, A0, 64); ++ __ bgeu(A4, A2, le32); ++ __ xvld(FT0, A2, -32); ++ __ xvld(FT1, A2, -64); ++ __ addi_d(A2, A2, -64); ++ __ xvst(FT0, A5, -32); ++ __ xvst(FT1, A5, -64); ++ __ addi_d(A5, A5, -64); ++ ++ __ bind(le32); ++ __ addi_d(A4, A0, 32); ++ __ bgeu(A4, A2, lt32); ++ __ xvld(FT0, A2, -32); ++ __ xvst(FT0, A5, -32); ++ ++ __ bind(lt32); ++ __ xvst(F0, A1, 0); ++ __ xvst(F1, A3, -32); ++ } ++ ++ __ move(A0, R0); ++ __ jr(RA); ++ } ++ ++ // Byte small copy: less than { int:9, lsx:17, lasx:33 } elements. 
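++  // Illustrative note: the small-copy stubs below are computed jump tables.
++  // Every case is padded with nops to 8 instructions (32 bytes), so the
++  // dispatch is simply target = table_base + (count << 5); the short/int/long
++  // small-copy generators further down use the same scheme.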
++ void generate_byte_small_copy(Label &entry, const char *name) { ++ __ align(CodeEntryAlignment); ++ StubCodeMark mark(this, "StubRoutines", name); ++ ++ Label L; ++ __ bind(entry); ++ __ lipc(AT, L); ++ __ slli_d(A2, A2, 5); ++ __ add_d(AT, AT, A2); ++ __ jr(AT); ++ ++ __ bind(L); ++ // 0: ++ __ move(A0, R0); ++ __ jr(RA); ++ __ nop(); ++ __ nop(); ++ __ nop(); ++ __ nop(); ++ __ nop(); ++ __ nop(); ++ ++ // 1: ++ __ ld_b(AT, A0, 0); ++ __ st_b(AT, A1, 0); ++ __ move(A0, R0); ++ __ jr(RA); ++ __ nop(); ++ __ nop(); ++ __ nop(); ++ __ nop(); ++ ++ // 2: ++ __ ld_h(AT, A0, 0); ++ __ st_h(AT, A1, 0); ++ __ move(A0, R0); ++ __ jr(RA); ++ __ nop(); ++ __ nop(); ++ __ nop(); ++ __ nop(); ++ ++ // 3: ++ __ ld_h(AT, A0, 0); ++ __ ld_b(A2, A0, 2); ++ __ st_h(AT, A1, 0); ++ __ st_b(A2, A1, 2); ++ __ move(A0, R0); ++ __ jr(RA); ++ __ nop(); ++ __ nop(); ++ ++ // 4: ++ __ ld_w(AT, A0, 0); ++ __ st_w(AT, A1, 0); ++ __ move(A0, R0); ++ __ jr(RA); ++ __ nop(); ++ __ nop(); ++ __ nop(); ++ __ nop(); ++ ++ // 5: ++ __ ld_w(AT, A0, 0); ++ __ ld_b(A2, A0, 4); ++ __ st_w(AT, A1, 0); ++ __ st_b(A2, A1, 4); ++ __ move(A0, R0); ++ __ jr(RA); ++ __ nop(); ++ __ nop(); ++ ++ // 6: ++ __ ld_w(AT, A0, 0); ++ __ ld_h(A2, A0, 4); ++ __ st_w(AT, A1, 0); ++ __ st_h(A2, A1, 4); ++ __ move(A0, R0); ++ __ jr(RA); ++ __ nop(); ++ __ nop(); ++ ++ // 7: ++ __ ld_w(AT, A0, 0); ++ __ ld_w(A2, A0, 3); ++ __ st_w(AT, A1, 0); ++ __ st_w(A2, A1, 3); ++ __ move(A0, R0); ++ __ jr(RA); ++ __ nop(); ++ __ nop(); ++ ++ // 8: ++ __ ld_d(AT, A0, 0); ++ __ st_d(AT, A1, 0); ++ __ move(A0, R0); ++ __ jr(RA); ++ ++ if (!UseLSX) ++ return; ++ ++ __ nop(); ++ __ nop(); ++ __ nop(); ++ __ nop(); ++ ++ // 9: ++ __ ld_d(AT, A0, 0); ++ __ ld_b(A2, A0, 8); ++ __ st_d(AT, A1, 0); ++ __ st_b(A2, A1, 8); ++ __ move(A0, R0); ++ __ jr(RA); ++ __ nop(); ++ __ nop(); ++ ++ // 10: ++ __ ld_d(AT, A0, 0); ++ __ ld_h(A2, A0, 8); ++ __ st_d(AT, A1, 0); ++ __ st_h(A2, A1, 8); ++ __ move(A0, R0); ++ __ jr(RA); ++ __ nop(); ++ __ nop(); ++ ++ // 11: ++ __ ld_d(AT, A0, 0); ++ __ ld_w(A2, A0, 7); ++ __ st_d(AT, A1, 0); ++ __ st_w(A2, A1, 7); ++ __ move(A0, R0); ++ __ jr(RA); ++ __ nop(); ++ __ nop(); ++ ++ // 12: ++ __ ld_d(AT, A0, 0); ++ __ ld_w(A2, A0, 8); ++ __ st_d(AT, A1, 0); ++ __ st_w(A2, A1, 8); ++ __ move(A0, R0); ++ __ jr(RA); ++ __ nop(); ++ __ nop(); ++ ++ // 13: ++ __ ld_d(AT, A0, 0); ++ __ ld_d(A2, A0, 5); ++ __ st_d(AT, A1, 0); ++ __ st_d(A2, A1, 5); ++ __ move(A0, R0); ++ __ jr(RA); ++ __ nop(); ++ __ nop(); ++ ++ // 14: ++ __ ld_d(AT, A0, 0); ++ __ ld_d(A2, A0, 6); ++ __ st_d(AT, A1, 0); ++ __ st_d(A2, A1, 6); ++ __ move(A0, R0); ++ __ jr(RA); ++ __ nop(); ++ __ nop(); ++ ++ // 15: ++ __ ld_d(AT, A0, 0); ++ __ ld_d(A2, A0, 7); ++ __ st_d(AT, A1, 0); ++ __ st_d(A2, A1, 7); ++ __ move(A0, R0); ++ __ jr(RA); ++ __ nop(); ++ __ nop(); ++ ++ // 16: ++ __ vld(F0, A0, 0); ++ __ vst(F0, A1, 0); ++ __ move(A0, R0); ++ __ jr(RA); ++ __ nop(); ++ __ nop(); ++ __ nop(); ++ __ nop(); ++ ++ if (!UseLASX) ++ return; ++ ++ // 17: ++ __ vld(F0, A0, 0); ++ __ ld_b(AT, A0, 16); ++ __ vst(F0, A1, 0); ++ __ st_b(AT, A1, 16); ++ __ move(A0, R0); ++ __ jr(RA); ++ __ nop(); ++ __ nop(); ++ ++ // 18: ++ __ vld(F0, A0, 0); ++ __ ld_h(AT, A0, 16); ++ __ vst(F0, A1, 0); ++ __ st_h(AT, A1, 16); ++ __ move(A0, R0); ++ __ jr(RA); ++ __ nop(); ++ __ nop(); ++ ++ // 19: ++ __ vld(F0, A0, 0); ++ __ ld_w(AT, A0, 15); ++ __ vst(F0, A1, 0); ++ __ st_w(AT, A1, 15); ++ __ move(A0, R0); ++ __ jr(RA); ++ __ nop(); ++ __ nop(); ++ ++ // 20: ++ __ vld(F0, A0, 0); ++ __ ld_w(AT, A0, 16); 
++ __ vst(F0, A1, 0); ++ __ st_w(AT, A1, 16); ++ __ move(A0, R0); ++ __ jr(RA); ++ __ nop(); ++ __ nop(); ++ ++ // 21: ++ __ vld(F0, A0, 0); ++ __ ld_d(AT, A0, 13); ++ __ vst(F0, A1, 0); ++ __ st_d(AT, A1, 13); ++ __ move(A0, R0); ++ __ jr(RA); ++ __ nop(); ++ __ nop(); ++ ++ // 22: ++ __ vld(F0, A0, 0); ++ __ ld_d(AT, A0, 14); ++ __ vst(F0, A1, 0); ++ __ st_d(AT, A1, 14); ++ __ move(A0, R0); ++ __ jr(RA); ++ __ nop(); ++ __ nop(); ++ ++ // 23: ++ __ vld(F0, A0, 0); ++ __ ld_d(AT, A0, 15); ++ __ vst(F0, A1, 0); ++ __ st_d(AT, A1, 15); ++ __ move(A0, R0); ++ __ jr(RA); ++ __ nop(); ++ __ nop(); ++ ++ // 24: ++ __ vld(F0, A0, 0); ++ __ ld_d(AT, A0, 16); ++ __ vst(F0, A1, 0); ++ __ st_d(AT, A1, 16); ++ __ move(A0, R0); ++ __ jr(RA); ++ __ nop(); ++ __ nop(); ++ ++ // 25: ++ __ vld(F0, A0, 0); ++ __ vld(F1, A0, 9); ++ __ vst(F0, A1, 0); ++ __ vst(F1, A1, 9); ++ __ move(A0, R0); ++ __ jr(RA); ++ __ nop(); ++ __ nop(); ++ ++ // 26: ++ __ vld(F0, A0, 0); ++ __ vld(F1, A0, 10); ++ __ vst(F0, A1, 0); ++ __ vst(F1, A1, 10); ++ __ move(A0, R0); ++ __ jr(RA); ++ __ nop(); ++ __ nop(); ++ ++ // 27: ++ __ vld(F0, A0, 0); ++ __ vld(F1, A0, 11); ++ __ vst(F0, A1, 0); ++ __ vst(F1, A1, 11); ++ __ move(A0, R0); ++ __ jr(RA); ++ __ nop(); ++ __ nop(); ++ ++ // 28: ++ __ vld(F0, A0, 0); ++ __ vld(F1, A0, 12); ++ __ vst(F0, A1, 0); ++ __ vst(F1, A1, 12); ++ __ move(A0, R0); ++ __ jr(RA); ++ __ nop(); ++ __ nop(); ++ ++ // 29: ++ __ vld(F0, A0, 0); ++ __ vld(F1, A0, 13); ++ __ vst(F0, A1, 0); ++ __ vst(F1, A1, 13); ++ __ move(A0, R0); ++ __ jr(RA); ++ __ nop(); ++ __ nop(); ++ ++ // 30: ++ __ vld(F0, A0, 0); ++ __ vld(F1, A0, 14); ++ __ vst(F0, A1, 0); ++ __ vst(F1, A1, 14); ++ __ move(A0, R0); ++ __ jr(RA); ++ __ nop(); ++ __ nop(); ++ ++ // 31: ++ __ vld(F0, A0, 0); ++ __ vld(F1, A0, 15); ++ __ vst(F0, A1, 0); ++ __ vst(F1, A1, 15); ++ __ move(A0, R0); ++ __ jr(RA); ++ __ nop(); ++ __ nop(); ++ ++ // 32: ++ __ xvld(F0, A0, 0); ++ __ xvst(F0, A1, 0); ++ __ move(A0, R0); ++ __ jr(RA); ++ } ++ ++ // Arguments: ++ // aligned - true => Input and output aligned on a HeapWord == 8-byte boundary ++ // ignored ++ // name - stub name string ++ // ++ // Inputs: ++ // A0 - source array address ++ // A1 - destination array address ++ // A2 - element count, treated as ssize_t, can be zero ++ // ++ // If 'from' and/or 'to' are aligned on 4-, 2-, or 1-byte boundaries, ++ // we let the hardware handle it. The one to eight bytes within words, ++ // dwords or qwords that span cache line boundaries will still be loaded ++ // and stored atomically. ++ // ++ // Side Effects: ++ // disjoint_byte_copy_entry is set to the no-overlap entry point ++ // used by generate_conjoint_byte_copy(). ++ // ++ address generate_disjoint_byte_copy(bool aligned, Label &small, Label &large, ++ const char * name) { ++ __ align(CodeEntryAlignment); ++ StubCodeMark mark(this, "StubRoutines", name); ++ address start = __ pc(); ++ ++ if (UseLASX) ++ __ sltui(T0, A2, 33); ++ else if (UseLSX) ++ __ sltui(T0, A2, 17); ++ else ++ __ sltui(T0, A2, 9); ++ __ bnez(T0, small); ++ ++ __ b(large); ++ ++ return start; ++ } ++ ++ // Arguments: ++ // aligned - true => Input and output aligned on a HeapWord == 8-byte boundary ++ // ignored ++ // name - stub name string ++ // ++ // Inputs: ++ // A0 - source array address ++ // A1 - destination array address ++ // A2 - element count, treated as ssize_t, can be zero ++ // ++ // If 'from' and/or 'to' are aligned on 4-, 2-, or 1-byte boundaries, ++ // we let the hardware handle it. 
The one to eight bytes within words, ++ // dwords or qwords that span cache line boundaries will still be loaded ++ // and stored atomically. ++ // ++ address generate_conjoint_byte_copy(bool aligned, Label &small, Label &large, ++ const char *name) { ++ __ align(CodeEntryAlignment); ++ StubCodeMark mark(this, "StubRoutines", name); ++ address start = __ pc(); ++ ++ array_overlap_test(StubRoutines::jbyte_disjoint_arraycopy(), 0); ++ ++ if (UseLASX) ++ __ sltui(T0, A2, 33); ++ else if (UseLSX) ++ __ sltui(T0, A2, 17); ++ else ++ __ sltui(T0, A2, 9); ++ __ bnez(T0, small); ++ ++ __ b(large); ++ ++ return start; ++ } ++ ++ // Short small copy: less than { int:9, lsx:9, lasx:17 } elements. ++ void generate_short_small_copy(Label &entry, const char *name) { ++ __ align(CodeEntryAlignment); ++ StubCodeMark mark(this, "StubRoutines", name); ++ ++ Label L; ++ __ bind(entry); ++ __ lipc(AT, L); ++ __ slli_d(A2, A2, 5); ++ __ add_d(AT, AT, A2); ++ __ jr(AT); ++ ++ __ bind(L); ++ // 0: ++ __ move(A0, R0); ++ __ jr(RA); ++ __ nop(); ++ __ nop(); ++ __ nop(); ++ __ nop(); ++ __ nop(); ++ __ nop(); ++ ++ // 1: ++ __ ld_h(AT, A0, 0); ++ __ st_h(AT, A1, 0); ++ __ move(A0, R0); ++ __ jr(RA); ++ __ nop(); ++ __ nop(); ++ __ nop(); ++ __ nop(); ++ ++ // 2: ++ __ ld_w(AT, A0, 0); ++ __ st_w(AT, A1, 0); ++ __ move(A0, R0); ++ __ jr(RA); ++ __ nop(); ++ __ nop(); ++ __ nop(); ++ __ nop(); ++ ++ // 3: ++ __ ld_w(AT, A0, 0); ++ __ ld_h(A2, A0, 4); ++ __ st_w(AT, A1, 0); ++ __ st_h(A2, A1, 4); ++ __ move(A0, R0); ++ __ jr(RA); ++ __ nop(); ++ __ nop(); ++ ++ // 4: ++ __ ld_d(AT, A0, 0); ++ __ st_d(AT, A1, 0); ++ __ move(A0, R0); ++ __ jr(RA); ++ __ nop(); ++ __ nop(); ++ __ nop(); ++ __ nop(); ++ ++ // 5: ++ __ ld_d(AT, A0, 0); ++ __ ld_h(A2, A0, 8); ++ __ st_d(AT, A1, 0); ++ __ st_h(A2, A1, 8); ++ __ move(A0, R0); ++ __ jr(RA); ++ __ nop(); ++ __ nop(); ++ ++ // 6: ++ __ ld_d(AT, A0, 0); ++ __ ld_w(A2, A0, 8); ++ __ st_d(AT, A1, 0); ++ __ st_w(A2, A1, 8); ++ __ move(A0, R0); ++ __ jr(RA); ++ __ nop(); ++ __ nop(); ++ ++ // 7: ++ __ ld_d(AT, A0, 0); ++ __ ld_d(A2, A0, 6); ++ __ st_d(AT, A1, 0); ++ __ st_d(A2, A1, 6); ++ __ move(A0, R0); ++ __ jr(RA); ++ __ nop(); ++ __ nop(); ++ ++ // 8: ++ if (UseLSX) { ++ __ vld(F0, A0, 0); ++ __ vst(F0, A1, 0); ++ __ move(A0, R0); ++ __ jr(RA); ++ __ nop(); ++ __ nop(); ++ } else { ++ __ ld_d(AT, A0, 0); ++ __ ld_d(A2, A0, 8); ++ __ st_d(AT, A1, 0); ++ __ st_d(A2, A1, 8); ++ __ move(A0, R0); ++ __ jr(RA); ++ } ++ ++ if (!UseLASX) ++ return; ++ ++ __ nop(); ++ __ nop(); ++ ++ // 9: ++ __ vld(F0, A0, 0); ++ __ ld_h(AT, A0, 16); ++ __ vst(F0, A1, 0); ++ __ st_h(AT, A1, 16); ++ __ move(A0, R0); ++ __ jr(RA); ++ __ nop(); ++ __ nop(); ++ ++ // 10: ++ __ vld(F0, A0, 0); ++ __ ld_w(AT, A0, 16); ++ __ vst(F0, A1, 0); ++ __ st_w(AT, A1, 16); ++ __ move(A0, R0); ++ __ jr(RA); ++ __ nop(); ++ __ nop(); ++ ++ // 11: ++ __ vld(F0, A0, 0); ++ __ ld_d(AT, A0, 14); ++ __ vst(F0, A1, 0); ++ __ st_d(AT, A1, 14); ++ __ move(A0, R0); ++ __ jr(RA); ++ __ nop(); ++ __ nop(); ++ ++ // 12: ++ __ vld(F0, A0, 0); ++ __ ld_d(AT, A0, 16); ++ __ vst(F0, A1, 0); ++ __ st_d(AT, A1, 16); ++ __ move(A0, R0); ++ __ jr(RA); ++ __ nop(); ++ __ nop(); ++ ++ // 13: ++ __ vld(F0, A0, 0); ++ __ vld(F1, A0, 10); ++ __ vst(F0, A1, 0); ++ __ vst(F1, A1, 10); ++ __ move(A0, R0); ++ __ jr(RA); ++ __ nop(); ++ __ nop(); ++ ++ // 14: ++ __ vld(F0, A0, 0); ++ __ vld(F1, A0, 12); ++ __ vst(F0, A1, 0); ++ __ vst(F1, A1, 12); ++ __ move(A0, R0); ++ __ jr(RA); ++ __ nop(); ++ __ nop(); ++ ++ // 15: ++ __ vld(F0, A0, 0); ++ __ 
vld(F1, A0, 14); ++ __ vst(F0, A1, 0); ++ __ vst(F1, A1, 14); ++ __ move(A0, R0); ++ __ jr(RA); ++ __ nop(); ++ __ nop(); ++ ++ // 16: ++ __ xvld(F0, A0, 0); ++ __ xvst(F0, A1, 0); ++ __ move(A0, R0); ++ __ jr(RA); ++ } ++ ++ // Arguments: ++ // aligned - true => Input and output aligned on a HeapWord == 8-byte boundary ++ // ignored ++ // name - stub name string ++ // ++ // Inputs: ++ // A0 - source array address ++ // A1 - destination array address ++ // A2 - element count, treated as ssize_t, can be zero ++ // ++ // If 'from' and/or 'to' are aligned on 4-, 2-, or 1-byte boundaries, ++ // we let the hardware handle it. The one to eight bytes within words, ++ // dwords or qwords that span cache line boundaries will still be loaded ++ // and stored atomically. ++ // ++ // Side Effects: ++ // disjoint_short_copy_entry is set to the no-overlap entry point ++ // used by generate_conjoint_short_copy(). ++ // ++ address generate_disjoint_short_copy(bool aligned, Label &small, Label &large, ++ const char * name) { ++ __ align(CodeEntryAlignment); ++ StubCodeMark mark(this, "StubRoutines", name); ++ address start = __ pc(); ++ ++ if (UseLASX) ++ __ sltui(T0, A2, 17); ++ else ++ __ sltui(T0, A2, 9); ++ __ bnez(T0, small); ++ ++ __ slli_d(A2, A2, 1); ++ ++ __ b(large); ++ ++ return start; ++ } ++ ++ // Arguments: ++ // aligned - true => Input and output aligned on a HeapWord == 8-byte boundary ++ // ignored ++ // name - stub name string ++ // ++ // Inputs: ++ // A0 - source array address ++ // A1 - destination array address ++ // A2 - element count, treated as ssize_t, can be zero ++ // ++ // If 'from' and/or 'to' are aligned on 4- or 2-byte boundaries, we ++ // let the hardware handle it. The two or four words within dwords ++ // or qwords that span cache line boundaries will still be loaded ++ // and stored atomically. ++ // ++ address generate_conjoint_short_copy(bool aligned, Label &small, Label &large, ++ const char *name) { ++ __ align(CodeEntryAlignment); ++ StubCodeMark mark(this, "StubRoutines", name); ++ address start = __ pc(); ++ ++ array_overlap_test(StubRoutines::jshort_disjoint_arraycopy(), 1); ++ ++ if (UseLASX) ++ __ sltui(T0, A2, 17); ++ else ++ __ sltui(T0, A2, 9); ++ __ bnez(T0, small); ++ ++ __ slli_d(A2, A2, 1); ++ ++ __ b(large); ++ ++ return start; ++ } ++ ++ // Int small copy: less than { int:7, lsx:7, lasx:9 } elements. 
++ void generate_int_small_copy(Label &entry, const char *name) { ++ __ align(CodeEntryAlignment); ++ StubCodeMark mark(this, "StubRoutines", name); ++ ++ Label L; ++ __ bind(entry); ++ __ lipc(AT, L); ++ __ slli_d(A2, A2, 5); ++ __ add_d(AT, AT, A2); ++ __ jr(AT); ++ ++ __ bind(L); ++ // 0: ++ __ move(A0, R0); ++ __ jr(RA); ++ __ nop(); ++ __ nop(); ++ __ nop(); ++ __ nop(); ++ __ nop(); ++ __ nop(); ++ ++ // 1: ++ __ ld_w(AT, A0, 0); ++ __ st_w(AT, A1, 0); ++ __ move(A0, R0); ++ __ jr(RA); ++ __ nop(); ++ __ nop(); ++ __ nop(); ++ __ nop(); ++ ++ // 2: ++ __ ld_d(AT, A0, 0); ++ __ st_d(AT, A1, 0); ++ __ move(A0, R0); ++ __ jr(RA); ++ __ nop(); ++ __ nop(); ++ __ nop(); ++ __ nop(); ++ ++ // 3: ++ __ ld_d(AT, A0, 0); ++ __ ld_w(A2, A0, 8); ++ __ st_d(AT, A1, 0); ++ __ st_w(A2, A1, 8); ++ __ move(A0, R0); ++ __ jr(RA); ++ __ nop(); ++ __ nop(); ++ ++ // 4: ++ if (UseLSX) { ++ __ vld(F0, A0, 0); ++ __ vst(F0, A1, 0); ++ __ move(A0, R0); ++ __ jr(RA); ++ __ nop(); ++ __ nop(); ++ } else { ++ __ ld_d(AT, A0, 0); ++ __ ld_d(A2, A0, 8); ++ __ st_d(AT, A1, 0); ++ __ st_d(A2, A1, 8); ++ __ move(A0, R0); ++ __ jr(RA); ++ } ++ __ nop(); ++ __ nop(); ++ ++ // 5: ++ if (UseLSX) { ++ __ vld(F0, A0, 0); ++ __ ld_w(AT, A0, 16); ++ __ vst(F0, A1, 0); ++ __ st_w(AT, A1, 16); ++ __ move(A0, R0); ++ __ jr(RA); ++ __ nop(); ++ __ nop(); ++ } else { ++ __ ld_d(AT, A0, 0); ++ __ ld_d(A2, A0, 8); ++ __ ld_w(A3, A0, 16); ++ __ st_d(AT, A1, 0); ++ __ st_d(A2, A1, 8); ++ __ st_w(A3, A1, 16); ++ __ move(A0, R0); ++ __ jr(RA); ++ } ++ ++ // 6: ++ if (UseLSX) { ++ __ vld(F0, A0, 0); ++ __ ld_d(AT, A0, 16); ++ __ vst(F0, A1, 0); ++ __ st_d(AT, A1, 16); ++ __ move(A0, R0); ++ __ jr(RA); ++ __ nop(); ++ __ nop(); ++ } else { ++ __ ld_d(AT, A0, 0); ++ __ ld_d(A2, A0, 8); ++ __ ld_d(A3, A0, 16); ++ __ st_d(AT, A1, 0); ++ __ st_d(A2, A1, 8); ++ __ st_d(A3, A1, 16); ++ __ move(A0, R0); ++ __ jr(RA); ++ } ++ ++ if (!UseLASX) ++ return; ++ ++ // 7: ++ __ vld(F0, A0, 0); ++ __ vld(F1, A0, 12); ++ __ vst(F0, A1, 0); ++ __ vst(F1, A1, 12); ++ __ move(A0, R0); ++ __ jr(RA); ++ __ nop(); ++ __ nop(); ++ ++ // 8: ++ __ xvld(F0, A0, 0); ++ __ xvst(F0, A1, 0); ++ __ move(A0, R0); ++ __ jr(RA); ++ } ++ ++ // Generate maybe oop copy ++ void gen_maybe_oop_copy(bool is_oop, bool disjoint, bool aligned, Label &small, ++ Label &large, const char *name, int small_limit, ++ int log2_elem_size, bool dest_uninitialized = false) { ++ Label post, _large; ++ DecoratorSet decorators = DECORATORS_NONE; ++ BarrierSetAssembler *bs = nullptr; ++ ++ if (is_oop) { ++ decorators = IN_HEAP | IS_ARRAY; ++ ++ if (disjoint) { ++ decorators |= ARRAYCOPY_DISJOINT; ++ } ++ ++ if (aligned) { ++ decorators |= ARRAYCOPY_ALIGNED; ++ } ++ ++ if (dest_uninitialized) { ++ decorators |= IS_DEST_UNINITIALIZED; ++ } ++ ++ __ addi_d(SP, SP, -4 * wordSize); ++ __ st_d(A2, SP, 3 * wordSize); ++ __ st_d(A1, SP, 2 * wordSize); ++ __ st_d(A0, SP, 1 * wordSize); ++ __ st_d(RA, SP, 0 * wordSize); ++ ++ bs = BarrierSet::barrier_set()->barrier_set_assembler(); ++ bs->arraycopy_prologue(_masm, decorators, is_oop, A0, A1, A2, RegSet()); ++ ++ __ ld_d(A2, SP, 3 * wordSize); ++ __ ld_d(A1, SP, 2 * wordSize); ++ __ ld_d(A0, SP, 1 * wordSize); ++ } ++ ++ __ sltui(T0, A2, small_limit); ++ if (is_oop) { ++ __ beqz(T0, _large); ++ __ bl(small); ++ __ b(post); ++ } else { ++ __ bnez(T0, small); ++ } ++ ++ __ bind(_large); ++ __ slli_d(A2, A2, log2_elem_size); ++ ++ if (is_oop) { ++ __ bl(large); ++ } else { ++ __ b(large); ++ } ++ ++ if (is_oop) { ++ __ bind(post); ++ __ ld_d(A2, SP, 3 * 
wordSize); ++ __ ld_d(A1, SP, 2 * wordSize); ++ ++ bs->arraycopy_epilogue(_masm, decorators, is_oop, A1, A2, T1, RegSet()); ++ ++ __ ld_d(RA, SP, 0 * wordSize); ++ __ addi_d(SP, SP, 4 * wordSize); ++ __ move(A0, R0); ++ __ jr(RA); ++ } ++ } ++ ++ // Arguments: ++ // aligned - true => Input and output aligned on a HeapWord == 8-byte boundary ++ // ignored ++ // is_oop - true => oop array, so generate store check code ++ // name - stub name string ++ // ++ // Inputs: ++ // A0 - source array address ++ // A1 - destination array address ++ // A2 - element count, treated as ssize_t, can be zero ++ // ++ // If 'from' and/or 'to' are aligned on 4-byte boundaries, we let ++ // the hardware handle it. The two dwords within qwords that span ++ // cache line boundaries will still be loaded and stored atomicly. ++ // ++ // Side Effects: ++ // disjoint_int_copy_entry is set to the no-overlap entry point ++ // used by generate_conjoint_int_oop_copy(). ++ // ++ address generate_disjoint_int_oop_copy(bool aligned, bool is_oop, Label &small, ++ Label &large, const char *name, int small_limit, ++ bool dest_uninitialized = false) { ++ __ align(CodeEntryAlignment); ++ StubCodeMark mark(this, "StubRoutines", name); ++ address start = __ pc(); ++ ++ gen_maybe_oop_copy(is_oop, true, aligned, small, large, name, ++ small_limit, 2, dest_uninitialized); ++ ++ return start; ++ } ++ ++ // Arguments: ++ // aligned - true => Input and output aligned on a HeapWord == 8-byte boundary ++ // ignored ++ // is_oop - true => oop array, so generate store check code ++ // name - stub name string ++ // ++ // Inputs: ++ // A0 - source array address ++ // A1 - destination array address ++ // A2 - element count, treated as ssize_t, can be zero ++ // ++ // If 'from' and/or 'to' are aligned on 4-byte boundaries, we let ++ // the hardware handle it. The two dwords within qwords that span ++ // cache line boundaries will still be loaded and stored atomicly. ++ // ++ address generate_conjoint_int_oop_copy(bool aligned, bool is_oop, Label &small, ++ Label &large, const char *name, int small_limit, ++ bool dest_uninitialized = false) { ++ __ align(CodeEntryAlignment); ++ StubCodeMark mark(this, "StubRoutines", name); ++ address start = __ pc(); ++ ++ if (is_oop) { ++ array_overlap_test(StubRoutines::oop_disjoint_arraycopy(), 2); ++ } else { ++ array_overlap_test(StubRoutines::jint_disjoint_arraycopy(), 2); ++ } ++ ++ gen_maybe_oop_copy(is_oop, false, aligned, small, large, name, ++ small_limit, 2, dest_uninitialized); ++ ++ return start; ++ } ++ ++ // Long small copy: less than { int:4, lsx:4, lasx:5 } elements. 
++ void generate_long_small_copy(Label &entry, const char *name) { ++ __ align(CodeEntryAlignment); ++ StubCodeMark mark(this, "StubRoutines", name); ++ ++ Label L; ++ __ bind(entry); ++ __ lipc(AT, L); ++ __ slli_d(A2, A2, 5); ++ __ add_d(AT, AT, A2); ++ __ jr(AT); ++ ++ __ bind(L); ++ // 0: ++ __ move(A0, R0); ++ __ jr(RA); ++ __ nop(); ++ __ nop(); ++ __ nop(); ++ __ nop(); ++ __ nop(); ++ __ nop(); ++ ++ { ++ UnsafeCopyMemoryMark ucmm(this, true, true); ++ // 1: ++ __ ld_d(AT, A0, 0); ++ __ st_d(AT, A1, 0); ++ __ move(A0, R0); ++ __ jr(RA); ++ __ nop(); ++ __ nop(); ++ __ nop(); ++ __ nop(); ++ ++ // 2: ++ if (UseLSX) { ++ __ vld(F0, A0, 0); ++ __ vst(F0, A1, 0); ++ __ move(A0, R0); ++ __ jr(RA); ++ __ nop(); ++ __ nop(); ++ } else { ++ __ ld_d(AT, A0, 0); ++ __ ld_d(A2, A0, 8); ++ __ st_d(AT, A1, 0); ++ __ st_d(A2, A1, 8); ++ __ move(A0, R0); ++ __ jr(RA); ++ } ++ __ nop(); ++ __ nop(); ++ ++ // 3: ++ if (UseLSX) { ++ __ vld(F0, A0, 0); ++ __ ld_d(AT, A0, 16); ++ __ vst(F0, A1, 0); ++ __ st_d(AT, A1, 16); ++ __ move(A0, R0); ++ __ jr(RA); ++ __ nop(); ++ __ nop(); ++ } else { ++ __ ld_d(AT, A0, 0); ++ __ ld_d(A2, A0, 8); ++ __ ld_d(A3, A0, 16); ++ __ st_d(AT, A1, 0); ++ __ st_d(A2, A1, 8); ++ __ st_d(A3, A1, 16); ++ __ move(A0, R0); ++ __ jr(RA); ++ } ++ ++ // 4: ++ if (UseLASX) { ++ __ xvld(F0, A0, 0); ++ __ xvst(F0, A1, 0); ++ } ++ } ++ ++ __ move(A0, R0); ++ __ jr(RA); ++ } ++ ++ // Arguments: ++ // aligned - true => Input and output aligned on a HeapWord == 8-byte boundary ++ // ignored ++ // is_oop - true => oop array, so generate store check code ++ // name - stub name string ++ // ++ // Inputs: ++ // A0 - source array address ++ // A1 - destination array address ++ // A2 - element count, treated as ssize_t, can be zero ++ // ++ // If 'from' and/or 'to' are aligned on 4-byte boundaries, we let ++ // the hardware handle it. The two dwords within qwords that span ++ // cache line boundaries will still be loaded and stored atomicly. ++ // ++ // Side Effects: ++ // disjoint_int_copy_entry is set to the no-overlap entry point ++ // used by generate_conjoint_int_oop_copy(). ++ // ++ address generate_disjoint_long_oop_copy(bool aligned, bool is_oop, Label &small, ++ Label &large, const char *name, int small_limit, ++ bool dest_uninitialized = false) { ++ __ align(CodeEntryAlignment); ++ StubCodeMark mark(this, "StubRoutines", name); ++ address start = __ pc(); ++ ++ gen_maybe_oop_copy(is_oop, true, aligned, small, large, name, ++ small_limit, 3, dest_uninitialized); ++ ++ return start; ++ } ++ ++ // Arguments: ++ // aligned - true => Input and output aligned on a HeapWord == 8-byte boundary ++ // ignored ++ // is_oop - true => oop array, so generate store check code ++ // name - stub name string ++ // ++ // Inputs: ++ // A0 - source array address ++ // A1 - destination array address ++ // A2 - element count, treated as ssize_t, can be zero ++ // ++ // If 'from' and/or 'to' are aligned on 4-byte boundaries, we let ++ // the hardware handle it. The two dwords within qwords that span ++ // cache line boundaries will still be loaded and stored atomicly. 
++ // ++ address generate_conjoint_long_oop_copy(bool aligned, bool is_oop, Label &small, ++ Label &large, const char *name, int small_limit, ++ bool dest_uninitialized = false) { ++ __ align(CodeEntryAlignment); ++ StubCodeMark mark(this, "StubRoutines", name); ++ address start = __ pc(); ++ ++ if (is_oop) { ++ array_overlap_test(StubRoutines::oop_disjoint_arraycopy(), 3); ++ } else { ++ array_overlap_test(StubRoutines::jlong_disjoint_arraycopy(), 3); ++ } ++ ++ gen_maybe_oop_copy(is_oop, false, aligned, small, large, name, ++ small_limit, 3, dest_uninitialized); ++ ++ return start; ++ } ++ ++ // Helper for generating a dynamic type check. ++ // Smashes scratch1, scratch2. ++ void generate_type_check(Register sub_klass, ++ Register super_check_offset, ++ Register super_klass, ++ Register tmp1, ++ Register tmp2, ++ Label& L_success) { ++ assert_different_registers(sub_klass, super_check_offset, super_klass); ++ ++ __ block_comment("type_check:"); ++ ++ Label L_miss; ++ ++ __ check_klass_subtype_fast_path(sub_klass, super_klass, tmp1, &L_success, &L_miss, NULL, ++ super_check_offset); ++ __ check_klass_subtype_slow_path(sub_klass, super_klass, tmp1, tmp2, &L_success, NULL); ++ ++ // Fall through on failure! ++ __ bind(L_miss); ++ } ++ ++ // ++ // Generate checkcasting array copy stub ++ // ++ // Input: ++ // A0 - source array address ++ // A1 - destination array address ++ // A2 - element count, treated as ssize_t, can be zero ++ // A3 - size_t ckoff (super_check_offset) ++ // A4 - oop ckval (super_klass) ++ // ++ // Output: ++ // V0 == 0 - success ++ // V0 == -1^K - failure, where K is partial transfer count ++ // ++ address generate_checkcast_copy(const char *name, bool dest_uninitialized = false) { ++ Label L_load_element, L_store_element, L_do_card_marks, L_done, L_done_pop; ++ ++ // Input registers (after setup_arg_regs) ++ const Register from = A0; // source array address ++ const Register to = A1; // destination array address ++ const Register count = A2; // elementscount ++ const Register ckoff = A3; // super_check_offset ++ const Register ckval = A4; // super_klass ++ ++ RegSet wb_pre_saved_regs = RegSet::range(A0, A4); ++ RegSet wb_post_saved_regs = RegSet::of(count); ++ ++ // Registers used as temps (S0, S1, S2, S3 are save-on-entry) ++ const Register copied_oop = S0; // actual oop copied ++ const Register count_save = S1; // orig elementscount ++ const Register start_to = S2; // destination array start address ++ const Register oop_klass = S3; // oop._klass ++ const Register tmp1 = A5; ++ const Register tmp2 = A6; ++ ++ //--------------------------------------------------------------- ++ // Assembler stub will be used for this call to arraycopy ++ // if the two arrays are subtypes of Object[] but the ++ // destination array type is not equal to or a supertype ++ // of the source type. Each element must be separately ++ // checked. ++ ++ assert_different_registers(from, to, count, ckoff, ckval, start_to, ++ copied_oop, oop_klass, count_save); ++ ++ __ align(CodeEntryAlignment); ++ StubCodeMark mark(this, "StubRoutines", name); ++ address start = __ pc(); ++ ++ // caller guarantees that the arrays really are different ++ // otherwise, we would have to make conjoint checks ++ ++ // Caller of this entry point must set up the argument registers. ++ __ block_comment("Entry:"); ++ ++ // Empty array: Nothing to do. 
++ __ beqz(count, L_done); ++ ++ __ push(RegSet::of(S0, S1, S2, S3, RA)); ++ ++#ifdef ASSERT ++ __ block_comment("assert consistent ckoff/ckval"); ++ // The ckoff and ckval must be mutually consistent, ++ // even though caller generates both. ++ { Label L; ++ int sco_offset = in_bytes(Klass::super_check_offset_offset()); ++ __ ld_w(start_to, Address(ckval, sco_offset)); ++ __ beq(ckoff, start_to, L); ++ __ stop("super_check_offset inconsistent"); ++ __ bind(L); ++ } ++#endif //ASSERT ++ ++ DecoratorSet decorators = IN_HEAP | IS_ARRAY | ARRAYCOPY_CHECKCAST | ARRAYCOPY_DISJOINT; ++ bool is_oop = true; ++ if (dest_uninitialized) { ++ decorators |= IS_DEST_UNINITIALIZED; ++ } ++ ++ BarrierSetAssembler *bs = BarrierSet::barrier_set()->barrier_set_assembler(); ++ bs->arraycopy_prologue(_masm, decorators, is_oop, from, to, count, wb_pre_saved_regs); ++ ++ // save the original count ++ __ move(count_save, count); ++ ++ // Copy from low to high addresses ++ __ move(start_to, to); // Save destination array start address ++ __ b(L_load_element); ++ ++ // ======== begin loop ======== ++ // (Loop is rotated; its entry is L_load_element.) ++ // Loop control: ++ // for (; count != 0; count--) { ++ // copied_oop = load_heap_oop(from++); ++ // ... generate_type_check ...; ++ // store_heap_oop(to++, copied_oop); ++ // } ++ __ align(OptoLoopAlignment); ++ ++ __ bind(L_store_element); ++ __ store_heap_oop(Address(to, 0), copied_oop, tmp1, tmp2, AS_RAW); // store the oop ++ __ addi_d(to, to, UseCompressedOops ? 4 : 8); ++ __ addi_d(count, count, -1); ++ __ beqz(count, L_do_card_marks); ++ ++ // ======== loop entry is here ======== ++ __ bind(L_load_element); ++ __ load_heap_oop(copied_oop, Address(from, 0), tmp1, tmp2, AS_RAW); // load the oop ++ __ addi_d(from, from, UseCompressedOops ? 4 : 8); ++ __ beqz(copied_oop, L_store_element); ++ ++ __ load_klass(oop_klass, copied_oop); // query the object klass ++ generate_type_check(oop_klass, ckoff, ckval, tmp1, tmp2, L_store_element); ++ // ======== end loop ======== ++ ++ // Register count = remaining oops, count_orig = total oops. ++ // Emit GC store barriers for the oops we have copied and report ++ // their number to the caller. ++ ++ __ sub_d(tmp1, count_save, count); // K = partially copied oop count ++ __ nor(count, tmp1, R0); // report (-1^K) to caller ++ __ beqz(tmp1, L_done_pop); ++ ++ __ bind(L_do_card_marks); ++ ++ bs->arraycopy_epilogue(_masm, decorators, is_oop, start_to, count_save, tmp2, wb_post_saved_regs); ++ ++ __ bind(L_done_pop); ++ __ pop(RegSet::of(S0, S1, S2, S3, RA)); ++ ++#ifndef PRODUCT ++ __ li(SCR2, (address)&SharedRuntime::_checkcast_array_copy_ctr); ++ __ increment(Address(SCR2, 0), 1); ++#endif ++ ++ __ bind(L_done); ++ __ move(A0, count); ++ __ jr(RA); ++ ++ return start; ++ } ++ ++ // ++ // Generate 'unsafe' array copy stub ++ // Though just as safe as the other stubs, it takes an unscaled ++ // size_t argument instead of an element count. ++ // ++ // Input: ++ // A0 - source array address ++ // A1 - destination array address ++ // A2 - byte count, treated as ssize_t, can be zero ++ // ++ // Examines the alignment of the operands and dispatches ++ // to a long, int, short, or byte copy loop. 
++ // ++ address generate_unsafe_copy(const char *name) { ++ Label L_long_aligned, L_int_aligned, L_short_aligned; ++ Register s = A0, d = A1, count = A2; ++ ++ __ align(CodeEntryAlignment); ++ StubCodeMark mark(this, "StubRoutines", name); ++ address start = __ pc(); ++ ++ __ orr(AT, s, d); ++ __ orr(AT, AT, count); ++ ++ __ andi(AT, AT, BytesPerLong-1); ++ __ beqz(AT, L_long_aligned); ++ __ andi(AT, AT, BytesPerInt-1); ++ __ beqz(AT, L_int_aligned); ++ __ andi(AT, AT, BytesPerShort-1); ++ __ beqz(AT, L_short_aligned); ++ __ b(StubRoutines::_jbyte_arraycopy); ++ ++ __ bind(L_short_aligned); ++ __ srli_d(count, count, LogBytesPerShort); // size => short_count ++ __ b(StubRoutines::_jshort_arraycopy); ++ __ bind(L_int_aligned); ++ __ srli_d(count, count, LogBytesPerInt); // size => int_count ++ __ b(StubRoutines::_jint_arraycopy); ++ __ bind(L_long_aligned); ++ __ srli_d(count, count, LogBytesPerLong); // size => long_count ++ __ b(StubRoutines::_jlong_arraycopy); ++ ++ return start; ++ } ++ ++ // Perform range checks on the proposed arraycopy. ++ // Kills temp, but nothing else. ++ // Also, clean the sign bits of src_pos and dst_pos. ++ void arraycopy_range_checks(Register src, // source array oop (A0) ++ Register src_pos, // source position (A1) ++ Register dst, // destination array oo (A2) ++ Register dst_pos, // destination position (A3) ++ Register length, ++ Register temp, ++ Label& L_failed) { ++ __ block_comment("arraycopy_range_checks:"); ++ ++ assert_different_registers(SCR1, temp); ++ ++ // if (src_pos + length > arrayOop(src)->length()) FAIL; ++ __ ld_w(SCR1, Address(src, arrayOopDesc::length_offset_in_bytes())); ++ __ add_w(temp, length, src_pos); ++ __ bltu(SCR1, temp, L_failed); ++ ++ // if (dst_pos + length > arrayOop(dst)->length()) FAIL; ++ __ ld_w(SCR1, Address(dst, arrayOopDesc::length_offset_in_bytes())); ++ __ add_w(temp, length, dst_pos); ++ __ bltu(SCR1, temp, L_failed); ++ ++ // Have to clean up high 32 bits of 'src_pos' and 'dst_pos'. ++ __ move(src_pos, src_pos); ++ __ move(dst_pos, dst_pos); ++ ++ __ block_comment("arraycopy_range_checks done"); ++ } ++ ++ // ++ // Generate generic array copy stubs ++ // ++ // Input: ++ // A0 - src oop ++ // A1 - src_pos (32-bits) ++ // A2 - dst oop ++ // A3 - dst_pos (32-bits) ++ // A4 - element count (32-bits) ++ // ++ // Output: ++ // V0 == 0 - success ++ // V0 == -1^K - failure, where K is partial transfer count ++ // ++ address generate_generic_copy(const char *name) { ++ Label L_failed, L_objArray; ++ Label L_copy_bytes, L_copy_shorts, L_copy_ints, L_copy_longs; ++ ++ // Input registers ++ const Register src = A0; // source array oop ++ const Register src_pos = A1; // source position ++ const Register dst = A2; // destination array oop ++ const Register dst_pos = A3; // destination position ++ const Register length = A4; ++ ++ // Registers used as temps ++ const Register dst_klass = A5; ++ ++ __ align(CodeEntryAlignment); ++ ++ StubCodeMark mark(this, "StubRoutines", name); ++ ++ address start = __ pc(); ++ ++#ifndef PRODUCT ++ // bump this on entry, not on exit: ++ __ li(SCR2, (address)&SharedRuntime::_generic_array_copy_ctr); ++ __ increment(Address(SCR2, 0), 1); ++#endif ++ ++ //----------------------------------------------------------------------- ++ // Assembler stub will be used for this call to arraycopy ++ // if the following conditions are met: ++ // ++ // (1) src and dst must not be null. ++ // (2) src_pos must not be negative. ++ // (3) dst_pos must not be negative. ++ // (4) length must not be negative. 
++ // (5) src klass and dst klass should be the same and not NULL. ++ // (6) src and dst should be arrays. ++ // (7) src_pos + length must not exceed length of src. ++ // (8) dst_pos + length must not exceed length of dst. ++ // ++ ++ // if (src == NULL) return -1; ++ __ beqz(src, L_failed); ++ ++ // if (src_pos < 0) return -1; ++ __ blt(src_pos, R0, L_failed); ++ ++ // if (dst == NULL) return -1; ++ __ beqz(dst, L_failed); ++ ++ // if (dst_pos < 0) return -1; ++ __ blt(dst_pos, R0, L_failed); ++ ++ // registers used as temp ++ const Register scratch_length = T0; // elements count to copy ++ const Register scratch_src_klass = T1; // array klass ++ const Register lh = T2; // layout helper ++ const Register tmp1 = T3; ++ const Register tmp2 = T4; ++ ++ // if (length < 0) return -1; ++ __ move(scratch_length, length); // length (elements count, 32-bits value) ++ __ blt(scratch_length, R0, L_failed); ++ ++ __ load_klass(scratch_src_klass, src); ++#ifdef ASSERT ++ // assert(src->klass() != NULL); ++ { ++ __ block_comment("assert klasses not null {"); ++ Label L1, L2; ++ __ bnez(scratch_src_klass, L2); // it is broken if klass is NULL ++ __ bind(L1); ++ __ stop("broken null klass"); ++ __ bind(L2); ++ __ load_klass(SCR2, dst); ++ __ beqz(SCR2, L1); // this would be broken also ++ __ block_comment("} assert klasses not null done"); ++ } ++#endif ++ ++ // Load layout helper (32-bits) ++ // ++ // |array_tag| | header_size | element_type | |log2_element_size| ++ // 32 30 24 16 8 2 0 ++ // ++ // array_tag: typeArray = 0x3, objArray = 0x2, non-array = 0x0 ++ // ++ ++ const int lh_offset = in_bytes(Klass::layout_helper_offset()); ++ ++ // Handle objArrays completely differently... ++ const jint objArray_lh = Klass::array_layout_helper(T_OBJECT); ++ __ ld_w(lh, Address(scratch_src_klass, lh_offset)); ++ __ li(SCR1, objArray_lh); ++ __ xorr(SCR2, lh, SCR1); ++ __ beqz(SCR2, L_objArray); ++ ++ // if (src->klass() != dst->klass()) return -1; ++ __ load_klass(SCR2, dst); ++ __ xorr(SCR2, SCR2, scratch_src_klass); ++ __ bnez(SCR2, L_failed); ++ ++ // if (!src->is_Array()) return -1; ++ __ bge(lh, R0, L_failed); // i.e. (lh >= 0) ++ ++ // At this point, it is known to be a typeArray (array_tag 0x3). 
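Given the layout-helper encoding sketched in the comment above, the primitive-array path only needs two of its fields: the header size in bytes and log2 of the element size, which the code that follows extracts with bstrpick_d and andi. A hedged C++ illustration of that decoding; the shift/mask values mirror HotSpot's Klass::_lh_* constants and should be treated as illustrative here:

    #include <cstdint>

    // Field positions as used by the stub (see Klass::_lh_* for the canonical values).
    constexpr int kHeaderSizeShift   = 16;    // _lh_header_size_shift
    constexpr int kHeaderSizeMask    = 0xFF;  // _lh_header_size_mask
    constexpr int kLog2ElemSizeShift = 0;     // _lh_log2_element_size_shift
    constexpr int kLog2ElemSizeMask  = 0x3F;  // _lh_log2_element_size_mask

    struct ArrayLayout {
      int header_size_in_bytes;  // offset of element 0 from the array base
      int log2_element_size;     // 0..3 for primitive arrays
    };

    // Decode an array layout helper the way generate_generic_copy() does for
    // the TypeArrayKlass case (array layout helpers are negative by design,
    // which is why the earlier "bge(lh, R0, L_failed)" rejects non-arrays).
    static ArrayLayout decode_array_layout(int32_t lh) {
      return ArrayLayout{
        (lh >> kHeaderSizeShift)   & kHeaderSizeMask,
        (lh >> kLog2ElemSizeShift) & kLog2ElemSizeMask
      };
    }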
++#ifdef ASSERT ++ { ++ __ block_comment("assert primitive array {"); ++ Label L; ++ __ li(SCR2, (int)(Klass::_lh_array_tag_type_value << Klass::_lh_array_tag_shift)); ++ __ bge(lh, SCR2, L); ++ __ stop("must be a primitive array"); ++ __ bind(L); ++ __ block_comment("} assert primitive array done"); ++ } ++#endif ++ ++ arraycopy_range_checks(src, src_pos, dst, dst_pos, scratch_length, SCR2, L_failed); ++ ++ // TypeArrayKlass ++ // ++ // src_addr = (src + array_header_in_bytes()) + (src_pos << log2elemsize); ++ // dst_addr = (dst + array_header_in_bytes()) + (dst_pos << log2elemsize); ++ // ++ ++ const Register scr1_offset = SCR1; // array offset ++ const Register elsize = lh; // element size ++ ++ __ bstrpick_d(scr1_offset, lh, Klass::_lh_header_size_shift + ++ exact_log2(Klass::_lh_header_size_mask+1) - 1, ++ Klass::_lh_header_size_shift); // array_offset ++ __ add_d(src, src, scr1_offset); // src array offset ++ __ add_d(dst, dst, scr1_offset); // dst array offset ++ __ block_comment("choose copy loop based on element size"); ++ ++ // next registers should be set before the jump to corresponding stub ++ const Register from = A0; // source array address ++ const Register to = A1; // destination array address ++ const Register count = A2; // elements count ++ ++ // 'from', 'to', 'count' registers should be set in such order ++ // since they are the same as 'src', 'src_pos', 'dst'. ++ ++ assert(Klass::_lh_log2_element_size_shift == 0, "fix this code"); ++ ++ // The possible values of elsize are 0-3, i.e. exact_log2(element ++ // size in bytes). We do a simple bitwise binary search. ++ __ bind(L_copy_bytes); ++ __ andi(tmp1, elsize, 2); ++ __ bnez(tmp1, L_copy_ints); ++ __ andi(tmp1, elsize, 1); ++ __ bnez(tmp1, L_copy_shorts); ++ __ lea(from, Address(src, src_pos, Address::no_scale)); // src_addr ++ __ lea(to, Address(dst, dst_pos, Address::no_scale)); // dst_addr ++ __ move(count, scratch_length); // length ++ __ b(StubRoutines::_jbyte_arraycopy); ++ ++ __ bind(L_copy_shorts); ++ __ lea(from, Address(src, src_pos, Address::times_2)); // src_addr ++ __ lea(to, Address(dst, dst_pos, Address::times_2)); // dst_addr ++ __ move(count, scratch_length); // length ++ __ b(StubRoutines::_jshort_arraycopy); ++ ++ __ bind(L_copy_ints); ++ __ andi(tmp1, elsize, 1); ++ __ bnez(tmp1, L_copy_longs); ++ __ lea(from, Address(src, src_pos, Address::times_4)); // src_addr ++ __ lea(to, Address(dst, dst_pos, Address::times_4)); // dst_addr ++ __ move(count, scratch_length); // length ++ __ b(StubRoutines::_jint_arraycopy); ++ ++ __ bind(L_copy_longs); ++#ifdef ASSERT ++ { ++ __ block_comment("assert long copy {"); ++ Label L; ++ __ andi(lh, lh, Klass::_lh_log2_element_size_mask); // lh -> elsize ++ __ li(tmp1, LogBytesPerLong); ++ __ beq(elsize, tmp1, L); ++ __ stop("must be long copy, but elsize is wrong"); ++ __ bind(L); ++ __ block_comment("} assert long copy done"); ++ } ++#endif ++ __ lea(from, Address(src, src_pos, Address::times_8)); // src_addr ++ __ lea(to, Address(dst, dst_pos, Address::times_8)); // dst_addr ++ __ move(count, scratch_length); // length ++ __ b(StubRoutines::_jlong_arraycopy); ++ ++ // ObjArrayKlass ++ __ bind(L_objArray); ++ // live at this point: scratch_src_klass, scratch_length, src[_pos], dst[_pos] ++ ++ Label L_plain_copy, L_checkcast_copy; ++ // test array classes for subtyping ++ __ load_klass(tmp1, dst); ++ __ bne(scratch_src_klass, tmp1, L_checkcast_copy); // usual case is exact equality ++ ++ // Identically typed arrays can be copied without element-wise checks. 
++ arraycopy_range_checks(src, src_pos, dst, dst_pos, scratch_length, SCR2, L_failed); ++ ++ __ lea(from, Address(src, src_pos, Address::ScaleFactor(LogBytesPerHeapOop))); ++ __ addi_d(from, from, arrayOopDesc::base_offset_in_bytes(T_OBJECT)); ++ __ lea(to, Address(dst, dst_pos, Address::ScaleFactor(LogBytesPerHeapOop))); ++ __ addi_d(to, to, arrayOopDesc::base_offset_in_bytes(T_OBJECT)); ++ __ move(count, scratch_length); // length ++ __ bind(L_plain_copy); ++ __ b(StubRoutines::_oop_arraycopy); ++ ++ __ bind(L_checkcast_copy); ++ // live at this point: scratch_src_klass, scratch_length, tmp1 (dst_klass) ++ { ++ // Before looking at dst.length, make sure dst is also an objArray. ++ __ ld_w(SCR1, Address(tmp1, lh_offset)); ++ __ li(SCR2, objArray_lh); ++ __ xorr(SCR1, SCR1, SCR2); ++ __ bnez(SCR1, L_failed); ++ ++ // It is safe to examine both src.length and dst.length. ++ arraycopy_range_checks(src, src_pos, dst, dst_pos, scratch_length, tmp1, L_failed); ++ ++ __ load_klass(dst_klass, dst); // reload ++ ++ // Marshal the base address arguments now, freeing registers. ++ __ lea(from, Address(src, src_pos, Address::ScaleFactor(LogBytesPerHeapOop))); ++ __ addi_d(from, from, arrayOopDesc::base_offset_in_bytes(T_OBJECT)); ++ __ lea(to, Address(dst, dst_pos, Address::ScaleFactor(LogBytesPerHeapOop))); ++ __ addi_d(to, to, arrayOopDesc::base_offset_in_bytes(T_OBJECT)); ++ __ move(count, length); // length (reloaded) ++ Register sco_temp = A3; // this register is free now ++ assert_different_registers(from, to, count, sco_temp, dst_klass, scratch_src_klass); ++ // assert_clean_int(count, sco_temp); ++ ++ // Generate the type check. ++ const int sco_offset = in_bytes(Klass::super_check_offset_offset()); ++ __ ld_w(sco_temp, Address(dst_klass, sco_offset)); ++ ++ // Smashes SCR1, SCR2 ++ generate_type_check(scratch_src_klass, sco_temp, dst_klass, tmp1, tmp2, L_plain_copy); ++ ++ // Fetch destination element klass from the ObjArrayKlass header. ++ int ek_offset = in_bytes(ObjArrayKlass::element_klass_offset()); ++ __ ld_d(dst_klass, Address(dst_klass, ek_offset)); ++ __ ld_w(sco_temp, Address(dst_klass, sco_offset)); ++ ++ // the checkcast_copy loop needs two extra arguments: ++ assert(A3 == sco_temp, "#3 already in place"); ++ // Set up arguments for checkcast_arraycopy. 
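The ckoff/ckval pair prepared here parametrizes the fast subtype test: check_klass_subtype_fast_path loads the word at sub_klass + super_check_offset and compares it against the candidate super klass, falling back to the slow secondary-supers scan only when that probe is inconclusive. A heavily simplified model of just the probe (not the full HotSpot algorithm, which also special-cases the secondary-super-cache offset):

    #include <cstdint>
    #include <cstring>

    // Probe used by the fast path: read the klass word at the offset the
    // destination element klass advertises and compare it with that klass.
    static bool fast_subtype_probe_hits(const uint8_t* sub_klass,
                                        int super_check_offset,
                                        const void* super_klass) {
      const void* probed = nullptr;
      std::memcpy(&probed, sub_klass + super_check_offset, sizeof(probed));
      return probed == super_klass;
    }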
++ __ move(A4, dst_klass); // dst.klass.element_klass ++ __ b(StubRoutines::_checkcast_arraycopy); ++ } ++ ++ __ bind(L_failed); ++ __ li(V0, -1); ++ __ jr(RA); ++ ++ return start; ++ } ++ ++ void generate_arraycopy_stubs() { ++ Label disjoint_large_copy, conjoint_large_copy; ++ Label byte_small_copy, short_small_copy, int_small_copy, long_small_copy; ++ int int_oop_small_limit, long_oop_small_limit; ++ ++ if (UseLASX) { ++ int_oop_small_limit = 9; ++ long_oop_small_limit = 5; ++ generate_disjoint_large_copy_lasx(disjoint_large_copy, "disjoint_large_copy_lasx"); ++ generate_conjoint_large_copy_lasx(conjoint_large_copy, "conjoint_large_copy_lasx"); ++ } else if (UseLSX) { ++ int_oop_small_limit = 7; ++ long_oop_small_limit = 4; ++ generate_disjoint_large_copy_lsx(disjoint_large_copy, "disjoint_large_copy_lsx"); ++ generate_conjoint_large_copy_lsx(conjoint_large_copy, "conjoint_large_copy_lsx"); ++ } else { ++ int_oop_small_limit = 7; ++ long_oop_small_limit = 4; ++ generate_disjoint_large_copy(disjoint_large_copy, "disjoint_large_copy_int"); ++ generate_conjoint_large_copy(conjoint_large_copy, "conjoint_large_copy_int"); ++ } ++ generate_byte_small_copy(byte_small_copy, "jbyte_small_copy"); ++ generate_short_small_copy(short_small_copy, "jshort_small_copy"); ++ generate_int_small_copy(int_small_copy, "jint_small_copy"); ++ generate_long_small_copy(long_small_copy, "jlong_small_copy"); ++ ++ if (UseCompressedOops) { ++ StubRoutines::_oop_disjoint_arraycopy = generate_disjoint_int_oop_copy(false, true, int_small_copy, disjoint_large_copy, ++ "oop_disjoint_arraycopy", int_oop_small_limit); ++ StubRoutines::_oop_disjoint_arraycopy_uninit = generate_disjoint_int_oop_copy(false, true, int_small_copy, disjoint_large_copy, ++ "oop_disjoint_arraycopy_uninit", int_oop_small_limit, true); ++ StubRoutines::_oop_arraycopy = generate_conjoint_int_oop_copy(false, true, int_small_copy, conjoint_large_copy, ++ "oop_arraycopy", int_oop_small_limit); ++ StubRoutines::_oop_arraycopy_uninit = generate_conjoint_int_oop_copy(false, true, int_small_copy, conjoint_large_copy, ++ "oop_arraycopy_uninit", int_oop_small_limit, true); ++ } else { ++ StubRoutines::_oop_disjoint_arraycopy = generate_disjoint_long_oop_copy(false, true, long_small_copy, disjoint_large_copy, ++ "oop_disjoint_arraycopy", long_oop_small_limit); ++ StubRoutines::_oop_disjoint_arraycopy_uninit = generate_disjoint_long_oop_copy(false, true, long_small_copy, disjoint_large_copy, ++ "oop_disjoint_arraycopy_uninit", long_oop_small_limit, true); ++ StubRoutines::_oop_arraycopy = generate_conjoint_long_oop_copy(false, true, long_small_copy, conjoint_large_copy, ++ "oop_arraycopy", long_oop_small_limit); ++ StubRoutines::_oop_arraycopy_uninit = generate_conjoint_long_oop_copy(false, true, long_small_copy, conjoint_large_copy, ++ "oop_arraycopy_uninit", long_oop_small_limit, true); ++ } ++ ++ StubRoutines::_jbyte_disjoint_arraycopy = generate_disjoint_byte_copy(false, byte_small_copy, disjoint_large_copy, "jbyte_disjoint_arraycopy"); ++ StubRoutines::_jshort_disjoint_arraycopy = generate_disjoint_short_copy(false, short_small_copy, disjoint_large_copy, "jshort_disjoint_arraycopy"); ++ StubRoutines::_jint_disjoint_arraycopy = generate_disjoint_int_oop_copy(false, false, int_small_copy, disjoint_large_copy, ++ "jint_disjoint_arraycopy", int_oop_small_limit); ++ ++ StubRoutines::_jbyte_arraycopy = generate_conjoint_byte_copy(false, byte_small_copy, conjoint_large_copy, "jbyte_arraycopy"); ++ StubRoutines::_jshort_arraycopy = 
generate_conjoint_short_copy(false, short_small_copy, conjoint_large_copy, "jshort_arraycopy"); ++ StubRoutines::_jint_arraycopy = generate_conjoint_int_oop_copy(false, false, int_small_copy, conjoint_large_copy, ++ "jint_arraycopy", int_oop_small_limit); ++ ++ StubRoutines::_jlong_disjoint_arraycopy = generate_disjoint_long_oop_copy(false, false, long_small_copy, disjoint_large_copy, ++ "jlong_disjoint_arraycopy", long_oop_small_limit); ++ StubRoutines::_jlong_arraycopy = generate_conjoint_long_oop_copy(false, false, long_small_copy, conjoint_large_copy, ++ "jlong_arraycopy", long_oop_small_limit); ++ ++ // We don't generate specialized code for HeapWord-aligned source ++ // arrays, so just use the code we've already generated ++ StubRoutines::_arrayof_jbyte_disjoint_arraycopy = StubRoutines::_jbyte_disjoint_arraycopy; ++ StubRoutines::_arrayof_jbyte_arraycopy = StubRoutines::_jbyte_arraycopy; ++ ++ StubRoutines::_arrayof_jshort_disjoint_arraycopy = StubRoutines::_jshort_disjoint_arraycopy; ++ StubRoutines::_arrayof_jshort_arraycopy = StubRoutines::_jshort_arraycopy; ++ ++ StubRoutines::_arrayof_jint_disjoint_arraycopy = StubRoutines::_jint_disjoint_arraycopy; ++ StubRoutines::_arrayof_jint_arraycopy = StubRoutines::_jint_arraycopy; ++ ++ StubRoutines::_arrayof_jlong_disjoint_arraycopy = StubRoutines::_jlong_disjoint_arraycopy; ++ StubRoutines::_arrayof_jlong_arraycopy = StubRoutines::_jlong_arraycopy; ++ ++ StubRoutines::_arrayof_oop_disjoint_arraycopy = StubRoutines::_oop_disjoint_arraycopy; ++ StubRoutines::_arrayof_oop_arraycopy = StubRoutines::_oop_arraycopy; ++ ++ StubRoutines::_arrayof_oop_disjoint_arraycopy_uninit = StubRoutines::_oop_disjoint_arraycopy_uninit; ++ StubRoutines::_arrayof_oop_arraycopy_uninit = StubRoutines::_oop_arraycopy_uninit; ++ ++ StubRoutines::_checkcast_arraycopy = generate_checkcast_copy("checkcast_arraycopy"); ++ StubRoutines::_checkcast_arraycopy_uninit = generate_checkcast_copy("checkcast_arraycopy_uninit", true); ++ ++ StubRoutines::_unsafe_arraycopy = generate_unsafe_copy("unsafe_arraycopy"); ++ ++ StubRoutines::_generic_arraycopy = generate_generic_copy("generic_arraycopy"); ++ ++ StubRoutines::_jbyte_fill = generate_fill(T_BYTE, false, "jbyte_fill"); ++ StubRoutines::_jshort_fill = generate_fill(T_SHORT, false, "jshort_fill"); ++ StubRoutines::_jint_fill = generate_fill(T_INT, false, "jint_fill"); ++ StubRoutines::_arrayof_jbyte_fill = generate_fill(T_BYTE, true, "arrayof_jbyte_fill"); ++ StubRoutines::_arrayof_jshort_fill = generate_fill(T_SHORT, true, "arrayof_jshort_fill"); ++ StubRoutines::_arrayof_jint_fill = generate_fill(T_INT, true, "arrayof_jint_fill"); ++ } ++ ++ address generate_method_entry_barrier() { ++ __ align(CodeEntryAlignment); ++ StubCodeMark mark(this, "StubRoutines", "nmethod_entry_barrier"); ++ ++ Label deoptimize_label; ++ Register rscratch2 = T8; ++ ++ address start = __ pc(); ++ ++ __ set_last_Java_frame(SP, FP, RA); ++ ++ __ enter(); ++ __ addi_d(T4, SP, wordSize); // T4 points to the saved RA ++ ++ __ addi_d(SP, SP, -4 * wordSize); // four words for the returned {SP, FP, RA, PC} ++ ++ __ push(V0); ++ __ push_call_clobbered_registers_except(RegSet::of(V0)); ++ ++ __ move(A0, T4); ++ __ call_VM_leaf ++ (CAST_FROM_FN_PTR ++ (address, BarrierSetNMethod::nmethod_stub_entry_barrier), 1); ++ ++ __ reset_last_Java_frame(true); ++ ++ __ pop_call_clobbered_registers_except(RegSet::of(V0)); ++ ++ __ bnez(V0, deoptimize_label); ++ ++ __ pop(V0); ++ __ leave(); ++ __ jr(RA); ++ ++ __ bind(deoptimize_label); ++ ++ __ pop(V0); ++ __ 
ld_d(rscratch2, SP, 0); ++ __ ld_d(FP, SP, 1 * wordSize); ++ __ ld_d(RA, SP, 2 * wordSize); ++ __ ld_d(T4, SP, 3 * wordSize); ++ ++ __ move(SP, rscratch2); ++ __ jr(T4); ++ ++ return start; ++ } ++ ++ // T8 result ++ // A4 src ++ // A5 src count ++ // A6 pattern ++ // A7 pattern count ++ address generate_string_indexof_linear(bool needle_isL, bool haystack_isL) ++ { ++ const char* stubName = needle_isL ++ ? (haystack_isL ? "indexof_linear_ll" : "indexof_linear_ul") ++ : "indexof_linear_uu"; ++ __ align(CodeEntryAlignment); ++ StubCodeMark mark(this, "StubRoutines", stubName); ++ address entry = __ pc(); ++ ++ int needle_chr_size = needle_isL ? 1 : 2; ++ int haystack_chr_size = haystack_isL ? 1 : 2; ++ int needle_chr_shift = needle_isL ? 0 : 1; ++ int haystack_chr_shift = haystack_isL ? 0 : 1; ++ bool isL = needle_isL && haystack_isL; ++ ++ // parameters ++ Register result = T8, haystack = A4, haystack_len = A5, needle = A6, needle_len = A7; ++ ++ // temporary registers ++ Register match_mask = T0, mask1 = T1, mask2 = T2; ++ Register first = T3, trailing_zeros = T4; ++ Register ch1 = T5, ch2 = T6; ++ ++ RegSet spilled_regs = RegSet::range(T0, T6); ++ ++ __ push(spilled_regs); ++ ++ Label L_LOOP, L_LOOP_PROCEED, L_SMALL, L_HAS_ZERO, L_SMALL_HAS_ZERO, ++ L_HAS_ZERO_LOOP, L_CMP_LOOP, L_CMP_LOOP_NOMATCH, ++ L_SMALL_HAS_ZERO_LOOP, L_SMALL_CMP_LOOP_NOMATCH, L_SMALL_CMP_LOOP, ++ L_POST_LOOP, L_CMP_LOOP_LAST_CMP, L_HAS_ZERO_LOOP_NOMATCH, ++ L_SMALL_CMP_LOOP_LAST_CMP, L_SMALL_CMP_LOOP_LAST_CMP2, ++ L_CMP_LOOP_LAST_CMP2, DONE, NOMATCH; ++ ++ __ ld_d(ch1, Address(needle)); ++ ++ // src.length - pattern.length ++ __ sub_d(haystack_len, haystack_len, needle_len); ++ ++ // first is needle[0] ++ __ bstrpick_d(first, ch1, needle_isL ? 7 : 15, 0); ++ ++ uint64_t mask0101 = UCONST64(0x0101010101010101); ++ uint64_t mask0001 = UCONST64(0x0001000100010001); ++ __ li(mask1, haystack_isL ? mask0101 : mask0001); ++ ++ uint64_t mask7f7f = UCONST64(0x7f7f7f7f7f7f7f7f); ++ uint64_t mask7fff = UCONST64(0x7fff7fff7fff7fff); ++ __ li(mask2, haystack_isL ? mask7f7f : mask7fff); ++ ++ // first -> needle[0]needle[0]needle[0]needle[0] ++ if (haystack_isL) __ bstrins_d(first, first, 15, 8); ++ __ bstrins_d(first, first, 31, 16); ++ __ bstrins_d(first, first, 63, 32); ++ ++ if (needle_isL != haystack_isL) { ++ // convert Latin1 to UTF. eg: 0x0000abcd -> 0x0a0b0c0d ++ __ move(AT, ch1); ++ __ bstrpick_d(ch1, AT, 7, 0); ++ __ srli_d(AT, AT, 8); ++ __ bstrins_d(ch1, AT, 23, 16); ++ __ srli_d(AT, AT, 8); ++ __ bstrins_d(ch1, AT, 39, 32); ++ __ srli_d(AT, AT, 8); ++ __ bstrins_d(ch1, AT, 55, 48); ++ } ++ ++ __ addi_d(haystack_len, haystack_len, -1 * (wordSize / haystack_chr_size - 1)); ++ __ bge(R0, haystack_len, L_SMALL); ++ ++ // compare and set match_mask[i] with 0x80/0x8000 (Latin1/UTF16) if ch2[i] == first[i] ++ // eg: ++ // first: aa aa aa aa aa aa aa aa ++ // ch2: aa aa li nx jd ka aa aa ++ // match_mask: 80 80 00 00 00 00 80 80 ++ ++ __ bind(L_LOOP); ++ __ ld_d(ch2, Address(haystack)); ++ // compute match_mask ++ __ xorr(ch2, first, ch2); ++ __ sub_d(match_mask, ch2, mask1); ++ __ orr(ch2, ch2, mask2); ++ __ andn(match_mask, match_mask, ch2); ++ // search first char of needle, goto L_HAS_ZERO if success. 
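The xorr/sub_d/orr/andn sequence above is the usual SWAR "zero byte" test applied to chunk ^ first: a lane of the XOR is zero exactly where the haystack byte equals the first needle character, and the subtraction/or/and-not above turns each such lane into 0x80 (0x8000 in the UTF-16 flavour). A standalone C++ version of the byte flavour, with the caveat that lanes above the lowest match can be flagged spuriously; that is harmless here because the stub always takes the lowest set bit first and verifies each candidate with a full compare loop:

    #include <cstdint>

    // Flag every byte lane of 'chunk' that equals the (replicated) first
    // needle byte, mirroring the stub's mask1/mask2 computation.
    static uint64_t match_mask_bytes(uint64_t chunk, uint64_t first_replicated) {
      const uint64_t ones = 0x0101010101010101ULL;  // mask1 in the stub
      const uint64_t high = 0x7f7f7f7f7f7f7f7fULL;  // mask2 in the stub
      uint64_t x = chunk ^ first_replicated;        // zero lane <=> byte match
      return (x - ones) & ~(x | high);              // 0x80 in matching lanes
    }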
++ __ bnez(match_mask, L_HAS_ZERO); ++ ++ __ bind(L_LOOP_PROCEED); ++ __ addi_d(haystack_len, haystack_len, -1 * (wordSize / haystack_chr_size)); ++ __ addi_d(haystack, haystack, wordSize); ++ __ addi_d(result, result, wordSize / haystack_chr_size); ++ __ bge(haystack_len, R0, L_LOOP); ++ ++ __ bind(L_POST_LOOP); ++ __ li(ch2, -1 * (wordSize / haystack_chr_size)); ++ __ bge(ch2, haystack_len, NOMATCH); // no extra characters to check ++ ++ __ bind(L_SMALL); ++ __ ld_d(ch2, Address(haystack)); ++ __ slli_d(haystack_len, haystack_len, LogBitsPerByte + haystack_chr_shift); ++ __ sub_d(haystack_len, R0, haystack_len); ++ // compute match_mask ++ __ xorr(ch2, first, ch2); ++ __ sub_d(match_mask, ch2, mask1); ++ __ orr(ch2, ch2, mask2); ++ __ andn(match_mask, match_mask, ch2); ++ // clear useless match_mask bits and check ++ __ nor(trailing_zeros, R0, R0); // all bits set ++ __ srl_d(trailing_zeros, trailing_zeros, haystack_len); // zeroes on useless bits. ++ __ andr(match_mask, match_mask, trailing_zeros); // refine match_mask ++ __ beqz(match_mask, NOMATCH); ++ ++ __ bind(L_SMALL_HAS_ZERO); ++ __ ctz_d(trailing_zeros, match_mask); ++ __ li(AT, wordSize / haystack_chr_size); ++ __ bge(AT, needle_len, L_SMALL_CMP_LOOP_LAST_CMP2); ++ ++ __ bind(L_SMALL_HAS_ZERO_LOOP); ++ // compute index ++ __ srl_d(match_mask, match_mask, trailing_zeros); ++ __ srli_d(match_mask, match_mask, 1); ++ __ srli_d(AT, trailing_zeros, LogBitsPerByte); ++ if (!haystack_isL) __ andi(AT, AT, 0xE); ++ __ add_d(haystack, haystack, AT); ++ __ ld_d(ch2, Address(haystack)); ++ if (!haystack_isL) __ srli_d(AT, AT, haystack_chr_shift); ++ __ add_d(result, result, AT); ++ ++ __ li(trailing_zeros, wordSize / haystack_chr_size); ++ __ bne(ch1, ch2, L_SMALL_CMP_LOOP_NOMATCH); ++ ++ __ bind(L_SMALL_CMP_LOOP); ++ needle_isL ? __ ld_bu(first, Address(needle, trailing_zeros, Address::no_scale, 0)) ++ : __ ld_hu(first, Address(needle, trailing_zeros, Address::times_2, 0)); ++ haystack_isL ? 
__ ld_bu(ch2, Address(haystack, trailing_zeros, Address::no_scale, 0)) ++ : __ ld_hu(ch2, Address(haystack, trailing_zeros, Address::times_2, 0)); ++ __ addi_d(trailing_zeros, trailing_zeros, 1); ++ __ bge(trailing_zeros, needle_len, L_SMALL_CMP_LOOP_LAST_CMP); ++ __ beq(first, ch2, L_SMALL_CMP_LOOP); ++ ++ __ bind(L_SMALL_CMP_LOOP_NOMATCH); ++ __ beqz(match_mask, NOMATCH); ++ __ ctz_d(trailing_zeros, match_mask); ++ __ addi_d(result, result, 1); ++ __ addi_d(haystack, haystack, haystack_chr_size); ++ __ b(L_SMALL_HAS_ZERO_LOOP); ++ ++ __ bind(L_SMALL_CMP_LOOP_LAST_CMP); ++ __ bne(first, ch2, L_SMALL_CMP_LOOP_NOMATCH); ++ __ b(DONE); ++ ++ __ bind(L_SMALL_CMP_LOOP_LAST_CMP2); ++ // compute index ++ __ srl_d(match_mask, match_mask, trailing_zeros); ++ __ srli_d(match_mask, match_mask, 1); ++ __ srli_d(AT, trailing_zeros, LogBitsPerByte); ++ if (!haystack_isL) __ andi(AT, AT, 0xE); ++ __ add_d(haystack, haystack, AT); ++ __ ld_d(ch2, Address(haystack)); ++ if (!haystack_isL) __ srli_d(AT, AT, haystack_chr_shift); ++ __ add_d(result, result, AT); ++ ++ __ bne(ch1, ch2, L_SMALL_CMP_LOOP_NOMATCH); ++ __ b(DONE); ++ ++ __ bind(L_HAS_ZERO); ++ __ ctz_d(trailing_zeros, match_mask); ++ __ li(AT, wordSize / haystack_chr_size); ++ __ bge(AT, needle_len, L_CMP_LOOP_LAST_CMP2); ++ __ addi_d(result, result, -1); // array index from 0, so result -= 1 ++ ++ __ bind(L_HAS_ZERO_LOOP); ++ // compute index ++ __ srl_d(match_mask, match_mask, trailing_zeros); ++ __ srli_d(match_mask, match_mask, 1); ++ __ srli_d(AT, trailing_zeros, LogBitsPerByte); ++ if (!haystack_isL) __ andi(AT, AT, 0xE); ++ __ add_d(haystack, haystack, AT); ++ __ ld_d(ch2, Address(haystack)); ++ if (!haystack_isL) __ srli_d(AT, AT, haystack_chr_shift); ++ __ add_d(result, result, AT); ++ ++ __ addi_d(result, result, 1); ++ __ li(trailing_zeros, wordSize / haystack_chr_size); ++ __ bne(ch1, ch2, L_CMP_LOOP_NOMATCH); ++ ++ // compare one char ++ __ bind(L_CMP_LOOP); ++ haystack_isL ? __ ld_bu(ch2, Address(haystack, trailing_zeros, Address::no_scale, 0)) ++ : __ ld_hu(ch2, Address(haystack, trailing_zeros, Address::times_2, 0)); ++ needle_isL ? __ ld_bu(AT, Address(needle, trailing_zeros, Address::no_scale, 0)) ++ : __ ld_hu(AT, Address(needle, trailing_zeros, Address::times_2, 0)); ++ __ addi_d(trailing_zeros, trailing_zeros, 1); // next char index ++ __ bge(trailing_zeros, needle_len, L_CMP_LOOP_LAST_CMP); ++ __ beq(AT, ch2, L_CMP_LOOP); ++ ++ __ bind(L_CMP_LOOP_NOMATCH); ++ __ beqz(match_mask, L_HAS_ZERO_LOOP_NOMATCH); ++ __ ctz_d(trailing_zeros, match_mask); ++ __ addi_d(haystack, haystack, haystack_chr_size); ++ __ b(L_HAS_ZERO_LOOP); ++ ++ __ bind(L_CMP_LOOP_LAST_CMP); ++ __ bne(AT, ch2, L_CMP_LOOP_NOMATCH); ++ __ b(DONE); ++ ++ __ bind(L_CMP_LOOP_LAST_CMP2); ++ // compute index ++ __ srl_d(match_mask, match_mask, trailing_zeros); ++ __ srli_d(match_mask, match_mask, 1); ++ __ srli_d(AT, trailing_zeros, LogBitsPerByte); ++ if (!haystack_isL) __ andi(AT, AT, 0xE); ++ __ add_d(haystack, haystack, AT); ++ __ ld_d(ch2, Address(haystack)); ++ if (!haystack_isL) __ srli_d(AT, AT, haystack_chr_shift); ++ __ add_d(result, result, AT); ++ ++ __ addi_d(result, result, 1); ++ __ bne(ch1, ch2, L_CMP_LOOP_NOMATCH); ++ __ b(DONE); ++ ++ __ bind(L_HAS_ZERO_LOOP_NOMATCH); ++ // 1) Restore "result" index. Index was wordSize/str2_chr_size * N until ++ // L_HAS_ZERO block. Byte octet was analyzed in L_HAS_ZERO_LOOP, ++ // so, result was increased at max by wordSize/str2_chr_size - 1, so, ++ // respective high bit wasn't changed. 
L_LOOP_PROCEED will increase ++ // result by analyzed characters value, so, we can just reset lower bits ++ // in result here. Clear 2 lower bits for UU/UL and 3 bits for LL ++ // 2) advance haystack value to represent next haystack octet. result & 7/3 is ++ // index of last analyzed substring inside current octet. So, haystack in at ++ // respective start address. We need to advance it to next octet ++ __ andi(match_mask, result, wordSize / haystack_chr_size - 1); ++ __ sub_d(result, result, match_mask); ++ if (!haystack_isL) __ slli_d(match_mask, match_mask, haystack_chr_shift); ++ __ sub_d(haystack, haystack, match_mask); ++ __ b(L_LOOP_PROCEED); ++ ++ __ bind(NOMATCH); ++ __ nor(result, R0, R0); // result = -1 ++ ++ __ bind(DONE); ++ __ pop(spilled_regs); ++ __ jr(RA); ++ return entry; ++ } ++ ++ void generate_string_indexof_stubs() ++ { ++ StubRoutines::la::_string_indexof_linear_ll = generate_string_indexof_linear(true, true); ++ StubRoutines::la::_string_indexof_linear_uu = generate_string_indexof_linear(false, false); ++ StubRoutines::la::_string_indexof_linear_ul = generate_string_indexof_linear(true, false); ++ } ++ ++ // Arguments: ++ // ++ // Inputs: ++ // A0 - source byte array address ++ // A1 - destination byte array address ++ // A2 - K (key) in little endian int array ++ // A3 - r vector byte array address ++ // A4 - input length ++ // ++ // Output: ++ // A0 - input length ++ // ++ address generate_aescrypt_encryptBlock(bool cbc) { ++ static const uint32_t ft_consts[256] = { ++ 0xc66363a5, 0xf87c7c84, 0xee777799, 0xf67b7b8d, ++ 0xfff2f20d, 0xd66b6bbd, 0xde6f6fb1, 0x91c5c554, ++ 0x60303050, 0x02010103, 0xce6767a9, 0x562b2b7d, ++ 0xe7fefe19, 0xb5d7d762, 0x4dababe6, 0xec76769a, ++ 0x8fcaca45, 0x1f82829d, 0x89c9c940, 0xfa7d7d87, ++ 0xeffafa15, 0xb25959eb, 0x8e4747c9, 0xfbf0f00b, ++ 0x41adadec, 0xb3d4d467, 0x5fa2a2fd, 0x45afafea, ++ 0x239c9cbf, 0x53a4a4f7, 0xe4727296, 0x9bc0c05b, ++ 0x75b7b7c2, 0xe1fdfd1c, 0x3d9393ae, 0x4c26266a, ++ 0x6c36365a, 0x7e3f3f41, 0xf5f7f702, 0x83cccc4f, ++ 0x6834345c, 0x51a5a5f4, 0xd1e5e534, 0xf9f1f108, ++ 0xe2717193, 0xabd8d873, 0x62313153, 0x2a15153f, ++ 0x0804040c, 0x95c7c752, 0x46232365, 0x9dc3c35e, ++ 0x30181828, 0x379696a1, 0x0a05050f, 0x2f9a9ab5, ++ 0x0e070709, 0x24121236, 0x1b80809b, 0xdfe2e23d, ++ 0xcdebeb26, 0x4e272769, 0x7fb2b2cd, 0xea75759f, ++ 0x1209091b, 0x1d83839e, 0x582c2c74, 0x341a1a2e, ++ 0x361b1b2d, 0xdc6e6eb2, 0xb45a5aee, 0x5ba0a0fb, ++ 0xa45252f6, 0x763b3b4d, 0xb7d6d661, 0x7db3b3ce, ++ 0x5229297b, 0xdde3e33e, 0x5e2f2f71, 0x13848497, ++ 0xa65353f5, 0xb9d1d168, 0x00000000, 0xc1eded2c, ++ 0x40202060, 0xe3fcfc1f, 0x79b1b1c8, 0xb65b5bed, ++ 0xd46a6abe, 0x8dcbcb46, 0x67bebed9, 0x7239394b, ++ 0x944a4ade, 0x984c4cd4, 0xb05858e8, 0x85cfcf4a, ++ 0xbbd0d06b, 0xc5efef2a, 0x4faaaae5, 0xedfbfb16, ++ 0x864343c5, 0x9a4d4dd7, 0x66333355, 0x11858594, ++ 0x8a4545cf, 0xe9f9f910, 0x04020206, 0xfe7f7f81, ++ 0xa05050f0, 0x783c3c44, 0x259f9fba, 0x4ba8a8e3, ++ 0xa25151f3, 0x5da3a3fe, 0x804040c0, 0x058f8f8a, ++ 0x3f9292ad, 0x219d9dbc, 0x70383848, 0xf1f5f504, ++ 0x63bcbcdf, 0x77b6b6c1, 0xafdada75, 0x42212163, ++ 0x20101030, 0xe5ffff1a, 0xfdf3f30e, 0xbfd2d26d, ++ 0x81cdcd4c, 0x180c0c14, 0x26131335, 0xc3ecec2f, ++ 0xbe5f5fe1, 0x359797a2, 0x884444cc, 0x2e171739, ++ 0x93c4c457, 0x55a7a7f2, 0xfc7e7e82, 0x7a3d3d47, ++ 0xc86464ac, 0xba5d5de7, 0x3219192b, 0xe6737395, ++ 0xc06060a0, 0x19818198, 0x9e4f4fd1, 0xa3dcdc7f, ++ 0x44222266, 0x542a2a7e, 0x3b9090ab, 0x0b888883, ++ 0x8c4646ca, 0xc7eeee29, 0x6bb8b8d3, 0x2814143c, ++ 0xa7dede79, 0xbc5e5ee2, 0x160b0b1d, 
0xaddbdb76, ++ 0xdbe0e03b, 0x64323256, 0x743a3a4e, 0x140a0a1e, ++ 0x924949db, 0x0c06060a, 0x4824246c, 0xb85c5ce4, ++ 0x9fc2c25d, 0xbdd3d36e, 0x43acacef, 0xc46262a6, ++ 0x399191a8, 0x319595a4, 0xd3e4e437, 0xf279798b, ++ 0xd5e7e732, 0x8bc8c843, 0x6e373759, 0xda6d6db7, ++ 0x018d8d8c, 0xb1d5d564, 0x9c4e4ed2, 0x49a9a9e0, ++ 0xd86c6cb4, 0xac5656fa, 0xf3f4f407, 0xcfeaea25, ++ 0xca6565af, 0xf47a7a8e, 0x47aeaee9, 0x10080818, ++ 0x6fbabad5, 0xf0787888, 0x4a25256f, 0x5c2e2e72, ++ 0x381c1c24, 0x57a6a6f1, 0x73b4b4c7, 0x97c6c651, ++ 0xcbe8e823, 0xa1dddd7c, 0xe874749c, 0x3e1f1f21, ++ 0x964b4bdd, 0x61bdbddc, 0x0d8b8b86, 0x0f8a8a85, ++ 0xe0707090, 0x7c3e3e42, 0x71b5b5c4, 0xcc6666aa, ++ 0x904848d8, 0x06030305, 0xf7f6f601, 0x1c0e0e12, ++ 0xc26161a3, 0x6a35355f, 0xae5757f9, 0x69b9b9d0, ++ 0x17868691, 0x99c1c158, 0x3a1d1d27, 0x279e9eb9, ++ 0xd9e1e138, 0xebf8f813, 0x2b9898b3, 0x22111133, ++ 0xd26969bb, 0xa9d9d970, 0x078e8e89, 0x339494a7, ++ 0x2d9b9bb6, 0x3c1e1e22, 0x15878792, 0xc9e9e920, ++ 0x87cece49, 0xaa5555ff, 0x50282878, 0xa5dfdf7a, ++ 0x038c8c8f, 0x59a1a1f8, 0x09898980, 0x1a0d0d17, ++ 0x65bfbfda, 0xd7e6e631, 0x844242c6, 0xd06868b8, ++ 0x824141c3, 0x299999b0, 0x5a2d2d77, 0x1e0f0f11, ++ 0x7bb0b0cb, 0xa85454fc, 0x6dbbbbd6, 0x2c16163a ++ }; ++ static const uint8_t fsb_consts[256] = { ++ 0x63, 0x7c, 0x77, 0x7b, 0xf2, 0x6b, 0x6f, 0xc5, ++ 0x30, 0x01, 0x67, 0x2b, 0xfe, 0xd7, 0xab, 0x76, ++ 0xca, 0x82, 0xc9, 0x7d, 0xfa, 0x59, 0x47, 0xf0, ++ 0xad, 0xd4, 0xa2, 0xaf, 0x9c, 0xa4, 0x72, 0xc0, ++ 0xb7, 0xfd, 0x93, 0x26, 0x36, 0x3f, 0xf7, 0xcc, ++ 0x34, 0xa5, 0xe5, 0xf1, 0x71, 0xd8, 0x31, 0x15, ++ 0x04, 0xc7, 0x23, 0xc3, 0x18, 0x96, 0x05, 0x9a, ++ 0x07, 0x12, 0x80, 0xe2, 0xeb, 0x27, 0xb2, 0x75, ++ 0x09, 0x83, 0x2c, 0x1a, 0x1b, 0x6e, 0x5a, 0xa0, ++ 0x52, 0x3b, 0xd6, 0xb3, 0x29, 0xe3, 0x2f, 0x84, ++ 0x53, 0xd1, 0x00, 0xed, 0x20, 0xfc, 0xb1, 0x5b, ++ 0x6a, 0xcb, 0xbe, 0x39, 0x4a, 0x4c, 0x58, 0xcf, ++ 0xd0, 0xef, 0xaa, 0xfb, 0x43, 0x4d, 0x33, 0x85, ++ 0x45, 0xf9, 0x02, 0x7f, 0x50, 0x3c, 0x9f, 0xa8, ++ 0x51, 0xa3, 0x40, 0x8f, 0x92, 0x9d, 0x38, 0xf5, ++ 0xbc, 0xb6, 0xda, 0x21, 0x10, 0xff, 0xf3, 0xd2, ++ 0xcd, 0x0c, 0x13, 0xec, 0x5f, 0x97, 0x44, 0x17, ++ 0xc4, 0xa7, 0x7e, 0x3d, 0x64, 0x5d, 0x19, 0x73, ++ 0x60, 0x81, 0x4f, 0xdc, 0x22, 0x2a, 0x90, 0x88, ++ 0x46, 0xee, 0xb8, 0x14, 0xde, 0x5e, 0x0b, 0xdb, ++ 0xe0, 0x32, 0x3a, 0x0a, 0x49, 0x06, 0x24, 0x5c, ++ 0xc2, 0xd3, 0xac, 0x62, 0x91, 0x95, 0xe4, 0x79, ++ 0xe7, 0xc8, 0x37, 0x6d, 0x8d, 0xd5, 0x4e, 0xa9, ++ 0x6c, 0x56, 0xf4, 0xea, 0x65, 0x7a, 0xae, 0x08, ++ 0xba, 0x78, 0x25, 0x2e, 0x1c, 0xa6, 0xb4, 0xc6, ++ 0xe8, 0xdd, 0x74, 0x1f, 0x4b, 0xbd, 0x8b, 0x8a, ++ 0x70, 0x3e, 0xb5, 0x66, 0x48, 0x03, 0xf6, 0x0e, ++ 0x61, 0x35, 0x57, 0xb9, 0x86, 0xc1, 0x1d, 0x9e, ++ 0xe1, 0xf8, 0x98, 0x11, 0x69, 0xd9, 0x8e, 0x94, ++ 0x9b, 0x1e, 0x87, 0xe9, 0xce, 0x55, 0x28, 0xdf, ++ 0x8c, 0xa1, 0x89, 0x0d, 0xbf, 0xe6, 0x42, 0x68, ++ 0x41, 0x99, 0x2d, 0x0f, 0xb0, 0x54, 0xbb, 0x16 ++ }; ++ ++ __ align(CodeEntryAlignment); ++ StubCodeMark mark(this, "StubRoutines", "aescrypt_encryptBlock"); ++ ++ // Allocate registers ++ Register src = A0; ++ Register dst = A1; ++ Register key = A2; ++ Register rve = A3; ++ Register srclen = A4; ++ Register keylen = T8; ++ Register srcend = A5; ++ Register keyold = A6; ++ Register t0 = A7; ++ Register t1, t2, t3, ftp; ++ Register xa[4] = { T0, T1, T2, T3 }; ++ Register ya[4] = { T4, T5, T6, T7 }; ++ ++ Label loop, tail, done; ++ address start = __ pc(); ++ ++ if (cbc) { ++ t1 = S0; ++ t2 = S1; ++ t3 = S2; ++ ftp = S3; ++ ++ __ beqz(srclen, done); ++ ++ __ addi_d(SP, SP, -4 * 
wordSize); ++ __ st_d(S3, SP, 3 * wordSize); ++ __ st_d(S2, SP, 2 * wordSize); ++ __ st_d(S1, SP, 1 * wordSize); ++ __ st_d(S0, SP, 0 * wordSize); ++ ++ __ add_d(srcend, src, srclen); ++ __ move(keyold, key); ++ } else { ++ t1 = A3; ++ t2 = A4; ++ t3 = A5; ++ ftp = A6; ++ } ++ ++ __ ld_w(keylen, key, arrayOopDesc::length_offset_in_bytes() - arrayOopDesc::base_offset_in_bytes(T_INT)); ++ ++ // Round 1 ++ if (cbc) { ++ for (int i = 0; i < 4; i++) { ++ __ ld_w(xa[i], rve, 4 * i); ++ } ++ ++ __ bind(loop); ++ ++ for (int i = 0; i < 4; i++) { ++ __ ld_w(ya[i], src, 4 * i); ++ } ++ for (int i = 0; i < 4; i++) { ++ __ XOR(xa[i], xa[i], ya[i]); ++ } ++ } else { ++ for (int i = 0; i < 4; i++) { ++ __ ld_w(xa[i], src, 4 * i); ++ } ++ } ++ for (int i = 0; i < 4; i++) { ++ __ ld_w(ya[i], key, 4 * i); ++ } ++ for (int i = 0; i < 4; i++) { ++ __ revb_2h(xa[i], xa[i]); ++ } ++ for (int i = 0; i < 4; i++) { ++ __ rotri_w(xa[i], xa[i], 16); ++ } ++ for (int i = 0; i < 4; i++) { ++ __ XOR(xa[i], xa[i], ya[i]); ++ } ++ ++ __ li(ftp, (intptr_t)ft_consts); ++ ++ // Round 2 - (N-1) ++ for (int r = 0; r < 14; r++) { ++ Register *xp; ++ Register *yp; ++ ++ if (r & 1) { ++ xp = xa; ++ yp = ya; ++ } else { ++ xp = ya; ++ yp = xa; ++ } ++ ++ for (int i = 0; i < 4; i++) { ++ __ ld_w(xp[i], key, 4 * (4 * (r + 1) + i)); ++ } ++ ++ for (int i = 0; i < 4; i++) { ++ __ bstrpick_d(t0, yp[(i + 3) & 3], 7, 0); ++ __ bstrpick_d(t1, yp[(i + 2) & 3], 15, 8); ++ __ bstrpick_d(t2, yp[(i + 1) & 3], 23, 16); ++ __ bstrpick_d(t3, yp[(i + 0) & 3], 31, 24); ++ __ slli_w(t0, t0, 2); ++ __ slli_w(t1, t1, 2); ++ __ slli_w(t2, t2, 2); ++ __ slli_w(t3, t3, 2); ++ __ ldx_w(t0, ftp, t0); ++ __ ldx_w(t1, ftp, t1); ++ __ ldx_w(t2, ftp, t2); ++ __ ldx_w(t3, ftp, t3); ++ __ rotri_w(t0, t0, 24); ++ __ rotri_w(t1, t1, 16); ++ __ rotri_w(t2, t2, 8); ++ __ XOR(xp[i], xp[i], t0); ++ __ XOR(t0, t1, t2); ++ __ XOR(xp[i], xp[i], t3); ++ __ XOR(xp[i], xp[i], t0); ++ } ++ ++ if (r == 8) { ++ // AES 128 ++ __ li(t0, 44); ++ __ beq(t0, keylen, tail); ++ } else if (r == 10) { ++ // AES 192 ++ __ li(t0, 52); ++ __ beq(t0, keylen, tail); ++ } ++ } ++ ++ __ bind(tail); ++ __ li(ftp, (intptr_t)fsb_consts); ++ __ alsl_d(key, keylen, key, 2 - 1); ++ ++ // Round N ++ for (int i = 0; i < 4; i++) { ++ __ bstrpick_d(t0, ya[(i + 3) & 3], 7, 0); ++ __ bstrpick_d(t1, ya[(i + 2) & 3], 15, 8); ++ __ bstrpick_d(t2, ya[(i + 1) & 3], 23, 16); ++ __ bstrpick_d(t3, ya[(i + 0) & 3], 31, 24); ++ __ ldx_bu(t0, ftp, t0); ++ __ ldx_bu(t1, ftp, t1); ++ __ ldx_bu(t2, ftp, t2); ++ __ ldx_bu(t3, ftp, t3); ++ __ ld_w(xa[i], key, 4 * i - 16); ++ __ slli_w(t1, t1, 8); ++ __ slli_w(t2, t2, 16); ++ __ slli_w(t3, t3, 24); ++ __ XOR(xa[i], xa[i], t0); ++ __ XOR(t0, t1, t2); ++ __ XOR(xa[i], xa[i], t3); ++ __ XOR(xa[i], xa[i], t0); ++ } ++ ++ for (int i = 0; i < 4; i++) { ++ __ revb_2h(xa[i], xa[i]); ++ } ++ for (int i = 0; i < 4; i++) { ++ __ rotri_w(xa[i], xa[i], 16); ++ } ++ for (int i = 0; i < 4; i++) { ++ __ st_w(xa[i], dst, 4 * i); ++ } ++ ++ if (cbc) { ++ __ move(key, keyold); ++ __ addi_d(src, src, 16); ++ __ addi_d(dst, dst, 16); ++ __ blt(src, srcend, loop); ++ ++ for (int i = 0; i < 4; i++) { ++ __ st_w(xa[i], rve, 4 * i); ++ } ++ ++ __ ld_d(S3, SP, 3 * wordSize); ++ __ ld_d(S2, SP, 2 * wordSize); ++ __ ld_d(S1, SP, 1 * wordSize); ++ __ ld_d(S0, SP, 0 * wordSize); ++ __ addi_d(SP, SP, 4 * wordSize); ++ ++ __ bind(done); ++ __ move(A0, srclen); ++ } ++ ++ __ jr(RA); ++ ++ return start; ++ } ++ ++ address generate_mulAdd() { ++ __ align(CodeEntryAlignment); ++ StubCodeMark mark(this, 
"StubRoutines", "mulAdd"); ++ ++ address entry = __ pc(); ++ ++ const Register out = A0; ++ const Register in = A1; ++ const Register offset = A2; ++ const Register len = A3; ++ const Register k = A4; ++ ++ __ block_comment("Entry:"); ++ __ mul_add(out, in, offset, len, k); ++ __ jr(RA); ++ ++ return entry; ++ } ++ ++ // Arguments: ++ // ++ // Inputs: ++ // A0 - source byte array address ++ // A1 - destination byte array address ++ // A2 - K (key) in little endian int array ++ // A3 - r vector byte array address ++ // A4 - input length ++ // ++ // Output: ++ // A0 - input length ++ // ++ address generate_aescrypt_decryptBlock(bool cbc) { ++ static const uint32_t rt_consts[256] = { ++ 0x51f4a750, 0x7e416553, 0x1a17a4c3, 0x3a275e96, ++ 0x3bab6bcb, 0x1f9d45f1, 0xacfa58ab, 0x4be30393, ++ 0x2030fa55, 0xad766df6, 0x88cc7691, 0xf5024c25, ++ 0x4fe5d7fc, 0xc52acbd7, 0x26354480, 0xb562a38f, ++ 0xdeb15a49, 0x25ba1b67, 0x45ea0e98, 0x5dfec0e1, ++ 0xc32f7502, 0x814cf012, 0x8d4697a3, 0x6bd3f9c6, ++ 0x038f5fe7, 0x15929c95, 0xbf6d7aeb, 0x955259da, ++ 0xd4be832d, 0x587421d3, 0x49e06929, 0x8ec9c844, ++ 0x75c2896a, 0xf48e7978, 0x99583e6b, 0x27b971dd, ++ 0xbee14fb6, 0xf088ad17, 0xc920ac66, 0x7dce3ab4, ++ 0x63df4a18, 0xe51a3182, 0x97513360, 0x62537f45, ++ 0xb16477e0, 0xbb6bae84, 0xfe81a01c, 0xf9082b94, ++ 0x70486858, 0x8f45fd19, 0x94de6c87, 0x527bf8b7, ++ 0xab73d323, 0x724b02e2, 0xe31f8f57, 0x6655ab2a, ++ 0xb2eb2807, 0x2fb5c203, 0x86c57b9a, 0xd33708a5, ++ 0x302887f2, 0x23bfa5b2, 0x02036aba, 0xed16825c, ++ 0x8acf1c2b, 0xa779b492, 0xf307f2f0, 0x4e69e2a1, ++ 0x65daf4cd, 0x0605bed5, 0xd134621f, 0xc4a6fe8a, ++ 0x342e539d, 0xa2f355a0, 0x058ae132, 0xa4f6eb75, ++ 0x0b83ec39, 0x4060efaa, 0x5e719f06, 0xbd6e1051, ++ 0x3e218af9, 0x96dd063d, 0xdd3e05ae, 0x4de6bd46, ++ 0x91548db5, 0x71c45d05, 0x0406d46f, 0x605015ff, ++ 0x1998fb24, 0xd6bde997, 0x894043cc, 0x67d99e77, ++ 0xb0e842bd, 0x07898b88, 0xe7195b38, 0x79c8eedb, ++ 0xa17c0a47, 0x7c420fe9, 0xf8841ec9, 0x00000000, ++ 0x09808683, 0x322bed48, 0x1e1170ac, 0x6c5a724e, ++ 0xfd0efffb, 0x0f853856, 0x3daed51e, 0x362d3927, ++ 0x0a0fd964, 0x685ca621, 0x9b5b54d1, 0x24362e3a, ++ 0x0c0a67b1, 0x9357e70f, 0xb4ee96d2, 0x1b9b919e, ++ 0x80c0c54f, 0x61dc20a2, 0x5a774b69, 0x1c121a16, ++ 0xe293ba0a, 0xc0a02ae5, 0x3c22e043, 0x121b171d, ++ 0x0e090d0b, 0xf28bc7ad, 0x2db6a8b9, 0x141ea9c8, ++ 0x57f11985, 0xaf75074c, 0xee99ddbb, 0xa37f60fd, ++ 0xf701269f, 0x5c72f5bc, 0x44663bc5, 0x5bfb7e34, ++ 0x8b432976, 0xcb23c6dc, 0xb6edfc68, 0xb8e4f163, ++ 0xd731dcca, 0x42638510, 0x13972240, 0x84c61120, ++ 0x854a247d, 0xd2bb3df8, 0xaef93211, 0xc729a16d, ++ 0x1d9e2f4b, 0xdcb230f3, 0x0d8652ec, 0x77c1e3d0, ++ 0x2bb3166c, 0xa970b999, 0x119448fa, 0x47e96422, ++ 0xa8fc8cc4, 0xa0f03f1a, 0x567d2cd8, 0x223390ef, ++ 0x87494ec7, 0xd938d1c1, 0x8ccaa2fe, 0x98d40b36, ++ 0xa6f581cf, 0xa57ade28, 0xdab78e26, 0x3fadbfa4, ++ 0x2c3a9de4, 0x5078920d, 0x6a5fcc9b, 0x547e4662, ++ 0xf68d13c2, 0x90d8b8e8, 0x2e39f75e, 0x82c3aff5, ++ 0x9f5d80be, 0x69d0937c, 0x6fd52da9, 0xcf2512b3, ++ 0xc8ac993b, 0x10187da7, 0xe89c636e, 0xdb3bbb7b, ++ 0xcd267809, 0x6e5918f4, 0xec9ab701, 0x834f9aa8, ++ 0xe6956e65, 0xaaffe67e, 0x21bccf08, 0xef15e8e6, ++ 0xbae79bd9, 0x4a6f36ce, 0xea9f09d4, 0x29b07cd6, ++ 0x31a4b2af, 0x2a3f2331, 0xc6a59430, 0x35a266c0, ++ 0x744ebc37, 0xfc82caa6, 0xe090d0b0, 0x33a7d815, ++ 0xf104984a, 0x41ecdaf7, 0x7fcd500e, 0x1791f62f, ++ 0x764dd68d, 0x43efb04d, 0xccaa4d54, 0xe49604df, ++ 0x9ed1b5e3, 0x4c6a881b, 0xc12c1fb8, 0x4665517f, ++ 0x9d5eea04, 0x018c355d, 0xfa877473, 0xfb0b412e, ++ 0xb3671d5a, 0x92dbd252, 0xe9105633, 0x6dd64713, ++ 
0x9ad7618c, 0x37a10c7a, 0x59f8148e, 0xeb133c89, ++ 0xcea927ee, 0xb761c935, 0xe11ce5ed, 0x7a47b13c, ++ 0x9cd2df59, 0x55f2733f, 0x1814ce79, 0x73c737bf, ++ 0x53f7cdea, 0x5ffdaa5b, 0xdf3d6f14, 0x7844db86, ++ 0xcaaff381, 0xb968c43e, 0x3824342c, 0xc2a3405f, ++ 0x161dc372, 0xbce2250c, 0x283c498b, 0xff0d9541, ++ 0x39a80171, 0x080cb3de, 0xd8b4e49c, 0x6456c190, ++ 0x7bcb8461, 0xd532b670, 0x486c5c74, 0xd0b85742 ++ }; ++ static const uint8_t rsb_consts[256] = { ++ 0x52, 0x09, 0x6a, 0xd5, 0x30, 0x36, 0xa5, 0x38, ++ 0xbf, 0x40, 0xa3, 0x9e, 0x81, 0xf3, 0xd7, 0xfb, ++ 0x7c, 0xe3, 0x39, 0x82, 0x9b, 0x2f, 0xff, 0x87, ++ 0x34, 0x8e, 0x43, 0x44, 0xc4, 0xde, 0xe9, 0xcb, ++ 0x54, 0x7b, 0x94, 0x32, 0xa6, 0xc2, 0x23, 0x3d, ++ 0xee, 0x4c, 0x95, 0x0b, 0x42, 0xfa, 0xc3, 0x4e, ++ 0x08, 0x2e, 0xa1, 0x66, 0x28, 0xd9, 0x24, 0xb2, ++ 0x76, 0x5b, 0xa2, 0x49, 0x6d, 0x8b, 0xd1, 0x25, ++ 0x72, 0xf8, 0xf6, 0x64, 0x86, 0x68, 0x98, 0x16, ++ 0xd4, 0xa4, 0x5c, 0xcc, 0x5d, 0x65, 0xb6, 0x92, ++ 0x6c, 0x70, 0x48, 0x50, 0xfd, 0xed, 0xb9, 0xda, ++ 0x5e, 0x15, 0x46, 0x57, 0xa7, 0x8d, 0x9d, 0x84, ++ 0x90, 0xd8, 0xab, 0x00, 0x8c, 0xbc, 0xd3, 0x0a, ++ 0xf7, 0xe4, 0x58, 0x05, 0xb8, 0xb3, 0x45, 0x06, ++ 0xd0, 0x2c, 0x1e, 0x8f, 0xca, 0x3f, 0x0f, 0x02, ++ 0xc1, 0xaf, 0xbd, 0x03, 0x01, 0x13, 0x8a, 0x6b, ++ 0x3a, 0x91, 0x11, 0x41, 0x4f, 0x67, 0xdc, 0xea, ++ 0x97, 0xf2, 0xcf, 0xce, 0xf0, 0xb4, 0xe6, 0x73, ++ 0x96, 0xac, 0x74, 0x22, 0xe7, 0xad, 0x35, 0x85, ++ 0xe2, 0xf9, 0x37, 0xe8, 0x1c, 0x75, 0xdf, 0x6e, ++ 0x47, 0xf1, 0x1a, 0x71, 0x1d, 0x29, 0xc5, 0x89, ++ 0x6f, 0xb7, 0x62, 0x0e, 0xaa, 0x18, 0xbe, 0x1b, ++ 0xfc, 0x56, 0x3e, 0x4b, 0xc6, 0xd2, 0x79, 0x20, ++ 0x9a, 0xdb, 0xc0, 0xfe, 0x78, 0xcd, 0x5a, 0xf4, ++ 0x1f, 0xdd, 0xa8, 0x33, 0x88, 0x07, 0xc7, 0x31, ++ 0xb1, 0x12, 0x10, 0x59, 0x27, 0x80, 0xec, 0x5f, ++ 0x60, 0x51, 0x7f, 0xa9, 0x19, 0xb5, 0x4a, 0x0d, ++ 0x2d, 0xe5, 0x7a, 0x9f, 0x93, 0xc9, 0x9c, 0xef, ++ 0xa0, 0xe0, 0x3b, 0x4d, 0xae, 0x2a, 0xf5, 0xb0, ++ 0xc8, 0xeb, 0xbb, 0x3c, 0x83, 0x53, 0x99, 0x61, ++ 0x17, 0x2b, 0x04, 0x7e, 0xba, 0x77, 0xd6, 0x26, ++ 0xe1, 0x69, 0x14, 0x63, 0x55, 0x21, 0x0c, 0x7d ++ }; ++ ++ __ align(CodeEntryAlignment); ++ StubCodeMark mark(this, "StubRoutines", "aescrypt_decryptBlock"); ++ ++ // Allocate registers ++ Register src = A0; ++ Register dst = A1; ++ Register key = A2; ++ Register rve = A3; ++ Register srclen = A4; ++ Register keylen = T8; ++ Register srcend = A5; ++ Register t0 = A6; ++ Register t1 = A7; ++ Register t2, t3, rtp, rvp; ++ Register xa[4] = { T0, T1, T2, T3 }; ++ Register ya[4] = { T4, T5, T6, T7 }; ++ ++ Label loop, tail, done; ++ address start = __ pc(); ++ ++ if (cbc) { ++ t2 = S0; ++ t3 = S1; ++ rtp = S2; ++ rvp = S3; ++ ++ __ beqz(srclen, done); ++ ++ __ addi_d(SP, SP, -4 * wordSize); ++ __ st_d(S3, SP, 3 * wordSize); ++ __ st_d(S2, SP, 2 * wordSize); ++ __ st_d(S1, SP, 1 * wordSize); ++ __ st_d(S0, SP, 0 * wordSize); ++ ++ __ add_d(srcend, src, srclen); ++ __ move(rvp, rve); ++ } else { ++ t2 = A3; ++ t3 = A4; ++ rtp = A5; ++ } ++ ++ __ ld_w(keylen, key, arrayOopDesc::length_offset_in_bytes() - arrayOopDesc::base_offset_in_bytes(T_INT)); ++ ++ __ bind(loop); ++ ++ // Round 1 ++ for (int i = 0; i < 4; i++) { ++ __ ld_w(xa[i], src, 4 * i); ++ } ++ for (int i = 0; i < 4; i++) { ++ __ ld_w(ya[i], key, 4 * (4 + i)); ++ } ++ for (int i = 0; i < 4; i++) { ++ __ revb_2h(xa[i], xa[i]); ++ } ++ for (int i = 0; i < 4; i++) { ++ __ rotri_w(xa[i], xa[i], 16); ++ } ++ for (int i = 0; i < 4; i++) { ++ __ XOR(xa[i], xa[i], ya[i]); ++ } ++ ++ __ li(rtp, (intptr_t)rt_consts); ++ ++ // Round 2 - (N-1) ++ for 
(int r = 0; r < 14; r++) { ++ Register *xp; ++ Register *yp; ++ ++ if (r & 1) { ++ xp = xa; ++ yp = ya; ++ } else { ++ xp = ya; ++ yp = xa; ++ } ++ ++ for (int i = 0; i < 4; i++) { ++ __ ld_w(xp[i], key, 4 * (4 * (r + 1) + 4 + i)); ++ } ++ ++ for (int i = 0; i < 4; i++) { ++ __ bstrpick_d(t0, yp[(i + 1) & 3], 7, 0); ++ __ bstrpick_d(t1, yp[(i + 2) & 3], 15, 8); ++ __ bstrpick_d(t2, yp[(i + 3) & 3], 23, 16); ++ __ bstrpick_d(t3, yp[(i + 0) & 3], 31, 24); ++ __ slli_w(t0, t0, 2); ++ __ slli_w(t1, t1, 2); ++ __ slli_w(t2, t2, 2); ++ __ slli_w(t3, t3, 2); ++ __ ldx_w(t0, rtp, t0); ++ __ ldx_w(t1, rtp, t1); ++ __ ldx_w(t2, rtp, t2); ++ __ ldx_w(t3, rtp, t3); ++ __ rotri_w(t0, t0, 24); ++ __ rotri_w(t1, t1, 16); ++ __ rotri_w(t2, t2, 8); ++ __ XOR(xp[i], xp[i], t0); ++ __ XOR(t0, t1, t2); ++ __ XOR(xp[i], xp[i], t3); ++ __ XOR(xp[i], xp[i], t0); ++ } ++ ++ if (r == 8) { ++ // AES 128 ++ __ li(t0, 44); ++ __ beq(t0, keylen, tail); ++ } else if (r == 10) { ++ // AES 192 ++ __ li(t0, 52); ++ __ beq(t0, keylen, tail); ++ } ++ } ++ ++ __ bind(tail); ++ __ li(rtp, (intptr_t)rsb_consts); ++ ++ // Round N ++ for (int i = 0; i < 4; i++) { ++ __ bstrpick_d(t0, ya[(i + 1) & 3], 7, 0); ++ __ bstrpick_d(t1, ya[(i + 2) & 3], 15, 8); ++ __ bstrpick_d(t2, ya[(i + 3) & 3], 23, 16); ++ __ bstrpick_d(t3, ya[(i + 0) & 3], 31, 24); ++ __ ldx_bu(t0, rtp, t0); ++ __ ldx_bu(t1, rtp, t1); ++ __ ldx_bu(t2, rtp, t2); ++ __ ldx_bu(t3, rtp, t3); ++ __ ld_w(xa[i], key, 4 * i); ++ __ slli_w(t1, t1, 8); ++ __ slli_w(t2, t2, 16); ++ __ slli_w(t3, t3, 24); ++ __ XOR(xa[i], xa[i], t0); ++ __ XOR(t0, t1, t2); ++ __ XOR(xa[i], xa[i], t3); ++ __ XOR(xa[i], xa[i], t0); ++ } ++ ++ if (cbc) { ++ for (int i = 0; i < 4; i++) { ++ __ ld_w(ya[i], rvp, 4 * i); ++ } ++ } ++ for (int i = 0; i < 4; i++) { ++ __ revb_2h(xa[i], xa[i]); ++ } ++ for (int i = 0; i < 4; i++) { ++ __ rotri_w(xa[i], xa[i], 16); ++ } ++ if (cbc) { ++ for (int i = 0; i < 4; i++) { ++ __ XOR(xa[i], xa[i], ya[i]); ++ } ++ } ++ for (int i = 0; i < 4; i++) { ++ __ st_w(xa[i], dst, 4 * i); ++ } ++ ++ if (cbc) { ++ __ move(rvp, src); ++ __ addi_d(src, src, 16); ++ __ addi_d(dst, dst, 16); ++ __ blt(src, srcend, loop); ++ ++ __ ld_d(t0, src, -16); ++ __ ld_d(t1, src, -8); ++ __ st_d(t0, rve, 0); ++ __ st_d(t1, rve, 8); ++ ++ __ ld_d(S3, SP, 3 * wordSize); ++ __ ld_d(S2, SP, 2 * wordSize); ++ __ ld_d(S1, SP, 1 * wordSize); ++ __ ld_d(S0, SP, 0 * wordSize); ++ __ addi_d(SP, SP, 4 * wordSize); ++ ++ __ bind(done); ++ __ move(A0, srclen); ++ } ++ ++ __ jr(RA); ++ ++ return start; ++ } ++ ++ // Arguments: ++ // ++ // Inputs: ++ // A0 - byte[] source+offset ++ // A1 - int[] SHA.state ++ // A2 - int offset ++ // A3 - int limit ++ // ++ void generate_md5_implCompress(const char *name, address &entry, address &entry_mb) { ++ static const uint32_t round_consts[64] = { ++ 0xd76aa478, 0xe8c7b756, 0x242070db, 0xc1bdceee, ++ 0xf57c0faf, 0x4787c62a, 0xa8304613, 0xfd469501, ++ 0x698098d8, 0x8b44f7af, 0xffff5bb1, 0x895cd7be, ++ 0x6b901122, 0xfd987193, 0xa679438e, 0x49b40821, ++ 0xf61e2562, 0xc040b340, 0x265e5a51, 0xe9b6c7aa, ++ 0xd62f105d, 0x02441453, 0xd8a1e681, 0xe7d3fbc8, ++ 0x21e1cde6, 0xc33707d6, 0xf4d50d87, 0x455a14ed, ++ 0xa9e3e905, 0xfcefa3f8, 0x676f02d9, 0x8d2a4c8a, ++ 0xfffa3942, 0x8771f681, 0x6d9d6122, 0xfde5380c, ++ 0xa4beea44, 0x4bdecfa9, 0xf6bb4b60, 0xbebfbc70, ++ 0x289b7ec6, 0xeaa127fa, 0xd4ef3085, 0x04881d05, ++ 0xd9d4d039, 0xe6db99e5, 0x1fa27cf8, 0xc4ac5665, ++ 0xf4292244, 0x432aff97, 0xab9423a7, 0xfc93a039, ++ 0x655b59c3, 0x8f0ccc92, 0xffeff47d, 0x85845dd1, ++ 0x6fa87e4f, 
0xfe2ce6e0, 0xa3014314, 0x4e0811a1, ++ 0xf7537e82, 0xbd3af235, 0x2ad7d2bb, 0xeb86d391, ++ }; ++ static const uint8_t round_offs[64] = { ++ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, ++ 1, 6, 11, 0, 5, 10, 15, 4, 9, 14, 3, 8, 13, 2, 7, 12, ++ 5, 8, 11, 14, 1, 4, 7, 10, 13, 0, 3, 6, 9, 12, 15, 2, ++ 0, 7, 14, 5, 12, 3, 10, 1, 8, 15, 6, 13, 4, 11, 2, 9, ++ }; ++ static const uint8_t round_shfs[64] = { ++ 25, 20, 15, 10, 25, 20, 15, 10, 25, 20, 15, 10, 25, 20, 15, 10, ++ 27, 23, 18, 12, 27, 23, 18, 12, 27, 23, 18, 12, 27, 23, 18, 12, ++ 28, 21, 16, 9, 28, 21, 16, 9, 28, 21, 16, 9, 28, 21, 16, 9, ++ 26, 22, 17, 11, 26, 22, 17, 11, 26, 22, 17, 11, 26, 22, 17, 11, ++ }; ++ __ align(CodeEntryAlignment); ++ StubCodeMark mark(this, "StubRoutines", name); ++ Label loop; ++ ++ // Allocate registers ++ Register t0 = T4; ++ Register t1 = T5; ++ Register t2 = T6; ++ Register t3 = T7; ++ Register buf = A0; ++ Register state = A1; ++ Register ofs = A2; ++ Register limit = A3; ++ Register kptr = T8; ++ Register sa[4] = { T0, T1, T2, T3 }; ++ ++ // Entry ++ entry = __ pc(); ++ __ move(ofs, R0); ++ __ move(limit, R0); ++ ++ // Entry MB ++ entry_mb = __ pc(); ++ ++ // Load keys base address ++ __ li(kptr, (intptr_t)round_consts); ++ ++ __ bind(loop); ++ // Load states ++ __ ld_w(sa[0], state, 0); ++ __ ld_w(sa[1], state, 4); ++ __ ld_w(sa[2], state, 8); ++ __ ld_w(sa[3], state, 12); ++ ++ // 64 rounds of hashing ++ for (int i = 0; i < 64; i++) { ++ Register a = sa[(0 - i) & 3]; ++ Register b = sa[(1 - i) & 3]; ++ Register c = sa[(2 - i) & 3]; ++ Register d = sa[(3 - i) & 3]; ++ ++ if (i < 16) { ++ __ XOR(t0, c, d); ++ __ AND(t0, t0, b); ++ __ XOR(t0, t0, d); ++ } else if (i < 32) { ++ __ andn(t0, c, d); ++ __ AND(t1, d, b); ++ __ OR(t0, t0, t1); ++ } else if (i < 48) { ++ __ XOR(t0, c, d); ++ __ XOR(t0, t0, b); ++ } else { ++ __ orn(t0, b, d); ++ __ XOR(t0, t0, c); ++ } ++ ++ __ ld_w(t1, kptr, i * 4); ++ __ ld_w(t2, buf, round_offs[i] * 4); ++ __ add_w(a, a, t1); ++ __ add_w(a, a, t2); ++ __ add_w(a, a, t0); ++ __ rotri_w(a, a, round_shfs[i]); ++ __ add_w(a, a, b); ++ } ++ ++ // Save updated state ++ __ ld_w(t0, state, 0); ++ __ ld_w(t1, state, 4); ++ __ ld_w(t2, state, 8); ++ __ ld_w(t3, state, 12); ++ __ add_w(sa[0], sa[0], t0); ++ __ add_w(sa[1], sa[1], t1); ++ __ add_w(sa[2], sa[2], t2); ++ __ add_w(sa[3], sa[3], t3); ++ __ st_w(sa[0], state, 0); ++ __ st_w(sa[1], state, 4); ++ __ st_w(sa[2], state, 8); ++ __ st_w(sa[3], state, 12); ++ ++ __ addi_w(ofs, ofs, 64); ++ __ addi_d(buf, buf, 64); ++ __ bge(limit, ofs, loop); ++ __ move(V0, ofs); // return ofs ++ ++ __ jr(RA); ++ } ++ ++ // Arguments: ++ // ++ // Inputs: ++ // A0 - byte[] source+offset ++ // A1 - int[] SHA.state ++ // A2 - int offset ++ // A3 - int limit ++ // ++ void generate_sha1_implCompress(const char *name, address &entry, address &entry_mb) { ++ __ align(CodeEntryAlignment); ++ StubCodeMark mark(this, "StubRoutines", name); ++ Label keys, loop; ++ ++ // Keys ++ __ bind(keys); ++ __ emit_int32(0x5a827999); ++ __ emit_int32(0x6ed9eba1); ++ __ emit_int32(0x8f1bbcdc); ++ __ emit_int32(0xca62c1d6); ++ ++ // Allocate registers ++ Register t0 = T5; ++ Register t1 = T6; ++ Register t2 = T7; ++ Register t3 = T8; ++ Register buf = A0; ++ Register state = A1; ++ Register ofs = A2; ++ Register limit = A3; ++ Register ka[4] = { A4, A5, A6, A7 }; ++ Register sa[5] = { T0, T1, T2, T3, T4 }; ++ ++ // Entry ++ entry = __ pc(); ++ __ move(ofs, R0); ++ __ move(limit, R0); ++ ++ // Entry MB ++ entry_mb = __ pc(); ++ ++ // Allocate scratch space ++ 
__ addi_d(SP, SP, -64); ++ ++ // Load keys ++ __ lipc(t0, keys); ++ __ ld_w(ka[0], t0, 0); ++ __ ld_w(ka[1], t0, 4); ++ __ ld_w(ka[2], t0, 8); ++ __ ld_w(ka[3], t0, 12); ++ ++ __ bind(loop); ++ // Load arguments ++ __ ld_w(sa[0], state, 0); ++ __ ld_w(sa[1], state, 4); ++ __ ld_w(sa[2], state, 8); ++ __ ld_w(sa[3], state, 12); ++ __ ld_w(sa[4], state, 16); ++ ++ // 80 rounds of hashing ++ for (int i = 0; i < 80; i++) { ++ Register a = sa[(5 - (i % 5)) % 5]; ++ Register b = sa[(6 - (i % 5)) % 5]; ++ Register c = sa[(7 - (i % 5)) % 5]; ++ Register d = sa[(8 - (i % 5)) % 5]; ++ Register e = sa[(9 - (i % 5)) % 5]; ++ ++ if (i < 16) { ++ __ ld_w(t0, buf, i * 4); ++ __ revb_2h(t0, t0); ++ __ rotri_w(t0, t0, 16); ++ __ add_w(e, e, t0); ++ __ st_w(t0, SP, i * 4); ++ __ XOR(t0, c, d); ++ __ AND(t0, t0, b); ++ __ XOR(t0, t0, d); ++ } else { ++ __ ld_w(t0, SP, ((i - 3) & 0xF) * 4); ++ __ ld_w(t1, SP, ((i - 8) & 0xF) * 4); ++ __ ld_w(t2, SP, ((i - 14) & 0xF) * 4); ++ __ ld_w(t3, SP, ((i - 16) & 0xF) * 4); ++ __ XOR(t0, t0, t1); ++ __ XOR(t0, t0, t2); ++ __ XOR(t0, t0, t3); ++ __ rotri_w(t0, t0, 31); ++ __ add_w(e, e, t0); ++ __ st_w(t0, SP, (i & 0xF) * 4); ++ ++ if (i < 20) { ++ __ XOR(t0, c, d); ++ __ AND(t0, t0, b); ++ __ XOR(t0, t0, d); ++ } else if (i < 40 || i >= 60) { ++ __ XOR(t0, b, c); ++ __ XOR(t0, t0, d); ++ } else if (i < 60) { ++ __ OR(t0, c, d); ++ __ AND(t0, t0, b); ++ __ AND(t2, c, d); ++ __ OR(t0, t0, t2); ++ } ++ } ++ ++ __ rotri_w(b, b, 2); ++ __ add_w(e, e, t0); ++ __ add_w(e, e, ka[i / 20]); ++ __ rotri_w(t0, a, 27); ++ __ add_w(e, e, t0); ++ } ++ ++ // Save updated state ++ __ ld_w(t0, state, 0); ++ __ ld_w(t1, state, 4); ++ __ ld_w(t2, state, 8); ++ __ ld_w(t3, state, 12); ++ __ add_w(sa[0], sa[0], t0); ++ __ ld_w(t0, state, 16); ++ __ add_w(sa[1], sa[1], t1); ++ __ add_w(sa[2], sa[2], t2); ++ __ add_w(sa[3], sa[3], t3); ++ __ add_w(sa[4], sa[4], t0); ++ __ st_w(sa[0], state, 0); ++ __ st_w(sa[1], state, 4); ++ __ st_w(sa[2], state, 8); ++ __ st_w(sa[3], state, 12); ++ __ st_w(sa[4], state, 16); ++ ++ __ addi_w(ofs, ofs, 64); ++ __ addi_d(buf, buf, 64); ++ __ bge(limit, ofs, loop); ++ __ move(V0, ofs); // return ofs ++ ++ __ addi_d(SP, SP, 64); ++ __ jr(RA); ++ } ++ ++ // Arguments: ++ // ++ // Inputs: ++ // A0 - byte[] source+offset ++ // A1 - int[] SHA.state ++ // A2 - int offset ++ // A3 - int limit ++ // ++ void generate_sha256_implCompress(const char *name, address &entry, address &entry_mb) { ++ static const uint32_t round_consts[64] = { ++ 0x428a2f98, 0x71374491, 0xb5c0fbcf, 0xe9b5dba5, ++ 0x3956c25b, 0x59f111f1, 0x923f82a4, 0xab1c5ed5, ++ 0xd807aa98, 0x12835b01, 0x243185be, 0x550c7dc3, ++ 0x72be5d74, 0x80deb1fe, 0x9bdc06a7, 0xc19bf174, ++ 0xe49b69c1, 0xefbe4786, 0x0fc19dc6, 0x240ca1cc, ++ 0x2de92c6f, 0x4a7484aa, 0x5cb0a9dc, 0x76f988da, ++ 0x983e5152, 0xa831c66d, 0xb00327c8, 0xbf597fc7, ++ 0xc6e00bf3, 0xd5a79147, 0x06ca6351, 0x14292967, ++ 0x27b70a85, 0x2e1b2138, 0x4d2c6dfc, 0x53380d13, ++ 0x650a7354, 0x766a0abb, 0x81c2c92e, 0x92722c85, ++ 0xa2bfe8a1, 0xa81a664b, 0xc24b8b70, 0xc76c51a3, ++ 0xd192e819, 0xd6990624, 0xf40e3585, 0x106aa070, ++ 0x19a4c116, 0x1e376c08, 0x2748774c, 0x34b0bcb5, ++ 0x391c0cb3, 0x4ed8aa4a, 0x5b9cca4f, 0x682e6ff3, ++ 0x748f82ee, 0x78a5636f, 0x84c87814, 0x8cc70208, ++ 0x90befffa, 0xa4506ceb, 0xbef9a3f7, 0xc67178f2, ++ }; ++ __ align(CodeEntryAlignment); ++ StubCodeMark mark(this, "StubRoutines", name); ++ Label loop; ++ ++ // Allocate registers ++ Register t0 = A4; ++ Register t1 = A5; ++ Register t2 = A6; ++ Register t3 = A7; ++ Register buf = A0; 
++ Register state = A1; ++ Register ofs = A2; ++ Register limit = A3; ++ Register kptr = T8; ++ Register sa[8] = { T0, T1, T2, T3, T4, T5, T6, T7 }; ++ ++ // Entry ++ entry = __ pc(); ++ __ move(ofs, R0); ++ __ move(limit, R0); ++ ++ // Entry MB ++ entry_mb = __ pc(); ++ ++ // Allocate scratch space ++ __ addi_d(SP, SP, -64); ++ ++ // Load keys base address ++ __ li(kptr, (intptr_t)round_consts); ++ ++ __ bind(loop); ++ // Load state ++ __ ld_w(sa[0], state, 0); ++ __ ld_w(sa[1], state, 4); ++ __ ld_w(sa[2], state, 8); ++ __ ld_w(sa[3], state, 12); ++ __ ld_w(sa[4], state, 16); ++ __ ld_w(sa[5], state, 20); ++ __ ld_w(sa[6], state, 24); ++ __ ld_w(sa[7], state, 28); ++ ++ // Do 64 rounds of hashing ++ for (int i = 0; i < 64; i++) { ++ Register a = sa[(0 - i) & 7]; ++ Register b = sa[(1 - i) & 7]; ++ Register c = sa[(2 - i) & 7]; ++ Register d = sa[(3 - i) & 7]; ++ Register e = sa[(4 - i) & 7]; ++ Register f = sa[(5 - i) & 7]; ++ Register g = sa[(6 - i) & 7]; ++ Register h = sa[(7 - i) & 7]; ++ ++ if (i < 16) { ++ __ ld_w(t1, buf, i * 4); ++ __ revb_2h(t1, t1); ++ __ rotri_w(t1, t1, 16); ++ } else { ++ __ ld_w(t0, SP, ((i - 15) & 0xF) * 4); ++ __ ld_w(t1, SP, ((i - 16) & 0xF) * 4); ++ __ ld_w(t2, SP, ((i - 7) & 0xF) * 4); ++ __ add_w(t1, t1, t2); ++ __ rotri_w(t2, t0, 18); ++ __ srli_w(t3, t0, 3); ++ __ rotri_w(t0, t0, 7); ++ __ XOR(t2, t2, t3); ++ __ XOR(t0, t0, t2); ++ __ add_w(t1, t1, t0); ++ __ ld_w(t0, SP, ((i - 2) & 0xF) * 4); ++ __ rotri_w(t2, t0, 19); ++ __ srli_w(t3, t0, 10); ++ __ rotri_w(t0, t0, 17); ++ __ XOR(t2, t2, t3); ++ __ XOR(t0, t0, t2); ++ __ add_w(t1, t1, t0); ++ } ++ ++ __ rotri_w(t2, e, 11); ++ __ rotri_w(t3, e, 25); ++ __ rotri_w(t0, e, 6); ++ __ XOR(t2, t2, t3); ++ __ XOR(t0, t0, t2); ++ __ XOR(t2, g, f); ++ __ ld_w(t3, kptr, i * 4); ++ __ AND(t2, t2, e); ++ __ XOR(t2, t2, g); ++ __ add_w(t0, t0, t2); ++ __ add_w(t0, t0, t3); ++ __ add_w(h, h, t1); ++ __ add_w(h, h, t0); ++ __ add_w(d, d, h); ++ __ rotri_w(t2, a, 13); ++ __ rotri_w(t3, a, 22); ++ __ rotri_w(t0, a, 2); ++ __ XOR(t2, t2, t3); ++ __ XOR(t0, t0, t2); ++ __ add_w(h, h, t0); ++ __ OR(t0, c, b); ++ __ AND(t2, c, b); ++ __ AND(t0, t0, a); ++ __ OR(t0, t0, t2); ++ __ add_w(h, h, t0); ++ __ st_w(t1, SP, (i & 0xF) * 4); ++ } ++ ++ // Add to state ++ __ ld_w(t0, state, 0); ++ __ ld_w(t1, state, 4); ++ __ ld_w(t2, state, 8); ++ __ ld_w(t3, state, 12); ++ __ add_w(sa[0], sa[0], t0); ++ __ add_w(sa[1], sa[1], t1); ++ __ add_w(sa[2], sa[2], t2); ++ __ add_w(sa[3], sa[3], t3); ++ __ ld_w(t0, state, 16); ++ __ ld_w(t1, state, 20); ++ __ ld_w(t2, state, 24); ++ __ ld_w(t3, state, 28); ++ __ add_w(sa[4], sa[4], t0); ++ __ add_w(sa[5], sa[5], t1); ++ __ add_w(sa[6], sa[6], t2); ++ __ add_w(sa[7], sa[7], t3); ++ __ st_w(sa[0], state, 0); ++ __ st_w(sa[1], state, 4); ++ __ st_w(sa[2], state, 8); ++ __ st_w(sa[3], state, 12); ++ __ st_w(sa[4], state, 16); ++ __ st_w(sa[5], state, 20); ++ __ st_w(sa[6], state, 24); ++ __ st_w(sa[7], state, 28); ++ ++ __ addi_w(ofs, ofs, 64); ++ __ addi_d(buf, buf, 64); ++ __ bge(limit, ofs, loop); ++ __ move(V0, ofs); // return ofs ++ ++ __ addi_d(SP, SP, 64); ++ __ jr(RA); ++ } ++ ++ // Do NOT delete this node which stands for stub routine placeholder ++ address generate_updateBytesCRC32() { ++ assert(UseCRC32Intrinsics, "need CRC32 instructions support"); ++ ++ __ align(CodeEntryAlignment); ++ StubCodeMark mark(this, "StubRoutines", "updateBytesCRC32"); ++ ++ address start = __ pc(); ++ ++ const Register crc = A0; // crc ++ const Register buf = A1; // source java byte array address ++ 
const Register len = A2; // length ++ const Register tmp = A3; ++ ++ __ enter(); // required for proper stackwalking of RuntimeStub frame ++ ++ __ kernel_crc32(crc, buf, len, tmp); ++ ++ __ leave(); // required for proper stackwalking of RuntimeStub frame ++ __ jr(RA); ++ ++ return start; ++ } ++ ++ // Do NOT delete this node which stands for stub routine placeholder ++ address generate_updateBytesCRC32C() { ++ assert(UseCRC32CIntrinsics, "need CRC32C instructions support"); ++ ++ __ align(CodeEntryAlignment); ++ StubCodeMark mark(this, "StubRoutines", "updateBytesCRC32C"); ++ ++ address start = __ pc(); ++ ++ const Register crc = A0; // crc ++ const Register buf = A1; // source java byte array address ++ const Register len = A2; // length ++ const Register tmp = A3; ++ ++ __ enter(); // required for proper stackwalking of RuntimeStub frame ++ ++ __ kernel_crc32c(crc, buf, len, tmp); ++ ++ __ leave(); // required for proper stackwalking of RuntimeStub frame ++ __ jr(RA); ++ ++ return start; ++ } ++ ++ address generate_dsin_dcos(bool isCos) { ++ __ align(CodeEntryAlignment); ++ StubCodeMark mark(this, "StubRoutines", isCos ? "libmDcos" : "libmDsin"); ++ address start = __ pc(); ++ __ generate_dsin_dcos(isCos, (address)StubRoutines::la::_npio2_hw, ++ (address)StubRoutines::la::_two_over_pi, ++ (address)StubRoutines::la::_pio2, ++ (address)StubRoutines::la::_dsin_coef, ++ (address)StubRoutines::la::_dcos_coef); ++ return start; ++ } ++ ++ ++#undef __ ++#define __ masm-> ++ ++ // Continuation point for throwing of implicit exceptions that are ++ // not handled in the current activation. Fabricates an exception ++ // oop and initiates normal exception dispatching in this ++ // frame. Since we need to preserve callee-saved values (currently ++ // only for C2, but done for C1 as well) we need a callee-saved oop ++ // map and therefore have to make these stubs into RuntimeStubs ++ // rather than BufferBlobs. If the compiler needs all registers to ++ // be preserved between the fault point and the exception handler ++ // then it must assume responsibility for that in ++ // AbstractCompiler::continuation_for_implicit_null_exception or ++ // continuation_for_implicit_division_by_zero_exception. All other ++ // implicit exceptions (e.g., NullPointerException or ++ // AbstractMethodError on entry) are either at call sites or ++ // otherwise assume that stack unwinding will be initiated, so ++ // caller saved registers were assumed volatile in the compiler. ++ address generate_throw_exception(const char* name, ++ address runtime_entry, ++ bool restore_saved_exception_pc) { ++ // Information about frame layout at time of blocking runtime call. ++ // Note that we only have to preserve callee-saved registers since ++ // the compilers are responsible for supplying a continuation point ++ // if they expect all registers to be preserved. 
++ enum layout { ++ thread_off, // last_java_sp ++ S7_off, // callee saved register sp + 1 ++ S6_off, // callee saved register sp + 2 ++ S5_off, // callee saved register sp + 3 ++ S4_off, // callee saved register sp + 4 ++ S3_off, // callee saved register sp + 5 ++ S2_off, // callee saved register sp + 6 ++ S1_off, // callee saved register sp + 7 ++ S0_off, // callee saved register sp + 8 ++ FP_off, ++ ret_address, ++ framesize ++ }; ++ ++ int insts_size = 2048; ++ int locs_size = 32; ++ ++ // CodeBuffer* code = new CodeBuffer(insts_size, locs_size, 0, 0, 0, false, ++ // NULL, NULL, NULL, false, NULL, name, false); ++ CodeBuffer code (name , insts_size, locs_size); ++ OopMapSet* oop_maps = new OopMapSet(); ++ MacroAssembler* masm = new MacroAssembler(&code); ++ ++ address start = __ pc(); ++ ++ // This is an inlined and slightly modified version of call_VM ++ // which has the ability to fetch the return PC out of ++ // thread-local storage and also sets up last_Java_sp slightly ++ // differently than the real call_VM ++#ifndef OPT_THREAD ++ Register java_thread = TREG; ++ __ get_thread(java_thread); ++#else ++ Register java_thread = TREG; ++#endif ++ if (restore_saved_exception_pc) { ++ __ ld_d(RA, java_thread, in_bytes(JavaThread::saved_exception_pc_offset())); ++ } ++ __ enter(); // required for proper stackwalking of RuntimeStub frame ++ ++ __ addi_d(SP, SP, (-1) * (framesize-2) * wordSize); // prolog ++ __ st_d(S0, SP, S0_off * wordSize); ++ __ st_d(S1, SP, S1_off * wordSize); ++ __ st_d(S2, SP, S2_off * wordSize); ++ __ st_d(S3, SP, S3_off * wordSize); ++ __ st_d(S4, SP, S4_off * wordSize); ++ __ st_d(S5, SP, S5_off * wordSize); ++ __ st_d(S6, SP, S6_off * wordSize); ++ __ st_d(S7, SP, S7_off * wordSize); ++ ++ int frame_complete = __ pc() - start; ++ // push java thread (becomes first argument of C function) ++ __ st_d(java_thread, SP, thread_off * wordSize); ++ if (java_thread != A0) ++ __ move(A0, java_thread); ++ ++ // Set up last_Java_sp and last_Java_fp ++ Label before_call; ++ address the_pc = __ pc(); ++ __ bind(before_call); ++ __ set_last_Java_frame(java_thread, SP, FP, before_call); ++ // Align stack ++ assert(StackAlignmentInBytes == 16, "must be"); ++ __ bstrins_d(SP, R0, 3, 0); ++ ++ // Call runtime ++ // TODO: confirm reloc ++ __ call(runtime_entry, relocInfo::runtime_call_type); ++ // Generate oop map ++ OopMap* map = new OopMap(framesize, 0); ++ oop_maps->add_gc_map(the_pc - start, map); ++ ++ // restore the thread (cannot use the pushed argument since arguments ++ // may be overwritten by C code generated by an optimizing compiler); ++ // however can use the register value directly if it is callee saved. ++#ifndef OPT_THREAD ++ __ get_thread(java_thread); ++#endif ++ ++ __ ld_d(SP, java_thread, in_bytes(JavaThread::last_Java_sp_offset())); ++ __ reset_last_Java_frame(java_thread, true); ++ ++ // Restore callee save registers. 
This must be done after resetting the Java frame ++ __ ld_d(S0, SP, S0_off * wordSize); ++ __ ld_d(S1, SP, S1_off * wordSize); ++ __ ld_d(S2, SP, S2_off * wordSize); ++ __ ld_d(S3, SP, S3_off * wordSize); ++ __ ld_d(S4, SP, S4_off * wordSize); ++ __ ld_d(S5, SP, S5_off * wordSize); ++ __ ld_d(S6, SP, S6_off * wordSize); ++ __ ld_d(S7, SP, S7_off * wordSize); ++ ++ // discard arguments ++ __ addi_d(SP, FP, -2 * wordSize); // epilog ++ __ pop(FP); ++ // check for pending exceptions ++#ifdef ASSERT ++ Label L; ++ __ ld_d(AT, java_thread, in_bytes(Thread::pending_exception_offset())); ++ __ bne(AT, R0, L); ++ __ should_not_reach_here(); ++ __ bind(L); ++#endif //ASSERT ++ __ jmp(StubRoutines::forward_exception_entry(), relocInfo::runtime_call_type); ++ RuntimeStub* stub = RuntimeStub::new_runtime_stub(name, ++ &code, ++ frame_complete, ++ framesize, ++ oop_maps, false); ++ return stub->entry_point(); ++ } ++ ++ class MontgomeryMultiplyGenerator : public MacroAssembler { ++ ++ Register Pa_base, Pb_base, Pn_base, Pm_base, inv, Rlen, Rlen2, Ra, Rb, Rm, ++ Rn, Iam, Ibn, Rhi_ab, Rlo_ab, Rhi_mn, Rlo_mn, t0, t1, t2, Ri, Rj; ++ ++ bool _squaring; ++ ++ public: ++ MontgomeryMultiplyGenerator (Assembler *as, bool squaring) ++ : MacroAssembler(as->code()), _squaring(squaring) { ++ ++ // Register allocation ++ ++ Register reg = A0; ++ Pa_base = reg; // Argument registers: ++ if (squaring) ++ Pb_base = Pa_base; ++ else ++ Pb_base = ++reg; ++ Pn_base = ++reg; ++ Rlen = ++reg; ++ inv = ++reg; ++ Rlen2 = inv; // Reuse inv ++ Pm_base = ++reg; ++ ++ // Working registers: ++ Ra = ++reg; // The current digit of a, b, n, and m. ++ Rb = ++reg; ++ Rm = ++reg; ++ Rn = ++reg; ++ ++ Iam = ++reg; // Index to the current/next digit of a, b, n, and m. ++ Ibn = ++reg; ++ ++ t0 = ++reg; // Three registers which form a ++ t1 = ++reg; // triple-precision accumuator. ++ t2 = ++reg; ++ ++ Ri = ++reg; // Inner and outer loop indexes. ++ Rj = ++reg; ++ ++ if (squaring) { ++ Rhi_ab = ++reg; // Product registers: low and high parts ++ reg = S0; ++ Rlo_ab = ++reg; // of a*b and m*n. ++ } else { ++ reg = S0; ++ Rhi_ab = reg; // Product registers: low and high parts ++ Rlo_ab = ++reg; // of a*b and m*n. 
++ } ++ ++ Rhi_mn = ++reg; ++ Rlo_mn = ++reg; ++ } ++ ++ private: ++ void enter() { ++ addi_d(SP, SP, -6 * wordSize); ++ st_d(FP, SP, 0 * wordSize); ++ move(FP, SP); ++ } ++ ++ void leave() { ++ addi_d(T0, FP, 6 * wordSize); ++ ld_d(FP, FP, 0 * wordSize); ++ move(SP, T0); ++ } ++ ++ void save_regs() { ++ if (!_squaring) ++ st_d(Rhi_ab, FP, 5 * wordSize); ++ st_d(Rlo_ab, FP, 4 * wordSize); ++ st_d(Rhi_mn, FP, 3 * wordSize); ++ st_d(Rlo_mn, FP, 2 * wordSize); ++ st_d(Pm_base, FP, 1 * wordSize); ++ } ++ ++ void restore_regs() { ++ if (!_squaring) ++ ld_d(Rhi_ab, FP, 5 * wordSize); ++ ld_d(Rlo_ab, FP, 4 * wordSize); ++ ld_d(Rhi_mn, FP, 3 * wordSize); ++ ld_d(Rlo_mn, FP, 2 * wordSize); ++ ld_d(Pm_base, FP, 1 * wordSize); ++ } ++ ++ template ++ void unroll_2(Register count, T block, Register tmp) { ++ Label loop, end, odd; ++ andi(tmp, count, 1); ++ bnez(tmp, odd); ++ beqz(count, end); ++ align(16); ++ bind(loop); ++ (this->*block)(); ++ bind(odd); ++ (this->*block)(); ++ addi_w(count, count, -2); ++ blt(R0, count, loop); ++ bind(end); ++ } ++ ++ template ++ void unroll_2(Register count, T block, Register d, Register s, Register tmp) { ++ Label loop, end, odd; ++ andi(tmp, count, 1); ++ bnez(tmp, odd); ++ beqz(count, end); ++ align(16); ++ bind(loop); ++ (this->*block)(d, s, tmp); ++ bind(odd); ++ (this->*block)(d, s, tmp); ++ addi_w(count, count, -2); ++ blt(R0, count, loop); ++ bind(end); ++ } ++ ++ void acc(Register Rhi, Register Rlo, ++ Register t0, Register t1, Register t2, Register t, Register c) { ++ add_d(t0, t0, Rlo); ++ OR(t, t1, Rhi); ++ sltu(c, t0, Rlo); ++ add_d(t1, t1, Rhi); ++ add_d(t1, t1, c); ++ sltu(c, t1, t); ++ add_d(t2, t2, c); ++ } ++ ++ void pre1(Register i) { ++ block_comment("pre1"); ++ // Iam = 0; ++ // Ibn = i; ++ ++ slli_w(Ibn, i, LogBytesPerWord); ++ ++ // Ra = Pa_base[Iam]; ++ // Rb = Pb_base[Ibn]; ++ // Rm = Pm_base[Iam]; ++ // Rn = Pn_base[Ibn]; ++ ++ ld_d(Ra, Pa_base, 0); ++ ldx_d(Rb, Pb_base, Ibn); ++ ld_d(Rm, Pm_base, 0); ++ ldx_d(Rn, Pn_base, Ibn); ++ ++ move(Iam, R0); ++ ++ // Zero the m*n result. ++ move(Rhi_mn, R0); ++ move(Rlo_mn, R0); ++ } ++ ++ // The core multiply-accumulate step of a Montgomery ++ // multiplication. The idea is to schedule operations as a ++ // pipeline so that instructions with long latencies (loads and ++ // multiplies) have time to complete before their results are ++ // used. This most benefits in-order implementations of the ++ // architecture but out-of-order ones also benefit. ++ void step() { ++ block_comment("step"); ++ // MACC(Ra, Rb, t0, t1, t2); ++ // Ra = Pa_base[++Iam]; ++ // Rb = Pb_base[--Ibn]; ++ addi_d(Iam, Iam, wordSize); ++ addi_d(Ibn, Ibn, -wordSize); ++ mul_d(Rlo_ab, Ra, Rb); ++ mulh_du(Rhi_ab, Ra, Rb); ++ acc(Rhi_mn, Rlo_mn, t0, t1, t2, Ra, Rb); // The pending m*n from the ++ // previous iteration. 
++ ldx_d(Ra, Pa_base, Iam); ++ ldx_d(Rb, Pb_base, Ibn); ++ ++ // MACC(Rm, Rn, t0, t1, t2); ++ // Rm = Pm_base[Iam]; ++ // Rn = Pn_base[Ibn]; ++ mul_d(Rlo_mn, Rm, Rn); ++ mulh_du(Rhi_mn, Rm, Rn); ++ acc(Rhi_ab, Rlo_ab, t0, t1, t2, Rm, Rn); ++ ldx_d(Rm, Pm_base, Iam); ++ ldx_d(Rn, Pn_base, Ibn); ++ } ++ ++ void post1() { ++ block_comment("post1"); ++ ++ // MACC(Ra, Rb, t0, t1, t2); ++ mul_d(Rlo_ab, Ra, Rb); ++ mulh_du(Rhi_ab, Ra, Rb); ++ acc(Rhi_mn, Rlo_mn, t0, t1, t2, Ra, Rb); // The pending m*n ++ acc(Rhi_ab, Rlo_ab, t0, t1, t2, Ra, Rb); ++ ++ // Pm_base[Iam] = Rm = t0 * inv; ++ mul_d(Rm, t0, inv); ++ stx_d(Rm, Pm_base, Iam); ++ ++ // MACC(Rm, Rn, t0, t1, t2); ++ // t0 = t1; t1 = t2; t2 = 0; ++ mulh_du(Rhi_mn, Rm, Rn); ++ ++#ifndef PRODUCT ++ // assert(m[i] * n[0] + t0 == 0, "broken Montgomery multiply"); ++ { ++ mul_d(Rlo_mn, Rm, Rn); ++ add_d(Rlo_mn, t0, Rlo_mn); ++ Label ok; ++ beqz(Rlo_mn, ok); { ++ stop("broken Montgomery multiply"); ++ } bind(ok); ++ } ++#endif ++ ++ // We have very carefully set things up so that ++ // m[i]*n[0] + t0 == 0 (mod b), so we don't have to calculate ++ // the lower half of Rm * Rn because we know the result already: ++ // it must be -t0. t0 + (-t0) must generate a carry iff ++ // t0 != 0. So, rather than do a mul and an adds we just set ++ // the carry flag iff t0 is nonzero. ++ // ++ // mul_d(Rlo_mn, Rm, Rn); ++ // add_d(t0, t0, Rlo_mn); ++ OR(Ra, t1, Rhi_mn); ++ sltu(Rb, R0, t0); ++ add_d(t0, t1, Rhi_mn); ++ add_d(t0, t0, Rb); ++ sltu(Rb, t0, Ra); ++ add_d(t1, t2, Rb); ++ move(t2, R0); ++ } ++ ++ void pre2(Register i, Register len) { ++ block_comment("pre2"); ++ ++ // Rj == i-len ++ sub_w(Rj, i, len); ++ ++ // Iam = i - len; ++ // Ibn = len; ++ slli_w(Iam, Rj, LogBytesPerWord); ++ slli_w(Ibn, len, LogBytesPerWord); ++ ++ // Ra = Pa_base[++Iam]; ++ // Rb = Pb_base[--Ibn]; ++ // Rm = Pm_base[++Iam]; ++ // Rn = Pn_base[--Ibn]; ++ addi_d(Iam, Iam, wordSize); ++ addi_d(Ibn, Ibn, -wordSize); ++ ++ ldx_d(Ra, Pa_base, Iam); ++ ldx_d(Rb, Pb_base, Ibn); ++ ldx_d(Rm, Pm_base, Iam); ++ ldx_d(Rn, Pn_base, Ibn); ++ ++ move(Rhi_mn, R0); ++ move(Rlo_mn, R0); ++ } ++ ++ void post2(Register i, Register len) { ++ block_comment("post2"); ++ ++ sub_w(Rj, i, len); ++ alsl_d(Iam, Rj, Pm_base, LogBytesPerWord - 1); ++ ++ add_d(t0, t0, Rlo_mn); // The pending m*n, low part ++ ++ // As soon as we know the least significant digit of our result, ++ // store it. ++ // Pm_base[i-len] = t0; ++ st_d(t0, Iam, 0); ++ ++ // t0 = t1; t1 = t2; t2 = 0; ++ OR(Ra, t1, Rhi_mn); ++ sltu(Rb, t0, Rlo_mn); ++ add_d(t0, t1, Rhi_mn); // The pending m*n, high part ++ add_d(t0, t0, Rb); ++ sltu(Rb, t0, Ra); ++ add_d(t1, t2, Rb); ++ move(t2, R0); ++ } ++ ++ // A carry in t0 after Montgomery multiplication means that we ++ // should subtract multiples of n from our result in m. We'll ++ // keep doing that until there is no carry. 
++ void normalize(Register len) { ++ block_comment("normalize"); ++ // while (t0) ++ // t0 = sub(Pm_base, Pn_base, t0, len); ++ Label loop, post, again; ++ Register cnt = t1, i = t2, b = Ra, t = Rb; // Re-use registers; we're done with them now ++ beqz(t0, post); { ++ bind(again); { ++ move(i, R0); ++ move(b, R0); ++ slli_w(cnt, len, LogBytesPerWord); ++ align(16); ++ bind(loop); { ++ ldx_d(Rm, Pm_base, i); ++ ldx_d(Rn, Pn_base, i); ++ sltu(t, Rm, b); ++ sub_d(Rm, Rm, b); ++ sltu(b, Rm, Rn); ++ sub_d(Rm, Rm, Rn); ++ OR(b, b, t); ++ stx_d(Rm, Pm_base, i); ++ addi_w(i, i, BytesPerWord); ++ } blt(i, cnt, loop); ++ sub_d(t0, t0, b); ++ } bnez(t0, again); ++ } bind(post); ++ } ++ ++ // Move memory at s to d, reversing words. ++ // Increments d to end of copied memory ++ // Destroys tmp1, tmp2, tmp3 ++ // Preserves len ++ // Leaves s pointing to the address which was in d at start ++ void reverse(Register d, Register s, Register len, Register tmp1, Register tmp2) { ++ assert(tmp1 < S0 && tmp2 < S0, "register corruption"); ++ ++ alsl_d(s, len, s, LogBytesPerWord - 1); ++ move(tmp1, len); ++ unroll_2(tmp1, &MontgomeryMultiplyGenerator::reverse1, d, s, tmp2); ++ slli_w(s, len, LogBytesPerWord); ++ sub_d(s, d, s); ++ } ++ ++ // where ++ void reverse1(Register d, Register s, Register tmp) { ++ ld_d(tmp, s, -wordSize); ++ addi_d(s, s, -wordSize); ++ addi_d(d, d, wordSize); ++ rotri_d(tmp, tmp, 32); ++ st_d(tmp, d, -wordSize); ++ } ++ ++ public: ++ /** ++ * Fast Montgomery multiplication. The derivation of the ++ * algorithm is in A Cryptographic Library for the Motorola ++ * DSP56000, Dusse and Kaliski, Proc. EUROCRYPT 90, pp. 230-237. ++ * ++ * Arguments: ++ * ++ * Inputs for multiplication: ++ * A0 - int array elements a ++ * A1 - int array elements b ++ * A2 - int array elements n (the modulus) ++ * A3 - int length ++ * A4 - int inv ++ * A5 - int array elements m (the result) ++ * ++ * Inputs for squaring: ++ * A0 - int array elements a ++ * A1 - int array elements n (the modulus) ++ * A2 - int length ++ * A3 - int inv ++ * A4 - int array elements m (the result) ++ * ++ */ ++ address generate_multiply() { ++ Label argh, nothing; ++ bind(argh); ++ stop("MontgomeryMultiply total_allocation must be <= 8192"); ++ ++ align(CodeEntryAlignment); ++ address entry = pc(); ++ ++ beqz(Rlen, nothing); ++ ++ enter(); ++ ++ // Make room. ++ sltui(Ra, Rlen, 513); ++ beqz(Ra, argh); ++ slli_w(Ra, Rlen, exact_log2(4 * sizeof (jint))); ++ sub_d(Ra, SP, Ra); ++ ++ srli_w(Rlen, Rlen, 1); // length in longwords = len/2 ++ ++ { ++ // Copy input args, reversing as we go. We use Ra as a ++ // temporary variable. ++ reverse(Ra, Pa_base, Rlen, t0, t1); ++ if (!_squaring) ++ reverse(Ra, Pb_base, Rlen, t0, t1); ++ reverse(Ra, Pn_base, Rlen, t0, t1); ++ } ++ ++ // Push all call-saved registers and also Pm_base which we'll need ++ // at the end. 
++ save_regs(); ++ ++#ifndef PRODUCT ++ // assert(inv * n[0] == -1UL, "broken inverse in Montgomery multiply"); ++ { ++ ld_d(Rn, Pn_base, 0); ++ li(t0, -1); ++ mul_d(Rlo_mn, Rn, inv); ++ Label ok; ++ beq(Rlo_mn, t0, ok); { ++ stop("broken inverse in Montgomery multiply"); ++ } bind(ok); ++ } ++#endif ++ ++ move(Pm_base, Ra); ++ ++ move(t0, R0); ++ move(t1, R0); ++ move(t2, R0); ++ ++ block_comment("for (int i = 0; i < len; i++) {"); ++ move(Ri, R0); { ++ Label loop, end; ++ bge(Ri, Rlen, end); ++ ++ bind(loop); ++ pre1(Ri); ++ ++ block_comment(" for (j = i; j; j--) {"); { ++ move(Rj, Ri); ++ unroll_2(Rj, &MontgomeryMultiplyGenerator::step, Rlo_ab); ++ } block_comment(" } // j"); ++ ++ post1(); ++ addi_w(Ri, Ri, 1); ++ blt(Ri, Rlen, loop); ++ bind(end); ++ block_comment("} // i"); ++ } ++ ++ block_comment("for (int i = len; i < 2*len; i++) {"); ++ move(Ri, Rlen); ++ slli_w(Rlen2, Rlen, 1); { ++ Label loop, end; ++ bge(Ri, Rlen2, end); ++ ++ bind(loop); ++ pre2(Ri, Rlen); ++ ++ block_comment(" for (j = len*2-i-1; j; j--) {"); { ++ sub_w(Rj, Rlen2, Ri); ++ addi_w(Rj, Rj, -1); ++ unroll_2(Rj, &MontgomeryMultiplyGenerator::step, Rlo_ab); ++ } block_comment(" } // j"); ++ ++ post2(Ri, Rlen); ++ addi_w(Ri, Ri, 1); ++ blt(Ri, Rlen2, loop); ++ bind(end); ++ } ++ block_comment("} // i"); ++ ++ normalize(Rlen); ++ ++ move(Ra, Pm_base); // Save Pm_base in Ra ++ restore_regs(); // Restore caller's Pm_base ++ ++ // Copy our result into caller's Pm_base ++ reverse(Pm_base, Ra, Rlen, t0, t1); ++ ++ leave(); ++ bind(nothing); ++ jr(RA); ++ ++ return entry; ++ } ++ // In C, approximately: ++ ++ // void ++ // montgomery_multiply(unsigned long Pa_base[], unsigned long Pb_base[], ++ // unsigned long Pn_base[], unsigned long Pm_base[], ++ // unsigned long inv, int len) { ++ // unsigned long t0 = 0, t1 = 0, t2 = 0; // Triple-precision accumulator ++ // unsigned long Ra, Rb, Rn, Rm; ++ // int i, Iam, Ibn; ++ ++ // assert(inv * Pn_base[0] == -1UL, "broken inverse in Montgomery multiply"); ++ ++ // for (i = 0; i < len; i++) { ++ // int j; ++ ++ // Iam = 0; ++ // Ibn = i; ++ ++ // Ra = Pa_base[Iam]; ++ // Rb = Pb_base[Iam]; ++ // Rm = Pm_base[Ibn]; ++ // Rn = Pn_base[Ibn]; ++ ++ // int iters = i; ++ // for (j = 0; iters--; j++) { ++ // assert(Ra == Pa_base[j] && Rb == Pb_base[i-j], "must be"); ++ // MACC(Ra, Rb, t0, t1, t2); ++ // Ra = Pa_base[++Iam]; ++ // Rb = pb_base[--Ibn]; ++ // assert(Rm == Pm_base[j] && Rn == Pn_base[i-j], "must be"); ++ // MACC(Rm, Rn, t0, t1, t2); ++ // Rm = Pm_base[++Iam]; ++ // Rn = Pn_base[--Ibn]; ++ // } ++ ++ // assert(Ra == Pa_base[i] && Rb == Pb_base[0], "must be"); ++ // MACC(Ra, Rb, t0, t1, t2); ++ // Pm_base[Iam] = Rm = t0 * inv; ++ // assert(Rm == Pm_base[i] && Rn == Pn_base[0], "must be"); ++ // MACC(Rm, Rn, t0, t1, t2); ++ ++ // assert(t0 == 0, "broken Montgomery multiply"); ++ ++ // t0 = t1; t1 = t2; t2 = 0; ++ // } ++ ++ // for (i = len; i < 2*len; i++) { ++ // int j; ++ ++ // Iam = i - len; ++ // Ibn = len; ++ ++ // Ra = Pa_base[++Iam]; ++ // Rb = Pb_base[--Ibn]; ++ // Rm = Pm_base[++Iam]; ++ // Rn = Pn_base[--Ibn]; ++ ++ // int iters = len*2-i-1; ++ // for (j = i-len+1; iters--; j++) { ++ // assert(Ra == Pa_base[j] && Rb == Pb_base[i-j], "must be"); ++ // MACC(Ra, Rb, t0, t1, t2); ++ // Ra = Pa_base[++Iam]; ++ // Rb = Pb_base[--Ibn]; ++ // assert(Rm == Pm_base[j] && Rn == Pn_base[i-j], "must be"); ++ // MACC(Rm, Rn, t0, t1, t2); ++ // Rm = Pm_base[++Iam]; ++ // Rn = Pn_base[--Ibn]; ++ // } ++ ++ // Pm_base[i-len] = t0; ++ // t0 = t1; t1 = t2; t2 = 0; ++ // } ++ ++ // while 
(t0) ++ // t0 = sub(Pm_base, Pn_base, t0, len); ++ // } ++ }; ++ ++ // Initialization ++ void generate_initial() { ++ // Generates all stubs and initializes the entry points ++ ++ //------------------------------------------------------------- ++ //----------------------------------------------------------- ++ // entry points that exist in all platforms ++ // Note: This is code that could be shared among different platforms - however the benefit seems to be smaller ++ // than the disadvantage of having a much more complicated generator structure. ++ // See also comment in stubRoutines.hpp. ++ StubRoutines::_forward_exception_entry = generate_forward_exception(); ++ StubRoutines::_call_stub_entry = generate_call_stub(StubRoutines::_call_stub_return_address); ++ // is referenced by megamorphic call ++ StubRoutines::_catch_exception_entry = generate_catch_exception(); ++ ++ StubRoutines::_throw_StackOverflowError_entry = ++ generate_throw_exception("StackOverflowError throw_exception", ++ CAST_FROM_FN_PTR(address, SharedRuntime::throw_StackOverflowError), ++ false); ++ StubRoutines::_throw_delayed_StackOverflowError_entry = ++ generate_throw_exception("delayed StackOverflowError throw_exception", ++ CAST_FROM_FN_PTR(address, SharedRuntime::throw_delayed_StackOverflowError), ++ false); ++ ++ if (UseCRC32Intrinsics) { ++ // set table address before stub generation which use it ++ StubRoutines::_crc_table_adr = (address)StubRoutines::la::_crc_table; ++ StubRoutines::_updateBytesCRC32 = generate_updateBytesCRC32(); ++ } ++ ++ if (UseCRC32CIntrinsics) { ++ StubRoutines::_updateBytesCRC32C = generate_updateBytesCRC32C(); ++ } ++ } ++ ++ void generate_all() { ++ // Generates all stubs and initializes the entry points ++ ++ // These entry points require SharedInfo::stack0 to be set up in ++ // non-core builds and need to be relocatable, so they each ++ // fabricate a RuntimeStub internally. 
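++    // Each of the _throw_* entries below is produced by the
++    // generate_throw_exception() helper defined earlier in this file,
++    // which wraps the SharedRuntime call in a small RuntimeStub with an
++    // oop map (rough summary; see that helper for the details).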
++ StubRoutines::_throw_AbstractMethodError_entry = generate_throw_exception("AbstractMethodError throw_exception", ++ CAST_FROM_FN_PTR(address, SharedRuntime::throw_AbstractMethodError), false); ++ ++ StubRoutines::_throw_IncompatibleClassChangeError_entry = generate_throw_exception("IncompatibleClassChangeError throw_exception", ++ CAST_FROM_FN_PTR(address, SharedRuntime:: throw_IncompatibleClassChangeError), false); ++ ++ StubRoutines::_throw_NullPointerException_at_call_entry = generate_throw_exception("NullPointerException at call throw_exception", ++ CAST_FROM_FN_PTR(address, SharedRuntime::throw_NullPointerException_at_call), false); ++ ++ StubRoutines::la::_vector_iota_indices = generate_iota_indices("iota_indices"); ++ ++ // entry points that are platform specific ++ ++ // support for verify_oop (must happen after universe_init) ++ StubRoutines::_verify_oop_subroutine_entry = generate_verify_oop(); ++#ifndef CORE ++ // arraycopy stubs used by compilers ++ generate_arraycopy_stubs(); ++#endif ++ ++ if (UseLSX && vmIntrinsics::is_intrinsic_available(vmIntrinsics::_dsin)) { ++ StubRoutines::_dsin = generate_dsin_dcos(/* isCos = */ false); ++ } ++ ++ if (UseLSX && vmIntrinsics::is_intrinsic_available(vmIntrinsics::_dcos)) { ++ StubRoutines::_dcos = generate_dsin_dcos(/* isCos = */ true); ++ } ++ ++#ifdef COMPILER2 ++ if (UseMulAddIntrinsic) { ++ StubRoutines::_mulAdd = generate_mulAdd(); ++ } ++ ++ if (UseMontgomeryMultiplyIntrinsic) { ++ StubCodeMark mark(this, "StubRoutines", "montgomeryMultiply"); ++ MontgomeryMultiplyGenerator g(_masm, false /* squaring */); ++ StubRoutines::_montgomeryMultiply = g.generate_multiply(); ++ } ++ ++ if (UseMontgomerySquareIntrinsic) { ++ StubCodeMark mark(this, "StubRoutines", "montgomerySquare"); ++ MontgomeryMultiplyGenerator g(_masm, true /* squaring */); ++ // We use generate_multiply() rather than generate_square() ++ // because it's faster for the sizes of modulus we care about. 
++ StubRoutines::_montgomerySquare = g.generate_multiply(); ++ } ++#endif ++ ++ if (UseAESIntrinsics) { ++ StubRoutines::_aescrypt_encryptBlock = generate_aescrypt_encryptBlock(false); ++ StubRoutines::_aescrypt_decryptBlock = generate_aescrypt_decryptBlock(false); ++ StubRoutines::_cipherBlockChaining_encryptAESCrypt = generate_aescrypt_encryptBlock(true); ++ StubRoutines::_cipherBlockChaining_decryptAESCrypt = generate_aescrypt_decryptBlock(true); ++ } ++ ++ if (UseMD5Intrinsics) { ++ generate_md5_implCompress("md5_implCompress", StubRoutines::_md5_implCompress, StubRoutines::_md5_implCompressMB); ++ } ++ ++ if (UseSHA1Intrinsics) { ++ generate_sha1_implCompress("sha1_implCompress", StubRoutines::_sha1_implCompress, StubRoutines::_sha1_implCompressMB); ++ } ++ ++ if (UseSHA256Intrinsics) { ++ generate_sha256_implCompress("sha256_implCompress", StubRoutines::_sha256_implCompress, StubRoutines::_sha256_implCompressMB); ++ } ++ ++ generate_string_indexof_stubs(); ++ ++ BarrierSetNMethod* bs_nm = BarrierSet::barrier_set()->barrier_set_nmethod(); ++ if (bs_nm != NULL) { ++ StubRoutines::la::_method_entry_barrier = generate_method_entry_barrier(); ++ } ++ } ++ ++ public: ++ StubGenerator(CodeBuffer* code, bool all) : StubCodeGenerator(code) { ++ if (all) { ++ generate_all(); ++ } else { ++ generate_initial(); ++ } ++ } ++}; // end class declaration ++ ++#define UCM_TABLE_MAX_ENTRIES 7 ++void StubGenerator_generate(CodeBuffer* code, bool all) { ++ if (UnsafeCopyMemory::_table == NULL) { ++ UnsafeCopyMemory::create_table(UCM_TABLE_MAX_ENTRIES); ++ } ++ StubGenerator g(code, all); ++} +diff --git a/src/hotspot/cpu/loongarch/stubRoutines_loongarch.hpp b/src/hotspot/cpu/loongarch/stubRoutines_loongarch.hpp +new file mode 100644 +index 00000000000..20f2a14afcd +--- /dev/null ++++ b/src/hotspot/cpu/loongarch/stubRoutines_loongarch.hpp +@@ -0,0 +1,89 @@ ++/* ++ * Copyright (c) 2003, 2013, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#ifndef CPU_LOONGARCH_STUBROUTINES_LOONGARCH_64_HPP ++#define CPU_LOONGARCH_STUBROUTINES_LOONGARCH_64_HPP ++ ++// This file holds the platform specific parts of the StubRoutines ++// definition. See stubRoutines.hpp for a description on how to ++// extend it. 
++ ++static bool returns_to_call_stub(address return_pc){ ++ return return_pc == _call_stub_return_address||return_pc == la::get_call_stub_compiled_return(); ++} ++ ++enum platform_dependent_constants { ++ code_size1 = 20000, // simply increase if too small (assembler will crash if too small) ++ code_size2 = 60000 // simply increase if too small (assembler will crash if too small) ++}; ++ ++class la { ++ friend class StubGenerator; ++ friend class VMStructs; ++ private: ++ // If we call compiled code directly from the call stub we will ++ // need to adjust the return back to the call stub to a specialized ++ // piece of code that can handle compiled results and cleaning the fpu ++ // stack. The variable holds that location. ++ static address _call_stub_compiled_return; ++ static address _vector_iota_indices; ++ static juint _crc_table[]; ++ static address _method_entry_barrier; ++ // begin trigonometric tables block. See comments in .cpp file ++ static juint _npio2_hw[]; ++ static jdouble _two_over_pi[]; ++ static jdouble _pio2[]; ++ static jdouble _dsin_coef[]; ++ static jdouble _dcos_coef[]; ++ // end trigonometric tables block ++ ++ static address _string_indexof_linear_ll; ++ static address _string_indexof_linear_uu; ++ static address _string_indexof_linear_ul; ++ ++public: ++ // Call back points for traps in compiled code ++ static address get_call_stub_compiled_return() { return _call_stub_compiled_return; } ++ static void set_call_stub_compiled_return(address ret){ _call_stub_compiled_return = ret; } ++ static address vector_iota_indices() { return _vector_iota_indices; } ++ ++ static address method_entry_barrier() { ++ return _method_entry_barrier; ++ } ++ ++ static address string_indexof_linear_ul() { ++ return _string_indexof_linear_ul; ++ } ++ ++ static address string_indexof_linear_ll() { ++ return _string_indexof_linear_ll; ++ } ++ ++ static address string_indexof_linear_uu() { ++ return _string_indexof_linear_uu; ++ } ++}; ++ ++#endif // CPU_LOONGARCH_STUBROUTINES_LOONGARCH_64_HPP +diff --git a/src/hotspot/cpu/loongarch/stubRoutines_loongarch_64.cpp b/src/hotspot/cpu/loongarch/stubRoutines_loongarch_64.cpp +new file mode 100644 +index 00000000000..53ded54ae6c +--- /dev/null ++++ b/src/hotspot/cpu/loongarch/stubRoutines_loongarch_64.cpp +@@ -0,0 +1,183 @@ ++/* ++ * Copyright (c) 2003, 2013, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ * ++ */ ++ ++#include "precompiled.hpp" ++#include "runtime/deoptimization.hpp" ++#include "runtime/frame.inline.hpp" ++#include "runtime/stubRoutines.hpp" ++#include "runtime/thread.inline.hpp" ++ ++// a description of how to extend it, see the stubRoutines.hpp file. ++ ++//find the last fp value ++address StubRoutines::la::_call_stub_compiled_return = NULL; ++address StubRoutines::la::_method_entry_barrier = NULL; ++address StubRoutines::la::_vector_iota_indices = NULL; ++address StubRoutines::la::_string_indexof_linear_ll = NULL; ++address StubRoutines::la::_string_indexof_linear_uu = NULL; ++address StubRoutines::la::_string_indexof_linear_ul = NULL; ++ ++/** ++ * crc_table[] from jdk/src/share/native/java/util/zip/zlib-1.2.5/crc32.h ++ */ ++juint StubRoutines::la::_crc_table[] = ++{ ++ 0x00000000UL, 0x77073096UL, 0xee0e612cUL, 0x990951baUL, 0x076dc419UL, ++ 0x706af48fUL, 0xe963a535UL, 0x9e6495a3UL, 0x0edb8832UL, 0x79dcb8a4UL, ++ 0xe0d5e91eUL, 0x97d2d988UL, 0x09b64c2bUL, 0x7eb17cbdUL, 0xe7b82d07UL, ++ 0x90bf1d91UL, 0x1db71064UL, 0x6ab020f2UL, 0xf3b97148UL, 0x84be41deUL, ++ 0x1adad47dUL, 0x6ddde4ebUL, 0xf4d4b551UL, 0x83d385c7UL, 0x136c9856UL, ++ 0x646ba8c0UL, 0xfd62f97aUL, 0x8a65c9ecUL, 0x14015c4fUL, 0x63066cd9UL, ++ 0xfa0f3d63UL, 0x8d080df5UL, 0x3b6e20c8UL, 0x4c69105eUL, 0xd56041e4UL, ++ 0xa2677172UL, 0x3c03e4d1UL, 0x4b04d447UL, 0xd20d85fdUL, 0xa50ab56bUL, ++ 0x35b5a8faUL, 0x42b2986cUL, 0xdbbbc9d6UL, 0xacbcf940UL, 0x32d86ce3UL, ++ 0x45df5c75UL, 0xdcd60dcfUL, 0xabd13d59UL, 0x26d930acUL, 0x51de003aUL, ++ 0xc8d75180UL, 0xbfd06116UL, 0x21b4f4b5UL, 0x56b3c423UL, 0xcfba9599UL, ++ 0xb8bda50fUL, 0x2802b89eUL, 0x5f058808UL, 0xc60cd9b2UL, 0xb10be924UL, ++ 0x2f6f7c87UL, 0x58684c11UL, 0xc1611dabUL, 0xb6662d3dUL, 0x76dc4190UL, ++ 0x01db7106UL, 0x98d220bcUL, 0xefd5102aUL, 0x71b18589UL, 0x06b6b51fUL, ++ 0x9fbfe4a5UL, 0xe8b8d433UL, 0x7807c9a2UL, 0x0f00f934UL, 0x9609a88eUL, ++ 0xe10e9818UL, 0x7f6a0dbbUL, 0x086d3d2dUL, 0x91646c97UL, 0xe6635c01UL, ++ 0x6b6b51f4UL, 0x1c6c6162UL, 0x856530d8UL, 0xf262004eUL, 0x6c0695edUL, ++ 0x1b01a57bUL, 0x8208f4c1UL, 0xf50fc457UL, 0x65b0d9c6UL, 0x12b7e950UL, ++ 0x8bbeb8eaUL, 0xfcb9887cUL, 0x62dd1ddfUL, 0x15da2d49UL, 0x8cd37cf3UL, ++ 0xfbd44c65UL, 0x4db26158UL, 0x3ab551ceUL, 0xa3bc0074UL, 0xd4bb30e2UL, ++ 0x4adfa541UL, 0x3dd895d7UL, 0xa4d1c46dUL, 0xd3d6f4fbUL, 0x4369e96aUL, ++ 0x346ed9fcUL, 0xad678846UL, 0xda60b8d0UL, 0x44042d73UL, 0x33031de5UL, ++ 0xaa0a4c5fUL, 0xdd0d7cc9UL, 0x5005713cUL, 0x270241aaUL, 0xbe0b1010UL, ++ 0xc90c2086UL, 0x5768b525UL, 0x206f85b3UL, 0xb966d409UL, 0xce61e49fUL, ++ 0x5edef90eUL, 0x29d9c998UL, 0xb0d09822UL, 0xc7d7a8b4UL, 0x59b33d17UL, ++ 0x2eb40d81UL, 0xb7bd5c3bUL, 0xc0ba6cadUL, 0xedb88320UL, 0x9abfb3b6UL, ++ 0x03b6e20cUL, 0x74b1d29aUL, 0xead54739UL, 0x9dd277afUL, 0x04db2615UL, ++ 0x73dc1683UL, 0xe3630b12UL, 0x94643b84UL, 0x0d6d6a3eUL, 0x7a6a5aa8UL, ++ 0xe40ecf0bUL, 0x9309ff9dUL, 0x0a00ae27UL, 0x7d079eb1UL, 0xf00f9344UL, ++ 0x8708a3d2UL, 0x1e01f268UL, 0x6906c2feUL, 0xf762575dUL, 0x806567cbUL, ++ 0x196c3671UL, 0x6e6b06e7UL, 0xfed41b76UL, 0x89d32be0UL, 0x10da7a5aUL, ++ 0x67dd4accUL, 0xf9b9df6fUL, 0x8ebeeff9UL, 0x17b7be43UL, 0x60b08ed5UL, ++ 0xd6d6a3e8UL, 0xa1d1937eUL, 0x38d8c2c4UL, 0x4fdff252UL, 0xd1bb67f1UL, ++ 0xa6bc5767UL, 0x3fb506ddUL, 0x48b2364bUL, 0xd80d2bdaUL, 0xaf0a1b4cUL, ++ 0x36034af6UL, 0x41047a60UL, 0xdf60efc3UL, 0xa867df55UL, 0x316e8eefUL, ++ 0x4669be79UL, 0xcb61b38cUL, 0xbc66831aUL, 0x256fd2a0UL, 0x5268e236UL, ++ 0xcc0c7795UL, 0xbb0b4703UL, 0x220216b9UL, 0x5505262fUL, 0xc5ba3bbeUL, ++ 0xb2bd0b28UL, 0x2bb45a92UL, 0x5cb36a04UL, 
0xc2d7ffa7UL, 0xb5d0cf31UL, ++ 0x2cd99e8bUL, 0x5bdeae1dUL, 0x9b64c2b0UL, 0xec63f226UL, 0x756aa39cUL, ++ 0x026d930aUL, 0x9c0906a9UL, 0xeb0e363fUL, 0x72076785UL, 0x05005713UL, ++ 0x95bf4a82UL, 0xe2b87a14UL, 0x7bb12baeUL, 0x0cb61b38UL, 0x92d28e9bUL, ++ 0xe5d5be0dUL, 0x7cdcefb7UL, 0x0bdbdf21UL, 0x86d3d2d4UL, 0xf1d4e242UL, ++ 0x68ddb3f8UL, 0x1fda836eUL, 0x81be16cdUL, 0xf6b9265bUL, 0x6fb077e1UL, ++ 0x18b74777UL, 0x88085ae6UL, 0xff0f6a70UL, 0x66063bcaUL, 0x11010b5cUL, ++ 0x8f659effUL, 0xf862ae69UL, 0x616bffd3UL, 0x166ccf45UL, 0xa00ae278UL, ++ 0xd70dd2eeUL, 0x4e048354UL, 0x3903b3c2UL, 0xa7672661UL, 0xd06016f7UL, ++ 0x4969474dUL, 0x3e6e77dbUL, 0xaed16a4aUL, 0xd9d65adcUL, 0x40df0b66UL, ++ 0x37d83bf0UL, 0xa9bcae53UL, 0xdebb9ec5UL, 0x47b2cf7fUL, 0x30b5ffe9UL, ++ 0xbdbdf21cUL, 0xcabac28aUL, 0x53b39330UL, 0x24b4a3a6UL, 0xbad03605UL, ++ 0xcdd70693UL, 0x54de5729UL, 0x23d967bfUL, 0xb3667a2eUL, 0xc4614ab8UL, ++ 0x5d681b02UL, 0x2a6f2b94UL, 0xb40bbe37UL, 0xc30c8ea1UL, 0x5a05df1bUL, ++ 0x2d02ef8dUL ++}; ++ ++ATTRIBUTE_ALIGNED(64) juint StubRoutines::la::_npio2_hw[] = { ++ // first, various coefficient values: 0.5, invpio2, pio2_1, pio2_1t, pio2_2, ++ // pio2_2t, pio2_3, pio2_3t ++ // This is a small optimization wich keeping double[8] values in int[] table ++ // to have less address calculation instructions ++ // ++ // invpio2: 53 bits of 2/pi (enough for cases when trigonometric argument is small) ++ // pio2_1: first 33 bit of pi/2 ++ // pio2_1t: pi/2 - pio2_1 ++ // pio2_2: second 33 bit of pi/2 ++ // pio2_2t: pi/2 - (pio2_1+pio2_2) ++ // pio2_3: third 33 bit of pi/2 ++ // pio2_3t: pi/2 - (pio2_1+pio2_2+pio2_3) ++ 0x00000000, 0x3fe00000, // 0.5 ++ 0x6DC9C883, 0x3FE45F30, // invpio2 = 6.36619772367581382433e-01 ++ 0x54400000, 0x3FF921FB, // pio2_1 = 1.57079632673412561417e+00 ++ 0x1A626331, 0x3DD0B461, // pio2_1t = 6.07710050650619224932e-11 ++ 0x1A600000, 0x3DD0B461, // pio2_2 = 6.07710050630396597660e-11 ++ 0x2E037073, 0x3BA3198A, // pio2_2t = 2.02226624879595063154e-21 ++ 0x2E000000, 0x3BA3198A, // pio2_3 = 2.02226624871116645580e-21 ++ 0x252049C1, 0x397B839A, // pio2_3t = 8.47842766036889956997e-32 ++ // now, npio2_hw itself ++ 0x3FF921FB, 0x400921FB, 0x4012D97C, 0x401921FB, 0x401F6A7A, 0x4022D97C, ++ 0x4025FDBB, 0x402921FB, 0x402C463A, 0x402F6A7A, 0x4031475C, 0x4032D97C, ++ 0x40346B9C, 0x4035FDBB, 0x40378FDB, 0x403921FB, 0x403AB41B, 0x403C463A, ++ 0x403DD85A, 0x403F6A7A, 0x40407E4C, 0x4041475C, 0x4042106C, 0x4042D97C, ++ 0x4043A28C, 0x40446B9C, 0x404534AC, 0x4045FDBB, 0x4046C6CB, 0x40478FDB, ++ 0x404858EB, 0x404921FB ++}; ++ ++// Coefficients for sin(x) polynomial approximation: S1..S6. ++// See kernel_sin comments in macroAssembler_loongarch64_trig.cpp for details ++ATTRIBUTE_ALIGNED(64) jdouble StubRoutines::la::_dsin_coef[] = { ++ -1.66666666666666324348e-01, // 0xBFC5555555555549 ++ 8.33333333332248946124e-03, // 0x3F8111111110F8A6 ++ -1.98412698298579493134e-04, // 0xBF2A01A019C161D5 ++ 2.75573137070700676789e-06, // 0x3EC71DE357B1FE7D ++ -2.50507602534068634195e-08, // 0xBE5AE5E68A2B9CEB ++ 1.58969099521155010221e-10 // 0x3DE5D93A5ACFD57C ++}; ++ ++// Coefficients for cos(x) polynomial approximation: C1..C6. 
++// See kernel_cos comments in macroAssembler_loongarch64_trig.cpp for details ++ATTRIBUTE_ALIGNED(64) jdouble StubRoutines::la::_dcos_coef[] = { ++ 4.16666666666666019037e-02, // c0x3FA555555555554C ++ -1.38888888888741095749e-03, // 0xBF56C16C16C15177 ++ 2.48015872894767294178e-05, // 0x3EFA01A019CB1590 ++ -2.75573143513906633035e-07, // 0xBE927E4F809C52AD ++ 2.08757232129817482790e-09, // 0x3E21EE9EBDB4B1C4 ++ -1.13596475577881948265e-11 // 0xBDA8FAE9BE8838D4 ++}; ++ ++// Table of constants for 2/pi, 396 Hex digits (476 decimal) of 2/pi. ++// Used in cases of very large argument. 396 hex digits is enough to support ++// required precision. ++// Converted to double to avoid unnecessary conversion in code ++// NOTE: table looks like original int table: {0xA2F983, 0x6E4E44,...} with ++// only (double) conversion added ++ATTRIBUTE_ALIGNED(64) jdouble StubRoutines::la::_two_over_pi[] = { ++ (double)0xA2F983, (double)0x6E4E44, (double)0x1529FC, (double)0x2757D1, (double)0xF534DD, (double)0xC0DB62, ++ (double)0x95993C, (double)0x439041, (double)0xFE5163, (double)0xABDEBB, (double)0xC561B7, (double)0x246E3A, ++ (double)0x424DD2, (double)0xE00649, (double)0x2EEA09, (double)0xD1921C, (double)0xFE1DEB, (double)0x1CB129, ++ (double)0xA73EE8, (double)0x8235F5, (double)0x2EBB44, (double)0x84E99C, (double)0x7026B4, (double)0x5F7E41, ++ (double)0x3991D6, (double)0x398353, (double)0x39F49C, (double)0x845F8B, (double)0xBDF928, (double)0x3B1FF8, ++ (double)0x97FFDE, (double)0x05980F, (double)0xEF2F11, (double)0x8B5A0A, (double)0x6D1F6D, (double)0x367ECF, ++ (double)0x27CB09, (double)0xB74F46, (double)0x3F669E, (double)0x5FEA2D, (double)0x7527BA, (double)0xC7EBE5, ++ (double)0xF17B3D, (double)0x0739F7, (double)0x8A5292, (double)0xEA6BFB, (double)0x5FB11F, (double)0x8D5D08, ++ (double)0x560330, (double)0x46FC7B, (double)0x6BABF0, (double)0xCFBC20, (double)0x9AF436, (double)0x1DA9E3, ++ (double)0x91615E, (double)0xE61B08, (double)0x659985, (double)0x5F14A0, (double)0x68408D, (double)0xFFD880, ++ (double)0x4D7327, (double)0x310606, (double)0x1556CA, (double)0x73A8C9, (double)0x60E27B, (double)0xC08C6B, ++}; ++ ++// Pi over 2 value ++ATTRIBUTE_ALIGNED(64) jdouble StubRoutines::la::_pio2[] = { ++ 1.57079625129699707031e+00, // 0x3FF921FB40000000 ++ 7.54978941586159635335e-08, // 0x3E74442D00000000 ++ 5.39030252995776476554e-15, // 0x3CF8469880000000 ++ 3.28200341580791294123e-22, // 0x3B78CC5160000000 ++ 1.27065575308067607349e-29, // 0x39F01B8380000000 ++ 1.22933308981111328932e-36, // 0x387A252040000000 ++ 2.73370053816464559624e-44, // 0x36E3822280000000 ++ 2.16741683877804819444e-51, // 0x3569F31D00000000 ++}; +diff --git a/src/hotspot/cpu/loongarch/templateInterpreterGenerator_loongarch.cpp b/src/hotspot/cpu/loongarch/templateInterpreterGenerator_loongarch.cpp +new file mode 100644 +index 00000000000..02af7c8ffa7 +--- /dev/null ++++ b/src/hotspot/cpu/loongarch/templateInterpreterGenerator_loongarch.cpp +@@ -0,0 +1,2197 @@ ++/* ++ * Copyright (c) 2003, 2013, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2023, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. 
++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#include "precompiled.hpp" ++#include "asm/macroAssembler.hpp" ++#include "classfile/javaClasses.hpp" ++#include "gc/shared/barrierSetAssembler.hpp" ++#include "interpreter/bytecodeHistogram.hpp" ++#include "interpreter/interp_masm.hpp" ++#include "interpreter/interpreter.hpp" ++#include "interpreter/interpreterRuntime.hpp" ++#include "interpreter/templateInterpreterGenerator.hpp" ++#include "interpreter/templateTable.hpp" ++#include "oops/arrayOop.hpp" ++#include "oops/methodData.hpp" ++#include "oops/method.hpp" ++#include "oops/oop.inline.hpp" ++#include "prims/jvmtiExport.hpp" ++#include "prims/jvmtiThreadState.hpp" ++#include "runtime/arguments.hpp" ++#include "runtime/deoptimization.hpp" ++#include "runtime/frame.inline.hpp" ++#include "runtime/jniHandles.hpp" ++#include "runtime/sharedRuntime.hpp" ++#include "runtime/stubRoutines.hpp" ++#include "runtime/synchronizer.hpp" ++#include "runtime/timer.hpp" ++#include "runtime/vframeArray.hpp" ++#include "utilities/debug.hpp" ++ ++#define __ _masm-> ++ ++#define A0 RA0 ++#define A1 RA1 ++#define A2 RA2 ++#define A3 RA3 ++#define A4 RA4 ++#define A5 RA5 ++#define A6 RA6 ++#define A7 RA7 ++#define T0 RT0 ++#define T1 RT1 ++#define T2 RT2 ++#define T3 RT3 ++#define T4 RT4 ++#define T5 RT5 ++#define T6 RT6 ++#define T7 RT7 ++#define T8 RT8 ++ ++int TemplateInterpreter::InterpreterCodeSize = 500 * K; ++ ++#ifdef PRODUCT ++#define BLOCK_COMMENT(str) /* nothing */ ++#else ++#define BLOCK_COMMENT(str) __ block_comment(str) ++#endif ++ ++address TemplateInterpreterGenerator::generate_slow_signature_handler() { ++ address entry = __ pc(); ++ // Rmethod: method ++ // LVP: pointer to locals ++ // A3: first stack arg ++ __ move(A3, SP); ++ __ addi_d(SP, SP, -18 * wordSize); ++ __ st_d(RA, SP, 0); ++ __ call_VM(noreg, ++ CAST_FROM_FN_PTR(address, ++ InterpreterRuntime::slow_signature_handler), ++ Rmethod, LVP, A3); ++ ++ // V0: result handler ++ ++ // Stack layout: ++ // ... ++ // 18 stack arg0 <--- old sp ++ // 17 floatReg arg7 ++ // ... ++ // 10 floatReg arg0 ++ // 9 float/double identifiers ++ // 8 IntReg arg7 ++ // ... ++ // 2 IntReg arg1 ++ // 1 aligned slot ++ // SP: 0 return address ++ ++ // Do FPU first so we can use A3 as temp ++ __ ld_d(A3, Address(SP, 9 * wordSize)); // float/double identifiers ++ ++ for (int i= 0; i < Argument::n_float_register_parameters; i++) { ++ FloatRegister floatreg = as_FloatRegister(i + FA0->encoding()); ++ Label isdouble, done; ++ ++ __ andi(AT, A3, 1 << i); ++ __ bnez(AT, isdouble); ++ __ fld_s(floatreg, SP, (10 + i) * wordSize); ++ __ b(done); ++ __ bind(isdouble); ++ __ fld_d(floatreg, SP, (10 + i) * wordSize); ++ __ bind(done); ++ } ++ ++ // A0 is for env. ++ // If the mothed is not static, A1 will be corrected in generate_native_entry. 
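++  // In C terms the loop below is roughly (illustrative sketch, using the
++  // stack layout pictured above):
++  //   for (i = 1; i < n_register_parameters; i++)
++  //     A[i] = stack_slot[1 + i];   // IntReg arg1..arg7 into A1..A7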
++ for (int i= 1; i < Argument::n_register_parameters; i++) { ++ Register reg = as_Register(i + A0->encoding()); ++ __ ld_d(reg, SP, (1 + i) * wordSize); ++ } ++ ++ // A0/V0 contains the result from the call of ++ // InterpreterRuntime::slow_signature_handler so we don't touch it ++ // here. It will be loaded with the JNIEnv* later. ++ __ ld_d(RA, SP, 0); ++ __ addi_d(SP, SP, 18 * wordSize); ++ __ jr(RA); ++ return entry; ++} ++ ++/** ++ * Method entry for static native methods: ++ * int java.util.zip.CRC32.update(int crc, int b) ++ */ ++address TemplateInterpreterGenerator::generate_CRC32_update_entry() { ++ if (UseCRC32Intrinsics) { ++ address entry = __ pc(); ++ ++ // rmethod: Method* ++ // Rsender: senderSP must preserved for slow path ++ // SP: args ++ ++ Label slow_path; ++ // If we need a safepoint check, generate full interpreter entry. ++ __ safepoint_poll(slow_path, TREG, false /* at_return */, false /* acquire */, false /* in_nmethod */); ++ ++ // We don't generate local frame and don't align stack because ++ // we call stub code and there is no safepoint on this path. ++ ++ const Register crc = A0; // crc ++ const Register val = A1; // source java byte value ++ const Register tbl = A2; // scratch ++ ++ // Arguments are reversed on java expression stack ++ __ ld_w(val, SP, 0); // byte value ++ __ ld_w(crc, SP, wordSize); // Initial CRC ++ ++ __ li(tbl, (long)StubRoutines::crc_table_addr()); ++ ++ __ nor(crc, crc, R0); // ~crc ++ __ update_byte_crc32(crc, val, tbl); ++ __ nor(crc, crc, R0); // ~crc ++ ++ // restore caller SP ++ __ move(SP, Rsender); ++ __ jr(RA); ++ ++ // generate a vanilla native entry as the slow path ++ __ bind(slow_path); ++ __ jump_to_entry(Interpreter::entry_for_kind(Interpreter::native)); ++ return entry; ++ } ++ return NULL; ++} ++ ++/** ++ * Method entry for static native methods: ++ * int java.util.zip.CRC32.updateBytes(int crc, byte[] b, int off, int len) ++ * int java.util.zip.CRC32.updateByteBuffer(int crc, long buf, int off, int len) ++ */ ++address TemplateInterpreterGenerator::generate_CRC32_updateBytes_entry(AbstractInterpreter::MethodKind kind) { ++ if (UseCRC32Intrinsics) { ++ address entry = __ pc(); ++ ++ // rmethod: Method* ++ // Rsender: senderSP must preserved for slow path ++ // SP: args ++ ++ Label slow_path; ++ // If we need a safepoint check, generate full interpreter entry. ++ __ safepoint_poll(slow_path, TREG, false /* at_return */, false /* acquire */, false /* in_nmethod */); ++ ++ // We don't generate local frame and don't align stack because ++ // we call stub code and there is no safepoint on this path. 
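++    // Incoming expression stack, as consumed by the loads below
++    // (illustrative sketch; arguments are reversed on the Java stack):
++    //   SP + 0*wordSize   len
++    //   SP + 1*wordSize   off
++    //   SP + 2*wordSize   buf  (byte[] reference, or long address)
++    //   SP + 3*wordSize   crc  (one slot higher for updateByteBuffer,
++    //                           whose buf is a long)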
++ ++ const Register crc = A0; // crc ++ const Register buf = A1; // source java byte array address ++ const Register len = A2; // length ++ const Register tmp = A3; ++ ++ const Register off = len; // offset (never overlaps with 'len') ++ ++ // Arguments are reversed on java expression stack ++ // Calculate address of start element ++ __ ld_w(off, SP, wordSize); // int offset ++ __ ld_d(buf, SP, 2 * wordSize); // byte[] buf | long buf ++ __ add_d(buf, buf, off); // + offset ++ if (kind == Interpreter::java_util_zip_CRC32_updateByteBuffer) { ++ __ ld_w(crc, SP, 4 * wordSize); // long crc ++ } else { ++ __ addi_d(buf, buf, arrayOopDesc::base_offset_in_bytes(T_BYTE)); // + header size ++ __ ld_w(crc, SP, 3 * wordSize); // long crc ++ } ++ ++ // Can now load 'len' since we're finished with 'off' ++ __ ld_w(len, SP, 0); // length ++ ++ __ kernel_crc32(crc, buf, len, tmp); ++ ++ // restore caller SP ++ __ move(SP, Rsender); ++ __ jr(RA); ++ ++ // generate a vanilla native entry as the slow path ++ __ bind(slow_path); ++ __ jump_to_entry(Interpreter::entry_for_kind(Interpreter::native)); ++ return entry; ++ } ++ return NULL; ++} ++ ++/** ++ * Method entry for intrinsic-candidate (non-native) methods: ++ * int java.util.zip.CRC32C.updateBytes(int crc, byte[] b, int off, int end) ++ * int java.util.zip.CRC32C.updateDirectByteBuffer(int crc, long buf, int off, int end) ++ * Unlike CRC32, CRC32C does not have any methods marked as native ++ * CRC32C also uses an "end" variable instead of the length variable CRC32 uses ++ */ ++address TemplateInterpreterGenerator::generate_CRC32C_updateBytes_entry(AbstractInterpreter::MethodKind kind) { ++ if (UseCRC32CIntrinsics) { ++ address entry = __ pc(); ++ ++ const Register crc = A0; // initial crc ++ const Register buf = A1; // source java byte array address ++ const Register len = A2; // len argument to the kernel ++ const Register tmp = A3; ++ ++ const Register end = len; // index of last element to process ++ const Register off = crc; // offset ++ ++ __ ld_w(end, SP, 0); // int end ++ __ ld_w(off, SP, wordSize); // int offset ++ __ sub_w(len, end, off); // calculate length ++ __ ld_d(buf, SP, 2 * wordSize); // byte[] buf | long buf ++ __ add_d(buf, buf, off); // + offset ++ if (kind == Interpreter::java_util_zip_CRC32C_updateDirectByteBuffer) { ++ __ ld_w(crc, SP, 4 * wordSize); // int crc ++ } else { ++ __ addi_d(buf, buf, arrayOopDesc::base_offset_in_bytes(T_BYTE)); // + header size ++ __ ld_w(crc, SP, 3 * wordSize); // int crc ++ } ++ ++ __ kernel_crc32c(crc, buf, len, tmp); ++ ++ // restore caller SP ++ __ move(SP, Rsender); ++ __ jr(RA); ++ ++ return entry; ++ } ++ return NULL; ++} ++ ++// ++// Various method entries ++// ++ ++address TemplateInterpreterGenerator::generate_math_entry(AbstractInterpreter::MethodKind kind) { ++ if (!InlineIntrinsics) return NULL; // Generate a vanilla entry ++ ++ // These don't need a safepoint check because they aren't virtually ++ // callable. We won't enter these intrinsics from compiled code. ++ // If in the future we added an intrinsic which was virtually callable ++ // we'd have to worry about how to safepoint so that this code is used. 
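++  // Rough sketch of the simplest entries generated below (e.g.
++  // java_lang_math_abs/sqrt); the transcendental kinds go through
++  // generate_transcendental_entry() further down instead:
++  //   FA0 = *(double*) SP;   // argument from the expression stack
++  //   F0  = op(FA0);         // result is returned in F0
++  //   SP  = Rsender;         // restore the caller's SP
++  //   jr RA;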
++ ++ // mathematical functions inlined by compiler ++ // (interpreter must provide identical implementation ++ // in order to avoid monotonicity bugs when switching ++ // from interpreter to compiler in the middle of some ++ // computation) ++ // ++ // stack: ++ // [ arg ] <-- sp ++ // [ arg ] ++ // retaddr in ra ++ ++ address entry_point = NULL; ++ switch (kind) { ++ case Interpreter::java_lang_math_abs: ++ entry_point = __ pc(); ++ __ fld_d(FA0, SP, 0); ++ __ fabs_d(F0, FA0); ++ __ move(SP, Rsender); ++ break; ++ case Interpreter::java_lang_math_sqrt: ++ entry_point = __ pc(); ++ __ fld_d(FA0, SP, 0); ++ __ fsqrt_d(F0, FA0); ++ __ move(SP, Rsender); ++ break; ++ case Interpreter::java_lang_math_sin : ++ case Interpreter::java_lang_math_cos : ++ case Interpreter::java_lang_math_tan : ++ case Interpreter::java_lang_math_log : ++ case Interpreter::java_lang_math_log10 : ++ case Interpreter::java_lang_math_exp : ++ entry_point = __ pc(); ++ __ fld_d(FA0, SP, 0); ++ __ move(SP, Rsender); ++ __ movgr2fr_d(FS0, RA); ++ __ movgr2fr_d(FS1, SP); ++ __ bstrins_d(SP, R0, exact_log2(StackAlignmentInBytes) - 1, 0); ++ generate_transcendental_entry(kind, 1); ++ __ movfr2gr_d(SP, FS1); ++ __ movfr2gr_d(RA, FS0); ++ break; ++ case Interpreter::java_lang_math_pow : ++ entry_point = __ pc(); ++ __ fld_d(FA0, SP, 2 * Interpreter::stackElementSize); ++ __ fld_d(FA1, SP, 0); ++ __ move(SP, Rsender); ++ __ movgr2fr_d(FS0, RA); ++ __ movgr2fr_d(FS1, SP); ++ __ bstrins_d(SP, R0, exact_log2(StackAlignmentInBytes) - 1, 0); ++ generate_transcendental_entry(kind, 2); ++ __ movfr2gr_d(SP, FS1); ++ __ movfr2gr_d(RA, FS0); ++ break; ++ case Interpreter::java_lang_math_fmaD : ++ if (UseFMA) { ++ entry_point = __ pc(); ++ __ fld_d(FA0, SP, 4 * Interpreter::stackElementSize); ++ __ fld_d(FA1, SP, 2 * Interpreter::stackElementSize); ++ __ fld_d(FA2, SP, 0); ++ __ fmadd_d(F0, FA0, FA1, FA2); ++ __ move(SP, Rsender); ++ } ++ break; ++ case Interpreter::java_lang_math_fmaF : ++ if (UseFMA) { ++ entry_point = __ pc(); ++ __ fld_s(FA0, SP, 2 * Interpreter::stackElementSize); ++ __ fld_s(FA1, SP, Interpreter::stackElementSize); ++ __ fld_s(FA2, SP, 0); ++ __ fmadd_s(F0, FA0, FA1, FA2); ++ __ move(SP, Rsender); ++ } ++ break; ++ default: ++ ; ++ } ++ if (entry_point) { ++ __ jr(RA); ++ } ++ ++ return entry_point; ++} ++ ++ // double trigonometrics and transcendentals ++ // static jdouble dsin(jdouble x); ++ // static jdouble dcos(jdouble x); ++ // static jdouble dtan(jdouble x); ++ // static jdouble dlog(jdouble x); ++ // static jdouble dlog10(jdouble x); ++ // static jdouble dexp(jdouble x); ++ // static jdouble dpow(jdouble x, jdouble y); ++ ++void TemplateInterpreterGenerator::generate_transcendental_entry(AbstractInterpreter::MethodKind kind, int fpargs) { ++ address fn; ++ switch (kind) { ++ case Interpreter::java_lang_math_sin : ++ if (StubRoutines::dsin() == NULL) { ++ fn = CAST_FROM_FN_PTR(address, SharedRuntime::dsin); ++ } else { ++ fn = CAST_FROM_FN_PTR(address, StubRoutines::dsin()); ++ } ++ break; ++ case Interpreter::java_lang_math_cos : ++ if (StubRoutines::dcos() == NULL) { ++ fn = CAST_FROM_FN_PTR(address, SharedRuntime::dcos); ++ } else { ++ fn = CAST_FROM_FN_PTR(address, StubRoutines::dcos()); ++ } ++ break; ++ case Interpreter::java_lang_math_tan : ++ if (StubRoutines::dtan() == NULL) { ++ fn = CAST_FROM_FN_PTR(address, SharedRuntime::dtan); ++ } else { ++ fn = CAST_FROM_FN_PTR(address, StubRoutines::dtan()); ++ } ++ break; ++ case Interpreter::java_lang_math_log : ++ if (StubRoutines::dlog() == NULL) { ++ fn 
= CAST_FROM_FN_PTR(address, SharedRuntime::dlog); ++ } else { ++ fn = CAST_FROM_FN_PTR(address, StubRoutines::dlog()); ++ } ++ break; ++ case Interpreter::java_lang_math_log10 : ++ if (StubRoutines::dlog10() == NULL) { ++ fn = CAST_FROM_FN_PTR(address, SharedRuntime::dlog10); ++ } else { ++ fn = CAST_FROM_FN_PTR(address, StubRoutines::dlog10()); ++ } ++ break; ++ case Interpreter::java_lang_math_exp : ++ if (StubRoutines::dexp() == NULL) { ++ fn = CAST_FROM_FN_PTR(address, SharedRuntime::dexp); ++ } else { ++ fn = CAST_FROM_FN_PTR(address, StubRoutines::dexp()); ++ } ++ break; ++ case Interpreter::java_lang_math_pow : ++ if (StubRoutines::dpow() == NULL) { ++ fn = CAST_FROM_FN_PTR(address, SharedRuntime::dpow); ++ } else { ++ fn = CAST_FROM_FN_PTR(address, StubRoutines::dpow()); ++ } ++ break; ++ default: ++ ShouldNotReachHere(); ++ fn = NULL; // unreachable ++ } ++ __ li(T4, fn); ++ __ jalr(T4); ++} ++ ++// Abstract method entry ++// Attempt to execute abstract method. Throw exception ++address TemplateInterpreterGenerator::generate_abstract_entry(void) { ++ ++ // Rmethod: Method* ++ // V0: receiver (unused) ++ // Rsender : sender 's sp ++ address entry_point = __ pc(); ++ ++ // abstract method entry ++ // throw exception ++ // adjust stack to what a normal return would do ++ __ empty_expression_stack(); ++ __ restore_bcp(); ++ __ restore_locals(); ++ __ call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::throw_AbstractMethodErrorWithMethod), Rmethod); ++ // the call_VM checks for exception, so we should never return here. ++ __ should_not_reach_here(); ++ ++ return entry_point; ++} ++ ++ ++const int method_offset = frame::interpreter_frame_method_offset * wordSize; ++const int bci_offset = frame::interpreter_frame_bcp_offset * wordSize; ++const int locals_offset = frame::interpreter_frame_locals_offset * wordSize; ++ ++//----------------------------------------------------------------------------- ++ ++address TemplateInterpreterGenerator::generate_StackOverflowError_handler() { ++ address entry = __ pc(); ++ ++#ifdef ASSERT ++ { ++ Label L; ++ __ addi_d(T1, FP, frame::interpreter_frame_monitor_block_top_offset * wordSize); ++ __ sub_d(T1, T1, SP); // T1 = maximal sp for current fp ++ __ bge(T1, R0, L); // check if frame is complete ++ __ stop("interpreter frame not set up"); ++ __ bind(L); ++ } ++#endif // ASSERT ++ // Restore bcp under the assumption that the current frame is still ++ // interpreted ++ __ restore_bcp(); ++ ++ // expression stack must be empty before entering the VM if an ++ // exception happened ++ __ empty_expression_stack(); ++ // throw exception ++ __ call_VM(NOREG, CAST_FROM_FN_PTR(address, InterpreterRuntime::throw_StackOverflowError)); ++ return entry; ++} ++ ++address TemplateInterpreterGenerator::generate_ArrayIndexOutOfBounds_handler() { ++ address entry = __ pc(); ++ // expression stack must be empty before entering the VM if an ++ // exception happened ++ __ empty_expression_stack(); ++ // ??? 
convention: expect array in register A1 ++ __ call_VM(noreg, CAST_FROM_FN_PTR(address, ++ InterpreterRuntime::throw_ArrayIndexOutOfBoundsException), A1, A2); ++ return entry; ++} ++ ++address TemplateInterpreterGenerator::generate_ClassCastException_handler() { ++ address entry = __ pc(); ++ // expression stack must be empty before entering the VM if an ++ // exception happened ++ __ empty_expression_stack(); ++ __ call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::throw_ClassCastException), FSR); ++ return entry; ++} ++ ++address TemplateInterpreterGenerator::generate_exception_handler_common( ++ const char* name, const char* message, bool pass_oop) { ++ assert(!pass_oop || message == NULL, "either oop or message but not both"); ++ address entry = __ pc(); ++ ++ // expression stack must be empty before entering the VM if an exception happened ++ __ empty_expression_stack(); ++ // setup parameters ++ __ li(A1, (long)name); ++ if (pass_oop) { ++ __ call_VM(V0, ++ CAST_FROM_FN_PTR(address, InterpreterRuntime::create_klass_exception), A1, FSR); ++ } else { ++ __ li(A2, (long)message); ++ __ call_VM(V0, ++ CAST_FROM_FN_PTR(address, InterpreterRuntime::create_exception), A1, A2); ++ } ++ // throw exception ++ __ jmp(Interpreter::throw_exception_entry(), relocInfo::none); ++ return entry; ++} ++ ++address TemplateInterpreterGenerator::generate_return_entry_for(TosState state, int step, size_t index_size) { ++ ++ address entry = __ pc(); ++ // S8 be used in C2 ++ __ li(S8, (long)Interpreter::dispatch_table(itos)); ++ // Restore stack bottom in case i2c adjusted stack ++ __ ld_d(SP, Address(FP, frame::interpreter_frame_last_sp_offset * wordSize)); ++ // and NULL it as marker that sp is now tos until next java call ++ __ st_d(R0, FP, frame::interpreter_frame_last_sp_offset * wordSize); ++ ++ __ restore_bcp(); ++ __ restore_locals(); ++ ++ // mdp: T8 ++ // ret: FSR ++ // tmp: T4 ++ if (state == atos) { ++ Register mdp = T8; ++ Register tmp = T4; ++ __ profile_return_type(mdp, FSR, tmp); ++ } ++ ++ ++ const Register cache = T4; ++ const Register index = T3; ++ __ get_cache_and_index_at_bcp(cache, index, 1, index_size); ++ ++ const Register flags = cache; ++ __ alsl_d(AT, index, cache, Address::times_ptr - 1); ++ __ ld_w(flags, AT, in_bytes(ConstantPoolCache::base_offset() + ConstantPoolCacheEntry::flags_offset())); ++ __ andi(flags, flags, ConstantPoolCacheEntry::parameter_size_mask); ++ __ alsl_d(SP, flags, SP, Interpreter::logStackElementSize - 1); ++ ++ Register java_thread; ++#ifndef OPT_THREAD ++ java_thread = T4; ++ __ get_thread(java_thread); ++#else ++ java_thread = TREG; ++#endif ++ ++ __ check_and_handle_popframe(java_thread); ++ __ check_and_handle_earlyret(java_thread); ++ ++ __ dispatch_next(state, step); ++ ++ return entry; ++} ++ ++ ++address TemplateInterpreterGenerator::generate_deopt_entry_for(TosState state, ++ int step, ++ address continuation) { ++ address entry = __ pc(); ++ // S8 be used in C2 ++ __ li(S8, (long)Interpreter::dispatch_table(itos)); ++ // NULL last_sp until next java call ++ __ st_d(R0, FP, frame::interpreter_frame_last_sp_offset * wordSize); ++ __ restore_bcp(); ++ __ restore_locals(); ++ ++#if INCLUDE_JVMCI ++ // Check if we need to take lock at entry of synchronized method. This can ++ // only occur on method entry so emit it only for vtos with step 0. ++ if (EnableJVMCI && state == vtos && step == 0) { ++ Label L; ++ __ ld_b(AT, Address(TREG, JavaThread::pending_monitorenter_offset())); ++ __ beqz(AT, L); ++ // Clear flag. 
++ __ st_b(R0, Address(TREG, JavaThread::pending_monitorenter_offset())); ++ // Take lock. ++ lock_method(); ++ __ bind(L); ++ } else { ++#ifdef ASSERT ++ if (EnableJVMCI) { ++ Label L; ++ __ ld_b(AT, Address(TREG, JavaThread::pending_monitorenter_offset())); ++ __ beqz(AT, L); ++ __ stop("unexpected pending monitor in deopt entry"); ++ __ bind(L); ++ } ++#endif ++ } ++#endif ++ ++ // handle exceptions ++ { ++ Label L; ++ const Register thread = TREG; ++#ifndef OPT_THREAD ++ __ get_thread(thread); ++#endif ++ __ ld_d(AT, thread, in_bytes(Thread::pending_exception_offset())); ++ __ beq(AT, R0, L); ++ __ call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::throw_pending_exception)); ++ __ should_not_reach_here(); ++ __ bind(L); ++ } ++ if (continuation == NULL) { ++ __ dispatch_next(state, step); ++ } else { ++ __ jump_to_entry(continuation); ++ } ++ return entry; ++} ++ ++int AbstractInterpreter::BasicType_as_index(BasicType type) { ++ int i = 0; ++ switch (type) { ++ case T_BOOLEAN: i = 0; break; ++ case T_CHAR : i = 1; break; ++ case T_BYTE : i = 2; break; ++ case T_SHORT : i = 3; break; ++ case T_INT : // fall through ++ case T_LONG : // fall through ++ case T_VOID : i = 4; break; ++ case T_FLOAT : i = 5; break; ++ case T_DOUBLE : i = 6; break; ++ case T_OBJECT : // fall through ++ case T_ARRAY : i = 7; break; ++ default : ShouldNotReachHere(); ++ } ++ assert(0 <= i && i < AbstractInterpreter::number_of_result_handlers, ++ "index out of bounds"); ++ return i; ++} ++ ++ ++address TemplateInterpreterGenerator::generate_result_handler_for( ++ BasicType type) { ++ address entry = __ pc(); ++ switch (type) { ++ case T_BOOLEAN: __ c2bool(V0); break; ++ case T_CHAR : __ bstrpick_d(V0, V0, 15, 0); break; ++ case T_BYTE : __ sign_extend_byte (V0); break; ++ case T_SHORT : __ sign_extend_short(V0); break; ++ case T_INT : /* nothing to do */ break; ++ case T_FLOAT : /* nothing to do */ break; ++ case T_DOUBLE : /* nothing to do */ break; ++ case T_OBJECT : ++ { ++ __ ld_d(V0, FP, frame::interpreter_frame_oop_temp_offset * wordSize); ++ __ verify_oop(V0); // and verify it ++ } ++ break; ++ default : ShouldNotReachHere(); ++ } ++ __ jr(RA); // return from result handler ++ return entry; ++} ++ ++address TemplateInterpreterGenerator::generate_safept_entry_for( ++ TosState state, ++ address runtime_entry) { ++ address entry = __ pc(); ++ __ push(state); ++ __ call_VM(noreg, runtime_entry); ++ __ dispatch_via(vtos, Interpreter::_normal_table.table_for(vtos)); ++ return entry; ++} ++ ++ ++ ++// Helpers for commoning out cases in the various type of method entries. ++// ++ ++ ++// increment invocation count & check for overflow ++// ++// Note: checking for negative value instead of overflow ++// so we have a 'sticky' overflow test ++// ++// Rmethod: method ++void TemplateInterpreterGenerator::generate_counter_incr(Label* overflow) { ++ Label done; ++ int increment = InvocationCounter::count_increment; ++ Label no_mdo; ++ if (ProfileInterpreter) { ++ // Are we profiling? 
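++    // If a MethodData* has been allocated for this method, its invocation
++    // counter is incremented here; otherwise the beq below branches to the
++    // MethodCounters-based counter, so the count is kept in exactly one place.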
++ __ ld_d(T0, Address(Rmethod, Method::method_data_offset())); ++ __ beq(T0, R0, no_mdo); ++ // Increment counter in the MDO ++ const Address mdo_invocation_counter(T0, in_bytes(MethodData::invocation_counter_offset()) + ++ in_bytes(InvocationCounter::counter_offset())); ++ const Address mask(T0, in_bytes(MethodData::invoke_mask_offset())); ++ __ increment_mask_and_jump(mdo_invocation_counter, increment, mask, T1, false, Assembler::zero, overflow); ++ __ b(done); ++ } ++ __ bind(no_mdo); ++ // Increment counter in MethodCounters ++ const Address invocation_counter(T0, ++ MethodCounters::invocation_counter_offset() + ++ InvocationCounter::counter_offset()); ++ __ get_method_counters(Rmethod, T0, done); ++ const Address mask(T0, in_bytes(MethodCounters::invoke_mask_offset())); ++ __ increment_mask_and_jump(invocation_counter, increment, mask, T1, false, Assembler::zero, overflow); ++ __ bind(done); ++} ++ ++void TemplateInterpreterGenerator::generate_counter_overflow(Label& do_continue) { ++ ++ // Asm interpreter on entry ++ // S7 - locals ++ // S0 - bcp ++ // Rmethod - method ++ // FP - interpreter frame ++ ++ // On return (i.e. jump to entry_point) ++ // Rmethod - method ++ // RA - return address of interpreter caller ++ // tos - the last parameter to Java method ++ // SP - sender_sp ++ ++ // the bcp is valid if and only if it's not null ++ __ call_VM(NOREG, CAST_FROM_FN_PTR(address, ++ InterpreterRuntime::frequency_counter_overflow), R0); ++ __ ld_d(Rmethod, FP, method_offset); ++ // Preserve invariant that S0/S7 contain bcp/locals of sender frame ++ __ b_far(do_continue); ++} ++ ++// See if we've got enough room on the stack for locals plus overhead. ++// The expression stack grows down incrementally, so the normal guard ++// page mechanism will work for that. ++// ++// NOTE: Since the additional locals are also always pushed (wasn't ++// obvious in generate_method_entry) so the guard should work for them ++// too. ++// ++// Args: ++// T2: number of additional locals this frame needs (what we must check) ++// T0: Method* ++// ++void TemplateInterpreterGenerator::generate_stack_overflow_check(void) { ++ // see if we've got enough room on the stack for locals plus overhead. ++ // the expression stack grows down incrementally, so the normal guard ++ // page mechanism will work for that. ++ // ++ // Registers live on entry: ++ // ++ // T0: Method* ++ // T2: number of additional locals this frame needs (what we must check) ++ ++ // NOTE: since the additional locals are also always pushed (wasn't obvious in ++ // generate_method_entry) so the guard should work for them too. ++ // ++ ++ const int entry_size = frame::interpreter_frame_monitor_size() * wordSize; ++ ++ // total overhead size: entry_size + (saved fp thru expr stack bottom). ++ // be sure to change this if you add/subtract anything to/from the overhead area ++ const int overhead_size = -(frame::interpreter_frame_initial_sp_offset*wordSize) ++ + entry_size; ++ ++ const int page_size = os::vm_page_size(); ++ Label after_frame_check; ++ ++ // see if the frame is greater than one page in size. If so, ++ // then we need to verify there is enough stack space remaining ++ // for the additional locals. 
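++  // Illustrative example (numbers are assumptions, not taken from this code):
++  // with a 16 KiB page, 8-byte stack elements and a few hundred bytes of
++  // overhead, the threshold computed below lets roughly 2000 additional locals
++  // take the fast path; only larger frames reach the explicit
++  // stack_base/stack_size check further down.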
++ __ li(AT, (page_size - overhead_size) / Interpreter::stackElementSize); ++ __ bge(AT, T2, after_frame_check); ++ ++ // compute sp as if this were going to be the last frame on ++ // the stack before the red zone ++#ifndef OPT_THREAD ++ Register thread = T1; ++ __ get_thread(thread); ++#else ++ Register thread = TREG; ++#endif ++ ++ // locals + overhead, in bytes ++ __ slli_d(T3, T2, Interpreter::logStackElementSize); ++ __ addi_d(T3, T3, overhead_size); // locals * 4 + overhead_size --> T3 ++ ++#ifdef ASSERT ++ Label stack_base_okay, stack_size_okay; ++ // verify that thread stack base is non-zero ++ __ ld_d(AT, thread, in_bytes(Thread::stack_base_offset())); ++ __ bne(AT, R0, stack_base_okay); ++ __ stop("stack base is zero"); ++ __ bind(stack_base_okay); ++ // verify that thread stack size is non-zero ++ __ ld_d(AT, thread, in_bytes(Thread::stack_size_offset())); ++ __ bne(AT, R0, stack_size_okay); ++ __ stop("stack size is zero"); ++ __ bind(stack_size_okay); ++#endif ++ ++ // Add stack base to locals and subtract stack size ++ __ ld_d(AT, thread, in_bytes(Thread::stack_base_offset())); // stack_base --> AT ++ __ add_d(T3, T3, AT); // locals * 4 + overhead_size + stack_base--> T3 ++ __ ld_d(AT, thread, in_bytes(Thread::stack_size_offset())); // stack_size --> AT ++ __ sub_d(T3, T3, AT); // locals * 4 + overhead_size + stack_base - stack_size --> T3 ++ ++ // Use the bigger size for banging. ++ const int max_bang_size = (int)MAX2(StackOverflow::stack_shadow_zone_size(), StackOverflow::stack_guard_zone_size()); ++ ++ // add in the redzone and yellow size ++ __ li(AT, max_bang_size); ++ __ add_d(T3, T3, AT); ++ ++ // check against the current stack bottom ++ __ blt(T3, SP, after_frame_check); ++ ++ // Note: the restored frame is not necessarily interpreted. ++ // Use the shared runtime version of the StackOverflowError. ++ __ move(SP, Rsender); ++ assert(StubRoutines::throw_StackOverflowError_entry() != NULL, "stub not yet generated"); ++ __ jmp(StubRoutines::throw_StackOverflowError_entry(), relocInfo::runtime_call_type); ++ ++ // all done with frame size check ++ __ bind(after_frame_check); ++} ++ ++// Allocate monitor and lock method (asm interpreter) ++// Rmethod - Method* ++void TemplateInterpreterGenerator::lock_method(void) { ++ // synchronize method ++ const int entry_size = frame::interpreter_frame_monitor_size() * wordSize; ++ ++#ifdef ASSERT ++ { Label L; ++ __ ld_w(T0, Rmethod, in_bytes(Method::access_flags_offset())); ++ __ andi(T0, T0, JVM_ACC_SYNCHRONIZED); ++ __ bne(T0, R0, L); ++ __ stop("method doesn't need synchronization"); ++ __ bind(L); ++ } ++#endif // ASSERT ++ // get synchronization object ++ { ++ Label done; ++ __ ld_w(T0, Rmethod, in_bytes(Method::access_flags_offset())); ++ __ andi(T2, T0, JVM_ACC_STATIC); ++ __ ld_d(T0, LVP, Interpreter::local_offset_in_bytes(0)); ++ __ beq(T2, R0, done); ++ __ load_mirror(T0, Rmethod, T4); ++ __ bind(done); ++ } ++ // add space for monitor & lock ++ __ addi_d(SP, SP, (-1) * entry_size); // add space for a monitor entry ++ __ st_d(SP, FP, frame::interpreter_frame_monitor_block_top_offset * wordSize); ++ // set new monitor block top ++ __ st_d(T0, SP, BasicObjectLock::obj_offset_in_bytes()); // store object ++ // FIXME: I do not know what lock_object will do and what it will need ++ __ move(c_rarg0, SP); // object address ++ __ lock_object(c_rarg0); ++} ++ ++// Generate a fixed interpreter frame. This is identical setup for ++// interpreted methods and for native methods hence the shared code. 
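++// The fixed part laid out below is frame_size words: return address, sender's
++// fp, sender's sp, last_sp (NULL until the next Java call), locals pointer,
++// Method*, mirror, method data pointer (or 0), constant pool cache, bcp (0 for
++// native calls) and the expression stack bottom.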
++void TemplateInterpreterGenerator::generate_fixed_frame(bool native_call) { ++ ++ // [ local var m-1 ] <--- sp ++ // ... ++ // [ local var 0 ] ++ // [ argumnet word n-1 ] <--- T0(sender's sp) ++ // ... ++ // [ argument word 0 ] <--- S7 ++ ++ // initialize fixed part of activation frame ++ // sender's sp in Rsender ++ int i = 2; ++ int frame_size = 10; ++#ifndef CORE ++ ++frame_size; ++#endif ++ __ addi_d(SP, SP, (-frame_size) * wordSize); ++ __ st_d(RA, SP, (frame_size - 1) * wordSize); // save return address ++ __ st_d(FP, SP, (frame_size - 2) * wordSize); // save sender's fp ++ __ addi_d(FP, SP, (frame_size) * wordSize); ++ __ st_d(Rsender, FP, (-++i) * wordSize); // save sender's sp ++ __ st_d(R0, FP,(-++i) * wordSize); //save last_sp as null ++ __ st_d(LVP, FP, (-++i) * wordSize); // save locals offset ++ __ ld_d(BCP, Rmethod, in_bytes(Method::const_offset())); // get constMethodOop ++ __ addi_d(BCP, BCP, in_bytes(ConstMethod::codes_offset())); // get codebase ++ __ st_d(Rmethod, FP, (-++i) * wordSize); // save Method* ++ // Get mirror and store it in the frame as GC root for this Method* ++ __ load_mirror(T2, Rmethod, T4); ++ __ st_d(T2, FP, (-++i) * wordSize); // Mirror ++#ifndef CORE ++ if (ProfileInterpreter) { ++ Label method_data_continue; ++ __ ld_d(AT, Rmethod, in_bytes(Method::method_data_offset())); ++ __ beq(AT, R0, method_data_continue); ++ __ addi_d(AT, AT, in_bytes(MethodData::data_offset())); ++ __ bind(method_data_continue); ++ __ st_d(AT, FP, (-++i) * wordSize); ++ } else { ++ __ st_d(R0, FP, (-++i) * wordSize); ++ } ++#endif // !CORE ++ ++ __ ld_d(T2, Rmethod, in_bytes(Method::const_offset())); ++ __ ld_d(T2, T2, in_bytes(ConstMethod::constants_offset())); ++ __ ld_d(T2, T2, ConstantPool::cache_offset_in_bytes()); ++ __ st_d(T2, FP, (-++i) * wordSize); // set constant pool cache ++ if (native_call) { ++ __ st_d(R0, FP, (-++i) * wordSize); // no bcp ++ } else { ++ __ st_d(BCP, FP, (-++i) * wordSize); // set bcp ++ } ++ __ st_d(SP, FP, (-++i) * wordSize); // reserve word for pointer to expression stack bottom ++ assert(i == frame_size, "i should be equal to frame_size"); ++} ++ ++// End of helpers ++ ++// Various method entries ++//------------------------------------------------------------------------------------------------------------------------ ++// ++// ++ ++// Method entry for java.lang.ref.Reference.get. ++address TemplateInterpreterGenerator::generate_Reference_get_entry(void) { ++ // Code: _aload_0, _getfield, _areturn ++ // parameter size = 1 ++ // ++ // The code that gets generated by this routine is split into 2 parts: ++ // 1. The "intrinsified" code for G1 (or any SATB based GC), ++ // 2. The slow path - which is an expansion of the regular method entry. ++ // ++ // Notes:- ++ // * In the G1 code we do not check whether we need to block for ++ // a safepoint. If G1 is enabled then we must execute the specialized ++ // code for Reference.get (except when the Reference object is null) ++ // so that we can log the value in the referent field with an SATB ++ // update buffer. ++ // If the code for the getfield template is modified so that the ++ // G1 pre-barrier code is executed when the current method is ++ // Reference.get() then going through the normal method entry ++ // will be fine. ++ // * The G1 code can, however, check the receiver object (the instance ++ // of java.lang.Reference) and jump to the slow path if null. 
If the ++ // Reference object is null then we obviously cannot fetch the referent ++ // and so we don't need to call the G1 pre-barrier. Thus we can use the ++ // regular method entry code to generate the NPE. ++ // ++ // This code is based on generate_accessor_entry. ++ // ++ // Rmethod: Method* ++ // Rsender: senderSP must preserve for slow path, set SP to it on fast path ++ // RA is live. It must be saved around calls. ++ ++ address entry = __ pc(); ++ ++ const int referent_offset = java_lang_ref_Reference::referent_offset(); ++ ++ Label slow_path; ++ const Register local_0 = A0; ++ // Check if local 0 != NULL ++ // If the receiver is null then it is OK to jump to the slow path. ++ __ ld_d(local_0, Address(SP, 0)); ++ __ beqz(local_0, slow_path); ++ ++ // Load the value of the referent field. ++ const Address field_address(local_0, referent_offset); ++ BarrierSetAssembler *bs = BarrierSet::barrier_set()->barrier_set_assembler(); ++ bs->load_at(_masm, IN_HEAP | ON_WEAK_OOP_REF, T_OBJECT, local_0, field_address, /*tmp1*/ T4, /*tmp2*/ noreg); ++ ++ // areturn ++ __ move(SP, Rsender); ++ __ jr(RA); ++ ++ // generate a vanilla interpreter entry as the slow path ++ __ bind(slow_path); ++ __ jump_to_entry(Interpreter::entry_for_kind(Interpreter::zerolocals)); ++ return entry; ++} ++ ++// Interpreter stub for calling a native method. (asm interpreter) ++// This sets up a somewhat different looking stack for calling the ++// native method than the typical interpreter frame setup. ++address TemplateInterpreterGenerator::generate_native_entry(bool synchronized) { ++ // determine code generation flags ++ bool inc_counter = UseCompiler || CountCompiledCalls || LogTouchedMethods; ++ // Rsender: sender's sp ++ // Rmethod: Method* ++ address entry_point = __ pc(); ++ ++#ifndef CORE ++ const Address invocation_counter(Rmethod,in_bytes(MethodCounters::invocation_counter_offset() + ++ InvocationCounter::counter_offset())); ++#endif ++ // get parameter size (always needed) ++ // the size in the java stack ++ __ ld_d(V0, Rmethod, in_bytes(Method::const_offset())); ++ __ ld_hu(V0, V0, in_bytes(ConstMethod::size_of_parameters_offset())); ++ ++ // native calls don't need the stack size check since they have no expression stack ++ // and the arguments are already on the stack and we only add a handful of words ++ // to the stack ++ ++ // Rmethod: Method* ++ // V0: size of parameters ++ // Layout of frame at this point ++ // ++ // [ argument word n-1 ] <--- sp ++ // ... ++ // [ argument word 0 ] ++ ++ // for natives the size of locals is zero ++ ++ // compute beginning of parameters (S7) ++ __ slli_d(LVP, V0, Address::times_8); ++ __ addi_d(LVP, LVP, (-1) * wordSize); ++ __ add_d(LVP, LVP, SP); ++ ++ ++ // add 2 zero-initialized slots for native calls ++ // 1 slot for native oop temp offset (setup via runtime) ++ // 1 slot for static native result handler3 (setup via runtime) ++ __ push2(R0, R0); ++ ++ // Layout of frame at this point ++ // [ method holder mirror ] <--- sp ++ // [ result type info ] ++ // [ argument word n-1 ] <--- T0 ++ // ... 
++ // [ argument word 0 ] <--- LVP ++ ++ ++#ifndef CORE ++ if (inc_counter) __ ld_w(T3, invocation_counter); // (pre-)fetch invocation count ++#endif ++ ++ // initialize fixed part of activation frame ++ generate_fixed_frame(true); ++ // after this function, the layout of frame is as following ++ // ++ // [ monitor block top ] <--- sp ( the top monitor entry ) ++ // [ byte code pointer (0) ] (if native, bcp = 0) ++ // [ constant pool cache ] ++ // [ Mirror ] ++ // [ Method* ] ++ // [ locals offset ] ++ // [ sender's sp ] ++ // [ sender's fp ] ++ // [ return address ] <--- fp ++ // [ method holder mirror ] ++ // [ result type info ] ++ // [ argumnet word n-1 ] <--- sender's sp ++ // ... ++ // [ argument word 0 ] <--- S7 ++ ++ ++ // make sure method is native & not abstract ++#ifdef ASSERT ++ __ ld_w(T0, Rmethod, in_bytes(Method::access_flags_offset())); ++ { ++ Label L; ++ __ andi(AT, T0, JVM_ACC_NATIVE); ++ __ bne(AT, R0, L); ++ __ stop("tried to execute native method as non-native"); ++ __ bind(L); ++ } ++ { ++ Label L; ++ __ andi(AT, T0, JVM_ACC_ABSTRACT); ++ __ beq(AT, R0, L); ++ __ stop("tried to execute abstract method in interpreter"); ++ __ bind(L); ++ } ++#endif ++ ++ // Since at this point in the method invocation the exception handler ++ // would try to exit the monitor of synchronized methods which hasn't ++ // been entered yet, we set the thread local variable ++ // _do_not_unlock_if_synchronized to true. The remove_activation will ++ // check this flag. ++ Register thread = TREG; ++#ifndef OPT_THREAD ++ __ get_thread(thread); ++#endif ++ __ li(AT, (int)true); ++ __ st_b(AT, thread, in_bytes(JavaThread::do_not_unlock_if_synchronized_offset())); ++ ++#ifndef CORE ++ // increment invocation count & check for overflow ++ Label invocation_counter_overflow; ++ if (inc_counter) { ++ generate_counter_incr(&invocation_counter_overflow); ++ } ++ ++ Label continue_after_compile; ++ __ bind(continue_after_compile); ++#endif // CORE ++ ++ bang_stack_shadow_pages(true); ++ ++ // reset the _do_not_unlock_if_synchronized flag ++#ifndef OPT_THREAD ++ __ get_thread(thread); ++#endif ++ __ st_b(R0, thread, in_bytes(JavaThread::do_not_unlock_if_synchronized_offset())); ++ ++ // check for synchronized methods ++ // Must happen AFTER invocation_counter check and stack overflow check, ++ // so method is not locked if overflows. ++ if (synchronized) { ++ lock_method(); ++ } else { ++ // no synchronization necessary ++#ifdef ASSERT ++ { ++ Label L; ++ __ ld_w(T0, Rmethod, in_bytes(Method::access_flags_offset())); ++ __ andi(AT, T0, JVM_ACC_SYNCHRONIZED); ++ __ beq(AT, R0, L); ++ __ stop("method needs synchronization"); ++ __ bind(L); ++ } ++#endif ++ } ++ ++ // after method_lock, the layout of frame is as following ++ // ++ // [ monitor entry ] <--- sp ++ // ... ++ // [ monitor entry ] ++ // [ monitor block top ] ( the top monitor entry ) ++ // [ byte code pointer (0) ] (if native, bcp = 0) ++ // [ constant pool cache ] ++ // [ Mirror ] ++ // [ Method* ] ++ // [ locals offset ] ++ // [ sender's sp ] ++ // [ sender's fp ] ++ // [ return address ] <--- fp ++ // [ method holder mirror ] ++ // [ result type info ] ++ // [ argumnet word n-1 ] <--- ( sender's sp ) ++ // ... 
++ // [ argument word 0 ] <--- S7 ++ ++ // start execution ++#ifdef ASSERT ++ { ++ Label L; ++ __ ld_d(AT, FP, frame::interpreter_frame_monitor_block_top_offset * wordSize); ++ __ beq(AT, SP, L); ++ __ stop("broken stack frame setup in interpreter in asm"); ++ __ bind(L); ++ } ++#endif ++ ++ // jvmti/jvmpi support ++ __ notify_method_entry(); ++ ++ // work registers ++ const Register method = Rmethod; ++ const Register t = T8; ++ ++ __ get_method(method); ++ { ++ Label L, Lstatic; ++ __ ld_d(t,method,in_bytes(Method::const_offset())); ++ __ ld_hu(t, t, in_bytes(ConstMethod::size_of_parameters_offset())); ++ // LoongArch ABI: caller does not reserve space for the register auguments. ++ // A0 and A1(if needed) ++ __ ld_w(AT, Rmethod, in_bytes(Method::access_flags_offset())); ++ __ andi(AT, AT, JVM_ACC_STATIC); ++ __ beq(AT, R0, Lstatic); ++ __ addi_d(t, t, 1); ++ __ bind(Lstatic); ++ __ addi_d(t, t, -7); ++ __ bge(R0, t, L); ++ __ slli_d(t, t, Address::times_8); ++ __ sub_d(SP, SP, t); ++ __ bind(L); ++ } ++ assert(StackAlignmentInBytes == 16, "must be"); ++ __ bstrins_d(SP, R0, 3, 0); ++ __ move(AT, SP); ++ // [ ] <--- sp ++ // ... (size of parameters - 8 ) ++ // [ monitor entry ] ++ // ... ++ // [ monitor entry ] ++ // [ monitor block top ] ( the top monitor entry ) ++ // [ byte code pointer (0) ] (if native, bcp = 0) ++ // [ constant pool cache ] ++ // [ Mirror ] ++ // [ Method* ] ++ // [ locals offset ] ++ // [ sender's sp ] ++ // [ sender's fp ] ++ // [ return address ] <--- fp ++ // [ method holder mirror ] ++ // [ result type info ] ++ // [ argumnet word n-1 ] <--- ( sender's sp ) ++ // ... ++ // [ argument word 0 ] <--- LVP ++ ++ // get signature handler ++ { ++ Label L; ++ __ ld_d(T4, method, in_bytes(Method::signature_handler_offset())); ++ __ bne(T4, R0, L); ++ __ call_VM(NOREG, CAST_FROM_FN_PTR(address, ++ InterpreterRuntime::prepare_native_call), method); ++ __ get_method(method); ++ __ ld_d(T4, method, in_bytes(Method::signature_handler_offset())); ++ __ bind(L); ++ } ++ ++ // call signature handler ++ // FIXME: when change codes in InterpreterRuntime, note this point ++ // from: begin of parameters ++ assert(InterpreterRuntime::SignatureHandlerGenerator::from() == LVP, "adjust this code"); ++ // to: current sp ++ assert(InterpreterRuntime::SignatureHandlerGenerator::to () == SP, "adjust this code"); ++ // temp: T3 ++ assert(InterpreterRuntime::SignatureHandlerGenerator::temp() == t , "adjust this code"); ++ ++ __ jalr(T4); ++ __ get_method(method); ++ ++ // ++ // if native function is static, and its second parameter has type length of double word, ++ // and first parameter has type length of word, we have to reserve one word ++ // for the first parameter, according to LoongArch abi. ++ // if native function is not static, and its third parameter has type length of double word, ++ // and second parameter has type length of word, we have to reserve one word for the second ++ // parameter. 
++ // ++ ++ ++ // result handler is in V0 ++ // set result handler ++ __ st_d(V0, FP, (frame::interpreter_frame_result_handler_offset)*wordSize); ++ ++#define FIRSTPARA_SHIFT_COUNT 5 ++#define SECONDPARA_SHIFT_COUNT 9 ++#define THIRDPARA_SHIFT_COUNT 13 ++#define PARA_MASK 0xf ++ ++ // pass mirror handle if static call ++ { ++ Label L; ++ __ ld_w(t, method, in_bytes(Method::access_flags_offset())); ++ __ andi(AT, t, JVM_ACC_STATIC); ++ __ beq(AT, R0, L); ++ ++ // get mirror ++ __ load_mirror(t, method, T4); ++ // copy mirror into activation frame ++ __ st_d(t, FP, frame::interpreter_frame_oop_temp_offset * wordSize); ++ // pass handle to mirror ++ __ addi_d(t, FP, frame::interpreter_frame_oop_temp_offset * wordSize); ++ __ move(A1, t); ++ __ bind(L); ++ } ++ ++ // [ mthd holder mirror ptr ] <--- sp --------------------| (only for static method) ++ // [ ] | ++ // ... size of parameters(or +1) | ++ // [ monitor entry ] | ++ // ... | ++ // [ monitor entry ] | ++ // [ monitor block top ] ( the top monitor entry ) | ++ // [ byte code pointer (0) ] (if native, bcp = 0) | ++ // [ constant pool cache ] | ++ // [ Mirror ] | ++ // [ Method* ] | ++ // [ locals offset ] | ++ // [ sender's sp ] | ++ // [ sender's fp ] | ++ // [ return address ] <--- fp | ++ // [ method holder mirror ] <----------------------------| ++ // [ result type info ] ++ // [ argumnet word n-1 ] <--- ( sender's sp ) ++ // ... ++ // [ argument word 0 ] <--- S7 ++ ++ // get native function entry point ++ { Label L; ++ __ ld_d(T4, method, in_bytes(Method::native_function_offset())); ++ __ li(T6, SharedRuntime::native_method_throw_unsatisfied_link_error_entry()); ++ __ bne(T6, T4, L); ++ __ call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::prepare_native_call), method); ++ __ get_method(method); ++ __ ld_d(T4, method, in_bytes(Method::native_function_offset())); ++ __ bind(L); ++ } ++ ++ // pass JNIEnv ++ // native function in T4 ++#ifndef OPT_THREAD ++ __ get_thread(thread); ++#endif ++ __ addi_d(t, thread, in_bytes(JavaThread::jni_environment_offset())); ++ __ move(A0, t); ++ // [ jni environment ] <--- sp ++ // [ mthd holder mirror ptr ] ---------------------------->| (only for static method) ++ // [ ] | ++ // ... size of parameters | ++ // [ monitor entry ] | ++ // ... | ++ // [ monitor entry ] | ++ // [ monitor block top ] ( the top monitor entry ) | ++ // [ byte code pointer (0) ] (if native, bcp = 0) | ++ // [ constant pool cache ] | ++ // [ Mirror ] | ++ // [ Method* ] | ++ // [ locals offset ] | ++ // [ sender's sp ] | ++ // [ sender's fp ] | ++ // [ return address ] <--- fp | ++ // [ method holder mirror ] <----------------------------| ++ // [ result type info ] ++ // [ argumnet word n-1 ] <--- ( sender's sp ) ++ // ... ++ // [ argument word 0 ] <--- S7 ++ ++ // Set the last Java PC in the frame anchor to be the return address from ++ // the call to the native method: this will allow the debugger to ++ // generate an accurate stack trace. 
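++  // The sequence from here on: record the last Java frame, switch the thread
++  // state to _thread_in_native, call the native entry point, then pass through
++  // _thread_in_native_trans with a safepoint/suspend check before returning to
++  // _thread_in_Java and unwinding.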
++ Label native_return; ++ __ set_last_Java_frame(thread, SP, FP, native_return); ++ ++ // change thread state ++#ifdef ASSERT ++ { ++ Label L; ++ __ ld_w(t, thread, in_bytes(JavaThread::thread_state_offset())); ++ __ addi_d(t, t, (-1) * _thread_in_Java); ++ __ beq(t, R0, L); ++ __ stop("Wrong thread state in native stub"); ++ __ bind(L); ++ } ++#endif ++ ++ __ li(t, _thread_in_native); ++ if (os::is_MP()) { ++ __ membar(Assembler::Membar_mask_bits(__ LoadStore|__ StoreStore)); ++ } ++ __ st_w(t, thread, in_bytes(JavaThread::thread_state_offset())); ++ ++ // call native method ++ __ jalr(T4); ++ __ bind(native_return); ++ // result potentially in V0 or F0 ++ ++ ++ // via _last_native_pc and not via _last_jave_sp ++ // NOTE: the order of theses push(es) is known to frame::interpreter_frame_result. ++ // If the order changes or anything else is added to the stack the code in ++ // interpreter_frame_result will have to be changed. ++ //FIXME, should modify here ++ // save return value to keep the value from being destroyed by other calls ++ __ push(dtos); ++ __ push(ltos); ++ ++ // change thread state ++#ifndef OPT_THREAD ++ __ get_thread(thread); ++#endif ++ __ li(t, _thread_in_native_trans); ++ if (os::is_MP()) { ++ __ membar(Assembler::Membar_mask_bits(__ LoadStore|__ StoreStore)); ++ } ++ __ st_w(t, thread, in_bytes(JavaThread::thread_state_offset())); ++ ++ if( os::is_MP() ) __ membar(__ AnyAny); ++ ++ // check for safepoint operation in progress and/or pending suspend requests ++ { Label Continue; ++ ++ // Don't use call_VM as it will see a possible pending exception and forward it ++ // and never return here preventing us from clearing _last_native_pc down below. ++ // Also can't use call_VM_leaf either as it will check to see if BCP & LVP are ++ // preserved and correspond to the bcp/locals pointers. So we do a runtime call ++ // by hand. ++ // ++ Label slow_path; ++ ++ // We need an acquire here to ensure that any subsequent load of the ++ // global SafepointSynchronize::_state flag is ordered after this load ++ // of the thread-local polling word. We don't want this poll to ++ // return false (i.e. not safepointing) and a later poll of the global ++ // SafepointSynchronize::_state spuriously to return true. ++ // ++ // This is to avoid a race when we're in a native->Java transition ++ // racing the code which wakes up from a safepoint. 
++ __ safepoint_poll(slow_path, thread, true /* at_return */, true /* acquire */, false /* in_nmethod */); ++ __ ld_w(AT, thread, in_bytes(JavaThread::suspend_flags_offset())); ++ __ beq(AT, R0, Continue); ++ __ bind(slow_path); ++ __ move(A0, thread); ++ __ call(CAST_FROM_FN_PTR(address, JavaThread::check_special_condition_for_native_trans), ++ relocInfo::runtime_call_type); ++ ++#ifndef OPT_THREAD ++ __ get_thread(thread); ++#endif ++ //add for compressedoops ++ __ reinit_heapbase(); ++ __ bind(Continue); ++ } ++ ++ // change thread state ++ __ li(t, _thread_in_Java); ++ if (os::is_MP()) { ++ __ membar(Assembler::Membar_mask_bits(__ LoadStore|__ StoreStore)); ++ } ++ __ st_w(t, thread, in_bytes(JavaThread::thread_state_offset())); ++ __ reset_last_Java_frame(thread, true); ++ ++ if (CheckJNICalls) { ++ // clear_pending_jni_exception_check ++ __ st_d(R0, thread, in_bytes(JavaThread::pending_jni_exception_check_fn_offset())); ++ } ++ ++ // reset handle block ++ __ ld_d(t, thread, in_bytes(JavaThread::active_handles_offset())); ++ __ st_w(R0, t, JNIHandleBlock::top_offset_in_bytes()); ++ ++ // If result was an oop then unbox and save it in the frame ++ { ++ Label no_oop; ++ __ ld_d(AT, FP, frame::interpreter_frame_result_handler_offset*wordSize); ++ __ li(T0, AbstractInterpreter::result_handler(T_OBJECT)); ++ __ bne(AT, T0, no_oop); ++ __ pop(ltos); ++ // Unbox oop result, e.g. JNIHandles::resolve value. ++ __ resolve_jobject(V0, thread, T4); ++ __ st_d(V0, FP, (frame::interpreter_frame_oop_temp_offset)*wordSize); ++ // keep stack depth as expected by pushing oop which will eventually be discarded ++ __ push(ltos); ++ __ bind(no_oop); ++ } ++ { ++ Label no_reguard; ++ __ ld_w(t, thread, in_bytes(JavaThread::stack_guard_state_offset())); ++ __ li(AT, (u1)StackOverflow::stack_guard_yellow_reserved_disabled); ++ __ bne(t, AT, no_reguard); ++ __ push_call_clobbered_registers(); ++ __ move(S5_heapbase, SP); ++ assert(StackAlignmentInBytes == 16, "must be"); ++ __ bstrins_d(SP, R0, 3, 0); ++ __ call(CAST_FROM_FN_PTR(address, SharedRuntime::reguard_yellow_pages), relocInfo::runtime_call_type); ++ __ move(SP, S5_heapbase); ++ __ pop_call_clobbered_registers(); ++ //add for compressedoops ++ __ reinit_heapbase(); ++ __ bind(no_reguard); ++ } ++ // restore BCP to have legal interpreter frame, ++ // i.e., bci == 0 <=> BCP == code_base() ++ // Can't call_VM until bcp is within reasonable. ++ __ get_method(method); // method is junk from thread_in_native to now. ++ __ ld_d(BCP, method, in_bytes(Method::const_offset())); ++ __ lea(BCP, Address(BCP, in_bytes(ConstMethod::codes_offset()))); ++ // handle exceptions (exception handling will handle unlocking!) ++ { ++ Label L; ++ __ ld_d(t, thread, in_bytes(Thread::pending_exception_offset())); ++ __ beq(t, R0, L); ++ // Note: At some point we may want to unify this with the code used in ++ // call_VM_base(); ++ // i.e., we should use the StubRoutines::forward_exception code. For now this ++ // doesn't work here because the sp is not correctly set at this point. 
++ __ MacroAssembler::call_VM(noreg, ++ CAST_FROM_FN_PTR(address, ++ InterpreterRuntime::throw_pending_exception)); ++ __ should_not_reach_here(); ++ __ bind(L); ++ } ++ ++ // do unlocking if necessary ++ { ++ Label L; ++ __ ld_w(t, method, in_bytes(Method::access_flags_offset())); ++ __ andi(t, t, JVM_ACC_SYNCHRONIZED); ++ __ addi_d(c_rarg0, FP, frame::interpreter_frame_initial_sp_offset * wordSize - (int)sizeof(BasicObjectLock)); ++ __ beq(t, R0, L); ++ // the code below should be shared with interpreter macro assembler implementation ++ { ++ Label unlock; ++ // BasicObjectLock will be first in list, ++ // since this is a synchronized method. However, need ++ // to check that the object has not been unlocked by ++ // an explicit monitorexit bytecode. ++ // address of first monitor ++ ++ __ ld_d(t, c_rarg0, BasicObjectLock::obj_offset_in_bytes()); ++ __ bne(t, R0, unlock); ++ ++ // Entry already unlocked, need to throw exception ++ __ MacroAssembler::call_VM(NOREG, CAST_FROM_FN_PTR(address, ++ InterpreterRuntime::throw_illegal_monitor_state_exception)); ++ __ should_not_reach_here(); ++ ++ __ bind(unlock); ++ __ unlock_object(c_rarg0); ++ } ++ __ bind(L); ++ } ++ ++ // jvmti/jvmpi support ++ // Note: This must happen _after_ handling/throwing any exceptions since ++ // the exception handler code notifies the runtime of method exits ++ // too. If this happens before, method entry/exit notifications are ++ // not properly paired (was bug - gri 11/22/99). ++ __ notify_method_exit(vtos, InterpreterMacroAssembler::NotifyJVMTI); ++ ++ // restore potential result in V0, ++ // call result handler to restore potential result in ST0 & handle result ++ ++ __ pop(ltos); ++ __ pop(dtos); ++ ++ __ ld_d(t, FP, (frame::interpreter_frame_result_handler_offset) * wordSize); ++ __ jalr(t); ++ ++ ++ // remove activation ++ __ ld_d(SP, FP, frame::interpreter_frame_sender_sp_offset * wordSize); // get sender sp ++ __ ld_d(RA, FP, frame::return_addr_offset * wordSize); // get return address ++ __ ld_d(FP, FP, frame::link_offset * wordSize); // restore sender's fp ++ __ jr(RA); ++ ++#ifndef CORE ++ if (inc_counter) { ++ // Handle overflow of counter and compile method ++ __ bind(invocation_counter_overflow); ++ generate_counter_overflow(continue_after_compile); ++ // entry_point is the beginning of this ++ // function and checks again for compiled code ++ } ++#endif ++ return entry_point; ++} ++ ++void TemplateInterpreterGenerator::bang_stack_shadow_pages(bool native_call) { ++ // Quick & dirty stack overflow checking: bang the stack & handle trap. ++ // Note that we do the banging after the frame is setup, since the exception ++ // handling code expects to find a valid interpreter frame on the stack. ++ // Doing the banging earlier fails if the caller frame is not an interpreter ++ // frame. ++ // (Also, the exception throwing code expects to unlock any synchronized ++ // method receiever, so do the banging after locking the receiver.) ++ ++ // Bang each page in the shadow zone. We can't assume it's been done for ++ // an interpreter frame with greater than a page of locals, so each page ++ // needs to be checked. Only true for non-native. ++ const int page_size = os::vm_page_size(); ++ const int n_shadow_pages = ((int)StackOverflow::stack_shadow_zone_size()) / page_size; ++ const int start_page = native_call ? 
n_shadow_pages : 1; ++ BLOCK_COMMENT("bang_stack_shadow_pages:"); ++ for (int pages = start_page; pages <= n_shadow_pages; pages++) { ++ __ bang_stack_with_offset(pages*page_size); ++ } ++} ++ ++// ++// Generic interpreted method entry to (asm) interpreter ++// ++// Layout of frame just at the entry ++// ++// [ argument word n-1 ] <--- sp ++// ... ++// [ argument word 0 ] ++// assume Method* in Rmethod before call this method. ++// prerequisites to the generated stub : the callee Method* in Rmethod ++// note you must save the caller bcp before call the generated stub ++// ++address TemplateInterpreterGenerator::generate_normal_entry(bool synchronized) { ++ // determine code generation flags ++ bool inc_counter = UseCompiler || CountCompiledCalls || LogTouchedMethods; ++ ++ // Rmethod: Method* ++ // Rsender: sender 's sp ++ address entry_point = __ pc(); ++ ++ // S8 be used in C2 ++ __ li(S8, (long)Interpreter::dispatch_table(itos)); ++ const Address invocation_counter(Rmethod, ++ in_bytes(MethodCounters::invocation_counter_offset() + InvocationCounter::counter_offset())); ++ ++ // get parameter size (always needed) ++ __ ld_d(T3, Rmethod, in_bytes(Method::const_offset())); //T3 --> Rmethod._constMethod ++ __ ld_hu(V0, T3, in_bytes(ConstMethod::size_of_parameters_offset())); ++ ++ // Rmethod: Method* ++ // V0: size of parameters ++ // Rsender: sender 's sp ,could be different frome sp+ wordSize if we call via c2i ++ // get size of locals in words to T2 ++ __ ld_hu(T2, T3, in_bytes(ConstMethod::size_of_locals_offset())); ++ // T2 = no. of additional locals, locals include parameters ++ __ sub_d(T2, T2, V0); ++ ++ // see if we've got enough room on the stack for locals plus overhead. ++ // Layout of frame at this point ++ // ++ // [ argument word n-1 ] <--- sp ++ // ... ++ // [ argument word 0 ] ++ generate_stack_overflow_check(); ++ // after this function, the layout of frame does not change ++ ++ // compute beginning of parameters (LVP) ++ __ slli_d(LVP, V0, LogBytesPerWord); ++ __ addi_d(LVP, LVP, (-1) * wordSize); ++ __ add_d(LVP, LVP, SP); ++ ++ // T2 - # of additional locals ++ // allocate space for locals ++ // explicitly initialize locals ++ { ++ Label exit, loop; ++ __ beq(T2, R0, exit); ++ ++ __ bind(loop); ++ __ addi_d(SP, SP, (-1) * wordSize); ++ __ addi_d(T2, T2, -1); // until everything initialized ++ __ st_d(R0, SP, 0); // initialize local variables ++ __ bne(T2, R0, loop); ++ ++ __ bind(exit); ++ } ++ ++ // ++ // [ local var m-1 ] <--- sp ++ // ... ++ // [ local var 0 ] ++ // [ argument word n-1 ] <--- T0? ++ // ... ++ // [ argument word 0 ] <--- LVP ++ ++ // initialize fixed part of activation frame ++ ++ generate_fixed_frame(false); ++ ++ ++ // after this function, the layout of frame is as following ++ // ++ // [ monitor block top ] <--- sp ( the top monitor entry ) ++ // [ byte code pointer ] (if native, bcp = 0) ++ // [ constant pool cache ] ++ // [ Method* ] ++ // [ locals offset ] ++ // [ sender's sp ] ++ // [ sender's fp ] <--- fp ++ // [ return address ] ++ // [ local var m-1 ] ++ // ... ++ // [ local var 0 ] ++ // [ argumnet word n-1 ] <--- ( sender's sp ) ++ // ... 
++ // [ argument word 0 ] <--- LVP ++ ++ ++ // make sure method is not native & not abstract ++#ifdef ASSERT ++ __ ld_d(AT, Rmethod, in_bytes(Method::access_flags_offset())); ++ { ++ Label L; ++ __ andi(T2, AT, JVM_ACC_NATIVE); ++ __ beq(T2, R0, L); ++ __ stop("tried to execute native method as non-native"); ++ __ bind(L); ++ } ++ { ++ Label L; ++ __ andi(T2, AT, JVM_ACC_ABSTRACT); ++ __ beq(T2, R0, L); ++ __ stop("tried to execute abstract method in interpreter"); ++ __ bind(L); ++ } ++#endif ++ ++ // Since at this point in the method invocation the exception handler ++ // would try to exit the monitor of synchronized methods which hasn't ++ // been entered yet, we set the thread local variable ++ // _do_not_unlock_if_synchronized to true. The remove_activation will ++ // check this flag. ++ ++#ifndef OPT_THREAD ++ Register thread = T8; ++ __ get_thread(thread); ++#else ++ Register thread = TREG; ++#endif ++ __ li(AT, (int)true); ++ __ st_b(AT, thread, in_bytes(JavaThread::do_not_unlock_if_synchronized_offset())); ++ ++#ifndef CORE ++ ++ // mdp : T8 ++ // tmp1: T4 ++ // tmp2: T2 ++ __ profile_parameters_type(T8, T4, T2); ++ ++ // increment invocation count & check for overflow ++ Label invocation_counter_overflow; ++ if (inc_counter) { ++ generate_counter_incr(&invocation_counter_overflow); ++ } ++ ++ Label continue_after_compile; ++ __ bind(continue_after_compile); ++ ++#endif // CORE ++ ++ bang_stack_shadow_pages(false); ++ ++ // reset the _do_not_unlock_if_synchronized flag ++#ifndef OPT_THREAD ++ __ get_thread(thread); ++#endif ++ __ st_b(R0, thread, in_bytes(JavaThread::do_not_unlock_if_synchronized_offset())); ++ ++ // check for synchronized methods ++ // Must happen AFTER invocation_counter check and stack overflow check, ++ // so method is not locked if overflows. ++ // ++ if (synchronized) { ++ // Allocate monitor and lock method ++ lock_method(); ++ } else { ++ // no synchronization necessary ++#ifdef ASSERT ++ { Label L; ++ __ ld_w(AT, Rmethod, in_bytes(Method::access_flags_offset())); ++ __ andi(T2, AT, JVM_ACC_SYNCHRONIZED); ++ __ beq(T2, R0, L); ++ __ stop("method needs synchronization"); ++ __ bind(L); ++ } ++#endif ++ } ++ ++ // layout of frame after lock_method ++ // [ monitor entry ] <--- sp ++ // ... ++ // [ monitor entry ] ++ // [ monitor block top ] ( the top monitor entry ) ++ // [ byte code pointer ] (if native, bcp = 0) ++ // [ constant pool cache ] ++ // [ Method* ] ++ // [ locals offset ] ++ // [ sender's sp ] ++ // [ sender's fp ] ++ // [ return address ] <--- fp ++ // [ local var m-1 ] ++ // ... ++ // [ local var 0 ] ++ // [ argumnet word n-1 ] <--- ( sender's sp ) ++ // ... 
++ // [ argument word 0 ] <--- LVP ++ ++ ++ // start execution ++#ifdef ASSERT ++ { ++ Label L; ++ __ ld_d(AT, FP, frame::interpreter_frame_monitor_block_top_offset * wordSize); ++ __ beq(AT, SP, L); ++ __ stop("broken stack frame setup in interpreter in native"); ++ __ bind(L); ++ } ++#endif ++ ++ // jvmti/jvmpi support ++ __ notify_method_entry(); ++ ++ __ dispatch_next(vtos); ++ ++ // invocation counter overflow ++ if (inc_counter) { ++ // Handle overflow of counter and compile method ++ __ bind(invocation_counter_overflow); ++ generate_counter_overflow(continue_after_compile); ++ } ++ ++ return entry_point; ++} ++ ++//----------------------------------------------------------------------------- ++// Exceptions ++ ++void TemplateInterpreterGenerator::generate_throw_exception() { ++ // Entry point in previous activation (i.e., if the caller was ++ // interpreted) ++ Interpreter::_rethrow_exception_entry = __ pc(); ++ // Restore sp to interpreter_frame_last_sp even though we are going ++ // to empty the expression stack for the exception processing. ++ __ st_d(R0,FP, frame::interpreter_frame_last_sp_offset * wordSize); ++ ++ // V0: exception ++ // V1: return address/pc that threw exception ++ __ restore_bcp(); // BCP points to call/send ++ __ restore_locals(); ++ ++ //add for compressedoops ++ __ reinit_heapbase(); ++ // S8 be used in C2 ++ __ li(S8, (long)Interpreter::dispatch_table(itos)); ++ // Entry point for exceptions thrown within interpreter code ++ Interpreter::_throw_exception_entry = __ pc(); ++ // expression stack is undefined here ++ // V0: exception ++ // BCP: exception bcp ++ __ verify_oop(V0); ++ ++ // expression stack must be empty before entering the VM in case of an exception ++ __ empty_expression_stack(); ++ // find exception handler address and preserve exception oop ++ __ move(A1, V0); ++ __ call_VM(V1, CAST_FROM_FN_PTR(address, InterpreterRuntime::exception_handler_for_exception), A1); ++ // V0: exception handler entry point ++ // V1: preserved exception oop ++ // S0: bcp for exception handler ++ __ push(V1); // push exception which is now the only value on the stack ++ __ jr(V0); // jump to exception handler (may be _remove_activation_entry!) ++ ++ // If the exception is not handled in the current frame the frame is removed and ++ // the exception is rethrown (i.e. exception continuation is _rethrow_exception). ++ // ++ // Note: At this point the bci is still the bxi for the instruction which caused ++ // the exception and the expression stack is empty. Thus, for any VM calls ++ // at this point, GC will find a legal oop map (with empty expression stack). ++ ++ // In current activation ++ // V0: exception ++ // BCP: exception bcp ++ ++ // ++ // JVMTI PopFrame support ++ // ++ ++ Interpreter::_remove_activation_preserving_args_entry = __ pc(); ++ __ empty_expression_stack(); ++ // Set the popframe_processing bit in pending_popframe_condition indicating that we are ++ // currently handling popframe, so that call_VMs that may happen later do not trigger new ++ // popframe handling cycles. ++#ifndef OPT_THREAD ++ Register thread = T2; ++ __ get_thread(T2); ++#else ++ Register thread = TREG; ++#endif ++ __ ld_w(T3, thread, in_bytes(JavaThread::popframe_condition_offset())); ++ __ ori(T3, T3, JavaThread::popframe_processing_bit); ++ __ st_w(T3, thread, in_bytes(JavaThread::popframe_condition_offset())); ++ ++#ifndef CORE ++ { ++ // Check to see whether we are returning to a deoptimized frame. 
++ // (The PopFrame call ensures that the caller of the popped frame is ++ // either interpreted or compiled and deoptimizes it if compiled.) ++ // In this case, we can't call dispatch_next() after the frame is ++ // popped, but instead must save the incoming arguments and restore ++ // them after deoptimization has occurred. ++ // ++ // Note that we don't compare the return PC against the ++ // deoptimization blob's unpack entry because of the presence of ++ // adapter frames in C2. ++ Label caller_not_deoptimized; ++ __ ld_d(A0, FP, frame::return_addr_offset * wordSize); ++ __ super_call_VM_leaf(CAST_FROM_FN_PTR(address, InterpreterRuntime::interpreter_contains), A0); ++ __ bne(V0, R0, caller_not_deoptimized); ++ ++ // Compute size of arguments for saving when returning to deoptimized caller ++ __ get_method(A1); ++ __ verify_oop(A1); ++ __ ld_d(A1, A1, in_bytes(Method::const_offset())); ++ __ ld_hu(A1, A1, in_bytes(ConstMethod::size_of_parameters_offset())); ++ __ shl(A1, Interpreter::logStackElementSize); ++ __ restore_locals(); ++ __ sub_d(A2, LVP, A1); ++ __ addi_d(A2, A2, wordSize); ++ // Save these arguments ++#ifndef OPT_THREAD ++ __ get_thread(A0); ++#else ++ __ move(A0, TREG); ++#endif ++ __ super_call_VM_leaf(CAST_FROM_FN_PTR(address, Deoptimization::popframe_preserve_args), A0, A1, A2); ++ ++ __ remove_activation(vtos, T4, false, false, false); ++ ++ // Inform deoptimization that it is responsible for restoring these arguments ++#ifndef OPT_THREAD ++ __ get_thread(thread); ++#endif ++ __ li(AT, JavaThread::popframe_force_deopt_reexecution_bit); ++ __ st_w(AT, thread, in_bytes(JavaThread::popframe_condition_offset())); ++ // Continue in deoptimization handler ++ __ jr(T4); ++ ++ __ bind(caller_not_deoptimized); ++ } ++#endif /* !CORE */ ++ ++ __ remove_activation(vtos, T3, ++ /* throw_monitor_exception */ false, ++ /* install_monitor_exception */ false, ++ /* notify_jvmdi */ false); ++ ++ // Clear the popframe condition flag ++ // Finish with popframe handling ++ // A previous I2C followed by a deoptimization might have moved the ++ // outgoing arguments further up the stack. PopFrame expects the ++ // mutations to those outgoing arguments to be preserved and other ++ // constraints basically require this frame to look exactly as ++ // though it had previously invoked an interpreted activation with ++ // no space between the top of the expression stack (current ++ // last_sp) and the top of stack. Rather than force deopt to ++ // maintain this kind of invariant all the time we call a small ++ // fixup routine to move the mutated arguments onto the top of our ++ // expression stack if necessary. 
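++  // The current SP (where the possibly-moved outgoing arguments now live) is
++  // passed in T8 and the saved last_sp in A2, so the fixup call below can move
++  // the arguments back onto the top of this frame's expression stack when the
++  // two differ.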
++ __ move(T8, SP); ++ __ ld_d(A2, FP, frame::interpreter_frame_last_sp_offset * wordSize); ++#ifndef OPT_THREAD ++ __ get_thread(thread); ++#endif ++ // PC must point into interpreter here ++ Label L; ++ __ bind(L); ++ __ set_last_Java_frame(thread, noreg, FP, L); ++ __ super_call_VM_leaf(CAST_FROM_FN_PTR(address, InterpreterRuntime::popframe_move_outgoing_args), thread, T8, A2); ++ __ reset_last_Java_frame(thread, true); ++ // Restore the last_sp and null it out ++ __ ld_d(SP, FP, frame::interpreter_frame_last_sp_offset * wordSize); ++ __ st_d(R0, FP, frame::interpreter_frame_last_sp_offset * wordSize); ++ ++ ++ ++ __ li(AT, JavaThread::popframe_inactive); ++ __ st_w(AT, thread, in_bytes(JavaThread::popframe_condition_offset())); ++ ++ // Finish with popframe handling ++ __ restore_bcp(); ++ __ restore_locals(); ++ // S8 be used in C2 ++ __ li(S8, (long)Interpreter::dispatch_table(itos)); ++#ifndef CORE ++ // The method data pointer was incremented already during ++ // call profiling. We have to restore the mdp for the current bcp. ++ if (ProfileInterpreter) { ++ __ set_method_data_pointer_for_bcp(); ++ } ++#endif // !CORE ++ // Clear the popframe condition flag ++#ifndef OPT_THREAD ++ __ get_thread(thread); ++#endif ++ __ li(AT, JavaThread::popframe_inactive); ++ __ st_w(AT, thread, in_bytes(JavaThread::popframe_condition_offset())); ++ ++#if INCLUDE_JVMTI ++ { ++ Label L_done; ++ ++ __ ld_bu(AT, BCP, 0); ++ __ addi_d(AT, AT, -1 * Bytecodes::_invokestatic); ++ __ bne(AT, R0, L_done); ++ ++ // The member name argument must be restored if _invokestatic is re-executed after a PopFrame call. ++ // Detect such a case in the InterpreterRuntime function and return the member name argument, or NULL. ++ ++ __ get_method(T4); ++ __ ld_d(T8, LVP, 0); ++ __ call_VM(T8, CAST_FROM_FN_PTR(address, InterpreterRuntime::member_name_arg_or_null), T8, T4, BCP); ++ ++ __ beq(T8, R0, L_done); ++ ++ __ st_d(T8, SP, 0); ++ __ bind(L_done); ++ } ++#endif // INCLUDE_JVMTI ++ ++ __ dispatch_next(vtos); ++ // end of PopFrame support ++ ++ Interpreter::_remove_activation_entry = __ pc(); ++ ++ // preserve exception over this code sequence ++ __ pop(T0); ++#ifndef OPT_THREAD ++ __ get_thread(thread); ++#endif ++ __ st_d(T0, thread, in_bytes(JavaThread::vm_result_offset())); ++ // remove the activation (without doing throws on illegalMonitorExceptions) ++ __ remove_activation(vtos, T3, false, true, false); ++ // restore exception ++ __ get_vm_result(T0, thread); ++ __ verify_oop(T0); ++ ++ // In between activations - previous activation type unknown yet ++ // compute continuation point - the continuation point expects ++ // the following registers set up: ++ // ++ // T0: exception ++ // T1: return address/pc that threw exception ++ // SP: expression stack of caller ++ // FP: fp of caller ++ __ push2(T0, T3); // save exception and return address ++ __ move(A1, T3); ++ __ super_call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::exception_handler_for_return_address), thread, A1); ++ __ move(T4, V0); // save exception handler ++ __ pop2(V0, V1); // restore return address and exception ++ ++ // Note that an "issuing PC" is actually the next PC after the call ++ __ jr(T4); // jump to exception handler of caller ++} ++ ++ ++// ++// JVMTI ForceEarlyReturn support ++// ++address TemplateInterpreterGenerator::generate_earlyret_entry_for(TosState state) { ++ address entry = __ pc(); ++ ++ __ restore_bcp(); ++ __ restore_locals(); ++ __ empty_expression_stack(); ++ __ load_earlyret_value(state); ++ ++#ifndef OPT_THREAD ++ __ 
get_thread(TREG); ++#endif ++ __ ld_ptr(T4, TREG, in_bytes(JavaThread::jvmti_thread_state_offset())); ++ const Address cond_addr(T4, in_bytes(JvmtiThreadState::earlyret_state_offset())); ++ ++ // Clear the earlyret state ++ __ li(AT, JvmtiThreadState::earlyret_inactive); ++ __ st_w(AT, cond_addr); ++ ++ __ remove_activation(state, T0, ++ false, /* throw_monitor_exception */ ++ false, /* install_monitor_exception */ ++ true); /* notify_jvmdi */ ++ __ jr(T0); ++ ++ return entry; ++} // end of ForceEarlyReturn support ++ ++ ++//----------------------------------------------------------------------------- ++// Helper for vtos entry point generation ++ ++void TemplateInterpreterGenerator::set_vtos_entry_points(Template* t, ++ address& bep, ++ address& cep, ++ address& sep, ++ address& aep, ++ address& iep, ++ address& lep, ++ address& fep, ++ address& dep, ++ address& vep) { ++ assert(t->is_valid() && t->tos_in() == vtos, "illegal template"); ++ Label L; ++ fep = __ pc(); __ push(ftos); __ b(L); ++ dep = __ pc(); __ push(dtos); __ b(L); ++ lep = __ pc(); __ push(ltos); __ b(L); ++ aep =__ pc(); __ push(atos); __ b(L); ++ bep = cep = sep = ++ iep = __ pc(); __ push(itos); ++ vep = __ pc(); ++ __ bind(L); ++ generate_and_dispatch(t); ++} ++ ++//----------------------------------------------------------------------------- ++ ++// Non-product code ++#ifndef PRODUCT ++address TemplateInterpreterGenerator::generate_trace_code(TosState state) { ++ address entry = __ pc(); ++ ++ // prepare expression stack ++ __ push(state); // save tosca ++ ++ // tos & tos2 ++ // trace_bytecode need actually 4 args, the last two is tos&tos2 ++ // this work fine for x86. but LA ABI calling convention will store A2-A3 ++ // to the stack position it think is the tos&tos2 ++ // when the expression stack have no more than 2 data, error occur. ++ __ ld_d(A2, SP, 0); ++ __ ld_d(A3, SP, 1 * wordSize); ++ ++ // pass arguments & call tracer ++ __ call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::trace_bytecode), RA, A2, A3); ++ __ move(RA, V0); // make sure return address is not destroyed by pop(state) ++ ++ // restore expression stack ++ __ pop(state); // restore tosca ++ ++ // return ++ __ jr(RA); ++ return entry; ++} ++ ++void TemplateInterpreterGenerator::count_bytecode() { ++ __ li(T8, (long)&BytecodeCounter::_counter_value); ++ __ ld_w(AT, T8, 0); ++ __ addi_d(AT, AT, 1); ++ __ st_w(AT, T8, 0); ++} ++ ++void TemplateInterpreterGenerator::histogram_bytecode(Template* t) { ++ __ li(T8, (long)&BytecodeHistogram::_counters[t->bytecode()]); ++ __ ld_w(AT, T8, 0); ++ __ addi_d(AT, AT, 1); ++ __ st_w(AT, T8, 0); ++} ++ ++void TemplateInterpreterGenerator::histogram_bytecode_pair(Template* t) { ++ __ li(T8, (long)&BytecodePairHistogram::_index); ++ __ ld_w(T4, T8, 0); ++ __ srli_d(T4, T4, BytecodePairHistogram::log2_number_of_codes); ++ __ li(T8, ((long)t->bytecode()) << BytecodePairHistogram::log2_number_of_codes); ++ __ orr(T4, T4, T8); ++ __ li(T8, (long)&BytecodePairHistogram::_index); ++ __ st_w(T4, T8, 0); ++ __ slli_d(T4, T4, 2); ++ __ li(T8, (long)BytecodePairHistogram::_counters); ++ __ add_d(T8, T8, T4); ++ __ ld_w(AT, T8, 0); ++ __ addi_d(AT, AT, 1); ++ __ st_w(AT, T8, 0); ++} ++ ++ ++void TemplateInterpreterGenerator::trace_bytecode(Template* t) { ++ // Call a little run-time stub to avoid blow-up for each bytecode. ++ // The run-time runtime saves the right registers, depending on ++ // the tosca in-state for the given template. 
++ address entry = Interpreter::trace_code(t->tos_in()); ++ assert(entry != NULL, "entry must have been generated"); ++ __ call(entry, relocInfo::none); ++ //add for compressedoops ++ __ reinit_heapbase(); ++} ++ ++ ++void TemplateInterpreterGenerator::stop_interpreter_at() { ++ Label L; ++ __ li(T8, long(&BytecodeCounter::_counter_value)); ++ __ ld_w(T8, T8, 0); ++ __ li(AT, StopInterpreterAt); ++ __ bne(T8, AT, L); ++ __ brk(5); ++ __ bind(L); ++} ++#endif // !PRODUCT +diff --git a/src/hotspot/cpu/loongarch/templateTable_loongarch.hpp b/src/hotspot/cpu/loongarch/templateTable_loongarch.hpp +new file mode 100644 +index 00000000000..ddb38faf446 +--- /dev/null ++++ b/src/hotspot/cpu/loongarch/templateTable_loongarch.hpp +@@ -0,0 +1,43 @@ ++/* ++ * Copyright (c) 2003, 2012, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#ifndef CPU_LOONGARCH_TEMPLATETABLE_LOONGARCH_64_HPP ++#define CPU_LOONGARCH_TEMPLATETABLE_LOONGARCH_64_HPP ++ ++ static void prepare_invoke(int byte_no, ++ Register method, // linked method (or i-klass) ++ Register index = noreg, // itable index, MethodType, etc. ++ Register recv = noreg, // if caller wants to see it ++ Register flags = noreg // if caller wants to test it ++ ); ++ static void invokevirtual_helper(Register index, Register recv, ++ Register flags); ++ static void volatile_barrier(); ++ ++ // Helpers ++ static void index_check(Register array, Register index); ++ static void index_check_without_pop(Register array, Register index); ++ ++#endif // CPU_LOONGARCH_TEMPLATETABLE_LOONGARCH_64_HPP +diff --git a/src/hotspot/cpu/loongarch/templateTable_loongarch_64.cpp b/src/hotspot/cpu/loongarch/templateTable_loongarch_64.cpp +new file mode 100644 +index 00000000000..c0d1daea305 +--- /dev/null ++++ b/src/hotspot/cpu/loongarch/templateTable_loongarch_64.cpp +@@ -0,0 +1,4045 @@ ++/* ++ * Copyright (c) 2003, 2013, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2021, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#include "precompiled.hpp" ++#include "asm/macroAssembler.hpp" ++#include "interpreter/interpreter.hpp" ++#include "interpreter/interpreterRuntime.hpp" ++#include "interpreter/interp_masm.hpp" ++#include "interpreter/templateTable.hpp" ++#include "gc/shared/collectedHeap.hpp" ++#include "memory/universe.hpp" ++#include "oops/klass.inline.hpp" ++#include "oops/methodData.hpp" ++#include "oops/objArrayKlass.hpp" ++#include "oops/oop.inline.hpp" ++#include "prims/jvmtiExport.hpp" ++#include "prims/methodHandles.hpp" ++#include "runtime/frame.inline.hpp" ++#include "runtime/sharedRuntime.hpp" ++#include "runtime/stubRoutines.hpp" ++#include "runtime/synchronizer.hpp" ++#include "utilities/macros.hpp" ++ ++ ++#define __ _masm-> ++ ++#define A0 RA0 ++#define A1 RA1 ++#define A2 RA2 ++#define A3 RA3 ++#define A4 RA4 ++#define A5 RA5 ++#define A6 RA6 ++#define A7 RA7 ++#define T0 RT0 ++#define T1 RT1 ++#define T2 RT2 ++#define T3 RT3 ++#define T4 RT4 ++#define T6 RT6 ++#define T8 RT8 ++ ++// Address computation: local variables ++ ++static inline Address iaddress(int n) { ++ return Address(LVP, Interpreter::local_offset_in_bytes(n)); ++} ++ ++static inline Address laddress(int n) { ++ return iaddress(n + 1); ++} ++ ++static inline Address faddress(int n) { ++ return iaddress(n); ++} ++ ++static inline Address daddress(int n) { ++ return laddress(n); ++} ++ ++static inline Address aaddress(int n) { ++ return iaddress(n); ++} ++static inline Address haddress(int n) { return iaddress(n + 0); } ++ ++ ++static inline Address at_sp() { return Address(SP, 0); } ++static inline Address at_sp_p1() { return Address(SP, 1 * wordSize); } ++static inline Address at_sp_p2() { return Address(SP, 2 * wordSize); } ++ ++// At top of Java expression stack which may be different than sp(). ++// It isn't for category 1 objects. ++static inline Address at_tos () { ++ Address tos = Address(SP, Interpreter::expr_offset_in_bytes(0)); ++ return tos; ++} ++ ++static inline Address at_tos_p1() { ++ return Address(SP, Interpreter::expr_offset_in_bytes(1)); ++} ++ ++static inline Address at_tos_p2() { ++ return Address(SP, Interpreter::expr_offset_in_bytes(2)); ++} ++ ++static inline Address at_tos_p3() { ++ return Address(SP, Interpreter::expr_offset_in_bytes(3)); ++} ++ ++// we use S0 as bcp, be sure you have bcp in S0 before you call any of the Template generator ++Address TemplateTable::at_bcp(int offset) { ++ assert(_desc->uses_bcp(), "inconsistent uses_bcp information"); ++ return Address(BCP, offset); ++} ++ ++// Miscelaneous helper routines ++// Store an oop (or NULL) at the address described by obj. 
++// If val == noreg this means store a NULL ++static void do_oop_store(InterpreterMacroAssembler* _masm, ++ Address dst, ++ Register val, ++ DecoratorSet decorators = 0) { ++ assert(val == noreg || val == V0, "parameter is just for looks"); ++ __ store_heap_oop(dst, val, T4, T1, decorators); ++} ++ ++static void do_oop_load(InterpreterMacroAssembler* _masm, ++ Address src, ++ Register dst, ++ DecoratorSet decorators = 0) { ++ __ load_heap_oop(dst, src, T4, T1, decorators); ++} ++ ++// bytecode folding ++void TemplateTable::patch_bytecode(Bytecodes::Code bc, Register bc_reg, ++ Register tmp_reg, bool load_bc_into_bc_reg/*=true*/, ++ int byte_no) { ++ if (!RewriteBytecodes) return; ++ Label L_patch_done; ++ ++ switch (bc) { ++ case Bytecodes::_fast_aputfield: ++ case Bytecodes::_fast_bputfield: ++ case Bytecodes::_fast_zputfield: ++ case Bytecodes::_fast_cputfield: ++ case Bytecodes::_fast_dputfield: ++ case Bytecodes::_fast_fputfield: ++ case Bytecodes::_fast_iputfield: ++ case Bytecodes::_fast_lputfield: ++ case Bytecodes::_fast_sputfield: ++ { ++ // We skip bytecode quickening for putfield instructions when ++ // the put_code written to the constant pool cache is zero. ++ // This is required so that every execution of this instruction ++ // calls out to InterpreterRuntime::resolve_get_put to do ++ // additional, required work. ++ assert(byte_no == f1_byte || byte_no == f2_byte, "byte_no out of range"); ++ assert(load_bc_into_bc_reg, "we use bc_reg as temp"); ++ __ get_cache_and_index_and_bytecode_at_bcp(tmp_reg, bc_reg, tmp_reg, byte_no, 1); ++ __ addi_d(bc_reg, R0, bc); ++ __ beq(tmp_reg, R0, L_patch_done); ++ } ++ break; ++ default: ++ assert(byte_no == -1, "sanity"); ++ // the pair bytecodes have already done the load. ++ if (load_bc_into_bc_reg) { ++ __ li(bc_reg, bc); ++ } ++ } ++ ++ if (JvmtiExport::can_post_breakpoint()) { ++ Label L_fast_patch; ++ // if a breakpoint is present we can't rewrite the stream directly ++ __ ld_bu(tmp_reg, at_bcp(0)); ++ __ li(AT, Bytecodes::_breakpoint); ++ __ bne(tmp_reg, AT, L_fast_patch); ++ ++ __ get_method(tmp_reg); ++ // Let breakpoint table handling rewrite to quicker bytecode ++ __ call_VM(NOREG, CAST_FROM_FN_PTR(address, ++ InterpreterRuntime::set_original_bytecode_at), tmp_reg, BCP, bc_reg); ++ ++ __ b(L_patch_done); ++ __ bind(L_fast_patch); ++ } ++ ++#ifdef ASSERT ++ Label L_okay; ++ __ ld_bu(tmp_reg, at_bcp(0)); ++ __ li(AT, (int)Bytecodes::java_code(bc)); ++ __ beq(tmp_reg, AT, L_okay); ++ __ beq(tmp_reg, bc_reg, L_patch_done); ++ __ stop("patching the wrong bytecode"); ++ __ bind(L_okay); ++#endif ++ ++ // patch bytecode ++ __ st_b(bc_reg, at_bcp(0)); ++ __ bind(L_patch_done); ++} ++ ++ ++// Individual instructions ++ ++void TemplateTable::nop() { ++ transition(vtos, vtos); ++ // nothing to do ++} ++ ++void TemplateTable::shouldnotreachhere() { ++ transition(vtos, vtos); ++ __ stop("shouldnotreachhere bytecode"); ++} ++ ++void TemplateTable::aconst_null() { ++ transition(vtos, atos); ++ __ move(FSR, R0); ++} ++ ++void TemplateTable::iconst(int value) { ++ transition(vtos, itos); ++ if (value == 0) { ++ __ move(FSR, R0); ++ } else { ++ __ li(FSR, value); ++ } ++} ++ ++void TemplateTable::lconst(int value) { ++ transition(vtos, ltos); ++ if (value == 0) { ++ __ move(FSR, R0); ++ } else { ++ __ li(FSR, value); ++ } ++} ++ ++void TemplateTable::fconst(int value) { ++ transition(vtos, ftos); ++ switch( value ) { ++ case 0: __ movgr2fr_w(FSF, R0); return; ++ case 1: __ addi_d(AT, R0, 1); break; ++ case 2: __ addi_d(AT, R0, 2); break; ++ 
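// Illustrative sketch (plain C++, not HotSpot code) of the quickening idea
// behind patch_bytecode() above: once resolution has happened, the bytecode at
// the current bcp is overwritten with its "_fast_" variant so later executions
// skip the resolve check; for the putfield family the rewrite waits until the
// put_code recorded in the constant pool cache is non-zero. The names
// quicken() and put_code are hypothetical, for illustration only.
#include <cstdint>

inline void quicken(uint8_t* code, int bci, uint8_t fast_bc, uint8_t put_code) {
  if (put_code == 0)
    return;             // not resolved yet: keep the slow bytecode for now
  code[bci] = fast_bc;  // resolved: later dispatches use the fast template
}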
default: ShouldNotReachHere(); ++ } ++ __ movgr2fr_w(FSF, AT); ++ __ ffint_s_w(FSF, FSF); ++} ++ ++void TemplateTable::dconst(int value) { ++ transition(vtos, dtos); ++ switch( value ) { ++ case 0: __ movgr2fr_d(FSF, R0); ++ return; ++ case 1: __ addi_d(AT, R0, 1); ++ __ movgr2fr_d(FSF, AT); ++ __ ffint_d_w(FSF, FSF); ++ break; ++ default: ShouldNotReachHere(); ++ } ++} ++ ++void TemplateTable::bipush() { ++ transition(vtos, itos); ++ __ ld_b(FSR, at_bcp(1)); ++} ++ ++void TemplateTable::sipush() { ++ transition(vtos, itos); ++ __ ld_b(FSR, BCP, 1); ++ __ ld_bu(AT, BCP, 2); ++ __ slli_d(FSR, FSR, 8); ++ __ orr(FSR, FSR, AT); ++} ++ ++// T1 : tags ++// T2 : index ++// T3 : cpool ++// T8 : tag ++void TemplateTable::ldc(bool wide) { ++ transition(vtos, vtos); ++ Label call_ldc, notFloat, notClass, notInt, Done; ++ // get index in cpool ++ if (wide) { ++ __ get_unsigned_2_byte_index_at_bcp(T2, 1); ++ } else { ++ __ ld_bu(T2, at_bcp(1)); ++ } ++ ++ __ get_cpool_and_tags(T3, T1); ++ ++ const int base_offset = ConstantPool::header_size() * wordSize; ++ const int tags_offset = Array::base_offset_in_bytes(); ++ ++ // get type ++ __ add_d(AT, T1, T2); ++ __ ld_b(T1, AT, tags_offset); ++ if(os::is_MP()) { ++ __ membar(Assembler::Membar_mask_bits(__ LoadLoad|__ LoadStore)); ++ } ++ //now T1 is the tag ++ ++ // unresolved class - get the resolved class ++ __ addi_d(AT, T1, - JVM_CONSTANT_UnresolvedClass); ++ __ beq(AT, R0, call_ldc); ++ ++ // unresolved class in error (resolution failed) - call into runtime ++ // so that the same error from first resolution attempt is thrown. ++ __ addi_d(AT, T1, -JVM_CONSTANT_UnresolvedClassInError); ++ __ beq(AT, R0, call_ldc); ++ ++ // resolved class - need to call vm to get java mirror of the class ++ __ addi_d(AT, T1, - JVM_CONSTANT_Class); ++ __ slli_d(T2, T2, Address::times_8); ++ __ bne(AT, R0, notClass); ++ ++ __ bind(call_ldc); ++ __ li(A1, wide); ++ call_VM(FSR, CAST_FROM_FN_PTR(address, InterpreterRuntime::ldc), A1); ++ //__ push(atos); ++ __ addi_d(SP, SP, - Interpreter::stackElementSize); ++ __ st_d(FSR, SP, 0); ++ __ b(Done); ++ ++ __ bind(notClass); ++ __ addi_d(AT, T1, -JVM_CONSTANT_Float); ++ __ bne(AT, R0, notFloat); ++ // ftos ++ __ add_d(AT, T3, T2); ++ __ fld_s(FSF, AT, base_offset); ++ //__ push_f(); ++ __ addi_d(SP, SP, - Interpreter::stackElementSize); ++ __ fst_s(FSF, SP, 0); ++ __ b(Done); ++ ++ __ bind(notFloat); ++ __ addi_d(AT, T1, -JVM_CONSTANT_Integer); ++ __ bne(AT, R0, notInt); ++ // itos ++ __ add_d(T0, T3, T2); ++ __ ld_w(FSR, T0, base_offset); ++ __ push(itos); ++ __ b(Done); ++ ++ // assume the tag is for condy; if not, the VM runtime will tell us ++ __ bind(notInt); ++ condy_helper(Done); ++ ++ __ bind(Done); ++} ++ ++void TemplateTable::condy_helper(Label& Done) { ++ const Register obj = FSR; ++ const Register off = SSR; ++ const Register flags = T3; ++ const Register rarg = A1; ++ __ li(rarg, (int)bytecode()); ++ __ call_VM(obj, CAST_FROM_FN_PTR(address, InterpreterRuntime::resolve_ldc), rarg); ++ __ get_vm_result_2(flags, TREG); ++ // VMr = obj = base address to find primitive value to push ++ // VMr2 = flags = (tos, off) using format of CPCE::_flags ++ __ li(AT, ConstantPoolCacheEntry::field_index_mask); ++ __ andr(off, flags, AT); ++ __ add_d(obj, off, obj); ++ const Address field(obj, 0 * wordSize); ++ ++ // What sort of thing are we loading? 
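// Illustrative sketch (plain C++, not HotSpot code) of the tag dispatch that
// the ldc template above performs: class-like tags need a runtime call
// (InterpreterRuntime::ldc), Integer and Float entries are pushed directly,
// and everything else falls through to condy_helper. The enum and struct below
// are hypothetical stand-ins for the real constant pool layout.
#include <cstdint>

enum class CpTag : uint8_t { UnresolvedClass, UnresolvedClassInError, Class,
                             Float, Integer, Other };

struct LdcAction { bool call_runtime; bool push_float; bool push_int; };

inline LdcAction classify_ldc(CpTag tag) {
  switch (tag) {
    case CpTag::UnresolvedClass:
    case CpTag::UnresolvedClassInError:
    case CpTag::Class:   return {true,  false, false};  // resolve, then push the mirror
    case CpTag::Float:   return {false, true,  false};  // push ftos straight from the pool
    case CpTag::Integer: return {false, false, true};   // push itos straight from the pool
    default:             return {false, false, false};  // dynamic constant: condy_helper
  }
}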
++ __ srli_d(flags, flags, ConstantPoolCacheEntry::tos_state_shift); ++ ConstantPoolCacheEntry::verify_tos_state_shift(); ++ ++ switch (bytecode()) { ++ case Bytecodes::_ldc: ++ case Bytecodes::_ldc_w: ++ { ++ // tos in (itos, ftos, stos, btos, ctos, ztos) ++ Label notInt, notFloat, notShort, notByte, notChar, notBool; ++ __ addi_d(AT, flags, -itos); ++ __ bne(AT, R0, notInt); ++ // itos ++ __ ld_d(obj, field); ++ __ push(itos); ++ __ b(Done); ++ ++ __ bind(notInt); ++ __ addi_d(AT, flags, -ftos); ++ __ bne(AT, R0, notFloat); ++ // ftos ++ __ fld_s(FSF, field); ++ __ push(ftos); ++ __ b(Done); ++ ++ __ bind(notFloat); ++ __ addi_d(AT, flags, -stos); ++ __ bne(AT, R0, notShort); ++ // stos ++ __ ld_h(obj, field); ++ __ push(stos); ++ __ b(Done); ++ ++ __ bind(notShort); ++ __ addi_d(AT, flags, -btos); ++ __ bne(AT, R0, notByte); ++ // btos ++ __ ld_b(obj, field); ++ __ push(btos); ++ __ b(Done); ++ ++ __ bind(notByte); ++ __ addi_d(AT, flags, -ctos); ++ __ bne(AT, R0, notChar); ++ // ctos ++ __ ld_hu(obj, field); ++ __ push(ctos); ++ __ b(Done); ++ ++ __ bind(notChar); ++ __ addi_d(AT, flags, -ztos); ++ __ bne(AT, R0, notBool); ++ // ztos ++ __ ld_bu(obj, field); ++ __ push(ztos); ++ __ b(Done); ++ ++ __ bind(notBool); ++ break; ++ } ++ ++ case Bytecodes::_ldc2_w: ++ { ++ Label notLong, notDouble; ++ __ addi_d(AT, flags, -ltos); ++ __ bne(AT, R0, notLong); ++ // ltos ++ __ ld_d(obj, field); ++ __ push(ltos); ++ __ b(Done); ++ ++ __ bind(notLong); ++ __ addi_d(AT, flags, -dtos); ++ __ bne(AT, R0, notDouble); ++ // dtos ++ __ fld_d(FSF, field); ++ __ push(dtos); ++ __ b(Done); ++ ++ __ bind(notDouble); ++ break; ++ } ++ ++ default: ++ ShouldNotReachHere(); ++ } ++ ++ __ stop("bad ldc/condy"); ++} ++ ++// Fast path for caching oop constants. ++void TemplateTable::fast_aldc(bool wide) { ++ transition(vtos, atos); ++ ++ Register result = FSR; ++ Register tmp = SSR; ++ Register rarg = A1; ++ int index_size = wide ? sizeof(u2) : sizeof(u1); ++ ++ Label resolved; ++ ++ // We are resolved if the resolved reference cache entry contains a ++ // non-null object (String, MethodType, etc.) ++ assert_different_registers(result, tmp); ++ __ get_cache_index_at_bcp(tmp, 1, index_size); ++ __ load_resolved_reference_at_index(result, tmp, T4); ++ __ bne(result, R0, resolved); ++ ++ address entry = CAST_FROM_FN_PTR(address, InterpreterRuntime::resolve_ldc); ++ // first time invocation - must resolve first ++ int i = (int)bytecode(); ++ __ li(rarg, i); ++ __ call_VM(result, entry, rarg); ++ ++ __ bind(resolved); ++ ++ { // Check for the null sentinel. ++ // If we just called the VM, it already did the mapping for us, ++ // but it's harmless to retry. 
++ Label notNull; ++ __ li(rarg, (long)Universe::the_null_sentinel_addr()); ++ __ ld_ptr(tmp, Address(rarg)); ++ __ resolve_oop_handle(tmp, T4); ++ __ bne(tmp, result, notNull); ++ __ xorr(result, result, result); // NULL object reference ++ __ bind(notNull); ++ } ++ ++ if (VerifyOops) { ++ __ verify_oop(result); ++ } ++} ++ ++// used register: T2, T3, T1 ++// T2 : index ++// T3 : cpool ++// T1 : tag ++void TemplateTable::ldc2_w() { ++ transition(vtos, vtos); ++ Label notDouble, notLong, Done; ++ ++ // get index in cpool ++ __ get_unsigned_2_byte_index_at_bcp(T2, 1); ++ ++ __ get_cpool_and_tags(T3, T1); ++ ++ const int base_offset = ConstantPool::header_size() * wordSize; ++ const int tags_offset = Array::base_offset_in_bytes(); ++ ++ // get type in T1 ++ __ add_d(AT, T1, T2); ++ __ ld_b(T1, AT, tags_offset); ++ ++ __ addi_d(AT, T1, -JVM_CONSTANT_Double); ++ __ bne(AT, R0, notDouble); ++ ++ // dtos ++ __ alsl_d(AT, T2, T3, Address::times_8 - 1); ++ __ fld_d(FSF, AT, base_offset); ++ __ push(dtos); ++ __ b(Done); ++ ++ __ bind(notDouble); ++ __ addi_d(AT, T1, -JVM_CONSTANT_Long); ++ __ bne(AT, R0, notLong); ++ ++ // ltos ++ __ slli_d(T2, T2, Address::times_8); ++ __ add_d(AT, T3, T2); ++ __ ld_d(FSR, AT, base_offset); ++ __ push(ltos); ++ __ b(Done); ++ ++ __ bind(notLong); ++ condy_helper(Done); ++ ++ __ bind(Done); ++} ++ ++// we compute the actual local variable address here ++void TemplateTable::locals_index(Register reg, int offset) { ++ __ ld_bu(reg, at_bcp(offset)); ++ __ slli_d(reg, reg, Address::times_8); ++ __ sub_d(reg, LVP, reg); ++} ++ ++void TemplateTable::iload() { ++ iload_internal(); ++} ++ ++void TemplateTable::nofast_iload() { ++ iload_internal(may_not_rewrite); ++} ++ ++// this method will do bytecode folding of the two form: ++// iload iload iload caload ++// used register : T2, T3 ++// T2 : bytecode ++// T3 : folded code ++void TemplateTable::iload_internal(RewriteControl rc) { ++ transition(vtos, itos); ++ if (RewriteFrequentPairs && rc == may_rewrite) { ++ Label rewrite, done; ++ // get the next bytecode in T2 ++ __ ld_bu(T2, at_bcp(Bytecodes::length_for(Bytecodes::_iload))); ++ // if _iload, wait to rewrite to iload2. We only want to rewrite the ++ // last two iloads in a pair. Comparing against fast_iload means that ++ // the next bytecode is neither an iload or a caload, and therefore ++ // an iload pair. ++ __ li(AT, Bytecodes::_iload); ++ __ beq(AT, T2, done); ++ ++ __ li(T3, Bytecodes::_fast_iload2); ++ __ li(AT, Bytecodes::_fast_iload); ++ __ beq(AT, T2, rewrite); ++ ++ // if _caload, rewrite to fast_icaload ++ __ li(T3, Bytecodes::_fast_icaload); ++ __ li(AT, Bytecodes::_caload); ++ __ beq(AT, T2, rewrite); ++ ++ // rewrite so iload doesn't check again. 
++ __ li(T3, Bytecodes::_fast_iload); ++ ++ // rewrite ++ // T3 : fast bytecode ++ __ bind(rewrite); ++ patch_bytecode(Bytecodes::_iload, T3, T2, false); ++ __ bind(done); ++ } ++ ++ // Get the local value into tos ++ locals_index(T2); ++ __ ld_w(FSR, T2, 0); ++} ++ ++// used register T2 ++// T2 : index ++void TemplateTable::fast_iload2() { ++ transition(vtos, itos); ++ locals_index(T2); ++ __ ld_w(FSR, T2, 0); ++ __ push(itos); ++ locals_index(T2, 3); ++ __ ld_w(FSR, T2, 0); ++} ++ ++// used register T2 ++// T2 : index ++void TemplateTable::fast_iload() { ++ transition(vtos, itos); ++ locals_index(T2); ++ __ ld_w(FSR, T2, 0); ++} ++ ++// used register T2 ++// T2 : index ++void TemplateTable::lload() { ++ transition(vtos, ltos); ++ locals_index(T2); ++ __ ld_d(FSR, T2, -wordSize); ++} ++ ++// used register T2 ++// T2 : index ++void TemplateTable::fload() { ++ transition(vtos, ftos); ++ locals_index(T2); ++ __ fld_s(FSF, T2, 0); ++} ++ ++// used register T2 ++// T2 : index ++void TemplateTable::dload() { ++ transition(vtos, dtos); ++ locals_index(T2); ++ __ fld_d(FSF, T2, -wordSize); ++} ++ ++// used register T2 ++// T2 : index ++void TemplateTable::aload() { ++ transition(vtos, atos); ++ locals_index(T2); ++ __ ld_d(FSR, T2, 0); ++} ++ ++void TemplateTable::locals_index_wide(Register reg) { ++ __ get_unsigned_2_byte_index_at_bcp(reg, 2); ++ __ slli_d(reg, reg, Address::times_8); ++ __ sub_d(reg, LVP, reg); ++} ++ ++// used register T2 ++// T2 : index ++void TemplateTable::wide_iload() { ++ transition(vtos, itos); ++ locals_index_wide(T2); ++ __ ld_d(FSR, T2, 0); ++} ++ ++// used register T2 ++// T2 : index ++void TemplateTable::wide_lload() { ++ transition(vtos, ltos); ++ locals_index_wide(T2); ++ __ ld_d(FSR, T2, -wordSize); ++} ++ ++// used register T2 ++// T2 : index ++void TemplateTable::wide_fload() { ++ transition(vtos, ftos); ++ locals_index_wide(T2); ++ __ fld_s(FSF, T2, 0); ++} ++ ++// used register T2 ++// T2 : index ++void TemplateTable::wide_dload() { ++ transition(vtos, dtos); ++ locals_index_wide(T2); ++ __ fld_d(FSF, T2, -wordSize); ++} ++ ++// used register T2 ++// T2 : index ++void TemplateTable::wide_aload() { ++ transition(vtos, atos); ++ locals_index_wide(T2); ++ __ ld_d(FSR, T2, 0); ++} ++ ++// we use A2 as the regiser for index, BE CAREFUL! 
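// Illustrative sketch (plain C++, not HotSpot code) of the address arithmetic
// done by locals_index() and locals_index_wide() above: slot n of the locals
// area sits at LVP - n*wordSize (the area grows toward lower addresses), and
// two-slot values (long/double) are read one word lower still, which is why
// laddress(n) is defined as iaddress(n + 1). The helper name is hypothetical.
#include <cstdint>

inline intptr_t* local_slot(intptr_t* lvp, unsigned n, bool two_slots = false) {
  intptr_t* p = lvp - n;          // pointer arithmetic: n words below the locals pointer
  return two_slots ? p - 1 : p;   // long/double occupy slots n and n+1
}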
++// we dont use our tge 29 now, for later optimization ++void TemplateTable::index_check(Register array, Register index) { ++ // Pop ptr into array ++ __ pop_ptr(array); ++ index_check_without_pop(array, index); ++} ++ ++void TemplateTable::index_check_without_pop(Register array, Register index) { ++ // destroys A2 ++ // check array ++ __ null_check(array, arrayOopDesc::length_offset_in_bytes()); ++ ++ // sign extend since tos (index) might contain garbage in upper bits ++ __ slli_w(index, index, 0); ++ ++ // check index ++ Label ok; ++ __ ld_w(AT, array, arrayOopDesc::length_offset_in_bytes()); ++ __ bltu(index, AT, ok); ++ ++ //throw_ArrayIndexOutOfBoundsException assume abberrant index in A2 ++ if (A1 != array) __ move(A1, array); ++ if (A2 != index) __ move(A2, index); ++ __ jmp(Interpreter::_throw_ArrayIndexOutOfBoundsException_entry); ++ __ bind(ok); ++} ++ ++void TemplateTable::iaload() { ++ transition(itos, itos); ++ index_check(SSR, FSR); ++ __ alsl_d(FSR, FSR, SSR, 1); ++ __ access_load_at(T_INT, IN_HEAP | IS_ARRAY, FSR, Address(FSR, arrayOopDesc::base_offset_in_bytes(T_INT)), noreg, noreg); ++} ++ ++void TemplateTable::laload() { ++ transition(itos, ltos); ++ index_check(SSR, FSR); ++ __ alsl_d(T4, FSR, SSR, Address::times_8 - 1); ++ __ access_load_at(T_LONG, IN_HEAP | IS_ARRAY, FSR, Address(T4, arrayOopDesc::base_offset_in_bytes(T_LONG)), noreg, noreg); ++} ++ ++void TemplateTable::faload() { ++ transition(itos, ftos); ++ index_check(SSR, FSR); ++ __ alsl_d(FSR, FSR, SSR, Address::times_4 - 1); ++ __ access_load_at(T_FLOAT, IN_HEAP | IS_ARRAY, noreg, Address(FSR, arrayOopDesc::base_offset_in_bytes(T_FLOAT)), noreg, noreg); ++} ++ ++void TemplateTable::daload() { ++ transition(itos, dtos); ++ index_check(SSR, FSR); ++ __ alsl_d(T4, FSR, SSR, 2); ++ __ access_load_at(T_DOUBLE, IN_HEAP | IS_ARRAY, noreg, Address(T4, arrayOopDesc::base_offset_in_bytes(T_DOUBLE)), noreg, noreg); ++} ++ ++void TemplateTable::aaload() { ++ transition(itos, atos); ++ index_check(SSR, FSR); ++ __ alsl_d(FSR, FSR, SSR, (UseCompressedOops ? 
Address::times_4 : Address::times_8) - 1); ++ //add for compressedoops ++ do_oop_load(_masm, ++ Address(FSR, arrayOopDesc::base_offset_in_bytes(T_OBJECT)), ++ FSR, ++ IS_ARRAY); ++} ++ ++void TemplateTable::baload() { ++ transition(itos, itos); ++ index_check(SSR, FSR); ++ __ add_d(FSR, SSR, FSR); ++ __ access_load_at(T_BYTE, IN_HEAP | IS_ARRAY, FSR, Address(FSR, arrayOopDesc::base_offset_in_bytes(T_BYTE)), noreg, noreg); ++} ++ ++void TemplateTable::caload() { ++ transition(itos, itos); ++ index_check(SSR, FSR); ++ __ alsl_d(FSR, FSR, SSR, Address::times_2 - 1); ++ __ access_load_at(T_CHAR, IN_HEAP | IS_ARRAY, FSR, Address(FSR, arrayOopDesc::base_offset_in_bytes(T_CHAR)), noreg, noreg); ++} ++ ++// iload followed by caload frequent pair ++// used register : T2 ++// T2 : index ++void TemplateTable::fast_icaload() { ++ transition(vtos, itos); ++ // load index out of locals ++ locals_index(T2); ++ __ ld_w(FSR, T2, 0); ++ index_check(SSR, FSR); ++ __ alsl_d(FSR, FSR, SSR, 0); ++ __ access_load_at(T_CHAR, IN_HEAP | IS_ARRAY, FSR, Address(FSR, arrayOopDesc::base_offset_in_bytes(T_CHAR)), noreg, noreg); ++} ++ ++void TemplateTable::saload() { ++ transition(itos, itos); ++ index_check(SSR, FSR); ++ __ alsl_d(FSR, FSR, SSR, Address::times_2 - 1); ++ __ access_load_at(T_SHORT, IN_HEAP | IS_ARRAY, FSR, Address(FSR, arrayOopDesc::base_offset_in_bytes(T_SHORT)), noreg, noreg); ++} ++ ++void TemplateTable::iload(int n) { ++ transition(vtos, itos); ++ __ ld_w(FSR, iaddress(n)); ++} ++ ++void TemplateTable::lload(int n) { ++ transition(vtos, ltos); ++ __ ld_d(FSR, laddress(n)); ++} ++ ++void TemplateTable::fload(int n) { ++ transition(vtos, ftos); ++ __ fld_s(FSF, faddress(n)); ++} ++ ++void TemplateTable::dload(int n) { ++ transition(vtos, dtos); ++ __ fld_d(FSF, laddress(n)); ++} ++ ++void TemplateTable::aload(int n) { ++ transition(vtos, atos); ++ __ ld_d(FSR, aaddress(n)); ++} ++ ++void TemplateTable::aload_0() { ++ aload_0_internal(); ++} ++ ++void TemplateTable::nofast_aload_0() { ++ aload_0_internal(may_not_rewrite); ++} ++ ++// used register : T2, T3 ++// T2 : bytecode ++// T3 : folded code ++void TemplateTable::aload_0_internal(RewriteControl rc) { ++ transition(vtos, atos); ++ // According to bytecode histograms, the pairs: ++ // ++ // _aload_0, _fast_igetfield ++ // _aload_0, _fast_agetfield ++ // _aload_0, _fast_fgetfield ++ // ++ // occur frequently. If RewriteFrequentPairs is set, the (slow) ++ // _aload_0 bytecode checks if the next bytecode is either ++ // _fast_igetfield, _fast_agetfield or _fast_fgetfield and then ++ // rewrites the current bytecode into a pair bytecode; otherwise it ++ // rewrites the current bytecode into _fast_aload_0 that doesn't do ++ // the pair check anymore. ++ // ++ // Note: If the next bytecode is _getfield, the rewrite must be ++ // delayed, otherwise we may miss an opportunity for a pair. 
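// Illustrative sketch (plain C++, not HotSpot code) of what index_check() /
// index_check_without_pop() above guarantee before every array access: the
// array is null-checked, the 32-bit index is sign-extended, and the bound is
// tested with a single unsigned compare (bltu), which rejects negative indices
// as well as indices >= length. Exception types are placeholders for the
// interpreter's throw entries.
#include <cstdint>
#include <stdexcept>

inline void check_index(const int32_t* length_addr, int32_t index) {
  if (length_addr == nullptr)
    throw std::runtime_error("NullPointerException");           // null_check
  if (static_cast<uint32_t>(index) >= static_cast<uint32_t>(*length_addr))
    throw std::out_of_range("ArrayIndexOutOfBoundsException");  // bltu not taken
  // in bounds: fall through to the scaled load or store
}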
++ // ++ // Also rewrite frequent pairs ++ // aload_0, aload_1 ++ // aload_0, iload_1 ++ // These bytecodes with a small amount of code are most profitable ++ // to rewrite ++ if (RewriteFrequentPairs && rc == may_rewrite) { ++ Label rewrite, done; ++ // get the next bytecode in T2 ++ __ ld_bu(T2, at_bcp(Bytecodes::length_for(Bytecodes::_aload_0))); ++ ++ // do actual aload_0 ++ aload(0); ++ ++ // if _getfield then wait with rewrite ++ __ li(AT, Bytecodes::_getfield); ++ __ beq(AT, T2, done); ++ ++ // if _igetfield then reqrite to _fast_iaccess_0 ++ assert(Bytecodes::java_code(Bytecodes::_fast_iaccess_0) == ++ Bytecodes::_aload_0, ++ "fix bytecode definition"); ++ __ li(T3, Bytecodes::_fast_iaccess_0); ++ __ li(AT, Bytecodes::_fast_igetfield); ++ __ beq(AT, T2, rewrite); ++ ++ // if _agetfield then reqrite to _fast_aaccess_0 ++ assert(Bytecodes::java_code(Bytecodes::_fast_aaccess_0) == ++ Bytecodes::_aload_0, ++ "fix bytecode definition"); ++ __ li(T3, Bytecodes::_fast_aaccess_0); ++ __ li(AT, Bytecodes::_fast_agetfield); ++ __ beq(AT, T2, rewrite); ++ ++ // if _fgetfield then reqrite to _fast_faccess_0 ++ assert(Bytecodes::java_code(Bytecodes::_fast_faccess_0) == ++ Bytecodes::_aload_0, ++ "fix bytecode definition"); ++ __ li(T3, Bytecodes::_fast_faccess_0); ++ __ li(AT, Bytecodes::_fast_fgetfield); ++ __ beq(AT, T2, rewrite); ++ ++ // else rewrite to _fast_aload0 ++ assert(Bytecodes::java_code(Bytecodes::_fast_aload_0) == ++ Bytecodes::_aload_0, ++ "fix bytecode definition"); ++ __ li(T3, Bytecodes::_fast_aload_0); ++ ++ // rewrite ++ __ bind(rewrite); ++ patch_bytecode(Bytecodes::_aload_0, T3, T2, false); ++ ++ __ bind(done); ++ } else { ++ aload(0); ++ } ++} ++ ++void TemplateTable::istore() { ++ transition(itos, vtos); ++ locals_index(T2); ++ __ st_w(FSR, T2, 0); ++} ++ ++void TemplateTable::lstore() { ++ transition(ltos, vtos); ++ locals_index(T2); ++ __ st_d(FSR, T2, -wordSize); ++} ++ ++void TemplateTable::fstore() { ++ transition(ftos, vtos); ++ locals_index(T2); ++ __ fst_s(FSF, T2, 0); ++} ++ ++void TemplateTable::dstore() { ++ transition(dtos, vtos); ++ locals_index(T2); ++ __ fst_d(FSF, T2, -wordSize); ++} ++ ++void TemplateTable::astore() { ++ transition(vtos, vtos); ++ __ pop_ptr(FSR); ++ locals_index(T2); ++ __ st_d(FSR, T2, 0); ++} ++ ++void TemplateTable::wide_istore() { ++ transition(vtos, vtos); ++ __ pop_i(FSR); ++ locals_index_wide(T2); ++ __ st_d(FSR, T2, 0); ++} ++ ++void TemplateTable::wide_lstore() { ++ transition(vtos, vtos); ++ __ pop_l(FSR); ++ locals_index_wide(T2); ++ __ st_d(FSR, T2, -wordSize); ++} ++ ++void TemplateTable::wide_fstore() { ++ wide_istore(); ++} ++ ++void TemplateTable::wide_dstore() { ++ wide_lstore(); ++} ++ ++void TemplateTable::wide_astore() { ++ transition(vtos, vtos); ++ __ pop_ptr(FSR); ++ locals_index_wide(T2); ++ __ st_d(FSR, T2, 0); ++} ++ ++// used register : T2 ++void TemplateTable::iastore() { ++ transition(itos, vtos); ++ __ pop_i(SSR); // T2: array SSR: index ++ index_check(T2, SSR); // prefer index in SSR ++ __ alsl_d(T2, SSR, T2, Address::times_4 - 1); ++ __ access_store_at(T_INT, IN_HEAP | IS_ARRAY, Address(T2, arrayOopDesc::base_offset_in_bytes(T_INT)), FSR, noreg, noreg); ++} ++ ++// used register T2, T3 ++void TemplateTable::lastore() { ++ transition(ltos, vtos); ++ __ pop_i (T2); ++ index_check(T3, T2); ++ __ alsl_d(T3, T2, T3, Address::times_8 - 1); ++ __ access_store_at(T_LONG, IN_HEAP | IS_ARRAY, Address(T3, arrayOopDesc::base_offset_in_bytes(T_LONG)), FSR, noreg, noreg); ++} ++ ++// used register T2 ++void 
TemplateTable::fastore() { ++ transition(ftos, vtos); ++ __ pop_i(SSR); ++ index_check(T2, SSR); ++ __ alsl_d(T2, SSR, T2, Address::times_4 - 1); ++ __ access_store_at(T_FLOAT, IN_HEAP | IS_ARRAY, Address(T2, arrayOopDesc::base_offset_in_bytes(T_FLOAT)), noreg, noreg, noreg); ++} ++ ++// used register T2, T3 ++void TemplateTable::dastore() { ++ transition(dtos, vtos); ++ __ pop_i (T2); ++ index_check(T3, T2); ++ __ alsl_d(T3, T2, T3, Address::times_8 - 1); ++ __ access_store_at(T_DOUBLE, IN_HEAP | IS_ARRAY, Address(T3, arrayOopDesc::base_offset_in_bytes(T_DOUBLE)), noreg, noreg, noreg); ++} ++ ++// used register : T2, T3, T8 ++// T2 : array ++// T3 : subklass ++// T8 : supklass ++void TemplateTable::aastore() { ++ Label is_null, ok_is_subtype, done; ++ transition(vtos, vtos); ++ // stack: ..., array, index, value ++ __ ld_d(FSR, at_tos()); // Value ++ __ ld_w(SSR, at_tos_p1()); // Index ++ __ ld_d(T2, at_tos_p2()); // Array ++ ++ // index_check(T2, SSR); ++ index_check_without_pop(T2, SSR); ++ // do array store check - check for NULL value first ++ __ beq(FSR, R0, is_null); ++ ++ // Move subklass into T3 ++ //add for compressedoops ++ __ load_klass(T3, FSR); ++ // Move superklass into T8 ++ //add for compressedoops ++ __ load_klass(T8, T2); ++ __ ld_d(T8, Address(T8, ObjArrayKlass::element_klass_offset())); ++ // Compress array+index*4+12 into a single register. T2 ++ __ alsl_d(T2, SSR, T2, (UseCompressedOops? Address::times_4 : Address::times_8) - 1); ++ __ addi_d(T2, T2, arrayOopDesc::base_offset_in_bytes(T_OBJECT)); ++ ++ // Generate subtype check. ++ // Superklass in T8. Subklass in T3. ++ __ gen_subtype_check(T8, T3, ok_is_subtype); ++ // Come here on failure ++ // object is at FSR ++ __ jmp(Interpreter::_throw_ArrayStoreException_entry); ++ // Come here on success ++ __ bind(ok_is_subtype); ++ do_oop_store(_masm, Address(T2, 0), FSR, IS_ARRAY); ++ __ b(done); ++ ++ // Have a NULL in FSR, T2=array, SSR=index. Store NULL at ary[idx] ++ __ bind(is_null); ++ __ profile_null_seen(T4); ++ __ alsl_d(T2, SSR, T2, (UseCompressedOops? Address::times_4 : Address::times_8) - 1); ++ do_oop_store(_masm, Address(T2, arrayOopDesc::base_offset_in_bytes(T_OBJECT)), noreg, IS_ARRAY); ++ ++ __ bind(done); ++ __ addi_d(SP, SP, 3 * Interpreter::stackElementSize); ++} ++ ++void TemplateTable::bastore() { ++ transition(itos, vtos); ++ __ pop_i(SSR); ++ index_check(T2, SSR); ++ ++ // Need to check whether array is boolean or byte ++ // since both types share the bastore bytecode. 
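// Illustrative sketch (plain C++, not HotSpot code) of the masking the comment
// above describes: boolean[] and byte[] share the bastore bytecode, so the
// array klass's layout helper is probed for the boolean diffbit and, if it is
// set, the value is normalised to 0/1 before the byte store. Parameter names
// are hypothetical.
#include <cstdint>

inline int8_t bastore_value(int32_t value, int32_t layout_helper, int32_t boolean_diffbit) {
  if (layout_helper & boolean_diffbit)
    value &= 0x1;                      // boolean[]: keep only the lowest bit
  return static_cast<int8_t>(value);   // byte[]: plain truncating store
}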
++ __ load_klass(T4, T2); ++ __ ld_w(T4, T4, in_bytes(Klass::layout_helper_offset())); ++ ++ int diffbit = Klass::layout_helper_boolean_diffbit(); ++ __ li(AT, diffbit); ++ ++ Label L_skip; ++ __ andr(AT, T4, AT); ++ __ beq(AT, R0, L_skip); ++ __ andi(FSR, FSR, 0x1); ++ __ bind(L_skip); ++ ++ __ add_d(SSR, T2, SSR); ++ __ access_store_at(T_BYTE, IN_HEAP | IS_ARRAY, Address(SSR, arrayOopDesc::base_offset_in_bytes(T_BYTE)), FSR, noreg, noreg); ++} ++ ++void TemplateTable::castore() { ++ transition(itos, vtos); ++ __ pop_i(SSR); ++ index_check(T2, SSR); ++ __ alsl_d(SSR, SSR, T2, Address::times_2 - 1); ++ __ access_store_at(T_CHAR, IN_HEAP | IS_ARRAY, Address(SSR, arrayOopDesc::base_offset_in_bytes(T_CHAR)), FSR, noreg, noreg); ++} ++ ++void TemplateTable::sastore() { ++ castore(); ++} ++ ++void TemplateTable::istore(int n) { ++ transition(itos, vtos); ++ __ st_w(FSR, iaddress(n)); ++} ++ ++void TemplateTable::lstore(int n) { ++ transition(ltos, vtos); ++ __ st_d(FSR, laddress(n)); ++} ++ ++void TemplateTable::fstore(int n) { ++ transition(ftos, vtos); ++ __ fst_s(FSF, faddress(n)); ++} ++ ++void TemplateTable::dstore(int n) { ++ transition(dtos, vtos); ++ __ fst_d(FSF, laddress(n)); ++} ++ ++void TemplateTable::astore(int n) { ++ transition(vtos, vtos); ++ __ pop_ptr(FSR); ++ __ st_d(FSR, aaddress(n)); ++} ++ ++void TemplateTable::pop() { ++ transition(vtos, vtos); ++ __ addi_d(SP, SP, Interpreter::stackElementSize); ++} ++ ++void TemplateTable::pop2() { ++ transition(vtos, vtos); ++ __ addi_d(SP, SP, 2 * Interpreter::stackElementSize); ++} ++ ++void TemplateTable::dup() { ++ transition(vtos, vtos); ++ // stack: ..., a ++ __ load_ptr(0, FSR); ++ __ push_ptr(FSR); ++ // stack: ..., a, a ++} ++ ++// blows FSR ++void TemplateTable::dup_x1() { ++ transition(vtos, vtos); ++ // stack: ..., a, b ++ __ load_ptr(0, FSR); // load b ++ __ load_ptr(1, A5); // load a ++ __ store_ptr(1, FSR); // store b ++ __ store_ptr(0, A5); // store a ++ __ push_ptr(FSR); // push b ++ // stack: ..., b, a, b ++} ++ ++// blows FSR ++void TemplateTable::dup_x2() { ++ transition(vtos, vtos); ++ // stack: ..., a, b, c ++ __ load_ptr(0, FSR); // load c ++ __ load_ptr(2, A5); // load a ++ __ store_ptr(2, FSR); // store c in a ++ __ push_ptr(FSR); // push c ++ // stack: ..., c, b, c, c ++ __ load_ptr(2, FSR); // load b ++ __ store_ptr(2, A5); // store a in b ++ // stack: ..., c, a, c, c ++ __ store_ptr(1, FSR); // store b in c ++ // stack: ..., c, a, b, c ++} ++ ++// blows FSR ++void TemplateTable::dup2() { ++ transition(vtos, vtos); ++ // stack: ..., a, b ++ __ load_ptr(1, FSR); // load a ++ __ push_ptr(FSR); // push a ++ __ load_ptr(1, FSR); // load b ++ __ push_ptr(FSR); // push b ++ // stack: ..., a, b, a, b ++} ++ ++// blows FSR ++void TemplateTable::dup2_x1() { ++ transition(vtos, vtos); ++ // stack: ..., a, b, c ++ __ load_ptr(0, T2); // load c ++ __ load_ptr(1, FSR); // load b ++ __ push_ptr(FSR); // push b ++ __ push_ptr(T2); // push c ++ // stack: ..., a, b, c, b, c ++ __ store_ptr(3, T2); // store c in b ++ // stack: ..., a, c, c, b, c ++ __ load_ptr(4, T2); // load a ++ __ store_ptr(2, T2); // store a in 2nd c ++ // stack: ..., a, c, a, b, c ++ __ store_ptr(4, FSR); // store b in a ++ // stack: ..., b, c, a, b, c ++ ++ // stack: ..., b, c, a, b, c ++} ++ ++// blows FSR, SSR ++void TemplateTable::dup2_x2() { ++ transition(vtos, vtos); ++ // stack: ..., a, b, c, d ++ // stack: ..., a, b, c, d ++ __ load_ptr(0, T2); // load d ++ __ load_ptr(1, FSR); // load c ++ __ push_ptr(FSR); // push c ++ __ push_ptr(T2); // push 
d ++ // stack: ..., a, b, c, d, c, d ++ __ load_ptr(4, FSR); // load b ++ __ store_ptr(2, FSR); // store b in d ++ __ store_ptr(4, T2); // store d in b ++ // stack: ..., a, d, c, b, c, d ++ __ load_ptr(5, T2); // load a ++ __ load_ptr(3, FSR); // load c ++ __ store_ptr(3, T2); // store a in c ++ __ store_ptr(5, FSR); // store c in a ++ // stack: ..., c, d, a, b, c, d ++ ++ // stack: ..., c, d, a, b, c, d ++} ++ ++// blows FSR ++void TemplateTable::swap() { ++ transition(vtos, vtos); ++ // stack: ..., a, b ++ ++ __ load_ptr(1, A5); // load a ++ __ load_ptr(0, FSR); // load b ++ __ store_ptr(0, A5); // store a in b ++ __ store_ptr(1, FSR); // store b in a ++ ++ // stack: ..., b, a ++} ++ ++void TemplateTable::iop2(Operation op) { ++ transition(itos, itos); ++ ++ __ pop_i(SSR); ++ switch (op) { ++ case add : __ add_w(FSR, SSR, FSR); break; ++ case sub : __ sub_w(FSR, SSR, FSR); break; ++ case mul : __ mul_w(FSR, SSR, FSR); break; ++ case _and : __ andr(FSR, SSR, FSR); break; ++ case _or : __ orr(FSR, SSR, FSR); break; ++ case _xor : __ xorr(FSR, SSR, FSR); break; ++ case shl : __ sll_w(FSR, SSR, FSR); break; ++ case shr : __ sra_w(FSR, SSR, FSR); break; ++ case ushr : __ srl_w(FSR, SSR, FSR); break; ++ default : ShouldNotReachHere(); ++ } ++} ++ ++// the result stored in FSR, SSR, ++// used registers : T2, T3 ++void TemplateTable::lop2(Operation op) { ++ transition(ltos, ltos); ++ __ pop_l(T2); ++ ++ switch (op) { ++ case add : __ add_d(FSR, T2, FSR); break; ++ case sub : __ sub_d(FSR, T2, FSR); break; ++ case _and: __ andr(FSR, T2, FSR); break; ++ case _or : __ orr(FSR, T2, FSR); break; ++ case _xor: __ xorr(FSR, T2, FSR); break; ++ default : ShouldNotReachHere(); ++ } ++} ++ ++// java require this bytecode could handle 0x80000000/-1, dont cause a overflow exception, ++// the result is 0x80000000 ++// the godson2 cpu do the same, so we need not handle this specially like x86 ++void TemplateTable::idiv() { ++ transition(itos, itos); ++ Label not_zero; ++ ++ __ bne(FSR, R0, not_zero); ++ __ jmp(Interpreter::_throw_ArithmeticException_entry); ++ __ bind(not_zero); ++ ++ __ pop_i(SSR); ++ __ div_w(FSR, SSR, FSR); ++} ++ ++void TemplateTable::irem() { ++ transition(itos, itos); ++ Label not_zero; ++ __ pop_i(SSR); ++ ++ __ bne(FSR, R0, not_zero); ++ //__ brk(7); ++ __ jmp(Interpreter::_throw_ArithmeticException_entry); ++ ++ __ bind(not_zero); ++ __ mod_w(FSR, SSR, FSR); ++} ++ ++void TemplateTable::lmul() { ++ transition(ltos, ltos); ++ __ pop_l(T2); ++ __ mul_d(FSR, T2, FSR); ++} ++ ++// NOTE: i DONT use the Interpreter::_throw_ArithmeticException_entry ++void TemplateTable::ldiv() { ++ transition(ltos, ltos); ++ Label normal; ++ ++ __ bne(FSR, R0, normal); ++ ++ //__ brk(7); //generate FPE ++ __ jmp(Interpreter::_throw_ArithmeticException_entry); ++ ++ __ bind(normal); ++ __ pop_l(A2); ++ __ div_d(FSR, A2, FSR); ++} ++ ++// NOTE: i DONT use the Interpreter::_throw_ArithmeticException_entry ++void TemplateTable::lrem() { ++ transition(ltos, ltos); ++ Label normal; ++ ++ __ bne(FSR, R0, normal); ++ ++ __ jmp(Interpreter::_throw_ArithmeticException_entry); ++ ++ __ bind(normal); ++ __ pop_l (A2); ++ ++ __ mod_d(FSR, A2, FSR); ++} ++ ++// result in FSR ++// used registers : T0 ++void TemplateTable::lshl() { ++ transition(itos, ltos); ++ __ pop_l(T0); ++ __ sll_d(FSR, T0, FSR); ++} ++ ++// used registers : T0 ++void TemplateTable::lshr() { ++ transition(itos, ltos); ++ __ pop_l(T0); ++ __ sra_d(FSR, T0, FSR); ++} ++ ++// used registers : T0 ++void TemplateTable::lushr() { ++ transition(itos, ltos); 
++ __ pop_l(T0); ++ __ srl_d(FSR, T0, FSR); ++} ++ ++// result in FSF ++void TemplateTable::fop2(Operation op) { ++ transition(ftos, ftos); ++ switch (op) { ++ case add: ++ __ fld_s(fscratch, at_sp()); ++ __ fadd_s(FSF, fscratch, FSF); ++ break; ++ case sub: ++ __ fld_s(fscratch, at_sp()); ++ __ fsub_s(FSF, fscratch, FSF); ++ break; ++ case mul: ++ __ fld_s(fscratch, at_sp()); ++ __ fmul_s(FSF, fscratch, FSF); ++ break; ++ case div: ++ __ fld_s(fscratch, at_sp()); ++ __ fdiv_s(FSF, fscratch, FSF); ++ break; ++ case rem: ++ __ fmov_s(FA1, FSF); ++ __ fld_s(FA0, at_sp()); ++ __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::frem), 2); ++ break; ++ default : ShouldNotReachHere(); ++ } ++ ++ __ addi_d(SP, SP, 1 * wordSize); ++} ++ ++// result in SSF||FSF ++// i dont handle the strict flags ++void TemplateTable::dop2(Operation op) { ++ transition(dtos, dtos); ++ switch (op) { ++ case add: ++ __ fld_d(fscratch, at_sp()); ++ __ fadd_d(FSF, fscratch, FSF); ++ break; ++ case sub: ++ __ fld_d(fscratch, at_sp()); ++ __ fsub_d(FSF, fscratch, FSF); ++ break; ++ case mul: ++ __ fld_d(fscratch, at_sp()); ++ __ fmul_d(FSF, fscratch, FSF); ++ break; ++ case div: ++ __ fld_d(fscratch, at_sp()); ++ __ fdiv_d(FSF, fscratch, FSF); ++ break; ++ case rem: ++ __ fmov_d(FA1, FSF); ++ __ fld_d(FA0, at_sp()); ++ __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::drem), 2); ++ break; ++ default : ShouldNotReachHere(); ++ } ++ ++ __ addi_d(SP, SP, 2 * wordSize); ++} ++ ++void TemplateTable::ineg() { ++ transition(itos, itos); ++ __ sub_w(FSR, R0, FSR); ++} ++ ++void TemplateTable::lneg() { ++ transition(ltos, ltos); ++ __ sub_d(FSR, R0, FSR); ++} ++ ++void TemplateTable::fneg() { ++ transition(ftos, ftos); ++ __ fneg_s(FSF, FSF); ++} ++ ++void TemplateTable::dneg() { ++ transition(dtos, dtos); ++ __ fneg_d(FSF, FSF); ++} ++ ++// used registers : T2 ++void TemplateTable::iinc() { ++ transition(vtos, vtos); ++ locals_index(T2); ++ __ ld_w(FSR, T2, 0); ++ __ ld_b(AT, at_bcp(2)); // get constant ++ __ add_d(FSR, FSR, AT); ++ __ st_w(FSR, T2, 0); ++} ++ ++// used register : T2 ++void TemplateTable::wide_iinc() { ++ transition(vtos, vtos); ++ locals_index_wide(T2); ++ __ get_2_byte_integer_at_bcp(FSR, AT, 4); ++ __ bswap_h(FSR, FSR); ++ __ ld_w(AT, T2, 0); ++ __ add_d(FSR, AT, FSR); ++ __ st_w(FSR, T2, 0); ++} ++ ++void TemplateTable::convert() { ++ // Checking ++#ifdef ASSERT ++ { ++ TosState tos_in = ilgl; ++ TosState tos_out = ilgl; ++ switch (bytecode()) { ++ case Bytecodes::_i2l: // fall through ++ case Bytecodes::_i2f: // fall through ++ case Bytecodes::_i2d: // fall through ++ case Bytecodes::_i2b: // fall through ++ case Bytecodes::_i2c: // fall through ++ case Bytecodes::_i2s: tos_in = itos; break; ++ case Bytecodes::_l2i: // fall through ++ case Bytecodes::_l2f: // fall through ++ case Bytecodes::_l2d: tos_in = ltos; break; ++ case Bytecodes::_f2i: // fall through ++ case Bytecodes::_f2l: // fall through ++ case Bytecodes::_f2d: tos_in = ftos; break; ++ case Bytecodes::_d2i: // fall through ++ case Bytecodes::_d2l: // fall through ++ case Bytecodes::_d2f: tos_in = dtos; break; ++ default : ShouldNotReachHere(); ++ } ++ switch (bytecode()) { ++ case Bytecodes::_l2i: // fall through ++ case Bytecodes::_f2i: // fall through ++ case Bytecodes::_d2i: // fall through ++ case Bytecodes::_i2b: // fall through ++ case Bytecodes::_i2c: // fall through ++ case Bytecodes::_i2s: tos_out = itos; break; ++ case Bytecodes::_i2l: // fall through ++ case Bytecodes::_f2l: // fall through ++ case Bytecodes::_d2l: tos_out 
= ltos; break; ++ case Bytecodes::_i2f: // fall through ++ case Bytecodes::_l2f: // fall through ++ case Bytecodes::_d2f: tos_out = ftos; break; ++ case Bytecodes::_i2d: // fall through ++ case Bytecodes::_l2d: // fall through ++ case Bytecodes::_f2d: tos_out = dtos; break; ++ default : ShouldNotReachHere(); ++ } ++ transition(tos_in, tos_out); ++ } ++#endif // ASSERT ++ // Conversion ++ switch (bytecode()) { ++ case Bytecodes::_i2l: ++ __ slli_w(FSR, FSR, 0); ++ break; ++ case Bytecodes::_i2f: ++ __ movgr2fr_w(FSF, FSR); ++ __ ffint_s_w(FSF, FSF); ++ break; ++ case Bytecodes::_i2d: ++ __ movgr2fr_w(FSF, FSR); ++ __ ffint_d_w(FSF, FSF); ++ break; ++ case Bytecodes::_i2b: ++ __ ext_w_b(FSR, FSR); ++ break; ++ case Bytecodes::_i2c: ++ __ bstrpick_d(FSR, FSR, 15, 0); // truncate upper 56 bits ++ break; ++ case Bytecodes::_i2s: ++ __ ext_w_h(FSR, FSR); ++ break; ++ case Bytecodes::_l2i: ++ __ slli_w(FSR, FSR, 0); ++ break; ++ case Bytecodes::_l2f: ++ __ movgr2fr_d(FSF, FSR); ++ __ ffint_s_l(FSF, FSF); ++ break; ++ case Bytecodes::_l2d: ++ __ movgr2fr_d(FSF, FSR); ++ __ ffint_d_l(FSF, FSF); ++ break; ++ case Bytecodes::_f2i: ++ __ ftintrz_w_s(fscratch, FSF); ++ __ movfr2gr_s(FSR, fscratch); ++ break; ++ case Bytecodes::_f2l: ++ __ ftintrz_l_s(fscratch, FSF); ++ __ movfr2gr_d(FSR, fscratch); ++ break; ++ case Bytecodes::_f2d: ++ __ fcvt_d_s(FSF, FSF); ++ break; ++ case Bytecodes::_d2i: ++ __ ftintrz_w_d(fscratch, FSF); ++ __ movfr2gr_s(FSR, fscratch); ++ break; ++ case Bytecodes::_d2l: ++ __ ftintrz_l_d(fscratch, FSF); ++ __ movfr2gr_d(FSR, fscratch); ++ break; ++ case Bytecodes::_d2f: ++ __ fcvt_s_d(FSF, FSF); ++ break; ++ default : ++ ShouldNotReachHere(); ++ } ++} ++ ++void TemplateTable::lcmp() { ++ transition(ltos, itos); ++ ++ __ pop(T0); ++ __ pop(R0); ++ ++ __ slt(AT, T0, FSR); ++ __ slt(FSR, FSR, T0); ++ __ sub_d(FSR, FSR, AT); ++} ++ ++void TemplateTable::float_cmp(bool is_float, int unordered_result) { ++ if (is_float) { ++ __ fld_s(fscratch, at_sp()); ++ __ addi_d(SP, SP, 1 * wordSize); ++ ++ if (unordered_result < 0) { ++ __ fcmp_clt_s(FCC0, FSF, fscratch); ++ __ fcmp_cult_s(FCC1, fscratch, FSF); ++ } else { ++ __ fcmp_cult_s(FCC0, FSF, fscratch); ++ __ fcmp_clt_s(FCC1, fscratch, FSF); ++ } ++ } else { ++ __ fld_d(fscratch, at_sp()); ++ __ addi_d(SP, SP, 2 * wordSize); ++ ++ if (unordered_result < 0) { ++ __ fcmp_clt_d(FCC0, FSF, fscratch); ++ __ fcmp_cult_d(FCC1, fscratch, FSF); ++ } else { ++ __ fcmp_cult_d(FCC0, FSF, fscratch); ++ __ fcmp_clt_d(FCC1, fscratch, FSF); ++ } ++ } ++ ++ __ movcf2gr(FSR, FCC0); ++ __ movcf2gr(AT, FCC1); ++ __ sub_d(FSR, FSR, AT); ++} ++ ++// used registers : T3, A7, Rnext ++// FSR : return bci, this is defined by the vm specification ++// T2 : MDO taken count ++// T3 : method ++// A7 : offset ++// Rnext : next bytecode, this is required by dispatch_base ++void TemplateTable::branch(bool is_jsr, bool is_wide) { ++ __ get_method(T3); ++ __ profile_taken_branch(A7, T2); // only C2 meaningful ++ ++ const ByteSize be_offset = MethodCounters::backedge_counter_offset() + ++ InvocationCounter::counter_offset(); ++ const ByteSize inv_offset = MethodCounters::invocation_counter_offset() + ++ InvocationCounter::counter_offset(); ++ ++ // Load up T4 with the branch displacement ++ if (!is_wide) { ++ __ ld_b(A7, BCP, 1); ++ __ ld_bu(AT, BCP, 2); ++ __ slli_d(A7, A7, 8); ++ __ orr(A7, A7, AT); ++ } else { ++ __ get_4_byte_integer_at_bcp(A7, 1); ++ __ bswap_w(A7, A7); ++ } ++ ++ // Handle all the JSR stuff here, then exit. 
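// Illustrative sketch (plain C++, not HotSpot code) of the three-way compare
// that lcmp and float_cmp above build out of two slt / fcmp results: the
// outcome is sign(a - b) computed without subtraction overflow, and for
// floating point the unordered_result argument selects whether a NaN operand
// yields -1 (fcmpl) or +1 (fcmpg). Helper names are hypothetical.
#include <cmath>

inline int cmp_long(long long a, long long b) {
  return (a > b) - (a < b);                // 1, 0 or -1, like the two-slt sequence
}

inline int cmp_float(float a, float b, int unordered_result) {
  if (std::isnan(a) || std::isnan(b))
    return unordered_result < 0 ? -1 : 1;  // unordered case, per the JVM spec
  return (a > b) - (a < b);
}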
++ // It's much shorter and cleaner than intermingling with the non-JSR ++ // normal-branch stuff occuring below. ++ if (is_jsr) { ++ // Pre-load the next target bytecode into Rnext ++ __ ldx_bu(Rnext, BCP, A7); ++ ++ // compute return address as bci in FSR ++ __ addi_d(FSR, BCP, (is_wide?5:3) - in_bytes(ConstMethod::codes_offset())); ++ __ ld_d(AT, T3, in_bytes(Method::const_offset())); ++ __ sub_d(FSR, FSR, AT); ++ // Adjust the bcp in BCP by the displacement in A7 ++ __ add_d(BCP, BCP, A7); ++ // jsr returns atos that is not an oop ++ // Push return address ++ __ push_i(FSR); ++ // jsr returns vtos ++ __ dispatch_only_noverify(vtos); ++ ++ return; ++ } ++ ++ // Normal (non-jsr) branch handling ++ ++ // Adjust the bcp in S0 by the displacement in T4 ++ __ add_d(BCP, BCP, A7); ++ ++ assert(UseLoopCounter || !UseOnStackReplacement, "on-stack-replacement requires loop counters"); ++ Label backedge_counter_overflow; ++ Label profile_method; ++ Label dispatch; ++ if (UseLoopCounter) { ++ // increment backedge counter for backward branches ++ // T3: method ++ // T4: target offset ++ // BCP: target bcp ++ // LVP: locals pointer ++ __ blt(R0, A7, dispatch); // check if forward or backward branch ++ ++ // check if MethodCounters exists ++ Label has_counters; ++ __ ld_d(AT, T3, in_bytes(Method::method_counters_offset())); // use AT as MDO, TEMP ++ __ bne(AT, R0, has_counters); ++ __ push2(T3, A7); ++ __ call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::build_method_counters), ++ T3); ++ __ pop2(T3, A7); ++ __ ld_d(AT, T3, in_bytes(Method::method_counters_offset())); // use AT as MDO, TEMP ++ __ beq(AT, R0, dispatch); ++ __ bind(has_counters); ++ ++ Label no_mdo; ++ int increment = InvocationCounter::count_increment; ++ if (ProfileInterpreter) { ++ // Are we profiling? ++ __ ld_d(T0, Address(T3, in_bytes(Method::method_data_offset()))); ++ __ beq(T0, R0, no_mdo); ++ // Increment the MDO backedge counter ++ const Address mdo_backedge_counter(T0, in_bytes(MethodData::backedge_counter_offset()) + ++ in_bytes(InvocationCounter::counter_offset())); ++ const Address mask(T0, in_bytes(MethodData::backedge_mask_offset())); ++ __ increment_mask_and_jump(mdo_backedge_counter, increment, mask, ++ T1, false, Assembler::zero, ++ UseOnStackReplacement ? &backedge_counter_overflow : &dispatch); ++ __ beq(R0, R0, dispatch); ++ } ++ __ bind(no_mdo); ++ // Increment backedge counter in MethodCounters* ++ __ ld_d(T0, Address(T3, Method::method_counters_offset())); ++ const Address mask(T0, in_bytes(MethodCounters::backedge_mask_offset())); ++ __ increment_mask_and_jump(Address(T0, be_offset), increment, mask, ++ T1, false, Assembler::zero, ++ UseOnStackReplacement ? 
&backedge_counter_overflow : &dispatch); ++ __ bind(dispatch); ++ } ++ ++ // Pre-load the next target bytecode into Rnext ++ __ ld_bu(Rnext, BCP, 0); ++ ++ // continue with the bytecode @ target ++ // FSR: return bci for jsr's, unused otherwise ++ // Rnext: target bytecode ++ // BCP: target bcp ++ __ dispatch_only(vtos, true); ++ ++ if (UseLoopCounter && UseOnStackReplacement) { ++ // invocation counter overflow ++ __ bind(backedge_counter_overflow); ++ __ sub_d(A7, BCP, A7); // branch bcp ++ call_VM(NOREG, CAST_FROM_FN_PTR(address, ++ InterpreterRuntime::frequency_counter_overflow), A7); ++ ++ // V0: osr nmethod (osr ok) or NULL (osr not possible) ++ // V1: osr adapter frame return address ++ // LVP: locals pointer ++ // BCP: bcp ++ __ beq(V0, R0, dispatch); ++ // nmethod may have been invalidated (VM may block upon call_VM return) ++ __ ld_b(T3, V0, nmethod::state_offset()); ++ __ li(AT, nmethod::in_use); ++ __ bne(AT, T3, dispatch); ++ ++ // We have the address of an on stack replacement routine in rax. ++ // In preparation of invoking it, first we must migrate the locals ++ // and monitors from off the interpreter frame on the stack. ++ // Ensure to save the osr nmethod over the migration call, ++ // it will be preserved in Rnext. ++ __ move(Rnext, V0); ++ const Register thread = TREG; ++#ifndef OPT_THREAD ++ __ get_thread(thread); ++#endif ++ call_VM(noreg, CAST_FROM_FN_PTR(address, SharedRuntime::OSR_migration_begin)); ++ ++ // V0 is OSR buffer, move it to expected parameter location ++ // refer to osrBufferPointer in c1_LIRAssembler_loongarch.cpp ++ __ move(T0, V0); ++ ++ // pop the interpreter frame ++ __ ld_d(A7, Address(FP, frame::interpreter_frame_sender_sp_offset * wordSize)); ++ // remove frame anchor ++ __ leave(); ++ __ move(LVP, RA); ++ __ move(SP, A7); ++ ++ assert(StackAlignmentInBytes == 16, "must be"); ++ __ bstrins_d(SP, R0, 3, 0); ++ ++ // push the (possibly adjusted) return address ++ // refer to osr_entry in c1_LIRAssembler_loongarch.cpp ++ __ ld_d(AT, Rnext, nmethod::osr_entry_point_offset()); ++ __ jr(AT); ++ } ++} ++ ++void TemplateTable::if_0cmp(Condition cc) { ++ transition(itos, vtos); ++ // assume branch is more often taken than not (loops use backward branches) ++ Label not_taken; ++ switch(cc) { ++ case not_equal: ++ __ beq(FSR, R0, not_taken); ++ break; ++ case equal: ++ __ bne(FSR, R0, not_taken); ++ break; ++ case less: ++ __ bge(FSR, R0, not_taken); ++ break; ++ case less_equal: ++ __ blt(R0, FSR, not_taken); ++ break; ++ case greater: ++ __ bge(R0, FSR, not_taken); ++ break; ++ case greater_equal: ++ __ blt(FSR, R0, not_taken); ++ break; ++ } ++ ++ branch(false, false); ++ ++ __ bind(not_taken); ++ __ profile_not_taken_branch(FSR); ++} ++ ++void TemplateTable::if_icmp(Condition cc) { ++ transition(itos, vtos); ++ // assume branch is more often taken than not (loops use backward branches) ++ Label not_taken; ++ ++ __ pop_i(SSR); ++ switch(cc) { ++ case not_equal: ++ __ beq(SSR, FSR, not_taken); ++ break; ++ case equal: ++ __ bne(SSR, FSR, not_taken); ++ break; ++ case less: ++ __ bge(SSR, FSR, not_taken); ++ break; ++ case less_equal: ++ __ blt(FSR, SSR, not_taken); ++ break; ++ case greater: ++ __ bge(FSR, SSR, not_taken); ++ break; ++ case greater_equal: ++ __ blt(SSR, FSR, not_taken); ++ break; ++ } ++ ++ branch(false, false); ++ __ bind(not_taken); ++ __ profile_not_taken_branch(FSR); ++} ++ ++void TemplateTable::if_nullcmp(Condition cc) { ++ transition(atos, vtos); ++ // assume branch is more often taken than not (loops use backward branches) ++ 
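// Illustrative sketch (plain C++, not HotSpot code) of the backedge-counter
// scheme emitted by branch() above for backward branches: each taken backedge
// adds a fixed increment to a counter and masks the result; hitting zero sends
// the interpreter to the frequency-overflow runtime call, which may return an
// OSR nmethod to jump into. The helper name is hypothetical and the mask
// encoding is assumed only in outline.
#include <cstdint>

inline bool backedge_overflow(uint32_t& counter, uint32_t increment, uint32_t mask) {
  counter += increment;          // what increment_mask_and_jump does, in C++
  return (counter & mask) == 0;  // zero => counter crossed the OSR threshold
}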
Label not_taken; ++ switch(cc) { ++ case not_equal: ++ __ beq(FSR, R0, not_taken); ++ break; ++ case equal: ++ __ bne(FSR, R0, not_taken); ++ break; ++ default: ++ ShouldNotReachHere(); ++ } ++ ++ branch(false, false); ++ __ bind(not_taken); ++ __ profile_not_taken_branch(FSR); ++} ++ ++ ++void TemplateTable::if_acmp(Condition cc) { ++ transition(atos, vtos); ++ // assume branch is more often taken than not (loops use backward branches) ++ Label not_taken; ++ // __ ld_w(SSR, SP, 0); ++ __ pop_ptr(SSR); ++ switch(cc) { ++ case not_equal: ++ __ beq(SSR, FSR, not_taken); ++ break; ++ case equal: ++ __ bne(SSR, FSR, not_taken); ++ break; ++ default: ++ ShouldNotReachHere(); ++ } ++ ++ branch(false, false); ++ ++ __ bind(not_taken); ++ __ profile_not_taken_branch(FSR); ++} ++ ++// used registers : T1, T2, T3 ++// T1 : method ++// T2 : returb bci ++void TemplateTable::ret() { ++ transition(vtos, vtos); ++ ++ locals_index(T2); ++ __ ld_d(T2, T2, 0); ++ __ profile_ret(T2, T3); ++ ++ __ get_method(T1); ++ __ ld_d(BCP, T1, in_bytes(Method::const_offset())); ++ __ add_d(BCP, BCP, T2); ++ __ addi_d(BCP, BCP, in_bytes(ConstMethod::codes_offset())); ++ ++ __ dispatch_next(vtos, 0, true); ++} ++ ++// used registers : T1, T2, T3 ++// T1 : method ++// T2 : returb bci ++void TemplateTable::wide_ret() { ++ transition(vtos, vtos); ++ ++ locals_index_wide(T2); ++ __ ld_d(T2, T2, 0); // get return bci, compute return bcp ++ __ profile_ret(T2, T3); ++ ++ __ get_method(T1); ++ __ ld_d(BCP, T1, in_bytes(Method::const_offset())); ++ __ add_d(BCP, BCP, T2); ++ __ addi_d(BCP, BCP, in_bytes(ConstMethod::codes_offset())); ++ ++ __ dispatch_next(vtos, 0, true); ++} ++ ++// used register T2, T3, A7, Rnext ++// T2 : bytecode pointer ++// T3 : low ++// A7 : high ++// Rnext : dest bytecode, required by dispatch_base ++void TemplateTable::tableswitch() { ++ Label default_case, continue_execution; ++ transition(itos, vtos); ++ ++ // align BCP ++ __ addi_d(T2, BCP, BytesPerInt); ++ __ li(AT, -BytesPerInt); ++ __ andr(T2, T2, AT); ++ ++ // load lo & hi ++ __ ld_w(T3, T2, 1 * BytesPerInt); ++ __ bswap_w(T3, T3); ++ __ ld_w(A7, T2, 2 * BytesPerInt); ++ __ bswap_w(A7, A7); ++ ++ // check against lo & hi ++ __ blt(FSR, T3, default_case); ++ __ blt(A7, FSR, default_case); ++ ++ // lookup dispatch offset, in A7 big endian ++ __ sub_d(FSR, FSR, T3); ++ __ alsl_d(AT, FSR, T2, Address::times_4 - 1); ++ __ ld_w(A7, AT, 3 * BytesPerInt); ++ __ profile_switch_case(FSR, T4, T3); ++ ++ __ bind(continue_execution); ++ __ bswap_w(A7, A7); ++ __ add_d(BCP, BCP, A7); ++ __ ld_bu(Rnext, BCP, 0); ++ __ dispatch_only(vtos, true); ++ ++ // handle default ++ __ bind(default_case); ++ __ profile_switch_default(FSR); ++ __ ld_w(A7, T2, 0); ++ __ b(continue_execution); ++} ++ ++void TemplateTable::lookupswitch() { ++ transition(itos, itos); ++ __ stop("lookupswitch bytecode should have been rewritten"); ++} ++ ++// used registers : T2, T3, A7, Rnext ++// T2 : bytecode pointer ++// T3 : pair index ++// A7 : offset ++// Rnext : dest bytecode ++// the data after the opcode is the same as lookupswitch ++// see Rewriter::rewrite_method for more information ++void TemplateTable::fast_linearswitch() { ++ transition(itos, vtos); ++ Label loop_entry, loop, found, continue_execution; ++ ++ // swap FSR so we can avoid swapping the table entries ++ __ bswap_w(FSR, FSR); ++ ++ // align BCP ++ __ addi_d(T2, BCP, BytesPerInt); ++ __ li(AT, -BytesPerInt); ++ __ andr(T2, T2, AT); ++ ++ // set counter ++ __ ld_w(T3, T2, BytesPerInt); ++ __ bswap_w(T3, T3); ++ __ 
b(loop_entry); ++ ++ // table search ++ __ bind(loop); ++ // get the entry value ++ __ alsl_d(AT, T3, T2, Address::times_8 - 1); ++ __ ld_w(AT, AT, 2 * BytesPerInt); ++ ++ // found? ++ __ beq(FSR, AT, found); ++ ++ __ bind(loop_entry); ++ Label L1; ++ __ bge(R0, T3, L1); ++ __ addi_d(T3, T3, -1); ++ __ b(loop); ++ __ bind(L1); ++ __ addi_d(T3, T3, -1); ++ ++ // default case ++ __ profile_switch_default(FSR); ++ __ ld_w(A7, T2, 0); ++ __ b(continue_execution); ++ ++ // entry found -> get offset ++ __ bind(found); ++ __ alsl_d(AT, T3, T2, Address::times_8 - 1); ++ __ ld_w(A7, AT, 3 * BytesPerInt); ++ __ profile_switch_case(T3, FSR, T2); ++ ++ // continue execution ++ __ bind(continue_execution); ++ __ bswap_w(A7, A7); ++ __ add_d(BCP, BCP, A7); ++ __ ld_bu(Rnext, BCP, 0); ++ __ dispatch_only(vtos, true); ++} ++ ++// used registers : T0, T1, T2, T3, A7, Rnext ++// T2 : pairs address(array) ++// Rnext : dest bytecode ++// the data after the opcode is the same as lookupswitch ++// see Rewriter::rewrite_method for more information ++void TemplateTable::fast_binaryswitch() { ++ transition(itos, vtos); ++ // Implementation using the following core algorithm: ++ // ++ // int binary_search(int key, LookupswitchPair* array, int n) { ++ // // Binary search according to "Methodik des Programmierens" by ++ // // Edsger W. Dijkstra and W.H.J. Feijen, Addison Wesley Germany 1985. ++ // int i = 0; ++ // int j = n; ++ // while (i+1 < j) { ++ // // invariant P: 0 <= i < j <= n and (a[i] <= key < a[j] or Q) ++ // // with Q: for all i: 0 <= i < n: key < a[i] ++ // // where a stands for the array and assuming that the (inexisting) ++ // // element a[n] is infinitely big. ++ // int h = (i + j) >> 1; ++ // // i < h < j ++ // if (key < array[h].fast_match()) { ++ // j = h; ++ // } else { ++ // i = h; ++ // } ++ // } ++ // // R: a[i] <= key < a[i+1] or Q ++ // // (i.e., if key is within array, i is the correct index) ++ // return i; ++ // } ++ ++ // register allocation ++ const Register array = T2; ++ const Register i = T3, j = A7; ++ const Register h = T1; ++ const Register temp = T0; ++ const Register key = FSR; ++ ++ // setup array ++ __ addi_d(array, BCP, 3*BytesPerInt); ++ __ li(AT, -BytesPerInt); ++ __ andr(array, array, AT); ++ ++ // initialize i & j ++ __ move(i, R0); ++ __ ld_w(j, array, - 1 * BytesPerInt); ++ // Convert j into native byteordering ++ __ bswap_w(j, j); ++ ++ // and start ++ Label entry; ++ __ b(entry); ++ ++ // binary search loop ++ { ++ Label loop; ++ __ bind(loop); ++ // int h = (i + j) >> 1; ++ __ add_d(h, i, j); ++ __ srli_d(h, h, 1); ++ // if (key < array[h].fast_match()) { ++ // j = h; ++ // } else { ++ // i = h; ++ // } ++ // Convert array[h].match to native byte-ordering before compare ++ __ alsl_d(AT, h, array, Address::times_8 - 1); ++ __ ld_w(temp, AT, 0 * BytesPerInt); ++ __ bswap_w(temp, temp); ++ ++ __ slt(AT, key, temp); ++ __ maskeqz(i, i, AT); ++ __ masknez(temp, h, AT); ++ __ OR(i, i, temp); ++ __ masknez(j, j, AT); ++ __ maskeqz(temp, h, AT); ++ __ OR(j, j, temp); ++ ++ // while (i+1 < j) ++ __ bind(entry); ++ __ addi_d(h, i, 1); ++ __ blt(h, j, loop); ++ } ++ ++ // end of binary search, result index is i (must check again!) 
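// Illustrative sketch: the binary search from the comment above, written as
// ordinary, runnable C++ (not HotSpot code). It returns the index i with
// a[i] <= key < a[i+1] (0 when the key is below every entry); the caller must
// still re-check a[i].match == key, exactly as the generated code does after
// its loop. LookupswitchPair here is a plain stand-in for the real pair layout.
#include <cstdint>
#include <vector>

struct LookupswitchPair { int32_t match; int32_t offset; };

inline int binary_search(int32_t key, const std::vector<LookupswitchPair>& a) {
  int i = 0;
  int j = static_cast<int>(a.size());
  while (i + 1 < j) {
    int h = (i + j) >> 1;         // i < h < j
    if (key < a[h].match) j = h;  // key is in the lower half
    else                  i = h;  // key is in the upper half (or equal)
  }
  return i;                       // verify a[i].match == key before using a[i].offset
}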
++ Label default_case; ++ // Convert array[i].match to native byte-ordering before compare ++ __ alsl_d(AT, i, array, Address::times_8 - 1); ++ __ ld_w(temp, AT, 0 * BytesPerInt); ++ __ bswap_w(temp, temp); ++ __ bne(key, temp, default_case); ++ ++ // entry found -> j = offset ++ __ alsl_d(AT, i, array, Address::times_8 - 1); ++ __ ld_w(j, AT, 1 * BytesPerInt); ++ __ profile_switch_case(i, key, array); ++ __ bswap_w(j, j); ++ ++ __ add_d(BCP, BCP, j); ++ __ ld_bu(Rnext, BCP, 0); ++ __ dispatch_only(vtos, true); ++ ++ // default case -> j = default offset ++ __ bind(default_case); ++ __ profile_switch_default(i); ++ __ ld_w(j, array, - 2 * BytesPerInt); ++ __ bswap_w(j, j); ++ __ add_d(BCP, BCP, j); ++ __ ld_bu(Rnext, BCP, 0); ++ __ dispatch_only(vtos, true); ++} ++ ++void TemplateTable::_return(TosState state) { ++ transition(state, state); ++ assert(_desc->calls_vm(), ++ "inconsistent calls_vm information"); // call in remove_activation ++ ++ if (_desc->bytecode() == Bytecodes::_return_register_finalizer) { ++ assert(state == vtos, "only valid state"); ++ __ ld_d(T1, aaddress(0)); ++ __ load_klass(LVP, T1); ++ __ ld_w(LVP, LVP, in_bytes(Klass::access_flags_offset())); ++ __ li(AT, JVM_ACC_HAS_FINALIZER); ++ __ andr(AT, AT, LVP); ++ Label skip_register_finalizer; ++ __ beq(AT, R0, skip_register_finalizer); ++ __ call_VM(noreg, CAST_FROM_FN_PTR(address, ++ InterpreterRuntime::register_finalizer), T1); ++ __ bind(skip_register_finalizer); ++ } ++ ++ Register thread = TREG; ++#ifndef OPT_THREAD ++ __ get_thread(thread); ++#endif ++ ++ if (_desc->bytecode() != Bytecodes::_return_register_finalizer) { ++ Label no_safepoint; ++ NOT_PRODUCT(__ block_comment("Thread-local Safepoint poll")); ++ __ ld_b(AT, thread, in_bytes(JavaThread::polling_word_offset())); ++ __ andi(AT, AT, SafepointMechanism::poll_bit()); ++ __ beq(AT, R0, no_safepoint); ++ __ push(state); ++ __ call_VM(noreg, CAST_FROM_FN_PTR(address, ++ InterpreterRuntime::at_safepoint)); ++ __ pop(state); ++ __ bind(no_safepoint); ++ } ++ ++ // Narrow result if state is itos but result type is smaller. ++ // Need to narrow in the return bytecode rather than in generate_return_entry ++ // since compiled code callers expect the result to already be narrowed. ++ if (state == itos) { ++ __ narrow(FSR); ++ } ++ ++ __ remove_activation(state, T4); ++ __ membar(__ StoreStore); ++ ++ __ jr(T4); ++} ++ ++// we dont shift left 2 bits in get_cache_and_index_at_bcp ++// for we always need shift the index we use it. the ConstantPoolCacheEntry ++// is 16-byte long, index is the index in ++// ConstantPoolCache, so cache + base_offset() + index * 16 is ++// the corresponding ConstantPoolCacheEntry ++// used registers : T2 ++// NOTE : the returned index need also shift left 4 to get the address! ++void TemplateTable::resolve_cache_and_index(int byte_no, ++ Register Rcache, ++ Register index, ++ size_t index_size) { ++ assert(byte_no == f1_byte || byte_no == f2_byte, "byte_no out of range"); ++ const Register temp = A1; ++ assert_different_registers(Rcache, index); ++ ++ Label resolved, clinit_barrier_slow; ++ ++ Bytecodes::Code code = bytecode(); ++ switch (code) { ++ case Bytecodes::_nofast_getfield: code = Bytecodes::_getfield; break; ++ case Bytecodes::_nofast_putfield: code = Bytecodes::_putfield; break; ++ default: break; ++ } ++ ++ __ get_cache_and_index_and_bytecode_at_bcp(Rcache, index, temp, byte_no, 1, index_size); ++ // is resolved? 
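// Illustrative sketch (plain C++, not HotSpot code) of the "is resolved?" test
// that follows: the constant pool cache records, per entry, the bytecode that
// resolved it, so comparing that recorded value against the currently
// executing bytecode tells whether InterpreterRuntime::resolve_from_cache
// still has to be called. The helper name is hypothetical.
#include <cstdint>

inline bool cache_entry_resolved(uint8_t recorded_bytecode, uint8_t current_bytecode) {
  return recorded_bytecode == current_bytecode;  // mismatch (often 0) => resolve first
}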
++ int i = (int)code; ++ __ addi_d(temp, temp, -i); ++ __ beq(temp, R0, resolved); ++ ++ // resolve first time through ++ // Class initialization barrier slow path lands here as well. ++ __ bind(clinit_barrier_slow); ++ address entry = CAST_FROM_FN_PTR(address, InterpreterRuntime::resolve_from_cache); ++ ++ __ li(temp, i); ++ __ call_VM(NOREG, entry, temp); ++ ++ // Update registers with resolved info ++ __ get_cache_and_index_at_bcp(Rcache, index, 1, index_size); ++ __ bind(resolved); ++ ++ // Class initialization barrier for static methods ++ if (VM_Version::supports_fast_class_init_checks() && bytecode() == Bytecodes::_invokestatic) { ++ __ load_resolved_method_at_index(byte_no, temp, Rcache, index); ++ __ load_method_holder(temp, temp); ++ __ clinit_barrier(temp, AT, NULL, &clinit_barrier_slow); ++ } ++} ++//END: LA ++ ++// The Rcache and index registers must be set before call ++void TemplateTable::load_field_cp_cache_entry(Register obj, ++ Register cache, ++ Register index, ++ Register off, ++ Register flags, ++ bool is_static = false) { ++ assert_different_registers(cache, index, flags, off); ++ ++ ByteSize cp_base_offset = ConstantPoolCache::base_offset(); ++ // Field offset ++ __ alsl_d(AT, index, cache, Address::times_ptr - 1); ++ __ ld_d(off, AT, in_bytes(cp_base_offset + ConstantPoolCacheEntry::f2_offset())); ++ // Flags ++ __ ld_d(flags, AT, in_bytes(cp_base_offset + ConstantPoolCacheEntry::flags_offset())); ++ ++ // klass overwrite register ++ if (is_static) { ++ __ ld_d(obj, AT, in_bytes(cp_base_offset + ConstantPoolCacheEntry::f1_offset())); ++ const int mirror_offset = in_bytes(Klass::java_mirror_offset()); ++ __ ld_d(obj, Address(obj, mirror_offset)); ++ ++ __ resolve_oop_handle(obj, T4); ++ } ++} ++ ++// get the method, itable_index and flags of the current invoke ++void TemplateTable::load_invoke_cp_cache_entry(int byte_no, ++ Register method, ++ Register itable_index, ++ Register flags, ++ bool is_invokevirtual, ++ bool is_invokevfinal, /*unused*/ ++ bool is_invokedynamic) { ++ // setup registers ++ const Register cache = T3; ++ const Register index = T1; ++ assert_different_registers(method, flags); ++ assert_different_registers(method, cache, index); ++ assert_different_registers(itable_index, flags); ++ assert_different_registers(itable_index, cache, index); ++ assert(is_invokevirtual == (byte_no == f2_byte), "is invokevirtual flag redundant"); ++ // determine constant pool cache field offsets ++ const int method_offset = in_bytes( ++ ConstantPoolCache::base_offset() + ++ ((byte_no == f2_byte) ++ ? ConstantPoolCacheEntry::f2_offset() ++ : ConstantPoolCacheEntry::f1_offset())); ++ const int flags_offset = in_bytes(ConstantPoolCache::base_offset() + ++ ConstantPoolCacheEntry::flags_offset()); ++ // access constant pool cache fields ++ const int index_offset = in_bytes(ConstantPoolCache::base_offset() + ++ ConstantPoolCacheEntry::f2_offset()); ++ ++ size_t index_size = (is_invokedynamic ? sizeof(u4): sizeof(u2)); ++ resolve_cache_and_index(byte_no, cache, index, index_size); ++ ++ __ alsl_d(AT, index, cache, Address::times_ptr - 1); ++ __ ld_d(method, AT, method_offset); ++ ++ if (itable_index != NOREG) { ++ __ ld_d(itable_index, AT, index_offset); ++ } ++ __ ld_d(flags, AT, flags_offset); ++} ++ ++// The registers cache and index expected to be set before call. ++// Correct values of the cache and index registers are preserved. 
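// Illustrative sketch (plain C++, not HotSpot code) of how the flags word
// loaded by load_field_cp_cache_entry() above is decoded by the field
// templates that follow: the upper bits carry the field's TosState, one bit
// marks volatile fields, and the low bits hold the field index. Shift and mask
// values are passed in rather than hard-coded, since the exact layout lives in
// ConstantPoolCacheEntry; the helper and struct names are hypothetical.
#include <cstdint>

struct FieldFlags { uint32_t tos_state; bool is_volatile; uint32_t field_index; };

inline FieldFlags decode_field_flags(uint64_t flags,
                                     unsigned tos_shift, uint64_t tos_mask,
                                     unsigned volatile_shift, uint64_t index_mask) {
  FieldFlags f;
  f.tos_state   = static_cast<uint32_t>((flags >> tos_shift) & tos_mask);
  f.is_volatile = ((flags >> volatile_shift) & 1) != 0;
  f.field_index = static_cast<uint32_t>(flags & index_mask);
  return f;
}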
++void TemplateTable::jvmti_post_field_access(Register cache, Register index, ++ bool is_static, bool has_tos) { ++ // do the JVMTI work here to avoid disturbing the register state below ++ // We use c_rarg registers here because we want to use the register used in ++ // the call to the VM ++ if (JvmtiExport::can_post_field_access()) { ++ // Check to see if a field access watch has been set before we ++ // take the time to call into the VM. ++ Label L1; ++ // kill FSR ++ Register tmp1 = T2; ++ Register tmp2 = T1; ++ Register tmp3 = T3; ++ assert_different_registers(cache, index, AT); ++ __ li(AT, (intptr_t)JvmtiExport::get_field_access_count_addr()); ++ __ ld_w(AT, AT, 0); ++ __ beq(AT, R0, L1); ++ ++ __ get_cache_and_index_at_bcp(tmp2, tmp3, 1); ++ ++ // cache entry pointer ++ __ addi_d(tmp2, tmp2, in_bytes(ConstantPoolCache::base_offset())); ++ __ alsl_d(tmp2, tmp3, tmp2, LogBytesPerWord - 1); ++ ++ if (is_static) { ++ __ move(tmp1, R0); ++ } else { ++ __ ld_d(tmp1, SP, 0); ++ __ verify_oop(tmp1); ++ } ++ // tmp1: object pointer or NULL ++ // tmp2: cache entry pointer ++ __ call_VM(NOREG, CAST_FROM_FN_PTR(address, ++ InterpreterRuntime::post_field_access), ++ tmp1, tmp2); ++ __ get_cache_and_index_at_bcp(cache, index, 1); ++ __ bind(L1); ++ } ++} ++ ++void TemplateTable::pop_and_check_object(Register r) { ++ __ pop_ptr(r); ++ __ null_check(r); // for field access must check obj. ++ __ verify_oop(r); ++} ++ ++// used registers : T1, T2, T3, T1 ++// T1 : flags ++// T2 : off ++// T3 : obj ++// T1 : field address ++// The flags 31, 30, 29, 28 together build a 4 bit number 0 to 8 with the ++// following mapping to the TosState states: ++// btos: 0 ++// ctos: 1 ++// stos: 2 ++// itos: 3 ++// ltos: 4 ++// ftos: 5 ++// dtos: 6 ++// atos: 7 ++// vtos: 8 ++// see ConstantPoolCacheEntry::set_field for more info ++void TemplateTable::getfield_or_static(int byte_no, bool is_static, RewriteControl rc) { ++ transition(vtos, vtos); ++ ++ const Register cache = T3; ++ const Register index = T0; ++ ++ const Register obj = T3; ++ const Register off = T2; ++ const Register flags = T1; ++ ++ const Register scratch = T8; ++ ++ resolve_cache_and_index(byte_no, cache, index, sizeof(u2)); ++ jvmti_post_field_access(cache, index, is_static, false); ++ load_field_cp_cache_entry(obj, cache, index, off, flags, is_static); ++ ++ { ++ __ li(scratch, 1 << ConstantPoolCacheEntry::is_volatile_shift); ++ __ andr(scratch, scratch, flags); ++ ++ Label notVolatile; ++ __ beq(scratch, R0, notVolatile); ++ __ membar(MacroAssembler::AnyAny); ++ __ bind(notVolatile); ++ } ++ ++ if (!is_static) pop_and_check_object(obj); ++ __ add_d(index, obj, off); ++ ++ const Address field(index, 0); ++ ++ Label Done, notByte, notBool, notInt, notShort, notChar, ++ notLong, notFloat, notObj, notDouble; ++ ++ assert(btos == 0, "change code, btos != 0"); ++ __ srli_d(flags, flags, ConstantPoolCacheEntry::tos_state_shift); ++ __ andi(flags, flags, ConstantPoolCacheEntry::tos_state_mask); ++ __ bne(flags, R0, notByte); ++ ++ // btos ++ __ access_load_at(T_BYTE, IN_HEAP, FSR, field, noreg, noreg); ++ __ push(btos); ++ ++ // Rewrite bytecode to be faster ++ if (!is_static && rc == may_rewrite) { ++ patch_bytecode(Bytecodes::_fast_bgetfield, T3, T2); ++ } ++ __ b(Done); ++ ++ ++ __ bind(notByte); ++ __ li(AT, ztos); ++ __ bne(flags, AT, notBool); ++ ++ // ztos ++ __ access_load_at(T_BOOLEAN, IN_HEAP, FSR, field, noreg, noreg); ++ __ push(ztos); ++ ++ // Rewrite bytecode to be faster ++ if (!is_static && rc == may_rewrite) { ++ 
patch_bytecode(Bytecodes::_fast_bgetfield, T3, T2); ++ } ++ __ b(Done); ++ ++ ++ __ bind(notBool); ++ __ li(AT, itos); ++ __ bne(flags, AT, notInt); ++ ++ // itos ++ __ access_load_at(T_INT, IN_HEAP, FSR, field, noreg, noreg); ++ __ push(itos); ++ ++ // Rewrite bytecode to be faster ++ if (!is_static && rc == may_rewrite) { ++ patch_bytecode(Bytecodes::_fast_igetfield, T3, T2); ++ } ++ __ b(Done); ++ ++ __ bind(notInt); ++ __ li(AT, atos); ++ __ bne(flags, AT, notObj); ++ ++ // atos ++ //add for compressedoops ++ do_oop_load(_masm, Address(index, 0), FSR, IN_HEAP); ++ __ push(atos); ++ ++ if (!is_static && rc == may_rewrite) { ++ patch_bytecode(Bytecodes::_fast_agetfield, T3, T2); ++ } ++ __ b(Done); ++ ++ __ bind(notObj); ++ __ li(AT, ctos); ++ __ bne(flags, AT, notChar); ++ ++ // ctos ++ __ access_load_at(T_CHAR, IN_HEAP, FSR, field, noreg, noreg); ++ __ push(ctos); ++ ++ if (!is_static && rc == may_rewrite) { ++ patch_bytecode(Bytecodes::_fast_cgetfield, T3, T2); ++ } ++ __ b(Done); ++ ++ __ bind(notChar); ++ __ li(AT, stos); ++ __ bne(flags, AT, notShort); ++ ++ // stos ++ __ access_load_at(T_SHORT, IN_HEAP, FSR, field, noreg, noreg); ++ __ push(stos); ++ ++ if (!is_static && rc == may_rewrite) { ++ patch_bytecode(Bytecodes::_fast_sgetfield, T3, T2); ++ } ++ __ b(Done); ++ ++ __ bind(notShort); ++ __ li(AT, ltos); ++ __ bne(flags, AT, notLong); ++ ++ // ltos ++ __ access_load_at(T_LONG, IN_HEAP | MO_RELAXED, FSR, field, noreg, noreg); ++ __ push(ltos); ++ ++ // Don't rewrite to _fast_lgetfield for potential volatile case. ++ __ b(Done); ++ ++ __ bind(notLong); ++ __ li(AT, ftos); ++ __ bne(flags, AT, notFloat); ++ ++ // ftos ++ __ access_load_at(T_FLOAT, IN_HEAP, noreg /* ftos */, field, noreg, noreg); ++ __ push(ftos); ++ ++ if (!is_static && rc == may_rewrite) { ++ patch_bytecode(Bytecodes::_fast_fgetfield, T3, T2); ++ } ++ __ b(Done); ++ ++ __ bind(notFloat); ++ __ li(AT, dtos); ++#ifdef ASSERT ++ __ bne(flags, AT, notDouble); ++#endif ++ ++ // dtos ++ __ access_load_at(T_DOUBLE, IN_HEAP, noreg /* dtos */, field, noreg, noreg); ++ __ push(dtos); ++ ++ if (!is_static && rc == may_rewrite) { ++ patch_bytecode(Bytecodes::_fast_dgetfield, T3, T2); ++ } ++ ++#ifdef ASSERT ++ __ b(Done); ++ __ bind(notDouble); ++ __ stop("Bad state"); ++#endif ++ ++ __ bind(Done); ++ ++ { ++ Label notVolatile; ++ __ beq(scratch, R0, notVolatile); ++ __ membar(Assembler::Membar_mask_bits(__ LoadLoad | __ LoadStore)); ++ __ bind(notVolatile); ++ } ++} ++ ++void TemplateTable::getfield(int byte_no) { ++ getfield_or_static(byte_no, false); ++} ++ ++void TemplateTable::nofast_getfield(int byte_no) { ++ getfield_or_static(byte_no, false, may_not_rewrite); ++} ++ ++void TemplateTable::getstatic(int byte_no) { ++ getfield_or_static(byte_no, true); ++} ++ ++// The registers cache and index expected to be set before call. ++// The function may destroy various registers, just not the cache and index registers. ++void TemplateTable::jvmti_post_field_mod(Register cache, Register index, bool is_static) { ++ transition(vtos, vtos); ++ ++ ByteSize cp_base_offset = ConstantPoolCache::base_offset(); ++ ++ if (JvmtiExport::can_post_field_modification()) { ++ // Check to see if a field modification watch has been set before ++ // we take the time to call into the VM. 
++ Label L1; ++ //kill AT, T1, T2, T3, T4 ++ Register tmp1 = T2; ++ Register tmp2 = T1; ++ Register tmp3 = T3; ++ Register tmp4 = T4; ++ assert_different_registers(cache, index, tmp4); ++ ++ __ li(AT, JvmtiExport::get_field_modification_count_addr()); ++ __ ld_w(AT, AT, 0); ++ __ beq(AT, R0, L1); ++ ++ __ get_cache_and_index_at_bcp(tmp2, tmp4, 1); ++ ++ if (is_static) { ++ __ move(tmp1, R0); ++ } else { ++ // Life is harder. The stack holds the value on top, followed by ++ // the object. We don't know the size of the value, though; it ++ // could be one or two words depending on its type. As a result, ++ // we must find the type to determine where the object is. ++ Label two_word, valsize_known; ++ __ alsl_d(AT, tmp4, tmp2, Address::times_8 - 1); ++ __ ld_d(tmp3, AT, in_bytes(cp_base_offset + ++ ConstantPoolCacheEntry::flags_offset())); ++ __ shr(tmp3, ConstantPoolCacheEntry::tos_state_shift); ++ ++ ConstantPoolCacheEntry::verify_tos_state_shift(); ++ __ move(tmp1, SP); ++ __ li(AT, ltos); ++ __ beq(tmp3, AT, two_word); ++ __ li(AT, dtos); ++ __ beq(tmp3, AT, two_word); ++ __ addi_d(tmp1, tmp1, Interpreter::expr_offset_in_bytes(1) ); ++ __ b(valsize_known); ++ ++ __ bind(two_word); ++ __ addi_d(tmp1, tmp1, Interpreter::expr_offset_in_bytes(2)); ++ ++ __ bind(valsize_known); ++ // setup object pointer ++ __ ld_d(tmp1, tmp1, 0 * wordSize); ++ } ++ // cache entry pointer ++ __ addi_d(tmp2, tmp2, in_bytes(cp_base_offset)); ++ __ alsl_d(tmp2, tmp4, tmp2, LogBytesPerWord - 1); ++ // object (tos) ++ __ move(tmp3, SP); ++ // tmp1: object pointer set up above (NULL if static) ++ // tmp2: cache entry pointer ++ // tmp3: jvalue object on the stack ++ __ call_VM(NOREG, ++ CAST_FROM_FN_PTR(address, ++ InterpreterRuntime::post_field_modification), ++ tmp1, tmp2, tmp3); ++ __ get_cache_and_index_at_bcp(cache, index, 1); ++ __ bind(L1); ++ } ++} ++ ++// used registers : T0, T1, T2, T3, T8 ++// T1 : flags ++// T2 : off ++// T3 : obj ++// T8 : volatile bit ++// see ConstantPoolCacheEntry::set_field for more info ++void TemplateTable::putfield_or_static(int byte_no, bool is_static, RewriteControl rc) { ++ transition(vtos, vtos); ++ ++ const Register cache = T3; ++ const Register index = T0; ++ const Register obj = T3; ++ const Register off = T2; ++ const Register flags = T1; ++ const Register bc = T3; ++ ++ const Register scratch = T8; ++ ++ resolve_cache_and_index(byte_no, cache, index, sizeof(u2)); ++ jvmti_post_field_mod(cache, index, is_static); ++ load_field_cp_cache_entry(obj, cache, index, off, flags, is_static); ++ ++ Label Done; ++ { ++ __ li(scratch, 1 << ConstantPoolCacheEntry::is_volatile_shift); ++ __ andr(scratch, scratch, flags); ++ ++ Label notVolatile; ++ __ beq(scratch, R0, notVolatile); ++ __ membar(Assembler::Membar_mask_bits(__ StoreStore | __ LoadStore)); ++ __ bind(notVolatile); ++ } ++ ++ ++ Label notByte, notBool, notInt, notShort, notChar, notLong, notFloat, notObj, notDouble; ++ ++ assert(btos == 0, "change code, btos != 0"); ++ ++ // btos ++ __ srli_d(flags, flags, ConstantPoolCacheEntry::tos_state_shift); ++ __ andi(flags, flags, ConstantPoolCacheEntry::tos_state_mask); ++ __ bne(flags, R0, notByte); ++ ++ __ pop(btos); ++ if (!is_static) { ++ pop_and_check_object(obj); ++ } ++ __ add_d(T4, obj, off); ++ __ access_store_at(T_BYTE, IN_HEAP, Address(T4), FSR, noreg, noreg); ++ ++ if (!is_static && rc == may_rewrite) { ++ patch_bytecode(Bytecodes::_fast_bputfield, bc, off, true, byte_no); ++ } ++ __ b(Done); ++ ++ // ztos ++ __ bind(notByte); ++ __ li(AT, ztos); ++ __ bne(flags, AT, 
notBool); ++ ++ __ pop(ztos); ++ if (!is_static) { ++ pop_and_check_object(obj); ++ } ++ __ add_d(T4, obj, off); ++ __ andi(FSR, FSR, 0x1); ++ __ access_store_at(T_BOOLEAN, IN_HEAP, Address(T4), FSR, noreg, noreg); ++ ++ if (!is_static && rc == may_rewrite) { ++ patch_bytecode(Bytecodes::_fast_zputfield, bc, off, true, byte_no); ++ } ++ __ b(Done); ++ ++ // itos ++ __ bind(notBool); ++ __ li(AT, itos); ++ __ bne(flags, AT, notInt); ++ ++ __ pop(itos); ++ if (!is_static) { ++ pop_and_check_object(obj); ++ } ++ __ add_d(T4, obj, off); ++ __ access_store_at(T_INT, IN_HEAP, Address(T4), FSR, noreg, noreg); ++ ++ if (!is_static && rc == may_rewrite) { ++ patch_bytecode(Bytecodes::_fast_iputfield, bc, off, true, byte_no); ++ } ++ __ b(Done); ++ ++ // atos ++ __ bind(notInt); ++ __ li(AT, atos); ++ __ bne(flags, AT, notObj); ++ ++ __ pop(atos); ++ if (!is_static) { ++ pop_and_check_object(obj); ++ } ++ ++ do_oop_store(_masm, Address(obj, off, Address::no_scale, 0), FSR); ++ ++ if (!is_static && rc == may_rewrite) { ++ patch_bytecode(Bytecodes::_fast_aputfield, bc, off, true, byte_no); ++ } ++ __ b(Done); ++ ++ // ctos ++ __ bind(notObj); ++ __ li(AT, ctos); ++ __ bne(flags, AT, notChar); ++ ++ __ pop(ctos); ++ if (!is_static) { ++ pop_and_check_object(obj); ++ } ++ __ add_d(T4, obj, off); ++ __ access_store_at(T_CHAR, IN_HEAP, Address(T4), FSR, noreg, noreg); ++ if (!is_static && rc == may_rewrite) { ++ patch_bytecode(Bytecodes::_fast_cputfield, bc, off, true, byte_no); ++ } ++ __ b(Done); ++ ++ // stos ++ __ bind(notChar); ++ __ li(AT, stos); ++ __ bne(flags, AT, notShort); ++ ++ __ pop(stos); ++ if (!is_static) { ++ pop_and_check_object(obj); ++ } ++ __ add_d(T4, obj, off); ++ __ access_store_at(T_SHORT, IN_HEAP, Address(T4), FSR, noreg, noreg); ++ if (!is_static && rc == may_rewrite) { ++ patch_bytecode(Bytecodes::_fast_sputfield, bc, off, true, byte_no); ++ } ++ __ b(Done); ++ ++ // ltos ++ __ bind(notShort); ++ __ li(AT, ltos); ++ __ bne(flags, AT, notLong); ++ ++ __ pop(ltos); ++ if (!is_static) { ++ pop_and_check_object(obj); ++ } ++ __ add_d(T4, obj, off); ++ __ access_store_at(T_LONG, IN_HEAP, Address(T4), FSR, noreg, noreg); ++ if (!is_static && rc == may_rewrite) { ++ patch_bytecode(Bytecodes::_fast_lputfield, bc, off, true, byte_no); ++ } ++ __ b(Done); ++ ++ // ftos ++ __ bind(notLong); ++ __ li(AT, ftos); ++ __ bne(flags, AT, notFloat); ++ ++ __ pop(ftos); ++ if (!is_static) { ++ pop_and_check_object(obj); ++ } ++ __ add_d(T4, obj, off); ++ __ access_store_at(T_FLOAT, IN_HEAP, Address(T4), noreg, noreg, noreg); ++ if (!is_static && rc == may_rewrite) { ++ patch_bytecode(Bytecodes::_fast_fputfield, bc, off, true, byte_no); ++ } ++ __ b(Done); ++ ++ ++ // dtos ++ __ bind(notFloat); ++ __ li(AT, dtos); ++#ifdef ASSERT ++ __ bne(flags, AT, notDouble); ++#endif ++ ++ __ pop(dtos); ++ if (!is_static) { ++ pop_and_check_object(obj); ++ } ++ __ add_d(T4, obj, off); ++ __ access_store_at(T_DOUBLE, IN_HEAP, Address(T4), noreg, noreg, noreg); ++ if (!is_static && rc == may_rewrite) { ++ patch_bytecode(Bytecodes::_fast_dputfield, bc, off, true, byte_no); ++ } ++ ++#ifdef ASSERT ++ __ b(Done); ++ ++ __ bind(notDouble); ++ __ stop("Bad state"); ++#endif ++ ++ __ bind(Done); ++ ++ { ++ Label notVolatile; ++ __ beq(scratch, R0, notVolatile); ++ __ membar(Assembler::Membar_mask_bits(__ StoreLoad | __ StoreStore)); ++ __ bind(notVolatile); ++ } ++} ++ ++void TemplateTable::putfield(int byte_no) { ++ putfield_or_static(byte_no, false); ++} ++ ++void TemplateTable::nofast_putfield(int byte_no) { ++ 
putfield_or_static(byte_no, false, may_not_rewrite); ++} ++ ++void TemplateTable::putstatic(int byte_no) { ++ putfield_or_static(byte_no, true); ++} ++ ++// used registers : T1, T2, T3 ++// T1 : cp_entry ++// T2 : obj ++// T3 : value pointer ++void TemplateTable::jvmti_post_fast_field_mod() { ++ if (JvmtiExport::can_post_field_modification()) { ++ // Check to see if a field modification watch has been set before ++ // we take the time to call into the VM. ++ Label L2; ++ //kill AT, T1, T2, T3, T4 ++ Register tmp1 = T2; ++ Register tmp2 = T1; ++ Register tmp3 = T3; ++ Register tmp4 = T4; ++ __ li(AT, JvmtiExport::get_field_modification_count_addr()); ++ __ ld_w(tmp3, AT, 0); ++ __ beq(tmp3, R0, L2); ++ __ pop_ptr(tmp1); ++ __ verify_oop(tmp1); ++ __ push_ptr(tmp1); ++ switch (bytecode()) { // load values into the jvalue object ++ case Bytecodes::_fast_aputfield: __ push_ptr(FSR); break; ++ case Bytecodes::_fast_bputfield: // fall through ++ case Bytecodes::_fast_zputfield: // fall through ++ case Bytecodes::_fast_sputfield: // fall through ++ case Bytecodes::_fast_cputfield: // fall through ++ case Bytecodes::_fast_iputfield: __ push_i(FSR); break; ++ case Bytecodes::_fast_dputfield: __ push_d(FSF); break; ++ case Bytecodes::_fast_fputfield: __ push_f(); break; ++ case Bytecodes::_fast_lputfield: __ push_l(FSR); break; ++ default: ShouldNotReachHere(); ++ } ++ __ move(tmp3, SP); ++ // access constant pool cache entry ++ __ get_cache_entry_pointer_at_bcp(tmp2, FSR, 1); ++ __ verify_oop(tmp1); ++ // tmp1: object pointer copied above ++ // tmp2: cache entry pointer ++ // tmp3: jvalue object on the stack ++ __ call_VM(NOREG, ++ CAST_FROM_FN_PTR(address, ++ InterpreterRuntime::post_field_modification), ++ tmp1, tmp2, tmp3); ++ ++ switch (bytecode()) { // restore tos values ++ case Bytecodes::_fast_aputfield: __ pop_ptr(FSR); break; ++ case Bytecodes::_fast_bputfield: // fall through ++ case Bytecodes::_fast_zputfield: // fall through ++ case Bytecodes::_fast_sputfield: // fall through ++ case Bytecodes::_fast_cputfield: // fall through ++ case Bytecodes::_fast_iputfield: __ pop_i(FSR); break; ++ case Bytecodes::_fast_dputfield: __ pop_d(); break; ++ case Bytecodes::_fast_fputfield: __ pop_f(); break; ++ case Bytecodes::_fast_lputfield: __ pop_l(FSR); break; ++ default: break; ++ } ++ __ bind(L2); ++ } ++} ++ ++// used registers : T2, T3, T1 ++// T2 : index & off & field address ++// T3 : cache & obj ++// T1 : flags ++void TemplateTable::fast_storefield(TosState state) { ++ transition(state, vtos); ++ ++ const Register scratch = T8; ++ ++ ByteSize base = ConstantPoolCache::base_offset(); ++ ++ jvmti_post_fast_field_mod(); ++ ++ // access constant pool cache ++ __ get_cache_and_index_at_bcp(T3, T2, 1); ++ ++ // Must prevent reordering of the following cp cache loads with bytecode load ++ __ membar(__ LoadLoad); ++ ++ // test for volatile with T1 ++ __ alsl_d(AT, T2, T3, Address::times_8 - 1); ++ __ ld_d(T1, AT, in_bytes(base + ConstantPoolCacheEntry::flags_offset())); ++ ++ // replace index with field offset from cache entry ++ __ ld_d(T2, AT, in_bytes(base + ConstantPoolCacheEntry::f2_offset())); ++ ++ Label Done; ++ { ++ __ li(scratch, 1 << ConstantPoolCacheEntry::is_volatile_shift); ++ __ andr(scratch, scratch, T1); ++ ++ Label notVolatile; ++ __ beq(scratch, R0, notVolatile); ++ __ membar(Assembler::Membar_mask_bits(__ StoreStore | __ LoadStore)); ++ __ bind(notVolatile); ++ } ++ ++ // Get object from stack ++ pop_and_check_object(T3); ++ ++ if (bytecode() != Bytecodes::_fast_aputfield) { ++ 
// field address ++ __ add_d(T2, T3, T2); ++ } ++ ++ // access field ++ switch (bytecode()) { ++ case Bytecodes::_fast_zputfield: ++ __ andi(FSR, FSR, 0x1); // boolean is true if LSB is 1 ++ __ access_store_at(T_BOOLEAN, IN_HEAP, Address(T2), FSR, noreg, noreg); ++ break; ++ case Bytecodes::_fast_bputfield: ++ __ access_store_at(T_BYTE, IN_HEAP, Address(T2), FSR, noreg, noreg); ++ break; ++ case Bytecodes::_fast_sputfield: ++ __ access_store_at(T_SHORT, IN_HEAP, Address(T2), FSR, noreg, noreg); ++ break; ++ case Bytecodes::_fast_cputfield: ++ __ access_store_at(T_CHAR, IN_HEAP, Address(T2), FSR, noreg, noreg); ++ break; ++ case Bytecodes::_fast_iputfield: ++ __ access_store_at(T_INT, IN_HEAP, Address(T2), FSR, noreg, noreg); ++ break; ++ case Bytecodes::_fast_lputfield: ++ __ access_store_at(T_LONG, IN_HEAP, Address(T2), FSR, noreg, noreg); ++ break; ++ case Bytecodes::_fast_fputfield: ++ __ access_store_at(T_FLOAT, IN_HEAP, Address(T2), noreg, noreg, noreg); ++ break; ++ case Bytecodes::_fast_dputfield: ++ __ access_store_at(T_DOUBLE, IN_HEAP, Address(T2), noreg, noreg, noreg); ++ break; ++ case Bytecodes::_fast_aputfield: ++ do_oop_store(_masm, Address(T3, T2, Address::no_scale, 0), FSR); ++ break; ++ default: ++ ShouldNotReachHere(); ++ } ++ ++ { ++ Label notVolatile; ++ __ beq(scratch, R0, notVolatile); ++ __ membar(Assembler::Membar_mask_bits(__ StoreLoad | __ StoreStore)); ++ __ bind(notVolatile); ++ } ++} ++ ++// used registers : T2, T3, T1 ++// T3 : cp_entry & cache ++// T2 : index & offset ++void TemplateTable::fast_accessfield(TosState state) { ++ transition(atos, state); ++ ++ const Register scratch = T8; ++ ++ // do the JVMTI work here to avoid disturbing the register state below ++ if (JvmtiExport::can_post_field_access()) { ++ // Check to see if a field access watch has been set before we take ++ // the time to call into the VM. 
++ Label L1; ++ __ li(AT, (intptr_t)JvmtiExport::get_field_access_count_addr()); ++ __ ld_w(T3, AT, 0); ++ __ beq(T3, R0, L1); ++ // access constant pool cache entry ++ __ get_cache_entry_pointer_at_bcp(T3, T1, 1); ++ __ move(TSR, FSR); ++ __ verify_oop(FSR); ++ // FSR: object pointer copied above ++ // T3: cache entry pointer ++ __ call_VM(NOREG, ++ CAST_FROM_FN_PTR(address, InterpreterRuntime::post_field_access), ++ FSR, T3); ++ __ move(FSR, TSR); ++ __ bind(L1); ++ } ++ ++ // access constant pool cache ++ __ get_cache_and_index_at_bcp(T3, T2, 1); ++ ++ // Must prevent reordering of the following cp cache loads with bytecode load ++ __ membar(__ LoadLoad); ++ ++ // replace index with field offset from cache entry ++ __ alsl_d(AT, T2, T3, Address::times_8 - 1); ++ __ ld_d(T2, AT, in_bytes(ConstantPoolCache::base_offset() + ConstantPoolCacheEntry::f2_offset())); ++ ++ { ++ __ ld_d(AT, AT, in_bytes(ConstantPoolCache::base_offset() + ConstantPoolCacheEntry::flags_offset())); ++ __ li(scratch, 1 << ConstantPoolCacheEntry::is_volatile_shift); ++ __ andr(scratch, scratch, AT); ++ ++ Label notVolatile; ++ __ beq(scratch, R0, notVolatile); ++ __ membar(MacroAssembler::AnyAny); ++ __ bind(notVolatile); ++ } ++ ++ // FSR: object ++ __ verify_oop(FSR); ++ __ null_check(FSR); ++ // field addresses ++ __ add_d(FSR, FSR, T2); ++ ++ // access field ++ switch (bytecode()) { ++ case Bytecodes::_fast_bgetfield: ++ __ access_load_at(T_BYTE, IN_HEAP, FSR, Address(FSR), noreg, noreg); ++ break; ++ case Bytecodes::_fast_sgetfield: ++ __ access_load_at(T_SHORT, IN_HEAP, FSR, Address(FSR), noreg, noreg); ++ break; ++ case Bytecodes::_fast_cgetfield: ++ __ access_load_at(T_CHAR, IN_HEAP, FSR, Address(FSR), noreg, noreg); ++ break; ++ case Bytecodes::_fast_igetfield: ++ __ access_load_at(T_INT, IN_HEAP, FSR, Address(FSR), noreg, noreg); ++ break; ++ case Bytecodes::_fast_lgetfield: ++ __ stop("should not be rewritten"); ++ break; ++ case Bytecodes::_fast_fgetfield: ++ __ access_load_at(T_FLOAT, IN_HEAP, noreg, Address(FSR), noreg, noreg); ++ break; ++ case Bytecodes::_fast_dgetfield: ++ __ access_load_at(T_DOUBLE, IN_HEAP, noreg, Address(FSR), noreg, noreg); ++ break; ++ case Bytecodes::_fast_agetfield: ++ do_oop_load(_masm, Address(FSR, 0), FSR, IN_HEAP); ++ __ verify_oop(FSR); ++ break; ++ default: ++ ShouldNotReachHere(); ++ } ++ ++ { ++ Label notVolatile; ++ __ beq(scratch, R0, notVolatile); ++ __ membar(Assembler::Membar_mask_bits(__ LoadLoad | __ LoadStore)); ++ __ bind(notVolatile); ++ } ++} ++ ++// generator for _fast_iaccess_0, _fast_aaccess_0, _fast_faccess_0 ++// used registers : T1, T2, T3, T1 ++// T1 : obj & field address ++// T2 : off ++// T3 : cache ++// T1 : index ++void TemplateTable::fast_xaccess(TosState state) { ++ transition(vtos, state); ++ ++ const Register scratch = T8; ++ ++ // get receiver ++ __ ld_d(T1, aaddress(0)); ++ // access constant pool cache ++ __ get_cache_and_index_at_bcp(T3, T2, 2); ++ __ alsl_d(AT, T2, T3, Address::times_8 - 1); ++ __ ld_d(T2, AT, in_bytes(ConstantPoolCache::base_offset() + ConstantPoolCacheEntry::f2_offset())); ++ ++ { ++ __ ld_d(AT, AT, in_bytes(ConstantPoolCache::base_offset() + ConstantPoolCacheEntry::flags_offset())); ++ __ li(scratch, 1 << ConstantPoolCacheEntry::is_volatile_shift); ++ __ andr(scratch, scratch, AT); ++ ++ Label notVolatile; ++ __ beq(scratch, R0, notVolatile); ++ __ membar(MacroAssembler::AnyAny); ++ __ bind(notVolatile); ++ } ++ ++ // make sure exception is reported in correct bcp range (getfield is ++ // next instruction) ++ __ 
addi_d(BCP, BCP, 1); ++ __ null_check(T1); ++ __ add_d(T1, T1, T2); ++ ++ if (state == itos) { ++ __ access_load_at(T_INT, IN_HEAP, FSR, Address(T1), noreg, noreg); ++ } else if (state == atos) { ++ do_oop_load(_masm, Address(T1, 0), FSR, IN_HEAP); ++ __ verify_oop(FSR); ++ } else if (state == ftos) { ++ __ access_load_at(T_FLOAT, IN_HEAP, noreg, Address(T1), noreg, noreg); ++ } else { ++ ShouldNotReachHere(); ++ } ++ __ addi_d(BCP, BCP, -1); ++ ++ { ++ Label notVolatile; ++ __ beq(scratch, R0, notVolatile); ++ __ membar(Assembler::Membar_mask_bits(__ LoadLoad | __ LoadStore)); ++ __ bind(notVolatile); ++ } ++} ++ ++ ++//----------------------------------------------------------------------------- ++// Calls ++ ++// method, index, recv, flags: T1, T2, T3, T1 ++// byte_no = 2 for _invokevirtual, 1 else ++// T0 : return address ++// get the method & index of the invoke, and push the return address of ++// the invoke(first word in the frame) ++// this address is where the return code jmp to. ++// NOTE : this method will set T3&T1 as recv&flags ++void TemplateTable::prepare_invoke(int byte_no, ++ Register method, // linked method (or i-klass) ++ Register index, // itable index, MethodType, etc. ++ Register recv, // if caller wants to see it ++ Register flags // if caller wants to test it ++ ) { ++ // determine flags ++ const Bytecodes::Code code = bytecode(); ++ const bool is_invokeinterface = code == Bytecodes::_invokeinterface; ++ const bool is_invokedynamic = code == Bytecodes::_invokedynamic; ++ const bool is_invokehandle = code == Bytecodes::_invokehandle; ++ const bool is_invokevirtual = code == Bytecodes::_invokevirtual; ++ const bool is_invokespecial = code == Bytecodes::_invokespecial; ++ const bool load_receiver = (recv != noreg); ++ const bool save_flags = (flags != noreg); ++ assert(load_receiver == (code != Bytecodes::_invokestatic && code != Bytecodes::_invokedynamic),""); ++ assert(save_flags == (is_invokeinterface || is_invokevirtual), "need flags for vfinal"); ++ assert(flags == noreg || flags == T1, "error flags reg."); ++ assert(recv == noreg || recv == T3, "error recv reg."); ++ ++ // setup registers & access constant pool cache ++ if(recv == noreg) recv = T3; ++ if(flags == noreg) flags = T1; ++ assert_different_registers(method, index, recv, flags); ++ ++ // save 'interpreter return address' ++ __ save_bcp(); ++ ++ load_invoke_cp_cache_entry(byte_no, method, index, flags, is_invokevirtual, false, is_invokedynamic); ++ ++ if (is_invokedynamic || is_invokehandle) { ++ Label L_no_push; ++ __ li(AT, (1 << ConstantPoolCacheEntry::has_appendix_shift)); ++ __ andr(AT, AT, flags); ++ __ beq(AT, R0, L_no_push); ++ // Push the appendix as a trailing parameter. ++ // This must be done before we get the receiver, ++ // since the parameter_size includes it. ++ Register tmp = SSR; ++ __ push(tmp); ++ __ move(tmp, index); ++ __ load_resolved_reference_at_index(index, tmp, recv); ++ __ pop(tmp); ++ __ push(index); // push appendix (MethodType, CallSite, etc.) ++ __ bind(L_no_push); ++ } ++ ++ // load receiver if needed (after appendix is pushed so parameter size is correct) ++ // Note: no return address pushed yet ++ if (load_receiver) { ++ __ li(AT, ConstantPoolCacheEntry::parameter_size_mask); ++ __ andr(recv, flags, AT); ++ // Since we won't push RA on stack, no_return_pc_pushed_yet should be 0. 
++ const int no_return_pc_pushed_yet = 0; // argument slot correction before we push return address ++ const int receiver_is_at_end = -1; // back off one slot to get receiver ++ Address recv_addr = __ argument_address(recv, no_return_pc_pushed_yet + receiver_is_at_end); ++ __ ld_d(recv, recv_addr); ++ __ verify_oop(recv); ++ } ++ if(save_flags) { ++ __ move(BCP, flags); ++ } ++ ++ // compute return type ++ __ srli_d(flags, flags, ConstantPoolCacheEntry::tos_state_shift); ++ __ andi(flags, flags, 0xf); ++ ++ // Make sure we don't need to mask flags for tos_state_shift after the above shift ++ ConstantPoolCacheEntry::verify_tos_state_shift(); ++ // load return address ++ { ++ const address table = (address) Interpreter::invoke_return_entry_table_for(code); ++ __ li(AT, (long)table); ++ __ alsl_d(AT, flags, AT, LogBytesPerWord - 1); ++ __ ld_d(RA, AT, 0); ++ } ++ ++ if (save_flags) { ++ __ move(flags, BCP); ++ __ restore_bcp(); ++ } ++} ++ ++// used registers : T0, T3, T1, T2 ++// T3 : recv, this two register using convention is by prepare_invoke ++// T1 : flags, klass ++// Rmethod : method, index must be Rmethod ++void TemplateTable::invokevirtual_helper(Register index, ++ Register recv, ++ Register flags) { ++ ++ assert_different_registers(index, recv, flags, T2); ++ ++ // Test for an invoke of a final method ++ Label notFinal; ++ __ li(AT, (1 << ConstantPoolCacheEntry::is_vfinal_shift)); ++ __ andr(AT, flags, AT); ++ __ beq(AT, R0, notFinal); ++ ++ Register method = index; // method must be Rmethod ++ assert(method == Rmethod, "Method must be Rmethod for interpreter calling convention"); ++ ++ // do the call - the index is actually the method to call ++ // the index is indeed Method*, for this is vfinal, ++ // see ConstantPoolCacheEntry::set_method for more info ++ ++ // It's final, need a null check here! 
++ __ null_check(recv); ++ ++ // profile this call ++ __ profile_final_call(T2); ++ ++ // T2: tmp, used for mdp ++ // method: callee ++ // T4: tmp ++ // is_virtual: true ++ __ profile_arguments_type(T2, method, T4, true); ++ ++ __ jump_from_interpreted(method, T2); ++ ++ __ bind(notFinal); ++ ++ // get receiver klass ++ __ null_check(recv, oopDesc::klass_offset_in_bytes()); ++ __ load_klass(T2, recv); ++ ++ // profile this call ++ __ profile_virtual_call(T2, T0, T1); ++ ++ // get target Method & entry point ++ __ lookup_virtual_method(T2, index, method); ++ __ profile_arguments_type(T2, method, T4, true); ++ __ jump_from_interpreted(method, T2); ++} ++ ++void TemplateTable::invokevirtual(int byte_no) { ++ transition(vtos, vtos); ++ assert(byte_no == f2_byte, "use this argument"); ++ prepare_invoke(byte_no, Rmethod, NOREG, T3, T1); ++ // now recv & flags in T3, T1 ++ invokevirtual_helper(Rmethod, T3, T1); ++} ++ ++// T4 : entry ++// Rmethod : method ++void TemplateTable::invokespecial(int byte_no) { ++ transition(vtos, vtos); ++ assert(byte_no == f1_byte, "use this argument"); ++ prepare_invoke(byte_no, Rmethod, NOREG, T3); ++ // now recv & flags in T3, T1 ++ __ verify_oop(T3); ++ __ null_check(T3); ++ __ profile_call(T4); ++ ++ // T8: tmp, used for mdp ++ // Rmethod: callee ++ // T4: tmp ++ // is_virtual: false ++ __ profile_arguments_type(T8, Rmethod, T4, false); ++ ++ __ jump_from_interpreted(Rmethod, T4); ++ __ move(T0, T3); ++} ++ ++void TemplateTable::invokestatic(int byte_no) { ++ transition(vtos, vtos); ++ assert(byte_no == f1_byte, "use this argument"); ++ prepare_invoke(byte_no, Rmethod, NOREG); ++ ++ __ profile_call(T4); ++ ++ // T8: tmp, used for mdp ++ // Rmethod: callee ++ // T4: tmp ++ // is_virtual: false ++ __ profile_arguments_type(T8, Rmethod, T4, false); ++ ++ __ jump_from_interpreted(Rmethod, T4); ++} ++ ++// i have no idea what to do here, now. for future change. FIXME. ++void TemplateTable::fast_invokevfinal(int byte_no) { ++ transition(vtos, vtos); ++ assert(byte_no == f2_byte, "use this argument"); ++ __ stop("fast_invokevfinal not used on LoongArch64"); ++} ++ ++// used registers : T0, T1, T2, T3, T1, A7 ++// T0 : itable, vtable, entry ++// T1 : interface ++// T3 : receiver ++// T1 : flags, klass ++// Rmethod : index, method, this is required by interpreter_entry ++void TemplateTable::invokeinterface(int byte_no) { ++ transition(vtos, vtos); ++ //this method will use T1-T4 and T0 ++ assert(byte_no == f1_byte, "use this argument"); ++ prepare_invoke(byte_no, T2, Rmethod, T3, T1); ++ // T2: reference klass (from f1) if interface method ++ // Rmethod: method (from f2) ++ // T3: receiver ++ // T1: flags ++ ++ // First check for Object case, then private interface method, ++ // then regular interface method. ++ ++ // Special case of invokeinterface called for virtual method of ++ // java.lang.Object. See cpCache.cpp for details. 
++ Label notObjectMethod; ++ __ li(AT, (1 << ConstantPoolCacheEntry::is_forced_virtual_shift)); ++ __ andr(AT, T1, AT); ++ __ beq(AT, R0, notObjectMethod); ++ ++ invokevirtual_helper(Rmethod, T3, T1); ++ // no return from above ++ __ bind(notObjectMethod); ++ ++ Label no_such_interface; // for receiver subtype check ++ Register recvKlass; // used for exception processing ++ ++ // Check for private method invocation - indicated by vfinal ++ Label notVFinal; ++ __ li(AT, (1 << ConstantPoolCacheEntry::is_vfinal_shift)); ++ __ andr(AT, T1, AT); ++ __ beq(AT, R0, notVFinal); ++ ++ // Get receiver klass into FSR - also a null check ++ __ null_check(T3, oopDesc::klass_offset_in_bytes()); ++ __ load_klass(FSR, T3); ++ ++ Label subtype; ++ __ check_klass_subtype(FSR, T2, T0, subtype); ++ // If we get here the typecheck failed ++ recvKlass = T1; ++ __ move(recvKlass, FSR); ++ __ b(no_such_interface); ++ ++ __ bind(subtype); ++ ++ // do the call - rbx is actually the method to call ++ ++ __ profile_final_call(T1); ++ __ profile_arguments_type(T1, Rmethod, T0, true); ++ ++ __ jump_from_interpreted(Rmethod, T1); ++ // no return from above ++ __ bind(notVFinal); ++ ++ // Get receiver klass into T1 - also a null check ++ __ restore_locals(); ++ __ null_check(T3, oopDesc::klass_offset_in_bytes()); ++ __ load_klass(T1, T3); ++ ++ Label no_such_method; ++ ++ // Preserve method for throw_AbstractMethodErrorVerbose. ++ __ move(T3, Rmethod); ++ // Receiver subtype check against REFC. ++ // Superklass in T2. Subklass in T1. ++ __ lookup_interface_method(// inputs: rec. class, interface, itable index ++ T1, T2, noreg, ++ // outputs: scan temp. reg, scan temp. reg ++ T0, FSR, ++ no_such_interface, ++ /*return_method=*/false); ++ ++ ++ // profile this call ++ __ restore_bcp(); ++ __ profile_virtual_call(T1, T0, FSR); ++ ++ // Get declaring interface class from method, and itable index ++ __ load_method_holder(T2, Rmethod); ++ __ ld_w(Rmethod, Rmethod, in_bytes(Method::itable_index_offset())); ++ __ addi_d(Rmethod, Rmethod, (-1) * Method::itable_index_max); ++ __ sub_w(Rmethod, R0, Rmethod); ++ ++ // Preserve recvKlass for throw_AbstractMethodErrorVerbose. ++ __ move(FSR, T1); ++ __ lookup_interface_method(// inputs: rec. class, interface, itable index ++ FSR, T2, Rmethod, ++ // outputs: method, scan temp. reg ++ Rmethod, T0, ++ no_such_interface); ++ ++ // Rmethod: Method* to call ++ // T3: receiver ++ // Check for abstract method error ++ // Note: This should be done more efficiently via a throw_abstract_method_error ++ // interpreter entry point and a conditional jump to it in case of a null ++ // method. ++ __ beq(Rmethod, R0, no_such_method); ++ ++ __ profile_arguments_type(T1, Rmethod, T0, true); ++ ++ // do the call ++ // T3: receiver ++ // Rmethod: Method* ++ __ jump_from_interpreted(Rmethod, T1); ++ __ should_not_reach_here(); ++ ++ // exception handling code follows... ++ // note: must restore interpreter registers to canonical ++ // state for exception handling to work correctly! ++ ++ __ bind(no_such_method); ++ // throw exception ++ __ pop(Rmethod); // pop return address (pushed by prepare_invoke) ++ __ restore_bcp(); ++ __ restore_locals(); ++ // Pass arguments for generating a verbose error message. 
++ recvKlass = A1; ++ Register method = A2; ++ if (recvKlass != T1) { __ move(recvKlass, T1); } ++ if (method != T3) { __ move(method, T3); } ++ __ call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::throw_AbstractMethodErrorVerbose), recvKlass, method); ++ // the call_VM checks for exception, so we should never return here. ++ __ should_not_reach_here(); ++ ++ __ bind(no_such_interface); ++ // throw exception ++ __ pop(Rmethod); // pop return address (pushed by prepare_invoke) ++ __ restore_bcp(); ++ __ restore_locals(); ++ // Pass arguments for generating a verbose error message. ++ if (recvKlass != T1) { __ move(recvKlass, T1); } ++ __ call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::throw_IncompatibleClassChangeErrorVerbose), recvKlass, T2); ++ // the call_VM checks for exception, so we should never return here. ++ __ should_not_reach_here(); ++} ++ ++ ++void TemplateTable::invokehandle(int byte_no) { ++ transition(vtos, vtos); ++ assert(byte_no == f1_byte, "use this argument"); ++ const Register T2_method = Rmethod; ++ const Register FSR_mtype = FSR; ++ const Register T3_recv = T3; ++ ++ prepare_invoke(byte_no, T2_method, FSR_mtype, T3_recv); ++ //??__ verify_method_ptr(T2_method); ++ __ verify_oop(T3_recv); ++ __ null_check(T3_recv); ++ ++ // T4: MethodType object (from cpool->resolved_references[f1], if necessary) ++ // T2_method: MH.invokeExact_MT method (from f2) ++ ++ // Note: T4 is already pushed (if necessary) by prepare_invoke ++ ++ // FIXME: profile the LambdaForm also ++ __ profile_final_call(T4); ++ ++ // T8: tmp, used for mdp ++ // T2_method: callee ++ // T4: tmp ++ // is_virtual: true ++ __ profile_arguments_type(T8, T2_method, T4, true); ++ ++ __ jump_from_interpreted(T2_method, T4); ++} ++ ++ void TemplateTable::invokedynamic(int byte_no) { ++ transition(vtos, vtos); ++ assert(byte_no == f1_byte, "use this argument"); ++ ++ const Register T2_callsite = T2; ++ ++ prepare_invoke(byte_no, Rmethod, T2_callsite); ++ ++ // T2: CallSite object (from cpool->resolved_references[f1]) ++ // Rmethod: MH.linkToCallSite method (from f2) ++ ++ // Note: T2_callsite is already pushed by prepare_invoke ++ // %%% should make a type profile for any invokedynamic that takes a ref argument ++ // profile this call ++ __ profile_call(T4); ++ ++ // T8: tmp, used for mdp ++ // Rmethod: callee ++ // T4: tmp ++ // is_virtual: false ++ __ profile_arguments_type(T8, Rmethod, T4, false); ++ ++ __ verify_oop(T2_callsite); ++ ++ __ jump_from_interpreted(Rmethod, T4); ++ } ++ ++//----------------------------------------------------------------------------- ++// Allocation ++// T1 : tags & buffer end & thread ++// T2 : object end ++// T3 : klass ++// T1 : object size ++// A1 : cpool ++// A2 : cp index ++// return object in FSR ++void TemplateTable::_new() { ++ transition(vtos, atos); ++ __ get_unsigned_2_byte_index_at_bcp(A2, 1); ++ ++ Label slow_case; ++ Label done; ++ Label initialize_header; ++ Label initialize_object; // including clearing the fields ++ Label allocate_shared; ++ ++ __ get_cpool_and_tags(A1, T1); ++ ++ // make sure the class we're about to instantiate has been resolved. 
++ // Note: slow_case does a pop of stack, which is why we loaded class/pushed above ++ const int tags_offset = Array::base_offset_in_bytes(); ++ __ add_d(T1, T1, A2); ++ __ ld_b(AT, T1, tags_offset); ++ if(os::is_MP()) { ++ __ membar(Assembler::Membar_mask_bits(__ LoadLoad|__ LoadStore)); ++ } ++ __ addi_d(AT, AT, -(int)JVM_CONSTANT_Class); ++ __ bne(AT, R0, slow_case); ++ ++ // get InstanceKlass ++ __ load_resolved_klass_at_index(A1, A2, T3); ++ ++ // make sure klass is initialized & doesn't have finalizer ++ // make sure klass is fully initialized ++ __ ld_hu(T1, T3, in_bytes(InstanceKlass::init_state_offset())); ++ __ addi_d(AT, T1, - (int)InstanceKlass::fully_initialized); ++ __ bne(AT, R0, slow_case); ++ ++ // has_finalizer ++ __ ld_w(T0, T3, in_bytes(Klass::layout_helper_offset()) ); ++ __ andi(AT, T0, Klass::_lh_instance_slow_path_bit); ++ __ bne(AT, R0, slow_case); ++ ++ // Allocate the instance ++ // 1) Try to allocate in the TLAB ++ // 2) if fail and the object is large allocate in the shared Eden ++ // 3) if the above fails (or is not applicable), go to a slow case ++ // (creates a new TLAB, etc.) ++ ++ const bool allow_shared_alloc = ++ Universe::heap()->supports_inline_contig_alloc(); ++ ++#ifndef OPT_THREAD ++ const Register thread = T8; ++ if (UseTLAB || allow_shared_alloc) { ++ __ get_thread(thread); ++ } ++#else ++ const Register thread = TREG; ++#endif ++ ++ if (UseTLAB) { ++ // get tlab_top ++ __ ld_d(FSR, thread, in_bytes(JavaThread::tlab_top_offset())); ++ // get tlab_end ++ __ ld_d(AT, thread, in_bytes(JavaThread::tlab_end_offset())); ++ __ add_d(T2, FSR, T0); ++ __ blt(AT, T2, allow_shared_alloc ? allocate_shared : slow_case); ++ __ st_d(T2, thread, in_bytes(JavaThread::tlab_top_offset())); ++ ++ if (ZeroTLAB) { ++ // the fields have been already cleared ++ __ beq(R0, R0, initialize_header); ++ } else { ++ // initialize both the header and fields ++ __ beq(R0, R0, initialize_object); ++ } ++ } ++ ++ // Allocation in the shared Eden , if allowed ++ // T0 : instance size in words ++ if(allow_shared_alloc){ ++ __ bind(allocate_shared); ++ ++ Label done, retry; ++ Address heap_top(T1); ++ __ li(T1, (long)Universe::heap()->top_addr()); ++ __ ld_d(FSR, heap_top); ++ ++ __ bind(retry); ++ __ li(AT, (long)Universe::heap()->end_addr()); ++ __ ld_d(AT, AT, 0); ++ __ add_d(T2, FSR, T0); ++ __ blt(AT, T2, slow_case); ++ ++ // Compare FSR with the top addr, and if still equal, store the new ++ // top addr in T2 at the address of the top addr pointer. Sets AT if was ++ // equal, and clears it otherwise. Use lock prefix for atomicity on MPs. ++ // ++ // FSR: object begin ++ // T2: object end ++ // T0: instance size in words ++ ++ // if someone beat us on the allocation, try again, otherwise continue ++ __ cmpxchg(heap_top, FSR, T2, AT, true, true, done, &retry); ++ ++ __ bind(done); ++ __ incr_allocated_bytes(thread, T0, 0); ++ } ++ ++ if (UseTLAB || Universe::heap()->supports_inline_contig_alloc()) { ++ // The object is initialized before the header. If the object size is ++ // zero, go directly to the header initialization. ++ __ bind(initialize_object); ++ __ li(AT, - sizeof(oopDesc)); ++ __ add_d(T0, T0, AT); ++ __ beq(T0, R0, initialize_header); ++ ++ // initialize remaining object fields: T0 is a multiple of 2 ++ { ++ Label loop; ++ __ add_d(T1, FSR, T0); ++ ++ __ bind(loop); ++ __ addi_d(T1, T1, -oopSize); ++ __ st_d(R0, T1, sizeof(oopDesc)); ++ __ bne(T1, FSR, loop); // dont clear header ++ } ++ ++ // klass in T3, ++ // initialize object header only. 
++ __ bind(initialize_header); ++ if (UseBiasedLocking) { ++ __ ld_d(AT, T3, in_bytes(Klass::prototype_header_offset())); ++ __ st_d(AT, FSR, oopDesc::mark_offset_in_bytes ()); ++ } else { ++ __ li(AT, (long)markWord::prototype().value()); ++ __ st_d(AT, FSR, oopDesc::mark_offset_in_bytes()); ++ } ++ ++ __ store_klass_gap(FSR, R0); ++ __ store_klass(FSR, T3); ++ ++ { ++ SkipIfEqual skip_if(_masm, &DTraceAllocProbes, 0); ++ // Trigger dtrace event for fastpath ++ __ push(atos); ++ __ call_VM_leaf( ++ CAST_FROM_FN_PTR(address, SharedRuntime::dtrace_object_alloc), FSR); ++ __ pop(atos); ++ ++ } ++ __ b(done); ++ } ++ ++ // slow case ++ __ bind(slow_case); ++ __ get_constant_pool(A1); ++ __ get_unsigned_2_byte_index_at_bcp(A2, 1); ++ call_VM(FSR, CAST_FROM_FN_PTR(address, InterpreterRuntime::_new), A1, A2); ++ ++ // continue ++ __ bind(done); ++ __ membar(__ StoreStore); ++} ++ ++void TemplateTable::newarray() { ++ transition(itos, atos); ++ __ ld_bu(A1, at_bcp(1)); ++ // type, count ++ call_VM(FSR, CAST_FROM_FN_PTR(address, InterpreterRuntime::newarray), A1, FSR); ++ __ membar(__ StoreStore); ++} ++ ++void TemplateTable::anewarray() { ++ transition(itos, atos); ++ __ get_unsigned_2_byte_index_at_bcp(A2, 1); // big-endian ++ __ get_constant_pool(A1); ++ // cp, index, count ++ call_VM(FSR, CAST_FROM_FN_PTR(address, InterpreterRuntime::anewarray), A1, A2, FSR); ++ __ membar(__ StoreStore); ++} ++ ++void TemplateTable::arraylength() { ++ transition(atos, itos); ++ __ null_check(FSR, arrayOopDesc::length_offset_in_bytes()); ++ __ ld_w(FSR, FSR, arrayOopDesc::length_offset_in_bytes()); ++} ++ ++// when invoke gen_subtype_check, super in T3, sub in T2, object in FSR(it's always) ++// T2 : sub klass ++// T3 : cpool ++// T3 : super klass ++void TemplateTable::checkcast() { ++ transition(atos, atos); ++ Label done, is_null, ok_is_subtype, quicked, resolved; ++ __ beq(FSR, R0, is_null); ++ ++ // Get cpool & tags index ++ __ get_cpool_and_tags(T3, T1); ++ __ get_unsigned_2_byte_index_at_bcp(T2, 1); // big-endian ++ ++ // See if bytecode has already been quicked ++ __ add_d(AT, T1, T2); ++ __ ld_b(AT, AT, Array::base_offset_in_bytes()); ++ if(os::is_MP()) { ++ __ membar(Assembler::Membar_mask_bits(__ LoadLoad|__ LoadStore)); ++ } ++ __ addi_d(AT, AT, - (int)JVM_CONSTANT_Class); ++ __ beq(AT, R0, quicked); ++ ++ // In InterpreterRuntime::quicken_io_cc, lots of new classes may be loaded. ++ // Then, GC will move the object in V0 to another places in heap. ++ // Therefore, We should never save such an object in register. ++ // Instead, we should save it in the stack. It can be modified automatically by the GC thread. ++ // After GC, the object address in FSR is changed to a new place. ++ // ++ __ push(atos); ++ const Register thread = TREG; ++#ifndef OPT_THREAD ++ __ get_thread(thread); ++#endif ++ call_VM(NOREG, CAST_FROM_FN_PTR(address, InterpreterRuntime::quicken_io_cc)); ++ __ get_vm_result_2(T3, thread); ++ __ pop_ptr(FSR); ++ __ b(resolved); ++ ++ // klass already in cp, get superklass in T3 ++ __ bind(quicked); ++ __ load_resolved_klass_at_index(T3, T2, T3); ++ ++ __ bind(resolved); ++ ++ // get subklass in T2 ++ __ load_klass(T2, FSR); ++ // Superklass in T3. Subklass in T2. ++ __ gen_subtype_check(T3, T2, ok_is_subtype); ++ ++ // Come here on failure ++ // object is at FSR ++ __ jmp(Interpreter::_throw_ClassCastException_entry); ++ ++ // Come here on success ++ __ bind(ok_is_subtype); ++ ++ // Collect counts on whether this check-cast sees NULLs a lot or not. 
++ if (ProfileInterpreter) { ++ __ b(done); ++ __ bind(is_null); ++ __ profile_null_seen(T3); ++ } else { ++ __ bind(is_null); ++ } ++ __ bind(done); ++} ++ ++// T3 as cpool, T1 as tags, T2 as index ++// object always in FSR, superklass in T3, subklass in T2 ++void TemplateTable::instanceof() { ++ transition(atos, itos); ++ Label done, is_null, ok_is_subtype, quicked, resolved; ++ ++ __ beq(FSR, R0, is_null); ++ ++ // Get cpool & tags index ++ __ get_cpool_and_tags(T3, T1); ++ // get index ++ __ get_unsigned_2_byte_index_at_bcp(T2, 1); // big-endian ++ ++ // See if bytecode has already been quicked ++ // quicked ++ __ add_d(AT, T1, T2); ++ __ ld_b(AT, AT, Array::base_offset_in_bytes()); ++ if(os::is_MP()) { ++ __ membar(Assembler::Membar_mask_bits(__ LoadLoad|__ LoadStore)); ++ } ++ __ addi_d(AT, AT, - (int)JVM_CONSTANT_Class); ++ __ beq(AT, R0, quicked); ++ ++ __ push(atos); ++ const Register thread = TREG; ++#ifndef OPT_THREAD ++ __ get_thread(thread); ++#endif ++ call_VM(NOREG, CAST_FROM_FN_PTR(address, InterpreterRuntime::quicken_io_cc)); ++ __ get_vm_result_2(T3, thread); ++ __ pop_ptr(FSR); ++ __ b(resolved); ++ ++ // get superklass in T3, subklass in T2 ++ __ bind(quicked); ++ __ load_resolved_klass_at_index(T3, T2, T3); ++ ++ __ bind(resolved); ++ // get subklass in T2 ++ __ load_klass(T2, FSR); ++ ++ // Superklass in T3. Subklass in T2. ++ __ gen_subtype_check(T3, T2, ok_is_subtype); ++ __ move(FSR, R0); ++ // Come here on failure ++ __ b(done); ++ ++ // Come here on success ++ __ bind(ok_is_subtype); ++ __ li(FSR, 1); ++ ++ // Collect counts on whether this test sees NULLs a lot or not. ++ if (ProfileInterpreter) { ++ __ beq(R0, R0, done); ++ __ bind(is_null); ++ __ profile_null_seen(T3); ++ } else { ++ __ bind(is_null); // same as 'done' ++ } ++ __ bind(done); ++ // FSR = 0: obj == NULL or obj is not an instanceof the specified klass ++ // FSR = 1: obj != NULL and obj is an instanceof the specified klass ++} ++ ++//-------------------------------------------------------- ++//-------------------------------------------- ++// Breakpoints ++void TemplateTable::_breakpoint() { ++ // Note: We get here even if we are single stepping.. ++ // jbug inists on setting breakpoints at every bytecode ++ // even if we are in single step mode. ++ ++ transition(vtos, vtos); ++ ++ // get the unpatched byte code ++ __ get_method(A1); ++ __ call_VM(NOREG, ++ CAST_FROM_FN_PTR(address, ++ InterpreterRuntime::get_original_bytecode_at), ++ A1, BCP); ++ __ move(Rnext, V0); // Rnext will be used in dispatch_only_normal ++ ++ // post the breakpoint event ++ __ get_method(A1); ++ __ call_VM(NOREG, CAST_FROM_FN_PTR(address, InterpreterRuntime::_breakpoint), A1, BCP); ++ ++ // complete the execution of original bytecode ++ __ dispatch_only_normal(vtos); ++} ++ ++//----------------------------------------------------------------------------- ++// Exceptions ++ ++void TemplateTable::athrow() { ++ transition(atos, vtos); ++ __ null_check(FSR); ++ __ jmp(Interpreter::throw_exception_entry()); ++} ++ ++//----------------------------------------------------------------------------- ++// Synchronization ++// ++// Note: monitorenter & exit are symmetric routines; which is reflected ++// in the assembly code structure as well ++// ++// Stack layout: ++// ++// [expressions ] <--- SP = expression stack top ++// .. ++// [expressions ] ++// [monitor entry] <--- monitor block top = expression stack bot ++// .. ++// [monitor entry] ++// [frame data ] <--- monitor block bot ++// ... 
++// [return addr ] <--- FP ++ ++// we use T2 as monitor entry pointer, T3 as monitor top pointer, c_rarg0 as free slot pointer ++// object always in FSR ++void TemplateTable::monitorenter() { ++ transition(atos, vtos); ++ ++ // check for NULL object ++ __ null_check(FSR); ++ ++ const Address monitor_block_top(FP, frame::interpreter_frame_monitor_block_top_offset ++ * wordSize); ++ const int entry_size = (frame::interpreter_frame_monitor_size()* wordSize); ++ Label allocated; ++ ++ // initialize entry pointer ++ __ move(c_rarg0, R0); ++ ++ // find a free slot in the monitor block (result in c_rarg0) ++ { ++ Label entry, loop, exit, next; ++ __ ld_d(T2, monitor_block_top); ++ __ addi_d(T3, FP, frame::interpreter_frame_initial_sp_offset * wordSize); ++ __ b(entry); ++ ++ // free slot? ++ __ bind(loop); ++ __ ld_d(AT, T2, BasicObjectLock::obj_offset_in_bytes()); ++ __ bne(AT, R0, next); ++ __ move(c_rarg0, T2); ++ ++ __ bind(next); ++ __ beq(FSR, AT, exit); ++ __ addi_d(T2, T2, entry_size); ++ ++ __ bind(entry); ++ __ bne(T3, T2, loop); ++ __ bind(exit); ++ } ++ ++ __ bne(c_rarg0, R0, allocated); ++ ++ // allocate one if there's no free slot ++ { ++ Label entry, loop; ++ // 1. compute new pointers // SP: old expression stack top ++ __ ld_d(c_rarg0, monitor_block_top); ++ __ addi_d(SP, SP, -entry_size); ++ __ addi_d(c_rarg0, c_rarg0, -entry_size); ++ __ st_d(c_rarg0, monitor_block_top); ++ __ move(T3, SP); ++ __ b(entry); ++ ++ // 2. move expression stack contents ++ __ bind(loop); ++ __ ld_d(AT, T3, entry_size); ++ __ st_d(AT, T3, 0); ++ __ addi_d(T3, T3, wordSize); ++ __ bind(entry); ++ __ bne(T3, c_rarg0, loop); ++ } ++ ++ __ bind(allocated); ++ // Increment bcp to point to the next bytecode, ++ // so exception handling for async. exceptions work correctly. ++ // The object has already been poped from the stack, so the ++ // expression stack looks correct. ++ __ addi_d(BCP, BCP, 1); ++ __ st_d(FSR, c_rarg0, BasicObjectLock::obj_offset_in_bytes()); ++ __ lock_object(c_rarg0); ++ // check to make sure this monitor doesn't cause stack overflow after locking ++ __ save_bcp(); // in case of exception ++ __ generate_stack_overflow_check(0); ++ // The bcp has already been incremented. Just need to dispatch to next instruction. ++ ++ __ dispatch_next(vtos); ++} ++ ++// T2 : top ++// c_rarg0 : entry ++void TemplateTable::monitorexit() { ++ transition(atos, vtos); ++ ++ __ null_check(FSR); ++ ++ const int entry_size =(frame::interpreter_frame_monitor_size()* wordSize); ++ Label found; ++ ++ // find matching slot ++ { ++ Label entry, loop; ++ __ ld_d(c_rarg0, FP, frame::interpreter_frame_monitor_block_top_offset * wordSize); ++ __ addi_d(T2, FP, frame::interpreter_frame_initial_sp_offset * wordSize); ++ __ b(entry); ++ ++ __ bind(loop); ++ __ ld_d(AT, c_rarg0, BasicObjectLock::obj_offset_in_bytes()); ++ __ beq(FSR, AT, found); ++ __ addi_d(c_rarg0, c_rarg0, entry_size); ++ __ bind(entry); ++ __ bne(T2, c_rarg0, loop); ++ } ++ ++ // error handling. 
Unlocking was not block-structured ++ Label end; ++ __ call_VM(NOREG, CAST_FROM_FN_PTR(address, ++ InterpreterRuntime::throw_illegal_monitor_state_exception)); ++ __ should_not_reach_here(); ++ ++ // call run-time routine ++ // c_rarg0: points to monitor entry ++ __ bind(found); ++ __ move(TSR, FSR); ++ __ unlock_object(c_rarg0); ++ __ move(FSR, TSR); ++ __ bind(end); ++} ++ ++ ++// Wide instructions ++void TemplateTable::wide() { ++ transition(vtos, vtos); ++ __ ld_bu(Rnext, at_bcp(1)); ++ __ li(AT, (long)Interpreter::_wentry_point); ++ __ alsl_d(AT, Rnext, AT, Address::times_8 - 1); ++ __ ld_d(AT, AT, 0); ++ __ jr(AT); ++} ++ ++ ++void TemplateTable::multianewarray() { ++ transition(vtos, atos); ++ // last dim is on top of stack; we want address of first one: ++ // first_addr = last_addr + (ndims - 1) * wordSize ++ __ ld_bu(A1, at_bcp(3)); // dimension ++ __ addi_d(A1, A1, -1); ++ __ alsl_d(A1, A1, SP, Address::times_8 - 1); // now A1 pointer to the count array on the stack ++ call_VM(FSR, CAST_FROM_FN_PTR(address, InterpreterRuntime::multianewarray), A1); ++ __ ld_bu(AT, at_bcp(3)); ++ __ alsl_d(SP, AT, SP, Address::times_8 - 1); ++ __ membar(__ AnyAny);//no membar here for aarch64 ++} +diff --git a/src/hotspot/cpu/loongarch/universalNativeInvoker_loongarch_64.cpp b/src/hotspot/cpu/loongarch/universalNativeInvoker_loongarch_64.cpp +new file mode 100644 +index 00000000000..87f6a113268 +--- /dev/null ++++ b/src/hotspot/cpu/loongarch/universalNativeInvoker_loongarch_64.cpp +@@ -0,0 +1,32 @@ ++/* ++ * Copyright (c) 2020, Red Hat, Inc. All rights reserved. ++ * Copyright (c) 2021, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ */ ++ ++#include "precompiled.hpp" ++#include "prims/universalNativeInvoker.hpp" ++#include "utilities/debug.hpp" ++ ++address ProgrammableInvoker::generate_adapter(jobject jabi, jobject jlayout) { ++ Unimplemented(); ++ return nullptr; ++} +diff --git a/src/hotspot/cpu/loongarch/universalUpcallHandler_loongarch_64.cpp b/src/hotspot/cpu/loongarch/universalUpcallHandler_loongarch_64.cpp +new file mode 100644 +index 00000000000..7586b084868 +--- /dev/null ++++ b/src/hotspot/cpu/loongarch/universalUpcallHandler_loongarch_64.cpp +@@ -0,0 +1,41 @@ ++/* ++ * Copyright (c) 2020, Red Hat, Inc. All rights reserved. ++ * Copyright (c) 2021, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ */ ++ ++#include "precompiled.hpp" ++#include "prims/universalUpcallHandler.hpp" ++#include "utilities/debug.hpp" ++ ++address ProgrammableUpcallHandler::generate_upcall_stub(jobject rec, jobject jabi, jobject jlayout) { ++ Unimplemented(); ++ return nullptr; ++} ++ ++address ProgrammableUpcallHandler::generate_optimized_upcall_stub(jobject mh, Method* entry, jobject jabi, jobject jconv) { ++ ShouldNotCallThis(); ++ return nullptr; ++} ++ ++bool ProgrammableUpcallHandler::supports_optimized_upcalls() { ++ return false; ++} +diff --git a/src/hotspot/cpu/loongarch/vmStructs_loongarch.hpp b/src/hotspot/cpu/loongarch/vmStructs_loongarch.hpp +new file mode 100644 +index 00000000000..5b9f7b78981 +--- /dev/null ++++ b/src/hotspot/cpu/loongarch/vmStructs_loongarch.hpp +@@ -0,0 +1,61 @@ ++/* ++ * Copyright (c) 2001, 2013, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#ifndef CPU_LOONGARCH_VMSTRUCTS_LOONGARCH_HPP ++#define CPU_LOONGARCH_VMSTRUCTS_LOONGARCH_HPP ++ ++// These are the CPU-specific fields, types and integer ++// constants required by the Serviceability Agent. This file is ++// referenced by vmStructs.cpp. 
++ ++#define VM_STRUCTS_CPU(nonstatic_field, static_field, unchecked_nonstatic_field, volatile_nonstatic_field, nonproduct_nonstatic_field, c2_nonstatic_field, unchecked_c1_static_field, unchecked_c2_static_field) \ ++ volatile_nonstatic_field(JavaFrameAnchor, _last_Java_fp, intptr_t*) \ ++ \ ++ ++ /* NOTE that we do not use the last_entry() macro here; it is used */ ++ /* in vmStructs__.hpp's VM_STRUCTS_OS_CPU macro (and must */ ++ /* be present there) */ ++ ++ ++#define VM_TYPES_CPU(declare_type, declare_toplevel_type, declare_oop_type, declare_integer_type, declare_unsigned_integer_type, declare_c1_toplevel_type, declare_c2_type, declare_c2_toplevel_type) \ ++ ++ /* NOTE that we do not use the last_entry() macro here; it is used */ ++ /* in vmStructs__.hpp's VM_TYPES_OS_CPU macro (and must */ ++ /* be present there) */ ++ ++ ++#define VM_INT_CONSTANTS_CPU(declare_constant, declare_preprocessor_constant, declare_c1_constant, declare_c2_constant, declare_c2_preprocessor_constant) \ ++ ++ /* NOTE that we do not use the last_entry() macro here; it is used */ ++ /* in vmStructs__.hpp's VM_INT_CONSTANTS_OS_CPU macro (and must */ ++ /* be present there) */ ++ ++#define VM_LONG_CONSTANTS_CPU(declare_constant, declare_preprocessor_constant, declare_c1_constant, declare_c2_constant, declare_c2_preprocessor_constant) \ ++ ++ /* NOTE that we do not use the last_entry() macro here; it is used */ ++ /* in vmStructs__.hpp's VM_LONG_CONSTANTS_OS_CPU macro (and must */ ++ /* be present there) */ ++ ++#endif // CPU_LOONGARCH_VMSTRUCTS_LOONGARCH_HPP +diff --git a/src/hotspot/cpu/loongarch/vm_version_ext_loongarch.cpp b/src/hotspot/cpu/loongarch/vm_version_ext_loongarch.cpp +new file mode 100644 +index 00000000000..31da20e6f39 +--- /dev/null ++++ b/src/hotspot/cpu/loongarch/vm_version_ext_loongarch.cpp +@@ -0,0 +1,85 @@ ++/* ++ * Copyright (c) 2013, 2018, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2018, 2021, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ * ++ */ ++ ++#include "memory/allocation.inline.hpp" ++#include "runtime/os.inline.hpp" ++#include "vm_version_ext_loongarch.hpp" ++ ++// VM_Version_Ext statics ++int VM_Version_Ext::_no_of_threads = 0; ++int VM_Version_Ext::_no_of_cores = 0; ++int VM_Version_Ext::_no_of_sockets = 0; ++bool VM_Version_Ext::_initialized = false; ++char VM_Version_Ext::_cpu_name[CPU_TYPE_DESC_BUF_SIZE] = {0}; ++char VM_Version_Ext::_cpu_desc[CPU_DETAILED_DESC_BUF_SIZE] = {0}; ++ ++void VM_Version_Ext::initialize_cpu_information(void) { ++ // do nothing if cpu info has been initialized ++ if (_initialized) { ++ return; ++ } ++ ++ _no_of_cores = os::processor_count(); ++ _no_of_threads = _no_of_cores; ++ _no_of_sockets = _no_of_cores; ++ snprintf(_cpu_name, CPU_TYPE_DESC_BUF_SIZE - 1, "LoongArch"); ++ snprintf(_cpu_desc, CPU_DETAILED_DESC_BUF_SIZE, "LoongArch %s", features_string()); ++ _initialized = true; ++} ++ ++int VM_Version_Ext::number_of_threads(void) { ++ initialize_cpu_information(); ++ return _no_of_threads; ++} ++ ++int VM_Version_Ext::number_of_cores(void) { ++ initialize_cpu_information(); ++ return _no_of_cores; ++} ++ ++int VM_Version_Ext::number_of_sockets(void) { ++ initialize_cpu_information(); ++ return _no_of_sockets; ++} ++ ++const char* VM_Version_Ext::cpu_name(void) { ++ initialize_cpu_information(); ++ char* tmp = NEW_C_HEAP_ARRAY_RETURN_NULL(char, CPU_TYPE_DESC_BUF_SIZE, mtTracing); ++ if (NULL == tmp) { ++ return NULL; ++ } ++ strncpy(tmp, _cpu_name, CPU_TYPE_DESC_BUF_SIZE); ++ return tmp; ++} ++ ++const char* VM_Version_Ext::cpu_description(void) { ++ initialize_cpu_information(); ++ char* tmp = NEW_C_HEAP_ARRAY_RETURN_NULL(char, CPU_DETAILED_DESC_BUF_SIZE, mtTracing); ++ if (NULL == tmp) { ++ return NULL; ++ } ++ strncpy(tmp, _cpu_desc, CPU_DETAILED_DESC_BUF_SIZE); ++ return tmp; ++} +diff --git a/src/hotspot/cpu/loongarch/vm_version_ext_loongarch.hpp b/src/hotspot/cpu/loongarch/vm_version_ext_loongarch.hpp +new file mode 100644 +index 00000000000..1a93123134c +--- /dev/null ++++ b/src/hotspot/cpu/loongarch/vm_version_ext_loongarch.hpp +@@ -0,0 +1,54 @@ ++/* ++ * Copyright (c) 2016, 2018, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2018, 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ * ++ */ ++ ++#ifndef CPU_LOONGARCH_VM_VERSION_EXT_LOONGARCH_HPP ++#define CPU_LOONGARCH_VM_VERSION_EXT_LOONGARCH_HPP ++ ++#include "runtime/vm_version.hpp" ++#include "utilities/macros.hpp" ++ ++class VM_Version_Ext : public VM_Version { ++ private: ++ static const size_t CPU_TYPE_DESC_BUF_SIZE = 256; ++ static const size_t CPU_DETAILED_DESC_BUF_SIZE = 4096; ++ ++ static int _no_of_threads; ++ static int _no_of_cores; ++ static int _no_of_sockets; ++ static bool _initialized; ++ static char _cpu_name[CPU_TYPE_DESC_BUF_SIZE]; ++ static char _cpu_desc[CPU_DETAILED_DESC_BUF_SIZE]; ++ ++ public: ++ static int number_of_threads(void); ++ static int number_of_cores(void); ++ static int number_of_sockets(void); ++ ++ static const char* cpu_name(void); ++ static const char* cpu_description(void); ++ static void initialize_cpu_information(void); ++}; ++ ++#endif // CPU_LOONGARCH_VM_VERSION_EXT_LOONGARCH_HPP +diff --git a/src/hotspot/cpu/loongarch/vm_version_loongarch.cpp b/src/hotspot/cpu/loongarch/vm_version_loongarch.cpp +new file mode 100644 +index 00000000000..1a1ac923117 +--- /dev/null ++++ b/src/hotspot/cpu/loongarch/vm_version_loongarch.cpp +@@ -0,0 +1,432 @@ ++/* ++ * Copyright (c) 1997, 2014, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2021, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ * ++ */ ++ ++#include "precompiled.hpp" ++#include "asm/macroAssembler.hpp" ++#include "asm/macroAssembler.inline.hpp" ++#include "memory/resourceArea.hpp" ++#include "runtime/arguments.hpp" ++#include "runtime/java.hpp" ++#include "runtime/stubCodeGenerator.hpp" ++#include "runtime/vm_version.hpp" ++#ifdef TARGET_OS_FAMILY_linux ++# include "os_linux.inline.hpp" ++#endif ++ ++#define A0 RA0 ++#define A1 RA1 ++#define A2 RA2 ++#define A3 RA3 ++#define A4 RA4 ++#define A5 RA5 ++#define A6 RA6 ++#define A7 RA7 ++#define T0 RT0 ++#define T1 RT1 ++#define T2 RT2 ++#define T3 RT3 ++#define T4 RT4 ++#define T5 RT5 ++#define T6 RT6 ++#define T7 RT7 ++#define T8 RT8 ++ ++VM_Version::CpuidInfo VM_Version::_cpuid_info = { 0, }; ++bool VM_Version::_cpu_info_is_initialized = false; ++ ++static BufferBlob* stub_blob; ++static const int stub_size = 600; ++ ++extern "C" { ++ typedef void (*get_cpu_info_stub_t)(void*); ++} ++static get_cpu_info_stub_t get_cpu_info_stub = NULL; ++ ++ ++class VM_Version_StubGenerator: public StubCodeGenerator { ++ public: ++ ++ VM_Version_StubGenerator(CodeBuffer *c) : StubCodeGenerator(c) {} ++ ++ address generate_get_cpu_info() { ++ assert(!VM_Version::cpu_info_is_initialized(), "VM_Version should not be initialized"); ++ StubCodeMark mark(this, "VM_Version", "get_cpu_info_stub"); ++# define __ _masm-> ++ ++ address start = __ pc(); ++ ++ __ enter(); ++ __ push(AT); ++ __ push(T5); ++ ++ __ li(AT, (long)0); ++ __ cpucfg(T5, AT); ++ __ st_w(T5, A0, in_bytes(VM_Version::Loongson_Cpucfg_id0_offset())); ++ ++ __ li(AT, 1); ++ __ cpucfg(T5, AT); ++ __ st_w(T5, A0, in_bytes(VM_Version::Loongson_Cpucfg_id1_offset())); ++ ++ __ li(AT, 2); ++ __ cpucfg(T5, AT); ++ __ st_w(T5, A0, in_bytes(VM_Version::Loongson_Cpucfg_id2_offset())); ++ ++ __ li(AT, 3); ++ __ cpucfg(T5, AT); ++ __ st_w(T5, A0, in_bytes(VM_Version::Loongson_Cpucfg_id3_offset())); ++ ++ __ li(AT, 4); ++ __ cpucfg(T5, AT); ++ __ st_w(T5, A0, in_bytes(VM_Version::Loongson_Cpucfg_id4_offset())); ++ ++ __ li(AT, 5); ++ __ cpucfg(T5, AT); ++ __ st_w(T5, A0, in_bytes(VM_Version::Loongson_Cpucfg_id5_offset())); ++ ++ __ li(AT, 6); ++ __ cpucfg(T5, AT); ++ __ st_w(T5, A0, in_bytes(VM_Version::Loongson_Cpucfg_id6_offset())); ++ ++ __ li(AT, 10); ++ __ cpucfg(T5, AT); ++ __ st_w(T5, A0, in_bytes(VM_Version::Loongson_Cpucfg_id10_offset())); ++ ++ __ li(AT, 11); ++ __ cpucfg(T5, AT); ++ __ st_w(T5, A0, in_bytes(VM_Version::Loongson_Cpucfg_id11_offset())); ++ ++ __ li(AT, 12); ++ __ cpucfg(T5, AT); ++ __ st_w(T5, A0, in_bytes(VM_Version::Loongson_Cpucfg_id12_offset())); ++ ++ __ li(AT, 13); ++ __ cpucfg(T5, AT); ++ __ st_w(T5, A0, in_bytes(VM_Version::Loongson_Cpucfg_id13_offset())); ++ ++ __ li(AT, 14); ++ __ cpucfg(T5, AT); ++ __ st_w(T5, A0, in_bytes(VM_Version::Loongson_Cpucfg_id14_offset())); ++ ++ __ pop(T5); ++ __ pop(AT); ++ __ leave(); ++ __ jr(RA); ++# undef __ ++ return start; ++ }; ++}; ++ ++uint32_t VM_Version::get_feature_flags_by_cpucfg() { ++ uint32_t result = 0; ++ if (_cpuid_info.cpucfg_info_id1.bits.ARCH == 0b00 || _cpuid_info.cpucfg_info_id1.bits.ARCH == 0b01 ) { ++ result |= CPU_LA32; ++ } else if (_cpuid_info.cpucfg_info_id1.bits.ARCH == 0b10 ) { ++ result |= CPU_LA64; ++ } ++ ++ if (_cpuid_info.cpucfg_info_id2.bits.FP_CFG != 0) ++ result |= CPU_FP; ++ ++ if (_cpuid_info.cpucfg_info_id3.bits.CCDMA != 0) ++ result |= CPU_CCDMA; ++ if (_cpuid_info.cpucfg_info_id3.bits.LLDBAR != 0) ++ result |= CPU_LLDBAR; ++ if (_cpuid_info.cpucfg_info_id3.bits.SCDLY != 0) ++ result |= CPU_SCDLY; ++ if 
(_cpuid_info.cpucfg_info_id3.bits.LLEXC != 0) ++ result |= CPU_LLEXC; ++ ++ result |= CPU_ULSYNC; ++ ++ return result; ++} ++ ++void VM_Version::get_processor_features() { ++ ++ clean_cpuFeatures(); ++ ++ get_os_cpu_info(); ++ ++ get_cpu_info_stub(&_cpuid_info); ++ _features |= get_feature_flags_by_cpucfg(); ++ ++ _supports_cx8 = true; ++ ++ if (UseG1GC && FLAG_IS_DEFAULT(MaxGCPauseMillis)) { ++ FLAG_SET_DEFAULT(MaxGCPauseMillis, 150); ++ } ++ ++ if (supports_lsx()) { ++ if (FLAG_IS_DEFAULT(UseLSX)) { ++ FLAG_SET_DEFAULT(UseLSX, true); ++ } ++ } else if (UseLSX) { ++ warning("LSX instructions are not available on this CPU"); ++ FLAG_SET_DEFAULT(UseLSX, false); ++ } ++ ++ if (supports_lasx()) { ++ if (FLAG_IS_DEFAULT(UseLASX)) { ++ FLAG_SET_DEFAULT(UseLASX, true); ++ } ++ } else if (UseLASX) { ++ warning("LASX instructions are not available on this CPU"); ++ FLAG_SET_DEFAULT(UseLASX, false); ++ } ++ ++ if (UseLASX && !UseLSX) { ++ warning("LASX instructions depends on LSX, setting UseLASX to false"); ++ FLAG_SET_DEFAULT(UseLASX, false); ++ } ++ ++#ifdef COMPILER2 ++ int max_vector_size = 0; ++ int min_vector_size = 0; ++ if (UseLASX) { ++ max_vector_size = 32; ++ min_vector_size = 16; ++ } ++ else if (UseLSX) { ++ max_vector_size = 16; ++ min_vector_size = 16; ++ } ++ ++ if (!FLAG_IS_DEFAULT(MaxVectorSize)) { ++ if (MaxVectorSize == 0) { ++ // do nothing ++ } else if (MaxVectorSize > max_vector_size) { ++ warning("MaxVectorSize must be at most %i on this platform", max_vector_size); ++ FLAG_SET_DEFAULT(MaxVectorSize, max_vector_size); ++ } else if (MaxVectorSize < min_vector_size) { ++ warning("MaxVectorSize must be at least %i or 0 on this platform, setting to: %i", min_vector_size, min_vector_size); ++ FLAG_SET_DEFAULT(MaxVectorSize, min_vector_size); ++ } else if (!is_power_of_2(MaxVectorSize)) { ++ warning("MaxVectorSize must be a power of 2, setting to default: %i", max_vector_size); ++ FLAG_SET_DEFAULT(MaxVectorSize, max_vector_size); ++ } ++ } else { ++ // If default, use highest supported configuration ++ FLAG_SET_DEFAULT(MaxVectorSize, max_vector_size); ++ } ++#endif ++ ++ char buf[256]; ++ ++ // A note on the _features_string format: ++ // There are jtreg tests checking the _features_string for various properties. ++ // For some strange reason, these tests require the string to contain ++ // only _lowercase_ characters. Keep that in mind when being surprised ++ // about the unusual notation of features - and when adding new ones. ++ // Features may have one comma at the end. ++ // Furthermore, use one, and only one, separator space between features. ++ // Multiple spaces are considered separate tokens, messing up everything. ++ jio_snprintf(buf, sizeof(buf), "%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s, " ++ "0x%lx, fp_ver: %d, lvz_ver: %d, ", ++ (is_la64() ? "la64" : ""), ++ (is_la32() ? "la32" : ""), ++ (supports_lsx() ? ", lsx" : ""), ++ (supports_lasx() ? ", lasx" : ""), ++ (supports_crypto() ? ", crypto" : ""), ++ (supports_lam() ? ", am" : ""), ++ (supports_ual() ? ", ual" : ""), ++ (supports_lldbar() ? ", lldbar" : ""), ++ (supports_scdly() ? ", scdly" : ""), ++ (supports_llexc() ? ", llexc" : ""), ++ (supports_lbt_x86() ? ", lbt_x86" : ""), ++ (supports_lbt_arm() ? ", lbt_arm" : ""), ++ (supports_lbt_mips() ? ", lbt_mips" : ""), ++ (needs_llsync() ? ", needs_llsync" : ""), ++ (needs_tgtsync() ? ", needs_tgtsync": ""), ++ (needs_ulsync() ? 
", needs_ulsync": ""), ++ _cpuid_info.cpucfg_info_id0.bits.PRID, ++ _cpuid_info.cpucfg_info_id2.bits.FP_VER, ++ _cpuid_info.cpucfg_info_id2.bits.LVZ_VER); ++ _features_string = os::strdup(buf); ++ ++ assert(!is_la32(), "Should Not Reach Here, what is the cpu type?"); ++ assert( is_la64(), "Should be LoongArch64"); ++ ++ if (FLAG_IS_DEFAULT(AllocatePrefetchStyle)) { ++ FLAG_SET_DEFAULT(AllocatePrefetchStyle, 1); ++ } ++ ++ if (FLAG_IS_DEFAULT(AllocatePrefetchLines)) { ++ FLAG_SET_DEFAULT(AllocatePrefetchLines, 3); ++ } ++ ++ if (FLAG_IS_DEFAULT(AllocatePrefetchStepSize)) { ++ FLAG_SET_DEFAULT(AllocatePrefetchStepSize, 64); ++ } ++ ++ if (FLAG_IS_DEFAULT(AllocatePrefetchDistance)) { ++ FLAG_SET_DEFAULT(AllocatePrefetchDistance, 192); ++ } ++ ++ if (FLAG_IS_DEFAULT(AllocateInstancePrefetchLines)) { ++ FLAG_SET_DEFAULT(AllocateInstancePrefetchLines, 1); ++ } ++ ++ // Basic instructions are used to implement SHA Intrinsics on LA, so sha ++ // instructions support is not needed. ++ if (/*supports_crypto()*/ 1) { ++ if (FLAG_IS_DEFAULT(UseSHA)) { ++ FLAG_SET_DEFAULT(UseSHA, true); ++ } ++ } else if (UseSHA) { ++ warning("SHA instructions are not available on this CPU"); ++ FLAG_SET_DEFAULT(UseSHA, false); ++ } ++ ++ if (UseSHA/* && supports_crypto()*/) { ++ if (FLAG_IS_DEFAULT(UseSHA1Intrinsics)) { ++ FLAG_SET_DEFAULT(UseSHA1Intrinsics, true); ++ } ++ } else if (UseSHA1Intrinsics) { ++ warning("Intrinsics for SHA-1 crypto hash functions not available on this CPU."); ++ FLAG_SET_DEFAULT(UseSHA1Intrinsics, false); ++ } ++ ++ if (UseSHA/* && supports_crypto()*/) { ++ if (FLAG_IS_DEFAULT(UseSHA256Intrinsics)) { ++ FLAG_SET_DEFAULT(UseSHA256Intrinsics, true); ++ } ++ } else if (UseSHA256Intrinsics) { ++ warning("Intrinsics for SHA-224 and SHA-256 crypto hash functions not available on this CPU."); ++ FLAG_SET_DEFAULT(UseSHA256Intrinsics, false); ++ } ++ ++ if (UseSHA512Intrinsics) { ++ warning("Intrinsics for SHA-384 and SHA-512 crypto hash functions not available on this CPU."); ++ FLAG_SET_DEFAULT(UseSHA512Intrinsics, false); ++ } ++ ++ if (UseSHA3Intrinsics) { ++ warning("Intrinsics for SHA3-224, SHA3-256, SHA3-384 and SHA3-512 crypto hash functions not available on this CPU."); ++ FLAG_SET_DEFAULT(UseSHA3Intrinsics, false); ++ } ++ ++ if (!(UseSHA1Intrinsics || UseSHA256Intrinsics || UseSHA3Intrinsics || UseSHA512Intrinsics)) { ++ FLAG_SET_DEFAULT(UseSHA, false); ++ } ++ ++ if (FLAG_IS_DEFAULT(UseMD5Intrinsics)) { ++ FLAG_SET_DEFAULT(UseMD5Intrinsics, true); ++ } ++ ++ // Basic instructions are used to implement AES Intrinsics on LA, so AES ++ // instructions support is not needed. 
++ if (/*supports_crypto()*/ 1) { ++ if (FLAG_IS_DEFAULT(UseAES)) { ++ FLAG_SET_DEFAULT(UseAES, true); ++ } ++ } else if (UseAES) { ++ if (!FLAG_IS_DEFAULT(UseAES)) ++ warning("AES instructions are not available on this CPU"); ++ FLAG_SET_DEFAULT(UseAES, false); ++ } ++ ++ if (UseAES/* && supports_crypto()*/) { ++ if (FLAG_IS_DEFAULT(UseAESIntrinsics)) { ++ FLAG_SET_DEFAULT(UseAESIntrinsics, true); ++ } ++ } else if (UseAESIntrinsics) { ++ if (!FLAG_IS_DEFAULT(UseAESIntrinsics)) ++ warning("AES intrinsics are not available on this CPU"); ++ FLAG_SET_DEFAULT(UseAESIntrinsics, false); ++ } ++ ++ if (UseAESCTRIntrinsics) { ++ warning("AES/CTR intrinsics are not available on this CPU"); ++ FLAG_SET_DEFAULT(UseAESCTRIntrinsics, false); ++ } ++ ++ if (FLAG_IS_DEFAULT(UseCRC32)) { ++ FLAG_SET_DEFAULT(UseCRC32, true); ++ } ++ ++ if (UseCRC32) { ++ if (FLAG_IS_DEFAULT(UseCRC32Intrinsics)) { ++ UseCRC32Intrinsics = true; ++ } ++ ++ if (FLAG_IS_DEFAULT(UseCRC32CIntrinsics)) { ++ UseCRC32CIntrinsics = true; ++ } ++ } ++ ++#ifdef COMPILER2 ++ if (FLAG_IS_DEFAULT(UseMulAddIntrinsic)) { ++ FLAG_SET_DEFAULT(UseMulAddIntrinsic, true); ++ } ++ ++ if (FLAG_IS_DEFAULT(UseMontgomeryMultiplyIntrinsic)) { ++ UseMontgomeryMultiplyIntrinsic = true; ++ } ++ if (FLAG_IS_DEFAULT(UseMontgomerySquareIntrinsic)) { ++ UseMontgomerySquareIntrinsic = true; ++ } ++#endif ++ ++ // This machine allows unaligned memory accesses ++ if (FLAG_IS_DEFAULT(UseUnalignedAccesses)) { ++ FLAG_SET_DEFAULT(UseUnalignedAccesses, true); ++ } ++ ++ if (FLAG_IS_DEFAULT(UseFMA)) { ++ FLAG_SET_DEFAULT(UseFMA, true); ++ } ++ ++ if (FLAG_IS_DEFAULT(UseCopySignIntrinsic)) { ++ FLAG_SET_DEFAULT(UseCopySignIntrinsic, true); ++ } ++ ++ if (UseLSX) { ++ if (FLAG_IS_DEFAULT(UsePopCountInstruction)) { ++ FLAG_SET_DEFAULT(UsePopCountInstruction, true); ++ } ++ } else if (UsePopCountInstruction) { ++ if (!FLAG_IS_DEFAULT(UsePopCountInstruction)) ++ warning("PopCountI/L/VI(4) employs LSX whereas PopCountVI(8) hinges on LASX."); ++ FLAG_SET_DEFAULT(UsePopCountInstruction, false); ++ } ++ ++ UNSUPPORTED_OPTION(CriticalJNINatives); ++} ++ ++void VM_Version::initialize() { ++ ResourceMark rm; ++ // Making this stub must be FIRST use of assembler ++ ++ stub_blob = BufferBlob::create("get_cpu_info_stub", stub_size); ++ if (stub_blob == NULL) { ++ vm_exit_during_initialization("Unable to allocate get_cpu_info_stub"); ++ } ++ CodeBuffer c(stub_blob); ++ VM_Version_StubGenerator g(&c); ++ get_cpu_info_stub = CAST_TO_FN_PTR(get_cpu_info_stub_t, ++ g.generate_get_cpu_info()); ++ ++ get_processor_features(); ++} +diff --git a/src/hotspot/cpu/loongarch/vm_version_loongarch.hpp b/src/hotspot/cpu/loongarch/vm_version_loongarch.hpp +new file mode 100644 +index 00000000000..cae9f863c30 +--- /dev/null ++++ b/src/hotspot/cpu/loongarch/vm_version_loongarch.hpp +@@ -0,0 +1,295 @@ ++/* ++ * Copyright (c) 1997, 2013, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2023, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#ifndef CPU_LOONGARCH_VM_VERSION_LOONGARCH_HPP ++#define CPU_LOONGARCH_VM_VERSION_LOONGARCH_HPP ++ ++#include "runtime/abstract_vm_version.hpp" ++#include "runtime/globals_extension.hpp" ++#include "utilities/sizes.hpp" ++ ++class VM_Version: public Abstract_VM_Version { ++ friend class JVMCIVMStructs; ++ ++public: ++ ++ union LoongArch_Cpucfg_Id0 { ++ uint32_t value; ++ struct { ++ uint32_t PRID : 32; ++ } bits; ++ }; ++ ++ union LoongArch_Cpucfg_Id1 { ++ uint32_t value; ++ struct { ++ uint32_t ARCH : 2, ++ PGMMU : 1, ++ IOCSR : 1, ++ PALEN : 8, ++ VALEN : 8, ++ UAL : 1, // unaligned access ++ RI : 1, ++ EP : 1, ++ RPLV : 1, ++ HP : 1, ++ IOCSR_BRD : 1, ++ MSG_INT : 1, ++ : 5; ++ } bits; ++ }; ++ ++ union LoongArch_Cpucfg_Id2 { ++ uint32_t value; ++ struct { ++ uint32_t FP_CFG : 1, // FP is used, use FP_CFG instead ++ FP_SP : 1, ++ FP_DP : 1, ++ FP_VER : 3, ++ LSX : 1, ++ LASX : 1, ++ COMPLEX : 1, ++ CRYPTO : 1, ++ LVZ : 1, ++ LVZ_VER : 3, ++ LLFTP : 1, ++ LLFTP_VER : 3, ++ LBT_X86 : 1, ++ LBT_ARM : 1, ++ LBT_MIPS : 1, ++ LSPW : 1, ++ LAM : 1, ++ : 9; ++ } bits; ++ }; ++ ++ union LoongArch_Cpucfg_Id3 { ++ uint32_t value; ++ struct { ++ uint32_t CCDMA : 1, ++ SFB : 1, ++ UCACC : 1, ++ LLEXC : 1, ++ SCDLY : 1, ++ LLDBAR : 1, ++ ITLBHMC : 1, ++ ICHMC : 1, ++ SPW_LVL : 3, ++ SPW_HP_HF : 1, ++ RVA : 1, ++ RVAMAXM1 : 4, ++ : 15; ++ } bits; ++ }; ++ ++ union LoongArch_Cpucfg_Id4 { ++ uint32_t value; ++ struct { ++ uint32_t CC_FREQ : 32; ++ } bits; ++ }; ++ ++ union LoongArch_Cpucfg_Id5 { ++ uint32_t value; ++ struct { ++ uint32_t CC_MUL : 16, ++ CC_DIV : 16; ++ } bits; ++ }; ++ ++ union LoongArch_Cpucfg_Id6 { ++ uint32_t value; ++ struct { ++ uint32_t PMP : 1, ++ PMVER : 3, ++ PMNUM : 4, ++ PMBITS : 6, ++ UPM : 1, ++ : 17; ++ } bits; ++ }; ++ ++ union LoongArch_Cpucfg_Id10 { ++ uint32_t value; ++ struct { ++ uint32_t L1IU_PRESENT : 1, ++ L1IU_UNIFY : 1, ++ L1D_PRESENT : 1, ++ L2IU_PRESENT : 1, ++ L2IU_UNIFY : 1, ++ L2IU_PRIVATE : 1, ++ L2IU_INCLUSIVE : 1, ++ L2D_PRESENT : 1, ++ L2D_PRIVATE : 1, ++ L2D_INCLUSIVE : 1, ++ L3IU_PRESENT : 1, ++ L3IU_UNIFY : 1, ++ L3IU_PRIVATE : 1, ++ L3IU_INCLUSIVE : 1, ++ L3D_PRESENT : 1, ++ L3D_PRIVATE : 1, ++ L3D_INCLUSIVE : 1, ++ : 15; ++ } bits; ++ }; ++ ++ union LoongArch_Cpucfg_Id11 { ++ uint32_t value; ++ struct { ++ uint32_t WAYM1 : 16, ++ INDEXMLOG2 : 8, ++ LINESIZELOG2 : 7, ++ : 1; ++ } bits; ++ }; ++ ++ union LoongArch_Cpucfg_Id12 { ++ uint32_t value; ++ struct { ++ uint32_t WAYM1 : 16, ++ INDEXMLOG2 : 8, ++ LINESIZELOG2 : 7, ++ : 1; ++ } bits; ++ }; ++ ++ union LoongArch_Cpucfg_Id13 { ++ uint32_t value; ++ struct { ++ uint32_t WAYM1 : 16, ++ INDEXMLOG2 : 8, ++ LINESIZELOG2 : 7, ++ : 1; ++ } bits; ++ }; ++ ++ union LoongArch_Cpucfg_Id14 { ++ uint32_t value; ++ struct { ++ uint32_t WAYM1 : 16, ++ INDEXMLOG2 : 8, ++ LINESIZELOG2 : 7, ++ : 1; ++ } bits; ++ }; ++ ++#define CPU_FEATURE_FLAGS(decl) \ ++ decl(LAM, lam, 1) \ ++ decl(UAL, ual, 2) \ ++ decl(LSX, lsx, 4) \ ++ decl(LASX, lasx, 5) \ ++ decl(COMPLEX, 
complex, 7) \ ++ decl(CRYPTO, crypto, 8) \ ++ decl(LBT_X86, lbt_x86, 10) \ ++ decl(LBT_ARM, lbt_arm, 11) \ ++ decl(LBT_MIPS, lbt_mips, 12) \ ++ /* flags above must follow Linux HWCAP */ \ ++ decl(LA32, la32, 13) \ ++ decl(LA64, la64, 14) \ ++ decl(FP, fp, 15) \ ++ decl(LLEXC, llexc, 16) \ ++ decl(SCDLY, scdly, 17) \ ++ decl(LLDBAR, lldbar, 18) \ ++ decl(CCDMA, ccdma, 19) \ ++ decl(LLSYNC, llsync, 20) \ ++ decl(TGTSYNC, tgtsync, 21) \ ++ decl(ULSYNC, ulsync, 22) \ ++ ++ enum Feature_Flag { ++#define DECLARE_CPU_FEATURE_FLAG(id, name, bit) CPU_##id = (1 << bit), ++ CPU_FEATURE_FLAGS(DECLARE_CPU_FEATURE_FLAG) ++#undef DECLARE_CPU_FEATURE_FLAG ++ }; ++ ++protected: ++ ++ static bool _cpu_info_is_initialized; ++ ++ struct CpuidInfo { ++ LoongArch_Cpucfg_Id0 cpucfg_info_id0; ++ LoongArch_Cpucfg_Id1 cpucfg_info_id1; ++ LoongArch_Cpucfg_Id2 cpucfg_info_id2; ++ LoongArch_Cpucfg_Id3 cpucfg_info_id3; ++ LoongArch_Cpucfg_Id4 cpucfg_info_id4; ++ LoongArch_Cpucfg_Id5 cpucfg_info_id5; ++ LoongArch_Cpucfg_Id6 cpucfg_info_id6; ++ LoongArch_Cpucfg_Id10 cpucfg_info_id10; ++ LoongArch_Cpucfg_Id11 cpucfg_info_id11; ++ LoongArch_Cpucfg_Id12 cpucfg_info_id12; ++ LoongArch_Cpucfg_Id13 cpucfg_info_id13; ++ LoongArch_Cpucfg_Id14 cpucfg_info_id14; ++ }; ++ ++ // The actual cpuid info block ++ static CpuidInfo _cpuid_info; ++ ++ static uint32_t get_feature_flags_by_cpucfg(); ++ static void get_processor_features(); ++ static void get_os_cpu_info(); ++ ++public: ++ // Offsets for cpuid asm stub ++ static ByteSize Loongson_Cpucfg_id0_offset() { return byte_offset_of(CpuidInfo, cpucfg_info_id0); } ++ static ByteSize Loongson_Cpucfg_id1_offset() { return byte_offset_of(CpuidInfo, cpucfg_info_id1); } ++ static ByteSize Loongson_Cpucfg_id2_offset() { return byte_offset_of(CpuidInfo, cpucfg_info_id2); } ++ static ByteSize Loongson_Cpucfg_id3_offset() { return byte_offset_of(CpuidInfo, cpucfg_info_id3); } ++ static ByteSize Loongson_Cpucfg_id4_offset() { return byte_offset_of(CpuidInfo, cpucfg_info_id4); } ++ static ByteSize Loongson_Cpucfg_id5_offset() { return byte_offset_of(CpuidInfo, cpucfg_info_id5); } ++ static ByteSize Loongson_Cpucfg_id6_offset() { return byte_offset_of(CpuidInfo, cpucfg_info_id6); } ++ static ByteSize Loongson_Cpucfg_id10_offset() { return byte_offset_of(CpuidInfo, cpucfg_info_id10); } ++ static ByteSize Loongson_Cpucfg_id11_offset() { return byte_offset_of(CpuidInfo, cpucfg_info_id11); } ++ static ByteSize Loongson_Cpucfg_id12_offset() { return byte_offset_of(CpuidInfo, cpucfg_info_id12); } ++ static ByteSize Loongson_Cpucfg_id13_offset() { return byte_offset_of(CpuidInfo, cpucfg_info_id13); } ++ static ByteSize Loongson_Cpucfg_id14_offset() { return byte_offset_of(CpuidInfo, cpucfg_info_id14); } ++ ++ static void clean_cpuFeatures() { _features = 0; } ++ ++ // Initialization ++ static void initialize(); ++ ++ static bool cpu_info_is_initialized() { return _cpu_info_is_initialized; } ++ ++ static bool is_la32() { return _features & CPU_LA32; } ++ static bool is_la64() { return _features & CPU_LA64; } ++ static bool supports_crypto() { return _features & CPU_CRYPTO; } ++ static bool supports_lsx() { return _features & CPU_LSX; } ++ static bool supports_lasx() { return _features & CPU_LASX; } ++ static bool supports_lam() { return _features & CPU_LAM; } ++ static bool supports_llexc() { return _features & CPU_LLEXC; } ++ static bool supports_scdly() { return _features & CPU_SCDLY; } ++ static bool supports_lldbar() { return _features & CPU_LLDBAR; } ++ static bool supports_ual() { return _features & 
CPU_UAL; } ++ static bool supports_lbt_x86() { return _features & CPU_LBT_X86; } ++ static bool supports_lbt_arm() { return _features & CPU_LBT_ARM; } ++ static bool supports_lbt_mips() { return _features & CPU_LBT_MIPS; } ++ static bool needs_llsync() { return !supports_lldbar(); } ++ static bool needs_tgtsync() { return 1; } ++ static bool needs_ulsync() { return 1; } ++ ++ static bool supports_fast_class_init_checks() { return true; } ++ constexpr static bool supports_stack_watermark_barrier() { return true; } ++}; ++ ++#endif // CPU_LOONGARCH_VM_VERSION_LOONGARCH_HPP +diff --git a/src/hotspot/cpu/loongarch/vmreg_loongarch.cpp b/src/hotspot/cpu/loongarch/vmreg_loongarch.cpp +new file mode 100644 +index 00000000000..79d2560f494 +--- /dev/null ++++ b/src/hotspot/cpu/loongarch/vmreg_loongarch.cpp +@@ -0,0 +1,58 @@ ++/* ++ * Copyright (c) 2006, 2012, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#include "precompiled.hpp" ++#include "asm/assembler.hpp" ++#include "code/vmreg.hpp" ++ ++ ++ ++void VMRegImpl::set_regName() { ++ Register reg = ::as_Register(0); ++ int i; ++ for (i = 0; i < ConcreteRegisterImpl::max_gpr ; ) { ++ for (int j = 0 ; j < RegisterImpl::max_slots_per_register ; j++) { ++ regName[i++] = reg->name(); ++ } ++ reg = reg->successor(); ++ } ++ ++ FloatRegister freg = ::as_FloatRegister(0); ++ for ( ; i < ConcreteRegisterImpl::max_fpr ; ) { ++ for (int j = 0 ; j < FloatRegisterImpl::max_slots_per_register ; j++) { ++ regName[i++] = freg->name(); ++ } ++ freg = freg->successor(); ++ } ++ ++ for ( ; i < ConcreteRegisterImpl::number_of_registers ; i ++ ) { ++ regName[i] = "NON-GPR-FPR"; ++ } ++} ++ ++VMReg VMRegImpl::vmStorageToVMReg(int type, int index) { ++ Unimplemented(); ++ return VMRegImpl::Bad(); ++} +diff --git a/src/hotspot/cpu/loongarch/vmreg_loongarch.hpp b/src/hotspot/cpu/loongarch/vmreg_loongarch.hpp +new file mode 100644 +index 00000000000..819eaff0bb3 +--- /dev/null ++++ b/src/hotspot/cpu/loongarch/vmreg_loongarch.hpp +@@ -0,0 +1,58 @@ ++/* ++ * Copyright (c) 2006, 2010, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. 
++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#ifndef CPU_LOONGARCH_VMREG_LOONGARCH_HPP ++#define CPU_LOONGARCH_VMREG_LOONGARCH_HPP ++ ++inline bool is_Register() { ++ return (unsigned int) value() < (unsigned int) ConcreteRegisterImpl::max_gpr; ++} ++ ++inline Register as_Register() { ++ assert( is_Register(), "must be"); ++ return ::as_Register(value() / RegisterImpl::max_slots_per_register); ++} ++ ++inline bool is_FloatRegister() { ++ return value() >= ConcreteRegisterImpl::max_gpr && value() < ConcreteRegisterImpl::max_fpr; ++} ++ ++inline FloatRegister as_FloatRegister() { ++ assert( is_FloatRegister() && is_even(value()), "must be" ); ++ return ::as_FloatRegister((value() - ConcreteRegisterImpl::max_gpr) / ++ FloatRegisterImpl::max_slots_per_register); ++} ++ ++inline bool is_concrete() { ++ assert(is_reg(), "must be"); ++ if (is_FloatRegister()) { ++ int base = value() - ConcreteRegisterImpl::max_gpr; ++ return base % FloatRegisterImpl::max_slots_per_register == 0; ++ } else { ++ return is_even(value()); ++ } ++} ++ ++#endif // CPU_LOONGARCH_VMREG_LOONGARCH_HPP +diff --git a/src/hotspot/cpu/loongarch/vmreg_loongarch.inline.hpp b/src/hotspot/cpu/loongarch/vmreg_loongarch.inline.hpp +new file mode 100644 +index 00000000000..edb78e36daa +--- /dev/null ++++ b/src/hotspot/cpu/loongarch/vmreg_loongarch.inline.hpp +@@ -0,0 +1,39 @@ ++/* ++ * Copyright (c) 2006, 2012, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ * ++ */ ++ ++#ifndef CPU_LOONGARCH_VMREG_LOONGARCH_INLINE_HPP ++#define CPU_LOONGARCH_VMREG_LOONGARCH_INLINE_HPP ++ ++inline VMReg RegisterImpl::as_VMReg() { ++ if( this==noreg ) return VMRegImpl::Bad(); ++ return VMRegImpl::as_VMReg(encoding() * RegisterImpl::max_slots_per_register); ++} ++ ++inline VMReg FloatRegisterImpl::as_VMReg() { ++ return VMRegImpl::as_VMReg((encoding() * FloatRegisterImpl::max_slots_per_register) + ++ ConcreteRegisterImpl::max_gpr); ++} ++ ++#endif // CPU_LOONGARCH_VMREG_LOONGARCH_INLINE_HPP +diff --git a/src/hotspot/cpu/loongarch/vtableStubs_loongarch_64.cpp b/src/hotspot/cpu/loongarch/vtableStubs_loongarch_64.cpp +new file mode 100644 +index 00000000000..6a190529b64 +--- /dev/null ++++ b/src/hotspot/cpu/loongarch/vtableStubs_loongarch_64.cpp +@@ -0,0 +1,331 @@ ++/* ++ * Copyright (c) 2003, 2014, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#include "precompiled.hpp" ++#include "asm/macroAssembler.hpp" ++#include "code/vtableStubs.hpp" ++#include "interp_masm_loongarch.hpp" ++#include "memory/resourceArea.hpp" ++#include "oops/compiledICHolder.hpp" ++#include "oops/klass.inline.hpp" ++#include "oops/klassVtable.hpp" ++#include "runtime/sharedRuntime.hpp" ++#include "vmreg_loongarch.inline.hpp" ++#ifdef COMPILER2 ++#include "opto/runtime.hpp" ++#endif ++ ++ ++// machine-dependent part of VtableStubs: create VtableStub of correct size and ++// initialize its code ++ ++#define __ masm-> ++ ++#define A0 RA0 ++#define A1 RA1 ++#define A2 RA2 ++#define A3 RA3 ++#define A4 RA4 ++#define A5 RA5 ++#define A6 RA6 ++#define A7 RA7 ++#define T0 RT0 ++#define T1 RT1 ++#define T2 RT2 ++#define T3 RT3 ++#define T4 RT4 ++#define T5 RT5 ++#define T6 RT6 ++#define T7 RT7 ++#define T8 RT8 ++ ++#ifndef PRODUCT ++extern "C" void bad_compiled_vtable_index(JavaThread* thread, oop receiver, int index); ++#endif ++ ++// used by compiler only; reciever in T0. ++// used registers : ++// Rmethod : receiver klass & method ++// NOTE: If this code is used by the C1, the receiver_location is always 0. ++// when reach here, receiver in T0, klass in T8 ++VtableStub* VtableStubs::create_vtable_stub(int vtable_index) { ++ // Read "A word on VtableStub sizing" in share/code/vtableStubs.hpp for details on stub sizing. ++ const int stub_code_length = code_size_limit(true); ++ VtableStub* s = new(stub_code_length) VtableStub(true, vtable_index); ++ // Can be NULL if there is no free space in the code cache. 
++ if (s == NULL) { ++ return NULL; ++ } ++ ++ // Count unused bytes in instruction sequences of variable size. ++ // We add them to the computed buffer size in order to avoid ++ // overflow in subsequently generated stubs. ++ address start_pc; ++ int slop_bytes = 0; ++ int slop_delta = 0; ++ int load_const_maxLen = 4*BytesPerInstWord; // load_const generates 4 instructions. Assume that as max size for li ++ // No variance was detected in vtable stub sizes. Setting index_dependent_slop == 0 will unveil any deviation from this observation. ++ const int index_dependent_slop = 0; ++ ++ ResourceMark rm; ++ CodeBuffer cb(s->entry_point(), stub_code_length); ++ MacroAssembler* masm = new MacroAssembler(&cb); ++ Register t1 = T8, t2 = Rmethod; ++#if (!defined(PRODUCT) && defined(COMPILER2)) ++ if (CountCompiledCalls) { ++ start_pc = __ pc(); ++ __ li(AT, SharedRuntime::nof_megamorphic_calls_addr()); ++ slop_delta = load_const_maxLen - (__ pc() - start_pc); ++ slop_bytes += slop_delta; ++ assert(slop_delta >= 0, "negative slop(%d) encountered, adjust code size estimate!", slop_delta); ++ __ ld_w(t1, AT , 0); ++ __ addi_w(t1, t1, 1); ++ __ st_w(t1, AT,0); ++ } ++#endif ++ ++ // get receiver (need to skip return address on top of stack) ++ //assert(receiver_location == T0->as_VMReg(), "receiver expected in T0"); ++ ++ // get receiver klass ++ address npe_addr = __ pc(); ++ __ load_klass(t1, T0); ++ ++#ifndef PRODUCT ++ if (DebugVtables) { ++ Label L; ++ // check offset vs vtable length ++ __ ld_w(t2, t1, in_bytes(Klass::vtable_length_offset())); ++ assert(Assembler::is_simm16(vtable_index*vtableEntry::size()), "change this code"); ++ __ li(AT, vtable_index*vtableEntry::size()); ++ __ blt(AT, t2, L); ++ __ li(A2, vtable_index); ++ __ move(A1, A0); ++ ++ // VTABLE TODO: find upper bound for call_VM length. 
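++ // Capture the PC before the call_VM sequence so its emitted size can be compared
++ // against the 512-byte estimate asserted just below.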
++ start_pc = __ pc(); ++ __ call_VM(noreg, CAST_FROM_FN_PTR(address, bad_compiled_vtable_index), A1, A2); ++ const ptrdiff_t estimate = 512; ++ const ptrdiff_t codesize = __ pc() - start_pc; ++ slop_delta = estimate - codesize; // call_VM varies in length, depending on data ++ assert(slop_delta >= 0, "vtable #%d: Code size estimate (%d) for DebugVtables too small, required: %d", vtable_index, (int)estimate, (int)codesize); ++ __ bind(L); ++ } ++#endif // PRODUCT ++ const Register method = Rmethod; ++ ++ // load Method* and target address ++ start_pc = __ pc(); ++ // lookup_virtual_method generates 6 instructions (worst case) ++ __ lookup_virtual_method(t1, vtable_index, method); ++ slop_delta = 6*BytesPerInstWord - (int)(__ pc() - start_pc); ++ slop_bytes += slop_delta; ++ assert(slop_delta >= 0, "negative slop(%d) encountered, adjust code size estimate!", slop_delta); ++ ++#ifndef PRODUCT ++ if (DebugVtables) { ++ Label L; ++ __ beq(method, R0, L); ++ __ ld_d(AT, method,in_bytes(Method::from_compiled_offset())); ++ __ bne(AT, R0, L); ++ __ stop("Vtable entry is NULL"); ++ __ bind(L); ++ } ++#endif // PRODUCT ++ ++ // T8: receiver klass ++ // T0: receiver ++ // Rmethod: Method* ++ // T4: entry ++ address ame_addr = __ pc(); ++ __ ld_ptr(T4, method,in_bytes(Method::from_compiled_offset())); ++ __ jr(T4); ++ masm->flush(); ++ slop_bytes += index_dependent_slop; // add'l slop for size variance due to large itable offsets ++ bookkeeping(masm, tty, s, npe_addr, ame_addr, true, vtable_index, slop_bytes, index_dependent_slop); ++ ++ return s; ++} ++ ++ ++// used registers : ++// T1 T2 ++// when reach here, the receiver in T0, klass in T1 ++VtableStub* VtableStubs::create_itable_stub(int itable_index) { ++ // Read "A word on VtableStub sizing" in share/code/vtableStubs.hpp for details on stub sizing. ++ const int stub_code_length = code_size_limit(false); ++ VtableStub* s = new(stub_code_length) VtableStub(false, itable_index); ++ // Can be NULL if there is no free space in the code cache. ++ if (s == NULL) { ++ return NULL; ++ } ++ // Count unused bytes in instruction sequences of variable size. ++ // We add them to the computed buffer size in order to avoid ++ // overflow in subsequently generated stubs. ++ address start_pc; ++ int slop_bytes = 0; ++ int slop_delta = 0; ++ int load_const_maxLen = 4*BytesPerInstWord; // load_const generates 4 instructions. 
Assume that as max size for li ++ ++ ResourceMark rm; ++ CodeBuffer cb(s->entry_point(), stub_code_length); ++ MacroAssembler *masm = new MacroAssembler(&cb); ++ ++ // we use T8, T4, T2 as temparary register, they are free from register allocator ++ Register t1 = T8, t2 = T2, t3 = T4; ++ // Entry arguments: ++ // T1: Interface ++ // T0: Receiver ++ ++#if (!defined(PRODUCT) && defined(COMPILER2)) ++ if (CountCompiledCalls) { ++ start_pc = __ pc(); ++ __ li(AT, SharedRuntime::nof_megamorphic_calls_addr()); ++ slop_delta = load_const_maxLen - (__ pc() - start_pc); ++ slop_bytes += slop_delta; ++ assert(slop_delta >= 0, "negative slop(%d) encountered, adjust code size estimate!", slop_delta); ++ __ ld_w(T8, AT, 0); ++ __ addi_w(T8, T8, 1); ++ __ st_w(T8, AT, 0); ++ } ++#endif // PRODUCT ++ ++ const Register holder_klass_reg = T1; // declaring interface klass (DECC) ++ const Register resolved_klass_reg = Rmethod; // resolved interface klass (REFC) ++ const Register icholder_reg = T1; ++ ++ Label L_no_such_interface; ++ ++ __ ld_ptr(resolved_klass_reg, icholder_reg, CompiledICHolder::holder_klass_offset()); ++ __ ld_ptr(holder_klass_reg, icholder_reg, CompiledICHolder::holder_metadata_offset()); ++ ++ // get receiver klass (also an implicit null-check) ++ address npe_addr = __ pc(); ++ __ load_klass(t1, T0); ++ ++ // x86 use lookup_interface_method, but lookup_interface_method makes more instructions. ++ // No dynamic code size variance here, so slop_bytes is not needed. ++ const int base = in_bytes(Klass::vtable_start_offset()); ++ assert(vtableEntry::size() * wordSize == 8, "adjust the scaling in the code below"); ++ assert(Assembler::is_simm16(base), "change this code"); ++ __ addi_d(t2, t1, base); ++ __ ld_w(AT, t1, in_bytes(Klass::vtable_length_offset())); ++ __ alsl_d(t2, AT, t2, Address::times_8 - 1); ++ ++ __ move(t3, t2); ++ { ++ Label hit, entry; ++ ++ __ ld_ptr(AT, t3, itableOffsetEntry::interface_offset_in_bytes()); ++ __ beq(AT, resolved_klass_reg, hit); ++ ++ __ bind(entry); ++ // Check that the entry is non-null. A null entry means that ++ // the receiver class doesn't implement the interface, and wasn't the ++ // same as when the caller was compiled. ++ __ beqz(AT, L_no_such_interface); ++ ++ __ addi_d(t3, t3, itableOffsetEntry::size() * wordSize); ++ __ ld_ptr(AT, t3, itableOffsetEntry::interface_offset_in_bytes()); ++ __ bne(AT, resolved_klass_reg, entry); ++ ++ __ bind(hit); ++ } ++ ++ { ++ Label hit, entry; ++ ++ __ ld_ptr(AT, t2, itableOffsetEntry::interface_offset_in_bytes()); ++ __ beq(AT, holder_klass_reg, hit); ++ ++ __ bind(entry); ++ // Check that the entry is non-null. A null entry means that ++ // the receiver class doesn't implement the interface, and wasn't the ++ // same as when the caller was compiled. ++ __ beqz(AT, L_no_such_interface); ++ ++ __ addi_d(t2, t2, itableOffsetEntry::size() * wordSize); ++ __ ld_ptr(AT, t2, itableOffsetEntry::interface_offset_in_bytes()); ++ __ bne(AT, holder_klass_reg, entry); ++ ++ __ bind(hit); ++ } ++ ++ // We found a hit, move offset into T4 ++ __ ld_wu(t2, t2, itableOffsetEntry::offset_offset_in_bytes()); ++ ++ // Compute itableMethodEntry. 
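++ // t2 holds the byte offset (relative to the receiver klass in t1) of the holder
++ // interface's itable method block; method_offset then selects the Method* slot for
++ // this itable_index, and the ldx_d below loads the target Method*.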
++ const int method_offset = (itableMethodEntry::size() * wordSize * itable_index) + ++ itableMethodEntry::method_offset_in_bytes(); ++ ++ // Get Method* and entrypoint for compiler ++ const Register method = Rmethod; ++ ++ start_pc = __ pc(); ++ __ li(AT, method_offset); ++ slop_delta = load_const_maxLen - (__ pc() - start_pc); ++ slop_bytes += slop_delta; ++ assert(slop_delta >= 0, "negative slop(%d) encountered, adjust code size estimate!", slop_delta); ++ __ add_d(AT, AT, t2); ++ __ ldx_d(method, t1, AT); ++ ++#ifdef ASSERT ++ if (DebugVtables) { ++ Label L1; ++ __ beq(method, R0, L1); ++ __ ld_d(AT, method,in_bytes(Method::from_compiled_offset())); ++ __ bne(AT, R0, L1); ++ __ stop("compiler entrypoint is null"); ++ __ bind(L1); ++ } ++#endif // ASSERT ++ ++ // Rmethod: Method* ++ // T0: receiver ++ // T4: entry point ++ address ame_addr = __ pc(); ++ __ ld_ptr(T4, method, in_bytes(Method::from_compiled_offset())); ++ __ jr(T4); ++ ++ __ bind(L_no_such_interface); ++ // Handle IncompatibleClassChangeError in itable stubs. ++ // More detailed error message. ++ // We force resolving of the call site by jumping to the "handle ++ // wrong method" stub, and so let the interpreter runtime do all the ++ // dirty work. ++ assert(SharedRuntime::get_handle_wrong_method_stub() != NULL, "check initialization order"); ++ __ jmp((address)SharedRuntime::get_handle_wrong_method_stub(), relocInfo::runtime_call_type); ++ ++ masm->flush(); ++ bookkeeping(masm, tty, s, npe_addr, ame_addr, false, itable_index, slop_bytes, 0); ++ ++ return s; ++} ++ ++// NOTE : whenever you change the code above, dont forget to change the const here ++int VtableStub::pd_code_alignment() { ++ const unsigned int icache_line_size = wordSize; ++ return icache_line_size; ++} +diff --git a/src/hotspot/cpu/mips/abstractInterpreter_mips.cpp b/src/hotspot/cpu/mips/abstractInterpreter_mips.cpp +new file mode 100644 +index 00000000000..c34334ec4c7 +--- /dev/null ++++ b/src/hotspot/cpu/mips/abstractInterpreter_mips.cpp +@@ -0,0 +1,133 @@ ++/* ++ * Copyright (c) 2003, 2013, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ * ++ */ ++ ++#include "precompiled.hpp" ++#include "ci/ciMethod.hpp" ++#include "interpreter/interpreter.hpp" ++#include "oops/klass.inline.hpp" ++#include "runtime/frame.inline.hpp" ++ ++// asm based interpreter deoptimization helpers ++int AbstractInterpreter::size_activation(int max_stack, ++ int temps, ++ int extra_args, ++ int monitors, ++ int callee_params, ++ int callee_locals, ++ bool is_top_frame) { ++ // Note: This calculation must exactly parallel the frame setup ++ // in AbstractInterpreterGenerator::generate_method_entry. ++ ++ // fixed size of an interpreter frame: ++ int overhead = frame::sender_sp_offset - ++ frame::interpreter_frame_initial_sp_offset; ++ // Our locals were accounted for by the caller (or last_frame_adjust ++ // on the transistion) Since the callee parameters already account ++ // for the callee's params we only need to account for the extra ++ // locals. ++ int size = overhead + ++ (callee_locals - callee_params)*Interpreter::stackElementWords + ++ monitors * frame::interpreter_frame_monitor_size() + ++ temps* Interpreter::stackElementWords + extra_args; ++ ++ return size; ++} ++ ++// How much stack a method activation needs in words. ++int AbstractInterpreter::size_top_interpreter_activation(Method* method) { ++ ++ const int entry_size = frame::interpreter_frame_monitor_size(); ++ ++ // total overhead size: entry_size + (saved ebp thru expr stack bottom). ++ // be sure to change this if you add/subtract anything to/from the overhead area ++ const int overhead_size = -(frame::interpreter_frame_initial_sp_offset) + entry_size; ++ ++ const int stub_code = 6; // see generate_call_stub ++ // return overhead_size + method->max_locals() + method->max_stack() + stub_code; ++ const int method_stack = (method->max_locals() + method->max_stack()) * ++ Interpreter::stackElementWords; ++ return overhead_size + method_stack + stub_code; ++} ++ ++void AbstractInterpreter::layout_activation(Method* method, ++ int tempcount, ++ int popframe_extra_args, ++ int moncount, ++ int caller_actual_parameters, ++ int callee_param_count, ++ int callee_locals, ++ frame* caller, ++ frame* interpreter_frame, ++ bool is_top_frame, ++ bool is_bottom_frame) { ++ // Note: This calculation must exactly parallel the frame setup ++ // in AbstractInterpreterGenerator::generate_method_entry. ++ // If interpreter_frame!=NULL, set up the method, locals, and monitors. ++ // The frame interpreter_frame, if not NULL, is guaranteed to be the ++ // right size, as determined by a previous call to this method. 
++ // It is also guaranteed to be walkable even though it is in a skeletal state ++ ++ // fixed size of an interpreter frame: ++ ++ int max_locals = method->max_locals() * Interpreter::stackElementWords; ++ int extra_locals = (method->max_locals() - method->size_of_parameters()) * Interpreter::stackElementWords; ++ ++#ifdef ASSERT ++ assert(caller->sp() == interpreter_frame->sender_sp(), "Frame not properly walkable(2)"); ++#endif ++ ++ interpreter_frame->interpreter_frame_set_method(method); ++ // NOTE the difference in using sender_sp and interpreter_frame_sender_sp ++ // interpreter_frame_sender_sp is the original sp of the caller (the unextended_sp) ++ // and sender_sp is fp+8 ++ intptr_t* locals = interpreter_frame->sender_sp() + max_locals - 1; ++ ++#ifdef ASSERT ++ if (caller->is_interpreted_frame()) { ++ assert(locals < caller->fp() + frame::interpreter_frame_initial_sp_offset, "bad placement"); ++ } ++#endif ++ ++ interpreter_frame->interpreter_frame_set_locals(locals); ++ BasicObjectLock* montop = interpreter_frame->interpreter_frame_monitor_begin(); ++ BasicObjectLock* monbot = montop - moncount; ++ interpreter_frame->interpreter_frame_set_monitor_end(montop - moncount); ++ ++ //set last sp; ++ intptr_t* esp = (intptr_t*) monbot - tempcount*Interpreter::stackElementWords - ++ popframe_extra_args; ++ interpreter_frame->interpreter_frame_set_last_sp(esp); ++ // All frames but the initial interpreter frame we fill in have a ++ // value for sender_sp that allows walking the stack but isn't ++ // truly correct. Correct the value here. ++ // ++ if (extra_locals != 0 && ++ interpreter_frame->sender_sp() == interpreter_frame->interpreter_frame_sender_sp() ) { ++ interpreter_frame->set_interpreter_frame_sender_sp(caller->sp() + extra_locals); ++ } ++ *interpreter_frame->interpreter_frame_cache_addr() = method->constants()->cache(); ++ *interpreter_frame->interpreter_frame_mirror_addr() = method->method_holder()->java_mirror(); ++} ++ +diff --git a/src/hotspot/cpu/mips/assembler_mips.cpp b/src/hotspot/cpu/mips/assembler_mips.cpp +new file mode 100644 +index 00000000000..2205ef1a42c +--- /dev/null ++++ b/src/hotspot/cpu/mips/assembler_mips.cpp +@@ -0,0 +1,764 @@ ++/* ++ * Copyright (c) 1997, 2014, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ * ++ */ ++ ++#include "precompiled.hpp" ++#include "asm/assembler.hpp" ++#include "asm/assembler.inline.hpp" ++#include "gc/shared/cardTableBarrierSet.hpp" ++#include "gc/shared/collectedHeap.inline.hpp" ++#include "interpreter/interpreter.hpp" ++#include "memory/resourceArea.hpp" ++#include "prims/methodHandles.hpp" ++#include "runtime/biasedLocking.hpp" ++#include "runtime/objectMonitor.hpp" ++#include "runtime/os.hpp" ++#include "runtime/sharedRuntime.hpp" ++#include "runtime/stubRoutines.hpp" ++#include "utilities/macros.hpp" ++ ++#ifdef PRODUCT ++#define BLOCK_COMMENT(str) /* nothing */ ++#define STOP(error) stop(error) ++#else ++#define BLOCK_COMMENT(str) block_comment(str) ++#define STOP(error) block_comment(error); stop(error) ++#endif ++ ++#define BIND(label) bind(label); BLOCK_COMMENT(#label ":") ++// Implementation of AddressLiteral ++ ++AddressLiteral::AddressLiteral(address target, relocInfo::relocType rtype) { ++ _is_lval = false; ++ _target = target; ++ _rspec = rspec_from_rtype(rtype, target); ++} ++ ++// Implementation of Address ++ ++Address Address::make_array(ArrayAddress adr) { ++ AddressLiteral base = adr.base(); ++ Address index = adr.index(); ++ assert(index._disp == 0, "must not have disp"); // maybe it can? ++ Address array(index._base, index._index, index._scale, (intptr_t) base.target()); ++ array._rspec = base._rspec; ++ return array; ++} ++ ++// exceedingly dangerous constructor ++Address::Address(address loc, RelocationHolder spec) { ++ _base = noreg; ++ _index = noreg; ++ _scale = no_scale; ++ _disp = (intptr_t) loc; ++ _rspec = spec; ++} ++ ++ ++#define A0 RA0 ++#define A1 RA1 ++#define A2 RA2 ++#define A3 RA3 ++#define A4 RA4 ++#define A5 RA5 ++#define A6 RA6 ++#define A7 RA7 ++#define T0 RT0 ++#define T1 RT1 ++#define T2 RT2 ++#define T3 RT3 ++#define T8 RT8 ++#define T9 RT9 ++ ++// Implementation of Assembler ++const char *Assembler::ops_name[] = { ++ "special", "regimm", "j", "jal", "beq", "bne", "blez", "bgtz", ++ "addi", "addiu", "slti", "sltiu", "andi", "ori", "xori", "lui", ++ "cop0", "cop1", "cop2", "cop3", "beql", "bnel", "bleql", "bgtzl", ++ "daddi", "daddiu", "ldl", "ldr", "", "", "", "", ++ "lb", "lh", "lwl", "lw", "lbu", "lhu", "lwr", "lwu", ++ "sb", "sh", "swl", "sw", "sdl", "sdr", "swr", "cache", ++ "ll", "lwc1", "", "", "lld", "ldc1", "", "ld", ++ "sc", "swc1", "", "", "scd", "sdc1", "", "sd" ++}; ++ ++const char* Assembler::special_name[] = { ++ "sll", "", "srl", "sra", "sllv", "", "srlv", "srav", ++ "jr", "jalr", "movz", "movn", "syscall", "break", "", "sync", ++ "mfhi", "mthi", "mflo", "mtlo", "dsll", "", "dsrl", "dsra", ++ "mult", "multu", "div", "divu", "dmult", "dmultu", "ddiv", "ddivu", ++ "add", "addu", "sub", "subu", "and", "or", "xor", "nor", ++ "", "", "slt", "sltu", "dadd", "daddu", "dsub", "dsubu", ++ "tge", "tgeu", "tlt", "tltu", "teq", "", "tne", "", ++ "dsll", "", "dsrl", "dsra", "dsll32", "", "dsrl32", "dsra32" ++}; ++ ++const char* Assembler::cop1_name[] = { ++ "add", "sub", "mul", "div", "sqrt", "abs", "mov", "neg", ++ "round.l", "trunc.l", "ceil.l", "floor.l", "round.w", "trunc.w", "ceil.w", "floor.w", ++ "", "", "", "", "", "", "", "", ++ "", "", "", "", "", "", "", "", ++ "", "", "", "", "", "", "", "", ++ "", "", "", "", "", "", "", "", ++ "c.f", "c.un", "c.eq", "c.ueq", "c.olt", "c.ult", "c.ole", "c.ule", ++ "c.sf", "c.ngle", "c.seq", "c.ngl", "c.lt", "c.nge", "c.le", "c.ngt" ++}; ++ ++const char* Assembler::cop1x_name[] = { ++ "lwxc1", "ldxc1", "", "", "", "luxc1", "", "", ++ "swxc1", "sdxc1", "", "", "", 
"suxc1", "", "prefx", ++ "", "", "", "", "", "", "alnv.ps", "", ++ "", "", "", "", "", "", "", "", ++ "madd.s", "madd.d", "", "", "", "", "madd.ps", "", ++ "msub.s", "msub.d", "", "", "", "", "msub.ps", "", ++ "nmadd.s", "nmadd.d", "", "", "", "", "nmadd.ps", "", ++ "nmsub.s", "nmsub.d", "", "", "", "", "nmsub.ps", "" ++}; ++ ++const char* Assembler::special2_name[] = { ++ "madd", "", "mul", "", "msub", "", "", "", ++ "", "", "", "", "", "", "", "", ++ "", "gsdmult", "", "", "gsdiv", "gsddiv", "", "", ++ "", "", "", "", "gsmod", "gsdmod", "", "", ++ "", "", "", "", "", "", "", "", ++ "", "", "", "", "", "", "", "", ++ "", "", "", "", "", "", "", "", ++ "", "", "", "", "", "", "", "" ++}; ++ ++const char* Assembler::special3_name[] = { ++ "ext", "", "", "", "ins", "dinsm", "dinsu", "dins", ++ "", "", "", "", "", "", "", "", ++ "", "", "", "", "", "", "", "", ++ "", "", "", "", "", "", "", "", ++ "bshfl", "", "", "", "", "", "", "", ++ "", "", "", "", "", "", "", "", ++ "", "", "", "", "", "", "", "", ++ "", "", "", "", "", "", "", "", ++}; ++ ++const char* Assembler::regimm_name[] = { ++ "bltz", "bgez", "bltzl", "bgezl", "", "", "", "", ++ "tgei", "tgeiu", "tlti", "tltiu", "teqi", "", "tnei", "", ++ "bltzal", "bgezal", "bltzall", "bgezall" ++}; ++ ++const char* Assembler::gs_ldc2_name[] = { ++ "gslbx", "gslhx", "gslwx", "gsldx", "", "", "gslwxc1", "gsldxc1" ++}; ++ ++ ++const char* Assembler::gs_lwc2_name[] = { ++ "", "", "", "", "", "", "", "", ++ "", "", "", "", "", "", "", "", ++ "gslble", "gslbgt", "gslhle", "gslhgt", "gslwle", "gslwgt", "gsldle", "gsldgt", ++ "", "", "", "gslwlec1", "gslwgtc1", "gsldlec1", "gsldgtc1", "",/*LWDIR, LWPTE, LDDIR and LDPTE have the same low 6 bits.*/ ++ "gslq", "" ++}; ++ ++const char* Assembler::gs_sdc2_name[] = { ++ "gssbx", "gsshx", "gsswx", "gssdx", "", "", "gsswxc1", "gssdxc1" ++}; ++ ++const char* Assembler::gs_swc2_name[] = { ++ "", "", "", "", "", "", "", "", ++ "", "", "", "", "", "", "", "", ++ "gssble", "gssbgt", "gsshle", "gsshgt", "gsswle", "gsswgt", "gssdle", "gssdgt", ++ "", "", "", "", "gsswlec1", "gsswgtc1", "gssdlec1", "gssdgtc1", ++ "gssq", "" ++}; ++ ++//misleading name, print only branch/jump instruction ++void Assembler::print_instruction(int inst) { ++ const char *s; ++ switch( opcode(inst) ) { ++ default: ++ s = ops_name[opcode(inst)]; ++ break; ++ case special_op: ++ s = special_name[special(inst)]; ++ break; ++ case regimm_op: ++ s = special_name[rt(inst)]; ++ break; ++ } ++ ++ ::tty->print("%s", s); ++} ++ ++int Assembler::is_int_mask(int x) { ++ int xx = x; ++ int count = 0; ++ ++ while (x != 0) { ++ x &= (x - 1); ++ count++; ++ } ++ ++ if ((1<>2; ++ switch(opcode(inst)) { ++ case j_op: ++ case jal_op: ++ case lui_op: ++ case ori_op: ++ case daddiu_op: ++ ShouldNotReachHere(); ++ break; ++ default: ++ assert(is_simm16(v), "must be simm16"); ++#ifndef PRODUCT ++ if (!is_simm16(v)) { ++ tty->print_cr("must be simm16"); ++ tty->print_cr("Inst: %x", inst); ++ } ++#endif ++ ++ v = low16(v); ++ inst &= 0xffff0000; ++ break; ++ } ++ ++ return inst | v; ++} ++ ++int Assembler::branch_destination(int inst, int pos) { ++ int off = 0; ++ ++ switch(opcode(inst)) { ++ case j_op: ++ case jal_op: ++ assert(false, "should not use j/jal here"); ++ break; ++ default: ++ off = expand(low16(inst), 15); ++ break; ++ } ++ ++ return off ? 
pos + 4 + (off<<2) : 0; ++} ++ ++int AbstractAssembler::code_fill_byte() { ++ return 0x00; // illegal instruction 0x00000000 ++} ++ ++// Now the Assembler instruction (identical for 32/64 bits) ++ ++void Assembler::lb(Register rt, Address src) { ++ assert(src.index() == NOREG, "index is unimplemented"); ++ lb(rt, src.base(), src.disp()); ++} ++ ++void Assembler::lbu(Register rt, Address src) { ++ assert(src.index() == NOREG, "index is unimplemented"); ++ lbu(rt, src.base(), src.disp()); ++} ++ ++void Assembler::ld(Register rt, Address dst){ ++ Register src = rt; ++ Register base = dst.base(); ++ Register index = dst.index(); ++ ++ int scale = dst.scale(); ++ int disp = dst.disp(); ++ ++ if (index != noreg) { ++ if (Assembler::is_simm16(disp)) { ++ if ( UseLEXT1 && Assembler::is_simm(disp, 8) ) { ++ if (scale == 0) { ++ gsldx(src, base, index, disp); ++ } else { ++ dsll(AT, index, scale); ++ gsldx(src, base, AT, disp); ++ } ++ } else { ++ if (scale == 0) { ++ daddu(AT, base, index); ++ } else { ++ dsll(AT, index, scale); ++ daddu(AT, base, AT); ++ } ++ ld(src, AT, disp); ++ } ++ } else { ++ if (scale == 0) { ++ lui(AT, split_low(disp >> 16)); ++ if (split_low(disp)) ori(AT, AT, split_low(disp)); ++ daddu(AT, AT, base); ++ if (UseLEXT1) { ++ gsldx(src, AT, index, 0); ++ } else { ++ daddu(AT, AT, index); ++ ld(src, AT, 0); ++ } ++ } else { ++ assert_different_registers(src, AT); ++ dsll(AT, index, scale); ++ daddu(AT, base, AT); ++ lui(src, split_low(disp >> 16)); ++ if (split_low(disp)) ori(src, src, split_low(disp)); ++ if (UseLEXT1) { ++ gsldx(src, AT, src, 0); ++ } else { ++ daddu(AT, AT, src); ++ ld(src, AT, 0); ++ } ++ } ++ } ++ } else { ++ if (Assembler::is_simm16(disp)) { ++ ld(src, base, disp); ++ } else { ++ lui(AT, split_low(disp >> 16)); ++ if (split_low(disp)) ori(AT, AT, split_low(disp)); ++ ++ if (UseLEXT1) { ++ gsldx(src, base, AT, 0); ++ } else { ++ daddu(AT, base, AT); ++ ld(src, AT, 0); ++ } ++ } ++ } ++} ++ ++void Assembler::ldl(Register rt, Address src){ ++ assert(src.index() == NOREG, "index is unimplemented"); ++ ldl(rt, src.base(), src.disp()); ++} ++ ++void Assembler::ldr(Register rt, Address src){ ++ assert(src.index() == NOREG, "index is unimplemented"); ++ ldr(rt, src.base(), src.disp()); ++} ++ ++void Assembler::lh(Register rt, Address src){ ++ assert(src.index() == NOREG, "index is unimplemented"); ++ lh(rt, src.base(), src.disp()); ++} ++ ++void Assembler::lhu(Register rt, Address src){ ++ assert(src.index() == NOREG, "index is unimplemented"); ++ lhu(rt, src.base(), src.disp()); ++} ++ ++void Assembler::ll(Register rt, Address src){ ++ assert(src.index() == NOREG, "index is unimplemented"); ++ ll(rt, src.base(), src.disp()); ++} ++ ++void Assembler::lld(Register rt, Address src){ ++ assert(src.index() == NOREG, "index is unimplemented"); ++ lld(rt, src.base(), src.disp()); ++} ++ ++void Assembler::lw(Register rt, Address dst){ ++ Register src = rt; ++ Register base = dst.base(); ++ Register index = dst.index(); ++ ++ int scale = dst.scale(); ++ int disp = dst.disp(); ++ ++ if (index != noreg) { ++ if (Assembler::is_simm16(disp)) { ++ if ( UseLEXT1 && Assembler::is_simm(disp, 8) ) { ++ if (scale == 0) { ++ gslwx(src, base, index, disp); ++ } else { ++ dsll(AT, index, scale); ++ gslwx(src, base, AT, disp); ++ } ++ } else { ++ if (scale == 0) { ++ daddu(AT, base, index); ++ } else { ++ dsll(AT, index, scale); ++ daddu(AT, base, AT); ++ } ++ lw(src, AT, disp); ++ } ++ } else { ++ if (scale == 0) { ++ lui(AT, split_low(disp >> 16)); ++ if (split_low(disp)) ori(AT, 
AT, split_low(disp)); ++ daddu(AT, AT, base); ++ if (UseLEXT1) { ++ gslwx(src, AT, index, 0); ++ } else { ++ daddu(AT, AT, index); ++ lw(src, AT, 0); ++ } ++ } else { ++ assert_different_registers(src, AT); ++ dsll(AT, index, scale); ++ daddu(AT, base, AT); ++ lui(src, split_low(disp >> 16)); ++ if (split_low(disp)) ori(src, src, split_low(disp)); ++ if (UseLEXT1) { ++ gslwx(src, AT, src, 0); ++ } else { ++ daddu(AT, AT, src); ++ lw(src, AT, 0); ++ } ++ } ++ } ++ } else { ++ if (Assembler::is_simm16(disp)) { ++ lw(src, base, disp); ++ } else { ++ lui(AT, split_low(disp >> 16)); ++ if (split_low(disp)) ori(AT, AT, split_low(disp)); ++ ++ if (UseLEXT1) { ++ gslwx(src, base, AT, 0); ++ } else { ++ daddu(AT, base, AT); ++ lw(src, AT, 0); ++ } ++ } ++ } ++} ++ ++void Assembler::lea(Register rt, Address src) { ++ Register dst = rt; ++ Register base = src.base(); ++ Register index = src.index(); ++ ++ int scale = src.scale(); ++ int disp = src.disp(); ++ ++ if (index == noreg) { ++ if (is_simm16(disp)) { ++ daddiu(dst, base, disp); ++ } else { ++ lui(AT, split_low(disp >> 16)); ++ if (split_low(disp)) ori(AT, AT, split_low(disp)); ++ daddu(dst, base, AT); ++ } ++ } else { ++ if (scale == 0) { ++ if (is_simm16(disp)) { ++ daddu(AT, base, index); ++ daddiu(dst, AT, disp); ++ } else { ++ lui(AT, split_low(disp >> 16)); ++ if (split_low(disp)) ori(AT, AT, split_low(disp)); ++ daddu(AT, base, AT); ++ daddu(dst, AT, index); ++ } ++ } else { ++ if (is_simm16(disp)) { ++ dsll(AT, index, scale); ++ daddu(AT, AT, base); ++ daddiu(dst, AT, disp); ++ } else { ++ assert_different_registers(dst, AT); ++ lui(AT, split_low(disp >> 16)); ++ if (split_low(disp)) ori(AT, AT, split_low(disp)); ++ daddu(AT, AT, base); ++ dsll(dst, index, scale); ++ daddu(dst, dst, AT); ++ } ++ } ++ } ++} ++ ++void Assembler::lwl(Register rt, Address src){ ++ assert(src.index() == NOREG, "index is unimplemented"); ++ lwl(rt, src.base(), src.disp()); ++} ++ ++void Assembler::lwr(Register rt, Address src){ ++ assert(src.index() == NOREG, "index is unimplemented"); ++ lwr(rt, src.base(), src.disp()); ++} ++ ++void Assembler::lwu(Register rt, Address src){ ++ assert(src.index() == NOREG, "index is unimplemented"); ++ lwu(rt, src.base(), src.disp()); ++} ++ ++void Assembler::sb(Register rt, Address dst) { ++ assert(dst.index() == NOREG, "index is unimplemented"); ++ sb(rt, dst.base(), dst.disp()); ++} ++ ++void Assembler::sc(Register rt, Address dst) { ++ assert(dst.index() == NOREG, "index is unimplemented"); ++ sc(rt, dst.base(), dst.disp()); ++} ++ ++void Assembler::scd(Register rt, Address dst) { ++ assert(dst.index() == NOREG, "index is unimplemented"); ++ scd(rt, dst.base(), dst.disp()); ++} ++ ++void Assembler::sd(Register rt, Address dst) { ++ Register src = rt; ++ Register base = dst.base(); ++ Register index = dst.index(); ++ ++ int scale = dst.scale(); ++ int disp = dst.disp(); ++ ++ if (index != noreg) { ++ if (is_simm16(disp)) { ++ if ( UseLEXT1 && is_simm(disp, 8)) { ++ if (scale == 0) { ++ gssdx(src, base, index, disp); ++ } else { ++ assert_different_registers(rt, AT); ++ dsll(AT, index, scale); ++ gssdx(src, base, AT, disp); ++ } ++ } else { ++ assert_different_registers(rt, AT); ++ if (scale == 0) { ++ daddu(AT, base, index); ++ } else { ++ dsll(AT, index, scale); ++ daddu(AT, base, AT); ++ } ++ sd(src, AT, disp); ++ } ++ } else { ++ assert_different_registers(rt, AT); ++ if (scale == 0) { ++ lui(AT, split_low(disp >> 16)); ++ if (split_low(disp)) ori(AT, AT, split_low(disp)); ++ daddu(AT, AT, base); ++ if (UseLEXT1) { ++ 
gssdx(src, AT, index, 0); ++ } else { ++ daddu(AT, AT, index); ++ sd(src, AT, 0); ++ } ++ } else { ++ daddiu(SP, SP, -wordSize); ++ sd(T9, SP, 0); ++ ++ dsll(AT, index, scale); ++ daddu(AT, base, AT); ++ lui(T9, split_low(disp >> 16)); ++ if (split_low(disp)) ori(T9, T9, split_low(disp)); ++ daddu(AT, AT, T9); ++ ld(T9, SP, 0); ++ daddiu(SP, SP, wordSize); ++ sd(src, AT, 0); ++ } ++ } ++ } else { ++ if (is_simm16(disp)) { ++ sd(src, base, disp); ++ } else { ++ assert_different_registers(rt, AT); ++ lui(AT, split_low(disp >> 16)); ++ if (split_low(disp)) ori(AT, AT, split_low(disp)); ++ ++ if (UseLEXT1) { ++ gssdx(src, base, AT, 0); ++ } else { ++ daddu(AT, base, AT); ++ sd(src, AT, 0); ++ } ++ } ++ } ++} ++ ++void Assembler::sdl(Register rt, Address dst) { ++ assert(dst.index() == NOREG, "index is unimplemented"); ++ sdl(rt, dst.base(), dst.disp()); ++} ++ ++void Assembler::sdr(Register rt, Address dst) { ++ assert(dst.index() == NOREG, "index is unimplemented"); ++ sdr(rt, dst.base(), dst.disp()); ++} ++ ++void Assembler::sh(Register rt, Address dst) { ++ assert(dst.index() == NOREG, "index is unimplemented"); ++ sh(rt, dst.base(), dst.disp()); ++} ++ ++void Assembler::sw(Register rt, Address dst) { ++ Register src = rt; ++ Register base = dst.base(); ++ Register index = dst.index(); ++ ++ int scale = dst.scale(); ++ int disp = dst.disp(); ++ ++ if (index != noreg) { ++ if ( Assembler::is_simm16(disp) ) { ++ if ( UseLEXT1 && Assembler::is_simm(disp, 8) ) { ++ if (scale == 0) { ++ gsswx(src, base, index, disp); ++ } else { ++ assert_different_registers(rt, AT); ++ dsll(AT, index, scale); ++ gsswx(src, base, AT, disp); ++ } ++ } else { ++ assert_different_registers(rt, AT); ++ if (scale == 0) { ++ daddu(AT, base, index); ++ } else { ++ dsll(AT, index, scale); ++ daddu(AT, base, AT); ++ } ++ sw(src, AT, disp); ++ } ++ } else { ++ assert_different_registers(rt, AT); ++ if (scale == 0) { ++ lui(AT, split_low(disp >> 16)); ++ if (split_low(disp)) ori(AT, AT, split_low(disp)); ++ daddu(AT, AT, base); ++ if (UseLEXT1) { ++ gsswx(src, AT, index, 0); ++ } else { ++ daddu(AT, AT, index); ++ sw(src, AT, 0); ++ } ++ } else { ++ daddiu(SP, SP, -wordSize); ++ sd(T9, SP, 0); ++ ++ dsll(AT, index, scale); ++ daddu(AT, base, AT); ++ lui(T9, split_low(disp >> 16)); ++ if (split_low(disp)) ori(T9, T9, split_low(disp)); ++ daddu(AT, AT, T9); ++ ld(T9, SP, 0); ++ daddiu(SP, SP, wordSize); ++ sw(src, AT, 0); ++ } ++ } ++ } else { ++ if (Assembler::is_simm16(disp)) { ++ sw(src, base, disp); ++ } else { ++ assert_different_registers(rt, AT); ++ lui(AT, split_low(disp >> 16)); ++ if (split_low(disp)) ori(AT, AT, split_low(disp)); ++ ++ if (UseLEXT1) { ++ gsswx(src, base, AT, 0); ++ } else { ++ daddu(AT, base, AT); ++ sw(src, AT, 0); ++ } ++ } ++ } ++} ++ ++void Assembler::swl(Register rt, Address dst) { ++ assert(dst.index() == NOREG, "index is unimplemented"); ++ swl(rt, dst.base(), dst.disp()); ++} ++ ++void Assembler::swr(Register rt, Address dst) { ++ assert(dst.index() == NOREG, "index is unimplemented"); ++ swr(rt, dst.base(), dst.disp()); ++} ++ ++void Assembler::lwc1(FloatRegister rt, Address src) { ++ assert(src.index() == NOREG, "index is unimplemented"); ++ lwc1(rt, src.base(), src.disp()); ++} ++ ++void Assembler::ldc1(FloatRegister rt, Address src) { ++ assert(src.index() == NOREG, "index is unimplemented"); ++ ldc1(rt, src.base(), src.disp()); ++} ++ ++void Assembler::swc1(FloatRegister rt, Address dst) { ++ assert(dst.index() == NOREG, "index is unimplemented"); ++ swc1(rt, dst.base(), dst.disp()); 
++} ++ ++void Assembler::sdc1(FloatRegister rt, Address dst) { ++ assert(dst.index() == NOREG, "index is unimplemented"); ++ sdc1(rt, dst.base(), dst.disp()); ++} ++ ++void Assembler::j(address entry) { ++ int dest = ((intptr_t)entry & (intptr_t)0xfffffff)>>2; ++ emit_long((j_op<<26) | dest); ++ has_delay_slot(); ++} ++ ++void Assembler::jal(address entry) { ++ int dest = ((intptr_t)entry & (intptr_t)0xfffffff)>>2; ++ emit_long((jal_op<<26) | dest); ++ has_delay_slot(); ++} ++ ++void Assembler::emit_long(int x) { // shadows AbstractAssembler::emit_long ++ check_delay(); ++ AbstractAssembler::emit_int32(x); ++} ++ ++inline void Assembler::emit_data(int x) { emit_long(x); } ++inline void Assembler::emit_data(int x, relocInfo::relocType rtype) { ++ relocate(rtype); ++ emit_long(x); ++} ++ ++inline void Assembler::emit_data(int x, RelocationHolder const& rspec) { ++ relocate(rspec); ++ emit_long(x); ++} ++ ++inline void Assembler::check_delay() { ++#ifdef CHECK_DELAY ++ guarantee(delay_state != at_delay_slot, "must say delayed() when filling delay slot"); ++ delay_state = no_delay; ++#endif ++} +diff --git a/src/hotspot/cpu/mips/assembler_mips.hpp b/src/hotspot/cpu/mips/assembler_mips.hpp +new file mode 100644 +index 00000000000..8d0d9e0eb21 +--- /dev/null ++++ b/src/hotspot/cpu/mips/assembler_mips.hpp +@@ -0,0 +1,1777 @@ ++/* ++ * Copyright (c) 1997, 2013, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#ifndef CPU_MIPS_VM_ASSEMBLER_MIPS_HPP ++#define CPU_MIPS_VM_ASSEMBLER_MIPS_HPP ++ ++#include "asm/register.hpp" ++#include "runtime/vm_version.hpp" ++ ++class BiasedLockingCounters; ++ ++ ++// Note: A register location is represented via a Register, not ++// via an address for efficiency & simplicity reasons. 
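++
++// Usage sketch (illustrative only, derived from the definitions below): an Address
++// bundles a base register, an optional index register, a ScaleFactor and a displacement,
++// so Address(base, index, Address::times_8, 16) denotes base + (index << 3) + 16.
++// The load/store overloads that take an Address (see assembler_mips.cpp) expand such
++// operands into real instruction sequences, using AT as a scratch register whenever
++// the displacement does not fit in a signed 16-bit immediate.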
++ ++class ArrayAddress; ++ ++class Address { ++ public: ++ enum ScaleFactor { ++ no_scale = -1, ++ times_1 = 0, ++ times_2 = 1, ++ times_4 = 2, ++ times_8 = 3, ++ times_ptr = times_8 ++ }; ++ static ScaleFactor times(int size) { ++ assert(size >= 1 && size <= 8 && is_power_of_2(size), "bad scale size"); ++ if (size == 8) return times_8; ++ if (size == 4) return times_4; ++ if (size == 2) return times_2; ++ return times_1; ++ } ++ ++ private: ++ Register _base; ++ Register _index; ++ ScaleFactor _scale; ++ int _disp; ++ RelocationHolder _rspec; ++ ++ // Easily misused constructors make them private ++ Address(address loc, RelocationHolder spec); ++ Address(int disp, address loc, relocInfo::relocType rtype); ++ Address(int disp, address loc, RelocationHolder spec); ++ ++ public: ++ ++ // creation ++ Address() ++ : _base(noreg), ++ _index(noreg), ++ _scale(no_scale), ++ _disp(0) { ++ } ++ ++ // No default displacement otherwise Register can be implicitly ++ // converted to 0(Register) which is quite a different animal. ++ ++ Address(Register base, int disp = 0) ++ : _base(base), ++ _index(noreg), ++ _scale(no_scale), ++ _disp(disp) { ++ assert_different_registers(_base, AT); ++ } ++ ++ Address(Register base, Register index, ScaleFactor scale, int disp = 0) ++ : _base (base), ++ _index(index), ++ _scale(scale), ++ _disp (disp) { ++ assert(!index->is_valid() == (scale == Address::no_scale), "inconsistent address"); ++ assert_different_registers(_base, _index, AT); ++ } ++ ++ // The following overloads are used in connection with the ++ // ByteSize type (see sizes.hpp). They simplify the use of ++ // ByteSize'd arguments in assembly code. ++ ++ Address(Register base, ByteSize disp) ++ : Address(base, in_bytes(disp)) {} ++ ++ Address(Register base, Register index, ScaleFactor scale, ByteSize disp) ++ : Address(base, index, scale, in_bytes(disp)) {} ++ ++ // accessors ++ bool uses(Register reg) const { return _base == reg || _index == reg; } ++ Register base() const { return _base; } ++ Register index() const { return _index; } ++ ScaleFactor scale() const { return _scale; } ++ int disp() const { return _disp; } ++ ++ static Address make_array(ArrayAddress); ++ ++ friend class Assembler; ++ friend class MacroAssembler; ++ friend class LIR_Assembler; // base/index/scale/disp ++}; ++ ++// Calling convention ++class Argument { ++ private: ++ int _number; ++ public: ++ enum { ++ n_register_parameters = 8, // 8 integer registers used to pass parameters ++ n_float_register_parameters = 8 // 8 float registers used to pass parameters ++ }; ++ ++ Argument(int number):_number(number){ } ++ Argument successor() {return Argument(number() + 1);} ++ ++ int number()const {return _number;} ++ bool is_Register()const {return _number < n_register_parameters;} ++ bool is_FloatRegister()const {return _number < n_float_register_parameters;} ++ ++ Register as_Register()const { ++ assert(is_Register(), "must be a register argument"); ++ return ::as_Register(RA0->encoding() + _number); ++ } ++ FloatRegister as_FloatRegister()const { ++ assert(is_FloatRegister(), "must be a float register argument"); ++ return ::as_FloatRegister(F12->encoding() + _number); ++ } ++ ++ Address as_caller_address()const {return Address(SP, (number() - n_register_parameters) * wordSize);} ++}; ++ ++// ++// AddressLiteral has been split out from Address because operands of this type ++// need to be treated specially on 32bit vs. 64bit platforms. 
By splitting it out ++// the few instructions that need to deal with address literals are unique and the ++// MacroAssembler does not have to implement every instruction in the Assembler ++// in order to search for address literals that may need special handling depending ++// on the instruction and the platform. As small step on the way to merging i486/amd64 ++// directories. ++// ++class AddressLiteral { ++ friend class ArrayAddress; ++ RelocationHolder _rspec; ++ // Typically we use AddressLiterals we want to use their rval ++ // However in some situations we want the lval (effect address) of the item. ++ // We provide a special factory for making those lvals. ++ bool _is_lval; ++ ++ // If the target is far we'll need to load the ea of this to ++ // a register to reach it. Otherwise if near we can do rip ++ // relative addressing. ++ ++ address _target; ++ ++ protected: ++ // creation ++ AddressLiteral() ++ : _is_lval(false), ++ _target(NULL) ++ {} ++ ++ public: ++ ++ ++ AddressLiteral(address target, relocInfo::relocType rtype); ++ ++ AddressLiteral(address target, RelocationHolder const& rspec) ++ : _rspec(rspec), ++ _is_lval(false), ++ _target(target) ++ {} ++ ++ AddressLiteral addr() { ++ AddressLiteral ret = *this; ++ ret._is_lval = true; ++ return ret; ++ } ++ ++ ++ private: ++ ++ address target() { return _target; } ++ bool is_lval() { return _is_lval; } ++ ++ relocInfo::relocType reloc() const { return _rspec.type(); } ++ const RelocationHolder& rspec() const { return _rspec; } ++ ++ friend class Assembler; ++ friend class MacroAssembler; ++ friend class Address; ++ friend class LIR_Assembler; ++ RelocationHolder rspec_from_rtype(relocInfo::relocType rtype, address addr) { ++ switch (rtype) { ++ case relocInfo::external_word_type: ++ return external_word_Relocation::spec(addr); ++ case relocInfo::internal_word_type: ++ return internal_word_Relocation::spec(addr); ++ case relocInfo::opt_virtual_call_type: ++ return opt_virtual_call_Relocation::spec(); ++ case relocInfo::static_call_type: ++ return static_call_Relocation::spec(); ++ case relocInfo::runtime_call_type: ++ return runtime_call_Relocation::spec(); ++ case relocInfo::poll_type: ++ case relocInfo::poll_return_type: ++ return Relocation::spec_simple(rtype); ++ case relocInfo::none: ++ case relocInfo::oop_type: ++ // Oops are a special case. Normally they would be their own section ++ // but in cases like icBuffer they are literals in the code stream that ++ // we don't have a section for. We use none so that we get a literal address ++ // which is always patchable. ++ return RelocationHolder(); ++ default: ++ ShouldNotReachHere(); ++ return RelocationHolder(); ++ } ++ } ++ ++}; ++ ++// Convience classes ++class RuntimeAddress: public AddressLiteral { ++ ++ public: ++ ++ RuntimeAddress(address target) : AddressLiteral(target, relocInfo::runtime_call_type) {} ++ ++}; ++ ++class OopAddress: public AddressLiteral { ++ ++ public: ++ ++ OopAddress(address target) : AddressLiteral(target, relocInfo::oop_type){} ++ ++}; ++ ++class ExternalAddress: public AddressLiteral { ++ ++ public: ++ ++ ExternalAddress(address target) : AddressLiteral(target, relocInfo::external_word_type){} ++ ++}; ++ ++class InternalAddress: public AddressLiteral { ++ ++ public: ++ ++ InternalAddress(address target) : AddressLiteral(target, relocInfo::internal_word_type) {} ++ ++}; ++ ++// x86 can do array addressing as a single operation since disp can be an absolute ++// address amd64 can't. 
We create a class that expresses the concept but does extra ++// magic on amd64 to get the final result ++ ++class ArrayAddress { ++ private: ++ ++ AddressLiteral _base; ++ Address _index; ++ ++ public: ++ ++ ArrayAddress() {}; ++ ArrayAddress(AddressLiteral base, Address index): _base(base), _index(index) {}; ++ AddressLiteral base() { return _base; } ++ Address index() { return _index; } ++ ++}; ++ ++const int FPUStateSizeInWords = 512 / wordSize; ++ ++// The MIPS LOONGSON Assembler: Pure assembler doing NO optimizations on the instruction ++// level ; i.e., what you write is what you get. The Assembler is generating code into ++// a CodeBuffer. ++ ++class Assembler : public AbstractAssembler { ++ friend class AbstractAssembler; // for the non-virtual hack ++ friend class LIR_Assembler; // as_Address() ++ friend class StubGenerator; ++ ++ public: ++ enum Condition { ++ zero , ++ notZero , ++ equal , ++ notEqual , ++ less , ++ lessEqual , ++ greater , ++ greaterEqual , ++ below , ++ belowEqual , ++ above , ++ aboveEqual ++ }; ++ ++ static const int LogInstructionSize = 2; ++ static const int InstructionSize = 1 << LogInstructionSize; ++ ++ // opcode, highest 6 bits: bits[31...26] ++ enum ops { ++ special_op = 0x00, // special_ops ++ regimm_op = 0x01, // regimm_ops ++ j_op = 0x02, ++ jal_op = 0x03, ++ beq_op = 0x04, ++ bne_op = 0x05, ++ blez_op = 0x06, ++ bgtz_op = 0x07, ++ addiu_op = 0x09, ++ slti_op = 0x0a, ++ sltiu_op = 0x0b, ++ andi_op = 0x0c, ++ ori_op = 0x0d, ++ xori_op = 0x0e, ++ lui_op = 0x0f, ++ cop0_op = 0x10, // cop0_ops ++ cop1_op = 0x11, // cop1_ops ++ gs_cop2_op = 0x12, // gs_cop2_ops ++ cop1x_op = 0x13, // cop1x_ops ++ beql_op = 0x14, ++ bnel_op = 0x15, ++ blezl_op = 0x16, ++ bgtzl_op = 0x17, ++ daddiu_op = 0x19, ++ ldl_op = 0x1a, ++ ldr_op = 0x1b, ++ special2_op = 0x1c, // special2_ops ++ msa_op = 0x1e, // msa_ops ++ special3_op = 0x1f, // special3_ops ++ lb_op = 0x20, ++ lh_op = 0x21, ++ lwl_op = 0x22, ++ lw_op = 0x23, ++ lbu_op = 0x24, ++ lhu_op = 0x25, ++ lwr_op = 0x26, ++ lwu_op = 0x27, ++ sb_op = 0x28, ++ sh_op = 0x29, ++ swl_op = 0x2a, ++ sw_op = 0x2b, ++ sdl_op = 0x2c, ++ sdr_op = 0x2d, ++ swr_op = 0x2e, ++ cache_op = 0x2f, ++ ll_op = 0x30, ++ lwc1_op = 0x31, ++ gs_lwc2_op = 0x32, //gs_lwc2_ops ++ pref_op = 0x33, ++ lld_op = 0x34, ++ ldc1_op = 0x35, ++ gs_ldc2_op = 0x36, //gs_ldc2_ops ++ ld_op = 0x37, ++ sc_op = 0x38, ++ swc1_op = 0x39, ++ gs_swc2_op = 0x3a, //gs_swc2_ops ++ scd_op = 0x3c, ++ sdc1_op = 0x3d, ++ gs_sdc2_op = 0x3e, //gs_sdc2_ops ++ sd_op = 0x3f ++ }; ++ ++ static const char *ops_name[]; ++ ++ //special family, the opcode is in low 6 bits. 
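++ // (For illustration: an R-type instruction in this family, e.g. daddu rd, rs, rt, is
++ // assembled by insn_RRRO() as (rs << 21) | (rt << 16) | (rd << 11) | daddu_op, with
++ // the six major-opcode bits [31..26] left as zero.)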
++ enum special_ops { ++ sll_op = 0x00, ++ movci_op = 0x01, ++ srl_op = 0x02, ++ sra_op = 0x03, ++ sllv_op = 0x04, ++ srlv_op = 0x06, ++ srav_op = 0x07, ++ jr_op = 0x08, ++ jalr_op = 0x09, ++ movz_op = 0x0a, ++ movn_op = 0x0b, ++ syscall_op = 0x0c, ++ break_op = 0x0d, ++ sync_op = 0x0f, ++ mfhi_op = 0x10, ++ mthi_op = 0x11, ++ mflo_op = 0x12, ++ mtlo_op = 0x13, ++ dsllv_op = 0x14, ++ dsrlv_op = 0x16, ++ dsrav_op = 0x17, ++ mult_op = 0x18, ++ multu_op = 0x19, ++ div_op = 0x1a, ++ divu_op = 0x1b, ++ dmult_op = 0x1c, ++ dmultu_op = 0x1d, ++ ddiv_op = 0x1e, ++ ddivu_op = 0x1f, ++ addu_op = 0x21, ++ subu_op = 0x23, ++ and_op = 0x24, ++ or_op = 0x25, ++ xor_op = 0x26, ++ nor_op = 0x27, ++ slt_op = 0x2a, ++ sltu_op = 0x2b, ++ daddu_op = 0x2d, ++ dsubu_op = 0x2f, ++ tge_op = 0x30, ++ tgeu_op = 0x31, ++ tlt_op = 0x32, ++ tltu_op = 0x33, ++ teq_op = 0x34, ++ tne_op = 0x36, ++ dsll_op = 0x38, ++ dsrl_op = 0x3a, ++ dsra_op = 0x3b, ++ dsll32_op = 0x3c, ++ dsrl32_op = 0x3e, ++ dsra32_op = 0x3f ++ }; ++ ++ static const char* special_name[]; ++ ++ //regimm family, the opcode is in rt[16...20], 5 bits ++ enum regimm_ops { ++ bltz_op = 0x00, ++ bgez_op = 0x01, ++ bltzl_op = 0x02, ++ bgezl_op = 0x03, ++ tgei_op = 0x08, ++ tgeiu_op = 0x09, ++ tlti_op = 0x0a, ++ tltiu_op = 0x0b, ++ teqi_op = 0x0c, ++ tnei_op = 0x0e, ++ bltzal_op = 0x10, ++ bgezal_op = 0x11, ++ bltzall_op = 0x12, ++ bgezall_op = 0x13, ++ bposge32_op = 0x1c, ++ bposge64_op = 0x1d, ++ synci_op = 0x1f, ++ }; ++ ++ static const char* regimm_name[]; ++ ++ //cop0 family, the ops is in bits[25...21], 5 bits ++ enum cop0_ops { ++ mfc0_op = 0x00, ++ dmfc0_op = 0x01, ++ // ++ mxgc0_op = 0x03, //MFGC0, DMFGC0, MTGC0 ++ mtc0_op = 0x04, ++ dmtc0_op = 0x05, ++ rdpgpr_op = 0x0a, ++ inter_op = 0x0b, ++ wrpgpr_op = 0x0c ++ }; ++ ++ //cop1 family, the ops is in bits[25...21], 5 bits ++ enum cop1_ops { ++ mfc1_op = 0x00, ++ dmfc1_op = 0x01, ++ cfc1_op = 0x02, ++ mfhc1_op = 0x03, ++ mtc1_op = 0x04, ++ dmtc1_op = 0x05, ++ ctc1_op = 0x06, ++ mthc1_op = 0x07, ++ bc1f_op = 0x08, ++ single_fmt = 0x10, ++ double_fmt = 0x11, ++ word_fmt = 0x14, ++ long_fmt = 0x15, ++ ps_fmt = 0x16 ++ }; ++ ++ ++ //2 bist (bits[17...16]) of bc1x instructions (cop1) ++ enum bc_ops { ++ bcf_op = 0x0, ++ bct_op = 0x1, ++ bcfl_op = 0x2, ++ bctl_op = 0x3, ++ }; ++ ++ // low 6 bits of c_x_fmt instructions (cop1) ++ enum c_conds { ++ f_cond = 0x30, ++ un_cond = 0x31, ++ eq_cond = 0x32, ++ ueq_cond = 0x33, ++ olt_cond = 0x34, ++ ult_cond = 0x35, ++ ole_cond = 0x36, ++ ule_cond = 0x37, ++ sf_cond = 0x38, ++ ngle_cond = 0x39, ++ seq_cond = 0x3a, ++ ngl_cond = 0x3b, ++ lt_cond = 0x3c, ++ nge_cond = 0x3d, ++ le_cond = 0x3e, ++ ngt_cond = 0x3f ++ }; ++ ++ // low 6 bits of cop1 instructions ++ enum float_ops { ++ fadd_op = 0x00, ++ fsub_op = 0x01, ++ fmul_op = 0x02, ++ fdiv_op = 0x03, ++ fsqrt_op = 0x04, ++ fabs_op = 0x05, ++ fmov_op = 0x06, ++ fneg_op = 0x07, ++ froundl_op = 0x08, ++ ftruncl_op = 0x09, ++ fceill_op = 0x0a, ++ ffloorl_op = 0x0b, ++ froundw_op = 0x0c, ++ ftruncw_op = 0x0d, ++ fceilw_op = 0x0e, ++ ffloorw_op = 0x0f, ++ movf_f_op = 0x11, ++ movt_f_op = 0x11, ++ movz_f_op = 0x12, ++ movn_f_op = 0x13, ++ frecip_op = 0x15, ++ frsqrt_op = 0x16, ++ fcvts_op = 0x20, ++ fcvtd_op = 0x21, ++ fcvtw_op = 0x24, ++ fcvtl_op = 0x25, ++ fcvtps_op = 0x26, ++ fcvtspl_op = 0x28, ++ fpll_op = 0x2c, ++ fplu_op = 0x2d, ++ fpul_op = 0x2e, ++ fpuu_op = 0x2f ++ }; ++ ++ static const char* cop1_name[]; ++ ++ //cop1x family, the opcode is in low 6 bits. 
++ enum cop1x_ops { ++ lwxc1_op = 0x00, ++ ldxc1_op = 0x01, ++ luxc1_op = 0x05, ++ swxc1_op = 0x08, ++ sdxc1_op = 0x09, ++ suxc1_op = 0x0d, ++ prefx_op = 0x0f, ++ ++ alnv_ps_op = 0x1e, ++ madd_s_op = 0x20, ++ madd_d_op = 0x21, ++ madd_ps_op = 0x26, ++ msub_s_op = 0x28, ++ msub_d_op = 0x29, ++ msub_ps_op = 0x2e, ++ nmadd_s_op = 0x30, ++ nmadd_d_op = 0x31, ++ nmadd_ps_op = 0x36, ++ nmsub_s_op = 0x38, ++ nmsub_d_op = 0x39, ++ nmsub_ps_op = 0x3e ++ }; ++ ++ static const char* cop1x_name[]; ++ ++ //special2 family, the opcode is in low 6 bits. ++ enum special2_ops { ++ madd_op = 0x00, ++ maddu_op = 0x01, ++ mul_op = 0x02, ++ gs0x03_op = 0x03, ++ msub_op = 0x04, ++ msubu_op = 0x05, ++ gs0x06_op = 0x06, ++ gsemul2_op = 0x07, ++ gsemul3_op = 0x08, ++ gsemul4_op = 0x09, ++ gsemul5_op = 0x0a, ++ gsemul6_op = 0x0b, ++ gsemul7_op = 0x0c, ++ gsemul8_op = 0x0d, ++ gsemul9_op = 0x0e, ++ gsemul10_op = 0x0f, ++ gsmult_op = 0x10, ++ gsdmult_op = 0x11, ++ gsmultu_op = 0x12, ++ gsdmultu_op = 0x13, ++ gsdiv_op = 0x14, ++ gsddiv_op = 0x15, ++ gsdivu_op = 0x16, ++ gsddivu_op = 0x17, ++ gsmod_op = 0x1c, ++ gsdmod_op = 0x1d, ++ gsmodu_op = 0x1e, ++ gsdmodu_op = 0x1f, ++ clz_op = 0x20, ++ clo_op = 0x21, ++ xctx_op = 0x22, //ctz, cto, dctz, dcto, gsX ++ gsrxr_x_op = 0x23, //gsX ++ dclz_op = 0x24, ++ dclo_op = 0x25, ++ gsle_op = 0x26, ++ gsgt_op = 0x27, ++ gs86j_op = 0x28, ++ gsloop_op = 0x29, ++ gsaj_op = 0x2a, ++ gsldpc_op = 0x2b, ++ gs86set_op = 0x30, ++ gstm_op = 0x31, ++ gscvt_ld_op = 0x32, ++ gscvt_ud_op = 0x33, ++ gseflag_op = 0x34, ++ gscam_op = 0x35, ++ gstop_op = 0x36, ++ gssettag_op = 0x37, ++ gssdbbp_op = 0x38 ++ }; ++ ++ static const char* special2_name[]; ++ ++ // special3 family, the opcode is in low 6 bits. ++ enum special3_ops { ++ ext_op = 0x00, ++ dextm_op = 0x01, ++ dextu_op = 0x02, ++ dext_op = 0x03, ++ ins_op = 0x04, ++ dinsm_op = 0x05, ++ dinsu_op = 0x06, ++ dins_op = 0x07, ++ lxx_op = 0x0a, //lwx, lhx, lbux, ldx ++ insv_op = 0x0c, ++ dinsv_op = 0x0d, ++ ar1_op = 0x10, //MIPS DSP ++ cmp1_op = 0x11, //MIPS DSP ++ re1_op = 0x12, //MIPS DSP, re1_ops ++ sh1_op = 0x13, //MIPS DSP ++ ar2_op = 0x14, //MIPS DSP ++ cmp2_op = 0x15, //MIPS DSP ++ re2_op = 0x16, //MIPS DSP, re2_ops ++ sh2_op = 0x17, //MIPS DSP ++ ar3_op = 0x18, //MIPS DSP ++ bshfl_op = 0x20 //seb, seh ++ }; ++ ++ // re1_ops ++ enum re1_ops { ++ absq_s_qb_op = 0x01, ++ repl_qb_op = 0x02, ++ replv_qb_op = 0x03, ++ absq_s_ph_op = 0x09, ++ repl_ph_op = 0x0a, ++ replv_ph_op = 0x0b, ++ absq_s_w_op = 0x11, ++ bitrev_op = 0x1b ++ }; ++ ++ // re2_ops ++ enum re2_ops { ++ repl_ob_op = 0x02, ++ replv_ob_op = 0x03, ++ absq_s_qh_op = 0x09, ++ repl_qh_op = 0x0a, ++ replv_qh_op = 0x0b, ++ absq_s_pw_op = 0x11, ++ repl_pw_op = 0x12, ++ replv_pw_op = 0x13 ++ }; ++ ++ static const char* special3_name[]; ++ ++ // lwc2/gs_lwc2 family, the opcode is in low 6 bits. ++ enum gs_lwc2_ops { ++ gslble_op = 0x10, ++ gslbgt_op = 0x11, ++ gslhle_op = 0x12, ++ gslhgt_op = 0x13, ++ gslwle_op = 0x14, ++ gslwgt_op = 0x15, ++ gsldle_op = 0x16, ++ gsldgt_op = 0x17, ++ gslwlec1_op = 0x1c, ++ gslwgtc1_op = 0x1d, ++ gsldlec1_op = 0x1e, ++ gsldgtc1_op = 0x1f, ++ gslq_op = 0x20 ++ }; ++ ++ static const char* gs_lwc2_name[]; ++ ++ // ldc2/gs_ldc2 family, the opcode is in low 3 bits. ++ enum gs_ldc2_ops { ++ gslbx_op = 0x0, ++ gslhx_op = 0x1, ++ gslwx_op = 0x2, ++ gsldx_op = 0x3, ++ gslwxc1_op = 0x6, ++ gsldxc1_op = 0x7 ++ }; ++ ++ static const char* gs_ldc2_name[]; ++ ++ // swc2/gs_swc2 family, the opcode is in low 6 bits. 
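++ // (Context, illustrative only: the gs*-prefixed families here are Loongson extension
++ // encodings; the Address-based load/store helpers in assembler_mips.cpp emit forms such
++ // as gsldx/gssdx only when UseLEXT1 is set, otherwise falling back to daddu + ld/sd
++ // sequences.)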
++ enum gs_swc2_ops { ++ gssble_op = 0x10, ++ gssbgt_op = 0x11, ++ gsshle_op = 0x12, ++ gsshgt_op = 0x13, ++ gsswle_op = 0x14, ++ gsswgt_op = 0x15, ++ gssdle_op = 0x16, ++ gssdgt_op = 0x17, ++ gsswlec1_op = 0x1c, ++ gsswgtc1_op = 0x1d, ++ gssdlec1_op = 0x1e, ++ gssdgtc1_op = 0x1f, ++ gssq_op = 0x20 ++ }; ++ ++ static const char* gs_swc2_name[]; ++ ++ // sdc2/gs_sdc2 family, the opcode is in low 3 bits. ++ enum gs_sdc2_ops { ++ gssbx_op = 0x0, ++ gsshx_op = 0x1, ++ gsswx_op = 0x2, ++ gssdx_op = 0x3, ++ gsswxc1_op = 0x6, ++ gssdxc1_op = 0x7 ++ }; ++ ++ static const char* gs_sdc2_name[]; ++ ++ enum WhichOperand { ++ // input to locate_operand, and format code for relocations ++ imm_operand = 0, // embedded 32-bit|64-bit immediate operand ++ disp32_operand = 1, // embedded 32-bit displacement or address ++ call32_operand = 2, // embedded 32-bit self-relative displacement ++ narrow_oop_operand = 3, // embedded 32-bit immediate narrow oop ++ _WhichOperand_limit = 4 ++ }; ++ ++ static int opcode(int insn) { return (insn>>26)&0x3f; } ++ static int rs(int insn) { return (insn>>21)&0x1f; } ++ static int rt(int insn) { return (insn>>16)&0x1f; } ++ static int rd(int insn) { return (insn>>11)&0x1f; } ++ static int sa(int insn) { return (insn>>6)&0x1f; } ++ static int special(int insn) { return insn&0x3f; } ++ static int imm_off(int insn) { return (short)low16(insn); } ++ ++ static int low (int x, int l) { return bitfield(x, 0, l); } ++ static int low16(int x) { return low(x, 16); } ++ static int low26(int x) { return low(x, 26); } ++ ++ protected: ++ //help methods for instruction ejection ++ ++ // I-Type (Immediate) ++ // 31 26 25 21 20 16 15 0 ++ //| opcode | rs | rt | immediat | ++ //| | | | | ++ // 6 5 5 16 ++ static int insn_ORRI(int op, int rs, int rt, int imm) { assert(is_simm16(imm), "not a signed 16-bit int"); return (op<<26) | (rs<<21) | (rt<<16) | low16(imm); } ++ ++ // R-Type (Register) ++ // 31 26 25 21 20 16 15 11 10 6 5 0 ++ //| special | rs | rt | rd | 0 | opcode | ++ //| 0 0 0 0 0 0 | | | | 0 0 0 0 0 | | ++ // 6 5 5 5 5 6 ++ static int insn_RRRO(int rs, int rt, int rd, int op) { return (rs<<21) | (rt<<16) | (rd<<11) | op; } ++ static int insn_RRSO(int rt, int rd, int sa, int op) { return (rt<<16) | (rd<<11) | (sa<<6) | op; } ++ static int insn_RRCO(int rs, int rt, int code, int op) { return (rs<<21) | (rt<<16) | (code<<6) | op; } ++ ++ static int insn_COP0(int op, int rt, int rd) { return (cop0_op<<26) | (op<<21) | (rt<<16) | (rd<<11); } ++ static int insn_COP1(int op, int rt, int fs) { return (cop1_op<<26) | (op<<21) | (rt<<16) | (fs<<11); } ++ ++ static int insn_F3RO(int fmt, int ft, int fs, int fd, int func) { ++ return (cop1_op<<26) | (fmt<<21) | (ft<<16) | (fs<<11) | (fd<<6) | func; ++ } ++ static int insn_F3ROX(int fmt, int ft, int fs, int fd, int func) { ++ return (cop1x_op<<26) | (fmt<<21) | (ft<<16) | (fs<<11) | (fd<<6) | func; ++ } ++ ++ static int high (int x, int l) { return bitfield(x, 32-l, l); } ++ static int high16(int x) { return high(x, 16); } ++ static int high6 (int x) { return high(x, 6); } ++ ++ //get the offset field of jump/branch instruction ++ int offset(address entry) { ++ assert(is_simm16((entry - pc() - 4) / 4), "change this code"); ++ if (!is_simm16((entry - pc() - 4) / 4)) { ++ tty->print_cr("!!! 
is_simm16: %lx", (entry - pc() - 4) / 4); ++ } ++ return (entry - pc() - 4) / 4; ++ } ++ ++ ++public: ++ using AbstractAssembler::offset; ++ ++ //sign expand with the sign bit is h ++ static int expand(int x, int h) { return -(x & (1<> 16; ++ } ++ ++ static int split_high(int x) { ++ return ( (x >> 16) + ((x & 0x8000) != 0) ) & 0xffff; ++ } ++ ++ static int merge(int low, int high) { ++ return expand(low, 15) + (high<<16); ++ } ++ ++ static intptr_t merge(intptr_t x0, intptr_t x16, intptr_t x32, intptr_t x48) { ++ return (x48 << 48) | (x32 << 32) | (x16 << 16) | x0; ++ } ++ ++ // Test if x is within signed immediate range for nbits. ++ static bool is_simm (int x, int nbits) { ++ assert(0 < nbits && nbits < 32, "out of bounds"); ++ const int min = -( ((int)1) << nbits-1 ); ++ const int maxplus1 = ( ((int)1) << nbits-1 ); ++ return min <= x && x < maxplus1; ++ } ++ ++ static bool is_simm(jlong x, unsigned int nbits) { ++ assert(0 < nbits && nbits < 64, "out of bounds"); ++ const jlong min = -( ((jlong)1) << nbits-1 ); ++ const jlong maxplus1 = ( ((jlong)1) << nbits-1 ); ++ return min <= x && x < maxplus1; ++ } ++ ++ // Test if x is within unsigned immediate range for nbits ++ static bool is_uimm(int x, unsigned int nbits) { ++ assert(0 < nbits && nbits < 32, "out of bounds"); ++ const int maxplus1 = ( ((int)1) << nbits ); ++ return 0 <= x && x < maxplus1; ++ } ++ ++ static bool is_uimm(jlong x, unsigned int nbits) { ++ assert(0 < nbits && nbits < 64, "out of bounds"); ++ const jlong maxplus1 = ( ((jlong)1) << nbits ); ++ return 0 <= x && x < maxplus1; ++ } ++ ++ static bool is_simm16(int x) { return is_simm(x, 16); } ++ static bool is_simm16(long x) { return is_simm((jlong)x, (unsigned int)16); } ++ ++ static bool fit_in_jal(address target, address pc) { ++ intptr_t mask = 0xfffffffff0000000; ++ return ((intptr_t)(pc + 4) & mask) == ((intptr_t)target & mask); ++ } ++ ++ bool fit_int_branch(address entry) { ++ return is_simm16(offset(entry)); ++ } ++ ++protected: ++#ifdef ASSERT ++ #define CHECK_DELAY ++#endif ++#ifdef CHECK_DELAY ++ enum Delay_state { no_delay, at_delay_slot, filling_delay_slot } delay_state; ++#endif ++ ++public: ++ void assert_not_delayed() { ++#ifdef CHECK_DELAY ++ assert(delay_state == no_delay, "next instruction should not be a delay slot"); ++#endif ++ } ++ ++protected: ++ // Delay slot helpers ++ // cti is called when emitting control-transfer instruction, ++ // BEFORE doing the emitting. ++ // Only effective when assertion-checking is enabled. 
++ ++ // called when emitting cti with a delay slot, AFTER emitting ++ void has_delay_slot() { ++#ifdef CHECK_DELAY ++ assert(delay_state == no_delay, "just checking"); ++ delay_state = at_delay_slot; ++#endif ++ } ++ ++public: ++ Assembler* delayed() { ++#ifdef CHECK_DELAY ++ guarantee( delay_state == at_delay_slot, "delayed instructition is not in delay slot"); ++ delay_state = filling_delay_slot; ++#endif ++ return this; ++ } ++ ++ void flush() { ++#ifdef CHECK_DELAY ++ guarantee( delay_state == no_delay, "ending code with a delay slot"); ++#endif ++ AbstractAssembler::flush(); ++ } ++ ++ void emit_long(int); // shadows AbstractAssembler::emit_long ++ void emit_data(int); ++ void emit_data(int, RelocationHolder const&); ++ void emit_data(int, relocInfo::relocType rtype); ++ void check_delay(); ++ ++ //---< calculate length of instruction >--- ++ // With MIPS being a RISC architecture, this always is BytesPerInstWord ++ // instruction must start at passed address ++ static unsigned int instr_len(unsigned char *instr) { return BytesPerInstWord; } ++ ++ //---< longest instructions >--- ++ static unsigned int instr_maxlen() { return BytesPerInstWord; } ++ ++ ++ // Generic instructions ++ // Does 32bit or 64bit as needed for the platform. In some sense these ++ // belong in macro assembler but there is no need for both varieties to exist ++ ++ void addu32(Register rd, Register rs, Register rt){ emit_long(insn_RRRO((int)rs->encoding(), (int)rt->encoding(), (int)rd->encoding(), addu_op)); } ++ void addiu32(Register rt, Register rs, int imm) { emit_long(insn_ORRI(addiu_op, (int)rs->encoding(), (int)rt->encoding(), imm)); } ++ void addiu(Register rt, Register rs, int imm) { daddiu (rt, rs, imm);} ++ void addu(Register rd, Register rs, Register rt) { daddu (rd, rs, rt); } ++ ++ void andr(Register rd, Register rs, Register rt) { emit_long(insn_RRRO((int)rs->encoding(), (int)rt->encoding(), (int)rd->encoding(), and_op)); } ++ void andi(Register rt, Register rs, int imm) { emit_long(insn_ORRI(andi_op, (int)rs->encoding(), (int)rt->encoding(), simm16(imm))); } ++ ++ void beq (Register rs, Register rt, int off) { emit_long(insn_ORRI(beq_op, (int)rs->encoding(), (int)rt->encoding(), off)); has_delay_slot(); } ++ void beql (Register rs, Register rt, int off) { emit_long(insn_ORRI(beql_op, (int)rs->encoding(), (int)rt->encoding(), off)); has_delay_slot(); } ++ void bgez (Register rs, int off) { emit_long(insn_ORRI(regimm_op, (int)rs->encoding(), bgez_op, off)); has_delay_slot(); } ++ void bgezal (Register rs, int off) { emit_long(insn_ORRI(regimm_op, (int)rs->encoding(), bgezal_op, off)); has_delay_slot(); } ++ void bgezall(Register rs, int off) { emit_long(insn_ORRI(regimm_op, (int)rs->encoding(), bgezall_op, off)); has_delay_slot(); } ++ void bgezl (Register rs, int off) { emit_long(insn_ORRI(regimm_op, (int)rs->encoding(), bgezl_op, off)); has_delay_slot(); } ++ void bgtz (Register rs, int off) { emit_long(insn_ORRI(bgtz_op, (int)rs->encoding(), 0, off)); has_delay_slot(); } ++ void bgtzl (Register rs, int off) { emit_long(insn_ORRI(bgtzl_op, (int)rs->encoding(), 0, off)); has_delay_slot(); } ++ void blez (Register rs, int off) { emit_long(insn_ORRI(blez_op, (int)rs->encoding(), 0, off)); has_delay_slot(); } ++ void blezl (Register rs, int off) { emit_long(insn_ORRI(blezl_op, (int)rs->encoding(), 0, off)); has_delay_slot(); } ++ void bltz (Register rs, int off) { emit_long(insn_ORRI(regimm_op, (int)rs->encoding(), bltz_op, off)); has_delay_slot(); } ++ void bltzal (Register rs, int off) { 
emit_long(insn_ORRI(regimm_op, (int)rs->encoding(), bltzal_op, off)); has_delay_slot(); } ++ void bltzall(Register rs, int off) { emit_long(insn_ORRI(regimm_op, (int)rs->encoding(), bltzall_op, off)); has_delay_slot(); } ++ void bltzl (Register rs, int off) { emit_long(insn_ORRI(regimm_op, (int)rs->encoding(), bltzl_op, off)); has_delay_slot(); } ++ void bne (Register rs, Register rt, int off) { emit_long(insn_ORRI(bne_op, (int)rs->encoding(), (int)rt->encoding(), off)); has_delay_slot(); } ++ void bnel (Register rs, Register rt, int off) { emit_long(insn_ORRI(bnel_op, (int)rs->encoding(), (int)rt->encoding(), off)); has_delay_slot(); } ++ // two versions of brk: ++ // the brk(code) version is according to MIPS64 Architecture For Programmers Volume II: The MIPS64 Instruction Set ++ // the brk(code1, code2) is according to disassembler of hsdis (binutils-2.27) ++ // both versions work ++ void brk (int code) { assert(is_uimm(code, 20), "code is 20 bits"); emit_long( (low(code, 20)<<6) | break_op ); } ++ void brk (int code1, int code2) { assert(is_uimm(code1, 10) && is_uimm(code2, 10), "code is 20 bits"); emit_long( (low(code1, 10)<<16) | (low(code2, 10)<<6) | break_op ); } ++ ++ void beq (Register rs, Register rt, address entry) { beq(rs, rt, offset(entry)); } ++ void beql (Register rs, Register rt, address entry) { beql(rs, rt, offset(entry));} ++ void bgez (Register rs, address entry) { bgez (rs, offset(entry)); } ++ void bgezal (Register rs, address entry) { bgezal (rs, offset(entry)); } ++ void bgezall(Register rs, address entry) { bgezall(rs, offset(entry)); } ++ void bgezl (Register rs, address entry) { bgezl (rs, offset(entry)); } ++ void bgtz (Register rs, address entry) { bgtz (rs, offset(entry)); } ++ void bgtzl (Register rs, address entry) { bgtzl (rs, offset(entry)); } ++ void blez (Register rs, address entry) { blez (rs, offset(entry)); } ++ void blezl (Register rs, address entry) { blezl (rs, offset(entry)); } ++ void bltz (Register rs, address entry) { bltz (rs, offset(entry)); } ++ void bltzal (Register rs, address entry) { bltzal (rs, offset(entry)); } ++ void bltzall(Register rs, address entry) { bltzall(rs, offset(entry)); } ++ void bltzl (Register rs, address entry) { bltzl (rs, offset(entry)); } ++ void bne (Register rs, Register rt, address entry) { bne(rs, rt, offset(entry)); } ++ void bnel (Register rs, Register rt, address entry) { bnel(rs, rt, offset(entry)); } ++ ++ void beq (Register rs, Register rt, Label& L) { beq(rs, rt, target(L)); } ++ void beql (Register rs, Register rt, Label& L) { beql(rs, rt, target(L)); } ++ void bgez (Register rs, Label& L){ bgez (rs, target(L)); } ++ void bgezal (Register rs, Label& L){ bgezal (rs, target(L)); } ++ void bgezall(Register rs, Label& L){ bgezall(rs, target(L)); } ++ void bgezl (Register rs, Label& L){ bgezl (rs, target(L)); } ++ void bgtz (Register rs, Label& L){ bgtz (rs, target(L)); } ++ void bgtzl (Register rs, Label& L){ bgtzl (rs, target(L)); } ++ void blez (Register rs, Label& L){ blez (rs, target(L)); } ++ void blezl (Register rs, Label& L){ blezl (rs, target(L)); } ++ void bltz (Register rs, Label& L){ bltz (rs, target(L)); } ++ void bltzal (Register rs, Label& L){ bltzal (rs, target(L)); } ++ void bltzall(Register rs, Label& L){ bltzall(rs, target(L)); } ++ void bltzl (Register rs, Label& L){ bltzl (rs, target(L)); } ++ void bne (Register rs, Register rt, Label& L){ bne(rs, rt, target(L)); } ++ void bnel (Register rs, Register rt, Label& L){ bnel(rs, rt, target(L)); } ++ ++ void daddiu(Register rt, Register rs, 
int imm) { emit_long(insn_ORRI(daddiu_op, (int)rs->encoding(), (int)rt->encoding(), imm)); } ++ void daddu (Register rd, Register rs, Register rt) { emit_long(insn_RRRO((int)rs->encoding(), (int)rt->encoding(), (int)rd->encoding(), daddu_op)); } ++ void ddiv (Register rs, Register rt) { emit_long(insn_RRRO((int)rs->encoding(), (int)rt->encoding(), 0, ddiv_op)); } ++ void ddivu (Register rs, Register rt) { emit_long(insn_RRRO((int)rs->encoding(), (int)rt->encoding(), 0, ddivu_op)); } ++ ++ void movz (Register rd, Register rs, Register rt) { emit_long(insn_RRRO((int)rs->encoding(), (int)rt->encoding(), (int)rd->encoding(), movz_op)); } ++ void movn (Register rd, Register rs, Register rt) { emit_long(insn_RRRO((int)rs->encoding(), (int)rt->encoding(), (int)rd->encoding(), movn_op)); } ++ ++ void movt (Register rd, Register rs) { emit_long(((int)rs->encoding() << 21) | (1 << 16) | ((int)rd->encoding() << 11) | movci_op); } ++ void movf (Register rd, Register rs) { emit_long(((int)rs->encoding() << 21) | ((int)rd->encoding() << 11) | movci_op); } ++ ++ enum bshfl_ops { ++ seb_op = 0x10, ++ seh_op = 0x18 ++ }; ++ void seb (Register rd, Register rt) { emit_long((special3_op << 26) | ((int)rt->encoding() << 16) | ((int)rd->encoding() << 11) | (seb_op << 6) | bshfl_op); } ++ void seh (Register rd, Register rt) { emit_long((special3_op << 26) | ((int)rt->encoding() << 16) | ((int)rd->encoding() << 11) | (seh_op << 6) | bshfl_op); } ++ ++ void ext (Register rt, Register rs, int pos, int size) { ++ guarantee((0 <= pos) && (pos < 32), "pos must be in [0, 32)"); ++ guarantee((0 < size) && (size <= 32), "size must be in (0, 32]"); ++ guarantee((0 < pos + size) && (pos + size <= 32), "pos + size must be in (0, 32]"); ++ ++ int lsb = pos; ++ int msbd = size - 1; ++ ++ emit_long((special3_op << 26) | ((int)rs->encoding() << 21) | ((int)rt->encoding() << 16) | (msbd << 11) | (lsb << 6) | ext_op); ++ } ++ ++ void dext (Register rt, Register rs, int pos, int size) { ++ guarantee((0 <= pos) && (pos < 32), "pos must be in [0, 32)"); ++ guarantee((0 < size) && (size <= 32), "size must be in (0, 32]"); ++ guarantee((0 < pos + size) && (pos + size <= 63), "pos + size must be in (0, 63]"); ++ ++ int lsb = pos; ++ int msbd = size - 1; ++ ++ emit_long((special3_op << 26) | ((int)rs->encoding() << 21) | ((int)rt->encoding() << 16) | (msbd << 11) | (lsb << 6) | dext_op); ++ } ++ ++ void dextm (Register rt, Register rs, int pos, int size) { ++ guarantee((0 <= pos) && (pos < 32), "pos must be in [0, 32)"); ++ guarantee((32 < size) && (size <= 64), "size must be in (32, 64]"); ++ guarantee((32 < pos + size) && (pos + size <= 64), "pos + size must be in (32, 64]"); ++ ++ int lsb = pos; ++ int msbd = size - 1 - 32; ++ ++ emit_long((special3_op << 26) | ((int)rs->encoding() << 21) | ((int)rt->encoding() << 16) | (msbd << 11) | (lsb << 6) | dextm_op); ++ } ++ ++ void rotr (Register rd, Register rt, int sa) { ++ emit_long((special_op << 26) | (1 << 21) | ((int)rt->encoding() << 16) | ((int)rd->encoding() << 11) | (low(sa, 5) << 6) | srl_op); ++ } ++ ++ void drotr (Register rd, Register rt, int sa) { ++ emit_long((special_op << 26) | (1 << 21) | ((int)rt->encoding() << 16) | ((int)rd->encoding() << 11) | (low(sa, 5) << 6) | dsrl_op); ++ } ++ ++ void drotr32 (Register rd, Register rt, int sa) { ++ emit_long((special_op << 26) | (1 << 21) | ((int)rt->encoding() << 16) | ((int)rd->encoding() << 11) | (low(sa, 5) << 6) | dsrl32_op); ++ } ++ ++ void rotrv (Register rd, Register rt, Register rs) { ++ emit_long((special_op << 26) | 
((int)rs->encoding() << 21) | ((int)rt->encoding() << 16) | ((int)rd->encoding() << 11) | (1 << 6) | srlv_op); ++ } ++ ++ void drotrv (Register rd, Register rt, Register rs) { ++ emit_long((special_op << 26) | ((int)rs->encoding() << 21) | ((int)rt->encoding() << 16) | ((int)rd->encoding() << 11) | (1 << 6) | dsrlv_op); ++ } ++ ++ void div (Register rs, Register rt) { emit_long(insn_RRRO((int)rs->encoding(), (int)rt->encoding(), 0, div_op)); } ++ void divu (Register rs, Register rt) { emit_long(insn_RRRO((int)rs->encoding(), (int)rt->encoding(), 0, divu_op)); } ++ void dmult (Register rs, Register rt) { emit_long(insn_RRRO((int)rs->encoding(), (int)rt->encoding(), 0, dmult_op)); } ++ void dmultu(Register rs, Register rt) { emit_long(insn_RRRO((int)rs->encoding(), (int)rt->encoding(), 0, dmultu_op)); } ++ void dsll (Register rd, Register rt , int sa) { emit_long(insn_RRSO((int)rt->encoding(), (int)rd->encoding(), low(sa, 5), dsll_op)); } ++ void dsllv (Register rd, Register rt, Register rs) { emit_long(insn_RRRO((int)rs->encoding(), (int)rt->encoding(), (int)rd->encoding(), dsllv_op)); } ++ void dsll32(Register rd, Register rt , int sa) { emit_long(insn_RRSO((int)rt->encoding(), (int)rd->encoding(), low(sa, 5), dsll32_op)); } ++ void dsra (Register rd, Register rt , int sa) { emit_long(insn_RRSO((int)rt->encoding(), (int)rd->encoding(), low(sa, 5), dsra_op)); } ++ void dsrav (Register rd, Register rt, Register rs) { emit_long(insn_RRRO((int)rs->encoding(), (int)rt->encoding(), (int)rd->encoding(), dsrav_op)); } ++ void dsra32(Register rd, Register rt , int sa) { emit_long(insn_RRSO((int)rt->encoding(), (int)rd->encoding(), low(sa, 5), dsra32_op)); } ++ void dsrl (Register rd, Register rt , int sa) { emit_long(insn_RRSO((int)rt->encoding(), (int)rd->encoding(), low(sa, 5), dsrl_op)); } ++ void dsrlv (Register rd, Register rt, Register rs) { emit_long(insn_RRRO((int)rs->encoding(), (int)rt->encoding(), (int)rd->encoding(), dsrlv_op)); } ++ void dsrl32(Register rd, Register rt , int sa) { emit_long(insn_RRSO((int)rt->encoding(), (int)rd->encoding(), low(sa, 5), dsrl32_op)); } ++ void dsubu (Register rd, Register rs, Register rt) { emit_long(insn_RRRO((int)rs->encoding(), (int)rt->encoding(), (int)rd->encoding(), dsubu_op)); } ++ ++ void b(int off) { beq(R0, R0, off); } ++ void b(address entry) { b(offset(entry)); } ++ void b(Label& L) { b(target(L)); } ++ ++ void j(address entry); ++ void jal(address entry); ++ ++ void jalr(Register rd, Register rs) { emit_long( ((int)rs->encoding()<<21) | ((int)rd->encoding()<<11) | jalr_op); has_delay_slot(); } ++ void jalr(Register rs) { jalr(RA, rs); } ++ void jalr() { jalr(RT9); } ++ ++ void jr(Register rs) { emit_long(((int)rs->encoding()<<21) | jr_op); has_delay_slot(); } ++ void jr_hb(Register rs) { emit_long(((int)rs->encoding()<<21) | (1 << 10) | jr_op); has_delay_slot(); } ++ ++ void lb (Register rt, Register base, int off) { emit_long(insn_ORRI(lb_op, (int)base->encoding(), (int)rt->encoding(), off)); } ++ void lbu(Register rt, Register base, int off) { emit_long(insn_ORRI(lbu_op, (int)base->encoding(), (int)rt->encoding(), off)); } ++ void ld (Register rt, Register base, int off) { emit_long(insn_ORRI(ld_op, (int)base->encoding(), (int)rt->encoding(), off)); } ++ void ldl(Register rt, Register base, int off) { emit_long(insn_ORRI(ldl_op, (int)base->encoding(), (int)rt->encoding(), off)); } ++ void ldr(Register rt, Register base, int off) { emit_long(insn_ORRI(ldr_op, (int)base->encoding(), (int)rt->encoding(), off)); } ++ void lh (Register rt, 
Register base, int off) { emit_long(insn_ORRI(lh_op, (int)base->encoding(), (int)rt->encoding(), off)); } ++ void lhu(Register rt, Register base, int off) { emit_long(insn_ORRI(lhu_op, (int)base->encoding(), (int)rt->encoding(), off)); } ++ void ll (Register rt, Register base, int off) { emit_long(insn_ORRI(ll_op, (int)base->encoding(), (int)rt->encoding(), off)); } ++ void lld(Register rt, Register base, int off) { emit_long(insn_ORRI(lld_op, (int)base->encoding(), (int)rt->encoding(), off)); } ++ void lui(Register rt, int imm) { emit_long(insn_ORRI(lui_op, 0, (int)rt->encoding(), simm16(imm))); } ++ void lw (Register rt, Register base, int off) { emit_long(insn_ORRI(lw_op, (int)base->encoding(), (int)rt->encoding(), off)); } ++ void lwl(Register rt, Register base, int off) { emit_long(insn_ORRI(lwl_op, (int)base->encoding(), (int)rt->encoding(), off)); } ++ void lwr(Register rt, Register base, int off) { emit_long(insn_ORRI(lwr_op, (int)base->encoding(), (int)rt->encoding(), off)); } ++ void lwu(Register rt, Register base, int off) { emit_long(insn_ORRI(lwu_op, (int)base->encoding(), (int)rt->encoding(), off)); } ++ ++ void lb (Register rt, Address src); ++ void lbu(Register rt, Address src); ++ void ld (Register rt, Address src); ++ void ldl(Register rt, Address src); ++ void ldr(Register rt, Address src); ++ void lh (Register rt, Address src); ++ void lhu(Register rt, Address src); ++ void ll (Register rt, Address src); ++ void lld(Register rt, Address src); ++ void lw (Register rt, Address src); ++ void lwl(Register rt, Address src); ++ void lwr(Register rt, Address src); ++ void lwu(Register rt, Address src); ++ void lea(Register rt, Address src); ++ void pref(int hint, Register base, int off) { emit_long(insn_ORRI(pref_op, (int)base->encoding(), low(hint, 5), low(off, 16))); } ++ ++ void mfhi (Register rd) { emit_long( ((int)rd->encoding()<<11) | mfhi_op ); } ++ void mflo (Register rd) { emit_long( ((int)rd->encoding()<<11) | mflo_op ); } ++ void mthi (Register rs) { emit_long( ((int)rs->encoding()<<21) | mthi_op ); } ++ void mtlo (Register rs) { emit_long( ((int)rs->encoding()<<21) | mtlo_op ); } ++ ++ void mult (Register rs, Register rt) { emit_long(insn_RRRO((int)rs->encoding(), (int)rt->encoding(), 0, mult_op)); } ++ void multu(Register rs, Register rt) { emit_long(insn_RRRO((int)rs->encoding(), (int)rt->encoding(), 0, multu_op)); } ++ ++ void nor(Register rd, Register rs, Register rt) { emit_long(insn_RRRO((int)rs->encoding(), (int)rt->encoding(), (int)rd->encoding(), nor_op)); } ++ ++ void orr(Register rd, Register rs, Register rt) { emit_long(insn_RRRO((int)rs->encoding(), (int)rt->encoding(), (int)rd->encoding(), or_op)); } ++ void ori(Register rt, Register rs, int imm) { emit_long(insn_ORRI(ori_op, (int)rs->encoding(), (int)rt->encoding(), simm16(imm))); } ++ ++ void sb (Register rt, Register base, int off) { emit_long(insn_ORRI(sb_op, (int)base->encoding(), (int)rt->encoding(), off)); } ++ void sc (Register rt, Register base, int off) { emit_long(insn_ORRI(sc_op, (int)base->encoding(), (int)rt->encoding(), off)); } ++ void scd (Register rt, Register base, int off) { emit_long(insn_ORRI(scd_op, (int)base->encoding(), (int)rt->encoding(), off)); } ++ void sd (Register rt, Register base, int off) { emit_long(insn_ORRI(sd_op, (int)base->encoding(), (int)rt->encoding(), off)); } ++ void sdl (Register rt, Register base, int off) { emit_long(insn_ORRI(sdl_op, (int)base->encoding(), (int)rt->encoding(), off)); } ++ void sdr (Register rt, Register base, int off) { 
emit_long(insn_ORRI(sdr_op, (int)base->encoding(), (int)rt->encoding(), off)); } ++ void sh (Register rt, Register base, int off) { emit_long(insn_ORRI(sh_op, (int)base->encoding(), (int)rt->encoding(), off)); } ++ void sll (Register rd, Register rt , int sa) { emit_long(insn_RRSO((int)rt->encoding(), (int)rd->encoding(), low(sa, 5), sll_op)); } ++ void sllv (Register rd, Register rt, Register rs) { emit_long(insn_RRRO((int)rs->encoding(), (int)rt->encoding(), (int)rd->encoding(), sllv_op)); } ++ void slt (Register rd, Register rs, Register rt) { emit_long(insn_RRRO((int)rs->encoding(), (int)rt->encoding(), (int)rd->encoding(), slt_op)); } ++ void slti (Register rt, Register rs, int imm) { emit_long(insn_ORRI(slti_op, (int)rs->encoding(), (int)rt->encoding(), imm)); } ++ void sltiu(Register rt, Register rs, int imm) { emit_long(insn_ORRI(sltiu_op, (int)rs->encoding(), (int)rt->encoding(), imm)); } ++ void sltu (Register rd, Register rs, Register rt) { emit_long(insn_RRRO((int)rs->encoding(), (int)rt->encoding(), (int)rd->encoding(), sltu_op)); } ++ void sra (Register rd, Register rt , int sa) { emit_long(insn_RRSO((int)rt->encoding(), (int)rd->encoding(), low(sa, 5), sra_op)); } ++ void srav (Register rd, Register rt, Register rs) { emit_long(insn_RRRO((int)rs->encoding(), (int)rt->encoding(), (int)rd->encoding(), srav_op)); } ++ void srl (Register rd, Register rt , int sa) { emit_long(insn_RRSO((int)rt->encoding(), (int)rd->encoding(), low(sa, 5), srl_op)); } ++ void srlv (Register rd, Register rt, Register rs) { emit_long(insn_RRRO((int)rs->encoding(), (int)rt->encoding(), (int)rd->encoding(), srlv_op)); } ++ ++ void subu (Register rd, Register rs, Register rt) { dsubu (rd, rs, rt); } ++ void subu32 (Register rd, Register rs, Register rt) { emit_long(insn_RRRO((int)rs->encoding(), (int)rt->encoding(), (int)rd->encoding(), subu_op)); } ++ void sw (Register rt, Register base, int off) { emit_long(insn_ORRI(sw_op, (int)base->encoding(), (int)rt->encoding(), off)); } ++ void swl (Register rt, Register base, int off) { emit_long(insn_ORRI(swl_op, (int)base->encoding(), (int)rt->encoding(), off)); } ++ void swr (Register rt, Register base, int off) { emit_long(insn_ORRI(swr_op, (int)base->encoding(), (int)rt->encoding(), off)); } ++ void synci(Register base, int off) { emit_long(insn_ORRI(regimm_op, (int)base->encoding(), synci_op, off)); } ++ void sync () { ++ if (os::is_ActiveCoresMP()) ++ emit_long(0); ++ else ++ emit_long(sync_op); ++ } ++ void syscall(int code) { emit_long( (code<<6) | syscall_op ); } ++ ++ void sb(Register rt, Address dst); ++ void sc(Register rt, Address dst); ++ void scd(Register rt, Address dst); ++ void sd(Register rt, Address dst); ++ void sdl(Register rt, Address dst); ++ void sdr(Register rt, Address dst); ++ void sh(Register rt, Address dst); ++ void sw(Register rt, Address dst); ++ void swl(Register rt, Address dst); ++ void swr(Register rt, Address dst); ++ ++ void teq (Register rs, Register rt, int code) { emit_long(insn_RRCO((int)rs->encoding(), (int)rt->encoding(), code, teq_op)); } ++ void teqi (Register rs, int imm) { emit_long(insn_ORRI(regimm_op, (int)rs->encoding(), teqi_op, imm)); } ++ void tge (Register rs, Register rt, int code) { emit_long(insn_RRCO((int)rs->encoding(), (int)rt->encoding(), code, tge_op)); } ++ void tgei (Register rs, int imm) { emit_long(insn_ORRI(regimm_op, (int)rs->encoding(), tgei_op, imm)); } ++ void tgeiu(Register rs, int imm) { emit_long(insn_ORRI(regimm_op, (int)rs->encoding(), tgeiu_op, imm)); } ++ void tgeu (Register rs, 
Register rt, int code) { emit_long(insn_RRCO((int)rs->encoding(), (int)rt->encoding(), code, tgeu_op)); } ++ void tlt (Register rs, Register rt, int code) { emit_long(insn_RRCO((int)rs->encoding(), (int)rt->encoding(), code, tlt_op)); } ++ void tlti (Register rs, int imm) { emit_long(insn_ORRI(regimm_op, (int)rs->encoding(), tlti_op, imm)); } ++ void tltiu(Register rs, int imm) { emit_long(insn_ORRI(regimm_op, (int)rs->encoding(), tltiu_op, imm)); } ++ void tltu (Register rs, Register rt, int code) { emit_long(insn_RRCO((int)rs->encoding(), (int)rt->encoding(), code, tltu_op)); } ++ void tne (Register rs, Register rt, int code) { emit_long(insn_RRCO((int)rs->encoding(), (int)rt->encoding(), code, tne_op)); } ++ void tnei (Register rs, int imm) { emit_long(insn_ORRI(regimm_op, (int)rs->encoding(), tnei_op, imm)); } ++ ++ void xorr(Register rd, Register rs, Register rt) { emit_long(insn_RRRO((int)rs->encoding(), (int)rt->encoding(), (int)rd->encoding(), xor_op)); } ++ void xori(Register rt, Register rs, int imm) { emit_long(insn_ORRI(xori_op, (int)rs->encoding(), (int)rt->encoding(), simm16(imm))); } ++ ++ void nop() { emit_long(0); } ++ ++ ++ ++ void ldc1(FloatRegister ft, Register base, int off) { emit_long(insn_ORRI(ldc1_op, (int)base->encoding(), (int)ft->encoding(), off)); } ++ void lwc1(FloatRegister ft, Register base, int off) { emit_long(insn_ORRI(lwc1_op, (int)base->encoding(), (int)ft->encoding(), off)); } ++ void ldc1(FloatRegister ft, Address src); ++ void lwc1(FloatRegister ft, Address src); ++ ++ //COP0 ++ void mfc0 (Register rt, Register rd) { emit_long(insn_COP0( mfc0_op, (int)rt->encoding(), (int)rd->encoding())); } ++ void dmfc0 (Register rt, FloatRegister rd) { emit_long(insn_COP0(dmfc0_op, (int)rt->encoding(), (int)rd->encoding())); } ++ // MFGC0, DMFGC0, MTGC0, DMTGC0 not implemented yet ++ void mtc0 (Register rt, Register rd) { emit_long(insn_COP0( mtc0_op, (int)rt->encoding(), (int)rd->encoding())); } ++ void dmtc0 (Register rt, FloatRegister rd) { emit_long(insn_COP0(dmtc0_op, (int)rt->encoding(), (int)rd->encoding())); } ++ //COP0 end ++ ++ ++ //COP1 ++ void mfc1 (Register rt, FloatRegister fs) { emit_long(insn_COP1 (mfc1_op, (int)rt->encoding(), (int)fs->encoding())); } ++ void dmfc1(Register rt, FloatRegister fs) { emit_long(insn_COP1(dmfc1_op, (int)rt->encoding(), (int)fs->encoding())); } ++ void cfc1 (Register rt, int fs) { emit_long(insn_COP1( cfc1_op, (int)rt->encoding(), fs)); } ++ void mfhc1(Register rt, int fs) { emit_long(insn_COP1(mfhc1_op, (int)rt->encoding(), fs)); } ++ void mtc1 (Register rt, FloatRegister fs) { emit_long(insn_COP1( mtc1_op, (int)rt->encoding(), (int)fs->encoding())); } ++ void dmtc1(Register rt, FloatRegister fs) { emit_long(insn_COP1(dmtc1_op, (int)rt->encoding(), (int)fs->encoding())); } ++ void ctc1 (Register rt, FloatRegister fs) { emit_long(insn_COP1( ctc1_op, (int)rt->encoding(), (int)fs->encoding())); } ++ void ctc1 (Register rt, int fs) { emit_long(insn_COP1(ctc1_op, (int)rt->encoding(), fs)); } ++ void mthc1(Register rt, int fs) { emit_long(insn_COP1(mthc1_op, (int)rt->encoding(), fs)); } ++ ++ void bc1f (int off) { emit_long(insn_ORRI(cop1_op, bc1f_op, bcf_op, off)); has_delay_slot(); } ++ void bc1fl(int off) { emit_long(insn_ORRI(cop1_op, bc1f_op, bcfl_op, off)); has_delay_slot(); } ++ void bc1t (int off) { emit_long(insn_ORRI(cop1_op, bc1f_op, bct_op, off)); has_delay_slot(); } ++ void bc1tl(int off) { emit_long(insn_ORRI(cop1_op, bc1f_op, bctl_op, off)); has_delay_slot(); } ++ ++ void bc1f (address entry) { 
bc1f(offset(entry)); } ++ void bc1fl(address entry) { bc1fl(offset(entry)); } ++ void bc1t (address entry) { bc1t(offset(entry)); } ++ void bc1tl(address entry) { bc1tl(offset(entry)); } ++ ++ void bc1f (Label& L) { bc1f(target(L)); } ++ void bc1fl(Label& L) { bc1fl(target(L)); } ++ void bc1t (Label& L) { bc1t(target(L)); } ++ void bc1tl(Label& L) { bc1tl(target(L)); } ++ ++//R0->encoding() is 0; INSN_SINGLE is enclosed by {} for ctags. ++#define INSN_SINGLE(r1, r2, r3, op) \ ++ { emit_long(insn_F3RO(single_fmt, (int)r1->encoding(), (int)r2->encoding(), (int)r3->encoding(), op));} ++ void add_s (FloatRegister fd, FloatRegister fs, FloatRegister ft) {INSN_SINGLE(ft, fs, fd, fadd_op)} ++ void sub_s (FloatRegister fd, FloatRegister fs, FloatRegister ft) {INSN_SINGLE(ft, fs, fd, fsub_op)} ++ void mul_s (FloatRegister fd, FloatRegister fs, FloatRegister ft) {INSN_SINGLE(ft, fs, fd, fmul_op)} ++ void div_s (FloatRegister fd, FloatRegister fs, FloatRegister ft) {INSN_SINGLE(ft, fs, fd, fdiv_op)} ++ void sqrt_s (FloatRegister fd, FloatRegister fs) {INSN_SINGLE(R0, fs, fd, fsqrt_op)} ++ void abs_s (FloatRegister fd, FloatRegister fs) {INSN_SINGLE(R0, fs, fd, fabs_op)} ++ void mov_s (FloatRegister fd, FloatRegister fs) {INSN_SINGLE(R0, fs, fd, fmov_op)} ++ void neg_s (FloatRegister fd, FloatRegister fs) {INSN_SINGLE(R0, fs, fd, fneg_op)} ++ void round_l_s(FloatRegister fd, FloatRegister fs) {INSN_SINGLE(R0, fs, fd, froundl_op)} ++ void trunc_l_s(FloatRegister fd, FloatRegister fs) {INSN_SINGLE(R0, fs, fd, ftruncl_op)} ++ void ceil_l_s (FloatRegister fd, FloatRegister fs) {INSN_SINGLE(R0, fs, fd, fceill_op)} ++ void floor_l_s(FloatRegister fd, FloatRegister fs) {INSN_SINGLE(R0, fs, fd, ffloorl_op)} ++ void round_w_s(FloatRegister fd, FloatRegister fs) {INSN_SINGLE(R0, fs, fd, froundw_op)} ++ void trunc_w_s(FloatRegister fd, FloatRegister fs) {INSN_SINGLE(R0, fs, fd, ftruncw_op)} ++ void ceil_w_s (FloatRegister fd, FloatRegister fs) {INSN_SINGLE(R0, fs, fd, fceilw_op)} ++ void floor_w_s(FloatRegister fd, FloatRegister fs) {INSN_SINGLE(R0, fs, fd, ffloorw_op)} ++ //null ++ void movf_s(FloatRegister fs, FloatRegister fd, int cc = 0) { ++ assert(cc >= 0 && cc <= 7, "cc is 3 bits"); ++ emit_long((cop1_op<<26) | (single_fmt<<21) | (cc<<18) | ((int)fs->encoding()<<11) | ((int)fd->encoding()<<6) | movf_f_op );} ++ void movt_s(FloatRegister fs, FloatRegister fd, int cc = 0) { ++ assert(cc >= 0 && cc <= 7, "cc is 3 bits"); ++ emit_long((cop1_op<<26) | (single_fmt<<21) | (cc<<18) | 1<<16 | ((int)fs->encoding()<<11) | ((int)fd->encoding()<<6) | movf_f_op );} ++ void movz_s (FloatRegister fd, FloatRegister fs, Register rt) {INSN_SINGLE(rt, fs, fd, movz_f_op)} ++ void movn_s (FloatRegister fd, FloatRegister fs, Register rt) {INSN_SINGLE(rt, fs, fd, movn_f_op)} ++ //null ++ void recip_s (FloatRegister fd, FloatRegister fs) {INSN_SINGLE(R0, fs, fd, frecip_op)} ++ void rsqrt_s (FloatRegister fd, FloatRegister fs) {INSN_SINGLE(R0, fs, fd, frsqrt_op)} ++ //null ++ void cvt_d_s (FloatRegister fd, FloatRegister fs) {INSN_SINGLE(R0, fs, fd, fcvtd_op)} ++ //null ++ void cvt_w_s (FloatRegister fd, FloatRegister fs) {INSN_SINGLE(R0, fs, fd, fcvtw_op)} ++ void cvt_l_s (FloatRegister fd, FloatRegister fs) {INSN_SINGLE(R0, fs, fd, fcvtl_op)} ++ void cvt_ps_s(FloatRegister fd, FloatRegister fs, FloatRegister ft) {INSN_SINGLE(ft, fs, fd, fcvtps_op)} ++ //null ++ void c_f_s (FloatRegister fs, FloatRegister ft) {INSN_SINGLE(ft, fs, R0, f_cond)} ++ void c_un_s (FloatRegister fs, FloatRegister ft) {INSN_SINGLE(ft, fs, R0, un_cond)} 
++ void c_eq_s (FloatRegister fs, FloatRegister ft) {INSN_SINGLE(ft, fs, R0, eq_cond)} ++ void c_ueq_s (FloatRegister fs, FloatRegister ft) {INSN_SINGLE(ft, fs, R0, ueq_cond)} ++ void c_olt_s (FloatRegister fs, FloatRegister ft) {INSN_SINGLE(ft, fs, R0, olt_cond)} ++ void c_ult_s (FloatRegister fs, FloatRegister ft) {INSN_SINGLE(ft, fs, R0, ult_cond)} ++ void c_ole_s (FloatRegister fs, FloatRegister ft) {INSN_SINGLE(ft, fs, R0, ole_cond)} ++ void c_ule_s (FloatRegister fs, FloatRegister ft) {INSN_SINGLE(ft, fs, R0, ule_cond)} ++ void c_sf_s (FloatRegister fs, FloatRegister ft) {INSN_SINGLE(ft, fs, R0, sf_cond)} ++ void c_ngle_s(FloatRegister fs, FloatRegister ft) {INSN_SINGLE(ft, fs, R0, ngle_cond)} ++ void c_seq_s (FloatRegister fs, FloatRegister ft) {INSN_SINGLE(ft, fs, R0, seq_cond)} ++ void c_ngl_s (FloatRegister fs, FloatRegister ft) {INSN_SINGLE(ft, fs, R0, ngl_cond)} ++ void c_lt_s (FloatRegister fs, FloatRegister ft) {INSN_SINGLE(ft, fs, R0, lt_cond)} ++ void c_nge_s (FloatRegister fs, FloatRegister ft) {INSN_SINGLE(ft, fs, R0, nge_cond)} ++ void c_le_s (FloatRegister fs, FloatRegister ft) {INSN_SINGLE(ft, fs, R0, le_cond)} ++ void c_ngt_s (FloatRegister fs, FloatRegister ft) {INSN_SINGLE(ft, fs, R0, ngt_cond)} ++ ++#undef INSN_SINGLE ++ ++ ++//R0->encoding() is 0; INSN_DOUBLE is enclosed by {} for ctags. ++#define INSN_DOUBLE(r1, r2, r3, op) \ ++ { emit_long(insn_F3RO(double_fmt, (int)r1->encoding(), (int)r2->encoding(), (int)r3->encoding(), op));} ++ ++ void add_d (FloatRegister fd, FloatRegister fs, FloatRegister ft) {INSN_DOUBLE(ft, fs, fd, fadd_op)} ++ void sub_d (FloatRegister fd, FloatRegister fs, FloatRegister ft) {INSN_DOUBLE(ft, fs, fd, fsub_op)} ++ void mul_d (FloatRegister fd, FloatRegister fs, FloatRegister ft) {INSN_DOUBLE(ft, fs, fd, fmul_op)} ++ void div_d (FloatRegister fd, FloatRegister fs, FloatRegister ft) {INSN_DOUBLE(ft, fs, fd, fdiv_op)} ++ void sqrt_d (FloatRegister fd, FloatRegister fs) {INSN_DOUBLE(R0, fs, fd, fsqrt_op)} ++ void abs_d (FloatRegister fd, FloatRegister fs) {INSN_DOUBLE(R0, fs, fd, fabs_op)} ++ void mov_d (FloatRegister fd, FloatRegister fs) {INSN_DOUBLE(R0, fs, fd, fmov_op)} ++ void neg_d (FloatRegister fd, FloatRegister fs) {INSN_DOUBLE(R0, fs, fd, fneg_op)} ++ void round_l_d(FloatRegister fd, FloatRegister fs) {INSN_DOUBLE(R0, fs, fd, froundl_op)} ++ void trunc_l_d(FloatRegister fd, FloatRegister fs) {INSN_DOUBLE(R0, fs, fd, ftruncl_op)} ++ void ceil_l_d (FloatRegister fd, FloatRegister fs) {INSN_DOUBLE(R0, fs, fd, fceill_op)} ++ void floor_l_d(FloatRegister fd, FloatRegister fs) {INSN_DOUBLE(R0, fs, fd, ffloorl_op)} ++ void round_w_d(FloatRegister fd, FloatRegister fs) {INSN_DOUBLE(R0, fs, fd, froundw_op)} ++ void trunc_w_d(FloatRegister fd, FloatRegister fs) {INSN_DOUBLE(R0, fs, fd, ftruncw_op)} ++ void ceil_w_d (FloatRegister fd, FloatRegister fs) {INSN_DOUBLE(R0, fs, fd, fceilw_op)} ++ void floor_w_d(FloatRegister fd, FloatRegister fs) {INSN_DOUBLE(R0, fs, fd, ffloorw_op)} ++ //null ++ void movf_d(FloatRegister fs, FloatRegister fd, int cc = 0) { ++ assert(cc >= 0 && cc <= 7, "cc is 3 bits"); ++ emit_long((cop1_op<<26) | (double_fmt<<21) | (cc<<18) | ((int)fs->encoding()<<11) | ((int)fd->encoding()<<6) | movf_f_op );} ++ void movt_d(FloatRegister fs, FloatRegister fd, int cc = 0) { ++ assert(cc >= 0 && cc <= 7, "cc is 3 bits"); ++ emit_long((cop1_op<<26) | (double_fmt<<21) | (cc<<18) | 1<<16 | ((int)fs->encoding()<<11) | ((int)fd->encoding()<<6) | movf_f_op );} ++ void movz_d (FloatRegister fd, FloatRegister fs, Register rt) 
{INSN_DOUBLE(rt, fs, fd, movz_f_op)} ++ void movn_d (FloatRegister fd, FloatRegister fs, Register rt) {INSN_DOUBLE(rt, fs, fd, movn_f_op)} ++ //null ++ void recip_d (FloatRegister fd, FloatRegister fs) {INSN_DOUBLE(R0, fs, fd, frecip_op)} ++ void rsqrt_d (FloatRegister fd, FloatRegister fs) {INSN_DOUBLE(R0, fs, fd, frsqrt_op)} ++ //null ++ void cvt_s_d (FloatRegister fd, FloatRegister fs) {INSN_DOUBLE(R0, fs, fd, fcvts_op)} ++ void cvt_l_d (FloatRegister fd, FloatRegister fs) {INSN_DOUBLE(R0, fs, fd, fcvtl_op)} ++ //null ++ void cvt_w_d (FloatRegister fd, FloatRegister fs) {INSN_DOUBLE(R0, fs, fd, fcvtw_op)} ++ //null ++ void c_f_d (FloatRegister fs, FloatRegister ft) {INSN_DOUBLE(ft, fs, R0, f_cond)} ++ void c_un_d (FloatRegister fs, FloatRegister ft) {INSN_DOUBLE(ft, fs, R0, un_cond)} ++ void c_eq_d (FloatRegister fs, FloatRegister ft) {INSN_DOUBLE(ft, fs, R0, eq_cond)} ++ void c_ueq_d (FloatRegister fs, FloatRegister ft) {INSN_DOUBLE(ft, fs, R0, ueq_cond)} ++ void c_olt_d (FloatRegister fs, FloatRegister ft) {INSN_DOUBLE(ft, fs, R0, olt_cond)} ++ void c_ult_d (FloatRegister fs, FloatRegister ft) {INSN_DOUBLE(ft, fs, R0, ult_cond)} ++ void c_ole_d (FloatRegister fs, FloatRegister ft) {INSN_DOUBLE(ft, fs, R0, ole_cond)} ++ void c_ule_d (FloatRegister fs, FloatRegister ft) {INSN_DOUBLE(ft, fs, R0, ule_cond)} ++ void c_sf_d (FloatRegister fs, FloatRegister ft) {INSN_DOUBLE(ft, fs, R0, sf_cond)} ++ void c_ngle_d(FloatRegister fs, FloatRegister ft) {INSN_DOUBLE(ft, fs, R0, ngle_cond)} ++ void c_seq_d (FloatRegister fs, FloatRegister ft) {INSN_DOUBLE(ft, fs, R0, seq_cond)} ++ void c_ngl_d (FloatRegister fs, FloatRegister ft) {INSN_DOUBLE(ft, fs, R0, ngl_cond)} ++ void c_lt_d (FloatRegister fs, FloatRegister ft) {INSN_DOUBLE(ft, fs, R0, lt_cond)} ++ void c_nge_d (FloatRegister fs, FloatRegister ft) {INSN_DOUBLE(ft, fs, R0, nge_cond)} ++ void c_le_d (FloatRegister fs, FloatRegister ft) {INSN_DOUBLE(ft, fs, R0, le_cond)} ++ void c_ngt_d (FloatRegister fs, FloatRegister ft) {INSN_DOUBLE(ft, fs, R0, ngt_cond)} ++ ++#undef INSN_DOUBLE ++ ++ ++ //null ++ void cvt_s_w(FloatRegister fd, FloatRegister fs) { emit_long(insn_F3RO(word_fmt, 0, (int)fs->encoding(), (int)fd->encoding(), fcvts_op)); } ++ void cvt_d_w(FloatRegister fd, FloatRegister fs) { emit_long(insn_F3RO(word_fmt, 0, (int)fs->encoding(), (int)fd->encoding(), fcvtd_op)); } ++ //null ++ void cvt_s_l(FloatRegister fd, FloatRegister fs) { emit_long(insn_F3RO(long_fmt, 0, (int)fs->encoding(), (int)fd->encoding(), fcvts_op)); } ++ void cvt_d_l(FloatRegister fd, FloatRegister fs) { emit_long(insn_F3RO(long_fmt, 0, (int)fs->encoding(), (int)fd->encoding(), fcvtd_op)); } ++ //null ++ ++ ++//R0->encoding() is 0; INSN_PS is enclosed by {} for ctags. 
++#define INSN_PS(r1, r2, r3, op) \ ++ { emit_long(insn_F3RO(ps_fmt, (int)r1->encoding(), (int)r2->encoding(), (int)r3->encoding(), op));} ++ ++ void add_ps (FloatRegister fd, FloatRegister fs, FloatRegister ft) {INSN_PS(ft, fs, fd, fadd_op)} ++ void sub_ps (FloatRegister fd, FloatRegister fs, FloatRegister ft) {INSN_PS(ft, fs, fd, fsub_op)} ++ void mul_ps (FloatRegister fd, FloatRegister fs, FloatRegister ft) {INSN_PS(ft, fs, fd, fmul_op)} ++ //null ++ void abs_ps (FloatRegister fd, FloatRegister fs) {INSN_PS(R0, fs, fd, fabs_op)} ++ void mov_ps (FloatRegister fd, FloatRegister fs) {INSN_PS(R0, fs, fd, fmov_op)} ++ void neg_ps (FloatRegister fd, FloatRegister fs) {INSN_PS(R0, fs, fd, fneg_op)} ++ //null ++ //void movf_ps(FloatRegister rd, FloatRegister rs, FPConditionCode cc) { unimplemented(" movf_ps")} ++ //void movt_ps(FloatRegister rd, FloatRegister rs, FPConditionCode cc) { unimplemented(" movt_ps") } ++ void movz_ps (FloatRegister fd, FloatRegister fs, Register rt) {INSN_PS(rt, fs, fd, movz_f_op)} ++ void movn_ps (FloatRegister fd, FloatRegister fs, Register rt) {INSN_PS(rt, fs, fd, movn_f_op)} ++ //null ++ void cvt_s_pu (FloatRegister fd, FloatRegister fs) {INSN_PS(R0, fs, fd, fcvts_op)} ++ //null ++ void cvt_s_pl (FloatRegister fd, FloatRegister fs) {INSN_PS(R0, fs, fd, fcvtspl_op)} ++ //null ++ void pll_ps (FloatRegister fd, FloatRegister fs, FloatRegister ft) {INSN_PS(ft, fs, fd, fpll_op)} ++ void plu_ps (FloatRegister fd, FloatRegister fs, FloatRegister ft) {INSN_PS(ft, fs, fd, fplu_op)} ++ void pul_ps (FloatRegister fd, FloatRegister fs, FloatRegister ft) {INSN_PS(ft, fs, fd, fpul_op)} ++ void puu_ps (FloatRegister fd, FloatRegister fs, FloatRegister ft) {INSN_PS(ft, fs, fd, fpuu_op)} ++ void c_f_ps (FloatRegister fs, FloatRegister ft) {INSN_PS(ft, fs, R0, f_cond)} ++ void c_un_ps (FloatRegister fs, FloatRegister ft) {INSN_PS(ft, fs, R0, un_cond)} ++ void c_eq_ps (FloatRegister fs, FloatRegister ft) {INSN_PS(ft, fs, R0, eq_cond)} ++ void c_ueq_ps (FloatRegister fs, FloatRegister ft) {INSN_PS(ft, fs, R0, ueq_cond)} ++ void c_olt_ps (FloatRegister fs, FloatRegister ft) {INSN_PS(ft, fs, R0, olt_cond)} ++ void c_ult_ps (FloatRegister fs, FloatRegister ft) {INSN_PS(ft, fs, R0, ult_cond)} ++ void c_ole_ps (FloatRegister fs, FloatRegister ft) {INSN_PS(ft, fs, R0, ole_cond)} ++ void c_ule_ps (FloatRegister fs, FloatRegister ft) {INSN_PS(ft, fs, R0, ule_cond)} ++ void c_sf_ps (FloatRegister fs, FloatRegister ft) {INSN_PS(ft, fs, R0, sf_cond)} ++ void c_ngle_ps(FloatRegister fs, FloatRegister ft) {INSN_PS(ft, fs, R0, ngle_cond)} ++ void c_seq_ps (FloatRegister fs, FloatRegister ft) {INSN_PS(ft, fs, R0, seq_cond)} ++ void c_ngl_ps (FloatRegister fs, FloatRegister ft) {INSN_PS(ft, fs, R0, ngl_cond)} ++ void c_lt_ps (FloatRegister fs, FloatRegister ft) {INSN_PS(ft, fs, R0, lt_cond)} ++ void c_nge_ps (FloatRegister fs, FloatRegister ft) {INSN_PS(ft, fs, R0, nge_cond)} ++ void c_le_ps (FloatRegister fs, FloatRegister ft) {INSN_PS(ft, fs, R0, le_cond)} ++ void c_ngt_ps (FloatRegister fs, FloatRegister ft) {INSN_PS(ft, fs, R0, ngt_cond)} ++ //null ++#undef INSN_PS ++ //COP1 end ++ ++ ++ //COP1X ++//R0->encoding() is 0; INSN_SINGLE is enclosed by {} for ctags. 
++#define INSN_COP1X(r0, r1, r2, r3, op) \ ++ { emit_long(insn_F3ROX((int)r0->encoding(), (int)r1->encoding(), (int)r2->encoding(), (int)r3->encoding(), op));} ++ void madd_s(FloatRegister fd, FloatRegister fr, FloatRegister fs, FloatRegister ft) {INSN_COP1X(fr, ft, fs, fd, madd_s_op) } ++ void madd_d(FloatRegister fd, FloatRegister fr, FloatRegister fs, FloatRegister ft) {INSN_COP1X(fr, ft, fs, fd, madd_d_op) } ++ void madd_ps(FloatRegister fd, FloatRegister fr, FloatRegister fs, FloatRegister ft){INSN_COP1X(fr, ft, fs, fd, madd_ps_op) } ++ void msub_s(FloatRegister fd, FloatRegister fr, FloatRegister fs, FloatRegister ft) {INSN_COP1X(fr, ft, fs, fd, msub_s_op) } ++ void msub_d(FloatRegister fd, FloatRegister fr, FloatRegister fs, FloatRegister ft) {INSN_COP1X(fr, ft, fs, fd, msub_d_op) } ++ void msub_ps(FloatRegister fd, FloatRegister fr, FloatRegister fs, FloatRegister ft){INSN_COP1X(fr, ft, fs, fd, msub_ps_op) } ++ void nmadd_s(FloatRegister fd, FloatRegister fr, FloatRegister fs, FloatRegister ft) {INSN_COP1X(fr, ft, fs, fd, nmadd_s_op) } ++ void nmadd_d(FloatRegister fd, FloatRegister fr, FloatRegister fs, FloatRegister ft) {INSN_COP1X(fr, ft, fs, fd, nmadd_d_op) } ++ void nmadd_ps(FloatRegister fd, FloatRegister fr, FloatRegister fs, FloatRegister ft){INSN_COP1X(fr, ft, fs, fd, nmadd_ps_op) } ++ void nmsub_s(FloatRegister fd, FloatRegister fr, FloatRegister fs, FloatRegister ft) {INSN_COP1X(fr, ft, fs, fd, nmsub_s_op) } ++ void nmsub_d(FloatRegister fd, FloatRegister fr, FloatRegister fs, FloatRegister ft) {INSN_COP1X(fr, ft, fs, fd, nmsub_d_op) } ++ void nmsub_ps(FloatRegister fd, FloatRegister fr, FloatRegister fs, FloatRegister ft){INSN_COP1X(fr, ft, fs, fd, nmsub_ps_op) } ++#undef INSN_COP1X ++ //COP1X end ++ ++ //SPECIAL2 ++//R0->encoding() is 0; INSN_PS is enclosed by {} for ctags. 
++#define INSN_S2(op) \ ++ { emit_long((special2_op << 26) | ((int)rs->encoding() << 21) | ((int)rt->encoding() << 16) | ((int)rd->encoding() << 11) | op);} ++ ++ void madd (Register rs, Register rt) { emit_long((special2_op << 26) | ((int)rs->encoding() << 21) | ((int)rt->encoding() << 16) | madd_op); } ++ void maddu (Register rs, Register rt) { emit_long((special2_op << 26) | ((int)rs->encoding() << 21) | ((int)rt->encoding() << 16) | maddu_op); } ++ void mul (Register rd, Register rs, Register rt) { INSN_S2(mul_op) } ++ void gsandn (Register rd, Register rs, Register rt) { INSN_S2((0x12 << 6) | gs0x03_op) } ++ void msub (Register rs, Register rt) { emit_long((special2_op << 26) | ((int)rs->encoding() << 21) | ((int)rt->encoding() << 16) | msub_op); } ++ void msubu (Register rs, Register rt) { emit_long((special2_op << 26) | ((int)rs->encoding() << 21) | ((int)rt->encoding() << 16) | msubu_op); } ++ void gsorn (Register rd, Register rs, Register rt) { INSN_S2((0x12 << 6) | gs0x06_op) } ++ ++ void gsmult (Register rd, Register rs, Register rt) { INSN_S2(gsmult_op) } ++ void gsdmult (Register rd, Register rs, Register rt) { INSN_S2(gsdmult_op) } ++ void gsmultu (Register rd, Register rs, Register rt) { INSN_S2(gsmultu_op) } ++ void gsdmultu(Register rd, Register rs, Register rt) { INSN_S2(gsdmultu_op)} ++ void gsdiv (Register rd, Register rs, Register rt) { INSN_S2(gsdiv_op) } ++ void gsddiv (Register rd, Register rs, Register rt) { INSN_S2(gsddiv_op) } ++ void gsdivu (Register rd, Register rs, Register rt) { INSN_S2(gsdivu_op) } ++ void gsddivu (Register rd, Register rs, Register rt) { INSN_S2(gsddivu_op) } ++ void gsmod (Register rd, Register rs, Register rt) { INSN_S2(gsmod_op) } ++ void gsdmod (Register rd, Register rs, Register rt) { INSN_S2(gsdmod_op) } ++ void gsmodu (Register rd, Register rs, Register rt) { INSN_S2(gsmodu_op) } ++ void gsdmodu (Register rd, Register rs, Register rt) { INSN_S2(gsdmodu_op) } ++ void clz (Register rd, Register rs) { emit_long((special2_op << 26) | ((int)rs->encoding() << 21) | ((int)rd->encoding() << 16) | ((int)rd->encoding() << 11) | clz_op); } ++ void clo (Register rd, Register rs) { emit_long((special2_op << 26) | ((int)rs->encoding() << 21) | ((int)rd->encoding() << 16) | ((int)rd->encoding() << 11) | clo_op); } ++ void ctz (Register rd, Register rs) { emit_long((special2_op << 26) | ((int)rs->encoding() << 21) | ((int)rd->encoding() << 16) | ((int)rd->encoding() << 11) | 0 << 6| xctx_op); } ++ void cto (Register rd, Register rs) { emit_long((special2_op << 26) | ((int)rs->encoding() << 21) | ((int)rd->encoding() << 16) | ((int)rd->encoding() << 11) | 1 << 6| xctx_op); } ++ void dctz(Register rd, Register rs) { emit_long((special2_op << 26) | ((int)rs->encoding() << 21) | ((int)rd->encoding() << 16) | ((int)rd->encoding() << 11) | 2 << 6| xctx_op); } ++ void dcto(Register rd, Register rs) { emit_long((special2_op << 26) | ((int)rs->encoding() << 21) | ((int)rd->encoding() << 16) | ((int)rd->encoding() << 11) | 3 << 6| xctx_op); } ++ void dclz(Register rd, Register rs) { emit_long((special2_op << 26) | ((int)rs->encoding() << 21) | ((int)rd->encoding() << 16) | ((int)rd->encoding() << 11) | dclz_op); } ++ void dclo(Register rd, Register rs) { emit_long((special2_op << 26) | ((int)rs->encoding() << 21) | ((int)rd->encoding() << 16) | ((int)rd->encoding() << 11) | dclo_op); } ++ ++#undef INSN_S2 ++ ++ //SPECIAL3 ++/* ++// FIXME ++#define is_0_to_32(a, b) \ ++ assert (a >= 0, " just a check"); \ ++ assert (a <= 0, " just a check"); \ ++ assert (b >= 
0, " just a check"); \ ++ assert (b <= 0, " just a check"); \ ++ assert (a+b >= 0, " just a check"); \ ++ assert (a+b <= 0, " just a check"); ++ */ ++#define is_0_to_32(a, b) ++ ++ void ins (Register rt, Register rs, int pos, int size) { is_0_to_32(pos, size); emit_long((special3_op << 26) | ((int)rs->encoding() << 21) | ((int)rt->encoding() << 16) | (low(pos+size-1, 5) << 11) | (low(pos, 5) << 6) | ins_op); } ++ void dinsm(Register rt, Register rs, int pos, int size) { is_0_to_32(pos, size); emit_long((special3_op << 26) | ((int)rs->encoding() << 21) | ((int)rt->encoding() << 16) | (low(pos+size-33, 5) << 11) | (low(pos, 5) << 6) | dinsm_op); } ++ void dinsu(Register rt, Register rs, int pos, int size) { is_0_to_32(pos, size); emit_long((special3_op << 26) | ((int)rs->encoding() << 21) | ((int)rt->encoding() << 16) | (low(pos+size-33, 5) << 11) | (low(pos-32, 5) << 6) | dinsu_op); } ++ void dins (Register rt, Register rs, int pos, int size) { ++ guarantee((0 <= pos) && (pos < 32), "pos must be in [0, 32)"); ++ guarantee((0 < size) && (size <= 32), "size must be in (0, 32]"); ++ guarantee((0 < pos + size) && (pos + size <= 32), "pos + size must be in (0, 32]"); ++ ++ emit_long((special3_op << 26) | ((int)rs->encoding() << 21) | ((int)rt->encoding() << 16) | (low(pos+size-1, 5) << 11) | (low(pos, 5) << 6) | dins_op); ++ } ++ ++ void repl_qb (Register rd, int const8) { assert(VM_Version::supports_dsp(), ""); emit_long((special3_op << 26) | (low(const8, 8) << 16) | ((int)rd->encoding() << 11) | repl_qb_op << 6 | re1_op); } ++ void replv_qb(Register rd, Register rt) { assert(VM_Version::supports_dsp(), ""); emit_long((special3_op << 26) | ((int)rt->encoding() << 16) | ((int)rd->encoding() << 11) | replv_qb_op << 6 | re1_op ); } ++ void repl_ph (Register rd, int const10) { assert(VM_Version::supports_dsp(), ""); emit_long((special3_op << 26) | (low(const10, 10) << 16) | ((int)rd->encoding() << 11) | repl_ph_op << 6 | re1_op); } ++ void replv_ph(Register rd, Register rt) { assert(VM_Version::supports_dsp(), ""); emit_long((special3_op << 26) | ((int)rt->encoding() << 16) | ((int)rd->encoding() << 11) | replv_ph_op << 6 | re1_op ); } ++ ++ void repl_ob (Register rd, int const8) { assert(VM_Version::supports_dsp(), ""); emit_long((special3_op << 26) | (low(const8, 8) << 16) | ((int)rd->encoding() << 11) | repl_ob_op << 6 | re2_op); } ++ void replv_ob(Register rd, Register rt) { assert(VM_Version::supports_dsp(), ""); emit_long((special3_op << 26) | ((int)rt->encoding() << 16) | ((int)rd->encoding() << 11) | replv_ob_op << 6 | re2_op ); } ++ void repl_qh (Register rd, int const10) { assert(VM_Version::supports_dsp(), ""); emit_long((special3_op << 26) | (low(const10, 10) << 16) | ((int)rd->encoding() << 11) | repl_qh_op << 6 | re2_op); } ++ void replv_qh(Register rd, Register rt) { assert(VM_Version::supports_dsp(), ""); emit_long((special3_op << 26) | ((int)rt->encoding() << 16) | ((int)rd->encoding() << 11) | replv_qh_op << 6 | re2_op ); } ++ void repl_pw (Register rd, int const10) { assert(VM_Version::supports_dsp(), ""); emit_long((special3_op << 26) | (low(const10, 10) << 16) | ((int)rd->encoding() << 11) | repl_pw_op << 6 | re2_op); } ++ void replv_pw(Register rd, Register rt) { assert(VM_Version::supports_dsp(), ""); emit_long((special3_op << 26) | ((int)rt->encoding() << 16) | ((int)rd->encoding() << 11) | replv_pw_op << 6 | re2_op ); } ++ ++ void sdc1(FloatRegister ft, Register base, int off) { emit_long(insn_ORRI(sdc1_op, (int)base->encoding(), (int)ft->encoding(), off)); } ++ void 
sdc1(FloatRegister ft, Address dst); ++ void swc1(FloatRegister ft, Register base, int off) { emit_long(insn_ORRI(swc1_op, (int)base->encoding(), (int)ft->encoding(), off)); } ++ void swc1(FloatRegister ft, Address dst); ++ ++ ++ static void print_instruction(int); ++ int patched_branch(int dest_pos, int inst, int inst_pos); ++ int branch_destination(int inst, int pos); ++ ++ // Loongson extension ++ ++ // gssq/gslq/gssqc1/gslqc1: vAddr = sign_extend(offset << 4 ) + GPR[base]. Therefore, the off should be ">> 4". ++ void gslble(Register rt, Register base, Register bound) { ++ emit_long((gs_lwc2_op << 26) | ((int)base->encoding() << 21) | ((int)rt->encoding() << 16) | ((int)bound->encoding() << 11) | 0 << 6 | gslble_op); ++ } ++ ++ void gslbgt(Register rt, Register base, Register bound) { ++ emit_long((gs_lwc2_op << 26) | ((int)base->encoding() << 21) | ((int)rt->encoding() << 16) | ((int)bound->encoding() << 11) | 0 << 6 | gslbgt_op); ++ } ++ ++ void gslhle(Register rt, Register base, Register bound) { ++ emit_long((gs_lwc2_op << 26) | ((int)base->encoding() << 21) | ((int)rt->encoding() << 16) | ((int)bound->encoding() << 11) | 0 << 6 | gslhle_op); ++ } ++ ++ void gslhgt(Register rt, Register base, Register bound) { ++ emit_long((gs_lwc2_op << 26) | ((int)base->encoding() << 21) | ((int)rt->encoding() << 16) | ((int)bound->encoding() << 11) | 0 << 6 | gslhgt_op); ++ } ++ ++ void gslwle(Register rt, Register base, Register bound) { ++ emit_long((gs_lwc2_op << 26) | ((int)base->encoding() << 21) | ((int)rt->encoding() << 16) | ((int)bound->encoding() << 11) | 0 << 6 | gslwle_op); ++ } ++ ++ void gslwgt(Register rt, Register base, Register bound) { ++ emit_long((gs_lwc2_op << 26) | ((int)base->encoding() << 21) | ((int)rt->encoding() << 16) | ((int)bound->encoding() << 11) | 0 << 6 | gslwgt_op); ++ } ++ ++ void gsldle(Register rt, Register base, Register bound) { ++ emit_long((gs_lwc2_op << 26) | ((int)base->encoding() << 21) | ((int)rt->encoding() << 16) | ((int)bound->encoding() << 11) | 0 << 6 | gsldle_op); ++ } ++ ++ void gsldgt(Register rt, Register base, Register bound) { ++ emit_long((gs_lwc2_op << 26) | ((int)base->encoding() << 21) | ((int)rt->encoding() << 16) | ((int)bound->encoding() << 11) | 0 << 6 | gsldgt_op); ++ } ++ ++ void gslwlec1(FloatRegister rt, Register base, Register bound) { ++ emit_long((gs_lwc2_op << 26) | ((int)base->encoding() << 21) | ((int)rt->encoding() << 16) | ((int)bound->encoding() << 11) | 0 << 6 | gslwlec1_op); ++ } ++ ++ void gslwgtc1(FloatRegister rt, Register base, Register bound) { ++ emit_long((gs_lwc2_op << 26) | ((int)base->encoding() << 21) | ((int)rt->encoding() << 16) | ((int)bound->encoding() << 11) | 0 << 6 | gslwgtc1_op); ++ } ++ ++ void gsldlec1(FloatRegister rt, Register base, Register bound) { ++ emit_long((gs_lwc2_op << 26) | ((int)base->encoding() << 21) | ((int)rt->encoding() << 16) | ((int)bound->encoding() << 11) | 0 << 6 | gsldlec1_op); ++ } ++ ++ void gsldgtc1(FloatRegister rt, Register base, Register bound) { ++ emit_long((gs_lwc2_op << 26) | ((int)base->encoding() << 21) | ((int)rt->encoding() << 16) | ((int)bound->encoding() << 11) | 0 << 6 | gsldgtc1_op); ++ } ++ ++ void gslq(Register rq, Register rt, Register base, int off) { ++ assert(!(off & 0xF), "gslq: the low 4 bits of off must be 0"); ++ off = off >> 4; ++ assert(is_simm(off, 9),"gslq: off exceeds 9 bits"); ++ emit_long((gs_lwc2_op << 26) | ((int)base->encoding() << 21) | ((int)rt->encoding() << 16) | 0 << 15 | (low(off, 9) << 6) | gslq_op | (int)rq->encoding() ); ++ } 
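[Editor's note, not part of the patch hunk above.] The gslq/gssq comment says the hardware computes vAddr = sign_extend(offset << 4) + GPR[base], so the emitters divide the byte offset by 16 before packing it. A minimal standalone C++ sketch of that check and packing, assuming the field layout used by gslq above; fits_simm() and low_bits() are hypothetical stand-ins for the patch's is_simm() and low():

    #include <cassert>
    #include <cstdint>

    // Stand-ins for the patch's is_simm()/low() helpers (assumed semantics).
    static bool fits_simm(int32_t v, int bits) {
      int32_t lo = -(1 << (bits - 1)), hi = (1 << (bits - 1)) - 1;
      return v >= lo && v <= hi;
    }
    static uint32_t low_bits(int32_t v, int bits) { return uint32_t(v) & ((1u << bits) - 1); }

    // Validate a byte offset for a 16-byte (quadword) access and return the
    // 9-bit immediate field placed at bits 14..6 of the instruction word.
    uint32_t gslq_off_field(int32_t byte_off) {
      assert((byte_off & 0xF) == 0 && "low 4 bits of off must be 0");
      int32_t scaled = byte_off >> 4;                        // hardware rescales by << 4
      assert(fits_simm(scaled, 9) && "off exceeds 9 bits after scaling");
      return low_bits(scaled, 9) << 6;
    }

Under these assumptions the reachable byte range is -4096 .. +4080 in steps of 16, which is why the asserts reject anything unaligned or farther away.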
++ ++ void gslqc1(FloatRegister rq, FloatRegister rt, Register base, int off) { ++ assert(!(off & 0xF), "gslqc1: the low 4 bits of off must be 0"); ++ off = off >> 4; ++ assert(is_simm(off, 9),"gslqc1: off exceeds 9 bits"); ++ emit_long((gs_lwc2_op << 26) | ((int)base->encoding() << 21) | ((int)rt->encoding() << 16) | 1 << 15 | (low(off, 9) << 6) | gslq_op | (int)rq->encoding() ); ++ } ++ ++ void gssble(Register rt, Register base, Register bound) { ++ emit_long((gs_swc2_op << 26) | ((int)base->encoding() << 21) | ((int)rt->encoding() << 16) | ((int)bound->encoding() << 11) | 0 << 6 | gssble_op); ++ } ++ ++ void gssbgt(Register rt, Register base, Register bound) { ++ emit_long((gs_swc2_op << 26) | ((int)base->encoding() << 21) | ((int)rt->encoding() << 16) | ((int)bound->encoding() << 11) | 0 << 6 | gssbgt_op); ++ } ++ ++ void gsshle(Register rt, Register base, Register bound) { ++ emit_long((gs_swc2_op << 26) | ((int)base->encoding() << 21) | ((int)rt->encoding() << 16) | ((int)bound->encoding() << 11) | 0 << 6 | gsshle_op); ++ } ++ ++ void gsshgt(Register rt, Register base, Register bound) { ++ emit_long((gs_swc2_op << 26) | ((int)base->encoding() << 21) | ((int)rt->encoding() << 16) | ((int)bound->encoding() << 11) | 0 << 6 | gsshgt_op); ++ } ++ ++ void gsswle(Register rt, Register base, Register bound) { ++ emit_long((gs_swc2_op << 26) | ((int)base->encoding() << 21) | ((int)rt->encoding() << 16) | ((int)bound->encoding() << 11) | 0 << 6 | gsswle_op); ++ } ++ ++ void gsswgt(Register rt, Register base, Register bound) { ++ emit_long((gs_swc2_op << 26) | ((int)base->encoding() << 21) | ((int)rt->encoding() << 16) | ((int)bound->encoding() << 11) | 0 << 6 | gsswgt_op); ++ } ++ ++ void gssdle(Register rt, Register base, Register bound) { ++ emit_long((gs_swc2_op << 26) | ((int)base->encoding() << 21) | ((int)rt->encoding() << 16) | ((int)bound->encoding() << 11) | 0 << 6 | gssdle_op); ++ } ++ ++ void gssdgt(Register rt, Register base, Register bound) { ++ emit_long((gs_swc2_op << 26) | ((int)base->encoding() << 21) | ((int)rt->encoding() << 16) | ((int)bound->encoding() << 11) | 0 << 6 | gssdgt_op); ++ } ++ ++ void gsswlec1(FloatRegister rt, Register base, Register bound) { ++ emit_long((gs_swc2_op << 26) | ((int)base->encoding() << 21) | ((int)rt->encoding() << 16) | ((int)bound->encoding() << 11) | 0 << 6 | gsswlec1_op); ++ } ++ ++ void gsswgtc1(FloatRegister rt, Register base, Register bound) { ++ emit_long((gs_swc2_op << 26) | ((int)base->encoding() << 21) | ((int)rt->encoding() << 16) | ((int)bound->encoding() << 11) | 0 << 6 | gsswgtc1_op); ++ } ++ ++ void gssdlec1(FloatRegister rt, Register base, Register bound) { ++ emit_long((gs_swc2_op << 26) | ((int)base->encoding() << 21) | ((int)rt->encoding() << 16) | ((int)bound->encoding() << 11) | 0 << 6 | gssdlec1_op); ++ } ++ ++ void gssdgtc1(FloatRegister rt, Register base, Register bound) { ++ emit_long((gs_swc2_op << 26) | ((int)base->encoding() << 21) | ((int)rt->encoding() << 16) | ((int)bound->encoding() << 11) | 0 << 6 | gssdgtc1_op); ++ } ++ ++ void gssq(Register rq, Register rt, Register base, int off) { ++ assert(!(off & 0xF), "gssq: the low 4 bits of off must be 0"); ++ off = off >> 4; ++ assert(is_simm(off, 9),"gssq: off exceeds 9 bits"); ++ emit_long((gs_swc2_op << 26) | ((int)base->encoding() << 21) | ((int)rt->encoding() << 16) | 0 << 15 | (low(off, 9) << 6) | gssq_op | (int)rq->encoding() ); ++ } ++ ++ void gssqc1(FloatRegister rq, FloatRegister rt, Register base, int off) { ++ assert(!(off & 0xF), "gssqc1: the low 4 bits 
of off must be 0"); ++ off = off >> 4; ++ assert(is_simm(off, 9),"gssqc1: off exceeds 9 bits"); ++ emit_long((gs_swc2_op << 26) | ((int)base->encoding() << 21) | ((int)rt->encoding() << 16) | 1 << 15 | (low(off, 9) << 6) | gssq_op | (int)rq->encoding() ); ++ } ++ ++ //LDC2 & SDC2 ++#define INSN(OPS, OP) \ ++ assert(is_simm(off, 8), "NAME: off exceeds 8 bits"); \ ++ assert(UseLEXT1, "check UseLEXT1"); \ ++ emit_long( (OPS << 26) | ((int)base->encoding() << 21) | ((int)rt->encoding() << 16) | \ ++ ((int)index->encoding() << 11) | (low(off, 8) << 3) | OP); ++ ++#define INSN_LDC2(NAME, op) \ ++ void NAME(Register rt, Register base, Register index, int off) { \ ++ INSN(gs_ldc2_op, op) \ ++ } ++ ++#define INSN_LDC2_F(NAME, op) \ ++ void NAME(FloatRegister rt, Register base, Register index, int off) { \ ++ INSN(gs_ldc2_op, op) \ ++ } ++ ++#define INSN_SDC2(NAME, op) \ ++ void NAME(Register rt, Register base, Register index, int off) { \ ++ INSN(gs_sdc2_op, op) \ ++ } ++ ++#define INSN_SDC2_F(NAME, op) \ ++ void NAME(FloatRegister rt, Register base, Register index, int off) { \ ++ INSN(gs_sdc2_op, op) \ ++ } ++ ++/* ++ void gslbx(Register rt, Register base, Register index, int off) { ++ assert(is_simm(off, 8), "gslbx: off exceeds 8 bits"); ++ assert(UseLEXT1, "check UseLEXT1"); ++ emit_long( (gs_ldc2_op << 26) | ((int)base->encoding() << 21) | ((int)rt->encoding() << 16) | ++ ((int)index->encoding() << 11) | (low(off, 8) << 3) | gslbx_op); ++ void gslbx(Register rt, Register base, Register index, int off) {INSN(gs_ldc2_op, gslbx_op);} ++ ++ INSN_LDC2(gslbx, gslbx_op) ++ INSN_LDC2(gslhx, gslhx_op) ++ INSN_LDC2(gslwx, gslwx_op) ++ INSN_LDC2(gsldx, gsldx_op) ++ INSN_LDC2_F(gslwxc1, gslwxc1_op) ++ INSN_LDC2_F(gsldxc1, gsldxc1_op) ++ ++ INSN_SDC2(gssbx, gssbx_op) ++ INSN_SDC2(gsshx, gsshx_op) ++ INSN_SDC2(gsswx, gsswx_op) ++ INSN_SDC2(gssdx, gssdx_op) ++ INSN_SDC2_F(gsswxc1, gsswxc1_op) ++ INSN_SDC2_F(gssdxc1, gssdxc1_op) ++*/ ++ void gslbx(Register rt, Register base, Register index, int off) {INSN(gs_ldc2_op, gslbx_op) } ++ void gslhx(Register rt, Register base, Register index, int off) {INSN(gs_ldc2_op, gslhx_op) } ++ void gslwx(Register rt, Register base, Register index, int off) {INSN(gs_ldc2_op, gslwx_op) } ++ void gsldx(Register rt, Register base, Register index, int off) {INSN(gs_ldc2_op, gsldx_op) } ++ void gslwxc1(FloatRegister rt, Register base, Register index, int off) {INSN(gs_ldc2_op, gslwxc1_op) } ++ void gsldxc1(FloatRegister rt, Register base, Register index, int off) {INSN(gs_ldc2_op, gsldxc1_op) } ++ ++ void gssbx(Register rt, Register base, Register index, int off) {INSN(gs_sdc2_op, gssbx_op) } ++ void gsshx(Register rt, Register base, Register index, int off) {INSN(gs_sdc2_op, gsshx_op) } ++ void gsswx(Register rt, Register base, Register index, int off) {INSN(gs_sdc2_op, gsswx_op) } ++ void gssdx(Register rt, Register base, Register index, int off) {INSN(gs_sdc2_op, gssdx_op) } ++ void gsswxc1(FloatRegister rt, Register base, Register index, int off) {INSN(gs_sdc2_op, gsswxc1_op) } ++ void gssdxc1(FloatRegister rt, Register base, Register index, int off) {INSN(gs_sdc2_op, gssdxc1_op) } ++ ++#undef INSN ++#undef INSN_LDC2 ++#undef INSN_LDC2_F ++#undef INSN_SDC2 ++#undef INSN_SDC2_F ++ ++ // cpucfg on Loongson CPUs above 3A4000 ++ void cpucfg(Register rd, Register rs) { emit_long((gs_lwc2_op << 26) | ((int)rs->encoding() << 21) | (0b01000 << 16) | ((int)rd->encoding() << 11) | ( 0b00100 << 6) | 0b011000);} ++ ++ enum Membar_mask_bits { ++ StoreStore = 1 << 3, ++ LoadStore = 1 << 2, 
++ StoreLoad = 1 << 1, ++ LoadLoad = 1 << 0 ++ }; ++ ++ // Serializes memory and blows flags ++ void membar(Membar_mask_bits order_constraint) { ++ sync(); ++ } ++ ++public: ++ // Creation ++ Assembler(CodeBuffer* code) : AbstractAssembler(code) { ++#ifdef CHECK_DELAY ++ delay_state = no_delay; ++#endif ++ } ++ ++ // Decoding ++ static address locate_operand(address inst, WhichOperand which); ++ static address locate_next_instruction(address inst); ++}; ++ ++#endif // CPU_MIPS_VM_ASSEMBLER_MIPS_HPP +diff --git a/src/hotspot/cpu/mips/assembler_mips.inline.hpp b/src/hotspot/cpu/mips/assembler_mips.inline.hpp +new file mode 100644 +index 00000000000..39aeb5509a7 +--- /dev/null ++++ b/src/hotspot/cpu/mips/assembler_mips.inline.hpp +@@ -0,0 +1,33 @@ ++/* ++ * Copyright (c) 1997, 2010, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2021, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#ifndef CPU_MIPS_VM_ASSEMBLER_MIPS_INLINE_HPP ++#define CPU_MIPS_VM_ASSEMBLER_MIPS_INLINE_HPP ++ ++#include "asm/assembler.inline.hpp" ++#include "asm/codeBuffer.hpp" ++#include "code/codeCache.hpp" ++ ++#endif // CPU_MIPS_VM_ASSEMBLER_MIPS_INLINE_HPP +diff --git a/src/hotspot/cpu/mips/bytes_mips.hpp b/src/hotspot/cpu/mips/bytes_mips.hpp +new file mode 100644 +index 00000000000..4172db219b1 +--- /dev/null ++++ b/src/hotspot/cpu/mips/bytes_mips.hpp +@@ -0,0 +1,181 @@ ++/* ++ * Copyright (c) 1997, 2010, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2019, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 
++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#ifndef CPU_MIPS_VM_BYTES_MIPS_HPP ++#define CPU_MIPS_VM_BYTES_MIPS_HPP ++ ++#include "memory/allocation.hpp" ++ ++class Bytes: AllStatic { ++ public: ++ // Returns true if the byte ordering used by Java is different from the native byte ordering ++ // of the underlying machine. For example, this is true for Intel x86, but false for Solaris ++ // on Sparc. ++ // we use mipsel, so return true ++ static inline bool is_Java_byte_ordering_different(){ return true; } ++ ++ ++ // Efficient reading and writing of unaligned unsigned data in platform-specific byte ordering ++ // (no special code is needed since x86 CPUs can access unaligned data) ++ static inline u2 get_native_u2(address p) { ++ if ((intptr_t)p & 0x1) { ++ return ((u2)p[1] << 8) | (u2)p[0]; ++ } else { ++ return *(u2*)p; ++ } ++ } ++ ++ static inline u4 get_native_u4(address p) { ++ if ((intptr_t)p & 3) { ++ u4 res; ++ __asm__ __volatile__ ( ++ " .set push\n" ++ " .set mips64\n" ++ " .set noreorder\n" ++ ++ " lwr %[res], 0(%[addr]) \n" ++ " lwl %[res], 3(%[addr]) \n" ++ ++ " .set pop" ++ : [res] "=&r" (res) ++ : [addr] "r" (p) ++ : "memory" ++ ); ++ return res; ++ } else { ++ return *(u4*)p; ++ } ++ } ++ ++ static inline u8 get_native_u8(address p) { ++ u8 res; ++ u8 temp = 0; ++ // u4 tp;//tmp register ++ __asm__ __volatile__ ( ++ " .set push\n" ++ " .set mips64\n" ++ " .set noreorder\n" ++ " .set noat\n" ++ " andi $1,%[addr],0x7 \n" ++ " beqz $1,1f \n" ++ " nop \n" ++ " ldr %[temp], 0(%[addr]) \n" ++ " ldl %[temp], 7(%[addr]) \n" ++ " b 2f \n" ++ " nop \n" ++ " 1:\t ld %[temp],0(%[addr]) \n" ++ " 2:\t sd %[temp], %[res] \n" ++ ++ " .set at\n" ++ " .set pop\n" ++ : [addr]"=r"(p), [temp]"=r" (temp) ++ : "[addr]"(p), "[temp]" (temp), [res]"m" (*(volatile jint*)&res) ++ : "memory" ++ ); ++ ++ return res; ++ } ++ ++ //use mips unaligned load instructions ++ static inline void put_native_u2(address p, u2 x) { ++ if((intptr_t)p & 0x1) { ++ p[0] = (u_char)(x); ++ p[1] = (u_char)(x>>8); ++ } else { ++ *(u2*)p = x; ++ } ++ } ++ ++ static inline void put_native_u4(address p, u4 x) { ++ // refer to sparc implementation. ++ // Note that sparc is big-endian, while mips is little-endian ++ switch ( intptr_t(p) & 3 ) { ++ case 0: *(u4*)p = x; ++ break; ++ ++ case 2: ((u2*)p)[1] = x >> 16; ++ ((u2*)p)[0] = x; ++ break; ++ ++ default: ((u1*)p)[3] = x >> 24; ++ ((u1*)p)[2] = x >> 16; ++ ((u1*)p)[1] = x >> 8; ++ ((u1*)p)[0] = x; ++ break; ++ } ++ } ++ ++ static inline void put_native_u8(address p, u8 x) { ++ // refer to sparc implementation. ++ // Note that sparc is big-endian, while mips is little-endian ++ switch ( intptr_t(p) & 7 ) { ++ case 0: *(u8*)p = x; ++ break; ++ ++ case 4: ((u4*)p)[1] = x >> 32; ++ ((u4*)p)[0] = x; ++ break; ++ ++ case 2: ((u2*)p)[3] = x >> 48; ++ ((u2*)p)[2] = x >> 32; ++ ((u2*)p)[1] = x >> 16; ++ ((u2*)p)[0] = x; ++ break; ++ ++ default: ((u1*)p)[7] = x >> 56; ++ ((u1*)p)[6] = x >> 48; ++ ((u1*)p)[5] = x >> 40; ++ ((u1*)p)[4] = x >> 32; ++ ((u1*)p)[3] = x >> 24; ++ ((u1*)p)[2] = x >> 16; ++ ((u1*)p)[1] = x >> 8; ++ ((u1*)p)[0] = x; ++ } ++ } ++ ++ ++ // Efficient reading and writing of unaligned unsigned data in Java ++ // byte ordering (i.e. big-endian ordering). Byte-order reversal is ++ // needed since MIPS64EL CPUs use little-endian format. 
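[Editor's note, not part of the patch hunk above.] put_native_u4/put_native_u8 split misaligned stores into halfword/byte pieces, and the get_Java_*/put_Java_* wrappers that follow byte-swap because Java class-file data is big-endian while MIPS64EL is little-endian. A portable sketch of the same net effect (editor's illustration only; memcpy stands in for the alignment switch and __builtin_bswap32 for the compiler-dependent swap_u4), valid on a little-endian host:

    #include <cassert>
    #include <cstdint>
    #include <cstring>

    static void put_native_u4_portable(unsigned char* p, uint32_t x) {
      std::memcpy(p, &x, sizeof x);           // works for any alignment of p
    }
    static uint32_t get_Java_u4_portable(const unsigned char* p) {
      uint32_t x;
      std::memcpy(&x, p, sizeof x);           // native (little-endian) load
      return __builtin_bswap32(x);            // Java order is big-endian
    }

    int main() {
      unsigned char buf[8] = {0};
      put_native_u4_portable(buf + 1, 0x11223344u);          // deliberately misaligned
      assert(buf[1] == 0x44 && buf[4] == 0x11);              // little-endian byte layout
      assert(get_Java_u4_portable(buf + 1) == 0x44332211u);  // swapped into Java order
      return 0;
    }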
++ static inline u2 get_Java_u2(address p) { return swap_u2(get_native_u2(p)); } ++ static inline u4 get_Java_u4(address p) { return swap_u4(get_native_u4(p)); } ++ static inline u8 get_Java_u8(address p) { return swap_u8(get_native_u8(p)); } ++ ++ static inline void put_Java_u2(address p, u2 x) { put_native_u2(p, swap_u2(x)); } ++ static inline void put_Java_u4(address p, u4 x) { put_native_u4(p, swap_u4(x)); } ++ static inline void put_Java_u8(address p, u8 x) { put_native_u8(p, swap_u8(x)); } ++ ++ ++ // Efficient swapping of byte ordering ++ static inline u2 swap_u2(u2 x); // compiler-dependent implementation ++ static inline u4 swap_u4(u4 x); // compiler-dependent implementation ++ static inline u8 swap_u8(u8 x); ++}; ++ ++ ++// The following header contains the implementations of swap_u2, swap_u4, and swap_u8[_base] ++#include OS_CPU_HEADER_INLINE(bytes) ++ ++#endif // CPU_MIPS_VM_BYTES_MIPS_HPP +diff --git a/src/hotspot/cpu/mips/c2_MacroAssembler_mips.cpp b/src/hotspot/cpu/mips/c2_MacroAssembler_mips.cpp +new file mode 100644 +index 00000000000..3d98ec11f6b +--- /dev/null ++++ b/src/hotspot/cpu/mips/c2_MacroAssembler_mips.cpp +@@ -0,0 +1,614 @@ ++/* ++ * Copyright (c) 2020, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2021, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#include "precompiled.hpp" ++#include "asm/assembler.hpp" ++#include "asm/assembler.inline.hpp" ++#include "opto/c2_MacroAssembler.hpp" ++#include "opto/intrinsicnode.hpp" ++#include "runtime/biasedLocking.hpp" ++#include "runtime/objectMonitor.hpp" ++#include "vmreg_mips.inline.hpp" ++ ++// Fast_Lock and Fast_Unlock used by C2 ++ ++// Because the transitions from emitted code to the runtime ++// monitorenter/exit helper stubs are so slow it's critical that ++// we inline both the stack-locking fast-path and the inflated fast path. ++// ++// See also: cmpFastLock and cmpFastUnlock. ++// ++// What follows is a specialized inline transliteration of the code ++// in slow_enter() and slow_exit(). If we're concerned about I$ bloat ++// another option would be to emit TrySlowEnter and TrySlowExit methods ++// at startup-time. These methods would accept arguments as ++// (Obj, Self, box, Scratch) and return success-failure ++// indications in the icc.ZFlag. Fast_Lock and Fast_Unlock would simply ++// marshal the arguments and emit calls to TrySlowEnter and TrySlowExit. ++// In practice, however, the # of lock sites is bounded and is usually small. 
++// Besides the call overhead, TrySlowEnter and TrySlowExit might suffer ++// if the processor uses simple bimodal branch predictors keyed by EIP ++// Since the helper routines would be called from multiple synchronization ++// sites. ++// ++// An even better approach would be write "MonitorEnter()" and "MonitorExit()" ++// in java - using j.u.c and unsafe - and just bind the lock and unlock sites ++// to those specialized methods. That'd give us a mostly platform-independent ++// implementation that the JITs could optimize and inline at their pleasure. ++// Done correctly, the only time we'd need to cross to native could would be ++// to park() or unpark() threads. We'd also need a few more unsafe operators ++// to (a) prevent compiler-JIT reordering of non-volatile accesses, and ++// (b) explicit barriers or fence operations. ++// ++// TODO: ++// ++// * Arrange for C2 to pass "Self" into Fast_Lock and Fast_Unlock in one of the registers (scr). ++// This avoids manifesting the Self pointer in the Fast_Lock and Fast_Unlock terminals. ++// Given TLAB allocation, Self is usually manifested in a register, so passing it into ++// the lock operators would typically be faster than reifying Self. ++// ++// * Ideally I'd define the primitives as: ++// fast_lock (nax Obj, nax box, res, tmp, nax scr) where tmp and scr are KILLED. ++// fast_unlock (nax Obj, box, res, nax tmp) where tmp are KILLED ++// Unfortunately ADLC bugs prevent us from expressing the ideal form. ++// Instead, we're stuck with a rather awkward and brittle register assignments below. ++// Furthermore the register assignments are overconstrained, possibly resulting in ++// sub-optimal code near the synchronization site. ++// ++// * Eliminate the sp-proximity tests and just use "== Self" tests instead. ++// Alternately, use a better sp-proximity test. ++// ++// * Currently ObjectMonitor._Owner can hold either an sp value or a (THREAD *) value. ++// Either one is sufficient to uniquely identify a thread. ++// TODO: eliminate use of sp in _owner and use get_thread(tr) instead. ++// ++// * Intrinsify notify() and notifyAll() for the common cases where the ++// object is locked by the calling thread but the waitlist is empty. ++// avoid the expensive JNI call to JVM_Notify() and JVM_NotifyAll(). ++// ++// * use jccb and jmpb instead of jcc and jmp to improve code density. ++// But beware of excessive branch density on AMD Opterons. ++// ++// * Both Fast_Lock and Fast_Unlock set the ICC.ZF to indicate success ++// or failure of the fast-path. If the fast-path fails then we pass ++// control to the slow-path, typically in C. In Fast_Lock and ++// Fast_Unlock we often branch to DONE_LABEL, just to find that C2 ++// will emit a conditional branch immediately after the node. ++// So we have branches to branches and lots of ICC.ZF games. ++// Instead, it might be better to have C2 pass a "FailureLabel" ++// into Fast_Lock and Fast_Unlock. In the case of success, control ++// will drop through the node. ICC.ZF is undefined at exit. 
++// In the case of failure, the node will branch directly to the ++// FailureLabel ++ ++// obj: object to lock ++// box: on-stack box address (displaced header location) ++// tmp: tmp -- KILLED ++// scr: tmp -- KILLED ++void C2_MacroAssembler::fast_lock(Register objReg, Register boxReg, Register resReg, ++ Register tmpReg, Register scrReg) { ++ Label IsInflated, DONE, DONE_SET; ++ ++ // Ensure the register assignents are disjoint ++ guarantee(objReg != boxReg, ""); ++ guarantee(objReg != tmpReg, ""); ++ guarantee(objReg != scrReg, ""); ++ guarantee(boxReg != tmpReg, ""); ++ guarantee(boxReg != scrReg, ""); ++ ++ block_comment("FastLock"); ++ ++ if (PrintBiasedLockingStatistics) { ++ atomic_inc32((address)BiasedLocking::total_entry_count_addr(), 1, tmpReg, scrReg); ++ } ++ ++ // Possible cases that we'll encounter in fast_lock ++ // ------------------------------------------------ ++ // * Inflated ++ // -- unlocked ++ // -- Locked ++ // = by self ++ // = by other ++ // * biased ++ // -- by Self ++ // -- by other ++ // * neutral ++ // * stack-locked ++ // -- by self ++ // = sp-proximity test hits ++ // = sp-proximity test generates false-negative ++ // -- by other ++ // ++ ++ if (DiagnoseSyncOnValueBasedClasses != 0) { ++ load_klass(tmpReg, objReg); ++ lw(tmpReg, Address(tmpReg, Klass::access_flags_offset())); ++ move(AT, JVM_ACC_IS_VALUE_BASED_CLASS); ++ andr(AT, tmpReg, AT); ++ sltiu(scrReg, AT, 1); ++ beq(scrReg, R0, DONE_SET); ++ delayed()->nop(); ++ } ++ ++ // TODO: optimize away redundant LDs of obj->mark and improve the markword triage ++ // order to reduce the number of conditional branches in the most common cases. ++ // Beware -- there's a subtle invariant that fetch of the markword ++ // at [FETCH], below, will never observe a biased encoding (*101b). ++ // If this invariant is not held we risk exclusion (safety) failure. ++ if (UseBiasedLocking && !UseOptoBiasInlining) { ++ Label succ, fail; ++ biased_locking_enter(boxReg, objReg, tmpReg, scrReg, false, succ, NULL); ++ b(fail); ++ delayed()->nop(); ++ bind(succ); ++ b(DONE); ++ delayed()->ori(resReg, R0, 1); ++ bind(fail); ++ } ++ ++ ld(tmpReg, Address(objReg, 0)); //Fetch the markword of the object. ++ andi(AT, tmpReg, markWord::monitor_value); ++ bne(AT, R0, IsInflated); // inflated vs stack-locked|neutral|bias ++ delayed()->nop(); ++ ++ // Attempt stack-locking ... ++ ori(tmpReg, tmpReg, markWord::unlocked_value); ++ sd(tmpReg, Address(boxReg, 0)); // Anticipate successful CAS ++ ++ if (PrintBiasedLockingStatistics) { ++ Label SUCC, FAIL; ++ cmpxchg(Address(objReg, 0), tmpReg, boxReg, scrReg, true, false, SUCC, &FAIL); // Updates tmpReg ++ bind(SUCC); ++ atomic_inc32((address)BiasedLocking::fast_path_entry_count_addr(), 1, AT, scrReg); ++ b(DONE); ++ delayed()->ori(resReg, R0, 1); ++ bind(FAIL); ++ } else { ++ // If cmpxchg is succ, then scrReg = 1 ++ cmpxchg(Address(objReg, 0), tmpReg, boxReg, scrReg, true, false, DONE_SET); // Updates tmpReg ++ } ++ ++ // Recursive locking ++ // The object is stack-locked: markword contains stack pointer to BasicLock. ++ // Locked by current thread if difference with current SP is less than one page. 
++ dsubu(tmpReg, tmpReg, SP); ++ li(AT, 7 - os::vm_page_size()); ++ andr(tmpReg, tmpReg, AT); ++ sd(tmpReg, Address(boxReg, 0)); ++ ++ if (PrintBiasedLockingStatistics) { ++ Label L; ++ // tmpReg == 0 => BiasedLocking::_fast_path_entry_count++ ++ bne(tmpReg, R0, L); ++ delayed()->nop(); ++ atomic_inc32((address)BiasedLocking::fast_path_entry_count_addr(), 1, AT, scrReg); ++ bind(L); ++ } ++ ++ b(DONE); ++ delayed()->sltiu(resReg, tmpReg, 1); // resReg = (tmpReg == 0) ? 1 : 0 ++ ++ bind(IsInflated); ++ // The object's monitor m is unlocked iff m->owner == NULL, ++ // otherwise m->owner may contain a thread or a stack address. ++ ++ // TODO: someday avoid the ST-before-CAS penalty by ++ // relocating (deferring) the following ST. ++ // We should also think about trying a CAS without having ++ // fetched _owner. If the CAS is successful we may ++ // avoid an RTO->RTS upgrade on the $line. ++ // Without cast to int32_t a movptr will destroy r10 which is typically obj ++ li(AT, (int32_t)intptr_t(markWord::unused_mark().value())); ++ sd(AT, Address(boxReg, 0)); ++ ++ ld(AT, Address(tmpReg, ObjectMonitor::owner_offset_in_bytes() - 2)); ++ // if (m->owner != 0) => AT = 0, goto slow path. ++ bne(AT, R0, DONE_SET); ++ delayed()->ori(scrReg, R0, 0); ++ ++#ifndef OPT_THREAD ++ get_thread(TREG); ++#endif ++ // It's inflated and appears unlocked ++ cmpxchg(Address(tmpReg, ObjectMonitor::owner_offset_in_bytes() - 2), R0, TREG, scrReg, false, false) ; ++ // Intentional fall-through into DONE ... ++ ++ bind(DONE_SET); ++ move(resReg, scrReg); ++ ++ // DONE is a hot target - we'd really like to place it at the ++ // start of cache line by padding with NOPs. ++ // See the AMD and Intel software optimization manuals for the ++ // most efficient "long" NOP encodings. ++ // Unfortunately none of our alignment mechanisms suffice. ++ bind(DONE); ++ // At DONE the resReg is set as follows ... ++ // Fast_Unlock uses the same protocol. ++ // resReg == 1 -> Success ++ // resREg == 0 -> Failure - force control through the slow-path ++} ++ ++// obj: object to unlock ++// box: box address (displaced header location), killed. ++// tmp: killed tmp; cannot be obj nor box. ++// ++// Some commentary on balanced locking: ++// ++// Fast_Lock and Fast_Unlock are emitted only for provably balanced lock sites. ++// Methods that don't have provably balanced locking are forced to run in the ++// interpreter - such methods won't be compiled to use fast_lock and fast_unlock. ++// The interpreter provides two properties: ++// I1: At return-time the interpreter automatically and quietly unlocks any ++// objects acquired the current activation (frame). Recall that the ++// interpreter maintains an on-stack list of locks currently held by ++// a frame. ++// I2: If a method attempts to unlock an object that is not held by the ++// the frame the interpreter throws IMSX. ++// ++// Lets say A(), which has provably balanced locking, acquires O and then calls B(). ++// B() doesn't have provably balanced locking so it runs in the interpreter. ++// Control returns to A() and A() unlocks O. By I1 and I2, above, we know that O ++// is still locked by A(). ++// ++// The only other source of unbalanced locking would be JNI. The "Java Native Interface: ++// Programmer's Guide and Specification" claims that an object locked by jni_monitorenter ++// should not be unlocked by "normal" java-level locking and vice-versa. The specification ++// doesn't specify what will occur if a program engages in such mixed-mode locking, however. 
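[Editor's note, not part of the patch hunk above.] The stack-locking branch of fast_lock above decides "recursively locked by the current thread" with a single mask: the fetched mark word (which, when stack-locked, points to a BasicLock on the owner's stack) minus SP must be non-negative, smaller than one page, and 8-byte aligned; the masked result is stored in the box so the unlock path sees 0 for the recursive case. A minimal sketch of that arithmetic with plain integers (editor's illustration, not HotSpot API; page_size stands in for os::vm_page_size()):

    #include <cstdint>

    // Mirrors: dsubu tmp, mark, SP;  li AT, 7 - page_size;  andr tmp, tmp, AT
    bool recursively_locked_by_self(uintptr_t mark, uintptr_t sp,
                                    uintptr_t page_size /* e.g. 4096 */) {
      uintptr_t diff = mark - sp;
      uintptr_t mask = static_cast<uintptr_t>(7 - static_cast<intptr_t>(page_size));
      // mask keeps the low 3 bits and every bit at or above the page size,
      // so the test passes only when 0 <= diff < page_size and diff % 8 == 0.
      return (diff & mask) == 0;
    }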
++ ++void C2_MacroAssembler::fast_unlock(Register objReg, Register boxReg, Register resReg, ++ Register tmpReg, Register scrReg) { ++ Label DONE, DONE_SET, Stacked, Inflated; ++ ++ guarantee(objReg != boxReg, ""); ++ guarantee(objReg != tmpReg, ""); ++ guarantee(objReg != scrReg, ""); ++ guarantee(boxReg != tmpReg, ""); ++ guarantee(boxReg != scrReg, ""); ++ ++ block_comment("FastUnlock"); ++ ++ // Critically, the biased locking test must have precedence over ++ // and appear before the (box->dhw == 0) recursive stack-lock test. ++ if (UseBiasedLocking && !UseOptoBiasInlining) { ++ Label succ, fail; ++ biased_locking_exit(objReg, tmpReg, succ); ++ b(fail); ++ delayed()->nop(); ++ bind(succ); ++ b(DONE); ++ delayed()->ori(resReg, R0, 1); ++ bind(fail); ++ } ++ ++ ld(tmpReg, Address(boxReg, 0)); // Examine the displaced header ++ beq(tmpReg, R0, DONE_SET); // 0 indicates recursive stack-lock ++ delayed()->sltiu(AT, tmpReg, 1); ++ ++ ld(tmpReg, Address(objReg, 0)); // Examine the object's markword ++ andi(AT, tmpReg, markWord::monitor_value); ++ beq(AT, R0, Stacked); // Inflated? ++ delayed()->nop(); ++ ++ bind(Inflated); ++ // It's inflated. ++ // Despite our balanced locking property we still check that m->_owner == Self ++ // as java routines or native JNI code called by this thread might ++ // have released the lock. ++ // Refer to the comments in synchronizer.cpp for how we might encode extra ++ // state in _succ so we can avoid fetching EntryList|cxq. ++ // ++ // I'd like to add more cases in fast_lock() and fast_unlock() -- ++ // such as recursive enter and exit -- but we have to be wary of ++ // I$ bloat, T$ effects and BP$ effects. ++ // ++ // If there's no contention try a 1-0 exit. That is, exit without ++ // a costly MEMBAR or CAS. See synchronizer.cpp for details on how ++ // we detect and recover from the race that the 1-0 exit admits. ++ // ++ // Conceptually Fast_Unlock() must execute a STST|LDST "release" barrier ++ // before it STs null into _owner, releasing the lock. Updates ++ // to data protected by the critical section must be visible before ++ // we drop the lock (and thus before any other thread could acquire ++ // the lock and observe the fields protected by the lock). 
++#ifndef OPT_THREAD ++ get_thread(TREG); ++#endif ++ ++ // It's inflated ++ ld(scrReg, Address(tmpReg, ObjectMonitor::owner_offset_in_bytes() - 2)) ; ++ xorr(scrReg, scrReg, TREG); ++ ++ ld(AT, Address(tmpReg, ObjectMonitor::recursions_offset_in_bytes() - 2)) ; ++ orr(scrReg, scrReg, AT); ++ ++ bne(scrReg, R0, DONE_SET); ++ delayed()->ori(AT, R0, 0); ++ ++ ld(scrReg, Address(tmpReg, ObjectMonitor::cxq_offset_in_bytes() - 2)); ++ ld(AT, Address(tmpReg, ObjectMonitor::EntryList_offset_in_bytes() - 2)); ++ orr(scrReg, scrReg, AT); ++ ++ bne(scrReg, R0, DONE_SET); ++ delayed()->ori(AT, R0, 0); ++ ++ sync(); ++ sd(R0, Address(tmpReg, ObjectMonitor::owner_offset_in_bytes() - 2)); ++ b(DONE); ++ delayed()->ori(resReg, R0, 1); ++ ++ bind(Stacked); ++ ld(tmpReg, Address(boxReg, 0)); ++ cmpxchg(Address(objReg, 0), boxReg, tmpReg, AT, false, false); ++ ++ bind(DONE_SET); ++ move(resReg, AT); ++ ++ bind(DONE); ++} ++ ++void C2_MacroAssembler::beq_long(Register rs, Register rt, Label& L) { ++ Label not_taken; ++ ++ bne(rs, rt, not_taken); ++ delayed()->nop(); ++ ++ jmp_far(L); ++ ++ bind(not_taken); ++} ++ ++void C2_MacroAssembler::bne_long(Register rs, Register rt, Label& L) { ++ Label not_taken; ++ ++ beq(rs, rt, not_taken); ++ delayed()->nop(); ++ ++ jmp_far(L); ++ ++ bind(not_taken); ++} ++ ++void C2_MacroAssembler::bc1t_long(Label& L) { ++ Label not_taken; ++ ++ bc1f(not_taken); ++ delayed()->nop(); ++ ++ jmp_far(L); ++ ++ bind(not_taken); ++} ++ ++void C2_MacroAssembler::bc1f_long(Label& L) { ++ Label not_taken; ++ ++ bc1t(not_taken); ++ delayed()->nop(); ++ ++ jmp_far(L); ++ ++ bind(not_taken); ++} ++ ++// Compare strings, used for char[] and byte[]. ++void C2_MacroAssembler::string_compare(Register str1, Register str2, ++ Register cnt1, Register cnt2, Register result, ++ int ae) { ++ Label L, Loop, haveResult, done; ++ ++ bool isLL = ae == StrIntrinsicNode::LL; ++ bool isLU = ae == StrIntrinsicNode::LU; ++ bool isUL = ae == StrIntrinsicNode::UL; ++ ++ bool str1_isL = isLL || isLU; ++ bool str2_isL = isLL || isUL; ++ ++ if (!str1_isL) srl(cnt1, cnt1, 1); ++ if (!str2_isL) srl(cnt2, cnt2, 1); ++ ++ // compute the and difference of lengths (in result) ++ subu(result, cnt1, cnt2); // result holds the difference of two lengths ++ ++ // compute the shorter length (in cnt1) ++ slt(AT, cnt2, cnt1); ++ movn(cnt1, cnt2, AT); ++ ++ // Now the shorter length is in cnt1 and cnt2 can be used as a tmp register ++ bind(Loop); // Loop begin ++ beq(cnt1, R0, done); ++ if (str1_isL) { ++ delayed()->lbu(AT, str1, 0); ++ } else { ++ delayed()->lhu(AT, str1, 0); ++ } ++ ++ // compare current character ++ if (str2_isL) { ++ lbu(cnt2, str2, 0); ++ } else { ++ lhu(cnt2, str2, 0); ++ } ++ bne(AT, cnt2, haveResult); ++ delayed()->addiu(str1, str1, str1_isL ? 1 : 2); ++ addiu(str2, str2, str2_isL ? 1 : 2); ++ b(Loop); ++ delayed()->addiu(cnt1, cnt1, -1); // Loop end ++ ++ bind(haveResult); ++ subu(result, AT, cnt2); ++ ++ bind(done); ++} ++ ++// Compare char[] or byte[] arrays or substrings. ++void C2_MacroAssembler::arrays_equals(Register str1, Register str2, ++ Register cnt, Register tmp, Register result, ++ bool is_char) { ++ Label Loop, True, False; ++ ++ beq(str1, str2, True); // same char[] ? 
++ delayed()->daddiu(result, R0, 1); ++ ++ beq(cnt, R0, True); ++ delayed()->nop(); // count == 0 ++ ++ bind(Loop); ++ ++ // compare current character ++ if (is_char) { ++ lhu(AT, str1, 0); ++ lhu(tmp, str2, 0); ++ } else { ++ lbu(AT, str1, 0); ++ lbu(tmp, str2, 0); ++ } ++ bne(AT, tmp, False); ++ delayed()->addiu(str1, str1, is_char ? 2 : 1); ++ addiu(cnt, cnt, -1); ++ bne(cnt, R0, Loop); ++ delayed()->addiu(str2, str2, is_char ? 2 : 1); ++ ++ b(True); ++ delayed()->nop(); ++ ++ bind(False); ++ daddiu(result, R0, 0); ++ ++ bind(True); ++} ++ ++void C2_MacroAssembler::gs_loadstore(Register reg, Register base, Register index, int disp, int type) { ++ switch (type) { ++ case STORE_BYTE: ++ gssbx(reg, base, index, disp); ++ break; ++ case STORE_CHAR: ++ case STORE_SHORT: ++ gsshx(reg, base, index, disp); ++ break; ++ case STORE_INT: ++ gsswx(reg, base, index, disp); ++ break; ++ case STORE_LONG: ++ gssdx(reg, base, index, disp); ++ break; ++ case LOAD_BYTE: ++ gslbx(reg, base, index, disp); ++ break; ++ case LOAD_SHORT: ++ gslhx(reg, base, index, disp); ++ break; ++ case LOAD_INT: ++ gslwx(reg, base, index, disp); ++ break; ++ case LOAD_LONG: ++ gsldx(reg, base, index, disp); ++ break; ++ default: ++ ShouldNotReachHere(); ++ } ++} ++ ++void C2_MacroAssembler::gs_loadstore(FloatRegister reg, Register base, Register index, int disp, int type) { ++ switch (type) { ++ case STORE_FLOAT: ++ gsswxc1(reg, base, index, disp); ++ break; ++ case STORE_DOUBLE: ++ gssdxc1(reg, base, index, disp); ++ break; ++ case LOAD_FLOAT: ++ gslwxc1(reg, base, index, disp); ++ break; ++ case LOAD_DOUBLE: ++ gsldxc1(reg, base, index, disp); ++ break; ++ default: ++ ShouldNotReachHere(); ++ } ++} ++ ++void C2_MacroAssembler::loadstore(Register reg, Register base, int disp, int type) { ++ switch (type) { ++ case STORE_BYTE: ++ sb(reg, base, disp); ++ break; ++ case STORE_CHAR: ++ case STORE_SHORT: ++ sh(reg, base, disp); ++ break; ++ case STORE_INT: ++ sw(reg, base, disp); ++ break; ++ case STORE_LONG: ++ sd(reg, base, disp); ++ break; ++ case LOAD_BYTE: ++ lb(reg, base, disp); ++ break; ++ case LOAD_U_BYTE: ++ lbu(reg, base, disp); ++ break; ++ case LOAD_SHORT: ++ lh(reg, base, disp); ++ break; ++ case LOAD_U_SHORT: ++ lhu(reg, base, disp); ++ break; ++ case LOAD_INT: ++ lw(reg, base, disp); ++ break; ++ case LOAD_U_INT: ++ lwu(reg, base, disp); ++ break; ++ case LOAD_LONG: ++ ld(reg, base, disp); ++ break; ++ case LOAD_LINKED_LONG: ++ lld(reg, base, disp); ++ break; ++ default: ++ ShouldNotReachHere(); ++ } ++} ++ ++void C2_MacroAssembler::loadstore(FloatRegister reg, Register base, int disp, int type) { ++ switch (type) { ++ case STORE_FLOAT: ++ swc1(reg, base, disp); ++ break; ++ case STORE_DOUBLE: ++ sdc1(reg, base, disp); ++ break; ++ case LOAD_FLOAT: ++ lwc1(reg, base, disp); ++ break; ++ case LOAD_DOUBLE: ++ ldc1(reg, base, disp); ++ break; ++ default: ++ ShouldNotReachHere(); ++ } ++} +diff --git a/src/hotspot/cpu/mips/c2_MacroAssembler_mips.hpp b/src/hotspot/cpu/mips/c2_MacroAssembler_mips.hpp +new file mode 100644 +index 00000000000..7bdf6e52126 +--- /dev/null ++++ b/src/hotspot/cpu/mips/c2_MacroAssembler_mips.hpp +@@ -0,0 +1,162 @@ ++/* ++ * Copyright (c) 2020, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2021, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#ifndef CPU_MIPS_VM_C2_MACROASSEMBLER_MIPS_HPP ++#define CPU_MIPS_VM_C2_MACROASSEMBLER_MIPS_HPP ++ ++// C2_MacroAssembler contains high-level macros for C2 ++ ++#define A0 RA0 ++#define A1 RA1 ++#define A2 RA2 ++#define A3 RA3 ++#define A4 RA4 ++#define A5 RA5 ++#define A6 RA6 ++#define A7 RA7 ++#define T0 RT0 ++#define T1 RT1 ++#define T2 RT2 ++#define T3 RT3 ++#define T8 RT8 ++#define T9 RT9 ++ ++public: ++ ++ void fast_lock(Register obj, Register box, Register res, Register tmp, Register scr); ++ void fast_unlock(Register obj, Register box, Register res, Register tmp, Register scr); ++ ++ // For C2 to support long branches ++ void beq_long (Register rs, Register rt, Label& L); ++ void bne_long (Register rs, Register rt, Label& L); ++ void bc1t_long (Label& L); ++ void bc1f_long (Label& L); ++ ++ // Compare strings. ++ void string_compare(Register str1, Register str2, ++ Register cnt1, Register cnt2, Register result, ++ int ae); ++ ++ // Compare char[] or byte[] arrays. 
++ void arrays_equals(Register str1, Register str2, ++ Register cnt, Register tmp, Register result, ++ bool is_char); ++ ++ // Memory Data Type ++ #define INT_TYPE 0x100 ++ #define FLOAT_TYPE 0x200 ++ #define SIGNED_TYPE 0x10 ++ #define UNSIGNED_TYPE 0x20 ++ ++ typedef enum { ++ LOAD_BYTE = INT_TYPE | SIGNED_TYPE | 0x1, ++ LOAD_CHAR = INT_TYPE | SIGNED_TYPE | 0x2, ++ LOAD_SHORT = INT_TYPE | SIGNED_TYPE | 0x3, ++ LOAD_INT = INT_TYPE | SIGNED_TYPE | 0x4, ++ LOAD_LONG = INT_TYPE | SIGNED_TYPE | 0x5, ++ STORE_BYTE = INT_TYPE | SIGNED_TYPE | 0x6, ++ STORE_CHAR = INT_TYPE | SIGNED_TYPE | 0x7, ++ STORE_SHORT = INT_TYPE | SIGNED_TYPE | 0x8, ++ STORE_INT = INT_TYPE | SIGNED_TYPE | 0x9, ++ STORE_LONG = INT_TYPE | SIGNED_TYPE | 0xa, ++ LOAD_LINKED_LONG = INT_TYPE | SIGNED_TYPE | 0xb, ++ ++ LOAD_U_BYTE = INT_TYPE | UNSIGNED_TYPE | 0x1, ++ LOAD_U_SHORT = INT_TYPE | UNSIGNED_TYPE | 0x2, ++ LOAD_U_INT = INT_TYPE | UNSIGNED_TYPE | 0x3, ++ ++ LOAD_FLOAT = FLOAT_TYPE | SIGNED_TYPE | 0x1, ++ LOAD_DOUBLE = FLOAT_TYPE | SIGNED_TYPE | 0x2, ++ STORE_FLOAT = FLOAT_TYPE | SIGNED_TYPE | 0x3, ++ STORE_DOUBLE = FLOAT_TYPE | SIGNED_TYPE | 0x4 ++ } CMLoadStoreDataType; ++ ++ void loadstore_enc(Register reg, int base, int index, int scale, int disp, int type) { ++ assert((type & INT_TYPE), "must be General reg type"); ++ loadstore_t(reg, base, index, scale, disp, type); ++ } ++ ++ void loadstore_enc(FloatRegister reg, int base, int index, int scale, int disp, int type) { ++ assert((type & FLOAT_TYPE), "must be Float reg type"); ++ loadstore_t(reg, base, index, scale, disp, type); ++ } ++ ++private: ++ ++ template <class T> ++ void loadstore_t(T reg, int base, int index, int scale, int disp, int type) { ++ if (index != -1) { ++ if (Assembler::is_simm16(disp)) { ++ if (UseLEXT1 && (type & SIGNED_TYPE) && Assembler::is_simm(disp, 8)) { ++ if (scale == 0) { ++ gs_loadstore(reg, as_Register(base), as_Register(index), disp, type); ++ } else { ++ dsll(AT, as_Register(index), scale); ++ gs_loadstore(reg, as_Register(base), AT, disp, type); ++ } ++ } else { ++ if (scale == 0) { ++ addu(AT, as_Register(base), as_Register(index)); ++ } else { ++ dsll(AT, as_Register(index), scale); ++ addu(AT, as_Register(base), AT); ++ } ++ loadstore(reg, AT, disp, type); ++ } ++ } else { ++ if (scale == 0) { ++ addu(AT, as_Register(base), as_Register(index)); ++ } else { ++ dsll(AT, as_Register(index), scale); ++ addu(AT, as_Register(base), AT); ++ } ++ move(T9, disp); ++ if (UseLEXT1 && (type & SIGNED_TYPE)) { ++ gs_loadstore(reg, AT, T9, 0, type); ++ } else { ++ addu(AT, AT, T9); ++ loadstore(reg, AT, 0, type); ++ } ++ } ++ } else { ++ if (Assembler::is_simm16(disp)) { ++ loadstore(reg, as_Register(base), disp, type); ++ } else { ++ move(T9, disp); ++ if (UseLEXT1 && (type & SIGNED_TYPE)) { ++ gs_loadstore(reg, as_Register(base), T9, 0, type); ++ } else { ++ addu(AT, as_Register(base), T9); ++ loadstore(reg, AT, 0, type); ++ } ++ } ++ } ++ } ++ void loadstore(Register reg, Register base, int disp, int type); ++ void loadstore(FloatRegister reg, Register base, int disp, int type); ++ void gs_loadstore(Register reg, Register base, Register index, int disp, int type); ++ void gs_loadstore(FloatRegister reg, Register base, Register index, int disp, int type); ++ ++#endif // CPU_MIPS_VM_C2_MACROASSEMBLER_MIPS_HPP +diff --git a/src/hotspot/cpu/mips/c2_globals_mips.hpp b/src/hotspot/cpu/mips/c2_globals_mips.hpp +new file mode 100644 +index 00000000000..f452cebf549 +--- /dev/null ++++ b/src/hotspot/cpu/mips/c2_globals_mips.hpp +@@ -0,0 +1,91 @@ ++/* ++ *
Copyright (c) 2000, 2013, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#ifndef CPU_MIPS_VM_C2_GLOBALS_MIPS_HPP ++#define CPU_MIPS_VM_C2_GLOBALS_MIPS_HPP ++ ++#include "utilities/globalDefinitions.hpp" ++#include "utilities/macros.hpp" ++ ++// Sets the default values for platform dependent flags used by the server compiler. ++// (see c2_globals.hpp). Alpha-sorted. ++define_pd_global(bool, BackgroundCompilation, true); ++define_pd_global(bool, UseTLAB, true); ++define_pd_global(bool, ResizeTLAB, true); ++define_pd_global(bool, CICompileOSR, true); ++define_pd_global(bool, InlineIntrinsics, true); ++define_pd_global(bool, PreferInterpreterNativeStubs, false); ++define_pd_global(bool, ProfileTraps, true); ++define_pd_global(bool, UseOnStackReplacement, true); ++define_pd_global(bool, ProfileInterpreter, true); ++// Disable C1 in server JIT ++define_pd_global(bool, TieredCompilation, false); ++define_pd_global(intx, CompileThreshold, 10000); ++define_pd_global(intx, BackEdgeThreshold, 100000); ++ ++define_pd_global(intx, OnStackReplacePercentage, 140); ++define_pd_global(intx, ConditionalMoveLimit, 3); ++define_pd_global(intx, FLOATPRESSURE, 31); ++define_pd_global(intx, FreqInlineSize, 325); ++define_pd_global(intx, MinJumpTableSize, 10); ++define_pd_global(intx, INTPRESSURE, 21); ++define_pd_global(intx, InteriorEntryAlignment, 16); ++define_pd_global(intx, NewSizeThreadIncrease, ScaleForWordSize(4*K)); ++define_pd_global(intx, LoopUnrollLimit, 60); ++define_pd_global(intx, LoopPercentProfileLimit, 10); ++// InitialCodeCacheSize derived from specjbb2000 run. ++define_pd_global(intx, InitialCodeCacheSize, 2496*K); // Integral multiple of CodeCacheExpansionSize ++define_pd_global(intx, CodeCacheExpansionSize, 64*K); ++ ++// Ergonomics related flags ++define_pd_global(uint64_t,MaxRAM, 128ULL*G); ++define_pd_global(intx, RegisterCostAreaRatio, 16000); ++ ++// Peephole and CISC spilling both break the graph, and so makes the ++// scheduler sick. 
++define_pd_global(bool, OptoPeephole, false); ++define_pd_global(bool, UseCISCSpill, false); ++define_pd_global(bool, OptoScheduling, false); ++define_pd_global(bool, OptoBundling, false); ++define_pd_global(bool, OptoRegScheduling, false); ++define_pd_global(bool, SuperWordLoopUnrollAnalysis, true); ++define_pd_global(bool, IdealizeClearArrayNode, true); ++ ++define_pd_global(intx, ReservedCodeCacheSize, 120*M); ++define_pd_global(intx, NonProfiledCodeHeapSize, 57*M); ++define_pd_global(intx, ProfiledCodeHeapSize, 58*M); ++define_pd_global(intx, NonNMethodCodeHeapSize, 5*M ); ++define_pd_global(uintx, CodeCacheMinBlockLength, 4); ++define_pd_global(uintx, CodeCacheMinimumUseSpace, 400*K); ++ ++define_pd_global(bool, TrapBasedRangeChecks, false); ++ ++// Heap related flags ++define_pd_global(uintx,MetaspaceSize, ScaleForWordSize(16*M)); ++ ++// Ergonomics related flags ++define_pd_global(bool, NeverActAsServerClassMachine, false); ++ ++#endif // CPU_MIPS_VM_C2_GLOBALS_MIPS_HPP +diff --git a/src/hotspot/cpu/mips/c2_init_mips.cpp b/src/hotspot/cpu/mips/c2_init_mips.cpp +new file mode 100644 +index 00000000000..e6d5815f424 +--- /dev/null ++++ b/src/hotspot/cpu/mips/c2_init_mips.cpp +@@ -0,0 +1,34 @@ ++/* ++ * Copyright (c) 2000, 2010, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2019, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#include "precompiled.hpp" ++#include "opto/compile.hpp" ++#include "opto/node.hpp" ++ ++// processor dependent initialization for mips ++ ++void Compile::pd_compiler2_init() { ++ guarantee(CodeEntryAlignment >= InteriorEntryAlignment, "" ); ++} +diff --git a/src/hotspot/cpu/mips/codeBuffer_mips.hpp b/src/hotspot/cpu/mips/codeBuffer_mips.hpp +new file mode 100644 +index 00000000000..3cc191006d4 +--- /dev/null ++++ b/src/hotspot/cpu/mips/codeBuffer_mips.hpp +@@ -0,0 +1,35 @@ ++/* ++ * Copyright (c) 2002, 2010, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2018, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#ifndef CPU_MIPS_VM_CODEBUFFER_MIPS_HPP ++#define CPU_MIPS_VM_CODEBUFFER_MIPS_HPP ++ ++private: ++ void pd_initialize() {} ++ ++public: ++ void flush_bundle(bool start_new_bundle) {} ++ ++#endif // CPU_MIPS_VM_CODEBUFFER_MIPS_HPP +diff --git a/src/hotspot/cpu/mips/compiledIC_mips.cpp b/src/hotspot/cpu/mips/compiledIC_mips.cpp +new file mode 100644 +index 00000000000..81f67a92a78 +--- /dev/null ++++ b/src/hotspot/cpu/mips/compiledIC_mips.cpp +@@ -0,0 +1,147 @@ ++/* ++ * Copyright (c) 1997, 2014, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#include "precompiled.hpp" ++#include "asm/macroAssembler.inline.hpp" ++#include "code/compiledIC.hpp" ++#include "code/icBuffer.hpp" ++#include "code/nmethod.hpp" ++#include "memory/resourceArea.hpp" ++#include "runtime/mutexLocker.hpp" ++#include "runtime/safepoint.hpp" ++ ++// ---------------------------------------------------------------------------- ++ ++#define __ _masm. ++address CompiledStaticCall::emit_to_interp_stub(CodeBuffer &cbuf, address mark) { ++ ++ if (mark == NULL) { ++ mark = cbuf.insts_mark(); // get mark within main instrs section ++ } ++ ++ // Note that the code buffer's insts_mark is always relative to insts. ++ // That's why we must use the macroassembler to generate a stub. ++ MacroAssembler _masm(&cbuf); ++ ++ address base = __ start_a_stub(CompiledStaticCall::to_interp_stub_size()); ++ if (base == NULL) return NULL; // CodeBuffer::expand failed ++ // static stub relocation stores the instruction address of the call ++ ++ __ relocate(static_stub_Relocation::spec(mark), 0); ++ ++ // Code stream for loading method may be changed. ++ __ synci(R0, 0); ++ ++ // Rmethod contains Method*, it should be relocated for GC ++ // static stub relocation also tags the Method* in the code-stream. 
++ __ mov_metadata(Rmethod, NULL); ++ // This is recognized as unresolved by relocs/nativeInst/ic code ++ ++ __ relocate(relocInfo::runtime_call_type); ++ ++ cbuf.set_insts_mark(); ++ address call_pc = (address)-1; ++ __ patchable_jump(call_pc); ++ __ align(16); ++ // Update current stubs pointer and restore code_end. ++ __ end_a_stub(); ++ return base; ++} ++#undef __ ++ ++int CompiledStaticCall::to_interp_stub_size() { ++ int size = NativeInstruction::nop_instruction_size + NativeMovConstReg::instruction_size + NativeCall::instruction_size; ++ return align_up(size, 16); ++} ++ ++int CompiledStaticCall::to_trampoline_stub_size() { ++ return NativeInstruction::nop_instruction_size + NativeCallTrampolineStub::instruction_size; ++} ++ ++// Relocation entries for call stub, compiled java to interpreter. ++int CompiledStaticCall::reloc_to_interp_stub() { ++ return 16; ++} ++ ++void CompiledDirectStaticCall::set_to_interpreted(const methodHandle& callee, address entry) { ++ address stub = find_stub(); ++ guarantee(stub != NULL, "stub not found"); ++ ++ if (TraceICs) { ++ ResourceMark rm; ++ tty->print_cr("CompiledDirectStaticCall@" INTPTR_FORMAT ": set_to_interpreted %s", ++ p2i(instruction_address()), ++ callee->name_and_sig_as_C_string()); ++ } ++ ++ // Creation also verifies the object. ++ NativeMovConstReg* method_holder = nativeMovConstReg_at(stub + NativeInstruction::nop_instruction_size); ++ NativeGeneralJump* jump = nativeGeneralJump_at(method_holder->next_instruction_address()); ++ verify_mt_safe(callee, entry, method_holder, jump); ++ ++ // Update stub. ++ method_holder->set_data((intptr_t)callee()); ++ jump->set_jump_destination(entry); ++ ++ // Update jump to call. ++ set_destination_mt_safe(stub); ++} ++ ++void CompiledDirectStaticCall::set_stub_to_clean(static_stub_Relocation* static_stub) { ++ assert (CompiledIC_lock->is_locked() || SafepointSynchronize::is_at_safepoint(), "mt unsafe call"); ++ // Reset stub. ++ address stub = static_stub->addr(); ++ assert(stub != NULL, "stub not found"); ++ // Creation also verifies the object. ++ NativeMovConstReg* method_holder = nativeMovConstReg_at(stub + NativeInstruction::nop_instruction_size); ++ NativeGeneralJump* jump = nativeGeneralJump_at(method_holder->next_instruction_address()); ++ method_holder->set_data(0); ++ jump->set_jump_destination((address)-1); ++} ++ ++//----------------------------------------------------------------------------- ++// Non-product mode code ++#ifndef PRODUCT ++ ++void CompiledDirectStaticCall::verify() { ++ // Verify call. ++ _call->verify(); ++ if (os::is_MP()) { ++ _call->verify_alignment(); ++ } ++ ++ // Verify stub. ++ address stub = find_stub(); ++ assert(stub != NULL, "no stub found for static call"); ++ // Creation also verifies the object. ++ NativeMovConstReg* method_holder = nativeMovConstReg_at(stub + NativeInstruction::nop_instruction_size); ++ NativeGeneralJump* jump = nativeGeneralJump_at(method_holder->next_instruction_address()); ++ ++ ++ // Verify state. ++ assert(is_clean() || is_call_to_compiled() || is_call_to_interpreted(), "sanity check"); ++} ++ ++#endif // !PRODUCT +diff --git a/src/hotspot/cpu/mips/copy_mips.hpp b/src/hotspot/cpu/mips/copy_mips.hpp +new file mode 100644 +index 00000000000..dcc77adfec1 +--- /dev/null ++++ b/src/hotspot/cpu/mips/copy_mips.hpp +@@ -0,0 +1,77 @@ ++/* ++ * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2016, Loongson Technology. All rights reserved. 
++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#ifndef CPU_MIPS_VM_COPY_MIPS_HPP ++#define CPU_MIPS_VM_COPY_MIPS_HPP ++ ++// Inline functions for memory copy and fill. ++ ++// Contains inline asm implementations ++#include OS_CPU_HEADER_INLINE(copy) ++ ++// Template for atomic, element-wise copy. ++template <class T> ++static void copy_conjoint_atomic(const T* from, T* to, size_t count) { ++ if (from > to) { ++ while (count-- > 0) { ++ // Copy forwards ++ *to++ = *from++; ++ } ++ } else { ++ from += count - 1; ++ to += count - 1; ++ while (count-- > 0) { ++ // Copy backwards ++ *to-- = *from--; ++ } ++ } ++} ++ ++ ++static void pd_fill_to_words(HeapWord* tohw, size_t count, juint value) { ++ julong* to = (julong*) tohw; ++ julong v = ((julong) value << 32) | value; ++ while (count-- > 0) { ++ *to++ = v; ++ } ++} ++ ++static void pd_fill_to_aligned_words(HeapWord* tohw, size_t count, juint value) { ++ pd_fill_to_words(tohw, count, value); ++} ++ ++static void pd_fill_to_bytes(void* to, size_t count, jubyte value) { ++ (void)memset(to, value, count); ++} ++ ++static void pd_zero_to_words(HeapWord* tohw, size_t count) { ++ pd_fill_to_words(tohw, count, 0); ++} ++ ++static void pd_zero_to_bytes(void* to, size_t count) { ++ (void)memset(to, 0, count); ++} ++ ++#endif //CPU_MIPS_VM_COPY_MIPS_HPP +diff --git a/src/hotspot/cpu/mips/depChecker_mips.cpp b/src/hotspot/cpu/mips/depChecker_mips.cpp +new file mode 100644 +index 00000000000..756ccb68f9c +--- /dev/null ++++ b/src/hotspot/cpu/mips/depChecker_mips.cpp +@@ -0,0 +1,30 @@ ++/* ++ * Copyright (c) 2002, 2010, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2016, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#include "precompiled.hpp" ++#include "compiler/disassembler.hpp" ++#include "depChecker_mips.hpp" ++ ++// Nothing to do on mips +diff --git a/src/hotspot/cpu/mips/depChecker_mips.hpp b/src/hotspot/cpu/mips/depChecker_mips.hpp +new file mode 100644 +index 00000000000..11e52b4e8f8 +--- /dev/null ++++ b/src/hotspot/cpu/mips/depChecker_mips.hpp +@@ -0,0 +1,31 @@ ++/* ++ * Copyright (c) 2002, 2010, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2016, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#ifndef CPU_MIPS_VM_DEPCHECKER_MIPS_HPP ++#define CPU_MIPS_VM_DEPCHECKER_MIPS_HPP ++ ++// Nothing to do on MIPS ++ ++#endif // CPU_MIPS_VM_DEPCHECKER_MIPS_HPP +diff --git a/src/hotspot/cpu/mips/disassembler_mips.hpp b/src/hotspot/cpu/mips/disassembler_mips.hpp +new file mode 100644 +index 00000000000..1ca0053b923 +--- /dev/null ++++ b/src/hotspot/cpu/mips/disassembler_mips.hpp +@@ -0,0 +1,57 @@ ++/* ++ * Copyright (c) 1997, 2010, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ * ++ */ ++ ++#ifndef CPU_MIPS_VM_DISASSEMBLER_MIPS_HPP ++#define CPU_MIPS_VM_DISASSEMBLER_MIPS_HPP ++ ++ static int pd_instruction_alignment() { ++ return sizeof(int); ++ } ++ ++ static const char* pd_cpu_opts() { ++ return "gpr-names=64"; ++ } ++ ++ // Returns address of n-th instruction preceding addr, ++ // NULL if no preceding instruction can be found. ++ // With MIPS being a RISC architecture, this always is BytesPerInstWord ++ // It might be beneficial to check "is_readable" as we do on ppc and s390. ++ static address find_prev_instr(address addr, int n_instr) { ++ return addr - BytesPerInstWord*n_instr; ++ } ++ ++ // special-case instruction decoding. ++ // There may be cases where the binutils disassembler doesn't do ++ // the perfect job. In those cases, decode_instruction0 may kick in ++ // and do it right. ++ // If nothing had to be done, just return "here", otherwise return "here + instr_len(here)" ++ static address decode_instruction0(address here, outputStream* st, address virtual_begin = NULL) { ++ return here; ++ } ++ ++ // platform-specific instruction annotations (like value of loaded constants) ++ static void annotate(address pc, outputStream* st) { }; ++ ++#endif // CPU_MIPS_VM_DISASSEMBLER_MIPS_HPP +diff --git a/src/hotspot/cpu/mips/foreign_globals_mips.cpp b/src/hotspot/cpu/mips/foreign_globals_mips.cpp +new file mode 100644 +index 00000000000..fb4647c2723 +--- /dev/null ++++ b/src/hotspot/cpu/mips/foreign_globals_mips.cpp +@@ -0,0 +1,42 @@ ++/* ++ * Copyright (c) 2020, Red Hat, Inc. All rights reserved. ++ * Copyright (c) 2021, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ */ ++ ++#include "precompiled.hpp" ++#include "prims/foreign_globals.hpp" ++#include "utilities/debug.hpp" ++ ++const ABIDescriptor ForeignGlobals::parse_abi_descriptor_impl(jobject jabi) const { ++ Unimplemented(); ++ return {}; ++} ++ ++const BufferLayout ForeignGlobals::parse_buffer_layout_impl(jobject jlayout) const { ++ Unimplemented(); ++ return {}; ++} ++ ++const CallRegs ForeignGlobals::parse_call_regs_impl(jobject jconv) const { ++ ShouldNotCallThis(); ++ return {}; ++} +diff --git a/src/hotspot/cpu/mips/foreign_globals_mips.hpp b/src/hotspot/cpu/mips/foreign_globals_mips.hpp +new file mode 100644 +index 00000000000..3c00688168a +--- /dev/null ++++ b/src/hotspot/cpu/mips/foreign_globals_mips.hpp +@@ -0,0 +1,31 @@ ++/* ++ * Copyright (c) 2020, Red Hat, Inc. All rights reserved. ++ * Copyright (c) 2021, Loongson Technology. All rights reserved. 
++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ */ ++ ++#ifndef CPU_MIPS_VM_FOREIGN_GLOBALS_MIPS_HPP ++#define CPU_MIPS_VM_FOREIGN_GLOBALS_MIPS_HPP ++ ++class BufferLayout {}; ++class ABIDescriptor {}; ++ ++#endif // CPU_MIPS_VM_FOREIGN_GLOBALS_MIPS_HPP +diff --git a/src/hotspot/cpu/mips/frame_mips.cpp b/src/hotspot/cpu/mips/frame_mips.cpp +new file mode 100644 +index 00000000000..bf2b87b24cc +--- /dev/null ++++ b/src/hotspot/cpu/mips/frame_mips.cpp +@@ -0,0 +1,665 @@ ++/* ++ * Copyright (c) 1997, 2014, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#include "precompiled.hpp" ++#include "compiler/oopMap.hpp" ++#include "interpreter/interpreter.hpp" ++#include "gc/shared/collectedHeap.hpp" ++#include "memory/resourceArea.hpp" ++#include "oops/markWord.hpp" ++#include "oops/method.hpp" ++#include "oops/oop.inline.hpp" ++#include "prims/methodHandles.hpp" ++#include "runtime/frame.inline.hpp" ++#include "runtime/handles.inline.hpp" ++#include "runtime/javaCalls.hpp" ++#include "runtime/monitorChunk.hpp" ++#include "runtime/signature.hpp" ++#include "runtime/stubCodeGenerator.hpp" ++#include "runtime/stubRoutines.hpp" ++#include "vmreg_mips.inline.hpp" ++ ++#define A0 RA0 ++#define A1 RA1 ++#define A2 RA2 ++#define A3 RA3 ++#define A4 RA4 ++#define A5 RA5 ++#define A6 RA6 ++#define A7 RA7 ++ ++#ifdef ASSERT ++void RegisterMap::check_location_valid() { ++} ++#endif ++ ++ ++// Profiling/safepoint support ++// for Profiling - acting on another frame. 
walks sender frames ++// if valid. ++// frame profile_find_Java_sender_frame(JavaThread *thread); ++ ++bool frame::safe_for_sender(JavaThread *thread) { ++ address sp = (address)_sp; ++ address fp = (address)_fp; ++ address unextended_sp = (address)_unextended_sp; ++ ++ // consider stack guards when trying to determine "safe" stack pointers ++ // sp must be within the usable part of the stack (not in guards) ++ if (!thread->is_in_usable_stack(sp)) { ++ return false; ++ } ++ ++ // unextended sp must be within the stack and above or equal sp ++ if (!thread->is_in_stack_range_incl(unextended_sp, sp)) { ++ return false; ++ } ++ ++ // an fp must be within the stack and above (but not equal) sp ++ // second evaluation on fp+ is added to handle situation where fp is -1 ++ bool fp_safe = thread->is_in_stack_range_excl(fp, sp) && ++ thread->is_in_full_stack_checked(fp + (return_addr_offset * sizeof(void*))); ++ ++ // We know sp/unextended_sp are safe only fp is questionable here ++ ++ // If the current frame is known to the code cache then we can attempt to ++ // construct the sender and do some validation of it. This goes a long way ++ // toward eliminating issues when we get in frame construction code ++ ++ if (_cb != NULL ) { ++ ++ // First check if frame is complete and tester is reliable ++ // Unfortunately we can only check frame complete for runtime stubs and nmethod ++ // other generic buffer blobs are more problematic so we just assume they are ++ // ok. adapter blobs never have a frame complete and are never ok. ++ ++ if (!_cb->is_frame_complete_at(_pc)) { ++ if (_cb->is_compiled() || _cb->is_adapter_blob() || _cb->is_runtime_stub()) { ++ return false; ++ } ++ } ++ ++ // Could just be some random pointer within the codeBlob ++ if (!_cb->code_contains(_pc)) { ++ return false; ++ } ++ ++ // Entry frame checks ++ if (is_entry_frame()) { ++ // an entry frame must have a valid fp. ++ return fp_safe && is_entry_frame_valid(thread); ++ } ++ ++ intptr_t* sender_sp = NULL; ++ intptr_t* sender_unextended_sp = NULL; ++ address sender_pc = NULL; ++ intptr_t* saved_fp = NULL; ++ ++ if (is_interpreted_frame()) { ++ // fp must be safe ++ if (!fp_safe) { ++ return false; ++ } ++ ++ sender_pc = (address) this->fp()[return_addr_offset]; ++ // for interpreted frames, the value below is the sender "raw" sp, ++ // which can be different from the sender unextended sp (the sp seen ++ // by the sender) because of current frame local variables ++ sender_sp = (intptr_t*) addr_at(sender_sp_offset); ++ sender_unextended_sp = (intptr_t*) this->fp()[interpreter_frame_sender_sp_offset]; ++ saved_fp = (intptr_t*) this->fp()[link_offset]; ++ ++ } else { ++ // must be some sort of compiled/runtime frame ++ // fp does not have to be safe (although it could be check for c1?) ++ ++ // check for a valid frame_size, otherwise we are unlikely to get a valid sender_pc ++ if (_cb->frame_size() <= 0) { ++ return false; ++ } ++ ++ sender_sp = _unextended_sp + _cb->frame_size(); ++ // Is sender_sp safe? 
++ if (!thread->is_in_full_stack_checked((address)sender_sp)) { ++ return false; ++ } ++ sender_unextended_sp = sender_sp; ++ // On MIPS the return_address is always the word on the stack ++ sender_pc = (address) *(sender_sp-1); ++ // Note: frame::sender_sp_offset is only valid for compiled frame ++ saved_fp = (intptr_t*) *(sender_sp - frame::sender_sp_offset); ++ } ++ ++ ++ // If the potential sender is the interpreter then we can do some more checking ++ if (Interpreter::contains(sender_pc)) { ++ ++ // FP is always saved in a recognizable place in any code we generate. However ++ // only if the sender is interpreted/call_stub (c1 too?) are we certain that the saved FP ++ // is really a frame pointer. ++ ++ if (!thread->is_in_stack_range_excl((address)saved_fp, (address)sender_sp)) { ++ return false; ++ } ++ ++ // construct the potential sender ++ ++ frame sender(sender_sp, sender_unextended_sp, saved_fp, sender_pc); ++ ++ return sender.is_interpreted_frame_valid(thread); ++ ++ } ++ ++ // We must always be able to find a recognizable pc ++ CodeBlob* sender_blob = CodeCache::find_blob_unsafe(sender_pc); ++ if (sender_pc == NULL || sender_blob == NULL) { ++ return false; ++ } ++ ++ // Could be a zombie method ++ if (sender_blob->is_zombie() || sender_blob->is_unloaded()) { ++ return false; ++ } ++ ++ // Could just be some random pointer within the codeBlob ++ if (!sender_blob->code_contains(sender_pc)) { ++ return false; ++ } ++ ++ // We should never be able to see an adapter if the current frame is something from code cache ++ if (sender_blob->is_adapter_blob()) { ++ return false; ++ } ++ ++ // Could be the call_stub ++ if (StubRoutines::returns_to_call_stub(sender_pc)) { ++ if (!thread->is_in_stack_range_excl((address)saved_fp, (address)sender_sp)) { ++ return false; ++ } ++ ++ // construct the potential sender ++ ++ frame sender(sender_sp, sender_unextended_sp, saved_fp, sender_pc); ++ ++ // Validate the JavaCallWrapper an entry frame must have ++ address jcw = (address)sender.entry_frame_call_wrapper(); ++ ++ return thread->is_in_stack_range_excl(jcw, (address)sender.fp()); ++ } ++ ++ CompiledMethod* nm = sender_blob->as_compiled_method_or_null(); ++ if (nm != NULL) { ++ if (nm->is_deopt_mh_entry(sender_pc) || nm->is_deopt_entry(sender_pc) || ++ nm->method()->is_method_handle_intrinsic()) { ++ return false; ++ } ++ } ++ ++ // If the frame size is 0 something (or less) is bad because every nmethod has a non-zero frame size ++ // because the return address counts against the callee's frame. ++ ++ if (sender_blob->frame_size() <= 0) { ++ assert(!sender_blob->is_compiled(), "should count return address at least"); ++ return false; ++ } ++ ++ // We should never be able to see anything here except an nmethod. If something in the ++ // code cache (current frame) is called by an entity within the code cache that entity ++ // should not be anything but the call stub (already covered), the interpreter (already covered) ++ // or an nmethod. ++ ++ if (!sender_blob->is_compiled()) { ++ return false; ++ } ++ ++ // Could put some more validation for the potential non-interpreted sender ++ // frame we'd create by calling sender if I could think of any. Wait for next crash in forte... ++ ++ // One idea is seeing if the sender_pc we have is one that we'd expect to call to current cb ++ ++ // We've validated the potential sender that would be created ++ return true; ++ } ++ ++ // Must be native-compiled frame. 
Since sender will try and use fp to find ++ // linkages it must be safe ++ ++ if (!fp_safe) { ++ return false; ++ } ++ ++ // Will the pc we fetch be non-zero (which we'll find at the oldest frame) ++ ++ if ( (address) this->fp()[return_addr_offset] == NULL) return false; ++ ++ ++ // could try and do some more potential verification of native frame if we could think of some... ++ ++ return true; ++ ++} ++ ++void frame::patch_pc(Thread* thread, address pc) { ++ assert(_cb == CodeCache::find_blob(pc), "unexpected pc"); ++ address* pc_addr = &(((address*) sp())[-1]); ++ if (TracePcPatching) { ++ tty->print_cr("patch_pc at address " INTPTR_FORMAT " [" INTPTR_FORMAT " -> " INTPTR_FORMAT "]", ++ p2i(pc_addr), p2i(*pc_addr), p2i(pc)); ++ } ++ // Either the return address is the original one or we are going to ++ // patch in the same address that's already there. ++ assert(_pc == *pc_addr || pc == *pc_addr, "must be"); ++ *pc_addr = pc; ++ address original_pc = CompiledMethod::get_deopt_original_pc(this); ++ if (original_pc != NULL) { ++ assert(original_pc == _pc, "expected original PC to be stored before patching"); ++ _deopt_state = is_deoptimized; ++ // leave _pc as is ++ } else { ++ _deopt_state = not_deoptimized; ++ _pc = pc; ++ } ++} ++ ++bool frame::is_interpreted_frame() const { ++ return Interpreter::contains(pc()); ++} ++ ++int frame::frame_size(RegisterMap* map) const { ++ frame sender = this->sender(map); ++ return sender.sp() - sp(); ++} ++ ++intptr_t* frame::entry_frame_argument_at(int offset) const { ++ // convert offset to index to deal with tsi ++ int index = (Interpreter::expr_offset_in_bytes(offset)/wordSize); ++ // Entry frame's arguments are always in relation to unextended_sp() ++ return &unextended_sp()[index]; ++} ++ ++// sender_sp ++intptr_t* frame::interpreter_frame_sender_sp() const { ++ assert(is_interpreted_frame(), "interpreted frame expected"); ++ return (intptr_t*) at(interpreter_frame_sender_sp_offset); ++} ++ ++void frame::set_interpreter_frame_sender_sp(intptr_t* sender_sp) { ++ assert(is_interpreted_frame(), "interpreted frame expected"); ++ int_at_put(interpreter_frame_sender_sp_offset, (intptr_t) sender_sp); ++} ++ ++ ++// monitor elements ++ ++BasicObjectLock* frame::interpreter_frame_monitor_begin() const { ++ return (BasicObjectLock*) addr_at(interpreter_frame_monitor_block_bottom_offset); ++} ++ ++BasicObjectLock* frame::interpreter_frame_monitor_end() const { ++ BasicObjectLock* result = (BasicObjectLock*) *addr_at(interpreter_frame_monitor_block_top_offset); ++ // make sure the pointer points inside the frame ++ assert((intptr_t) fp() > (intptr_t) result, "result must < than frame pointer"); ++ assert((intptr_t) sp() <= (intptr_t) result, "result must >= than stack pointer"); ++ return result; ++} ++ ++void frame::interpreter_frame_set_monitor_end(BasicObjectLock* value) { ++ *((BasicObjectLock**)addr_at(interpreter_frame_monitor_block_top_offset)) = value; ++} ++ ++// Used by template based interpreter deoptimization ++void frame::interpreter_frame_set_last_sp(intptr_t* sp) { ++ *((intptr_t**)addr_at(interpreter_frame_last_sp_offset)) = sp; ++} ++ ++frame frame::sender_for_entry_frame(RegisterMap* map) const { ++ assert(map != NULL, "map must be set"); ++ // Java frame called from C; skip all C frames and return top C ++ // frame of that chunk as the sender ++ JavaFrameAnchor* jfa = entry_frame_call_wrapper()->anchor(); ++ assert(!entry_frame_is_first(), "next Java fp must be non zero"); ++ assert(jfa->last_Java_sp() > sp(), "must be above this frame on 
stack"); ++ map->clear(); ++ assert(map->include_argument_oops(), "should be set by clear"); ++ if (jfa->last_Java_pc() != NULL ) { ++ frame fr(jfa->last_Java_sp(), jfa->last_Java_fp(), jfa->last_Java_pc()); ++ return fr; ++ } ++ frame fr(jfa->last_Java_sp(), jfa->last_Java_fp()); ++ return fr; ++} ++ ++bool frame::optimized_entry_frame_is_first() const { ++ ShouldNotCallThis(); ++ return false; ++} ++ ++frame frame::sender_for_interpreter_frame(RegisterMap* map) const { ++ // sp is the raw sp from the sender after adapter or interpreter extension ++ intptr_t* sender_sp = this->sender_sp(); ++ ++ // This is the sp before any possible extension (adapter/locals). ++ intptr_t* unextended_sp = interpreter_frame_sender_sp(); ++ ++ // The interpreter and compiler(s) always save FP in a known ++ // location on entry. We must record where that location is ++ // so this if FP was live on callout from c2 we can find ++ // the saved copy no matter what it called. ++ ++ // Since the interpreter always saves FP if we record where it is then ++ // we don't have to always save FP on entry and exit to c2 compiled ++ // code, on entry will be enough. ++#ifdef COMPILER2 ++ if (map->update_map()) { ++ update_map_with_saved_link(map, (intptr_t**) addr_at(link_offset)); ++ } ++#endif /* COMPILER2 */ ++ return frame(sender_sp, unextended_sp, link(), sender_pc()); ++} ++ ++OptimizedEntryBlob::FrameData* OptimizedEntryBlob::frame_data_for_frame(const frame& frame) const { ++ ShouldNotCallThis(); ++ return nullptr; ++} ++ ++frame frame::sender_for_optimized_entry_frame(RegisterMap* map) const { ++ ShouldNotCallThis(); ++ return {}; ++} ++ ++ ++//------------------------------------------------------------------------------ ++// frame::verify_deopt_original_pc ++// ++// Verifies the calculated original PC of a deoptimization PC for the ++// given unextended SP. The unextended SP might also be the saved SP ++// for MethodHandle call sites. ++#ifdef ASSERT ++void frame::verify_deopt_original_pc(CompiledMethod* nm, intptr_t* unextended_sp) { ++ frame fr; ++ ++ // This is ugly but it's better than to change {get,set}_original_pc ++ // to take an SP value as argument. And it's only a debugging ++ // method anyway. ++ fr._unextended_sp = unextended_sp; ++ ++ address original_pc = nm->get_original_pc(&fr); ++ assert(nm->insts_contains(original_pc), ++ "original PC must be in the main code section of the the compiled method (or must be immediately following it)"); ++} ++#endif ++ ++ ++//------------------------------------------------------------------------------ ++// frame::adjust_unextended_sp ++void frame::adjust_unextended_sp() { ++ // On MIPS, sites calling method handle intrinsics and lambda forms are treated ++ // as any other call site. Therefore, no special action is needed when we are ++ // returning to any of these call sites. ++ ++ if (_cb != NULL) { ++ CompiledMethod* sender_cm = _cb->as_compiled_method_or_null(); ++ if (sender_cm != NULL) { ++ // If the sender PC is a deoptimization point, get the original PC. ++ if (sender_cm->is_deopt_entry(_pc) || ++ sender_cm->is_deopt_mh_entry(_pc)) { ++ DEBUG_ONLY(verify_deopt_original_pc(sender_cm, _unextended_sp)); ++ } ++ } ++ } ++} ++ ++//------------------------------------------------------------------------------ ++// frame::update_map_with_saved_link ++void frame::update_map_with_saved_link(RegisterMap* map, intptr_t** link_addr) { ++ // The interpreter and compiler(s) always save fp in a known ++ // location on entry. 
We must record where that location is ++ // so that if fp was live on callout from c2 we can find ++ // the saved copy no matter what it called. ++ ++ // Since the interpreter always saves fp if we record where it is then ++ // we don't have to always save fp on entry and exit to c2 compiled ++ // code, on entry will be enough. ++ map->set_location(FP->as_VMReg(), (address) link_addr); ++ // this is weird "H" ought to be at a higher address however the ++ // oopMaps seems to have the "H" regs at the same address and the ++ // vanilla register. ++ // XXXX make this go away ++ if (true) { ++ map->set_location(FP->as_VMReg()->next(), (address) link_addr); ++ } ++} ++ ++//------------------------------sender_for_compiled_frame----------------------- ++frame frame::sender_for_compiled_frame(RegisterMap* map) const { ++ assert(map != NULL, "map must be set"); ++ ++ // frame owned by optimizing compiler ++ assert(_cb->frame_size() >= 0, "must have non-zero frame size"); ++ ++ intptr_t* sender_sp = unextended_sp() + _cb->frame_size(); ++ intptr_t* unextended_sp = sender_sp; ++ ++ // On Loongson the return_address is always the word on the stack ++ // the fp in compiler points to sender fp, but in interpreter, fp points to return address, ++ // so getting sender for compiled frame is not same as interpreter frame. ++ // we hard code here temporarily ++ // spark ++ address sender_pc = (address) *(sender_sp-1); ++ ++ intptr_t** saved_fp_addr = (intptr_t**) (sender_sp - frame::sender_sp_offset); ++ ++ if (map->update_map()) { ++ // Tell GC to use argument oopmaps for some runtime stubs that need it. ++ // For C1, the runtime stub might not have oop maps, so set this flag ++ // outside of update_register_map. ++ map->set_include_argument_oops(_cb->caller_must_gc_arguments(map->thread())); ++ if (_cb->oop_maps() != NULL) { ++ OopMapSet::update_register_map(this, map); ++ } ++ ++ // Since the prolog does the save and restore of epb there is no oopmap ++ // for it so we must fill in its location as if there was an oopmap entry ++ // since if our caller was compiled code there could be live jvm state in it. ++ update_map_with_saved_link(map, saved_fp_addr); ++ } ++ assert(sender_sp != sp(), "must have changed"); ++ return frame(sender_sp, unextended_sp, *saved_fp_addr, sender_pc); ++} ++ ++frame frame::sender(RegisterMap* map) const { ++ // Default is we done have to follow them. The sender_for_xxx will ++ // update it accordingly ++ map->set_include_argument_oops(false); ++ ++ if (is_entry_frame()) return sender_for_entry_frame(map); ++ if (is_interpreted_frame()) return sender_for_interpreter_frame(map); ++ assert(_cb == CodeCache::find_blob(pc()),"Must be the same"); ++ ++ if (_cb != NULL) { ++ return sender_for_compiled_frame(map); ++ } ++ // Must be native-compiled frame, i.e. the marshaling code for native ++ // methods that exists in the core system. ++ return frame(sender_sp(), link(), sender_pc()); ++} ++ ++bool frame::is_interpreted_frame_valid(JavaThread* thread) const { ++ assert(is_interpreted_frame(), "Not an interpreted frame"); ++ // These are reasonable sanity checks ++ if (fp() == 0 || (intptr_t(fp()) & (wordSize-1)) != 0) { ++ return false; ++ } ++ if (sp() == 0 || (intptr_t(sp()) & (wordSize-1)) != 0) { ++ return false; ++ } ++ if (fp() + interpreter_frame_initial_sp_offset < sp()) { ++ return false; ++ } ++ // These are hacks to keep us out of trouble. 
++ // The problem with these is that they mask other problems ++ if (fp() <= sp()) { // this attempts to deal with unsigned comparison above ++ return false; ++ } ++ ++ // do some validation of frame elements ++ ++ // first the method ++ ++ Method* m = *interpreter_frame_method_addr(); ++ ++ // validate the method we'd find in this potential sender ++ if (!Method::is_valid_method(m)) return false; ++ ++ // stack frames shouldn't be much larger than max_stack elements ++ ++ //if (fp() - sp() > 1024 + m->max_stack()*Interpreter::stackElementSize()) { ++ if (fp() - sp() > 4096) { // stack frames shouldn't be large. ++ return false; ++ } ++ ++ // validate bci/bcp ++ ++ address bcp = interpreter_frame_bcp(); ++ if (m->validate_bci_from_bcp(bcp) < 0) { ++ return false; ++ } ++ ++ // validate ConstantPoolCache* ++ ++ ConstantPoolCache* cp = *interpreter_frame_cache_addr(); ++ ++ if (MetaspaceObj::is_valid(cp) == false) return false; ++ ++ // validate locals ++ ++ address locals = (address) *interpreter_frame_locals_addr(); ++ ++ if (locals > thread->stack_base() || locals < (address) fp()) return false; ++ ++ // We'd have to be pretty unlucky to be mislead at this point ++ ++ return true; ++} ++ ++BasicType frame::interpreter_frame_result(oop* oop_result, jvalue* value_result) { ++ assert(is_interpreted_frame(), "interpreted frame expected"); ++ Method* method = interpreter_frame_method(); ++ BasicType type = method->result_type(); ++ ++ intptr_t* tos_addr; ++ if (method->is_native()) { ++ // Prior to calling into the runtime to report the method_exit the possible ++ // return value is pushed to the native stack. If the result is a jfloat/jdouble ++ // then ST0 is saved. See the note in generate_native_result ++ tos_addr = (intptr_t*)sp(); ++ if (type == T_FLOAT || type == T_DOUBLE) { ++ tos_addr += 2; ++ } ++ } else { ++ tos_addr = (intptr_t*)interpreter_frame_tos_address(); ++ } ++ ++ switch (type) { ++ case T_OBJECT : ++ case T_ARRAY : { ++ oop obj; ++ if (method->is_native()) { ++ obj = cast_to_oop(at(interpreter_frame_oop_temp_offset)); ++ } else { ++ oop* obj_p = (oop*)tos_addr; ++ obj = (obj_p == NULL) ? 
(oop)NULL : *obj_p; ++ } ++ assert(obj == NULL || Universe::heap()->is_in(obj), "sanity check"); ++ *oop_result = obj; ++ break; ++ } ++ case T_BOOLEAN : value_result->z = *(jboolean*)tos_addr; break; ++ case T_BYTE : value_result->b = *(jbyte*)tos_addr; break; ++ case T_CHAR : value_result->c = *(jchar*)tos_addr; break; ++ case T_SHORT : value_result->s = *(jshort*)tos_addr; break; ++ case T_INT : value_result->i = *(jint*)tos_addr; break; ++ case T_LONG : value_result->j = *(jlong*)tos_addr; break; ++ case T_FLOAT : value_result->f = *(jfloat*)tos_addr; break; ++ case T_DOUBLE : value_result->d = *(jdouble*)tos_addr; break; ++ case T_VOID : /* Nothing to do */ break; ++ default : ShouldNotReachHere(); ++ } ++ ++ return type; ++} ++ ++ ++intptr_t* frame::interpreter_frame_tos_at(jint offset) const { ++ int index = (Interpreter::expr_offset_in_bytes(offset)/wordSize); ++ return &interpreter_frame_tos_address()[index]; ++} ++ ++#ifndef PRODUCT ++ ++#define DESCRIBE_FP_OFFSET(name) \ ++ values.describe(frame_no, fp() + frame::name##_offset, #name) ++ ++void frame::describe_pd(FrameValues& values, int frame_no) { ++ if (is_interpreted_frame()) { ++ DESCRIBE_FP_OFFSET(interpreter_frame_sender_sp); ++ DESCRIBE_FP_OFFSET(interpreter_frame_last_sp); ++ DESCRIBE_FP_OFFSET(interpreter_frame_method); ++ DESCRIBE_FP_OFFSET(interpreter_frame_mirror); ++ DESCRIBE_FP_OFFSET(interpreter_frame_mdp); ++ DESCRIBE_FP_OFFSET(interpreter_frame_cache); ++ DESCRIBE_FP_OFFSET(interpreter_frame_locals); ++ DESCRIBE_FP_OFFSET(interpreter_frame_bcp); ++ DESCRIBE_FP_OFFSET(interpreter_frame_initial_sp); ++ } ++} ++#endif ++ ++intptr_t *frame::initial_deoptimization_info() { ++ // used to reset the saved FP ++ return fp(); ++} ++ ++intptr_t* frame::real_fp() const { ++ if (_cb != NULL) { ++ // use the frame size if valid ++ int size = _cb->frame_size(); ++ if (size > 0) { ++ return unextended_sp() + size; ++ } ++ } ++ // else rely on fp() ++ assert(! is_compiled_frame(), "unknown compiled frame size"); ++ return fp(); ++} ++ ++#ifndef PRODUCT ++// This is a generic constructor which is only used by pns() in debug.cpp. ++frame::frame(void* sp, void* fp, void* pc) { ++ init((intptr_t*)sp, (intptr_t*)fp, (address)pc); ++} ++ ++void frame::pd_ps() {} ++#endif +diff --git a/src/hotspot/cpu/mips/frame_mips.hpp b/src/hotspot/cpu/mips/frame_mips.hpp +new file mode 100644 +index 00000000000..ac706e71f9a +--- /dev/null ++++ b/src/hotspot/cpu/mips/frame_mips.hpp +@@ -0,0 +1,215 @@ ++/* ++ * Copyright (c) 1997, 2013, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 
++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#ifndef CPU_MIPS_VM_FRAME_MIPS_HPP ++#define CPU_MIPS_VM_FRAME_MIPS_HPP ++ ++#include "runtime/synchronizer.hpp" ++ ++// A frame represents a physical stack frame (an activation). Frames can be ++// C or Java frames, and the Java frames can be interpreted or compiled. ++// In contrast, vframes represent source-level activations, so that one physical frame ++// can correspond to multiple source level frames because of inlining. ++// A frame is comprised of {pc, fp, sp} ++// ------------------------------ Asm interpreter ---------------------------------------- ++// Layout of asm interpreter frame: ++// [expression stack ] * <- sp ++// [monitors ] \ ++// ... | monitor block size ++// [monitors ] / ++// [monitor block size ] ++// [byte code index/pointr] = bcx() bcx_offset ++// [pointer to locals ] = locals() locals_offset ++// [constant pool cache ] = cache() cache_offset ++// [methodData ] = mdp() mdx_offset ++// [Method ] = method() method_offset ++// [last sp ] = last_sp() last_sp_offset ++// [old stack pointer ] (sender_sp) sender_sp_offset ++// [old frame pointer ] <- fp = link() ++// [return pc ] ++// [oop temp ] (only for native calls) ++// [locals and parameters ] ++// <- sender sp ++// ------------------------------ Asm interpreter ---------------------------------------- ++ ++// ------------------------------ C++ interpreter ---------------------------------------- ++// ++// Layout of C++ interpreter frame: (While executing in BytecodeInterpreter::run) ++// ++// <- SP (current sp) ++// [local variables ] BytecodeInterpreter::run local variables ++// ... BytecodeInterpreter::run local variables ++// [local variables ] BytecodeInterpreter::run local variables ++// [old frame pointer ] fp [ BytecodeInterpreter::run's fp ] ++// [return pc ] (return to frame manager) ++// [interpreter_state* ] (arg to BytecodeInterpreter::run) -------------- ++// [expression stack ] <- last_Java_sp | ++// [... ] * <- interpreter_state.stack | ++// [expression stack ] * <- interpreter_state.stack_base | ++// [monitors ] \ | ++// ... | monitor block size | ++// [monitors ] / <- interpreter_state.monitor_base | ++// [struct interpretState ] <-----------------------------------------| ++// [return pc ] (return to callee of frame manager [1] ++// [locals and parameters ] ++// <- sender sp ++ ++// [1] When the c++ interpreter calls a new method it returns to the frame ++// manager which allocates a new frame on the stack. In that case there ++// is no real callee of this newly allocated frame. The frame manager is ++// aware of the additional frame(s) and will pop them as nested calls ++// complete. Howevers tTo make it look good in the debugger the frame ++// manager actually installs a dummy pc pointing to RecursiveInterpreterActivation ++// with a fake interpreter_state* parameter to make it easy to debug ++// nested calls. ++ ++// Note that contrary to the layout for the assembly interpreter the ++// expression stack allocated for the C++ interpreter is full sized. ++// However this is not as bad as it seems as the interpreter frame_manager ++// will truncate the unused space on succesive method calls. ++// ++// ------------------------------ C++ interpreter ---------------------------------------- ++ ++// Layout of interpreter frame: ++// ++// [ monitor entry ] <--- sp ++// ... 
++// [ monitor entry ] ++// -9 [ monitor block top ] ( the top monitor entry ) ++// -8 [ byte code pointer ] (if native, bcp = 0) ++// -7 [ constant pool cache ] ++// -6 [ methodData ] mdx_offset(not core only) ++// -5 [ mirror ] ++// -4 [ Method ] ++// -3 [ locals offset ] ++// -2 [ last_sp ] ++// -1 [ sender's sp ] ++// 0 [ sender's fp ] <--- fp ++// 1 [ return address ] ++// 2 [ oop temp offset ] (only for native calls) ++// 3 [ result handler offset ] (only for native calls) ++// 4 [ result type info ] (only for native calls) ++// [ local var m-1 ] ++// ... ++// [ local var 0 ] ++// [ argumnet word n-1 ] <--- ( sender's sp ) ++// ... ++// [ argument word 0 ] <--- S7 ++ ++ public: ++ enum { ++ pc_return_offset = 0, ++ // All frames ++ link_offset = 0, ++ return_addr_offset = 1, ++ // non-interpreter frames ++ sender_sp_offset = 2, ++ ++ // Interpreter frames ++ interpreter_frame_return_addr_offset = 1, ++ interpreter_frame_result_handler_offset = 3, // for native calls only ++ interpreter_frame_oop_temp_offset = 2, // for native calls only ++ ++ interpreter_frame_sender_fp_offset = 0, ++ interpreter_frame_sender_sp_offset = -1, ++ // outgoing sp before a call to an invoked method ++ interpreter_frame_last_sp_offset = interpreter_frame_sender_sp_offset - 1, ++ interpreter_frame_locals_offset = interpreter_frame_last_sp_offset - 1, ++ interpreter_frame_method_offset = interpreter_frame_locals_offset - 1, ++ interpreter_frame_mirror_offset = interpreter_frame_method_offset - 1, ++ interpreter_frame_mdp_offset = interpreter_frame_mirror_offset - 1, ++ interpreter_frame_cache_offset = interpreter_frame_mdp_offset - 1, ++ interpreter_frame_bcp_offset = interpreter_frame_cache_offset - 1, ++ interpreter_frame_initial_sp_offset = interpreter_frame_bcp_offset - 1, ++ ++ interpreter_frame_monitor_block_top_offset = interpreter_frame_initial_sp_offset, ++ interpreter_frame_monitor_block_bottom_offset = interpreter_frame_initial_sp_offset, ++ ++ // Entry frames ++ entry_frame_call_wrapper_offset = -9, ++ ++ // Native frames ++ ++ native_frame_initial_param_offset = 2 ++ ++ }; ++ ++ intptr_t ptr_at(int offset) const { ++ return *ptr_at_addr(offset); ++ } ++ ++ void ptr_at_put(int offset, intptr_t value) { ++ *ptr_at_addr(offset) = value; ++ } ++ ++ private: ++ // an additional field beyond _sp and _pc: ++ intptr_t* _fp; // frame pointer ++ // The interpreter and adapters will extend the frame of the caller. ++ // Since oopMaps are based on the sp of the caller before extension ++ // we need to know that value. However in order to compute the address ++ // of the return address we need the real "raw" sp. Since sparc already ++ // uses sp() to mean "raw" sp and unextended_sp() to mean the caller's ++ // original sp we use that convention. 
++ ++ intptr_t* _unextended_sp; ++ void adjust_unextended_sp(); ++ ++ intptr_t* ptr_at_addr(int offset) const { ++ return (intptr_t*) addr_at(offset); ++ } ++#ifdef ASSERT ++ // Used in frame::sender_for_{interpreter,compiled}_frame ++ static void verify_deopt_original_pc(CompiledMethod* nm, intptr_t* unextended_sp); ++#endif ++ ++ public: ++ // Constructors ++ ++ frame(intptr_t* sp, intptr_t* fp, address pc); ++ ++ frame(intptr_t* sp, intptr_t* unextended_sp, intptr_t* fp, address pc); ++ ++ frame(intptr_t* sp, intptr_t* fp); ++ ++ void init(intptr_t* sp, intptr_t* fp, address pc); ++ ++ // accessors for the instance variables ++ intptr_t* fp() const { return _fp; } ++ ++ inline address* sender_pc_addr() const; ++ ++ // expression stack tos if we are nested in a java call ++ intptr_t* interpreter_frame_last_sp() const; ++ ++ // helper to update a map with callee-saved FP ++ static void update_map_with_saved_link(RegisterMap* map, intptr_t** link_addr); ++ ++ // deoptimization support ++ void interpreter_frame_set_last_sp(intptr_t* sp); ++ ++ static jint interpreter_frame_expression_stack_direction() { return -1; } ++ ++#endif // CPU_MIPS_VM_FRAME_MIPS_HPP +diff --git a/src/hotspot/cpu/mips/frame_mips.inline.hpp b/src/hotspot/cpu/mips/frame_mips.inline.hpp +new file mode 100644 +index 00000000000..c408f01d69a +--- /dev/null ++++ b/src/hotspot/cpu/mips/frame_mips.inline.hpp +@@ -0,0 +1,238 @@ ++/* ++ * Copyright (c) 1997, 2013, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ * ++ */ ++ ++#ifndef CPU_MIPS_VM_FRAME_MIPS_INLINE_HPP ++#define CPU_MIPS_VM_FRAME_MIPS_INLINE_HPP ++ ++#include "code/codeCache.hpp" ++#include "code/vmreg.inline.hpp" ++ ++// Inline functions for Loongson frames: ++ ++// Constructors: ++ ++inline frame::frame() { ++ _pc = NULL; ++ _sp = NULL; ++ _unextended_sp = NULL; ++ _fp = NULL; ++ _cb = NULL; ++ _deopt_state = unknown; ++} ++ ++inline void frame::init(intptr_t* sp, intptr_t* fp, address pc) { ++ _sp = sp; ++ _unextended_sp = sp; ++ _fp = fp; ++ _pc = pc; ++ assert(pc != NULL, "no pc?"); ++ _cb = CodeCache::find_blob(pc); ++ adjust_unextended_sp(); ++ ++ address original_pc = CompiledMethod::get_deopt_original_pc(this); ++ if (original_pc != NULL) { ++ _pc = original_pc; ++ _deopt_state = is_deoptimized; ++ } else { ++ _deopt_state = not_deoptimized; ++ } ++} ++ ++inline frame::frame(intptr_t* sp, intptr_t* fp, address pc) { ++ init(sp, fp, pc); ++} ++ ++inline frame::frame(intptr_t* sp, intptr_t* unextended_sp, intptr_t* fp, address pc) { ++ _sp = sp; ++ _unextended_sp = unextended_sp; ++ _fp = fp; ++ _pc = pc; ++ assert(pc != NULL, "no pc?"); ++ _cb = CodeCache::find_blob(pc); ++ adjust_unextended_sp(); ++ ++ address original_pc = CompiledMethod::get_deopt_original_pc(this); ++ if (original_pc != NULL) { ++ _pc = original_pc; ++ _deopt_state = is_deoptimized; ++ } else { ++ _deopt_state = not_deoptimized; ++ } ++} ++ ++inline frame::frame(intptr_t* sp, intptr_t* fp) { ++ _sp = sp; ++ _unextended_sp = sp; ++ _fp = fp; ++ _pc = (address)(sp[-1]); ++ ++ // Here's a sticky one. This constructor can be called via AsyncGetCallTrace ++ // when last_Java_sp is non-null but the pc fetched is junk. If we are truly ++ // unlucky the junk value could be to a zombied method and we'll die on the ++ // find_blob call. This is also why we can have no asserts on the validity ++ // of the pc we find here. AsyncGetCallTrace -> pd_get_top_frame_for_signal_handler ++ // -> pd_last_frame should use a specialized version of pd_last_frame which could ++ // call a specilaized frame constructor instead of this one. ++ // Then we could use the assert below. However this assert is of somewhat dubious ++ // value. ++ // assert(_pc != NULL, "no pc?"); ++ ++ _cb = CodeCache::find_blob(_pc); ++ adjust_unextended_sp(); ++ address original_pc = CompiledMethod::get_deopt_original_pc(this); ++ if (original_pc != NULL) { ++ _pc = original_pc; ++ _deopt_state = is_deoptimized; ++ } else { ++ _deopt_state = not_deoptimized; ++ } ++} ++ ++// Accessors ++ ++inline bool frame::equal(frame other) const { ++ bool ret = sp() == other.sp() ++ && unextended_sp() == other.unextended_sp() ++ && fp() == other.fp() ++ && pc() == other.pc(); ++ assert(!ret || ret && cb() == other.cb() && _deopt_state == other._deopt_state, "inconsistent construction"); ++ return ret; ++} ++ ++// Return unique id for this frame. The id must have a value where we can distinguish ++// identity and younger/older relationship. NULL represents an invalid (incomparable) ++// frame. 
++inline intptr_t* frame::id(void) const { return unextended_sp(); } ++ ++// Relationals on frames based ++// Return true if the frame is younger (more recent activation) than the frame represented by id ++inline bool frame::is_younger(intptr_t* id) const { assert(this->id() != NULL && id != NULL, "NULL frame id"); ++ return this->id() < id ; } ++ ++// Return true if the frame is older (less recent activation) than the frame represented by id ++inline bool frame::is_older(intptr_t* id) const { assert(this->id() != NULL && id != NULL, "NULL frame id"); ++ return this->id() > id ; } ++ ++inline intptr_t* frame::link() const { ++ return (intptr_t*) *(intptr_t **)addr_at(link_offset); ++} ++ ++inline intptr_t* frame::link_or_null() const { ++ intptr_t** ptr = (intptr_t **)addr_at(link_offset); ++ return os::is_readable_pointer(ptr) ? *ptr : NULL; ++} ++ ++inline intptr_t* frame::unextended_sp() const { return _unextended_sp; } ++ ++// Return address: ++ ++inline address* frame::sender_pc_addr() const { return (address*) addr_at( return_addr_offset); } ++inline address frame::sender_pc() const { return *sender_pc_addr(); } ++ ++inline intptr_t* frame::sender_sp() const { return addr_at( sender_sp_offset); } ++ ++inline intptr_t** frame::interpreter_frame_locals_addr() const { ++ return (intptr_t**)addr_at(interpreter_frame_locals_offset); ++} ++ ++inline intptr_t* frame::interpreter_frame_last_sp() const { ++ return *(intptr_t**)addr_at(interpreter_frame_last_sp_offset); ++} ++ ++inline intptr_t* frame::interpreter_frame_bcp_addr() const { ++ return (intptr_t*)addr_at(interpreter_frame_bcp_offset); ++} ++ ++ ++inline intptr_t* frame::interpreter_frame_mdp_addr() const { ++ return (intptr_t*)addr_at(interpreter_frame_mdp_offset); ++} ++ ++ ++ ++// Constant pool cache ++ ++inline ConstantPoolCache** frame::interpreter_frame_cache_addr() const { ++ return (ConstantPoolCache**)addr_at(interpreter_frame_cache_offset); ++} ++ ++// Method ++ ++inline Method** frame::interpreter_frame_method_addr() const { ++ return (Method**)addr_at(interpreter_frame_method_offset); ++} ++ ++// Mirror ++ ++inline oop* frame::interpreter_frame_mirror_addr() const { ++ return (oop*)addr_at(interpreter_frame_mirror_offset); ++} ++ ++// top of expression stack ++inline intptr_t* frame::interpreter_frame_tos_address() const { ++ intptr_t* last_sp = interpreter_frame_last_sp(); ++ if (last_sp == NULL ) { ++ return sp(); ++ } else { ++ // sp() may have been extended by an adapter ++ assert(last_sp <= (intptr_t*)interpreter_frame_monitor_end(), "bad tos"); ++ return last_sp; ++ } ++} ++ ++inline oop* frame::interpreter_frame_temp_oop_addr() const { ++ return (oop *)(fp() + interpreter_frame_oop_temp_offset); ++} ++ ++inline int frame::interpreter_frame_monitor_size() { ++ return BasicObjectLock::size(); ++} ++ ++ ++// expression stack ++// (the max_stack arguments are used by the GC; see class FrameClosure) ++ ++inline intptr_t* frame::interpreter_frame_expression_stack() const { ++ intptr_t* monitor_end = (intptr_t*) interpreter_frame_monitor_end(); ++ return monitor_end-1; ++} ++ ++// Entry frames ++ ++inline JavaCallWrapper** frame::entry_frame_call_wrapper_addr() const { ++ return (JavaCallWrapper**)addr_at(entry_frame_call_wrapper_offset); ++} ++ ++// Compiled frames ++ ++inline oop frame::saved_oop_result(RegisterMap* map) const { ++ return *((oop*) map->location(V0->as_VMReg())); ++} ++ ++inline void frame::set_saved_oop_result(RegisterMap* map, oop obj) { ++ *((oop*) map->location(V0->as_VMReg())) = obj; ++} ++ 
++#endif // CPU_MIPS_VM_FRAME_MIPS_INLINE_HPP +diff --git a/src/hotspot/cpu/mips/gc/g1/g1BarrierSetAssembler_mips.cpp b/src/hotspot/cpu/mips/gc/g1/g1BarrierSetAssembler_mips.cpp +new file mode 100644 +index 00000000000..196ff1582a1 +--- /dev/null ++++ b/src/hotspot/cpu/mips/gc/g1/g1BarrierSetAssembler_mips.cpp +@@ -0,0 +1,373 @@ ++/* ++ * Copyright (c) 2018, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2018, 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#include "precompiled.hpp" ++#include "asm/macroAssembler.inline.hpp" ++#include "gc/g1/g1BarrierSet.hpp" ++#include "gc/g1/g1BarrierSetAssembler.hpp" ++#include "gc/g1/g1BarrierSetRuntime.hpp" ++#include "gc/g1/g1CardTable.hpp" ++#include "gc/g1/g1ThreadLocalData.hpp" ++#include "gc/g1/heapRegion.hpp" ++#include "interpreter/interp_masm.hpp" ++#include "runtime/sharedRuntime.hpp" ++#include "utilities/macros.hpp" ++ ++#define __ masm-> ++ ++#define A0 RA0 ++#define A1 RA1 ++#define A2 RA2 ++#define A3 RA3 ++#define A4 RA4 ++#define A5 RA5 ++#define A6 RA6 ++#define A7 RA7 ++ ++void G1BarrierSetAssembler::gen_write_ref_array_pre_barrier(MacroAssembler* masm, DecoratorSet decorators, ++ Register addr, Register count) { ++ bool dest_uninitialized = (decorators & IS_DEST_UNINITIALIZED) != 0; ++ ++ if (!dest_uninitialized) { ++#ifndef OPT_THREAD ++ Register thread = T9; ++ __ get_thread(thread); ++#else ++ Register thread = TREG; ++#endif ++ ++ Label filtered; ++ Address in_progress(thread, in_bytes(G1ThreadLocalData::satb_mark_queue_active_offset())); ++ // Is marking active? 
++ if (in_bytes(SATBMarkQueue::byte_width_of_active()) == 4) { ++ __ lw(AT, in_progress); ++ } else { ++ assert(in_bytes(SATBMarkQueue::byte_width_of_active()) == 1, "Assumption"); ++ __ lb(AT, in_progress); ++ } ++ ++ __ beq(AT, R0, filtered); ++ __ delayed()->nop(); ++ ++ __ pushad(); // push registers ++ if (count == A0) { ++ if (addr == A1) { ++ __ move(AT, A0); ++ __ move(A0, A1); ++ __ move(A1, AT); ++ } else { ++ __ move(A1, count); ++ __ move(A0, addr); ++ } ++ } else { ++ __ move(A0, addr); ++ __ move(A1, count); ++ } ++ if (UseCompressedOops) { ++ __ call_VM_leaf(CAST_FROM_FN_PTR(address, G1BarrierSetRuntime::write_ref_array_pre_narrow_oop_entry), 2); ++ } else { ++ __ call_VM_leaf(CAST_FROM_FN_PTR(address, G1BarrierSetRuntime::write_ref_array_pre_oop_entry), 2); ++ } ++ __ popad(); ++ ++ __ bind(filtered); ++ } ++} ++ ++void G1BarrierSetAssembler::gen_write_ref_array_post_barrier(MacroAssembler* masm, DecoratorSet decorators, ++ Register addr, Register count, Register tmp) { ++ __ pushad(); // push registers (overkill) ++ if (count == A0) { ++ assert_different_registers(A1, addr); ++ __ move(A1, count); ++ __ move(A0, addr); ++ } else { ++ assert_different_registers(A0, count); ++ __ move(A0, addr); ++ __ move(A1, count); ++ } ++ __ call_VM_leaf(CAST_FROM_FN_PTR(address, G1BarrierSetRuntime::write_ref_array_post_entry), 2); ++ __ popad(); ++} ++ ++void G1BarrierSetAssembler::load_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type, ++ Register dst, Address src, Register tmp1, Register tmp_thread) { ++ bool on_oop = type == T_OBJECT || type == T_ARRAY; ++ bool on_weak = (decorators & ON_WEAK_OOP_REF) != 0; ++ bool on_phantom = (decorators & ON_PHANTOM_OOP_REF) != 0; ++ bool on_reference = on_weak || on_phantom; ++ ModRefBarrierSetAssembler::load_at(masm, decorators, type, dst, src, tmp1, tmp_thread); ++ if (on_oop && on_reference) { ++ const Register thread = TREG; ++#ifndef OPT_THREAD ++ __ get_thread(thread); ++#endif ++ // Generate the G1 pre-barrier code to log the value of ++ // the referent field in an SATB buffer. ++ g1_write_barrier_pre(masm /* masm */, ++ noreg /* obj */, ++ dst /* pre_val */, ++ thread /* thread */, ++ tmp1 /* tmp */, ++ true /* tosca_live */, ++ true /* expand_call */); ++ } ++} ++ ++void G1BarrierSetAssembler::g1_write_barrier_pre(MacroAssembler* masm, ++ Register obj, ++ Register pre_val, ++ Register thread, ++ Register tmp, ++ bool tosca_live, ++ bool expand_call) { ++ // If expand_call is true then we expand the call_VM_leaf macro ++ // directly to skip generating the check by ++ // InterpreterMacroAssembler::call_VM_leaf_base that checks _last_sp. ++ ++ assert(thread == TREG, "must be"); ++ ++ Label done; ++ Label runtime; ++ ++ assert(pre_val != noreg, "check this code"); ++ ++ if (obj != noreg) { ++ assert_different_registers(obj, pre_val, tmp); ++ assert(pre_val != V0, "check this code"); ++ } ++ ++ Address in_progress(thread, in_bytes(G1ThreadLocalData::satb_mark_queue_active_offset())); ++ Address index(thread, in_bytes(G1ThreadLocalData::satb_mark_queue_index_offset())); ++ Address buffer(thread, in_bytes(G1ThreadLocalData::satb_mark_queue_buffer_offset())); ++ ++ // Is marking active? ++ if (in_bytes(SATBMarkQueue::byte_width_of_active()) == 4) { ++ __ lw(AT, in_progress); ++ } else { ++ assert(in_bytes(SATBMarkQueue::byte_width_of_active()) == 1, "Assumption"); ++ __ lb(AT, in_progress); ++ } ++ __ beq(AT, R0, done); ++ __ delayed()->nop(); ++ ++ // Do we need to load the previous value? 
++ if (obj != noreg) { ++ __ load_heap_oop(pre_val, Address(obj, 0)); ++ } ++ ++ // Is the previous value null? ++ __ beq(pre_val, R0, done); ++ __ delayed()->nop(); ++ ++ // Can we store original value in the thread's buffer? ++ // Is index == 0? ++ // (The index field is typed as size_t.) ++ ++ __ ld(tmp, index); ++ __ beq(tmp, R0, runtime); ++ __ delayed()->nop(); ++ ++ __ daddiu(tmp, tmp, -1 * wordSize); ++ __ sd(tmp, index); ++ __ ld(AT, buffer); ++ __ daddu(tmp, tmp, AT); ++ ++ // Record the previous value ++ __ sd(pre_val, tmp, 0); ++ __ beq(R0, R0, done); ++ __ delayed()->nop(); ++ ++ __ bind(runtime); ++ // save the live input values ++ if (tosca_live) __ push(V0); ++ ++ if (obj != noreg && obj != V0) __ push(obj); ++ ++ if (pre_val != V0) __ push(pre_val); ++ ++ // Calling the runtime using the regular call_VM_leaf mechanism generates ++ // code (generated by InterpreterMacroAssember::call_VM_leaf_base) ++ // that checks that the *(ebp+frame::interpreter_frame_last_sp) == NULL. ++ // ++ // If we care generating the pre-barrier without a frame (e.g. in the ++ // intrinsified Reference.get() routine) then ebp might be pointing to ++ // the caller frame and so this check will most likely fail at runtime. ++ // ++ // Expanding the call directly bypasses the generation of the check. ++ // So when we do not have have a full interpreter frame on the stack ++ // expand_call should be passed true. ++ ++ if (expand_call) { ++ assert(pre_val != A1, "smashed arg"); ++ if (thread != A1) __ move(A1, thread); ++ if (pre_val != A0) __ move(A0, pre_val); ++ __ super_call_VM_leaf(CAST_FROM_FN_PTR(address, G1BarrierSetRuntime::write_ref_field_pre_entry), pre_val, thread); ++ } else { ++ __ call_VM_leaf(CAST_FROM_FN_PTR(address, G1BarrierSetRuntime::write_ref_field_pre_entry), pre_val, thread); ++ } ++ ++ // save the live input values ++ if (pre_val != V0) ++ __ pop(pre_val); ++ ++ if (obj != noreg && obj != V0) ++ __ pop(obj); ++ ++ if (tosca_live) __ pop(V0); ++ ++ __ bind(done); ++} ++ ++void G1BarrierSetAssembler::g1_write_barrier_post(MacroAssembler* masm, ++ Register store_addr, ++ Register new_val, ++ Register thread, ++ Register tmp, ++ Register tmp2) { ++ assert_different_registers(tmp, tmp2, AT); ++ assert(thread == TREG, "must be"); ++ ++ Address queue_index(thread, in_bytes(G1ThreadLocalData::dirty_card_queue_index_offset())); ++ Address buffer(thread, in_bytes(G1ThreadLocalData::dirty_card_queue_buffer_offset())); ++ ++ CardTableBarrierSet* ct = barrier_set_cast(BarrierSet::barrier_set()); ++ assert(sizeof(*ct->card_table()->byte_map_base()) == sizeof(jbyte), "adjust this code"); ++ ++ Label done; ++ Label runtime; ++ ++ // Does store cross heap regions? ++ __ xorr(AT, store_addr, new_val); ++ __ dsrl(AT, AT, HeapRegion::LogOfHRGrainBytes); ++ __ beq(AT, R0, done); ++ __ delayed()->nop(); ++ ++ // crosses regions, storing NULL? ++ __ beq(new_val, R0, done); ++ __ delayed()->nop(); ++ ++ // storing region crossing non-NULL, is card already dirty? ++ const Register card_addr = tmp; ++ const Register cardtable = tmp2; ++ ++ __ move(card_addr, store_addr); ++ __ dsrl(card_addr, card_addr, CardTable::card_shift); ++ // Do not use ExternalAddress to load 'byte_map_base', since 'byte_map_base' is NOT ++ // a valid address and therefore is not properly handled by the relocation code. 
++ __ set64(cardtable, (intptr_t)ct->card_table()->byte_map_base()); ++ __ daddu(card_addr, card_addr, cardtable); ++ ++ __ lb(AT, card_addr, 0); ++ __ daddiu(AT, AT, -1 * (int)G1CardTable::g1_young_card_val()); ++ __ beq(AT, R0, done); ++ __ delayed()->nop(); ++ ++ __ sync(); ++ __ lb(AT, card_addr, 0); ++ __ daddiu(AT, AT, -1 * (int)G1CardTable::dirty_card_val()); ++ __ beq(AT, R0, done); ++ __ delayed()->nop(); ++ ++ // storing a region crossing, non-NULL oop, card is clean. ++ // dirty card and log. ++ __ move(AT, (int)G1CardTable::dirty_card_val()); ++ __ sb(AT, card_addr, 0); ++ ++ __ lw(AT, queue_index); ++ __ beq(AT, R0, runtime); ++ __ delayed()->nop(); ++ __ daddiu(AT, AT, -1 * wordSize); ++ __ sw(AT, queue_index); ++ __ ld(tmp2, buffer); ++ __ ld(AT, queue_index); ++ __ daddu(tmp2, tmp2, AT); ++ __ sd(card_addr, tmp2, 0); ++ __ beq(R0, R0, done); ++ __ delayed()->nop(); ++ ++ __ bind(runtime); ++ // save the live input values ++ __ push(store_addr); ++ __ push(new_val); ++ __ call_VM_leaf(CAST_FROM_FN_PTR(address, G1BarrierSetRuntime::write_ref_field_post_entry), card_addr, TREG); ++ __ pop(new_val); ++ __ pop(store_addr); ++ ++ __ bind(done); ++} ++ ++void G1BarrierSetAssembler::oop_store_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type, ++ Address dst, Register val, Register tmp1, Register tmp2) { ++ bool in_heap = (decorators & IN_HEAP) != 0; ++ bool as_normal = (decorators & AS_NORMAL) != 0; ++ assert((decorators & IS_DEST_UNINITIALIZED) == 0, "unsupported"); ++ ++ bool needs_pre_barrier = as_normal; ++ bool needs_post_barrier = val != noreg && in_heap; ++ ++ Register tmp3 = RT3; ++ Register rthread = TREG; ++ // flatten object address if needed ++ // We do it regardless of precise because we need the registers ++ if (dst.index() == noreg && dst.disp() == 0) { ++ if (dst.base() != tmp3) { ++ __ move(tmp3, dst.base()); ++ } ++ } else { ++ __ lea(tmp3, dst); ++ } ++ ++ if (needs_pre_barrier) { ++ g1_write_barrier_pre(masm /*masm*/, ++ tmp3 /* obj */, ++ tmp2 /* pre_val */, ++ rthread /* thread */, ++ tmp1 /* tmp */, ++ val != noreg /* tosca_live */, ++ false /* expand_call */); ++ } ++ if (val == noreg) { ++ BarrierSetAssembler::store_at(masm, decorators, type, Address(tmp3, 0), val, noreg, noreg); ++ } else { ++ Register new_val = val; ++ if (needs_post_barrier) { ++ // G1 barrier needs uncompressed oop for region cross check. ++ if (UseCompressedOops) { ++ new_val = tmp2; ++ __ move(new_val, val); ++ } ++ } ++ BarrierSetAssembler::store_at(masm, decorators, type, Address(tmp3, 0), val, noreg, noreg); ++ if (needs_post_barrier) { ++ g1_write_barrier_post(masm /*masm*/, ++ tmp3 /* store_adr */, ++ new_val /* new_val */, ++ rthread /* thread */, ++ tmp1 /* tmp */, ++ tmp2 /* tmp2 */); ++ } ++ } ++} +diff --git a/src/hotspot/cpu/mips/gc/g1/g1BarrierSetAssembler_mips.hpp b/src/hotspot/cpu/mips/gc/g1/g1BarrierSetAssembler_mips.hpp +new file mode 100644 +index 00000000000..ec5c243c3f1 +--- /dev/null ++++ b/src/hotspot/cpu/mips/gc/g1/g1BarrierSetAssembler_mips.hpp +@@ -0,0 +1,71 @@ ++/* ++ * Copyright (c) 2018, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2018, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. 
++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#ifndef CPU_MIPS_GC_G1_G1BARRIERSETASSEMBLER_MIPS_HPP ++#define CPU_MIPS_GC_G1_G1BARRIERSETASSEMBLER_MIPS_HPP ++ ++#include "asm/macroAssembler.hpp" ++#include "gc/shared/modRefBarrierSetAssembler.hpp" ++ ++class LIR_Assembler; ++class StubAssembler; ++class G1PreBarrierStub; ++class G1PostBarrierStub; ++ ++class G1BarrierSetAssembler: public ModRefBarrierSetAssembler { ++ protected: ++ virtual void gen_write_ref_array_pre_barrier(MacroAssembler* masm, DecoratorSet decorators, Register addr, Register count); ++ virtual void gen_write_ref_array_post_barrier(MacroAssembler* masm, DecoratorSet decorators, Register addr, Register count, Register tmp); ++ ++ void g1_write_barrier_pre(MacroAssembler* masm, ++ Register obj, ++ Register pre_val, ++ Register thread, ++ Register tmp, ++ bool tosca_live, ++ bool expand_call); ++ ++ void g1_write_barrier_post(MacroAssembler* masm, ++ Register store_addr, ++ Register new_val, ++ Register thread, ++ Register tmp, ++ Register tmp2); ++ ++ virtual void oop_store_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type, ++ Address dst, Register val, Register tmp1, Register tmp2); ++ ++ public: ++ void gen_pre_barrier_stub(LIR_Assembler* ce, G1PreBarrierStub* stub); ++ void gen_post_barrier_stub(LIR_Assembler* ce, G1PostBarrierStub* stub); ++ ++ void generate_c1_pre_barrier_runtime_stub(StubAssembler* sasm); ++ void generate_c1_post_barrier_runtime_stub(StubAssembler* sasm); ++ ++ virtual void load_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type, ++ Register dst, Address src, Register tmp1, Register tmp_thread); ++}; ++ ++#endif // CPU_MIPS_GC_G1_G1BARRIERSETASSEMBLER_MIPS_HPP +diff --git a/src/hotspot/cpu/mips/gc/g1/g1Globals_mips.hpp b/src/hotspot/cpu/mips/gc/g1/g1Globals_mips.hpp +new file mode 100644 +index 00000000000..f0c7badaac7 +--- /dev/null ++++ b/src/hotspot/cpu/mips/gc/g1/g1Globals_mips.hpp +@@ -0,0 +1,30 @@ ++/* ++ * Copyright (c) 2021, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2021, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). 
++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ */ ++ ++#ifndef CPU_MIPS_GC_G1_G1GLOBALS_MIPS_HPP ++#define CPU_MIPS_GC_G1_G1GLOBALS_MIPS_HPP ++ ++const size_t G1MergeHeapRootsPrefetchCacheSize = 8; ++ ++#endif // CPU_MIPS_GC_G1_G1GLOBALS_MIPS_HPP +diff --git a/src/hotspot/cpu/mips/gc/shared/barrierSetAssembler_mips.cpp b/src/hotspot/cpu/mips/gc/shared/barrierSetAssembler_mips.cpp +new file mode 100644 +index 00000000000..071debdc3a3 +--- /dev/null ++++ b/src/hotspot/cpu/mips/gc/shared/barrierSetAssembler_mips.cpp +@@ -0,0 +1,194 @@ ++/* ++ * Copyright (c) 2018, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2018, 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ * ++ */ ++ ++#include "precompiled.hpp" ++#include "gc/shared/barrierSetAssembler.hpp" ++#include "gc/shared/collectedHeap.hpp" ++#include "interpreter/interp_masm.hpp" ++#include "runtime/jniHandles.hpp" ++#include "runtime/thread.hpp" ++ ++#define __ masm-> ++ ++void BarrierSetAssembler::load_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type, ++ Register dst, Address src, Register tmp1, Register tmp_thread) { ++ bool in_heap = (decorators & IN_HEAP) != 0; ++ bool in_native = (decorators & IN_NATIVE) != 0; ++ bool is_not_null = (decorators & IS_NOT_NULL) != 0; ++ ++ switch (type) { ++ case T_OBJECT: ++ case T_ARRAY: { ++ if (in_heap) { ++ if (UseCompressedOops) { ++ __ lwu(dst, src); ++ if (is_not_null) { ++ __ decode_heap_oop_not_null(dst); ++ } else { ++ __ decode_heap_oop(dst); ++ } ++ } else ++ { ++ __ ld_ptr(dst, src); ++ } ++ } else { ++ assert(in_native, "why else?"); ++ __ ld_ptr(dst, src); ++ } ++ break; ++ } ++ case T_BOOLEAN: __ lbu (dst, src); break; ++ case T_BYTE: __ lb (dst, src); break; ++ case T_CHAR: __ lhu (dst, src); break; ++ case T_SHORT: __ lh (dst, src); break; ++ case T_INT: __ lw (dst, src); break; ++ case T_LONG: __ ld (dst, src); break; ++ case T_ADDRESS: __ ld_ptr(dst, src); break; ++ case T_FLOAT: ++ assert(dst == noreg, "only to ftos"); ++ __ lwc1(FSF, src); ++ break; ++ case T_DOUBLE: ++ assert(dst == noreg, "only to dtos"); ++ __ ldc1(FSF, src); ++ break; ++ default: Unimplemented(); ++ } ++} ++ ++void BarrierSetAssembler::store_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type, ++ Address dst, Register val, Register tmp1, Register tmp2) { ++ bool in_heap = (decorators & IN_HEAP) != 0; ++ bool in_native = (decorators & IN_NATIVE) != 0; ++ bool is_not_null = (decorators & IS_NOT_NULL) != 0; ++ ++ switch (type) { ++ case T_OBJECT: ++ case T_ARRAY: { ++ if (in_heap) { ++ if (val == noreg) { ++ assert(!is_not_null, "inconsistent access"); ++ if (UseCompressedOops) { ++ __ sw(R0, dst); ++ } else { ++ __ sd(R0, dst); ++ } ++ } else { ++ if (UseCompressedOops) { ++ assert(!dst.uses(val), "not enough registers"); ++ if (is_not_null) { ++ __ encode_heap_oop_not_null(val); ++ } else { ++ __ encode_heap_oop(val); ++ } ++ __ sw(val, dst); ++ } else ++ { ++ __ st_ptr(val, dst); ++ } ++ } ++ } else { ++ assert(in_native, "why else?"); ++ assert(val != noreg, "not supported"); ++ __ st_ptr(val, dst); ++ } ++ break; ++ } ++ case T_BOOLEAN: ++ __ andi(val, val, 0x1); // boolean is true if LSB is 1 ++ __ sb(val, dst); ++ break; ++ case T_BYTE: ++ __ sb(val, dst); ++ break; ++ case T_SHORT: ++ __ sh(val, dst); ++ break; ++ case T_CHAR: ++ __ sh(val, dst); ++ break; ++ case T_INT: ++ __ sw(val, dst); ++ break; ++ case T_LONG: ++ __ sd(val, dst); ++ break; ++ case T_FLOAT: ++ assert(val == noreg, "only tos"); ++ __ swc1(FSF, dst); ++ break; ++ case T_DOUBLE: ++ assert(val == noreg, "only tos"); ++ __ sdc1(FSF, dst); ++ break; ++ case T_ADDRESS: ++ __ st_ptr(val, dst); ++ break; ++ default: Unimplemented(); ++ } ++} ++ ++void BarrierSetAssembler::obj_equals(MacroAssembler* masm, ++ Register obj1, Address obj2) { ++ Unimplemented(); ++} ++ ++void BarrierSetAssembler::obj_equals(MacroAssembler* masm, ++ Register obj1, Register obj2) { ++ Unimplemented(); ++} ++ ++void BarrierSetAssembler::try_resolve_jobject_in_native(MacroAssembler* masm, Register jni_env, ++ Register obj, Register tmp, Label& slowpath) { ++ __ clear_jweak_tag(obj); ++ __ ld_ptr(obj, Address(obj, 0)); ++} ++ ++void BarrierSetAssembler::tlab_allocate(MacroAssembler* masm, ++ 
Register thread, Register obj, ++ Register var_size_in_bytes, ++ int con_size_in_bytes, ++ Register t1, ++ Register t2, ++ Label& slow_case) { ++ Unimplemented(); ++} ++ ++// Defines obj, preserves var_size_in_bytes ++void BarrierSetAssembler::eden_allocate(MacroAssembler* masm, ++ Register thread, Register obj, ++ Register var_size_in_bytes, ++ int con_size_in_bytes, ++ Register t1, ++ Label& slow_case) { ++ Unimplemented(); ++} ++ ++void BarrierSetAssembler::incr_allocated_bytes(MacroAssembler* masm, Register thread, ++ Register var_size_in_bytes, ++ int con_size_in_bytes, ++ Register t1) { ++ Unimplemented(); ++} +diff --git a/src/hotspot/cpu/mips/gc/shared/barrierSetAssembler_mips.hpp b/src/hotspot/cpu/mips/gc/shared/barrierSetAssembler_mips.hpp +new file mode 100644 +index 00000000000..bc68de604d2 +--- /dev/null ++++ b/src/hotspot/cpu/mips/gc/shared/barrierSetAssembler_mips.hpp +@@ -0,0 +1,84 @@ ++/* ++ * Copyright (c) 2018, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2018, 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ * ++ */ ++ ++#ifndef CPU_MIPS_GC_SHARED_BARRIERSETASSEMBLER_MIPS_HPP ++#define CPU_MIPS_GC_SHARED_BARRIERSETASSEMBLER_MIPS_HPP ++ ++#include "asm/macroAssembler.hpp" ++#include "gc/shared/barrierSet.hpp" ++#include "gc/shared/barrierSetNMethod.hpp" ++#include "memory/allocation.hpp" ++#include "oops/access.hpp" ++ ++class InterpreterMacroAssembler; ++ ++class BarrierSetAssembler: public CHeapObj { ++private: ++ void incr_allocated_bytes(MacroAssembler* masm, Register thread, ++ Register var_size_in_bytes, ++ int con_size_in_bytes, ++ Register t1); ++ ++public: ++ virtual void arraycopy_prologue(MacroAssembler* masm, DecoratorSet decorators, bool is_oop, ++ Register dst, Register count, Register scratch = NOREG) {} ++ virtual void arraycopy_epilogue(MacroAssembler* masm, DecoratorSet decorators, bool is_oop, ++ Register dst, Register count, Register scratch = NOREG) {} ++ ++ virtual void load_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type, ++ Register dst, Address src, Register tmp1, Register tmp_thread); ++ virtual void store_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type, ++ Address dst, Register val, Register tmp1, Register tmp2); ++ ++ virtual void obj_equals(MacroAssembler* masm, ++ Register obj1, Register obj2); ++ virtual void obj_equals(MacroAssembler* masm, ++ Register obj1, Address obj2); ++ ++ virtual void resolve(MacroAssembler* masm, DecoratorSet decorators, Register obj) { ++ // Default implementation does not need to do anything. ++ } ++ ++ // Support for jniFastGetField to try resolving a jobject/jweak in native ++ virtual void try_resolve_jobject_in_native(MacroAssembler* masm, Register jni_env, ++ Register obj, Register tmp, Label& slowpath); ++ ++ virtual void tlab_allocate(MacroAssembler* masm, ++ Register thread, Register obj, ++ Register var_size_in_bytes, ++ int con_size_in_bytes, ++ Register t1, Register t2, ++ Label& slow_case); ++ virtual void eden_allocate(MacroAssembler* masm, ++ Register thread, Register obj, ++ Register var_size_in_bytes, ++ int con_size_in_bytes, ++ Register t1, ++ Label& slow_case); ++ ++ virtual void barrier_stubs_init() {} ++}; ++ ++#endif // CPU_MIPS_GC_SHARED_BARRIERSETASSEMBLER_MIPS_HPP +diff --git a/src/hotspot/cpu/mips/gc/shared/barrierSetNMethod_mips.cpp b/src/hotspot/cpu/mips/gc/shared/barrierSetNMethod_mips.cpp +new file mode 100644 +index 00000000000..3d4e69333b0 +--- /dev/null ++++ b/src/hotspot/cpu/mips/gc/shared/barrierSetNMethod_mips.cpp +@@ -0,0 +1,41 @@ ++/* ++ * Copyright (c) 2018, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2019, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 
++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#include "precompiled.hpp" ++#include "gc/shared/barrierSetNMethod.hpp" ++#include "utilities/debug.hpp" ++ ++void BarrierSetNMethod::deoptimize(nmethod* nm, address* return_address_ptr) { ++ ShouldNotReachHere(); ++} ++ ++void BarrierSetNMethod::disarm(nmethod* nm) { ++ ShouldNotReachHere(); ++} ++ ++bool BarrierSetNMethod::is_armed(nmethod* nm) { ++ ShouldNotReachHere(); ++ return false; ++} +diff --git a/src/hotspot/cpu/mips/gc/shared/cardTableBarrierSetAssembler_mips.cpp b/src/hotspot/cpu/mips/gc/shared/cardTableBarrierSetAssembler_mips.cpp +new file mode 100644 +index 00000000000..1b2002fd040 +--- /dev/null ++++ b/src/hotspot/cpu/mips/gc/shared/cardTableBarrierSetAssembler_mips.cpp +@@ -0,0 +1,144 @@ ++/* ++ * Copyright (c) 2018, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2018, 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#include "precompiled.hpp" ++#include "asm/macroAssembler.inline.hpp" ++#include "gc/shared/barrierSet.hpp" ++#include "gc/shared/cardTable.hpp" ++#include "gc/shared/cardTableBarrierSet.hpp" ++#include "gc/shared/cardTableBarrierSetAssembler.hpp" ++ ++#define __ masm-> ++ ++#define T9 RT9 ++ ++#ifdef PRODUCT ++#define BLOCK_COMMENT(str) /* nothing */ ++#else ++#define BLOCK_COMMENT(str) __ block_comment(str) ++#endif ++ ++#define BIND(label) bind(label); BLOCK_COMMENT(#label ":") ++ ++#define TIMES_OOP (UseCompressedOops ? 
Address::times_4 : Address::times_8) ++ ++void CardTableBarrierSetAssembler::gen_write_ref_array_post_barrier(MacroAssembler* masm, DecoratorSet decorators, ++ Register addr, Register count, Register tmp) { ++ BarrierSet *bs = BarrierSet::barrier_set(); ++ CardTableBarrierSet* ctbs = barrier_set_cast(bs); ++ CardTable* ct = ctbs->card_table(); ++ assert(sizeof(*ct->byte_map_base()) == sizeof(jbyte), "adjust this code"); ++ intptr_t disp = (intptr_t) ct->byte_map_base(); ++ ++ Label L_loop, L_done; ++ const Register end = count; ++ assert_different_registers(addr, end); ++ ++ __ beq(count, R0, L_done); // zero count - nothing to do ++ __ delayed()->nop(); ++ ++ __ set64(tmp, disp); ++ ++ __ lea(end, Address(addr, count, TIMES_OOP, 0)); // end == addr+count*oop_size ++ __ daddiu(end, end, -BytesPerHeapOop); // end - 1 to make inclusive ++ __ shr(addr, CardTable::card_shift); ++ __ shr(end, CardTable::card_shift); ++ __ dsubu(end, end, addr); // end --> cards count ++ ++ __ daddu(addr, addr, tmp); ++ ++ __ BIND(L_loop); ++ if (UseLEXT1) { ++ __ gssbx(R0, addr, count, 0); ++ } else { ++ __ daddu(AT, addr, count); ++ __ sb(R0, AT, 0); ++ } ++ __ daddiu(count, count, -1); ++ __ bgez(count, L_loop); ++ __ delayed()->nop(); ++ ++ __ BIND(L_done); ++} ++ ++void CardTableBarrierSetAssembler::store_check(MacroAssembler* masm, Register obj, Address dst) { ++ // Does a store check for the oop in register obj. The content of ++ // register obj is destroyed afterwards. ++ BarrierSet* bs = BarrierSet::barrier_set(); ++ ++ CardTableBarrierSet* ctbs = barrier_set_cast(bs); ++ CardTable* ct = ctbs->card_table(); ++ assert(sizeof(*ct->byte_map_base()) == sizeof(jbyte), "adjust this code"); ++ ++ __ shr(obj, CardTable::card_shift); ++ ++ Address card_addr; ++ ++ intptr_t byte_map_base = (intptr_t)ct->byte_map_base(); ++ Register tmp = T9; ++ assert_different_registers(tmp, obj); ++ __ li(tmp, byte_map_base); ++ __ addu(tmp, tmp, obj); ++ ++ assert(CardTable::dirty_card_val() == 0, "must be"); ++ ++ jbyte dirty = CardTable::dirty_card_val(); ++ if (UseCondCardMark) { ++ Untested("Untested"); ++ __ warn("store_check Untested"); ++ Label L_already_dirty; ++ __ membar(Assembler::StoreLoad); ++ __ lb(AT, tmp, 0); ++ __ addiu(AT, AT, -1 * dirty); ++ __ beq(AT, R0, L_already_dirty); ++ __ delayed()->nop(); ++ __ sb(R0, tmp, 0); ++ __ bind(L_already_dirty); ++ } else { ++ __ sb(R0, tmp, 0); ++ } ++} ++ ++void CardTableBarrierSetAssembler::oop_store_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type, ++ Address dst, Register val, Register tmp1, Register tmp2) { ++ bool in_heap = (decorators & IN_HEAP) != 0; ++ ++ bool is_array = (decorators & IS_ARRAY) != 0; ++ bool on_anonymous = (decorators & ON_UNKNOWN_OOP_REF) != 0; ++ bool precise = is_array || on_anonymous; ++ ++ bool needs_post_barrier = val != noreg && in_heap; ++ ++ BarrierSetAssembler::store_at(masm, decorators, type, dst, val, noreg, noreg); ++ if (needs_post_barrier) { ++ // flatten object address if needed ++ if (!precise || (dst.index() == noreg && dst.disp() == 0)) { ++ store_check(masm, dst.base(), dst); ++ } else { ++ __ lea(tmp1, dst); ++ store_check(masm, tmp1, dst); ++ } ++ } ++} +diff --git a/src/hotspot/cpu/mips/gc/shared/cardTableBarrierSetAssembler_mips.hpp b/src/hotspot/cpu/mips/gc/shared/cardTableBarrierSetAssembler_mips.hpp +new file mode 100644 +index 00000000000..49c2a0ea80e +--- /dev/null ++++ b/src/hotspot/cpu/mips/gc/shared/cardTableBarrierSetAssembler_mips.hpp +@@ -0,0 +1,42 @@ ++/* ++ * Copyright (c) 2018, Oracle and/or 
its affiliates. All rights reserved. ++ * Copyright (c) 2018, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#ifndef CPU_MIPS_GC_SHARED_CARDTABLEBARRIERSETASSEMBLER_MIPS_HPP ++#define CPU_MIPS_GC_SHARED_CARDTABLEBARRIERSETASSEMBLER_MIPS_HPP ++ ++#include "asm/macroAssembler.hpp" ++#include "gc/shared/modRefBarrierSetAssembler.hpp" ++ ++class CardTableBarrierSetAssembler: public ModRefBarrierSetAssembler { ++protected: ++ void store_check(MacroAssembler* masm, Register obj, Address dst); ++ ++ virtual void gen_write_ref_array_post_barrier(MacroAssembler* masm, DecoratorSet decorators, Register addr, Register count, Register tmp); ++ ++ virtual void oop_store_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type, ++ Address dst, Register val, Register tmp1, Register tmp2); ++}; ++ ++#endif // CPU_MIPS_GC_SHARED_CARDTABLEBARRIERSETASSEMBLER_MIPS_HPP +diff --git a/src/hotspot/cpu/mips/gc/shared/modRefBarrierSetAssembler_mips.cpp b/src/hotspot/cpu/mips/gc/shared/modRefBarrierSetAssembler_mips.cpp +new file mode 100644 +index 00000000000..765259e6266 +--- /dev/null ++++ b/src/hotspot/cpu/mips/gc/shared/modRefBarrierSetAssembler_mips.cpp +@@ -0,0 +1,53 @@ ++/* ++ * Copyright (c) 2018, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2018, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ * ++ */ ++ ++#include "precompiled.hpp" ++#include "asm/macroAssembler.inline.hpp" ++#include "gc/shared/modRefBarrierSetAssembler.hpp" ++ ++#define __ masm-> ++ ++void ModRefBarrierSetAssembler::arraycopy_prologue(MacroAssembler* masm, DecoratorSet decorators, bool is_oop, ++ Register dst, Register count, Register scratch) { ++ if (is_oop) { ++ gen_write_ref_array_pre_barrier(masm, decorators, dst, count); ++ } ++} ++ ++void ModRefBarrierSetAssembler::arraycopy_epilogue(MacroAssembler* masm, DecoratorSet decorators, bool is_oop, ++ Register dst, Register count, Register scratch) { ++ if (is_oop) { ++ gen_write_ref_array_post_barrier(masm, decorators, dst, count, scratch); ++ } ++} ++ ++void ModRefBarrierSetAssembler::store_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type, ++ Address dst, Register val, Register tmp1, Register tmp2) { ++ if (type == T_OBJECT || type == T_ARRAY) { ++ oop_store_at(masm, decorators, type, dst, val, tmp1, tmp2); ++ } else { ++ BarrierSetAssembler::store_at(masm, decorators, type, dst, val, tmp1, tmp2); ++ } ++} +diff --git a/src/hotspot/cpu/mips/gc/shared/modRefBarrierSetAssembler_mips.hpp b/src/hotspot/cpu/mips/gc/shared/modRefBarrierSetAssembler_mips.hpp +new file mode 100644 +index 00000000000..5320a4c0add +--- /dev/null ++++ b/src/hotspot/cpu/mips/gc/shared/modRefBarrierSetAssembler_mips.hpp +@@ -0,0 +1,54 @@ ++/* ++ * Copyright (c) 2018, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2018, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#ifndef CPU_MIPS_GC_SHARED_MODREFBARRIERSETASSEMBLER_MIPS_HPP ++#define CPU_MIPS_GC_SHARED_MODREFBARRIERSETASSEMBLER_MIPS_HPP ++ ++#include "asm/macroAssembler.hpp" ++#include "gc/shared/barrierSetAssembler.hpp" ++ ++// The ModRefBarrierSetAssembler filters away accesses on BasicTypes other ++// than T_OBJECT/T_ARRAY (oops). The oop accesses call one of the protected ++// accesses, which are overridden in the concrete BarrierSetAssembler. 
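// --------------------------------------------------------------------------
// Editorial sketch (illustrative only, not part of the upstream patch): the
// two barrier-set files above emit this pattern as MIPS assembly.
// ModRefBarrierSetAssembler routes only oop stores (T_OBJECT/T_ARRAY) to the
// overridable hook, and the card-table subclass implements that hook as
// "raw store, then dirty the card covering the destination".  The names and
// globals below are simplified placeholders, not HotSpot API.
#include <cstdint>

static uint8_t*  byte_map_base;      // biased card-table base (placeholder)
static const int card_shift = 9;     // 512-byte cards, the HotSpot default

// Rough equivalent of CardTableBarrierSetAssembler::store_check(): mark the
// card for the just-written heap location dirty (dirty_card_val() == 0).
inline void card_mark(void* field_addr) {
  byte_map_base[reinterpret_cast<uintptr_t>(field_addr) >> card_shift] = 0;
}

// Rough equivalent of ModRefBarrierSetAssembler::store_at() feeding into
// CardTableBarrierSetAssembler::oop_store_at(): non-oop stores go straight
// through; oop stores into the heap of a non-null value get the post barrier.
inline void store_at(bool is_oop, bool in_heap, void** field_addr, void* val) {
  *field_addr = val;                 // plain BarrierSetAssembler::store_at()
  if (is_oop && in_heap && val != nullptr) {
    card_mark(field_addr);           // post-write barrier
  }
}
// --------------------------------------------------------------------------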
++ ++class ModRefBarrierSetAssembler: public BarrierSetAssembler { ++protected: ++ virtual void gen_write_ref_array_pre_barrier(MacroAssembler* masm, DecoratorSet decorators, ++ Register addr, Register count) {} ++ virtual void gen_write_ref_array_post_barrier(MacroAssembler* masm, DecoratorSet decorators, ++ Register addr, Register count, Register tmp) {} ++ virtual void oop_store_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type, ++ Address dst, Register val, Register tmp1, Register tmp2) = 0; ++public: ++ virtual void arraycopy_prologue(MacroAssembler* masm, DecoratorSet decorators, bool is_oop, ++ Register dst, Register count, Register scratch = NOREG); ++ virtual void arraycopy_epilogue(MacroAssembler* masm, DecoratorSet decorators, bool is_oop, ++ Register dst, Register count, Register scratch = NOREG); ++ ++ virtual void store_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type, ++ Address dst, Register val, Register tmp1, Register tmp2); ++}; ++ ++#endif // CPU_MIPS_GC_SHARED_MODREFBARRIERSETASSEMBLER_MIPS_HPP +diff --git a/src/hotspot/cpu/mips/globalDefinitions_mips.hpp b/src/hotspot/cpu/mips/globalDefinitions_mips.hpp +new file mode 100644 +index 00000000000..2b50d15ffd7 +--- /dev/null ++++ b/src/hotspot/cpu/mips/globalDefinitions_mips.hpp +@@ -0,0 +1,47 @@ ++/* ++ * Copyright (c) 1999, 2013, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2020, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#ifndef CPU_MIPS_VM_GLOBALDEFINITIONS_MIPS_HPP ++#define CPU_MIPS_VM_GLOBALDEFINITIONS_MIPS_HPP ++// Size of MIPS Instructions ++const int BytesPerInstWord = 4; ++ ++const int StackAlignmentInBytes = (2*wordSize); ++ ++// Indicates whether the C calling conventions require that ++// 32-bit integer argument values are properly extended to 64 bits. ++// If set, SharedRuntime::c_calling_convention() must adapt ++// signatures accordingly. ++const bool CCallingConventionRequiresIntsAsLongs = false; ++ ++#define SUPPORTS_NATIVE_CX8 ++ ++#define SUPPORT_RESERVED_STACK_AREA ++ ++#define PREFERRED_METASPACE_ALIGNMENT ++ ++#define COMPRESSED_CLASS_POINTERS_DEPENDS_ON_COMPRESSED_OOPS false ++ ++#endif // CPU_MIPS_VM_GLOBALDEFINITIONS_MIPS_HPP +diff --git a/src/hotspot/cpu/mips/globals_mips.hpp b/src/hotspot/cpu/mips/globals_mips.hpp +new file mode 100644 +index 00000000000..2d88d370c94 +--- /dev/null ++++ b/src/hotspot/cpu/mips/globals_mips.hpp +@@ -0,0 +1,132 @@ ++/* ++ * Copyright (c) 2000, 2013, Oracle and/or its affiliates. All rights reserved. 
++ * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#ifndef CPU_MIPS_VM_GLOBALS_MIPS_HPP ++#define CPU_MIPS_VM_GLOBALS_MIPS_HPP ++ ++#include "utilities/globalDefinitions.hpp" ++#include "utilities/macros.hpp" ++ ++// Sets the default values for platform dependent flags used by the runtime system. ++// (see globals.hpp) ++ ++define_pd_global(bool, ShareVtableStubs, true); ++define_pd_global(bool, NeedsDeoptSuspend, false); // only register window machines need this ++ ++define_pd_global(bool, ImplicitNullChecks, true); // Generate code for implicit null checks ++define_pd_global(bool, TrapBasedNullChecks, false); // Not needed on x86. ++define_pd_global(bool, UncommonNullCast, true); // Uncommon-trap NULLs passed to check cast ++ ++define_pd_global(uintx, CodeCacheSegmentSize, 64); ++define_pd_global(intx, CodeEntryAlignment, 16); ++define_pd_global(intx, OptoLoopAlignment, 16); ++define_pd_global(intx, InlineFrequencyCount, 100); ++// MIPS generates 3x instructions than X86 ++define_pd_global(intx, InlineSmallCode, 4000); ++ ++#define DEFAULT_STACK_YELLOW_PAGES (2) ++#define DEFAULT_STACK_RED_PAGES (1) ++#define DEFAULT_STACK_SHADOW_PAGES (20 DEBUG_ONLY(+4)) ++#define DEFAULT_STACK_RESERVED_PAGES (1) ++define_pd_global(uintx, TLABSize, 0); ++define_pd_global(uintx, NewSize, 1024 * K); ++define_pd_global(intx, PreInflateSpin, 10); ++ ++define_pd_global(intx, PrefetchCopyIntervalInBytes, -1); ++define_pd_global(intx, PrefetchScanIntervalInBytes, -1); ++define_pd_global(intx, PrefetchFieldsAhead, -1); ++ ++#define MIN_STACK_YELLOW_PAGES DEFAULT_STACK_YELLOW_PAGES ++#define MIN_STACK_RED_PAGES DEFAULT_STACK_RED_PAGES ++#define MIN_STACK_SHADOW_PAGES DEFAULT_STACK_SHADOW_PAGES ++#define MIN_STACK_RESERVED_PAGES (0) ++define_pd_global(intx, StackReservedPages, DEFAULT_STACK_RESERVED_PAGES); ++ ++define_pd_global(intx, StackYellowPages, 2); ++define_pd_global(intx, StackRedPages, 1); ++define_pd_global(intx, StackShadowPages, DEFAULT_STACK_SHADOW_PAGES); ++ ++define_pd_global(bool, RewriteBytecodes, true); ++define_pd_global(bool, RewriteFrequentPairs, true); ++ ++define_pd_global(uintx, TypeProfileLevel, 111); ++ ++define_pd_global(bool, CompactStrings, true); ++ ++define_pd_global(bool, PreserveFramePointer, false); ++ ++define_pd_global(intx, InitArrayShortSize, 8*BytesPerLong); ++ ++// Only c2 cares about this at the moment ++define_pd_global(intx, AllocatePrefetchStyle, 2); ++define_pd_global(intx, AllocatePrefetchDistance, -1); ++ ++#define ARCH_FLAGS(develop, \ ++ 
product, \ ++ notproduct, \ ++ range, \ ++ constraint) \ ++ \ ++ product(bool, UseLEXT1, false, \ ++ "Use LoongISA general EXTensions 1") \ ++ \ ++ product(bool, UseLEXT2, false, \ ++ "Use LoongISA general EXTensions 2") \ ++ \ ++ product(bool, UseLEXT3, false, \ ++ "Use LoongISA general EXTensions 3") \ ++ \ ++ product(bool, UseCodeCacheAllocOpt, true, \ ++ "Allocate code cache within 32-bit memory address space") \ ++ \ ++ product(intx, UseSyncLevel, 10000, \ ++ "The sync level on Loongson CPUs" \ ++ "UseSyncLevel == 10000, 111, for all Loongson CPUs, " \ ++ "UseSyncLevel == 4000, 101, maybe for GS464V" \ ++ "UseSyncLevel == 3000, 001, maybe for GS464V" \ ++ "UseSyncLevel == 2000, 011, maybe for GS464E/GS264" \ ++ "UseSyncLevel == 1000, 110, maybe for GS464") \ ++ \ ++ develop(bool, UseBoundCheckInstruction, false, \ ++ "Use bound check instruction") \ ++ \ ++ product(intx, SetFSFOFN, 999, \ ++ "Set the FS/FO/FN bits in FCSR" \ ++ "999 means FS/FO/FN will not be changed" \ ++ "=XYZ, with X:FS, Y:FO, Z:FN, X, Y and Z in 0=off, 1=on") \ ++ \ ++ /* assembler */ \ ++ product(bool, UseCountLeadingZerosInstructionMIPS64, true, \ ++ "Use count leading zeros instruction") \ ++ \ ++ product(bool, UseCountTrailingZerosInstructionMIPS64, false, \ ++ "Use count trailing zeros instruction") \ ++ \ ++ product(bool, UseActiveCoresMP, false, \ ++ "Eliminate barriers for single active cpu") ++ ++// end of ARCH_FLAGS ++ ++#endif // CPU_MIPS_VM_GLOBALS_MIPS_HPP +diff --git a/src/hotspot/cpu/mips/icBuffer_mips.cpp b/src/hotspot/cpu/mips/icBuffer_mips.cpp +new file mode 100644 +index 00000000000..604e951a9bf +--- /dev/null ++++ b/src/hotspot/cpu/mips/icBuffer_mips.cpp +@@ -0,0 +1,96 @@ ++/* ++ * Copyright (c) 1997, 2012, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ * ++ */ ++ ++#include "precompiled.hpp" ++#include "asm/macroAssembler.hpp" ++#include "asm/macroAssembler.inline.hpp" ++#include "code/icBuffer.hpp" ++#include "gc/shared/collectedHeap.inline.hpp" ++#include "interpreter/bytecodes.hpp" ++#include "memory/resourceArea.hpp" ++#include "nativeInst_mips.hpp" ++#include "oops/oop.inline.hpp" ++ ++#define A0 RA0 ++#define A1 RA1 ++#define A2 RA2 ++#define A3 RA3 ++#define A4 RA4 ++#define A5 RA5 ++#define A6 RA6 ++#define A7 RA7 ++#define T0 RT0 ++#define T1 RT1 ++#define T2 RT2 ++#define T3 RT3 ++#define T8 RT8 ++#define T9 RT9 ++ ++int InlineCacheBuffer::ic_stub_code_size() { ++ return NativeMovConstReg::instruction_size + ++ NativeGeneralJump::instruction_size + ++ 1; ++ // so that code_end can be set in CodeBuffer ++ // 64bit 15 = 6 + 8 bytes + 1 byte ++ // 32bit 7 = 2 + 4 bytes + 1 byte ++} ++ ++ ++// we use T1 as cached oop(klass) now. this is the target of virtual call, ++// when reach here, the receiver in T0 ++// refer to shareRuntime_mips.cpp,gen_i2c2i_adapters ++void InlineCacheBuffer::assemble_ic_buffer_code(address code_begin, void* cached_value, address entry_point) { ++ ResourceMark rm; ++ CodeBuffer code(code_begin, ic_stub_code_size()); ++ MacroAssembler* masm = new MacroAssembler(&code); ++ // note: even though the code contains an embedded oop, we do not need reloc info ++ // because ++ // (1) the oop is old (i.e., doesn't matter for scavenges) ++ // (2) these ICStubs are removed *before* a GC happens, so the roots disappear ++// assert(cached_oop == NULL || cached_oop->is_perm(), "must be perm oop"); ++#define __ masm-> ++ __ patchable_set48(T1, (long)cached_value); ++ ++ __ patchable_jump(entry_point); ++ __ flush(); ++#undef __ ++} ++ ++ ++address InlineCacheBuffer::ic_buffer_entry_point(address code_begin) { ++ NativeMovConstReg* move = nativeMovConstReg_at(code_begin); // creation also verifies the object ++ NativeGeneralJump* jump = nativeGeneralJump_at(move->next_instruction_address()); ++ return jump->jump_destination(); ++} ++ ++ ++void* InlineCacheBuffer::ic_buffer_cached_value(address code_begin) { ++ // creation also verifies the object ++ NativeMovConstReg* move = nativeMovConstReg_at(code_begin); ++ // Verifies the jump ++ NativeGeneralJump* jump = nativeGeneralJump_at(move->next_instruction_address()); ++ void* o= (void*)move->data(); ++ return o; ++} +diff --git a/src/hotspot/cpu/mips/icache_mips.cpp b/src/hotspot/cpu/mips/icache_mips.cpp +new file mode 100644 +index 00000000000..848964b63f6 +--- /dev/null ++++ b/src/hotspot/cpu/mips/icache_mips.cpp +@@ -0,0 +1,41 @@ ++/* ++ * Copyright (c) 1997, 2010, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2021, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). 
++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#include "precompiled.hpp" ++#include "asm/macroAssembler.hpp" ++#include "runtime/icache.hpp" ++ ++void ICacheStubGenerator::generate_icache_flush(ICache::flush_icache_stub_t* flush_icache_stub) ++{ ++#define __ _masm-> ++ StubCodeMark mark(this, "ICache", "flush_icache_stub"); ++ address start = __ pc(); ++ ++ __ jr_hb(RA); ++ __ delayed()->ori(V0, RA2, 0); ++ ++ *flush_icache_stub = (ICache::flush_icache_stub_t)start; ++#undef __ ++} +diff --git a/src/hotspot/cpu/mips/icache_mips.hpp b/src/hotspot/cpu/mips/icache_mips.hpp +new file mode 100644 +index 00000000000..f90dee6eef7 +--- /dev/null ++++ b/src/hotspot/cpu/mips/icache_mips.hpp +@@ -0,0 +1,41 @@ ++/* ++ * Copyright (c) 1997, 2010, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2021, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#ifndef CPU_MIPS_VM_ICACHE_MIPS_HPP ++#define CPU_MIPS_VM_ICACHE_MIPS_HPP ++ ++// Interface for updating the instruction cache. Whenever the VM modifies ++// code, part of the processor instruction cache potentially has to be flushed. ++ ++class ICache : public AbstractICache { ++ public: ++ enum { ++ stub_size = 2 * BytesPerInstWord, // Size of the icache flush stub in bytes ++ line_size = 32, // flush instruction affects a dword ++ log2_line_size = 5 // log2(line_size) ++ }; ++}; ++ ++#endif // CPU_MIPS_VM_ICACHE_MIPS_HPP +diff --git a/src/hotspot/cpu/mips/interp_masm_mips.hpp b/src/hotspot/cpu/mips/interp_masm_mips.hpp +new file mode 100644 +index 00000000000..1b9aa653fd4 +--- /dev/null ++++ b/src/hotspot/cpu/mips/interp_masm_mips.hpp +@@ -0,0 +1,266 @@ ++/* ++ * Copyright (c) 2003, 2013, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2021, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. 
++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#ifndef CPU_MIPS_VM_INTERP_MASM_MIPS_64_HPP ++#define CPU_MIPS_VM_INTERP_MASM_MIPS_64_HPP ++ ++#include "asm/assembler.hpp" ++#include "asm/macroAssembler.hpp" ++#include "asm/macroAssembler.inline.hpp" ++#include "interpreter/invocationCounter.hpp" ++#include "runtime/frame.hpp" ++ ++// This file specializes the assember with interpreter-specific macros ++ ++ ++class InterpreterMacroAssembler: public MacroAssembler { ++ private: ++ ++ Register _locals_register; // register that contains the pointer to the locals ++ Register _bcp_register; // register that contains the bcp ++ ++ protected: ++ // Interpreter specific version of call_VM_base ++ virtual void call_VM_leaf_base(address entry_point, ++ int number_of_arguments); ++ ++ virtual void call_VM_base(Register oop_result, ++ Register java_thread, ++ Register last_java_sp, ++ address entry_point, ++ int number_of_arguments, ++ bool check_exceptions); ++ ++ // base routine for all dispatches ++ void dispatch_base(TosState state, address* table, bool verifyoop = true, bool generate_poll = false); ++ ++ public: ++ void jump_to_entry(address entry); ++ // narrow int return value ++ void narrow(Register result); ++ ++ InterpreterMacroAssembler(CodeBuffer* code) : MacroAssembler(code), _locals_register(LVP), _bcp_register(BCP) {} ++ ++ void get_2_byte_integer_at_bcp(Register reg, Register tmp, int offset); ++ void get_4_byte_integer_at_bcp(Register reg, Register tmp, int offset); ++ ++ virtual void check_and_handle_popframe(Register java_thread); ++ virtual void check_and_handle_earlyret(Register java_thread); ++ ++ void load_earlyret_value(TosState state); ++ ++ // Interpreter-specific registers ++ void save_bcp() { ++ sd(BCP, FP, frame::interpreter_frame_bcp_offset * wordSize); ++ } ++ ++ void restore_bcp() { ++ ld(BCP, FP, frame::interpreter_frame_bcp_offset * wordSize); ++ } ++ ++ void restore_locals() { ++ ld(LVP, FP, frame::interpreter_frame_locals_offset * wordSize); ++ } ++ ++ // Helpers for runtime call arguments/results ++ void get_method(Register reg) { ++ ld(reg, FP, frame::interpreter_frame_method_offset * wordSize); ++ } ++ ++ void get_const(Register reg){ ++ get_method(reg); ++ ld(reg, reg, in_bytes(Method::const_offset())); ++ } ++ ++ void get_constant_pool(Register reg) { ++ get_const(reg); ++ ld(reg, reg, in_bytes(ConstMethod::constants_offset())); ++ } ++ ++ void get_constant_pool_cache(Register reg) { ++ get_constant_pool(reg); ++ ld(reg, reg, ConstantPool::cache_offset_in_bytes()); ++ } ++ ++ void get_cpool_and_tags(Register cpool, Register tags) { ++ get_constant_pool(cpool); ++ ld(tags, cpool, ConstantPool::tags_offset_in_bytes()); ++ } ++ ++ void get_unsigned_2_byte_index_at_bcp(Register reg, int bcp_offset); ++ void get_cache_and_index_at_bcp(Register cache, Register index, int 
bcp_offset, size_t index_size = sizeof(u2)); ++ void get_cache_and_index_and_bytecode_at_bcp(Register cache, Register index, Register bytecode, int byte_no, int bcp_offset, size_t index_size = sizeof(u2)); ++ void get_cache_entry_pointer_at_bcp(Register cache, Register tmp, int bcp_offset, size_t index_size = sizeof(u2)); ++ void get_cache_index_at_bcp(Register index, int bcp_offset, size_t index_size = sizeof(u2)); ++ void get_method_counters(Register method, Register mcs, Label& skip); ++ ++ // load cpool->resolved_references(index); ++ void load_resolved_reference_at_index(Register result, Register index, Register tmp); ++ ++ // load cpool->resolved_klass_at(index) ++ void load_resolved_klass_at_index(Register cpool, // the constant pool (corrupted on return) ++ Register index, // the constant pool index (corrupted on return) ++ Register klass); // contains the Klass on return ++ ++ void load_resolved_method_at_index(int byte_no, ++ Register method, ++ Register cache, ++ Register index); ++ ++ void pop_ptr( Register r = FSR); ++ void pop_i( Register r = FSR); ++ void pop_l( Register r = FSR); ++ void pop_f(FloatRegister r = FSF); ++ void pop_d(FloatRegister r = FSF); ++ ++ void push_ptr( Register r = FSR); ++ void push_i( Register r = FSR); ++ void push_l( Register r = FSR); ++ void push_f(FloatRegister r = FSF); ++ void push_d(FloatRegister r = FSF); ++ ++ void pop(Register r ) { ((MacroAssembler*)this)->pop(r); } ++ ++ void push(Register r ) { ((MacroAssembler*)this)->push(r); } ++ ++ void pop(TosState state); // transition vtos -> state ++ void push(TosState state); // transition state -> vtos ++ ++ void empty_expression_stack() { ++ ld(SP, FP, frame::interpreter_frame_monitor_block_top_offset * wordSize); ++ // NULL last_sp until next java call ++ sd(R0, FP, frame::interpreter_frame_last_sp_offset * wordSize); ++ } ++ ++ // Super call_VM calls - correspond to MacroAssembler::call_VM(_leaf) calls ++ void load_ptr(int n, Register val); ++ void store_ptr(int n, Register val); ++ ++ // Generate a subtype check: branch to ok_is_subtype if sub_klass is ++ // a subtype of super_klass. ++ //void gen_subtype_check( Register sub_klass, Label &ok_is_subtype ); ++ void gen_subtype_check( Register Rsup_klass, Register sub_klass, Label &ok_is_subtype ); ++ ++ // Dispatching ++ void dispatch_prolog(TosState state, int step = 0); ++ void dispatch_epilog(TosState state, int step = 0); ++ void dispatch_only(TosState state, bool generate_poll = false); ++ void dispatch_only_normal(TosState state); ++ void dispatch_only_noverify(TosState state); ++ void dispatch_next(TosState state, int step = 0, bool generate_poll = false); ++ void dispatch_via (TosState state, address* table); ++ ++ // jump to an invoked target ++ void prepare_to_jump_from_interpreted(); ++ void jump_from_interpreted(Register method, Register temp); ++ ++ ++ // Returning from interpreted functions ++ // ++ // Removes the current activation (incl. unlocking of monitors) ++ // and sets up the return address. This code is also used for ++ // exception unwindwing. In that case, we do not want to throw ++ // IllegalMonitorStateExceptions, since that might get us into an ++ // infinite rethrow exception loop. ++ // Additionally this code is used for popFrame and earlyReturn. ++ // In popFrame case we want to skip throwing an exception, ++ // installing an exception, and notifying jvmdi. ++ // In earlyReturn case we only want to skip throwing an exception ++ // and installing an exception. 
++ void remove_activation(TosState state, Register ret_addr, ++ bool throw_monitor_exception = true, ++ bool install_monitor_exception = true, ++ bool notify_jvmdi = true); ++ ++ // Object locking ++ void lock_object (Register lock_reg); ++ void unlock_object(Register lock_reg); ++ ++ // Interpreter profiling operations ++ void set_method_data_pointer_for_bcp(); ++ void test_method_data_pointer(Register mdp, Label& zero_continue); ++ void verify_method_data_pointer(); ++ ++ void set_mdp_data_at(Register mdp_in, int constant, Register value); ++ void increment_mdp_data_at(Address data, bool decrement = false); ++ void increment_mdp_data_at(Register mdp_in, int constant, ++ bool decrement = false); ++ void increment_mdp_data_at(Register mdp_in, Register reg, int constant, ++ bool decrement = false); ++ void increment_mask_and_jump(Address counter_addr, ++ int increment, Address mask, ++ Register scratch, bool preloaded, ++ Condition cond, Label* where); ++ void set_mdp_flag_at(Register mdp_in, int flag_constant); ++ void test_mdp_data_at(Register mdp_in, int offset, Register value, ++ Register test_value_out, ++ Label& not_equal_continue); ++ ++ void record_klass_in_profile(Register receiver, Register mdp, ++ Register reg2, bool is_virtual_call); ++ void record_klass_in_profile_helper(Register receiver, Register mdp, ++ Register reg2, int start_row, ++ Label& done, bool is_virtual_call); ++ ++ void update_mdp_by_offset(Register mdp_in, int offset_of_offset); ++ void update_mdp_by_offset(Register mdp_in, Register reg, int offset_of_disp); ++ void update_mdp_by_constant(Register mdp_in, int constant); ++ void update_mdp_for_ret(Register return_bci); ++ ++ void profile_taken_branch(Register mdp, Register bumped_count); ++ void profile_not_taken_branch(Register mdp); ++ void profile_call(Register mdp); ++ void profile_final_call(Register mdp); ++ void profile_virtual_call(Register receiver, Register mdp, ++ Register scratch2, ++ bool receiver_can_be_null = false); ++ void profile_called_method(Register method, Register mdp, Register reg2) NOT_JVMCI_RETURN; ++ void profile_ret(Register return_bci, Register mdp); ++ void profile_null_seen(Register mdp); ++ void profile_typecheck(Register mdp, Register klass, Register scratch); ++ void profile_typecheck_failed(Register mdp); ++ void profile_switch_default(Register mdp); ++ void profile_switch_case(Register index_in_scratch, Register mdp, ++ Register scratch2); ++ ++ // Debugging ++ // only if +VerifyOops && state == atos ++ void verify_oop(Register reg, TosState state = atos); ++ // only if +VerifyFPU && (state == ftos || state == dtos) ++ void verify_FPU(int stack_depth, TosState state = ftos); ++ ++ void profile_obj_type(Register obj, const Address& mdo_addr); ++ void profile_arguments_type(Register mdp, Register callee, Register tmp, bool is_virtual); ++ void profile_return_type(Register mdp, Register ret, Register tmp); ++ void profile_parameters_type(Register mdp, Register tmp1, Register tmp2); ++ ++ typedef enum { NotifyJVMTI, SkipNotifyJVMTI } NotifyMethodExitMode; ++ ++ // support for jvmti/dtrace ++ void notify_method_entry(); ++ void notify_method_exit(TosState state, NotifyMethodExitMode mode); ++}; ++ ++#endif // CPU_MIPS_VM_INTERP_MASM_MIPS_64_HPP +diff --git a/src/hotspot/cpu/mips/interp_masm_mips_64.cpp b/src/hotspot/cpu/mips/interp_masm_mips_64.cpp +new file mode 100644 +index 00000000000..732325fdbd4 +--- /dev/null ++++ b/src/hotspot/cpu/mips/interp_masm_mips_64.cpp +@@ -0,0 +1,2140 @@ ++/* ++ * Copyright (c) 2003, 2013, Oracle 
and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#include "precompiled.hpp" ++#include "gc/shared/barrierSet.hpp" ++#include "gc/shared/barrierSetAssembler.hpp" ++#include "interp_masm_mips.hpp" ++#include "interpreter/interpreter.hpp" ++#include "interpreter/interpreterRuntime.hpp" ++#include "oops/arrayOop.hpp" ++#include "oops/markWord.hpp" ++#include "oops/methodData.hpp" ++#include "oops/method.hpp" ++#include "prims/jvmtiExport.hpp" ++#include "prims/jvmtiThreadState.hpp" ++#include "runtime/basicLock.hpp" ++#include "runtime/biasedLocking.hpp" ++#include "runtime/frame.inline.hpp" ++#include "runtime/safepointMechanism.hpp" ++#include "runtime/sharedRuntime.hpp" ++#include "runtime/thread.inline.hpp" ++ ++#define A0 RA0 ++#define A1 RA1 ++#define A2 RA2 ++#define A3 RA3 ++#define A4 RA4 ++#define A5 RA5 ++#define A6 RA6 ++#define A7 RA7 ++#define T0 RT0 ++#define T1 RT1 ++#define T2 RT2 ++#define T3 RT3 ++#define T8 RT8 ++#define T9 RT9 ++ ++// Implementation of InterpreterMacroAssembler ++ ++void InterpreterMacroAssembler::get_2_byte_integer_at_bcp(Register reg, Register tmp, int offset) { ++ // The runtime address of BCP may be unaligned. ++ // Refer to the SPARC implementation. ++ lbu(reg, BCP, offset+1); ++ lbu(tmp, BCP, offset); ++ dsll(reg, reg, 8); ++ daddu(reg, tmp, reg); ++} ++ ++void InterpreterMacroAssembler::get_4_byte_integer_at_bcp(Register reg, Register tmp, int offset) { ++ assert(reg != tmp, "need separate temp register"); ++ if (offset & 3) { // Offset unaligned? ++ lbu(reg, BCP, offset+3); ++ lbu(tmp, BCP, offset+2); ++ dsll(reg, reg, 8); ++ daddu(reg, tmp, reg); ++ lbu(tmp, BCP, offset+1); ++ dsll(reg, reg, 8); ++ daddu(reg, tmp, reg); ++ lbu(tmp, BCP, offset); ++ dsll(reg, reg, 8); ++ daddu(reg, tmp, reg); ++ } else { ++ lwu(reg, BCP, offset); ++ } ++} ++ ++void InterpreterMacroAssembler::jump_to_entry(address entry) { ++ assert(entry, "Entry must have been generated by now"); ++ jmp(entry); ++} ++ ++void InterpreterMacroAssembler::call_VM_leaf_base(address entry_point, ++ int number_of_arguments) { ++ // interpreter specific ++ // ++ // Note: No need to save/restore bcp & locals (r13 & r14) pointer ++ // since these are callee saved registers and no blocking/ ++ // GC can happen in leaf calls. ++ // Further Note: DO NOT save/restore bcp/locals. 
If a caller has ++ // already saved them so that it can use BCP/LVP as temporaries ++ // then a save/restore here will DESTROY the copy the caller ++ // saved! There used to be a save_bcp() that only happened in ++ // the ASSERT path (no restore_bcp). Which caused bizarre failures ++ // when jvm built with ASSERTs. ++#ifdef ASSERT ++ save_bcp(); ++ { ++ Label L; ++ ld(AT,FP,frame::interpreter_frame_last_sp_offset * wordSize); ++ beq(AT,R0,L); ++ delayed()->nop(); ++ stop("InterpreterMacroAssembler::call_VM_leaf_base: last_sp != NULL"); ++ bind(L); ++ } ++#endif ++ // super call ++ MacroAssembler::call_VM_leaf_base(entry_point, number_of_arguments); ++ // interpreter specific ++ // Used to ASSERT that BCP/LVP were equal to frame's bcp/locals ++ // but since they may not have been saved (and we don't want to ++ // save them here (see note above) the assert is invalid. ++} ++ ++void InterpreterMacroAssembler::call_VM_base(Register oop_result, ++ Register java_thread, ++ Register last_java_sp, ++ address entry_point, ++ int number_of_arguments, ++ bool check_exceptions) { ++ // interpreter specific ++ // ++ // Note: Could avoid restoring locals ptr (callee saved) - however doesn't ++ // really make a difference for these runtime calls, since they are ++ // slow anyway. Btw., bcp must be saved/restored since it may change ++ // due to GC. ++ assert(java_thread == noreg , "not expecting a precomputed java thread"); ++ save_bcp(); ++#ifdef ASSERT ++ { ++ Label L; ++ ld(AT, FP, frame::interpreter_frame_last_sp_offset * wordSize); ++ beq(AT, R0, L); ++ delayed()->nop(); ++ stop("InterpreterMacroAssembler::call_VM_base: last_sp != NULL"); ++ bind(L); ++ } ++#endif /* ASSERT */ ++ // super call ++ MacroAssembler::call_VM_base(oop_result, java_thread, last_java_sp, ++ entry_point, number_of_arguments, ++ check_exceptions); ++ // interpreter specific ++ restore_bcp(); ++ restore_locals(); ++} ++ ++ ++void InterpreterMacroAssembler::check_and_handle_popframe(Register java_thread) { ++ if (JvmtiExport::can_pop_frame()) { ++ Label L; ++ // Initiate popframe handling only if it is not already being ++ // processed. If the flag has the popframe_processing bit set, it ++ // means that this code is called *during* popframe handling - we ++ // don't want to reenter. ++ // This method is only called just after the call into the vm in ++ // call_VM_base, so the arg registers are available. 
++ // Not clear if any other register is available, so load AT twice ++ assert(AT != java_thread, "check"); ++ lw(AT, java_thread, in_bytes(JavaThread::popframe_condition_offset())); ++ andi(AT, AT, JavaThread::popframe_pending_bit); ++ beq(AT, R0, L); ++ delayed()->nop(); ++ ++ lw(AT, java_thread, in_bytes(JavaThread::popframe_condition_offset())); ++ andi(AT, AT, JavaThread::popframe_processing_bit); ++ bne(AT, R0, L); ++ delayed()->nop(); ++ call_VM_leaf(CAST_FROM_FN_PTR(address, Interpreter::remove_activation_preserving_args_entry)); ++ jr(V0); ++ delayed()->nop(); ++ bind(L); ++ } ++} ++ ++ ++void InterpreterMacroAssembler::load_earlyret_value(TosState state) { ++#ifndef OPT_THREAD ++ Register thread = T8; ++ get_thread(thread); ++#else ++ Register thread = TREG; ++#endif ++ ld_ptr(T8, thread, in_bytes(JavaThread::jvmti_thread_state_offset())); ++ const Address tos_addr (T8, in_bytes(JvmtiThreadState::earlyret_tos_offset())); ++ const Address oop_addr (T8, in_bytes(JvmtiThreadState::earlyret_oop_offset())); ++ const Address val_addr (T8, in_bytes(JvmtiThreadState::earlyret_value_offset())); ++ //V0, oop_addr,V1,val_addr ++ switch (state) { ++ case atos: ++ ld_ptr(V0, oop_addr); ++ st_ptr(R0, oop_addr); ++ verify_oop(V0, state); ++ break; ++ case ltos: ++ ld_ptr(V0, val_addr); // fall through ++ break; ++ case btos: // fall through ++ case ztos: // fall through ++ case ctos: // fall through ++ case stos: // fall through ++ case itos: ++ lw(V0, val_addr); ++ break; ++ case ftos: ++ lwc1(F0, T8, in_bytes(JvmtiThreadState::earlyret_value_offset())); ++ break; ++ case dtos: ++ ldc1(F0, T8, in_bytes(JvmtiThreadState::earlyret_value_offset())); ++ break; ++ case vtos: /* nothing to do */ break; ++ default : ShouldNotReachHere(); ++ } ++ // Clean up tos value in the thread object ++ move(AT, (int)ilgl); ++ sw(AT, tos_addr); ++ sw(R0, T8, in_bytes(JvmtiThreadState::earlyret_value_offset())); ++} ++ ++ ++void InterpreterMacroAssembler::check_and_handle_earlyret(Register java_thread) { ++ if (JvmtiExport::can_force_early_return()) { ++ Label L; ++ Register tmp = T9; ++ ++ assert(java_thread != AT, "check"); ++ assert(java_thread != tmp, "check"); ++ ld_ptr(AT, java_thread, in_bytes(JavaThread::jvmti_thread_state_offset())); ++ beq(AT, R0, L); ++ delayed()->nop(); ++ ++ // Initiate earlyret handling only if it is not already being processed. ++ // If the flag has the earlyret_processing bit set, it means that this code ++ // is called *during* earlyret handling - we don't want to reenter. ++ lw(AT, AT, in_bytes(JvmtiThreadState::earlyret_state_offset())); ++ move(tmp, JvmtiThreadState::earlyret_pending); ++ bne(tmp, AT, L); ++ delayed()->nop(); ++ ++ // Call Interpreter::remove_activation_early_entry() to get the address of the ++ // same-named entrypoint in the generated interpreter code. 
++ ld_ptr(tmp, java_thread, in_bytes(JavaThread::jvmti_thread_state_offset())); ++ lw(AT, tmp, in_bytes(JvmtiThreadState::earlyret_tos_offset())); ++ move(A0, AT); ++ call_VM_leaf(CAST_FROM_FN_PTR(address, Interpreter::remove_activation_early_entry), A0); ++ jr(V0); ++ delayed()->nop(); ++ bind(L); ++ } ++} ++ ++ ++void InterpreterMacroAssembler::get_unsigned_2_byte_index_at_bcp(Register reg, ++ int bcp_offset) { ++ assert(bcp_offset >= 0, "bcp is still pointing to start of bytecode"); ++ lbu(AT, BCP, bcp_offset); ++ lbu(reg, BCP, bcp_offset + 1); ++ ins(reg, AT, 8, 8); ++} ++ ++ ++void InterpreterMacroAssembler::get_cache_index_at_bcp(Register index, ++ int bcp_offset, ++ size_t index_size) { ++ assert(bcp_offset > 0, "bcp is still pointing to start of bytecode"); ++ if (index_size == sizeof(u2)) { ++ get_2_byte_integer_at_bcp(index, AT, bcp_offset); ++ } else if (index_size == sizeof(u4)) { ++ get_4_byte_integer_at_bcp(index, AT, bcp_offset); ++ // Check if the secondary index definition is still ~x, otherwise ++ // we have to change the following assembler code to calculate the ++ // plain index. ++ assert(ConstantPool::decode_invokedynamic_index(~123) == 123, "else change next line"); ++ nor(index, index, R0); ++ sll(index, index, 0); ++ } else if (index_size == sizeof(u1)) { ++ lbu(index, BCP, bcp_offset); ++ } else { ++ ShouldNotReachHere(); ++ } ++} ++ ++ ++void InterpreterMacroAssembler::get_cache_and_index_at_bcp(Register cache, ++ Register index, ++ int bcp_offset, ++ size_t index_size) { ++ assert_different_registers(cache, index); ++ get_cache_index_at_bcp(index, bcp_offset, index_size); ++ ld(cache, FP, frame::interpreter_frame_cache_offset * wordSize); ++ assert(sizeof(ConstantPoolCacheEntry) == 4 * wordSize, "adjust code below"); ++ assert(exact_log2(in_words(ConstantPoolCacheEntry::size())) == 2, "else change next line"); ++ shl(index, 2); ++} ++ ++ ++void InterpreterMacroAssembler::get_cache_and_index_and_bytecode_at_bcp(Register cache, ++ Register index, ++ Register bytecode, ++ int byte_no, ++ int bcp_offset, ++ size_t index_size) { ++ get_cache_and_index_at_bcp(cache, index, bcp_offset, index_size); ++ // We use a 32-bit load here since the layout of 64-bit words on ++ // little-endian machines allow us that. 
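// Editorial note (illustrative, not part of the upstream patch): the shift
// computed below relies on ConstantPoolCacheEntry packing its two resolved
// bytecodes together with the constant-pool index in one word, roughly
//   _indices = [ bytecode_2 : 8 | bytecode_1 : 8 | cp_index : 16 ]
// so with f1_byte == 1 and f2_byte == 2 the extraction reduces to
//   (indices >> ((1 + byte_no) * BitsPerByte)) & bytecode_1_mask,
// which is exactly what the dsrl/andr pair in this function emits.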
++ dsll(AT, index, Address::times_ptr); ++ daddu(AT, cache, AT); ++ lw(bytecode, AT, in_bytes(ConstantPoolCache::base_offset() + ConstantPoolCacheEntry::indices_offset())); ++ if(os::is_MP()) { ++ sync(); // load acquire ++ } ++ ++ const int shift_count = (1 + byte_no) * BitsPerByte; ++ assert((byte_no == TemplateTable::f1_byte && shift_count == ConstantPoolCacheEntry::bytecode_1_shift) || ++ (byte_no == TemplateTable::f2_byte && shift_count == ConstantPoolCacheEntry::bytecode_2_shift), ++ "correct shift count"); ++ dsrl(bytecode, bytecode, shift_count); ++ assert(ConstantPoolCacheEntry::bytecode_1_mask == ConstantPoolCacheEntry::bytecode_2_mask, "common mask"); ++ move(AT, ConstantPoolCacheEntry::bytecode_1_mask); ++ andr(bytecode, bytecode, AT); ++} ++ ++void InterpreterMacroAssembler::get_cache_entry_pointer_at_bcp(Register cache, ++ Register tmp, ++ int bcp_offset, ++ size_t index_size) { ++ assert(bcp_offset > 0, "bcp is still pointing to start of bytecode"); ++ assert(cache != tmp, "must use different register"); ++ get_cache_index_at_bcp(tmp, bcp_offset, index_size); ++ assert(sizeof(ConstantPoolCacheEntry) == 4 * wordSize, "adjust code below"); ++ // convert from field index to ConstantPoolCacheEntry index ++ // and from word offset to byte offset ++ assert(exact_log2(in_bytes(ConstantPoolCacheEntry::size_in_bytes())) == 2 + LogBytesPerWord, "else change next line"); ++ shl(tmp, 2 + LogBytesPerWord); ++ ld(cache, FP, frame::interpreter_frame_cache_offset * wordSize); ++ // skip past the header ++ daddiu(cache, cache, in_bytes(ConstantPoolCache::base_offset())); ++ daddu(cache, cache, tmp); ++} ++ ++void InterpreterMacroAssembler::get_method_counters(Register method, ++ Register mcs, Label& skip) { ++ Label has_counters; ++ ld(mcs, method, in_bytes(Method::method_counters_offset())); ++ bne(mcs, R0, has_counters); ++ delayed()->nop(); ++ call_VM(noreg, CAST_FROM_FN_PTR(address, ++ InterpreterRuntime::build_method_counters), method); ++ ld(mcs, method, in_bytes(Method::method_counters_offset())); ++ beq(mcs, R0, skip); // No MethodCounters allocated, OutOfMemory ++ delayed()->nop(); ++ bind(has_counters); ++} ++ ++// Load object from cpool->resolved_references(index) ++void InterpreterMacroAssembler::load_resolved_reference_at_index( ++ Register result, Register index, Register tmp) { ++ assert_different_registers(result, index); ++ // convert from field index to resolved_references() index and from ++ // word index to byte offset. 
Since this is a java object, it can be compressed ++ shl(index, LogBytesPerHeapOop); ++ ++ get_constant_pool(result); ++ // load pointer for resolved_references[] objArray ++ ld(result, result, ConstantPool::cache_offset_in_bytes()); ++ ld(result, result, ConstantPoolCache::resolved_references_offset_in_bytes()); ++ resolve_oop_handle(result, tmp); ++ // Add in the index ++ daddu(result, result, index); ++ load_heap_oop(result, Address(result, arrayOopDesc::base_offset_in_bytes(T_OBJECT)), tmp); ++} ++ ++// load cpool->resolved_klass_at(index) ++void InterpreterMacroAssembler::load_resolved_klass_at_index(Register cpool, ++ Register index, Register klass) { ++ dsll(AT, index, Address::times_ptr); ++ if (UseLEXT1 && Assembler::is_simm(sizeof(ConstantPool), 8)) { ++ gslhx(index, cpool, AT, sizeof(ConstantPool)); ++ } else { ++ daddu(AT, cpool, AT); ++ lh(index, AT, sizeof(ConstantPool)); ++ } ++ Register resolved_klasses = cpool; ++ ld_ptr(resolved_klasses, Address(cpool, ConstantPool::resolved_klasses_offset_in_bytes())); ++ dsll(AT, index, Address::times_ptr); ++ daddu(AT, resolved_klasses, AT); ++ ld(klass, AT, Array::base_offset_in_bytes()); ++} ++ ++void InterpreterMacroAssembler::load_resolved_method_at_index(int byte_no, ++ Register method, ++ Register cache, ++ Register index) { ++ const int method_offset = in_bytes( ++ ConstantPoolCache::base_offset() + ++ ((byte_no == TemplateTable::f2_byte) ++ ? ConstantPoolCacheEntry::f2_offset() ++ : ConstantPoolCacheEntry::f1_offset())); ++ ++ ld(method, Address(cache, index, Address::times_ptr, method_offset)); // get f1 Method* ++} ++ ++// Resets LVP to locals. Register sub_klass cannot be any of the above. ++void InterpreterMacroAssembler::gen_subtype_check( Register Rsup_klass, Register Rsub_klass, Label &ok_is_subtype ) { ++ assert( Rsub_klass != Rsup_klass, "Rsup_klass holds superklass" ); ++ assert( Rsub_klass != T1, "T1 holds 2ndary super array length" ); ++ assert( Rsub_klass != T0, "T0 holds 2ndary super array scan ptr" ); ++ // Profile the not-null value's klass. ++ // Here T9 and T1 are used as temporary registers. ++ profile_typecheck(T9, Rsub_klass, T1); // blows T9, reloads T1 ++ ++ // Do the check. ++ check_klass_subtype(Rsub_klass, Rsup_klass, T1, ok_is_subtype); // blows T1 ++ ++ // Profile the failure of the check. ++ profile_typecheck_failed(T9); // blows T9 ++} ++ ++ ++ ++// Java Expression Stack ++ ++void InterpreterMacroAssembler::pop_ptr(Register r) { ++ ld(r, SP, 0); ++ daddiu(SP, SP, Interpreter::stackElementSize); ++} ++ ++void InterpreterMacroAssembler::pop_i(Register r) { ++ lw(r, SP, 0); ++ daddiu(SP, SP, Interpreter::stackElementSize); ++} ++ ++void InterpreterMacroAssembler::pop_l(Register r) { ++ ld(r, SP, 0); ++ daddiu(SP, SP, 2 * Interpreter::stackElementSize); ++} ++ ++void InterpreterMacroAssembler::pop_f(FloatRegister r) { ++ lwc1(r, SP, 0); ++ daddiu(SP, SP, Interpreter::stackElementSize); ++} ++ ++void InterpreterMacroAssembler::pop_d(FloatRegister r) { ++ ldc1(r, SP, 0); ++ daddiu(SP, SP, 2 * Interpreter::stackElementSize); ++} ++ ++void InterpreterMacroAssembler::push_ptr(Register r) { ++ daddiu(SP, SP, - Interpreter::stackElementSize); ++ sd(r, SP, 0); ++} ++ ++void InterpreterMacroAssembler::push_i(Register r) { ++ // For compatibility reason, don't change to sw. 
++ daddiu(SP, SP, - Interpreter::stackElementSize); ++ sd(r, SP, 0); ++} ++ ++void InterpreterMacroAssembler::push_l(Register r) { ++ daddiu(SP, SP, -2 * Interpreter::stackElementSize); ++ sd(r, SP, 0); ++ sd(R0, SP, Interpreter::stackElementSize); ++} ++ ++void InterpreterMacroAssembler::push_f(FloatRegister r) { ++ daddiu(SP, SP, - Interpreter::stackElementSize); ++ swc1(r, SP, 0); ++} ++ ++void InterpreterMacroAssembler::push_d(FloatRegister r) { ++ daddiu(SP, SP, -2 * Interpreter::stackElementSize); ++ sdc1(r, SP, 0); ++ sd(R0, SP, Interpreter::stackElementSize); ++} ++ ++void InterpreterMacroAssembler::pop(TosState state) { ++ switch (state) { ++ case atos: pop_ptr(); break; ++ case btos: ++ case ztos: ++ case ctos: ++ case stos: ++ case itos: pop_i(); break; ++ case ltos: pop_l(); break; ++ case ftos: pop_f(); break; ++ case dtos: pop_d(); break; ++ case vtos: /* nothing to do */ break; ++ default: ShouldNotReachHere(); ++ } ++ verify_oop(FSR, state); ++} ++ ++//FSR=V0,SSR=V1 ++void InterpreterMacroAssembler::push(TosState state) { ++ verify_oop(FSR, state); ++ switch (state) { ++ case atos: push_ptr(); break; ++ case btos: ++ case ztos: ++ case ctos: ++ case stos: ++ case itos: push_i(); break; ++ case ltos: push_l(); break; ++ case ftos: push_f(); break; ++ case dtos: push_d(); break; ++ case vtos: /* nothing to do */ break; ++ default : ShouldNotReachHere(); ++ } ++} ++ ++ ++ ++void InterpreterMacroAssembler::load_ptr(int n, Register val) { ++ ld(val, SP, Interpreter::expr_offset_in_bytes(n)); ++} ++ ++void InterpreterMacroAssembler::store_ptr(int n, Register val) { ++ sd(val, SP, Interpreter::expr_offset_in_bytes(n)); ++} ++ ++// Jump to from_interpreted entry of a call unless single stepping is possible ++// in this thread in which case we must call the i2i entry ++void InterpreterMacroAssembler::jump_from_interpreted(Register method, Register temp) { ++ // record last_sp ++ move(Rsender, SP); ++ sd(SP, FP, frame::interpreter_frame_last_sp_offset * wordSize); ++ ++ if (JvmtiExport::can_post_interpreter_events()) { ++ Label run_compiled_code; ++ // JVMTI events, such as single-stepping, are implemented partly by avoiding running ++ // compiled code in threads for which the event is enabled. Check here for ++ // interp_only_mode if these events CAN be enabled. ++#ifndef OPT_THREAD ++ Register thread = temp; ++ get_thread(thread); ++#else ++ Register thread = TREG; ++#endif ++ // interp_only is an int, on little endian it is sufficient to test the byte only ++ // Is a cmpl faster? ++ lw(AT, thread, in_bytes(JavaThread::interp_only_mode_offset())); ++ beq(AT, R0, run_compiled_code); ++ delayed()->nop(); ++ ld(AT, method, in_bytes(Method::interpreter_entry_offset())); ++ jr(AT); ++ delayed()->nop(); ++ bind(run_compiled_code); ++ } ++ ++ ld(AT, method, in_bytes(Method::from_interpreted_offset())); ++ jr(AT); ++ delayed()->nop(); ++} ++ ++ ++// The following two routines provide a hook so that an implementation ++// can schedule the dispatch in two parts. mips64 does not do this. ++void InterpreterMacroAssembler::dispatch_prolog(TosState state, int step) { ++ // Nothing mips64 specific to be done here ++} ++ ++void InterpreterMacroAssembler::dispatch_epilog(TosState state, int step) { ++ dispatch_next(state, step); ++} ++ ++// assume the next bytecode in T8. 
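// --------------------------------------------------------------------------
// Editorial sketch (illustrative only, not part of the upstream patch):
// dispatch_base() below is the MIPS encoding of token-threaded dispatch --
// index a per-TosState table of handler addresses with the next bytecode and
// jump there.  In plain C++ terms (names here are placeholders):
typedef void (*bytecode_handler_t)();            // generated handler stub
static bytecode_handler_t dispatch_table[256];   // one such table per TosState

inline void dispatch(unsigned char next_bytecode) {
  // In the emitted code: T2 = Rnext << LogBytesPerWord; T3 = table[T2]; jr T3
  bytecode_handler_t handler = dispatch_table[next_bytecode];
  if (handler != nullptr) {
    handler();
  }
}
// The emitted code additionally tests the thread-local polling word first and,
// when the safepoint poll bit is set, dispatches through
// Interpreter::safept_table(state) instead of the normal table.
// --------------------------------------------------------------------------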
++void InterpreterMacroAssembler::dispatch_base(TosState state, ++ address* table, ++ bool verifyoop, ++ bool generate_poll) { ++ Register thread = TREG; ++#ifndef OPT_THREAD ++ get_thread(thread); ++#endif ++ ++ if (VerifyActivationFrameSize) { ++ Label L; ++ ++ dsubu(T2, FP, SP); ++ int min_frame_size = (frame::link_offset - ++ frame::interpreter_frame_initial_sp_offset) * wordSize; ++ daddiu(T2, T2, -min_frame_size); ++ bgez(T2, L); ++ delayed()->nop(); ++ stop("broken stack frame"); ++ bind(L); ++ } ++ // FIXME: I do not know which register should pass to verify_oop ++ if (verifyoop) verify_oop(FSR, state); ++ dsll(T2, Rnext, LogBytesPerWord); ++ ++ Label safepoint; ++ address* const safepoint_table = Interpreter::safept_table(state); ++ bool needs_thread_local_poll = generate_poll && table != safepoint_table; ++ ++ if (needs_thread_local_poll) { ++ NOT_PRODUCT(block_comment("Thread-local Safepoint poll")); ++ ld(T3, thread, in_bytes(JavaThread::polling_word_offset())); ++ andi(T3, T3, SafepointMechanism::poll_bit()); ++ bne(T3, R0, safepoint); ++ delayed()->nop(); ++ } ++ ++ if((long)table >= (long)Interpreter::dispatch_table(btos) && ++ (long)table <= (long)Interpreter::dispatch_table(vtos) ++ ) { ++ int table_size = (long)Interpreter::dispatch_table(itos) - (long)Interpreter::dispatch_table(stos); ++ int table_offset = ((int)state - (int)itos) * table_size; ++ ++ // GP points to the starting address of Interpreter::dispatch_table(itos). ++ // See StubGenerator::generate_call_stub(address& return_address) for the initialization of GP. ++ if(table_offset != 0) { ++ daddiu(T3, GP, table_offset); ++ if (UseLEXT1) { ++ gsldx(T3, T2, T3, 0); ++ } else { ++ daddu(T3, T2, T3); ++ ld(T3, T3, 0); ++ } ++ } else { ++ if (UseLEXT1) { ++ gsldx(T3, T2, GP, 0); ++ } else { ++ daddu(T3, T2, GP); ++ ld(T3, T3, 0); ++ } ++ } ++ } else { ++ li(T3, (long)table); ++ if (UseLEXT1) { ++ gsldx(T3, T2, T3, 0); ++ } else { ++ daddu(T3, T2, T3); ++ ld(T3, T3, 0); ++ } ++ } ++ jr(T3); ++ delayed()->nop(); ++ ++ if (needs_thread_local_poll) { ++ bind(safepoint); ++ li(T3, (long)safepoint_table); ++ if (UseLEXT1) { ++ gsldx(T3, T2, T3, 0); ++ } else { ++ daddu(T3, T2, T3); ++ ld(T3, T3, 0); ++ } ++ jr(T3); ++ delayed()->nop(); ++ } ++} ++ ++void InterpreterMacroAssembler::dispatch_only(TosState state, bool generate_poll) { ++ dispatch_base(state, Interpreter::dispatch_table(state), true, generate_poll); ++} ++ ++void InterpreterMacroAssembler::dispatch_only_normal(TosState state) { ++ dispatch_base(state, Interpreter::normal_table(state)); ++} ++ ++void InterpreterMacroAssembler::dispatch_only_noverify(TosState state) { ++ dispatch_base(state, Interpreter::normal_table(state), false); ++} ++ ++ ++void InterpreterMacroAssembler::dispatch_next(TosState state, int step, bool generate_poll) { ++ // load next bytecode (load before advancing r13 to prevent AGI) ++ lbu(Rnext, BCP, step); ++ increment(BCP, step); ++ dispatch_base(state, Interpreter::dispatch_table(state), true, generate_poll); ++} ++ ++void InterpreterMacroAssembler::dispatch_via(TosState state, address* table) { ++ // load current bytecode ++ lbu(Rnext, BCP, 0); ++ dispatch_base(state, table); ++} ++ ++// remove activation ++// ++// Unlock the receiver if this is a synchronized method. ++// Unlock any Java monitors from syncronized blocks. ++// Remove the activation from the stack. 
++// ++// If there are locked Java monitors ++// If throw_monitor_exception ++// throws IllegalMonitorStateException ++// Else if install_monitor_exception ++// installs IllegalMonitorStateException ++// Else ++// no error processing ++// used registers : T1, T2, T3, T8 ++// T1 : thread, method access flags ++// T2 : monitor entry pointer ++// T3 : method, monitor top ++// T8 : unlock flag ++void InterpreterMacroAssembler::remove_activation( ++ TosState state, ++ Register ret_addr, ++ bool throw_monitor_exception, ++ bool install_monitor_exception, ++ bool notify_jvmdi) { ++ // Note: Registers V0, V1 and F0, F1 may be in use for the result ++ // check if synchronized method ++ Label unlocked, unlock, no_unlock; ++ ++ // get the value of _do_not_unlock_if_synchronized into T8 ++#ifndef OPT_THREAD ++ Register thread = T1; ++ get_thread(thread); ++#else ++ Register thread = TREG; ++#endif ++ lb(T8, thread, in_bytes(JavaThread::do_not_unlock_if_synchronized_offset())); ++ // reset the flag ++ sb(R0, thread, in_bytes(JavaThread::do_not_unlock_if_synchronized_offset())); ++ // get method access flags ++ ld(T3, FP, frame::interpreter_frame_method_offset * wordSize); ++ lw(T1, T3, in_bytes(Method::access_flags_offset())); ++ andi(T1, T1, JVM_ACC_SYNCHRONIZED); ++ beq(T1, R0, unlocked); ++ delayed()->nop(); ++ ++ // Don't unlock anything if the _do_not_unlock_if_synchronized flag is set. ++ bne(T8, R0, no_unlock); ++ delayed()->nop(); ++ // unlock monitor ++ push(state); // save result ++ ++ // BasicObjectLock will be first in list, since this is a ++ // synchronized method. However, need to check that the object has ++ // not been unlocked by an explicit monitorexit bytecode. ++ daddiu(c_rarg0, FP, frame::interpreter_frame_initial_sp_offset * wordSize ++ - (int)sizeof(BasicObjectLock)); ++ // address of first monitor ++ ld(T1, c_rarg0, BasicObjectLock::obj_offset_in_bytes()); ++ bne(T1, R0, unlock); ++ delayed()->nop(); ++ pop(state); ++ if (throw_monitor_exception) { ++ // Entry already unlocked, need to throw exception ++ // I think mips do not need empty_FPU_stack ++ // remove possible return value from FPU-stack, otherwise stack could overflow ++ empty_FPU_stack(); ++ call_VM(NOREG, CAST_FROM_FN_PTR(address, ++ InterpreterRuntime::throw_illegal_monitor_state_exception)); ++ should_not_reach_here(); ++ } else { ++ // Monitor already unlocked during a stack unroll. If requested, ++ // install an illegal_monitor_state_exception. Continue with ++ // stack unrolling. 
++ if (install_monitor_exception) { ++ // remove possible return value from FPU-stack, ++ // otherwise stack could overflow ++ empty_FPU_stack(); ++ call_VM(NOREG, CAST_FROM_FN_PTR(address, ++ InterpreterRuntime::new_illegal_monitor_state_exception)); ++ ++ } ++ ++ b(unlocked); ++ delayed()->nop(); ++ } ++ ++ bind(unlock); ++ unlock_object(c_rarg0); ++ pop(state); ++ ++ // Check that for block-structured locking (i.e., that all locked ++ // objects has been unlocked) ++ bind(unlocked); ++ ++ // V0, V1: Might contain return value ++ ++ // Check that all monitors are unlocked ++ { ++ Label loop, exception, entry, restart; ++ const int entry_size = frame::interpreter_frame_monitor_size() * wordSize; ++ const Address monitor_block_top(FP, ++ frame::interpreter_frame_monitor_block_top_offset * wordSize); ++ ++ bind(restart); ++ // points to current entry, starting with top-most entry ++ ld(c_rarg0, monitor_block_top); ++ // points to word before bottom of monitor block ++ daddiu(T3, FP, frame::interpreter_frame_initial_sp_offset * wordSize); ++ b(entry); ++ delayed()->nop(); ++ ++ // Entry already locked, need to throw exception ++ bind(exception); ++ ++ if (throw_monitor_exception) { ++ // Throw exception ++ // remove possible return value from FPU-stack, ++ // otherwise stack could overflow ++ empty_FPU_stack(); ++ MacroAssembler::call_VM(NOREG, CAST_FROM_FN_PTR(address, ++ InterpreterRuntime::throw_illegal_monitor_state_exception)); ++ should_not_reach_here(); ++ } else { ++ // Stack unrolling. Unlock object and install illegal_monitor_exception ++ // Unlock does not block, so don't have to worry about the frame ++ // We don't have to preserve c_rarg0, since we are going to ++ // throw an exception ++ ++ push(state); ++ unlock_object(c_rarg0); ++ pop(state); ++ ++ if (install_monitor_exception) { ++ empty_FPU_stack(); ++ call_VM(NOREG, CAST_FROM_FN_PTR(address, ++ InterpreterRuntime::new_illegal_monitor_state_exception)); ++ } ++ ++ b(restart); ++ delayed()->nop(); ++ } ++ ++ bind(loop); ++ ld(T1, c_rarg0, BasicObjectLock::obj_offset_in_bytes()); ++ bne(T1, R0, exception);// check if current entry is used ++ delayed()->nop(); ++ ++ daddiu(c_rarg0, c_rarg0, entry_size);// otherwise advance to next entry ++ bind(entry); ++ bne(c_rarg0, T3, loop); // check if bottom reached ++ delayed()->nop(); // if not at bottom then check this entry ++ } ++ ++ bind(no_unlock); ++ ++ // jvmpi support (jvmdi does not generate MethodExit on exception / popFrame) ++ if (notify_jvmdi) { ++ notify_method_exit(state, NotifyJVMTI); // preserve TOSCA ++ } else { ++ notify_method_exit(state, SkipNotifyJVMTI); // preserve TOSCA ++ } ++ ++ // remove activation ++ ld(TSR, FP, frame::interpreter_frame_sender_sp_offset * wordSize); ++ if (StackReservedPages > 0) { ++ // testing if reserved zone needs to be re-enabled ++ Label no_reserved_zone_enabling; ++ ++ ld(AT, Address(thread, JavaThread::reserved_stack_activation_offset())); ++ dsubu(AT, TSR, AT); ++ blez(AT, no_reserved_zone_enabling); ++ delayed()->nop(); ++ ++ call_VM_leaf( ++ CAST_FROM_FN_PTR(address, SharedRuntime::enable_stack_reserved_zone), thread); ++ call_VM(noreg, CAST_FROM_FN_PTR(address, ++ InterpreterRuntime::throw_delayed_StackOverflowError)); ++ should_not_reach_here(); ++ ++ bind(no_reserved_zone_enabling); ++ } ++ ld(ret_addr, FP, frame::interpreter_frame_return_addr_offset * wordSize); ++ ld(FP, FP, frame::interpreter_frame_sender_fp_offset * wordSize); ++ move(SP, TSR); // set sp to sender sp ++} ++ ++// Lock object ++// ++// Args: ++// c_rarg0: 
BasicObjectLock to be used for locking ++// ++// Kills: ++// T1 ++// T2 ++void InterpreterMacroAssembler::lock_object(Register lock_reg) { ++ assert(lock_reg == c_rarg0, "The argument is only for looks. It must be c_rarg0"); ++ ++ if (UseHeavyMonitors) { ++ call_VM(NOREG, CAST_FROM_FN_PTR(address, InterpreterRuntime::monitorenter), lock_reg); ++ } else { ++ Label done, slow_case; ++ const Register tmp_reg = T2; ++ const Register scr_reg = T1; ++ const int obj_offset = BasicObjectLock::obj_offset_in_bytes(); ++ const int lock_offset = BasicObjectLock::lock_offset_in_bytes (); ++ const int mark_offset = lock_offset + BasicLock::displaced_header_offset_in_bytes(); ++ ++ // Load object pointer into scr_reg ++ ld(scr_reg, lock_reg, obj_offset); ++ ++ if (DiagnoseSyncOnValueBasedClasses != 0) { ++ load_klass(tmp_reg, scr_reg); ++ lw(tmp_reg, Address(tmp_reg, Klass::access_flags_offset())); ++ move(AT, JVM_ACC_IS_VALUE_BASED_CLASS); ++ andr(AT, AT, tmp_reg); ++ bne(AT, R0, slow_case); ++ delayed()->nop(); ++ } ++ ++ if (UseBiasedLocking) { ++ // Note: we use noreg for the temporary register since it's hard ++ // to come up with a free register on all incoming code paths ++ biased_locking_enter(lock_reg, scr_reg, tmp_reg, noreg, false, done, &slow_case); ++ } ++ ++ // Load (object->mark() | 1) into tmp_reg ++ ld(AT, scr_reg, 0); ++ ori(tmp_reg, AT, 1); ++ ++ // Save (object->mark() | 1) into BasicLock's displaced header ++ sd(tmp_reg, lock_reg, mark_offset); ++ ++ assert(lock_offset == 0, "displached header must be first word in BasicObjectLock"); ++ ++ if (PrintBiasedLockingStatistics) { ++ Label succ, fail; ++ cmpxchg(Address(scr_reg, 0), tmp_reg, lock_reg, AT, true, false, succ, &fail); ++ bind(succ); ++ atomic_inc32((address)BiasedLocking::fast_path_entry_count_addr(), 1, AT, scr_reg); ++ b(done); ++ delayed()->nop(); ++ bind(fail); ++ } else { ++ cmpxchg(Address(scr_reg, 0), tmp_reg, lock_reg, AT, true, false, done); ++ } ++ ++ // Test if the oopMark is an obvious stack pointer, i.e., ++ // 1) (mark & 3) == 0, and ++ // 2) SP <= mark < SP + os::pagesize() ++ // ++ // These 3 tests can be done by evaluating the following ++ // expression: ((mark - sp) & (3 - os::vm_page_size())), ++ // assuming both stack pointer and pagesize have their ++ // least significant 2 bits clear. ++ // NOTE: the oopMark is in tmp_reg as the result of cmpxchg ++ ++ dsubu(tmp_reg, tmp_reg, SP); ++ move(AT, 7 - os::vm_page_size()); ++ andr(tmp_reg, tmp_reg, AT); ++ // Save the test result, for recursive case, the result is zero ++ sd(tmp_reg, lock_reg, mark_offset); ++ if (PrintBiasedLockingStatistics) { ++ bne(tmp_reg, R0, slow_case); ++ delayed()->nop(); ++ atomic_inc32((address)BiasedLocking::fast_path_entry_count_addr(), 1, AT, scr_reg); ++ } ++ beq(tmp_reg, R0, done); ++ delayed()->nop(); ++ ++ bind(slow_case); ++ // Call the runtime routine for slow case ++ call_VM(NOREG, CAST_FROM_FN_PTR(address, InterpreterRuntime::monitorenter), lock_reg); ++ ++ bind(done); ++ } ++} ++ ++ ++// Unlocks an object. Used in monitorexit bytecode and ++// remove_activation. Throws an IllegalMonitorException if object is ++// not locked by current thread. ++// ++// Args: ++// c_rarg0: BasicObjectLock for lock ++// ++// Kills: ++// T1 ++// T2 ++// T3 ++// Throw an IllegalMonitorException if object is not locked by current thread ++void InterpreterMacroAssembler::unlock_object(Register lock_reg) { ++ assert(lock_reg == c_rarg0, "The argument is only for looks. 
It must be c_rarg0"); ++ ++ if (UseHeavyMonitors) { ++ call_VM_leaf(CAST_FROM_FN_PTR(address, InterpreterRuntime::monitorexit), lock_reg); ++ } else { ++ Label done; ++ ++ const Register tmp_reg = T1; ++ const Register scr_reg = T2; ++ const Register hdr_reg = T3; ++ ++ save_bcp(); // Save in case of exception ++ ++ // Convert from BasicObjectLock structure to object and BasicLock structure ++ // Store the BasicLock address into %T2 ++ daddiu(tmp_reg, lock_reg, BasicObjectLock::lock_offset_in_bytes()); ++ ++ // Load oop into scr_reg(%T1) ++ ld(scr_reg, lock_reg, BasicObjectLock::obj_offset_in_bytes()); ++ // free entry ++ sd(R0, lock_reg, BasicObjectLock::obj_offset_in_bytes()); ++ if (UseBiasedLocking) { ++ biased_locking_exit(scr_reg, hdr_reg, done); ++ } ++ ++ // Load the old header from BasicLock structure ++ ld(hdr_reg, tmp_reg, BasicLock::displaced_header_offset_in_bytes()); ++ // zero for recursive case ++ beq(hdr_reg, R0, done); ++ delayed()->nop(); ++ ++ // Atomic swap back the old header ++ cmpxchg(Address(scr_reg, 0), tmp_reg, hdr_reg, AT, false, false, done); ++ ++ // Call the runtime routine for slow case. ++ sd(scr_reg, lock_reg, BasicObjectLock::obj_offset_in_bytes()); // restore obj ++ call_VM_leaf(CAST_FROM_FN_PTR(address, InterpreterRuntime::monitorexit), lock_reg); ++ ++ bind(done); ++ ++ restore_bcp(); ++ } ++} ++ ++void InterpreterMacroAssembler::test_method_data_pointer(Register mdp, ++ Label& zero_continue) { ++ assert(ProfileInterpreter, "must be profiling interpreter"); ++ ld(mdp, Address(FP, frame::interpreter_frame_mdp_offset * wordSize)); ++ beq(mdp, R0, zero_continue); ++ delayed()->nop(); ++} ++ ++ ++// Set the method data pointer for the current bcp. ++void InterpreterMacroAssembler::set_method_data_pointer_for_bcp() { ++ assert(ProfileInterpreter, "must be profiling interpreter"); ++ Label set_mdp; ++ ++ // V0 and T0 will be used as two temporary registers. ++ push2(V0, T0); ++ ++ get_method(T0); ++ // Test MDO to avoid the call if it is NULL. ++ ld(V0, T0, in_bytes(Method::method_data_offset())); ++ beq(V0, R0, set_mdp); ++ delayed()->nop(); ++ ++ // method: T0 ++ // bcp: BCP --> S0 ++ call_VM_leaf(CAST_FROM_FN_PTR(address, InterpreterRuntime::bcp_to_di), T0, BCP); ++ // mdi: V0 ++ // mdo is guaranteed to be non-zero here, we checked for it before the call. ++ get_method(T0); ++ ld(T0, T0, in_bytes(Method::method_data_offset())); ++ daddiu(T0, T0, in_bytes(MethodData::data_offset())); ++ daddu(V0, T0, V0); ++ bind(set_mdp); ++ sd(V0, FP, frame::interpreter_frame_mdp_offset * wordSize); ++ pop2(V0, T0); ++} ++ ++void InterpreterMacroAssembler::verify_method_data_pointer() { ++ assert(ProfileInterpreter, "must be profiling interpreter"); ++#ifdef ASSERT ++ Label verify_continue; ++ Register method = V0; ++ Register mdp = V1; ++ Register tmp = A0; ++ push(method); ++ push(mdp); ++ push(tmp); ++ test_method_data_pointer(mdp, verify_continue); // If mdp is zero, continue ++ get_method(method); ++ ++ // If the mdp is valid, it will point to a DataLayout header which is ++ // consistent with the bcp. The converse is highly probable also. 
++ lhu(tmp, mdp, in_bytes(DataLayout::bci_offset())); ++ ld(AT, method, in_bytes(Method::const_offset())); ++ daddu(tmp, tmp, AT); ++ daddiu(tmp, tmp, in_bytes(ConstMethod::codes_offset())); ++ beq(tmp, BCP, verify_continue); ++ delayed()->nop(); ++ call_VM_leaf(CAST_FROM_FN_PTR(address, InterpreterRuntime::verify_mdp), method, BCP, mdp); ++ bind(verify_continue); ++ pop(tmp); ++ pop(mdp); ++ pop(method); ++#endif // ASSERT ++} ++ ++ ++void InterpreterMacroAssembler::set_mdp_data_at(Register mdp_in, ++ int constant, ++ Register value) { ++ assert(ProfileInterpreter, "must be profiling interpreter"); ++ Address data(mdp_in, constant); ++ sd(value, data); ++} ++ ++ ++void InterpreterMacroAssembler::increment_mdp_data_at(Register mdp_in, ++ int constant, ++ bool decrement) { ++ // Counter address ++ Address data(mdp_in, constant); ++ ++ increment_mdp_data_at(data, decrement); ++} ++ ++void InterpreterMacroAssembler::increment_mdp_data_at(Address data, ++ bool decrement) { ++ assert(ProfileInterpreter, "must be profiling interpreter"); ++ // %%% this does 64bit counters at best it is wasting space ++ // at worst it is a rare bug when counters overflow ++ Register tmp = S0; ++ push(tmp); ++ if (decrement) { ++ assert(DataLayout::counter_increment == 1, "flow-free idiom only works with 1"); ++ // Decrement the register. ++ ld(AT, data); ++ sltu(tmp, R0, AT); ++ dsubu(AT, AT, tmp); ++ sd(AT, data); ++ } else { ++ assert(DataLayout::counter_increment == 1, "flow-free idiom only works with 1"); ++ // Increment the register. ++ ld(AT, data); ++ daddiu(tmp, AT, DataLayout::counter_increment); ++ sltu(tmp, R0, tmp); ++ daddu(AT, AT, tmp); ++ sd(AT, data); ++ } ++ pop(tmp); ++} ++ ++ ++void InterpreterMacroAssembler::increment_mdp_data_at(Register mdp_in, ++ Register reg, ++ int constant, ++ bool decrement) { ++ Register tmp = S0; ++ push(tmp); ++ if (decrement) { ++ assert(Assembler::is_simm16(constant), "constant is not a simm16 !"); ++ assert(DataLayout::counter_increment == 1, "flow-free idiom only works with 1"); ++ // Decrement the register. ++ daddu(tmp, mdp_in, reg); ++ ld(AT, tmp, constant); ++ sltu(tmp, R0, AT); ++ dsubu(AT, AT, tmp); ++ daddu(tmp, mdp_in, reg); ++ sd(AT, tmp, constant); ++ } else { ++ assert(Assembler::is_simm16(constant), "constant is not a simm16 !"); ++ assert(DataLayout::counter_increment == 1, "flow-free idiom only works with 1"); ++ // Increment the register. ++ daddu(tmp, mdp_in, reg); ++ ld(AT, tmp, constant); ++ daddiu(tmp, AT, DataLayout::counter_increment); ++ sltu(tmp, R0, tmp); ++ daddu(AT, AT, tmp); ++ daddu(tmp, mdp_in, reg); ++ sd(AT, tmp, constant); ++ } ++ pop(tmp); ++} ++ ++void InterpreterMacroAssembler::set_mdp_flag_at(Register mdp_in, ++ int flag_byte_constant) { ++ assert(ProfileInterpreter, "must be profiling interpreter"); ++ int header_offset = in_bytes(DataLayout::header_offset()); ++ int header_bits = DataLayout::flag_mask_to_header_mask(flag_byte_constant); ++ // Set the flag ++ lw(AT, Address(mdp_in, header_offset)); ++ if(Assembler::is_simm16(header_bits)) { ++ ori(AT, AT, header_bits); ++ } else { ++ push(T8); ++ // T8 is used as a temporary register. 
++ move(T8, header_bits); ++ orr(AT, AT, T8); ++ pop(T8); ++ } ++ sw(AT, Address(mdp_in, header_offset)); ++} ++ ++ ++ ++void InterpreterMacroAssembler::test_mdp_data_at(Register mdp_in, ++ int offset, ++ Register value, ++ Register test_value_out, ++ Label& not_equal_continue) { ++ assert(ProfileInterpreter, "must be profiling interpreter"); ++ if (test_value_out == noreg) { ++ ld(AT, Address(mdp_in, offset)); ++ bne(AT, value, not_equal_continue); ++ delayed()->nop(); ++ } else { ++ // Put the test value into a register, so caller can use it: ++ ld(test_value_out, Address(mdp_in, offset)); ++ bne(value, test_value_out, not_equal_continue); ++ delayed()->nop(); ++ } ++} ++ ++ ++void InterpreterMacroAssembler::update_mdp_by_offset(Register mdp_in, ++ int offset_of_disp) { ++ assert(ProfileInterpreter, "must be profiling interpreter"); ++ assert(Assembler::is_simm16(offset_of_disp), "offset is not an simm16"); ++ ld(AT, mdp_in, offset_of_disp); ++ daddu(mdp_in, mdp_in, AT); ++ sd(mdp_in, Address(FP, frame::interpreter_frame_mdp_offset * wordSize)); ++} ++ ++ ++void InterpreterMacroAssembler::update_mdp_by_offset(Register mdp_in, ++ Register reg, ++ int offset_of_disp) { ++ assert(ProfileInterpreter, "must be profiling interpreter"); ++ daddu(AT, reg, mdp_in); ++ assert(Assembler::is_simm16(offset_of_disp), "offset is not an simm16"); ++ ld(AT, AT, offset_of_disp); ++ daddu(mdp_in, mdp_in, AT); ++ sd(mdp_in, Address(FP, frame::interpreter_frame_mdp_offset * wordSize)); ++} ++ ++ ++void InterpreterMacroAssembler::update_mdp_by_constant(Register mdp_in, ++ int constant) { ++ assert(ProfileInterpreter, "must be profiling interpreter"); ++ if(Assembler::is_simm16(constant)) { ++ daddiu(mdp_in, mdp_in, constant); ++ } else { ++ move(AT, constant); ++ daddu(mdp_in, mdp_in, AT); ++ } ++ sd(mdp_in, Address(FP, frame::interpreter_frame_mdp_offset * wordSize)); ++} ++ ++ ++void InterpreterMacroAssembler::update_mdp_for_ret(Register return_bci) { ++ assert(ProfileInterpreter, "must be profiling interpreter"); ++ push(return_bci); // save/restore across call_VM ++ call_VM(noreg, ++ CAST_FROM_FN_PTR(address, InterpreterRuntime::update_mdp_for_ret), ++ return_bci); ++ pop(return_bci); ++} ++ ++ ++void InterpreterMacroAssembler::profile_taken_branch(Register mdp, ++ Register bumped_count) { ++ if (ProfileInterpreter) { ++ Label profile_continue; ++ ++ // If no method data exists, go to profile_continue. ++ // Otherwise, assign to mdp ++ test_method_data_pointer(mdp, profile_continue); ++ ++ // We are taking a branch. Increment the taken count. ++ // We inline increment_mdp_data_at to return bumped_count in a register ++ //increment_mdp_data_at(mdp, in_bytes(JumpData::taken_offset())); ++ ld(bumped_count, mdp, in_bytes(JumpData::taken_offset())); ++ assert(DataLayout::counter_increment == 1, "flow-free idiom only works with 1"); ++ daddiu(AT, bumped_count, DataLayout::counter_increment); ++ sltu(AT, R0, AT); ++ daddu(bumped_count, bumped_count, AT); ++ sd(bumped_count, mdp, in_bytes(JumpData::taken_offset())); // Store back out ++ // The method data pointer needs to be updated to reflect the new target. ++ update_mdp_by_offset(mdp, in_bytes(JumpData::displacement_offset())); ++ bind(profile_continue); ++ } ++} ++ ++ ++void InterpreterMacroAssembler::profile_not_taken_branch(Register mdp) { ++ if (ProfileInterpreter) { ++ Label profile_continue; ++ ++ // If no method data exists, go to profile_continue. ++ test_method_data_pointer(mdp, profile_continue); ++ ++ // We are taking a branch. 
Increment the not taken count. ++ increment_mdp_data_at(mdp, in_bytes(BranchData::not_taken_offset())); ++ ++ // The method data pointer needs to be updated to correspond to ++ // the next bytecode ++ update_mdp_by_constant(mdp, in_bytes(BranchData::branch_data_size())); ++ bind(profile_continue); ++ } ++} ++ ++ ++void InterpreterMacroAssembler::profile_call(Register mdp) { ++ if (ProfileInterpreter) { ++ Label profile_continue; ++ ++ // If no method data exists, go to profile_continue. ++ test_method_data_pointer(mdp, profile_continue); ++ ++ // We are making a call. Increment the count. ++ increment_mdp_data_at(mdp, in_bytes(CounterData::count_offset())); ++ ++ // The method data pointer needs to be updated to reflect the new target. ++ update_mdp_by_constant(mdp, in_bytes(CounterData::counter_data_size())); ++ bind(profile_continue); ++ } ++} ++ ++ ++void InterpreterMacroAssembler::profile_final_call(Register mdp) { ++ if (ProfileInterpreter) { ++ Label profile_continue; ++ ++ // If no method data exists, go to profile_continue. ++ test_method_data_pointer(mdp, profile_continue); ++ ++ // We are making a call. Increment the count. ++ increment_mdp_data_at(mdp, in_bytes(CounterData::count_offset())); ++ ++ // The method data pointer needs to be updated to reflect the new target. ++ update_mdp_by_constant(mdp, ++ in_bytes(VirtualCallData:: ++ virtual_call_data_size())); ++ bind(profile_continue); ++ } ++} ++ ++ ++void InterpreterMacroAssembler::profile_virtual_call(Register receiver, ++ Register mdp, ++ Register reg2, ++ bool receiver_can_be_null) { ++ if (ProfileInterpreter) { ++ Label profile_continue; ++ ++ // If no method data exists, go to profile_continue. ++ test_method_data_pointer(mdp, profile_continue); ++ ++ Label skip_receiver_profile; ++ if (receiver_can_be_null) { ++ Label not_null; ++ bne(receiver, R0, not_null); ++ delayed()->nop(); ++ // We are making a call. Increment the count. ++ increment_mdp_data_at(mdp, in_bytes(CounterData::count_offset())); ++ beq(R0, R0, skip_receiver_profile); ++ delayed()->nop(); ++ bind(not_null); ++ } ++ ++ // Record the receiver type. ++ record_klass_in_profile(receiver, mdp, reg2, true); ++ bind(skip_receiver_profile); ++ ++ // The method data pointer needs to be updated to reflect the new target. ++ update_mdp_by_constant(mdp, ++ in_bytes(VirtualCallData:: ++ virtual_call_data_size())); ++ bind(profile_continue); ++ } ++} ++ ++#if INCLUDE_JVMCI ++void InterpreterMacroAssembler::profile_called_method(Register method, Register mdp, Register reg2) { ++ assert_different_registers(method, mdp, reg2); ++ if (ProfileInterpreter && MethodProfileWidth > 0) { ++ Label profile_continue; ++ ++ // If no method data exists, go to profile_continue. ++ test_method_data_pointer(mdp, profile_continue); ++ ++ Label done; ++ record_item_in_profile_helper(method, mdp, reg2, 0, done, MethodProfileWidth, ++ &VirtualCallData::method_offset, &VirtualCallData::method_count_offset, in_bytes(VirtualCallData::nonprofiled_receiver_count_offset())); ++ bind(done); ++ ++ update_mdp_by_constant(mdp, in_bytes(VirtualCallData::virtual_call_data_size())); ++ bind(profile_continue); ++ } ++} ++#endif // INCLUDE_JVMCI ++ ++// This routine creates a state machine for updating the multi-row ++// type profile at a virtual call site (or other type-sensitive bytecode). ++// The machine visits each row (of receiver/count) until the receiver type ++// is found, or until it runs out of rows. At the same time, it remembers ++// the location of the first empty row. 
(An empty row records null for its ++// receiver, and can be allocated for a newly-observed receiver type.) ++// Because there are two degrees of freedom in the state, a simple linear ++// search will not work; it must be a decision tree. Hence this helper ++// function is recursive, to generate the required tree structured code. ++// It's the interpreter, so we are trading off code space for speed. ++// See below for example code. ++void InterpreterMacroAssembler::record_klass_in_profile_helper( ++ Register receiver, Register mdp, ++ Register reg2, int start_row, ++ Label& done, bool is_virtual_call) { ++ if (TypeProfileWidth == 0) { ++ if (is_virtual_call) { ++ increment_mdp_data_at(mdp, in_bytes(CounterData::count_offset())); ++ } ++ return; ++ } ++ ++ int last_row = VirtualCallData::row_limit() - 1; ++ assert(start_row <= last_row, "must be work left to do"); ++ // Test this row for both the receiver and for null. ++ // Take any of three different outcomes: ++ // 1. found receiver => increment count and goto done ++ // 2. found null => keep looking for case 1, maybe allocate this cell ++ // 3. found something else => keep looking for cases 1 and 2 ++ // Case 3 is handled by a recursive call. ++ for (int row = start_row; row <= last_row; row++) { ++ Label next_test; ++ bool test_for_null_also = (row == start_row); ++ ++ // See if the receiver is receiver[n]. ++ int recvr_offset = in_bytes(VirtualCallData::receiver_offset(row)); ++ test_mdp_data_at(mdp, recvr_offset, receiver, ++ (test_for_null_also ? reg2 : noreg), ++ next_test); ++ // (Reg2 now contains the receiver from the CallData.) ++ ++ // The receiver is receiver[n]. Increment count[n]. ++ int count_offset = in_bytes(VirtualCallData::receiver_count_offset(row)); ++ increment_mdp_data_at(mdp, count_offset); ++ beq(R0, R0, done); ++ delayed()->nop(); ++ bind(next_test); ++ ++ if (test_for_null_also) { ++ Label found_null; ++ // Failed the equality check on receiver[n]... Test for null. ++ if (start_row == last_row) { ++ // The only thing left to do is handle the null case. ++ if (is_virtual_call) { ++ beq(reg2, R0, found_null); ++ delayed()->nop(); ++ // Receiver did not match any saved receiver and there is no empty row for it. ++ // Increment total counter to indicate polymorphic case. ++ increment_mdp_data_at(mdp, in_bytes(CounterData::count_offset())); ++ beq(R0, R0, done); ++ delayed()->nop(); ++ bind(found_null); ++ } else { ++ bne(reg2, R0, done); ++ delayed()->nop(); ++ } ++ break; ++ } ++ // Since null is rare, make it be the branch-taken case. ++ beq(reg2, R0, found_null); ++ delayed()->nop(); ++ ++ // Put all the "Case 3" tests here. ++ record_klass_in_profile_helper(receiver, mdp, reg2, start_row + 1, done, is_virtual_call); ++ ++ // Found a null. Keep searching for a matching receiver, ++ // but remember that this is an empty (unused) slot. ++ bind(found_null); ++ } ++ } ++ ++ // In the fall-through case, we found no matching receiver, but we ++ // observed the receiver[start_row] is NULL. ++ ++ // Fill in the receiver field and increment the count. 
++ int recvr_offset = in_bytes(VirtualCallData::receiver_offset(start_row)); ++ set_mdp_data_at(mdp, recvr_offset, receiver); ++ int count_offset = in_bytes(VirtualCallData::receiver_count_offset(start_row)); ++ move(reg2, DataLayout::counter_increment); ++ set_mdp_data_at(mdp, count_offset, reg2); ++ if (start_row > 0) { ++ beq(R0, R0, done); ++ delayed()->nop(); ++ } ++} ++ ++// Example state machine code for three profile rows: ++// // main copy of decision tree, rooted at row[1] ++// if (row[0].rec == rec) { row[0].incr(); goto done; } ++// if (row[0].rec != NULL) { ++// // inner copy of decision tree, rooted at row[1] ++// if (row[1].rec == rec) { row[1].incr(); goto done; } ++// if (row[1].rec != NULL) { ++// // degenerate decision tree, rooted at row[2] ++// if (row[2].rec == rec) { row[2].incr(); goto done; } ++// if (row[2].rec != NULL) { goto done; } // overflow ++// row[2].init(rec); goto done; ++// } else { ++// // remember row[1] is empty ++// if (row[2].rec == rec) { row[2].incr(); goto done; } ++// row[1].init(rec); goto done; ++// } ++// } else { ++// // remember row[0] is empty ++// if (row[1].rec == rec) { row[1].incr(); goto done; } ++// if (row[2].rec == rec) { row[2].incr(); goto done; } ++// row[0].init(rec); goto done; ++// } ++// done: ++ ++void InterpreterMacroAssembler::record_klass_in_profile(Register receiver, ++ Register mdp, Register reg2, ++ bool is_virtual_call) { ++ assert(ProfileInterpreter, "must be profiling"); ++ Label done; ++ ++ record_klass_in_profile_helper(receiver, mdp, reg2, 0, done, is_virtual_call); ++ ++ bind (done); ++} ++ ++void InterpreterMacroAssembler::profile_ret(Register return_bci, ++ Register mdp) { ++ if (ProfileInterpreter) { ++ Label profile_continue; ++ uint row; ++ ++ // If no method data exists, go to profile_continue. ++ test_method_data_pointer(mdp, profile_continue); ++ ++ // Update the total ret count. ++ increment_mdp_data_at(mdp, in_bytes(CounterData::count_offset())); ++ ++ for (row = 0; row < RetData::row_limit(); row++) { ++ Label next_test; ++ ++ // See if return_bci is equal to bci[n]: ++ test_mdp_data_at(mdp, ++ in_bytes(RetData::bci_offset(row)), ++ return_bci, noreg, ++ next_test); ++ ++ // return_bci is equal to bci[n]. Increment the count. ++ increment_mdp_data_at(mdp, in_bytes(RetData::bci_count_offset(row))); ++ ++ // The method data pointer needs to be updated to reflect the new target. ++ update_mdp_by_offset(mdp, ++ in_bytes(RetData::bci_displacement_offset(row))); ++ beq(R0, R0, profile_continue); ++ delayed()->nop(); ++ bind(next_test); ++ } ++ ++ update_mdp_for_ret(return_bci); ++ ++ bind(profile_continue); ++ } ++} ++ ++ ++void InterpreterMacroAssembler::profile_null_seen(Register mdp) { ++ if (ProfileInterpreter) { ++ Label profile_continue; ++ ++ // If no method data exists, go to profile_continue. ++ test_method_data_pointer(mdp, profile_continue); ++ ++ set_mdp_flag_at(mdp, BitData::null_seen_byte_constant()); ++ ++ // The method data pointer needs to be updated. ++ int mdp_delta = in_bytes(BitData::bit_data_size()); ++ if (TypeProfileCasts) { ++ mdp_delta = in_bytes(VirtualCallData::virtual_call_data_size()); ++ } ++ update_mdp_by_constant(mdp, mdp_delta); ++ ++ bind(profile_continue); ++ } ++} ++ ++ ++void InterpreterMacroAssembler::profile_typecheck_failed(Register mdp) { ++ if (ProfileInterpreter && TypeProfileCasts) { ++ Label profile_continue; ++ ++ // If no method data exists, go to profile_continue. 
++ test_method_data_pointer(mdp, profile_continue); ++ ++ int count_offset = in_bytes(CounterData::count_offset()); ++ // Back up the address, since we have already bumped the mdp. ++ count_offset -= in_bytes(VirtualCallData::virtual_call_data_size()); ++ ++ // *Decrement* the counter. We expect to see zero or small negatives. ++ increment_mdp_data_at(mdp, count_offset, true); ++ ++ bind (profile_continue); ++ } ++} ++ ++ ++void InterpreterMacroAssembler::profile_typecheck(Register mdp, Register klass, Register reg2) { ++ if (ProfileInterpreter) { ++ Label profile_continue; ++ ++ // If no method data exists, go to profile_continue. ++ test_method_data_pointer(mdp, profile_continue); ++ ++ // The method data pointer needs to be updated. ++ int mdp_delta = in_bytes(BitData::bit_data_size()); ++ if (TypeProfileCasts) { ++ mdp_delta = in_bytes(VirtualCallData::virtual_call_data_size()); ++ ++ // Record the object type. ++ record_klass_in_profile(klass, mdp, reg2, false); ++ } ++ update_mdp_by_constant(mdp, mdp_delta); ++ ++ bind(profile_continue); ++ } ++} ++ ++ ++void InterpreterMacroAssembler::profile_switch_default(Register mdp) { ++ if (ProfileInterpreter) { ++ Label profile_continue; ++ ++ // If no method data exists, go to profile_continue. ++ test_method_data_pointer(mdp, profile_continue); ++ ++ // Update the default case count ++ increment_mdp_data_at(mdp, ++ in_bytes(MultiBranchData::default_count_offset())); ++ ++ // The method data pointer needs to be updated. ++ update_mdp_by_offset(mdp, ++ in_bytes(MultiBranchData:: ++ default_displacement_offset())); ++ ++ bind(profile_continue); ++ } ++} ++ ++ ++void InterpreterMacroAssembler::profile_switch_case(Register index, ++ Register mdp, ++ Register reg2) { ++ if (ProfileInterpreter) { ++ Label profile_continue; ++ ++ // If no method data exists, go to profile_continue. ++ test_method_data_pointer(mdp, profile_continue); ++ ++ // Build the base (index * per_case_size_in_bytes()) + ++ // case_array_offset_in_bytes() ++ move(reg2, in_bytes(MultiBranchData::per_case_size())); ++ if (UseLEXT1) { ++ gsdmult(index, index, reg2); ++ } else { ++ dmult(index, reg2); ++ mflo(index); ++ } ++ daddiu(index, index, in_bytes(MultiBranchData::case_array_offset())); ++ ++ // Update the case count ++ increment_mdp_data_at(mdp, ++ index, ++ in_bytes(MultiBranchData::relative_count_offset())); ++ ++ // The method data pointer needs to be updated. ++ update_mdp_by_offset(mdp, ++ index, ++ in_bytes(MultiBranchData:: ++ relative_displacement_offset())); ++ ++ bind(profile_continue); ++ } ++} ++ ++ ++void InterpreterMacroAssembler::narrow(Register result) { ++ ++ // Get method->_constMethod->_result_type ++ ld(T9, FP, frame::interpreter_frame_method_offset * wordSize); ++ ld(T9, T9, in_bytes(Method::const_offset())); ++ lbu(T9, T9, in_bytes(ConstMethod::result_type_offset())); ++ ++ Label done, notBool, notByte, notChar; ++ ++ // common case first ++ addiu(AT, T9, -T_INT); ++ beq(AT, R0, done); ++ delayed()->nop(); ++ ++ // mask integer result to narrower return type. 
++ addiu(AT, T9, -T_BOOLEAN); ++ bne(AT, R0, notBool); ++ delayed()->nop(); ++ andi(result, result, 0x1); ++ beq(R0, R0, done); ++ delayed()->nop(); ++ ++ bind(notBool); ++ addiu(AT, T9, -T_BYTE); ++ bne(AT, R0, notByte); ++ delayed()->nop(); ++ seb(result, result); ++ beq(R0, R0, done); ++ delayed()->nop(); ++ ++ bind(notByte); ++ addiu(AT, T9, -T_CHAR); ++ bne(AT, R0, notChar); ++ delayed()->nop(); ++ andi(result, result, 0xFFFF); ++ beq(R0, R0, done); ++ delayed()->nop(); ++ ++ bind(notChar); ++ seh(result, result); ++ ++ // Nothing to do for T_INT ++ bind(done); ++} ++ ++ ++void InterpreterMacroAssembler::profile_obj_type(Register obj, const Address& mdo_addr) { ++ Label update, next, none; ++ ++ verify_oop(obj); ++ ++ if (mdo_addr.index() != noreg) { ++ guarantee(T0 != mdo_addr.base(), "The base register will be corrupted !"); ++ guarantee(T0 != mdo_addr.index(), "The index register will be corrupted !"); ++ push(T0); ++ dsll(T0, mdo_addr.index(), mdo_addr.scale()); ++ daddu(T0, T0, mdo_addr.base()); ++ } ++ ++ bne(obj, R0, update); ++ delayed()->nop(); ++ ++ if (mdo_addr.index() == noreg) { ++ ld(AT, mdo_addr); ++ } else { ++ ld(AT, T0, mdo_addr.disp()); ++ } ++ ori(AT, AT, TypeEntries::null_seen); ++ if (mdo_addr.index() == noreg) { ++ sd(AT, mdo_addr); ++ } else { ++ sd(AT, T0, mdo_addr.disp()); ++ } ++ ++ beq(R0, R0, next); ++ delayed()->nop(); ++ ++ bind(update); ++ load_klass(obj, obj); ++ ++ if (mdo_addr.index() == noreg) { ++ ld(AT, mdo_addr); ++ } else { ++ ld(AT, T0, mdo_addr.disp()); ++ } ++ xorr(obj, obj, AT); ++ ++ assert(TypeEntries::type_klass_mask == -4, "must be"); ++ dextm(AT, obj, 2, 62); ++ beq(AT, R0, next); ++ delayed()->nop(); ++ ++ andi(AT, obj, TypeEntries::type_unknown); ++ bne(AT, R0, next); ++ delayed()->nop(); ++ ++ if (mdo_addr.index() == noreg) { ++ ld(AT, mdo_addr); ++ } else { ++ ld(AT, T0, mdo_addr.disp()); ++ } ++ beq(AT, R0, none); ++ delayed()->nop(); ++ ++ daddiu(AT, AT, -(TypeEntries::null_seen)); ++ beq(AT, R0, none); ++ delayed()->nop(); ++ ++ // There is a chance that the checks above (re-reading profiling ++ // data from memory) fail if another thread has just set the ++ // profiling to this obj's klass ++ if (mdo_addr.index() == noreg) { ++ ld(AT, mdo_addr); ++ } else { ++ ld(AT, T0, mdo_addr.disp()); ++ } ++ xorr(obj, obj, AT); ++ assert(TypeEntries::type_klass_mask == -4, "must be"); ++ dextm(AT, obj, 2, 62); ++ beq(AT, R0, next); ++ delayed()->nop(); ++ ++ // different than before. Cannot keep accurate profile. ++ if (mdo_addr.index() == noreg) { ++ ld(AT, mdo_addr); ++ } else { ++ ld(AT, T0, mdo_addr.disp()); ++ } ++ ori(AT, AT, TypeEntries::type_unknown); ++ if (mdo_addr.index() == noreg) { ++ sd(AT, mdo_addr); ++ } else { ++ sd(AT, T0, mdo_addr.disp()); ++ } ++ beq(R0, R0, next); ++ delayed()->nop(); ++ ++ bind(none); ++ // first time here. Set profile type. ++ if (mdo_addr.index() == noreg) { ++ sd(obj, mdo_addr); ++ } else { ++ sd(obj, T0, mdo_addr.disp()); ++ } ++ ++ bind(next); ++ if (mdo_addr.index() != noreg) { ++ pop(T0); ++ } ++} ++ ++void InterpreterMacroAssembler::profile_arguments_type(Register mdp, Register callee, Register tmp, bool is_virtual) { ++ if (!ProfileInterpreter) { ++ return; ++ } ++ ++ if (MethodData::profile_arguments() || MethodData::profile_return()) { ++ Label profile_continue; ++ ++ test_method_data_pointer(mdp, profile_continue); ++ ++ int off_to_start = is_virtual ? 
in_bytes(VirtualCallData::virtual_call_data_size()) : in_bytes(CounterData::counter_data_size()); ++ ++ lb(AT, mdp, in_bytes(DataLayout::tag_offset()) - off_to_start); ++ li(tmp, is_virtual ? DataLayout::virtual_call_type_data_tag : DataLayout::call_type_data_tag); ++ bne(tmp, AT, profile_continue); ++ delayed()->nop(); ++ ++ ++ if (MethodData::profile_arguments()) { ++ Label done; ++ int off_to_args = in_bytes(TypeEntriesAtCall::args_data_offset()); ++ if (Assembler::is_simm16(off_to_args)) { ++ daddiu(mdp, mdp, off_to_args); ++ } else { ++ move(AT, off_to_args); ++ daddu(mdp, mdp, AT); ++ } ++ ++ ++ for (int i = 0; i < TypeProfileArgsLimit; i++) { ++ if (i > 0 || MethodData::profile_return()) { ++ // If return value type is profiled we may have no argument to profile ++ ld(tmp, mdp, in_bytes(TypeEntriesAtCall::cell_count_offset())-off_to_args); ++ ++ if (Assembler::is_simm16(-1 * i * TypeStackSlotEntries::per_arg_count())) { ++ addiu32(tmp, tmp, -1 * i * TypeStackSlotEntries::per_arg_count()); ++ } else { ++ li(AT, i*TypeStackSlotEntries::per_arg_count()); ++ subu32(tmp, tmp, AT); ++ } ++ ++ li(AT, TypeStackSlotEntries::per_arg_count()); ++ slt(AT, tmp, AT); ++ bne(AT, R0, done); ++ delayed()->nop(); ++ } ++ ld(tmp, callee, in_bytes(Method::const_offset())); ++ ++ lhu(tmp, tmp, in_bytes(ConstMethod::size_of_parameters_offset())); ++ ++ // stack offset o (zero based) from the start of the argument ++ // list, for n arguments translates into offset n - o - 1 from ++ // the end of the argument list ++ ld(AT, mdp, in_bytes(TypeEntriesAtCall::stack_slot_offset(i))-off_to_args); ++ subu(tmp, tmp, AT); ++ ++ addiu32(tmp, tmp, -1); ++ ++ Address arg_addr = argument_address(tmp); ++ ld(tmp, arg_addr); ++ ++ Address mdo_arg_addr(mdp, in_bytes(TypeEntriesAtCall::argument_type_offset(i))-off_to_args); ++ profile_obj_type(tmp, mdo_arg_addr); ++ ++ int to_add = in_bytes(TypeStackSlotEntries::per_arg_size()); ++ if (Assembler::is_simm16(to_add)) { ++ daddiu(mdp, mdp, to_add); ++ } else { ++ move(AT, to_add); ++ daddu(mdp, mdp, AT); ++ } ++ ++ off_to_args += to_add; ++ } ++ ++ if (MethodData::profile_return()) { ++ ld(tmp, mdp, in_bytes(TypeEntriesAtCall::cell_count_offset())-off_to_args); ++ ++ int tmp_arg_counts = TypeProfileArgsLimit*TypeStackSlotEntries::per_arg_count(); ++ if (Assembler::is_simm16(-1 * tmp_arg_counts)) { ++ addiu32(tmp, tmp, -1 * tmp_arg_counts); ++ } else { ++ move(AT, tmp_arg_counts); ++ subu32(mdp, mdp, AT); ++ } ++ } ++ ++ bind(done); ++ ++ if (MethodData::profile_return()) { ++ // We're right after the type profile for the last ++ // argument. tmp is the number of cells left in the ++ // CallTypeData/VirtualCallTypeData to reach its end. Non null ++ // if there's a return to profile. 
++ assert(ReturnTypeEntry::static_cell_count() < TypeStackSlotEntries::per_arg_count(), "can't move past ret type"); ++ sll(tmp, tmp, exact_log2(DataLayout::cell_size)); ++ daddu(mdp, mdp, tmp); ++ } ++ sd(mdp, FP, frame::interpreter_frame_mdp_offset * wordSize); ++ } else { ++ assert(MethodData::profile_return(), "either profile call args or call ret"); ++ update_mdp_by_constant(mdp, in_bytes(TypeEntriesAtCall::return_only_size())); ++ } ++ ++ // mdp points right after the end of the ++ // CallTypeData/VirtualCallTypeData, right after the cells for the ++ // return value type if there's one ++ ++ bind(profile_continue); ++ } ++} ++ ++void InterpreterMacroAssembler::profile_return_type(Register mdp, Register ret, Register tmp) { ++ assert_different_registers(mdp, ret, tmp, _bcp_register); ++ if (ProfileInterpreter && MethodData::profile_return()) { ++ Label profile_continue, done; ++ ++ test_method_data_pointer(mdp, profile_continue); ++ ++ if (MethodData::profile_return_jsr292_only()) { ++ assert(Method::intrinsic_id_size_in_bytes() == 2, "assuming Method::_intrinsic_id is u2"); ++ ++ // If we don't profile all invoke bytecodes we must make sure ++ // it's a bytecode we indeed profile. We can't go back to the ++ // begining of the ProfileData we intend to update to check its ++ // type because we're right after it and we don't known its ++ // length ++ Label do_profile; ++ lb(tmp, _bcp_register, 0); ++ daddiu(AT, tmp, -1 * Bytecodes::_invokedynamic); ++ beq(AT, R0, do_profile); ++ delayed()->daddiu(AT, tmp, -1 * Bytecodes::_invokehandle); ++ beq(AT, R0, do_profile); ++ delayed()->nop(); ++ ++ get_method(tmp); ++ lhu(tmp, tmp, Method::intrinsic_id_offset_in_bytes()); ++ li(AT, static_cast(vmIntrinsics::_compiledLambdaForm)); ++ bne(tmp, AT, profile_continue); ++ delayed()->nop(); ++ ++ bind(do_profile); ++ } ++ ++ Address mdo_ret_addr(mdp, -in_bytes(ReturnTypeEntry::size())); ++ daddu(tmp, ret, R0); ++ profile_obj_type(tmp, mdo_ret_addr); ++ ++ bind(profile_continue); ++ } ++} ++ ++void InterpreterMacroAssembler::profile_parameters_type(Register mdp, Register tmp1, Register tmp2) { ++ guarantee(T9 == tmp1, "You are reqired to use T9 as the index register for MIPS !"); ++ ++ if (ProfileInterpreter && MethodData::profile_parameters()) { ++ Label profile_continue, done; ++ ++ test_method_data_pointer(mdp, profile_continue); ++ ++ // Load the offset of the area within the MDO used for ++ // parameters. If it's negative we're not profiling any parameters ++ lw(tmp1, mdp, in_bytes(MethodData::parameters_type_data_di_offset()) - in_bytes(MethodData::data_offset())); ++ bltz(tmp1, profile_continue); ++ delayed()->nop(); ++ ++ // Compute a pointer to the area for parameters from the offset ++ // and move the pointer to the slot for the last ++ // parameters. Collect profiling from last parameter down. 
++ // mdo start + parameters offset + array length - 1 ++ daddu(mdp, mdp, tmp1); ++ ld(tmp1, mdp, in_bytes(ArrayData::array_len_offset())); ++ decrement(tmp1, TypeStackSlotEntries::per_arg_count()); ++ ++ ++ Label loop; ++ bind(loop); ++ ++ int off_base = in_bytes(ParametersTypeData::stack_slot_offset(0)); ++ int type_base = in_bytes(ParametersTypeData::type_offset(0)); ++ Address::ScaleFactor per_arg_scale = Address::times(DataLayout::cell_size); ++ Address arg_type(mdp, tmp1, per_arg_scale, type_base); ++ ++ // load offset on the stack from the slot for this parameter ++ dsll(AT, tmp1, per_arg_scale); ++ daddu(AT, AT, mdp); ++ ld(tmp2, AT, off_base); ++ ++ subu(tmp2, R0, tmp2); ++ ++ // read the parameter from the local area ++ dsll(AT, tmp2, Interpreter::logStackElementSize); ++ daddu(AT, AT, _locals_register); ++ ld(tmp2, AT, 0); ++ ++ // profile the parameter ++ profile_obj_type(tmp2, arg_type); ++ ++ // go to next parameter ++ decrement(tmp1, TypeStackSlotEntries::per_arg_count()); ++ bgtz(tmp1, loop); ++ delayed()->nop(); ++ ++ bind(profile_continue); ++ } ++} ++ ++void InterpreterMacroAssembler::verify_oop(Register reg, TosState state) { ++ if (state == atos) { ++ MacroAssembler::verify_oop(reg); ++ } ++} ++ ++void InterpreterMacroAssembler::verify_FPU(int stack_depth, TosState state) { ++} ++ ++void InterpreterMacroAssembler::notify_method_entry() { ++ // Whenever JVMTI is interp_only_mode, method entry/exit events are sent to ++ // track stack depth. If it is possible to enter interp_only_mode we add ++ // the code to check if the event should be sent. ++ Register tempreg = T0; ++#ifndef OPT_THREAD ++ Register thread = T8; ++ get_thread(thread); ++#else ++ Register thread = TREG; ++#endif ++ if (JvmtiExport::can_post_interpreter_events()) { ++ Label L; ++ lw(tempreg, thread, in_bytes(JavaThread::interp_only_mode_offset())); ++ beq(tempreg, R0, L); ++ delayed()->nop(); ++ call_VM(noreg, CAST_FROM_FN_PTR(address, ++ InterpreterRuntime::post_method_entry)); ++ bind(L); ++ } ++ ++ { ++ SkipIfEqual skip_if(this, &DTraceMethodProbes, 0); ++ get_method(S3); ++ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::dtrace_method_entry), ++ //Rthread, ++ thread, ++ //Rmethod); ++ S3); ++ } ++ ++} ++ ++void InterpreterMacroAssembler::notify_method_exit( ++ TosState state, NotifyMethodExitMode mode) { ++ Register tempreg = T0; ++#ifndef OPT_THREAD ++ Register thread = T8; ++ get_thread(thread); ++#else ++ Register thread = TREG; ++#endif ++ // Whenever JVMTI is interp_only_mode, method entry/exit events are sent to ++ // track stack depth. If it is possible to enter interp_only_mode we add ++ // the code to check if the event should be sent. ++ if (mode == NotifyJVMTI && JvmtiExport::can_post_interpreter_events()) { ++ Label skip; ++ // Note: frame::interpreter_frame_result has a dependency on how the ++ // method result is saved across the call to post_method_exit. If this ++ // is changed then the interpreter_frame_result implementation will ++ // need to be updated too. ++ ++ // template interpreter will leave it on the top of the stack. 
++ push(state); ++ lw(tempreg, thread, in_bytes(JavaThread::interp_only_mode_offset())); ++ beq(tempreg, R0, skip); ++ delayed()->nop(); ++ call_VM(noreg, ++ CAST_FROM_FN_PTR(address, InterpreterRuntime::post_method_exit)); ++ bind(skip); ++ pop(state); ++ } ++ ++ { ++ // Dtrace notification ++ SkipIfEqual skip_if(this, &DTraceMethodProbes, 0); ++ push(state); ++ get_method(S3); ++ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::dtrace_method_exit), ++ //Rthread, Rmethod); ++ thread, S3); ++ pop(state); ++ } ++} ++ ++// Jump if ((*counter_addr += increment) & mask) satisfies the condition. ++void InterpreterMacroAssembler::increment_mask_and_jump(Address counter_addr, ++ int increment, Address mask, ++ Register scratch, bool preloaded, ++ Condition cond, Label* where) { ++ assert_different_registers(scratch, AT); ++ ++ if (!preloaded) { ++ lw(scratch, counter_addr); ++ } ++ addiu32(scratch, scratch, increment); ++ sw(scratch, counter_addr); ++ ++ lw(AT, mask); ++ andr(scratch, scratch, AT); ++ ++ if (cond == Assembler::zero) { ++ beq(scratch, R0, *where); ++ delayed()->nop(); ++ } else { ++ unimplemented(); ++ } ++} +diff --git a/src/hotspot/cpu/mips/interpreterRT_mips.hpp b/src/hotspot/cpu/mips/interpreterRT_mips.hpp +new file mode 100644 +index 00000000000..054138ea42b +--- /dev/null ++++ b/src/hotspot/cpu/mips/interpreterRT_mips.hpp +@@ -0,0 +1,60 @@ ++/* ++ * Copyright (c) 1998, 2010, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#ifndef CPU_MIPS_VM_INTERPRETERRT_MIPS_HPP ++#define CPU_MIPS_VM_INTERPRETERRT_MIPS_HPP ++ ++// This is included in the middle of class Interpreter. ++// Do not include files here. 
++ ++// native method calls ++ ++class SignatureHandlerGenerator: public NativeSignatureIterator { ++ private: ++ MacroAssembler* _masm; ++ ++ void move(int from_offset, int to_offset); ++ ++ void box(int from_offset, int to_offset); ++ void pass_int(); ++ void pass_long(); ++ void pass_object(); ++ void pass_float(); ++ void pass_double(); ++ ++ public: ++ // Creation ++ SignatureHandlerGenerator(const methodHandle& method, CodeBuffer* buffer); ++ ++ // Code generation ++ void generate(uint64_t fingerprint); ++ ++ // Code generation support ++ static Register from(); ++ static Register to(); ++ static Register temp(); ++}; ++ ++#endif // CPU_MIPS_VM_INTERPRETERRT_MIPS_HPP +diff --git a/src/hotspot/cpu/mips/interpreterRT_mips_64.cpp b/src/hotspot/cpu/mips/interpreterRT_mips_64.cpp +new file mode 100644 +index 00000000000..3e8ae9fb5a5 +--- /dev/null ++++ b/src/hotspot/cpu/mips/interpreterRT_mips_64.cpp +@@ -0,0 +1,260 @@ ++/* ++ * Copyright (c) 2003, 2012, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ * ++ */ ++ ++#include "precompiled.hpp" ++#include "interpreter/interp_masm.hpp" ++#include "interpreter/interpreter.hpp" ++#include "interpreter/interpreterRuntime.hpp" ++#include "memory/allocation.inline.hpp" ++#include "memory/universe.hpp" ++#include "oops/method.hpp" ++#include "oops/oop.inline.hpp" ++#include "runtime/handles.inline.hpp" ++#include "runtime/icache.hpp" ++#include "runtime/interfaceSupport.inline.hpp" ++#include "runtime/signature.hpp" ++ ++#define __ _masm-> ++ ++#define A0 RA0 ++#define A1 RA1 ++#define A2 RA2 ++#define A3 RA3 ++#define A4 RA4 ++#define A5 RA5 ++#define A6 RA6 ++#define A7 RA7 ++#define T0 RT0 ++#define T1 RT1 ++#define T2 RT2 ++#define T3 RT3 ++#define T8 RT8 ++#define T9 RT9 ++ ++// Implementation of SignatureHandlerGenerator ++InterpreterRuntime::SignatureHandlerGenerator::SignatureHandlerGenerator( ++ const methodHandle& method, CodeBuffer* buffer) : NativeSignatureIterator(method) { ++ _masm = new MacroAssembler(buffer); ++} ++ ++void InterpreterRuntime::SignatureHandlerGenerator::move(int from_offset, int to_offset) { ++ __ ld(temp(), from(), Interpreter::local_offset_in_bytes(from_offset)); ++ __ sd(temp(), to(), to_offset * longSize); ++} ++ ++void InterpreterRuntime::SignatureHandlerGenerator::box(int from_offset, int to_offset) { ++ __ addiu(temp(), from(),Interpreter::local_offset_in_bytes(from_offset) ); ++ __ lw(AT, from(), Interpreter::local_offset_in_bytes(from_offset) ); ++ ++ __ movz(temp(), R0, AT); ++ __ sw(temp(), to(), to_offset * wordSize); ++} ++ ++void InterpreterRuntime::SignatureHandlerGenerator::generate(uint64_t fingerprint) { ++ // generate code to handle arguments ++ iterate(fingerprint); ++ // return result handler ++ __ li(V0, AbstractInterpreter::result_handler(method()->result_type())); ++ // return ++ __ jr(RA); ++ __ delayed()->nop(); ++ ++ __ flush(); ++} ++ ++void InterpreterRuntime::SignatureHandlerGenerator::pass_int() { ++ Argument jni_arg(jni_offset()); ++ if(jni_arg.is_Register()) { ++ __ lw(jni_arg.as_Register(), from(), Interpreter::local_offset_in_bytes(offset())); ++ } else { ++ __ lw(temp(), from(), Interpreter::local_offset_in_bytes(offset())); ++ __ sw(temp(), jni_arg.as_caller_address()); ++ } ++} ++ ++// the jvm specifies that long type takes 2 stack spaces, so in do_long(), _offset += 2. ++void InterpreterRuntime::SignatureHandlerGenerator::pass_long() { ++ Argument jni_arg(jni_offset()); ++ if(jni_arg.is_Register()) { ++ __ ld(jni_arg.as_Register(), from(), Interpreter::local_offset_in_bytes(offset() + 1)); ++ } else { ++ __ ld(temp(), from(), Interpreter::local_offset_in_bytes(offset() + 1)); ++ __ sd(temp(), jni_arg.as_caller_address()); ++ } ++} ++ ++void InterpreterRuntime::SignatureHandlerGenerator::pass_object() { ++ Argument jni_arg(jni_offset()); ++ ++ // the handle for a receiver will never be null ++ bool do_NULL_check = offset() != 0 || is_static(); ++ if (do_NULL_check) { ++ __ ld(AT, from(), Interpreter::local_offset_in_bytes(offset())); ++ __ daddiu((jni_arg.is_Register() ? jni_arg.as_Register() : temp()), from(), Interpreter::local_offset_in_bytes(offset())); ++ __ movz((jni_arg.is_Register() ? 
jni_arg.as_Register() : temp()), R0, AT); ++ } else { ++ __ daddiu(jni_arg.as_Register(), from(), Interpreter::local_offset_in_bytes(offset())); ++ } ++ ++ if (!jni_arg.is_Register()) ++ __ sd(temp(), jni_arg.as_caller_address()); ++} ++ ++void InterpreterRuntime::SignatureHandlerGenerator::pass_float() { ++ Argument jni_arg(jni_offset()); ++ if(jni_arg.is_Register()) { ++ __ lwc1(jni_arg.as_FloatRegister(), from(), Interpreter::local_offset_in_bytes(offset())); ++ } else { ++ __ lw(temp(), from(), Interpreter::local_offset_in_bytes(offset())); ++ __ sw(temp(), jni_arg.as_caller_address()); ++ } ++} ++ ++// the jvm specifies that double type takes 2 stack spaces, so in do_double(), _offset += 2. ++void InterpreterRuntime::SignatureHandlerGenerator::pass_double() { ++ Argument jni_arg(jni_offset()); ++ if(jni_arg.is_Register()) { ++ __ ldc1(jni_arg.as_FloatRegister(), from(), Interpreter::local_offset_in_bytes(offset() + 1)); ++ } else { ++ __ ld(temp(), from(), Interpreter::local_offset_in_bytes(offset() + 1)); ++ __ sd(temp(), jni_arg.as_caller_address()); ++ } ++} ++ ++ ++Register InterpreterRuntime::SignatureHandlerGenerator::from() { return LVP; } ++Register InterpreterRuntime::SignatureHandlerGenerator::to() { return SP; } ++Register InterpreterRuntime::SignatureHandlerGenerator::temp() { return T8; } ++ ++// Implementation of SignatureHandlerLibrary ++ ++void SignatureHandlerLibrary::pd_set_handler(address handler) {} ++ ++ ++class SlowSignatureHandler ++ : public NativeSignatureIterator { ++ private: ++ address _from; ++ intptr_t* _to; ++ intptr_t* _reg_args; ++ intptr_t* _fp_identifiers; ++ unsigned int _num_args; ++ ++ virtual void pass_int() ++ { ++ jint from_obj = *(jint *)(_from+Interpreter::local_offset_in_bytes(0)); ++ _from -= Interpreter::stackElementSize; ++ ++ if (_num_args < Argument::n_register_parameters) { ++ *_reg_args++ = from_obj; ++ _num_args++; ++ } else { ++ *_to++ = from_obj; ++ } ++ } ++ ++ virtual void pass_long() ++ { ++ intptr_t from_obj = *(intptr_t*)(_from+Interpreter::local_offset_in_bytes(1)); ++ _from -= 2 * Interpreter::stackElementSize; ++ ++ if (_num_args < Argument::n_register_parameters) { ++ *_reg_args++ = from_obj; ++ _num_args++; ++ } else { ++ *_to++ = from_obj; ++ } ++ } ++ ++ virtual void pass_object() ++ { ++ intptr_t *from_addr = (intptr_t*)(_from + Interpreter::local_offset_in_bytes(0)); ++ _from -= Interpreter::stackElementSize; ++ if (_num_args < Argument::n_register_parameters) { ++ *_reg_args++ = (*from_addr == 0) ? NULL : (intptr_t) from_addr; ++ _num_args++; ++ } else { ++ *_to++ = (*from_addr == 0) ? 
NULL : (intptr_t) from_addr; ++ } ++ } ++ ++ virtual void pass_float() ++ { ++ jint from_obj = *(jint *)(_from+Interpreter::local_offset_in_bytes(0)); ++ _from -= Interpreter::stackElementSize; ++ ++ if (_num_args < Argument::n_float_register_parameters) { ++ *_reg_args++ = from_obj; ++ *_fp_identifiers |= (0x01 << (_num_args*2)); // mark as float ++ _num_args++; ++ } else { ++ *_to++ = from_obj; ++ } ++ } ++ ++ virtual void pass_double() ++ { ++ intptr_t from_obj = *(intptr_t*)(_from+Interpreter::local_offset_in_bytes(1)); ++ _from -= 2*Interpreter::stackElementSize; ++ ++ if (_num_args < Argument::n_float_register_parameters) { ++ *_reg_args++ = from_obj; ++ *_fp_identifiers |= (0x3 << (_num_args*2)); // mark as double ++ _num_args++; ++ } else { ++ *_to++ = from_obj; ++ } ++ } ++ ++ public: ++ SlowSignatureHandler(methodHandle method, address from, intptr_t* to) ++ : NativeSignatureIterator(method) ++ { ++ _from = from; ++ _to = to; ++ ++ // see TemplateInterpreterGenerator::generate_slow_signature_handler() ++ _reg_args = to - Argument::n_register_parameters + jni_offset() - 1; ++ _fp_identifiers = to - 1; ++ *(int*) _fp_identifiers = 0; ++ _num_args = jni_offset(); ++ } ++}; ++ ++ ++JRT_ENTRY(address, ++ InterpreterRuntime::slow_signature_handler(JavaThread* current, ++ Method* method, ++ intptr_t* from, ++ intptr_t* to)) ++ methodHandle m(current, (Method*)method); ++ assert(m->is_native(), "sanity check"); ++ ++ // handle arguments ++ SlowSignatureHandler(m, (address)from, to).iterate(UCONST64(-1)); ++ ++ // return result handler ++ return Interpreter::result_handler(m->result_type()); ++JRT_END +diff --git a/src/hotspot/cpu/mips/javaFrameAnchor_mips.hpp b/src/hotspot/cpu/mips/javaFrameAnchor_mips.hpp +new file mode 100644 +index 00000000000..f5f3735e7cb +--- /dev/null ++++ b/src/hotspot/cpu/mips/javaFrameAnchor_mips.hpp +@@ -0,0 +1,87 @@ ++/* ++ * Copyright (c) 2002, 2010, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ * ++ */ ++ ++#ifndef CPU_MIPS_VM_JAVAFRAMEANCHOR_MIPS_HPP ++#define CPU_MIPS_VM_JAVAFRAMEANCHOR_MIPS_HPP ++ ++private: ++ ++ // FP value associated with _last_Java_sp: ++ intptr_t* volatile _last_Java_fp; // pointer is volatile not what it points to ++ ++public: ++ // Each arch must define reset, save, restore ++ // These are used by objects that only care about: ++ // 1 - initializing a new state (thread creation, javaCalls) ++ // 2 - saving a current state (javaCalls) ++ // 3 - restoring an old state (javaCalls) ++ ++ void clear(void) { ++ // clearing _last_Java_sp must be first ++ _last_Java_sp = NULL; ++ // fence? ++ _last_Java_fp = NULL; ++ _last_Java_pc = NULL; ++ } ++ ++ void copy(JavaFrameAnchor* src) { ++ // In order to make sure the transition state is valid for "this" ++ // We must clear _last_Java_sp before copying the rest of the new data ++ // ++ // Hack Alert: Temporary bugfix for 4717480/4721647 ++ // To act like previous version (pd_cache_state) don't NULL _last_Java_sp ++ // unless the value is changing ++ // ++ if (_last_Java_sp != src->_last_Java_sp) ++ _last_Java_sp = NULL; ++ ++ _last_Java_fp = src->_last_Java_fp; ++ _last_Java_pc = src->_last_Java_pc; ++ // Must be last so profiler will always see valid frame if has_last_frame() is true ++ _last_Java_sp = src->_last_Java_sp; ++ } ++ ++ // Always walkable ++ bool walkable(void) { return true; } ++ // Never any thing to do since we are always walkable and can find address of return addresses ++ void make_walkable() { } ++ ++ intptr_t* last_Java_sp(void) const { return _last_Java_sp; } ++ ++ address last_Java_pc(void) { return _last_Java_pc; } ++ ++private: ++ ++ static ByteSize last_Java_fp_offset() { return byte_offset_of(JavaFrameAnchor, _last_Java_fp); } ++ ++public: ++ ++ void set_last_Java_sp(intptr_t* sp) { _last_Java_sp = sp; } ++ ++ intptr_t* last_Java_fp(void) { return _last_Java_fp; } ++ // Assert (last_Java_sp == NULL || fp == NULL) ++ void set_last_Java_fp(intptr_t* fp) { _last_Java_fp = fp; } ++ ++#endif // CPU_MIPS_VM_JAVAFRAMEANCHOR_MIPS_HPP +diff --git a/src/hotspot/cpu/mips/jniFastGetField_mips_64.cpp b/src/hotspot/cpu/mips/jniFastGetField_mips_64.cpp +new file mode 100644 +index 00000000000..547414f7ef3 +--- /dev/null ++++ b/src/hotspot/cpu/mips/jniFastGetField_mips_64.cpp +@@ -0,0 +1,203 @@ ++/* ++ * Copyright (c) 2004, 2010, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2021, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ * ++ */ ++ ++#include "precompiled.hpp" ++#include "asm/macroAssembler.hpp" ++#include "code/codeBlob.hpp" ++#include "gc/shared/barrierSet.hpp" ++#include "gc/shared/barrierSetAssembler.hpp" ++#include "memory/resourceArea.hpp" ++#include "prims/jniFastGetField.hpp" ++#include "prims/jvm_misc.hpp" ++#include "prims/jvmtiExport.hpp" ++#include "runtime/safepoint.hpp" ++ ++#define __ masm-> ++ ++#define A0 RA0 ++#define A1 RA1 ++#define A2 RA2 ++#define A3 RA3 ++#define A4 RA4 ++#define A5 RA5 ++#define A6 RA6 ++#define A7 RA7 ++#define T0 RT0 ++#define T1 RT1 ++#define T2 RT2 ++#define T3 RT3 ++#define T8 RT8 ++#define T9 RT9 ++ ++#define BUFFER_SIZE 30*wordSize ++ ++// Instead of issuing membar for LoadLoad barrier, we create address dependency ++// between loads, which is more efficient than membar. ++ ++address JNI_FastGetField::generate_fast_get_int_field0(BasicType type) { ++ const char *name = NULL; ++ switch (type) { ++ case T_BOOLEAN: name = "jni_fast_GetBooleanField"; break; ++ case T_BYTE: name = "jni_fast_GetByteField"; break; ++ case T_CHAR: name = "jni_fast_GetCharField"; break; ++ case T_SHORT: name = "jni_fast_GetShortField"; break; ++ case T_INT: name = "jni_fast_GetIntField"; break; ++ case T_LONG: name = "jni_fast_GetLongField"; break; ++ case T_FLOAT: name = "jni_fast_GetFloatField"; break; ++ case T_DOUBLE: name = "jni_fast_GetDoubleField"; break; ++ default: ShouldNotReachHere(); ++ } ++ ResourceMark rm; ++ BufferBlob* blob = BufferBlob::create(name, BUFFER_SIZE); ++ CodeBuffer cbuf(blob); ++ MacroAssembler* masm = new MacroAssembler(&cbuf); ++ address fast_entry = __ pc(); ++ Label slow; ++ ++ const Register env = A0; ++ const Register obj = A1; ++ const Register fid = A2; ++ const Register tmp1 = AT; ++ const Register tmp2 = T9; ++ const Register obj_addr = T0; ++ const Register field_val = V0; ++ const Register field_addr = T0; ++ const Register counter_addr = T2; ++ const Register counter_prev_val = T1; ++ ++ __ li(counter_addr, SafepointSynchronize::safepoint_counter_addr()); ++ __ lw(counter_prev_val, counter_addr, 0); ++ ++ // Parameters(A0~A3) should not be modified, since they will be used in slow path ++ __ andi(tmp1, counter_prev_val, 1); ++ __ bne(tmp1, R0, slow); ++ __ delayed()->nop(); ++ ++ if (JvmtiExport::can_post_field_access()) { ++ // Check to see if a field access watch has been set before we ++ // take the fast path. ++ __ li(tmp1, JvmtiExport::get_field_access_count_addr()); ++ // address dependency ++ __ xorr(tmp1, tmp1, counter_prev_val); ++ __ xorr(tmp1, tmp1, counter_prev_val); ++ __ lw(tmp1, tmp1, 0); ++ __ bne(tmp1, R0, slow); ++ __ delayed()->nop(); ++ } ++ ++ __ move(obj_addr, obj); ++ // Both obj_addr and tmp2 are clobbered by try_resolve_jobject_in_native. 
++ BarrierSetAssembler* bs = BarrierSet::barrier_set()->barrier_set_assembler(); ++ bs->try_resolve_jobject_in_native(masm, env, obj_addr, tmp2, slow); ++ ++ __ dsrl(tmp1, fid, 2); // offset ++ __ daddu(field_addr, obj_addr, tmp1); ++ // address dependency ++ __ xorr(field_addr, field_addr, counter_prev_val); ++ __ xorr(field_addr, field_addr, counter_prev_val); ++ ++ assert(count < LIST_CAPACITY, "LIST_CAPACITY too small"); ++ speculative_load_pclist[count] = __ pc(); ++ switch (type) { ++ case T_BOOLEAN: __ lbu (field_val, field_addr, 0); break; ++ case T_BYTE: __ lb (field_val, field_addr, 0); break; ++ case T_CHAR: __ lhu (field_val, field_addr, 0); break; ++ case T_SHORT: __ lh (field_val, field_addr, 0); break; ++ case T_INT: __ lw (field_val, field_addr, 0); break; ++ case T_LONG: __ ld (field_val, field_addr, 0); break; ++ case T_FLOAT: __ lwu (field_val, field_addr, 0); break; ++ case T_DOUBLE: __ ld (field_val, field_addr, 0); break; ++ default: ShouldNotReachHere(); ++ } ++ ++ // address dependency ++ __ xorr(counter_addr, counter_addr, field_val); ++ __ xorr(counter_addr, counter_addr, field_val); ++ __ lw(tmp1, counter_addr, 0); ++ __ bne(counter_prev_val, tmp1, slow); ++ __ delayed()->nop(); ++ ++ switch (type) { ++ case T_FLOAT: __ mtc1 (field_val, F0); break; ++ case T_DOUBLE: __ dmtc1(field_val, F0); break; ++ default: break; ++ } ++ ++ __ jr(RA); ++ __ delayed()->nop(); ++ ++ slowcase_entry_pclist[count++] = __ pc(); ++ __ bind (slow); ++ address slow_case_addr = NULL; ++ switch (type) { ++ case T_BOOLEAN: slow_case_addr = jni_GetBooleanField_addr(); break; ++ case T_BYTE: slow_case_addr = jni_GetByteField_addr(); break; ++ case T_CHAR: slow_case_addr = jni_GetCharField_addr(); break; ++ case T_SHORT: slow_case_addr = jni_GetShortField_addr(); break; ++ case T_INT: slow_case_addr = jni_GetIntField_addr(); break; ++ case T_LONG: slow_case_addr = jni_GetLongField_addr(); break; ++ case T_FLOAT: slow_case_addr = jni_GetFloatField_addr(); break; ++ case T_DOUBLE: slow_case_addr = jni_GetDoubleField_addr(); break; ++ default: ShouldNotReachHere(); ++ } ++ __ jmp(slow_case_addr); ++ __ delayed()->nop(); ++ ++ __ flush (); ++ ++ return fast_entry; ++} ++ ++address JNI_FastGetField::generate_fast_get_boolean_field() { ++ return generate_fast_get_int_field0(T_BOOLEAN); ++} ++ ++address JNI_FastGetField::generate_fast_get_byte_field() { ++ return generate_fast_get_int_field0(T_BYTE); ++} ++ ++address JNI_FastGetField::generate_fast_get_char_field() { ++ return generate_fast_get_int_field0(T_CHAR); ++} ++ ++address JNI_FastGetField::generate_fast_get_short_field() { ++ return generate_fast_get_int_field0(T_SHORT); ++} ++ ++address JNI_FastGetField::generate_fast_get_int_field() { ++ return generate_fast_get_int_field0(T_INT); ++} ++ ++address JNI_FastGetField::generate_fast_get_long_field() { ++ return generate_fast_get_int_field0(T_LONG); ++} ++ ++address JNI_FastGetField::generate_fast_get_float_field() { ++ return generate_fast_get_int_field0(T_FLOAT); ++} ++ ++address JNI_FastGetField::generate_fast_get_double_field() { ++ return generate_fast_get_int_field0(T_DOUBLE); ++} +diff --git a/src/hotspot/cpu/mips/jniTypes_mips.hpp b/src/hotspot/cpu/mips/jniTypes_mips.hpp +new file mode 100644 +index 00000000000..08ac565aa53 +--- /dev/null ++++ b/src/hotspot/cpu/mips/jniTypes_mips.hpp +@@ -0,0 +1,143 @@ ++/* ++ * Copyright (c) 1998, 2010, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. 
++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#ifndef CPU_MIPS_VM_JNITYPES_MIPS_HPP ++#define CPU_MIPS_VM_JNITYPES_MIPS_HPP ++ ++#include "jni.h" ++#include "memory/allocation.hpp" ++#include "oops/oop.hpp" ++ ++// This file holds platform-dependent routines used to write primitive jni ++// types to the array of arguments passed into JavaCalls::call ++ ++class JNITypes : AllStatic { ++ // These functions write a java primitive type (in native format) ++ // to a java stack slot array to be passed as an argument to JavaCalls:calls. ++ // I.e., they are functionally 'push' operations if they have a 'pos' ++ // formal parameter. Note that jlong's and jdouble's are written ++ // _in reverse_ of the order in which they appear in the interpreter ++ // stack. This is because call stubs (see stubGenerator_sparc.cpp) ++ // reverse the argument list constructed by JavaCallArguments (see ++ // javaCalls.hpp). ++ ++private: ++ ++ // 32bit Helper routines. ++ static inline void put_int2r(jint *from, intptr_t *to) { *(jint *)(to++) = from[1]; ++ *(jint *)(to ) = from[0]; } ++ static inline void put_int2r(jint *from, intptr_t *to, int& pos) { put_int2r(from, to + pos); pos += 2; } ++ ++public: ++ // In MIPS64, the sizeof intptr_t is 8 bytes, and each unit in JavaCallArguments::_value_buffer[] ++ // is 8 bytes. ++ // If we only write the low 4 bytes with (jint *), the high 4-bits will be left with uncertain values. ++ // Then, in JavaCallArguments::parameters(), the whole 8 bytes of a T_INT parameter is loaded. ++ // This error occurs in ReflectInvoke.java ++ // The parameter of DD(int) should be 4 instead of 0x550000004. ++ // ++ // See: [runtime/javaCalls.hpp] ++ ++ static inline void put_int(jint from, intptr_t *to) { *(intptr_t *)(to + 0 ) = from; } ++ static inline void put_int(jint from, intptr_t *to, int& pos) { *(intptr_t *)(to + pos++) = from; } ++ static inline void put_int(jint *from, intptr_t *to, int& pos) { *(intptr_t *)(to + pos++) = *from; } ++ ++ // Longs are stored in native format in one JavaCallArgument slot at ++ // *(to). ++ // In theory, *(to + 1) is an empty slot. But, for several Java2D testing programs (TestBorderLayout, SwingTest), ++ // *(to + 1) must contains a copy of the long value. Otherwise it will corrupts. ++ static inline void put_long(jlong from, intptr_t *to) { ++ *(jlong*) (to + 1) = from; ++ *(jlong*) (to) = from; ++ } ++ ++ // A long parameter occupies two slot. ++ // It must fit the layout rule in methodHandle. 
++ // ++ // See: [runtime/reflection.cpp] Reflection::invoke() ++ // assert(java_args.size_of_parameters() == method->size_of_parameters(), "just checking"); ++ ++ static inline void put_long(jlong from, intptr_t *to, int& pos) { ++ *(jlong*) (to + 1 + pos) = from; ++ *(jlong*) (to + pos) = from; ++ pos += 2; ++ } ++ ++ static inline void put_long(jlong *from, intptr_t *to, int& pos) { ++ *(jlong*) (to + 1 + pos) = *from; ++ *(jlong*) (to + pos) = *from; ++ pos += 2; ++ } ++ ++ // Oops are stored in native format in one JavaCallArgument slot at *to. ++ static inline void put_obj(const Handle& from_handle, intptr_t *to, int& pos) { *(to + pos++) = (intptr_t)from_handle.raw_value(); } ++ static inline void put_obj(jobject from_handle, intptr_t *to, int& pos) { *(to + pos++) = (intptr_t)from_handle; } ++ ++ // Floats are stored in native format in one JavaCallArgument slot at *to. ++ static inline void put_float(jfloat from, intptr_t *to) { *(jfloat *)(to + 0 ) = from; } ++ static inline void put_float(jfloat from, intptr_t *to, int& pos) { *(jfloat *)(to + pos++) = from; } ++ static inline void put_float(jfloat *from, intptr_t *to, int& pos) { *(jfloat *)(to + pos++) = *from; } ++ ++#undef _JNI_SLOT_OFFSET ++#define _JNI_SLOT_OFFSET 0 ++ ++ // Longs are stored in native format in one JavaCallArgument slot at ++ // *(to). ++ // In theory, *(to + 1) is an empty slot. But, for several Java2D testing programs (TestBorderLayout, SwingTest), ++ // *(to + 1) must contains a copy of the long value. Otherwise it will corrupts. ++ static inline void put_double(jdouble from, intptr_t *to) { ++ *(jdouble*) (to + 1) = from; ++ *(jdouble*) (to) = from; ++ } ++ ++ // A long parameter occupies two slot. ++ // It must fit the layout rule in methodHandle. ++ // ++ // See: [runtime/reflection.cpp] Reflection::invoke() ++ // assert(java_args.size_of_parameters() == method->size_of_parameters(), "just checking"); ++ ++ static inline void put_double(jdouble from, intptr_t *to, int& pos) { ++ *(jdouble*) (to + 1 + pos) = from; ++ *(jdouble*) (to + pos) = from; ++ pos += 2; ++ } ++ ++ static inline void put_double(jdouble *from, intptr_t *to, int& pos) { ++ *(jdouble*) (to + 1 + pos) = *from; ++ *(jdouble*) (to + pos) = *from; ++ pos += 2; ++ } ++ ++ // The get_xxx routines, on the other hand, actually _do_ fetch ++ // java primitive types from the interpreter stack. ++ static inline jint get_int (intptr_t *from) { return *(jint *) from; } ++ static inline jlong get_long (intptr_t *from) { return *(jlong *) (from + _JNI_SLOT_OFFSET); } ++ static inline oop get_obj (intptr_t *from) { return *(oop *) from; } ++ static inline jfloat get_float (intptr_t *from) { return *(jfloat *) from; } ++ static inline jdouble get_double(intptr_t *from) { return *(jdouble *)(from + _JNI_SLOT_OFFSET); } ++#undef _JNI_SLOT_OFFSET ++}; ++ ++#endif // CPU_MIPS_VM_JNITYPES_MIPS_HPP +diff --git a/src/hotspot/cpu/mips/macroAssembler_mips.cpp b/src/hotspot/cpu/mips/macroAssembler_mips.cpp +new file mode 100644 +index 00000000000..1256e957970 +--- /dev/null ++++ b/src/hotspot/cpu/mips/macroAssembler_mips.cpp +@@ -0,0 +1,3686 @@ ++/* ++ * Copyright (c) 1997, 2014, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2017, 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
++ *
++ * This code is free software; you can redistribute it and/or modify it
++ * under the terms of the GNU General Public License version 2 only, as
++ * published by the Free Software Foundation.
++ *
++ * This code is distributed in the hope that it will be useful, but WITHOUT
++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
++ * version 2 for more details (a copy is included in the LICENSE file that
++ * accompanied this code).
++ *
++ * You should have received a copy of the GNU General Public License version
++ * 2 along with this work; if not, write to the Free Software Foundation,
++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
++ *
++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
++ * or visit www.oracle.com if you need additional information or have any
++ * questions.
++ *
++ */
++
++#include "precompiled.hpp"
++#include "jvm.h"
++#include "asm/assembler.hpp"
++#include "asm/assembler.inline.hpp"
++#include "asm/macroAssembler.inline.hpp"
++#include "compiler/disassembler.hpp"
++#include "gc/shared/barrierSet.hpp"
++#include "gc/shared/barrierSetAssembler.hpp"
++#include "gc/shared/collectedHeap.inline.hpp"
++#include "interpreter/bytecodeHistogram.hpp"
++#include "interpreter/interpreter.hpp"
++#include "memory/resourceArea.hpp"
++#include "memory/universe.hpp"
++#include "nativeInst_mips.hpp"
++#include "oops/compressedOops.inline.hpp"
++#include "oops/klass.inline.hpp"
++#include "prims/methodHandles.hpp"
++#include "runtime/biasedLocking.hpp"
++#include "runtime/interfaceSupport.inline.hpp"
++#include "runtime/jniHandles.hpp"
++#include "runtime/objectMonitor.hpp"
++#include "runtime/os.hpp"
++#include "runtime/safepoint.hpp"
++#include "runtime/safepointMechanism.hpp"
++#include "runtime/sharedRuntime.hpp"
++#include "runtime/stubRoutines.hpp"
++#include "utilities/macros.hpp"
++
++#define A0 RA0
++#define A1 RA1
++#define A2 RA2
++#define A3 RA3
++#define A4 RA4
++#define A5 RA5
++#define A6 RA6
++#define A7 RA7
++#define T0 RT0
++#define T1 RT1
++#define T2 RT2
++#define T3 RT3
++#define T8 RT8
++#define T9 RT9
++
++// Implementation of MacroAssembler
++
++intptr_t MacroAssembler::i[32] = {0};
++float MacroAssembler::f[32] = {0.0};
++
++void MacroAssembler::print(outputStream *s) {
++ unsigned int k;
++ for(k=0; k<32; k++) {
++ s->print_cr("i%d = 0x%.16lx", k, i[k]);
++ }
++ s->cr();
++
++ for(k=0; k<32; k++) {
++ s->print_cr("f%d = %f", k, f[k]);
++ }
++ s->cr();
++}
++
++int MacroAssembler::i_offset(unsigned int k) { return (intptr_t)&((MacroAssembler*)0)->i[k]; }
++int MacroAssembler::f_offset(unsigned int k) { return (intptr_t)&((MacroAssembler*)0)->f[k]; }
++
++void MacroAssembler::save_registers(MacroAssembler *masm) {
++#define __ masm->
++ for(int k=0; k<32; k++) {
++ __ sw (as_Register(k), A0, i_offset(k));
++ }
++
++ for(int k=0; k<32; k++) {
++ __ swc1 (as_FloatRegister(k), A0, f_offset(k));
++ }
++#undef __
++}
++
++void MacroAssembler::restore_registers(MacroAssembler *masm) {
++#define __ masm->
++ for(int k=0; k<32; k++) {
++ __ lw (as_Register(k), A0, i_offset(k));
++ }
++
++ for(int k=0; k<32; k++) {
++ __ lwc1 (as_FloatRegister(k), A0, f_offset(k));
++ }
++#undef __
++}
++
++
++void MacroAssembler::pd_patch_instruction(address branch, address target, const char* file, int line) {
++ jint& stub_inst = *(jint*) branch;
++ jint *pc = (jint *)branch;
++
++ if((opcode(stub_inst) == special_op) && (special(stub_inst) == daddu_op)) {
++ //b_far:
++ 
// move(AT, RA); // daddu ++ // emit_long(insn_ORRI(regimm_op, 0, bgezal_op, 1)); ++ // nop(); ++ // lui(T9, 0); // to be patched ++ // ori(T9, 0); ++ // daddu(T9, T9, RA); ++ // move(RA, AT); ++ // jr(T9); ++ ++ assert(opcode(pc[3]) == lui_op ++ && opcode(pc[4]) == ori_op ++ && special(pc[5]) == daddu_op, "Not a branch label patch"); ++ if(!(opcode(pc[3]) == lui_op ++ && opcode(pc[4]) == ori_op ++ && special(pc[5]) == daddu_op)) { tty->print_cr("Not a branch label patch"); } ++ ++ int offset = target - branch; ++ if (!is_simm16(offset)) { ++ pc[3] = (pc[3] & 0xffff0000) | high16(offset - 12); ++ pc[4] = (pc[4] & 0xffff0000) | low16(offset - 12); ++ } else { ++ // revert to "beq + nop" ++ CodeBuffer cb(branch, 4 * 10); ++ MacroAssembler masm(&cb); ++#define __ masm. ++ __ b(target); ++ __ delayed()->nop(); ++ __ nop(); ++ __ nop(); ++ __ nop(); ++ __ nop(); ++ __ nop(); ++ __ nop(); ++ } ++ return; ++ } else if (special(pc[4]) == jr_op ++ && opcode(pc[4]) == special_op ++ && (((opcode(pc[0]) == lui_op) || opcode(pc[0]) == daddiu_op) || (opcode(pc[0]) == ori_op))) { ++ //jmp_far: ++ // patchable_set48(T9, target); ++ // jr(T9); ++ // nop(); ++ ++ CodeBuffer cb(branch, 4 * 4); ++ MacroAssembler masm(&cb); ++ masm.patchable_set48(T9, (long)(target)); ++ return; ++ } ++ ++#ifndef PRODUCT ++ if (!is_simm16((target - branch - 4) >> 2)) { ++ tty->print_cr("Illegal patching: branch = " INTPTR_FORMAT ", target = " INTPTR_FORMAT, p2i(branch), p2i(target)); ++ tty->print_cr("======= Start decoding at branch = " INTPTR_FORMAT " =======", p2i(branch)); ++ Disassembler::decode(branch - 4 * 16, branch + 4 * 16, tty); ++ tty->print_cr("======= End of decoding ======="); ++ } ++#endif ++ ++ stub_inst = patched_branch(target - branch, stub_inst, 0); ++} ++ ++static inline address first_cache_address() { ++ return CodeCache::low_bound() + sizeof(HeapBlock::Header); ++} ++ ++static inline address last_cache_address() { ++ return CodeCache::high_bound() - Assembler::InstructionSize; ++} ++ ++int MacroAssembler::call_size(address target, bool far, bool patchable) { ++ if (patchable) return 6 << Assembler::LogInstructionSize; ++ if (!far) return 2 << Assembler::LogInstructionSize; // jal + nop ++ return (insts_for_set64((jlong)target) + 2) << Assembler::LogInstructionSize; ++} ++ ++// Can we reach target using jal/j from anywhere ++// in the code cache (because code can be relocated)? 
++bool MacroAssembler::reachable_from_cache(address target) { ++ address cl = first_cache_address(); ++ address ch = last_cache_address(); ++ ++ return (cl <= target) && (target <= ch) && fit_in_jal(cl, ch); ++} ++ ++bool MacroAssembler::reachable_from_cache() { ++ if (ForceUnreachable) { ++ return false; ++ } else { ++ address cl = first_cache_address(); ++ address ch = last_cache_address(); ++ ++ return fit_in_jal(cl, ch); ++ } ++} ++ ++void MacroAssembler::general_jump(address target) { ++ if (reachable_from_cache(target)) { ++ j(target); ++ delayed()->nop(); ++ } else { ++ set64(T9, (long)target); ++ jr(T9); ++ delayed()->nop(); ++ } ++} ++ ++int MacroAssembler::insts_for_general_jump(address target) { ++ if (reachable_from_cache(target)) { ++ //j(target); ++ //nop(); ++ return 2; ++ } else { ++ //set64(T9, (long)target); ++ //jr(T9); ++ //nop(); ++ return insts_for_set64((jlong)target) + 2; ++ } ++} ++ ++void MacroAssembler::patchable_jump(address target) { ++ if (reachable_from_cache(target)) { ++ nop(); ++ nop(); ++ nop(); ++ nop(); ++ j(target); ++ delayed()->nop(); ++ } else { ++ patchable_set48(T9, (long)target); ++ jr(T9); ++ delayed()->nop(); ++ } ++} ++ ++int MacroAssembler::insts_for_patchable_jump(address target) { ++ return 6; ++} ++ ++void MacroAssembler::general_call(address target) { ++ if (reachable_from_cache(target)) { ++ jal(target); ++ delayed()->nop(); ++ } else { ++ set64(T9, (long)target); ++ jalr(T9); ++ delayed()->nop(); ++ } ++} ++ ++int MacroAssembler::insts_for_general_call(address target) { ++ if (reachable_from_cache(target)) { ++ //jal(target); ++ //nop(); ++ return 2; ++ } else { ++ //set64(T9, (long)target); ++ //jalr(T9); ++ //nop(); ++ return insts_for_set64((jlong)target) + 2; ++ } ++} ++ ++void MacroAssembler::patchable_call(address target) { ++ if (reachable_from_cache(target)) { ++ nop(); ++ nop(); ++ nop(); ++ nop(); ++ jal(target); ++ delayed()->nop(); ++ } else { ++ patchable_set48(T9, (long)target); ++ jalr(T9); ++ delayed()->nop(); ++ } ++} ++ ++int MacroAssembler::insts_for_patchable_call(address target) { ++ return 6; ++} ++ ++// Maybe emit a call via a trampoline. If the code cache is small ++// trampolines won't be emitted. ++ ++address MacroAssembler::trampoline_call(AddressLiteral entry, CodeBuffer *cbuf) { ++ assert(JavaThread::current()->is_Compiler_thread(), "just checking"); ++ assert(entry.rspec().type() == relocInfo::runtime_call_type ++ || entry.rspec().type() == relocInfo::opt_virtual_call_type ++ || entry.rspec().type() == relocInfo::static_call_type ++ || entry.rspec().type() == relocInfo::virtual_call_type, "wrong reloc type"); ++ ++ address target = entry.target(); ++ if (!reachable_from_cache()) { ++ address stub = emit_trampoline_stub(offset(), target); ++ if (stub == NULL) { ++ return NULL; // CodeCache is full ++ } ++ } ++ ++ if (cbuf) cbuf->set_insts_mark(); ++ relocate(entry.rspec()); ++ ++ if (reachable_from_cache()) { ++ nop(); ++ nop(); ++ nop(); ++ nop(); ++ jal(target); ++ delayed()->nop(); ++ } else { ++ // load the call target from the trampoline stub ++ // branch ++ long dest = (long)pc(); ++ dest += (dest & 0x8000) << 1; ++ lui(T9, dest >> 32); ++ ori(T9, T9, split_low(dest >> 16)); ++ dsll(T9, T9, 16); ++ ld(T9, T9, simm16(split_low(dest))); ++ jalr(T9); ++ delayed()->nop(); ++ } ++ return pc(); ++} ++ ++// Emit a trampoline stub for a call to a target which is too far away. 
++address MacroAssembler::emit_trampoline_stub(int insts_call_instruction_offset, ++ address dest) { ++ // Max stub size: alignment nop, TrampolineStub. ++ address stub = start_a_stub(NativeInstruction::nop_instruction_size ++ + NativeCallTrampolineStub::instruction_size); ++ if (stub == NULL) { ++ return NULL; // CodeBuffer::expand failed ++ } ++ ++ // Create a trampoline stub relocation which relates this trampoline stub ++ // with the call instruction at insts_call_instruction_offset in the ++ // instructions code-section. ++ align(wordSize); ++ relocate(trampoline_stub_Relocation::spec(code()->insts()->start() ++ + insts_call_instruction_offset)); ++ emit_int64((int64_t)dest); ++ end_a_stub(); ++ return stub; ++} ++ ++void MacroAssembler::beq_far(Register rs, Register rt, address entry) { ++ u_char * cur_pc = pc(); ++ ++ // Near/Far jump ++ if(is_simm16((entry - pc() - 4) / 4)) { ++ Assembler::beq(rs, rt, offset(entry)); ++ } else { ++ Label not_jump; ++ bne(rs, rt, not_jump); ++ delayed()->nop(); ++ ++ b_far(entry); ++ delayed()->nop(); ++ ++ bind(not_jump); ++ has_delay_slot(); ++ } ++} ++ ++void MacroAssembler::beq_far(Register rs, Register rt, Label& L) { ++ if (L.is_bound()) { ++ beq_far(rs, rt, target(L)); ++ } else { ++ u_char * cur_pc = pc(); ++ Label not_jump; ++ bne(rs, rt, not_jump); ++ delayed()->nop(); ++ ++ b_far(L); ++ delayed()->nop(); ++ ++ bind(not_jump); ++ has_delay_slot(); ++ } ++} ++ ++void MacroAssembler::bne_far(Register rs, Register rt, address entry) { ++ u_char * cur_pc = pc(); ++ ++ //Near/Far jump ++ if(is_simm16((entry - pc() - 4) / 4)) { ++ Assembler::bne(rs, rt, offset(entry)); ++ } else { ++ Label not_jump; ++ beq(rs, rt, not_jump); ++ delayed()->nop(); ++ ++ b_far(entry); ++ delayed()->nop(); ++ ++ bind(not_jump); ++ has_delay_slot(); ++ } ++} ++ ++void MacroAssembler::bne_far(Register rs, Register rt, Label& L) { ++ if (L.is_bound()) { ++ bne_far(rs, rt, target(L)); ++ } else { ++ u_char * cur_pc = pc(); ++ Label not_jump; ++ beq(rs, rt, not_jump); ++ delayed()->nop(); ++ ++ b_far(L); ++ delayed()->nop(); ++ ++ bind(not_jump); ++ has_delay_slot(); ++ } ++} ++ ++void MacroAssembler::b_far(Label& L) { ++ if (L.is_bound()) { ++ b_far(target(L)); ++ } else { ++ volatile address dest = target(L); ++// ++// MacroAssembler::pd_patch_instruction branch=55651ed514, target=55651ef6d8 ++// 0x00000055651ed514: daddu at, ra, zero ++// 0x00000055651ed518: [4110001]bgezal zero, 0x00000055651ed520 ++// ++// 0x00000055651ed51c: sll zero, zero, 0 ++// 0x00000055651ed520: lui t9, 0x0 ++// 0x00000055651ed524: ori t9, t9, 0x21b8 ++// 0x00000055651ed528: daddu t9, t9, ra ++// 0x00000055651ed52c: daddu ra, at, zero ++// 0x00000055651ed530: jr t9 ++// 0x00000055651ed534: sll zero, zero, 0 ++// ++ move(AT, RA); ++ emit_long(insn_ORRI(regimm_op, 0, bgezal_op, 1)); ++ nop(); ++ lui(T9, 0); // to be patched ++ ori(T9, T9, 0); ++ daddu(T9, T9, RA); ++ move(RA, AT); ++ jr(T9); ++ } ++} ++ ++void MacroAssembler::b_far(address entry) { ++ u_char * cur_pc = pc(); ++ ++ // Near/Far jump ++ if(is_simm16((entry - pc() - 4) / 4)) { ++ b(offset(entry)); ++ } else { ++ // address must be bounded ++ move(AT, RA); ++ emit_long(insn_ORRI(regimm_op, 0, bgezal_op, 1)); ++ nop(); ++ li32(T9, entry - pc()); ++ daddu(T9, T9, RA); ++ move(RA, AT); ++ jr(T9); ++ } ++} ++ ++void MacroAssembler::ld_ptr(Register rt, Register base, Register offset) { ++ addu_long(AT, base, offset); ++ ld_ptr(rt, AT, 0); ++} ++ ++void MacroAssembler::st_ptr(Register rt, Register base, Register offset) { ++ guarantee(AT 
!= rt, "AT must not equal rt"); ++ addu_long(AT, base, offset); ++ st_ptr(rt, AT, 0); ++} ++ ++Address MacroAssembler::as_Address(AddressLiteral adr) { ++ return Address(adr.target(), adr.rspec()); ++} ++ ++Address MacroAssembler::as_Address(ArrayAddress adr) { ++ return Address::make_array(adr); ++} ++ ++// tmp_reg1 and tmp_reg2 should be saved outside of atomic_inc32 (caller saved). ++void MacroAssembler::atomic_inc32(address counter_addr, int inc, Register tmp_reg1, Register tmp_reg2) { ++ Label again; ++ ++ li(tmp_reg1, counter_addr); ++ bind(again); ++ if (UseSyncLevel >= 10000 || UseSyncLevel == 1000 || UseSyncLevel == 4000) sync(); ++ ll(tmp_reg2, tmp_reg1, 0); ++ addiu(tmp_reg2, tmp_reg2, inc); ++ sc(tmp_reg2, tmp_reg1, 0); ++ beq(tmp_reg2, R0, again); ++ delayed()->nop(); ++} ++ ++// Writes to stack successive pages until offset reached to check for ++// stack overflow + shadow pages. This clobbers tmp. ++void MacroAssembler::bang_stack_size(Register size, Register tmp) { ++ assert_different_registers(tmp, size, AT); ++ move(tmp, SP); ++ // Bang stack for total size given plus shadow page size. ++ // Bang one page at a time because large size can bang beyond yellow and ++ // red zones. ++ Label loop; ++ move(AT, os::vm_page_size()); ++ bind(loop); ++ subu(tmp, tmp, AT); ++ subu(size, size, AT); ++ sd(size, tmp, 0); ++ bgtz(size, loop); ++ delayed()->nop(); ++ ++ // Bang down shadow pages too. ++ // At this point, (tmp-0) is the last address touched, so don't ++ // touch it again. (It was touched as (tmp-pagesize) but then tmp ++ // was post-decremented.) Skip this address by starting at i=1, and ++ // touch a few more pages below. N.B. It is important to touch all ++ // the way down to and including i=StackShadowPages. ++ for (int i = 0; i < (int)(StackOverflow::stack_shadow_zone_size() / os::vm_page_size()) - 1; i++) { ++ // this could be any sized move but this is can be a debugging crumb ++ // so the bigger the better. ++ subu(tmp, tmp, AT); ++ sd(size, tmp, 0); ++ } ++} ++ ++void MacroAssembler::reserved_stack_check() { ++ Register thread = TREG; ++#ifndef OPT_THREAD ++ get_thread(thread); ++#endif ++ // testing if reserved zone needs to be enabled ++ Label no_reserved_zone_enabling; ++ ++ ld(AT, Address(thread, JavaThread::reserved_stack_activation_offset())); ++ dsubu(AT, SP, AT); ++ bltz(AT, no_reserved_zone_enabling); ++ delayed()->nop(); ++ ++ enter(); // RA and FP are live. ++ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::enable_stack_reserved_zone), thread); ++ leave(); ++ ++ // We have already removed our own frame. ++ // throw_delayed_StackOverflowError will think that it's been ++ // called by our caller. 
++ li(AT, (long)StubRoutines::throw_delayed_StackOverflowError_entry()); ++ jr(AT); ++ delayed()->nop(); ++ should_not_reach_here(); ++ ++ bind(no_reserved_zone_enabling); ++} ++ ++void MacroAssembler::biased_locking_enter(Register lock_reg, ++ Register obj_reg, ++ Register swap_reg, ++ Register tmp_reg, ++ bool swap_reg_contains_mark, ++ Label& done, ++ Label* slow_case, ++ BiasedLockingCounters* counters) { ++ assert(UseBiasedLocking, "why call this otherwise?"); ++ bool need_tmp_reg = false; ++ if (tmp_reg == noreg) { ++ need_tmp_reg = true; ++ tmp_reg = T9; ++ } ++ assert_different_registers(lock_reg, obj_reg, swap_reg, tmp_reg, AT); ++ assert(markWord::age_shift == markWord::lock_bits + markWord::biased_lock_bits, "biased locking makes assumptions about bit layout"); ++ Address mark_addr (obj_reg, oopDesc::mark_offset_in_bytes()); ++ Address saved_mark_addr(lock_reg, 0); ++ ++ // Biased locking ++ // See whether the lock is currently biased toward our thread and ++ // whether the epoch is still valid ++ // Note that the runtime guarantees sufficient alignment of JavaThread ++ // pointers to allow age to be placed into low bits ++ // First check to see whether biasing is even enabled for this object ++ Label cas_label; ++ if (!swap_reg_contains_mark) { ++ ld_ptr(swap_reg, mark_addr); ++ } ++ ++ if (need_tmp_reg) { ++ push(tmp_reg); ++ } ++ move(tmp_reg, swap_reg); ++ andi(tmp_reg, tmp_reg, markWord::biased_lock_mask_in_place); ++ daddiu(AT, R0, markWord::biased_lock_pattern); ++ dsubu(AT, AT, tmp_reg); ++ if (need_tmp_reg) { ++ pop(tmp_reg); ++ } ++ ++ bne(AT, R0, cas_label); ++ delayed()->nop(); ++ ++ ++ // The bias pattern is present in the object's header. Need to check ++ // whether the bias owner and the epoch are both still current. ++ // Note that because there is no current thread register on MIPS we ++ // need to store off the mark word we read out of the object to ++ // avoid reloading it and needing to recheck invariants below. This ++ // store is unfortunate but it makes the overall code shorter and ++ // simpler. ++ st_ptr(swap_reg, saved_mark_addr); ++ if (need_tmp_reg) { ++ push(tmp_reg); ++ } ++ load_prototype_header(tmp_reg, obj_reg); ++ xorr(tmp_reg, tmp_reg, swap_reg); ++#ifndef OPT_THREAD ++ get_thread(swap_reg); ++ xorr(swap_reg, swap_reg, tmp_reg); ++#else ++ xorr(swap_reg, TREG, tmp_reg); ++#endif ++ ++ move(AT, ~((int) markWord::age_mask_in_place)); ++ andr(swap_reg, swap_reg, AT); ++ ++ if (PrintBiasedLockingStatistics) { ++ Label L; ++ bne(swap_reg, R0, L); ++ delayed()->nop(); ++ push(tmp_reg); ++ push(A0); ++ atomic_inc32((address)BiasedLocking::biased_lock_entry_count_addr(), 1, A0, tmp_reg); ++ pop(A0); ++ pop(tmp_reg); ++ bind(L); ++ } ++ if (need_tmp_reg) { ++ pop(tmp_reg); ++ } ++ beq(swap_reg, R0, done); ++ delayed()->nop(); ++ Label try_revoke_bias; ++ Label try_rebias; ++ ++ // At this point we know that the header has the bias pattern and ++ // that we are not the bias owner in the current epoch. We need to ++ // figure out more details about the state of the header in order to ++ // know what operations can be legally performed on the object's ++ // header. ++ ++ // If the low three bits in the xor result aren't clear, that means ++ // the prototype header is no longer biased and we have to revoke ++ // the bias on this object. ++ ++ move(AT, markWord::biased_lock_mask_in_place); ++ andr(AT, swap_reg, AT); ++ bne(AT, R0, try_revoke_bias); ++ delayed()->nop(); ++ // Biasing is still enabled for this data type. 
See whether the ++ // epoch of the current bias is still valid, meaning that the epoch ++ // bits of the mark word are equal to the epoch bits of the ++ // prototype header. (Note that the prototype header's epoch bits ++ // only change at a safepoint.) If not, attempt to rebias the object ++ // toward the current thread. Note that we must be absolutely sure ++ // that the current epoch is invalid in order to do this because ++ // otherwise the manipulations it performs on the mark word are ++ // illegal. ++ ++ move(AT, markWord::epoch_mask_in_place); ++ andr(AT,swap_reg, AT); ++ bne(AT, R0, try_rebias); ++ delayed()->nop(); ++ // The epoch of the current bias is still valid but we know nothing ++ // about the owner; it might be set or it might be clear. Try to ++ // acquire the bias of the object using an atomic operation. If this ++ // fails we will go in to the runtime to revoke the object's bias. ++ // Note that we first construct the presumed unbiased header so we ++ // don't accidentally blow away another thread's valid bias. ++ ++ ld_ptr(swap_reg, saved_mark_addr); ++ ++ move(AT, markWord::biased_lock_mask_in_place | markWord::age_mask_in_place | markWord::epoch_mask_in_place); ++ andr(swap_reg, swap_reg, AT); ++ ++ if (need_tmp_reg) { ++ push(tmp_reg); ++ } ++#ifndef OPT_THREAD ++ get_thread(tmp_reg); ++ orr(tmp_reg, tmp_reg, swap_reg); ++#else ++ orr(tmp_reg, TREG, swap_reg); ++#endif ++ cmpxchg(Address(obj_reg, 0), swap_reg, tmp_reg, AT, false, false); ++ if (need_tmp_reg) { ++ pop(tmp_reg); ++ } ++ // If the biasing toward our thread failed, this means that ++ // another thread succeeded in biasing it toward itself and we ++ // need to revoke that bias. The revocation will occur in the ++ // interpreter runtime in the slow case. ++ if (PrintBiasedLockingStatistics) { ++ Label L; ++ bne(AT, R0, L); ++ delayed()->nop(); ++ push(tmp_reg); ++ push(A0); ++ atomic_inc32((address)BiasedLocking::anonymously_biased_lock_entry_count_addr(), 1, A0, tmp_reg); ++ pop(A0); ++ pop(tmp_reg); ++ bind(L); ++ } ++ if (slow_case != NULL) { ++ beq_far(AT, R0, *slow_case); ++ delayed()->nop(); ++ } ++ b(done); ++ delayed()->nop(); ++ ++ bind(try_rebias); ++ // At this point we know the epoch has expired, meaning that the ++ // current "bias owner", if any, is actually invalid. Under these ++ // circumstances _only_, we are allowed to use the current header's ++ // value as the comparison value when doing the cas to acquire the ++ // bias in the current epoch. In other words, we allow transfer of ++ // the bias from one thread to another directly in this situation. ++ // ++ // FIXME: due to a lack of registers we currently blow away the age ++ // bits in this situation. Should attempt to preserve them. ++ if (need_tmp_reg) { ++ push(tmp_reg); ++ } ++ load_prototype_header(tmp_reg, obj_reg); ++#ifndef OPT_THREAD ++ get_thread(swap_reg); ++ orr(tmp_reg, tmp_reg, swap_reg); ++#else ++ orr(tmp_reg, tmp_reg, TREG); ++#endif ++ ld_ptr(swap_reg, saved_mark_addr); ++ ++ cmpxchg(Address(obj_reg, 0), swap_reg, tmp_reg, AT, false, false); ++ if (need_tmp_reg) { ++ pop(tmp_reg); ++ } ++ // If the biasing toward our thread failed, then another thread ++ // succeeded in biasing it toward itself and we need to revoke that ++ // bias. The revocation will occur in the runtime in the slow case. 
++ if (PrintBiasedLockingStatistics) { ++ Label L; ++ bne(AT, R0, L); ++ delayed()->nop(); ++ push(AT); ++ push(tmp_reg); ++ atomic_inc32((address)BiasedLocking::rebiased_lock_entry_count_addr(), 1, AT, tmp_reg); ++ pop(tmp_reg); ++ pop(AT); ++ bind(L); ++ } ++ if (slow_case != NULL) { ++ beq_far(AT, R0, *slow_case); ++ delayed()->nop(); ++ } ++ ++ b(done); ++ delayed()->nop(); ++ bind(try_revoke_bias); ++ // The prototype mark in the klass doesn't have the bias bit set any ++ // more, indicating that objects of this data type are not supposed ++ // to be biased any more. We are going to try to reset the mark of ++ // this object to the prototype value and fall through to the ++ // CAS-based locking scheme. Note that if our CAS fails, it means ++ // that another thread raced us for the privilege of revoking the ++ // bias of this particular object, so it's okay to continue in the ++ // normal locking code. ++ // ++ // FIXME: due to a lack of registers we currently blow away the age ++ // bits in this situation. Should attempt to preserve them. ++ ld_ptr(swap_reg, saved_mark_addr); ++ ++ if (need_tmp_reg) { ++ push(tmp_reg); ++ } ++ load_prototype_header(tmp_reg, obj_reg); ++ cmpxchg(Address(obj_reg, 0), swap_reg, tmp_reg, AT, false, false); ++ if (need_tmp_reg) { ++ pop(tmp_reg); ++ } ++ // Fall through to the normal CAS-based lock, because no matter what ++ // the result of the above CAS, some thread must have succeeded in ++ // removing the bias bit from the object's header. ++ if (PrintBiasedLockingStatistics) { ++ Label L; ++ bne(AT, R0, L); ++ delayed()->nop(); ++ push(AT); ++ push(tmp_reg); ++ atomic_inc32((address)BiasedLocking::revoked_lock_entry_count_addr(), 1, AT, tmp_reg); ++ pop(tmp_reg); ++ pop(AT); ++ bind(L); ++ } ++ ++ bind(cas_label); ++} ++ ++void MacroAssembler::biased_locking_exit(Register obj_reg, Register temp_reg, Label& done) { ++ assert(UseBiasedLocking, "why call this otherwise?"); ++ ++ // Check for biased locking unlock case, which is a no-op ++ // Note: we do not have to check the thread ID for two reasons. ++ // First, the interpreter checks for IllegalMonitorStateException at ++ // a higher level. Second, if the bias was revoked while we held the ++ // lock, the object could not be rebiased toward another thread, so ++ // the bias bit would be clear. ++ ld(temp_reg, Address(obj_reg, oopDesc::mark_offset_in_bytes())); ++ andi(temp_reg, temp_reg, markWord::biased_lock_mask_in_place); ++ daddiu(AT, R0, markWord::biased_lock_pattern); ++ ++ beq(AT, temp_reg, done); ++ delayed()->nop(); ++} ++ ++// the stack pointer adjustment is needed. 
see InterpreterMacroAssembler::super_call_VM_leaf ++// this method will handle the stack problem, you need not to preserve the stack space for the argument now ++void MacroAssembler::call_VM_leaf_base(address entry_point, int number_of_arguments) { ++ assert(number_of_arguments <= 4, "just check"); ++ assert(StackAlignmentInBytes == 16, "must be"); ++ move(AT, SP); ++ dins(SP, R0, 0, 4); ++ daddiu(SP, SP, -(StackAlignmentInBytes)); ++ call(entry_point, relocInfo::runtime_call_type); ++ delayed()->sd(AT, SP, 0); ++ ld(SP, SP, 0); ++} ++ ++ ++void MacroAssembler::jmp(address entry) { ++ patchable_set48(T9, (long)entry); ++ jr(T9); ++} ++ ++void MacroAssembler::jmp(address entry, relocInfo::relocType rtype) { ++ switch (rtype) { ++ case relocInfo::runtime_call_type: ++ case relocInfo::none: ++ jmp(entry); ++ break; ++ default: ++ { ++ InstructionMark im(this); ++ relocate(rtype); ++ patchable_set48(T9, (long)entry); ++ jr(T9); ++ } ++ break; ++ } ++} ++ ++void MacroAssembler::jmp_far(Label& L) { ++ if (L.is_bound()) { ++ address entry = target(L); ++ assert(entry != NULL, "jmp most probably wrong"); ++ InstructionMark im(this); ++ ++ relocate(relocInfo::internal_word_type); ++ patchable_set48(T9, (long)entry); ++ } else { ++ InstructionMark im(this); ++ L.add_patch_at(code(), locator()); ++ ++ relocate(relocInfo::internal_word_type); ++ patchable_set48(T9, (long)pc()); ++ } ++ ++ jr(T9); ++ delayed()->nop(); ++} ++void MacroAssembler::mov_metadata(Address dst, Metadata* obj) { ++ int oop_index; ++ if (obj) { ++ oop_index = oop_recorder()->find_index(obj); ++ } else { ++ oop_index = oop_recorder()->allocate_metadata_index(obj); ++ } ++ relocate(metadata_Relocation::spec(oop_index)); ++ patchable_set48(AT, (long)obj); ++ sd(AT, dst); ++} ++ ++void MacroAssembler::mov_metadata(Register dst, Metadata* obj) { ++ int oop_index; ++ if (obj) { ++ oop_index = oop_recorder()->find_index(obj); ++ } else { ++ oop_index = oop_recorder()->allocate_metadata_index(obj); ++ } ++ relocate(metadata_Relocation::spec(oop_index)); ++ patchable_set48(dst, (long)obj); ++} ++ ++void MacroAssembler::call(address entry) { ++// c/c++ code assume T9 is entry point, so we just always move entry to t9 ++// maybe there is some more graceful method to handle this. FIXME ++// For more info, see class NativeCall. 
++ patchable_set48(T9, (long)entry); ++ jalr(T9); ++} ++ ++void MacroAssembler::call(address entry, relocInfo::relocType rtype) { ++ switch (rtype) { ++ case relocInfo::runtime_call_type: ++ case relocInfo::none: ++ call(entry); ++ break; ++ default: ++ { ++ InstructionMark im(this); ++ relocate(rtype); ++ call(entry); ++ } ++ break; ++ } ++} ++ ++void MacroAssembler::call(address entry, RelocationHolder& rh) ++{ ++ switch (rh.type()) { ++ case relocInfo::runtime_call_type: ++ case relocInfo::none: ++ call(entry); ++ break; ++ default: ++ { ++ InstructionMark im(this); ++ relocate(rh); ++ call(entry); ++ } ++ break; ++ } ++} ++ ++void MacroAssembler::ic_call(address entry, jint method_index) { ++ RelocationHolder rh = virtual_call_Relocation::spec(pc(), method_index); ++ patchable_set48(IC_Klass, (long)Universe::non_oop_word()); ++ assert(entry != NULL, "call most probably wrong"); ++ InstructionMark im(this); ++ trampoline_call(AddressLiteral(entry, rh)); ++} ++ ++void MacroAssembler::c2bool(Register r) { ++ sltu(r, R0, r); ++} ++ ++#ifndef PRODUCT ++extern "C" void findpc(intptr_t x); ++#endif ++ ++void MacroAssembler::debug(char* msg/*, RegistersForDebugging* regs*/) { ++ if ( ShowMessageBoxOnError ) { ++ JavaThreadState saved_state = JavaThread::current()->thread_state(); ++ JavaThread::current()->set_thread_state(_thread_in_vm); ++ { ++ // In order to get locks work, we need to fake a in_VM state ++ ttyLocker ttyl; ++ ::tty->print_cr("EXECUTION STOPPED: %s\n", msg); ++ if (CountBytecodes || TraceBytecodes || StopInterpreterAt) { ++ BytecodeCounter::print(); ++ } ++ ++ } ++ ThreadStateTransition::transition(JavaThread::current(), _thread_in_vm, saved_state); ++ } ++ else ++ ::tty->print_cr("=============== DEBUG MESSAGE: %s ================\n", msg); ++} ++ ++ ++void MacroAssembler::stop(const char* msg) { ++ li(A0, (long)msg); ++ call(CAST_FROM_FN_PTR(address, MacroAssembler::debug), relocInfo::runtime_call_type); ++ delayed()->nop(); ++ brk(17); ++} ++ ++void MacroAssembler::warn(const char* msg) { ++ pushad(); ++ li(A0, (long)msg); ++ push(S2); ++ move(S2, SP); // use S2 as a sender SP holder ++ assert(StackAlignmentInBytes == 16, "must be"); ++ dins(SP, R0, 0, 4); // align stack as required by ABI ++ call(CAST_FROM_FN_PTR(address, MacroAssembler::debug), relocInfo::runtime_call_type); ++ delayed()->nop(); ++ move(SP, S2); // use S2 as a sender SP holder ++ pop(S2); ++ popad(); ++} ++ ++void MacroAssembler::increment(Register reg, int imm) { ++ if (!imm) return; ++ if (is_simm16(imm)) { ++ daddiu(reg, reg, imm); ++ } else { ++ move(AT, imm); ++ daddu(reg, reg, AT); ++ } ++} ++ ++void MacroAssembler::decrement(Register reg, int imm) { ++ increment(reg, -imm); ++} ++ ++ ++void MacroAssembler::call_VM(Register oop_result, ++ address entry_point, ++ bool check_exceptions) { ++ call_VM_helper(oop_result, entry_point, 0, check_exceptions); ++} ++ ++void MacroAssembler::call_VM(Register oop_result, ++ address entry_point, ++ Register arg_1, ++ bool check_exceptions) { ++ if (arg_1!=A1) move(A1, arg_1); ++ call_VM_helper(oop_result, entry_point, 1, check_exceptions); ++} ++ ++void MacroAssembler::call_VM(Register oop_result, ++ address entry_point, ++ Register arg_1, ++ Register arg_2, ++ bool check_exceptions) { ++ if (arg_1!=A1) move(A1, arg_1); ++ if (arg_2!=A2) move(A2, arg_2); ++ assert(arg_2 != A1, "smashed argument"); ++ call_VM_helper(oop_result, entry_point, 2, check_exceptions); ++} ++ ++void MacroAssembler::call_VM(Register oop_result, ++ address entry_point, ++ Register arg_1, 
++ Register arg_2, ++ Register arg_3, ++ bool check_exceptions) { ++ if (arg_1!=A1) move(A1, arg_1); ++ if (arg_2!=A2) move(A2, arg_2); assert(arg_2 != A1, "smashed argument"); ++ if (arg_3!=A3) move(A3, arg_3); assert(arg_3 != A1 && arg_3 != A2, "smashed argument"); ++ call_VM_helper(oop_result, entry_point, 3, check_exceptions); ++} ++ ++void MacroAssembler::call_VM(Register oop_result, ++ Register last_java_sp, ++ address entry_point, ++ int number_of_arguments, ++ bool check_exceptions) { ++ call_VM_base(oop_result, NOREG, last_java_sp, entry_point, number_of_arguments, check_exceptions); ++} ++ ++void MacroAssembler::call_VM(Register oop_result, ++ Register last_java_sp, ++ address entry_point, ++ Register arg_1, ++ bool check_exceptions) { ++ if (arg_1 != A1) move(A1, arg_1); ++ call_VM(oop_result, last_java_sp, entry_point, 1, check_exceptions); ++} ++ ++void MacroAssembler::call_VM(Register oop_result, ++ Register last_java_sp, ++ address entry_point, ++ Register arg_1, ++ Register arg_2, ++ bool check_exceptions) { ++ if (arg_1 != A1) move(A1, arg_1); ++ if (arg_2 != A2) move(A2, arg_2); assert(arg_2 != A1, "smashed argument"); ++ call_VM(oop_result, last_java_sp, entry_point, 2, check_exceptions); ++} ++ ++void MacroAssembler::call_VM(Register oop_result, ++ Register last_java_sp, ++ address entry_point, ++ Register arg_1, ++ Register arg_2, ++ Register arg_3, ++ bool check_exceptions) { ++ if (arg_1 != A1) move(A1, arg_1); ++ if (arg_2 != A2) move(A2, arg_2); assert(arg_2 != A1, "smashed argument"); ++ if (arg_3 != A3) move(A3, arg_3); assert(arg_3 != A1 && arg_3 != A2, "smashed argument"); ++ call_VM(oop_result, last_java_sp, entry_point, 3, check_exceptions); ++} ++ ++void MacroAssembler::call_VM_base(Register oop_result, ++ Register java_thread, ++ Register last_java_sp, ++ address entry_point, ++ int number_of_arguments, ++ bool check_exceptions) { ++ ++ address before_call_pc; ++ // determine java_thread register ++ if (!java_thread->is_valid()) { ++#ifndef OPT_THREAD ++ java_thread = T2; ++ get_thread(java_thread); ++#else ++ java_thread = TREG; ++#endif ++ } ++ // determine last_java_sp register ++ if (!last_java_sp->is_valid()) { ++ last_java_sp = SP; ++ } ++ // debugging support ++ assert(number_of_arguments >= 0 , "cannot have negative number of arguments"); ++ assert(number_of_arguments <= 4 , "cannot have negative number of arguments"); ++ assert(java_thread != oop_result , "cannot use the same register for java_thread & oop_result"); ++ assert(java_thread != last_java_sp, "cannot use the same register for java_thread & last_java_sp"); ++ ++ assert(last_java_sp != FP, "this code doesn't work for last_java_sp == fp, which currently can't portably work anyway since C2 doesn't save fp"); ++ ++ // set last Java frame before call ++ before_call_pc = (address)pc(); ++ set_last_Java_frame(java_thread, last_java_sp, FP, before_call_pc); ++ ++ // do the call ++ move(A0, java_thread); ++ call(entry_point, relocInfo::runtime_call_type); ++ delayed()->nop(); ++ ++ // restore the thread (cannot use the pushed argument since arguments ++ // may be overwritten by C code generated by an optimizing compiler); ++ // however can use the register value directly if it is callee saved. 
++#ifndef OPT_THREAD ++ get_thread(java_thread); ++#else ++#ifdef ASSERT ++ { ++ Label L; ++ get_thread(AT); ++ beq(java_thread, AT, L); ++ delayed()->nop(); ++ stop("MacroAssembler::call_VM_base: TREG not callee saved?"); ++ bind(L); ++ } ++#endif ++#endif ++ ++ // discard thread and arguments ++ ld_ptr(SP, java_thread, in_bytes(JavaThread::last_Java_sp_offset())); ++ // reset last Java frame ++ reset_last_Java_frame(java_thread, false); ++ ++ check_and_handle_popframe(java_thread); ++ check_and_handle_earlyret(java_thread); ++ if (check_exceptions) { ++ // check for pending exceptions (java_thread is set upon return) ++ Label L; ++ ld(AT, java_thread, in_bytes(Thread::pending_exception_offset())); ++ beq(AT, R0, L); ++ delayed()->nop(); ++ relocate(relocInfo::internal_word_type); ++ patchable_set48(AT, (long)before_call_pc); ++ push(AT); ++ jmp(StubRoutines::forward_exception_entry(), relocInfo::runtime_call_type); ++ delayed()->nop(); ++ bind(L); ++ } ++ ++ // get oop result if there is one and reset the value in the thread ++ if (oop_result->is_valid()) { ++ ld(oop_result, java_thread, in_bytes(JavaThread::vm_result_offset())); ++ sd(R0, java_thread, in_bytes(JavaThread::vm_result_offset())); ++ verify_oop(oop_result); ++ } ++} ++ ++void MacroAssembler::call_VM_helper(Register oop_result, address entry_point, int number_of_arguments, bool check_exceptions) { ++ ++ move(V0, SP); ++ //we also reserve space for java_thread here ++ assert(StackAlignmentInBytes == 16, "must be"); ++ dins(SP, R0, 0, 4); ++ call_VM_base(oop_result, NOREG, V0, entry_point, number_of_arguments, check_exceptions); ++ ++} ++ ++void MacroAssembler::call_VM_leaf(address entry_point, int number_of_arguments) { ++ call_VM_leaf_base(entry_point, number_of_arguments); ++} ++ ++void MacroAssembler::call_VM_leaf(address entry_point, Register arg_0) { ++ if (arg_0 != A0) move(A0, arg_0); ++ call_VM_leaf(entry_point, 1); ++} ++ ++void MacroAssembler::call_VM_leaf(address entry_point, Register arg_0, Register arg_1) { ++ if (arg_0 != A0) move(A0, arg_0); ++ if (arg_1 != A1) move(A1, arg_1); assert(arg_1 != A0, "smashed argument"); ++ call_VM_leaf(entry_point, 2); ++} ++ ++void MacroAssembler::call_VM_leaf(address entry_point, Register arg_0, Register arg_1, Register arg_2) { ++ if (arg_0 != A0) move(A0, arg_0); ++ if (arg_1 != A1) move(A1, arg_1); assert(arg_1 != A0, "smashed argument"); ++ if (arg_2 != A2) move(A2, arg_2); assert(arg_2 != A0 && arg_2 != A1, "smashed argument"); ++ call_VM_leaf(entry_point, 3); ++} ++void MacroAssembler::super_call_VM_leaf(address entry_point) { ++ MacroAssembler::call_VM_leaf_base(entry_point, 0); ++} ++ ++ ++void MacroAssembler::super_call_VM_leaf(address entry_point, ++ Register arg_1) { ++ if (arg_1 != A0) move(A0, arg_1); ++ MacroAssembler::call_VM_leaf_base(entry_point, 1); ++} ++ ++ ++void MacroAssembler::super_call_VM_leaf(address entry_point, ++ Register arg_1, ++ Register arg_2) { ++ if (arg_1 != A0) move(A0, arg_1); ++ if (arg_2 != A1) move(A1, arg_2); assert(arg_2 != A0, "smashed argument"); ++ MacroAssembler::call_VM_leaf_base(entry_point, 2); ++} ++void MacroAssembler::super_call_VM_leaf(address entry_point, ++ Register arg_1, ++ Register arg_2, ++ Register arg_3) { ++ if (arg_1 != A0) move(A0, arg_1); ++ if (arg_2 != A1) move(A1, arg_2); assert(arg_2 != A0, "smashed argument"); ++ if (arg_3 != A2) move(A2, arg_3); assert(arg_3 != A0 && arg_3 != A1, "smashed argument"); ++ MacroAssembler::call_VM_leaf_base(entry_point, 3); ++} ++ ++void 
MacroAssembler::check_and_handle_earlyret(Register java_thread) { ++} ++ ++void MacroAssembler::check_and_handle_popframe(Register java_thread) { ++} ++ ++void MacroAssembler::null_check(Register reg, int offset) { ++ if (needs_explicit_null_check(offset)) { ++ // provoke OS NULL exception if reg = NULL by ++ // accessing M[reg] w/o changing any (non-CC) registers ++ // NOTE: cmpl is plenty here to provoke a segv ++ lw(AT, reg, 0); ++ } else { ++ // nothing to do, (later) access of M[reg + offset] ++ // will provoke OS NULL exception if reg = NULL ++ } ++} ++ ++void MacroAssembler::enter() { ++ push2(RA, FP); ++ move(FP, SP); ++} ++ ++void MacroAssembler::leave() { ++ move(SP, FP); ++ pop2(RA, FP); ++} ++ ++void MacroAssembler::unimplemented(const char* what) { ++ const char* buf = NULL; ++ { ++ ResourceMark rm; ++ stringStream ss; ++ ss.print("unimplemented: %s", what); ++ buf = code_string(ss.as_string()); ++ } ++ stop(buf); ++} ++ ++void MacroAssembler::get_thread(Register thread) { ++#ifdef MINIMIZE_RAM_USAGE ++// ++// In MIPS64, we don't use full 64-bit address space. ++// Only a small range is actually used. ++// ++// Example: ++// $ cat /proc/13352/maps ++// 120000000-120010000 r-xp 00000000 08:01 41077 /mnt/openjdk6-mips-full/build/linux-mips64/j2sdk-image/bin/java ++// 12001c000-120020000 rw-p 0000c000 08:01 41077 /mnt/openjdk6-mips-full/build/linux-mips64/j2sdk-image/bin/java ++// 120020000-1208dc000 rwxp 00000000 00:00 0 [heap] ++// 555d574000-555d598000 r-xp 00000000 08:01 2073768 /lib/ld-2.12.so ++// 555d598000-555d59c000 rw-p 00000000 00:00 0 ++// ...... ++// 558b1f8000-558b23c000 rwxp 00000000 00:00 0 ++// 558b23c000-558b248000 ---p 00000000 00:00 0 ++// 558b248000-558b28c000 rwxp 00000000 00:00 0 ++// ffff914000-ffff94c000 rwxp 00000000 00:00 0 [stack] ++// ffffffc000-10000000000 r-xp 00000000 00:00 0 [vdso] ++// ++// All stacks are positioned at 0x55________. ++// Therefore, we can utilize the same algorithm used in 32-bit. 
++ // int index = ((uintptr_t)p >> PAGE_SHIFT) & ((1UL << (SP_BITLENGTH - PAGE_SHIFT)) - 1); ++ // Thread* thread = _sp_map[index]; ++ Register tmp; ++ ++ if (thread == AT) ++ tmp = T9; ++ else ++ tmp = AT; ++ ++ move(thread, SP); ++ shr(thread, PAGE_SHIFT); ++ ++ push(tmp); ++ li(tmp, ((1UL << (SP_BITLENGTH - PAGE_SHIFT)) - 1)); ++ andr(thread, thread, tmp); ++ shl(thread, Address::times_ptr); // sizeof(Thread *) ++ li48(tmp, (long)ThreadLocalStorage::sp_map_addr()); ++ addu(tmp, tmp, thread); ++ ld_ptr(thread, tmp, 0); ++ pop(tmp); ++#else ++ if (thread != V0) { ++ push(V0); ++ } ++ pushad_except_v0(); ++ ++ push(S5); ++ move(S5, SP); ++ assert(StackAlignmentInBytes == 16, "must be"); ++ dins(SP, R0, 0, 4); ++ call(CAST_FROM_FN_PTR(address, Thread::current)); ++ //MacroAssembler::call_VM_leaf_base(CAST_FROM_FN_PTR(address, Thread::current), 0); ++ delayed()->nop(); ++ move(SP, S5); ++ pop(S5); ++ ++ popad_except_v0(); ++ if (thread != V0) { ++ move(thread, V0); ++ pop(V0); ++ } ++#endif // MINIMIZE_RAM_USAGE ++} ++ ++void MacroAssembler::reset_last_Java_frame(Register java_thread, bool clear_fp) { ++ // determine java_thread register ++ if (!java_thread->is_valid()) { ++#ifndef OPT_THREAD ++ java_thread = T1; ++ get_thread(java_thread); ++#else ++ java_thread = TREG; ++#endif ++ } ++ // we must set sp to zero to clear frame ++ st_ptr(R0, java_thread, in_bytes(JavaThread::last_Java_sp_offset())); ++ // must clear fp, so that compiled frames are not confused; it is possible ++ // that we need it only for debugging ++ if(clear_fp) { ++ st_ptr(R0, java_thread, in_bytes(JavaThread::last_Java_fp_offset())); ++ } ++ ++ // Always clear the pc because it could have been set by make_walkable() ++ st_ptr(R0, java_thread, in_bytes(JavaThread::last_Java_pc_offset())); ++} ++ ++void MacroAssembler::reset_last_Java_frame(bool clear_fp) { ++ Register thread = TREG; ++#ifndef OPT_THREAD ++ get_thread(thread); ++#endif ++ // we must set sp to zero to clear frame ++ sd(R0, Address(thread, JavaThread::last_Java_sp_offset())); ++ // must clear fp, so that compiled frames are not confused; it is ++ // possible that we need it only for debugging ++ if (clear_fp) { ++ sd(R0, Address(thread, JavaThread::last_Java_fp_offset())); ++ } ++ ++ // Always clear the pc because it could have been set by make_walkable() ++ sd(R0, Address(thread, JavaThread::last_Java_pc_offset())); ++} ++ ++void MacroAssembler::safepoint_poll(Label& slow_path, Register thread_reg) { ++ ld(AT, thread_reg, in_bytes(JavaThread::polling_word_offset())); ++ andi(AT, AT, SafepointMechanism::poll_bit()); ++ bne(AT, R0, slow_path); ++ delayed()->nop(); ++} ++ ++// Just like safepoint_poll, but use an acquiring load for thread- ++// local polling. ++// ++// We need an acquire here to ensure that any subsequent load of the ++// global SafepointSynchronize::_state flag is ordered after this load ++// of the local Thread::_polling page. We don't want this poll to ++// return false (i.e. not safepointing) and a later poll of the global ++// SafepointSynchronize::_state spuriously to return true. ++// ++// This is to avoid a race when we're in a native->Java transition ++// racing the code which wakes up from a safepoint. 
++// ++void MacroAssembler::safepoint_poll_acquire(Label& slow_path, Register thread_reg) { ++ ld(AT, thread_reg, in_bytes(JavaThread::polling_word_offset())); ++ sync(); ++ andi(AT, AT, SafepointMechanism::poll_bit()); ++ bne(AT, R0, slow_path); ++ delayed()->nop(); ++} ++ ++// Calls to C land ++// ++// When entering C land, the fp, & sp of the last Java frame have to be recorded ++// in the (thread-local) JavaThread object. When leaving C land, the last Java fp ++// has to be reset to 0. This is required to allow proper stack traversal. ++void MacroAssembler::set_last_Java_frame(Register java_thread, ++ Register last_java_sp, ++ Register last_java_fp, ++ address last_java_pc) { ++ // determine java_thread register ++ if (!java_thread->is_valid()) { ++#ifndef OPT_THREAD ++ java_thread = T2; ++ get_thread(java_thread); ++#else ++ java_thread = TREG; ++#endif ++ } ++ // determine last_java_sp register ++ if (!last_java_sp->is_valid()) { ++ last_java_sp = SP; ++ } ++ ++ // last_java_fp is optional ++ if (last_java_fp->is_valid()) { ++ st_ptr(last_java_fp, java_thread, in_bytes(JavaThread::last_Java_fp_offset())); ++ } ++ ++ // last_java_pc is optional ++ if (last_java_pc != NULL) { ++ relocate(relocInfo::internal_word_type); ++ patchable_set48(AT, (long)last_java_pc); ++ st_ptr(AT, java_thread, in_bytes(JavaThread::frame_anchor_offset() + JavaFrameAnchor::last_Java_pc_offset())); ++ } ++ st_ptr(last_java_sp, java_thread, in_bytes(JavaThread::last_Java_sp_offset())); ++} ++ ++void MacroAssembler::set_last_Java_frame(Register last_java_sp, ++ Register last_java_fp, ++ address last_java_pc) { ++ // determine last_java_sp register ++ if (!last_java_sp->is_valid()) { ++ last_java_sp = SP; ++ } ++ ++ Register thread = TREG; ++#ifndef OPT_THREAD ++ get_thread(thread); ++#endif ++ // last_java_fp is optional ++ if (last_java_fp->is_valid()) { ++ sd(last_java_fp, Address(thread, JavaThread::last_Java_fp_offset())); ++ } ++ ++ // last_java_pc is optional ++ if (last_java_pc != NULL) { ++ relocate(relocInfo::internal_word_type); ++ patchable_set48(AT, (long)last_java_pc); ++ st_ptr(AT, thread, in_bytes(JavaThread::frame_anchor_offset() + JavaFrameAnchor::last_Java_pc_offset())); ++ } ++ ++ sd(last_java_sp, Address(thread, JavaThread::last_Java_sp_offset())); ++} ++ ++// Defines obj, preserves var_size_in_bytes, okay for t2 == var_size_in_bytes. 
++void MacroAssembler::tlab_allocate(Register obj, Register var_size_in_bytes, int con_size_in_bytes, ++ Register t1, Register t2, Label& slow_case) { ++ Unimplemented(); ++ //BarrierSetAssembler* bs = BarrierSet::barrier_set()->barrier_set_assembler(); ++ //bs->tlab_allocate(this, thread, obj, var_size_in_bytes, con_size_in_bytes, t1, t2, slow_case); ++} ++ ++// Defines obj, preserves var_size_in_bytes ++void MacroAssembler::eden_allocate(Register obj, Register var_size_in_bytes, int con_size_in_bytes, ++ Register t1, Register t2, Label& slow_case) { ++ Unimplemented(); ++ //assert_different_registers(obj, var_size_in_bytes, t1, AT); ++ //BarrierSetAssembler* bs = BarrierSet::barrier_set()->barrier_set_assembler(); ++ //bs->eden_allocate(this, thread, obj, var_size_in_bytes, con_size_in_bytes, t1, slow_case); ++} ++ ++void MacroAssembler::incr_allocated_bytes(Register thread, ++ Register var_size_in_bytes, ++ int con_size_in_bytes, ++ Register t1) { ++ if (!thread->is_valid()) { ++#ifndef OPT_THREAD ++ assert(t1->is_valid(), "need temp reg"); ++ thread = t1; ++ get_thread(thread); ++#else ++ thread = TREG; ++#endif ++ } ++ ++ ld_ptr(AT, thread, in_bytes(JavaThread::allocated_bytes_offset())); ++ if (var_size_in_bytes->is_valid()) { ++ addu(AT, AT, var_size_in_bytes); ++ } else { ++ addiu(AT, AT, con_size_in_bytes); ++ } ++ st_ptr(AT, thread, in_bytes(JavaThread::allocated_bytes_offset())); ++} ++ ++void MacroAssembler::li(Register rd, long imm) { ++ if (imm <= max_jint && imm >= min_jint) { ++ li32(rd, (int)imm); ++ } else if (julong(imm) <= 0xFFFFFFFF) { ++ assert_not_delayed(); ++ // lui sign-extends, so we can't use that. ++ ori(rd, R0, julong(imm) >> 16); ++ dsll(rd, rd, 16); ++ ori(rd, rd, split_low(imm)); ++ } else if ((imm > 0) && is_simm16(imm >> 32)) { ++ // A 48-bit address ++ li48(rd, imm); ++ } else { ++ li64(rd, imm); ++ } ++} ++ ++void MacroAssembler::li32(Register reg, int imm) { ++ if (is_simm16(imm)) { ++ addiu(reg, R0, imm); ++ } else { ++ lui(reg, split_low(imm >> 16)); ++ if (split_low(imm)) ++ ori(reg, reg, split_low(imm)); ++ } ++} ++ ++void MacroAssembler::set64(Register d, jlong value) { ++ assert_not_delayed(); ++ ++ int hi = (int)(value >> 32); ++ int lo = (int)(value & ~0); ++ ++ if (value == lo) { // 32-bit integer ++ if (is_simm16(value)) { ++ daddiu(d, R0, value); ++ } else { ++ lui(d, split_low(value >> 16)); ++ if (split_low(value)) { ++ ori(d, d, split_low(value)); ++ } ++ } ++ } else if (hi == 0) { // hardware zero-extends to upper 32 ++ ori(d, R0, julong(value) >> 16); ++ dsll(d, d, 16); ++ if (split_low(value)) { ++ ori(d, d, split_low(value)); ++ } ++ } else if ((value> 0) && is_simm16(value >> 32)) { // li48 ++ // 4 insts ++ li48(d, value); ++ } else { // li64 ++ // 6 insts ++ li64(d, value); ++ } ++} ++ ++ ++int MacroAssembler::insts_for_set64(jlong value) { ++ int hi = (int)(value >> 32); ++ int lo = (int)(value & ~0); ++ ++ int count = 0; ++ ++ if (value == lo) { // 32-bit integer ++ if (is_simm16(value)) { ++ //daddiu(d, R0, value); ++ count++; ++ } else { ++ //lui(d, split_low(value >> 16)); ++ count++; ++ if (split_low(value)) { ++ //ori(d, d, split_low(value)); ++ count++; ++ } ++ } ++ } else if (hi == 0) { // hardware zero-extends to upper 32 ++ //ori(d, R0, julong(value) >> 16); ++ //dsll(d, d, 16); ++ count += 2; ++ if (split_low(value)) { ++ //ori(d, d, split_low(value)); ++ count++; ++ } ++ } else if ((value> 0) && is_simm16(value >> 32)) { // li48 ++ // 4 insts ++ //li48(d, value); ++ count += 4; ++ } else { // li64 ++ // 6 insts ++ 
//li64(d, value); ++ count += 6; ++ } ++ ++ return count; ++} ++ ++void MacroAssembler::patchable_set48(Register d, jlong value) { ++ assert_not_delayed(); ++ ++ int hi = (int)(value >> 32); ++ int lo = (int)(value & ~0); ++ ++ int count = 0; ++ ++ if (value == lo) { // 32-bit integer ++ if (is_simm16(value)) { ++ daddiu(d, R0, value); ++ count += 1; ++ } else { ++ lui(d, split_low(value >> 16)); ++ count += 1; ++ if (split_low(value)) { ++ ori(d, d, split_low(value)); ++ count += 1; ++ } ++ } ++ } else if (hi == 0) { // hardware zero-extends to upper 32 ++ ori(d, R0, julong(value) >> 16); ++ dsll(d, d, 16); ++ count += 2; ++ if (split_low(value)) { ++ ori(d, d, split_low(value)); ++ count += 1; ++ } ++ } else if ((value> 0) && is_simm16(value >> 32)) { // li48 ++ // 4 insts ++ li48(d, value); ++ count += 4; ++ } else { // li64 ++ tty->print_cr("value = 0x%lx", value); ++ guarantee(false, "Not supported yet !"); ++ } ++ ++ while (count < 4) { ++ nop(); ++ count++; ++ } ++} ++ ++void MacroAssembler::patchable_set32(Register d, jlong value) { ++ assert_not_delayed(); ++ ++ int hi = (int)(value >> 32); ++ int lo = (int)(value & ~0); ++ ++ int count = 0; ++ ++ if (value == lo) { // 32-bit integer ++ if (is_simm16(value)) { ++ daddiu(d, R0, value); ++ count += 1; ++ } else { ++ lui(d, split_low(value >> 16)); ++ count += 1; ++ if (split_low(value)) { ++ ori(d, d, split_low(value)); ++ count += 1; ++ } ++ } ++ } else if (hi == 0) { // hardware zero-extends to upper 32 ++ ori(d, R0, julong(value) >> 16); ++ dsll(d, d, 16); ++ count += 2; ++ if (split_low(value)) { ++ ori(d, d, split_low(value)); ++ count += 1; ++ } ++ } else { ++ tty->print_cr("value = 0x%lx", value); ++ guarantee(false, "Not supported yet !"); ++ } ++ ++ while (count < 3) { ++ nop(); ++ count++; ++ } ++} ++ ++void MacroAssembler::patchable_call32(Register d, jlong value) { ++ assert_not_delayed(); ++ ++ int hi = (int)(value >> 32); ++ int lo = (int)(value & ~0); ++ ++ int count = 0; ++ ++ if (value == lo) { // 32-bit integer ++ if (is_simm16(value)) { ++ daddiu(d, R0, value); ++ count += 1; ++ } else { ++ lui(d, split_low(value >> 16)); ++ count += 1; ++ if (split_low(value)) { ++ ori(d, d, split_low(value)); ++ count += 1; ++ } ++ } ++ } else { ++ tty->print_cr("value = 0x%lx", value); ++ guarantee(false, "Not supported yet !"); ++ } ++ ++ while (count < 2) { ++ nop(); ++ count++; ++ } ++} ++ ++void MacroAssembler::set_narrow_klass(Register dst, Klass* k) { ++ assert(UseCompressedClassPointers, "should only be used for compressed header"); ++ assert(oop_recorder() != NULL, "this assembler needs an OopRecorder"); ++ ++ int klass_index = oop_recorder()->find_index(k); ++ RelocationHolder rspec = metadata_Relocation::spec(klass_index); ++ long narrowKlass = (long)CompressedKlassPointers::encode(k); ++ ++ relocate(rspec, Assembler::narrow_oop_operand); ++ patchable_set48(dst, narrowKlass); ++} ++ ++ ++void MacroAssembler::set_narrow_oop(Register dst, jobject obj) { ++ assert(UseCompressedOops, "should only be used for compressed header"); ++ assert(oop_recorder() != NULL, "this assembler needs an OopRecorder"); ++ ++ int oop_index = oop_recorder()->find_index(obj); ++ RelocationHolder rspec = oop_Relocation::spec(oop_index); ++ ++ relocate(rspec, Assembler::narrow_oop_operand); ++ patchable_set48(dst, oop_index); ++} ++ ++// ((OopHandle)result).resolve(); ++void MacroAssembler::resolve_oop_handle(Register result, Register tmp) { ++ // OopHandle::resolve is an indirection. 
++ access_load_at(T_OBJECT, IN_NATIVE, result, Address(result, 0), tmp, NOREG); ++} ++ ++void MacroAssembler::load_mirror(Register mirror, Register method, Register tmp) { ++ // get mirror ++ const int mirror_offset = in_bytes(Klass::java_mirror_offset()); ++ ld_ptr(mirror, method, in_bytes(Method::const_offset())); ++ ld_ptr(mirror, mirror, in_bytes(ConstMethod::constants_offset())); ++ ld_ptr(mirror, mirror, ConstantPool::pool_holder_offset_in_bytes()); ++ ld_ptr(mirror, mirror, mirror_offset); ++ resolve_oop_handle(mirror, tmp); ++} ++ ++void MacroAssembler::li64(Register rd, long imm) { ++ assert_not_delayed(); ++ lui(rd, split_low(imm >> 48)); ++ ori(rd, rd, split_low(imm >> 32)); ++ dsll(rd, rd, 16); ++ ori(rd, rd, split_low(imm >> 16)); ++ dsll(rd, rd, 16); ++ ori(rd, rd, split_low(imm)); ++} ++ ++void MacroAssembler::li48(Register rd, long imm) { ++ assert_not_delayed(); ++ assert(is_simm16(imm >> 32), "Not a 48-bit address"); ++ lui(rd, imm >> 32); ++ ori(rd, rd, split_low(imm >> 16)); ++ dsll(rd, rd, 16); ++ ori(rd, rd, split_low(imm)); ++} ++ ++void MacroAssembler::verify_oop(Register reg, const char* s) { ++ if (!VerifyOops) return; ++ const char * b = NULL; ++ stringStream ss; ++ ss.print("verify_oop: %s: %s", reg->name(), s); ++ b = code_string(ss.as_string()); ++ pushad(); ++ move(A1, reg); ++ li(A0, (long)b); ++ li(AT, (long)StubRoutines::verify_oop_subroutine_entry_address()); ++ ld(T9, AT, 0); ++ jalr(T9); ++ delayed()->nop(); ++ popad(); ++} ++ ++ ++void MacroAssembler::verify_oop_addr(Address addr, const char* s) { ++ if (!VerifyOops) { ++ nop(); ++ return; ++ } ++ // Pass register number to verify_oop_subroutine ++ const char * b = NULL; ++ stringStream ss; ++ ss.print("verify_oop_addr: %s", s); ++ b = code_string(ss.as_string()); ++ ++ addiu(SP, SP, - 7 * wordSize); ++ st_ptr(T0, SP, 6 * wordSize); ++ st_ptr(T1, SP, 5 * wordSize); ++ st_ptr(RA, SP, 4 * wordSize); ++ st_ptr(A0, SP, 3 * wordSize); ++ st_ptr(A1, SP, 2 * wordSize); ++ st_ptr(AT, SP, 1 * wordSize); ++ st_ptr(T9, SP, 0); ++ ++ // addr may contain sp so we will have to adjust it based on the ++ // pushes that we just did. 
++ if (addr.uses(SP)) { ++ lea(A1, addr); ++ ld_ptr(A1, Address(A1, 7 * wordSize)); ++ } else { ++ ld_ptr(A1, addr); ++ } ++ li(A0, (long)b); ++ // call indirectly to solve generation ordering problem ++ li(AT, (long)StubRoutines::verify_oop_subroutine_entry_address()); ++ ld_ptr(T9, AT, 0); ++ jalr(T9); ++ delayed()->nop(); ++ ld_ptr(T0, SP, 6* wordSize); ++ ld_ptr(T1, SP, 5* wordSize); ++ ld_ptr(RA, SP, 4* wordSize); ++ ld_ptr(A0, SP, 3* wordSize); ++ ld_ptr(A1, SP, 2* wordSize); ++ ld_ptr(AT, SP, 1* wordSize); ++ ld_ptr(T9, SP, 0* wordSize); ++ addiu(SP, SP, 7 * wordSize); ++} ++ ++// used registers : T0, T1 ++void MacroAssembler::verify_oop_subroutine() { ++ // RA: ra ++ // A0: char* error message ++ // A1: oop object to verify ++ ++ Label exit, error; ++ // increment counter ++ li(T0, (long)StubRoutines::verify_oop_count_addr()); ++ lw(AT, T0, 0); ++ daddiu(AT, AT, 1); ++ sw(AT, T0, 0); ++ ++ // make sure object is 'reasonable' ++ beq(A1, R0, exit); // if obj is NULL it is ok ++ delayed()->nop(); ++ ++ // Check if the oop is in the right area of memory ++ // const int oop_mask = Universe::verify_oop_mask(); ++ // const int oop_bits = Universe::verify_oop_bits(); ++ const uintptr_t oop_mask = Universe::verify_oop_mask(); ++ const uintptr_t oop_bits = Universe::verify_oop_bits(); ++ li(AT, oop_mask); ++ andr(T0, A1, AT); ++ li(AT, oop_bits); ++ bne(T0, AT, error); ++ delayed()->nop(); ++ ++ // make sure klass is 'reasonable' ++ // add for compressedoops ++ reinit_heapbase(); ++ // add for compressedoops ++ load_klass(T0, A1); ++ beq(T0, R0, error); // if klass is NULL it is broken ++ delayed()->nop(); ++ // return if everything seems ok ++ bind(exit); ++ ++ jr(RA); ++ delayed()->nop(); ++ ++ // handle errors ++ bind(error); ++ pushad(); ++ call(CAST_FROM_FN_PTR(address, MacroAssembler::debug), relocInfo::runtime_call_type); ++ delayed()->nop(); ++ popad(); ++ jr(RA); ++ delayed()->nop(); ++} ++ ++void MacroAssembler::verify_tlab(Register t1, Register t2) { ++#ifdef ASSERT ++ assert_different_registers(t1, t2, AT); ++ if (UseTLAB && VerifyOops) { ++ Label next, ok; ++ ++ get_thread(t1); ++ ++ ld_ptr(t2, t1, in_bytes(JavaThread::tlab_top_offset())); ++ ld_ptr(AT, t1, in_bytes(JavaThread::tlab_start_offset())); ++ sltu(AT, t2, AT); ++ beq(AT, R0, next); ++ delayed()->nop(); ++ ++ stop("assert(top >= start)"); ++ ++ bind(next); ++ ld_ptr(AT, t1, in_bytes(JavaThread::tlab_end_offset())); ++ sltu(AT, AT, t2); ++ beq(AT, R0, ok); ++ delayed()->nop(); ++ ++ stop("assert(top <= end)"); ++ ++ bind(ok); ++ ++ } ++#endif ++} ++RegisterOrConstant MacroAssembler::delayed_value_impl(intptr_t* delayed_value_addr, ++ Register tmp, ++ int offset) { ++ intptr_t value = *delayed_value_addr; ++ if (value != 0) ++ return RegisterOrConstant(value + offset); ++ Unimplemented(); ++ //AddressLiteral a(delayed_value_addr); ++ // load indirectly to solve generation ordering problem ++ //movptr(tmp, ExternalAddress((address) delayed_value_addr)); ++ //ld(tmp, a); ++ if (offset != 0) ++ daddiu(tmp,tmp, offset); ++ ++ return RegisterOrConstant(tmp); ++} ++ ++void MacroAssembler::hswap(Register reg) { ++ //short ++ //andi(reg, reg, 0xffff); ++ srl(AT, reg, 8); ++ sll(reg, reg, 24); ++ sra(reg, reg, 16); ++ orr(reg, reg, AT); ++} ++ ++void MacroAssembler::huswap(Register reg) { ++ dsrl(AT, reg, 8); ++ dsll(reg, reg, 24); ++ dsrl(reg, reg, 16); ++ orr(reg, reg, AT); ++ andi(reg, reg, 0xffff); ++} ++ ++// something funny to do this will only one more register AT ++// 32 bits ++void MacroAssembler::swap(Register reg) { ++ 
srl(AT, reg, 8); ++ sll(reg, reg, 24); ++ orr(reg, reg, AT); ++ //reg : 4 1 2 3 ++ srl(AT, AT, 16); ++ xorr(AT, AT, reg); ++ andi(AT, AT, 0xff); ++ //AT : 0 0 0 1^3); ++ xorr(reg, reg, AT); ++ //reg : 4 1 2 1 ++ sll(AT, AT, 16); ++ xorr(reg, reg, AT); ++ //reg : 4 3 2 1 ++} ++ ++void MacroAssembler::cmpxchg(Address addr, Register oldval, Register newval, ++ Register resflag, bool retold, bool barrier, ++ bool weak, bool exchange) { ++ assert(oldval != resflag, "oldval != resflag"); ++ assert(newval != resflag, "newval != resflag"); ++ assert(addr.base() != resflag, "addr.base() != resflag"); ++ Label again, succ, fail; ++ ++ bind(again); ++ lld(resflag, addr); ++ bne(resflag, oldval, fail); ++ delayed()->nop(); ++ move(resflag, newval); ++ scd(resflag, addr); ++ if (weak) { ++ b(succ); ++ } else { ++ beq(resflag, R0, again); ++ } ++ delayed()->nop(); ++ if (exchange) { ++ move(resflag, oldval); ++ } ++ b(succ); ++ delayed()->nop(); ++ ++ bind(fail); ++ if (barrier) ++ sync(); ++ if (retold && oldval != R0) ++ move(oldval, resflag); ++ if (!exchange) { ++ move(resflag, R0); ++ } ++ bind(succ); ++} ++ ++void MacroAssembler::cmpxchg(Address addr, Register oldval, Register newval, ++ Register tmp, bool retold, bool barrier, Label& succ, Label* fail) { ++ assert(oldval != tmp, "oldval != tmp"); ++ assert(newval != tmp, "newval != tmp"); ++ Label again, neq; ++ ++ bind(again); ++ lld(tmp, addr); ++ bne(tmp, oldval, neq); ++ delayed()->nop(); ++ move(tmp, newval); ++ scd(tmp, addr); ++ beq(tmp, R0, again); ++ delayed()->nop(); ++ b(succ); ++ delayed()->nop(); ++ ++ bind(neq); ++ if (barrier) ++ sync(); ++ if (retold && oldval != R0) ++ move(oldval, tmp); ++ if (fail) { ++ b(*fail); ++ delayed()->nop(); ++ } ++} ++ ++void MacroAssembler::cmpxchg32(Address addr, Register oldval, Register newval, ++ Register resflag, bool sign, bool retold, bool barrier, ++ bool weak, bool exchange) { ++ assert(oldval != resflag, "oldval != resflag"); ++ assert(newval != resflag, "newval != resflag"); ++ assert(addr.base() != resflag, "addr.base() != resflag"); ++ Label again, succ, fail; ++ ++ bind(again); ++ ll(resflag, addr); ++ if (!sign) ++ dinsu(resflag, R0, 32, 32); ++ bne(resflag, oldval, fail); ++ delayed()->nop(); ++ move(resflag, newval); ++ sc(resflag, addr); ++ if (weak) { ++ b(succ); ++ } else { ++ beq(resflag, R0, again); ++ } ++ delayed()->nop(); ++ if (exchange) { ++ move(resflag, oldval); ++ } ++ b(succ); ++ delayed()->nop(); ++ ++ bind(fail); ++ if (barrier) ++ sync(); ++ if (retold && oldval != R0) ++ move(oldval, resflag); ++ if (!exchange) { ++ move(resflag, R0); ++ } ++ bind(succ); ++} ++ ++void MacroAssembler::cmpxchg32(Address addr, Register oldval, Register newval, Register tmp, ++ bool sign, bool retold, bool barrier, Label& succ, Label* fail) { ++ assert(oldval != tmp, "oldval != tmp"); ++ assert(newval != tmp, "newval != tmp"); ++ Label again, neq; ++ ++ bind(again); ++ ll(tmp, addr); ++ if (!sign) ++ dinsu(tmp, R0, 32, 32); ++ bne(tmp, oldval, neq); ++ delayed()->nop(); ++ move(tmp, newval); ++ sc(tmp, addr); ++ beq(tmp, R0, again); ++ delayed()->nop(); ++ b(succ); ++ delayed()->nop(); ++ ++ bind(neq); ++ if (barrier) ++ sync(); ++ if (retold && oldval != R0) ++ move(oldval, tmp); ++ if (fail) { ++ b(*fail); ++ delayed()->nop(); ++ } ++} ++ ++void MacroAssembler::cmpxchg8(Register x_regLo, Register x_regHi, Address dest, Register c_regLo, Register c_regHi) { ++ Label done, again, nequal; ++ ++ Register x_reg = x_regLo; ++ dsll32(x_regHi, x_regHi, 0); ++ dsll32(x_regLo, x_regLo, 0); ++ 
dsrl32(x_regLo, x_regLo, 0); ++ orr(x_reg, x_regLo, x_regHi); ++ ++ Register c_reg = c_regLo; ++ dsll32(c_regHi, c_regHi, 0); ++ dsll32(c_regLo, c_regLo, 0); ++ dsrl32(c_regLo, c_regLo, 0); ++ orr(c_reg, c_regLo, c_regHi); ++ ++ bind(again); ++ ++ if (UseSyncLevel >= 10000 || UseSyncLevel == 1000 || UseSyncLevel == 4000) sync(); ++ lld(AT, dest); ++ bne(AT, c_reg, nequal); ++ delayed()->nop(); ++ ++ //move(AT, x_reg); ++ daddu(AT, x_reg, R0); ++ scd(AT, dest); ++ beq(AT, R0, again); ++ delayed()->nop(); ++ b(done); ++ delayed()->nop(); ++ ++ // not xchged ++ bind(nequal); ++ sync(); ++ //move(c_reg, AT); ++ //move(AT, R0); ++ daddu(c_reg, AT, R0); ++ daddu(AT, R0, R0); ++ bind(done); ++} ++ ++// be sure the three register is different ++void MacroAssembler::rem_s(FloatRegister fd, FloatRegister fs, FloatRegister ft, FloatRegister tmp) { ++ assert_different_registers(tmp, fs, ft); ++ div_s(tmp, fs, ft); ++ trunc_l_s(tmp, tmp); ++ cvt_s_l(tmp, tmp); ++ mul_s(tmp, tmp, ft); ++ sub_s(fd, fs, tmp); ++} ++ ++// be sure the three register is different ++void MacroAssembler::rem_d(FloatRegister fd, FloatRegister fs, FloatRegister ft, FloatRegister tmp) { ++ assert_different_registers(tmp, fs, ft); ++ div_d(tmp, fs, ft); ++ trunc_l_d(tmp, tmp); ++ cvt_d_l(tmp, tmp); ++ mul_d(tmp, tmp, ft); ++ sub_d(fd, fs, tmp); ++} ++ ++void MacroAssembler::align(int modulus) { ++ while (offset() % modulus != 0) nop(); ++} ++ ++ ++void MacroAssembler::verify_FPU(int stack_depth, const char* s) { ++ //Unimplemented(); ++} ++ ++Register caller_saved_registers[] = {AT, V0, V1, A0, A1, A2, A3, A4, A5, A6, A7, T0, T1, T2, T3, T8, T9, GP, RA, FP}; ++Register caller_saved_registers_except_v0[] = {AT, V1, A0, A1, A2, A3, A4, A5, A6, A7, T0, T1, T2, T3, T8, T9, GP, RA, FP}; ++ ++//In MIPS64, F0~23 are all caller-saved registers ++FloatRegister caller_saved_fpu_registers[] = {F0, F12, F13}; ++ ++// We preserve all caller-saved register ++void MacroAssembler::pushad(){ ++ int i; ++ ++ // Fixed-point registers ++ int len = sizeof(caller_saved_registers) / sizeof(caller_saved_registers[0]); ++ daddiu(SP, SP, -1 * len * wordSize); ++ for (i = 0; i < len; i++) ++ { ++ sd(caller_saved_registers[i], SP, (len - i - 1) * wordSize); ++ } ++ ++ // Floating-point registers ++ len = sizeof(caller_saved_fpu_registers) / sizeof(caller_saved_fpu_registers[0]); ++ daddiu(SP, SP, -1 * len * wordSize); ++ for (i = 0; i < len; i++) ++ { ++ sdc1(caller_saved_fpu_registers[i], SP, (len - i - 1) * wordSize); ++ } ++}; ++ ++void MacroAssembler::popad(){ ++ int i; ++ ++ // Floating-point registers ++ int len = sizeof(caller_saved_fpu_registers) / sizeof(caller_saved_fpu_registers[0]); ++ for (i = 0; i < len; i++) ++ { ++ ldc1(caller_saved_fpu_registers[i], SP, (len - i - 1) * wordSize); ++ } ++ daddiu(SP, SP, len * wordSize); ++ ++ // Fixed-point registers ++ len = sizeof(caller_saved_registers) / sizeof(caller_saved_registers[0]); ++ for (i = 0; i < len; i++) ++ { ++ ld(caller_saved_registers[i], SP, (len - i - 1) * wordSize); ++ } ++ daddiu(SP, SP, len * wordSize); ++}; ++ ++// We preserve all caller-saved register except V0 ++void MacroAssembler::pushad_except_v0() { ++ int i; ++ ++ // Fixed-point registers ++ int len = sizeof(caller_saved_registers_except_v0) / sizeof(caller_saved_registers_except_v0[0]); ++ daddiu(SP, SP, -1 * len * wordSize); ++ for (i = 0; i < len; i++) { ++ sd(caller_saved_registers_except_v0[i], SP, (len - i - 1) * wordSize); ++ } ++ ++ // Floating-point registers ++ len = sizeof(caller_saved_fpu_registers) / 
sizeof(caller_saved_fpu_registers[0]); ++ daddiu(SP, SP, -1 * len * wordSize); ++ for (i = 0; i < len; i++) { ++ sdc1(caller_saved_fpu_registers[i], SP, (len - i - 1) * wordSize); ++ } ++} ++ ++void MacroAssembler::popad_except_v0() { ++ int i; ++ ++ // Floating-point registers ++ int len = sizeof(caller_saved_fpu_registers) / sizeof(caller_saved_fpu_registers[0]); ++ for (i = 0; i < len; i++) { ++ ldc1(caller_saved_fpu_registers[i], SP, (len - i - 1) * wordSize); ++ } ++ daddiu(SP, SP, len * wordSize); ++ ++ // Fixed-point registers ++ len = sizeof(caller_saved_registers_except_v0) / sizeof(caller_saved_registers_except_v0[0]); ++ for (i = 0; i < len; i++) { ++ ld(caller_saved_registers_except_v0[i], SP, (len - i - 1) * wordSize); ++ } ++ daddiu(SP, SP, len * wordSize); ++} ++ ++void MacroAssembler::push2(Register reg1, Register reg2) { ++ daddiu(SP, SP, -16); ++ sd(reg1, SP, 8); ++ sd(reg2, SP, 0); ++} ++ ++void MacroAssembler::pop2(Register reg1, Register reg2) { ++ ld(reg1, SP, 8); ++ ld(reg2, SP, 0); ++ daddiu(SP, SP, 16); ++} ++ ++void MacroAssembler::load_method_holder(Register holder, Register method) { ++ ld(holder, Address(method, Method::const_offset())); // ConstMethod* ++ ld(holder, Address(holder, ConstMethod::constants_offset())); // ConstantPool* ++ ld(holder, Address(holder, ConstantPool::pool_holder_offset_in_bytes())); // InstanceKlass* ++} ++ ++// for UseCompressedOops Option ++void MacroAssembler::load_klass(Register dst, Register src) { ++ if(UseCompressedClassPointers){ ++ lwu(dst, Address(src, oopDesc::klass_offset_in_bytes())); ++ decode_klass_not_null(dst); ++ } else ++ ld(dst, src, oopDesc::klass_offset_in_bytes()); ++} ++ ++void MacroAssembler::store_klass(Register dst, Register src) { ++ if(UseCompressedClassPointers){ ++ encode_klass_not_null(src); ++ sw(src, dst, oopDesc::klass_offset_in_bytes()); ++ } else { ++ sd(src, dst, oopDesc::klass_offset_in_bytes()); ++ } ++} ++ ++void MacroAssembler::load_prototype_header(Register dst, Register src) { ++ load_klass(dst, src); ++ ld(dst, Address(dst, Klass::prototype_header_offset())); ++} ++ ++void MacroAssembler::store_klass_gap(Register dst, Register src) { ++ if (UseCompressedClassPointers) { ++ sw(src, dst, oopDesc::klass_gap_offset_in_bytes()); ++ } ++} ++ ++void MacroAssembler::access_load_at(BasicType type, DecoratorSet decorators, Register dst, Address src, ++ Register tmp1, Register thread_tmp) { ++ BarrierSetAssembler* bs = BarrierSet::barrier_set()->barrier_set_assembler(); ++ decorators = AccessInternal::decorator_fixup(decorators); ++ bool as_raw = (decorators & AS_RAW) != 0; ++ if (as_raw) { ++ bs->BarrierSetAssembler::load_at(this, decorators, type, dst, src, tmp1, thread_tmp); ++ } else { ++ bs->load_at(this, decorators, type, dst, src, tmp1, thread_tmp); ++ } ++} ++ ++void MacroAssembler::access_store_at(BasicType type, DecoratorSet decorators, Address dst, Register src, ++ Register tmp1, Register tmp2) { ++ BarrierSetAssembler* bs = BarrierSet::barrier_set()->barrier_set_assembler(); ++ decorators = AccessInternal::decorator_fixup(decorators); ++ bool as_raw = (decorators & AS_RAW) != 0; ++ if (as_raw) { ++ bs->BarrierSetAssembler::store_at(this, decorators, type, dst, src, tmp1, tmp2); ++ } else { ++ bs->store_at(this, decorators, type, dst, src, tmp1, tmp2); ++ } ++} ++ ++void MacroAssembler::load_heap_oop(Register dst, Address src, Register tmp1, ++ Register thread_tmp, DecoratorSet decorators) { ++ access_load_at(T_OBJECT, IN_HEAP | decorators, dst, src, tmp1, thread_tmp); ++} ++ ++// Doesn't 
do verfication, generates fixed size code ++void MacroAssembler::load_heap_oop_not_null(Register dst, Address src, Register tmp1, ++ Register thread_tmp, DecoratorSet decorators) { ++ access_load_at(T_OBJECT, IN_HEAP | IS_NOT_NULL | decorators, dst, src, tmp1, thread_tmp); ++} ++ ++void MacroAssembler::store_heap_oop(Address dst, Register src, Register tmp1, ++ Register tmp2, DecoratorSet decorators) { ++ access_store_at(T_OBJECT, IN_HEAP | decorators, dst, src, tmp1, tmp2); ++} ++ ++// Used for storing NULLs. ++void MacroAssembler::store_heap_oop_null(Address dst) { ++ access_store_at(T_OBJECT, IN_HEAP, dst, noreg, noreg, noreg); ++} ++ ++#ifdef ASSERT ++void MacroAssembler::verify_heapbase(const char* msg) { ++ assert (UseCompressedOops || UseCompressedClassPointers, "should be compressed"); ++ assert (Universe::heap() != NULL, "java heap should be initialized"); ++} ++#endif ++ ++ ++// Algorithm must match oop.inline.hpp encode_heap_oop. ++void MacroAssembler::encode_heap_oop(Register r) { ++#ifdef ASSERT ++ verify_heapbase("MacroAssembler::encode_heap_oop:heap base corrupted?"); ++#endif ++ verify_oop(r, "broken oop in encode_heap_oop"); ++ if (CompressedOops::base() == NULL) { ++ if (CompressedOops::shift() != 0) { ++ assert (LogMinObjAlignmentInBytes == CompressedOops::shift(), "decode alg wrong"); ++ shr(r, LogMinObjAlignmentInBytes); ++ } ++ return; ++ } ++ ++ movz(r, S5_heapbase, r); ++ dsubu(r, r, S5_heapbase); ++ if (CompressedOops::shift() != 0) { ++ assert (LogMinObjAlignmentInBytes == CompressedOops::shift(), "decode alg wrong"); ++ shr(r, LogMinObjAlignmentInBytes); ++ } ++} ++ ++void MacroAssembler::encode_heap_oop(Register dst, Register src) { ++#ifdef ASSERT ++ verify_heapbase("MacroAssembler::encode_heap_oop:heap base corrupted?"); ++#endif ++ verify_oop(src, "broken oop in encode_heap_oop"); ++ if (CompressedOops::base() == NULL) { ++ if (CompressedOops::shift() != 0) { ++ assert (LogMinObjAlignmentInBytes == CompressedOops::shift(), "decode alg wrong"); ++ dsrl(dst, src, LogMinObjAlignmentInBytes); ++ } else { ++ if (dst != src) move(dst, src); ++ } ++ } else { ++ if (dst == src) { ++ movz(dst, S5_heapbase, dst); ++ dsubu(dst, dst, S5_heapbase); ++ if (CompressedOops::shift() != 0) { ++ assert (LogMinObjAlignmentInBytes == CompressedOops::shift(), "decode alg wrong"); ++ shr(dst, LogMinObjAlignmentInBytes); ++ } ++ } else { ++ dsubu(dst, src, S5_heapbase); ++ if (CompressedOops::shift() != 0) { ++ assert (LogMinObjAlignmentInBytes == CompressedOops::shift(), "decode alg wrong"); ++ shr(dst, LogMinObjAlignmentInBytes); ++ } ++ movz(dst, R0, src); ++ } ++ } ++} ++ ++void MacroAssembler::encode_heap_oop_not_null(Register r) { ++ assert (UseCompressedOops, "should be compressed"); ++#ifdef ASSERT ++ if (CheckCompressedOops) { ++ Label ok; ++ bne(r, R0, ok); ++ delayed()->nop(); ++ stop("null oop passed to encode_heap_oop_not_null"); ++ bind(ok); ++ } ++#endif ++ verify_oop(r, "broken oop in encode_heap_oop_not_null"); ++ if (CompressedOops::base() != NULL) { ++ dsubu(r, r, S5_heapbase); ++ } ++ if (CompressedOops::shift() != 0) { ++ assert (LogMinObjAlignmentInBytes == CompressedOops::shift(), "decode alg wrong"); ++ shr(r, LogMinObjAlignmentInBytes); ++ } ++ ++} ++ ++void MacroAssembler::encode_heap_oop_not_null(Register dst, Register src) { ++ assert (UseCompressedOops, "should be compressed"); ++#ifdef ASSERT ++ if (CheckCompressedOops) { ++ Label ok; ++ bne(src, R0, ok); ++ delayed()->nop(); ++ stop("null oop passed to encode_heap_oop_not_null2"); ++ bind(ok); ++ } 
++#endif ++ verify_oop(src, "broken oop in encode_heap_oop_not_null2"); ++ ++ if (CompressedOops::base() != NULL) { ++ dsubu(dst, src, S5_heapbase); ++ if (CompressedOops::shift() != 0) { ++ assert (LogMinObjAlignmentInBytes == CompressedOops::shift(), "decode alg wrong"); ++ shr(dst, LogMinObjAlignmentInBytes); ++ } ++ } else { ++ if (CompressedOops::shift() != 0) { ++ assert (LogMinObjAlignmentInBytes == CompressedOops::shift(), "decode alg wrong"); ++ dsrl(dst, src, LogMinObjAlignmentInBytes); ++ } else { ++ if (dst != src) move(dst, src); ++ } ++ } ++} ++ ++void MacroAssembler::decode_heap_oop(Register r) { ++#ifdef ASSERT ++ verify_heapbase("MacroAssembler::decode_heap_oop corrupted?"); ++#endif ++ if (CompressedOops::base() == NULL) { ++ if (CompressedOops::shift() != 0) { ++ assert (LogMinObjAlignmentInBytes == CompressedOops::shift(), "decode alg wrong"); ++ shl(r, LogMinObjAlignmentInBytes); ++ } ++ } else { ++ move(AT, r); ++ if (CompressedOops::shift() != 0) { ++ assert (LogMinObjAlignmentInBytes == CompressedOops::shift(), "decode alg wrong"); ++ shl(r, LogMinObjAlignmentInBytes); ++ } ++ daddu(r, r, S5_heapbase); ++ movz(r, R0, AT); ++ } ++ verify_oop(r, "broken oop in decode_heap_oop"); ++} ++ ++void MacroAssembler::decode_heap_oop(Register dst, Register src) { ++#ifdef ASSERT ++ verify_heapbase("MacroAssembler::decode_heap_oop corrupted?"); ++#endif ++ if (CompressedOops::base() == NULL) { ++ if (CompressedOops::shift() != 0) { ++ assert (LogMinObjAlignmentInBytes == CompressedOops::shift(), "decode alg wrong"); ++ if (dst != src) nop(); // DON'T DELETE THIS GUY. ++ dsll(dst, src, LogMinObjAlignmentInBytes); ++ } else { ++ if (dst != src) move(dst, src); ++ } ++ } else { ++ if (dst == src) { ++ move(AT, dst); ++ if (CompressedOops::shift() != 0) { ++ assert (LogMinObjAlignmentInBytes == CompressedOops::shift(), "decode alg wrong"); ++ shl(dst, LogMinObjAlignmentInBytes); ++ } ++ daddu(dst, dst, S5_heapbase); ++ movz(dst, R0, AT); ++ } else { ++ if (CompressedOops::shift() != 0) { ++ assert (LogMinObjAlignmentInBytes == CompressedOops::shift(), "decode alg wrong"); ++ dsll(dst, src, LogMinObjAlignmentInBytes); ++ daddu(dst, dst, S5_heapbase); ++ } else { ++ daddu(dst, src, S5_heapbase); ++ } ++ movz(dst, R0, src); ++ } ++ } ++ verify_oop(dst, "broken oop in decode_heap_oop"); ++} ++ ++void MacroAssembler::decode_heap_oop_not_null(Register r) { ++ // Note: it will change flags ++ assert (UseCompressedOops, "should only be used for compressed headers"); ++ assert (Universe::heap() != NULL, "java heap should be initialized"); ++ // Cannot assert, unverified entry point counts instructions (see .ad file) ++ // vtableStubs also counts instructions in pd_code_size_limit. ++ // Also do not verify_oop as this is called by verify_oop. ++ if (CompressedOops::shift() != 0) { ++ assert(LogMinObjAlignmentInBytes == CompressedOops::shift(), "decode alg wrong"); ++ shl(r, LogMinObjAlignmentInBytes); ++ if (CompressedOops::base() != NULL) { ++ daddu(r, r, S5_heapbase); ++ } ++ } else { ++ assert (CompressedOops::base() == NULL, "sanity"); ++ } ++} ++ ++void MacroAssembler::decode_heap_oop_not_null(Register dst, Register src) { ++ assert (UseCompressedOops, "should only be used for compressed headers"); ++ assert (Universe::heap() != NULL, "java heap should be initialized"); ++ ++ // Cannot assert, unverified entry point counts instructions (see .ad file) ++ // vtableStubs also counts instructions in pd_code_size_limit. ++ // Also do not verify_oop as this is called by verify_oop. 
++ //lea(dst, Address(S5_heapbase, src, Address::times_8, 0)); ++ if (CompressedOops::shift() != 0) { ++ assert(LogMinObjAlignmentInBytes == CompressedOops::shift(), "decode alg wrong"); ++ if (LogMinObjAlignmentInBytes == Address::times_8) { ++ dsll(dst, src, LogMinObjAlignmentInBytes); ++ daddu(dst, dst, S5_heapbase); ++ } else { ++ dsll(dst, src, LogMinObjAlignmentInBytes); ++ if (CompressedOops::base() != NULL) { ++ daddu(dst, dst, S5_heapbase); ++ } ++ } ++ } else { ++ assert (CompressedOops::base() == NULL, "sanity"); ++ if (dst != src) { ++ move(dst, src); ++ } ++ } ++} ++ ++void MacroAssembler::encode_klass_not_null(Register r) { ++ if (CompressedKlassPointers::base() != NULL) { ++ assert(r != AT, "Encoding a klass in AT"); ++ set64(AT, (int64_t)CompressedKlassPointers::base()); ++ dsubu(r, r, AT); ++ } ++ if (CompressedKlassPointers::shift() != 0) { ++ assert (LogKlassAlignmentInBytes == CompressedKlassPointers::shift(), "decode alg wrong"); ++ shr(r, LogKlassAlignmentInBytes); ++ } ++} ++ ++void MacroAssembler::encode_klass_not_null(Register dst, Register src) { ++ if (dst == src) { ++ encode_klass_not_null(src); ++ } else { ++ if (CompressedKlassPointers::base() != NULL) { ++ set64(dst, (int64_t)CompressedKlassPointers::base()); ++ dsubu(dst, src, dst); ++ if (CompressedKlassPointers::shift() != 0) { ++ assert (LogKlassAlignmentInBytes == CompressedKlassPointers::shift(), "decode alg wrong"); ++ shr(dst, LogKlassAlignmentInBytes); ++ } ++ } else { ++ if (CompressedKlassPointers::shift() != 0) { ++ assert (LogKlassAlignmentInBytes == CompressedKlassPointers::shift(), "decode alg wrong"); ++ dsrl(dst, src, LogKlassAlignmentInBytes); ++ } else { ++ move(dst, src); ++ } ++ } ++ } ++} ++ ++// Function instr_size_for_decode_klass_not_null() counts the instructions ++// generated by decode_klass_not_null(register r) and reinit_heapbase(), ++// when (Universe::heap() != NULL). Hence, if the instructions they ++// generate change, then this method needs to be updated. ++int MacroAssembler::instr_size_for_decode_klass_not_null() { ++ assert (UseCompressedClassPointers, "only for compressed klass ptrs"); ++ if (CompressedKlassPointers::base() != NULL) { ++ // mov64 + addq + shlq? + mov64 (for reinit_heapbase()). ++ return (CompressedKlassPointers::shift() == 0 ? 4 * 9 : 4 * 10); ++ } else { ++ // longest load decode klass function, mov64, leaq ++ return (CompressedKlassPointers::shift() == 0 ? 4 * 0 : 4 * 1); ++ } ++} ++ ++void MacroAssembler::decode_klass_not_null(Register r) { ++ assert (UseCompressedClassPointers, "should only be used for compressed headers"); ++ assert(r != AT, "Decoding a klass in AT"); ++ // Cannot assert, unverified entry point counts instructions (see .ad file) ++ // vtableStubs also counts instructions in pd_code_size_limit. ++ // Also do not verify_oop as this is called by verify_oop. ++ if (CompressedKlassPointers::shift() != 0) { ++ assert(LogKlassAlignmentInBytes == CompressedKlassPointers::shift(), "decode alg wrong"); ++ shl(r, LogKlassAlignmentInBytes); ++ } ++ if (CompressedKlassPointers::base() != NULL) { ++ set64(AT, (int64_t)CompressedKlassPointers::base()); ++ daddu(r, r, AT); ++ //Not neccessary for MIPS at all. 
++ //reinit_heapbase(); ++ } ++} ++ ++void MacroAssembler::decode_klass_not_null(Register dst, Register src) { ++ assert (UseCompressedClassPointers, "should only be used for compressed headers"); ++ ++ if (dst == src) { ++ decode_klass_not_null(dst); ++ } else { ++ // Cannot assert, unverified entry point counts instructions (see .ad file) ++ // vtableStubs also counts instructions in pd_code_size_limit. ++ // Also do not verify_oop as this is called by verify_oop. ++ set64(dst, (int64_t)CompressedKlassPointers::base()); ++ if (CompressedKlassPointers::shift() != 0) { ++ assert(LogKlassAlignmentInBytes == CompressedKlassPointers::shift(), "decode alg wrong"); ++ assert(LogKlassAlignmentInBytes == Address::times_8, "klass not aligned on 64bits?"); ++ dsll(AT, src, Address::times_8); ++ daddu(dst, dst, AT); ++ } else { ++ daddu(dst, src, dst); ++ } ++ } ++} ++ ++void MacroAssembler::incrementl(Register reg, int value) { ++ if (value == min_jint) { ++ move(AT, value); ++ addu32(reg, reg, AT); ++ return; ++ } ++ if (value < 0) { decrementl(reg, -value); return; } ++ if (value == 0) { ; return; } ++ ++ move(AT, value); ++ addu32(reg, reg, AT); ++} ++ ++void MacroAssembler::decrementl(Register reg, int value) { ++ if (value == min_jint) { ++ move(AT, value); ++ subu32(reg, reg, AT); ++ return; ++ } ++ if (value < 0) { incrementl(reg, -value); return; } ++ if (value == 0) { ; return; } ++ ++ move(AT, value); ++ subu32(reg, reg, AT); ++} ++ ++void MacroAssembler::reinit_heapbase() { ++ if (UseCompressedOops || UseCompressedClassPointers) { ++ if (Universe::heap() != NULL) { ++ if (CompressedOops::base() == NULL) { ++ move(S5_heapbase, R0); ++ } else { ++ set64(S5_heapbase, (int64_t)CompressedOops::ptrs_base()); ++ } ++ } else { ++ set64(S5_heapbase, (intptr_t)CompressedOops::ptrs_base_addr()); ++ ld(S5_heapbase, S5_heapbase, 0); ++ } ++ } ++} ++ ++void MacroAssembler::check_klass_subtype(Register sub_klass, ++ Register super_klass, ++ Register temp_reg, ++ Label& L_success) { ++//implement ind gen_subtype_check ++ Label L_failure; ++ check_klass_subtype_fast_path(sub_klass, super_klass, temp_reg, &L_success, &L_failure, NULL); ++ check_klass_subtype_slow_path(sub_klass, super_klass, temp_reg, noreg, &L_success, NULL); ++ bind(L_failure); ++} ++ ++void MacroAssembler::check_klass_subtype_fast_path(Register sub_klass, ++ Register super_klass, ++ Register temp_reg, ++ Label* L_success, ++ Label* L_failure, ++ Label* L_slow_path, ++ RegisterOrConstant super_check_offset) { ++ assert_different_registers(sub_klass, super_klass, temp_reg); ++ bool must_load_sco = (super_check_offset.constant_or_zero() == -1); ++ if (super_check_offset.is_register()) { ++ assert_different_registers(sub_klass, super_klass, ++ super_check_offset.as_register()); ++ } else if (must_load_sco) { ++ assert(temp_reg != noreg, "supply either a temp or a register offset"); ++ } ++ ++ Label L_fallthrough; ++ int label_nulls = 0; ++ if (L_success == NULL) { L_success = &L_fallthrough; label_nulls++; } ++ if (L_failure == NULL) { L_failure = &L_fallthrough; label_nulls++; } ++ if (L_slow_path == NULL) { L_slow_path = &L_fallthrough; label_nulls++; } ++ assert(label_nulls <= 1, "at most one NULL in the batch"); ++ ++ int sc_offset = in_bytes(Klass::secondary_super_cache_offset()); ++ int sco_offset = in_bytes(Klass::super_check_offset_offset()); ++ // If the pointers are equal, we are done (e.g., String[] elements). 
++ // This self-check enables sharing of secondary supertype arrays among ++ // non-primary types such as array-of-interface. Otherwise, each such ++ // type would need its own customized SSA. ++ // We move this check to the front of the fast path because many ++ // type checks are in fact trivially successful in this manner, ++ // so we get a nicely predicted branch right at the start of the check. ++ beq(sub_klass, super_klass, *L_success); ++ delayed()->nop(); ++ // Check the supertype display: ++ if (must_load_sco) { ++ lwu(temp_reg, super_klass, sco_offset); ++ super_check_offset = RegisterOrConstant(temp_reg); ++ } ++ daddu(AT, sub_klass, super_check_offset.register_or_noreg()); ++ ld(AT, AT, super_check_offset.constant_or_zero()); ++ ++ // This check has worked decisively for primary supers. ++ // Secondary supers are sought in the super_cache ('super_cache_addr'). ++ // (Secondary supers are interfaces and very deeply nested subtypes.) ++ // This works in the same check above because of a tricky aliasing ++ // between the super_cache and the primary super display elements. ++ // (The 'super_check_addr' can address either, as the case requires.) ++ // Note that the cache is updated below if it does not help us find ++ // what we need immediately. ++ // So if it was a primary super, we can just fail immediately. ++ // Otherwise, it's the slow path for us (no success at this point). ++ ++ if (super_check_offset.is_register()) { ++ beq(super_klass, AT, *L_success); ++ delayed()->nop(); ++ addiu(AT, super_check_offset.as_register(), -sc_offset); ++ if (L_failure == &L_fallthrough) { ++ beq(AT, R0, *L_slow_path); ++ delayed()->nop(); ++ } else { ++ bne_far(AT, R0, *L_failure); ++ delayed()->nop(); ++ b(*L_slow_path); ++ delayed()->nop(); ++ } ++ } else if (super_check_offset.as_constant() == sc_offset) { ++ // Need a slow path; fast failure is impossible. ++ if (L_slow_path == &L_fallthrough) { ++ beq(super_klass, AT, *L_success); ++ delayed()->nop(); ++ } else { ++ bne(super_klass, AT, *L_slow_path); ++ delayed()->nop(); ++ b(*L_success); ++ delayed()->nop(); ++ } ++ } else { ++ // No slow path; it's a fast decision. ++ if (L_failure == &L_fallthrough) { ++ beq(super_klass, AT, *L_success); ++ delayed()->nop(); ++ } else { ++ bne_far(super_klass, AT, *L_failure); ++ delayed()->nop(); ++ b(*L_success); ++ delayed()->nop(); ++ } ++ } ++ ++ bind(L_fallthrough); ++ ++} ++ ++ ++void MacroAssembler::check_klass_subtype_slow_path(Register sub_klass, ++ Register super_klass, ++ Register temp_reg, ++ Register temp2_reg, ++ Label* L_success, ++ Label* L_failure, ++ bool set_cond_codes) { ++ if (temp2_reg == noreg) ++ temp2_reg = TSR; ++ assert_different_registers(sub_klass, super_klass, temp_reg, temp2_reg); ++#define IS_A_TEMP(reg) ((reg) == temp_reg || (reg) == temp2_reg) ++ ++ Label L_fallthrough; ++ int label_nulls = 0; ++ if (L_success == NULL) { L_success = &L_fallthrough; label_nulls++; } ++ if (L_failure == NULL) { L_failure = &L_fallthrough; label_nulls++; } ++ assert(label_nulls <= 1, "at most one NULL in the batch"); ++ ++ // a couple of useful fields in sub_klass: ++ int ss_offset = in_bytes(Klass::secondary_supers_offset()); ++ int sc_offset = in_bytes(Klass::secondary_super_cache_offset()); ++ Address secondary_supers_addr(sub_klass, ss_offset); ++ Address super_cache_addr( sub_klass, sc_offset); ++ ++ // Do a linear scan of the secondary super-klass chain. ++ // This code is rarely used, so simplicity is a virtue here. 
++ // The repne_scan instruction uses fixed registers, which we must spill. ++ // Don't worry too much about pre-existing connections with the input regs. ++ ++#ifndef PRODUCT ++ int* pst_counter = &SharedRuntime::_partial_subtype_ctr; ++ ExternalAddress pst_counter_addr((address) pst_counter); ++#endif //PRODUCT ++ ++ // We will consult the secondary-super array. ++ ld(temp_reg, secondary_supers_addr); ++ // Load the array length. ++ lw(temp2_reg, Address(temp_reg, Array::length_offset_in_bytes())); ++ // Skip to start of data. ++ daddiu(temp_reg, temp_reg, Array::base_offset_in_bytes()); ++ ++ // OpenJDK8 never compresses klass pointers in secondary-super array. ++ Label Loop, subtype; ++ bind(Loop); ++ beq(temp2_reg, R0, *L_failure); ++ delayed()->nop(); ++ ld(AT, temp_reg, 0); ++ beq(AT, super_klass, subtype); ++ delayed()->daddiu(temp_reg, temp_reg, 1 * wordSize); ++ b(Loop); ++ delayed()->daddiu(temp2_reg, temp2_reg, -1); ++ ++ bind(subtype); ++ sd(super_klass, super_cache_addr); ++ if (L_success != &L_fallthrough) { ++ b(*L_success); ++ delayed()->nop(); ++ } ++ ++ // Success. Cache the super we found and proceed in triumph. ++#undef IS_A_TEMP ++ ++ bind(L_fallthrough); ++} ++ ++void MacroAssembler::clinit_barrier(Register klass, Register scratch, Label* L_fast_path, Label* L_slow_path) { ++ Register rthread = TREG; ++#ifndef OPT_THREAD ++ get_thread(rthread); ++#endif ++ ++ assert(L_fast_path != NULL || L_slow_path != NULL, "at least one is required"); ++ assert_different_registers(klass, rthread, scratch); ++ ++ Label L_fallthrough; ++ if (L_fast_path == NULL) { ++ L_fast_path = &L_fallthrough; ++ } else if (L_slow_path == NULL) { ++ L_slow_path = &L_fallthrough; ++ } ++ ++ // Fast path check: class is fully initialized ++ lb(scratch, Address(klass, InstanceKlass::init_state_offset())); ++ daddiu(scratch, scratch, -InstanceKlass::fully_initialized); ++ beq(scratch, R0, *L_fast_path); ++ delayed()->nop(); ++ ++ // Fast path check: current thread is initializer thread ++ ld(scratch, Address(klass, InstanceKlass::init_thread_offset())); ++ if (L_slow_path == &L_fallthrough) { ++ beq(rthread, scratch, *L_fast_path); ++ delayed()->nop(); ++ bind(*L_slow_path); ++ } else if (L_fast_path == &L_fallthrough) { ++ bne(rthread, scratch, *L_slow_path); ++ delayed()->nop(); ++ bind(*L_fast_path); ++ } else { ++ Unimplemented(); ++ } ++} ++ ++void MacroAssembler::get_vm_result(Register oop_result, Register java_thread) { ++ ld(oop_result, Address(java_thread, JavaThread::vm_result_offset())); ++ sd(R0, Address(java_thread, JavaThread::vm_result_offset())); ++ verify_oop(oop_result, "broken oop in call_VM_base"); ++} ++ ++void MacroAssembler::get_vm_result_2(Register metadata_result, Register java_thread) { ++ ld(metadata_result, Address(java_thread, JavaThread::vm_result_2_offset())); ++ sd(R0, Address(java_thread, JavaThread::vm_result_2_offset())); ++} ++ ++Address MacroAssembler::argument_address(RegisterOrConstant arg_slot, ++ int extra_slot_offset) { ++ // cf. TemplateTable::prepare_invoke(), if (load_receiver). 
++ int stackElementSize = Interpreter::stackElementSize; ++ int offset = Interpreter::expr_offset_in_bytes(extra_slot_offset+0); ++#ifdef ASSERT ++ int offset1 = Interpreter::expr_offset_in_bytes(extra_slot_offset+1); ++ assert(offset1 - offset == stackElementSize, "correct arithmetic"); ++#endif ++ Register scale_reg = NOREG; ++ Address::ScaleFactor scale_factor = Address::no_scale; ++ if (arg_slot.is_constant()) { ++ offset += arg_slot.as_constant() * stackElementSize; ++ } else { ++ scale_reg = arg_slot.as_register(); ++ scale_factor = Address::times_8; ++ } ++ // We don't push RA on stack in prepare_invoke. ++ // offset += wordSize; // return PC is on stack ++ if(scale_reg==NOREG) return Address(SP, offset); ++ else { ++ dsll(scale_reg, scale_reg, scale_factor); ++ daddu(scale_reg, SP, scale_reg); ++ return Address(scale_reg, offset); ++ } ++} ++ ++SkipIfEqual::~SkipIfEqual() { ++ _masm->bind(_label); ++} ++ ++void MacroAssembler::load_sized_value(Register dst, Address src, size_t size_in_bytes, bool is_signed, Register dst2) { ++ switch (size_in_bytes) { ++ case 8: ld(dst, src); break; ++ case 4: lw(dst, src); break; ++ case 2: is_signed ? lh(dst, src) : lhu(dst, src); break; ++ case 1: is_signed ? lb( dst, src) : lbu( dst, src); break; ++ default: ShouldNotReachHere(); ++ } ++} ++ ++void MacroAssembler::store_sized_value(Address dst, Register src, size_t size_in_bytes, Register src2) { ++ switch (size_in_bytes) { ++ case 8: sd(src, dst); break; ++ case 4: sw(src, dst); break; ++ case 2: sh(src, dst); break; ++ case 1: sb(src, dst); break; ++ default: ShouldNotReachHere(); ++ } ++} ++ ++// Look up the method for a megamorphic invokeinterface call. ++// The target method is determined by . ++// The receiver klass is in recv_klass. ++// On success, the result will be in method_result, and execution falls through. ++// On failure, execution transfers to the given label. ++void MacroAssembler::lookup_interface_method(Register recv_klass, ++ Register intf_klass, ++ RegisterOrConstant itable_index, ++ Register method_result, ++ Register scan_temp, ++ Label& L_no_such_interface, ++ bool return_method) { ++ assert_different_registers(recv_klass, intf_klass, scan_temp, AT); ++ assert_different_registers(method_result, intf_klass, scan_temp, AT); ++ assert(recv_klass != method_result || !return_method, ++ "recv_klass can be destroyed when method isn't needed"); ++ ++ assert(itable_index.is_constant() || itable_index.as_register() == method_result, ++ "caller must use same register for non-constant itable index as for method"); ++ ++ // Compute start of first itableOffsetEntry (which is at the end of the vtable) ++ int vtable_base = in_bytes(Klass::vtable_start_offset()); ++ int itentry_off = itableMethodEntry::method_offset_in_bytes(); ++ int scan_step = itableOffsetEntry::size() * wordSize; ++ int vte_size = vtableEntry::size() * wordSize; ++ Address::ScaleFactor times_vte_scale = Address::times_ptr; ++ assert(vte_size == wordSize, "else adjust times_vte_scale"); ++ ++ lw(scan_temp, Address(recv_klass, Klass::vtable_length_offset())); ++ ++ // %%% Could store the aligned, prescaled offset in the klassoop. ++ dsll(scan_temp, scan_temp, times_vte_scale); ++ daddu(scan_temp, recv_klass, scan_temp); ++ daddiu(scan_temp, scan_temp, vtable_base); ++ if (HeapWordsPerLong > 1) { ++ // Round up to align_object_offset boundary ++ // see code for InstanceKlass::start_of_itable! 
++ round_to(scan_temp, BytesPerLong); ++ } ++ ++ if (return_method) { ++ // Adjust recv_klass by scaled itable_index, so we can free itable_index. ++ assert(itableMethodEntry::size() * wordSize == wordSize, "adjust the scaling in the code below"); ++ if (itable_index.is_constant()) { ++ set64(AT, (int)itable_index.is_constant()); ++ dsll(AT, AT, (int)Address::times_ptr); ++ } else { ++ dsll(AT, itable_index.as_register(), (int)Address::times_ptr); ++ } ++ daddu(AT, AT, recv_klass); ++ daddiu(recv_klass, AT, itentry_off); ++ } ++ ++ Label search, found_method; ++ ++ for (int peel = 1; peel >= 0; peel--) { ++ ld(method_result, Address(scan_temp, itableOffsetEntry::interface_offset_in_bytes())); ++ ++ if (peel) { ++ beq(intf_klass, method_result, found_method); ++ delayed()->nop(); ++ } else { ++ bne(intf_klass, method_result, search); ++ delayed()->nop(); ++ // (invert the test to fall through to found_method...) ++ } ++ ++ if (!peel) break; ++ ++ bind(search); ++ ++ // Check that the previous entry is non-null. A null entry means that ++ // the receiver class doesn't implement the interface, and wasn't the ++ // same as when the caller was compiled. ++ beq(method_result, R0, L_no_such_interface); ++ delayed()->nop(); ++ daddiu(scan_temp, scan_temp, scan_step); ++ } ++ ++ bind(found_method); ++ ++ if (return_method) { ++ // Got a hit. ++ lw(scan_temp, Address(scan_temp, itableOffsetEntry::offset_offset_in_bytes())); ++ if (UseLEXT1) { ++ gsldx(method_result, recv_klass, scan_temp, 0); ++ } else { ++ daddu(AT, recv_klass, scan_temp); ++ ld(method_result, AT, 0); ++ } ++ } ++} ++ ++// virtual method calling ++void MacroAssembler::lookup_virtual_method(Register recv_klass, ++ RegisterOrConstant vtable_index, ++ Register method_result) { ++ Register tmp = GP; ++ push(tmp); ++ ++ if (vtable_index.is_constant()) { ++ assert_different_registers(recv_klass, method_result, tmp); ++ } else { ++ assert_different_registers(recv_klass, method_result, vtable_index.as_register(), tmp); ++ } ++ const int base = in_bytes(Klass::vtable_start_offset()); ++ assert(vtableEntry::size() * wordSize == wordSize, "else adjust the scaling in the code below"); ++ if (vtable_index.is_constant()) { ++ set64(AT, vtable_index.as_constant()); ++ dsll(AT, AT, (int)Address::times_ptr); ++ } else { ++ dsll(AT, vtable_index.as_register(), (int)Address::times_ptr); ++ } ++ set64(tmp, base + vtableEntry::method_offset_in_bytes()); ++ daddu(tmp, tmp, AT); ++ daddu(tmp, tmp, recv_klass); ++ ld(method_result, tmp, 0); ++ ++ pop(tmp); ++} ++ ++void MacroAssembler::store_for_type_by_register(Register src_reg, Register tmp_reg, int disp, BasicType type, bool wide) { ++ switch (type) { ++ case T_LONG: ++ st_ptr(src_reg, tmp_reg, disp); ++ break; ++ case T_ARRAY: ++ case T_OBJECT: ++ if (UseCompressedOops && !wide) { ++ sw(src_reg, tmp_reg, disp); ++ } else { ++ st_ptr(src_reg, tmp_reg, disp); ++ } ++ break; ++ case T_ADDRESS: ++ st_ptr(src_reg, tmp_reg, disp); ++ break; ++ case T_INT: ++ sw(src_reg, tmp_reg, disp); ++ break; ++ case T_CHAR: ++ case T_SHORT: ++ sh(src_reg, tmp_reg, disp); ++ break; ++ case T_BYTE: ++ case T_BOOLEAN: ++ sb(src_reg, tmp_reg, disp); ++ break; ++ default: ++ ShouldNotReachHere(); ++ } ++} ++ ++void MacroAssembler::store_for_type(Register src_reg, Address addr, BasicType type, bool wide) { ++ Register tmp_reg = T9; ++ Register index_reg = addr.index(); ++ if (index_reg == NOREG) { ++ tmp_reg = NOREG; ++ } ++ ++ int scale = addr.scale(); ++ if (tmp_reg != NOREG && scale >= 0) { ++ dsll(tmp_reg, index_reg, 
scale); ++ } ++ ++ int disp = addr.disp(); ++ bool disp_is_simm16 = true; ++ if (!Assembler::is_simm16(disp)) { ++ disp_is_simm16 = false; ++ } ++ ++ Register base_reg = addr.base(); ++ if (tmp_reg != NOREG) { ++ assert_different_registers(tmp_reg, base_reg, index_reg); ++ } ++ ++ if (tmp_reg != NOREG) { ++ daddu(tmp_reg, base_reg, tmp_reg); ++ if (!disp_is_simm16) { ++ move(tmp_reg, disp); ++ daddu(tmp_reg, base_reg, tmp_reg); ++ } ++ store_for_type_by_register(src_reg, tmp_reg, disp_is_simm16 ? disp : 0, type, wide); ++ } else { ++ if (!disp_is_simm16) { ++ tmp_reg = T9; ++ assert_different_registers(tmp_reg, base_reg); ++ move(tmp_reg, disp); ++ daddu(tmp_reg, base_reg, tmp_reg); ++ } ++ store_for_type_by_register(src_reg, disp_is_simm16 ? base_reg : tmp_reg, disp_is_simm16 ? disp : 0, type, wide); ++ } ++} ++ ++void MacroAssembler::store_for_type_by_register(FloatRegister src_reg, Register tmp_reg, int disp, BasicType type) { ++ switch (type) { ++ case T_DOUBLE: ++ sdc1(src_reg, tmp_reg, disp); ++ break; ++ case T_FLOAT: ++ swc1(src_reg, tmp_reg, disp); ++ break; ++ default: ++ ShouldNotReachHere(); ++ } ++} ++ ++void MacroAssembler::store_for_type(FloatRegister src_reg, Address addr, BasicType type) { ++ Register tmp_reg = T9; ++ Register index_reg = addr.index(); ++ if (index_reg == NOREG) { ++ tmp_reg = NOREG; ++ } ++ ++ int scale = addr.scale(); ++ if (tmp_reg != NOREG && scale >= 0) { ++ dsll(tmp_reg, index_reg, scale); ++ } ++ ++ int disp = addr.disp(); ++ bool disp_is_simm16 = true; ++ if (!Assembler::is_simm16(disp)) { ++ disp_is_simm16 = false; ++ } ++ ++ Register base_reg = addr.base(); ++ if (tmp_reg != NOREG) { ++ assert_different_registers(tmp_reg, base_reg, index_reg); ++ } ++ ++ if (tmp_reg != NOREG) { ++ daddu(tmp_reg, base_reg, tmp_reg); ++ if (!disp_is_simm16) { ++ move(tmp_reg, disp); ++ daddu(tmp_reg, base_reg, tmp_reg); ++ } ++ store_for_type_by_register(src_reg, tmp_reg, disp_is_simm16 ? disp : 0, type); ++ } else { ++ if (!disp_is_simm16) { ++ tmp_reg = T9; ++ assert_different_registers(tmp_reg, base_reg); ++ move(tmp_reg, disp); ++ daddu(tmp_reg, base_reg, tmp_reg); ++ } ++ store_for_type_by_register(src_reg, disp_is_simm16 ? base_reg : tmp_reg, disp_is_simm16 ? 
disp : 0, type); ++ } ++} ++ ++void MacroAssembler::load_for_type_by_register(Register dst_reg, Register tmp_reg, int disp, BasicType type, bool wide) { ++ switch (type) { ++ case T_LONG: ++ ld_ptr(dst_reg, tmp_reg, disp); ++ break; ++ case T_ARRAY: ++ case T_OBJECT: ++ if (UseCompressedOops && !wide) { ++ lwu(dst_reg, tmp_reg, disp); ++ } else { ++ ld_ptr(dst_reg, tmp_reg, disp); ++ } ++ break; ++ case T_ADDRESS: ++ if (UseCompressedClassPointers && disp == oopDesc::klass_offset_in_bytes()) { ++ lwu(dst_reg, tmp_reg, disp); ++ } else { ++ ld_ptr(dst_reg, tmp_reg, disp); ++ } ++ break; ++ case T_INT: ++ lw(dst_reg, tmp_reg, disp); ++ break; ++ case T_CHAR: ++ lhu(dst_reg, tmp_reg, disp); ++ break; ++ case T_SHORT: ++ lh(dst_reg, tmp_reg, disp); ++ break; ++ case T_BYTE: ++ case T_BOOLEAN: ++ lb(dst_reg, tmp_reg, disp); ++ break; ++ default: ++ ShouldNotReachHere(); ++ } ++} ++ ++int MacroAssembler::load_for_type(Register dst_reg, Address addr, BasicType type, bool wide) { ++ int code_offset = 0; ++ Register tmp_reg = T9; ++ Register index_reg = addr.index(); ++ if (index_reg == NOREG) { ++ tmp_reg = NOREG; ++ } ++ ++ int scale = addr.scale(); ++ if (tmp_reg != NOREG && scale >= 0) { ++ dsll(tmp_reg, index_reg, scale); ++ } ++ ++ int disp = addr.disp(); ++ bool disp_is_simm16 = true; ++ if (!Assembler::is_simm16(disp)) { ++ disp_is_simm16 = false; ++ } ++ ++ Register base_reg = addr.base(); ++ if (tmp_reg != NOREG) { ++ assert_different_registers(tmp_reg, base_reg, index_reg); ++ } ++ ++ if (tmp_reg != NOREG) { ++ daddu(tmp_reg, base_reg, tmp_reg); ++ if (!disp_is_simm16) { ++ move(tmp_reg, disp); ++ daddu(tmp_reg, base_reg, tmp_reg); ++ } ++ code_offset = offset(); ++ load_for_type_by_register(dst_reg, tmp_reg, disp_is_simm16 ? disp : 0, type, wide); ++ } else { ++ if (!disp_is_simm16) { ++ tmp_reg = T9; ++ assert_different_registers(tmp_reg, base_reg); ++ move(tmp_reg, disp); ++ daddu(tmp_reg, base_reg, tmp_reg); ++ } ++ code_offset = offset(); ++ load_for_type_by_register(dst_reg, disp_is_simm16 ? base_reg : tmp_reg, disp_is_simm16 ? disp : 0, type, wide); ++ } ++ ++ return code_offset; ++} ++ ++void MacroAssembler::load_for_type_by_register(FloatRegister dst_reg, Register tmp_reg, int disp, BasicType type) { ++ switch (type) { ++ case T_DOUBLE: ++ ldc1(dst_reg, tmp_reg, disp); ++ break; ++ case T_FLOAT: ++ lwc1(dst_reg, tmp_reg, disp); ++ break; ++ default: ++ ShouldNotReachHere(); ++ } ++} ++ ++int MacroAssembler::load_for_type(FloatRegister dst_reg, Address addr, BasicType type) { ++ int code_offset = 0; ++ Register tmp_reg = T9; ++ Register index_reg = addr.index(); ++ if (index_reg == NOREG) { ++ tmp_reg = NOREG; ++ } ++ ++ int scale = addr.scale(); ++ if (tmp_reg != NOREG && scale >= 0) { ++ dsll(tmp_reg, index_reg, scale); ++ } ++ ++ int disp = addr.disp(); ++ bool disp_is_simm16 = true; ++ if (!Assembler::is_simm16(disp)) { ++ disp_is_simm16 = false; ++ } ++ ++ Register base_reg = addr.base(); ++ if (tmp_reg != NOREG) { ++ assert_different_registers(tmp_reg, base_reg, index_reg); ++ } ++ ++ if (tmp_reg != NOREG) { ++ daddu(tmp_reg, base_reg, tmp_reg); ++ if (!disp_is_simm16) { ++ move(tmp_reg, disp); ++ daddu(tmp_reg, base_reg, tmp_reg); ++ } ++ code_offset = offset(); ++ load_for_type_by_register(dst_reg, tmp_reg, disp_is_simm16 ? 
disp : 0, type); ++ } else { ++ if (!disp_is_simm16) { ++ tmp_reg = T9; ++ assert_different_registers(tmp_reg, base_reg); ++ move(tmp_reg, disp); ++ daddu(tmp_reg, base_reg, tmp_reg); ++ } ++ code_offset = offset(); ++ load_for_type_by_register(dst_reg, disp_is_simm16 ? base_reg : tmp_reg, disp_is_simm16 ? disp : 0, type); ++ } ++ ++ return code_offset; ++} ++ ++void MacroAssembler::clear_jweak_tag(Register possibly_jweak) { ++ const int32_t inverted_jweak_mask = ~static_cast(JNIHandles::weak_tag_mask); ++ STATIC_ASSERT(inverted_jweak_mask == -2); // otherwise check this code ++ // The inverted mask is sign-extended ++ move(AT, inverted_jweak_mask); ++ andr(possibly_jweak, AT, possibly_jweak); ++} ++ ++void MacroAssembler::resolve_jobject(Register value, ++ Register thread, ++ Register tmp) { ++ assert_different_registers(value, thread, tmp); ++ Label done, not_weak; ++ beq(value, R0, done); // Use NULL as-is. ++ delayed()->nop(); ++ move(AT, JNIHandles::weak_tag_mask); // Test for jweak tag. ++ andr(AT, value, AT); ++ beq(AT, R0, not_weak); ++ delayed()->nop(); ++ // Resolve jweak. ++ access_load_at(T_OBJECT, IN_NATIVE | ON_PHANTOM_OOP_REF, ++ value, Address(value, -JNIHandles::weak_tag_value), tmp, thread); ++ verify_oop(value); ++ b(done); ++ delayed()->nop(); ++ bind(not_weak); ++ // Resolve (untagged) jobject. ++ access_load_at(T_OBJECT, IN_NATIVE, value, Address(value, 0), tmp, thread); ++ verify_oop(value); ++ bind(done); ++} ++ ++void MacroAssembler::cmp_cmov(Register op1, ++ Register op2, ++ Register dst, ++ Register src, ++ CMCompare cmp, ++ bool is_signed) { ++ switch (cmp) { ++ case EQ: ++ subu(AT, op1, op2); ++ movz(dst, src, AT); ++ break; ++ ++ case NE: ++ subu(AT, op1, op2); ++ movn(dst, src, AT); ++ break; ++ ++ case GT: ++ if (is_signed) { ++ slt(AT, op2, op1); ++ } else { ++ sltu(AT, op2, op1); ++ } ++ movn(dst, src, AT); ++ break; ++ ++ case GE: ++ if (is_signed) { ++ slt(AT, op1, op2); ++ } else { ++ sltu(AT, op1, op2); ++ } ++ movz(dst, src, AT); ++ break; ++ ++ case LT: ++ if (is_signed) { ++ slt(AT, op1, op2); ++ } else { ++ sltu(AT, op1, op2); ++ } ++ movn(dst, src, AT); ++ break; ++ ++ case LE: ++ if (is_signed) { ++ slt(AT, op2, op1); ++ } else { ++ sltu(AT, op2, op1); ++ } ++ movz(dst, src, AT); ++ break; ++ ++ default: ++ Unimplemented(); ++ } ++} ++ ++void MacroAssembler::cmp_cmov(FloatRegister op1, ++ FloatRegister op2, ++ Register dst, ++ Register src, ++ CMCompare cmp, ++ bool is_float) { ++ switch(cmp) { ++ case EQ: ++ if (is_float) { ++ c_eq_s(op1, op2); ++ } else { ++ c_eq_d(op1, op2); ++ } ++ movt(dst, src); ++ break; ++ ++ case NE: ++ if (is_float) { ++ c_eq_s(op1, op2); ++ } else { ++ c_eq_d(op1, op2); ++ } ++ movf(dst, src); ++ break; ++ ++ case GT: ++ if (is_float) { ++ c_ule_s(op1, op2); ++ } else { ++ c_ule_d(op1, op2); ++ } ++ movf(dst, src); ++ break; ++ ++ case GE: ++ if (is_float) { ++ c_ult_s(op1, op2); ++ } else { ++ c_ult_d(op1, op2); ++ } ++ movf(dst, src); ++ break; ++ ++ case LT: ++ if (is_float) { ++ c_ult_s(op1, op2); ++ } else { ++ c_ult_d(op1, op2); ++ } ++ movt(dst, src); ++ break; ++ ++ case LE: ++ if (is_float) { ++ c_ule_s(op1, op2); ++ } else { ++ c_ule_d(op1, op2); ++ } ++ movt(dst, src); ++ break; ++ ++ default: ++ Unimplemented(); ++ } ++} ++ ++void MacroAssembler::cmp_cmov(FloatRegister op1, ++ FloatRegister op2, ++ FloatRegister dst, ++ FloatRegister src, ++ CMCompare cmp, ++ bool is_float) { ++ switch(cmp) { ++ case EQ: ++ if (!is_float) { ++ c_eq_d(op1, op2); ++ movt_d(dst, src); ++ } else { ++ c_eq_s(op1, op2); ++ 
movt_s(dst, src); ++ } ++ break; ++ ++ case NE: ++ if (!is_float) { ++ c_eq_d(op1, op2); ++ movf_d(dst, src); ++ } else { ++ c_eq_s(op1, op2); ++ movf_s(dst, src); ++ } ++ break; ++ ++ case GT: ++ if (!is_float) { ++ c_ule_d(op1, op2); ++ movf_d(dst, src); ++ } else { ++ c_ule_s(op1, op2); ++ movf_s(dst, src); ++ } ++ break; ++ ++ case GE: ++ if (!is_float) { ++ c_ult_d(op1, op2); ++ movf_d(dst, src); ++ } else { ++ c_ult_s(op1, op2); ++ movf_s(dst, src); ++ } ++ break; ++ ++ case LT: ++ if (!is_float) { ++ c_ult_d(op1, op2); ++ movt_d(dst, src); ++ } else { ++ c_ult_s(op1, op2); ++ movt_s(dst, src); ++ } ++ break; ++ ++ case LE: ++ if (!is_float) { ++ c_ule_d(op1, op2); ++ movt_d(dst, src); ++ } else { ++ c_ule_s(op1, op2); ++ movt_s(dst, src); ++ } ++ break; ++ ++ default: ++ Unimplemented(); ++ } ++} ++ ++void MacroAssembler::cmp_cmov(Register op1, ++ Register op2, ++ FloatRegister dst, ++ FloatRegister src, ++ CMCompare cmp, ++ bool is_float) { ++ Label L; ++ ++ switch(cmp) { ++ case EQ: ++ bne(op1, op2, L); ++ delayed()->nop(); ++ if (is_float) { ++ mov_s(dst, src); ++ } else { ++ mov_d(dst, src); ++ } ++ bind(L); ++ break; ++ ++ case NE: ++ beq(op1, op2, L); ++ delayed()->nop(); ++ if (is_float) { ++ mov_s(dst, src); ++ } else { ++ mov_d(dst, src); ++ } ++ bind(L); ++ break; ++ ++ case GT: ++ slt(AT, op2, op1); ++ beq(AT, R0, L); ++ delayed()->nop(); ++ if (is_float) { ++ mov_s(dst, src); ++ } else { ++ mov_d(dst, src); ++ } ++ bind(L); ++ break; ++ ++ case GE: ++ slt(AT, op1, op2); ++ bne(AT, R0, L); ++ delayed()->nop(); ++ if (is_float) { ++ mov_s(dst, src); ++ } else { ++ mov_d(dst, src); ++ } ++ bind(L); ++ break; ++ ++ case LT: ++ slt(AT, op1, op2); ++ beq(AT, R0, L); ++ delayed()->nop(); ++ if (is_float) { ++ mov_s(dst, src); ++ } else { ++ mov_d(dst, src); ++ } ++ bind(L); ++ break; ++ ++ case LE: ++ slt(AT, op2, op1); ++ bne(AT, R0, L); ++ delayed()->nop(); ++ if (is_float) { ++ mov_s(dst, src); ++ } else { ++ mov_d(dst, src); ++ } ++ bind(L); ++ break; ++ ++ default: ++ Unimplemented(); ++ } ++} +diff --git a/src/hotspot/cpu/mips/macroAssembler_mips.hpp b/src/hotspot/cpu/mips/macroAssembler_mips.hpp +new file mode 100644 +index 00000000000..daec23fcf9c +--- /dev/null ++++ b/src/hotspot/cpu/mips/macroAssembler_mips.hpp +@@ -0,0 +1,704 @@ ++/* ++ * Copyright (c) 1997, 2013, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ * ++ */ ++ ++#ifndef CPU_MIPS_VM_MACROASSEMBLER_MIPS_HPP ++#define CPU_MIPS_VM_MACROASSEMBLER_MIPS_HPP ++ ++#include "asm/assembler.hpp" ++#include "runtime/rtmLocking.hpp" ++#include "utilities/macros.hpp" ++ ++// MacroAssembler extends Assembler by frequently used macros. ++// ++// Instructions for which a 'better' code sequence exists depending ++// on arguments should also go in here. ++ ++class MacroAssembler: public Assembler { ++ friend class LIR_Assembler; ++ friend class Runtime1; // as_Address() ++ ++ public: ++ // Compare code ++ typedef enum { ++ EQ = 0x01, ++ NE = 0x02, ++ GT = 0x03, ++ GE = 0x04, ++ LT = 0x05, ++ LE = 0x06 ++ } CMCompare; ++ ++ protected: ++ ++ // Support for VM calls ++ // ++ // This is the base routine called by the different versions of call_VM_leaf. The interpreter ++ // may customize this version by overriding it for its purposes (e.g., to save/restore ++ // additional registers when doing a VM call). ++ #define VIRTUAL virtual ++ ++ VIRTUAL void call_VM_leaf_base( ++ address entry_point, // the entry point ++ int number_of_arguments // the number of arguments to pop after the call ++ ); ++ ++ // This is the base routine called by the different versions of call_VM. The interpreter ++ // may customize this version by overriding it for its purposes (e.g., to save/restore ++ // additional registers when doing a VM call). ++ // ++ // If no java_thread register is specified (noreg) than TREG will be used instead. call_VM_base ++ // returns the register which contains the thread upon return. If a thread register has been ++ // specified, the return value will correspond to that register. If no last_java_sp is specified ++ // (noreg) than sp will be used instead. ++ VIRTUAL void call_VM_base( // returns the register containing the thread upon return ++ Register oop_result, // where an oop-result ends up if any; use noreg otherwise ++ Register java_thread, // the thread if computed before ; use noreg otherwise ++ Register last_java_sp, // to set up last_Java_frame in stubs; use noreg otherwise ++ address entry_point, // the entry point ++ int number_of_arguments, // the number of arguments (w/o thread) to pop after the call ++ bool check_exceptions // whether to check for pending exceptions after return ++ ); ++ ++ void call_VM_helper(Register oop_result, address entry_point, int number_of_arguments, bool check_exceptions = true); ++ ++ // helpers for FPU flag access ++ // tmp is a temporary register, if none is available use noreg ++ ++ public: ++ MacroAssembler(CodeBuffer* code) : Assembler(code) {} ++ ++ // These routines should emit JVMTI PopFrame and ForceEarlyReturn handling code. ++ // The implementation is only non-empty for the InterpreterMacroAssembler, ++ // as only the interpreter handles PopFrame and ForceEarlyReturn requests. ++ virtual void check_and_handle_popframe(Register java_thread); ++ virtual void check_and_handle_earlyret(Register java_thread); ++ ++ Address as_Address(AddressLiteral adr); ++ Address as_Address(ArrayAddress adr); ++ ++ static intptr_t i[32]; ++ static float f[32]; ++ static void print(outputStream *s); ++ ++ static int i_offset(unsigned int k); ++ static int f_offset(unsigned int k); ++ ++ static void save_registers(MacroAssembler *masm); ++ static void restore_registers(MacroAssembler *masm); ++ ++ // Support for NULL-checks ++ // ++ // Generates code that causes a NULL OS exception if the content of reg is NULL. ++ // If the accessed location is M[reg + offset] and the offset is known, provide the ++ // offset. 
No explicit code generation is needed if the offset is within a certain ++ // range (0 <= offset <= page_size). ++ ++ void null_check(Register reg, int offset = -1); ++ static bool needs_explicit_null_check(intptr_t offset); ++ static bool uses_implicit_null_check(void* address); ++ ++ // Required platform-specific helpers for Label::patch_instructions. ++ // They _shadow_ the declarations in AbstractAssembler, which are undefined. ++ void pd_patch_instruction(address branch, address target, const char* file, int line); ++ ++ address emit_trampoline_stub(int insts_call_instruction_offset, address target); ++ ++ // Support for inc/dec with optimal instruction selection depending on value ++ void incrementl(Register reg, int value = 1); ++ void decrementl(Register reg, int value = 1); ++ ++ ++ // Alignment ++ void align(int modulus); ++ ++ ++ // Stack frame creation/removal ++ void enter(); ++ void leave(); ++ ++ // Support for getting the JavaThread pointer (i.e.; a reference to thread-local information) ++ // The pointer will be loaded into the thread register. ++ void get_thread(Register thread); ++ ++ ++ // Support for VM calls ++ // ++ // It is imperative that all calls into the VM are handled via the call_VM macros. ++ // They make sure that the stack linkage is setup correctly. call_VM's correspond ++ // to ENTRY/ENTRY_X entry points while call_VM_leaf's correspond to LEAF entry points. ++ ++ ++ void call_VM(Register oop_result, ++ address entry_point, ++ bool check_exceptions = true); ++ void call_VM(Register oop_result, ++ address entry_point, ++ Register arg_1, ++ bool check_exceptions = true); ++ void call_VM(Register oop_result, ++ address entry_point, ++ Register arg_1, Register arg_2, ++ bool check_exceptions = true); ++ void call_VM(Register oop_result, ++ address entry_point, ++ Register arg_1, Register arg_2, Register arg_3, ++ bool check_exceptions = true); ++ ++ // Overloadings with last_Java_sp ++ void call_VM(Register oop_result, ++ Register last_java_sp, ++ address entry_point, ++ int number_of_arguments = 0, ++ bool check_exceptions = true); ++ void call_VM(Register oop_result, ++ Register last_java_sp, ++ address entry_point, ++ Register arg_1, bool ++ check_exceptions = true); ++ void call_VM(Register oop_result, ++ Register last_java_sp, ++ address entry_point, ++ Register arg_1, Register arg_2, ++ bool check_exceptions = true); ++ void call_VM(Register oop_result, ++ Register last_java_sp, ++ address entry_point, ++ Register arg_1, Register arg_2, Register arg_3, ++ bool check_exceptions = true); ++ ++ void get_vm_result (Register oop_result, Register thread); ++ void get_vm_result_2(Register metadata_result, Register thread); ++ void call_VM_leaf(address entry_point, ++ int number_of_arguments = 0); ++ void call_VM_leaf(address entry_point, ++ Register arg_1); ++ void call_VM_leaf(address entry_point, ++ Register arg_1, Register arg_2); ++ void call_VM_leaf(address entry_point, ++ Register arg_1, Register arg_2, Register arg_3); ++ ++ // Super call_VM calls - correspond to MacroAssembler::call_VM(_leaf) calls ++ void super_call_VM_leaf(address entry_point); ++ void super_call_VM_leaf(address entry_point, Register arg_1); ++ void super_call_VM_leaf(address entry_point, Register arg_1, Register arg_2); ++ void super_call_VM_leaf(address entry_point, Register arg_1, Register arg_2, Register arg_3); ++ ++ // last Java Frame (fills frame anchor) ++ void set_last_Java_frame(Register thread, ++ Register last_java_sp, ++ Register last_java_fp, ++ address last_java_pc); ++ 
++ // thread in the default location (S6) ++ void set_last_Java_frame(Register last_java_sp, ++ Register last_java_fp, ++ address last_java_pc); ++ ++ void reset_last_Java_frame(Register thread, bool clear_fp); ++ ++ // thread in the default location (S6) ++ void reset_last_Java_frame(bool clear_fp); ++ ++ // jobjects ++ void clear_jweak_tag(Register possibly_jweak); ++ void resolve_jobject(Register value, Register thread, Register tmp); ++ ++ // C 'boolean' to Java boolean: x == 0 ? 0 : 1 ++ void c2bool(Register x); ++ ++ void resolve_oop_handle(Register result, Register tmp); ++ void load_mirror(Register dst, Register method, Register tmp); ++ ++ void load_method_holder(Register holder, Register method); ++ ++ // oop manipulations ++ void load_klass(Register dst, Register src); ++ void store_klass(Register dst, Register src); ++ ++ void access_load_at(BasicType type, DecoratorSet decorators, Register dst, Address src, ++ Register tmp1, Register thread_tmp); ++ void access_store_at(BasicType type, DecoratorSet decorators, Address dst, Register src, ++ Register tmp1, Register tmp2); ++ ++ void load_heap_oop(Register dst, Address src, Register tmp1 = noreg, ++ Register thread_tmp = noreg, DecoratorSet decorators = 0); ++ void load_heap_oop_not_null(Register dst, Address src, Register tmp1 = noreg, ++ Register thread_tmp = noreg, DecoratorSet decorators = 0); ++ void store_heap_oop(Address dst, Register src, Register tmp1 = noreg, ++ Register tmp2 = noreg, DecoratorSet decorators = 0); ++ ++ // Used for storing NULL. All other oop constants should be ++ // stored using routines that take a jobject. ++ void store_heap_oop_null(Address dst); ++ ++ void load_prototype_header(Register dst, Register src); ++ ++ void store_klass_gap(Register dst, Register src); ++ ++ void encode_heap_oop(Register r); ++ void encode_heap_oop(Register dst, Register src); ++ void decode_heap_oop(Register r); ++ void decode_heap_oop(Register dst, Register src); ++ void encode_heap_oop_not_null(Register r); ++ void decode_heap_oop_not_null(Register r); ++ void encode_heap_oop_not_null(Register dst, Register src); ++ void decode_heap_oop_not_null(Register dst, Register src); ++ ++ void encode_klass_not_null(Register r); ++ void decode_klass_not_null(Register r); ++ void encode_klass_not_null(Register dst, Register src); ++ void decode_klass_not_null(Register dst, Register src); ++ ++ // Returns the byte size of the instructions generated by decode_klass_not_null() ++ // when compressed klass pointers are being used. 
++ static int instr_size_for_decode_klass_not_null(); ++ ++ // if heap base register is used - reinit it with the correct value ++ void reinit_heapbase(); ++ ++ DEBUG_ONLY(void verify_heapbase(const char* msg);) ++ ++ void set_narrow_klass(Register dst, Klass* k); ++ void set_narrow_oop(Register dst, jobject obj); ++ ++ ++ ++ ++ // Sign extension ++ void sign_extend_short(Register reg) { /*dsll32(reg, reg, 16); dsra32(reg, reg, 16);*/ seh(reg, reg); } ++ void sign_extend_byte(Register reg) { /*dsll32(reg, reg, 24); dsra32(reg, reg, 24);*/ seb(reg, reg); } ++ void rem_s(FloatRegister fd, FloatRegister fs, FloatRegister ft, FloatRegister tmp); ++ void rem_d(FloatRegister fd, FloatRegister fs, FloatRegister ft, FloatRegister tmp); ++ ++ // allocation ++ void eden_allocate( ++ Register obj, // result: pointer to object after successful allocation ++ Register var_size_in_bytes, // object size in bytes if unknown at compile time; invalid otherwise ++ int con_size_in_bytes, // object size in bytes if known at compile time ++ Register t1, // temp register ++ Register t2, ++ Label& slow_case // continuation point if fast allocation fails ++ ); ++ void tlab_allocate( ++ Register obj, // result: pointer to object after successful allocation ++ Register var_size_in_bytes, // object size in bytes if unknown at compile time; invalid otherwise ++ int con_size_in_bytes, // object size in bytes if known at compile time ++ Register t1, // temp register ++ Register t2, // temp register ++ Label& slow_case // continuation point if fast allocation fails ++ ); ++ void incr_allocated_bytes(Register thread, ++ Register var_size_in_bytes, int con_size_in_bytes, ++ Register t1 = noreg); ++ // interface method calling ++ void lookup_interface_method(Register recv_klass, ++ Register intf_klass, ++ RegisterOrConstant itable_index, ++ Register method_result, ++ Register scan_temp, ++ Label& no_such_interface, ++ bool return_method = true); ++ ++ // virtual method calling ++ void lookup_virtual_method(Register recv_klass, ++ RegisterOrConstant vtable_index, ++ Register method_result); ++ ++ // Test sub_klass against super_klass, with fast and slow paths. ++ ++ // The fast path produces a tri-state answer: yes / no / maybe-slow. ++ // One of the three labels can be NULL, meaning take the fall-through. ++ // If super_check_offset is -1, the value is loaded up from super_klass. ++ // No registers are killed, except temp_reg. ++ void check_klass_subtype_fast_path(Register sub_klass, ++ Register super_klass, ++ Register temp_reg, ++ Label* L_success, ++ Label* L_failure, ++ Label* L_slow_path, ++ RegisterOrConstant super_check_offset = RegisterOrConstant(-1)); ++ ++ // The rest of the type check; must be wired to a corresponding fast path. ++ // It does not repeat the fast path logic, so don't use it standalone. ++ // The temp_reg and temp2_reg can be noreg, if no temps are available. ++ // Updates the sub's secondary super cache as necessary. ++ // If set_cond_codes, condition codes will be Z on success, NZ on failure. ++ void check_klass_subtype_slow_path(Register sub_klass, ++ Register super_klass, ++ Register temp_reg, ++ Register temp2_reg, ++ Label* L_success, ++ Label* L_failure, ++ bool set_cond_codes = false); ++ ++ // Simplified, combined version, good for typical uses. ++ // Falls through on failure. 
++ void check_klass_subtype(Register sub_klass, ++ Register super_klass, ++ Register temp_reg, ++ Label& L_success); ++ ++ void clinit_barrier(Register klass, ++ Register scratch, ++ Label* L_fast_path = NULL, ++ Label* L_slow_path = NULL); ++ ++ ++ // Debugging ++ ++ // only if +VerifyOops ++ void verify_oop(Register reg, const char* s = "broken oop"); ++ void verify_oop_addr(Address addr, const char * s = "broken oop addr"); ++ void verify_oop_subroutine(); ++ // TODO: verify method and klass metadata (compare against vptr?) ++ void _verify_method_ptr(Register reg, const char * msg, const char * file, int line) {} ++ void _verify_klass_ptr(Register reg, const char * msg, const char * file, int line){} ++ ++ #define verify_method_ptr(reg) _verify_method_ptr(reg, "broken method " #reg, __FILE__, __LINE__) ++ #define verify_klass_ptr(reg) _verify_klass_ptr(reg, "broken klass " #reg, __FILE__, __LINE__) ++ ++ // only if +VerifyFPU ++ void verify_FPU(int stack_depth, const char* s = "illegal FPU state"); ++ ++ // prints msg, dumps registers and stops execution ++ void stop(const char* msg); ++ ++ // prints msg and continues ++ void warn(const char* msg); ++ ++ static void debug(char* msg/*, RegistersForDebugging* regs*/); ++ static void debug64(char* msg, int64_t pc, int64_t regs[]); ++ ++ void print_reg(Register reg); ++ void print_reg(FloatRegister reg); ++ ++ void untested() { stop("untested"); } ++ ++ void unimplemented(const char* what = ""); ++ ++ void should_not_reach_here() { stop("should not reach here"); } ++ ++ void print_CPU_state(); ++ ++ // Stack overflow checking ++ void bang_stack_with_offset(int offset) { ++ // stack grows down, caller passes positive offset ++ assert(offset > 0, "must bang with negative offset"); ++ if (offset <= 32768) { ++ sw(RA0, SP, -offset); ++ } else { ++ li(AT, offset); ++ dsubu(AT, SP, AT); ++ sw(RA0, AT, 0); ++ } ++ } ++ ++ // Writes to stack successive pages until offset reached to check for ++ // stack overflow + shadow pages. Also, clobbers tmp ++ void bang_stack_size(Register size, Register tmp); ++ ++ // Check for reserved stack access in method being exited (for JIT) ++ void reserved_stack_check(); ++ ++ virtual RegisterOrConstant delayed_value_impl(intptr_t* delayed_value_addr, ++ Register tmp, ++ int offset); ++ ++ void safepoint_poll(Label& slow_path, Register thread_reg); ++ void safepoint_poll_acquire(Label& slow_path, Register thread_reg); ++ ++ //void verify_tlab(); ++ void verify_tlab(Register t1, Register t2); ++ ++ // Biased locking support ++ // lock_reg and obj_reg must be loaded up with the appropriate values. ++ // tmp_reg is optional. If it is supplied (i.e., != noreg) it will ++ // be killed; if not supplied, push/pop will be used internally to ++ // allocate a temporary (inefficient, avoid if possible). ++ // Optional slow case is for implementations (interpreter and C1) which branch to ++ // slow case directly. Leaves condition codes set for C2's Fast_Lock node. ++ // Returns offset of first potentially-faulting instruction for null ++ // check info (currently consumed only by C1). If ++ // swap_reg_contains_mark is true then returns -1 as it is assumed ++ // the calling code has already passed any potential faults. 
++ void biased_locking_enter(Register lock_reg, Register obj_reg, ++ Register swap_reg, Register tmp_reg, ++ bool swap_reg_contains_mark, ++ Label& done, Label* slow_case = NULL, ++ BiasedLockingCounters* counters = NULL); ++ void biased_locking_exit (Register obj_reg, Register temp_reg, Label& done); ++ ++ // Arithmetics ++ // Regular vs. d* versions ++ inline void addu_long(Register rd, Register rs, Register rt) { ++ daddu(rd, rs, rt); ++ } ++ inline void addu_long(Register rd, Register rs, long imm32_64) { ++ daddiu(rd, rs, imm32_64); ++ } ++ ++ void round_to(Register reg, int modulus) { ++ assert_different_registers(reg, AT); ++ increment(reg, modulus - 1); ++ move(AT, - modulus); ++ andr(reg, reg, AT); ++ } ++ ++ // the following two might use the AT register; be sure you have no meaningful data in AT before you call them ++ void increment(Register reg, int imm); ++ void decrement(Register reg, int imm); ++ ++ void shl(Register reg, int sa) { dsll(reg, reg, sa); } ++ void shr(Register reg, int sa) { dsrl(reg, reg, sa); } ++ void sar(Register reg, int sa) { dsra(reg, reg, sa); } ++ ++ // Helper functions for statistics gathering. ++ void atomic_inc32(address counter_addr, int inc, Register tmp_reg1, Register tmp_reg2); ++ ++ // Calls ++ void call(address entry); ++ void call(address entry, relocInfo::relocType rtype); ++ void call(address entry, RelocationHolder& rh); ++ ++ address trampoline_call(AddressLiteral entry, CodeBuffer *cbuf = NULL); ++ ++ // Emit the CompiledIC call idiom ++ void ic_call(address entry, jint method_index = 0); ++ ++ // Jumps ++ void jmp(address entry); ++ void jmp(address entry, relocInfo::relocType rtype); ++ void jmp_far(Label& L); // always long jumps ++ ++ /* branches may exceed 16-bit offset */ ++ void b_far(address entry); ++ void b_far(Label& L); ++ ++ void bne_far (Register rs, Register rt, address entry); ++ void bne_far (Register rs, Register rt, Label& L); ++ ++ void beq_far (Register rs, Register rt, address entry); ++ void beq_far (Register rs, Register rt, Label& L); ++ ++ void patchable_call(address target); ++ void general_call(address target); ++ ++ void patchable_jump(address target); ++ void general_jump(address target); ++ ++ static int insts_for_patchable_call(address target); ++ static int insts_for_general_call(address target); ++ ++ static int insts_for_patchable_jump(address target); ++ static int insts_for_general_jump(address target); ++ ++ // Floating ++ // Data ++ ++ // Load and store values by size and signed-ness ++ void load_sized_value(Register dst, Address src, size_t size_in_bytes, bool is_signed, Register dst2 = noreg); ++ void store_sized_value(Address dst, Register src, size_t size_in_bytes, Register src2 = noreg); ++ ++ // ld_ptr will perform lw for 32 bit VMs and ld for 64 bit VMs ++ inline void ld_ptr(Register rt, Address a) { ++ ld(rt, a); ++ } ++ ++ inline void ld_ptr(Register rt, Register base, int offset16) { ++ ld(rt, base, offset16); ++ } ++ ++ // st_ptr will perform sw for 32 bit VMs and sd for 64 bit VMs ++ inline void st_ptr(Register rt, Address a) { ++ sd(rt, a); ++ } ++ ++ inline void st_ptr(Register rt, Register base, int offset16) { ++ sd(rt, base, offset16); ++ } ++ ++ void ld_ptr(Register rt, Register base, Register offset); ++ void st_ptr(Register rt, Register base, Register offset); ++ ++ // swap the two bytes of the low 16-bit halfword ++ // this directive will use AT, be sure the high 16-bit of reg is zero ++ void hswap(Register reg); ++ void huswap(Register reg); ++ ++ // convert big endian integer to little
endian integer ++ void swap(Register reg); ++ ++ // implement the x86 instruction semantics ++ // if c_reg == *dest then *dest <= x_reg ++ // else c_reg <= *dest ++ // AT indicates if the exchange (xchg) occurred: 1 if exchanged, else 0 ++ void cmpxchg(Address addr, Register oldval, Register newval, Register resflag, ++ bool retold, bool barrier, bool weak = false, bool exchange = false); ++ void cmpxchg(Address addr, Register oldval, Register newval, Register tmp, ++ bool retold, bool barrier, Label& succ, Label* fail = nullptr); ++ void cmpxchg32(Address addr, Register oldval, Register newval, Register resflag, ++ bool sign, bool retold, bool barrier, bool weak = false, bool exchange = false); ++ void cmpxchg32(Address addr, Register oldval, Register newval, Register tmp, ++ bool sign, bool retold, bool barrier, Label& succ, Label* fail = nullptr); ++ void cmpxchg8(Register x_regLo, Register x_regHi, Address dest, Register c_regLo, Register c_regHi); ++ ++ //pop & push ++ void extend_sign(Register rh, Register rl) { stop("extend_sign"); } ++ void neg(Register reg) { dsubu(reg, R0, reg); } ++ void push (Register reg) { daddiu(SP, SP, -8); sd (reg, SP, 0); } ++ void push (FloatRegister reg) { daddiu(SP, SP, -8); sdc1(reg, SP, 0); } ++ void pop (Register reg) { ld (reg, SP, 0); daddiu(SP, SP, 8); } ++ void pop (FloatRegister reg) { ldc1(reg, SP, 0); daddiu(SP, SP, 8); } ++ void pop () { daddiu(SP, SP, 8); } ++ void pop2 () { daddiu(SP, SP, 16); } ++ void push2(Register reg1, Register reg2); ++ void pop2 (Register reg1, Register reg2); ++ void dpush (Register reg) { daddiu(SP, SP, -8); sd (reg, SP, 0); } ++ void dpop (Register reg) { ld (reg, SP, 0); daddiu(SP, SP, 8); } ++ //we need two functions to save and restore the general registers ++ void pushad(); ++ void popad(); ++ void pushad_except_v0(); ++ void popad_except_v0(); ++ ++ //move a 32-bit immediate to a Register ++ void move(Register reg, int imm32) { li32(reg, imm32); } ++ void li (Register rd, long imm); ++ void li (Register rd, address addr) { li(rd, (long)addr); } ++ //replace move(Register reg, int imm) ++ void li32(Register rd, int imm32); // sign-extends to 64 bits on mips64 ++ void set64(Register d, jlong value); ++ static int insts_for_set64(jlong value); ++ ++ void patchable_set48(Register d, jlong value); ++ void patchable_set32(Register d, jlong value); ++ ++ void patchable_call32(Register d, jlong value); ++ ++ static int call_size(address target, bool far, bool patchable); ++ ++ static bool reachable_from_cache(address target); ++ static bool reachable_from_cache(); ++ ++ ++ void dli(Register rd, long imm) { li(rd, imm); } ++ void li64(Register rd, long imm); ++ void li48(Register rd, long imm); ++ ++ void move(Register rd, Register rs) { daddu(rd, rs, R0); } ++ void move_u32(Register rd, Register rs) { addu32(rd, rs, R0); } ++ void dmove(Register rd, Register rs) { daddu(rd, rs, R0); } ++ void mov_metadata(Register dst, Metadata* obj); ++ void mov_metadata(Address dst, Metadata* obj); ++ ++ void store_for_type_by_register(Register src_reg, Register tmp_reg, int disp, BasicType type, bool wide); ++ void store_for_type_by_register(FloatRegister src_reg, Register tmp_reg, int disp, BasicType type); ++ void store_for_type(Register src_reg, Address addr, BasicType type = T_INT, bool wide = false); ++ void store_for_type(FloatRegister src_reg, Address addr, BasicType type = T_INT); ++ void load_for_type_by_register(Register dst_reg, Register tmp_reg, int disp, BasicType type, bool wide); ++ void load_for_type_by_register(FloatRegister dst_reg,
Register tmp_reg, int disp, BasicType type); ++ int load_for_type(Register dst_reg, Address addr, BasicType type = T_INT, bool wide = false); ++ int load_for_type(FloatRegister dst_reg, Address addr, BasicType type = T_INT); ++ ++#ifndef PRODUCT ++ static void pd_print_patched_instruction(address branch) { ++ jint stub_inst = *(jint*) branch; ++ print_instruction(stub_inst); ++ ::tty->print("%s", " (unresolved)"); ++ ++ } ++#endif ++ ++ //FIXME ++ void empty_FPU_stack(){/*need implemented*/}; ++ ++ // method handles (JSR 292) ++ Address argument_address(RegisterOrConstant arg_slot, int extra_slot_offset = 0); ++ ++ // Conditional move ++ void cmp_cmov(Register op1, ++ Register op2, ++ Register dst, ++ Register src, ++ CMCompare cmp = EQ, ++ bool is_signed = true); ++ void cmp_cmov(FloatRegister op1, ++ FloatRegister op2, ++ Register dst, ++ Register src, ++ CMCompare cmp = EQ, ++ bool is_float = true); ++ void cmp_cmov(FloatRegister op1, ++ FloatRegister op2, ++ FloatRegister dst, ++ FloatRegister src, ++ CMCompare cmp = EQ, ++ bool is_float = true); ++ void cmp_cmov(Register op1, ++ Register op2, ++ FloatRegister dst, ++ FloatRegister src, ++ CMCompare cmp = EQ, ++ bool is_float = true); ++ ++#undef VIRTUAL ++ ++}; ++ ++/** ++ * class SkipIfEqual: ++ * ++ * Instantiating this class will result in assembly code being output that will ++ * jump around any code emitted between the creation of the instance and it's ++ * automatic destruction at the end of a scope block, depending on the value of ++ * the flag passed to the constructor, which will be checked at run-time. ++ */ ++class SkipIfEqual { ++private: ++ MacroAssembler* _masm; ++ Label _label; ++ ++public: ++ inline SkipIfEqual(MacroAssembler* masm, const bool* flag_addr, bool value) ++ : _masm(masm) { ++ _masm->li(AT, (address)flag_addr); ++ _masm->lb(AT, AT, 0); ++ if (value) { ++ _masm->bne(AT, R0, _label); ++ } else { ++ _masm->beq(AT, R0, _label); ++ } ++ _masm->delayed()->nop(); ++ } ++ ++ ~SkipIfEqual(); ++}; ++ ++#ifdef ASSERT ++inline bool AbstractAssembler::pd_check_instruction_mark() { return true; } ++#endif ++ ++ ++#endif // CPU_MIPS_VM_MACROASSEMBLER_MIPS_HPP +diff --git a/src/hotspot/cpu/mips/macroAssembler_mips.inline.hpp b/src/hotspot/cpu/mips/macroAssembler_mips.inline.hpp +new file mode 100644 +index 00000000000..92c05fb726a +--- /dev/null ++++ b/src/hotspot/cpu/mips/macroAssembler_mips.inline.hpp +@@ -0,0 +1,34 @@ ++/* ++ * Copyright (c) 1997, 2013, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2017, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 
++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#ifndef CPU_MIPS_VM_MACROASSEMBLER_MIPS_INLINE_HPP ++#define CPU_MIPS_VM_MACROASSEMBLER_MIPS_INLINE_HPP ++ ++#include "asm/assembler.inline.hpp" ++#include "asm/macroAssembler.hpp" ++#include "asm/codeBuffer.hpp" ++#include "code/codeCache.hpp" ++ ++#endif // CPU_MIPS_VM_MACROASSEMBLER_MIPS_INLINE_HPP +diff --git a/src/hotspot/cpu/mips/matcher_mips.hpp b/src/hotspot/cpu/mips/matcher_mips.hpp +new file mode 100644 +index 00000000000..94f58720ffe +--- /dev/null ++++ b/src/hotspot/cpu/mips/matcher_mips.hpp +@@ -0,0 +1,145 @@ ++/* ++ * Copyright (c) 2021, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2021, 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#ifndef CPU_MIPS_MATCHER_MIPS_HPP ++#define CPU_MIPS_MATCHER_MIPS_HPP ++ ++ // Defined within class Matcher ++ ++ // false => size gets scaled to BytesPerLong, ok. ++ static const bool init_array_count_is_in_bytes = false; ++ ++ // Whether this platform implements the scalable vector feature ++ static const bool implements_scalable_vector = false; ++ ++ static const bool supports_scalable_vector() { ++ return false; ++ } ++ ++ // MIPS doesn't support misaligned vectors store/load? FIXME ++ static constexpr bool misaligned_vectors_ok() { ++ return false; ++ } ++ ++ // Whether code generation need accurate ConvI2L types. ++ static const bool convi2l_type_required = true; ++ ++ // Does the CPU require late expand (see block.cpp for description of late expand)? ++ static const bool require_postalloc_expand = false; ++ ++ // Do we need to mask the count passed to shift instructions or does ++ // the cpu only look at the lower 5/6 bits anyway? ++ static const bool need_masked_shift_count = false; ++ ++ // No support for generic vector operands. ++ static const bool supports_generic_vector_operands = false; ++ ++ static constexpr bool isSimpleConstant64(jlong value) { ++ // Will one (StoreL ConL) be cheaper than two (StoreI ConI)?. ++ // Probably always true, even if a temp register is required. ++ return true; ++ } ++ ++ // No additional cost for CMOVL. 
++ static constexpr int long_cmove_cost() { return 0; } ++ ++ // No CMOVF/CMOVD with SSE2 ++ static int float_cmove_cost() { return ConditionalMoveLimit; } ++ ++ static bool narrow_oop_use_complex_address() { ++ assert(UseCompressedOops, "only for compressed oops code"); ++ return false; ++ } ++ ++ static bool narrow_klass_use_complex_address() { ++ assert(UseCompressedClassPointers, "only for compressed klass code"); ++ return false; ++ } ++ ++ static bool const_oop_prefer_decode() { ++ // Prefer ConN+DecodeN over ConP. ++ return true; ++ } ++ ++ static bool const_klass_prefer_decode() { ++ // TODO: Either support matching DecodeNKlass (heap-based) in operand ++ // or condisider the following: ++ // Prefer ConNKlass+DecodeNKlass over ConP in simple compressed klass mode. ++ //return CompressedKlassPointers::base() == NULL; ++ return true; ++ } ++ ++ // Is it better to copy float constants, or load them directly from memory? ++ // Intel can load a float constant from a direct address, requiring no ++ // extra registers. Most RISCs will have to materialize an address into a ++ // register first, so they would do better to copy the constant from stack. ++ static const bool rematerialize_float_constants = false; ++ ++ // If CPU can load and store mis-aligned doubles directly then no fixup is ++ // needed. Else we split the double into 2 integer pieces and move it ++ // piece-by-piece. Only happens when passing doubles into C code as the ++ // Java calling convention forces doubles to be aligned. ++ static const bool misaligned_doubles_ok = false; ++ ++ // Advertise here if the CPU requires explicit rounding operations to implement strictfp mode. ++ static const bool strict_fp_requires_explicit_rounding = false; ++ ++ // Are floats converted to double when stored to stack during ++ // deoptimization? ++ static constexpr bool float_in_double() { return false; } ++ ++ // Do ints take an entire long register or just half? ++ static const bool int_in_long = true; ++ ++ // Does the CPU supports vector variable shift instructions? ++ static constexpr bool supports_vector_variable_shifts(void) { ++ return false; // not supported ++ } ++ ++ // Does the CPU supports vector variable rotate instructions? ++ static constexpr bool supports_vector_variable_rotates(void) { ++ return false; // not supported ++ } ++ ++ // Does the CPU supports vector unsigned comparison instructions? ++ static constexpr bool supports_vector_comparison_unsigned(int vlen, BasicType bt) { ++ return false; ++ } ++ ++ // Some microarchitectures have mask registers used on vectors ++ static const bool has_predicated_vectors(void) { ++ return false; ++ } ++ ++ // true means we have fast l2f convers ++ // false means that conversion is done by runtime call ++ static constexpr bool convL2FSupported(void) { ++ return true; ++ } ++ ++ // Implements a variant of EncodeISOArrayNode that encode ASCII only ++ static const bool supports_encode_ascii_array = false; ++ ++#endif // CPU_MIPS_MATCHER_MIPS_HPP +diff --git a/src/hotspot/cpu/mips/methodHandles_mips.cpp b/src/hotspot/cpu/mips/methodHandles_mips.cpp +new file mode 100644 +index 00000000000..c4279705062 +--- /dev/null ++++ b/src/hotspot/cpu/mips/methodHandles_mips.cpp +@@ -0,0 +1,597 @@ ++/* ++ * Copyright (c) 1997, 2014, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#include "precompiled.hpp" ++#include "asm/macroAssembler.hpp" ++#include "classfile/javaClasses.inline.hpp" ++#include "classfile/vmClasses.hpp" ++#include "interpreter/interpreter.hpp" ++#include "interpreter/interpreterRuntime.hpp" ++#include "memory/allocation.inline.hpp" ++#include "prims/jvmtiExport.hpp" ++#include "prims/methodHandles.hpp" ++#include "runtime/frame.inline.hpp" ++#include "runtime/stubRoutines.hpp" ++#include "utilities/preserveException.hpp" ++ ++#define __ _masm-> ++ ++#define A0 RA0 ++#define A1 RA1 ++#define A2 RA2 ++#define A3 RA3 ++#define A4 RA4 ++#define A5 RA5 ++#define A6 RA6 ++#define A7 RA7 ++#define T0 RT0 ++#define T1 RT1 ++#define T2 RT2 ++#define T3 RT3 ++#define T8 RT8 ++#define T9 RT9 ++ ++#ifdef PRODUCT ++#define BLOCK_COMMENT(str) // nothing ++#define STOP(error) stop(error) ++#else ++#define BLOCK_COMMENT(str) __ block_comment(str) ++#define STOP(error) block_comment(error); __ stop(error) ++#endif ++ ++#define BIND(label) bind(label); BLOCK_COMMENT(#label ":") ++ ++void MethodHandles::load_klass_from_Class(MacroAssembler* _masm, Register klass_reg) { ++ if (VerifyMethodHandles) ++ verify_klass(_masm, klass_reg, VM_CLASS_ID(java_lang_Class), ++ "MH argument is a Class"); ++ __ ld(klass_reg, Address(klass_reg, java_lang_Class::klass_offset())); ++} ++ ++#ifdef ASSERT ++static int check_nonzero(const char* xname, int x) { ++ assert(x != 0, "%s should be nonzero", xname); ++ return x; ++} ++#define NONZERO(x) check_nonzero(#x, x) ++#else //ASSERT ++#define NONZERO(x) (x) ++#endif //ASSERT ++ ++#ifdef ASSERT ++void MethodHandles::verify_klass(MacroAssembler* _masm, ++ Register obj_reg, vmClassID klass_id, ++ const char* error_message) { ++} ++ ++void MethodHandles::verify_ref_kind(MacroAssembler* _masm, int ref_kind, Register member_reg, Register temp) { ++ Label L; ++ BLOCK_COMMENT("verify_ref_kind {"); ++ __ lw(temp, Address(member_reg, NONZERO(java_lang_invoke_MemberName::flags_offset()))); ++ __ sra(temp, temp, java_lang_invoke_MemberName::MN_REFERENCE_KIND_SHIFT); ++ __ move(AT, java_lang_invoke_MemberName::MN_REFERENCE_KIND_MASK); ++ __ andr(temp, temp, AT); ++ __ move(AT, ref_kind); ++ __ beq(temp, AT, L); ++ __ delayed()->nop(); ++ { char* buf = NEW_C_HEAP_ARRAY(char, 100, mtInternal); ++ jio_snprintf(buf, 100, "verify_ref_kind expected %x", ref_kind); ++ if (ref_kind == JVM_REF_invokeVirtual || ++ ref_kind == JVM_REF_invokeSpecial) ++ // could do this for all ref_kinds, but would explode assembly code size ++ trace_method_handle(_masm, buf); ++ __ STOP(buf); ++ } ++ BLOCK_COMMENT("} verify_ref_kind"); ++ 
__ bind(L); ++} ++ ++#endif //ASSERT ++ ++void MethodHandles::jump_from_method_handle(MacroAssembler* _masm, Register method, Register temp, ++ bool for_compiler_entry) { ++ assert(method == Rmethod, "interpreter calling convention"); ++ ++ Label L_no_such_method; ++ __ beq(method, R0, L_no_such_method); ++ __ delayed()->nop(); ++ ++ __ verify_method_ptr(method); ++ ++ if (!for_compiler_entry && JvmtiExport::can_post_interpreter_events()) { ++ Label run_compiled_code; ++ // JVMTI events, such as single-stepping, are implemented partly by avoiding running ++ // compiled code in threads for which the event is enabled. Check here for ++ // interp_only_mode if these events CAN be enabled. ++ Register rthread = TREG; ++ // interp_only is an int, on little endian it is sufficient to test the byte only ++ // Is a cmpl faster? ++ __ lbu(AT, rthread, in_bytes(JavaThread::interp_only_mode_offset())); ++ __ beq(AT, R0, run_compiled_code); ++ __ delayed()->nop(); ++ __ ld(T9, method, in_bytes(Method::interpreter_entry_offset())); ++ __ jr(T9); ++ __ delayed()->nop(); ++ __ BIND(run_compiled_code); ++ } ++ ++ const ByteSize entry_offset = for_compiler_entry ? Method::from_compiled_offset() : ++ Method::from_interpreted_offset(); ++ __ ld(T9, method, in_bytes(entry_offset)); ++ __ jr(T9); ++ __ delayed()->nop(); ++ ++ __ bind(L_no_such_method); ++ address wrong_method = StubRoutines::throw_AbstractMethodError_entry(); ++ __ jmp(wrong_method, relocInfo::runtime_call_type); ++ __ delayed()->nop(); ++} ++ ++void MethodHandles::jump_to_lambda_form(MacroAssembler* _masm, ++ Register recv, Register method_temp, ++ Register temp2, ++ bool for_compiler_entry) { ++ BLOCK_COMMENT("jump_to_lambda_form {"); ++ // This is the initial entry point of a lazy method handle. ++ // After type checking, it picks up the invoker from the LambdaForm. 
++ assert_different_registers(recv, method_temp, temp2); ++ assert(recv != noreg, "required register"); ++ assert(method_temp == Rmethod, "required register for loading method"); ++ ++ //NOT_PRODUCT({ FlagSetting fs(TraceMethodHandles, true); trace_method_handle(_masm, "LZMH"); }); ++ ++ // Load the invoker, as MH -> MH.form -> LF.vmentry ++ __ verify_oop(recv); ++ __ load_heap_oop(method_temp, Address(recv, NONZERO(java_lang_invoke_MethodHandle::form_offset())), temp2); ++ __ verify_oop(method_temp); ++ __ load_heap_oop(method_temp, Address(method_temp, NONZERO(java_lang_invoke_LambdaForm::vmentry_offset())), temp2); ++ __ verify_oop(method_temp); ++ __ load_heap_oop(method_temp, Address(method_temp, NONZERO(java_lang_invoke_MemberName::method_offset()))); ++ __ verify_oop(method_temp); ++ __ access_load_at(T_ADDRESS, IN_HEAP, method_temp, Address(method_temp, NONZERO(java_lang_invoke_ResolvedMethodName::vmtarget_offset())), noreg, noreg); ++ ++ if (VerifyMethodHandles && !for_compiler_entry) { ++ // make sure recv is already on stack ++ __ ld(temp2, Address(method_temp, Method::const_offset())); ++ __ load_sized_value(temp2, ++ Address(temp2, ConstMethod::size_of_parameters_offset()), ++ sizeof(u2), false); ++ // assert(sizeof(u2) == sizeof(Method::_size_of_parameters), ""); ++ Label L; ++ Address recv_addr = __ argument_address(temp2, -1); ++ __ ld(AT, recv_addr); ++ __ beq(recv, AT, L); ++ __ delayed()->nop(); ++ ++ recv_addr = __ argument_address(temp2, -1); ++ __ ld(V0, recv_addr); ++ __ STOP("receiver not on stack"); ++ __ BIND(L); ++ } ++ ++ jump_from_method_handle(_masm, method_temp, temp2, for_compiler_entry); ++ BLOCK_COMMENT("} jump_to_lambda_form"); ++} ++ ++ ++// Code generation ++address MethodHandles::generate_method_handle_interpreter_entry(MacroAssembler* _masm, ++ vmIntrinsics::ID iid) { ++ const bool not_for_compiler_entry = false; // this is the interpreter entry ++ assert(is_signature_polymorphic(iid), "expected invoke iid"); ++ if (iid == vmIntrinsics::_invokeGeneric || ++ iid == vmIntrinsics::_compiledLambdaForm) { ++ // Perhaps surprisingly, the symbolic references visible to Java are not directly used. ++ // They are linked to Java-generated adapters via MethodHandleNatives.linkMethod. ++ // They all allow an appendix argument. ++ __ stop("empty stubs make SG sick"); ++ return NULL; ++ } ++ ++ // No need in interpreter entry for linkToNative for now. ++ // Interpreter calls compiled entry through i2c. ++ if (iid == vmIntrinsics::_linkToNative) { ++ __ stop("Should not reach here"); // empty stubs make SG sick ++ return NULL; ++ } ++ ++ // Rmethod: Method* ++ // T9: argument locator (parameter slot count, added to sp) ++ // S7: used as temp to hold mh or receiver ++ Register t9_argp = T9; // argument list ptr, live on error paths ++ Register s7_mh = S7; // MH receiver; dies quickly and is recycled ++ Register rm_method = Rmethod; // eventual target of this invocation ++ ++ // here's where control starts out: ++ __ align(CodeEntryAlignment); ++ address entry_point = __ pc(); ++ ++ if (VerifyMethodHandles) { ++ assert(Method::intrinsic_id_size_in_bytes() == 2, "assuming Method::_intrinsic_id is u2"); ++ ++ Label L; ++ BLOCK_COMMENT("verify_intrinsic_id {"); ++ __ lhu(AT, rm_method, Method::intrinsic_id_offset_in_bytes()); ++ guarantee(Assembler::is_simm16(vmIntrinsics::as_int(iid)), "Oops, iid is not simm16! 
Change the instructions."); ++ __ addiu(AT, AT, -1 * (int) iid); ++ __ beq(AT, R0, L); ++ __ delayed()->nop(); ++ if (iid == vmIntrinsics::_linkToVirtual || ++ iid == vmIntrinsics::_linkToSpecial) { ++ // could do this for all kinds, but would explode assembly code size ++ trace_method_handle(_masm, "bad Method*::intrinsic_id"); ++ } ++ __ STOP("bad Method*::intrinsic_id"); ++ __ bind(L); ++ BLOCK_COMMENT("} verify_intrinsic_id"); ++ } ++ ++ // First task: Find out how big the argument list is. ++ Address t9_first_arg_addr; ++ int ref_kind = signature_polymorphic_intrinsic_ref_kind(iid); ++ assert(ref_kind != 0 || iid == vmIntrinsics::_invokeBasic, "must be _invokeBasic or a linkTo intrinsic"); ++ if (ref_kind == 0 || MethodHandles::ref_kind_has_receiver(ref_kind)) { ++ __ ld(t9_argp, Address(rm_method, Method::const_offset())); ++ __ load_sized_value(t9_argp, ++ Address(t9_argp, ConstMethod::size_of_parameters_offset()), ++ sizeof(u2), false); ++ // assert(sizeof(u2) == sizeof(Method::_size_of_parameters), ""); ++ t9_first_arg_addr = __ argument_address(t9_argp, -1); ++ } else { ++ DEBUG_ONLY(t9_argp = noreg); ++ } ++ ++ if (!is_signature_polymorphic_static(iid)) { ++ __ ld(s7_mh, t9_first_arg_addr); ++ DEBUG_ONLY(t9_argp = noreg); ++ } ++ ++ // t9_first_arg_addr is live! ++ ++ trace_method_handle_interpreter_entry(_masm, iid); ++ ++ if (iid == vmIntrinsics::_invokeBasic) { ++ generate_method_handle_dispatch(_masm, iid, s7_mh, noreg, not_for_compiler_entry); ++ ++ } else { ++ // Adjust argument list by popping the trailing MemberName argument. ++ Register r_recv = noreg; ++ if (MethodHandles::ref_kind_has_receiver(ref_kind)) { ++ // Load the receiver (not the MH; the actual MemberName's receiver) up from the interpreter stack. ++ __ ld(r_recv = T2, t9_first_arg_addr); ++ } ++ DEBUG_ONLY(t9_argp = noreg); ++ Register rm_member = rm_method; // MemberName ptr; incoming method ptr is dead now ++ __ pop(rm_member); // extract last argument ++ generate_method_handle_dispatch(_masm, iid, r_recv, rm_member, not_for_compiler_entry); ++ } ++ ++ return entry_point; ++} ++ ++void MethodHandles::generate_method_handle_dispatch(MacroAssembler* _masm, ++ vmIntrinsics::ID iid, ++ Register receiver_reg, ++ Register member_reg, ++ bool for_compiler_entry) { ++ assert(is_signature_polymorphic(iid), "expected invoke iid"); ++ Register rm_method = Rmethod; // eventual target of this invocation ++ // temps used in this code are not used in *either* compiled or interpreted calling sequences ++ Register j_rarg0 = T0; ++ Register j_rarg1 = A0; ++ Register j_rarg2 = A1; ++ Register j_rarg3 = A2; ++ Register j_rarg4 = A3; ++ Register j_rarg5 = A4; ++ ++ Register temp1 = T8; ++ Register temp2 = T9; ++ Register temp3 = V0; ++ if (for_compiler_entry) { ++ assert(receiver_reg == (iid == vmIntrinsics::_linkToStatic ? 
noreg : j_rarg0), "only valid assignment"); ++ assert_different_registers(temp1, j_rarg0, j_rarg1, j_rarg2, j_rarg3, j_rarg4, j_rarg5); ++ assert_different_registers(temp2, j_rarg0, j_rarg1, j_rarg2, j_rarg3, j_rarg4, j_rarg5); ++ assert_different_registers(temp3, j_rarg0, j_rarg1, j_rarg2, j_rarg3, j_rarg4, j_rarg5); ++ } ++ else { ++ assert_different_registers(temp1, temp2, temp3, saved_last_sp_register()); // don't trash lastSP ++ } ++ assert_different_registers(temp1, temp2, temp3, receiver_reg); ++ assert_different_registers(temp1, temp2, temp3, member_reg); ++ ++ if (iid == vmIntrinsics::_invokeBasic || iid == vmIntrinsics::_linkToNative) { ++ if (iid == vmIntrinsics::_linkToNative) { ++ assert(for_compiler_entry, "only compiler entry is supported"); ++ } ++ // indirect through MH.form.vmentry.vmtarget ++ jump_to_lambda_form(_masm, receiver_reg, rm_method, temp1, for_compiler_entry); ++ ++ } else { ++ // The method is a member invoker used by direct method handles. ++ if (VerifyMethodHandles) { ++ // make sure the trailing argument really is a MemberName (caller responsibility) ++ verify_klass(_masm, member_reg, VM_CLASS_ID(java_lang_invoke_MemberName), ++ "MemberName required for invokeVirtual etc."); ++ } ++ ++ Address member_clazz( member_reg, NONZERO(java_lang_invoke_MemberName::clazz_offset())); ++ Address member_vmindex( member_reg, NONZERO(java_lang_invoke_MemberName::vmindex_offset())); ++ Address member_vmtarget( member_reg, NONZERO(java_lang_invoke_MemberName::method_offset())); ++ Address vmtarget_method( rm_method, NONZERO(java_lang_invoke_ResolvedMethodName::vmtarget_offset())); ++ ++ Register temp1_recv_klass = temp1; ++ if (iid != vmIntrinsics::_linkToStatic) { ++ __ verify_oop(receiver_reg); ++ if (iid == vmIntrinsics::_linkToSpecial) { ++ // Don't actually load the klass; just null-check the receiver. ++ __ null_check(receiver_reg); ++ } else { ++ // load receiver klass itself ++ __ null_check(receiver_reg, oopDesc::klass_offset_in_bytes()); ++ __ load_klass(temp1_recv_klass, receiver_reg); ++ __ verify_klass_ptr(temp1_recv_klass); ++ } ++ BLOCK_COMMENT("check_receiver {"); ++ // The receiver for the MemberName must be in receiver_reg. ++ // Check the receiver against the MemberName.clazz ++ if (VerifyMethodHandles && iid == vmIntrinsics::_linkToSpecial) { ++ // Did not load it above... ++ __ load_klass(temp1_recv_klass, receiver_reg); ++ __ verify_klass_ptr(temp1_recv_klass); ++ } ++ if (VerifyMethodHandles && iid != vmIntrinsics::_linkToInterface) { ++ Label L_ok; ++ Register temp2_defc = temp2; ++ __ load_heap_oop(temp2_defc, member_clazz, temp3); ++ load_klass_from_Class(_masm, temp2_defc); ++ __ verify_klass_ptr(temp2_defc); ++ __ check_klass_subtype(temp1_recv_klass, temp2_defc, temp3, L_ok); ++ // If we get here, the type check failed! 
++ __ STOP("receiver class disagrees with MemberName.clazz"); ++ __ bind(L_ok); ++ } ++ BLOCK_COMMENT("} check_receiver"); ++ } ++ if (iid == vmIntrinsics::_linkToSpecial || ++ iid == vmIntrinsics::_linkToStatic) { ++ DEBUG_ONLY(temp1_recv_klass = noreg); // these guys didn't load the recv_klass ++ } ++ ++ // Live registers at this point: ++ // member_reg - MemberName that was the trailing argument ++ // temp1_recv_klass - klass of stacked receiver, if needed ++ ++ Label L_incompatible_class_change_error; ++ switch (iid) { ++ case vmIntrinsics::_linkToSpecial: ++ if (VerifyMethodHandles) { ++ verify_ref_kind(_masm, JVM_REF_invokeSpecial, member_reg, temp3); ++ } ++ __ load_heap_oop(rm_method, member_vmtarget); ++ __ access_load_at(T_ADDRESS, IN_HEAP, rm_method, vmtarget_method, noreg, noreg); ++ break; ++ ++ case vmIntrinsics::_linkToStatic: ++ if (VerifyMethodHandles) { ++ verify_ref_kind(_masm, JVM_REF_invokeStatic, member_reg, temp3); ++ } ++ __ load_heap_oop(rm_method, member_vmtarget); ++ __ access_load_at(T_ADDRESS, IN_HEAP, rm_method, vmtarget_method, noreg, noreg); ++ break; ++ ++ case vmIntrinsics::_linkToVirtual: ++ { ++ // same as TemplateTable::invokevirtual, ++ // minus the CP setup and profiling: ++ ++ if (VerifyMethodHandles) { ++ verify_ref_kind(_masm, JVM_REF_invokeVirtual, member_reg, temp3); ++ } ++ ++ // pick out the vtable index from the MemberName, and then we can discard it: ++ Register temp2_index = temp2; ++ __ access_load_at(T_ADDRESS, IN_HEAP, temp2_index, member_vmindex, noreg, noreg); ++ if (VerifyMethodHandles) { ++ Label L_index_ok; ++ __ slt(AT, R0, temp2_index); ++ __ bne(AT, R0, L_index_ok); ++ __ delayed()->nop(); ++ __ STOP("no virtual index"); ++ __ BIND(L_index_ok); ++ } ++ ++ // Note: The verifier invariants allow us to ignore MemberName.clazz and vmtarget ++ // at this point. And VerifyMethodHandles has already checked clazz, if needed. ++ ++ // get target Method* & entry point ++ __ lookup_virtual_method(temp1_recv_klass, temp2_index, rm_method); ++ break; ++ } ++ ++ case vmIntrinsics::_linkToInterface: ++ { ++ // same as TemplateTable::invokeinterface ++ // (minus the CP setup and profiling, with different argument motion) ++ if (VerifyMethodHandles) { ++ verify_ref_kind(_masm, JVM_REF_invokeInterface, member_reg, temp3); ++ } ++ ++ Register temp3_intf = temp3; ++ __ load_heap_oop(temp3_intf, member_clazz); ++ load_klass_from_Class(_masm, temp3_intf); ++ __ verify_klass_ptr(temp3_intf); ++ ++ Register rm_index = rm_method; ++ __ access_load_at(T_ADDRESS, IN_HEAP, rm_index, member_vmindex, noreg, noreg); ++ if (VerifyMethodHandles) { ++ Label L; ++ __ slt(AT, rm_index, R0); ++ __ beq(AT, R0, L); ++ __ delayed()->nop(); ++ __ STOP("invalid vtable index for MH.invokeInterface"); ++ __ bind(L); ++ } ++ ++ // given intf, index, and recv klass, dispatch to the implementation method ++ __ lookup_interface_method(temp1_recv_klass, temp3_intf, ++ // note: next two args must be the same: ++ rm_index, rm_method, ++ temp2, ++ L_incompatible_class_change_error); ++ break; ++ } ++ ++ default: ++ fatal("unexpected intrinsic %d: %s", vmIntrinsics::as_int(iid), vmIntrinsics::name_at(iid)); ++ break; ++ } ++ ++ // Live at this point: ++ // rm_method ++ ++ // After figuring out which concrete method to call, jump into it. ++ // Note that this works in the interpreter with no data motion. ++ // But the compiled version will require that r_recv be shifted out. 
++ __ verify_method_ptr(rm_method); ++ jump_from_method_handle(_masm, rm_method, temp1, for_compiler_entry); ++ ++ if (iid == vmIntrinsics::_linkToInterface) { ++ __ bind(L_incompatible_class_change_error); ++ address icce_entry= StubRoutines::throw_IncompatibleClassChangeError_entry(); ++ __ jmp(icce_entry, relocInfo::runtime_call_type); ++ __ delayed()->nop(); ++ } ++ } ++} ++ ++#ifndef PRODUCT ++void trace_method_handle_stub(const char* adaptername, ++ oop mh, ++ intptr_t* saved_regs, ++ intptr_t* entry_sp) { ++ // called as a leaf from native code: do not block the JVM! ++ bool has_mh = (strstr(adaptername, "/static") == NULL && ++ strstr(adaptername, "linkTo") == NULL); // static linkers don't have MH ++ const char* mh_reg_name = has_mh ? "s7_mh" : "s7"; ++ tty->print_cr("MH %s %s=" PTR_FORMAT " sp=" PTR_FORMAT, ++ adaptername, mh_reg_name, ++ p2i(mh), p2i(entry_sp)); ++ ++ if (Verbose) { ++ tty->print_cr("Registers:"); ++ const int saved_regs_count = RegisterImpl::number_of_registers; ++ for (int i = 0; i < saved_regs_count; i++) { ++ Register r = as_Register(i); ++ // The registers are stored in reverse order on the stack (by pusha). ++ tty->print("%3s=" PTR_FORMAT, r->name(), saved_regs[((saved_regs_count - 1) - i)]); ++ if ((i + 1) % 4 == 0) { ++ tty->cr(); ++ } else { ++ tty->print(", "); ++ } ++ } ++ tty->cr(); ++ ++ { ++ // dumping last frame with frame::describe ++ ++ JavaThread* p = JavaThread::active(); ++ ++ ResourceMark rm; ++ // may not be needed, but safer and inexpensive here ++ PreserveExceptionMark pem(Thread::current()); ++ FrameValues values; ++ ++ // Note: We want to allow trace_method_handle from any call site. ++ // While trace_method_handle creates a frame, it may be entered ++ // without a PC on the stack top (e.g. not just after a call). ++ // Walking that frame could lead to failures due to that invalid PC. ++ // => carefully detect that frame when doing the stack walking ++ ++ // Current C frame ++ frame cur_frame = os::current_frame(); ++ ++ // Robust search of trace_calling_frame (independent of inlining). ++ // Assumes saved_regs comes from a pusha in the trace_calling_frame. ++ assert(cur_frame.sp() < saved_regs, "registers not saved on stack ?"); ++ frame trace_calling_frame = os::get_sender_for_C_frame(&cur_frame); ++ while (trace_calling_frame.fp() < saved_regs) { ++ trace_calling_frame = os::get_sender_for_C_frame(&trace_calling_frame); ++ } ++ ++ // safely create a frame and call frame::describe ++ intptr_t *dump_sp = trace_calling_frame.sender_sp(); ++ intptr_t *dump_fp = trace_calling_frame.link(); ++ ++ bool walkable = has_mh; // whether the traced frame should be walkable ++ ++ if (walkable) { ++ // The previous definition of walkable may have to be refined ++ // if new call sites cause the next frame constructor to start ++ // failing. Alternatively, frame constructors could be ++ // modified to support the current or future non walkable ++ // frames (but this is more intrusive and is not considered as ++ // part of this RFE, which will instead use a simpler output).
++ frame dump_frame = frame(dump_sp, dump_fp); ++ dump_frame.describe(values, 1); ++ } else { ++ // Stack may not be walkable (invalid PC above FP): ++ // Add descriptions without building a Java frame to avoid issues ++ values.describe(-1, dump_fp, "fp for #1 "); ++ values.describe(-1, dump_sp, "sp for #1"); ++ } ++ values.describe(-1, entry_sp, "raw top of stack"); ++ ++ tty->print_cr("Stack layout:"); ++ values.print(p); ++ } ++ if (has_mh && oopDesc::is_oop(mh)) { ++ mh->print(); ++ if (java_lang_invoke_MethodHandle::is_instance(mh)) { ++ java_lang_invoke_MethodHandle::form(mh)->print(); ++ } ++ } ++ } ++} ++ ++// The stub wraps the arguments in a struct on the stack to avoid ++// dealing with the different calling conventions for passing 6 ++// arguments. ++struct MethodHandleStubArguments { ++ const char* adaptername; ++ oopDesc* mh; ++ intptr_t* saved_regs; ++ intptr_t* entry_sp; ++}; ++void trace_method_handle_stub_wrapper(MethodHandleStubArguments* args) { ++ trace_method_handle_stub(args->adaptername, ++ args->mh, ++ args->saved_regs, ++ args->entry_sp); ++} ++ ++void MethodHandles::trace_method_handle(MacroAssembler* _masm, const char* adaptername) { ++} ++#endif //PRODUCT +diff --git a/src/hotspot/cpu/mips/methodHandles_mips.hpp b/src/hotspot/cpu/mips/methodHandles_mips.hpp +new file mode 100644 +index 00000000000..a95f8e40596 +--- /dev/null ++++ b/src/hotspot/cpu/mips/methodHandles_mips.hpp +@@ -0,0 +1,62 @@ ++/* ++ * Copyright (c) 2010, 2012, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++// Platform-specific definitions for method handles. ++// These definitions are inlined into class MethodHandles. ++ ++// Adapters ++enum /* platform_dependent_constants */ { ++ adapter_code_size = 32000 DEBUG_ONLY(+ 150000) ++}; ++ ++// Additional helper methods for MethodHandles code generation: ++public: ++ static void load_klass_from_Class(MacroAssembler* _masm, Register klass_reg); ++ ++ static void verify_klass(MacroAssembler* _masm, ++ Register obj, vmClassID klass_id, ++ const char* error_message = "wrong klass") NOT_DEBUG_RETURN; ++ ++ static void verify_method_handle(MacroAssembler* _masm, Register mh_reg) { ++ verify_klass(_masm, mh_reg, VM_CLASS_ID(MethodHandle_klass), ++ "reference is a MH"); ++ } ++ ++ static void verify_ref_kind(MacroAssembler* _masm, int ref_kind, Register member_reg, Register temp) NOT_DEBUG_RETURN; ++ ++ // Similar to InterpreterMacroAssembler::jump_from_interpreted. 
++ // Takes care of special dispatch from single stepping too. ++ static void jump_from_method_handle(MacroAssembler* _masm, Register method, Register temp, ++ bool for_compiler_entry); ++ ++ static void jump_to_lambda_form(MacroAssembler* _masm, ++ Register recv, Register method_temp, ++ Register temp2, ++ bool for_compiler_entry); ++ ++ static Register saved_last_sp_register() { ++ // Should be in sharedRuntime, not here. ++ return I29; ++ } +diff --git a/src/hotspot/cpu/mips/mips.ad b/src/hotspot/cpu/mips/mips.ad +new file mode 100644 +index 00000000000..3563bbe0e59 +--- /dev/null ++++ b/src/hotspot/cpu/mips/mips.ad +@@ -0,0 +1,25 @@ ++// ++// Copyright (c) 2011, 2012, Oracle and/or its affiliates. All rights reserved. ++// Copyright (c) 2015, 2016, Loongson Technology. All rights reserved. ++// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++// ++// This code is free software; you can redistribute it and/or modify it ++// under the terms of the GNU General Public License version 2 only, as ++// published by the Free Software Foundation. ++// ++// This code is distributed in the hope that it will be useful, but WITHOUT ++// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++// version 2 for more details (a copy is included in the LICENSE file that ++// accompanied this code). ++// ++// You should have received a copy of the GNU General Public License version ++// 2 along with this work; if not, write to the Free Software Foundation, ++// Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++// ++// Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++// or visit www.oracle.com if you need additional information or have any ++// questions. ++// ++// ++ +diff --git a/src/hotspot/cpu/mips/mips_64.ad b/src/hotspot/cpu/mips/mips_64.ad +new file mode 100644 +index 00000000000..882878f739a +--- /dev/null ++++ b/src/hotspot/cpu/mips/mips_64.ad +@@ -0,0 +1,12317 @@ ++// ++// Copyright (c) 2003, 2013, Oracle and/or its affiliates. All rights reserved. ++// Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. ++// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++// ++// This code is free software; you can redistribute it and/or modify it ++// under the terms of the GNU General Public License version 2 only, as ++// published by the Free Software Foundation. ++// ++// This code is distributed in the hope that it will be useful, but WITHOUT ++// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++// version 2 for more details (a copy is included in the LICENSE file that ++// accompanied this code). ++// ++// You should have received a copy of the GNU General Public License version ++// 2 along with this work; if not, write to the Free Software Foundation, ++// Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++// ++// Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++// or visit www.oracle.com if you need additional information or have any ++// questions. ++// ++// ++ ++// GodSon3 Architecture Description File ++ ++//----------REGISTER DEFINITION BLOCK------------------------------------------ ++// This information is used by the matcher and the register allocator to ++// describe individual registers and classes of registers within the target ++// archtecture. 
++ ++// format: ++// reg_def name (call convention, c-call convention, ideal type, encoding); ++// call convention : ++// NS = No-Save ++// SOC = Save-On-Call ++// SOE = Save-On-Entry ++// AS = Always-Save ++// ideal type : ++// see opto/opcodes.hpp for more info ++// reg_class name (reg, ...); ++// alloc_class name (reg, ...); ++register %{ ++ ++// General Registers ++// Integer Registers ++ reg_def R0 ( NS, NS, Op_RegI, 0, VMRegImpl::Bad()); ++ reg_def AT ( NS, NS, Op_RegI, 1, AT->as_VMReg()); ++ reg_def AT_H ( NS, NS, Op_RegI, 1, AT->as_VMReg()->next()); ++ reg_def V0 (SOC, SOC, Op_RegI, 2, V0->as_VMReg()); ++ reg_def V0_H (SOC, SOC, Op_RegI, 2, V0->as_VMReg()->next()); ++ reg_def V1 (SOC, SOC, Op_RegI, 3, V1->as_VMReg()); ++ reg_def V1_H (SOC, SOC, Op_RegI, 3, V1->as_VMReg()->next()); ++ reg_def A0 (SOC, SOC, Op_RegI, 4, A0->as_VMReg()); ++ reg_def A0_H (SOC, SOC, Op_RegI, 4, A0->as_VMReg()->next()); ++ reg_def A1 (SOC, SOC, Op_RegI, 5, A1->as_VMReg()); ++ reg_def A1_H (SOC, SOC, Op_RegI, 5, A1->as_VMReg()->next()); ++ reg_def A2 (SOC, SOC, Op_RegI, 6, A2->as_VMReg()); ++ reg_def A2_H (SOC, SOC, Op_RegI, 6, A2->as_VMReg()->next()); ++ reg_def A3 (SOC, SOC, Op_RegI, 7, A3->as_VMReg()); ++ reg_def A3_H (SOC, SOC, Op_RegI, 7, A3->as_VMReg()->next()); ++ reg_def A4 (SOC, SOC, Op_RegI, 8, A4->as_VMReg()); ++ reg_def A4_H (SOC, SOC, Op_RegI, 8, A4->as_VMReg()->next()); ++ reg_def A5 (SOC, SOC, Op_RegI, 9, A5->as_VMReg()); ++ reg_def A5_H (SOC, SOC, Op_RegI, 9, A5->as_VMReg()->next()); ++ reg_def A6 (SOC, SOC, Op_RegI, 10, A6->as_VMReg()); ++ reg_def A6_H (SOC, SOC, Op_RegI, 10, A6->as_VMReg()->next()); ++ reg_def A7 (SOC, SOC, Op_RegI, 11, A7->as_VMReg()); ++ reg_def A7_H (SOC, SOC, Op_RegI, 11, A7->as_VMReg()->next()); ++ reg_def T0 (SOC, SOC, Op_RegI, 12, T0->as_VMReg()); ++ reg_def T0_H (SOC, SOC, Op_RegI, 12, T0->as_VMReg()->next()); ++ reg_def T1 (SOC, SOC, Op_RegI, 13, T1->as_VMReg()); ++ reg_def T1_H (SOC, SOC, Op_RegI, 13, T1->as_VMReg()->next()); ++ reg_def T2 (SOC, SOC, Op_RegI, 14, T2->as_VMReg()); ++ reg_def T2_H (SOC, SOC, Op_RegI, 14, T2->as_VMReg()->next()); ++ reg_def T3 (SOC, SOC, Op_RegI, 15, T3->as_VMReg()); ++ reg_def T3_H (SOC, SOC, Op_RegI, 15, T3->as_VMReg()->next()); ++ reg_def S0 (SOC, SOE, Op_RegI, 16, S0->as_VMReg()); ++ reg_def S0_H (SOC, SOE, Op_RegI, 16, S0->as_VMReg()->next()); ++ reg_def S1 (SOC, SOE, Op_RegI, 17, S1->as_VMReg()); ++ reg_def S1_H (SOC, SOE, Op_RegI, 17, S1->as_VMReg()->next()); ++ reg_def S2 (SOC, SOE, Op_RegI, 18, S2->as_VMReg()); ++ reg_def S2_H (SOC, SOE, Op_RegI, 18, S2->as_VMReg()->next()); ++ reg_def S3 (SOC, SOE, Op_RegI, 19, S3->as_VMReg()); ++ reg_def S3_H (SOC, SOE, Op_RegI, 19, S3->as_VMReg()->next()); ++ reg_def S4 (SOC, SOE, Op_RegI, 20, S4->as_VMReg()); ++ reg_def S4_H (SOC, SOE, Op_RegI, 20, S4->as_VMReg()->next()); ++ reg_def S5 (SOC, SOE, Op_RegI, 21, S5->as_VMReg()); ++ reg_def S5_H (SOC, SOE, Op_RegI, 21, S5->as_VMReg()->next()); ++ reg_def S6 (SOC, SOE, Op_RegI, 22, S6->as_VMReg()); ++ reg_def S6_H (SOC, SOE, Op_RegI, 22, S6->as_VMReg()->next()); ++ reg_def S7 (SOC, SOE, Op_RegI, 23, S7->as_VMReg()); ++ reg_def S7_H (SOC, SOE, Op_RegI, 23, S7->as_VMReg()->next()); ++ reg_def T8 (SOC, SOC, Op_RegI, 24, T8->as_VMReg()); ++ reg_def T8_H (SOC, SOC, Op_RegI, 24, T8->as_VMReg()->next()); ++ reg_def T9 (SOC, SOC, Op_RegI, 25, T9->as_VMReg()); ++ reg_def T9_H (SOC, SOC, Op_RegI, 25, T9->as_VMReg()->next()); ++ ++// Special Registers ++ reg_def K0 ( NS, NS, Op_RegI, 26, K0->as_VMReg()); ++ reg_def K1 ( NS, NS, Op_RegI, 27, 
K1->as_VMReg()); ++ reg_def GP ( NS, NS, Op_RegI, 28, GP->as_VMReg()); ++ reg_def GP_H ( NS, NS, Op_RegI, 28, GP->as_VMReg()->next()); ++ reg_def SP ( NS, NS, Op_RegI, 29, SP->as_VMReg()); ++ reg_def SP_H ( NS, NS, Op_RegI, 29, SP->as_VMReg()->next()); ++ reg_def FP ( NS, NS, Op_RegI, 30, FP->as_VMReg()); ++ reg_def FP_H ( NS, NS, Op_RegI, 30, FP->as_VMReg()->next()); ++ reg_def RA ( NS, NS, Op_RegI, 31, RA->as_VMReg()); ++ reg_def RA_H ( NS, NS, Op_RegI, 31, RA->as_VMReg()->next()); ++ ++// Floating registers. ++reg_def F0 ( SOC, SOC, Op_RegF, 0, F0->as_VMReg()); ++reg_def F0_H ( SOC, SOC, Op_RegF, 0, F0->as_VMReg()->next()); ++reg_def F1 ( SOC, SOC, Op_RegF, 1, F1->as_VMReg()); ++reg_def F1_H ( SOC, SOC, Op_RegF, 1, F1->as_VMReg()->next()); ++reg_def F2 ( SOC, SOC, Op_RegF, 2, F2->as_VMReg()); ++reg_def F2_H ( SOC, SOC, Op_RegF, 2, F2->as_VMReg()->next()); ++reg_def F3 ( SOC, SOC, Op_RegF, 3, F3->as_VMReg()); ++reg_def F3_H ( SOC, SOC, Op_RegF, 3, F3->as_VMReg()->next()); ++reg_def F4 ( SOC, SOC, Op_RegF, 4, F4->as_VMReg()); ++reg_def F4_H ( SOC, SOC, Op_RegF, 4, F4->as_VMReg()->next()); ++reg_def F5 ( SOC, SOC, Op_RegF, 5, F5->as_VMReg()); ++reg_def F5_H ( SOC, SOC, Op_RegF, 5, F5->as_VMReg()->next()); ++reg_def F6 ( SOC, SOC, Op_RegF, 6, F6->as_VMReg()); ++reg_def F6_H ( SOC, SOC, Op_RegF, 6, F6->as_VMReg()->next()); ++reg_def F7 ( SOC, SOC, Op_RegF, 7, F7->as_VMReg()); ++reg_def F7_H ( SOC, SOC, Op_RegF, 7, F7->as_VMReg()->next()); ++reg_def F8 ( SOC, SOC, Op_RegF, 8, F8->as_VMReg()); ++reg_def F8_H ( SOC, SOC, Op_RegF, 8, F8->as_VMReg()->next()); ++reg_def F9 ( SOC, SOC, Op_RegF, 9, F9->as_VMReg()); ++reg_def F9_H ( SOC, SOC, Op_RegF, 9, F9->as_VMReg()->next()); ++reg_def F10 ( SOC, SOC, Op_RegF, 10, F10->as_VMReg()); ++reg_def F10_H ( SOC, SOC, Op_RegF, 10, F10->as_VMReg()->next()); ++reg_def F11 ( SOC, SOC, Op_RegF, 11, F11->as_VMReg()); ++reg_def F11_H ( SOC, SOC, Op_RegF, 11, F11->as_VMReg()->next()); ++reg_def F12 ( SOC, SOC, Op_RegF, 12, F12->as_VMReg()); ++reg_def F12_H ( SOC, SOC, Op_RegF, 12, F12->as_VMReg()->next()); ++reg_def F13 ( SOC, SOC, Op_RegF, 13, F13->as_VMReg()); ++reg_def F13_H ( SOC, SOC, Op_RegF, 13, F13->as_VMReg()->next()); ++reg_def F14 ( SOC, SOC, Op_RegF, 14, F14->as_VMReg()); ++reg_def F14_H ( SOC, SOC, Op_RegF, 14, F14->as_VMReg()->next()); ++reg_def F15 ( SOC, SOC, Op_RegF, 15, F15->as_VMReg()); ++reg_def F15_H ( SOC, SOC, Op_RegF, 15, F15->as_VMReg()->next()); ++reg_def F16 ( SOC, SOC, Op_RegF, 16, F16->as_VMReg()); ++reg_def F16_H ( SOC, SOC, Op_RegF, 16, F16->as_VMReg()->next()); ++reg_def F17 ( SOC, SOC, Op_RegF, 17, F17->as_VMReg()); ++reg_def F17_H ( SOC, SOC, Op_RegF, 17, F17->as_VMReg()->next()); ++reg_def F18 ( SOC, SOC, Op_RegF, 18, F18->as_VMReg()); ++reg_def F18_H ( SOC, SOC, Op_RegF, 18, F18->as_VMReg()->next()); ++reg_def F19 ( SOC, SOC, Op_RegF, 19, F19->as_VMReg()); ++reg_def F19_H ( SOC, SOC, Op_RegF, 19, F19->as_VMReg()->next()); ++reg_def F20 ( SOC, SOC, Op_RegF, 20, F20->as_VMReg()); ++reg_def F20_H ( SOC, SOC, Op_RegF, 20, F20->as_VMReg()->next()); ++reg_def F21 ( SOC, SOC, Op_RegF, 21, F21->as_VMReg()); ++reg_def F21_H ( SOC, SOC, Op_RegF, 21, F21->as_VMReg()->next()); ++reg_def F22 ( SOC, SOC, Op_RegF, 22, F22->as_VMReg()); ++reg_def F22_H ( SOC, SOC, Op_RegF, 22, F22->as_VMReg()->next()); ++reg_def F23 ( SOC, SOC, Op_RegF, 23, F23->as_VMReg()); ++reg_def F23_H ( SOC, SOC, Op_RegF, 23, F23->as_VMReg()->next()); ++reg_def F24 ( SOC, SOC, Op_RegF, 24, F24->as_VMReg()); ++reg_def F24_H ( SOC, SOC, Op_RegF, 24, 
F24->as_VMReg()->next()); ++reg_def F25 ( SOC, SOC, Op_RegF, 25, F25->as_VMReg()); ++reg_def F25_H ( SOC, SOC, Op_RegF, 25, F25->as_VMReg()->next()); ++reg_def F26 ( SOC, SOC, Op_RegF, 26, F26->as_VMReg()); ++reg_def F26_H ( SOC, SOC, Op_RegF, 26, F26->as_VMReg()->next()); ++reg_def F27 ( SOC, SOC, Op_RegF, 27, F27->as_VMReg()); ++reg_def F27_H ( SOC, SOC, Op_RegF, 27, F27->as_VMReg()->next()); ++reg_def F28 ( SOC, SOC, Op_RegF, 28, F28->as_VMReg()); ++reg_def F28_H ( SOC, SOC, Op_RegF, 28, F28->as_VMReg()->next()); ++reg_def F29 ( SOC, SOC, Op_RegF, 29, F29->as_VMReg()); ++reg_def F29_H ( SOC, SOC, Op_RegF, 29, F29->as_VMReg()->next()); ++reg_def F30 ( SOC, SOC, Op_RegF, 30, F30->as_VMReg()); ++reg_def F30_H ( SOC, SOC, Op_RegF, 30, F30->as_VMReg()->next()); ++reg_def F31 ( SOC, SOC, Op_RegF, 31, F31->as_VMReg()); ++reg_def F31_H ( SOC, SOC, Op_RegF, 31, F31->as_VMReg()->next()); ++ ++ ++// ---------------------------- ++// Special Registers ++//S6 is used for get_thread(S6) ++//S5 is uesd for heapbase of compressed oop ++alloc_class chunk0( ++ S7, S7_H, ++ S0, S0_H, ++ S1, S1_H, ++ S2, S2_H, ++ S4, S4_H, ++ S5, S5_H, ++ S6, S6_H, ++ S3, S3_H, ++ T2, T2_H, ++ T3, T3_H, ++ T8, T8_H, ++ T9, T9_H, ++ T1, T1_H, // inline_cache_reg ++ V1, V1_H, ++ A7, A7_H, ++ A6, A6_H, ++ A5, A5_H, ++ A4, A4_H, ++ V0, V0_H, ++ A3, A3_H, ++ A2, A2_H, ++ A1, A1_H, ++ A0, A0_H, ++ T0, T0_H, ++ GP, GP_H ++ RA, RA_H, ++ SP, SP_H, // stack_pointer ++ FP, FP_H // frame_pointer ++ ); ++ ++alloc_class chunk1( F0, F0_H, ++ F1, F1_H, ++ F2, F2_H, ++ F3, F3_H, ++ F4, F4_H, ++ F5, F5_H, ++ F6, F6_H, ++ F7, F7_H, ++ F8, F8_H, ++ F9, F9_H, ++ F10, F10_H, ++ F11, F11_H, ++ F20, F20_H, ++ F21, F21_H, ++ F22, F22_H, ++ F23, F23_H, ++ F24, F24_H, ++ F25, F25_H, ++ F26, F26_H, ++ F27, F27_H, ++ F28, F28_H, ++ F19, F19_H, ++ F18, F18_H, ++ F17, F17_H, ++ F16, F16_H, ++ F15, F15_H, ++ F14, F14_H, ++ F13, F13_H, ++ F12, F12_H, ++ F29, F29_H, ++ F30, F30_H, ++ F31, F31_H); ++ ++reg_class s_reg( S0, S1, S2, S3, S4, S5, S6, S7 ); ++reg_class s0_reg( S0 ); ++reg_class s1_reg( S1 ); ++reg_class s2_reg( S2 ); ++reg_class s3_reg( S3 ); ++reg_class s4_reg( S4 ); ++reg_class s5_reg( S5 ); ++reg_class s6_reg( S6 ); ++reg_class s7_reg( S7 ); ++ ++reg_class t_reg( T0, T1, T2, T3, T8, T9 ); ++reg_class t0_reg( T0 ); ++reg_class t1_reg( T1 ); ++reg_class t2_reg( T2 ); ++reg_class t3_reg( T3 ); ++reg_class t8_reg( T8 ); ++reg_class t9_reg( T9 ); ++ ++reg_class a_reg( A0, A1, A2, A3, A4, A5, A6, A7 ); ++reg_class a0_reg( A0 ); ++reg_class a1_reg( A1 ); ++reg_class a2_reg( A2 ); ++reg_class a3_reg( A3 ); ++reg_class a4_reg( A4 ); ++reg_class a5_reg( A5 ); ++reg_class a6_reg( A6 ); ++reg_class a7_reg( A7 ); ++ ++reg_class v0_reg( V0 ); ++reg_class v1_reg( V1 ); ++ ++reg_class sp_reg( SP, SP_H ); ++reg_class fp_reg( FP, FP_H ); ++ ++reg_class v0_long_reg( V0, V0_H ); ++reg_class v1_long_reg( V1, V1_H ); ++reg_class a0_long_reg( A0, A0_H ); ++reg_class a1_long_reg( A1, A1_H ); ++reg_class a2_long_reg( A2, A2_H ); ++reg_class a3_long_reg( A3, A3_H ); ++reg_class a4_long_reg( A4, A4_H ); ++reg_class a5_long_reg( A5, A5_H ); ++reg_class a6_long_reg( A6, A6_H ); ++reg_class a7_long_reg( A7, A7_H ); ++reg_class t0_long_reg( T0, T0_H ); ++reg_class t1_long_reg( T1, T1_H ); ++reg_class t2_long_reg( T2, T2_H ); ++reg_class t3_long_reg( T3, T3_H ); ++reg_class t8_long_reg( T8, T8_H ); ++reg_class t9_long_reg( T9, T9_H ); ++reg_class s0_long_reg( S0, S0_H ); ++reg_class s1_long_reg( S1, S1_H ); ++reg_class s2_long_reg( S2, S2_H ); ++reg_class s3_long_reg( S3, 
S3_H ); ++reg_class s4_long_reg( S4, S4_H ); ++reg_class s5_long_reg( S5, S5_H ); ++reg_class s6_long_reg( S6, S6_H ); ++reg_class s7_long_reg( S7, S7_H ); ++ ++reg_class int_reg( S7, S0, S1, S2, S4, S3, T8, T2, T3, T1, V1, A7, A6, A5, A4, V0, A3, A2, A1, A0, T0 ); ++ ++reg_class no_Ax_int_reg( S7, S0, S1, S2, S4, S3, T8, T2, T3, T1, V1, V0, T0 ); ++ ++reg_class p_reg( ++ S7, S7_H, ++ S0, S0_H, ++ S1, S1_H, ++ S2, S2_H, ++ S4, S4_H, ++ S3, S3_H, ++ T8, T8_H, ++ T2, T2_H, ++ T3, T3_H, ++ T1, T1_H, ++ A7, A7_H, ++ A6, A6_H, ++ A5, A5_H, ++ A4, A4_H, ++ A3, A3_H, ++ A2, A2_H, ++ A1, A1_H, ++ A0, A0_H, ++ T0, T0_H ++ ); ++ ++reg_class no_T8_p_reg( ++ S7, S7_H, ++ S0, S0_H, ++ S1, S1_H, ++ S2, S2_H, ++ S4, S4_H, ++ S3, S3_H, ++ T2, T2_H, ++ T3, T3_H, ++ T1, T1_H, ++ A7, A7_H, ++ A6, A6_H, ++ A5, A5_H, ++ A4, A4_H, ++ A3, A3_H, ++ A2, A2_H, ++ A1, A1_H, ++ A0, A0_H, ++ T0, T0_H ++ ); ++ ++reg_class long_reg( ++ S7, S7_H, ++ S0, S0_H, ++ S1, S1_H, ++ S2, S2_H, ++ S4, S4_H, ++ S3, S3_H, ++ T8, T8_H, ++ T2, T2_H, ++ T3, T3_H, ++ T1, T1_H, ++ A7, A7_H, ++ A6, A6_H, ++ A5, A5_H, ++ A4, A4_H, ++ A3, A3_H, ++ A2, A2_H, ++ A1, A1_H, ++ A0, A0_H, ++ T0, T0_H ++ ); ++ ++ ++// Floating point registers. ++// F31 are not used as temporary registers in D2I ++reg_class flt_reg( F0, F1, F2, F3, F4, F5, F6, F7, F8, F9, F10, F11, F12, F13, F14, F15, F16, F17, F18, F19, F20, F21, F22, F23, F24, F25, F26, F27, F28, F29, F31); ++reg_class dbl_reg( F0, F0_H, ++ F1, F1_H, ++ F2, F2_H, ++ F3, F3_H, ++ F4, F4_H, ++ F5, F5_H, ++ F6, F6_H, ++ F7, F7_H, ++ F8, F8_H, ++ F9, F9_H, ++ F10, F10_H, ++ F11, F11_H, ++ F12, F12_H, ++ F13, F13_H, ++ F14, F14_H, ++ F15, F15_H, ++ F16, F16_H, ++ F17, F17_H, ++ F18, F18_H, ++ F19, F19_H, ++ F20, F20_H, ++ F21, F21_H, ++ F22, F22_H, ++ F23, F23_H, ++ F24, F24_H, ++ F25, F25_H, ++ F26, F26_H, ++ F27, F27_H, ++ F28, F28_H, ++ F29, F29_H, ++ F31, F31_H); ++ ++reg_class flt_arg0( F12 ); ++reg_class dbl_arg0( F12, F12_H ); ++reg_class dbl_arg1( F14, F14_H ); ++ ++%} ++ ++//----------DEFINITION BLOCK--------------------------------------------------- ++// Define name --> value mappings to inform the ADLC of an integer valued name ++// Current support includes integer values in the range [0, 0x7FFFFFFF] ++// Format: ++// int_def ( , ); ++// Generated Code in ad_.hpp ++// #define () ++// // value == ++// Generated code in ad_.cpp adlc_verification() ++// assert( == , "Expect () to equal "); ++// ++definitions %{ ++ int_def DEFAULT_COST ( 100, 100); ++ int_def HUGE_COST (1000000, 1000000); ++ ++ // Memory refs are twice as expensive as run-of-the-mill. ++ int_def MEMORY_REF_COST ( 200, DEFAULT_COST * 2); ++ ++ // Branches are even more expensive. ++ int_def BRANCH_COST ( 300, DEFAULT_COST * 3); ++ // we use jr instruction to construct call, so more expensive ++ int_def CALL_COST ( 500, DEFAULT_COST * 5); ++/* ++ int_def EQUAL ( 1, 1 ); ++ int_def NOT_EQUAL ( 2, 2 ); ++ int_def GREATER ( 3, 3 ); ++ int_def GREATER_EQUAL ( 4, 4 ); ++ int_def LESS ( 5, 5 ); ++ int_def LESS_EQUAL ( 6, 6 ); ++*/ ++%} ++ ++ ++ ++//----------SOURCE BLOCK------------------------------------------------------- ++// This is a block of C++ code which provides values, functions, and ++// definitions necessary in the rest of the architecture description ++ ++source_hpp %{ ++// Header information of the source block. ++// Method declarations/definitions which are used outside ++// the ad-scope can conveniently be defined here. 
++// ++// To keep related declarations/definitions/uses close together, ++// we switch between source %{ }% and source_hpp %{ }% freely as needed. ++ ++class CallStubImpl { ++ ++ //-------------------------------------------------------------- ++ //---< Used for optimization in Compile::shorten_branches >--- ++ //-------------------------------------------------------------- ++ ++ public: ++ // Size of call trampoline stub. ++ static uint size_call_trampoline() { ++ return 0; // no call trampolines on this platform ++ } ++ ++ // number of relocations needed by a call trampoline stub ++ static uint reloc_call_trampoline() { ++ return 0; // no call trampolines on this platform ++ } ++}; ++ ++class HandlerImpl { ++ ++ public: ++ ++ static int emit_exception_handler(CodeBuffer &cbuf); ++ static int emit_deopt_handler(CodeBuffer& cbuf); ++ ++ static uint size_exception_handler() { ++ // NativeCall instruction size is the same as NativeJump. ++ // exception handler starts out as jump and can be patched to ++ // a call be deoptimization. (4932387) ++ // Note that this value is also credited (in output.cpp) to ++ // the size of the code section. ++ int size = NativeCall::instruction_size; ++ const uintx m = 16 - 1; ++ return mask_bits(size + m, ~m); ++ //return round_to(size, 16); ++ } ++ ++ static uint size_deopt_handler() { ++ int size = NativeCall::instruction_size; ++ const uintx m = 16 - 1; ++ return mask_bits(size + m, ~m); ++ //return round_to(size, 16); ++ } ++}; ++ ++class Node::PD { ++public: ++ enum NodeFlags { ++ _last_flag = Node::_last_flag ++ }; ++}; ++ ++%} // end source_hpp ++ ++source %{ ++ ++#define NO_INDEX 0 ++#define RELOC_IMM64 Assembler::imm_operand ++#define RELOC_DISP32 Assembler::disp32_operand ++ ++ ++#define __ _masm. ++ ++#define A0 RA0 ++#define A1 RA1 ++#define A2 RA2 ++#define A3 RA3 ++#define A4 RA4 ++#define A5 RA5 ++#define A6 RA6 ++#define A7 RA7 ++#define T0 RT0 ++#define T1 RT1 ++#define T2 RT2 ++#define T3 RT3 ++#define T8 RT8 ++#define T9 RT9 ++ ++ ++void PhaseOutput::pd_perform_mach_node_analysis() { ++} ++ ++int MachNode::pd_alignment_required() const { ++ return 1; ++} ++ ++int MachNode::compute_padding(int current_offset) const { ++ return 0; ++} ++ ++// Emit exception handler code. ++// Stuff framesize into a register and call a VM stub routine. ++int HandlerImpl::emit_exception_handler(CodeBuffer& cbuf) { ++ // Note that the code buffer's insts_mark is always relative to insts. ++ // That's why we must use the macroassembler to generate a handler. ++ C2_MacroAssembler _masm(&cbuf); ++ address base = __ start_a_stub(size_exception_handler()); ++ if (base == NULL) { ++ ciEnv::current()->record_failure("CodeCache is full"); ++ return 0; // CodeBuffer::expand failed ++ } ++ ++ int offset = __ offset(); ++ ++ __ block_comment("; emit_exception_handler"); ++ ++ cbuf.set_insts_mark(); ++ __ relocate(relocInfo::runtime_call_type); ++ __ patchable_jump((address)OptoRuntime::exception_blob()->entry_point()); ++ __ align(16); ++ assert(__ offset() - offset <= (int) size_exception_handler(), "overflow"); ++ __ end_a_stub(); ++ return offset; ++} ++ ++// Emit deopt handler code. ++int HandlerImpl::emit_deopt_handler(CodeBuffer& cbuf) { ++ // Note that the code buffer's insts_mark is always relative to insts. ++ // That's why we must use the macroassembler to generate a handler. 
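++ // The handler body is a single patchable call to SharedRuntime::deopt_blob()->unpack(), padded to a 16-byte boundary so it stays within the size reserved by size_deopt_handler().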
++ C2_MacroAssembler _masm(&cbuf); ++ address base = __ start_a_stub(size_deopt_handler()); ++ if (base == NULL) { ++ ciEnv::current()->record_failure("CodeCache is full"); ++ return 0; // CodeBuffer::expand failed ++ } ++ ++ int offset = __ offset(); ++ ++ __ block_comment("; emit_deopt_handler"); ++ ++ cbuf.set_insts_mark(); ++ __ relocate(relocInfo::runtime_call_type); ++ __ patchable_call(SharedRuntime::deopt_blob()->unpack()); ++ __ align(16); ++ assert(__ offset() - offset <= (int) size_deopt_handler(), "overflow"); ++ __ end_a_stub(); ++ return offset; ++} ++ ++ ++const bool Matcher::match_rule_supported(int opcode) { ++ if (!has_match_rule(opcode)) ++ return false; ++ ++ switch (opcode) { ++ //Op_CountLeadingZerosI Op_CountLeadingZerosL can be deleted, all MIPS CPUs support clz & dclz. ++ case Op_CountLeadingZerosI: ++ case Op_CountLeadingZerosL: ++ if (!UseCountLeadingZerosInstructionMIPS64) ++ return false; ++ break; ++ case Op_CountTrailingZerosI: ++ case Op_CountTrailingZerosL: ++ if (!UseCountTrailingZerosInstructionMIPS64) ++ return false; ++ break; ++ } ++ ++ return true; // Per default match rules are supported. ++} ++ ++const bool Matcher::match_rule_supported_vector(int opcode, int vlen, BasicType bt) { ++ if (!match_rule_supported(opcode) || !vector_size_supported(bt, vlen)) ++ return false; ++ ++ return true; ++} ++ ++// Vector calling convention not yet implemented. ++const bool Matcher::supports_vector_calling_convention(void) { ++ return false; ++} ++ ++OptoRegPair Matcher::vector_return_value(uint ideal_reg) { ++ Unimplemented(); ++ return OptoRegPair(0, 0); ++} ++ ++const int Matcher::float_pressure(int default_pressure_threshold) { ++ Unimplemented(); ++ return default_pressure_threshold; ++} ++ ++bool Matcher::is_short_branch_offset(int rule, int br_size, int offset) { ++ int offs = offset - br_size + 4; ++ // To be conservative on MIPS ++ // branch node should be end with: ++ // branch inst ++ // delay slot ++ const int safety_zone = 3 * BytesPerInstWord; ++ return Assembler::is_simm16((offs<0 ? offs-safety_zone : offs+safety_zone) >> 2); ++} ++ ++MachOper* Matcher::pd_specialize_generic_vector_operand(MachOper* original_opnd, uint ideal_reg, bool is_temp) { ++ ShouldNotReachHere(); // generic vector operands not supported ++ return NULL; ++} ++ ++bool Matcher::is_generic_reg2reg_move(MachNode* m) { ++ ShouldNotReachHere(); // generic vector operands not supported ++ return false; ++} ++ ++bool Matcher::is_generic_vector(MachOper* opnd) { ++ ShouldNotReachHere(); // generic vector operands not supported ++ return false; ++} ++ ++const RegMask* Matcher::predicate_reg_mask(void) { ++ return NULL; ++} ++ ++const TypeVect* Matcher::predicate_reg_type(const Type* elemTy, int length) { ++ return NULL; ++} ++ ++// Max vector size in bytes. 0 if not supported. ++const int Matcher::vector_width_in_bytes(BasicType bt) { ++ if (MaxVectorSize == 0) ++ return 0; ++ assert(MaxVectorSize == 8, ""); ++ return 8; ++} ++ ++const int Matcher::scalable_vector_reg_size(const BasicType bt) { ++ return -1; ++} ++ ++// Vector ideal reg ++const uint Matcher::vector_ideal_reg(int size) { ++ assert(MaxVectorSize == 8, ""); ++ switch(size) { ++ case 8: return Op_VecD; ++ } ++ ShouldNotReachHere(); ++ return 0; ++} ++ ++// Should the matcher clone input 'm' of node 'n'? 
++bool Matcher::pd_clone_node(Node* n, Node* m, Matcher::MStack& mstack) { ++ return false; ++} ++ ++// Should the Matcher clone shifts on addressing modes, expecting them ++// to be subsumed into complex addressing expressions or compute them ++// into registers? ++bool Matcher::pd_clone_address_expressions(AddPNode* m, Matcher::MStack& mstack, VectorSet& address_visited) { ++ return clone_base_plus_offset_address(m, mstack, address_visited); ++} ++ ++// Limits on vector size (number of elements) loaded into vector. ++const int Matcher::max_vector_size(const BasicType bt) { ++ assert(is_java_primitive(bt), "only primitive type vectors"); ++ return vector_width_in_bytes(bt)/type2aelembytes(bt); ++} ++ ++const int Matcher::min_vector_size(const BasicType bt) { ++ return max_vector_size(bt); // Same as max. ++} ++ ++// Register for DIVI projection of divmodI ++RegMask Matcher::divI_proj_mask() { ++ ShouldNotReachHere(); ++ return RegMask(); ++} ++ ++// Register for MODI projection of divmodI ++RegMask Matcher::modI_proj_mask() { ++ ShouldNotReachHere(); ++ return RegMask(); ++} ++ ++// Register for DIVL projection of divmodL ++RegMask Matcher::divL_proj_mask() { ++ ShouldNotReachHere(); ++ return RegMask(); ++} ++ ++// Return whether or not this register is ever used as an argument. This ++// function is used on startup to build the trampoline stubs in generateOptoStub. ++// Registers not mentioned will be killed by the VM call in the trampoline, and ++// arguments in those registers not be available to the callee. ++bool Matcher::can_be_java_arg( int reg ) { ++ // Refer to: [sharedRuntime_mips_64.cpp] SharedRuntime::java_calling_convention() ++ if ( reg == T0_num || reg == T0_H_num ++ || reg == A0_num || reg == A0_H_num ++ || reg == A1_num || reg == A1_H_num ++ || reg == A2_num || reg == A2_H_num ++ || reg == A3_num || reg == A3_H_num ++ || reg == A4_num || reg == A4_H_num ++ || reg == A5_num || reg == A5_H_num ++ || reg == A6_num || reg == A6_H_num ++ || reg == A7_num || reg == A7_H_num ) ++ return true; ++ ++ if ( reg == F12_num || reg == F12_H_num ++ || reg == F13_num || reg == F13_H_num ++ || reg == F14_num || reg == F14_H_num ++ || reg == F15_num || reg == F15_H_num ++ || reg == F16_num || reg == F16_H_num ++ || reg == F17_num || reg == F17_H_num ++ || reg == F18_num || reg == F18_H_num ++ || reg == F19_num || reg == F19_H_num ) ++ return true; ++ ++ return false; ++} ++ ++bool Matcher::is_spillable_arg( int reg ) { ++ return can_be_java_arg(reg); ++} ++ ++bool Matcher::use_asm_for_ldiv_by_con( jlong divisor ) { ++ return false; ++} ++ ++// Register for MODL projection of divmodL ++RegMask Matcher::modL_proj_mask() { ++ ShouldNotReachHere(); ++ return RegMask(); ++} ++ ++const RegMask Matcher::method_handle_invoke_SP_save_mask() { ++ return FP_REG_mask(); ++} ++ ++int CallStaticJavaDirectNode::compute_padding(int current_offset) const { ++ const uintx m = alignment_required() - 1; ++ return mask_bits(current_offset + m, ~m) - current_offset; ++} ++ ++int CallDynamicJavaDirectNode::compute_padding(int current_offset) const { ++ const uintx m = alignment_required() - 1; ++ return mask_bits(current_offset + m, ~m) - current_offset; ++} ++ ++int CallLeafNoFPDirectNode::compute_padding(int current_offset) const { ++ const uintx m = alignment_required() - 1; ++ return mask_bits(current_offset + m, ~m) - current_offset; ++} ++ ++int CallLeafDirectNode::compute_padding(int current_offset) const { ++ const uintx m = alignment_required() - 1; ++ return mask_bits(current_offset + m, ~m) - 
current_offset; ++} ++ ++int CallRuntimeDirectNode::compute_padding(int current_offset) const { ++ const uintx m = alignment_required() - 1; ++ return mask_bits(current_offset + m, ~m) - current_offset; ++} ++ ++#ifndef PRODUCT ++void MachBreakpointNode::format( PhaseRegAlloc *, outputStream* st ) const { ++ st->print("BRK"); ++} ++#endif ++ ++void MachBreakpointNode::emit(CodeBuffer &cbuf, PhaseRegAlloc* ra_) const { ++ C2_MacroAssembler _masm(&cbuf); ++ __ brk(5); ++} ++ ++uint MachBreakpointNode::size(PhaseRegAlloc* ra_) const { ++ return MachNode::size(ra_); ++} ++ ++ ++ ++// !!!!! Special hack to get all type of calls to specify the byte offset ++// from the start of the call to the point where the return address ++// will point. ++int MachCallStaticJavaNode::ret_addr_offset() { ++ //lui ++ //ori ++ //nop ++ //nop ++ //jalr ++ //nop ++ return 24; ++} ++ ++int MachCallDynamicJavaNode::ret_addr_offset() { ++ //lui IC_Klass, ++ //ori IC_Klass, ++ //dsll IC_Klass ++ //ori IC_Klass ++ ++ //lui T9 ++ //ori T9 ++ //nop ++ //nop ++ //jalr T9 ++ //nop ++ return 4 * 4 + 4 * 6; ++} ++ ++//============================================================================= ++ ++// Figure out which register class each belongs in: rc_int, rc_float, rc_stack ++enum RC { rc_bad, rc_int, rc_float, rc_stack }; ++static enum RC rc_class( OptoReg::Name reg ) { ++ if( !OptoReg::is_valid(reg) ) return rc_bad; ++ if (OptoReg::is_stack(reg)) return rc_stack; ++ VMReg r = OptoReg::as_VMReg(reg); ++ if (r->is_Register()) return rc_int; ++ assert(r->is_FloatRegister(), "must be"); ++ return rc_float; ++} ++ ++uint MachSpillCopyNode::implementation( CodeBuffer *cbuf, PhaseRegAlloc *ra_, bool do_size, outputStream* st ) const { ++ // Get registers to move ++ OptoReg::Name src_second = ra_->get_reg_second(in(1)); ++ OptoReg::Name src_first = ra_->get_reg_first(in(1)); ++ OptoReg::Name dst_second = ra_->get_reg_second(this ); ++ OptoReg::Name dst_first = ra_->get_reg_first(this ); ++ ++ enum RC src_second_rc = rc_class(src_second); ++ enum RC src_first_rc = rc_class(src_first); ++ enum RC dst_second_rc = rc_class(dst_second); ++ enum RC dst_first_rc = rc_class(dst_first); ++ ++ assert(OptoReg::is_valid(src_first) && OptoReg::is_valid(dst_first), "must move at least 1 register" ); ++ ++ // Generate spill code! 
++ ++ if( src_first == dst_first && src_second == dst_second ) ++ return 0; // Self copy, no move ++ ++ if (src_first_rc == rc_stack) { ++ // mem -> ++ if (dst_first_rc == rc_stack) { ++ // mem -> mem ++ assert(src_second != dst_first, "overlap"); ++ if ((src_first & 1) == 0 && src_first + 1 == src_second && ++ (dst_first & 1) == 0 && dst_first + 1 == dst_second) { ++ // 64-bit ++ int src_offset = ra_->reg2offset(src_first); ++ int dst_offset = ra_->reg2offset(dst_first); ++ if (cbuf) { ++ C2_MacroAssembler _masm(cbuf); ++ __ ld(AT, Address(SP, src_offset)); ++ __ sd(AT, Address(SP, dst_offset)); ++#ifndef PRODUCT ++ } else { ++ st->print("\n\t"); ++ st->print("ld AT, [SP + #%d]\t# 64-bit mem-mem spill 1\n\t" ++ "sd AT, [SP + #%d]", ++ src_offset, dst_offset); ++#endif ++ } ++ } else { ++ // 32-bit ++ assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform"); ++ assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform"); ++ // No pushl/popl, so: ++ int src_offset = ra_->reg2offset(src_first); ++ int dst_offset = ra_->reg2offset(dst_first); ++ if (cbuf) { ++ C2_MacroAssembler _masm(cbuf); ++ __ lw(AT, Address(SP, src_offset)); ++ __ sw(AT, Address(SP, dst_offset)); ++#ifndef PRODUCT ++ } else { ++ st->print("\n\t"); ++ st->print("lw AT, [SP + #%d] spill 2\n\t" ++ "sw AT, [SP + #%d]\n\t", ++ src_offset, dst_offset); ++#endif ++ } ++ } ++ return 0; ++ } else if (dst_first_rc == rc_int) { ++ // mem -> gpr ++ if ((src_first & 1) == 0 && src_first + 1 == src_second && ++ (dst_first & 1) == 0 && dst_first + 1 == dst_second) { ++ // 64-bit ++ int offset = ra_->reg2offset(src_first); ++ if (cbuf) { ++ C2_MacroAssembler _masm(cbuf); ++ __ ld(as_Register(Matcher::_regEncode[dst_first]), Address(SP, offset)); ++#ifndef PRODUCT ++ } else { ++ st->print("\n\t"); ++ st->print("ld %s, [SP + #%d]\t# spill 3", ++ Matcher::regName[dst_first], ++ offset); ++#endif ++ } ++ } else { ++ // 32-bit ++ assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform"); ++ assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform"); ++ int offset = ra_->reg2offset(src_first); ++ if (cbuf) { ++ C2_MacroAssembler _masm(cbuf); ++ if (this->ideal_reg() == Op_RegI) ++ __ lw(as_Register(Matcher::_regEncode[dst_first]), Address(SP, offset)); ++ else ++ __ lwu(as_Register(Matcher::_regEncode[dst_first]), Address(SP, offset)); ++#ifndef PRODUCT ++ } else { ++ st->print("\n\t"); ++ if (this->ideal_reg() == Op_RegI) ++ st->print("lw %s, [SP + #%d]\t# spill 4", ++ Matcher::regName[dst_first], ++ offset); ++ else ++ st->print("lwu %s, [SP + #%d]\t# spill 5", ++ Matcher::regName[dst_first], ++ offset); ++#endif ++ } ++ } ++ return 0; ++ } else if (dst_first_rc == rc_float) { ++ // mem-> xmm ++ if ((src_first & 1) == 0 && src_first + 1 == src_second && ++ (dst_first & 1) == 0 && dst_first + 1 == dst_second) { ++ // 64-bit ++ int offset = ra_->reg2offset(src_first); ++ if (cbuf) { ++ C2_MacroAssembler _masm(cbuf); ++ __ ldc1( as_FloatRegister(Matcher::_regEncode[dst_first]), Address(SP, offset)); ++#ifndef PRODUCT ++ } else { ++ st->print("\n\t"); ++ st->print("ldc1 %s, [SP + #%d]\t# spill 6", ++ Matcher::regName[dst_first], ++ offset); ++#endif ++ } ++ } else { ++ // 32-bit ++ assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform"); ++ assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform"); ++ int offset = ra_->reg2offset(src_first); ++ if (cbuf) { ++ C2_MacroAssembler _masm(cbuf); ++ __ lwc1( 
as_FloatRegister(Matcher::_regEncode[dst_first]), Address(SP, offset)); ++#ifndef PRODUCT ++ } else { ++ st->print("\n\t"); ++ st->print("lwc1 %s, [SP + #%d]\t# spill 7", ++ Matcher::regName[dst_first], ++ offset); ++#endif ++ } ++ } ++ return 0; ++ } ++ } else if (src_first_rc == rc_int) { ++ // gpr -> ++ if (dst_first_rc == rc_stack) { ++ // gpr -> mem ++ if ((src_first & 1) == 0 && src_first + 1 == src_second && ++ (dst_first & 1) == 0 && dst_first + 1 == dst_second) { ++ // 64-bit ++ int offset = ra_->reg2offset(dst_first); ++ if (cbuf) { ++ C2_MacroAssembler _masm(cbuf); ++ __ sd(as_Register(Matcher::_regEncode[src_first]), Address(SP, offset)); ++#ifndef PRODUCT ++ } else { ++ st->print("\n\t"); ++ st->print("sd %s, [SP + #%d] # spill 8", ++ Matcher::regName[src_first], ++ offset); ++#endif ++ } ++ } else { ++ // 32-bit ++ assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform"); ++ assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform"); ++ int offset = ra_->reg2offset(dst_first); ++ if (cbuf) { ++ C2_MacroAssembler _masm(cbuf); ++ __ sw(as_Register(Matcher::_regEncode[src_first]), Address(SP, offset)); ++#ifndef PRODUCT ++ } else { ++ st->print("\n\t"); ++ st->print("sw %s, [SP + #%d]\t# spill 9", ++ Matcher::regName[src_first], offset); ++#endif ++ } ++ } ++ return 0; ++ } else if (dst_first_rc == rc_int) { ++ // gpr -> gpr ++ if ((src_first & 1) == 0 && src_first + 1 == src_second && ++ (dst_first & 1) == 0 && dst_first + 1 == dst_second) { ++ // 64-bit ++ if (cbuf) { ++ C2_MacroAssembler _masm(cbuf); ++ __ move(as_Register(Matcher::_regEncode[dst_first]), ++ as_Register(Matcher::_regEncode[src_first])); ++#ifndef PRODUCT ++ } else { ++ st->print("\n\t"); ++ st->print("move(64bit) %s <-- %s\t# spill 10", ++ Matcher::regName[dst_first], ++ Matcher::regName[src_first]); ++#endif ++ } ++ return 0; ++ } else { ++ // 32-bit ++ assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform"); ++ assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform"); ++ if (cbuf) { ++ C2_MacroAssembler _masm(cbuf); ++ if (this->ideal_reg() == Op_RegI) ++ __ move_u32(as_Register(Matcher::_regEncode[dst_first]), as_Register(Matcher::_regEncode[src_first])); ++ else ++ __ daddu(as_Register(Matcher::_regEncode[dst_first]), as_Register(Matcher::_regEncode[src_first]), R0); ++#ifndef PRODUCT ++ } else { ++ st->print("\n\t"); ++ st->print("move(32-bit) %s <-- %s\t# spill 11", ++ Matcher::regName[dst_first], ++ Matcher::regName[src_first]); ++#endif ++ } ++ return 0; ++ } ++ } else if (dst_first_rc == rc_float) { ++ // gpr -> xmm ++ if ((src_first & 1) == 0 && src_first + 1 == src_second && ++ (dst_first & 1) == 0 && dst_first + 1 == dst_second) { ++ // 64-bit ++ if (cbuf) { ++ C2_MacroAssembler _masm(cbuf); ++ __ dmtc1(as_Register(Matcher::_regEncode[src_first]), as_FloatRegister(Matcher::_regEncode[dst_first])); ++#ifndef PRODUCT ++ } else { ++ st->print("\n\t"); ++ st->print("dmtc1 %s, %s\t# spill 12", ++ Matcher::regName[dst_first], ++ Matcher::regName[src_first]); ++#endif ++ } ++ } else { ++ // 32-bit ++ assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform"); ++ assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform"); ++ if (cbuf) { ++ C2_MacroAssembler _masm(cbuf); ++ __ mtc1( as_Register(Matcher::_regEncode[src_first]), as_FloatRegister(Matcher::_regEncode[dst_first]) ); ++#ifndef PRODUCT ++ } else { ++ st->print("\n\t"); ++ st->print("mtc1 %s, %s\t# spill 13", ++ 
Matcher::regName[dst_first], ++ Matcher::regName[src_first]); ++#endif ++ } ++ } ++ return 0; ++ } ++ } else if (src_first_rc == rc_float) { ++ // xmm -> ++ if (dst_first_rc == rc_stack) { ++ // xmm -> mem ++ if ((src_first & 1) == 0 && src_first + 1 == src_second && ++ (dst_first & 1) == 0 && dst_first + 1 == dst_second) { ++ // 64-bit ++ int offset = ra_->reg2offset(dst_first); ++ if (cbuf) { ++ C2_MacroAssembler _masm(cbuf); ++ __ sdc1( as_FloatRegister(Matcher::_regEncode[src_first]), Address(SP, offset) ); ++#ifndef PRODUCT ++ } else { ++ st->print("\n\t"); ++ st->print("sdc1 %s, [SP + #%d]\t# spill 14", ++ Matcher::regName[src_first], ++ offset); ++#endif ++ } ++ } else { ++ // 32-bit ++ assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform"); ++ assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform"); ++ int offset = ra_->reg2offset(dst_first); ++ if (cbuf) { ++ C2_MacroAssembler _masm(cbuf); ++ __ swc1(as_FloatRegister(Matcher::_regEncode[src_first]), Address(SP, offset)); ++#ifndef PRODUCT ++ } else { ++ st->print("\n\t"); ++ st->print("swc1 %s, [SP + #%d]\t# spill 15", ++ Matcher::regName[src_first], ++ offset); ++#endif ++ } ++ } ++ return 0; ++ } else if (dst_first_rc == rc_int) { ++ // xmm -> gpr ++ if ((src_first & 1) == 0 && src_first + 1 == src_second && ++ (dst_first & 1) == 0 && dst_first + 1 == dst_second) { ++ // 64-bit ++ if (cbuf) { ++ C2_MacroAssembler _masm(cbuf); ++ __ dmfc1( as_Register(Matcher::_regEncode[dst_first]), as_FloatRegister(Matcher::_regEncode[src_first])); ++#ifndef PRODUCT ++ } else { ++ st->print("\n\t"); ++ st->print("dmfc1 %s, %s\t# spill 16", ++ Matcher::regName[dst_first], ++ Matcher::regName[src_first]); ++#endif ++ } ++ } else { ++ // 32-bit ++ assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform"); ++ assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform"); ++ if (cbuf) { ++ C2_MacroAssembler _masm(cbuf); ++ __ mfc1( as_Register(Matcher::_regEncode[dst_first]), as_FloatRegister(Matcher::_regEncode[src_first])); ++#ifndef PRODUCT ++ } else { ++ st->print("\n\t"); ++ st->print("mfc1 %s, %s\t# spill 17", ++ Matcher::regName[dst_first], ++ Matcher::regName[src_first]); ++#endif ++ } ++ } ++ return 0; ++ } else if (dst_first_rc == rc_float) { ++ // xmm -> xmm ++ if ((src_first & 1) == 0 && src_first + 1 == src_second && ++ (dst_first & 1) == 0 && dst_first + 1 == dst_second) { ++ // 64-bit ++ if (cbuf) { ++ C2_MacroAssembler _masm(cbuf); ++ __ mov_d( as_FloatRegister(Matcher::_regEncode[dst_first]), as_FloatRegister(Matcher::_regEncode[src_first])); ++#ifndef PRODUCT ++ } else { ++ st->print("\n\t"); ++ st->print("mov_d %s <-- %s\t# spill 18", ++ Matcher::regName[dst_first], ++ Matcher::regName[src_first]); ++#endif ++ } ++ } else { ++ // 32-bit ++ assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform"); ++ assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform"); ++ if (cbuf) { ++ C2_MacroAssembler _masm(cbuf); ++ __ mov_s( as_FloatRegister(Matcher::_regEncode[dst_first]), as_FloatRegister(Matcher::_regEncode[src_first])); ++#ifndef PRODUCT ++ } else { ++ st->print("\n\t"); ++ st->print("mov_s %s <-- %s\t# spill 19", ++ Matcher::regName[dst_first], ++ Matcher::regName[src_first]); ++#endif ++ } ++ } ++ return 0; ++ } ++ } ++ ++ assert(0," foo "); ++ Unimplemented(); ++ return 0; ++} ++ ++#ifndef PRODUCT ++void MachSpillCopyNode::format( PhaseRegAlloc *ra_, outputStream* st ) const { ++ implementation( NULL, 
ra_, false, st ); ++} ++#endif ++ ++void MachSpillCopyNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const { ++ implementation( &cbuf, ra_, false, NULL ); ++} ++ ++uint MachSpillCopyNode::size(PhaseRegAlloc *ra_) const { ++ return MachNode::size(ra_); ++} ++ ++//============================================================================= ++#ifndef PRODUCT ++void MachEpilogNode::format( PhaseRegAlloc *ra_, outputStream* st ) const { ++ Compile *C = ra_->C; ++ int framesize = C->output()->frame_size_in_bytes(); ++ ++ assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned"); ++ ++ st->print_cr("daddiu SP, SP, %d # Rlease stack @ MachEpilogNode", framesize); ++ st->print("\t"); ++ if (UseLEXT1) { ++ st->print_cr("gslq RA, FP, SP, %d # Restore FP & RA @ MachEpilogNode", -wordSize*2); ++ } else { ++ st->print_cr("ld RA, SP, %d # Restore RA @ MachEpilogNode", -wordSize); ++ st->print("\t"); ++ st->print_cr("ld FP, SP, %d # Restore FP @ MachEpilogNode", -wordSize*2); ++ } ++ ++ if( do_polling() && C->is_method_compilation() ) { ++ st->print("\t"); ++ st->print_cr("ld AT, poll_offset[thread] #polling_page_address\n\t" ++ "lw AT, [AT]\t" ++ "# Safepoint: poll for GC"); ++ } ++} ++#endif ++ ++void MachEpilogNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const { ++ Compile *C = ra_->C; ++ C2_MacroAssembler _masm(&cbuf); ++ int framesize = C->output()->frame_size_in_bytes(); ++ ++ assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned"); ++ assert(Assembler::is_simm16(framesize), "daddiu uses a signed 16-bit int"); ++ ++ int off = framesize - wordSize * 2; ++ if (UseLEXT1 && Assembler::is_simm(off, 9)) { ++ __ gslq(RA, FP, SP, framesize - wordSize * 2); ++ } else { ++ __ ld(RA, SP, framesize - wordSize ); ++ __ ld(FP, SP, framesize - wordSize * 2); ++ } ++ __ daddiu(SP, SP, framesize); ++ ++ if (StackReservedPages > 0 && C->has_reserved_stack_access()) { ++ __ reserved_stack_check(); ++ } ++ ++ Register thread = TREG; ++#ifndef OPT_THREAD ++ __ get_thread(thread); ++#endif ++ ++ if( do_polling() && C->is_method_compilation() ) { ++ __ ld(AT, thread, in_bytes(JavaThread::polling_page_offset())); ++ __ relocate(relocInfo::poll_return_type); ++ __ lw(AT, AT, 0); ++ } ++} ++ ++uint MachEpilogNode::size(PhaseRegAlloc *ra_) const { ++ return MachNode::size(ra_); // too many variables; just compute it the hard way fujie debug ++} ++ ++int MachEpilogNode::reloc() const { ++ return 0; // a large enough number ++} ++ ++const Pipeline * MachEpilogNode::pipeline() const { ++ return MachNode::pipeline_class(); ++} ++ ++//============================================================================= ++ ++#ifndef PRODUCT ++void BoxLockNode::format( PhaseRegAlloc *ra_, outputStream* st ) const { ++ int offset = ra_->reg2offset(in_RegMask(0).find_first_elem()); ++ int reg = ra_->get_reg_first(this); ++ st->print("ADDI %s, SP, %d @BoxLockNode",Matcher::regName[reg],offset); ++} ++#endif ++ ++ ++uint BoxLockNode::size(PhaseRegAlloc *ra_) const { ++ return 4; ++} ++ ++void BoxLockNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const { ++ C2_MacroAssembler _masm(&cbuf); ++ int offset = ra_->reg2offset(in_RegMask(0).find_first_elem()); ++ int reg = ra_->get_encode(this); ++ ++ __ addiu(as_Register(reg), SP, offset); ++} ++ ++ ++//static int sizeof_FFree_Float_Stack_All = -1; ++ ++int MachCallRuntimeNode::ret_addr_offset() { ++ //lui ++ //ori ++ //dsll ++ //ori ++ //jalr ++ //nop ++ assert(NativeCall::instruction_size == 24, "in MachCallRuntimeNode::ret_addr_offset()"); ++ 
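++ // Six 4-byte instructions as listed above, i.e. 24 bytes, which is exactly NativeCall::instruction_size.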
return NativeCall::instruction_size; ++} ++ ++int MachCallNativeNode::ret_addr_offset() { ++ Unimplemented(); ++ return -1; ++} ++ ++//============================================================================= ++#ifndef PRODUCT ++void MachNopNode::format( PhaseRegAlloc *, outputStream* st ) const { ++ st->print("NOP \t# %d bytes pad for loops and calls", 4 * _count); ++} ++#endif ++ ++void MachNopNode::emit(CodeBuffer &cbuf, PhaseRegAlloc * ) const { ++ C2_MacroAssembler _masm(&cbuf); ++ int i = 0; ++ for(i = 0; i < _count; i++) ++ __ nop(); ++} ++ ++uint MachNopNode::size(PhaseRegAlloc *) const { ++ return 4 * _count; ++} ++const Pipeline* MachNopNode::pipeline() const { ++ return MachNode::pipeline_class(); ++} ++ ++//============================================================================= ++ ++//============================================================================= ++#ifndef PRODUCT ++void MachUEPNode::format( PhaseRegAlloc *ra_, outputStream* st ) const { ++ st->print_cr("load_klass(T9, T0)"); ++ st->print_cr("\tbeq(T9, iCache, L)"); ++ st->print_cr("\tnop"); ++ st->print_cr("\tjmp(SharedRuntime::get_ic_miss_stub(), relocInfo::runtime_call_type)"); ++ st->print_cr("\tnop"); ++ st->print_cr("\tnop"); ++ st->print_cr(" L:"); ++} ++#endif ++ ++ ++void MachUEPNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const { ++ C2_MacroAssembler _masm(&cbuf); ++ int ic_reg = Matcher::inline_cache_reg_encode(); ++ Label L; ++ Register receiver = T0; ++ Register iCache = as_Register(ic_reg); ++ ++ __ load_klass(T9, receiver); ++ __ beq(T9, iCache, L); ++ __ delayed()->nop(); ++ __ jmp((address)SharedRuntime::get_ic_miss_stub(), relocInfo::runtime_call_type); ++ __ delayed()->nop(); ++ __ bind(L); ++} ++ ++uint MachUEPNode::size(PhaseRegAlloc *ra_) const { ++ return MachNode::size(ra_); ++} ++ ++ ++ ++//============================================================================= ++ ++const RegMask& MachConstantBaseNode::_out_RegMask = P_REG_mask(); ++ ++int ConstantTable::calculate_table_base_offset() const { ++ return 0; // absolute addressing, no offset ++} ++ ++bool MachConstantBaseNode::requires_postalloc_expand() const { return false; } ++void MachConstantBaseNode::postalloc_expand(GrowableArray *nodes, PhaseRegAlloc *ra_) { ++ ShouldNotReachHere(); ++} ++ ++void MachConstantBaseNode::emit(CodeBuffer& cbuf, PhaseRegAlloc* ra_) const { ++ Compile* C = ra_->C; ++ ConstantTable& constant_table = C->output()->constant_table(); ++ C2_MacroAssembler _masm(&cbuf); ++ ++ Register Rtoc = as_Register(ra_->get_encode(this)); ++ CodeSection* consts_section = __ code()->consts(); ++ int consts_size = consts_section->align_at_start(consts_section->size()); ++ assert(constant_table.size() == consts_size, "must be equal"); ++ ++ if (consts_section->size()) { ++ // Materialize the constant table base. 
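++ // The base address is the start of the constants section; the internal_word relocation lets the 48-bit immediate built by patchable_set48 be fixed up if this code is later relocated.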
++ address baseaddr = consts_section->start() + -(constant_table.table_base_offset()); ++ // RelocationHolder rspec = internal_word_Relocation::spec(baseaddr); ++ __ relocate(relocInfo::internal_word_type); ++ __ patchable_set48(Rtoc, (long)baseaddr); ++ } ++} ++ ++uint MachConstantBaseNode::size(PhaseRegAlloc* ra_) const { ++ // patchable_set48 (4 insts) ++ return 4 * 4; ++} ++ ++#ifndef PRODUCT ++void MachConstantBaseNode::format(PhaseRegAlloc* ra_, outputStream* st) const { ++ Register r = as_Register(ra_->get_encode(this)); ++ st->print("patchable_set48 %s, &constanttable (constant table base) @ MachConstantBaseNode", r->name()); ++} ++#endif ++ ++ ++//============================================================================= ++#ifndef PRODUCT ++void MachPrologNode::format( PhaseRegAlloc *ra_, outputStream* st ) const { ++ Compile* C = ra_->C; ++ ++ int framesize = C->output()->frame_size_in_bytes(); ++ int bangsize = C->output()->bang_size_in_bytes(); ++ assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned"); ++ ++ // Calls to C2R adapters often do not accept exceptional returns. ++ // We require that their callers must bang for them. But be careful, because ++ // some VM calls (such as call site linkage) can use several kilobytes of ++ // stack. But the stack safety zone should account for that. ++ // See bugs 4446381, 4468289, 4497237. ++ if (C->output()->need_stack_bang(bangsize)) { ++ st->print_cr("# stack bang"); st->print("\t"); ++ } ++ if (UseLEXT1) { ++ st->print("gssq RA, FP, %d(SP) @ MachPrologNode\n\t", -wordSize*2); ++ } else { ++ st->print("sd RA, %d(SP) @ MachPrologNode\n\t", -wordSize); ++ st->print("sd FP, %d(SP) @ MachPrologNode\n\t", -wordSize*2); ++ } ++ st->print("daddiu FP, SP, -%d \n\t", wordSize*2); ++ st->print("daddiu SP, SP, -%d \t",framesize); ++} ++#endif ++ ++ ++void MachPrologNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const { ++ Compile* C = ra_->C; ++ C2_MacroAssembler _masm(&cbuf); ++ ++ int framesize = C->output()->frame_size_in_bytes(); ++ int bangsize = C->output()->bang_size_in_bytes(); ++ ++ assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned"); ++ assert(Assembler::is_simm16(-framesize), "daddiu uses a signed 16-bit int"); ++ ++ // Make enough room for patch_verified_entry ++ __ nop(); ++ __ nop(); ++ ++ if (C->clinit_barrier_on_entry()) { ++ assert(!C->method()->holder()->is_not_initialized(), "initialization should have been started"); ++ ++ Label L_skip_barrier; ++ ++ __ mov_metadata(T9, C->method()->holder()->constant_encoding()); ++ __ clinit_barrier(T9, AT, &L_skip_barrier); ++ __ jmp((address)SharedRuntime::get_handle_wrong_method_stub(), relocInfo::runtime_call_type); ++ __ delayed()->nop(); ++ __ bind(L_skip_barrier); ++ } ++ ++ if (C->output()->need_stack_bang(bangsize)) { ++ __ generate_stack_overflow_check(bangsize); ++ } ++ ++ __ daddiu(SP, SP, -framesize); ++ int off = framesize - wordSize * 2; ++ if (UseLEXT1 && Assembler::is_simm(off, 9)) { ++ __ gssq(RA, FP, SP, framesize - wordSize * 2); ++ } else { ++ __ sd(RA, SP, framesize - wordSize); ++ __ sd(FP, SP, framesize - wordSize * 2); ++ } ++ __ daddiu(FP, SP, framesize - wordSize * 2); ++ ++ C->output()->set_frame_complete(cbuf.insts_size()); ++ if (C->has_mach_constant_base_node()) { ++ // NOTE: We set the table base offset here because users might be ++ // emitted before MachConstantBaseNode. 
++ ConstantTable& constant_table = C->output()->constant_table(); ++ constant_table.set_table_base_offset(constant_table.calculate_table_base_offset()); ++ } ++} ++ ++ ++uint MachPrologNode::size(PhaseRegAlloc *ra_) const { ++ return MachNode::size(ra_); // too many variables; just compute it the hard way ++} ++ ++int MachPrologNode::reloc() const { ++ return 0; // a large enough number ++} ++ ++%} ++ ++//----------ENCODING BLOCK----------------------------------------------------- ++// This block specifies the encoding classes used by the compiler to output ++// byte streams. Encoding classes generate functions which are called by ++// Machine Instruction Nodes in order to generate the bit encoding of the ++// instruction. Operands specify their base encoding interface with the ++// interface keyword. There are currently supported four interfaces, ++// REG_INTER, CONST_INTER, MEMORY_INTER, & COND_INTER. REG_INTER causes an ++// operand to generate a function which returns its register number when ++// queried. CONST_INTER causes an operand to generate a function which ++// returns the value of the constant when queried. MEMORY_INTER causes an ++// operand to generate four functions which return the Base Register, the ++// Index Register, the Scale Value, and the Offset Value of the operand when ++// queried. COND_INTER causes an operand to generate six functions which ++// return the encoding code (ie - encoding bits for the instruction) ++// associated with each basic boolean condition for a conditional instruction. ++// Instructions specify two basic values for encoding. They use the ++// ins_encode keyword to specify their encoding class (which must be one of ++// the class names specified in the encoding block), and they use the ++// opcode keyword to specify, in order, their primary, secondary, and ++// tertiary opcode. Only the opcode sections which a particular instruction ++// needs for encoding need to be specified. ++encode %{ ++ ++ enc_class Java_To_Runtime (method meth) %{ // CALL Java_To_Runtime, Java_To_Runtime_Leaf ++ C2_MacroAssembler _masm(&cbuf); ++ // This is the instruction starting address for relocation info. ++ __ block_comment("Java_To_Runtime"); ++ cbuf.set_insts_mark(); ++ __ relocate(relocInfo::runtime_call_type); ++ __ patchable_call((address)$meth$$method); ++ %} ++ ++ enc_class Java_Static_Call (method meth) %{ // JAVA STATIC CALL ++ // CALL to fixup routine. Fixup routine uses ScopeDesc info to determine ++ // who we intended to call. ++ C2_MacroAssembler _masm(&cbuf); ++ address addr = (address)$meth$$method; ++ address call; ++ __ block_comment("Java_Static_Call"); ++ ++ if ( !_method ) { ++ // A call to a runtime wrapper, e.g. new, new_typeArray_Java, uncommon_trap. ++ call = __ trampoline_call(AddressLiteral(addr, relocInfo::runtime_call_type), &cbuf); ++ } else { ++ int method_index = resolved_method_index(cbuf); ++ RelocationHolder rspec = _optimized_virtual ? 
opt_virtual_call_Relocation::spec(method_index) ++ : static_call_Relocation::spec(method_index); ++ call = __ trampoline_call(AddressLiteral(addr, rspec), &cbuf); ++ ++ // Emit stub for static call ++ address stub = CompiledStaticCall::emit_to_interp_stub(cbuf); ++ if (stub == NULL) { ++ ciEnv::current()->record_failure("CodeCache is full"); ++ return; ++ } ++ } ++ if (call == NULL) { ++ ciEnv::current()->record_failure("CodeCache is full"); ++ return; ++ } ++ %} ++ ++ ++ // ++ // [Ref: LIR_Assembler::ic_call() ] ++ // ++ enc_class Java_Dynamic_Call (method meth) %{ // JAVA DYNAMIC CALL ++ C2_MacroAssembler _masm(&cbuf); ++ __ block_comment("Java_Dynamic_Call"); ++ __ ic_call((address)$meth$$method, resolved_method_index(cbuf)); ++ %} ++ ++ ++ enc_class enc_PartialSubtypeCheck(mRegP result, mRegP sub, mRegP super, mRegI tmp) %{ ++ Register result = $result$$Register; ++ Register sub = $sub$$Register; ++ Register super = $super$$Register; ++ Register length = $tmp$$Register; ++ Register tmp = T9; ++ Label miss; ++ ++ // result may be the same as sub ++ // 47c B40: # B21 B41 <- B20 Freq: 0.155379 ++ // 47c partialSubtypeCheck result=S1, sub=S1, super=S3, length=S0 ++ // 4bc mov S2, NULL #@loadConP ++ // 4c0 beq S1, S2, B21 #@branchConP P=0.999999 C=-1.000000 ++ // ++ C2_MacroAssembler _masm(&cbuf); ++ Label done; ++ __ check_klass_subtype_slow_path(sub, super, length, tmp, ++ NULL, &miss, ++ /*set_cond_codes:*/ true); ++ // Refer to X86_64's RDI ++ __ move(result, 0); ++ __ b(done); ++ __ delayed()->nop(); ++ ++ __ bind(miss); ++ __ move(result, 1); ++ __ bind(done); ++ %} ++ ++%} ++ ++ ++//---------MIPS FRAME-------------------------------------------------------------- ++// Definition of frame structure and management information. ++// ++// S T A C K L A Y O U T Allocators stack-slot number ++// | (to get allocators register number ++// G Owned by | | v add SharedInfo::stack0) ++// r CALLER | | ++// o | +--------+ pad to even-align allocators stack-slot ++// w V | pad0 | numbers; owned by CALLER ++// t -----------+--------+----> Matcher::_in_arg_limit, unaligned ++// h ^ | in | 5 ++// | | args | 4 Holes in incoming args owned by SELF ++// | | old | | 3 ++// | | SP-+--------+----> Matcher::_old_SP, even aligned ++// v | | ret | 3 return address ++// Owned by +--------+ ++// Self | pad2 | 2 pad to align old SP ++// | +--------+ 1 ++// | | locks | 0 ++// | +--------+----> SharedInfo::stack0, even aligned ++// | | pad1 | 11 pad to align new SP ++// | +--------+ ++// | | | 10 ++// | | spills | 9 spills ++// V | | 8 (pad0 slot for callee) ++// -----------+--------+----> Matcher::_out_arg_limit, unaligned ++// ^ | out | 7 ++// | | args | 6 Holes in outgoing args owned by CALLEE ++// Owned by new | | ++// Callee SP-+--------+----> Matcher::_new_SP, even aligned ++// | | ++// ++// Note 1: Only region 8-11 is determined by the allocator. Region 0-5 is ++// known from SELF's arguments and the Java calling convention. ++// Region 6-7 is determined per call site. ++// Note 2: If the calling convention leaves holes in the incoming argument ++// area, those holes are owned by SELF. Holes in the outgoing area ++// are owned by the CALLEE. Holes should not be nessecary in the ++// incoming area, as the Java calling convention is completely under ++// the control of the AD file. Doubles can be sorted and packed to ++// avoid holes. Holes in the outgoing arguments may be nessecary for ++// varargs C calling conventions. ++// Note 3: Region 0-3 is even aligned, with pad2 as needed. 
Region 3-5 is ++// even aligned with pad0 as needed. ++// Region 6 is even aligned. Region 6-7 is NOT even aligned; ++// region 6-11 is even aligned; it may be padded out more so that ++// the region from SP to FP meets the minimum stack alignment. ++// Note 4: For I2C adapters, the incoming FP may not meet the minimum stack ++// alignment. Region 11, pad1, may be dynamically extended so that ++// SP meets the minimum alignment. ++ ++ ++frame %{ ++ // These two registers define part of the calling convention ++ // between compiled code and the interpreter. ++ // SEE StartI2CNode::calling_convention & StartC2INode::calling_convention & StartOSRNode::calling_convention ++ // for more information. ++ ++ inline_cache_reg(T1); // Inline Cache Register ++ ++ // Optional: name the operand used by cisc-spilling to access [stack_pointer + offset] ++ cisc_spilling_operand_name(indOffset32); ++ ++ // Number of stack slots consumed by locking an object ++ // generate Compile::sync_stack_slots ++ sync_stack_slots(2); ++ ++ frame_pointer(SP); ++ ++ // Interpreter stores its frame pointer in a register which is ++ // stored to the stack by I2CAdaptors. ++ // I2CAdaptors convert from interpreted java to compiled java. ++ ++ interpreter_frame_pointer(FP); ++ ++ // generate Matcher::stack_alignment ++ stack_alignment(StackAlignmentInBytes); //wordSize = sizeof(char*); ++ ++ // Number of outgoing stack slots killed above the out_preserve_stack_slots ++ // for calls to C. Supports the var-args backing area for register parms. ++ varargs_C_out_slots_killed(0); ++ ++ // The after-PROLOG location of the return address. Location of ++ // return address specifies a type (REG or STACK) and a number ++ // representing the register number (i.e. - use a register name) or ++ // stack slot. ++ // Ret Addr is on stack in slot 0 if no locks or verification or alignment. ++ // Otherwise, it is above the locks and verification slot and alignment word ++ //return_addr(STACK -1+ round_to(1+VerifyStackAtCalls+Compile::current()->sync()*Compile::current()->sync_stack_slots(),WordsPerLong)); ++ return_addr(REG RA); ++ ++ // Location of C & interpreter return values ++ // register(s) contain(s) return value for Op_StartI2C and Op_StartOSR. ++ // SEE Matcher::match. ++ c_return_value %{ ++ assert( ideal_reg >= Op_RegI && ideal_reg <= Op_RegL, "only return normal values" ); ++ /* -- , -- , Op_RegN, Op_RegI, Op_RegP, Op_RegF, Op_RegD, Op_RegL */ ++ static int lo[Op_RegL+1] = { 0, 0, V0_num, V0_num, V0_num, F0_num, F0_num, V0_num }; ++ static int hi[Op_RegL+1] = { 0, 0, OptoReg::Bad, OptoReg::Bad, V0_H_num, OptoReg::Bad, F0_H_num, V0_H_num }; ++ return OptoRegPair(hi[ideal_reg],lo[ideal_reg]); ++ %} ++ ++ // Location of return values ++ // register(s) contain(s) return value for Op_StartC2I and Op_Start. ++ // SEE Matcher::match. 
++ ++ return_value %{ ++ assert( ideal_reg >= Op_RegI && ideal_reg <= Op_RegL, "only return normal values" ); ++ /* -- , -- , Op_RegN, Op_RegI, Op_RegP, Op_RegF, Op_RegD, Op_RegL */ ++ static int lo[Op_RegL+1] = { 0, 0, V0_num, V0_num, V0_num, F0_num, F0_num, V0_num }; ++ static int hi[Op_RegL+1] = { 0, 0, OptoReg::Bad, OptoReg::Bad, V0_H_num, OptoReg::Bad, F0_H_num, V0_H_num}; ++ return OptoRegPair(hi[ideal_reg],lo[ideal_reg]); ++ %} ++ ++%} ++ ++//----------ATTRIBUTES--------------------------------------------------------- ++//----------Operand Attributes------------------------------------------------- ++op_attrib op_cost(0); // Required cost attribute ++ ++//----------Instruction Attributes--------------------------------------------- ++ins_attrib ins_cost(100); // Required cost attribute ++ins_attrib ins_size(32); // Required size attribute (in bits) ++ins_attrib ins_pc_relative(0); // Required PC Relative flag ++ins_attrib ins_short_branch(0); // Required flag: is this instruction a ++ // non-matching short branch variant of some ++ // long branch? ++ins_attrib ins_alignment(4); // Required alignment attribute (must be a power of 2) ++ // specifies the alignment that some part of the instruction (not ++ // necessarily the start) requires. If > 1, a compute_padding() ++ // function must be provided for the instruction ++ ++//----------OPERANDS----------------------------------------------------------- ++// Operand definitions must precede instruction definitions for correct parsing ++// in the ADLC because operands constitute user defined types which are used in ++// instruction definitions. ++ ++// Vectors ++operand vecD() %{ ++ constraint(ALLOC_IN_RC(dbl_reg)); ++ match(VecD); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++// Flags register, used as output of compare instructions ++operand FlagsReg() %{ ++ constraint(ALLOC_IN_RC(t0_reg)); ++ match(RegFlags); ++ ++ format %{ "T0" %} ++ interface(REG_INTER); ++%} ++ ++//----------Simple Operands---------------------------------------------------- ++// TODO: Should we need to define some more special immediate number ? 
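Each of the immediate operands defined below pairs a predicate (which constants it may match) with an op_cost (how strongly the matcher should prefer it), so the cheapest encodable form wins during instruction selection. As a rough C++ sketch of what a predicate such as the one on immI16 tests, with the helper name fits_simm16 invented purely for illustration:

    #include <cstdint>

    // Signed 16-bit encodability check; this is all the immI16 predicate
    // `(-32768 <= n->get_int()) && (n->get_int() <= 32767)` amounts to.
    static inline bool fits_simm16(int32_t v) {
      return v >= -32768 && v <= 32767;
    }

A constant that fails such a check cannot use the cheap reg+imm form and has to be materialized into a register first, which the costlier, unrestricted operands cover.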
++// Immediate Operands ++// Integer Immediate ++operand immI() %{ ++ match(ConI); ++ // TODO: should not match immI8 here LEE ++ match(immI8); ++ ++ op_cost(20); ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++operand immI8() %{ ++ predicate((-128 <= n->get_int()) && (n->get_int() <= 127)); ++ match(ConI); ++ ++ op_cost(5); ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++operand immI16() %{ ++ predicate((-32768 <= n->get_int()) && (n->get_int() <= 32767)); ++ match(ConI); ++ ++ op_cost(10); ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++operand immI_M65536() %{ ++ predicate(n->get_int() == -65536); ++ match(ConI); ++ ++ op_cost(5); ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++// Constant for decrement ++operand immI_M1() %{ ++ predicate(n->get_int() == -1); ++ match(ConI); ++ ++ op_cost(0); ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++// Constant for test vs zero ++operand immI_0() %{ ++ predicate(n->get_int() == 0); ++ match(ConI); ++ ++ op_cost(0); ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++// Constant for increment ++operand immI_1() %{ ++ predicate(n->get_int() == 1); ++ match(ConI); ++ ++ op_cost(0); ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++// Constants for increment ++operand immI_16() %{ ++ predicate(n->get_int() == 16); ++ match(ConI); ++ ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++operand immI_24() %{ ++ predicate(n->get_int() == 24); ++ match(ConI); ++ ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++// Constant for long shifts ++operand immI_32() %{ ++ predicate(n->get_int() == 32); ++ match(ConI); ++ ++ op_cost(0); ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++// Constant for byte-wide masking ++operand immI_255() %{ ++ predicate(n->get_int() == 255); ++ match(ConI); ++ ++ op_cost(0); ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++operand immI_65535() %{ ++ predicate(n->get_int() == 65535); ++ match(ConI); ++ ++ op_cost(5); ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++operand immI_MaxI() %{ ++ predicate(n->get_int() == 2147483647); ++ match(ConI); ++ ++ op_cost(0); ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++operand immI_M32767_32768() %{ ++ predicate((-32767 <= n->get_int()) && (n->get_int() <= 32768)); ++ match(ConI); ++ ++ op_cost(10); ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++// Valid scale values for addressing modes ++operand immI_0_3() %{ ++ predicate(0 <= n->get_int() && (n->get_int() <= 3)); ++ match(ConI); ++ ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++operand immI_0_31() %{ ++ predicate(n->get_int() >= 0 && n->get_int() <= 31); ++ match(ConI); ++ ++ op_cost(0); ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++operand immI_0_32767() %{ ++ predicate(n->get_int() >= 0 && n->get_int() <= 32767); ++ match(ConI); ++ op_cost(0); ++ ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++operand immI_0_65535() %{ ++ predicate(n->get_int() >= 0 && n->get_int() <= 65535); ++ match(ConI); ++ op_cost(0); ++ ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++operand immI_32_63() %{ ++ predicate(n->get_int() >= 32 && n->get_int() <= 63); ++ match(ConI); ++ op_cost(0); ++ ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++// Operand for non-negtive integer mask ++operand immI_nonneg_mask() %{ ++ predicate((n->get_int() >= 0) && (Assembler::is_int_mask(n->get_int()) != -1)); ++ match(ConI); ++ ++ op_cost(0); ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++// Long Immediate ++operand immL() %{ ++ match(ConL); ++ ++ op_cost(20); ++ format %{ %} ++ 
interface(CONST_INTER); ++%} ++ ++// Long Immediate 8-bit ++operand immL8() %{ ++ predicate(-0x80L <= n->get_long() && n->get_long() < 0x80L); ++ match(ConL); ++ ++ op_cost(5); ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++operand immL16() %{ ++ predicate((-32768 <= n->get_long()) && (n->get_long() <= 32767)); ++ match(ConL); ++ ++ op_cost(10); ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++// Long Immediate 32-bit signed ++operand immL32() %{ ++ predicate(n->get_long() == (int)(n->get_long())); ++ match(ConL); ++ ++ op_cost(15); ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++// bit 3..6 zero ++operand immL_M121() %{ ++ predicate(n->get_long() == -121L); ++ match(ConL); ++ op_cost(0); ++ ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++// bit 0..2 zero ++operand immL_M8() %{ ++ predicate(n->get_long() == -8L); ++ match(ConL); ++ op_cost(0); ++ ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++// bit 1..2 zero ++operand immL_M7() %{ ++ predicate(n->get_long() == -7L); ++ match(ConL); ++ op_cost(0); ++ ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++// bit 2 zero ++operand immL_M5() %{ ++ predicate(n->get_long() == -5L); ++ match(ConL); ++ op_cost(0); ++ ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++// bit 0..1 zero ++operand immL_M4() %{ ++ predicate(n->get_long() == -4L); ++ match(ConL); ++ op_cost(0); ++ ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++operand immL_M1() %{ ++ predicate(n->get_long() == -1L); ++ match(ConL); ++ op_cost(0); ++ ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++// Long Immediate zero ++operand immL_0() %{ ++ predicate(n->get_long() == 0L); ++ match(ConL); ++ op_cost(0); ++ ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++operand immL_7() %{ ++ predicate(n->get_long() == 7L); ++ match(ConL); ++ op_cost(0); ++ ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++// Long Immediate: low 32-bit mask ++operand immL_MaxUI() %{ ++ predicate(n->get_long() == 0xFFFFFFFFL); ++ match(ConL); ++ op_cost(20); ++ ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++operand immL_M32767_32768() %{ ++ predicate((-32767 <= n->get_long()) && (n->get_long() <= 32768)); ++ match(ConL); ++ ++ op_cost(10); ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++operand immL_0_65535() %{ ++ predicate(n->get_long() >= 0 && n->get_long() <= 65535); ++ match(ConL); ++ op_cost(0); ++ ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++// Operand for non-negtive long mask ++operand immL_nonneg_mask() %{ ++ predicate((n->get_long() >= 0) && (Assembler::is_jlong_mask(n->get_long()) != -1)); ++ match(ConL); ++ ++ op_cost(0); ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++// Pointer Immediate ++operand immP() %{ ++ match(ConP); ++ ++ op_cost(10); ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++// NULL Pointer Immediate ++operand immP_0() %{ ++ predicate(n->get_ptr() == 0); ++ match(ConP); ++ op_cost(0); ++ ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++// Pointer Immediate: 64-bit ++operand immP_no_oop_cheap() %{ ++ predicate(!n->bottom_type()->isa_oop_ptr() && (MacroAssembler::insts_for_set64(n->get_ptr()) <= 3)); ++ match(ConP); ++ ++ op_cost(5); ++ // formats are generated automatically for constants and base registers ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++// Pointer Immediate ++operand immN() %{ ++ match(ConN); ++ ++ op_cost(10); ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++operand immNKlass() %{ ++ match(ConNKlass); ++ ++ op_cost(10); ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++// NULL Pointer 
Immediate ++operand immN_0() %{ ++ predicate(n->get_narrowcon() == 0); ++ match(ConN); ++ ++ op_cost(5); ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++// Single-precision floating-point immediate ++operand immF() %{ ++ match(ConF); ++ ++ op_cost(20); ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++// Single-precision floating-point zero ++operand immF_0() %{ ++ predicate(jint_cast(n->getf()) == 0); ++ match(ConF); ++ ++ op_cost(5); ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++// Double-precision floating-point immediate ++operand immD() %{ ++ match(ConD); ++ ++ op_cost(20); ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++// Double-precision floating-point zero ++operand immD_0() %{ ++ predicate(jlong_cast(n->getd()) == 0); ++ match(ConD); ++ ++ op_cost(5); ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++// Register Operands ++// Integer Register ++operand mRegI() %{ ++ constraint(ALLOC_IN_RC(int_reg)); ++ match(RegI); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand no_Ax_mRegI() %{ ++ constraint(ALLOC_IN_RC(no_Ax_int_reg)); ++ match(RegI); ++ match(mRegI); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand mS0RegI() %{ ++ constraint(ALLOC_IN_RC(s0_reg)); ++ match(RegI); ++ match(mRegI); ++ ++ format %{ "S0" %} ++ interface(REG_INTER); ++%} ++ ++operand mS1RegI() %{ ++ constraint(ALLOC_IN_RC(s1_reg)); ++ match(RegI); ++ match(mRegI); ++ ++ format %{ "S1" %} ++ interface(REG_INTER); ++%} ++ ++operand mS3RegI() %{ ++ constraint(ALLOC_IN_RC(s3_reg)); ++ match(RegI); ++ match(mRegI); ++ ++ format %{ "S3" %} ++ interface(REG_INTER); ++%} ++ ++operand mS4RegI() %{ ++ constraint(ALLOC_IN_RC(s4_reg)); ++ match(RegI); ++ match(mRegI); ++ ++ format %{ "S4" %} ++ interface(REG_INTER); ++%} ++ ++operand mS5RegI() %{ ++ constraint(ALLOC_IN_RC(s5_reg)); ++ match(RegI); ++ match(mRegI); ++ ++ format %{ "S5" %} ++ interface(REG_INTER); ++%} ++ ++operand mS6RegI() %{ ++ constraint(ALLOC_IN_RC(s6_reg)); ++ match(RegI); ++ match(mRegI); ++ ++ format %{ "S6" %} ++ interface(REG_INTER); ++%} ++ ++operand mS7RegI() %{ ++ constraint(ALLOC_IN_RC(s7_reg)); ++ match(RegI); ++ match(mRegI); ++ ++ format %{ "S7" %} ++ interface(REG_INTER); ++%} ++ ++ ++operand mT0RegI() %{ ++ constraint(ALLOC_IN_RC(t0_reg)); ++ match(RegI); ++ match(mRegI); ++ ++ format %{ "T0" %} ++ interface(REG_INTER); ++%} ++ ++operand mT1RegI() %{ ++ constraint(ALLOC_IN_RC(t1_reg)); ++ match(RegI); ++ match(mRegI); ++ ++ format %{ "T1" %} ++ interface(REG_INTER); ++%} ++ ++operand mT2RegI() %{ ++ constraint(ALLOC_IN_RC(t2_reg)); ++ match(RegI); ++ match(mRegI); ++ ++ format %{ "T2" %} ++ interface(REG_INTER); ++%} ++ ++operand mT3RegI() %{ ++ constraint(ALLOC_IN_RC(t3_reg)); ++ match(RegI); ++ match(mRegI); ++ ++ format %{ "T3" %} ++ interface(REG_INTER); ++%} ++ ++operand mT8RegI() %{ ++ constraint(ALLOC_IN_RC(t8_reg)); ++ match(RegI); ++ match(mRegI); ++ ++ format %{ "T8" %} ++ interface(REG_INTER); ++%} ++ ++operand mT9RegI() %{ ++ constraint(ALLOC_IN_RC(t9_reg)); ++ match(RegI); ++ match(mRegI); ++ ++ format %{ "T9" %} ++ interface(REG_INTER); ++%} ++ ++operand mA0RegI() %{ ++ constraint(ALLOC_IN_RC(a0_reg)); ++ match(RegI); ++ match(mRegI); ++ ++ format %{ "A0" %} ++ interface(REG_INTER); ++%} ++ ++operand mA1RegI() %{ ++ constraint(ALLOC_IN_RC(a1_reg)); ++ match(RegI); ++ match(mRegI); ++ ++ format %{ "A1" %} ++ interface(REG_INTER); ++%} ++ ++operand mA2RegI() %{ ++ constraint(ALLOC_IN_RC(a2_reg)); ++ match(RegI); ++ match(mRegI); ++ ++ format %{ "A2" %} ++ interface(REG_INTER); ++%} 
++ ++operand mA3RegI() %{ ++ constraint(ALLOC_IN_RC(a3_reg)); ++ match(RegI); ++ match(mRegI); ++ ++ format %{ "A3" %} ++ interface(REG_INTER); ++%} ++ ++operand mA4RegI() %{ ++ constraint(ALLOC_IN_RC(a4_reg)); ++ match(RegI); ++ match(mRegI); ++ ++ format %{ "A4" %} ++ interface(REG_INTER); ++%} ++ ++operand mA5RegI() %{ ++ constraint(ALLOC_IN_RC(a5_reg)); ++ match(RegI); ++ match(mRegI); ++ ++ format %{ "A5" %} ++ interface(REG_INTER); ++%} ++ ++operand mA6RegI() %{ ++ constraint(ALLOC_IN_RC(a6_reg)); ++ match(RegI); ++ match(mRegI); ++ ++ format %{ "A6" %} ++ interface(REG_INTER); ++%} ++ ++operand mA7RegI() %{ ++ constraint(ALLOC_IN_RC(a7_reg)); ++ match(RegI); ++ match(mRegI); ++ ++ format %{ "A7" %} ++ interface(REG_INTER); ++%} ++ ++operand mV0RegI() %{ ++ constraint(ALLOC_IN_RC(v0_reg)); ++ match(RegI); ++ match(mRegI); ++ ++ format %{ "V0" %} ++ interface(REG_INTER); ++%} ++ ++operand mV1RegI() %{ ++ constraint(ALLOC_IN_RC(v1_reg)); ++ match(RegI); ++ match(mRegI); ++ ++ format %{ "V1" %} ++ interface(REG_INTER); ++%} ++ ++operand mRegN() %{ ++ constraint(ALLOC_IN_RC(int_reg)); ++ match(RegN); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand t0_RegN() %{ ++ constraint(ALLOC_IN_RC(t0_reg)); ++ match(RegN); ++ match(mRegN); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand t1_RegN() %{ ++ constraint(ALLOC_IN_RC(t1_reg)); ++ match(RegN); ++ match(mRegN); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand t3_RegN() %{ ++ constraint(ALLOC_IN_RC(t3_reg)); ++ match(RegN); ++ match(mRegN); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand t8_RegN() %{ ++ constraint(ALLOC_IN_RC(t8_reg)); ++ match(RegN); ++ match(mRegN); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand t9_RegN() %{ ++ constraint(ALLOC_IN_RC(t9_reg)); ++ match(RegN); ++ match(mRegN); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand a0_RegN() %{ ++ constraint(ALLOC_IN_RC(a0_reg)); ++ match(RegN); ++ match(mRegN); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand a1_RegN() %{ ++ constraint(ALLOC_IN_RC(a1_reg)); ++ match(RegN); ++ match(mRegN); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand a2_RegN() %{ ++ constraint(ALLOC_IN_RC(a2_reg)); ++ match(RegN); ++ match(mRegN); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand a3_RegN() %{ ++ constraint(ALLOC_IN_RC(a3_reg)); ++ match(RegN); ++ match(mRegN); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand a4_RegN() %{ ++ constraint(ALLOC_IN_RC(a4_reg)); ++ match(RegN); ++ match(mRegN); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand a5_RegN() %{ ++ constraint(ALLOC_IN_RC(a5_reg)); ++ match(RegN); ++ match(mRegN); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand a6_RegN() %{ ++ constraint(ALLOC_IN_RC(a6_reg)); ++ match(RegN); ++ match(mRegN); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand a7_RegN() %{ ++ constraint(ALLOC_IN_RC(a7_reg)); ++ match(RegN); ++ match(mRegN); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand s0_RegN() %{ ++ constraint(ALLOC_IN_RC(s0_reg)); ++ match(RegN); ++ match(mRegN); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand s1_RegN() %{ ++ constraint(ALLOC_IN_RC(s1_reg)); ++ match(RegN); ++ match(mRegN); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand s2_RegN() %{ ++ constraint(ALLOC_IN_RC(s2_reg)); ++ match(RegN); ++ match(mRegN); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand s3_RegN() %{ ++ constraint(ALLOC_IN_RC(s3_reg)); ++ match(RegN); ++ 
match(mRegN); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand s4_RegN() %{ ++ constraint(ALLOC_IN_RC(s4_reg)); ++ match(RegN); ++ match(mRegN); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand s5_RegN() %{ ++ constraint(ALLOC_IN_RC(s5_reg)); ++ match(RegN); ++ match(mRegN); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand s6_RegN() %{ ++ constraint(ALLOC_IN_RC(s6_reg)); ++ match(RegN); ++ match(mRegN); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand s7_RegN() %{ ++ constraint(ALLOC_IN_RC(s7_reg)); ++ match(RegN); ++ match(mRegN); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand v0_RegN() %{ ++ constraint(ALLOC_IN_RC(v0_reg)); ++ match(RegN); ++ match(mRegN); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand v1_RegN() %{ ++ constraint(ALLOC_IN_RC(v1_reg)); ++ match(RegN); ++ match(mRegN); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++// Pointer Register ++operand mRegP() %{ ++ constraint(ALLOC_IN_RC(p_reg)); ++ match(RegP); ++ match(a0_RegP); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand no_T8_mRegP() %{ ++ constraint(ALLOC_IN_RC(no_T8_p_reg)); ++ match(RegP); ++ match(mRegP); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand s1_RegP() ++%{ ++ constraint(ALLOC_IN_RC(s1_long_reg)); ++ match(RegP); ++ match(mRegP); ++ match(no_T8_mRegP); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand s3_RegP() ++%{ ++ constraint(ALLOC_IN_RC(s3_long_reg)); ++ match(RegP); ++ match(mRegP); ++ match(no_T8_mRegP); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand s4_RegP() ++%{ ++ constraint(ALLOC_IN_RC(s4_long_reg)); ++ match(RegP); ++ match(mRegP); ++ match(no_T8_mRegP); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand s5_RegP() ++%{ ++ constraint(ALLOC_IN_RC(s5_long_reg)); ++ match(RegP); ++ match(mRegP); ++ match(no_T8_mRegP); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand s6_RegP() ++%{ ++ constraint(ALLOC_IN_RC(s6_long_reg)); ++ match(RegP); ++ match(mRegP); ++ match(no_T8_mRegP); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand s7_RegP() ++%{ ++ constraint(ALLOC_IN_RC(s7_long_reg)); ++ match(RegP); ++ match(mRegP); ++ match(no_T8_mRegP); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand t0_RegP() ++%{ ++ constraint(ALLOC_IN_RC(t0_long_reg)); ++ match(RegP); ++ match(mRegP); ++ match(no_T8_mRegP); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand t1_RegP() ++%{ ++ constraint(ALLOC_IN_RC(t1_long_reg)); ++ match(RegP); ++ match(mRegP); ++ match(no_T8_mRegP); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand t2_RegP() ++%{ ++ constraint(ALLOC_IN_RC(t2_long_reg)); ++ match(RegP); ++ match(mRegP); ++ match(no_T8_mRegP); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand t3_RegP() ++%{ ++ constraint(ALLOC_IN_RC(t3_long_reg)); ++ match(RegP); ++ match(mRegP); ++ match(no_T8_mRegP); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand t8_RegP() ++%{ ++ constraint(ALLOC_IN_RC(t8_long_reg)); ++ match(RegP); ++ match(mRegP); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand t9_RegP() ++%{ ++ constraint(ALLOC_IN_RC(t9_long_reg)); ++ match(RegP); ++ match(mRegP); ++ match(no_T8_mRegP); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand a0_RegP() ++%{ ++ constraint(ALLOC_IN_RC(a0_long_reg)); ++ match(RegP); ++ match(mRegP); ++ match(no_T8_mRegP); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand a1_RegP() ++%{ ++ constraint(ALLOC_IN_RC(a1_long_reg)); 
++ match(RegP); ++ match(mRegP); ++ match(no_T8_mRegP); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand a2_RegP() ++%{ ++ constraint(ALLOC_IN_RC(a2_long_reg)); ++ match(RegP); ++ match(mRegP); ++ match(no_T8_mRegP); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand a3_RegP() ++%{ ++ constraint(ALLOC_IN_RC(a3_long_reg)); ++ match(RegP); ++ match(mRegP); ++ match(no_T8_mRegP); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand a4_RegP() ++%{ ++ constraint(ALLOC_IN_RC(a4_long_reg)); ++ match(RegP); ++ match(mRegP); ++ match(no_T8_mRegP); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++ ++operand a5_RegP() ++%{ ++ constraint(ALLOC_IN_RC(a5_long_reg)); ++ match(RegP); ++ match(mRegP); ++ match(no_T8_mRegP); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand a6_RegP() ++%{ ++ constraint(ALLOC_IN_RC(a6_long_reg)); ++ match(RegP); ++ match(mRegP); ++ match(no_T8_mRegP); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand a7_RegP() ++%{ ++ constraint(ALLOC_IN_RC(a7_long_reg)); ++ match(RegP); ++ match(mRegP); ++ match(no_T8_mRegP); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand v0_RegP() ++%{ ++ constraint(ALLOC_IN_RC(v0_long_reg)); ++ match(RegP); ++ match(mRegP); ++ match(no_T8_mRegP); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand v1_RegP() ++%{ ++ constraint(ALLOC_IN_RC(v1_long_reg)); ++ match(RegP); ++ match(mRegP); ++ match(no_T8_mRegP); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++/* ++operand mSPRegP(mRegP reg) %{ ++ constraint(ALLOC_IN_RC(sp_reg)); ++ match(reg); ++ ++ format %{ "SP" %} ++ interface(REG_INTER); ++%} ++ ++operand mFPRegP(mRegP reg) %{ ++ constraint(ALLOC_IN_RC(fp_reg)); ++ match(reg); ++ ++ format %{ "FP" %} ++ interface(REG_INTER); ++%} ++*/ ++ ++operand mRegL() %{ ++ constraint(ALLOC_IN_RC(long_reg)); ++ match(RegL); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand v0RegL() %{ ++ constraint(ALLOC_IN_RC(v0_long_reg)); ++ match(RegL); ++ match(mRegL); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand v1RegL() %{ ++ constraint(ALLOC_IN_RC(v1_long_reg)); ++ match(RegL); ++ match(mRegL); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand a0RegL() %{ ++ constraint(ALLOC_IN_RC(a0_long_reg)); ++ match(RegL); ++ match(mRegL); ++ ++ format %{ "A0" %} ++ interface(REG_INTER); ++%} ++ ++operand a1RegL() %{ ++ constraint(ALLOC_IN_RC(a1_long_reg)); ++ match(RegL); ++ match(mRegL); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand a2RegL() %{ ++ constraint(ALLOC_IN_RC(a2_long_reg)); ++ match(RegL); ++ match(mRegL); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand a3RegL() %{ ++ constraint(ALLOC_IN_RC(a3_long_reg)); ++ match(RegL); ++ match(mRegL); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand t0RegL() %{ ++ constraint(ALLOC_IN_RC(t0_long_reg)); ++ match(RegL); ++ match(mRegL); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand t1RegL() %{ ++ constraint(ALLOC_IN_RC(t1_long_reg)); ++ match(RegL); ++ match(mRegL); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand t3RegL() %{ ++ constraint(ALLOC_IN_RC(t3_long_reg)); ++ match(RegL); ++ match(mRegL); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand t8RegL() %{ ++ constraint(ALLOC_IN_RC(t8_long_reg)); ++ match(RegL); ++ match(mRegL); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand a4RegL() %{ ++ constraint(ALLOC_IN_RC(a4_long_reg)); ++ match(RegL); ++ match(mRegL); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ 
++operand a5RegL() %{ ++ constraint(ALLOC_IN_RC(a5_long_reg)); ++ match(RegL); ++ match(mRegL); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand a6RegL() %{ ++ constraint(ALLOC_IN_RC(a6_long_reg)); ++ match(RegL); ++ match(mRegL); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand a7RegL() %{ ++ constraint(ALLOC_IN_RC(a7_long_reg)); ++ match(RegL); ++ match(mRegL); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand s0RegL() %{ ++ constraint(ALLOC_IN_RC(s0_long_reg)); ++ match(RegL); ++ match(mRegL); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand s1RegL() %{ ++ constraint(ALLOC_IN_RC(s1_long_reg)); ++ match(RegL); ++ match(mRegL); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand s3RegL() %{ ++ constraint(ALLOC_IN_RC(s3_long_reg)); ++ match(RegL); ++ match(mRegL); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand s4RegL() %{ ++ constraint(ALLOC_IN_RC(s4_long_reg)); ++ match(RegL); ++ match(mRegL); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand s7RegL() %{ ++ constraint(ALLOC_IN_RC(s7_long_reg)); ++ match(RegL); ++ match(mRegL); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++// Floating register operands ++operand regF() %{ ++ constraint(ALLOC_IN_RC(flt_reg)); ++ match(RegF); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++//Double Precision Floating register operands ++operand regD() %{ ++ constraint(ALLOC_IN_RC(dbl_reg)); ++ match(RegD); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++//----------Memory Operands---------------------------------------------------- ++// Indirect Memory Operand ++operand indirect(mRegP reg) %{ ++ constraint(ALLOC_IN_RC(p_reg)); ++ match(reg); ++ ++ format %{ "[$reg] @ indirect" %} ++ interface(MEMORY_INTER) %{ ++ base($reg); ++ index(0xffffffff); /* NO_INDEX */ ++ scale(0x0); ++ disp(0x0); ++ %} ++%} ++ ++// Indirect Memory Plus Short Offset Operand ++operand indOffset8(mRegP reg, immL8 off) ++%{ ++ constraint(ALLOC_IN_RC(p_reg)); ++ match(AddP reg off); ++ ++ op_cost(10); ++ format %{ "[$reg + $off (8-bit)] @ indOffset8" %} ++ interface(MEMORY_INTER) %{ ++ base($reg); ++ index(0xffffffff); /* NO_INDEX */ ++ scale(0x0); ++ disp($off); ++ %} ++%} ++ ++// Indirect Memory Times Scale Plus Index Register ++operand indIndexScale(mRegP reg, mRegL lreg, immI_0_3 scale) ++%{ ++ predicate(UseLEXT1); ++ constraint(ALLOC_IN_RC(p_reg)); ++ match(AddP reg (LShiftL lreg scale)); ++ ++ op_cost(10); ++ format %{"[$reg + $lreg << $scale] @ indIndexScale" %} ++ interface(MEMORY_INTER) %{ ++ base($reg); ++ index($lreg); ++ scale($scale); ++ disp(0x0); ++ %} ++%} ++ ++ ++// [base + index + offset] ++operand baseIndexOffset8(mRegP base, mRegL index, immL8 off) ++%{ ++ predicate(UseLEXT1); ++ constraint(ALLOC_IN_RC(p_reg)); ++ op_cost(5); ++ match(AddP (AddP base index) off); ++ ++ format %{ "[$base + $index + $off (8-bit)] @ baseIndexOffset8" %} ++ interface(MEMORY_INTER) %{ ++ base($base); ++ index($index); ++ scale(0x0); ++ disp($off); ++ %} ++%} ++ ++// [base + index + offset] ++operand baseIndexOffset8_convI2L(mRegP base, mRegI index, immL8 off) ++%{ ++ predicate(UseLEXT1); ++ constraint(ALLOC_IN_RC(p_reg)); ++ op_cost(5); ++ match(AddP (AddP base (ConvI2L index)) off); ++ ++ format %{ "[$base + $index + $off (8-bit)] @ baseIndexOffset8_convI2L" %} ++ interface(MEMORY_INTER) %{ ++ base($base); ++ index($index); ++ scale(0x0); ++ disp($off); ++ %} ++%} ++ ++// [base + index<in(2)->in(3)->in(1)->as_Type()->type()->is_long()->_lo >= 0); ++ op_cost(10); ++ match(AddP (AddP 
base (LShiftL (ConvI2L index) scale)) off); ++ ++ format %{ "[$base + $index << $scale + $off (8-bit)] @ basePosIndexScaleOffset8" %} ++ interface(MEMORY_INTER) %{ ++ base($base); ++ index($index); ++ scale($scale); ++ disp($off); ++ %} ++%} ++ ++//FIXME: I think it's better to limit the immI to be 16-bit at most! ++// Indirect Memory Plus Long Offset Operand ++operand indOffset32(mRegP reg, immL32 off) %{ ++ constraint(ALLOC_IN_RC(p_reg)); ++ op_cost(20); ++ match(AddP reg off); ++ ++ format %{ "[$reg + $off (32-bit)] @ indOffset32" %} ++ interface(MEMORY_INTER) %{ ++ base($reg); ++ index(0xffffffff); /* NO_INDEX */ ++ scale(0x0); ++ disp($off); ++ %} ++%} ++ ++// Indirect Memory Plus Index Register ++operand indIndex(mRegP addr, mRegL index) %{ ++ constraint(ALLOC_IN_RC(p_reg)); ++ match(AddP addr index); ++ ++ op_cost(20); ++ format %{"[$addr + $index] @ indIndex" %} ++ interface(MEMORY_INTER) %{ ++ base($addr); ++ index($index); ++ scale(0x0); ++ disp(0x0); ++ %} ++%} ++ ++operand indirectNarrowKlass(mRegN reg) ++%{ ++ predicate(CompressedKlassPointers::shift() == 0); ++ constraint(ALLOC_IN_RC(p_reg)); ++ op_cost(10); ++ match(DecodeNKlass reg); ++ ++ format %{ "[$reg] @ indirectNarrowKlass" %} ++ interface(MEMORY_INTER) %{ ++ base($reg); ++ index(0xffffffff); ++ scale(0x0); ++ disp(0x0); ++ %} ++%} ++ ++operand indOffset8NarrowKlass(mRegN reg, immL8 off) ++%{ ++ predicate(CompressedKlassPointers::shift() == 0); ++ constraint(ALLOC_IN_RC(p_reg)); ++ op_cost(10); ++ match(AddP (DecodeNKlass reg) off); ++ ++ format %{ "[$reg + $off (8-bit)] @ indOffset8NarrowKlass" %} ++ interface(MEMORY_INTER) %{ ++ base($reg); ++ index(0xffffffff); ++ scale(0x0); ++ disp($off); ++ %} ++%} ++ ++operand indOffset32NarrowKlass(mRegN reg, immL32 off) ++%{ ++ predicate(CompressedKlassPointers::shift() == 0); ++ constraint(ALLOC_IN_RC(p_reg)); ++ op_cost(10); ++ match(AddP (DecodeNKlass reg) off); ++ ++ format %{ "[$reg + $off (32-bit)] @ indOffset32NarrowKlass" %} ++ interface(MEMORY_INTER) %{ ++ base($reg); ++ index(0xffffffff); ++ scale(0x0); ++ disp($off); ++ %} ++%} ++ ++operand indIndexOffsetNarrowKlass(mRegN reg, mRegL lreg, immL32 off) ++%{ ++ predicate(UseLEXT1); ++ predicate(CompressedKlassPointers::shift() == 0); ++ constraint(ALLOC_IN_RC(p_reg)); ++ match(AddP (AddP (DecodeNKlass reg) lreg) off); ++ ++ op_cost(10); ++ format %{"[$reg + $off + $lreg] @ indIndexOffsetNarrowKlass" %} ++ interface(MEMORY_INTER) %{ ++ base($reg); ++ index($lreg); ++ scale(0x0); ++ disp($off); ++ %} ++%} ++ ++operand indIndexNarrowKlass(mRegN reg, mRegL lreg) ++%{ ++ predicate(CompressedKlassPointers::shift() == 0); ++ constraint(ALLOC_IN_RC(p_reg)); ++ match(AddP (DecodeNKlass reg) lreg); ++ ++ op_cost(10); ++ format %{"[$reg + $lreg] @ indIndexNarrowKlass" %} ++ interface(MEMORY_INTER) %{ ++ base($reg); ++ index($lreg); ++ scale(0x0); ++ disp(0x0); ++ %} ++%} ++ ++// Indirect Memory Operand ++operand indirectNarrow(mRegN reg) ++%{ ++ predicate(CompressedOops::shift() == 0); ++ constraint(ALLOC_IN_RC(p_reg)); ++ op_cost(10); ++ match(DecodeN reg); ++ ++ format %{ "[$reg] @ indirectNarrow" %} ++ interface(MEMORY_INTER) %{ ++ base($reg); ++ index(0xffffffff); ++ scale(0x0); ++ disp(0x0); ++ %} ++%} ++ ++// Indirect Memory Plus Short Offset Operand ++operand indOffset8Narrow(mRegN reg, immL8 off) ++%{ ++ predicate(CompressedOops::shift() == 0); ++ constraint(ALLOC_IN_RC(p_reg)); ++ op_cost(10); ++ match(AddP (DecodeN reg) off); ++ ++ format %{ "[$reg + $off (8-bit)] @ indOffset8Narrow" %} ++ interface(MEMORY_INTER) %{ ++ 
base($reg); ++ index(0xffffffff); ++ scale(0x0); ++ disp($off); ++ %} ++%} ++ ++// Indirect Memory Plus Index Register Plus Offset Operand ++operand indIndexOffset8Narrow(mRegN reg, mRegL lreg, immL8 off) ++%{ ++ predicate((CompressedOops::shift() == 0) && UseLEXT1); ++ constraint(ALLOC_IN_RC(p_reg)); ++ match(AddP (AddP (DecodeN reg) lreg) off); ++ ++ op_cost(10); ++ format %{"[$reg + $off + $lreg] @ indIndexOffset8Narrow" %} ++ interface(MEMORY_INTER) %{ ++ base($reg); ++ index($lreg); ++ scale(0x0); ++ disp($off); ++ %} ++%} ++ ++//----------Conditional Branch Operands---------------------------------------- ++// Comparison Op - This is the operation of the comparison, and is limited to ++// the following set of codes: ++// L (<), LE (<=), G (>), GE (>=), E (==), NE (!=) ++// ++// Other attributes of the comparison, such as unsignedness, are specified ++// by the comparison instruction that sets a condition code flags register. ++// That result is represented by a flags operand whose subtype is appropriate ++// to the unsignedness (etc.) of the comparison. ++// ++// Later, the instruction which matches both the Comparison Op (a Bool) and ++// the flags (produced by the Cmp) specifies the coding of the comparison op ++// by matching a specific subtype of Bool operand below, such as cmpOpU. ++ ++// Comparision Code ++operand cmpOp() %{ ++ match(Bool); ++ ++ format %{ "" %} ++ interface(COND_INTER) %{ ++ equal(0x01); ++ not_equal(0x02); ++ greater(0x03); ++ greater_equal(0x04); ++ less(0x05); ++ less_equal(0x06); ++ overflow(0x7); ++ no_overflow(0x8); ++ %} ++%} ++ ++ ++// Comparision Code ++// Comparison Code, unsigned compare. Used by FP also, with ++// C2 (unordered) turned into GT or LT already. The other bits ++// C0 and C3 are turned into Carry & Zero flags. ++operand cmpOpU() %{ ++ match(Bool); ++ ++ format %{ "" %} ++ interface(COND_INTER) %{ ++ equal(0x01); ++ not_equal(0x02); ++ greater(0x03); ++ greater_equal(0x04); ++ less(0x05); ++ less_equal(0x06); ++ overflow(0x7); ++ no_overflow(0x8); ++ %} ++%} ++ ++ ++//----------Special Memory Operands-------------------------------------------- ++// Stack Slot Operand - This operand is used for loading and storing temporary ++// values on the stack where a match requires a value to ++// flow through memory. 
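All of the memory operands above, and the stack-slot operands defined next, describe an address through the same MEMORY_INTER quadruple: base register, optional index register (0xffffffff meaning no index), scale, and displacement. A minimal C++ sketch of the address such a quadruple denotes, with illustrative names only:

    #include <cstdint>

    constexpr uint32_t NO_INDEX = 0xffffffffu;  // marker used by the operands above

    // Effective address described by a MEMORY_INTER (base, index, scale, disp).
    static inline intptr_t effective_address(intptr_t base, uint32_t index_marker,
                                             intptr_t index_value, int scale,
                                             intptr_t disp) {
      intptr_t ea = base + disp;
      if (index_marker != NO_INDEX) {
        ea += index_value << scale;   // scale 0..3, i.e. a factor of 1/2/4/8
      }
      return ea;
    }

The stackSlot* operands below fix the base to SP (encoding 0x1d) and carry the spill slot's byte offset in disp; they have no match rule because the matcher only generates them for values forced to flow through memory.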
++operand stackSlotP(sRegP reg) %{ ++ constraint(ALLOC_IN_RC(stack_slots)); ++ // No match rule because this operand is only generated in matching ++ op_cost(50); ++ format %{ "[$reg]" %} ++ interface(MEMORY_INTER) %{ ++ base(0x1d); // SP ++ index(0xffffffff); // No Index ++ scale(0x0); // No Scale ++ disp($reg); // Stack Offset ++ %} ++%} ++ ++operand stackSlotI(sRegI reg) %{ ++ constraint(ALLOC_IN_RC(stack_slots)); ++ // No match rule because this operand is only generated in matching ++ op_cost(50); ++ format %{ "[$reg]" %} ++ interface(MEMORY_INTER) %{ ++ base(0x1d); // SP ++ index(0xffffffff); // No Index ++ scale(0x0); // No Scale ++ disp($reg); // Stack Offset ++ %} ++%} ++ ++operand stackSlotF(sRegF reg) %{ ++ constraint(ALLOC_IN_RC(stack_slots)); ++ // No match rule because this operand is only generated in matching ++ op_cost(50); ++ format %{ "[$reg]" %} ++ interface(MEMORY_INTER) %{ ++ base(0x1d); // SP ++ index(0xffffffff); // No Index ++ scale(0x0); // No Scale ++ disp($reg); // Stack Offset ++ %} ++%} ++ ++operand stackSlotD(sRegD reg) %{ ++ constraint(ALLOC_IN_RC(stack_slots)); ++ // No match rule because this operand is only generated in matching ++ op_cost(50); ++ format %{ "[$reg]" %} ++ interface(MEMORY_INTER) %{ ++ base(0x1d); // SP ++ index(0xffffffff); // No Index ++ scale(0x0); // No Scale ++ disp($reg); // Stack Offset ++ %} ++%} ++ ++operand stackSlotL(sRegL reg) %{ ++ constraint(ALLOC_IN_RC(stack_slots)); ++ // No match rule because this operand is only generated in matching ++ op_cost(50); ++ format %{ "[$reg]" %} ++ interface(MEMORY_INTER) %{ ++ base(0x1d); // SP ++ index(0xffffffff); // No Index ++ scale(0x0); // No Scale ++ disp($reg); // Stack Offset ++ %} ++%} ++ ++ ++//------------------------OPERAND CLASSES-------------------------------------- ++//opclass memory( direct, indirect, indOffset16, indOffset32, indOffset32X, indIndexOffset ); ++opclass memory( indirect, indirectNarrow, indOffset8, indOffset32, indIndex, indIndexScale, baseIndexOffset8, baseIndexOffset8_convI2L, indOffset8Narrow, indIndexOffset8Narrow); ++ ++ ++//----------PIPELINE----------------------------------------------------------- ++// Rules which define the behavior of the target architectures pipeline. 
++ ++pipeline %{ ++ ++ //----------ATTRIBUTES--------------------------------------------------------- ++ attributes %{ ++ fixed_size_instructions; // Fixed size instructions ++ branch_has_delay_slot; // branch have delay slot in gs2 ++ max_instructions_per_bundle = 1; // 1 instruction per bundle ++ max_bundles_per_cycle = 4; // Up to 4 bundles per cycle ++ bundle_unit_size=4; ++ instruction_unit_size = 4; // An instruction is 4 bytes long ++ instruction_fetch_unit_size = 16; // The processor fetches one line ++ instruction_fetch_units = 1; // of 16 bytes ++ ++ // List of nop instructions ++ nops( MachNop ); ++ %} ++ ++ //----------RESOURCES---------------------------------------------------------- ++ // Resources are the functional units available to the machine ++ ++ resources(D1, D2, D3, D4, DECODE = D1 | D2 | D3| D4, ALU1, ALU2, ALU = ALU1 | ALU2, FPU1, FPU2, FPU = FPU1 | FPU2, MEM, BR); ++ ++ //----------PIPELINE DESCRIPTION----------------------------------------------- ++ // Pipeline Description specifies the stages in the machine's pipeline ++ ++ // IF: fetch ++ // ID: decode ++ // RD: read ++ // CA: caculate ++ // WB: write back ++ // CM: commit ++ ++ pipe_desc(IF, ID, RD, CA, WB, CM); ++ ++ ++ //----------PIPELINE CLASSES--------------------------------------------------- ++ // Pipeline Classes describe the stages in which input and output are ++ // referenced by the hardware pipeline. ++ ++ //No.1 Integer ALU reg-reg operation : dst <-- reg1 op reg2 ++ pipe_class ialu_regI_regI(mRegI dst, mRegI src1, mRegI src2) %{ ++ single_instruction; ++ src1 : RD(read); ++ src2 : RD(read); ++ dst : WB(write)+1; ++ DECODE : ID; ++ ALU : CA; ++ %} ++ ++ //No.19 Integer mult operation : dst <-- reg1 mult reg2 ++ pipe_class ialu_mult(mRegI dst, mRegI src1, mRegI src2) %{ ++ src1 : RD(read); ++ src2 : RD(read); ++ dst : WB(write)+5; ++ DECODE : ID; ++ ALU2 : CA; ++ %} ++ ++ pipe_class mulL_reg_reg(mRegL dst, mRegL src1, mRegL src2) %{ ++ src1 : RD(read); ++ src2 : RD(read); ++ dst : WB(write)+10; ++ DECODE : ID; ++ ALU2 : CA; ++ %} ++ ++ //No.19 Integer div operation : dst <-- reg1 div reg2 ++ pipe_class ialu_div(mRegI dst, mRegI src1, mRegI src2) %{ ++ src1 : RD(read); ++ src2 : RD(read); ++ dst : WB(write)+10; ++ DECODE : ID; ++ ALU2 : CA; ++ %} ++ ++ //No.19 Integer mod operation : dst <-- reg1 mod reg2 ++ pipe_class ialu_mod(mRegI dst, mRegI src1, mRegI src2) %{ ++ instruction_count(2); ++ src1 : RD(read); ++ src2 : RD(read); ++ dst : WB(write)+10; ++ DECODE : ID; ++ ALU2 : CA; ++ %} ++ ++ //No.15 Long ALU reg-reg operation : dst <-- reg1 op reg2 ++ pipe_class ialu_regL_regL(mRegL dst, mRegL src1, mRegL src2) %{ ++ instruction_count(2); ++ src1 : RD(read); ++ src2 : RD(read); ++ dst : WB(write); ++ DECODE : ID; ++ ALU : CA; ++ %} ++ ++ //No.18 Long ALU reg-imm16 operation : dst <-- reg1 op imm16 ++ pipe_class ialu_regL_imm16(mRegL dst, mRegL src) %{ ++ instruction_count(2); ++ src : RD(read); ++ dst : WB(write); ++ DECODE : ID; ++ ALU : CA; ++ %} ++ ++ //no.16 load Long from memory : ++ pipe_class ialu_loadL(mRegL dst, memory mem) %{ ++ instruction_count(2); ++ mem : RD(read); ++ dst : WB(write)+5; ++ DECODE : ID; ++ MEM : RD; ++ %} ++ ++ //No.17 Store Long to Memory : ++ pipe_class ialu_storeL(mRegL src, memory mem) %{ ++ instruction_count(2); ++ mem : RD(read); ++ src : RD(read); ++ DECODE : ID; ++ MEM : RD; ++ %} ++ ++ //No.2 Integer ALU reg-imm16 operation : dst <-- reg1 op imm16 ++ pipe_class ialu_regI_imm16(mRegI dst, mRegI src) %{ ++ single_instruction; ++ src : RD(read); ++ dst : 
WB(write); ++ DECODE : ID; ++ ALU : CA; ++ %} ++ ++ //No.3 Integer move operation : dst <-- reg ++ pipe_class ialu_regI_mov(mRegI dst, mRegI src) %{ ++ src : RD(read); ++ dst : WB(write); ++ DECODE : ID; ++ ALU : CA; ++ %} ++ ++ //No.4 No instructions : do nothing ++ pipe_class empty( ) %{ ++ instruction_count(0); ++ %} ++ ++ //No.5 UnConditional branch : ++ pipe_class pipe_jump( label labl ) %{ ++ multiple_bundles; ++ DECODE : ID; ++ BR : RD; ++ %} ++ ++ //No.6 ALU Conditional branch : ++ pipe_class pipe_alu_branch(mRegI src1, mRegI src2, label labl ) %{ ++ multiple_bundles; ++ src1 : RD(read); ++ src2 : RD(read); ++ DECODE : ID; ++ BR : RD; ++ %} ++ ++ //no.7 load integer from memory : ++ pipe_class ialu_loadI(mRegI dst, memory mem) %{ ++ mem : RD(read); ++ dst : WB(write)+3; ++ DECODE : ID; ++ MEM : RD; ++ %} ++ ++ //No.8 Store Integer to Memory : ++ pipe_class ialu_storeI(mRegI src, memory mem) %{ ++ mem : RD(read); ++ src : RD(read); ++ DECODE : ID; ++ MEM : RD; ++ %} ++ ++ ++ //No.10 Floating FPU reg-reg operation : dst <-- reg1 op reg2 ++ pipe_class fpu_regF_regF(regF dst, regF src1, regF src2) %{ ++ src1 : RD(read); ++ src2 : RD(read); ++ dst : WB(write); ++ DECODE : ID; ++ FPU : CA; ++ %} ++ ++ //No.22 Floating div operation : dst <-- reg1 div reg2 ++ pipe_class fpu_div(regF dst, regF src1, regF src2) %{ ++ src1 : RD(read); ++ src2 : RD(read); ++ dst : WB(write); ++ DECODE : ID; ++ FPU2 : CA; ++ %} ++ ++ pipe_class fcvt_I2D(regD dst, mRegI src) %{ ++ src : RD(read); ++ dst : WB(write); ++ DECODE : ID; ++ FPU1 : CA; ++ %} ++ ++ pipe_class fcvt_D2I(mRegI dst, regD src) %{ ++ src : RD(read); ++ dst : WB(write); ++ DECODE : ID; ++ FPU1 : CA; ++ %} ++ ++ pipe_class pipe_mfc1(mRegI dst, regD src) %{ ++ src : RD(read); ++ dst : WB(write); ++ DECODE : ID; ++ MEM : RD; ++ %} ++ ++ pipe_class pipe_mtc1(regD dst, mRegI src) %{ ++ src : RD(read); ++ dst : WB(write); ++ DECODE : ID; ++ MEM : RD(5); ++ %} ++ ++ //No.23 Floating sqrt operation : dst <-- reg1 sqrt reg2 ++ pipe_class fpu_sqrt(regF dst, regF src1, regF src2) %{ ++ multiple_bundles; ++ src1 : RD(read); ++ src2 : RD(read); ++ dst : WB(write); ++ DECODE : ID; ++ FPU2 : CA; ++ %} ++ ++ //No.11 Load Floating from Memory : ++ pipe_class fpu_loadF(regF dst, memory mem) %{ ++ instruction_count(1); ++ mem : RD(read); ++ dst : WB(write)+3; ++ DECODE : ID; ++ MEM : RD; ++ %} ++ ++ //No.12 Store Floating to Memory : ++ pipe_class fpu_storeF(regF src, memory mem) %{ ++ instruction_count(1); ++ mem : RD(read); ++ src : RD(read); ++ DECODE : ID; ++ MEM : RD; ++ %} ++ ++ //No.13 FPU Conditional branch : ++ pipe_class pipe_fpu_branch(regF src1, regF src2, label labl ) %{ ++ multiple_bundles; ++ src1 : RD(read); ++ src2 : RD(read); ++ DECODE : ID; ++ BR : RD; ++ %} ++ ++//No.14 Floating FPU reg operation : dst <-- op reg ++ pipe_class fpu1_regF(regF dst, regF src) %{ ++ src : RD(read); ++ dst : WB(write); ++ DECODE : ID; ++ FPU : CA; ++ %} ++ ++ pipe_class long_memory_op() %{ ++ instruction_count(10); multiple_bundles; force_serialization; ++ fixed_latency(30); ++ %} ++ ++ pipe_class simple_call() %{ ++ instruction_count(10); multiple_bundles; force_serialization; ++ fixed_latency(200); ++ BR : RD; ++ %} ++ ++ pipe_class call() %{ ++ instruction_count(10); multiple_bundles; force_serialization; ++ fixed_latency(200); ++ %} ++ ++ //FIXME: ++ //No.9 Piple slow : for multi-instructions ++ pipe_class pipe_slow( ) %{ ++ instruction_count(20); ++ force_serialization; ++ multiple_bundles; ++ fixed_latency(50); ++ %} ++ ++%} ++ ++ ++ 
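The pipe classes above express result latency as a write stage plus extra cycles, e.g. dst : WB(write)+5 for an integer multiply and +10 for a divide, and the ADLC turns these annotations into scheduling tables. A toy C++ model of how such an annotation can be read, assuming the six-stage pipe_desc(IF, ID, RD, CA, WB, CM) declared above; the function names are made up for illustration:

    // Toy latency model for the pipeline description above (illustration only).
    enum Stage { IF = 0, ID, RD, CA, WB, CM };

    // `dst : WB(write)+extra` roughly means the result is ready this many
    // cycles after the producer issues.
    static inline int result_ready_cycle(int issue_cycle, Stage write_stage, int extra) {
      return issue_cycle + static_cast<int>(write_stage) + extra;
    }

    // A consumer reading its operands at RD stalls until the result is ready.
    static inline int stall_cycles(int producer_issue, int consumer_issue,
                                   Stage write_stage, int extra) {
      int ready = result_ready_cycle(producer_issue, write_stage, extra);
      int need  = consumer_issue + static_cast<int>(RD);
      return ready > need ? ready - need : 0;
    }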
++//----------INSTRUCTIONS------------------------------------------------------- ++// ++// match -- States which machine-independent subtree may be replaced ++// by this instruction. ++// ins_cost -- The estimated cost of this instruction is used by instruction ++// selection to identify a minimum cost tree of machine ++// instructions that matches a tree of machine-independent ++// instructions. ++// format -- A string providing the disassembly for this instruction. ++// The value of an instruction's operand may be inserted ++// by referring to it with a '$' prefix. ++// opcode -- Three instruction opcodes may be provided. These are referred ++// to within an encode class as $primary, $secondary, and $tertiary ++// respectively. The primary opcode is commonly used to ++// indicate the type of machine instruction, while secondary ++// and tertiary are often used for prefix options or addressing ++// modes. ++// ins_encode -- A list of encode classes with parameters. The encode class ++// name must have been defined in an 'enc_class' specification ++// in the encode section of the architecture description. ++ ++ ++// Load Integer ++instruct loadI(mRegI dst, memory mem) %{ ++ match(Set dst (LoadI mem)); ++ ++ ins_cost(125); ++ format %{ "lw $dst, $mem #@loadI" %} ++ ins_encode %{ ++ __ loadstore_enc($dst$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, C2_MacroAssembler::LOAD_INT); ++ %} ++ ins_pipe( ialu_loadI ); ++%} ++ ++instruct loadI_convI2L(mRegL dst, memory mem) %{ ++ match(Set dst (ConvI2L (LoadI mem))); ++ ++ ins_cost(125); ++ format %{ "lw $dst, $mem #@loadI_convI2L" %} ++ ins_encode %{ ++ __ loadstore_enc($dst$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, C2_MacroAssembler::LOAD_INT); ++ %} ++ ins_pipe( ialu_loadI ); ++%} ++ ++// Load Integer (32 bit signed) to Byte (8 bit signed) ++instruct loadI2B(mRegI dst, memory mem, immI_24 twentyfour) %{ ++ match(Set dst (RShiftI (LShiftI (LoadI mem) twentyfour) twentyfour)); ++ ++ ins_cost(125); ++ format %{ "lb $dst, $mem\t# int -> byte #@loadI2B" %} ++ ins_encode %{ ++ __ loadstore_enc($dst$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, C2_MacroAssembler::LOAD_BYTE); ++ %} ++ ins_pipe(ialu_loadI); ++%} ++ ++// Load Integer (32 bit signed) to Unsigned Byte (8 bit UNsigned) ++instruct loadI2UB(mRegI dst, memory mem, immI_255 mask) %{ ++ match(Set dst (AndI (LoadI mem) mask)); ++ ++ ins_cost(125); ++ format %{ "lbu $dst, $mem\t# int -> ubyte #@loadI2UB" %} ++ ins_encode %{ ++ __ loadstore_enc($dst$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, C2_MacroAssembler::LOAD_U_BYTE); ++ %} ++ ins_pipe(ialu_loadI); ++%} ++ ++// Load Integer (32 bit signed) to Short (16 bit signed) ++instruct loadI2S(mRegI dst, memory mem, immI_16 sixteen) %{ ++ match(Set dst (RShiftI (LShiftI (LoadI mem) sixteen) sixteen)); ++ ++ ins_cost(125); ++ format %{ "lh $dst, $mem\t# int -> short #@loadI2S" %} ++ ins_encode %{ ++ __ loadstore_enc($dst$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, C2_MacroAssembler::LOAD_SHORT); ++ %} ++ ins_pipe(ialu_loadI); ++%} ++ ++// Load Integer (32 bit signed) to Unsigned Short/Char (16 bit UNsigned) ++instruct loadI2US(mRegI dst, memory mem, immI_65535 mask) %{ ++ match(Set dst (AndI (LoadI mem) mask)); ++ ++ ins_cost(125); ++ format %{ "lhu $dst, $mem\t# int -> ushort/char #@loadI2US" %} ++ ins_encode %{ ++ __ loadstore_enc($dst$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, C2_MacroAssembler::LOAD_U_SHORT); ++ %} ++ ins_pipe(ialu_loadI); ++%} ++ 
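The loadI2B/loadI2UB/loadI2S/loadI2US rules above match the shift or mask trees the ideal graph uses to narrow an int loaded from memory, and collapse each tree into a single sign- or zero-extending load. In plain C++ terms the matched idioms are:

    #include <cstdint>

    // int -> narrower-type idioms folded into one narrow load:
    static inline int32_t sext_byte (int32_t x) { return (int8_t)x;  }  // (x << 24) >> 24  -> lb
    static inline int32_t zext_byte (int32_t x) { return x & 0xFF;   }  // x & 255          -> lbu
    static inline int32_t sext_short(int32_t x) { return (int16_t)x; }  // (x << 16) >> 16  -> lh
    static inline int32_t zext_short(int32_t x) { return x & 0xFFFF; }  // x & 65535        -> lhu

Matching the whole tree lets the backend avoid a 32-bit load followed by separate shift or mask instructions.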
++// Load Long. ++instruct loadL(mRegL dst, memory mem) %{ ++// predicate(!((LoadLNode*)n)->require_atomic_access()); ++ match(Set dst (LoadL mem)); ++ ++ ins_cost(250); ++ format %{ "ld $dst, $mem #@loadL" %} ++ ins_encode %{ ++ __ loadstore_enc($dst$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, C2_MacroAssembler::LOAD_LONG); ++ %} ++ ins_pipe( ialu_loadL ); ++%} ++ ++// Load Long - UNaligned ++instruct loadL_unaligned(mRegL dst, memory mem) %{ ++ match(Set dst (LoadL_unaligned mem)); ++ ++ // FIXME: Need more effective ldl/ldr ++ ins_cost(450); ++ format %{ "ld $dst, $mem #@loadL_unaligned\n\t" %} ++ ins_encode %{ ++ __ loadstore_enc($dst$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, C2_MacroAssembler::LOAD_LONG); ++ %} ++ ins_pipe( ialu_loadL ); ++%} ++ ++// Store Long ++instruct storeL_reg(memory mem, mRegL src) %{ ++ match(Set mem (StoreL mem src)); ++ ++ ins_cost(200); ++ format %{ "sd $mem, $src #@storeL_reg\n" %} ++ ins_encode %{ ++ __ loadstore_enc($src$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, C2_MacroAssembler::STORE_LONG); ++ %} ++ ins_pipe( ialu_storeL ); ++%} ++ ++instruct storeL_immL_0(memory mem, immL_0 zero) %{ ++ match(Set mem (StoreL mem zero)); ++ ++ ins_cost(180); ++ format %{ "sd zero, $mem #@storeL_immL_0" %} ++ ins_encode %{ ++ __ loadstore_enc(R0, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, C2_MacroAssembler::STORE_LONG); ++ %} ++ ins_pipe( ialu_storeL ); ++%} ++ ++// Load Compressed Pointer ++instruct loadN(mRegN dst, memory mem) ++%{ ++ match(Set dst (LoadN mem)); ++ ++ ins_cost(125); // XXX ++ format %{ "lwu $dst, $mem\t# compressed ptr @ loadN" %} ++ ins_encode %{ ++ relocInfo::relocType disp_reloc = $mem->disp_reloc(); ++ assert(disp_reloc == relocInfo::none, "cannot have disp"); ++ __ loadstore_enc($dst$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, C2_MacroAssembler::LOAD_U_INT); ++ %} ++ ins_pipe( ialu_loadI ); // XXX ++%} ++ ++instruct loadN2P(mRegP dst, memory mem) ++%{ ++ match(Set dst (DecodeN (LoadN mem))); ++ predicate(CompressedOops::base() == NULL && CompressedOops::shift() == 0); ++ ++ ins_cost(125); // XXX ++ format %{ "lwu $dst, $mem\t# @ loadN2P" %} ++ ins_encode %{ ++ relocInfo::relocType disp_reloc = $mem->disp_reloc(); ++ assert(disp_reloc == relocInfo::none, "cannot have disp"); ++ __ loadstore_enc($dst$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, C2_MacroAssembler::LOAD_U_INT); ++ %} ++ ins_pipe( ialu_loadI ); // XXX ++%} ++ ++// Load Pointer ++instruct loadP(mRegP dst, memory mem) %{ ++ match(Set dst (LoadP mem)); ++ ++ ins_cost(125); ++ format %{ "ld $dst, $mem #@loadP" %} ++ ins_encode %{ ++ relocInfo::relocType disp_reloc = $mem->disp_reloc(); ++ assert(disp_reloc == relocInfo::none, "cannot have disp"); ++ __ loadstore_enc($dst$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, C2_MacroAssembler::LOAD_LONG); ++ %} ++ ins_pipe( ialu_loadI ); ++%} ++ ++// Load Klass Pointer ++instruct loadKlass(mRegP dst, memory mem) %{ ++ match(Set dst (LoadKlass mem)); ++ ++ ins_cost(125); ++ format %{ "MOV $dst,$mem @ loadKlass" %} ++ ins_encode %{ ++ relocInfo::relocType disp_reloc = $mem->disp_reloc(); ++ assert(disp_reloc == relocInfo::none, "cannot have disp"); ++ __ loadstore_enc($dst$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, C2_MacroAssembler::LOAD_LONG); ++ %} ++ ins_pipe( ialu_loadI ); ++%} ++ ++// Load narrow Klass Pointer ++instruct loadNKlass(mRegN dst, memory mem) ++%{ ++ match(Set dst (LoadNKlass mem)); ++ ++ ins_cost(125); // 
XXX ++ format %{ "lwu $dst, $mem\t# compressed klass ptr @ loadNKlass" %} ++ ins_encode %{ ++ relocInfo::relocType disp_reloc = $mem->disp_reloc(); ++ assert(disp_reloc == relocInfo::none, "cannot have disp"); ++ __ loadstore_enc($dst$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, C2_MacroAssembler::LOAD_U_INT); ++ %} ++ ins_pipe( ialu_loadI ); // XXX ++%} ++ ++instruct loadN2PKlass(mRegP dst, memory mem) ++%{ ++ match(Set dst (DecodeNKlass (LoadNKlass mem))); ++ predicate(CompressedKlassPointers::base() == NULL && CompressedKlassPointers::shift() == 0); ++ ++ ins_cost(125); // XXX ++ format %{ "lwu $dst, $mem\t# compressed klass ptr @ loadN2PKlass" %} ++ ins_encode %{ ++ relocInfo::relocType disp_reloc = $mem->disp_reloc(); ++ assert(disp_reloc == relocInfo::none, "cannot have disp"); ++ __ loadstore_enc($dst$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, C2_MacroAssembler::LOAD_U_INT); ++ %} ++ ins_pipe( ialu_loadI ); // XXX ++%} ++ ++// Load Constant ++instruct loadConI(mRegI dst, immI src) %{ ++ match(Set dst src); ++ ++ ins_cost(150); ++ format %{ "mov $dst, $src #@loadConI" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ int value = $src$$constant; ++ __ move(dst, value); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++ ++instruct loadConL_set64(mRegL dst, immL src) %{ ++ match(Set dst src); ++ ins_cost(120); ++ format %{ "li $dst, $src @ loadConL_set64" %} ++ ins_encode %{ ++ __ set64($dst$$Register, $src$$constant); ++ %} ++ ins_pipe(ialu_regL_regL); ++%} ++ ++instruct loadConL16(mRegL dst, immL16 src) %{ ++ match(Set dst src); ++ ins_cost(105); ++ format %{ "mov $dst, $src #@loadConL16" %} ++ ins_encode %{ ++ Register dst_reg = as_Register($dst$$reg); ++ int value = $src$$constant; ++ __ daddiu(dst_reg, R0, value); ++ %} ++ ins_pipe( ialu_regL_regL ); ++%} ++ ++ ++instruct loadConL_immL_0(mRegL dst, immL_0 src) %{ ++ match(Set dst src); ++ ins_cost(100); ++ format %{ "mov $dst, zero #@loadConL_immL_0" %} ++ ins_encode %{ ++ Register dst_reg = as_Register($dst$$reg); ++ __ daddu(dst_reg, R0, R0); ++ %} ++ ins_pipe( ialu_regL_regL ); ++%} ++ ++// Load Range ++instruct loadRange(mRegI dst, memory mem) %{ ++ match(Set dst (LoadRange mem)); ++ ++ ins_cost(125); ++ format %{ "MOV $dst,$mem @ loadRange" %} ++ ins_encode %{ ++ __ loadstore_enc($dst$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, C2_MacroAssembler::LOAD_INT); ++ %} ++ ins_pipe( ialu_loadI ); ++%} ++ ++ ++instruct storeP(memory mem, mRegP src ) %{ ++ match(Set mem (StoreP mem src)); ++ ++ ins_cost(125); ++ format %{ "sd $src, $mem #@storeP" %} ++ ins_encode %{ ++ __ loadstore_enc($src$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, C2_MacroAssembler::STORE_LONG); ++ %} ++ ins_pipe( ialu_storeI ); ++%} ++ ++// Store NULL Pointer, mark word, or other simple pointer constant. 
++instruct storeImmP_immP_0(memory mem, immP_0 zero) %{ ++ match(Set mem (StoreP mem zero)); ++ ++ ins_cost(125); ++ format %{ "mov $mem, $zero #@storeImmP_immP_0" %} ++ ins_encode %{ ++ __ loadstore_enc(R0, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, C2_MacroAssembler::STORE_LONG); ++ %} ++ ins_pipe( ialu_storeI ); ++%} ++ ++// Store Compressed Pointer ++instruct storeN(memory mem, mRegN src) ++%{ ++ match(Set mem (StoreN mem src)); ++ ++ ins_cost(125); // XXX ++ format %{ "sw $mem, $src\t# compressed ptr @ storeN" %} ++ ins_encode %{ ++ __ loadstore_enc($src$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, C2_MacroAssembler::STORE_INT); ++ %} ++ ins_pipe( ialu_storeI ); ++%} ++ ++instruct storeP2N(memory mem, mRegP src) ++%{ ++ match(Set mem (StoreN mem (EncodeP src))); ++ predicate(CompressedOops::base() == NULL && CompressedOops::shift() == 0); ++ ++ ins_cost(125); // XXX ++ format %{ "sw $mem, $src\t# @ storeP2N" %} ++ ins_encode %{ ++ __ loadstore_enc($src$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, C2_MacroAssembler::STORE_INT); ++ %} ++ ins_pipe( ialu_storeI ); ++%} ++ ++instruct storeNKlass(memory mem, mRegN src) ++%{ ++ match(Set mem (StoreNKlass mem src)); ++ ++ ins_cost(125); // XXX ++ format %{ "sw $mem, $src\t# compressed klass ptr @ storeNKlass" %} ++ ins_encode %{ ++ __ loadstore_enc($src$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, C2_MacroAssembler::STORE_INT); ++ %} ++ ins_pipe( ialu_storeI ); ++%} ++ ++instruct storeP2NKlass(memory mem, mRegP src) ++%{ ++ match(Set mem (StoreNKlass mem (EncodePKlass src))); ++ predicate(CompressedKlassPointers::base() == NULL && CompressedKlassPointers::shift() == 0); ++ ++ ins_cost(125); // XXX ++ format %{ "sw $mem, $src\t# @ storeP2NKlass" %} ++ ins_encode %{ ++ __ loadstore_enc($src$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, C2_MacroAssembler::STORE_INT); ++ %} ++ ins_pipe( ialu_storeI ); ++%} ++ ++instruct storeImmN_immN_0(memory mem, immN_0 zero) ++%{ ++ match(Set mem (StoreN mem zero)); ++ ++ ins_cost(125); // XXX ++ format %{ "storeN0 zero, $mem\t# compressed ptr" %} ++ ins_encode %{ ++ __ loadstore_enc(R0, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, C2_MacroAssembler::STORE_INT); ++ %} ++ ins_pipe( ialu_storeI ); ++%} ++ ++// Store Byte ++instruct storeB_immB_0(memory mem, immI_0 zero) %{ ++ match(Set mem (StoreB mem zero)); ++ ++ format %{ "mov $mem, zero #@storeB_immB_0" %} ++ ins_encode %{ ++ __ loadstore_enc(R0, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, C2_MacroAssembler::STORE_BYTE); ++ %} ++ ins_pipe( ialu_storeI ); ++%} ++ ++instruct storeB(memory mem, mRegI src) %{ ++ match(Set mem (StoreB mem src)); ++ ++ ins_cost(125); ++ format %{ "sb $src, $mem #@storeB" %} ++ ins_encode %{ ++ __ loadstore_enc($src$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, C2_MacroAssembler::STORE_BYTE); ++ %} ++ ins_pipe( ialu_storeI ); ++%} ++ ++instruct storeB_convL2I(memory mem, mRegL src) %{ ++ match(Set mem (StoreB mem (ConvL2I src))); ++ ++ ins_cost(125); ++ format %{ "sb $src, $mem #@storeB_convL2I" %} ++ ins_encode %{ ++ __ loadstore_enc($src$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, C2_MacroAssembler::STORE_BYTE); ++ %} ++ ins_pipe( ialu_storeI ); ++%} ++ ++// Load Byte (8bit signed) ++instruct loadB(mRegI dst, memory mem) %{ ++ match(Set dst (LoadB mem)); ++ ++ ins_cost(125); ++ format %{ "lb $dst, $mem #@loadB" %} ++ ins_encode %{ ++ __ loadstore_enc($dst$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, 
C2_MacroAssembler::LOAD_BYTE); ++ %} ++ ins_pipe( ialu_loadI ); ++%} ++ ++instruct loadB_convI2L(mRegL dst, memory mem) %{ ++ match(Set dst (ConvI2L (LoadB mem))); ++ ++ ins_cost(125); ++ format %{ "lb $dst, $mem #@loadB_convI2L" %} ++ ins_encode %{ ++ __ loadstore_enc($dst$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, C2_MacroAssembler::LOAD_BYTE); ++ %} ++ ins_pipe( ialu_loadI ); ++%} ++ ++// Load Byte (8bit UNsigned) ++instruct loadUB(mRegI dst, memory mem) %{ ++ match(Set dst (LoadUB mem)); ++ ++ ins_cost(125); ++ format %{ "lbu $dst, $mem #@loadUB" %} ++ ins_encode %{ ++ __ loadstore_enc($dst$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, C2_MacroAssembler::LOAD_U_BYTE); ++ %} ++ ins_pipe( ialu_loadI ); ++%} ++ ++instruct loadUB_convI2L(mRegL dst, memory mem) %{ ++ match(Set dst (ConvI2L (LoadUB mem))); ++ ++ ins_cost(125); ++ format %{ "lbu $dst, $mem #@loadUB_convI2L" %} ++ ins_encode %{ ++ __ loadstore_enc($dst$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, C2_MacroAssembler::LOAD_U_BYTE); ++ %} ++ ins_pipe( ialu_loadI ); ++%} ++ ++// Load Short (16bit signed) ++instruct loadS(mRegI dst, memory mem) %{ ++ match(Set dst (LoadS mem)); ++ ++ ins_cost(125); ++ format %{ "lh $dst, $mem #@loadS" %} ++ ins_encode %{ ++ __ loadstore_enc($dst$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, C2_MacroAssembler::LOAD_SHORT); ++ %} ++ ins_pipe( ialu_loadI ); ++%} ++ ++// Load Short (16 bit signed) to Byte (8 bit signed) ++instruct loadS2B(mRegI dst, memory mem, immI_24 twentyfour) %{ ++ match(Set dst (RShiftI (LShiftI (LoadS mem) twentyfour) twentyfour)); ++ ++ ins_cost(125); ++ format %{ "lb $dst, $mem\t# short -> byte #@loadS2B" %} ++ ins_encode %{ ++ __ loadstore_enc($dst$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, C2_MacroAssembler::LOAD_BYTE); ++ %} ++ ins_pipe(ialu_loadI); ++%} ++ ++instruct loadS_convI2L(mRegL dst, memory mem) %{ ++ match(Set dst (ConvI2L (LoadS mem))); ++ ++ ins_cost(125); ++ format %{ "lh $dst, $mem #@loadS_convI2L" %} ++ ins_encode %{ ++ __ loadstore_enc($dst$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, C2_MacroAssembler::LOAD_SHORT); ++ %} ++ ins_pipe( ialu_loadI ); ++%} ++ ++// Store Integer Immediate ++instruct storeI_immI_0(memory mem, immI_0 zero) %{ ++ match(Set mem (StoreI mem zero)); ++ ++ format %{ "mov $mem, zero #@storeI_immI_0" %} ++ ins_encode %{ ++ __ loadstore_enc(R0, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, C2_MacroAssembler::STORE_INT); ++ %} ++ ins_pipe( ialu_storeI ); ++%} ++ ++// Store Integer ++instruct storeI(memory mem, mRegI src) %{ ++ match(Set mem (StoreI mem src)); ++ ++ ins_cost(125); ++ format %{ "sw $mem, $src #@storeI" %} ++ ins_encode %{ ++ __ loadstore_enc($src$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, C2_MacroAssembler::STORE_INT); ++ %} ++ ins_pipe( ialu_storeI ); ++%} ++ ++instruct storeI_convL2I(memory mem, mRegL src) %{ ++ match(Set mem (StoreI mem (ConvL2I src))); ++ ++ ins_cost(125); ++ format %{ "sw $mem, $src #@storeI_convL2I" %} ++ ins_encode %{ ++ __ loadstore_enc($src$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, C2_MacroAssembler::STORE_INT); ++ %} ++ ins_pipe( ialu_storeI ); ++%} ++ ++// Load Float ++instruct loadF(regF dst, memory mem) %{ ++ match(Set dst (LoadF mem)); ++ ++ ins_cost(150); ++ format %{ "loadF $dst, $mem #@loadF" %} ++ ins_encode %{ ++ __ loadstore_enc($dst$$FloatRegister, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, C2_MacroAssembler::LOAD_FLOAT); ++ %} ++ ins_pipe( 
ialu_loadI ); ++%} ++ ++instruct loadConP_general(mRegP dst, immP src) %{ ++ match(Set dst src); ++ ++ ins_cost(120); ++ format %{ "li $dst, $src #@loadConP_general" %} ++ ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ long* value = (long*)$src$$constant; ++ ++ if($src->constant_reloc() == relocInfo::metadata_type){ ++ int klass_index = __ oop_recorder()->find_index((Klass*)value); ++ RelocationHolder rspec = metadata_Relocation::spec(klass_index); ++ ++ __ relocate(rspec); ++ __ patchable_set48(dst, (long)value); ++ } else if($src->constant_reloc() == relocInfo::oop_type){ ++ int oop_index = __ oop_recorder()->find_index((jobject)value); ++ RelocationHolder rspec = oop_Relocation::spec(oop_index); ++ ++ __ relocate(rspec); ++ __ patchable_set48(dst, (long)value); ++ } else if ($src->constant_reloc() == relocInfo::none) { ++ __ set64(dst, (long)value); ++ } ++ %} ++ ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++instruct loadConP_no_oop_cheap(mRegP dst, immP_no_oop_cheap src) %{ ++ match(Set dst src); ++ ++ ins_cost(80); ++ format %{ "li $dst, $src @ loadConP_no_oop_cheap" %} ++ ++ ins_encode %{ ++ __ set64($dst$$Register, $src$$constant); ++ %} ++ ++ ins_pipe(ialu_regI_regI); ++%} ++ ++instruct loadConP_immP_0(mRegP dst, immP_0 src) ++%{ ++ match(Set dst src); ++ ++ ins_cost(50); ++ format %{ "mov $dst, R0\t# ptr" %} ++ ins_encode %{ ++ Register dst_reg = $dst$$Register; ++ __ daddu(dst_reg, R0, R0); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++instruct loadConN_immN_0(mRegN dst, immN_0 src) %{ ++ match(Set dst src); ++ format %{ "move $dst, R0\t# compressed NULL ptr" %} ++ ins_encode %{ ++ __ move($dst$$Register, R0); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++instruct loadConN(mRegN dst, immN src) %{ ++ match(Set dst src); ++ ++ ins_cost(125); ++ format %{ "li $dst, $src\t# compressed ptr @ loadConN" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ __ set_narrow_oop(dst, (jobject)$src$$constant); ++ %} ++ ins_pipe( ialu_regI_regI ); // XXX ++%} ++ ++instruct loadConNKlass(mRegN dst, immNKlass src) %{ ++ match(Set dst src); ++ ++ ins_cost(125); ++ format %{ "li $dst, $src\t# compressed klass ptr @ loadConNKlass" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ __ set_narrow_klass(dst, (Klass*)$src$$constant); ++ %} ++ ins_pipe( ialu_regI_regI ); // XXX ++%} ++ ++//FIXME ++// Tail Call; Jump from runtime stub to Java code. ++// Also known as an 'interprocedural jump'. ++// Target of jump will eventually return to caller. ++// TailJump below removes the return address. ++instruct TailCalljmpInd(mRegP jump_target, mRegP method_ptr) %{ ++ match(TailCall jump_target method_ptr); ++ ins_cost(300); ++ format %{ "JMP $jump_target \t# @TailCalljmpInd" %} ++ ++ ins_encode %{ ++ Register target = $jump_target$$Register; ++ Register ptr = $method_ptr$$Register; ++ ++ // RA will be used in generate_forward_exception() ++ __ push(RA); ++ ++ __ move(S3, ptr); ++ __ jr(target); ++ __ delayed()->nop(); ++ %} ++ ++ ins_pipe( pipe_jump ); ++%} ++ ++// Create exception oop: created by stack-crawling runtime code. ++// Created exception is now available to this handler, and is setup ++// just prior to jumping to this handler. No code emitted. 
++instruct CreateException( a0_RegP ex_oop ) ++%{ ++ match(Set ex_oop (CreateEx)); ++ ++ // use the following format syntax ++ format %{ "# exception oop is in A0; no code emitted @CreateException" %} ++ ins_encode %{ ++ // X86 leaves this function empty ++ __ block_comment("CreateException is empty in MIPS"); ++ %} ++ ins_pipe( empty ); ++// ins_pipe( pipe_jump ); ++%} ++ ++ ++/* The mechanism of exception handling is clear now. ++ ++- Common try/catch: ++ [stubGenerator_mips.cpp] generate_forward_exception() ++ |- V0, V1 are created ++ |- T9 <= SharedRuntime::exception_handler_for_return_address ++ `- jr T9 ++ `- the caller's exception_handler ++ `- jr OptoRuntime::exception_blob ++ `- here ++- Rethrow(e.g. 'unwind'): ++ * The callee: ++ |- an exception is triggered during execution ++ `- exits the callee method through RethrowException node ++ |- The callee pushes exception_oop(T0) and exception_pc(RA) ++ `- The callee jumps to OptoRuntime::rethrow_stub() ++ * In OptoRuntime::rethrow_stub: ++ |- The VM calls _rethrow_Java to determine the return address in the caller method ++ `- exits the stub with tailjmpInd ++ |- pops exception_oop(V0) and exception_pc(V1) ++ `- jumps to the return address(usually an exception_handler) ++ * The caller: ++ `- continues processing the exception_blob with V0/V1 ++*/ ++ ++// Rethrow exception: ++// The exception oop will come in the first argument position. ++// Then JUMP (not call) to the rethrow stub code. ++instruct RethrowException() ++%{ ++ match(Rethrow); ++ ++ // use the following format syntax ++ format %{ "JMP rethrow_stub #@RethrowException" %} ++ ins_encode %{ ++ __ block_comment("@ RethrowException"); ++ ++ cbuf.set_insts_mark(); ++ cbuf.relocate(cbuf.insts_mark(), runtime_call_Relocation::spec()); ++ ++ // call OptoRuntime::rethrow_stub to get the exception handler in parent method ++ __ patchable_jump((address)OptoRuntime::rethrow_stub()); ++ %} ++ ins_pipe( pipe_jump ); ++%} ++ ++// ============================================================================ ++// Branch Instructions --- long offset versions ++ ++// Jump Direct ++instruct jmpDir_long(label labl) %{ ++ match(Goto); ++ effect(USE labl); ++ ++ ins_cost(300); ++ format %{ "JMP $labl #@jmpDir_long" %} ++ ++ ins_encode %{ ++ Label* L = $labl$$label; ++ __ jmp_far(*L); ++ %} ++ ++ ins_pipe( pipe_jump ); ++ //ins_pc_relative(1); ++%} ++ ++// Jump Direct Conditional - Label defines a relative address from Jcc+1 ++instruct jmpLoopEnd_long(cmpOp cop, mRegI src1, mRegI src2, label labl) %{ ++ match(CountedLoopEnd cop (CmpI src1 src2)); ++ effect(USE labl); ++ ++ ins_cost(300); ++ format %{ "J$cop $src1, $src2, $labl\t# Loop end @ jmpLoopEnd_long" %} ++ ins_encode %{ ++ Register op1 = $src1$$Register; ++ Register op2 = $src2$$Register; ++ Label* L = $labl$$label; ++ int flag = $cop$$cmpcode; ++ ++ switch(flag) { ++ case 0x01: //equal ++ __ beq_long(op1, op2, *L); ++ break; ++ case 0x02: //not_equal ++ __ bne_long(op1, op2, *L); ++ break; ++ case 0x03: //above ++ __ slt(AT, op2, op1); ++ __ bne_long(AT, R0, *L); ++ break; ++ case 0x04: //above_equal ++ __ slt(AT, op1, op2); ++ __ beq_long(AT, R0, *L); ++ break; ++ case 0x05: //below ++ __ slt(AT, op1, op2); ++ __ bne_long(AT, R0, *L); ++ break; ++ case 0x06: //below_equal ++ __ slt(AT, op2, op1); ++ __ beq_long(AT, R0, *L); ++ break; ++ default: ++ Unimplemented(); ++ } ++ %} ++ ins_pipe( pipe_jump ); ++ ins_pc_relative(1); ++%} ++ ++instruct jmpLoopEnd_reg_immI_long(cmpOp cop, mRegI src1, immI src2, label labl) %{ ++ 
match(CountedLoopEnd cop (CmpI src1 src2)); ++ effect(USE labl); ++ ++ ins_cost(300); ++ format %{ "J$cop $src1, $src2, $labl\t# Loop end @ jmpLoopEnd_reg_immI_long" %} ++ ins_encode %{ ++ Register op1 = $src1$$Register; ++ Register op2 = AT; ++ Label* L = $labl$$label; ++ int flag = $cop$$cmpcode; ++ ++ __ move(op2, $src2$$constant); ++ ++ switch(flag) { ++ case 0x01: //equal ++ __ beq_long(op1, op2, *L); ++ break; ++ case 0x02: //not_equal ++ __ bne_long(op1, op2, *L); ++ break; ++ case 0x03: //above ++ __ slt(AT, op2, op1); ++ __ bne_long(AT, R0, *L); ++ break; ++ case 0x04: //above_equal ++ __ slt(AT, op1, op2); ++ __ beq_long(AT, R0, *L); ++ break; ++ case 0x05: //below ++ __ slt(AT, op1, op2); ++ __ bne_long(AT, R0, *L); ++ break; ++ case 0x06: //below_equal ++ __ slt(AT, op2, op1); ++ __ beq_long(AT, R0, *L); ++ break; ++ default: ++ Unimplemented(); ++ } ++ %} ++ ins_pipe( pipe_jump ); ++ ins_pc_relative(1); ++%} ++ ++ ++// This match pattern is created for StoreIConditional since I cannot match IfNode without a RegFlags! ++instruct jmpCon_flags_long(cmpOp cop, FlagsReg cr, label labl) %{ ++ match(If cop cr); ++ effect(USE labl); ++ ++ ins_cost(300); ++ format %{ "J$cop $labl #mips uses T0 as equivalent to eflag @jmpCon_flags_long" %} ++ ++ ins_encode %{ ++ Label* L = $labl$$label; ++ switch($cop$$cmpcode) { ++ case 0x01: //equal ++ __ bne_long($cr$$Register, R0, *L); ++ break; ++ case 0x02: //not equal ++ __ beq_long($cr$$Register, R0, *L); ++ break; ++ default: ++ Unimplemented(); ++ } ++ %} ++ ++ ins_pipe( pipe_jump ); ++ ins_pc_relative(1); ++%} ++ ++// Conditional jumps ++instruct branchConP_zero_long(cmpOpU cmp, mRegP op1, immP_0 zero, label labl) %{ ++ match(If cmp (CmpP op1 zero)); ++ effect(USE labl); ++ ++ ins_cost(180); ++ format %{ "b$cmp $op1, R0, $labl #@branchConP_zero_long" %} ++ ++ ins_encode %{ ++ Register op1 = $op1$$Register; ++ Register op2 = R0; ++ Label* L = $labl$$label; ++ int flag = $cmp$$cmpcode; ++ ++ switch(flag) { ++ case 0x01: //equal ++ __ beq_long(op1, op2, *L); ++ break; ++ case 0x02: //not_equal ++ __ bne_long(op1, op2, *L); ++ break; ++ default: ++ Unimplemented(); ++ } ++ %} ++ ++ ins_pc_relative(1); ++ ins_pipe( pipe_alu_branch ); ++%} ++ ++instruct branchConN2P_zero_long(cmpOpU cmp, mRegN op1, immP_0 zero, label labl) %{ ++ match(If cmp (CmpP (DecodeN op1) zero)); ++ predicate(CompressedOops::base() == NULL && CompressedOops::shift() == 0); ++ effect(USE labl); ++ ++ ins_cost(180); ++ format %{ "b$cmp $op1, R0, $labl #@branchConN2P_zero_long" %} ++ ++ ins_encode %{ ++ Register op1 = $op1$$Register; ++ Register op2 = R0; ++ Label* L = $labl$$label; ++ int flag = $cmp$$cmpcode; ++ ++ switch(flag) ++ { ++ case 0x01: //equal ++ __ beq_long(op1, op2, *L); ++ break; ++ case 0x02: //not_equal ++ __ bne_long(op1, op2, *L); ++ break; ++ default: ++ Unimplemented(); ++ } ++ %} ++ ++ ins_pc_relative(1); ++ ins_pipe( pipe_alu_branch ); ++%} ++ ++ ++instruct branchConP_long(cmpOpU cmp, mRegP op1, mRegP op2, label labl) %{ ++ match(If cmp (CmpP op1 op2)); ++// predicate(can_branch_register(_kids[0]->_leaf, _kids[1]->_leaf)); ++ effect(USE labl); ++ ++ ins_cost(200); ++ format %{ "b$cmp $op1, $op2, $labl #@branchConP_long" %} ++ ++ ins_encode %{ ++ Register op1 = $op1$$Register; ++ Register op2 = $op2$$Register; ++ Label* L = $labl$$label; ++ int flag = $cmp$$cmpcode; ++ ++ switch(flag) { ++ case 0x01: //equal ++ __ beq_long(op1, op2, *L); ++ break; ++ case 0x02: //not_equal ++ __ bne_long(op1, op2, *L); ++ break; ++ case 0x03: //above ++ __ sltu(AT, op2, 
op1); ++ __ bne_long(R0, AT, *L); ++ break; ++ case 0x04: //above_equal ++ __ sltu(AT, op1, op2); ++ __ beq_long(AT, R0, *L); ++ break; ++ case 0x05: //below ++ __ sltu(AT, op1, op2); ++ __ bne_long(R0, AT, *L); ++ break; ++ case 0x06: //below_equal ++ __ sltu(AT, op2, op1); ++ __ beq_long(AT, R0, *L); ++ break; ++ default: ++ Unimplemented(); ++ } ++ %} ++ ++ ins_pc_relative(1); ++ ins_pipe( pipe_alu_branch ); ++%} ++ ++instruct cmpN_null_branch_long(cmpOp cmp, mRegN op1, immN_0 null, label labl) %{ ++ match(If cmp (CmpN op1 null)); ++ effect(USE labl); ++ ++ ins_cost(180); ++ format %{ "CMP $op1,0\t! compressed ptr\n\t" ++ "BP$cmp $labl @ cmpN_null_branch_long" %} ++ ins_encode %{ ++ Register op1 = $op1$$Register; ++ Register op2 = R0; ++ Label* L = $labl$$label; ++ int flag = $cmp$$cmpcode; ++ ++ switch(flag) { ++ case 0x01: //equal ++ __ beq_long(op1, op2, *L); ++ break; ++ case 0x02: //not_equal ++ __ bne_long(op1, op2, *L); ++ break; ++ default: ++ Unimplemented(); ++ } ++ %} ++//TODO: pipe_branchP or create pipe_branchN LEE ++ ins_pc_relative(1); ++ ins_pipe( pipe_alu_branch ); ++%} ++ ++instruct cmpN_reg_branch_long(cmpOp cmp, mRegN op1, mRegN op2, label labl) %{ ++ match(If cmp (CmpN op1 op2)); ++ effect(USE labl); ++ ++ ins_cost(180); ++ format %{ "CMP $op1,$op2\t! compressed ptr\n\t" ++ "BP$cmp $labl @ cmpN_reg_branch_long" %} ++ ins_encode %{ ++ Register op1_reg = $op1$$Register; ++ Register op2_reg = $op2$$Register; ++ Label* L = $labl$$label; ++ int flag = $cmp$$cmpcode; ++ ++ switch(flag) { ++ case 0x01: //equal ++ __ beq_long(op1_reg, op2_reg, *L); ++ break; ++ case 0x02: //not_equal ++ __ bne_long(op1_reg, op2_reg, *L); ++ break; ++ case 0x03: //above ++ __ sltu(AT, op2_reg, op1_reg); ++ __ bne_long(R0, AT, *L); ++ break; ++ case 0x04: //above_equal ++ __ sltu(AT, op1_reg, op2_reg); ++ __ beq_long(AT, R0, *L); ++ break; ++ case 0x05: //below ++ __ sltu(AT, op1_reg, op2_reg); ++ __ bne_long(R0, AT, *L); ++ break; ++ case 0x06: //below_equal ++ __ sltu(AT, op2_reg, op1_reg); ++ __ beq_long(AT, R0, *L); ++ break; ++ default: ++ Unimplemented(); ++ } ++ %} ++ ins_pc_relative(1); ++ ins_pipe( pipe_alu_branch ); ++%} ++ ++instruct branchConIU_reg_reg_long(cmpOpU cmp, mRegI src1, mRegI src2, label labl) %{ ++ match( If cmp (CmpU src1 src2) ); ++ effect(USE labl); ++ format %{ "BR$cmp $src1, $src2, $labl #@branchConIU_reg_reg_long" %} ++ ++ ins_encode %{ ++ Register op1 = $src1$$Register; ++ Register op2 = $src2$$Register; ++ Label* L = $labl$$label; ++ int flag = $cmp$$cmpcode; ++ ++ switch(flag) { ++ case 0x01: //equal ++ __ beq_long(op1, op2, *L); ++ break; ++ case 0x02: //not_equal ++ __ bne_long(op1, op2, *L); ++ break; ++ case 0x03: //above ++ __ sltu(AT, op2, op1); ++ __ bne_long(AT, R0, *L); ++ break; ++ case 0x04: //above_equal ++ __ sltu(AT, op1, op2); ++ __ beq_long(AT, R0, *L); ++ break; ++ case 0x05: //below ++ __ sltu(AT, op1, op2); ++ __ bne_long(AT, R0, *L); ++ break; ++ case 0x06: //below_equal ++ __ sltu(AT, op2, op1); ++ __ beq_long(AT, R0, *L); ++ break; ++ default: ++ Unimplemented(); ++ } ++ %} ++ ++ ins_pc_relative(1); ++ ins_pipe( pipe_alu_branch ); ++%} ++ ++ ++instruct branchConIU_reg_imm_long(cmpOpU cmp, mRegI src1, immI src2, label labl) %{ ++ match( If cmp (CmpU src1 src2) ); ++ effect(USE labl); ++ format %{ "BR$cmp $src1, $src2, $labl #@branchConIU_reg_imm_long" %} ++ ++ ins_encode %{ ++ Register op1 = $src1$$Register; ++ int val = $src2$$constant; ++ Label* L = $labl$$label; ++ int flag = $cmp$$cmpcode; ++ ++ __ move(AT, val); ++ switch(flag) { ++ 
case 0x01: //equal ++ __ beq_long(op1, AT, *L); ++ break; ++ case 0x02: //not_equal ++ __ bne_long(op1, AT, *L); ++ break; ++ case 0x03: //above ++ __ sltu(AT, AT, op1); ++ __ bne_long(R0, AT, *L); ++ break; ++ case 0x04: //above_equal ++ __ sltu(AT, op1, AT); ++ __ beq_long(AT, R0, *L); ++ break; ++ case 0x05: //below ++ __ sltu(AT, op1, AT); ++ __ bne_long(R0, AT, *L); ++ break; ++ case 0x06: //below_equal ++ __ sltu(AT, AT, op1); ++ __ beq_long(AT, R0, *L); ++ break; ++ default: ++ Unimplemented(); ++ } ++ %} ++ ++ ins_pc_relative(1); ++ ins_pipe( pipe_alu_branch ); ++%} ++ ++instruct branchConI_reg_reg_long(cmpOp cmp, mRegI src1, mRegI src2, label labl) %{ ++ match( If cmp (CmpI src1 src2) ); ++ effect(USE labl); ++ format %{ "BR$cmp $src1, $src2, $labl #@branchConI_reg_reg_long" %} ++ ++ ins_encode %{ ++ Register op1 = $src1$$Register; ++ Register op2 = $src2$$Register; ++ Label* L = $labl$$label; ++ int flag = $cmp$$cmpcode; ++ ++ switch(flag) { ++ case 0x01: //equal ++ __ beq_long(op1, op2, *L); ++ break; ++ case 0x02: //not_equal ++ __ bne_long(op1, op2, *L); ++ break; ++ case 0x03: //above ++ __ slt(AT, op2, op1); ++ __ bne_long(R0, AT, *L); ++ break; ++ case 0x04: //above_equal ++ __ slt(AT, op1, op2); ++ __ beq_long(AT, R0, *L); ++ break; ++ case 0x05: //below ++ __ slt(AT, op1, op2); ++ __ bne_long(R0, AT, *L); ++ break; ++ case 0x06: //below_equal ++ __ slt(AT, op2, op1); ++ __ beq_long(AT, R0, *L); ++ break; ++ default: ++ Unimplemented(); ++ } ++ %} ++ ++ ins_pc_relative(1); ++ ins_pipe( pipe_alu_branch ); ++%} ++ ++instruct branchConI_reg_immI_0_long(cmpOp cmp, mRegI src1, immI_0 src2, label labl) %{ ++ match( If cmp (CmpI src1 src2) ); ++ effect(USE labl); ++ ins_cost(170); ++ format %{ "BR$cmp $src1, $src2, $labl #@branchConI_reg_immI_0_long" %} ++ ++ ins_encode %{ ++ Register op1 = $src1$$Register; ++ Label* L = $labl$$label; ++ int flag = $cmp$$cmpcode; ++ ++ switch(flag) { ++ case 0x01: //equal ++ __ beq_long(op1, R0, *L); ++ break; ++ case 0x02: //not_equal ++ __ bne_long(op1, R0, *L); ++ break; ++ case 0x03: //greater ++ __ slt(AT, R0, op1); ++ __ bne_long(R0, AT, *L); ++ break; ++ case 0x04: //greater_equal ++ __ slt(AT, op1, R0); ++ __ beq_long(AT, R0, *L); ++ break; ++ case 0x05: //less ++ __ slt(AT, op1, R0); ++ __ bne_long(R0, AT, *L); ++ break; ++ case 0x06: //less_equal ++ __ slt(AT, R0, op1); ++ __ beq_long(AT, R0, *L); ++ break; ++ default: ++ Unimplemented(); ++ } ++ %} ++ ++ ins_pc_relative(1); ++ ins_pipe( pipe_alu_branch ); ++%} ++ ++instruct branchConI_reg_imm_long(cmpOp cmp, mRegI src1, immI src2, label labl) %{ ++ match( If cmp (CmpI src1 src2) ); ++ effect(USE labl); ++ ins_cost(200); ++ format %{ "BR$cmp $src1, $src2, $labl #@branchConI_reg_imm_long" %} ++ ++ ins_encode %{ ++ Register op1 = $src1$$Register; ++ int val = $src2$$constant; ++ Label* L = $labl$$label; ++ int flag = $cmp$$cmpcode; ++ ++ __ move(AT, val); ++ switch(flag) { ++ case 0x01: //equal ++ __ beq_long(op1, AT, *L); ++ break; ++ case 0x02: //not_equal ++ __ bne_long(op1, AT, *L); ++ break; ++ case 0x03: //greater ++ __ slt(AT, AT, op1); ++ __ bne_long(R0, AT, *L); ++ break; ++ case 0x04: //greater_equal ++ __ slt(AT, op1, AT); ++ __ beq_long(AT, R0, *L); ++ break; ++ case 0x05: //less ++ __ slt(AT, op1, AT); ++ __ bne_long(R0, AT, *L); ++ break; ++ case 0x06: //less_equal ++ __ slt(AT, AT, op1); ++ __ beq_long(AT, R0, *L); ++ break; ++ default: ++ Unimplemented(); ++ } ++ %} ++ ++ ins_pc_relative(1); ++ ins_pipe( pipe_alu_branch ); ++%} ++ ++instruct 
branchConIU_reg_immI_0_long(cmpOpU cmp, mRegI src1, immI_0 zero, label labl) %{ ++ match( If cmp (CmpU src1 zero) ); ++ effect(USE labl); ++ format %{ "BR$cmp $src1, zero, $labl #@branchConIU_reg_immI_0_long" %} ++ ++ ins_encode %{ ++ Register op1 = $src1$$Register; ++ Label* L = $labl$$label; ++ int flag = $cmp$$cmpcode; ++ ++ switch(flag) { ++ case 0x01: //equal ++ __ beq_long(op1, R0, *L); ++ break; ++ case 0x02: //not_equal ++ __ bne_long(op1, R0, *L); ++ break; ++ case 0x03: //above ++ __ bne_long(R0, op1, *L); ++ break; ++ case 0x04: //above_equal ++ __ beq_long(R0, R0, *L); ++ break; ++ case 0x05: //below ++ return; ++ break; ++ case 0x06: //below_equal ++ __ beq_long(op1, R0, *L); ++ break; ++ default: ++ Unimplemented(); ++ } ++ %} ++ ++ ins_pc_relative(1); ++ ins_pipe( pipe_alu_branch ); ++%} ++ ++ ++instruct branchConIU_reg_immI16_long(cmpOpU cmp, mRegI src1, immI16 src2, label labl) %{ ++ match( If cmp (CmpU src1 src2) ); ++ effect(USE labl); ++ ins_cost(180); ++ format %{ "BR$cmp $src1, $src2, $labl #@branchConIU_reg_immI16_long" %} ++ ++ ins_encode %{ ++ Register op1 = $src1$$Register; ++ int val = $src2$$constant; ++ Label* L = $labl$$label; ++ int flag = $cmp$$cmpcode; ++ ++ switch(flag) { ++ case 0x01: //equal ++ __ move(AT, val); ++ __ beq_long(op1, AT, *L); ++ break; ++ case 0x02: //not_equal ++ __ move(AT, val); ++ __ bne_long(op1, AT, *L); ++ break; ++ case 0x03: //above ++ __ move(AT, val); ++ __ sltu(AT, AT, op1); ++ __ bne_long(R0, AT, *L); ++ break; ++ case 0x04: //above_equal ++ __ sltiu(AT, op1, val); ++ __ beq_long(AT, R0, *L); ++ break; ++ case 0x05: //below ++ __ sltiu(AT, op1, val); ++ __ bne_long(R0, AT, *L); ++ break; ++ case 0x06: //below_equal ++ __ move(AT, val); ++ __ sltu(AT, AT, op1); ++ __ beq_long(AT, R0, *L); ++ break; ++ default: ++ Unimplemented(); ++ } ++ %} ++ ++ ins_pc_relative(1); ++ ins_pipe( pipe_alu_branch ); ++%} ++ ++ ++instruct branchConL_regL_regL_long(cmpOp cmp, mRegL src1, mRegL src2, label labl) %{ ++ match( If cmp (CmpL src1 src2) ); ++ effect(USE labl); ++ format %{ "BR$cmp $src1, $src2, $labl #@branchConL_regL_regL_long" %} ++ ins_cost(250); ++ ++ ins_encode %{ ++ Register opr1_reg = as_Register($src1$$reg); ++ Register opr2_reg = as_Register($src2$$reg); ++ ++ Label* target = $labl$$label; ++ int flag = $cmp$$cmpcode; ++ ++ switch(flag) { ++ case 0x01: //equal ++ __ beq_long(opr1_reg, opr2_reg, *target); ++ break; ++ ++ case 0x02: //not_equal ++ __ bne_long(opr1_reg, opr2_reg, *target); ++ break; ++ ++ case 0x03: //greater ++ __ slt(AT, opr2_reg, opr1_reg); ++ __ bne_long(AT, R0, *target); ++ break; ++ ++ case 0x04: //greater_equal ++ __ slt(AT, opr1_reg, opr2_reg); ++ __ beq_long(AT, R0, *target); ++ break; ++ ++ case 0x05: //less ++ __ slt(AT, opr1_reg, opr2_reg); ++ __ bne_long(AT, R0, *target); ++ break; ++ ++ case 0x06: //less_equal ++ __ slt(AT, opr2_reg, opr1_reg); ++ __ beq_long(AT, R0, *target); ++ break; ++ ++ default: ++ Unimplemented(); ++ } ++ %} ++ ++ ++ ins_pc_relative(1); ++ ins_pipe( pipe_alu_branch ); ++%} ++ ++instruct branchConUL_regL_regL_long(cmpOp cmp, mRegL src1, mRegL src2, label labl) %{ ++ match(If cmp (CmpUL src1 src2)); ++ effect(USE labl); ++ format %{ "BR$cmp $src1, $src2, $labl #@branchConUL_regL_regL_long" %} ++ ins_cost(250); ++ ++ ins_encode %{ ++ Register opr1_reg = as_Register($src1$$reg); ++ Register opr2_reg = as_Register($src2$$reg); ++ Label* target = $labl$$label; ++ int flag = $cmp$$cmpcode; ++ ++ switch(flag) { ++ case 0x01: // equal ++ __ beq_long(opr1_reg, opr2_reg, *target); ++ 
break; ++ ++ case 0x02: // not_equal ++ __ bne_long(opr1_reg, opr2_reg, *target); ++ break; ++ ++ case 0x03: // greater ++ __ sltu(AT, opr2_reg, opr1_reg); ++ __ bne_long(AT, R0, *target); ++ break; ++ ++ case 0x04: // greater_equal ++ __ sltu(AT, opr1_reg, opr2_reg); ++ __ beq_long(AT, R0, *target); ++ break; ++ ++ case 0x05: // less ++ __ sltu(AT, opr1_reg, opr2_reg); ++ __ bne_long(AT, R0, *target); ++ break; ++ ++ case 0x06: // less_equal ++ __ sltu(AT, opr2_reg, opr1_reg); ++ __ beq_long(AT, R0, *target); ++ break; ++ ++ default: ++ Unimplemented(); ++ } ++ %} ++ ++ ins_pc_relative(1); ++ ins_pipe(pipe_alu_branch); ++%} ++ ++instruct branchConL_regL_immL_0_long(cmpOp cmp, mRegL src1, immL_0 zero, label labl) %{ ++ match( If cmp (CmpL src1 zero) ); ++ effect(USE labl); ++ format %{ "BR$cmp $src1, zero, $labl #@branchConL_regL_immL_0_long" %} ++ ins_cost(150); ++ ++ ins_encode %{ ++ Register opr1_reg = as_Register($src1$$reg); ++ Register opr2_reg = R0; ++ ++ Label* target = $labl$$label; ++ int flag = $cmp$$cmpcode; ++ ++ switch(flag) { ++ case 0x01: //equal ++ __ beq_long(opr1_reg, opr2_reg, *target); ++ break; ++ ++ case 0x02: //not_equal ++ __ bne_long(opr1_reg, opr2_reg, *target); ++ break; ++ ++ case 0x03: //greater ++ __ slt(AT, opr2_reg, opr1_reg); ++ __ bne_long(AT, R0, *target); ++ break; ++ ++ case 0x04: //greater_equal ++ __ slt(AT, opr1_reg, opr2_reg); ++ __ beq_long(AT, R0, *target); ++ break; ++ ++ case 0x05: //less ++ __ slt(AT, opr1_reg, opr2_reg); ++ __ bne_long(AT, R0, *target); ++ break; ++ ++ case 0x06: //less_equal ++ __ slt(AT, opr2_reg, opr1_reg); ++ __ beq_long(AT, R0, *target); ++ break; ++ ++ default: ++ Unimplemented(); ++ } ++ %} ++ ++ ++ ins_pc_relative(1); ++ ins_pipe( pipe_alu_branch ); ++%} ++ ++instruct branchConUL_regL_immL_0_long(cmpOp cmp, mRegL src1, immL_0 zero, label labl) %{ ++ match(If cmp (CmpUL src1 zero)); ++ effect(USE labl); ++ format %{ "BR$cmp $src1, zero, $labl #@branchConUL_regL_immL_0_long" %} ++ ins_cost(150); ++ ++ ins_encode %{ ++ Register opr1_reg = as_Register($src1$$reg); ++ Register opr2_reg = R0; ++ Label* target = $labl$$label; ++ int flag = $cmp$$cmpcode; ++ ++ switch(flag) { ++ case 0x01: // equal ++ case 0x04: // greater_equal ++ case 0x06: // less_equal ++ __ beq_long(opr1_reg, opr2_reg, *target); ++ break; ++ ++ case 0x02: // not_equal ++ case 0x03: // greater ++ __ bne_long(opr1_reg, opr2_reg, *target); ++ break; ++ ++ case 0x05: // less ++ __ beq_long(R0, R0, *target); ++ break; ++ ++ default: ++ Unimplemented(); ++ } ++ %} ++ ++ ins_pc_relative(1); ++ ins_pipe(pipe_alu_branch); ++%} ++ ++instruct branchConL_regL_immL_long(cmpOp cmp, mRegL src1, immL src2, label labl) %{ ++ match( If cmp (CmpL src1 src2) ); ++ effect(USE labl); ++ format %{ "BR$cmp $src1, $src2, $labl #@branchConL_regL_immL_long" %} ++ ins_cost(180); ++ ++ ins_encode %{ ++ Register opr1_reg = as_Register($src1$$reg); ++ Register opr2_reg = AT; ++ ++ Label* target = $labl$$label; ++ int flag = $cmp$$cmpcode; ++ ++ __ set64(opr2_reg, $src2$$constant); ++ ++ switch(flag) { ++ case 0x01: //equal ++ __ beq_long(opr1_reg, opr2_reg, *target); ++ break; ++ ++ case 0x02: //not_equal ++ __ bne_long(opr1_reg, opr2_reg, *target); ++ break; ++ ++ case 0x03: //greater ++ __ slt(AT, opr2_reg, opr1_reg); ++ __ bne_long(AT, R0, *target); ++ break; ++ ++ case 0x04: //greater_equal ++ __ slt(AT, opr1_reg, opr2_reg); ++ __ beq_long(AT, R0, *target); ++ break; ++ ++ case 0x05: //less ++ __ slt(AT, opr1_reg, opr2_reg); ++ __ bne_long(AT, R0, *target); ++ break; ++ ++ case 
0x06: //less_equal ++ __ slt(AT, opr2_reg, opr1_reg); ++ __ beq_long(AT, R0, *target); ++ break; ++ ++ default: ++ Unimplemented(); ++ } ++ %} ++ ++ ++ ins_pc_relative(1); ++ ins_pipe( pipe_alu_branch ); ++%} ++ ++instruct branchConUL_regL_immL_long(cmpOp cmp, mRegL src1, immL src2, label labl) %{ ++ match(If cmp (CmpUL src1 src2)); ++ effect(USE labl); ++ format %{ "BR$cmp $src1, $src2, $labl #@branchConUL_regL_immL_long" %} ++ ins_cost(180); ++ ++ ins_encode %{ ++ Register opr1_reg = as_Register($src1$$reg); ++ Register opr2_reg = AT; ++ Label* target = $labl$$label; ++ int flag = $cmp$$cmpcode; ++ ++ __ set64(opr2_reg, $src2$$constant); ++ ++ switch(flag) { ++ case 0x01: // equal ++ __ beq_long(opr1_reg, opr2_reg, *target); ++ break; ++ ++ case 0x02: // not_equal ++ __ bne_long(opr1_reg, opr2_reg, *target); ++ break; ++ ++ case 0x03: // greater ++ __ sltu(AT, opr2_reg, opr1_reg); ++ __ bne_long(AT, R0, *target); ++ break; ++ ++ case 0x04: // greater_equal ++ __ sltu(AT, opr1_reg, opr2_reg); ++ __ beq_long(AT, R0, *target); ++ break; ++ ++ case 0x05: // less ++ __ sltu(AT, opr1_reg, opr2_reg); ++ __ bne_long(AT, R0, *target); ++ break; ++ ++ case 0x06: // less_equal ++ __ sltu(AT, opr2_reg, opr1_reg); ++ __ beq_long(AT, R0, *target); ++ break; ++ ++ default: ++ Unimplemented(); ++ } ++ %} ++ ++ ins_pc_relative(1); ++ ins_pipe(pipe_alu_branch); ++%} ++ ++//FIXME ++instruct branchConF_reg_reg_long(cmpOp cmp, regF src1, regF src2, label labl) %{ ++ match( If cmp (CmpF src1 src2) ); ++ effect(USE labl); ++ format %{ "BR$cmp $src1, $src2, $labl #@branchConF_reg_reg_long" %} ++ ++ ins_encode %{ ++ FloatRegister reg_op1 = $src1$$FloatRegister; ++ FloatRegister reg_op2 = $src2$$FloatRegister; ++ Label* L = $labl$$label; ++ int flag = $cmp$$cmpcode; ++ ++ switch(flag) { ++ case 0x01: // equal ++ __ c_eq_s(reg_op1, reg_op2); ++ __ bc1t_long(*L); ++ break; ++ case 0x02: // not_equal ++ __ c_eq_s(reg_op1, reg_op2); ++ __ bc1f_long(*L); ++ break; ++ case 0x03: // greater ++ __ c_ule_s(reg_op1, reg_op2); ++ __ bc1f_long(*L); ++ break; ++ case 0x04: // greater_equal ++ __ c_ult_s(reg_op1, reg_op2); ++ __ bc1f_long(*L); ++ break; ++ case 0x05: // less ++ __ c_ult_s(reg_op1, reg_op2); ++ __ bc1t_long(*L); ++ break; ++ case 0x06: // less_equal ++ __ c_ule_s(reg_op1, reg_op2); ++ __ bc1t_long(*L); ++ break; ++ default: ++ Unimplemented(); ++ } ++ %} ++ ++ ins_pc_relative(1); ++ ins_pipe(pipe_slow); ++%} ++ ++instruct branchConD_reg_reg_long(cmpOp cmp, regD src1, regD src2, label labl) %{ ++ match( If cmp (CmpD src1 src2) ); ++ effect(USE labl); ++ format %{ "BR$cmp $src1, $src2, $labl #@branchConD_reg_reg_long" %} ++ ++ ins_encode %{ ++ FloatRegister reg_op1 = $src1$$FloatRegister; ++ FloatRegister reg_op2 = $src2$$FloatRegister; ++ Label* L = $labl$$label; ++ int flag = $cmp$$cmpcode; ++ ++ switch(flag) { ++ case 0x01: // equal ++ __ c_eq_d(reg_op1, reg_op2); ++ __ bc1t_long(*L); ++ break; ++ case 0x02: // not_equal ++ // c_ueq_d cannot distinguish NaN from equal. Double.isNaN(Double) is implemented by 'f != f', so the use of c_ueq_d causes bugs. 
++ __ c_eq_d(reg_op1, reg_op2); ++ __ bc1f_long(*L); ++ break; ++ case 0x03: // greater ++ __ c_ule_d(reg_op1, reg_op2); ++ __ bc1f_long(*L); ++ break; ++ case 0x04: // greater_equal ++ __ c_ult_d(reg_op1, reg_op2); ++ __ bc1f_long(*L); ++ break; ++ case 0x05: // less ++ __ c_ult_d(reg_op1, reg_op2); ++ __ bc1t_long(*L); ++ break; ++ case 0x06: // less_equal ++ __ c_ule_d(reg_op1, reg_op2); ++ __ bc1t_long(*L); ++ break; ++ default: ++ Unimplemented(); ++ } ++ %} ++ ++ ins_pc_relative(1); ++ ins_pipe(pipe_slow); ++%} ++ ++ ++// ============================================================================ ++// Branch Instructions -- short offset versions ++ ++// Jump Direct ++instruct jmpDir_short(label labl) %{ ++ match(Goto); ++ effect(USE labl); ++ ++ ins_cost(300); ++ format %{ "JMP $labl #@jmpDir_short" %} ++ ++ ins_encode %{ ++ Label &L = *($labl$$label); ++ if(&L) ++ __ b(L); ++ else ++ __ b(int(0)); ++ __ delayed()->nop(); ++ %} ++ ++ ins_pipe( pipe_jump ); ++ ins_pc_relative(1); ++ ins_short_branch(1); ++%} ++ ++// Jump Direct Conditional - Label defines a relative address from Jcc+1 ++instruct jmpLoopEnd_short(cmpOp cop, mRegI src1, mRegI src2, label labl) %{ ++ match(CountedLoopEnd cop (CmpI src1 src2)); ++ effect(USE labl); ++ ++ ins_cost(300); ++ format %{ "J$cop $src1, $src2, $labl\t# Loop end @ jmpLoopEnd_short" %} ++ ins_encode %{ ++ Register op1 = $src1$$Register; ++ Register op2 = $src2$$Register; ++ Label &L = *($labl$$label); ++ int flag = $cop$$cmpcode; ++ ++ switch(flag) { ++ case 0x01: //equal ++ if (&L) ++ __ beq(op1, op2, L); ++ else ++ __ beq(op1, op2, (int)0); ++ break; ++ case 0x02: //not_equal ++ if (&L) ++ __ bne(op1, op2, L); ++ else ++ __ bne(op1, op2, (int)0); ++ break; ++ case 0x03: //above ++ __ slt(AT, op2, op1); ++ if(&L) ++ __ bne(AT, R0, L); ++ else ++ __ bne(AT, R0, (int)0); ++ break; ++ case 0x04: //above_equal ++ __ slt(AT, op1, op2); ++ if(&L) ++ __ beq(AT, R0, L); ++ else ++ __ beq(AT, R0, (int)0); ++ break; ++ case 0x05: //below ++ __ slt(AT, op1, op2); ++ if(&L) ++ __ bne(AT, R0, L); ++ else ++ __ bne(AT, R0, (int)0); ++ break; ++ case 0x06: //below_equal ++ __ slt(AT, op2, op1); ++ if(&L) ++ __ beq(AT, R0, L); ++ else ++ __ beq(AT, R0, (int)0); ++ break; ++ default: ++ Unimplemented(); ++ } ++ __ delayed()->nop(); ++ %} ++ ins_pipe( pipe_jump ); ++ ins_pc_relative(1); ++ ins_short_branch(1); ++%} ++ ++instruct jmpLoopEnd_reg_immI_short(cmpOp cop, mRegI src1, immI src2, label labl) %{ ++ match(CountedLoopEnd cop (CmpI src1 src2)); ++ effect(USE labl); ++ ++ ins_cost(300); ++ format %{ "J$cop $src1, $src2, $labl\t# Loop end @ jmpLoopEnd_reg_immI_short" %} ++ ins_encode %{ ++ Register op1 = $src1$$Register; ++ Register op2 = AT; ++ Label &L = *($labl$$label); ++ int flag = $cop$$cmpcode; ++ ++ __ move(op2, $src2$$constant); ++ ++ switch(flag) { ++ case 0x01: //equal ++ if (&L) ++ __ beq(op1, op2, L); ++ else ++ __ beq(op1, op2, (int)0); ++ break; ++ case 0x02: //not_equal ++ if (&L) ++ __ bne(op1, op2, L); ++ else ++ __ bne(op1, op2, (int)0); ++ break; ++ case 0x03: //above ++ __ slt(AT, op2, op1); ++ if(&L) ++ __ bne(AT, R0, L); ++ else ++ __ bne(AT, R0, (int)0); ++ break; ++ case 0x04: //above_equal ++ __ slt(AT, op1, op2); ++ if(&L) ++ __ beq(AT, R0, L); ++ else ++ __ beq(AT, R0, (int)0); ++ break; ++ case 0x05: //below ++ __ slt(AT, op1, op2); ++ if(&L) ++ __ bne(AT, R0, L); ++ else ++ __ bne(AT, R0, (int)0); ++ break; ++ case 0x06: //below_equal ++ __ slt(AT, op2, op1); ++ if(&L) ++ __ beq(AT, R0, L); ++ else ++ __ beq(AT, R0, (int)0); ++ 
break; ++ default: ++ Unimplemented(); ++ } ++ __ delayed()->nop(); ++ %} ++ ins_pipe( pipe_jump ); ++ ins_pc_relative(1); ++ ins_short_branch(1); ++%} ++ ++ ++// This match pattern is created for StoreIConditional since I cannot match IfNode without a RegFlags! ++instruct jmpCon_flags_short(cmpOp cop, FlagsReg cr, label labl) %{ ++ match(If cop cr); ++ effect(USE labl); ++ ++ ins_cost(300); ++ format %{ "J$cop $labl #mips uses T0 as equivalent to eflag @jmpCon_flags_short" %} ++ ++ ins_encode %{ ++ Label &L = *($labl$$label); ++ switch($cop$$cmpcode) { ++ case 0x01: //equal ++ if (&L) ++ __ bne($cr$$Register, R0, L); ++ else ++ __ bne($cr$$Register, R0, (int)0); ++ break; ++ case 0x02: //not equal ++ if (&L) ++ __ beq($cr$$Register, R0, L); ++ else ++ __ beq($cr$$Register, R0, (int)0); ++ break; ++ default: ++ Unimplemented(); ++ } ++ __ delayed()->nop(); ++ %} ++ ++ ins_pipe( pipe_jump ); ++ ins_pc_relative(1); ++ ins_short_branch(1); ++%} ++ ++// Conditional jumps ++instruct branchConP_zero_short(cmpOpU cmp, mRegP op1, immP_0 zero, label labl) %{ ++ match(If cmp (CmpP op1 zero)); ++ effect(USE labl); ++ ++ ins_cost(180); ++ format %{ "b$cmp $op1, R0, $labl #@branchConP_zero_short" %} ++ ++ ins_encode %{ ++ Register op1 = $op1$$Register; ++ Register op2 = R0; ++ Label &L = *($labl$$label); ++ int flag = $cmp$$cmpcode; ++ ++ switch(flag) { ++ case 0x01: //equal ++ if (&L) ++ __ beq(op1, op2, L); ++ else ++ __ beq(op1, op2, (int)0); ++ break; ++ case 0x02: //not_equal ++ if (&L) ++ __ bne(op1, op2, L); ++ else ++ __ bne(op1, op2, (int)0); ++ break; ++ default: ++ Unimplemented(); ++ } ++ __ delayed()->nop(); ++ %} ++ ++ ins_pc_relative(1); ++ ins_pipe( pipe_alu_branch ); ++ ins_short_branch(1); ++%} ++ ++instruct branchConN2P_zero_short(cmpOpU cmp, mRegN op1, immP_0 zero, label labl) %{ ++ match(If cmp (CmpP (DecodeN op1) zero)); ++ predicate(CompressedOops::base() == NULL && CompressedOops::shift() == 0); ++ effect(USE labl); ++ ++ ins_cost(180); ++ format %{ "b$cmp $op1, R0, $labl #@branchConN2P_zero_short" %} ++ ++ ins_encode %{ ++ Register op1 = $op1$$Register; ++ Register op2 = R0; ++ Label &L = *($labl$$label); ++ int flag = $cmp$$cmpcode; ++ ++ switch(flag) ++ { ++ case 0x01: //equal ++ if (&L) ++ __ beq(op1, op2, L); ++ else ++ __ beq(op1, op2, (int)0); ++ break; ++ case 0x02: //not_equal ++ if (&L) ++ __ bne(op1, op2, L); ++ else ++ __ bne(op1, op2, (int)0); ++ break; ++ default: ++ Unimplemented(); ++ } ++ __ delayed()->nop(); ++ %} ++ ++ ins_pc_relative(1); ++ ins_pipe( pipe_alu_branch ); ++ ins_short_branch(1); ++%} ++ ++ ++instruct branchConP_short(cmpOpU cmp, mRegP op1, mRegP op2, label labl) %{ ++ match(If cmp (CmpP op1 op2)); ++// predicate(can_branch_register(_kids[0]->_leaf, _kids[1]->_leaf)); ++ effect(USE labl); ++ ++ ins_cost(200); ++ format %{ "b$cmp $op1, $op2, $labl #@branchConP_short" %} ++ ++ ins_encode %{ ++ Register op1 = $op1$$Register; ++ Register op2 = $op2$$Register; ++ Label &L = *($labl$$label); ++ int flag = $cmp$$cmpcode; ++ ++ switch(flag) { ++ case 0x01: //equal ++ if (&L) ++ __ beq(op1, op2, L); ++ else ++ __ beq(op1, op2, (int)0); ++ break; ++ case 0x02: //not_equal ++ if (&L) ++ __ bne(op1, op2, L); ++ else ++ __ bne(op1, op2, (int)0); ++ break; ++ case 0x03: //above ++ __ sltu(AT, op2, op1); ++ if(&L) ++ __ bne(R0, AT, L); ++ else ++ __ bne(R0, AT, (int)0); ++ break; ++ case 0x04: //above_equal ++ __ sltu(AT, op1, op2); ++ if(&L) ++ __ beq(AT, R0, L); ++ else ++ __ beq(AT, R0, (int)0); ++ break; ++ case 0x05: //below ++ __ sltu(AT, op1, op2); ++ 
if(&L) ++ __ bne(R0, AT, L); ++ else ++ __ bne(R0, AT, (int)0); ++ break; ++ case 0x06: //below_equal ++ __ sltu(AT, op2, op1); ++ if(&L) ++ __ beq(AT, R0, L); ++ else ++ __ beq(AT, R0, (int)0); ++ break; ++ default: ++ Unimplemented(); ++ } ++ __ delayed()->nop(); ++ %} ++ ++ ins_pc_relative(1); ++ ins_pipe( pipe_alu_branch ); ++ ins_short_branch(1); ++%} ++ ++instruct cmpN_null_branch_short(cmpOp cmp, mRegN op1, immN_0 null, label labl) %{ ++ match(If cmp (CmpN op1 null)); ++ effect(USE labl); ++ ++ ins_cost(180); ++ format %{ "CMP $op1,0\t! compressed ptr\n\t" ++ "BP$cmp $labl @ cmpN_null_branch_short" %} ++ ins_encode %{ ++ Register op1 = $op1$$Register; ++ Register op2 = R0; ++ Label &L = *($labl$$label); ++ int flag = $cmp$$cmpcode; ++ ++ switch(flag) { ++ case 0x01: //equal ++ if (&L) ++ __ beq(op1, op2, L); ++ else ++ __ beq(op1, op2, (int)0); ++ break; ++ case 0x02: //not_equal ++ if (&L) ++ __ bne(op1, op2, L); ++ else ++ __ bne(op1, op2, (int)0); ++ break; ++ default: ++ Unimplemented(); ++ } ++ __ delayed()->nop(); ++ %} ++//TODO: pipe_branchP or create pipe_branchN LEE ++ ins_pc_relative(1); ++ ins_pipe( pipe_alu_branch ); ++ ins_short_branch(1); ++%} ++ ++instruct cmpN_reg_branch_short(cmpOp cmp, mRegN op1, mRegN op2, label labl) %{ ++ match(If cmp (CmpN op1 op2)); ++ effect(USE labl); ++ ++ ins_cost(180); ++ format %{ "CMP $op1,$op2\t! compressed ptr\n\t" ++ "BP$cmp $labl @ cmpN_reg_branch_short" %} ++ ins_encode %{ ++ Register op1_reg = $op1$$Register; ++ Register op2_reg = $op2$$Register; ++ Label &L = *($labl$$label); ++ int flag = $cmp$$cmpcode; ++ ++ switch(flag) { ++ case 0x01: //equal ++ if (&L) ++ __ beq(op1_reg, op2_reg, L); ++ else ++ __ beq(op1_reg, op2_reg, (int)0); ++ break; ++ case 0x02: //not_equal ++ if (&L) ++ __ bne(op1_reg, op2_reg, L); ++ else ++ __ bne(op1_reg, op2_reg, (int)0); ++ break; ++ case 0x03: //above ++ __ sltu(AT, op2_reg, op1_reg); ++ if(&L) ++ __ bne(R0, AT, L); ++ else ++ __ bne(R0, AT, (int)0); ++ break; ++ case 0x04: //above_equal ++ __ sltu(AT, op1_reg, op2_reg); ++ if(&L) ++ __ beq(AT, R0, L); ++ else ++ __ beq(AT, R0, (int)0); ++ break; ++ case 0x05: //below ++ __ sltu(AT, op1_reg, op2_reg); ++ if(&L) ++ __ bne(R0, AT, L); ++ else ++ __ bne(R0, AT, (int)0); ++ break; ++ case 0x06: //below_equal ++ __ sltu(AT, op2_reg, op1_reg); ++ if(&L) ++ __ beq(AT, R0, L); ++ else ++ __ beq(AT, R0, (int)0); ++ break; ++ default: ++ Unimplemented(); ++ } ++ __ delayed()->nop(); ++ %} ++ ins_pc_relative(1); ++ ins_pipe( pipe_alu_branch ); ++ ins_short_branch(1); ++%} ++ ++instruct branchConIU_reg_reg_short(cmpOpU cmp, mRegI src1, mRegI src2, label labl) %{ ++ match( If cmp (CmpU src1 src2) ); ++ effect(USE labl); ++ format %{ "BR$cmp $src1, $src2, $labl #@branchConIU_reg_reg_short" %} ++ ++ ins_encode %{ ++ Register op1 = $src1$$Register; ++ Register op2 = $src2$$Register; ++ Label &L = *($labl$$label); ++ int flag = $cmp$$cmpcode; ++ ++ switch(flag) { ++ case 0x01: //equal ++ if (&L) ++ __ beq(op1, op2, L); ++ else ++ __ beq(op1, op2, (int)0); ++ break; ++ case 0x02: //not_equal ++ if (&L) ++ __ bne(op1, op2, L); ++ else ++ __ bne(op1, op2, (int)0); ++ break; ++ case 0x03: //above ++ __ sltu(AT, op2, op1); ++ if(&L) ++ __ bne(AT, R0, L); ++ else ++ __ bne(AT, R0, (int)0); ++ break; ++ case 0x04: //above_equal ++ __ sltu(AT, op1, op2); ++ if(&L) ++ __ beq(AT, R0, L); ++ else ++ __ beq(AT, R0, (int)0); ++ break; ++ case 0x05: //below ++ __ sltu(AT, op1, op2); ++ if(&L) ++ __ bne(AT, R0, L); ++ else ++ __ bne(AT, R0, (int)0); ++ break; ++ case 0x06: 
//below_equal ++ __ sltu(AT, op2, op1); ++ if(&L) ++ __ beq(AT, R0, L); ++ else ++ __ beq(AT, R0, (int)0); ++ break; ++ default: ++ Unimplemented(); ++ } ++ __ delayed()->nop(); ++ %} ++ ++ ins_pc_relative(1); ++ ins_pipe( pipe_alu_branch ); ++ ins_short_branch(1); ++%} ++ ++ ++instruct branchConIU_reg_imm_short(cmpOpU cmp, mRegI src1, immI src2, label labl) %{ ++ match( If cmp (CmpU src1 src2) ); ++ effect(USE labl); ++ format %{ "BR$cmp $src1, $src2, $labl #@branchConIU_reg_imm_short" %} ++ ++ ins_encode %{ ++ Register op1 = $src1$$Register; ++ int val = $src2$$constant; ++ Label &L = *($labl$$label); ++ int flag = $cmp$$cmpcode; ++ ++ __ move(AT, val); ++ switch(flag) { ++ case 0x01: //equal ++ if (&L) ++ __ beq(op1, AT, L); ++ else ++ __ beq(op1, AT, (int)0); ++ break; ++ case 0x02: //not_equal ++ if (&L) ++ __ bne(op1, AT, L); ++ else ++ __ bne(op1, AT, (int)0); ++ break; ++ case 0x03: //above ++ __ sltu(AT, AT, op1); ++ if(&L) ++ __ bne(R0, AT, L); ++ else ++ __ bne(R0, AT, (int)0); ++ break; ++ case 0x04: //above_equal ++ __ sltu(AT, op1, AT); ++ if(&L) ++ __ beq(AT, R0, L); ++ else ++ __ beq(AT, R0, (int)0); ++ break; ++ case 0x05: //below ++ __ sltu(AT, op1, AT); ++ if(&L) ++ __ bne(R0, AT, L); ++ else ++ __ bne(R0, AT, (int)0); ++ break; ++ case 0x06: //below_equal ++ __ sltu(AT, AT, op1); ++ if(&L) ++ __ beq(AT, R0, L); ++ else ++ __ beq(AT, R0, (int)0); ++ break; ++ default: ++ Unimplemented(); ++ } ++ __ delayed()->nop(); ++ %} ++ ++ ins_pc_relative(1); ++ ins_pipe( pipe_alu_branch ); ++ ins_short_branch(1); ++%} ++ ++instruct branchConI_reg_reg_short(cmpOp cmp, mRegI src1, mRegI src2, label labl) %{ ++ match( If cmp (CmpI src1 src2) ); ++ effect(USE labl); ++ format %{ "BR$cmp $src1, $src2, $labl #@branchConI_reg_reg_short" %} ++ ++ ins_encode %{ ++ Register op1 = $src1$$Register; ++ Register op2 = $src2$$Register; ++ Label &L = *($labl$$label); ++ int flag = $cmp$$cmpcode; ++ ++ switch(flag) { ++ case 0x01: //equal ++ if (&L) ++ __ beq(op1, op2, L); ++ else ++ __ beq(op1, op2, (int)0); ++ break; ++ case 0x02: //not_equal ++ if (&L) ++ __ bne(op1, op2, L); ++ else ++ __ bne(op1, op2, (int)0); ++ break; ++ case 0x03: //above ++ __ slt(AT, op2, op1); ++ if(&L) ++ __ bne(R0, AT, L); ++ else ++ __ bne(R0, AT, (int)0); ++ break; ++ case 0x04: //above_equal ++ __ slt(AT, op1, op2); ++ if(&L) ++ __ beq(AT, R0, L); ++ else ++ __ beq(AT, R0, (int)0); ++ break; ++ case 0x05: //below ++ __ slt(AT, op1, op2); ++ if(&L) ++ __ bne(R0, AT, L); ++ else ++ __ bne(R0, AT, (int)0); ++ break; ++ case 0x06: //below_equal ++ __ slt(AT, op2, op1); ++ if(&L) ++ __ beq(AT, R0, L); ++ else ++ __ beq(AT, R0, (int)0); ++ break; ++ default: ++ Unimplemented(); ++ } ++ __ delayed()->nop(); ++ %} ++ ++ ins_pc_relative(1); ++ ins_pipe( pipe_alu_branch ); ++ ins_short_branch(1); ++%} ++ ++instruct branchConI_reg_immI_0_short(cmpOp cmp, mRegI src1, immI_0 src2, label labl) %{ ++ match( If cmp (CmpI src1 src2) ); ++ effect(USE labl); ++ ins_cost(170); ++ format %{ "BR$cmp $src1, $src2, $labl #@branchConI_reg_immI_0_short" %} ++ ++ ins_encode %{ ++ Register op1 = $src1$$Register; ++ Label &L = *($labl$$label); ++ int flag = $cmp$$cmpcode; ++ ++ switch(flag) { ++ case 0x01: //equal ++ if (&L) ++ __ beq(op1, R0, L); ++ else ++ __ beq(op1, R0, (int)0); ++ break; ++ case 0x02: //not_equal ++ if (&L) ++ __ bne(op1, R0, L); ++ else ++ __ bne(op1, R0, (int)0); ++ break; ++ case 0x03: //greater ++ if(&L) ++ __ bgtz(op1, L); ++ else ++ __ bgtz(op1, (int)0); ++ break; ++ case 0x04: //greater_equal ++ if(&L) ++ __ 
bgez(op1, L); ++ else ++ __ bgez(op1, (int)0); ++ break; ++ case 0x05: //less ++ if(&L) ++ __ bltz(op1, L); ++ else ++ __ bltz(op1, (int)0); ++ break; ++ case 0x06: //less_equal ++ if(&L) ++ __ blez(op1, L); ++ else ++ __ blez(op1, (int)0); ++ break; ++ default: ++ Unimplemented(); ++ } ++ __ delayed()->nop(); ++ %} ++ ++ ins_pc_relative(1); ++ ins_pipe( pipe_alu_branch ); ++ ins_short_branch(1); ++%} ++ ++ ++instruct branchConI_reg_imm_short(cmpOp cmp, mRegI src1, immI src2, label labl) %{ ++ match( If cmp (CmpI src1 src2) ); ++ effect(USE labl); ++ ins_cost(200); ++ format %{ "BR$cmp $src1, $src2, $labl #@branchConI_reg_imm_short" %} ++ ++ ins_encode %{ ++ Register op1 = $src1$$Register; ++ int val = $src2$$constant; ++ Label &L = *($labl$$label); ++ int flag = $cmp$$cmpcode; ++ ++ __ move(AT, val); ++ switch(flag) { ++ case 0x01: //equal ++ if (&L) ++ __ beq(op1, AT, L); ++ else ++ __ beq(op1, AT, (int)0); ++ break; ++ case 0x02: //not_equal ++ if (&L) ++ __ bne(op1, AT, L); ++ else ++ __ bne(op1, AT, (int)0); ++ break; ++ case 0x03: //greater ++ __ slt(AT, AT, op1); ++ if(&L) ++ __ bne(R0, AT, L); ++ else ++ __ bne(R0, AT, (int)0); ++ break; ++ case 0x04: //greater_equal ++ __ slt(AT, op1, AT); ++ if(&L) ++ __ beq(AT, R0, L); ++ else ++ __ beq(AT, R0, (int)0); ++ break; ++ case 0x05: //less ++ __ slt(AT, op1, AT); ++ if(&L) ++ __ bne(R0, AT, L); ++ else ++ __ bne(R0, AT, (int)0); ++ break; ++ case 0x06: //less_equal ++ __ slt(AT, AT, op1); ++ if(&L) ++ __ beq(AT, R0, L); ++ else ++ __ beq(AT, R0, (int)0); ++ break; ++ default: ++ Unimplemented(); ++ } ++ __ delayed()->nop(); ++ %} ++ ++ ins_pc_relative(1); ++ ins_pipe( pipe_alu_branch ); ++ ins_short_branch(1); ++%} ++ ++instruct branchConIU_reg_immI_0_short(cmpOpU cmp, mRegI src1, immI_0 zero, label labl) %{ ++ match( If cmp (CmpU src1 zero) ); ++ effect(USE labl); ++ format %{ "BR$cmp $src1, zero, $labl #@branchConIU_reg_immI_0_short" %} ++ ++ ins_encode %{ ++ Register op1 = $src1$$Register; ++ Label &L = *($labl$$label); ++ int flag = $cmp$$cmpcode; ++ ++ switch(flag) { ++ case 0x01: //equal ++ if (&L) ++ __ beq(op1, R0, L); ++ else ++ __ beq(op1, R0, (int)0); ++ break; ++ case 0x02: //not_equal ++ if (&L) ++ __ bne(op1, R0, L); ++ else ++ __ bne(op1, R0, (int)0); ++ break; ++ case 0x03: //above ++ if(&L) ++ __ bne(R0, op1, L); ++ else ++ __ bne(R0, op1, (int)0); ++ break; ++ case 0x04: //above_equal ++ if(&L) ++ __ beq(R0, R0, L); ++ else ++ __ beq(R0, R0, (int)0); ++ break; ++ case 0x05: //below ++ return; ++ break; ++ case 0x06: //below_equal ++ if(&L) ++ __ beq(op1, R0, L); ++ else ++ __ beq(op1, R0, (int)0); ++ break; ++ default: ++ Unimplemented(); ++ } ++ __ delayed()->nop(); ++ %} ++ ++ ins_pc_relative(1); ++ ins_pipe( pipe_alu_branch ); ++ ins_short_branch(1); ++%} ++ ++ ++instruct branchConIU_reg_immI16_short(cmpOpU cmp, mRegI src1, immI16 src2, label labl) %{ ++ match( If cmp (CmpU src1 src2) ); ++ effect(USE labl); ++ ins_cost(180); ++ format %{ "BR$cmp $src1, $src2, $labl #@branchConIU_reg_immI16_short" %} ++ ++ ins_encode %{ ++ Register op1 = $src1$$Register; ++ int val = $src2$$constant; ++ Label &L = *($labl$$label); ++ int flag = $cmp$$cmpcode; ++ ++ switch(flag) { ++ case 0x01: //equal ++ __ move(AT, val); ++ if (&L) ++ __ beq(op1, AT, L); ++ else ++ __ beq(op1, AT, (int)0); ++ break; ++ case 0x02: //not_equal ++ __ move(AT, val); ++ if (&L) ++ __ bne(op1, AT, L); ++ else ++ __ bne(op1, AT, (int)0); ++ break; ++ case 0x03: //above ++ __ move(AT, val); ++ __ sltu(AT, AT, op1); ++ if(&L) ++ __ bne(R0, AT, L); ++ else 
++ __ bne(R0, AT, (int)0); ++ break; ++ case 0x04: //above_equal ++ __ sltiu(AT, op1, val); ++ if(&L) ++ __ beq(AT, R0, L); ++ else ++ __ beq(AT, R0, (int)0); ++ break; ++ case 0x05: //below ++ __ sltiu(AT, op1, val); ++ if(&L) ++ __ bne(R0, AT, L); ++ else ++ __ bne(R0, AT, (int)0); ++ break; ++ case 0x06: //below_equal ++ __ move(AT, val); ++ __ sltu(AT, AT, op1); ++ if(&L) ++ __ beq(AT, R0, L); ++ else ++ __ beq(AT, R0, (int)0); ++ break; ++ default: ++ Unimplemented(); ++ } ++ __ delayed()->nop(); ++ %} ++ ++ ins_pc_relative(1); ++ ins_pipe( pipe_alu_branch ); ++ ins_short_branch(1); ++%} ++ ++ ++instruct branchConL_regL_regL_short(cmpOp cmp, mRegL src1, mRegL src2, label labl) %{ ++ match( If cmp (CmpL src1 src2) ); ++ effect(USE labl); ++ format %{ "BR$cmp $src1, $src2, $labl #@branchConL_regL_regL_short" %} ++ ins_cost(250); ++ ++ ins_encode %{ ++ Register opr1_reg = as_Register($src1$$reg); ++ Register opr2_reg = as_Register($src2$$reg); ++ ++ Label &target = *($labl$$label); ++ int flag = $cmp$$cmpcode; ++ ++ switch(flag) { ++ case 0x01: //equal ++ if (&target) ++ __ beq(opr1_reg, opr2_reg, target); ++ else ++ __ beq(opr1_reg, opr2_reg, (int)0); ++ __ delayed()->nop(); ++ break; ++ ++ case 0x02: //not_equal ++ if(&target) ++ __ bne(opr1_reg, opr2_reg, target); ++ else ++ __ bne(opr1_reg, opr2_reg, (int)0); ++ __ delayed()->nop(); ++ break; ++ ++ case 0x03: //greater ++ __ slt(AT, opr2_reg, opr1_reg); ++ if(&target) ++ __ bne(AT, R0, target); ++ else ++ __ bne(AT, R0, (int)0); ++ __ delayed()->nop(); ++ break; ++ ++ case 0x04: //greater_equal ++ __ slt(AT, opr1_reg, opr2_reg); ++ if(&target) ++ __ beq(AT, R0, target); ++ else ++ __ beq(AT, R0, (int)0); ++ __ delayed()->nop(); ++ ++ break; ++ ++ case 0x05: //less ++ __ slt(AT, opr1_reg, opr2_reg); ++ if(&target) ++ __ bne(AT, R0, target); ++ else ++ __ bne(AT, R0, (int)0); ++ __ delayed()->nop(); ++ ++ break; ++ ++ case 0x06: //less_equal ++ __ slt(AT, opr2_reg, opr1_reg); ++ ++ if(&target) ++ __ beq(AT, R0, target); ++ else ++ __ beq(AT, R0, (int)0); ++ __ delayed()->nop(); ++ ++ break; ++ ++ default: ++ Unimplemented(); ++ } ++ %} ++ ++ ++ ins_pc_relative(1); ++ ins_pipe( pipe_alu_branch ); ++ ins_short_branch(1); ++%} ++ ++instruct branchConUL_regL_regL_short(cmpOp cmp, mRegL src1, mRegL src2, label labl) %{ ++ match( If cmp (CmpUL src1 src2) ); ++ effect(USE labl); ++ format %{ "BR$cmp $src1, $src2, $labl #@branchConUL_regL_regL_short" %} ++ ins_cost(250); ++ ++ ins_encode %{ ++ Register opr1_reg = as_Register($src1$$reg); ++ Register opr2_reg = as_Register($src2$$reg); ++ Label &target = *($labl$$label); ++ int flag = $cmp$$cmpcode; ++ ++ switch(flag) { ++ case 0x01: // equal ++ if (&target) ++ __ beq(opr1_reg, opr2_reg, target); ++ else ++ __ beq(opr1_reg, opr2_reg, (int)0); ++ __ delayed()->nop(); ++ break; ++ ++ case 0x02: // not_equal ++ if(&target) ++ __ bne(opr1_reg, opr2_reg, target); ++ else ++ __ bne(opr1_reg, opr2_reg, (int)0); ++ __ delayed()->nop(); ++ break; ++ ++ case 0x03: // greater ++ __ sltu(AT, opr2_reg, opr1_reg); ++ if(&target) ++ __ bne(AT, R0, target); ++ else ++ __ bne(AT, R0, (int)0); ++ __ delayed()->nop(); ++ break; ++ ++ case 0x04: // greater_equal ++ __ sltu(AT, opr1_reg, opr2_reg); ++ if(&target) ++ __ beq(AT, R0, target); ++ else ++ __ beq(AT, R0, (int)0); ++ __ delayed()->nop(); ++ break; ++ ++ case 0x05: // less ++ __ sltu(AT, opr1_reg, opr2_reg); ++ if(&target) ++ __ bne(AT, R0, target); ++ else ++ __ bne(AT, R0, (int)0); ++ __ delayed()->nop(); ++ break; ++ ++ case 0x06: // less_equal ++ __ 
sltu(AT, opr2_reg, opr1_reg); ++ if(&target) ++ __ beq(AT, R0, target); ++ else ++ __ beq(AT, R0, (int)0); ++ __ delayed()->nop(); ++ break; ++ ++ default: ++ Unimplemented(); ++ } ++ %} ++ ++ ins_pc_relative(1); ++ ins_pipe(pipe_alu_branch); ++ ins_short_branch(1); ++%} ++ ++instruct branchConL_regL_immL_0_short(cmpOp cmp, mRegL src1, immL_0 zero, label labl) %{ ++ match( If cmp (CmpL src1 zero) ); ++ effect(USE labl); ++ format %{ "BR$cmp $src1, zero, $labl #@branchConL_regL_immL_0_short" %} ++ ins_cost(150); ++ ++ ins_encode %{ ++ Register opr1_reg = as_Register($src1$$reg); ++ Label &target = *($labl$$label); ++ int flag = $cmp$$cmpcode; ++ ++ switch(flag) { ++ case 0x01: //equal ++ if (&target) ++ __ beq(opr1_reg, R0, target); ++ else ++ __ beq(opr1_reg, R0, int(0)); ++ break; ++ ++ case 0x02: //not_equal ++ if(&target) ++ __ bne(opr1_reg, R0, target); ++ else ++ __ bne(opr1_reg, R0, (int)0); ++ break; ++ ++ case 0x03: //greater ++ if(&target) ++ __ bgtz(opr1_reg, target); ++ else ++ __ bgtz(opr1_reg, (int)0); ++ break; ++ ++ case 0x04: //greater_equal ++ if(&target) ++ __ bgez(opr1_reg, target); ++ else ++ __ bgez(opr1_reg, (int)0); ++ break; ++ ++ case 0x05: //less ++ __ slt(AT, opr1_reg, R0); ++ if(&target) ++ __ bne(AT, R0, target); ++ else ++ __ bne(AT, R0, (int)0); ++ break; ++ ++ case 0x06: //less_equal ++ if (&target) ++ __ blez(opr1_reg, target); ++ else ++ __ blez(opr1_reg, int(0)); ++ break; ++ ++ default: ++ Unimplemented(); ++ } ++ __ delayed()->nop(); ++ %} ++ ++ ++ ins_pc_relative(1); ++ ins_pipe( pipe_alu_branch ); ++ ins_short_branch(1); ++%} ++ ++instruct branchConUL_regL_immL_0_short(cmpOp cmp, mRegL src1, immL_0 zero, label labl) %{ ++ match(If cmp (CmpUL src1 zero)); ++ effect(USE labl); ++ format %{ "BR$cmp $src1, zero, $labl #@branchConUL_regL_immL_0_short" %} ++ ins_cost(150); ++ ++ ins_encode %{ ++ Register opr1_reg = as_Register($src1$$reg); ++ Label &target = *($labl$$label); ++ int flag = $cmp$$cmpcode; ++ ++ switch(flag) { ++ case 0x01: // equal ++ case 0x04: // greater_equal ++ case 0x06: // less_equal ++ if (&target) ++ __ beq(opr1_reg, R0, target); ++ else ++ __ beq(opr1_reg, R0, int(0)); ++ break; ++ ++ case 0x02: // not_equal ++ case 0x03: // greater ++ if(&target) ++ __ bne(opr1_reg, R0, target); ++ else ++ __ bne(opr1_reg, R0, (int)0); ++ break; ++ ++ case 0x05: // less ++ if(&target) ++ __ beq(R0, R0, target); ++ else ++ __ beq(R0, R0, (int)0); ++ break; ++ ++ default: ++ Unimplemented(); ++ } ++ __ delayed()->nop(); ++ %} ++ ++ ins_pc_relative(1); ++ ins_pipe(pipe_alu_branch); ++ ins_short_branch(1); ++%} ++ ++instruct branchConL_regL_immL_short(cmpOp cmp, mRegL src1, immL src2, label labl) %{ ++ match( If cmp (CmpL src1 src2) ); ++ effect(USE labl); ++ format %{ "BR$cmp $src1, $src2, $labl #@branchConL_regL_immL_short" %} ++ ins_cost(180); ++ ++ ins_encode %{ ++ Register opr1_reg = as_Register($src1$$reg); ++ Register opr2_reg = AT; ++ ++ Label &target = *($labl$$label); ++ int flag = $cmp$$cmpcode; ++ ++ __ set64(opr2_reg, $src2$$constant); ++ ++ switch(flag) { ++ case 0x01: //equal ++ if (&target) ++ __ beq(opr1_reg, opr2_reg, target); ++ else ++ __ beq(opr1_reg, opr2_reg, (int)0); ++ break; ++ ++ case 0x02: //not_equal ++ if(&target) ++ __ bne(opr1_reg, opr2_reg, target); ++ else ++ __ bne(opr1_reg, opr2_reg, (int)0); ++ break; ++ ++ case 0x03: //greater ++ __ slt(AT, opr2_reg, opr1_reg); ++ if(&target) ++ __ bne(AT, R0, target); ++ else ++ __ bne(AT, R0, (int)0); ++ break; ++ ++ case 0x04: //greater_equal ++ __ slt(AT, opr1_reg, opr2_reg); ++ 
if(&target) ++ __ beq(AT, R0, target); ++ else ++ __ beq(AT, R0, (int)0); ++ break; ++ ++ case 0x05: //less ++ __ slt(AT, opr1_reg, opr2_reg); ++ if(&target) ++ __ bne(AT, R0, target); ++ else ++ __ bne(AT, R0, (int)0); ++ break; ++ ++ case 0x06: //less_equal ++ __ slt(AT, opr2_reg, opr1_reg); ++ if(&target) ++ __ beq(AT, R0, target); ++ else ++ __ beq(AT, R0, (int)0); ++ break; ++ ++ default: ++ Unimplemented(); ++ } ++ __ delayed()->nop(); ++ %} ++ ++ ++ ins_pc_relative(1); ++ ins_pipe( pipe_alu_branch ); ++ ins_short_branch(1); ++%} ++ ++instruct branchConUL_regL_immL_short(cmpOp cmp, mRegL src1, immL src2, label labl) %{ ++ match(If cmp (CmpUL src1 src2)); ++ effect(USE labl); ++ format %{ "BR$cmp $src1, $src2, $labl #@branchConUL_regL_immL_short" %} ++ ins_cost(180); ++ ++ ins_encode %{ ++ Register opr1_reg = as_Register($src1$$reg); ++ Register opr2_reg = AT; ++ Label &target = *($labl$$label); ++ int flag = $cmp$$cmpcode; ++ ++ __ set64(opr2_reg, $src2$$constant); ++ ++ switch(flag) { ++ case 0x01: // equal ++ if (&target) ++ __ beq(opr1_reg, opr2_reg, target); ++ else ++ __ beq(opr1_reg, opr2_reg, (int)0); ++ break; ++ ++ case 0x02: // not_equal ++ if(&target) ++ __ bne(opr1_reg, opr2_reg, target); ++ else ++ __ bne(opr1_reg, opr2_reg, (int)0); ++ break; ++ ++ case 0x03: // greater ++ __ sltu(AT, opr2_reg, opr1_reg); ++ if(&target) ++ __ bne(AT, R0, target); ++ else ++ __ bne(AT, R0, (int)0); ++ break; ++ ++ case 0x04: // greater_equal ++ __ sltu(AT, opr1_reg, opr2_reg); ++ if(&target) ++ __ beq(AT, R0, target); ++ else ++ __ beq(AT, R0, (int)0); ++ break; ++ ++ case 0x05: // less ++ __ sltu(AT, opr1_reg, opr2_reg); ++ if(&target) ++ __ bne(AT, R0, target); ++ else ++ __ bne(AT, R0, (int)0); ++ break; ++ ++ case 0x06: // less_equal ++ __ sltu(AT, opr2_reg, opr1_reg); ++ if(&target) ++ __ beq(AT, R0, target); ++ else ++ __ beq(AT, R0, (int)0); ++ break; ++ ++ default: ++ Unimplemented(); ++ } ++ __ delayed()->nop(); ++ %} ++ ++ ins_pc_relative(1); ++ ins_pipe(pipe_alu_branch); ++ ins_short_branch(1); ++%} ++ ++//FIXME ++instruct branchConF_reg_reg_short(cmpOp cmp, regF src1, regF src2, label labl) %{ ++ match( If cmp (CmpF src1 src2) ); ++ effect(USE labl); ++ format %{ "BR$cmp $src1, $src2, $labl #@branchConF_reg_reg_short" %} ++ ++ ins_encode %{ ++ FloatRegister reg_op1 = $src1$$FloatRegister; ++ FloatRegister reg_op2 = $src2$$FloatRegister; ++ Label& L = *($labl$$label); ++ int flag = $cmp$$cmpcode; ++ ++ switch(flag) { ++ case 0x01: // equal ++ __ c_eq_s(reg_op1, reg_op2); ++ if (&L) ++ __ bc1t(L); ++ else ++ __ bc1t((int)0); ++ break; ++ case 0x02: // not_equal ++ __ c_eq_s(reg_op1, reg_op2); ++ if (&L) ++ __ bc1f(L); ++ else ++ __ bc1f((int)0); ++ break; ++ case 0x03: // greater ++ __ c_ule_s(reg_op1, reg_op2); ++ if(&L) ++ __ bc1f(L); ++ else ++ __ bc1f((int)0); ++ break; ++ case 0x04: // greater_equal ++ __ c_ult_s(reg_op1, reg_op2); ++ if(&L) ++ __ bc1f(L); ++ else ++ __ bc1f((int)0); ++ break; ++ case 0x05: // less ++ __ c_ult_s(reg_op1, reg_op2); ++ if(&L) ++ __ bc1t(L); ++ else ++ __ bc1t((int)0); ++ break; ++ case 0x06: // less_equal ++ __ c_ule_s(reg_op1, reg_op2); ++ if(&L) ++ __ bc1t(L); ++ else ++ __ bc1t((int)0); ++ break; ++ default: ++ Unimplemented(); ++ } ++ __ delayed()->nop(); ++ %} ++ ++ ins_pc_relative(1); ++ ins_pipe(pipe_fpu_branch); ++ ins_short_branch(1); ++%} ++ ++instruct branchConD_reg_reg_short(cmpOp cmp, regD src1, regD src2, label labl) %{ ++ match( If cmp (CmpD src1 src2) ); ++ effect(USE labl); ++ format %{ "BR$cmp $src1, $src2, $labl 
#@branchConD_reg_reg_short" %} ++ ++ ins_encode %{ ++ FloatRegister reg_op1 = $src1$$FloatRegister; ++ FloatRegister reg_op2 = $src2$$FloatRegister; ++ Label& L = *($labl$$label); ++ int flag = $cmp$$cmpcode; ++ ++ switch(flag) { ++ case 0x01: // equal ++ __ c_eq_d(reg_op1, reg_op2); ++ if (&L) ++ __ bc1t(L); ++ else ++ __ bc1t((int)0); ++ break; ++ case 0x02: // not_equal ++ // c_ueq_d cannot distinguish NaN from equal. Double.isNaN(Double) is implemented by 'f != f', so the use of c_ueq_d causes bugs. ++ __ c_eq_d(reg_op1, reg_op2); ++ if (&L) ++ __ bc1f(L); ++ else ++ __ bc1f((int)0); ++ break; ++ case 0x03: // greater ++ __ c_ule_d(reg_op1, reg_op2); ++ if(&L) ++ __ bc1f(L); ++ else ++ __ bc1f((int)0); ++ break; ++ case 0x04: // greater_equal ++ __ c_ult_d(reg_op1, reg_op2); ++ if(&L) ++ __ bc1f(L); ++ else ++ __ bc1f((int)0); ++ break; ++ case 0x05: // less ++ __ c_ult_d(reg_op1, reg_op2); ++ if(&L) ++ __ bc1t(L); ++ else ++ __ bc1t((int)0); ++ break; ++ case 0x06: // less_equal ++ __ c_ule_d(reg_op1, reg_op2); ++ if(&L) ++ __ bc1t(L); ++ else ++ __ bc1t((int)0); ++ break; ++ default: ++ Unimplemented(); ++ } ++ __ delayed()->nop(); ++ %} ++ ++ ins_pc_relative(1); ++ ins_pipe(pipe_fpu_branch); ++ ins_short_branch(1); ++%} ++ ++// =================== End of branch instructions ========================== ++ ++// Call Runtime Instruction ++instruct CallRuntimeDirect(method meth) %{ ++ match(CallRuntime ); ++ effect(USE meth); ++ ++ ins_cost(300); ++ format %{ "CALL,runtime #@CallRuntimeDirect" %} ++ ins_encode( Java_To_Runtime( meth ) ); ++ ins_pipe( pipe_slow ); ++ ins_alignment(16); ++%} ++ ++ ++ ++//------------------------MemBar Instructions------------------------------- ++//Memory barrier flavors ++ ++instruct membar_acquire() %{ ++ match(MemBarAcquire); ++ ins_cost(400); ++ ++ format %{ "MEMBAR-acquire @ membar_acquire" %} ++ ins_encode %{ ++ __ sync(); ++ %} ++ ins_pipe(empty); ++%} ++ ++instruct load_fence() %{ ++ match(LoadFence); ++ ins_cost(400); ++ ++ format %{ "MEMBAR @ load_fence" %} ++ ins_encode %{ ++ __ sync(); ++ %} ++ ins_pipe(pipe_slow); ++%} ++ ++instruct membar_acquire_lock() ++%{ ++ match(MemBarAcquireLock); ++ ins_cost(0); ++ ++ size(0); ++ format %{ "MEMBAR-acquire (acquire as part of CAS in prior FastLock so empty encoding) @ membar_acquire_lock" %} ++ ins_encode(); ++ ins_pipe(empty); ++%} ++ ++instruct membar_release() %{ ++ match(MemBarRelease); ++ ins_cost(400); ++ ++ format %{ "MEMBAR-release @ membar_release" %} ++ ++ ins_encode %{ ++ // Attention: DO NOT DELETE THIS GUY! 
++ __ sync(); ++ %} ++ ++ ins_pipe(pipe_slow); ++%} ++ ++instruct store_fence() %{ ++ match(StoreFence); ++ ins_cost(400); ++ ++ format %{ "MEMBAR @ store_fence" %} ++ ++ ins_encode %{ ++ __ sync(); ++ %} ++ ++ ins_pipe(pipe_slow); ++%} ++ ++instruct membar_release_lock() ++%{ ++ match(MemBarReleaseLock); ++ ins_cost(0); ++ ++ size(0); ++ format %{ "MEMBAR-release-lock (release in FastUnlock so empty) @ membar_release_lock" %} ++ ins_encode(); ++ ins_pipe(empty); ++%} ++ ++ ++instruct membar_volatile() %{ ++ match(MemBarVolatile); ++ ins_cost(400); ++ ++ format %{ "MEMBAR-volatile" %} ++ ins_encode %{ ++ if( !os::is_MP() ) return; // Not needed on single CPU ++ __ sync(); ++ ++ %} ++ ins_pipe(pipe_slow); ++%} ++ ++instruct unnecessary_membar_volatile() %{ ++ match(MemBarVolatile); ++ predicate(Matcher::post_store_load_barrier(n)); ++ ins_cost(0); ++ ++ size(0); ++ format %{ "MEMBAR-volatile (unnecessary so empty encoding) @ unnecessary_membar_volatile" %} ++ ins_encode( ); ++ ins_pipe(empty); ++%} ++ ++instruct membar_storestore() %{ ++ match(MemBarStoreStore); ++ match(StoreStoreFence); ++ ++ ins_cost(400); ++ format %{ "MEMBAR-storestore @ membar_storestore" %} ++ ins_encode %{ ++ __ sync(); ++ %} ++ ins_pipe(empty); ++%} ++ ++//----------Move Instructions-------------------------------------------------- ++instruct castX2P(mRegP dst, mRegL src) %{ ++ match(Set dst (CastX2P src)); ++ format %{ "castX2P $dst, $src @ castX2P" %} ++ ins_encode %{ ++ Register src = $src$$Register; ++ Register dst = $dst$$Register; ++ ++ if(src != dst) ++ __ move(dst, src); ++ %} ++ ins_cost(10); ++ ins_pipe( ialu_regI_mov ); ++%} ++ ++instruct castP2X(mRegL dst, mRegP src ) %{ ++ match(Set dst (CastP2X src)); ++ ++ format %{ "mov $dst, $src\t #@castP2X" %} ++ ins_encode %{ ++ Register src = $src$$Register; ++ Register dst = $dst$$Register; ++ ++ if(src != dst) ++ __ move(dst, src); ++ %} ++ ins_pipe( ialu_regI_mov ); ++%} ++ ++instruct MoveF2I_reg_reg(mRegI dst, regF src) %{ ++ match(Set dst (MoveF2I src)); ++ effect(DEF dst, USE src); ++ ins_cost(85); ++ format %{ "MoveF2I $dst, $src @ MoveF2I_reg_reg" %} ++ ins_encode %{ ++ Register dst = as_Register($dst$$reg); ++ FloatRegister src = as_FloatRegister($src$$reg); ++ ++ __ mfc1(dst, src); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct MoveI2F_reg_reg(regF dst, mRegI src) %{ ++ match(Set dst (MoveI2F src)); ++ effect(DEF dst, USE src); ++ ins_cost(85); ++ format %{ "MoveI2F $dst, $src @ MoveI2F_reg_reg" %} ++ ins_encode %{ ++ Register src = as_Register($src$$reg); ++ FloatRegister dst = as_FloatRegister($dst$$reg); ++ ++ __ mtc1(src, dst); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct MoveD2L_reg_reg(mRegL dst, regD src) %{ ++ match(Set dst (MoveD2L src)); ++ effect(DEF dst, USE src); ++ ins_cost(85); ++ format %{ "MoveD2L $dst, $src @ MoveD2L_reg_reg" %} ++ ins_encode %{ ++ Register dst = as_Register($dst$$reg); ++ FloatRegister src = as_FloatRegister($src$$reg); ++ ++ __ dmfc1(dst, src); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct MoveL2D_reg_reg(regD dst, mRegL src) %{ ++ match(Set dst (MoveL2D src)); ++ effect(DEF dst, USE src); ++ ins_cost(85); ++ format %{ "MoveL2D $dst, $src @ MoveL2D_reg_reg" %} ++ ins_encode %{ ++ FloatRegister dst = as_FloatRegister($dst$$reg); ++ Register src = as_Register($src$$reg); ++ ++ __ dmtc1(src, dst); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++//----------Conditional Move--------------------------------------------------- ++// Conditional move ++instruct cmovI_cmpI_reg_reg(mRegI dst, mRegI src, mRegI tmp1, 
mRegI tmp2, cmpOp cop ) %{ ++ match(Set dst (CMoveI (Binary cop (CmpI tmp1 tmp2)) (Binary dst src))); ++ ins_cost(80); ++ format %{ ++ "CMP$cop $tmp1, $tmp2\t @cmovI_cmpI_reg_reg\n" ++ "\tCMOV $dst,$src \t @cmovI_cmpI_reg_reg" ++ %} ++ ++ ins_encode %{ ++ Register op1 = $tmp1$$Register; ++ Register op2 = $tmp2$$Register; ++ Register dst = $dst$$Register; ++ Register src = $src$$Register; ++ int flag = $cop$$cmpcode; ++ ++ __ cmp_cmov(op1, op2, dst, src, (MacroAssembler::CMCompare) flag, true /* is_signed */); ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct cmovI_cmpP_reg_reg(mRegI dst, mRegI src, mRegP tmp1, mRegP tmp2, cmpOpU cop ) %{ ++ match(Set dst (CMoveI (Binary cop (CmpP tmp1 tmp2)) (Binary dst src))); ++ ins_cost(80); ++ format %{ ++ "CMPU$cop $tmp1,$tmp2\t @cmovI_cmpP_reg_reg\n\t" ++ "CMOV $dst,$src\t @cmovI_cmpP_reg_reg" ++ %} ++ ins_encode %{ ++ Register op1 = $tmp1$$Register; ++ Register op2 = $tmp2$$Register; ++ Register dst = $dst$$Register; ++ Register src = $src$$Register; ++ int flag = $cop$$cmpcode; ++ ++ __ cmp_cmov(op1, op2, dst, src, (MacroAssembler::CMCompare) flag, false /* is_signed */); ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct cmovI_cmpN_reg_reg(mRegI dst, mRegI src, mRegN tmp1, mRegN tmp2, cmpOpU cop ) %{ ++ match(Set dst (CMoveI (Binary cop (CmpN tmp1 tmp2)) (Binary dst src))); ++ ins_cost(80); ++ format %{ ++ "CMPU$cop $tmp1,$tmp2\t @cmovI_cmpN_reg_reg\n\t" ++ "CMOV $dst,$src\t @cmovI_cmpN_reg_reg" ++ %} ++ ins_encode %{ ++ Register op1 = $tmp1$$Register; ++ Register op2 = $tmp2$$Register; ++ Register dst = $dst$$Register; ++ Register src = $src$$Register; ++ int flag = $cop$$cmpcode; ++ ++ __ cmp_cmov(op1, op2, dst, src, (MacroAssembler::CMCompare) flag, false /* is_signed */); ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct cmovP_cmpU_reg_reg(mRegP dst, mRegP src, mRegI tmp1, mRegI tmp2, cmpOpU cop ) %{ ++ match(Set dst (CMoveP (Binary cop (CmpU tmp1 tmp2)) (Binary dst src))); ++ ins_cost(80); ++ format %{ ++ "CMPU$cop $tmp1,$tmp2\t @cmovP_cmpU_reg_reg\n\t" ++ "CMOV $dst,$src\t @cmovP_cmpU_reg_reg" ++ %} ++ ins_encode %{ ++ Register op1 = $tmp1$$Register; ++ Register op2 = $tmp2$$Register; ++ Register dst = $dst$$Register; ++ Register src = $src$$Register; ++ int flag = $cop$$cmpcode; ++ ++ __ cmp_cmov(op1, op2, dst, src, (MacroAssembler::CMCompare) flag, false /* is_signed */); ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct cmovP_cmpF_reg_reg(mRegP dst, mRegP src, regF tmp1, regF tmp2, cmpOp cop ) %{ ++ match(Set dst (CMoveP (Binary cop (CmpF tmp1 tmp2)) (Binary dst src))); ++ ins_cost(80); ++ format %{ ++ "CMP$cop $tmp1, $tmp2\t @cmovP_cmpF_reg_reg\n" ++ "\tCMOV $dst,$src \t @cmovP_cmpF_reg_reg" ++ %} ++ ++ ins_encode %{ ++ FloatRegister reg_op1 = $tmp1$$FloatRegister; ++ FloatRegister reg_op2 = $tmp2$$FloatRegister; ++ Register dst = $dst$$Register; ++ Register src = $src$$Register; ++ int flag = $cop$$cmpcode; ++ ++ __ cmp_cmov(reg_op1, reg_op2, dst, src, (MacroAssembler::CMCompare) flag, true /* is_float */); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct cmovP_cmpN_reg_reg(mRegP dst, mRegP src, mRegN tmp1, mRegN tmp2, cmpOpU cop ) %{ ++ match(Set dst (CMoveP (Binary cop (CmpN tmp1 tmp2)) (Binary dst src))); ++ ins_cost(80); ++ format %{ ++ "CMPU$cop $tmp1,$tmp2\t @cmovP_cmpN_reg_reg\n\t" ++ "CMOV $dst,$src\t @cmovP_cmpN_reg_reg" ++ %} ++ ins_encode %{ ++ Register op1 = $tmp1$$Register; ++ Register op2 = $tmp2$$Register; ++ Register dst = $dst$$Register; ++ Register src = $src$$Register; ++ int flag = $cop$$cmpcode; ++ 
++ __ cmp_cmov(op1, op2, dst, src, (MacroAssembler::CMCompare) flag, false /* is_signed */); ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct cmovN_cmpP_reg_reg(mRegN dst, mRegN src, mRegP tmp1, mRegP tmp2, cmpOpU cop ) %{ ++ match(Set dst (CMoveN (Binary cop (CmpP tmp1 tmp2)) (Binary dst src))); ++ ins_cost(80); ++ format %{ ++ "CMPU$cop $tmp1,$tmp2\t @cmovN_cmpP_reg_reg\n\t" ++ "CMOV $dst,$src\t @cmovN_cmpP_reg_reg" ++ %} ++ ins_encode %{ ++ Register op1 = $tmp1$$Register; ++ Register op2 = $tmp2$$Register; ++ Register dst = $dst$$Register; ++ Register src = $src$$Register; ++ int flag = $cop$$cmpcode; ++ ++ __ cmp_cmov(op1, op2, dst, src, (MacroAssembler::CMCompare) flag, false /* is_signed */); ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct cmovP_cmpD_reg_reg(mRegP dst, mRegP src, regD tmp1, regD tmp2, cmpOp cop ) %{ ++ match(Set dst (CMoveP (Binary cop (CmpD tmp1 tmp2)) (Binary dst src))); ++ ins_cost(80); ++ format %{ ++ "CMP$cop $tmp1, $tmp2\t @cmovP_cmpD_reg_reg\n" ++ "\tCMOV $dst,$src \t @cmovP_cmpD_reg_reg" ++ %} ++ ins_encode %{ ++ FloatRegister reg_op1 = as_FloatRegister($tmp1$$reg); ++ FloatRegister reg_op2 = as_FloatRegister($tmp2$$reg); ++ Register dst = as_Register($dst$$reg); ++ Register src = as_Register($src$$reg); ++ int flag = $cop$$cmpcode; ++ ++ __ cmp_cmov(reg_op1, reg_op2, dst, src, (MacroAssembler::CMCompare) flag, false /* is_float */); ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++ ++instruct cmovN_cmpN_reg_reg(mRegN dst, mRegN src, mRegN tmp1, mRegN tmp2, cmpOpU cop ) %{ ++ match(Set dst (CMoveN (Binary cop (CmpN tmp1 tmp2)) (Binary dst src))); ++ ins_cost(80); ++ format %{ ++ "CMPU$cop $tmp1,$tmp2\t @cmovN_cmpN_reg_reg\n\t" ++ "CMOV $dst,$src\t @cmovN_cmpN_reg_reg" ++ %} ++ ins_encode %{ ++ Register op1 = $tmp1$$Register; ++ Register op2 = $tmp2$$Register; ++ Register dst = $dst$$Register; ++ Register src = $src$$Register; ++ int flag = $cop$$cmpcode; ++ ++ __ cmp_cmov(op1, op2, dst, src, (MacroAssembler::CMCompare) flag, false /* is_signed */); ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++ ++instruct cmovI_cmpU_reg_reg(mRegI dst, mRegI src, mRegI tmp1, mRegI tmp2, cmpOpU cop ) %{ ++ match(Set dst (CMoveI (Binary cop (CmpU tmp1 tmp2)) (Binary dst src))); ++ ins_cost(80); ++ format %{ ++ "CMPU$cop $tmp1,$tmp2\t @cmovI_cmpU_reg_reg\n\t" ++ "CMOV $dst,$src\t @cmovI_cmpU_reg_reg" ++ %} ++ ins_encode %{ ++ Register op1 = $tmp1$$Register; ++ Register op2 = $tmp2$$Register; ++ Register dst = $dst$$Register; ++ Register src = $src$$Register; ++ int flag = $cop$$cmpcode; ++ ++ __ cmp_cmov(op1, op2, dst, src, (MacroAssembler::CMCompare) flag, false /* is_signed */); ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct cmovI_cmpL_reg_reg(mRegI dst, mRegI src, mRegL tmp1, mRegL tmp2, cmpOp cop ) %{ ++ match(Set dst (CMoveI (Binary cop (CmpL tmp1 tmp2)) (Binary dst src))); ++ ins_cost(80); ++ format %{ ++ "CMP$cop $tmp1, $tmp2\t @cmovI_cmpL_reg_reg\n" ++ "\tCMOV $dst,$src \t @cmovI_cmpL_reg_reg" ++ %} ++ ins_encode %{ ++ Register opr1 = as_Register($tmp1$$reg); ++ Register opr2 = as_Register($tmp2$$reg); ++ Register dst = $dst$$Register; ++ Register src = $src$$Register; ++ int flag = $cop$$cmpcode; ++ ++ __ cmp_cmov(opr1, opr2, dst, src, (MacroAssembler::CMCompare) flag, true /* is_signed */); ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct cmovI_cmpUL_reg_reg(mRegI dst, mRegI src, mRegL tmp1, mRegL tmp2, cmpOp cop) %{ ++ match(Set dst (CMoveI (Binary cop (CmpUL tmp1 tmp2)) (Binary dst src))); ++ ins_cost(80); ++ format %{ ++ "CMP$cop $tmp1, $tmp2\t 
@cmovI_cmpUL_reg_reg\n" ++ "\tCMOV $dst,$src \t @cmovI_cmpUL_reg_reg" ++ %} ++ ins_encode %{ ++ Register opr1 = as_Register($tmp1$$reg); ++ Register opr2 = as_Register($tmp2$$reg); ++ Register dst = $dst$$Register; ++ Register src = $src$$Register; ++ int flag = $cop$$cmpcode; ++ ++ __ cmp_cmov(opr1, opr2, dst, src, (MacroAssembler::CMCompare) flag, false /* is_signed */); ++ %} ++ ++ ins_pipe(pipe_slow); ++%} ++ ++instruct cmovP_cmpL_reg_reg(mRegP dst, mRegP src, mRegL tmp1, mRegL tmp2, cmpOp cop ) %{ ++ match(Set dst (CMoveP (Binary cop (CmpL tmp1 tmp2)) (Binary dst src))); ++ ins_cost(80); ++ format %{ ++ "CMP$cop $tmp1, $tmp2\t @cmovP_cmpL_reg_reg\n" ++ "\tCMOV $dst,$src \t @cmovP_cmpL_reg_reg" ++ %} ++ ins_encode %{ ++ Register opr1 = as_Register($tmp1$$reg); ++ Register opr2 = as_Register($tmp2$$reg); ++ Register dst = $dst$$Register; ++ Register src = $src$$Register; ++ int flag = $cop$$cmpcode; ++ ++ __ cmp_cmov(opr1, opr2, dst, src, (MacroAssembler::CMCompare) flag, true /* is_signed */); ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct cmovP_cmpUL_reg_reg(mRegP dst, mRegP src, mRegL tmp1, mRegL tmp2, cmpOp cop) %{ ++ match(Set dst (CMoveP (Binary cop (CmpUL tmp1 tmp2)) (Binary dst src))); ++ ins_cost(80); ++ format %{ ++ "CMP$cop $tmp1, $tmp2\t @cmovP_cmpUL_reg_reg\n" ++ "\tCMOV $dst,$src \t @cmovP_cmpUL_reg_reg" ++ %} ++ ins_encode %{ ++ Register opr1 = as_Register($tmp1$$reg); ++ Register opr2 = as_Register($tmp2$$reg); ++ Register dst = $dst$$Register; ++ Register src = $src$$Register; ++ int flag = $cop$$cmpcode; ++ ++ __ cmp_cmov(opr1, opr2, dst, src, (MacroAssembler::CMCompare) flag, false /* is_signed */); ++ %} ++ ++ ins_pipe(pipe_slow); ++%} ++ ++instruct cmovI_cmpD_reg_reg(mRegI dst, mRegI src, regD tmp1, regD tmp2, cmpOp cop ) %{ ++ match(Set dst (CMoveI (Binary cop (CmpD tmp1 tmp2)) (Binary dst src))); ++ ins_cost(80); ++ format %{ ++ "CMP$cop $tmp1, $tmp2\t @cmovI_cmpD_reg_reg\n" ++ "\tCMOV $dst,$src \t @cmovI_cmpD_reg_reg" ++ %} ++ ins_encode %{ ++ FloatRegister reg_op1 = as_FloatRegister($tmp1$$reg); ++ FloatRegister reg_op2 = as_FloatRegister($tmp2$$reg); ++ Register dst = as_Register($dst$$reg); ++ Register src = as_Register($src$$reg); ++ int flag = $cop$$cmpcode; ++ ++ __ cmp_cmov(reg_op1, reg_op2, dst, src, (MacroAssembler::CMCompare) flag, false /* is_float */); ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++ ++instruct cmovP_cmpP_reg_reg(mRegP dst, mRegP src, mRegP tmp1, mRegP tmp2, cmpOpU cop ) %{ ++ match(Set dst (CMoveP (Binary cop (CmpP tmp1 tmp2)) (Binary dst src))); ++ ins_cost(80); ++ format %{ ++ "CMPU$cop $tmp1,$tmp2\t @cmovP_cmpP_reg_reg\n\t" ++ "CMOV $dst,$src\t @cmovP_cmpP_reg_reg" ++ %} ++ ins_encode %{ ++ Register op1 = $tmp1$$Register; ++ Register op2 = $tmp2$$Register; ++ Register dst = $dst$$Register; ++ Register src = $src$$Register; ++ int flag = $cop$$cmpcode; ++ ++ __ cmp_cmov(op1, op2, dst, src, (MacroAssembler::CMCompare) flag, false /* is_signed */); ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct cmovP_cmpI_reg_reg(mRegP dst, mRegP src, mRegI tmp1, mRegI tmp2, cmpOp cop ) %{ ++ match(Set dst (CMoveP (Binary cop (CmpI tmp1 tmp2)) (Binary dst src))); ++ ins_cost(80); ++ format %{ ++ "CMP$cop $tmp1,$tmp2\t @cmovP_cmpI_reg_reg\n\t" ++ "CMOV $dst,$src\t @cmovP_cmpI_reg_reg" ++ %} ++ ins_encode %{ ++ Register op1 = $tmp1$$Register; ++ Register op2 = $tmp2$$Register; ++ Register dst = $dst$$Register; ++ Register src = $src$$Register; ++ int flag = $cop$$cmpcode; ++ ++ __ cmp_cmov(op1, op2, dst, src, (MacroAssembler::CMCompare) 
flag, true /* is_signed */); ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct cmovL_cmpP_reg_reg(mRegL dst, mRegL src, mRegP tmp1, mRegP tmp2, cmpOpU cop ) %{ ++ match(Set dst (CMoveL (Binary cop (CmpP tmp1 tmp2)) (Binary dst src))); ++ ins_cost(80); ++ format %{ ++ "CMPU$cop $tmp1,$tmp2\t @cmovL_cmpP_reg_reg\n\t" ++ "CMOV $dst,$src\t @cmovL_cmpP_reg_reg" ++ %} ++ ins_encode %{ ++ Register op1 = $tmp1$$Register; ++ Register op2 = $tmp2$$Register; ++ Register dst = $dst$$Register; ++ Register src = $src$$Register; ++ int flag = $cop$$cmpcode; ++ ++ __ cmp_cmov(op1, op2, dst, src, (MacroAssembler::CMCompare) flag, false /* is_signed */); ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct cmovN_cmpU_reg_reg(mRegN dst, mRegN src, mRegI tmp1, mRegI tmp2, cmpOpU cop ) %{ ++ match(Set dst (CMoveN (Binary cop (CmpU tmp1 tmp2)) (Binary dst src))); ++ ins_cost(80); ++ format %{ ++ "CMPU$cop $tmp1,$tmp2\t @cmovN_cmpU_reg_reg\n\t" ++ "CMOV $dst,$src\t @cmovN_cmpU_reg_reg" ++ %} ++ ins_encode %{ ++ Register op1 = $tmp1$$Register; ++ Register op2 = $tmp2$$Register; ++ Register dst = $dst$$Register; ++ Register src = $src$$Register; ++ int flag = $cop$$cmpcode; ++ ++ __ cmp_cmov(op1, op2, dst, src, (MacroAssembler::CMCompare) flag, false /* is_signed */); ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct cmovN_cmpL_reg_reg(mRegN dst, mRegN src, mRegL tmp1, mRegL tmp2, cmpOp cop) %{ ++ match(Set dst (CMoveN (Binary cop (CmpL tmp1 tmp2)) (Binary dst src))); ++ ins_cost(80); ++ format %{ ++ "CMP$cop $tmp1, $tmp2\t @cmovN_cmpL_reg_reg\n" ++ "\tCMOV $dst,$src \t @cmovN_cmpL_reg_reg" ++ %} ++ ins_encode %{ ++ Register opr1 = as_Register($tmp1$$reg); ++ Register opr2 = as_Register($tmp2$$reg); ++ Register dst = $dst$$Register; ++ Register src = $src$$Register; ++ int flag = $cop$$cmpcode; ++ ++ __ cmp_cmov(opr1, opr2, dst, src, (MacroAssembler::CMCompare) flag, true /* is_signed */); ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct cmovN_cmpUL_reg_reg(mRegN dst, mRegN src, mRegL tmp1, mRegL tmp2, cmpOp cop) %{ ++ match(Set dst (CMoveN (Binary cop (CmpUL tmp1 tmp2)) (Binary dst src))); ++ ins_cost(80); ++ format %{ ++ "CMP$cop $tmp1, $tmp2\t @cmovN_cmpUL_reg_reg\n" ++ "\tCMOV $dst,$src \t @cmovN_cmpUL_reg_reg" ++ %} ++ ins_encode %{ ++ Register opr1 = as_Register($tmp1$$reg); ++ Register opr2 = as_Register($tmp2$$reg); ++ Register dst = $dst$$Register; ++ Register src = $src$$Register; ++ int flag = $cop$$cmpcode; ++ ++ __ cmp_cmov(opr1, opr2, dst, src, (MacroAssembler::CMCompare) flag, false /* is_signed */); ++ %} ++ ++ ins_pipe(pipe_slow); ++%} ++ ++instruct cmovN_cmpI_reg_reg(mRegN dst, mRegN src, mRegI tmp1, mRegI tmp2, cmpOp cop ) %{ ++ match(Set dst (CMoveN (Binary cop (CmpI tmp1 tmp2)) (Binary dst src))); ++ ins_cost(80); ++ format %{ ++ "CMP$cop $tmp1,$tmp2\t @cmovN_cmpI_reg_reg\n\t" ++ "CMOV $dst,$src\t @cmovN_cmpI_reg_reg" ++ %} ++ ins_encode %{ ++ Register op1 = $tmp1$$Register; ++ Register op2 = $tmp2$$Register; ++ Register dst = $dst$$Register; ++ Register src = $src$$Register; ++ int flag = $cop$$cmpcode; ++ ++ __ cmp_cmov(op1, op2, dst, src, (MacroAssembler::CMCompare) flag, true /* is_signed */); ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct cmovL_cmpU_reg_reg(mRegL dst, mRegL src, mRegI tmp1, mRegI tmp2, cmpOpU cop ) %{ ++ match(Set dst (CMoveL (Binary cop (CmpU tmp1 tmp2)) (Binary dst src))); ++ ins_cost(80); ++ format %{ ++ "CMPU$cop $tmp1,$tmp2\t @cmovL_cmpU_reg_reg\n\t" ++ "CMOV $dst,$src\t @cmovL_cmpU_reg_reg" ++ %} ++ ins_encode %{ ++ Register op1 = 
$tmp1$$Register; ++ Register op2 = $tmp2$$Register; ++ Register dst = $dst$$Register; ++ Register src = $src$$Register; ++ int flag = $cop$$cmpcode; ++ ++ __ cmp_cmov(op1, op2, dst, src, (MacroAssembler::CMCompare) flag, false /* is_signed */); ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct cmovL_cmpF_reg_reg(mRegL dst, mRegL src, regF tmp1, regF tmp2, cmpOp cop ) %{ ++ match(Set dst (CMoveL (Binary cop (CmpF tmp1 tmp2)) (Binary dst src))); ++ ins_cost(80); ++ format %{ ++ "CMP$cop $tmp1, $tmp2\t @cmovL_cmpF_reg_reg\n" ++ "\tCMOV $dst,$src \t @cmovL_cmpF_reg_reg" ++ %} ++ ++ ins_encode %{ ++ FloatRegister reg_op1 = $tmp1$$FloatRegister; ++ FloatRegister reg_op2 = $tmp2$$FloatRegister; ++ Register dst = $dst$$Register; ++ Register src = $src$$Register; ++ int flag = $cop$$cmpcode; ++ ++ __ cmp_cmov(reg_op1, reg_op2, dst, src, (MacroAssembler::CMCompare) flag, true /* is_float */); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct cmovL_cmpI_reg_reg(mRegL dst, mRegL src, mRegI tmp1, mRegI tmp2, cmpOp cop ) %{ ++ match(Set dst (CMoveL (Binary cop (CmpI tmp1 tmp2)) (Binary dst src))); ++ ins_cost(80); ++ format %{ ++ "CMP$cop $tmp1, $tmp2\t @cmovL_cmpI_reg_reg\n" ++ "\tCMOV $dst,$src \t @cmovL_cmpI_reg_reg" ++ %} ++ ++ ins_encode %{ ++ Register op1 = $tmp1$$Register; ++ Register op2 = $tmp2$$Register; ++ Register dst = as_Register($dst$$reg); ++ Register src = as_Register($src$$reg); ++ int flag = $cop$$cmpcode; ++ ++ __ cmp_cmov(op1, op2, dst, src, (MacroAssembler::CMCompare) flag, true /* is_signed */); ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct cmovL_cmpL_reg_reg(mRegL dst, mRegL src, mRegL tmp1, mRegL tmp2, cmpOp cop ) %{ ++ match(Set dst (CMoveL (Binary cop (CmpL tmp1 tmp2)) (Binary dst src))); ++ ins_cost(80); ++ format %{ ++ "CMP$cop $tmp1, $tmp2\t @cmovL_cmpL_reg_reg\n" ++ "\tCMOV $dst,$src \t @cmovL_cmpL_reg_reg" ++ %} ++ ins_encode %{ ++ Register opr1 = as_Register($tmp1$$reg); ++ Register opr2 = as_Register($tmp2$$reg); ++ Register dst = as_Register($dst$$reg); ++ Register src = as_Register($src$$reg); ++ int flag = $cop$$cmpcode; ++ ++ __ cmp_cmov(opr1, opr2, dst, src, (MacroAssembler::CMCompare) flag, true /* is_signed */); ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct cmovL_cmpUL_reg_reg(mRegL dst, mRegL src, mRegL tmp1, mRegL tmp2, cmpOp cop) %{ ++ match(Set dst (CMoveL (Binary cop (CmpUL tmp1 tmp2)) (Binary dst src))); ++ ins_cost(80); ++ format %{ ++ "CMP$cop $tmp1, $tmp2\t @cmovL_cmpUL_reg_reg\n" ++ "\tCMOV $dst,$src \t @cmovL_cmpUL_reg_reg" ++ %} ++ ins_encode %{ ++ Register opr1 = as_Register($tmp1$$reg); ++ Register opr2 = as_Register($tmp2$$reg); ++ Register dst = as_Register($dst$$reg); ++ Register src = as_Register($src$$reg); ++ int flag = $cop$$cmpcode; ++ ++ __ cmp_cmov(opr1, opr2, dst, src, (MacroAssembler::CMCompare) flag, false /* is_signed */); ++ %} ++ ++ ins_pipe(pipe_slow); ++%} ++ ++instruct cmovL_cmpN_reg_reg(mRegL dst, mRegL src, mRegN tmp1, mRegN tmp2, cmpOpU cop ) %{ ++ match(Set dst (CMoveL (Binary cop (CmpN tmp1 tmp2)) (Binary dst src))); ++ ins_cost(80); ++ format %{ ++ "CMPU$cop $tmp1,$tmp2\t @cmovL_cmpN_reg_reg\n\t" ++ "CMOV $dst,$src\t @cmovL_cmpN_reg_reg" ++ %} ++ ins_encode %{ ++ Register op1 = $tmp1$$Register; ++ Register op2 = $tmp2$$Register; ++ Register dst = $dst$$Register; ++ Register src = $src$$Register; ++ int flag = $cop$$cmpcode; ++ ++ __ cmp_cmov(op1, op2, dst, src, (MacroAssembler::CMCompare) flag, false /* is_signed */); ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++ ++instruct cmovL_cmpD_reg_reg(mRegL dst, 
mRegL src, regD tmp1, regD tmp2, cmpOp cop ) %{ ++ match(Set dst (CMoveL (Binary cop (CmpD tmp1 tmp2)) (Binary dst src))); ++ ins_cost(80); ++ format %{ ++ "CMP$cop $tmp1, $tmp2\t @cmovL_cmpD_reg_reg\n" ++ "\tCMOV $dst,$src \t @cmovL_cmpD_reg_reg" ++ %} ++ ins_encode %{ ++ FloatRegister reg_op1 = as_FloatRegister($tmp1$$reg); ++ FloatRegister reg_op2 = as_FloatRegister($tmp2$$reg); ++ Register dst = as_Register($dst$$reg); ++ Register src = as_Register($src$$reg); ++ int flag = $cop$$cmpcode; ++ ++ __ cmp_cmov(reg_op1, reg_op2, dst, src, (MacroAssembler::CMCompare) flag, false /* is_float */); ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct cmovD_cmpD_reg_reg(regD dst, regD src, regD tmp1, regD tmp2, cmpOp cop ) %{ ++ match(Set dst (CMoveD (Binary cop (CmpD tmp1 tmp2)) (Binary dst src))); ++ ins_cost(200); ++ format %{ ++ "CMP$cop $tmp1, $tmp2\t @cmovD_cmpD_reg_reg\n" ++ "\tCMOV $dst,$src \t @cmovD_cmpD_reg_reg" ++ %} ++ ins_encode %{ ++ FloatRegister reg_op1 = as_FloatRegister($tmp1$$reg); ++ FloatRegister reg_op2 = as_FloatRegister($tmp2$$reg); ++ FloatRegister dst = as_FloatRegister($dst$$reg); ++ FloatRegister src = as_FloatRegister($src$$reg); ++ int flag = $cop$$cmpcode; ++ ++ __ cmp_cmov(reg_op1, reg_op2, dst, src, (MacroAssembler::CMCompare) flag, false /* is_float */); ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct cmovF_cmpI_reg_reg(regF dst, regF src, mRegI tmp1, mRegI tmp2, cmpOp cop ) %{ ++ match(Set dst (CMoveF (Binary cop (CmpI tmp1 tmp2)) (Binary dst src))); ++ ins_cost(200); ++ format %{ ++ "CMP$cop $tmp1, $tmp2\t @cmovF_cmpI_reg_reg\n" ++ "\tCMOV $dst, $src \t @cmovF_cmpI_reg_reg" ++ %} ++ ++ ins_encode %{ ++ Register op1 = $tmp1$$Register; ++ Register op2 = $tmp2$$Register; ++ FloatRegister dst = as_FloatRegister($dst$$reg); ++ FloatRegister src = as_FloatRegister($src$$reg); ++ int flag = $cop$$cmpcode; ++ ++ __ cmp_cmov(op1, op2, dst, src, (MacroAssembler::CMCompare) flag, true /* is_float */); ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct cmovD_cmpI_reg_reg(regD dst, regD src, mRegI tmp1, mRegI tmp2, cmpOp cop ) %{ ++ match(Set dst (CMoveD (Binary cop (CmpI tmp1 tmp2)) (Binary dst src))); ++ ins_cost(200); ++ format %{ ++ "CMP$cop $tmp1, $tmp2\t @cmovD_cmpI_reg_reg\n" ++ "\tCMOV $dst, $src \t @cmovD_cmpI_reg_reg" ++ %} ++ ++ ins_encode %{ ++ Register op1 = $tmp1$$Register; ++ Register op2 = $tmp2$$Register; ++ FloatRegister dst = as_FloatRegister($dst$$reg); ++ FloatRegister src = as_FloatRegister($src$$reg); ++ int flag = $cop$$cmpcode; ++ ++ __ cmp_cmov(op1, op2, dst, src, (MacroAssembler::CMCompare) flag, false /* is_float */); ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct cmovD_cmpP_reg_reg(regD dst, regD src, mRegP tmp1, mRegP tmp2, cmpOp cop ) %{ ++ match(Set dst (CMoveD (Binary cop (CmpP tmp1 tmp2)) (Binary dst src))); ++ ins_cost(200); ++ format %{ ++ "CMP$cop $tmp1, $tmp2\t @cmovD_cmpP_reg_reg\n" ++ "\tCMOV $dst, $src \t @cmovD_cmpP_reg_reg" ++ %} ++ ++ ins_encode %{ ++ Register op1 = $tmp1$$Register; ++ Register op2 = $tmp2$$Register; ++ FloatRegister dst = as_FloatRegister($dst$$reg); ++ FloatRegister src = as_FloatRegister($src$$reg); ++ int flag = $cop$$cmpcode; ++ ++ __ cmp_cmov(op1, op2, dst, src, (MacroAssembler::CMCompare) flag, false /* is_float */); ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++//FIXME ++instruct cmovI_cmpF_reg_reg(mRegI dst, mRegI src, regF tmp1, regF tmp2, cmpOp cop ) %{ ++ match(Set dst (CMoveI (Binary cop (CmpF tmp1 tmp2)) (Binary dst src))); ++ ins_cost(80); ++ format %{ ++ "CMP$cop $tmp1, $tmp2\t 
@cmovI_cmpF_reg_reg\n" ++ "\tCMOV $dst,$src \t @cmovI_cmpF_reg_reg" ++ %} ++ ++ ins_encode %{ ++ FloatRegister reg_op1 = $tmp1$$FloatRegister; ++ FloatRegister reg_op2 = $tmp2$$FloatRegister; ++ Register dst = $dst$$Register; ++ Register src = $src$$Register; ++ int flag = $cop$$cmpcode; ++ ++ __ cmp_cmov(reg_op1, reg_op2, dst, src, (MacroAssembler::CMCompare) flag, true /* is_float */); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct cmovF_cmpF_reg_reg(regF dst, regF src, regF tmp1, regF tmp2, cmpOp cop ) %{ ++ match(Set dst (CMoveF (Binary cop (CmpF tmp1 tmp2)) (Binary dst src))); ++ ins_cost(200); ++ format %{ ++ "CMP$cop $tmp1, $tmp2\t @cmovF_cmpF_reg_reg\n" ++ "\tCMOV $dst,$src \t @cmovF_cmpF_reg_reg" ++ %} ++ ++ ins_encode %{ ++ FloatRegister reg_op1 = $tmp1$$FloatRegister; ++ FloatRegister reg_op2 = $tmp2$$FloatRegister; ++ FloatRegister dst = $dst$$FloatRegister; ++ FloatRegister src = $src$$FloatRegister; ++ int flag = $cop$$cmpcode; ++ ++ __ cmp_cmov(reg_op1, reg_op2, dst, src, (MacroAssembler::CMCompare) flag, true /* is_float */); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++// Manifest a CmpL result in an integer register. Very painful. ++// This is the test to avoid. ++instruct cmpL3_reg_reg(mRegI dst, mRegL src1, mRegL src2) %{ ++ match(Set dst (CmpL3 src1 src2)); ++ ins_cost(1000); ++ format %{ "cmpL3 $dst, $src1, $src2 @ cmpL3_reg_reg" %} ++ ins_encode %{ ++ Register opr1 = as_Register($src1$$reg); ++ Register opr2 = as_Register($src2$$reg); ++ Register dst = as_Register($dst$$reg); ++ ++ __ slt(AT, opr1, opr2); ++ __ slt(dst, opr2, opr1); ++ __ subu(dst, dst, AT); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++// ++// less_rsult = -1 ++// greater_result = 1 ++// equal_result = 0 ++// nan_result = -1 ++// ++instruct cmpF3_reg_reg(mRegI dst, regF src1, regF src2) %{ ++ match(Set dst (CmpF3 src1 src2)); ++ ins_cost(1000); ++ format %{ "cmpF3 $dst, $src1, $src2 @ cmpF3_reg_reg" %} ++ ins_encode %{ ++ FloatRegister src1 = as_FloatRegister($src1$$reg); ++ FloatRegister src2 = as_FloatRegister($src2$$reg); ++ Register dst = as_Register($dst$$reg); ++ ++ __ ori(dst, R0, 1); ++ __ ori(AT, R0, 1); ++ __ c_olt_s(src2, src1); ++ __ movf(dst, R0); ++ __ c_ult_s(src1, src2); ++ __ movf(AT, R0); ++ __ subu(dst, dst, AT); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct cmpD3_reg_reg(mRegI dst, regD src1, regD src2) %{ ++ match(Set dst (CmpD3 src1 src2)); ++ ins_cost(1000); ++ format %{ "cmpD3 $dst, $src1, $src2 @ cmpD3_reg_reg" %} ++ ins_encode %{ ++ FloatRegister src1 = as_FloatRegister($src1$$reg); ++ FloatRegister src2 = as_FloatRegister($src2$$reg); ++ Register dst = as_Register($dst$$reg); ++ ++ __ ori(dst, R0, 1); ++ __ ori(AT, R0, 1); ++ __ c_olt_d(src2, src1); ++ __ movf(dst, R0); ++ __ c_ult_d(src1, src2); ++ __ movf(AT, R0); ++ __ subu(dst, dst, AT); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct clear_array(mRegL cnt, mRegP base, Universe dummy) %{ ++ match(Set dummy (ClearArray cnt base)); ++ format %{ "CLEAR_ARRAY base = $base, cnt = $cnt # Clear doublewords" %} ++ ins_encode %{ ++ //Assume cnt is the number of bytes in an array to be cleared, ++ //and base points to the starting address of the array. 
++ Register base = $base$$Register; ++ Register num = $cnt$$Register; ++ Label Loop, done; ++ ++ __ beq(num, R0, done); ++ __ delayed()->daddu(AT, base, R0); ++ ++ __ move(T9, num); /* T9 = words */ ++ ++ __ bind(Loop); ++ __ sd(R0, AT, 0); ++ __ daddiu(T9, T9, -1); ++ __ bne(T9, R0, Loop); ++ __ delayed()->daddiu(AT, AT, wordSize); ++ ++ __ bind(done); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct string_compareL(a4_RegP str1, mA5RegI cnt1, a6_RegP str2, mA7RegI cnt2, no_Ax_mRegI result) %{ ++ predicate(((StrCompNode*)n)->encoding() == StrIntrinsicNode::LL); ++ match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2))); ++ effect(USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2); ++ ++ format %{ "String Compare byte[] $str1[len: $cnt1], $str2[len: $cnt2] -> $result @ string_compareL" %} ++ ins_encode %{ ++ __ string_compare($str1$$Register, $str2$$Register, ++ $cnt1$$Register, $cnt2$$Register, $result$$Register, ++ StrIntrinsicNode::LL); ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct string_compareU(a4_RegP str1, mA5RegI cnt1, a6_RegP str2, mA7RegI cnt2, no_Ax_mRegI result) %{ ++ predicate(((StrCompNode*)n)->encoding() == StrIntrinsicNode::UU); ++ match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2))); ++ effect(USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2); ++ ++ format %{ "String Compare char[] $str1[len: $cnt1], $str2[len: $cnt2] -> $result @ string_compareU" %} ++ ins_encode %{ ++ __ string_compare($str1$$Register, $str2$$Register, ++ $cnt1$$Register, $cnt2$$Register, $result$$Register, ++ StrIntrinsicNode::UU); ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct string_compareLU(a4_RegP str1, mA5RegI cnt1, a6_RegP str2, mA7RegI cnt2, no_Ax_mRegI result) %{ ++ predicate(((StrCompNode*)n)->encoding() == StrIntrinsicNode::LU); ++ match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2))); ++ effect(USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2); ++ ++ format %{ "String Compare byte[] $str1[len: $cnt1], $str2[len: $cnt2] -> $result @ string_compareLU" %} ++ ins_encode %{ ++ __ string_compare($str1$$Register, $str2$$Register, ++ $cnt1$$Register, $cnt2$$Register, $result$$Register, ++ StrIntrinsicNode::LU); ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct string_compareUL(a4_RegP str1, mA5RegI cnt1, a6_RegP str2, mA7RegI cnt2, no_Ax_mRegI result) %{ ++ predicate(((StrCompNode*)n)->encoding() == StrIntrinsicNode::UL); ++ match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2))); ++ effect(USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2); ++ ++ format %{ "String Compare byte[] $str1[len: $cnt1], $str2[len: $cnt2] -> $result @ string_compareUL" %} ++ ins_encode %{ ++ __ string_compare($str1$$Register, $str2$$Register, ++ $cnt1$$Register, $cnt2$$Register, $result$$Register, ++ StrIntrinsicNode::UL); ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++// intrinsic optimization ++instruct string_equals(a4_RegP str1, a5_RegP str2, mA6RegI cnt, mA7RegI temp, no_Ax_mRegI result) %{ ++ match(Set result (StrEquals (Binary str1 str2) cnt)); ++ effect(USE_KILL str1, USE_KILL str2, USE_KILL cnt, KILL temp); ++ ++ format %{ "String Equal $str1, $str2, len:$cnt tmp:$temp -> $result @ string_equals" %} ++ ins_encode %{ ++ __ arrays_equals($str1$$Register, $str2$$Register, ++ $cnt$$Register, $temp$$Register, $result$$Register, ++ false/* byte */); ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++//----------Arithmetic Instructions------------------------------------------- ++//----------Addition 
Instructions--------------------------------------------- ++instruct addI_Reg_Reg(mRegI dst, mRegI src1, mRegI src2) %{ ++ match(Set dst (AddI src1 src2)); ++ ++ format %{ "add $dst, $src1, $src2 #@addI_Reg_Reg" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src1 = $src1$$Register; ++ Register src2 = $src2$$Register; ++ __ addu32(dst, src1, src2); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++instruct addI_Reg_imm(mRegI dst, mRegI src1, immI src2) %{ ++ match(Set dst (AddI src1 src2)); ++ ++ format %{ "add $dst, $src1, $src2 #@addI_Reg_imm" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src1 = $src1$$Register; ++ int imm = $src2$$constant; ++ ++ if(Assembler::is_simm16(imm)) { ++ __ addiu32(dst, src1, imm); ++ } else { ++ __ move(AT, imm); ++ __ addu32(dst, src1, AT); ++ } ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++instruct addP_reg_reg(mRegP dst, mRegP src1, mRegL src2) %{ ++ match(Set dst (AddP src1 src2)); ++ ++ format %{ "dadd $dst, $src1, $src2 #@addP_reg_reg" %} ++ ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src1 = $src1$$Register; ++ Register src2 = $src2$$Register; ++ __ daddu(dst, src1, src2); ++ %} ++ ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++instruct addP_reg_reg_convI2L(mRegP dst, mRegP src1, mRegI src2) %{ ++ match(Set dst (AddP src1 (ConvI2L src2))); ++ ++ format %{ "dadd $dst, $src1, $src2 #@addP_reg_reg_convI2L" %} ++ ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src1 = $src1$$Register; ++ Register src2 = $src2$$Register; ++ __ daddu(dst, src1, src2); ++ %} ++ ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++instruct addP_reg_imm(mRegP dst, mRegP src1, immL16 src2) %{ ++ match(Set dst (AddP src1 src2)); ++ ++ format %{ "daddi $dst, $src1, $src2 #@addP_reg_imm" %} ++ ins_encode %{ ++ Register src1 = $src1$$Register; ++ long src2 = $src2$$constant; ++ Register dst = $dst$$Register; ++ ++ __ daddiu(dst, src1, src2); ++ %} ++ ins_pipe( ialu_regI_imm16 ); ++%} ++ ++// Add Long Register with Register ++instruct addL_Reg_Reg(mRegL dst, mRegL src1, mRegL src2) %{ ++ match(Set dst (AddL src1 src2)); ++ ins_cost(200); ++ format %{ "ADD $dst, $src1, $src2 #@addL_Reg_Reg\t" %} ++ ++ ins_encode %{ ++ Register dst_reg = as_Register($dst$$reg); ++ Register src1_reg = as_Register($src1$$reg); ++ Register src2_reg = as_Register($src2$$reg); ++ ++ __ daddu(dst_reg, src1_reg, src2_reg); ++ %} ++ ++ ins_pipe( ialu_regL_regL ); ++%} ++ ++instruct addL_Reg_imm(mRegL dst, mRegL src1, immL16 src2) ++%{ ++ match(Set dst (AddL src1 src2)); ++ ++ format %{ "ADD $dst, $src1, $src2 #@addL_Reg_imm " %} ++ ins_encode %{ ++ Register dst_reg = as_Register($dst$$reg); ++ Register src1_reg = as_Register($src1$$reg); ++ int src2_imm = $src2$$constant; ++ ++ __ daddiu(dst_reg, src1_reg, src2_imm); ++ %} ++ ++ ins_pipe( ialu_regL_regL ); ++%} ++ ++instruct addL_RegI2L_imm(mRegL dst, mRegI src1, immL16 src2) ++%{ ++ match(Set dst (AddL (ConvI2L src1) src2)); ++ ++ format %{ "ADD $dst, $src1, $src2 #@addL_RegI2L_imm " %} ++ ins_encode %{ ++ Register dst_reg = as_Register($dst$$reg); ++ Register src1_reg = as_Register($src1$$reg); ++ int src2_imm = $src2$$constant; ++ ++ __ daddiu(dst_reg, src1_reg, src2_imm); ++ %} ++ ++ ins_pipe( ialu_regL_regL ); ++%} ++ ++instruct addL_RegI2L_Reg(mRegL dst, mRegI src1, mRegL src2) %{ ++ match(Set dst (AddL (ConvI2L src1) src2)); ++ ins_cost(200); ++ format %{ "ADD $dst, $src1, $src2 #@addL_RegI2L_Reg\t" %} ++ ++ ins_encode %{ ++ Register dst_reg = as_Register($dst$$reg); ++ Register src1_reg = 
as_Register($src1$$reg); ++ Register src2_reg = as_Register($src2$$reg); ++ ++ __ daddu(dst_reg, src1_reg, src2_reg); ++ %} ++ ++ ins_pipe( ialu_regL_regL ); ++%} ++ ++instruct addL_RegI2L_RegI2L(mRegL dst, mRegI src1, mRegI src2) %{ ++ match(Set dst (AddL (ConvI2L src1) (ConvI2L src2))); ++ ins_cost(200); ++ format %{ "ADD $dst, $src1, $src2 #@addL_RegI2L_RegI2L\t" %} ++ ++ ins_encode %{ ++ Register dst_reg = as_Register($dst$$reg); ++ Register src1_reg = as_Register($src1$$reg); ++ Register src2_reg = as_Register($src2$$reg); ++ ++ __ daddu(dst_reg, src1_reg, src2_reg); ++ %} ++ ++ ins_pipe( ialu_regL_regL ); ++%} ++ ++instruct addL_Reg_RegI2L(mRegL dst, mRegL src1, mRegI src2) %{ ++ match(Set dst (AddL src1 (ConvI2L src2))); ++ ins_cost(200); ++ format %{ "ADD $dst, $src1, $src2 #@addL_Reg_RegI2L\t" %} ++ ++ ins_encode %{ ++ Register dst_reg = as_Register($dst$$reg); ++ Register src1_reg = as_Register($src1$$reg); ++ Register src2_reg = as_Register($src2$$reg); ++ ++ __ daddu(dst_reg, src1_reg, src2_reg); ++ %} ++ ++ ins_pipe( ialu_regL_regL ); ++%} ++ ++//----------Abs Instructions------------------------------------------- ++ ++// Integer Absolute Instructions ++instruct absI_rReg(mRegI dst, mRegI src) ++%{ ++ match(Set dst (AbsI src)); ++ effect(TEMP dst); ++ format %{ "AbsI $dst, $src" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src = $src$$Register; ++ ++ __ sra(AT, src, 31); ++ __ xorr(dst, src, AT); ++ __ subu32(dst, dst, AT); ++ %} ++ ++ ins_pipe(ialu_regI_regI); ++%} ++ ++// Long Absolute Instructions ++instruct absL_rReg(mRegL dst, mRegL src) ++%{ ++ match(Set dst (AbsL src)); ++ effect(TEMP dst); ++ format %{ "AbsL $dst, $src" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src = $src$$Register; ++ ++ __ dsra32(AT, src, 31); ++ __ xorr(dst, src, AT); ++ __ subu(dst, dst, AT); ++ %} ++ ++ ins_pipe(ialu_regL_regL); ++%} ++ ++//----------Subtraction Instructions------------------------------------------- ++// Integer Subtraction Instructions ++instruct subI_Reg_Reg(mRegI dst, mRegI src1, mRegI src2) %{ ++ match(Set dst (SubI src1 src2)); ++ ins_cost(100); ++ ++ format %{ "sub $dst, $src1, $src2 #@subI_Reg_Reg" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src1 = $src1$$Register; ++ Register src2 = $src2$$Register; ++ __ subu32(dst, src1, src2); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++instruct subI_Reg_immI_M32767_32768(mRegI dst, mRegI src1, immI_M32767_32768 src2) %{ ++ match(Set dst (SubI src1 src2)); ++ ins_cost(80); ++ ++ format %{ "sub $dst, $src1, $src2 #@subI_Reg_immI_M32767_32768" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src1 = $src1$$Register; ++ __ addiu32(dst, src1, -1 * $src2$$constant); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++instruct negI_Reg(mRegI dst, immI_0 zero, mRegI src) %{ ++ match(Set dst (SubI zero src)); ++ ins_cost(80); ++ ++ format %{ "neg $dst, $src #@negI_Reg" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src = $src$$Register; ++ __ subu32(dst, R0, src); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++instruct negL_Reg(mRegL dst, immL_0 zero, mRegL src) %{ ++ match(Set dst (SubL zero src)); ++ ins_cost(80); ++ ++ format %{ "neg $dst, $src #@negL_Reg" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src = $src$$Register; ++ __ subu(dst, R0, src); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++instruct subL_Reg_immL_M32767_32768(mRegL dst, mRegL src1, immL_M32767_32768 src2) %{ ++ match(Set dst (SubL src1 src2)); ++ 
ins_cost(80); ++ ++ format %{ "sub $dst, $src1, $src2 #@subL_Reg_immL_M32767_32768" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src1 = $src1$$Register; ++ __ daddiu(dst, src1, -1 * $src2$$constant); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++// Subtract Long Register with Register. ++instruct subL_Reg_Reg(mRegL dst, mRegL src1, mRegL src2) %{ ++ match(Set dst (SubL src1 src2)); ++ ins_cost(100); ++ format %{ "SubL $dst, $src1, $src2 @ subL_Reg_Reg" %} ++ ins_encode %{ ++ Register dst = as_Register($dst$$reg); ++ Register src1 = as_Register($src1$$reg); ++ Register src2 = as_Register($src2$$reg); ++ ++ __ subu(dst, src1, src2); ++ %} ++ ins_pipe( ialu_regL_regL ); ++%} ++ ++instruct subL_Reg_RegI2L(mRegL dst, mRegL src1, mRegI src2) %{ ++ match(Set dst (SubL src1 (ConvI2L src2))); ++ ins_cost(100); ++ format %{ "SubL $dst, $src1, $src2 @ subL_Reg_RegI2L" %} ++ ins_encode %{ ++ Register dst = as_Register($dst$$reg); ++ Register src1 = as_Register($src1$$reg); ++ Register src2 = as_Register($src2$$reg); ++ ++ __ subu(dst, src1, src2); ++ %} ++ ins_pipe( ialu_regL_regL ); ++%} ++ ++instruct subL_RegI2L_Reg(mRegL dst, mRegI src1, mRegL src2) %{ ++ match(Set dst (SubL (ConvI2L src1) src2)); ++ ins_cost(200); ++ format %{ "SubL $dst, $src1, $src2 @ subL_RegI2L_Reg" %} ++ ins_encode %{ ++ Register dst = as_Register($dst$$reg); ++ Register src1 = as_Register($src1$$reg); ++ Register src2 = as_Register($src2$$reg); ++ ++ __ subu(dst, src1, src2); ++ %} ++ ins_pipe( ialu_regL_regL ); ++%} ++ ++instruct subL_RegI2L_RegI2L(mRegL dst, mRegI src1, mRegI src2) %{ ++ match(Set dst (SubL (ConvI2L src1) (ConvI2L src2))); ++ ins_cost(200); ++ format %{ "SubL $dst, $src1, $src2 @ subL_RegI2L_RegI2L" %} ++ ins_encode %{ ++ Register dst = as_Register($dst$$reg); ++ Register src1 = as_Register($src1$$reg); ++ Register src2 = as_Register($src2$$reg); ++ ++ __ subu(dst, src1, src2); ++ %} ++ ins_pipe( ialu_regL_regL ); ++%} ++ ++// Integer MOD with Register ++instruct modI_Reg_Reg(mRegI dst, mRegI src1, mRegI src2) %{ ++ match(Set dst (ModI src1 src2)); ++ ins_cost(300); ++ format %{ "modi $dst, $src1, $src2 @ modI_Reg_Reg" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src1 = $src1$$Register; ++ Register src2 = $src2$$Register; ++ ++ //if (UseLEXT1) { ++ if (0) { ++ // Experiments show that gsmod is slower that div+mfhi. ++ // So I just disable it here. 
++ __ gsmod(dst, src1, src2); ++ } else { ++ __ div(src1, src2); ++ __ mfhi(dst); ++ } ++ %} ++ ++ //ins_pipe( ialu_mod ); ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++instruct modL_reg_reg(mRegL dst, mRegL src1, mRegL src2) %{ ++ match(Set dst (ModL src1 src2)); ++ format %{ "modL $dst, $src1, $src2 @modL_reg_reg" %} ++ ++ ins_encode %{ ++ Register dst = as_Register($dst$$reg); ++ Register op1 = as_Register($src1$$reg); ++ Register op2 = as_Register($src2$$reg); ++ ++ if (UseLEXT1) { ++ __ gsdmod(dst, op1, op2); ++ } else { ++ __ ddiv(op1, op2); ++ __ mfhi(dst); ++ } ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct mulI_Reg_Reg(mRegI dst, mRegI src1, mRegI src2) %{ ++ match(Set dst (MulI src1 src2)); ++ ++ ins_cost(300); ++ format %{ "mul $dst, $src1, $src2 @ mulI_Reg_Reg" %} ++ ins_encode %{ ++ Register src1 = $src1$$Register; ++ Register src2 = $src2$$Register; ++ Register dst = $dst$$Register; ++ ++ __ mul(dst, src1, src2); ++ %} ++ ins_pipe( ialu_mult ); ++%} ++ ++instruct maddI_Reg_Reg(mRegI dst, mRegI src1, mRegI src2, mRegI src3) %{ ++ match(Set dst (AddI (MulI src1 src2) src3)); ++ ++ ins_cost(999); ++ format %{ "madd $dst, $src1 * $src2 + $src3 #@maddI_Reg_Reg" %} ++ ins_encode %{ ++ Register src1 = $src1$$Register; ++ Register src2 = $src2$$Register; ++ Register src3 = $src3$$Register; ++ Register dst = $dst$$Register; ++ ++ __ mtlo(src3); ++ __ madd(src1, src2); ++ __ mflo(dst); ++ %} ++ ins_pipe( ialu_mult ); ++%} ++ ++instruct divI_Reg_Reg(mRegI dst, mRegI src1, mRegI src2) %{ ++ match(Set dst (DivI src1 src2)); ++ ++ ins_cost(300); ++ format %{ "div $dst, $src1, $src2 @ divI_Reg_Reg" %} ++ ins_encode %{ ++ Register src1 = $src1$$Register; ++ Register src2 = $src2$$Register; ++ Register dst = $dst$$Register; ++ ++ // In MIPS, div does not cause exception. ++ // We must trap an exception manually. ++ __ teq(R0, src2, 0x7); ++ ++ if (UseLEXT1) { ++ __ gsdiv(dst, src1, src2); ++ } else { ++ __ div(src1, src2); ++ ++ __ nop(); ++ __ nop(); ++ __ mflo(dst); ++ } ++ %} ++ ins_pipe( ialu_mod ); ++%} ++ ++instruct divF_Reg_Reg(regF dst, regF src1, regF src2) %{ ++ match(Set dst (DivF src1 src2)); ++ ++ ins_cost(300); ++ format %{ "divF $dst, $src1, $src2 @ divF_Reg_Reg" %} ++ ins_encode %{ ++ FloatRegister src1 = $src1$$FloatRegister; ++ FloatRegister src2 = $src2$$FloatRegister; ++ FloatRegister dst = $dst$$FloatRegister; ++ ++ /* Here do we need to trap an exception manually ? */ ++ __ div_s(dst, src1, src2); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct divD_Reg_Reg(regD dst, regD src1, regD src2) %{ ++ match(Set dst (DivD src1 src2)); ++ ++ ins_cost(300); ++ format %{ "divD $dst, $src1, $src2 @ divD_Reg_Reg" %} ++ ins_encode %{ ++ FloatRegister src1 = $src1$$FloatRegister; ++ FloatRegister src2 = $src2$$FloatRegister; ++ FloatRegister dst = $dst$$FloatRegister; ++ ++ /* Here do we need to trap an exception manually ? 
*/ ++ __ div_d(dst, src1, src2); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct mulL_reg_reg(mRegL dst, mRegL src1, mRegL src2) %{ ++ match(Set dst (MulL src1 src2)); ++ format %{ "mulL $dst, $src1, $src2 @mulL_reg_reg" %} ++ ins_encode %{ ++ Register dst = as_Register($dst$$reg); ++ Register op1 = as_Register($src1$$reg); ++ Register op2 = as_Register($src2$$reg); ++ ++ if (UseLEXT1) { ++ __ gsdmult(dst, op1, op2); ++ } else { ++ __ dmult(op1, op2); ++ __ mflo(dst); ++ } ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct mulL_reg_regI2L(mRegL dst, mRegL src1, mRegI src2) %{ ++ match(Set dst (MulL src1 (ConvI2L src2))); ++ format %{ "mulL $dst, $src1, $src2 @mulL_reg_regI2L" %} ++ ins_encode %{ ++ Register dst = as_Register($dst$$reg); ++ Register op1 = as_Register($src1$$reg); ++ Register op2 = as_Register($src2$$reg); ++ ++ if (UseLEXT1) { ++ __ gsdmult(dst, op1, op2); ++ } else { ++ __ dmult(op1, op2); ++ __ mflo(dst); ++ } ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct divL_reg_reg(mRegL dst, mRegL src1, mRegL src2) %{ ++ match(Set dst (DivL src1 src2)); ++ format %{ "divL $dst, $src1, $src2 @divL_reg_reg" %} ++ ++ ins_encode %{ ++ Register dst = as_Register($dst$$reg); ++ Register op1 = as_Register($src1$$reg); ++ Register op2 = as_Register($src2$$reg); ++ ++ if (UseLEXT1) { ++ __ gsddiv(dst, op1, op2); ++ } else { ++ __ ddiv(op1, op2); ++ __ mflo(dst); ++ } ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct addF_reg_reg(regF dst, regF src1, regF src2) %{ ++ match(Set dst (AddF src1 src2)); ++ format %{ "AddF $dst, $src1, $src2 @addF_reg_reg" %} ++ ins_encode %{ ++ FloatRegister src1 = as_FloatRegister($src1$$reg); ++ FloatRegister src2 = as_FloatRegister($src2$$reg); ++ FloatRegister dst = as_FloatRegister($dst$$reg); ++ ++ __ add_s(dst, src1, src2); ++ %} ++ ins_pipe( fpu_regF_regF ); ++%} ++ ++instruct subF_reg_reg(regF dst, regF src1, regF src2) %{ ++ match(Set dst (SubF src1 src2)); ++ format %{ "SubF $dst, $src1, $src2 @subF_reg_reg" %} ++ ins_encode %{ ++ FloatRegister src1 = as_FloatRegister($src1$$reg); ++ FloatRegister src2 = as_FloatRegister($src2$$reg); ++ FloatRegister dst = as_FloatRegister($dst$$reg); ++ ++ __ sub_s(dst, src1, src2); ++ %} ++ ins_pipe( fpu_regF_regF ); ++%} ++instruct addD_reg_reg(regD dst, regD src1, regD src2) %{ ++ match(Set dst (AddD src1 src2)); ++ format %{ "AddD $dst, $src1, $src2 @addD_reg_reg" %} ++ ins_encode %{ ++ FloatRegister src1 = as_FloatRegister($src1$$reg); ++ FloatRegister src2 = as_FloatRegister($src2$$reg); ++ FloatRegister dst = as_FloatRegister($dst$$reg); ++ ++ __ add_d(dst, src1, src2); ++ %} ++ ins_pipe( fpu_regF_regF ); ++%} ++ ++instruct subD_reg_reg(regD dst, regD src1, regD src2) %{ ++ match(Set dst (SubD src1 src2)); ++ format %{ "SubD $dst, $src1, $src2 @subD_reg_reg" %} ++ ins_encode %{ ++ FloatRegister src1 = as_FloatRegister($src1$$reg); ++ FloatRegister src2 = as_FloatRegister($src2$$reg); ++ FloatRegister dst = as_FloatRegister($dst$$reg); ++ ++ __ sub_d(dst, src1, src2); ++ %} ++ ins_pipe( fpu_regF_regF ); ++%} ++ ++instruct negF_reg(regF dst, regF src) %{ ++ match(Set dst (NegF src)); ++ format %{ "negF $dst, $src @negF_reg" %} ++ ins_encode %{ ++ FloatRegister src = as_FloatRegister($src$$reg); ++ FloatRegister dst = as_FloatRegister($dst$$reg); ++ ++ __ neg_s(dst, src); ++ %} ++ ins_pipe( fpu_regF_regF ); ++%} ++ ++instruct negD_reg(regD dst, regD src) %{ ++ match(Set dst (NegD src)); ++ format %{ "negD $dst, $src @negD_reg" %} ++ ins_encode %{ ++ FloatRegister src = as_FloatRegister($src$$reg); ++ 
FloatRegister dst = as_FloatRegister($dst$$reg); ++ ++ __ neg_d(dst, src); ++ %} ++ ins_pipe( fpu_regF_regF ); ++%} ++ ++ ++instruct mulF_reg_reg(regF dst, regF src1, regF src2) %{ ++ match(Set dst (MulF src1 src2)); ++ format %{ "MULF $dst, $src1, $src2 @mulF_reg_reg" %} ++ ins_encode %{ ++ FloatRegister src1 = $src1$$FloatRegister; ++ FloatRegister src2 = $src2$$FloatRegister; ++ FloatRegister dst = $dst$$FloatRegister; ++ ++ __ mul_s(dst, src1, src2); ++ %} ++ ins_pipe( fpu_regF_regF ); ++%} ++ ++// Mul two double precision floating piont number ++instruct mulD_reg_reg(regD dst, regD src1, regD src2) %{ ++ match(Set dst (MulD src1 src2)); ++ format %{ "MULD $dst, $src1, $src2 @mulD_reg_reg" %} ++ ins_encode %{ ++ FloatRegister src1 = $src1$$FloatRegister; ++ FloatRegister src2 = $src2$$FloatRegister; ++ FloatRegister dst = $dst$$FloatRegister; ++ ++ __ mul_d(dst, src1, src2); ++ %} ++ ins_pipe( fpu_regF_regF ); ++%} ++ ++instruct absF_reg(regF dst, regF src) %{ ++ match(Set dst (AbsF src)); ++ ins_cost(100); ++ format %{ "absF $dst, $src @absF_reg" %} ++ ins_encode %{ ++ FloatRegister src = as_FloatRegister($src$$reg); ++ FloatRegister dst = as_FloatRegister($dst$$reg); ++ ++ __ abs_s(dst, src); ++ %} ++ ins_pipe( fpu_regF_regF ); ++%} ++ ++ ++// intrinsics for math_native. ++// AbsD SqrtD CosD SinD TanD LogD Log10D ++ ++instruct absD_reg(regD dst, regD src) %{ ++ match(Set dst (AbsD src)); ++ ins_cost(100); ++ format %{ "absD $dst, $src @absD_reg" %} ++ ins_encode %{ ++ FloatRegister src = as_FloatRegister($src$$reg); ++ FloatRegister dst = as_FloatRegister($dst$$reg); ++ ++ __ abs_d(dst, src); ++ %} ++ ins_pipe( fpu_regF_regF ); ++%} ++ ++instruct sqrtD_reg(regD dst, regD src) %{ ++ match(Set dst (SqrtD src)); ++ ins_cost(100); ++ format %{ "SqrtD $dst, $src @sqrtD_reg" %} ++ ins_encode %{ ++ FloatRegister src = as_FloatRegister($src$$reg); ++ FloatRegister dst = as_FloatRegister($dst$$reg); ++ ++ __ sqrt_d(dst, src); ++ %} ++ ins_pipe( fpu_regF_regF ); ++%} ++ ++instruct sqrtF_reg(regF dst, regF src) %{ ++ match(Set dst (ConvD2F (SqrtD (ConvF2D src)))); ++ ins_cost(100); ++ format %{ "SqrtF $dst, $src @sqrtF_reg" %} ++ ins_encode %{ ++ FloatRegister src = as_FloatRegister($src$$reg); ++ FloatRegister dst = as_FloatRegister($dst$$reg); ++ ++ __ sqrt_s(dst, src); ++ %} ++ ins_pipe( fpu_regF_regF ); ++%} ++ ++// src1 * src2 + src3 ++instruct maddF_reg_reg(regF dst, regF src1, regF src2, regF src3) %{ ++ predicate(UseFMA); ++ match(Set dst (FmaF src3 (Binary src1 src2))); ++ ++ format %{ "madd_s $dst, $src3, $src2, $src1" %} ++ ++ ins_encode %{ ++ __ madd_s(as_FloatRegister($dst$$reg), as_FloatRegister($src3$$reg), ++ as_FloatRegister($src2$$reg), as_FloatRegister($src1$$reg)); ++ %} ++ ++ ins_pipe(fpu_regF_regF); ++%} ++ ++// src1 * src2 + src3 ++instruct maddD_reg_reg(regD dst, regD src1, regD src2, regD src3) %{ ++ predicate(UseFMA); ++ match(Set dst (FmaD src3 (Binary src1 src2))); ++ ++ format %{ "madd_d $dst, $src3, $src2, $src1" %} ++ ++ ins_encode %{ ++ __ madd_d(as_FloatRegister($dst$$reg), as_FloatRegister($src3$$reg), ++ as_FloatRegister($src2$$reg), as_FloatRegister($src1$$reg)); ++ %} ++ ++ ins_pipe(fpu_regF_regF); ++%} ++ ++// src1 * src2 - src3 ++instruct msubF_reg_reg(regF dst, regF src1, regF src2, regF src3, immF_0 zero) %{ ++ predicate(UseFMA); ++ match(Set dst (FmaF (NegF src3) (Binary src1 src2))); ++ ++ format %{ "msub_s $dst, $src3, $src2, $src1" %} ++ ++ ins_encode %{ ++ __ msub_s(as_FloatRegister($dst$$reg), as_FloatRegister($src3$$reg), ++ 
as_FloatRegister($src2$$reg), as_FloatRegister($src1$$reg)); ++ %} ++ ++ ins_pipe(fpu_regF_regF); ++%} ++ ++// src1 * src2 - src3 ++instruct msubD_reg_reg(regD dst, regD src1, regD src2, regD src3, immD_0 zero) %{ ++ predicate(UseFMA); ++ match(Set dst (FmaD (NegD src3) (Binary src1 src2))); ++ ++ format %{ "msub_d $dst, $src3, $src2, $src1" %} ++ ++ ins_encode %{ ++ __ msub_d(as_FloatRegister($dst$$reg), as_FloatRegister($src3$$reg), ++ as_FloatRegister($src2$$reg), as_FloatRegister($src1$$reg)); ++ %} ++ ++ ins_pipe(fpu_regF_regF); ++%} ++ ++// -src1 * src2 - src3 ++instruct mnaddF_reg_reg(regF dst, regF src1, regF src2, regF src3, immF_0 zero) %{ ++ predicate(UseFMA); ++ match(Set dst (FmaF (NegF src3) (Binary (NegF src1) src2))); ++ match(Set dst (FmaF (NegF src3) (Binary src1 (NegF src2)))); ++ ++ format %{ "nmadds $dst, $src3, $src2, $src1" %} ++ ++ ins_encode %{ ++ __ nmadd_s(as_FloatRegister($dst$$reg), as_FloatRegister($src3$$reg), ++ as_FloatRegister($src2$$reg), as_FloatRegister($src1$$reg)); ++ %} ++ ++ ins_pipe(fpu_regF_regF); ++%} ++ ++// -src1 * src2 - src3 ++instruct mnaddD_reg_reg(regD dst, regD src1, regD src2, regD src3, immD_0 zero) %{ ++ predicate(UseFMA); ++ match(Set dst (FmaD (NegD src3) (Binary (NegD src1) src2))); ++ match(Set dst (FmaD (NegD src3) (Binary src1 (NegD src2)))); ++ ++ format %{ "nmaddd $dst, $src3, $src2, $src1" %} ++ ++ ins_encode %{ ++ __ nmadd_d(as_FloatRegister($dst$$reg), as_FloatRegister($src3$$reg), ++ as_FloatRegister($src2$$reg), as_FloatRegister($src1$$reg)); ++ %} ++ ++ ins_pipe(fpu_regF_regF); ++%} ++ ++// -src1 * src2 + src3 ++instruct mnsubF_reg_reg(regF dst, regF src1, regF src2, regF src3) %{ ++ predicate(UseFMA); ++ match(Set dst (FmaF src3 (Binary (NegF src1) src2))); ++ match(Set dst (FmaF src3 (Binary src1 (NegF src2)))); ++ ++ format %{ "nmsubs $dst, $src3, $src2, $src1" %} ++ ++ ins_encode %{ ++ __ nmsub_s(as_FloatRegister($dst$$reg), as_FloatRegister($src3$$reg), ++ as_FloatRegister($src2$$reg), as_FloatRegister($src1$$reg)); ++ %} ++ ++ ins_pipe(fpu_regF_regF); ++%} ++ ++// -src1 * src2 + src3 ++instruct mnsubD_reg_reg(regD dst, regD src1, regD src2, regD src3) %{ ++ predicate(UseFMA); ++ match(Set dst (FmaD src3 (Binary (NegD src1) src2))); ++ match(Set dst (FmaD src3 (Binary src1 (NegD src2)))); ++ ++ format %{ "nmsubd $dst, $src3, $src2, $src1" %} ++ ++ ins_encode %{ ++ __ nmsub_d(as_FloatRegister($dst$$reg), as_FloatRegister($src3$$reg), ++ as_FloatRegister($src2$$reg), as_FloatRegister($src1$$reg)); ++ %} ++ ++ ins_pipe(fpu_regF_regF); ++%} ++//----------------------------------Logical Instructions---------------------- ++//__________________________________Integer Logical Instructions------------- ++ ++//And Instuctions ++// And Register with Immediate ++instruct andI_Reg_immI(mRegI dst, mRegI src1, immI src2) %{ ++ match(Set dst (AndI src1 src2)); ++ ++ format %{ "and $dst, $src1, $src2 #@andI_Reg_immI" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src = $src1$$Register; ++ int val = $src2$$constant; ++ ++ __ move(AT, val); ++ __ andr(dst, src, AT); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++instruct andI_Reg_imm_0_65535(mRegI dst, mRegI src1, immI_0_65535 src2) %{ ++ match(Set dst (AndI src1 src2)); ++ ins_cost(60); ++ ++ format %{ "and $dst, $src1, $src2 #@andI_Reg_imm_0_65535" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src = $src1$$Register; ++ int val = $src2$$constant; ++ ++ __ andi(dst, src, val); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++instruct 
andI_Reg_immI_nonneg_mask(mRegI dst, mRegI src1, immI_nonneg_mask mask) %{ ++ match(Set dst (AndI src1 mask)); ++ ins_cost(60); ++ ++ format %{ "and $dst, $src1, $mask #@andI_Reg_immI_nonneg_mask" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src = $src1$$Register; ++ int size = Assembler::is_int_mask($mask$$constant); ++ ++ __ ext(dst, src, 0, size); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++instruct andL_Reg_immL_nonneg_mask(mRegL dst, mRegL src1, immL_nonneg_mask mask) %{ ++ match(Set dst (AndL src1 mask)); ++ ins_cost(60); ++ ++ format %{ "and $dst, $src1, $mask #@andL_Reg_immL_nonneg_mask" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src = $src1$$Register; ++ int size = Assembler::is_jlong_mask($mask$$constant); ++ ++ __ dext(dst, src, 0, size); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++instruct xorI_Reg_imm_0_65535(mRegI dst, mRegI src1, immI_0_65535 src2) %{ ++ match(Set dst (XorI src1 src2)); ++ ins_cost(60); ++ ++ format %{ "xori $dst, $src1, $src2 #@xorI_Reg_imm_0_65535" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src = $src1$$Register; ++ int val = $src2$$constant; ++ ++ __ xori(dst, src, val); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++instruct xorI_Reg_immI_M1(mRegI dst, mRegI src1, immI_M1 M1) %{ ++ match(Set dst (XorI src1 M1)); ++ predicate(UseLEXT3); ++ ins_cost(60); ++ ++ format %{ "xor $dst, $src1, $M1 #@xorI_Reg_immI_M1" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src = $src1$$Register; ++ ++ __ gsorn(dst, R0, src); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++instruct xorL2I_Reg_immI_M1(mRegI dst, mRegL src1, immI_M1 M1) %{ ++ match(Set dst (XorI (ConvL2I src1) M1)); ++ predicate(UseLEXT3); ++ ins_cost(60); ++ ++ format %{ "xor $dst, $src1, $M1 #@xorL2I_Reg_immI_M1" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src = $src1$$Register; ++ ++ __ gsorn(dst, R0, src); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++instruct xorL_Reg_imm_0_65535(mRegL dst, mRegL src1, immL_0_65535 src2) %{ ++ match(Set dst (XorL src1 src2)); ++ ins_cost(60); ++ ++ format %{ "xori $dst, $src1, $src2 #@xorL_Reg_imm_0_65535" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src = $src1$$Register; ++ int val = $src2$$constant; ++ ++ __ xori(dst, src, val); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++/* ++instruct xorL_Reg_immL_M1(mRegL dst, mRegL src1, immL_M1 M1) %{ ++ match(Set dst (XorL src1 M1)); ++ predicate(UseLEXT3); ++ ins_cost(60); ++ ++ format %{ "xor $dst, $src1, $M1 #@xorL_Reg_immL_M1" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src = $src1$$Register; ++ ++ __ gsorn(dst, R0, src); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++*/ ++ ++instruct lbu_and_lmask(mRegI dst, memory mem, immI_255 mask) %{ ++ match(Set dst (AndI mask (LoadB mem))); ++ ins_cost(60); ++ ++ format %{ "lhu $dst, $mem #@lbu_and_lmask" %} ++ ins_encode %{ ++ __ loadstore_enc($dst$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, C2_MacroAssembler::LOAD_U_BYTE); ++ %} ++ ins_pipe( ialu_loadI ); ++%} ++ ++instruct lbu_and_rmask(mRegI dst, memory mem, immI_255 mask) %{ ++ match(Set dst (AndI (LoadB mem) mask)); ++ ins_cost(60); ++ ++ format %{ "lhu $dst, $mem #@lbu_and_rmask" %} ++ ins_encode %{ ++ __ loadstore_enc($dst$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, C2_MacroAssembler::LOAD_U_BYTE); ++ %} ++ ins_pipe( ialu_loadI ); ++%} ++ ++instruct andI_Reg_Reg(mRegI dst, mRegI src1, mRegI src2) %{ ++ match(Set dst (AndI src1 src2)); ++ 
++ format %{ "and $dst, $src1, $src2 #@andI_Reg_Reg" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src1 = $src1$$Register; ++ Register src2 = $src2$$Register; ++ __ andr(dst, src1, src2); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++instruct andnI_Reg_nReg(mRegI dst, mRegI src1, mRegI src2, immI_M1 M1) %{ ++ match(Set dst (AndI src1 (XorI src2 M1))); ++ predicate(UseLEXT3); ++ ++ format %{ "andn $dst, $src1, $src2 #@andnI_Reg_nReg" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src1 = $src1$$Register; ++ Register src2 = $src2$$Register; ++ ++ __ gsandn(dst, src1, src2); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++instruct ornI_Reg_nReg(mRegI dst, mRegI src1, mRegI src2, immI_M1 M1) %{ ++ match(Set dst (OrI src1 (XorI src2 M1))); ++ predicate(UseLEXT3); ++ ++ format %{ "orn $dst, $src1, $src2 #@ornI_Reg_nReg" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src1 = $src1$$Register; ++ Register src2 = $src2$$Register; ++ ++ __ gsorn(dst, src1, src2); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++instruct andnI_nReg_Reg(mRegI dst, mRegI src1, mRegI src2, immI_M1 M1) %{ ++ match(Set dst (AndI (XorI src1 M1) src2)); ++ predicate(UseLEXT3); ++ ++ format %{ "andn $dst, $src2, $src1 #@andnI_nReg_Reg" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src1 = $src1$$Register; ++ Register src2 = $src2$$Register; ++ ++ __ gsandn(dst, src2, src1); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++instruct ornI_nReg_Reg(mRegI dst, mRegI src1, mRegI src2, immI_M1 M1) %{ ++ match(Set dst (OrI (XorI src1 M1) src2)); ++ predicate(UseLEXT3); ++ ++ format %{ "orn $dst, $src2, $src1 #@ornI_nReg_Reg" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src1 = $src1$$Register; ++ Register src2 = $src2$$Register; ++ ++ __ gsorn(dst, src2, src1); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++// And Long Register with Register ++instruct andL_Reg_Reg(mRegL dst, mRegL src1, mRegL src2) %{ ++ match(Set dst (AndL src1 src2)); ++ format %{ "AND $dst, $src1, $src2 @ andL_Reg_Reg\n\t" %} ++ ins_encode %{ ++ Register dst_reg = as_Register($dst$$reg); ++ Register src1_reg = as_Register($src1$$reg); ++ Register src2_reg = as_Register($src2$$reg); ++ ++ __ andr(dst_reg, src1_reg, src2_reg); ++ %} ++ ins_pipe( ialu_regL_regL ); ++%} ++ ++instruct andL_Reg_Reg_convI2L(mRegL dst, mRegL src1, mRegI src2) %{ ++ match(Set dst (AndL src1 (ConvI2L src2))); ++ format %{ "AND $dst, $src1, $src2 @ andL_Reg_Reg_convI2L\n\t" %} ++ ins_encode %{ ++ Register dst_reg = as_Register($dst$$reg); ++ Register src1_reg = as_Register($src1$$reg); ++ Register src2_reg = as_Register($src2$$reg); ++ ++ __ andr(dst_reg, src1_reg, src2_reg); ++ %} ++ ins_pipe( ialu_regL_regL ); ++%} ++ ++instruct andL_Reg_imm_0_65535(mRegL dst, mRegL src1, immL_0_65535 src2) %{ ++ match(Set dst (AndL src1 src2)); ++ ins_cost(60); ++ ++ format %{ "and $dst, $src1, $src2 #@andL_Reg_imm_0_65535" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src = $src1$$Register; ++ long val = $src2$$constant; ++ ++ __ andi(dst, src, val); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++instruct andL2I_Reg_imm_0_65535(mRegI dst, mRegL src1, immL_0_65535 src2) %{ ++ match(Set dst (ConvL2I (AndL src1 src2))); ++ ins_cost(60); ++ ++ format %{ "and $dst, $src1, $src2 #@andL2I_Reg_imm_0_65535" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src = $src1$$Register; ++ long val = $src2$$constant; ++ ++ __ andi(dst, src, val); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++/* 
++instruct andnL_Reg_nReg(mRegL dst, mRegL src1, mRegL src2, immL_M1 M1) %{ ++ match(Set dst (AndL src1 (XorL src2 M1))); ++ predicate(UseLEXT3); ++ ++ format %{ "andn $dst, $src1, $src2 #@andnL_Reg_nReg" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src1 = $src1$$Register; ++ Register src2 = $src2$$Register; ++ ++ __ gsandn(dst, src1, src2); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++*/ ++ ++/* ++instruct ornL_Reg_nReg(mRegL dst, mRegL src1, mRegL src2, immL_M1 M1) %{ ++ match(Set dst (OrL src1 (XorL src2 M1))); ++ predicate(UseLEXT3); ++ ++ format %{ "orn $dst, $src1, $src2 #@ornL_Reg_nReg" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src1 = $src1$$Register; ++ Register src2 = $src2$$Register; ++ ++ __ gsorn(dst, src1, src2); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++*/ ++ ++/* ++instruct andnL_nReg_Reg(mRegL dst, mRegL src1, mRegL src2, immL_M1 M1) %{ ++ match(Set dst (AndL (XorL src1 M1) src2)); ++ predicate(UseLEXT3); ++ ++ format %{ "andn $dst, $src2, $src1 #@andnL_nReg_Reg" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src1 = $src1$$Register; ++ Register src2 = $src2$$Register; ++ ++ __ gsandn(dst, src2, src1); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++*/ ++ ++/* ++instruct ornL_nReg_Reg(mRegL dst, mRegL src1, mRegL src2, immL_M1 M1) %{ ++ match(Set dst (OrL (XorL src1 M1) src2)); ++ predicate(UseLEXT3); ++ ++ format %{ "orn $dst, $src2, $src1 #@ornL_nReg_Reg" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src1 = $src1$$Register; ++ Register src2 = $src2$$Register; ++ ++ __ gsorn(dst, src2, src1); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++*/ ++ ++instruct andL_Reg_immL_M8(mRegL dst, immL_M8 M8) %{ ++ match(Set dst (AndL dst M8)); ++ ins_cost(60); ++ ++ format %{ "and $dst, $dst, $M8 #@andL_Reg_immL_M8" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ ++ __ dins(dst, R0, 0, 3); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++instruct andL_Reg_immL_M5(mRegL dst, immL_M5 M5) %{ ++ match(Set dst (AndL dst M5)); ++ ins_cost(60); ++ ++ format %{ "and $dst, $dst, $M5 #@andL_Reg_immL_M5" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ ++ __ dins(dst, R0, 2, 1); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++instruct andL_Reg_immL_M7(mRegL dst, immL_M7 M7) %{ ++ match(Set dst (AndL dst M7)); ++ ins_cost(60); ++ ++ format %{ "and $dst, $dst, $M7 #@andL_Reg_immL_M7" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ ++ __ dins(dst, R0, 1, 2); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++instruct andL_Reg_immL_M4(mRegL dst, immL_M4 M4) %{ ++ match(Set dst (AndL dst M4)); ++ ins_cost(60); ++ ++ format %{ "and $dst, $dst, $M4 #@andL_Reg_immL_M4" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ ++ __ dins(dst, R0, 0, 2); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++instruct andL_Reg_immL_M121(mRegL dst, immL_M121 M121) %{ ++ match(Set dst (AndL dst M121)); ++ ins_cost(60); ++ ++ format %{ "and $dst, $dst, $M121 #@andL_Reg_immL_M121" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ ++ __ dins(dst, R0, 3, 4); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++// Or Long Register with Register ++instruct orL_Reg_Reg(mRegL dst, mRegL src1, mRegL src2) %{ ++ match(Set dst (OrL src1 src2)); ++ format %{ "OR $dst, $src1, $src2 @ orL_Reg_Reg\t" %} ++ ins_encode %{ ++ Register dst_reg = $dst$$Register; ++ Register src1_reg = $src1$$Register; ++ Register src2_reg = $src2$$Register; ++ ++ __ orr(dst_reg, src1_reg, src2_reg); ++ %} ++ ins_pipe( ialu_regL_regL ); ++%} ++ ++instruct 
orL_Reg_P2XReg(mRegL dst, mRegP src1, mRegL src2) %{ ++ match(Set dst (OrL (CastP2X src1) src2)); ++ format %{ "OR $dst, $src1, $src2 @ orL_Reg_P2XReg\t" %} ++ ins_encode %{ ++ Register dst_reg = $dst$$Register; ++ Register src1_reg = $src1$$Register; ++ Register src2_reg = $src2$$Register; ++ ++ __ orr(dst_reg, src1_reg, src2_reg); ++ %} ++ ins_pipe( ialu_regL_regL ); ++%} ++ ++// Xor Long Register with Register ++instruct xorL_Reg_Reg(mRegL dst, mRegL src1, mRegL src2) %{ ++ match(Set dst (XorL src1 src2)); ++ format %{ "XOR $dst, $src1, $src2 @ xorL_Reg_Reg\t" %} ++ ins_encode %{ ++ Register dst_reg = as_Register($dst$$reg); ++ Register src1_reg = as_Register($src1$$reg); ++ Register src2_reg = as_Register($src2$$reg); ++ ++ __ xorr(dst_reg, src1_reg, src2_reg); ++ %} ++ ins_pipe( ialu_regL_regL ); ++%} ++ ++// Shift Left by 8-bit immediate ++instruct salI_Reg_imm(mRegI dst, mRegI src, immI8 shift) %{ ++ match(Set dst (LShiftI src shift)); ++ ++ format %{ "SHL $dst, $src, $shift #@salI_Reg_imm" %} ++ ins_encode %{ ++ Register src = $src$$Register; ++ Register dst = $dst$$Register; ++ int shamt = $shift$$constant; ++ ++ __ sll(dst, src, shamt); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++instruct salL2I_Reg_imm(mRegI dst, mRegL src, immI8 shift) %{ ++ match(Set dst (LShiftI (ConvL2I src) shift)); ++ ++ format %{ "SHL $dst, $src, $shift #@salL2I_Reg_imm" %} ++ ins_encode %{ ++ Register src = $src$$Register; ++ Register dst = $dst$$Register; ++ int shamt = $shift$$constant; ++ ++ __ sll(dst, src, shamt); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++instruct salI_Reg_imm_and_M65536(mRegI dst, mRegI src, immI_16 shift, immI_M65536 mask) %{ ++ match(Set dst (AndI (LShiftI src shift) mask)); ++ ++ format %{ "SHL $dst, $src, $shift #@salI_Reg_imm_and_M65536" %} ++ ins_encode %{ ++ Register src = $src$$Register; ++ Register dst = $dst$$Register; ++ ++ __ sll(dst, src, 16); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++instruct land7_2_s(mRegI dst, mRegL src, immL_7 seven, immI_16 sixteen) ++%{ ++ match(Set dst (RShiftI (LShiftI (ConvL2I (AndL src seven)) sixteen) sixteen)); ++ ++ format %{ "andi $dst, $src, 7\t# @land7_2_s" %} ++ ins_encode %{ ++ Register src = $src$$Register; ++ Register dst = $dst$$Register; ++ ++ __ andi(dst, src, 7); ++ %} ++ ins_pipe(ialu_regI_regI); ++%} ++ ++// Logical Shift Right by 16, followed by Arithmetic Shift Left by 16. ++// This idiom is used by the compiler the i2s bytecode. ++instruct i2s(mRegI dst, mRegI src, immI_16 sixteen) ++%{ ++ match(Set dst (RShiftI (LShiftI src sixteen) sixteen)); ++ ++ format %{ "i2s $dst, $src\t# @i2s" %} ++ ins_encode %{ ++ Register src = $src$$Register; ++ Register dst = $dst$$Register; ++ ++ __ seh(dst, src); ++ %} ++ ins_pipe(ialu_regI_regI); ++%} ++ ++// Logical Shift Right by 24, followed by Arithmetic Shift Left by 24. ++// This idiom is used by the compiler for the i2b bytecode. 
++instruct i2b(mRegI dst, mRegI src, immI_24 twentyfour) ++%{ ++ match(Set dst (RShiftI (LShiftI src twentyfour) twentyfour)); ++ ++ format %{ "i2b $dst, $src\t# @i2b" %} ++ ins_encode %{ ++ Register src = $src$$Register; ++ Register dst = $dst$$Register; ++ ++ __ seb(dst, src); ++ %} ++ ins_pipe(ialu_regI_regI); ++%} ++ ++ ++instruct salI_RegL2I_imm(mRegI dst, mRegL src, immI8 shift) %{ ++ match(Set dst (LShiftI (ConvL2I src) shift)); ++ ++ format %{ "SHL $dst, $src, $shift #@salI_RegL2I_imm" %} ++ ins_encode %{ ++ Register src = $src$$Register; ++ Register dst = $dst$$Register; ++ int shamt = $shift$$constant; ++ ++ __ sll(dst, src, shamt); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++// Shift Left by 8-bit immediate ++instruct salI_Reg_Reg(mRegI dst, mRegI src, mRegI shift) %{ ++ match(Set dst (LShiftI src shift)); ++ ++ format %{ "SHL $dst, $src, $shift #@salI_Reg_Reg" %} ++ ins_encode %{ ++ Register src = $src$$Register; ++ Register dst = $dst$$Register; ++ Register shamt = $shift$$Register; ++ __ sllv(dst, src, shamt); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++ ++// Shift Left Long ++instruct salL_Reg_imm(mRegL dst, mRegL src, immI8 shift) %{ ++ match(Set dst (LShiftL src shift)); ++ ins_cost(100); ++ format %{ "salL $dst, $src, $shift @ salL_Reg_imm" %} ++ ins_encode %{ ++ Register src_reg = as_Register($src$$reg); ++ Register dst_reg = as_Register($dst$$reg); ++ int shamt = $shift$$constant; ++ ++ if (__ is_simm(shamt, 5)) ++ __ dsll(dst_reg, src_reg, shamt); ++ else { ++ int sa = Assembler::low(shamt, 6); ++ if (sa < 32) { ++ __ dsll(dst_reg, src_reg, sa); ++ } else { ++ __ dsll32(dst_reg, src_reg, sa - 32); ++ } ++ } ++ %} ++ ins_pipe( ialu_regL_regL ); ++%} ++ ++instruct salL_RegI2L_imm(mRegL dst, mRegI src, immI8 shift) %{ ++ match(Set dst (LShiftL (ConvI2L src) shift)); ++ ins_cost(100); ++ format %{ "salL $dst, $src, $shift @ salL_RegI2L_imm" %} ++ ins_encode %{ ++ Register src_reg = as_Register($src$$reg); ++ Register dst_reg = as_Register($dst$$reg); ++ int shamt = $shift$$constant; ++ ++ if (__ is_simm(shamt, 5)) ++ __ dsll(dst_reg, src_reg, shamt); ++ else { ++ int sa = Assembler::low(shamt, 6); ++ if (sa < 32) { ++ __ dsll(dst_reg, src_reg, sa); ++ } else { ++ __ dsll32(dst_reg, src_reg, sa - 32); ++ } ++ } ++ %} ++ ins_pipe( ialu_regL_regL ); ++%} ++ ++// Shift Left Long ++instruct salL_Reg_Reg(mRegL dst, mRegL src, mRegI shift) %{ ++ match(Set dst (LShiftL src shift)); ++ ins_cost(100); ++ format %{ "salL $dst, $src, $shift @ salL_Reg_Reg" %} ++ ins_encode %{ ++ Register src_reg = as_Register($src$$reg); ++ Register dst_reg = as_Register($dst$$reg); ++ ++ __ dsllv(dst_reg, src_reg, $shift$$Register); ++ %} ++ ins_pipe( ialu_regL_regL ); ++%} ++ ++// Shift Right Long ++instruct sarL_Reg_imm(mRegL dst, mRegL src, immI8 shift) %{ ++ match(Set dst (RShiftL src shift)); ++ ins_cost(100); ++ format %{ "sarL $dst, $src, $shift @ sarL_Reg_imm" %} ++ ins_encode %{ ++ Register src_reg = as_Register($src$$reg); ++ Register dst_reg = as_Register($dst$$reg); ++ int shamt = ($shift$$constant & 0x3f); ++ if (__ is_simm(shamt, 5)) ++ __ dsra(dst_reg, src_reg, shamt); ++ else { ++ int sa = Assembler::low(shamt, 6); ++ if (sa < 32) { ++ __ dsra(dst_reg, src_reg, sa); ++ } else { ++ __ dsra32(dst_reg, src_reg, sa - 32); ++ } ++ } ++ %} ++ ins_pipe( ialu_regL_regL ); ++%} ++ ++instruct sarL2I_Reg_immI_32_63(mRegI dst, mRegL src, immI_32_63 shift) %{ ++ match(Set dst (ConvL2I (RShiftL src shift))); ++ ins_cost(100); ++ format %{ "sarL $dst, $src, $shift @ sarL2I_Reg_immI_32_63" %} 
++ ins_encode %{ ++ Register src_reg = as_Register($src$$reg); ++ Register dst_reg = as_Register($dst$$reg); ++ int shamt = $shift$$constant; ++ ++ __ dsra32(dst_reg, src_reg, shamt - 32); ++ %} ++ ins_pipe( ialu_regL_regL ); ++%} ++ ++// Shift Right Long arithmetically ++instruct sarL_Reg_Reg(mRegL dst, mRegL src, mRegI shift) %{ ++ match(Set dst (RShiftL src shift)); ++ ins_cost(100); ++ format %{ "sarL $dst, $src, $shift @ sarL_Reg_Reg" %} ++ ins_encode %{ ++ Register src_reg = as_Register($src$$reg); ++ Register dst_reg = as_Register($dst$$reg); ++ ++ __ dsrav(dst_reg, src_reg, $shift$$Register); ++ %} ++ ins_pipe( ialu_regL_regL ); ++%} ++ ++// Shift Right Long logically ++instruct slrL_Reg_Reg(mRegL dst, mRegL src, mRegI shift) %{ ++ match(Set dst (URShiftL src shift)); ++ ins_cost(100); ++ format %{ "slrL $dst, $src, $shift @ slrL_Reg_Reg" %} ++ ins_encode %{ ++ Register src_reg = as_Register($src$$reg); ++ Register dst_reg = as_Register($dst$$reg); ++ ++ __ dsrlv(dst_reg, src_reg, $shift$$Register); ++ %} ++ ins_pipe( ialu_regL_regL ); ++%} ++ ++instruct slrL_Reg_immI_0_31(mRegL dst, mRegL src, immI_0_31 shift) %{ ++ match(Set dst (URShiftL src shift)); ++ ins_cost(80); ++ format %{ "slrL $dst, $src, $shift @ slrL_Reg_immI_0_31" %} ++ ins_encode %{ ++ Register src_reg = as_Register($src$$reg); ++ Register dst_reg = as_Register($dst$$reg); ++ int shamt = $shift$$constant; ++ ++ __ dsrl(dst_reg, src_reg, shamt); ++ %} ++ ins_pipe( ialu_regL_regL ); ++%} ++ ++instruct slrL_Reg_immI_0_31_and_max_int(mRegI dst, mRegL src, immI_0_31 shift, immI_MaxI max_int) %{ ++ match(Set dst (AndI (ConvL2I (URShiftL src shift)) max_int)); ++ ins_cost(80); ++ format %{ "dext $dst, $src, $shift, 31 @ slrL_Reg_immI_0_31_and_max_int" %} ++ ins_encode %{ ++ Register src_reg = as_Register($src$$reg); ++ Register dst_reg = as_Register($dst$$reg); ++ int shamt = $shift$$constant; ++ ++ __ dext(dst_reg, src_reg, shamt, 31); ++ %} ++ ins_pipe( ialu_regL_regL ); ++%} ++ ++instruct slrL_P2XReg_immI_0_31(mRegL dst, mRegP src, immI_0_31 shift) %{ ++ match(Set dst (URShiftL (CastP2X src) shift)); ++ ins_cost(80); ++ format %{ "slrL $dst, $src, $shift @ slrL_P2XReg_immI_0_31" %} ++ ins_encode %{ ++ Register src_reg = as_Register($src$$reg); ++ Register dst_reg = as_Register($dst$$reg); ++ int shamt = $shift$$constant; ++ ++ __ dsrl(dst_reg, src_reg, shamt); ++ %} ++ ins_pipe( ialu_regL_regL ); ++%} ++ ++instruct slrL_Reg_immI_32_63(mRegL dst, mRegL src, immI_32_63 shift) %{ ++ match(Set dst (URShiftL src shift)); ++ ins_cost(80); ++ format %{ "slrL $dst, $src, $shift @ slrL_Reg_immI_32_63" %} ++ ins_encode %{ ++ Register src_reg = as_Register($src$$reg); ++ Register dst_reg = as_Register($dst$$reg); ++ int shamt = $shift$$constant; ++ ++ __ dsrl32(dst_reg, src_reg, shamt - 32); ++ %} ++ ins_pipe( ialu_regL_regL ); ++%} ++ ++instruct slrL_Reg_immI_convL2I(mRegI dst, mRegL src, immI_32_63 shift) %{ ++ match(Set dst (ConvL2I (URShiftL src shift))); ++ predicate(n->in(1)->in(2)->get_int() > 32); ++ ins_cost(80); ++ format %{ "slrL $dst, $src, $shift @ slrL_Reg_immI_convL2I" %} ++ ins_encode %{ ++ Register src_reg = as_Register($src$$reg); ++ Register dst_reg = as_Register($dst$$reg); ++ int shamt = $shift$$constant; ++ ++ __ dsrl32(dst_reg, src_reg, shamt - 32); ++ %} ++ ins_pipe( ialu_regL_regL ); ++%} ++ ++instruct slrL_P2XReg_immI_32_63(mRegL dst, mRegP src, immI_32_63 shift) %{ ++ match(Set dst (URShiftL (CastP2X src) shift)); ++ ins_cost(80); ++ format %{ "slrL $dst, $src, $shift @ slrL_P2XReg_immI_32_63" %} ++ 
ins_encode %{ ++ Register src_reg = as_Register($src$$reg); ++ Register dst_reg = as_Register($dst$$reg); ++ int shamt = $shift$$constant; ++ ++ __ dsrl32(dst_reg, src_reg, shamt - 32); ++ %} ++ ins_pipe( ialu_regL_regL ); ++%} ++ ++// Xor Instructions ++// Xor Register with Register ++instruct xorI_Reg_Reg(mRegI dst, mRegI src1, mRegI src2) %{ ++ match(Set dst (XorI src1 src2)); ++ ++ format %{ "XOR $dst, $src1, $src2 #@xorI_Reg_Reg" %} ++ ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src1 = $src1$$Register; ++ Register src2 = $src2$$Register; ++ __ xorr(dst, src1, src2); ++ %} ++ ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++// Or Instructions ++instruct orI_Reg_imm(mRegI dst, mRegI src1, immI_0_32767 src2) %{ ++ match(Set dst (OrI src1 src2)); ++ ++ format %{ "OR $dst, $src1, $src2 #@orI_Reg_imm" %} ++ ins_encode %{ ++ __ ori($dst$$Register, $src1$$Register, $src2$$constant); ++ %} ++ ++ ins_pipe( ialu_regI_regI ); ++%} ++// Or Register with Register ++instruct orI_Reg_Reg(mRegI dst, mRegI src1, mRegI src2) %{ ++ match(Set dst (OrI src1 src2)); ++ ++ format %{ "OR $dst, $src1, $src2 #@orI_Reg_Reg" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src1 = $src1$$Register; ++ Register src2 = $src2$$Register; ++ __ orr(dst, src1, src2); ++ %} ++ ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++instruct rotI_shr_logical_Reg(mRegI dst, mRegI src, immI_0_31 rshift, immI_0_31 lshift, immI_1 one) %{ ++ match(Set dst (OrI (URShiftI src rshift) (LShiftI (AndI src one) lshift))); ++ predicate(32 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()))); ++ ++ format %{ "rotr $dst, $src, 1 ...\n\t" ++ "srl $dst, $dst, ($rshift-1) @ rotI_shr_logical_Reg" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src = $src$$Register; ++ int rshift = $rshift$$constant; ++ ++ __ rotr(dst, src, 1); ++ if (rshift - 1) { ++ __ srl(dst, dst, rshift - 1); ++ } ++ %} ++ ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++instruct orI_Reg_castP2X(mRegL dst, mRegL src1, mRegP src2) %{ ++ match(Set dst (OrI src1 (CastP2X src2))); ++ ++ format %{ "OR $dst, $src1, $src2 #@orI_Reg_castP2X" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src1 = $src1$$Register; ++ Register src2 = $src2$$Register; ++ __ orr(dst, src1, src2); ++ %} ++ ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++// Logical Shift Right by 8-bit immediate ++instruct shr_logical_Reg_imm(mRegI dst, mRegI src, immI8 shift) %{ ++ match(Set dst (URShiftI src shift)); ++ //effect(KILL cr); ++ ++ format %{ "SRL $dst, $src, $shift #@shr_logical_Reg_imm" %} ++ ins_encode %{ ++ Register src = $src$$Register; ++ Register dst = $dst$$Register; ++ int shift = $shift$$constant; ++ ++ __ srl(dst, src, shift); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++instruct shr_logical_Reg_imm_nonneg_mask(mRegI dst, mRegI src, immI_0_31 shift, immI_nonneg_mask mask) %{ ++ match(Set dst (AndI (URShiftI src shift) mask)); ++ ++ format %{ "ext $dst, $src, $shift, one-bits($mask) #@shr_logical_Reg_imm_nonneg_mask" %} ++ ins_encode %{ ++ Register src = $src$$Register; ++ Register dst = $dst$$Register; ++ int pos = $shift$$constant; ++ int size = Assembler::is_int_mask($mask$$constant); ++ ++ __ ext(dst, src, pos, size); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++instruct rolI_Reg_immI_0_31(mRegI dst, mRegI src, immI_0_31 lshift, immI_0_31 rshift) ++%{ ++ predicate(0 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()) & 0x1f)); ++ match(Set dst (OrI (LShiftI src lshift) (URShiftI src rshift))); ++ ++ ins_cost(100); ++ format %{ "rotr 
$dst, $src, $rshift #@rolI_Reg_immI_0_31" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src = $src$$Register; ++ int sa = $rshift$$constant; ++ ++ __ rotr(dst, src, sa); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++instruct rolL_Reg_immI_0_31(mRegL dst, mRegL src, immI_32_63 lshift, immI_0_31 rshift) ++%{ ++ predicate(0 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()) & 0x3f)); ++ match(Set dst (OrL (LShiftL src lshift) (URShiftL src rshift))); ++ ++ ins_cost(100); ++ format %{ "rotr $dst, $src, $rshift #@rolL_Reg_immI_0_31" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src = $src$$Register; ++ int sa = $rshift$$constant; ++ ++ __ drotr(dst, src, sa); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++instruct rolL_Reg_immI_32_63(mRegL dst, mRegL src, immI_0_31 lshift, immI_32_63 rshift) ++%{ ++ predicate(0 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()) & 0x3f)); ++ match(Set dst (OrL (LShiftL src lshift) (URShiftL src rshift))); ++ ++ ins_cost(100); ++ format %{ "rotr $dst, $src, $rshift #@rolL_Reg_immI_32_63" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src = $src$$Register; ++ int sa = $rshift$$constant; ++ ++ __ drotr32(dst, src, sa - 32); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++instruct rorI_Reg_immI_0_31(mRegI dst, mRegI src, immI_0_31 rshift, immI_0_31 lshift) ++%{ ++ predicate(0 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()) & 0x1f)); ++ match(Set dst (OrI (URShiftI src rshift) (LShiftI src lshift))); ++ ++ ins_cost(100); ++ format %{ "rotr $dst, $src, $rshift #@rorI_Reg_immI_0_31" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src = $src$$Register; ++ int sa = $rshift$$constant; ++ ++ __ rotr(dst, src, sa); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++instruct rorL_Reg_immI_0_31(mRegL dst, mRegL src, immI_0_31 rshift, immI_32_63 lshift) ++%{ ++ predicate(0 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()) & 0x3f)); ++ match(Set dst (OrL (URShiftL src rshift) (LShiftL src lshift))); ++ ++ ins_cost(100); ++ format %{ "rotr $dst, $src, $rshift #@rorL_Reg_immI_0_31" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src = $src$$Register; ++ int sa = $rshift$$constant; ++ ++ __ drotr(dst, src, sa); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++instruct rorL_Reg_immI_32_63(mRegL dst, mRegL src, immI_32_63 rshift, immI_0_31 lshift) ++%{ ++ predicate(0 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()) & 0x3f)); ++ match(Set dst (OrL (URShiftL src rshift) (LShiftL src lshift))); ++ ++ ins_cost(100); ++ format %{ "rotr $dst, $src, $rshift #@rorL_Reg_immI_32_63" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src = $src$$Register; ++ int sa = $rshift$$constant; ++ ++ __ drotr32(dst, src, sa - 32); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++// Logical Shift Right ++instruct shr_logical_Reg_Reg(mRegI dst, mRegI src, mRegI shift) %{ ++ match(Set dst (URShiftI src shift)); ++ ++ format %{ "SRL $dst, $src, $shift #@shr_logical_Reg_Reg" %} ++ ins_encode %{ ++ Register src = $src$$Register; ++ Register dst = $dst$$Register; ++ Register shift = $shift$$Register; ++ __ srlv(dst, src, shift); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++ ++instruct shr_arith_Reg_imm(mRegI dst, mRegI src, immI8 shift) %{ ++ match(Set dst (RShiftI src shift)); ++ // effect(KILL cr); ++ ++ format %{ "SRA $dst, $src, $shift #@shr_arith_Reg_imm" %} ++ ins_encode %{ ++ Register src = $src$$Register; ++ Register dst = $dst$$Register; 
++ int shift = $shift$$constant; ++ __ sra(dst, src, shift); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++instruct shr_arith_Reg_Reg(mRegI dst, mRegI src, mRegI shift) %{ ++ match(Set dst (RShiftI src shift)); ++ // effect(KILL cr); ++ ++ format %{ "SRA $dst, $src, $shift #@shr_arith_Reg_Reg" %} ++ ins_encode %{ ++ Register src = $src$$Register; ++ Register dst = $dst$$Register; ++ Register shift = $shift$$Register; ++ __ srav(dst, src, shift); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++//----------Convert Int to Boolean--------------------------------------------- ++ ++instruct convI2B(mRegI dst, mRegI src) %{ ++ match(Set dst (Conv2B src)); ++ ++ ins_cost(100); ++ format %{ "convI2B $dst, $src @ convI2B" %} ++ ins_encode %{ ++ Register dst = as_Register($dst$$reg); ++ Register src = as_Register($src$$reg); ++ ++ if (dst != src) { ++ __ daddiu(dst, R0, 1); ++ __ movz(dst, R0, src); ++ } else { ++ __ move(AT, src); ++ __ daddiu(dst, R0, 1); ++ __ movz(dst, R0, AT); ++ } ++ %} ++ ++ ins_pipe( ialu_regL_regL ); ++%} ++ ++instruct convI2L_reg( mRegL dst, mRegI src) %{ ++ match(Set dst (ConvI2L src)); ++ ++ ins_cost(100); ++ format %{ "SLL $dst, $src @ convI2L_reg\t" %} ++ ins_encode %{ ++ Register dst = as_Register($dst$$reg); ++ Register src = as_Register($src$$reg); ++ ++ if(dst != src) __ sll(dst, src, 0); ++ %} ++ ins_pipe( ialu_regL_regL ); ++%} ++ ++ ++instruct convL2I_reg( mRegI dst, mRegL src ) %{ ++ match(Set dst (ConvL2I src)); ++ ++ format %{ "MOV $dst, $src @ convL2I_reg" %} ++ ins_encode %{ ++ Register dst = as_Register($dst$$reg); ++ Register src = as_Register($src$$reg); ++ ++ __ sll(dst, src, 0); ++ %} ++ ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++instruct convL2I2L_reg( mRegL dst, mRegL src ) %{ ++ match(Set dst (ConvI2L (ConvL2I src))); ++ ++ format %{ "sll $dst, $src, 0 @ convL2I2L_reg" %} ++ ins_encode %{ ++ Register dst = as_Register($dst$$reg); ++ Register src = as_Register($src$$reg); ++ ++ __ sll(dst, src, 0); ++ %} ++ ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++instruct convL2D_reg( regD dst, mRegL src ) %{ ++ match(Set dst (ConvL2D src)); ++ format %{ "convL2D $dst, $src @ convL2D_reg" %} ++ ins_encode %{ ++ Register src = as_Register($src$$reg); ++ FloatRegister dst = as_FloatRegister($dst$$reg); ++ ++ __ dmtc1(src, dst); ++ __ cvt_d_l(dst, dst); ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++ ++instruct convD2L_reg_fast( mRegL dst, regD src ) %{ ++ match(Set dst (ConvD2L src)); ++ ins_cost(150); ++ format %{ "convD2L $dst, $src @ convD2L_reg_fast" %} ++ ins_encode %{ ++ Register dst = as_Register($dst$$reg); ++ FloatRegister src = as_FloatRegister($src$$reg); ++ ++ Label Done; ++ ++ __ trunc_l_d(F30, src); ++ // max_long: 0x7fffffffffffffff ++ // __ set64(AT, 0x7fffffffffffffff); ++ __ daddiu(AT, R0, -1); ++ __ dsrl(AT, AT, 1); ++ __ dmfc1(dst, F30); ++ ++ __ bne(dst, AT, Done); ++ __ delayed()->mtc1(R0, F30); ++ ++ __ cvt_d_w(F30, F30); ++ __ c_ult_d(src, F30); ++ __ bc1f(Done); ++ __ delayed()->daddiu(T9, R0, -1); ++ ++ __ c_un_d(src, src); //NaN? ++ __ subu(dst, T9, AT); ++ __ movt(dst, R0); ++ ++ __ bind(Done); ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++ ++instruct convD2L_reg_slow( mRegL dst, regD src ) %{ ++ match(Set dst (ConvD2L src)); ++ ins_cost(250); ++ format %{ "convD2L $dst, $src @ convD2L_reg_slow" %} ++ ins_encode %{ ++ Register dst = as_Register($dst$$reg); ++ FloatRegister src = as_FloatRegister($src$$reg); ++ ++ Label L; ++ ++ __ c_un_d(src, src); //NaN? 
++ __ bc1t(L); ++ __ delayed(); ++ __ move(dst, R0); ++ ++ __ trunc_l_d(F30, src); ++ __ cfc1(AT, 31); ++ __ li(T9, 0x10000); ++ __ andr(AT, AT, T9); ++ __ beq(AT, R0, L); ++ __ delayed()->dmfc1(dst, F30); ++ ++ __ mov_d(F12, src); ++ __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::d2l), 1); ++ __ move(dst, V0); ++ __ bind(L); ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++ ++instruct convF2I_reg_fast( mRegI dst, regF src ) %{ ++ match(Set dst (ConvF2I src)); ++ ins_cost(150); ++ format %{ "convf2i $dst, $src @ convF2I_reg_fast" %} ++ ins_encode %{ ++ Register dreg = $dst$$Register; ++ FloatRegister fval = $src$$FloatRegister; ++ Label L; ++ ++ __ trunc_w_s(F30, fval); ++ __ move(AT, 0x7fffffff); ++ __ mfc1(dreg, F30); ++ __ c_un_s(fval, fval); //NaN? ++ __ movt(dreg, R0); ++ ++ __ bne(AT, dreg, L); ++ __ delayed()->lui(T9, 0x8000); ++ ++ __ mfc1(AT, fval); ++ __ andr(AT, AT, T9); ++ ++ __ movn(dreg, T9, AT); ++ ++ __ bind(L); ++ ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++ ++ ++instruct convF2I_reg_slow( mRegI dst, regF src ) %{ ++ match(Set dst (ConvF2I src)); ++ ins_cost(250); ++ format %{ "convf2i $dst, $src @ convF2I_reg_slow" %} ++ ins_encode %{ ++ Register dreg = $dst$$Register; ++ FloatRegister fval = $src$$FloatRegister; ++ Label L; ++ ++ __ c_un_s(fval, fval); //NaN? ++ __ bc1t(L); ++ __ delayed(); ++ __ move(dreg, R0); ++ ++ __ trunc_w_s(F30, fval); ++ ++ /* Call SharedRuntime:f2i() to do valid convention */ ++ __ cfc1(AT, 31); ++ __ li(T9, 0x10000); ++ __ andr(AT, AT, T9); ++ __ beq(AT, R0, L); ++ __ delayed()->mfc1(dreg, F30); ++ ++ __ mov_s(F12, fval); ++ ++ //This bug was found when running ezDS's control-panel. ++ // J 982 C2 javax.swing.text.BoxView.layoutMajorAxis(II[I[I)V (283 bytes) @ 0x000000555c46aa74 ++ // ++ // An interger array index has been assigned to V0, and then changed from 1 to Integer.MAX_VALUE. ++ // V0 is corrupted during call_VM_leaf(), and should be preserved. ++ // ++ __ push(fval); ++ if(dreg != V0) { ++ __ push(V0); ++ } ++ __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::f2i), 1); ++ if(dreg != V0) { ++ __ move(dreg, V0); ++ __ pop(V0); ++ } ++ __ pop(fval); ++ __ bind(L); ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++ ++instruct convF2L_reg_fast( mRegL dst, regF src ) %{ ++ match(Set dst (ConvF2L src)); ++ ins_cost(150); ++ format %{ "convf2l $dst, $src @ convF2L_reg_fast" %} ++ ins_encode %{ ++ Register dreg = $dst$$Register; ++ FloatRegister fval = $src$$FloatRegister; ++ Label L; ++ ++ __ trunc_l_s(F30, fval); ++ __ daddiu(AT, R0, -1); ++ __ dsrl(AT, AT, 1); ++ __ dmfc1(dreg, F30); ++ __ c_un_s(fval, fval); //NaN? ++ __ movt(dreg, R0); ++ ++ __ bne(AT, dreg, L); ++ __ delayed()->lui(T9, 0x8000); ++ ++ __ mfc1(AT, fval); ++ __ andr(AT, AT, T9); ++ ++ __ dsll32(T9, T9, 0); ++ __ movn(dreg, T9, AT); ++ ++ __ bind(L); ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++ ++instruct convF2L_reg_slow( mRegL dst, regF src ) %{ ++ match(Set dst (ConvF2L src)); ++ ins_cost(250); ++ format %{ "convf2l $dst, $src @ convF2L_reg_slow" %} ++ ins_encode %{ ++ Register dst = as_Register($dst$$reg); ++ FloatRegister fval = $src$$FloatRegister; ++ Label L; ++ ++ __ c_un_s(fval, fval); //NaN? 
++ __ bc1t(L); ++ __ delayed(); ++ __ move(dst, R0); ++ ++ __ trunc_l_s(F30, fval); ++ __ cfc1(AT, 31); ++ __ li(T9, 0x10000); ++ __ andr(AT, AT, T9); ++ __ beq(AT, R0, L); ++ __ delayed()->dmfc1(dst, F30); ++ ++ __ mov_s(F12, fval); ++ __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::f2l), 1); ++ __ move(dst, V0); ++ __ bind(L); ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct convL2F_reg( regF dst, mRegL src ) %{ ++ match(Set dst (ConvL2F src)); ++ format %{ "convl2f $dst, $src @ convL2F_reg" %} ++ ins_encode %{ ++ FloatRegister dst = $dst$$FloatRegister; ++ Register src = as_Register($src$$reg); ++ Label L; ++ ++ __ dmtc1(src, dst); ++ __ cvt_s_l(dst, dst); ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct convI2F_reg( regF dst, mRegI src ) %{ ++ match(Set dst (ConvI2F src)); ++ format %{ "convi2f $dst, $src @ convI2F_reg" %} ++ ins_encode %{ ++ Register src = $src$$Register; ++ FloatRegister dst = $dst$$FloatRegister; ++ ++ __ mtc1(src, dst); ++ __ cvt_s_w(dst, dst); ++ %} ++ ++ ins_pipe( fpu_regF_regF ); ++%} ++ ++instruct cmpLTMask_immI_0( mRegI dst, mRegI p, immI_0 zero ) %{ ++ match(Set dst (CmpLTMask p zero)); ++ ins_cost(100); ++ ++ format %{ "sra $dst, $p, 31 @ cmpLTMask_immI_0" %} ++ ins_encode %{ ++ Register src = $p$$Register; ++ Register dst = $dst$$Register; ++ ++ __ sra(dst, src, 31); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++ ++instruct cmpLTMask( mRegI dst, mRegI p, mRegI q ) %{ ++ match(Set dst (CmpLTMask p q)); ++ ins_cost(400); ++ ++ format %{ "cmpLTMask $dst, $p, $q @ cmpLTMask" %} ++ ins_encode %{ ++ Register p = $p$$Register; ++ Register q = $q$$Register; ++ Register dst = $dst$$Register; ++ ++ __ slt(dst, p, q); ++ __ subu(dst, R0, dst); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct convP2B(mRegI dst, mRegP src) %{ ++ match(Set dst (Conv2B src)); ++ ++ ins_cost(100); ++ format %{ "convP2B $dst, $src @ convP2B" %} ++ ins_encode %{ ++ Register dst = as_Register($dst$$reg); ++ Register src = as_Register($src$$reg); ++ ++ if (dst != src) { ++ __ daddiu(dst, R0, 1); ++ __ movz(dst, R0, src); ++ } else { ++ __ move(AT, src); ++ __ daddiu(dst, R0, 1); ++ __ movz(dst, R0, AT); ++ } ++ %} ++ ++ ins_pipe( ialu_regL_regL ); ++%} ++ ++ ++instruct convI2D_reg_reg(regD dst, mRegI src) %{ ++ match(Set dst (ConvI2D src)); ++ format %{ "conI2D $dst, $src @convI2D_reg" %} ++ ins_encode %{ ++ Register src = $src$$Register; ++ FloatRegister dst = $dst$$FloatRegister; ++ __ mtc1(src, dst); ++ __ cvt_d_w(dst, dst); ++ %} ++ ins_pipe( fpu_regF_regF ); ++%} ++ ++instruct convF2D_reg_reg(regD dst, regF src) %{ ++ match(Set dst (ConvF2D src)); ++ format %{ "convF2D $dst, $src\t# @convF2D_reg_reg" %} ++ ins_encode %{ ++ FloatRegister dst = $dst$$FloatRegister; ++ FloatRegister src = $src$$FloatRegister; ++ ++ __ cvt_d_s(dst, src); ++ %} ++ ins_pipe( fpu_regF_regF ); ++%} ++ ++instruct convD2F_reg_reg(regF dst, regD src) %{ ++ match(Set dst (ConvD2F src)); ++ format %{ "convD2F $dst, $src\t# @convD2F_reg_reg" %} ++ ins_encode %{ ++ FloatRegister dst = $dst$$FloatRegister; ++ FloatRegister src = $src$$FloatRegister; ++ ++ __ cvt_s_d(dst, src); ++ %} ++ ins_pipe( fpu_regF_regF ); ++%} ++ ++ ++// Convert a double to an int. If the double is a NAN, stuff a zero in instead. 
++instruct convD2I_reg_reg_fast( mRegI dst, regD src ) %{ ++ match(Set dst (ConvD2I src)); ++ ++ ins_cost(150); ++ format %{ "convD2I $dst, $src\t# @ convD2I_reg_reg_fast" %} ++ ++ ins_encode %{ ++ FloatRegister src = $src$$FloatRegister; ++ Register dst = $dst$$Register; ++ ++ Label Done; ++ ++ __ trunc_w_d(F30, src); ++ // max_int: 2147483647 ++ __ move(AT, 0x7fffffff); ++ __ mfc1(dst, F30); ++ ++ __ bne(dst, AT, Done); ++ __ delayed()->mtc1(R0, F30); ++ ++ __ cvt_d_w(F30, F30); ++ __ c_ult_d(src, F30); ++ __ bc1f(Done); ++ __ delayed()->addiu(T9, R0, -1); ++ ++ __ c_un_d(src, src); //NaN? ++ __ subu32(dst, T9, AT); ++ __ movt(dst, R0); ++ ++ __ bind(Done); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++ ++instruct convD2I_reg_reg_slow( mRegI dst, regD src ) %{ ++ match(Set dst (ConvD2I src)); ++ ++ ins_cost(250); ++ format %{ "convD2I $dst, $src\t# @ convD2I_reg_reg_slow" %} ++ ++ ins_encode %{ ++ FloatRegister src = $src$$FloatRegister; ++ Register dst = $dst$$Register; ++ Label L; ++ ++ __ trunc_w_d(F30, src); ++ __ cfc1(AT, 31); ++ __ li(T9, 0x10000); ++ __ andr(AT, AT, T9); ++ __ beq(AT, R0, L); ++ __ delayed()->mfc1(dst, F30); ++ ++ __ mov_d(F12, src); ++ __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::d2i), 1); ++ __ move(dst, V0); ++ __ bind(L); ++ ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++// Convert oop pointer into compressed form ++instruct encodeHeapOop(mRegN dst, mRegP src) %{ ++ predicate(n->bottom_type()->make_ptr()->ptr() != TypePtr::NotNull); ++ match(Set dst (EncodeP src)); ++ format %{ "encode_heap_oop $dst,$src" %} ++ ins_encode %{ ++ Register src = $src$$Register; ++ Register dst = $dst$$Register; ++ ++ __ encode_heap_oop(dst, src); ++ %} ++ ins_pipe( ialu_regL_regL ); ++%} ++ ++instruct encodeHeapOop_not_null(mRegN dst, mRegP src) %{ ++ predicate(n->bottom_type()->make_ptr()->ptr() == TypePtr::NotNull); ++ match(Set dst (EncodeP src)); ++ format %{ "encode_heap_oop_not_null $dst,$src @ encodeHeapOop_not_null" %} ++ ins_encode %{ ++ __ encode_heap_oop_not_null($dst$$Register, $src$$Register); ++ %} ++ ins_pipe( ialu_regL_regL ); ++%} ++ ++instruct decodeHeapOop(mRegP dst, mRegN src) %{ ++ predicate(n->bottom_type()->is_ptr()->ptr() != TypePtr::NotNull && ++ n->bottom_type()->is_ptr()->ptr() != TypePtr::Constant); ++ match(Set dst (DecodeN src)); ++ format %{ "decode_heap_oop $dst,$src @ decodeHeapOop" %} ++ ins_encode %{ ++ Register s = $src$$Register; ++ Register d = $dst$$Register; ++ ++ __ decode_heap_oop(d, s); ++ %} ++ ins_pipe( ialu_regL_regL ); ++%} ++ ++instruct decodeHeapOop_not_null(mRegP dst, mRegN src) %{ ++ predicate(n->bottom_type()->is_ptr()->ptr() == TypePtr::NotNull || ++ n->bottom_type()->is_ptr()->ptr() == TypePtr::Constant); ++ match(Set dst (DecodeN src)); ++ format %{ "decode_heap_oop_not_null $dst,$src @ decodeHeapOop_not_null" %} ++ ins_encode %{ ++ Register s = $src$$Register; ++ Register d = $dst$$Register; ++ if (s != d) { ++ __ decode_heap_oop_not_null(d, s); ++ } else { ++ __ decode_heap_oop_not_null(d); ++ } ++ %} ++ ins_pipe( ialu_regL_regL ); ++%} ++ ++instruct encodeKlass_not_null(mRegN dst, mRegP src) %{ ++ match(Set dst (EncodePKlass src)); ++ format %{ "encode_heap_oop_not_null $dst,$src @ encodeKlass_not_null" %} ++ ins_encode %{ ++ __ encode_klass_not_null($dst$$Register, $src$$Register); ++ %} ++ ins_pipe( ialu_regL_regL ); ++%} ++ ++instruct decodeKlass_not_null(mRegP dst, mRegN src) %{ ++ match(Set dst (DecodeNKlass src)); ++ format %{ "decode_heap_klass_not_null $dst,$src" %} ++ ins_encode %{ ++ Register s = 
$src$$Register; ++ Register d = $dst$$Register; ++ if (s != d) { ++ __ decode_klass_not_null(d, s); ++ } else { ++ __ decode_klass_not_null(d); ++ } ++ %} ++ ins_pipe( ialu_regL_regL ); ++%} ++ ++//FIXME ++instruct tlsLoadP(mRegP dst) %{ ++ match(Set dst (ThreadLocal)); ++ ++ ins_cost(0); ++ format %{ " get_thread in $dst #@tlsLoadP" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++#ifdef OPT_THREAD ++ __ move(dst, TREG); ++#else ++ __ get_thread(dst); ++#endif ++ %} ++ ++ ins_pipe( ialu_loadI ); ++%} ++ ++ ++instruct checkCastPP( mRegP dst ) %{ ++ match(Set dst (CheckCastPP dst)); ++ ++ format %{ "#checkcastPP of $dst (empty encoding) #@chekCastPP" %} ++ ins_encode( /*empty encoding*/ ); ++ ins_pipe( empty ); ++%} ++ ++instruct castPP(mRegP dst) ++%{ ++ match(Set dst (CastPP dst)); ++ ++ size(0); ++ format %{ "# castPP of $dst" %} ++ ins_encode(/* empty encoding */); ++ ins_pipe(empty); ++%} ++ ++instruct castII( mRegI dst ) %{ ++ match(Set dst (CastII dst)); ++ format %{ "#castII of $dst empty encoding" %} ++ ins_encode( /*empty encoding*/ ); ++ ins_cost(0); ++ ins_pipe( empty ); ++%} ++ ++instruct castLL(mRegL dst) ++%{ ++ match(Set dst (CastLL dst)); ++ ++ size(0); ++ format %{ "# castLL of $dst" %} ++ ins_encode(/* empty encoding */); ++ ins_cost(0); ++ ins_pipe(empty); ++%} ++ ++instruct castFF(regF dst) %{ ++ match(Set dst (CastFF dst)); ++ size(0); ++ format %{ "# castFF of $dst" %} ++ ins_encode(/*empty*/); ++ ins_pipe(empty); ++%} ++ ++instruct castDD(regD dst) %{ ++ match(Set dst (CastDD dst)); ++ size(0); ++ format %{ "# castDD of $dst" %} ++ ins_encode(/*empty*/); ++ ins_pipe(empty); ++%} ++ ++instruct castVVD(vecD dst) %{ ++ match(Set dst (CastVV dst)); ++ size(0); ++ format %{ "# castVV of $dst" %} ++ ins_encode(/*empty*/); ++ ins_pipe(empty); ++%} ++ ++// Return Instruction ++// Remove the return address & jump to it. ++instruct Ret() %{ ++ match(Return); ++ format %{ "RET #@Ret" %} ++ ++ ins_encode %{ ++ __ jr(RA); ++ __ delayed()->nop(); ++ %} ++ ++ ins_pipe( pipe_jump ); ++%} ++ ++/* ++// For Loongson CPUs, jr seems too slow, so this rule shouldn't be imported. ++instruct jumpXtnd(mRegL switch_val) %{ ++ match(Jump switch_val); ++ ++ ins_cost(350); ++ ++ format %{ "load T9 <-- [$constanttablebase, $switch_val, $constantoffset] @ jumpXtnd\n\t" ++ "jr T9\n\t" ++ "nop" %} ++ ins_encode %{ ++ Register table_base = $constanttablebase; ++ int con_offset = $constantoffset; ++ Register switch_reg = $switch_val$$Register; ++ ++ if (UseLEXT1) { ++ if (Assembler::is_simm(con_offset, 8)) { ++ __ gsldx(T9, table_base, switch_reg, con_offset); ++ } else if (Assembler::is_simm16(con_offset)) { ++ __ daddu(T9, table_base, switch_reg); ++ __ ld(T9, T9, con_offset); ++ } else { ++ __ move(T9, con_offset); ++ __ daddu(AT, table_base, switch_reg); ++ __ gsldx(T9, AT, T9, 0); ++ } ++ } else { ++ if (Assembler::is_simm16(con_offset)) { ++ __ daddu(T9, table_base, switch_reg); ++ __ ld(T9, T9, con_offset); ++ } else { ++ __ move(T9, con_offset); ++ __ daddu(AT, table_base, switch_reg); ++ __ daddu(AT, T9, AT); ++ __ ld(T9, AT, 0); ++ } ++ } ++ ++ __ jr(T9); ++ __ delayed()->nop(); ++ ++ %} ++ ins_pipe(pipe_jump); ++%} ++*/ ++ ++ ++// Tail Jump; remove the return address; jump to target. ++// TailCall above leaves the return address around. ++// TailJump is used in only one place, the rethrow_Java stub (fancy_jump=2). ++// ex_oop (Exception Oop) is needed in %o0 at the jump. As there would be a ++// "restore" before this instruction (in Epilogue), we need to materialize it ++// in %i0. 
++//FIXME ++instruct tailjmpInd(mRegP jump_target,mRegP ex_oop) %{ ++ match( TailJump jump_target ex_oop ); ++ ins_cost(200); ++ format %{ "Jmp $jump_target ; ex_oop = $ex_oop #@tailjmpInd" %} ++ ins_encode %{ ++ Register target = $jump_target$$Register; ++ ++ // V0, V1 are indicated in: ++ // [stubGenerator_mips.cpp] generate_forward_exception() ++ // [runtime_mips.cpp] OptoRuntime::generate_exception_blob() ++ // ++ Register oop = $ex_oop$$Register; ++ Register exception_oop = V0; ++ Register exception_pc = V1; ++ ++ __ move(exception_pc, RA); ++ __ move(exception_oop, oop); ++ ++ __ jr(target); ++ __ delayed()->nop(); ++ %} ++ ins_pipe( pipe_jump ); ++%} ++ ++// ============================================================================ ++// Procedure Call/Return Instructions ++// Call Java Static Instruction ++// Note: If this code changes, the corresponding ret_addr_offset() and ++// compute_padding() functions will have to be adjusted. ++instruct CallStaticJavaDirect(method meth) %{ ++ match(CallStaticJava); ++ effect(USE meth); ++ ++ ins_cost(300); ++ format %{ "CALL,static #@CallStaticJavaDirect " %} ++ ins_encode( Java_Static_Call( meth ) ); ++ ins_pipe( pipe_slow ); ++ ins_pc_relative(1); ++ ins_alignment(16); ++%} ++ ++// Call Java Dynamic Instruction ++// Note: If this code changes, the corresponding ret_addr_offset() and ++// compute_padding() functions will have to be adjusted. ++instruct CallDynamicJavaDirect(method meth) %{ ++ match(CallDynamicJava); ++ effect(USE meth); ++ ++ ins_cost(300); ++ format %{"MOV IC_Klass, #Universe::non_oop_word()\n\t" ++ "CallDynamic @ CallDynamicJavaDirect" %} ++ ins_encode( Java_Dynamic_Call( meth ) ); ++ ins_pipe( pipe_slow ); ++ ins_pc_relative(1); ++ ins_alignment(16); ++%} ++ ++instruct CallLeafNoFPDirect(method meth) %{ ++ match(CallLeafNoFP); ++ effect(USE meth); ++ ++ ins_cost(300); ++ format %{ "CALL_LEAF_NOFP,runtime " %} ++ ins_encode(Java_To_Runtime(meth)); ++ ins_pipe( pipe_slow ); ++ ins_pc_relative(1); ++ ins_alignment(16); ++%} ++ ++// Prefetch instructions for allocation. 
++ ++instruct prefetchAllocNTA( memory mem ) %{ ++ match(PrefetchAllocation mem); ++ ins_cost(125); ++ format %{ "pref $mem\t# Prefetch allocation @ prefetchAllocNTA" %} ++ ins_encode %{ ++ __ loadstore_enc(R0, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, C2_MacroAssembler::LOAD_BYTE); ++ %} ++ ins_pipe(pipe_slow); ++%} ++ ++ ++// Call runtime without safepoint ++instruct CallLeafDirect(method meth) %{ ++ match(CallLeaf); ++ effect(USE meth); ++ ++ ins_cost(300); ++ format %{ "CALL_LEAF,runtime #@CallLeafDirect " %} ++ ins_encode(Java_To_Runtime(meth)); ++ ins_pipe( pipe_slow ); ++ ins_pc_relative(1); ++ ins_alignment(16); ++%} ++ ++// Load Char (16bit unsigned) ++instruct loadUS(mRegI dst, memory mem) %{ ++ match(Set dst (LoadUS mem)); ++ ++ ins_cost(125); ++ format %{ "loadUS $dst,$mem @ loadC" %} ++ ins_encode %{ ++ __ loadstore_enc($dst$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, C2_MacroAssembler::LOAD_U_SHORT); ++ %} ++ ins_pipe( ialu_loadI ); ++%} ++ ++instruct loadUS_convI2L(mRegL dst, memory mem) %{ ++ match(Set dst (ConvI2L (LoadUS mem))); ++ ++ ins_cost(125); ++ format %{ "loadUS $dst,$mem @ loadUS_convI2L" %} ++ ins_encode %{ ++ __ loadstore_enc($dst$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, C2_MacroAssembler::LOAD_U_SHORT); ++ %} ++ ins_pipe( ialu_loadI ); ++%} ++ ++// Store Char (16bit unsigned) ++instruct storeC(memory mem, mRegI src) %{ ++ match(Set mem (StoreC mem src)); ++ ++ ins_cost(125); ++ format %{ "storeC $src, $mem @ storeC" %} ++ ins_encode %{ ++ __ loadstore_enc($src$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, C2_MacroAssembler::STORE_CHAR); ++ %} ++ ins_pipe( ialu_loadI ); ++%} ++ ++instruct storeC_0(memory mem, immI_0 zero) %{ ++ match(Set mem (StoreC mem zero)); ++ ++ ins_cost(125); ++ format %{ "storeC $zero, $mem @ storeC_0" %} ++ ins_encode %{ ++ __ loadstore_enc(R0, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, C2_MacroAssembler::STORE_SHORT); ++ %} ++ ins_pipe( ialu_loadI ); ++%} ++ ++ ++instruct loadConF_immF_0(regF dst, immF_0 zero) %{ ++ match(Set dst zero); ++ ins_cost(100); ++ ++ format %{ "mov $dst, zero @ loadConF_immF_0\n"%} ++ ins_encode %{ ++ FloatRegister dst = $dst$$FloatRegister; ++ ++ __ mtc1(R0, dst); ++ %} ++ ins_pipe( fpu_loadF ); ++%} ++ ++ ++instruct loadConF(regF dst, immF src) %{ ++ match(Set dst src); ++ ins_cost(125); ++ ++ format %{ "lwc1 $dst, $constantoffset[$constanttablebase] # load FLOAT $src from table @ loadConF" %} ++ ins_encode %{ ++ int con_offset = $constantoffset($src); ++ ++ if (Assembler::is_simm16(con_offset)) { ++ __ lwc1($dst$$FloatRegister, $constanttablebase, con_offset); ++ } else { ++ __ set64(AT, con_offset); ++ if (UseLEXT1) { ++ __ gslwxc1($dst$$FloatRegister, $constanttablebase, AT, 0); ++ } else { ++ __ daddu(AT, $constanttablebase, AT); ++ __ lwc1($dst$$FloatRegister, AT, 0); ++ } ++ } ++ %} ++ ins_pipe( fpu_loadF ); ++%} ++ ++ ++instruct loadConD_immD_0(regD dst, immD_0 zero) %{ ++ match(Set dst zero); ++ ins_cost(100); ++ ++ format %{ "mov $dst, zero @ loadConD_immD_0"%} ++ ins_encode %{ ++ FloatRegister dst = as_FloatRegister($dst$$reg); ++ ++ __ dmtc1(R0, dst); ++ %} ++ ins_pipe( fpu_loadF ); ++%} ++ ++instruct loadConD(regD dst, immD src) %{ ++ match(Set dst src); ++ ins_cost(125); ++ ++ format %{ "ldc1 $dst, $constantoffset[$constanttablebase] # load DOUBLE $src from table @ loadConD" %} ++ ins_encode %{ ++ int con_offset = $constantoffset($src); ++ ++ if (Assembler::is_simm16(con_offset)) { ++ __ ldc1($dst$$FloatRegister, 
$constanttablebase, con_offset); ++ } else { ++ __ set64(AT, con_offset); ++ if (UseLEXT1) { ++ __ gsldxc1($dst$$FloatRegister, $constanttablebase, AT, 0); ++ } else { ++ __ daddu(AT, $constanttablebase, AT); ++ __ ldc1($dst$$FloatRegister, AT, 0); ++ } ++ } ++ %} ++ ins_pipe( fpu_loadF ); ++%} ++ ++// Store register Float value (it is faster than store from FPU register) ++instruct storeF_reg( memory mem, regF src) %{ ++ match(Set mem (StoreF mem src)); ++ ++ ins_cost(50); ++ format %{ "store $mem, $src\t# store float @ storeF_reg" %} ++ ins_encode %{ ++ __ loadstore_enc($src$$FloatRegister, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, C2_MacroAssembler::STORE_FLOAT); ++ %} ++ ins_pipe( fpu_storeF ); ++%} ++ ++instruct storeF_immF_0( memory mem, immF_0 zero) %{ ++ match(Set mem (StoreF mem zero)); ++ ++ ins_cost(40); ++ format %{ "store $mem, zero\t# store float @ storeF_immF_0" %} ++ ins_encode %{ ++ __ loadstore_enc(R0, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, C2_MacroAssembler::STORE_INT); ++ %} ++ ins_pipe( ialu_storeI ); ++%} ++ ++// Load Double ++instruct loadD(regD dst, memory mem) %{ ++ match(Set dst (LoadD mem)); ++ ++ ins_cost(150); ++ format %{ "loadD $dst, $mem #@loadD" %} ++ ins_encode %{ ++ __ loadstore_enc($dst$$FloatRegister, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, C2_MacroAssembler::LOAD_DOUBLE); ++ %} ++ ins_pipe( ialu_loadI ); ++%} ++ ++// Load Double - UNaligned ++instruct loadD_unaligned(regD dst, memory mem ) %{ ++ match(Set dst (LoadD_unaligned mem)); ++ ins_cost(250); ++ // FIXME: Need more effective ldl/ldr ++ format %{ "loadD_unaligned $dst, $mem #@loadD_unaligned" %} ++ ins_encode %{ ++ __ loadstore_enc($dst$$FloatRegister, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, C2_MacroAssembler::LOAD_DOUBLE); ++ %} ++ ins_pipe( ialu_loadI ); ++%} ++ ++instruct storeD_reg( memory mem, regD src) %{ ++ match(Set mem (StoreD mem src)); ++ ++ ins_cost(50); ++ format %{ "store $mem, $src\t# store float @ storeD_reg" %} ++ ins_encode %{ ++ __ loadstore_enc($src$$FloatRegister, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, C2_MacroAssembler::STORE_DOUBLE); ++ %} ++ ins_pipe( fpu_storeF ); ++%} ++ ++instruct storeD_immD_0( memory mem, immD_0 zero) %{ ++ match(Set mem (StoreD mem zero)); ++ ++ ins_cost(40); ++ format %{ "store $mem, zero\t# store float @ storeD_immD_0" %} ++ ins_encode %{ ++ __ loadstore_enc(R0, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, C2_MacroAssembler::STORE_LONG); ++ %} ++ ins_pipe( ialu_storeI ); ++%} ++ ++instruct loadSSI(mRegI dst, stackSlotI src) ++%{ ++ match(Set dst src); ++ ++ ins_cost(125); ++ format %{ "lw $dst, $src\t# int stk @ loadSSI" %} ++ ins_encode %{ ++ guarantee( Assembler::is_simm16($src$$disp), "disp too long (loadSSI) !"); ++ __ lw($dst$$Register, SP, $src$$disp); ++ %} ++ ins_pipe(ialu_loadI); ++%} ++ ++instruct storeSSI(stackSlotI dst, mRegI src) ++%{ ++ match(Set dst src); ++ ++ ins_cost(100); ++ format %{ "sw $dst, $src\t# int stk @ storeSSI" %} ++ ins_encode %{ ++ guarantee( Assembler::is_simm16($dst$$disp), "disp too long (storeSSI) !"); ++ __ sw($src$$Register, SP, $dst$$disp); ++ %} ++ ins_pipe(ialu_storeI); ++%} ++ ++instruct loadSSL(mRegL dst, stackSlotL src) ++%{ ++ match(Set dst src); ++ ++ ins_cost(125); ++ format %{ "ld $dst, $src\t# long stk @ loadSSL" %} ++ ins_encode %{ ++ guarantee( Assembler::is_simm16($src$$disp), "disp too long (loadSSL) !"); ++ __ ld($dst$$Register, SP, $src$$disp); ++ %} ++ ins_pipe(ialu_loadI); ++%} ++ ++instruct storeSSL(stackSlotL dst, mRegL src) ++%{ ++ 
match(Set dst src); ++ ++ ins_cost(100); ++ format %{ "sd $dst, $src\t# long stk @ storeSSL" %} ++ ins_encode %{ ++ guarantee( Assembler::is_simm16($dst$$disp), "disp too long (storeSSL) !"); ++ __ sd($src$$Register, SP, $dst$$disp); ++ %} ++ ins_pipe(ialu_storeI); ++%} ++ ++instruct loadSSP(mRegP dst, stackSlotP src) ++%{ ++ match(Set dst src); ++ ++ ins_cost(125); ++ format %{ "ld $dst, $src\t# ptr stk @ loadSSP" %} ++ ins_encode %{ ++ guarantee( Assembler::is_simm16($src$$disp), "disp too long (loadSSP) !"); ++ __ ld($dst$$Register, SP, $src$$disp); ++ %} ++ ins_pipe(ialu_loadI); ++%} ++ ++instruct storeSSP(stackSlotP dst, mRegP src) ++%{ ++ match(Set dst src); ++ ++ ins_cost(100); ++ format %{ "sd $dst, $src\t# ptr stk @ storeSSP" %} ++ ins_encode %{ ++ guarantee( Assembler::is_simm16($dst$$disp), "disp too long (storeSSP) !"); ++ __ sd($src$$Register, SP, $dst$$disp); ++ %} ++ ins_pipe(ialu_storeI); ++%} ++ ++instruct loadSSF(regF dst, stackSlotF src) ++%{ ++ match(Set dst src); ++ ++ ins_cost(125); ++ format %{ "lwc1 $dst, $src\t# float stk @ loadSSF" %} ++ ins_encode %{ ++ guarantee( Assembler::is_simm16($src$$disp), "disp too long (loadSSF) !"); ++ __ lwc1($dst$$FloatRegister, SP, $src$$disp); ++ %} ++ ins_pipe(ialu_loadI); ++%} ++ ++instruct storeSSF(stackSlotF dst, regF src) ++%{ ++ match(Set dst src); ++ ++ ins_cost(100); ++ format %{ "swc1 $dst, $src\t# float stk @ storeSSF" %} ++ ins_encode %{ ++ guarantee( Assembler::is_simm16($dst$$disp), "disp too long (storeSSF) !"); ++ __ swc1($src$$FloatRegister, SP, $dst$$disp); ++ %} ++ ins_pipe(fpu_storeF); ++%} ++ ++// Use the same format since predicate() can not be used here. ++instruct loadSSD(regD dst, stackSlotD src) ++%{ ++ match(Set dst src); ++ ++ ins_cost(125); ++ format %{ "ldc1 $dst, $src\t# double stk @ loadSSD" %} ++ ins_encode %{ ++ guarantee( Assembler::is_simm16($src$$disp), "disp too long (loadSSD) !"); ++ __ ldc1($dst$$FloatRegister, SP, $src$$disp); ++ %} ++ ins_pipe(ialu_loadI); ++%} ++ ++instruct storeSSD(stackSlotD dst, regD src) ++%{ ++ match(Set dst src); ++ ++ ins_cost(100); ++ format %{ "sdc1 $dst, $src\t# double stk @ storeSSD" %} ++ ins_encode %{ ++ guarantee( Assembler::is_simm16($dst$$disp), "disp too long (storeSSD) !"); ++ __ sdc1($src$$FloatRegister, SP, $dst$$disp); ++ %} ++ ins_pipe(fpu_storeF); ++%} ++ ++instruct cmpFastLock(FlagsReg cr, mRegP object, mRegP box, mRegI tmp, mRegI scr) %{ ++ match(Set cr (FastLock object box)); ++ effect(TEMP tmp, TEMP scr); ++ ins_cost(300); ++ format %{ "FASTLOCK $cr <-- $object, $box, $tmp, $scr #@ cmpFastLock" %} ++ ins_encode %{ ++ __ fast_lock($object$$Register, $box$$Register, $cr$$Register, $tmp$$Register, $scr$$Register); ++ %} ++ ++ ins_pipe( pipe_slow ); ++ ins_pc_relative(1); ++%} ++ ++instruct cmpFastUnlock(FlagsReg cr, mRegP object, mRegP box, mRegI tmp, mRegI scr) %{ ++ match(Set cr (FastUnlock object box)); ++ effect(TEMP tmp, TEMP scr); ++ ins_cost(300); ++ format %{ "FASTUNLOCK $cr <-- $object, $box, $tmp #@cmpFastUnlock" %} ++ ins_encode %{ ++ __ fast_unlock($object$$Register, $box$$Register, $cr$$Register, $tmp$$Register, $scr$$Register); ++ %} ++ ++ ins_pipe( pipe_slow ); ++ ins_pc_relative(1); ++%} ++ ++// Store CMS card-mark Immediate 0 ++instruct storeImmCM(memory mem, immI_0 zero) %{ ++ match(Set mem (StoreCM mem zero)); ++ ++ ins_cost(150); ++ format %{ "MEMBAR\n\t" ++ "sb $mem, zero\t! 
CMS card-mark imm0" %} ++ ins_encode %{ ++ __ sync(); ++ __ loadstore_enc(R0, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, C2_MacroAssembler::STORE_BYTE); ++ %} ++ ins_pipe( ialu_storeI ); ++%} ++ ++// Die now ++instruct ShouldNotReachHere( ) ++%{ ++ match(Halt); ++ ins_cost(300); ++ ++ // Use the following format syntax ++ format %{ "ILLTRAP ;#@ShouldNotReachHere" %} ++ ins_encode %{ ++ // Here we should use stop, but stop emits too many insts ++ __ brk(18); ++ %} ++ ins_pipe( pipe_jump ); ++%} ++ ++instruct leaP8Narrow(mRegP dst, indOffset8Narrow mem) ++%{ ++ predicate(CompressedOops::shift() == 0); ++ match(Set dst mem); ++ ++ ins_cost(110); ++ format %{ "leaq $dst, $mem\t# ptr off8narrow @ leaP8Narrow" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register base = as_Register($mem$$base); ++ int disp = $mem$$disp; ++ ++ __ daddiu(dst, base, disp); ++ %} ++ ins_pipe( ialu_regI_imm16 ); ++%} ++ ++instruct leaPPosIdxScaleOff8(mRegP dst, basePosIndexScaleOffset8 mem) ++%{ ++ match(Set dst mem); ++ ++ ins_cost(110); ++ format %{ "leaq $dst, $mem\t# @ PosIdxScaleOff8" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register base = as_Register($mem$$base); ++ Register index = as_Register($mem$$index); ++ int scale = $mem$$scale; ++ int disp = $mem$$disp; ++ ++ if (scale == 0) { ++ __ daddu(AT, base, index); ++ __ daddiu(dst, AT, disp); ++ } else { ++ __ dsll(AT, index, scale); ++ __ daddu(AT, base, AT); ++ __ daddiu(dst, AT, disp); ++ } ++ %} ++ ++ ins_pipe( ialu_regI_imm16 ); ++%} ++ ++instruct leaPIdxScale(mRegP dst, indIndexScale mem) ++%{ ++ match(Set dst mem); ++ ++ ins_cost(110); ++ format %{ "leaq $dst, $mem\t# @ leaPIdxScale" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register base = as_Register($mem$$base); ++ Register index = as_Register($mem$$index); ++ int scale = $mem$$scale; ++ ++ if (scale == 0) { ++ __ daddu(dst, base, index); ++ } else { ++ __ dsll(AT, index, scale); ++ __ daddu(dst, base, AT); ++ } ++ %} ++ ++ ins_pipe( ialu_regI_imm16 ); ++%} ++ ++ ++// ============================================================================ ++// The 2nd slow-half of a subtype check. Scan the subklass's 2ndary superklass ++// array for an instance of the superklass. Set a hidden internal cache on a ++// hit (cache is checked with exposed code in gen_subtype_check()). Return ++// NZ for a miss or zero for a hit. The encoding ALSO sets flags. ++instruct partialSubtypeCheck( mRegP result, no_T8_mRegP sub, no_T8_mRegP super, mT8RegI tmp ) %{ ++ match(Set result (PartialSubtypeCheck sub super)); ++ effect(KILL tmp); ++ ins_cost(1100); // slightly larger than the next version ++ format %{ "partialSubtypeCheck result=$result, sub=$sub, super=$super, tmp=$tmp " %} ++ ++ ins_encode( enc_PartialSubtypeCheck(result, sub, super, tmp) ); ++ ins_pipe( pipe_slow ); ++%} ++ ++// Conditional-store of the updated heap-top. ++// Used during allocation of the shared heap. 
++ ++instruct storePConditional(memory heap_top_ptr, mRegP oldval, mRegP newval, FlagsReg cr) %{ ++ match(Set cr (StorePConditional heap_top_ptr (Binary oldval newval))); ++ ++ format %{ "move AT, $newval\n\t" ++ "sc_d $heap_top_ptr, AT\t# (ptr) @storePConditional \n\t" ++ "move $cr, AT\n" %} ++ ins_encode%{ ++ Register oldval = $oldval$$Register; ++ Register newval = $newval$$Register; ++ Address addr(as_Register($heap_top_ptr$$base), $heap_top_ptr$$disp); ++ ++ int index = $heap_top_ptr$$index; ++ int scale = $heap_top_ptr$$scale; ++ int disp = $heap_top_ptr$$disp; ++ ++ guarantee(Assembler::is_simm16(disp), ""); ++ ++ if (index != -1) { ++ __ stop("in storePConditional: index != -1"); ++ } else { ++ __ move(AT, newval); ++ __ scd(AT, addr); ++ __ move($cr$$Register, AT); ++ } ++ %} ++ ins_pipe(long_memory_op); ++%} ++ ++// Conditional-store of an int value. ++// AT flag is set on success, reset otherwise. ++instruct storeIConditional(memory mem, mRegI oldval, mRegI newval, FlagsReg cr) %{ ++ match(Set cr (StoreIConditional mem (Binary oldval newval))); ++ format %{ "CMPXCHG $newval, $mem, $oldval \t# @storeIConditional" %} ++ ++ ins_encode %{ ++ Register oldval = $oldval$$Register; ++ Register newval = $newval$$Register; ++ Register cr = $cr$$Register; ++ Address addr(as_Register($mem$$base), $mem$$disp); ++ ++ int index = $mem$$index; ++ int scale = $mem$$scale; ++ int disp = $mem$$disp; ++ ++ guarantee(Assembler::is_simm16(disp), ""); ++ ++ if (index != -1) { ++ __ stop("in storeIConditional: index != -1"); ++ } else { ++ if (cr != addr.base() && cr != oldval && cr != newval) { ++ __ cmpxchg32(addr, oldval, newval, cr, true, false, true); ++ } else { ++ __ cmpxchg32(addr, oldval, newval, AT, true, false, true); ++ __ move(cr, AT); ++ } ++ } ++%} ++ ++ ins_pipe(long_memory_op); ++%} ++ ++// Conditional-store of a long value. ++// ZF flag is set on success, reset otherwise. Implemented with a CMPXCHG. ++instruct storeLConditional(memory mem, mRegL oldval, mRegL newval, FlagsReg cr) ++%{ ++ match(Set cr (StoreLConditional mem (Binary oldval newval))); ++ ++ format %{ "cmpxchg $mem, $newval\t# If $oldval == $mem then store $newval into $mem" %} ++ ins_encode%{ ++ Register oldval = $oldval$$Register; ++ Register newval = $newval$$Register; ++ Register cr = $cr$$Register; ++ Address addr(as_Register($mem$$base), $mem$$disp); ++ ++ int index = $mem$$index; ++ int scale = $mem$$scale; ++ int disp = $mem$$disp; ++ ++ guarantee(Assembler::is_simm16(disp), ""); ++ ++ if (index != -1) { ++ __ stop("in storeIConditional: index != -1"); ++ } else { ++ if (cr != addr.base() && cr != oldval && cr != newval) { ++ __ cmpxchg(addr, oldval, newval, cr, false, true); ++ } else { ++ __ cmpxchg(addr, oldval, newval, AT, false, true); ++ __ move(cr, AT); ++ } ++ } ++ %} ++ ins_pipe(long_memory_op); ++%} ++ ++// Implement LoadPLocked. Must be ordered against changes of the memory location ++// by storePConditional. 
++instruct loadPLocked(mRegP dst, memory mem) %{ ++ match(Set dst (LoadPLocked mem)); ++ ins_cost(MEMORY_REF_COST); ++ ++ format %{ "lld $dst, $mem #@loadPLocked\n\t" %} ++ size(12); ++ ins_encode %{ ++ relocInfo::relocType disp_reloc = $mem->disp_reloc(); ++ assert(disp_reloc == relocInfo::none, "cannot have disp"); ++ __ loadstore_enc($dst$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, C2_MacroAssembler::LOAD_LINKED_LONG); ++ %} ++ ins_pipe( ialu_loadI ); ++%} ++ ++ ++instruct compareAndSwapI(mRegI res, mRegP mem_ptr, mRegI oldval, mRegI newval) %{ ++ match(Set res (CompareAndSwapI mem_ptr (Binary oldval newval))); ++ format %{ "CMPXCHG $newval, [$mem_ptr], $oldval @ compareAndSwapI" %} ++ ins_encode %{ ++ Register newval = $newval$$Register; ++ Register oldval = $oldval$$Register; ++ Register res = $res$$Register; ++ Address addr($mem_ptr$$Register, 0); ++ ++ if (res != addr.base() && res != oldval && res != newval) { ++ __ cmpxchg32(addr, oldval, newval, res, true, false, true); ++ } else { ++ __ cmpxchg32(addr, oldval, newval, AT, true, false, true); ++ __ move(res, AT); ++ } ++ %} ++ ins_pipe(long_memory_op); ++%} ++ ++instruct compareAndSwapL(mRegI res, mRegP mem_ptr, mRegL oldval, mRegL newval) %{ ++ predicate(VM_Version::supports_cx8()); ++ match(Set res (CompareAndSwapL mem_ptr (Binary oldval newval))); ++ format %{ "CMPXCHG $newval, [$mem_ptr], $oldval @ compareAndSwapL" %} ++ ins_encode %{ ++ Register newval = $newval$$Register; ++ Register oldval = $oldval$$Register; ++ Register res = $res$$Register; ++ Address addr($mem_ptr$$Register, 0); ++ ++ if (res != addr.base() && res != oldval && res != newval) { ++ __ cmpxchg(addr, oldval, newval, res, false, true); ++ } else { ++ __ cmpxchg(addr, oldval, newval, AT, false, true); ++ __ move(res, AT); ++ } ++ %} ++ ins_pipe(long_memory_op); ++%} ++ ++instruct compareAndSwapP(mRegI res, mRegP mem_ptr, mRegP oldval, mRegP newval) %{ ++ match(Set res (CompareAndSwapP mem_ptr (Binary oldval newval))); ++ format %{ "CMPXCHG $newval, [$mem_ptr], $oldval @ compareAndSwapP" %} ++ ins_encode %{ ++ Register newval = $newval$$Register; ++ Register oldval = $oldval$$Register; ++ Register res = $res$$Register; ++ Address addr($mem_ptr$$Register, 0); ++ ++ if (res != addr.base() && res != oldval && res != newval) { ++ __ cmpxchg(addr, oldval, newval, res, false, true); ++ } else { ++ __ cmpxchg(addr, oldval, newval, AT, false, true); ++ __ move(res, AT); ++ } ++ %} ++ ins_pipe(long_memory_op); ++%} ++ ++instruct compareAndSwapN(mRegI res, mRegP mem_ptr, mRegN oldval, mRegN newval) %{ ++ match(Set res (CompareAndSwapN mem_ptr (Binary oldval newval))); ++ format %{ "CMPXCHG $newval, [$mem_ptr], $oldval @ compareAndSwapN" %} ++ ins_encode %{ ++ Register newval = $newval$$Register; ++ Register oldval = $oldval$$Register; ++ Register res = $res$$Register; ++ Address addr($mem_ptr$$Register, 0); ++ ++ if (res != addr.base() && res != oldval && res != newval) { ++ __ cmpxchg32(addr, oldval, newval, res, false, false, true); ++ } else { ++ __ cmpxchg32(addr, oldval, newval, AT, false, false, true); ++ __ move(res, AT); ++ } ++ %} ++ ins_pipe(long_memory_op); ++%} ++ ++instruct compareAndExchangeI(mRegI res, indirect mem, mRegI oldval, mRegI newval) %{ ++ ++ match(Set res (CompareAndExchangeI mem (Binary oldval newval))); ++ ins_cost(2 * MEMORY_REF_COST); ++ effect(TEMP_DEF res); ++ format %{ ++ "cmpxchg32 $res = $mem, $oldval, $newval\t# if $mem == $oldval then $mem <-- $newval @compareAndExchangeI" ++ %} ++ ins_encode %{ ++ Register newval = 
$newval$$Register; ++ Register oldval = $oldval$$Register; ++ Register res = $res$$Register; ++ Address addr(as_Register($mem$$base)); ++ __ cmpxchg32(addr, oldval, newval, res, true /* sign */, false /* retold */, true /* barrier */, false /* weak */, true /* exchange */); ++ %} ++ ins_pipe(pipe_slow); ++%} ++ ++instruct compareAndExchangeL(mRegL res, indirect mem, mRegL oldval, mRegL newval) %{ ++ ++ match(Set res (CompareAndExchangeL mem (Binary oldval newval))); ++ ins_cost(2 * MEMORY_REF_COST); ++ effect(TEMP_DEF res); ++ format %{ ++ "cmpxchg $res = $mem, $oldval, $newval\t# if $mem == $oldval then $mem <-- $newval @compareAndExchangeL" ++ %} ++ ins_encode %{ ++ Register newval = $newval$$Register; ++ Register oldval = $oldval$$Register; ++ Register res = $res$$Register; ++ Address addr(as_Register($mem$$base)); ++ __ cmpxchg(addr, oldval, newval, res, false /* retold */, true /* barrier */, false /* weak */, true /* exchange */); ++ %} ++ ins_pipe(pipe_slow); ++%} ++ ++instruct compareAndExchangeP(mRegP res, indirect mem, mRegP oldval, mRegP newval) %{ ++ predicate(n->as_LoadStore()->barrier_data() == 0); ++ match(Set res (CompareAndExchangeP mem (Binary oldval newval))); ++ ins_cost(2 * MEMORY_REF_COST); ++ effect(TEMP_DEF res); ++ format %{ ++ "cmpxchg $res = $mem, $oldval, $newval\t# if $mem == $oldval then $mem <-- $newval @compareAndExchangeP" ++ %} ++ ins_encode %{ ++ Register newval = $newval$$Register; ++ Register oldval = $oldval$$Register; ++ Register res = $res$$Register; ++ Address addr(as_Register($mem$$base)); ++ __ cmpxchg(addr, oldval, newval, res, false /* retold */, true /* barrier */, false /* weak */, true /* exchange */); ++ %} ++ ins_pipe(pipe_slow); ++%} ++ ++instruct compareAndExchangeN(mRegN res, indirect mem, mRegN oldval, mRegN newval) %{ ++ ++ match(Set res (CompareAndExchangeN mem (Binary oldval newval))); ++ ins_cost(2 * MEMORY_REF_COST); ++ effect(TEMP_DEF res); ++ format %{ ++ "cmpxchg32 $res = $mem, $oldval, $newval\t# if $mem == $oldval then $mem <-- $newval @compareAndExchangeN" ++ %} ++ ins_encode %{ ++ Register newval = $newval$$Register; ++ Register oldval = $oldval$$Register; ++ Register res = $res$$Register; ++ Address addr(as_Register($mem$$base)); ++ __ cmpxchg32(addr, oldval, newval, res, false /* sign */, false /* retold */, true /* barrier */, false /* weak */, true /* exchange */); ++ %} ++ ins_pipe(pipe_slow); ++%} ++ ++instruct weakCompareAndSwapI(mRegI res, indirect mem, mRegI oldval, mRegI newval) %{ ++ ++ match(Set res (WeakCompareAndSwapI mem (Binary oldval newval))); ++ ins_cost(2 * MEMORY_REF_COST); ++ format %{ ++ "cmpxchg32 $res = $mem, $oldval, $newval\t# if $mem == $oldval then $mem <-- $newval @weakCompareAndSwapI" ++ %} ++ ins_encode %{ ++ Register newval = $newval$$Register; ++ Register oldval = $oldval$$Register; ++ Register res = $res$$Register; ++ Address addr(as_Register($mem$$base)); ++ if (res != addr.base() && res != oldval && res != newval) { ++ __ cmpxchg32(addr, oldval, newval, res, true /* sign */, false /* retold */, true /* barrier */, true /* weak */, false /* exchange */); ++ } else { ++ __ cmpxchg32(addr, oldval, newval, AT, true /* sign */, false /* retold */, true /* barrier */, true /* weak */, false /* exchange */); ++ __ move(res, AT); ++ } ++ %} ++ ins_pipe(pipe_slow); ++%} ++ ++instruct weakCompareAndSwapL(mRegI res, indirect mem, mRegL oldval, mRegL newval) %{ ++ ++ match(Set res (WeakCompareAndSwapL mem (Binary oldval newval))); ++ ins_cost(2 * MEMORY_REF_COST); ++ format %{ ++ "cmpxchg $res = 
$mem, $oldval, $newval\t# if $mem == $oldval then $mem <-- $newval @WeakCompareAndSwapL" ++ %} ++ ins_encode %{ ++ Register newval = $newval$$Register; ++ Register oldval = $oldval$$Register; ++ Register res = $res$$Register; ++ Address addr(as_Register($mem$$base)); ++ if (res != addr.base() && res != oldval && res != newval) { ++ __ cmpxchg(addr, oldval, newval, res, false /* retold */, true /* barrier */, true /* weak */, false /* exchange */); ++ } else { ++ __ cmpxchg(addr, oldval, newval, AT, false /* retold */, true /* barrier */, true /* weak */, false /* exchange */); ++ __ move(res, AT); ++ } ++ %} ++ ins_pipe(pipe_slow); ++%} ++ ++instruct weakCompareAndSwapP(mRegI res, indirect mem, mRegP oldval, mRegP newval) %{ ++ predicate(n->as_LoadStore()->barrier_data() == 0); ++ match(Set res (WeakCompareAndSwapP mem (Binary oldval newval))); ++ ins_cost(2 * MEMORY_REF_COST); ++ format %{ ++ "cmpxchg $res = $mem, $oldval, $newval\t# if $mem == $oldval then $mem <-- $newval @weakCompareAndSwapP" ++ %} ++ ins_encode %{ ++ Register newval = $newval$$Register; ++ Register oldval = $oldval$$Register; ++ Register res = $res$$Register; ++ Address addr(as_Register($mem$$base)); ++ if (res != addr.base() && res != oldval && res != newval) { ++ __ cmpxchg(addr, oldval, newval, res, false /* retold */, true /* barrier */, true /* weak */, false /* exchange */); ++ } else { ++ __ cmpxchg(addr, oldval, newval, AT, false /* retold */, true /* barrier */, true /* weak */, false /* exchange */); ++ __ move(res, AT); ++ } ++ %} ++ ins_pipe(pipe_slow); ++%} ++ ++instruct weakCompareAndSwapN(mRegI res, indirect mem, mRegN oldval, mRegN newval) %{ ++ ++ match(Set res (WeakCompareAndSwapN mem (Binary oldval newval))); ++ ins_cost(2 * MEMORY_REF_COST); ++ format %{ ++ "cmpxchg32 $res = $mem, $oldval, $newval\t# if $mem == $oldval then $mem <-- $newval @weakCompareAndSwapN" ++ %} ++ ins_encode %{ ++ Register newval = $newval$$Register; ++ Register oldval = $oldval$$Register; ++ Register res = $res$$Register; ++ Address addr(as_Register($mem$$base)); ++ if (res != addr.base() && res != oldval && res != newval) { ++ __ cmpxchg32(addr, oldval, newval, res, false /* sign */, false /* retold */, true /* barrier */, true /* weak */, false /* exchange */); ++ } else { ++ __ cmpxchg32(addr, oldval, newval, AT, false /* sign */, false /* retold */, true /* barrier */, true /* weak */, false /* exchange */); ++ __ move(res, AT); ++ } ++ %} ++ ins_pipe(pipe_slow); ++%} ++ ++//----------Max and Min-------------------------------------------------------- ++// Min Instructions ++//// ++// *** Min and Max using the conditional move are slower than the ++// *** branch version on a Pentium III. ++// // Conditional move for min ++//instruct cmovI_reg_lt( eRegI op2, eRegI op1, eFlagsReg cr ) %{ ++// effect( USE_DEF op2, USE op1, USE cr ); ++// format %{ "CMOVlt $op2,$op1\t! 
min" %} ++// opcode(0x4C,0x0F); ++// ins_encode( OpcS, OpcP, RegReg( op2, op1 ) ); ++// ins_pipe( pipe_cmov_reg ); ++//%} ++// ++//// Min Register with Register (P6 version) ++//instruct minI_eReg_p6( eRegI op1, eRegI op2 ) %{ ++// predicate(VM_Version::supports_cmov() ); ++// match(Set op2 (MinI op1 op2)); ++// ins_cost(200); ++// expand %{ ++// eFlagsReg cr; ++// compI_eReg(cr,op1,op2); ++// cmovI_reg_lt(op2,op1,cr); ++// %} ++//%} ++ ++// Min Register with Register (generic version) ++instruct minI_Reg_Reg(mRegI dst, mRegI src) %{ ++ match(Set dst (MinI dst src)); ++ //effect(KILL flags); ++ ins_cost(80); ++ ++ format %{ "MIN $dst, $src @minI_Reg_Reg" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src = $src$$Register; ++ ++ __ slt(AT, src, dst); ++ __ movn(dst, src, AT); ++ ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++// Max Register with Register ++// *** Min and Max using the conditional move are slower than the ++// *** branch version on a Pentium III. ++// // Conditional move for max ++//instruct cmovI_reg_gt( eRegI op2, eRegI op1, eFlagsReg cr ) %{ ++// effect( USE_DEF op2, USE op1, USE cr ); ++// format %{ "CMOVgt $op2,$op1\t! max" %} ++// opcode(0x4F,0x0F); ++// ins_encode( OpcS, OpcP, RegReg( op2, op1 ) ); ++// ins_pipe( pipe_cmov_reg ); ++//%} ++// ++// // Max Register with Register (P6 version) ++//instruct maxI_eReg_p6( eRegI op1, eRegI op2 ) %{ ++// predicate(VM_Version::supports_cmov() ); ++// match(Set op2 (MaxI op1 op2)); ++// ins_cost(200); ++// expand %{ ++// eFlagsReg cr; ++// compI_eReg(cr,op1,op2); ++// cmovI_reg_gt(op2,op1,cr); ++// %} ++//%} ++ ++// Max Register with Register (generic version) ++instruct maxI_Reg_Reg(mRegI dst, mRegI src) %{ ++ match(Set dst (MaxI dst src)); ++ ins_cost(80); ++ ++ format %{ "MAX $dst, $src @maxI_Reg_Reg" %} ++ ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src = $src$$Register; ++ ++ __ slt(AT, dst, src); ++ __ movn(dst, src, AT); ++ ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct maxI_Reg_zero(mRegI dst, immI_0 zero) %{ ++ match(Set dst (MaxI dst zero)); ++ ins_cost(50); ++ ++ format %{ "MAX $dst, 0 @maxI_Reg_zero" %} ++ ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ ++ __ slt(AT, dst, R0); ++ __ movn(dst, R0, AT); ++ ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct zerox_long_reg_reg(mRegL dst, mRegL src, immL_MaxUI mask) ++%{ ++ match(Set dst (AndL src mask)); ++ ++ format %{ "movl $dst, $src\t# zero-extend long @ zerox_long_reg_reg" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src = $src$$Register; ++ ++ __ dext(dst, src, 0, 32); ++ %} ++ ins_pipe(ialu_regI_regI); ++%} ++ ++instruct combine_i2l(mRegL dst, mRegI src1, immL_MaxUI mask, mRegI src2, immI_32 shift32) ++%{ ++ match(Set dst (OrL (AndL (ConvI2L src1) mask) (LShiftL (ConvI2L src2) shift32))); ++ ++ format %{ "combine_i2l $dst, $src2(H), $src1(L) @ combine_i2l" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src1 = $src1$$Register; ++ Register src2 = $src2$$Register; ++ ++ if (src1 == dst) { ++ __ dinsu(dst, src2, 32, 32); ++ } else if (src2 == dst) { ++ __ dsll32(dst, dst, 0); ++ __ dins(dst, src1, 0, 32); ++ } else { ++ __ dext(dst, src1, 0, 32); ++ __ dinsu(dst, src2, 32, 32); ++ } ++ %} ++ ins_pipe(ialu_regI_regI); ++%} ++ ++// Zero-extend convert int to long ++instruct convI2L_reg_reg_zex(mRegL dst, mRegI src, immL_MaxUI mask) ++%{ ++ match(Set dst (AndL (ConvI2L src) mask)); ++ ++ format %{ "movl $dst, $src\t# i2l zero-extend @ convI2L_reg_reg_zex" %} ++ ins_encode %{ ++ 
Register dst = $dst$$Register; ++ Register src = $src$$Register; ++ ++ __ dext(dst, src, 0, 32); ++ %} ++ ins_pipe(ialu_regI_regI); ++%} ++ ++instruct convL2I2L_reg_reg_zex(mRegL dst, mRegL src, immL_MaxUI mask) ++%{ ++ match(Set dst (AndL (ConvI2L (ConvL2I src)) mask)); ++ ++ format %{ "movl $dst, $src\t# i2l zero-extend @ convL2I2L_reg_reg_zex" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src = $src$$Register; ++ ++ __ dext(dst, src, 0, 32); ++ %} ++ ins_pipe(ialu_regI_regI); ++%} ++ ++// Match loading integer and casting it to unsigned int in long register. ++// LoadI + ConvI2L + AndL 0xffffffff. ++instruct loadUI2L_rmask(mRegL dst, memory mem, immL_MaxUI mask) %{ ++ match(Set dst (AndL (ConvI2L (LoadI mem)) mask)); ++ ++ format %{ "lwu $dst, $mem \t// zero-extend to long @ loadUI2L_rmask" %} ++ ins_encode %{ ++ relocInfo::relocType disp_reloc = $mem->disp_reloc(); ++ assert(disp_reloc == relocInfo::none, "cannot have disp"); ++ __ loadstore_enc($dst$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, C2_MacroAssembler::LOAD_U_INT); ++ %} ++ ins_pipe(ialu_loadI); ++%} ++ ++instruct loadUI2L_lmask(mRegL dst, memory mem, immL_MaxUI mask) %{ ++ match(Set dst (AndL mask (ConvI2L (LoadI mem)))); ++ ++ format %{ "lwu $dst, $mem \t// zero-extend to long @ loadUI2L_lmask" %} ++ ins_encode %{ ++ relocInfo::relocType disp_reloc = $mem->disp_reloc(); ++ assert(disp_reloc == relocInfo::none, "cannot have disp"); ++ __ loadstore_enc($dst$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, C2_MacroAssembler::LOAD_U_INT); ++ %} ++ ins_pipe(ialu_loadI); ++%} ++ ++ ++// ============================================================================ ++// Safepoint Instruction ++ ++instruct safePoint_poll_tls(mRegP poll) %{ ++ match(SafePoint poll); ++ effect(USE poll); ++ ++ ins_cost(125); ++ format %{ "lw AT, [$poll]\t" ++ "Safepoint @ [$poll] : poll for GC" %} ++ size(4); ++ ins_encode %{ ++ Register poll_reg = $poll$$Register; ++ ++ __ block_comment("Safepoint:"); ++ __ relocate(relocInfo::poll_type); ++ address pre_pc = __ pc(); ++ __ lw(AT, poll_reg, 0); ++ assert(nativeInstruction_at(pre_pc)->is_safepoint_poll(), "must emit lw AT, [$poll]"); ++ %} ++ ++ ins_pipe( ialu_storeI ); ++%} ++ ++//----------Arithmetic Conversion Instructions--------------------------------- ++ ++instruct roundFloat_nop(regF dst) ++%{ ++ match(Set dst (RoundFloat dst)); ++ ++ ins_cost(0); ++ ins_encode(); ++ ins_pipe(empty); ++%} ++ ++instruct roundDouble_nop(regD dst) ++%{ ++ match(Set dst (RoundDouble dst)); ++ ++ ins_cost(0); ++ ins_encode(); ++ ins_pipe(empty); ++%} ++ ++//---------- Zeros Count Instructions ------------------------------------------ ++// CountLeadingZerosINode CountTrailingZerosINode ++instruct countLeadingZerosI(mRegI dst, mRegI src) %{ ++ predicate(UseCountLeadingZerosInstructionMIPS64); ++ match(Set dst (CountLeadingZerosI src)); ++ ++ format %{ "clz $dst, $src\t# count leading zeros (int)" %} ++ ins_encode %{ ++ __ clz($dst$$Register, $src$$Register); ++ %} ++ ins_pipe( ialu_regL_regL ); ++%} ++ ++instruct countLeadingZerosL(mRegI dst, mRegL src) %{ ++ predicate(UseCountLeadingZerosInstructionMIPS64); ++ match(Set dst (CountLeadingZerosL src)); ++ ++ format %{ "dclz $dst, $src\t# count leading zeros (long)" %} ++ ins_encode %{ ++ __ dclz($dst$$Register, $src$$Register); ++ %} ++ ins_pipe( ialu_regL_regL ); ++%} ++ ++instruct countTrailingZerosI(mRegI dst, mRegI src) %{ ++ predicate(UseCountTrailingZerosInstructionMIPS64); ++ match(Set dst (CountTrailingZerosI 
src)); ++ ++ format %{ "ctz $dst, $src\t# count trailing zeros (int)" %} ++ ins_encode %{ ++ // ctz and dctz is gs instructions. ++ __ ctz($dst$$Register, $src$$Register); ++ %} ++ ins_pipe( ialu_regL_regL ); ++%} ++ ++instruct countTrailingZerosL(mRegI dst, mRegL src) %{ ++ predicate(UseCountTrailingZerosInstructionMIPS64); ++ match(Set dst (CountTrailingZerosL src)); ++ ++ format %{ "dcto $dst, $src\t# count trailing zeros (long)" %} ++ ins_encode %{ ++ __ dctz($dst$$Register, $src$$Register); ++ %} ++ ins_pipe( ialu_regL_regL ); ++%} ++ ++// ====================VECTOR INSTRUCTIONS===================================== ++ ++// Load vectors (8 bytes long) ++instruct loadV8(vecD dst, memory mem) %{ ++ predicate(n->as_LoadVector()->memory_size() == 8); ++ match(Set dst (LoadVector mem)); ++ ins_cost(125); ++ format %{ "load $dst, $mem\t! load vector (8 bytes)" %} ++ ins_encode %{ ++ __ loadstore_enc($dst$$FloatRegister, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, C2_MacroAssembler::LOAD_DOUBLE); ++ %} ++ ins_pipe( fpu_loadF ); ++%} ++ ++// Store vectors (8 bytes long) ++instruct storeV8(memory mem, vecD src) %{ ++ predicate(n->as_StoreVector()->memory_size() == 8); ++ match(Set mem (StoreVector mem src)); ++ ins_cost(145); ++ format %{ "store $mem, $src\t! store vector (8 bytes)" %} ++ ins_encode %{ ++ __ loadstore_enc($src$$FloatRegister, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, C2_MacroAssembler::STORE_DOUBLE); ++ %} ++ ins_pipe( fpu_storeF ); ++%} ++ ++instruct Repl8B_DSP(vecD dst, mRegI src) %{ ++ predicate(n->as_Vector()->length() == 8 && UseLEXT3); ++ match(Set dst (ReplicateB src)); ++ ins_cost(100); ++ format %{ "replv_ob AT, $src\n\t" ++ "dmtc1 AT, $dst\t! replicate8B" %} ++ ins_encode %{ ++ __ replv_ob(AT, $src$$Register); ++ __ dmtc1(AT, $dst$$FloatRegister); ++ %} ++ ins_pipe( pipe_mtc1 ); ++%} ++ ++instruct Repl8B(vecD dst, mRegI src) %{ ++ predicate(n->as_Vector()->length() == 8); ++ match(Set dst (ReplicateB src)); ++ ins_cost(140); ++ format %{ "move AT, $src\n\t" ++ "dins AT, AT, 8, 8\n\t" ++ "dins AT, AT, 16, 16\n\t" ++ "dinsu AT, AT, 32, 32\n\t" ++ "dmtc1 AT, $dst\t! replicate8B" %} ++ ins_encode %{ ++ __ move(AT, $src$$Register); ++ __ dins(AT, AT, 8, 8); ++ __ dins(AT, AT, 16, 16); ++ __ dinsu(AT, AT, 32, 32); ++ __ dmtc1(AT, $dst$$FloatRegister); ++ %} ++ ins_pipe( pipe_mtc1 ); ++%} ++ ++instruct Repl8B_imm_DSP(vecD dst, immI con) %{ ++ predicate(n->as_Vector()->length() == 8 && UseLEXT3 && VM_Version::supports_dsp()); ++ match(Set dst (ReplicateB con)); ++ ins_cost(110); ++ format %{ "repl_ob AT, [$con]\n\t" ++ "dmtc1 AT, $dst,0x00\t! replicate8B($con)" %} ++ ins_encode %{ ++ int val = $con$$constant; ++ __ repl_ob(AT, val); ++ __ dmtc1(AT, $dst$$FloatRegister); ++ %} ++ ins_pipe( pipe_mtc1 ); ++%} ++ ++instruct Repl8B_imm(vecD dst, immI con) %{ ++ predicate(n->as_Vector()->length() == 8); ++ match(Set dst (ReplicateB con)); ++ ins_cost(150); ++ format %{ "move AT, [$con]\n\t" ++ "dins AT, AT, 8, 8\n\t" ++ "dins AT, AT, 16, 16\n\t" ++ "dinsu AT, AT, 32, 32\n\t" ++ "dmtc1 AT, $dst,0x00\t! replicate8B($con)" %} ++ ins_encode %{ ++ __ move(AT, $con$$constant); ++ __ dins(AT, AT, 8, 8); ++ __ dins(AT, AT, 16, 16); ++ __ dinsu(AT, AT, 32, 32); ++ __ dmtc1(AT, $dst$$FloatRegister); ++ %} ++ ins_pipe( pipe_mtc1 ); ++%} ++ ++instruct Repl8B_zero(vecD dst, immI_0 zero) %{ ++ predicate(n->as_Vector()->length() == 8); ++ match(Set dst (ReplicateB zero)); ++ ins_cost(90); ++ format %{ "dmtc1 R0, $dst\t! 
replicate8B zero" %} ++ ins_encode %{ ++ __ dmtc1(R0, $dst$$FloatRegister); ++ %} ++ ins_pipe( pipe_mtc1 ); ++%} ++ ++instruct Repl8B_M1(vecD dst, immI_M1 M1) %{ ++ predicate(n->as_Vector()->length() == 8); ++ match(Set dst (ReplicateB M1)); ++ ins_cost(80); ++ format %{ "dmtc1 -1, $dst\t! replicate8B -1" %} ++ ins_encode %{ ++ __ nor(AT, R0, R0); ++ __ dmtc1(AT, $dst$$FloatRegister); ++ %} ++ ins_pipe( pipe_mtc1 ); ++%} ++ ++instruct Repl4S_DSP(vecD dst, mRegI src) %{ ++ predicate(n->as_Vector()->length() == 4 && UseLEXT3 && VM_Version::supports_dsp()); ++ match(Set dst (ReplicateS src)); ++ ins_cost(100); ++ format %{ "replv_qh AT, $src\n\t" ++ "dmtc1 AT, $dst\t! replicate4S" %} ++ ins_encode %{ ++ __ replv_qh(AT, $src$$Register); ++ __ dmtc1(AT, $dst$$FloatRegister); ++ %} ++ ins_pipe( pipe_mtc1 ); ++%} ++ ++instruct Repl4S(vecD dst, mRegI src) %{ ++ predicate(n->as_Vector()->length() == 4); ++ match(Set dst (ReplicateS src)); ++ ins_cost(120); ++ format %{ "move AT, $src \n\t" ++ "dins AT, AT, 16, 16\n\t" ++ "dinsu AT, AT, 32, 32\n\t" ++ "dmtc1 AT, $dst\t! replicate4S" %} ++ ins_encode %{ ++ __ move(AT, $src$$Register); ++ __ dins(AT, AT, 16, 16); ++ __ dinsu(AT, AT, 32, 32); ++ __ dmtc1(AT, $dst$$FloatRegister); ++ %} ++ ins_pipe( pipe_mtc1 ); ++%} ++ ++instruct Repl4S_imm_DSP(vecD dst, immI con) %{ ++ predicate(n->as_Vector()->length() == 4 && UseLEXT3 && VM_Version::supports_dsp()); ++ match(Set dst (ReplicateS con)); ++ ins_cost(100); ++ format %{ "repl_qh AT, [$con]\n\t" ++ "dmtc1 AT, $dst\t! replicate4S($con)" %} ++ ins_encode %{ ++ int val = $con$$constant; ++ if ( Assembler::is_simm(val, 10)) { ++ //repl_qh supports 10 bits immediate ++ __ repl_qh(AT, val); ++ } else { ++ __ li32(AT, val); ++ __ replv_qh(AT, AT); ++ } ++ __ dmtc1(AT, $dst$$FloatRegister); ++ %} ++ ins_pipe( pipe_mtc1 ); ++%} ++ ++instruct Repl4S_imm(vecD dst, immI con) %{ ++ predicate(n->as_Vector()->length() == 4); ++ match(Set dst (ReplicateS con)); ++ ins_cost(110); ++ format %{ "move AT, [$con]\n\t" ++ "dins AT, AT, 16, 16\n\t" ++ "dinsu AT, AT, 32, 32\n\t" ++ "dmtc1 AT, $dst\t! replicate4S($con)" %} ++ ins_encode %{ ++ __ move(AT, $con$$constant); ++ __ dins(AT, AT, 16, 16); ++ __ dinsu(AT, AT, 32, 32); ++ __ dmtc1(AT, $dst$$FloatRegister); ++ %} ++ ins_pipe( pipe_mtc1 ); ++%} ++ ++instruct Repl4S_zero(vecD dst, immI_0 zero) %{ ++ predicate(n->as_Vector()->length() == 4); ++ match(Set dst (ReplicateS zero)); ++ format %{ "dmtc1 R0, $dst\t! replicate4S zero" %} ++ ins_encode %{ ++ __ dmtc1(R0, $dst$$FloatRegister); ++ %} ++ ins_pipe( pipe_mtc1 ); ++%} ++ ++instruct Repl4S_M1(vecD dst, immI_M1 M1) %{ ++ predicate(n->as_Vector()->length() == 4); ++ match(Set dst (ReplicateS M1)); ++ format %{ "dmtc1 -1, $dst\t! replicate4S -1" %} ++ ins_encode %{ ++ __ nor(AT, R0, R0); ++ __ dmtc1(AT, $dst$$FloatRegister); ++ %} ++ ins_pipe( pipe_mtc1 ); ++%} ++ ++// Replicate integer (4 byte) scalar to be vector ++instruct Repl2I(vecD dst, mRegI src) %{ ++ predicate(n->as_Vector()->length() == 2); ++ match(Set dst (ReplicateI src)); ++ format %{ "dins AT, $src, 0, 32\n\t" ++ "dinsu AT, $src, 32, 32\n\t" ++ "dmtc1 AT, $dst\t! replicate2I" %} ++ ins_encode %{ ++ __ dins(AT, $src$$Register, 0, 32); ++ __ dinsu(AT, $src$$Register, 32, 32); ++ __ dmtc1(AT, $dst$$FloatRegister); ++ %} ++ ins_pipe( pipe_mtc1 ); ++%} ++ ++// Replicate integer (4 byte) scalar immediate to be vector by loading from const table. 
++instruct Repl2I_imm(vecD dst, immI con, mA7RegI tmp) %{ ++ predicate(n->as_Vector()->length() == 2); ++ match(Set dst (ReplicateI con)); ++ effect(KILL tmp); ++ format %{ "li32 AT, [$con], 32\n\t" ++ "dinsu AT, AT\n\t" ++ "dmtc1 AT, $dst\t! replicate2I($con)" %} ++ ins_encode %{ ++ int val = $con$$constant; ++ __ li32(AT, val); ++ __ dinsu(AT, AT, 32, 32); ++ __ dmtc1(AT, $dst$$FloatRegister); ++ %} ++ ins_pipe( pipe_mtc1 ); ++%} ++ ++// Replicate integer (4 byte) scalar zero to be vector ++instruct Repl2I_zero(vecD dst, immI_0 zero) %{ ++ predicate(n->as_Vector()->length() == 2); ++ match(Set dst (ReplicateI zero)); ++ format %{ "dmtc1 R0, $dst\t! replicate2I zero" %} ++ ins_encode %{ ++ __ dmtc1(R0, $dst$$FloatRegister); ++ %} ++ ins_pipe( pipe_mtc1 ); ++%} ++ ++// Replicate integer (4 byte) scalar -1 to be vector ++instruct Repl2I_M1(vecD dst, immI_M1 M1) %{ ++ predicate(n->as_Vector()->length() == 2); ++ match(Set dst (ReplicateI M1)); ++ format %{ "dmtc1 -1, $dst\t! replicate2I -1, use AT" %} ++ ins_encode %{ ++ __ nor(AT, R0, R0); ++ __ dmtc1(AT, $dst$$FloatRegister); ++ %} ++ ins_pipe( pipe_mtc1 ); ++%} ++ ++// Replicate float (4 byte) scalar to be vector ++instruct Repl2F(vecD dst, regF src) %{ ++ predicate(n->as_Vector()->length() == 2); ++ match(Set dst (ReplicateF src)); ++ format %{ "cvt.ps $dst, $src, $src\t! replicate2F" %} ++ ins_encode %{ ++ __ cvt_ps_s($dst$$FloatRegister, $src$$FloatRegister, $src$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++// Replicate float (4 byte) scalar zero to be vector ++instruct Repl2F_zero(vecD dst, immF_0 zero) %{ ++ predicate(n->as_Vector()->length() == 2); ++ match(Set dst (ReplicateF zero)); ++ format %{ "dmtc1 R0, $dst\t! replicate2F zero" %} ++ ins_encode %{ ++ __ dmtc1(R0, $dst$$FloatRegister); ++ %} ++ ins_pipe( pipe_mtc1 ); ++%} ++ ++ ++// ====================VECTOR ARITHMETIC======================================= ++ ++// --------------------------------- ADD -------------------------------------- ++ ++// Floats vector add ++// kernel does not have emulation of PS instructions yet, so PS instructions is disabled. ++instruct vadd2F(vecD dst, vecD src) %{ ++ predicate(n->as_Vector()->length() == 2); ++ match(Set dst (AddVF dst src)); ++ format %{ "add.ps $dst,$src\t! add packed2F" %} ++ ins_encode %{ ++ __ add_ps($dst$$FloatRegister, $dst$$FloatRegister, $src$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct vadd2F3(vecD dst, vecD src1, vecD src2) %{ ++ predicate(n->as_Vector()->length() == 2); ++ match(Set dst (AddVF src1 src2)); ++ format %{ "add.ps $dst,$src1,$src2\t! add packed2F" %} ++ ins_encode %{ ++ __ add_ps($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); ++ %} ++ ins_pipe( fpu_regF_regF ); ++%} ++ ++// --------------------------------- SUB -------------------------------------- ++ ++// Floats vector sub ++instruct vsub2F(vecD dst, vecD src) %{ ++ predicate(n->as_Vector()->length() == 2); ++ match(Set dst (SubVF dst src)); ++ format %{ "sub.ps $dst,$src\t! sub packed2F" %} ++ ins_encode %{ ++ __ sub_ps($dst$$FloatRegister, $dst$$FloatRegister, $src$$FloatRegister); ++ %} ++ ins_pipe( fpu_regF_regF ); ++%} ++ ++// --------------------------------- MUL -------------------------------------- ++ ++// Floats vector mul ++instruct vmul2F(vecD dst, vecD src) %{ ++ predicate(n->as_Vector()->length() == 2); ++ match(Set dst (MulVF dst src)); ++ format %{ "mul.ps $dst, $src\t! 
mul packed2F" %} ++ ins_encode %{ ++ __ mul_ps($dst$$FloatRegister, $dst$$FloatRegister, $src$$FloatRegister); ++ %} ++ ins_pipe( fpu_regF_regF ); ++%} ++ ++instruct vmul2F3(vecD dst, vecD src1, vecD src2) %{ ++ predicate(n->as_Vector()->length() == 2); ++ match(Set dst (MulVF src1 src2)); ++ format %{ "mul.ps $dst, $src1, $src2\t! mul packed2F" %} ++ ins_encode %{ ++ __ mul_ps($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); ++ %} ++ ins_pipe( fpu_regF_regF ); ++%} ++ ++// --------------------------------- DIV -------------------------------------- ++// MIPS do not have div.ps ++ ++// --------------------------------- MADD -------------------------------------- ++// Floats vector madd ++//instruct vmadd2F(vecD dst, vecD src1, vecD src2, vecD src3) %{ ++// predicate(n->as_Vector()->length() == 2); ++// match(Set dst (AddVF (MulVF src1 src2) src3)); ++// ins_cost(50); ++// format %{ "madd.ps $dst, $src3, $src1, $src2\t! madd packed2F" %} ++// ins_encode %{ ++// __ madd_ps($dst$$FloatRegister, $src3$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); ++// %} ++// ins_pipe( fpu_regF_regF ); ++//%} ++ ++ ++//----------PEEPHOLE RULES----------------------------------------------------- ++// These must follow all instruction definitions as they use the names ++// defined in the instructions definitions. ++// ++// peepmatch ( root_instr_name [preceeding_instruction]* ); ++// ++// peepconstraint %{ ++// (instruction_number.operand_name relational_op instruction_number.operand_name ++// [, ...] ); ++// // instruction numbers are zero-based using left to right order in peepmatch ++// ++// peepreplace ( instr_name ( [instruction_number.operand_name]* ) ); ++// // provide an instruction_number.operand_name for each operand that appears ++// // in the replacement instruction's match rule ++// ++// ---------VM FLAGS--------------------------------------------------------- ++// ++// All peephole optimizations can be turned off using -XX:-OptoPeephole ++// ++// Each peephole rule is given an identifying number starting with zero and ++// increasing by one in the order seen by the parser. An individual peephole ++// can be enabled, and all others disabled, by using -XX:OptoPeepholeAt=# ++// on the command-line. ++// ++// ---------CURRENT LIMITATIONS---------------------------------------------- ++// ++// Only match adjacent instructions in same basic block ++// Only equality constraints ++// Only constraints between operands, not (0.dest_reg == EAX_enc) ++// Only one replacement instruction ++// ++// ---------EXAMPLE---------------------------------------------------------- ++// ++// // pertinent parts of existing instructions in architecture description ++// instruct movI(eRegI dst, eRegI src) %{ ++// match(Set dst (CopyI src)); ++// %} ++// ++// instruct incI_eReg(eRegI dst, immI_1 src, eFlagsReg cr) %{ ++// match(Set dst (AddI dst src)); ++// effect(KILL cr); ++// %} ++// ++// // Change (inc mov) to lea ++// peephole %{ ++// // increment preceeded by register-register move ++// peepmatch ( incI_eReg movI ); ++// // require that the destination register of the increment ++// // match the destination register of the move ++// peepconstraint ( 0.dst == 1.dst ); ++// // construct a replacement instruction that sets ++// // the destination to ( move's source register + one ) ++// peepreplace ( leaI_eReg_immI( 0.dst 1.src 0.src ) ); ++// %} ++// ++// Implementation no longer uses movX instructions since ++// machine-independent system no longer uses CopyX nodes. 
++// ++// peephole %{ ++// peepmatch ( incI_eReg movI ); ++// peepconstraint ( 0.dst == 1.dst ); ++// peepreplace ( leaI_eReg_immI( 0.dst 1.src 0.src ) ); ++// %} ++// ++// peephole %{ ++// peepmatch ( decI_eReg movI ); ++// peepconstraint ( 0.dst == 1.dst ); ++// peepreplace ( leaI_eReg_immI( 0.dst 1.src 0.src ) ); ++// %} ++// ++// peephole %{ ++// peepmatch ( addI_eReg_imm movI ); ++// peepconstraint ( 0.dst == 1.dst ); ++// peepreplace ( leaI_eReg_immI( 0.dst 1.src 0.src ) ); ++// %} ++// ++// peephole %{ ++// peepmatch ( addP_eReg_imm movP ); ++// peepconstraint ( 0.dst == 1.dst ); ++// peepreplace ( leaP_eReg_immI( 0.dst 1.src 0.src ) ); ++// %} ++ ++// // Change load of spilled value to only a spill ++// instruct storeI(memory mem, eRegI src) %{ ++// match(Set mem (StoreI mem src)); ++// %} ++// ++// instruct loadI(eRegI dst, memory mem) %{ ++// match(Set dst (LoadI mem)); ++// %} ++// ++//peephole %{ ++// peepmatch ( loadI storeI ); ++// peepconstraint ( 1.src == 0.dst, 1.mem == 0.mem ); ++// peepreplace ( storeI( 1.mem 1.mem 1.src ) ); ++//%} ++ ++//----------SMARTSPILL RULES--------------------------------------------------- ++// These must follow all instruction definitions as they use the names ++// defined in the instructions definitions. ++ +diff --git a/src/hotspot/cpu/mips/nativeInst_mips.cpp b/src/hotspot/cpu/mips/nativeInst_mips.cpp +new file mode 100644 +index 00000000000..05c525f35ef +--- /dev/null ++++ b/src/hotspot/cpu/mips/nativeInst_mips.cpp +@@ -0,0 +1,1825 @@ ++/* ++ * Copyright (c) 1997, 2014, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ * ++ */ ++ ++#include "precompiled.hpp" ++#include "asm/macroAssembler.hpp" ++#include "compiler/disassembler.hpp" ++#include "code/codeCache.hpp" ++#include "code/compiledIC.hpp" ++#include "gc/shared/collectedHeap.hpp" ++#include "memory/resourceArea.hpp" ++#include "nativeInst_mips.hpp" ++#include "oops/oop.inline.hpp" ++#include "runtime/handles.hpp" ++#include "runtime/sharedRuntime.hpp" ++#include "runtime/stubRoutines.hpp" ++#include "utilities/ostream.hpp" ++ ++#include ++ ++#define A0 RA0 ++#define A1 RA1 ++#define A2 RA2 ++#define A3 RA3 ++#define A4 RA4 ++#define A5 RA5 ++#define A6 RA6 ++#define A7 RA7 ++#define T0 RT0 ++#define T1 RT1 ++#define T2 RT2 ++#define T3 RT3 ++#define T8 RT8 ++#define T9 RT9 ++ ++void NativeInstruction::wrote(int offset) { ++ ICache::invalidate_word(addr_at(offset)); ++} ++ ++void NativeInstruction::set_long_at(int offset, long i) { ++ address addr = addr_at(offset); ++ *(long*)addr = i; ++ ICache::invalidate_range(addr, 8); ++} ++ ++static int illegal_instruction_bits = 0; ++ ++int NativeInstruction::illegal_instruction() { ++ if (illegal_instruction_bits == 0) { ++ ResourceMark rm; ++ char buf[40]; ++ CodeBuffer cbuf((address)&buf[0], 20); ++ MacroAssembler* a = new MacroAssembler(&cbuf); ++ address ia = a->pc(); ++ a->brk(11); ++ int bits = *(int*)ia; ++ illegal_instruction_bits = bits; ++ } ++ return illegal_instruction_bits; ++} ++ ++bool NativeInstruction::is_int_branch() { ++ switch(Assembler::opcode(insn_word())) { ++ case Assembler::beq_op: ++ case Assembler::beql_op: ++ case Assembler::bgtz_op: ++ case Assembler::bgtzl_op: ++ case Assembler::blez_op: ++ case Assembler::blezl_op: ++ case Assembler::bne_op: ++ case Assembler::bnel_op: ++ return true; ++ case Assembler::regimm_op: ++ switch(Assembler::rt(insn_word())) { ++ case Assembler::bgez_op: ++ case Assembler::bgezal_op: ++ case Assembler::bgezall_op: ++ case Assembler::bgezl_op: ++ case Assembler::bltz_op: ++ case Assembler::bltzal_op: ++ case Assembler::bltzall_op: ++ case Assembler::bltzl_op: ++ return true; ++ } ++ } ++ ++ return false; ++} ++ ++bool NativeInstruction::is_float_branch() { ++ if (!is_op(Assembler::cop1_op) || ++ !is_rs((Register)Assembler::bc1f_op)) return false; ++ ++ switch(Assembler::rt(insn_word())) { ++ case Assembler::bcf_op: ++ case Assembler::bcfl_op: ++ case Assembler::bct_op: ++ case Assembler::bctl_op: ++ return true; ++ } ++ ++ return false; ++} ++ ++ ++void NativeCall::verify() { ++ // make sure code pattern is actually a call instruction ++ ++ // nop ++ // nop ++ // nop ++ // nop ++ // jal target ++ // nop ++ if ( is_nop() && ++ nativeInstruction_at(addr_at(4))->is_nop() && ++ nativeInstruction_at(addr_at(8))->is_nop() && ++ nativeInstruction_at(addr_at(12))->is_nop() && ++ is_op(int_at(16), Assembler::jal_op) && ++ nativeInstruction_at(addr_at(20))->is_nop() ) { ++ return; ++ } ++ ++ // jal targe ++ // nop ++ if ( is_op(int_at(0), Assembler::jal_op) && ++ nativeInstruction_at(addr_at(4))->is_nop() ) { ++ return; ++ } ++ ++ // li64 ++ if ( is_op(Assembler::lui_op) && ++ is_op(int_at(4), Assembler::ori_op) && ++ is_special_op(int_at(8), Assembler::dsll_op) && ++ is_op(int_at(12), Assembler::ori_op) && ++ is_special_op(int_at(16), Assembler::dsll_op) && ++ is_op(int_at(20), Assembler::ori_op) && ++ is_special_op(int_at(24), Assembler::jalr_op) ) { ++ return; ++ } ++ ++ //lui dst, imm16 ++ //ori dst, dst, imm16 ++ //dsll dst, dst, 16 ++ //ori dst, dst, imm16 ++ if ( is_op(Assembler::lui_op) && ++ is_op (int_at(4), Assembler::ori_op) && ++ 
is_special_op(int_at(8), Assembler::dsll_op) && ++ is_op (int_at(12), Assembler::ori_op) && ++ is_special_op(int_at(16), Assembler::jalr_op) ) { ++ return; ++ } ++ ++ //ori dst, R0, imm16 ++ //dsll dst, dst, 16 ++ //ori dst, dst, imm16 ++ //nop ++ if ( is_op(Assembler::ori_op) && ++ is_special_op(int_at(4), Assembler::dsll_op) && ++ is_op (int_at(8), Assembler::ori_op) && ++ nativeInstruction_at(addr_at(12))->is_nop() && ++ is_special_op(int_at(16), Assembler::jalr_op) ) { ++ return; ++ } ++ ++ //ori dst, R0, imm16 ++ //dsll dst, dst, 16 ++ //nop ++ //nop ++ if ( is_op(Assembler::ori_op) && ++ is_special_op(int_at(4), Assembler::dsll_op) && ++ nativeInstruction_at(addr_at(8))->is_nop() && ++ nativeInstruction_at(addr_at(12))->is_nop() && ++ is_special_op(int_at(16), Assembler::jalr_op) ) { ++ return; ++ } ++ ++ //daddiu dst, R0, imm16 ++ //nop ++ //nop ++ //nop ++ if ( is_op(Assembler::daddiu_op) && ++ nativeInstruction_at(addr_at(4))->is_nop() && ++ nativeInstruction_at(addr_at(8))->is_nop() && ++ nativeInstruction_at(addr_at(12))->is_nop() && ++ is_special_op(int_at(16), Assembler::jalr_op) ) { ++ return; ++ } ++ ++ // FIXME: why add jr_op here? ++ //daddiu dst, R0, imm16 ++ //nop ++ //nop ++ //nop ++ if ( is_op(Assembler::daddiu_op) && ++ nativeInstruction_at(addr_at(4))->is_nop() && ++ nativeInstruction_at(addr_at(8))->is_nop() && ++ nativeInstruction_at(addr_at(12))->is_nop() && ++ is_special_op(int_at(16), Assembler::jr_op) ) { ++ return; ++ } ++ ++ //lui dst, imm16 ++ //ori dst, dst, imm16 ++ //nop ++ //nop ++ if ( is_op(Assembler::lui_op) && ++ is_op (int_at(4), Assembler::ori_op) && ++ nativeInstruction_at(addr_at(8))->is_nop() && ++ nativeInstruction_at(addr_at(12))->is_nop() && ++ is_special_op(int_at(16), Assembler::jalr_op) ) { ++ return; ++ } ++ ++ //lui dst, imm16 ++ //nop ++ //nop ++ //nop ++ if ( is_op(Assembler::lui_op) && ++ nativeInstruction_at(addr_at(4))->is_nop() && ++ nativeInstruction_at(addr_at(8))->is_nop() && ++ nativeInstruction_at(addr_at(12))->is_nop() && ++ is_special_op(int_at(16), Assembler::jalr_op) ) { ++ return; ++ } ++ ++ //daddiu dst, R0, imm16 ++ //nop ++ if ( is_op(Assembler::daddiu_op) && ++ nativeInstruction_at(addr_at(4))->is_nop() && ++ is_special_op(int_at(8), Assembler::jalr_op) ) { ++ return; ++ } ++ ++ //lui dst, imm16 ++ //ori dst, dst, imm16 ++ if ( is_op(Assembler::lui_op) && ++ is_op (int_at(4), Assembler::ori_op) && ++ is_special_op(int_at(8), Assembler::jalr_op) ) { ++ return; ++ } ++ ++ //lui dst, imm16 ++ //nop ++ if ( is_op(Assembler::lui_op) && ++ nativeInstruction_at(addr_at(4))->is_nop() && ++ is_special_op(int_at(8), Assembler::jalr_op) ) { ++ return; ++ } ++ ++ if (nativeInstruction_at(addr_at(0))->is_trampoline_call()) ++ return; ++ ++ fatal("not a call"); ++} ++ ++address NativeCall::target_addr_for_insn() const { ++ // jal target ++ // nop ++ if ( is_op(int_at(0), Assembler::jal_op) && ++ nativeInstruction_at(addr_at(4))->is_nop()) { ++ int instr_index = int_at(0) & 0x3ffffff; ++ intptr_t target_high = ((intptr_t)addr_at(4)) & 0xfffffffff0000000; ++ intptr_t target = target_high | (instr_index << 2); ++ return (address)target; ++ } ++ ++ // nop ++ // nop ++ // nop ++ // nop ++ // jal target ++ // nop ++ if ( nativeInstruction_at(addr_at(0))->is_nop() && ++ nativeInstruction_at(addr_at(4))->is_nop() && ++ nativeInstruction_at(addr_at(8))->is_nop() && ++ nativeInstruction_at(addr_at(12))->is_nop() && ++ is_op(int_at(16), Assembler::jal_op) && ++ nativeInstruction_at(addr_at(20))->is_nop()) { ++ int instr_index = int_at(16) & 
0x3ffffff; ++ intptr_t target_high = ((intptr_t)addr_at(20)) & 0xfffffffff0000000; ++ intptr_t target = target_high | (instr_index << 2); ++ return (address)target; ++ } ++ ++ // li64 ++ if ( is_op(Assembler::lui_op) && ++ is_op(int_at(4), Assembler::ori_op) && ++ is_special_op(int_at(8), Assembler::dsll_op) && ++ is_op(int_at(12), Assembler::ori_op) && ++ is_special_op(int_at(16), Assembler::dsll_op) && ++ is_op(int_at(20), Assembler::ori_op) ) { ++ ++ return (address)Assembler::merge( (intptr_t)(int_at(20) & 0xffff), ++ (intptr_t)(int_at(12) & 0xffff), ++ (intptr_t)(int_at(4) & 0xffff), ++ (intptr_t)(int_at(0) & 0xffff)); ++ } ++ ++ //lui dst, imm16 ++ //ori dst, dst, imm16 ++ //dsll dst, dst, 16 ++ //ori dst, dst, imm16 ++ if ( is_op(Assembler::lui_op) && ++ is_op (int_at(4), Assembler::ori_op) && ++ is_special_op(int_at(8), Assembler::dsll_op) && ++ is_op (int_at(12), Assembler::ori_op) ) { ++ ++ return (address)Assembler::merge( (intptr_t)(int_at(12) & 0xffff), ++ (intptr_t)(int_at(4) & 0xffff), ++ (intptr_t)(int_at(0) & 0xffff), ++ (intptr_t)0); ++ } ++ ++ //lui dst, imm16 ++ //ori dst, dst, imm16 ++ //dsll dst, dst, 16 ++ //ld dst, dst, imm16 ++ if ( is_op(Assembler::lui_op) && ++ is_op (int_at(4), Assembler::ori_op) && ++ is_special_op(int_at(8), Assembler::dsll_op) && ++ is_op (int_at(12), Assembler::ld_op) ) { ++ ++ address dest = (address)Assembler::merge( (intptr_t)0, ++ (intptr_t)(int_at(4) & 0xffff), ++ (intptr_t)(int_at(0) & 0xffff), ++ (intptr_t)0); ++ return dest + Assembler::simm16((intptr_t)int_at(12) & 0xffff); ++ } ++ ++ //ori dst, R0, imm16 ++ //dsll dst, dst, 16 ++ //ori dst, dst, imm16 ++ //nop ++ if ( is_op(Assembler::ori_op) && ++ is_special_op(int_at(4), Assembler::dsll_op) && ++ is_op (int_at(8), Assembler::ori_op) && ++ nativeInstruction_at(addr_at(12))->is_nop()) { ++ ++ return (address)Assembler::merge( (intptr_t)(int_at(8) & 0xffff), ++ (intptr_t)(int_at(0) & 0xffff), ++ (intptr_t)0, ++ (intptr_t)0); ++ } ++ ++ //ori dst, R0, imm16 ++ //dsll dst, dst, 16 ++ //nop ++ //nop ++ if ( is_op(Assembler::ori_op) && ++ is_special_op(int_at(4), Assembler::dsll_op) && ++ nativeInstruction_at(addr_at(8))->is_nop() && ++ nativeInstruction_at(addr_at(12))->is_nop()) { ++ ++ return (address)Assembler::merge( (intptr_t)(0), ++ (intptr_t)(int_at(0) & 0xffff), ++ (intptr_t)0, ++ (intptr_t)0); ++ } ++ ++ //daddiu dst, R0, imm16 ++ //nop ++ //nop <-- optional ++ //nop <-- optional ++ if ( is_op(Assembler::daddiu_op) && ++ nativeInstruction_at(addr_at(4))->is_nop() ) { ++ ++ int sign = int_at(0) & 0x8000; ++ if (sign == 0) { ++ return (address)Assembler::merge( (intptr_t)(int_at(0) & 0xffff), ++ (intptr_t)0, ++ (intptr_t)0, ++ (intptr_t)0); ++ } else { ++ return (address)Assembler::merge( (intptr_t)(int_at(0) & 0xffff), ++ (intptr_t)(0xffff), ++ (intptr_t)(0xffff), ++ (intptr_t)(0xffff)); ++ } ++ } ++ ++ //lui dst, imm16 ++ //ori dst, dst, imm16 ++ //nop <-- optional ++ //nop <-- optional ++ if ( is_op(Assembler::lui_op) && ++ is_op (int_at(4), Assembler::ori_op) ) { ++ ++ int sign = int_at(0) & 0x8000; ++ if (sign == 0) { ++ return (address)Assembler::merge( (intptr_t)(int_at(4) & 0xffff), ++ (intptr_t)(int_at(0) & 0xffff), ++ (intptr_t)0, ++ (intptr_t)0); ++ } else { ++ return (address)Assembler::merge( (intptr_t)(int_at(4) & 0xffff), ++ (intptr_t)(int_at(0) & 0xffff), ++ (intptr_t)(0xffff), ++ (intptr_t)(0xffff)); ++ } ++ } ++ ++ //lui dst, imm16 ++ //nop ++ //nop <-- optional ++ //nop <-- optional ++ if ( is_op(Assembler::lui_op) && ++ 
nativeInstruction_at(addr_at(4))->is_nop() ) { ++ ++ int sign = int_at(0) & 0x8000; ++ if (sign == 0) { ++ return (address)Assembler::merge( (intptr_t)0, ++ (intptr_t)(int_at(0) & 0xffff), ++ (intptr_t)0, ++ (intptr_t)0); ++ } else { ++ return (address)Assembler::merge( (intptr_t)0, ++ (intptr_t)(int_at(0) & 0xffff), ++ (intptr_t)(0xffff), ++ (intptr_t)(0xffff)); ++ } ++ } ++ ++ tty->print_cr("not a call: addr = " INTPTR_FORMAT , p2i(addr_at(0))); ++ tty->print_cr("======= Start decoding at addr = " INTPTR_FORMAT " =======", p2i(addr_at(0))); ++ Disassembler::decode(addr_at(0) - 2 * 4, addr_at(0) + 8 * 4, tty); ++ tty->print_cr("======= End of decoding ======="); ++ fatal("not a call"); ++ return NULL; // unreachable ++} ++ ++// Extract call destination from a NativeCall. The call might use a trampoline stub. ++address NativeCall::destination() const { ++ address addr = (address)this; ++ address destination = target_addr_for_insn(); ++ // Do we use a trampoline stub for this call? ++ // Trampoline stubs are located behind the main code. ++ if (destination > addr) { ++ // Filter out recursive method invocation (call to verified/unverified entry point). ++ CodeBlob* cb = CodeCache::find_blob_unsafe(addr); // Else we get assertion if nmethod is zombie. ++ assert(cb && cb->is_nmethod(), "sanity"); ++ nmethod *nm = (nmethod *)cb; ++ NativeInstruction* ni = nativeInstruction_at(addr); ++ if (nm->stub_contains(destination) && ni->is_trampoline_call()) { ++ // Yes we do, so get the destination from the trampoline stub. ++ const address trampoline_stub_addr = destination; ++ destination = nativeCallTrampolineStub_at(trampoline_stub_addr)->destination(); ++ } ++ } ++ return destination; ++} ++ ++// Similar to replace_mt_safe, but just changes the destination. The ++// important thing is that free-running threads are able to execute this ++// call instruction at all times. ++// ++// Used in the runtime linkage of calls; see class CompiledIC. ++// ++// Add parameter assert_lock to switch off assertion ++// during code generation, where no patching lock is needed. ++void NativeCall::set_destination_mt_safe(address dest, bool assert_lock) { ++ assert(!assert_lock || ++ (Patching_lock->is_locked() || SafepointSynchronize::is_at_safepoint()) || ++ CompiledICLocker::is_safe(addr_at(0)), ++ "concurrent code patching"); ++ ++ ResourceMark rm; ++ address addr_call = addr_at(0); ++ assert(NativeCall::is_call_at(addr_call), "unexpected code at call site"); ++ // Patch the constant in the call's trampoline stub. 
++ if (MacroAssembler::reachable_from_cache()) { ++ set_destination(dest); ++ } else { ++ address trampoline_stub_addr = nativeCall_at(addr_call)->target_addr_for_insn(); ++ assert (get_trampoline() != NULL && trampoline_stub_addr == get_trampoline(), "we need a trampoline"); ++ nativeCallTrampolineStub_at(trampoline_stub_addr)->set_destination(dest); ++ } ++} ++ ++address NativeCall::get_trampoline() { ++ address call_addr = addr_at(0); ++ ++ CodeBlob *code = CodeCache::find_blob(call_addr); ++ assert(code != NULL, "Could not find the containing code blob"); ++ ++ if (code->is_nmethod()) { ++ return trampoline_stub_Relocation::get_trampoline_for(call_addr, (nmethod*)code); ++ } ++ return NULL; ++} ++ ++// manual implementation of GSSQ ++// ++// 00000001200009c0 : ++// 1200009c0: 0085202d daddu a0, a0, a1 ++// 1200009c4: e8860027 gssq a2, a3, 0(a0) ++// 1200009c8: 03e00008 jr ra ++// 1200009cc: 00000000 nop ++// ++typedef void (* atomic_store128_ptr)(long *addr, int offset, long low64, long hi64); ++ ++static int *buf; ++ ++static atomic_store128_ptr get_atomic_store128_func() { ++ assert(UseLEXT1, "UseLEXT1 must be true"); ++ static atomic_store128_ptr p = NULL; ++ if (p != NULL) ++ return p; ++ ++ buf = (int *)mmap(NULL, 1024, PROT_READ | PROT_WRITE | PROT_EXEC, MAP_PRIVATE | MAP_ANONYMOUS, ++ -1, 0); ++ buf[0] = 0x0085202d; ++ buf[1] = (0x3a << 26) | (4 << 21) | (6 << 16) | 0x27; /* gssq $a2, $a3, 0($a0) */ ++ buf[2] = 0x03e00008; ++ buf[3] = 0; ++ ++ asm("sync"); ++ p = (atomic_store128_ptr)buf; ++ return p; ++} ++ ++void NativeCall::patch_on_jal_only(address dst) { ++ long dest = ((long)dst - (((long)addr_at(4)) & 0xfffffffff0000000))>>2; ++ if ((dest >= 0) && (dest < (1<<26))) { ++ jint jal_inst = (Assembler::jal_op << 26) | dest; ++ set_int_at(0, jal_inst); ++ ICache::invalidate_range(addr_at(0), 4); ++ } else { ++ ShouldNotReachHere(); ++ } ++} ++ ++void NativeCall::patch_on_jal_gs(address dst) { ++ long dest = ((long)dst - (((long)addr_at(20)) & 0xfffffffff0000000))>>2; ++ if ((dest >= 0) && (dest < (1<<26))) { ++ jint jal_inst = (Assembler::jal_op << 26) | dest; ++ set_int_at(16, jal_inst); ++ ICache::invalidate_range(addr_at(16), 4); ++ } else { ++ ShouldNotReachHere(); ++ } ++} ++ ++void NativeCall::patch_on_jal(address dst) { ++ patch_on_jal_gs(dst); ++} ++ ++void NativeCall::patch_on_trampoline(address dest) { ++ assert(nativeInstruction_at(addr_at(0))->is_trampoline_call(), "unexpected code at call site"); ++ jlong dst = (jlong) dest; ++ //lui dst, imm16 ++ //ori dst, dst, imm16 ++ //dsll dst, dst, 16 ++ //ld dst, dst, imm16 ++ if ((dst> 0) && Assembler::is_simm16(dst >> 32)) { ++ dst += (dst & 0x8000) << 1; ++ set_int_at(0, (int_at(0) & 0xffff0000) | (Assembler::split_low(dst >> 32) & 0xffff)); ++ set_int_at(4, (int_at(4) & 0xffff0000) | (Assembler::split_low(dst >> 16) & 0xffff)); ++ set_int_at(12, (int_at(12) & 0xffff0000) | (Assembler::split_low(dst) & 0xffff)); ++ ++ ICache::invalidate_range(addr_at(0), 24); ++ } else { ++ ShouldNotReachHere(); ++ } ++} ++ ++void NativeCall::patch_on_jalr_gs(address dst) { ++ patch_set48_gs(dst); ++} ++ ++void NativeCall::patch_on_jalr(address dst) { ++ patch_set48(dst); ++} ++ ++void NativeCall::patch_set48_gs(address dest) { ++ jlong value = (jlong) dest; ++ int rt_reg = (int_at(0) & (0x1f << 16)); ++ ++ if (rt_reg == 0) rt_reg = 25 << 16; // r25 is T9 ++ ++ int rs_reg = rt_reg << 5; ++ int rd_reg = rt_reg >> 5; ++ ++ int hi = (int)(value >> 32); ++ int lo = (int)(value & ~0); ++ int count = 0; ++ int insts[4] = {0, 0, 0, 0}; ++ ++ if 
(value == lo) { // 32-bit integer ++ if (Assembler::is_simm16(value)) { ++ insts[count] = (Assembler::daddiu_op << 26) | rt_reg | Assembler::split_low(value); ++ count += 1; ++ } else { ++ insts[count] = (Assembler::lui_op << 26) | rt_reg | Assembler::split_low(value >> 16); ++ count += 1; ++ if (Assembler::split_low(value)) { ++ insts[count] = (Assembler::ori_op << 26) | rs_reg | rt_reg | Assembler::split_low(value); ++ count += 1; ++ } ++ } ++ } else if (hi == 0) { // hardware zero-extends to upper 32 ++ insts[count] = (Assembler::ori_op << 26) | rt_reg | Assembler::split_low(julong(value) >> 16); ++ count += 1; ++ insts[count] = (Assembler::dsll_op) | rt_reg | rd_reg | (16 << 6); ++ count += 1; ++ if (Assembler::split_low(value)) { ++ insts[count] = (Assembler::ori_op << 26) | rs_reg | rt_reg | Assembler::split_low(value); ++ count += 1; ++ } ++ } else if ((value> 0) && Assembler::is_simm16(value >> 32)) { ++ insts[count] = (Assembler::lui_op << 26) | rt_reg | Assembler::split_low(value >> 32); ++ count += 1; ++ insts[count] = (Assembler::ori_op << 26) | rs_reg | rt_reg | Assembler::split_low(value >> 16); ++ count += 1; ++ insts[count] = (Assembler::dsll_op) | rt_reg | rd_reg | (16 << 6); ++ count += 1; ++ insts[count] = (Assembler::ori_op << 26) | rs_reg | rt_reg | Assembler::split_low(value); ++ count += 1; ++ } else { ++ tty->print_cr("dest = 0x%lx", value); ++ guarantee(false, "Not supported yet !"); ++ } ++ ++ while (count < 4) { ++ insts[count] = 0; ++ count++; ++ } ++ ++ guarantee(((long)addr_at(0) % (BytesPerWord * 2)) == 0, "must be aligned"); ++ atomic_store128_ptr func = get_atomic_store128_func(); ++ (*func)((long *)addr_at(0), 0, *(long *)&insts[0], *(long *)&insts[2]); ++ ++ ICache::invalidate_range(addr_at(0), 16); ++} ++ ++void NativeCall::patch_set32_gs(address dest) { ++ jlong value = (jlong) dest; ++ int rt_reg = (int_at(0) & (0x1f << 16)); ++ ++ if (rt_reg == 0) rt_reg = 25 << 16; // r25 is T9 ++ ++ int rs_reg = rt_reg << 5; ++ int rd_reg = rt_reg >> 5; ++ ++ int hi = (int)(value >> 32); ++ int lo = (int)(value & ~0); ++ ++ int count = 0; ++ ++ int insts[2] = {0, 0}; ++ ++ if (value == lo) { // 32-bit integer ++ if (Assembler::is_simm16(value)) { ++ //daddiu(d, R0, value); ++ //set_int_at(count << 2, (Assembler::daddiu_op << 26) | rt_reg | Assembler::split_low(value)); ++ insts[count] = (Assembler::daddiu_op << 26) | rt_reg | Assembler::split_low(value); ++ count += 1; ++ } else { ++ //lui(d, split_low(value >> 16)); ++ //set_int_at(count << 2, (Assembler::lui_op << 26) | rt_reg | Assembler::split_low(value >> 16)); ++ insts[count] = (Assembler::lui_op << 26) | rt_reg | Assembler::split_low(value >> 16); ++ count += 1; ++ if (Assembler::split_low(value)) { ++ //ori(d, d, split_low(value)); ++ //set_int_at(count << 2, (Assembler::ori_op << 26) | rs_reg | rt_reg | Assembler::split_low(value)); ++ insts[count] = (Assembler::ori_op << 26) | rs_reg | rt_reg | Assembler::split_low(value); ++ count += 1; ++ } ++ } ++ } else { ++ tty->print_cr("dest = 0x%lx", value); ++ guarantee(false, "Not supported yet !"); ++ } ++ ++ while (count < 2) { ++ //nop(); ++ //set_int_at(count << 2, 0); ++ insts[count] = 0; ++ count++; ++ } ++ ++ long inst = insts[1]; ++ inst = inst << 32; ++ inst = inst + insts[0]; ++ ++ set_long_at(0, inst); ++} ++ ++void NativeCall::patch_set48(address dest) { ++ jlong value = (jlong) dest; ++ int rt_reg = (int_at(0) & (0x1f << 16)); ++ ++ if (rt_reg == 0) rt_reg = 25 << 16; // r25 is T9 ++ ++ int rs_reg = rt_reg << 5; ++ int rd_reg = rt_reg >> 5; ++ ++ int 
hi = (int)(value >> 32); ++ int lo = (int)(value & ~0); ++ ++ int count = 0; ++ ++ if (value == lo) { // 32-bit integer ++ if (Assembler::is_simm16(value)) { ++ //daddiu(d, R0, value); ++ set_int_at(count << 2, (Assembler::daddiu_op << 26) | rt_reg | Assembler::split_low(value)); ++ count += 1; ++ } else { ++ //lui(d, split_low(value >> 16)); ++ set_int_at(count << 2, (Assembler::lui_op << 26) | rt_reg | Assembler::split_low(value >> 16)); ++ count += 1; ++ if (Assembler::split_low(value)) { ++ //ori(d, d, split_low(value)); ++ set_int_at(count << 2, (Assembler::ori_op << 26) | rs_reg | rt_reg | Assembler::split_low(value)); ++ count += 1; ++ } ++ } ++ } else if (hi == 0) { // hardware zero-extends to upper 32 ++ //ori(d, R0, julong(value) >> 16); ++ set_int_at(count << 2, (Assembler::ori_op << 26) | rt_reg | Assembler::split_low(julong(value) >> 16)); ++ count += 1; ++ //dsll(d, d, 16); ++ set_int_at(count << 2, (Assembler::dsll_op) | rt_reg | rd_reg | (16 << 6)); ++ count += 1; ++ if (Assembler::split_low(value)) { ++ //ori(d, d, split_low(value)); ++ set_int_at(count << 2, (Assembler::ori_op << 26) | rs_reg | rt_reg | Assembler::split_low(value)); ++ count += 1; ++ } ++ } else if ((value> 0) && Assembler::is_simm16(value >> 32)) { ++ //lui(d, value >> 32); ++ set_int_at(count << 2, (Assembler::lui_op << 26) | rt_reg | Assembler::split_low(value >> 32)); ++ count += 1; ++ //ori(d, d, split_low(value >> 16)); ++ set_int_at(count << 2, (Assembler::ori_op << 26) | rs_reg | rt_reg | Assembler::split_low(value >> 16)); ++ count += 1; ++ //dsll(d, d, 16); ++ set_int_at(count << 2, (Assembler::dsll_op) | rt_reg | rd_reg | (16 << 6)); ++ count += 1; ++ //ori(d, d, split_low(value)); ++ set_int_at(count << 2, (Assembler::ori_op << 26) | rs_reg | rt_reg | Assembler::split_low(value)); ++ count += 1; ++ } else { ++ tty->print_cr("dest = 0x%lx", value); ++ guarantee(false, "Not supported yet !"); ++ } ++ ++ while (count < 4) { ++ //nop(); ++ set_int_at(count << 2, 0); ++ count++; ++ } ++ ++ ICache::invalidate_range(addr_at(0), 16); ++} ++ ++void NativeCall::patch_set32(address dest) { ++ patch_set32_gs(dest); ++} ++ ++void NativeCall::set_destination(address dest) { ++ OrderAccess::fence(); ++ ++ // li64 ++ if (is_special_op(int_at(16), Assembler::dsll_op)) { ++ int first_word = int_at(0); ++ set_int_at(0, 0x1000ffff); /* .1: b .1 */ ++ set_int_at(4, (int_at(4) & 0xffff0000) | (Assembler::split_low((intptr_t)dest >> 32) & 0xffff)); ++ set_int_at(12, (int_at(12) & 0xffff0000) | (Assembler::split_low((intptr_t)dest >> 16) & 0xffff)); ++ set_int_at(20, (int_at(20) & 0xffff0000) | (Assembler::split_low((intptr_t)dest) & 0xffff)); ++ set_int_at(0, (first_word & 0xffff0000) | (Assembler::split_low((intptr_t)dest >> 48) & 0xffff)); ++ ICache::invalidate_range(addr_at(0), 24); ++ } else if (is_op(int_at(16), Assembler::jal_op)) { ++ if (UseLEXT1) { ++ patch_on_jal_gs(dest); ++ } else { ++ patch_on_jal(dest); ++ } ++ } else if (is_op(int_at(0), Assembler::jal_op)) { ++ patch_on_jal_only(dest); ++ } else if (is_special_op(int_at(16), Assembler::jalr_op)) { ++ if (UseLEXT1) { ++ patch_on_jalr_gs(dest); ++ } else { ++ patch_on_jalr(dest); ++ } ++ } else if (is_special_op(int_at(8), Assembler::jalr_op)) { ++ guarantee(!os::is_MP() || (((long)addr_at(0) % 8) == 0), "destination must be aligned by 8"); ++ if (UseLEXT1) { ++ patch_set32_gs(dest); ++ } else { ++ patch_set32(dest); ++ } ++ ICache::invalidate_range(addr_at(0), 8); ++ } else { ++ fatal("not a call"); ++ } ++} ++ ++void NativeCall::print() { ++ 
tty->print_cr(PTR_FORMAT ": call " PTR_FORMAT, ++ p2i(instruction_address()), p2i(destination())); ++} ++ ++// Inserts a native call instruction at a given pc ++void NativeCall::insert(address code_pos, address entry) { ++ NativeCall *call = nativeCall_at(code_pos); ++ CodeBuffer cb(call->addr_at(0), instruction_size); ++ MacroAssembler masm(&cb); ++#define __ masm. ++ __ li48(T9, (long)entry); ++ __ jalr (); ++ __ delayed()->nop(); ++#undef __ ++ ++ ICache::invalidate_range(call->addr_at(0), instruction_size); ++} ++ ++// MT-safe patching of a call instruction. ++// First patches first word of instruction to two jmp's that jmps to them ++// selfs (spinlock). Then patches the last byte, and then atomicly replaces ++// the jmp's with the first 4 byte of the new instruction. ++void NativeCall::replace_mt_safe(address instr_addr, address code_buffer) { ++ Unimplemented(); ++} ++ ++//------------------------------------------------------------------- ++ ++void NativeMovConstReg::verify() { ++ // li64 ++ if ( is_op(Assembler::lui_op) && ++ is_op(int_at(4), Assembler::ori_op) && ++ is_special_op(int_at(8), Assembler::dsll_op) && ++ is_op(int_at(12), Assembler::ori_op) && ++ is_special_op(int_at(16), Assembler::dsll_op) && ++ is_op(int_at(20), Assembler::ori_op) ) { ++ return; ++ } ++ ++ //lui dst, imm16 ++ //ori dst, dst, imm16 ++ //dsll dst, dst, 16 ++ //ori dst, dst, imm16 ++ if ( is_op(Assembler::lui_op) && ++ is_op (int_at(4), Assembler::ori_op) && ++ is_special_op(int_at(8), Assembler::dsll_op) && ++ is_op (int_at(12), Assembler::ori_op) ) { ++ return; ++ } ++ ++ //ori dst, R0, imm16 ++ //dsll dst, dst, 16 ++ //ori dst, dst, imm16 ++ //nop ++ if ( is_op(Assembler::ori_op) && ++ is_special_op(int_at(4), Assembler::dsll_op) && ++ is_op (int_at(8), Assembler::ori_op) && ++ nativeInstruction_at(addr_at(12))->is_nop()) { ++ return; ++ } ++ ++ //ori dst, R0, imm16 ++ //dsll dst, dst, 16 ++ //nop ++ //nop ++ if ( is_op(Assembler::ori_op) && ++ is_special_op(int_at(4), Assembler::dsll_op) && ++ nativeInstruction_at(addr_at(8))->is_nop() && ++ nativeInstruction_at(addr_at(12))->is_nop()) { ++ return; ++ } ++ ++ //daddiu dst, R0, imm16 ++ //nop ++ //nop ++ //nop ++ if ( is_op(Assembler::daddiu_op) && ++ nativeInstruction_at(addr_at(4))->is_nop() && ++ nativeInstruction_at(addr_at(8))->is_nop() && ++ nativeInstruction_at(addr_at(12))->is_nop() ) { ++ return; ++ } ++ ++ //lui dst, imm16 ++ //ori dst, dst, imm16 ++ //nop ++ //nop ++ if ( is_op(Assembler::lui_op) && ++ is_op (int_at(4), Assembler::ori_op) && ++ nativeInstruction_at(addr_at(8))->is_nop() && ++ nativeInstruction_at(addr_at(12))->is_nop() ) { ++ return; ++ } ++ ++ //lui dst, imm16 ++ //nop ++ //nop ++ //nop ++ if ( is_op(Assembler::lui_op) && ++ nativeInstruction_at(addr_at(4))->is_nop() && ++ nativeInstruction_at(addr_at(8))->is_nop() && ++ nativeInstruction_at(addr_at(12))->is_nop() ) { ++ return; ++ } ++ ++ fatal("not a mov reg, imm64/imm48"); ++} ++ ++void NativeMovConstReg::print() { ++ tty->print_cr(PTR_FORMAT ": mov reg, " INTPTR_FORMAT, ++ p2i(instruction_address()), data()); ++} ++ ++intptr_t NativeMovConstReg::data() const { ++ // li64 ++ if ( is_op(Assembler::lui_op) && ++ is_op(int_at(4), Assembler::ori_op) && ++ is_special_op(int_at(8), Assembler::dsll_op) && ++ is_op(int_at(12), Assembler::ori_op) && ++ is_special_op(int_at(16), Assembler::dsll_op) && ++ is_op(int_at(20), Assembler::ori_op) ) { ++ ++ return Assembler::merge( (intptr_t)(int_at(20) & 0xffff), ++ (intptr_t)(int_at(12) & 0xffff), ++ (intptr_t)(int_at(4) & 0xffff), 
++ (intptr_t)(int_at(0) & 0xffff)); ++ } ++ ++ //lui dst, imm16 ++ //ori dst, dst, imm16 ++ //dsll dst, dst, 16 ++ //ori dst, dst, imm16 ++ if ( is_op(Assembler::lui_op) && ++ is_op (int_at(4), Assembler::ori_op) && ++ is_special_op(int_at(8), Assembler::dsll_op) && ++ is_op (int_at(12), Assembler::ori_op) ) { ++ ++ return Assembler::merge( (intptr_t)(int_at(12) & 0xffff), ++ (intptr_t)(int_at(4) & 0xffff), ++ (intptr_t)(int_at(0) & 0xffff), ++ (intptr_t)0); ++ } ++ ++ //ori dst, R0, imm16 ++ //dsll dst, dst, 16 ++ //ori dst, dst, imm16 ++ //nop ++ if ( is_op(Assembler::ori_op) && ++ is_special_op(int_at(4), Assembler::dsll_op) && ++ is_op (int_at(8), Assembler::ori_op) && ++ nativeInstruction_at(addr_at(12))->is_nop()) { ++ ++ return Assembler::merge( (intptr_t)(int_at(8) & 0xffff), ++ (intptr_t)(int_at(0) & 0xffff), ++ (intptr_t)0, ++ (intptr_t)0); ++ } ++ ++ //ori dst, R0, imm16 ++ //dsll dst, dst, 16 ++ //nop ++ //nop ++ if ( is_op(Assembler::ori_op) && ++ is_special_op(int_at(4), Assembler::dsll_op) && ++ nativeInstruction_at(addr_at(8))->is_nop() && ++ nativeInstruction_at(addr_at(12))->is_nop()) { ++ ++ return Assembler::merge( (intptr_t)(0), ++ (intptr_t)(int_at(0) & 0xffff), ++ (intptr_t)0, ++ (intptr_t)0); ++ } ++ ++ //daddiu dst, R0, imm16 ++ //nop ++ //nop ++ //nop ++ if ( is_op(Assembler::daddiu_op) && ++ nativeInstruction_at(addr_at(4))->is_nop() && ++ nativeInstruction_at(addr_at(8))->is_nop() && ++ nativeInstruction_at(addr_at(12))->is_nop() ) { ++ ++ int sign = int_at(0) & 0x8000; ++ if (sign == 0) { ++ return Assembler::merge( (intptr_t)(int_at(0) & 0xffff), ++ (intptr_t)0, ++ (intptr_t)0, ++ (intptr_t)0); ++ } else { ++ return Assembler::merge( (intptr_t)(int_at(0) & 0xffff), ++ (intptr_t)(0xffff), ++ (intptr_t)(0xffff), ++ (intptr_t)(0xffff)); ++ } ++ } ++ ++ //lui dst, imm16 ++ //ori dst, dst, imm16 ++ //nop ++ //nop ++ if ( is_op(Assembler::lui_op) && ++ is_op (int_at(4), Assembler::ori_op) && ++ nativeInstruction_at(addr_at(8))->is_nop() && ++ nativeInstruction_at(addr_at(12))->is_nop() ) { ++ ++ int sign = int_at(0) & 0x8000; ++ if (sign == 0) { ++ return Assembler::merge( (intptr_t)(int_at(4) & 0xffff), ++ (intptr_t)(int_at(0) & 0xffff), ++ (intptr_t)0, ++ (intptr_t)0); ++ } else { ++ return Assembler::merge( (intptr_t)(int_at(4) & 0xffff), ++ (intptr_t)(int_at(0) & 0xffff), ++ (intptr_t)(0xffff), ++ (intptr_t)(0xffff)); ++ } ++ } ++ ++ //lui dst, imm16 ++ //nop ++ //nop ++ //nop ++ if ( is_op(Assembler::lui_op) && ++ nativeInstruction_at(addr_at(4))->is_nop() && ++ nativeInstruction_at(addr_at(8))->is_nop() && ++ nativeInstruction_at(addr_at(12))->is_nop() ) { ++ ++ int sign = int_at(0) & 0x8000; ++ if (sign == 0) { ++ return Assembler::merge( (intptr_t)0, ++ (intptr_t)(int_at(0) & 0xffff), ++ (intptr_t)0, ++ (intptr_t)0); ++ } else { ++ return Assembler::merge( (intptr_t)0, ++ (intptr_t)(int_at(0) & 0xffff), ++ (intptr_t)(0xffff), ++ (intptr_t)(0xffff)); ++ } ++ } ++ ++ fatal("not a mov reg, imm64/imm48"); ++ return 0; // unreachable ++} ++ ++void NativeMovConstReg::patch_set48(intptr_t x) { ++ jlong value = (jlong) x; ++ int rt_reg = (int_at(0) & (0x1f << 16)); ++ int rs_reg = rt_reg << 5; ++ int rd_reg = rt_reg >> 5; ++ ++ int hi = (int)(value >> 32); ++ int lo = (int)(value & ~0); ++ ++ int count = 0; ++ ++ if (value == lo) { // 32-bit integer ++ if (Assembler::is_simm16(value)) { ++ //daddiu(d, R0, value); ++ set_int_at(count << 2, (Assembler::daddiu_op << 26) | rt_reg | Assembler::split_low(value)); ++ count += 1; ++ } else { ++ //lui(d, split_low(value >> 
16)); ++ set_int_at(count << 2, (Assembler::lui_op << 26) | rt_reg | Assembler::split_low(value >> 16)); ++ count += 1; ++ if (Assembler::split_low(value)) { ++ //ori(d, d, split_low(value)); ++ set_int_at(count << 2, (Assembler::ori_op << 26) | rs_reg | rt_reg | Assembler::split_low(value)); ++ count += 1; ++ } ++ } ++ } else if (hi == 0) { // hardware zero-extends to upper 32 ++ set_int_at(count << 2, (Assembler::ori_op << 26) | rt_reg | Assembler::split_low(julong(value) >> 16)); ++ count += 1; ++ set_int_at(count << 2, (Assembler::dsll_op) | rt_reg | rd_reg | (16 << 6)); ++ count += 1; ++ if (Assembler::split_low(value)) { ++ set_int_at(count << 2, (Assembler::ori_op << 26) | rs_reg | rt_reg | Assembler::split_low(value)); ++ count += 1; ++ } ++ } else if ((value> 0) && Assembler::is_simm16(value >> 32)) { ++ set_int_at(count << 2, (Assembler::lui_op << 26) | rt_reg | Assembler::split_low(value >> 32)); ++ count += 1; ++ set_int_at(count << 2, (Assembler::ori_op << 26) | rs_reg | rt_reg | Assembler::split_low(value >> 16)); ++ count += 1; ++ set_int_at(count << 2, (Assembler::dsll_op) | rt_reg | rd_reg | (16 << 6)); ++ count += 1; ++ set_int_at(count << 2, (Assembler::ori_op << 26) | rs_reg | rt_reg | Assembler::split_low(value)); ++ count += 1; ++ } else { ++ tty->print_cr("value = 0x%lx", value); ++ guarantee(false, "Not supported yet !"); ++ } ++ ++ while (count < 4) { ++ set_int_at(count << 2, 0); ++ count++; ++ } ++} ++ ++void NativeMovConstReg::set_data(intptr_t x, intptr_t o) { ++ // li64 or li48 ++ if ((!nativeInstruction_at(addr_at(12))->is_nop()) && is_special_op(int_at(16), Assembler::dsll_op) && is_op(long_at(20), Assembler::ori_op)) { ++ set_int_at(0, (int_at(0) & 0xffff0000) | (Assembler::split_low((intptr_t)x >> 48) & 0xffff)); ++ set_int_at(4, (int_at(4) & 0xffff0000) | (Assembler::split_low((intptr_t)x >> 32) & 0xffff)); ++ set_int_at(12, (int_at(12) & 0xffff0000) | (Assembler::split_low((intptr_t)x >> 16) & 0xffff)); ++ set_int_at(20, (int_at(20) & 0xffff0000) | (Assembler::split_low((intptr_t)x) & 0xffff)); ++ } else { ++ patch_set48(x); ++ } ++ ++ ICache::invalidate_range(addr_at(0), 24); ++ ++ // Find and replace the oop/metadata corresponding to this ++ // instruction in oops section. ++ CodeBlob* blob = CodeCache::find_blob_unsafe(instruction_address()); ++ nmethod* nm = blob->as_nmethod_or_null(); ++ if (nm != NULL) { ++ o = o ? 
o : x; ++ RelocIterator iter(nm, instruction_address(), next_instruction_address()); ++ while (iter.next()) { ++ if (iter.type() == relocInfo::oop_type) { ++ oop* oop_addr = iter.oop_reloc()->oop_addr(); ++ *oop_addr = cast_to_oop(o); ++ break; ++ } else if (iter.type() == relocInfo::metadata_type) { ++ Metadata** metadata_addr = iter.metadata_reloc()->metadata_addr(); ++ *metadata_addr = (Metadata*)o; ++ break; ++ } ++ } ++ } ++} ++ ++//------------------------------------------------------------------- ++ ++int NativeMovRegMem::offset() const{ ++ if (is_immediate()) ++ return (short)(int_at(instruction_offset)&0xffff); ++ else ++ return Assembler::merge(int_at(hiword_offset)&0xffff, long_at(instruction_offset)&0xffff); ++} ++ ++void NativeMovRegMem::set_offset(int x) { ++ if (is_immediate()) { ++ assert(Assembler::is_simm16(x), "just check"); ++ set_int_at(0, (int_at(0)&0xffff0000) | (x&0xffff) ); ++ if (is_64ldst()) { ++ assert(Assembler::is_simm16(x+4), "just check"); ++ set_int_at(4, (int_at(4)&0xffff0000) | ((x+4)&0xffff) ); ++ } ++ } else { ++ set_int_at(0, (int_at(0) & 0xffff0000) | (Assembler::split_high(x) & 0xffff)); ++ set_int_at(4, (int_at(4) & 0xffff0000) | (Assembler::split_low(x) & 0xffff)); ++ } ++ ICache::invalidate_range(addr_at(0), 8); ++} ++ ++void NativeMovRegMem::verify() { ++ int offset = 0; ++ ++ if ( Assembler::opcode(int_at(0)) == Assembler::lui_op ) { ++ ++ if ( Assembler::opcode(int_at(4)) != Assembler::ori_op ) { ++ fatal ("not a mov [reg+offs], reg instruction"); ++ } ++ ++ offset += 12; ++ } ++ ++ switch(Assembler::opcode(int_at(offset))) { ++ case Assembler::lb_op: ++ case Assembler::lbu_op: ++ case Assembler::lh_op: ++ case Assembler::lhu_op: ++ case Assembler::lw_op: ++ case Assembler::lwu_op: ++ case Assembler::ld_op: ++ case Assembler::lwc1_op: ++ case Assembler::ldc1_op: ++ case Assembler::sb_op: ++ case Assembler::sh_op: ++ case Assembler::sw_op: ++ case Assembler::sd_op: ++ case Assembler::swc1_op: ++ case Assembler::sdc1_op: ++ break; ++ default: ++ fatal ("not a mov [reg+offs], reg instruction"); ++ } ++} ++ ++ ++void NativeMovRegMem::print() { ++ tty->print_cr(PTR_FORMAT ": mov reg, [reg + %x]", p2i(instruction_address()), offset()); ++} ++ ++bool NativeInstruction::is_sigill_zombie_not_entrant() { ++ return uint_at(0) == NativeIllegalInstruction::instruction_code; ++} ++ ++void NativeIllegalInstruction::insert(address code_pos) { ++ *(juint*)code_pos = instruction_code; ++ ICache::invalidate_range(code_pos, instruction_size); ++} ++ ++void NativeJump::verify() { ++ assert(((NativeInstruction *)this)->is_jump() || ++ ((NativeInstruction *)this)->is_cond_jump(), "not a general jump instruction"); ++} ++ ++void NativeJump::patch_set48_gs(address dest) { ++ jlong value = (jlong) dest; ++ int rt_reg = (int_at(0) & (0x1f << 16)); ++ ++ if (rt_reg == 0) rt_reg = 25 << 16; // r25 is T9 ++ ++ int rs_reg = rt_reg << 5; ++ int rd_reg = rt_reg >> 5; ++ ++ int hi = (int)(value >> 32); ++ int lo = (int)(value & ~0); ++ ++ int count = 0; ++ ++ int insts[4] = {0, 0, 0, 0}; ++ ++ if (value == lo) { // 32-bit integer ++ if (Assembler::is_simm16(value)) { ++ insts[count] = (Assembler::daddiu_op << 26) | rt_reg | Assembler::split_low(value); ++ count += 1; ++ } else { ++ insts[count] = (Assembler::lui_op << 26) | rt_reg | Assembler::split_low(value >> 16); ++ count += 1; ++ if (Assembler::split_low(value)) { ++ insts[count] = (Assembler::ori_op << 26) | rs_reg | rt_reg | Assembler::split_low(value); ++ count += 1; ++ } ++ } ++ } else if (hi == 0) { // hardware 
zero-extends to upper 32 ++ insts[count] = (Assembler::ori_op << 26) | rt_reg | Assembler::split_low(julong(value) >> 16); ++ count += 1; ++ insts[count] = (Assembler::dsll_op) | rt_reg | rd_reg | (16 << 6); ++ count += 1; ++ if (Assembler::split_low(value)) { ++ insts[count] = (Assembler::ori_op << 26) | rs_reg | rt_reg | Assembler::split_low(value); ++ count += 1; ++ } ++ } else if ((value> 0) && Assembler::is_simm16(value >> 32)) { ++ insts[count] = (Assembler::lui_op << 26) | rt_reg | Assembler::split_low(value >> 32); ++ count += 1; ++ insts[count] = (Assembler::ori_op << 26) | rs_reg | rt_reg | Assembler::split_low(value >> 16); ++ count += 1; ++ insts[count] = (Assembler::dsll_op) | rt_reg | rd_reg | (16 << 6); ++ count += 1; ++ insts[count] = (Assembler::ori_op << 26) | rs_reg | rt_reg | Assembler::split_low(value); ++ count += 1; ++ } else { ++ tty->print_cr("dest = 0x%lx", value); ++ guarantee(false, "Not supported yet !"); ++ } ++ ++ while (count < 4) { ++ insts[count] = 0; ++ count++; ++ } ++ ++ guarantee(((long)addr_at(0) % (BytesPerWord * 2)) == 0, "must be aligned"); ++ atomic_store128_ptr func = get_atomic_store128_func(); ++ (*func)((long *)addr_at(0), 0, *(long *)&insts[0], *(long *)&insts[2]); ++ ++ ICache::invalidate_range(addr_at(0), 16); ++} ++ ++void NativeJump::patch_set48(address dest) { ++ jlong value = (jlong) dest; ++ int rt_reg = (int_at(0) & (0x1f << 16)); ++ int rs_reg = rt_reg << 5; ++ int rd_reg = rt_reg >> 5; ++ ++ int hi = (int)(value >> 32); ++ int lo = (int)(value & ~0); ++ ++ int count = 0; ++ ++ if (value == lo) { // 32-bit integer ++ if (Assembler::is_simm16(value)) { ++ set_int_at(count << 2, (Assembler::daddiu_op << 26) | rt_reg | Assembler::split_low(value)); ++ count += 1; ++ } else { ++ set_int_at(count << 2, (Assembler::lui_op << 26) | rt_reg | Assembler::split_low(value >> 16)); ++ count += 1; ++ if (Assembler::split_low(value)) { ++ set_int_at(count << 2, (Assembler::ori_op << 26) | rs_reg | rt_reg | Assembler::split_low(value)); ++ count += 1; ++ } ++ } ++ } else if (hi == 0) { // hardware zero-extends to upper 32 ++ set_int_at(count << 2, (Assembler::ori_op << 26) | rt_reg | Assembler::split_low(julong(value) >> 16)); ++ count += 1; ++ set_int_at(count << 2, (Assembler::dsll_op) | rt_reg | rd_reg | (16 << 6)); ++ count += 1; ++ if (Assembler::split_low(value)) { ++ set_int_at(count << 2, (Assembler::ori_op << 26) | rs_reg | rt_reg | Assembler::split_low(value)); ++ count += 1; ++ } ++ } else if ((value> 0) && Assembler::is_simm16(value >> 32)) { ++ set_int_at(count << 2, (Assembler::lui_op << 26) | rt_reg | Assembler::split_low(value >> 32)); ++ count += 1; ++ set_int_at(count << 2, (Assembler::ori_op << 26) | rs_reg | rt_reg | Assembler::split_low(value >> 16)); ++ count += 1; ++ set_int_at(count << 2, (Assembler::dsll_op) | rt_reg | rd_reg | (16 << 6)); ++ count += 1; ++ set_int_at(count << 2, (Assembler::ori_op << 26) | rs_reg | rt_reg | Assembler::split_low(value)); ++ count += 1; ++ } else { ++ tty->print_cr("dest = 0x%lx", value); ++ guarantee(false, "Not supported yet !"); ++ } ++ ++ while (count < 4) { ++ set_int_at(count << 2, 0); ++ count++; ++ } ++ ++ ICache::invalidate_range(addr_at(0), 16); ++} ++ ++void NativeJump::patch_on_j_only(address dst) { ++ long dest = ((long)dst - (((long)addr_at(4)) & 0xfffffffff0000000))>>2; ++ if ((dest >= 0) && (dest < (1<<26))) { ++ jint j_inst = (Assembler::j_op << 26) | dest; ++ set_int_at(0, j_inst); ++ ICache::invalidate_range(addr_at(0), 4); ++ } else { ++ ShouldNotReachHere(); ++ } ++} ++ 
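// ---------------------------------------------------------------------------
// Editor's note: the helper below is an illustrative sketch added for this
// write-up, not part of the original Loongson patch. It models the check that
// patch_on_j_only()/patch_on_j_gs() perform: a MIPS J-type instruction only
// carries a 26-bit word index, so the target must lie in the same 256MB
// region as the instruction in the delay slot. All names are hypothetical.

// Returns the encoded J-type word, or 0 if 'target' is not reachable from the
// 256MB window containing 'delay_slot_pc' (the analogue of addr_at(4) above).
static unsigned int encode_j_type(unsigned long delay_slot_pc, unsigned long target, unsigned int opcode6) {
  const unsigned long region = delay_slot_pc & 0xfffffffff0000000UL; // 256MB-aligned window
  const long          index  = (long)(target - region) >> 2;         // word index inside the window
  if (index < 0 || index >= (1L << 26))                              // must fit in 26 bits
    return 0;                                                        // unreachable this way
  return (opcode6 << 26) | (unsigned int)index;                      // opcode | instr_index
}

// When the window check fails, the port falls back to the longer
// lui/ori/dsll/ori materialization followed by jr/jalr, as in the routines above.
// ---------------------------------------------------------------------------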
++ ++void NativeJump::patch_on_j_gs(address dst) { ++ long dest = ((long)dst - (((long)addr_at(20)) & 0xfffffffff0000000))>>2; ++ if ((dest >= 0) && (dest < (1<<26))) { ++ jint j_inst = (Assembler::j_op << 26) | dest; ++ set_int_at(16, j_inst); ++ ICache::invalidate_range(addr_at(16), 4); ++ } else { ++ ShouldNotReachHere(); ++ } ++} ++ ++void NativeJump::patch_on_j(address dst) { ++ patch_on_j_gs(dst); ++} ++ ++void NativeJump::patch_on_jr_gs(address dst) { ++ patch_set48_gs(dst); ++ ICache::invalidate_range(addr_at(0), 16); ++} ++ ++void NativeJump::patch_on_jr(address dst) { ++ patch_set48(dst); ++ ICache::invalidate_range(addr_at(0), 16); ++} ++ ++ ++void NativeJump::set_jump_destination(address dest) { ++ OrderAccess::fence(); ++ ++ if (is_short()) { ++ assert(Assembler::is_simm16(dest-addr_at(4)), "change this code"); ++ set_int_at(0, (int_at(0) & 0xffff0000) | (dest - addr_at(4)) & 0xffff ); ++ ICache::invalidate_range(addr_at(0), 4); ++ } else if (is_b_far()) { ++ int offset = dest - addr_at(12); ++ set_int_at(12, (int_at(12) & 0xffff0000) | (offset >> 16)); ++ set_int_at(16, (int_at(16) & 0xffff0000) | (offset & 0xffff)); ++ } else { ++ if (is_op(int_at(16), Assembler::j_op)) { ++ if (UseLEXT1) { ++ patch_on_j_gs(dest); ++ } else { ++ patch_on_j(dest); ++ } ++ } else if (is_op(int_at(0), Assembler::j_op)) { ++ patch_on_j_only(dest); ++ } else if (is_special_op(int_at(16), Assembler::jr_op)) { ++ if (UseLEXT1) { ++ //guarantee(!os::is_MP() || (((long)addr_at(0) % 16) == 0), "destination must be aligned for GSSD"); ++ //patch_on_jr_gs(dest); ++ patch_on_jr(dest); ++ } else { ++ patch_on_jr(dest); ++ } ++ } else { ++ fatal("not a jump"); ++ } ++ } ++} ++ ++void NativeGeneralJump::insert_unconditional(address code_pos, address entry) { ++ CodeBuffer cb(code_pos, instruction_size); ++ MacroAssembler masm(&cb); ++#define __ masm. ++ if (Assembler::is_simm16((entry - code_pos - 4) / 4)) { ++ __ b(entry); ++ __ delayed()->nop(); ++ } else { ++ // Attention: We have to use a relative jump here since PC reloc-operation isn't allowed here. ++ int offset = entry - code_pos; ++ ++ Label L; ++ __ bgezal(R0, L); ++ __ delayed()->lui(T9, (offset - 8) >> 16); ++ __ bind(L); ++ __ ori(T9, T9, (offset - 8) & 0xffff); ++ __ daddu(T9, T9, RA); ++ __ jr(T9); ++ __ delayed()->nop(); ++ } ++ ++#undef __ ++ ++ ICache::invalidate_range(code_pos, instruction_size); ++} ++ ++bool NativeJump::is_b_far() { ++// ++// 0x000000556809f198: daddu at, ra, zero ++// 0x000000556809f19c: [4110001]bgezal zero, 0x000000556809f1a4 ++// ++// 0x000000556809f1a0: nop ++// 0x000000556809f1a4: lui t9, 0xfffffffd ++// 0x000000556809f1a8: ori t9, t9, 0x14dc ++// 0x000000556809f1ac: daddu t9, t9, ra ++// 0x000000556809f1b0: daddu ra, at, zero ++// 0x000000556809f1b4: jr t9 ++// 0x000000556809f1b8: nop ++// ;; ImplicitNullCheckStub slow case ++// 0x000000556809f1bc: lui t9, 0x55 ++// ++ return is_op(int_at(12), Assembler::lui_op); ++} ++ ++address NativeJump::jump_destination() { ++ if ( is_short() ) { ++ return addr_at(4) + Assembler::imm_off(int_at(instruction_offset)) * 4; ++ } ++ // Assembler::merge() is not correct in MIPS_64! 
++ // ++ // Example: ++ // hi16 = 0xfffd, ++ // lo16 = f7a4, ++ // ++ // offset=0xfffdf7a4 (Right) ++ // Assembler::merge = 0xfffcf7a4 (Wrong) ++ // ++ if ( is_b_far() ) { ++ int hi16 = int_at(12)&0xffff; ++ int low16 = int_at(16)&0xffff; ++ address target = addr_at(12) + (hi16 << 16) + low16; ++ return target; ++ } ++ ++ // nop ++ // nop ++ // nop ++ // nop ++ // j target ++ // nop ++ if ( nativeInstruction_at(addr_at(0))->is_nop() && ++ nativeInstruction_at(addr_at(4))->is_nop() && ++ nativeInstruction_at(addr_at(8))->is_nop() && ++ nativeInstruction_at(addr_at(12))->is_nop() && ++ is_op(int_at(16), Assembler::j_op) && ++ nativeInstruction_at(addr_at(20))->is_nop()) { ++ int instr_index = int_at(16) & 0x3ffffff; ++ intptr_t target_high = ((intptr_t)addr_at(20)) & 0xfffffffff0000000; ++ intptr_t target = target_high | (instr_index << 2); ++ return (address)target; ++ } ++ ++ // j target ++ // nop ++ if ( is_op(int_at(0), Assembler::j_op) && ++ nativeInstruction_at(addr_at(4))->is_nop()) { ++ int instr_index = int_at(0) & 0x3ffffff; ++ intptr_t target_high = ((intptr_t)addr_at(4)) & 0xfffffffff0000000; ++ intptr_t target = target_high | (instr_index << 2); ++ return (address)target; ++ } ++ ++ // li64 ++ if ( is_op(Assembler::lui_op) && ++ is_op(int_at(4), Assembler::ori_op) && ++ is_special_op(int_at(8), Assembler::dsll_op) && ++ is_op(int_at(12), Assembler::ori_op) && ++ is_special_op(int_at(16), Assembler::dsll_op) && ++ is_op(int_at(20), Assembler::ori_op) ) { ++ ++ return (address)Assembler::merge( (intptr_t)(int_at(20) & 0xffff), ++ (intptr_t)(int_at(12) & 0xffff), ++ (intptr_t)(int_at(4) & 0xffff), ++ (intptr_t)(int_at(0) & 0xffff)); ++ } ++ ++ //lui dst, imm16 ++ //ori dst, dst, imm16 ++ //dsll dst, dst, 16 ++ //ori dst, dst, imm16 ++ if ( is_op(Assembler::lui_op) && ++ is_op (int_at(4), Assembler::ori_op) && ++ is_special_op(int_at(8), Assembler::dsll_op) && ++ is_op (int_at(12), Assembler::ori_op) ) { ++ ++ return (address)Assembler::merge( (intptr_t)(int_at(12) & 0xffff), ++ (intptr_t)(int_at(4) & 0xffff), ++ (intptr_t)(int_at(0) & 0xffff), ++ (intptr_t)0); ++ } ++ ++ //ori dst, R0, imm16 ++ //dsll dst, dst, 16 ++ //ori dst, dst, imm16 ++ //nop ++ if ( is_op(Assembler::ori_op) && ++ is_special_op(int_at(4), Assembler::dsll_op) && ++ is_op (int_at(8), Assembler::ori_op) && ++ nativeInstruction_at(addr_at(12))->is_nop()) { ++ ++ return (address)Assembler::merge( (intptr_t)(int_at(8) & 0xffff), ++ (intptr_t)(int_at(0) & 0xffff), ++ (intptr_t)0, ++ (intptr_t)0); ++ } ++ ++ //ori dst, R0, imm16 ++ //dsll dst, dst, 16 ++ //nop ++ //nop ++ if ( is_op(Assembler::ori_op) && ++ is_special_op(int_at(4), Assembler::dsll_op) && ++ nativeInstruction_at(addr_at(8))->is_nop() && ++ nativeInstruction_at(addr_at(12))->is_nop()) { ++ ++ return (address)Assembler::merge( (intptr_t)(0), ++ (intptr_t)(int_at(0) & 0xffff), ++ (intptr_t)0, ++ (intptr_t)0); ++ } ++ ++ //daddiu dst, R0, imm16 ++ //nop ++ //nop ++ //nop ++ if ( is_op(Assembler::daddiu_op) && ++ nativeInstruction_at(addr_at(4))->is_nop() && ++ nativeInstruction_at(addr_at(8))->is_nop() && ++ nativeInstruction_at(addr_at(12))->is_nop() ) { ++ ++ int sign = int_at(0) & 0x8000; ++ if (sign == 0) { ++ return (address)Assembler::merge( (intptr_t)(int_at(0) & 0xffff), ++ (intptr_t)0, ++ (intptr_t)0, ++ (intptr_t)0); ++ } else { ++ return (address)Assembler::merge( (intptr_t)(int_at(0) & 0xffff), ++ (intptr_t)(0xffff), ++ (intptr_t)(0xffff), ++ (intptr_t)(0xffff)); ++ } ++ } ++ ++ //lui dst, imm16 ++ //ori dst, dst, imm16 ++ //nop ++ //nop ++ if 
( is_op(Assembler::lui_op) && ++ is_op (int_at(4), Assembler::ori_op) && ++ nativeInstruction_at(addr_at(8))->is_nop() && ++ nativeInstruction_at(addr_at(12))->is_nop() ) { ++ ++ int sign = int_at(0) & 0x8000; ++ if (sign == 0) { ++ return (address)Assembler::merge( (intptr_t)(int_at(4) & 0xffff), ++ (intptr_t)(int_at(0) & 0xffff), ++ (intptr_t)0, ++ (intptr_t)0); ++ } else { ++ return (address)Assembler::merge( (intptr_t)(int_at(4) & 0xffff), ++ (intptr_t)(int_at(0) & 0xffff), ++ (intptr_t)(0xffff), ++ (intptr_t)(0xffff)); ++ } ++ } ++ ++ //lui dst, imm16 ++ //nop ++ //nop ++ //nop ++ if ( is_op(Assembler::lui_op) && ++ nativeInstruction_at(addr_at(4))->is_nop() && ++ nativeInstruction_at(addr_at(8))->is_nop() && ++ nativeInstruction_at(addr_at(12))->is_nop() ) { ++ ++ int sign = int_at(0) & 0x8000; ++ if (sign == 0) { ++ return (address)Assembler::merge( (intptr_t)0, ++ (intptr_t)(int_at(0) & 0xffff), ++ (intptr_t)0, ++ (intptr_t)0); ++ } else { ++ return (address)Assembler::merge( (intptr_t)0, ++ (intptr_t)(int_at(0) & 0xffff), ++ (intptr_t)(0xffff), ++ (intptr_t)(0xffff)); ++ } ++ } ++ ++ fatal("not a jump"); ++ return NULL; // unreachable ++} ++ ++// MT-safe patching of a long jump instruction. ++// First patches first word of instruction to two jmp's that jmps to them ++// selfs (spinlock). Then patches the last byte, and then atomicly replaces ++// the jmp's with the first 4 byte of the new instruction. ++void NativeGeneralJump::replace_mt_safe(address instr_addr, address code_buffer) { ++ NativeGeneralJump* h_jump = nativeGeneralJump_at (instr_addr); ++ assert((int)instruction_size == (int)NativeCall::instruction_size, ++ "note::Runtime1::patch_code uses NativeCall::instruction_size"); ++ ++ // ensure 100% atomicity ++ guarantee(!os::is_MP() || (((long)instr_addr % BytesPerWord) == 0), "destination must be aligned for SD"); ++ ++ int *p = (int *)instr_addr; ++ int jr_word = p[4]; ++ ++ p[4] = 0x1000fffb; /* .1: --; --; --; --; b .1; nop */ ++ memcpy(instr_addr, code_buffer, NativeCall::instruction_size - 8); ++ *(long *)(instr_addr + 16) = *(long *)(code_buffer + 16); ++} ++ ++// Must ensure atomicity ++void NativeJump::patch_verified_entry(address entry, address verified_entry, address dest) { ++ assert(dest == SharedRuntime::get_handle_wrong_method_stub(), "expected fixed destination of patch"); ++ assert(nativeInstruction_at(verified_entry + BytesPerInstWord)->is_nop(), "mips64 cannot replace non-nop with jump"); ++ ++ if (MacroAssembler::reachable_from_cache(dest)) { ++ CodeBuffer cb(verified_entry, 1 * BytesPerInstWord); ++ MacroAssembler masm(&cb); ++ masm.j(dest); ++ } else { ++ // We use an illegal instruction for marking a method as ++ // not_entrant or zombie ++ NativeIllegalInstruction::insert(verified_entry); ++ } ++ ++ ICache::invalidate_range(verified_entry, 1 * BytesPerInstWord); ++} ++ ++bool NativeInstruction::is_jump() ++{ ++ if ((int_at(0) & NativeGeneralJump::b_mask) == NativeGeneralJump::beq_opcode) ++ return true; ++ if (is_op(int_at(4), Assembler::lui_op)) // simplified b_far ++ return true; ++ if (is_op(int_at(12), Assembler::lui_op)) // original b_far ++ return true; ++ ++ // nop ++ // nop ++ // nop ++ // nop ++ // j target ++ // nop ++ if ( is_nop() && ++ nativeInstruction_at(addr_at(4))->is_nop() && ++ nativeInstruction_at(addr_at(8))->is_nop() && ++ nativeInstruction_at(addr_at(12))->is_nop() && ++ nativeInstruction_at(addr_at(16))->is_op(Assembler::j_op) && ++ nativeInstruction_at(addr_at(20))->is_nop() ) { ++ return true; ++ } ++ ++ if ( 
nativeInstruction_at(addr_at(0))->is_op(Assembler::j_op) && ++ nativeInstruction_at(addr_at(4))->is_nop() ) { ++ return true; ++ } ++ ++ // lui rd, imm(63...48); ++ // ori rd, rd, imm(47...32); ++ // dsll rd, rd, 16; ++ // ori rd, rd, imm(31...16); ++ // dsll rd, rd, 16; ++ // ori rd, rd, imm(15...0); ++ // jr rd ++ // nop ++ if (is_op(int_at(0), Assembler::lui_op) && ++ is_op(int_at(4), Assembler::ori_op) && ++ is_special_op(int_at(8), Assembler::dsll_op) && ++ is_op(int_at(12), Assembler::ori_op) && ++ is_special_op(int_at(16), Assembler::dsll_op) && ++ is_op(int_at(20), Assembler::ori_op) && ++ is_special_op(int_at(24), Assembler::jr_op)) { ++ return true; ++ } ++ ++ //lui dst, imm16 ++ //ori dst, dst, imm16 ++ //dsll dst, dst, 16 ++ //ori dst, dst, imm16 ++ if (is_op(int_at(0), Assembler::lui_op) && ++ is_op(int_at(4), Assembler::ori_op) && ++ is_special_op(int_at(8), Assembler::dsll_op) && ++ is_op(int_at(12), Assembler::ori_op) && ++ is_special_op(int_at(16), Assembler::jr_op)) { ++ return true; ++ } ++ ++ //ori dst, R0, imm16 ++ //dsll dst, dst, 16 ++ //ori dst, dst, imm16 ++ //nop ++ if ( is_op(Assembler::ori_op) && ++ is_special_op(int_at(4), Assembler::dsll_op) && ++ is_op (int_at(8), Assembler::ori_op) && ++ nativeInstruction_at(addr_at(12))->is_nop() && ++ is_special_op(int_at(16), Assembler::jr_op)) { ++ return true; ++ } ++ ++ //ori dst, R0, imm16 ++ //dsll dst, dst, 16 ++ //nop ++ //nop ++ if ( is_op(Assembler::ori_op) && ++ is_special_op(int_at(4), Assembler::dsll_op) && ++ nativeInstruction_at(addr_at(8))->is_nop() && ++ nativeInstruction_at(addr_at(12))->is_nop() && ++ is_special_op(int_at(16), Assembler::jr_op)) { ++ return true; ++ } ++ ++ //daddiu dst, R0, imm16 ++ //nop ++ //nop ++ //nop ++ if ( is_op(Assembler::daddiu_op) && ++ nativeInstruction_at(addr_at(4))->is_nop() && ++ nativeInstruction_at(addr_at(8))->is_nop() && ++ nativeInstruction_at(addr_at(12))->is_nop() && ++ is_special_op(int_at(16), Assembler::jr_op)) { ++ return true; ++ } ++ ++ //lui dst, imm16 ++ //ori dst, dst, imm16 ++ //nop ++ //nop ++ if ( is_op(Assembler::lui_op) && ++ is_op (int_at(4), Assembler::ori_op) && ++ nativeInstruction_at(addr_at(8))->is_nop() && ++ nativeInstruction_at(addr_at(12))->is_nop() && ++ is_special_op(int_at(16), Assembler::jr_op)) { ++ return true; ++ } ++ ++ //lui dst, imm16 ++ //nop ++ //nop ++ //nop ++ if ( is_op(Assembler::lui_op) && ++ nativeInstruction_at(addr_at(4))->is_nop() && ++ nativeInstruction_at(addr_at(8))->is_nop() && ++ nativeInstruction_at(addr_at(12))->is_nop() && ++ is_special_op(int_at(16), Assembler::jr_op)) { ++ return true; ++ } ++ ++ return false; ++} ++ ++bool NativeInstruction::is_safepoint_poll() { ++ // ++ // 390 li T2, 0x0000000000400000 #@loadConP ++ // 394 sw [SP + #12], V1 # spill 9 ++ // 398 Safepoint @ [T2] : poll for GC @ safePoint_poll # spec.benchmarks.compress.Decompressor::decompress @ bci:224 L[0]=A6 L[1]=_ L[2]=sp + #28 L[3]=_ L[4]=V1 ++ // ++ // 0x000000ffe5815130: lui t2, 0x40 ++ // 0x000000ffe5815134: sw v1, 0xc(sp) ; OopMap{a6=Oop off=920} ++ // ;*goto ++ // ; - spec.benchmarks.compress.Decompressor::decompress@224 (line 584) ++ // ++ // 0x000000ffe5815138: lw at, 0x0(t2) ;*goto <--- PC ++ // ; - spec.benchmarks.compress.Decompressor::decompress@224 (line 584) ++ // ++ ++ // Since there may be some spill instructions between the safePoint_poll and loadConP, ++ // we check the safepoint instruction like the this. 
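// Editor's note: an illustrative decode of the fields tested just below; this
// sketch is not part of the original patch and the names are hypothetical.
// is_op()/is_rt() simply pick apart the fixed MIPS I-type fields of the word:
//
//   struct ITypeFields { unsigned op, rs, rt; int simm16; };
//   static ITypeFields decode_itype(unsigned insn) {
//     ITypeFields f;
//     f.op     = insn >> 26;               // bits 31..26: major opcode (lw is 0x23)
//     f.rs     = (insn >> 21) & 0x1f;      // bits 25..21: base register
//     f.rt     = (insn >> 16) & 0x1f;      // bits 20..16: destination register
//     f.simm16 = (short)(insn & 0xffff);   // bits 15..0 : sign-extended displacement
//     return f;
//   }
//
// The poll is accepted whenever op is the lw opcode and rt is AT, independent
// of the base register, because register allocation and spill code may vary.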
++ return is_op(Assembler::lw_op) && is_rt(AT); ++} +diff --git a/src/hotspot/cpu/mips/nativeInst_mips.hpp b/src/hotspot/cpu/mips/nativeInst_mips.hpp +new file mode 100644 +index 00000000000..9f0f24cc34b +--- /dev/null ++++ b/src/hotspot/cpu/mips/nativeInst_mips.hpp +@@ -0,0 +1,734 @@ ++/* ++ * Copyright (c) 1997, 2011, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#ifndef CPU_MIPS_VM_NATIVEINST_MIPS_HPP ++#define CPU_MIPS_VM_NATIVEINST_MIPS_HPP ++ ++#include "asm/assembler.hpp" ++#include "asm/macroAssembler.hpp" ++#include "runtime/icache.hpp" ++#include "runtime/os.hpp" ++#include "runtime/safepointMechanism.hpp" ++ ++// We have interfaces for the following instructions: ++// - NativeInstruction ++// - - NativeCall ++// - - NativeMovConstReg ++// - - NativeMovConstRegPatching ++// - - NativeMovRegMem ++// - - NativeMovRegMemPatching ++// - - NativeJump ++// - - NativeIllegalOpCode ++// - - NativeGeneralJump ++// - - NativeReturn ++// - - NativeReturnX (return with argument) ++// - - NativePushConst ++// - - NativeTstRegMem ++ ++// The base class for different kinds of native instruction abstractions. ++// Provides the primitive operations to manipulate code relative to this. ++ ++class NativeInstruction { ++ friend class Relocation; ++ ++ public: ++ enum mips_specific_constants { ++ nop_instruction_code = 0, ++ nop_instruction_size = 4, ++ sync_instruction_code = 0xf ++ }; ++ ++ bool is_nop() { return long_at(0) == nop_instruction_code; } ++ bool is_sync() { return long_at(0) == sync_instruction_code; } ++ inline bool is_call(); ++ inline bool is_illegal(); ++ inline bool is_return(); ++ bool is_jump(); ++ inline bool is_cond_jump(); ++ bool is_safepoint_poll(); ++ ++ //mips has no instruction to generate a illegal instrucion exception ++ //we define ours: break 11 ++ static int illegal_instruction(); ++ ++ bool is_int_branch(); ++ bool is_float_branch(); ++ ++ inline bool is_trampoline_call(); ++ ++ //We use an illegal instruction for marking a method as not_entrant or zombie. 
++ bool is_sigill_zombie_not_entrant(); ++ ++ protected: ++ address addr_at(int offset) const { return address(this) + offset; } ++ address instruction_address() const { return addr_at(0); } ++ address next_instruction_address() const { return addr_at(BytesPerInstWord); } ++ address prev_instruction_address() const { return addr_at(-BytesPerInstWord); } ++ ++ s_char sbyte_at(int offset) const { return *(s_char*) addr_at(offset); } ++ u_char ubyte_at(int offset) const { return *(u_char*) addr_at(offset); } ++ ++ jint int_at(int offset) const { return *(jint*) addr_at(offset); } ++ juint uint_at(int offset) const { return *(juint*) addr_at(offset); } ++ ++ intptr_t ptr_at(int offset) const { return *(intptr_t*) addr_at(offset); } ++ ++ oop oop_at (int offset) const { return *(oop*) addr_at(offset); } ++ int long_at(int offset) const { return *(jint*)addr_at(offset); } ++ ++ ++ void set_char_at(int offset, char c) { *addr_at(offset) = (u_char)c; wrote(offset); } ++ void set_int_at(int offset, jint i) { *(jint*)addr_at(offset) = i; wrote(offset); } ++ void set_ptr_at (int offset, intptr_t ptr) { *(intptr_t*) addr_at(offset) = ptr; wrote(offset); } ++ void set_oop_at (int offset, oop o) { *(oop*) addr_at(offset) = o; wrote(offset); } ++ void set_long_at(int offset, long i); ++ ++ int insn_word() const { return long_at(0); } ++ static bool is_op (int insn, Assembler::ops op) { return Assembler::opcode(insn) == (int)op; } ++ bool is_op (Assembler::ops op) const { return is_op(insn_word(), op); } ++ bool is_rs (int insn, Register rs) const { return Assembler::rs(insn) == (int)rs->encoding(); } ++ bool is_rs (Register rs) const { return is_rs(insn_word(), rs); } ++ bool is_rt (int insn, Register rt) const { return Assembler::rt(insn) == (int)rt->encoding(); } ++ bool is_rt (Register rt) const { return is_rt(insn_word(), rt); } ++ ++ static bool is_special_op (int insn, Assembler::special_ops op) { ++ return is_op(insn, Assembler::special_op) && Assembler::special(insn)==(int)op; ++ } ++ bool is_special_op (Assembler::special_ops op) const { return is_special_op(insn_word(), op); } ++ ++ void wrote(int offset); ++ ++ public: ++ ++ // unit test stuff ++ static void test() {} // override for testing ++ ++ inline friend NativeInstruction* nativeInstruction_at(address address); ++}; ++ ++inline NativeInstruction* nativeInstruction_at(address address) { ++ NativeInstruction* inst = (NativeInstruction*)address; ++#ifdef ASSERT ++ //inst->verify(); ++#endif ++ return inst; ++} ++ ++class NativeCall; ++inline NativeCall* nativeCall_at(address address); ++// The NativeCall is an abstraction for accessing/manipulating native call imm32/imm64 ++// instructions (used to manipulate inline caches, primitive & dll calls, etc.). ++// MIPS has no call instruction with imm32/imm64. 
Usually, a call was done like this: ++// 32 bits: ++// lui rt, imm16 ++// addiu rt, rt, imm16 ++// jalr rt ++// nop ++// ++// 64 bits: ++// lui rd, imm(63...48); ++// ori rd, rd, imm(47...32); ++// dsll rd, rd, 16; ++// ori rd, rd, imm(31...16); ++// dsll rd, rd, 16; ++// ori rd, rd, imm(15...0); ++// jalr rd ++// nop ++// ++ ++// we just consider the above for instruction as one call instruction ++class NativeCall: public NativeInstruction { ++ public: ++ enum mips_specific_constants { ++ instruction_offset = 0, ++ instruction_size = 6 * BytesPerInstWord, ++ return_address_offset_short = 4 * BytesPerInstWord, ++ return_address_offset_long = 6 * BytesPerInstWord, ++ displacement_offset = 0 ++ }; ++ ++ address instruction_address() const { return addr_at(instruction_offset); } ++ ++ address next_instruction_address() const { ++ if (is_special_op(int_at(8), Assembler::jalr_op)) { ++ return addr_at(return_address_offset_short); ++ } else { ++ return addr_at(return_address_offset_long); ++ } ++ } ++ ++ address return_address() const { ++ return next_instruction_address(); ++ } ++ ++ address target_addr_for_insn() const; ++ address destination() const; ++ void set_destination(address dest); ++ ++ void patch_set48_gs(address dest); ++ void patch_set48(address dest); ++ ++ void patch_on_jalr_gs(address dest); ++ void patch_on_jalr(address dest); ++ ++ void patch_on_jal_gs(address dest); ++ void patch_on_jal(address dest); ++ ++ void patch_on_trampoline(address dest); ++ ++ void patch_on_jal_only(address dest); ++ ++ void patch_set32_gs(address dest); ++ void patch_set32(address dest); ++ ++ void verify_alignment() { } ++ void verify(); ++ void print(); ++ ++ // Creation ++ inline friend NativeCall* nativeCall_at(address address); ++ inline friend NativeCall* nativeCall_before(address return_address); ++ ++ static bool is_call_at(address instr) { ++ return nativeInstruction_at(instr)->is_call(); ++ } ++ ++ static bool is_call_before(address return_address) { ++ return is_call_at(return_address - return_address_offset_short) | is_call_at(return_address - return_address_offset_long); ++ } ++ ++ static bool is_call_to(address instr, address target) { ++ return nativeInstruction_at(instr)->is_call() && ++nativeCall_at(instr)->destination() == target; ++ } ++ ++ // MT-safe patching of a call instruction. ++ static void insert(address code_pos, address entry); ++ ++ static void replace_mt_safe(address instr_addr, address code_buffer); ++ ++ // Similar to replace_mt_safe, but just changes the destination. The ++ // important thing is that free-running threads are able to execute ++ // this call instruction at all times. If the call is an immediate jal ++ // instruction we can simply rely on atomicity of 32-bit writes to ++ // make sure other threads will see no intermediate states. ++ ++ // We cannot rely on locks here, since the free-running threads must run at ++ // full speed. ++ // ++ // Used in the runtime linkage of calls; see class CompiledIC. ++ ++ // The parameter assert_lock disables the assertion during code generation. 
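// Editor's note: illustrative sketch, not part of the original patch. The li48
// form referenced throughout this header splits a positive 48-bit address into
// three 16-bit pieces (lui / ori / dsll 16 / ori); data() and jump_destination()
// reverse that split with Assembler::merge(). A stand-alone model of the round
// trip, with hypothetical names:
//
//   const unsigned long value = 0x00005566778899aaUL;     // positive, fits in 48 bits
//   const unsigned hi16  = (value >> 32) & 0xffff;        // lui  d, hi16
//   const unsigned mid16 = (value >> 16) & 0xffff;        // ori  d, d, mid16
//   const unsigned lo16  =  value        & 0xffff;        // dsll d, d, 16;  ori d, d, lo16
//   const unsigned long merged =
//       ((((unsigned long)hi16 << 16) | mid16) << 16) | lo16;
//   // merged == value
//
// Destinations that need all 64 bits use the longer li64 sequence shown above,
// which starts from bits 63..48 and adds one more ori/dsll step.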
++ void set_destination_mt_safe(address dest, bool assert_lock = true); ++ ++ address get_trampoline(); ++}; ++ ++inline NativeCall* nativeCall_at(address address) { ++ NativeCall* call = (NativeCall*)(address - NativeCall::instruction_offset); ++#ifdef ASSERT ++ call->verify(); ++#endif ++ return call; ++} ++ ++inline NativeCall* nativeCall_before(address return_address) { ++ NativeCall* call = NULL; ++ if (NativeCall::is_call_at(return_address - NativeCall::return_address_offset_long)) { ++ call = (NativeCall*)(return_address - NativeCall::return_address_offset_long); ++ } else { ++ call = (NativeCall*)(return_address - NativeCall::return_address_offset_short); ++ } ++#ifdef ASSERT ++ call->verify(); ++#endif ++ return call; ++} ++ ++class NativeMovConstReg: public NativeInstruction { ++ public: ++ enum mips_specific_constants { ++ instruction_offset = 0, ++ instruction_size = 4 * BytesPerInstWord, ++ next_instruction_offset = 4 * BytesPerInstWord, ++ }; ++ ++ int insn_word() const { return long_at(instruction_offset); } ++ address instruction_address() const { return addr_at(0); } ++ address next_instruction_address() const { return addr_at(next_instruction_offset); } ++ intptr_t data() const; ++ void set_data(intptr_t x, intptr_t o = 0); ++ ++ void patch_set48(intptr_t x); ++ ++ void verify(); ++ void print(); ++ ++ // unit test stuff ++ static void test() {} ++ ++ // Creation ++ inline friend NativeMovConstReg* nativeMovConstReg_at(address address); ++ inline friend NativeMovConstReg* nativeMovConstReg_before(address address); ++}; ++ ++inline NativeMovConstReg* nativeMovConstReg_at(address address) { ++ NativeMovConstReg* test = (NativeMovConstReg*)(address - NativeMovConstReg::instruction_offset); ++#ifdef ASSERT ++ test->verify(); ++#endif ++ return test; ++} ++ ++inline NativeMovConstReg* nativeMovConstReg_before(address address) { ++ NativeMovConstReg* test = (NativeMovConstReg*)(address - NativeMovConstReg::instruction_size - NativeMovConstReg::instruction_offset); ++#ifdef ASSERT ++ test->verify(); ++#endif ++ return test; ++} ++ ++class NativeMovConstRegPatching: public NativeMovConstReg { ++ private: ++ friend NativeMovConstRegPatching* nativeMovConstRegPatching_at(address address) { ++ NativeMovConstRegPatching* test = (NativeMovConstRegPatching*)(address - instruction_offset); ++ #ifdef ASSERT ++ test->verify(); ++ #endif ++ return test; ++ } ++}; ++ ++// An interface for accessing/manipulating native moves of the form: ++// lui AT, split_high(offset) ++// addiu AT, split_low(offset) ++// addu reg, reg, AT ++// lb/lbu/sb/lh/lhu/sh/lw/sw/lwc1/swc1 dest, reg, 0 ++// [lw/sw/lwc1/swc1 dest, reg, 4] ++// or ++// lb/lbu/sb/lh/lhu/sh/lw/sw/lwc1/swc1 dest, reg, offset ++// [lw/sw/lwc1/swc1 dest, reg, offset+4] ++// ++// Warning: These routines must be able to handle any instruction sequences ++// that are generated as a result of the load/store byte,word,long ++// macros. ++ ++class NativeMovRegMem: public NativeInstruction { ++ public: ++ enum mips_specific_constants { ++ instruction_offset = 0, ++ hiword_offset = 4, ++ ldst_offset = 12, ++ immediate_size = 4, ++ ldst_size = 16 ++ }; ++ ++ //offset is less than 16 bits. 
++ bool is_immediate() const { return !is_op(long_at(instruction_offset), Assembler::lui_op); } ++ bool is_64ldst() const { ++ if (is_immediate()) { ++ return (Assembler::opcode(long_at(hiword_offset)) == Assembler::opcode(long_at(instruction_offset))) && ++ (Assembler::imm_off(long_at(hiword_offset)) == Assembler::imm_off(long_at(instruction_offset)) + wordSize); ++ } else { ++ return (Assembler::opcode(long_at(ldst_offset+hiword_offset)) == Assembler::opcode(long_at(ldst_offset))) && ++ (Assembler::imm_off(long_at(ldst_offset+hiword_offset)) == Assembler::imm_off(long_at(ldst_offset)) + wordSize); ++ } ++ } ++ ++ address instruction_address() const { return addr_at(instruction_offset); } ++ address next_instruction_address() const { ++ return addr_at( (is_immediate()? immediate_size : ldst_size) + (is_64ldst()? 4 : 0)); ++ } ++ ++ int offset() const; ++ ++ void set_offset(int x); ++ ++ void add_offset_in_bytes(int add_offset) { set_offset ( ( offset() + add_offset ) ); } ++ ++ void verify(); ++ void print (); ++ ++ // unit test stuff ++ static void test() {} ++ ++ private: ++ inline friend NativeMovRegMem* nativeMovRegMem_at (address address); ++}; ++ ++inline NativeMovRegMem* nativeMovRegMem_at (address address) { ++ NativeMovRegMem* test = (NativeMovRegMem*)(address - NativeMovRegMem::instruction_offset); ++#ifdef ASSERT ++ test->verify(); ++#endif ++ return test; ++} ++ ++class NativeMovRegMemPatching: public NativeMovRegMem { ++ private: ++ friend NativeMovRegMemPatching* nativeMovRegMemPatching_at (address address) { ++ NativeMovRegMemPatching* test = (NativeMovRegMemPatching*)(address - instruction_offset); ++ #ifdef ASSERT ++ test->verify(); ++ #endif ++ return test; ++ } ++}; ++ ++ ++// Handles all kinds of jump on Loongson. Long/far, conditional/unconditional ++// 32 bits: ++// far jump: ++// lui reg, split_high(addr) ++// addiu reg, split_low(addr) ++// jr reg ++// nop ++// or ++// beq ZERO, ZERO, offset ++// nop ++// ++ ++//64 bits: ++// far jump: ++// lui rd, imm(63...48); ++// ori rd, rd, imm(47...32); ++// dsll rd, rd, 16; ++// ori rd, rd, imm(31...16); ++// dsll rd, rd, 16; ++// ori rd, rd, imm(15...0); ++// jalr rd ++// nop ++// ++class NativeJump: public NativeInstruction { ++ public: ++ enum mips_specific_constants { ++ instruction_offset = 0, ++ beq_opcode = 0x10000000,//000100|00000|00000|offset ++ b_mask = 0xffff0000, ++ short_size = 8, ++ instruction_size = 6 * BytesPerInstWord ++ }; ++ ++ bool is_short() const { return (long_at(instruction_offset) & b_mask) == beq_opcode; } ++ bool is_b_far(); ++ address instruction_address() const { return addr_at(instruction_offset); } ++ address jump_destination(); ++ ++ void patch_set48_gs(address dest); ++ void patch_set48(address dest); ++ ++ void patch_on_jr_gs(address dest); ++ void patch_on_jr(address dest); ++ ++ void patch_on_j_gs(address dest); ++ void patch_on_j(address dest); ++ ++ void patch_on_j_only(address dest); ++ ++ void set_jump_destination(address dest); ++ ++ // Creation ++ inline friend NativeJump* nativeJump_at(address address); ++ ++ // Insertion of native jump instruction ++ static void insert(address code_pos, address entry) { Unimplemented(); } ++ // MT-safe insertion of native jump at verified method entry ++ static void check_verified_entry_alignment(address entry, address verified_entry) {} ++ static void patch_verified_entry(address entry, address verified_entry, address dest); ++ ++ void verify(); ++}; ++ ++inline NativeJump* nativeJump_at(address address) { ++ NativeJump* jump = 
(NativeJump*)(address - NativeJump::instruction_offset); ++ debug_only(jump->verify();) ++ return jump; ++} ++ ++class NativeGeneralJump: public NativeJump { ++ public: ++ // Creation ++ inline friend NativeGeneralJump* nativeGeneralJump_at(address address); ++ ++ // Insertion of native general jump instruction ++ static void insert_unconditional(address code_pos, address entry); ++ static void replace_mt_safe(address instr_addr, address code_buffer); ++}; ++ ++inline NativeGeneralJump* nativeGeneralJump_at(address address) { ++ NativeGeneralJump* jump = (NativeGeneralJump*)(address); ++ debug_only(jump->verify();) ++ return jump; ++} ++ ++class NativeIllegalInstruction: public NativeInstruction { ++public: ++ enum mips_specific_constants { ++ instruction_code = 0x42000029, // mips reserved instruction ++ instruction_size = 4, ++ instruction_offset = 0, ++ next_instruction_offset = 4 ++ }; ++ ++ // Insert illegal opcode as specific address ++ static void insert(address code_pos); ++}; ++ ++// return instruction that does not pop values of the stack ++// jr RA ++// delay slot ++class NativeReturn: public NativeInstruction { ++ public: ++ enum mips_specific_constants { ++ instruction_size = 8, ++ instruction_offset = 0, ++ next_instruction_offset = 8 ++ }; ++}; ++ ++ ++ ++ ++class NativeCondJump; ++inline NativeCondJump* nativeCondJump_at(address address); ++class NativeCondJump: public NativeInstruction { ++ public: ++ enum mips_specific_constants { ++ instruction_size = 16, ++ instruction_offset = 12, ++ next_instruction_offset = 20 ++ }; ++ ++ ++ int insn_word() const { return long_at(instruction_offset); } ++ address instruction_address() const { return addr_at(0); } ++ address next_instruction_address() const { return addr_at(next_instruction_offset); } ++ ++ // Creation ++ inline friend NativeCondJump* nativeCondJump_at(address address); ++ ++ address jump_destination() const { ++ return ::nativeCondJump_at(addr_at(12))->jump_destination(); ++ } ++ ++ void set_jump_destination(address dest) { ++ ::nativeCondJump_at(addr_at(12))->set_jump_destination(dest); ++ } ++ ++}; ++ ++inline NativeCondJump* nativeCondJump_at(address address) { ++ NativeCondJump* jump = (NativeCondJump*)(address); ++ return jump; ++} ++ ++ ++ ++inline bool NativeInstruction::is_illegal() { return insn_word() == illegal_instruction(); } ++ ++inline bool NativeInstruction::is_call() { ++ // jal target ++ // nop ++ if ( nativeInstruction_at(addr_at(0))->is_op(Assembler::jal_op) && ++ nativeInstruction_at(addr_at(4))->is_nop() ) { ++ return true; ++ } ++ ++ // nop ++ // nop ++ // nop ++ // nop ++ // jal target ++ // nop ++ if ( is_nop() && ++ nativeInstruction_at(addr_at(4))->is_nop() && ++ nativeInstruction_at(addr_at(8))->is_nop() && ++ nativeInstruction_at(addr_at(12))->is_nop() && ++ nativeInstruction_at(addr_at(16))->is_op(Assembler::jal_op) && ++ nativeInstruction_at(addr_at(20))->is_nop() ) { ++ return true; ++ } ++ ++ // li64 ++ if ( is_op(Assembler::lui_op) && ++ is_op(int_at(4), Assembler::ori_op) && ++ is_special_op(int_at(8), Assembler::dsll_op) && ++ is_op(int_at(12), Assembler::ori_op) && ++ is_special_op(int_at(16), Assembler::dsll_op) && ++ is_op(int_at(20), Assembler::ori_op) && ++ is_special_op(int_at(24), Assembler::jalr_op) ) { ++ return true; ++ } ++ ++ //lui dst, imm16 ++ //ori dst, dst, imm16 ++ //dsll dst, dst, 16 ++ //ori dst, dst, imm16 ++ if ( is_op(Assembler::lui_op) && ++ is_op (int_at(4), Assembler::ori_op) && ++ is_special_op(int_at(8), Assembler::dsll_op) && ++ is_op (int_at(12), 
Assembler::ori_op) && ++ is_special_op(int_at(16), Assembler::jalr_op) ) { ++ return true; ++ } ++ ++ //ori dst, R0, imm16 ++ //dsll dst, dst, 16 ++ //ori dst, dst, imm16 ++ //nop ++ if ( is_op(Assembler::ori_op) && ++ is_special_op(int_at(4), Assembler::dsll_op) && ++ is_op (int_at(8), Assembler::ori_op) && ++ nativeInstruction_at(addr_at(12))->is_nop() && ++ is_special_op(int_at(16), Assembler::jalr_op) ) { ++ return true; ++ } ++ ++ //ori dst, R0, imm16 ++ //dsll dst, dst, 16 ++ //nop ++ //nop ++ if ( is_op(Assembler::ori_op) && ++ is_special_op(int_at(4), Assembler::dsll_op) && ++ nativeInstruction_at(addr_at(8))->is_nop() && ++ nativeInstruction_at(addr_at(12))->is_nop() && ++ is_special_op(int_at(16), Assembler::jalr_op) ) { ++ return true; ++ } ++ ++ //daddiu dst, R0, imm16 ++ //nop ++ //nop ++ //nop ++ if ( is_op(Assembler::daddiu_op) && ++ nativeInstruction_at(addr_at(4))->is_nop() && ++ nativeInstruction_at(addr_at(8))->is_nop() && ++ nativeInstruction_at(addr_at(12))->is_nop() && ++ is_special_op(int_at(16), Assembler::jalr_op) ) { ++ return true; ++ } ++ ++ //lui dst, imm16 ++ //ori dst, dst, imm16 ++ //nop ++ //nop ++ if ( is_op(Assembler::lui_op) && ++ is_op (int_at(4), Assembler::ori_op) && ++ nativeInstruction_at(addr_at(8))->is_nop() && ++ nativeInstruction_at(addr_at(12))->is_nop() && ++ is_special_op(int_at(16), Assembler::jalr_op) ) { ++ return true; ++ } ++ ++ //lui dst, imm16 ++ //nop ++ //nop ++ //nop ++ if ( is_op(Assembler::lui_op) && ++ nativeInstruction_at(addr_at(4))->is_nop() && ++ nativeInstruction_at(addr_at(8))->is_nop() && ++ nativeInstruction_at(addr_at(12))->is_nop() && ++ is_special_op(int_at(16), Assembler::jalr_op) ) { ++ return true; ++ } ++ ++ ++ //daddiu dst, R0, imm16 ++ //nop ++ if ( is_op(Assembler::daddiu_op) && ++ nativeInstruction_at(addr_at(4))->is_nop() && ++ is_special_op(int_at(8), Assembler::jalr_op) ) { ++ return true; ++ } ++ ++ //lui dst, imm16 ++ //ori dst, dst, imm16 ++ if ( is_op(Assembler::lui_op) && ++ is_op (int_at(4), Assembler::ori_op) && ++ is_special_op(int_at(8), Assembler::jalr_op) ) { ++ return true; ++ } ++ ++ //lui dst, imm16 ++ //nop ++ if ( is_op(Assembler::lui_op) && ++ nativeInstruction_at(addr_at(4))->is_nop() && ++ is_special_op(int_at(8), Assembler::jalr_op) ) { ++ return true; ++ } ++ ++ if(is_trampoline_call()) ++ return true; ++ ++ return false; ++ ++} ++ ++inline bool NativeInstruction::is_return() { return is_special_op(Assembler::jr_op) && is_rs(RA);} ++ ++inline bool NativeInstruction::is_cond_jump() { return is_int_branch() || is_float_branch(); } ++ ++// Call trampoline stubs. 
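// Editor's note: illustrative sketch, not part of the original patch. A
// trampoline call does not encode its destination in the instruction stream:
// the lui/ori/dsll/ld prefix recognized by is_trampoline_call() computes the
// address of a stub slot and loads the real target from it, so retargeting the
// call only requires one store into that slot (see set_destination() below).
// A minimal stand-alone model, assuming the slot is naturally aligned:
//
//   static void retarget_trampoline(unsigned long* stub_slot, unsigned long new_target) {
//     *stub_slot = new_target;   // single aligned 64-bit store; concurrent readers
//   }                            // observe either the old or the new destination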
++class NativeCallTrampolineStub : public NativeInstruction { ++ public: ++ ++ enum mips_specific_constants { ++ instruction_size = 2 * BytesPerInstWord, ++ instruction_offset = 0, ++ next_instruction_offset = 2 * BytesPerInstWord ++ }; ++ ++ address destination() const { ++ return (address)ptr_at(0); ++ } ++ ++ void set_destination(address new_destination) { ++ set_ptr_at(0, (intptr_t)new_destination); ++ } ++}; ++ ++inline bool NativeInstruction::is_trampoline_call() { ++ // lui dst, imm16 ++ // ori dst, dst, imm16 ++ // dsll dst, dst, 16 ++ // ld target, dst, imm16 ++ // jalr target ++ // nop ++ if ( is_op(Assembler::lui_op) && ++ is_op(int_at(4), Assembler::ori_op) && ++ is_special_op(int_at(8), Assembler::dsll_op) && ++ is_op(int_at(12), Assembler::ld_op) && ++ is_special_op(int_at(16), Assembler::jalr_op) && ++ nativeInstruction_at(addr_at(20))->is_nop() ) { ++ return true; ++ } ++ ++ return false; ++} ++ ++inline NativeCallTrampolineStub* nativeCallTrampolineStub_at(address addr) { ++ return (NativeCallTrampolineStub*)addr; ++} ++#endif // CPU_MIPS_VM_NATIVEINST_MIPS_HPP +diff --git a/src/hotspot/cpu/mips/registerMap_mips.hpp b/src/hotspot/cpu/mips/registerMap_mips.hpp +new file mode 100644 +index 00000000000..3f3558f79d4 +--- /dev/null ++++ b/src/hotspot/cpu/mips/registerMap_mips.hpp +@@ -0,0 +1,50 @@ ++/* ++ * Copyright (c) 1998, 2010, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2021, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#ifndef CPU_MIPS_VM_REGISTERMAP_MIPS_HPP ++#define CPU_MIPS_VM_REGISTERMAP_MIPS_HPP ++ ++// machine-dependent implemention for register maps ++ friend class frame; ++ ++ private: ++#ifndef CORE ++ // This is the hook for finding a register in an "well-known" location, ++ // such as a register block of a predetermined format. ++ // Since there is none, we just return NULL. ++ // See registerMap_sparc.hpp for an example of grabbing registers ++ // from register save areas of a standard layout. 
++ address pd_location(VMReg reg) const {return NULL;} ++ address pd_location(VMReg base_reg, int slot_idx) const { ++ return location(base_reg->next(slot_idx)); ++ } ++#endif ++ ++ // no PD state to clear or copy: ++ void pd_clear() {} ++ void pd_initialize() {} ++ void pd_initialize_from(const RegisterMap* map) {} ++ ++#endif // CPU_MIPS_VM_REGISTERMAP_MIPS_HPP +diff --git a/src/hotspot/cpu/mips/register_definitions_mips.cpp b/src/hotspot/cpu/mips/register_definitions_mips.cpp +new file mode 100644 +index 00000000000..4af25318346 +--- /dev/null ++++ b/src/hotspot/cpu/mips/register_definitions_mips.cpp +@@ -0,0 +1,103 @@ ++/* ++ * Copyright (c) 2002, 2013, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2016, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ * ++ */ ++ ++#include "precompiled.hpp" ++#include "asm/assembler.hpp" ++#include "asm/register.hpp" ++#include "register_mips.hpp" ++#ifdef TARGET_ARCH_MODEL_mips_32 ++# include "interp_masm_mips_32.hpp" ++#endif ++#ifdef TARGET_ARCH_MODEL_mips_64 ++# include "interp_masm_mips_64.hpp" ++#endif ++ ++REGISTER_DEFINITION(Register, noreg); ++REGISTER_DEFINITION(Register, i0); ++REGISTER_DEFINITION(Register, i1); ++REGISTER_DEFINITION(Register, i2); ++REGISTER_DEFINITION(Register, i3); ++REGISTER_DEFINITION(Register, i4); ++REGISTER_DEFINITION(Register, i5); ++REGISTER_DEFINITION(Register, i6); ++REGISTER_DEFINITION(Register, i7); ++REGISTER_DEFINITION(Register, i8); ++REGISTER_DEFINITION(Register, i9); ++REGISTER_DEFINITION(Register, i10); ++REGISTER_DEFINITION(Register, i11); ++REGISTER_DEFINITION(Register, i12); ++REGISTER_DEFINITION(Register, i13); ++REGISTER_DEFINITION(Register, i14); ++REGISTER_DEFINITION(Register, i15); ++REGISTER_DEFINITION(Register, i16); ++REGISTER_DEFINITION(Register, i17); ++REGISTER_DEFINITION(Register, i18); ++REGISTER_DEFINITION(Register, i19); ++REGISTER_DEFINITION(Register, i20); ++REGISTER_DEFINITION(Register, i21); ++REGISTER_DEFINITION(Register, i22); ++REGISTER_DEFINITION(Register, i23); ++REGISTER_DEFINITION(Register, i24); ++REGISTER_DEFINITION(Register, i25); ++REGISTER_DEFINITION(Register, i26); ++REGISTER_DEFINITION(Register, i27); ++REGISTER_DEFINITION(Register, i28); ++REGISTER_DEFINITION(Register, i29); ++REGISTER_DEFINITION(Register, i30); ++REGISTER_DEFINITION(Register, i31); ++ ++REGISTER_DEFINITION(FloatRegister, fnoreg); ++REGISTER_DEFINITION(FloatRegister, f0); ++REGISTER_DEFINITION(FloatRegister, f1); ++REGISTER_DEFINITION(FloatRegister, f2); ++REGISTER_DEFINITION(FloatRegister, f3); ++REGISTER_DEFINITION(FloatRegister, f4); ++REGISTER_DEFINITION(FloatRegister, f5); ++REGISTER_DEFINITION(FloatRegister, f6); ++REGISTER_DEFINITION(FloatRegister, f7); ++REGISTER_DEFINITION(FloatRegister, f8); ++REGISTER_DEFINITION(FloatRegister, f9); ++REGISTER_DEFINITION(FloatRegister, f10); ++REGISTER_DEFINITION(FloatRegister, f11); ++REGISTER_DEFINITION(FloatRegister, f12); ++REGISTER_DEFINITION(FloatRegister, f13); ++REGISTER_DEFINITION(FloatRegister, f14); ++REGISTER_DEFINITION(FloatRegister, f15); ++REGISTER_DEFINITION(FloatRegister, f16); ++REGISTER_DEFINITION(FloatRegister, f17); ++REGISTER_DEFINITION(FloatRegister, f18); ++REGISTER_DEFINITION(FloatRegister, f19); ++REGISTER_DEFINITION(FloatRegister, f20); ++REGISTER_DEFINITION(FloatRegister, f21); ++REGISTER_DEFINITION(FloatRegister, f22); ++REGISTER_DEFINITION(FloatRegister, f23); ++REGISTER_DEFINITION(FloatRegister, f24); ++REGISTER_DEFINITION(FloatRegister, f25); ++REGISTER_DEFINITION(FloatRegister, f26); ++REGISTER_DEFINITION(FloatRegister, f27); ++REGISTER_DEFINITION(FloatRegister, f28); ++REGISTER_DEFINITION(FloatRegister, f29); ++REGISTER_DEFINITION(FloatRegister, f30); ++REGISTER_DEFINITION(FloatRegister, f31); +diff --git a/src/hotspot/cpu/mips/register_mips.cpp b/src/hotspot/cpu/mips/register_mips.cpp +new file mode 100644 +index 00000000000..4a9b22bfef2 +--- /dev/null ++++ b/src/hotspot/cpu/mips/register_mips.cpp +@@ -0,0 +1,52 @@ ++/* ++ * Copyright (c) 2000, 2012, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#include "precompiled.hpp" ++#include "register_mips.hpp" ++ ++const int ConcreteRegisterImpl::max_gpr = RegisterImpl::number_of_registers << 1; ++const int ConcreteRegisterImpl::max_fpr = ConcreteRegisterImpl::max_gpr + ++ 2 * FloatRegisterImpl::number_of_registers; ++ ++const char* RegisterImpl::name() const { ++ const char* names[number_of_registers] = { ++ "zero", "at", "v0", "v1", "a0", "a1", "a2", "a3", ++ "a4", "a5", "a6", "a7", "t0", "t1", "t2", "t3", ++ "s0", "s1", "s2", "s3", "s4", "s5", "s6", "s7", ++ "t8", "t9", "k0", "k1", "gp", "sp", "fp", "ra" ++ }; ++ return is_valid() ? names[encoding()] : "noreg"; ++} ++ ++const char* FloatRegisterImpl::name() const { ++ const char* names[number_of_registers] = { ++ "f0", "f1", "f2", "f3", "f4", "f5", "f6", "f7", ++ "f8", "f9", "f10", "f11", "f12", "f13", "f14", "f15", ++ "f16", "f17", "f18", "f19", "f20", "f21", "f22", "f23", ++ "f24", "f25", "f26", "f27", "f28", "f29", "f30", "f31", ++ }; ++ return is_valid() ? names[encoding()] : "fnoreg"; ++} ++ +diff --git a/src/hotspot/cpu/mips/register_mips.hpp b/src/hotspot/cpu/mips/register_mips.hpp +new file mode 100644 +index 00000000000..4f74717c24f +--- /dev/null ++++ b/src/hotspot/cpu/mips/register_mips.hpp +@@ -0,0 +1,344 @@ ++/* ++ * Copyright (c) 2000, 2012, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ * ++ */ ++ ++#ifndef CPU_MIPS_VM_REGISTER_MIPS_HPP ++#define CPU_MIPS_VM_REGISTER_MIPS_HPP ++ ++#include "asm/register.hpp" ++#include "logging/log.hpp" ++#include "utilities/bitMap.hpp" ++#include "utilities/formatBuffer.hpp" ++#include "utilities/ticks.hpp" ++ ++class VMRegImpl; ++typedef VMRegImpl* VMReg; ++ ++// Use Register as shortcut ++class RegisterImpl; ++typedef RegisterImpl* Register; ++ ++inline Register as_Register(int encoding) { ++ return (Register)(intptr_t) encoding; ++} ++ ++class RegisterImpl: public AbstractRegisterImpl { ++ public: ++ enum { ++ number_of_registers = 32 ++ }; ++ ++ // derived registers, offsets, and addresses ++ Register successor() const { return as_Register(encoding() + 1); } ++ ++ // construction ++ inline friend Register as_Register(int encoding); ++ ++ VMReg as_VMReg(); ++ ++ // accessors ++ int encoding() const { assert(is_valid(), "invalid register (%d)", (int)(intptr_t)this ); return (intptr_t)this; } ++ bool is_valid() const { return 0 <= (intptr_t)this && (intptr_t)this < number_of_registers; } ++ const char* name() const; ++}; ++ ++ ++// The integer registers of the MIPS32 architecture ++CONSTANT_REGISTER_DECLARATION(Register, noreg, (-1)); ++ ++ ++CONSTANT_REGISTER_DECLARATION(Register, i0, (0)); ++CONSTANT_REGISTER_DECLARATION(Register, i1, (1)); ++CONSTANT_REGISTER_DECLARATION(Register, i2, (2)); ++CONSTANT_REGISTER_DECLARATION(Register, i3, (3)); ++CONSTANT_REGISTER_DECLARATION(Register, i4, (4)); ++CONSTANT_REGISTER_DECLARATION(Register, i5, (5)); ++CONSTANT_REGISTER_DECLARATION(Register, i6, (6)); ++CONSTANT_REGISTER_DECLARATION(Register, i7, (7)); ++CONSTANT_REGISTER_DECLARATION(Register, i8, (8)); ++CONSTANT_REGISTER_DECLARATION(Register, i9, (9)); ++CONSTANT_REGISTER_DECLARATION(Register, i10, (10)); ++CONSTANT_REGISTER_DECLARATION(Register, i11, (11)); ++CONSTANT_REGISTER_DECLARATION(Register, i12, (12)); ++CONSTANT_REGISTER_DECLARATION(Register, i13, (13)); ++CONSTANT_REGISTER_DECLARATION(Register, i14, (14)); ++CONSTANT_REGISTER_DECLARATION(Register, i15, (15)); ++CONSTANT_REGISTER_DECLARATION(Register, i16, (16)); ++CONSTANT_REGISTER_DECLARATION(Register, i17, (17)); ++CONSTANT_REGISTER_DECLARATION(Register, i18, (18)); ++CONSTANT_REGISTER_DECLARATION(Register, i19, (19)); ++CONSTANT_REGISTER_DECLARATION(Register, i20, (20)); ++CONSTANT_REGISTER_DECLARATION(Register, i21, (21)); ++CONSTANT_REGISTER_DECLARATION(Register, i22, (22)); ++CONSTANT_REGISTER_DECLARATION(Register, i23, (23)); ++CONSTANT_REGISTER_DECLARATION(Register, i24, (24)); ++CONSTANT_REGISTER_DECLARATION(Register, i25, (25)); ++CONSTANT_REGISTER_DECLARATION(Register, i26, (26)); ++CONSTANT_REGISTER_DECLARATION(Register, i27, (27)); ++CONSTANT_REGISTER_DECLARATION(Register, i28, (28)); ++CONSTANT_REGISTER_DECLARATION(Register, i29, (29)); ++CONSTANT_REGISTER_DECLARATION(Register, i30, (30)); ++CONSTANT_REGISTER_DECLARATION(Register, i31, (31)); ++ ++#ifndef DONT_USE_REGISTER_DEFINES ++#define NOREG ((Register)(noreg_RegisterEnumValue)) ++ ++#define I0 ((Register)(i0_RegisterEnumValue)) ++#define I1 ((Register)(i1_RegisterEnumValue)) ++#define I2 ((Register)(i2_RegisterEnumValue)) ++#define I3 ((Register)(i3_RegisterEnumValue)) ++#define I4 ((Register)(i4_RegisterEnumValue)) ++#define I5 ((Register)(i5_RegisterEnumValue)) ++#define I6 ((Register)(i6_RegisterEnumValue)) ++#define I7 ((Register)(i7_RegisterEnumValue)) ++#define I8 ((Register)(i8_RegisterEnumValue)) ++#define I9 ((Register)(i9_RegisterEnumValue)) ++#define I10 ((Register)(i10_RegisterEnumValue)) 
++#define I11 ((Register)(i11_RegisterEnumValue)) ++#define I12 ((Register)(i12_RegisterEnumValue)) ++#define I13 ((Register)(i13_RegisterEnumValue)) ++#define I14 ((Register)(i14_RegisterEnumValue)) ++#define I15 ((Register)(i15_RegisterEnumValue)) ++#define I16 ((Register)(i16_RegisterEnumValue)) ++#define I17 ((Register)(i17_RegisterEnumValue)) ++#define I18 ((Register)(i18_RegisterEnumValue)) ++#define I19 ((Register)(i19_RegisterEnumValue)) ++#define I20 ((Register)(i20_RegisterEnumValue)) ++#define I21 ((Register)(i21_RegisterEnumValue)) ++#define I22 ((Register)(i22_RegisterEnumValue)) ++#define I23 ((Register)(i23_RegisterEnumValue)) ++#define I24 ((Register)(i24_RegisterEnumValue)) ++#define I25 ((Register)(i25_RegisterEnumValue)) ++#define I26 ((Register)(i26_RegisterEnumValue)) ++#define I27 ((Register)(i27_RegisterEnumValue)) ++#define I28 ((Register)(i28_RegisterEnumValue)) ++#define I29 ((Register)(i29_RegisterEnumValue)) ++#define I30 ((Register)(i30_RegisterEnumValue)) ++#define I31 ((Register)(i31_RegisterEnumValue)) ++ ++#define R0 ((Register)(i0_RegisterEnumValue)) ++#define AT ((Register)(i1_RegisterEnumValue)) ++#define V0 ((Register)(i2_RegisterEnumValue)) ++#define V1 ((Register)(i3_RegisterEnumValue)) ++#define RA0 ((Register)(i4_RegisterEnumValue)) ++#define RA1 ((Register)(i5_RegisterEnumValue)) ++#define RA2 ((Register)(i6_RegisterEnumValue)) ++#define RA3 ((Register)(i7_RegisterEnumValue)) ++#define RA4 ((Register)(i8_RegisterEnumValue)) ++#define RA5 ((Register)(i9_RegisterEnumValue)) ++#define RA6 ((Register)(i10_RegisterEnumValue)) ++#define RA7 ((Register)(i11_RegisterEnumValue)) ++#define RT0 ((Register)(i12_RegisterEnumValue)) ++#define RT1 ((Register)(i13_RegisterEnumValue)) ++#define RT2 ((Register)(i14_RegisterEnumValue)) ++#define RT3 ((Register)(i15_RegisterEnumValue)) ++#define S0 ((Register)(i16_RegisterEnumValue)) ++#define S1 ((Register)(i17_RegisterEnumValue)) ++#define S2 ((Register)(i18_RegisterEnumValue)) ++#define S3 ((Register)(i19_RegisterEnumValue)) ++#define S4 ((Register)(i20_RegisterEnumValue)) ++#define S5 ((Register)(i21_RegisterEnumValue)) ++#define S6 ((Register)(i22_RegisterEnumValue)) ++#define S7 ((Register)(i23_RegisterEnumValue)) ++#define RT8 ((Register)(i24_RegisterEnumValue)) ++#define RT9 ((Register)(i25_RegisterEnumValue)) ++#define K0 ((Register)(i26_RegisterEnumValue)) ++#define K1 ((Register)(i27_RegisterEnumValue)) ++#define GP ((Register)(i28_RegisterEnumValue)) ++#define SP ((Register)(i29_RegisterEnumValue)) ++#define FP ((Register)(i30_RegisterEnumValue)) ++#define S8 ((Register)(i30_RegisterEnumValue)) ++#define RA ((Register)(i31_RegisterEnumValue)) ++ ++#define c_rarg0 RT0 ++#define c_rarg1 RT1 ++#define Rmethod S3 ++#define Rsender S4 ++#define Rnext S1 ++ ++/* ++#define RT0 T0 ++#define RT1 T1 ++#define RT2 T2 ++#define RT3 T3 ++#define RT4 T8 ++#define RT5 T9 ++*/ ++ ++ ++//for interpreter frame ++// bytecode pointer register ++#define BCP S0 ++// local variable pointer register ++#define LVP S7 ++// temporary callee-saved register; we use this register to hold a value that may be clobbered across call_VM ++// be sure to save and restore its value in call_stub ++#define TSR S2 ++ ++#define OPT_THREAD 1 ++ ++#define TREG S6 ++ ++#define S5_heapbase S5 ++ ++#define mh_SP_save SP ++ ++#define FSR V0 ++#define SSR V1 ++#define FSF F0 ++#define SSF F1 ++#define FTF F14 ++#define STF F15 ++ ++#define AFT F30 ++ ++#define RECEIVER T0 ++#define IC_Klass T1 ++ ++#define SHIFT_count T3 ++ ++#endif //
DONT_USE_REGISTER_DEFINES ++ ++// Use FloatRegister as shortcut ++class FloatRegisterImpl; ++typedef FloatRegisterImpl* FloatRegister; ++ ++inline FloatRegister as_FloatRegister(int encoding) { ++ return (FloatRegister)(intptr_t) encoding; ++} ++ ++// The implementation of floating point registers for the architecture ++class FloatRegisterImpl: public AbstractRegisterImpl { ++ public: ++ enum { ++ float_arg_base = 12, ++ number_of_registers = 32 ++ }; ++ ++ // construction ++ inline friend FloatRegister as_FloatRegister(int encoding); ++ ++ VMReg as_VMReg(); ++ ++ // derived registers, offsets, and addresses ++ FloatRegister successor() const { return as_FloatRegister(encoding() + 1); } ++ ++ // accessors ++ int encoding() const { assert(is_valid(), "invalid register"); return (intptr_t)this; } ++ bool is_valid() const { return 0 <= (intptr_t)this && (intptr_t)this < number_of_registers; } ++ const char* name() const; ++ ++}; ++ ++CONSTANT_REGISTER_DECLARATION(FloatRegister, fnoreg , (-1)); ++ ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f0 , ( 0)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f1 , ( 1)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f2 , ( 2)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f3 , ( 3)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f4 , ( 4)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f5 , ( 5)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f6 , ( 6)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f7 , ( 7)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f8 , ( 8)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f9 , ( 9)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f10 , (10)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f11 , (11)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f12 , (12)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f13 , (13)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f14 , (14)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f15 , (15)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f16 , (16)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f17 , (17)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f18 , (18)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f19 , (19)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f20 , (20)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f21 , (21)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f22 , (22)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f23 , (23)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f24 , (24)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f25 , (25)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f26 , (26)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f27 , (27)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f28 , (28)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f29 , (29)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f30 , (30)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f31 , (31)); ++ ++#ifndef DONT_USE_REGISTER_DEFINES ++#define FNOREG ((FloatRegister)(fnoreg_FloatRegisterEnumValue)) ++#define F0 ((FloatRegister)( f0_FloatRegisterEnumValue)) ++#define F1 ((FloatRegister)( f1_FloatRegisterEnumValue)) ++#define F2 ((FloatRegister)( f2_FloatRegisterEnumValue)) ++#define F3 ((FloatRegister)( f3_FloatRegisterEnumValue)) ++#define F4 ((FloatRegister)( f4_FloatRegisterEnumValue)) ++#define F5 ((FloatRegister)( f5_FloatRegisterEnumValue)) ++#define F6 ((FloatRegister)( f6_FloatRegisterEnumValue)) ++#define F7 ((FloatRegister)( f7_FloatRegisterEnumValue)) ++#define F8 ((FloatRegister)( f8_FloatRegisterEnumValue)) 
++#define F9 ((FloatRegister)( f9_FloatRegisterEnumValue)) ++#define F10 ((FloatRegister)( f10_FloatRegisterEnumValue)) ++#define F11 ((FloatRegister)( f11_FloatRegisterEnumValue)) ++#define F12 ((FloatRegister)( f12_FloatRegisterEnumValue)) ++#define F13 ((FloatRegister)( f13_FloatRegisterEnumValue)) ++#define F14 ((FloatRegister)( f14_FloatRegisterEnumValue)) ++#define F15 ((FloatRegister)( f15_FloatRegisterEnumValue)) ++#define F16 ((FloatRegister)( f16_FloatRegisterEnumValue)) ++#define F17 ((FloatRegister)( f17_FloatRegisterEnumValue)) ++#define F18 ((FloatRegister)( f18_FloatRegisterEnumValue)) ++#define F19 ((FloatRegister)( f19_FloatRegisterEnumValue)) ++#define F20 ((FloatRegister)( f20_FloatRegisterEnumValue)) ++#define F21 ((FloatRegister)( f21_FloatRegisterEnumValue)) ++#define F22 ((FloatRegister)( f22_FloatRegisterEnumValue)) ++#define F23 ((FloatRegister)( f23_FloatRegisterEnumValue)) ++#define F24 ((FloatRegister)( f24_FloatRegisterEnumValue)) ++#define F25 ((FloatRegister)( f25_FloatRegisterEnumValue)) ++#define F26 ((FloatRegister)( f26_FloatRegisterEnumValue)) ++#define F27 ((FloatRegister)( f27_FloatRegisterEnumValue)) ++#define F28 ((FloatRegister)( f28_FloatRegisterEnumValue)) ++#define F29 ((FloatRegister)( f29_FloatRegisterEnumValue)) ++#define F30 ((FloatRegister)( f30_FloatRegisterEnumValue)) ++#define F31 ((FloatRegister)( f31_FloatRegisterEnumValue)) ++#endif // DONT_USE_REGISTER_DEFINES ++ ++ ++const int MIPS_ARGS_IN_REGS_NUM = 4; ++ ++// Need to know the total number of registers of all sorts for SharedInfo. ++// Define a class that exports it. ++class ConcreteRegisterImpl : public AbstractRegisterImpl { ++ public: ++ enum { ++ // A big enough number for C2: all the registers plus flags ++ // This number must be large enough to cover REG_COUNT (defined by c2) registers. ++ // There is no requirement that any ordering here matches any ordering c2 gives ++ // it's optoregs. ++ number_of_registers = (RegisterImpl::number_of_registers + FloatRegisterImpl::number_of_registers) * 2 ++ }; ++ ++ static const int max_gpr; ++ static const int max_fpr; ++}; ++ ++#endif //CPU_MIPS_VM_REGISTER_MIPS_HPP +diff --git a/src/hotspot/cpu/mips/relocInfo_mips.cpp b/src/hotspot/cpu/mips/relocInfo_mips.cpp +new file mode 100644 +index 00000000000..7d8d072b51b +--- /dev/null ++++ b/src/hotspot/cpu/mips/relocInfo_mips.cpp +@@ -0,0 +1,160 @@ ++/* ++ * Copyright (c) 1998, 2013, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ * ++ */ ++ ++#include "precompiled.hpp" ++#include "asm/macroAssembler.hpp" ++#include "code/relocInfo.hpp" ++#include "compiler/disassembler.hpp" ++#include "nativeInst_mips.hpp" ++#include "oops/compressedOops.inline.hpp" ++#include "oops/oop.hpp" ++#include "oops/oop.inline.hpp" ++#include "runtime/safepoint.hpp" ++ ++ ++void Relocation::pd_set_data_value(address x, intptr_t o, bool verify_only) { ++ x += o; ++ typedef Assembler::WhichOperand WhichOperand; ++ WhichOperand which = (WhichOperand) format(); // that is, disp32 or imm, call32, narrow oop ++ assert(which == Assembler::disp32_operand || ++ which == Assembler::narrow_oop_operand || ++ which == Assembler::imm_operand, "format unpacks ok"); ++ if (which == Assembler::imm_operand) { ++ if (verify_only) { ++ assert(nativeMovConstReg_at(addr())->data() == (long)x, "instructions must match"); ++ } else { ++ nativeMovConstReg_at(addr())->set_data((intptr_t)(x)); ++ } ++ } else if (which == Assembler::narrow_oop_operand) { ++ // both compressed oops and compressed classes look the same ++ if (CompressedOops::is_in((void*)x)) { ++ if (verify_only) { ++ assert(nativeMovConstReg_at(addr())->data() == (long)CompressedOops::encode(cast_to_oop(x)), "instructions must match"); ++ } else { ++ nativeMovConstReg_at(addr())->set_data((intptr_t)(CompressedOops::encode(cast_to_oop(x))), (intptr_t)(x)); ++ } ++ } else { ++ if (verify_only) { ++ assert(nativeMovConstReg_at(addr())->data() == (long)CompressedKlassPointers::encode((Klass*)x), "instructions must match"); ++ } else { ++ nativeMovConstReg_at(addr())->set_data((intptr_t)(CompressedKlassPointers::encode((Klass*)x)), (intptr_t)(x)); ++ } ++ } ++ } else { ++ // Note: Use runtime_call_type relocations for call32_operand. ++ assert(0, "call32_operand not supported in MIPS64"); ++ } ++} ++ ++ ++//NOTICE HERE, this relocate is not need for MIPS, since MIPS USE abosolutly target, ++//Maybe We should FORGET CALL RELOCATION ++address Relocation::pd_call_destination(address orig_addr) { ++ intptr_t adj = 0; ++ NativeInstruction* ni = nativeInstruction_at(addr()); ++ if (ni->is_call()) { ++ if (!ni->is_trampoline_call()) { ++ return nativeCall_at(addr())->target_addr_for_insn(); ++ } else { ++ address trampoline = nativeCall_at(addr())->get_trampoline(); ++ if (trampoline) { ++ return nativeCallTrampolineStub_at(trampoline)->destination(); ++ } else { ++ return (address) -1; ++ } ++ } ++ } else if (ni->is_jump()) { ++ return nativeGeneralJump_at(addr())->jump_destination() + adj; ++ } else if (ni->is_cond_jump()) { ++ return nativeCondJump_at(addr())->jump_destination() +adj; ++ } else { ++ tty->print_cr("\nError!\ncall destination: " INTPTR_FORMAT, p2i(addr())); ++ Disassembler::decode(addr() - 10 * 4, addr() + 10 * 4, tty); ++ ShouldNotReachHere(); ++ return NULL; ++ } ++} ++ ++ ++void Relocation::pd_set_call_destination(address x) { ++ NativeInstruction* ni = nativeInstruction_at(addr()); ++ if (ni->is_call()) { ++ NativeCall* call = nativeCall_at(addr()); ++ if (!ni->is_trampoline_call()) { ++ call->set_destination(x); ++ } else { ++ address trampoline_stub_addr = call->get_trampoline(); ++ if (trampoline_stub_addr != NULL) { ++ address orig = call->target_addr_for_insn(); ++ if (orig != trampoline_stub_addr) { ++ call->patch_on_trampoline(trampoline_stub_addr); ++ } ++ call->set_destination_mt_safe(x, false); ++ } ++ } ++ } else if (ni->is_jump()) ++ nativeGeneralJump_at(addr())->set_jump_destination(x); ++ else if (ni->is_cond_jump()) ++ nativeCondJump_at(addr())->set_jump_destination(x); ++ 
else ++ { ShouldNotReachHere(); } ++ ++ // Unresolved jumps are recognized by a destination of -1 ++ // However 64bit can't actually produce such an address ++ // and encodes a jump to self but jump_destination will ++ // return a -1 as the signal. We must not relocate this ++ // jmp or the ic code will not see it as unresolved. ++} ++ ++ ++address* Relocation::pd_address_in_code() { ++ return (address*)addr(); ++} ++ ++ ++address Relocation::pd_get_address_from_code() { ++ NativeMovConstReg* ni = nativeMovConstReg_at(addr()); ++ return (address)ni->data(); ++} ++ ++ ++ ++void poll_Relocation::fix_relocation_after_move(const CodeBuffer* src, CodeBuffer* dest) { ++} ++ ++/* ++void poll_return_Relocation::fix_relocation_after_move(const CodeBuffer* src, CodeBuffer* dest) { ++} ++*/ ++ ++void internal_pc_Relocation::fix_relocation_after_move(const CodeBuffer* src, CodeBuffer* dest) { ++ address target =0; ++ NativeMovConstReg* ni = nativeMovConstReg_at(addr()); ++ target = new_addr_for((address)ni->data(), src, dest); ++ ni->set_data((intptr_t)target); ++} ++ ++void metadata_Relocation::pd_fix_value(address x) { ++} +diff --git a/src/hotspot/cpu/mips/relocInfo_mips.hpp b/src/hotspot/cpu/mips/relocInfo_mips.hpp +new file mode 100644 +index 00000000000..1e1e170fd87 +--- /dev/null ++++ b/src/hotspot/cpu/mips/relocInfo_mips.hpp +@@ -0,0 +1,44 @@ ++/* ++ * Copyright (c) 1997, 2010, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#ifndef CPU_MIPS_VM_RELOCINFO_MIPS_HPP ++#define CPU_MIPS_VM_RELOCINFO_MIPS_HPP ++ ++ // machine-dependent parts of class relocInfo ++ private: ++ enum { ++ // Since MIPS instructions are whole words, ++ // the two low-order offset bits can always be discarded. ++ offset_unit = 4, ++ ++ // imm_oop_operand vs. narrow_oop_operand ++ format_width = 2 ++ }; ++ ++ public: ++ ++ static bool mustIterateImmediateOopsInCode() { return false; } ++ ++#endif // CPU_MIPS_VM_RELOCINFO_MIPS_HPP +diff --git a/src/hotspot/cpu/mips/runtime_mips_64.cpp b/src/hotspot/cpu/mips/runtime_mips_64.cpp +new file mode 100644 +index 00000000000..36ab413f0b3 +--- /dev/null ++++ b/src/hotspot/cpu/mips/runtime_mips_64.cpp +@@ -0,0 +1,206 @@ ++/* ++ * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2021, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#include "precompiled.hpp" ++#ifdef COMPILER2 ++#include "asm/macroAssembler.hpp" ++#include "asm/macroAssembler.inline.hpp" ++#include "code/vmreg.hpp" ++#include "compiler/oopMap.hpp" ++#include "interpreter/interpreter.hpp" ++#include "opto/runtime.hpp" ++#include "runtime/interfaceSupport.inline.hpp" ++#include "runtime/sharedRuntime.hpp" ++#include "runtime/stubRoutines.hpp" ++#include "runtime/vframeArray.hpp" ++#include "utilities/globalDefinitions.hpp" ++#include "vmreg_mips.inline.hpp" ++#endif ++ ++#define __ masm-> ++ ++#define A0 RA0 ++#define A1 RA1 ++#define A2 RA2 ++#define A3 RA3 ++#define A4 RA4 ++#define A5 RA5 ++#define A6 RA6 ++#define A7 RA7 ++#define T0 RT0 ++#define T1 RT1 ++#define T2 RT2 ++#define T3 RT3 ++#define T8 RT8 ++#define T9 RT9 ++ ++//-------------- generate_exception_blob ----------- ++// creates _exception_blob. ++// The exception blob is jumped to from a compiled method. ++// (see emit_exception_handler in sparc.ad file) ++// ++// Given an exception pc at a call we call into the runtime for the ++// handler in this method. This handler might merely restore state ++// (i.e. callee save registers) unwind the frame and jump to the ++// exception handler for the nmethod if there is no Java level handler ++// for the nmethod. ++// ++// This code is entered with a jump, and left with a jump. ++// ++// Arguments: ++// V0: exception oop ++// V1: exception pc ++// ++// Results: ++// A0: exception oop ++// A1: exception pc in caller or ??? ++// jumps to: exception handler of caller ++// ++// Note: the exception pc MUST be at a call (precise debug information) ++// ++// [stubGenerator_mips.cpp] generate_forward_exception() ++// |- V0, V1 are created ++// |- T9 <= SharedRuntime::exception_handler_for_return_address ++// `- jr T9 ++// `- the caller's exception_handler ++// `- jr OptoRuntime::exception_blob ++// `- here ++// ++void OptoRuntime::generate_exception_blob() { ++ // Capture info about frame layout ++ enum layout { ++ fp_off, ++ return_off, // slot for return address ++ framesize ++ }; ++ ++ // allocate space for the code ++ ResourceMark rm; ++ // setup code generation tools ++ CodeBuffer buffer("exception_blob", 5120, 5120); ++ MacroAssembler* masm = new MacroAssembler(&buffer); ++ ++ ++ address start = __ pc(); ++ ++ __ daddiu(SP, SP, -1 * framesize * wordSize); // Prolog! ++ ++ // this frame will be treated as the original caller method. ++ // So, the return pc should be filled with the original exception pc. 
++ // ref: X86's implementation ++ __ sd(V1, SP, return_off *wordSize); // return address ++ __ sd(FP, SP, fp_off *wordSize); ++ ++ // Save callee saved registers. None for UseSSE=0, ++ // floats-only for UseSSE=1, and doubles for UseSSE=2. ++ ++ __ daddiu(FP, SP, fp_off * wordSize); ++ ++ // Store exception in Thread object. We cannot pass any arguments to the ++ // handle_exception call, since we do not want to make any assumption ++ // about the size of the frame where the exception happened in. ++ Register thread = TREG; ++ ++#ifndef OPT_THREAD ++ __ get_thread(thread); ++#endif ++ ++ __ sd(V0, Address(thread, JavaThread::exception_oop_offset())); ++ __ sd(V1, Address(thread, JavaThread::exception_pc_offset())); ++ ++ // This call does all the hard work. It checks if an exception handler ++ // exists in the method. ++ // If so, it returns the handler address. ++ // If not, it prepares for stack-unwinding, restoring the callee-save ++ // registers of the frame being removed. ++ __ set_last_Java_frame(thread, NOREG, NOREG, NULL); ++ ++ assert(StackAlignmentInBytes == 16, "must be"); ++ __ dins(SP, R0, 0, 4); // Fix stack alignment as required by ABI ++ ++ __ relocate(relocInfo::internal_pc_type); ++ ++ { ++ long save_pc = (long)__ pc() + 48; ++ __ patchable_set48(AT, save_pc); ++ } ++ __ sd(AT, thread, in_bytes(JavaThread::last_Java_pc_offset())); ++ ++ __ move(A0, thread); ++ __ patchable_set48(T9, (long)OptoRuntime::handle_exception_C); ++ __ jalr(T9); ++ __ delayed()->nop(); ++ ++ // Set an oopmap for the call site ++ OopMapSet *oop_maps = new OopMapSet(); ++ OopMap* map = new OopMap( framesize, 0 ); ++ ++ oop_maps->add_gc_map( __ offset(), map); ++ ++#ifndef OPT_THREAD ++ __ get_thread(thread); ++#endif ++ __ reset_last_Java_frame(thread, true); ++ ++ // Pop self-frame. ++ __ leave(); // Epilog! ++ ++ // V0: exception handler ++ ++ // We have a handler in V0, (could be deopt blob) ++ __ move(T9, V0); ++ ++#ifndef OPT_THREAD ++ __ get_thread(thread); ++#endif ++ // Get the exception ++ __ ld(A0, Address(thread, JavaThread::exception_oop_offset())); ++ // Get the exception pc in case we are deoptimized ++ __ ld(A1, Address(thread, JavaThread::exception_pc_offset())); ++#ifdef ASSERT ++ __ sd(R0, Address(thread, JavaThread::exception_handler_pc_offset())); ++ __ sd(R0, Address(thread, JavaThread::exception_pc_offset())); ++#endif ++ // Clear the exception oop so GC no longer processes it as a root. ++ __ sd(R0, Address(thread, JavaThread::exception_oop_offset())); ++ ++ // Fix seg fault when running: ++ // Eclipse + Plugin + Debug As ++ // This is the only condition where C2 calls SharedRuntime::generate_deopt_blob() ++ // ++ __ move(V0, A0); ++ __ move(V1, A1); ++ ++ // V0: exception oop ++ // T9: exception handler ++ // A1: exception pc ++ __ jr(T9); ++ __ delayed()->nop(); ++ ++ // make sure all code is generated ++ masm->flush(); ++ ++ _exception_blob = ExceptionBlob::create(&buffer, oop_maps, framesize); ++} +diff --git a/src/hotspot/cpu/mips/sharedRuntime_mips_64.cpp b/src/hotspot/cpu/mips/sharedRuntime_mips_64.cpp +new file mode 100644 +index 00000000000..48cc424a54e +--- /dev/null ++++ b/src/hotspot/cpu/mips/sharedRuntime_mips_64.cpp +@@ -0,0 +1,3384 @@ ++/* ++ * Copyright (c) 2003, 2013, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#include "precompiled.hpp" ++#include "asm/macroAssembler.hpp" ++#include "asm/macroAssembler.inline.hpp" ++#include "code/debugInfoRec.hpp" ++#include "code/icBuffer.hpp" ++#include "code/nativeInst.hpp" ++#include "code/vtableStubs.hpp" ++#include "compiler/oopMap.hpp" ++#include "interpreter/interpreter.hpp" ++#include "oops/compiledICHolder.hpp" ++#include "oops/klass.inline.hpp" ++#include "prims/methodHandles.hpp" ++#include "runtime/jniHandles.hpp" ++#include "runtime/sharedRuntime.hpp" ++#include "runtime/signature.hpp" ++#include "runtime/stubRoutines.hpp" ++#include "runtime/vframeArray.hpp" ++#include "vmreg_mips.inline.hpp" ++#ifdef COMPILER2 ++#include "opto/runtime.hpp" ++#endif ++ ++#include ++ ++#define __ masm-> ++ ++#define A0 RA0 ++#define A1 RA1 ++#define A2 RA2 ++#define A3 RA3 ++#define A4 RA4 ++#define A5 RA5 ++#define A6 RA6 ++#define A7 RA7 ++#define T0 RT0 ++#define T1 RT1 ++#define T2 RT2 ++#define T3 RT3 ++#define T8 RT8 ++#define T9 RT9 ++ ++const int StackAlignmentInSlots = StackAlignmentInBytes / VMRegImpl::stack_slot_size; ++ ++class RegisterSaver { ++ enum { FPU_regs_live = 32 }; ++ // Capture info about frame layout ++ enum layout { ++#define DEF_LAYOUT_OFFS(regname) regname ## _off, regname ## H_off, ++ DEF_LAYOUT_OFFS(for_16_bytes_aligned) ++ DEF_LAYOUT_OFFS(fpr0) ++ DEF_LAYOUT_OFFS(fpr1) ++ DEF_LAYOUT_OFFS(fpr2) ++ DEF_LAYOUT_OFFS(fpr3) ++ DEF_LAYOUT_OFFS(fpr4) ++ DEF_LAYOUT_OFFS(fpr5) ++ DEF_LAYOUT_OFFS(fpr6) ++ DEF_LAYOUT_OFFS(fpr7) ++ DEF_LAYOUT_OFFS(fpr8) ++ DEF_LAYOUT_OFFS(fpr9) ++ DEF_LAYOUT_OFFS(fpr10) ++ DEF_LAYOUT_OFFS(fpr11) ++ DEF_LAYOUT_OFFS(fpr12) ++ DEF_LAYOUT_OFFS(fpr13) ++ DEF_LAYOUT_OFFS(fpr14) ++ DEF_LAYOUT_OFFS(fpr15) ++ DEF_LAYOUT_OFFS(fpr16) ++ DEF_LAYOUT_OFFS(fpr17) ++ DEF_LAYOUT_OFFS(fpr18) ++ DEF_LAYOUT_OFFS(fpr19) ++ DEF_LAYOUT_OFFS(fpr20) ++ DEF_LAYOUT_OFFS(fpr21) ++ DEF_LAYOUT_OFFS(fpr22) ++ DEF_LAYOUT_OFFS(fpr23) ++ DEF_LAYOUT_OFFS(fpr24) ++ DEF_LAYOUT_OFFS(fpr25) ++ DEF_LAYOUT_OFFS(fpr26) ++ DEF_LAYOUT_OFFS(fpr27) ++ DEF_LAYOUT_OFFS(fpr28) ++ DEF_LAYOUT_OFFS(fpr29) ++ DEF_LAYOUT_OFFS(fpr30) ++ DEF_LAYOUT_OFFS(fpr31) ++ ++ DEF_LAYOUT_OFFS(v0) ++ DEF_LAYOUT_OFFS(v1) ++ DEF_LAYOUT_OFFS(a0) ++ DEF_LAYOUT_OFFS(a1) ++ DEF_LAYOUT_OFFS(a2) ++ DEF_LAYOUT_OFFS(a3) ++ DEF_LAYOUT_OFFS(a4) ++ DEF_LAYOUT_OFFS(a5) ++ DEF_LAYOUT_OFFS(a6) ++ DEF_LAYOUT_OFFS(a7) ++ DEF_LAYOUT_OFFS(t0) ++ DEF_LAYOUT_OFFS(t1) ++ DEF_LAYOUT_OFFS(t2) ++ DEF_LAYOUT_OFFS(t3) ++ DEF_LAYOUT_OFFS(s0) ++ DEF_LAYOUT_OFFS(s1) ++ DEF_LAYOUT_OFFS(s2) ++ DEF_LAYOUT_OFFS(s3) ++ DEF_LAYOUT_OFFS(s4) ++ DEF_LAYOUT_OFFS(s5) ++ 
DEF_LAYOUT_OFFS(s6) ++ DEF_LAYOUT_OFFS(s7) ++ DEF_LAYOUT_OFFS(t8) ++ DEF_LAYOUT_OFFS(t9) ++ ++ DEF_LAYOUT_OFFS(gp) ++ DEF_LAYOUT_OFFS(fp) ++ DEF_LAYOUT_OFFS(return) ++ reg_save_size ++ }; ++ ++ public: ++ ++ static OopMap* save_live_registers(MacroAssembler* masm, int additional_frame_words, int* total_frame_words, bool save_vectors =false ); ++ static void restore_live_registers(MacroAssembler* masm, bool restore_vectors = false); ++ static int raOffset(void) { return return_off / 2; } ++ //Rmethod ++ static int methodOffset(void) { return s3_off / 2; } ++ ++ static int v0Offset(void) { return v0_off / 2; } ++ static int v1Offset(void) { return v1_off / 2; } ++ ++ static int fpResultOffset(void) { return fpr0_off / 2; } ++ ++ // During deoptimization only the result register need to be restored ++ // all the other values have already been extracted. ++ static void restore_result_registers(MacroAssembler* masm); ++}; ++ ++OopMap* RegisterSaver::save_live_registers(MacroAssembler* masm, int additional_frame_words, int* total_frame_words, bool save_vectors ) { ++ ++ // Always make the frame size 16-byte aligned ++ int frame_size_in_bytes = align_up(additional_frame_words*wordSize + ++ reg_save_size*BytesPerInt, 16); ++ // OopMap frame size is in compiler stack slots (jint's) not bytes or words ++ int frame_size_in_slots = frame_size_in_bytes / BytesPerInt; ++ // The caller will allocate additional_frame_words ++ int additional_frame_slots = additional_frame_words*wordSize / BytesPerInt; ++ // CodeBlob frame size is in words. ++ int frame_size_in_words = frame_size_in_bytes / wordSize; ++ *total_frame_words = frame_size_in_words; ++ ++ // save registers ++ ++ __ daddiu(SP, SP, - reg_save_size * jintSize); ++ ++ __ sdc1(F0, SP, fpr0_off * jintSize); __ sdc1(F1, SP, fpr1_off * jintSize); ++ __ sdc1(F2, SP, fpr2_off * jintSize); __ sdc1(F3, SP, fpr3_off * jintSize); ++ __ sdc1(F4, SP, fpr4_off * jintSize); __ sdc1(F5, SP, fpr5_off * jintSize); ++ __ sdc1(F6, SP, fpr6_off * jintSize); __ sdc1(F7, SP, fpr7_off * jintSize); ++ __ sdc1(F8, SP, fpr8_off * jintSize); __ sdc1(F9, SP, fpr9_off * jintSize); ++ __ sdc1(F10, SP, fpr10_off * jintSize); __ sdc1(F11, SP, fpr11_off * jintSize); ++ __ sdc1(F12, SP, fpr12_off * jintSize); __ sdc1(F13, SP, fpr13_off * jintSize); ++ __ sdc1(F14, SP, fpr14_off * jintSize); __ sdc1(F15, SP, fpr15_off * jintSize); ++ __ sdc1(F16, SP, fpr16_off * jintSize); __ sdc1(F17, SP, fpr17_off * jintSize); ++ __ sdc1(F18, SP, fpr18_off * jintSize); __ sdc1(F19, SP, fpr19_off * jintSize); ++ __ sdc1(F20, SP, fpr20_off * jintSize); __ sdc1(F21, SP, fpr21_off * jintSize); ++ __ sdc1(F22, SP, fpr22_off * jintSize); __ sdc1(F23, SP, fpr23_off * jintSize); ++ __ sdc1(F24, SP, fpr24_off * jintSize); __ sdc1(F25, SP, fpr25_off * jintSize); ++ __ sdc1(F26, SP, fpr26_off * jintSize); __ sdc1(F27, SP, fpr27_off * jintSize); ++ __ sdc1(F28, SP, fpr28_off * jintSize); __ sdc1(F29, SP, fpr29_off * jintSize); ++ __ sdc1(F30, SP, fpr30_off * jintSize); __ sdc1(F31, SP, fpr31_off * jintSize); ++ __ sd(V0, SP, v0_off * jintSize); __ sd(V1, SP, v1_off * jintSize); ++ __ sd(A0, SP, a0_off * jintSize); __ sd(A1, SP, a1_off * jintSize); ++ __ sd(A2, SP, a2_off * jintSize); __ sd(A3, SP, a3_off * jintSize); ++ __ sd(A4, SP, a4_off * jintSize); __ sd(A5, SP, a5_off * jintSize); ++ __ sd(A6, SP, a6_off * jintSize); __ sd(A7, SP, a7_off * jintSize); ++ __ sd(T0, SP, t0_off * jintSize); ++ __ sd(T1, SP, t1_off * jintSize); ++ __ sd(T2, SP, t2_off * jintSize); ++ __ sd(T3, SP, t3_off * jintSize); ++ __ 
sd(S0, SP, s0_off * jintSize); ++ __ sd(S1, SP, s1_off * jintSize); ++ __ sd(S2, SP, s2_off * jintSize); ++ __ sd(S3, SP, s3_off * jintSize); ++ __ sd(S4, SP, s4_off * jintSize); ++ __ sd(S5, SP, s5_off * jintSize); ++ __ sd(S6, SP, s6_off * jintSize); ++ __ sd(S7, SP, s7_off * jintSize); ++ ++ __ sd(T8, SP, t8_off * jintSize); ++ __ sd(T9, SP, t9_off * jintSize); ++ ++ __ sd(GP, SP, gp_off * jintSize); ++ __ sd(FP, SP, fp_off * jintSize); ++ __ sd(RA, SP, return_off * jintSize); ++ __ daddiu(FP, SP, fp_off * jintSize); ++ ++ OopMapSet *oop_maps = new OopMapSet(); ++ //OopMap* map = new OopMap( frame_words, 0 ); ++ OopMap* map = new OopMap( frame_size_in_slots, 0 ); ++ ++ ++//#define STACK_OFFSET(x) VMRegImpl::stack2reg((x) + additional_frame_words) ++#define STACK_OFFSET(x) VMRegImpl::stack2reg((x) + additional_frame_slots) ++ map->set_callee_saved(STACK_OFFSET( v0_off), V0->as_VMReg()); ++ map->set_callee_saved(STACK_OFFSET( v1_off), V1->as_VMReg()); ++ map->set_callee_saved(STACK_OFFSET( a0_off), A0->as_VMReg()); ++ map->set_callee_saved(STACK_OFFSET( a1_off), A1->as_VMReg()); ++ map->set_callee_saved(STACK_OFFSET( a2_off), A2->as_VMReg()); ++ map->set_callee_saved(STACK_OFFSET( a3_off), A3->as_VMReg()); ++ map->set_callee_saved(STACK_OFFSET( a4_off), A4->as_VMReg()); ++ map->set_callee_saved(STACK_OFFSET( a5_off), A5->as_VMReg()); ++ map->set_callee_saved(STACK_OFFSET( a6_off), A6->as_VMReg()); ++ map->set_callee_saved(STACK_OFFSET( a7_off), A7->as_VMReg()); ++ map->set_callee_saved(STACK_OFFSET( t0_off), T0->as_VMReg()); ++ map->set_callee_saved(STACK_OFFSET( t1_off), T1->as_VMReg()); ++ map->set_callee_saved(STACK_OFFSET( t2_off), T2->as_VMReg()); ++ map->set_callee_saved(STACK_OFFSET( t3_off), T3->as_VMReg()); ++ map->set_callee_saved(STACK_OFFSET( s0_off), S0->as_VMReg()); ++ map->set_callee_saved(STACK_OFFSET( s1_off), S1->as_VMReg()); ++ map->set_callee_saved(STACK_OFFSET( s2_off), S2->as_VMReg()); ++ map->set_callee_saved(STACK_OFFSET( s3_off), S3->as_VMReg()); ++ map->set_callee_saved(STACK_OFFSET( s4_off), S4->as_VMReg()); ++ map->set_callee_saved(STACK_OFFSET( s5_off), S5->as_VMReg()); ++ map->set_callee_saved(STACK_OFFSET( s6_off), S6->as_VMReg()); ++ map->set_callee_saved(STACK_OFFSET( s7_off), S7->as_VMReg()); ++ map->set_callee_saved(STACK_OFFSET( t8_off), T8->as_VMReg()); ++ map->set_callee_saved(STACK_OFFSET( t9_off), T9->as_VMReg()); ++ map->set_callee_saved(STACK_OFFSET( gp_off), GP->as_VMReg()); ++ map->set_callee_saved(STACK_OFFSET( fp_off), FP->as_VMReg()); ++ map->set_callee_saved(STACK_OFFSET( return_off), RA->as_VMReg()); ++ ++ map->set_callee_saved(STACK_OFFSET( fpr0_off), F0->as_VMReg()); ++ map->set_callee_saved(STACK_OFFSET( fpr1_off), F1->as_VMReg()); ++ map->set_callee_saved(STACK_OFFSET( fpr2_off), F2->as_VMReg()); ++ map->set_callee_saved(STACK_OFFSET( fpr3_off), F3->as_VMReg()); ++ map->set_callee_saved(STACK_OFFSET( fpr4_off), F4->as_VMReg()); ++ map->set_callee_saved(STACK_OFFSET( fpr5_off), F5->as_VMReg()); ++ map->set_callee_saved(STACK_OFFSET( fpr6_off), F6->as_VMReg()); ++ map->set_callee_saved(STACK_OFFSET( fpr7_off), F7->as_VMReg()); ++ map->set_callee_saved(STACK_OFFSET( fpr8_off), F8->as_VMReg()); ++ map->set_callee_saved(STACK_OFFSET( fpr9_off), F9->as_VMReg()); ++ map->set_callee_saved(STACK_OFFSET( fpr10_off), F10->as_VMReg()); ++ map->set_callee_saved(STACK_OFFSET( fpr11_off), F11->as_VMReg()); ++ map->set_callee_saved(STACK_OFFSET( fpr12_off), F12->as_VMReg()); ++ map->set_callee_saved(STACK_OFFSET( fpr13_off), F13->as_VMReg()); ++ 
map->set_callee_saved(STACK_OFFSET( fpr14_off), F14->as_VMReg()); ++ map->set_callee_saved(STACK_OFFSET( fpr15_off), F15->as_VMReg()); ++ map->set_callee_saved(STACK_OFFSET( fpr16_off), F16->as_VMReg()); ++ map->set_callee_saved(STACK_OFFSET( fpr17_off), F17->as_VMReg()); ++ map->set_callee_saved(STACK_OFFSET( fpr18_off), F18->as_VMReg()); ++ map->set_callee_saved(STACK_OFFSET( fpr19_off), F19->as_VMReg()); ++ map->set_callee_saved(STACK_OFFSET( fpr20_off), F20->as_VMReg()); ++ map->set_callee_saved(STACK_OFFSET( fpr21_off), F21->as_VMReg()); ++ map->set_callee_saved(STACK_OFFSET( fpr22_off), F22->as_VMReg()); ++ map->set_callee_saved(STACK_OFFSET( fpr23_off), F23->as_VMReg()); ++ map->set_callee_saved(STACK_OFFSET( fpr24_off), F24->as_VMReg()); ++ map->set_callee_saved(STACK_OFFSET( fpr25_off), F25->as_VMReg()); ++ map->set_callee_saved(STACK_OFFSET( fpr26_off), F26->as_VMReg()); ++ map->set_callee_saved(STACK_OFFSET( fpr27_off), F27->as_VMReg()); ++ map->set_callee_saved(STACK_OFFSET( fpr28_off), F28->as_VMReg()); ++ map->set_callee_saved(STACK_OFFSET( fpr29_off), F29->as_VMReg()); ++ map->set_callee_saved(STACK_OFFSET( fpr30_off), F30->as_VMReg()); ++ map->set_callee_saved(STACK_OFFSET( fpr31_off), F31->as_VMReg()); ++ ++#undef STACK_OFFSET ++ return map; ++} ++ ++ ++// Pop the current frame and restore all the registers that we ++// saved. ++void RegisterSaver::restore_live_registers(MacroAssembler* masm, bool restore_vectors) { ++ __ ldc1(F0, SP, fpr0_off * jintSize); __ ldc1(F1, SP, fpr1_off * jintSize); ++ __ ldc1(F2, SP, fpr2_off * jintSize); __ ldc1(F3, SP, fpr3_off * jintSize); ++ __ ldc1(F4, SP, fpr4_off * jintSize); __ ldc1(F5, SP, fpr5_off * jintSize); ++ __ ldc1(F6, SP, fpr6_off * jintSize); __ ldc1(F7, SP, fpr7_off * jintSize); ++ __ ldc1(F8, SP, fpr8_off * jintSize); __ ldc1(F9, SP, fpr9_off * jintSize); ++ __ ldc1(F10, SP, fpr10_off * jintSize); __ ldc1(F11, SP, fpr11_off * jintSize); ++ __ ldc1(F12, SP, fpr12_off * jintSize); __ ldc1(F13, SP, fpr13_off * jintSize); ++ __ ldc1(F14, SP, fpr14_off * jintSize); __ ldc1(F15, SP, fpr15_off * jintSize); ++ __ ldc1(F16, SP, fpr16_off * jintSize); __ ldc1(F17, SP, fpr17_off * jintSize); ++ __ ldc1(F18, SP, fpr18_off * jintSize); __ ldc1(F19, SP, fpr19_off * jintSize); ++ __ ldc1(F20, SP, fpr20_off * jintSize); __ ldc1(F21, SP, fpr21_off * jintSize); ++ __ ldc1(F22, SP, fpr22_off * jintSize); __ ldc1(F23, SP, fpr23_off * jintSize); ++ __ ldc1(F24, SP, fpr24_off * jintSize); __ ldc1(F25, SP, fpr25_off * jintSize); ++ __ ldc1(F26, SP, fpr26_off * jintSize); __ ldc1(F27, SP, fpr27_off * jintSize); ++ __ ldc1(F28, SP, fpr28_off * jintSize); __ ldc1(F29, SP, fpr29_off * jintSize); ++ __ ldc1(F30, SP, fpr30_off * jintSize); __ ldc1(F31, SP, fpr31_off * jintSize); ++ ++ __ ld(V0, SP, v0_off * jintSize); __ ld(V1, SP, v1_off * jintSize); ++ __ ld(A0, SP, a0_off * jintSize); __ ld(A1, SP, a1_off * jintSize); ++ __ ld(A2, SP, a2_off * jintSize); __ ld(A3, SP, a3_off * jintSize); ++ __ ld(A4, SP, a4_off * jintSize); __ ld(A5, SP, a5_off * jintSize); ++ __ ld(A6, SP, a6_off * jintSize); __ ld(A7, SP, a7_off * jintSize); ++ __ ld(T0, SP, t0_off * jintSize); ++ __ ld(T1, SP, t1_off * jintSize); ++ __ ld(T2, SP, t2_off * jintSize); ++ __ ld(T3, SP, t3_off * jintSize); ++ __ ld(S0, SP, s0_off * jintSize); ++ __ ld(S1, SP, s1_off * jintSize); ++ __ ld(S2, SP, s2_off * jintSize); ++ __ ld(S3, SP, s3_off * jintSize); ++ __ ld(S4, SP, s4_off * jintSize); ++ __ ld(S5, SP, s5_off * jintSize); ++ __ ld(S6, SP, s6_off * jintSize); ++ __ ld(S7, SP, s7_off 
* jintSize); ++ ++ __ ld(T8, SP, t8_off * jintSize); ++ __ ld(T9, SP, t9_off * jintSize); ++ ++ __ ld(GP, SP, gp_off * jintSize); ++ __ ld(FP, SP, fp_off * jintSize); ++ __ ld(RA, SP, return_off * jintSize); ++ ++ __ addiu(SP, SP, reg_save_size * jintSize); ++} ++ ++// Pop the current frame and restore the registers that might be holding ++// a result. ++void RegisterSaver::restore_result_registers(MacroAssembler* masm) { ++ ++ // Just restore result register. Only used by deoptimization. By ++ // now any callee save register that needs to be restore to a c2 ++ // caller of the deoptee has been extracted into the vframeArray ++ // and will be stuffed into the c2i adapter we create for later ++ // restoration so only result registers need to be restored here. ++ ++ __ ld(V0, SP, v0_off * jintSize); ++ __ ld(V1, SP, v1_off * jintSize); ++ __ ldc1(F0, SP, fpr0_off * jintSize); ++ __ ldc1(F1, SP, fpr1_off * jintSize); ++ __ addiu(SP, SP, return_off * jintSize); ++} ++ ++// Is vector's size (in bytes) bigger than a size saved by default? ++// 16 bytes XMM registers are saved by default using fxsave/fxrstor instructions. ++bool SharedRuntime::is_wide_vector(int size) { ++ return size > 16; ++} ++ ++// The java_calling_convention describes stack locations as ideal slots on ++// a frame with no abi restrictions. Since we must observe abi restrictions ++// (like the placement of the register window) the slots must be biased by ++// the following value. ++ ++static int reg2offset_in(VMReg r) { ++ // Account for saved fp and return address ++ // This should really be in_preserve_stack_slots ++ return (r->reg2stack() + 2 * VMRegImpl::slots_per_word) * VMRegImpl::stack_slot_size; // + 2 * VMRegImpl::stack_slot_size); ++} ++ ++static int reg2offset_out(VMReg r) { ++ return (r->reg2stack() + SharedRuntime::out_preserve_stack_slots()) * VMRegImpl::stack_slot_size; ++} ++ ++// --------------------------------------------------------------------------- ++// Read the array of BasicTypes from a signature, and compute where the ++// arguments should go. Values in the VMRegPair regs array refer to 4-byte ++// quantities. Values less than SharedInfo::stack0 are registers, those above ++// refer to 4-byte stack slots. All stack slots are based off of the stack pointer ++// as framesizes are fixed. ++// VMRegImpl::stack0 refers to the first slot 0(sp). ++// and VMRegImpl::stack0+1 refers to the memory word 4-byes higher. Register ++// up to RegisterImpl::number_of_registers) are the 32-bit ++// integer registers. ++ ++// Pass first five oop/int args in registers T0, A0 - A3. ++// Pass float/double/long args in stack. ++// Doubles have precedence, so if you pass a mix of floats and doubles ++// the doubles will grab the registers before the floats will. ++ ++// Note: the INPUTS in sig_bt are in units of Java argument words, which are ++// either 32-bit or 64-bit depending on the build. The OUTPUTS are in 32-bit ++// units regardless of build. ++ ++ ++// --------------------------------------------------------------------------- ++// The compiled Java calling convention. ++// Pass first five oop/int args in registers T0, A0 - A3. ++// Pass float/double/long args in stack. ++// Doubles have precedence, so if you pass a mix of floats and doubles ++// the doubles will grab the registers before the floats will. ++ ++int SharedRuntime::java_calling_convention(const BasicType *sig_bt, ++ VMRegPair *regs, ++ int total_args_passed) { ++ ++ // Create the mapping between argument positions and registers. 
++ static const Register INT_ArgReg[Argument::n_register_parameters] = { ++ T0, A0, A1, A2, A3, A4, A5, A6 ++ }; ++ static const FloatRegister FP_ArgReg[Argument::n_float_register_parameters] = { ++ F12, F13, F14, F15, F16, F17, F18, F19 ++ }; ++ ++ uint args = 0; ++ uint stk_args = 0; // inc by 2 each time ++ ++ for (int i = 0; i < total_args_passed; i++) { ++ switch (sig_bt[i]) { ++ case T_VOID: ++ // halves of T_LONG or T_DOUBLE ++ assert(i != 0 && (sig_bt[i - 1] == T_LONG || sig_bt[i - 1] == T_DOUBLE), "expecting half"); ++ regs[i].set_bad(); ++ break; ++ case T_BOOLEAN: ++ case T_CHAR: ++ case T_BYTE: ++ case T_SHORT: ++ case T_INT: ++ if (args < Argument::n_register_parameters) { ++ regs[i].set1(INT_ArgReg[args++]->as_VMReg()); ++ } else { ++ regs[i].set1(VMRegImpl::stack2reg(stk_args)); ++ stk_args += 2; ++ } ++ break; ++ case T_LONG: ++ assert(sig_bt[i + 1] == T_VOID, "expecting half"); ++ // fall through ++ case T_OBJECT: ++ case T_ARRAY: ++ case T_ADDRESS: ++ if (args < Argument::n_register_parameters) { ++ regs[i].set2(INT_ArgReg[args++]->as_VMReg()); ++ } else { ++ regs[i].set2(VMRegImpl::stack2reg(stk_args)); ++ stk_args += 2; ++ } ++ break; ++ case T_FLOAT: ++ if (args < Argument::n_float_register_parameters) { ++ regs[i].set1(FP_ArgReg[args++]->as_VMReg()); ++ } else { ++ regs[i].set1(VMRegImpl::stack2reg(stk_args)); ++ stk_args += 2; ++ } ++ break; ++ case T_DOUBLE: ++ assert(sig_bt[i + 1] == T_VOID, "expecting half"); ++ if (args < Argument::n_float_register_parameters) { ++ regs[i].set2(FP_ArgReg[args++]->as_VMReg()); ++ } else { ++ regs[i].set2(VMRegImpl::stack2reg(stk_args)); ++ stk_args += 2; ++ } ++ break; ++ default: ++ ShouldNotReachHere(); ++ break; ++ } ++ } ++ ++ return align_up(stk_args, 2); ++} ++ ++// Patch the callers callsite with entry to compiled code if it exists. ++static void patch_callers_callsite(MacroAssembler *masm) { ++ Label L; ++ __ ld_ptr(AT, Rmethod, in_bytes(Method::code_offset())); ++ __ beq(AT, R0, L); ++ __ delayed()->nop(); ++ // Schedule the branch target address early. ++ // Call into the VM to patch the caller, then jump to compiled callee ++ // V0 isn't live so capture return address while we easily can ++ __ move(V0, RA); ++ ++ __ pushad(); ++#ifdef COMPILER2 ++ // C2 may leave the stack dirty if not in SSE2+ mode ++ __ empty_FPU_stack(); ++#endif ++ ++ // VM needs caller's callsite ++ // VM needs target method ++ ++ __ move(A0, Rmethod); ++ __ move(A1, V0); ++ // we should preserve the return address ++ __ move(TSR, SP); ++ assert(StackAlignmentInBytes == 16, "must be"); ++ __ dins(SP, R0, 0, 4); // align the stack ++ __ call(CAST_FROM_FN_PTR(address, SharedRuntime::fixup_callers_callsite), ++ relocInfo::runtime_call_type); ++ ++ __ delayed()->nop(); ++ __ move(SP, TSR); ++ __ popad(); ++ __ bind(L); ++} ++ ++static void gen_c2i_adapter(MacroAssembler *masm, ++ int total_args_passed, ++ int comp_args_on_stack, ++ const BasicType *sig_bt, ++ const VMRegPair *regs, ++ Label& skip_fixup) { ++ ++ // Before we get into the guts of the C2I adapter, see if we should be here ++ // at all. We've come from compiled code and are attempting to jump to the ++ // interpreter, which means the caller made a static call to get here ++ // (vcalls always get a compiled target if there is one). Check for a ++ // compiled target. If there is one, we need to patch the caller's call. ++ // However we will run interpreted if we come thru here. The next pass ++ // thru the call site will run compiled. 
If we ran compiled here then ++ // we can (theorectically) do endless i2c->c2i->i2c transitions during ++ // deopt/uncommon trap cycles. If we always go interpreted here then ++ // we can have at most one and don't need to play any tricks to keep ++ // from endlessly growing the stack. ++ // ++ // Actually if we detected that we had an i2c->c2i transition here we ++ // ought to be able to reset the world back to the state of the interpreted ++ // call and not bother building another interpreter arg area. We don't ++ // do that at this point. ++ ++ patch_callers_callsite(masm); ++ __ bind(skip_fixup); ++ ++#ifdef COMPILER2 ++ __ empty_FPU_stack(); ++#endif ++ //this is for native ? ++ // Since all args are passed on the stack, total_args_passed * interpreter_ ++ // stack_element_size is the ++ // space we need. ++ int extraspace = total_args_passed * Interpreter::stackElementSize; ++ ++ // stack is aligned, keep it that way ++ extraspace = align_up(extraspace, 2*wordSize); ++ ++ // Get return address ++ __ move(V0, RA); ++ // set senderSP value ++ //refer to interpreter_mips.cpp:generate_asm_entry ++ __ move(Rsender, SP); ++ __ addiu(SP, SP, -extraspace); ++ ++ // Now write the args into the outgoing interpreter space ++ for (int i = 0; i < total_args_passed; i++) { ++ if (sig_bt[i] == T_VOID) { ++ assert(i > 0 && (sig_bt[i-1] == T_LONG || sig_bt[i-1] == T_DOUBLE), "missing half"); ++ continue; ++ } ++ ++ // st_off points to lowest address on stack. ++ int st_off = ((total_args_passed - 1) - i) * Interpreter::stackElementSize; ++ // Say 4 args: ++ // i st_off ++ // 0 12 T_LONG ++ // 1 8 T_VOID ++ // 2 4 T_OBJECT ++ // 3 0 T_BOOL ++ VMReg r_1 = regs[i].first(); ++ VMReg r_2 = regs[i].second(); ++ if (!r_1->is_valid()) { ++ assert(!r_2->is_valid(), ""); ++ continue; ++ } ++ if (r_1->is_stack()) { ++ // memory to memory use fpu stack top ++ int ld_off = r_1->reg2stack() * VMRegImpl::stack_slot_size + extraspace; ++ if (!r_2->is_valid()) { ++ __ ld_ptr(AT, SP, ld_off); ++ __ st_ptr(AT, SP, st_off); ++ ++ } else { ++ ++ ++ int next_off = st_off - Interpreter::stackElementSize; ++ __ ld_ptr(AT, SP, ld_off); ++ __ st_ptr(AT, SP, st_off); ++ ++ // Ref to is_Register condition ++ if(sig_bt[i] == T_LONG || sig_bt[i] == T_DOUBLE) ++ __ st_ptr(AT, SP, st_off - 8); ++ } ++ } else if (r_1->is_Register()) { ++ Register r = r_1->as_Register(); ++ if (!r_2->is_valid()) { ++ __ sd(r, SP, st_off); ++ } else { ++ //FIXME, mips will not enter here ++ // long/double in gpr ++ __ sd(r, SP, st_off); ++ // In [java/util/zip/ZipFile.java] ++ // ++ // private static native long open(String name, int mode, long lastModified); ++ // private static native int getTotal(long jzfile); ++ // ++ // We need to transfer T_LONG paramenters from a compiled method to a native method. 
++ // It's a complex process: ++ // ++ // Caller -> lir_static_call -> gen_resolve_stub ++ // -> -- resolve_static_call_C ++ // `- gen_c2i_adapter() [*] ++ // | ++ // `- AdapterHandlerLibrary::get_create_apapter_index ++ // -> generate_native_entry ++ // -> InterpreterRuntime::SignatureHandlerGenerator::pass_long [**] ++ // ++ // In [**], T_Long parameter is stored in stack as: ++ // ++ // (high) ++ // | | ++ // ----------- ++ // | 8 bytes | ++ // | (void) | ++ // ----------- ++ // | 8 bytes | ++ // | (long) | ++ // ----------- ++ // | | ++ // (low) ++ // ++ // However, the sequence is reversed here: ++ // ++ // (high) ++ // | | ++ // ----------- ++ // | 8 bytes | ++ // | (long) | ++ // ----------- ++ // | 8 bytes | ++ // | (void) | ++ // ----------- ++ // | | ++ // (low) ++ // ++ // So I stored another 8 bytes in the T_VOID slot. It then can be accessed from generate_native_entry(). ++ // ++ if (sig_bt[i] == T_LONG) ++ __ sd(r, SP, st_off - 8); ++ } ++ } else if (r_1->is_FloatRegister()) { ++ assert(sig_bt[i] == T_FLOAT || sig_bt[i] == T_DOUBLE, "Must be a float register"); ++ ++ FloatRegister fr = r_1->as_FloatRegister(); ++ if (sig_bt[i] == T_FLOAT) ++ __ swc1(fr, SP, st_off); ++ else { ++ __ sdc1(fr, SP, st_off); ++ __ sdc1(fr, SP, st_off - 8); // T_DOUBLE needs two slots ++ } ++ } ++ } ++ ++ // Schedule the branch target address early. ++ __ ld_ptr(AT, Rmethod, in_bytes(Method::interpreter_entry_offset()) ); ++ // And repush original return address ++ __ move(RA, V0); ++ __ jr (AT); ++ __ delayed()->nop(); ++} ++ ++void SharedRuntime::gen_i2c_adapter(MacroAssembler *masm, ++ int total_args_passed, ++ int comp_args_on_stack, ++ const BasicType *sig_bt, ++ const VMRegPair *regs) { ++ ++ // Generate an I2C adapter: adjust the I-frame to make space for the C-frame ++ // layout. Lesp was saved by the calling I-frame and will be restored on ++ // return. Meanwhile, outgoing arg space is all owned by the callee ++ // C-frame, so we can mangle it at will. After adjusting the frame size, ++ // hoist register arguments and repack other args according to the compiled ++ // code convention. Finally, end in a jump to the compiled code. The entry ++ // point address is the start of the buffer. ++ ++ // We will only enter here from an interpreted frame and never from after ++ // passing thru a c2i. Azul allowed this but we do not. If we lose the ++ // race and use a c2i we will remain interpreted for the race loser(s). ++ // This removes all sorts of headaches on the mips side and also eliminates ++ // the possibility of having c2i -> i2c -> c2i -> ... endless transitions. ++ ++ ++ __ move(T9, SP); ++ ++ // Cut-out for having no stack args. Since up to 2 int/oop args are passed ++ // in registers, we will occasionally have no stack args. ++ int comp_words_on_stack = 0; ++ if (comp_args_on_stack) { ++ // Sig words on the stack are greater-than VMRegImpl::stack0. Those in ++ // registers are below. By subtracting stack0, we either get a negative ++ // number (all values in registers) or the maximum stack slot accessed. ++ // int comp_args_on_stack = VMRegImpl::reg2stack(max_arg); ++ // Convert 4-byte stack slots to words. 
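++ // Sizing sketch for the computation below (assuming a 64-bit build, wordSize == 8):
++ // e.g. comp_args_on_stack == 3 slots -> align_up(12, 8) >> 3 == 2 words, which is
++ // then rounded up to an even word count so SP keeps 16-byte alignment.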
++ comp_words_on_stack = align_up(comp_args_on_stack*4, wordSize)>>LogBytesPerWord; ++ // Round up to miminum stack alignment, in wordSize ++ comp_words_on_stack = align_up(comp_words_on_stack, 2); ++ __ daddiu(SP, SP, -comp_words_on_stack * wordSize); ++ } ++ ++ // Align the outgoing SP ++ assert(StackAlignmentInBytes == 16, "must be"); ++ __ dins(SP, R0, 0, 4); ++ // push the return address on the stack (note that pushing, rather ++ // than storing it, yields the correct frame alignment for the callee) ++ // Put saved SP in another register ++ const Register saved_sp = V0; ++ __ move(saved_sp, T9); ++ ++ ++ // Will jump to the compiled code just as if compiled code was doing it. ++ // Pre-load the register-jump target early, to schedule it better. ++ __ ld(T9, Rmethod, in_bytes(Method::from_compiled_offset())); ++ ++ // Now generate the shuffle code. Pick up all register args and move the ++ // rest through the floating point stack top. ++ for (int i = 0; i < total_args_passed; i++) { ++ if (sig_bt[i] == T_VOID) { ++ // Longs and doubles are passed in native word order, but misaligned ++ // in the 32-bit build. ++ assert(i > 0 && (sig_bt[i-1] == T_LONG || sig_bt[i-1] == T_DOUBLE), "missing half"); ++ continue; ++ } ++ ++ // Pick up 0, 1 or 2 words from SP+offset. ++ ++ //assert(!regs[i].second()->is_valid() || regs[i].first()->next() == regs[i].second(), "scrambled load targets?"); ++ // Load in argument order going down. ++ int ld_off = (total_args_passed -1 - i)*Interpreter::stackElementSize; ++ // Point to interpreter value (vs. tag) ++ int next_off = ld_off - Interpreter::stackElementSize; ++ VMReg r_1 = regs[i].first(); ++ VMReg r_2 = regs[i].second(); ++ if (!r_1->is_valid()) { ++ assert(!r_2->is_valid(), ""); ++ continue; ++ } ++ if (r_1->is_stack()) { ++ // Convert stack slot to an SP offset (+ wordSize to ++ // account for return address ) ++ // NOTICE HERE!!!! I sub a wordSize here ++ int st_off = regs[i].first()->reg2stack()*VMRegImpl::stack_slot_size; ++ //+ wordSize; ++ ++ if (!r_2->is_valid()) { ++ __ ld(AT, saved_sp, ld_off); ++ __ sd(AT, SP, st_off); ++ } else { ++ // Interpreter local[n] == MSW, local[n+1] == LSW however locals ++ // are accessed as negative so LSW is at LOW address ++ ++ // ld_off is MSW so get LSW ++ // st_off is LSW (i.e. reg.first()) ++ ++ // [./org/eclipse/swt/graphics/GC.java] ++ // void drawImageXRender(Image srcImage, int srcX, int srcY, int srcWidth, int srcHeight, ++ // int destX, int destY, int destWidth, int destHeight, ++ // boolean simple, ++ // int imgWidth, int imgHeight, ++ // long maskPixmap, <-- Pass T_LONG in stack ++ // int maskType); ++ // Before this modification, Eclipse displays icons with solid black background. ++ // ++ __ ld(AT, saved_sp, ld_off); ++ if (sig_bt[i] == T_LONG || sig_bt[i] == T_DOUBLE) ++ __ ld(AT, saved_sp, ld_off - 8); ++ __ sd(AT, SP, st_off); ++ } ++ } else if (r_1->is_Register()) { // Register argument ++ Register r = r_1->as_Register(); ++ if (r_2->is_valid()) { ++ // Remember r_1 is low address (and LSB on mips) ++ // So r_2 gets loaded from high address regardless of the platform ++ assert(r_2->as_Register() == r_1->as_Register(), ""); ++ __ ld(r, saved_sp, ld_off); ++ ++ // ++ // For T_LONG type, the real layout is as below: ++ // ++ // (high) ++ // | | ++ // ----------- ++ // | 8 bytes | ++ // | (void) | ++ // ----------- ++ // | 8 bytes | ++ // | (long) | ++ // ----------- ++ // | | ++ // (low) ++ // ++ // We should load the low-8 bytes. 
++ // ++ if (sig_bt[i] == T_LONG) ++ __ ld(r, saved_sp, ld_off - 8); ++ } else { ++ __ lw(r, saved_sp, ld_off); ++ } ++ } else if (r_1->is_FloatRegister()) { // Float Register ++ assert(sig_bt[i] == T_FLOAT || sig_bt[i] == T_DOUBLE, "Must be a float register"); ++ ++ FloatRegister fr = r_1->as_FloatRegister(); ++ if (sig_bt[i] == T_FLOAT) ++ __ lwc1(fr, saved_sp, ld_off); ++ else { ++ __ ldc1(fr, saved_sp, ld_off); ++ __ ldc1(fr, saved_sp, ld_off - 8); ++ } ++ } ++ } ++ ++ // 6243940 We might end up in handle_wrong_method if ++ // the callee is deoptimized as we race thru here. If that ++ // happens we don't want to take a safepoint because the ++ // caller frame will look interpreted and arguments are now ++ // "compiled" so it is much better to make this transition ++ // invisible to the stack walking code. Unfortunately if ++ // we try and find the callee by normal means a safepoint ++ // is possible. So we stash the desired callee in the thread ++ // and the vm will find there should this case occur. ++#ifndef OPT_THREAD ++ Register thread = T8; ++ __ get_thread(thread); ++#else ++ Register thread = TREG; ++#endif ++ __ sd(Rmethod, thread, in_bytes(JavaThread::callee_target_offset())); ++ ++ // move Method* to V0 in case we end up in an c2i adapter. ++ // the c2i adapters expect Method* in V0 (c2) because c2's ++ // resolve stubs return the result (the method) in V0. ++ // I'd love to fix this. ++ __ move(V0, Rmethod); ++ __ jr(T9); ++ __ delayed()->nop(); ++} ++ ++// --------------------------------------------------------------- ++AdapterHandlerEntry* SharedRuntime::generate_i2c2i_adapters(MacroAssembler *masm, ++ int total_args_passed, ++ int comp_args_on_stack, ++ const BasicType *sig_bt, ++ const VMRegPair *regs, ++ AdapterFingerPrint* fingerprint) { ++ address i2c_entry = __ pc(); ++ ++ gen_i2c_adapter(masm, total_args_passed, comp_args_on_stack, sig_bt, regs); ++ ++ // ------------------------------------------------------------------------- ++ // Generate a C2I adapter. On entry we know G5 holds the Method*. The ++ // args start out packed in the compiled layout. They need to be unpacked ++ // into the interpreter layout. This will almost always require some stack ++ // space. We grow the current (compiled) stack, then repack the args. We ++ // finally end in a jump to the generic interpreter entry point. On exit ++ // from the interpreter, the interpreter will restore our SP (lest the ++ // compiled code, which relys solely on SP and not FP, get sick). ++ ++ address c2i_unverified_entry = __ pc(); ++ Label skip_fixup; ++ { ++ Register holder = T1; ++ Register receiver = T0; ++ Register temp = T8; ++ address ic_miss = SharedRuntime::get_ic_miss_stub(); ++ ++ Label missed; ++ ++ //add for compressedoops ++ __ load_klass(temp, receiver); ++ ++ __ ld_ptr(AT, holder, CompiledICHolder::holder_klass_offset()); ++ __ ld_ptr(Rmethod, holder, CompiledICHolder::holder_metadata_offset()); ++ __ bne(AT, temp, missed); ++ __ delayed()->nop(); ++ // Method might have been compiled since the call site was patched to ++ // interpreted if that is the case treat it as a miss so we can get ++ // the call site corrected. 
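++ // Concretely: reload the Method's code field below; if it is still NULL the callee
++ // has no compiled entry and we branch to skip_fixup, otherwise we fall through to
++ // 'missed' and jump to the IC-miss stub so the call site gets re-resolved.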
++ __ ld_ptr(AT, Rmethod, in_bytes(Method::code_offset())); ++ __ beq(AT, R0, skip_fixup); ++ __ delayed()->nop(); ++ __ bind(missed); ++ ++ __ jmp(ic_miss, relocInfo::runtime_call_type); ++ __ delayed()->nop(); ++ } ++ ++ address c2i_entry = __ pc(); ++ ++ // Class initialization barrier for static methods ++ address c2i_no_clinit_check_entry = NULL; ++ if (VM_Version::supports_fast_class_init_checks()) { ++ Label L_skip_barrier; ++ address handle_wrong_method = SharedRuntime::get_handle_wrong_method_stub(); ++ ++ { // Bypass the barrier for non-static methods ++ __ lw(AT, Address(Rmethod, Method::access_flags_offset())); ++ __ andi(AT, AT, JVM_ACC_STATIC); ++ __ beq(AT, R0, L_skip_barrier); // non-static ++ __ delayed()->nop(); ++ } ++ ++ __ load_method_holder(T9, Rmethod); ++ __ clinit_barrier(T9, AT, &L_skip_barrier); ++ __ jmp(handle_wrong_method, relocInfo::runtime_call_type); ++ __ delayed()->nop(); ++ ++ __ bind(L_skip_barrier); ++ c2i_no_clinit_check_entry = __ pc(); ++ } ++ ++ gen_c2i_adapter(masm, total_args_passed, comp_args_on_stack, sig_bt, regs, skip_fixup); ++ ++ __ flush(); ++ return AdapterHandlerLibrary::new_entry(fingerprint, i2c_entry, c2i_entry, c2i_unverified_entry, c2i_no_clinit_check_entry); ++} ++ ++int SharedRuntime::vector_calling_convention(VMRegPair *regs, ++ uint num_bits, ++ uint total_args_passed) { ++ Unimplemented(); ++ return 0; ++} ++ ++int SharedRuntime::c_calling_convention(const BasicType *sig_bt, ++ VMRegPair *regs, ++ VMRegPair *regs2, ++ int total_args_passed) { ++ assert(regs2 == NULL, "not needed on MIPS"); ++ // Return the number of VMReg stack_slots needed for the args. ++ // This value does not include an abi space (like register window ++ // save area). ++ ++ // We return the amount of VMReg stack slots we need to reserve for all ++ // the arguments NOT counting out_preserve_stack_slots. Since we always ++ // have space for storing at least 6 registers to memory we start with that. ++ // See int_stk_helper for a further discussion. ++ // We return the amount of VMRegImpl stack slots we need to reserve for all ++ // the arguments NOT counting out_preserve_stack_slots. 
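++ // The value returned below is the number of 32-bit VMReg out-arg slots: every
++ // argument that does not fit in A0 - A7 / F12 - F19 reserves two slots, and the
++ // total is rounded up to an even count (a whole number of 64-bit words).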
++ static const Register INT_ArgReg[Argument::n_register_parameters] = { ++ A0, A1, A2, A3, A4, A5, A6, A7 ++ }; ++ static const FloatRegister FP_ArgReg[Argument::n_float_register_parameters] = { ++ F12, F13, F14, F15, F16, F17, F18, F19 ++ }; ++ uint args = 0; ++ uint stk_args = 0; // inc by 2 each time ++ ++// Example: ++// n java.lang.UNIXProcess::forkAndExec ++// private native int forkAndExec(byte[] prog, ++// byte[] argBlock, int argc, ++// byte[] envBlock, int envc, ++// byte[] dir, ++// boolean redirectErrorStream, ++// FileDescriptor stdin_fd, ++// FileDescriptor stdout_fd, ++// FileDescriptor stderr_fd) ++// JNIEXPORT jint JNICALL ++// Java_java_lang_UNIXProcess_forkAndExec(JNIEnv *env, ++// jobject process, ++// jbyteArray prog, ++// jbyteArray argBlock, jint argc, ++// jbyteArray envBlock, jint envc, ++// jbyteArray dir, ++// jboolean redirectErrorStream, ++// jobject stdin_fd, ++// jobject stdout_fd, ++// jobject stderr_fd) ++// ++// ::c_calling_convention ++// 0: // env <-- a0 ++// 1: L // klass/obj <-- t0 => a1 ++// 2: [ // prog[] <-- a0 => a2 ++// 3: [ // argBlock[] <-- a1 => a3 ++// 4: I // argc <-- a2 => a4 ++// 5: [ // envBlock[] <-- a3 => a5 ++// 6: I // envc <-- a4 => a5 ++// 7: [ // dir[] <-- a5 => a7 ++// 8: Z // redirectErrorStream <-- a6 => sp[0] ++// 9: L // stdin fp[16] => sp[8] ++// 10: L // stdout fp[24] => sp[16] ++// 11: L // stderr fp[32] => sp[24] ++// ++ for (int i = 0; i < total_args_passed; i++) { ++ switch (sig_bt[i]) { ++ case T_VOID: // Halves of longs and doubles ++ assert(i != 0 && (sig_bt[i - 1] == T_LONG || sig_bt[i - 1] == T_DOUBLE), "expecting half"); ++ regs[i].set_bad(); ++ break; ++ case T_BOOLEAN: ++ case T_CHAR: ++ case T_BYTE: ++ case T_SHORT: ++ case T_INT: ++ if (args < Argument::n_register_parameters) { ++ regs[i].set1(INT_ArgReg[args++]->as_VMReg()); ++ } else { ++ regs[i].set1(VMRegImpl::stack2reg(stk_args)); ++ stk_args += 2; ++ } ++ break; ++ case T_LONG: ++ assert(sig_bt[i + 1] == T_VOID, "expecting half"); ++ // fall through ++ case T_OBJECT: ++ case T_ARRAY: ++ case T_ADDRESS: ++ case T_METADATA: ++ if (args < Argument::n_register_parameters) { ++ regs[i].set2(INT_ArgReg[args++]->as_VMReg()); ++ } else { ++ regs[i].set2(VMRegImpl::stack2reg(stk_args)); ++ stk_args += 2; ++ } ++ break; ++ case T_FLOAT: ++ if (args < Argument::n_float_register_parameters) { ++ regs[i].set1(FP_ArgReg[args++]->as_VMReg()); ++ } else { ++ regs[i].set1(VMRegImpl::stack2reg(stk_args)); ++ stk_args += 2; ++ } ++ break; ++ case T_DOUBLE: ++ assert(sig_bt[i + 1] == T_VOID, "expecting half"); ++ if (args < Argument::n_float_register_parameters) { ++ regs[i].set2(FP_ArgReg[args++]->as_VMReg()); ++ } else { ++ regs[i].set2(VMRegImpl::stack2reg(stk_args)); ++ stk_args += 2; ++ } ++ break; ++ default: ++ ShouldNotReachHere(); ++ break; ++ } ++ } ++ ++ return align_up(stk_args, 2); ++} ++ ++// --------------------------------------------------------------------------- ++void SharedRuntime::save_native_result(MacroAssembler *masm, BasicType ret_type, int frame_slots) { ++ // We always ignore the frame_slots arg and just use the space just below frame pointer ++ // which by this time is free to use ++ switch (ret_type) { ++ case T_FLOAT: ++ __ swc1(FSF, FP, -wordSize); ++ break; ++ case T_DOUBLE: ++ __ sdc1(FSF, FP, -wordSize ); ++ break; ++ case T_VOID: break; ++ case T_LONG: ++ __ sd(V0, FP, -wordSize); ++ break; ++ case T_OBJECT: ++ case T_ARRAY: ++ __ sd(V0, FP, -wordSize); ++ break; ++ default: { ++ __ sw(V0, FP, -wordSize); ++ } ++ } ++} ++ ++void 
SharedRuntime::restore_native_result(MacroAssembler *masm, BasicType ret_type, int frame_slots) { ++ // We always ignore the frame_slots arg and just use the space just below frame pointer ++ // which by this time is free to use ++ switch (ret_type) { ++ case T_FLOAT: ++ __ lwc1(FSF, FP, -wordSize); ++ break; ++ case T_DOUBLE: ++ __ ldc1(FSF, FP, -wordSize ); ++ break; ++ case T_LONG: ++ __ ld(V0, FP, -wordSize); ++ break; ++ case T_VOID: break; ++ case T_OBJECT: ++ case T_ARRAY: ++ __ ld(V0, FP, -wordSize); ++ break; ++ default: { ++ __ lw(V0, FP, -wordSize); ++ } ++ } ++} ++ ++static void save_args(MacroAssembler *masm, int arg_count, int first_arg, VMRegPair *args) { ++ for ( int i = first_arg ; i < arg_count ; i++ ) { ++ if (args[i].first()->is_Register()) { ++ __ push(args[i].first()->as_Register()); ++ } else if (args[i].first()->is_FloatRegister()) { ++ __ push(args[i].first()->as_FloatRegister()); ++ } ++ } ++} ++ ++static void restore_args(MacroAssembler *masm, int arg_count, int first_arg, VMRegPair *args) { ++ for ( int i = arg_count - 1 ; i >= first_arg ; i-- ) { ++ if (args[i].first()->is_Register()) { ++ __ pop(args[i].first()->as_Register()); ++ } else if (args[i].first()->is_FloatRegister()) { ++ __ pop(args[i].first()->as_FloatRegister()); ++ } ++ } ++} ++ ++// A simple move of integer like type ++static void simple_move32(MacroAssembler* masm, VMRegPair src, VMRegPair dst) { ++ if (src.first()->is_stack()) { ++ if (dst.first()->is_stack()) { ++ // stack to stack ++ __ lw(AT, FP, reg2offset_in(src.first())); ++ __ sd(AT, SP, reg2offset_out(dst.first())); ++ } else { ++ // stack to reg ++ __ lw(dst.first()->as_Register(), FP, reg2offset_in(src.first())); ++ } ++ } else if (dst.first()->is_stack()) { ++ // reg to stack ++ __ sd(src.first()->as_Register(), SP, reg2offset_out(dst.first())); ++ } else { ++ if (dst.first() != src.first()){ ++ __ move(dst.first()->as_Register(), src.first()->as_Register()); // fujie error:dst.first() ++ } ++ } ++} ++ ++// An oop arg. Must pass a handle not the oop itself ++static void object_move(MacroAssembler* masm, ++ OopMap* map, ++ int oop_handle_offset, ++ int framesize_in_slots, ++ VMRegPair src, ++ VMRegPair dst, ++ bool is_receiver, ++ int* receiver_offset) { ++ ++ // must pass a handle. First figure out the location we use as a handle ++ ++ //FIXME, for mips, dst can be register ++ if (src.first()->is_stack()) { ++ // Oop is already on the stack as an argument ++ Register rHandle = V0; ++ Label nil; ++ __ xorr(rHandle, rHandle, rHandle); ++ __ ld(AT, FP, reg2offset_in(src.first())); ++ __ beq(AT, R0, nil); ++ __ delayed()->nop(); ++ __ lea(rHandle, Address(FP, reg2offset_in(src.first()))); ++ __ bind(nil); ++ if(dst.first()->is_stack())__ sd( rHandle, SP, reg2offset_out(dst.first())); ++ else __ move( (dst.first())->as_Register(), rHandle); ++ //if dst is register ++ //FIXME, do mips need out preserve stack slots? 
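++ // For this stack-resident case the oop never moves: the slot it already occupies
++ // in the caller's frame is recorded in the OopMap below, and what gets passed to
++ // the native code is the address of that slot (or a NULL handle for a NULL oop).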
++ int offset_in_older_frame = src.first()->reg2stack() ++ + SharedRuntime::out_preserve_stack_slots(); ++ map->set_oop(VMRegImpl::stack2reg(offset_in_older_frame + framesize_in_slots)); ++ if (is_receiver) { ++ *receiver_offset = (offset_in_older_frame ++ + framesize_in_slots) * VMRegImpl::stack_slot_size; ++ } ++ } else { ++ // Oop is in an a register we must store it to the space we reserve ++ // on the stack for oop_handles ++ const Register rOop = src.first()->as_Register(); ++ assert( (rOop->encoding() >= A0->encoding()) && (rOop->encoding() <= T0->encoding()),"wrong register"); ++ const Register rHandle = V0; ++ //Important: refer to java_calling_convertion ++ int oop_slot = (rOop->encoding() - A0->encoding()) * VMRegImpl::slots_per_word + oop_handle_offset; ++ int offset = oop_slot*VMRegImpl::stack_slot_size; ++ Label skip; ++ __ sd( rOop , SP, offset ); ++ map->set_oop(VMRegImpl::stack2reg(oop_slot)); ++ __ xorr( rHandle, rHandle, rHandle); ++ __ beq(rOop, R0, skip); ++ __ delayed()->nop(); ++ __ lea(rHandle, Address(SP, offset)); ++ __ bind(skip); ++ // Store the handle parameter ++ if(dst.first()->is_stack())__ sd( rHandle, SP, reg2offset_out(dst.first())); ++ else __ move((dst.first())->as_Register(), rHandle); ++ //if dst is register ++ ++ if (is_receiver) { ++ *receiver_offset = offset; ++ } ++ } ++} ++ ++// A float arg may have to do float reg int reg conversion ++static void float_move(MacroAssembler* masm, VMRegPair src, VMRegPair dst) { ++ assert(!src.second()->is_valid() && !dst.second()->is_valid(), "bad float_move"); ++ ++ if (src.first()->is_stack()) { ++ if (dst.first()->is_stack()) { ++ __ lw(AT, FP, reg2offset_in(src.first())); ++ __ sw(AT, SP, reg2offset_out(dst.first())); ++ } ++ else ++ __ lwc1(dst.first()->as_FloatRegister(), FP, reg2offset_in(src.first())); ++ } else { ++ // reg to stack ++ if(dst.first()->is_stack()) ++ __ swc1(src.first()->as_FloatRegister(), SP, reg2offset_out(dst.first())); ++ else ++ __ mov_s(dst.first()->as_FloatRegister(), src.first()->as_FloatRegister()); ++ } ++} ++ ++// A long move ++static void long_move(MacroAssembler* masm, VMRegPair src, VMRegPair dst) { ++ ++ // The only legal possibility for a long_move VMRegPair is: ++ // 1: two stack slots (possibly unaligned) ++ // as neither the java or C calling convention will use registers ++ // for longs. ++ ++ if (src.first()->is_stack()) { ++ assert(src.second()->is_stack() && dst.second()->is_stack(), "must be all stack"); ++ if( dst.first()->is_stack()){ ++ __ ld(AT, FP, reg2offset_in(src.first())); ++ __ sd(AT, SP, reg2offset_out(dst.first())); ++ } else { ++ __ ld( (dst.first())->as_Register() , FP, reg2offset_in(src.first())); ++ } ++ } else { ++ if( dst.first()->is_stack()){ ++ __ sd( (src.first())->as_Register(), SP, reg2offset_out(dst.first())); ++ } else { ++ __ move( (dst.first())->as_Register() , (src.first())->as_Register()); ++ } ++ } ++} ++ ++// A double move ++static void double_move(MacroAssembler* masm, VMRegPair src, VMRegPair dst) { ++ ++ // The only legal possibilities for a double_move VMRegPair are: ++ // The painful thing here is that like long_move a VMRegPair might be ++ ++ // Because of the calling convention we know that src is either ++ // 1: a single physical register (xmm registers only) ++ // 2: two stack slots (possibly unaligned) ++ // dst can only be a pair of stack slots. 
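++ // On this 64-bit port both halves of a double fit in one FPR or one 8-byte stack
++ // slot, so each case below is a single ld/sd, ldc1/sdc1 or mov_d; the "pair of
++ // stack slots" wording appears to be inherited from the 32-bit ports.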
++ ++ ++ if (src.first()->is_stack()) { ++ // source is all stack ++ if( dst.first()->is_stack()){ ++ __ ld(AT, FP, reg2offset_in(src.first())); ++ __ sd(AT, SP, reg2offset_out(dst.first())); ++ } else { ++ __ ldc1( (dst.first())->as_FloatRegister(), FP, reg2offset_in(src.first())); ++ } ++ ++ } else { ++ // reg to stack ++ // No worries about stack alignment ++ if( dst.first()->is_stack()){ ++ __ sdc1(src.first()->as_FloatRegister(), SP, reg2offset_out(dst.first())); ++ } ++ else ++ __ mov_d( dst.first()->as_FloatRegister(), src.first()->as_FloatRegister()); ++ ++ } ++} ++ ++static void verify_oop_args(MacroAssembler* masm, ++ methodHandle method, ++ const BasicType* sig_bt, ++ const VMRegPair* regs) { ++ Register temp_reg = T9; // not part of any compiled calling seq ++ if (VerifyOops) { ++ for (int i = 0; i < method->size_of_parameters(); i++) { ++ if (sig_bt[i] == T_OBJECT || ++ sig_bt[i] == T_ARRAY) { ++ VMReg r = regs[i].first(); ++ assert(r->is_valid(), "bad oop arg"); ++ if (r->is_stack()) { ++ __ ld(temp_reg, Address(SP, r->reg2stack() * VMRegImpl::stack_slot_size + wordSize)); ++ __ verify_oop(temp_reg); ++ } else { ++ __ verify_oop(r->as_Register()); ++ } ++ } ++ } ++ } ++} ++ ++static void gen_special_dispatch(MacroAssembler* masm, ++ methodHandle method, ++ const BasicType* sig_bt, ++ const VMRegPair* regs) { ++ verify_oop_args(masm, method, sig_bt, regs); ++ vmIntrinsics::ID iid = method->intrinsic_id(); ++ ++ // Now write the args into the outgoing interpreter space ++ bool has_receiver = false; ++ Register receiver_reg = noreg; ++ int member_arg_pos = -1; ++ Register member_reg = noreg; ++ int ref_kind = MethodHandles::signature_polymorphic_intrinsic_ref_kind(iid); ++ if (ref_kind != 0) { ++ member_arg_pos = method->size_of_parameters() - 1; // trailing MemberName argument ++ member_reg = S3; // known to be free at this point ++ has_receiver = MethodHandles::ref_kind_has_receiver(ref_kind); ++ } else if (iid == vmIntrinsics::_invokeBasic || iid == vmIntrinsics::_linkToNative) { ++ has_receiver = true; ++ } else { ++ fatal("unexpected intrinsic id %d", vmIntrinsics::as_int(iid)); ++ } ++ ++ if (member_reg != noreg) { ++ // Load the member_arg into register, if necessary. ++ SharedRuntime::check_member_name_argument_is_last_argument(method, sig_bt, regs); ++ VMReg r = regs[member_arg_pos].first(); ++ if (r->is_stack()) { ++ __ ld(member_reg, Address(SP, r->reg2stack() * VMRegImpl::stack_slot_size)); ++ } else { ++ // no data motion is needed ++ member_reg = r->as_Register(); ++ } ++ } ++ ++ if (has_receiver) { ++ // Make sure the receiver is loaded into a register. ++ assert(method->size_of_parameters() > 0, "oob"); ++ assert(sig_bt[0] == T_OBJECT, "receiver argument must be an object"); ++ VMReg r = regs[0].first(); ++ assert(r->is_valid(), "bad receiver arg"); ++ if (r->is_stack()) { ++ // Porting note: This assumes that compiled calling conventions always ++ // pass the receiver oop in a register. If this is not true on some ++ // platform, pick a temp and load the receiver from stack. 
++ fatal("receiver always in a register"); ++ receiver_reg = SSR; // known to be free at this point ++ __ ld(receiver_reg, Address(SP, r->reg2stack() * VMRegImpl::stack_slot_size)); ++ } else { ++ // no data motion is needed ++ receiver_reg = r->as_Register(); ++ } ++ } ++ ++ // Figure out which address we are really jumping to: ++ MethodHandles::generate_method_handle_dispatch(masm, iid, ++ receiver_reg, member_reg, /*for_compiler_entry:*/ true); ++} ++ ++// --------------------------------------------------------------------------- ++// Generate a native wrapper for a given method. The method takes arguments ++// in the Java compiled code convention, marshals them to the native ++// convention (handlizes oops, etc), transitions to native, makes the call, ++// returns to java state (possibly blocking), unhandlizes any result and ++// returns. ++nmethod *SharedRuntime::generate_native_wrapper(MacroAssembler* masm, ++ const methodHandle& method, ++ int compile_id, ++ BasicType* in_sig_bt, ++ VMRegPair* in_regs, ++ BasicType ret_type, ++ address critical_entry) { ++ if (method->is_method_handle_intrinsic()) { ++ vmIntrinsics::ID iid = method->intrinsic_id(); ++ intptr_t start = (intptr_t)__ pc(); ++ int vep_offset = ((intptr_t)__ pc()) - start; ++ // Make enough room for patch_verified_entry ++ __ nop(); ++ __ nop(); ++ gen_special_dispatch(masm, ++ method, ++ in_sig_bt, ++ in_regs); ++ int frame_complete = ((intptr_t)__ pc()) - start; // not complete, period ++ __ flush(); ++ int stack_slots = SharedRuntime::out_preserve_stack_slots(); // no out slots at all, actually ++ return nmethod::new_native_nmethod(method, ++ compile_id, ++ masm->code(), ++ vep_offset, ++ frame_complete, ++ stack_slots / VMRegImpl::slots_per_word, ++ in_ByteSize(-1), ++ in_ByteSize(-1), ++ (OopMapSet*)NULL); ++ } ++ bool is_critical_native = true; ++ address native_func = critical_entry; ++ if (native_func == NULL) { ++ native_func = method->native_function(); ++ is_critical_native = false; ++ } ++ assert(native_func != NULL, "must have function"); ++ ++ // Native nmethod wrappers never take possesion of the oop arguments. ++ // So the caller will gc the arguments. The only thing we need an ++ // oopMap for is if the call is static ++ // ++ // An OopMap for lock (and class if static), and one for the VM call itself ++ OopMapSet *oop_maps = new OopMapSet(); ++ ++ // We have received a description of where all the java arg are located ++ // on entry to the wrapper. We need to convert these args to where ++ // the jni function will expect them. 
To figure out where they go ++ // we convert the java signature to a C signature by inserting ++ // the hidden arguments as arg[0] and possibly arg[1] (static method) ++ ++ const int total_in_args = method->size_of_parameters(); ++ int total_c_args = total_in_args; ++ if (!is_critical_native) { ++ total_c_args += 1; ++ if (method->is_static()) { ++ total_c_args++; ++ } ++ } else { ++ for (int i = 0; i < total_in_args; i++) { ++ if (in_sig_bt[i] == T_ARRAY) { ++ total_c_args++; ++ } ++ } ++ } ++ ++ BasicType* out_sig_bt = NEW_RESOURCE_ARRAY(BasicType, total_c_args); ++ VMRegPair* out_regs = NEW_RESOURCE_ARRAY(VMRegPair, total_c_args); ++ BasicType* in_elem_bt = NULL; ++ ++ int argc = 0; ++ if (!is_critical_native) { ++ out_sig_bt[argc++] = T_ADDRESS; ++ if (method->is_static()) { ++ out_sig_bt[argc++] = T_OBJECT; ++ } ++ ++ for (int i = 0; i < total_in_args ; i++ ) { ++ out_sig_bt[argc++] = in_sig_bt[i]; ++ } ++ } else { ++ in_elem_bt = NEW_RESOURCE_ARRAY(BasicType, total_in_args); ++ SignatureStream ss(method->signature()); ++ for (int i = 0; i < total_in_args ; i++ ) { ++ if (in_sig_bt[i] == T_ARRAY) { ++ // Arrays are passed as int, elem* pair ++ out_sig_bt[argc++] = T_INT; ++ out_sig_bt[argc++] = T_ADDRESS; ++ Symbol* atype = ss.as_symbol(); ++ const char* at = atype->as_C_string(); ++ if (strlen(at) == 2) { ++ assert(at[0] == '[', "must be"); ++ switch (at[1]) { ++ case 'B': in_elem_bt[i] = T_BYTE; break; ++ case 'C': in_elem_bt[i] = T_CHAR; break; ++ case 'D': in_elem_bt[i] = T_DOUBLE; break; ++ case 'F': in_elem_bt[i] = T_FLOAT; break; ++ case 'I': in_elem_bt[i] = T_INT; break; ++ case 'J': in_elem_bt[i] = T_LONG; break; ++ case 'S': in_elem_bt[i] = T_SHORT; break; ++ case 'Z': in_elem_bt[i] = T_BOOLEAN; break; ++ default: ShouldNotReachHere(); ++ } ++ } ++ } else { ++ out_sig_bt[argc++] = in_sig_bt[i]; ++ in_elem_bt[i] = T_VOID; ++ } ++ if (in_sig_bt[i] != T_VOID) { ++ assert(in_sig_bt[i] == ss.type(), "must match"); ++ ss.next(); ++ } ++ } ++ } ++ ++ // Now figure out where the args must be stored and how much stack space ++ // they require (neglecting out_preserve_stack_slots but space for storing ++ // the 1st six register arguments). It's weird see int_stk_helper. ++ // ++ int out_arg_slots; ++ out_arg_slots = c_calling_convention(out_sig_bt, out_regs, NULL, total_c_args); ++ ++ // Compute framesize for the wrapper. We need to handlize all oops in ++ // registers. We must create space for them here that is disjoint from ++ // the windowed save area because we have no control over when we might ++ // flush the window again and overwrite values that gc has since modified. ++ // (The live window race) ++ // ++ // We always just allocate 6 word for storing down these object. This allow ++ // us to simply record the base and use the Ireg number to decide which ++ // slot to use. (Note that the reg number is the inbound number not the ++ // outbound number). ++ // We must shuffle args to match the native convention, and include var-args space. ++ ++ // Calculate the total number of stack slots we will need. ++ ++ // First count the abi requirement plus all of the outgoing args ++ int stack_slots = SharedRuntime::out_preserve_stack_slots() + out_arg_slots; ++ ++ // Now the space for the inbound oop handle area ++ int total_save_slots = 9 * VMRegImpl::slots_per_word; // 9 arguments passed in registers ++ if (is_critical_native) { ++ // Critical natives may have to call out so they need a save area ++ // for register arguments. 
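++ // The loop below counts one 32-bit slot per int-like or float register argument
++ // and two per long/double/array argument; if any doublewords are present the
++ // base of the save area is rounded up to an even slot so they stay 8-byte aligned.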
++ int double_slots = 0; ++ int single_slots = 0; ++ for ( int i = 0; i < total_in_args; i++) { ++ if (in_regs[i].first()->is_Register()) { ++ const Register reg = in_regs[i].first()->as_Register(); ++ switch (in_sig_bt[i]) { ++ case T_BOOLEAN: ++ case T_BYTE: ++ case T_SHORT: ++ case T_CHAR: ++ case T_INT: single_slots++; break; ++ case T_ARRAY: ++ case T_LONG: double_slots++; break; ++ default: ShouldNotReachHere(); ++ } ++ } else if (in_regs[i].first()->is_FloatRegister()) { ++ switch (in_sig_bt[i]) { ++ case T_FLOAT: single_slots++; break; ++ case T_DOUBLE: double_slots++; break; ++ default: ShouldNotReachHere(); ++ } ++ } ++ } ++ total_save_slots = double_slots * 2 + single_slots; ++ // align the save area ++ if (double_slots != 0) { ++ stack_slots = align_up(stack_slots, 2); ++ } ++ } ++ ++ int oop_handle_offset = stack_slots; ++ stack_slots += total_save_slots; ++ ++ // Now any space we need for handlizing a klass if static method ++ ++ int klass_slot_offset = 0; ++ int klass_offset = -1; ++ int lock_slot_offset = 0; ++ bool is_static = false; ++ ++ if (method->is_static()) { ++ klass_slot_offset = stack_slots; ++ stack_slots += VMRegImpl::slots_per_word; ++ klass_offset = klass_slot_offset * VMRegImpl::stack_slot_size; ++ is_static = true; ++ } ++ ++ // Plus a lock if needed ++ ++ if (method->is_synchronized()) { ++ lock_slot_offset = stack_slots; ++ stack_slots += VMRegImpl::slots_per_word; ++ } ++ ++ // Now a place to save return value or as a temporary for any gpr -> fpr moves ++ // + 2 for return address (which we own) and saved fp ++ stack_slots += 2 + 9 * VMRegImpl::slots_per_word; // (T0, A0, A1, A2, A3, A4, A5, A6, A7) ++ ++ // Ok The space we have allocated will look like: ++ // ++ // ++ // FP-> | | ++ // |---------------------| ++ // | 2 slots for moves | ++ // |---------------------| ++ // | lock box (if sync) | ++ // |---------------------| <- lock_slot_offset ++ // | klass (if static) | ++ // |---------------------| <- klass_slot_offset ++ // | oopHandle area | ++ // |---------------------| <- oop_handle_offset ++ // | outbound memory | ++ // | based arguments | ++ // | | ++ // |---------------------| ++ // | vararg area | ++ // |---------------------| ++ // | | ++ // SP-> | out_preserved_slots | ++ // ++ // ++ ++ ++ // Now compute actual number of stack words we need rounding to make ++ // stack properly aligned. ++ stack_slots = align_up(stack_slots, StackAlignmentInSlots); ++ ++ int stack_size = stack_slots * VMRegImpl::stack_slot_size; ++ ++ intptr_t start = (intptr_t)__ pc(); ++ ++ ++ ++ // First thing make an ic check to see if we should even be here ++ address ic_miss = SharedRuntime::get_ic_miss_stub(); ++ ++ // We are free to use all registers as temps without saving them and ++ // restoring them except fp. fp is the only callee save register ++ // as far as the interpreter and the compiler(s) are concerned. 
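++ // What follows is the unverified entry point: load the receiver's klass into T9
++ // and compare it with the inline-cache klass expected in T1 (ic_reg); on a
++ // mismatch we jump to the IC-miss stub, otherwise execution falls through to the
++ // verified entry point recorded right after the 'hit' label.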
++ ++ //refer to register_mips.hpp:IC_Klass ++ const Register ic_reg = T1; ++ const Register receiver = T0; ++ ++ Label hit; ++ Label exception_pending; ++ ++ __ verify_oop(receiver); ++ //add for compressedoops ++ __ load_klass(T9, receiver); ++ __ beq(T9, ic_reg, hit); ++ __ delayed()->nop(); ++ __ jmp(ic_miss, relocInfo::runtime_call_type); ++ __ delayed()->nop(); ++ __ bind(hit); ++ ++ int vep_offset = ((intptr_t)__ pc()) - start; ++ ++ // Make enough room for patch_verified_entry ++ __ nop(); ++ __ nop(); ++ ++ if (VM_Version::supports_fast_class_init_checks() && method->needs_clinit_barrier()) { ++ Label L_skip_barrier; ++ address handle_wrong_method = SharedRuntime::get_handle_wrong_method_stub(); ++ __ mov_metadata(T9, method->method_holder()); // InstanceKlass* ++ __ clinit_barrier(T9, AT, &L_skip_barrier); ++ __ jmp(handle_wrong_method, relocInfo::runtime_call_type); ++ __ delayed()->nop(); ++ __ bind(L_skip_barrier); ++ } ++ ++ // Generate stack overflow check ++ __ bang_stack_with_offset((int)StackOverflow::stack_shadow_zone_size()); ++ ++ // Generate a new frame for the wrapper. ++ // do mips need this ? ++#ifndef OPT_THREAD ++ __ get_thread(TREG); ++#endif ++ __ st_ptr(SP, TREG, in_bytes(JavaThread::last_Java_sp_offset())); ++ assert(StackAlignmentInBytes == 16, "must be"); ++ __ dins(SP, R0, 0, 4); ++ ++ __ enter(); ++ // -2 because return address is already present and so is saved fp ++ __ addiu(SP, SP, -1 * (stack_size - 2*wordSize)); ++ ++ // Frame is now completed as far a size and linkage. ++ ++ int frame_complete = ((intptr_t)__ pc()) - start; ++ ++ // Calculate the difference between sp and fp. We need to know it ++ // after the native call because on windows Java Natives will pop ++ // the arguments and it is painful to do sp relative addressing ++ // in a platform independent way. So after the call we switch to ++ // fp relative addressing. ++ //FIXME actually , the fp_adjustment may not be the right, because andr(sp, sp, at) may change ++ //the SP ++ int fp_adjustment = stack_size - 2*wordSize; ++ ++#ifdef COMPILER2 ++ // C2 may leave the stack dirty if not in SSE2+ mode ++ __ empty_FPU_stack(); ++#endif ++ ++ // Compute the fp offset for any slots used after the jni call ++ ++ int lock_slot_fp_offset = (lock_slot_offset*VMRegImpl::stack_slot_size) - fp_adjustment; ++ // We use TREG as a thread pointer because it is callee save and ++ // if we load it once it is usable thru the entire wrapper ++ const Register thread = TREG; ++ ++ // We use S4 as the oop handle for the receiver/klass ++ // It is callee save so it survives the call to native ++ ++ const Register oop_handle_reg = S4; ++ ++#ifndef OPT_THREAD ++ __ get_thread(thread); ++#endif ++ ++ // ++ // We immediately shuffle the arguments so that any vm call we have to ++ // make from here on out (sync slow path, jvmpi, etc.) we will have ++ // captured the oops from our caller and have a valid oopMap for ++ // them. ++ ++ // ----------------- ++ // The Grand Shuffle ++ // ++ // Natives require 1 or 2 extra arguments over the normal ones: the JNIEnv* ++ // and, if static, the class mirror instead of a receiver. This pretty much ++ // guarantees that register layout will not match (and mips doesn't use reg ++ // parms though amd does). Since the native abi doesn't use register args ++ // and the java conventions does we don't have to worry about collisions. ++ // All of our moved are reg->stack or stack->stack. ++ // We ignore the extra arguments during the shuffle and handle them at the ++ // last moment. 
The shuffle is described by the two calling convention ++ // vectors we have in our possession. We simply walk the java vector to ++ // get the source locations and the c vector to get the destinations. ++ ++ int c_arg = method->is_static() ? 2 : 1 ; ++ ++ // Record sp-based slot for receiver on stack for non-static methods ++ int receiver_offset = -1; ++ ++ // This is a trick. We double the stack slots so we can claim ++ // the oops in the caller's frame. Since we are sure to have ++ // more args than the caller doubling is enough to make ++ // sure we can capture all the incoming oop args from the ++ // caller. ++ // ++ OopMap* map = new OopMap(stack_slots * 2, 0 /* arg_slots*/); ++ ++ // Mark location of fp (someday) ++ // map->set_callee_saved(VMRegImpl::stack2reg( stack_slots - 2), stack_slots * 2, 0, vmreg(fp)); ++ ++#ifdef ASSERT ++ bool reg_destroyed[RegisterImpl::number_of_registers]; ++ bool freg_destroyed[FloatRegisterImpl::number_of_registers]; ++ for ( int r = 0 ; r < RegisterImpl::number_of_registers ; r++ ) { ++ reg_destroyed[r] = false; ++ } ++ for ( int f = 0 ; f < FloatRegisterImpl::number_of_registers ; f++ ) { ++ freg_destroyed[f] = false; ++ } ++ ++#endif /* ASSERT */ ++ ++ // This may iterate in two different directions depending on the ++ // kind of native it is. The reason is that for regular JNI natives ++ // the incoming and outgoing registers are offset upwards and for ++ // critical natives they are offset down. ++ GrowableArray arg_order(2 * total_in_args); ++ VMRegPair tmp_vmreg; ++ tmp_vmreg.set2(T8->as_VMReg()); ++ ++ if (!is_critical_native) { ++ for (int i = total_in_args - 1, c_arg = total_c_args - 1; i >= 0; i--, c_arg--) { ++ arg_order.push(i); ++ arg_order.push(c_arg); ++ } ++ } else { ++ // Compute a valid move order, using tmp_vmreg to break any cycles ++ Unimplemented(); ++ // ComputeMoveOrder cmo(total_in_args, in_regs, total_c_args, out_regs, in_sig_bt, arg_order, tmp_vmreg); ++ } ++ ++ int temploc = -1; ++ for (int ai = 0; ai < arg_order.length(); ai += 2) { ++ int i = arg_order.at(ai); ++ int c_arg = arg_order.at(ai + 1); ++ __ block_comment(err_msg("move %d -> %d", i, c_arg)); ++ if (c_arg == -1) { ++ assert(is_critical_native, "should only be required for critical natives"); ++ // This arg needs to be moved to a temporary ++ __ move(tmp_vmreg.first()->as_Register(), in_regs[i].first()->as_Register()); ++ in_regs[i] = tmp_vmreg; ++ temploc = i; ++ continue; ++ } else if (i == -1) { ++ assert(is_critical_native, "should only be required for critical natives"); ++ // Read from the temporary location ++ assert(temploc != -1, "must be valid"); ++ i = temploc; ++ temploc = -1; ++ } ++#ifdef ASSERT ++ if (in_regs[i].first()->is_Register()) { ++ assert(!reg_destroyed[in_regs[i].first()->as_Register()->encoding()], "destroyed reg!"); ++ } else if (in_regs[i].first()->is_FloatRegister()) { ++ assert(!freg_destroyed[in_regs[i].first()->as_FloatRegister()->encoding()], "destroyed reg!"); ++ } ++ if (out_regs[c_arg].first()->is_Register()) { ++ reg_destroyed[out_regs[c_arg].first()->as_Register()->encoding()] = true; ++ } else if (out_regs[c_arg].first()->is_FloatRegister()) { ++ freg_destroyed[out_regs[c_arg].first()->as_FloatRegister()->encoding()] = true; ++ } ++#endif /* ASSERT */ ++ switch (in_sig_bt[i]) { ++ case T_ARRAY: ++ if (is_critical_native) { ++ Unimplemented(); ++ // unpack_array_argument(masm, in_regs[i], in_elem_bt[i], out_regs[c_arg + 1], out_regs[c_arg]); ++ c_arg++; ++#ifdef ASSERT ++ if (out_regs[c_arg].first()->is_Register()) { ++ 
reg_destroyed[out_regs[c_arg].first()->as_Register()->encoding()] = true; ++ } else if (out_regs[c_arg].first()->is_FloatRegister()) { ++ freg_destroyed[out_regs[c_arg].first()->as_FloatRegister()->encoding()] = true; ++ } ++#endif ++ break; ++ } ++ case T_OBJECT: ++ assert(!is_critical_native, "no oop arguments"); ++ object_move(masm, map, oop_handle_offset, stack_slots, in_regs[i], out_regs[c_arg], ++ ((i == 0) && (!is_static)), ++ &receiver_offset); ++ break; ++ case T_VOID: ++ break; ++ ++ case T_FLOAT: ++ float_move(masm, in_regs[i], out_regs[c_arg]); ++ break; ++ ++ case T_DOUBLE: ++ assert( i + 1 < total_in_args && ++ in_sig_bt[i + 1] == T_VOID && ++ out_sig_bt[c_arg+1] == T_VOID, "bad arg list"); ++ double_move(masm, in_regs[i], out_regs[c_arg]); ++ break; ++ ++ case T_LONG : ++ long_move(masm, in_regs[i], out_regs[c_arg]); ++ break; ++ ++ case T_ADDRESS: assert(false, "found T_ADDRESS in java args"); ++ ++ default: ++ simple_move32(masm, in_regs[i], out_regs[c_arg]); ++ } ++ } ++ ++ // point c_arg at the first arg that is already loaded in case we ++ // need to spill before we call out ++ c_arg = total_c_args - total_in_args; ++ // Pre-load a static method's oop. Used both by locking code and ++ // the normal JNI call code. ++ ++ __ move(oop_handle_reg, A1); ++ ++ if (method->is_static() && !is_critical_native) { ++ ++ // load opp into a register ++ int oop_index = __ oop_recorder()->find_index(JNIHandles::make_local( ++ (method->method_holder())->java_mirror())); ++ ++ ++ RelocationHolder rspec = oop_Relocation::spec(oop_index); ++ __ relocate(rspec); ++ __ patchable_set48(oop_handle_reg, (long)JNIHandles::make_local((method->method_holder())->java_mirror())); ++ // Now handlize the static class mirror it's known not-null. ++ __ sd( oop_handle_reg, SP, klass_offset); ++ map->set_oop(VMRegImpl::stack2reg(klass_slot_offset)); ++ ++ // Now get the handle ++ __ lea(oop_handle_reg, Address(SP, klass_offset)); ++ // store the klass handle as second argument ++ __ move(A1, oop_handle_reg); ++ // and protect the arg if we must spill ++ c_arg--; ++ } ++ ++ // Change state to native (we save the return address in the thread, since it might not ++ // be pushed on the stack when we do a a stack traversal). It is enough that the pc() ++ // points into the right code segment. It does not have to be the correct return pc. ++ // We use the same pc/oopMap repeatedly when we call out ++ ++ intptr_t the_pc = (intptr_t) __ pc(); ++ oop_maps->add_gc_map(the_pc - start, map); ++ ++ __ set_last_Java_frame(SP, noreg, NULL); ++ __ relocate(relocInfo::internal_pc_type); ++ { ++ intptr_t save_pc = (intptr_t)the_pc ; ++ __ patchable_set48(AT, save_pc); ++ } ++ __ sd(AT, thread, in_bytes(JavaThread::frame_anchor_offset() + JavaFrameAnchor::last_Java_pc_offset())); ++ ++ ++ // We have all of the arguments setup at this point. We must not touch any register ++ // argument registers at this point (what if we save/restore them there are no oop? 
++ { ++ SkipIfEqual skip_if(masm, &DTraceMethodProbes, 0); ++ save_args(masm, total_c_args, c_arg, out_regs); ++ int metadata_index = __ oop_recorder()->find_index(method()); ++ RelocationHolder rspec = metadata_Relocation::spec(metadata_index); ++ __ relocate(rspec); ++ __ patchable_set48(AT, (long)(method())); ++ ++ __ call_VM_leaf( ++ CAST_FROM_FN_PTR(address, SharedRuntime::dtrace_method_entry), ++ thread, AT); ++ ++ restore_args(masm, total_c_args, c_arg, out_regs); ++ } ++ ++ // These are register definitions we need for locking/unlocking ++ const Register swap_reg = T8; // Must use T8 for cmpxchg instruction ++ const Register obj_reg = T9; // Will contain the oop ++ //const Register lock_reg = T6; // Address of compiler lock object (BasicLock) ++ const Register lock_reg = c_rarg0; // Address of compiler lock object (BasicLock) ++ ++ ++ ++ Label slow_path_lock; ++ Label lock_done; ++ ++ // Lock a synchronized method ++ if (method->is_synchronized()) { ++ assert(!is_critical_native, "unhandled"); ++ ++ const int mark_word_offset = BasicLock::displaced_header_offset_in_bytes(); ++ ++ // Get the handle (the 2nd argument) ++ __ move(oop_handle_reg, A1); ++ ++ // Get address of the box ++ __ lea(lock_reg, Address(FP, lock_slot_fp_offset)); ++ ++ // Load the oop from the handle ++ __ ld(obj_reg, oop_handle_reg, 0); ++ ++ if (UseBiasedLocking) { ++ // Note that oop_handle_reg is trashed during this call ++ __ biased_locking_enter(lock_reg, obj_reg, swap_reg, A1, false, lock_done, &slow_path_lock); ++ } ++ ++ // Load immediate 1 into swap_reg %T8 ++ __ move(swap_reg, 1); ++ ++ __ ld(AT, obj_reg, 0); ++ __ orr(swap_reg, swap_reg, AT); ++ ++ __ sd(swap_reg, lock_reg, mark_word_offset); ++ __ cmpxchg(Address(obj_reg, 0), swap_reg, lock_reg, AT, true, false, lock_done); ++ // Test if the oopMark is an obvious stack pointer, i.e., ++ // 1) (mark & 3) == 0, and ++ // 2) sp <= mark < mark + os::pagesize() ++ // These 3 tests can be done by evaluating the following ++ // expression: ((mark - sp) & (3 - os::vm_page_size())), ++ // assuming both stack pointer and pagesize have their ++ // least significant 2 bits clear. ++ // NOTE: the oopMark is in swap_reg %T8 as the result of cmpxchg ++ ++ __ dsubu(swap_reg, swap_reg, SP); ++ __ move(AT, 3 - os::vm_page_size()); ++ __ andr(swap_reg , swap_reg, AT); ++ // Save the test result, for recursive case, the result is zero ++ __ sd(swap_reg, lock_reg, mark_word_offset); ++ __ bne(swap_reg, R0, slow_path_lock); ++ __ delayed()->nop(); ++ // Slow path will re-enter here ++ __ bind(lock_done); ++ ++ if (UseBiasedLocking) { ++ // Re-fetch oop_handle_reg as we trashed it above ++ __ move(A1, oop_handle_reg); ++ } ++ } ++ ++ ++ // Finally just about ready to make the JNI call ++ ++ ++ // get JNIEnv* which is first argument to native ++ if (!is_critical_native) { ++ __ addiu(A0, thread, in_bytes(JavaThread::jni_environment_offset())); ++ ++ // Now set thread in native ++ __ addiu(AT, R0, _thread_in_native); ++ if(os::is_MP()) { ++ __ sync(); // store release ++ } ++ __ sw(AT, thread, in_bytes(JavaThread::thread_state_offset())); ++ } ++ ++ // do the call ++ __ call(native_func, relocInfo::runtime_call_type); ++ __ delayed()->nop(); ++ // WARNING - on Windows Java Natives use pascal calling convention and pop the ++ // arguments off of the stack. We could just re-adjust the stack pointer here ++ // and continue to do SP relative addressing but we instead switch to FP ++ // relative addressing. ++ ++ // Unpack native results. 
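++ // The native result comes back in V0 (or in a float register for float/double).
++ // Sub-int results are normalized below: booleans are canonicalized to 0/1, chars
++ // are zero-extended, bytes/shorts are sign-extended; object/array results remain
++ // JNI handles until after the safepoint check.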
++ switch (ret_type) { ++ case T_BOOLEAN: __ c2bool(V0); break; ++ case T_CHAR : __ andi(V0, V0, 0xFFFF); break; ++ case T_BYTE : __ sign_extend_byte (V0); break; ++ case T_SHORT : __ sign_extend_short(V0); break; ++ case T_INT : // nothing to do break; ++ case T_DOUBLE : ++ case T_FLOAT : ++ // Result is in st0 we'll save as needed ++ break; ++ case T_ARRAY: // Really a handle ++ case T_OBJECT: // Really a handle ++ break; // can't de-handlize until after safepoint check ++ case T_VOID: break; ++ case T_LONG: break; ++ default : ShouldNotReachHere(); ++ } ++ ++ Label after_transition; ++ ++ // If this is a critical native, check for a safepoint or suspend request after the call. ++ // If a safepoint is needed, transition to native, then to native_trans to handle ++ // safepoints like the native methods that are not critical natives. ++ if (is_critical_native) { ++ Label needs_safepoint; ++ __ safepoint_poll_acquire(needs_safepoint, thread); ++ __ lw(AT, thread, in_bytes(JavaThread::suspend_flags_offset())); ++ __ beq(AT, R0, after_transition); ++ __ delayed()->nop(); ++ __ bind(needs_safepoint); ++ } ++ ++ // Switch thread to "native transition" state before reading the synchronization state. ++ // This additional state is necessary because reading and testing the synchronization ++ // state is not atomic w.r.t. GC, as this scenario demonstrates: ++ // Java thread A, in _thread_in_native state, loads _not_synchronized and is preempted. ++ // VM thread changes sync state to synchronizing and suspends threads for GC. ++ // Thread A is resumed to finish this native method, but doesn't block here since it ++ // didn't see any synchronization is progress, and escapes. ++ __ addiu(AT, R0, _thread_in_native_trans); ++ if(os::is_MP()) { ++ __ sync(); // store release ++ } ++ __ sw(AT, thread, in_bytes(JavaThread::thread_state_offset())); ++ ++ if(os::is_MP()) __ sync(); // Force this write out before the read below ++ ++ // check for safepoint operation in progress and/or pending suspend requests ++ { ++ Label Continue; ++ Label slow_path; ++ ++ __ safepoint_poll_acquire(slow_path, thread); ++ __ lw(AT, thread, in_bytes(JavaThread::suspend_flags_offset())); ++ __ beq(AT, R0, Continue); ++ __ delayed()->nop(); ++ __ bind(slow_path); ++ ++ // Don't use call_VM as it will see a possible pending exception and forward it ++ // and never return here preventing us from clearing _last_native_pc down below. 
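++ // The slow path below therefore spills the native result, calls
++ // JavaThread::check_special_condition_for_native_trans directly with a 16-byte
++ // aligned SP, then restores the result before rejoining the fast path at 'Continue'.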
++ // ++ save_native_result(masm, ret_type, stack_slots); ++ __ move(A0, thread); ++ __ addiu(SP, SP, -wordSize); ++ __ push(S2); ++ __ move(S2, SP); // use S2 as a sender SP holder ++ assert(StackAlignmentInBytes == 16, "must be"); ++ __ dins(SP, R0, 0, 4); // align stack as required by ABI ++ __ call(CAST_FROM_FN_PTR(address, JavaThread::check_special_condition_for_native_trans), relocInfo::runtime_call_type); ++ __ delayed()->nop(); ++ __ move(SP, S2); // use S2 as a sender SP holder ++ __ pop(S2); ++ __ addiu(SP, SP, wordSize); ++ //add for compressedoops ++ __ reinit_heapbase(); ++ // Restore any method result value ++ restore_native_result(masm, ret_type, stack_slots); ++ ++ __ bind(Continue); ++ } ++ ++ // change thread state ++ __ addiu(AT, R0, _thread_in_Java); ++ if(os::is_MP()) { ++ __ sync(); // store release ++ } ++ __ sw(AT, thread, in_bytes(JavaThread::thread_state_offset())); ++ __ bind(after_transition); ++ Label reguard; ++ Label reguard_done; ++ __ lw(AT, thread, in_bytes(JavaThread::stack_guard_state_offset())); ++ __ addiu(AT, AT, -StackOverflow::stack_guard_yellow_reserved_disabled); ++ __ beq(AT, R0, reguard); ++ __ delayed()->nop(); ++ // slow path reguard re-enters here ++ __ bind(reguard_done); ++ ++ // Handle possible exception (will unlock if necessary) ++ ++ // native result if any is live ++ ++ // Unlock ++ Label slow_path_unlock; ++ Label unlock_done; ++ if (method->is_synchronized()) { ++ ++ Label done; ++ ++ // Get locked oop from the handle we passed to jni ++ __ ld( obj_reg, oop_handle_reg, 0); ++ if (UseBiasedLocking) { ++ __ biased_locking_exit(obj_reg, T8, done); ++ ++ } ++ ++ // Simple recursive lock? ++ ++ __ ld(AT, FP, lock_slot_fp_offset); ++ __ beq(AT, R0, done); ++ __ delayed()->nop(); ++ // Must save FSF if if it is live now because cmpxchg must use it ++ if (ret_type != T_FLOAT && ret_type != T_DOUBLE && ret_type != T_VOID) { ++ save_native_result(masm, ret_type, stack_slots); ++ } ++ ++ // get old displaced header ++ __ ld (T8, FP, lock_slot_fp_offset); ++ // get address of the stack lock ++ __ addiu(c_rarg0, FP, lock_slot_fp_offset); ++ // Atomic swap old header if oop still contains the stack lock ++ __ cmpxchg(Address(obj_reg, 0), c_rarg0, T8, AT, false, false, unlock_done, &slow_path_unlock); ++ ++ // slow path re-enters here ++ __ bind(unlock_done); ++ if (ret_type != T_FLOAT && ret_type != T_DOUBLE && ret_type != T_VOID) { ++ restore_native_result(masm, ret_type, stack_slots); ++ } ++ ++ __ bind(done); ++ ++ } ++ { ++ SkipIfEqual skip_if(masm, &DTraceMethodProbes, 0); ++ // Tell dtrace about this method exit ++ save_native_result(masm, ret_type, stack_slots); ++ int metadata_index = __ oop_recorder()->find_index( (method())); ++ RelocationHolder rspec = metadata_Relocation::spec(metadata_index); ++ __ relocate(rspec); ++ __ patchable_set48(AT, (long)(method())); ++ ++ __ call_VM_leaf( ++ CAST_FROM_FN_PTR(address, SharedRuntime::dtrace_method_exit), ++ thread, AT); ++ restore_native_result(masm, ret_type, stack_slots); ++ } ++ ++ // We can finally stop using that last_Java_frame we setup ages ago ++ ++ __ reset_last_Java_frame(false); ++ ++ // Unpack oop result, e.g. JNIHandles::resolve value. 
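++ // A returned jobject is still a JNI handle at this point; resolve_jobject turns
++ // it back into a raw oop in V0 before we return to Java code.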
++ if (ret_type == T_OBJECT || ret_type == T_ARRAY) { ++ __ resolve_jobject(V0, thread, T9); ++ } ++ ++ if (CheckJNICalls) { ++ // clear_pending_jni_exception_check ++ __ sd(R0, thread, in_bytes(JavaThread::pending_jni_exception_check_fn_offset())); ++ } ++ ++ if (!is_critical_native) { ++ // reset handle block ++ __ ld(AT, thread, in_bytes(JavaThread::active_handles_offset())); ++ __ sw(R0, AT, JNIHandleBlock::top_offset_in_bytes()); ++ } ++ ++ if (!is_critical_native) { ++ // Any exception pending? ++ __ ld(AT, thread, in_bytes(Thread::pending_exception_offset())); ++ __ bne(AT, R0, exception_pending); ++ __ delayed()->nop(); ++ } ++ // no exception, we're almost done ++ ++ // check that only result value is on FPU stack ++ __ verify_FPU(ret_type == T_FLOAT || ret_type == T_DOUBLE ? 1 : 0, "native_wrapper normal exit"); ++ ++ // Return ++#ifndef OPT_THREAD ++ __ get_thread(TREG); ++#endif ++ //__ ld_ptr(SP, TREG, in_bytes(JavaThread::last_Java_sp_offset())); ++ __ leave(); ++ ++ __ jr(RA); ++ __ delayed()->nop(); ++ // Unexpected paths are out of line and go here ++ // Slow path locking & unlocking ++ if (method->is_synchronized()) { ++ ++ // BEGIN Slow path lock ++ __ bind(slow_path_lock); ++ ++ // protect the args we've loaded ++ save_args(masm, total_c_args, c_arg, out_regs); ++ ++ // has last_Java_frame setup. No exceptions so do vanilla call not call_VM ++ // args are (oop obj, BasicLock* lock, JavaThread* thread) ++ ++ __ move(A0, obj_reg); ++ __ move(A1, lock_reg); ++ __ move(A2, thread); ++ __ addiu(SP, SP, - 3*wordSize); ++ ++ __ move(S2, SP); // use S2 as a sender SP holder ++ assert(StackAlignmentInBytes == 16, "must be"); ++ __ dins(SP, R0, 0, 4); // align stack as required by ABI ++ ++ __ call(CAST_FROM_FN_PTR(address, SharedRuntime::complete_monitor_locking_C), relocInfo::runtime_call_type); ++ __ delayed()->nop(); ++ __ move(SP, S2); ++ __ addiu(SP, SP, 3*wordSize); ++ ++ restore_args(masm, total_c_args, c_arg, out_regs); ++ ++#ifdef ASSERT ++ { Label L; ++ __ ld(AT, thread, in_bytes(Thread::pending_exception_offset())); ++ __ beq(AT, R0, L); ++ __ delayed()->nop(); ++ __ stop("no pending exception allowed on exit from monitorenter"); ++ __ bind(L); ++ } ++#endif ++ __ b(lock_done); ++ __ delayed()->nop(); ++ // END Slow path lock ++ ++ // BEGIN Slow path unlock ++ __ bind(slow_path_unlock); ++ ++ // Slow path unlock ++ ++ if (ret_type == T_FLOAT || ret_type == T_DOUBLE ) { ++ save_native_result(masm, ret_type, stack_slots); ++ } ++ // Save pending exception around call to VM (which contains an EXCEPTION_MARK) ++ ++ __ ld(AT, thread, in_bytes(Thread::pending_exception_offset())); ++ __ push(AT); ++ __ sd(R0, thread, in_bytes(Thread::pending_exception_offset())); ++ ++ __ move(S2, SP); // use S2 as a sender SP holder ++ assert(StackAlignmentInBytes == 16, "must be"); ++ __ dins(SP, R0, 0, 4); // align stack as required by ABI ++ ++ // should be a peal ++ // +wordSize because of the push above ++ __ addiu(A1, FP, lock_slot_fp_offset); ++ ++ __ move(A0, obj_reg); ++ __ move(A2, thread); ++ __ addiu(SP, SP, -2*wordSize); ++ __ call(CAST_FROM_FN_PTR(address, SharedRuntime::complete_monitor_unlocking_C), ++ relocInfo::runtime_call_type); ++ __ delayed()->nop(); ++ __ addiu(SP, SP, 2*wordSize); ++ __ move(SP, S2); ++ //add for compressedoops ++ __ reinit_heapbase(); ++#ifdef ASSERT ++ { ++ Label L; ++ __ ld( AT, thread, in_bytes(Thread::pending_exception_offset())); ++ __ beq(AT, R0, L); ++ __ delayed()->nop(); ++ __ stop("no pending exception allowed on exit 
complete_monitor_unlocking_C"); ++ __ bind(L); ++ } ++#endif /* ASSERT */ ++ ++ __ pop(AT); ++ __ sd(AT, thread, in_bytes(Thread::pending_exception_offset())); ++ if (ret_type == T_FLOAT || ret_type == T_DOUBLE ) { ++ restore_native_result(masm, ret_type, stack_slots); ++ } ++ __ b(unlock_done); ++ __ delayed()->nop(); ++ // END Slow path unlock ++ ++ } ++ ++ // SLOW PATH Reguard the stack if needed ++ ++ __ bind(reguard); ++ save_native_result(masm, ret_type, stack_slots); ++ __ call(CAST_FROM_FN_PTR(address, SharedRuntime::reguard_yellow_pages), ++ relocInfo::runtime_call_type); ++ __ delayed()->nop(); ++ //add for compressedoops ++ __ reinit_heapbase(); ++ restore_native_result(masm, ret_type, stack_slots); ++ __ b(reguard_done); ++ __ delayed()->nop(); ++ ++ // BEGIN EXCEPTION PROCESSING ++ if (!is_critical_native) { ++ // Forward the exception ++ __ bind(exception_pending); ++ ++ // remove possible return value from FPU register stack ++ __ empty_FPU_stack(); ++ ++ // pop our frame ++ //forward_exception_entry need return address on stack ++ __ move(SP, FP); ++ __ pop(FP); ++ ++ // and forward the exception ++ __ jmp(StubRoutines::forward_exception_entry(), relocInfo::runtime_call_type); ++ __ delayed()->nop(); ++ } ++ __ flush(); ++ ++ nmethod *nm = nmethod::new_native_nmethod(method, ++ compile_id, ++ masm->code(), ++ vep_offset, ++ frame_complete, ++ stack_slots / VMRegImpl::slots_per_word, ++ (is_static ? in_ByteSize(klass_offset) : in_ByteSize(receiver_offset)), ++ in_ByteSize(lock_slot_offset*VMRegImpl::stack_slot_size), ++ oop_maps); ++ ++ return nm; ++} ++ ++// this function returns the adjust size (in number of words) to a c2i adapter ++// activation for use during deoptimization ++int Deoptimization::last_frame_adjust(int callee_parameters, int callee_locals) { ++ return (callee_locals - callee_parameters) * Interpreter::stackElementWords; ++} ++ ++// Number of stack slots between incoming argument block and the start of ++// a new frame. The PROLOG must add this many slots to the stack. The ++// EPILOG must remove this many slots. mips64 needs two slots for ++// return address and fp. ++// TODO think this is correct but check ++uint SharedRuntime::in_preserve_stack_slots() { ++ return 4; ++} ++ ++// "Top of Stack" slots that may be unused by the calling convention but must ++// otherwise be preserved. ++// On Intel these are not necessary and the value can be zero. ++// On Sparc this describes the words reserved for storing a register window ++// when an interrupt occurs. ++uint SharedRuntime::out_preserve_stack_slots() { ++ return 0; ++} ++ ++//------------------------------generate_deopt_blob---------------------------- ++// Ought to generate an ideal graph & compile, but here's some SPARC ASM ++// instead. 
++void SharedRuntime::generate_deopt_blob() { ++ // allocate space for the code ++ ResourceMark rm; ++ // setup code generation tools ++ //CodeBuffer buffer ("deopt_blob", 4000, 2048); ++ CodeBuffer buffer ("deopt_blob", 8000, 2048); ++ MacroAssembler* masm = new MacroAssembler( & buffer); ++ int frame_size_in_words; ++ OopMap* map = NULL; ++ // Account for the extra args we place on the stack ++ // by the time we call fetch_unroll_info ++ const int additional_words = 2; // deopt kind, thread ++ ++ OopMapSet *oop_maps = new OopMapSet(); ++ ++ address start = __ pc(); ++ Label cont; ++ // we use S3 for DeOpt reason register ++ Register reason = S3; ++ // use S6 for thread register ++ Register thread = TREG; ++ // use S7 for fetch_unroll_info returned UnrollBlock ++ Register unroll = S7; ++ // Prolog for non exception case! ++ // Correct the return address we were given. ++ //FIXME, return address is on the tos or Ra? ++ __ addiu(RA, RA, - (NativeCall::return_address_offset_long)); ++ // Save everything in sight. ++ map = RegisterSaver::save_live_registers(masm, additional_words, &frame_size_in_words); ++ // Normal deoptimization ++ __ move(reason, Deoptimization::Unpack_deopt); ++ __ b(cont); ++ __ delayed()->nop(); ++ ++ int reexecute_offset = __ pc() - start; ++ ++ // Reexecute case ++ // return address is the pc describes what bci to do re-execute at ++ ++ // No need to update map as each call to save_live_registers will produce identical oopmap ++ (void) RegisterSaver::save_live_registers(masm, additional_words, &frame_size_in_words); ++ __ move(reason, Deoptimization::Unpack_reexecute); ++ __ b(cont); ++ __ delayed()->nop(); ++ ++ int exception_offset = __ pc() - start; ++ // Prolog for exception case ++ ++ // all registers are dead at this entry point, except for V0 and ++ // V1 which contain the exception oop and exception pc ++ // respectively. Set them in TLS and fall thru to the ++ // unpack_with_exception_in_tls entry point. ++ ++#ifndef OPT_THREAD ++ __ get_thread(thread); ++#endif ++ __ st_ptr(V1, thread, in_bytes(JavaThread::exception_pc_offset())); ++ __ st_ptr(V0, thread, in_bytes(JavaThread::exception_oop_offset())); ++ int exception_in_tls_offset = __ pc() - start; ++ // new implementation because exception oop is now passed in JavaThread ++ ++ // Prolog for exception case ++ // All registers must be preserved because they might be used by LinearScan ++ // Exceptiop oop and throwing PC are passed in JavaThread ++ // tos: stack at point of call to method that threw the exception (i.e. only ++ // args are on the stack, no return address) ++ ++ // Return address will be patched later with the throwing pc. The correct value is not ++ // available now because loading it from memory would destroy registers. ++ // Save everything in sight. ++ // No need to update map as each call to save_live_registers will produce identical oopmap ++ __ addiu(RA, RA, - (NativeCall::return_address_offset_long)); ++ (void) RegisterSaver::save_live_registers(masm, additional_words, &frame_size_in_words); ++ ++ // Now it is safe to overwrite any register ++ // store the correct deoptimization type ++ __ move(reason, Deoptimization::Unpack_exception); ++ // load throwing pc from JavaThread and patch it as the return address ++ // of the current frame. 
Then clear the field in JavaThread ++#ifndef OPT_THREAD ++ __ get_thread(thread); ++#endif ++ __ ld_ptr(V1, thread, in_bytes(JavaThread::exception_pc_offset())); ++ __ st_ptr(V1, SP, RegisterSaver::raOffset() * wordSize); //save ra ++ __ st_ptr(R0, thread, in_bytes(JavaThread::exception_pc_offset())); ++ ++ ++#ifdef ASSERT ++ // verify that there is really an exception oop in JavaThread ++ __ ld_ptr(AT, thread, in_bytes(JavaThread::exception_oop_offset())); ++ __ verify_oop(AT); ++ // verify that there is no pending exception ++ Label no_pending_exception; ++ __ ld_ptr(AT, thread, in_bytes(Thread::pending_exception_offset())); ++ __ beq(AT, R0, no_pending_exception); ++ __ delayed()->nop(); ++ __ stop("must not have pending exception here"); ++ __ bind(no_pending_exception); ++#endif ++ __ bind(cont); ++ // Compiled code leaves the floating point stack dirty, empty it. ++ __ empty_FPU_stack(); ++ ++ ++ // Call C code. Need thread and this frame, but NOT official VM entry ++ // crud. We cannot block on this call, no GC can happen. ++#ifndef OPT_THREAD ++ __ get_thread(thread); ++#endif ++ ++ __ move(A0, thread); ++ __ move(A1, reason); // exec_mode ++ __ addiu(SP, SP, -additional_words * wordSize); ++ ++ __ set_last_Java_frame(NOREG, NOREG, NULL); ++ ++ // Call fetch_unroll_info(). Need thread and this frame, but NOT official VM entry - cannot block on ++ // this call, no GC can happen. Call should capture return values. ++ ++ __ relocate(relocInfo::internal_pc_type); ++ { ++ intptr_t save_pc = (intptr_t)__ pc() + NativeMovConstReg::instruction_size + 28; ++ __ patchable_set48(AT, save_pc); ++ } ++ __ sd(AT, thread, in_bytes(JavaThread::frame_anchor_offset() + JavaFrameAnchor::last_Java_pc_offset())); ++ ++ __ call((address)Deoptimization::fetch_unroll_info); ++ //__ call(CAST_FROM_FN_PTR(address, Deoptimization::fetch_unroll_info), relocInfo::runtime_call_type); ++ __ delayed()->nop(); ++ oop_maps->add_gc_map(__ pc() - start, map); ++ __ addiu(SP, SP, additional_words * wordSize); ++#ifndef OPT_THREAD ++ __ get_thread(thread); ++#endif ++ __ reset_last_Java_frame(false); ++ ++ // Load UnrollBlock into S7 ++ __ move(unroll, V0); ++ ++ ++ // Move the unpack kind to a safe place in the UnrollBlock because ++ // we are very short of registers ++ ++ Address unpack_kind(unroll, Deoptimization::UnrollBlock::unpack_kind_offset_in_bytes()); ++ __ sw(reason, unpack_kind); ++ // save the unpack_kind value ++ // Retrieve the possible live values (return values) ++ // All callee save registers representing jvm state ++ // are now in the vframeArray. ++ ++ Label noException; ++ __ move(AT, Deoptimization::Unpack_exception); ++ __ bne(AT, reason, noException);// Was exception pending? ++ __ delayed()->nop(); ++ __ ld_ptr(V0, thread, in_bytes(JavaThread::exception_oop_offset())); ++ __ ld_ptr(V1, thread, in_bytes(JavaThread::exception_pc_offset())); ++ __ st_ptr(R0, thread, in_bytes(JavaThread::exception_pc_offset())); ++ __ st_ptr(R0, thread, in_bytes(JavaThread::exception_oop_offset())); ++ ++ __ verify_oop(V0); ++ ++ // Overwrite the result registers with the exception results. ++ __ st_ptr(V0, SP, RegisterSaver::v0Offset()*wordSize); ++ __ st_ptr(V1, SP, RegisterSaver::v1Offset()*wordSize); ++ ++ __ bind(noException); ++ ++ ++ // Stack is back to only having register save data on the stack. ++ // Now restore the result registers. Everything else is either dead or captured ++ // in the vframeArray. 
++ ++ RegisterSaver::restore_result_registers(masm); ++ // All of the register save area has been popped of the stack. Only the ++ // return address remains. ++ // Pop all the frames we must move/replace. ++ // Frame picture (youngest to oldest) ++ // 1: self-frame (no frame link) ++ // 2: deopting frame (no frame link) ++ // 3: caller of deopting frame (could be compiled/interpreted). ++ // ++ // Note: by leaving the return address of self-frame on the stack ++ // and using the size of frame 2 to adjust the stack ++ // when we are done the return to frame 3 will still be on the stack. ++ ++ // register for the sender's sp ++ Register sender_sp = Rsender; ++ // register for frame pcs ++ Register pcs = T0; ++ // register for frame sizes ++ Register sizes = T1; ++ // register for frame count ++ Register count = T3; ++ ++ // Pop deoptimized frame ++ __ lw(T8, unroll, Deoptimization::UnrollBlock::size_of_deoptimized_frame_offset_in_bytes()); ++ __ addu(SP, SP, T8); ++ // sp should be pointing at the return address to the caller (3) ++ ++ // Load array of frame pcs into pcs ++ __ ld_ptr(pcs, unroll, Deoptimization::UnrollBlock::frame_pcs_offset_in_bytes()); ++ __ addiu(SP, SP, wordSize); // trash the old pc ++ // Load array of frame sizes into T6 ++ __ ld_ptr(sizes, unroll, Deoptimization::UnrollBlock::frame_sizes_offset_in_bytes()); ++ ++ ++#ifdef ASSERT ++ // Compilers generate code that bang the stack by as much as the ++ // interpreter would need. So this stack banging should never ++ // trigger a fault. Verify that it does not on non product builds. ++ __ lw(TSR, unroll, Deoptimization::UnrollBlock::total_frame_sizes_offset_in_bytes()); ++ __ bang_stack_size(TSR, T8); ++#endif ++ ++ // Load count of frams into T3 ++ __ lw(count, unroll, Deoptimization::UnrollBlock::number_of_frames_offset_in_bytes()); ++ // Pick up the initial fp we should save ++ __ ld(FP, unroll, Deoptimization::UnrollBlock::initial_info_offset_in_bytes()); ++ // Now adjust the caller's stack to make up for the extra locals ++ // but record the original sp so that we can save it in the skeletal interpreter ++ // frame and the stack walking of interpreter_sender will get the unextended sp ++ // value and not the "real" sp value. 
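// [Editorial sketch -- illustration only, not part of the upstream patch]
// The loop that follows walks the UnrollBlock arrays and builds one skeletal
// interpreter frame per virtual frame: push this frame's pc and the old fp,
// point fp at the new link, carve out frame_sizes[i] bytes, and record the
// caller's unextended sp so the frame stays walkable. A rough, hypothetical
// C++ rendering of that loop (slot indices taken from the annotated
// disassembly in the comment below; <cstdint> assumed for intptr_t):
struct UnrollSketch {                    // assumed mirror of the UnrollBlock fields used here
  int       number_of_frames;
  intptr_t* frame_sizes;                 // byte size of each skeletal frame
  intptr_t* frame_pcs;                   // pc each skeletal frame resumes at
};
enum { LAST_SP_SLOT = -2, SENDER_SP_SLOT = -1 };   // assumed fp-relative word offsets
static void push_skeletal_frames_sketch(const UnrollSketch* info,
                                        intptr_t*& sp, intptr_t*& fp,
                                        intptr_t*  sender_sp) {
  for (int i = 0; i < info->number_of_frames; i++) {
    intptr_t body = info->frame_sizes[i] - 2 * sizeof(intptr_t); // pc and fp pushed by hand
    sp -= 2;
    sp[1] = info->frame_pcs[i];          // return pc of this skeletal frame
    sp[0] = (intptr_t)fp;                // saved frame pointer
    fp = sp;                             // new frame link
    sp = (intptr_t*)((char*)sp - body);  // expand the frame body
    fp[LAST_SP_SLOT]   = 0;              // interpreter_frame_last_sp, fixed up later
    fp[SENDER_SP_SLOT] = (intptr_t)sender_sp; // makes the frame walkable
    sender_sp = sp;                      // becomes the next frame's sender sp
  }
}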
++ __ move(sender_sp, SP); ++ __ lw(AT, unroll, Deoptimization::UnrollBlock::caller_adjustment_offset_in_bytes()); ++ __ subu(SP, SP, AT); ++ ++ // Push interpreter frames in a loop ++ // ++ //Loop: ++ // 0x000000555bd82d18: lw t2, 0x0(t1) ; lw sizes[i] <--- error lw->ld ++ // 0x000000555bd82d1c: ld at, 0x0(t0) ; ld pcs[i] ++ // 0x000000555bd82d20: daddiu t2, t2, 0xfffffff0 ; t2 -= 16 ++ // 0x000000555bd82d24: daddiu sp, sp, 0xfffffff0 ++ // 0x000000555bd82d28: sd fp, 0x0(sp) ; push fp ++ // 0x000000555bd82d2c: sd at, 0x8(sp) ; push at ++ // 0x000000555bd82d30: daddu fp, sp, zero ; fp <- sp ++ // 0x000000555bd82d34: dsubu sp, sp, t2 ; sp -= t2 ++ // 0x000000555bd82d38: sd zero, 0xfffffff0(fp) ; __ sd(R0, FP, frame::interpreter_frame_last_sp_offset * wordSize); ++ // 0x000000555bd82d3c: sd s4, 0xfffffff8(fp) ; __ sd(sender_sp, FP, frame::interpreter_frame_sender_sp_offset * wordSize); ++ // 0x000000555bd82d40: daddu s4, sp, zero ; move(sender_sp, SP); ++ // 0x000000555bd82d44: daddiu t3, t3, 0xffffffff ; count -- ++ // 0x000000555bd82d48: daddiu t1, t1, 0x4 ; sizes += 4 ++ // 0x000000555bd82d4c: bne t3, zero, 0x000000555bd82d18 ++ // 0x000000555bd82d50: daddiu t0, t0, 0x4 ; <--- error t0 += 8 ++ // ++ // pcs[0] = frame_pcs[0] = deopt_sender.raw_pc(); regex.split ++ Label loop; ++ __ bind(loop); ++ __ ld(T2, sizes, 0); // Load frame size ++ __ ld_ptr(AT, pcs, 0); // save return address ++ __ addiu(T2, T2, -2*wordSize); // we'll push pc and fp, by hand ++ __ push2(AT, FP); ++ __ move(FP, SP); ++ __ subu(SP, SP, T2); // Prolog! ++ // This value is corrected by layout_activation_impl ++ __ sd(R0, FP, frame::interpreter_frame_last_sp_offset * wordSize); ++ __ sd(sender_sp, FP, frame::interpreter_frame_sender_sp_offset * wordSize);// Make it walkable ++ __ move(sender_sp, SP); // pass to next frame ++ __ addiu(count, count, -1); // decrement counter ++ __ addiu(sizes, sizes, wordSize); // Bump array pointer (sizes) ++ __ bne(count, R0, loop); ++ __ delayed()->addiu(pcs, pcs, wordSize); // Bump array pointer (pcs) ++ __ ld(AT, pcs, 0); // frame_pcs[number_of_frames] = Interpreter::deopt_entry(vtos, 0); ++ // Re-push self-frame ++ __ push2(AT, FP); ++ __ move(FP, SP); ++ __ sd(R0, FP, frame::interpreter_frame_last_sp_offset * wordSize); ++ __ sd(sender_sp, FP, frame::interpreter_frame_sender_sp_offset * wordSize); ++ __ addiu(SP, SP, -(frame_size_in_words - 2 - additional_words) * wordSize); ++ ++ // Restore frame locals after moving the frame ++ __ sd(V0, SP, RegisterSaver::v0Offset() * wordSize); ++ __ sd(V1, SP, RegisterSaver::v1Offset() * wordSize); ++ __ sdc1(F0, SP, RegisterSaver::fpResultOffset()* wordSize);// Pop float stack and store in local ++ __ sdc1(F1, SP, (RegisterSaver::fpResultOffset() + 1) * wordSize); ++ ++ ++ // Call unpack_frames(). Need thread and this frame, but NOT official VM entry - cannot block on ++ // this call, no GC can happen. 
++ __ move(A1, reason); // exec_mode ++#ifndef OPT_THREAD ++ __ get_thread(thread); ++#endif ++ __ move(A0, thread); // thread ++ __ addiu(SP, SP, (-additional_words) *wordSize); ++ ++ // set last_Java_sp, last_Java_fp ++ __ set_last_Java_frame(NOREG, FP, NULL); ++ ++ assert(StackAlignmentInBytes == 16, "must be"); ++ __ dins(SP, R0, 0, 4); // Fix stack alignment as required by ABI ++ ++ __ relocate(relocInfo::internal_pc_type); ++ { ++ intptr_t save_pc = (intptr_t)__ pc() + NativeMovConstReg::instruction_size + 28; ++ __ patchable_set48(AT, save_pc); ++ } ++ __ sd(AT, thread, in_bytes(JavaThread::frame_anchor_offset() + JavaFrameAnchor::last_Java_pc_offset())); ++ ++ __ call(CAST_FROM_FN_PTR(address, Deoptimization::unpack_frames), relocInfo::runtime_call_type); ++ __ delayed()->nop(); ++ // Revert SP alignment after call since we're going to do some SP relative addressing below ++ __ ld(SP, thread, in_bytes(JavaThread::last_Java_sp_offset())); ++ // Set an oopmap for the call site ++ oop_maps->add_gc_map(__ offset(), new OopMap( frame_size_in_words , 0)); ++ ++ __ push(V0); ++ ++#ifndef OPT_THREAD ++ __ get_thread(thread); ++#endif ++ __ reset_last_Java_frame(true); ++ ++ // Collect return values ++ __ ld(V0, SP, (RegisterSaver::v0Offset() + additional_words + 1) * wordSize); ++ __ ld(V1, SP, (RegisterSaver::v1Offset() + additional_words + 1) * wordSize); ++ __ ldc1(F0, SP, (RegisterSaver::fpResultOffset() + additional_words + 1) * wordSize);// Pop float stack and store in local ++ __ ldc1(F1, SP, (RegisterSaver::fpResultOffset() + additional_words + 2) * wordSize); ++ //FIXME, ++ // Clear floating point stack before returning to interpreter ++ __ empty_FPU_stack(); ++ //FIXME, we should consider about float and double ++ // Push a float or double return value if necessary. ++ __ leave(); ++ ++ // Jump to interpreter ++ __ jr(RA); ++ __ delayed()->nop(); ++ ++ masm->flush(); ++ _deopt_blob = DeoptimizationBlob::create(&buffer, oop_maps, 0, exception_offset, reexecute_offset, frame_size_in_words); ++ _deopt_blob->set_unpack_with_exception_in_tls_offset(exception_in_tls_offset); ++} ++ ++#ifdef COMPILER2 ++ ++//------------------------------generate_uncommon_trap_blob-------------------- ++// Ought to generate an ideal graph & compile, but here's some SPARC ASM ++// instead. ++void SharedRuntime::generate_uncommon_trap_blob() { ++ // allocate space for the code ++ ResourceMark rm; ++ // setup code generation tools ++ CodeBuffer buffer ("uncommon_trap_blob", 512*80 , 512*40 ); ++ MacroAssembler* masm = new MacroAssembler(&buffer); ++ ++ enum frame_layout { ++ fp_off, fp_off2, ++ return_off, return_off2, ++ framesize ++ }; ++ assert(framesize % 4 == 0, "sp not 16-byte aligned"); ++ ++ address start = __ pc(); ++ ++ // Push self-frame. ++ __ daddiu(SP, SP, -framesize * BytesPerInt); ++ ++ __ sd(RA, SP, return_off * BytesPerInt); ++ __ sd(FP, SP, fp_off * BytesPerInt); ++ ++ __ daddiu(FP, SP, fp_off * BytesPerInt); ++ ++ // Clear the floating point exception stack ++ __ empty_FPU_stack(); ++ ++ Register thread = TREG; ++ ++#ifndef OPT_THREAD ++ __ get_thread(thread); ++#endif ++ // set last_Java_sp ++ __ set_last_Java_frame(NOREG, FP, NULL); ++ __ relocate(relocInfo::internal_pc_type); ++ { ++ long save_pc = (long)__ pc() + 56; ++ __ patchable_set48(AT, (long)save_pc); ++ __ sd(AT, thread, in_bytes(JavaThread::frame_anchor_offset() + JavaFrameAnchor::last_Java_pc_offset())); ++ } ++ // Call C code. Need thread but NOT official VM entry ++ // crud. 
We cannot block on this call, no GC can happen. Call should ++ // capture callee-saved registers as well as return values. ++ __ move(A0, thread); ++ // argument already in T0 ++ __ move(A1, T0); ++ __ addiu(A2, R0, Deoptimization::Unpack_uncommon_trap); ++ __ patchable_call((address)Deoptimization::uncommon_trap); ++ ++ // Set an oopmap for the call site ++ OopMapSet *oop_maps = new OopMapSet(); ++ OopMap* map = new OopMap( framesize, 0 ); ++ ++ //oop_maps->add_gc_map( __ offset(), true, map); ++ oop_maps->add_gc_map( __ offset(), map); ++ ++#ifndef OPT_THREAD ++ __ get_thread(thread); ++#endif ++ __ reset_last_Java_frame(false); ++ ++ // Load UnrollBlock into S7 ++ Register unroll = S7; ++ __ move(unroll, V0); ++ ++#ifdef ASSERT ++ { Label L; ++ __ ld_ptr(AT, unroll, Deoptimization::UnrollBlock::unpack_kind_offset_in_bytes()); ++ __ li(T9, Deoptimization::Unpack_uncommon_trap); ++ __ beq(AT, T9, L); ++ __ delayed()->nop(); ++ __ stop("SharedRuntime::generate_deopt_blob: expected Unpack_uncommon_trap"); ++ __ bind(L); ++ } ++#endif ++ ++ // Pop all the frames we must move/replace. ++ // ++ // Frame picture (youngest to oldest) ++ // 1: self-frame (no frame link) ++ // 2: deopting frame (no frame link) ++ // 3: possible-i2c-adapter-frame ++ // 4: caller of deopting frame (could be compiled/interpreted. If interpreted we will create an ++ // and c2i here) ++ ++ __ daddiu(SP, SP, framesize * BytesPerInt); ++ ++ // Pop deoptimized frame ++ __ lw(T8, unroll, Deoptimization::UnrollBlock::size_of_deoptimized_frame_offset_in_bytes()); ++ __ daddu(SP, SP, T8); ++ ++#ifdef ASSERT ++ // Compilers generate code that bang the stack by as much as the ++ // interpreter would need. So this stack banging should never ++ // trigger a fault. Verify that it does not on non product builds. ++ __ lw(TSR, unroll, Deoptimization::UnrollBlock::total_frame_sizes_offset_in_bytes()); ++ __ bang_stack_size(TSR, T8); ++#endif ++ ++ // register for frame pcs ++ Register pcs = T8; ++ // register for frame sizes ++ Register sizes = T9; ++ // register for frame count ++ Register count = T3; ++ // register for the sender's sp ++ Register sender_sp = T1; ++ ++ // sp should be pointing at the return address to the caller (4) ++ // Load array of frame pcs ++ __ ld(pcs, unroll, Deoptimization::UnrollBlock::frame_pcs_offset_in_bytes()); ++ ++ // Load array of frame sizes ++ __ ld(sizes, unroll, Deoptimization::UnrollBlock::frame_sizes_offset_in_bytes()); ++ __ lwu(count, unroll, Deoptimization::UnrollBlock::number_of_frames_offset_in_bytes()); ++ ++ // Pick up the initial fp we should save ++ __ ld(FP, unroll, Deoptimization::UnrollBlock::initial_info_offset_in_bytes()); ++ // Now adjust the caller's stack to make up for the extra locals ++ // but record the original sp so that we can save it in the skeletal interpreter ++ // frame and the stack walking of interpreter_sender will get the unextended sp ++ // value and not the "real" sp value. ++ ++ __ move(sender_sp, SP); ++ __ lw(AT, unroll, Deoptimization::UnrollBlock::caller_adjustment_offset_in_bytes()); ++ __ dsubu(SP, SP, AT); ++ // Push interpreter frames in a loop ++ Label loop; ++ __ bind(loop); ++ __ ld(T2, sizes, 0); // Load frame size ++ __ ld(AT, pcs, 0); // save return address ++ __ daddiu(T2, T2, -2*wordSize); // we'll push pc and fp, by hand ++ __ push2(AT, FP); ++ __ move(FP, SP); ++ __ dsubu(SP, SP, T2); // Prolog! 
++ // This value is corrected by layout_activation_impl ++ __ sd(R0, FP, frame::interpreter_frame_last_sp_offset * wordSize); ++ __ sd(sender_sp, FP, frame::interpreter_frame_sender_sp_offset * wordSize);// Make it walkable ++ __ move(sender_sp, SP); // pass to next frame ++ __ daddiu(count, count, -1); // decrement counter ++ __ daddiu(sizes, sizes, wordSize); // Bump array pointer (sizes) ++ __ addiu(pcs, pcs, wordSize); // Bump array pointer (pcs) ++ __ bne(count, R0, loop); ++ __ delayed()->nop(); // Bump array pointer (pcs) ++ ++ __ ld(RA, pcs, 0); ++ ++ // Re-push self-frame ++ // save old & set new FP ++ // save final return address ++ __ enter(); ++ ++ // Use FP because the frames look interpreted now ++ // Save "the_pc" since it cannot easily be retrieved using the last_java_SP after we aligned SP. ++ // Don't need the precise return PC here, just precise enough to point into this code blob. ++ address the_pc = __ pc(); ++ __ set_last_Java_frame(NOREG, FP, the_pc); ++ ++ assert(StackAlignmentInBytes == 16, "must be"); ++ __ dins(SP, R0, 0, 4); // Fix stack alignment as required by ABI ++ ++ // Call C code. Need thread but NOT official VM entry ++ // crud. We cannot block on this call, no GC can happen. Call should ++ // restore return values to their stack-slots with the new SP. ++ __ move(A0, thread); ++ __ addiu(A1, R0, Deoptimization::Unpack_uncommon_trap); ++ __ patchable_call((address)Deoptimization::unpack_frames); ++ // Set an oopmap for the call site ++ oop_maps->add_gc_map( __ offset(), new OopMap( framesize, 0 ) ); ++ ++ __ reset_last_Java_frame(true); ++ ++ // Pop self-frame. ++ __ leave(); // Epilog! ++ ++ // Jump to interpreter ++ __ jr(RA); ++ __ delayed()->nop(); ++ // ------------- ++ // make sure all code is generated ++ masm->flush(); ++ ++ _uncommon_trap_blob = UncommonTrapBlob::create(&buffer, oop_maps, framesize / 2); ++} ++ ++#endif // COMPILER2 ++ ++//------------------------------generate_handler_blob------------------- ++// ++// Generate a special Compile2Runtime blob that saves all registers, and sets ++// up an OopMap and calls safepoint code to stop the compiled code for ++// a safepoint. ++// ++// This blob is jumped to (via a breakpoint and the signal handler) from a ++// safepoint in compiled code. ++ ++SafepointBlob* SharedRuntime::generate_handler_blob(address call_ptr, int pool_type) { ++ ++ // Account for thread arg in our frame ++ const int additional_words = 0; ++ int frame_size_in_words; ++ ++ assert (StubRoutines::forward_exception_entry() != NULL, "must be generated before"); ++ ++ ResourceMark rm; ++ OopMapSet *oop_maps = new OopMapSet(); ++ OopMap* map; ++ ++ // allocate space for the code ++ // setup code generation tools ++ CodeBuffer buffer ("handler_blob", 2048, 512); ++ MacroAssembler* masm = new MacroAssembler( &buffer); ++ ++ const Register thread = TREG; ++ address start = __ pc(); ++ address call_pc = NULL; ++ bool cause_return = (pool_type == POLL_AT_RETURN); ++ bool save_vectors = (pool_type == POLL_AT_VECTOR_LOOP); ++ ++#ifndef OPT_THREAD ++ __ get_thread(thread); ++#endif ++ ++ map = RegisterSaver::save_live_registers(masm, additional_words, &frame_size_in_words, save_vectors); ++ ++#ifndef OPT_THREAD ++ __ get_thread(thread); ++#endif ++ ++ // The following is basically a call_VM. However, we need the precise ++ // address of the call in order to generate an oopmap. Hence, we do all the ++ // work outselvs. 
++ ++ __ set_last_Java_frame(NOREG, NOREG, NULL); ++ ++ if (!cause_return) { ++ // overwrite the return address pushed by save_live_registers ++ // Additionally, TSR is a callee-saved register so we can look at ++ // it later to determine if someone changed the return address for ++ // us! ++ __ ld_ptr(TSR, thread, in_bytes(JavaThread::saved_exception_pc_offset())); ++ __ st_ptr(TSR, SP, RegisterSaver::raOffset() * wordSize); ++ } ++ ++ // Do the call ++ __ move(A0, thread); ++ __ call(call_ptr); ++ __ delayed()->nop(); ++ ++ // Set an oopmap for the call site. This oopmap will map all ++ // oop-registers and debug-info registers as callee-saved. This ++ // will allow deoptimization at this safepoint to find all possible ++ // debug-info recordings, as well as let GC find all oops. ++ oop_maps->add_gc_map(__ offset(), map); ++ ++ Label noException; ++ ++ // Clear last_Java_sp again ++ __ reset_last_Java_frame(false); ++ ++ __ ld_ptr(AT, thread, in_bytes(Thread::pending_exception_offset())); ++ __ beq(AT, R0, noException); ++ __ delayed()->nop(); ++ ++ // Exception pending ++ ++ RegisterSaver::restore_live_registers(masm, save_vectors); ++ //forward_exception_entry need return address on the stack ++ __ push(RA); ++ __ patchable_jump((address)StubRoutines::forward_exception_entry()); ++ ++ // No exception case ++ __ bind(noException); ++ ++ Label no_adjust, bail; ++ if (!cause_return) { ++ // If our stashed return pc was modified by the runtime we avoid touching it ++ __ ld_ptr(AT, SP, RegisterSaver::raOffset() * wordSize); ++ __ bne(AT, TSR, no_adjust); ++ __ delayed()->nop(); ++ ++#ifdef ASSERT ++ // Verify the correct encoding of the poll we're about to skip. ++ // See NativeInstruction::is_safepoint_poll() ++ __ lwu(AT, TSR, 0); ++ __ dsrl(AT, AT, 16); ++ __ andi(AT, AT, 0xfc1f); ++ __ xori(AT, AT, 0x8c01); ++ __ bne(AT, R0, bail); ++ __ delayed()->nop(); ++#endif ++ // Adjust return pc forward to step over the safepoint poll instruction ++ __ addiu(RA, TSR, 4); // NativeInstruction::instruction_size=4 ++ __ st_ptr(RA, SP, RegisterSaver::raOffset() * wordSize); ++ } ++ ++ __ bind(no_adjust); ++ // Normal exit, register restoring and exit ++ RegisterSaver::restore_live_registers(masm, save_vectors); ++ __ jr(RA); ++ __ delayed()->nop(); ++ ++#ifdef ASSERT ++ __ bind(bail); ++ __ stop("Attempting to adjust pc to skip safepoint poll but the return point is not what we expected"); ++#endif ++ ++ // Make sure all code is generated ++ masm->flush(); ++ ++ // Fill-out other meta info ++ return SafepointBlob::create(&buffer, oop_maps, frame_size_in_words); ++} ++ ++// ++// generate_resolve_blob - call resolution (static/virtual/opt-virtual/ic-miss ++// ++// Generate a stub that calls into vm to find out the proper destination ++// of a java call. All the argument registers are live at this point ++// but since this is generic code we don't know what they are and the caller ++// must do any gc of the args. 
++// ++RuntimeStub* SharedRuntime::generate_resolve_blob(address destination, const char* name) { ++ assert (StubRoutines::forward_exception_entry() != NULL, "must be generated before"); ++ ++ // allocate space for the code ++ ResourceMark rm; ++ ++ //CodeBuffer buffer(name, 1000, 512); ++ CodeBuffer buffer(name, 2000, 2048); ++ MacroAssembler* masm = new MacroAssembler(&buffer); ++ ++ int frame_size_words; ++ //we put the thread in A0 ++ ++ OopMapSet *oop_maps = new OopMapSet(); ++ OopMap* map = NULL; ++ ++ int start = __ offset(); ++ map = RegisterSaver::save_live_registers(masm, 0, &frame_size_words); ++ ++ ++ int frame_complete = __ offset(); ++ ++#ifndef OPT_THREAD ++ const Register thread = T8; ++ __ get_thread(thread); ++#else ++ const Register thread = TREG; ++#endif ++ ++ __ move(A0, thread); ++ __ set_last_Java_frame(noreg, FP, NULL); ++ //align the stack before invoke native ++ assert(StackAlignmentInBytes == 16, "must be"); ++ __ dins(SP, R0, 0, 4); ++ __ relocate(relocInfo::internal_pc_type); ++ { ++ intptr_t save_pc = (intptr_t)__ pc() + NativeMovConstReg::instruction_size + 24 + 1 * BytesPerInstWord; ++ __ patchable_set48(AT, save_pc); ++ } ++ __ sd(AT, thread, in_bytes(JavaThread::last_Java_pc_offset())); ++ ++ __ call(destination); ++ __ delayed()->nop(); ++ ++ // Set an oopmap for the call site. ++ // We need this not only for callee-saved registers, but also for volatile ++ // registers that the compiler might be keeping live across a safepoint. ++ oop_maps->add_gc_map( __ offset() - start, map); ++ // V0 contains the address we are going to jump to assuming no exception got installed ++#ifndef OPT_THREAD ++ __ get_thread(thread); ++#endif ++ __ ld_ptr(SP, thread, in_bytes(JavaThread::last_Java_sp_offset())); ++ // clear last_Java_sp ++ __ reset_last_Java_frame(true); ++ // check for pending exceptions ++ Label pending; ++ __ ld_ptr(AT, thread, in_bytes(Thread::pending_exception_offset())); ++ __ bne(AT, R0, pending); ++ __ delayed()->nop(); ++ // get the returned Method* ++ //FIXME, do mips need this ? ++ __ get_vm_result_2(Rmethod, thread); // Refer to OpenJDK8 ++ __ st_ptr(Rmethod, SP, RegisterSaver::methodOffset() * wordSize); ++ __ st_ptr(V0, SP, RegisterSaver::v0Offset() * wordSize); ++ RegisterSaver::restore_live_registers(masm); ++ ++ // We are back the the original state on entry and ready to go the callee method. ++ __ jr(V0); ++ __ delayed()->nop(); ++ // Pending exception after the safepoint ++ ++ __ bind(pending); ++ ++ RegisterSaver::restore_live_registers(masm); ++ ++ // exception pending => remove activation and forward to exception handler ++ //forward_exception_entry need return address on the stack ++ __ push(RA); ++#ifndef OPT_THREAD ++ __ get_thread(thread); ++#endif ++ __ st_ptr(R0, thread, in_bytes(JavaThread::vm_result_offset())); ++ __ ld_ptr(V0, thread, in_bytes(Thread::pending_exception_offset())); ++ __ jmp(StubRoutines::forward_exception_entry(), relocInfo::runtime_call_type); ++ __ delayed()->nop(); ++ // ++ // make sure all code is generated ++ masm->flush(); ++ ++ RuntimeStub* tmp= RuntimeStub::new_runtime_stub(name, &buffer, frame_complete, frame_size_words, oop_maps, true); ++ return tmp; ++} ++ ++extern "C" int SpinPause() {return 0;} ++ ++ ++//------------------------------Montgomery multiplication------------------------ ++// ++ ++// Subtract 0:b from carry:a. Return carry. 
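// [Editorial sketch -- illustration only, not part of the upstream patch]
// What the inline-assembly sub() below computes, written portably: subtract
// the len-word value b from the (len+1)-word value carry:a (least significant
// word first), store the low len words back into a, and return the new top
// word. Assumes a 64-bit unsigned long, like the rest of this file:
static unsigned long sub_sketch(unsigned long a[], unsigned long b[],
                                unsigned long carry, long len) {
  unsigned long borrow = 0;
  for (long i = 0; i < len; i++) {
    unsigned long ai = a[i];
    unsigned long t  = (ai < borrow);   // borrow out of subtracting the old borrow
    ai -= borrow;
    t |= (ai < b[i]);                   // borrow out of subtracting b[i]
    a[i]   = ai - b[i];
    borrow = t;
  }
  return carry - borrow;                // new most-significant word
}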
++static unsigned long ++sub(unsigned long a[], unsigned long b[], unsigned long carry, long len) { ++ long borrow = 0, t; ++ unsigned long tmp0, tmp1; ++ __asm__ __volatile__ ( ++ "0: \n" ++ "ld %[tmp0], 0(%[a]) \n" ++ "ld %[tmp1], 0(%[b]) \n" ++ "sltu %[t], %[tmp0], %[borrow] \n" ++ "dsubu %[tmp0], %[tmp0], %[borrow] \n" ++ "sltu %[borrow], %[tmp0], %[tmp1] \n" ++ "or %[borrow], %[borrow], %[t] \n" ++ "dsubu %[tmp0], %[tmp0], %[tmp1] \n" ++ "sd %[tmp0], 0(%[a]) \n" ++ "daddiu %[a], %[a], 8 \n" ++ "daddiu %[b], %[b], 8 \n" ++ "daddiu %[len], %[len], -1 \n" ++ "bgtz %[len], 0b \n" ++ "dsubu %[tmp0], %[carry], %[borrow] \n" ++ : [len]"+r"(len), [tmp0]"=&r"(tmp0), [tmp1]"=&r"(tmp1), [borrow]"+r"(borrow), [a]"+r"(a), [b]"+r"(b), [t]"=&r"(t) ++ : [carry]"r"(carry) ++ : "memory" ++ ); ++ return tmp0; ++} ++ ++// Multiply (unsigned) Long A by Long B, accumulating the double- ++// length result into the accumulator formed of t0, t1, and t2. ++inline void MACC(unsigned long A, unsigned long B, unsigned long &t0, unsigned long &t1, unsigned long &t2) { ++ unsigned long hi, lo, carry, t; ++ __asm__ __volatile__( ++ "dmultu %[A], %[B] \n" ++ "mfhi %[hi] \n" ++ "mflo %[lo] \n" ++ "daddu %[t0], %[t0], %[lo] \n" ++ "sltu %[carry], %[t0], %[lo] \n" ++ "daddu %[t1], %[t1], %[carry] \n" ++ "sltu %[t], %[t1], %[carry] \n" ++ "daddu %[t1], %[t1], %[hi] \n" ++ "sltu %[carry], %[t1], %[hi] \n" ++ "or %[carry], %[carry], %[t] \n" ++ "daddu %[t2], %[t2], %[carry] \n" ++ : [hi]"=&r"(hi), [lo]"=&r"(lo), [t0]"+r"(t0), [t1]"+r"(t1), [t2]"+r"(t2), [carry]"=&r"(carry), [t]"=&r"(t) ++ : [A]"r"(A), [B]"r"(B) ++ : ++ ); ++} ++ ++// As above, but add twice the double-length result into the ++// accumulator. ++inline void MACC2(unsigned long A, unsigned long B, unsigned long &t0, unsigned long &t1, unsigned long &t2) { ++ unsigned long hi, lo, carry, t; ++ __asm__ __volatile__( ++ "dmultu %[A], %[B] \n" ++ "mfhi %[hi] \n" ++ "mflo %[lo] \n" ++ "daddu %[t0], %[t0], %[lo] \n" ++ "sltu %[carry], %[t0], %[lo] \n" ++ "daddu %[t1], %[t1], %[carry] \n" ++ "sltu %[t], %[t1], %[carry] \n" ++ "daddu %[t1], %[t1], %[hi] \n" ++ "sltu %[carry], %[t1], %[hi] \n" ++ "or %[carry], %[carry], %[t] \n" ++ "daddu %[t2], %[t2], %[carry] \n" ++ "daddu %[t0], %[t0], %[lo] \n" ++ "sltu %[carry], %[t0], %[lo] \n" ++ "daddu %[t1], %[t1], %[carry] \n" ++ "sltu %[t], %[t1], %[carry] \n" ++ "daddu %[t1], %[t1], %[hi] \n" ++ "sltu %[carry], %[t1], %[hi] \n" ++ "or %[carry], %[carry], %[t] \n" ++ "daddu %[t2], %[t2], %[carry] \n" ++ : [hi]"=&r"(hi), [lo]"=&r"(lo), [t0]"+r"(t0), [t1]"+r"(t1), [t2]"+r"(t2), [carry]"=&r"(carry), [t]"=&r"(t) ++ : [A]"r"(A), [B]"r"(B) ++ : ++ ); ++} ++ ++// Fast Montgomery multiplication. The derivation of the algorithm is ++// in A Cryptographic Library for the Motorola DSP56000, ++// Dusse and Kaliski, Proc. EUROCRYPT 90, pp. 230-237. 
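// [Editorial sketch -- illustration only, not part of the upstream patch]
// The MACC/MACC2 helpers above keep a three-word accumulator t2:t1:t0 and add
// one (or two) 128-bit products a*b into it. With GCC's unsigned __int128 the
// same step can be written portably; this is only meant to make the
// interleaved multiply-and-reduce loops below easier to follow:
static inline void macc_sketch(unsigned long a, unsigned long b,
                               unsigned long& t0, unsigned long& t1,
                               unsigned long& t2) {
  unsigned __int128 p = (unsigned __int128)a * b;  // full 128-bit product
  unsigned long lo = (unsigned long)p;
  unsigned long hi = (unsigned long)(p >> 64);
  t0 += lo;
  unsigned long c = (t0 < lo);                     // carry out of the low word
  t1 += c;
  unsigned long c1 = (t1 < c);
  t1 += hi;
  c1 |= (t1 < hi);                                 // carry out of the middle word
  t2 += c1;                                        // carry stops here, as in the asm
}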
++ ++static void __attribute__((noinline)) ++montgomery_multiply(unsigned long a[], unsigned long b[], unsigned long n[], ++ unsigned long m[], unsigned long inv, int len) { ++ unsigned long t0 = 0, t1 = 0, t2 = 0; // Triple-precision accumulator ++ int i; ++ ++ assert(inv * n[0] == -1UL, "broken inverse in Montgomery multiply"); ++ ++ for (i = 0; i < len; i++) { ++ int j; ++ for (j = 0; j < i; j++) { ++ MACC(a[j], b[i-j], t0, t1, t2); ++ MACC(m[j], n[i-j], t0, t1, t2); ++ } ++ MACC(a[i], b[0], t0, t1, t2); ++ m[i] = t0 * inv; ++ MACC(m[i], n[0], t0, t1, t2); ++ ++ assert(t0 == 0, "broken Montgomery multiply"); ++ ++ t0 = t1; t1 = t2; t2 = 0; ++ } ++ ++ for (i = len; i < 2*len; i++) { ++ int j; ++ for (j = i-len+1; j < len; j++) { ++ MACC(a[j], b[i-j], t0, t1, t2); ++ MACC(m[j], n[i-j], t0, t1, t2); ++ } ++ m[i-len] = t0; ++ t0 = t1; t1 = t2; t2 = 0; ++ } ++ ++ while (t0) ++ t0 = sub(m, n, t0, len); ++} ++ ++// Fast Montgomery squaring. This uses asymptotically 25% fewer ++// multiplies so it should be up to 25% faster than Montgomery ++// multiplication. However, its loop control is more complex and it ++// may actually run slower on some machines. ++ ++static void __attribute__((noinline)) ++montgomery_square(unsigned long a[], unsigned long n[], ++ unsigned long m[], unsigned long inv, int len) { ++ unsigned long t0 = 0, t1 = 0, t2 = 0; // Triple-precision accumulator ++ int i; ++ ++ assert(inv * n[0] == -1UL, "broken inverse in Montgomery multiply"); ++ ++ for (i = 0; i < len; i++) { ++ int j; ++ int end = (i+1)/2; ++ for (j = 0; j < end; j++) { ++ MACC2(a[j], a[i-j], t0, t1, t2); ++ MACC(m[j], n[i-j], t0, t1, t2); ++ } ++ if ((i & 1) == 0) { ++ MACC(a[j], a[j], t0, t1, t2); ++ } ++ for (; j < i; j++) { ++ MACC(m[j], n[i-j], t0, t1, t2); ++ } ++ m[i] = t0 * inv; ++ MACC(m[i], n[0], t0, t1, t2); ++ ++ assert(t0 == 0, "broken Montgomery square"); ++ ++ t0 = t1; t1 = t2; t2 = 0; ++ } ++ ++ for (i = len; i < 2*len; i++) { ++ int start = i-len+1; ++ int end = start + (len - start)/2; ++ int j; ++ for (j = start; j < end; j++) { ++ MACC2(a[j], a[i-j], t0, t1, t2); ++ MACC(m[j], n[i-j], t0, t1, t2); ++ } ++ if ((i & 1) == 0) { ++ MACC(a[j], a[j], t0, t1, t2); ++ } ++ for (; j < len; j++) { ++ MACC(m[j], n[i-j], t0, t1, t2); ++ } ++ m[i-len] = t0; ++ t0 = t1; t1 = t2; t2 = 0; ++ } ++ ++ while (t0) ++ t0 = sub(m, n, t0, len); ++} ++ ++// Swap words in a longword. ++static unsigned long swap(unsigned long x) { ++ return (x << 32) | (x >> 32); ++} ++ ++// Copy len longwords from s to d, word-swapping as we go. The ++// destination array is reversed. ++static void reverse_words(unsigned long *s, unsigned long *d, int len) { ++ d += len; ++ while(len-- > 0) { ++ d--; ++ *d = swap(*s); ++ s++; ++ } ++} ++ ++// The threshold at which squaring is advantageous was determined ++// experimentally on an i7-3930K (Ivy Bridge) CPU @ 3.5GHz. ++// Doesn't seem to be relevant for MIPS64 so we use the same value. ++#define MONTGOMERY_SQUARING_THRESHOLD 64 ++ ++void SharedRuntime::montgomery_multiply(jint *a_ints, jint *b_ints, jint *n_ints, ++ jint len, jlong inv, ++ jint *m_ints) { ++ assert(len % 2 == 0, "array length in montgomery_multiply must be even"); ++ int longwords = len/2; ++ ++ // Make very sure we don't use so much space that the stack might ++ // overflow. 512 jints corresponds to an 16384-bit integer and ++ // will use here a total of 8k bytes of stack space. 
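// [Editorial note -- worked arithmetic, not part of the upstream patch]
// With len capped at 512 jints (512 * 32 = 16384 bits), longwords = len / 2
// is at most 256, and the four scratch arrays a, b, n, m below take
// 4 * 256 * sizeof(unsigned long) = 4 * 256 * 8 = 8192 bytes on an LP64
// target, which is exactly the bound the guarantee() below enforces.
static_assert(4 * 256 * 8 == 8192, "editorial sanity check of the 8 KB bound");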
++ int total_allocation = longwords * sizeof (unsigned long) * 4; ++ guarantee(total_allocation <= 8192, "must be"); ++ unsigned long *scratch = (unsigned long *)alloca(total_allocation); ++ ++ // Local scratch arrays ++ unsigned long ++ *a = scratch + 0 * longwords, ++ *b = scratch + 1 * longwords, ++ *n = scratch + 2 * longwords, ++ *m = scratch + 3 * longwords; ++ ++ reverse_words((unsigned long *)a_ints, a, longwords); ++ reverse_words((unsigned long *)b_ints, b, longwords); ++ reverse_words((unsigned long *)n_ints, n, longwords); ++ ++ ::montgomery_multiply(a, b, n, m, (unsigned long)inv, longwords); ++ ++ reverse_words(m, (unsigned long *)m_ints, longwords); ++} ++ ++void SharedRuntime::montgomery_square(jint *a_ints, jint *n_ints, ++ jint len, jlong inv, ++ jint *m_ints) { ++ assert(len % 2 == 0, "array length in montgomery_square must be even"); ++ int longwords = len/2; ++ ++ // Make very sure we don't use so much space that the stack might ++ // overflow. 512 jints corresponds to an 16384-bit integer and ++ // will use here a total of 6k bytes of stack space. ++ int total_allocation = longwords * sizeof (unsigned long) * 3; ++ guarantee(total_allocation <= 8192, "must be"); ++ unsigned long *scratch = (unsigned long *)alloca(total_allocation); ++ ++ // Local scratch arrays ++ unsigned long ++ *a = scratch + 0 * longwords, ++ *n = scratch + 1 * longwords, ++ *m = scratch + 2 * longwords; ++ ++ reverse_words((unsigned long *)a_ints, a, longwords); ++ reverse_words((unsigned long *)n_ints, n, longwords); ++ ++ if (len >= MONTGOMERY_SQUARING_THRESHOLD) { ++ ::montgomery_square(a, n, m, (unsigned long)inv, longwords); ++ } else { ++ ::montgomery_multiply(a, a, n, m, (unsigned long)inv, longwords); ++ } ++ ++ reverse_words(m, (unsigned long *)m_ints, longwords); ++} ++ ++#ifdef COMPILER2 ++RuntimeStub* SharedRuntime::make_native_invoker(address call_target, ++ int shadow_space_bytes, ++ const GrowableArray& input_registers, ++ const GrowableArray& output_registers) { ++ Unimplemented(); ++ return nullptr; ++} ++#endif +diff --git a/src/hotspot/cpu/mips/stubGenerator_mips_64.cpp b/src/hotspot/cpu/mips/stubGenerator_mips_64.cpp +new file mode 100644 +index 00000000000..e894a302b50 +--- /dev/null ++++ b/src/hotspot/cpu/mips/stubGenerator_mips_64.cpp +@@ -0,0 +1,2725 @@ ++/* ++ * Copyright (c) 2003, 2013, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2023, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ * ++ */ ++ ++#include "precompiled.hpp" ++#include "asm/macroAssembler.hpp" ++#include "asm/macroAssembler.inline.hpp" ++#include "compiler/oopMap.hpp" ++#include "gc/shared/barrierSet.hpp" ++#include "gc/shared/barrierSetAssembler.hpp" ++#include "interpreter/interpreter.hpp" ++#include "nativeInst_mips.hpp" ++#include "oops/instanceOop.hpp" ++#include "oops/method.hpp" ++#include "oops/objArrayKlass.hpp" ++#include "oops/oop.inline.hpp" ++#include "prims/methodHandles.hpp" ++#include "runtime/frame.inline.hpp" ++#include "runtime/handles.inline.hpp" ++#include "runtime/sharedRuntime.hpp" ++#include "runtime/stubCodeGenerator.hpp" ++#include "runtime/stubRoutines.hpp" ++#include "runtime/thread.inline.hpp" ++#ifdef COMPILER2 ++#include "opto/runtime.hpp" ++#endif ++ ++// Declaration and definition of StubGenerator (no .hpp file). ++// For a more detailed description of the stub routine structure ++// see the comment in stubRoutines.hpp ++ ++#define __ _masm-> ++ ++#define A0 RA0 ++#define A1 RA1 ++#define A2 RA2 ++#define A3 RA3 ++#define A4 RA4 ++#define A5 RA5 ++#define A6 RA6 ++#define A7 RA7 ++#define T0 RT0 ++#define T1 RT1 ++#define T2 RT2 ++#define T3 RT3 ++#define T8 RT8 ++#define T9 RT9 ++ ++#define TIMES_OOP (UseCompressedOops ? Address::times_4 : Address::times_8) ++//#define a__ ((Assembler*)_masm)-> ++ ++//#ifdef PRODUCT ++//#define BLOCK_COMMENT(str) /* nothing */ ++//#else ++//#define BLOCK_COMMENT(str) __ block_comment(str) ++//#endif ++ ++//#define BIND(label) bind(label); BLOCK_COMMENT(#label ":") ++const int MXCSR_MASK = 0xFFC0; // Mask out any pending exceptions ++ ++// Stub Code definitions ++ ++class StubGenerator: public StubCodeGenerator { ++ private: ++ ++ // ABI mips n64 ++ // This fig is not MIPS ABI. It is call Java from C ABI. ++ // Call stubs are used to call Java from C ++ // ++ // [ return_from_Java ] ++ // [ argument word n-1 ] <--- sp ++ // ... ++ // [ argument word 0 ] ++ // ... ++ // -8 [ S6 ] ++ // -7 [ S5 ] ++ // -6 [ S4 ] ++ // -5 [ S3 ] ++ // -4 [ S1 ] ++ // -3 [ TSR(S2) ] ++ // -2 [ LVP(S7) ] ++ // -1 [ BCP(S1) ] ++ // 0 [ saved fp ] <--- fp_after_call ++ // 1 [ return address ] ++ // 2 [ ptr. to call wrapper ] <--- a0 (old sp -->)fp ++ // 3 [ result ] <--- a1 ++ // 4 [ result_type ] <--- a2 ++ // 5 [ method ] <--- a3 ++ // 6 [ entry_point ] <--- a4 ++ // 7 [ parameters ] <--- a5 ++ // 8 [ parameter_size ] <--- a6 ++ // 9 [ thread ] <--- a7 ++ ++ // ++ // n64 does not save paras in sp. ++ // ++ // [ return_from_Java ] ++ // [ argument word n-1 ] <--- sp ++ // ... ++ // [ argument word 0 ] ++ // ... ++ //-13 [ thread ] ++ //-12 [ result_type ] <--- a2 ++ //-11 [ result ] <--- a1 ++ //-10 [ ] ++ // -9 [ ptr. to call wrapper ] <--- a0 ++ // -8 [ S6 ] ++ // -7 [ S5 ] ++ // -6 [ S4 ] ++ // -5 [ S3 ] ++ // -4 [ S1 ] ++ // -3 [ TSR(S2) ] ++ // -2 [ LVP(S7) ] ++ // -1 [ BCP(S1) ] ++ // 0 [ saved fp ] <--- fp_after_call ++ // 1 [ return address ] ++ // 2 [ ] <--- old sp ++ // ++ // Find a right place in the call_stub for GP. ++ // GP will point to the starting point of Interpreter::dispatch_table(itos). ++ // It should be saved/restored before/after Java calls. 
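// [Editorial sketch -- illustration only, not part of the upstream patch]
// The call stub generated below is the single entry point the VM uses to call
// Java from C. Going by the register layout described in the comments that
// follow, it is reached with eight arguments in A0..A7; a simplified,
// hypothetical signature and the reverse-order parameter copy the stub
// performs look roughly like this (<cstdint> assumed for intptr_t):
typedef void (*call_stub_sketch_t)(void*     call_wrapper,   // A0
                                   intptr_t* result,         // A1
                                   int       result_type,    // A2
                                   void*     method,         // A3
                                   void*     entry_point,    // A4
                                   intptr_t* parameters,     // A5
                                   int       parameter_size, // A6
                                   void*     thread);        // A7
// Java arguments are copied onto the freshly carved expression stack in
// reverse order ("receiver last", as the stub's own comment puts it):
static void copy_java_parameters_sketch(intptr_t* expr_stack,
                                        const intptr_t* parameters, int n) {
  for (int i = 0; i < n; i++)
    expr_stack[i] = parameters[n - 1 - i];
}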
++ // ++ enum call_stub_layout { ++ RA_off = 1, ++ FP_off = 0, ++ BCP_off = -1, ++ LVP_off = -2, ++ TSR_off = -3, ++ S1_off = -4, ++ S3_off = -5, ++ S4_off = -6, ++ S5_off = -7, ++ S6_off = -8, ++ call_wrapper_off = -9, ++ result_off = -11, ++ result_type_off = -12, ++ thread_off = -13, ++ total_off = thread_off - 1, ++ GP_off = -14, ++ }; ++ ++ address generate_call_stub(address& return_address) { ++ ++ StubCodeMark mark(this, "StubRoutines", "call_stub"); ++ address start = __ pc(); ++ ++ // same as in generate_catch_exception()! ++ ++ // stub code ++ // save ra and fp ++ __ enter(); ++ // I think 14 is the max gap between argument and callee saved register ++ assert((int)frame::entry_frame_call_wrapper_offset == (int)call_wrapper_off, "adjust this code"); ++ __ daddiu(SP, SP, total_off * wordSize); ++ __ sd(BCP, FP, BCP_off * wordSize); ++ __ sd(LVP, FP, LVP_off * wordSize); ++ __ sd(TSR, FP, TSR_off * wordSize); ++ __ sd(S1, FP, S1_off * wordSize); ++ __ sd(S3, FP, S3_off * wordSize); ++ __ sd(S4, FP, S4_off * wordSize); ++ __ sd(S5, FP, S5_off * wordSize); ++ __ sd(S6, FP, S6_off * wordSize); ++ __ sd(A0, FP, call_wrapper_off * wordSize); ++ __ sd(A1, FP, result_off * wordSize); ++ __ sd(A2, FP, result_type_off * wordSize); ++ __ sd(A7, FP, thread_off * wordSize); ++ __ sd(GP, FP, GP_off * wordSize); ++ ++ __ set64(GP, (long)Interpreter::dispatch_table(itos)); ++ ++#ifdef OPT_THREAD ++ __ move(TREG, A7); ++#endif ++ //add for compressedoops ++ __ reinit_heapbase(); ++ ++#ifdef ASSERT ++ // make sure we have no pending exceptions ++ { ++ Label L; ++ __ ld(AT, A7, in_bytes(Thread::pending_exception_offset())); ++ __ beq(AT, R0, L); ++ __ delayed()->nop(); ++ /* FIXME: I do not know how to realize stop in mips arch, do it in the future */ ++ __ stop("StubRoutines::call_stub: entered with pending exception"); ++ __ bind(L); ++ } ++#endif ++ ++ // pass parameters if any ++ // A5: parameter ++ // A6: parameter_size ++ // T0: parameter_size_tmp(--) ++ // T2: offset(++) ++ // T3: tmp ++ Label parameters_done; ++ // judge if the parameter_size equals 0 ++ __ beq(A6, R0, parameters_done); ++ __ delayed()->nop(); ++ __ dsll(AT, A6, Interpreter::logStackElementSize); ++ __ dsubu(SP, SP, AT); ++ assert(StackAlignmentInBytes == 16, "must be"); ++ __ dins(SP, R0, 0, 4); ++ // Copy Java parameters in reverse order (receiver last) ++ // Note that the argument order is inverted in the process ++ Label loop; ++ __ move(T0, A6); ++ __ move(T2, R0); ++ __ bind(loop); ++ ++ // get parameter ++ __ dsll(T3, T0, LogBytesPerWord); ++ __ daddu(T3, T3, A5); ++ __ ld(AT, T3, -wordSize); ++ __ dsll(T3, T2, LogBytesPerWord); ++ __ daddu(T3, T3, SP); ++ __ sd(AT, T3, Interpreter::expr_offset_in_bytes(0)); ++ __ daddiu(T2, T2, 1); ++ __ daddiu(T0, T0, -1); ++ __ bne(T0, R0, loop); ++ __ delayed()->nop(); ++ // advance to next parameter ++ ++ // call Java function ++ __ bind(parameters_done); ++ ++ // receiver in V0, Method* in Rmethod ++ ++ __ move(Rmethod, A3); ++ __ move(Rsender, SP); //set sender sp ++ __ jalr(A4); ++ __ delayed()->nop(); ++ return_address = __ pc(); ++ ++ Label common_return; ++ __ bind(common_return); ++ ++ // store result depending on type ++ // (everything that is not T_LONG, T_FLOAT or T_DOUBLE is treated as T_INT) ++ __ ld(T0, FP, result_off * wordSize); // result --> T0 ++ Label is_long, is_float, is_double, exit; ++ __ ld(T2, FP, result_type_off * wordSize); // result_type --> T2 ++ __ daddiu(T3, T2, (-1) * T_LONG); ++ __ beq(T3, R0, is_long); ++ __ delayed()->daddiu(T3, T2, (-1) * 
T_FLOAT); ++ __ beq(T3, R0, is_float); ++ __ delayed()->daddiu(T3, T2, (-1) * T_DOUBLE); ++ __ beq(T3, R0, is_double); ++ __ delayed()->nop(); ++ ++ // handle T_INT case ++ __ sd(V0, T0, 0 * wordSize); ++ __ bind(exit); ++ ++ // restore ++ __ ld(BCP, FP, BCP_off * wordSize); ++ __ ld(LVP, FP, LVP_off * wordSize); ++ __ ld(GP, FP, GP_off * wordSize); ++ __ ld(TSR, FP, TSR_off * wordSize); ++ ++ __ ld(S1, FP, S1_off * wordSize); ++ __ ld(S3, FP, S3_off * wordSize); ++ __ ld(S4, FP, S4_off * wordSize); ++ __ ld(S5, FP, S5_off * wordSize); ++ __ ld(S6, FP, S6_off * wordSize); ++ ++ __ leave(); ++ ++ // return ++ __ jr(RA); ++ __ delayed()->nop(); ++ ++ // handle return types different from T_INT ++ __ bind(is_long); ++ __ sd(V0, T0, 0 * wordSize); ++ __ b(exit); ++ __ delayed()->nop(); ++ ++ __ bind(is_float); ++ __ swc1(F0, T0, 0 * wordSize); ++ __ b(exit); ++ __ delayed()->nop(); ++ ++ __ bind(is_double); ++ __ sdc1(F0, T0, 0 * wordSize); ++ __ b(exit); ++ __ delayed()->nop(); ++ //FIXME, 1.6 mips version add operation of fpu here ++ StubRoutines::gs2::set_call_stub_compiled_return(__ pc()); ++ __ b(common_return); ++ __ delayed()->nop(); ++ return start; ++ } ++ ++ // Return point for a Java call if there's an exception thrown in ++ // Java code. The exception is caught and transformed into a ++ // pending exception stored in JavaThread that can be tested from ++ // within the VM. ++ // ++ // Note: Usually the parameters are removed by the callee. In case ++ // of an exception crossing an activation frame boundary, that is ++ // not the case if the callee is compiled code => need to setup the ++ // sp. ++ // ++ // V0: exception oop ++ ++ address generate_catch_exception() { ++ StubCodeMark mark(this, "StubRoutines", "catch_exception"); ++ address start = __ pc(); ++ ++ Register thread = TREG; ++ ++ // get thread directly ++#ifndef OPT_THREAD ++ __ ld(thread, FP, thread_off * wordSize); ++#endif ++ ++#ifdef ASSERT ++ // verify that threads correspond ++ { Label L; ++ __ get_thread(T8); ++ __ beq(T8, thread, L); ++ __ delayed()->nop(); ++ __ stop("StubRoutines::catch_exception: threads must correspond"); ++ __ bind(L); ++ } ++#endif ++ // set pending exception ++ __ verify_oop(V0); ++ __ sd(V0, thread, in_bytes(Thread::pending_exception_offset())); ++ __ li(AT, (long)__FILE__); ++ __ sd(AT, thread, in_bytes(Thread::exception_file_offset ())); ++ __ li(AT, (long)__LINE__); ++ __ sd(AT, thread, in_bytes(Thread::exception_line_offset ())); ++ ++ // complete return to VM ++ assert(StubRoutines::_call_stub_return_address != NULL, "_call_stub_return_address must have been generated before"); ++ __ jmp(StubRoutines::_call_stub_return_address, relocInfo::none); ++ __ delayed()->nop(); ++ ++ return start; ++ } ++ ++ // Continuation point for runtime calls returning with a pending ++ // exception. The pending exception check happened in the runtime ++ // or native call stub. The pending exception in Thread is ++ // converted into a Java-level exception. ++ // ++ // Contract with Java-level exception handlers: ++ // V0: exception ++ // V1: throwing pc ++ // ++ // NOTE: At entry of this stub, exception-pc must be on stack !! ++ ++ address generate_forward_exception() { ++ StubCodeMark mark(this, "StubRoutines", "forward exception"); ++ //Register thread = TREG; ++ Register thread = TREG; ++ address start = __ pc(); ++ ++ // Upon entry, the sp points to the return address returning into ++ // Java (interpreted or compiled) code; i.e., the return address ++ // throwing pc. 
++ // ++ // Arguments pushed before the runtime call are still on the stack ++ // but the exception handler will reset the stack pointer -> ++ // ignore them. A potential result in registers can be ignored as ++ // well. ++ ++#ifndef OPT_THREAD ++ __ get_thread(thread); ++#endif ++#ifdef ASSERT ++ // make sure this code is only executed if there is a pending exception ++ { ++ Label L; ++ __ ld(AT, thread, in_bytes(Thread::pending_exception_offset())); ++ __ bne(AT, R0, L); ++ __ delayed()->nop(); ++ __ stop("StubRoutines::forward exception: no pending exception (1)"); ++ __ bind(L); ++ } ++#endif ++ ++ // compute exception handler into T9 ++ __ ld(A1, SP, 0); ++ __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::exception_handler_for_return_address), thread, A1); ++ __ move(T9, V0); ++ __ pop(V1); ++ ++#ifndef OPT_THREAD ++ __ get_thread(thread); ++#endif ++ __ ld(V0, thread, in_bytes(Thread::pending_exception_offset())); ++ __ sd(R0, thread, in_bytes(Thread::pending_exception_offset())); ++ ++#ifdef ASSERT ++ // make sure exception is set ++ { ++ Label L; ++ __ bne(V0, R0, L); ++ __ delayed()->nop(); ++ __ stop("StubRoutines::forward exception: no pending exception (2)"); ++ __ bind(L); ++ } ++#endif ++ ++ // continue at exception handler (return address removed) ++ // V0: exception ++ // T9: exception handler ++ // V1: throwing pc ++ __ verify_oop(V0); ++ __ jr(T9); ++ __ delayed()->nop(); ++ ++ return start; ++ } ++ ++ // Non-destructive plausibility checks for oops ++ // ++ address generate_verify_oop() { ++ StubCodeMark mark(this, "StubRoutines", "verify_oop"); ++ address start = __ pc(); ++ __ reinit_heapbase(); ++ __ verify_oop_subroutine(); ++ address end = __ pc(); ++ return start; ++ } ++ ++ // ++ // Generate overlap test for array copy stubs ++ // ++ // Input: ++ // A0 - array1 ++ // A1 - array2 ++ // A2 - element count ++ // ++ ++ // use T9 as temp ++ void array_overlap_test(address no_overlap_target, int log2_elem_size) { ++ int elem_size = 1 << log2_elem_size; ++ Address::ScaleFactor sf = Address::times_1; ++ ++ switch (log2_elem_size) { ++ case 0: sf = Address::times_1; break; ++ case 1: sf = Address::times_2; break; ++ case 2: sf = Address::times_4; break; ++ case 3: sf = Address::times_8; break; ++ } ++ ++ __ dsll(AT, A2, sf); ++ __ daddu(AT, AT, A0); ++ __ daddiu(T9, AT, -elem_size); ++ __ dsubu(AT, A1, A0); ++ __ blez(AT, no_overlap_target); ++ __ delayed()->nop(); ++ __ dsubu(AT, A1, T9); ++ __ bgtz(AT, no_overlap_target); ++ __ delayed()->nop(); ++ ++ // If A0 = 0xf... and A1 = 0x0..., than goto no_overlap_target ++ Label L; ++ __ bgez(A0, L); ++ __ delayed()->nop(); ++ __ bgtz(A1, no_overlap_target); ++ __ delayed()->nop(); ++ __ bind(L); ++ ++ } ++ ++ // ++ // Generate stub for array fill. If "aligned" is true, the ++ // "to" address is assumed to be heapword aligned. 
++ // ++ // Arguments for generated stub: ++ // to: c_rarg0 ++ // value: c_rarg1 ++ // count: c_rarg2 treated as signed ++ // ++ address generate_fill(BasicType t, bool aligned, const char *name) { ++ __ align(CodeEntryAlignment); ++ StubCodeMark mark(this, "StubRoutines", name); ++ address start = __ pc(); ++ ++ const Register to = A0; // source array address ++ const Register value = A1; // value ++ const Register count = A2; // elements count ++ ++ const Register cnt_words = T8; // temp register ++ ++ __ enter(); ++ ++ Label L_fill_elements, L_exit1; ++ ++ int shift = -1; ++ switch (t) { ++ case T_BYTE: ++ shift = 0; ++ __ slti(AT, count, 8 >> shift); // Short arrays (< 8 bytes) fill by element ++ __ dins(value, value, 8, 8); // 8 bit -> 16 bit ++ __ dins(value, value, 16, 16); // 16 bit -> 32 bit ++ __ bne(AT, R0, L_fill_elements); ++ __ delayed()->nop(); ++ break; ++ case T_SHORT: ++ shift = 1; ++ __ slti(AT, count, 8 >> shift); // Short arrays (< 8 bytes) fill by element ++ __ dins(value, value, 16, 16); // 16 bit -> 32 bit ++ __ bne(AT, R0, L_fill_elements); ++ __ delayed()->nop(); ++ break; ++ case T_INT: ++ shift = 2; ++ __ slti(AT, count, 8 >> shift); // Short arrays (< 8 bytes) fill by element ++ __ bne(AT, R0, L_fill_elements); ++ __ delayed()->nop(); ++ break; ++ default: ShouldNotReachHere(); ++ } ++ ++ // Align source address at 8 bytes address boundary. ++ Label L_skip_align1, L_skip_align2, L_skip_align4; ++ if (!aligned) { ++ switch (t) { ++ case T_BYTE: ++ // One byte misalignment happens only for byte arrays. ++ __ andi(AT, to, 1); ++ __ beq(AT, R0, L_skip_align1); ++ __ delayed()->nop(); ++ __ sb(value, to, 0); ++ __ daddiu(to, to, 1); ++ __ addiu32(count, count, -1); ++ __ bind(L_skip_align1); ++ // Fallthrough ++ case T_SHORT: ++ // Two bytes misalignment happens only for byte and short (char) arrays. ++ __ andi(AT, to, 1 << 1); ++ __ beq(AT, R0, L_skip_align2); ++ __ delayed()->nop(); ++ __ sh(value, to, 0); ++ __ daddiu(to, to, 2); ++ __ addiu32(count, count, -(2 >> shift)); ++ __ bind(L_skip_align2); ++ // Fallthrough ++ case T_INT: ++ // Align to 8 bytes, we know we are 4 byte aligned to start. ++ __ andi(AT, to, 1 << 2); ++ __ beq(AT, R0, L_skip_align4); ++ __ delayed()->nop(); ++ __ sw(value, to, 0); ++ __ daddiu(to, to, 4); ++ __ addiu32(count, count, -(4 >> shift)); ++ __ bind(L_skip_align4); ++ break; ++ default: ShouldNotReachHere(); ++ } ++ } ++ ++ // ++ // Fill large chunks ++ // ++ __ srl(cnt_words, count, 3 - shift); // number of words ++ __ dinsu(value, value, 32, 32); // 32 bit -> 64 bit ++ __ sll(AT, cnt_words, 3 - shift); ++ __ subu32(count, count, AT); ++ ++ Label L_loop_begin, L_loop_not_64bytes_fill, L_loop_end; ++ __ addiu32(AT, cnt_words, -8); ++ __ bltz(AT, L_loop_not_64bytes_fill); ++ __ delayed()->nop(); ++ __ bind(L_loop_begin); ++ __ sd(value, to, 0); ++ __ sd(value, to, 8); ++ __ sd(value, to, 16); ++ __ sd(value, to, 24); ++ __ sd(value, to, 32); ++ __ sd(value, to, 40); ++ __ sd(value, to, 48); ++ __ sd(value, to, 56); ++ __ daddiu(to, to, 64); ++ __ addiu32(cnt_words, cnt_words, -8); ++ __ addiu32(AT, cnt_words, -8); ++ __ bgez(AT, L_loop_begin); ++ __ delayed()->nop(); ++ ++ __ bind(L_loop_not_64bytes_fill); ++ __ beq(cnt_words, R0, L_loop_end); ++ __ delayed()->nop(); ++ __ sd(value, to, 0); ++ __ daddiu(to, to, 8); ++ __ addiu32(cnt_words, cnt_words, -1); ++ __ b(L_loop_not_64bytes_fill); ++ __ delayed()->nop(); ++ __ bind(L_loop_end); ++ ++ // Remaining count is less than 8 bytes. Fill it by a single store. 
++ // Note that the total length is no less than 8 bytes. ++ if (t == T_BYTE || t == T_SHORT) { ++ Label L_exit1; ++ __ beq(count, R0, L_exit1); ++ __ delayed()->nop(); ++ __ sll(AT, count, shift); ++ __ daddu(to, to, AT); // points to the end ++ __ sd(value, to, -8); // overwrite some elements ++ __ bind(L_exit1); ++ __ leave(); ++ __ jr(RA); ++ __ delayed()->nop(); ++ } ++ ++ // Handle copies less than 8 bytes. ++ Label L_fill_2, L_fill_4, L_exit2; ++ __ bind(L_fill_elements); ++ switch (t) { ++ case T_BYTE: ++ __ andi(AT, count, 1); ++ __ beq(AT, R0, L_fill_2); ++ __ delayed()->nop(); ++ __ sb(value, to, 0); ++ __ daddiu(to, to, 1); ++ __ bind(L_fill_2); ++ __ andi(AT, count, 1 << 1); ++ __ beq(AT, R0, L_fill_4); ++ __ delayed()->nop(); ++ __ sh(value, to, 0); ++ __ daddiu(to, to, 2); ++ __ bind(L_fill_4); ++ __ andi(AT, count, 1 << 2); ++ __ beq(AT, R0, L_exit2); ++ __ delayed()->nop(); ++ __ sw(value, to, 0); ++ break; ++ case T_SHORT: ++ __ andi(AT, count, 1); ++ __ beq(AT, R0, L_fill_4); ++ __ delayed()->nop(); ++ __ sh(value, to, 0); ++ __ daddiu(to, to, 2); ++ __ bind(L_fill_4); ++ __ andi(AT, count, 1 << 1); ++ __ beq(AT, R0, L_exit2); ++ __ delayed()->nop(); ++ __ sw(value, to, 0); ++ break; ++ case T_INT: ++ __ beq(count, R0, L_exit2); ++ __ delayed()->nop(); ++ __ sw(value, to, 0); ++ break; ++ default: ShouldNotReachHere(); ++ } ++ __ bind(L_exit2); ++ __ leave(); ++ __ jr(RA); ++ __ delayed()->nop(); ++ return start; ++ } ++ ++ // Arguments: ++ // aligned - true => Input and output aligned on a HeapWord == 8-byte boundary ++ // ignored ++ // name - stub name string ++ // ++ // Inputs: ++ // c_rarg0 - source array address ++ // c_rarg1 - destination array address ++ // c_rarg2 - element count, treated as ssize_t, can be zero ++ // ++ // If 'from' and/or 'to' are aligned on 4-, 2-, or 1-byte boundaries, ++ // we let the hardware handle it. The one to eight bytes within words, ++ // dwords or qwords that span cache line boundaries will still be loaded ++ // and stored atomically. ++ // ++ // Side Effects: ++ // disjoint_byte_copy_entry is set to the no-overlap entry point ++ // used by generate_conjoint_byte_copy(). ++ // ++ address generate_disjoint_byte_copy(bool aligned, const char * name) { ++ StubCodeMark mark(this, "StubRoutines", name); ++ __ align(CodeEntryAlignment); ++ ++ ++ Register tmp1 = T0; ++ Register tmp2 = T1; ++ Register tmp3 = T3; ++ ++ address start = __ pc(); ++ ++ __ push(tmp1); ++ __ push(tmp2); ++ __ push(tmp3); ++ __ move(tmp1, A0); ++ __ move(tmp2, A1); ++ __ move(tmp3, A2); ++ ++ ++ Label l_1, l_2, l_3, l_4, l_5, l_6, l_7, l_8, l_9, l_10, l_11; ++ Label l_debug; ++ ++ __ daddiu(AT, tmp3, -9); //why the number is 9 ? ++ __ blez(AT, l_9); ++ __ delayed()->nop(); ++ ++ if (!aligned) { ++ __ xorr(AT, tmp1, tmp2); ++ __ andi(AT, AT, 1); ++ __ bne(AT, R0, l_9); // if arrays don't have the same alignment mod 2, do 1 element copy ++ __ delayed()->nop(); ++ ++ __ andi(AT, tmp1, 1); ++ __ beq(AT, R0, l_10); //copy 1 enlement if necessary to aligh to 2 bytes ++ __ delayed()->nop(); ++ ++ __ lb(AT, tmp1, 0); ++ __ daddiu(tmp1, tmp1, 1); ++ __ sb(AT, tmp2, 0); ++ __ daddiu(tmp2, tmp2, 1); ++ __ daddiu(tmp3, tmp3, -1); ++ __ bind(l_10); ++ ++ __ xorr(AT, tmp1, tmp2); ++ __ andi(AT, AT, 3); ++ __ bne(AT, R0, l_1); // if arrays don't have the same alignment mod 4, do 2 elements copy ++ __ delayed()->nop(); ++ ++ // At this point it is guaranteed that both, from and to have the same alignment mod 4. ++ ++ // Copy 2 elements if necessary to align to 4 bytes. 
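Before the remaining alignment steps, the overall shape of this disjoint byte copy can be sketched in C; the 2- and 4-byte alignment ladders of the real stub are collapsed here, and the helper name is hypothetical:

#include <stdint.h>
#include <string.h>

static void copy_bytes_disjoint(const uint8_t* from, uint8_t* to, size_t count) {
    /* Only when src and dst agree mod 8 can both reach an 8-byte boundary. */
    if ((((uintptr_t)from ^ (uintptr_t)to) & 7) == 0) {
        while (((uintptr_t)from & 7) && count) { *to++ = *from++; count--; }
        while (count >= 8) { memcpy(to, from, 8); from += 8; to += 8; count -= 8; }
    }
    while (count >= 4) { memcpy(to, from, 4); from += 4; to += 4; count -= 4; }
    while (count--) *to++ = *from++;
}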
++ __ andi(AT, tmp1, 3); ++ __ beq(AT, R0, l_2); ++ __ delayed()->nop(); ++ ++ __ lhu(AT, tmp1, 0); ++ __ daddiu(tmp1, tmp1, 2); ++ __ sh(AT, tmp2, 0); ++ __ daddiu(tmp2, tmp2, 2); ++ __ daddiu(tmp3, tmp3, -2); ++ __ bind(l_2); ++ ++ // At this point the positions of both, from and to, are at least 4 byte aligned. ++ ++ // Copy 4 elements at a time. ++ // Align to 8 bytes, but only if both, from and to, have same alignment mod 8. ++ __ xorr(AT, tmp1, tmp2); ++ __ andi(AT, AT, 7); ++ __ bne(AT, R0, l_6); // not same alignment mod 8 -> copy 2, either from or to will be unaligned ++ __ delayed()->nop(); ++ ++ // Copy a 4 elements if necessary to align to 8 bytes. ++ __ andi(AT, tmp1, 7); ++ __ beq(AT, R0, l_7); ++ __ delayed()->nop(); ++ ++ __ lw(AT, tmp1, 0); ++ __ daddiu(tmp3, tmp3, -4); ++ __ sw(AT, tmp2, 0); ++ { // FasterArrayCopy ++ __ daddiu(tmp1, tmp1, 4); ++ __ daddiu(tmp2, tmp2, 4); ++ } ++ } ++ ++ __ bind(l_7); ++ ++ // Copy 4 elements at a time; either the loads or the stores can ++ // be unaligned if aligned == false. ++ ++ { // FasterArrayCopy ++ __ daddiu(AT, tmp3, -7); ++ __ blez(AT, l_6); // copy 4 at a time if less than 4 elements remain ++ __ delayed()->nop(); ++ ++ __ bind(l_8); ++ // For Loongson, there is 128-bit memory access. TODO ++ __ ld(AT, tmp1, 0); ++ __ sd(AT, tmp2, 0); ++ __ daddiu(tmp1, tmp1, 8); ++ __ daddiu(tmp2, tmp2, 8); ++ __ daddiu(tmp3, tmp3, -8); ++ __ daddiu(AT, tmp3, -8); ++ __ bgez(AT, l_8); ++ __ delayed()->nop(); ++ } ++ __ bind(l_6); ++ ++ // copy 4 bytes at a time ++ { // FasterArrayCopy ++ __ daddiu(AT, tmp3, -3); ++ __ blez(AT, l_1); ++ __ delayed()->nop(); ++ ++ __ bind(l_3); ++ __ lw(AT, tmp1, 0); ++ __ sw(AT, tmp2, 0); ++ __ daddiu(tmp1, tmp1, 4); ++ __ daddiu(tmp2, tmp2, 4); ++ __ daddiu(tmp3, tmp3, -4); ++ __ daddiu(AT, tmp3, -4); ++ __ bgez(AT, l_3); ++ __ delayed()->nop(); ++ ++ } ++ ++ // do 2 bytes copy ++ __ bind(l_1); ++ { ++ __ daddiu(AT, tmp3, -1); ++ __ blez(AT, l_9); ++ __ delayed()->nop(); ++ ++ __ bind(l_5); ++ __ lhu(AT, tmp1, 0); ++ __ daddiu(tmp3, tmp3, -2); ++ __ sh(AT, tmp2, 0); ++ __ daddiu(tmp1, tmp1, 2); ++ __ daddiu(tmp2, tmp2, 2); ++ __ daddiu(AT, tmp3, -2); ++ __ bgez(AT, l_5); ++ __ delayed()->nop(); ++ } ++ ++ //do 1 element copy--byte ++ __ bind(l_9); ++ __ beq(R0, tmp3, l_4); ++ __ delayed()->nop(); ++ ++ { ++ __ bind(l_11); ++ __ lb(AT, tmp1, 0); ++ __ daddiu(tmp3, tmp3, -1); ++ __ sb(AT, tmp2, 0); ++ __ daddiu(tmp1, tmp1, 1); ++ __ daddiu(tmp2, tmp2, 1); ++ __ daddiu(AT, tmp3, -1); ++ __ bgez(AT, l_11); ++ __ delayed()->nop(); ++ } ++ ++ __ bind(l_4); ++ __ pop(tmp3); ++ __ pop(tmp2); ++ __ pop(tmp1); ++ ++ __ jr(RA); ++ __ delayed()->nop(); ++ ++ return start; ++ } ++ ++ // Arguments: ++ // aligned - true => Input and output aligned on a HeapWord == 8-byte boundary ++ // ignored ++ // name - stub name string ++ // ++ // Inputs: ++ // A0 - source array address ++ // A1 - destination array address ++ // A2 - element count, treated as ssize_t, can be zero ++ // ++ // If 'from' and/or 'to' are aligned on 4-, 2-, or 1-byte boundaries, ++ // we let the hardware handle it. The one to eight bytes within words, ++ // dwords or qwords that span cache line boundaries will still be loaded ++ // and stored atomically. 
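The conjoint byte copy that follows runs from the high addresses downwards, so a destination that overlaps the tail of the source never reads bytes it has already overwritten (memmove semantics). Stripped of its 4-byte-at-a-time optimization, the idea in C:

#include <stddef.h>
#include <stdint.h>

static void copy_bytes_backward(const uint8_t* from, uint8_t* to, size_t count) {
    while (count--)
        to[count] = from[count];   /* last byte first */
}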
++ // ++ address generate_conjoint_byte_copy(bool aligned, const char *name) { ++ __ align(CodeEntryAlignment); ++ StubCodeMark mark(this, "StubRoutines", name); ++ address start = __ pc(); ++ ++ Label l_copy_4_bytes_loop, l_copy_suffix, l_copy_suffix_loop, l_exit; ++ Label l_copy_byte, l_from_unaligned, l_unaligned, l_4_bytes_aligned; ++ ++ address nooverlap_target = aligned ? ++ StubRoutines::arrayof_jbyte_disjoint_arraycopy() : ++ StubRoutines::jbyte_disjoint_arraycopy(); ++ ++ array_overlap_test(nooverlap_target, 0); ++ ++ const Register from = A0; // source array address ++ const Register to = A1; // destination array address ++ const Register count = A2; // elements count ++ const Register end_from = T3; // source array end address ++ const Register end_to = T0; // destination array end address ++ const Register end_count = T1; // destination array end address ++ ++ __ push(end_from); ++ __ push(end_to); ++ __ push(end_count); ++ __ push(T8); ++ ++ // copy from high to low ++ __ move(end_count, count); ++ __ daddu(end_from, from, end_count); ++ __ daddu(end_to, to, end_count); ++ ++ // If end_from and end_to has differante alignment, unaligned copy is performed. ++ __ andi(AT, end_from, 3); ++ __ andi(T8, end_to, 3); ++ __ bne(AT, T8, l_copy_byte); ++ __ delayed()->nop(); ++ ++ // First deal with the unaligned data at the top. ++ __ bind(l_unaligned); ++ __ beq(end_count, R0, l_exit); ++ __ delayed()->nop(); ++ ++ __ andi(AT, end_from, 3); ++ __ bne(AT, R0, l_from_unaligned); ++ __ delayed()->nop(); ++ ++ __ andi(AT, end_to, 3); ++ __ beq(AT, R0, l_4_bytes_aligned); ++ __ delayed()->nop(); ++ ++ __ bind(l_from_unaligned); ++ __ lb(AT, end_from, -1); ++ __ sb(AT, end_to, -1); ++ __ daddiu(end_from, end_from, -1); ++ __ daddiu(end_to, end_to, -1); ++ __ daddiu(end_count, end_count, -1); ++ __ b(l_unaligned); ++ __ delayed()->nop(); ++ ++ // now end_to, end_from point to 4-byte aligned high-ends ++ // end_count contains byte count that is not copied. 
++ // copy 4 bytes at a time ++ __ bind(l_4_bytes_aligned); ++ ++ __ move(T8, end_count); ++ __ daddiu(AT, end_count, -3); ++ __ blez(AT, l_copy_suffix); ++ __ delayed()->nop(); ++ ++ //__ andi(T8, T8, 3); ++ __ lea(end_from, Address(end_from, -4)); ++ __ lea(end_to, Address(end_to, -4)); ++ ++ __ dsrl(end_count, end_count, 2); ++ __ align(16); ++ __ bind(l_copy_4_bytes_loop); //l_copy_4_bytes ++ __ lw(AT, end_from, 0); ++ __ sw(AT, end_to, 0); ++ __ addiu(end_from, end_from, -4); ++ __ addiu(end_to, end_to, -4); ++ __ addiu(end_count, end_count, -1); ++ __ bne(end_count, R0, l_copy_4_bytes_loop); ++ __ delayed()->nop(); ++ ++ __ b(l_copy_suffix); ++ __ delayed()->nop(); ++ // copy dwords aligned or not with repeat move ++ // l_copy_suffix ++ // copy suffix (0-3 bytes) ++ __ bind(l_copy_suffix); ++ __ andi(T8, T8, 3); ++ __ beq(T8, R0, l_exit); ++ __ delayed()->nop(); ++ __ addiu(end_from, end_from, 3); ++ __ addiu(end_to, end_to, 3); ++ __ bind(l_copy_suffix_loop); ++ __ lb(AT, end_from, 0); ++ __ sb(AT, end_to, 0); ++ __ addiu(end_from, end_from, -1); ++ __ addiu(end_to, end_to, -1); ++ __ addiu(T8, T8, -1); ++ __ bne(T8, R0, l_copy_suffix_loop); ++ __ delayed()->nop(); ++ ++ __ bind(l_copy_byte); ++ __ beq(end_count, R0, l_exit); ++ __ delayed()->nop(); ++ __ lb(AT, end_from, -1); ++ __ sb(AT, end_to, -1); ++ __ daddiu(end_from, end_from, -1); ++ __ daddiu(end_to, end_to, -1); ++ __ daddiu(end_count, end_count, -1); ++ __ b(l_copy_byte); ++ __ delayed()->nop(); ++ ++ __ bind(l_exit); ++ __ pop(T8); ++ __ pop(end_count); ++ __ pop(end_to); ++ __ pop(end_from); ++ __ jr(RA); ++ __ delayed()->nop(); ++ return start; ++ } ++ ++ // Generate stub for disjoint short copy. If "aligned" is true, the ++ // "from" and "to" addresses are assumed to be heapword aligned. ++ // ++ // Arguments for generated stub: ++ // from: A0 ++ // to: A1 ++ // elm.count: A2 treated as signed ++ // one element: 2 bytes ++ // ++ // Strategy for aligned==true: ++ // ++ // If length <= 9: ++ // 1. copy 1 elements at a time (l_5) ++ // ++ // If length > 9: ++ // 1. copy 4 elements at a time until less than 4 elements are left (l_7) ++ // 2. copy 2 elements at a time until less than 2 elements are left (l_6) ++ // 3. copy last element if one was left in step 2. (l_1) ++ // ++ // ++ // Strategy for aligned==false: ++ // ++ // If length <= 9: same as aligned==true case ++ // ++ // If length > 9: ++ // 1. continue with step 7. if the alignment of from and to mod 4 ++ // is different. ++ // 2. align from and to to 4 bytes by copying 1 element if necessary ++ // 3. at l_2 from and to are 4 byte aligned; continue with ++ // 6. if they cannot be aligned to 8 bytes because they have ++ // got different alignment mod 8. ++ // 4. at this point we know that both, from and to, have the same ++ // alignment mod 8, now copy one element if necessary to get ++ // 8 byte alignment of from and to. ++ // 5. copy 4 elements at a time until less than 4 elements are ++ // left; depending on step 3. all load/stores are aligned. ++ // 6. copy 2 elements at a time until less than 2 elements are ++ // left. (l_6) ++ // 7. copy 1 element at a time. (l_5) ++ // 8. copy last element if one was left in step 6. 
(l_1) ++ ++ address generate_disjoint_short_copy(bool aligned, const char * name) { ++ StubCodeMark mark(this, "StubRoutines", name); ++ __ align(CodeEntryAlignment); ++ ++ Register tmp1 = T0; ++ Register tmp2 = T1; ++ Register tmp3 = T3; ++ Register tmp4 = T8; ++ Register tmp5 = T9; ++ Register tmp6 = T2; ++ ++ address start = __ pc(); ++ ++ __ push(tmp1); ++ __ push(tmp2); ++ __ push(tmp3); ++ __ move(tmp1, A0); ++ __ move(tmp2, A1); ++ __ move(tmp3, A2); ++ ++ Label l_1, l_2, l_3, l_4, l_5, l_6, l_7, l_8, l_9, l_10, l_11, l_12, l_13, l_14; ++ Label l_debug; ++ // don't try anything fancy if arrays don't have many elements ++ __ daddiu(AT, tmp3, -23); ++ __ blez(AT, l_14); ++ __ delayed()->nop(); ++ // move push here ++ __ push(tmp4); ++ __ push(tmp5); ++ __ push(tmp6); ++ ++ if (!aligned) { ++ __ xorr(AT, A0, A1); ++ __ andi(AT, AT, 1); ++ __ bne(AT, R0, l_debug); // if arrays don't have the same alignment mod 2, can this happen? ++ __ delayed()->nop(); ++ ++ __ xorr(AT, A0, A1); ++ __ andi(AT, AT, 3); ++ __ bne(AT, R0, l_1); // if arrays don't have the same alignment mod 4, do 1 element copy ++ __ delayed()->nop(); ++ ++ // At this point it is guaranteed that both, from and to have the same alignment mod 4. ++ ++ // Copy 1 element if necessary to align to 4 bytes. ++ __ andi(AT, A0, 3); ++ __ beq(AT, R0, l_2); ++ __ delayed()->nop(); ++ ++ __ lhu(AT, tmp1, 0); ++ __ daddiu(tmp1, tmp1, 2); ++ __ sh(AT, tmp2, 0); ++ __ daddiu(tmp2, tmp2, 2); ++ __ daddiu(tmp3, tmp3, -1); ++ __ bind(l_2); ++ ++ // At this point the positions of both, from and to, are at least 4 byte aligned. ++ ++ // Copy 4 elements at a time. ++ // Align to 8 bytes, but only if both, from and to, have same alignment mod 8. ++ __ xorr(AT, tmp1, tmp2); ++ __ andi(AT, AT, 7); ++ __ bne(AT, R0, l_6); // not same alignment mod 8 -> copy 2, either from or to will be unaligned ++ __ delayed()->nop(); ++ ++ // Copy a 2-element word if necessary to align to 8 bytes. ++ __ andi(AT, tmp1, 7); ++ __ beq(AT, R0, l_7); ++ __ delayed()->nop(); ++ ++ __ lw(AT, tmp1, 0); ++ __ daddiu(tmp3, tmp3, -2); ++ __ sw(AT, tmp2, 0); ++ __ daddiu(tmp1, tmp1, 4); ++ __ daddiu(tmp2, tmp2, 4); ++ }// end of if (!aligned) ++ ++ __ bind(l_7); ++ // At this time the position of both, from and to, are at least 8 byte aligned. ++ // Copy 8 elemnets at a time. ++ // Align to 16 bytes, but only if both from and to have same alignment mod 8. 
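Once both pointers reach a 16-byte boundary, the main loop below moves 32 bytes per iteration, either as four 64-bit loads and stores or, when UseLEXT1 is set, as two 128-bit gslq/gssq pairs. Ignoring the alignment pre-loops, the loop shape in C (hypothetical helper, not part of the patch):

#include <stdint.h>
#include <string.h>

static void copy_shorts_bulk(const uint16_t* from, uint16_t* to, size_t count) {
    while (count >= 16) {                 /* 32 bytes per iteration */
        memcpy(to, from, 32);             /* four 64-bit or two 128-bit moves in the stub */
        from += 16; to += 16; count -= 16;
    }
    while (count--) *to++ = *from++;      /* leftover elements */
}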
++ __ xorr(AT, tmp1, tmp2); ++ __ andi(AT, AT, 15); ++ __ bne(AT, R0, l_9); ++ __ delayed()->nop(); ++ ++ // Copy 4-element word if necessary to align to 16 bytes, ++ __ andi(AT, tmp1, 15); ++ __ beq(AT, R0, l_10); ++ __ delayed()->nop(); ++ ++ __ ld(AT, tmp1, 0); ++ __ daddiu(tmp3, tmp3, -4); ++ __ sd(AT, tmp2, 0); ++ __ daddiu(tmp1, tmp1, 8); ++ __ daddiu(tmp2, tmp2, 8); ++ ++ __ bind(l_10); ++ ++ // Copy 8 elements at a time; either the loads or the stores can ++ // be unalligned if aligned == false ++ ++ { // FasterArrayCopy ++ __ bind(l_11); ++ // For loongson the 128-bit memory access instruction is gslq/gssq ++ if (UseLEXT1) { ++ __ gslq(AT, tmp4, tmp1, 0); ++ __ gslq(tmp5, tmp6, tmp1, 16); ++ __ daddiu(tmp1, tmp1, 32); ++ __ daddiu(tmp2, tmp2, 32); ++ __ gssq(AT, tmp4, tmp2, -32); ++ __ gssq(tmp5, tmp6, tmp2, -16); ++ } else { ++ __ ld(AT, tmp1, 0); ++ __ ld(tmp4, tmp1, 8); ++ __ ld(tmp5, tmp1, 16); ++ __ ld(tmp6, tmp1, 24); ++ __ daddiu(tmp1, tmp1, 32); ++ __ sd(AT, tmp2, 0); ++ __ sd(tmp4, tmp2, 8); ++ __ sd(tmp5, tmp2, 16); ++ __ sd(tmp6, tmp2, 24); ++ __ daddiu(tmp2, tmp2, 32); ++ } ++ __ daddiu(tmp3, tmp3, -16); ++ __ daddiu(AT, tmp3, -16); ++ __ bgez(AT, l_11); ++ __ delayed()->nop(); ++ } ++ __ bind(l_9); ++ ++ // Copy 4 elements at a time; either the loads or the stores can ++ // be unaligned if aligned == false. ++ { // FasterArrayCopy ++ __ daddiu(AT, tmp3, -15);// loop unrolling 4 times, so if the elements should not be less than 16 ++ __ blez(AT, l_4); // copy 2 at a time if less than 16 elements remain ++ __ delayed()->nop(); ++ ++ __ bind(l_8); ++ __ ld(AT, tmp1, 0); ++ __ ld(tmp4, tmp1, 8); ++ __ ld(tmp5, tmp1, 16); ++ __ ld(tmp6, tmp1, 24); ++ __ sd(AT, tmp2, 0); ++ __ sd(tmp4, tmp2, 8); ++ __ sd(tmp5, tmp2,16); ++ __ daddiu(tmp1, tmp1, 32); ++ __ daddiu(tmp2, tmp2, 32); ++ __ daddiu(tmp3, tmp3, -16); ++ __ daddiu(AT, tmp3, -16); ++ __ bgez(AT, l_8); ++ __ delayed()->sd(tmp6, tmp2, -8); ++ } ++ __ bind(l_6); ++ ++ // copy 2 element at a time ++ { // FasterArrayCopy ++ __ daddiu(AT, tmp3, -7); ++ __ blez(AT, l_4); ++ __ delayed()->nop(); ++ ++ __ bind(l_3); ++ __ lw(AT, tmp1, 0); ++ __ lw(tmp4, tmp1, 4); ++ __ lw(tmp5, tmp1, 8); ++ __ lw(tmp6, tmp1, 12); ++ __ sw(AT, tmp2, 0); ++ __ sw(tmp4, tmp2, 4); ++ __ sw(tmp5, tmp2, 8); ++ __ daddiu(tmp1, tmp1, 16); ++ __ daddiu(tmp2, tmp2, 16); ++ __ daddiu(tmp3, tmp3, -8); ++ __ daddiu(AT, tmp3, -8); ++ __ bgez(AT, l_3); ++ __ delayed()->sw(tmp6, tmp2, -4); ++ } ++ ++ __ bind(l_1); ++ // do single element copy (8 bit), can this happen? 
++ { // FasterArrayCopy ++ __ daddiu(AT, tmp3, -3); ++ __ blez(AT, l_4); ++ __ delayed()->nop(); ++ ++ __ bind(l_5); ++ __ lhu(AT, tmp1, 0); ++ __ lhu(tmp4, tmp1, 2); ++ __ lhu(tmp5, tmp1, 4); ++ __ lhu(tmp6, tmp1, 6); ++ __ sh(AT, tmp2, 0); ++ __ sh(tmp4, tmp2, 2); ++ __ sh(tmp5, tmp2, 4); ++ __ daddiu(tmp1, tmp1, 8); ++ __ daddiu(tmp2, tmp2, 8); ++ __ daddiu(tmp3, tmp3, -4); ++ __ daddiu(AT, tmp3, -4); ++ __ bgez(AT, l_5); ++ __ delayed()->sh(tmp6, tmp2, -2); ++ } ++ // single element ++ __ bind(l_4); ++ ++ __ pop(tmp6); ++ __ pop(tmp5); ++ __ pop(tmp4); ++ ++ __ bind(l_14); ++ { // FasterArrayCopy ++ __ beq(R0, tmp3, l_13); ++ __ delayed()->nop(); ++ ++ __ bind(l_12); ++ __ lhu(AT, tmp1, 0); ++ __ sh(AT, tmp2, 0); ++ __ daddiu(tmp1, tmp1, 2); ++ __ daddiu(tmp2, tmp2, 2); ++ __ daddiu(tmp3, tmp3, -1); ++ __ daddiu(AT, tmp3, -1); ++ __ bgez(AT, l_12); ++ __ delayed()->nop(); ++ } ++ ++ __ bind(l_13); ++ __ pop(tmp3); ++ __ pop(tmp2); ++ __ pop(tmp1); ++ ++ __ jr(RA); ++ __ delayed()->nop(); ++ ++ __ bind(l_debug); ++ __ stop("generate_disjoint_short_copy should not reach here"); ++ return start; ++ } ++ ++ // Arguments: ++ // aligned - true => Input and output aligned on a HeapWord == 8-byte boundary ++ // ignored ++ // name - stub name string ++ // ++ // Inputs: ++ // c_rarg0 - source array address ++ // c_rarg1 - destination array address ++ // c_rarg2 - element count, treated as ssize_t, can be zero ++ // ++ // If 'from' and/or 'to' are aligned on 4- or 2-byte boundaries, we ++ // let the hardware handle it. The two or four words within dwords ++ // or qwords that span cache line boundaries will still be loaded ++ // and stored atomically. ++ // ++ address generate_conjoint_short_copy(bool aligned, const char *name) { ++ StubCodeMark mark(this, "StubRoutines", name); ++ __ align(CodeEntryAlignment); ++ address start = __ pc(); ++ ++ Label l_exit, l_copy_short, l_from_unaligned, l_unaligned, l_4_bytes_aligned; ++ ++ address nooverlap_target = aligned ? ++ StubRoutines::arrayof_jshort_disjoint_arraycopy() : ++ StubRoutines::jshort_disjoint_arraycopy(); ++ ++ array_overlap_test(nooverlap_target, 1); ++ ++ const Register from = A0; // source array address ++ const Register to = A1; // destination array address ++ const Register count = A2; // elements count ++ const Register end_from = T3; // source array end address ++ const Register end_to = T0; // destination array end address ++ const Register end_count = T1; // destination array end address ++ ++ __ push(end_from); ++ __ push(end_to); ++ __ push(end_count); ++ __ push(T8); ++ ++ // copy from high to low ++ __ move(end_count, count); ++ __ sll(AT, end_count, Address::times_2); ++ __ daddu(end_from, from, AT); ++ __ daddu(end_to, to, AT); ++ ++ // If end_from and end_to has differante alignment, unaligned copy is performed. ++ __ andi(AT, end_from, 3); ++ __ andi(T8, end_to, 3); ++ __ bne(AT, T8, l_copy_short); ++ __ delayed()->nop(); ++ ++ // First deal with the unaligned data at the top. ++ __ bind(l_unaligned); ++ __ beq(end_count, R0, l_exit); ++ __ delayed()->nop(); ++ ++ __ andi(AT, end_from, 3); ++ __ bne(AT, R0, l_from_unaligned); ++ __ delayed()->nop(); ++ ++ __ andi(AT, end_to, 3); ++ __ beq(AT, R0, l_4_bytes_aligned); ++ __ delayed()->nop(); ++ ++ // Copy 1 element if necessary to align to 4 bytes. 
++ __ bind(l_from_unaligned); ++ __ lhu(AT, end_from, -2); ++ __ sh(AT, end_to, -2); ++ __ daddiu(end_from, end_from, -2); ++ __ daddiu(end_to, end_to, -2); ++ __ daddiu(end_count, end_count, -1); ++ __ b(l_unaligned); ++ __ delayed()->nop(); ++ ++ // now end_to, end_from point to 4-byte aligned high-ends ++ // end_count contains byte count that is not copied. ++ // copy 4 bytes at a time ++ __ bind(l_4_bytes_aligned); ++ ++ __ daddiu(AT, end_count, -1); ++ __ blez(AT, l_copy_short); ++ __ delayed()->nop(); ++ ++ __ lw(AT, end_from, -4); ++ __ sw(AT, end_to, -4); ++ __ addiu(end_from, end_from, -4); ++ __ addiu(end_to, end_to, -4); ++ __ addiu(end_count, end_count, -2); ++ __ b(l_4_bytes_aligned); ++ __ delayed()->nop(); ++ ++ // copy 1 element at a time ++ __ bind(l_copy_short); ++ __ beq(end_count, R0, l_exit); ++ __ delayed()->nop(); ++ __ lhu(AT, end_from, -2); ++ __ sh(AT, end_to, -2); ++ __ daddiu(end_from, end_from, -2); ++ __ daddiu(end_to, end_to, -2); ++ __ daddiu(end_count, end_count, -1); ++ __ b(l_copy_short); ++ __ delayed()->nop(); ++ ++ __ bind(l_exit); ++ __ pop(T8); ++ __ pop(end_count); ++ __ pop(end_to); ++ __ pop(end_from); ++ __ jr(RA); ++ __ delayed()->nop(); ++ return start; ++ } ++ ++ // Arguments: ++ // aligned - true => Input and output aligned on a HeapWord == 8-byte boundary ++ // ignored ++ // is_oop - true => oop array, so generate store check code ++ // name - stub name string ++ // ++ // Inputs: ++ // c_rarg0 - source array address ++ // c_rarg1 - destination array address ++ // c_rarg2 - element count, treated as ssize_t, can be zero ++ // ++ // If 'from' and/or 'to' are aligned on 4-byte boundaries, we let ++ // the hardware handle it. The two dwords within qwords that span ++ // cache line boundaries will still be loaded and stored atomicly. ++ // ++ // Side Effects: ++ // disjoint_int_copy_entry is set to the no-overlap entry point ++ // used by generate_conjoint_int_oop_copy(). 
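The int copy introduced here widens to 64-bit moves whenever source and destination agree modulo 8: one leading 4-byte copy, if needed, brings both onto an 8-byte boundary, after which two ints travel per load/store pair; otherwise it falls back to a plain 4-byte loop. The oop variant additionally brackets the copy with the GC barrier prologue and epilogue seen below. A sketch of the non-oop path in C (hypothetical helper name):

#include <stdint.h>
#include <string.h>

static void copy_ints_disjoint(const uint32_t* from, uint32_t* to, size_t count) {
    if ((((uintptr_t)from ^ (uintptr_t)to) & 7) == 0) {
        if (((uintptr_t)from & 7) && count) { *to++ = *from++; count--; }
        while (count >= 2) {              /* two ints per 64-bit access */
            memcpy(to, from, 8);
            from += 2; to += 2; count -= 2;
        }
    }
    while (count--) *to++ = *from++;      /* unpaired or unaligned remainder */
}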
++ // ++ address generate_disjoint_int_oop_copy(bool aligned, bool is_oop, const char *name, bool dest_uninitialized = false) { ++ Label l_3, l_4, l_5, l_6, l_7; ++ StubCodeMark mark(this, "StubRoutines", name); ++ ++ __ align(CodeEntryAlignment); ++ address start = __ pc(); ++ __ push(T3); ++ __ push(T0); ++ __ push(T1); ++ __ push(T8); ++ __ push(T9); ++ __ move(T1, A2); ++ __ move(T3, A0); ++ __ move(T0, A1); ++ ++ DecoratorSet decorators = IN_HEAP | IS_ARRAY | ARRAYCOPY_DISJOINT; ++ if (dest_uninitialized) { ++ decorators |= IS_DEST_UNINITIALIZED; ++ } ++ if (aligned) { ++ decorators |= ARRAYCOPY_ALIGNED; ++ } ++ ++ BarrierSetAssembler *bs = BarrierSet::barrier_set()->barrier_set_assembler(); ++ bs->arraycopy_prologue(_masm, decorators, is_oop, A1, A2); ++ ++ if(!aligned) { ++ __ xorr(AT, T3, T0); ++ __ andi(AT, AT, 7); ++ __ bne(AT, R0, l_5); // not same alignment mod 8 -> copy 1 element each time ++ __ delayed()->nop(); ++ ++ __ andi(AT, T3, 7); ++ __ beq(AT, R0, l_6); //copy 2 elements each time ++ __ delayed()->nop(); ++ ++ __ lw(AT, T3, 0); ++ __ daddiu(T1, T1, -1); ++ __ sw(AT, T0, 0); ++ __ daddiu(T3, T3, 4); ++ __ daddiu(T0, T0, 4); ++ } ++ ++ { ++ __ bind(l_6); ++ __ daddiu(AT, T1, -1); ++ __ blez(AT, l_5); ++ __ delayed()->nop(); ++ ++ __ bind(l_7); ++ __ ld(AT, T3, 0); ++ __ sd(AT, T0, 0); ++ __ daddiu(T3, T3, 8); ++ __ daddiu(T0, T0, 8); ++ __ daddiu(T1, T1, -2); ++ __ daddiu(AT, T1, -2); ++ __ bgez(AT, l_7); ++ __ delayed()->nop(); ++ } ++ ++ __ bind(l_5); ++ __ beq(T1, R0, l_4); ++ __ delayed()->nop(); ++ ++ __ align(16); ++ __ bind(l_3); ++ __ lw(AT, T3, 0); ++ __ sw(AT, T0, 0); ++ __ addiu(T3, T3, 4); ++ __ addiu(T0, T0, 4); ++ __ addiu(T1, T1, -1); ++ __ bne(T1, R0, l_3); ++ __ delayed()->nop(); ++ ++ // exit ++ __ bind(l_4); ++ bs->arraycopy_epilogue(_masm, decorators, is_oop, A1, A2, T1); ++ __ pop(T9); ++ __ pop(T8); ++ __ pop(T1); ++ __ pop(T0); ++ __ pop(T3); ++ __ jr(RA); ++ __ delayed()->nop(); ++ ++ return start; ++ } ++ ++ // Arguments: ++ // aligned - true => Input and output aligned on a HeapWord == 8-byte boundary ++ // ignored ++ // is_oop - true => oop array, so generate store check code ++ // name - stub name string ++ // ++ // Inputs: ++ // c_rarg0 - source array address ++ // c_rarg1 - destination array address ++ // c_rarg2 - element count, treated as ssize_t, can be zero ++ // ++ // If 'from' and/or 'to' are aligned on 4-byte boundaries, we let ++ // the hardware handle it. The two dwords within qwords that span ++ // cache line boundaries will still be loaded and stored atomicly. ++ // ++ address generate_conjoint_int_oop_copy(bool aligned, bool is_oop, const char *name, bool dest_uninitialized = false) { ++ Label l_2, l_4; ++ StubCodeMark mark(this, "StubRoutines", name); ++ __ align(CodeEntryAlignment); ++ address start = __ pc(); ++ address nooverlap_target; ++ ++ if (is_oop) { ++ nooverlap_target = aligned ? ++ StubRoutines::arrayof_oop_disjoint_arraycopy() : ++ StubRoutines::oop_disjoint_arraycopy(); ++ } else { ++ nooverlap_target = aligned ? 
++ StubRoutines::arrayof_jint_disjoint_arraycopy() : ++ StubRoutines::jint_disjoint_arraycopy(); ++ } ++ ++ array_overlap_test(nooverlap_target, 2); ++ ++ DecoratorSet decorators = IN_HEAP | IS_ARRAY; ++ if (dest_uninitialized) { ++ decorators |= IS_DEST_UNINITIALIZED; ++ } ++ if (aligned) { ++ decorators |= ARRAYCOPY_ALIGNED; ++ } ++ ++ BarrierSetAssembler *bs = BarrierSet::barrier_set()->barrier_set_assembler(); ++ // no registers are destroyed by this call ++ bs->arraycopy_prologue(_masm, decorators, is_oop, A1, A2); ++ ++ __ push(T3); ++ __ push(T0); ++ __ push(T1); ++ __ push(T8); ++ __ push(T9); ++ ++ __ move(T1, A2); ++ __ move(T3, A0); ++ __ move(T0, A1); ++ ++ // T3: source array address ++ // T0: destination array address ++ // T1: element count ++ ++ __ sll(AT, T1, Address::times_4); ++ __ addu(AT, T3, AT); ++ __ daddiu(T3, AT, -4); ++ __ sll(AT, T1, Address::times_4); ++ __ addu(AT, T0, AT); ++ __ daddiu(T0, AT, -4); ++ ++ __ beq(T1, R0, l_4); ++ __ delayed()->nop(); ++ ++ __ align(16); ++ __ bind(l_2); ++ __ lw(AT, T3, 0); ++ __ sw(AT, T0, 0); ++ __ addiu(T3, T3, -4); ++ __ addiu(T0, T0, -4); ++ __ addiu(T1, T1, -1); ++ __ bne(T1, R0, l_2); ++ __ delayed()->nop(); ++ ++ __ bind(l_4); ++ bs->arraycopy_epilogue(_masm, decorators, is_oop, A1, A2, T1); ++ __ pop(T9); ++ __ pop(T8); ++ __ pop(T1); ++ __ pop(T0); ++ __ pop(T3); ++ __ jr(RA); ++ __ delayed()->nop(); ++ ++ return start; ++ } ++ ++ // Arguments: ++ // aligned - true => Input and output aligned on a HeapWord == 8-byte boundary ++ // ignored ++ // is_oop - true => oop array, so generate store check code ++ // name - stub name string ++ // ++ // Inputs: ++ // c_rarg0 - source array address ++ // c_rarg1 - destination array address ++ // c_rarg2 - element count, treated as ssize_t, can be zero ++ // ++ // If 'from' and/or 'to' are aligned on 4-byte boundaries, we let ++ // the hardware handle it. The two dwords within qwords that span ++ // cache line boundaries will still be loaded and stored atomicly. ++ // ++ // Side Effects: ++ // disjoint_int_copy_entry is set to the no-overlap entry point ++ // used by generate_conjoint_int_oop_copy(). 
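Every oop-capable copy stub in this file wraps its element loop between BarrierSetAssembler::arraycopy_prologue and arraycopy_epilogue so the garbage collector observes each reference store; the conjoint variants additionally walk backwards after the overlap test. The shape only, sketched in C with the barrier calls left as comments:

#include <stddef.h>

static void oop_arraycopy_shape(void** from, void** to, size_t count) {
    /* bs->arraycopy_prologue(masm, decorators, is_oop, A1, A2); */
    for (size_t i = 0; i < count; i++)
        to[i] = from[i];
    /* bs->arraycopy_epilogue(masm, decorators, is_oop, A1, A2, T1); */
}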
++ // ++ address generate_disjoint_long_oop_copy(bool aligned, bool is_oop, const char *name, bool dest_uninitialized = false) { ++ Label l_3, l_4; ++ StubCodeMark mark(this, "StubRoutines", name); ++ __ align(CodeEntryAlignment); ++ address start = __ pc(); ++ ++ DecoratorSet decorators = IN_HEAP | IS_ARRAY | ARRAYCOPY_DISJOINT; ++ if (dest_uninitialized) { ++ decorators |= IS_DEST_UNINITIALIZED; ++ } ++ if (aligned) { ++ decorators |= ARRAYCOPY_ALIGNED; ++ } ++ ++ BarrierSetAssembler *bs = BarrierSet::barrier_set()->barrier_set_assembler(); ++ bs->arraycopy_prologue(_masm, decorators, is_oop, A1, A2); ++ ++ __ push(T3); ++ __ push(T0); ++ __ push(T1); ++ __ push(T8); ++ __ push(T9); ++ ++ __ move(T1, A2); ++ __ move(T3, A0); ++ __ move(T0, A1); ++ ++ // T3: source array address ++ // T0: destination array address ++ // T1: element count ++ ++ __ beq(T1, R0, l_4); ++ __ delayed()->nop(); ++ ++ __ align(16); ++ __ bind(l_3); ++ __ ld(AT, T3, 0); ++ __ sd(AT, T0, 0); ++ __ addiu(T3, T3, 8); ++ __ addiu(T0, T0, 8); ++ __ addiu(T1, T1, -1); ++ __ bne(T1, R0, l_3); ++ __ delayed()->nop(); ++ ++ // exit ++ __ bind(l_4); ++ bs->arraycopy_epilogue(_masm, decorators, is_oop, A1, A2, T1); ++ __ pop(T9); ++ __ pop(T8); ++ __ pop(T1); ++ __ pop(T0); ++ __ pop(T3); ++ __ jr(RA); ++ __ delayed()->nop(); ++ return start; ++ } ++ ++ // Arguments: ++ // aligned - true => Input and output aligned on a HeapWord == 8-byte boundary ++ // ignored ++ // is_oop - true => oop array, so generate store check code ++ // name - stub name string ++ // ++ // Inputs: ++ // c_rarg0 - source array address ++ // c_rarg1 - destination array address ++ // c_rarg2 - element count, treated as ssize_t, can be zero ++ // ++ // If 'from' and/or 'to' are aligned on 4-byte boundaries, we let ++ // the hardware handle it. The two dwords within qwords that span ++ // cache line boundaries will still be loaded and stored atomicly. ++ // ++ address generate_conjoint_long_oop_copy(bool aligned, bool is_oop, const char *name, bool dest_uninitialized = false) { ++ Label l_2, l_4; ++ StubCodeMark mark(this, "StubRoutines", name); ++ __ align(CodeEntryAlignment); ++ address start = __ pc(); ++ address nooverlap_target; ++ ++ if (is_oop) { ++ nooverlap_target = aligned ? ++ StubRoutines::arrayof_oop_disjoint_arraycopy() : ++ StubRoutines::oop_disjoint_arraycopy(); ++ } else { ++ nooverlap_target = aligned ? 
++ StubRoutines::arrayof_jlong_disjoint_arraycopy() : ++ StubRoutines::jlong_disjoint_arraycopy(); ++ } ++ ++ array_overlap_test(nooverlap_target, 3); ++ ++ DecoratorSet decorators = IN_HEAP | IS_ARRAY; ++ if (dest_uninitialized) { ++ decorators |= IS_DEST_UNINITIALIZED; ++ } ++ if (aligned) { ++ decorators |= ARRAYCOPY_ALIGNED; ++ } ++ ++ BarrierSetAssembler *bs = BarrierSet::barrier_set()->barrier_set_assembler(); ++ bs->arraycopy_prologue(_masm, decorators, is_oop, A1, A2); ++ ++ __ push(T3); ++ __ push(T0); ++ __ push(T1); ++ __ push(T8); ++ __ push(T9); ++ ++ __ move(T1, A2); ++ __ move(T3, A0); ++ __ move(T0, A1); ++ ++ __ sll(AT, T1, Address::times_8); ++ __ addu(AT, T3, AT); ++ __ daddiu(T3, AT, -8); ++ __ sll(AT, T1, Address::times_8); ++ __ addu(AT, T0, AT); ++ __ daddiu(T0, AT, -8); ++ ++ __ beq(T1, R0, l_4); ++ __ delayed()->nop(); ++ ++ __ align(16); ++ __ bind(l_2); ++ __ ld(AT, T3, 0); ++ __ sd(AT, T0, 0); ++ __ addiu(T3, T3, -8); ++ __ addiu(T0, T0, -8); ++ __ addiu(T1, T1, -1); ++ __ bne(T1, R0, l_2); ++ __ delayed()->nop(); ++ ++ // exit ++ __ bind(l_4); ++ bs->arraycopy_epilogue(_masm, decorators, is_oop, A1, A2, T1); ++ __ pop(T9); ++ __ pop(T8); ++ __ pop(T1); ++ __ pop(T0); ++ __ pop(T3); ++ __ jr(RA); ++ __ delayed()->nop(); ++ return start; ++ } ++ ++ //FIXME ++ address generate_disjoint_long_copy(bool aligned, const char *name) { ++ Label l_1, l_2; ++ StubCodeMark mark(this, "StubRoutines", name); ++ __ align(CodeEntryAlignment); ++ address start = __ pc(); ++ ++ __ move(T1, A2); ++ __ move(T3, A0); ++ __ move(T0, A1); ++ __ push(T3); ++ __ push(T0); ++ __ push(T1); ++ __ b(l_2); ++ __ delayed()->nop(); ++ __ align(16); ++ __ bind(l_1); ++ { ++ UnsafeCopyMemoryMark ucmm(this, true, true); ++ __ ld(AT, T3, 0); ++ __ sd (AT, T0, 0); ++ __ addiu(T3, T3, 8); ++ __ addiu(T0, T0, 8); ++ __ bind(l_2); ++ __ addiu(T1, T1, -1); ++ __ bgez(T1, l_1); ++ __ delayed()->nop(); ++ } ++ __ pop(T1); ++ __ pop(T0); ++ __ pop(T3); ++ __ jr(RA); ++ __ delayed()->nop(); ++ return start; ++ } ++ ++ ++ address generate_conjoint_long_copy(bool aligned, const char *name) { ++ Label l_1, l_2; ++ StubCodeMark mark(this, "StubRoutines", name); ++ __ align(CodeEntryAlignment); ++ address start = __ pc(); ++ address nooverlap_target = aligned ? 
++ StubRoutines::arrayof_jlong_disjoint_arraycopy() : ++ StubRoutines::jlong_disjoint_arraycopy(); ++ array_overlap_test(nooverlap_target, 3); ++ ++ __ push(T3); ++ __ push(T0); ++ __ push(T1); ++ ++ __ move(T1, A2); ++ __ move(T3, A0); ++ __ move(T0, A1); ++ __ sll(AT, T1, Address::times_8); ++ __ addu(AT, T3, AT); ++ __ daddiu(T3, AT, -8); ++ __ sll(AT, T1, Address::times_8); ++ __ addu(AT, T0, AT); ++ __ daddiu(T0, AT, -8); ++ ++ __ b(l_2); ++ __ delayed()->nop(); ++ __ align(16); ++ __ bind(l_1); ++ { ++ UnsafeCopyMemoryMark ucmm(this, true, true); ++ __ ld(AT, T3, 0); ++ __ sd (AT, T0, 0); ++ __ addiu(T3, T3, -8); ++ __ addiu(T0, T0,-8); ++ __ bind(l_2); ++ __ addiu(T1, T1, -1); ++ __ bgez(T1, l_1); ++ __ delayed()->nop(); ++ } ++ __ pop(T1); ++ __ pop(T0); ++ __ pop(T3); ++ __ jr(RA); ++ __ delayed()->nop(); ++ return start; ++ } ++ ++ void generate_arraycopy_stubs() { ++ if (UseCompressedOops) { ++ StubRoutines::_oop_disjoint_arraycopy = generate_disjoint_int_oop_copy(false, true, ++ "oop_disjoint_arraycopy"); ++ StubRoutines::_oop_arraycopy = generate_conjoint_int_oop_copy(false, true, ++ "oop_arraycopy"); ++ StubRoutines::_oop_disjoint_arraycopy_uninit = generate_disjoint_int_oop_copy(false, true, ++ "oop_disjoint_arraycopy_uninit", true); ++ StubRoutines::_oop_arraycopy_uninit = generate_conjoint_int_oop_copy(false, true, ++ "oop_arraycopy_uninit", true); ++ } else { ++ StubRoutines::_oop_disjoint_arraycopy = generate_disjoint_long_oop_copy(false, true, ++ "oop_disjoint_arraycopy"); ++ StubRoutines::_oop_arraycopy = generate_conjoint_long_oop_copy(false, true, ++ "oop_arraycopy"); ++ StubRoutines::_oop_disjoint_arraycopy_uninit = generate_disjoint_long_oop_copy(false, true, ++ "oop_disjoint_arraycopy_uninit", true); ++ StubRoutines::_oop_arraycopy_uninit = generate_conjoint_long_oop_copy(false, true, ++ "oop_arraycopy_uninit", true); ++ } ++ ++ StubRoutines::_jbyte_disjoint_arraycopy = generate_disjoint_byte_copy(false, "jbyte_disjoint_arraycopy"); ++ StubRoutines::_jshort_disjoint_arraycopy = generate_disjoint_short_copy(false, "jshort_disjoint_arraycopy"); ++ StubRoutines::_jint_disjoint_arraycopy = generate_disjoint_int_oop_copy(false, false, "jint_disjoint_arraycopy"); ++ StubRoutines::_jlong_disjoint_arraycopy = generate_disjoint_long_copy(false, "jlong_disjoint_arraycopy"); ++ ++ StubRoutines::_jbyte_arraycopy = generate_conjoint_byte_copy(false, "jbyte_arraycopy"); ++ StubRoutines::_jshort_arraycopy = generate_conjoint_short_copy(false, "jshort_arraycopy"); ++ StubRoutines::_jint_arraycopy = generate_conjoint_int_oop_copy(false, false, "jint_arraycopy"); ++ StubRoutines::_jlong_arraycopy = generate_conjoint_long_copy(false, "jlong_arraycopy"); ++ ++ // We don't generate specialized code for HeapWord-aligned source ++ // arrays, so just use the code we've already generated ++ StubRoutines::_arrayof_jbyte_disjoint_arraycopy = StubRoutines::_jbyte_disjoint_arraycopy; ++ StubRoutines::_arrayof_jbyte_arraycopy = StubRoutines::_jbyte_arraycopy; ++ ++ StubRoutines::_arrayof_jshort_disjoint_arraycopy = StubRoutines::_jshort_disjoint_arraycopy; ++ StubRoutines::_arrayof_jshort_arraycopy = StubRoutines::_jshort_arraycopy; ++ ++ StubRoutines::_arrayof_jint_disjoint_arraycopy = StubRoutines::_jint_disjoint_arraycopy; ++ StubRoutines::_arrayof_jint_arraycopy = StubRoutines::_jint_arraycopy; ++ ++ StubRoutines::_arrayof_jlong_disjoint_arraycopy = StubRoutines::_jlong_disjoint_arraycopy; ++ StubRoutines::_arrayof_jlong_arraycopy = StubRoutines::_jlong_arraycopy; ++ ++ 
StubRoutines::_arrayof_oop_disjoint_arraycopy = StubRoutines::_oop_disjoint_arraycopy; ++ StubRoutines::_arrayof_oop_arraycopy = StubRoutines::_oop_arraycopy; ++ ++ StubRoutines::_arrayof_oop_disjoint_arraycopy_uninit = StubRoutines::_oop_disjoint_arraycopy_uninit; ++ StubRoutines::_arrayof_oop_arraycopy_uninit = StubRoutines::_oop_arraycopy_uninit; ++ ++ StubRoutines::_jbyte_fill = generate_fill(T_BYTE, false, "jbyte_fill"); ++ StubRoutines::_jshort_fill = generate_fill(T_SHORT, false, "jshort_fill"); ++ StubRoutines::_jint_fill = generate_fill(T_INT, false, "jint_fill"); ++ StubRoutines::_arrayof_jbyte_fill = generate_fill(T_BYTE, true, "arrayof_jbyte_fill"); ++ StubRoutines::_arrayof_jshort_fill = generate_fill(T_SHORT, true, "arrayof_jshort_fill"); ++ StubRoutines::_arrayof_jint_fill = generate_fill(T_INT, true, "arrayof_jint_fill"); ++ } ++ ++ ++#undef __ ++#define __ masm-> ++ ++ // Continuation point for throwing of implicit exceptions that are ++ // not handled in the current activation. Fabricates an exception ++ // oop and initiates normal exception dispatching in this ++ // frame. Since we need to preserve callee-saved values (currently ++ // only for C2, but done for C1 as well) we need a callee-saved oop ++ // map and therefore have to make these stubs into RuntimeStubs ++ // rather than BufferBlobs. If the compiler needs all registers to ++ // be preserved between the fault point and the exception handler ++ // then it must assume responsibility for that in ++ // AbstractCompiler::continuation_for_implicit_null_exception or ++ // continuation_for_implicit_division_by_zero_exception. All other ++ // implicit exceptions (e.g., NullPointerException or ++ // AbstractMethodError on entry) are either at call sites or ++ // otherwise assume that stack unwinding will be initiated, so ++ // caller saved registers were assumed volatile in the compiler. ++ address generate_throw_exception(const char* name, ++ address runtime_entry, ++ bool restore_saved_exception_pc) { ++ // Information about frame layout at time of blocking runtime call. ++ // Note that we only have to preserve callee-saved registers since ++ // the compilers are responsible for supplying a continuation point ++ // if they expect all registers to be preserved. 
++ enum layout { ++ thread_off, // last_java_sp ++ S7_off, // callee saved register sp + 1 ++ S6_off, // callee saved register sp + 2 ++ S5_off, // callee saved register sp + 3 ++ S4_off, // callee saved register sp + 4 ++ S3_off, // callee saved register sp + 5 ++ S2_off, // callee saved register sp + 6 ++ S1_off, // callee saved register sp + 7 ++ S0_off, // callee saved register sp + 8 ++ FP_off, ++ ret_address, ++ framesize ++ }; ++ ++ int insts_size = 2048; ++ int locs_size = 32; ++ ++ // CodeBuffer* code = new CodeBuffer(insts_size, locs_size, 0, 0, 0, false, ++ // NULL, NULL, NULL, false, NULL, name, false); ++ CodeBuffer code (name , insts_size, locs_size); ++ OopMapSet* oop_maps = new OopMapSet(); ++ MacroAssembler* masm = new MacroAssembler(&code); ++ ++ address start = __ pc(); ++ ++ // This is an inlined and slightly modified version of call_VM ++ // which has the ability to fetch the return PC out of ++ // thread-local storage and also sets up last_Java_sp slightly ++ // differently than the real call_VM ++#ifndef OPT_THREAD ++ Register java_thread = TREG; ++ __ get_thread(java_thread); ++#else ++ Register java_thread = TREG; ++#endif ++ if (restore_saved_exception_pc) { ++ __ ld(RA, java_thread, in_bytes(JavaThread::saved_exception_pc_offset())); ++ } ++ ++ __ enter(); // required for proper stackwalking of RuntimeStub frame ++ ++ __ addiu(SP, SP, (-1) * (framesize-2) * wordSize); // prolog ++ __ sd(S0, SP, S0_off * wordSize); ++ __ sd(S1, SP, S1_off * wordSize); ++ __ sd(S2, SP, S2_off * wordSize); ++ __ sd(S3, SP, S3_off * wordSize); ++ __ sd(S4, SP, S4_off * wordSize); ++ __ sd(S5, SP, S5_off * wordSize); ++ __ sd(S6, SP, S6_off * wordSize); ++ __ sd(S7, SP, S7_off * wordSize); ++ ++ int frame_complete = __ pc() - start; ++ // push java thread (becomes first argument of C function) ++ __ sd(java_thread, SP, thread_off * wordSize); ++ if (java_thread != A0) ++ __ move(A0, java_thread); ++ ++ // Set up last_Java_sp and last_Java_fp ++ __ set_last_Java_frame(java_thread, SP, FP, NULL); ++ // Align stack ++ assert(StackAlignmentInBytes == 16, "must be"); ++ __ dins(SP, R0, 0, 4); ++ ++ __ relocate(relocInfo::internal_pc_type); ++ { ++ intptr_t save_pc = (intptr_t)__ pc() + NativeMovConstReg::instruction_size + 28; ++ __ patchable_set48(AT, save_pc); ++ } ++ __ sd(AT, java_thread, in_bytes(JavaThread::last_Java_pc_offset())); ++ ++ // Call runtime ++ __ call(runtime_entry); ++ __ delayed()->nop(); ++ // Generate oop map ++ OopMap* map = new OopMap(framesize, 0); ++ oop_maps->add_gc_map(__ offset(), map); ++ ++ // restore the thread (cannot use the pushed argument since arguments ++ // may be overwritten by C code generated by an optimizing compiler); ++ // however can use the register value directly if it is callee saved. ++#ifndef OPT_THREAD ++ __ get_thread(java_thread); ++#endif ++ ++ __ ld(SP, java_thread, in_bytes(JavaThread::last_Java_sp_offset())); ++ __ reset_last_Java_frame(java_thread, true); ++ ++ // Restore callee save registers. 
This must be done after resetting the Java frame ++ __ ld(S0, SP, S0_off * wordSize); ++ __ ld(S1, SP, S1_off * wordSize); ++ __ ld(S2, SP, S2_off * wordSize); ++ __ ld(S3, SP, S3_off * wordSize); ++ __ ld(S4, SP, S4_off * wordSize); ++ __ ld(S5, SP, S5_off * wordSize); ++ __ ld(S6, SP, S6_off * wordSize); ++ __ ld(S7, SP, S7_off * wordSize); ++ ++ // discard arguments ++ __ move(SP, FP); // epilog ++ __ pop(FP); ++ ++ // check for pending exceptions ++#ifdef ASSERT ++ Label L; ++ __ ld(AT, java_thread, in_bytes(Thread::pending_exception_offset())); ++ __ bne(AT, R0, L); ++ __ delayed()->nop(); ++ __ should_not_reach_here(); ++ __ bind(L); ++#endif //ASSERT ++ __ jmp(StubRoutines::forward_exception_entry(), relocInfo::runtime_call_type); ++ __ delayed()->nop(); ++ RuntimeStub* stub = RuntimeStub::new_runtime_stub(name, ++ &code, ++ frame_complete, ++ framesize, ++ oop_maps, false); ++ return stub->entry_point(); ++ } ++ ++ class MontgomeryMultiplyGenerator : public MacroAssembler { ++ ++ Register Pa_base, Pb_base, Pn_base, Pm_base, inv, Rlen, Rlen2, Ra, Rb, Rm, ++ Rn, Iam, Ibn, Rhi_ab, Rlo_ab, Rhi_mn, Rlo_mn, t0, t1, t2, Ri, Rj; ++ ++ bool _squaring; ++ ++ public: ++ MontgomeryMultiplyGenerator (Assembler *as, bool squaring) ++ : MacroAssembler(as->code()), _squaring(squaring) { ++ ++ // Register allocation ++ ++ Register reg = A0; ++ Pa_base = reg; // Argument registers: ++ if (squaring) ++ Pb_base = Pa_base; ++ else ++ Pb_base = ++reg; ++ Pn_base = ++reg; ++ Rlen = ++reg; ++ inv = ++reg; ++ Rlen2 = inv; // Reuse inv ++ Pm_base = ++reg; ++ ++ // Working registers: ++ Ra = ++reg; // The current digit of a, b, n, and m. ++ Rb = ++reg; ++ Rm = ++reg; ++ Rn = ++reg; ++ ++ Iam = ++reg; // Index to the current/next digit of a, b, n, and m. ++ Ibn = ++reg; ++ ++ if (squaring) { ++ t0 = ++reg; // Three registers which form a ++ t1 = AT; // triple-precision accumuator. ++ t2 = V0; ++ ++ Ri = V1; // Inner and outer loop indexes. ++ Rj = T8; ++ ++ Rhi_ab = T9; // Product registers: low and high parts ++ Rlo_ab = S0; // of a*b and m*n. ++ ++ Rhi_mn = S1; ++ Rlo_mn = S2; ++ } else { ++ t0 = AT; // Three registers which form a ++ t1 = V0; // triple-precision accumuator. ++ t2 = V1; ++ ++ Ri = T8; // Inner and outer loop indexes. ++ Rj = T9; ++ ++ Rhi_ab = S0; // Product registers: low and high parts ++ Rlo_ab = S1; // of a*b and m*n. 
++ ++ Rhi_mn = S2; ++ Rlo_mn = S3; ++ } ++ } ++ ++ private: ++ void enter() { ++ addiu(SP, SP, -6 * wordSize); ++ sd(FP, SP, 0 * wordSize); ++ move(FP, SP); ++ } ++ ++ void leave() { ++ addiu(T0, FP, 6 * wordSize); ++ ld(FP, FP, 0 * wordSize); ++ move(SP, T0); ++ } ++ ++ void save_regs() { ++ if (!_squaring) ++ sd(Rhi_ab, FP, 5 * wordSize); ++ sd(Rlo_ab, FP, 4 * wordSize); ++ sd(Rhi_mn, FP, 3 * wordSize); ++ sd(Rlo_mn, FP, 2 * wordSize); ++ sd(Pm_base, FP, 1 * wordSize); ++ } ++ ++ void restore_regs() { ++ if (!_squaring) ++ ld(Rhi_ab, FP, 5 * wordSize); ++ ld(Rlo_ab, FP, 4 * wordSize); ++ ld(Rhi_mn, FP, 3 * wordSize); ++ ld(Rlo_mn, FP, 2 * wordSize); ++ ld(Pm_base, FP, 1 * wordSize); ++ } ++ ++ template ++ void unroll_2(Register count, T block, Register tmp) { ++ Label loop, end, odd; ++ andi(tmp, count, 1); ++ bne(tmp, R0, odd); ++ delayed()->nop(); ++ beq(count, R0, end); ++ delayed()->nop(); ++ align(16); ++ bind(loop); ++ (this->*block)(); ++ bind(odd); ++ (this->*block)(); ++ addiu32(count, count, -2); ++ bgtz(count, loop); ++ delayed()->nop(); ++ bind(end); ++ } ++ ++ template ++ void unroll_2(Register count, T block, Register d, Register s, Register tmp) { ++ Label loop, end, odd; ++ andi(tmp, count, 1); ++ bne(tmp, R0, odd); ++ delayed()->nop(); ++ beq(count, R0, end); ++ delayed()->nop(); ++ align(16); ++ bind(loop); ++ (this->*block)(d, s, tmp); ++ bind(odd); ++ (this->*block)(d, s, tmp); ++ addiu32(count, count, -2); ++ bgtz(count, loop); ++ delayed()->nop(); ++ bind(end); ++ } ++ ++ void acc(Register Rhi, Register Rlo, ++ Register t0, Register t1, Register t2, Register t, Register c) { ++ daddu(t0, t0, Rlo); ++ orr(t, t1, Rhi); ++ sltu(c, t0, Rlo); ++ daddu(t1, t1, Rhi); ++ daddu(t1, t1, c); ++ sltu(c, t1, t); ++ daddu(t2, t2, c); ++ } ++ ++ void pre1(Register i) { ++ block_comment("pre1"); ++ // Iam = 0; ++ // Ibn = i; ++ ++ sll(Ibn, i, LogBytesPerWord); ++ ++ // Ra = Pa_base[Iam]; ++ // Rb = Pb_base[Ibn]; ++ // Rm = Pm_base[Iam]; ++ // Rn = Pn_base[Ibn]; ++ ++ ld(Ra, Pa_base, 0); ++ gsldx(Rb, Pb_base, Ibn, 0); ++ ld(Rm, Pm_base, 0); ++ gsldx(Rn, Pn_base, Ibn, 0); ++ ++ move(Iam, R0); ++ ++ // Zero the m*n result. ++ move(Rhi_mn, R0); ++ move(Rlo_mn, R0); ++ } ++ ++ // The core multiply-accumulate step of a Montgomery ++ // multiplication. The idea is to schedule operations as a ++ // pipeline so that instructions with long latencies (loads and ++ // multiplies) have time to complete before their results are ++ // used. This most benefits in-order implementations of the ++ // architecture but out-of-order ones also benefit. ++ void step() { ++ block_comment("step"); ++ // MACC(Ra, Rb, t0, t1, t2); ++ // Ra = Pa_base[++Iam]; ++ // Rb = Pb_base[--Ibn]; ++ addiu32(Iam, Iam, wordSize); ++ addiu32(Ibn, Ibn, -wordSize); ++ dmultu(Ra, Rb); ++ acc(Rhi_mn, Rlo_mn, t0, t1, t2, Ra, Rb); // The pending m*n from the ++ // previous iteration. 
++ gsldx(Ra, Pa_base, Iam, 0); ++ mflo(Rlo_ab); ++ mfhi(Rhi_ab); ++ gsldx(Rb, Pb_base, Ibn, 0); ++ ++ // MACC(Rm, Rn, t0, t1, t2); ++ // Rm = Pm_base[Iam]; ++ // Rn = Pn_base[Ibn]; ++ dmultu(Rm, Rn); ++ acc(Rhi_ab, Rlo_ab, t0, t1, t2, Rm, Rn); ++ gsldx(Rm, Pm_base, Iam, 0); ++ mflo(Rlo_mn); ++ mfhi(Rhi_mn); ++ gsldx(Rn, Pn_base, Ibn, 0); ++ } ++ ++ void post1() { ++ block_comment("post1"); ++ ++ // MACC(Ra, Rb, t0, t1, t2); ++ dmultu(Ra, Rb); ++ acc(Rhi_mn, Rlo_mn, t0, t1, t2, Ra, Rb); // The pending m*n ++ mflo(Rlo_ab); ++ mfhi(Rhi_ab); ++ acc(Rhi_ab, Rlo_ab, t0, t1, t2, Ra, Rb); ++ ++ // Pm_base[Iam] = Rm = t0 * inv; ++ gsdmultu(Rm, t0, inv); ++ gssdx(Rm, Pm_base, Iam, 0); ++ ++ // MACC(Rm, Rn, t0, t1, t2); ++ // t0 = t1; t1 = t2; t2 = 0; ++ dmultu(Rm, Rn); ++ mfhi(Rhi_mn); ++ ++#ifndef PRODUCT ++ // assert(m[i] * n[0] + t0 == 0, "broken Montgomery multiply"); ++ { ++ mflo(Rlo_mn); ++ daddu(Rlo_mn, t0, Rlo_mn); ++ Label ok; ++ beq(Rlo_mn, R0, ok); ++ delayed()->nop(); { ++ stop("broken Montgomery multiply"); ++ } bind(ok); ++ } ++#endif ++ ++ // We have very carefully set things up so that ++ // m[i]*n[0] + t0 == 0 (mod b), so we don't have to calculate ++ // the lower half of Rm * Rn because we know the result already: ++ // it must be -t0. t0 + (-t0) must generate a carry iff ++ // t0 != 0. So, rather than do a mul and an adds we just set ++ // the carry flag iff t0 is nonzero. ++ // ++ // mflo(Rlo_mn); ++ // addu(t0, t0, Rlo_mn); ++ orr(Ra, t1, Rhi_mn); ++ sltu(Rb, R0, t0); ++ daddu(t0, t1, Rhi_mn); ++ daddu(t0, t0, Rb); ++ sltu(Rb, t0, Ra); ++ daddu(t1, t2, Rb); ++ move(t2, R0); ++ } ++ ++ void pre2(Register i, Register len) { ++ block_comment("pre2"); ++ ++ // Rj == i-len ++ subu32(Rj, i, len); ++ ++ // Iam = i - len; ++ // Ibn = len; ++ sll(Iam, Rj, LogBytesPerWord); ++ sll(Ibn, len, LogBytesPerWord); ++ ++ // Ra = Pa_base[++Iam]; ++ // Rb = Pb_base[--Ibn]; ++ // Rm = Pm_base[++Iam]; ++ // Rn = Pn_base[--Ibn]; ++ gsldx(Ra, Pa_base, Iam, wordSize); ++ gsldx(Rb, Pb_base, Ibn, -wordSize); ++ gsldx(Rm, Pm_base, Iam, wordSize); ++ gsldx(Rn, Pn_base, Ibn, -wordSize); ++ ++ addiu32(Iam, Iam, wordSize); ++ addiu32(Ibn, Ibn, -wordSize); ++ ++ move(Rhi_mn, R0); ++ move(Rlo_mn, R0); ++ } ++ ++ void post2(Register i, Register len) { ++ block_comment("post2"); ++ ++ subu32(Rj, i, len); ++ sll(Iam, Rj, LogBytesPerWord); ++ ++ daddu(t0, t0, Rlo_mn); // The pending m*n, low part ++ ++ // As soon as we know the least significant digit of our result, ++ // store it. ++ // Pm_base[i-len] = t0; ++ gssdx(t0, Pm_base, Iam, 0); ++ ++ // t0 = t1; t1 = t2; t2 = 0; ++ orr(Ra, t1, Rhi_mn); ++ sltu(Rb, t0, Rlo_mn); ++ daddu(t0, t1, Rhi_mn); // The pending m*n, high part ++ daddu(t0, t0, Rb); ++ sltu(Rb, t0, Ra); ++ daddu(t1, t2, Rb); ++ move(t2, R0); ++ } ++ ++ // A carry in t0 after Montgomery multiplication means that we ++ // should subtract multiples of n from our result in m. We'll ++ // keep doing that until there is no carry. 
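Two of the generator's helpers can be stated compactly in C: acc() adds a 64x64-bit product into the triple-precision accumulator t2:t1:t0, and normalize() below repeatedly subtracts the modulus n from the result m while the carry word t0 is non-zero. The names acc3 and sub_n are hypothetical and this is only a sketch; unsigned __int128 is a GCC/Clang extension used for brevity:

#include <stddef.h>
#include <stdint.h>

/* acc(): add the 128-bit product hi:lo into the accumulator t2:t1:t0. */
static void acc3(uint64_t hi, uint64_t lo, uint64_t t[3]) {
    unsigned __int128 a = ((unsigned __int128)hi << 64) | lo;
    unsigned __int128 s = (((unsigned __int128)t[1] << 64) | t[0]) + a;
    t[0]  = (uint64_t)s;
    t[1]  = (uint64_t)(s >> 64);
    t[2] += (s < a);                       /* carry out of the 128-bit add */
}

/* One subtraction pass inside normalize(): m -= n with borrow propagation;
 * the caller subtracts the returned borrow from t0. */
static uint64_t sub_n(uint64_t* m, const uint64_t* n, size_t len) {
    uint64_t borrow = 0;
    for (size_t i = 0; i < len; i++) {
        uint64_t mi = m[i];
        uint64_t t  = mi - borrow;
        borrow      = (mi < borrow);
        borrow     += (t < n[i]);
        m[i]        = t - n[i];
    }
    return borrow;
}
/* normalize() is then: while (t0) t0 -= sub_n(m, n, len); */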
++ void normalize(Register len) { ++ block_comment("normalize"); ++ // while (t0) ++ // t0 = sub(Pm_base, Pn_base, t0, len); ++ Label loop, post, again; ++ Register cnt = t1, i = t2, b = Ra, t = Rb; // Re-use registers; we're done with them now ++ beq(t0, R0, post); ++ delayed()->nop(); { ++ bind(again); { ++ move(i, R0); ++ move(b, R0); ++ sll(cnt, len, LogBytesPerWord); ++ align(16); ++ bind(loop); { ++ gsldx(Rm, Pm_base, i, 0); ++ gsldx(Rn, Pn_base, i, 0); ++ sltu(t, Rm, b); ++ dsubu(Rm, Rm, b); ++ sltu(b, Rm, Rn); ++ dsubu(Rm, Rm, Rn); ++ orr(b, b, t); ++ gssdx(Rm, Pm_base, i, 0); ++ addiu32(i, i, BytesPerWord); ++ } sltu(Rm, i, cnt); ++ bne(Rm, R0, loop); ++ delayed()->nop(); ++ subu(t0, t0, b); ++ } bne(t0, R0, again); ++ delayed()->nop(); ++ } bind(post); ++ } ++ ++ // Move memory at s to d, reversing words. ++ // Increments d to end of copied memory ++ // Destroys tmp1, tmp2, tmp3 ++ // Preserves len ++ // Leaves s pointing to the address which was in d at start ++ void reverse(Register d, Register s, Register len, Register tmp1, Register tmp2) { ++ assert(tmp1 < S0 && tmp2 < S0, "register corruption"); ++ ++ sll(tmp1, len, LogBytesPerWord); ++ addu(s, s, tmp1); ++ move(tmp1, len); ++ unroll_2(tmp1, &MontgomeryMultiplyGenerator::reverse1, d, s, tmp2); ++ sll(s, len, LogBytesPerWord); ++ subu(s, d, s); ++ } ++ ++ // where ++ void reverse1(Register d, Register s, Register tmp) { ++ ld(tmp, s, -wordSize); ++ addiu(s, s, -wordSize); ++ addiu(d, d, wordSize); ++ drotr32(tmp, tmp, 32 - 32); ++ sd(tmp, d, -wordSize); ++ } ++ ++ public: ++ /** ++ * Fast Montgomery multiplication. The derivation of the ++ * algorithm is in A Cryptographic Library for the Motorola ++ * DSP56000, Dusse and Kaliski, Proc. EUROCRYPT 90, pp. 230-237. ++ * ++ * Arguments: ++ * ++ * Inputs for multiplication: ++ * A0 - int array elements a ++ * A1 - int array elements b ++ * A2 - int array elements n (the modulus) ++ * A3 - int length ++ * A4 - int inv ++ * A5 - int array elements m (the result) ++ * ++ * Inputs for squaring: ++ * A0 - int array elements a ++ * A1 - int array elements n (the modulus) ++ * A2 - int length ++ * A3 - int inv ++ * A4 - int array elements m (the result) ++ * ++ */ ++ address generate_multiply() { ++ Label argh, nothing; ++ bind(argh); ++ stop("MontgomeryMultiply total_allocation must be <= 8192"); ++ ++ align(CodeEntryAlignment); ++ address entry = pc(); ++ ++ beq(Rlen, R0, nothing); ++ delayed()->nop(); ++ ++ enter(); ++ ++ // Make room. ++ sltiu(Ra, Rlen, 513); ++ beq(Ra, R0, argh); ++ delayed()->sll(Ra, Rlen, exact_log2(4 * sizeof (jint))); ++ subu(Ra, SP, Ra); ++ ++ srl(Rlen, Rlen, 1); // length in longwords = len/2 ++ ++ { ++ // Copy input args, reversing as we go. We use Ra as a ++ // temporary variable. ++ reverse(Ra, Pa_base, Rlen, t0, t1); ++ if (!_squaring) ++ reverse(Ra, Pb_base, Rlen, t0, t1); ++ reverse(Ra, Pn_base, Rlen, t0, t1); ++ } ++ ++ // Push all call-saved registers and also Pm_base which we'll need ++ // at the end. 
++ save_regs(); ++ ++#ifndef PRODUCT ++ // assert(inv * n[0] == -1UL, "broken inverse in Montgomery multiply"); ++ { ++ ld(Rn, Pn_base, 0); ++ li(t0, -1); ++ gsdmultu(Rlo_mn, Rn, inv); ++ Label ok; ++ beq(Rlo_mn, t0, ok); ++ delayed()->nop(); { ++ stop("broken inverse in Montgomery multiply"); ++ } bind(ok); ++ } ++#endif ++ ++ move(Pm_base, Ra); ++ ++ move(t0, R0); ++ move(t1, R0); ++ move(t2, R0); ++ ++ block_comment("for (int i = 0; i < len; i++) {"); ++ move(Ri, R0); { ++ Label loop, end; ++ slt(Ra, Ri, Rlen); ++ beq(Ra, R0, end); ++ delayed()->nop(); ++ ++ bind(loop); ++ pre1(Ri); ++ ++ block_comment(" for (j = i; j; j--) {"); { ++ move(Rj, Ri); ++ unroll_2(Rj, &MontgomeryMultiplyGenerator::step, Rlo_ab); ++ } block_comment(" } // j"); ++ ++ post1(); ++ addiu32(Ri, Ri, 1); ++ slt(Ra, Ri, Rlen); ++ bne(Ra, R0, loop); ++ delayed()->nop(); ++ bind(end); ++ block_comment("} // i"); ++ } ++ ++ block_comment("for (int i = len; i < 2*len; i++) {"); ++ move(Ri, Rlen); ++ sll(Rlen2, Rlen, 1); { ++ Label loop, end; ++ slt(Ra, Ri, Rlen2); ++ beq(Ra, R0, end); ++ delayed()->nop(); ++ ++ bind(loop); ++ pre2(Ri, Rlen); ++ ++ block_comment(" for (j = len*2-i-1; j; j--) {"); { ++ subu32(Rj, Rlen2, Ri); ++ addiu32(Rj, Rj, -1); ++ unroll_2(Rj, &MontgomeryMultiplyGenerator::step, Rlo_ab); ++ } block_comment(" } // j"); ++ ++ post2(Ri, Rlen); ++ addiu32(Ri, Ri, 1); ++ slt(Ra, Ri, Rlen2); ++ bne(Ra, R0, loop); ++ delayed()->nop(); ++ bind(end); ++ } ++ block_comment("} // i"); ++ ++ normalize(Rlen); ++ ++ move(Ra, Pm_base); // Save Pm_base in Ra ++ restore_regs(); // Restore caller's Pm_base ++ ++ // Copy our result into caller's Pm_base ++ reverse(Pm_base, Ra, Rlen, t0, t1); ++ ++ leave(); ++ bind(nothing); ++ jr(RA); ++ ++ return entry; ++ } ++ // In C, approximately: ++ ++ // void ++ // montgomery_multiply(unsigned long Pa_base[], unsigned long Pb_base[], ++ // unsigned long Pn_base[], unsigned long Pm_base[], ++ // unsigned long inv, int len) { ++ // unsigned long t0 = 0, t1 = 0, t2 = 0; // Triple-precision accumulator ++ // unsigned long Ra, Rb, Rn, Rm; ++ // int i, Iam, Ibn; ++ ++ // assert(inv * Pn_base[0] == -1UL, "broken inverse in Montgomery multiply"); ++ ++ // for (i = 0; i < len; i++) { ++ // int j; ++ ++ // Iam = 0; ++ // Ibn = i; ++ ++ // Ra = Pa_base[Iam]; ++ // Rb = Pb_base[Iam]; ++ // Rm = Pm_base[Ibn]; ++ // Rn = Pn_base[Ibn]; ++ ++ // int iters = i; ++ // for (j = 0; iters--; j++) { ++ // assert(Ra == Pa_base[j] && Rb == Pb_base[i-j], "must be"); ++ // MACC(Ra, Rb, t0, t1, t2); ++ // Ra = Pa_base[++Iam]; ++ // Rb = pb_base[--Ibn]; ++ // assert(Rm == Pm_base[j] && Rn == Pn_base[i-j], "must be"); ++ // MACC(Rm, Rn, t0, t1, t2); ++ // Rm = Pm_base[++Iam]; ++ // Rn = Pn_base[--Ibn]; ++ // } ++ ++ // assert(Ra == Pa_base[i] && Rb == Pb_base[0], "must be"); ++ // MACC(Ra, Rb, t0, t1, t2); ++ // Pm_base[Iam] = Rm = t0 * inv; ++ // assert(Rm == Pm_base[i] && Rn == Pn_base[0], "must be"); ++ // MACC(Rm, Rn, t0, t1, t2); ++ ++ // assert(t0 == 0, "broken Montgomery multiply"); ++ ++ // t0 = t1; t1 = t2; t2 = 0; ++ // } ++ ++ // for (i = len; i < 2*len; i++) { ++ // int j; ++ ++ // Iam = i - len; ++ // Ibn = len; ++ ++ // Ra = Pa_base[++Iam]; ++ // Rb = Pb_base[--Ibn]; ++ // Rm = Pm_base[++Iam]; ++ // Rn = Pn_base[--Ibn]; ++ ++ // int iters = len*2-i-1; ++ // for (j = i-len+1; iters--; j++) { ++ // assert(Ra == Pa_base[j] && Rb == Pb_base[i-j], "must be"); ++ // MACC(Ra, Rb, t0, t1, t2); ++ // Ra = Pa_base[++Iam]; ++ // Rb = Pb_base[--Ibn]; ++ // assert(Rm == Pm_base[j] && Rn == Pn_base[i-j], 
"must be"); ++ // MACC(Rm, Rn, t0, t1, t2); ++ // Rm = Pm_base[++Iam]; ++ // Rn = Pn_base[--Ibn]; ++ // } ++ ++ // Pm_base[i-len] = t0; ++ // t0 = t1; t1 = t2; t2 = 0; ++ // } ++ ++ // while (t0) ++ // t0 = sub(Pm_base, Pn_base, t0, len); ++ // } ++ }; ++ ++ // Initialization ++ void generate_initial() { ++ // Generates all stubs and initializes the entry points ++ ++ //------------------------------------------------------------- ++ //----------------------------------------------------------- ++ // entry points that exist in all platforms ++ // Note: This is code that could be shared among different platforms - however the benefit seems to be smaller ++ // than the disadvantage of having a much more complicated generator structure. ++ // See also comment in stubRoutines.hpp. ++ StubRoutines::_forward_exception_entry = generate_forward_exception(); ++ StubRoutines::_call_stub_entry = generate_call_stub(StubRoutines::_call_stub_return_address); ++ // is referenced by megamorphic call ++ StubRoutines::_catch_exception_entry = generate_catch_exception(); ++ ++ StubRoutines::_throw_StackOverflowError_entry = ++ generate_throw_exception("StackOverflowError throw_exception", ++ CAST_FROM_FN_PTR(address, SharedRuntime::throw_StackOverflowError), ++ false); ++ StubRoutines::_throw_delayed_StackOverflowError_entry = ++ generate_throw_exception("delayed StackOverflowError throw_exception", ++ CAST_FROM_FN_PTR(address, SharedRuntime::throw_delayed_StackOverflowError), ++ false); ++ } ++ ++ void generate_all() { ++ // Generates all stubs and initializes the entry points ++ ++ // These entry points require SharedInfo::stack0 to be set up in ++ // non-core builds and need to be relocatable, so they each ++ // fabricate a RuntimeStub internally. ++ StubRoutines::_throw_AbstractMethodError_entry = generate_throw_exception("AbstractMethodError throw_exception", ++ CAST_FROM_FN_PTR(address, SharedRuntime::throw_AbstractMethodError), false); ++ ++ StubRoutines::_throw_IncompatibleClassChangeError_entry = generate_throw_exception("IncompatibleClassChangeError throw_exception", ++ CAST_FROM_FN_PTR(address, SharedRuntime:: throw_IncompatibleClassChangeError), false); ++ ++ StubRoutines::_throw_NullPointerException_at_call_entry = generate_throw_exception("NullPointerException at call throw_exception", ++ CAST_FROM_FN_PTR(address, SharedRuntime::throw_NullPointerException_at_call), false); ++ ++ // entry points that are platform specific ++ ++ // support for verify_oop (must happen after universe_init) ++ StubRoutines::_verify_oop_subroutine_entry = generate_verify_oop(); ++#ifndef CORE ++ // arraycopy stubs used by compilers ++ generate_arraycopy_stubs(); ++#endif ++ ++#ifdef COMPILER2 ++ if (UseMontgomeryMultiplyIntrinsic) { ++ if (UseLEXT1) { ++ StubCodeMark mark(this, "StubRoutines", "montgomeryMultiply"); ++ MontgomeryMultiplyGenerator g(_masm, false /* squaring */); ++ StubRoutines::_montgomeryMultiply = g.generate_multiply(); ++ } else { ++ StubRoutines::_montgomeryMultiply ++ = CAST_FROM_FN_PTR(address, SharedRuntime::montgomery_multiply); ++ } ++ } ++ if (UseMontgomerySquareIntrinsic) { ++ if (UseLEXT1) { ++ StubCodeMark mark(this, "StubRoutines", "montgomerySquare"); ++ MontgomeryMultiplyGenerator g(_masm, true /* squaring */); ++ // We use generate_multiply() rather than generate_square() ++ // because it's faster for the sizes of modulus we care about. 
++ StubRoutines::_montgomerySquare = g.generate_multiply(); ++ } else { ++ StubRoutines::_montgomerySquare ++ = CAST_FROM_FN_PTR(address, SharedRuntime::montgomery_square); ++ } ++ } ++#endif ++ } ++ ++ public: ++ StubGenerator(CodeBuffer* code, bool all) : StubCodeGenerator(code) { ++ if (all) { ++ generate_all(); ++ } else { ++ generate_initial(); ++ } ++ } ++}; // end class declaration ++ ++#define UCM_TABLE_MAX_ENTRIES 2 ++void StubGenerator_generate(CodeBuffer* code, bool all) { ++ if (UnsafeCopyMemory::_table == NULL) { ++ UnsafeCopyMemory::create_table(UCM_TABLE_MAX_ENTRIES); ++ } ++ StubGenerator g(code, all); ++} +diff --git a/src/hotspot/cpu/mips/stubRoutines_mips.hpp b/src/hotspot/cpu/mips/stubRoutines_mips.hpp +new file mode 100644 +index 00000000000..920c08844e1 +--- /dev/null ++++ b/src/hotspot/cpu/mips/stubRoutines_mips.hpp +@@ -0,0 +1,59 @@ ++/* ++ * Copyright (c) 2003, 2013, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#ifndef CPU_MIPS_VM_STUBROUTINES_MIPS_64_HPP ++#define CPU_MIPS_VM_STUBROUTINES_MIPS_64_HPP ++ ++// This file holds the platform specific parts of the StubRoutines ++// definition. See stubRoutines.hpp for a description on how to ++// extend it. ++ ++static bool returns_to_call_stub(address return_pc){ ++ return return_pc == _call_stub_return_address||return_pc == gs2::get_call_stub_compiled_return(); ++} ++ ++enum platform_dependent_constants { ++ code_size1 = 20000, // simply increase if too small (assembler will crash if too small) ++ code_size2 = 40000 // simply increase if too small (assembler will crash if too small) ++}; ++ ++class gs2 { ++ friend class StubGenerator; ++ friend class VMStructs; ++ private: ++ // If we call compiled code directly from the call stub we will ++ // need to adjust the return back to the call stub to a specialized ++ // piece of code that can handle compiled results and cleaning the fpu ++ // stack. The variable holds that location. 
++ static address _call_stub_compiled_return; ++ ++public: ++ // Call back points for traps in compiled code ++ static address get_call_stub_compiled_return() { return _call_stub_compiled_return; } ++ static void set_call_stub_compiled_return(address ret){ _call_stub_compiled_return = ret; } ++ ++}; ++ ++#endif // CPU_MIPS_VM_STUBROUTINES_MIPS_64_HPP +diff --git a/src/hotspot/cpu/mips/stubRoutines_mips_64.cpp b/src/hotspot/cpu/mips/stubRoutines_mips_64.cpp +new file mode 100644 +index 00000000000..358d580d527 +--- /dev/null ++++ b/src/hotspot/cpu/mips/stubRoutines_mips_64.cpp +@@ -0,0 +1,35 @@ ++/* ++ * Copyright (c) 2003, 2013, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2021, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#include "precompiled.hpp" ++#include "runtime/deoptimization.hpp" ++#include "runtime/frame.inline.hpp" ++#include "runtime/stubRoutines.hpp" ++#include "runtime/thread.inline.hpp" ++ ++// a description of how to extend it, see the stubRoutines.hpp file. ++ ++//find the last fp value ++address StubRoutines::gs2::_call_stub_compiled_return = NULL; +diff --git a/src/hotspot/cpu/mips/templateInterpreterGenerator_mips.cpp b/src/hotspot/cpu/mips/templateInterpreterGenerator_mips.cpp +new file mode 100644 +index 00000000000..b723add6dfc +--- /dev/null ++++ b/src/hotspot/cpu/mips/templateInterpreterGenerator_mips.cpp +@@ -0,0 +1,2094 @@ ++/* ++ * Copyright (c) 2003, 2013, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 
++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#include "precompiled.hpp" ++#include "asm/macroAssembler.hpp" ++#include "interpreter/bytecodeHistogram.hpp" ++#include "interpreter/interp_masm.hpp" ++#include "interpreter/interpreter.hpp" ++#include "interpreter/interpreterRuntime.hpp" ++#include "interpreter/templateInterpreterGenerator.hpp" ++#include "interpreter/templateTable.hpp" ++#include "oops/arrayOop.hpp" ++#include "oops/methodData.hpp" ++#include "oops/method.hpp" ++#include "oops/oop.inline.hpp" ++#include "prims/jvmtiExport.hpp" ++#include "prims/jvmtiThreadState.hpp" ++#include "runtime/arguments.hpp" ++#include "runtime/deoptimization.hpp" ++#include "runtime/frame.inline.hpp" ++#include "runtime/jniHandles.hpp" ++#include "runtime/sharedRuntime.hpp" ++#include "runtime/stubRoutines.hpp" ++#include "runtime/synchronizer.hpp" ++#include "runtime/timer.hpp" ++#include "runtime/vframeArray.hpp" ++#include "utilities/debug.hpp" ++ ++#define __ _masm-> ++ ++#define A0 RA0 ++#define A1 RA1 ++#define A2 RA2 ++#define A3 RA3 ++#define A4 RA4 ++#define A5 RA5 ++#define A6 RA6 ++#define A7 RA7 ++#define T0 RT0 ++#define T1 RT1 ++#define T2 RT2 ++#define T3 RT3 ++#define T8 RT8 ++#define T9 RT9 ++ ++int TemplateInterpreter::InterpreterCodeSize = 500 * K; ++ ++#ifdef PRODUCT ++#define BLOCK_COMMENT(str) /* nothing */ ++#else ++#define BLOCK_COMMENT(str) __ block_comment(str) ++#endif ++ ++address TemplateInterpreterGenerator::generate_slow_signature_handler() { ++ address entry = __ pc(); ++ ++ // Rmethod: method ++ // LVP: pointer to locals ++ // A3: first stack arg ++ __ move(A3, SP); ++ __ daddiu(SP, SP, -10 * wordSize); ++ __ sd(RA, SP, 0); ++ __ call_VM(noreg, ++ CAST_FROM_FN_PTR(address, ++ InterpreterRuntime::slow_signature_handler), ++ Rmethod, LVP, A3); ++ ++ // V0: result handler ++ ++ // Stack layout: ++ // ... ++ // 10 stack arg0 <--- old sp ++ // 9 float/double identifiers ++ // 8 register arg7 ++ // ... ++ // 2 register arg1 ++ // 1 aligned slot ++ // SP: 0 return address ++ ++ // Do FP first so we can use T3 as temp ++ __ ld(T3, Address(SP, 9 * wordSize)); // float/double identifiers ++ ++ // A0 is for env. ++ // If the mothed is not static, A1 will be corrected in generate_native_entry. ++ for ( int i = 1; i < Argument::n_register_parameters; i++ ) { ++ Register reg = as_Register(i + A0->encoding()); ++ FloatRegister floatreg = as_FloatRegister(i + F12->encoding()); ++ Label isfloatordouble, isdouble, next; ++ ++ __ andi(AT, T3, 1 << (i*2)); // Float or Double? ++ __ bne(AT, R0, isfloatordouble); ++ __ delayed()->nop(); ++ ++ // Do Int register here ++ __ ld(reg, SP, (1 + i) * wordSize); ++ __ b (next); ++ __ delayed()->nop(); ++ ++ __ bind(isfloatordouble); ++ __ andi(AT, T3, 1 << ((i*2)+1)); // Double? 
++ __ bne(AT, R0, isdouble); ++ __ delayed()->nop(); ++ ++ // Do Float Here ++ __ lwc1(floatreg, SP, (1 + i) * wordSize); ++ __ b(next); ++ __ delayed()->nop(); ++ ++ // Do Double here ++ __ bind(isdouble); ++ __ ldc1(floatreg, SP, (1 + i) * wordSize); ++ ++ __ bind(next); ++ } ++ ++ __ ld(RA, SP, 0); ++ __ daddiu(SP, SP, 10 * wordSize); ++ __ jr(RA); ++ __ delayed()->nop(); ++ return entry; ++} ++ ++/** ++ * Method entry for static native methods: ++ * int java.util.zip.CRC32.update(int crc, int b) ++ */ ++address TemplateInterpreterGenerator::generate_CRC32_update_entry() { ++ if (UseCRC32Intrinsics) { ++ address entry = __ pc(); ++ Unimplemented(); ++ return entry; ++ } ++ return NULL; ++} ++ ++/** ++ * Method entry for static native methods: ++ * int java.util.zip.CRC32.updateBytes(int crc, byte[] b, int off, int len) ++ * int java.util.zip.CRC32.updateByteBuffer(int crc, long buf, int off, int len) ++ */ ++address TemplateInterpreterGenerator::generate_CRC32_updateBytes_entry(AbstractInterpreter::MethodKind kind) { ++ if (UseCRC32Intrinsics) { ++ address entry = __ pc(); ++ Unimplemented(); ++ return entry; ++ } ++ return NULL; ++} ++ ++/** ++* Method entry for static (non-native) methods: ++* int java.util.zip.CRC32C.updateBytes(int crc, byte[] b, int off, int end) ++* int java.util.zip.CRC32C.updateDirectByteBuffer(int crc, long address, int off, int end) ++*/ ++address TemplateInterpreterGenerator::generate_CRC32C_updateBytes_entry(AbstractInterpreter::MethodKind kind) { ++ if (UseCRC32CIntrinsics) { ++ address entry = __ pc(); ++ Unimplemented(); ++ return entry; ++ } ++ return NULL; ++} ++ ++// ++// Various method entries ++// ++ ++address TemplateInterpreterGenerator::generate_math_entry(AbstractInterpreter::MethodKind kind) { ++ if (!InlineIntrinsics) return NULL; // Generate a vanilla entry ++ ++ // These don't need a safepoint check because they aren't virtually ++ // callable. We won't enter these intrinsics from compiled code. ++ // If in the future we added an intrinsic which was virtually callable ++ // we'd have to worry about how to safepoint so that this code is used. 
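++ // In outline, each entry generated below loads its operands from the
++ // expression stack at SP, restores the caller's stack pointer from Rsender
++ // to pop them, leaves the result in F0 and returns through RA; the
++ // transcendental cases additionally save RA/SP in F24/F25 across the
++ // runtime call made by generate_transcendental_entry().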
++ ++ // mathematical functions inlined by compiler ++ // (interpreter must provide identical implementation ++ // in order to avoid monotonicity bugs when switching ++ // from interpreter to compiler in the middle of some ++ // computation) ++ // ++ // stack: ++ // [ arg ] <-- sp ++ // [ arg ] ++ // retaddr in ra ++ ++ address entry_point = NULL; ++ switch (kind) { ++ case Interpreter::java_lang_math_abs: ++ entry_point = __ pc(); ++ __ ldc1(F12, SP, 0); ++ __ abs_d(F0, F12); ++ __ move(SP, Rsender); ++ break; ++ case Interpreter::java_lang_math_sqrt: ++ entry_point = __ pc(); ++ __ ldc1(F12, SP, 0); ++ __ sqrt_d(F0, F12); ++ __ move(SP, Rsender); ++ break; ++ case Interpreter::java_lang_math_sin : ++ case Interpreter::java_lang_math_cos : ++ case Interpreter::java_lang_math_tan : ++ case Interpreter::java_lang_math_log : ++ case Interpreter::java_lang_math_log10 : ++ case Interpreter::java_lang_math_exp : ++ entry_point = __ pc(); ++ __ ldc1(F12, SP, 0); ++ __ move(SP, Rsender); ++ __ dmtc1(RA, F24); ++ __ dmtc1(SP, F25); ++ __ dins(SP, R0, 0, exact_log2(StackAlignmentInBytes)); ++ generate_transcendental_entry(kind, 1); ++ __ dmfc1(SP, F25); ++ __ dmfc1(RA, F24); ++ break; ++ case Interpreter::java_lang_math_pow : ++ entry_point = __ pc(); ++ __ ldc1(F12, SP, 2 * Interpreter::stackElementSize); ++ __ ldc1(F13, SP, 0); ++ __ move(SP, Rsender); ++ __ dmtc1(RA, F24); ++ __ dmtc1(SP, F25); ++ __ dins(SP, R0, 0, exact_log2(StackAlignmentInBytes)); ++ generate_transcendental_entry(kind, 2); ++ __ dmfc1(SP, F25); ++ __ dmfc1(RA, F24); ++ break; ++ case Interpreter::java_lang_math_fmaD : ++ if (UseFMA) { ++ entry_point = __ pc(); ++ __ ldc1(F12, SP, 4 * Interpreter::stackElementSize); ++ __ ldc1(F13, SP, 2 * Interpreter::stackElementSize); ++ __ ldc1(F14, SP, 0); ++ __ madd_d(F0, F14, F13, F12); ++ __ move(SP, Rsender); ++ } ++ break; ++ case Interpreter::java_lang_math_fmaF : ++ if (UseFMA) { ++ entry_point = __ pc(); ++ __ lwc1(F12, SP, 2 * Interpreter::stackElementSize); ++ __ lwc1(F13, SP, Interpreter::stackElementSize); ++ __ lwc1(F14, SP, 0); ++ __ madd_s(F0, F14, F13, F12); ++ __ move(SP, Rsender); ++ } ++ break; ++ default: ++ ; ++ } ++ if (entry_point) { ++ __ jr(RA); ++ __ delayed()->nop(); ++ } ++ ++ return entry_point; ++} ++ ++ // double trigonometrics and transcendentals ++ // static jdouble dsin(jdouble x); ++ // static jdouble dcos(jdouble x); ++ // static jdouble dtan(jdouble x); ++ // static jdouble dlog(jdouble x); ++ // static jdouble dlog10(jdouble x); ++ // static jdouble dexp(jdouble x); ++ // static jdouble dpow(jdouble x, jdouble y); ++ ++void TemplateInterpreterGenerator::generate_transcendental_entry(AbstractInterpreter::MethodKind kind, int fpargs) { ++ address fn; ++ switch (kind) { ++ case Interpreter::java_lang_math_sin : ++ if (StubRoutines::dsin() == NULL) { ++ fn = CAST_FROM_FN_PTR(address, SharedRuntime::dsin); ++ } else { ++ fn = CAST_FROM_FN_PTR(address, StubRoutines::dsin()); ++ } ++ break; ++ case Interpreter::java_lang_math_cos : ++ if (StubRoutines::dcos() == NULL) { ++ fn = CAST_FROM_FN_PTR(address, SharedRuntime::dcos); ++ } else { ++ fn = CAST_FROM_FN_PTR(address, StubRoutines::dcos()); ++ } ++ break; ++ case Interpreter::java_lang_math_tan : ++ if (StubRoutines::dtan() == NULL) { ++ fn = CAST_FROM_FN_PTR(address, SharedRuntime::dtan); ++ } else { ++ fn = CAST_FROM_FN_PTR(address, StubRoutines::dtan()); ++ } ++ break; ++ case Interpreter::java_lang_math_log : ++ if (StubRoutines::dlog() == NULL) { ++ fn = CAST_FROM_FN_PTR(address, 
SharedRuntime::dlog); ++ } else { ++ fn = CAST_FROM_FN_PTR(address, StubRoutines::dlog()); ++ } ++ break; ++ case Interpreter::java_lang_math_log10 : ++ if (StubRoutines::dlog10() == NULL) { ++ fn = CAST_FROM_FN_PTR(address, SharedRuntime::dlog10); ++ } else { ++ fn = CAST_FROM_FN_PTR(address, StubRoutines::dlog10()); ++ } ++ break; ++ case Interpreter::java_lang_math_exp : ++ if (StubRoutines::dexp() == NULL) { ++ fn = CAST_FROM_FN_PTR(address, SharedRuntime::dexp); ++ } else { ++ fn = CAST_FROM_FN_PTR(address, StubRoutines::dexp()); ++ } ++ break; ++ case Interpreter::java_lang_math_pow : ++ if (StubRoutines::dpow() == NULL) { ++ fn = CAST_FROM_FN_PTR(address, SharedRuntime::dpow); ++ } else { ++ fn = CAST_FROM_FN_PTR(address, StubRoutines::dpow()); ++ } ++ break; ++ default: ++ ShouldNotReachHere(); ++ fn = NULL; // unreachable ++ } ++ __ li(T9, fn); ++ __ jalr(T9); ++ __ delayed()->nop(); ++} ++ ++// Abstract method entry ++// Attempt to execute abstract method. Throw exception ++address TemplateInterpreterGenerator::generate_abstract_entry(void) { ++ ++ // Rmethod: Method* ++ // V0: receiver (unused) ++ // Rsender : sender 's sp ++ address entry_point = __ pc(); ++ ++ // abstract method entry ++ // throw exception ++ // adjust stack to what a normal return would do ++ __ empty_expression_stack(); ++ __ restore_bcp(); ++ __ restore_locals(); ++ __ call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::throw_AbstractMethodErrorWithMethod), Rmethod); ++ // the call_VM checks for exception, so we should never return here. ++ __ should_not_reach_here(); ++ ++ return entry_point; ++} ++ ++ ++const int method_offset = frame::interpreter_frame_method_offset * wordSize; ++const int bci_offset = frame::interpreter_frame_bcp_offset * wordSize; ++const int locals_offset = frame::interpreter_frame_locals_offset * wordSize; ++ ++//----------------------------------------------------------------------------- ++ ++address TemplateInterpreterGenerator::generate_StackOverflowError_handler() { ++ address entry = __ pc(); ++ ++#ifdef ASSERT ++ { ++ Label L; ++ __ addiu(T1, FP, frame::interpreter_frame_monitor_block_top_offset * wordSize); ++ __ subu(T1, T1, SP); // T1 = maximal sp for current fp ++ __ bgez(T1, L); // check if frame is complete ++ __ delayed()->nop(); ++ __ stop("interpreter frame not set up"); ++ __ bind(L); ++ } ++#endif // ASSERT ++ // Restore bcp under the assumption that the current frame is still ++ // interpreted ++ // FIXME: please change the func restore_bcp ++ // S0 is the conventional register for bcp ++ __ restore_bcp(); ++ ++ // expression stack must be empty before entering the VM if an ++ // exception happened ++ __ empty_expression_stack(); ++ // throw exception ++ // FIXME: why do not pass parameter thread ? ++ __ call_VM(NOREG, CAST_FROM_FN_PTR(address, InterpreterRuntime::throw_StackOverflowError)); ++ return entry; ++} ++ ++address TemplateInterpreterGenerator::generate_ArrayIndexOutOfBounds_handler() { ++ address entry = __ pc(); ++ // expression stack must be empty before entering the VM if an ++ // exception happened ++ __ empty_expression_stack(); ++ // ??? 
convention: expect array in register A1 ++ __ call_VM(noreg, CAST_FROM_FN_PTR(address, ++ InterpreterRuntime::throw_ArrayIndexOutOfBoundsException), A1, A2); ++ return entry; ++} ++ ++address TemplateInterpreterGenerator::generate_ClassCastException_handler() { ++ address entry = __ pc(); ++ ++ // expression stack must be empty before entering the VM if an ++ // exception happened ++ __ empty_expression_stack(); ++ __ empty_FPU_stack(); ++ __ call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::throw_ClassCastException), FSR); ++ return entry; ++} ++ ++address TemplateInterpreterGenerator::generate_exception_handler_common( ++ const char* name, const char* message, bool pass_oop) { ++ assert(!pass_oop || message == NULL, "either oop or message but not both"); ++ address entry = __ pc(); ++ ++ // expression stack must be empty before entering the VM if an exception happened ++ __ empty_expression_stack(); ++ // setup parameters ++ __ li(A1, (long)name); ++ if (pass_oop) { ++ __ call_VM(V0, ++ CAST_FROM_FN_PTR(address, InterpreterRuntime::create_klass_exception), A1, FSR); ++ } else { ++ __ li(A2, (long)message); ++ __ call_VM(V0, ++ CAST_FROM_FN_PTR(address, InterpreterRuntime::create_exception), A1, A2); ++ } ++ // throw exception ++ __ jmp(Interpreter::throw_exception_entry(), relocInfo::none); ++ __ delayed()->nop(); ++ return entry; ++} ++ ++address TemplateInterpreterGenerator::generate_return_entry_for(TosState state, int step, size_t index_size) { ++ ++ address entry = __ pc(); ++ ++ // Restore stack bottom in case i2c adjusted stack ++ __ ld(SP, Address(FP, frame::interpreter_frame_last_sp_offset * wordSize)); ++ // and NULL it as marker that sp is now tos until next java call ++ __ sd(R0, FP, frame::interpreter_frame_last_sp_offset * wordSize); ++ ++ __ restore_bcp(); ++ __ restore_locals(); ++ ++ // mdp: T8 ++ // ret: FSR ++ // tmp: T9 ++ if (state == atos) { ++ Register mdp = T8; ++ Register tmp = T9; ++ __ profile_return_type(mdp, FSR, tmp); ++ } ++ ++ ++ const Register cache = T9; ++ const Register index = T3; ++ __ get_cache_and_index_at_bcp(cache, index, 1, index_size); ++ ++ const Register flags = cache; ++ __ dsll(AT, index, Address::times_ptr); ++ __ daddu(AT, cache, AT); ++ __ lw(flags, AT, in_bytes(ConstantPoolCache::base_offset() + ConstantPoolCacheEntry::flags_offset())); ++ __ andi(flags, flags, ConstantPoolCacheEntry::parameter_size_mask); ++ __ dsll(AT, flags, Interpreter::logStackElementSize); ++ __ daddu(SP, SP, AT); ++ ++ Register java_thread; ++#ifndef OPT_THREAD ++ java_thread = T9; ++ __ get_thread(java_thread); ++#else ++ java_thread = TREG; ++#endif ++ ++ __ check_and_handle_popframe(java_thread); ++ __ check_and_handle_earlyret(java_thread); ++ ++ __ dispatch_next(state, step); ++ ++ return entry; ++} ++ ++ ++address TemplateInterpreterGenerator::generate_deopt_entry_for(TosState state, ++ int step, ++ address continuation) { ++ address entry = __ pc(); ++ // NULL last_sp until next java call ++ __ sd(R0, FP, frame::interpreter_frame_last_sp_offset * wordSize); ++ __ restore_bcp(); ++ __ restore_locals(); ++ // handle exceptions ++ { ++ Label L; ++ const Register thread = TREG; ++#ifndef OPT_THREAD ++ __ get_thread(thread); ++#endif ++ __ ld(AT, thread, in_bytes(Thread::pending_exception_offset())); ++ __ beq(AT, R0, L); ++ __ delayed()->nop(); ++ __ call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::throw_pending_exception)); ++ __ should_not_reach_here(); ++ __ bind(L); ++ } ++ if (continuation == NULL) { ++ __ dispatch_next(state, 
step); ++ } else { ++ __ jump_to_entry(continuation); ++ __ delayed()->nop(); ++ } ++ return entry; ++} ++ ++int AbstractInterpreter::BasicType_as_index(BasicType type) { ++ int i = 0; ++ switch (type) { ++ case T_BOOLEAN: i = 0; break; ++ case T_CHAR : i = 1; break; ++ case T_BYTE : i = 2; break; ++ case T_SHORT : i = 3; break; ++ case T_INT : // fall through ++ case T_LONG : // fall through ++ case T_VOID : i = 4; break; ++ case T_FLOAT : i = 5; break; ++ case T_DOUBLE : i = 6; break; ++ case T_OBJECT : // fall through ++ case T_ARRAY : i = 7; break; ++ default : ShouldNotReachHere(); ++ } ++ assert(0 <= i && i < AbstractInterpreter::number_of_result_handlers, ++ "index out of bounds"); ++ return i; ++} ++ ++ ++address TemplateInterpreterGenerator::generate_result_handler_for( ++ BasicType type) { ++ address entry = __ pc(); ++ switch (type) { ++ case T_BOOLEAN: __ c2bool(V0); break; ++ case T_CHAR : __ andi(V0, V0, 0xFFFF); break; ++ case T_BYTE : __ sign_extend_byte (V0); break; ++ case T_SHORT : __ sign_extend_short(V0); break; ++ case T_INT : /* nothing to do */ break; ++ case T_FLOAT : /* nothing to do */ break; ++ case T_DOUBLE : /* nothing to do */ break; ++ case T_OBJECT : ++ { ++ __ ld(V0, FP, frame::interpreter_frame_oop_temp_offset * wordSize); ++ __ verify_oop(V0); // and verify it ++ } ++ break; ++ default : ShouldNotReachHere(); ++ } ++ __ jr(RA); // return from result handler ++ __ delayed()->nop(); ++ return entry; ++} ++ ++address TemplateInterpreterGenerator::generate_safept_entry_for( ++ TosState state, ++ address runtime_entry) { ++ address entry = __ pc(); ++ __ push(state); ++ __ call_VM(noreg, runtime_entry); ++ __ dispatch_via(vtos, Interpreter::_normal_table.table_for(vtos)); ++ return entry; ++} ++ ++ ++ ++// Helpers for commoning out cases in the various type of method entries. ++// ++ ++ ++// increment invocation count & check for overflow ++// ++// Note: checking for negative value instead of overflow ++// so we have a 'sticky' overflow test ++// ++// Rmethod: method ++void TemplateInterpreterGenerator::generate_counter_incr(Label* overflow) { ++ Label done; ++ int increment = InvocationCounter::count_increment; ++ Label no_mdo; ++ if (ProfileInterpreter) { ++ // Are we profiling? ++ __ ld(T0, Address(Rmethod, Method::method_data_offset())); ++ __ beq(T0, R0, no_mdo); ++ __ delayed()->nop(); ++ // Increment counter in the MDO ++ const Address mdo_invocation_counter(T0, in_bytes(MethodData::invocation_counter_offset()) + ++ in_bytes(InvocationCounter::counter_offset())); ++ const Address mask(T0, in_bytes(MethodData::invoke_mask_offset())); ++ __ increment_mask_and_jump(mdo_invocation_counter, increment, mask, T1, false, Assembler::zero, overflow); ++ __ b(done); ++ __ delayed()->nop(); ++ } ++ __ bind(no_mdo); ++ // Increment counter in MethodCounters ++ const Address invocation_counter(T0, ++ MethodCounters::invocation_counter_offset() + ++ InvocationCounter::counter_offset()); ++ __ get_method_counters(Rmethod, T0, done); ++ const Address mask(T0, in_bytes(MethodCounters::invoke_mask_offset())); ++ __ increment_mask_and_jump(invocation_counter, increment, mask, T1, false, Assembler::zero, overflow); ++ __ bind(done); ++} ++ ++void TemplateInterpreterGenerator::generate_counter_overflow(Label& do_continue) { ++ ++ // Asm interpreter on entry ++ // S7 - locals ++ // S0 - bcp ++ // Rmethod - method ++ // FP - interpreter frame ++ ++ // On return (i.e. 
jump to entry_point) ++ // Rmethod - method ++ // RA - return address of interpreter caller ++ // tos - the last parameter to Java method ++ // SP - sender_sp ++ ++ // the bcp is valid if and only if it's not null ++ __ call_VM(NOREG, CAST_FROM_FN_PTR(address, ++ InterpreterRuntime::frequency_counter_overflow), R0); ++ __ ld(Rmethod, FP, method_offset); ++ // Preserve invariant that S0/S7 contain bcp/locals of sender frame ++ __ b_far(do_continue); ++ __ delayed()->nop(); ++} ++ ++// See if we've got enough room on the stack for locals plus overhead. ++// The expression stack grows down incrementally, so the normal guard ++// page mechanism will work for that. ++// ++// NOTE: Since the additional locals are also always pushed (wasn't ++// obvious in generate_method_entry) so the guard should work for them ++// too. ++// ++// Args: ++// T2: number of additional locals this frame needs (what we must check) ++// T0: Method* ++// ++void TemplateInterpreterGenerator::generate_stack_overflow_check(void) { ++ // see if we've got enough room on the stack for locals plus overhead. ++ // the expression stack grows down incrementally, so the normal guard ++ // page mechanism will work for that. ++ // ++ // Registers live on entry: ++ // ++ // T0: Method* ++ // T2: number of additional locals this frame needs (what we must check) ++ ++ // NOTE: since the additional locals are also always pushed (wasn't obvious in ++ // generate_method_entry) so the guard should work for them too. ++ // ++ ++ const int entry_size = frame::interpreter_frame_monitor_size() * wordSize; ++ ++ // total overhead size: entry_size + (saved fp thru expr stack bottom). ++ // be sure to change this if you add/subtract anything to/from the overhead area ++ const int overhead_size = -(frame::interpreter_frame_initial_sp_offset*wordSize) ++ + entry_size; ++ ++ const int page_size = os::vm_page_size(); ++ ++ Label after_frame_check; ++ ++ // see if the frame is greater than one page in size. If so, ++ // then we need to verify there is enough stack space remaining ++ // for the additional locals. 
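++ // Roughly, the check emitted below is:
++ //   if (additional_locals * stackElementSize + overhead_size <= page_size)
++ //     goto after_frame_check;           // frame fits in the guarded page
++ //   limit = stack_base - stack_size + max_bang_size;
++ //   if (SP > limit + additional_locals * stackElementSize + overhead_size)
++ //     goto after_frame_check;           // enough stack remains
++ //   SP = Rsender; jump to the shared StackOverflowError stub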
++ __ move(AT, (page_size - overhead_size) / Interpreter::stackElementSize); ++ __ slt(AT, AT, T2); ++ __ beq(AT, R0, after_frame_check); ++ __ delayed()->nop(); ++ ++ // compute sp as if this were going to be the last frame on ++ // the stack before the red zone ++#ifndef OPT_THREAD ++ Register thread = T1; ++ __ get_thread(thread); ++#else ++ Register thread = TREG; ++#endif ++ ++ // locals + overhead, in bytes ++ __ dsll(T3, T2, Interpreter::logStackElementSize); ++ __ daddiu(T3, T3, overhead_size); // locals * 4 + overhead_size --> T3 ++ ++#ifdef ASSERT ++ Label stack_base_okay, stack_size_okay; ++ // verify that thread stack base is non-zero ++ __ ld(AT, thread, in_bytes(Thread::stack_base_offset())); ++ __ bne(AT, R0, stack_base_okay); ++ __ delayed()->nop(); ++ __ stop("stack base is zero"); ++ __ bind(stack_base_okay); ++ // verify that thread stack size is non-zero ++ __ ld(AT, thread, in_bytes(Thread::stack_size_offset())); ++ __ bne(AT, R0, stack_size_okay); ++ __ delayed()->nop(); ++ __ stop("stack size is zero"); ++ __ bind(stack_size_okay); ++#endif ++ ++ // Add stack base to locals and subtract stack size ++ __ ld(AT, thread, in_bytes(Thread::stack_base_offset())); // stack_base --> AT ++ __ daddu(T3, T3, AT); // locals * 4 + overhead_size + stack_base--> T3 ++ __ ld(AT, thread, in_bytes(Thread::stack_size_offset())); // stack_size --> AT ++ __ dsubu(T3, T3, AT); // locals * 4 + overhead_size + stack_base - stack_size --> T3 ++ ++ // Use the bigger size for banging. ++ const int max_bang_size = (int)MAX2(StackOverflow::stack_shadow_zone_size(), StackOverflow::stack_guard_zone_size()); ++ ++ // add in the redzone and yellow size ++ __ move(AT, max_bang_size); ++ __ addu(T3, T3, AT); ++ ++ // check against the current stack bottom ++ __ slt(AT, T3, SP); ++ __ bne(AT, R0, after_frame_check); ++ __ delayed()->nop(); ++ ++ // Note: the restored frame is not necessarily interpreted. ++ // Use the shared runtime version of the StackOverflowError. 
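++ // Restore the caller's SP first so the shared stub sees the stack as the
++ // caller left it, abandoning this never-completed activation.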
++ __ move(SP, Rsender); ++ assert(StubRoutines::throw_StackOverflowError_entry() != NULL, "stub not yet generated"); ++ __ jmp(StubRoutines::throw_StackOverflowError_entry(), relocInfo::runtime_call_type); ++ __ delayed()->nop(); ++ ++ // all done with frame size check ++ __ bind(after_frame_check); ++} ++ ++// Allocate monitor and lock method (asm interpreter) ++// Rmethod - Method* ++void TemplateInterpreterGenerator::lock_method(void) { ++ // synchronize method ++ const int entry_size = frame::interpreter_frame_monitor_size() * wordSize; ++ ++#ifdef ASSERT ++ { Label L; ++ __ lw(T0, Rmethod, in_bytes(Method::access_flags_offset())); ++ __ andi(T0, T0, JVM_ACC_SYNCHRONIZED); ++ __ bne(T0, R0, L); ++ __ delayed()->nop(); ++ __ stop("method doesn't need synchronization"); ++ __ bind(L); ++ } ++#endif // ASSERT ++ // get synchronization object ++ { ++ Label done; ++ __ lw(T0, Rmethod, in_bytes(Method::access_flags_offset())); ++ __ andi(T2, T0, JVM_ACC_STATIC); ++ __ ld(T0, LVP, Interpreter::local_offset_in_bytes(0)); ++ __ beq(T2, R0, done); ++ __ delayed()->nop(); ++ __ load_mirror(T0, Rmethod, T9); ++ __ bind(done); ++ } ++ // add space for monitor & lock ++ __ daddiu(SP, SP, (-1) * entry_size); // add space for a monitor entry ++ __ sd(SP, FP, frame::interpreter_frame_monitor_block_top_offset * wordSize); ++ // set new monitor block top ++ __ sd(T0, SP, BasicObjectLock::obj_offset_in_bytes()); // store object ++ // FIXME: I do not know what lock_object will do and what it will need ++ __ move(c_rarg0, SP); // object address ++ __ lock_object(c_rarg0); ++} ++ ++// Generate a fixed interpreter frame. This is identical setup for ++// interpreted methods and for native methods hence the shared code. ++void TemplateInterpreterGenerator::generate_fixed_frame(bool native_call) { ++ ++ // [ local var m-1 ] <--- sp ++ // ... ++ // [ local var 0 ] ++ // [ argumnet word n-1 ] <--- T0(sender's sp) ++ // ... 
++ // [ argument word 0 ] <--- S7 ++ ++ // initialize fixed part of activation frame ++ // sender's sp in Rsender ++ int i = 0; ++ int frame_size = 10; ++#ifndef CORE ++ ++frame_size; ++#endif ++ __ daddiu(SP, SP, (-frame_size) * wordSize); ++ __ sd(RA, SP, (frame_size - 1) * wordSize); // save return address ++ __ sd(FP, SP, (frame_size - 2) * wordSize); // save sender's fp ++ __ daddiu(FP, SP, (frame_size - 2) * wordSize); ++ __ sd(Rsender, FP, (-++i) * wordSize); // save sender's sp ++ __ sd(R0, FP,(-++i) * wordSize); //save last_sp as null ++ __ sd(LVP, FP, (-++i) * wordSize); // save locals offset ++ __ ld(BCP, Rmethod, in_bytes(Method::const_offset())); // get constMethodOop ++ __ daddiu(BCP, BCP, in_bytes(ConstMethod::codes_offset())); // get codebase ++ __ sd(Rmethod, FP, (-++i) * wordSize); // save Method* ++ // Get mirror and store it in the frame as GC root for this Method* ++ __ load_mirror(T2, Rmethod, T9); ++ __ sd(T2, FP, (-++i) * wordSize); // Mirror ++#ifndef CORE ++ if (ProfileInterpreter) { ++ Label method_data_continue; ++ __ ld(AT, Rmethod, in_bytes(Method::method_data_offset())); ++ __ beq(AT, R0, method_data_continue); ++ __ delayed()->nop(); ++ __ daddiu(AT, AT, in_bytes(MethodData::data_offset())); ++ __ bind(method_data_continue); ++ __ sd(AT, FP, (-++i) * wordSize); ++ } else { ++ __ sd(R0, FP, (-++i) * wordSize); ++ } ++#endif // !CORE ++ ++ __ ld(T2, Rmethod, in_bytes(Method::const_offset())); ++ __ ld(T2, T2, in_bytes(ConstMethod::constants_offset())); ++ __ ld(T2, T2, ConstantPool::cache_offset_in_bytes()); ++ __ sd(T2, FP, (-++i) * wordSize); // set constant pool cache ++ if (native_call) { ++ __ sd(R0, FP, (-++i) * wordSize); // no bcp ++ } else { ++ __ sd(BCP, FP, (-++i) * wordSize); // set bcp ++ } ++ __ sd(SP, FP, (-++i) * wordSize); // reserve word for pointer to expression stack bottom ++ assert(i + 2 == frame_size, "i + 2 should be equal to frame_size"); ++} ++ ++// End of helpers ++ ++// Various method entries ++//------------------------------------------------------------------------------------------------------------------------ ++// ++// ++ ++// Method entry for java.lang.ref.Reference.get. ++address TemplateInterpreterGenerator::generate_Reference_get_entry(void) { ++ address entry = __ pc(); ++ Label slow_path; ++ __ b(slow_path); ++ __ delayed()->nop(); ++ ++ // generate a vanilla interpreter entry as the slow path ++ __ bind(slow_path); ++ __ jump_to_entry(Interpreter::entry_for_kind(Interpreter::zerolocals)); ++ __ delayed()->nop(); ++ return entry; ++} ++ ++// Interpreter stub for calling a native method. (asm interpreter) ++// This sets up a somewhat different looking stack for calling the ++// native method than the typical interpreter frame setup. 
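++// In outline: the stub builds a fixed interpreter frame, lets the signature
++// handler marshal the Java arguments into the native ABI locations, passes
++// JNIEnv (and the mirror handle for static methods), flips the thread state
++// to _thread_in_native around the JNI call, then re-checks for safepoints,
++// unboxes an oop result if necessary and removes the activation.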
++address TemplateInterpreterGenerator::generate_native_entry(bool synchronized) { ++ // determine code generation flags ++ bool inc_counter = UseCompiler || CountCompiledCalls || LogTouchedMethods; ++ // Rsender: sender's sp ++ // Rmethod: Method* ++ address entry_point = __ pc(); ++ ++#ifndef CORE ++ const Address invocation_counter(Rmethod,in_bytes(MethodCounters::invocation_counter_offset() + ++ InvocationCounter::counter_offset())); ++#endif ++ ++ // get parameter size (always needed) ++ // the size in the java stack ++ __ ld(V0, Rmethod, in_bytes(Method::const_offset())); ++ __ lhu(V0, V0, in_bytes(ConstMethod::size_of_parameters_offset())); ++ ++ // native calls don't need the stack size check since they have no expression stack ++ // and the arguments are already on the stack and we only add a handful of words ++ // to the stack ++ ++ // Rmethod: Method* ++ // V0: size of parameters ++ // Layout of frame at this point ++ // ++ // [ argument word n-1 ] <--- sp ++ // ... ++ // [ argument word 0 ] ++ ++ // for natives the size of locals is zero ++ ++ // compute beginning of parameters (S7) ++ __ dsll(LVP, V0, Address::times_8); ++ __ daddiu(LVP, LVP, (-1) * wordSize); ++ __ daddu(LVP, LVP, SP); ++ ++ ++ // add 2 zero-initialized slots for native calls ++ // 1 slot for native oop temp offset (setup via runtime) ++ // 1 slot for static native result handler3 (setup via runtime) ++ __ push2(R0, R0); ++ ++ // Layout of frame at this point ++ // [ method holder mirror ] <--- sp ++ // [ result type info ] ++ // [ argument word n-1 ] <--- T0 ++ // ... ++ // [ argument word 0 ] <--- LVP ++ ++ ++#ifndef CORE ++ if (inc_counter) __ lw(T3, invocation_counter); // (pre-)fetch invocation count ++#endif ++ ++ // initialize fixed part of activation frame ++ generate_fixed_frame(true); ++ // after this function, the layout of frame is as following ++ // ++ // [ monitor block top ] <--- sp ( the top monitor entry ) ++ // [ byte code pointer (0) ] (if native, bcp = 0) ++ // [ constant pool cache ] ++ // [ Mirror ] ++ // [ Method* ] ++ // [ locals offset ] ++ // [ sender's sp ] ++ // [ sender's fp ] ++ // [ return address ] <--- fp ++ // [ method holder mirror ] ++ // [ result type info ] ++ // [ argumnet word n-1 ] <--- sender's sp ++ // ... ++ // [ argument word 0 ] <--- S7 ++ ++ ++ // make sure method is native & not abstract ++#ifdef ASSERT ++ __ lw(T0, Rmethod, in_bytes(Method::access_flags_offset())); ++ { ++ Label L; ++ __ andi(AT, T0, JVM_ACC_NATIVE); ++ __ bne(AT, R0, L); ++ __ delayed()->nop(); ++ __ stop("tried to execute native method as non-native"); ++ __ bind(L); ++ } ++ { ++ Label L; ++ __ andi(AT, T0, JVM_ACC_ABSTRACT); ++ __ beq(AT, R0, L); ++ __ delayed()->nop(); ++ __ stop("tried to execute abstract method in interpreter"); ++ __ bind(L); ++ } ++#endif ++ ++ // Since at this point in the method invocation the exception handler ++ // would try to exit the monitor of synchronized methods which hasn't ++ // been entered yet, we set the thread local variable ++ // _do_not_unlock_if_synchronized to true. The remove_activation will ++ // check this flag. 
++ Register thread = TREG; ++#ifndef OPT_THREAD ++ __ get_thread(thread); ++#endif ++ __ move(AT, (int)true); ++ __ sb(AT, thread, in_bytes(JavaThread::do_not_unlock_if_synchronized_offset())); ++ ++#ifndef CORE ++ // increment invocation count & check for overflow ++ Label invocation_counter_overflow; ++ if (inc_counter) { ++ generate_counter_incr(&invocation_counter_overflow); ++ } ++ ++ Label continue_after_compile; ++ __ bind(continue_after_compile); ++#endif // CORE ++ ++ bang_stack_shadow_pages(true); ++ ++ // reset the _do_not_unlock_if_synchronized flag ++#ifndef OPT_THREAD ++ __ get_thread(thread); ++#endif ++ __ sb(R0, thread, in_bytes(JavaThread::do_not_unlock_if_synchronized_offset())); ++ ++ // check for synchronized methods ++ // Must happen AFTER invocation_counter check and stack overflow check, ++ // so method is not locked if overflows. ++ if (synchronized) { ++ lock_method(); ++ } else { ++ // no synchronization necessary ++#ifdef ASSERT ++ { ++ Label L; ++ __ lw(T0, Rmethod, in_bytes(Method::access_flags_offset())); ++ __ andi(AT, T0, JVM_ACC_SYNCHRONIZED); ++ __ beq(AT, R0, L); ++ __ delayed()->nop(); ++ __ stop("method needs synchronization"); ++ __ bind(L); ++ } ++#endif ++ } ++ ++ // after method_lock, the layout of frame is as following ++ // ++ // [ monitor entry ] <--- sp ++ // ... ++ // [ monitor entry ] ++ // [ monitor block top ] ( the top monitor entry ) ++ // [ byte code pointer (0) ] (if native, bcp = 0) ++ // [ constant pool cache ] ++ // [ Mirror ] ++ // [ Method* ] ++ // [ locals offset ] ++ // [ sender's sp ] ++ // [ sender's fp ] ++ // [ return address ] <--- fp ++ // [ method holder mirror ] ++ // [ result type info ] ++ // [ argumnet word n-1 ] <--- ( sender's sp ) ++ // ... ++ // [ argument word 0 ] <--- S7 ++ ++ // start execution ++#ifdef ASSERT ++ { ++ Label L; ++ __ ld(AT, FP, frame::interpreter_frame_monitor_block_top_offset * wordSize); ++ __ beq(AT, SP, L); ++ __ delayed()->nop(); ++ __ stop("broken stack frame setup in interpreter in asm"); ++ __ bind(L); ++ } ++#endif ++ ++ // jvmti/jvmpi support ++ __ notify_method_entry(); ++ ++ // work registers ++ const Register method = Rmethod; ++ const Register t = T8; ++ ++ __ get_method(method); ++ { ++ Label L, Lstatic; ++ __ ld(t,method,in_bytes(Method::const_offset())); ++ __ lhu(t, t, in_bytes(ConstMethod::size_of_parameters_offset())); ++ // MIPS n64 ABI: caller does not reserve space for the register auguments. ++ // A0 and A1(if needed) ++ __ lw(AT, Rmethod, in_bytes(Method::access_flags_offset())); ++ __ andi(AT, AT, JVM_ACC_STATIC); ++ __ beq(AT, R0, Lstatic); ++ __ delayed()->nop(); ++ __ daddiu(t, t, 1); ++ __ bind(Lstatic); ++ __ daddiu(t, t, -7); ++ __ blez(t, L); ++ __ delayed()->nop(); ++ __ dsll(t, t, Address::times_8); ++ __ dsubu(SP, SP, t); ++ __ bind(L); ++ } ++ assert(StackAlignmentInBytes == 16, "must be"); ++ __ dins(SP, R0, 0, 4); ++ __ move(AT, SP); ++ // [ ] <--- sp ++ // ... (size of parameters - 8 ) ++ // [ monitor entry ] ++ // ... ++ // [ monitor entry ] ++ // [ monitor block top ] ( the top monitor entry ) ++ // [ byte code pointer (0) ] (if native, bcp = 0) ++ // [ constant pool cache ] ++ // [ Mirror ] ++ // [ Method* ] ++ // [ locals offset ] ++ // [ sender's sp ] ++ // [ sender's fp ] ++ // [ return address ] <--- fp ++ // [ method holder mirror ] ++ // [ result type info ] ++ // [ argumnet word n-1 ] <--- ( sender's sp ) ++ // ... 
++ // [ argument word 0 ] <--- LVP ++ ++ // get signature handler ++ { ++ Label L; ++ __ ld(T9, method, in_bytes(Method::signature_handler_offset())); ++ __ bne(T9, R0, L); ++ __ delayed()->nop(); ++ __ call_VM(NOREG, CAST_FROM_FN_PTR(address, ++ InterpreterRuntime::prepare_native_call), method); ++ __ get_method(method); ++ __ ld(T9, method, in_bytes(Method::signature_handler_offset())); ++ __ bind(L); ++ } ++ ++ // call signature handler ++ // FIXME: when change codes in InterpreterRuntime, note this point ++ // from: begin of parameters ++ assert(InterpreterRuntime::SignatureHandlerGenerator::from() == LVP, "adjust this code"); ++ // to: current sp ++ assert(InterpreterRuntime::SignatureHandlerGenerator::to () == SP, "adjust this code"); ++ // temp: T3 ++ assert(InterpreterRuntime::SignatureHandlerGenerator::temp() == t , "adjust this code"); ++ ++ __ jalr(T9); ++ __ delayed()->nop(); ++ __ get_method(method); ++ ++ // ++ // if native function is static, and its second parameter has type length of double word, ++ // and first parameter has type length of word, we have to reserve one word ++ // for the first parameter, according to mips o32 abi. ++ // if native function is not static, and its third parameter has type length of double word, ++ // and second parameter has type length of word, we have to reserve one word for the second ++ // parameter. ++ // ++ ++ ++ // result handler is in V0 ++ // set result handler ++ __ sd(V0, FP, (frame::interpreter_frame_result_handler_offset)*wordSize); ++ ++#define FIRSTPARA_SHIFT_COUNT 5 ++#define SECONDPARA_SHIFT_COUNT 9 ++#define THIRDPARA_SHIFT_COUNT 13 ++#define PARA_MASK 0xf ++ ++ // pass mirror handle if static call ++ { ++ Label L; ++ __ lw(t, method, in_bytes(Method::access_flags_offset())); ++ __ andi(AT, t, JVM_ACC_STATIC); ++ __ beq(AT, R0, L); ++ __ delayed()->nop(); ++ ++ // get mirror ++ __ load_mirror(t, method, T9); ++ // copy mirror into activation frame ++ __ sd(t, FP, frame::interpreter_frame_oop_temp_offset * wordSize); ++ // pass handle to mirror ++ __ daddiu(t, FP, frame::interpreter_frame_oop_temp_offset * wordSize); ++ __ move(A1, t); ++ __ bind(L); ++ } ++ ++ // [ mthd holder mirror ptr ] <--- sp --------------------| (only for static method) ++ // [ ] | ++ // ... size of parameters(or +1) | ++ // [ monitor entry ] | ++ // ... | ++ // [ monitor entry ] | ++ // [ monitor block top ] ( the top monitor entry ) | ++ // [ byte code pointer (0) ] (if native, bcp = 0) | ++ // [ constant pool cache ] | ++ // [ Mirror ] | ++ // [ Method* ] | ++ // [ locals offset ] | ++ // [ sender's sp ] | ++ // [ sender's fp ] | ++ // [ return address ] <--- fp | ++ // [ method holder mirror ] <----------------------------| ++ // [ result type info ] ++ // [ argumnet word n-1 ] <--- ( sender's sp ) ++ // ... 
++ // [ argument word 0 ] <--- S7 ++ ++ // get native function entry point ++ { Label L; ++ __ ld(T9, method, in_bytes(Method::native_function_offset())); ++ __ li(V1, SharedRuntime::native_method_throw_unsatisfied_link_error_entry()); ++ __ bne(V1, T9, L); ++ __ delayed()->nop(); ++ __ call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::prepare_native_call), method); ++ __ get_method(method); ++ __ ld(T9, method, in_bytes(Method::native_function_offset())); ++ __ bind(L); ++ } ++ ++ // pass JNIEnv ++ // native function in T9 ++#ifndef OPT_THREAD ++ __ get_thread(thread); ++#endif ++ __ daddiu(t, thread, in_bytes(JavaThread::jni_environment_offset())); ++ __ move(A0, t); ++ // [ jni environment ] <--- sp ++ // [ mthd holder mirror ptr ] ---------------------------->| (only for static method) ++ // [ ] | ++ // ... size of parameters | ++ // [ monitor entry ] | ++ // ... | ++ // [ monitor entry ] | ++ // [ monitor block top ] ( the top monitor entry ) | ++ // [ byte code pointer (0) ] (if native, bcp = 0) | ++ // [ constant pool cache ] | ++ // [ Mirror ] | ++ // [ Method* ] | ++ // [ locals offset ] | ++ // [ sender's sp ] | ++ // [ sender's fp ] | ++ // [ return address ] <--- fp | ++ // [ method holder mirror ] <----------------------------| ++ // [ result type info ] ++ // [ argumnet word n-1 ] <--- ( sender's sp ) ++ // ... ++ // [ argument word 0 ] <--- S7 ++ ++ // set_last_Java_frame_before_call ++ __ sd(FP, thread, in_bytes(JavaThread::last_Java_fp_offset())); ++ // Change state to native (we save the return address in the thread, since it might not ++ // be pushed on the stack when we do a a stack traversal). It is enough that the pc() ++ // points into the right code segment. It does not have to be the correct return pc. ++ __ li(t, __ pc()); ++ __ sd(t, thread, in_bytes(JavaThread::last_Java_pc_offset())); ++ __ sd(SP, thread, in_bytes(JavaThread::last_Java_sp_offset())); ++ ++ // change thread state ++#ifdef ASSERT ++ { ++ Label L; ++ __ lw(t, thread, in_bytes(JavaThread::thread_state_offset())); ++ __ daddiu(t, t, (-1) * _thread_in_Java); ++ __ beq(t, R0, L); ++ __ delayed()->nop(); ++ __ stop("Wrong thread state in native stub"); ++ __ bind(L); ++ } ++#endif ++ ++ __ move(t, _thread_in_native); ++ if(os::is_MP()) { ++ __ sync(); // store release ++ } ++ __ sw(t, thread, in_bytes(JavaThread::thread_state_offset())); ++ ++ // call native method ++ __ jalr(T9); ++ __ delayed()->nop(); ++ // result potentially in V0 or F0 ++ ++ ++ // via _last_native_pc and not via _last_jave_sp ++ // NOTE: the order of theses push(es) is known to frame::interpreter_frame_result. ++ // If the order changes or anything else is added to the stack the code in ++ // interpreter_frame_result will have to be changed. ++ //FIXME, should modify here ++ // save return value to keep the value from being destroyed by other calls ++ __ push(dtos); ++ __ push(ltos); ++ ++ // change thread state ++#ifndef OPT_THREAD ++ __ get_thread(thread); ++#endif ++ __ move(t, _thread_in_native_trans); ++ if(os::is_MP()) { ++ __ sync(); // store release ++ } ++ __ sw(t, thread, in_bytes(JavaThread::thread_state_offset())); ++ ++ if( os::is_MP() ) __ sync(); // Force this write out before the read below ++ ++ // check for safepoint operation in progress and/or pending suspend requests ++ { Label Continue; ++ ++ // Don't use call_VM as it will see a possible pending exception and forward it ++ // and never return here preventing us from clearing _last_native_pc down below. 
++ // Also can't use call_VM_leaf either as it will check to see if BCP & LVP are ++ // preserved and correspond to the bcp/locals pointers. So we do a runtime call ++ // by hand. ++ // ++ Label slow_path; ++ ++ __ safepoint_poll_acquire(slow_path, thread); ++ __ lw(AT, thread, in_bytes(JavaThread::suspend_flags_offset())); ++ __ beq(AT, R0, Continue); ++ __ delayed()->nop(); ++ __ bind(slow_path); ++ __ move(A0, thread); ++ __ call(CAST_FROM_FN_PTR(address, JavaThread::check_special_condition_for_native_trans), ++ relocInfo::runtime_call_type); ++ __ delayed()->nop(); ++ ++#ifndef OPT_THREAD ++ __ get_thread(thread); ++#endif ++ //add for compressedoops ++ __ reinit_heapbase(); ++ __ bind(Continue); ++ } ++ ++ // change thread state ++ __ move(t, _thread_in_Java); ++ if(os::is_MP()) { ++ __ sync(); // store release ++ } ++ __ sw(t, thread, in_bytes(JavaThread::thread_state_offset())); ++ __ reset_last_Java_frame(thread, true); ++ ++ if (CheckJNICalls) { ++ // clear_pending_jni_exception_check ++ __ sd(R0, thread, in_bytes(JavaThread::pending_jni_exception_check_fn_offset())); ++ } ++ ++ // reset handle block ++ __ ld(t, thread, in_bytes(JavaThread::active_handles_offset())); ++ __ sw(R0, t, JNIHandleBlock::top_offset_in_bytes()); ++ ++ // If result was an oop then unbox and save it in the frame ++ { ++ Label no_oop; ++ //FIXME, addi only support 16-bit imeditate ++ __ ld(AT, FP, frame::interpreter_frame_result_handler_offset*wordSize); ++ __ li(T0, AbstractInterpreter::result_handler(T_OBJECT)); ++ __ bne(AT, T0, no_oop); ++ __ delayed()->nop(); ++ __ pop(ltos); ++ // Unbox oop result, e.g. JNIHandles::resolve value. ++ __ resolve_jobject(V0, thread, T9); ++ __ sd(V0, FP, (frame::interpreter_frame_oop_temp_offset)*wordSize); ++ // keep stack depth as expected by pushing oop which will eventually be discarded ++ __ push(ltos); ++ __ bind(no_oop); ++ } ++ { ++ Label no_reguard; ++ __ lw(t, thread, in_bytes(JavaThread::stack_guard_state_offset())); ++ __ move(AT, (u1)StackOverflow::stack_guard_yellow_reserved_disabled); ++ __ bne(t, AT, no_reguard); ++ __ delayed()->nop(); ++ __ pushad(); ++ __ move(S5_heapbase, SP); ++ assert(StackAlignmentInBytes == 16, "must be"); ++ __ dins(SP, R0, 0, 4); ++ __ call(CAST_FROM_FN_PTR(address, SharedRuntime::reguard_yellow_pages), relocInfo::runtime_call_type); ++ __ delayed()->nop(); ++ __ move(SP, S5_heapbase); ++ __ popad(); ++ //add for compressedoops ++ __ reinit_heapbase(); ++ __ bind(no_reguard); ++ } ++ // restore BCP to have legal interpreter frame, ++ // i.e., bci == 0 <=> BCP == code_base() ++ // Can't call_VM until bcp is within reasonable. ++ __ get_method(method); // method is junk from thread_in_native to now. ++ __ ld(BCP, method, in_bytes(Method::const_offset())); ++ __ lea(BCP, Address(BCP, in_bytes(ConstMethod::codes_offset()))); ++ // handle exceptions (exception handling will handle unlocking!) ++ { ++ Label L; ++ __ ld(t, thread, in_bytes(Thread::pending_exception_offset())); ++ __ beq(t, R0, L); ++ __ delayed()->nop(); ++ // Note: At some point we may want to unify this with the code used in ++ // call_VM_base(); ++ // i.e., we should use the StubRoutines::forward_exception code. For now this ++ // doesn't work here because the sp is not correctly set at this point. 
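++ // Instead the pending exception is rethrown through a regular call_VM to
++ // InterpreterRuntime::throw_pending_exception; control does not return here.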
++ __ MacroAssembler::call_VM(noreg, ++ CAST_FROM_FN_PTR(address, ++ InterpreterRuntime::throw_pending_exception)); ++ __ should_not_reach_here(); ++ __ bind(L); ++ } ++ ++ // do unlocking if necessary ++ { ++ Label L; ++ __ lw(t, method, in_bytes(Method::access_flags_offset())); ++ __ andi(t, t, JVM_ACC_SYNCHRONIZED); ++ __ beq(t, R0, L); ++ // the code below should be shared with interpreter macro assembler implementation ++ { ++ Label unlock; ++ // BasicObjectLock will be first in list, ++ // since this is a synchronized method. However, need ++ // to check that the object has not been unlocked by ++ // an explicit monitorexit bytecode. ++ __ delayed()->daddiu(c_rarg0, FP, frame::interpreter_frame_initial_sp_offset * wordSize - (int)sizeof(BasicObjectLock)); ++ // address of first monitor ++ ++ __ ld(t, c_rarg0, BasicObjectLock::obj_offset_in_bytes()); ++ __ bne(t, R0, unlock); ++ __ delayed()->nop(); ++ ++ // Entry already unlocked, need to throw exception ++ __ MacroAssembler::call_VM(NOREG, CAST_FROM_FN_PTR(address, ++ InterpreterRuntime::throw_illegal_monitor_state_exception)); ++ __ should_not_reach_here(); ++ ++ __ bind(unlock); ++ __ unlock_object(c_rarg0); ++ } ++ __ bind(L); ++ } ++ ++ // jvmti/jvmpi support ++ // Note: This must happen _after_ handling/throwing any exceptions since ++ // the exception handler code notifies the runtime of method exits ++ // too. If this happens before, method entry/exit notifications are ++ // not properly paired (was bug - gri 11/22/99). ++ __ notify_method_exit(vtos, InterpreterMacroAssembler::NotifyJVMTI); ++ ++ // restore potential result in V0, ++ // call result handler to restore potential result in ST0 & handle result ++ ++ __ pop(ltos); ++ __ pop(dtos); ++ ++ __ ld(t, FP, (frame::interpreter_frame_result_handler_offset) * wordSize); ++ __ jalr(t); ++ __ delayed()->nop(); ++ ++ ++ // remove activation ++ __ ld(SP, FP, frame::interpreter_frame_sender_sp_offset * wordSize); // get sender sp ++ __ ld(RA, FP, frame::interpreter_frame_return_addr_offset * wordSize); // get return address ++ __ ld(FP, FP, frame::interpreter_frame_sender_fp_offset * wordSize); // restore sender's fp ++ __ jr(RA); ++ __ delayed()->nop(); ++ ++#ifndef CORE ++ if (inc_counter) { ++ // Handle overflow of counter and compile method ++ __ bind(invocation_counter_overflow); ++ generate_counter_overflow(continue_after_compile); ++ // entry_point is the beginning of this ++ // function and checks again for compiled code ++ } ++#endif ++ return entry_point; ++} ++ ++void TemplateInterpreterGenerator::bang_stack_shadow_pages(bool native_call) { ++ // Quick & dirty stack overflow checking: bang the stack & handle trap. ++ // Note that we do the banging after the frame is setup, since the exception ++ // handling code expects to find a valid interpreter frame on the stack. ++ // Doing the banging earlier fails if the caller frame is not an interpreter ++ // frame. ++ // (Also, the exception throwing code expects to unlock any synchronized ++ // method receiever, so do the banging after locking the receiver.) ++ ++ // Bang each page in the shadow zone. We can't assume it's been done for ++ // an interpreter frame with greater than a page of locals, so each page ++ // needs to be checked. Only true for non-native. ++ const int page_size = os::vm_page_size(); ++ const int n_shadow_pages = ((int)StackOverflow::stack_shadow_zone_size()) / page_size; ++ const int start_page = native_call ? 
n_shadow_pages : 1; ++ BLOCK_COMMENT("bang_stack_shadow_pages:"); ++ for (int pages = start_page; pages <= n_shadow_pages; pages++) { ++ __ bang_stack_with_offset(pages*page_size); ++ } ++} ++ ++// ++// Generic interpreted method entry to (asm) interpreter ++// ++// Layout of frame just at the entry ++// ++// [ argument word n-1 ] <--- sp ++// ... ++// [ argument word 0 ] ++// assume Method* in Rmethod before call this method. ++// prerequisites to the generated stub : the callee Method* in Rmethod ++// note you must save the caller bcp before call the generated stub ++// ++address TemplateInterpreterGenerator::generate_normal_entry(bool synchronized) { ++ // determine code generation flags ++ bool inc_counter = UseCompiler || CountCompiledCalls || LogTouchedMethods; ++ ++ // Rmethod: Method* ++ // Rsender: sender 's sp ++ address entry_point = __ pc(); ++ ++ const Address invocation_counter(Rmethod, ++ in_bytes(MethodCounters::invocation_counter_offset() + InvocationCounter::counter_offset())); ++ ++ // get parameter size (always needed) ++ __ ld(T3, Rmethod, in_bytes(Method::const_offset())); //T3 --> Rmethod._constMethod ++ __ lhu(V0, T3, in_bytes(ConstMethod::size_of_parameters_offset())); ++ ++ // Rmethod: Method* ++ // V0: size of parameters ++ // Rsender: sender 's sp ,could be different frome sp+ wordSize if we call via c2i ++ // get size of locals in words to T2 ++ __ lhu(T2, T3, in_bytes(ConstMethod::size_of_locals_offset())); ++ // T2 = no. of additional locals, locals include parameters ++ __ dsubu(T2, T2, V0); ++ ++ // see if we've got enough room on the stack for locals plus overhead. ++ // Layout of frame at this point ++ // ++ // [ argument word n-1 ] <--- sp ++ // ... ++ // [ argument word 0 ] ++ generate_stack_overflow_check(); ++ // after this function, the layout of frame does not change ++ ++ // compute beginning of parameters (LVP) ++ __ dsll(LVP, V0, LogBytesPerWord); ++ __ daddiu(LVP, LVP, (-1) * wordSize); ++ __ daddu(LVP, LVP, SP); ++ ++ // T2 - # of additional locals ++ // allocate space for locals ++ // explicitly initialize locals ++ { ++ Label exit, loop; ++ __ beq(T2, R0, exit); ++ __ delayed()->nop(); ++ ++ __ bind(loop); ++ __ daddiu(SP, SP, (-1) * wordSize); ++ __ daddiu(T2, T2, -1); // until everything initialized ++ __ bne(T2, R0, loop); ++ __ delayed()->sd(R0, SP, 0); // initialize local variables ++ ++ __ bind(exit); ++ } ++ ++ // ++ // [ local var m-1 ] <--- sp ++ // ... ++ // [ local var 0 ] ++ // [ argument word n-1 ] <--- T0? ++ // ... ++ // [ argument word 0 ] <--- LVP ++ ++ // initialize fixed part of activation frame ++ ++ generate_fixed_frame(false); ++ ++ ++ // after this function, the layout of frame is as following ++ // ++ // [ monitor block top ] <--- sp ( the top monitor entry ) ++ // [ byte code pointer ] (if native, bcp = 0) ++ // [ constant pool cache ] ++ // [ Method* ] ++ // [ locals offset ] ++ // [ sender's sp ] ++ // [ sender's fp ] <--- fp ++ // [ return address ] ++ // [ local var m-1 ] ++ // ... ++ // [ local var 0 ] ++ // [ argumnet word n-1 ] <--- ( sender's sp ) ++ // ... 
++ // [ argument word 0 ] <--- LVP ++ ++ ++ // make sure method is not native & not abstract ++#ifdef ASSERT ++ __ ld(AT, Rmethod, in_bytes(Method::access_flags_offset())); ++ { ++ Label L; ++ __ andi(T2, AT, JVM_ACC_NATIVE); ++ __ beq(T2, R0, L); ++ __ delayed()->nop(); ++ __ stop("tried to execute native method as non-native"); ++ __ bind(L); ++ } ++ { ++ Label L; ++ __ andi(T2, AT, JVM_ACC_ABSTRACT); ++ __ beq(T2, R0, L); ++ __ delayed()->nop(); ++ __ stop("tried to execute abstract method in interpreter"); ++ __ bind(L); ++ } ++#endif ++ ++ // Since at this point in the method invocation the exception handler ++ // would try to exit the monitor of synchronized methods which hasn't ++ // been entered yet, we set the thread local variable ++ // _do_not_unlock_if_synchronized to true. The remove_activation will ++ // check this flag. ++ ++#ifndef OPT_THREAD ++ Register thread = T8; ++ __ get_thread(thread); ++#else ++ Register thread = TREG; ++#endif ++ __ move(AT, (int)true); ++ __ sb(AT, thread, in_bytes(JavaThread::do_not_unlock_if_synchronized_offset())); ++ ++#ifndef CORE ++ ++ // mdp : T8 ++ // tmp1: T9 ++ // tmp2: T2 ++ __ profile_parameters_type(T8, T9, T2); ++ ++ // increment invocation count & check for overflow ++ Label invocation_counter_overflow; ++ if (inc_counter) { ++ generate_counter_incr(&invocation_counter_overflow); ++ } ++ ++ Label continue_after_compile; ++ __ bind(continue_after_compile); ++ ++#endif // CORE ++ ++ bang_stack_shadow_pages(false); ++ ++ // reset the _do_not_unlock_if_synchronized flag ++#ifndef OPT_THREAD ++ __ get_thread(thread); ++#endif ++ __ sb(R0, thread, in_bytes(JavaThread::do_not_unlock_if_synchronized_offset())); ++ ++ // check for synchronized methods ++ // Must happen AFTER invocation_counter check and stack overflow check, ++ // so method is not locked if overflows. ++ // ++ if (synchronized) { ++ // Allocate monitor and lock method ++ lock_method(); ++ } else { ++ // no synchronization necessary ++#ifdef ASSERT ++ { Label L; ++ __ lw(AT, Rmethod, in_bytes(Method::access_flags_offset())); ++ __ andi(T2, AT, JVM_ACC_SYNCHRONIZED); ++ __ beq(T2, R0, L); ++ __ delayed()->nop(); ++ __ stop("method needs synchronization"); ++ __ bind(L); ++ } ++#endif ++ } ++ ++ // layout of frame after lock_method ++ // [ monitor entry ] <--- sp ++ // ... ++ // [ monitor entry ] ++ // [ monitor block top ] ( the top monitor entry ) ++ // [ byte code pointer ] (if native, bcp = 0) ++ // [ constant pool cache ] ++ // [ Method* ] ++ // [ locals offset ] ++ // [ sender's sp ] ++ // [ sender's fp ] ++ // [ return address ] <--- fp ++ // [ local var m-1 ] ++ // ... ++ // [ local var 0 ] ++ // [ argumnet word n-1 ] <--- ( sender's sp ) ++ // ... 
++ // [ argument word 0 ] <--- LVP ++ ++ ++ // start execution ++#ifdef ASSERT ++ { ++ Label L; ++ __ ld(AT, FP, frame::interpreter_frame_monitor_block_top_offset * wordSize); ++ __ beq(AT, SP, L); ++ __ delayed()->nop(); ++ __ stop("broken stack frame setup in interpreter in native"); ++ __ bind(L); ++ } ++#endif ++ ++ // jvmti/jvmpi support ++ __ notify_method_entry(); ++ ++ __ dispatch_next(vtos); ++ ++ // invocation counter overflow ++ if (inc_counter) { ++ // Handle overflow of counter and compile method ++ __ bind(invocation_counter_overflow); ++ generate_counter_overflow(continue_after_compile); ++ } ++ ++ return entry_point; ++} ++ ++//----------------------------------------------------------------------------- ++// Exceptions ++ ++void TemplateInterpreterGenerator::generate_throw_exception() { ++ // Entry point in previous activation (i.e., if the caller was ++ // interpreted) ++ Interpreter::_rethrow_exception_entry = __ pc(); ++ // Restore sp to interpreter_frame_last_sp even though we are going ++ // to empty the expression stack for the exception processing. ++ __ sd(R0,FP, frame::interpreter_frame_last_sp_offset * wordSize); ++ ++ // V0: exception ++ // V1: return address/pc that threw exception ++ __ restore_bcp(); // BCP points to call/send ++ __ restore_locals(); ++ ++ //add for compressedoops ++ __ reinit_heapbase(); ++ // Entry point for exceptions thrown within interpreter code ++ Interpreter::_throw_exception_entry = __ pc(); ++ // expression stack is undefined here ++ // V0: exception ++ // BCP: exception bcp ++ __ verify_oop(V0); ++ ++ // expression stack must be empty before entering the VM in case of an exception ++ __ empty_expression_stack(); ++ // find exception handler address and preserve exception oop ++ __ move(A1, V0); ++ __ call_VM(V1, CAST_FROM_FN_PTR(address, InterpreterRuntime::exception_handler_for_exception), A1); ++ // V0: exception handler entry point ++ // V1: preserved exception oop ++ // S0: bcp for exception handler ++ __ push(V1); // push exception which is now the only value on the stack ++ __ jr(V0); // jump to exception handler (may be _remove_activation_entry!) ++ __ delayed()->nop(); ++ ++ // If the exception is not handled in the current frame the frame is removed and ++ // the exception is rethrown (i.e. exception continuation is _rethrow_exception). ++ // ++ // Note: At this point the bci is still the bxi for the instruction which caused ++ // the exception and the expression stack is empty. Thus, for any VM calls ++ // at this point, GC will find a legal oop map (with empty expression stack). ++ ++ // In current activation ++ // V0: exception ++ // BCP: exception bcp ++ ++ // ++ // JVMTI PopFrame support ++ // ++ ++ Interpreter::_remove_activation_preserving_args_entry = __ pc(); ++ __ empty_expression_stack(); ++ // Set the popframe_processing bit in pending_popframe_condition indicating that we are ++ // currently handling popframe, so that call_VMs that may happen later do not trigger new ++ // popframe handling cycles. ++#ifndef OPT_THREAD ++ Register thread = T2; ++ __ get_thread(T2); ++#else ++ Register thread = TREG; ++#endif ++ __ lw(T3, thread, in_bytes(JavaThread::popframe_condition_offset())); ++ __ ori(T3, T3, JavaThread::popframe_processing_bit); ++ __ sw(T3, thread, in_bytes(JavaThread::popframe_condition_offset())); ++ ++#ifndef CORE ++ { ++ // Check to see whether we are returning to a deoptimized frame. 
++ // (The PopFrame call ensures that the caller of the popped frame is ++ // either interpreted or compiled and deoptimizes it if compiled.) ++ // In this case, we can't call dispatch_next() after the frame is ++ // popped, but instead must save the incoming arguments and restore ++ // them after deoptimization has occurred. ++ // ++ // Note that we don't compare the return PC against the ++ // deoptimization blob's unpack entry because of the presence of ++ // adapter frames in C2. ++ Label caller_not_deoptimized; ++ __ ld(A0, FP, frame::return_addr_offset * wordSize); ++ __ super_call_VM_leaf(CAST_FROM_FN_PTR(address, InterpreterRuntime::interpreter_contains), A0); ++ __ bne(V0, R0, caller_not_deoptimized); ++ __ delayed()->nop(); ++ ++ // Compute size of arguments for saving when returning to deoptimized caller ++ __ get_method(A1); ++ __ verify_oop(A1); ++ __ ld( A1, A1, in_bytes(Method::const_offset())); ++ __ lhu(A1, A1, in_bytes(ConstMethod::size_of_parameters_offset())); ++ __ shl(A1, Interpreter::logStackElementSize); ++ __ restore_locals(); ++ __ dsubu(A2, LVP, A1); ++ __ daddiu(A2, A2, wordSize); ++ // Save these arguments ++#ifndef OPT_THREAD ++ __ get_thread(A0); ++#else ++ __ move(A0, TREG); ++#endif ++ __ super_call_VM_leaf(CAST_FROM_FN_PTR(address, Deoptimization::popframe_preserve_args), A0, A1, A2); ++ ++ __ remove_activation(vtos, T9, false, false, false); ++ ++ // Inform deoptimization that it is responsible for restoring these arguments ++#ifndef OPT_THREAD ++ __ get_thread(thread); ++#endif ++ __ move(AT, JavaThread::popframe_force_deopt_reexecution_bit); ++ __ sw(AT, thread, in_bytes(JavaThread::popframe_condition_offset())); ++ // Continue in deoptimization handler ++ __ jr(T9); ++ __ delayed()->nop(); ++ ++ __ bind(caller_not_deoptimized); ++ } ++#endif /* !CORE */ ++ ++ __ remove_activation(vtos, T3, ++ /* throw_monitor_exception */ false, ++ /* install_monitor_exception */ false, ++ /* notify_jvmdi */ false); ++ ++ // Clear the popframe condition flag ++ // Finish with popframe handling ++ // A previous I2C followed by a deoptimization might have moved the ++ // outgoing arguments further up the stack. PopFrame expects the ++ // mutations to those outgoing arguments to be preserved and other ++ // constraints basically require this frame to look exactly as ++ // though it had previously invoked an interpreted activation with ++ // no space between the top of the expression stack (current ++ // last_sp) and the top of stack. Rather than force deopt to ++ // maintain this kind of invariant all the time we call a small ++ // fixup routine to move the mutated arguments onto the top of our ++ // expression stack if necessary. 
++ __ move(T8, SP); ++ __ ld(A2, FP, frame::interpreter_frame_last_sp_offset * wordSize); ++#ifndef OPT_THREAD ++ __ get_thread(thread); ++#endif ++ // PC must point into interpreter here ++ __ set_last_Java_frame(thread, noreg, FP, __ pc()); ++ __ super_call_VM_leaf(CAST_FROM_FN_PTR(address, InterpreterRuntime::popframe_move_outgoing_args), thread, T8, A2); ++ __ reset_last_Java_frame(thread, true); ++ // Restore the last_sp and null it out ++ __ ld(SP, FP, frame::interpreter_frame_last_sp_offset * wordSize); ++ __ sd(R0, FP, frame::interpreter_frame_last_sp_offset * wordSize); ++ ++ ++ ++ __ move(AT, JavaThread::popframe_inactive); ++ __ sw(AT, thread, in_bytes(JavaThread::popframe_condition_offset())); ++ ++ // Finish with popframe handling ++ __ restore_bcp(); ++ __ restore_locals(); ++#ifndef CORE ++ // The method data pointer was incremented already during ++ // call profiling. We have to restore the mdp for the current bcp. ++ if (ProfileInterpreter) { ++ __ set_method_data_pointer_for_bcp(); ++ } ++#endif // !CORE ++ // Clear the popframe condition flag ++#ifndef OPT_THREAD ++ __ get_thread(thread); ++#endif ++ __ move(AT, JavaThread::popframe_inactive); ++ __ sw(AT, thread, in_bytes(JavaThread::popframe_condition_offset())); ++ ++#if INCLUDE_JVMTI ++ { ++ Label L_done; ++ ++ __ lbu(AT, BCP, 0); ++ __ daddiu(AT, AT, -1 * Bytecodes::_invokestatic); ++ __ bne(AT, R0, L_done); ++ __ delayed()->nop(); ++ ++ // The member name argument must be restored if _invokestatic is re-executed after a PopFrame call. ++ // Detect such a case in the InterpreterRuntime function and return the member name argument, or NULL. ++ ++ __ get_method(T9); ++ __ ld(T8, LVP, 0); ++ __ call_VM(T8, CAST_FROM_FN_PTR(address, InterpreterRuntime::member_name_arg_or_null), T8, T9, BCP); ++ ++ __ beq(T8, R0, L_done); ++ __ delayed()->nop(); ++ ++ __ sd(T8, SP, 0); ++ __ bind(L_done); ++ } ++#endif // INCLUDE_JVMTI ++ ++ __ dispatch_next(vtos); ++ // end of PopFrame support ++ ++ Interpreter::_remove_activation_entry = __ pc(); ++ ++ // preserve exception over this code sequence ++ __ pop(T0); ++#ifndef OPT_THREAD ++ __ get_thread(thread); ++#endif ++ __ sd(T0, thread, in_bytes(JavaThread::vm_result_offset())); ++ // remove the activation (without doing throws on illegalMonitorExceptions) ++ __ remove_activation(vtos, T3, false, true, false); ++ // restore exception ++ __ get_vm_result(T0, thread); ++ __ verify_oop(T0); ++ ++ // In between activations - previous activation type unknown yet ++ // compute continuation point - the continuation point expects ++ // the following registers set up: ++ // ++ // T0: exception ++ // T1: return address/pc that threw exception ++ // SP: expression stack of caller ++ // FP: fp of caller ++ __ push2(T0, T3); // save exception and return address ++ __ move(A1, T3); ++ __ super_call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::exception_handler_for_return_address), thread, A1); ++ __ move(T9, V0); // save exception handler ++ __ pop2(V0, V1); // restore return address and exception ++ ++ // Note that an "issuing PC" is actually the next PC after the call ++ __ jr(T9); // jump to exception handler of caller ++ __ delayed()->nop(); ++} ++ ++ ++// ++// JVMTI ForceEarlyReturn support ++// ++address TemplateInterpreterGenerator::generate_earlyret_entry_for(TosState state) { ++ address entry = __ pc(); ++ __ restore_bcp(); ++ __ restore_locals(); ++ __ empty_expression_stack(); ++ __ empty_FPU_stack(); ++ __ load_earlyret_value(state); ++ ++#ifndef OPT_THREAD ++ __ get_thread(TREG); 
++#endif ++ __ ld_ptr(T9, TREG, in_bytes(JavaThread::jvmti_thread_state_offset())); ++ const Address cond_addr(T9, in_bytes(JvmtiThreadState::earlyret_state_offset())); ++ // Clear the earlyret state ++ __ move(AT, JvmtiThreadState::earlyret_inactive); ++ __ sw(AT, cond_addr); ++ ++ __ remove_activation(state, T0, ++ false, /* throw_monitor_exception */ ++ false, /* install_monitor_exception */ ++ true); /* notify_jvmdi */ ++ __ jr(T0); ++ __ delayed()->nop(); ++ return entry; ++} // end of ForceEarlyReturn support ++ ++ ++//----------------------------------------------------------------------------- ++// Helper for vtos entry point generation ++ ++void TemplateInterpreterGenerator::set_vtos_entry_points(Template* t, ++ address& bep, ++ address& cep, ++ address& sep, ++ address& aep, ++ address& iep, ++ address& lep, ++ address& fep, ++ address& dep, ++ address& vep) { ++ assert(t->is_valid() && t->tos_in() == vtos, "illegal template"); ++ Label L; ++ fep = __ pc(); __ push(ftos); __ b(L); __ delayed()->nop(); ++ dep = __ pc(); __ push(dtos); __ b(L); __ delayed()->nop(); ++ lep = __ pc(); __ push(ltos); __ b(L); __ delayed()->nop(); ++ aep =__ pc(); __ push(atos); __ b(L); __ delayed()->nop(); ++ bep = cep = sep = ++ iep = __ pc(); __ push(itos); ++ vep = __ pc(); ++ __ bind(L); ++ generate_and_dispatch(t); ++} ++ ++ ++/* ++//----------------------------------------------------------------------------- ++// Generation of individual instructions ++ ++// helpers for generate_and_dispatch ++ ++ ++InterpreterGenerator::InterpreterGenerator(StubQueue* code) ++ : TemplateInterpreterGenerator(code) { ++ generate_all(); // down here so it can be "virtual" ++} ++*/ ++ ++//----------------------------------------------------------------------------- ++ ++// Non-product code ++#ifndef PRODUCT ++address TemplateInterpreterGenerator::generate_trace_code(TosState state) { ++ address entry = __ pc(); ++ ++ // prepare expression stack ++ __ push(state); // save tosca ++ ++ // tos & tos2 ++ // trace_bytecode need actually 4 args, the last two is tos&tos2 ++ // this work fine for x86. but mips o32 call convention will store A2-A3 ++ // to the stack position it think is the tos&tos2 ++ // when the expression stack have no more than 2 data, error occur. 
++ __ ld(A2, SP, 0); ++ __ ld(A3, SP, 1 * wordSize); ++ ++ // pass arguments & call tracer ++ __ call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::trace_bytecode), RA, A2, A3); ++ __ move(RA, V0); // make sure return address is not destroyed by pop(state) ++ ++ // restore expression stack ++ __ pop(state); // restore tosca ++ ++ // return ++ __ jr(RA); ++ __ delayed()->nop(); ++ ++ return entry; ++} ++ ++void TemplateInterpreterGenerator::count_bytecode() { ++ __ li(T8, (long)&BytecodeCounter::_counter_value); ++ __ lw(AT, T8, 0); ++ __ daddiu(AT, AT, 1); ++ __ sw(AT, T8, 0); ++} ++ ++void TemplateInterpreterGenerator::histogram_bytecode(Template* t) { ++ __ li(T8, (long)&BytecodeHistogram::_counters[t->bytecode()]); ++ __ lw(AT, T8, 0); ++ __ daddiu(AT, AT, 1); ++ __ sw(AT, T8, 0); ++} ++ ++void TemplateInterpreterGenerator::histogram_bytecode_pair(Template* t) { ++ __ li(T8, (long)&BytecodePairHistogram::_index); ++ __ lw(T9, T8, 0); ++ __ dsrl(T9, T9, BytecodePairHistogram::log2_number_of_codes); ++ __ li(T8, ((long)t->bytecode()) << BytecodePairHistogram::log2_number_of_codes); ++ __ orr(T9, T9, T8); ++ __ li(T8, (long)&BytecodePairHistogram::_index); ++ __ sw(T9, T8, 0); ++ __ dsll(T9, T9, 2); ++ __ li(T8, (long)BytecodePairHistogram::_counters); ++ __ daddu(T8, T8, T9); ++ __ lw(AT, T8, 0); ++ __ daddiu(AT, AT, 1); ++ __ sw(AT, T8, 0); ++} ++ ++ ++void TemplateInterpreterGenerator::trace_bytecode(Template* t) { ++ // Call a little run-time stub to avoid blow-up for each bytecode. ++ // The run-time runtime saves the right registers, depending on ++ // the tosca in-state for the given template. ++ ++ address entry = Interpreter::trace_code(t->tos_in()); ++ assert(entry != NULL, "entry must have been generated"); ++ __ call(entry, relocInfo::none); ++ __ delayed()->nop(); ++ //add for compressedoops ++ __ reinit_heapbase(); ++} ++ ++ ++void TemplateInterpreterGenerator::stop_interpreter_at() { ++ Label L; ++ __ li(T8, long(&BytecodeCounter::_counter_value)); ++ __ lw(T8, T8, 0); ++ __ move(AT, StopInterpreterAt); ++ __ bne(T8, AT, L); ++ __ delayed()->nop(); ++ __ brk(5); ++ __ delayed()->nop(); ++ __ bind(L); ++} ++#endif // !PRODUCT +diff --git a/src/hotspot/cpu/mips/templateTable_mips.hpp b/src/hotspot/cpu/mips/templateTable_mips.hpp +new file mode 100644 +index 00000000000..46a88aba261 +--- /dev/null ++++ b/src/hotspot/cpu/mips/templateTable_mips.hpp +@@ -0,0 +1,43 @@ ++/* ++ * Copyright (c) 2003, 2012, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2018, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 
++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#ifndef CPU_MIPS_VM_TEMPLATETABLE_MIPS_64_HPP ++#define CPU_MIPS_VM_TEMPLATETABLE_MIPS_64_HPP ++ ++ static void prepare_invoke(int byte_no, ++ Register method, // linked method (or i-klass) ++ Register index = noreg, // itable index, MethodType, etc. ++ Register recv = noreg, // if caller wants to see it ++ Register flags = noreg // if caller wants to test it ++ ); ++ static void invokevirtual_helper(Register index, Register recv, ++ Register flags); ++ static void volatile_barrier(); ++ ++ // Helpers ++ static void index_check(Register array, Register index); ++ static void index_check_without_pop(Register array, Register index); ++ ++#endif // CPU_MIPS_VM_TEMPLATETABLE_MIPS_64_HPP +diff --git a/src/hotspot/cpu/mips/templateTable_mips_64.cpp b/src/hotspot/cpu/mips/templateTable_mips_64.cpp +new file mode 100644 +index 00000000000..bbf95f45225 +--- /dev/null ++++ b/src/hotspot/cpu/mips/templateTable_mips_64.cpp +@@ -0,0 +1,4613 @@ ++/* ++ * Copyright (c) 2003, 2013, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ * ++ */ ++ ++#include "precompiled.hpp" ++#include "asm/macroAssembler.hpp" ++#include "interpreter/interpreter.hpp" ++#include "interpreter/interpreterRuntime.hpp" ++#include "interpreter/interp_masm.hpp" ++#include "interpreter/templateTable.hpp" ++#include "gc/shared/collectedHeap.hpp" ++#include "memory/universe.hpp" ++#include "oops/klass.inline.hpp" ++#include "oops/methodData.hpp" ++#include "oops/objArrayKlass.hpp" ++#include "oops/oop.inline.hpp" ++#include "prims/jvmtiExport.hpp" ++#include "prims/methodHandles.hpp" ++#include "runtime/frame.inline.hpp" ++#include "runtime/sharedRuntime.hpp" ++#include "runtime/stubRoutines.hpp" ++#include "runtime/synchronizer.hpp" ++#include "utilities/macros.hpp" ++ ++ ++#define __ _masm-> ++ ++#define A0 RA0 ++#define A1 RA1 ++#define A2 RA2 ++#define A3 RA3 ++#define A4 RA4 ++#define A5 RA5 ++#define A6 RA6 ++#define A7 RA7 ++#define T0 RT0 ++#define T1 RT1 ++#define T2 RT2 ++#define T3 RT3 ++#define T8 RT8 ++#define T9 RT9 ++ ++// Address computation: local variables ++ ++static inline Address iaddress(int n) { ++ return Address(LVP, Interpreter::local_offset_in_bytes(n)); ++} ++ ++static inline Address laddress(int n) { ++ return iaddress(n + 1); ++} ++ ++static inline Address faddress(int n) { ++ return iaddress(n); ++} ++ ++static inline Address daddress(int n) { ++ return laddress(n); ++} ++ ++static inline Address aaddress(int n) { ++ return iaddress(n); ++} ++static inline Address haddress(int n) { return iaddress(n + 0); } ++ ++ ++static inline Address at_sp() { return Address(SP, 0); } ++static inline Address at_sp_p1() { return Address(SP, 1 * wordSize); } ++static inline Address at_sp_p2() { return Address(SP, 2 * wordSize); } ++ ++// At top of Java expression stack which may be different than sp(). It ++// isn't for category 1 objects. ++static inline Address at_tos () { ++ Address tos = Address(SP, Interpreter::expr_offset_in_bytes(0)); ++ return tos; ++} ++ ++static inline Address at_tos_p1() { ++ return Address(SP, Interpreter::expr_offset_in_bytes(1)); ++} ++ ++static inline Address at_tos_p2() { ++ return Address(SP, Interpreter::expr_offset_in_bytes(2)); ++} ++ ++static inline Address at_tos_p3() { ++ return Address(SP, Interpreter::expr_offset_in_bytes(3)); ++} ++ ++// we use S0 as bcp, be sure you have bcp in S0 before you call any of the Template generator ++Address TemplateTable::at_bcp(int offset) { ++ assert(_desc->uses_bcp(), "inconsistent uses_bcp information"); ++ return Address(BCP, offset); ++} ++ ++// Miscelaneous helper routines ++// Store an oop (or NULL) at the address described by obj. 
++// If val == noreg this means store a NULL ++ ++static void do_oop_store(InterpreterMacroAssembler* _masm, ++ Address dst, ++ Register val, ++ DecoratorSet decorators = 0) { ++ assert(val == noreg || val == V0, "parameter is just for looks"); ++ __ store_heap_oop(dst, val, T9, T1, decorators); ++} ++ ++static void do_oop_load(InterpreterMacroAssembler* _masm, ++ Address src, ++ Register dst, ++ DecoratorSet decorators = 0) { ++ __ load_heap_oop(dst, src, T9, T1, decorators); ++} ++ ++// bytecode folding ++void TemplateTable::patch_bytecode(Bytecodes::Code bc, Register bc_reg, ++ Register tmp_reg, bool load_bc_into_bc_reg/*=true*/, ++ int byte_no) { ++ if (!RewriteBytecodes) return; ++ Label L_patch_done; ++ ++ switch (bc) { ++ case Bytecodes::_fast_aputfield: ++ case Bytecodes::_fast_bputfield: ++ case Bytecodes::_fast_zputfield: ++ case Bytecodes::_fast_cputfield: ++ case Bytecodes::_fast_dputfield: ++ case Bytecodes::_fast_fputfield: ++ case Bytecodes::_fast_iputfield: ++ case Bytecodes::_fast_lputfield: ++ case Bytecodes::_fast_sputfield: ++ { ++ // We skip bytecode quickening for putfield instructions when ++ // the put_code written to the constant pool cache is zero. ++ // This is required so that every execution of this instruction ++ // calls out to InterpreterRuntime::resolve_get_put to do ++ // additional, required work. ++ assert(byte_no == f1_byte || byte_no == f2_byte, "byte_no out of range"); ++ assert(load_bc_into_bc_reg, "we use bc_reg as temp"); ++ __ get_cache_and_index_and_bytecode_at_bcp(tmp_reg, bc_reg, tmp_reg, byte_no, 1); ++ __ daddiu(bc_reg, R0, bc); ++ __ beq(tmp_reg, R0, L_patch_done); ++ __ delayed()->nop(); ++ } ++ break; ++ default: ++ assert(byte_no == -1, "sanity"); ++ // the pair bytecodes have already done the load. 
++ if (load_bc_into_bc_reg) { ++ __ move(bc_reg, bc); ++ } ++ } ++ ++ if (JvmtiExport::can_post_breakpoint()) { ++ Label L_fast_patch; ++ // if a breakpoint is present we can't rewrite the stream directly ++ __ lbu(tmp_reg, at_bcp(0)); ++ __ move(AT, Bytecodes::_breakpoint); ++ __ bne(tmp_reg, AT, L_fast_patch); ++ __ delayed()->nop(); ++ ++ __ get_method(tmp_reg); ++ // Let breakpoint table handling rewrite to quicker bytecode ++ __ call_VM(NOREG, CAST_FROM_FN_PTR(address, ++ InterpreterRuntime::set_original_bytecode_at), tmp_reg, BCP, bc_reg); ++ ++ __ b(L_patch_done); ++ __ delayed()->nop(); ++ __ bind(L_fast_patch); ++ } ++ ++#ifdef ASSERT ++ Label L_okay; ++ __ lbu(tmp_reg, at_bcp(0)); ++ __ move(AT, (int)Bytecodes::java_code(bc)); ++ __ beq(tmp_reg, AT, L_okay); ++ __ delayed()->nop(); ++ __ beq(tmp_reg, bc_reg, L_patch_done); ++ __ delayed()->nop(); ++ __ stop("patching the wrong bytecode"); ++ __ bind(L_okay); ++#endif ++ ++ // patch bytecode ++ __ sb(bc_reg, at_bcp(0)); ++ __ bind(L_patch_done); ++} ++ ++ ++// Individual instructions ++ ++void TemplateTable::nop() { ++ transition(vtos, vtos); ++ // nothing to do ++} ++ ++void TemplateTable::shouldnotreachhere() { ++ transition(vtos, vtos); ++ __ stop("shouldnotreachhere bytecode"); ++} ++ ++void TemplateTable::aconst_null() { ++ transition(vtos, atos); ++ __ move(FSR, R0); ++} ++ ++void TemplateTable::iconst(int value) { ++ transition(vtos, itos); ++ if (value == 0) { ++ __ move(FSR, R0); ++ } else { ++ __ move(FSR, value); ++ } ++} ++ ++void TemplateTable::lconst(int value) { ++ transition(vtos, ltos); ++ if (value == 0) { ++ __ move(FSR, R0); ++ } else { ++ __ move(FSR, value); ++ } ++} ++ ++void TemplateTable::fconst(int value) { ++ transition(vtos, ftos); ++ switch( value ) { ++ case 0: __ mtc1(R0, FSF); return; ++ case 1: __ addiu(AT, R0, 1); break; ++ case 2: __ addiu(AT, R0, 2); break; ++ default: ShouldNotReachHere(); ++ } ++ __ mtc1(AT, FSF); ++ __ cvt_s_w(FSF, FSF); ++} ++ ++void TemplateTable::dconst(int value) { ++ transition(vtos, dtos); ++ switch( value ) { ++ case 0: __ dmtc1(R0, FSF); ++ return; ++ case 1: __ daddiu(AT, R0, 1); ++ __ dmtc1(AT, FSF); ++ __ cvt_d_w(FSF, FSF); ++ break; ++ default: ShouldNotReachHere(); ++ } ++} ++ ++void TemplateTable::bipush() { ++ transition(vtos, itos); ++ __ lb(FSR, at_bcp(1)); ++} ++ ++void TemplateTable::sipush() { ++ transition(vtos, itos); ++ __ lb(FSR, BCP, 1); ++ __ lbu(AT, BCP, 2); ++ __ dsll(FSR, FSR, 8); ++ __ orr(FSR, FSR, AT); ++} ++ ++// T1 : tags ++// T2 : index ++// T3 : cpool ++// T8 : tag ++void TemplateTable::ldc(bool wide) { ++ transition(vtos, vtos); ++ Label call_ldc, notFloat, notClass, notInt, Done; ++ // get index in cpool ++ if (wide) { ++ __ get_unsigned_2_byte_index_at_bcp(T2, 1); ++ } else { ++ __ lbu(T2, at_bcp(1)); ++ } ++ ++ __ get_cpool_and_tags(T3, T1); ++ ++ const int base_offset = ConstantPool::header_size() * wordSize; ++ const int tags_offset = Array::base_offset_in_bytes(); ++ ++ // get type ++ if (UseLEXT1 && Assembler::is_simm(sizeof(tags_offset), 8)) { ++ __ gslbx(T1, T1, T2, tags_offset); ++ } else { ++ __ daddu(AT, T1, T2); ++ __ lb(T1, AT, tags_offset); ++ } ++ if(os::is_MP()) { ++ __ sync(); // load acquire ++ } ++ //now T1 is the tag ++ ++ // unresolved class - get the resolved class ++ __ daddiu(AT, T1, - JVM_CONSTANT_UnresolvedClass); ++ __ beq(AT, R0, call_ldc); ++ __ delayed()->nop(); ++ ++ // unresolved class in error (resolution failed) - call into runtime ++ // so that the same error from first resolution attempt is thrown. 
++ __ daddiu(AT, T1, -JVM_CONSTANT_UnresolvedClassInError); ++ __ beq(AT, R0, call_ldc); ++ __ delayed()->nop(); ++ ++ // resolved class - need to call vm to get java mirror of the class ++ __ daddiu(AT, T1, - JVM_CONSTANT_Class); ++ __ bne(AT, R0, notClass); ++ __ delayed()->dsll(T2, T2, Address::times_8); ++ ++ __ bind(call_ldc); ++ __ move(A1, wide); ++ call_VM(FSR, CAST_FROM_FN_PTR(address, InterpreterRuntime::ldc), A1); ++ //__ push(atos); ++ __ daddiu(SP, SP, - Interpreter::stackElementSize); ++ __ b(Done); ++ __ delayed()->sd(FSR, SP, 0); // added for performance issue ++ ++ __ bind(notClass); ++ __ daddiu(AT, T1, -JVM_CONSTANT_Float); ++ __ bne(AT, R0, notFloat); ++ __ delayed()->nop(); ++ // ftos ++ if (UseLEXT1 && Assembler::is_simm(sizeof(base_offset), 8)) { ++ __ gslwxc1(FSF, T3, T2, base_offset); ++ } else { ++ __ daddu(AT, T3, T2); ++ __ lwc1(FSF, AT, base_offset); ++ } ++ //__ push_f(); ++ __ daddiu(SP, SP, - Interpreter::stackElementSize); ++ __ b(Done); ++ __ delayed()->swc1(FSF, SP, 0); ++ ++ __ bind(notFloat); ++ __ daddiu(AT, T1, -JVM_CONSTANT_Integer); ++ __ bne(AT, R0, notInt); ++ __ delayed()->nop(); ++ // itos ++ if (UseLEXT1 && Assembler::is_simm(sizeof(base_offset), 8)) { ++ __ gslwx(FSR, T3, T2, base_offset); ++ } else { ++ __ daddu(T0, T3, T2); ++ __ lw(FSR, T0, base_offset); ++ } ++ __ push(itos); ++ __ b(Done); ++ __ delayed()->nop(); ++ ++ // assume the tag is for condy; if not, the VM runtime will tell us ++ __ bind(notInt); ++ condy_helper(Done); ++ ++ __ bind(Done); ++} ++ ++void TemplateTable::condy_helper(Label& Done) { ++ const Register obj = FSR; ++ const Register off = SSR; ++ const Register flags = T3; ++ const Register rarg = A1; ++ __ move(rarg, (int)bytecode()); ++ __ call_VM(obj, CAST_FROM_FN_PTR(address, InterpreterRuntime::resolve_ldc), rarg); ++ __ get_vm_result_2(flags, TREG); ++ // VMr = obj = base address to find primitive value to push ++ // VMr2 = flags = (tos, off) using format of CPCE::_flags ++ __ andi(off, flags, ConstantPoolCacheEntry::field_index_mask); ++ __ daddu(obj, off, obj); ++ const Address field(obj, 0 * wordSize); ++ ++ // What sort of thing are we loading? 
++ __ dsrl(flags, flags, ConstantPoolCacheEntry::tos_state_shift); ++ ConstantPoolCacheEntry::verify_tos_state_shift(); ++ ++ switch (bytecode()) { ++ case Bytecodes::_ldc: ++ case Bytecodes::_ldc_w: ++ { ++ // tos in (itos, ftos, stos, btos, ctos, ztos) ++ Label notInt, notFloat, notShort, notByte, notChar, notBool; ++ __ daddiu(AT, flags, -itos); ++ __ bne(AT, R0, notInt); ++ __ delayed()->nop(); ++ // itos ++ __ ld(obj, field); ++ __ push(itos); ++ __ b(Done); ++ __ delayed()->nop(); ++ ++ __ bind(notInt); ++ __ daddiu(AT, flags, -ftos); ++ __ bne(AT, R0, notFloat); ++ __ delayed()->nop(); ++ // ftos ++ __ lwc1(FSF, field); ++ __ push(ftos); ++ __ b(Done); ++ __ delayed()->nop(); ++ ++ __ bind(notFloat); ++ __ daddiu(AT, flags, -stos); ++ __ bne(AT, R0, notShort); ++ __ delayed()->nop(); ++ // stos ++ __ lh(obj, field); ++ __ push(stos); ++ __ b(Done); ++ __ delayed()->nop(); ++ ++ __ bind(notShort); ++ __ daddiu(AT, flags, -btos); ++ __ bne(AT, R0, notByte); ++ __ delayed()->nop(); ++ // btos ++ __ lb(obj, field); ++ __ push(btos); ++ __ b(Done); ++ __ delayed()->nop(); ++ ++ __ bind(notByte); ++ __ daddiu(AT, flags, -ctos); ++ __ bne(AT, R0, notChar); ++ __ delayed()->nop(); ++ // ctos ++ __ lhu(obj, field); ++ __ push(ctos); ++ __ b(Done); ++ __ delayed()->nop(); ++ ++ __ bind(notChar); ++ __ daddiu(AT, flags, -ztos); ++ __ bne(AT, R0, notBool); ++ __ delayed()->nop(); ++ // ztos ++ __ lbu(obj, field); ++ __ push(ztos); ++ __ b(Done); ++ __ delayed()->nop(); ++ ++ __ bind(notBool); ++ break; ++ } ++ ++ case Bytecodes::_ldc2_w: ++ { ++ Label notLong, notDouble; ++ __ daddiu(AT, flags, -ltos); ++ __ bne(AT, R0, notLong); ++ __ delayed()->nop(); ++ // ltos ++ __ ld(obj, field); ++ __ push(ltos); ++ __ b(Done); ++ __ delayed()->nop(); ++ ++ __ bind(notLong); ++ __ daddiu(AT, flags, -dtos); ++ __ bne(AT, R0, notDouble); ++ __ delayed()->nop(); ++ // dtos ++ __ ldc1(FSF, field); ++ __ push(dtos); ++ __ b(Done); ++ __ delayed()->nop(); ++ ++ __ bind(notDouble); ++ break; ++ } ++ ++ default: ++ ShouldNotReachHere(); ++ } ++ ++ __ stop("bad ldc/condy"); ++} ++ ++// Fast path for caching oop constants. ++void TemplateTable::fast_aldc(bool wide) { ++ transition(vtos, atos); ++ ++ Register result = FSR; ++ Register tmp = SSR; ++ Register rarg = A1; ++ int index_size = wide ? sizeof(u2) : sizeof(u1); ++ ++ Label resolved; ++ ++ // We are resolved if the resolved reference cache entry contains a ++ // non-null object (String, MethodType, etc.) ++ assert_different_registers(result, tmp); ++ __ get_cache_index_at_bcp(tmp, 1, index_size); ++ __ load_resolved_reference_at_index(result, tmp, T9); ++ __ bne(result, R0, resolved); ++ __ delayed()->nop(); ++ ++ address entry = CAST_FROM_FN_PTR(address, InterpreterRuntime::resolve_ldc); ++ // first time invocation - must resolve first ++ int i = (int)bytecode(); ++ __ move(rarg, i); ++ __ call_VM(result, entry, rarg); ++ ++ __ bind(resolved); ++ ++ { // Check for the null sentinel. ++ // If we just called the VM, it already did the mapping for us, ++ // but it's harmless to retry. 
++ Label notNull; ++ __ set64(rarg, (long)Universe::the_null_sentinel_addr()); ++ __ ld_ptr(tmp, Address(rarg)); ++ __ resolve_oop_handle(tmp, T9); ++ __ bne(tmp, result, notNull); ++ __ delayed()->nop(); ++ __ xorr(result, result, result); // NULL object reference ++ __ bind(notNull); ++ } ++ ++ if (VerifyOops) { ++ __ verify_oop(result); ++ } ++} ++ ++ ++// used register: T2, T3, T1 ++// T2 : index ++// T3 : cpool ++// T1 : tag ++void TemplateTable::ldc2_w() { ++ transition(vtos, vtos); ++ Label notDouble, notLong, Done; ++ ++ // get index in cpool ++ __ get_unsigned_2_byte_index_at_bcp(T2, 1); ++ ++ __ get_cpool_and_tags(T3, T1); ++ ++ const int base_offset = ConstantPool::header_size() * wordSize; ++ const int tags_offset = Array::base_offset_in_bytes(); ++ ++ // get type in T1 ++ if (UseLEXT1 && Assembler::is_simm(tags_offset, 8)) { ++ __ gslbx(T1, T1, T2, tags_offset); ++ } else { ++ __ daddu(AT, T1, T2); ++ __ lb(T1, AT, tags_offset); ++ } ++ ++ __ daddiu(AT, T1, -JVM_CONSTANT_Double); ++ __ bne(AT, R0, notDouble); ++ __ delayed()->nop(); ++ ++ // dtos ++ __ dsll(T2, T2, Address::times_8); ++ if (UseLEXT1 && Assembler::is_simm(base_offset, 8)) { ++ __ gsldxc1(FSF, T3, T2, base_offset); ++ } else { ++ __ daddu(AT, T3, T2); ++ __ ldc1(FSF, AT, base_offset); ++ } ++ __ push(dtos); ++ __ b(Done); ++ __ delayed()->nop(); ++ ++ __ bind(notDouble); ++ __ daddiu(AT, T1, -JVM_CONSTANT_Long); ++ __ bne(AT, R0, notLong); ++ __ delayed()->nop(); ++ ++ // ltos ++ __ dsll(T2, T2, Address::times_8); ++ if (UseLEXT1 && Assembler::is_simm(base_offset, 8)) { ++ __ gsldx(FSR, T3, T2, base_offset); ++ } else { ++ __ daddu(AT, T3, T2); ++ __ ld(FSR, AT, base_offset); ++ } ++ __ push(ltos); ++ __ b(Done); ++ __ delayed()->nop(); ++ ++ __ bind(notLong); ++ condy_helper(Done); ++ ++ __ bind(Done); ++} ++ ++// we compute the actual local variable address here ++// the x86 dont do so for it has scaled index memory access model, we dont have, so do here ++void TemplateTable::locals_index(Register reg, int offset) { ++ __ lbu(reg, at_bcp(offset)); ++ __ dsll(reg, reg, Address::times_8); ++ __ dsubu(reg, LVP, reg); ++} ++ ++void TemplateTable::iload() { ++ iload_internal(); ++} ++ ++void TemplateTable::nofast_iload() { ++ iload_internal(may_not_rewrite); ++} ++ ++// this method will do bytecode folding of the two form: ++// iload iload iload caload ++// used register : T2, T3 ++// T2 : bytecode ++// T3 : folded code ++void TemplateTable::iload_internal(RewriteControl rc) { ++ transition(vtos, itos); ++ if (RewriteFrequentPairs && rc == may_rewrite) { ++ Label rewrite, done; ++ // get the next bytecode in T2 ++ __ lbu(T2, at_bcp(Bytecodes::length_for(Bytecodes::_iload))); ++ // if _iload, wait to rewrite to iload2. We only want to rewrite the ++ // last two iloads in a pair. Comparing against fast_iload means that ++ // the next bytecode is neither an iload or a caload, and therefore ++ // an iload pair. ++ __ move(AT, Bytecodes::_iload); ++ __ beq(AT, T2, done); ++ __ delayed()->nop(); ++ ++ __ move(T3, Bytecodes::_fast_iload2); ++ __ move(AT, Bytecodes::_fast_iload); ++ __ beq(AT, T2, rewrite); ++ __ delayed()->nop(); ++ ++ // if _caload, rewrite to fast_icaload ++ __ move(T3, Bytecodes::_fast_icaload); ++ __ move(AT, Bytecodes::_caload); ++ __ beq(AT, T2, rewrite); ++ __ delayed()->nop(); ++ ++ // rewrite so iload doesn't check again. 
++ __ move(T3, Bytecodes::_fast_iload); ++ ++ // rewrite ++ // T3 : fast bytecode ++ __ bind(rewrite); ++ patch_bytecode(Bytecodes::_iload, T3, T2, false); ++ __ bind(done); ++ } ++ ++ // Get the local value into tos ++ locals_index(T2); ++ __ lw(FSR, T2, 0); ++} ++ ++// used register T2 ++// T2 : index ++void TemplateTable::fast_iload2() { ++ transition(vtos, itos); ++ locals_index(T2); ++ __ lw(FSR, T2, 0); ++ __ push(itos); ++ locals_index(T2, 3); ++ __ lw(FSR, T2, 0); ++} ++ ++// used register T2 ++// T2 : index ++void TemplateTable::fast_iload() { ++ transition(vtos, itos); ++ locals_index(T2); ++ __ lw(FSR, T2, 0); ++} ++ ++// used register T2 ++// T2 : index ++void TemplateTable::lload() { ++ transition(vtos, ltos); ++ locals_index(T2); ++ __ ld(FSR, T2, -wordSize); ++} ++ ++// used register T2 ++// T2 : index ++void TemplateTable::fload() { ++ transition(vtos, ftos); ++ locals_index(T2); ++ __ lwc1(FSF, T2, 0); ++} ++ ++// used register T2 ++// T2 : index ++void TemplateTable::dload() { ++ transition(vtos, dtos); ++ locals_index(T2); ++ __ ldc1(FSF, T2, -wordSize); ++} ++ ++// used register T2 ++// T2 : index ++void TemplateTable::aload() { ++ transition(vtos, atos); ++ locals_index(T2); ++ __ ld(FSR, T2, 0); ++} ++ ++void TemplateTable::locals_index_wide(Register reg) { ++ __ get_unsigned_2_byte_index_at_bcp(reg, 2); ++ __ dsll(reg, reg, Address::times_8); ++ __ dsubu(reg, LVP, reg); ++} ++ ++// used register T2 ++// T2 : index ++void TemplateTable::wide_iload() { ++ transition(vtos, itos); ++ locals_index_wide(T2); ++ __ ld(FSR, T2, 0); ++} ++ ++// used register T2 ++// T2 : index ++void TemplateTable::wide_lload() { ++ transition(vtos, ltos); ++ locals_index_wide(T2); ++ __ ld(FSR, T2, -wordSize); ++} ++ ++// used register T2 ++// T2 : index ++void TemplateTable::wide_fload() { ++ transition(vtos, ftos); ++ locals_index_wide(T2); ++ __ lwc1(FSF, T2, 0); ++} ++ ++// used register T2 ++// T2 : index ++void TemplateTable::wide_dload() { ++ transition(vtos, dtos); ++ locals_index_wide(T2); ++ __ ldc1(FSF, T2, -wordSize); ++} ++ ++// used register T2 ++// T2 : index ++void TemplateTable::wide_aload() { ++ transition(vtos, atos); ++ locals_index_wide(T2); ++ __ ld(FSR, T2, 0); ++} ++ ++// we use A2 as the regiser for index, BE CAREFUL! 
++// we dont use our tge 29 now, for later optimization ++void TemplateTable::index_check(Register array, Register index) { ++ // Pop ptr into array ++ __ pop_ptr(array); ++ index_check_without_pop(array, index); ++} ++ ++void TemplateTable::index_check_without_pop(Register array, Register index) { ++ // destroys A2 ++ // check array ++ __ null_check(array, arrayOopDesc::length_offset_in_bytes()); ++ ++ // sign extend since tos (index) might contain garbage in upper bits ++ __ sll(index, index, 0); ++ ++ // check index ++ Label ok; ++ __ lw(AT, array, arrayOopDesc::length_offset_in_bytes()); ++#ifndef OPT_RANGECHECK ++ __ sltu(AT, index, AT); ++ __ bne(AT, R0, ok); ++ __ delayed()->nop(); ++ ++ //throw_ArrayIndexOutOfBoundsException assume abberrant index in A2 ++ if (A1 != array) __ move(A1, array); ++ if (A2 != index) __ move(A2, index); ++ __ jmp(Interpreter::_throw_ArrayIndexOutOfBoundsException_entry); ++ __ delayed()->nop(); ++ __ bind(ok); ++#else ++ __ lw(AT, array, arrayOopDesc::length_offset_in_bytes()); ++ __ move(A2, index); ++ __ tgeu(A2, AT, 29); ++#endif ++} ++ ++void TemplateTable::iaload() { ++ transition(itos, itos); ++ if(UseBoundCheckInstruction) { ++ __ pop(SSR); //SSR:array FSR: index ++ __ dsll(FSR, FSR, 2); ++ __ daddu(FSR, SSR, FSR); ++ __ addiu(FSR, FSR, arrayOopDesc::base_offset_in_bytes(T_INT)); ++ ++ __ lw(AT, SSR, arrayOopDesc::length_offset_in_bytes()); //bound ++ __ dsll(AT, AT, 2); ++ __ daddu(AT, SSR, AT); ++ __ addiu(AT, AT, arrayOopDesc::base_offset_in_bytes(T_INT)); ++ ++ __ warn("iaload Unimplemented yet"); ++ __ gslwle(FSR, FSR, AT); ++ } else { ++ index_check(SSR, FSR); ++ __ dsll(FSR, FSR, 2); ++ __ daddu(FSR, SSR, FSR); ++ __ access_load_at(T_INT, IN_HEAP | IS_ARRAY, FSR, Address(FSR, arrayOopDesc::base_offset_in_bytes(T_INT)), noreg, noreg); ++ } ++} ++ ++void TemplateTable::laload() { ++ transition(itos, ltos); ++ if(UseBoundCheckInstruction) { ++ __ pop(SSR); //SSR:array FSR: index ++ __ dsll(FSR, FSR, Address::times_8); ++ __ daddu(FSR, SSR, FSR); ++ __ addiu(FSR, FSR, arrayOopDesc::base_offset_in_bytes(T_LONG) + 0 * wordSize); ++ ++ __ lw(AT, SSR, arrayOopDesc::length_offset_in_bytes()); //bound ++ __ dsll(AT, AT, Address::times_8); ++ __ daddu(AT, SSR, AT); ++ __ addiu(AT, AT, arrayOopDesc::base_offset_in_bytes(T_LONG) + 0 * wordSize); ++ ++ __ warn("laload Unimplemented yet"); ++ __ gsldle(FSR, FSR, AT); ++ } else { ++ index_check(SSR, FSR); ++ __ dsll(AT, FSR, Address::times_8); ++ __ daddu(T9, SSR, AT); ++ __ access_load_at(T_LONG, IN_HEAP | IS_ARRAY, FSR, Address(T9, arrayOopDesc::base_offset_in_bytes(T_LONG)), noreg, noreg); ++ } ++} ++ ++void TemplateTable::faload() { ++ transition(itos, ftos); ++ if(UseBoundCheckInstruction) { ++ __ pop(SSR); //SSR:array FSR: index ++ __ shl(FSR, 2); ++ __ daddu(FSR, SSR, FSR); ++ __ addiu(FSR, FSR, arrayOopDesc::base_offset_in_bytes(T_FLOAT)); ++ ++ __ lw(AT, SSR, arrayOopDesc::length_offset_in_bytes()); //bound ++ __ shl(AT, 2); ++ __ daddu(AT, SSR, AT); ++ __ addiu(AT, AT, arrayOopDesc::base_offset_in_bytes(T_FLOAT)); ++ ++ __ warn("faload Unimplemented yet"); ++ __ gslwlec1(FSF, FSR, AT); ++ } else { ++ index_check(SSR, FSR); ++ __ shl(FSR, 2); ++ __ daddu(FSR, SSR, FSR); ++ __ access_load_at(T_FLOAT, IN_HEAP | IS_ARRAY, noreg, Address(FSR, arrayOopDesc::base_offset_in_bytes(T_FLOAT)), noreg, noreg); ++ } ++} ++ ++void TemplateTable::daload() { ++ transition(itos, dtos); ++ if(UseBoundCheckInstruction) { ++ __ pop(SSR); //SSR:array FSR: index ++ __ dsll(FSR, FSR, 3); ++ __ daddu(FSR, SSR, FSR); ++ 
__ addiu(FSR, FSR, arrayOopDesc::base_offset_in_bytes(T_DOUBLE) + 0 * wordSize); ++ ++ __ lw(AT, SSR, arrayOopDesc::length_offset_in_bytes()); //bound ++ __ dsll(AT, AT, 3); ++ __ daddu(AT, SSR, AT); ++ __ addiu(AT, AT, arrayOopDesc::base_offset_in_bytes(T_DOUBLE) + 0 * wordSize); ++ ++ __ warn("daload Unimplemented yet"); ++ __ gsldlec1(FSF, FSR, AT); ++ } else { ++ index_check(SSR, FSR); ++ __ dsll(AT, FSR, 3); ++ __ daddu(T9, SSR, AT); ++ __ access_load_at(T_DOUBLE, IN_HEAP | IS_ARRAY, noreg, Address(T9, arrayOopDesc::base_offset_in_bytes(T_DOUBLE)), noreg, noreg); ++ } ++} ++ ++void TemplateTable::aaload() { ++ transition(itos, atos); ++ index_check(SSR, FSR); ++ __ dsll(FSR, FSR, UseCompressedOops ? Address::times_4 : Address::times_8); ++ __ daddu(FSR, SSR, FSR); ++ //add for compressedoops ++ do_oop_load(_masm, ++ Address(FSR, arrayOopDesc::base_offset_in_bytes(T_OBJECT)), ++ FSR, ++ IS_ARRAY); ++} ++ ++void TemplateTable::baload() { ++ transition(itos, itos); ++ if(UseBoundCheckInstruction) { ++ __ pop(SSR); //SSR:array FSR:index ++ __ daddu(FSR, SSR, FSR); ++ __ addiu(FSR, FSR, arrayOopDesc::base_offset_in_bytes(T_BYTE)); //base ++ ++ __ lw(AT, SSR, arrayOopDesc::length_offset_in_bytes()); ++ __ daddu(AT, SSR, AT); ++ __ addiu(AT, AT, arrayOopDesc::base_offset_in_bytes(T_BYTE)); //bound ++ ++ __ warn("baload Unimplemented yet"); ++ __ gslble(FSR, FSR, AT); ++ } else { ++ index_check(SSR, FSR); ++ __ daddu(FSR, SSR, FSR); ++ __ access_load_at(T_BYTE, IN_HEAP | IS_ARRAY, FSR, Address(FSR, arrayOopDesc::base_offset_in_bytes(T_BYTE)), noreg, noreg); ++ } ++} ++ ++void TemplateTable::caload() { ++ transition(itos, itos); ++ index_check(SSR, FSR); ++ __ dsll(FSR, FSR, Address::times_2); ++ __ daddu(FSR, SSR, FSR); ++ __ access_load_at(T_CHAR, IN_HEAP | IS_ARRAY, FSR, Address(FSR, arrayOopDesc::base_offset_in_bytes(T_CHAR)), noreg, noreg); ++} ++ ++// iload followed by caload frequent pair ++// used register : T2 ++// T2 : index ++void TemplateTable::fast_icaload() { ++ transition(vtos, itos); ++ // load index out of locals ++ locals_index(T2); ++ __ lw(FSR, T2, 0); ++ index_check(SSR, FSR); ++ __ dsll(FSR, FSR, 1); ++ __ daddu(FSR, SSR, FSR); ++ __ access_load_at(T_CHAR, IN_HEAP | IS_ARRAY, FSR, Address(FSR, arrayOopDesc::base_offset_in_bytes(T_CHAR)), noreg, noreg); ++} ++ ++void TemplateTable::saload() { ++ transition(itos, itos); ++ if(UseBoundCheckInstruction) { ++ __ pop(SSR); //SSR:array FSR: index ++ __ dsll(FSR, FSR, Address::times_2); ++ __ daddu(FSR, SSR, FSR); ++ __ addiu(FSR, FSR, arrayOopDesc::base_offset_in_bytes(T_SHORT)); ++ ++ __ lw(AT, SSR, arrayOopDesc::length_offset_in_bytes()); //bound ++ __ dsll(AT, AT, Address::times_2); ++ __ daddu(AT, SSR, AT); ++ __ addiu(AT, AT, arrayOopDesc::base_offset_in_bytes(T_SHORT)); ++ ++ __ warn("saload Unimplemented yet"); ++ __ gslhle(FSR, FSR, AT); ++ } else { ++ index_check(SSR, FSR); ++ __ dsll(FSR, FSR, Address::times_2); ++ __ daddu(FSR, SSR, FSR); ++ __ access_load_at(T_SHORT, IN_HEAP | IS_ARRAY, FSR, Address(FSR, arrayOopDesc::base_offset_in_bytes(T_SHORT)), noreg, noreg); ++ } ++} ++ ++void TemplateTable::iload(int n) { ++ transition(vtos, itos); ++ __ lw(FSR, iaddress(n)); ++} ++ ++void TemplateTable::lload(int n) { ++ transition(vtos, ltos); ++ __ ld(FSR, laddress(n)); ++} ++ ++void TemplateTable::fload(int n) { ++ transition(vtos, ftos); ++ __ lwc1(FSF, faddress(n)); ++} ++ ++void TemplateTable::dload(int n) { ++ transition(vtos, dtos); ++ __ ldc1(FSF, laddress(n)); ++} ++ ++void TemplateTable::aload(int n) { ++ 
transition(vtos, atos); ++ __ ld(FSR, aaddress(n)); ++} ++ ++void TemplateTable::aload_0() { ++ aload_0_internal(); ++} ++ ++void TemplateTable::nofast_aload_0() { ++ aload_0_internal(may_not_rewrite); ++} ++ ++// used register : T2, T3 ++// T2 : bytecode ++// T3 : folded code ++void TemplateTable::aload_0_internal(RewriteControl rc) { ++ transition(vtos, atos); ++ // According to bytecode histograms, the pairs: ++ // ++ // _aload_0, _fast_igetfield ++ // _aload_0, _fast_agetfield ++ // _aload_0, _fast_fgetfield ++ // ++ // occur frequently. If RewriteFrequentPairs is set, the (slow) ++ // _aload_0 bytecode checks if the next bytecode is either ++ // _fast_igetfield, _fast_agetfield or _fast_fgetfield and then ++ // rewrites the current bytecode into a pair bytecode; otherwise it ++ // rewrites the current bytecode into _fast_aload_0 that doesn't do ++ // the pair check anymore. ++ // ++ // Note: If the next bytecode is _getfield, the rewrite must be ++ // delayed, otherwise we may miss an opportunity for a pair. ++ // ++ // Also rewrite frequent pairs ++ // aload_0, aload_1 ++ // aload_0, iload_1 ++ // These bytecodes with a small amount of code are most profitable ++ // to rewrite ++ if (RewriteFrequentPairs && rc == may_rewrite) { ++ Label rewrite, done; ++ // get the next bytecode in T2 ++ __ lbu(T2, at_bcp(Bytecodes::length_for(Bytecodes::_aload_0))); ++ ++ // do actual aload_0 ++ aload(0); ++ ++ // if _getfield then wait with rewrite ++ __ move(AT, Bytecodes::_getfield); ++ __ beq(AT, T2, done); ++ __ delayed()->nop(); ++ ++ // if _igetfield then reqrite to _fast_iaccess_0 ++ assert(Bytecodes::java_code(Bytecodes::_fast_iaccess_0) == ++ Bytecodes::_aload_0, ++ "fix bytecode definition"); ++ __ move(T3, Bytecodes::_fast_iaccess_0); ++ __ move(AT, Bytecodes::_fast_igetfield); ++ __ beq(AT, T2, rewrite); ++ __ delayed()->nop(); ++ ++ // if _agetfield then reqrite to _fast_aaccess_0 ++ assert(Bytecodes::java_code(Bytecodes::_fast_aaccess_0) == ++ Bytecodes::_aload_0, ++ "fix bytecode definition"); ++ __ move(T3, Bytecodes::_fast_aaccess_0); ++ __ move(AT, Bytecodes::_fast_agetfield); ++ __ beq(AT, T2, rewrite); ++ __ delayed()->nop(); ++ ++ // if _fgetfield then reqrite to _fast_faccess_0 ++ assert(Bytecodes::java_code(Bytecodes::_fast_faccess_0) == ++ Bytecodes::_aload_0, ++ "fix bytecode definition"); ++ __ move(T3, Bytecodes::_fast_faccess_0); ++ __ move(AT, Bytecodes::_fast_fgetfield); ++ __ beq(AT, T2, rewrite); ++ __ delayed()->nop(); ++ ++ // else rewrite to _fast_aload0 ++ assert(Bytecodes::java_code(Bytecodes::_fast_aload_0) == ++ Bytecodes::_aload_0, ++ "fix bytecode definition"); ++ __ move(T3, Bytecodes::_fast_aload_0); ++ ++ // rewrite ++ __ bind(rewrite); ++ patch_bytecode(Bytecodes::_aload_0, T3, T2, false); ++ ++ __ bind(done); ++ } else { ++ aload(0); ++ } ++} ++ ++void TemplateTable::istore() { ++ transition(itos, vtos); ++ locals_index(T2); ++ __ sw(FSR, T2, 0); ++} ++ ++void TemplateTable::lstore() { ++ transition(ltos, vtos); ++ locals_index(T2); ++ __ sd(FSR, T2, -wordSize); ++} ++ ++void TemplateTable::fstore() { ++ transition(ftos, vtos); ++ locals_index(T2); ++ __ swc1(FSF, T2, 0); ++} ++ ++void TemplateTable::dstore() { ++ transition(dtos, vtos); ++ locals_index(T2); ++ __ sdc1(FSF, T2, -wordSize); ++} ++ ++void TemplateTable::astore() { ++ transition(vtos, vtos); ++ __ pop_ptr(FSR); ++ locals_index(T2); ++ __ sd(FSR, T2, 0); ++} ++ ++void TemplateTable::wide_istore() { ++ transition(vtos, vtos); ++ __ pop_i(FSR); ++ locals_index_wide(T2); ++ __ sd(FSR, T2, 0); 
++} ++ ++void TemplateTable::wide_lstore() { ++ transition(vtos, vtos); ++ __ pop_l(FSR); ++ locals_index_wide(T2); ++ __ sd(FSR, T2, -wordSize); ++} ++ ++void TemplateTable::wide_fstore() { ++ wide_istore(); ++} ++ ++void TemplateTable::wide_dstore() { ++ wide_lstore(); ++} ++ ++void TemplateTable::wide_astore() { ++ transition(vtos, vtos); ++ __ pop_ptr(FSR); ++ locals_index_wide(T2); ++ __ sd(FSR, T2, 0); ++} ++ ++// used register : T2 ++void TemplateTable::iastore() { ++ transition(itos, vtos); ++ __ pop_i(SSR); // T2: array SSR: index ++ if(UseBoundCheckInstruction) { ++ __ pop_ptr(T2); ++ __ dsll(SSR, SSR, Address::times_4); ++ __ daddu(SSR, T2, SSR); ++ __ addiu(SSR, SSR, arrayOopDesc::base_offset_in_bytes(T_INT)); // base ++ ++ __ lw(AT, T2, arrayOopDesc::length_offset_in_bytes()); ++ __ dsll(AT, AT, Address::times_4); ++ __ daddu(AT, T2, AT); ++ __ addiu(AT, AT, arrayOopDesc::base_offset_in_bytes(T_INT)); //bound ++ ++ __ warn("iastore Unimplemented yet"); ++ __ gsswle(FSR, SSR, AT); ++ } else { ++ index_check(T2, SSR); // prefer index in SSR ++ __ dsll(SSR, SSR, Address::times_4); ++ __ daddu(T2, T2, SSR); ++ __ access_store_at(T_INT, IN_HEAP | IS_ARRAY, Address(T2, arrayOopDesc::base_offset_in_bytes(T_INT)), FSR, noreg, noreg); ++ } ++} ++ ++ ++ ++// used register T2, T3 ++void TemplateTable::lastore() { ++ transition(ltos, vtos); ++ __ pop_i (T2); ++ if(UseBoundCheckInstruction) { ++ __ pop_ptr(T3); ++ __ dsll(T2, T2, Address::times_8); ++ __ daddu(T2, T3, T2); ++ __ addiu(T2, T2, arrayOopDesc::base_offset_in_bytes(T_LONG) + 0 * wordSize); // base ++ ++ __ lw(AT, T3, arrayOopDesc::length_offset_in_bytes()); ++ __ dsll(AT, AT, Address::times_8); ++ __ daddu(AT, T3, AT); ++ __ addiu(AT, AT, arrayOopDesc::base_offset_in_bytes(T_LONG) + 0 * wordSize); //bound ++ ++ __ warn("lastore Unimplemented yet"); ++ __ gssdle(FSR, T2, AT); ++ } else { ++ index_check(T3, T2); ++ __ dsll(T2, T2, Address::times_8); ++ __ daddu(T3, T3, T2); ++ __ access_store_at(T_LONG, IN_HEAP | IS_ARRAY, Address(T3, arrayOopDesc::base_offset_in_bytes(T_LONG)), FSR, noreg, noreg); ++ } ++} ++ ++// used register T2 ++void TemplateTable::fastore() { ++ transition(ftos, vtos); ++ __ pop_i(SSR); ++ if(UseBoundCheckInstruction) { ++ __ pop_ptr(T2); ++ __ dsll(SSR, SSR, Address::times_4); ++ __ daddu(SSR, T2, SSR); ++ __ addiu(SSR, SSR, arrayOopDesc::base_offset_in_bytes(T_FLOAT)); // base ++ ++ __ lw(AT, T2, arrayOopDesc::length_offset_in_bytes()); ++ __ dsll(AT, AT, Address::times_4); ++ __ daddu(AT, T2, AT); ++ __ addiu(AT, AT, arrayOopDesc::base_offset_in_bytes(T_FLOAT)); //bound ++ ++ __ warn("fastore Unimplemented yet"); ++ __ gsswlec1(FSF, SSR, AT); ++ } else { ++ index_check(T2, SSR); ++ __ dsll(SSR, SSR, Address::times_4); ++ __ daddu(T2, T2, SSR); ++ __ access_store_at(T_FLOAT, IN_HEAP | IS_ARRAY, Address(T2, arrayOopDesc::base_offset_in_bytes(T_FLOAT)), noreg, noreg, noreg); ++ } ++} ++ ++// used register T2, T3 ++void TemplateTable::dastore() { ++ transition(dtos, vtos); ++ __ pop_i (T2); ++ if(UseBoundCheckInstruction) { ++ __ pop_ptr(T3); ++ __ dsll(T2, T2, Address::times_8); ++ __ daddu(T2, T3, T2); ++ __ addiu(T2, T2, arrayOopDesc::base_offset_in_bytes(T_DOUBLE) + 0 * wordSize); // base ++ ++ __ lw(AT, T3, arrayOopDesc::length_offset_in_bytes()); ++ __ dsll(AT, AT, Address::times_8); ++ __ daddu(AT, T3, AT); ++ __ addiu(AT, AT, arrayOopDesc::base_offset_in_bytes(T_DOUBLE) + 0 * wordSize); //bound ++ ++ __ warn("dastore Unimplemented yet"); ++ __ gssdlec1(FSF, T2, AT); ++ } else { ++ index_check(T3, 
T2); ++ __ dsll(T2, T2, Address::times_8); ++ __ daddu(T3, T3, T2); ++ __ access_store_at(T_DOUBLE, IN_HEAP | IS_ARRAY, Address(T3, arrayOopDesc::base_offset_in_bytes(T_DOUBLE)), noreg, noreg, noreg); ++ } ++} ++ ++// used register : T2, T3, T8 ++// T2 : array ++// T3 : subklass ++// T8 : supklass ++void TemplateTable::aastore() { ++ Label is_null, ok_is_subtype, done; ++ transition(vtos, vtos); ++ // stack: ..., array, index, value ++ __ ld(FSR, at_tos()); // Value ++ __ lw(SSR, at_tos_p1()); // Index ++ __ ld(T2, at_tos_p2()); // Array ++ ++ // index_check(T2, SSR); ++ index_check_without_pop(T2, SSR); ++ // do array store check - check for NULL value first ++ __ beq(FSR, R0, is_null); ++ __ delayed()->nop(); ++ ++ // Move subklass into T3 ++ //add for compressedoops ++ __ load_klass(T3, FSR); ++ // Move superklass into T8 ++ //add for compressedoops ++ __ load_klass(T8, T2); ++ __ ld(T8, Address(T8, ObjArrayKlass::element_klass_offset())); ++ // Compress array+index*4+12 into a single register. T2 ++ __ dsll(AT, SSR, UseCompressedOops? Address::times_4 : Address::times_8); ++ __ daddu(T2, T2, AT); ++ __ daddiu(T2, T2, arrayOopDesc::base_offset_in_bytes(T_OBJECT)); ++ ++ // Generate subtype check. ++ // Superklass in T8. Subklass in T3. ++ __ gen_subtype_check(T8, T3, ok_is_subtype); ++ // Come here on failure ++ // object is at FSR ++ __ jmp(Interpreter::_throw_ArrayStoreException_entry); ++ __ delayed()->nop(); ++ // Come here on success ++ __ bind(ok_is_subtype); ++ do_oop_store(_masm, Address(T2, 0), FSR, IS_ARRAY); ++ __ b(done); ++ __ delayed()->nop(); ++ ++ // Have a NULL in FSR, T2=array, SSR=index. Store NULL at ary[idx] ++ __ bind(is_null); ++ __ profile_null_seen(T9); ++ __ dsll(AT, SSR, UseCompressedOops? Address::times_4 : Address::times_8); ++ __ daddu(T2, T2, AT); ++ do_oop_store(_masm, Address(T2, arrayOopDesc::base_offset_in_bytes(T_OBJECT)), noreg, IS_ARRAY); ++ ++ __ bind(done); ++ __ daddiu(SP, SP, 3 * Interpreter::stackElementSize); ++} ++ ++void TemplateTable::bastore() { ++ transition(itos, vtos); ++ __ pop_i(SSR); ++ if(UseBoundCheckInstruction) { ++ guarantee(false, "unimplemented yet!"); ++ __ pop_ptr(T2); ++ __ daddu(SSR, T2, SSR); ++ __ addiu(SSR, SSR, arrayOopDesc::base_offset_in_bytes(T_BYTE)); // base ++ ++ __ lw(AT, T2, arrayOopDesc::length_offset_in_bytes()); ++ __ daddu(AT, T2, AT); ++ __ addiu(AT, AT, arrayOopDesc::base_offset_in_bytes(T_BYTE)); //bound ++ ++ __ warn("bastore Unimplemented yet"); ++ __ gssble(FSR, SSR, AT); ++ } else { ++ index_check(T2, SSR); ++ ++ // Need to check whether array is boolean or byte ++ // since both types share the bastore bytecode. 
++ __ load_klass(T9, T2); ++ __ lw(T9, T9, in_bytes(Klass::layout_helper_offset())); ++ ++ int diffbit = Klass::layout_helper_boolean_diffbit(); ++ __ move(AT, diffbit); ++ ++ Label L_skip; ++ __ andr(AT, T9, AT); ++ __ beq(AT, R0, L_skip); ++ __ delayed()->nop(); ++ __ andi(FSR, FSR, 0x1); ++ __ bind(L_skip); ++ ++ __ daddu(SSR, T2, SSR); ++ __ access_store_at(T_BYTE, IN_HEAP | IS_ARRAY, Address(SSR, arrayOopDesc::base_offset_in_bytes(T_BYTE)), FSR, noreg, noreg); ++ } ++} ++ ++void TemplateTable::castore() { ++ transition(itos, vtos); ++ __ pop_i(SSR); ++ if(UseBoundCheckInstruction) { ++ __ pop_ptr(T2); ++ __ dsll(SSR, SSR, Address::times_2); ++ __ daddu(SSR, T2, SSR); ++ __ addiu(SSR, SSR, arrayOopDesc::base_offset_in_bytes(T_CHAR)); // base ++ ++ __ lw(AT, T2, arrayOopDesc::length_offset_in_bytes()); ++ __ dsll(AT, AT, Address::times_2); ++ __ daddu(AT, T2, AT); ++ __ addiu(AT, AT, arrayOopDesc::base_offset_in_bytes(T_CHAR)); //bound ++ ++ __ warn("castore Unimplemented yet"); ++ __ gsshle(FSR, SSR, AT); ++ } else { ++ index_check(T2, SSR); ++ __ dsll(SSR, SSR, Address::times_2); ++ __ daddu(SSR, T2, SSR); ++ __ access_store_at(T_CHAR, IN_HEAP | IS_ARRAY, Address(SSR, arrayOopDesc::base_offset_in_bytes(T_CHAR)), FSR, noreg, noreg); ++ } ++} ++ ++void TemplateTable::sastore() { ++ castore(); ++} ++ ++void TemplateTable::istore(int n) { ++ transition(itos, vtos); ++ __ sw(FSR, iaddress(n)); ++} ++ ++void TemplateTable::lstore(int n) { ++ transition(ltos, vtos); ++ __ sd(FSR, laddress(n)); ++} ++ ++void TemplateTable::fstore(int n) { ++ transition(ftos, vtos); ++ __ swc1(FSF, faddress(n)); ++} ++ ++void TemplateTable::dstore(int n) { ++ transition(dtos, vtos); ++ __ sdc1(FSF, laddress(n)); ++} ++ ++void TemplateTable::astore(int n) { ++ transition(vtos, vtos); ++ __ pop_ptr(FSR); ++ __ sd(FSR, aaddress(n)); ++} ++ ++void TemplateTable::pop() { ++ transition(vtos, vtos); ++ __ daddiu(SP, SP, Interpreter::stackElementSize); ++} ++ ++void TemplateTable::pop2() { ++ transition(vtos, vtos); ++ __ daddiu(SP, SP, 2 * Interpreter::stackElementSize); ++} ++ ++void TemplateTable::dup() { ++ transition(vtos, vtos); ++ // stack: ..., a ++ __ load_ptr(0, FSR); ++ __ push_ptr(FSR); ++ // stack: ..., a, a ++} ++ ++// blows FSR ++void TemplateTable::dup_x1() { ++ transition(vtos, vtos); ++ // stack: ..., a, b ++ __ load_ptr(0, FSR); // load b ++ __ load_ptr(1, A5); // load a ++ __ store_ptr(1, FSR); // store b ++ __ store_ptr(0, A5); // store a ++ __ push_ptr(FSR); // push b ++ // stack: ..., b, a, b ++} ++ ++// blows FSR ++void TemplateTable::dup_x2() { ++ transition(vtos, vtos); ++ // stack: ..., a, b, c ++ __ load_ptr(0, FSR); // load c ++ __ load_ptr(2, A5); // load a ++ __ store_ptr(2, FSR); // store c in a ++ __ push_ptr(FSR); // push c ++ // stack: ..., c, b, c, c ++ __ load_ptr(2, FSR); // load b ++ __ store_ptr(2, A5); // store a in b ++ // stack: ..., c, a, c, c ++ __ store_ptr(1, FSR); // store b in c ++ // stack: ..., c, a, b, c ++} ++ ++// blows FSR ++void TemplateTable::dup2() { ++ transition(vtos, vtos); ++ // stack: ..., a, b ++ __ load_ptr(1, FSR); // load a ++ __ push_ptr(FSR); // push a ++ __ load_ptr(1, FSR); // load b ++ __ push_ptr(FSR); // push b ++ // stack: ..., a, b, a, b ++} ++ ++// blows FSR ++void TemplateTable::dup2_x1() { ++ transition(vtos, vtos); ++ // stack: ..., a, b, c ++ __ load_ptr(0, T2); // load c ++ __ load_ptr(1, FSR); // load b ++ __ push_ptr(FSR); // push b ++ __ push_ptr(T2); // push c ++ // stack: ..., a, b, c, b, c ++ __ store_ptr(3, T2); // store c in b ++ 
// stack: ..., a, c, c, b, c ++ __ load_ptr(4, T2); // load a ++ __ store_ptr(2, T2); // store a in 2nd c ++ // stack: ..., a, c, a, b, c ++ __ store_ptr(4, FSR); // store b in a ++ // stack: ..., b, c, a, b, c ++ ++ // stack: ..., b, c, a, b, c ++} ++ ++// blows FSR, SSR ++void TemplateTable::dup2_x2() { ++ transition(vtos, vtos); ++ // stack: ..., a, b, c, d ++ // stack: ..., a, b, c, d ++ __ load_ptr(0, T2); // load d ++ __ load_ptr(1, FSR); // load c ++ __ push_ptr(FSR); // push c ++ __ push_ptr(T2); // push d ++ // stack: ..., a, b, c, d, c, d ++ __ load_ptr(4, FSR); // load b ++ __ store_ptr(2, FSR); // store b in d ++ __ store_ptr(4, T2); // store d in b ++ // stack: ..., a, d, c, b, c, d ++ __ load_ptr(5, T2); // load a ++ __ load_ptr(3, FSR); // load c ++ __ store_ptr(3, T2); // store a in c ++ __ store_ptr(5, FSR); // store c in a ++ // stack: ..., c, d, a, b, c, d ++ ++ // stack: ..., c, d, a, b, c, d ++} ++ ++// blows FSR ++void TemplateTable::swap() { ++ transition(vtos, vtos); ++ // stack: ..., a, b ++ ++ __ load_ptr(1, A5); // load a ++ __ load_ptr(0, FSR); // load b ++ __ store_ptr(0, A5); // store a in b ++ __ store_ptr(1, FSR); // store b in a ++ ++ // stack: ..., b, a ++} ++ ++void TemplateTable::iop2(Operation op) { ++ transition(itos, itos); ++ ++ __ pop_i(SSR); ++ switch (op) { ++ case add : __ addu32(FSR, SSR, FSR); break; ++ case sub : __ subu32(FSR, SSR, FSR); break; ++ case mul : __ mul(FSR, SSR, FSR); break; ++ case _and : __ andr(FSR, SSR, FSR); break; ++ case _or : __ orr(FSR, SSR, FSR); break; ++ case _xor : __ xorr(FSR, SSR, FSR); break; ++ case shl : __ sllv(FSR, SSR, FSR); break; ++ case shr : __ srav(FSR, SSR, FSR); break; ++ case ushr : __ srlv(FSR, SSR, FSR); break; ++ default : ShouldNotReachHere(); ++ } ++} ++ ++// the result stored in FSR, SSR, ++// used registers : T2, T3 ++void TemplateTable::lop2(Operation op) { ++ transition(ltos, ltos); ++ __ pop_l(T2); ++ ++ switch (op) { ++ case add : __ daddu(FSR, T2, FSR); break; ++ case sub : __ dsubu(FSR, T2, FSR); break; ++ case _and: __ andr(FSR, T2, FSR); break; ++ case _or : __ orr(FSR, T2, FSR); break; ++ case _xor: __ xorr(FSR, T2, FSR); break; ++ default : ShouldNotReachHere(); ++ } ++} ++ ++// java require this bytecode could handle 0x80000000/-1, dont cause a overflow exception, ++// the result is 0x80000000 ++// the godson2 cpu do the same, so we need not handle this specially like x86 ++void TemplateTable::idiv() { ++ transition(itos, itos); ++ Label not_zero; ++ ++ __ bne(FSR, R0, not_zero); ++ __ delayed()->nop(); ++ __ jmp(Interpreter::_throw_ArithmeticException_entry); ++ __ delayed()->nop(); ++ __ bind(not_zero); ++ ++ __ pop_i(SSR); ++ if (UseLEXT1) { ++ __ gsdiv(FSR, SSR, FSR); ++ } else { ++ __ div(SSR, FSR); ++ __ mflo(FSR); ++ } ++} ++ ++void TemplateTable::irem() { ++ transition(itos, itos); ++ Label not_zero; ++ __ pop_i(SSR); ++ __ div(SSR, FSR); ++ ++ __ bne(FSR, R0, not_zero); ++ __ delayed()->nop(); ++ //__ brk(7); ++ __ jmp(Interpreter::_throw_ArithmeticException_entry); ++ __ delayed()->nop(); ++ ++ __ bind(not_zero); ++ __ mfhi(FSR); ++} ++ ++void TemplateTable::lmul() { ++ transition(ltos, ltos); ++ __ pop_l(T2); ++ if (UseLEXT1) { ++ __ gsdmult(FSR, T2, FSR); ++ } else { ++ __ dmult(T2, FSR); ++ __ mflo(FSR); ++ } ++} ++ ++// NOTE: i DONT use the Interpreter::_throw_ArithmeticException_entry ++void TemplateTable::ldiv() { ++ transition(ltos, ltos); ++ Label normal; ++ ++ __ bne(FSR, R0, normal); ++ __ delayed()->nop(); ++ ++ //__ brk(7); //generate FPE ++ __ 
jmp(Interpreter::_throw_ArithmeticException_entry); ++ __ delayed()->nop(); ++ ++ __ bind(normal); ++ __ pop_l(A2); ++ if (UseLEXT1) { ++ __ gsddiv(FSR, A2, FSR); ++ } else { ++ __ ddiv(A2, FSR); ++ __ mflo(FSR); ++ } ++} ++ ++// NOTE: i DONT use the Interpreter::_throw_ArithmeticException_entry ++void TemplateTable::lrem() { ++ transition(ltos, ltos); ++ Label normal; ++ ++ __ bne(FSR, R0, normal); ++ __ delayed()->nop(); ++ ++ __ jmp(Interpreter::_throw_ArithmeticException_entry); ++ __ delayed()->nop(); ++ ++ __ bind(normal); ++ __ pop_l (A2); ++ ++ if (UseLEXT1) { ++ __ gsdmod(FSR, A2, FSR); ++ } else { ++ __ ddiv(A2, FSR); ++ __ mfhi(FSR); ++ } ++} ++ ++// result in FSR ++// used registers : T0 ++void TemplateTable::lshl() { ++ transition(itos, ltos); ++ __ pop_l(T0); ++ __ dsllv(FSR, T0, FSR); ++} ++ ++// used registers : T0 ++void TemplateTable::lshr() { ++ transition(itos, ltos); ++ __ pop_l(T0); ++ __ dsrav(FSR, T0, FSR); ++} ++ ++// used registers : T0 ++void TemplateTable::lushr() { ++ transition(itos, ltos); ++ __ pop_l(T0); ++ __ dsrlv(FSR, T0, FSR); ++} ++ ++// result in FSF ++void TemplateTable::fop2(Operation op) { ++ transition(ftos, ftos); ++ switch (op) { ++ case add: ++ __ lwc1(FTF, at_sp()); ++ __ add_s(FSF, FTF, FSF); ++ break; ++ case sub: ++ __ lwc1(FTF, at_sp()); ++ __ sub_s(FSF, FTF, FSF); ++ break; ++ case mul: ++ __ lwc1(FTF, at_sp()); ++ __ mul_s(FSF, FTF, FSF); ++ break; ++ case div: ++ __ lwc1(FTF, at_sp()); ++ __ div_s(FSF, FTF, FSF); ++ break; ++ case rem: ++ __ mov_s(F13, FSF); ++ __ lwc1(F12, at_sp()); ++ __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::frem), 2); ++ break; ++ default : ShouldNotReachHere(); ++ } ++ ++ __ daddiu(SP, SP, 1 * wordSize); ++} ++ ++// result in SSF||FSF ++// i dont handle the strict flags ++void TemplateTable::dop2(Operation op) { ++ transition(dtos, dtos); ++ switch (op) { ++ case add: ++ __ ldc1(FTF, at_sp()); ++ __ add_d(FSF, FTF, FSF); ++ break; ++ case sub: ++ __ ldc1(FTF, at_sp()); ++ __ sub_d(FSF, FTF, FSF); ++ break; ++ case mul: ++ __ ldc1(FTF, at_sp()); ++ __ mul_d(FSF, FTF, FSF); ++ break; ++ case div: ++ __ ldc1(FTF, at_sp()); ++ __ div_d(FSF, FTF, FSF); ++ break; ++ case rem: ++ __ mov_d(F13, FSF); ++ __ ldc1(F12, at_sp()); ++ __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::drem), 2); ++ break; ++ default : ShouldNotReachHere(); ++ } ++ ++ __ daddiu(SP, SP, 2 * wordSize); ++} ++ ++void TemplateTable::ineg() { ++ transition(itos, itos); ++ __ subu32(FSR, R0, FSR); ++} ++ ++void TemplateTable::lneg() { ++ transition(ltos, ltos); ++ __ dsubu(FSR, R0, FSR); ++} ++ ++void TemplateTable::fneg() { ++ transition(ftos, ftos); ++ __ neg_s(FSF, FSF); ++} ++ ++void TemplateTable::dneg() { ++ transition(dtos, dtos); ++ __ neg_d(FSF, FSF); ++} ++ ++// used registers : T2 ++void TemplateTable::iinc() { ++ transition(vtos, vtos); ++ locals_index(T2); ++ __ lw(FSR, T2, 0); ++ __ lb(AT, at_bcp(2)); // get constant ++ __ daddu(FSR, FSR, AT); ++ __ sw(FSR, T2, 0); ++} ++ ++// used register : T2 ++void TemplateTable::wide_iinc() { ++ transition(vtos, vtos); ++ locals_index_wide(T2); ++ __ get_2_byte_integer_at_bcp(FSR, AT, 4); ++ __ hswap(FSR); ++ __ lw(AT, T2, 0); ++ __ daddu(FSR, AT, FSR); ++ __ sw(FSR, T2, 0); ++} ++ ++void TemplateTable::convert() { ++ // Checking ++#ifdef ASSERT ++ { ++ TosState tos_in = ilgl; ++ TosState tos_out = ilgl; ++ switch (bytecode()) { ++ case Bytecodes::_i2l: // fall through ++ case Bytecodes::_i2f: // fall through ++ case Bytecodes::_i2d: // fall through ++ case Bytecodes::_i2b: // 
fall through ++ case Bytecodes::_i2c: // fall through ++ case Bytecodes::_i2s: tos_in = itos; break; ++ case Bytecodes::_l2i: // fall through ++ case Bytecodes::_l2f: // fall through ++ case Bytecodes::_l2d: tos_in = ltos; break; ++ case Bytecodes::_f2i: // fall through ++ case Bytecodes::_f2l: // fall through ++ case Bytecodes::_f2d: tos_in = ftos; break; ++ case Bytecodes::_d2i: // fall through ++ case Bytecodes::_d2l: // fall through ++ case Bytecodes::_d2f: tos_in = dtos; break; ++ default : ShouldNotReachHere(); ++ } ++ switch (bytecode()) { ++ case Bytecodes::_l2i: // fall through ++ case Bytecodes::_f2i: // fall through ++ case Bytecodes::_d2i: // fall through ++ case Bytecodes::_i2b: // fall through ++ case Bytecodes::_i2c: // fall through ++ case Bytecodes::_i2s: tos_out = itos; break; ++ case Bytecodes::_i2l: // fall through ++ case Bytecodes::_f2l: // fall through ++ case Bytecodes::_d2l: tos_out = ltos; break; ++ case Bytecodes::_i2f: // fall through ++ case Bytecodes::_l2f: // fall through ++ case Bytecodes::_d2f: tos_out = ftos; break; ++ case Bytecodes::_i2d: // fall through ++ case Bytecodes::_l2d: // fall through ++ case Bytecodes::_f2d: tos_out = dtos; break; ++ default : ShouldNotReachHere(); ++ } ++ transition(tos_in, tos_out); ++ } ++#endif // ASSERT ++ ++ // Conversion ++ switch (bytecode()) { ++ case Bytecodes::_i2l: ++ __ sll(FSR, FSR, 0); ++ break; ++ case Bytecodes::_i2f: ++ __ mtc1(FSR, FSF); ++ __ cvt_s_w(FSF, FSF); ++ break; ++ case Bytecodes::_i2d: ++ __ mtc1(FSR, FSF); ++ __ cvt_d_w(FSF, FSF); ++ break; ++ case Bytecodes::_i2b: ++ __ seb(FSR, FSR); ++ break; ++ case Bytecodes::_i2c: ++ __ andi(FSR, FSR, 0xFFFF); // truncate upper 56 bits ++ break; ++ case Bytecodes::_i2s: ++ __ seh(FSR, FSR); ++ break; ++ case Bytecodes::_l2i: ++ __ sll(FSR, FSR, 0); ++ break; ++ case Bytecodes::_l2f: ++ __ dmtc1(FSR, FSF); ++ __ cvt_s_l(FSF, FSF); ++ break; ++ case Bytecodes::_l2d: ++ __ dmtc1(FSR, FSF); ++ __ cvt_d_l(FSF, FSF); ++ break; ++ case Bytecodes::_f2i: ++ { ++ Label L; ++ ++ __ trunc_w_s(F12, FSF); ++ __ move(AT, 0x7fffffff); ++ __ mfc1(FSR, F12); ++ __ c_un_s(FSF, FSF); //NaN? ++ __ movt(FSR, R0); ++ ++ __ bne(AT, FSR, L); ++ __ delayed()->lui(T9, 0x8000); ++ ++ __ mfc1(AT, FSF); ++ __ andr(AT, AT, T9); ++ ++ __ movn(FSR, T9, AT); ++ ++ __ bind(L); ++ } ++ break; ++ case Bytecodes::_f2l: ++ { ++ Label L; ++ ++ __ trunc_l_s(F12, FSF); ++ __ daddiu(AT, R0, -1); ++ __ dsrl(AT, AT, 1); ++ __ dmfc1(FSR, F12); ++ __ c_un_s(FSF, FSF); //NaN? ++ __ movt(FSR, R0); ++ ++ __ bne(AT, FSR, L); ++ __ delayed()->lui(T9, 0x8000); ++ ++ __ mfc1(AT, FSF); ++ __ andr(AT, AT, T9); ++ ++ __ dsll32(T9, T9, 0); ++ __ movn(FSR, T9, AT); ++ ++ __ bind(L); ++ } ++ break; ++ case Bytecodes::_f2d: ++ __ cvt_d_s(FSF, FSF); ++ break; ++ case Bytecodes::_d2i: ++ { ++ Label L; ++ ++ __ trunc_w_d(F12, FSF); ++ __ move(AT, 0x7fffffff); ++ __ mfc1(FSR, F12); ++ ++ __ bne(FSR, AT, L); ++ __ delayed()->mtc1(R0, F12); ++ ++ __ cvt_d_w(F12, F12); ++ __ c_ult_d(FSF, F12); ++ __ bc1f(L); ++ __ delayed()->addiu(T9, R0, -1); ++ ++ __ c_un_d(FSF, FSF); //NaN? ++ __ subu32(FSR, T9, AT); ++ __ movt(FSR, R0); ++ ++ __ bind(L); ++ } ++ break; ++ case Bytecodes::_d2l: ++ { ++ Label L; ++ ++ __ trunc_l_d(F12, FSF); ++ __ daddiu(AT, R0, -1); ++ __ dsrl(AT, AT, 1); ++ __ dmfc1(FSR, F12); ++ ++ __ bne(FSR, AT, L); ++ __ delayed()->mtc1(R0, F12); ++ ++ __ cvt_d_w(F12, F12); ++ __ c_ult_d(FSF, F12); ++ __ bc1f(L); ++ __ delayed()->daddiu(T9, R0, -1); ++ ++ __ c_un_d(FSF, FSF); //NaN? 
++ __ subu(FSR, T9, AT); ++ __ movt(FSR, R0); ++ ++ __ bind(L); ++ } ++ break; ++ case Bytecodes::_d2f: ++ __ cvt_s_d(FSF, FSF); ++ break; ++ default : ++ ShouldNotReachHere(); ++ } ++} ++ ++void TemplateTable::lcmp() { ++ transition(ltos, itos); ++ ++ __ pop(T0); ++ __ pop(R0); ++ ++ __ slt(AT, T0, FSR); ++ __ slt(FSR, FSR, T0); ++ __ subu(FSR, FSR, AT); ++} ++ ++void TemplateTable::float_cmp(bool is_float, int unordered_result) { ++ __ ori(FSR, R0, 1); ++ __ ori(AT, R0, 1); ++ ++ if (is_float) { ++ __ lwc1(FTF, at_sp()); ++ __ daddiu(SP, SP, 1 * wordSize); ++ if (unordered_result < 0) { ++ __ c_olt_s(FSF, FTF); ++ __ movf(FSR, R0); ++ __ c_ult_s(FTF, FSF); ++ } else { ++ __ c_ult_s(FSF, FTF); ++ __ movf(FSR, R0); ++ __ c_olt_s(FTF, FSF); ++ } ++ } else { ++ __ ldc1(FTF, at_sp()); ++ __ daddiu(SP, SP, 2 * wordSize); ++ if (unordered_result < 0) { ++ __ c_olt_d(FSF, FTF); ++ __ movf(FSR, R0); ++ __ c_ult_d(FTF, FSF); ++ } else { ++ __ c_ult_d(FSF, FTF); ++ __ movf(FSR, R0); ++ __ c_olt_d(FTF, FSF); ++ } ++ } ++ ++ __ movf(AT, R0); ++ __ subu(FSR, FSR, AT); ++} ++ ++ ++// used registers : T3, A7, Rnext ++// FSR : return bci, this is defined by the vm specification ++// T2 : MDO taken count ++// T3 : method ++// A7 : offset ++// Rnext : next bytecode, this is required by dispatch_base ++void TemplateTable::branch(bool is_jsr, bool is_wide) { ++ __ get_method(T3); ++ __ profile_taken_branch(A7, T2); // only C2 meaningful ++ ++ const ByteSize be_offset = MethodCounters::backedge_counter_offset() + ++ InvocationCounter::counter_offset(); ++ const ByteSize inv_offset = MethodCounters::invocation_counter_offset() + ++ InvocationCounter::counter_offset(); ++ ++ // Load up T4 with the branch displacement ++ if (!is_wide) { ++ __ lb(A7, BCP, 1); ++ __ lbu(AT, BCP, 2); ++ __ dsll(A7, A7, 8); ++ __ orr(A7, A7, AT); ++ } else { ++ __ get_4_byte_integer_at_bcp(A7, AT, 1); ++ __ swap(A7); ++ } ++ ++ // Handle all the JSR stuff here, then exit. ++ // It's much shorter and cleaner than intermingling with the non-JSR ++ // normal-branch stuff occuring below. 
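++ // Reminder on jsr/ret semantics: jsr pushes its return address as a raw
++ // bytecode index (an int, not an oop) and branches to the target; a later
++ // ret bytecode reloads that bci from a local variable to resume after the
++ // jsr. That is why the block below pushes the bci with push_i and then
++ // dispatches at vtos.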
++ if (is_jsr) { ++ // Pre-load the next target bytecode into Rnext ++ __ daddu(AT, BCP, A7); ++ __ lbu(Rnext, AT, 0); ++ ++ // compute return address as bci in FSR ++ __ daddiu(FSR, BCP, (is_wide?5:3) - in_bytes(ConstMethod::codes_offset())); ++ __ ld(AT, T3, in_bytes(Method::const_offset())); ++ __ dsubu(FSR, FSR, AT); ++ // Adjust the bcp in BCP by the displacement in A7 ++ __ daddu(BCP, BCP, A7); ++ // jsr returns atos that is not an oop ++ // Push return address ++ __ push_i(FSR); ++ // jsr returns vtos ++ __ dispatch_only_noverify(vtos); ++ ++ return; ++ } ++ ++ // Normal (non-jsr) branch handling ++ ++ // Adjust the bcp in S0 by the displacement in T4 ++ __ daddu(BCP, BCP, A7); ++ ++ assert(UseLoopCounter || !UseOnStackReplacement, "on-stack-replacement requires loop counters"); ++ Label backedge_counter_overflow; ++ Label profile_method; ++ Label dispatch; ++ if (UseLoopCounter) { ++ // increment backedge counter for backward branches ++ // T3: method ++ // T4: target offset ++ // BCP: target bcp ++ // LVP: locals pointer ++ __ bgtz(A7, dispatch); // check if forward or backward branch ++ __ delayed()->nop(); ++ ++ // check if MethodCounters exists ++ Label has_counters; ++ __ ld(AT, T3, in_bytes(Method::method_counters_offset())); // use AT as MDO, TEMP ++ __ bne(AT, R0, has_counters); ++ __ delayed()->nop(); ++ __ push(T3); ++ __ call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::build_method_counters), ++ T3); ++ __ pop(T3); ++ __ ld(AT, T3, in_bytes(Method::method_counters_offset())); // use AT as MDO, TEMP ++ __ beq(AT, R0, dispatch); ++ __ delayed()->nop(); ++ __ bind(has_counters); ++ ++ Label no_mdo; ++ int increment = InvocationCounter::count_increment; ++ if (ProfileInterpreter) { ++ // Are we profiling? ++ __ ld(T0, Address(T3, in_bytes(Method::method_data_offset()))); ++ __ beq(T0, R0, no_mdo); ++ __ delayed()->nop(); ++ // Increment the MDO backedge counter ++ const Address mdo_backedge_counter(T0, in_bytes(MethodData::backedge_counter_offset()) + ++ in_bytes(InvocationCounter::counter_offset())); ++ const Address mask(T0, in_bytes(MethodData::backedge_mask_offset())); ++ __ increment_mask_and_jump(mdo_backedge_counter, increment, mask, ++ T1, false, Assembler::zero, ++ UseOnStackReplacement ? &backedge_counter_overflow : &dispatch); ++ __ beq(R0, R0, dispatch); ++ __ delayed()->nop(); ++ } ++ __ bind(no_mdo); ++ // Increment backedge counter in MethodCounters* ++ __ ld(T0, Address(T3, Method::method_counters_offset())); ++ const Address mask(T0, in_bytes(MethodCounters::backedge_mask_offset())); ++ __ increment_mask_and_jump(Address(T0, be_offset), increment, mask, ++ T1, false, Assembler::zero, ++ UseOnStackReplacement ? 
&backedge_counter_overflow : &dispatch); ++ __ bind(dispatch); ++ } ++ ++ // Pre-load the next target bytecode into Rnext ++ __ lbu(Rnext, BCP, 0); ++ ++ // continue with the bytecode @ target ++ // FSR: return bci for jsr's, unused otherwise ++ // Rnext: target bytecode ++ // BCP: target bcp ++ __ dispatch_only(vtos, true); ++ ++ if (UseLoopCounter && UseOnStackReplacement) { ++ // invocation counter overflow ++ __ bind(backedge_counter_overflow); ++ __ subu(A7, BCP, A7); // branch bcp ++ call_VM(NOREG, CAST_FROM_FN_PTR(address, ++ InterpreterRuntime::frequency_counter_overflow), A7); ++ ++ // V0: osr nmethod (osr ok) or NULL (osr not possible) ++ // V1: osr adapter frame return address ++ // LVP: locals pointer ++ // BCP: bcp ++ __ beq(V0, R0, dispatch); ++ __ delayed()->nop(); ++ // nmethod may have been invalidated (VM may block upon call_VM return) ++ __ lb(T3, V0, nmethod::state_offset()); ++ __ move(AT, nmethod::in_use); ++ __ bne(AT, T3, dispatch); ++ __ delayed()->nop(); ++ ++ // We have the address of an on stack replacement routine in rax. ++ // In preparation of invoking it, first we must migrate the locals ++ // and monitors from off the interpreter frame on the stack. ++ // Ensure to save the osr nmethod over the migration call, ++ // it will be preserved in Rnext. ++ __ move(Rnext, V0); ++ const Register thread = TREG; ++#ifndef OPT_THREAD ++ __ get_thread(thread); ++#endif ++ call_VM(noreg, CAST_FROM_FN_PTR(address, SharedRuntime::OSR_migration_begin)); ++ ++ // V0 is OSR buffer, move it to expected parameter location ++ // refer to osrBufferPointer in c1_LIRAssembler_mips.cpp ++ __ move(T0, V0); ++ ++ // pop the interpreter frame ++ __ ld(A7, Address(FP, frame::interpreter_frame_sender_sp_offset * wordSize)); ++ //FIXME, shall we keep the return address on the stack? 
++ __ leave(); // remove frame anchor ++ __ move(LVP, RA); ++ __ move(SP, A7); ++ ++ assert(StackAlignmentInBytes == 16, "must be"); ++ __ dins(SP, R0, 0, 4); ++ ++ // push the (possibly adjusted) return address ++ //refer to osr_entry in c1_LIRAssembler_mips.cpp ++ __ ld(AT, Rnext, nmethod::osr_entry_point_offset()); ++ __ jr(AT); ++ __ delayed()->nop(); ++ } ++} ++ ++ ++void TemplateTable::if_0cmp(Condition cc) { ++ transition(itos, vtos); ++ // assume branch is more often taken than not (loops use backward branches) ++ Label not_taken; ++ switch(cc) { ++ case not_equal: ++ __ beq(FSR, R0, not_taken); ++ break; ++ case equal: ++ __ bne(FSR, R0, not_taken); ++ break; ++ case less: ++ __ bgez(FSR, not_taken); ++ break; ++ case less_equal: ++ __ bgtz(FSR, not_taken); ++ break; ++ case greater: ++ __ blez(FSR, not_taken); ++ break; ++ case greater_equal: ++ __ bltz(FSR, not_taken); ++ break; ++ } ++ __ delayed()->nop(); ++ ++ branch(false, false); ++ ++ __ bind(not_taken); ++ __ profile_not_taken_branch(FSR); ++} ++ ++void TemplateTable::if_icmp(Condition cc) { ++ transition(itos, vtos); ++ // assume branch is more often taken than not (loops use backward branches) ++ Label not_taken; ++ ++ __ pop_i(SSR); ++ switch(cc) { ++ case not_equal: ++ __ beq(SSR, FSR, not_taken); ++ break; ++ case equal: ++ __ bne(SSR, FSR, not_taken); ++ break; ++ case less: ++ __ slt(AT, SSR, FSR); ++ __ beq(AT, R0, not_taken); ++ break; ++ case less_equal: ++ __ slt(AT, FSR, SSR); ++ __ bne(AT, R0, not_taken); ++ break; ++ case greater: ++ __ slt(AT, FSR, SSR); ++ __ beq(AT, R0, not_taken); ++ break; ++ case greater_equal: ++ __ slt(AT, SSR, FSR); ++ __ bne(AT, R0, not_taken); ++ break; ++ } ++ __ delayed()->nop(); ++ ++ branch(false, false); ++ __ bind(not_taken); ++ __ profile_not_taken_branch(FSR); ++} ++ ++void TemplateTable::if_nullcmp(Condition cc) { ++ transition(atos, vtos); ++ // assume branch is more often taken than not (loops use backward branches) ++ Label not_taken; ++ switch(cc) { ++ case not_equal: ++ __ beq(FSR, R0, not_taken); ++ break; ++ case equal: ++ __ bne(FSR, R0, not_taken); ++ break; ++ default: ++ ShouldNotReachHere(); ++ } ++ __ delayed()->nop(); ++ ++ branch(false, false); ++ __ bind(not_taken); ++ __ profile_not_taken_branch(FSR); ++} ++ ++ ++void TemplateTable::if_acmp(Condition cc) { ++ transition(atos, vtos); ++ // assume branch is more often taken than not (loops use backward branches) ++ Label not_taken; ++ // __ lw(SSR, SP, 0); ++ __ pop_ptr(SSR); ++ switch(cc) { ++ case not_equal: ++ __ beq(SSR, FSR, not_taken); ++ break; ++ case equal: ++ __ bne(SSR, FSR, not_taken); ++ break; ++ default: ++ ShouldNotReachHere(); ++ } ++ __ delayed()->nop(); ++ ++ branch(false, false); ++ ++ __ bind(not_taken); ++ __ profile_not_taken_branch(FSR); ++} ++ ++// used registers : T1, T2, T3 ++// T1 : method ++// T2 : returb bci ++void TemplateTable::ret() { ++ transition(vtos, vtos); ++ ++ locals_index(T2); ++ __ ld(T2, T2, 0); ++ __ profile_ret(T2, T3); ++ ++ __ get_method(T1); ++ __ ld(BCP, T1, in_bytes(Method::const_offset())); ++ __ daddu(BCP, BCP, T2); ++ __ daddiu(BCP, BCP, in_bytes(ConstMethod::codes_offset())); ++ ++ __ dispatch_next(vtos, 0, true); ++} ++ ++// used registers : T1, T2, T3 ++// T1 : method ++// T2 : returb bci ++void TemplateTable::wide_ret() { ++ transition(vtos, vtos); ++ ++ locals_index_wide(T2); ++ __ ld(T2, T2, 0); // get return bci, compute return bcp ++ __ profile_ret(T2, T3); ++ ++ __ get_method(T1); ++ __ ld(BCP, T1, in_bytes(Method::const_offset())); ++ __ 
daddu(BCP, BCP, T2); ++ __ daddiu(BCP, BCP, in_bytes(ConstMethod::codes_offset())); ++ ++ __ dispatch_next(vtos, 0, true); ++} ++ ++// used register T2, T3, A7, Rnext ++// T2 : bytecode pointer ++// T3 : low ++// A7 : high ++// Rnext : dest bytecode, required by dispatch_base ++void TemplateTable::tableswitch() { ++ Label default_case, continue_execution; ++ transition(itos, vtos); ++ ++ // align BCP ++ __ daddiu(T2, BCP, BytesPerInt); ++ __ li(AT, -BytesPerInt); ++ __ andr(T2, T2, AT); ++ ++ // load lo & hi ++ __ lw(T3, T2, 1 * BytesPerInt); ++ __ swap(T3); ++ __ lw(A7, T2, 2 * BytesPerInt); ++ __ swap(A7); ++ ++ // check against lo & hi ++ __ slt(AT, FSR, T3); ++ __ bne(AT, R0, default_case); ++ __ delayed()->nop(); ++ ++ __ slt(AT, A7, FSR); ++ __ bne(AT, R0, default_case); ++ __ delayed()->nop(); ++ ++ // lookup dispatch offset, in A7 big endian ++ __ dsubu(FSR, FSR, T3); ++ __ dsll(AT, FSR, Address::times_4); ++ __ daddu(AT, T2, AT); ++ __ lw(A7, AT, 3 * BytesPerInt); ++ __ profile_switch_case(FSR, T9, T3); ++ ++ __ bind(continue_execution); ++ __ swap(A7); ++ __ daddu(BCP, BCP, A7); ++ __ lbu(Rnext, BCP, 0); ++ __ dispatch_only(vtos, true); ++ ++ // handle default ++ __ bind(default_case); ++ __ profile_switch_default(FSR); ++ __ lw(A7, T2, 0); ++ __ b(continue_execution); ++ __ delayed()->nop(); ++} ++ ++void TemplateTable::lookupswitch() { ++ transition(itos, itos); ++ __ stop("lookupswitch bytecode should have been rewritten"); ++} ++ ++// used registers : T2, T3, A7, Rnext ++// T2 : bytecode pointer ++// T3 : pair index ++// A7 : offset ++// Rnext : dest bytecode ++// the data after the opcode is the same as lookupswitch ++// see Rewriter::rewrite_method for more information ++void TemplateTable::fast_linearswitch() { ++ transition(itos, vtos); ++ Label loop_entry, loop, found, continue_execution; ++ ++ // swap FSR so we can avoid swapping the table entries ++ __ swap(FSR); ++ ++ // align BCP ++ __ daddiu(T2, BCP, BytesPerInt); ++ __ li(AT, -BytesPerInt); ++ __ andr(T2, T2, AT); ++ ++ // set counter ++ __ lw(T3, T2, BytesPerInt); ++ __ swap(T3); ++ __ b(loop_entry); ++ __ delayed()->nop(); ++ ++ // table search ++ __ bind(loop); ++ // get the entry value ++ __ dsll(AT, T3, Address::times_8); ++ __ daddu(AT, T2, AT); ++ __ lw(AT, AT, 2 * BytesPerInt); ++ ++ // found? ++ __ beq(FSR, AT, found); ++ __ delayed()->nop(); ++ ++ __ bind(loop_entry); ++ __ bgtz(T3, loop); ++ __ delayed()->daddiu(T3, T3, -1); ++ ++ // default case ++ __ profile_switch_default(FSR); ++ __ lw(A7, T2, 0); ++ __ b(continue_execution); ++ __ delayed()->nop(); ++ ++ // entry found -> get offset ++ __ bind(found); ++ __ dsll(AT, T3, Address::times_8); ++ __ daddu(AT, T2, AT); ++ __ lw(A7, AT, 3 * BytesPerInt); ++ __ profile_switch_case(T3, FSR, T2); ++ ++ // continue execution ++ __ bind(continue_execution); ++ __ swap(A7); ++ __ daddu(BCP, BCP, A7); ++ __ lbu(Rnext, BCP, 0); ++ __ dispatch_only(vtos, true); ++} ++ ++// used registers : T0, T1, T2, T3, A7, Rnext ++// T2 : pairs address(array) ++// Rnext : dest bytecode ++// the data after the opcode is the same as lookupswitch ++// see Rewriter::rewrite_method for more information ++void TemplateTable::fast_binaryswitch() { ++ transition(itos, vtos); ++ // Implementation using the following core algorithm: ++ // ++ // int binary_search(int key, LookupswitchPair* array, int n) { ++ // // Binary search according to "Methodik des Programmierens" by ++ // // Edsger W. Dijkstra and W.H.J. Feijen, Addison Wesley Germany 1985. 
++ // int i = 0; ++ // int j = n; ++ // while (i+1 < j) { ++ // // invariant P: 0 <= i < j <= n and (a[i] <= key < a[j] or Q) ++ // // with Q: for all i: 0 <= i < n: key < a[i] ++ // // where a stands for the array and assuming that the (inexisting) ++ // // element a[n] is infinitely big. ++ // int h = (i + j) >> 1; ++ // // i < h < j ++ // if (key < array[h].fast_match()) { ++ // j = h; ++ // } else { ++ // i = h; ++ // } ++ // } ++ // // R: a[i] <= key < a[i+1] or Q ++ // // (i.e., if key is within array, i is the correct index) ++ // return i; ++ // } ++ ++ // register allocation ++ const Register array = T2; ++ const Register i = T3, j = A7; ++ const Register h = T1; ++ const Register temp = T0; ++ const Register key = FSR; ++ ++ // setup array ++ __ daddiu(array, BCP, 3*BytesPerInt); ++ __ li(AT, -BytesPerInt); ++ __ andr(array, array, AT); ++ ++ // initialize i & j ++ __ move(i, R0); ++ __ lw(j, array, - 1 * BytesPerInt); ++ // Convert j into native byteordering ++ __ swap(j); ++ ++ // and start ++ Label entry; ++ __ b(entry); ++ __ delayed()->nop(); ++ ++ // binary search loop ++ { ++ Label loop; ++ __ bind(loop); ++ // int h = (i + j) >> 1; ++ __ daddu(h, i, j); ++ __ dsrl(h, h, 1); ++ // if (key < array[h].fast_match()) { ++ // j = h; ++ // } else { ++ // i = h; ++ // } ++ // Convert array[h].match to native byte-ordering before compare ++ __ dsll(AT, h, Address::times_8); ++ __ daddu(AT, array, AT); ++ __ lw(temp, AT, 0 * BytesPerInt); ++ __ swap(temp); ++ ++ __ slt(AT, key, temp); ++ __ movz(i, h, AT); ++ __ movn(j, h, AT); ++ ++ // while (i+1 < j) ++ __ bind(entry); ++ __ daddiu(h, i, 1); ++ __ slt(AT, h, j); ++ __ bne(AT, R0, loop); ++ __ delayed()->nop(); ++ } ++ ++ // end of binary search, result index is i (must check again!) ++ Label default_case; ++ // Convert array[i].match to native byte-ordering before compare ++ __ dsll(AT, i, Address::times_8); ++ __ daddu(AT, array, AT); ++ __ lw(temp, AT, 0 * BytesPerInt); ++ __ swap(temp); ++ __ bne(key, temp, default_case); ++ __ delayed()->nop(); ++ ++ // entry found -> j = offset ++ __ dsll(AT, i, Address::times_8); ++ __ daddu(AT, array, AT); ++ __ lw(j, AT, 1 * BytesPerInt); ++ __ profile_switch_case(i, key, array); ++ __ swap(j); ++ ++ __ daddu(BCP, BCP, j); ++ __ lbu(Rnext, BCP, 0); ++ __ dispatch_only(vtos, true); ++ ++ // default case -> j = default offset ++ __ bind(default_case); ++ __ profile_switch_default(i); ++ __ lw(j, array, - 2 * BytesPerInt); ++ __ swap(j); ++ __ daddu(BCP, BCP, j); ++ __ lbu(Rnext, BCP, 0); ++ __ dispatch_only(vtos, true); ++} ++ ++void TemplateTable::_return(TosState state) { ++ transition(state, state); ++ assert(_desc->calls_vm(), ++ "inconsistent calls_vm information"); // call in remove_activation ++ ++ if (_desc->bytecode() == Bytecodes::_return_register_finalizer) { ++ assert(state == vtos, "only valid state"); ++ __ ld(T1, aaddress(0)); ++ __ load_klass(LVP, T1); ++ __ lw(LVP, LVP, in_bytes(Klass::access_flags_offset())); ++ __ move(AT, JVM_ACC_HAS_FINALIZER); ++ __ andr(AT, AT, LVP); ++ Label skip_register_finalizer; ++ __ beq(AT, R0, skip_register_finalizer); ++ __ delayed()->nop(); ++ __ call_VM(noreg, CAST_FROM_FN_PTR(address, ++ InterpreterRuntime::register_finalizer), T1); ++ __ bind(skip_register_finalizer); ++ } ++ ++ Register thread = TREG; ++#ifndef OPT_THREAD ++ __ get_thread(thread); ++#endif ++ ++ if (_desc->bytecode() != Bytecodes::_return_register_finalizer) { ++ Label no_safepoint; ++ NOT_PRODUCT(__ block_comment("Thread-local Safepoint poll")); ++ __ lb(AT, thread, 
in_bytes(JavaThread::polling_word_offset())); ++ __ andi(AT, AT, SafepointMechanism::poll_bit()); ++ __ beq(AT, R0, no_safepoint); ++ __ delayed()->nop(); ++ __ push(state); ++ __ call_VM(noreg, CAST_FROM_FN_PTR(address, ++ InterpreterRuntime::at_safepoint)); ++ __ pop(state); ++ __ bind(no_safepoint); ++ } ++ ++ // Narrow result if state is itos but result type is smaller. ++ // Need to narrow in the return bytecode rather than in generate_return_entry ++ // since compiled code callers expect the result to already be narrowed. ++ if (state == itos) { ++ __ narrow(FSR); ++ } ++ ++ __ remove_activation(state, T9); ++ __ sync(); ++ ++ __ jr(T9); ++ __ delayed()->nop(); ++} ++ ++// ---------------------------------------------------------------------------- ++// Volatile variables demand their effects be made known to all CPU's ++// in order. Store buffers on most chips allow reads & writes to ++// reorder; the JMM's ReadAfterWrite.java test fails in -Xint mode ++// without some kind of memory barrier (i.e., it's not sufficient that ++// the interpreter does not reorder volatile references, the hardware ++// also must not reorder them). ++// ++// According to the new Java Memory Model (JMM): ++// (1) All volatiles are serialized wrt to each other. ALSO reads & ++// writes act as aquire & release, so: ++// (2) A read cannot let unrelated NON-volatile memory refs that ++// happen after the read float up to before the read. It's OK for ++// non-volatile memory refs that happen before the volatile read to ++// float down below it. ++// (3) Similar a volatile write cannot let unrelated NON-volatile ++// memory refs that happen BEFORE the write float down to after the ++// write. It's OK for non-volatile memory refs that happen after the ++// volatile write to float up before it. ++// ++// We only put in barriers around volatile refs (they are expensive), ++// not _between_ memory refs (that would require us to track the ++// flavor of the previous memory refs). Requirements (2) and (3) ++// require some barriers before volatile stores and after volatile ++// loads. These nearly cover requirement (1) but miss the ++// volatile-store-volatile-load case. This final case is placed after ++// volatile-stores although it could just as well go before ++// volatile-loads. ++void TemplateTable::volatile_barrier() { ++ if(os::is_MP()) __ sync(); ++} ++ ++// we dont shift left 2 bits in get_cache_and_index_at_bcp ++// for we always need shift the index we use it. the ConstantPoolCacheEntry ++// is 16-byte long, index is the index in ++// ConstantPoolCache, so cache + base_offset() + index * 16 is ++// the corresponding ConstantPoolCacheEntry ++// used registers : T2 ++// NOTE : the returned index need also shift left 4 to get the address! ++void TemplateTable::resolve_cache_and_index(int byte_no, ++ Register Rcache, ++ Register index, ++ size_t index_size) { ++ assert(byte_no == f1_byte || byte_no == f2_byte, "byte_no out of range"); ++ const Register temp = A1; ++ assert_different_registers(Rcache, index); ++ ++ Label resolved, clinit_barrier_slow; ++ ++ Bytecodes::Code code = bytecode(); ++ switch (code) { ++ case Bytecodes::_nofast_getfield: code = Bytecodes::_getfield; break; ++ case Bytecodes::_nofast_putfield: code = Bytecodes::_putfield; break; ++ default: break; ++ } ++ ++ __ get_cache_and_index_and_bytecode_at_bcp(Rcache, index, temp, byte_no, 1, index_size); ++ // is resolved? 
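++ // A cp cache entry records the bytecode that resolved it; if the value
++ // loaded into temp above already matches the current bytecode, the entry
++ // is resolved and the slow call into InterpreterRuntime can be skipped.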
++ int i = (int)code; ++ __ addiu(temp, temp, -i); ++ __ beq(temp, R0, resolved); ++ __ delayed()->nop(); ++ ++ // resolve first time through ++ // Class initialization barrier slow path lands here as well. ++ __ bind(clinit_barrier_slow); ++ address entry = CAST_FROM_FN_PTR(address, InterpreterRuntime::resolve_from_cache); ++ ++ __ move(temp, i); ++ __ call_VM(NOREG, entry, temp); ++ ++ // Update registers with resolved info ++ __ get_cache_and_index_at_bcp(Rcache, index, 1, index_size); ++ __ bind(resolved); ++ ++ // Class initialization barrier for static methods ++ if (VM_Version::supports_fast_class_init_checks() && bytecode() == Bytecodes::_invokestatic) { ++ __ load_resolved_method_at_index(byte_no, temp, Rcache, index); ++ __ load_method_holder(temp, temp); ++ __ clinit_barrier(temp, AT, NULL, &clinit_barrier_slow); ++ } ++} ++ ++// The Rcache and index registers must be set before call ++void TemplateTable::load_field_cp_cache_entry(Register obj, ++ Register cache, ++ Register index, ++ Register off, ++ Register flags, ++ bool is_static = false) { ++ assert_different_registers(cache, index, flags, off); ++ ++ ByteSize cp_base_offset = ConstantPoolCache::base_offset(); ++ // Field offset ++ __ dsll(AT, index, Address::times_ptr); ++ __ daddu(AT, cache, AT); ++ __ ld(off, AT, in_bytes(cp_base_offset + ConstantPoolCacheEntry::f2_offset())); ++ // Flags ++ __ ld(flags, AT, in_bytes(cp_base_offset + ConstantPoolCacheEntry::flags_offset())); ++ ++ // klass overwrite register ++ if (is_static) { ++ __ ld(obj, AT, in_bytes(cp_base_offset + ConstantPoolCacheEntry::f1_offset())); ++ const int mirror_offset = in_bytes(Klass::java_mirror_offset()); ++ __ ld(obj, Address(obj, mirror_offset)); ++ ++ __ resolve_oop_handle(obj, T9); ++ } ++} ++ ++// get the method, itable_index and flags of the current invoke ++void TemplateTable::load_invoke_cp_cache_entry(int byte_no, ++ Register method, ++ Register itable_index, ++ Register flags, ++ bool is_invokevirtual, ++ bool is_invokevfinal, /*unused*/ ++ bool is_invokedynamic) { ++ // setup registers ++ const Register cache = T3; ++ const Register index = T1; ++ assert_different_registers(method, flags); ++ assert_different_registers(method, cache, index); ++ assert_different_registers(itable_index, flags); ++ assert_different_registers(itable_index, cache, index); ++ assert(is_invokevirtual == (byte_no == f2_byte), "is invokevirtual flag redundant"); ++ // determine constant pool cache field offsets ++ const int method_offset = in_bytes( ++ ConstantPoolCache::base_offset() + ++ ((byte_no == f2_byte) ++ ? ConstantPoolCacheEntry::f2_offset() ++ : ConstantPoolCacheEntry::f1_offset())); ++ const int flags_offset = in_bytes(ConstantPoolCache::base_offset() + ++ ConstantPoolCacheEntry::flags_offset()); ++ // access constant pool cache fields ++ const int index_offset = in_bytes(ConstantPoolCache::base_offset() + ++ ConstantPoolCacheEntry::f2_offset()); ++ ++ size_t index_size = (is_invokedynamic ? sizeof(u4): sizeof(u2)); ++ resolve_cache_and_index(byte_no, cache, index, index_size); ++ ++ //assert(wordSize == 8, "adjust code below"); ++ // note we shift 4 not 2, for we get is the true inde ++ // of ConstantPoolCacheEntry, not the shifted 2-bit index as x86 version ++ __ dsll(AT, index, Address::times_ptr); ++ __ daddu(AT, cache, AT); ++ __ ld(method, AT, method_offset); ++ ++ if (itable_index != NOREG) { ++ __ ld(itable_index, AT, index_offset); ++ } ++ __ ld(flags, AT, flags_offset); ++} ++ ++// The registers cache and index expected to be set before call. 
++// Correct values of the cache and index registers are preserved. ++void TemplateTable::jvmti_post_field_access(Register cache, Register index, ++ bool is_static, bool has_tos) { ++ // do the JVMTI work here to avoid disturbing the register state below ++ // We use c_rarg registers here because we want to use the register used in ++ // the call to the VM ++ if (JvmtiExport::can_post_field_access()) { ++ // Check to see if a field access watch has been set before we ++ // take the time to call into the VM. ++ Label L1; ++ // kill FSR ++ Register tmp1 = T2; ++ Register tmp2 = T1; ++ Register tmp3 = T3; ++ assert_different_registers(cache, index, AT); ++ __ li(AT, (intptr_t)JvmtiExport::get_field_access_count_addr()); ++ __ lw(AT, AT, 0); ++ __ beq(AT, R0, L1); ++ __ delayed()->nop(); ++ ++ __ get_cache_and_index_at_bcp(tmp2, tmp3, 1); ++ ++ // cache entry pointer ++ __ daddiu(tmp2, tmp2, in_bytes(ConstantPoolCache::base_offset())); ++ __ shl(tmp3, LogBytesPerWord); ++ __ daddu(tmp2, tmp2, tmp3); ++ if (is_static) { ++ __ move(tmp1, R0); ++ } else { ++ __ ld(tmp1, SP, 0); ++ __ verify_oop(tmp1); ++ } ++ // tmp1: object pointer or NULL ++ // tmp2: cache entry pointer ++ // tmp3: jvalue object on the stack ++ __ call_VM(NOREG, CAST_FROM_FN_PTR(address, ++ InterpreterRuntime::post_field_access), ++ tmp1, tmp2, tmp3); ++ __ get_cache_and_index_at_bcp(cache, index, 1); ++ __ bind(L1); ++ } ++} ++ ++void TemplateTable::pop_and_check_object(Register r) { ++ __ pop_ptr(r); ++ __ null_check(r); // for field access must check obj. ++ __ verify_oop(r); ++} ++ ++// used registers : T1, T2, T3, T1 ++// T1 : flags ++// T2 : off ++// T3 : obj ++// T1 : field address ++// The flags 31, 30, 29, 28 together build a 4 bit number 0 to 8 with the ++// following mapping to the TosState states: ++// btos: 0 ++// ctos: 1 ++// stos: 2 ++// itos: 3 ++// ltos: 4 ++// ftos: 5 ++// dtos: 6 ++// atos: 7 ++// vtos: 8 ++// see ConstantPoolCacheEntry::set_field for more info ++void TemplateTable::getfield_or_static(int byte_no, bool is_static, RewriteControl rc) { ++ transition(vtos, vtos); ++ ++ const Register cache = T3; ++ const Register index = T0; ++ ++ const Register obj = T3; ++ const Register off = T2; ++ const Register flags = T1; ++ ++ const Register scratch = T8; ++ ++ resolve_cache_and_index(byte_no, cache, index, sizeof(u2)); ++ jvmti_post_field_access(cache, index, is_static, false); ++ load_field_cp_cache_entry(obj, cache, index, off, flags, is_static); ++ ++ { ++ __ move(scratch, 1 << ConstantPoolCacheEntry::is_volatile_shift); ++ __ andr(scratch, scratch, flags); ++ ++ Label notVolatile; ++ __ beq(scratch, R0, notVolatile); ++ __ delayed()->nop(); ++ volatile_barrier(); ++ __ bind(notVolatile); ++ } ++ ++ if (!is_static) pop_and_check_object(obj); ++ __ daddu(index, obj, off); ++ ++ const Address field(index, 0); ++ ++ Label Done, notByte, notBool, notInt, notShort, notChar, ++ notLong, notFloat, notObj, notDouble; ++ ++ assert(btos == 0, "change code, btos != 0"); ++ __ dsrl(flags, flags, ConstantPoolCacheEntry::tos_state_shift); ++ __ andi(flags, flags, ConstantPoolCacheEntry::tos_state_mask); ++ __ bne(flags, R0, notByte); ++ __ delayed()->nop(); ++ ++ // btos ++ __ access_load_at(T_BYTE, IN_HEAP, FSR, field, noreg, noreg); ++ __ push(btos); ++ ++ // Rewrite bytecode to be faster ++ if (!is_static && rc == may_rewrite) { ++ patch_bytecode(Bytecodes::_fast_bgetfield, T3, T2); ++ } ++ __ b(Done); ++ __ delayed()->nop(); ++ ++ ++ __ bind(notByte); ++ __ move(AT, ztos); ++ __ bne(flags, AT, notBool); ++ __ 
delayed()->nop(); ++ ++ // ztos ++ __ access_load_at(T_BOOLEAN, IN_HEAP, FSR, field, noreg, noreg); ++ __ push(ztos); ++ ++ // Rewrite bytecode to be faster ++ if (!is_static && rc == may_rewrite) { ++ patch_bytecode(Bytecodes::_fast_bgetfield, T3, T2); ++ } ++ __ b(Done); ++ __ delayed()->nop(); ++ ++ ++ __ bind(notBool); ++ __ move(AT, itos); ++ __ bne(flags, AT, notInt); ++ __ delayed()->nop(); ++ ++ // itos ++ __ access_load_at(T_INT, IN_HEAP, FSR, field, noreg, noreg); ++ __ push(itos); ++ ++ // Rewrite bytecode to be faster ++ if (!is_static && rc == may_rewrite) { ++ patch_bytecode(Bytecodes::_fast_igetfield, T3, T2); ++ } ++ __ b(Done); ++ __ delayed()->nop(); ++ ++ __ bind(notInt); ++ __ move(AT, atos); ++ __ bne(flags, AT, notObj); ++ __ delayed()->nop(); ++ ++ // atos ++ //add for compressedoops ++ do_oop_load(_masm, Address(index, 0), FSR, IN_HEAP); ++ __ push(atos); ++ ++ if (!is_static && rc == may_rewrite) { ++ patch_bytecode(Bytecodes::_fast_agetfield, T3, T2); ++ } ++ __ b(Done); ++ __ delayed()->nop(); ++ ++ __ bind(notObj); ++ __ move(AT, ctos); ++ __ bne(flags, AT, notChar); ++ __ delayed()->nop(); ++ ++ // ctos ++ __ access_load_at(T_CHAR, IN_HEAP, FSR, field, noreg, noreg); ++ __ push(ctos); ++ ++ if (!is_static && rc == may_rewrite) { ++ patch_bytecode(Bytecodes::_fast_cgetfield, T3, T2); ++ } ++ __ b(Done); ++ __ delayed()->nop(); ++ ++ __ bind(notChar); ++ __ move(AT, stos); ++ __ bne(flags, AT, notShort); ++ __ delayed()->nop(); ++ ++ // stos ++ __ access_load_at(T_SHORT, IN_HEAP, FSR, field, noreg, noreg); ++ __ push(stos); ++ ++ if (!is_static && rc == may_rewrite) { ++ patch_bytecode(Bytecodes::_fast_sgetfield, T3, T2); ++ } ++ __ b(Done); ++ __ delayed()->nop(); ++ ++ __ bind(notShort); ++ __ move(AT, ltos); ++ __ bne(flags, AT, notLong); ++ __ delayed()->nop(); ++ ++ // FIXME : the load/store should be atomic, we have no simple method to do this in mips32 ++ // ltos ++ __ access_load_at(T_LONG, IN_HEAP | MO_RELAXED, FSR, field, noreg, noreg); ++ __ push(ltos); ++ ++ // Don't rewrite to _fast_lgetfield for potential volatile case. ++ __ b(Done); ++ __ delayed()->nop(); ++ ++ __ bind(notLong); ++ __ move(AT, ftos); ++ __ bne(flags, AT, notFloat); ++ __ delayed()->nop(); ++ ++ // ftos ++ __ access_load_at(T_FLOAT, IN_HEAP, noreg /* ftos */, field, noreg, noreg); ++ __ push(ftos); ++ ++ if (!is_static && rc == may_rewrite) { ++ patch_bytecode(Bytecodes::_fast_fgetfield, T3, T2); ++ } ++ __ b(Done); ++ __ delayed()->nop(); ++ ++ __ bind(notFloat); ++ __ move(AT, dtos); ++#ifdef ASSERT ++ __ bne(flags, AT, notDouble); ++ __ delayed()->nop(); ++#endif ++ ++ // dtos ++ __ access_load_at(T_DOUBLE, IN_HEAP, noreg /* dtos */, field, noreg, noreg); ++ __ push(dtos); ++ ++ if (!is_static && rc == may_rewrite) { ++ patch_bytecode(Bytecodes::_fast_dgetfield, T3, T2); ++ } ++ ++#ifdef ASSERT ++ __ b(Done); ++ __ delayed()->nop(); ++ __ bind(notDouble); ++ __ stop("Bad state"); ++#endif ++ ++ __ bind(Done); ++ ++ { ++ Label notVolatile; ++ __ beq(scratch, R0, notVolatile); ++ __ delayed()->nop(); ++ volatile_barrier(); ++ __ bind(notVolatile); ++ } ++} ++ ++ ++void TemplateTable::getfield(int byte_no) { ++ getfield_or_static(byte_no, false); ++} ++ ++void TemplateTable::nofast_getfield(int byte_no) { ++ getfield_or_static(byte_no, false, may_not_rewrite); ++} ++ ++void TemplateTable::getstatic(int byte_no) { ++ getfield_or_static(byte_no, true); ++} ++ ++// The registers cache and index expected to be set before call. 
++// The function may destroy various registers, just not the cache and index registers. ++void TemplateTable::jvmti_post_field_mod(Register cache, Register index, bool is_static) { ++ transition(vtos, vtos); ++ ++ ByteSize cp_base_offset = ConstantPoolCache::base_offset(); ++ ++ if (JvmtiExport::can_post_field_modification()) { ++ // Check to see if a field modification watch has been set before ++ // we take the time to call into the VM. ++ Label L1; ++ //kill AT, T1, T2, T3, T9 ++ Register tmp1 = T2; ++ Register tmp2 = T1; ++ Register tmp3 = T3; ++ Register tmp4 = T9; ++ assert_different_registers(cache, index, tmp4); ++ ++ __ li(AT, JvmtiExport::get_field_modification_count_addr()); ++ __ lw(AT, AT, 0); ++ __ beq(AT, R0, L1); ++ __ delayed()->nop(); ++ ++ __ get_cache_and_index_at_bcp(tmp2, tmp4, 1); ++ ++ if (is_static) { ++ __ move(tmp1, R0); ++ } else { ++ // Life is harder. The stack holds the value on top, followed by ++ // the object. We don't know the size of the value, though; it ++ // could be one or two words depending on its type. As a result, ++ // we must find the type to determine where the object is. ++ Label two_word, valsize_known; ++ __ dsll(AT, tmp4, Address::times_8); ++ __ daddu(AT, tmp2, AT); ++ __ ld(tmp3, AT, in_bytes(cp_base_offset + ++ ConstantPoolCacheEntry::flags_offset())); ++ __ shr(tmp3, ConstantPoolCacheEntry::tos_state_shift); ++ ++ ConstantPoolCacheEntry::verify_tos_state_shift(); ++ __ move(tmp1, SP); ++ __ move(AT, ltos); ++ __ beq(tmp3, AT, two_word); ++ __ delayed()->nop(); ++ __ move(AT, dtos); ++ __ beq(tmp3, AT, two_word); ++ __ delayed()->nop(); ++ __ b(valsize_known); ++ __ delayed()->daddiu(tmp1, tmp1, Interpreter::expr_offset_in_bytes(1) ); ++ ++ __ bind(two_word); ++ __ daddiu(tmp1, tmp1, Interpreter::expr_offset_in_bytes(2)); ++ ++ __ bind(valsize_known); ++ // setup object pointer ++ __ ld(tmp1, tmp1, 0*wordSize); ++ } ++ // cache entry pointer ++ __ daddiu(tmp2, tmp2, in_bytes(cp_base_offset)); ++ __ shl(tmp4, LogBytesPerWord); ++ __ daddu(tmp2, tmp2, tmp4); ++ // object (tos) ++ __ move(tmp3, SP); ++ // tmp1: object pointer set up above (NULL if static) ++ // tmp2: cache entry pointer ++ // tmp3: jvalue object on the stack ++ __ call_VM(NOREG, ++ CAST_FROM_FN_PTR(address, ++ InterpreterRuntime::post_field_modification), ++ tmp1, tmp2, tmp3); ++ __ get_cache_and_index_at_bcp(cache, index, 1); ++ __ bind(L1); ++ } ++} ++ ++// used registers : T0, T1, T2, T3, T8 ++// T1 : flags ++// T2 : off ++// T3 : obj ++// T8 : volatile bit ++// see ConstantPoolCacheEntry::set_field for more info ++void TemplateTable::putfield_or_static(int byte_no, bool is_static, RewriteControl rc) { ++ transition(vtos, vtos); ++ ++ const Register cache = T3; ++ const Register index = T0; ++ const Register obj = T3; ++ const Register off = T2; ++ const Register flags = T1; ++ const Register bc = T3; ++ ++ const Register scratch = T8; ++ ++ resolve_cache_and_index(byte_no, cache, index, sizeof(u2)); ++ jvmti_post_field_mod(cache, index, is_static); ++ load_field_cp_cache_entry(obj, cache, index, off, flags, is_static); ++ ++ Label Done; ++ { ++ __ move(scratch, 1 << ConstantPoolCacheEntry::is_volatile_shift); ++ __ andr(scratch, scratch, flags); ++ ++ Label notVolatile; ++ __ beq(scratch, R0, notVolatile); ++ __ delayed()->nop(); ++ volatile_barrier(); ++ __ bind(notVolatile); ++ } ++ ++ ++ Label notByte, notBool, notInt, notShort, notChar, notLong, notFloat, notObj, notDouble; ++ ++ assert(btos == 0, "change code, btos != 0"); ++ ++ // btos ++ __ dsrl(flags, flags, 
ConstantPoolCacheEntry::tos_state_shift); ++ __ andi(flags, flags, ConstantPoolCacheEntry::tos_state_mask); ++ __ bne(flags, R0, notByte); ++ __ delayed()->nop(); ++ ++ __ pop(btos); ++ if (!is_static) { ++ pop_and_check_object(obj); ++ } ++ __ daddu(T9, obj, off); ++ __ access_store_at(T_BYTE, IN_HEAP, Address(T9), FSR, noreg, noreg); ++ ++ if (!is_static && rc == may_rewrite) { ++ patch_bytecode(Bytecodes::_fast_bputfield, bc, off, true, byte_no); ++ } ++ __ b(Done); ++ __ delayed()->nop(); ++ ++ // ztos ++ __ bind(notByte); ++ __ move(AT, ztos); ++ __ bne(flags, AT, notBool); ++ __ delayed()->nop(); ++ ++ __ pop(ztos); ++ if (!is_static) { ++ pop_and_check_object(obj); ++ } ++ __ daddu(T9, obj, off); ++ __ andi(FSR, FSR, 0x1); ++ __ access_store_at(T_BOOLEAN, IN_HEAP, Address(T9), FSR, noreg, noreg); ++ ++ if (!is_static && rc == may_rewrite) { ++ patch_bytecode(Bytecodes::_fast_zputfield, bc, off, true, byte_no); ++ } ++ __ b(Done); ++ __ delayed()->nop(); ++ ++ // itos ++ __ bind(notBool); ++ __ move(AT, itos); ++ __ bne(flags, AT, notInt); ++ __ delayed()->nop(); ++ ++ __ pop(itos); ++ if (!is_static) { ++ pop_and_check_object(obj); ++ } ++ __ daddu(T9, obj, off); ++ __ access_store_at(T_INT, IN_HEAP, Address(T9), FSR, noreg, noreg); ++ ++ if (!is_static && rc == may_rewrite) { ++ patch_bytecode(Bytecodes::_fast_iputfield, bc, off, true, byte_no); ++ } ++ __ b(Done); ++ __ delayed()->nop(); ++ ++ // atos ++ __ bind(notInt); ++ __ move(AT, atos); ++ __ bne(flags, AT, notObj); ++ __ delayed()->nop(); ++ ++ __ pop(atos); ++ if (!is_static) { ++ pop_and_check_object(obj); ++ } ++ ++ do_oop_store(_masm, Address(obj, off, Address::times_1, 0), FSR); ++ ++ if (!is_static && rc == may_rewrite) { ++ patch_bytecode(Bytecodes::_fast_aputfield, bc, off, true, byte_no); ++ } ++ __ b(Done); ++ __ delayed()->nop(); ++ ++ // ctos ++ __ bind(notObj); ++ __ move(AT, ctos); ++ __ bne(flags, AT, notChar); ++ __ delayed()->nop(); ++ ++ __ pop(ctos); ++ if (!is_static) { ++ pop_and_check_object(obj); ++ } ++ __ daddu(T9, obj, off); ++ __ access_store_at(T_CHAR, IN_HEAP, Address(T9), FSR, noreg, noreg); ++ if (!is_static && rc == may_rewrite) { ++ patch_bytecode(Bytecodes::_fast_cputfield, bc, off, true, byte_no); ++ } ++ __ b(Done); ++ __ delayed()->nop(); ++ ++ // stos ++ __ bind(notChar); ++ __ move(AT, stos); ++ __ bne(flags, AT, notShort); ++ __ delayed()->nop(); ++ ++ __ pop(stos); ++ if (!is_static) { ++ pop_and_check_object(obj); ++ } ++ __ daddu(T9, obj, off); ++ __ access_store_at(T_SHORT, IN_HEAP, Address(T9), FSR, noreg, noreg); ++ if (!is_static && rc == may_rewrite) { ++ patch_bytecode(Bytecodes::_fast_sputfield, bc, off, true, byte_no); ++ } ++ __ b(Done); ++ __ delayed()->nop(); ++ ++ // ltos ++ __ bind(notShort); ++ __ move(AT, ltos); ++ __ bne(flags, AT, notLong); ++ __ delayed()->nop(); ++ ++ __ pop(ltos); ++ if (!is_static) { ++ pop_and_check_object(obj); ++ } ++ __ daddu(T9, obj, off); ++ __ access_store_at(T_LONG, IN_HEAP, Address(T9), FSR, noreg, noreg); ++ if (!is_static && rc == may_rewrite) { ++ patch_bytecode(Bytecodes::_fast_lputfield, bc, off, true, byte_no); ++ } ++ __ b(Done); ++ __ delayed()->nop(); ++ ++ // ftos ++ __ bind(notLong); ++ __ move(AT, ftos); ++ __ bne(flags, AT, notFloat); ++ __ delayed()->nop(); ++ ++ __ pop(ftos); ++ if (!is_static) { ++ pop_and_check_object(obj); ++ } ++ __ daddu(T9, obj, off); ++ __ access_store_at(T_FLOAT, IN_HEAP, Address(T9), noreg, noreg, noreg); ++ if (!is_static && rc == may_rewrite) { ++ patch_bytecode(Bytecodes::_fast_fputfield, bc, 
off, true, byte_no); ++ } ++ __ b(Done); ++ __ delayed()->nop(); ++ ++ ++ // dtos ++ __ bind(notFloat); ++ __ move(AT, dtos); ++#ifdef ASSERT ++ __ bne(flags, AT, notDouble); ++ __ delayed()->nop(); ++#endif ++ ++ __ pop(dtos); ++ if (!is_static) { ++ pop_and_check_object(obj); ++ } ++ __ daddu(T9, obj, off); ++ __ access_store_at(T_DOUBLE, IN_HEAP, Address(T9), noreg, noreg, noreg); ++ if (!is_static && rc == may_rewrite) { ++ patch_bytecode(Bytecodes::_fast_dputfield, bc, off, true, byte_no); ++ } ++ ++#ifdef ASSERT ++ __ b(Done); ++ __ delayed()->nop(); ++ ++ __ bind(notDouble); ++ __ stop("Bad state"); ++#endif ++ ++ __ bind(Done); ++ ++ { ++ Label notVolatile; ++ __ beq(scratch, R0, notVolatile); ++ __ delayed()->nop(); ++ volatile_barrier(); ++ __ bind(notVolatile); ++ } ++} ++ ++void TemplateTable::putfield(int byte_no) { ++ putfield_or_static(byte_no, false); ++} ++ ++void TemplateTable::nofast_putfield(int byte_no) { ++ putfield_or_static(byte_no, false, may_not_rewrite); ++} ++ ++void TemplateTable::putstatic(int byte_no) { ++ putfield_or_static(byte_no, true); ++} ++ ++// used registers : T1, T2, T3 ++// T1 : cp_entry ++// T2 : obj ++// T3 : value pointer ++void TemplateTable::jvmti_post_fast_field_mod() { ++ if (JvmtiExport::can_post_field_modification()) { ++ // Check to see if a field modification watch has been set before ++ // we take the time to call into the VM. ++ Label L2; ++ //kill AT, T1, T2, T3, T9 ++ Register tmp1 = T2; ++ Register tmp2 = T1; ++ Register tmp3 = T3; ++ Register tmp4 = T9; ++ __ li(AT, JvmtiExport::get_field_modification_count_addr()); ++ __ lw(tmp3, AT, 0); ++ __ beq(tmp3, R0, L2); ++ __ delayed()->nop(); ++ __ pop_ptr(tmp1); ++ __ verify_oop(tmp1); ++ __ push_ptr(tmp1); ++ switch (bytecode()) { // load values into the jvalue object ++ case Bytecodes::_fast_aputfield: __ push_ptr(FSR); break; ++ case Bytecodes::_fast_bputfield: // fall through ++ case Bytecodes::_fast_zputfield: // fall through ++ case Bytecodes::_fast_sputfield: // fall through ++ case Bytecodes::_fast_cputfield: // fall through ++ case Bytecodes::_fast_iputfield: __ push_i(FSR); break; ++ case Bytecodes::_fast_dputfield: __ push_d(FSF); break; ++ case Bytecodes::_fast_fputfield: __ push_f(); break; ++ case Bytecodes::_fast_lputfield: __ push_l(FSR); break; ++ default: ShouldNotReachHere(); ++ } ++ __ move(tmp3, SP); ++ // access constant pool cache entry ++ __ get_cache_entry_pointer_at_bcp(tmp2, FSR, 1); ++ __ verify_oop(tmp1); ++ // tmp1: object pointer copied above ++ // tmp2: cache entry pointer ++ // tmp3: jvalue object on the stack ++ __ call_VM(NOREG, ++ CAST_FROM_FN_PTR(address, ++ InterpreterRuntime::post_field_modification), ++ tmp1, tmp2, tmp3); ++ ++ switch (bytecode()) { // restore tos values ++ case Bytecodes::_fast_aputfield: __ pop_ptr(FSR); break; ++ case Bytecodes::_fast_bputfield: // fall through ++ case Bytecodes::_fast_zputfield: // fall through ++ case Bytecodes::_fast_sputfield: // fall through ++ case Bytecodes::_fast_cputfield: // fall through ++ case Bytecodes::_fast_iputfield: __ pop_i(FSR); break; ++ case Bytecodes::_fast_dputfield: __ pop_d(); break; ++ case Bytecodes::_fast_fputfield: __ pop_f(); break; ++ case Bytecodes::_fast_lputfield: __ pop_l(FSR); break; ++ default: break; ++ } ++ __ bind(L2); ++ } ++} ++ ++// used registers : T2, T3, T1 ++// T2 : index & off & field address ++// T3 : cache & obj ++// T1 : flags ++void TemplateTable::fast_storefield(TosState state) { ++ transition(state, vtos); ++ ++ const Register scratch = T8; ++ ++ ByteSize 
base = ConstantPoolCache::base_offset(); ++ ++ jvmti_post_fast_field_mod(); ++ ++ // access constant pool cache ++ __ get_cache_and_index_at_bcp(T3, T2, 1); ++ ++ // Must prevent reordering of the following cp cache loads with bytecode load ++ __ sync(); ++ ++ // test for volatile with T1 ++ __ dsll(AT, T2, Address::times_8); ++ __ daddu(AT, T3, AT); ++ __ ld(T1, AT, in_bytes(base + ConstantPoolCacheEntry::flags_offset())); ++ ++ // replace index with field offset from cache entry ++ __ ld(T2, AT, in_bytes(base + ConstantPoolCacheEntry::f2_offset())); ++ ++ Label Done; ++ { ++ __ move(scratch, 1 << ConstantPoolCacheEntry::is_volatile_shift); ++ __ andr(scratch, scratch, T1); ++ ++ Label notVolatile; ++ __ beq(scratch, R0, notVolatile); ++ __ delayed()->nop(); ++ volatile_barrier(); ++ __ bind(notVolatile); ++ } ++ ++ // Get object from stack ++ pop_and_check_object(T3); ++ ++ if (bytecode() != Bytecodes::_fast_aputfield) { ++ // field address ++ __ daddu(T2, T3, T2); ++ } ++ ++ // access field ++ switch (bytecode()) { ++ case Bytecodes::_fast_zputfield: ++ __ andi(FSR, FSR, 0x1); // boolean is true if LSB is 1 ++ __ access_store_at(T_BOOLEAN, IN_HEAP, Address(T2), FSR, noreg, noreg); ++ break; ++ case Bytecodes::_fast_bputfield: ++ __ access_store_at(T_BYTE, IN_HEAP, Address(T2), FSR, noreg, noreg); ++ break; ++ case Bytecodes::_fast_sputfield: ++ __ access_store_at(T_SHORT, IN_HEAP, Address(T2), FSR, noreg, noreg); ++ break; ++ case Bytecodes::_fast_cputfield: ++ __ access_store_at(T_CHAR, IN_HEAP, Address(T2), FSR, noreg, noreg); ++ break; ++ case Bytecodes::_fast_iputfield: ++ __ access_store_at(T_INT, IN_HEAP, Address(T2), FSR, noreg, noreg); ++ break; ++ case Bytecodes::_fast_lputfield: ++ __ access_store_at(T_LONG, IN_HEAP, Address(T2), FSR, noreg, noreg); ++ break; ++ case Bytecodes::_fast_fputfield: ++ __ access_store_at(T_FLOAT, IN_HEAP, Address(T2), noreg, noreg, noreg); ++ break; ++ case Bytecodes::_fast_dputfield: ++ __ access_store_at(T_DOUBLE, IN_HEAP, Address(T2), noreg, noreg, noreg); ++ break; ++ case Bytecodes::_fast_aputfield: ++ do_oop_store(_masm, Address(T3, T2, Address::times_1, 0), FSR); ++ break; ++ default: ++ ShouldNotReachHere(); ++ } ++ ++ { ++ Label notVolatile; ++ __ beq(scratch, R0, notVolatile); ++ __ delayed()->nop(); ++ volatile_barrier(); ++ __ bind(notVolatile); ++ } ++} ++ ++// used registers : T2, T3, T1 ++// T3 : cp_entry & cache ++// T2 : index & offset ++void TemplateTable::fast_accessfield(TosState state) { ++ transition(atos, state); ++ ++ const Register scratch = T8; ++ ++ // do the JVMTI work here to avoid disturbing the register state below ++ if (JvmtiExport::can_post_field_access()) { ++ // Check to see if a field access watch has been set before we take ++ // the time to call into the VM. 
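++ // The counter read below is expected to be non-zero only while at least
++ // one JVMTI agent has a field-access watch installed, so the common case
++ // costs one load plus a branch over the JVMTI work.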
++ Label L1; ++ __ li(AT, (intptr_t)JvmtiExport::get_field_access_count_addr()); ++ __ lw(T3, AT, 0); ++ __ beq(T3, R0, L1); ++ __ delayed()->nop(); ++ // access constant pool cache entry ++ __ get_cache_entry_pointer_at_bcp(T3, T1, 1); ++ __ move(TSR, FSR); ++ __ verify_oop(FSR); ++ // FSR: object pointer copied above ++ // T3: cache entry pointer ++ __ call_VM(NOREG, ++ CAST_FROM_FN_PTR(address, InterpreterRuntime::post_field_access), ++ FSR, T3); ++ __ move(FSR, TSR); ++ __ bind(L1); ++ } ++ ++ // access constant pool cache ++ __ get_cache_and_index_at_bcp(T3, T2, 1); ++ ++ // Must prevent reordering of the following cp cache loads with bytecode load ++ __ sync(); ++ ++ // replace index with field offset from cache entry ++ __ dsll(AT, T2, Address::times_8); ++ __ daddu(AT, T3, AT); ++ __ ld(T2, AT, in_bytes(ConstantPoolCache::base_offset() + ConstantPoolCacheEntry::f2_offset())); ++ ++ { ++ __ ld(AT, AT, in_bytes(ConstantPoolCache::base_offset() + ConstantPoolCacheEntry::flags_offset())); ++ __ move(scratch, 1 << ConstantPoolCacheEntry::is_volatile_shift); ++ __ andr(scratch, scratch, AT); ++ ++ Label notVolatile; ++ __ beq(scratch, R0, notVolatile); ++ __ delayed()->nop(); ++ volatile_barrier(); ++ __ bind(notVolatile); ++ } ++ ++ // FSR: object ++ __ verify_oop(FSR); ++ __ null_check(FSR); ++ // field addresses ++ __ daddu(FSR, FSR, T2); ++ ++ // access field ++ switch (bytecode()) { ++ case Bytecodes::_fast_bgetfield: ++ __ access_load_at(T_BYTE, IN_HEAP, FSR, Address(FSR), noreg, noreg); ++ break; ++ case Bytecodes::_fast_sgetfield: ++ __ access_load_at(T_SHORT, IN_HEAP, FSR, Address(FSR), noreg, noreg); ++ break; ++ case Bytecodes::_fast_cgetfield: ++ __ access_load_at(T_CHAR, IN_HEAP, FSR, Address(FSR), noreg, noreg); ++ break; ++ case Bytecodes::_fast_igetfield: ++ __ access_load_at(T_INT, IN_HEAP, FSR, Address(FSR), noreg, noreg); ++ break; ++ case Bytecodes::_fast_lgetfield: ++ __ stop("should not be rewritten"); ++ break; ++ case Bytecodes::_fast_fgetfield: ++ __ access_load_at(T_FLOAT, IN_HEAP, noreg, Address(FSR), noreg, noreg); ++ break; ++ case Bytecodes::_fast_dgetfield: ++ __ access_load_at(T_DOUBLE, IN_HEAP, noreg, Address(FSR), noreg, noreg); ++ break; ++ case Bytecodes::_fast_agetfield: ++ //add for compressedoops ++ do_oop_load(_masm, Address(FSR, 0), FSR, IN_HEAP); ++ __ verify_oop(FSR); ++ break; ++ default: ++ ShouldNotReachHere(); ++ } ++ ++ { ++ Label notVolatile; ++ __ beq(scratch, R0, notVolatile); ++ __ delayed()->nop(); ++ volatile_barrier(); ++ __ bind(notVolatile); ++ } ++} ++ ++// generator for _fast_iaccess_0, _fast_aaccess_0, _fast_faccess_0 ++// used registers : T1, T2, T3, T1 ++// T1 : obj & field address ++// T2 : off ++// T3 : cache ++// T1 : index ++void TemplateTable::fast_xaccess(TosState state) { ++ transition(vtos, state); ++ ++ const Register scratch = T8; ++ ++ // get receiver ++ __ ld(T1, aaddress(0)); ++ // access constant pool cache ++ __ get_cache_and_index_at_bcp(T3, T2, 2); ++ __ dsll(AT, T2, Address::times_8); ++ __ daddu(AT, T3, AT); ++ __ ld(T2, AT, in_bytes(ConstantPoolCache::base_offset() + ConstantPoolCacheEntry::f2_offset())); ++ ++ { ++ __ ld(AT, AT, in_bytes(ConstantPoolCache::base_offset() + ConstantPoolCacheEntry::flags_offset())); ++ __ move(scratch, 1 << ConstantPoolCacheEntry::is_volatile_shift); ++ __ andr(scratch, scratch, AT); ++ ++ Label notVolatile; ++ __ beq(scratch, R0, notVolatile); ++ __ delayed()->nop(); ++ volatile_barrier(); ++ __ bind(notVolatile); ++ } ++ ++ // make sure exception is reported in correct bcp 
range (getfield is ++ // next instruction) ++ __ daddiu(BCP, BCP, 1); ++ __ null_check(T1); ++ __ daddu(T1, T1, T2); ++ ++ if (state == itos) { ++ __ access_load_at(T_INT, IN_HEAP, FSR, Address(T1), noreg, noreg); ++ } else if (state == atos) { ++ do_oop_load(_masm, Address(T1, 0), FSR, IN_HEAP); ++ __ verify_oop(FSR); ++ } else if (state == ftos) { ++ __ access_load_at(T_FLOAT, IN_HEAP, noreg, Address(T1), noreg, noreg); ++ } else { ++ ShouldNotReachHere(); ++ } ++ __ daddiu(BCP, BCP, -1); ++ ++ { ++ Label notVolatile; ++ __ beq(scratch, R0, notVolatile); ++ __ delayed()->nop(); ++ volatile_barrier(); ++ __ bind(notVolatile); ++ } ++} ++ ++ ++ ++//----------------------------------------------------------------------------- ++// Calls ++ ++// method, index, recv, flags: T1, T2, T3, T1 ++// byte_no = 2 for _invokevirtual, 1 else ++// T0 : return address ++// get the method & index of the invoke, and push the return address of ++// the invoke(first word in the frame) ++// this address is where the return code jmp to. ++// NOTE : this method will set T3&T1 as recv&flags ++void TemplateTable::prepare_invoke(int byte_no, ++ Register method, // linked method (or i-klass) ++ Register index, // itable index, MethodType, etc. ++ Register recv, // if caller wants to see it ++ Register flags // if caller wants to test it ++ ) { ++ // determine flags ++ const Bytecodes::Code code = bytecode(); ++ const bool is_invokeinterface = code == Bytecodes::_invokeinterface; ++ const bool is_invokedynamic = code == Bytecodes::_invokedynamic; ++ const bool is_invokehandle = code == Bytecodes::_invokehandle; ++ const bool is_invokevirtual = code == Bytecodes::_invokevirtual; ++ const bool is_invokespecial = code == Bytecodes::_invokespecial; ++ const bool load_receiver = (recv != noreg); ++ const bool save_flags = (flags != noreg); ++ assert(load_receiver == (code != Bytecodes::_invokestatic && code != Bytecodes::_invokedynamic),""); ++ assert(save_flags == (is_invokeinterface || is_invokevirtual), "need flags for vfinal"); ++ assert(flags == noreg || flags == T1, "error flags reg."); ++ assert(recv == noreg || recv == T3, "error recv reg."); ++ ++ // setup registers & access constant pool cache ++ if(recv == noreg) recv = T3; ++ if(flags == noreg) flags = T1; ++ assert_different_registers(method, index, recv, flags); ++ ++ // save 'interpreter return address' ++ __ save_bcp(); ++ ++ load_invoke_cp_cache_entry(byte_no, method, index, flags, is_invokevirtual, false, is_invokedynamic); ++ ++ if (is_invokedynamic || is_invokehandle) { ++ Label L_no_push; ++ __ move(AT, (1 << ConstantPoolCacheEntry::has_appendix_shift)); ++ __ andr(AT, AT, flags); ++ __ beq(AT, R0, L_no_push); ++ __ delayed()->nop(); ++ // Push the appendix as a trailing parameter. ++ // This must be done before we get the receiver, ++ // since the parameter_size includes it. ++ Register tmp = SSR; ++ __ push(tmp); ++ __ move(tmp, index); ++ __ load_resolved_reference_at_index(index, tmp, recv); ++ __ pop(tmp); ++ __ push(index); // push appendix (MethodType, CallSite, etc.) ++ __ bind(L_no_push); ++ } ++ ++ // load receiver if needed (after appendix is pushed so parameter size is correct) ++ // Note: no return address pushed yet ++ if (load_receiver) { ++ __ move(AT, ConstantPoolCacheEntry::parameter_size_mask); ++ __ andr(recv, flags, AT); ++ // Since we won't push RA on stack, no_return_pc_pushed_yet should be 0. 
++ const int no_return_pc_pushed_yet = 0; // argument slot correction before we push return address ++ const int receiver_is_at_end = -1; // back off one slot to get receiver ++ Address recv_addr = __ argument_address(recv, no_return_pc_pushed_yet + receiver_is_at_end); ++ __ ld(recv, recv_addr); ++ __ verify_oop(recv); ++ } ++ if(save_flags) { ++ __ move(BCP, flags); ++ } ++ ++ // compute return type ++ __ dsrl(flags, flags, ConstantPoolCacheEntry::tos_state_shift); ++ __ andi(flags, flags, 0xf); ++ ++ // Make sure we don't need to mask flags for tos_state_shift after the above shift ++ ConstantPoolCacheEntry::verify_tos_state_shift(); ++ // load return address ++ { ++ const address table = (address) Interpreter::invoke_return_entry_table_for(code); ++ __ li(AT, (long)table); ++ __ dsll(flags, flags, LogBytesPerWord); ++ __ daddu(AT, AT, flags); ++ __ ld(RA, AT, 0); ++ } ++ ++ if (save_flags) { ++ __ move(flags, BCP); ++ __ restore_bcp(); ++ } ++} ++ ++// used registers : T0, T3, T1, T2 ++// T3 : recv, this two register using convention is by prepare_invoke ++// T1 : flags, klass ++// Rmethod : method, index must be Rmethod ++void TemplateTable::invokevirtual_helper(Register index, ++ Register recv, ++ Register flags) { ++ ++ assert_different_registers(index, recv, flags, T2); ++ ++ // Test for an invoke of a final method ++ Label notFinal; ++ __ move(AT, (1 << ConstantPoolCacheEntry::is_vfinal_shift)); ++ __ andr(AT, flags, AT); ++ __ beq(AT, R0, notFinal); ++ __ delayed()->nop(); ++ ++ Register method = index; // method must be Rmethod ++ assert(method == Rmethod, "Method must be Rmethod for interpreter calling convention"); ++ ++ // do the call - the index is actually the method to call ++ // the index is indeed Method*, for this is vfinal, ++ // see ConstantPoolCacheEntry::set_method for more info ++ ++ ++ // It's final, need a null check here! 
++ __ null_check(recv); ++ ++ // profile this call ++ __ profile_final_call(T2); ++ ++ // T2: tmp, used for mdp ++ // method: callee ++ // T9: tmp ++ // is_virtual: true ++ __ profile_arguments_type(T2, method, T9, true); ++ ++ __ jump_from_interpreted(method, T2); ++ ++ __ bind(notFinal); ++ ++ // get receiver klass ++ __ null_check(recv, oopDesc::klass_offset_in_bytes()); ++ __ load_klass(T2, recv); ++ ++ // profile this call ++ __ profile_virtual_call(T2, T0, T1); ++ ++ // get target Method & entry point ++ const int base = in_bytes(Klass::vtable_start_offset()); ++ assert(vtableEntry::size() * wordSize == wordSize, "adjust the scaling in the code below"); ++ __ dsll(AT, index, Address::times_ptr); ++ // T2: receiver ++ __ daddu(AT, T2, AT); ++ //this is a ualign read ++ __ ld(method, AT, base + vtableEntry::method_offset_in_bytes()); ++ __ profile_arguments_type(T2, method, T9, true); ++ __ jump_from_interpreted(method, T2); ++ ++} ++ ++void TemplateTable::invokevirtual(int byte_no) { ++ transition(vtos, vtos); ++ assert(byte_no == f2_byte, "use this argument"); ++ prepare_invoke(byte_no, Rmethod, NOREG, T3, T1); ++ // now recv & flags in T3, T1 ++ invokevirtual_helper(Rmethod, T3, T1); ++} ++ ++// T9 : entry ++// Rmethod : method ++void TemplateTable::invokespecial(int byte_no) { ++ transition(vtos, vtos); ++ assert(byte_no == f1_byte, "use this argument"); ++ prepare_invoke(byte_no, Rmethod, NOREG, T3); ++ // now recv & flags in T3, T1 ++ __ verify_oop(T3); ++ __ null_check(T3); ++ __ profile_call(T9); ++ ++ // T8: tmp, used for mdp ++ // Rmethod: callee ++ // T9: tmp ++ // is_virtual: false ++ __ profile_arguments_type(T8, Rmethod, T9, false); ++ ++ __ jump_from_interpreted(Rmethod, T9); ++ __ move(T0, T3); ++} ++ ++void TemplateTable::invokestatic(int byte_no) { ++ transition(vtos, vtos); ++ assert(byte_no == f1_byte, "use this argument"); ++ prepare_invoke(byte_no, Rmethod, NOREG); ++ ++ __ profile_call(T9); ++ ++ // T8: tmp, used for mdp ++ // Rmethod: callee ++ // T9: tmp ++ // is_virtual: false ++ __ profile_arguments_type(T8, Rmethod, T9, false); ++ ++ __ jump_from_interpreted(Rmethod, T9); ++} ++ ++// i have no idea what to do here, now. for future change. FIXME. ++void TemplateTable::fast_invokevfinal(int byte_no) { ++ transition(vtos, vtos); ++ assert(byte_no == f2_byte, "use this argument"); ++ __ stop("fast_invokevfinal not used on mips64"); ++} ++ ++// used registers : T0, T1, T2, T3, T1, A7 ++// T0 : itable, vtable, entry ++// T1 : interface ++// T3 : receiver ++// T1 : flags, klass ++// Rmethod : index, method, this is required by interpreter_entry ++void TemplateTable::invokeinterface(int byte_no) { ++ transition(vtos, vtos); ++ //this method will use T1-T4 and T0 ++ assert(byte_no == f1_byte, "use this argument"); ++ prepare_invoke(byte_no, T2, Rmethod, T3, T1); ++ // T2: reference klass (from f1) if interface method ++ // Rmethod: method (from f2) ++ // T3: receiver ++ // T1: flags ++ ++ // First check for Object case, then private interface method, ++ // then regular interface method. ++ ++ // Special case of invokeinterface called for virtual method of ++ // java.lang.Object. See cpCache.cpp for details. 
++ Label notObjectMethod; ++ __ move(AT, (1 << ConstantPoolCacheEntry::is_forced_virtual_shift)); ++ __ andr(AT, T1, AT); ++ __ beq(AT, R0, notObjectMethod); ++ __ delayed()->nop(); ++ ++ invokevirtual_helper(Rmethod, T3, T1); ++ // no return from above ++ __ bind(notObjectMethod); ++ ++ Label no_such_interface; // for receiver subtype check ++ Register recvKlass; // used for exception processing ++ ++ // Check for private method invocation - indicated by vfinal ++ Label notVFinal; ++ __ move(AT, (1 << ConstantPoolCacheEntry::is_vfinal_shift)); ++ __ andr(AT, T1, AT); ++ __ beq(AT, R0, notVFinal); ++ __ delayed()->nop(); ++ ++ // Get receiver klass into FSR - also a null check ++ __ null_check(T3, oopDesc::klass_offset_in_bytes()); ++ __ load_klass(FSR, T3); ++ ++ Label subtype; ++ __ check_klass_subtype(FSR, T2, T0, subtype); ++ // If we get here the typecheck failed ++ recvKlass = T1; ++ __ move(recvKlass, FSR); ++ __ b(no_such_interface); ++ __ delayed()->nop(); ++ ++ __ bind(subtype); ++ ++ // do the call - rbx is actually the method to call ++ ++ __ profile_final_call(T1); ++ __ profile_arguments_type(T1, Rmethod, T0, true); ++ ++ __ jump_from_interpreted(Rmethod, T1); ++ // no return from above ++ __ bind(notVFinal); ++ ++ // Get receiver klass into T1 - also a null check ++ __ restore_locals(); ++ __ null_check(T3, oopDesc::klass_offset_in_bytes()); ++ __ load_klass(T1, T3); ++ ++ Label no_such_method; ++ ++ // Preserve method for throw_AbstractMethodErrorVerbose. ++ __ move(T3, Rmethod); ++ // Receiver subtype check against REFC. ++ // Superklass in T2. Subklass in T1. ++ __ lookup_interface_method(// inputs: rec. class, interface, itable index ++ T1, T2, noreg, ++ // outputs: scan temp. reg, scan temp. reg ++ T0, FSR, ++ no_such_interface, ++ /*return_method=*/false); ++ ++ ++ // profile this call ++ __ restore_bcp(); ++ __ profile_virtual_call(T1, T0, FSR); ++ ++ // Get declaring interface class from method, and itable index ++ __ load_method_holder(T2, Rmethod); ++ __ lw(Rmethod, Rmethod, in_bytes(Method::itable_index_offset())); ++ __ addiu(Rmethod, Rmethod, (-1) * Method::itable_index_max); ++ __ subu32(Rmethod, R0, Rmethod); ++ ++ // Preserve recvKlass for throw_AbstractMethodErrorVerbose. ++ __ move(FSR, T1); ++ __ lookup_interface_method(// inputs: rec. class, interface, itable index ++ FSR, T2, Rmethod, ++ // outputs: method, scan temp. reg ++ Rmethod, T0, ++ no_such_interface); ++ ++ // Rmethod: Method* to call ++ // T3: receiver ++ // Check for abstract method error ++ // Note: This should be done more efficiently via a throw_abstract_method_error ++ // interpreter entry point and a conditional jump to it in case of a null ++ // method. ++ __ beq(Rmethod, R0, no_such_method); ++ __ delayed()->nop(); ++ ++ __ profile_called_method(Rmethod, T0, T1); ++ __ profile_arguments_type(T1, Rmethod, T0, true); ++ ++ // do the call ++ // T3: receiver ++ // Rmethod: Method* ++ __ jump_from_interpreted(Rmethod, T1); ++ __ should_not_reach_here(); ++ ++ // exception handling code follows... ++ // note: must restore interpreter registers to canonical ++ // state for exception handling to work correctly! ++ ++ __ bind(no_such_method); ++ // throw exception ++ __ pop(Rmethod); // pop return address (pushed by prepare_invoke) ++ __ restore_bcp(); ++ __ restore_locals(); ++ // Pass arguments for generating a verbose error message. 
++ recvKlass = A1; ++ Register method = A2; ++ if (recvKlass != T1) { __ move(recvKlass, T1); } ++ if (method != T3) { __ move(method, T3); } ++ __ call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::throw_AbstractMethodErrorVerbose), recvKlass, method); ++ // the call_VM checks for exception, so we should never return here. ++ __ should_not_reach_here(); ++ ++ __ bind(no_such_interface); ++ // throw exception ++ __ pop(Rmethod); // pop return address (pushed by prepare_invoke) ++ __ restore_bcp(); ++ __ restore_locals(); ++ // Pass arguments for generating a verbose error message. ++ if (recvKlass != T1) { __ move(recvKlass, T1); } ++ __ call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::throw_IncompatibleClassChangeErrorVerbose), recvKlass, T2); ++ // the call_VM checks for exception, so we should never return here. ++ __ should_not_reach_here(); ++} ++ ++ ++void TemplateTable::invokehandle(int byte_no) { ++ transition(vtos, vtos); ++ assert(byte_no == f1_byte, "use this argument"); ++ const Register T2_method = Rmethod; ++ const Register FSR_mtype = FSR; ++ const Register T3_recv = T3; ++ ++ prepare_invoke(byte_no, T2_method, FSR_mtype, T3_recv); ++ //??__ verify_method_ptr(T2_method); ++ __ verify_oop(T3_recv); ++ __ null_check(T3_recv); ++ ++ // T9: MethodType object (from cpool->resolved_references[f1], if necessary) ++ // T2_method: MH.invokeExact_MT method (from f2) ++ ++ // Note: T9 is already pushed (if necessary) by prepare_invoke ++ ++ // FIXME: profile the LambdaForm also ++ __ profile_final_call(T9); ++ ++ // T8: tmp, used for mdp ++ // T2_method: callee ++ // T9: tmp ++ // is_virtual: true ++ __ profile_arguments_type(T8, T2_method, T9, true); ++ ++ __ jump_from_interpreted(T2_method, T9); ++} ++ ++ void TemplateTable::invokedynamic(int byte_no) { ++ transition(vtos, vtos); ++ assert(byte_no == f1_byte, "use this argument"); ++ ++ //const Register Rmethod = T2; ++ const Register T2_callsite = T2; ++ ++ prepare_invoke(byte_no, Rmethod, T2_callsite); ++ ++ // T2: CallSite object (from cpool->resolved_references[f1]) ++ // Rmethod: MH.linkToCallSite method (from f2) ++ ++ // Note: T2_callsite is already pushed by prepare_invoke ++ // %%% should make a type profile for any invokedynamic that takes a ref argument ++ // profile this call ++ __ profile_call(T9); ++ ++ // T8: tmp, used for mdp ++ // Rmethod: callee ++ // T9: tmp ++ // is_virtual: false ++ __ profile_arguments_type(T8, Rmethod, T9, false); ++ ++ __ verify_oop(T2_callsite); ++ ++ __ jump_from_interpreted(Rmethod, T9); ++ } ++ ++//----------------------------------------------------------------------------- ++// Allocation ++// T1 : tags & buffer end & thread ++// T2 : object end ++// T3 : klass ++// T1 : object size ++// A1 : cpool ++// A2 : cp index ++// return object in FSR ++void TemplateTable::_new() { ++ transition(vtos, atos); ++ __ get_unsigned_2_byte_index_at_bcp(A2, 1); ++ ++ Label slow_case; ++ Label done; ++ Label initialize_header; ++ Label initialize_object; // including clearing the fields ++ Label allocate_shared; ++ ++ __ get_cpool_and_tags(A1, T1); ++ ++ // make sure the class we're about to instantiate has been resolved. 
++ // Note: slow_case does a pop of stack, which is why we loaded class/pushed above ++ const int tags_offset = Array::base_offset_in_bytes(); ++ if (UseLEXT1 && Assembler::is_simm(tags_offset, 8)) { ++ __ gslbx(AT, T1, A2, tags_offset); ++ } else { ++ __ daddu(T1, T1, A2); ++ __ lb(AT, T1, tags_offset); ++ } ++ if(os::is_MP()) { ++ __ sync(); // load acquire ++ } ++ __ daddiu(AT, AT, - (int)JVM_CONSTANT_Class); ++ __ bne(AT, R0, slow_case); ++ __ delayed()->nop(); ++ ++ // get InstanceKlass ++ __ load_resolved_klass_at_index(A1, A2, T3); ++ ++ // make sure klass is initialized & doesn't have finalizer ++ // make sure klass is fully initialized ++ __ lhu(T1, T3, in_bytes(InstanceKlass::init_state_offset())); ++ __ daddiu(AT, T1, - (int)InstanceKlass::fully_initialized); ++ __ bne(AT, R0, slow_case); ++ __ delayed()->nop(); ++ ++ // has_finalizer ++ __ lw(T0, T3, in_bytes(Klass::layout_helper_offset()) ); ++ __ andi(AT, T0, Klass::_lh_instance_slow_path_bit); ++ __ bne(AT, R0, slow_case); ++ __ delayed()->nop(); ++ ++ // Allocate the instance ++ // 1) Try to allocate in the TLAB ++ // 2) if fail and the object is large allocate in the shared Eden ++ // 3) if the above fails (or is not applicable), go to a slow case ++ // (creates a new TLAB, etc.) ++ ++ const bool allow_shared_alloc = ++ Universe::heap()->supports_inline_contig_alloc(); ++ ++#ifndef OPT_THREAD ++ const Register thread = T8; ++ if (UseTLAB || allow_shared_alloc) { ++ __ get_thread(thread); ++ } ++#else ++ const Register thread = TREG; ++#endif ++ ++ if (UseTLAB) { ++ // get tlab_top ++ __ ld(FSR, thread, in_bytes(JavaThread::tlab_top_offset())); ++ // get tlab_end ++ __ ld(AT, thread, in_bytes(JavaThread::tlab_end_offset())); ++ __ daddu(T2, FSR, T0); ++ __ slt(AT, AT, T2); ++ __ bne(AT, R0, allow_shared_alloc ? allocate_shared : slow_case); ++ __ delayed()->nop(); ++ __ sd(T2, thread, in_bytes(JavaThread::tlab_top_offset())); ++ ++ if (ZeroTLAB) { ++ // the fields have been already cleared ++ __ beq(R0, R0, initialize_header); ++ } else { ++ // initialize both the header and fields ++ __ beq(R0, R0, initialize_object); ++ } ++ __ delayed()->nop(); ++ } ++ ++ // Allocation in the shared Eden , if allowed ++ // T0 : instance size in words ++ if(allow_shared_alloc){ ++ __ bind(allocate_shared); ++ ++ Label done, retry; ++ Address heap_top(T1); ++ __ set64(T1, (long)Universe::heap()->top_addr()); ++ __ ld(FSR, heap_top); ++ ++ __ bind(retry); ++ __ set64(AT, (long)Universe::heap()->end_addr()); ++ __ ld(AT, AT, 0); ++ __ daddu(T2, FSR, T0); ++ __ slt(AT, AT, T2); ++ __ bne(AT, R0, slow_case); ++ __ delayed()->nop(); ++ ++ // Compare FSR with the top addr, and if still equal, store the new ++ // top addr in T2 at the address of the top addr pointer. Sets AT if was ++ // equal, and clears it otherwise. Use lock prefix for atomicity on MPs. ++ // ++ // FSR: object begin ++ // T2: object end ++ // T0: instance size in words ++ ++ // if someone beat us on the allocation, try again, otherwise continue ++ __ cmpxchg(heap_top, FSR, T2, AT, true, true, done, &retry); ++ ++ __ bind(done); ++ __ incr_allocated_bytes(thread, T0, 0); ++ } ++ ++ if (UseTLAB || Universe::heap()->supports_inline_contig_alloc()) { ++ // The object is initialized before the header. If the object size is ++ // zero, go directly to the header initialization. 
++ __ bind(initialize_object); ++ __ set64(AT, - sizeof(oopDesc)); ++ __ daddu(T0, T0, AT); ++ __ beq(T0, R0, initialize_header); ++ __ delayed()->nop(); ++ ++ // initialize remaining object fields: T0 is a multiple of 2 ++ { ++ Label loop; ++ __ daddu(T1, FSR, T0); ++ __ daddiu(T1, T1, -oopSize); ++ ++ __ bind(loop); ++ __ sd(R0, T1, sizeof(oopDesc) + 0 * oopSize); ++ __ bne(T1, FSR, loop); //dont clear header ++ __ delayed()->daddiu(T1, T1, -oopSize); ++ } ++ ++ //klass in T3, ++ // initialize object header only. ++ __ bind(initialize_header); ++ if (UseBiasedLocking) { ++ __ ld(AT, T3, in_bytes(Klass::prototype_header_offset())); ++ __ sd(AT, FSR, oopDesc::mark_offset_in_bytes ()); ++ } else { ++ __ set64(AT, (long)markWord::prototype().value()); ++ __ sd(AT, FSR, oopDesc::mark_offset_in_bytes()); ++ } ++ ++ __ store_klass_gap(FSR, R0); ++ __ store_klass(FSR, T3); ++ ++ { ++ SkipIfEqual skip_if(_masm, &DTraceAllocProbes, 0); ++ // Trigger dtrace event for fastpath ++ __ push(atos); ++ __ call_VM_leaf( ++ CAST_FROM_FN_PTR(address, SharedRuntime::dtrace_object_alloc), FSR); ++ __ pop(atos); ++ ++ } ++ __ b(done); ++ __ delayed()->nop(); ++ } ++ ++ // slow case ++ __ bind(slow_case); ++ __ get_constant_pool(A1); ++ __ get_unsigned_2_byte_index_at_bcp(A2, 1); ++ call_VM(FSR, CAST_FROM_FN_PTR(address, InterpreterRuntime::_new), A1, A2); ++ ++ // continue ++ __ bind(done); ++ __ sync(); ++} ++ ++void TemplateTable::newarray() { ++ transition(itos, atos); ++ __ lbu(A1, at_bcp(1)); ++ //type, count ++ call_VM(FSR, CAST_FROM_FN_PTR(address, InterpreterRuntime::newarray), A1, FSR); ++ __ sync(); ++} ++ ++void TemplateTable::anewarray() { ++ transition(itos, atos); ++ __ get_2_byte_integer_at_bcp(A2, AT, 1); ++ __ huswap(A2); ++ __ get_constant_pool(A1); ++ // cp, index, count ++ call_VM(FSR, CAST_FROM_FN_PTR(address, InterpreterRuntime::anewarray), A1, A2, FSR); ++ __ sync(); ++} ++ ++void TemplateTable::arraylength() { ++ transition(atos, itos); ++ __ null_check(FSR, arrayOopDesc::length_offset_in_bytes()); ++ __ lw(FSR, FSR, arrayOopDesc::length_offset_in_bytes()); ++} ++ ++// when invoke gen_subtype_check, super in T3, sub in T2, object in FSR(it's always) ++// T2 : sub klass ++// T3 : cpool ++// T3 : super klass ++void TemplateTable::checkcast() { ++ transition(atos, atos); ++ Label done, is_null, ok_is_subtype, quicked, resolved; ++ __ beq(FSR, R0, is_null); ++ __ delayed()->nop(); ++ ++ // Get cpool & tags index ++ __ get_cpool_and_tags(T3, T1); ++ __ get_2_byte_integer_at_bcp(T2, AT, 1); ++ __ huswap(T2); ++ ++ // See if bytecode has already been quicked ++ __ daddu(AT, T1, T2); ++ __ lb(AT, AT, Array::base_offset_in_bytes()); ++ if(os::is_MP()) { ++ __ sync(); // load acquire ++ } ++ __ daddiu(AT, AT, - (int)JVM_CONSTANT_Class); ++ __ beq(AT, R0, quicked); ++ __ delayed()->nop(); ++ ++ // In InterpreterRuntime::quicken_io_cc, lots of new classes may be loaded. ++ // Then, GC will move the object in V0 to another places in heap. ++ // Therefore, We should never save such an object in register. ++ // Instead, we should save it in the stack. It can be modified automatically by the GC thread. ++ // After GC, the object address in FSR is changed to a new place. 
++ // ++ __ push(atos); ++ const Register thread = TREG; ++#ifndef OPT_THREAD ++ __ get_thread(thread); ++#endif ++ call_VM(NOREG, CAST_FROM_FN_PTR(address, InterpreterRuntime::quicken_io_cc)); ++ __ get_vm_result_2(T3, thread); ++ __ pop_ptr(FSR); ++ __ b(resolved); ++ __ delayed()->nop(); ++ ++ // klass already in cp, get superklass in T3 ++ __ bind(quicked); ++ __ load_resolved_klass_at_index(T3, T2, T3); ++ ++ __ bind(resolved); ++ ++ // get subklass in T2 ++ //add for compressedoops ++ __ load_klass(T2, FSR); ++ // Superklass in T3. Subklass in T2. ++ __ gen_subtype_check(T3, T2, ok_is_subtype); ++ ++ // Come here on failure ++ // object is at FSR ++ __ jmp(Interpreter::_throw_ClassCastException_entry); ++ __ delayed()->nop(); ++ ++ // Come here on success ++ __ bind(ok_is_subtype); ++ ++ // Collect counts on whether this check-cast sees NULLs a lot or not. ++ if (ProfileInterpreter) { ++ __ b(done); ++ __ delayed()->nop(); ++ __ bind(is_null); ++ __ profile_null_seen(T3); ++ } else { ++ __ bind(is_null); ++ } ++ __ bind(done); ++} ++ ++// i use T3 as cpool, T1 as tags, T2 as index ++// object always in FSR, superklass in T3, subklass in T2 ++void TemplateTable::instanceof() { ++ transition(atos, itos); ++ Label done, is_null, ok_is_subtype, quicked, resolved; ++ ++ __ beq(FSR, R0, is_null); ++ __ delayed()->nop(); ++ ++ // Get cpool & tags index ++ __ get_cpool_and_tags(T3, T1); ++ // get index ++ __ get_2_byte_integer_at_bcp(T2, AT, 1); ++ __ huswap(T2); ++ ++ // See if bytecode has already been quicked ++ // quicked ++ __ daddu(AT, T1, T2); ++ __ lb(AT, AT, Array::base_offset_in_bytes()); ++ if(os::is_MP()) { ++ __ sync(); // load acquire ++ } ++ __ daddiu(AT, AT, - (int)JVM_CONSTANT_Class); ++ __ beq(AT, R0, quicked); ++ __ delayed()->nop(); ++ ++ __ push(atos); ++ const Register thread = TREG; ++#ifndef OPT_THREAD ++ __ get_thread(thread); ++#endif ++ call_VM(NOREG, CAST_FROM_FN_PTR(address, InterpreterRuntime::quicken_io_cc)); ++ __ get_vm_result_2(T3, thread); ++ __ pop_ptr(FSR); ++ __ b(resolved); ++ __ delayed()->nop(); ++ ++ // get superklass in T3, subklass in T2 ++ __ bind(quicked); ++ __ load_resolved_klass_at_index(T3, T2, T3); ++ ++ __ bind(resolved); ++ // get subklass in T2 ++ //add for compressedoops ++ __ load_klass(T2, FSR); ++ ++ // Superklass in T3. Subklass in T2. ++ __ gen_subtype_check(T3, T2, ok_is_subtype); ++ // Come here on failure ++ __ b(done); ++ __ delayed(); __ move(FSR, R0); ++ ++ // Come here on success ++ __ bind(ok_is_subtype); ++ __ move(FSR, 1); ++ ++ // Collect counts on whether this test sees NULLs a lot or not. ++ if (ProfileInterpreter) { ++ __ beq(R0, R0, done); ++ __ delayed()->nop(); ++ __ bind(is_null); ++ __ profile_null_seen(T3); ++ } else { ++ __ bind(is_null); // same as 'done' ++ } ++ __ bind(done); ++ // FSR = 0: obj == NULL or obj is not an instanceof the specified klass ++ // FSR = 1: obj != NULL and obj is an instanceof the specified klass ++} ++ ++//-------------------------------------------------------- ++//-------------------------------------------- ++// Breakpoints ++void TemplateTable::_breakpoint() { ++ // Note: We get here even if we are single stepping.. ++ // jbug inists on setting breakpoints at every bytecode ++ // even if we are in single step mode. 
++ ++ transition(vtos, vtos); ++ ++ // get the unpatched byte code ++ __ get_method(A1); ++ __ call_VM(NOREG, ++ CAST_FROM_FN_PTR(address, ++ InterpreterRuntime::get_original_bytecode_at), ++ A1, BCP); ++ __ move(Rnext, V0); // Rnext will be used in dispatch_only_normal ++ ++ // post the breakpoint event ++ __ get_method(A1); ++ __ call_VM(NOREG, CAST_FROM_FN_PTR(address, InterpreterRuntime::_breakpoint), A1, BCP); ++ ++ // complete the execution of original bytecode ++ __ dispatch_only_normal(vtos); ++} ++ ++//----------------------------------------------------------------------------- ++// Exceptions ++ ++void TemplateTable::athrow() { ++ transition(atos, vtos); ++ __ null_check(FSR); ++ __ jmp(Interpreter::throw_exception_entry()); ++ __ delayed()->nop(); ++} ++ ++//----------------------------------------------------------------------------- ++// Synchronization ++// ++// Note: monitorenter & exit are symmetric routines; which is reflected ++// in the assembly code structure as well ++// ++// Stack layout: ++// ++// [expressions ] <--- SP = expression stack top ++// .. ++// [expressions ] ++// [monitor entry] <--- monitor block top = expression stack bot ++// .. ++// [monitor entry] ++// [frame data ] <--- monitor block bot ++// ... ++// [return addr ] <--- FP ++ ++// we use T2 as monitor entry pointer, T3 as monitor top pointer, c_rarg0 as free slot pointer ++// object always in FSR ++void TemplateTable::monitorenter() { ++ transition(atos, vtos); ++ ++ // check for NULL object ++ __ null_check(FSR); ++ ++ const Address monitor_block_top(FP, frame::interpreter_frame_monitor_block_top_offset ++ * wordSize); ++ const int entry_size = (frame::interpreter_frame_monitor_size()* wordSize); ++ Label allocated; ++ ++ // initialize entry pointer ++ __ move(c_rarg0, R0); ++ ++ // find a free slot in the monitor block (result in c_rarg0) ++ { ++ Label entry, loop, exit; ++ __ ld(T2, monitor_block_top); ++ __ b(entry); ++ __ delayed()->daddiu(T3, FP, frame::interpreter_frame_initial_sp_offset * wordSize); ++ ++ // free slot? ++ __ bind(loop); ++ __ ld(AT, T2, BasicObjectLock::obj_offset_in_bytes()); ++ __ movz(c_rarg0, T2, AT); ++ ++ __ beq(FSR, AT, exit); ++ __ delayed()->nop(); ++ __ daddiu(T2, T2, entry_size); ++ ++ __ bind(entry); ++ __ bne(T3, T2, loop); ++ __ delayed()->nop(); ++ __ bind(exit); ++ } ++ ++ __ bne(c_rarg0, R0, allocated); ++ __ delayed()->nop(); ++ ++ // allocate one if there's no free slot ++ { ++ Label entry, loop; ++ // 1. compute new pointers // SP: old expression stack top ++ __ ld(c_rarg0, monitor_block_top); ++ __ daddiu(SP, SP, - entry_size); ++ __ daddiu(c_rarg0, c_rarg0, - entry_size); ++ __ sd(c_rarg0, monitor_block_top); ++ __ b(entry); ++ __ delayed(); __ move(T3, SP); ++ ++ // 2. move expression stack contents ++ __ bind(loop); ++ __ ld(AT, T3, entry_size); ++ __ sd(AT, T3, 0); ++ __ daddiu(T3, T3, wordSize); ++ __ bind(entry); ++ __ bne(T3, c_rarg0, loop); ++ __ delayed()->nop(); ++ } ++ ++ __ bind(allocated); ++ // Increment bcp to point to the next bytecode, ++ // so exception handling for async. exceptions work correctly. ++ // The object has already been poped from the stack, so the ++ // expression stack looks correct. ++ __ daddiu(BCP, BCP, 1); ++ __ sd(FSR, c_rarg0, BasicObjectLock::obj_offset_in_bytes()); ++ __ lock_object(c_rarg0); ++ // check to make sure this monitor doesn't cause stack overflow after locking ++ __ save_bcp(); // in case of exception ++ __ generate_stack_overflow_check(0); ++ // The bcp has already been incremented. 
Just need to dispatch to next instruction. ++ ++ __ dispatch_next(vtos); ++} ++ ++// T2 : top ++// c_rarg0 : entry ++void TemplateTable::monitorexit() { ++ transition(atos, vtos); ++ ++ __ null_check(FSR); ++ ++ const int entry_size =(frame::interpreter_frame_monitor_size()* wordSize); ++ Label found; ++ ++ // find matching slot ++ { ++ Label entry, loop; ++ __ ld(c_rarg0, FP, frame::interpreter_frame_monitor_block_top_offset * wordSize); ++ __ b(entry); ++ __ delayed()->daddiu(T2, FP, frame::interpreter_frame_initial_sp_offset * wordSize); ++ ++ __ bind(loop); ++ __ ld(AT, c_rarg0, BasicObjectLock::obj_offset_in_bytes()); ++ __ beq(FSR, AT, found); ++ __ delayed()->nop(); ++ __ daddiu(c_rarg0, c_rarg0, entry_size); ++ __ bind(entry); ++ __ bne(T2, c_rarg0, loop); ++ __ delayed()->nop(); ++ } ++ ++ // error handling. Unlocking was not block-structured ++ Label end; ++ __ call_VM(NOREG, CAST_FROM_FN_PTR(address, ++ InterpreterRuntime::throw_illegal_monitor_state_exception)); ++ __ should_not_reach_here(); ++ ++ // call run-time routine ++ // c_rarg0: points to monitor entry ++ __ bind(found); ++ __ move(TSR, FSR); ++ __ unlock_object(c_rarg0); ++ __ move(FSR, TSR); ++ __ bind(end); ++} ++ ++ ++// Wide instructions ++void TemplateTable::wide() { ++ transition(vtos, vtos); ++ __ lbu(Rnext, at_bcp(1)); ++ __ dsll(T9, Rnext, Address::times_8); ++ __ li(AT, (long)Interpreter::_wentry_point); ++ __ daddu(AT, T9, AT); ++ __ ld(T9, AT, 0); ++ __ jr(T9); ++ __ delayed()->nop(); ++} ++ ++ ++void TemplateTable::multianewarray() { ++ transition(vtos, atos); ++ // last dim is on top of stack; we want address of first one: ++ // first_addr = last_addr + (ndims - 1) * wordSize ++ __ lbu(A1, at_bcp(3)); // dimension ++ __ daddiu(A1, A1, -1); ++ __ dsll(A1, A1, Address::times_8); ++ __ daddu(A1, SP, A1); // now A1 pointer to the count array on the stack ++ call_VM(FSR, CAST_FROM_FN_PTR(address, InterpreterRuntime::multianewarray), A1); ++ __ lbu(AT, at_bcp(3)); ++ __ dsll(AT, AT, Address::times_8); ++ __ daddu(SP, SP, AT); ++ __ sync(); ++} +diff --git a/src/hotspot/cpu/mips/universalNativeInvoker_mips_64.cpp b/src/hotspot/cpu/mips/universalNativeInvoker_mips_64.cpp +new file mode 100644 +index 00000000000..87f6a113268 +--- /dev/null ++++ b/src/hotspot/cpu/mips/universalNativeInvoker_mips_64.cpp +@@ -0,0 +1,32 @@ ++/* ++ * Copyright (c) 2020, Red Hat, Inc. All rights reserved. ++ * Copyright (c) 2021, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ */ ++ ++#include "precompiled.hpp" ++#include "prims/universalNativeInvoker.hpp" ++#include "utilities/debug.hpp" ++ ++address ProgrammableInvoker::generate_adapter(jobject jabi, jobject jlayout) { ++ Unimplemented(); ++ return nullptr; ++} +diff --git a/src/hotspot/cpu/mips/universalUpcallHandler_mips_64.cpp b/src/hotspot/cpu/mips/universalUpcallHandler_mips_64.cpp +new file mode 100644 +index 00000000000..7586b084868 +--- /dev/null ++++ b/src/hotspot/cpu/mips/universalUpcallHandler_mips_64.cpp +@@ -0,0 +1,41 @@ ++/* ++ * Copyright (c) 2020, Red Hat, Inc. All rights reserved. ++ * Copyright (c) 2021, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ */ ++ ++#include "precompiled.hpp" ++#include "prims/universalUpcallHandler.hpp" ++#include "utilities/debug.hpp" ++ ++address ProgrammableUpcallHandler::generate_upcall_stub(jobject rec, jobject jabi, jobject jlayout) { ++ Unimplemented(); ++ return nullptr; ++} ++ ++address ProgrammableUpcallHandler::generate_optimized_upcall_stub(jobject mh, Method* entry, jobject jabi, jobject jconv) { ++ ShouldNotCallThis(); ++ return nullptr; ++} ++ ++bool ProgrammableUpcallHandler::supports_optimized_upcalls() { ++ return false; ++} +diff --git a/src/hotspot/cpu/mips/vmStructs_mips.hpp b/src/hotspot/cpu/mips/vmStructs_mips.hpp +new file mode 100644 +index 00000000000..6939914356d +--- /dev/null ++++ b/src/hotspot/cpu/mips/vmStructs_mips.hpp +@@ -0,0 +1,68 @@ ++/* ++ * Copyright (c) 2001, 2013, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2016, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 
++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#ifndef CPU_MIPS_VM_VMSTRUCTS_MIPS_HPP ++#define CPU_MIPS_VM_VMSTRUCTS_MIPS_HPP ++ ++// These are the CPU-specific fields, types and integer ++// constants required by the Serviceability Agent. This file is ++// referenced by vmStructs.cpp. ++ ++#define VM_STRUCTS_CPU(nonstatic_field, static_field, unchecked_nonstatic_field, volatile_nonstatic_field, nonproduct_nonstatic_field, c2_nonstatic_field, unchecked_c1_static_field, unchecked_c2_static_field) \ ++ \ ++ /******************************/ \ ++ /* JavaCallWrapper */ \ ++ /******************************/ \ ++ /******************************/ \ ++ /* JavaFrameAnchor */ \ ++ /******************************/ \ ++ volatile_nonstatic_field(JavaFrameAnchor, _last_Java_fp, intptr_t*) \ ++ \ ++ ++ /* NOTE that we do not use the last_entry() macro here; it is used */ ++ /* in vmStructs__.hpp's VM_STRUCTS_OS_CPU macro (and must */ ++ /* be present there) */ ++ ++ ++#define VM_TYPES_CPU(declare_type, declare_toplevel_type, declare_oop_type, declare_integer_type, declare_unsigned_integer_type, declare_c1_toplevel_type, declare_c2_type, declare_c2_toplevel_type) \ ++ ++ /* NOTE that we do not use the last_entry() macro here; it is used */ ++ /* in vmStructs__.hpp's VM_TYPES_OS_CPU macro (and must */ ++ /* be present there) */ ++ ++ ++#define VM_INT_CONSTANTS_CPU(declare_constant, declare_preprocessor_constant, declare_c1_constant, declare_c2_constant, declare_c2_preprocessor_constant) \ ++ ++ /* NOTE that we do not use the last_entry() macro here; it is used */ ++ /* in vmStructs__.hpp's VM_INT_CONSTANTS_OS_CPU macro (and must */ ++ /* be present there) */ ++ ++#define VM_LONG_CONSTANTS_CPU(declare_constant, declare_preprocessor_constant, declare_c1_constant, declare_c2_constant, declare_c2_preprocessor_constant) \ ++ ++ /* NOTE that we do not use the last_entry() macro here; it is used */ ++ /* in vmStructs__.hpp's VM_LONG_CONSTANTS_OS_CPU macro (and must */ ++ /* be present there) */ ++ ++#endif // CPU_MIPS_VM_VMSTRUCTS_MIPS_HPP +diff --git a/src/hotspot/cpu/mips/vm_version_ext_mips.cpp b/src/hotspot/cpu/mips/vm_version_ext_mips.cpp +new file mode 100644 +index 00000000000..d3f07078570 +--- /dev/null ++++ b/src/hotspot/cpu/mips/vm_version_ext_mips.cpp +@@ -0,0 +1,90 @@ ++/* ++ * Copyright (c) 2013, 2018, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2018, 2021, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 
++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#include "memory/allocation.inline.hpp" ++#include "runtime/os.inline.hpp" ++#include "vm_version_ext_mips.hpp" ++ ++// VM_Version_Ext statics ++int VM_Version_Ext::_no_of_threads = 0; ++int VM_Version_Ext::_no_of_cores = 0; ++int VM_Version_Ext::_no_of_sockets = 0; ++bool VM_Version_Ext::_initialized = false; ++char VM_Version_Ext::_cpu_name[CPU_TYPE_DESC_BUF_SIZE] = {0}; ++char VM_Version_Ext::_cpu_desc[CPU_DETAILED_DESC_BUF_SIZE] = {0}; ++ ++void VM_Version_Ext::initialize_cpu_information(void) { ++ // do nothing if cpu info has been initialized ++ if (_initialized) { ++ return; ++ } ++ ++ _no_of_cores = os::processor_count(); ++ _no_of_threads = _no_of_cores; ++ _no_of_sockets = _no_of_cores; ++ if (is_loongson()) { ++ snprintf(_cpu_name, CPU_TYPE_DESC_BUF_SIZE - 1, "Loongson MIPS"); ++ snprintf(_cpu_desc, CPU_DETAILED_DESC_BUF_SIZE, "Loongson MIPS %s", features_string()); ++ } else { ++ snprintf(_cpu_name, CPU_TYPE_DESC_BUF_SIZE - 1, "MIPS"); ++ snprintf(_cpu_desc, CPU_DETAILED_DESC_BUF_SIZE, "MIPS %s", features_string()); ++ } ++ _initialized = true; ++} ++ ++int VM_Version_Ext::number_of_threads(void) { ++ initialize_cpu_information(); ++ return _no_of_threads; ++} ++ ++int VM_Version_Ext::number_of_cores(void) { ++ initialize_cpu_information(); ++ return _no_of_cores; ++} ++ ++int VM_Version_Ext::number_of_sockets(void) { ++ initialize_cpu_information(); ++ return _no_of_sockets; ++} ++ ++const char* VM_Version_Ext::cpu_name(void) { ++ initialize_cpu_information(); ++ char* tmp = NEW_C_HEAP_ARRAY_RETURN_NULL(char, CPU_TYPE_DESC_BUF_SIZE, mtTracing); ++ if (NULL == tmp) { ++ return NULL; ++ } ++ strncpy(tmp, _cpu_name, CPU_TYPE_DESC_BUF_SIZE); ++ return tmp; ++} ++ ++const char* VM_Version_Ext::cpu_description(void) { ++ initialize_cpu_information(); ++ char* tmp = NEW_C_HEAP_ARRAY_RETURN_NULL(char, CPU_DETAILED_DESC_BUF_SIZE, mtTracing); ++ if (NULL == tmp) { ++ return NULL; ++ } ++ strncpy(tmp, _cpu_desc, CPU_DETAILED_DESC_BUF_SIZE); ++ return tmp; ++} +diff --git a/src/hotspot/cpu/mips/vm_version_ext_mips.hpp b/src/hotspot/cpu/mips/vm_version_ext_mips.hpp +new file mode 100644 +index 00000000000..ffdcff06777 +--- /dev/null ++++ b/src/hotspot/cpu/mips/vm_version_ext_mips.hpp +@@ -0,0 +1,54 @@ ++/* ++ * Copyright (c) 2016, 2018, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2018, 2019, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 
++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#ifndef CPU_MIPS_VM_VM_VERSION_EXT_MIPS_HPP ++#define CPU_MIPS_VM_VM_VERSION_EXT_MIPS_HPP ++ ++#include "runtime/vm_version.hpp" ++#include "utilities/macros.hpp" ++ ++class VM_Version_Ext : public VM_Version { ++ private: ++ static const size_t CPU_TYPE_DESC_BUF_SIZE = 256; ++ static const size_t CPU_DETAILED_DESC_BUF_SIZE = 4096; ++ ++ static int _no_of_threads; ++ static int _no_of_cores; ++ static int _no_of_sockets; ++ static bool _initialized; ++ static char _cpu_name[CPU_TYPE_DESC_BUF_SIZE]; ++ static char _cpu_desc[CPU_DETAILED_DESC_BUF_SIZE]; ++ ++ public: ++ static int number_of_threads(void); ++ static int number_of_cores(void); ++ static int number_of_sockets(void); ++ ++ static const char* cpu_name(void); ++ static const char* cpu_description(void); ++ static void initialize_cpu_information(void); ++}; ++ ++#endif // CPU_MIPS_VM_VM_VERSION_EXT_MIPS_HPP +diff --git a/src/hotspot/cpu/mips/vm_version_mips.cpp b/src/hotspot/cpu/mips/vm_version_mips.cpp +new file mode 100644 +index 00000000000..8625bc70075 +--- /dev/null ++++ b/src/hotspot/cpu/mips/vm_version_mips.cpp +@@ -0,0 +1,523 @@ ++/* ++ * Copyright (c) 1997, 2014, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ * ++ */ ++ ++#include "precompiled.hpp" ++#include "asm/macroAssembler.hpp" ++#include "asm/macroAssembler.inline.hpp" ++#include "memory/resourceArea.hpp" ++#include "runtime/arguments.hpp" ++#include "runtime/java.hpp" ++#include "runtime/stubCodeGenerator.hpp" ++#include "runtime/vm_version.hpp" ++#ifdef TARGET_OS_FAMILY_linux ++# include "os_linux.inline.hpp" ++#endif ++ ++#define A0 RA0 ++ ++VM_Version::CpuidInfo VM_Version::_cpuid_info = { 0, }; ++volatile bool VM_Version::_is_determine_cpucfg_supported_running = false; ++bool VM_Version::_is_cpucfg_instruction_supported = true; ++bool VM_Version::_cpu_info_is_initialized = false; ++ ++static BufferBlob* stub_blob; ++static const int stub_size = 600; ++ ++extern "C" { ++ typedef void (*get_cpu_info_stub_t)(void*); ++} ++static get_cpu_info_stub_t get_cpu_info_stub = NULL; ++ ++ ++class VM_Version_StubGenerator: public StubCodeGenerator { ++ public: ++ ++ VM_Version_StubGenerator(CodeBuffer *c) : StubCodeGenerator(c) {} ++ ++ address generate_get_cpu_info() { ++ assert(!VM_Version::cpu_info_is_initialized(), "VM_Version should not be initialized"); ++ StubCodeMark mark(this, "VM_Version", "get_cpu_info_stub"); ++# define __ _masm-> ++ ++ address start = __ pc(); ++ ++ __ enter(); ++ __ push(AT); ++ __ push(V0); ++ ++ __ li(AT, (long)0); ++ __ cpucfg(V0, AT); ++ __ lw(AT, A0, in_bytes(VM_Version::Loongson_Cpucfg_id0_offset())); ++ __ sw(V0, A0, in_bytes(VM_Version::Loongson_Cpucfg_id0_offset())); ++ ++ __ li(AT, 1); ++ __ cpucfg(V0, AT); ++ __ lw(AT, A0, in_bytes(VM_Version::Loongson_Cpucfg_id1_offset())); ++ __ sw(V0, A0, in_bytes(VM_Version::Loongson_Cpucfg_id1_offset())); ++ ++ __ li(AT, 2); ++ __ cpucfg(V0, AT); ++ __ lw(AT, A0, in_bytes(VM_Version::Loongson_Cpucfg_id2_offset())); ++ __ sw(V0, A0, in_bytes(VM_Version::Loongson_Cpucfg_id2_offset())); ++ ++ __ pop(V0); ++ __ pop(AT); ++ __ leave(); ++ __ jr(RA); ++ __ delayed()->nop(); ++# undef __ ++ ++ return start; ++ }; ++}; ++ ++uint32_t VM_Version::get_feature_flags_by_cpucfg() { ++ uint32_t result = 0; ++ if (_cpuid_info.cpucfg_info_id1.bits.MMI != 0) ++ result |= CPU_MMI; ++ if (_cpuid_info.cpucfg_info_id1.bits.MSA1 != 0) ++ result |= CPU_MSA1_0; ++ if (_cpuid_info.cpucfg_info_id1.bits.MSA2 != 0) ++ result |= CPU_MSA2_0; ++ if (_cpuid_info.cpucfg_info_id1.bits.CGP != 0) ++ result |= CPU_CGP; ++ if (_cpuid_info.cpucfg_info_id1.bits.LSX1 != 0) ++ result |= CPU_LSX1; ++ if (_cpuid_info.cpucfg_info_id1.bits.LSX2 != 0) ++ result |= CPU_LSX2; ++ if (_cpuid_info.cpucfg_info_id1.bits.LASX != 0) ++ result |= CPU_LASX; ++ if (_cpuid_info.cpucfg_info_id1.bits.LLSYNC != 0) ++ result |= CPU_LLSYNC; ++ if (_cpuid_info.cpucfg_info_id1.bits.TGTSYNC != 0) ++ result |= CPU_TGTSYNC; ++ if (_cpuid_info.cpucfg_info_id1.bits.MUALP != 0) ++ result |= CPU_MUALP; ++ if (_cpuid_info.cpucfg_info_id2.bits.LEXT1 != 0) ++ result |= CPU_LEXT1; ++ if (_cpuid_info.cpucfg_info_id2.bits.LEXT2 != 0) ++ result |= CPU_LEXT2; ++ if (_cpuid_info.cpucfg_info_id2.bits.LEXT3 != 0) ++ result |= CPU_LEXT3; ++ if (_cpuid_info.cpucfg_info_id2.bits.LAMO != 0) ++ result |= CPU_LAMO; ++ if (_cpuid_info.cpucfg_info_id2.bits.LPIXU != 0) ++ result |= CPU_LPIXU; ++ ++ result |= CPU_ULSYNC; ++ ++ return result; ++} ++ ++void read_cpu_info(const char *path, char *result) { ++ FILE *ptr; ++ char buf[1024]; ++ int i = 0; ++ if((ptr=fopen(path, "r")) != NULL) { ++ while(fgets(buf, 1024, ptr)!=NULL) { ++ strcat(result,buf); ++ i++; ++ if (i == 10) break; ++ } ++ fclose(ptr); ++ } else { ++ warning("Can't detect CPU info - cannot 
open %s", path); ++ } ++} ++ ++void strlwr(char *str) { ++ for (; *str!='\0'; str++) ++ *str = tolower(*str); ++} ++ ++int VM_Version::get_feature_flags_by_cpuinfo(int features) { ++ assert(!cpu_info_is_initialized(), "VM_Version should not be initialized"); ++ ++ char res[10240]; ++ int i; ++ memset(res, '\0', 10240 * sizeof(char)); ++ read_cpu_info("/proc/cpuinfo", res); ++ // res is converted to lower case ++ strlwr(res); ++ ++ if (strstr(res, "loongson")) { ++ // Loongson CPU ++ features |= CPU_LOONGSON; ++ ++ const struct Loongson_Cpuinfo loongson_cpuinfo[] = { ++ {L_3A1000, "3a1000"}, ++ {L_3B1500, "3b1500"}, ++ {L_3A2000, "3a2000"}, ++ {L_3B2000, "3b2000"}, ++ {L_3A3000, "3a3000"}, ++ {L_3B3000, "3b3000"}, ++ {L_2K1000, "2k1000"}, ++ {L_UNKNOWN, "unknown"} ++ }; ++ ++ // Loongson Family ++ int detected = 0; ++ for (i = 0; i <= L_UNKNOWN; i++) { ++ switch (i) { ++ // 3A1000 and 3B1500 may use an old kernel and further comparsion is needed ++ // test PRID REV in /proc/cpuinfo ++ // 3A1000: V0.5, model name: ICT Loongson-3A V0.5 FPU V0.1 ++ // 3B1500: V0.7, model name: ICT Loongson-3B V0.7 FPU V0.1 ++ case L_3A1000: ++ if (strstr(res, loongson_cpuinfo[i].match_str) || strstr(res, "loongson-3a v0.5")) { ++ features |= CPU_LOONGSON_GS464; ++ detected++; ++ //tty->print_cr("3A1000 platform"); ++ } ++ break; ++ case L_3B1500: ++ if (strstr(res, loongson_cpuinfo[i].match_str) || strstr(res, "loongson-3b v0.7")) { ++ features |= CPU_LOONGSON_GS464; ++ detected++; ++ //tty->print_cr("3B1500 platform"); ++ } ++ break; ++ case L_3A2000: ++ case L_3B2000: ++ case L_3A3000: ++ case L_3B3000: ++ if (strstr(res, loongson_cpuinfo[i].match_str)) { ++ features |= CPU_LOONGSON_GS464E; ++ detected++; ++ //tty->print_cr("3A2000/3A3000/3B2000/3B3000 platform"); ++ } ++ break; ++ case L_2K1000: ++ if (strstr(res, loongson_cpuinfo[i].match_str)) { ++ features |= CPU_LOONGSON_GS264; ++ detected++; ++ //tty->print_cr("2K1000 platform"); ++ } ++ break; ++ case L_UNKNOWN: ++ if (detected == 0) { ++ detected++; ++ //tty->print_cr("unknown Loongson platform"); ++ } ++ break; ++ default: ++ ShouldNotReachHere(); ++ } ++ } ++ assert (detected == 1, "one and only one of LOONGSON_CPU_FAMILY should be detected"); ++ } else { // not Loongson ++ // Not Loongson CPU ++ //tty->print_cr("MIPS platform"); ++ } ++ ++ if (features & CPU_LOONGSON_GS264) { ++ features |= CPU_LEXT1; ++ features |= CPU_LEXT2; ++ features |= CPU_TGTSYNC; ++ features |= CPU_ULSYNC; ++ features |= CPU_MSA1_0; ++ features |= CPU_LSX1; ++ } else if (features & CPU_LOONGSON_GS464) { ++ features |= CPU_LEXT1; ++ features |= CPU_LLSYNC; ++ features |= CPU_TGTSYNC; ++ } else if (features & CPU_LOONGSON_GS464E) { ++ features |= CPU_LEXT1; ++ features |= CPU_LEXT2; ++ features |= CPU_LEXT3; ++ features |= CPU_TGTSYNC; ++ features |= CPU_ULSYNC; ++ } else if (features & CPU_LOONGSON) { ++ // unknow loongson ++ features |= CPU_LLSYNC; ++ features |= CPU_TGTSYNC; ++ features |= CPU_ULSYNC; ++ } ++ VM_Version::_cpu_info_is_initialized = true; ++ ++ return features; ++} ++ ++void VM_Version::get_processor_features() { ++ ++ clean_cpuFeatures(); ++ ++ // test if cpucfg instruction is supported ++ VM_Version::_is_determine_cpucfg_supported_running = true; ++ __asm__ __volatile__( ++ ".insn \n\t" ++ ".word (0xc8080118)\n\t" // cpucfg zero, zero ++ : ++ : ++ : ++ ); ++ VM_Version::_is_determine_cpucfg_supported_running = false; ++ ++ if (supports_cpucfg()) { ++ get_cpu_info_stub(&_cpuid_info); ++ _features = get_feature_flags_by_cpucfg(); ++ // Only Loongson CPUs 
support cpucfg ++ _features |= CPU_LOONGSON; ++ } else { ++ _features = get_feature_flags_by_cpuinfo(0); ++ } ++ ++ _supports_cx8 = true; ++ ++ if (UseG1GC && FLAG_IS_DEFAULT(MaxGCPauseMillis)) { ++ FLAG_SET_CMDLINE(MaxGCPauseMillis, 650); ++ } ++ ++#ifdef COMPILER2 ++ if (MaxVectorSize > 0) { ++ if (!is_power_of_2(MaxVectorSize)) { ++ warning("MaxVectorSize must be a power of 2"); ++ MaxVectorSize = 8; ++ } ++ if (MaxVectorSize > 0 && supports_ps()) { ++ MaxVectorSize = 8; ++ } else { ++ MaxVectorSize = 0; ++ } ++ } ++ // ++ // Vector optimization of MIPS works in most cases, but cannot pass hotspot/test/compiler/6340864/TestFloatVect.java. ++ // Vector optimization was closed by default. ++ // The reasons: ++ // 1. The kernel does not have emulation of PS instructions yet, so the emulation of PS instructions must be done in JVM, see JVM_handle_linux_signal. ++ // 2. It seems the gcc4.4.7 had some bug related to ucontext_t, which is used in signal handler to emulate PS instructions. ++ // ++ if (FLAG_IS_DEFAULT(MaxVectorSize)) { ++ MaxVectorSize = 0; ++ } ++ ++#endif ++ ++ if (needs_llsync() && needs_tgtsync() && !needs_ulsync()) { ++ if (FLAG_IS_DEFAULT(UseSyncLevel)) { ++ FLAG_SET_DEFAULT(UseSyncLevel, 1000); ++ } ++ } else if (!needs_llsync() && needs_tgtsync() && needs_ulsync()) { ++ if (FLAG_IS_DEFAULT(UseSyncLevel)) { ++ FLAG_SET_DEFAULT(UseSyncLevel, 2000); ++ } ++ } else if (!needs_llsync() && !needs_tgtsync() && needs_ulsync()) { ++ if (FLAG_IS_DEFAULT(UseSyncLevel)) { ++ FLAG_SET_DEFAULT(UseSyncLevel, 3000); ++ } ++ } else if (needs_llsync() && !needs_tgtsync() && needs_ulsync()) { ++ if (FLAG_IS_DEFAULT(UseSyncLevel)) { ++ FLAG_SET_DEFAULT(UseSyncLevel, 4000); ++ } ++ } else if (needs_llsync() && needs_tgtsync() && needs_ulsync()) { ++ if (FLAG_IS_DEFAULT(UseSyncLevel)) { ++ FLAG_SET_DEFAULT(UseSyncLevel, 10000); ++ } ++ } else { ++ assert(false, "Should Not Reach Here, what is the cpu type?"); ++ if (FLAG_IS_DEFAULT(UseSyncLevel)) { ++ FLAG_SET_DEFAULT(UseSyncLevel, 10000); ++ } ++ } ++ ++ if (supports_lext1()) { ++ if (FLAG_IS_DEFAULT(UseLEXT1)) { ++ FLAG_SET_DEFAULT(UseLEXT1, true); ++ } ++ } else if (UseLEXT1) { ++ warning("LEXT1 instructions are not available on this CPU"); ++ FLAG_SET_DEFAULT(UseLEXT1, false); ++ } ++ ++ if (supports_lext2()) { ++ if (FLAG_IS_DEFAULT(UseLEXT2)) { ++ FLAG_SET_DEFAULT(UseLEXT2, true); ++ } ++ } else if (UseLEXT2) { ++ warning("LEXT2 instructions are not available on this CPU"); ++ FLAG_SET_DEFAULT(UseLEXT2, false); ++ } ++ ++ if (supports_lext3()) { ++ if (FLAG_IS_DEFAULT(UseLEXT3)) { ++ FLAG_SET_DEFAULT(UseLEXT3, true); ++ } ++ } else if (UseLEXT3) { ++ warning("LEXT3 instructions are not available on this CPU"); ++ FLAG_SET_DEFAULT(UseLEXT3, false); ++ } ++ ++ if (UseLEXT2) { ++ if (FLAG_IS_DEFAULT(UseCountTrailingZerosInstructionMIPS64)) { ++ FLAG_SET_DEFAULT(UseCountTrailingZerosInstructionMIPS64, 1); ++ } ++ } else if (UseCountTrailingZerosInstructionMIPS64) { ++ if (!FLAG_IS_DEFAULT(UseCountTrailingZerosInstructionMIPS64)) ++ warning("ctz/dctz instructions are not available on this CPU"); ++ FLAG_SET_DEFAULT(UseCountTrailingZerosInstructionMIPS64, 0); ++ } ++ ++ UNSUPPORTED_OPTION(TieredCompilation); ++ ++ char buf[256]; ++ bool is_unknown_loongson_cpu = is_loongson() && !is_gs464() && !is_gs464e() && !is_gs264() && !supports_cpucfg(); ++ ++ // A note on the _features_string format: ++ // There are jtreg tests checking the _features_string for various properties. 
++ // For some strange reason, these tests require the string to contain ++ // only _lowercase_ characters. Keep that in mind when being surprised ++ // about the unusual notation of features - and when adding new ones. ++ // Features may have one comma at the end. ++ // Furthermore, use one, and only one, separator space between features. ++ // Multiple spaces are considered separate tokens, messing up everything. ++ jio_snprintf(buf, sizeof(buf), "%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s, usesynclevel:%d", ++ (is_loongson() ? "mips-compatible loongson cpu" : "mips cpu"), ++ (is_gs464() ? ", gs464 (3a1000/3b1500)" : ""), ++ (is_gs464e() ? ", gs464e (3a2000/3a3000/3b2000/3b3000)" : ""), ++ (is_gs264() ? ", gs264 (2k1000)" : ""), ++ (is_unknown_loongson_cpu ? ", unknown loongson cpu" : ""), ++ (supports_dsp() ? ", dsp" : ""), ++ (supports_ps() ? ", ps" : ""), ++ (supports_3d() ? ", 3d" : ""), ++ (supports_mmi() ? ", mmi" : ""), ++ (supports_msa1_0() ? ", msa1_0" : ""), ++ (supports_msa2_0() ? ", msa2_0" : ""), ++ (supports_lsx1() ? ", lsx1" : ""), ++ (supports_lsx2() ? ", lsx2" : ""), ++ (supports_lasx() ? ", lasx" : ""), ++ (supports_lext1() ? ", lext1" : ""), ++ (supports_lext2() ? ", lext2" : ""), ++ (supports_lext3() ? ", lext3" : ""), ++ (supports_cgp() ? ", aes, crc, sha1, sha256, sha512" : ""), ++ (supports_lamo() ? ", lamo" : ""), ++ (supports_lpixu() ? ", lpixu" : ""), ++ (needs_llsync() ? ", llsync" : ""), ++ (needs_tgtsync() ? ", tgtsync": ""), ++ (needs_ulsync() ? ", ulsync": ""), ++ (supports_mualp() ? ", mualp" : ""), ++ UseSyncLevel); ++ _features_string = os::strdup(buf); ++ ++ if (FLAG_IS_DEFAULT(AllocatePrefetchStyle)) { ++ FLAG_SET_DEFAULT(AllocatePrefetchStyle, 1); ++ } ++ ++ if (FLAG_IS_DEFAULT(AllocatePrefetchLines)) { ++ FLAG_SET_DEFAULT(AllocatePrefetchLines, 1); ++ } ++ ++ if (FLAG_IS_DEFAULT(AllocatePrefetchStepSize)) { ++ FLAG_SET_DEFAULT(AllocatePrefetchStepSize, 64); ++ } ++ ++ if (FLAG_IS_DEFAULT(AllocatePrefetchDistance)) { ++ FLAG_SET_DEFAULT(AllocatePrefetchDistance, 64); ++ } ++ ++ if (FLAG_IS_DEFAULT(AllocateInstancePrefetchLines)) { ++ FLAG_SET_DEFAULT(AllocateInstancePrefetchLines, 1); ++ } ++ ++ if (UseSHA) { ++ warning("SHA instructions are not available on this CPU"); ++ FLAG_SET_DEFAULT(UseSHA, false); ++ } ++ ++ if (UseSHA3Intrinsics) { ++ warning("SHA3 intrinsics are not available on this CPU."); ++ FLAG_SET_DEFAULT(UseSHA3Intrinsics, false); ++ } ++ ++ if (UseMD5Intrinsics) { ++ warning("MD5 intrinsics are not available on this CPU"); ++ FLAG_SET_DEFAULT(UseMD5Intrinsics, false); ++ } ++ ++ if (UseSHA1Intrinsics || UseSHA256Intrinsics || UseSHA512Intrinsics) { ++ warning("SHA intrinsics are not available on this CPU"); ++ FLAG_SET_DEFAULT(UseSHA1Intrinsics, false); ++ FLAG_SET_DEFAULT(UseSHA256Intrinsics, false); ++ FLAG_SET_DEFAULT(UseSHA512Intrinsics, false); ++ } ++ ++ if (UseAES) { ++ if (!FLAG_IS_DEFAULT(UseAES)) { ++ warning("AES instructions are not available on this CPU"); ++ FLAG_SET_DEFAULT(UseAES, false); ++ } ++ } ++ ++ if (UseCRC32Intrinsics) { ++ if (!FLAG_IS_DEFAULT(UseCRC32Intrinsics)) { ++ warning("CRC32Intrinsics instructions are not available on this CPU"); ++ FLAG_SET_DEFAULT(UseCRC32Intrinsics, false); ++ } ++ } ++ ++ if (UseCRC32CIntrinsics) { ++ if (!FLAG_IS_DEFAULT(UseCRC32Intrinsics)) { ++ warning("CRC32CIntrinsics instructions are not available on this CPU"); ++ FLAG_SET_DEFAULT(UseCRC32CIntrinsics, false); ++ } ++ } ++ ++ if (UseAESIntrinsics) { ++ if (!FLAG_IS_DEFAULT(UseAESIntrinsics)) { ++ warning("AES 
intrinsics are not available on this CPU"); ++ FLAG_SET_DEFAULT(UseAESIntrinsics, false); ++ } ++ } ++ ++#ifdef COMPILER2 ++ if (FLAG_IS_DEFAULT(UseMontgomeryMultiplyIntrinsic)) { ++ UseMontgomeryMultiplyIntrinsic = true; ++ } ++ if (FLAG_IS_DEFAULT(UseMontgomerySquareIntrinsic)) { ++ UseMontgomerySquareIntrinsic = true; ++ } ++#endif ++ ++ if (FLAG_IS_DEFAULT(UseFMA)) { ++ FLAG_SET_DEFAULT(UseFMA, true); ++ } ++ ++ UNSUPPORTED_OPTION(CriticalJNINatives); ++} ++ ++void VM_Version::initialize() { ++ ResourceMark rm; ++ // Making this stub must be FIRST use of assembler ++ ++ stub_blob = BufferBlob::create("get_cpu_info_stub", stub_size); ++ if (stub_blob == NULL) { ++ vm_exit_during_initialization("Unable to allocate get_cpu_info_stub"); ++ } ++ CodeBuffer c(stub_blob); ++ VM_Version_StubGenerator g(&c); ++ get_cpu_info_stub = CAST_TO_FN_PTR(get_cpu_info_stub_t, ++ g.generate_get_cpu_info()); ++ ++ get_processor_features(); ++} +diff --git a/src/hotspot/cpu/mips/vm_version_mips.hpp b/src/hotspot/cpu/mips/vm_version_mips.hpp +new file mode 100644 +index 00000000000..30c7b5a934d +--- /dev/null ++++ b/src/hotspot/cpu/mips/vm_version_mips.hpp +@@ -0,0 +1,218 @@ ++/* ++ * Copyright (c) 1997, 2013, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2021, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ * ++ */ ++ ++#ifndef CPU_MIPS_VM_VM_VERSION_MIPS_HPP ++#define CPU_MIPS_VM_VM_VERSION_MIPS_HPP ++ ++#include "runtime/abstract_vm_version.hpp" ++#include "runtime/globals_extension.hpp" ++#include "utilities/sizes.hpp" ++ ++class VM_Version: public Abstract_VM_Version { ++public: ++ ++ union Loongson_Cpucfg_Id1 { ++ uint32_t value; ++ struct { ++ uint32_t FP : 1, ++ FPREV : 3, ++ MMI : 1, ++ MSA1 : 1, ++ MSA2 : 1, ++ CGP : 1, ++ WRP : 1, ++ LSX1 : 1, ++ LSX2 : 1, ++ LASX : 1, ++ R6FXP : 1, ++ R6CRCP : 1, ++ R6FPP : 1, ++ CNT64 : 1, ++ LSLDR0 : 1, ++ LSPREF : 1, ++ LSPREFX : 1, ++ LSSYNCI : 1, ++ LSUCA : 1, ++ LLSYNC : 1, ++ TGTSYNC : 1, ++ LLEXC : 1, ++ SCRAND : 1, ++ MUALP : 1, ++ KMUALEn : 1, ++ ITLBT : 1, ++ LSUPERF : 1, ++ SFBP : 1, ++ CDMAP : 1, ++ : 1; ++ } bits; ++ }; ++ ++ union Loongson_Cpucfg_Id2 { ++ uint32_t value; ++ struct { ++ uint32_t LEXT1 : 1, ++ LEXT2 : 1, ++ LEXT3 : 1, ++ LSPW : 1, ++ LBT1 : 1, ++ LBT2 : 1, ++ LBT3 : 1, ++ LBTMMU : 1, ++ LPMP : 1, ++ LPMRev : 3, ++ LAMO : 1, ++ LPIXU : 1, ++ LPIXNU : 1, ++ LVZP : 1, ++ LVZRev : 3, ++ LGFTP : 1, ++ LGFTRev : 3, ++ LLFTP : 1, ++ LLFTRev : 3, ++ LCSRP : 1, ++ DISBLKLY : 1, ++ : 3; ++ } bits; ++ }; ++ ++protected: ++ ++ enum Feature_Flag { ++ CPU_LOONGSON = (1 << 1), ++ CPU_LOONGSON_GS464 = (1 << 2), ++ CPU_LOONGSON_GS464E = (1 << 3), ++ CPU_LOONGSON_GS264 = (1 << 4), ++ CPU_MMI = (1 << 11), ++ CPU_MSA1_0 = (1 << 12), ++ CPU_MSA2_0 = (1 << 13), ++ CPU_CGP = (1 << 14), ++ CPU_LSX1 = (1 << 15), ++ CPU_LSX2 = (1 << 16), ++ CPU_LASX = (1 << 17), ++ CPU_LEXT1 = (1 << 18), ++ CPU_LEXT2 = (1 << 19), ++ CPU_LEXT3 = (1 << 20), ++ CPU_LAMO = (1 << 21), ++ CPU_LPIXU = (1 << 22), ++ CPU_LLSYNC = (1 << 23), ++ CPU_TGTSYNC = (1 << 24), ++ CPU_ULSYNC = (1 << 25), ++ CPU_MUALP = (1 << 26), ++ ++ //////////////////////add some other feature here////////////////// ++ }; ++ ++ enum Loongson_Family { ++ L_3A1000 = 0, ++ L_3B1500 = 1, ++ L_3A2000 = 2, ++ L_3B2000 = 3, ++ L_3A3000 = 4, ++ L_3B3000 = 5, ++ L_2K1000 = 6, ++ L_UNKNOWN = 7 ++ }; ++ ++ struct Loongson_Cpuinfo { ++ Loongson_Family id; ++ const char* const match_str; ++ }; ++ ++ static volatile bool _is_determine_cpucfg_supported_running; ++ static bool _is_cpucfg_instruction_supported; ++ static bool _cpu_info_is_initialized; ++ ++ struct CpuidInfo { ++ uint32_t cpucfg_info_id0; ++ Loongson_Cpucfg_Id1 cpucfg_info_id1; ++ Loongson_Cpucfg_Id2 cpucfg_info_id2; ++ uint32_t cpucfg_info_id3; ++ uint32_t cpucfg_info_id4; ++ uint32_t cpucfg_info_id5; ++ uint32_t cpucfg_info_id6; ++ uint32_t cpucfg_info_id8; ++ }; ++ ++ // The actual cpuid info block ++ static CpuidInfo _cpuid_info; ++ ++ static uint32_t get_feature_flags_by_cpucfg(); ++ static int get_feature_flags_by_cpuinfo(int features); ++ static void get_processor_features(); ++ ++public: ++ // Offsets for cpuid asm stub ++ static ByteSize Loongson_Cpucfg_id0_offset() { return byte_offset_of(CpuidInfo, cpucfg_info_id0); } ++ static ByteSize Loongson_Cpucfg_id1_offset() { return byte_offset_of(CpuidInfo, cpucfg_info_id1); } ++ static ByteSize Loongson_Cpucfg_id2_offset() { return byte_offset_of(CpuidInfo, cpucfg_info_id2); } ++ static ByteSize Loongson_Cpucfg_id3_offset() { return byte_offset_of(CpuidInfo, cpucfg_info_id3); } ++ static ByteSize Loongson_Cpucfg_id4_offset() { return byte_offset_of(CpuidInfo, cpucfg_info_id4); } ++ static ByteSize Loongson_Cpucfg_id5_offset() { return byte_offset_of(CpuidInfo, cpucfg_info_id5); } ++ static ByteSize Loongson_Cpucfg_id6_offset() { return byte_offset_of(CpuidInfo, cpucfg_info_id6); } ++ 
static ByteSize Loongson_Cpucfg_id8_offset() { return byte_offset_of(CpuidInfo, cpucfg_info_id8); } ++ ++ static bool is_determine_features_test_running() { return _is_determine_cpucfg_supported_running; } ++ ++ static void clean_cpuFeatures() { _features = 0; } ++ ++ // Initialization ++ static void initialize(); ++ ++ static bool cpu_info_is_initialized() { return _cpu_info_is_initialized; } ++ ++ static bool supports_cpucfg() { return _is_cpucfg_instruction_supported; } ++ static bool set_supports_cpucfg(bool value) { return _is_cpucfg_instruction_supported = value; } ++ ++ static bool is_loongson() { return _features & CPU_LOONGSON; } ++ static bool is_gs264() { return _features & CPU_LOONGSON_GS264; } ++ static bool is_gs464() { return _features & CPU_LOONGSON_GS464; } ++ static bool is_gs464e() { return _features & CPU_LOONGSON_GS464E; } ++ static bool supports_dsp() { return 0; /*not supported yet*/} ++ static bool supports_ps() { return 0; /*not supported yet*/} ++ static bool supports_3d() { return 0; /*not supported yet*/} ++ static bool supports_msa1_0() { return _features & CPU_MSA1_0; } ++ static bool supports_msa2_0() { return _features & CPU_MSA2_0; } ++ static bool supports_cgp() { return _features & CPU_CGP; } ++ static bool supports_mmi() { return _features & CPU_MMI; } ++ static bool supports_lsx1() { return _features & CPU_LSX1; } ++ static bool supports_lsx2() { return _features & CPU_LSX2; } ++ static bool supports_lasx() { return _features & CPU_LASX; } ++ static bool supports_lext1() { return _features & CPU_LEXT1; } ++ static bool supports_lext2() { return _features & CPU_LEXT2; } ++ static bool supports_lext3() { return _features & CPU_LEXT3; } ++ static bool supports_lamo() { return _features & CPU_LAMO; } ++ static bool supports_lpixu() { return _features & CPU_LPIXU; } ++ static bool needs_llsync() { return _features & CPU_LLSYNC; } ++ static bool needs_tgtsync() { return _features & CPU_TGTSYNC; } ++ static bool needs_ulsync() { return _features & CPU_ULSYNC; } ++ static bool supports_mualp() { return _features & CPU_MUALP; } ++ ++ //mips has no such instructions, use ll/sc instead ++ static bool supports_compare_and_exchange() { return false; } ++ ++ static bool supports_fast_class_init_checks() { return true; } ++}; ++ ++#endif // CPU_MIPS_VM_VM_VERSION_MIPS_HPP +diff --git a/src/hotspot/cpu/mips/vmreg_mips.cpp b/src/hotspot/cpu/mips/vmreg_mips.cpp +new file mode 100644 +index 00000000000..95dbd17f1a8 +--- /dev/null ++++ b/src/hotspot/cpu/mips/vmreg_mips.cpp +@@ -0,0 +1,56 @@ ++/* ++ * Copyright (c) 2006, 2012, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 
++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#include "precompiled.hpp" ++#include "asm/assembler.hpp" ++#include "code/vmreg.hpp" ++ ++ ++ ++void VMRegImpl::set_regName() { ++ Register reg = ::as_Register(0); ++ int i; ++ for (i = 0; i < ConcreteRegisterImpl::max_gpr ; ) { ++ regName[i++] = reg->name(); ++ regName[i++] = reg->name(); ++ reg = reg->successor(); ++ } ++ ++ FloatRegister freg = ::as_FloatRegister(0); ++ for ( ; i < ConcreteRegisterImpl::max_fpr ; ) { ++ regName[i++] = freg->name(); ++ regName[i++] = freg->name(); ++ freg = freg->successor(); ++ } ++ ++ for ( ; i < ConcreteRegisterImpl::number_of_registers ; i ++ ) { ++ regName[i] = "NON-GPR-FPR"; ++ } ++} ++ ++VMReg VMRegImpl::vmStorageToVMReg(int type, int index) { ++ Unimplemented(); ++ return VMRegImpl::Bad(); ++} +diff --git a/src/hotspot/cpu/mips/vmreg_mips.hpp b/src/hotspot/cpu/mips/vmreg_mips.hpp +new file mode 100644 +index 00000000000..8ccc8c513c8 +--- /dev/null ++++ b/src/hotspot/cpu/mips/vmreg_mips.hpp +@@ -0,0 +1,56 @@ ++/* ++ * Copyright (c) 2006, 2010, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#ifndef CPU_MIPS_VM_VMREG_MIPS_HPP ++#define CPU_MIPS_VM_VMREG_MIPS_HPP ++ ++inline Register as_Register() { ++ assert( is_Register(), "must be"); ++ return ::as_Register(value() >> 1); ++} ++ ++inline FloatRegister as_FloatRegister() { ++ assert( is_FloatRegister(), "must be" ); ++ assert( is_even(value()), "must be" ); ++ return ::as_FloatRegister((value() - ConcreteRegisterImpl::max_gpr) >> 1); ++} ++ ++inline bool is_Register() { ++ return (unsigned int) value() < (unsigned int) ConcreteRegisterImpl::max_gpr; ++} ++ ++inline bool is_FloatRegister() { ++ return value() >= ConcreteRegisterImpl::max_gpr && value() < ConcreteRegisterImpl::max_fpr; ++} ++ ++inline bool is_concrete() { ++ assert(is_reg(), "must be"); ++ if(is_Register()) return true; ++ if(is_FloatRegister()) return true; ++ assert(false, "what register?"); ++ return false; ++} ++ ++#endif // CPU_MIPS_VM_VMREG_MIPS_HPP +diff --git a/src/hotspot/cpu/mips/vmreg_mips.inline.hpp b/src/hotspot/cpu/mips/vmreg_mips.inline.hpp +new file mode 100644 +index 00000000000..12ad7361aa5 +--- /dev/null ++++ b/src/hotspot/cpu/mips/vmreg_mips.inline.hpp +@@ -0,0 +1,38 @@ ++/* ++ * Copyright (c) 2006, 2012, Oracle and/or its affiliates. 
All rights reserved. ++ * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#ifndef CPU_MIPS_VM_VMREG_MIPS_INLINE_HPP ++#define CPU_MIPS_VM_VMREG_MIPS_INLINE_HPP ++ ++inline VMReg RegisterImpl::as_VMReg() { ++ if( this==noreg ) return VMRegImpl::Bad(); ++ return VMRegImpl::as_VMReg(encoding() << 1 ); ++} ++ ++inline VMReg FloatRegisterImpl::as_VMReg() { ++ return VMRegImpl::as_VMReg((encoding() << 1) + ConcreteRegisterImpl::max_gpr); ++} ++ ++#endif // CPU_MIPS_VM_VMREG_MIPS_INLINE_HPP +diff --git a/src/hotspot/cpu/mips/vtableStubs_mips_64.cpp b/src/hotspot/cpu/mips/vtableStubs_mips_64.cpp +new file mode 100644 +index 00000000000..f373aac45c2 +--- /dev/null ++++ b/src/hotspot/cpu/mips/vtableStubs_mips_64.cpp +@@ -0,0 +1,348 @@ ++/* ++ * Copyright (c) 2003, 2014, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2021, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ * ++ */ ++ ++#include "precompiled.hpp" ++#include "asm/macroAssembler.hpp" ++#include "code/vtableStubs.hpp" ++#include "interp_masm_mips.hpp" ++#include "memory/resourceArea.hpp" ++#include "oops/compiledICHolder.hpp" ++#include "oops/klass.inline.hpp" ++#include "oops/klassVtable.hpp" ++#include "runtime/sharedRuntime.hpp" ++#include "vmreg_mips.inline.hpp" ++#ifdef COMPILER2 ++#include "opto/runtime.hpp" ++#endif ++ ++ ++// machine-dependent part of VtableStubs: create VtableStub of correct size and ++// initialize its code ++ ++#define __ masm-> ++ ++#define A0 RA0 ++#define A1 RA1 ++#define A2 RA2 ++#define A3 RA3 ++#define A4 RA4 ++#define A5 RA5 ++#define A6 RA6 ++#define A7 RA7 ++#define T0 RT0 ++#define T1 RT1 ++#define T2 RT2 ++#define T3 RT3 ++#define T8 RT8 ++#define T9 RT9 ++ ++#ifndef PRODUCT ++extern "C" void bad_compiled_vtable_index(JavaThread* thread, oop receiver, int index); ++#endif ++ ++// used by compiler only; reciever in T0. ++// used registers : ++// Rmethod : receiver klass & method ++// NOTE: If this code is used by the C1, the receiver_location is always 0. ++// when reach here, receiver in T0, klass in T8 ++VtableStub* VtableStubs::create_vtable_stub(int vtable_index) { ++ // Read "A word on VtableStub sizing" in share/code/vtableStubs.hpp for details on stub sizing. ++ const int stub_code_length = code_size_limit(true); ++ VtableStub* s = new(stub_code_length) VtableStub(true, vtable_index); ++ // Can be NULL if there is no free space in the code cache. ++ if (s == NULL) { ++ return NULL; ++ } ++ ++ // Count unused bytes in instruction sequences of variable size. ++ // We add them to the computed buffer size in order to avoid ++ // overflow in subsequently generated stubs. ++ address start_pc; ++ int slop_bytes = 0; ++ int slop_delta = 0; ++ int load_const_maxLen = 6*BytesPerInstWord; // load_const generates 6 instructions. Assume that as max size for li ++ // No variance was detected in vtable stub sizes. Setting index_dependent_slop == 0 will unveil any deviation from this observation. ++ const int index_dependent_slop = 0; ++ ++ ResourceMark rm; ++ CodeBuffer cb(s->entry_point(), stub_code_length); ++ MacroAssembler* masm = new MacroAssembler(&cb); ++ Register t1 = T8, t2 = Rmethod; ++#if (!defined(PRODUCT) && defined(COMPILER2)) ++ if (CountCompiledCalls) { ++ start_pc = __ pc(); ++ __ li(AT, SharedRuntime::nof_megamorphic_calls_addr()); ++ slop_delta = load_const_maxLen - (__ pc() - start_pc); ++ slop_bytes += slop_delta; ++ assert(slop_delta >= 0, "negative slop(%d) encountered, adjust code size estimate!", slop_delta); ++ __ lw(t1, AT , 0); ++ __ addiu(t1, t1, 1); ++ __ sw(t1, AT,0); ++ } ++#endif ++ ++ // get receiver (need to skip return address on top of stack) ++ //assert(receiver_location == T0->as_VMReg(), "receiver expected in T0"); ++ ++ // get receiver klass ++ address npe_addr = __ pc(); ++ //add for compressedoops ++ __ load_klass(t1, T0); ++ ++#ifndef PRODUCT ++ if (DebugVtables) { ++ Label L; ++ // check offset vs vtable length ++ __ lw(t2, t1, in_bytes(Klass::vtable_length_offset())); ++ assert(Assembler::is_simm16(vtable_index*vtableEntry::size()), "change this code"); ++ __ move(AT, vtable_index*vtableEntry::size()); ++ __ slt(AT, AT, t2); ++ __ bne(AT, R0, L); ++ __ delayed()->nop(); ++ __ move(A2, vtable_index); ++ __ move(A1, A0); ++ ++ // VTABLE TODO: find upper bound for call_VM length. 
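// Illustrative aside, not part of the Loongson patch: the vtable stub being
// assembled in create_vtable_stub() hand-codes the dispatch sequence
// "receiver -> klass -> vtable[vtable_index] -> Method* -> from_compiled entry".
// A minimal standalone C++ sketch of that lookup, using hypothetical stand-in
// types rather than HotSpot's real Klass/Method layout:
struct SketchMethod { const void* from_compiled_entry; };
struct SketchKlass  { SketchMethod* vtable[64]; };   // 64 is an arbitrary example size
struct SketchOop    { SketchKlass* klass; };         // simplified object header

inline const void* sketch_vtable_dispatch(SketchOop* receiver, int vtable_index) {
  SketchKlass*  k = receiver->klass;           // corresponds to __ load_klass(t1, T0)
  SketchMethod* m = k->vtable[vtable_index];   // corresponds to __ lookup_virtual_method(...)
  return m->from_compiled_entry;               // corresponds to __ ld_ptr(T9, ...); __ jr(T9)
}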
++ start_pc = __ pc(); ++ __ call_VM(noreg, CAST_FROM_FN_PTR(address, bad_compiled_vtable_index), A1, A2); ++ const ptrdiff_t estimate = 512; ++ const ptrdiff_t codesize = __ pc() - start_pc; ++ slop_delta = estimate - codesize; // call_VM varies in length, depending on data ++ assert(slop_delta >= 0, "vtable #%d: Code size estimate (%d) for DebugVtables too small, required: %d", vtable_index, (int)estimate, (int)codesize); ++ __ bind(L); ++ } ++#endif // PRODUCT ++ const Register method = Rmethod; ++ ++ // load Method* and target address ++ start_pc = __ pc(); ++ // lookup_virtual_method generates 18 instructions (worst case) ++ __ lookup_virtual_method(t1, vtable_index, method); ++ slop_delta = 18*BytesPerInstWord - (int)(__ pc() - start_pc); ++ slop_bytes += slop_delta; ++ assert(slop_delta >= 0, "negative slop(%d) encountered, adjust code size estimate!", slop_delta); ++ ++#ifndef PRODUCT ++ if (DebugVtables) { ++ Label L; ++ __ beq(method, R0, L); ++ __ delayed()->nop(); ++ __ ld(AT, method,in_bytes(Method::from_compiled_offset())); ++ __ bne(AT, R0, L); ++ __ delayed()->nop(); ++ __ stop("Vtable entry is NULL"); ++ __ bind(L); ++ } ++#endif // PRODUCT ++ ++ // T8: receiver klass ++ // T0: receiver ++ // Rmethod: Method* ++ // T9: entry ++ address ame_addr = __ pc(); ++ __ ld_ptr(T9, method,in_bytes(Method::from_compiled_offset())); ++ __ jr(T9); ++ __ delayed()->nop(); ++ masm->flush(); ++ slop_bytes += index_dependent_slop; // add'l slop for size variance due to large itable offsets ++ bookkeeping(masm, tty, s, npe_addr, ame_addr, true, vtable_index, slop_bytes, index_dependent_slop); ++ ++ return s; ++} ++ ++ ++// used registers : ++// T1 T2 ++// when reach here, the receiver in T0, klass in T1 ++VtableStub* VtableStubs::create_itable_stub(int itable_index) { ++ // Read "A word on VtableStub sizing" in share/code/vtableStubs.hpp for details on stub sizing. ++ const int stub_code_length = code_size_limit(false); ++ VtableStub* s = new(stub_code_length) VtableStub(false, itable_index); ++ // Can be NULL if there is no free space in the code cache. ++ if (s == NULL) { ++ return NULL; ++ } ++ // Count unused bytes in instruction sequences of variable size. ++ // We add them to the computed buffer size in order to avoid ++ // overflow in subsequently generated stubs. ++ address start_pc; ++ int slop_bytes = 0; ++ int slop_delta = 0; ++ int load_const_maxLen = 6*BytesPerInstWord; // load_const generates 6 instructions. 
Assume that as max size for li ++ ++ ResourceMark rm; ++ CodeBuffer cb(s->entry_point(), stub_code_length); ++ MacroAssembler *masm = new MacroAssembler(&cb); ++ ++ // we T8,T9 as temparary register, they are free from register allocator ++ Register t1 = T8, t2 = T2; ++ // Entry arguments: ++ // T1: Interface ++ // T0: Receiver ++ ++#if (!defined(PRODUCT) && defined(COMPILER2)) ++ if (CountCompiledCalls) { ++ start_pc = __ pc(); ++ __ li(AT, SharedRuntime::nof_megamorphic_calls_addr()); ++ slop_delta = load_const_maxLen - (__ pc() - start_pc); ++ slop_bytes += slop_delta; ++ assert(slop_delta >= 0, "negative slop(%d) encountered, adjust code size estimate!", slop_delta); ++ __ lw(T8, AT, 0); ++ __ addiu(T8, T8,1); ++ __ sw(T8, AT, 0); ++ } ++#endif // PRODUCT ++ ++ const Register holder_klass_reg = T1; // declaring interface klass (DECC) ++ const Register resolved_klass_reg = Rmethod; // resolved interface klass (REFC) ++ ++ const Register icholder_reg = T1; ++ __ ld_ptr(resolved_klass_reg, icholder_reg, CompiledICHolder::holder_klass_offset()); ++ __ ld_ptr(holder_klass_reg, icholder_reg, CompiledICHolder::holder_metadata_offset()); ++ ++ Label L_no_such_interface; ++ ++ // get receiver klass (also an implicit null-check) ++ address npe_addr = __ pc(); ++ __ load_klass(t1, T0); ++ { ++ // x86 use lookup_interface_method, but lookup_interface_method does not work on MIPS. ++ // No dynamic code size variance here, so slop_bytes is not needed. ++ const int base = in_bytes(Klass::vtable_start_offset()); ++ assert(vtableEntry::size() * wordSize == 8, "adjust the scaling in the code below"); ++ assert(Assembler::is_simm16(base), "change this code"); ++ __ daddiu(t2, t1, base); ++ __ lw(AT, t1, in_bytes(Klass::vtable_length_offset())); ++ __ dsll(AT, AT, Address::times_8); ++ __ daddu(t2, t2, AT); ++ if (HeapWordsPerLong > 1) { ++ __ round_to(t2, BytesPerLong); ++ } ++ ++ Label hit, entry; ++ __ bind(entry); ++ ++ // Check that the entry is non-null. A null entry means that ++ // the receiver class doesn't implement the interface, and wasn't the ++ // same as when the caller was compiled. ++ __ ld_ptr(AT, t2, itableOffsetEntry::interface_offset_in_bytes()); ++ __ beq(AT, R0, L_no_such_interface); ++ __ delayed()->nop(); ++ ++ __ bne(AT, resolved_klass_reg, entry); ++ __ delayed()->addiu(t2, t2, itableOffsetEntry::size() * wordSize); ++ ++ } ++ ++ // add for compressedoops ++ __ load_klass(t1, T0); ++ // compute itable entry offset (in words) ++ const int base = in_bytes(Klass::vtable_start_offset()); ++ __ daddiu(t2, t1, base); ++ __ lw(AT, t1, in_bytes(Klass::vtable_length_offset())); ++ __ dsll(AT, AT, Address::times_8); ++ __ daddu(t2, t2, AT); ++ if (HeapWordsPerLong > 1) { ++ __ round_to(t2, BytesPerLong); ++ } ++ ++ Label hit, entry; ++ __ bind(entry); ++ ++ // Check that the entry is non-null. A null entry means that ++ // the receiver class doesn't implement the interface, and wasn't the ++ // same as when the caller was compiled. ++ __ ld_ptr(AT, t2, itableOffsetEntry::interface_offset_in_bytes()); ++ __ beq(AT, R0, L_no_such_interface); ++ __ delayed()->nop(); ++ ++ __ bne(AT, holder_klass_reg, entry); ++ __ delayed()->addiu(t2, t2, itableOffsetEntry::size() * wordSize); ++ ++ // We found a hit, move offset into T9 ++ __ ld_ptr(t2, t2, itableOffsetEntry::offset_offset_in_bytes() - itableOffsetEntry::size() * wordSize); ++ ++ // Compute itableMethodEntry. 
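// Illustrative aside, not part of the Loongson patch: the itable stub above
// scans the receiver's itable twice (once against the resolved interface,
// once against the declaring interface) and then indexes into the matching
// method block. A minimal standalone sketch of that scan, with a hypothetical
// entry type rather than HotSpot's real itableOffsetEntry layout:
struct SketchItableEntry { const void* interface_klass; int offset_in_bytes; };

// Returns the byte offset of the matching interface's method block,
// or -1 when the receiver class does not implement it (L_no_such_interface).
inline int sketch_find_itable_offset(const SketchItableEntry* itable, const void* wanted) {
  for (int i = 0; ; i++) {
    if (itable[i].interface_klass == nullptr) return -1;
    if (itable[i].interface_klass == wanted)  return itable[i].offset_in_bytes;
  }
}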
++ const int method_offset = (itableMethodEntry::size() * wordSize * itable_index) + ++ itableMethodEntry::method_offset_in_bytes(); ++ ++ // Get Method* and entrypoint for compiler ++ const Register method = Rmethod; ++ __ dsll(AT, t2, Address::times_1); ++ __ addu(AT, AT, t1 ); ++ start_pc = __ pc(); ++ __ set64(t1, method_offset); ++ slop_delta = load_const_maxLen - (__ pc() - start_pc); ++ slop_bytes += slop_delta; ++ assert(slop_delta >= 0, "negative slop(%d) encountered, adjust code size estimate!", slop_delta); ++ __ addu(AT, AT, t1 ); ++ __ ld_ptr(method, AT, 0); ++ ++#ifdef ASSERT ++ if (DebugVtables) { ++ Label L1; ++ __ beq(method, R0, L1); ++ __ delayed()->nop(); ++ __ ld(AT, method,in_bytes(Method::from_compiled_offset())); ++ __ bne(AT, R0, L1); ++ __ delayed()->nop(); ++ __ stop("compiler entrypoint is null"); ++ __ bind(L1); ++ } ++#endif // ASSERT ++ ++ // Rmethod: Method* ++ // T0: receiver ++ // T9: entry point ++ address ame_addr = __ pc(); ++ __ ld_ptr(T9, method,in_bytes(Method::from_compiled_offset())); ++ __ jr(T9); ++ __ delayed()->nop(); ++ ++ __ bind(L_no_such_interface); ++ // Handle IncompatibleClassChangeError in itable stubs. ++ // More detailed error message. ++ // We force resolving of the call site by jumping to the "handle ++ // wrong method" stub, and so let the interpreter runtime do all the ++ // dirty work. ++ start_pc = __ pc(); ++ __ set64(T9, (long)SharedRuntime::get_handle_wrong_method_stub()); ++ slop_delta = load_const_maxLen - (__ pc() - start_pc); ++ slop_bytes += slop_delta; ++ assert(slop_delta >= 0, "negative slop(%d) encountered, adjust code size estimate!", slop_delta); ++ __ jr(T9); ++ __ delayed()->nop(); ++ ++ masm->flush(); ++ bookkeeping(masm, tty, s, npe_addr, ame_addr, false, itable_index, slop_bytes, 0); ++ ++ return s; ++} ++ ++// NOTE : whenever you change the code above, dont forget to change the const here ++int VtableStub::pd_code_alignment() { ++ const unsigned int icache_line_size = wordSize; ++ return icache_line_size; ++} +diff --git a/src/hotspot/cpu/ppc/gc/z/zBarrierSetAssembler_ppc.cpp b/src/hotspot/cpu/ppc/gc/z/zBarrierSetAssembler_ppc.cpp +index 17fc8e5078e..27e431c2c61 100644 +--- a/src/hotspot/cpu/ppc/gc/z/zBarrierSetAssembler_ppc.cpp ++++ b/src/hotspot/cpu/ppc/gc/z/zBarrierSetAssembler_ppc.cpp +@@ -292,7 +292,8 @@ void ZBarrierSetAssembler::try_resolve_jobject_in_native(MacroAssembler* masm, R + // Code emitted by LIR node "LIR_OpZLoadBarrierTest" which in turn is emitted by ZBarrierSetC1::load_barrier. + // The actual compare and branch instructions are represented as stand-alone LIR nodes. 
+ void ZBarrierSetAssembler::generate_c1_load_barrier_test(LIR_Assembler* ce, +- LIR_Opr ref) const { ++ LIR_Opr ref, ++ LIR_Opr res) const { + __ block_comment("load_barrier_test (zgc) {"); + + __ ld(R0, in_bytes(ZThreadLocalData::address_bad_mask_offset()), R16_thread); +diff --git a/src/hotspot/cpu/ppc/gc/z/zBarrierSetAssembler_ppc.hpp b/src/hotspot/cpu/ppc/gc/z/zBarrierSetAssembler_ppc.hpp +index e2ff1bf53ae..4957e73ae22 100644 +--- a/src/hotspot/cpu/ppc/gc/z/zBarrierSetAssembler_ppc.hpp ++++ b/src/hotspot/cpu/ppc/gc/z/zBarrierSetAssembler_ppc.hpp +@@ -67,7 +67,8 @@ public: + + #ifdef COMPILER1 + void generate_c1_load_barrier_test(LIR_Assembler* ce, +- LIR_Opr ref) const; ++ LIR_Opr ref, ++ LIR_Opr res) const; + + void generate_c1_load_barrier_stub(LIR_Assembler* ce, + ZLoadBarrierStubC1* stub) const; +diff --git a/src/hotspot/cpu/ppc/gc/z/zGlobals_ppc.hpp b/src/hotspot/cpu/ppc/gc/z/zGlobals_ppc.hpp +index 3657b16fc1a..a2aab225743 100644 +--- a/src/hotspot/cpu/ppc/gc/z/zGlobals_ppc.hpp ++++ b/src/hotspot/cpu/ppc/gc/z/zGlobals_ppc.hpp +@@ -30,6 +30,8 @@ const size_t ZPlatformGranuleSizeShift = 21; // 2MB + const size_t ZPlatformHeapViews = 3; + const size_t ZPlatformCacheLineSize = DEFAULT_CACHE_LINE_SIZE; + ++const bool ZPlatformLoadBarrierTestResultInRegister = false; ++ + size_t ZPlatformAddressOffsetBits(); + size_t ZPlatformAddressMetadataShift(); + +diff --git a/src/hotspot/os/linux/os_linux.cpp b/src/hotspot/os/linux/os_linux.cpp +index 99dd4c82420..5fefcc00c55 100644 +--- a/src/hotspot/os/linux/os_linux.cpp ++++ b/src/hotspot/os/linux/os_linux.cpp +@@ -23,6 +23,12 @@ + * + */ + ++/* ++ * This file has been modified by Loongson Technology in 2021. These ++ * modifications are Copyright (c) 2021 Loongson Technology, and are made ++ * available on the same license terms set forth above. ++ */ ++ + // no precompiled headers + #include "jvm.h" + #include "classfile/vmSymbols.hpp" +@@ -2455,7 +2461,7 @@ void os::print_memory_info(outputStream* st) { + // before "flags" so if we find a second "model name", then the + // "flags" field is considered missing. + static bool print_model_name_and_flags(outputStream* st, char* buf, size_t buflen) { +-#if defined(IA32) || defined(AMD64) ++#if defined(IA32) || defined(AMD64) || defined(MIPS) + // Other platforms have less repetitive cpuinfo files + FILE *fp = fopen("/proc/cpuinfo", "r"); + if (fp) { +@@ -2545,7 +2551,7 @@ void os::pd_print_cpu_info(outputStream* st, char* buf, size_t buflen) { + print_sys_devices_cpu_info(st, buf, buflen); + } + +-#if defined(AMD64) || defined(IA32) || defined(X32) ++#if defined(AMD64) || defined(IA32) || defined(X32) || defined(MIPS) + const char* search_string = "model name"; + #elif defined(M68K) + const char* search_string = "CPU"; +diff --git a/src/hotspot/os_cpu/linux_loongarch/assembler_linux_loongarch.cpp b/src/hotspot/os_cpu/linux_loongarch/assembler_linux_loongarch.cpp +new file mode 100644 +index 00000000000..30719a0340b +--- /dev/null ++++ b/src/hotspot/os_cpu/linux_loongarch/assembler_linux_loongarch.cpp +@@ -0,0 +1,24 @@ ++/* ++ * Copyright (c) 1999, 2010, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2021, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. 
++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ +diff --git a/src/hotspot/os_cpu/linux_loongarch/atomic_linux_loongarch.hpp b/src/hotspot/os_cpu/linux_loongarch/atomic_linux_loongarch.hpp +new file mode 100644 +index 00000000000..beb717b67ff +--- /dev/null ++++ b/src/hotspot/os_cpu/linux_loongarch/atomic_linux_loongarch.hpp +@@ -0,0 +1,269 @@ ++/* ++ * Copyright (c) 1999, 2013, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2023, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ *
++ */
++
++#ifndef OS_CPU_LINUX_LOONGARCH_ATOMIC_LINUX_LOONGARCH_HPP
++#define OS_CPU_LINUX_LOONGARCH_ATOMIC_LINUX_LOONGARCH_HPP
++
++#include "runtime/vm_version.hpp"
++
++// Implementation of class atomic
++
++template<size_t byte_size>
++struct Atomic::PlatformAdd {
++  template<typename D, typename I>
++  D fetch_and_add(D volatile* dest, I add_value, atomic_memory_order order) const;
++
++  template<typename D, typename I>
++  D add_and_fetch(D volatile* dest, I add_value, atomic_memory_order order) const {
++    return fetch_and_add(dest, add_value, order) + add_value;
++  }
++};
++
++template<>
++template<typename D, typename I>
++inline D Atomic::PlatformAdd<4>::fetch_and_add(D volatile* dest, I add_value,
++                                               atomic_memory_order order) const {
++  STATIC_ASSERT(4 == sizeof(I));
++  STATIC_ASSERT(4 == sizeof(D));
++  D old_value;
++
++  switch (order) {
++  case memory_order_relaxed:
++    asm volatile (
++      "amadd.w %[old], %[add], %[dest] \n\t"
++      : [old] "=&r" (old_value)
++      : [add] "r" (add_value), [dest] "r" (dest)
++      : "memory");
++    break;
++  default:
++    asm volatile (
++      "amadd_db.w %[old], %[add], %[dest] \n\t"
++      : [old] "=&r" (old_value)
++      : [add] "r" (add_value), [dest] "r" (dest)
++      : "memory");
++    break;
++  }
++
++  return old_value;
++}
++
++template<>
++template<typename D, typename I>
++inline D Atomic::PlatformAdd<8>::fetch_and_add(D volatile* dest, I add_value,
++                                               atomic_memory_order order) const {
++  STATIC_ASSERT(8 == sizeof(I));
++  STATIC_ASSERT(8 == sizeof(D));
++  D old_value;
++
++  switch (order) {
++  case memory_order_relaxed:
++    asm volatile (
++      "amadd.d %[old], %[add], %[dest] \n\t"
++      : [old] "=&r" (old_value)
++      : [add] "r" (add_value), [dest] "r" (dest)
++      : "memory");
++    break;
++  default:
++    asm volatile (
++      "amadd_db.d %[old], %[add], %[dest] \n\t"
++      : [old] "=&r" (old_value)
++      : [add] "r" (add_value), [dest] "r" (dest)
++      : "memory");
++    break;
++  }
++
++  return old_value;
++}
++
++template<>
++template<typename T>
++inline T Atomic::PlatformXchg<4>::operator()(T volatile* dest,
++                                             T exchange_value,
++                                             atomic_memory_order order) const {
++  STATIC_ASSERT(4 == sizeof(T));
++  T old_value;
++
++  switch (order) {
++  case memory_order_relaxed:
++    asm volatile (
++      "amswap.w %[_old], %[_new], %[dest] \n\t"
++      : [_old] "=&r" (old_value)
++      : [_new] "r" (exchange_value), [dest] "r" (dest)
++      : "memory");
++    break;
++  default:
++    asm volatile (
++      "amswap_db.w %[_old], %[_new], %[dest] \n\t"
++      : [_old] "=&r" (old_value)
++      : [_new] "r" (exchange_value), [dest] "r" (dest)
++      : "memory");
++    break;
++  }
++
++  return old_value;
++}
++
++template<>
++template<typename T>
++inline T Atomic::PlatformXchg<8>::operator()(T volatile* dest,
++                                             T exchange_value,
++                                             atomic_memory_order order) const {
++  STATIC_ASSERT(8 == sizeof(T));
++  T old_value;
++
++  switch (order) {
++  case memory_order_relaxed:
++    asm volatile (
++      "amswap.d %[_old], %[_new], %[dest] \n\t"
++      : [_old] "=&r" (old_value)
++      : [_new] "r" (exchange_value), [dest] "r" (dest)
++      : "memory");
++    break;
++  default:
++    asm volatile (
++      "amswap_db.d %[_old], %[_new], %[dest] \n\t"
++      : [_old] "=&r" (old_value)
++      : [_new] "r" (exchange_value), [dest] "r" (dest)
++      : "memory");
++    break;
++  }
++
++  return old_value;
++}
++
++template<>
++struct Atomic::PlatformCmpxchg<1> : Atomic::CmpxchgByteUsingInt {};
++
++template<>
++template<typename T>
++inline T Atomic::PlatformCmpxchg<4>::operator()(T volatile* dest,
++                                                T compare_value,
++                                                T exchange_value,
++                                                atomic_memory_order order) const {
++  STATIC_ASSERT(4 == sizeof(T));
++  T prev, temp;
++
++  switch (order) {
++  case memory_order_relaxed:
++    asm volatile (
++      "1: ll.w %[prev], %[dest] \n\t"
++      " bne %[prev],
%[_old], 2f \n\t" ++ " move %[temp], %[_new] \n\t" ++ " sc.w %[temp], %[dest] \n\t" ++ " beqz %[temp], 1b \n\t" ++ "2: \n\t" ++ : [prev] "=&r" (prev), [temp] "=&r" (temp) ++ : [_old] "r" (compare_value), [_new] "r" (exchange_value), [dest] "ZC" (*dest) ++ : "memory"); ++ break; ++ default: ++ asm volatile ( ++ "1: ll.w %[prev], %[dest] \n\t" ++ " bne %[prev], %[_old], 2f \n\t" ++ " move %[temp], %[_new] \n\t" ++ " sc.w %[temp], %[dest] \n\t" ++ " beqz %[temp], 1b \n\t" ++ " b 3f \n\t" ++ "2: dbar 0x700 \n\t" ++ "3: \n\t" ++ : [prev] "=&r" (prev), [temp] "=&r" (temp) ++ : [_old] "r" (compare_value), [_new] "r" (exchange_value), [dest] "ZC" (*dest) ++ : "memory"); ++ break; ++ } ++ ++ return prev; ++} ++ ++template<> ++template ++inline T Atomic::PlatformCmpxchg<8>::operator()(T volatile* dest, ++ T compare_value, ++ T exchange_value, ++ atomic_memory_order order) const { ++ STATIC_ASSERT(8 == sizeof(T)); ++ T prev, temp; ++ ++ switch (order) { ++ case memory_order_relaxed: ++ asm volatile ( ++ "1: ll.d %[prev], %[dest] \n\t" ++ " bne %[prev], %[_old], 2f \n\t" ++ " move %[temp], %[_new] \n\t" ++ " sc.d %[temp], %[dest] \n\t" ++ " beqz %[temp], 1b \n\t" ++ "2: \n\t" ++ : [prev] "=&r" (prev), [temp] "=&r" (temp) ++ : [_old] "r" (compare_value), [_new] "r" (exchange_value), [dest] "ZC" (*dest) ++ : "memory"); ++ break; ++ default: ++ asm volatile ( ++ "1: ll.d %[prev], %[dest] \n\t" ++ " bne %[prev], %[_old], 2f \n\t" ++ " move %[temp], %[_new] \n\t" ++ " sc.d %[temp], %[dest] \n\t" ++ " beqz %[temp], 1b \n\t" ++ " b 3f \n\t" ++ "2: dbar 0x700 \n\t" ++ "3: \n\t" ++ : [prev] "=&r" (prev), [temp] "=&r" (temp) ++ : [_old] "r" (compare_value), [_new] "r" (exchange_value), [dest] "ZC" (*dest) ++ : "memory"); ++ break; ++ } ++ ++ return prev; ++} ++ ++template<> ++struct Atomic::PlatformOrderedStore<4, RELEASE_X> ++{ ++ template ++ void operator()(volatile T* p, T v) const { xchg(p, v, memory_order_release); } ++}; ++ ++template<> ++struct Atomic::PlatformOrderedStore<8, RELEASE_X> ++{ ++ template ++ void operator()(volatile T* p, T v) const { xchg(p, v, memory_order_release); } ++}; ++ ++template<> ++struct Atomic::PlatformOrderedStore<4, RELEASE_X_FENCE> ++{ ++ template ++ void operator()(volatile T* p, T v) const { xchg(p, v, memory_order_conservative); } ++}; ++ ++template<> ++struct Atomic::PlatformOrderedStore<8, RELEASE_X_FENCE> ++{ ++ template ++ void operator()(volatile T* p, T v) const { xchg(p, v, memory_order_conservative); } ++}; ++ ++#endif // OS_CPU_LINUX_LOONGARCH_ATOMIC_LINUX_LOONGARCH_HPP +diff --git a/src/hotspot/os_cpu/linux_loongarch/bytes_linux_loongarch.inline.hpp b/src/hotspot/os_cpu/linux_loongarch/bytes_linux_loongarch.inline.hpp +new file mode 100644 +index 00000000000..c9f675baca4 +--- /dev/null ++++ b/src/hotspot/os_cpu/linux_loongarch/bytes_linux_loongarch.inline.hpp +@@ -0,0 +1,37 @@ ++/* ++ * Copyright (c) 1999, 2010, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU General Public License
++ * version 2 for more details (a copy is included in the LICENSE file that
++ * accompanied this code).
++ *
++ * You should have received a copy of the GNU General Public License version
++ * 2 along with this work; if not, write to the Free Software Foundation,
++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
++ *
++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
++ * or visit www.oracle.com if you need additional information or have any
++ * questions.
++ *
++ */
++
++#ifndef OS_CPU_LINUX_LOONGARCH_BYTES_LINUX_LOONGARCH_INLINE_HPP
++#define OS_CPU_LINUX_LOONGARCH_BYTES_LINUX_LOONGARCH_INLINE_HPP
++
++#include <byteswap.h>
++
++// Efficient swapping of data bytes from Java byte
++// ordering to native byte ordering and vice versa.
++inline u2 Bytes::swap_u2(u2 x) { return bswap_16(x); }
++inline u4 Bytes::swap_u4(u4 x) { return bswap_32(x); }
++inline u8 Bytes::swap_u8(u8 x) { return bswap_64(x); }
++
++#endif // OS_CPU_LINUX_LOONGARCH_BYTES_LINUX_LOONGARCH_INLINE_HPP
+diff --git a/src/hotspot/os_cpu/linux_loongarch/copy_linux_loongarch.inline.hpp b/src/hotspot/os_cpu/linux_loongarch/copy_linux_loongarch.inline.hpp
+new file mode 100644
+index 00000000000..826c1fe39ac
+--- /dev/null
++++ b/src/hotspot/os_cpu/linux_loongarch/copy_linux_loongarch.inline.hpp
+@@ -0,0 +1,125 @@
++/*
++ * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
++ * Copyright (c) 2015, 2021, Loongson Technology. All rights reserved.
++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
++ *
++ * This code is free software; you can redistribute it and/or modify it
++ * under the terms of the GNU General Public License version 2 only, as
++ * published by the Free Software Foundation.
++ *
++ * This code is distributed in the hope that it will be useful, but WITHOUT
++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
++ * version 2 for more details (a copy is included in the LICENSE file that
++ * accompanied this code).
++ *
++ * You should have received a copy of the GNU General Public License version
++ * 2 along with this work; if not, write to the Free Software Foundation,
++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
++ *
++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
++ * or visit www.oracle.com if you need additional information or have any
++ * questions.
++ * ++ */ ++ ++#ifndef OS_CPU_LINUX_LOONGARCH_COPY_LINUX_LOONGARCH_INLINE_HPP ++#define OS_CPU_LINUX_LOONGARCH_COPY_LINUX_LOONGARCH_INLINE_HPP ++ ++static void pd_conjoint_words(const HeapWord* from, HeapWord* to, size_t count) { ++ (void)memmove(to, from, count * HeapWordSize); ++} ++ ++static void pd_disjoint_words(const HeapWord* from, HeapWord* to, size_t count) { ++ switch (count) { ++ case 8: to[7] = from[7]; ++ case 7: to[6] = from[6]; ++ case 6: to[5] = from[5]; ++ case 5: to[4] = from[4]; ++ case 4: to[3] = from[3]; ++ case 3: to[2] = from[2]; ++ case 2: to[1] = from[1]; ++ case 1: to[0] = from[0]; ++ case 0: break; ++ default: ++ (void)memcpy(to, from, count * HeapWordSize); ++ break; ++ } ++} ++ ++static void pd_disjoint_words_atomic(const HeapWord* from, HeapWord* to, size_t count) { ++ switch (count) { ++ case 8: to[7] = from[7]; ++ case 7: to[6] = from[6]; ++ case 6: to[5] = from[5]; ++ case 5: to[4] = from[4]; ++ case 4: to[3] = from[3]; ++ case 3: to[2] = from[2]; ++ case 2: to[1] = from[1]; ++ case 1: to[0] = from[0]; ++ case 0: break; ++ default: ++ while (count-- > 0) { ++ *to++ = *from++; ++ } ++ break; ++ } ++} ++ ++static void pd_aligned_conjoint_words(const HeapWord* from, HeapWord* to, size_t count) { ++ pd_conjoint_words(from, to, count); ++} ++ ++static void pd_aligned_disjoint_words(const HeapWord* from, HeapWord* to, size_t count) { ++ pd_disjoint_words(from, to, count); ++} ++ ++static void pd_conjoint_bytes(const void* from, void* to, size_t count) { ++ (void)memmove(to, from, count); ++} ++ ++static void pd_conjoint_bytes_atomic(const void* from, void* to, size_t count) { ++ pd_conjoint_bytes(from, to, count); ++} ++ ++static void pd_conjoint_jshorts_atomic(const jshort* from, jshort* to, size_t count) { ++ copy_conjoint_atomic(from, to, count); ++} ++ ++static void pd_conjoint_jints_atomic(const jint* from, jint* to, size_t count) { ++ copy_conjoint_atomic(from, to, count); ++} ++ ++static void pd_conjoint_jlongs_atomic(const jlong* from, jlong* to, size_t count) { ++ copy_conjoint_atomic(from, to, count); ++} ++ ++static void pd_conjoint_oops_atomic(const oop* from, oop* to, size_t count) { ++ //assert(!UseCompressedOops, "foo!"); ++ assert(HeapWordSize == BytesPerOop, "heapwords and oops must be the same size"); ++ copy_conjoint_atomic(from, to, count); ++} ++ ++static void pd_arrayof_conjoint_bytes(const HeapWord* from, HeapWord* to, size_t count) { ++ pd_conjoint_bytes_atomic(from, to, count); ++} ++ ++static void pd_arrayof_conjoint_jshorts(const HeapWord* from, HeapWord* to, size_t count) { ++ pd_conjoint_jshorts_atomic((jshort*)from, (jshort*)to, count); ++} ++ ++static void pd_arrayof_conjoint_jints(const HeapWord* from, HeapWord* to, size_t count) { ++ pd_conjoint_jints_atomic((jint*)from, (jint*)to, count); ++} ++ ++static void pd_arrayof_conjoint_jlongs(const HeapWord* from, HeapWord* to, size_t count) { ++ pd_conjoint_jlongs_atomic((jlong*)from, (jlong*)to, count); ++} ++ ++static void pd_arrayof_conjoint_oops(const HeapWord* from, HeapWord* to, size_t count) { ++ //assert(!UseCompressedOops, "foo!"); ++ assert(BytesPerLong == BytesPerOop, "jlongs and oops must be the same size"); ++ pd_conjoint_oops_atomic((oop*)from, (oop*)to, count); ++} ++ ++#endif // OS_CPU_LINUX_LOONGARCH_COPY_LINUX_LOONGARCH_INLINE_HPP +diff --git a/src/hotspot/os_cpu/linux_loongarch/gc/z/zSyscall_linux_loongarch.hpp b/src/hotspot/os_cpu/linux_loongarch/gc/z/zSyscall_linux_loongarch.hpp +new file mode 100644 +index 00000000000..46d5d5a268b +--- /dev/null ++++ 
b/src/hotspot/os_cpu/linux_loongarch/gc/z/zSyscall_linux_loongarch.hpp
+@@ -0,0 +1,41 @@
++/*
++ * Copyright (c) 2019, 2021, Oracle and/or its affiliates. All rights reserved.
++ * Copyright (c) 2021, Loongson Technology. All rights reserved.
++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
++ *
++ * This code is free software; you can redistribute it and/or modify it
++ * under the terms of the GNU General Public License version 2 only, as
++ * published by the Free Software Foundation.
++ *
++ * This code is distributed in the hope that it will be useful, but WITHOUT
++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
++ * version 2 for more details (a copy is included in the LICENSE file that
++ * accompanied this code).
++ *
++ * You should have received a copy of the GNU General Public License version
++ * 2 along with this work; if not, write to the Free Software Foundation,
++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
++ *
++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
++ * or visit www.oracle.com if you need additional information or have any
++ * questions.
++ */
++
++#ifndef OS_CPU_LINUX_LOONGARCH_GC_Z_ZSYSCALL_LINUX_LOONGARCH_HPP
++#define OS_CPU_LINUX_LOONGARCH_GC_Z_ZSYSCALL_LINUX_LOONGARCH_HPP
++
++#include <sys/syscall.h>
++
++//
++// Support for building on older Linux systems
++//
++
++#ifndef SYS_memfd_create
++#define SYS_memfd_create 279
++#endif
++#ifndef SYS_fallocate
++#define SYS_fallocate 47
++#endif
++
++#endif // OS_CPU_LINUX_LOONGARCH_GC_Z_ZSYSCALL_LINUX_LOONGARCH_HPP
+diff --git a/src/hotspot/os_cpu/linux_loongarch/globals_linux_loongarch.hpp b/src/hotspot/os_cpu/linux_loongarch/globals_linux_loongarch.hpp
+new file mode 100644
+index 00000000000..0b5247aa0b6
+--- /dev/null
++++ b/src/hotspot/os_cpu/linux_loongarch/globals_linux_loongarch.hpp
+@@ -0,0 +1,43 @@
++/*
++ * Copyright (c) 2000, 2013, Oracle and/or its affiliates. All rights reserved.
++ * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved.
++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
++ *
++ * This code is free software; you can redistribute it and/or modify it
++ * under the terms of the GNU General Public License version 2 only, as
++ * published by the Free Software Foundation.
++ *
++ * This code is distributed in the hope that it will be useful, but WITHOUT
++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
++ * version 2 for more details (a copy is included in the LICENSE file that
++ * accompanied this code).
++ *
++ * You should have received a copy of the GNU General Public License version
++ * 2 along with this work; if not, write to the Free Software Foundation,
++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
++ *
++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
++ * or visit www.oracle.com if you need additional information or have any
++ * questions.
++ *
++ */
++
++#ifndef OS_CPU_LINUX_LOONGARCH_GLOBALS_LINUX_LOONGARCH_HPP
++#define OS_CPU_LINUX_LOONGARCH_GLOBALS_LINUX_LOONGARCH_HPP
++
++// Sets the default values for platform dependent flags used by the runtime system.
++// (see globals.hpp) ++ ++define_pd_global(bool, DontYieldALot, false); ++define_pd_global(intx, ThreadStackSize, 2048); // 0 => use system default ++define_pd_global(intx, VMThreadStackSize, 2048); ++ ++define_pd_global(intx, CompilerThreadStackSize, 2048); ++ ++define_pd_global(uintx,JVMInvokeMethodSlack, 8192); ++ ++// Used on 64 bit platforms for UseCompressedOops base address ++define_pd_global(uintx,HeapBaseMinAddress, 2*G); ++ ++#endif // OS_CPU_LINUX_LOONGARCH_GLOBALS_LINUX_LOONGARCH_HPP +diff --git a/src/hotspot/os_cpu/linux_loongarch/linux_loongarch.s b/src/hotspot/os_cpu/linux_loongarch/linux_loongarch.s +new file mode 100644 +index 00000000000..ebd73af0c53 +--- /dev/null ++++ b/src/hotspot/os_cpu/linux_loongarch/linux_loongarch.s +@@ -0,0 +1,25 @@ ++# ++# Copyright (c) 2004, 2013, Oracle and/or its affiliates. All rights reserved. ++# Copyright (c) 2015, 2021, Loongson Technology. All rights reserved. ++# DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++# ++# This code is free software; you can redistribute it and/or modify it ++# under the terms of the GNU General Public License version 2 only, as ++# published by the Free Software Foundation. ++# ++# This code is distributed in the hope that it will be useful, but WITHOUT ++# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++# FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++# version 2 for more details (a copy is included in the LICENSE file that ++# accompanied this code). ++# ++# You should have received a copy of the GNU General Public License version ++# 2 along with this work; if not, write to the Free Software Foundation, ++# Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++# ++# Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++# or visit www.oracle.com if you need additional information or have any ++# questions. ++# ++ ++ +diff --git a/src/hotspot/os_cpu/linux_loongarch/orderAccess_linux_loongarch.hpp b/src/hotspot/os_cpu/linux_loongarch/orderAccess_linux_loongarch.hpp +new file mode 100644 +index 00000000000..6236e741d05 +--- /dev/null ++++ b/src/hotspot/os_cpu/linux_loongarch/orderAccess_linux_loongarch.hpp +@@ -0,0 +1,52 @@ ++/* ++ * Copyright (c) 2003, 2013, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ * ++ */ ++ ++#ifndef OS_CPU_LINUX_LOONGARCH_ORDERACCESS_LINUX_LOONGARCH_HPP ++#define OS_CPU_LINUX_LOONGARCH_ORDERACCESS_LINUX_LOONGARCH_HPP ++ ++#include "runtime/os.hpp" ++ ++// Included in orderAccess.hpp header file. ++ ++// Implementation of class OrderAccess. ++#define inlasm_sync(v) if (os::is_ActiveCoresMP()) \ ++ __asm__ __volatile__ ("nop" : : : "memory"); \ ++ else \ ++ __asm__ __volatile__ ("dbar %0" : :"K"(v) : "memory"); ++#define inlasm_synci() __asm__ __volatile__ ("ibar 0" : : : "memory"); ++ ++inline void OrderAccess::loadload() { inlasm_sync(0x15); } ++inline void OrderAccess::storestore() { inlasm_sync(0x1a); } ++inline void OrderAccess::loadstore() { inlasm_sync(0x16); } ++inline void OrderAccess::storeload() { inlasm_sync(0x19); } ++ ++inline void OrderAccess::acquire() { inlasm_sync(0x14); } ++inline void OrderAccess::release() { inlasm_sync(0x12); } ++inline void OrderAccess::fence() { inlasm_sync(0x10); } ++inline void OrderAccess::cross_modify_fence_impl() { inlasm_synci(); } ++ ++#undef inlasm_sync ++ ++#endif // OS_CPU_LINUX_LOONGARCH_ORDERACCESS_LINUX_LOONGARCH_HPP +diff --git a/src/hotspot/os_cpu/linux_loongarch/os_linux_loongarch.cpp b/src/hotspot/os_cpu/linux_loongarch/os_linux_loongarch.cpp +new file mode 100644 +index 00000000000..b32ffe9105e +--- /dev/null ++++ b/src/hotspot/os_cpu/linux_loongarch/os_linux_loongarch.cpp +@@ -0,0 +1,529 @@ ++/* ++ * Copyright (c) 1999, 2014, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2021, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ * ++ */ ++ ++// no precompiled headers ++#include "asm/macroAssembler.hpp" ++#include "classfile/classLoader.hpp" ++#include "classfile/systemDictionary.hpp" ++#include "classfile/vmSymbols.hpp" ++#include "code/icBuffer.hpp" ++#include "code/vtableStubs.hpp" ++#include "interpreter/interpreter.hpp" ++#include "memory/allocation.inline.hpp" ++#include "os_share_linux.hpp" ++#include "prims/jniFastGetField.hpp" ++#include "prims/jvm_misc.hpp" ++#include "runtime/arguments.hpp" ++#include "runtime/frame.inline.hpp" ++#include "runtime/interfaceSupport.inline.hpp" ++#include "runtime/java.hpp" ++#include "runtime/javaCalls.hpp" ++#include "runtime/mutexLocker.hpp" ++#include "runtime/osThread.hpp" ++#include "runtime/safepointMechanism.hpp" ++#include "runtime/sharedRuntime.hpp" ++#include "runtime/stubRoutines.hpp" ++#include "runtime/thread.inline.hpp" ++#include "runtime/timer.hpp" ++#include "signals_posix.hpp" ++#include "utilities/events.hpp" ++#include "utilities/vmError.hpp" ++#include "compiler/disassembler.hpp" ++ ++// put OS-includes here ++# include ++# include ++# include ++# include ++# include ++# include ++# include ++# include ++# include ++# include ++# include ++# include ++# include ++# include ++# include ++# include ++# include ++# include ++# include ++# include ++ ++#define REG_SP 3 ++#define REG_FP 22 ++ ++NOINLINE address os::current_stack_pointer() { ++ register void *sp __asm__ ("$r3"); ++ return (address) sp; ++} ++ ++char* os::non_memory_address_word() { ++ // Must never look like an address returned by reserve_memory, ++ // even in its subfields (as defined by the CPU immediate fields, ++ // if the CPU splits constants across multiple instructions). ++ ++ return (char*) -1; ++} ++ ++address os::Posix::ucontext_get_pc(const ucontext_t * uc) { ++ return (address)uc->uc_mcontext.__pc; ++} ++ ++void os::Posix::ucontext_set_pc(ucontext_t * uc, address pc) { ++ uc->uc_mcontext.__pc = (intptr_t)pc; ++} ++ ++intptr_t* os::Linux::ucontext_get_sp(const ucontext_t * uc) { ++ return (intptr_t*)uc->uc_mcontext.__gregs[REG_SP]; ++} ++ ++intptr_t* os::Linux::ucontext_get_fp(const ucontext_t * uc) { ++ return (intptr_t*)uc->uc_mcontext.__gregs[REG_FP]; ++} ++ ++address os::fetch_frame_from_context(const void* ucVoid, ++ intptr_t** ret_sp, intptr_t** ret_fp) { ++ ++ address epc; ++ ucontext_t* uc = (ucontext_t*)ucVoid; ++ ++ if (uc != NULL) { ++ epc = os::Posix::ucontext_get_pc(uc); ++ if (ret_sp) *ret_sp = os::Linux::ucontext_get_sp(uc); ++ if (ret_fp) *ret_fp = os::Linux::ucontext_get_fp(uc); ++ } else { ++ epc = NULL; ++ if (ret_sp) *ret_sp = (intptr_t *)NULL; ++ if (ret_fp) *ret_fp = (intptr_t *)NULL; ++ } ++ ++ return epc; ++} ++ ++frame os::fetch_frame_from_context(const void* ucVoid) { ++ intptr_t* sp; ++ intptr_t* fp; ++ address epc = fetch_frame_from_context(ucVoid, &sp, &fp); ++ if (!is_readable_pointer(epc)) { ++ // Try to recover from calling into bad memory ++ // Assume new frame has not been set up, the same as ++ // compiled frame stack bang ++ return fetch_compiled_frame_from_context(ucVoid); ++ } ++ return frame(sp, fp, epc); ++} ++ ++frame os::fetch_compiled_frame_from_context(const void* ucVoid) { ++ const ucontext_t* uc = (const ucontext_t*)ucVoid; ++ // In compiled code, the stack banging is performed before RA ++ // has been saved in the frame. RA is live, and SP and FP ++ // belong to the caller. 
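// Illustrative aside, not part of the Loongson patch: the register indices used
// in this file follow the LoongArch64 ABI numbering -- $r1 is ra (return
// address), $r3 is sp and $r22 is fp -- which is why REG_SP is 3, REG_FP is 22,
// and the stack-bang recovery below takes the caller pc from __gregs[1].
// A minimal standalone restatement of the same accessors (assuming glibc's
// LoongArch mcontext_t layout with __pc and __gregs[32]):
#include <ucontext.h>

static inline void* la64_ra_of(const ucontext_t* uc) { return (void*)uc->uc_mcontext.__gregs[1];  }
static inline void* la64_sp_of(const ucontext_t* uc) { return (void*)uc->uc_mcontext.__gregs[3];  }
static inline void* la64_fp_of(const ucontext_t* uc) { return (void*)uc->uc_mcontext.__gregs[22]; }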
++ intptr_t* fp = os::Linux::ucontext_get_fp(uc); ++ intptr_t* sp = os::Linux::ucontext_get_sp(uc); ++ address pc = (address)(uc->uc_mcontext.__gregs[1]); ++ return frame(sp, fp, pc); ++} ++ ++// By default, gcc always save frame pointer on stack. It may get ++// turned off by -fomit-frame-pointer, ++frame os::get_sender_for_C_frame(frame* fr) { ++ return frame(fr->sender_sp(), fr->link(), fr->sender_pc()); ++} ++ ++frame os::current_frame() { ++ intptr_t *fp = ((intptr_t **)__builtin_frame_address(0))[frame::link_offset]; ++ frame myframe((intptr_t*)os::current_stack_pointer(), ++ (intptr_t*)fp, ++ CAST_FROM_FN_PTR(address, os::current_frame)); ++ if (os::is_first_C_frame(&myframe)) { ++ // stack is not walkable ++ return frame(); ++ } else { ++ return os::get_sender_for_C_frame(&myframe); ++ } ++} ++ ++bool PosixSignals::pd_hotspot_signal_handler(int sig, siginfo_t* info, ++ ucontext_t* uc, JavaThread* thread) { ++#ifdef PRINT_SIGNAL_HANDLE ++ tty->print_cr("Signal: signo=%d, sicode=%d, sierrno=%d, siaddr=%lx", ++ info->si_signo, ++ info->si_code, ++ info->si_errno, ++ info->si_addr); ++#endif ++ ++ // decide if this trap can be handled by a stub ++ address stub = NULL; ++ address pc = NULL; ++ ++ pc = (address) os::Posix::ucontext_get_pc(uc); ++#ifdef PRINT_SIGNAL_HANDLE ++ tty->print_cr("pc=%lx", pc); ++ os::print_context(tty, uc); ++#endif ++ //%note os_trap_1 ++ if (info != NULL && uc != NULL && thread != NULL) { ++ pc = (address) os::Posix::ucontext_get_pc(uc); ++ ++ // Handle ALL stack overflow variations here ++ if (sig == SIGSEGV) { ++ address addr = (address) info->si_addr; ++#ifdef PRINT_SIGNAL_HANDLE ++ tty->print("handle all stack overflow variations: "); ++ /*tty->print("addr = %lx, stack base = %lx, stack top = %lx\n", ++ addr, ++ thread->stack_base(), ++ thread->stack_base() - thread->stack_size()); ++ */ ++#endif ++ ++ // check if fault address is within thread stack ++ if (thread->is_in_full_stack(addr)) { ++ // stack overflow ++#ifdef PRINT_SIGNAL_HANDLE ++ tty->print("stack exception check \n"); ++#endif ++ if (os::Posix::handle_stack_overflow(thread, addr, pc, uc, &stub)) { ++ return true; // continue ++ } ++ } ++ } // sig == SIGSEGV ++ ++ if (thread->thread_state() == _thread_in_Java) { ++ // Java thread running in Java code => find exception handler if any ++ // a fault inside compiled code, the interpreter, or a stub ++#ifdef PRINT_SIGNAL_HANDLE ++ tty->print("java thread running in java code\n"); ++#endif ++ ++ // Handle signal from NativeJump::patch_verified_entry(). ++ if (sig == SIGILL && nativeInstruction_at(pc)->is_sigill_zombie_not_entrant()) { ++#ifdef PRINT_SIGNAL_HANDLE ++ tty->print_cr("verified entry = %lx, sig=%d", nativeInstruction_at(pc), sig); ++#endif ++ stub = SharedRuntime::get_handle_wrong_method_stub(); ++ } else if (sig == SIGSEGV && SafepointMechanism::is_poll_address((address)info->si_addr)) { ++#ifdef PRINT_SIGNAL_HANDLE ++ tty->print_cr("polling address = %lx, sig=%d", os::get_polling_page(), sig); ++#endif ++ stub = SharedRuntime::get_poll_stub(pc); ++ } else if (sig == SIGBUS /* && info->si_code == BUS_OBJERR */) { ++ // BugId 4454115: A read from a MappedByteBuffer can fault ++ // here if the underlying file has been truncated. ++ // Do not crash the VM in such a case. ++ CodeBlob* cb = CodeCache::find_blob_unsafe(pc); ++ CompiledMethod* nm = (cb != NULL) ? 
cb->as_compiled_method_or_null() : NULL; ++#ifdef PRINT_SIGNAL_HANDLE ++ tty->print("cb = %lx, nm = %lx\n", cb, nm); ++#endif ++ bool is_unsafe_arraycopy = (thread->doing_unsafe_access() && UnsafeCopyMemory::contains_pc(pc)); ++ if ((nm != NULL && nm->has_unsafe_access()) || is_unsafe_arraycopy) { ++ address next_pc = pc + NativeInstruction::nop_instruction_size; ++ if (is_unsafe_arraycopy) { ++ next_pc = UnsafeCopyMemory::page_error_continue_pc(pc); ++ } ++ stub = SharedRuntime::handle_unsafe_access(thread, next_pc); ++ } ++ } else if (sig == SIGFPE /* && info->si_code == FPE_INTDIV */) { ++ // HACK: si_code does not work on linux 2.2.12-20!!! ++ int op = pc[0] & 0x3f; ++ int op1 = pc[3] & 0x3f; ++ //FIXME, Must port to LA code!! ++ switch (op) { ++ case 0x1e: //ddiv ++ case 0x1f: //ddivu ++ case 0x1a: //div ++ case 0x1b: //divu ++ case 0x34: //trap ++ // In LA, div_by_zero exception can only be triggered by explicit 'trap'. ++ stub = SharedRuntime::continuation_for_implicit_exception(thread, ++ pc, ++ SharedRuntime::IMPLICIT_DIVIDE_BY_ZERO); ++ break; ++ default: ++ // TODO: handle more cases if we are using other x86 instructions ++ // that can generate SIGFPE signal on linux. ++ tty->print_cr("unknown opcode 0x%X -0x%X with SIGFPE.", op, op1); ++ //fatal("please update this code."); ++ } ++ } else if (sig == SIGSEGV && ++ MacroAssembler::uses_implicit_null_check(info->si_addr)) { ++#ifdef PRINT_SIGNAL_HANDLE ++ tty->print("continuation for implicit exception\n"); ++#endif ++ // Determination of interpreter/vtable stub/compiled code null exception ++ stub = SharedRuntime::continuation_for_implicit_exception(thread, pc, SharedRuntime::IMPLICIT_NULL); ++#ifdef PRINT_SIGNAL_HANDLE ++ tty->print_cr("continuation_for_implicit_exception stub: %lx", stub); ++#endif ++ } else if (sig == SIGILL && nativeInstruction_at(pc)->is_stop()) { ++ // Pull a pointer to the error message out of the instruction ++ // stream. ++ const uint64_t *detail_msg_ptr ++ = (uint64_t*)(pc + 4/*NativeInstruction::instruction_size*/); ++ const char *detail_msg = (const char *)*detail_msg_ptr; ++ const char *msg = "stop"; ++ if (TraceTraps) { ++ tty->print_cr("trap: %s: (SIGILL)", msg); ++ } ++ ++ // End life with a fatal error, message and detail message and the context. ++ // Note: no need to do any post-processing here (e.g. signal chaining) ++ va_list va_dummy; ++ VMError::report_and_die(thread, uc, nullptr, 0, msg, detail_msg, va_dummy); ++ va_end(va_dummy); ++ ++ ShouldNotReachHere(); ++ } ++ } else if ((thread->thread_state() == _thread_in_vm || ++ thread->thread_state() == _thread_in_native) && ++ sig == SIGBUS && /* info->si_code == BUS_OBJERR && */ ++ thread->doing_unsafe_access()) { ++#ifdef PRINT_SIGNAL_HANDLE ++ tty->print_cr("SIGBUS in vm thread \n"); ++#endif ++ address next_pc = pc + NativeInstruction::nop_instruction_size; ++ if (UnsafeCopyMemory::contains_pc(pc)) { ++ next_pc = UnsafeCopyMemory::page_error_continue_pc(pc); ++ } ++ stub = SharedRuntime::handle_unsafe_access(thread, next_pc); ++ } ++ ++ // jni_fast_GetField can trap at certain pc's if a GC kicks in ++ // and the heap gets shrunk before the field access. 
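++    // Editorial note (not part of the upstream patch): find_slowcase_pc()
++    // returns the slow-path continuation for a faulting fast-path accessor,
++    // or (address)-1 when pc is not inside one, in which case the fault
++    // falls through to the generic handling below.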
++ if ((sig == SIGSEGV) || (sig == SIGBUS)) { ++#ifdef PRINT_SIGNAL_HANDLE ++ tty->print("jni fast get trap: "); ++#endif ++ address addr = JNI_FastGetField::find_slowcase_pc(pc); ++ if (addr != (address)-1) { ++ stub = addr; ++ } ++#ifdef PRINT_SIGNAL_HANDLE ++ tty->print_cr("addr = %d, stub = %lx", addr, stub); ++#endif ++ } ++ } ++ ++ if (stub != NULL) { ++#ifdef PRINT_SIGNAL_HANDLE ++ tty->print_cr("resolved stub=%lx\n",stub); ++#endif ++ // save all thread context in case we need to restore it ++ if (thread != NULL) thread->set_saved_exception_pc(pc); ++ ++ os::Posix::ucontext_set_pc(uc, stub); ++ return true; ++ } ++ ++ return false; ++} ++ ++void os::Linux::init_thread_fpu_state(void) { ++} ++ ++int os::Linux::get_fpu_control_word(void) { ++ return 0; // mute compiler ++} ++ ++void os::Linux::set_fpu_control_word(int fpu_control) { ++} ++ ++bool os::is_allocatable(size_t bytes) { ++ ++ if (bytes < 2 * G) { ++ return true; ++ } ++ ++ char* addr = reserve_memory(bytes); ++ ++ if (addr != NULL) { ++ release_memory(addr, bytes); ++ } ++ ++ return addr != NULL; ++} ++ ++//////////////////////////////////////////////////////////////////////////////// ++// thread stack ++ ++// Minimum usable stack sizes required to get to user code. Space for ++// HotSpot guard pages is added later. ++size_t os::Posix::_compiler_thread_min_stack_allowed = 48 * K; ++size_t os::Posix::_java_thread_min_stack_allowed = 40 * K; ++size_t os::Posix::_vm_internal_thread_min_stack_allowed = 64 * K; ++ ++// Return default stack size for thr_type ++size_t os::Posix::default_stack_size(os::ThreadType thr_type) { ++ // Default stack size (compiler thread needs larger stack) ++ size_t s = (thr_type == os::compiler_thread ? 2 * M : 512 * K); ++ return s; ++} ++ ++///////////////////////////////////////////////////////////////////////////// ++// helper functions for fatal error handler ++void os::print_register_info(outputStream *st, const void *context) { ++ if (context == NULL) return; ++ ++ ucontext_t *uc = (ucontext_t*)context; ++ ++ st->print_cr("Register to memory mapping:"); ++ st->cr(); ++ // this is horrendously verbose but the layout of the registers in the ++ // // context does not match how we defined our abstract Register set, so ++ // // we can't just iterate through the gregs area ++ // ++ // // this is only for the "general purpose" registers ++ st->print("ZERO=" ); print_location(st, (intptr_t)uc->uc_mcontext.__gregs[0]); ++ st->print("RA=" ); print_location(st, (intptr_t)uc->uc_mcontext.__gregs[1]); ++ st->print("TP=" ); print_location(st, (intptr_t)uc->uc_mcontext.__gregs[2]); ++ st->print("SP=" ); print_location(st, (intptr_t)uc->uc_mcontext.__gregs[3]); ++ st->cr(); ++ st->print("A0=" ); print_location(st, (intptr_t)uc->uc_mcontext.__gregs[4]); ++ st->print("A1=" ); print_location(st, (intptr_t)uc->uc_mcontext.__gregs[5]); ++ st->print("A2=" ); print_location(st, (intptr_t)uc->uc_mcontext.__gregs[6]); ++ st->print("A3=" ); print_location(st, (intptr_t)uc->uc_mcontext.__gregs[7]); ++ st->cr(); ++ st->print("A4=" ); print_location(st, (intptr_t)uc->uc_mcontext.__gregs[8]); ++ st->print("A5=" ); print_location(st, (intptr_t)uc->uc_mcontext.__gregs[9]); ++ st->print("A6=" ); print_location(st, (intptr_t)uc->uc_mcontext.__gregs[10]); ++ st->print("A7=" ); print_location(st, (intptr_t)uc->uc_mcontext.__gregs[11]); ++ st->cr(); ++ st->print("T0=" ); print_location(st, (intptr_t)uc->uc_mcontext.__gregs[12]); ++ st->print("T1=" ); print_location(st, (intptr_t)uc->uc_mcontext.__gregs[13]); ++ st->print("T2=" ); 
print_location(st, (intptr_t)uc->uc_mcontext.__gregs[14]); ++ st->print("T3=" ); print_location(st, (intptr_t)uc->uc_mcontext.__gregs[15]); ++ st->cr(); ++ st->print("T4=" ); print_location(st, (intptr_t)uc->uc_mcontext.__gregs[16]); ++ st->print("T5=" ); print_location(st, (intptr_t)uc->uc_mcontext.__gregs[17]); ++ st->print("T6=" ); print_location(st, (intptr_t)uc->uc_mcontext.__gregs[18]); ++ st->print("T7=" ); print_location(st, (intptr_t)uc->uc_mcontext.__gregs[19]); ++ st->cr(); ++ st->print("T8=" ); print_location(st, (intptr_t)uc->uc_mcontext.__gregs[20]); ++ st->print("RX=" ); print_location(st, (intptr_t)uc->uc_mcontext.__gregs[21]); ++ st->print("FP=" ); print_location(st, (intptr_t)uc->uc_mcontext.__gregs[22]); ++ st->print("S0=" ); print_location(st, (intptr_t)uc->uc_mcontext.__gregs[23]); ++ st->cr(); ++ st->print("S1=" ); print_location(st, (intptr_t)uc->uc_mcontext.__gregs[24]); ++ st->print("S2=" ); print_location(st, (intptr_t)uc->uc_mcontext.__gregs[25]); ++ st->print("S3=" ); print_location(st, (intptr_t)uc->uc_mcontext.__gregs[26]); ++ st->print("S4=" ); print_location(st, (intptr_t)uc->uc_mcontext.__gregs[27]); ++ st->cr(); ++ st->print("S5=" ); print_location(st, (intptr_t)uc->uc_mcontext.__gregs[28]); ++ st->print("S6=" ); print_location(st, (intptr_t)uc->uc_mcontext.__gregs[29]); ++ st->print("S7=" ); print_location(st, (intptr_t)uc->uc_mcontext.__gregs[30]); ++ st->print("S8=" ); print_location(st, (intptr_t)uc->uc_mcontext.__gregs[31]); ++ st->cr(); ++ ++} ++ ++void os::print_context(outputStream *st, const void *context) { ++ if (context == NULL) return; ++ ++ const ucontext_t *uc = (const ucontext_t*)context; ++ ++ st->print_cr("Registers:"); ++ st->print( "ZERO=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.__gregs[0]); ++ st->print(", RA=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.__gregs[1]); ++ st->print(", TP=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.__gregs[2]); ++ st->print(", SP=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.__gregs[3]); ++ st->cr(); ++ st->print( "A0=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.__gregs[4]); ++ st->print(", A1=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.__gregs[5]); ++ st->print(", A2=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.__gregs[6]); ++ st->print(", A3=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.__gregs[7]); ++ st->cr(); ++ st->print( "A4=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.__gregs[8]); ++ st->print(", A5=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.__gregs[9]); ++ st->print(", A6=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.__gregs[10]); ++ st->print(", A7=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.__gregs[11]); ++ st->cr(); ++ st->print( "T0=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.__gregs[12]); ++ st->print(", T1=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.__gregs[13]); ++ st->print(", T2=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.__gregs[14]); ++ st->print(", T3=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.__gregs[15]); ++ st->cr(); ++ st->print( "T4=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.__gregs[16]); ++ st->print(", T5=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.__gregs[17]); ++ st->print(", T6=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.__gregs[18]); ++ st->print(", T7=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.__gregs[19]); ++ st->cr(); ++ st->print( "T8=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.__gregs[20]); ++ st->print(", RX=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.__gregs[21]); ++ st->print(", FP=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.__gregs[22]); ++ st->print(", S0=" INTPTR_FORMAT, 
(intptr_t)uc->uc_mcontext.__gregs[23]); ++ st->cr(); ++ st->print( "S1=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.__gregs[24]); ++ st->print(", S2=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.__gregs[25]); ++ st->print(", S3=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.__gregs[26]); ++ st->print(", S4=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.__gregs[27]); ++ st->cr(); ++ st->print( "S5=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.__gregs[28]); ++ st->print(", S6=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.__gregs[29]); ++ st->print(", S7=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.__gregs[30]); ++ st->print(", S8=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.__gregs[31]); ++ st->cr(); ++ st->cr(); ++} ++ ++void os::print_tos_pc(outputStream *st, const void *context) { ++ if (context == NULL) return; ++ ++ const ucontext_t* uc = (const ucontext_t*)context; ++ ++ address sp = (address)os::Linux::ucontext_get_sp(uc); ++ print_tos(st, sp); ++ st->cr(); ++ ++ // Note: it may be unsafe to inspect memory near pc. For example, pc may ++ // point to garbage if entry point in an nmethod is corrupted. Leave ++ // this at the end, and hope for the best. ++ address pc = os::fetch_frame_from_context(uc).pc(); ++ print_instructions(st, pc); ++ st->cr(); ++} ++ ++void os::setup_fpu() { ++ // no use for LA ++} ++ ++#ifndef PRODUCT ++void os::verify_stack_alignment() { ++ assert(((intptr_t)os::current_stack_pointer() & (StackAlignmentInBytes-1)) == 0, "incorrect stack alignment"); ++} ++#endif ++ ++int os::extra_bang_size_in_bytes() { ++ // LA does not require the additional stack bang. ++ return 0; ++} ++ ++bool os::is_ActiveCoresMP() { ++ return UseActiveCoresMP && _initial_active_processor_count == 1; ++} +diff --git a/src/hotspot/os_cpu/linux_loongarch/os_linux_loongarch.hpp b/src/hotspot/os_cpu/linux_loongarch/os_linux_loongarch.hpp +new file mode 100644 +index 00000000000..fa02f8ba2f9 +--- /dev/null ++++ b/src/hotspot/os_cpu/linux_loongarch/os_linux_loongarch.hpp +@@ -0,0 +1,38 @@ ++/* ++ * Copyright (c) 1999, 2013, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ * ++ */ ++ ++#ifndef OS_CPU_LINUX_LOONGARCH_OS_LINUX_LOONGARCH_HPP ++#define OS_CPU_LINUX_LOONGARCH_OS_LINUX_LOONGARCH_HPP ++ ++ static void setup_fpu(); ++ static bool is_allocatable(size_t bytes); ++ ++ // Used to register dynamic code cache area with the OS ++ // Note: Currently only used in 64 bit Windows implementations ++ static bool register_code_area(char *low, char *high) { return true; } ++ ++ static bool is_ActiveCoresMP(); ++ ++#endif // OS_CPU_LINUX_LOONGARCH_OS_LINUX_LOONGARCH_HPP +diff --git a/src/hotspot/os_cpu/linux_loongarch/prefetch_linux_loongarch.inline.hpp b/src/hotspot/os_cpu/linux_loongarch/prefetch_linux_loongarch.inline.hpp +new file mode 100644 +index 00000000000..cf3a596387c +--- /dev/null ++++ b/src/hotspot/os_cpu/linux_loongarch/prefetch_linux_loongarch.inline.hpp +@@ -0,0 +1,56 @@ ++/* ++ * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#ifndef OS_CPU_LINUX_LOONGARCH_PREFETCH_LINUX_LOONGARCH_INLINE_HPP ++#define OS_CPU_LINUX_LOONGARCH_PREFETCH_LINUX_LOONGARCH_INLINE_HPP ++ ++ ++inline void Prefetch::read (void *loc, intx interval) { ++// According to previous and present SPECjbb2015 score, ++// comment prefetch is better than if (interval >= 0) prefetch branch. ++// So choose comment prefetch as the base line. ++#if 0 ++ __asm__ __volatile__ ( ++ " preld 0, %[__loc] \n" ++ : ++ : [__loc] "m"( *((address)loc + interval) ) ++ : "memory" ++ ); ++#endif ++} ++ ++inline void Prefetch::write(void *loc, intx interval) { ++// Ditto ++#if 0 ++ __asm__ __volatile__ ( ++ " preld 8, %[__loc] \n" ++ : ++ : [__loc] "m"( *((address)loc + interval) ) ++ : "memory" ++ ); ++#endif ++} ++ ++#endif // OS_CPU_LINUX_LOONGARCH_PREFETCH_LINUX_LOONGARCH_INLINE_HPP +diff --git a/src/hotspot/os_cpu/linux_loongarch/safefetch_linux_loongarch64.S b/src/hotspot/os_cpu/linux_loongarch/safefetch_linux_loongarch64.S +new file mode 100644 +index 00000000000..fdc6da358e5 +--- /dev/null ++++ b/src/hotspot/os_cpu/linux_loongarch/safefetch_linux_loongarch64.S +@@ -0,0 +1,56 @@ ++/* ++ * Copyright (c) 2022 SAP SE. All rights reserved. ++ * Copyright (c) 2022, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++ .globl SafeFetchN_impl ++ .globl _SafeFetchN_fault ++ .globl _SafeFetchN_continuation ++ .globl SafeFetch32_impl ++ .globl _SafeFetch32_fault ++ .globl _SafeFetch32_continuation ++ ++ # Support for int SafeFetch32(int* address, int defaultval); ++ # ++ # a0 : address ++ # a1 : defaultval ++SafeFetch32_impl: ++_SafeFetch32_fault: ++ ld.w $r4, $r4, 0 ++ jr $r1 ++_SafeFetch32_continuation: ++ or $r4, $r5, $r0 ++ jr $r1 ++ ++ # Support for intptr_t SafeFetchN(intptr_t* address, intptr_t defaultval); ++ # ++ # a0 : address ++ # a1 : defaultval ++SafeFetchN_impl: ++_SafeFetchN_fault: ++ ld.d $r4, $r4, 0 ++ jr $r1 ++_SafeFetchN_continuation: ++ or $r4, $r5, $r0 ++ jr $r1 +diff --git a/src/hotspot/os_cpu/linux_loongarch/thread_linux_loongarch.cpp b/src/hotspot/os_cpu/linux_loongarch/thread_linux_loongarch.cpp +new file mode 100644 +index 00000000000..9204302bca8 +--- /dev/null ++++ b/src/hotspot/os_cpu/linux_loongarch/thread_linux_loongarch.cpp +@@ -0,0 +1,105 @@ ++/* ++ * Copyright (c) 2003, 2013, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2021, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ * ++ */ ++ ++#include "precompiled.hpp" ++#include "compiler/compileBroker.hpp" ++#include "runtime/frame.inline.hpp" ++#include "runtime/thread.inline.hpp" ++#include "runtime/sharedRuntime.hpp" ++ ++void JavaThread::pd_initialize() ++{ ++ _anchor.clear(); ++} ++ ++frame JavaThread::pd_last_frame() { ++ assert(has_last_Java_frame(), "must have last_Java_sp() when suspended"); ++ if (_anchor.last_Java_pc() != NULL) { ++ return frame(_anchor.last_Java_sp(), _anchor.last_Java_fp(), _anchor.last_Java_pc()); ++ } else { ++ // This will pick up pc from sp ++ return frame(_anchor.last_Java_sp(), _anchor.last_Java_fp()); ++ } ++} ++ ++// For Forte Analyzer AsyncGetCallTrace profiling support - thread is ++// currently interrupted by SIGPROF ++bool JavaThread::pd_get_top_frame_for_signal_handler(frame* fr_addr, ++ void* ucontext, bool isInJava) { ++ ++ assert(Thread::current() == this, "caller must be current thread"); ++ return pd_get_top_frame(fr_addr, ucontext, isInJava); ++} ++ ++bool JavaThread::pd_get_top_frame_for_profiling(frame* fr_addr, void* ucontext, bool isInJava) { ++ return pd_get_top_frame(fr_addr, ucontext, isInJava); ++} ++ ++bool JavaThread::pd_get_top_frame(frame* fr_addr, void* ucontext, bool isInJava) { ++ // If we have a last_Java_frame, then we should use it even if ++ // isInJava == true. It should be more reliable than ucontext info. ++ if (has_last_Java_frame() && frame_anchor()->walkable()) { ++ *fr_addr = pd_last_frame(); ++ return true; ++ } ++ ++ // At this point, we don't have a last_Java_frame, so ++ // we try to glean some information out of the ucontext ++ // if we were running Java code when SIGPROF came in. ++ if (isInJava) { ++ ucontext_t* uc = (ucontext_t*) ucontext; ++ ++ intptr_t* ret_fp; ++ intptr_t* ret_sp; ++ address addr = os::fetch_frame_from_context(uc, &ret_sp, &ret_fp); ++ if (addr == NULL || ret_sp == NULL) { ++ // ucontext wasn't useful ++ return false; ++ } ++ ++ frame ret_frame(ret_sp, ret_fp, addr); ++ if (!ret_frame.safe_for_sender(this)) { ++#ifdef COMPILER2 ++ // C2 and JVMCI use ebp as a general register see if NULL fp helps ++ frame ret_frame2(ret_sp, NULL, addr); ++ if (!ret_frame2.safe_for_sender(this)) { ++ // nothing else to try if the frame isn't good ++ return false; ++ } ++ ret_frame = ret_frame2; ++#else ++ // nothing else to try if the frame isn't good ++ return false; ++#endif // COMPILER2_OR_JVMCI ++ } ++ *fr_addr = ret_frame; ++ return true; ++ } ++ ++ // nothing else to try ++ return false; ++} ++ ++void JavaThread::cache_global_variables() { } +diff --git a/src/hotspot/os_cpu/linux_loongarch/thread_linux_loongarch.hpp b/src/hotspot/os_cpu/linux_loongarch/thread_linux_loongarch.hpp +new file mode 100644 +index 00000000000..82fc6fb659f +--- /dev/null ++++ b/src/hotspot/os_cpu/linux_loongarch/thread_linux_loongarch.hpp +@@ -0,0 +1,66 @@ ++/* ++ * Copyright (c) 2000, 2013, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2021, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#ifndef OS_CPU_LINUX_LOONGARCH_VM_THREAD_LINUX_LOONGARCH_HPP ++#define OS_CPU_LINUX_LOONGARCH_VM_THREAD_LINUX_LOONGARCH_HPP ++ ++ private: ++ void pd_initialize(); ++ ++ frame pd_last_frame(); ++ ++ public: ++ // Mutators are highly dangerous.... ++ intptr_t* last_Java_fp() { return _anchor.last_Java_fp(); } ++ void set_last_Java_fp(intptr_t* fp) { _anchor.set_last_Java_fp(fp); } ++ ++ void set_base_of_stack_pointer(intptr_t* base_sp) { ++ } ++ ++ static ByteSize last_Java_fp_offset() { ++ return byte_offset_of(JavaThread, _anchor) + JavaFrameAnchor::last_Java_fp_offset(); ++ } ++ ++ intptr_t* base_of_stack_pointer() { ++ return NULL; ++ } ++ void record_base_of_stack_pointer() { ++ } ++ ++ bool pd_get_top_frame_for_signal_handler(frame* fr_addr, void* ucontext, ++ bool isInJava); ++ ++ bool pd_get_top_frame_for_profiling(frame* fr_addr, void* ucontext, bool isInJava); ++private: ++ bool pd_get_top_frame(frame* fr_addr, void* ucontext, bool isInJava); ++public: ++ ++ // These routines are only used on cpu architectures that ++ // have separate register stacks (Itanium). ++ static bool register_stack_overflow() { return false; } ++ static void enable_register_stack_guard() {} ++ static void disable_register_stack_guard() {} ++ ++#endif // OS_CPU_LINUX_LOONGARCH_VM_THREAD_LINUX_LOONGARCH_HPP +diff --git a/src/hotspot/os_cpu/linux_loongarch/vmStructs_linux_loongarch.hpp b/src/hotspot/os_cpu/linux_loongarch/vmStructs_linux_loongarch.hpp +new file mode 100644 +index 00000000000..a39cb79bb1e +--- /dev/null ++++ b/src/hotspot/os_cpu/linux_loongarch/vmStructs_linux_loongarch.hpp +@@ -0,0 +1,55 @@ ++/* ++ * Copyright (c) 2000, 2013, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ * ++ */ ++ ++#ifndef OS_CPU_LINUX_LOONGARCH_VMSTRUCTS_LINUX_LOONGARCH_HPP ++#define OS_CPU_LINUX_LOONGARCH_VMSTRUCTS_LINUX_LOONGARCH_HPP ++ ++// These are the OS and CPU-specific fields, types and integer ++// constants required by the Serviceability Agent. This file is ++// referenced by vmStructs.cpp. ++ ++#define VM_STRUCTS_OS_CPU(nonstatic_field, static_field, unchecked_nonstatic_field, volatile_nonstatic_field, nonproduct_nonstatic_field, c2_nonstatic_field, unchecked_c1_static_field, unchecked_c2_static_field) \ ++ \ ++ /******************************/ \ ++ /* Threads (NOTE: incomplete) */ \ ++ /******************************/ \ ++ nonstatic_field(OSThread, _thread_id, pid_t) \ ++ nonstatic_field(OSThread, _pthread_id, pthread_t) ++ ++ ++#define VM_TYPES_OS_CPU(declare_type, declare_toplevel_type, declare_oop_type, declare_integer_type, declare_unsigned_integer_type, declare_c1_toplevel_type, declare_c2_type, declare_c2_toplevel_type) \ ++ \ ++ /**********************/ \ ++ /* Posix Thread IDs */ \ ++ /**********************/ \ ++ \ ++ declare_integer_type(pid_t) \ ++ declare_unsigned_integer_type(pthread_t) ++ ++#define VM_INT_CONSTANTS_OS_CPU(declare_constant, declare_preprocessor_constant, declare_c1_constant, declare_c2_constant, declare_c2_preprocessor_constant) ++ ++#define VM_LONG_CONSTANTS_OS_CPU(declare_constant, declare_preprocessor_constant, declare_c1_constant, declare_c2_constant, declare_c2_preprocessor_constant) ++ ++#endif // OS_CPU_LINUX_LOONGARCH_VMSTRUCTS_LINUX_LOONGARCH_HPP +diff --git a/src/hotspot/os_cpu/linux_loongarch/vm_version_linux_loongarch.cpp b/src/hotspot/os_cpu/linux_loongarch/vm_version_linux_loongarch.cpp +new file mode 100644 +index 00000000000..3711a7036a1 +--- /dev/null ++++ b/src/hotspot/os_cpu/linux_loongarch/vm_version_linux_loongarch.cpp +@@ -0,0 +1,95 @@ ++/* ++ * Copyright (c) 2006, 2021, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2023, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ * ++ */ ++ ++#include "precompiled.hpp" ++#include "asm/register.hpp" ++#include "runtime/os.hpp" ++#include "runtime/os.inline.hpp" ++#include "runtime/vm_version.hpp" ++ ++#include ++#include ++ ++#ifndef HWCAP_LOONGARCH_LAM ++#define HWCAP_LOONGARCH_LAM (1 << 1) ++#endif ++ ++#ifndef HWCAP_LOONGARCH_UAL ++#define HWCAP_LOONGARCH_UAL (1 << 2) ++#endif ++ ++#ifndef HWCAP_LOONGARCH_LSX ++#define HWCAP_LOONGARCH_LSX (1 << 4) ++#endif ++ ++#ifndef HWCAP_LOONGARCH_LASX ++#define HWCAP_LOONGARCH_LASX (1 << 5) ++#endif ++ ++#ifndef HWCAP_LOONGARCH_COMPLEX ++#define HWCAP_LOONGARCH_COMPLEX (1 << 7) ++#endif ++ ++#ifndef HWCAP_LOONGARCH_CRYPTO ++#define HWCAP_LOONGARCH_CRYPTO (1 << 8) ++#endif ++ ++#ifndef HWCAP_LOONGARCH_LBT_X86 ++#define HWCAP_LOONGARCH_LBT_X86 (1 << 10) ++#endif ++ ++#ifndef HWCAP_LOONGARCH_LBT_ARM ++#define HWCAP_LOONGARCH_LBT_ARM (1 << 11) ++#endif ++ ++#ifndef HWCAP_LOONGARCH_LBT_MIPS ++#define HWCAP_LOONGARCH_LBT_MIPS (1 << 12) ++#endif ++ ++void VM_Version::get_os_cpu_info() { ++ ++ uint64_t auxv = getauxval(AT_HWCAP); ++ ++ static_assert(CPU_LAM == HWCAP_LOONGARCH_LAM, "Flag CPU_LAM must follow Linux HWCAP"); ++ static_assert(CPU_UAL == HWCAP_LOONGARCH_UAL, "Flag CPU_UAL must follow Linux HWCAP"); ++ static_assert(CPU_LSX == HWCAP_LOONGARCH_LSX, "Flag CPU_LSX must follow Linux HWCAP"); ++ static_assert(CPU_LASX == HWCAP_LOONGARCH_LASX, "Flag CPU_LASX must follow Linux HWCAP"); ++ static_assert(CPU_COMPLEX == HWCAP_LOONGARCH_COMPLEX, "Flag CPU_COMPLEX must follow Linux HWCAP"); ++ static_assert(CPU_CRYPTO == HWCAP_LOONGARCH_CRYPTO, "Flag CPU_CRYPTO must follow Linux HWCAP"); ++ static_assert(CPU_LBT_X86 == HWCAP_LOONGARCH_LBT_X86, "Flag CPU_LBT_X86 must follow Linux HWCAP"); ++ static_assert(CPU_LBT_ARM == HWCAP_LOONGARCH_LBT_ARM, "Flag CPU_LBT_ARM must follow Linux HWCAP"); ++ static_assert(CPU_LBT_MIPS == HWCAP_LOONGARCH_LBT_MIPS, "Flag CPU_LBT_MIPS must follow Linux HWCAP"); ++ ++ _features = auxv & ( ++ HWCAP_LOONGARCH_LAM | ++ HWCAP_LOONGARCH_UAL | ++ HWCAP_LOONGARCH_LSX | ++ HWCAP_LOONGARCH_LASX | ++ HWCAP_LOONGARCH_COMPLEX | ++ HWCAP_LOONGARCH_CRYPTO | ++ HWCAP_LOONGARCH_LBT_X86 | ++ HWCAP_LOONGARCH_LBT_ARM | ++ HWCAP_LOONGARCH_LBT_MIPS); ++} +diff --git a/src/hotspot/os_cpu/linux_mips/assembler_linux_mips.cpp b/src/hotspot/os_cpu/linux_mips/assembler_linux_mips.cpp +new file mode 100644 +index 00000000000..30719a0340b +--- /dev/null ++++ b/src/hotspot/os_cpu/linux_mips/assembler_linux_mips.cpp +@@ -0,0 +1,24 @@ ++/* ++ * Copyright (c) 1999, 2010, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2021, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 
++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ +diff --git a/src/hotspot/os_cpu/linux_mips/atomic_linux_mips.hpp b/src/hotspot/os_cpu/linux_mips/atomic_linux_mips.hpp +new file mode 100644 +index 00000000000..c82e3ce1ecf +--- /dev/null ++++ b/src/hotspot/os_cpu/linux_mips/atomic_linux_mips.hpp +@@ -0,0 +1,194 @@ ++/* ++ * Copyright (c) 1999, 2013, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#ifndef OS_CPU_LINUX_MIPS_VM_ATOMIC_LINUX_MIPS_HPP ++#define OS_CPU_LINUX_MIPS_VM_ATOMIC_LINUX_MIPS_HPP ++ ++#include "runtime/vm_version.hpp" ++ ++// Implementation of class atomic ++ ++template ++struct Atomic::PlatformAdd { ++ template ++ D add_and_fetch(D volatile* dest, I add_value, atomic_memory_order order) const { ++ //Unimplemented(); ++ return __sync_add_and_fetch(dest, add_value); ++ } ++ ++ template ++ D fetch_and_add(D volatile* dest, I add_value, atomic_memory_order order) const { ++ return add_and_fetch(dest, add_value, order) - add_value; ++ } ++}; ++ ++template<> ++template ++inline T Atomic::PlatformXchg<4>::operator()(T volatile* dest, ++ T exchange_value, ++ atomic_memory_order order) const { ++ T __ret, __tmp; ++ ++ STATIC_ASSERT(4 == sizeof(T)); ++ __asm__ __volatile__ ( ++ " .set push\n\t" ++ " .set mips64\n\t" ++ " .set noreorder\n\t" ++ ++ "1: sync\n\t" ++ " ll %[__ret], %[__dest] \n\t" ++ " move %[__tmp], %[__val] \n\t" ++ " sc %[__tmp], %[__dest] \n\t" ++ " beqz %[__tmp], 1b \n\t" ++ " nop \n\t" ++ ++ " .set pop\n\t" ++ ++ : [__ret] "=&r" (__ret), [__tmp] "=&r" (__tmp) ++ : [__dest] "m" (*(volatile jint*)dest), [__val] "r" (exchange_value) ++ : "memory" ++ ); ++ ++ return __ret; ++} ++ ++template<> ++template ++inline T Atomic::PlatformXchg<8>::operator()(T volatile* dest, ++ T exchange_value, ++ atomic_memory_order order) const { ++ STATIC_ASSERT(8 == sizeof(T)); ++ T __ret; ++ jlong __tmp; ++ __asm__ __volatile__ ( ++ " .set push\n\t" ++ " .set mips64\n\t" ++ " .set noreorder\n\t" ++ ++ "1: sync\n\t" ++ " lld %[__ret], %[__dest] \n\t" ++ " move %[__tmp], %[__val] \n\t" ++ " scd %[__tmp], %[__dest] \n\t" ++ " beqz %[__tmp], 1b \n\t" ++ " nop \n\t" ++ ++ " .set pop\n\t" ++ ++ : [__ret] "=&r" (__ret), [__tmp] "=&r" (__tmp) ++ : [__dest] "m" (*(volatile intptr_t*)dest), [__val] "r" (exchange_value) ++ : "memory" ++ ); ++ return __ret; ++} ++ ++#if 0 ++template<> 
++template ++inline T Atomic::PlatformCmpxchg<1>::operator()(T volatile* dest, ++ T compare_value, ++ T exchange_value, ++ atomic_memory_order order) const { ++ STATIC_ASSERT(1 == sizeof(T)); ++} ++ ++#else ++// No direct support for cmpxchg of bytes; emulate using int. ++template<> ++struct Atomic::PlatformCmpxchg<1> : Atomic::CmpxchgByteUsingInt {}; ++#endif ++ ++template<> ++template ++inline T Atomic::PlatformCmpxchg<4>::operator()(T volatile* dest, ++ T compare_value, ++ T exchange_value, ++ atomic_memory_order order) const { ++ STATIC_ASSERT(4 == sizeof(T)); ++ T __prev; ++ jint __cmp; ++ ++ __asm__ __volatile__ ( ++ " .set push\n\t" ++ " .set mips64\n\t" ++ " .set noreorder\n\t" ++ ++ "1:sync \n\t" ++ " ll %[__prev], %[__dest] \n\t" ++ " bne %[__prev], %[__old], 2f \n\t" ++ " move %[__cmp], $0 \n\t" ++ " move %[__cmp], %[__new] \n\t" ++ " sc %[__cmp], %[__dest] \n\t" ++ " beqz %[__cmp], 1b \n\t" ++ " nop \n\t" ++ "2: \n\t" ++ " sync \n\t" ++ ++ " .set pop\n\t" ++ ++ : [__prev] "=&r" (__prev), [__cmp] "=&r" (__cmp) ++ : [__dest] "m" (*(volatile jint*)dest), [__old] "r" (compare_value), [__new] "r" (exchange_value) ++ : "memory" ++ ); ++ ++ return __prev; ++} ++ ++template<> ++template ++inline T Atomic::PlatformCmpxchg<8>::operator()(T volatile* dest, ++ T compare_value, ++ T exchange_value, ++ atomic_memory_order order) const { ++ STATIC_ASSERT(8 == sizeof(T)); ++ T __prev; ++ jlong __cmp; ++ ++ __asm__ __volatile__ ( ++ " .set push\n\t" ++ " .set mips64\n\t" ++ " .set noreorder\n\t" ++ ++ "1:sync \n\t" ++ " lld %[__prev], %[__dest] \n\t" ++ " bne %[__prev], %[__old], 2f \n\t" ++ " move %[__cmp], $0 \n\t" ++ " move %[__cmp], %[__new] \n\t" ++ " scd %[__cmp], %[__dest] \n\t" ++ " beqz %[__cmp], 1b \n\t" ++ " nop \n\t" ++ "2: \n\t" ++ " sync \n\t" ++ ++ " .set pop\n\t" ++ ++ : [__prev] "=&r" (__prev), [__cmp] "=&r" (__cmp) ++ : [__dest] "m" (*(volatile jlong*)dest), [__old] "r" (compare_value), [__new] "r" (exchange_value) ++ : "memory" ++ ); ++ return __prev; ++} ++ ++ ++#endif // OS_CPU_LINUX_MIPS_VM_ATOMIC_LINUX_MIPS_HPP +diff --git a/src/hotspot/os_cpu/linux_mips/bytes_linux_mips.inline.hpp b/src/hotspot/os_cpu/linux_mips/bytes_linux_mips.inline.hpp +new file mode 100644 +index 00000000000..5b5cd10aa55 +--- /dev/null ++++ b/src/hotspot/os_cpu/linux_mips/bytes_linux_mips.inline.hpp +@@ -0,0 +1,37 @@ ++/* ++ * Copyright (c) 1999, 2010, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2016, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ * ++ */ ++ ++#ifndef OS_CPU_LINUX_MIPS_VM_BYTES_LINUX_MIPS_INLINE_HPP ++#define OS_CPU_LINUX_MIPS_VM_BYTES_LINUX_MIPS_INLINE_HPP ++ ++#include ++ ++// Efficient swapping of data bytes from Java byte ++// ordering to native byte ordering and vice versa. ++inline u2 Bytes::swap_u2(u2 x) { return bswap_16(x); } ++inline u4 Bytes::swap_u4(u4 x) { return bswap_32(x); } ++inline u8 Bytes::swap_u8(u8 x) { return bswap_64(x); } ++ ++#endif // OS_CPU_LINUX_MIPS_VM_BYTES_LINUX_MIPS_INLINE_HPP +diff --git a/src/hotspot/os_cpu/linux_mips/copy_linux_mips.inline.hpp b/src/hotspot/os_cpu/linux_mips/copy_linux_mips.inline.hpp +new file mode 100644 +index 00000000000..3fd6ef7b36c +--- /dev/null ++++ b/src/hotspot/os_cpu/linux_mips/copy_linux_mips.inline.hpp +@@ -0,0 +1,125 @@ ++/* ++ * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2016, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ * ++ */ ++ ++#ifndef OS_CPU_LINUX_MIPS_VM_COPY_LINUX_MIPS_INLINE_HPP ++#define OS_CPU_LINUX_MIPS_VM_COPY_LINUX_MIPS_INLINE_HPP ++ ++static void pd_conjoint_words(const HeapWord* from, HeapWord* to, size_t count) { ++ (void)memmove(to, from, count * HeapWordSize); ++} ++ ++static void pd_disjoint_words(const HeapWord* from, HeapWord* to, size_t count) { ++ switch (count) { ++ case 8: to[7] = from[7]; ++ case 7: to[6] = from[6]; ++ case 6: to[5] = from[5]; ++ case 5: to[4] = from[4]; ++ case 4: to[3] = from[3]; ++ case 3: to[2] = from[2]; ++ case 2: to[1] = from[1]; ++ case 1: to[0] = from[0]; ++ case 0: break; ++ default: ++ (void)memcpy(to, from, count * HeapWordSize); ++ break; ++ } ++} ++ ++static void pd_disjoint_words_atomic(const HeapWord* from, HeapWord* to, size_t count) { ++ switch (count) { ++ case 8: to[7] = from[7]; ++ case 7: to[6] = from[6]; ++ case 6: to[5] = from[5]; ++ case 5: to[4] = from[4]; ++ case 4: to[3] = from[3]; ++ case 3: to[2] = from[2]; ++ case 2: to[1] = from[1]; ++ case 1: to[0] = from[0]; ++ case 0: break; ++ default: ++ while (count-- > 0) { ++ *to++ = *from++; ++ } ++ break; ++ } ++} ++ ++static void pd_aligned_conjoint_words(const HeapWord* from, HeapWord* to, size_t count) { ++ pd_conjoint_words(from, to, count); ++} ++ ++static void pd_aligned_disjoint_words(const HeapWord* from, HeapWord* to, size_t count) { ++ pd_disjoint_words(from, to, count); ++} ++ ++static void pd_conjoint_bytes(const void* from, void* to, size_t count) { ++ (void)memmove(to, from, count); ++} ++ ++static void pd_conjoint_bytes_atomic(const void* from, void* to, size_t count) { ++ pd_conjoint_bytes(from, to, count); ++} ++ ++static void pd_conjoint_jshorts_atomic(const jshort* from, jshort* to, size_t count) { ++ copy_conjoint_atomic(from, to, count); ++} ++ ++static void pd_conjoint_jints_atomic(const jint* from, jint* to, size_t count) { ++ copy_conjoint_atomic(from, to, count); ++} ++ ++static void pd_conjoint_jlongs_atomic(const jlong* from, jlong* to, size_t count) { ++ copy_conjoint_atomic(from, to, count); ++} ++ ++static void pd_conjoint_oops_atomic(const oop* from, oop* to, size_t count) { ++ //assert(!UseCompressedOops, "foo!"); ++ assert(HeapWordSize == BytesPerOop, "heapwords and oops must be the same size"); ++ copy_conjoint_atomic(from, to, count); ++} ++ ++static void pd_arrayof_conjoint_bytes(const HeapWord* from, HeapWord* to, size_t count) { ++ pd_conjoint_bytes_atomic(from, to, count); ++} ++ ++static void pd_arrayof_conjoint_jshorts(const HeapWord* from, HeapWord* to, size_t count) { ++ pd_conjoint_jshorts_atomic((jshort*)from, (jshort*)to, count); ++} ++ ++static void pd_arrayof_conjoint_jints(const HeapWord* from, HeapWord* to, size_t count) { ++ pd_conjoint_jints_atomic((jint*)from, (jint*)to, count); ++} ++ ++static void pd_arrayof_conjoint_jlongs(const HeapWord* from, HeapWord* to, size_t count) { ++ pd_conjoint_jlongs_atomic((jlong*)from, (jlong*)to, count); ++} ++ ++static void pd_arrayof_conjoint_oops(const HeapWord* from, HeapWord* to, size_t count) { ++ //assert(!UseCompressedOops, "foo!"); ++ assert(BytesPerLong == BytesPerOop, "jlongs and oops must be the same size"); ++ pd_conjoint_oops_atomic((oop*)from, (oop*)to, count); ++} ++ ++#endif // OS_CPU_LINUX_MIPS_VM_COPY_LINUX_MIPS_INLINE_HPP +diff --git a/src/hotspot/os_cpu/linux_mips/globals_linux_mips.hpp b/src/hotspot/os_cpu/linux_mips/globals_linux_mips.hpp +new file mode 100644 +index 00000000000..f1599ac5f17 +--- /dev/null ++++ b/src/hotspot/os_cpu/linux_mips/globals_linux_mips.hpp +@@ -0,0 
+1,51 @@ ++/* ++ * Copyright (c) 2000, 2013, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2018, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#ifndef OS_CPU_LINUX_MIPS_VM_GLOBALS_LINUX_MIPS_HPP ++#define OS_CPU_LINUX_MIPS_VM_GLOBALS_LINUX_MIPS_HPP ++ ++// Sets the default values for platform dependent flags used by the runtime system. ++// (see globals.hpp) ++ ++define_pd_global(bool, DontYieldALot, false); ++#ifdef MIPS64 ++define_pd_global(intx, ThreadStackSize, 1024); // 0 => use system default ++define_pd_global(intx, VMThreadStackSize, 1024); ++#else ++// ThreadStackSize 320 allows a couple of test cases to run while ++// keeping the number of threads that can be created high. System ++// default ThreadStackSize appears to be 512 which is too big. ++define_pd_global(intx, ThreadStackSize, 320); ++define_pd_global(intx, VMThreadStackSize, 512); ++#endif // MIPS64 ++ ++define_pd_global(intx, CompilerThreadStackSize, 0); ++ ++define_pd_global(uintx,JVMInvokeMethodSlack, 8192); ++ ++// Used on 64 bit platforms for UseCompressedOops base address ++define_pd_global(uintx,HeapBaseMinAddress, 2*G); ++ ++#endif // OS_CPU_LINUX_MIPS_VM_GLOBALS_LINUX_MIPS_HPP +diff --git a/src/hotspot/os_cpu/linux_mips/linux_mips.s b/src/hotspot/os_cpu/linux_mips/linux_mips.s +new file mode 100644 +index 00000000000..36c8d810c3c +--- /dev/null ++++ b/src/hotspot/os_cpu/linux_mips/linux_mips.s +@@ -0,0 +1,25 @@ ++# ++# Copyright (c) 2004, 2013, Oracle and/or its affiliates. All rights reserved. ++# Copyright (c) 2015, 2018, Loongson Technology. All rights reserved. ++# DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++# ++# This code is free software; you can redistribute it and/or modify it ++# under the terms of the GNU General Public License version 2 only, as ++# published by the Free Software Foundation. ++# ++# This code is distributed in the hope that it will be useful, but WITHOUT ++# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++# FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++# version 2 for more details (a copy is included in the LICENSE file that ++# accompanied this code). ++# ++# You should have received a copy of the GNU General Public License version ++# 2 along with this work; if not, write to the Free Software Foundation, ++# Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 
++# ++# Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++# or visit www.oracle.com if you need additional information or have any ++# questions. ++# ++ ++ +diff --git a/src/hotspot/os_cpu/linux_mips/orderAccess_linux_mips.hpp b/src/hotspot/os_cpu/linux_mips/orderAccess_linux_mips.hpp +new file mode 100644 +index 00000000000..a92bf43bdbb +--- /dev/null ++++ b/src/hotspot/os_cpu/linux_mips/orderAccess_linux_mips.hpp +@@ -0,0 +1,52 @@ ++/* ++ * Copyright (c) 2003, 2013, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#ifndef OS_CPU_LINUX_MIPS_VM_ORDERACCESS_LINUX_MIPS_HPP ++#define OS_CPU_LINUX_MIPS_VM_ORDERACCESS_LINUX_MIPS_HPP ++ ++#include "runtime/os.hpp" ++ ++// Included in orderAccess.hpp header file. ++ ++// Implementation of class OrderAccess. ++#define inlasm_sync() if (os::is_ActiveCoresMP()) \ ++ __asm__ __volatile__ ("nop" : : : "memory"); \ ++ else \ ++ __asm__ __volatile__ ("sync" : : : "memory"); ++#define inlasm_synci() __asm__ __volatile__ ("synci 0($0)" : : : "memory"); ++ ++inline void OrderAccess::loadload() { inlasm_sync(); } ++inline void OrderAccess::storestore() { inlasm_sync(); } ++inline void OrderAccess::loadstore() { inlasm_sync(); } ++inline void OrderAccess::storeload() { inlasm_sync(); } ++ ++inline void OrderAccess::acquire() { inlasm_sync(); } ++inline void OrderAccess::release() { inlasm_sync(); } ++inline void OrderAccess::fence() { inlasm_sync(); } ++inline void OrderAccess::cross_modify_fence_impl() { inlasm_synci(); } ++ ++#undef inlasm_sync ++ ++#endif // OS_CPU_LINUX_MIPS_VM_ORDERACCESS_LINUX_MIPS_HPP +diff --git a/src/hotspot/os_cpu/linux_mips/os_linux_mips.cpp b/src/hotspot/os_cpu/linux_mips/os_linux_mips.cpp +new file mode 100644 +index 00000000000..ff1af7beb68 +--- /dev/null ++++ b/src/hotspot/os_cpu/linux_mips/os_linux_mips.cpp +@@ -0,0 +1,817 @@ ++/* ++ * Copyright (c) 1999, 2014, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2023, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. 
++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++// no precompiled headers ++#include "asm/macroAssembler.hpp" ++#include "classfile/classLoader.hpp" ++#include "classfile/systemDictionary.hpp" ++#include "classfile/vmSymbols.hpp" ++#include "code/icBuffer.hpp" ++#include "code/vtableStubs.hpp" ++#include "interpreter/interpreter.hpp" ++#include "memory/allocation.inline.hpp" ++#include "os_share_linux.hpp" ++#include "prims/jniFastGetField.hpp" ++#include "prims/jvm_misc.hpp" ++#include "runtime/arguments.hpp" ++#include "runtime/frame.inline.hpp" ++#include "runtime/interfaceSupport.inline.hpp" ++#include "runtime/java.hpp" ++#include "runtime/javaCalls.hpp" ++#include "runtime/mutexLocker.hpp" ++#include "runtime/osThread.hpp" ++#include "runtime/safepointMechanism.hpp" ++#include "runtime/sharedRuntime.hpp" ++#include "runtime/stubRoutines.hpp" ++#include "runtime/thread.inline.hpp" ++#include "runtime/timer.hpp" ++#include "signals_posix.hpp" ++#include "utilities/events.hpp" ++#include "utilities/vmError.hpp" ++#include "compiler/disassembler.hpp" ++ ++// put OS-includes here ++# include ++# include ++# include ++# include ++# include ++# include ++# include ++# include ++# include ++# include ++# include ++# include ++# include ++# include ++# include ++# include ++# include ++# include ++# include ++# include ++ ++#define REG_SP 29 ++#define REG_FP 30 ++ ++address os::current_stack_pointer() { ++ register void *sp __asm__ ("$29"); ++ return (address) sp; ++} ++ ++char* os::non_memory_address_word() { ++ // Must never look like an address returned by reserve_memory, ++ // even in its subfields (as defined by the CPU immediate fields, ++ // if the CPU splits constants across multiple instructions). 
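++  // Editorial note (not part of the upstream patch): an all-bits-set pointer
++  // can never be handed out by reserve_memory, so it is a safe sentinel here.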
++ ++ return (char*) -1; ++} ++ ++address os::Posix::ucontext_get_pc(const ucontext_t * uc) { ++ return (address)uc->uc_mcontext.pc; ++} ++ ++void os::Posix::ucontext_set_pc(ucontext_t * uc, address pc) { ++ uc->uc_mcontext.pc = (intptr_t)pc; ++} ++ ++intptr_t* os::Linux::ucontext_get_sp(const ucontext_t * uc) { ++ return (intptr_t*)uc->uc_mcontext.gregs[REG_SP]; ++} ++ ++intptr_t* os::Linux::ucontext_get_fp(const ucontext_t * uc) { ++ return (intptr_t*)uc->uc_mcontext.gregs[REG_FP]; ++} ++ ++address os::fetch_frame_from_context(const void* ucVoid, ++ intptr_t** ret_sp, intptr_t** ret_fp) { ++ ++ address epc; ++ ucontext_t* uc = (ucontext_t*)ucVoid; ++ ++ if (uc != NULL) { ++ epc = os::Posix::ucontext_get_pc(uc); ++ if (ret_sp) *ret_sp = os::Linux::ucontext_get_sp(uc); ++ if (ret_fp) *ret_fp = os::Linux::ucontext_get_fp(uc); ++ } else { ++ epc = NULL; ++ if (ret_sp) *ret_sp = (intptr_t *)NULL; ++ if (ret_fp) *ret_fp = (intptr_t *)NULL; ++ } ++ ++ return epc; ++} ++ ++frame os::fetch_frame_from_context(const void* ucVoid) { ++ intptr_t* sp; ++ intptr_t* fp; ++ address epc = fetch_frame_from_context(ucVoid, &sp, &fp); ++ return frame(sp, fp, epc); ++} ++ ++frame os::fetch_compiled_frame_from_context(const void* ucVoid) { ++ const ucontext_t* uc = (const ucontext_t*)ucVoid; ++ // In compiled code, the stack banging is performed before RA ++ // has been saved in the frame. RA is live, and SP and FP ++ // belong to the caller. ++ intptr_t* fp = os::Linux::ucontext_get_fp(uc); ++ intptr_t* sp = os::Linux::ucontext_get_sp(uc); ++ address pc = (address)(uc->uc_mcontext.gregs[31]); ++ return frame(sp, fp, pc); ++} ++ ++// By default, gcc always save frame pointer (%ebp/%rbp) on stack. It may get ++// turned off by -fomit-frame-pointer, ++frame os::get_sender_for_C_frame(frame* fr) { ++ return frame(fr->sender_sp(), fr->link(), fr->sender_pc()); ++} ++ ++//intptr_t* _get_previous_fp() { ++intptr_t* __attribute__((noinline)) os::get_previous_fp() { ++ int *pc; ++ intptr_t sp; ++ int *pc_limit = (int*)(void*)&os::get_previous_fp; ++ int insn; ++ ++ { ++ l_pc:; ++ pc = (int*)&&l_pc; ++ __asm__ __volatile__ ("move %0, $sp" : "=r" (sp)); ++ } ++ ++ do { ++ insn = *pc; ++ switch(bitfield(insn, 16, 16)) { ++ case 0x27bd: /* addiu $sp,$sp,-i */ ++ case 0x67bd: /* daddiu $sp,$sp,-i */ ++ assert ((short)bitfield(insn, 0, 16)<0, "bad frame"); ++ sp -= (short)bitfield(insn, 0, 16); ++ return (intptr_t*)sp; ++ } ++ --pc; ++ } while (pc>=pc_limit); // The initial value of pc may be equal to pc_limit, because of GCC optimization. ++ ++ ShouldNotReachHere(); ++ return NULL; // mute compiler ++} ++ ++ ++frame os::current_frame() { ++ intptr_t* fp = (intptr_t*)get_previous_fp(); ++ frame myframe((intptr_t*)os::current_stack_pointer(), ++ (intptr_t*)fp, ++ CAST_FROM_FN_PTR(address, os::current_frame)); ++ if (os::is_first_C_frame(&myframe)) { ++ // stack is not walkable ++ return frame(); ++ } else { ++ return os::get_sender_for_C_frame(&myframe); ++ } ++} ++ ++//x86 add 2 new assemble function here! 
++bool PosixSignals::pd_hotspot_signal_handler(int sig, siginfo_t* info, ++ ucontext_t* uc, JavaThread* thread) { ++#ifdef PRINT_SIGNAL_HANDLE ++ tty->print_cr("Signal: signo=%d, sicode=%d, sierrno=%d, siaddr=%lx", ++ info->si_signo, ++ info->si_code, ++ info->si_errno, ++ info->si_addr); ++#endif ++ ++ // decide if this trap can be handled by a stub ++ address stub = NULL; ++ address pc = NULL; ++ ++ pc = (address) os::Posix::ucontext_get_pc(uc); ++#ifdef PRINT_SIGNAL_HANDLE ++ tty->print_cr("pc=%lx", pc); ++ os::print_context(tty, uc); ++#endif ++ //%note os_trap_1 ++ if (info != NULL && uc != NULL && thread != NULL) { ++ pc = (address) os::Posix::ucontext_get_pc(uc); ++ ++ // Handle ALL stack overflow variations here ++ if (sig == SIGSEGV) { ++ address addr = (address) info->si_addr; ++#ifdef PRINT_SIGNAL_HANDLE ++ tty->print("handle all stack overflow variations: "); ++ /*tty->print("addr = %lx, stack base = %lx, stack top = %lx\n", ++ addr, ++ thread->stack_base(), ++ thread->stack_base() - thread->stack_size()); ++ */ ++#endif ++ ++ // check if fault address is within thread stack ++ if (thread->is_in_full_stack(addr)) { ++ // stack overflow ++#ifdef PRINT_SIGNAL_HANDLE ++ tty->print("stack exception check \n"); ++#endif ++ if (os::Posix::handle_stack_overflow(thread, addr, pc, uc, &stub)) { ++ return true; // continue ++ } ++ } //addr < ++ } //sig == SIGSEGV ++ ++ if (thread->thread_state() == _thread_in_Java) { ++ // Java thread running in Java code => find exception handler if any ++ // a fault inside compiled code, the interpreter, or a stub ++#ifdef PRINT_SIGNAL_HANDLE ++ tty->print("java thread running in java code\n"); ++#endif ++ ++ // Handle signal from NativeJump::patch_verified_entry(). ++ if (sig == SIGILL && nativeInstruction_at(pc)->is_sigill_zombie_not_entrant()) { ++#ifdef PRINT_SIGNAL_HANDLE ++ tty->print_cr("verified entry = %lx, sig=%d", nativeInstruction_at(pc), sig); ++#endif ++ stub = SharedRuntime::get_handle_wrong_method_stub(); ++ } else if (sig == SIGSEGV && SafepointMechanism::is_poll_address((address)info->si_addr)) { ++#ifdef PRINT_SIGNAL_HANDLE ++ tty->print_cr("polling address = %lx, sig=%d", os::get_polling_page(), sig); ++#endif ++ stub = SharedRuntime::get_poll_stub(pc); ++ } else if (sig == SIGBUS /* && info->si_code == BUS_OBJERR */) { ++ // BugId 4454115: A read from a MappedByteBuffer can fault ++ // here if the underlying file has been truncated. ++ // Do not crash the VM in such a case. ++ CodeBlob* cb = CodeCache::find_blob_unsafe(pc); ++ CompiledMethod* nm = (cb != NULL) ? cb->as_compiled_method_or_null() : NULL; ++#ifdef PRINT_SIGNAL_HANDLE ++ tty->print("cb = %lx, nm = %lx\n", cb, nm); ++#endif ++ bool is_unsafe_arraycopy = (thread->doing_unsafe_access() && UnsafeCopyMemory::contains_pc(pc)); ++ if ((nm != NULL && nm->has_unsafe_access()) || is_unsafe_arraycopy) { ++ address next_pc = pc + NativeInstruction::nop_instruction_size; ++ if (is_unsafe_arraycopy) { ++ next_pc = UnsafeCopyMemory::page_error_continue_pc(pc); ++ } ++ stub = SharedRuntime::handle_unsafe_access(thread, next_pc); ++ } ++ } else if (sig == SIGFPE /* && info->si_code == FPE_INTDIV */) { ++ // HACK: si_code does not work on linux 2.2.12-20!!! ++ int op = pc[0] & 0x3f; ++ int op1 = pc[3] & 0x3f; ++ //FIXME, Must port to mips code!! ++ switch (op) { ++ case 0x1e: //ddiv ++ case 0x1f: //ddivu ++ case 0x1a: //div ++ case 0x1b: //divu ++ case 0x34: //trap ++ /* In MIPS, div_by_zero exception can only be triggered by explicit 'trap'. 
++ * Ref: [c1_LIRAssembler_mips.cpp] arithmetic_idiv() ++ */ ++ stub = SharedRuntime::continuation_for_implicit_exception(thread, ++ pc, ++ SharedRuntime::IMPLICIT_DIVIDE_BY_ZERO); ++ break; ++ default: ++ // TODO: handle more cases if we are using other x86 instructions ++ // that can generate SIGFPE signal on linux. ++ tty->print_cr("unknown opcode 0x%X -0x%X with SIGFPE.", op, op1); ++ //fatal("please update this code."); ++ } ++ } else if (sig == SIGSEGV && ++ MacroAssembler::uses_implicit_null_check(info->si_addr)) { ++#ifdef PRINT_SIGNAL_HANDLE ++ tty->print("continuation for implicit exception\n"); ++#endif ++ // Determination of interpreter/vtable stub/compiled code null exception ++ stub = SharedRuntime::continuation_for_implicit_exception(thread, pc, SharedRuntime::IMPLICIT_NULL); ++#ifdef PRINT_SIGNAL_HANDLE ++ tty->print_cr("continuation_for_implicit_exception stub: %lx", stub); ++#endif ++ } else if (/*thread->thread_state() == _thread_in_Java && */sig == SIGILL) { ++ //Since kernel does not have emulation of PS instructions yet, the emulation must be handled here. ++ //The method is to trigger kernel emulation of float emulation. ++ int inst = *(int*)pc; ++ int ops = (inst >> 26) & 0x3f; ++ int ops_fmt = (inst >> 21) & 0x1f; ++ int op = inst & 0x3f; ++ if (ops == Assembler::cop1_op && ops_fmt == Assembler::ps_fmt) { ++ int ft, fs, fd; ++ ft = (inst >> 16) & 0x1f; ++ fs = (inst >> 11) & 0x1f; ++ fd = (inst >> 6) & 0x1f; ++ float ft_upper, ft_lower, fs_upper, fs_lower, fd_upper, fd_lower; ++ double ft_value, fs_value, fd_value; ++ ft_value = uc->uc_mcontext.fpregs.fp_r.fp_dregs[ft]; ++ fs_value = uc->uc_mcontext.fpregs.fp_r.fp_dregs[fs]; ++ __asm__ __volatile__ ( ++ "cvt.s.pl %0, %4\n\t" ++ "cvt.s.pu %1, %4\n\t" ++ "cvt.s.pl %2, %5\n\t" ++ "cvt.s.pu %3, %5\n\t" ++ : "=f" (fs_lower), "=f" (fs_upper), "=f" (ft_lower), "=f" (ft_upper) ++ : "f" (fs_value), "f" (ft_value) ++ ); ++ ++ switch (op) { ++ case Assembler::fadd_op: ++ __asm__ __volatile__ ( ++ "add.s %1, %3, %5\n\t" ++ "add.s %2, %4, %6\n\t" ++ "pll.ps %0, %1, %2\n\t" ++ : "=f" (fd_value), "=f" (fd_upper), "=f" (fd_lower) ++ : "f" (fs_upper), "f" (fs_lower), "f" (ft_upper), "f" (ft_lower) ++ ); ++ uc->uc_mcontext.fpregs.fp_r.fp_dregs[fd] = fd_value; ++ stub = pc + 4; ++ break; ++ case Assembler::fsub_op: ++ //fd = fs - ft ++ __asm__ __volatile__ ( ++ "sub.s %1, %3, %5\n\t" ++ "sub.s %2, %4, %6\n\t" ++ "pll.ps %0, %1, %2\n\t" ++ : "=f" (fd_value), "=f" (fd_upper), "=f" (fd_lower) ++ : "f" (fs_upper), "f" (fs_lower), "f" (ft_upper), "f" (ft_lower) ++ ); ++ uc->uc_mcontext.fpregs.fp_r.fp_dregs[fd] = fd_value; ++ stub = pc + 4; ++ break; ++ case Assembler::fmul_op: ++ __asm__ __volatile__ ( ++ "mul.s %1, %3, %5\n\t" ++ "mul.s %2, %4, %6\n\t" ++ "pll.ps %0, %1, %2\n\t" ++ : "=f" (fd_value), "=f" (fd_upper), "=f" (fd_lower) ++ : "f" (fs_upper), "f" (fs_lower), "f" (ft_upper), "f" (ft_lower) ++ ); ++ uc->uc_mcontext.fpregs.fp_r.fp_dregs[fd] = fd_value; ++ stub = pc + 4; ++ break; ++ default: ++ tty->print_cr("unknown cop1 opcode 0x%x with SIGILL.", op); ++ } ++ } else if (ops == Assembler::cop1x_op /*&& op == Assembler::nmadd_ps_op*/) { ++ // madd.ps is not used, the code below were not tested ++ int fr, ft, fs, fd; ++ float fr_upper, fr_lower, fs_upper, fs_lower, ft_upper, ft_lower, fd_upper, fd_lower; ++ double fr_value, ft_value, fs_value, fd_value; ++ switch (op) { ++ case Assembler::madd_ps_op: ++ // fd = (fs * ft) + fr ++ fr = (inst >> 21) & 0x1f; ++ ft = (inst >> 16) & 0x1f; ++ fs = (inst >> 11) & 0x1f; ++ fd = (inst >> 6) 
& 0x1f; ++ fr_value = uc->uc_mcontext.fpregs.fp_r.fp_dregs[fr]; ++ ft_value = uc->uc_mcontext.fpregs.fp_r.fp_dregs[ft]; ++ fs_value = uc->uc_mcontext.fpregs.fp_r.fp_dregs[fs]; ++ __asm__ __volatile__ ( ++ "cvt.s.pu %3, %9\n\t" ++ "cvt.s.pl %4, %9\n\t" ++ "cvt.s.pu %5, %10\n\t" ++ "cvt.s.pl %6, %10\n\t" ++ "cvt.s.pu %7, %11\n\t" ++ "cvt.s.pl %8, %11\n\t" ++ "madd.s %1, %3, %5, %7\n\t" ++ "madd.s %2, %4, %6, %8\n\t" ++ "pll.ps %0, %1, %2\n\t" ++ : "=f" (fd_value), "=f" (fd_upper), "=f" (fd_lower), "=f" (fr_upper), "=f" (fr_lower), "=f" (fs_upper), "=f" (fs_lower), "=f" (ft_upper), "=f" (ft_lower) ++ : "f" (fr_value)/*9*/, "f" (fs_value)/*10*/, "f" (ft_value)/*11*/ ++ ); ++ uc->uc_mcontext.fpregs.fp_r.fp_dregs[fd] = fd_value; ++ stub = pc + 4; ++ break; ++ default: ++ tty->print_cr("unknown cop1x opcode 0x%x with SIGILL.", op); ++ } ++ } ++ } //SIGILL ++ } else if (sig == SIGILL && VM_Version::is_determine_features_test_running()) { ++ // thread->thread_state() != _thread_in_Java ++ // SIGILL must be caused by VM_Version::determine_features(). ++ VM_Version::set_supports_cpucfg(false); ++ stub = pc + 4; // continue with next instruction. ++ } else if ((thread->thread_state() == _thread_in_vm || ++ thread->thread_state() == _thread_in_native) && ++ sig == SIGBUS && /* info->si_code == BUS_OBJERR && */ ++ thread->doing_unsafe_access()) { ++#ifdef PRINT_SIGNAL_HANDLE ++ tty->print_cr("SIGBUS in vm thread \n"); ++#endif ++ address next_pc = pc + NativeInstruction::nop_instruction_size; ++ if (UnsafeCopyMemory::contains_pc(pc)) { ++ next_pc = UnsafeCopyMemory::page_error_continue_pc(pc); ++ } ++ stub = SharedRuntime::handle_unsafe_access(thread, next_pc); ++ } ++ ++ // jni_fast_GetField can trap at certain pc's if a GC kicks in ++ // and the heap gets shrunk before the field access. ++ if ((sig == SIGSEGV) || (sig == SIGBUS)) { ++#ifdef PRINT_SIGNAL_HANDLE ++ tty->print("jni fast get trap: "); ++#endif ++ address addr = JNI_FastGetField::find_slowcase_pc(pc); ++ if (addr != (address)-1) { ++ stub = addr; ++ } ++#ifdef PRINT_SIGNAL_HANDLE ++ tty->print_cr("addr = %d, stub = %lx", addr, stub); ++#endif ++ } ++ } ++ ++ // Execution protection violation ++ // ++ // This should be kept as the last step in the triage. We don't ++ // have a dedicated trap number for a no-execute fault, so be ++ // conservative and allow other handlers the first shot. ++ // ++ // Note: We don't test that info->si_code == SEGV_ACCERR here. ++ // this si_code is so generic that it is almost meaningless; and ++ // the si_code for this condition may change in the future. ++ // Furthermore, a false-positive should be harmless. ++ if (UnguardOnExecutionViolation > 0 && ++ //(sig == SIGSEGV || sig == SIGBUS) && ++ //uc->uc_mcontext.gregs[REG_TRAPNO] == trap_page_fault) { ++ (sig == SIGSEGV || sig == SIGBUS ++#ifdef OPT_RANGECHECK ++ || sig == SIGSYS ++#endif ++ ) && ++ //(uc->uc_mcontext.cause == 2 || uc->uc_mcontext.cause == 3)) { ++ (uc->uc_mcontext.hi1 == 2 || uc->uc_mcontext.hi1 == 3)) { ++#ifdef PRINT_SIGNAL_HANDLE ++ tty->print_cr("execution protection violation\n"); ++#endif ++ ++ int page_size = os::vm_page_size(); ++ address addr = (address) info->si_addr; ++ address pc = os::Posix::ucontext_get_pc(uc); ++ // Make sure the pc and the faulting address are sane. 
++ // ++ // If an instruction spans a page boundary, and the page containing ++ // the beginning of the instruction is executable but the following ++ // page is not, the pc and the faulting address might be slightly ++ // different - we still want to unguard the 2nd page in this case. ++ // ++ // 15 bytes seems to be a (very) safe value for max instruction size. ++ bool pc_is_near_addr = ++ (pointer_delta((void*) addr, (void*) pc, sizeof(char)) < 15); ++Untested("Unimplemented yet"); ++ bool instr_spans_page_boundary = ++/* ++ (align_size_down((intptr_t) pc ^ (intptr_t) addr, ++ (intptr_t) page_size) > 0); ++*/ ++ (align_down((intptr_t) pc ^ (intptr_t) addr, ++ (intptr_t) page_size) > 0); ++ ++ if (pc == addr || (pc_is_near_addr && instr_spans_page_boundary)) { ++ static volatile address last_addr = ++ (address) os::non_memory_address_word(); ++ ++ // In conservative mode, don't unguard unless the address is in the VM ++ if (addr != last_addr && ++ (UnguardOnExecutionViolation > 1 || os::address_is_in_vm(addr))) { ++ ++ // Set memory to RWX and retry ++Untested("Unimplemented yet"); ++/* ++ address page_start = ++ (address) align_size_down((intptr_t) addr, (intptr_t) page_size); ++*/ ++ address page_start = align_down(addr, page_size); ++ bool res = os::protect_memory((char*) page_start, page_size, ++ os::MEM_PROT_RWX); ++ ++ if (PrintMiscellaneous && Verbose) { ++ char buf[256]; ++ jio_snprintf(buf, sizeof(buf), "Execution protection violation " ++ "at " INTPTR_FORMAT ++ ", unguarding " INTPTR_FORMAT ": %s, errno=%d", addr, ++ page_start, (res ? "success" : "failed"), errno); ++ tty->print_raw_cr(buf); ++ } ++ stub = pc; ++ ++ // Set last_addr so if we fault again at the same address, we don't end ++ // up in an endless loop. ++ // ++ // There are two potential complications here. Two threads trapping at ++ // the same address at the same time could cause one of the threads to ++ // think it already unguarded, and abort the VM. Likely very rare. ++ // ++ // The other race involves two threads alternately trapping at ++ // different addresses and failing to unguard the page, resulting in ++ // an endless loop. This condition is probably even more unlikely than ++ // the first. ++ // ++ // Although both cases could be avoided by using locks or thread local ++ // last_addr, these solutions are unnecessary complication: this ++ // handler is a best-effort safety net, not a complete solution. It is ++ // disabled by default and should only be used as a workaround in case ++ // we missed any no-execute-unsafe VM code. ++ ++ last_addr = addr; ++ } ++ } ++ } ++ ++ if (stub != NULL) { ++#ifdef PRINT_SIGNAL_HANDLE ++ tty->print_cr("resolved stub=%lx\n",stub); ++#endif ++ // save all thread context in case we need to restore it ++ if (thread != NULL) thread->set_saved_exception_pc(pc); ++ ++ os::Posix::ucontext_set_pc(uc, stub); ++ return true; ++ } ++ ++ return false; ++} ++ ++// FCSR:...|24| 23 |22|21|... ++// ...|FS|FCC0|FO|FN|... ++void os::Linux::init_thread_fpu_state(void) { ++ if (SetFSFOFN == 999) ++ return; ++ int fs = (SetFSFOFN / 100)? 1:0; ++ int fo = ((SetFSFOFN % 100) / 10)? 1:0; ++ int fn = (SetFSFOFN % 10)? 
1:0; ++ int mask = fs << 24 | fo << 22 | fn << 21; ++ ++ int fcsr = get_fpu_control_word(); ++ fcsr = fcsr | mask; ++ set_fpu_control_word(fcsr); ++ /* ++ if (fcsr != get_fpu_control_word()) ++ tty->print_cr(" fail to set to %lx, get_fpu_control_word:%lx", fcsr, get_fpu_control_word()); ++ */ ++} ++ ++int os::Linux::get_fpu_control_word(void) { ++ int fcsr; ++ __asm__ __volatile__ ( ++ ".set noat;" ++ "daddiu %0, $0, 0;" ++ "cfc1 %0, $31;" ++ : "=r" (fcsr) ++ ); ++ return fcsr; ++} ++ ++void os::Linux::set_fpu_control_word(int fpu_control) { ++ __asm__ __volatile__ ( ++ ".set noat;" ++ "ctc1 %0, $31;" ++ : ++ : "r" (fpu_control) ++ ); ++} ++ ++bool os::is_allocatable(size_t bytes) { ++ ++ if (bytes < 2 * G) { ++ return true; ++ } ++ ++ char* addr = reserve_memory(bytes); ++ ++ if (addr != NULL) { ++ release_memory(addr, bytes); ++ } ++ ++ return addr != NULL; ++} ++ ++//////////////////////////////////////////////////////////////////////////////// ++// thread stack ++ ++//size_t os::Linux::min_stack_allowed = 96 * K; ++size_t os::Posix::_compiler_thread_min_stack_allowed = 48 * K; ++size_t os::Posix::_java_thread_min_stack_allowed = 40 * K; ++size_t os::Posix::_vm_internal_thread_min_stack_allowed = 64 * K; ++ ++ ++/* ++// Test if pthread library can support variable thread stack size. LinuxThreads ++// in fixed stack mode allocates 2M fixed slot for each thread. LinuxThreads ++// in floating stack mode and NPTL support variable stack size. ++bool os::Linux::supports_variable_stack_size() { ++ if (os::Linux::is_NPTL()) { ++ // NPTL, yes ++ return true; ++ ++ } else { ++ // Note: We can't control default stack size when creating a thread. ++ // If we use non-default stack size (pthread_attr_setstacksize), both ++ // floating stack and non-floating stack LinuxThreads will return the ++ // same value. This makes it impossible to implement this function by ++ // detecting thread stack size directly. ++ // ++ // An alternative approach is to check %gs. Fixed-stack LinuxThreads ++ // do not use %gs, so its value is 0. Floating-stack LinuxThreads use ++ // %gs (either as LDT selector or GDT selector, depending on kernel) ++ // to access thread specific data. ++ // ++ // Note that %gs is a reserved glibc register since early 2001, so ++ // applications are not allowed to change its value (Ulrich Drepper from ++ // Redhat confirmed that all known offenders have been modified to use ++ // either %fs or TSD). In the worst case scenario, when VM is embedded in ++ // a native application that plays with %gs, we might see non-zero %gs ++ // even LinuxThreads is running in fixed stack mode. As the result, we'll ++ // return true and skip _thread_safety_check(), so we may not be able to ++ // detect stack-heap collisions. But otherwise it's harmless. ++ // ++ return false; ++ } ++} ++*/ ++ ++// Return default stack size for thr_type ++size_t os::Posix::default_stack_size(os::ThreadType thr_type) { ++ // Default stack size (compiler thread needs larger stack) ++ size_t s = (thr_type == os::compiler_thread ? 
2 * M : 512 * K); ++ return s; ++} ++ ++///////////////////////////////////////////////////////////////////////////// ++// helper functions for fatal error handler ++void os::print_register_info(outputStream *st, const void *context) { ++ if (context == NULL) return; ++ ++ ucontext_t *uc = (ucontext_t*)context; ++ ++ st->print_cr("Register to memory mapping:"); ++ st->cr(); ++ // this is horrendously verbose but the layout of the registers in the ++ // // context does not match how we defined our abstract Register set, so ++ // // we can't just iterate through the gregs area ++ // ++ // // this is only for the "general purpose" registers ++ st->print("R0=" ); print_location(st, (intptr_t)uc->uc_mcontext.gregs[0]); ++ st->print("AT=" ); print_location(st, (intptr_t)uc->uc_mcontext.gregs[1]); ++ st->print("V0=" ); print_location(st, (intptr_t)uc->uc_mcontext.gregs[2]); ++ st->print("V1=" ); print_location(st, (intptr_t)uc->uc_mcontext.gregs[3]); ++ st->cr(); ++ st->print("A0=" ); print_location(st, (intptr_t)uc->uc_mcontext.gregs[4]); ++ st->print("A1=" ); print_location(st, (intptr_t)uc->uc_mcontext.gregs[5]); ++ st->print("A2=" ); print_location(st, (intptr_t)uc->uc_mcontext.gregs[6]); ++ st->print("A3=" ); print_location(st, (intptr_t)uc->uc_mcontext.gregs[7]); ++ st->cr(); ++ st->print("A4=" ); print_location(st, (intptr_t)uc->uc_mcontext.gregs[8]); ++ st->print("A5=" ); print_location(st, (intptr_t)uc->uc_mcontext.gregs[9]); ++ st->print("A6=" ); print_location(st, (intptr_t)uc->uc_mcontext.gregs[10]); ++ st->print("A7=" ); print_location(st, (intptr_t)uc->uc_mcontext.gregs[11]); ++ st->cr(); ++ st->print("T0=" ); print_location(st, (intptr_t)uc->uc_mcontext.gregs[12]); ++ st->print("T1=" ); print_location(st, (intptr_t)uc->uc_mcontext.gregs[13]); ++ st->print("T2=" ); print_location(st, (intptr_t)uc->uc_mcontext.gregs[14]); ++ st->print("T3=" ); print_location(st, (intptr_t)uc->uc_mcontext.gregs[15]); ++ st->cr(); ++ st->print("S0=" ); print_location(st, (intptr_t)uc->uc_mcontext.gregs[16]); ++ st->print("S1=" ); print_location(st, (intptr_t)uc->uc_mcontext.gregs[17]); ++ st->print("S2=" ); print_location(st, (intptr_t)uc->uc_mcontext.gregs[18]); ++ st->print("S3=" ); print_location(st, (intptr_t)uc->uc_mcontext.gregs[19]); ++ st->cr(); ++ st->print("S4=" ); print_location(st, (intptr_t)uc->uc_mcontext.gregs[20]); ++ st->print("S5=" ); print_location(st, (intptr_t)uc->uc_mcontext.gregs[21]); ++ st->print("S6=" ); print_location(st, (intptr_t)uc->uc_mcontext.gregs[22]); ++ st->print("S7=" ); print_location(st, (intptr_t)uc->uc_mcontext.gregs[23]); ++ st->cr(); ++ st->print("T8=" ); print_location(st, (intptr_t)uc->uc_mcontext.gregs[24]); ++ st->print("T9=" ); print_location(st, (intptr_t)uc->uc_mcontext.gregs[25]); ++ st->print("K0=" ); print_location(st, (intptr_t)uc->uc_mcontext.gregs[26]); ++ st->print("K1=" ); print_location(st, (intptr_t)uc->uc_mcontext.gregs[27]); ++ st->cr(); ++ st->print("GP=" ); print_location(st, (intptr_t)uc->uc_mcontext.gregs[28]); ++ st->print("SP=" ); print_location(st, (intptr_t)uc->uc_mcontext.gregs[29]); ++ st->print("FP=" ); print_location(st, (intptr_t)uc->uc_mcontext.gregs[30]); ++ st->print("RA=" ); print_location(st, (intptr_t)uc->uc_mcontext.gregs[31]); ++ st->cr(); ++ ++} ++ ++void os::print_context(outputStream *st, const void *context) { ++ if (context == NULL) return; ++ ++ const ucontext_t *uc = (const ucontext_t*)context; ++ ++ st->print_cr("Registers:"); ++ st->print( "R0=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.gregs[0]); ++ st->print(", 
AT=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.gregs[1]); ++ st->print(", V0=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.gregs[2]); ++ st->print(", V1=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.gregs[3]); ++ st->cr(); ++ st->print( "A0=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.gregs[4]); ++ st->print(", A1=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.gregs[5]); ++ st->print(", A2=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.gregs[6]); ++ st->print(", A3=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.gregs[7]); ++ st->cr(); ++ st->print( "A4=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.gregs[8]); ++ st->print(", A5=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.gregs[9]); ++ st->print(", A6=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.gregs[10]); ++ st->print(", A7=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.gregs[11]); ++ st->cr(); ++ st->print( "T0=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.gregs[12]); ++ st->print(", T1=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.gregs[13]); ++ st->print(", T2=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.gregs[14]); ++ st->print(", T3=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.gregs[15]); ++ st->cr(); ++ st->print( "S0=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.gregs[16]); ++ st->print(", S1=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.gregs[17]); ++ st->print(", S2=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.gregs[18]); ++ st->print(", S3=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.gregs[19]); ++ st->cr(); ++ st->print( "S4=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.gregs[20]); ++ st->print(", S5=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.gregs[21]); ++ st->print(", S6=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.gregs[22]); ++ st->print(", S7=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.gregs[23]); ++ st->cr(); ++ st->print( "T8=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.gregs[24]); ++ st->print(", T9=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.gregs[25]); ++ st->print(", K0=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.gregs[26]); ++ st->print(", K1=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.gregs[27]); ++ st->cr(); ++ st->print( "GP=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.gregs[28]); ++ st->print(", SP=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.gregs[29]); ++ st->print(", FP=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.gregs[30]); ++ st->print(", RA=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.gregs[31]); ++ st->cr(); ++ st->cr(); ++} ++ ++void os::print_tos_pc(outputStream *st, const void *context) { ++ if (context == NULL) return; ++ ++ const ucontext_t* uc = (const ucontext_t*)context; ++ ++ intptr_t *sp = (intptr_t *)os::Linux::ucontext_get_sp(uc); ++ st->print_cr("Top of Stack: (sp=" PTR_FORMAT ")", p2i(sp)); ++ print_hex_dump(st, (address)(sp - 32), (address)(sp + 32), sizeof(intptr_t)); ++ st->cr(); ++ ++ // Note: it may be unsafe to inspect memory near pc. For example, pc may ++ // point to garbage if entry point in an nmethod is corrupted. Leave ++ // this at the end, and hope for the best. 
++ address pc = os::Posix::ucontext_get_pc(uc); ++ st->print_cr("Instructions: (pc=" PTR_FORMAT ")", p2i(pc)); ++ print_hex_dump(st, pc - 64, pc + 64, sizeof(char)); ++ Disassembler::decode(pc - 80, pc + 80, st); ++} ++ ++void os::setup_fpu() { ++ /* ++ //no use for MIPS ++ int fcsr; ++ address fpu_cntrl = StubRoutines::addr_fpu_cntrl_wrd_std(); ++ __asm__ __volatile__ ( ++ ".set noat;" ++ "cfc1 %0, $31;" ++ "sw %0, 0(%1);" ++ : "=r" (fcsr) ++ : "r" (fpu_cntrl) ++ : "memory" ++ ); ++ printf("fpu_cntrl: %lx\n", fpu_cntrl); ++ */ ++} ++ ++#ifndef PRODUCT ++void os::verify_stack_alignment() { ++ assert(((intptr_t)os::current_stack_pointer() & (StackAlignmentInBytes-1)) == 0, "incorrect stack alignment"); ++} ++#endif ++ ++int os::extra_bang_size_in_bytes() { ++ // MIPS does not require the additional stack bang. ++ return 0; ++} ++ ++bool os::is_ActiveCoresMP() { ++ return UseActiveCoresMP && _initial_active_processor_count == 1; ++} +diff --git a/src/hotspot/os_cpu/linux_mips/os_linux_mips.hpp b/src/hotspot/os_cpu/linux_mips/os_linux_mips.hpp +new file mode 100644 +index 00000000000..c07d08156f2 +--- /dev/null ++++ b/src/hotspot/os_cpu/linux_mips/os_linux_mips.hpp +@@ -0,0 +1,39 @@ ++/* ++ * Copyright (c) 1999, 2013, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#ifndef OS_CPU_LINUX_MIPS_VM_OS_LINUX_MIPS_HPP ++#define OS_CPU_LINUX_MIPS_VM_OS_LINUX_MIPS_HPP ++ ++ static void setup_fpu(); ++ static bool is_allocatable(size_t bytes); ++ static intptr_t *get_previous_fp(); ++ ++ // Used to register dynamic code cache area with the OS ++ // Note: Currently only used in 64 bit Windows implementations ++ static bool register_code_area(char *low, char *high) { return true; } ++ ++ static bool is_ActiveCoresMP(); ++ ++#endif // OS_CPU_LINUX_MIPS_VM_OS_LINUX_MIPS_HPP +diff --git a/src/hotspot/os_cpu/linux_mips/prefetch_linux_mips.inline.hpp b/src/hotspot/os_cpu/linux_mips/prefetch_linux_mips.inline.hpp +new file mode 100644 +index 00000000000..93490345f0b +--- /dev/null ++++ b/src/hotspot/os_cpu/linux_mips/prefetch_linux_mips.inline.hpp +@@ -0,0 +1,58 @@ ++/* ++ * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2021, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#ifndef OS_CPU_LINUX_MIPS_VM_PREFETCH_LINUX_MIPS_INLINE_HPP ++#define OS_CPU_LINUX_MIPS_VM_PREFETCH_LINUX_MIPS_INLINE_HPP ++ ++ ++inline void Prefetch::read (void *loc, intx interval) { ++ // 'pref' is implemented as NOP in Loongson 3A ++ __asm__ __volatile__ ( ++ " .set push\n" ++ " .set mips32\n" ++ " .set noreorder\n" ++ " pref 0, 0(%[__loc]) \n" ++ " .set pop\n" ++ : [__loc] "=&r"(loc) ++ : ++ : "memory" ++ ); ++} ++ ++inline void Prefetch::write(void *loc, intx interval) { ++ __asm__ __volatile__ ( ++ " .set push\n" ++ " .set mips32\n" ++ " .set noreorder\n" ++ " pref 1, 0(%[__loc]) \n" ++ " .set pop\n" ++ : [__loc] "=&r"(loc) ++ : ++ : "memory" ++ ); ++ ++} ++ ++#endif // OS_CPU_LINUX_MIPS_VM_PREFETCH_LINUX_MIPS_INLINE_HPP +diff --git a/src/hotspot/os_cpu/linux_mips/safefetch_linux_mips64.S b/src/hotspot/os_cpu/linux_mips/safefetch_linux_mips64.S +new file mode 100644 +index 00000000000..fc6ee6eca65 +--- /dev/null ++++ b/src/hotspot/os_cpu/linux_mips/safefetch_linux_mips64.S +@@ -0,0 +1,60 @@ ++/* ++ * Copyright (c) 2022 SAP SE. All rights reserved. ++ * Copyright (c) 2022, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2023, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ * ++ */ ++ ++ .globl SafeFetchN_impl ++ .globl _SafeFetchN_fault ++ .globl _SafeFetchN_continuation ++ .globl SafeFetch32_impl ++ .globl _SafeFetch32_fault ++ .globl _SafeFetch32_continuation ++ ++ # Support for int SafeFetch32(int* address, int defaultval); ++ # ++ # a0 : address ++ # a1 : defaultval ++SafeFetch32_impl: ++_SafeFetch32_fault: ++ lw $2, 0($4) ++ j $31 ++ nop ++_SafeFetch32_continuation: ++ or $2, $5, $0 ++ j $31 ++ nop ++ ++ # Support for intptr_t SafeFetchN(intptr_t* address, intptr_t defaultval); ++ # ++ # a0 : address ++ # a1 : defaultval ++SafeFetchN_impl: ++_SafeFetchN_fault: ++ ld $2, 0($4) ++ j $31 ++ nop ++_SafeFetchN_continuation: ++ or $2, $5, $0 ++ j $31 ++ nop +diff --git a/src/hotspot/os_cpu/linux_mips/thread_linux_mips.cpp b/src/hotspot/os_cpu/linux_mips/thread_linux_mips.cpp +new file mode 100644 +index 00000000000..4372eb41e9c +--- /dev/null ++++ b/src/hotspot/os_cpu/linux_mips/thread_linux_mips.cpp +@@ -0,0 +1,108 @@ ++/* ++ * Copyright (c) 2003, 2013, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ * ++ */ ++ ++#include "precompiled.hpp" ++#include "compiler/compileBroker.hpp" ++#include "runtime/frame.inline.hpp" ++#include "runtime/thread.inline.hpp" ++#include "runtime/sharedRuntime.hpp" ++ ++void JavaThread::pd_initialize() ++{ ++ _anchor.clear(); ++} ++ ++frame JavaThread::pd_last_frame() { ++ assert(has_last_Java_frame(), "must have last_Java_sp() when suspended"); ++ if (_anchor.last_Java_pc() != NULL) { ++ return frame(_anchor.last_Java_sp(), _anchor.last_Java_fp(), _anchor.last_Java_pc()); ++ } else { ++ // This will pick up pc from sp ++ return frame(_anchor.last_Java_sp(), _anchor.last_Java_fp()); ++ } ++} ++ ++// For Forte Analyzer AsyncGetCallTrace profiling support - thread is ++// currently interrupted by SIGPROF ++bool JavaThread::pd_get_top_frame_for_signal_handler(frame* fr_addr, ++ void* ucontext, bool isInJava) { ++ ++ assert(Thread::current() == this, "caller must be current thread"); ++ return pd_get_top_frame(fr_addr, ucontext, isInJava); ++} ++ ++bool JavaThread::pd_get_top_frame_for_profiling(frame* fr_addr, void* ucontext, bool isInJava) { ++ return pd_get_top_frame(fr_addr, ucontext, isInJava); ++} ++ ++bool JavaThread::pd_get_top_frame(frame* fr_addr, void* ucontext, bool isInJava) { ++ assert(this->is_Java_thread(), "must be JavaThread"); ++ JavaThread* jt = (JavaThread *)this; ++ ++ // If we have a last_Java_frame, then we should use it even if ++ // isInJava == true. It should be more reliable than ucontext info. ++ if (jt->has_last_Java_frame() && jt->frame_anchor()->walkable()) { ++ *fr_addr = jt->pd_last_frame(); ++ return true; ++ } ++ ++ // At this point, we don't have a last_Java_frame, so ++ // we try to glean some information out of the ucontext ++ // if we were running Java code when SIGPROF came in. ++ if (isInJava) { ++ ucontext_t* uc = (ucontext_t*) ucontext; ++ ++ intptr_t* ret_fp; ++ intptr_t* ret_sp; ++ address addr = os::fetch_frame_from_context(uc, &ret_sp, &ret_fp); ++ if (addr == NULL || ret_sp == NULL) { ++ // ucontext wasn't useful ++ return false; ++ } ++ ++ frame ret_frame(ret_sp, ret_fp, addr); ++ if (!ret_frame.safe_for_sender(jt)) { ++#ifdef COMPILER2 ++ // C2 and JVMCI use ebp as a general register see if NULL fp helps ++ frame ret_frame2(ret_sp, NULL, addr); ++ if (!ret_frame2.safe_for_sender(jt)) { ++ // nothing else to try if the frame isn't good ++ return false; ++ } ++ ret_frame = ret_frame2; ++#else ++ // nothing else to try if the frame isn't good ++ return false; ++#endif // COMPILER2_OR_JVMCI ++ } ++ *fr_addr = ret_frame; ++ return true; ++ } ++ ++ // nothing else to try ++ return false; ++} ++ ++void JavaThread::cache_global_variables() { } +diff --git a/src/hotspot/os_cpu/linux_mips/thread_linux_mips.hpp b/src/hotspot/os_cpu/linux_mips/thread_linux_mips.hpp +new file mode 100644 +index 00000000000..c38f6950fd0 +--- /dev/null ++++ b/src/hotspot/os_cpu/linux_mips/thread_linux_mips.hpp +@@ -0,0 +1,66 @@ ++/* ++ * Copyright (c) 2000, 2013, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2021, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#ifndef OS_CPU_LINUX_MIPS_VM_THREAD_LINUX_MIPS_HPP ++#define OS_CPU_LINUX_MIPS_VM_THREAD_LINUX_MIPS_HPP ++ ++ private: ++ void pd_initialize(); ++ ++ frame pd_last_frame(); ++ ++ public: ++ // Mutators are highly dangerous.... ++ intptr_t* last_Java_fp() { return _anchor.last_Java_fp(); } ++ void set_last_Java_fp(intptr_t* fp) { _anchor.set_last_Java_fp(fp); } ++ ++ void set_base_of_stack_pointer(intptr_t* base_sp) { ++ } ++ ++ static ByteSize last_Java_fp_offset() { ++ return byte_offset_of(JavaThread, _anchor) + JavaFrameAnchor::last_Java_fp_offset(); ++ } ++ ++ intptr_t* base_of_stack_pointer() { ++ return NULL; ++ } ++ void record_base_of_stack_pointer() { ++ } ++ ++ bool pd_get_top_frame_for_signal_handler(frame* fr_addr, void* ucontext, ++ bool isInJava); ++ ++ bool pd_get_top_frame_for_profiling(frame* fr_addr, void* ucontext, bool isInJava); ++private: ++ bool pd_get_top_frame(frame* fr_addr, void* ucontext, bool isInJava); ++public: ++ ++ // These routines are only used on cpu architectures that ++ // have separate register stacks (Itanium). ++ static bool register_stack_overflow() { return false; } ++ static void enable_register_stack_guard() {} ++ static void disable_register_stack_guard() {} ++ ++#endif // OS_CPU_LINUX_MIPS_VM_THREAD_LINUX_MIPS_HPP +diff --git a/src/hotspot/os_cpu/linux_mips/vmStructs_linux_mips.hpp b/src/hotspot/os_cpu/linux_mips/vmStructs_linux_mips.hpp +new file mode 100644 +index 00000000000..b7454bf045a +--- /dev/null ++++ b/src/hotspot/os_cpu/linux_mips/vmStructs_linux_mips.hpp +@@ -0,0 +1,55 @@ ++/* ++ * Copyright (c) 2000, 2013, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2016, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#ifndef OS_CPU_LINUX_MIPS_VM_VMSTRUCTS_LINUX_MIPS_HPP ++#define OS_CPU_LINUX_MIPS_VM_VMSTRUCTS_LINUX_MIPS_HPP ++ ++// These are the OS and CPU-specific fields, types and integer ++// constants required by the Serviceability Agent. 
This file is ++// referenced by vmStructs.cpp. ++ ++#define VM_STRUCTS_OS_CPU(nonstatic_field, static_field, unchecked_nonstatic_field, volatile_nonstatic_field, nonproduct_nonstatic_field, c2_nonstatic_field, unchecked_c1_static_field, unchecked_c2_static_field) \ ++ \ ++ /******************************/ \ ++ /* Threads (NOTE: incomplete) */ \ ++ /******************************/ \ ++ nonstatic_field(OSThread, _thread_id, pid_t) \ ++ nonstatic_field(OSThread, _pthread_id, pthread_t) ++ ++ ++#define VM_TYPES_OS_CPU(declare_type, declare_toplevel_type, declare_oop_type, declare_integer_type, declare_unsigned_integer_type, declare_c1_toplevel_type, declare_c2_type, declare_c2_toplevel_type) \ ++ \ ++ /**********************/ \ ++ /* Posix Thread IDs */ \ ++ /**********************/ \ ++ \ ++ declare_integer_type(pid_t) \ ++ declare_unsigned_integer_type(pthread_t) ++ ++#define VM_INT_CONSTANTS_OS_CPU(declare_constant, declare_preprocessor_constant, declare_c1_constant, declare_c2_constant, declare_c2_preprocessor_constant) ++ ++#define VM_LONG_CONSTANTS_OS_CPU(declare_constant, declare_preprocessor_constant, declare_c1_constant, declare_c2_constant, declare_c2_preprocessor_constant) ++ ++#endif // OS_CPU_LINUX_MIPS_VM_VMSTRUCTS_LINUX_MIPS_HPP +diff --git a/src/hotspot/os_cpu/linux_mips/vm_version_linux_mips.cpp b/src/hotspot/os_cpu/linux_mips/vm_version_linux_mips.cpp +new file mode 100644 +index 00000000000..93e4bea04c6 +--- /dev/null ++++ b/src/hotspot/os_cpu/linux_mips/vm_version_linux_mips.cpp +@@ -0,0 +1,28 @@ ++/* ++ * Copyright (c) 2006, 2010, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2019, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#include "precompiled.hpp" ++#include "runtime/os.hpp" ++#include "runtime/vm_version.hpp" +diff --git a/src/hotspot/share/asm/codeBuffer.cpp b/src/hotspot/share/asm/codeBuffer.cpp +index 0012152d48d..e3660cab271 100644 +--- a/src/hotspot/share/asm/codeBuffer.cpp ++++ b/src/hotspot/share/asm/codeBuffer.cpp +@@ -22,6 +22,12 @@ + * + */ + ++/* ++ * This file has been modified by Loongson Technology in 2023. These ++ * modifications are Copyright (c) 2018, 2023, Loongson Technology, and are made ++ * available on the same license terms set forth above. 
++ */ ++ + #include "precompiled.hpp" + #include "asm/codeBuffer.hpp" + #include "code/oopRecorder.inline.hpp" +@@ -330,6 +336,7 @@ void CodeSection::relocate(address at, RelocationHolder const& spec, int format) + assert(rtype == relocInfo::none || + rtype == relocInfo::runtime_call_type || + rtype == relocInfo::internal_word_type|| ++ NOT_ZERO(MIPS64_ONLY(rtype == relocInfo::internal_pc_type ||)) + rtype == relocInfo::section_word_type || + rtype == relocInfo::external_word_type, + "code needs relocation information"); +diff --git a/src/hotspot/share/c1/c1_Compiler.cpp b/src/hotspot/share/c1/c1_Compiler.cpp +index de173c64af1..df93c01d893 100644 +--- a/src/hotspot/share/c1/c1_Compiler.cpp ++++ b/src/hotspot/share/c1/c1_Compiler.cpp +@@ -43,6 +43,12 @@ + #include "utilities/bitMap.inline.hpp" + #include "utilities/macros.hpp" + ++/* ++ * This file has been modified by Loongson Technology in 2022, These ++ * modifications are Copyright (c) 2022, Loongson Technology, and are made ++ * available on the same license terms set forth above. ++ */ ++ + + Compiler::Compiler() : AbstractCompiler(compiler_c1) { + } +@@ -212,7 +218,7 @@ bool Compiler::is_intrinsic_supported(const methodHandle& method) { + case vmIntrinsics::_updateCRC32: + case vmIntrinsics::_updateBytesCRC32: + case vmIntrinsics::_updateByteBufferCRC32: +-#if defined(S390) || defined(PPC64) || defined(AARCH64) ++#if defined(S390) || defined(PPC64) || defined(AARCH64) || defined(LOONGARCH64) + case vmIntrinsics::_updateBytesCRC32C: + case vmIntrinsics::_updateDirectByteBufferCRC32C: + #endif +diff --git a/src/hotspot/share/c1/c1_LIR.cpp b/src/hotspot/share/c1/c1_LIR.cpp +index 308f3a09c15..53a68cdb2fd 100644 +--- a/src/hotspot/share/c1/c1_LIR.cpp ++++ b/src/hotspot/share/c1/c1_LIR.cpp +@@ -22,6 +22,12 @@ + * + */ + ++/* ++ * This file has been modified by Loongson Technology in 2023, These ++ * modifications are Copyright (c) 2022, 2023, Loongson Technology, and are made ++ * available on the same license terms set forth above. 
++ */ ++ + #include "precompiled.hpp" + #include "c1/c1_CodeStubs.hpp" + #include "c1/c1_InstructionPrinter.hpp" +@@ -190,6 +196,8 @@ void LIR_Op2::verify() const { + case lir_cmove: + #ifdef RISCV + assert(false, "lir_cmove is LIR_Op4 on RISCV"); ++#elif defined(LOONGARCH) ++ assert(false, "lir_cmove is LIR_Op4 on LoongArch"); + #endif + case lir_xchg: + break; +@@ -241,7 +249,7 @@ void LIR_Op2::verify() const { + + + LIR_OpBranch::LIR_OpBranch(LIR_Condition cond, BlockBegin* block) +-#ifdef RISCV ++#if defined(RISCV) || defined(LOONGARCH) + : LIR_Op2(lir_branch, cond, LIR_OprFact::illegalOpr, LIR_OprFact::illegalOpr, (CodeEmitInfo*)NULL) + #else + : LIR_Op(lir_branch, LIR_OprFact::illegalOpr, (CodeEmitInfo*)NULL) +@@ -254,7 +262,7 @@ LIR_OpBranch::LIR_OpBranch(LIR_Condition cond, BlockBegin* block) + } + + LIR_OpBranch::LIR_OpBranch(LIR_Condition cond, CodeStub* stub) : +-#ifdef RISCV ++#if defined(RISCV) || defined(LOONGARCH) + LIR_Op2(lir_branch, cond, LIR_OprFact::illegalOpr, LIR_OprFact::illegalOpr, (CodeEmitInfo*)NULL) + #else + LIR_Op(lir_branch, LIR_OprFact::illegalOpr, (CodeEmitInfo*)NULL) +@@ -267,7 +275,7 @@ LIR_OpBranch::LIR_OpBranch(LIR_Condition cond, CodeStub* stub) : + } + + LIR_OpBranch::LIR_OpBranch(LIR_Condition cond, BlockBegin* block, BlockBegin* ublock) +-#ifdef RISCV ++#if defined(RISCV) || defined(LOONGARCH) + : LIR_Op2(lir_cond_float_branch, cond, LIR_OprFact::illegalOpr, LIR_OprFact::illegalOpr, (CodeEmitInfo*)NULL) + #else + : LIR_Op(lir_cond_float_branch, LIR_OprFact::illegalOpr, (CodeEmitInfo*)NULL) +@@ -512,6 +520,7 @@ void LIR_OpVisitState::visit(LIR_Op* op) { + assert(opConvert->_info == NULL, "must be"); + if (opConvert->_opr->is_valid()) do_input(opConvert->_opr); + if (opConvert->_result->is_valid()) do_output(opConvert->_result); ++ if (opConvert->_tmp->is_valid()) do_temp(opConvert->_tmp); + #ifdef PPC32 + if (opConvert->_tmp1->is_valid()) do_temp(opConvert->_tmp1); + if (opConvert->_tmp2->is_valid()) do_temp(opConvert->_tmp2); +@@ -528,7 +537,7 @@ void LIR_OpVisitState::visit(LIR_Op* op) { + assert(op->as_OpBranch() != NULL, "must be"); + LIR_OpBranch* opBranch = (LIR_OpBranch*)op; + +-#ifdef RISCV ++#if defined(RISCV) || defined(LOONGARCH) + assert(opBranch->_tmp1->is_illegal() && opBranch->_tmp2->is_illegal() && + opBranch->_tmp3->is_illegal() && opBranch->_tmp4->is_illegal() && + opBranch->_tmp5->is_illegal(), "not used"); +@@ -625,7 +634,7 @@ void LIR_OpVisitState::visit(LIR_Op* op) { + // to the result operand, otherwise the backend fails + case lir_cmove: + { +-#ifdef RISCV ++#if defined(RISCV) || defined(LOONGARCH) + assert(op->as_Op4() != NULL, "must be"); + LIR_Op4* op4 = (LIR_Op4*)op; + +@@ -1095,7 +1104,7 @@ void LIR_Op3::emit_code(LIR_Assembler* masm) { + masm->emit_op3(this); + } + +-#ifdef RISCV ++#if defined(RISCV) || defined(LOONGARCH) + void LIR_Op4::emit_code(LIR_Assembler* masm) { + masm->emit_op4(this); + } +@@ -1141,7 +1150,7 @@ LIR_List::LIR_List(Compilation* compilation, BlockBegin* block) + , _file(NULL) + , _line(0) + #endif +-#ifdef RISCV ++#if defined(RISCV) || defined(LOONGARCH) + , _cmp_opr1(LIR_OprFact::illegalOpr) + , _cmp_opr2(LIR_OprFact::illegalOpr) + #endif +@@ -1162,7 +1171,7 @@ void LIR_List::set_file_and_line(const char * file, int line) { + } + #endif + +-#ifdef RISCV ++#if defined(RISCV) || defined(LOONGARCH) + void LIR_List::set_cmp_oprs(LIR_Op* op) { + switch (op->code()) { + case lir_cmp: +@@ -1185,7 +1194,7 @@ void LIR_List::set_cmp_oprs(LIR_Op* op) { + break; + #if INCLUDE_ZGC + case lir_zloadbarrier_test: 
+- _cmp_opr1 = FrameMap::as_opr(t1); ++ _cmp_opr1 = FrameMap::as_opr(RISCV_ONLY(t1) LOONGARCH64_ONLY(SCR1)); + _cmp_opr2 = LIR_OprFact::intConst(0); + break; + #endif +@@ -1924,7 +1933,7 @@ void LIR_Op1::print_patch_code(outputStream* out, LIR_PatchCode code) { + // LIR_OpBranch + void LIR_OpBranch::print_instr(outputStream* out) const { + print_condition(out, cond()); out->print(" "); +-#ifdef RISCV ++#if defined(RISCV) || defined(LOONGARCH) + in_opr1()->print(out); out->print(" "); + in_opr2()->print(out); out->print(" "); + #endif +@@ -1963,6 +1972,9 @@ void LIR_OpConvert::print_instr(outputStream* out) const { + print_bytecode(out, bytecode()); + in_opr()->print(out); out->print(" "); + result_opr()->print(out); out->print(" "); ++ if(tmp()->is_valid()) { ++ tmp()->print(out); out->print(" "); ++ } + #ifdef PPC32 + if(tmp1()->is_valid()) { + tmp1()->print(out); out->print(" "); +@@ -2014,7 +2026,7 @@ void LIR_OpRoundFP::print_instr(outputStream* out) const { + + // LIR_Op2 + void LIR_Op2::print_instr(outputStream* out) const { +-#ifdef RISCV ++#if defined(RISCV) || defined(LOONGARCH) + if (code() == lir_cmp || code() == lir_branch || code() == lir_cond_float_branch) { + #else + if (code() == lir_cmove || code() == lir_cmp) { +@@ -2069,7 +2081,7 @@ void LIR_Op3::print_instr(outputStream* out) const { + result_opr()->print(out); + } + +-#ifdef RISCV ++#if defined(RISCV) || defined(LOONGARCH) + // LIR_Op4 + void LIR_Op4::print_instr(outputStream* out) const { + print_condition(out, condition()); out->print(" "); +diff --git a/src/hotspot/share/c1/c1_LIR.hpp b/src/hotspot/share/c1/c1_LIR.hpp +index 717404e9726..0fffd4aabfc 100644 +--- a/src/hotspot/share/c1/c1_LIR.hpp ++++ b/src/hotspot/share/c1/c1_LIR.hpp +@@ -22,6 +22,12 @@ + * + */ + ++/* ++ * This file has been modified by Loongson Technology in 2023, These ++ * modifications are Copyright (c) 2022, 2023, Loongson Technology, and are made ++ * available on the same license terms set forth above. 
++ */ ++ + #ifndef SHARE_C1_C1_LIR_HPP + #define SHARE_C1_C1_LIR_HPP + +@@ -869,7 +875,7 @@ class LIR_Op2; + class LIR_OpDelay; + class LIR_Op3; + class LIR_OpAllocArray; +-#ifdef RISCV ++#if defined(RISCV) || defined(LOONGARCH) + class LIR_Op4; + #endif + class LIR_OpCall; +@@ -917,7 +923,7 @@ enum LIR_Code { + , lir_null_check + , lir_return + , lir_leal +-#ifndef RISCV ++#if !defined(RISCV) && !defined(LOONGARCH) + , lir_branch + , lir_cond_float_branch + #endif +@@ -931,7 +937,7 @@ enum LIR_Code { + , lir_load_klass + , end_op1 + , begin_op2 +-#ifdef RISCV ++#if defined(RISCV) || defined(LOONGARCH) + , lir_branch + , lir_cond_float_branch + #endif +@@ -939,7 +945,7 @@ enum LIR_Code { + , lir_cmp_l2i + , lir_ucmp_fd2i + , lir_cmp_fd2i +-#ifndef RISCV ++#if !defined(RISCV) && !defined(LOONGARCH) + , lir_cmove + #endif + , lir_add +@@ -969,7 +975,7 @@ enum LIR_Code { + , lir_fmad + , lir_fmaf + , end_op3 +-#ifdef RISCV ++#if defined(RISCV) || defined(LOONGARCH) + , begin_op4 + , lir_cmove + , end_op4 +@@ -1010,7 +1016,7 @@ enum LIR_Code { + , begin_opAssert + , lir_assert + , end_opAssert +-#if defined(RISCV) && defined(INCLUDE_ZGC) ++#if (defined(RISCV) || defined(LOONGARCH)) && defined(INCLUDE_ZGC) + , begin_opZLoadBarrierTest + , lir_zloadbarrier_test + , end_opZLoadBarrierTest +@@ -1151,7 +1157,7 @@ class LIR_Op: public CompilationResourceObj { + virtual LIR_Op1* as_Op1() { return NULL; } + virtual LIR_Op2* as_Op2() { return NULL; } + virtual LIR_Op3* as_Op3() { return NULL; } +-#ifdef RISCV ++#if defined(RISCV) || defined(LOONGARCH) + virtual LIR_Op4* as_Op4() { return NULL; } + #endif + virtual LIR_OpArrayCopy* as_OpArrayCopy() { return NULL; } +@@ -1447,15 +1453,18 @@ class LIR_OpConvert: public LIR_Op1 { + private: + Bytecodes::Code _bytecode; + ConversionStub* _stub; ++ LIR_Opr _tmp; + + public: +- LIR_OpConvert(Bytecodes::Code code, LIR_Opr opr, LIR_Opr result, ConversionStub* stub) ++ LIR_OpConvert(Bytecodes::Code code, LIR_Opr opr, LIR_Opr result, ConversionStub* stub, LIR_Opr tmp) + : LIR_Op1(lir_convert, opr, result) + , _bytecode(code) +- , _stub(stub) {} ++ , _stub(stub) ++ , _tmp(tmp) {} + + Bytecodes::Code bytecode() const { return _bytecode; } + ConversionStub* stub() const { return _stub; } ++ LIR_Opr tmp() const { return _tmp; } + + virtual void emit_code(LIR_Assembler* masm); + virtual LIR_OpConvert* as_OpConvert() { return this; } +@@ -1610,7 +1619,11 @@ class LIR_Op2: public LIR_Op { + , _tmp4(LIR_OprFact::illegalOpr) + , _tmp5(LIR_OprFact::illegalOpr) + , _condition(condition) { +- assert(code == lir_cmp || code == lir_assert RISCV_ONLY(|| code == lir_branch || code == lir_cond_float_branch), "code check"); ++ assert(code == lir_cmp || code == lir_assert ++#if defined(RISCV) || defined(LOONGARCH) ++ || code == lir_branch || code == lir_cond_float_branch ++#endif ++ , "code check"); + } + + LIR_Op2(LIR_Code code, LIR_Condition condition, LIR_Opr opr1, LIR_Opr opr2, LIR_Opr result, BasicType type) +@@ -1642,7 +1655,11 @@ class LIR_Op2: public LIR_Op { + , _tmp4(LIR_OprFact::illegalOpr) + , _tmp5(LIR_OprFact::illegalOpr) + , _condition(lir_cond_unknown) { +- assert(code != lir_cmp && RISCV_ONLY(code != lir_branch && code != lir_cond_float_branch &&) is_in_range(code, begin_op2, end_op2), "code check"); ++ assert(code != lir_cmp && ++#if defined(RISCV) || defined(LOONGARCH) ++ code != lir_branch && code != lir_cond_float_branch && ++#endif ++ is_in_range(code, begin_op2, end_op2), "code check"); + } + + LIR_Op2(LIR_Code code, LIR_Opr opr1, LIR_Opr opr2, LIR_Opr 
result, LIR_Opr tmp1, LIR_Opr tmp2 = LIR_OprFact::illegalOpr, +@@ -1658,7 +1675,11 @@ class LIR_Op2: public LIR_Op { + , _tmp4(tmp4) + , _tmp5(tmp5) + , _condition(lir_cond_unknown) { +- assert(code != lir_cmp && RISCV_ONLY(code != lir_branch && code != lir_cond_float_branch &&) is_in_range(code, begin_op2, end_op2), "code check"); ++ assert(code != lir_cmp && ++#if defined(RISCV) || defined(LOONGARCH) ++ code != lir_branch && code != lir_cond_float_branch && ++#endif ++ is_in_range(code, begin_op2, end_op2), "code check"); + } + + LIR_Opr in_opr1() const { return _opr1; } +@@ -1670,14 +1691,14 @@ class LIR_Op2: public LIR_Op { + LIR_Opr tmp4_opr() const { return _tmp4; } + LIR_Opr tmp5_opr() const { return _tmp5; } + LIR_Condition condition() const { +-#ifdef RISCV ++#if defined(RISCV) || defined(LOONGARCH) + assert(code() == lir_cmp || code() == lir_branch || code() == lir_cond_float_branch || code() == lir_assert, "only valid for branch and assert"); return _condition; + #else + assert(code() == lir_cmp || code() == lir_cmove || code() == lir_assert, "only valid for cmp and cmove and assert"); return _condition; + #endif + } + void set_condition(LIR_Condition condition) { +-#ifdef RISCV ++#if defined(RISCV) || defined(LOONGARCH) + assert(code() == lir_cmp || code() == lir_branch || code() == lir_cond_float_branch, "only valid for branch"); _condition = condition; + #else + assert(code() == lir_cmp || code() == lir_cmove, "only valid for cmp and cmove"); _condition = condition; +@@ -1695,7 +1716,7 @@ class LIR_Op2: public LIR_Op { + virtual void print_instr(outputStream* out) const PRODUCT_RETURN; + }; + +-#ifdef RISCV ++#if defined(RISCV) || defined(LOONGARCH) + class LIR_OpBranch: public LIR_Op2 { + #else + class LIR_OpBranch: public LIR_Op { +@@ -1703,7 +1724,7 @@ class LIR_OpBranch: public LIR_Op { + friend class LIR_OpVisitState; + + private: +-#ifndef RISCV ++#if !defined(RISCV) && !defined(LOONGARCH) + LIR_Condition _cond; + #endif + Label* _label; +@@ -1713,7 +1734,7 @@ class LIR_OpBranch: public LIR_Op { + + public: + LIR_OpBranch(LIR_Condition cond, Label* lbl) +-#ifdef RISCV ++#if defined(RISCV) || defined(LOONGARCH) + : LIR_Op2(lir_branch, cond, LIR_OprFact::illegalOpr, LIR_OprFact::illegalOpr, (CodeEmitInfo*) NULL) + #else + : LIR_Op(lir_branch, LIR_OprFact::illegalOpr, (CodeEmitInfo*) NULL) +@@ -1730,7 +1751,7 @@ class LIR_OpBranch: public LIR_Op { + // for unordered comparisons + LIR_OpBranch(LIR_Condition cond, BlockBegin* block, BlockBegin* ublock); + +-#ifdef RISCV ++#if defined(RISCV) || defined(LOONGARCH) + LIR_Condition cond() const { return condition(); } + void set_cond(LIR_Condition cond) { set_condition(cond); } + #else +@@ -1814,7 +1835,7 @@ class LIR_Op3: public LIR_Op { + virtual void print_instr(outputStream* out) const PRODUCT_RETURN; + }; + +-#ifdef RISCV ++#if defined(RISCV) || defined(LOONGARCH) + class LIR_Op4: public LIR_Op { + friend class LIR_OpVisitState; + protected: +@@ -2112,7 +2133,7 @@ class LIR_List: public CompilationResourceObj { + const char * _file; + int _line; + #endif +-#ifdef RISCV ++#if defined(RISCV) || defined(LOONGARCH) + LIR_Opr _cmp_opr1; + LIR_Opr _cmp_opr2; + #endif +@@ -2128,7 +2149,7 @@ class LIR_List: public CompilationResourceObj { + } + #endif // PRODUCT + +-#ifdef RISCV ++#if defined(RISCV) || defined(LOONGARCH) + set_cmp_oprs(op); + // lir_cmp set cmp oprs only on riscv + if (op->code() == lir_cmp) return; +@@ -2150,7 +2171,7 @@ class LIR_List: public CompilationResourceObj { + void set_file_and_line(const char * file, 
int line); + #endif + +-#ifdef RISCV ++#if defined(RISCV) || defined(LOONGARCH) + void set_cmp_oprs(LIR_Op* op); + #endif + +@@ -2246,7 +2267,9 @@ class LIR_List: public CompilationResourceObj { + void safepoint(LIR_Opr tmp, CodeEmitInfo* info) { append(new LIR_Op1(lir_safepoint, tmp, info)); } + void return_op(LIR_Opr result) { append(new LIR_OpReturn(result)); } + +- void convert(Bytecodes::Code code, LIR_Opr left, LIR_Opr dst, ConversionStub* stub = NULL/*, bool is_32bit = false*/) { append(new LIR_OpConvert(code, left, dst, stub)); } ++ void convert(Bytecodes::Code code, LIR_Opr left, LIR_Opr dst, ConversionStub* stub = NULL, LIR_Opr tmp = LIR_OprFact::illegalOpr) { ++ append(new LIR_OpConvert(code, left, dst, stub, tmp)); ++ } + + void logical_and (LIR_Opr left, LIR_Opr right, LIR_Opr dst) { append(new LIR_Op2(lir_logic_and, left, right, dst)); } + void logical_or (LIR_Opr left, LIR_Opr right, LIR_Opr dst) { append(new LIR_Op2(lir_logic_or, left, right, dst)); } +@@ -2273,7 +2296,7 @@ class LIR_List: public CompilationResourceObj { + void cmp_mem_int(LIR_Condition condition, LIR_Opr base, int disp, int c, CodeEmitInfo* info); + void cmp_reg_mem(LIR_Condition condition, LIR_Opr reg, LIR_Address* addr, CodeEmitInfo* info); + +-#ifdef RISCV ++#if defined(RISCV) || defined(LOONGARCH) + void cmove(LIR_Condition condition, LIR_Opr src1, LIR_Opr src2, LIR_Opr dst, BasicType type, + LIR_Opr cmp_opr1 = LIR_OprFact::illegalOpr, LIR_Opr cmp_opr2 = LIR_OprFact::illegalOpr) { + append(new LIR_Op4(lir_cmove, condition, src1, src2, cmp_opr1, cmp_opr2, dst, type)); +diff --git a/src/hotspot/share/c1/c1_LIRAssembler.cpp b/src/hotspot/share/c1/c1_LIRAssembler.cpp +index 989a6f8ad25..e288de2ab8e 100644 +--- a/src/hotspot/share/c1/c1_LIRAssembler.cpp ++++ b/src/hotspot/share/c1/c1_LIRAssembler.cpp +@@ -22,6 +22,12 @@ + * + */ + ++/* ++ * This file has been modified by Loongson Technology in 2023, These ++ * modifications are Copyright (c) 2022, 2023, Loongson Technology, and are made ++ * available on the same license terms set forth above. ++ */ ++ + #include "precompiled.hpp" + #include "asm/assembler.inline.hpp" + #include "c1/c1_Compilation.hpp" +@@ -691,7 +697,7 @@ void LIR_Assembler::emit_op2(LIR_Op2* op) { + comp_fl2i(op->code(), op->in_opr1(), op->in_opr2(), op->result_opr(), op); + break; + +-#ifndef RISCV ++#if !defined(RISCV) && !defined(LOONGARCH) + case lir_cmove: + cmove(op->condition(), op->in_opr1(), op->in_opr2(), op->result_opr(), op->type()); + break; +@@ -758,7 +764,7 @@ void LIR_Assembler::emit_op2(LIR_Op2* op) { + } + } + +-#ifdef RISCV ++#if defined(RISCV) || defined(LOONGARCH) + void LIR_Assembler::emit_op4(LIR_Op4* op) { + switch(op->code()) { + case lir_cmove: +diff --git a/src/hotspot/share/c1/c1_LIRAssembler.hpp b/src/hotspot/share/c1/c1_LIRAssembler.hpp +index c82baa15fe7..84c34db4985 100644 +--- a/src/hotspot/share/c1/c1_LIRAssembler.hpp ++++ b/src/hotspot/share/c1/c1_LIRAssembler.hpp +@@ -22,6 +22,12 @@ + * + */ + ++/* ++ * This file has been modified by Loongson Technology in 2023, These ++ * modifications are Copyright (c) 2022, 2023, Loongson Technology, and are made ++ * available on the same license terms set forth above. 
++ */ ++ + #ifndef SHARE_C1_C1_LIRASSEMBLER_HPP + #define SHARE_C1_C1_LIRASSEMBLER_HPP + +@@ -186,7 +192,7 @@ class LIR_Assembler: public CompilationResourceObj { + void emit_op1(LIR_Op1* op); + void emit_op2(LIR_Op2* op); + void emit_op3(LIR_Op3* op); +-#ifdef RISCV ++#if defined(RISCV) || defined(LOONGARCH) + void emit_op4(LIR_Op4* op); + #endif + void emit_opBranch(LIR_OpBranch* op); +@@ -222,7 +228,7 @@ class LIR_Assembler: public CompilationResourceObj { + void volatile_move_op(LIR_Opr src, LIR_Opr result, BasicType type, CodeEmitInfo* info); + void comp_mem_op(LIR_Opr src, LIR_Opr result, BasicType type, CodeEmitInfo* info); // info set for null exceptions + void comp_fl2i(LIR_Code code, LIR_Opr left, LIR_Opr right, LIR_Opr result, LIR_Op2* op); +-#ifdef RISCV ++#if defined(RISCV) || defined(LOONGARCH) + void cmove(LIR_Condition code, LIR_Opr left, LIR_Opr right, LIR_Opr result, BasicType type, + LIR_Opr cmp_opr1 = LIR_OprFact::illegalOpr, LIR_Opr cmp_opr2 = LIR_OprFact::illegalOpr); + #else +diff --git a/src/hotspot/share/c1/c1_LinearScan.cpp b/src/hotspot/share/c1/c1_LinearScan.cpp +index d3d38d11a90..6947406b2e7 100644 +--- a/src/hotspot/share/c1/c1_LinearScan.cpp ++++ b/src/hotspot/share/c1/c1_LinearScan.cpp +@@ -35,6 +35,12 @@ + #include "runtime/timerTrace.hpp" + #include "utilities/bitMap.inline.hpp" + ++/* ++ * This file has been modified by Loongson Technology in 2023, These ++ * modifications are Copyright (c) 2022, 2023, Loongson Technology, and are made ++ * available on the same license terms set forth above. ++ */ ++ + #ifndef PRODUCT + + static LinearScanStatistic _stat_before_alloc; +@@ -1240,7 +1246,7 @@ void LinearScan::add_register_hints(LIR_Op* op) { + break; + } + case lir_cmove: { +-#ifdef RISCV ++#if defined(RISCV) || defined(LOONGARCH) + assert(op->as_Op4() != NULL, "lir_cmove must be LIR_Op4"); + LIR_Op4* cmove = (LIR_Op4*)op; + #else +@@ -3151,7 +3157,7 @@ void LinearScan::do_linear_scan() { + } + } + +-#ifndef RISCV ++#if !defined(RISCV) && !defined(LOONGARCH) + // Disable these optimizations on riscv temporarily, because it does not + // work when the comparison operands are bound to branches or cmoves. + { TIME_LINEAR_SCAN(timer_optimize_lir); +@@ -6385,7 +6391,7 @@ void ControlFlowOptimizer::delete_unnecessary_jumps(BlockList* code) { + // There might be a cmove inserted for profiling which depends on the same + // compare. If we change the condition of the respective compare, we have + // to take care of this cmove as well. +-#ifdef RISCV ++#if defined(RISCV) || defined(LOONGARCH) + LIR_Op4* prev_cmove = NULL; + #else + LIR_Op2* prev_cmove = NULL; +@@ -6395,7 +6401,7 @@ void ControlFlowOptimizer::delete_unnecessary_jumps(BlockList* code) { + prev_op = instructions->at(j); + // check for the cmove + if (prev_op->code() == lir_cmove) { +-#ifdef RISCV ++#if defined(RISCV) || defined(LOONGARCH) + assert(prev_op->as_Op4() != NULL, "cmove must be of type LIR_Op4"); + prev_cmove = (LIR_Op4*)prev_op; + #else +diff --git a/src/hotspot/share/code/nmethod.cpp b/src/hotspot/share/code/nmethod.cpp +index 51051170794..a6c40704927 100644 +--- a/src/hotspot/share/code/nmethod.cpp ++++ b/src/hotspot/share/code/nmethod.cpp +@@ -22,6 +22,12 @@ + * + */ + ++/* ++ * This file has been modified by Loongson Technology in 2021. These ++ * modifications are Copyright (c) 2018, 2021, Loongson Technology, and are made ++ * available on the same license terms set forth above. 
++ */ ++ + #include "precompiled.hpp" + #include "jvm.h" + #include "asm/assembler.inline.hpp" +@@ -2540,7 +2546,8 @@ void nmethod::verify_scopes() { + //verify_interrupt_point(iter.addr()); + break; + case relocInfo::runtime_call_type: +- case relocInfo::runtime_call_w_cp_type: { ++ NOT_MIPS64(case relocInfo::runtime_call_w_cp_type:) ++ { + address destination = iter.reloc()->value(); + // Right now there is no way to find out which entries support + // an interrupt point. It would be nice if we had this +@@ -3108,7 +3115,8 @@ const char* nmethod::reloc_string_for(u_char* begin, u_char* end) { + return st.as_string(); + } + case relocInfo::runtime_call_type: +- case relocInfo::runtime_call_w_cp_type: { ++ NOT_MIPS64(case relocInfo::runtime_call_w_cp_type:) ++ { + stringStream st; + st.print("runtime_call"); + CallRelocation* r = (CallRelocation*)iter.reloc(); +diff --git a/src/hotspot/share/code/relocInfo.cpp b/src/hotspot/share/code/relocInfo.cpp +index 47769c53a5b..ed69d18d759 100644 +--- a/src/hotspot/share/code/relocInfo.cpp ++++ b/src/hotspot/share/code/relocInfo.cpp +@@ -402,6 +402,7 @@ void virtual_call_Relocation::unpack_data() { + _cached_value = x0==0? NULL: address_from_scaled_offset(x0, point); + } + ++#ifndef MIPS64 + void runtime_call_w_cp_Relocation::pack_data_to(CodeSection * dest) { + short* p = pack_1_int_to((short *)dest->locs_end(), (jint)(_offset >> 2)); + dest->set_locs_end((relocInfo*) p); +@@ -410,6 +411,7 @@ void runtime_call_w_cp_Relocation::pack_data_to(CodeSection * dest) { + void runtime_call_w_cp_Relocation::unpack_data() { + _offset = unpack_1_int() << 2; + } ++#endif + + void static_stub_Relocation::pack_data_to(CodeSection* dest) { + short* p = (short*) dest->locs_end(); +@@ -874,7 +876,7 @@ void RelocIterator::print_current() { + break; + } + case relocInfo::runtime_call_type: +- case relocInfo::runtime_call_w_cp_type: ++ NOT_MIPS64(case relocInfo::runtime_call_w_cp_type:) + { + CallRelocation* r = (CallRelocation*) reloc(); + tty->print(" | [destination=" INTPTR_FORMAT "]", p2i(r->destination())); +diff --git a/src/hotspot/share/code/relocInfo.hpp b/src/hotspot/share/code/relocInfo.hpp +index 55d4ac7c62d..b1c34733021 100644 +--- a/src/hotspot/share/code/relocInfo.hpp ++++ b/src/hotspot/share/code/relocInfo.hpp +@@ -266,7 +266,11 @@ class relocInfo { + poll_return_type = 11, // polling instruction for safepoints at return + metadata_type = 12, // metadata that used to be oops + trampoline_stub_type = 13, // stub-entry for trampoline ++#ifndef MIPS64 + runtime_call_w_cp_type = 14, // Runtime call which may load its target from the constant pool ++#else ++ internal_pc_type = 14, // tag for internal data ++#endif + data_prefix_tag = 15, // tag for a prefix (carries data arguments) + type_mask = 15 // A mask which selects only the above values + }; +@@ -300,13 +304,13 @@ class relocInfo { + visitor(static_call) \ + visitor(static_stub) \ + visitor(runtime_call) \ +- visitor(runtime_call_w_cp) \ ++ NOT_MIPS64(visitor(runtime_call_w_cp)) \ + visitor(external_word) \ + visitor(internal_word) \ + visitor(poll) \ + visitor(poll_return) \ +- visitor(section_word) \ + visitor(trampoline_stub) \ ++ NOT_MIPS64(visitor(section_word))MIPS64_ONLY(ZERO_ONLY(visitor(section_word))NOT_ZERO(visitor(internal_pc))) + + + public: +@@ -1146,6 +1150,16 @@ class runtime_call_Relocation : public CallRelocation { + }; + + ++#ifdef MIPS64 ++// to handle the set_last_java_frame pc ++class internal_pc_Relocation : public Relocation { ++ public: ++ address pc() { return 
pd_get_address_from_code(); } ++ void fix_relocation_after_move(const CodeBuffer* src, CodeBuffer* dest); ++ ++ internal_pc_Relocation() : Relocation(relocInfo::internal_pc_type) { } ++}; ++#else + class runtime_call_w_cp_Relocation : public CallRelocation { + public: + static RelocationHolder spec() { +@@ -1175,6 +1189,7 @@ class runtime_call_w_cp_Relocation : public CallRelocation { + void pack_data_to(CodeSection * dest); + void unpack_data(); + }; ++#endif + + // Trampoline Relocations. + // A trampoline allows to encode a small branch in the code, even if there +diff --git a/src/hotspot/share/code/vtableStubs.cpp b/src/hotspot/share/code/vtableStubs.cpp +index d490adb3eef..0b1d7dc0a27 100644 +--- a/src/hotspot/share/code/vtableStubs.cpp ++++ b/src/hotspot/share/code/vtableStubs.cpp +@@ -22,6 +22,12 @@ + * + */ + ++/* ++ * This file has been modified by Loongson Technology in 2021, These ++ * modifications are Copyright (c) 2019, 2021, Loongson Technology, and are made ++ * available on the same license terms set forth above. ++ */ ++ + #include "precompiled.hpp" + #include "code/vtableStubs.hpp" + #include "compiler/compileBroker.hpp" +@@ -102,7 +108,11 @@ int VtableStubs::_itab_stub_size = 0; + + #if defined(PRODUCT) + // These values are good for the PRODUCT case (no tracing). ++#if defined MIPS64 || defined LOONGARCH64 ++ static const int first_vtableStub_size = 128; ++#else + static const int first_vtableStub_size = 64; ++#endif + static const int first_itableStub_size = 256; + #else + // These values are good for the non-PRODUCT case (when tracing can be switched on). +@@ -113,6 +123,7 @@ int VtableStubs::_itab_stub_size = 0; + // vtable itable + // aarch64: 460 324 + // arm: ? ? ++ // mips64: 728 328 + // ppc (linux, BE): 404 288 + // ppc (linux, LE): 356 276 + // ppc (AIX): 416 296 +diff --git a/src/hotspot/share/gc/g1/g1ParScanThreadState.inline.hpp b/src/hotspot/share/gc/g1/g1ParScanThreadState.inline.hpp +index f0944108810..a8c1f97a80e 100644 +--- a/src/hotspot/share/gc/g1/g1ParScanThreadState.inline.hpp ++++ b/src/hotspot/share/gc/g1/g1ParScanThreadState.inline.hpp +@@ -22,6 +22,12 @@ + * + */ + ++/* ++ * This file has been modified by Loongson Technology in 2021, These ++ * modifications are Copyright (c) 2021, Loongson Technology, and are made ++ * available on the same license terms set forth above. ++ */ ++ + #ifndef SHARE_GC_G1_G1PARSCANTHREADSTATE_INLINE_HPP + #define SHARE_GC_G1_G1PARSCANTHREADSTATE_INLINE_HPP + +@@ -58,6 +64,9 @@ void G1ParScanThreadState::trim_queue_partially() { + void G1ParScanThreadState::trim_queue() { + trim_queue_to_threshold(0); + assert(_task_queue->overflow_empty(), "invariant"); ++ // Load of _age._fields._top in trim_queue_to_threshold must not pass ++ // the load of _age._fields._top in assert _task_queue->taskqueue_empty(). ++ DEBUG_ONLY(OrderAccess::loadload();) + assert(_task_queue->taskqueue_empty(), "invariant"); + } + +diff --git a/src/hotspot/share/gc/shenandoah/shenandoahArguments.cpp b/src/hotspot/share/gc/shenandoah/shenandoahArguments.cpp +index 7d31ff02e1a..07dac06aecf 100644 +--- a/src/hotspot/share/gc/shenandoah/shenandoahArguments.cpp ++++ b/src/hotspot/share/gc/shenandoah/shenandoahArguments.cpp +@@ -22,6 +22,12 @@ + * + */ + ++/* ++ * This file has been modified by Loongson Technology in 2022, These ++ * modifications are Copyright (c) 2022, Loongson Technology, and are made ++ * available on the same license terms set forth above. 
++ */ ++ + #include "precompiled.hpp" + #include "gc/shared/gcArguments.hpp" + #include "gc/shared/tlab_globals.hpp" +@@ -35,7 +41,7 @@ + #include "utilities/defaultStream.hpp" + + void ShenandoahArguments::initialize() { +-#if !(defined AARCH64 || defined AMD64 || defined IA32 || defined PPC64 || defined RISCV64) ++#if !(defined AARCH64 || defined AMD64 || defined IA32 || defined PPC64 || defined RISCV64 || defined LOONGARCH64) + vm_exit_during_initialization("Shenandoah GC is not supported on this platform."); + #endif + +diff --git a/src/hotspot/share/gc/z/c1/zBarrierSetC1.cpp b/src/hotspot/share/gc/z/c1/zBarrierSetC1.cpp +index 0e99bf107c1..d5541cf8966 100644 +--- a/src/hotspot/share/gc/z/c1/zBarrierSetC1.cpp ++++ b/src/hotspot/share/gc/z/c1/zBarrierSetC1.cpp +@@ -21,6 +21,12 @@ + * questions. + */ + ++/* ++ * This file has been modified by Loongson Technology in 2023, These ++ * modifications are Copyright (c) 2022, 2023, Loongson Technology, and are made ++ * available on the same license terms set forth above. ++ */ ++ + #include "precompiled.hpp" + #include "c1/c1_LIR.hpp" + #include "c1/c1_LIRGenerator.hpp" +@@ -94,7 +100,7 @@ private: + + public: + LIR_OpZLoadBarrierTest(LIR_Opr opr) : +-#ifdef RISCV ++#if defined(RISCV) || defined(LOONGARCH) + LIR_Op(lir_zloadbarrier_test, LIR_OprFact::illegalOpr, NULL), + #else + LIR_Op(), +diff --git a/src/hotspot/share/interpreter/interpreterRuntime.cpp b/src/hotspot/share/interpreter/interpreterRuntime.cpp +index d66ed24d862..b682bb9d62a 100644 +--- a/src/hotspot/share/interpreter/interpreterRuntime.cpp ++++ b/src/hotspot/share/interpreter/interpreterRuntime.cpp +@@ -22,6 +22,12 @@ + * + */ + ++/* ++ * This file has been modified by Loongson Technology in 2021, These ++ * modifications are Copyright (c) 2018, 2021, Loongson Technology, and are made ++ * available on the same license terms set forth above. ++ */ ++ + #include "precompiled.hpp" + #include "jvm_io.h" + #include "classfile/javaClasses.inline.hpp" +@@ -1459,7 +1465,7 @@ JRT_ENTRY(void, InterpreterRuntime::prepare_native_call(JavaThread* current, Met + // preparing the same method will be sure to see non-null entry & mirror. + JRT_END + +-#if defined(IA32) || defined(AMD64) || defined(ARM) ++#if defined(IA32) || defined(AMD64) || defined(ARM) || defined(MIPS64) || defined(LOONGARCH64) + JRT_LEAF(void, InterpreterRuntime::popframe_move_outgoing_args(JavaThread* current, void* src_address, void* dest_address)) + if (src_address == dest_address) { + return; +diff --git a/src/hotspot/share/interpreter/interpreterRuntime.hpp b/src/hotspot/share/interpreter/interpreterRuntime.hpp +index c32431784aa..8209c42a1c4 100644 +--- a/src/hotspot/share/interpreter/interpreterRuntime.hpp ++++ b/src/hotspot/share/interpreter/interpreterRuntime.hpp +@@ -22,6 +22,12 @@ + * + */ + ++/* ++ * This file has been modified by Loongson Technology in 2021, These ++ * modifications are Copyright (c) 2018, 2021, Loongson Technology, and are made ++ * available on the same license terms set forth above. 
++ */ ++ + #ifndef SHARE_INTERPRETER_INTERPRETERRUNTIME_HPP + #define SHARE_INTERPRETER_INTERPRETERRUNTIME_HPP + +@@ -135,7 +141,7 @@ class InterpreterRuntime: AllStatic { + Method* method, + intptr_t* from, intptr_t* to); + +-#if defined(IA32) || defined(AMD64) || defined(ARM) ++#if defined(IA32) || defined(AMD64) || defined(ARM) || defined(MIPS64) || defined(LOONGARCH64) + // Popframe support (only needed on x86, AMD64 and ARM) + static void popframe_move_outgoing_args(JavaThread* current, void* src_address, void* dest_address); + #endif +diff --git a/src/hotspot/share/interpreter/templateInterpreterGenerator.hpp b/src/hotspot/share/interpreter/templateInterpreterGenerator.hpp +index 4e167ff451a..9441bae96c1 100644 +--- a/src/hotspot/share/interpreter/templateInterpreterGenerator.hpp ++++ b/src/hotspot/share/interpreter/templateInterpreterGenerator.hpp +@@ -22,6 +22,12 @@ + * + */ + ++/* ++ * This file has been modified by Loongson Technology in 2021. These ++ * modifications are Copyright (c) 2021, Loongson Technology, and are made ++ * available on the same license terms set forth above. ++ */ ++ + #ifndef SHARE_INTERPRETER_TEMPLATEINTERPRETERGENERATOR_HPP + #define SHARE_INTERPRETER_TEMPLATEINTERPRETERGENERATOR_HPP + +@@ -110,9 +116,9 @@ class TemplateInterpreterGenerator: public AbstractInterpreterGenerator { + + void generate_fixed_frame(bool native_call); + +-#ifdef AARCH64 ++#if defined(AARCH64) || defined(MIPS64) || defined(LOONGARCH64) + void generate_transcendental_entry(AbstractInterpreter::MethodKind kind, int fpargs); +-#endif // AARCH64 ++#endif // AARCH64 || MIPS64 || LOONGARCH64 + + #ifdef PPC + void lock_method(Register Rflags, Register Rscratch1, Register Rscratch2, bool flags_preloaded=false); +diff --git a/src/hotspot/share/jfr/utilities/jfrBigEndian.hpp b/src/hotspot/share/jfr/utilities/jfrBigEndian.hpp +index 597ddb3800f..427a9503eaf 100644 +--- a/src/hotspot/share/jfr/utilities/jfrBigEndian.hpp ++++ b/src/hotspot/share/jfr/utilities/jfrBigEndian.hpp +@@ -22,6 +22,12 @@ + * + */ + ++/* ++ * This file has been modified by Loongson Technology in 2021, These ++ * modifications are Copyright (c) 2019, 2021, Loongson Technology, and are made ++ * available on the same license terms set forth above. ++ */ ++ + #ifndef SHARE_JFR_UTILITIES_JFRBIGENDIAN_HPP + #define SHARE_JFR_UTILITIES_JFRBIGENDIAN_HPP + +@@ -102,7 +108,7 @@ inline T JfrBigEndian::read_unaligned(const address location) { + inline bool JfrBigEndian::platform_supports_unaligned_reads(void) { + #if defined(IA32) || defined(AMD64) || defined(PPC) || defined(S390) + return true; +-#elif defined(ARM) || defined(AARCH64) || defined(RISCV) ++#elif defined(ARM) || defined(AARCH64) || defined(RISCV) || defined(MIPS) || defined(LOONGARCH) + return false; + #else + #warning "Unconfigured platform" +diff --git a/src/hotspot/share/jvmci/vmStructs_jvmci.cpp b/src/hotspot/share/jvmci/vmStructs_jvmci.cpp +index 3f57d487bae..3b49daaf96e 100644 +--- a/src/hotspot/share/jvmci/vmStructs_jvmci.cpp ++++ b/src/hotspot/share/jvmci/vmStructs_jvmci.cpp +@@ -22,6 +22,12 @@ + * + */ + ++/* ++ * This file has been modified by Loongson Technology in 2022, These ++ * modifications are Copyright (c) 2022, Loongson Technology, and are made ++ * available on the same license terms set forth above. 
++ */ ++ + #include "precompiled.hpp" + #include "code/codeCache.hpp" + #include "compiler/compileBroker.hpp" +@@ -755,6 +761,17 @@ + + #endif + ++#ifdef LOONGARCH64 ++ ++#define VM_STRUCTS_CPU(nonstatic_field, static_field, unchecked_nonstatic_field, volatile_nonstatic_field, nonproduct_nonstatic_field, c2_nonstatic_field, unchecked_c1_static_field, unchecked_c2_static_field) \ ++ volatile_nonstatic_field(JavaFrameAnchor, _last_Java_fp, intptr_t*) ++ ++#define DECLARE_INT_CPU_FEATURE_CONSTANT(id, name, bit) GENERATE_VM_INT_CONSTANT_ENTRY(VM_Version::CPU_##id) ++#define VM_INT_CPU_FEATURE_CONSTANTS CPU_FEATURE_FLAGS(DECLARE_INT_CPU_FEATURE_CONSTANT) ++ ++#endif ++ ++ + #ifdef X86 + + #define VM_STRUCTS_CPU(nonstatic_field, static_field, unchecked_nonstatic_field, volatile_nonstatic_field, nonproduct_nonstatic_field, c2_nonstatic_field, unchecked_c1_static_field, unchecked_c2_static_field) \ +diff --git a/src/hotspot/share/memory/metaspace.cpp b/src/hotspot/share/memory/metaspace.cpp +index 1e897615eaf..9dc65a72b1e 100644 +--- a/src/hotspot/share/memory/metaspace.cpp ++++ b/src/hotspot/share/memory/metaspace.cpp +@@ -23,6 +23,12 @@ + * + */ + ++/* ++ * This file has been modified by Loongson Technology in 2021. These ++ * modifications are Copyright (c) 2019, 2021, Loongson Technology, and are made ++ * available on the same license terms set forth above. ++ */ ++ + #include "precompiled.hpp" + #include "cds/metaspaceShared.hpp" + #include "classfile/classLoaderData.hpp" +@@ -587,12 +593,15 @@ bool Metaspace::class_space_is_initialized() { + // On error, returns an unreserved space. + ReservedSpace Metaspace::reserve_address_space_for_compressed_classes(size_t size) { + +-#if defined(AARCH64) || defined(PPC64) ++#if defined(AARCH64) || defined(PPC64) || defined(MIPS64) || defined(LOONGARCH64) + const size_t alignment = Metaspace::reserve_alignment(); + + // AArch64: Try to align metaspace class space so that we can decode a + // compressed klass with a single MOVK instruction. We can do this iff the + // compressed class base is a multiple of 4G. ++ ++ // MIPS: Cannot mmap for 1G space at 4G position, and prepare for future optimization. ++ + // Additionally, above 32G, ensure the lower LogKlassAlignmentInBytes bits + // of the upper 32-bits of the address are zero so we can handle a shift + // when decoding. +@@ -649,16 +658,16 @@ ReservedSpace Metaspace::reserve_address_space_for_compressed_classes(size_t siz + return rs; + } + } +-#endif // defined(AARCH64) || defined(PPC64) ++#endif // defined(AARCH64) || defined(PPC64) || defined(MIPS64) || defined(LOONGARCH64) + +-#ifdef AARCH64 ++#if defined(AARCH64) || defined(MIPS64) || defined(LOONGARCH64) + // Note: on AARCH64, if the code above does not find any good placement, we + // have no recourse. We return an empty space and the VM will exit. + return ReservedSpace(); + #else + // Default implementation: Just reserve anywhere. + return ReservedSpace(size, Metaspace::reserve_alignment(), os::vm_page_size(), (char*)NULL); +-#endif // AARCH64 ++#endif // defined(AARCH64) || defined(MIPS64) || defined(LOONGARCH64) + } + + #endif // _LP64 +diff --git a/src/hotspot/share/opto/output.cpp b/src/hotspot/share/opto/output.cpp +index 8a1ed0d3160..596829c07ca 100644 +--- a/src/hotspot/share/opto/output.cpp ++++ b/src/hotspot/share/opto/output.cpp +@@ -22,6 +22,12 @@ + * + */ + ++/* ++ * This file has been modified by Loongson Technology in 2021. 
These ++ * modifications are Copyright (c) 2019, 2021, Loongson Technology, and are made ++ * available on the same license terms set forth above. ++ */ ++ + #include "precompiled.hpp" + #include "asm/assembler.inline.hpp" + #include "asm/macroAssembler.inline.hpp" +@@ -1011,6 +1017,27 @@ void PhaseOutput::Process_OopMap_Node(MachNode *mach, int current_offset) { + // Add the safepoint in the DebugInfoRecorder + if( !mach->is_MachCall() ) { + mcall = nullptr; ++#if defined(MIPS) || defined(LOONGARCH) ++ // safepoint_pc_offset should point to tha last instruction in safePoint. ++ // In X86 and sparc, their safePoints only contain one instruction. ++ // However, we should add current_offset with the size of safePoint in MIPS. ++ // 0x2d6ff22c: lw s2, 0x14(s2) ++ // last_pd->pc_offset()=308, pc_offset=304, bci=64 ++ // last_pd->pc_offset()=312, pc_offset=312, bci=64 ++ // src/hotspot/share/code/debugInfoRec.cpp:295, assert(last_pd->pc_offset() == pc_offset, "must be last pc") ++ // ++ // ;; Safepoint: ++ // ---> pc_offset=304 ++ // 0x2d6ff230: lui at, 0x2b7a ; OopMap{s2=Oop s5=Oop t4=Oop off=308} ++ // ;*goto ++ // ; - java.util.Hashtable::get@64 (line 353) ++ // ---> last_pd(308) ++ // 0x2d6ff234: lw at, 0xffffc100(at) ;*goto ++ // ; - java.util.Hashtable::get@64 (line 353) ++ // ; {poll} ++ // 0x2d6ff238: addiu s0, zero, 0x0 ++ safepoint_pc_offset += sfn->size(C->regalloc()) - 4; ++#endif + C->debug_info()->add_safepoint(safepoint_pc_offset, sfn->_oop_map); + } else { + mcall = mach->as_MachCall(); +@@ -1686,6 +1713,22 @@ void PhaseOutput::fill_buffer(CodeBuffer* cb, uint* blk_starts) { + DEBUG_ONLY(uint instr_offset = cb->insts_size()); + n->emit(*cb, C->regalloc()); + current_offset = cb->insts_size(); ++#if defined(MIPS) || defined(LOONGARCH) ++ if (!n->is_Proj() && (cb->insts()->end() != badAddress)) { ++ // For MIPS, the first instruction of the previous node (usually a instruction sequence) sometime ++ // is not the instruction which access memory. adjust is needed. previous_offset points to the ++ // instruction which access memory. Instruction size is 4. cb->insts_size() and ++ // cb->insts()->end() are the location of current instruction. ++ int adjust = 4; ++ NativeInstruction* inst = (NativeInstruction*) (cb->insts()->end() - 4); ++ if (inst->is_sync()) { ++ // a sync may be the last instruction, see store_B_immI_enc_sync ++ adjust += 4; ++ inst = (NativeInstruction*) (cb->insts()->end() - 8); ++ } ++ previous_offset = current_offset - adjust; ++ } ++#endif + + // Above we only verified that there is enough space in the instruction section. + // However, the instruction may emit stubs that cause code buffer expansion. +diff --git a/src/hotspot/share/opto/type.cpp b/src/hotspot/share/opto/type.cpp +index a3762dc32ff..511bd7e7875 100644 +--- a/src/hotspot/share/opto/type.cpp ++++ b/src/hotspot/share/opto/type.cpp +@@ -22,6 +22,12 @@ + * + */ + ++/* ++ * This file has been modified by Loongson Technology in 2021, These ++ * modifications are Copyright (c) 2021, Loongson Technology, and are made ++ * available on the same license terms set forth above. 
++ */ ++ + #include "precompiled.hpp" + #include "ci/ciMethodData.hpp" + #include "ci/ciTypeFlow.hpp" +@@ -78,6 +84,14 @@ const Type::TypeInfo Type::_type_info[Type::lastype] = { + { Bad, T_ILLEGAL, "vectorx:", false, 0, relocInfo::none }, // VectorX + { Bad, T_ILLEGAL, "vectory:", false, 0, relocInfo::none }, // VectorY + { Bad, T_ILLEGAL, "vectorz:", false, 0, relocInfo::none }, // VectorZ ++#elif defined(LOONGARCH64) ++ { Bad, T_ILLEGAL, "vectormask:", false, Op_RegVectMask, relocInfo::none }, // VectorMask. ++ { Bad, T_ILLEGAL, "vectora:", false, Op_VecA, relocInfo::none }, // VectorA. ++ { Bad, T_ILLEGAL, "vectors:", false, 0, relocInfo::none }, // VectorS ++ { Bad, T_ILLEGAL, "vectord:", false, 0, relocInfo::none }, // VectorD ++ { Bad, T_ILLEGAL, "vectorx:", false, Op_VecX, relocInfo::none }, // VectorX ++ { Bad, T_ILLEGAL, "vectory:", false, Op_VecY, relocInfo::none }, // VectorY ++ { Bad, T_ILLEGAL, "vectorz:", false, 0, relocInfo::none }, // VectorZ + #else // all other + { Bad, T_ILLEGAL, "vectormask:", false, Op_RegVectMask, relocInfo::none }, // VectorMask. + { Bad, T_ILLEGAL, "vectora:", false, Op_VecA, relocInfo::none }, // VectorA. +diff --git a/src/hotspot/share/runtime/os.cpp b/src/hotspot/share/runtime/os.cpp +index 369e5c4f5fa..f7ac74999a2 100644 +--- a/src/hotspot/share/runtime/os.cpp ++++ b/src/hotspot/share/runtime/os.cpp +@@ -22,6 +22,12 @@ + * + */ + ++/* ++ * This file has been modified by Loongson Technology in 2022, These ++ * modifications are Copyright (c) 2019, 2022, Loongson Technology, and are made ++ * available on the same license terms set forth above. ++ */ ++ + #include "precompiled.hpp" + #include "jvm.h" + #include "classfile/javaClasses.hpp" +@@ -1235,7 +1241,8 @@ bool os::is_first_C_frame(frame* fr) { + if ((uintptr_t)fr->sender_sp() == (uintptr_t)-1 || is_pointer_bad(fr->sender_sp())) return true; + + uintptr_t old_fp = (uintptr_t)fr->link_or_null(); +- if (old_fp == 0 || old_fp == (uintptr_t)-1 || old_fp == ufp || ++ // The check for old_fp and ufp is harmful on LoongArch and MIPS due to their special ABIs. ++ if (old_fp == 0 || old_fp == (uintptr_t)-1 NOT_LOONGARCH64_AND_MIPS64(|| old_fp == ufp) || + is_pointer_bad(fr->link_or_null())) return true; + + // stack grows downwards; if old_fp is below current fp or if the stack +diff --git a/src/hotspot/share/runtime/sharedRuntime.cpp b/src/hotspot/share/runtime/sharedRuntime.cpp +index 9af4b513a99..1a3e9fd0ad5 100644 +--- a/src/hotspot/share/runtime/sharedRuntime.cpp ++++ b/src/hotspot/share/runtime/sharedRuntime.cpp +@@ -22,6 +22,12 @@ + * + */ + ++/* ++ * This file has been modified by Loongson Technology in 2022, These ++ * modifications are Copyright (c) 2018, 2022, Loongson Technology, and are made ++ * available on the same license terms set forth above. ++ */ ++ + #include "precompiled.hpp" + #include "classfile/javaClasses.hpp" + #include "jvm.h" +@@ -3054,7 +3060,7 @@ void AdapterHandlerLibrary::create_native_wrapper(const methodHandle& method) { + CodeBuffer buffer(buf); + struct { double data[20]; } locs_buf; + buffer.insts()->initialize_shared_locs((relocInfo*)&locs_buf, sizeof(locs_buf) / sizeof(relocInfo)); +-#if defined(AARCH64) ++#if defined(AARCH64) || defined(LOONGARCH64) + // On AArch64 with ZGC and nmethod entry barriers, we need all oops to be + // in the constant pool to ensure ordering between the barrier and oops + // accesses. For native_wrappers we need a constant. 
+diff --git a/src/hotspot/share/runtime/sharedRuntimeTrig.cpp b/src/hotspot/share/runtime/sharedRuntimeTrig.cpp +index 6e3aa30b0b9..8f1d486f5cb 100644 +--- a/src/hotspot/share/runtime/sharedRuntimeTrig.cpp ++++ b/src/hotspot/share/runtime/sharedRuntimeTrig.cpp +@@ -22,6 +22,13 @@ + * + */ + ++/* ++ * This file has been modified by Loongson Technology in 2021, These ++ * modifications are Copyright (c) 2015, 2021, Loongson Technology, and are made ++ * available on the same license terms set forth above. ++ */ ++ ++ + #include "precompiled.hpp" + #include "jni.h" + #include "runtime/interfaceSupport.inline.hpp" +@@ -507,6 +514,14 @@ static int __ieee754_rem_pio2(double x, double *y) { + * sin(x) = x + (S1*x + (x *(r-y/2)+y)) + */ + ++#if defined(MIPS)|| defined(LOONGARCH) ++#undef S1 ++#undef S2 ++#undef S3 ++#undef S4 ++#undef S5 ++#undef S6 ++#endif + static const double + S1 = -1.66666666666666324348e-01, /* 0xBFC55555, 0x55555549 */ + S2 = 8.33333333332248946124e-03, /* 0x3F811111, 0x1110F8A6 */ +diff --git a/src/hotspot/share/runtime/thread.inline.hpp b/src/hotspot/share/runtime/thread.inline.hpp +index d86fce3c8ac..71bfd4dfa19 100644 +--- a/src/hotspot/share/runtime/thread.inline.hpp ++++ b/src/hotspot/share/runtime/thread.inline.hpp +@@ -23,6 +23,12 @@ + * + */ + ++/* ++ * This file has been modified by Loongson Technology in 2023, These ++ * modifications are Copyright (c) 2018, 2023, Loongson Technology, and are made ++ * available on the same license terms set forth above. ++ */ ++ + #ifndef SHARE_RUNTIME_THREAD_INLINE_HPP + #define SHARE_RUNTIME_THREAD_INLINE_HPP + +@@ -132,7 +138,7 @@ inline void JavaThread::set_pending_async_exception(oop e) { + } + + inline JavaThreadState JavaThread::thread_state() const { +-#if defined(PPC64) || defined (AARCH64) || defined(RISCV64) ++#if defined(PPC64) || defined (AARCH64) || defined(RISCV64) || defined(LOONGARCH64) + // Use membars when accessing volatile _thread_state. See + // Threads::create_vm() for size checks. + return (JavaThreadState) Atomic::load_acquire((volatile jint*)&_thread_state); +@@ -144,7 +150,7 @@ inline JavaThreadState JavaThread::thread_state() const { + inline void JavaThread::set_thread_state(JavaThreadState s) { + assert(current_or_null() == NULL || current_or_null() == this, + "state change should only be called by the current thread"); +-#if defined(PPC64) || defined (AARCH64) || defined(RISCV64) ++#if defined(PPC64) || defined (AARCH64) || defined(RISCV64) || defined(LOONGARCH64) + // Use membars when accessing volatile _thread_state. See + // Threads::create_vm() for size checks. + Atomic::release_store((volatile jint*)&_thread_state, (jint)s); +diff --git a/src/hotspot/share/utilities/macros.hpp b/src/hotspot/share/utilities/macros.hpp +index 33ecfe089f8..4d024b35735 100644 +--- a/src/hotspot/share/utilities/macros.hpp ++++ b/src/hotspot/share/utilities/macros.hpp +@@ -22,6 +22,12 @@ + * + */ + ++/* ++ * This file has been modified by Loongson Technology in 2021. These ++ * modifications are Copyright (c) 2018, 2021, Loongson Technology, and are made ++ * available on the same license terms set forth above. 
++ */ ++ + #ifndef SHARE_UTILITIES_MACROS_HPP + #define SHARE_UTILITIES_MACROS_HPP + +@@ -488,6 +494,38 @@ + #define NOT_S390(code) code + #endif + ++#ifdef MIPS64 ++#ifndef MIPS ++#define MIPS ++#endif ++#define MIPS64_ONLY(code) code ++#define NOT_MIPS64(code) ++#else ++#undef MIPS ++#define MIPS64_ONLY(code) ++#define NOT_MIPS64(code) code ++#endif ++ ++#ifdef LOONGARCH64 ++#ifndef LOONGARCH ++#define LOONGARCH ++#endif ++#define LOONGARCH64_ONLY(code) code ++#define NOT_LOONGARCH64(code) ++#else ++#undef LOONGARCH ++#define LOONGARCH64_ONLY(code) ++#define NOT_LOONGARCH64(code) code ++#endif ++ ++#if defined(MIPS64) || defined(LOONGARCH64) ++#define LOONGARCH64_AND_MIPS64_ONLY(code) code ++#define NOT_LOONGARCH64_AND_MIPS64(code) ++#else ++#define LOONGARCH64_AND_MIPS64_ONLY(code) ++#define NOT_LOONGARCH64_AND_MIPS64(code) code ++#endif ++ + #if defined(PPC32) || defined(PPC64) + #ifndef PPC + #define PPC +@@ -605,16 +643,34 @@ + // OS_CPU_HEADER(vmStructs) --> vmStructs_linux_x86.hpp + // + // basename.hpp / basename.inline.hpp ++#if defined(MIPS) && !defined(ZERO) ++#define CPU_HEADER_H(basename) XSTR(basename ## _mips.h) ++#define CPU_HEADER(basename) XSTR(basename ## _mips.hpp) ++#define CPU_HEADER_INLINE(basename) XSTR(basename ## _mips.inline.hpp) ++#elif defined(LOONGARCH) && !defined(ZERO) ++#define CPU_HEADER_H(basename) XSTR(basename ## _loongarch.h) ++#define CPU_HEADER(basename) XSTR(basename ## _loongarch.hpp) ++#define CPU_HEADER_INLINE(basename) XSTR(basename ## _loongarch.inline.hpp) ++#else + #define CPU_HEADER_H(basename) XSTR(CPU_HEADER_STEM(basename).h) + #define CPU_HEADER(basename) XSTR(CPU_HEADER_STEM(basename).hpp) + #define CPU_HEADER_INLINE(basename) XSTR(CPU_HEADER_STEM(basename).inline.hpp) ++#endif + // basename.hpp / basename.inline.hpp + #define OS_HEADER_H(basename) XSTR(OS_HEADER_STEM(basename).h) + #define OS_HEADER(basename) XSTR(OS_HEADER_STEM(basename).hpp) + #define OS_HEADER_INLINE(basename) XSTR(OS_HEADER_STEM(basename).inline.hpp) + // basename.hpp / basename.inline.hpp ++#if defined(MIPS) && !defined(ZERO) ++#define OS_CPU_HEADER(basename) XSTR(basename ## _linux_mips.hpp) ++#define OS_CPU_HEADER_INLINE(basename) XSTR(basename ## _linux_mips.inline.hpp) ++#elif defined(LOONGARCH) && !defined(ZERO) ++#define OS_CPU_HEADER(basename) XSTR(basename ## _linux_loongarch.hpp) ++#define OS_CPU_HEADER_INLINE(basename) XSTR(basename ## _linux_loongarch.inline.hpp) ++#else + #define OS_CPU_HEADER(basename) XSTR(OS_CPU_HEADER_STEM(basename).hpp) + #define OS_CPU_HEADER_INLINE(basename) XSTR(OS_CPU_HEADER_STEM(basename).inline.hpp) ++#endif + // basename.hpp / basename.inline.hpp + #define COMPILER_HEADER(basename) XSTR(COMPILER_HEADER_STEM(basename).hpp) + #define COMPILER_HEADER_INLINE(basename) XSTR(COMPILER_HEADER_STEM(basename).inline.hpp) +diff --git a/src/jdk.hotspot.agent/linux/native/libsaproc/LinuxDebuggerLocal.cpp b/src/jdk.hotspot.agent/linux/native/libsaproc/LinuxDebuggerLocal.cpp +index 9accba375a2..200bb1e82f3 100644 +--- a/src/jdk.hotspot.agent/linux/native/libsaproc/LinuxDebuggerLocal.cpp ++++ b/src/jdk.hotspot.agent/linux/native/libsaproc/LinuxDebuggerLocal.cpp +@@ -23,6 +23,13 @@ + * + */ + ++/* ++ * This file has been modified by Loongson Technology in 2022. These ++ * modifications are Copyright (c) 2021, 2022, Loongson Technology, and are made ++ * available on the same license terms set forth above. 
++ * ++ */ ++ + #include + #include "libproc.h" + #include "proc_service.h" +@@ -64,6 +71,10 @@ + #include "sun_jvm_hotspot_debugger_riscv64_RISCV64ThreadContext.h" + #endif + ++#ifdef loongarch64 ++#include "sun_jvm_hotspot_debugger_loongarch64_LOONGARCH64ThreadContext.h" ++#endif ++ + class AutoJavaString { + JNIEnv* m_env; + jstring m_str; +@@ -412,7 +423,7 @@ JNIEXPORT jbyteArray JNICALL Java_sun_jvm_hotspot_debugger_linux_LinuxDebuggerLo + return (err == PS_OK)? array : 0; + } + +-#if defined(i586) || defined(amd64) || defined(ppc64) || defined(ppc64le) || defined(aarch64) || defined(riscv64) ++#if defined(i586) || defined(amd64) || defined(ppc64) || defined(ppc64le) || defined(aarch64) || defined(riscv64) || defined(loongarch64) + extern "C" + JNIEXPORT jlongArray JNICALL Java_sun_jvm_hotspot_debugger_linux_LinuxDebuggerLocal_getThreadIntegerRegisterSet0 + (JNIEnv *env, jobject this_obj, jint lwp_id) { +@@ -447,6 +458,9 @@ JNIEXPORT jlongArray JNICALL Java_sun_jvm_hotspot_debugger_linux_LinuxDebuggerLo + #ifdef riscv64 + #define NPRGREG sun_jvm_hotspot_debugger_riscv64_RISCV64ThreadContext_NPRGREG + #endif ++#ifdef loongarch64 ++#define NPRGREG sun_jvm_hotspot_debugger_loongarch64_LOONGARCH64ThreadContext_NPRGREG ++#endif + #if defined(ppc64) || defined(ppc64le) + #define NPRGREG sun_jvm_hotspot_debugger_ppc64_PPC64ThreadContext_NPRGREG + #endif +@@ -561,6 +575,18 @@ JNIEXPORT jlongArray JNICALL Java_sun_jvm_hotspot_debugger_linux_LinuxDebuggerLo + + #endif /* riscv64 */ + ++#if defined(loongarch64) ++ ++#define REG_INDEX(reg) sun_jvm_hotspot_debugger_loongarch64_LOONGARCH64ThreadContext_##reg ++ ++ { ++ int i; ++ for (i = 0; i < 31; i++) ++ regs[i] = gregs.regs[i]; ++ regs[REG_INDEX(PC)] = gregs.csr_era; ++ } ++#endif /* loongarch64 */ ++ + #if defined(ppc64) || defined(ppc64le) + #define REG_INDEX(reg) sun_jvm_hotspot_debugger_ppc64_PPC64ThreadContext_##reg + +diff --git a/src/jdk.hotspot.agent/linux/native/libsaproc/libproc.h b/src/jdk.hotspot.agent/linux/native/libsaproc/libproc.h +index a69496e77a4..64312b4705d 100644 +--- a/src/jdk.hotspot.agent/linux/native/libsaproc/libproc.h ++++ b/src/jdk.hotspot.agent/linux/native/libsaproc/libproc.h +@@ -22,6 +22,13 @@ + * + */ + ++/* ++ * This file has been modified by Loongson Technology in 2022. These ++ * modifications are Copyright (c) 2022, Loongson Technology, and are made ++ * available on the same license terms set forth above. 
++ * ++ */ ++ + #ifndef _LIBPROC_H_ + #define _LIBPROC_H_ + +@@ -37,7 +44,7 @@ + #include + #define user_regs_struct pt_regs + #endif +-#if defined(aarch64) || defined(arm64) ++#if defined(aarch64) || defined(arm64) || defined(loongarch64) + #include + #define user_regs_struct user_pt_regs + #elif defined(arm) +@@ -46,6 +53,10 @@ + #elif defined(riscv64) + #include + #endif ++#if defined(mips) || defined(mipsel) || defined(mips64) || defined(mips64el) ++#include ++#define user_regs_struct pt_regs ++#endif + + // This C bool type must be int for compatibility with Linux calls and + // it would be a mistake to equivalence it to C++ bool on many platforms +diff --git a/src/jdk.hotspot.agent/linux/native/libsaproc/ps_proc.c b/src/jdk.hotspot.agent/linux/native/libsaproc/ps_proc.c +index b5fec835a98..d991f29cbb1 100644 +--- a/src/jdk.hotspot.agent/linux/native/libsaproc/ps_proc.c ++++ b/src/jdk.hotspot.agent/linux/native/libsaproc/ps_proc.c +@@ -22,6 +22,12 @@ + * + */ + ++/* ++ * This file has been modified by Loongson Technology in 2022, These ++ * modifications are Copyright (c) 2022, Loongson Technology, and are made ++ * available on the same license terms set forth above. ++ */ ++ + #include + #include + #include +@@ -138,7 +144,7 @@ static bool process_get_lwp_regs(struct ps_prochandle* ph, pid_t pid, struct use + #define PTRACE_GETREGS_REQ PT_GETREGS + #endif + +-#ifdef PTRACE_GETREGS_REQ ++#if defined(PTRACE_GETREGS_REQ) && !defined(loongarch64) + if (ptrace_getregs(PTRACE_GETREGS_REQ, pid, user, NULL) < 0) { + print_debug("ptrace(PTRACE_GETREGS, ...) failed for lwp(%d) errno(%d) \"%s\"\n", pid, + errno, strerror(errno)); +diff --git a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/HotSpotAgent.java b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/HotSpotAgent.java +index e0e9b4b6727..9af1218ed46 100644 +--- a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/HotSpotAgent.java ++++ b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/HotSpotAgent.java +@@ -23,6 +23,12 @@ + * + */ + ++/* ++ * This file has been modified by Loongson Technology in 2021. These ++ * modifications are Copyright (c) 2018, 2021, Loongson Technology, and are made ++ * available on the same license terms set forth above. 
++ * ++ */ + package sun.jvm.hotspot; + + import java.rmi.RemoteException; +@@ -38,6 +44,8 @@ import sun.jvm.hotspot.debugger.MachineDescriptionPPC64; + import sun.jvm.hotspot.debugger.MachineDescriptionAArch64; + import sun.jvm.hotspot.debugger.MachineDescriptionRISCV64; + import sun.jvm.hotspot.debugger.MachineDescriptionIntelX86; ++import sun.jvm.hotspot.debugger.MachineDescriptionMIPS64; ++import sun.jvm.hotspot.debugger.MachineDescriptionLOONGARCH64; + import sun.jvm.hotspot.debugger.NoSuchSymbolException; + import sun.jvm.hotspot.debugger.bsd.BsdDebuggerLocal; + import sun.jvm.hotspot.debugger.linux.LinuxDebuggerLocal; +@@ -572,6 +580,10 @@ public class HotSpotAgent { + machDesc = new MachineDescriptionAArch64(); + } else if (cpu.equals("riscv64")) { + machDesc = new MachineDescriptionRISCV64(); ++ } else if (cpu.equals("mips64")) { ++ machDesc = new MachineDescriptionMIPS64(); ++ } else if (cpu.equals("loongarch64")) { ++ machDesc = new MachineDescriptionLOONGARCH64(); + } else { + try { + machDesc = (MachineDescription) +diff --git a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/MachineDescriptionLOONGARCH64.java b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/MachineDescriptionLOONGARCH64.java +new file mode 100644 +index 00000000000..99cea8c7f14 +--- /dev/null ++++ b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/MachineDescriptionLOONGARCH64.java +@@ -0,0 +1,41 @@ ++/* ++ * Copyright (c) 2000, 2008, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2018, 2021, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++package sun.jvm.hotspot.debugger; ++ ++public class MachineDescriptionLOONGARCH64 extends MachineDescriptionTwosComplement implements MachineDescription { ++ public long getAddressSize() { ++ return 8; ++ } ++ ++ ++ public boolean isBigEndian() { ++ return false; ++ } ++ ++ public boolean isLP64() { ++ return true; ++ } ++} +diff --git a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/MachineDescriptionMIPS64.java b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/MachineDescriptionMIPS64.java +new file mode 100644 +index 00000000000..1b49efd2017 +--- /dev/null ++++ b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/MachineDescriptionMIPS64.java +@@ -0,0 +1,41 @@ ++/* ++ * Copyright (c) 2000, 2008, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2018, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++package sun.jvm.hotspot.debugger; ++ ++public class MachineDescriptionMIPS64 extends MachineDescriptionTwosComplement implements MachineDescription { ++ public long getAddressSize() { ++ return 8; ++ } ++ ++ ++ public boolean isBigEndian() { ++ return "big".equals(System.getProperty("sun.cpu.endian")); ++ } ++ ++ public boolean isLP64() { ++ return true; ++ } ++} +diff --git a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/linux/LinuxCDebugger.java b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/linux/LinuxCDebugger.java +index 469bb6e0665..ea3a118de2a 100644 +--- a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/linux/LinuxCDebugger.java ++++ b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/linux/LinuxCDebugger.java +@@ -23,6 +23,12 @@ + * + */ + ++/* ++ * This file has been modified by Loongson Technology in 2022, These ++ * modifications are Copyright (c) 2019, 2022, Loongson Technology, and are made ++ * available on the same license terms set forth above. 
++ */ ++ + package sun.jvm.hotspot.debugger.linux; + + import java.io.*; +@@ -34,12 +40,16 @@ import sun.jvm.hotspot.debugger.x86.*; + import sun.jvm.hotspot.debugger.amd64.*; + import sun.jvm.hotspot.debugger.aarch64.*; + import sun.jvm.hotspot.debugger.riscv64.*; ++import sun.jvm.hotspot.debugger.mips64.*; ++import sun.jvm.hotspot.debugger.loongarch64.*; + import sun.jvm.hotspot.debugger.ppc64.*; + import sun.jvm.hotspot.debugger.linux.x86.*; + import sun.jvm.hotspot.debugger.linux.amd64.*; + import sun.jvm.hotspot.debugger.linux.ppc64.*; + import sun.jvm.hotspot.debugger.linux.aarch64.*; + import sun.jvm.hotspot.debugger.linux.riscv64.*; ++import sun.jvm.hotspot.debugger.linux.mips64.*; ++import sun.jvm.hotspot.debugger.linux.loongarch64.*; + import sun.jvm.hotspot.utilities.*; + + class LinuxCDebugger implements CDebugger { +@@ -93,7 +103,21 @@ class LinuxCDebugger implements CDebugger { + Address pc = context.getRegisterAsAddress(AMD64ThreadContext.RIP); + if (pc == null) return null; + return LinuxAMD64CFrame.getTopFrame(dbg, pc, context); +- } else if (cpu.equals("ppc64")) { ++ } else if (cpu.equals("mips64")) { ++ MIPS64ThreadContext context = (MIPS64ThreadContext) thread.getContext(); ++ Address sp = context.getRegisterAsAddress(MIPS64ThreadContext.SP); ++ if (sp == null) return null; ++ Address pc = context.getRegisterAsAddress(MIPS64ThreadContext.PC); ++ if (pc == null) return null; ++ return new LinuxMIPS64CFrame(dbg, sp, pc); ++ } else if (cpu.equals("loongarch64")) { ++ LOONGARCH64ThreadContext context = (LOONGARCH64ThreadContext) thread.getContext(); ++ Address fp = context.getRegisterAsAddress(LOONGARCH64ThreadContext.FP); ++ if (fp == null) return null; ++ Address pc = context.getRegisterAsAddress(LOONGARCH64ThreadContext.PC); ++ if (pc == null) return null; ++ return new LinuxLOONGARCH64CFrame(dbg, fp, pc); ++ } else if (cpu.equals("ppc64")) { + PPC64ThreadContext context = (PPC64ThreadContext) thread.getContext(); + Address sp = context.getRegisterAsAddress(PPC64ThreadContext.SP); + if (sp == null) return null; +diff --git a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/linux/LinuxThreadContextFactory.java b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/linux/LinuxThreadContextFactory.java +index 69a34fe2afa..c21e0d6a611 100644 +--- a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/linux/LinuxThreadContextFactory.java ++++ b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/linux/LinuxThreadContextFactory.java +@@ -22,6 +22,12 @@ + * + */ + ++/* ++ * This file has been modified by Loongson Technology in 2021, These ++ * modifications are Copyright (c) 2019, 2021, Loongson Technology, and are made ++ * available on the same license terms set forth above. 
++ */ ++ + package sun.jvm.hotspot.debugger.linux; + + import java.lang.reflect.*; +@@ -29,6 +35,8 @@ import sun.jvm.hotspot.debugger.*; + import sun.jvm.hotspot.debugger.linux.amd64.*; + import sun.jvm.hotspot.debugger.linux.x86.*; + import sun.jvm.hotspot.debugger.linux.ppc64.*; ++import sun.jvm.hotspot.debugger.linux.mips64.*; ++import sun.jvm.hotspot.debugger.linux.loongarch64.*; + + class LinuxThreadContextFactory { + static ThreadContext createThreadContext(LinuxDebugger dbg) { +@@ -37,7 +45,11 @@ class LinuxThreadContextFactory { + return new LinuxX86ThreadContext(dbg); + } else if (cpu.equals("amd64")) { + return new LinuxAMD64ThreadContext(dbg); +- } else if (cpu.equals("ppc64")) { ++ } else if (cpu.equals("mips64")) { ++ return new LinuxMIPS64ThreadContext(dbg); ++ } else if (cpu.equals("loongarch64")) { ++ return new LinuxLOONGARCH64ThreadContext(dbg); ++ } else if (cpu.equals("ppc64")) { + return new LinuxPPC64ThreadContext(dbg); + } else { + try { +diff --git a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/linux/loongarch64/LinuxLOONGARCH64CFrame.java b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/linux/loongarch64/LinuxLOONGARCH64CFrame.java +new file mode 100644 +index 00000000000..0e6caee5a49 +--- /dev/null ++++ b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/linux/loongarch64/LinuxLOONGARCH64CFrame.java +@@ -0,0 +1,92 @@ ++/* ++ * Copyright (c) 2003, 2012, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++package sun.jvm.hotspot.debugger.linux.loongarch64; ++ ++import sun.jvm.hotspot.debugger.*; ++import sun.jvm.hotspot.debugger.linux.*; ++import sun.jvm.hotspot.debugger.cdbg.*; ++import sun.jvm.hotspot.debugger.cdbg.basic.*; ++import sun.jvm.hotspot.debugger.loongarch64.*; ++ ++final public class LinuxLOONGARCH64CFrame extends BasicCFrame { ++ // package/class internals only ++ public LinuxLOONGARCH64CFrame(LinuxDebugger dbg, Address fp, Address pc) { ++ super(dbg.getCDebugger()); ++ this.fp = fp; ++ this.pc = pc; ++ this.dbg = dbg; ++ } ++ ++ // override base class impl to avoid ELF parsing ++ public ClosestSymbol closestSymbolToPC() { ++ // try native lookup in debugger. 
++ return dbg.lookup(dbg.getAddressValue(pc())); ++ } ++ ++ public Address pc() { ++ return pc; ++ } ++ ++ public Address localVariableBase() { ++ return fp; ++ } ++ ++ public CFrame sender(ThreadProxy thread) { ++ LOONGARCH64ThreadContext context = (LOONGARCH64ThreadContext) thread.getContext(); ++ Address sp = context.getRegisterAsAddress(LOONGARCH64ThreadContext.SP); ++ Address nextFP; ++ Address nextPC; ++ ++ if ((fp == null) || fp.lessThan(sp)) { ++ return null; ++ } ++ ++ try { ++ nextFP = fp.getAddressAt(-2 * ADDRESS_SIZE); ++ } catch (Exception e) { ++ return null; ++ } ++ if (nextFP == null) { ++ return null; ++ } ++ ++ try { ++ nextPC = fp.getAddressAt(-1 * ADDRESS_SIZE); ++ } catch (Exception e) { ++ return null; ++ } ++ if (nextPC == null) { ++ return null; ++ } ++ ++ return new LinuxLOONGARCH64CFrame(dbg, nextFP, nextPC); ++ } ++ ++ private static final int ADDRESS_SIZE = 8; ++ private Address pc; ++ private Address fp; ++ private LinuxDebugger dbg; ++} +diff --git a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/linux/loongarch64/LinuxLOONGARCH64ThreadContext.java b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/linux/loongarch64/LinuxLOONGARCH64ThreadContext.java +new file mode 100644 +index 00000000000..604642598e0 +--- /dev/null ++++ b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/linux/loongarch64/LinuxLOONGARCH64ThreadContext.java +@@ -0,0 +1,47 @@ ++/* ++ * Copyright (c) 2003, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2021, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ * ++ */ ++ ++package sun.jvm.hotspot.debugger.linux.loongarch64; ++ ++import sun.jvm.hotspot.debugger.*; ++import sun.jvm.hotspot.debugger.loongarch64.*; ++import sun.jvm.hotspot.debugger.linux.*; ++ ++public class LinuxLOONGARCH64ThreadContext extends LOONGARCH64ThreadContext { ++ private LinuxDebugger debugger; ++ ++ public LinuxLOONGARCH64ThreadContext(LinuxDebugger debugger) { ++ super(); ++ this.debugger = debugger; ++ } ++ ++ public void setRegisterAsAddress(int index, Address value) { ++ setRegister(index, debugger.getAddressValue(value)); ++ } ++ ++ public Address getRegisterAsAddress(int index) { ++ return debugger.newAddress(getRegister(index)); ++ } ++} +diff --git a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/linux/mips64/LinuxMIPS64CFrame.java b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/linux/mips64/LinuxMIPS64CFrame.java +new file mode 100644 +index 00000000000..2e3eb564da2 +--- /dev/null ++++ b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/linux/mips64/LinuxMIPS64CFrame.java +@@ -0,0 +1,80 @@ ++/* ++ * Copyright (c) 2003, 2012, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2018, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++package sun.jvm.hotspot.debugger.linux.mips64; ++ ++import sun.jvm.hotspot.debugger.*; ++import sun.jvm.hotspot.debugger.linux.*; ++import sun.jvm.hotspot.debugger.cdbg.*; ++import sun.jvm.hotspot.debugger.cdbg.basic.*; ++import sun.jvm.hotspot.debugger.mips64.*; ++ ++final public class LinuxMIPS64CFrame extends BasicCFrame { ++ // package/class internals only ++ public LinuxMIPS64CFrame(LinuxDebugger dbg, Address ebp, Address pc) { ++ super(dbg.getCDebugger()); ++ this.ebp = ebp; ++ this.pc = pc; ++ this.dbg = dbg; ++ } ++ ++ // override base class impl to avoid ELF parsing ++ public ClosestSymbol closestSymbolToPC() { ++ // try native lookup in debugger. 
++ return dbg.lookup(dbg.getAddressValue(pc())); ++ } ++ ++ public Address pc() { ++ return pc; ++ } ++ ++ public Address localVariableBase() { ++ return ebp; ++ } ++ ++ public CFrame sender(ThreadProxy thread) { ++ MIPS64ThreadContext context = (MIPS64ThreadContext) thread.getContext(); ++ Address esp = context.getRegisterAsAddress(MIPS64ThreadContext.SP); ++ ++ if ( (ebp == null) || ebp.lessThan(esp) ) { ++ return null; ++ } ++ ++ Address nextEBP = ebp.getAddressAt( 0 * ADDRESS_SIZE); ++ if (nextEBP == null) { ++ return null; ++ } ++ Address nextPC = ebp.getAddressAt( 1 * ADDRESS_SIZE); ++ if (nextPC == null) { ++ return null; ++ } ++ return new LinuxMIPS64CFrame(dbg, nextEBP, nextPC); ++ } ++ ++ private static final int ADDRESS_SIZE = 4; ++ private Address pc; ++ private Address ebp; ++ private LinuxDebugger dbg; ++} +diff --git a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/linux/mips64/LinuxMIPS64ThreadContext.java b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/linux/mips64/LinuxMIPS64ThreadContext.java +new file mode 100644 +index 00000000000..98e0f3f0bcf +--- /dev/null ++++ b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/linux/mips64/LinuxMIPS64ThreadContext.java +@@ -0,0 +1,47 @@ ++/* ++ * Copyright (c) 2003, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2018, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++package sun.jvm.hotspot.debugger.linux.mips64; ++ ++import sun.jvm.hotspot.debugger.*; ++import sun.jvm.hotspot.debugger.mips64.*; ++import sun.jvm.hotspot.debugger.linux.*; ++ ++public class LinuxMIPS64ThreadContext extends MIPS64ThreadContext { ++ private LinuxDebugger debugger; ++ ++ public LinuxMIPS64ThreadContext(LinuxDebugger debugger) { ++ super(); ++ this.debugger = debugger; ++ } ++ ++ public void setRegisterAsAddress(int index, Address value) { ++ setRegister(index, debugger.getAddressValue(value)); ++ } ++ ++ public Address getRegisterAsAddress(int index) { ++ return debugger.newAddress(getRegister(index)); ++ } ++} +diff --git a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/loongarch64/LOONGARCH64ThreadContext.java b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/loongarch64/LOONGARCH64ThreadContext.java +new file mode 100644 +index 00000000000..1de3cb1a472 +--- /dev/null ++++ b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/loongarch64/LOONGARCH64ThreadContext.java +@@ -0,0 +1,128 @@ ++/* ++ * Copyright (c) 2000, 2012, Oracle and/or its affiliates. 
All rights reserved. ++ * Copyright (c) 2015, 2021, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++package sun.jvm.hotspot.debugger.loongarch64; ++ ++import java.lang.annotation.Native; ++ ++import sun.jvm.hotspot.debugger.*; ++import sun.jvm.hotspot.debugger.cdbg.*; ++ ++/** Specifies the thread context on loongarch64 platforms; only a sub-portion ++ of the context is guaranteed to be present on all operating ++ systems. */ ++ ++public abstract class LOONGARCH64ThreadContext implements ThreadContext { ++ ++ // NOTE: the indices for the various registers must be maintained as ++ // listed across various operating systems. However, only a small ++ // subset of the registers' values are guaranteed to be present (and ++ // must be present for the SA's stack walking to work): EAX, EBX, ++ // ECX, EDX, ESI, EDI, EBP, ESP, and EIP. ++ ++ // One instance of the Native annotation is enough to trigger header generation ++ // for this file. 
++ @Native ++ public static final int ZERO = 0; ++ public static final int RA = 1; ++ public static final int TP = 2; ++ public static final int SP = 3; ++ public static final int A0 = 4; ++ public static final int A1 = 5; ++ public static final int A2 = 6; ++ public static final int A3 = 7; ++ public static final int A4 = 8; ++ public static final int A5 = 9; ++ public static final int A6 = 10; ++ public static final int A7 = 11; ++ public static final int T0 = 12; ++ public static final int T1 = 13; ++ public static final int T2 = 14; ++ public static final int T3 = 15; ++ public static final int T4 = 16; ++ public static final int T5 = 17; ++ public static final int T6 = 18; ++ public static final int T7 = 19; ++ public static final int T8 = 20; ++ public static final int RX = 21; ++ public static final int FP = 22; ++ public static final int S0 = 23; ++ public static final int S1 = 24; ++ public static final int S2 = 25; ++ public static final int S3 = 26; ++ public static final int S4 = 27; ++ public static final int S5 = 28; ++ public static final int S6 = 29; ++ public static final int S7 = 30; ++ public static final int S8 = 31; ++ public static final int PC = 32; ++ public static final int NPRGREG = 33; ++ ++ private static final String[] regNames = { ++ "ZERO", "RA", "TP", "SP", ++ "A0", "A1", "A2", "A3", ++ "A4", "A5", "A6", "A7", ++ "T0", "T1", "T2", "T3", ++ "T4", "T5", "T6", "T7", ++ "T8", "RX", "FP", "S0", ++ "S1", "S2", "S3", "S4", ++ "S5", "S6", "S7", "S8", ++ "PC" ++ }; ++ ++ private long[] data; ++ ++ public LOONGARCH64ThreadContext() { ++ data = new long[NPRGREG]; ++ } ++ ++ public int getNumRegisters() { ++ return NPRGREG; ++ } ++ ++ public String getRegisterName(int index) { ++ return regNames[index]; ++ } ++ ++ public void setRegister(int index, long value) { ++ data[index] = value; ++ } ++ ++ public long getRegister(int index) { ++ return data[index]; ++ } ++ ++ public CFrame getTopFrame(Debugger dbg) { ++ return null; ++ } ++ ++ /** This can't be implemented in this class since we would have to ++ tie the implementation to, for example, the debugging system */ ++ public abstract void setRegisterAsAddress(int index, Address value); ++ ++ /** This can't be implemented in this class since we would have to ++ tie the implementation to, for example, the debugging system */ ++ public abstract Address getRegisterAsAddress(int index); ++} +diff --git a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/mips64/MIPS64ThreadContext.java b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/mips64/MIPS64ThreadContext.java +new file mode 100644 +index 00000000000..d3479a65ea0 +--- /dev/null ++++ b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/mips64/MIPS64ThreadContext.java +@@ -0,0 +1,128 @@ ++/* ++ * Copyright (c) 2000, 2012, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2018, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). 
++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++package sun.jvm.hotspot.debugger.mips64; ++ ++import java.lang.annotation.Native; ++ ++import sun.jvm.hotspot.debugger.*; ++import sun.jvm.hotspot.debugger.cdbg.*; ++ ++/** Specifies the thread context on mips64 platforms; only a sub-portion ++ of the context is guaranteed to be present on all operating ++ systems. */ ++ ++public abstract class MIPS64ThreadContext implements ThreadContext { ++ ++ // NOTE: the indices for the various registers must be maintained as ++ // listed across various operating systems. However, only a small ++ // subset of the registers' values are guaranteed to be present (and ++ // must be present for the SA's stack walking to work): EAX, EBX, ++ // ECX, EDX, ESI, EDI, EBP, ESP, and EIP. ++ ++ // One instance of the Native annotation is enough to trigger header generation ++ // for this file. ++ @Native ++ public static final int ZERO = 0; ++ public static final int AT = 1; ++ public static final int V0 = 2; ++ public static final int V1 = 3; ++ public static final int A0 = 4; ++ public static final int A1 = 5; ++ public static final int A2 = 6; ++ public static final int A3 = 7; ++ public static final int T0 = 8; ++ public static final int T1 = 9; ++ public static final int T2 = 10; ++ public static final int T3 = 11; ++ public static final int T4 = 12; ++ public static final int T5 = 13; ++ public static final int T6 = 14; ++ public static final int T7 = 15; ++ public static final int S0 = 16; ++ public static final int S1 = 17; ++ public static final int S2 = 18; ++ public static final int S3 = 19; ++ public static final int S4 = 20; ++ public static final int S5 = 21; ++ public static final int S6 = 22; ++ public static final int S7 = 23; ++ public static final int T8 = 24; ++ public static final int T9 = 25; ++ public static final int K0 = 26; ++ public static final int K1 = 27; ++ public static final int GP = 28; ++ public static final int SP = 29; ++ public static final int FP = 30; ++ public static final int RA = 31; ++ public static final int PC = 32; ++ public static final int NPRGREG = 33; ++ ++ private static final String[] regNames = { ++ "ZERO", "AT", "V0", "V1", ++ "A0", "A1", "A2", "A3", ++ "T0", "T1", "T2", "T3", ++ "T4", "T5", "T6", "T7", ++ "S0", "S1", "S2", "S3", ++ "S4", "S5", "S6", "S7", ++ "T8", "T9", "K0", "K1", ++ "GP", "SP", "FP", "RA", ++ "PC" ++ }; ++ ++ private long[] data; ++ ++ public MIPS64ThreadContext() { ++ data = new long[NPRGREG]; ++ } ++ ++ public int getNumRegisters() { ++ return NPRGREG; ++ } ++ ++ public String getRegisterName(int index) { ++ return regNames[index]; ++ } ++ ++ public void setRegister(int index, long value) { ++ data[index] = value; ++ } ++ ++ public long getRegister(int index) { ++ return data[index]; ++ } ++ ++ public CFrame getTopFrame(Debugger dbg) { ++ return null; ++ } ++ ++ /** This can't be implemented in this class since we would have to ++ tie the implementation to, for example, the debugging system */ ++ public abstract void setRegisterAsAddress(int index, Address value); ++ ++ /** This can't be implemented in this class since we would have to ++ tie the implementation to, for example, 
the debugging system */ ++ public abstract Address getRegisterAsAddress(int index); ++} +diff --git a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/posix/elf/ELFHeader.java b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/posix/elf/ELFHeader.java +index 7113a3a497b..de47531db7c 100644 +--- a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/posix/elf/ELFHeader.java ++++ b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/posix/elf/ELFHeader.java +@@ -22,6 +22,12 @@ + * + */ + ++/* ++ * This file has been modified by Loongson Technology in 2021, These ++ * modifications are Copyright (c) 2019, 2021, Loongson Technology, and are made ++ * available on the same license terms set forth above. ++ */ ++ + package sun.jvm.hotspot.debugger.posix.elf; + + import java.io.FileInputStream; +@@ -63,6 +69,8 @@ public interface ELFHeader { + public static final int ARCH_i860 = 7; + /** MIPS architecture type. */ + public static final int ARCH_MIPS = 8; ++ /** LOONGARCH architecture type. */ ++ public static final int ARCH_LOONGARCH = 9; + + /** Returns a file type which is defined by the file type constants. */ + public short getFileType(); +diff --git a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/proc/loongarch64/ProcLOONGARCH64Thread.java b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/proc/loongarch64/ProcLOONGARCH64Thread.java +new file mode 100644 +index 00000000000..1f60fa6cfb2 +--- /dev/null ++++ b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/proc/loongarch64/ProcLOONGARCH64Thread.java +@@ -0,0 +1,92 @@ ++/* ++ * Copyright (c) 2002, 2003, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2021, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++package sun.jvm.hotspot.debugger.proc.loongarch64; ++ ++import sun.jvm.hotspot.debugger.*; ++import sun.jvm.hotspot.debugger.loongarch64.*; ++import sun.jvm.hotspot.debugger.proc.*; ++import sun.jvm.hotspot.utilities.*; ++ ++public class ProcLOONGARCH64Thread implements ThreadProxy { ++ private ProcDebugger debugger; ++ private int id; ++ ++ public ProcLOONGARCH64Thread(ProcDebugger debugger, Address addr) { ++ this.debugger = debugger; ++ ++ // FIXME: the size here should be configurable. However, making it ++ // so would produce a dependency on the "types" package from the ++ // debugger package, which is not desired. 
++ this.id = (int) addr.getCIntegerAt(0, 4, true); ++ } ++ ++ public ProcLOONGARCH64Thread(ProcDebugger debugger, long id) { ++ this.debugger = debugger; ++ this.id = (int) id; ++ } ++ ++ public ThreadContext getContext() throws IllegalThreadStateException { ++ ProcLOONGARCH64ThreadContext context = new ProcLOONGARCH64ThreadContext(debugger); ++ long[] regs = debugger.getThreadIntegerRegisterSet(id); ++ /* ++ _NGREG in reg.h is defined to be 19. Because we have included ++ debug registers LOONGARCH64ThreadContext.NPRGREG is 25. ++ */ ++ ++ if (Assert.ASSERTS_ENABLED) { ++ Assert.that(regs.length <= LOONGARCH64ThreadContext.NPRGREG, "size of register set is greater than " + LOONGARCH64ThreadContext.NPRGREG); ++ } ++ for (int i = 0; i < regs.length; i++) { ++ context.setRegister(i, regs[i]); ++ } ++ return context; ++ } ++ ++ public boolean canSetContext() throws DebuggerException { ++ return false; ++ } ++ ++ public void setContext(ThreadContext context) ++ throws IllegalThreadStateException, DebuggerException { ++ throw new DebuggerException("Unimplemented"); ++ } ++ ++ public String toString() { ++ return "t@" + id; ++ } ++ ++ public boolean equals(Object obj) { ++ if ((obj == null) || !(obj instanceof ProcLOONGARCH64Thread)) { ++ return false; ++ } ++ ++ return (((ProcLOONGARCH64Thread) obj).id == id); ++ } ++ ++ public int hashCode() { ++ return id; ++ } ++} +diff --git a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/proc/loongarch64/ProcLOONGARCH64ThreadContext.java b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/proc/loongarch64/ProcLOONGARCH64ThreadContext.java +new file mode 100644 +index 00000000000..ef5597ac4e9 +--- /dev/null ++++ b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/proc/loongarch64/ProcLOONGARCH64ThreadContext.java +@@ -0,0 +1,47 @@ ++/* ++ * Copyright (c) 2002, 2003, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2021, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ * ++ */ ++ ++package sun.jvm.hotspot.debugger.proc.loongarch64; ++ ++import sun.jvm.hotspot.debugger.*; ++import sun.jvm.hotspot.debugger.loongarch64.*; ++import sun.jvm.hotspot.debugger.proc.*; ++ ++public class ProcLOONGARCH64ThreadContext extends LOONGARCH64ThreadContext { ++ private ProcDebugger debugger; ++ ++ public ProcLOONGARCH64ThreadContext(ProcDebugger debugger) { ++ super(); ++ this.debugger = debugger; ++ } ++ ++ public void setRegisterAsAddress(int index, Address value) { ++ setRegister(index, debugger.getAddressValue(value)); ++ } ++ ++ public Address getRegisterAsAddress(int index) { ++ return debugger.newAddress(getRegister(index)); ++ } ++} +diff --git a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/proc/loongarch64/ProcLOONGARCH64ThreadFactory.java b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/proc/loongarch64/ProcLOONGARCH64ThreadFactory.java +new file mode 100644 +index 00000000000..abad1bb38b7 +--- /dev/null ++++ b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/proc/loongarch64/ProcLOONGARCH64ThreadFactory.java +@@ -0,0 +1,45 @@ ++/* ++ * Copyright (c) 2002, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2021, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++package sun.jvm.hotspot.debugger.proc.loongarch64; ++ ++import sun.jvm.hotspot.debugger.*; ++import sun.jvm.hotspot.debugger.proc.*; ++ ++public class ProcLOONGARCH64ThreadFactory implements ProcThreadFactory { ++ private ProcDebugger debugger; ++ ++ public ProcLOONGARCH64ThreadFactory(ProcDebugger debugger) { ++ this.debugger = debugger; ++ } ++ ++ public ThreadProxy createThreadWrapper(Address threadIdentifierAddr) { ++ return new ProcLOONGARCH64Thread(debugger, threadIdentifierAddr); ++ } ++ ++ public ThreadProxy createThreadWrapper(long id) { ++ return new ProcLOONGARCH64Thread(debugger, id); ++ } ++} +diff --git a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/proc/mips64/ProcMIPS64Thread.java b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/proc/mips64/ProcMIPS64Thread.java +new file mode 100644 +index 00000000000..5c1e0be8932 +--- /dev/null ++++ b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/proc/mips64/ProcMIPS64Thread.java +@@ -0,0 +1,92 @@ ++/* ++ * Copyright (c) 2002, 2003, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2018, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++package sun.jvm.hotspot.debugger.proc.mips64; ++ ++import sun.jvm.hotspot.debugger.*; ++import sun.jvm.hotspot.debugger.mips64.*; ++import sun.jvm.hotspot.debugger.proc.*; ++import sun.jvm.hotspot.utilities.*; ++ ++public class ProcMIPS64Thread implements ThreadProxy { ++ private ProcDebugger debugger; ++ private int id; ++ ++ public ProcMIPS64Thread(ProcDebugger debugger, Address addr) { ++ this.debugger = debugger; ++ ++ // FIXME: the size here should be configurable. However, making it ++ // so would produce a dependency on the "types" package from the ++ // debugger package, which is not desired. ++ this.id = (int) addr.getCIntegerAt(0, 4, true); ++ } ++ ++ public ProcMIPS64Thread(ProcDebugger debugger, long id) { ++ this.debugger = debugger; ++ this.id = (int) id; ++ } ++ ++ public ThreadContext getContext() throws IllegalThreadStateException { ++ ProcMIPS64ThreadContext context = new ProcMIPS64ThreadContext(debugger); ++ long[] regs = debugger.getThreadIntegerRegisterSet(id); ++ /* ++ _NGREG in reg.h is defined to be 19. Because we have included ++ debug registers MIPS64ThreadContext.NPRGREG is 25. ++ */ ++ ++ if (Assert.ASSERTS_ENABLED) { ++ Assert.that(regs.length <= MIPS64ThreadContext.NPRGREG, "size of register set is greater than " + MIPS64ThreadContext.NPRGREG); ++ } ++ for (int i = 0; i < regs.length; i++) { ++ context.setRegister(i, regs[i]); ++ } ++ return context; ++ } ++ ++ public boolean canSetContext() throws DebuggerException { ++ return false; ++ } ++ ++ public void setContext(ThreadContext context) ++ throws IllegalThreadStateException, DebuggerException { ++ throw new DebuggerException("Unimplemented"); ++ } ++ ++ public String toString() { ++ return "t@" + id; ++ } ++ ++ public boolean equals(Object obj) { ++ if ((obj == null) || !(obj instanceof ProcMIPS64Thread)) { ++ return false; ++ } ++ ++ return (((ProcMIPS64Thread) obj).id == id); ++ } ++ ++ public int hashCode() { ++ return id; ++ } ++} +diff --git a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/proc/mips64/ProcMIPS64ThreadContext.java b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/proc/mips64/ProcMIPS64ThreadContext.java +new file mode 100644 +index 00000000000..d44223d768a +--- /dev/null ++++ b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/proc/mips64/ProcMIPS64ThreadContext.java +@@ -0,0 +1,47 @@ ++/* ++ * Copyright (c) 2002, 2003, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2018, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++package sun.jvm.hotspot.debugger.proc.mips64; ++ ++import sun.jvm.hotspot.debugger.*; ++import sun.jvm.hotspot.debugger.mips64.*; ++import sun.jvm.hotspot.debugger.proc.*; ++ ++public class ProcMIPS64ThreadContext extends MIPS64ThreadContext { ++ private ProcDebugger debugger; ++ ++ public ProcMIPS64ThreadContext(ProcDebugger debugger) { ++ super(); ++ this.debugger = debugger; ++ } ++ ++ public void setRegisterAsAddress(int index, Address value) { ++ setRegister(index, debugger.getAddressValue(value)); ++ } ++ ++ public Address getRegisterAsAddress(int index) { ++ return debugger.newAddress(getRegister(index)); ++ } ++} +diff --git a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/proc/mips64/ProcMIPS64ThreadFactory.java b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/proc/mips64/ProcMIPS64ThreadFactory.java +new file mode 100644 +index 00000000000..bad478fc5ca +--- /dev/null ++++ b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/proc/mips64/ProcMIPS64ThreadFactory.java +@@ -0,0 +1,45 @@ ++/* ++ * Copyright (c) 2002, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2018, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ * ++ */ ++ ++package sun.jvm.hotspot.debugger.proc.mips64; ++ ++import sun.jvm.hotspot.debugger.*; ++import sun.jvm.hotspot.debugger.proc.*; ++ ++public class ProcMIPS64ThreadFactory implements ProcThreadFactory { ++ private ProcDebugger debugger; ++ ++ public ProcMIPS64ThreadFactory(ProcDebugger debugger) { ++ this.debugger = debugger; ++ } ++ ++ public ThreadProxy createThreadWrapper(Address threadIdentifierAddr) { ++ return new ProcMIPS64Thread(debugger, threadIdentifierAddr); ++ } ++ ++ public ThreadProxy createThreadWrapper(long id) { ++ return new ProcMIPS64Thread(debugger, id); ++ } ++} +diff --git a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/remote/RemoteDebuggerClient.java b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/remote/RemoteDebuggerClient.java +index 2bd396c8f4f..da89480f72c 100644 +--- a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/remote/RemoteDebuggerClient.java ++++ b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/remote/RemoteDebuggerClient.java +@@ -22,6 +22,12 @@ + * + */ + ++/* ++ * This file has been modified by Loongson Technology in 2021, These ++ * modifications are Copyright (c) 2019, 2021, Loongson Technology, and are made ++ * available on the same license terms set forth above. ++ */ ++ + package sun.jvm.hotspot.debugger.remote; + + import java.rmi.*; +@@ -33,6 +39,8 @@ import sun.jvm.hotspot.debugger.cdbg.*; + import sun.jvm.hotspot.debugger.remote.x86.*; + import sun.jvm.hotspot.debugger.remote.amd64.*; + import sun.jvm.hotspot.debugger.remote.ppc64.*; ++import sun.jvm.hotspot.debugger.remote.mips64.*; ++import sun.jvm.hotspot.debugger.remote.loongarch64.*; + + /** An implementation of Debugger which wraps a + RemoteDebugger, providing remote debugging via RMI. +@@ -71,6 +79,16 @@ public class RemoteDebuggerClient extends DebuggerBase implements JVMDebugger { + cachePageSize = 4096; + cacheNumPages = parseCacheNumPagesProperty(cacheSize / cachePageSize); + unalignedAccessesOkay = true; ++ } else if (cpu.equals("mips64") || cpu.equals("mips64el")) { ++ threadFactory = new RemoteMIPS64ThreadFactory(this); ++ cachePageSize = 4096; ++ cacheNumPages = parseCacheNumPagesProperty(cacheSize / cachePageSize); ++ unalignedAccessesOkay = true; ++ } else if (cpu.equals("loongarch64")) { ++ threadFactory = new RemoteLOONGARCH64ThreadFactory(this); ++ cachePageSize = 4096; ++ cacheNumPages = parseCacheNumPagesProperty(cacheSize / cachePageSize); ++ unalignedAccessesOkay = true; + } else { + try { + Class tf = Class.forName("sun.jvm.hotspot.debugger.remote." + +diff --git a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/remote/loongarch64/RemoteLOONGARCH64Thread.java b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/remote/loongarch64/RemoteLOONGARCH64Thread.java +new file mode 100644 +index 00000000000..242dd279e1a +--- /dev/null ++++ b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/remote/loongarch64/RemoteLOONGARCH64Thread.java +@@ -0,0 +1,54 @@ ++/* ++ * Copyright (c) 2002, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2018, 2021, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. 
++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++package sun.jvm.hotspot.debugger.remote.loongarch64; ++ ++import sun.jvm.hotspot.debugger.*; ++import sun.jvm.hotspot.debugger.loongarch64.*; ++import sun.jvm.hotspot.debugger.remote.*; ++import sun.jvm.hotspot.utilities.*; ++ ++public class RemoteLOONGARCH64Thread extends RemoteThread { ++ public RemoteLOONGARCH64Thread(RemoteDebuggerClient debugger, Address addr) { ++ super(debugger, addr); ++ } ++ ++ public RemoteLOONGARCH64Thread(RemoteDebuggerClient debugger, long id) { ++ super(debugger, id); ++ } ++ ++ public ThreadContext getContext() throws IllegalThreadStateException { ++ RemoteLOONGARCH64ThreadContext context = new RemoteLOONGARCH64ThreadContext(debugger); ++ long[] regs = (addr != null)? debugger.getThreadIntegerRegisterSet(addr) : ++ debugger.getThreadIntegerRegisterSet(id); ++ if (Assert.ASSERTS_ENABLED) { ++ Assert.that(regs.length == LOONGARCH64ThreadContext.NPRGREG, "size of register set must match"); ++ } ++ for (int i = 0; i < regs.length; i++) { ++ context.setRegister(i, regs[i]); ++ } ++ return context; ++ } ++} +diff --git a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/remote/loongarch64/RemoteLOONGARCH64ThreadContext.java b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/remote/loongarch64/RemoteLOONGARCH64ThreadContext.java +new file mode 100644 +index 00000000000..634d5ad049f +--- /dev/null ++++ b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/remote/loongarch64/RemoteLOONGARCH64ThreadContext.java +@@ -0,0 +1,51 @@ ++/* ++ * Copyright (c) 2002, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2018, 2021, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ * ++ */ ++ ++package sun.jvm.hotspot.debugger.remote.loongarch64; ++ ++import sun.jvm.hotspot.debugger.*; ++import sun.jvm.hotspot.debugger.loongarch64.*; ++import sun.jvm.hotspot.debugger.remote.*; ++ ++public class RemoteLOONGARCH64ThreadContext extends LOONGARCH64ThreadContext { ++ private RemoteDebuggerClient debugger; ++ ++ public RemoteLOONGARCH64ThreadContext(RemoteDebuggerClient debugger) { ++ super(); ++ this.debugger = debugger; ++ } ++ ++ /** This can't be implemented in this class since we would have to ++ tie the implementation to, for example, the debugging system */ ++ public void setRegisterAsAddress(int index, Address value) { ++ setRegister(index, debugger.getAddressValue(value)); ++ } ++ ++ /** This can't be implemented in this class since we would have to ++ tie the implementation to, for example, the debugging system */ ++ public Address getRegisterAsAddress(int index) { ++ return debugger.newAddress(getRegister(index)); ++ } ++} +diff --git a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/remote/loongarch64/RemoteLOONGARCH64ThreadFactory.java b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/remote/loongarch64/RemoteLOONGARCH64ThreadFactory.java +new file mode 100644 +index 00000000000..4fb9cc7c069 +--- /dev/null ++++ b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/remote/loongarch64/RemoteLOONGARCH64ThreadFactory.java +@@ -0,0 +1,45 @@ ++/* ++ * Copyright (c) 2002, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2018, 2021, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ * ++ */ ++ ++package sun.jvm.hotspot.debugger.remote.loongarch64; ++ ++import sun.jvm.hotspot.debugger.*; ++import sun.jvm.hotspot.debugger.remote.*; ++ ++public class RemoteLOONGARCH64ThreadFactory implements RemoteThreadFactory { ++ private RemoteDebuggerClient debugger; ++ ++ public RemoteLOONGARCH64ThreadFactory(RemoteDebuggerClient debugger) { ++ this.debugger = debugger; ++ } ++ ++ public ThreadProxy createThreadWrapper(Address threadIdentifierAddr) { ++ return new RemoteLOONGARCH64Thread(debugger, threadIdentifierAddr); ++ } ++ ++ public ThreadProxy createThreadWrapper(long id) { ++ return new RemoteLOONGARCH64Thread(debugger, id); ++ } ++} +diff --git a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/remote/mips64/RemoteMIPS64Thread.java b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/remote/mips64/RemoteMIPS64Thread.java +new file mode 100644 +index 00000000000..c2f7d841f20 +--- /dev/null ++++ b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/remote/mips64/RemoteMIPS64Thread.java +@@ -0,0 +1,54 @@ ++/* ++ * Copyright (c) 2002, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2018, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++package sun.jvm.hotspot.debugger.remote.mips64; ++ ++import sun.jvm.hotspot.debugger.*; ++import sun.jvm.hotspot.debugger.mips64.*; ++import sun.jvm.hotspot.debugger.remote.*; ++import sun.jvm.hotspot.utilities.*; ++ ++public class RemoteMIPS64Thread extends RemoteThread { ++ public RemoteMIPS64Thread(RemoteDebuggerClient debugger, Address addr) { ++ super(debugger, addr); ++ } ++ ++ public RemoteMIPS64Thread(RemoteDebuggerClient debugger, long id) { ++ super(debugger, id); ++ } ++ ++ public ThreadContext getContext() throws IllegalThreadStateException { ++ RemoteMIPS64ThreadContext context = new RemoteMIPS64ThreadContext(debugger); ++ long[] regs = (addr != null)? 
debugger.getThreadIntegerRegisterSet(addr) : ++ debugger.getThreadIntegerRegisterSet(id); ++ if (Assert.ASSERTS_ENABLED) { ++ Assert.that(regs.length == MIPS64ThreadContext.NPRGREG, "size of register set must match"); ++ } ++ for (int i = 0; i < regs.length; i++) { ++ context.setRegister(i, regs[i]); ++ } ++ return context; ++ } ++} +diff --git a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/remote/mips64/RemoteMIPS64ThreadContext.java b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/remote/mips64/RemoteMIPS64ThreadContext.java +new file mode 100644 +index 00000000000..23646905d74 +--- /dev/null ++++ b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/remote/mips64/RemoteMIPS64ThreadContext.java +@@ -0,0 +1,51 @@ ++/* ++ * Copyright (c) 2002, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2018, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++package sun.jvm.hotspot.debugger.remote.mips64; ++ ++import sun.jvm.hotspot.debugger.*; ++import sun.jvm.hotspot.debugger.mips64.*; ++import sun.jvm.hotspot.debugger.remote.*; ++ ++public class RemoteMIPS64ThreadContext extends MIPS64ThreadContext { ++ private RemoteDebuggerClient debugger; ++ ++ public RemoteMIPS64ThreadContext(RemoteDebuggerClient debugger) { ++ super(); ++ this.debugger = debugger; ++ } ++ ++ /** This can't be implemented in this class since we would have to ++ tie the implementation to, for example, the debugging system */ ++ public void setRegisterAsAddress(int index, Address value) { ++ setRegister(index, debugger.getAddressValue(value)); ++ } ++ ++ /** This can't be implemented in this class since we would have to ++ tie the implementation to, for example, the debugging system */ ++ public Address getRegisterAsAddress(int index) { ++ return debugger.newAddress(getRegister(index)); ++ } ++} +diff --git a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/remote/mips64/RemoteMIPS64ThreadFactory.java b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/remote/mips64/RemoteMIPS64ThreadFactory.java +new file mode 100644 +index 00000000000..b39b0144901 +--- /dev/null ++++ b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/remote/mips64/RemoteMIPS64ThreadFactory.java +@@ -0,0 +1,45 @@ ++/* ++ * Copyright (c) 2002, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2018, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++package sun.jvm.hotspot.debugger.remote.mips64; ++ ++import sun.jvm.hotspot.debugger.*; ++import sun.jvm.hotspot.debugger.remote.*; ++ ++public class RemoteMIPS64ThreadFactory implements RemoteThreadFactory { ++ private RemoteDebuggerClient debugger; ++ ++ public RemoteMIPS64ThreadFactory(RemoteDebuggerClient debugger) { ++ this.debugger = debugger; ++ } ++ ++ public ThreadProxy createThreadWrapper(Address threadIdentifierAddr) { ++ return new RemoteMIPS64Thread(debugger, threadIdentifierAddr); ++ } ++ ++ public ThreadProxy createThreadWrapper(long id) { ++ return new RemoteMIPS64Thread(debugger, id); ++ } ++} +diff --git a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/Threads.java b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/Threads.java +index d16ac8aae51..de1e70a7290 100644 +--- a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/Threads.java ++++ b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/Threads.java +@@ -22,6 +22,12 @@ + * + */ + ++/* ++ * This file has been modified by Loongson Technology in 2021, These ++ * modifications are Copyright (c) 2019, 2021, Loongson Technology, and are made ++ * available on the same license terms set forth above. 
++ */ ++ + package sun.jvm.hotspot.runtime; + + import java.util.*; +@@ -36,6 +42,8 @@ import sun.jvm.hotspot.runtime.linux_amd64.LinuxAMD64JavaThreadPDAccess; + import sun.jvm.hotspot.runtime.linux_aarch64.LinuxAARCH64JavaThreadPDAccess; + import sun.jvm.hotspot.runtime.linux_riscv64.LinuxRISCV64JavaThreadPDAccess; + import sun.jvm.hotspot.runtime.linux_ppc64.LinuxPPC64JavaThreadPDAccess; ++import sun.jvm.hotspot.runtime.linux_mips64.LinuxMIPS64JavaThreadPDAccess; ++import sun.jvm.hotspot.runtime.linux_loongarch64.LinuxLOONGARCH64JavaThreadPDAccess; + import sun.jvm.hotspot.runtime.bsd_x86.BsdX86JavaThreadPDAccess; + import sun.jvm.hotspot.runtime.bsd_amd64.BsdAMD64JavaThreadPDAccess; + import sun.jvm.hotspot.runtime.bsd_aarch64.BsdAARCH64JavaThreadPDAccess; +@@ -116,6 +124,10 @@ public class Threads { + access = new LinuxAARCH64JavaThreadPDAccess(); + } else if (cpu.equals("riscv64")) { + access = new LinuxRISCV64JavaThreadPDAccess(); ++ } else if (cpu.equals("mips64")) { ++ access = new LinuxMIPS64JavaThreadPDAccess(); ++ } else if (cpu.equals("loongarch64")) { ++ access = new LinuxLOONGARCH64JavaThreadPDAccess(); + } else { + try { + access = (JavaThreadPDAccess) +diff --git a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/linux_loongarch64/LinuxLOONGARCH64JavaThreadPDAccess.java b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/linux_loongarch64/LinuxLOONGARCH64JavaThreadPDAccess.java +new file mode 100644 +index 00000000000..75d6bf2c642 +--- /dev/null ++++ b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/linux_loongarch64/LinuxLOONGARCH64JavaThreadPDAccess.java +@@ -0,0 +1,135 @@ ++/* ++ * Copyright (c) 2014, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2018, 2021, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ * ++ */ ++ ++package sun.jvm.hotspot.runtime.linux_loongarch64; ++ ++import java.io.*; ++import java.util.*; ++ ++import sun.jvm.hotspot.debugger.*; ++import sun.jvm.hotspot.debugger.loongarch64.*; ++import sun.jvm.hotspot.runtime.*; ++import sun.jvm.hotspot.runtime.loongarch64.*; ++import sun.jvm.hotspot.types.*; ++import sun.jvm.hotspot.utilities.*; ++import sun.jvm.hotspot.utilities.Observable; ++import sun.jvm.hotspot.utilities.Observer; ++ ++public class LinuxLOONGARCH64JavaThreadPDAccess implements JavaThreadPDAccess { ++ private static AddressField lastJavaFPField; ++ private static AddressField osThreadField; ++ ++ // Field from OSThread ++ private static CIntegerField osThreadThreadIDField; ++ ++ // This is currently unneeded but is being kept in case we change ++ // the currentFrameGuess algorithm ++ private static final long GUESS_SCAN_RANGE = 128 * 1024; ++ ++ static { ++ VM.registerVMInitializedObserver(new Observer() { ++ public void update(Observable o, Object data) { ++ initialize(VM.getVM().getTypeDataBase()); ++ } ++ }); ++ } ++ ++ private static synchronized void initialize(TypeDataBase db) { ++ Type type = db.lookupType("JavaThread"); ++ osThreadField = type.getAddressField("_osthread"); ++ ++ Type anchorType = db.lookupType("JavaFrameAnchor"); ++ lastJavaFPField = anchorType.getAddressField("_last_Java_fp"); ++ ++ Type osThreadType = db.lookupType("OSThread"); ++ osThreadThreadIDField = osThreadType.getCIntegerField("_thread_id"); ++ } ++ ++ public Address getLastJavaFP(Address addr) { ++ return lastJavaFPField.getValue(addr.addOffsetTo(sun.jvm.hotspot.runtime.JavaThread.getAnchorField().getOffset())); ++ } ++ ++ public Address getLastJavaPC(Address addr) { ++ return null; ++ } ++ ++ public Address getBaseOfStackPointer(Address addr) { ++ return null; ++ } ++ ++ public Frame getLastFramePD(JavaThread thread, Address addr) { ++ Address fp = thread.getLastJavaFP(); ++ if (fp == null) { ++ return null; // no information ++ } ++ return new LOONGARCH64Frame(thread.getLastJavaSP(), fp); ++ } ++ ++ public RegisterMap newRegisterMap(JavaThread thread, boolean updateMap) { ++ return new LOONGARCH64RegisterMap(thread, updateMap); ++ } ++ ++ public Frame getCurrentFrameGuess(JavaThread thread, Address addr) { ++ ThreadProxy t = getThreadProxy(addr); ++ LOONGARCH64ThreadContext context = (LOONGARCH64ThreadContext) t.getContext(); ++ LOONGARCH64CurrentFrameGuess guesser = new LOONGARCH64CurrentFrameGuess(context, thread); ++ if (!guesser.run(GUESS_SCAN_RANGE)) { ++ return null; ++ } ++ if (guesser.getPC() == null) { ++ return new LOONGARCH64Frame(guesser.getSP(), guesser.getFP()); ++ } else { ++ return new LOONGARCH64Frame(guesser.getSP(), guesser.getFP(), guesser.getPC()); ++ } ++ } ++ ++ public void printThreadIDOn(Address addr, PrintStream tty) { ++ tty.print(getThreadProxy(addr)); ++ } ++ ++ public void printInfoOn(Address threadAddr, PrintStream tty) { ++ tty.print("Thread id: "); ++ printThreadIDOn(threadAddr, tty); ++ // tty.println("\nPostJavaState: " + getPostJavaState(threadAddr)); ++ } ++ ++ public Address getLastSP(Address addr) { ++ ThreadProxy t = getThreadProxy(addr); ++ LOONGARCH64ThreadContext context = (LOONGARCH64ThreadContext) t.getContext(); ++ return context.getRegisterAsAddress(LOONGARCH64ThreadContext.SP); ++ } ++ ++ public ThreadProxy getThreadProxy(Address addr) { ++ // Addr is the address of the JavaThread. 
++ // Fetch the OSThread (for now and for simplicity, not making a ++ // separate "OSThread" class in this package) ++ Address osThreadAddr = osThreadField.getValue(addr); ++ // Get the address of the _thread_id from the OSThread ++ Address threadIdAddr = osThreadAddr.addOffsetTo(osThreadThreadIDField.getOffset()); ++ ++ JVMDebugger debugger = VM.getVM().getDebugger(); ++ return debugger.getThreadForIdentifierAddress(threadIdAddr); ++ } ++} +diff --git a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/linux_mips64/LinuxMIPS64JavaThreadPDAccess.java b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/linux_mips64/LinuxMIPS64JavaThreadPDAccess.java +new file mode 100644 +index 00000000000..88223744932 +--- /dev/null ++++ b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/linux_mips64/LinuxMIPS64JavaThreadPDAccess.java +@@ -0,0 +1,135 @@ ++/* ++ * Copyright (c) 2014, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2018, 2020, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ * ++ */ ++ ++package sun.jvm.hotspot.runtime.linux_mips64; ++ ++import java.io.*; ++import java.util.*; ++ ++import sun.jvm.hotspot.debugger.*; ++import sun.jvm.hotspot.debugger.mips64.*; ++import sun.jvm.hotspot.runtime.*; ++import sun.jvm.hotspot.runtime.mips64.*; ++import sun.jvm.hotspot.types.*; ++import sun.jvm.hotspot.utilities.*; ++import sun.jvm.hotspot.utilities.Observable; ++import sun.jvm.hotspot.utilities.Observer; ++ ++public class LinuxMIPS64JavaThreadPDAccess implements JavaThreadPDAccess { ++ private static AddressField osThreadField; ++ ++ // Field from OSThread ++ private static CIntegerField osThreadThreadIDField; ++ ++ // This is currently unneeded but is being kept in case we change ++ // the currentFrameGuess algorithm ++ private static final long GUESS_SCAN_RANGE = 128 * 1024; ++ ++ static { ++ VM.registerVMInitializedObserver(new Observer() { ++ public void update(Observable o, Object data) { ++ initialize(VM.getVM().getTypeDataBase()); ++ } ++ }); ++ } ++ ++ private static synchronized void initialize(TypeDataBase db) { ++ Type type = db.lookupType("JavaThread"); ++ osThreadField = type.getAddressField("_osthread"); ++ ++ Type osThreadType = db.lookupType("OSThread"); ++ osThreadThreadIDField = osThreadType.getCIntegerField("_thread_id"); ++ } ++ ++ public Address getLastJavaFP(Address addr) { ++ return null; ++ } ++ ++ public Address getLastJavaPC(Address addr) { ++ return null; ++ } ++ ++ public Address getBaseOfStackPointer(Address addr) { ++ return null; ++ } ++ ++ public Frame getLastFramePD(JavaThread thread, Address addr) { ++ Address fp = thread.getLastJavaFP(); ++ if (fp == null) { ++ return null; // no information ++ } ++ return new MIPS64Frame(thread.getLastJavaSP(), fp); ++ } ++ ++ public RegisterMap newRegisterMap(JavaThread thread, boolean updateMap) { ++ return new MIPS64RegisterMap(thread, updateMap); ++ } ++ ++ public Frame getCurrentFrameGuess(JavaThread thread, Address addr) { ++ ThreadProxy t = getThreadProxy(addr); ++ MIPS64ThreadContext context = (MIPS64ThreadContext) t.getContext(); ++ MIPS64CurrentFrameGuess guesser = new MIPS64CurrentFrameGuess(context, thread); ++ if (!guesser.run(GUESS_SCAN_RANGE)) { ++ return null; ++ } ++ if (guesser.getPC() == null) { ++ return new MIPS64Frame(guesser.getSP(), guesser.getFP()); ++ } else { ++ return new MIPS64Frame(guesser.getSP(), guesser.getFP(), guesser.getPC()); ++ } ++ } ++ ++ public void printThreadIDOn(Address addr, PrintStream tty) { ++ tty.print(getThreadProxy(addr)); ++ } ++ ++ public void printInfoOn(Address threadAddr, PrintStream tty) { ++ tty.print("Thread id: "); ++ printThreadIDOn(threadAddr, tty); ++ // tty.println("\nPostJavaState: " + getPostJavaState(threadAddr)); ++ } ++ ++ public Address getLastSP(Address addr) { ++ ThreadProxy t = getThreadProxy(addr); ++ MIPS64ThreadContext context = (MIPS64ThreadContext) t.getContext(); ++ return context.getRegisterAsAddress(MIPS64ThreadContext.SP); ++ } ++ ++ public Address getLastFP(Address addr) { ++ return getLastSP(addr).getAddressAt(0); ++ } ++ ++ public ThreadProxy getThreadProxy(Address addr) { ++ // Addr is the address of the JavaThread. 
++ // Fetch the OSThread (for now and for simplicity, not making a ++ // separate "OSThread" class in this package) ++ Address osThreadAddr = osThreadField.getValue(addr); ++ // Get the address of the _thread_id from the OSThread ++ Address threadIdAddr = osThreadAddr.addOffsetTo(osThreadThreadIDField.getOffset()); ++ ++ JVMDebugger debugger = VM.getVM().getDebugger(); ++ return debugger.getThreadForIdentifierAddress(threadIdAddr); ++ } ++} +diff --git a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/loongarch64/LOONGARCH64CurrentFrameGuess.java b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/loongarch64/LOONGARCH64CurrentFrameGuess.java +new file mode 100644 +index 00000000000..824270e1329 +--- /dev/null ++++ b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/loongarch64/LOONGARCH64CurrentFrameGuess.java +@@ -0,0 +1,250 @@ ++/* ++ * Copyright (c) 2001, 2006, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2018, 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++package sun.jvm.hotspot.runtime.loongarch64; ++ ++import sun.jvm.hotspot.debugger.*; ++import sun.jvm.hotspot.debugger.loongarch64.*; ++import sun.jvm.hotspot.code.*; ++import sun.jvm.hotspot.interpreter.*; ++import sun.jvm.hotspot.runtime.*; ++ ++/**
++ Should be able to be used on all loongarch64 platforms we support
++ (Win32, Solaris/loongarch64, and soon Linux) to implement JavaThread's
++ "currentFrameGuess()" functionality. Input is an LOONGARCH64ThreadContext;
++ output is SP, FP, and PC for an LOONGARCH64Frame. Instantiation of the
++ LOONGARCH64Frame is left to the caller, since we may need to subclass
++ LOONGARCH64Frame to support signal handler frames on Unix platforms.
++
++ Algorithm is to walk up the stack within a given range (say,
++ 512K at most) looking for a plausible PC and SP for a Java frame,
++ also considering those coming in from the context. If we find a PC
++ that belongs to the VM (i.e., in generated code like the
++ interpreter or CodeCache) then we try to find an associated EBP.
++ We repeat this until we either find a complete frame or run out of
++ stack to look at.
*/ ++ ++public class LOONGARCH64CurrentFrameGuess { ++ private LOONGARCH64ThreadContext context; ++ private JavaThread thread; ++ private Address spFound; ++ private Address fpFound; ++ private Address pcFound; ++ ++ private static final boolean DEBUG = System.getProperty("sun.jvm.hotspot.runtime.loongarch64.LOONGARCH64Frame.DEBUG") ++ != null; ++ ++ public LOONGARCH64CurrentFrameGuess(LOONGARCH64ThreadContext context, ++ JavaThread thread) { ++ this.context = context; ++ this.thread = thread; ++ } ++ ++ /** Returns false if not able to find a frame within a reasonable range. */ ++ public boolean run(long regionInBytesToSearch) { ++ Address sp = context.getRegisterAsAddress(LOONGARCH64ThreadContext.SP); ++ Address pc = context.getRegisterAsAddress(LOONGARCH64ThreadContext.PC); ++ Address fp = context.getRegisterAsAddress(LOONGARCH64ThreadContext.FP); ++ if (sp == null) { ++ // Bail out if no last java frame eithe ++ if (thread.getLastJavaSP() != null) { ++ setValues(thread.getLastJavaSP(), thread.getLastJavaFP(), null); ++ return true; ++ } ++ // Bail out ++ return false; ++ } ++ Address end = sp.addOffsetTo(regionInBytesToSearch); ++ VM vm = VM.getVM(); ++ ++ setValues(null, null, null); // Assume we're not going to find anything ++ ++ if (vm.isJavaPCDbg(pc)) { ++ if (vm.isClientCompiler()) { ++ // If the topmost frame is a Java frame, we are (pretty much) ++ // guaranteed to have a viable EBP. We should be more robust ++ // than this (we have the potential for losing entire threads' ++ // stack traces) but need to see how much work we really have ++ // to do here. Searching the stack for an (SP, FP) pair is ++ // hard since it's easy to misinterpret inter-frame stack ++ // pointers as base-of-frame pointers; we also don't know the ++ // sizes of C1 frames (not registered in the nmethod) so can't ++ // derive them from ESP. ++ ++ setValues(sp, fp, pc); ++ return true; ++ } else { ++ if (vm.getInterpreter().contains(pc)) { ++ if (DEBUG) { ++ System.out.println("CurrentFrameGuess: choosing interpreter frame: sp = " + ++ sp + ", fp = " + fp + ", pc = " + pc); ++ } ++ setValues(sp, fp, pc); ++ return true; ++ } ++ ++ // For the server compiler, EBP is not guaranteed to be valid ++ // for compiled code. In addition, an earlier attempt at a ++ // non-searching algorithm (see below) failed because the ++ // stack pointer from the thread context was pointing ++ // (considerably) beyond the ostensible end of the stack, into ++ // garbage; walking from the topmost frame back caused a crash. ++ // ++ // This algorithm takes the current PC as a given and tries to ++ // find the correct corresponding SP by walking up the stack ++ // and repeatedly performing stackwalks (very inefficient). ++ // ++ // FIXME: there is something wrong with stackwalking across ++ // adapter frames...this is likely to be the root cause of the ++ // failure with the simpler algorithm below. ++ ++ for (long offset = 0; ++ offset < regionInBytesToSearch; ++ offset += vm.getAddressSize()) { ++ try { ++ Address curSP = sp.addOffsetTo(offset); ++ Frame frame = new LOONGARCH64Frame(curSP, null, pc); ++ RegisterMap map = thread.newRegisterMap(false); ++ while (frame != null) { ++ if (frame.isEntryFrame() && frame.entryFrameIsFirst()) { ++ // We were able to traverse all the way to the ++ // bottommost Java frame. ++ // This sp looks good. Keep it. 
++ if (DEBUG) { ++ System.out.println("CurrentFrameGuess: Choosing sp = " + curSP + ", pc = " + pc); ++ } ++ setValues(curSP, null, pc); ++ return true; ++ } ++ frame = frame.sender(map); ++ } ++ } catch (Exception e) { ++ if (DEBUG) { ++ System.out.println("CurrentFrameGuess: Exception " + e + " at offset " + offset); ++ } ++ // Bad SP. Try another. ++ } ++ } ++ ++ // Were not able to find a plausible SP to go with this PC. ++ // Bail out. ++ return false; ++ ++ /* ++ // Original algorithm which does not work because SP was ++ // pointing beyond where it should have: ++ ++ // For the server compiler, EBP is not guaranteed to be valid ++ // for compiled code. We see whether the PC is in the ++ // interpreter and take care of that, otherwise we run code ++ // (unfortunately) duplicated from LOONGARCH64Frame.senderForCompiledFrame. ++ ++ CodeCache cc = vm.getCodeCache(); ++ if (cc.contains(pc)) { ++ CodeBlob cb = cc.findBlob(pc); ++ ++ // See if we can derive a frame pointer from SP and PC ++ // NOTE: This is the code duplicated from LOONGARCH64Frame ++ Address saved_fp = null; ++ int llink_offset = cb.getLinkOffset(); ++ if (llink_offset >= 0) { ++ // Restore base-pointer, since next frame might be an interpreter frame. ++ Address fp_addr = sp.addOffsetTo(VM.getVM().getAddressSize() * llink_offset); ++ saved_fp = fp_addr.getAddressAt(0); ++ } ++ ++ setValues(sp, saved_fp, pc); ++ return true; ++ } ++ */ ++ } ++ } else { ++ // If the current program counter was not known to us as a Java ++ // PC, we currently assume that we are in the run-time system ++ // and attempt to look to thread-local storage for saved ESP and ++ // EBP. Note that if these are null (because we were, in fact, ++ // in Java code, i.e., vtable stubs or similar, and the SA ++ // didn't have enough insight into the target VM to understand ++ // that) then we are going to lose the entire stack trace for ++ // the thread, which is sub-optimal. FIXME. ++ ++ if (DEBUG) { ++ System.out.println("CurrentFrameGuess: choosing last Java frame: sp = " + ++ thread.getLastJavaSP() + ", fp = " + thread.getLastJavaFP()); ++ } ++ if (thread.getLastJavaSP() == null) { ++ return false; // No known Java frames on stack ++ } ++ ++ // The runtime has a nasty habit of not saving fp in the frame ++ // anchor, leaving us to grovel about in the stack to find a ++ // plausible address. Fortunately, this only happens in ++ // compiled code; there we always have a valid PC, and we always ++ // push LR and FP onto the stack as a pair, with FP at the lower ++ // address. ++ pc = thread.getLastJavaPC(); ++ fp = thread.getLastJavaFP(); ++ sp = thread.getLastJavaSP(); ++ ++ if (fp == null) { ++ CodeCache cc = vm.getCodeCache(); ++ if (cc.contains(pc)) { ++ CodeBlob cb = cc.findBlob(pc); ++ if (DEBUG) { ++ System.out.println("FP is null. Found blob frame size " + cb.getFrameSize()); ++ } ++ // See if we can derive a frame pointer from SP and PC ++ long link_offset = cb.getFrameSize() - 2 * VM.getVM().getAddressSize(); ++ if (link_offset >= 0) { ++ fp = sp.addOffsetTo(link_offset); ++ } ++ } ++ } ++ ++ // We found a PC in the frame anchor. Check that it's plausible, and ++ // if it is, use it. 
++ if (vm.isJavaPCDbg(pc)) { ++ setValues(sp, fp, pc); ++ } else { ++ setValues(sp, fp, null); ++ } ++ ++ return true; ++ } ++ } ++ ++ public Address getSP() { return spFound; } ++ public Address getFP() { return fpFound; } ++ /** May be null if getting values from thread-local storage; take ++ care to call the correct LOONGARCH64Frame constructor to recover this if ++ necessary */ ++ public Address getPC() { return pcFound; } ++ ++ private void setValues(Address sp, Address fp, Address pc) { ++ spFound = sp; ++ fpFound = fp; ++ pcFound = pc; ++ } ++} +diff --git a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/loongarch64/LOONGARCH64Frame.java b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/loongarch64/LOONGARCH64Frame.java +new file mode 100644 +index 00000000000..576654594d8 +--- /dev/null ++++ b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/loongarch64/LOONGARCH64Frame.java +@@ -0,0 +1,519 @@ ++/* ++ * Copyright (c) 2001, 2015, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2018, 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++package sun.jvm.hotspot.runtime.loongarch64; ++ ++import java.util.*; ++import sun.jvm.hotspot.code.*; ++import sun.jvm.hotspot.compiler.*; ++import sun.jvm.hotspot.debugger.*; ++import sun.jvm.hotspot.oops.*; ++import sun.jvm.hotspot.runtime.*; ++import sun.jvm.hotspot.types.*; ++import sun.jvm.hotspot.utilities.*; ++import sun.jvm.hotspot.utilities.Observable; ++import sun.jvm.hotspot.utilities.Observer; ++ ++/** Specialization of and implementation of abstract methods of the ++ Frame class for the loongarch64 family of CPUs. 
*/ ++ ++public class LOONGARCH64Frame extends Frame { ++ private static final boolean DEBUG; ++ static { ++ DEBUG = System.getProperty("sun.jvm.hotspot.runtime.loongarch64.LOONGARCH64Frame.DEBUG") != null; ++ } ++ ++ private static final int LINK_OFFSET = -2; ++ private static final int RETURN_ADDR_OFFSET = -1; ++ private static final int SENDER_SP_OFFSET = 0; ++ ++ // Interpreter frames ++ private static final int INTERPRETER_FRAME_SENDER_SP_OFFSET = -3; ++ private static final int INTERPRETER_FRAME_LAST_SP_OFFSET = INTERPRETER_FRAME_SENDER_SP_OFFSET - 1; ++ private static final int INTERPRETER_FRAME_LOCALS_OFFSET = INTERPRETER_FRAME_LAST_SP_OFFSET - 1; ++ private static final int INTERPRETER_FRAME_METHOD_OFFSET = INTERPRETER_FRAME_LOCALS_OFFSET - 1; ++ private static final int INTERPRETER_FRAME_MIRROR_OFFSET = INTERPRETER_FRAME_METHOD_OFFSET - 1; ++ private static final int INTERPRETER_FRAME_MDX_OFFSET = INTERPRETER_FRAME_MIRROR_OFFSET - 1; ++ private static final int INTERPRETER_FRAME_CACHE_OFFSET = INTERPRETER_FRAME_MDX_OFFSET - 1; ++ private static final int INTERPRETER_FRAME_BCX_OFFSET = INTERPRETER_FRAME_CACHE_OFFSET - 1; ++ private static final int INTERPRETER_FRAME_INITIAL_SP_OFFSET = INTERPRETER_FRAME_BCX_OFFSET - 1; ++ private static final int INTERPRETER_FRAME_MONITOR_BLOCK_TOP_OFFSET = INTERPRETER_FRAME_INITIAL_SP_OFFSET; ++ private static final int INTERPRETER_FRAME_MONITOR_BLOCK_BOTTOM_OFFSET = INTERPRETER_FRAME_INITIAL_SP_OFFSET; ++ ++ // Entry frames ++ private static final int ENTRY_FRAME_CALL_WRAPPER_OFFSET = -11; ++ ++ private static VMReg fp = new VMReg(22 << 1); ++ ++ // an additional field beyond sp and pc: ++ Address raw_fp; // frame pointer ++ private Address raw_unextendedSP; ++ ++ private LOONGARCH64Frame() { ++ } ++ ++ private void adjustForDeopt() { ++ if ( pc != null) { ++ // Look for a deopt pc and if it is deopted convert to original pc ++ CodeBlob cb = VM.getVM().getCodeCache().findBlob(pc); ++ if (cb != null && cb.isJavaMethod()) { ++ NMethod nm = (NMethod) cb; ++ if (pc.equals(nm.deoptHandlerBegin())) { ++ if (Assert.ASSERTS_ENABLED) { ++ Assert.that(this.getUnextendedSP() != null, "null SP in Java frame"); ++ } ++ // adjust pc if frame is deoptimized. 
++ pc = this.getUnextendedSP().getAddressAt(nm.origPCOffset()); ++ deoptimized = true; ++ } ++ } ++ } ++ } ++ ++ public LOONGARCH64Frame(Address raw_sp, Address raw_fp, Address pc) { ++ this.raw_sp = raw_sp; ++ this.raw_unextendedSP = raw_sp; ++ this.raw_fp = raw_fp; ++ this.pc = pc; ++ ++ // Frame must be fully constructed before this call ++ adjustForDeopt(); ++ ++ if (DEBUG) { ++ System.out.println("LOONGARCH64Frame(sp, fp, pc): " + this); ++ dumpStack(); ++ } ++ } ++ ++ public LOONGARCH64Frame(Address raw_sp, Address raw_fp) { ++ this.raw_sp = raw_sp; ++ this.raw_unextendedSP = raw_sp; ++ this.raw_fp = raw_fp; ++ Address savedPC = raw_sp.getAddressAt(-1 * VM.getVM().getAddressSize()); ++ ++ if (VM.getVM().isJavaPCDbg(savedPC)) { ++ this.pc = savedPC; ++ } ++ adjustUnextendedSP(); ++ ++ // Frame must be fully constructed before this call ++ adjustForDeopt(); ++ ++ if (DEBUG) { ++ System.out.println("LOONGARCH64Frame(sp, fp): " + this); ++ dumpStack(); ++ } ++ } ++ ++ public LOONGARCH64Frame(Address raw_sp, Address raw_unextendedSp, Address raw_fp, Address pc) { ++ this.raw_sp = raw_sp; ++ this.raw_unextendedSP = raw_unextendedSp; ++ this.raw_fp = raw_fp; ++ this.pc = pc; ++ adjustUnextendedSP(); ++ ++ // Frame must be fully constructed before this call ++ adjustForDeopt(); ++ ++ if (DEBUG) { ++ System.out.println("LOONGARCH64Frame(sp, unextendedSP, fp, pc): " + this); ++ dumpStack(); ++ } ++ ++ } ++ ++ public Object clone() { ++ LOONGARCH64Frame frame = new LOONGARCH64Frame(); ++ frame.raw_sp = raw_sp; ++ frame.raw_unextendedSP = raw_unextendedSP; ++ frame.raw_fp = raw_fp; ++ frame.pc = pc; ++ frame.deoptimized = deoptimized; ++ return frame; ++ } ++ ++ public boolean equals(Object arg) { ++ if (arg == null) { ++ return false; ++ } ++ ++ if (!(arg instanceof LOONGARCH64Frame)) { ++ return false; ++ } ++ ++ LOONGARCH64Frame other = (LOONGARCH64Frame) arg; ++ ++ return (AddressOps.equal(getSP(), other.getSP()) && ++ AddressOps.equal(getUnextendedSP(), other.getUnextendedSP()) && ++ AddressOps.equal(getFP(), other.getFP()) && ++ AddressOps.equal(getPC(), other.getPC())); ++ } ++ ++ public int hashCode() { ++ if (raw_sp == null) { ++ return 0; ++ } ++ ++ return raw_sp.hashCode(); ++ } ++ ++ public String toString() { ++ return "sp: " + (getSP() == null? "null" : getSP().toString()) + ++ ", unextendedSP: " + (getUnextendedSP() == null? "null" : getUnextendedSP().toString()) + ++ ", fp: " + (getFP() == null? "null" : getFP().toString()) + ++ ", pc: " + (pc == null? "null" : pc.toString()); ++ } ++ ++ // accessors for the instance variables ++ public Address getFP() { return raw_fp; } ++ public Address getSP() { return raw_sp; } ++ public Address getID() { return raw_sp; } ++ ++ // FIXME: not implemented yet (should be done for Solaris/LOONGARCH) ++ public boolean isSignalHandlerFrameDbg() { return false; } ++ public int getSignalNumberDbg() { return 0; } ++ public String getSignalNameDbg() { return null; } ++ ++ public boolean isInterpretedFrameValid() { ++ if (Assert.ASSERTS_ENABLED) { ++ Assert.that(isInterpretedFrame(), "Not an interpreted frame"); ++ } ++ ++ // These are reasonable sanity checks ++ if (getFP() == null || getFP().andWithMask(0x3) != null) { ++ return false; ++ } ++ ++ if (getSP() == null || getSP().andWithMask(0x3) != null) { ++ return false; ++ } ++ ++ if (getFP().addOffsetTo(INTERPRETER_FRAME_INITIAL_SP_OFFSET * VM.getVM().getAddressSize()).lessThan(getSP())) { ++ return false; ++ } ++ ++ // These are hacks to keep us out of trouble. 
++ // The problem with these is that they mask other problems ++ if (getFP().lessThanOrEqual(getSP())) { ++ // this attempts to deal with unsigned comparison above ++ return false; ++ } ++ ++ if (getFP().minus(getSP()) > 4096 * VM.getVM().getAddressSize()) { ++ // stack frames shouldn't be large. ++ return false; ++ } ++ ++ return true; ++ } ++ ++ // FIXME: not applicable in current system ++ // void patch_pc(Thread* thread, address pc); ++ ++ public Frame sender(RegisterMap regMap, CodeBlob cb) { ++ LOONGARCH64RegisterMap map = (LOONGARCH64RegisterMap) regMap; ++ ++ if (Assert.ASSERTS_ENABLED) { ++ Assert.that(map != null, "map must be set"); ++ } ++ ++ // Default is we done have to follow them. The sender_for_xxx will ++ // update it accordingly ++ map.setIncludeArgumentOops(false); ++ ++ if (isEntryFrame()) return senderForEntryFrame(map); ++ if (isInterpretedFrame()) return senderForInterpreterFrame(map); ++ ++ if(cb == null) { ++ cb = VM.getVM().getCodeCache().findBlob(getPC()); ++ } else { ++ if (Assert.ASSERTS_ENABLED) { ++ Assert.that(cb.equals(VM.getVM().getCodeCache().findBlob(getPC())), "Must be the same"); ++ } ++ } ++ ++ if (cb != null) { ++ return senderForCompiledFrame(map, cb); ++ } ++ ++ // Must be native-compiled frame, i.e. the marshaling code for native ++ // methods that exists in the core system. ++ return new LOONGARCH64Frame(getSenderSP(), getLink(), getSenderPC()); ++ } ++ ++ private Frame senderForEntryFrame(LOONGARCH64RegisterMap map) { ++ if (DEBUG) { ++ System.out.println("senderForEntryFrame"); ++ } ++ if (Assert.ASSERTS_ENABLED) { ++ Assert.that(map != null, "map must be set"); ++ } ++ // Java frame called from C; skip all C frames and return top C ++ // frame of that chunk as the sender ++ LOONGARCH64JavaCallWrapper jcw = (LOONGARCH64JavaCallWrapper) getEntryFrameCallWrapper(); ++ if (Assert.ASSERTS_ENABLED) { ++ Assert.that(!entryFrameIsFirst(), "next Java fp must be non zero"); ++ Assert.that(jcw.getLastJavaSP().greaterThan(getSP()), "must be above this frame on stack"); ++ } ++ LOONGARCH64Frame fr; ++ if (jcw.getLastJavaPC() != null) { ++ fr = new LOONGARCH64Frame(jcw.getLastJavaSP(), jcw.getLastJavaFP(), jcw.getLastJavaPC()); ++ } else { ++ fr = new LOONGARCH64Frame(jcw.getLastJavaSP(), jcw.getLastJavaFP()); ++ } ++ map.clear(); ++ if (Assert.ASSERTS_ENABLED) { ++ Assert.that(map.getIncludeArgumentOops(), "should be set by clear"); ++ } ++ return fr; ++ } ++ ++ //------------------------------------------------------------------------------ ++ // frame::adjust_unextended_sp ++ private void adjustUnextendedSP() { ++ // On loongarch, sites calling method handle intrinsics and lambda forms are treated ++ // as any other call site. Therefore, no special action is needed when we are ++ // returning to any of these call sites. ++ ++ CodeBlob cb = cb(); ++ NMethod senderNm = (cb == null) ? null : cb.asNMethodOrNull(); ++ if (senderNm != null) { ++ // If the sender PC is a deoptimization point, get the original PC. 
++ if (senderNm.isDeoptEntry(getPC()) || ++ senderNm.isDeoptMhEntry(getPC())) { ++ // DEBUG_ONLY(verifyDeoptriginalPc(senderNm, raw_unextendedSp)); ++ } ++ } ++ } ++ ++ private Frame senderForInterpreterFrame(LOONGARCH64RegisterMap map) { ++ if (DEBUG) { ++ System.out.println("senderForInterpreterFrame"); ++ } ++ Address unextendedSP = addressOfStackSlot(INTERPRETER_FRAME_SENDER_SP_OFFSET).getAddressAt(0); ++ Address sp = getSenderSP(); ++ // We do not need to update the callee-save register mapping because above ++ // us is either another interpreter frame or a converter-frame, but never ++ // directly a compiled frame. ++ // 11/24/04 SFG. With the removal of adapter frames this is no longer true. ++ // However c2 no longer uses callee save register for java calls so there ++ // are no callee register to find. ++ ++ if (map.getUpdateMap()) ++ updateMapWithSavedLink(map, addressOfStackSlot(LINK_OFFSET)); ++ ++ return new LOONGARCH64Frame(sp, unextendedSP, getLink(), getSenderPC()); ++ } ++ ++ private void updateMapWithSavedLink(RegisterMap map, Address savedFPAddr) { ++ map.setLocation(fp, savedFPAddr); ++ } ++ ++ private Frame senderForCompiledFrame(LOONGARCH64RegisterMap map, CodeBlob cb) { ++ if (DEBUG) { ++ System.out.println("senderForCompiledFrame"); ++ } ++ ++ // ++ // NOTE: some of this code is (unfortunately) duplicated in LOONGARCH64CurrentFrameGuess ++ // ++ ++ if (Assert.ASSERTS_ENABLED) { ++ Assert.that(map != null, "map must be set"); ++ } ++ ++ // frame owned by optimizing compiler ++ if (Assert.ASSERTS_ENABLED) { ++ Assert.that(cb.getFrameSize() >= 0, "must have non-zero frame size"); ++ } ++ Address senderSP = getUnextendedSP().addOffsetTo(cb.getFrameSize()); ++ ++ // On Intel the return_address is always the word on the stack ++ Address senderPC = senderSP.getAddressAt(-1 * VM.getVM().getAddressSize()); ++ ++ // This is the saved value of EBP which may or may not really be an FP. ++ // It is only an FP if the sender is an interpreter frame (or C1?). ++ Address savedFPAddr = senderSP.addOffsetTo(-2 * VM.getVM().getAddressSize()); ++ ++ if (map.getUpdateMap()) { ++ // Tell GC to use argument oopmaps for some runtime stubs that need it. ++ // For C1, the runtime stub might not have oop maps, so set this flag ++ // outside of update_register_map. ++ map.setIncludeArgumentOops(cb.callerMustGCArguments()); ++ ++ if (cb.getOopMaps() != null) { ++ ImmutableOopMapSet.updateRegisterMap(this, cb, map, true); ++ } ++ ++ // Since the prolog does the save and restore of EBP there is no oopmap ++ // for it so we must fill in its location as if there was an oopmap entry ++ // since if our caller was compiled code there could be live jvm state in it. ++ updateMapWithSavedLink(map, savedFPAddr); ++ } ++ ++ return new LOONGARCH64Frame(senderSP, savedFPAddr.getAddressAt(0), senderPC); ++ } ++ ++ protected boolean hasSenderPD() { ++ // FIXME ++ // Check for null ebp? Need to do some tests. 
++ return true; ++ } ++ ++ public long frameSize() { ++ return (getSenderSP().minus(getSP()) / VM.getVM().getAddressSize()); ++ } ++ ++ public Address getLink() { ++ return addressOfStackSlot(LINK_OFFSET).getAddressAt(0); ++ } ++ ++ public Address getUnextendedSP() { return raw_unextendedSP; } ++ ++ // Return address: ++ public Address getSenderPCAddr() { ++ return addressOfStackSlot(RETURN_ADDR_OFFSET); ++ } ++ ++ public Address getSenderPC() { return getSenderPCAddr().getAddressAt(0); } ++ ++ public Address getSenderSP() { ++ return addressOfStackSlot(SENDER_SP_OFFSET); ++ } ++ ++ public Address addressOfInterpreterFrameLocals() { ++ return addressOfStackSlot(INTERPRETER_FRAME_LOCALS_OFFSET); ++ } ++ ++ private Address addressOfInterpreterFrameBCX() { ++ return addressOfStackSlot(INTERPRETER_FRAME_BCX_OFFSET); ++ } ++ ++ public int getInterpreterFrameBCI() { ++ // FIXME: this is not atomic with respect to GC and is unsuitable ++ // for use in a non-debugging, or reflective, system. Need to ++ // figure out how to express this. ++ Address bcp = addressOfInterpreterFrameBCX().getAddressAt(0); ++ Address methodHandle = addressOfInterpreterFrameMethod().getAddressAt(0); ++ Method method = (Method)Metadata.instantiateWrapperFor(methodHandle); ++ return bcpToBci(bcp, method); ++ } ++ ++ public Address addressOfInterpreterFrameMDX() { ++ return addressOfStackSlot(INTERPRETER_FRAME_MDX_OFFSET); ++ } ++ ++ // FIXME ++ //inline int frame::interpreter_frame_monitor_size() { ++ // return BasicObjectLock::size(); ++ //} ++ ++ // expression stack ++ // (the max_stack arguments are used by the GC; see class FrameClosure) ++ ++ public Address addressOfInterpreterFrameExpressionStack() { ++ Address monitorEnd = interpreterFrameMonitorEnd().address(); ++ return monitorEnd.addOffsetTo(-1 * VM.getVM().getAddressSize()); ++ } ++ ++ public int getInterpreterFrameExpressionStackDirection() { return -1; } ++ ++ // top of expression stack ++ public Address addressOfInterpreterFrameTOS() { ++ return getSP(); ++ } ++ ++ /** Expression stack from top down */ ++ public Address addressOfInterpreterFrameTOSAt(int slot) { ++ return addressOfInterpreterFrameTOS().addOffsetTo(slot * VM.getVM().getAddressSize()); ++ } ++ ++ public Address getInterpreterFrameSenderSP() { ++ if (Assert.ASSERTS_ENABLED) { ++ Assert.that(isInterpretedFrame(), "interpreted frame expected"); ++ } ++ return addressOfStackSlot(INTERPRETER_FRAME_SENDER_SP_OFFSET).getAddressAt(0); ++ } ++ ++ // Monitors ++ public BasicObjectLock interpreterFrameMonitorBegin() { ++ return new BasicObjectLock(addressOfStackSlot(INTERPRETER_FRAME_MONITOR_BLOCK_BOTTOM_OFFSET)); ++ } ++ ++ public BasicObjectLock interpreterFrameMonitorEnd() { ++ Address result = addressOfStackSlot(INTERPRETER_FRAME_MONITOR_BLOCK_TOP_OFFSET).getAddressAt(0); ++ if (Assert.ASSERTS_ENABLED) { ++ // make sure the pointer points inside the frame ++ Assert.that(AddressOps.gt(getFP(), result), "result must < than frame pointer"); ++ Assert.that(AddressOps.lte(getSP(), result), "result must >= than stack pointer"); ++ } ++ return new BasicObjectLock(result); ++ } ++ ++ public int interpreterFrameMonitorSize() { ++ return BasicObjectLock.size(); ++ } ++ ++ // Method ++ public Address addressOfInterpreterFrameMethod() { ++ return addressOfStackSlot(INTERPRETER_FRAME_METHOD_OFFSET); ++ } ++ ++ // Constant pool cache ++ public Address addressOfInterpreterFrameCPCache() { ++ return addressOfStackSlot(INTERPRETER_FRAME_CACHE_OFFSET); ++ } ++ ++ // Entry frames ++ public JavaCallWrapper 
getEntryFrameCallWrapper() { ++ return new LOONGARCH64JavaCallWrapper(addressOfStackSlot(ENTRY_FRAME_CALL_WRAPPER_OFFSET).getAddressAt(0)); ++ } ++ ++ protected Address addressOfSavedOopResult() { ++ // offset is 2 for compiler2 and 3 for compiler1 ++ return getSP().addOffsetTo((VM.getVM().isClientCompiler() ? 2 : 3) * ++ VM.getVM().getAddressSize()); ++ } ++ ++ protected Address addressOfSavedReceiver() { ++ return getSP().addOffsetTo(-4 * VM.getVM().getAddressSize()); ++ } ++ ++ private void dumpStack() { ++ if (getFP() != null) { ++ for (Address addr = getSP().addOffsetTo(-5 * VM.getVM().getAddressSize()); ++ AddressOps.lte(addr, getFP().addOffsetTo(5 * VM.getVM().getAddressSize())); ++ addr = addr.addOffsetTo(VM.getVM().getAddressSize())) { ++ System.out.println(addr + ": " + addr.getAddressAt(0)); ++ } ++ } else { ++ for (Address addr = getSP().addOffsetTo(-5 * VM.getVM().getAddressSize()); ++ AddressOps.lte(addr, getSP().addOffsetTo(20 * VM.getVM().getAddressSize())); ++ addr = addr.addOffsetTo(VM.getVM().getAddressSize())) { ++ System.out.println(addr + ": " + addr.getAddressAt(0)); ++ } ++ } ++ } ++} +diff --git a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/loongarch64/LOONGARCH64JavaCallWrapper.java b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/loongarch64/LOONGARCH64JavaCallWrapper.java +new file mode 100644 +index 00000000000..0ad9573a42d +--- /dev/null ++++ b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/loongarch64/LOONGARCH64JavaCallWrapper.java +@@ -0,0 +1,59 @@ ++/* ++ * Copyright (c) 2001, 2002, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2018, 2021, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ * ++ */ ++ ++package sun.jvm.hotspot.runtime.loongarch64; ++ ++import java.util.*; ++import sun.jvm.hotspot.debugger.*; ++import sun.jvm.hotspot.types.*; ++import sun.jvm.hotspot.runtime.*; ++import sun.jvm.hotspot.utilities.Observable; ++import sun.jvm.hotspot.utilities.Observer; ++ ++public class LOONGARCH64JavaCallWrapper extends JavaCallWrapper { ++ private static AddressField lastJavaFPField; ++ ++ static { ++ VM.registerVMInitializedObserver(new Observer() { ++ public void update(Observable o, Object data) { ++ initialize(VM.getVM().getTypeDataBase()); ++ } ++ }); ++ } ++ ++ private static synchronized void initialize(TypeDataBase db) { ++ Type type = db.lookupType("JavaFrameAnchor"); ++ ++ lastJavaFPField = type.getAddressField("_last_Java_fp"); ++ } ++ ++ public LOONGARCH64JavaCallWrapper(Address addr) { ++ super(addr); ++ } ++ ++ public Address getLastJavaFP() { ++ return lastJavaFPField.getValue(addr.addOffsetTo(anchorField.getOffset())); ++ } ++} +diff --git a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/loongarch64/LOONGARCH64RegisterMap.java b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/loongarch64/LOONGARCH64RegisterMap.java +new file mode 100644 +index 00000000000..2cf904d3885 +--- /dev/null ++++ b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/loongarch64/LOONGARCH64RegisterMap.java +@@ -0,0 +1,52 @@ ++/* ++ * Copyright (c) 2001, 2012, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2018, 2021, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ * ++ */ ++ ++package sun.jvm.hotspot.runtime.loongarch64; ++ ++import sun.jvm.hotspot.debugger.*; ++import sun.jvm.hotspot.runtime.*; ++ ++public class LOONGARCH64RegisterMap extends RegisterMap { ++ ++ /** This is the only public constructor */ ++ public LOONGARCH64RegisterMap(JavaThread thread, boolean updateMap) { ++ super(thread, updateMap); ++ } ++ ++ protected LOONGARCH64RegisterMap(RegisterMap map) { ++ super(map); ++ } ++ ++ public Object clone() { ++ LOONGARCH64RegisterMap retval = new LOONGARCH64RegisterMap(this); ++ return retval; ++ } ++ ++ // no PD state to clear or copy: ++ protected void clearPD() {} ++ protected void initializePD() {} ++ protected void initializeFromPD(RegisterMap map) {} ++ protected Address getLocationPD(VMReg reg) { return null; } ++} +diff --git a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/mips64/MIPS64CurrentFrameGuess.java b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/mips64/MIPS64CurrentFrameGuess.java +new file mode 100644 +index 00000000000..c11458abe2c +--- /dev/null ++++ b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/mips64/MIPS64CurrentFrameGuess.java +@@ -0,0 +1,217 @@ ++/* ++ * Copyright (c) 2001, 2006, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2018, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++package sun.jvm.hotspot.runtime.mips64; ++ ++import sun.jvm.hotspot.debugger.*; ++import sun.jvm.hotspot.debugger.mips64.*; ++import sun.jvm.hotspot.code.*; ++import sun.jvm.hotspot.interpreter.*; ++import sun.jvm.hotspot.runtime.*; ++ ++/**
++ Should be able to be used on all mips64 platforms we support
++ (Win32, Solaris/mips64, and soon Linux) to implement JavaThread's
++ "currentFrameGuess()" functionality. Input is an MIPS64ThreadContext;
++ output is SP, FP, and PC for an MIPS64Frame. Instantiation of the
++ MIPS64Frame is left to the caller, since we may need to subclass
++ MIPS64Frame to support signal handler frames on Unix platforms.
++
++ Algorithm is to walk up the stack within a given range (say,
++ 512K at most) looking for a plausible PC and SP for a Java frame,
++ also considering those coming in from the context. If we find a PC
++ that belongs to the VM (i.e., in generated code like the
++ interpreter or CodeCache) then we try to find an associated EBP.
++ We repeat this until we either find a complete frame or run out of
++ stack to look at.
*/ ++ ++public class MIPS64CurrentFrameGuess { ++ private MIPS64ThreadContext context; ++ private JavaThread thread; ++ private Address spFound; ++ private Address fpFound; ++ private Address pcFound; ++ ++ private static final boolean DEBUG = System.getProperty("sun.jvm.hotspot.runtime.mips64.MIPS64Frame.DEBUG") ++ != null; ++ ++ public MIPS64CurrentFrameGuess(MIPS64ThreadContext context, ++ JavaThread thread) { ++ this.context = context; ++ this.thread = thread; ++ } ++ ++ /** Returns false if not able to find a frame within a reasonable range. */ ++ public boolean run(long regionInBytesToSearch) { ++ Address sp = context.getRegisterAsAddress(MIPS64ThreadContext.SP); ++ Address pc = context.getRegisterAsAddress(MIPS64ThreadContext.PC); ++ Address fp = context.getRegisterAsAddress(MIPS64ThreadContext.FP); ++ if (sp == null) { ++ // Bail out if no last java frame eithe ++ if (thread.getLastJavaSP() != null) { ++ setValues(thread.getLastJavaSP(), thread.getLastJavaFP(), null); ++ return true; ++ } ++ // Bail out ++ return false; ++ } ++ Address end = sp.addOffsetTo(regionInBytesToSearch); ++ VM vm = VM.getVM(); ++ ++ setValues(null, null, null); // Assume we're not going to find anything ++ ++ if (vm.isJavaPCDbg(pc)) { ++ if (vm.isClientCompiler()) { ++ // If the topmost frame is a Java frame, we are (pretty much) ++ // guaranteed to have a viable EBP. We should be more robust ++ // than this (we have the potential for losing entire threads' ++ // stack traces) but need to see how much work we really have ++ // to do here. Searching the stack for an (SP, FP) pair is ++ // hard since it's easy to misinterpret inter-frame stack ++ // pointers as base-of-frame pointers; we also don't know the ++ // sizes of C1 frames (not registered in the nmethod) so can't ++ // derive them from ESP. ++ ++ setValues(sp, fp, pc); ++ return true; ++ } else { ++ if (vm.getInterpreter().contains(pc)) { ++ if (DEBUG) { ++ System.out.println("CurrentFrameGuess: choosing interpreter frame: sp = " + ++ sp + ", fp = " + fp + ", pc = " + pc); ++ } ++ setValues(sp, fp, pc); ++ return true; ++ } ++ ++ // For the server compiler, EBP is not guaranteed to be valid ++ // for compiled code. In addition, an earlier attempt at a ++ // non-searching algorithm (see below) failed because the ++ // stack pointer from the thread context was pointing ++ // (considerably) beyond the ostensible end of the stack, into ++ // garbage; walking from the topmost frame back caused a crash. ++ // ++ // This algorithm takes the current PC as a given and tries to ++ // find the correct corresponding SP by walking up the stack ++ // and repeatedly performing stackwalks (very inefficient). ++ // ++ // FIXME: there is something wrong with stackwalking across ++ // adapter frames...this is likely to be the root cause of the ++ // failure with the simpler algorithm below. ++ ++ for (long offset = 0; ++ offset < regionInBytesToSearch; ++ offset += vm.getAddressSize()) { ++ try { ++ Address curSP = sp.addOffsetTo(offset); ++ Frame frame = new MIPS64Frame(curSP, null, pc); ++ RegisterMap map = thread.newRegisterMap(false); ++ while (frame != null) { ++ if (frame.isEntryFrame() && frame.entryFrameIsFirst()) { ++ // We were able to traverse all the way to the ++ // bottommost Java frame. ++ // This sp looks good. Keep it. 
++ if (DEBUG) { ++ System.out.println("CurrentFrameGuess: Choosing sp = " + curSP + ", pc = " + pc); ++ } ++ setValues(curSP, null, pc); ++ return true; ++ } ++ frame = frame.sender(map); ++ } ++ } catch (Exception e) { ++ if (DEBUG) { ++ System.out.println("CurrentFrameGuess: Exception " + e + " at offset " + offset); ++ } ++ // Bad SP. Try another. ++ } ++ } ++ ++ // Were not able to find a plausible SP to go with this PC. ++ // Bail out. ++ return false; ++ ++ /* ++ // Original algorithm which does not work because SP was ++ // pointing beyond where it should have: ++ ++ // For the server compiler, EBP is not guaranteed to be valid ++ // for compiled code. We see whether the PC is in the ++ // interpreter and take care of that, otherwise we run code ++ // (unfortunately) duplicated from MIPS64Frame.senderForCompiledFrame. ++ ++ CodeCache cc = vm.getCodeCache(); ++ if (cc.contains(pc)) { ++ CodeBlob cb = cc.findBlob(pc); ++ ++ // See if we can derive a frame pointer from SP and PC ++ // NOTE: This is the code duplicated from MIPS64Frame ++ Address saved_fp = null; ++ int llink_offset = cb.getLinkOffset(); ++ if (llink_offset >= 0) { ++ // Restore base-pointer, since next frame might be an interpreter frame. ++ Address fp_addr = sp.addOffsetTo(VM.getVM().getAddressSize() * llink_offset); ++ saved_fp = fp_addr.getAddressAt(0); ++ } ++ ++ setValues(sp, saved_fp, pc); ++ return true; ++ } ++ */ ++ } ++ } else { ++ // If the current program counter was not known to us as a Java ++ // PC, we currently assume that we are in the run-time system ++ // and attempt to look to thread-local storage for saved ESP and ++ // EBP. Note that if these are null (because we were, in fact, ++ // in Java code, i.e., vtable stubs or similar, and the SA ++ // didn't have enough insight into the target VM to understand ++ // that) then we are going to lose the entire stack trace for ++ // the thread, which is sub-optimal. FIXME. ++ ++ if (DEBUG) { ++ System.out.println("CurrentFrameGuess: choosing last Java frame: sp = " + ++ thread.getLastJavaSP() + ", fp = " + thread.getLastJavaFP()); ++ } ++ if (thread.getLastJavaSP() == null) { ++ return false; // No known Java frames on stack ++ } ++ setValues(thread.getLastJavaSP(), thread.getLastJavaFP(), null); ++ return true; ++ } ++ } ++ ++ public Address getSP() { return spFound; } ++ public Address getFP() { return fpFound; } ++ /** May be null if getting values from thread-local storage; take ++ care to call the correct MIPS64Frame constructor to recover this if ++ necessary */ ++ public Address getPC() { return pcFound; } ++ ++ private void setValues(Address sp, Address fp, Address pc) { ++ spFound = sp; ++ fpFound = fp; ++ pcFound = pc; ++ } ++} +diff --git a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/mips64/MIPS64Frame.java b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/mips64/MIPS64Frame.java +new file mode 100644 +index 00000000000..e11d64737dd +--- /dev/null ++++ b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/mips64/MIPS64Frame.java +@@ -0,0 +1,539 @@ ++/* ++ * Copyright (c) 2001, 2015, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2018, 2020, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. 
++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++package sun.jvm.hotspot.runtime.mips64; ++ ++import java.util.*; ++import sun.jvm.hotspot.code.*; ++import sun.jvm.hotspot.compiler.*; ++import sun.jvm.hotspot.debugger.*; ++import sun.jvm.hotspot.oops.*; ++import sun.jvm.hotspot.runtime.*; ++import sun.jvm.hotspot.types.*; ++import sun.jvm.hotspot.utilities.*; ++import sun.jvm.hotspot.utilities.Observable; ++import sun.jvm.hotspot.utilities.Observer; ++ ++/** Specialization of and implementation of abstract methods of the ++ Frame class for the mips64 family of CPUs. */ ++ ++public class MIPS64Frame extends Frame { ++ private static final boolean DEBUG; ++ static { ++ DEBUG = System.getProperty("sun.jvm.hotspot.runtime.mips64.MIPS64Frame.DEBUG") != null; ++ } ++ ++ // All frames ++ private static final int LINK_OFFSET = 0; ++ private static final int RETURN_ADDR_OFFSET = 1; ++ private static final int SENDER_SP_OFFSET = 2; ++ ++ // Interpreter frames ++ private static final int INTERPRETER_FRAME_SENDER_SP_OFFSET = -1; ++ private static final int INTERPRETER_FRAME_LAST_SP_OFFSET = INTERPRETER_FRAME_SENDER_SP_OFFSET - 1; ++ private static final int INTERPRETER_FRAME_METHOD_OFFSET = INTERPRETER_FRAME_LAST_SP_OFFSET - 1; ++ private static int INTERPRETER_FRAME_MIRROR_OFFSET; ++ private static int INTERPRETER_FRAME_MDX_OFFSET; // Non-core builds only ++ private static int INTERPRETER_FRAME_CACHE_OFFSET; ++ private static int INTERPRETER_FRAME_LOCALS_OFFSET; ++ private static int INTERPRETER_FRAME_BCX_OFFSET; ++ private static int INTERPRETER_FRAME_INITIAL_SP_OFFSET; ++ private static int INTERPRETER_FRAME_MONITOR_BLOCK_TOP_OFFSET; ++ private static int INTERPRETER_FRAME_MONITOR_BLOCK_BOTTOM_OFFSET; ++ ++ // Entry frames ++ private static int ENTRY_FRAME_CALL_WRAPPER_OFFSET; ++ ++ private static VMReg rbp; ++ ++ static { ++ VM.registerVMInitializedObserver(new Observer() { ++ public void update(Observable o, Object data) { ++ initialize(VM.getVM().getTypeDataBase()); ++ } ++ }); ++ } ++ ++ private static synchronized void initialize(TypeDataBase db) { ++ INTERPRETER_FRAME_MIRROR_OFFSET = INTERPRETER_FRAME_METHOD_OFFSET - 1; ++ INTERPRETER_FRAME_MDX_OFFSET = INTERPRETER_FRAME_MIRROR_OFFSET - 1; ++ INTERPRETER_FRAME_CACHE_OFFSET = INTERPRETER_FRAME_MDX_OFFSET - 1; ++ INTERPRETER_FRAME_LOCALS_OFFSET = INTERPRETER_FRAME_CACHE_OFFSET - 1; ++ INTERPRETER_FRAME_BCX_OFFSET = INTERPRETER_FRAME_LOCALS_OFFSET - 1; ++ INTERPRETER_FRAME_INITIAL_SP_OFFSET = INTERPRETER_FRAME_BCX_OFFSET - 1; ++ INTERPRETER_FRAME_MONITOR_BLOCK_TOP_OFFSET = INTERPRETER_FRAME_INITIAL_SP_OFFSET; ++ INTERPRETER_FRAME_MONITOR_BLOCK_BOTTOM_OFFSET = INTERPRETER_FRAME_INITIAL_SP_OFFSET; ++ ++ ENTRY_FRAME_CALL_WRAPPER_OFFSET = db.lookupIntConstant("frame::entry_frame_call_wrapper_offset"); ++ if (VM.getVM().getAddressSize() == 4) { ++ rbp = new 
VMReg(5); ++ } else { ++ rbp = new VMReg(5 << 1); ++ } ++ } ++ ++ ++ // an additional field beyond sp and pc: ++ Address raw_fp; // frame pointer ++ private Address raw_unextendedSP; ++ ++ private MIPS64Frame() { ++ } ++ ++ private void adjustForDeopt() { ++ if ( pc != null) { ++ // Look for a deopt pc and if it is deopted convert to original pc ++ CodeBlob cb = VM.getVM().getCodeCache().findBlob(pc); ++ if (cb != null && cb.isJavaMethod()) { ++ NMethod nm = (NMethod) cb; ++ if (pc.equals(nm.deoptHandlerBegin())) { ++ if (Assert.ASSERTS_ENABLED) { ++ Assert.that(this.getUnextendedSP() != null, "null SP in Java frame"); ++ } ++ // adjust pc if frame is deoptimized. ++ pc = this.getUnextendedSP().getAddressAt(nm.origPCOffset()); ++ deoptimized = true; ++ } ++ } ++ } ++ } ++ ++ public MIPS64Frame(Address raw_sp, Address raw_fp, Address pc) { ++ this.raw_sp = raw_sp; ++ this.raw_unextendedSP = raw_sp; ++ this.raw_fp = raw_fp; ++ this.pc = pc; ++ adjustUnextendedSP(); ++ ++ // Frame must be fully constructed before this call ++ adjustForDeopt(); ++ ++ if (DEBUG) { ++ System.out.println("MIPS64Frame(sp, fp, pc): " + this); ++ dumpStack(); ++ } ++ } ++ ++ public MIPS64Frame(Address raw_sp, Address raw_fp) { ++ this.raw_sp = raw_sp; ++ this.raw_unextendedSP = raw_sp; ++ this.raw_fp = raw_fp; ++ this.pc = raw_sp.getAddressAt(-1 * VM.getVM().getAddressSize()); ++ adjustUnextendedSP(); ++ ++ // Frame must be fully constructed before this call ++ adjustForDeopt(); ++ ++ if (DEBUG) { ++ System.out.println("MIPS64Frame(sp, fp): " + this); ++ dumpStack(); ++ } ++ } ++ ++ public MIPS64Frame(Address raw_sp, Address raw_unextendedSp, Address raw_fp, Address pc) { ++ this.raw_sp = raw_sp; ++ this.raw_unextendedSP = raw_unextendedSp; ++ this.raw_fp = raw_fp; ++ this.pc = pc; ++ adjustUnextendedSP(); ++ ++ // Frame must be fully constructed before this call ++ adjustForDeopt(); ++ ++ if (DEBUG) { ++ System.out.println("MIPS64Frame(sp, unextendedSP, fp, pc): " + this); ++ dumpStack(); ++ } ++ ++ } ++ ++ public Object clone() { ++ MIPS64Frame frame = new MIPS64Frame(); ++ frame.raw_sp = raw_sp; ++ frame.raw_unextendedSP = raw_unextendedSP; ++ frame.raw_fp = raw_fp; ++ frame.pc = pc; ++ frame.deoptimized = deoptimized; ++ return frame; ++ } ++ ++ public boolean equals(Object arg) { ++ if (arg == null) { ++ return false; ++ } ++ ++ if (!(arg instanceof MIPS64Frame)) { ++ return false; ++ } ++ ++ MIPS64Frame other = (MIPS64Frame) arg; ++ ++ return (AddressOps.equal(getSP(), other.getSP()) && ++ AddressOps.equal(getUnextendedSP(), other.getUnextendedSP()) && ++ AddressOps.equal(getFP(), other.getFP()) && ++ AddressOps.equal(getPC(), other.getPC())); ++ } ++ ++ public int hashCode() { ++ if (raw_sp == null) { ++ return 0; ++ } ++ ++ return raw_sp.hashCode(); ++ } ++ ++ public String toString() { ++ return "sp: " + (getSP() == null? "null" : getSP().toString()) + ++ ", unextendedSP: " + (getUnextendedSP() == null? "null" : getUnextendedSP().toString()) + ++ ", fp: " + (getFP() == null? "null" : getFP().toString()) + ++ ", pc: " + (pc == null? 
"null" : pc.toString()); ++ } ++ ++ // accessors for the instance variables ++ public Address getFP() { return raw_fp; } ++ public Address getSP() { return raw_sp; } ++ public Address getID() { return raw_sp; } ++ ++ // FIXME: not implemented yet (should be done for Solaris/MIPS) ++ public boolean isSignalHandlerFrameDbg() { return false; } ++ public int getSignalNumberDbg() { return 0; } ++ public String getSignalNameDbg() { return null; } ++ ++ public boolean isInterpretedFrameValid() { ++ if (Assert.ASSERTS_ENABLED) { ++ Assert.that(isInterpretedFrame(), "Not an interpreted frame"); ++ } ++ ++ // These are reasonable sanity checks ++ if (getFP() == null || getFP().andWithMask(0x3) != null) { ++ return false; ++ } ++ ++ if (getSP() == null || getSP().andWithMask(0x3) != null) { ++ return false; ++ } ++ ++ if (getFP().addOffsetTo(INTERPRETER_FRAME_INITIAL_SP_OFFSET * VM.getVM().getAddressSize()).lessThan(getSP())) { ++ return false; ++ } ++ ++ // These are hacks to keep us out of trouble. ++ // The problem with these is that they mask other problems ++ if (getFP().lessThanOrEqual(getSP())) { ++ // this attempts to deal with unsigned comparison above ++ return false; ++ } ++ ++ if (getFP().minus(getSP()) > 4096 * VM.getVM().getAddressSize()) { ++ // stack frames shouldn't be large. ++ return false; ++ } ++ ++ return true; ++ } ++ ++ // FIXME: not applicable in current system ++ // void patch_pc(Thread* thread, address pc); ++ ++ public Frame sender(RegisterMap regMap, CodeBlob cb) { ++ MIPS64RegisterMap map = (MIPS64RegisterMap) regMap; ++ ++ if (Assert.ASSERTS_ENABLED) { ++ Assert.that(map != null, "map must be set"); ++ } ++ ++ // Default is we done have to follow them. The sender_for_xxx will ++ // update it accordingly ++ map.setIncludeArgumentOops(false); ++ ++ if (isEntryFrame()) return senderForEntryFrame(map); ++ if (isInterpretedFrame()) return senderForInterpreterFrame(map); ++ ++ if(cb == null) { ++ cb = VM.getVM().getCodeCache().findBlob(getPC()); ++ } else { ++ if (Assert.ASSERTS_ENABLED) { ++ Assert.that(cb.equals(VM.getVM().getCodeCache().findBlob(getPC())), "Must be the same"); ++ } ++ } ++ ++ if (cb != null) { ++ return senderForCompiledFrame(map, cb); ++ } ++ ++ // Must be native-compiled frame, i.e. the marshaling code for native ++ // methods that exists in the core system. 
++ return new MIPS64Frame(getSenderSP(), getLink(), getSenderPC()); ++ } ++ ++ private Frame senderForEntryFrame(MIPS64RegisterMap map) { ++ if (DEBUG) { ++ System.out.println("senderForEntryFrame"); ++ } ++ if (Assert.ASSERTS_ENABLED) { ++ Assert.that(map != null, "map must be set"); ++ } ++ // Java frame called from C; skip all C frames and return top C ++ // frame of that chunk as the sender ++ MIPS64JavaCallWrapper jcw = (MIPS64JavaCallWrapper) getEntryFrameCallWrapper(); ++ if (Assert.ASSERTS_ENABLED) { ++ Assert.that(!entryFrameIsFirst(), "next Java fp must be non zero"); ++ Assert.that(jcw.getLastJavaSP().greaterThan(getSP()), "must be above this frame on stack"); ++ } ++ MIPS64Frame fr; ++ if (jcw.getLastJavaPC() != null) { ++ fr = new MIPS64Frame(jcw.getLastJavaSP(), jcw.getLastJavaFP(), jcw.getLastJavaPC()); ++ } else { ++ fr = new MIPS64Frame(jcw.getLastJavaSP(), jcw.getLastJavaFP()); ++ } ++ map.clear(); ++ if (Assert.ASSERTS_ENABLED) { ++ Assert.that(map.getIncludeArgumentOops(), "should be set by clear"); ++ } ++ return fr; ++ } ++ ++ //------------------------------------------------------------------------------ ++ // frame::adjust_unextended_sp ++ private void adjustUnextendedSP() { ++ // On mips, sites calling method handle intrinsics and lambda forms are treated ++ // as any other call site. Therefore, no special action is needed when we are ++ // returning to any of these call sites. ++ ++ CodeBlob cb = cb(); ++ NMethod senderNm = (cb == null) ? null : cb.asNMethodOrNull(); ++ if (senderNm != null) { ++ // If the sender PC is a deoptimization point, get the original PC. ++ if (senderNm.isDeoptEntry(getPC()) || ++ senderNm.isDeoptMhEntry(getPC())) { ++ // DEBUG_ONLY(verifyDeoptriginalPc(senderNm, raw_unextendedSp)); ++ } ++ } ++ } ++ ++ private Frame senderForInterpreterFrame(MIPS64RegisterMap map) { ++ if (DEBUG) { ++ System.out.println("senderForInterpreterFrame"); ++ } ++ Address unextendedSP = addressOfStackSlot(INTERPRETER_FRAME_SENDER_SP_OFFSET).getAddressAt(0); ++ Address sp = addressOfStackSlot(SENDER_SP_OFFSET); ++ // We do not need to update the callee-save register mapping because above ++ // us is either another interpreter frame or a converter-frame, but never ++ // directly a compiled frame. ++ // 11/24/04 SFG. With the removal of adapter frames this is no longer true. ++ // However c2 no longer uses callee save register for java calls so there ++ // are no callee register to find. ++ ++ if (map.getUpdateMap()) ++ updateMapWithSavedLink(map, addressOfStackSlot(LINK_OFFSET)); ++ ++ return new MIPS64Frame(sp, unextendedSP, getLink(), getSenderPC()); ++ } ++ ++ private void updateMapWithSavedLink(RegisterMap map, Address savedFPAddr) { ++ map.setLocation(rbp, savedFPAddr); ++ } ++ ++ private Frame senderForCompiledFrame(MIPS64RegisterMap map, CodeBlob cb) { ++ if (DEBUG) { ++ System.out.println("senderForCompiledFrame"); ++ } ++ ++ // ++ // NOTE: some of this code is (unfortunately) duplicated in MIPS64CurrentFrameGuess ++ // ++ ++ if (Assert.ASSERTS_ENABLED) { ++ Assert.that(map != null, "map must be set"); ++ } ++ ++ // frame owned by optimizing compiler ++ if (Assert.ASSERTS_ENABLED) { ++ Assert.that(cb.getFrameSize() >= 0, "must have non-zero frame size"); ++ } ++ Address senderSP = getUnextendedSP().addOffsetTo(cb.getFrameSize()); ++ ++ // On Intel the return_address is always the word on the stack ++ Address senderPC = senderSP.getAddressAt(-1 * VM.getVM().getAddressSize()); ++ ++ // This is the saved value of EBP which may or may not really be an FP. 
++ // It is only an FP if the sender is an interpreter frame (or C1?). ++ Address savedFPAddr = senderSP.addOffsetTo(- SENDER_SP_OFFSET * VM.getVM().getAddressSize()); ++ ++ if (map.getUpdateMap()) { ++ // Tell GC to use argument oopmaps for some runtime stubs that need it. ++ // For C1, the runtime stub might not have oop maps, so set this flag ++ // outside of update_register_map. ++ map.setIncludeArgumentOops(cb.callerMustGCArguments()); ++ ++ if (cb.getOopMaps() != null) { ++ ImmutableOopMapSet.updateRegisterMap(this, cb, map, true); ++ } ++ ++ // Since the prolog does the save and restore of EBP there is no oopmap ++ // for it so we must fill in its location as if there was an oopmap entry ++ // since if our caller was compiled code there could be live jvm state in it. ++ updateMapWithSavedLink(map, savedFPAddr); ++ } ++ ++ return new MIPS64Frame(senderSP, savedFPAddr.getAddressAt(0), senderPC); ++ } ++ ++ protected boolean hasSenderPD() { ++ // FIXME ++ // Check for null ebp? Need to do some tests. ++ return true; ++ } ++ ++ public long frameSize() { ++ return (getSenderSP().minus(getSP()) / VM.getVM().getAddressSize()); ++ } ++ ++ public Address getLink() { ++ return addressOfStackSlot(LINK_OFFSET).getAddressAt(0); ++ } ++ ++ public Address getUnextendedSP() { return raw_unextendedSP; } ++ ++ // Return address: ++ public Address getSenderPCAddr() { return addressOfStackSlot(RETURN_ADDR_OFFSET); } ++ public Address getSenderPC() { return getSenderPCAddr().getAddressAt(0); } ++ ++ public Address getSenderSP() { return addressOfStackSlot(SENDER_SP_OFFSET); } ++ ++ public Address addressOfInterpreterFrameLocals() { ++ return addressOfStackSlot(INTERPRETER_FRAME_LOCALS_OFFSET); ++ } ++ ++ private Address addressOfInterpreterFrameBCX() { ++ return addressOfStackSlot(INTERPRETER_FRAME_BCX_OFFSET); ++ } ++ ++ public int getInterpreterFrameBCI() { ++ // FIXME: this is not atomic with respect to GC and is unsuitable ++ // for use in a non-debugging, or reflective, system. Need to ++ // figure out how to express this. 
++ Address bcp = addressOfInterpreterFrameBCX().getAddressAt(0); ++ Address methodHandle = addressOfInterpreterFrameMethod().getAddressAt(0); ++ Method method = (Method)Metadata.instantiateWrapperFor(methodHandle); ++ return bcpToBci(bcp, method); ++ } ++ ++ public Address addressOfInterpreterFrameMDX() { ++ return addressOfStackSlot(INTERPRETER_FRAME_MDX_OFFSET); ++ } ++ ++ // FIXME ++ //inline int frame::interpreter_frame_monitor_size() { ++ // return BasicObjectLock::size(); ++ //} ++ ++ // expression stack ++ // (the max_stack arguments are used by the GC; see class FrameClosure) ++ ++ public Address addressOfInterpreterFrameExpressionStack() { ++ Address monitorEnd = interpreterFrameMonitorEnd().address(); ++ return monitorEnd.addOffsetTo(-1 * VM.getVM().getAddressSize()); ++ } ++ ++ public int getInterpreterFrameExpressionStackDirection() { return -1; } ++ ++ // top of expression stack ++ public Address addressOfInterpreterFrameTOS() { ++ return getSP(); ++ } ++ ++ /** Expression stack from top down */ ++ public Address addressOfInterpreterFrameTOSAt(int slot) { ++ return addressOfInterpreterFrameTOS().addOffsetTo(slot * VM.getVM().getAddressSize()); ++ } ++ ++ public Address getInterpreterFrameSenderSP() { ++ if (Assert.ASSERTS_ENABLED) { ++ Assert.that(isInterpretedFrame(), "interpreted frame expected"); ++ } ++ return addressOfStackSlot(INTERPRETER_FRAME_SENDER_SP_OFFSET).getAddressAt(0); ++ } ++ ++ // Monitors ++ public BasicObjectLock interpreterFrameMonitorBegin() { ++ return new BasicObjectLock(addressOfStackSlot(INTERPRETER_FRAME_MONITOR_BLOCK_BOTTOM_OFFSET)); ++ } ++ ++ public BasicObjectLock interpreterFrameMonitorEnd() { ++ Address result = addressOfStackSlot(INTERPRETER_FRAME_MONITOR_BLOCK_TOP_OFFSET).getAddressAt(0); ++ if (Assert.ASSERTS_ENABLED) { ++ // make sure the pointer points inside the frame ++ Assert.that(AddressOps.gt(getFP(), result), "result must < than frame pointer"); ++ Assert.that(AddressOps.lte(getSP(), result), "result must >= than stack pointer"); ++ } ++ return new BasicObjectLock(result); ++ } ++ ++ public int interpreterFrameMonitorSize() { ++ return BasicObjectLock.size(); ++ } ++ ++ // Method ++ public Address addressOfInterpreterFrameMethod() { ++ return addressOfStackSlot(INTERPRETER_FRAME_METHOD_OFFSET); ++ } ++ ++ // Constant pool cache ++ public Address addressOfInterpreterFrameCPCache() { ++ return addressOfStackSlot(INTERPRETER_FRAME_CACHE_OFFSET); ++ } ++ ++ // Entry frames ++ public JavaCallWrapper getEntryFrameCallWrapper() { ++ return new MIPS64JavaCallWrapper(addressOfStackSlot(ENTRY_FRAME_CALL_WRAPPER_OFFSET).getAddressAt(0)); ++ } ++ ++ protected Address addressOfSavedOopResult() { ++ // offset is 2 for compiler2 and 3 for compiler1 ++ return getSP().addOffsetTo((VM.getVM().isClientCompiler() ? 
2 : 3) * ++ VM.getVM().getAddressSize()); ++ } ++ ++ protected Address addressOfSavedReceiver() { ++ return getSP().addOffsetTo(-4 * VM.getVM().getAddressSize()); ++ } ++ ++ private void dumpStack() { ++ if (getFP() != null) { ++ for (Address addr = getSP().addOffsetTo(-5 * VM.getVM().getAddressSize()); ++ AddressOps.lte(addr, getFP().addOffsetTo(5 * VM.getVM().getAddressSize())); ++ addr = addr.addOffsetTo(VM.getVM().getAddressSize())) { ++ System.out.println(addr + ": " + addr.getAddressAt(0)); ++ } ++ } else { ++ for (Address addr = getSP().addOffsetTo(-5 * VM.getVM().getAddressSize()); ++ AddressOps.lte(addr, getSP().addOffsetTo(20 * VM.getVM().getAddressSize())); ++ addr = addr.addOffsetTo(VM.getVM().getAddressSize())) { ++ System.out.println(addr + ": " + addr.getAddressAt(0)); ++ } ++ } ++ } ++} +diff --git a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/mips64/MIPS64JavaCallWrapper.java b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/mips64/MIPS64JavaCallWrapper.java +new file mode 100644 +index 00000000000..8a4a28a6055 +--- /dev/null ++++ b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/mips64/MIPS64JavaCallWrapper.java +@@ -0,0 +1,59 @@ ++/* ++ * Copyright (c) 2001, 2002, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2018, 2020, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ * ++ */ ++ ++package sun.jvm.hotspot.runtime.mips64; ++ ++import java.util.*; ++import sun.jvm.hotspot.debugger.*; ++import sun.jvm.hotspot.types.*; ++import sun.jvm.hotspot.runtime.*; ++import sun.jvm.hotspot.utilities.Observable; ++import sun.jvm.hotspot.utilities.Observer; ++ ++public class MIPS64JavaCallWrapper extends JavaCallWrapper { ++ private static AddressField lastJavaFPField; ++ ++ static { ++ VM.registerVMInitializedObserver(new Observer() { ++ public void update(Observable o, Object data) { ++ initialize(VM.getVM().getTypeDataBase()); ++ } ++ }); ++ } ++ ++ private static synchronized void initialize(TypeDataBase db) { ++ Type type = db.lookupType("JavaFrameAnchor"); ++ ++ lastJavaFPField = type.getAddressField("_last_Java_fp"); ++ } ++ ++ public MIPS64JavaCallWrapper(Address addr) { ++ super(addr); ++ } ++ ++ public Address getLastJavaFP() { ++ return lastJavaFPField.getValue(addr.addOffsetTo(anchorField.getOffset())); ++ } ++} +diff --git a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/mips64/MIPS64RegisterMap.java b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/mips64/MIPS64RegisterMap.java +new file mode 100644 +index 00000000000..f2da760af4a +--- /dev/null ++++ b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/mips64/MIPS64RegisterMap.java +@@ -0,0 +1,52 @@ ++/* ++ * Copyright (c) 2001, 2012, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2018, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ * ++ */ ++ ++package sun.jvm.hotspot.runtime.mips64; ++ ++import sun.jvm.hotspot.debugger.*; ++import sun.jvm.hotspot.runtime.*; ++ ++public class MIPS64RegisterMap extends RegisterMap { ++ ++ /** This is the only public constructor */ ++ public MIPS64RegisterMap(JavaThread thread, boolean updateMap) { ++ super(thread, updateMap); ++ } ++ ++ protected MIPS64RegisterMap(RegisterMap map) { ++ super(map); ++ } ++ ++ public Object clone() { ++ MIPS64RegisterMap retval = new MIPS64RegisterMap(this); ++ return retval; ++ } ++ ++ // no PD state to clear or copy: ++ protected void clearPD() {} ++ protected void initializePD() {} ++ protected void initializeFromPD(RegisterMap map) {} ++ protected Address getLocationPD(VMReg reg) { return null; } ++} +diff --git a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/utilities/PlatformInfo.java b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/utilities/PlatformInfo.java +index f4cd4873207..6901946e58a 100644 +--- a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/utilities/PlatformInfo.java ++++ b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/utilities/PlatformInfo.java +@@ -22,6 +22,13 @@ + * + */ + ++/* ++ * This file has been modified by Loongson Technology in 2021. These ++ * modifications are Copyright (c) 2018, 2021, Loongson Technology, and are made ++ * available on the same license terms set forth above. ++ * ++ */ ++ + package sun.jvm.hotspot.utilities; + + /** Provides canonicalized OS and CPU information for the rest of the +@@ -50,7 +57,7 @@ public class PlatformInfo { + + public static boolean knownCPU(String cpu) { + final String[] KNOWN = +- new String[] {"i386", "x86", "x86_64", "amd64", "ppc64", "ppc64le", "aarch64", "riscv64"}; ++ new String[] {"i386", "x86", "x86_64", "amd64", "ppc64", "ppc64le", "aarch64", "riscv64", "mips64", "mips64el", "loongarch64"}; + + for(String s : KNOWN) { + if(s.equals(cpu)) +@@ -83,6 +90,12 @@ public class PlatformInfo { + if (cpu.equals("ppc64le")) + return "ppc64"; + ++ if (cpu.equals("mips64el")) ++ return "mips64"; ++ ++ if (cpu.equals("loongarch64")) ++ return "loongarch64"; ++ + return cpu; + + } +diff --git a/src/jdk.internal.vm.ci/share/classes/jdk/vm/ci/hotspot/loongarch64/LoongArch64HotSpotJVMCIBackendFactory.java b/src/jdk.internal.vm.ci/share/classes/jdk/vm/ci/hotspot/loongarch64/LoongArch64HotSpotJVMCIBackendFactory.java +new file mode 100644 +index 00000000000..1f54e9f3c59 +--- /dev/null ++++ b/src/jdk.internal.vm.ci/share/classes/jdk/vm/ci/hotspot/loongarch64/LoongArch64HotSpotJVMCIBackendFactory.java +@@ -0,0 +1,142 @@ ++/* ++ * Copyright (c) 2015, 2022, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2022, 2023, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). 
++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ */ ++package jdk.vm.ci.hotspot.loongarch64; ++ ++import static java.util.Collections.emptyMap; ++import static jdk.vm.ci.common.InitTimer.timer; ++ ++import java.util.EnumSet; ++import java.util.Map; ++ ++import jdk.vm.ci.loongarch64.LoongArch64; ++import jdk.vm.ci.loongarch64.LoongArch64.CPUFeature; ++import jdk.vm.ci.code.Architecture; ++import jdk.vm.ci.code.RegisterConfig; ++import jdk.vm.ci.code.TargetDescription; ++import jdk.vm.ci.code.stack.StackIntrospection; ++import jdk.vm.ci.common.InitTimer; ++import jdk.vm.ci.hotspot.HotSpotCodeCacheProvider; ++import jdk.vm.ci.hotspot.HotSpotConstantReflectionProvider; ++import jdk.vm.ci.hotspot.HotSpotJVMCIBackendFactory; ++import jdk.vm.ci.hotspot.HotSpotJVMCIRuntime; ++import jdk.vm.ci.hotspot.HotSpotMetaAccessProvider; ++import jdk.vm.ci.hotspot.HotSpotStackIntrospection; ++import jdk.vm.ci.meta.ConstantReflectionProvider; ++import jdk.vm.ci.runtime.JVMCIBackend; ++ ++public class LoongArch64HotSpotJVMCIBackendFactory implements HotSpotJVMCIBackendFactory { ++ ++ private static EnumSet computeFeatures(LoongArch64HotSpotVMConfig config) { ++ // Configure the feature set using the HotSpot flag settings. ++ Map constants = config.getStore().getConstants(); ++ return HotSpotJVMCIBackendFactory.convertFeatures(CPUFeature.class, constants, config.vmVersionFeatures, emptyMap()); ++ } ++ ++ private static EnumSet computeFlags(LoongArch64HotSpotVMConfig config) { ++ EnumSet flags = EnumSet.noneOf(LoongArch64.Flag.class); ++ ++ if (config.useLSX) { ++ flags.add(LoongArch64.Flag.useLSX); ++ } ++ if (config.useLASX) { ++ flags.add(LoongArch64.Flag.useLASX); ++ } ++ ++ return flags; ++ } ++ ++ private static TargetDescription createTarget(LoongArch64HotSpotVMConfig config) { ++ final int stackFrameAlignment = 16; ++ final int implicitNullCheckLimit = 4096; ++ final boolean inlineObjects = true; ++ Architecture arch = new LoongArch64(computeFeatures(config), computeFlags(config)); ++ return new TargetDescription(arch, true, stackFrameAlignment, implicitNullCheckLimit, inlineObjects); ++ } ++ ++ protected HotSpotConstantReflectionProvider createConstantReflection(HotSpotJVMCIRuntime runtime) { ++ return new HotSpotConstantReflectionProvider(runtime); ++ } ++ ++ private static RegisterConfig createRegisterConfig(LoongArch64HotSpotVMConfig config, TargetDescription target) { ++ return new LoongArch64HotSpotRegisterConfig(target, config.useCompressedOops); ++ } ++ ++ protected HotSpotCodeCacheProvider createCodeCache(HotSpotJVMCIRuntime runtime, TargetDescription target, RegisterConfig regConfig) { ++ return new HotSpotCodeCacheProvider(runtime, target, regConfig); ++ } ++ ++ protected HotSpotMetaAccessProvider createMetaAccess(HotSpotJVMCIRuntime runtime) { ++ return new HotSpotMetaAccessProvider(runtime); ++ } ++ ++ @Override ++ public String getArchitecture() { ++ return "loongarch64"; ++ } ++ ++ @Override ++ public String toString() { ++ return "JVMCIBackend:" + getArchitecture(); ++ } ++ ++ @Override ++ @SuppressWarnings("try") ++ public JVMCIBackend createJVMCIBackend(HotSpotJVMCIRuntime runtime, JVMCIBackend host) { ++ ++ assert host == null; ++ 
LoongArch64HotSpotVMConfig config = new LoongArch64HotSpotVMConfig(runtime.getConfigStore()); ++ TargetDescription target = createTarget(config); ++ ++ RegisterConfig regConfig; ++ HotSpotCodeCacheProvider codeCache; ++ ConstantReflectionProvider constantReflection; ++ HotSpotMetaAccessProvider metaAccess; ++ StackIntrospection stackIntrospection; ++ try (InitTimer t = timer("create providers")) { ++ try (InitTimer rt = timer("create MetaAccess provider")) { ++ metaAccess = createMetaAccess(runtime); ++ } ++ try (InitTimer rt = timer("create RegisterConfig")) { ++ regConfig = createRegisterConfig(config, target); ++ } ++ try (InitTimer rt = timer("create CodeCache provider")) { ++ codeCache = createCodeCache(runtime, target, regConfig); ++ } ++ try (InitTimer rt = timer("create ConstantReflection provider")) { ++ constantReflection = createConstantReflection(runtime); ++ } ++ try (InitTimer rt = timer("create StackIntrospection provider")) { ++ stackIntrospection = new HotSpotStackIntrospection(runtime); ++ } ++ } ++ try (InitTimer rt = timer("instantiate backend")) { ++ return createBackend(metaAccess, codeCache, constantReflection, stackIntrospection); ++ } ++ } ++ ++ protected JVMCIBackend createBackend(HotSpotMetaAccessProvider metaAccess, HotSpotCodeCacheProvider codeCache, ConstantReflectionProvider constantReflection, ++ StackIntrospection stackIntrospection) { ++ return new JVMCIBackend(metaAccess, codeCache, constantReflection, stackIntrospection); ++ } ++} +diff --git a/src/jdk.internal.vm.ci/share/classes/jdk/vm/ci/hotspot/loongarch64/LoongArch64HotSpotRegisterConfig.java b/src/jdk.internal.vm.ci/share/classes/jdk/vm/ci/hotspot/loongarch64/LoongArch64HotSpotRegisterConfig.java +new file mode 100644 +index 00000000000..e1a007000d2 +--- /dev/null ++++ b/src/jdk.internal.vm.ci/share/classes/jdk/vm/ci/hotspot/loongarch64/LoongArch64HotSpotRegisterConfig.java +@@ -0,0 +1,297 @@ ++/* ++ * Copyright (c) 2015, 2022, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2022, 2023, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ */ ++package jdk.vm.ci.hotspot.loongarch64; ++ ++import static jdk.vm.ci.loongarch64.LoongArch64.ra; ++import static jdk.vm.ci.loongarch64.LoongArch64.a0; ++import static jdk.vm.ci.loongarch64.LoongArch64.a1; ++import static jdk.vm.ci.loongarch64.LoongArch64.a2; ++import static jdk.vm.ci.loongarch64.LoongArch64.a3; ++import static jdk.vm.ci.loongarch64.LoongArch64.a4; ++import static jdk.vm.ci.loongarch64.LoongArch64.a5; ++import static jdk.vm.ci.loongarch64.LoongArch64.a6; ++import static jdk.vm.ci.loongarch64.LoongArch64.a7; ++import static jdk.vm.ci.loongarch64.LoongArch64.SCR1; ++import static jdk.vm.ci.loongarch64.LoongArch64.SCR2; ++import static jdk.vm.ci.loongarch64.LoongArch64.t0; ++import static jdk.vm.ci.loongarch64.LoongArch64.v0; ++import static jdk.vm.ci.loongarch64.LoongArch64.s5; ++import static jdk.vm.ci.loongarch64.LoongArch64.s6; ++import static jdk.vm.ci.loongarch64.LoongArch64.sp; ++import static jdk.vm.ci.loongarch64.LoongArch64.fp; ++import static jdk.vm.ci.loongarch64.LoongArch64.tp; ++import static jdk.vm.ci.loongarch64.LoongArch64.rx; ++import static jdk.vm.ci.loongarch64.LoongArch64.f0; ++import static jdk.vm.ci.loongarch64.LoongArch64.f1; ++import static jdk.vm.ci.loongarch64.LoongArch64.f2; ++import static jdk.vm.ci.loongarch64.LoongArch64.f3; ++import static jdk.vm.ci.loongarch64.LoongArch64.f4; ++import static jdk.vm.ci.loongarch64.LoongArch64.f5; ++import static jdk.vm.ci.loongarch64.LoongArch64.f6; ++import static jdk.vm.ci.loongarch64.LoongArch64.f7; ++import static jdk.vm.ci.loongarch64.LoongArch64.fv0; ++import static jdk.vm.ci.loongarch64.LoongArch64.zero; ++ ++import java.util.ArrayList; ++import java.util.HashSet; ++import java.util.List; ++import java.util.Set; ++ ++import jdk.vm.ci.loongarch64.LoongArch64; ++import jdk.vm.ci.code.Architecture; ++import jdk.vm.ci.code.CallingConvention; ++import jdk.vm.ci.code.CallingConvention.Type; ++import jdk.vm.ci.code.Register; ++import jdk.vm.ci.code.RegisterArray; ++import jdk.vm.ci.code.RegisterAttributes; ++import jdk.vm.ci.code.RegisterConfig; ++import jdk.vm.ci.code.StackSlot; ++import jdk.vm.ci.code.TargetDescription; ++import jdk.vm.ci.code.ValueKindFactory; ++import jdk.vm.ci.common.JVMCIError; ++import jdk.vm.ci.hotspot.HotSpotCallingConventionType; ++import jdk.vm.ci.meta.AllocatableValue; ++import jdk.vm.ci.meta.JavaKind; ++import jdk.vm.ci.meta.JavaType; ++import jdk.vm.ci.meta.PlatformKind; ++import jdk.vm.ci.meta.Value; ++import jdk.vm.ci.meta.ValueKind; ++ ++public class LoongArch64HotSpotRegisterConfig implements RegisterConfig { ++ ++ private final TargetDescription target; ++ ++ private final RegisterArray allocatable; ++ ++ /** ++ * The caller saved registers always include all parameter registers. 
++ */ ++ private final RegisterArray callerSaved; ++ ++ private final boolean allAllocatableAreCallerSaved; ++ ++ private final RegisterAttributes[] attributesMap; ++ ++ @Override ++ public RegisterArray getAllocatableRegisters() { ++ return allocatable; ++ } ++ ++ @Override ++ public RegisterArray filterAllocatableRegisters(PlatformKind kind, RegisterArray registers) { ++ ArrayList list = new ArrayList<>(); ++ for (Register reg : registers) { ++ if (target.arch.canStoreValue(reg.getRegisterCategory(), kind)) { ++ list.add(reg); ++ } ++ } ++ ++ return new RegisterArray(list); ++ } ++ ++ @Override ++ public RegisterAttributes[] getAttributesMap() { ++ return attributesMap.clone(); ++ } ++ ++ private final RegisterArray javaGeneralParameterRegisters = new RegisterArray(t0, a0, a1, a2, a3, a4, a5, a6, a7); ++ private final RegisterArray nativeGeneralParameterRegisters = new RegisterArray(a0, a1, a2, a3, a4, a5, a6, a7); ++ private final RegisterArray floatParameterRegisters = new RegisterArray(f0, f1, f2, f3, f4, f5, f6, f7); ++ ++ public static final Register heapBaseRegister = s5; ++ public static final Register TREG = s6; ++ ++ private static final RegisterArray reservedRegisters = new RegisterArray(fp, ra, zero, sp, tp, rx, SCR1, SCR2, TREG); ++ ++ private static RegisterArray initAllocatable(Architecture arch, boolean reserveForHeapBase) { ++ RegisterArray allRegisters = arch.getAvailableValueRegisters(); ++ Register[] registers = new Register[allRegisters.size() - reservedRegisters.size() - (reserveForHeapBase ? 1 : 0)]; ++ List reservedRegistersList = reservedRegisters.asList(); ++ ++ int idx = 0; ++ for (Register reg : allRegisters) { ++ if (reservedRegistersList.contains(reg)) { ++ // skip reserved registers ++ continue; ++ } ++ if (reserveForHeapBase && reg.equals(heapBaseRegister)) { ++ // skip heap base register ++ continue; ++ } ++ ++ registers[idx++] = reg; ++ } ++ ++ assert idx == registers.length; ++ return new RegisterArray(registers); ++ } ++ ++ public LoongArch64HotSpotRegisterConfig(TargetDescription target, boolean useCompressedOops) { ++ this(target, initAllocatable(target.arch, useCompressedOops)); ++ assert callerSaved.size() >= allocatable.size(); ++ } ++ ++ public LoongArch64HotSpotRegisterConfig(TargetDescription target, RegisterArray allocatable) { ++ this.target = target; ++ ++ this.allocatable = allocatable; ++ Set callerSaveSet = new HashSet<>(); ++ allocatable.addTo(callerSaveSet); ++ floatParameterRegisters.addTo(callerSaveSet); ++ javaGeneralParameterRegisters.addTo(callerSaveSet); ++ nativeGeneralParameterRegisters.addTo(callerSaveSet); ++ callerSaved = new RegisterArray(callerSaveSet); ++ ++ allAllocatableAreCallerSaved = true; ++ attributesMap = RegisterAttributes.createMap(this, LoongArch64.allRegisters); ++ } ++ ++ @Override ++ public RegisterArray getCallerSaveRegisters() { ++ return callerSaved; ++ } ++ ++ @Override ++ public RegisterArray getCalleeSaveRegisters() { ++ return null; ++ } ++ ++ @Override ++ public boolean areAllAllocatableRegistersCallerSaved() { ++ return allAllocatableAreCallerSaved; ++ } ++ ++ @Override ++ public CallingConvention getCallingConvention(Type type, JavaType returnType, JavaType[] parameterTypes, ValueKindFactory valueKindFactory) { ++ HotSpotCallingConventionType hotspotType = (HotSpotCallingConventionType) type; ++ if (type == HotSpotCallingConventionType.NativeCall) { ++ return callingConvention(nativeGeneralParameterRegisters, returnType, parameterTypes, hotspotType, valueKindFactory); ++ } ++ // On x64, parameter 
locations are the same whether viewed ++ // from the caller or callee perspective ++ return callingConvention(javaGeneralParameterRegisters, returnType, parameterTypes, hotspotType, valueKindFactory); ++ } ++ ++ @Override ++ public RegisterArray getCallingConventionRegisters(Type type, JavaKind kind) { ++ HotSpotCallingConventionType hotspotType = (HotSpotCallingConventionType) type; ++ switch (kind) { ++ case Boolean: ++ case Byte: ++ case Short: ++ case Char: ++ case Int: ++ case Long: ++ case Object: ++ return hotspotType == HotSpotCallingConventionType.NativeCall ? nativeGeneralParameterRegisters : javaGeneralParameterRegisters; ++ case Float: ++ case Double: ++ return floatParameterRegisters; ++ default: ++ throw JVMCIError.shouldNotReachHere(); ++ } ++ } ++ ++ private CallingConvention callingConvention(RegisterArray generalParameterRegisters, JavaType returnType, JavaType[] parameterTypes, HotSpotCallingConventionType type, ++ ValueKindFactory valueKindFactory) { ++ AllocatableValue[] locations = new AllocatableValue[parameterTypes.length]; ++ ++ int currentGeneral = 0; ++ int currentFloat = 0; ++ int currentStackOffset = 0; ++ ++ for (int i = 0; i < parameterTypes.length; i++) { ++ final JavaKind kind = parameterTypes[i].getJavaKind().getStackKind(); ++ ++ switch (kind) { ++ case Byte: ++ case Boolean: ++ case Short: ++ case Char: ++ case Int: ++ case Long: ++ case Object: ++ if (currentGeneral < generalParameterRegisters.size()) { ++ Register register = generalParameterRegisters.get(currentGeneral++); ++ locations[i] = register.asValue(valueKindFactory.getValueKind(kind)); ++ } ++ break; ++ case Float: ++ case Double: ++ if (currentFloat < floatParameterRegisters.size()) { ++ Register register = floatParameterRegisters.get(currentFloat++); ++ locations[i] = register.asValue(valueKindFactory.getValueKind(kind)); ++ } else if (currentGeneral < generalParameterRegisters.size()) { ++ Register register = generalParameterRegisters.get(currentGeneral++); ++ locations[i] = register.asValue(valueKindFactory.getValueKind(kind)); ++ } ++ break; ++ default: ++ throw JVMCIError.shouldNotReachHere(); ++ } ++ ++ if (locations[i] == null) { ++ ValueKind valueKind = valueKindFactory.getValueKind(kind); ++ locations[i] = StackSlot.get(valueKind, currentStackOffset, !type.out); ++ currentStackOffset += Math.max(valueKind.getPlatformKind().getSizeInBytes(), target.wordSize); ++ } ++ } ++ ++ JavaKind returnKind = returnType == null ? JavaKind.Void : returnType.getJavaKind(); ++ AllocatableValue returnLocation = returnKind == JavaKind.Void ? 
Value.ILLEGAL : getReturnRegister(returnKind).asValue(valueKindFactory.getValueKind(returnKind.getStackKind())); ++ return new CallingConvention(currentStackOffset, returnLocation, locations); ++ } ++ ++ @Override ++ public Register getReturnRegister(JavaKind kind) { ++ switch (kind) { ++ case Boolean: ++ case Byte: ++ case Char: ++ case Short: ++ case Int: ++ case Long: ++ case Object: ++ return v0; ++ case Float: ++ case Double: ++ return fv0; ++ case Void: ++ case Illegal: ++ return null; ++ default: ++ throw new UnsupportedOperationException("no return register for type " + kind); ++ } ++ } ++ ++ @Override ++ public Register getFrameRegister() { ++ return sp; ++ } ++ ++ @Override ++ public String toString() { ++ return String.format("Allocatable: " + getAllocatableRegisters() + "%n" + "CallerSave: " + getCallerSaveRegisters() + "%n"); ++ } ++} +diff --git a/src/jdk.internal.vm.ci/share/classes/jdk/vm/ci/hotspot/loongarch64/LoongArch64HotSpotVMConfig.java b/src/jdk.internal.vm.ci/share/classes/jdk/vm/ci/hotspot/loongarch64/LoongArch64HotSpotVMConfig.java +new file mode 100644 +index 00000000000..0a2e857204c +--- /dev/null ++++ b/src/jdk.internal.vm.ci/share/classes/jdk/vm/ci/hotspot/loongarch64/LoongArch64HotSpotVMConfig.java +@@ -0,0 +1,77 @@ ++/* ++ * Copyright (c) 2016, 2022, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2022, 2023, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ */ ++package jdk.vm.ci.hotspot.loongarch64; ++ ++import jdk.vm.ci.hotspot.HotSpotVMConfigAccess; ++import jdk.vm.ci.hotspot.HotSpotVMConfigStore; ++import jdk.vm.ci.services.Services; ++ ++/** ++ * Used to access native configuration details. ++ * ++ * All non-static, public fields in this class are so that they can be compiled as constants. ++ */ ++class LoongArch64HotSpotVMConfig extends HotSpotVMConfigAccess { ++ ++ LoongArch64HotSpotVMConfig(HotSpotVMConfigStore config) { ++ super(config); ++ } ++ ++ final boolean useCompressedOops = getFlag("UseCompressedOops", Boolean.class); ++ ++ // CPU Capabilities ++ ++ /* ++ * These flags are set based on the corresponding command line flags. ++ */ ++ final boolean useLSX = getFlag("UseLSX", Boolean.class); ++ final boolean useLASX = getFlag("UseLASX", Boolean.class); ++ ++ final long vmVersionFeatures = getFieldValue("Abstract_VM_Version::_features", Long.class, "uint64_t"); ++ ++ /* ++ * These flags are set if the corresponding support is in the hardware. 
++ */ ++ // Checkstyle: stop ++ final long loongarch64LA32 = getConstant("VM_Version::CPU_LA32", Long.class); ++ final long loongarch64LA64 = getConstant("VM_Version::CPU_LA64", Long.class); ++ final long loongarch64LLEXC = getConstant("VM_Version::CPU_LLEXC", Long.class); ++ final long loongarch64SCDLY = getConstant("VM_Version::CPU_SCDLY", Long.class); ++ final long loongarch64LLDBAR = getConstant("VM_Version::CPU_LLDBAR", Long.class); ++ final long loongarch64LBT_X86 = getConstant("VM_Version::CPU_LBT_X86", Long.class); ++ final long loongarch64LBT_ARM = getConstant("VM_Version::CPU_LBT_ARM", Long.class); ++ final long loongarch64LBT_MIPS = getConstant("VM_Version::CPU_LBT_MIPS", Long.class); ++ final long loongarch64CCDMA = getConstant("VM_Version::CPU_CCDMA", Long.class); ++ final long loongarch64COMPLEX = getConstant("VM_Version::CPU_COMPLEX", Long.class); ++ final long loongarch64FP = getConstant("VM_Version::CPU_FP", Long.class); ++ final long loongarch64CRYPTO = getConstant("VM_Version::CPU_CRYPTO", Long.class); ++ final long loongarch64LSX = getConstant("VM_Version::CPU_LSX", Long.class); ++ final long loongarch64LASX = getConstant("VM_Version::CPU_LASX", Long.class); ++ final long loongarch64LAM = getConstant("VM_Version::CPU_LAM", Long.class); ++ final long loongarch64LLSYNC = getConstant("VM_Version::CPU_LLSYNC", Long.class); ++ final long loongarch64TGTSYNC = getConstant("VM_Version::CPU_TGTSYNC", Long.class); ++ final long loongarch64ULSYNC = getConstant("VM_Version::CPU_ULSYNC", Long.class); ++ final long loongarch64UAL = getConstant("VM_Version::CPU_UAL", Long.class); ++ // Checkstyle: resume ++} +diff --git a/src/jdk.internal.vm.ci/share/classes/jdk/vm/ci/hotspot/loongarch64/package-info.java b/src/jdk.internal.vm.ci/share/classes/jdk/vm/ci/hotspot/loongarch64/package-info.java +new file mode 100644 +index 00000000000..74c6ca9801f +--- /dev/null ++++ b/src/jdk.internal.vm.ci/share/classes/jdk/vm/ci/hotspot/loongarch64/package-info.java +@@ -0,0 +1,28 @@ ++/* ++ * Copyright (c) 2018, 2022, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2022, 2023, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ */ ++ ++/** ++ * The LoongArch64 HotSpot specific portions of the JVMCI API. 
++ */ ++package jdk.vm.ci.hotspot.loongarch64; +diff --git a/src/jdk.internal.vm.ci/share/classes/jdk/vm/ci/loongarch64/LoongArch64.java b/src/jdk.internal.vm.ci/share/classes/jdk/vm/ci/loongarch64/LoongArch64.java +new file mode 100644 +index 00000000000..930b17e820a +--- /dev/null ++++ b/src/jdk.internal.vm.ci/share/classes/jdk/vm/ci/loongarch64/LoongArch64.java +@@ -0,0 +1,249 @@ ++/* ++ * Copyright (c) 2015, 2022, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2022, 2023, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ */ ++package jdk.vm.ci.loongarch64; ++ ++import java.nio.ByteOrder; ++import java.util.EnumSet; ++ ++import jdk.vm.ci.code.Architecture; ++import jdk.vm.ci.code.CPUFeatureName; ++import jdk.vm.ci.code.Register; ++import jdk.vm.ci.code.Register.RegisterCategory; ++import jdk.vm.ci.code.RegisterArray; ++import jdk.vm.ci.meta.JavaKind; ++import jdk.vm.ci.meta.PlatformKind; ++ ++/** ++ * Represents the LoongArch64 architecture. 
++ */ ++public class LoongArch64 extends Architecture { ++ ++ public static final RegisterCategory CPU = new RegisterCategory("CPU"); ++ ++ // General purpose CPU registers ++ public static final Register zero = new Register(0, 0, "r0", CPU); ++ public static final Register ra = new Register(1, 1, "r1", CPU); ++ public static final Register tp = new Register(2, 2, "r2", CPU); ++ public static final Register sp = new Register(3, 3, "r3", CPU); ++ public static final Register a0 = new Register(4, 4, "r4", CPU); ++ public static final Register a1 = new Register(5, 5, "r5", CPU); ++ public static final Register a2 = new Register(6, 6, "r6", CPU); ++ public static final Register a3 = new Register(7, 7, "r7", CPU); ++ public static final Register a4 = new Register(8, 8, "r8", CPU); ++ public static final Register a5 = new Register(9, 9, "r9", CPU); ++ public static final Register a6 = new Register(10, 10, "r10", CPU); ++ public static final Register a7 = new Register(11, 11, "r11", CPU); ++ public static final Register t0 = new Register(12, 12, "r12", CPU); ++ public static final Register t1 = new Register(13, 13, "r13", CPU); ++ public static final Register t2 = new Register(14, 14, "r14", CPU); ++ public static final Register t3 = new Register(15, 15, "r15", CPU); ++ public static final Register t4 = new Register(16, 16, "r16", CPU); ++ public static final Register t5 = new Register(17, 17, "r17", CPU); ++ public static final Register t6 = new Register(18, 18, "r18", CPU); ++ public static final Register t7 = new Register(19, 19, "r19", CPU); ++ public static final Register t8 = new Register(20, 20, "r20", CPU); ++ public static final Register rx = new Register(21, 21, "r21", CPU); ++ public static final Register fp = new Register(22, 22, "r22", CPU); ++ public static final Register s0 = new Register(23, 23, "r23", CPU); ++ public static final Register s1 = new Register(24, 24, "r24", CPU); ++ public static final Register s2 = new Register(25, 25, "r25", CPU); ++ public static final Register s3 = new Register(26, 26, "r26", CPU); ++ public static final Register s4 = new Register(27, 27, "r27", CPU); ++ public static final Register s5 = new Register(28, 28, "r28", CPU); ++ public static final Register s6 = new Register(29, 29, "r29", CPU); ++ public static final Register s7 = new Register(30, 30, "r30", CPU); ++ public static final Register s8 = new Register(31, 31, "r31", CPU); ++ ++ public static final Register SCR1 = t7; ++ public static final Register SCR2 = t4; ++ public static final Register v0 = a0; ++ ++ // @formatter:off ++ public static final RegisterArray cpuRegisters = new RegisterArray( ++ zero, ra, tp, sp, a0, a1, a2, a3, ++ a4, a5, a6, a7, t0, t1, t2, t3, ++ t4, t5, t6, t7, t8, rx, fp, s0, ++ s1, s2, s3, s4, s5, s6, s7, s8 ++ ); ++ // @formatter:on ++ ++ public static final RegisterCategory SIMD = new RegisterCategory("SIMD"); ++ ++ // Simd registers ++ public static final Register f0 = new Register(32, 0, "f0", SIMD); ++ public static final Register f1 = new Register(33, 1, "f1", SIMD); ++ public static final Register f2 = new Register(34, 2, "f2", SIMD); ++ public static final Register f3 = new Register(35, 3, "f3", SIMD); ++ public static final Register f4 = new Register(36, 4, "f4", SIMD); ++ public static final Register f5 = new Register(37, 5, "f5", SIMD); ++ public static final Register f6 = new Register(38, 6, "f6", SIMD); ++ public static final Register f7 = new Register(39, 7, "f7", SIMD); ++ public static final Register f8 = new Register(40, 8, "f8", SIMD); ++ public 
static final Register f9 = new Register(41, 9, "f9", SIMD); ++ public static final Register f10 = new Register(42, 10, "f10", SIMD); ++ public static final Register f11 = new Register(43, 11, "f11", SIMD); ++ public static final Register f12 = new Register(44, 12, "f12", SIMD); ++ public static final Register f13 = new Register(45, 13, "f13", SIMD); ++ public static final Register f14 = new Register(46, 14, "f14", SIMD); ++ public static final Register f15 = new Register(47, 15, "f15", SIMD); ++ public static final Register f16 = new Register(48, 16, "f16", SIMD); ++ public static final Register f17 = new Register(49, 17, "f17", SIMD); ++ public static final Register f18 = new Register(50, 18, "f18", SIMD); ++ public static final Register f19 = new Register(51, 19, "f19", SIMD); ++ public static final Register f20 = new Register(52, 20, "f20", SIMD); ++ public static final Register f21 = new Register(53, 21, "f21", SIMD); ++ public static final Register f22 = new Register(54, 22, "f22", SIMD); ++ public static final Register f23 = new Register(55, 23, "f23", SIMD); ++ public static final Register f24 = new Register(56, 24, "f24", SIMD); ++ public static final Register f25 = new Register(57, 25, "f25", SIMD); ++ public static final Register f26 = new Register(58, 26, "f26", SIMD); ++ public static final Register f27 = new Register(59, 27, "f27", SIMD); ++ public static final Register f28 = new Register(60, 28, "f28", SIMD); ++ public static final Register f29 = new Register(61, 29, "f29", SIMD); ++ public static final Register f30 = new Register(62, 30, "f30", SIMD); ++ public static final Register f31 = new Register(63, 31, "f31", SIMD); ++ ++ public static final Register fv0 = f0; ++ ++ // @formatter:off ++ public static final RegisterArray simdRegisters = new RegisterArray( ++ f0, f1, f2, f3, f4, f5, f6, f7, ++ f8, f9, f10, f11, f12, f13, f14, f15, ++ f16, f17, f18, f19, f20, f21, f22, f23, ++ f24, f25, f26, f27, f28, f29, f30, f31 ++ ); ++ // @formatter:on ++ ++ // @formatter:off ++ public static final RegisterArray allRegisters = new RegisterArray( ++ zero, ra, tp, sp, a0, a1, a2, a3, ++ a4, a5, a6, a7, t0, t1, t2, t3, ++ t4, t5, t6, t7, t8, rx, fp, s0, ++ s1, s2, s3, s4, s5, s6, s7, s8, ++ ++ f0, f1, f2, f3, f4, f5, f6, f7, ++ f8, f9, f10, f11, f12, f13, f14, f15, ++ f16, f17, f18, f19, f20, f21, f22, f23, ++ f24, f25, f26, f27, f28, f29, f30, f31 ++ ); ++ // @formatter:on ++ ++ /** ++ * Basic set of CPU features mirroring what is returned from the cpuid instruction. See: ++ * {@code VM_Version::cpuFeatureFlags}. ++ */ ++ public enum CPUFeature implements CPUFeatureName { ++ LA32, ++ LA64, ++ LLEXC, ++ SCDLY, ++ LLDBAR, ++ LBT_X86, ++ LBT_ARM, ++ LBT_MIPS, ++ CCDMA, ++ COMPLEX, ++ FP, ++ CRYPTO, ++ LSX, ++ LASX, ++ LAM, ++ LLSYNC, ++ TGTSYNC, ++ ULSYNC, ++ UAL ++ } ++ ++ private final EnumSet features; ++ ++ /** ++ * Set of flags to control code emission. 
++ */ ++ public enum Flag { ++ useLSX, ++ useLASX ++ } ++ ++ private final EnumSet flags; ++ ++ public LoongArch64(EnumSet features, EnumSet flags) { ++ super("loongarch64", LoongArch64Kind.QWORD, ByteOrder.LITTLE_ENDIAN, true, allRegisters, 0, 0, 0); ++ this.features = features; ++ this.flags = flags; ++ } ++ ++ @Override ++ public EnumSet getFeatures() { ++ return features; ++ } ++ ++ public EnumSet getFlags() { ++ return flags; ++ } ++ ++ @Override ++ public PlatformKind getPlatformKind(JavaKind javaKind) { ++ switch (javaKind) { ++ case Boolean: ++ case Byte: ++ return LoongArch64Kind.BYTE; ++ case Short: ++ case Char: ++ return LoongArch64Kind.WORD; ++ case Int: ++ return LoongArch64Kind.DWORD; ++ case Long: ++ case Object: ++ return LoongArch64Kind.QWORD; ++ case Float: ++ return LoongArch64Kind.SINGLE; ++ case Double: ++ return LoongArch64Kind.DOUBLE; ++ default: ++ return null; ++ } ++ } ++ ++ @Override ++ public boolean canStoreValue(RegisterCategory category, PlatformKind platformKind) { ++ LoongArch64Kind kind = (LoongArch64Kind) platformKind; ++ if (kind.isInteger()) { ++ return category.equals(CPU); ++ } else if (kind.isSIMD()) { ++ return category.equals(SIMD); ++ } ++ return false; ++ } ++ ++ @Override ++ public LoongArch64Kind getLargestStorableKind(RegisterCategory category) { ++ if (category.equals(CPU)) { ++ return LoongArch64Kind.QWORD; ++ } else if (category.equals(SIMD)) { ++ return LoongArch64Kind.V256_QWORD; ++ } else { ++ return null; ++ } ++ } ++} +diff --git a/src/jdk.internal.vm.ci/share/classes/jdk/vm/ci/loongarch64/LoongArch64Kind.java b/src/jdk.internal.vm.ci/share/classes/jdk/vm/ci/loongarch64/LoongArch64Kind.java +new file mode 100644 +index 00000000000..047a1dbbe36 +--- /dev/null ++++ b/src/jdk.internal.vm.ci/share/classes/jdk/vm/ci/loongarch64/LoongArch64Kind.java +@@ -0,0 +1,163 @@ ++/* ++ * Copyright (c) 2015, 2022, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2022, 2023, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ */ ++package jdk.vm.ci.loongarch64; ++ ++import jdk.vm.ci.meta.PlatformKind; ++ ++public enum LoongArch64Kind implements PlatformKind { ++ ++ // scalar ++ BYTE(1), ++ WORD(2), ++ DWORD(4), ++ QWORD(8), ++ UBYTE(1), ++ UWORD(2), ++ UDWORD(4), ++ SINGLE(4), ++ DOUBLE(8), ++ ++ // SIMD ++ V128_BYTE(16, BYTE), ++ V128_WORD(16, WORD), ++ V128_DWORD(16, DWORD), ++ V128_QWORD(16, QWORD), ++ V128_SINGLE(16, SINGLE), ++ V128_DOUBLE(16, DOUBLE), ++ V256_BYTE(32, BYTE), ++ V256_WORD(32, WORD), ++ V256_DWORD(32, DWORD), ++ V256_QWORD(32, QWORD), ++ V256_SINGLE(32, SINGLE), ++ V256_DOUBLE(32, DOUBLE); ++ ++ private final int size; ++ private final int vectorLength; ++ ++ private final LoongArch64Kind scalar; ++ private final EnumKey key = new EnumKey<>(this); ++ ++ LoongArch64Kind(int size) { ++ this.size = size; ++ this.scalar = this; ++ this.vectorLength = 1; ++ } ++ ++ LoongArch64Kind(int size, LoongArch64Kind scalar) { ++ this.size = size; ++ this.scalar = scalar; ++ ++ assert size % scalar.size == 0; ++ this.vectorLength = size / scalar.size; ++ } ++ ++ public LoongArch64Kind getScalar() { ++ return scalar; ++ } ++ ++ @Override ++ public int getSizeInBytes() { ++ return size; ++ } ++ ++ @Override ++ public int getVectorLength() { ++ return vectorLength; ++ } ++ ++ @Override ++ public Key getKey() { ++ return key; ++ } ++ ++ public boolean isInteger() { ++ switch (this) { ++ case BYTE: ++ case WORD: ++ case DWORD: ++ case QWORD: ++ case UBYTE: ++ case UWORD: ++ case UDWORD: ++ return true; ++ default: ++ return false; ++ } ++ } ++ ++ public boolean isSIMD() { ++ switch (this) { ++ case SINGLE: ++ case DOUBLE: ++ case V128_BYTE: ++ case V128_WORD: ++ case V128_DWORD: ++ case V128_QWORD: ++ case V128_SINGLE: ++ case V128_DOUBLE: ++ case V256_BYTE: ++ case V256_WORD: ++ case V256_DWORD: ++ case V256_QWORD: ++ case V256_SINGLE: ++ case V256_DOUBLE: ++ return true; ++ default: ++ return false; ++ } ++ } ++ ++ @Override ++ public char getTypeChar() { ++ switch (this) { ++ case BYTE: ++ return 'b'; ++ case WORD: ++ return 'w'; ++ case DWORD: ++ return 'd'; ++ case QWORD: ++ return 'q'; ++ case SINGLE: ++ return 'S'; ++ case DOUBLE: ++ return 'D'; ++ case V128_BYTE: ++ case V128_WORD: ++ case V128_DWORD: ++ case V128_QWORD: ++ case V128_SINGLE: ++ case V128_DOUBLE: ++ case V256_BYTE: ++ case V256_WORD: ++ case V256_DWORD: ++ case V256_QWORD: ++ case V256_SINGLE: ++ case V256_DOUBLE: ++ return 'v'; ++ default: ++ return '-'; ++ } ++ } ++} +diff --git a/src/jdk.internal.vm.ci/share/classes/jdk/vm/ci/loongarch64/package-info.java b/src/jdk.internal.vm.ci/share/classes/jdk/vm/ci/loongarch64/package-info.java +new file mode 100644 +index 00000000000..6df1b7b3a92 +--- /dev/null ++++ b/src/jdk.internal.vm.ci/share/classes/jdk/vm/ci/loongarch64/package-info.java +@@ -0,0 +1,28 @@ ++/* ++ * Copyright (c) 2018, 2022, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2022, 2023, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ */ ++ ++/** ++ * The LoongArch64 platform independent portions of the JVMCI API. ++ */ ++package jdk.vm.ci.loongarch64; +diff --git a/src/jdk.internal.vm.ci/share/classes/module-info.java b/src/jdk.internal.vm.ci/share/classes/module-info.java +index ed197695720..62a4ff4be0f 100644 +--- a/src/jdk.internal.vm.ci/share/classes/module-info.java ++++ b/src/jdk.internal.vm.ci/share/classes/module-info.java +@@ -23,6 +23,12 @@ + * questions. + */ + ++/* ++ * This file has been modified by Loongson Technology in 2022, These ++ * modifications are Copyright (c) 2022, Loongson Technology, and are made ++ * available on the same license terms set forth above. ++ */ ++ + module jdk.internal.vm.ci { + exports jdk.vm.ci.services to + jdk.internal.vm.compiler, +@@ -39,5 +45,6 @@ module jdk.internal.vm.ci { + + provides jdk.vm.ci.hotspot.HotSpotJVMCIBackendFactory with + jdk.vm.ci.hotspot.aarch64.AArch64HotSpotJVMCIBackendFactory, ++ jdk.vm.ci.hotspot.loongarch64.LoongArch64HotSpotJVMCIBackendFactory, + jdk.vm.ci.hotspot.amd64.AMD64HotSpotJVMCIBackendFactory; + } +diff --git a/src/utils/hsdis/Makefile b/src/utils/hsdis/Makefile +index 66dac7130bd..dd2ccd340f2 100644 +--- a/src/utils/hsdis/Makefile ++++ b/src/utils/hsdis/Makefile +@@ -89,6 +89,9 @@ CC = gcc + endif + CFLAGS += -O + DLDFLAGS += -shared ++ifeq ($(ARCH), mips64) ++DLDFLAGS += -Wl,-z,noexecstack ++endif + LDFLAGS += -ldl + OUTFLAGS += -o $@ + else +diff --git a/test/hotspot/jtreg/compiler/cpuflags/TestAESIntrinsicsOnSupportedConfig.java b/test/hotspot/jtreg/compiler/cpuflags/TestAESIntrinsicsOnSupportedConfig.java +index 4c56daebfb8..92836130408 100644 +--- a/test/hotspot/jtreg/compiler/cpuflags/TestAESIntrinsicsOnSupportedConfig.java ++++ b/test/hotspot/jtreg/compiler/cpuflags/TestAESIntrinsicsOnSupportedConfig.java +@@ -21,12 +21,18 @@ + * questions. + */ + ++/* ++ * This file has been modified by Loongson Technology in 2021, These ++ * modifications are Copyright (c) 2021, Loongson Technology, and are made ++ * available on the same license terms set forth above. ++ */ ++ + /* + * @test + * @library /test/lib / + * @modules java.base/jdk.internal.misc + * java.management +- * @requires vm.cpu.features ~= ".*aes.*" & !vm.graal.enabled ++ * @requires (vm.cpu.features ~= ".*aes.*" | os.arch == "loongarch64") & !vm.graal.enabled + * @build jdk.test.whitebox.WhiteBox + * @run driver jdk.test.lib.helpers.ClassFileInstaller jdk.test.whitebox.WhiteBox + * @run main/othervm/timeout=600 -Xbootclasspath/a:. +diff --git a/test/hotspot/jtreg/compiler/cpuflags/TestAESIntrinsicsOnUnsupportedConfig.java b/test/hotspot/jtreg/compiler/cpuflags/TestAESIntrinsicsOnUnsupportedConfig.java +index 03016ea3dd6..62ce6c1a7a5 100644 +--- a/test/hotspot/jtreg/compiler/cpuflags/TestAESIntrinsicsOnUnsupportedConfig.java ++++ b/test/hotspot/jtreg/compiler/cpuflags/TestAESIntrinsicsOnUnsupportedConfig.java +@@ -21,6 +21,12 @@ + * questions. 
+ */ + ++/* ++ * This file has been modified by Loongson Technology in 2021, These ++ * modifications are Copyright (c) 2021, Loongson Technology, and are made ++ * available on the same license terms set forth above. ++ */ ++ + /* + * @test + * @library /test/lib / +@@ -28,7 +34,7 @@ + * java.management + * + * @build jdk.test.whitebox.WhiteBox +- * @requires !(vm.cpu.features ~= ".*aes.*") ++ * @requires !(vm.cpu.features ~= ".*aes.*" | os.arch == "loongarch64") + * @requires vm.compiler1.enabled | !vm.graal.enabled + * @run driver jdk.test.lib.helpers.ClassFileInstaller jdk.test.whitebox.WhiteBox + * @run main/othervm -Xbootclasspath/a:. -XX:+UnlockDiagnosticVMOptions +diff --git a/test/hotspot/jtreg/compiler/intrinsics/sha/cli/testcases/GenericTestCaseForOtherCPU.java b/test/hotspot/jtreg/compiler/intrinsics/sha/cli/testcases/GenericTestCaseForOtherCPU.java +index 468cd83d7a2..40d2b03e301 100644 +--- a/test/hotspot/jtreg/compiler/intrinsics/sha/cli/testcases/GenericTestCaseForOtherCPU.java ++++ b/test/hotspot/jtreg/compiler/intrinsics/sha/cli/testcases/GenericTestCaseForOtherCPU.java +@@ -21,6 +21,12 @@ + * questions. + */ + ++/* ++ * This file has been modified by Loongson Technology in 2021, These ++ * modifications are Copyright (c) 2021, Loongson Technology, and are made ++ * available on the same license terms set forth above. ++ */ ++ + package compiler.intrinsics.sha.cli.testcases; + + import compiler.intrinsics.sha.cli.DigestOptionsBase; +@@ -32,7 +38,7 @@ import jdk.test.lib.cli.predicate.OrPredicate; + + /** + * Generic test case for SHA-related options targeted to any CPU except +- * AArch64, RISCV64, PPC, S390x, and X86. ++ * AArch64, RISCV64, PPC, S390x, LoongArch64, and X86. + */ + public class GenericTestCaseForOtherCPU extends + DigestOptionsBase.TestCase { +@@ -44,14 +50,15 @@ public class GenericTestCaseForOtherCPU extends + } + + public GenericTestCaseForOtherCPU(String optionName, boolean checkUseSHA) { +- // Execute the test case on any CPU except AArch64, RISCV64, PPC, S390x, and X86. ++ // Execute the test case on any CPU except AArch64, RISCV64, PPC, S390x, LoongArch64, and X86. + super(optionName, new NotPredicate( + new OrPredicate(Platform::isAArch64, + new OrPredicate(Platform::isRISCV64, + new OrPredicate(Platform::isS390x, + new OrPredicate(Platform::isPPC, ++ new OrPredicate(Platform::isLoongArch64, + new OrPredicate(Platform::isX64, +- Platform::isX86))))))); ++ Platform::isX86)))))))); + + this.checkUseSHA = checkUseSHA; + } +diff --git a/test/hotspot/jtreg/compiler/jvmci/jdk.vm.ci.code.test/src/jdk/vm/ci/code/test/CodeInstallationTest.java b/test/hotspot/jtreg/compiler/jvmci/jdk.vm.ci.code.test/src/jdk/vm/ci/code/test/CodeInstallationTest.java +index d7ecc7c04ef..0d47a2f3037 100644 +--- a/test/hotspot/jtreg/compiler/jvmci/jdk.vm.ci.code.test/src/jdk/vm/ci/code/test/CodeInstallationTest.java ++++ b/test/hotspot/jtreg/compiler/jvmci/jdk.vm.ci.code.test/src/jdk/vm/ci/code/test/CodeInstallationTest.java +@@ -20,16 +20,25 @@ + * or visit www.oracle.com if you need additional information or have any + * questions. + */ ++ ++/* ++ * This file has been modified by Loongson Technology in 2023, These ++ * modifications are Copyright (c) 2022, 2023, Loongson Technology, and are made ++ * available on the same license terms set forth above. 
++ */ ++ + package jdk.vm.ci.code.test; + + import jdk.vm.ci.aarch64.AArch64; + import jdk.vm.ci.amd64.AMD64; ++import jdk.vm.ci.loongarch64.LoongArch64; + import jdk.vm.ci.code.Architecture; + import jdk.vm.ci.code.CodeCacheProvider; + import jdk.vm.ci.code.InstalledCode; + import jdk.vm.ci.code.TargetDescription; + import jdk.vm.ci.code.test.aarch64.AArch64TestAssembler; + import jdk.vm.ci.code.test.amd64.AMD64TestAssembler; ++import jdk.vm.ci.code.test.loongarch64.LoongArch64TestAssembler; + import jdk.vm.ci.hotspot.HotSpotCodeCacheProvider; + import jdk.vm.ci.hotspot.HotSpotCompiledCode; + import jdk.vm.ci.hotspot.HotSpotJVMCIRuntime; +@@ -75,6 +84,8 @@ public class CodeInstallationTest { + return new AMD64TestAssembler(codeCache, config); + } else if (arch instanceof AArch64) { + return new AArch64TestAssembler(codeCache, config); ++ } else if (arch instanceof LoongArch64) { ++ return new LoongArch64TestAssembler(codeCache, config); + } else { + Assert.fail("unsupported architecture"); + return null; +diff --git a/test/hotspot/jtreg/compiler/jvmci/jdk.vm.ci.code.test/src/jdk/vm/ci/code/test/DataPatchTest.java b/test/hotspot/jtreg/compiler/jvmci/jdk.vm.ci.code.test/src/jdk/vm/ci/code/test/DataPatchTest.java +index 2e3f90368b1..a07fcc8af94 100644 +--- a/test/hotspot/jtreg/compiler/jvmci/jdk.vm.ci.code.test/src/jdk/vm/ci/code/test/DataPatchTest.java ++++ b/test/hotspot/jtreg/compiler/jvmci/jdk.vm.ci.code.test/src/jdk/vm/ci/code/test/DataPatchTest.java +@@ -21,10 +21,16 @@ + * questions. + */ + ++/* ++ * This file has been modified by Loongson Technology in 2022, These ++ * modifications are Copyright (c) 2022, Loongson Technology, and are made ++ * available on the same license terms set forth above. ++ */ ++ + /** + * @test + * @requires vm.jvmci +- * @requires vm.simpleArch == "x64" | vm.simpleArch == "aarch64" ++ * @requires vm.simpleArch == "x64" | vm.simpleArch == "aarch64" | vm.simpleArch == "loongarch64" + * @library / + * @modules jdk.internal.vm.ci/jdk.vm.ci.hotspot + * jdk.internal.vm.ci/jdk.vm.ci.meta +@@ -32,8 +38,9 @@ + * jdk.internal.vm.ci/jdk.vm.ci.code.site + * jdk.internal.vm.ci/jdk.vm.ci.runtime + * jdk.internal.vm.ci/jdk.vm.ci.aarch64 ++ * jdk.internal.vm.ci/jdk.vm.ci.loongarch64 + * jdk.internal.vm.ci/jdk.vm.ci.amd64 +- * @compile CodeInstallationTest.java DebugInfoTest.java TestAssembler.java TestHotSpotVMConfig.java amd64/AMD64TestAssembler.java aarch64/AArch64TestAssembler.java ++ * @compile CodeInstallationTest.java DebugInfoTest.java TestAssembler.java TestHotSpotVMConfig.java amd64/AMD64TestAssembler.java aarch64/AArch64TestAssembler.java loongarch64/LoongArch64TestAssembler.java + * @run junit/othervm -XX:+UnlockExperimentalVMOptions -XX:+EnableJVMCI -XX:-UseJVMCICompiler jdk.vm.ci.code.test.DataPatchTest + */ + +diff --git a/test/hotspot/jtreg/compiler/jvmci/jdk.vm.ci.code.test/src/jdk/vm/ci/code/test/InterpreterFrameSizeTest.java b/test/hotspot/jtreg/compiler/jvmci/jdk.vm.ci.code.test/src/jdk/vm/ci/code/test/InterpreterFrameSizeTest.java +index b88832677eb..00860c8a66a 100644 +--- a/test/hotspot/jtreg/compiler/jvmci/jdk.vm.ci.code.test/src/jdk/vm/ci/code/test/InterpreterFrameSizeTest.java ++++ b/test/hotspot/jtreg/compiler/jvmci/jdk.vm.ci.code.test/src/jdk/vm/ci/code/test/InterpreterFrameSizeTest.java +@@ -21,10 +21,16 @@ + * questions. + */ + ++/* ++ * This file has been modified by Loongson Technology in 2022, These ++ * modifications are Copyright (c) 2022, Loongson Technology, and are made ++ * available on the same license terms set forth above. 
++ */ ++ + /** + * @test + * @requires vm.jvmci +- * @requires vm.simpleArch == "x64" | vm.simpleArch == "aarch64" ++ * @requires vm.simpleArch == "x64" | vm.simpleArch == "aarch64" | vm.simpleArch == "loongarch64" + * @modules jdk.internal.vm.ci/jdk.vm.ci.hotspot + * jdk.internal.vm.ci/jdk.vm.ci.code + * jdk.internal.vm.ci/jdk.vm.ci.code.site +@@ -32,8 +38,9 @@ + * jdk.internal.vm.ci/jdk.vm.ci.runtime + * jdk.internal.vm.ci/jdk.vm.ci.common + * jdk.internal.vm.ci/jdk.vm.ci.aarch64 ++ * jdk.internal.vm.ci/jdk.vm.ci.loongarch64 + * jdk.internal.vm.ci/jdk.vm.ci.amd64 +- * @compile CodeInstallationTest.java TestAssembler.java TestHotSpotVMConfig.java amd64/AMD64TestAssembler.java aarch64/AArch64TestAssembler.java ++ * @compile CodeInstallationTest.java TestAssembler.java TestHotSpotVMConfig.java amd64/AMD64TestAssembler.java aarch64/AArch64TestAssembler.java loongarch64/LoongArch64TestAssembler.java + * @run junit/othervm -XX:+UnlockExperimentalVMOptions -XX:+EnableJVMCI -XX:-UseJVMCICompiler jdk.vm.ci.code.test.InterpreterFrameSizeTest + */ + +diff --git a/test/hotspot/jtreg/compiler/jvmci/jdk.vm.ci.code.test/src/jdk/vm/ci/code/test/MaxOopMapStackOffsetTest.java b/test/hotspot/jtreg/compiler/jvmci/jdk.vm.ci.code.test/src/jdk/vm/ci/code/test/MaxOopMapStackOffsetTest.java +index f473d089a54..6ca7b76f1e7 100644 +--- a/test/hotspot/jtreg/compiler/jvmci/jdk.vm.ci.code.test/src/jdk/vm/ci/code/test/MaxOopMapStackOffsetTest.java ++++ b/test/hotspot/jtreg/compiler/jvmci/jdk.vm.ci.code.test/src/jdk/vm/ci/code/test/MaxOopMapStackOffsetTest.java +@@ -21,10 +21,16 @@ + * questions. + */ + ++/* ++ * This file has been modified by Loongson Technology in 2022, These ++ * modifications are Copyright (c) 2022, Loongson Technology, and are made ++ * available on the same license terms set forth above. ++ */ ++ + /** + * @test + * @requires vm.jvmci +- * @requires vm.simpleArch == "x64" | vm.simpleArch == "aarch64" ++ * @requires vm.simpleArch == "x64" | vm.simpleArch == "aarch64" | vm.simpleArch == "loongarch64" + * @library / + * @modules jdk.internal.vm.ci/jdk.vm.ci.hotspot + * jdk.internal.vm.ci/jdk.vm.ci.meta +@@ -33,8 +39,9 @@ + * jdk.internal.vm.ci/jdk.vm.ci.common + * jdk.internal.vm.ci/jdk.vm.ci.runtime + * jdk.internal.vm.ci/jdk.vm.ci.aarch64 ++ * jdk.internal.vm.ci/jdk.vm.ci.loongarch64 + * jdk.internal.vm.ci/jdk.vm.ci.amd64 +- * @compile CodeInstallationTest.java DebugInfoTest.java TestAssembler.java TestHotSpotVMConfig.java amd64/AMD64TestAssembler.java aarch64/AArch64TestAssembler.java ++ * @compile CodeInstallationTest.java DebugInfoTest.java TestAssembler.java TestHotSpotVMConfig.java amd64/AMD64TestAssembler.java aarch64/AArch64TestAssembler.java loongarch64/LoongArch64TestAssembler.java + * @run junit/othervm -XX:+UnlockExperimentalVMOptions -XX:+EnableJVMCI -XX:-UseJVMCICompiler jdk.vm.ci.code.test.MaxOopMapStackOffsetTest + */ + +diff --git a/test/hotspot/jtreg/compiler/jvmci/jdk.vm.ci.code.test/src/jdk/vm/ci/code/test/NativeCallTest.java b/test/hotspot/jtreg/compiler/jvmci/jdk.vm.ci.code.test/src/jdk/vm/ci/code/test/NativeCallTest.java +index dce107095d5..d8c855dfb3a 100644 +--- a/test/hotspot/jtreg/compiler/jvmci/jdk.vm.ci.code.test/src/jdk/vm/ci/code/test/NativeCallTest.java ++++ b/test/hotspot/jtreg/compiler/jvmci/jdk.vm.ci.code.test/src/jdk/vm/ci/code/test/NativeCallTest.java +@@ -21,10 +21,16 @@ + * questions. 
+ */ + ++/* ++ * This file has been modified by Loongson Technology in 2022, These ++ * modifications are Copyright (c) 2022, Loongson Technology, and are made ++ * available on the same license terms set forth above. ++ */ ++ + /** + * @test + * @requires vm.jvmci +- * @requires vm.simpleArch == "x64" | vm.simpleArch == "aarch64" ++ * @requires vm.simpleArch == "x64" | vm.simpleArch == "aarch64" | vm.simpleArch == "loongarch64" + * @library /test/lib / + * @modules jdk.internal.vm.ci/jdk.vm.ci.hotspot + * jdk.internal.vm.ci/jdk.vm.ci.code +@@ -33,8 +39,9 @@ + * jdk.internal.vm.ci/jdk.vm.ci.runtime + * jdk.internal.vm.ci/jdk.vm.ci.common + * jdk.internal.vm.ci/jdk.vm.ci.aarch64 ++ * jdk.internal.vm.ci/jdk.vm.ci.loongarch64 + * jdk.internal.vm.ci/jdk.vm.ci.amd64 +- * @compile CodeInstallationTest.java TestHotSpotVMConfig.java NativeCallTest.java TestAssembler.java amd64/AMD64TestAssembler.java aarch64/AArch64TestAssembler.java ++ * @compile CodeInstallationTest.java TestHotSpotVMConfig.java NativeCallTest.java TestAssembler.java amd64/AMD64TestAssembler.java aarch64/AArch64TestAssembler.java loongarch64/LoongArch64TestAssembler.java + * @run junit/othervm/native -XX:+UnlockExperimentalVMOptions -XX:+EnableJVMCI -Xbootclasspath/a:. jdk.vm.ci.code.test.NativeCallTest + */ + package jdk.vm.ci.code.test; +diff --git a/test/hotspot/jtreg/compiler/jvmci/jdk.vm.ci.code.test/src/jdk/vm/ci/code/test/SimpleCodeInstallationTest.java b/test/hotspot/jtreg/compiler/jvmci/jdk.vm.ci.code.test/src/jdk/vm/ci/code/test/SimpleCodeInstallationTest.java +index e5fc53e8013..75494d5ccf1 100644 +--- a/test/hotspot/jtreg/compiler/jvmci/jdk.vm.ci.code.test/src/jdk/vm/ci/code/test/SimpleCodeInstallationTest.java ++++ b/test/hotspot/jtreg/compiler/jvmci/jdk.vm.ci.code.test/src/jdk/vm/ci/code/test/SimpleCodeInstallationTest.java +@@ -21,10 +21,16 @@ + * questions. + */ + ++/* ++ * This file has been modified by Loongson Technology in 2022, These ++ * modifications are Copyright (c) 2022, Loongson Technology, and are made ++ * available on the same license terms set forth above. 
++ */ ++ + /** + * @test + * @requires vm.jvmci +- * @requires vm.simpleArch == "x64" | vm.simpleArch == "aarch64" ++ * @requires vm.simpleArch == "x64" | vm.simpleArch == "aarch64" | vm.simpleArch == "loongarch64" + * @library / + * @modules jdk.internal.vm.ci/jdk.vm.ci.hotspot + * jdk.internal.vm.ci/jdk.vm.ci.meta +@@ -32,8 +38,9 @@ + * jdk.internal.vm.ci/jdk.vm.ci.code.site + * jdk.internal.vm.ci/jdk.vm.ci.runtime + * jdk.internal.vm.ci/jdk.vm.ci.aarch64 ++ * jdk.internal.vm.ci/jdk.vm.ci.loongarch64 + * jdk.internal.vm.ci/jdk.vm.ci.amd64 +- * @compile CodeInstallationTest.java DebugInfoTest.java TestAssembler.java TestHotSpotVMConfig.java amd64/AMD64TestAssembler.java aarch64/AArch64TestAssembler.java ++ * @compile CodeInstallationTest.java DebugInfoTest.java TestAssembler.java TestHotSpotVMConfig.java amd64/AMD64TestAssembler.java aarch64/AArch64TestAssembler.java loongarch64/LoongArch64TestAssembler.java + * @run junit/othervm -XX:+UnlockExperimentalVMOptions -XX:+EnableJVMCI -XX:-UseJVMCICompiler jdk.vm.ci.code.test.SimpleCodeInstallationTest + */ + +diff --git a/test/hotspot/jtreg/compiler/jvmci/jdk.vm.ci.code.test/src/jdk/vm/ci/code/test/SimpleDebugInfoTest.java b/test/hotspot/jtreg/compiler/jvmci/jdk.vm.ci.code.test/src/jdk/vm/ci/code/test/SimpleDebugInfoTest.java +index bfd611312a2..08be94ac132 100644 +--- a/test/hotspot/jtreg/compiler/jvmci/jdk.vm.ci.code.test/src/jdk/vm/ci/code/test/SimpleDebugInfoTest.java ++++ b/test/hotspot/jtreg/compiler/jvmci/jdk.vm.ci.code.test/src/jdk/vm/ci/code/test/SimpleDebugInfoTest.java +@@ -21,10 +21,16 @@ + * questions. + */ + ++/* ++ * This file has been modified by Loongson Technology in 2022, These ++ * modifications are Copyright (c) 2022, Loongson Technology, and are made ++ * available on the same license terms set forth above. ++ */ ++ + /** + * @test + * @requires vm.jvmci +- * @requires vm.simpleArch == "x64" | vm.simpleArch == "aarch64" ++ * @requires vm.simpleArch == "x64" | vm.simpleArch == "aarch64" | vm.simpleArch == "loongarch64" + * @library / + * @modules jdk.internal.vm.ci/jdk.vm.ci.hotspot + * jdk.internal.vm.ci/jdk.vm.ci.meta +@@ -32,8 +38,9 @@ + * jdk.internal.vm.ci/jdk.vm.ci.code.site + * jdk.internal.vm.ci/jdk.vm.ci.runtime + * jdk.internal.vm.ci/jdk.vm.ci.aarch64 ++ * jdk.internal.vm.ci/jdk.vm.ci.loongarch64 + * jdk.internal.vm.ci/jdk.vm.ci.amd64 +- * @compile CodeInstallationTest.java DebugInfoTest.java TestAssembler.java TestHotSpotVMConfig.java amd64/AMD64TestAssembler.java aarch64/AArch64TestAssembler.java ++ * @compile CodeInstallationTest.java DebugInfoTest.java TestAssembler.java TestHotSpotVMConfig.java amd64/AMD64TestAssembler.java aarch64/AArch64TestAssembler.java loongarch64/LoongArch64TestAssembler.java + * @run junit/othervm -XX:+UnlockExperimentalVMOptions -XX:+EnableJVMCI -XX:-UseJVMCICompiler jdk.vm.ci.code.test.SimpleDebugInfoTest + */ + +diff --git a/test/hotspot/jtreg/compiler/jvmci/jdk.vm.ci.code.test/src/jdk/vm/ci/code/test/VirtualObjectDebugInfoTest.java b/test/hotspot/jtreg/compiler/jvmci/jdk.vm.ci.code.test/src/jdk/vm/ci/code/test/VirtualObjectDebugInfoTest.java +index 1fb0d77eb73..b2d40f70a80 100644 +--- a/test/hotspot/jtreg/compiler/jvmci/jdk.vm.ci.code.test/src/jdk/vm/ci/code/test/VirtualObjectDebugInfoTest.java ++++ b/test/hotspot/jtreg/compiler/jvmci/jdk.vm.ci.code.test/src/jdk/vm/ci/code/test/VirtualObjectDebugInfoTest.java +@@ -21,10 +21,16 @@ + * questions. 
+ */ + ++/* ++ * This file has been modified by Loongson Technology in 2022, These ++ * modifications are Copyright (c) 2022, Loongson Technology, and are made ++ * available on the same license terms set forth above. ++ */ ++ + /** + * @test + * @requires vm.jvmci +- * @requires vm.simpleArch == "x64" | vm.simpleArch == "aarch64" ++ * @requires vm.simpleArch == "x64" | vm.simpleArch == "aarch64" | vm.simpleArch == "loongarch64" + * @library / + * @modules jdk.internal.vm.ci/jdk.vm.ci.hotspot + * jdk.internal.vm.ci/jdk.vm.ci.meta +@@ -32,8 +38,9 @@ + * jdk.internal.vm.ci/jdk.vm.ci.code.site + * jdk.internal.vm.ci/jdk.vm.ci.runtime + * jdk.internal.vm.ci/jdk.vm.ci.aarch64 ++ * jdk.internal.vm.ci/jdk.vm.ci.loongarch64 + * jdk.internal.vm.ci/jdk.vm.ci.amd64 +- * @compile CodeInstallationTest.java DebugInfoTest.java TestAssembler.java TestHotSpotVMConfig.java amd64/AMD64TestAssembler.java aarch64/AArch64TestAssembler.java ++ * @compile CodeInstallationTest.java DebugInfoTest.java TestAssembler.java TestHotSpotVMConfig.java amd64/AMD64TestAssembler.java aarch64/AArch64TestAssembler.java loongarch64/LoongArch64TestAssembler.java + * @run junit/othervm -XX:+UnlockExperimentalVMOptions -XX:+EnableJVMCI -XX:-UseJVMCICompiler jdk.vm.ci.code.test.VirtualObjectDebugInfoTest + */ + +diff --git a/test/hotspot/jtreg/compiler/jvmci/jdk.vm.ci.code.test/src/jdk/vm/ci/code/test/loongarch64/LoongArch64TestAssembler.java b/test/hotspot/jtreg/compiler/jvmci/jdk.vm.ci.code.test/src/jdk/vm/ci/code/test/loongarch64/LoongArch64TestAssembler.java +new file mode 100644 +index 00000000000..4c76868453a +--- /dev/null ++++ b/test/hotspot/jtreg/compiler/jvmci/jdk.vm.ci.code.test/src/jdk/vm/ci/code/test/loongarch64/LoongArch64TestAssembler.java +@@ -0,0 +1,568 @@ ++/* ++ * Copyright (c) 2020, 2022, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ */ ++ ++package jdk.vm.ci.code.test.loongarch64; ++ ++import jdk.vm.ci.loongarch64.LoongArch64; ++import jdk.vm.ci.loongarch64.LoongArch64Kind; ++import jdk.vm.ci.code.CallingConvention; ++import jdk.vm.ci.code.CodeCacheProvider; ++import jdk.vm.ci.code.DebugInfo; ++import jdk.vm.ci.code.Register; ++import jdk.vm.ci.code.RegisterArray; ++import jdk.vm.ci.code.RegisterValue; ++import jdk.vm.ci.code.StackSlot; ++import jdk.vm.ci.code.site.ConstantReference; ++import jdk.vm.ci.code.site.DataSectionReference; ++import jdk.vm.ci.code.test.TestAssembler; ++import jdk.vm.ci.code.test.TestHotSpotVMConfig; ++import jdk.vm.ci.hotspot.HotSpotCallingConventionType; ++import jdk.vm.ci.hotspot.HotSpotConstant; ++import jdk.vm.ci.hotspot.HotSpotForeignCallTarget; ++import jdk.vm.ci.meta.AllocatableValue; ++import jdk.vm.ci.meta.JavaKind; ++import jdk.vm.ci.meta.VMConstant; ++ ++public class LoongArch64TestAssembler extends TestAssembler { ++ ++ private static final Register scratchRegister = LoongArch64.SCR1; ++ private static final Register doubleScratch = LoongArch64.f23; ++ private static final RegisterArray nativeGeneralParameterRegisters = new RegisterArray(LoongArch64.a0, ++ LoongArch64.a1, LoongArch64.a2, ++ LoongArch64.a3, LoongArch64.a4, ++ LoongArch64.a5, LoongArch64.a6, ++ LoongArch64.a7); ++ private static final RegisterArray floatParameterRegisters = new RegisterArray(LoongArch64.f0, ++ LoongArch64.f1, LoongArch64.f2, ++ LoongArch64.f3, LoongArch64.f4, ++ LoongArch64.f5, LoongArch64.f6, ++ LoongArch64.f7); ++ private static int currentGeneral = 0; ++ private static int currentFloat = 0; ++ public LoongArch64TestAssembler(CodeCacheProvider codeCache, TestHotSpotVMConfig config) { ++ super(codeCache, config, ++ 16 /* initialFrameSize */, 16 /* stackAlignment */, ++ LoongArch64Kind.UDWORD /* narrowOopKind */, ++ /* registers */ ++ LoongArch64.a0, LoongArch64.a1, LoongArch64.a2, LoongArch64.a3, ++ LoongArch64.a4, LoongArch64.a5, LoongArch64.a6, LoongArch64.a7); ++ } ++ ++ private static int low(int x, int l) { ++ assert l < 32; ++ return (x >> 0) & ((1 << l)-1); ++ } ++ ++ private static int low16(int x) { ++ return low(x, 16); ++ } ++ ++ private void emitNop() { ++ code.emitInt(0x3400000); ++ } ++ ++ private void emitPcaddu12i(Register rj, int si20) { ++ // pcaddu12i ++ code.emitInt((0b0001110 << 25) ++ | (low(si20, 20) << 5) ++ | rj.encoding); ++ } ++ ++ private void emitAdd(Register rd, Register rj, Register rk) { ++ // add_d ++ code.emitInt((0b00000000000100001 << 15) ++ | (rk.encoding << 10) ++ | (rj.encoding << 5) ++ | rd.encoding); ++ } ++ ++ private void emitAdd(Register rd, Register rj, int si12) { ++ // addi_d ++ code.emitInt((0b0000001011 << 22) ++ | (low(si12, 12) << 10) ++ | (rj.encoding << 5) ++ | rd.encoding); ++ } ++ ++ private void emitSub(Register rd, Register rj, Register rk) { ++ // sub_d ++ code.emitInt((0b00000000000100011 << 15) ++ | (rk.encoding << 10) ++ | (rj.encoding << 5) ++ | rd.encoding); ++ } ++ ++ private void emitShiftLeft(Register rd, Register rj, int shift) { ++ // slli_d ++ code.emitInt((0b00000000010000 << 18) ++ | (low(( (0b01 << 6) | shift ), 8) << 10) ++ | (rj.encoding << 5) ++ | rd.encoding); ++ } ++ ++ private void emitLu12i_w(Register rj, int imm20) { ++ // lu12i_w ++ code.emitInt((0b0001010 << 25) ++ | (low(imm20, 20)<<5) ++ | rj.encoding); ++ } ++ ++ private void emitOri(Register rd, Register rj, int ui12) { ++ // ori ++ code.emitInt((0b0000001110 << 22) ++ | (low(ui12, 12) << 10) ++ | (rj.encoding << 5) ++ | rd.encoding); ++ } ++ ++ private void 
emitLu32i_d(Register rj, int imm20) { ++ // lu32i_d ++ code.emitInt((0b0001011 << 25) ++ | (low(imm20, 20)<<5) ++ | rj.encoding); ++ } ++ ++ private void emitLu52i_d(Register rd, Register rj, int imm12) { ++ // lu52i_d ++ code.emitInt((0b0000001100 << 22) ++ | (low(imm12, 12) << 10) ++ | (rj.encoding << 5) ++ | rd.encoding); ++ } ++ ++ private void emitLoadImmediate(Register rd, int imm32) { ++ emitLu12i_w(rd, (imm32 >> 12) & 0xfffff); ++ emitOri(rd, rd, imm32 & 0xfff); ++ } ++ ++ private void emitLi52(Register rj, long imm) { ++ emitLu12i_w(rj, (int) ((imm >> 12) & 0xfffff)); ++ emitOri(rj, rj, (int) (imm & 0xfff)); ++ emitLu32i_d(rj, (int) ((imm >> 32) & 0xfffff)); ++ } ++ ++ private void emitLi64(Register rj, long imm) { ++ emitLu12i_w(rj, (int) ((imm >> 12) & 0xfffff)); ++ emitOri(rj, rj, (int) (imm & 0xfff)); ++ emitLu32i_d(rj, (int) ((imm >> 32) & 0xfffff)); ++ emitLu52i_d(rj, rj, (int) ((imm >> 52) & 0xfff)); ++ } ++ ++ private void emitOr(Register rd, Register rj, Register rk) { ++ // orr ++ code.emitInt((0b00000000000101010 << 15) ++ | (rk.encoding << 10) ++ | (rj.encoding << 5) ++ | rd.encoding); ++ } ++ ++ private void emitMove(Register rd, Register rs) { ++ // move ++ emitOr(rd, rs, LoongArch64.zero); ++ } ++ ++ private void emitMovfr2gr(Register rd, LoongArch64Kind kind, Register rj) { ++ // movfr2gr_s/movfr2gr_d ++ int opc = 0; ++ switch (kind) { ++ case SINGLE: opc = 0b0000000100010100101101; break; ++ case DOUBLE: opc = 0b0000000100010100101110; break; ++ default: throw new IllegalArgumentException(); ++ } ++ code.emitInt((opc << 10) ++ | (rj.encoding << 5) ++ | rd.encoding); ++ } ++ ++ private void emitLoadRegister(Register rd, LoongArch64Kind kind, Register rj, int offset) { ++ // load ++ assert offset >= 0; ++ int opc = 0; ++ switch (kind) { ++ case BYTE: opc = 0b0010100000; break; ++ case WORD: opc = 0b0010100001; break; ++ case DWORD: opc = 0b0010100010; break; ++ case QWORD: opc = 0b0010100011; break; ++ case UDWORD: opc = 0b0010101010; break; ++ case SINGLE: opc = 0b0010101100; break; ++ case DOUBLE: opc = 0b0010101110; break; ++ default: throw new IllegalArgumentException(); ++ } ++ code.emitInt((opc << 22) ++ | (low(offset, 12) << 10) ++ | (rj.encoding << 5) ++ | rd.encoding); ++ } ++ ++ private void emitStoreRegister(Register rd, LoongArch64Kind kind, Register rj, int offset) { ++ // store ++ assert offset >= 0; ++ int opc = 0; ++ switch (kind) { ++ case BYTE: opc = 0b0010100100; break; ++ case WORD: opc = 0b0010100101; break; ++ case DWORD: opc = 0b0010100110; break; ++ case QWORD: opc = 0b0010100111; break; ++ case SINGLE: opc = 0b0010101101; break; ++ case DOUBLE: opc = 0b0010101111; break; ++ default: throw new IllegalArgumentException(); ++ } ++ code.emitInt((opc << 22) ++ | (low(offset, 12) << 10) ++ | (rj.encoding << 5) ++ | rd.encoding); ++ } ++ ++ private void emitJirl(Register rd, Register rj, int offs) { ++ // jirl ++ code.emitInt((0b010011 << 26) ++ | (low16(offs >> 2) << 10) ++ | (rj.encoding << 5) ++ | rd.encoding); ++ } ++ ++ @Override ++ public void emitGrowStack(int size) { ++ assert size % 16 == 0; ++ if (size > -4096 && size < 0) { ++ emitAdd(LoongArch64.sp, LoongArch64.sp, -size); ++ } else if (size == 0) { ++ // No-op ++ } else if (size < 4096) { ++ emitAdd(LoongArch64.sp, LoongArch64.sp, -size); ++ } else if (size < 65535) { ++ emitLoadImmediate(scratchRegister, size); ++ emitSub(LoongArch64.sp, LoongArch64.sp, scratchRegister); ++ } else { ++ throw new IllegalArgumentException(); ++ } ++ } ++ ++ @Override ++ public void emitPrologue() { ++ 
// Must be patchable by NativeJump::patch_verified_entry ++ emitNop(); ++ emitGrowStack(32); ++ emitStoreRegister(LoongArch64.ra, LoongArch64Kind.QWORD, LoongArch64.sp, 24); ++ emitStoreRegister(LoongArch64.fp, LoongArch64Kind.QWORD, LoongArch64.sp, 16); ++ emitGrowStack(-16); ++ emitMove(LoongArch64.fp, LoongArch64.sp); ++ setDeoptRescueSlot(newStackSlot(LoongArch64Kind.QWORD)); ++ } ++ ++ @Override ++ public void emitEpilogue() { ++ recordMark(config.MARKID_DEOPT_HANDLER_ENTRY); ++ recordCall(new HotSpotForeignCallTarget(config.handleDeoptStub), 4*4, true, null); ++ emitCall(0xdeaddeaddeadL); ++ } ++ ++ @Override ++ public void emitCallPrologue(CallingConvention cc, Object... prim) { ++ emitGrowStack(cc.getStackSize()); ++ frameSize += cc.getStackSize(); ++ AllocatableValue[] args = cc.getArguments(); ++ for (int i = 0; i < args.length; i++) { ++ emitLoad(args[i], prim[i]); ++ } ++ currentGeneral = 0; ++ currentFloat = 0; ++ } ++ ++ @Override ++ public void emitCallEpilogue(CallingConvention cc) { ++ emitGrowStack(-cc.getStackSize()); ++ frameSize -= cc.getStackSize(); ++ } ++ ++ @Override ++ public void emitCall(long addr) { ++ // long call (absolute) ++ // lu12i_w(T4, split_low20(value >> 12)); ++ // lu32i_d(T4, split_low20(value >> 32)); ++ // jirl(RA, T4, split_low12(value)); ++ emitLu12i_w(LoongArch64.t4, (int) ((addr >> 12) & 0xfffff)); ++ emitLu32i_d(LoongArch64.t4, (int) ((addr >> 32) & 0xfffff)); ++ emitJirl(LoongArch64.ra, LoongArch64.t4, (int) (addr & 0xfff)); ++ } ++ ++ @Override ++ public void emitLoad(AllocatableValue av, Object prim) { ++ if (av instanceof RegisterValue) { ++ Register reg = ((RegisterValue) av).getRegister(); ++ if (prim instanceof Float) { ++ if (currentFloat < floatParameterRegisters.size()) { ++ currentFloat++; ++ emitLoadFloat(reg, (Float) prim); ++ } else if (currentGeneral < nativeGeneralParameterRegisters.size()) { ++ currentGeneral++; ++ emitLoadFloat(doubleScratch, (Float) prim); ++ emitMovfr2gr(reg, LoongArch64Kind.SINGLE, doubleScratch); ++ } ++ } else if (prim instanceof Double) { ++ if (currentFloat < floatParameterRegisters.size()) { ++ currentFloat++; ++ emitLoadDouble(reg, (Double) prim); ++ } else if (currentGeneral < nativeGeneralParameterRegisters.size()) { ++ currentGeneral++; ++ emitLoadDouble(doubleScratch, (Double) prim); ++ emitMovfr2gr(reg, LoongArch64Kind.DOUBLE, doubleScratch); ++ } ++ } else if (prim instanceof Integer) { ++ emitLoadInt(reg, (Integer) prim); ++ } else if (prim instanceof Long) { ++ emitLoadLong(reg, (Long) prim); ++ } ++ } else if (av instanceof StackSlot) { ++ StackSlot slot = (StackSlot) av; ++ if (prim instanceof Float) { ++ emitFloatToStack(slot, emitLoadFloat(doubleScratch, (Float) prim)); ++ } else if (prim instanceof Double) { ++ emitDoubleToStack(slot, emitLoadDouble(doubleScratch, (Double) prim)); ++ } else if (prim instanceof Integer) { ++ emitIntToStack(slot, emitLoadInt(scratchRegister, (Integer) prim)); ++ } else if (prim instanceof Long) { ++ emitLongToStack(slot, emitLoadLong(scratchRegister, (Long) prim)); ++ } else { ++ assert false : "Unimplemented"; ++ } ++ } else { ++ throw new IllegalArgumentException("Unknown value " + av); ++ } ++ } ++ ++ @Override ++ public Register emitLoadPointer(HotSpotConstant c) { ++ recordDataPatchInCode(new ConstantReference((VMConstant) c)); ++ ++ Register ret = newRegister(); ++ // need to match patchable_li52 instruction sequence ++ // lu12i_ori_lu32i ++ emitLi52(ret, 0xdeaddead); ++ return ret; ++ } ++ ++ @Override ++ public Register emitLoadPointer(Register b, 
int offset) { ++ Register ret = newRegister(); ++ emitLoadRegister(ret, LoongArch64Kind.QWORD, b, offset); ++ return ret; ++ } ++ ++ @Override ++ public Register emitLoadNarrowPointer(DataSectionReference ref) { ++ recordDataPatchInCode(ref); ++ ++ Register ret = newRegister(); ++ emitPcaddu12i(ret, 0xdead >> 12); ++ emitAdd(ret, ret, 0xdead & 0xfff); ++ emitLoadRegister(ret, LoongArch64Kind.UDWORD, ret, 0); ++ return ret; ++ } ++ ++ @Override ++ public Register emitLoadPointer(DataSectionReference ref) { ++ recordDataPatchInCode(ref); ++ ++ Register ret = newRegister(); ++ emitPcaddu12i(ret, 0xdead >> 12); ++ emitAdd(ret, ret, 0xdead & 0xfff); ++ emitLoadRegister(ret, LoongArch64Kind.QWORD, ret, 0); ++ return ret; ++ } ++ ++ private Register emitLoadDouble(Register reg, double c) { ++ DataSectionReference ref = new DataSectionReference(); ++ ref.setOffset(data.position()); ++ data.emitDouble(c); ++ ++ recordDataPatchInCode(ref); ++ emitPcaddu12i(scratchRegister, 0xdead >> 12); ++ emitAdd(scratchRegister, scratchRegister, 0xdead & 0xfff); ++ emitLoadRegister(reg, LoongArch64Kind.DOUBLE, scratchRegister, 0); ++ return reg; ++ } ++ ++ private Register emitLoadFloat(Register reg, float c) { ++ DataSectionReference ref = new DataSectionReference(); ++ ref.setOffset(data.position()); ++ data.emitFloat(c); ++ ++ recordDataPatchInCode(ref); ++ emitPcaddu12i(scratchRegister, 0xdead >> 12); ++ emitAdd(scratchRegister, scratchRegister, 0xdead & 0xfff); ++ emitLoadRegister(reg, LoongArch64Kind.SINGLE, scratchRegister, 0); ++ return reg; ++ } ++ ++ @Override ++ public Register emitLoadFloat(float c) { ++ Register ret = LoongArch64.fv0; ++ return emitLoadFloat(ret, c); ++ } ++ ++ private Register emitLoadLong(Register reg, long c) { ++ emitLi64(reg, c); ++ return reg; ++ } ++ ++ @Override ++ public Register emitLoadLong(long c) { ++ Register ret = newRegister(); ++ return emitLoadLong(ret, c); ++ } ++ ++ private Register emitLoadInt(Register reg, int c) { ++ emitLoadImmediate(reg, c); ++ return reg; ++ } ++ ++ @Override ++ public Register emitLoadInt(int c) { ++ Register ret = newRegister(); ++ return emitLoadInt(ret, c); ++ } ++ ++ @Override ++ public Register emitIntArg0() { ++ return codeCache.getRegisterConfig() ++ .getCallingConventionRegisters(HotSpotCallingConventionType.JavaCall, JavaKind.Int) ++ .get(0); ++ } ++ ++ @Override ++ public Register emitIntArg1() { ++ return codeCache.getRegisterConfig() ++ .getCallingConventionRegisters(HotSpotCallingConventionType.JavaCall, JavaKind.Int) ++ .get(1); ++ } ++ ++ @Override ++ public Register emitIntAdd(Register a, Register b) { ++ emitAdd(a, a, b); ++ return a; ++ } ++ ++ @Override ++ public void emitTrap(DebugInfo info) { ++ // Dereference null pointer ++ emitMove(scratchRegister, LoongArch64.zero); ++ recordImplicitException(info); ++ emitLoadRegister(LoongArch64.zero, LoongArch64Kind.QWORD, scratchRegister, 0); ++ } ++ ++ @Override ++ public void emitIntRet(Register a) { ++ emitMove(LoongArch64.v0, a); ++ emitMove(LoongArch64.sp, LoongArch64.fp); ++ emitLoadRegister(LoongArch64.ra, LoongArch64Kind.QWORD, LoongArch64.sp, 8); ++ emitLoadRegister(LoongArch64.fp, LoongArch64Kind.QWORD, LoongArch64.sp, 0); ++ emitGrowStack(-16); ++ emitJirl(LoongArch64.zero, LoongArch64.ra, 0); ++ } ++ ++ @Override ++ public void emitFloatRet(Register a) { ++ assert a == LoongArch64.fv0 : "Unimplemented move " + a; ++ emitMove(LoongArch64.sp, LoongArch64.fp); ++ emitLoadRegister(LoongArch64.ra, LoongArch64Kind.QWORD, LoongArch64.sp, 8); ++ 
emitLoadRegister(LoongArch64.fp, LoongArch64Kind.QWORD, LoongArch64.sp, 0); ++ emitGrowStack(-16); ++ emitJirl(LoongArch64.zero, LoongArch64.ra, 0); ++ } ++ ++ @Override ++ public void emitPointerRet(Register a) { ++ emitIntRet(a); ++ } ++ ++ @Override ++ public StackSlot emitPointerToStack(Register a) { ++ return emitLongToStack(a); ++ } ++ ++ @Override ++ public StackSlot emitNarrowPointerToStack(Register a) { ++ return emitIntToStack(a); ++ } ++ ++ @Override ++ public Register emitUncompressPointer(Register compressed, long base, int shift) { ++ if (shift > 0) { ++ emitShiftLeft(compressed, compressed, shift); ++ } ++ ++ if (base != 0) { ++ emitLoadLong(scratchRegister, base); ++ emitAdd(compressed, compressed, scratchRegister); ++ } ++ ++ return compressed; ++ } ++ ++ private StackSlot emitDoubleToStack(StackSlot slot, Register a) { ++ emitStoreRegister(a, LoongArch64Kind.DOUBLE, LoongArch64.sp, slot.getOffset(frameSize)); ++ return slot; ++ } ++ ++ @Override ++ public StackSlot emitDoubleToStack(Register a) { ++ StackSlot ret = newStackSlot(LoongArch64Kind.DOUBLE); ++ return emitDoubleToStack(ret, a); ++ } ++ ++ private StackSlot emitFloatToStack(StackSlot slot, Register a) { ++ emitStoreRegister(a, LoongArch64Kind.SINGLE, LoongArch64.sp, slot.getOffset(frameSize)); ++ return slot; ++ } ++ ++ @Override ++ public StackSlot emitFloatToStack(Register a) { ++ StackSlot ret = newStackSlot(LoongArch64Kind.SINGLE); ++ return emitFloatToStack(ret, a); ++ } ++ ++ private StackSlot emitIntToStack(StackSlot slot, Register a) { ++ emitStoreRegister(a, LoongArch64Kind.DWORD, LoongArch64.sp, slot.getOffset(frameSize)); ++ return slot; ++ } ++ ++ @Override ++ public StackSlot emitIntToStack(Register a) { ++ StackSlot ret = newStackSlot(LoongArch64Kind.DWORD); ++ return emitIntToStack(ret, a); ++ } ++ ++ private StackSlot emitLongToStack(StackSlot slot, Register a) { ++ emitStoreRegister(a, LoongArch64Kind.QWORD, LoongArch64.sp, slot.getOffset(frameSize)); ++ return slot; ++ } ++ ++ @Override ++ public StackSlot emitLongToStack(Register a) { ++ StackSlot ret = newStackSlot(LoongArch64Kind.QWORD); ++ return emitLongToStack(ret, a); ++ } ++ ++} +diff --git a/test/hotspot/jtreg/compiler/lib/ir_framework/IRNode.java b/test/hotspot/jtreg/compiler/lib/ir_framework/IRNode.java +index 2f2395b77c6..58482edb32e 100644 +--- a/test/hotspot/jtreg/compiler/lib/ir_framework/IRNode.java ++++ b/test/hotspot/jtreg/compiler/lib/ir_framework/IRNode.java +@@ -21,6 +21,12 @@ + * questions. + */ + ++/* ++ * This file has been modified by Loongson Technology in 2022, These ++ * modifications are Copyright (c) 2022, Loongson Technology, and are made ++ * available on the same license terms set forth above. 
++ */ ++ + package compiler.lib.ir_framework; + + import compiler.lib.ir_framework.driver.irmatching.IRMatcher; +@@ -58,8 +64,8 @@ public class IRNode { + public static final String ALLOC_ARRAY = "(.*precise klass \\[L.*\\R((.*(?i:mov|mv|xor|nop|spill).*|\\s*|.*LGHI.*)\\R)*.*(?i:call,static).*wrapper for: _new_array_Java" + END; + public static final String ALLOC_ARRAY_OF = COMPOSITE_PREFIX + "(.*precise klass \\[L.*" + IS_REPLACED + ";:.*\\R((.*(?i:mov|mv|xorl|nop|spill).*|\\s*|.*LGHI.*)\\R)*.*(?i:call,static).*wrapper for: _new_array_Java" + END; + +- public static final String CHECKCAST_ARRAY = "(((?i:cmp|CLFI|CLR).*precise klass \\[.*;:|.*(?i:mov|mv|or).*precise klass \\[.*;:.*\\R.*(cmp|CMP|CLR))" + END; +- public static final String CHECKCAST_ARRAY_OF = COMPOSITE_PREFIX + "(((?i:cmp|CLFI|CLR).*precise klass \\[.*" + IS_REPLACED + ";:|.*(?i:mov|mv|or).*precise klass \\[.*" + IS_REPLACED + ";:.*\\R.*(cmp|CMP|CLR))" + END; ++ public static final String CHECKCAST_ARRAY = "(((?i:cmp|CLFI|CLR).*precise klass \\[.*;:|.*(?i:mov|mv|or|li).*precise klass \\[.*;:.*\\R.*(cmp|CMP|CLR))" + END; ++ public static final String CHECKCAST_ARRAY_OF = COMPOSITE_PREFIX + "(((?i:cmp|CLFI|CLR).*precise klass \\[.*" + IS_REPLACED + ";:|.*(?i:mov|mv|or|li).*precise klass \\[.*" + IS_REPLACED + ";:.*\\R.*(cmp|CMP|CLR))" + END; + // Does not work on s390 (a rule containing this regex will be skipped on s390). + public static final String CHECKCAST_ARRAYCOPY = "(.*((?i:call_leaf_nofp,runtime)|CALL,\\s?runtime leaf nofp|BCTRL.*.leaf call).*checkcast_arraycopy.*" + END; + +diff --git a/test/hotspot/jtreg/compiler/runtime/TestConstantsInError.java b/test/hotspot/jtreg/compiler/runtime/TestConstantsInError.java +index 85fd3fa938d..0655f2b0bd1 100644 +--- a/test/hotspot/jtreg/compiler/runtime/TestConstantsInError.java ++++ b/test/hotspot/jtreg/compiler/runtime/TestConstantsInError.java +@@ -21,6 +21,12 @@ + * questions. + */ + ++/* ++ * This file has been modified by Loongson Technology in 2022. These ++ * modifications are Copyright (c) 2022 Loongson Technology, and are made ++ * available on the same license terms set forth above. 
++ */ ++ + /* + * @test + * @bug 8279822 +@@ -130,7 +136,7 @@ public abstract class TestConstantsInError implements OutputProcessor { + results.shouldMatch("Test_C1/.*::test \\(3 bytes\\)$") + .shouldMatch("Test_C2/.*::test \\(3 bytes\\)$"); + +- if (isC1 && (Platform.isAArch64() || Platform.isRISCV64())) { // no code patching ++ if (isC1 && (Platform.isAArch64() || Platform.isRISCV64() || Platform.isLoongArch64())) { // no code patching + results.shouldMatch("Test_C1/.*::test \\(3 bytes\\) made not entrant") + .shouldMatch("Test_C2/.*::test \\(3 bytes\\) made not entrant"); + } else { +@@ -168,7 +174,7 @@ public abstract class TestConstantsInError implements OutputProcessor { + .shouldMatch("Test_MH3/.*::test \\(3 bytes\\)$") + .shouldMatch("Test_MH4/.*::test \\(3 bytes\\)$"); + +- if (isC1 && (Platform.isAArch64() || Platform.isRISCV64())) { // no code patching ++ if (isC1 && (Platform.isAArch64() || Platform.isRISCV64() || Platform.isLoongArch64())) { // no code patching + results.shouldMatch("Test_MH1/.*::test \\(3 bytes\\) made not entrant") + .shouldMatch("Test_MH2/.*::test \\(3 bytes\\) made not entrant") + .shouldMatch("Test_MH3/.*::test \\(3 bytes\\) made not entrant") +@@ -191,7 +197,7 @@ public abstract class TestConstantsInError implements OutputProcessor { + results.shouldMatch("Test_MT1/.*::test \\(3 bytes\\)$") + .shouldMatch("Test_MT2/.*::test \\(3 bytes\\)$"); + +- if (isC1 && (Platform.isAArch64() || Platform.isRISCV64())) { // no code patching ++ if (isC1 && (Platform.isAArch64() || Platform.isRISCV64() || Platform.isLoongArch64())) { // no code patching + results.shouldMatch("Test_MT1/.*::test \\(3 bytes\\) made not entrant") + .shouldMatch("Test_MT2/.*::test \\(3 bytes\\) made not entrant"); + } else { +diff --git a/test/hotspot/jtreg/compiler/runtime/criticalnatives/argumentcorruption/CheckLongArgs.java b/test/hotspot/jtreg/compiler/runtime/criticalnatives/argumentcorruption/CheckLongArgs.java +index 10d87d51f0f..dbea76741d6 100644 +--- a/test/hotspot/jtreg/compiler/runtime/criticalnatives/argumentcorruption/CheckLongArgs.java ++++ b/test/hotspot/jtreg/compiler/runtime/criticalnatives/argumentcorruption/CheckLongArgs.java +@@ -21,10 +21,17 @@ + * questions. + */ + ++/* ++ * This file has been modified by Loongson Technology in 2021, These ++ * modifications are Copyright (c) 2021, Loongson Technology, and are made ++ * available on the same license terms set forth above. ++ */ ++ + + /* @test + * @bug 8167409 + * @requires (os.arch != "aarch64") & (os.arch != "arm") & (os.arch != "riscv64") & (vm.flavor != "zero") ++ * @requires (os.arch != "mips64el") & (os.arch != "loongarch64") & (vm.flavor != "zero") + * @run main/othervm/native -Xcomp -XX:+CriticalJNINatives compiler.runtime.criticalnatives.argumentcorruption.CheckLongArgs + */ + package compiler.runtime.criticalnatives.argumentcorruption; +diff --git a/test/hotspot/jtreg/compiler/runtime/criticalnatives/lookup/LookUp.java b/test/hotspot/jtreg/compiler/runtime/criticalnatives/lookup/LookUp.java +index 23c1e6e6acb..2f402d567d9 100644 +--- a/test/hotspot/jtreg/compiler/runtime/criticalnatives/lookup/LookUp.java ++++ b/test/hotspot/jtreg/compiler/runtime/criticalnatives/lookup/LookUp.java +@@ -21,10 +21,17 @@ + * questions. + */ + ++/* ++ * This file has been modified by Loongson Technology in 2021, These ++ * modifications are Copyright (c) 2021, Loongson Technology, and are made ++ * available on the same license terms set forth above. 
++ */ ++ + + /* @test + * @bug 8167408 + * @requires (os.arch != "aarch64") & (os.arch != "arm") & (os.arch != "riscv64") & (vm.flavor != "zero") ++ * @requires (os.arch != "mips64el") & (os.arch != "loongarch64") & (vm.flavor != "zero") + * @run main/othervm/native -Xcomp -XX:+CriticalJNINatives compiler.runtime.criticalnatives.lookup.LookUp + */ + package compiler.runtime.criticalnatives.lookup; +diff --git a/test/hotspot/jtreg/compiler/testlibrary/sha/predicate/IntrinsicPredicates.java b/test/hotspot/jtreg/compiler/testlibrary/sha/predicate/IntrinsicPredicates.java +index 689c7c8cc2f..f734c1baa3f 100644 +--- a/test/hotspot/jtreg/compiler/testlibrary/sha/predicate/IntrinsicPredicates.java ++++ b/test/hotspot/jtreg/compiler/testlibrary/sha/predicate/IntrinsicPredicates.java +@@ -21,6 +21,12 @@ + * questions. + */ + ++/* ++ * This file has been modified by Loongson Technology in 2022, These ++ * modifications are Copyright (c) 2021, 2022, Loongson Technology, and are made ++ * available on the same license terms set forth above. ++ */ ++ + package compiler.testlibrary.sha.predicate; + + import jdk.test.lib.Platform; +@@ -61,19 +67,22 @@ public class IntrinsicPredicates { + + public static final BooleanSupplier MD5_INSTRUCTION_AVAILABLE + = new OrPredicate(new CPUSpecificPredicate("aarch64.*", null, null), ++ new OrPredicate(new CPUSpecificPredicate("loongarch64.*", null, null), + // x86 variants + new OrPredicate(new CPUSpecificPredicate("amd64.*", null, null), + new OrPredicate(new CPUSpecificPredicate("i386.*", null, null), +- new CPUSpecificPredicate("x86.*", null, null)))); ++ new CPUSpecificPredicate("x86.*", null, null))))); + + public static final BooleanSupplier SHA1_INSTRUCTION_AVAILABLE + = new OrPredicate(new CPUSpecificPredicate("aarch64.*", new String[] { "sha1" }, null), + new OrPredicate(new CPUSpecificPredicate("riscv64.*", new String[] { "sha1" }, null), + new OrPredicate(new CPUSpecificPredicate("s390.*", new String[] { "sha1" }, null), ++ // Basic instructions are used to implement SHA1 Intrinsics on LA, so "sha1" feature is not needed. ++ new OrPredicate(new CPUSpecificPredicate("loongarch64.*", null, null), + // x86 variants + new OrPredicate(new CPUSpecificPredicate("amd64.*", new String[] { "sha" }, null), + new OrPredicate(new CPUSpecificPredicate("i386.*", new String[] { "sha" }, null), +- new CPUSpecificPredicate("x86.*", new String[] { "sha" }, null)))))); ++ new CPUSpecificPredicate("x86.*", new String[] { "sha" }, null))))))); + + public static final BooleanSupplier SHA256_INSTRUCTION_AVAILABLE + = new OrPredicate(new CPUSpecificPredicate("aarch64.*", new String[] { "sha256" }, null), +@@ -81,12 +90,14 @@ public class IntrinsicPredicates { + new OrPredicate(new CPUSpecificPredicate("s390.*", new String[] { "sha256" }, null), + new OrPredicate(new CPUSpecificPredicate("ppc64.*", new String[] { "sha" }, null), + new OrPredicate(new CPUSpecificPredicate("ppc64le.*", new String[] { "sha" }, null), ++ // Basic instructions are used to implement SHA256 Intrinsics on LA, so "sha256" feature is not needed. 
++ new OrPredicate(new CPUSpecificPredicate("loongarch64.*", null, null), + // x86 variants + new OrPredicate(new CPUSpecificPredicate("amd64.*", new String[] { "sha" }, null), + new OrPredicate(new CPUSpecificPredicate("i386.*", new String[] { "sha" }, null), + new OrPredicate(new CPUSpecificPredicate("x86.*", new String[] { "sha" }, null), + new OrPredicate(new CPUSpecificPredicate("amd64.*", new String[] { "avx2", "bmi2" }, null), +- new CPUSpecificPredicate("x86_64", new String[] { "avx2", "bmi2" }, null)))))))))); ++ new CPUSpecificPredicate("x86_64", new String[] { "avx2", "bmi2" }, null))))))))))); + + public static final BooleanSupplier SHA512_INSTRUCTION_AVAILABLE + = new OrPredicate(new CPUSpecificPredicate("aarch64.*", new String[] { "sha512" }, null), +diff --git a/test/hotspot/jtreg/runtime/ReservedStack/ReservedStackTest.java b/test/hotspot/jtreg/runtime/ReservedStack/ReservedStackTest.java +index 36f74d01b54..035b91b9d8e 100644 +--- a/test/hotspot/jtreg/runtime/ReservedStack/ReservedStackTest.java ++++ b/test/hotspot/jtreg/runtime/ReservedStack/ReservedStackTest.java +@@ -21,6 +21,12 @@ + * questions. + */ + ++/* ++ * This file has been modified by Loongson Technology in 2021, These ++ * modifications are Copyright (c) 2021, Loongson Technology, and are made ++ * available on the same license terms set forth above. ++ */ ++ + /* + * @test ReservedStackTest + * +@@ -240,7 +246,8 @@ public class ReservedStackTest { + return Platform.isAix() || + (Platform.isLinux() && + (Platform.isPPC() || Platform.isS390x() || Platform.isX64() || +- Platform.isX86() || Platform.isAArch64() || Platform.isRISCV64())) || ++ Platform.isX86() || Platform.isAArch64() || Platform.isRISCV64() || ++ Platform.isMIPS() || Platform.isLoongArch64())) || + Platform.isOSX(); + } + +diff --git a/test/hotspot/jtreg/testlibrary_tests/ir_framework/tests/TestIRMatching.java b/test/hotspot/jtreg/testlibrary_tests/ir_framework/tests/TestIRMatching.java +index 26dd3514e8e..2818343ec3d 100644 +--- a/test/hotspot/jtreg/testlibrary_tests/ir_framework/tests/TestIRMatching.java ++++ b/test/hotspot/jtreg/testlibrary_tests/ir_framework/tests/TestIRMatching.java +@@ -21,6 +21,12 @@ + * questions. + */ + ++/* ++ * This file has been modified by Loongson Technology in 2022, These ++ * modifications are Copyright (c) 2022, Loongson Technology, and are made ++ * available on the same license terms set forth above. ++ */ ++ + package ir_framework.tests; + + import compiler.lib.ir_framework.*; +@@ -215,7 +221,7 @@ public class TestIRMatching { + runCheck(BadFailOnConstraint.create(Membar.class, "membar()", 1, "MemBar")); + + String cmp; +- if (Platform.isPPC() || Platform.isX86()) { ++ if (Platform.isPPC() || Platform.isX86() || Platform.isLoongArch64()) { + cmp = "CMP"; + } else if (Platform.isS390x()){ + cmp = "CLFI"; +diff --git a/test/jdk/jdk/jfr/event/os/TestCPUInformation.java b/test/jdk/jdk/jfr/event/os/TestCPUInformation.java +index c5166580010..913136a1fd1 100644 +--- a/test/jdk/jdk/jfr/event/os/TestCPUInformation.java ++++ b/test/jdk/jdk/jfr/event/os/TestCPUInformation.java +@@ -21,6 +21,12 @@ + * questions. + */ + ++/* ++ * This file has been modified by Loongson Technology in 2021, These ++ * modifications are Copyright (c) 2021, Loongson Technology, and are made ++ * available on the same license terms set forth above. 
++ */ ++ + package jdk.jfr.event.os; + + import java.util.List; +@@ -52,8 +58,8 @@ public class TestCPUInformation { + Events.assertField(event, "hwThreads").atLeast(1); + Events.assertField(event, "cores").atLeast(1); + Events.assertField(event, "sockets").atLeast(1); +- Events.assertField(event, "cpu").containsAny("Intel", "AMD", "Unknown x86", "ARM", "PPC", "PowerPC", "AArch64", "RISCV64", "s390"); +- Events.assertField(event, "description").containsAny("Intel", "AMD", "Unknown x86", "ARM", "PPC", "PowerPC", "AArch64", "RISCV64", "s390"); ++ Events.assertField(event, "cpu").containsAny("Intel", "AMD", "Unknown x86", "ARM", "PPC", "PowerPC", "AArch64", "RISCV64", "s390", "MIPS", "LoongArch"); ++ Events.assertField(event, "description").containsAny("Intel", "AMD", "Unknown x86", "ARM", "PPC", "PowerPC", "AArch64", "RISCV64", "s390", "MIPS", "LoongArch"); + } + } + } +diff --git a/test/jdk/sun/security/pkcs11/PKCS11Test.java b/test/jdk/sun/security/pkcs11/PKCS11Test.java +index a9a8a8178ee..99295d779c5 100644 +--- a/test/jdk/sun/security/pkcs11/PKCS11Test.java ++++ b/test/jdk/sun/security/pkcs11/PKCS11Test.java +@@ -21,6 +21,12 @@ + * questions. + */ + ++/* ++ * This file has been modified by Loongson Technology in 2022, These ++ * modifications are Copyright (c) 2021, 2022, Loongson Technology, and are made ++ * available on the same license terms set forth above. ++ */ ++ + // common infrastructure for SunPKCS11 tests + + import java.io.ByteArrayOutputStream; +@@ -693,6 +699,9 @@ public abstract class PKCS11Test { + "/usr/lib64/" }); + osMap.put("Linux-ppc64-64", new String[] { "/usr/lib64/" }); + osMap.put("Linux-ppc64le-64", new String[] { "/usr/lib64/" }); ++ osMap.put("Linux-mips64el-64", new String[]{"/usr/lib64/"}); ++ osMap.put("Linux-loongarch64-64", new String[]{"/usr/lib/loongarch64-linux-gnu/", ++ "/usr/lib64/" }); + osMap.put("Linux-s390x-64", new String[] { "/usr/lib64/" }); + osMap.put("Windows-x86-32", new String[] {}); + osMap.put("Windows-amd64-64", new String[] {}); +diff --git a/test/lib-test/jdk/test/lib/TestMutuallyExclusivePlatformPredicates.java b/test/lib-test/jdk/test/lib/TestMutuallyExclusivePlatformPredicates.java +index c71a6034748..427ebda770f 100644 +--- a/test/lib-test/jdk/test/lib/TestMutuallyExclusivePlatformPredicates.java ++++ b/test/lib-test/jdk/test/lib/TestMutuallyExclusivePlatformPredicates.java +@@ -33,6 +33,12 @@ import java.util.HashSet; + import java.util.List; + import java.util.Set; + ++/* ++ * This file has been modified by Loongson Technology in 2021, These ++ * modifications are Copyright (c) 2021, Loongson Technology, and are made ++ * available on the same license terms set forth above. ++ */ ++ + /** + * @test + * @summary Verify that for each group of mutually exclusive predicates defined +@@ -45,7 +51,7 @@ import java.util.Set; + */ + public class TestMutuallyExclusivePlatformPredicates { + private static enum MethodGroup { +- ARCH("isAArch64", "isARM", "isRISCV64", "isPPC", "isS390x", "isX64", "isX86"), ++ ARCH("isAArch64", "isARM", "isRISCV64", "isPPC", "isS390x", "isX64", "isX86", "isMIPS", "isLoongArch64"), + BITNESS("is32bit", "is64bit"), + OS("isAix", "isLinux", "isOSX", "isWindows"), + VM_TYPE("isClient", "isServer", "isMinimal", "isZero", "isEmbedded"), +diff --git a/test/lib/jdk/test/lib/Platform.java b/test/lib/jdk/test/lib/Platform.java +index a4f2c03d10f..7d3b1a62ecb 100644 +--- a/test/lib/jdk/test/lib/Platform.java ++++ b/test/lib/jdk/test/lib/Platform.java +@@ -21,6 +21,12 @@ + * questions. 
+ */ + ++/* ++ * This file has been modified by Loongson Technology in 2021, These ++ * modifications are Copyright (c) 2019, 2021, Loongson Technology, and are made ++ * available on the same license terms set forth above. ++ */ ++ + package jdk.test.lib; + + import java.io.BufferedReader; +@@ -233,6 +239,14 @@ public class Platform { + return isArch("(i386)|(x86(?!_64))"); + } + ++ public static boolean isMIPS() { ++ return isArch("mips.*"); ++ } ++ ++ public static boolean isLoongArch64() { ++ return isArch("loongarch64"); ++ } ++ + public static String getOsArch() { + return osArch; + } diff --git a/java8-openjdk/PKGBUILD b/java8-openjdk/PKGBUILD index d576f6e8dd..cb75a3bda9 100644 --- a/java8-openjdk/PKGBUILD +++ b/java8-openjdk/PKGBUILD @@ -10,7 +10,7 @@ _majorver=8 _minorver=402 _updatever=06 pkgver=${_majorver}.${_minorver}.u${_updatever} -pkgrel=1 +pkgrel=7 arch=('loong64' 'x86_64') url='https://openjdk.java.net/' license=('custom') @@ -31,13 +31,17 @@ makedepends=( ) options=(!lto) source=(https://github.com/openjdk/jdk${_majorver}u/archive/refs/tags/jdk${_majorver}u${_minorver}-b${_updatever}.tar.gz - gcc11.patch) + gcc11.patch + jdk8u382-la64.patch) b2sums=('dee05e214756da4d1dcce0f923a0c10b9e385b5945689039c370ae8ac60f3e1324c629c24d9194f63471430b3c94680f0dcb2c3bdfd13d1e2034673cf9123cae' - '9679e4dfb6027a87376081489c09810812d6849573afac4ea96abe3a3e00ca5b6af7d0ffb010c43b93cfa913f9e97fbb9f11e19fcc86a89b4548442671c32da1') + '9679e4dfb6027a87376081489c09810812d6849573afac4ea96abe3a3e00ca5b6af7d0ffb010c43b93cfa913f9e97fbb9f11e19fcc86a89b4548442671c32da1' + '8010001cc05570986c901353e6e4c52849faf41e879c7356b35d628b84af50fa78a2c3a5476f3c93bc3f49d0de8c0ca21879e779824648cbe5aadd5a6207ab02') +SKIPCONFIG=1 case "${CARCH}" in 'x86_64') _JARCH=amd64 ; _DOC_ARCH=x86_64 ;; 'i686' ) _JARCH=i386 ; _DOC_ARCH=x86 ;; + 'loong64' ) _JARCH=loongarch64 ; _DOC_ARCH=loongarch64 ;; esac _jdkname=openjdk8 @@ -54,6 +58,7 @@ prepare() { # Fix build with C++17 (Fedora) patch -Np1 -i "${srcdir}"/gcc11.patch + patch -Np1 -i "${srcdir}"/jdk8u382-la64.patch } build() { @@ -181,6 +186,8 @@ package_jre8-openjdk-headless() { install -D -m 644 "${pkgdir}${_filepkgpath}" "${pkgdir}/${file}" ln -sf /${file} "${pkgdir}${_filepkgpath}" done + # The built out libjvm.so is error, so copy it from the current system. 
+ cp /usr/lib/jvm/java-8-openjdk/jre/lib/loongarch64/server/libjvm.so ${pkgdir}/usr/lib/jvm/java-8-openjdk/jre/lib/loongarch64/server/libjvm.so } package_jre8-openjdk() { diff --git a/java8-openjdk/jdk8u382-la64.patch b/java8-openjdk/jdk8u382-la64.patch new file mode 100644 index 0000000000..c3bf3c60be --- /dev/null +++ b/java8-openjdk/jdk8u382-la64.patch @@ -0,0 +1,116949 @@ +diff --git a/common/autoconf/build-aux/autoconf-config.guess b/common/autoconf/build-aux/autoconf-config.guess +index 15ee438926..3d7555b52d 100644 +--- a/common/autoconf/build-aux/autoconf-config.guess ++++ b/common/autoconf/build-aux/autoconf-config.guess +@@ -977,6 +977,9 @@ EOF + eval `$CC_FOR_BUILD -E $dummy.c 2>/dev/null | grep '^CPU'` + test x"${CPU}" != x && { echo "${CPU}-unknown-linux-gnu"; exit; } + ;; ++ loongarch64:Linux:*:*) ++ echo ${UNAME_MACHINE}-unknown-linux-gnu ++ exit ;; + or32:Linux:*:*) + echo ${UNAME_MACHINE}-unknown-linux-gnu + exit ;; +diff --git a/common/autoconf/build-aux/autoconf-config.sub b/common/autoconf/build-aux/autoconf-config.sub +index 1aab2b303e..bd910bddbe 100644 +--- a/common/autoconf/build-aux/autoconf-config.sub ++++ b/common/autoconf/build-aux/autoconf-config.sub +@@ -275,6 +275,7 @@ case $basic_machine in + | h8300 | h8500 | hppa | hppa1.[01] | hppa2.0 | hppa2.0[nw] | hppa64 \ + | i370 | i860 | i960 | ia64 \ + | ip2k | iq2000 \ ++ | loongarch | loongarch64 \ + | m32c | m32r | m32rle | m68000 | m68k | m88k \ + | maxq | mb | microblaze | mcore | mep \ + | mips | mipsbe | mipseb | mipsel | mipsle \ +diff --git a/common/autoconf/build-aux/config.guess b/common/autoconf/build-aux/config.guess +index 355c91e4eb..d03d029ce3 100644 +--- a/common/autoconf/build-aux/config.guess ++++ b/common/autoconf/build-aux/config.guess +@@ -86,4 +86,15 @@ if [ "x$OUT" = x ]; then + fi + fi + ++# Test and fix little endian MIPS. ++if [ "x$OUT" = x ]; then ++ if [ `uname -s` = Linux ]; then ++ if [ `uname -m` = mipsel ]; then ++ OUT=mipsel-unknown-linux-gnu ++ elif [ `uname -m` = mips64el ]; then ++ OUT=mips64el-unknown-linux-gnu ++ fi ++ fi ++fi ++ + echo $OUT +diff --git a/common/autoconf/configure.ac b/common/autoconf/configure.ac +index 151e5a109f..5072409dd4 100644 +--- a/common/autoconf/configure.ac ++++ b/common/autoconf/configure.ac +@@ -23,6 +23,12 @@ + # questions. + # + ++# ++# This file has been modified by Loongson Technology in 2018. These ++# modifications are Copyright (c) 2018 Loongson Technology, and are made ++# available on the same license terms set forth above. ++# ++ + ############################################################################### + # + # Includes and boilerplate +@@ -186,6 +192,7 @@ FLAGS_SETUP_INIT_FLAGS + # Now we can test some aspects on the target using configure macros. + PLATFORM_SETUP_OPENJDK_TARGET_BITS + PLATFORM_SETUP_OPENJDK_TARGET_ENDIANNESS ++GET_BUILDER_AND_HOST_DATA + + # Configure flags for the tools + FLAGS_SETUP_COMPILER_FLAGS_FOR_LIBS +diff --git a/common/autoconf/generated-configure.sh b/common/autoconf/generated-configure.sh +index b3c5819161..bae7e64749 100644 +--- a/common/autoconf/generated-configure.sh ++++ b/common/autoconf/generated-configure.sh +@@ -716,6 +716,9 @@ SET_EXECUTABLE_ORIGIN + SHARED_LIBRARY_FLAGS + CXX_FLAG_REORDER + C_FLAG_REORDER ++HOST_NAME ++BUILDER_NAME ++BUILDER_ID + SYSROOT_LDFLAGS + SYSROOT_CFLAGS + RC_FLAGS +@@ -4078,6 +4081,12 @@ fi + # questions. + # + ++# ++# This file has been modified by Loongson Technology in 2022. 
These ++# modifications are Copyright (c) 2018, 2022, Loongson Technology, and are made ++# available on the same license terms set forth above. ++# ++ + # Support macro for PLATFORM_EXTRACT_TARGET_AND_BUILD. + # Converts autoconf style CPU name to OpenJDK style, into + # VAR_CPU, VAR_CPU_ARCH, VAR_CPU_BITS and VAR_CPU_ENDIAN. +@@ -13741,6 +13750,18 @@ test -n "$target_alias" && + VAR_CPU_BITS=64 + VAR_CPU_ENDIAN=big + ;; ++ mips64el) ++ VAR_CPU=mips64 ++ VAR_CPU_ARCH=mips ++ VAR_CPU_BITS=64 ++ VAR_CPU_ENDIAN=little ++ ;; ++ loongarch64) ++ VAR_CPU=loongarch64 ++ VAR_CPU_ARCH=loongarch ++ VAR_CPU_BITS=64 ++ VAR_CPU_ENDIAN=little ++ ;; + *) + as_fn_error $? "unsupported cpu $build_cpu" "$LINENO" 5 + ;; +@@ -13879,6 +13900,18 @@ $as_echo "$OPENJDK_BUILD_OS-$OPENJDK_BUILD_CPU" >&6; } + VAR_CPU_BITS=64 + VAR_CPU_ENDIAN=big + ;; ++ mips64el) ++ VAR_CPU=mips64 ++ VAR_CPU_ARCH=mips ++ VAR_CPU_BITS=64 ++ VAR_CPU_ENDIAN=little ++ ;; ++ loongarch64) ++ VAR_CPU=loongarch64 ++ VAR_CPU_ARCH=loongarch ++ VAR_CPU_BITS=64 ++ VAR_CPU_ENDIAN=little ++ ;; + *) + as_fn_error $? "unsupported cpu $host_cpu" "$LINENO" 5 + ;; +@@ -14001,6 +14034,8 @@ $as_echo "$COMPILE_TYPE" >&6; } + OPENJDK_TARGET_CPU_LEGACY_LIB="i386" + elif test "x$OPENJDK_TARGET_CPU" = xx86_64; then + OPENJDK_TARGET_CPU_LEGACY_LIB="amd64" ++ elif test "x$OPENJDK_TARGET_CPU" = xmips64 && test "x$OPENJDK_TARGET_CPU_ENDIAN" = xlittle; then ++ OPENJDK_TARGET_CPU_LEGACY_LIB="mips64el" + fi + + +@@ -14034,6 +14069,9 @@ $as_echo "$COMPILE_TYPE" >&6; } + elif test "x$OPENJDK_TARGET_OS" != xmacosx && test "x$OPENJDK_TARGET_CPU" = xx86_64; then + # On all platforms except macosx, we replace x86_64 with amd64. + OPENJDK_TARGET_CPU_OSARCH="amd64" ++ elif test "x$OPENJDK_TARGET_OS" = xlinux && test "x$OPENJDK_TARGET_CPU" = xmips64 && test "x$OPENJDK_TARGET_CPU_ENDIAN" = xlittle; then ++ # System.getProperty("os.arch"): mips64 -> mips64el ++ OPENJDK_TARGET_CPU_OSARCH="mips64el" + fi + + +@@ -14043,6 +14081,8 @@ $as_echo "$COMPILE_TYPE" >&6; } + elif test "x$OPENJDK_TARGET_OS" != xmacosx && test "x$OPENJDK_TARGET_CPU" = xx86_64; then + # On all platforms except macosx, we replace x86_64 with amd64. + OPENJDK_TARGET_CPU_JLI="amd64" ++ elif test "x$OPENJDK_TARGET_CPU" = xmips64 && test "x$OPENJDK_TARGET_CPU_ENDIAN" = xlittle; then ++ OPENJDK_TARGET_CPU_JLI="mips64el" + fi + # Now setup the -D flags for building libjli. + OPENJDK_TARGET_CPU_JLI_CFLAGS="-DLIBARCHNAME='\"$OPENJDK_TARGET_CPU_JLI\"'" +@@ -14055,6 +14095,9 @@ $as_echo "$COMPILE_TYPE" >&6; } + elif test "x$OPENJDK_TARGET_OS" = xmacosx && test "x$TOOLCHAIN_TYPE" = xclang ; then + OPENJDK_TARGET_CPU_JLI_CFLAGS="$OPENJDK_TARGET_CPU_JLI_CFLAGS -stdlib=libc++ -mmacosx-version-min=\$(MACOSX_VERSION_MIN)" + fi ++ if test "x$OPENJDK_TARGET_CPU" = xmips64 && test "x$OPENJDK_TARGET_CPU_ENDIAN" = xlittle; then ++ OPENJDK_TARGET_CPU_JLI_CFLAGS="$OPENJDK_TARGET_CPU_JLI_CFLAGS -DLIBARCH32NAME='\"mips32el\"' -DLIBARCH64NAME='\"mips64el\"'" ++ fi + + + # Setup OPENJDK_TARGET_OS_API_DIR, used in source paths. 
+@@ -42235,6 +42278,47 @@ $as_echo "$ac_cv_c_bigendian" >&6; } + fi + + ++BUILDER_NAME="$build_os" ++BUILDER_ID="Custom build ($(date))" ++if test -f /etc/issue; then ++ etc_issue_info=`cat /etc/issue` ++ if test -n "$etc_issue_info"; then ++ BUILDER_NAME=`cat /etc/issue | head -n 1 | cut -d " " -f 1` ++ fi ++fi ++if test -f /etc/redhat-release; then ++ etc_issue_info=`cat /etc/redhat-release` ++ if test -n "$etc_issue_info"; then ++ BUILDER_NAME=`cat /etc/redhat-release | head -n 1 | cut -d " " -f 1` ++ fi ++fi ++if test -f /etc/neokylin-release; then ++ etc_issue_info=`cat /etc/neokylin-release` ++ if test -n "$etc_issue_info"; then ++ BUILDER_NAME=`cat /etc/neokylin-release | head -n 1 | cut -d " " -f 1` ++ fi ++fi ++if test -z "$BUILDER_NAME"; then ++ BUILDER_NAME="unknown" ++fi ++BUILDER_NAME=`echo $BUILDER_NAME | sed -r "s/-//g"` ++if test -n "$OPENJDK_TARGET_CPU_OSARCH"; then ++ HOST_NAME="$OPENJDK_TARGET_CPU_OSARCH" ++else ++ HOST_NAME="unknown" ++fi ++if test -f "/usr/bin/cpp"; then ++ # gcc_with_arch_info=`gcc -v 2>&1 | grep '\-\-with-arch=' | sed 's/.*--with-arch=//;s/ .*$//'` ++ gcc_with_arch_info=`cpp -dM /dev/null | grep '\<_MIPS_ARCH\>' | sed 's/^#define _MIPS_ARCH "//;s/"$//'` ++ if test -n "$gcc_with_arch_info"; then ++ HOST_NAME="$gcc_with_arch_info" ++ fi ++fi ++ ++ ++ ++ ++ + # Configure flags for the tools + + ############################################################################### +diff --git a/common/autoconf/platform.m4 b/common/autoconf/platform.m4 +index 51df988f61..51cc28c312 100644 +--- a/common/autoconf/platform.m4 ++++ b/common/autoconf/platform.m4 +@@ -23,6 +23,12 @@ + # questions. + # + ++# ++# This file has been modified by Loongson Technology in 2022. These ++# modifications are Copyright (c) 2018, 2022, Loongson Technology, and are made ++# available on the same license terms set forth above. ++# ++ + # Support macro for PLATFORM_EXTRACT_TARGET_AND_BUILD. + # Converts autoconf style CPU name to OpenJDK style, into + # VAR_CPU, VAR_CPU_ARCH, VAR_CPU_BITS and VAR_CPU_ENDIAN. +@@ -96,6 +102,18 @@ AC_DEFUN([PLATFORM_EXTRACT_VARS_FROM_CPU], + VAR_CPU_BITS=64 + VAR_CPU_ENDIAN=big + ;; ++ mips64el) ++ VAR_CPU=mips64 ++ VAR_CPU_ARCH=mips ++ VAR_CPU_BITS=64 ++ VAR_CPU_ENDIAN=little ++ ;; ++ loongarch64) ++ VAR_CPU=loongarch64 ++ VAR_CPU_ARCH=loongarch ++ VAR_CPU_BITS=64 ++ VAR_CPU_ENDIAN=little ++ ;; + *) + AC_MSG_ERROR([unsupported cpu $1]) + ;; +@@ -283,6 +301,8 @@ AC_DEFUN([PLATFORM_SETUP_LEGACY_VARS], + OPENJDK_TARGET_CPU_LEGACY_LIB="i386" + elif test "x$OPENJDK_TARGET_CPU" = xx86_64; then + OPENJDK_TARGET_CPU_LEGACY_LIB="amd64" ++ elif test "x$OPENJDK_TARGET_CPU" = xmips64 && test "x$OPENJDK_TARGET_CPU_ENDIAN" = xlittle; then ++ OPENJDK_TARGET_CPU_LEGACY_LIB="mips64el" + fi + AC_SUBST(OPENJDK_TARGET_CPU_LEGACY_LIB) + +@@ -316,6 +336,9 @@ AC_DEFUN([PLATFORM_SETUP_LEGACY_VARS], + elif test "x$OPENJDK_TARGET_OS" != xmacosx && test "x$OPENJDK_TARGET_CPU" = xx86_64; then + # On all platforms except macosx, we replace x86_64 with amd64. 
+ OPENJDK_TARGET_CPU_OSARCH="amd64" ++ elif test "x$OPENJDK_TARGET_OS" = xlinux && test "x$OPENJDK_TARGET_CPU" = xmips64 && test "x$OPENJDK_TARGET_CPU_ENDIAN" = xlittle; then ++ # System.getProperty("os.arch"): mips64 -> mips64el ++ OPENJDK_TARGET_CPU_OSARCH="mips64el" + fi + AC_SUBST(OPENJDK_TARGET_CPU_OSARCH) + +@@ -325,6 +348,8 @@ AC_DEFUN([PLATFORM_SETUP_LEGACY_VARS], + elif test "x$OPENJDK_TARGET_OS" != xmacosx && test "x$OPENJDK_TARGET_CPU" = xx86_64; then + # On all platforms except macosx, we replace x86_64 with amd64. + OPENJDK_TARGET_CPU_JLI="amd64" ++ elif test "x$OPENJDK_TARGET_CPU" = xmips64 && test "x$OPENJDK_TARGET_CPU_ENDIAN" = xlittle; then ++ OPENJDK_TARGET_CPU_JLI="mips64el" + fi + # Now setup the -D flags for building libjli. + OPENJDK_TARGET_CPU_JLI_CFLAGS="-DLIBARCHNAME='\"$OPENJDK_TARGET_CPU_JLI\"'" +@@ -337,6 +362,9 @@ AC_DEFUN([PLATFORM_SETUP_LEGACY_VARS], + elif test "x$OPENJDK_TARGET_OS" = xmacosx && test "x$TOOLCHAIN_TYPE" = xclang ; then + OPENJDK_TARGET_CPU_JLI_CFLAGS="$OPENJDK_TARGET_CPU_JLI_CFLAGS -stdlib=libc++ -mmacosx-version-min=\$(MACOSX_VERSION_MIN)" + fi ++ if test "x$OPENJDK_TARGET_CPU" = xmips64 && test "x$OPENJDK_TARGET_CPU_ENDIAN" = xlittle; then ++ OPENJDK_TARGET_CPU_JLI_CFLAGS="$OPENJDK_TARGET_CPU_JLI_CFLAGS -DLIBARCH32NAME='\"mips32el\"' -DLIBARCH64NAME='\"mips64el\"'" ++ fi + AC_SUBST(OPENJDK_TARGET_CPU_JLI_CFLAGS) + + # Setup OPENJDK_TARGET_OS_API_DIR, used in source paths. +@@ -550,3 +578,46 @@ AC_DEFUN_ONCE([PLATFORM_SETUP_OPENJDK_TARGET_ENDIANNESS], + AC_MSG_ERROR([The tested endian in the target ($ENDIAN) differs from the endian expected to be found in the target ($OPENJDK_TARGET_CPU_ENDIAN)]) + fi + ]) ++ ++AC_DEFUN([GET_BUILDER_AND_HOST_DATA], ++[ ++BUILDER_NAME="$build_os" ++BUILDER_ID="Custom build ($(date))" ++if test -f /etc/issue; then ++ etc_issue_info=`cat /etc/issue` ++ if test -n "$etc_issue_info"; then ++ BUILDER_NAME=`cat /etc/issue | head -n 1 | cut -d " " -f 1` ++ fi ++fi ++if test -f /etc/redhat-release; then ++ etc_issue_info=`cat /etc/redhat-release` ++ if test -n "$etc_issue_info"; then ++ BUILDER_NAME=`cat /etc/redhat-release | head -n 1 | cut -d " " -f 1` ++ fi ++fi ++if test -f /etc/neokylin-release; then ++ etc_issue_info=`cat /etc/neokylin-release` ++ if test -n "$etc_issue_info"; then ++ BUILDER_NAME=`cat /etc/neokylin-release | head -n 1 | cut -d " " -f 1` ++ fi ++fi ++if test -z "$BUILDER_NAME"; then ++ BUILDER_NAME="unknown" ++fi ++BUILDER_NAME=`echo $BUILDER_NAME | sed -r "s/-//g"` ++if test -n "$OPENJDK_TARGET_CPU_OSARCH"; then ++ HOST_NAME="$OPENJDK_TARGET_CPU_OSARCH" ++else ++ HOST_NAME="unknown" ++fi ++if test -f "/usr/bin/cpp"; then ++ # gcc_with_arch_info=`gcc -v 2>&1 | grep '\-\-with-arch=' | sed 's/.*--with-arch=//;s/ .*$//'` ++ gcc_with_arch_info=`cpp -dM /dev/null | grep '\<_MIPS_ARCH\>' | sed 's/^#define _MIPS_ARCH "//;s/"$//'` ++ if test -n "$gcc_with_arch_info"; then ++ HOST_NAME="$gcc_with_arch_info" ++ fi ++fi ++AC_SUBST(BUILDER_ID) ++AC_SUBST(BUILDER_NAME) ++AC_SUBST(HOST_NAME) ++]) +diff --git a/common/autoconf/spec.gmk.in b/common/autoconf/spec.gmk.in +index 461ec59711..70d56b331c 100644 +--- a/common/autoconf/spec.gmk.in ++++ b/common/autoconf/spec.gmk.in +@@ -23,6 +23,12 @@ + # questions. + # + ++# ++# This file has been modified by Loongson Technology in 2023. These ++# modifications are Copyright (c) 2018, 2023, Loongson Technology, and are made ++# available on the same license terms set forth above. 
++# ++ + # Configured @DATE_WHEN_CONFIGURED@ to build + # for target system @OPENJDK_TARGET_OS@-@OPENJDK_TARGET_CPU@ + # (called @OPENJDK_TARGET_AUTOCONF_NAME@ by autoconf) +@@ -219,6 +225,23 @@ else + endif + JRE_RELEASE_VERSION:=$(FULL_VERSION) + ++# Build OS and host values for use in Loongson OpenJDK release ++BUILDER_ID:=@BUILDER_ID@ ++BUILDER_NAME:=@BUILDER_NAME@ ++HOST_NAME:=@HOST_NAME@ ++ ++# Loongson OpenJDK Version info ++VER=8.1.16 ++ifeq ($(HOST_NAME), ) ++ HOST_NAME=unknown ++endif ++ifeq ($(BUILDER_NAME), ) ++ BUILDER_NAME=unknown ++endif ++HOST_NAME_STRING=-$(HOST_NAME) ++BUILDER_NAME_STRING=-$(BUILDER_NAME) ++LOONGSON_RUNTIME_NAME=Loongson $(VER)$(HOST_NAME_STRING)$(BUILDER_NAME_STRING) ++ + # How to compile the code: release, fastdebug or slowdebug + DEBUG_LEVEL:=@DEBUG_LEVEL@ + +diff --git a/hotspot/agent/make/saenv.sh b/hotspot/agent/make/saenv.sh +index ab9a0a431c..a2de3fc329 100644 +--- a/hotspot/agent/make/saenv.sh ++++ b/hotspot/agent/make/saenv.sh +@@ -23,6 +23,12 @@ + # + # + ++# ++# This file has been modified by Loongson Technology in 2020. These ++# modifications are Copyright (c) 2015, 2020, Loongson Technology, and are made ++# available on the same license terms set forth above. ++# ++ + # This file sets common environment variables for all SA scripts + + OS=`uname` +@@ -42,6 +48,14 @@ if [ "$OS" = "Linux" ]; then + SA_LIBPATH=$STARTDIR/../src/os/linux/amd64:$STARTDIR/linux/amd64 + OPTIONS="-Dsa.library.path=$SA_LIBPATH" + CPU=amd64 ++ elif [ "$ARCH" = "mips64" ] ; then ++ SA_LIBPATH=$STARTDIR/../src/os/linux/mips:$STARTDIR/linux/mips ++ OPTIONS="-Dsa.library.path=$SA_LIBPATH" ++ CPU=mips ++ elif [ "$ARCH" = "loongarch64" ] ; then ++ SA_LIBPATH=$STARTDIR/../src/os/linux/loongarch64:$STARTDIR/linux/loongarch64 ++ OPTIONS="-Dsa.library.path=$SA_LIBPATH" ++ CPU=loongarch64 + else + SA_LIBPATH=$STARTDIR/../src/os/linux/i386:$STARTDIR/linux/i386 + OPTIONS="-Dsa.library.path=$SA_LIBPATH" +diff --git a/hotspot/agent/src/os/linux/LinuxDebuggerLocal.c b/hotspot/agent/src/os/linux/LinuxDebuggerLocal.c +index d6a0c7d9a9..b3b1380b29 100644 +--- a/hotspot/agent/src/os/linux/LinuxDebuggerLocal.c ++++ b/hotspot/agent/src/os/linux/LinuxDebuggerLocal.c +@@ -22,6 +22,13 @@ + * + */ + ++/* ++ * This file has been modified by Loongson Technology in 2022. These ++ * modifications are Copyright (c) 2015, 2022, Loongson Technology, and are made ++ * available on the same license terms set forth above. ++ * ++ */ ++ + #include + #include "libproc.h" + +@@ -49,10 +56,18 @@ + #include "sun_jvm_hotspot_debugger_sparc_SPARCThreadContext.h" + #endif + ++#if defined(mips64el) || defined(mips64) ++#include "sun_jvm_hotspot_debugger_mips64_MIPS64ThreadContext.h" ++#endif ++ + #ifdef aarch64 + #include "sun_jvm_hotspot_debugger_aarch64_AARCH64ThreadContext.h" + #endif + ++#ifdef loongarch64 ++#include "sun_jvm_hotspot_debugger_loongarch64_LOONGARCH64ThreadContext.h" ++#endif ++ + static jfieldID p_ps_prochandle_ID = 0; + static jfieldID threadList_ID = 0; + static jfieldID loadObjectList_ID = 0; +@@ -337,7 +352,7 @@ JNIEXPORT jbyteArray JNICALL Java_sun_jvm_hotspot_debugger_linux_LinuxDebuggerLo + return (err == PS_OK)? 
array : 0; + } + +-#if defined(i386) || defined(amd64) || defined(sparc) || defined(sparcv9) || defined(aarch64) ++#if defined(i386) || defined(amd64) || defined(sparc) || defined(sparcv9) || defined(aarch64) || defined(loongarch64) + JNIEXPORT jlongArray JNICALL Java_sun_jvm_hotspot_debugger_linux_LinuxDebuggerLocal_getThreadIntegerRegisterSet0 + (JNIEnv *env, jobject this_obj, jint lwp_id) { + +@@ -364,6 +379,12 @@ JNIEXPORT jlongArray JNICALL Java_sun_jvm_hotspot_debugger_linux_LinuxDebuggerLo + #endif + #if defined(sparc) || defined(sparcv9) + #define NPRGREG sun_jvm_hotspot_debugger_sparc_SPARCThreadContext_NPRGREG ++#endif ++#ifdef loongarch64 ++#define NPRGREG sun_jvm_hotspot_debugger_loongarch64_LOONGARCH64ThreadContext_NPRGREG ++#endif ++#if defined(mips64) || defined(mips64el) ++#define NPRGREG sun_jvm_hotspot_debugger_mips64_MIPS64ThreadContext_NPRGREG + #endif + + array = (*env)->NewLongArray(env, NPRGREG); +@@ -470,6 +491,55 @@ JNIEXPORT jlongArray JNICALL Java_sun_jvm_hotspot_debugger_linux_LinuxDebuggerLo + } + #endif /* aarch64 */ + ++#if defined(loongarch64) ++ ++#define REG_INDEX(reg) sun_jvm_hotspot_debugger_loongarch64_LOONGARCH64ThreadContext_##reg ++ ++ { ++ int i; ++ for (i = 0; i < 31; i++) ++ regs[i] = gregs.regs[i]; ++ regs[REG_INDEX(PC)] = gregs.csr_era; ++ } ++#endif /* loongarch64 */ ++#if defined(mips64) || defined(mips64el) ++ ++#define REG_INDEX(reg) sun_jvm_hotspot_debugger_mips64_MIPS64ThreadContext_##reg ++ ++ regs[REG_INDEX(ZERO)] = gregs.regs[0]; ++ regs[REG_INDEX(AT)] = gregs.regs[1]; ++ regs[REG_INDEX(V0)] = gregs.regs[2]; ++ regs[REG_INDEX(V1)] = gregs.regs[3]; ++ regs[REG_INDEX(A0)] = gregs.regs[4]; ++ regs[REG_INDEX(A1)] = gregs.regs[5]; ++ regs[REG_INDEX(A2)] = gregs.regs[6]; ++ regs[REG_INDEX(A3)] = gregs.regs[7]; ++ regs[REG_INDEX(T0)] = gregs.regs[8]; ++ regs[REG_INDEX(T1)] = gregs.regs[9]; ++ regs[REG_INDEX(T2)] = gregs.regs[10]; ++ regs[REG_INDEX(T3)] = gregs.regs[11]; ++ regs[REG_INDEX(T4)] = gregs.regs[12]; ++ regs[REG_INDEX(T5)] = gregs.regs[13]; ++ regs[REG_INDEX(T6)] = gregs.regs[14]; ++ regs[REG_INDEX(T7)] = gregs.regs[15]; ++ regs[REG_INDEX(S0)] = gregs.regs[16]; ++ regs[REG_INDEX(S1)] = gregs.regs[17]; ++ regs[REG_INDEX(S2)] = gregs.regs[18]; ++ regs[REG_INDEX(S3)] = gregs.regs[19]; ++ regs[REG_INDEX(S4)] = gregs.regs[20]; ++ regs[REG_INDEX(S5)] = gregs.regs[21]; ++ regs[REG_INDEX(S6)] = gregs.regs[22]; ++ regs[REG_INDEX(S7)] = gregs.regs[23]; ++ regs[REG_INDEX(T8)] = gregs.regs[24]; ++ regs[REG_INDEX(T9)] = gregs.regs[25]; ++ regs[REG_INDEX(K0)] = gregs.regs[26]; ++ regs[REG_INDEX(K1)] = gregs.regs[27]; ++ regs[REG_INDEX(GP)] = gregs.regs[28]; ++ regs[REG_INDEX(SP)] = gregs.regs[29]; ++ regs[REG_INDEX(FP)] = gregs.regs[30]; ++ regs[REG_INDEX(S8)] = gregs.regs[30]; ++ regs[REG_INDEX(RA)] = gregs.regs[31]; ++#endif /* mips64 */ + + (*env)->ReleaseLongArrayElements(env, array, regs, JNI_COMMIT); + return array; +diff --git a/hotspot/agent/src/os/linux/Makefile b/hotspot/agent/src/os/linux/Makefile +index c0b5c869c1..2cc50b6fab 100644 +--- a/hotspot/agent/src/os/linux/Makefile ++++ b/hotspot/agent/src/os/linux/Makefile +@@ -22,7 +22,13 @@ + # + # + +-ARCH := $(shell if ([ `uname -m` = "ia64" ]) ; then echo ia64 ; elif ([ `uname -m` = "x86_64" ]) ; then echo amd64; elif ([ `uname -m` = "sparc64" ]) ; then echo sparc; else echo i386 ; fi ) ++# ++# This file has been modified by Loongson Technology in 2020. 
These ++# modifications are Copyright (c) 2015, 2020, Loongson Technology, and are made ++# available on the same license terms set forth above. ++# ++ ++ARCH := $(shell if ([ `uname -m` = "ia64" ]) ; then echo ia64 ; elif ([ `uname -m` = "mips64el" ]) ; then echo mips64 ; elif ([ `uname -m` = "x86_64" ]) ; then echo amd64; elif ([ `uname -m` = "sparc64" ]) ; then echo sparc; else echo i386 ; fi ) + GCC = gcc + + JAVAH = ${JAVA_HOME}/bin/javah +@@ -53,6 +59,8 @@ $(ARCH)/LinuxDebuggerLocal.o: LinuxDebuggerLocal.c + $(JAVAH) -jni -classpath ../../../build/classes -d $(ARCH) \ + sun.jvm.hotspot.debugger.x86.X86ThreadContext \ + sun.jvm.hotspot.debugger.sparc.SPARCThreadContext \ ++ sun.jvm.hotspot.debugger.mips64.MIPS64ThreadContext \ ++ sun.jvm.hotspot.debugger.loongarch64.LOONGARCH64ThreadContext \ + sun.jvm.hotspot.debugger.amd64.AMD64ThreadContext \ + sun.jvm.hotspot.debugger.aarch64.AARCH64ThreadContext + $(GCC) $(CFLAGS) $< -o $@ +diff --git a/hotspot/agent/src/os/linux/libproc.h b/hotspot/agent/src/os/linux/libproc.h +index 6b6e41cab4..5eb8211aa9 100644 +--- a/hotspot/agent/src/os/linux/libproc.h ++++ b/hotspot/agent/src/os/linux/libproc.h +@@ -22,6 +22,12 @@ + * + */ + ++/* ++ * This file has been modified by Loongson Technology in 2022. These ++ * modifications are Copyright (c) 2015, 2022, Loongson Technology, and are made ++ * available on the same license terms set forth above. ++ */ ++ + #ifndef _LIBPROC_H_ + #define _LIBPROC_H_ + +@@ -36,7 +42,7 @@ + + #include + +-#if defined(aarch64) ++#if defined(aarch64) || defined(loongarch64) + #include "asm/ptrace.h" + #endif + +@@ -76,7 +82,12 @@ combination of ptrace and /proc calls. + #include + #define user_regs_struct pt_regs + #endif +-#if defined(aarch64) ++ ++#if defined(mips) || defined(mipsel) || defined(mips64) || defined(mips64el) ++#include ++#define user_regs_struct pt_regs ++#endif ++#if defined(aarch64) || defined(loongarch64) + #define user_regs_struct user_pt_regs + #endif + +diff --git a/hotspot/agent/src/os/linux/ps_proc.c b/hotspot/agent/src/os/linux/ps_proc.c +index c4d6a9ecc5..7000e92723 100644 +--- a/hotspot/agent/src/os/linux/ps_proc.c ++++ b/hotspot/agent/src/os/linux/ps_proc.c +@@ -22,6 +22,12 @@ + * + */ + ++/* ++ * This file has been modified by Loongson Technology in 2022, These ++ * modifications are Copyright (c) 2022, Loongson Technology, and are made ++ * available on the same license terms set forth above. ++ */ ++ + #include + #include + #include +@@ -141,7 +147,7 @@ static bool process_get_lwp_regs(struct ps_prochandle* ph, pid_t pid, struct use + #define PTRACE_GETREGS_REQ PT_GETREGS + #endif + +-#ifdef PTRACE_GETREGS_REQ ++#if defined(PTRACE_GETREGS_REQ) && !defined(loongarch64) + if (ptrace_getregs(PTRACE_GETREGS_REQ, pid, user, NULL) < 0) { + print_debug("ptrace(PTRACE_GETREGS, ...) failed for lwp %d\n", pid); + return false; +diff --git a/hotspot/agent/src/share/classes/sun/jvm/hotspot/HotSpotAgent.java b/hotspot/agent/src/share/classes/sun/jvm/hotspot/HotSpotAgent.java +index c963350591..20e6f35b9c 100644 +--- a/hotspot/agent/src/share/classes/sun/jvm/hotspot/HotSpotAgent.java ++++ b/hotspot/agent/src/share/classes/sun/jvm/hotspot/HotSpotAgent.java +@@ -22,6 +22,12 @@ + * + */ + ++/* ++ * This file has been modified by Loongson Technology in 2020. These ++ * modifications are Copyright (c) 2018, 2020, Loongson Technology, and are made ++ * available on the same license terms set forth above. 
++ * ++ */ + package sun.jvm.hotspot; + + import java.rmi.RemoteException; +@@ -37,6 +43,8 @@ import sun.jvm.hotspot.debugger.MachineDescriptionIA64; + import sun.jvm.hotspot.debugger.MachineDescriptionIntelX86; + import sun.jvm.hotspot.debugger.MachineDescriptionSPARC32Bit; + import sun.jvm.hotspot.debugger.MachineDescriptionSPARC64Bit; ++import sun.jvm.hotspot.debugger.MachineDescriptionMIPS64; ++import sun.jvm.hotspot.debugger.MachineDescriptionLOONGARCH64; + import sun.jvm.hotspot.debugger.NoSuchSymbolException; + import sun.jvm.hotspot.debugger.bsd.BsdDebuggerLocal; + import sun.jvm.hotspot.debugger.linux.LinuxDebuggerLocal; +@@ -594,6 +602,10 @@ public class HotSpotAgent { + } else { + machDesc = new MachineDescriptionSPARC32Bit(); + } ++ } else if (cpu.equals("mips64")) { ++ machDesc = new MachineDescriptionMIPS64(); ++ } else if (cpu.equals("loongarch64")) { ++ machDesc = new MachineDescriptionLOONGARCH64(); + } else { + try { + machDesc = (MachineDescription) +diff --git a/hotspot/agent/src/share/classes/sun/jvm/hotspot/asm/Disassembler.java b/hotspot/agent/src/share/classes/sun/jvm/hotspot/asm/Disassembler.java +index 993bf7bb47..1e075aa57e 100644 +--- a/hotspot/agent/src/share/classes/sun/jvm/hotspot/asm/Disassembler.java ++++ b/hotspot/agent/src/share/classes/sun/jvm/hotspot/asm/Disassembler.java +@@ -94,6 +94,12 @@ public class Disassembler { + } else if (arch.equals("amd64") || arch.equals("x86_64")) { + path.append(sep + "lib" + sep + "amd64" + sep); + libname += "-amd64.so"; ++ } else if (arch.equals("mips64") || arch.equals("mips64el")) { ++ path.append(sep + "lib" + sep + "mips64" + sep); ++ libname += "-mips64.so"; ++ } else if (arch.equals("loongarch64")) { ++ path.append(sep + "lib" + sep + "loongarch64" + sep); ++ libname += "-loongarch64.so"; + } else { + path.append(sep + "lib" + sep + arch + sep); + libname += "-" + arch + ".so"; +diff --git a/hotspot/agent/src/share/classes/sun/jvm/hotspot/debugger/MachineDescriptionLOONGARCH64.java b/hotspot/agent/src/share/classes/sun/jvm/hotspot/debugger/MachineDescriptionLOONGARCH64.java +new file mode 100644 +index 0000000000..0531427dab +--- /dev/null ++++ b/hotspot/agent/src/share/classes/sun/jvm/hotspot/debugger/MachineDescriptionLOONGARCH64.java +@@ -0,0 +1,41 @@ ++/* ++ * Copyright (c) 2000, 2008, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2018, 2020, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ * ++ */ ++ ++package sun.jvm.hotspot.debugger; ++ ++public class MachineDescriptionLOONGARCH64 extends MachineDescriptionTwosComplement implements MachineDescription { ++ public long getAddressSize() { ++ return 8; ++ } ++ ++ ++ public boolean isBigEndian() { ++ return false; ++ } ++ ++ public boolean isLP64() { ++ return true; ++ } ++} +diff --git a/hotspot/agent/src/share/classes/sun/jvm/hotspot/debugger/MachineDescriptionMIPS64.java b/hotspot/agent/src/share/classes/sun/jvm/hotspot/debugger/MachineDescriptionMIPS64.java +new file mode 100644 +index 0000000000..1b49efd201 +--- /dev/null ++++ b/hotspot/agent/src/share/classes/sun/jvm/hotspot/debugger/MachineDescriptionMIPS64.java +@@ -0,0 +1,41 @@ ++/* ++ * Copyright (c) 2000, 2008, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2018, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ * ++ */ ++ ++package sun.jvm.hotspot.debugger; ++ ++public class MachineDescriptionMIPS64 extends MachineDescriptionTwosComplement implements MachineDescription { ++ public long getAddressSize() { ++ return 8; ++ } ++ ++ ++ public boolean isBigEndian() { ++ return "big".equals(System.getProperty("sun.cpu.endian")); ++ } ++ ++ public boolean isLP64() { ++ return true; ++ } ++} +diff --git a/hotspot/agent/src/share/classes/sun/jvm/hotspot/debugger/linux/LinuxCDebugger.java b/hotspot/agent/src/share/classes/sun/jvm/hotspot/debugger/linux/LinuxCDebugger.java +index f178d6a6e7..019e794bbb 100644 +--- a/hotspot/agent/src/share/classes/sun/jvm/hotspot/debugger/linux/LinuxCDebugger.java ++++ b/hotspot/agent/src/share/classes/sun/jvm/hotspot/debugger/linux/LinuxCDebugger.java +@@ -32,11 +32,15 @@ import sun.jvm.hotspot.debugger.cdbg.*; + import sun.jvm.hotspot.debugger.x86.*; + import sun.jvm.hotspot.debugger.amd64.*; + import sun.jvm.hotspot.debugger.sparc.*; ++import sun.jvm.hotspot.debugger.mips64.*; ++import sun.jvm.hotspot.debugger.loongarch64.*; + import sun.jvm.hotspot.debugger.linux.x86.*; + import sun.jvm.hotspot.debugger.linux.amd64.*; + import sun.jvm.hotspot.debugger.aarch64.*; + import sun.jvm.hotspot.debugger.linux.aarch64.*; + import sun.jvm.hotspot.debugger.linux.sparc.*; ++import sun.jvm.hotspot.debugger.linux.mips64.*; ++import sun.jvm.hotspot.debugger.linux.loongarch64.*; + import sun.jvm.hotspot.utilities.*; + + class LinuxCDebugger implements CDebugger { +@@ -106,6 +110,20 @@ class LinuxCDebugger implements CDebugger { + Address pc = context.getRegisterAsAddress(AARCH64ThreadContext.PC); + if (pc == null) return null; + return new LinuxAARCH64CFrame(dbg, fp, pc); ++ } else if (cpu.equals("mips64")) { ++ MIPS64ThreadContext context = (MIPS64ThreadContext) thread.getContext(); ++ Address sp = context.getRegisterAsAddress(MIPS64ThreadContext.SP); ++ if (sp == null) return null; ++ Address pc = context.getRegisterAsAddress(MIPS64ThreadContext.PC); ++ if (pc == null) return null; ++ return new LinuxMIPS64CFrame(dbg, sp, pc); ++ } else if (cpu.equals("loongarch64")) { ++ LOONGARCH64ThreadContext context = (LOONGARCH64ThreadContext) thread.getContext(); ++ Address sp = context.getRegisterAsAddress(LOONGARCH64ThreadContext.SP); ++ if (sp == null) return null; ++ Address pc = context.getRegisterAsAddress(LOONGARCH64ThreadContext.PC); ++ if (pc == null) return null; ++ return new LinuxLOONGARCH64CFrame(dbg, sp, pc); + } else { + // Runtime exception thrown by LinuxThreadContextFactory if unknown cpu + ThreadContext context = (ThreadContext) thread.getContext(); +diff --git a/hotspot/agent/src/share/classes/sun/jvm/hotspot/debugger/linux/LinuxThreadContextFactory.java b/hotspot/agent/src/share/classes/sun/jvm/hotspot/debugger/linux/LinuxThreadContextFactory.java +index 44c2265d7a..3b6747ac0a 100644 +--- a/hotspot/agent/src/share/classes/sun/jvm/hotspot/debugger/linux/LinuxThreadContextFactory.java ++++ b/hotspot/agent/src/share/classes/sun/jvm/hotspot/debugger/linux/LinuxThreadContextFactory.java +@@ -30,6 +30,8 @@ import sun.jvm.hotspot.debugger.linux.amd64.*; + import sun.jvm.hotspot.debugger.linux.ia64.*; + import sun.jvm.hotspot.debugger.linux.x86.*; + import sun.jvm.hotspot.debugger.linux.sparc.*; ++import sun.jvm.hotspot.debugger.linux.mips64.*; ++import sun.jvm.hotspot.debugger.linux.loongarch64.*; + + class LinuxThreadContextFactory { + static ThreadContext createThreadContext(LinuxDebugger dbg) { +@@ -42,6 +44,10 @@ class LinuxThreadContextFactory { + return new 
LinuxIA64ThreadContext(dbg); + } else if (cpu.equals("sparc")) { + return new LinuxSPARCThreadContext(dbg); ++ } else if (cpu.equals("mips64")) { ++ return new LinuxMIPS64ThreadContext(dbg); ++ } else if (cpu.equals("loongarch64")) { ++ return new LinuxLOONGARCH64ThreadContext(dbg); + } else { + try { + Class tcc = Class.forName("sun.jvm.hotspot.debugger.linux." + +diff --git a/hotspot/agent/src/share/classes/sun/jvm/hotspot/debugger/linux/loongarch64/LinuxLOONGARCH64CFrame.java b/hotspot/agent/src/share/classes/sun/jvm/hotspot/debugger/linux/loongarch64/LinuxLOONGARCH64CFrame.java +new file mode 100644 +index 0000000000..3b20dbbd87 +--- /dev/null ++++ b/hotspot/agent/src/share/classes/sun/jvm/hotspot/debugger/linux/loongarch64/LinuxLOONGARCH64CFrame.java +@@ -0,0 +1,80 @@ ++/* ++ * Copyright (c) 2003, 2012, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2020, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++package sun.jvm.hotspot.debugger.linux.loongarch64; ++ ++import sun.jvm.hotspot.debugger.*; ++import sun.jvm.hotspot.debugger.linux.*; ++import sun.jvm.hotspot.debugger.cdbg.*; ++import sun.jvm.hotspot.debugger.cdbg.basic.*; ++import sun.jvm.hotspot.debugger.loongarch64.*; ++ ++final public class LinuxLOONGARCH64CFrame extends BasicCFrame { ++ // package/class internals only ++ public LinuxLOONGARCH64CFrame(LinuxDebugger dbg, Address fp, Address pc) { ++ super(dbg.getCDebugger()); ++ this.fp = fp; ++ this.pc = pc; ++ this.dbg = dbg; ++ } ++ ++ // override base class impl to avoid ELF parsing ++ public ClosestSymbol closestSymbolToPC() { ++ // try native lookup in debugger. 
++ return dbg.lookup(dbg.getAddressValue(pc())); ++ } ++ ++ public Address pc() { ++ return pc; ++ } ++ ++ public Address localVariableBase() { ++ return fp; ++ } ++ ++ public CFrame sender(ThreadProxy thread) { ++ LOONGARCH64ThreadContext context = (LOONGARCH64ThreadContext) thread.getContext(); ++ Address sp = context.getRegisterAsAddress(LOONGARCH64ThreadContext.SP); ++ ++ if ((fp == null) || fp.lessThan(sp)) { ++ return null; ++ } ++ ++ Address nextFP = fp.getAddressAt(-2 * ADDRESS_SIZE); ++ if (nextFP == null) { ++ return null; ++ } ++ Address nextPC = fp.getAddressAt(-1 * ADDRESS_SIZE); ++ if (nextPC == null) { ++ return null; ++ } ++ return new LinuxLOONGARCH64CFrame(dbg, nextFP, nextPC); ++ } ++ ++ private static final int ADDRESS_SIZE = 8; ++ private Address pc; ++ private Address fp; ++ private LinuxDebugger dbg; ++} +diff --git a/hotspot/agent/src/share/classes/sun/jvm/hotspot/debugger/linux/loongarch64/LinuxLOONGARCH64ThreadContext.java b/hotspot/agent/src/share/classes/sun/jvm/hotspot/debugger/linux/loongarch64/LinuxLOONGARCH64ThreadContext.java +new file mode 100644 +index 0000000000..9f22133eaf +--- /dev/null ++++ b/hotspot/agent/src/share/classes/sun/jvm/hotspot/debugger/linux/loongarch64/LinuxLOONGARCH64ThreadContext.java +@@ -0,0 +1,47 @@ ++/* ++ * Copyright (c) 2003, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2020, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ * ++ */ ++ ++package sun.jvm.hotspot.debugger.linux.loongarch64; ++ ++import sun.jvm.hotspot.debugger.*; ++import sun.jvm.hotspot.debugger.loongarch64.*; ++import sun.jvm.hotspot.debugger.linux.*; ++ ++public class LinuxLOONGARCH64ThreadContext extends LOONGARCH64ThreadContext { ++ private LinuxDebugger debugger; ++ ++ public LinuxLOONGARCH64ThreadContext(LinuxDebugger debugger) { ++ super(); ++ this.debugger = debugger; ++ } ++ ++ public void setRegisterAsAddress(int index, Address value) { ++ setRegister(index, debugger.getAddressValue(value)); ++ } ++ ++ public Address getRegisterAsAddress(int index) { ++ return debugger.newAddress(getRegister(index)); ++ } ++} +diff --git a/hotspot/agent/src/share/classes/sun/jvm/hotspot/debugger/linux/mips64/LinuxMIPS64CFrame.java b/hotspot/agent/src/share/classes/sun/jvm/hotspot/debugger/linux/mips64/LinuxMIPS64CFrame.java +new file mode 100644 +index 0000000000..2e3eb564da +--- /dev/null ++++ b/hotspot/agent/src/share/classes/sun/jvm/hotspot/debugger/linux/mips64/LinuxMIPS64CFrame.java +@@ -0,0 +1,80 @@ ++/* ++ * Copyright (c) 2003, 2012, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2018, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++package sun.jvm.hotspot.debugger.linux.mips64; ++ ++import sun.jvm.hotspot.debugger.*; ++import sun.jvm.hotspot.debugger.linux.*; ++import sun.jvm.hotspot.debugger.cdbg.*; ++import sun.jvm.hotspot.debugger.cdbg.basic.*; ++import sun.jvm.hotspot.debugger.mips64.*; ++ ++final public class LinuxMIPS64CFrame extends BasicCFrame { ++ // package/class internals only ++ public LinuxMIPS64CFrame(LinuxDebugger dbg, Address ebp, Address pc) { ++ super(dbg.getCDebugger()); ++ this.ebp = ebp; ++ this.pc = pc; ++ this.dbg = dbg; ++ } ++ ++ // override base class impl to avoid ELF parsing ++ public ClosestSymbol closestSymbolToPC() { ++ // try native lookup in debugger. 
++ return dbg.lookup(dbg.getAddressValue(pc())); ++ } ++ ++ public Address pc() { ++ return pc; ++ } ++ ++ public Address localVariableBase() { ++ return ebp; ++ } ++ ++ public CFrame sender(ThreadProxy thread) { ++ MIPS64ThreadContext context = (MIPS64ThreadContext) thread.getContext(); ++ Address esp = context.getRegisterAsAddress(MIPS64ThreadContext.SP); ++ ++ if ( (ebp == null) || ebp.lessThan(esp) ) { ++ return null; ++ } ++ ++ Address nextEBP = ebp.getAddressAt( 0 * ADDRESS_SIZE); ++ if (nextEBP == null) { ++ return null; ++ } ++ Address nextPC = ebp.getAddressAt( 1 * ADDRESS_SIZE); ++ if (nextPC == null) { ++ return null; ++ } ++ return new LinuxMIPS64CFrame(dbg, nextEBP, nextPC); ++ } ++ ++ private static final int ADDRESS_SIZE = 4; ++ private Address pc; ++ private Address ebp; ++ private LinuxDebugger dbg; ++} +diff --git a/hotspot/agent/src/share/classes/sun/jvm/hotspot/debugger/linux/mips64/LinuxMIPS64ThreadContext.java b/hotspot/agent/src/share/classes/sun/jvm/hotspot/debugger/linux/mips64/LinuxMIPS64ThreadContext.java +new file mode 100644 +index 0000000000..98e0f3f0bc +--- /dev/null ++++ b/hotspot/agent/src/share/classes/sun/jvm/hotspot/debugger/linux/mips64/LinuxMIPS64ThreadContext.java +@@ -0,0 +1,47 @@ ++/* ++ * Copyright (c) 2003, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2018, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++package sun.jvm.hotspot.debugger.linux.mips64; ++ ++import sun.jvm.hotspot.debugger.*; ++import sun.jvm.hotspot.debugger.mips64.*; ++import sun.jvm.hotspot.debugger.linux.*; ++ ++public class LinuxMIPS64ThreadContext extends MIPS64ThreadContext { ++ private LinuxDebugger debugger; ++ ++ public LinuxMIPS64ThreadContext(LinuxDebugger debugger) { ++ super(); ++ this.debugger = debugger; ++ } ++ ++ public void setRegisterAsAddress(int index, Address value) { ++ setRegister(index, debugger.getAddressValue(value)); ++ } ++ ++ public Address getRegisterAsAddress(int index) { ++ return debugger.newAddress(getRegister(index)); ++ } ++} +diff --git a/hotspot/agent/src/share/classes/sun/jvm/hotspot/debugger/loongarch64/LOONGARCH64ThreadContext.java b/hotspot/agent/src/share/classes/sun/jvm/hotspot/debugger/loongarch64/LOONGARCH64ThreadContext.java +new file mode 100644 +index 0000000000..90b0cf97e3 +--- /dev/null ++++ b/hotspot/agent/src/share/classes/sun/jvm/hotspot/debugger/loongarch64/LOONGARCH64ThreadContext.java +@@ -0,0 +1,123 @@ ++/* ++ * Copyright (c) 2000, 2012, Oracle and/or its affiliates. All rights reserved. 
++ * Copyright (c) 2015, 2020, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++package sun.jvm.hotspot.debugger.loongarch64; ++ ++import sun.jvm.hotspot.debugger.*; ++import sun.jvm.hotspot.debugger.cdbg.*; ++ ++/** Specifies the thread context on loongarch64 platforms; only a sub-portion ++ of the context is guaranteed to be present on all operating ++ systems. */ ++ ++public abstract class LOONGARCH64ThreadContext implements ThreadContext { ++ ++ // NOTE: the indices for the various registers must be maintained as ++ // listed across various operating systems. However, only a small ++ // subset of the registers' values are guaranteed to be present (and ++ // must be present for the SA's stack walking to work): EAX, EBX, ++ // ECX, EDX, ESI, EDI, EBP, ESP, and EIP. 
++ ++ public static final int ZERO = 0; ++ public static final int RA = 1; ++ public static final int TP = 2; ++ public static final int SP = 3; ++ public static final int A0 = 4; ++ public static final int A1 = 5; ++ public static final int A2 = 6; ++ public static final int A3 = 7; ++ public static final int A4 = 8; ++ public static final int A5 = 9; ++ public static final int A6 = 10; ++ public static final int A7 = 11; ++ public static final int T0 = 12; ++ public static final int T1 = 13; ++ public static final int T2 = 14; ++ public static final int T3 = 15; ++ public static final int T4 = 16; ++ public static final int T5 = 17; ++ public static final int T6 = 18; ++ public static final int T7 = 19; ++ public static final int T8 = 20; ++ public static final int RX = 21; ++ public static final int FP = 22; ++ public static final int S0 = 23; ++ public static final int S1 = 24; ++ public static final int S2 = 25; ++ public static final int S3 = 26; ++ public static final int S4 = 27; ++ public static final int S5 = 28; ++ public static final int S6 = 29; ++ public static final int S7 = 30; ++ public static final int S8 = 31; ++ public static final int PC = 32; ++ public static final int NPRGREG = 33; ++ ++ private static final String[] regNames = { ++ "ZERO", "RA", "TP", "SP", ++ "A0", "A1", "A2", "A3", ++ "A4", "A5", "A6", "A7", ++ "T0", "T1", "T2", "T3", ++ "T4", "T5", "T6", "T7", ++ "T8", "RX", "FP", "S0", ++ "S1", "S2", "S3", "S4", ++ "S5", "S6", "S7", "S8", ++ "PC" ++ }; ++ ++ private long[] data; ++ ++ public LOONGARCH64ThreadContext() { ++ data = new long[NPRGREG]; ++ } ++ ++ public int getNumRegisters() { ++ return NPRGREG; ++ } ++ ++ public String getRegisterName(int index) { ++ return regNames[index]; ++ } ++ ++ public void setRegister(int index, long value) { ++ data[index] = value; ++ } ++ ++ public long getRegister(int index) { ++ return data[index]; ++ } ++ ++ public CFrame getTopFrame(Debugger dbg) { ++ return null; ++ } ++ ++ /** This can't be implemented in this class since we would have to ++ tie the implementation to, for example, the debugging system */ ++ public abstract void setRegisterAsAddress(int index, Address value); ++ ++ /** This can't be implemented in this class since we would have to ++ tie the implementation to, for example, the debugging system */ ++ public abstract Address getRegisterAsAddress(int index); ++} +diff --git a/hotspot/agent/src/share/classes/sun/jvm/hotspot/debugger/mips64/MIPS64ThreadContext.java b/hotspot/agent/src/share/classes/sun/jvm/hotspot/debugger/mips64/MIPS64ThreadContext.java +new file mode 100644 +index 0000000000..c57ee9dfc9 +--- /dev/null ++++ b/hotspot/agent/src/share/classes/sun/jvm/hotspot/debugger/mips64/MIPS64ThreadContext.java +@@ -0,0 +1,123 @@ ++/* ++ * Copyright (c) 2000, 2012, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2018, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). 
++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++package sun.jvm.hotspot.debugger.mips64; ++ ++import sun.jvm.hotspot.debugger.*; ++import sun.jvm.hotspot.debugger.cdbg.*; ++ ++/** Specifies the thread context on mips64 platforms; only a sub-portion ++ of the context is guaranteed to be present on all operating ++ systems. */ ++ ++public abstract class MIPS64ThreadContext implements ThreadContext { ++ ++ // NOTE: the indices for the various registers must be maintained as ++ // listed across various operating systems. However, only a small ++ // subset of the registers' values are guaranteed to be present (and ++ // must be present for the SA's stack walking to work): EAX, EBX, ++ // ECX, EDX, ESI, EDI, EBP, ESP, and EIP. ++ ++ public static final int ZERO = 0; ++ public static final int AT = 1; ++ public static final int V0 = 2; ++ public static final int V1 = 3; ++ public static final int A0 = 4; ++ public static final int A1 = 5; ++ public static final int A2 = 6; ++ public static final int A3 = 7; ++ public static final int T0 = 8; ++ public static final int T1 = 9; ++ public static final int T2 = 10; ++ public static final int T3 = 11; ++ public static final int T4 = 12; ++ public static final int T5 = 13; ++ public static final int T6 = 14; ++ public static final int T7 = 15; ++ public static final int S0 = 16; ++ public static final int S1 = 17; ++ public static final int S2 = 18; ++ public static final int S3 = 19; ++ public static final int S4 = 20; ++ public static final int S5 = 21; ++ public static final int S6 = 22; ++ public static final int S7 = 23; ++ public static final int T8 = 24; ++ public static final int T9 = 25; ++ public static final int K0 = 26; ++ public static final int K1 = 27; ++ public static final int GP = 28; ++ public static final int SP = 29; ++ public static final int FP = 30; ++ public static final int RA = 31; ++ public static final int PC = 32; ++ public static final int NPRGREG = 33; ++ ++ private static final String[] regNames = { ++ "ZERO", "AT", "V0", "V1", ++ "A0", "A1", "A2", "A3", ++ "T0", "T1", "T2", "T3", ++ "T4", "T5", "T6", "T7", ++ "S0", "S1", "S2", "S3", ++ "S4", "S5", "S6", "S7", ++ "T8", "T9", "K0", "K1", ++ "GP", "SP", "FP", "RA", ++ "PC" ++ }; ++ ++ private long[] data; ++ ++ public MIPS64ThreadContext() { ++ data = new long[NPRGREG]; ++ } ++ ++ public int getNumRegisters() { ++ return NPRGREG; ++ } ++ ++ public String getRegisterName(int index) { ++ return regNames[index]; ++ } ++ ++ public void setRegister(int index, long value) { ++ data[index] = value; ++ } ++ ++ public long getRegister(int index) { ++ return data[index]; ++ } ++ ++ public CFrame getTopFrame(Debugger dbg) { ++ return null; ++ } ++ ++ /** This can't be implemented in this class since we would have to ++ tie the implementation to, for example, the debugging system */ ++ public abstract void setRegisterAsAddress(int index, Address value); ++ ++ /** This can't be implemented in this class since we would have to ++ tie the implementation to, for example, the debugging system */ ++ public abstract Address getRegisterAsAddress(int index); ++} +diff --git 
a/hotspot/agent/src/share/classes/sun/jvm/hotspot/debugger/posix/elf/ELFHeader.java b/hotspot/agent/src/share/classes/sun/jvm/hotspot/debugger/posix/elf/ELFHeader.java +index 7113a3a497..24273888c2 100644 +--- a/hotspot/agent/src/share/classes/sun/jvm/hotspot/debugger/posix/elf/ELFHeader.java ++++ b/hotspot/agent/src/share/classes/sun/jvm/hotspot/debugger/posix/elf/ELFHeader.java +@@ -63,6 +63,8 @@ public interface ELFHeader { + public static final int ARCH_i860 = 7; + /** MIPS architecture type. */ + public static final int ARCH_MIPS = 8; ++ /** LOONGARCH architecture type. */ ++ public static final int ARCH_LOONGARCH = 9; + + /** Returns a file type which is defined by the file type constants. */ + public short getFileType(); +diff --git a/hotspot/agent/src/share/classes/sun/jvm/hotspot/debugger/proc/ProcDebuggerLocal.java b/hotspot/agent/src/share/classes/sun/jvm/hotspot/debugger/proc/ProcDebuggerLocal.java +index ca1a2575ff..2afa6c55f8 100644 +--- a/hotspot/agent/src/share/classes/sun/jvm/hotspot/debugger/proc/ProcDebuggerLocal.java ++++ b/hotspot/agent/src/share/classes/sun/jvm/hotspot/debugger/proc/ProcDebuggerLocal.java +@@ -34,10 +34,14 @@ import sun.jvm.hotspot.debugger.proc.amd64.*; + import sun.jvm.hotspot.debugger.proc.aarch64.*; + import sun.jvm.hotspot.debugger.proc.sparc.*; + import sun.jvm.hotspot.debugger.proc.x86.*; ++import sun.jvm.hotspot.debugger.proc.mips64.*; ++import sun.jvm.hotspot.debugger.proc.loongarch64.*; + import sun.jvm.hotspot.debugger.amd64.*; + import sun.jvm.hotspot.debugger.aarch64.*; + import sun.jvm.hotspot.debugger.sparc.*; + import sun.jvm.hotspot.debugger.x86.*; ++import sun.jvm.hotspot.debugger.mips64.*; ++import sun.jvm.hotspot.debugger.loongarch64.*; + import sun.jvm.hotspot.utilities.*; + + /**
An implementation of the JVMDebugger interface which sits on +@@ -92,6 +96,14 @@ public class ProcDebuggerLocal extends DebuggerBase implements ProcDebugger { + threadFactory = new ProcAARCH64ThreadFactory(this); + pcRegIndex = AARCH64ThreadContext.PC; + fpRegIndex = AARCH64ThreadContext.FP; ++ } else if (cpu.equals("mips64") || cpu.equals("mips64el")) { ++ threadFactory = new ProcMIPS64ThreadFactory(this); ++ pcRegIndex = MIPS64ThreadContext.PC; ++ fpRegIndex = MIPS64ThreadContext.FP; ++ } else if (cpu.equals("loongarch64")) { ++ threadFactory = new ProcLOONGARCH64ThreadFactory(this); ++ pcRegIndex = LOONGARCH64ThreadContext.PC; ++ fpRegIndex = LOONGARCH64ThreadContext.FP; + } else { + try { + Class tfc = Class.forName("sun.jvm.hotspot.debugger.proc." + +diff --git a/hotspot/agent/src/share/classes/sun/jvm/hotspot/debugger/proc/loongarch64/ProcLOONGARCH64Thread.java b/hotspot/agent/src/share/classes/sun/jvm/hotspot/debugger/proc/loongarch64/ProcLOONGARCH64Thread.java +new file mode 100644 +index 0000000000..42a31e3486 +--- /dev/null ++++ b/hotspot/agent/src/share/classes/sun/jvm/hotspot/debugger/proc/loongarch64/ProcLOONGARCH64Thread.java +@@ -0,0 +1,92 @@ ++/* ++ * Copyright (c) 2002, 2003, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2020, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++package sun.jvm.hotspot.debugger.proc.loongarch64; ++ ++import sun.jvm.hotspot.debugger.*; ++import sun.jvm.hotspot.debugger.loongarch64.*; ++import sun.jvm.hotspot.debugger.proc.*; ++import sun.jvm.hotspot.utilities.*; ++ ++public class ProcLOONGARCH64Thread implements ThreadProxy { ++ private ProcDebugger debugger; ++ private int id; ++ ++ public ProcLOONGARCH64Thread(ProcDebugger debugger, Address addr) { ++ this.debugger = debugger; ++ ++ // FIXME: the size here should be configurable. However, making it ++ // so would produce a dependency on the "types" package from the ++ // debugger package, which is not desired. ++ this.id = (int) addr.getCIntegerAt(0, 4, true); ++ } ++ ++ public ProcLOONGARCH64Thread(ProcDebugger debugger, long id) { ++ this.debugger = debugger; ++ this.id = (int) id; ++ } ++ ++ public ThreadContext getContext() throws IllegalThreadStateException { ++ ProcLOONGARCH64ThreadContext context = new ProcLOONGARCH64ThreadContext(debugger); ++ long[] regs = debugger.getThreadIntegerRegisterSet(id); ++ /* ++ _NGREG in reg.h is defined to be 19. Because we have included ++ debug registers LOONGARCH64ThreadContext.NPRGREG is 25. 
++ */ ++ ++ if (Assert.ASSERTS_ENABLED) { ++ Assert.that(regs.length <= LOONGARCH64ThreadContext.NPRGREG, "size of register set is greater than " + LOONGARCH64ThreadContext.NPRGREG); ++ } ++ for (int i = 0; i < regs.length; i++) { ++ context.setRegister(i, regs[i]); ++ } ++ return context; ++ } ++ ++ public boolean canSetContext() throws DebuggerException { ++ return false; ++ } ++ ++ public void setContext(ThreadContext context) ++ throws IllegalThreadStateException, DebuggerException { ++ throw new DebuggerException("Unimplemented"); ++ } ++ ++ public String toString() { ++ return "t@" + id; ++ } ++ ++ public boolean equals(Object obj) { ++ if ((obj == null) || !(obj instanceof ProcLOONGARCH64Thread)) { ++ return false; ++ } ++ ++ return (((ProcLOONGARCH64Thread) obj).id == id); ++ } ++ ++ public int hashCode() { ++ return id; ++ } ++} +diff --git a/hotspot/agent/src/share/classes/sun/jvm/hotspot/debugger/proc/loongarch64/ProcLOONGARCH64ThreadContext.java b/hotspot/agent/src/share/classes/sun/jvm/hotspot/debugger/proc/loongarch64/ProcLOONGARCH64ThreadContext.java +new file mode 100644 +index 0000000000..9054f16506 +--- /dev/null ++++ b/hotspot/agent/src/share/classes/sun/jvm/hotspot/debugger/proc/loongarch64/ProcLOONGARCH64ThreadContext.java +@@ -0,0 +1,47 @@ ++/* ++ * Copyright (c) 2002, 2003, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2020, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ * ++ */ ++ ++package sun.jvm.hotspot.debugger.proc.loongarch64; ++ ++import sun.jvm.hotspot.debugger.*; ++import sun.jvm.hotspot.debugger.loongarch64.*; ++import sun.jvm.hotspot.debugger.proc.*; ++ ++public class ProcLOONGARCH64ThreadContext extends LOONGARCH64ThreadContext { ++ private ProcDebugger debugger; ++ ++ public ProcLOONGARCH64ThreadContext(ProcDebugger debugger) { ++ super(); ++ this.debugger = debugger; ++ } ++ ++ public void setRegisterAsAddress(int index, Address value) { ++ setRegister(index, debugger.getAddressValue(value)); ++ } ++ ++ public Address getRegisterAsAddress(int index) { ++ return debugger.newAddress(getRegister(index)); ++ } ++} +diff --git a/hotspot/agent/src/share/classes/sun/jvm/hotspot/debugger/proc/loongarch64/ProcLOONGARCH64ThreadFactory.java b/hotspot/agent/src/share/classes/sun/jvm/hotspot/debugger/proc/loongarch64/ProcLOONGARCH64ThreadFactory.java +new file mode 100644 +index 0000000000..bc64335124 +--- /dev/null ++++ b/hotspot/agent/src/share/classes/sun/jvm/hotspot/debugger/proc/loongarch64/ProcLOONGARCH64ThreadFactory.java +@@ -0,0 +1,45 @@ ++/* ++ * Copyright (c) 2002, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2020, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++package sun.jvm.hotspot.debugger.proc.loongarch64; ++ ++import sun.jvm.hotspot.debugger.*; ++import sun.jvm.hotspot.debugger.proc.*; ++ ++public class ProcLOONGARCH64ThreadFactory implements ProcThreadFactory { ++ private ProcDebugger debugger; ++ ++ public ProcLOONGARCH64ThreadFactory(ProcDebugger debugger) { ++ this.debugger = debugger; ++ } ++ ++ public ThreadProxy createThreadWrapper(Address threadIdentifierAddr) { ++ return new ProcLOONGARCH64Thread(debugger, threadIdentifierAddr); ++ } ++ ++ public ThreadProxy createThreadWrapper(long id) { ++ return new ProcLOONGARCH64Thread(debugger, id); ++ } ++} +diff --git a/hotspot/agent/src/share/classes/sun/jvm/hotspot/debugger/proc/mips64/ProcMIPS64Thread.java b/hotspot/agent/src/share/classes/sun/jvm/hotspot/debugger/proc/mips64/ProcMIPS64Thread.java +new file mode 100644 +index 0000000000..5c1e0be893 +--- /dev/null ++++ b/hotspot/agent/src/share/classes/sun/jvm/hotspot/debugger/proc/mips64/ProcMIPS64Thread.java +@@ -0,0 +1,92 @@ ++/* ++ * Copyright (c) 2002, 2003, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2018, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++package sun.jvm.hotspot.debugger.proc.mips64; ++ ++import sun.jvm.hotspot.debugger.*; ++import sun.jvm.hotspot.debugger.mips64.*; ++import sun.jvm.hotspot.debugger.proc.*; ++import sun.jvm.hotspot.utilities.*; ++ ++public class ProcMIPS64Thread implements ThreadProxy { ++ private ProcDebugger debugger; ++ private int id; ++ ++ public ProcMIPS64Thread(ProcDebugger debugger, Address addr) { ++ this.debugger = debugger; ++ ++ // FIXME: the size here should be configurable. However, making it ++ // so would produce a dependency on the "types" package from the ++ // debugger package, which is not desired. ++ this.id = (int) addr.getCIntegerAt(0, 4, true); ++ } ++ ++ public ProcMIPS64Thread(ProcDebugger debugger, long id) { ++ this.debugger = debugger; ++ this.id = (int) id; ++ } ++ ++ public ThreadContext getContext() throws IllegalThreadStateException { ++ ProcMIPS64ThreadContext context = new ProcMIPS64ThreadContext(debugger); ++ long[] regs = debugger.getThreadIntegerRegisterSet(id); ++ /* ++ _NGREG in reg.h is defined to be 19. Because we have included ++ debug registers MIPS64ThreadContext.NPRGREG is 25. ++ */ ++ ++ if (Assert.ASSERTS_ENABLED) { ++ Assert.that(regs.length <= MIPS64ThreadContext.NPRGREG, "size of register set is greater than " + MIPS64ThreadContext.NPRGREG); ++ } ++ for (int i = 0; i < regs.length; i++) { ++ context.setRegister(i, regs[i]); ++ } ++ return context; ++ } ++ ++ public boolean canSetContext() throws DebuggerException { ++ return false; ++ } ++ ++ public void setContext(ThreadContext context) ++ throws IllegalThreadStateException, DebuggerException { ++ throw new DebuggerException("Unimplemented"); ++ } ++ ++ public String toString() { ++ return "t@" + id; ++ } ++ ++ public boolean equals(Object obj) { ++ if ((obj == null) || !(obj instanceof ProcMIPS64Thread)) { ++ return false; ++ } ++ ++ return (((ProcMIPS64Thread) obj).id == id); ++ } ++ ++ public int hashCode() { ++ return id; ++ } ++} +diff --git a/hotspot/agent/src/share/classes/sun/jvm/hotspot/debugger/proc/mips64/ProcMIPS64ThreadContext.java b/hotspot/agent/src/share/classes/sun/jvm/hotspot/debugger/proc/mips64/ProcMIPS64ThreadContext.java +new file mode 100644 +index 0000000000..d44223d768 +--- /dev/null ++++ b/hotspot/agent/src/share/classes/sun/jvm/hotspot/debugger/proc/mips64/ProcMIPS64ThreadContext.java +@@ -0,0 +1,47 @@ ++/* ++ * Copyright (c) 2002, 2003, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2018, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++package sun.jvm.hotspot.debugger.proc.mips64; ++ ++import sun.jvm.hotspot.debugger.*; ++import sun.jvm.hotspot.debugger.mips64.*; ++import sun.jvm.hotspot.debugger.proc.*; ++ ++public class ProcMIPS64ThreadContext extends MIPS64ThreadContext { ++ private ProcDebugger debugger; ++ ++ public ProcMIPS64ThreadContext(ProcDebugger debugger) { ++ super(); ++ this.debugger = debugger; ++ } ++ ++ public void setRegisterAsAddress(int index, Address value) { ++ setRegister(index, debugger.getAddressValue(value)); ++ } ++ ++ public Address getRegisterAsAddress(int index) { ++ return debugger.newAddress(getRegister(index)); ++ } ++} +diff --git a/hotspot/agent/src/share/classes/sun/jvm/hotspot/debugger/proc/mips64/ProcMIPS64ThreadFactory.java b/hotspot/agent/src/share/classes/sun/jvm/hotspot/debugger/proc/mips64/ProcMIPS64ThreadFactory.java +new file mode 100644 +index 0000000000..bad478fc5c +--- /dev/null ++++ b/hotspot/agent/src/share/classes/sun/jvm/hotspot/debugger/proc/mips64/ProcMIPS64ThreadFactory.java +@@ -0,0 +1,45 @@ ++/* ++ * Copyright (c) 2002, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2018, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ * ++ */ ++ ++package sun.jvm.hotspot.debugger.proc.mips64; ++ ++import sun.jvm.hotspot.debugger.*; ++import sun.jvm.hotspot.debugger.proc.*; ++ ++public class ProcMIPS64ThreadFactory implements ProcThreadFactory { ++ private ProcDebugger debugger; ++ ++ public ProcMIPS64ThreadFactory(ProcDebugger debugger) { ++ this.debugger = debugger; ++ } ++ ++ public ThreadProxy createThreadWrapper(Address threadIdentifierAddr) { ++ return new ProcMIPS64Thread(debugger, threadIdentifierAddr); ++ } ++ ++ public ThreadProxy createThreadWrapper(long id) { ++ return new ProcMIPS64Thread(debugger, id); ++ } ++} +diff --git a/hotspot/agent/src/share/classes/sun/jvm/hotspot/debugger/remote/RemoteDebuggerClient.java b/hotspot/agent/src/share/classes/sun/jvm/hotspot/debugger/remote/RemoteDebuggerClient.java +index ffa61b548e..9cf3ee2da3 100644 +--- a/hotspot/agent/src/share/classes/sun/jvm/hotspot/debugger/remote/RemoteDebuggerClient.java ++++ b/hotspot/agent/src/share/classes/sun/jvm/hotspot/debugger/remote/RemoteDebuggerClient.java +@@ -33,6 +33,8 @@ import sun.jvm.hotspot.debugger.cdbg.*; + import sun.jvm.hotspot.debugger.remote.sparc.*; + import sun.jvm.hotspot.debugger.remote.x86.*; + import sun.jvm.hotspot.debugger.remote.amd64.*; ++import sun.jvm.hotspot.debugger.remote.mips64.*; ++import sun.jvm.hotspot.debugger.remote.loongarch64.*; + + /** An implementation of Debugger which wraps a + RemoteDebugger, providing remote debugging via RMI. +@@ -70,6 +72,16 @@ public class RemoteDebuggerClient extends DebuggerBase implements JVMDebugger { + cachePageSize = 4096; + cacheNumPages = parseCacheNumPagesProperty(cacheSize / cachePageSize); + unalignedAccessesOkay = true; ++ } else if (cpu.equals("mips64") || cpu.equals("mips64el")) { ++ threadFactory = new RemoteMIPS64ThreadFactory(this); ++ cachePageSize = 4096; ++ cacheNumPages = parseCacheNumPagesProperty(cacheSize / cachePageSize); ++ unalignedAccessesOkay = true; ++ } else if (cpu.equals("loongarch64")) { ++ threadFactory = new RemoteLOONGARCH64ThreadFactory(this); ++ cachePageSize = 4096; ++ cacheNumPages = parseCacheNumPagesProperty(cacheSize / cachePageSize); ++ unalignedAccessesOkay = true; + } else { + try { + Class tf = Class.forName("sun.jvm.hotspot.debugger.remote." + +diff --git a/hotspot/agent/src/share/classes/sun/jvm/hotspot/debugger/remote/loongarch64/RemoteLOONGARCH64Thread.java b/hotspot/agent/src/share/classes/sun/jvm/hotspot/debugger/remote/loongarch64/RemoteLOONGARCH64Thread.java +new file mode 100644 +index 0000000000..01e3f8954b +--- /dev/null ++++ b/hotspot/agent/src/share/classes/sun/jvm/hotspot/debugger/remote/loongarch64/RemoteLOONGARCH64Thread.java +@@ -0,0 +1,54 @@ ++/* ++ * Copyright (c) 2002, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2020, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). 
++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++package sun.jvm.hotspot.debugger.remote.loongarch64; ++ ++import sun.jvm.hotspot.debugger.*; ++import sun.jvm.hotspot.debugger.loongarch64.*; ++import sun.jvm.hotspot.debugger.remote.*; ++import sun.jvm.hotspot.utilities.*; ++ ++public class RemoteLOONGARCH64Thread extends RemoteThread { ++ public RemoteLOONGARCH64Thread(RemoteDebuggerClient debugger, Address addr) { ++ super(debugger, addr); ++ } ++ ++ public RemoteLOONGARCH64Thread(RemoteDebuggerClient debugger, long id) { ++ super(debugger, id); ++ } ++ ++ public ThreadContext getContext() throws IllegalThreadStateException { ++ RemoteLOONGARCH64ThreadContext context = new RemoteLOONGARCH64ThreadContext(debugger); ++ long[] regs = (addr != null)? debugger.getThreadIntegerRegisterSet(addr) : ++ debugger.getThreadIntegerRegisterSet(id); ++ if (Assert.ASSERTS_ENABLED) { ++ Assert.that(regs.length == LOONGARCH64ThreadContext.NPRGREG, "size of register set must match"); ++ } ++ for (int i = 0; i < regs.length; i++) { ++ context.setRegister(i, regs[i]); ++ } ++ return context; ++ } ++} +diff --git a/hotspot/agent/src/share/classes/sun/jvm/hotspot/debugger/remote/loongarch64/RemoteLOONGARCH64ThreadContext.java b/hotspot/agent/src/share/classes/sun/jvm/hotspot/debugger/remote/loongarch64/RemoteLOONGARCH64ThreadContext.java +new file mode 100644 +index 0000000000..ad25bccc8d +--- /dev/null ++++ b/hotspot/agent/src/share/classes/sun/jvm/hotspot/debugger/remote/loongarch64/RemoteLOONGARCH64ThreadContext.java +@@ -0,0 +1,51 @@ ++/* ++ * Copyright (c) 2002, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2020, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ * ++ */ ++ ++package sun.jvm.hotspot.debugger.remote.loongarch64; ++ ++import sun.jvm.hotspot.debugger.*; ++import sun.jvm.hotspot.debugger.loongarch64.*; ++import sun.jvm.hotspot.debugger.remote.*; ++ ++public class RemoteLOONGARCH64ThreadContext extends LOONGARCH64ThreadContext { ++ private RemoteDebuggerClient debugger; ++ ++ public RemoteLOONGARCH64ThreadContext(RemoteDebuggerClient debugger) { ++ super(); ++ this.debugger = debugger; ++ } ++ ++ /** This can't be implemented in this class since we would have to ++ tie the implementation to, for example, the debugging system */ ++ public void setRegisterAsAddress(int index, Address value) { ++ setRegister(index, debugger.getAddressValue(value)); ++ } ++ ++ /** This can't be implemented in this class since we would have to ++ tie the implementation to, for example, the debugging system */ ++ public Address getRegisterAsAddress(int index) { ++ return debugger.newAddress(getRegister(index)); ++ } ++} +diff --git a/hotspot/agent/src/share/classes/sun/jvm/hotspot/debugger/remote/loongarch64/RemoteLOONGARCH64ThreadFactory.java b/hotspot/agent/src/share/classes/sun/jvm/hotspot/debugger/remote/loongarch64/RemoteLOONGARCH64ThreadFactory.java +new file mode 100644 +index 0000000000..d8bf50ea5b +--- /dev/null ++++ b/hotspot/agent/src/share/classes/sun/jvm/hotspot/debugger/remote/loongarch64/RemoteLOONGARCH64ThreadFactory.java +@@ -0,0 +1,45 @@ ++/* ++ * Copyright (c) 2002, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2020, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ * ++ */ ++ ++package sun.jvm.hotspot.debugger.remote.loongarch64; ++ ++import sun.jvm.hotspot.debugger.*; ++import sun.jvm.hotspot.debugger.remote.*; ++ ++public class RemoteLOONGARCH64ThreadFactory implements RemoteThreadFactory { ++ private RemoteDebuggerClient debugger; ++ ++ public RemoteLOONGARCH64ThreadFactory(RemoteDebuggerClient debugger) { ++ this.debugger = debugger; ++ } ++ ++ public ThreadProxy createThreadWrapper(Address threadIdentifierAddr) { ++ return new RemoteLOONGARCH64Thread(debugger, threadIdentifierAddr); ++ } ++ ++ public ThreadProxy createThreadWrapper(long id) { ++ return new RemoteLOONGARCH64Thread(debugger, id); ++ } ++} +diff --git a/hotspot/agent/src/share/classes/sun/jvm/hotspot/debugger/remote/mips64/RemoteMIPS64Thread.java b/hotspot/agent/src/share/classes/sun/jvm/hotspot/debugger/remote/mips64/RemoteMIPS64Thread.java +new file mode 100644 +index 0000000000..a9285a3b94 +--- /dev/null ++++ b/hotspot/agent/src/share/classes/sun/jvm/hotspot/debugger/remote/mips64/RemoteMIPS64Thread.java +@@ -0,0 +1,54 @@ ++/* ++ * Copyright (c) 2002, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2018, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++package sun.jvm.hotspot.debugger.remote.mips64; ++ ++import sun.jvm.hotspot.debugger.*; ++import sun.jvm.hotspot.debugger.mips64.*; ++import sun.jvm.hotspot.debugger.remote.*; ++import sun.jvm.hotspot.utilities.*; ++ ++public class RemoteMIPS64Thread extends RemoteThread { ++ public RemoteMIPS64Thread(RemoteDebuggerClient debugger, Address addr) { ++ super(debugger, addr); ++ } ++ ++ public RemoteMIPS64Thread(RemoteDebuggerClient debugger, long id) { ++ super(debugger, id); ++ } ++ ++ public ThreadContext getContext() throws IllegalThreadStateException { ++ RemoteMIPS64ThreadContext context = new RemoteMIPS64ThreadContext(debugger); ++ long[] regs = (addr != null)? 
debugger.getThreadIntegerRegisterSet(addr) : ++ debugger.getThreadIntegerRegisterSet(id); ++ if (Assert.ASSERTS_ENABLED) { ++ Assert.that(regs.length == MIPS64ThreadContext.NPRGREG, "size of register set must match"); ++ } ++ for (int i = 0; i < regs.length; i++) { ++ context.setRegister(i, regs[i]); ++ } ++ return context; ++ } ++} +diff --git a/hotspot/agent/src/share/classes/sun/jvm/hotspot/debugger/remote/mips64/RemoteMIPS64ThreadContext.java b/hotspot/agent/src/share/classes/sun/jvm/hotspot/debugger/remote/mips64/RemoteMIPS64ThreadContext.java +new file mode 100644 +index 0000000000..4d711f9ba7 +--- /dev/null ++++ b/hotspot/agent/src/share/classes/sun/jvm/hotspot/debugger/remote/mips64/RemoteMIPS64ThreadContext.java +@@ -0,0 +1,51 @@ ++/* ++ * Copyright (c) 2002, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2018, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++package sun.jvm.hotspot.debugger.remote.mips64; ++ ++import sun.jvm.hotspot.debugger.*; ++import sun.jvm.hotspot.debugger.mips64.*; ++import sun.jvm.hotspot.debugger.remote.*; ++ ++public class RemoteMIPS64ThreadContext extends MIPS64ThreadContext { ++ private RemoteDebuggerClient debugger; ++ ++ public RemoteMIPS64ThreadContext(RemoteDebuggerClient debugger) { ++ super(); ++ this.debugger = debugger; ++ } ++ ++ /** This can't be implemented in this class since we would have to ++ tie the implementation to, for example, the debugging system */ ++ public void setRegisterAsAddress(int index, Address value) { ++ setRegister(index, debugger.getAddressValue(value)); ++ } ++ ++ /** This can't be implemented in this class since we would have to ++ tie the implementation to, for example, the debugging system */ ++ public Address getRegisterAsAddress(int index) { ++ return debugger.newAddress(getRegister(index)); ++ } ++} +diff --git a/hotspot/agent/src/share/classes/sun/jvm/hotspot/debugger/remote/mips64/RemoteMIPS64ThreadFactory.java b/hotspot/agent/src/share/classes/sun/jvm/hotspot/debugger/remote/mips64/RemoteMIPS64ThreadFactory.java +new file mode 100644 +index 0000000000..020a2f1ff9 +--- /dev/null ++++ b/hotspot/agent/src/share/classes/sun/jvm/hotspot/debugger/remote/mips64/RemoteMIPS64ThreadFactory.java +@@ -0,0 +1,45 @@ ++/* ++ * Copyright (c) 2002, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2018, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++package sun.jvm.hotspot.debugger.remote.mips64; ++ ++import sun.jvm.hotspot.debugger.*; ++import sun.jvm.hotspot.debugger.remote.*; ++ ++public class RemoteMIPS64ThreadFactory implements RemoteThreadFactory { ++ private RemoteDebuggerClient debugger; ++ ++ public RemoteMIPS64ThreadFactory(RemoteDebuggerClient debugger) { ++ this.debugger = debugger; ++ } ++ ++ public ThreadProxy createThreadWrapper(Address threadIdentifierAddr) { ++ return new RemoteMIPS64Thread(debugger, threadIdentifierAddr); ++ } ++ ++ public ThreadProxy createThreadWrapper(long id) { ++ return new RemoteMIPS64Thread(debugger, id); ++ } ++} +diff --git a/hotspot/agent/src/share/classes/sun/jvm/hotspot/runtime/Threads.java b/hotspot/agent/src/share/classes/sun/jvm/hotspot/runtime/Threads.java +index 842a3b357d..81efdd02f8 100644 +--- a/hotspot/agent/src/share/classes/sun/jvm/hotspot/runtime/Threads.java ++++ b/hotspot/agent/src/share/classes/sun/jvm/hotspot/runtime/Threads.java +@@ -34,6 +34,8 @@ import sun.jvm.hotspot.runtime.win32_amd64.Win32AMD64JavaThreadPDAccess; + import sun.jvm.hotspot.runtime.win32_x86.Win32X86JavaThreadPDAccess; + import sun.jvm.hotspot.runtime.linux_x86.LinuxX86JavaThreadPDAccess; + import sun.jvm.hotspot.runtime.linux_amd64.LinuxAMD64JavaThreadPDAccess; ++import sun.jvm.hotspot.runtime.linux_mips64.LinuxMIPS64JavaThreadPDAccess; ++import sun.jvm.hotspot.runtime.linux_loongarch64.LinuxLOONGARCH64JavaThreadPDAccess; + import sun.jvm.hotspot.runtime.linux_sparc.LinuxSPARCJavaThreadPDAccess; + import sun.jvm.hotspot.runtime.linux_aarch64.LinuxAARCH64JavaThreadPDAccess; + import sun.jvm.hotspot.runtime.bsd_x86.BsdX86JavaThreadPDAccess; +@@ -90,6 +92,10 @@ public class Threads { + access = new LinuxSPARCJavaThreadPDAccess(); + } else if (cpu.equals("aarch64")) { + access = new LinuxAARCH64JavaThreadPDAccess(); ++ } else if (cpu.equals("mips64")) { ++ access = new LinuxMIPS64JavaThreadPDAccess(); ++ } else if (cpu.equals("loongarch64")) { ++ access = new LinuxLOONGARCH64JavaThreadPDAccess(); + } else { + try { + access = (JavaThreadPDAccess) +diff --git a/hotspot/agent/src/share/classes/sun/jvm/hotspot/runtime/linux_loongarch64/LinuxLOONGARCH64JavaThreadPDAccess.java b/hotspot/agent/src/share/classes/sun/jvm/hotspot/runtime/linux_loongarch64/LinuxLOONGARCH64JavaThreadPDAccess.java +new file mode 100644 +index 0000000000..77c45c2e99 +--- /dev/null ++++ b/hotspot/agent/src/share/classes/sun/jvm/hotspot/runtime/linux_loongarch64/LinuxLOONGARCH64JavaThreadPDAccess.java +@@ -0,0 +1,133 @@ ++/* ++ * Copyright (c) 2003, Oracle and/or its affiliates. All rights reserved. 
++ * Copyright (c) 2015, 2021, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++package sun.jvm.hotspot.runtime.linux_loongarch64; ++ ++import java.io.*; ++import java.util.*; ++ ++import sun.jvm.hotspot.debugger.*; ++import sun.jvm.hotspot.debugger.loongarch64.*; ++import sun.jvm.hotspot.runtime.*; ++import sun.jvm.hotspot.runtime.loongarch64.*; ++import sun.jvm.hotspot.types.*; ++import sun.jvm.hotspot.utilities.*; ++ ++public class LinuxLOONGARCH64JavaThreadPDAccess implements JavaThreadPDAccess { ++ private static AddressField lastJavaFPField; ++ private static AddressField osThreadField; ++ ++ // Field from OSThread ++ private static CIntegerField osThreadThreadIDField; ++ ++ // This is currently unneeded but is being kept in case we change ++ // the currentFrameGuess algorithm ++ private static final long GUESS_SCAN_RANGE = 128 * 1024; ++ ++ static { ++ VM.registerVMInitializedObserver(new Observer() { ++ public void update(Observable o, Object data) { ++ initialize(VM.getVM().getTypeDataBase()); ++ } ++ }); ++ } ++ ++ private static synchronized void initialize(TypeDataBase db) { ++ Type type = db.lookupType("JavaThread"); ++ osThreadField = type.getAddressField("_osthread"); ++ ++ Type anchorType = db.lookupType("JavaFrameAnchor"); ++ lastJavaFPField = anchorType.getAddressField("_last_Java_fp"); ++ ++ Type osThreadType = db.lookupType("OSThread"); ++ osThreadThreadIDField = osThreadType.getCIntegerField("_thread_id"); ++ } ++ ++ public Address getLastJavaFP(Address addr) { ++ return lastJavaFPField.getValue(addr.addOffsetTo(sun.jvm.hotspot.runtime.JavaThread.getAnchorField().getOffset())); ++ } ++ ++ public Address getLastJavaPC(Address addr) { ++ return null; ++ } ++ ++ public Address getBaseOfStackPointer(Address addr) { ++ return null; ++ } ++ ++ public Frame getLastFramePD(JavaThread thread, Address addr) { ++ Address fp = thread.getLastJavaFP(); ++ if (fp == null) { ++ return null; // no information ++ } ++ return new LOONGARCH64Frame(thread.getLastJavaSP(), fp); ++ } ++ ++ public RegisterMap newRegisterMap(JavaThread thread, boolean updateMap) { ++ return new LOONGARCH64RegisterMap(thread, updateMap); ++ } ++ ++ public Frame getCurrentFrameGuess(JavaThread thread, Address addr) { ++ ThreadProxy t = getThreadProxy(addr); ++ LOONGARCH64ThreadContext context = (LOONGARCH64ThreadContext) t.getContext(); ++ LOONGARCH64CurrentFrameGuess guesser = new LOONGARCH64CurrentFrameGuess(context, thread); ++ if (!guesser.run(GUESS_SCAN_RANGE)) { ++ return null; ++ } ++ if (guesser.getPC() == 
null) { ++ return new LOONGARCH64Frame(guesser.getSP(), guesser.getFP()); ++ } else { ++ return new LOONGARCH64Frame(guesser.getSP(), guesser.getFP(), guesser.getPC()); ++ } ++ } ++ ++ public void printThreadIDOn(Address addr, PrintStream tty) { ++ tty.print(getThreadProxy(addr)); ++ } ++ ++ public void printInfoOn(Address threadAddr, PrintStream tty) { ++ tty.print("Thread id: "); ++ printThreadIDOn(threadAddr, tty); ++ // tty.println("\nPostJavaState: " + getPostJavaState(threadAddr)); ++ } ++ ++ public Address getLastSP(Address addr) { ++ ThreadProxy t = getThreadProxy(addr); ++ LOONGARCH64ThreadContext context = (LOONGARCH64ThreadContext) t.getContext(); ++ return context.getRegisterAsAddress(LOONGARCH64ThreadContext.SP); ++ } ++ ++ public ThreadProxy getThreadProxy(Address addr) { ++ // Addr is the address of the JavaThread. ++ // Fetch the OSThread (for now and for simplicity, not making a ++ // separate "OSThread" class in this package) ++ Address osThreadAddr = osThreadField.getValue(addr); ++ // Get the address of the _thread_id from the OSThread ++ Address threadIdAddr = osThreadAddr.addOffsetTo(osThreadThreadIDField.getOffset()); ++ ++ JVMDebugger debugger = VM.getVM().getDebugger(); ++ return debugger.getThreadForIdentifierAddress(threadIdAddr); ++ } ++} +diff --git a/hotspot/agent/src/share/classes/sun/jvm/hotspot/runtime/linux_mips64/LinuxMIPS64JavaThreadPDAccess.java b/hotspot/agent/src/share/classes/sun/jvm/hotspot/runtime/linux_mips64/LinuxMIPS64JavaThreadPDAccess.java +new file mode 100644 +index 0000000000..a0fd73fa67 +--- /dev/null ++++ b/hotspot/agent/src/share/classes/sun/jvm/hotspot/runtime/linux_mips64/LinuxMIPS64JavaThreadPDAccess.java +@@ -0,0 +1,132 @@ ++/* ++ * Copyright (c) 2003, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2018, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ * ++ */ ++ ++package sun.jvm.hotspot.runtime.linux_mips64; ++ ++import java.io.*; ++import java.util.*; ++import sun.jvm.hotspot.debugger.*; ++import sun.jvm.hotspot.debugger.mips64.*; ++import sun.jvm.hotspot.runtime.*; ++import sun.jvm.hotspot.runtime.mips64.*; ++import sun.jvm.hotspot.types.*; ++import sun.jvm.hotspot.utilities.*; ++ ++public class LinuxMIPS64JavaThreadPDAccess implements JavaThreadPDAccess { ++ private static AddressField lastJavaFPField; ++ private static AddressField osThreadField; ++ ++ // Field from OSThread ++ private static CIntegerField osThreadThreadIDField; ++ ++ // This is currently unneeded but is being kept in case we change ++ // the currentFrameGuess algorithm ++ private static final long GUESS_SCAN_RANGE = 128 * 1024; ++ ++ static { ++ VM.registerVMInitializedObserver(new Observer() { ++ public void update(Observable o, Object data) { ++ initialize(VM.getVM().getTypeDataBase()); ++ } ++ }); ++ } ++ ++ private static synchronized void initialize(TypeDataBase db) { ++ Type type = db.lookupType("JavaThread"); ++ osThreadField = type.getAddressField("_osthread"); ++ ++ Type anchorType = db.lookupType("JavaFrameAnchor"); ++ lastJavaFPField = anchorType.getAddressField("_last_Java_fp"); ++ ++ Type osThreadType = db.lookupType("OSThread"); ++ osThreadThreadIDField = osThreadType.getCIntegerField("_thread_id"); ++ } ++ ++ public Address getLastJavaFP(Address addr) { ++ return lastJavaFPField.getValue(addr.addOffsetTo(sun.jvm.hotspot.runtime.JavaThread.getAnchorField().getOffset())); ++ } ++ ++ public Address getLastJavaPC(Address addr) { ++ return null; ++ } ++ ++ public Address getBaseOfStackPointer(Address addr) { ++ return null; ++ } ++ ++ public Frame getLastFramePD(JavaThread thread, Address addr) { ++ Address fp = thread.getLastJavaFP(); ++ if (fp == null) { ++ return null; // no information ++ } ++ return new MIPS64Frame(thread.getLastJavaSP(), fp); ++ } ++ ++ public RegisterMap newRegisterMap(JavaThread thread, boolean updateMap) { ++ return new MIPS64RegisterMap(thread, updateMap); ++ } ++ ++ public Frame getCurrentFrameGuess(JavaThread thread, Address addr) { ++ ThreadProxy t = getThreadProxy(addr); ++ MIPS64ThreadContext context = (MIPS64ThreadContext) t.getContext(); ++ MIPS64CurrentFrameGuess guesser = new MIPS64CurrentFrameGuess(context, thread); ++ if (!guesser.run(GUESS_SCAN_RANGE)) { ++ return null; ++ } ++ if (guesser.getPC() == null) { ++ return new MIPS64Frame(guesser.getSP(), guesser.getFP()); ++ } else { ++ return new MIPS64Frame(guesser.getSP(), guesser.getFP(), guesser.getPC()); ++ } ++ } ++ ++ public void printThreadIDOn(Address addr, PrintStream tty) { ++ tty.print(getThreadProxy(addr)); ++ } ++ ++ public void printInfoOn(Address threadAddr, PrintStream tty) { ++ tty.print("Thread id: "); ++ printThreadIDOn(threadAddr, tty); ++// tty.println("\nPostJavaState: " + getPostJavaState(threadAddr)); ++ } ++ ++ public Address getLastSP(Address addr) { ++ ThreadProxy t = getThreadProxy(addr); ++ MIPS64ThreadContext context = (MIPS64ThreadContext) t.getContext(); ++ return context.getRegisterAsAddress(MIPS64ThreadContext.SP); ++ } ++ ++ public ThreadProxy getThreadProxy(Address addr) { ++ // Addr is the address of the JavaThread. 
++ // Fetch the OSThread (for now and for simplicity, not making a ++ // separate "OSThread" class in this package) ++ Address osThreadAddr = osThreadField.getValue(addr); ++ // Get the address of the _thread_id from the OSThread ++ Address threadIdAddr = osThreadAddr.addOffsetTo(osThreadThreadIDField.getOffset()); ++ ++ JVMDebugger debugger = VM.getVM().getDebugger(); ++ return debugger.getThreadForIdentifierAddress(threadIdAddr); ++ } ++} +diff --git a/hotspot/agent/src/share/classes/sun/jvm/hotspot/runtime/loongarch64/LOONGARCH64CurrentFrameGuess.java b/hotspot/agent/src/share/classes/sun/jvm/hotspot/runtime/loongarch64/LOONGARCH64CurrentFrameGuess.java +new file mode 100644 +index 0000000000..0208e6e224 +--- /dev/null ++++ b/hotspot/agent/src/share/classes/sun/jvm/hotspot/runtime/loongarch64/LOONGARCH64CurrentFrameGuess.java +@@ -0,0 +1,217 @@ ++/* ++ * Copyright (c) 2001, 2006, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2020, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++package sun.jvm.hotspot.runtime.loongarch64; ++ ++import sun.jvm.hotspot.debugger.*; ++import sun.jvm.hotspot.debugger.loongarch64.*; ++import sun.jvm.hotspot.code.*; ++import sun.jvm.hotspot.interpreter.*; ++import sun.jvm.hotspot.runtime.*; ++ ++/**
++   Should be able to be used on all loongarch64 platforms we support
++   (Win32, Solaris/loongarch64, and soon Linux) to implement JavaThread's
++   "currentFrameGuess()" functionality. Input is an LOONGARCH64ThreadContext;
++   output is SP, FP, and PC for an LOONGARCH64Frame. Instantiation of the
++   LOONGARCH64Frame is left to the caller, since we may need to subclass
++   LOONGARCH64Frame to support signal handler frames on Unix platforms.
++
++   Algorithm is to walk up the stack within a given range (say,
++   512K at most) looking for a plausible PC and SP for a Java frame,
++   also considering those coming in from the context. If we find a PC
++   that belongs to the VM (i.e., in generated code like the
++   interpreter or CodeCache) then we try to find an associated EBP.
++   We repeat this until we either find a complete frame or run out of
++   stack to look at.
*/ ++ ++public class LOONGARCH64CurrentFrameGuess { ++ private LOONGARCH64ThreadContext context; ++ private JavaThread thread; ++ private Address spFound; ++ private Address fpFound; ++ private Address pcFound; ++ ++ private static final boolean DEBUG = System.getProperty("sun.jvm.hotspot.runtime.loongarch64.LOONGARCH64Frame.DEBUG") ++ != null; ++ ++ public LOONGARCH64CurrentFrameGuess(LOONGARCH64ThreadContext context, ++ JavaThread thread) { ++ this.context = context; ++ this.thread = thread; ++ } ++ ++ /** Returns false if not able to find a frame within a reasonable range. */ ++ public boolean run(long regionInBytesToSearch) { ++ Address sp = context.getRegisterAsAddress(LOONGARCH64ThreadContext.SP); ++ Address pc = context.getRegisterAsAddress(LOONGARCH64ThreadContext.PC); ++ Address fp = context.getRegisterAsAddress(LOONGARCH64ThreadContext.FP); ++ if (sp == null) { ++ // Bail out if no last java frame eithe ++ if (thread.getLastJavaSP() != null) { ++ setValues(thread.getLastJavaSP(), thread.getLastJavaFP(), null); ++ return true; ++ } ++ // Bail out ++ return false; ++ } ++ Address end = sp.addOffsetTo(regionInBytesToSearch); ++ VM vm = VM.getVM(); ++ ++ setValues(null, null, null); // Assume we're not going to find anything ++ ++ if (vm.isJavaPCDbg(pc)) { ++ if (vm.isClientCompiler()) { ++ // If the topmost frame is a Java frame, we are (pretty much) ++ // guaranteed to have a viable EBP. We should be more robust ++ // than this (we have the potential for losing entire threads' ++ // stack traces) but need to see how much work we really have ++ // to do here. Searching the stack for an (SP, FP) pair is ++ // hard since it's easy to misinterpret inter-frame stack ++ // pointers as base-of-frame pointers; we also don't know the ++ // sizes of C1 frames (not registered in the nmethod) so can't ++ // derive them from ESP. ++ ++ setValues(sp, fp, pc); ++ return true; ++ } else { ++ if (vm.getInterpreter().contains(pc)) { ++ if (DEBUG) { ++ System.out.println("CurrentFrameGuess: choosing interpreter frame: sp = " + ++ sp + ", fp = " + fp + ", pc = " + pc); ++ } ++ setValues(sp, fp, pc); ++ return true; ++ } ++ ++ // For the server compiler, EBP is not guaranteed to be valid ++ // for compiled code. In addition, an earlier attempt at a ++ // non-searching algorithm (see below) failed because the ++ // stack pointer from the thread context was pointing ++ // (considerably) beyond the ostensible end of the stack, into ++ // garbage; walking from the topmost frame back caused a crash. ++ // ++ // This algorithm takes the current PC as a given and tries to ++ // find the correct corresponding SP by walking up the stack ++ // and repeatedly performing stackwalks (very inefficient). ++ // ++ // FIXME: there is something wrong with stackwalking across ++ // adapter frames...this is likely to be the root cause of the ++ // failure with the simpler algorithm below. ++ ++ for (long offset = 0; ++ offset < regionInBytesToSearch; ++ offset += vm.getAddressSize()) { ++ try { ++ Address curSP = sp.addOffsetTo(offset); ++ Frame frame = new LOONGARCH64Frame(curSP, null, pc); ++ RegisterMap map = thread.newRegisterMap(false); ++ while (frame != null) { ++ if (frame.isEntryFrame() && frame.entryFrameIsFirst()) { ++ // We were able to traverse all the way to the ++ // bottommost Java frame. ++ // This sp looks good. Keep it. 
++ if (DEBUG) { ++ System.out.println("CurrentFrameGuess: Choosing sp = " + curSP + ", pc = " + pc); ++ } ++ setValues(curSP, null, pc); ++ return true; ++ } ++ frame = frame.sender(map); ++ } ++ } catch (Exception e) { ++ if (DEBUG) { ++ System.out.println("CurrentFrameGuess: Exception " + e + " at offset " + offset); ++ } ++ // Bad SP. Try another. ++ } ++ } ++ ++ // Were not able to find a plausible SP to go with this PC. ++ // Bail out. ++ return false; ++ ++ /* ++ // Original algorithm which does not work because SP was ++ // pointing beyond where it should have: ++ ++ // For the server compiler, EBP is not guaranteed to be valid ++ // for compiled code. We see whether the PC is in the ++ // interpreter and take care of that, otherwise we run code ++ // (unfortunately) duplicated from LOONGARCH64Frame.senderForCompiledFrame. ++ ++ CodeCache cc = vm.getCodeCache(); ++ if (cc.contains(pc)) { ++ CodeBlob cb = cc.findBlob(pc); ++ ++ // See if we can derive a frame pointer from SP and PC ++ // NOTE: This is the code duplicated from LOONGARCH64Frame ++ Address saved_fp = null; ++ int llink_offset = cb.getLinkOffset(); ++ if (llink_offset >= 0) { ++ // Restore base-pointer, since next frame might be an interpreter frame. ++ Address fp_addr = sp.addOffsetTo(VM.getVM().getAddressSize() * llink_offset); ++ saved_fp = fp_addr.getAddressAt(0); ++ } ++ ++ setValues(sp, saved_fp, pc); ++ return true; ++ } ++ */ ++ } ++ } else { ++ // If the current program counter was not known to us as a Java ++ // PC, we currently assume that we are in the run-time system ++ // and attempt to look to thread-local storage for saved ESP and ++ // EBP. Note that if these are null (because we were, in fact, ++ // in Java code, i.e., vtable stubs or similar, and the SA ++ // didn't have enough insight into the target VM to understand ++ // that) then we are going to lose the entire stack trace for ++ // the thread, which is sub-optimal. FIXME. ++ ++ if (DEBUG) { ++ System.out.println("CurrentFrameGuess: choosing last Java frame: sp = " + ++ thread.getLastJavaSP() + ", fp = " + thread.getLastJavaFP()); ++ } ++ if (thread.getLastJavaSP() == null) { ++ return false; // No known Java frames on stack ++ } ++ setValues(thread.getLastJavaSP(), thread.getLastJavaFP(), null); ++ return true; ++ } ++ } ++ ++ public Address getSP() { return spFound; } ++ public Address getFP() { return fpFound; } ++ /** May be null if getting values from thread-local storage; take ++ care to call the correct LOONGARCH64Frame constructor to recover this if ++ necessary */ ++ public Address getPC() { return pcFound; } ++ ++ private void setValues(Address sp, Address fp, Address pc) { ++ spFound = sp; ++ fpFound = fp; ++ pcFound = pc; ++ } ++} +diff --git a/hotspot/agent/src/share/classes/sun/jvm/hotspot/runtime/loongarch64/LOONGARCH64Frame.java b/hotspot/agent/src/share/classes/sun/jvm/hotspot/runtime/loongarch64/LOONGARCH64Frame.java +new file mode 100644 +index 0000000000..fdf0c79c1a +--- /dev/null ++++ b/hotspot/agent/src/share/classes/sun/jvm/hotspot/runtime/loongarch64/LOONGARCH64Frame.java +@@ -0,0 +1,534 @@ ++/* ++ * Copyright (c) 2001, 2012, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2020, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. 
++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++package sun.jvm.hotspot.runtime.loongarch64; ++ ++import java.util.*; ++import sun.jvm.hotspot.code.*; ++import sun.jvm.hotspot.compiler.*; ++import sun.jvm.hotspot.debugger.*; ++import sun.jvm.hotspot.oops.*; ++import sun.jvm.hotspot.runtime.*; ++import sun.jvm.hotspot.types.*; ++import sun.jvm.hotspot.utilities.*; ++ ++/** Specialization of and implementation of abstract methods of the ++ Frame class for the loongarch64 family of CPUs. */ ++ ++public class LOONGARCH64Frame extends Frame { ++ private static final boolean DEBUG; ++ static { ++ DEBUG = System.getProperty("sun.jvm.hotspot.runtime.loongarch64.LOONGARCH64Frame.DEBUG") != null; ++ } ++ ++ // Java frames ++ private static final int JAVA_FRAME_LINK_OFFSET = 0; ++ private static final int JAVA_FRAME_RETURN_ADDR_OFFSET = 1; ++ private static final int JAVA_FRAME_SENDER_SP_OFFSET = 2; ++ ++ // Native frames ++ private static final int NATIVE_FRAME_LINK_OFFSET = -2; ++ private static final int NATIVE_FRAME_RETURN_ADDR_OFFSET = -1; ++ private static final int NATIVE_FRAME_SENDER_SP_OFFSET = 0; ++ ++ // Interpreter frames ++ private static final int INTERPRETER_FRAME_MIRROR_OFFSET = 2; // for native calls only ++ private static final int INTERPRETER_FRAME_SENDER_SP_OFFSET = -1; ++ private static final int INTERPRETER_FRAME_LAST_SP_OFFSET = INTERPRETER_FRAME_SENDER_SP_OFFSET - 1; ++ private static final int INTERPRETER_FRAME_LOCALS_OFFSET = INTERPRETER_FRAME_LAST_SP_OFFSET - 1; ++ private static final int INTERPRETER_FRAME_METHOD_OFFSET = INTERPRETER_FRAME_LOCALS_OFFSET - 1; ++ private static final int INTERPRETER_FRAME_MDX_OFFSET = INTERPRETER_FRAME_MIRROR_OFFSET - 1; ++ private static final int INTERPRETER_FRAME_CACHE_OFFSET = INTERPRETER_FRAME_MDX_OFFSET - 1; ++ private static final int INTERPRETER_FRAME_BCX_OFFSET = INTERPRETER_FRAME_CACHE_OFFSET - 1; ++ private static final int INTERPRETER_FRAME_INITIAL_SP_OFFSET = INTERPRETER_FRAME_BCX_OFFSET - 1; ++ private static final int INTERPRETER_FRAME_MONITOR_BLOCK_TOP_OFFSET = INTERPRETER_FRAME_INITIAL_SP_OFFSET; ++ private static final int INTERPRETER_FRAME_MONITOR_BLOCK_BOTTOM_OFFSET = INTERPRETER_FRAME_INITIAL_SP_OFFSET; ++ ++ // Entry frames ++ private static final int ENTRY_FRAME_CALL_WRAPPER_OFFSET = -9; ++ ++ // Native frames ++ private static final int NATIVE_FRAME_INITIAL_PARAM_OFFSET = 2; ++ ++ private static VMReg fp = new VMReg(22 << 1); ++ ++ // an additional field beyond sp and pc: ++ Address raw_fp; // frame pointer ++ private Address raw_unextendedSP; ++ ++ private LOONGARCH64Frame() { ++ } ++ ++ private void adjustForDeopt() { ++ if ( pc != null) { ++ // Look for a deopt pc and if it is deopted convert to original pc ++ CodeBlob cb = VM.getVM().getCodeCache().findBlob(pc); ++ if (cb != null && cb.isJavaMethod()) { ++ NMethod nm = 
(NMethod) cb; ++ if (pc.equals(nm.deoptHandlerBegin())) { ++ if (Assert.ASSERTS_ENABLED) { ++ Assert.that(this.getUnextendedSP() != null, "null SP in Java frame"); ++ } ++ // adjust pc if frame is deoptimized. ++ pc = this.getUnextendedSP().getAddressAt(nm.origPCOffset()); ++ deoptimized = true; ++ } ++ } ++ } ++ } ++ ++ public LOONGARCH64Frame(Address raw_sp, Address raw_fp, Address pc) { ++ this.raw_sp = raw_sp; ++ this.raw_unextendedSP = raw_sp; ++ this.raw_fp = raw_fp; ++ this.pc = pc; ++ adjustUnextendedSP(); ++ ++ // Frame must be fully constructed before this call ++ adjustForDeopt(); ++ ++ if (DEBUG) { ++ System.out.println("LOONGARCH64Frame(sp, fp, pc): " + this); ++ dumpStack(); ++ } ++ } ++ ++ public LOONGARCH64Frame(Address raw_sp, Address raw_fp) { ++ this.raw_sp = raw_sp; ++ this.raw_unextendedSP = raw_sp; ++ this.raw_fp = raw_fp; ++ this.pc = raw_fp.getAddressAt(1 * VM.getVM().getAddressSize()); ++ adjustUnextendedSP(); ++ ++ // Frame must be fully constructed before this call ++ adjustForDeopt(); ++ ++ if (DEBUG) { ++ System.out.println("LOONGARCH64Frame(sp, fp): " + this); ++ dumpStack(); ++ } ++ } ++ ++ public LOONGARCH64Frame(Address raw_sp, Address raw_unextendedSp, Address raw_fp, Address pc) { ++ this.raw_sp = raw_sp; ++ this.raw_unextendedSP = raw_unextendedSp; ++ this.raw_fp = raw_fp; ++ this.pc = pc; ++ adjustUnextendedSP(); ++ ++ // Frame must be fully constructed before this call ++ adjustForDeopt(); ++ ++ if (DEBUG) { ++ System.out.println("LOONGARCH64Frame(sp, unextendedSP, fp, pc): " + this); ++ dumpStack(); ++ } ++ ++ } ++ ++ public Object clone() { ++ LOONGARCH64Frame frame = new LOONGARCH64Frame(); ++ frame.raw_sp = raw_sp; ++ frame.raw_unextendedSP = raw_unextendedSP; ++ frame.raw_fp = raw_fp; ++ frame.pc = pc; ++ frame.deoptimized = deoptimized; ++ return frame; ++ } ++ ++ public boolean equals(Object arg) { ++ if (arg == null) { ++ return false; ++ } ++ ++ if (!(arg instanceof LOONGARCH64Frame)) { ++ return false; ++ } ++ ++ LOONGARCH64Frame other = (LOONGARCH64Frame) arg; ++ ++ return (AddressOps.equal(getSP(), other.getSP()) && ++ AddressOps.equal(getUnextendedSP(), other.getUnextendedSP()) && ++ AddressOps.equal(getFP(), other.getFP()) && ++ AddressOps.equal(getPC(), other.getPC())); ++ } ++ ++ public int hashCode() { ++ if (raw_sp == null) { ++ return 0; ++ } ++ ++ return raw_sp.hashCode(); ++ } ++ ++ public String toString() { ++ return "sp: " + (getSP() == null? "null" : getSP().toString()) + ++ ", unextendedSP: " + (getUnextendedSP() == null? "null" : getUnextendedSP().toString()) + ++ ", fp: " + (getFP() == null? "null" : getFP().toString()) + ++ ", pc: " + (pc == null? 
"null" : pc.toString()); ++ } ++ ++ // accessors for the instance variables ++ public Address getFP() { return raw_fp; } ++ public Address getSP() { return raw_sp; } ++ public Address getID() { return raw_sp; } ++ ++ // FIXME: not implemented yet (should be done for Solaris/LOONGARCH) ++ public boolean isSignalHandlerFrameDbg() { return false; } ++ public int getSignalNumberDbg() { return 0; } ++ public String getSignalNameDbg() { return null; } ++ ++ public boolean isInterpretedFrameValid() { ++ if (Assert.ASSERTS_ENABLED) { ++ Assert.that(isInterpretedFrame(), "Not an interpreted frame"); ++ } ++ ++ // These are reasonable sanity checks ++ if (getFP() == null || getFP().andWithMask(0x3) != null) { ++ return false; ++ } ++ ++ if (getSP() == null || getSP().andWithMask(0x3) != null) { ++ return false; ++ } ++ ++ if (getFP().addOffsetTo(INTERPRETER_FRAME_INITIAL_SP_OFFSET * VM.getVM().getAddressSize()).lessThan(getSP())) { ++ return false; ++ } ++ ++ // These are hacks to keep us out of trouble. ++ // The problem with these is that they mask other problems ++ if (getFP().lessThanOrEqual(getSP())) { ++ // this attempts to deal with unsigned comparison above ++ return false; ++ } ++ ++ if (getFP().minus(getSP()) > 4096 * VM.getVM().getAddressSize()) { ++ // stack frames shouldn't be large. ++ return false; ++ } ++ ++ return true; ++ } ++ ++ // FIXME: not applicable in current system ++ // void patch_pc(Thread* thread, address pc); ++ ++ public Frame sender(RegisterMap regMap, CodeBlob cb) { ++ LOONGARCH64RegisterMap map = (LOONGARCH64RegisterMap) regMap; ++ ++ if (Assert.ASSERTS_ENABLED) { ++ Assert.that(map != null, "map must be set"); ++ } ++ ++ // Default is we done have to follow them. The sender_for_xxx will ++ // update it accordingly ++ map.setIncludeArgumentOops(false); ++ ++ if (isEntryFrame()) return senderForEntryFrame(map); ++ if (isInterpretedFrame()) return senderForInterpreterFrame(map); ++ ++ if(cb == null) { ++ cb = VM.getVM().getCodeCache().findBlob(getPC()); ++ } else { ++ if (Assert.ASSERTS_ENABLED) { ++ Assert.that(cb.equals(VM.getVM().getCodeCache().findBlob(getPC())), "Must be the same"); ++ } ++ } ++ ++ if (cb != null) { ++ return senderForCompiledFrame(map, cb); ++ } ++ ++ // Must be native-compiled frame, i.e. the marshaling code for native ++ // methods that exists in the core system. 
++ return new LOONGARCH64Frame(getSenderSP(), getLink(), getSenderPC()); ++ } ++ ++ private Frame senderForEntryFrame(LOONGARCH64RegisterMap map) { ++ if (DEBUG) { ++ System.out.println("senderForEntryFrame"); ++ } ++ if (Assert.ASSERTS_ENABLED) { ++ Assert.that(map != null, "map must be set"); ++ } ++ // Java frame called from C; skip all C frames and return top C ++ // frame of that chunk as the sender ++ LOONGARCH64JavaCallWrapper jcw = (LOONGARCH64JavaCallWrapper) getEntryFrameCallWrapper(); ++ if (Assert.ASSERTS_ENABLED) { ++ Assert.that(!entryFrameIsFirst(), "next Java fp must be non zero"); ++ Assert.that(jcw.getLastJavaSP().greaterThan(getSP()), "must be above this frame on stack"); ++ } ++ LOONGARCH64Frame fr; ++ if (jcw.getLastJavaPC() != null) { ++ fr = new LOONGARCH64Frame(jcw.getLastJavaSP(), jcw.getLastJavaFP(), jcw.getLastJavaPC()); ++ } else { ++ fr = new LOONGARCH64Frame(jcw.getLastJavaSP(), jcw.getLastJavaFP()); ++ } ++ map.clear(); ++ if (Assert.ASSERTS_ENABLED) { ++ Assert.that(map.getIncludeArgumentOops(), "should be set by clear"); ++ } ++ return fr; ++ } ++ ++ //------------------------------------------------------------------------------ ++ // frame::adjust_unextended_sp ++ private void adjustUnextendedSP() { ++ // On loongarch, sites calling method handle intrinsics and lambda forms are treated ++ // as any other call site. Therefore, no special action is needed when we are ++ // returning to any of these call sites. ++ ++ CodeBlob cb = cb(); ++ NMethod senderNm = (cb == null) ? null : cb.asNMethodOrNull(); ++ if (senderNm != null) { ++ // If the sender PC is a deoptimization point, get the original PC. ++ if (senderNm.isDeoptEntry(getPC()) || ++ senderNm.isDeoptMhEntry(getPC())) { ++ // DEBUG_ONLY(verifyDeoptriginalPc(senderNm, raw_unextendedSp)); ++ } ++ } ++ } ++ ++ private Frame senderForInterpreterFrame(LOONGARCH64RegisterMap map) { ++ if (DEBUG) { ++ System.out.println("senderForInterpreterFrame"); ++ } ++ Address unextendedSP = addressOfStackSlot(INTERPRETER_FRAME_SENDER_SP_OFFSET).getAddressAt(0); ++ Address sp = getSenderSP(); ++ // We do not need to update the callee-save register mapping because above ++ // us is either another interpreter frame or a converter-frame, but never ++ // directly a compiled frame. ++ // 11/24/04 SFG. With the removal of adapter frames this is no longer true. ++ // However c2 no longer uses callee save register for java calls so there ++ // are no callee register to find. 
++ ++ if (map.getUpdateMap()) ++ updateMapWithSavedLink(map, addressOfStackSlot(JAVA_FRAME_LINK_OFFSET)); ++ ++ return new LOONGARCH64Frame(sp, unextendedSP, getLink(), getSenderPC()); ++ } ++ ++ private void updateMapWithSavedLink(RegisterMap map, Address savedFPAddr) { ++ map.setLocation(fp, savedFPAddr); ++ } ++ ++ private Frame senderForCompiledFrame(LOONGARCH64RegisterMap map, CodeBlob cb) { ++ if (DEBUG) { ++ System.out.println("senderForCompiledFrame"); ++ } ++ ++ // ++ // NOTE: some of this code is (unfortunately) duplicated in LOONGARCH64CurrentFrameGuess ++ // ++ ++ if (Assert.ASSERTS_ENABLED) { ++ Assert.that(map != null, "map must be set"); ++ } ++ ++ // frame owned by optimizing compiler ++ if (Assert.ASSERTS_ENABLED) { ++ Assert.that(cb.getFrameSize() >= 0, "must have non-zero frame size"); ++ } ++ Address senderSP = getUnextendedSP().addOffsetTo(cb.getFrameSize()); ++ ++ // On Intel the return_address is always the word on the stack ++ Address senderPC = senderSP.getAddressAt(-1 * VM.getVM().getAddressSize()); ++ ++ // This is the saved value of EBP which may or may not really be an FP. ++ // It is only an FP if the sender is an interpreter frame (or C1?). ++ Address savedFPAddr = senderSP.addOffsetTo(- JAVA_FRAME_SENDER_SP_OFFSET * VM.getVM().getAddressSize()); ++ ++ if (map.getUpdateMap()) { ++ // Tell GC to use argument oopmaps for some runtime stubs that need it. ++ // For C1, the runtime stub might not have oop maps, so set this flag ++ // outside of update_register_map. ++ map.setIncludeArgumentOops(cb.callerMustGCArguments()); ++ ++ if (cb.getOopMaps() != null) { ++ OopMapSet.updateRegisterMap(this, cb, map, true); ++ } ++ ++ // Since the prolog does the save and restore of EBP there is no oopmap ++ // for it so we must fill in its location as if there was an oopmap entry ++ // since if our caller was compiled code there could be live jvm state in it. ++ updateMapWithSavedLink(map, savedFPAddr); ++ } ++ ++ return new LOONGARCH64Frame(senderSP, savedFPAddr.getAddressAt(0), senderPC); ++ } ++ ++ protected boolean hasSenderPD() { ++ // FIXME ++ // Check for null ebp? Need to do some tests. ++ return true; ++ } ++ ++ public long frameSize() { ++ return (getSenderSP().minus(getSP()) / VM.getVM().getAddressSize()); ++ } ++ ++ public Address getLink() { ++ if (isJavaFrame()) ++ return addressOfStackSlot(JAVA_FRAME_LINK_OFFSET).getAddressAt(0); ++ return addressOfStackSlot(NATIVE_FRAME_LINK_OFFSET).getAddressAt(0); ++ } ++ ++ public Address getUnextendedSP() { return raw_unextendedSP; } ++ ++ // Return address: ++ public Address getSenderPCAddr() { ++ if (isJavaFrame()) ++ return addressOfStackSlot(JAVA_FRAME_RETURN_ADDR_OFFSET); ++ return addressOfStackSlot(NATIVE_FRAME_RETURN_ADDR_OFFSET); ++ } ++ ++ public Address getSenderPC() { return getSenderPCAddr().getAddressAt(0); } ++ ++ public Address getSenderSP() { ++ if (isJavaFrame()) ++ return addressOfStackSlot(JAVA_FRAME_SENDER_SP_OFFSET); ++ return addressOfStackSlot(NATIVE_FRAME_SENDER_SP_OFFSET); ++ } ++ ++ // return address of param, zero origin index. 
++ public Address getNativeParamAddr(int idx) { ++ return addressOfStackSlot(NATIVE_FRAME_INITIAL_PARAM_OFFSET + idx); ++ } ++ ++ public Address addressOfInterpreterFrameLocals() { ++ return addressOfStackSlot(INTERPRETER_FRAME_LOCALS_OFFSET); ++ } ++ ++ private Address addressOfInterpreterFrameBCX() { ++ return addressOfStackSlot(INTERPRETER_FRAME_BCX_OFFSET); ++ } ++ ++ public int getInterpreterFrameBCI() { ++ // FIXME: this is not atomic with respect to GC and is unsuitable ++ // for use in a non-debugging, or reflective, system. Need to ++ // figure out how to express this. ++ Address bcp = addressOfInterpreterFrameBCX().getAddressAt(0); ++ Address methodHandle = addressOfInterpreterFrameMethod().getAddressAt(0); ++ Method method = (Method)Metadata.instantiateWrapperFor(methodHandle); ++ return bcpToBci(bcp, method); ++ } ++ ++ public Address addressOfInterpreterFrameMDX() { ++ return addressOfStackSlot(INTERPRETER_FRAME_MDX_OFFSET); ++ } ++ ++ // FIXME ++ //inline int frame::interpreter_frame_monitor_size() { ++ // return BasicObjectLock::size(); ++ //} ++ ++ // expression stack ++ // (the max_stack arguments are used by the GC; see class FrameClosure) ++ ++ public Address addressOfInterpreterFrameExpressionStack() { ++ Address monitorEnd = interpreterFrameMonitorEnd().address(); ++ return monitorEnd.addOffsetTo(-1 * VM.getVM().getAddressSize()); ++ } ++ ++ public int getInterpreterFrameExpressionStackDirection() { return -1; } ++ ++ // top of expression stack ++ public Address addressOfInterpreterFrameTOS() { ++ return getSP(); ++ } ++ ++ /** Expression stack from top down */ ++ public Address addressOfInterpreterFrameTOSAt(int slot) { ++ return addressOfInterpreterFrameTOS().addOffsetTo(slot * VM.getVM().getAddressSize()); ++ } ++ ++ public Address getInterpreterFrameSenderSP() { ++ if (Assert.ASSERTS_ENABLED) { ++ Assert.that(isInterpretedFrame(), "interpreted frame expected"); ++ } ++ return addressOfStackSlot(INTERPRETER_FRAME_SENDER_SP_OFFSET).getAddressAt(0); ++ } ++ ++ // Monitors ++ public BasicObjectLock interpreterFrameMonitorBegin() { ++ return new BasicObjectLock(addressOfStackSlot(INTERPRETER_FRAME_MONITOR_BLOCK_BOTTOM_OFFSET)); ++ } ++ ++ public BasicObjectLock interpreterFrameMonitorEnd() { ++ Address result = addressOfStackSlot(INTERPRETER_FRAME_MONITOR_BLOCK_TOP_OFFSET).getAddressAt(0); ++ if (Assert.ASSERTS_ENABLED) { ++ // make sure the pointer points inside the frame ++ Assert.that(AddressOps.gt(getFP(), result), "result must < than frame pointer"); ++ Assert.that(AddressOps.lte(getSP(), result), "result must >= than stack pointer"); ++ } ++ return new BasicObjectLock(result); ++ } ++ ++ public int interpreterFrameMonitorSize() { ++ return BasicObjectLock.size(); ++ } ++ ++ // Method ++ public Address addressOfInterpreterFrameMethod() { ++ return addressOfStackSlot(INTERPRETER_FRAME_METHOD_OFFSET); ++ } ++ ++ // Constant pool cache ++ public Address addressOfInterpreterFrameCPCache() { ++ return addressOfStackSlot(INTERPRETER_FRAME_CACHE_OFFSET); ++ } ++ ++ // Entry frames ++ public JavaCallWrapper getEntryFrameCallWrapper() { ++ return new LOONGARCH64JavaCallWrapper(addressOfStackSlot(ENTRY_FRAME_CALL_WRAPPER_OFFSET).getAddressAt(0)); ++ } ++ ++ protected Address addressOfSavedOopResult() { ++ // offset is 2 for compiler2 and 3 for compiler1 ++ return getSP().addOffsetTo((VM.getVM().isClientCompiler() ? 
2 : 3) * ++ VM.getVM().getAddressSize()); ++ } ++ ++ protected Address addressOfSavedReceiver() { ++ return getSP().addOffsetTo(-4 * VM.getVM().getAddressSize()); ++ } ++ ++ private void dumpStack() { ++ if (getFP() != null) { ++ for (Address addr = getSP().addOffsetTo(-5 * VM.getVM().getAddressSize()); ++ AddressOps.lte(addr, getFP().addOffsetTo(5 * VM.getVM().getAddressSize())); ++ addr = addr.addOffsetTo(VM.getVM().getAddressSize())) { ++ System.out.println(addr + ": " + addr.getAddressAt(0)); ++ } ++ } else { ++ for (Address addr = getSP().addOffsetTo(-5 * VM.getVM().getAddressSize()); ++ AddressOps.lte(addr, getSP().addOffsetTo(20 * VM.getVM().getAddressSize())); ++ addr = addr.addOffsetTo(VM.getVM().getAddressSize())) { ++ System.out.println(addr + ": " + addr.getAddressAt(0)); ++ } ++ } ++ } ++} +diff --git a/hotspot/agent/src/share/classes/sun/jvm/hotspot/runtime/loongarch64/LOONGARCH64JavaCallWrapper.java b/hotspot/agent/src/share/classes/sun/jvm/hotspot/runtime/loongarch64/LOONGARCH64JavaCallWrapper.java +new file mode 100644 +index 0000000000..f7dbbcaacd +--- /dev/null ++++ b/hotspot/agent/src/share/classes/sun/jvm/hotspot/runtime/loongarch64/LOONGARCH64JavaCallWrapper.java +@@ -0,0 +1,57 @@ ++/* ++ * Copyright (c) 2001, 2002, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2020, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ * ++ */ ++ ++package sun.jvm.hotspot.runtime.loongarch64; ++ ++import java.util.*; ++import sun.jvm.hotspot.debugger.*; ++import sun.jvm.hotspot.types.*; ++import sun.jvm.hotspot.runtime.*; ++ ++public class LOONGARCH64JavaCallWrapper extends JavaCallWrapper { ++ private static AddressField lastJavaFPField; ++ ++ static { ++ VM.registerVMInitializedObserver(new Observer() { ++ public void update(Observable o, Object data) { ++ initialize(VM.getVM().getTypeDataBase()); ++ } ++ }); ++ } ++ ++ private static synchronized void initialize(TypeDataBase db) { ++ Type type = db.lookupType("JavaFrameAnchor"); ++ ++ lastJavaFPField = type.getAddressField("_last_Java_fp"); ++ } ++ ++ public LOONGARCH64JavaCallWrapper(Address addr) { ++ super(addr); ++ } ++ ++ public Address getLastJavaFP() { ++ return lastJavaFPField.getValue(addr.addOffsetTo(anchorField.getOffset())); ++ } ++} +diff --git a/hotspot/agent/src/share/classes/sun/jvm/hotspot/runtime/loongarch64/LOONGARCH64RegisterMap.java b/hotspot/agent/src/share/classes/sun/jvm/hotspot/runtime/loongarch64/LOONGARCH64RegisterMap.java +new file mode 100644 +index 0000000000..021ef523e3 +--- /dev/null ++++ b/hotspot/agent/src/share/classes/sun/jvm/hotspot/runtime/loongarch64/LOONGARCH64RegisterMap.java +@@ -0,0 +1,52 @@ ++/* ++ * Copyright (c) 2001, 2012, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2020, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ * ++ */ ++ ++package sun.jvm.hotspot.runtime.loongarch64; ++ ++import sun.jvm.hotspot.debugger.*; ++import sun.jvm.hotspot.runtime.*; ++ ++public class LOONGARCH64RegisterMap extends RegisterMap { ++ ++ /** This is the only public constructor */ ++ public LOONGARCH64RegisterMap(JavaThread thread, boolean updateMap) { ++ super(thread, updateMap); ++ } ++ ++ protected LOONGARCH64RegisterMap(RegisterMap map) { ++ super(map); ++ } ++ ++ public Object clone() { ++ LOONGARCH64RegisterMap retval = new LOONGARCH64RegisterMap(this); ++ return retval; ++ } ++ ++ // no PD state to clear or copy: ++ protected void clearPD() {} ++ protected void initializePD() {} ++ protected void initializeFromPD(RegisterMap map) {} ++ protected Address getLocationPD(VMReg reg) { return null; } ++} +diff --git a/hotspot/agent/src/share/classes/sun/jvm/hotspot/runtime/mips64/MIPS64CurrentFrameGuess.java b/hotspot/agent/src/share/classes/sun/jvm/hotspot/runtime/mips64/MIPS64CurrentFrameGuess.java +new file mode 100644 +index 0000000000..21259a4d32 +--- /dev/null ++++ b/hotspot/agent/src/share/classes/sun/jvm/hotspot/runtime/mips64/MIPS64CurrentFrameGuess.java +@@ -0,0 +1,217 @@ ++/* ++ * Copyright (c) 2001, 2006, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2018, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++package sun.jvm.hotspot.runtime.mips64; ++ ++import sun.jvm.hotspot.debugger.*; ++import sun.jvm.hotspot.debugger.mips64.*; ++import sun.jvm.hotspot.code.*; ++import sun.jvm.hotspot.interpreter.*; ++import sun.jvm.hotspot.runtime.*; ++ ++/**

Should be able to be used on all mips64 platforms we support ++ (Win32, Solaris/mips64, and soon Linux) to implement JavaThread's ++ "currentFrameGuess()" functionality. Input is an MIPS64ThreadContext; ++ output is SP, FP, and PC for an MIPS64Frame. Instantiation of the ++ MIPS64Frame is left to the caller, since we may need to subclass ++ MIPS64Frame to support signal handler frames on Unix platforms.
++ ++
Algorithm is to walk up the stack within a given range (say, ++ 512K at most) looking for a plausible PC and SP for a Java frame, ++ also considering those coming in from the context. If we find a PC ++ that belongs to the VM (i.e., in generated code like the ++ interpreter or CodeCache) then we try to find an associated EBP. ++ We repeat this until we either find a complete frame or run out of ++ stack to look at.
*/ ++ ++public class MIPS64CurrentFrameGuess { ++ private MIPS64ThreadContext context; ++ private JavaThread thread; ++ private Address spFound; ++ private Address fpFound; ++ private Address pcFound; ++ ++ private static final boolean DEBUG = System.getProperty("sun.jvm.hotspot.runtime.mips64.MIPS64Frame.DEBUG") ++ != null; ++ ++ public MIPS64CurrentFrameGuess(MIPS64ThreadContext context, ++ JavaThread thread) { ++ this.context = context; ++ this.thread = thread; ++ } ++ ++ /** Returns false if not able to find a frame within a reasonable range. */ ++ public boolean run(long regionInBytesToSearch) { ++ Address sp = context.getRegisterAsAddress(MIPS64ThreadContext.SP); ++ Address pc = context.getRegisterAsAddress(MIPS64ThreadContext.PC); ++ Address fp = context.getRegisterAsAddress(MIPS64ThreadContext.FP); ++ if (sp == null) { ++ // Bail out if no last java frame eithe ++ if (thread.getLastJavaSP() != null) { ++ setValues(thread.getLastJavaSP(), thread.getLastJavaFP(), null); ++ return true; ++ } ++ // Bail out ++ return false; ++ } ++ Address end = sp.addOffsetTo(regionInBytesToSearch); ++ VM vm = VM.getVM(); ++ ++ setValues(null, null, null); // Assume we're not going to find anything ++ ++ if (vm.isJavaPCDbg(pc)) { ++ if (vm.isClientCompiler()) { ++ // If the topmost frame is a Java frame, we are (pretty much) ++ // guaranteed to have a viable EBP. We should be more robust ++ // than this (we have the potential for losing entire threads' ++ // stack traces) but need to see how much work we really have ++ // to do here. Searching the stack for an (SP, FP) pair is ++ // hard since it's easy to misinterpret inter-frame stack ++ // pointers as base-of-frame pointers; we also don't know the ++ // sizes of C1 frames (not registered in the nmethod) so can't ++ // derive them from ESP. ++ ++ setValues(sp, fp, pc); ++ return true; ++ } else { ++ if (vm.getInterpreter().contains(pc)) { ++ if (DEBUG) { ++ System.out.println("CurrentFrameGuess: choosing interpreter frame: sp = " + ++ sp + ", fp = " + fp + ", pc = " + pc); ++ } ++ setValues(sp, fp, pc); ++ return true; ++ } ++ ++ // For the server compiler, EBP is not guaranteed to be valid ++ // for compiled code. In addition, an earlier attempt at a ++ // non-searching algorithm (see below) failed because the ++ // stack pointer from the thread context was pointing ++ // (considerably) beyond the ostensible end of the stack, into ++ // garbage; walking from the topmost frame back caused a crash. ++ // ++ // This algorithm takes the current PC as a given and tries to ++ // find the correct corresponding SP by walking up the stack ++ // and repeatedly performing stackwalks (very inefficient). ++ // ++ // FIXME: there is something wrong with stackwalking across ++ // adapter frames...this is likely to be the root cause of the ++ // failure with the simpler algorithm below. ++ ++ for (long offset = 0; ++ offset < regionInBytesToSearch; ++ offset += vm.getAddressSize()) { ++ try { ++ Address curSP = sp.addOffsetTo(offset); ++ Frame frame = new MIPS64Frame(curSP, null, pc); ++ RegisterMap map = thread.newRegisterMap(false); ++ while (frame != null) { ++ if (frame.isEntryFrame() && frame.entryFrameIsFirst()) { ++ // We were able to traverse all the way to the ++ // bottommost Java frame. ++ // This sp looks good. Keep it. 
++ if (DEBUG) { ++ System.out.println("CurrentFrameGuess: Choosing sp = " + curSP + ", pc = " + pc); ++ } ++ setValues(curSP, null, pc); ++ return true; ++ } ++ frame = frame.sender(map); ++ } ++ } catch (Exception e) { ++ if (DEBUG) { ++ System.out.println("CurrentFrameGuess: Exception " + e + " at offset " + offset); ++ } ++ // Bad SP. Try another. ++ } ++ } ++ ++ // Were not able to find a plausible SP to go with this PC. ++ // Bail out. ++ return false; ++ ++ /* ++ // Original algorithm which does not work because SP was ++ // pointing beyond where it should have: ++ ++ // For the server compiler, EBP is not guaranteed to be valid ++ // for compiled code. We see whether the PC is in the ++ // interpreter and take care of that, otherwise we run code ++ // (unfortunately) duplicated from MIPS64Frame.senderForCompiledFrame. ++ ++ CodeCache cc = vm.getCodeCache(); ++ if (cc.contains(pc)) { ++ CodeBlob cb = cc.findBlob(pc); ++ ++ // See if we can derive a frame pointer from SP and PC ++ // NOTE: This is the code duplicated from MIPS64Frame ++ Address saved_fp = null; ++ int llink_offset = cb.getLinkOffset(); ++ if (llink_offset >= 0) { ++ // Restore base-pointer, since next frame might be an interpreter frame. ++ Address fp_addr = sp.addOffsetTo(VM.getVM().getAddressSize() * llink_offset); ++ saved_fp = fp_addr.getAddressAt(0); ++ } ++ ++ setValues(sp, saved_fp, pc); ++ return true; ++ } ++ */ ++ } ++ } else { ++ // If the current program counter was not known to us as a Java ++ // PC, we currently assume that we are in the run-time system ++ // and attempt to look to thread-local storage for saved ESP and ++ // EBP. Note that if these are null (because we were, in fact, ++ // in Java code, i.e., vtable stubs or similar, and the SA ++ // didn't have enough insight into the target VM to understand ++ // that) then we are going to lose the entire stack trace for ++ // the thread, which is sub-optimal. FIXME. ++ ++ if (DEBUG) { ++ System.out.println("CurrentFrameGuess: choosing last Java frame: sp = " + ++ thread.getLastJavaSP() + ", fp = " + thread.getLastJavaFP()); ++ } ++ if (thread.getLastJavaSP() == null) { ++ return false; // No known Java frames on stack ++ } ++ setValues(thread.getLastJavaSP(), thread.getLastJavaFP(), null); ++ return true; ++ } ++ } ++ ++ public Address getSP() { return spFound; } ++ public Address getFP() { return fpFound; } ++ /** May be null if getting values from thread-local storage; take ++ care to call the correct MIPS64Frame constructor to recover this if ++ necessary */ ++ public Address getPC() { return pcFound; } ++ ++ private void setValues(Address sp, Address fp, Address pc) { ++ spFound = sp; ++ fpFound = fp; ++ pcFound = pc; ++ } ++} +diff --git a/hotspot/agent/src/share/classes/sun/jvm/hotspot/runtime/mips64/MIPS64Frame.java b/hotspot/agent/src/share/classes/sun/jvm/hotspot/runtime/mips64/MIPS64Frame.java +new file mode 100644 +index 0000000000..0cc5cf4e7c +--- /dev/null ++++ b/hotspot/agent/src/share/classes/sun/jvm/hotspot/runtime/mips64/MIPS64Frame.java +@@ -0,0 +1,547 @@ ++/* ++ * Copyright (c) 2001, 2012, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2018, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. 
++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++package sun.jvm.hotspot.runtime.mips64; ++ ++import java.util.*; ++import sun.jvm.hotspot.code.*; ++import sun.jvm.hotspot.compiler.*; ++import sun.jvm.hotspot.debugger.*; ++import sun.jvm.hotspot.oops.*; ++import sun.jvm.hotspot.runtime.*; ++import sun.jvm.hotspot.types.*; ++import sun.jvm.hotspot.utilities.*; ++ ++/** Specialization of and implementation of abstract methods of the ++ Frame class for the mips64 family of CPUs. */ ++ ++public class MIPS64Frame extends Frame { ++ private static final boolean DEBUG; ++ static { ++ DEBUG = System.getProperty("sun.jvm.hotspot.runtime.mips64.MIPS64Frame.DEBUG") != null; ++ } ++ ++ // All frames ++ private static final int LINK_OFFSET = 0; ++ private static final int RETURN_ADDR_OFFSET = 1; ++ private static final int SENDER_SP_OFFSET = 2; ++ ++ // Interpreter frames ++ private static final int INTERPRETER_FRAME_MIRROR_OFFSET = 2; // for native calls only ++ private static final int INTERPRETER_FRAME_SENDER_SP_OFFSET = -1; ++ private static final int INTERPRETER_FRAME_LAST_SP_OFFSET = INTERPRETER_FRAME_SENDER_SP_OFFSET - 1; ++ private static final int INTERPRETER_FRAME_METHOD_OFFSET = INTERPRETER_FRAME_LAST_SP_OFFSET - 1; ++ private static int INTERPRETER_FRAME_MDX_OFFSET; // Non-core builds only ++ private static int INTERPRETER_FRAME_CACHE_OFFSET; ++ private static int INTERPRETER_FRAME_LOCALS_OFFSET; ++ private static int INTERPRETER_FRAME_BCX_OFFSET; ++ private static int INTERPRETER_FRAME_INITIAL_SP_OFFSET; ++ private static int INTERPRETER_FRAME_MONITOR_BLOCK_TOP_OFFSET; ++ private static int INTERPRETER_FRAME_MONITOR_BLOCK_BOTTOM_OFFSET; ++ ++ // Entry frames ++ private static int ENTRY_FRAME_CALL_WRAPPER_OFFSET; ++ ++ // Native frames ++ private static final int NATIVE_FRAME_INITIAL_PARAM_OFFSET = 2; ++ ++ private static VMReg rbp; ++ ++ static { ++ VM.registerVMInitializedObserver(new Observer() { ++ public void update(Observable o, Object data) { ++ initialize(VM.getVM().getTypeDataBase()); ++ } ++ }); ++ } ++ ++ private static synchronized void initialize(TypeDataBase db) { ++ INTERPRETER_FRAME_MDX_OFFSET = INTERPRETER_FRAME_METHOD_OFFSET - 1; ++ INTERPRETER_FRAME_CACHE_OFFSET = INTERPRETER_FRAME_MDX_OFFSET - 1; ++ INTERPRETER_FRAME_LOCALS_OFFSET = INTERPRETER_FRAME_CACHE_OFFSET - 1; ++ INTERPRETER_FRAME_BCX_OFFSET = INTERPRETER_FRAME_LOCALS_OFFSET - 1; ++ INTERPRETER_FRAME_INITIAL_SP_OFFSET = INTERPRETER_FRAME_BCX_OFFSET - 1; ++ INTERPRETER_FRAME_MONITOR_BLOCK_TOP_OFFSET = INTERPRETER_FRAME_INITIAL_SP_OFFSET; ++ INTERPRETER_FRAME_MONITOR_BLOCK_BOTTOM_OFFSET = INTERPRETER_FRAME_INITIAL_SP_OFFSET; ++ ++ ENTRY_FRAME_CALL_WRAPPER_OFFSET = db.lookupIntConstant("frame::entry_frame_call_wrapper_offset"); ++ if (VM.getVM().getAddressSize() == 4) { ++ rbp = new VMReg(5); ++ } else { ++ rbp = new 
VMReg(5 << 1); ++ } ++ } ++ ++ ++ // an additional field beyond sp and pc: ++ Address raw_fp; // frame pointer ++ private Address raw_unextendedSP; ++ ++ private MIPS64Frame() { ++ } ++ ++ private void adjustForDeopt() { ++ if ( pc != null) { ++ // Look for a deopt pc and if it is deopted convert to original pc ++ CodeBlob cb = VM.getVM().getCodeCache().findBlob(pc); ++ if (cb != null && cb.isJavaMethod()) { ++ NMethod nm = (NMethod) cb; ++ if (pc.equals(nm.deoptHandlerBegin())) { ++ if (Assert.ASSERTS_ENABLED) { ++ Assert.that(this.getUnextendedSP() != null, "null SP in Java frame"); ++ } ++ // adjust pc if frame is deoptimized. ++ pc = this.getUnextendedSP().getAddressAt(nm.origPCOffset()); ++ deoptimized = true; ++ } ++ } ++ } ++ } ++ ++ public MIPS64Frame(Address raw_sp, Address raw_fp, Address pc) { ++ this.raw_sp = raw_sp; ++ this.raw_unextendedSP = raw_sp; ++ this.raw_fp = raw_fp; ++ this.pc = pc; ++ adjustUnextendedSP(); ++ ++ // Frame must be fully constructed before this call ++ adjustForDeopt(); ++ ++ if (DEBUG) { ++ System.out.println("MIPS64Frame(sp, fp, pc): " + this); ++ dumpStack(); ++ } ++ } ++ ++ public MIPS64Frame(Address raw_sp, Address raw_fp) { ++ this.raw_sp = raw_sp; ++ this.raw_unextendedSP = raw_sp; ++ this.raw_fp = raw_fp; ++ this.pc = raw_sp.getAddressAt(-1 * VM.getVM().getAddressSize()); ++ adjustUnextendedSP(); ++ ++ // Frame must be fully constructed before this call ++ adjustForDeopt(); ++ ++ if (DEBUG) { ++ System.out.println("MIPS64Frame(sp, fp): " + this); ++ dumpStack(); ++ } ++ } ++ ++ public MIPS64Frame(Address raw_sp, Address raw_unextendedSp, Address raw_fp, Address pc) { ++ this.raw_sp = raw_sp; ++ this.raw_unextendedSP = raw_unextendedSp; ++ this.raw_fp = raw_fp; ++ this.pc = pc; ++ adjustUnextendedSP(); ++ ++ // Frame must be fully constructed before this call ++ adjustForDeopt(); ++ ++ if (DEBUG) { ++ System.out.println("MIPS64Frame(sp, unextendedSP, fp, pc): " + this); ++ dumpStack(); ++ } ++ ++ } ++ ++ public Object clone() { ++ MIPS64Frame frame = new MIPS64Frame(); ++ frame.raw_sp = raw_sp; ++ frame.raw_unextendedSP = raw_unextendedSP; ++ frame.raw_fp = raw_fp; ++ frame.pc = pc; ++ frame.deoptimized = deoptimized; ++ return frame; ++ } ++ ++ public boolean equals(Object arg) { ++ if (arg == null) { ++ return false; ++ } ++ ++ if (!(arg instanceof MIPS64Frame)) { ++ return false; ++ } ++ ++ MIPS64Frame other = (MIPS64Frame) arg; ++ ++ return (AddressOps.equal(getSP(), other.getSP()) && ++ AddressOps.equal(getUnextendedSP(), other.getUnextendedSP()) && ++ AddressOps.equal(getFP(), other.getFP()) && ++ AddressOps.equal(getPC(), other.getPC())); ++ } ++ ++ public int hashCode() { ++ if (raw_sp == null) { ++ return 0; ++ } ++ ++ return raw_sp.hashCode(); ++ } ++ ++ public String toString() { ++ return "sp: " + (getSP() == null? "null" : getSP().toString()) + ++ ", unextendedSP: " + (getUnextendedSP() == null? "null" : getUnextendedSP().toString()) + ++ ", fp: " + (getFP() == null? "null" : getFP().toString()) + ++ ", pc: " + (pc == null? 
"null" : pc.toString()); ++ } ++ ++ // accessors for the instance variables ++ public Address getFP() { return raw_fp; } ++ public Address getSP() { return raw_sp; } ++ public Address getID() { return raw_sp; } ++ ++ // FIXME: not implemented yet (should be done for Solaris/MIPS64) ++ public boolean isSignalHandlerFrameDbg() { return false; } ++ public int getSignalNumberDbg() { return 0; } ++ public String getSignalNameDbg() { return null; } ++ ++ public boolean isInterpretedFrameValid() { ++ if (Assert.ASSERTS_ENABLED) { ++ Assert.that(isInterpretedFrame(), "Not an interpreted frame"); ++ } ++ ++ // These are reasonable sanity checks ++ if (getFP() == null || getFP().andWithMask(0x3) != null) { ++ return false; ++ } ++ ++ if (getSP() == null || getSP().andWithMask(0x3) != null) { ++ return false; ++ } ++ ++ if (getFP().addOffsetTo(INTERPRETER_FRAME_INITIAL_SP_OFFSET * VM.getVM().getAddressSize()).lessThan(getSP())) { ++ return false; ++ } ++ ++ // These are hacks to keep us out of trouble. ++ // The problem with these is that they mask other problems ++ if (getFP().lessThanOrEqual(getSP())) { ++ // this attempts to deal with unsigned comparison above ++ return false; ++ } ++ ++ if (getFP().minus(getSP()) > 4096 * VM.getVM().getAddressSize()) { ++ // stack frames shouldn't be large. ++ return false; ++ } ++ ++ return true; ++ } ++ ++ // FIXME: not applicable in current system ++ // void patch_pc(Thread* thread, address pc); ++ ++ public Frame sender(RegisterMap regMap, CodeBlob cb) { ++ MIPS64RegisterMap map = (MIPS64RegisterMap) regMap; ++ ++ if (Assert.ASSERTS_ENABLED) { ++ Assert.that(map != null, "map must be set"); ++ } ++ ++ // Default is we done have to follow them. The sender_for_xxx will ++ // update it accordingly ++ map.setIncludeArgumentOops(false); ++ ++ if (isEntryFrame()) return senderForEntryFrame(map); ++ if (isInterpretedFrame()) return senderForInterpreterFrame(map); ++ ++ if(cb == null) { ++ cb = VM.getVM().getCodeCache().findBlob(getPC()); ++ } else { ++ if (Assert.ASSERTS_ENABLED) { ++ Assert.that(cb.equals(VM.getVM().getCodeCache().findBlob(getPC())), "Must be the same"); ++ } ++ } ++ ++ if (cb != null) { ++ return senderForCompiledFrame(map, cb); ++ } ++ ++ // Must be native-compiled frame, i.e. the marshaling code for native ++ // methods that exists in the core system. 
++ return new MIPS64Frame(getSenderSP(), getLink(), getSenderPC()); ++ } ++ ++ private Frame senderForEntryFrame(MIPS64RegisterMap map) { ++ if (DEBUG) { ++ System.out.println("senderForEntryFrame"); ++ } ++ if (Assert.ASSERTS_ENABLED) { ++ Assert.that(map != null, "map must be set"); ++ } ++ // Java frame called from C; skip all C frames and return top C ++ // frame of that chunk as the sender ++ MIPS64JavaCallWrapper jcw = (MIPS64JavaCallWrapper) getEntryFrameCallWrapper(); ++ if (Assert.ASSERTS_ENABLED) { ++ Assert.that(!entryFrameIsFirst(), "next Java fp must be non zero"); ++ Assert.that(jcw.getLastJavaSP().greaterThan(getSP()), "must be above this frame on stack"); ++ } ++ MIPS64Frame fr; ++ if (jcw.getLastJavaPC() != null) { ++ fr = new MIPS64Frame(jcw.getLastJavaSP(), jcw.getLastJavaFP(), jcw.getLastJavaPC()); ++ } else { ++ fr = new MIPS64Frame(jcw.getLastJavaSP(), jcw.getLastJavaFP()); ++ } ++ map.clear(); ++ if (Assert.ASSERTS_ENABLED) { ++ Assert.that(map.getIncludeArgumentOops(), "should be set by clear"); ++ } ++ return fr; ++ } ++ ++ //------------------------------------------------------------------------------ ++ // frame::adjust_unextended_sp ++ private void adjustUnextendedSP() { ++ // On mips64, sites calling method handle intrinsics and lambda forms are treated ++ // as any other call site. Therefore, no special action is needed when we are ++ // returning to any of these call sites. ++ ++ CodeBlob cb = cb(); ++ NMethod senderNm = (cb == null) ? null : cb.asNMethodOrNull(); ++ if (senderNm != null) { ++ // If the sender PC is a deoptimization point, get the original PC. ++ if (senderNm.isDeoptEntry(getPC()) || ++ senderNm.isDeoptMhEntry(getPC())) { ++ // DEBUG_ONLY(verifyDeoptriginalPc(senderNm, raw_unextendedSp)); ++ } ++ } ++ } ++ ++ private Frame senderForInterpreterFrame(MIPS64RegisterMap map) { ++ if (DEBUG) { ++ System.out.println("senderForInterpreterFrame"); ++ } ++ Address unextendedSP = addressOfStackSlot(INTERPRETER_FRAME_SENDER_SP_OFFSET).getAddressAt(0); ++ Address sp = addressOfStackSlot(SENDER_SP_OFFSET); ++ // We do not need to update the callee-save register mapping because above ++ // us is either another interpreter frame or a converter-frame, but never ++ // directly a compiled frame. ++ // 11/24/04 SFG. With the removal of adapter frames this is no longer true. ++ // However c2 no longer uses callee save register for java calls so there ++ // are no callee register to find. ++ ++ if (map.getUpdateMap()) ++ updateMapWithSavedLink(map, addressOfStackSlot(LINK_OFFSET)); ++ ++ return new MIPS64Frame(sp, unextendedSP, getLink(), getSenderPC()); ++ } ++ ++ private void updateMapWithSavedLink(RegisterMap map, Address savedFPAddr) { ++ map.setLocation(rbp, savedFPAddr); ++ } ++ ++ private Frame senderForCompiledFrame(MIPS64RegisterMap map, CodeBlob cb) { ++ if (DEBUG) { ++ System.out.println("senderForCompiledFrame"); ++ } ++ ++ // ++ // NOTE: some of this code is (unfortunately) duplicated in MIPS64CurrentFrameGuess ++ // ++ ++ if (Assert.ASSERTS_ENABLED) { ++ Assert.that(map != null, "map must be set"); ++ } ++ ++ // frame owned by optimizing compiler ++ if (Assert.ASSERTS_ENABLED) { ++ Assert.that(cb.getFrameSize() >= 0, "must have non-zero frame size"); ++ } ++ Address senderSP = getUnextendedSP().addOffsetTo(cb.getFrameSize()); ++ ++ // On Intel the return_address is always the word on the stack ++ Address senderPC = senderSP.getAddressAt(-1 * VM.getVM().getAddressSize()); ++ ++ // This is the saved value of EBP which may or may not really be an FP. 
++ // It is only an FP if the sender is an interpreter frame (or C1?). ++ Address savedFPAddr = senderSP.addOffsetTo(- SENDER_SP_OFFSET * VM.getVM().getAddressSize()); ++ ++ if (map.getUpdateMap()) { ++ // Tell GC to use argument oopmaps for some runtime stubs that need it. ++ // For C1, the runtime stub might not have oop maps, so set this flag ++ // outside of update_register_map. ++ map.setIncludeArgumentOops(cb.callerMustGCArguments()); ++ ++ if (cb.getOopMaps() != null) { ++ OopMapSet.updateRegisterMap(this, cb, map, true); ++ } ++ ++ // Since the prolog does the save and restore of EBP there is no oopmap ++ // for it so we must fill in its location as if there was an oopmap entry ++ // since if our caller was compiled code there could be live jvm state in it. ++ updateMapWithSavedLink(map, savedFPAddr); ++ } ++ ++ return new MIPS64Frame(senderSP, savedFPAddr.getAddressAt(0), senderPC); ++ } ++ ++ protected boolean hasSenderPD() { ++ // FIXME ++ // Check for null ebp? Need to do some tests. ++ return true; ++ } ++ ++ public long frameSize() { ++ return (getSenderSP().minus(getSP()) / VM.getVM().getAddressSize()); ++ } ++ ++ public Address getLink() { ++ return addressOfStackSlot(LINK_OFFSET).getAddressAt(0); ++ } ++ ++ // FIXME: not implementable yet ++ //inline void frame::set_link(intptr_t* addr) { *(intptr_t **)addr_at(link_offset) = addr; } ++ ++ public Address getUnextendedSP() { return raw_unextendedSP; } ++ ++ // Return address: ++ public Address getSenderPCAddr() { return addressOfStackSlot(RETURN_ADDR_OFFSET); } ++ public Address getSenderPC() { return getSenderPCAddr().getAddressAt(0); } ++ ++ // return address of param, zero origin index. ++ public Address getNativeParamAddr(int idx) { ++ return addressOfStackSlot(NATIVE_FRAME_INITIAL_PARAM_OFFSET + idx); ++ } ++ ++ public Address getSenderSP() { return addressOfStackSlot(SENDER_SP_OFFSET); } ++ ++ public Address addressOfInterpreterFrameLocals() { ++ return addressOfStackSlot(INTERPRETER_FRAME_LOCALS_OFFSET); ++ } ++ ++ private Address addressOfInterpreterFrameBCX() { ++ return addressOfStackSlot(INTERPRETER_FRAME_BCX_OFFSET); ++ } ++ ++ public int getInterpreterFrameBCI() { ++ // FIXME: this is not atomic with respect to GC and is unsuitable ++ // for use in a non-debugging, or reflective, system. Need to ++ // figure out how to express this. 
++ Address bcp = addressOfInterpreterFrameBCX().getAddressAt(0); ++ Address methodHandle = addressOfInterpreterFrameMethod().getAddressAt(0); ++ Method method = (Method)Metadata.instantiateWrapperFor(methodHandle); ++ return bcpToBci(bcp, method); ++ } ++ ++ public Address addressOfInterpreterFrameMDX() { ++ return addressOfStackSlot(INTERPRETER_FRAME_MDX_OFFSET); ++ } ++ ++ // FIXME ++ //inline int frame::interpreter_frame_monitor_size() { ++ // return BasicObjectLock::size(); ++ //} ++ ++ // expression stack ++ // (the max_stack arguments are used by the GC; see class FrameClosure) ++ ++ public Address addressOfInterpreterFrameExpressionStack() { ++ Address monitorEnd = interpreterFrameMonitorEnd().address(); ++ return monitorEnd.addOffsetTo(-1 * VM.getVM().getAddressSize()); ++ } ++ ++ public int getInterpreterFrameExpressionStackDirection() { return -1; } ++ ++ // top of expression stack ++ public Address addressOfInterpreterFrameTOS() { ++ return getSP(); ++ } ++ ++ /** Expression stack from top down */ ++ public Address addressOfInterpreterFrameTOSAt(int slot) { ++ return addressOfInterpreterFrameTOS().addOffsetTo(slot * VM.getVM().getAddressSize()); ++ } ++ ++ public Address getInterpreterFrameSenderSP() { ++ if (Assert.ASSERTS_ENABLED) { ++ Assert.that(isInterpretedFrame(), "interpreted frame expected"); ++ } ++ return addressOfStackSlot(INTERPRETER_FRAME_SENDER_SP_OFFSET).getAddressAt(0); ++ } ++ ++ // Monitors ++ public BasicObjectLock interpreterFrameMonitorBegin() { ++ return new BasicObjectLock(addressOfStackSlot(INTERPRETER_FRAME_MONITOR_BLOCK_BOTTOM_OFFSET)); ++ } ++ ++ public BasicObjectLock interpreterFrameMonitorEnd() { ++ Address result = addressOfStackSlot(INTERPRETER_FRAME_MONITOR_BLOCK_TOP_OFFSET).getAddressAt(0); ++ if (Assert.ASSERTS_ENABLED) { ++ // make sure the pointer points inside the frame ++ Assert.that(AddressOps.gt(getFP(), result), "result must < than frame pointer"); ++ Assert.that(AddressOps.lte(getSP(), result), "result must >= than stack pointer"); ++ } ++ return new BasicObjectLock(result); ++ } ++ ++ public int interpreterFrameMonitorSize() { ++ return BasicObjectLock.size(); ++ } ++ ++ // Method ++ public Address addressOfInterpreterFrameMethod() { ++ return addressOfStackSlot(INTERPRETER_FRAME_METHOD_OFFSET); ++ } ++ ++ // Constant pool cache ++ public Address addressOfInterpreterFrameCPCache() { ++ return addressOfStackSlot(INTERPRETER_FRAME_CACHE_OFFSET); ++ } ++ ++ // Entry frames ++ public JavaCallWrapper getEntryFrameCallWrapper() { ++ return new MIPS64JavaCallWrapper(addressOfStackSlot(ENTRY_FRAME_CALL_WRAPPER_OFFSET).getAddressAt(0)); ++ } ++ ++ protected Address addressOfSavedOopResult() { ++ // offset is 2 for compiler2 and 3 for compiler1 ++ return getSP().addOffsetTo((VM.getVM().isClientCompiler() ? 
2 : 3) * ++ VM.getVM().getAddressSize()); ++ } ++ ++ protected Address addressOfSavedReceiver() { ++ return getSP().addOffsetTo(-4 * VM.getVM().getAddressSize()); ++ } ++ ++ private void dumpStack() { ++ if (getFP() != null) { ++ for (Address addr = getSP().addOffsetTo(-5 * VM.getVM().getAddressSize()); ++ AddressOps.lte(addr, getFP().addOffsetTo(5 * VM.getVM().getAddressSize())); ++ addr = addr.addOffsetTo(VM.getVM().getAddressSize())) { ++ System.out.println(addr + ": " + addr.getAddressAt(0)); ++ } ++ } else { ++ for (Address addr = getSP().addOffsetTo(-5 * VM.getVM().getAddressSize()); ++ AddressOps.lte(addr, getSP().addOffsetTo(20 * VM.getVM().getAddressSize())); ++ addr = addr.addOffsetTo(VM.getVM().getAddressSize())) { ++ System.out.println(addr + ": " + addr.getAddressAt(0)); ++ } ++ } ++ } ++} +diff --git a/hotspot/agent/src/share/classes/sun/jvm/hotspot/runtime/mips64/MIPS64JavaCallWrapper.java b/hotspot/agent/src/share/classes/sun/jvm/hotspot/runtime/mips64/MIPS64JavaCallWrapper.java +new file mode 100644 +index 0000000000..81fcb5b568 +--- /dev/null ++++ b/hotspot/agent/src/share/classes/sun/jvm/hotspot/runtime/mips64/MIPS64JavaCallWrapper.java +@@ -0,0 +1,57 @@ ++/* ++ * Copyright (c) 2001, 2002, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2018, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ * ++ */ ++ ++package sun.jvm.hotspot.runtime.mips64; ++ ++import java.util.*; ++import sun.jvm.hotspot.debugger.*; ++import sun.jvm.hotspot.types.*; ++import sun.jvm.hotspot.runtime.*; ++ ++public class MIPS64JavaCallWrapper extends JavaCallWrapper { ++ private static AddressField lastJavaFPField; ++ ++ static { ++ VM.registerVMInitializedObserver(new Observer() { ++ public void update(Observable o, Object data) { ++ initialize(VM.getVM().getTypeDataBase()); ++ } ++ }); ++ } ++ ++ private static synchronized void initialize(TypeDataBase db) { ++ Type type = db.lookupType("JavaFrameAnchor"); ++ ++ lastJavaFPField = type.getAddressField("_last_Java_fp"); ++ } ++ ++ public MIPS64JavaCallWrapper(Address addr) { ++ super(addr); ++ } ++ ++ public Address getLastJavaFP() { ++ return lastJavaFPField.getValue(addr.addOffsetTo(anchorField.getOffset())); ++ } ++} +diff --git a/hotspot/agent/src/share/classes/sun/jvm/hotspot/runtime/mips64/MIPS64RegisterMap.java b/hotspot/agent/src/share/classes/sun/jvm/hotspot/runtime/mips64/MIPS64RegisterMap.java +new file mode 100644 +index 0000000000..648503792d +--- /dev/null ++++ b/hotspot/agent/src/share/classes/sun/jvm/hotspot/runtime/mips64/MIPS64RegisterMap.java +@@ -0,0 +1,52 @@ ++/* ++ * Copyright (c) 2001, 2012, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2018, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ * ++ */ ++ ++package sun.jvm.hotspot.runtime.mips64; ++ ++import sun.jvm.hotspot.debugger.*; ++import sun.jvm.hotspot.runtime.*; ++ ++public class MIPS64RegisterMap extends RegisterMap { ++ ++ /** This is the only public constructor */ ++ public MIPS64RegisterMap(JavaThread thread, boolean updateMap) { ++ super(thread, updateMap); ++ } ++ ++ protected MIPS64RegisterMap(RegisterMap map) { ++ super(map); ++ } ++ ++ public Object clone() { ++ MIPS64RegisterMap retval = new MIPS64RegisterMap(this); ++ return retval; ++ } ++ ++ // no PD state to clear or copy: ++ protected void clearPD() {} ++ protected void initializePD() {} ++ protected void initializeFromPD(RegisterMap map) {} ++ protected Address getLocationPD(VMReg reg) { return null; } ++} +diff --git a/hotspot/agent/src/share/classes/sun/jvm/hotspot/utilities/PlatformInfo.java b/hotspot/agent/src/share/classes/sun/jvm/hotspot/utilities/PlatformInfo.java +index aa69257866..9c97d09bc3 100644 +--- a/hotspot/agent/src/share/classes/sun/jvm/hotspot/utilities/PlatformInfo.java ++++ b/hotspot/agent/src/share/classes/sun/jvm/hotspot/utilities/PlatformInfo.java +@@ -22,6 +22,13 @@ + * + */ + ++/* ++ * This file has been modified by Loongson Technology in 2020. These ++ * modifications are Copyright (c) 2018, 2020, Loongson Technology, and are made ++ * available on the same license terms set forth above. ++ * ++ */ ++ + package sun.jvm.hotspot.utilities; + + /** Provides canonicalized OS and CPU information for the rest of the +@@ -65,6 +72,10 @@ public class PlatformInfo { + return cpu; + } else if (cpu.equals("aarch64")) { + return cpu; ++ } else if (cpu.equals("mips64") || cpu.equals("mips64el")) { ++ return "mips64"; ++ } else if (cpu.equals("loongarch64")) { ++ return "loongarch64"; + } else { + try { + Class pic = Class.forName("sun.jvm.hotspot.utilities.PlatformInfoClosed"); +diff --git a/hotspot/make/defs.make b/hotspot/make/defs.make +index a3573da56f..6e93182c92 100644 +--- a/hotspot/make/defs.make ++++ b/hotspot/make/defs.make +@@ -22,6 +22,12 @@ + # + # + ++# ++# This file has been modified by Loongson Technology in 2020. These ++# modifications are Copyright (c) 2015, 2020, Loongson Technology, and are made ++# available on the same license terms set forth above. ++# ++ + # The common definitions for hotspot builds. + + # Optionally include SPEC file generated by configure. +@@ -285,7 +291,7 @@ ifneq ($(OSNAME),windows) + + # Use uname output for SRCARCH, but deal with platform differences. If ARCH + # is not explicitly listed below, it is treated as x86. 
+- SRCARCH ?= $(ARCH/$(filter sparc sparc64 ia64 amd64 x86_64 ppc ppc64 ppc64le zero aarch64,$(ARCH))) ++ SRCARCH ?= $(ARCH/$(filter sparc sparc64 ia64 amd64 x86_64 ppc ppc64 ppc64le zero aarch64 mips64 loongarch64,$(ARCH))) + ARCH/ = x86 + ARCH/sparc = sparc + ARCH/sparc64= sparc +@@ -295,6 +301,10 @@ ifneq ($(OSNAME),windows) + ARCH/ppc64 = ppc + ARCH/ppc64le= ppc + ARCH/ppc = ppc ++ ARCH/mips64 = mips ++ ARCH/mips64el = mips ++ ARCH/loongarch64 = loongarch ++ ARCH/loongarch = loongarch + ARCH/zero = zero + ARCH/aarch64 = aarch64 + +@@ -317,6 +327,20 @@ ifneq ($(OSNAME),windows) + BUILDARCH = ppc64 + endif + endif ++ ifeq ($(BUILDARCH), mips) ++ ifdef LP64 ++# ifeq ($(OPENJDK_TARGET_CPU_ENDIAN), little) ++# BUILDARCH = mips64el ++# else ++ BUILDARCH = mips64 ++# endif ++ endif ++ endif ++ ifeq ($(BUILDARCH), loongarch) ++ ifdef LP64 ++ BUILDARCH = loongarch64 ++ endif ++ endif + + # LIBARCH is 1:1 mapping from BUILDARCH, except for ARCH=ppc64le + ifeq ($(ARCH),ppc64le) +@@ -332,9 +356,18 @@ ifneq ($(OSNAME),windows) + LIBARCH/sparcv9 = sparcv9 + LIBARCH/ia64 = ia64 + LIBARCH/ppc64 = ppc64 ++ LIBARCH/loongarch = loongarch64 + LIBARCH/zero = $(ZERO_LIBARCH) + +- LP64_ARCH += sparcv9 amd64 ia64 ppc64 aarch64 zero ++ ifeq ($(LIBARCH), mips64) ++ ifeq ($(OPENJDK_TARGET_CPU_ENDIAN), little) ++ LIBARCH = mips64el ++ else ++ LIBARCH = mips64 ++ endif ++ endif ++ ++ LP64_ARCH += sparcv9 amd64 ia64 ppc64 aarch64 mips64 mips64el loongarch64 zero + endif + + # Required make macro settings for all platforms +diff --git a/hotspot/make/linux/Makefile b/hotspot/make/linux/Makefile +index e8f2010412..5aff01e87d 100644 +--- a/hotspot/make/linux/Makefile ++++ b/hotspot/make/linux/Makefile +@@ -74,6 +74,10 @@ ifneq (,$(findstring $(ARCH), ppc ppc64)) + FORCE_TIERED=0 + endif + endif ++# C1 is not ported on mips64, so we cannot build a tiered VM: ++ifeq (mips64, $(findstring mips64, $(ARCH))) ++ FORCE_TIERED=0 ++endif + + ifdef LP64 + ifeq ("$(filter $(LP64_ARCH),$(BUILDARCH))","") +diff --git a/hotspot/make/linux/makefiles/defs.make b/hotspot/make/linux/makefiles/defs.make +index ec414639d2..9ade73ab34 100644 +--- a/hotspot/make/linux/makefiles/defs.make ++++ b/hotspot/make/linux/makefiles/defs.make +@@ -22,6 +22,12 @@ + # + # + ++# ++# This file has been modified by Loongson Technology in 2020. These ++# modifications are Copyright (c) 2018, 2020, Loongson Technology, and are made ++# available on the same license terms set forth above. ++# ++ + # The common definitions for hotspot linux builds. + # Include the top level defs.make under make directory instead of this one. + # This file is included into make/defs.make. 
+@@ -39,6 +45,18 @@ ifndef ARCH + ARCH := ppc64 + endif + endif ++ifeq ($(ARCH), mips64el) ++ ARCH=mips64 ++endif ++ifeq ($(LP64), 1) ++ ifeq ($(ARCH), mips) ++ ARCH=mips64 ++ endif ++endif ++ ++ifeq ($(ARCH), loongarch) ++ ARCH=loongarch64 ++endif + + PATH_SEP ?= : + +@@ -83,6 +101,36 @@ ifneq (,$(findstring $(ARCH), sparc)) + HS_ARCH = sparc + endif + ++# mips ++ifeq ($(ARCH), mips64) ++ ifeq ($(ARCH_DATA_MODEL), 64) ++ ARCH_DATA_MODEL = 64 ++ MAKE_ARGS += LP64=1 ++ PLATFORM = linux-mips64 ++ VM_PLATFORM = linux_mips64 ++ else ++ ARCH_DATA_MODEL = 32 ++ PLATFORM = linux-mips32 ++ VM_PLATFORM = linux_mips32 ++ endif ++ HS_ARCH = mips ++endif ++ ++# loongarch ++ifeq ($(ARCH), loongarch64) ++ ifeq ($(ARCH_DATA_MODEL), 64) ++ ARCH_DATA_MODEL = 64 ++ MAKE_ARGS += LP64=1 ++ PLATFORM = linux-loongarch64 ++ VM_PLATFORM = linux_loongarch64 ++ else ++ ARCH_DATA_MODEL = 32 ++ PLATFORM = linux-loongarch32 ++ VM_PLATFORM = linux_loongarch32 ++ endif ++ HS_ARCH = loongarch ++endif ++ + # i686/i586 and amd64/x86_64 + ifneq (,$(findstring $(ARCH), amd64 x86_64 i686 i586)) + ifeq ($(ARCH_DATA_MODEL), 64) +@@ -311,16 +359,24 @@ ADD_SA_BINARIES/sparc = $(EXPORT_JRE_LIB_ARCH_DIR)/libsaproc.$(LIBRARY_SUFFIX) \ + $(EXPORT_LIB_DIR)/sa-jdi.jar + ADD_SA_BINARIES/aarch64 = $(EXPORT_JRE_LIB_ARCH_DIR)/libsaproc.$(LIBRARY_SUFFIX) \ + $(EXPORT_LIB_DIR)/sa-jdi.jar ++ADD_SA_BINARIES/mips = $(EXPORT_JRE_LIB_ARCH_DIR)/libsaproc.$(LIBRARY_SUFFIX) \ ++ $(EXPORT_LIB_DIR)/sa-jdi.jar ++ADD_SA_BINARIES/loongarch = $(EXPORT_JRE_LIB_ARCH_DIR)/libsaproc.$(LIBRARY_SUFFIX) \ ++ $(EXPORT_LIB_DIR)/sa-jdi.jar + ifeq ($(ENABLE_FULL_DEBUG_SYMBOLS),1) + ifneq ($(STRIP_POLICY),no_strip) + ifeq ($(ZIP_DEBUGINFO_FILES),1) + ADD_SA_BINARIES/x86 += $(EXPORT_JRE_LIB_ARCH_DIR)/libsaproc.diz + ADD_SA_BINARIES/sparc += $(EXPORT_JRE_LIB_ARCH_DIR)/libsaproc.diz + ADD_SA_BINARIES/aarch64 += $(EXPORT_JRE_LIB_ARCH_DIR)/libsaproc.diz ++ ADD_SA_BINARIES/mips += $(EXPORT_JRE_LIB_ARCH_DIR)/libsaproc.diz ++ ADD_SA_BINARIES/loongarch += $(EXPORT_JRE_LIB_ARCH_DIR)/libsaproc.diz + else + ADD_SA_BINARIES/x86 += $(EXPORT_JRE_LIB_ARCH_DIR)/libsaproc.debuginfo + ADD_SA_BINARIES/sparc += $(EXPORT_JRE_LIB_ARCH_DIR)/libsaproc.debuginfo + ADD_SA_BINARIES/aarch64 += $(EXPORT_JRE_LIB_ARCH_DIR)/libsaproc.debuginfo ++ ADD_SA_BINARIES/mips += $(EXPORT_JRE_LIB_ARCH_DIR)/libsaproc.debuginfo ++ ADD_SA_BINARIES/loongarch += $(EXPORT_JRE_LIB_ARCH_DIR)/libsaproc.debuginfo + endif + endif + endif +diff --git a/hotspot/make/linux/makefiles/gcc.make b/hotspot/make/linux/makefiles/gcc.make +index 7dde7f0963..94c6d1d015 100644 +--- a/hotspot/make/linux/makefiles/gcc.make ++++ b/hotspot/make/linux/makefiles/gcc.make +@@ -22,6 +22,12 @@ + # + # + ++# ++# This file has been modified by Loongson Technology in 2020. These ++# modifications are Copyright (c) 2015, 2020, Loongson Technology, and are made ++# available on the same license terms set forth above. 
++# ++ + #------------------------------------------------------------------------ + # CC, CXX & AS + +@@ -177,6 +183,9 @@ ARCHFLAG/aarch64 = + ARCHFLAG/ia64 = + ARCHFLAG/sparc = -m32 -mcpu=v9 + ARCHFLAG/sparcv9 = -m64 -mcpu=v9 ++ARCHFLAG/mips64 = -mabi=64 ++#ARCHFLAG/loongarch64 = -lp64 ++ARCHFLAG/loongarch64 = + ARCHFLAG/zero = $(ZERO_ARCHFLAG) + ARCHFLAG/ppc64 = -m64 + +@@ -202,7 +211,7 @@ else + endif + + # Compiler warnings are treated as errors +-WARNINGS_ARE_ERRORS = -Werror ++#WARNINGS_ARE_ERRORS = -Werror + + ifeq ($(USE_CLANG), true) + # However we need to clean the code up before we can unrestrictedly enable this option with Clang +diff --git a/hotspot/make/linux/makefiles/loongarch64.make b/hotspot/make/linux/makefiles/loongarch64.make +new file mode 100644 +index 0000000000..9e3cdb6f23 +--- /dev/null ++++ b/hotspot/make/linux/makefiles/loongarch64.make +@@ -0,0 +1,43 @@ ++# ++# Copyright (c) 1999, 2010, Oracle and/or its affiliates. All rights reserved. ++# Copyright (c) 2015, 2020, Loongson Technology. All rights reserved. ++# DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++# ++# This code is free software; you can redistribute it and/or modify it ++# under the terms of the GNU General Public License version 2 only, as ++# published by the Free Software Foundation. ++# ++# This code is distributed in the hope that it will be useful, but WITHOUT ++# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++# FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++# version 2 for more details (a copy is included in the LICENSE file that ++# accompanied this code). ++# ++# You should have received a copy of the GNU General Public License version ++# 2 along with this work; if not, write to the Free Software Foundation, ++# Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++# ++# Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++# or visit www.oracle.com if you need additional information or have any ++# questions. ++# ++# ++ ++# Not included in includeDB because it has no dependencies ++Obj_Files += linux_loongarch.o ++ ++# The copied fdlibm routines in sharedRuntimeTrig.o must not be optimized ++OPT_CFLAGS/sharedRuntimeTrig.o = $(OPT_CFLAGS/NOOPT) ++# The copied fdlibm routines in sharedRuntimeTrans.o must not be optimized ++OPT_CFLAGS/sharedRuntimeTrans.o = $(OPT_CFLAGS/NOOPT) ++# Must also specify if CPU is little endian ++CFLAGS += -DVM_LITTLE_ENDIAN ++ ++CFLAGS += -DSICORTEX_ERRATA ++ ++CFLAGS += -D_LP64=1 ++ ++# The serviceability agent relies on frame pointer (%rbp) to walk thread stack ++CFLAGS += -fno-omit-frame-pointer ++ ++OPT_CFLAGS/compactingPermGenGen.o = -O1 +diff --git a/hotspot/make/linux/makefiles/mips64.make b/hotspot/make/linux/makefiles/mips64.make +new file mode 100644 +index 0000000000..d9af3b13ab +--- /dev/null ++++ b/hotspot/make/linux/makefiles/mips64.make +@@ -0,0 +1,43 @@ ++# ++# Copyright (c) 1999, 2010, Oracle and/or its affiliates. All rights reserved. ++# Copyright (c) 2015, 2016, Loongson Technology. All rights reserved. ++# DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++# ++# This code is free software; you can redistribute it and/or modify it ++# under the terms of the GNU General Public License version 2 only, as ++# published by the Free Software Foundation. 
++# ++# This code is distributed in the hope that it will be useful, but WITHOUT ++# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++# FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++# version 2 for more details (a copy is included in the LICENSE file that ++# accompanied this code). ++# ++# You should have received a copy of the GNU General Public License version ++# 2 along with this work; if not, write to the Free Software Foundation, ++# Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++# ++# Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++# or visit www.oracle.com if you need additional information or have any ++# questions. ++# ++# ++ ++# Not included in includeDB because it has no dependencies ++Obj_Files += linux_mips.o ++ ++# The copied fdlibm routines in sharedRuntimeTrig.o must not be optimized ++OPT_CFLAGS/sharedRuntimeTrig.o = $(OPT_CFLAGS/NOOPT) ++# The copied fdlibm routines in sharedRuntimeTrans.o must not be optimized ++OPT_CFLAGS/sharedRuntimeTrans.o = $(OPT_CFLAGS/NOOPT) ++# Must also specify if CPU is little endian ++CFLAGS += -DVM_LITTLE_ENDIAN ++ ++CFLAGS += -DSICORTEX_ERRATA ++ ++CFLAGS += -D_LP64=1 ++ ++# The serviceability agent relies on frame pointer (%rbp) to walk thread stack ++CFLAGS += -fno-omit-frame-pointer ++ ++OPT_CFLAGS/compactingPermGenGen.o = -O1 +diff --git a/hotspot/make/linux/makefiles/sa.make b/hotspot/make/linux/makefiles/sa.make +index cdcb16a1a3..34c71bd666 100644 +--- a/hotspot/make/linux/makefiles/sa.make ++++ b/hotspot/make/linux/makefiles/sa.make +@@ -22,6 +22,12 @@ + # + # + ++# ++# This file has been modified by Loongson Technology in 2020. These ++# modifications are Copyright (c) 2015, 2020, Loongson Technology, and are made ++# available on the same license terms set forth above. ++# ++ + # This makefile (sa.make) is included from the sa.make in the + # build directories. + +@@ -109,6 +115,8 @@ $(GENERATED)/sa-jdi.jar:: $(AGENT_FILES) + $(QUIETLY) $(REMOTE) $(RUN.JAVAH) -classpath $(SA_CLASSDIR) -d $(GENERATED) -jni sun.jvm.hotspot.debugger.x86.X86ThreadContext + $(QUIETLY) $(REMOTE) $(RUN.JAVAH) -classpath $(SA_CLASSDIR) -d $(GENERATED) -jni sun.jvm.hotspot.debugger.amd64.AMD64ThreadContext + $(QUIETLY) $(REMOTE) $(RUN.JAVAH) -classpath $(SA_CLASSDIR) -d $(GENERATED) -jni sun.jvm.hotspot.debugger.aarch64.AARCH64ThreadContext ++ $(QUIETLY) $(REMOTE) $(RUN.JAVAH) -classpath $(SA_CLASSDIR) -d $(GENERATED) -jni sun.jvm.hotspot.debugger.mips64.MIPS64ThreadContext ++ $(QUIETLY) $(REMOTE) $(RUN.JAVAH) -classpath $(SA_CLASSDIR) -d $(GENERATED) -jni sun.jvm.hotspot.debugger.loongarch64.LOONGARCH64ThreadContext + $(QUIETLY) $(REMOTE) $(RUN.JAVAH) -classpath $(SA_CLASSDIR) -d $(GENERATED) -jni sun.jvm.hotspot.debugger.sparc.SPARCThreadContext + $(QUIETLY) $(REMOTE) $(RUN.JAVAH) -classpath $(SA_CLASSDIR) -d $(GENERATED) -jni sun.jvm.hotspot.asm.Disassembler + +diff --git a/hotspot/make/linux/makefiles/saproc.make b/hotspot/make/linux/makefiles/saproc.make +index ffc0ec5ce5..c04a6765df 100644 +--- a/hotspot/make/linux/makefiles/saproc.make ++++ b/hotspot/make/linux/makefiles/saproc.make +@@ -21,6 +21,13 @@ + # questions. + # + # ++ ++# ++# This file has been modified by Loongson Technology in 2019. These ++# modifications are Copyright (c) 2018, 2019, Loongson Technology, and are made ++# available on the same license terms set forth above. 
++# ++ + include $(GAMMADIR)/make/defs.make + include $(GAMMADIR)/make/altsrc.make + +@@ -81,7 +88,12 @@ endif + SA_LFLAGS = $(MAPFLAG:FILENAME=$(SAMAPFILE)) $(LDFLAGS_HASH_STYLE) \ + $(LDFLAGS_NO_EXEC_STACK) $(EXTRA_LDFLAGS) + ++ifneq (mips64, $(findstring mips64, $(BUILDARCH))) + SAARCH ?= $(BUILDARCH) ++else ++#If -Dmips64 is used, mips64 would be conflict with "struct mips64_watch_regs mips64" in /usr/include/asm/ptrace.h. ++SAARCH ?= mips ++endif + + $(LIBSAPROC): $(SASRCFILES) $(SAMAPFILE) + $(QUIETLY) if [ "$(BOOT_JAVA_HOME)" = "" ]; then \ +diff --git a/hotspot/make/linux/makefiles/sparcWorks.make b/hotspot/make/linux/makefiles/sparcWorks.make +index e39116023c..dbc2ace825 100644 +--- a/hotspot/make/linux/makefiles/sparcWorks.make ++++ b/hotspot/make/linux/makefiles/sparcWorks.make +@@ -22,6 +22,12 @@ + # + # + ++# ++# This file has been modified by Loongson Technology in 2015. These ++# modifications are Copyright (c) 2015 Loongson Technology, and are made ++# available on the same license terms set forth above. ++# ++ + #------------------------------------------------------------------------ + # CC, CXX & AS + +@@ -38,6 +44,7 @@ endif + ARCHFLAG = $(ARCHFLAG/$(BUILDARCH)) + ARCHFLAG/i486 = -m32 + ARCHFLAG/amd64 = -m64 ++ARCHFLAG/mips64 = -m64 + + CFLAGS += $(ARCHFLAG) + AOUT_FLAGS += $(ARCHFLAG) +diff --git a/hotspot/make/linux/makefiles/vm.make b/hotspot/make/linux/makefiles/vm.make +index 04b7c20287..5e428538a0 100644 +--- a/hotspot/make/linux/makefiles/vm.make ++++ b/hotspot/make/linux/makefiles/vm.make +@@ -22,6 +22,12 @@ + # + # + ++# ++# This file has been modified by Loongson Technology in 2020. These ++# modifications are Copyright (c) 2018, 2020, Loongson Technology, and are made ++# available on the same license terms set forth above. ++# ++ + # Rules to build JVM and related libraries, included from vm.make in the build + # directory. + +@@ -99,9 +105,22 @@ CXXFLAGS = \ + ${HS_LIB_ARCH} \ + ${VM_DISTRO} + ++ifeq ($(MIPS_ABI),n32) ++ CXXFLAGS += -DN32 ++else ++ ifeq ($(MIPS_ABI),n64) ++ CXXFLAGS += -DN64 ++ endif ++endif + # This is VERY important! The version define must only be supplied to vm_version.o + # If not, ccache will not re-use the cache at all, since the version string might contain + # a time and date. 
++ifdef LOONGSON_RUNTIME_NAME ++ LOONGSON_VM_INFO = -DLOONGSON_RUNTIME_NAME="\"$(LOONGSON_RUNTIME_NAME)\"" ++else ++ LOONGSON_VM_INFO = -DLOONGSON_RUNTIME_NAME="\"\"" ++endif ++CXXFLAGS/vmError.o += ${LOONGSON_VM_INFO} + CXXFLAGS/vm_version.o += ${JRE_VERSION} ${VERSION_CFLAGS} + CXXFLAGS/arguments.o += ${VERSION_CFLAGS} + +@@ -211,6 +230,15 @@ endif + ifeq ($(Platform_arch_model), x86_64) + Src_Files_EXCLUDE += \*x86_32\* + endif ++ifeq ($(Platform_arch_model), mips_32) ++Src_Files_EXCLUDE += \*mips_64\* ++endif ++ifeq ($(Platform_arch_model), mips_64) ++Src_Files_EXCLUDE += \*mips_32\* ++endif ++ifeq ($(Platform_arch_model), loongarch_64) ++Src_Files_EXCLUDE += \*loongarch_32\* ++endif + + # Alternate vm.make + # This has to be included here to allow changes to the source +diff --git a/hotspot/make/linux/platform_loongarch64 b/hotspot/make/linux/platform_loongarch64 +new file mode 100644 +index 0000000000..d704cf389a +--- /dev/null ++++ b/hotspot/make/linux/platform_loongarch64 +@@ -0,0 +1,17 @@ ++os_family = linux ++ ++arch = loongarch ++ ++arch_model = loongarch_64 ++ ++os_arch = linux_loongarch ++ ++os_arch_model = linux_loongarch_64 ++ ++lib_arch = loongarch64 ++ ++compiler = gcc ++ ++gnu_dis_arch = loongarch64 ++ ++sysdefs = -DLINUX -D_GNU_SOURCE -DLOONGARCH64 +diff --git a/hotspot/make/linux/platform_mips64 b/hotspot/make/linux/platform_mips64 +new file mode 100644 +index 0000000000..c283671f82 +--- /dev/null ++++ b/hotspot/make/linux/platform_mips64 +@@ -0,0 +1,17 @@ ++os_family = linux ++ ++arch = mips ++ ++arch_model = mips_64 ++ ++os_arch = linux_mips ++ ++os_arch_model = linux_mips_64 ++ ++lib_arch = mips64 ++ ++compiler = gcc ++ ++gnu_dis_arch = mips64 ++ ++sysdefs = -DLINUX -D_GNU_SOURCE -DMIPS64 +diff --git a/hotspot/make/sa.files b/hotspot/make/sa.files +index d6e728a9a8..43b08e3ad1 100644 +--- a/hotspot/make/sa.files ++++ b/hotspot/make/sa.files +@@ -22,6 +22,12 @@ + # + # + ++# ++# This file has been modified by Loongson Technology in 2020. These ++# modifications are Copyright (c) 2015, 2020, Loongson Technology, and are made ++# available on the same license terms set forth above. ++# ++ + # This filelist macro is included in platform specific sa.make + # included all packages/*.java. package list can be generated by + # $(GAMMADIR)/agent/make/build-pkglist. 
+@@ -52,14 +58,20 @@ $(AGENT_SRC_DIR)/sun/jvm/hotspot/debugger/cdbg/basic/*.java \ + $(AGENT_SRC_DIR)/sun/jvm/hotspot/debugger/dummy/*.java \ + $(AGENT_SRC_DIR)/sun/jvm/hotspot/debugger/linux/*.java \ + $(AGENT_SRC_DIR)/sun/jvm/hotspot/debugger/linux/amd64/*.java \ ++$(AGENT_SRC_DIR)/sun/jvm/hotspot/debugger/linux/mips64/*.java \ ++$(AGENT_SRC_DIR)/sun/jvm/hotspot/debugger/linux/loongarch64/*.java \ + $(AGENT_SRC_DIR)/sun/jvm/hotspot/debugger/linux/x86/*.java \ + $(AGENT_SRC_DIR)/sun/jvm/hotspot/debugger/linux/aarch64/*.java \ + $(AGENT_SRC_DIR)/sun/jvm/hotspot/debugger/linux/sparc/*.java \ ++$(AGENT_SRC_DIR)/sun/jvm/hotspot/debugger/mips64/*.java \ ++$(AGENT_SRC_DIR)/sun/jvm/hotspot/debugger/loongarch64/*.java \ + $(AGENT_SRC_DIR)/sun/jvm/hotspot/debugger/posix/*.java \ + $(AGENT_SRC_DIR)/sun/jvm/hotspot/debugger/posix/elf/*.java \ + $(AGENT_SRC_DIR)/sun/jvm/hotspot/debugger/proc/*.java \ + $(AGENT_SRC_DIR)/sun/jvm/hotspot/debugger/proc/amd64/*.java \ + $(AGENT_SRC_DIR)/sun/jvm/hotspot/debugger/proc/aarch64/*.java \ ++$(AGENT_SRC_DIR)/sun/jvm/hotspot/debugger/proc/mips64/*.java \ ++$(AGENT_SRC_DIR)/sun/jvm/hotspot/debugger/proc/loongarch64/*.java \ + $(AGENT_SRC_DIR)/sun/jvm/hotspot/debugger/proc/sparc/*.java \ + $(AGENT_SRC_DIR)/sun/jvm/hotspot/debugger/proc/x86/*.java \ + $(AGENT_SRC_DIR)/sun/jvm/hotspot/debugger/remote/*.java \ +@@ -94,8 +106,12 @@ $(AGENT_SRC_DIR)/sun/jvm/hotspot/runtime/bsd_x86/*.java \ + $(AGENT_SRC_DIR)/sun/jvm/hotspot/runtime/linux/*.java \ + $(AGENT_SRC_DIR)/sun/jvm/hotspot/runtime/linux_amd64/*.java \ + $(AGENT_SRC_DIR)/sun/jvm/hotspot/runtime/linux_aarch64/*.java \ ++$(AGENT_SRC_DIR)/sun/jvm/hotspot/runtime/linux_mips64/*.java \ ++$(AGENT_SRC_DIR)/sun/jvm/hotspot/runtime/linux_loongarch64/*.java \ + $(AGENT_SRC_DIR)/sun/jvm/hotspot/runtime/linux_x86/*.java \ + $(AGENT_SRC_DIR)/sun/jvm/hotspot/runtime/linux_sparc/*.java \ ++$(AGENT_SRC_DIR)/sun/jvm/hotspot/runtime/mips64/*.java \ ++$(AGENT_SRC_DIR)/sun/jvm/hotspot/runtime/loongarch64/*.java \ + $(AGENT_SRC_DIR)/sun/jvm/hotspot/runtime/posix/*.java \ + $(AGENT_SRC_DIR)/sun/jvm/hotspot/runtime/solaris_amd64/*.java \ + $(AGENT_SRC_DIR)/sun/jvm/hotspot/runtime/solaris_sparc/*.java \ +diff --git a/hotspot/src/cpu/aarch64/vm/c1_LIRAssembler_aarch64.cpp b/hotspot/src/cpu/aarch64/vm/c1_LIRAssembler_aarch64.cpp +index 35d34a08ea..3b8cf4a11d 100644 +--- a/hotspot/src/cpu/aarch64/vm/c1_LIRAssembler_aarch64.cpp ++++ b/hotspot/src/cpu/aarch64/vm/c1_LIRAssembler_aarch64.cpp +@@ -1177,7 +1177,9 @@ void LIR_Assembler::emit_opBranch(LIR_OpBranch* op) { + } + } + +- ++void LIR_Assembler::emit_opCmpBranch(LIR_OpCmpBranch* op) { ++ ShouldNotReachHere(); ++} + + void LIR_Assembler::emit_opConvert(LIR_OpConvert* op) { + LIR_Opr src = op->in_opr(); +@@ -1242,7 +1244,7 @@ void LIR_Assembler::emit_opConvert(LIR_OpConvert* op) { + } + case Bytecodes::_d2l: + { +- Register tmp = op->tmp1()->as_register(); ++ Register tmp = op->tmp()->as_register(); + __ clear_fpsr(); + __ fcvtzd(dest->as_register_lo(), src->as_double_reg()); + __ get_fpsr(tmp); +@@ -1253,7 +1255,7 @@ void LIR_Assembler::emit_opConvert(LIR_OpConvert* op) { + } + case Bytecodes::_f2i: + { +- Register tmp = op->tmp1()->as_register(); ++ Register tmp = op->tmp()->as_register(); + __ clear_fpsr(); + __ fcvtzsw(dest->as_register(), src->as_float_reg()); + __ get_fpsr(tmp); +@@ -1264,7 +1266,7 @@ void LIR_Assembler::emit_opConvert(LIR_OpConvert* op) { + } + case Bytecodes::_f2l: + { +- Register tmp = op->tmp1()->as_register(); ++ Register tmp = op->tmp()->as_register(); + __ 
clear_fpsr(); + __ fcvtzs(dest->as_register_lo(), src->as_float_reg()); + __ get_fpsr(tmp); +@@ -1275,7 +1277,7 @@ void LIR_Assembler::emit_opConvert(LIR_OpConvert* op) { + } + case Bytecodes::_d2i: + { +- Register tmp = op->tmp1()->as_register(); ++ Register tmp = op->tmp()->as_register(); + __ clear_fpsr(); + __ fcvtzdw(dest->as_register(), src->as_double_reg()); + __ get_fpsr(tmp); +@@ -1731,6 +1733,11 @@ void LIR_Assembler::cmove(LIR_Condition condition, LIR_Opr opr1, LIR_Opr opr2, L + __ csel(result->as_register(), opr1->as_register(), opr2->as_register(), acond); + } + ++void LIR_Assembler::cmp_cmove(LIR_Condition condition, LIR_Opr left, LIR_Opr right, ++ LIR_Opr src1, LIR_Opr src2, LIR_Opr result, BasicType type) { ++ ShouldNotReachHere(); ++} ++ + void LIR_Assembler::arith_op(LIR_Code code, LIR_Opr left, LIR_Opr right, LIR_Opr dest, CodeEmitInfo* info, bool pop_fpu_stack) { + assert(info == NULL, "should never be used, idiv/irem and ldiv/lrem not handled by this method"); + +diff --git a/hotspot/src/cpu/aarch64/vm/c1_LIRGenerator_aarch64.cpp b/hotspot/src/cpu/aarch64/vm/c1_LIRGenerator_aarch64.cpp +index 120dd1a7df..6a3289022d 100644 +--- a/hotspot/src/cpu/aarch64/vm/c1_LIRGenerator_aarch64.cpp ++++ b/hotspot/src/cpu/aarch64/vm/c1_LIRGenerator_aarch64.cpp +@@ -277,18 +277,29 @@ void LIRGenerator::increment_counter(LIR_Address* addr, int step) { + __ store(reg, addr); + } + +-void LIRGenerator::cmp_mem_int(LIR_Condition condition, LIR_Opr base, int disp, int c, CodeEmitInfo* info) { ++template ++void LIRGenerator::cmp_mem_int_branch(LIR_Condition condition, LIR_Opr base, int disp, int c, T tgt, CodeEmitInfo* info) { + LIR_Opr reg = new_register(T_INT); + __ load(generate_address(base, disp, T_INT), reg, info); +- __ cmp(condition, reg, LIR_OprFact::intConst(c)); ++ __ cmp_branch(condition, reg, LIR_OprFact::intConst(c), T_INT, tgt); + } + +-void LIRGenerator::cmp_reg_mem(LIR_Condition condition, LIR_Opr reg, LIR_Opr base, int disp, BasicType type, CodeEmitInfo* info) { ++// Explicit instantiation for all supported types. ++template void LIRGenerator::cmp_mem_int_branch(LIR_Condition, LIR_Opr, int, int, Label*, CodeEmitInfo*); ++template void LIRGenerator::cmp_mem_int_branch(LIR_Condition, LIR_Opr, int, int, BlockBegin*, CodeEmitInfo*); ++template void LIRGenerator::cmp_mem_int_branch(LIR_Condition, LIR_Opr, int, int, CodeStub*, CodeEmitInfo*); ++ ++template ++void LIRGenerator::cmp_reg_mem_branch(LIR_Condition condition, LIR_Opr reg, LIR_Opr base, int disp, BasicType type, T tgt, CodeEmitInfo* info) { + LIR_Opr reg1 = new_register(T_INT); + __ load(generate_address(base, disp, type), reg1, info); +- __ cmp(condition, reg, reg1); ++ __ cmp_branch(condition, reg, reg1, type, tgt); + } + ++// Explicit instantiation for all supported types. 
++template void LIRGenerator::cmp_reg_mem_branch(LIR_Condition, LIR_Opr, LIR_Opr, int, BasicType, Label*, CodeEmitInfo*); ++template void LIRGenerator::cmp_reg_mem_branch(LIR_Condition, LIR_Opr, LIR_Opr, int, BasicType, BlockBegin*, CodeEmitInfo*); ++template void LIRGenerator::cmp_reg_mem_branch(LIR_Condition, LIR_Opr, LIR_Opr, int, BasicType, CodeStub*, CodeEmitInfo*); + + bool LIRGenerator::strength_reduce_multiply(LIR_Opr left, int c, LIR_Opr result, LIR_Opr tmp) { + +diff --git a/hotspot/src/cpu/loongarch/vm/assembler_loongarch.cpp b/hotspot/src/cpu/loongarch/vm/assembler_loongarch.cpp +new file mode 100644 +index 0000000000..2996ef7aa7 +--- /dev/null ++++ b/hotspot/src/cpu/loongarch/vm/assembler_loongarch.cpp +@@ -0,0 +1,855 @@ ++/* ++ * Copyright (c) 1997, 2014, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2021, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#include "precompiled.hpp" ++#include "asm/assembler.hpp" ++#include "asm/assembler.inline.hpp" ++#include "gc_interface/collectedHeap.inline.hpp" ++#include "interpreter/interpreter.hpp" ++#include "memory/cardTableModRefBS.hpp" ++#include "memory/resourceArea.hpp" ++#include "prims/methodHandles.hpp" ++#include "runtime/biasedLocking.hpp" ++#include "runtime/interfaceSupport.hpp" ++#include "runtime/objectMonitor.hpp" ++#include "runtime/os.hpp" ++#include "runtime/sharedRuntime.hpp" ++#include "runtime/stubRoutines.hpp" ++#if INCLUDE_ALL_GCS ++#include "gc_implementation/g1/g1CollectedHeap.inline.hpp" ++#include "gc_implementation/g1/g1SATBCardTableModRefBS.hpp" ++#include "gc_implementation/g1/heapRegion.hpp" ++#endif // INCLUDE_ALL_GCS ++ ++#ifdef PRODUCT ++#define BLOCK_COMMENT(str) /* nothing */ ++#define STOP(error) stop(error) ++#else ++#define BLOCK_COMMENT(str) block_comment(str) ++#define STOP(error) block_comment(error); stop(error) ++#endif ++ ++#define BIND(label) bind(label); BLOCK_COMMENT(#label ":") ++ ++// Implementation of AddressLiteral ++ ++AddressLiteral::AddressLiteral(address target, relocInfo::relocType rtype) { ++ _is_lval = false; ++ _target = target; ++ _rspec = rspec_from_rtype(rtype, target); ++} ++ ++// Implementation of Address ++ ++Address Address::make_array(ArrayAddress adr) { ++ AddressLiteral base = adr.base(); ++ Address index = adr.index(); ++ assert(index._disp == 0, "must not have disp"); // maybe it can? 
++ Address array(index._base, index._index, index._scale, (intptr_t) base.target()); ++ array._rspec = base._rspec; ++ return array; ++} ++ ++// exceedingly dangerous constructor ++Address::Address(address loc, RelocationHolder spec) { ++ _base = noreg; ++ _index = noreg; ++ _scale = no_scale; ++ _disp = (intptr_t) loc; ++ _rspec = spec; ++} ++ ++ ++int Assembler::is_int_mask(int x) { ++ int xx = x; ++ int count = 0; ++ ++ while (x != 0) { ++ x &= (x - 1); ++ count++; ++ } ++ ++ if ((1<> 12)); ++ if (split_low12(disp)) ++ ori(AT, AT, split_low12(disp)); ++ ++ if (scale == 0) { ++ add_d(AT, AT, index); ++ } else { ++ alsl_d(AT, index, AT, scale - 1); ++ } ++ ldx_b(dst, base, AT); ++ } ++ } else { ++ if (is_simm(disp, 12)) { ++ ld_b(dst, base, disp); ++ } else { ++ lu12i_w(AT, split_low20(disp >> 12)); ++ if (split_low12(disp)) ++ ori(AT, AT, split_low12(disp)); ++ ldx_b(dst, base, AT); ++ } ++ } ++} ++ ++void Assembler::ld_bu(Register rd, Address src) { ++ Register dst = rd; ++ Register base = src.base(); ++ Register index = src.index(); ++ ++ int scale = src.scale(); ++ int disp = src.disp(); ++ ++ if (index != noreg) { ++ if (is_simm(disp, 12)) { ++ if (scale == 0) { ++ if (disp == 0) { ++ ldx_bu(dst, base, index); ++ } else { ++ add_d(AT, base, index); ++ ld_bu(dst, AT, disp); ++ } ++ } else { ++ alsl_d(AT, index, base, scale - 1); ++ ld_bu(dst, AT, disp); ++ } ++ } else { ++ lu12i_w(AT, split_low20(disp >> 12)); ++ if (split_low12(disp)) ++ ori(AT, AT, split_low12(disp)); ++ ++ if (scale == 0) { ++ add_d(AT, AT, index); ++ } else { ++ alsl_d(AT, index, AT, scale - 1); ++ } ++ ldx_bu(dst, base, AT); ++ } ++ } else { ++ if (is_simm(disp, 12)) { ++ ld_bu(dst, base, disp); ++ } else { ++ lu12i_w(AT, split_low20(disp >> 12)); ++ if (split_low12(disp)) ++ ori(AT, AT, split_low12(disp)); ++ ldx_bu(dst, base, AT); ++ } ++ } ++} ++ ++void Assembler::ld_d(Register rd, Address src){ ++ Register dst = rd; ++ Register base = src.base(); ++ Register index = src.index(); ++ ++ int scale = src.scale(); ++ int disp = src.disp(); ++ ++ if (index != noreg) { ++ if (is_simm(disp, 12)) { ++ if (scale == 0) { ++ if (disp == 0) { ++ ldx_d(dst, base, index); ++ } else { ++ add_d(AT, base, index); ++ ld_d(dst, AT, disp); ++ } ++ } else { ++ alsl_d(AT, index, base, scale - 1); ++ ld_d(dst, AT, disp); ++ } ++ } else if (is_simm(disp, 16) && !(disp & 3)) { ++ if (scale == 0) { ++ add_d(AT, base, index); ++ } else { ++ alsl_d(AT, index, base, scale - 1); ++ } ++ ldptr_d(dst, AT, disp); ++ } else { ++ lu12i_w(AT, split_low20(disp >> 12)); ++ if (split_low12(disp)) ++ ori(AT, AT, split_low12(disp)); ++ ++ if (scale == 0) { ++ add_d(AT, AT, index); ++ } else { ++ alsl_d(AT, index, AT, scale - 1); ++ } ++ ldx_d(dst, base, AT); ++ } ++ } else { ++ if (is_simm(disp, 12)) { ++ ld_d(dst, base, disp); ++ } else if (is_simm(disp, 16) && !(disp & 3)) { ++ ldptr_d(dst, base, disp); ++ } else { ++ lu12i_w(AT, split_low20(disp >> 12)); ++ if (split_low12(disp)) ++ ori(AT, AT, split_low12(disp)); ++ ldx_d(dst, base, AT); ++ } ++ } ++} ++ ++void Assembler::ld_h(Register rd, Address src){ ++ Register dst = rd; ++ Register base = src.base(); ++ Register index = src.index(); ++ ++ int scale = src.scale(); ++ int disp = src.disp(); ++ ++ if (index != noreg) { ++ if (is_simm(disp, 12)) { ++ if (scale == 0) { ++ if (disp == 0) { ++ ldx_h(dst, base, index); ++ } else { ++ add_d(AT, base, index); ++ ld_h(dst, AT, disp); ++ } ++ } else { ++ alsl_d(AT, index, base, scale - 1); ++ ld_h(dst, AT, disp); ++ } ++ } else { ++ lu12i_w(AT, 
split_low20(disp >> 12)); ++ if (split_low12(disp)) ++ ori(AT, AT, split_low12(disp)); ++ ++ if (scale == 0) { ++ add_d(AT, AT, index); ++ } else { ++ alsl_d(AT, index, AT, scale - 1); ++ } ++ ldx_h(dst, base, AT); ++ } ++ } else { ++ if (is_simm(disp, 12)) { ++ ld_h(dst, base, disp); ++ } else { ++ lu12i_w(AT, split_low20(disp >> 12)); ++ if (split_low12(disp)) ++ ori(AT, AT, split_low12(disp)); ++ ldx_h(dst, base, AT); ++ } ++ } ++} ++ ++void Assembler::ld_hu(Register rd, Address src){ ++ Register dst = rd; ++ Register base = src.base(); ++ Register index = src.index(); ++ ++ int scale = src.scale(); ++ int disp = src.disp(); ++ ++ if (index != noreg) { ++ if (is_simm(disp, 12)) { ++ if (scale == 0) { ++ if (disp == 0) { ++ ldx_hu(dst, base, index); ++ } else { ++ add_d(AT, base, index); ++ ld_hu(dst, AT, disp); ++ } ++ } else { ++ alsl_d(AT, index, base, scale - 1); ++ ld_hu(dst, AT, disp); ++ } ++ } else { ++ lu12i_w(AT, split_low20(disp >> 12)); ++ if (split_low12(disp)) ++ ori(AT, AT, split_low12(disp)); ++ ++ if (scale == 0) { ++ add_d(AT, AT, index); ++ } else { ++ alsl_d(AT, index, AT, scale - 1); ++ } ++ ldx_hu(dst, base, AT); ++ } ++ } else { ++ if (is_simm(disp, 12)) { ++ ld_hu(dst, base, disp); ++ } else { ++ lu12i_w(AT, split_low20(disp >> 12)); ++ if (split_low12(disp)) ++ ori(AT, AT, split_low12(disp)); ++ ldx_hu(dst, base, AT); ++ } ++ } ++} ++ ++void Assembler::ll_w(Register rd, Address src){ ++ assert(src.index() == NOREG, "index is unimplemented"); ++ ll_w(rd, src.base(), src.disp()); ++} ++ ++void Assembler::ll_d(Register rd, Address src){ ++ assert(src.index() == NOREG, "index is unimplemented"); ++ ll_d(rd, src.base(), src.disp()); ++} ++ ++void Assembler::ld_w(Register rd, Address src){ ++ Register dst = rd; ++ Register base = src.base(); ++ Register index = src.index(); ++ ++ int scale = src.scale(); ++ int disp = src.disp(); ++ ++ if (index != noreg) { ++ if (is_simm(disp, 12)) { ++ if (scale == 0) { ++ if (disp == 0) { ++ ldx_w(dst, base, index); ++ } else { ++ add_d(AT, base, index); ++ ld_w(dst, AT, disp); ++ } ++ } else { ++ alsl_d(AT, index, base, scale - 1); ++ ld_w(dst, AT, disp); ++ } ++ } else if (is_simm(disp, 16) && !(disp & 3)) { ++ if (scale == 0) { ++ add_d(AT, base, index); ++ } else { ++ alsl_d(AT, index, base, scale - 1); ++ } ++ ldptr_w(dst, AT, disp); ++ } else { ++ lu12i_w(AT, split_low20(disp >> 12)); ++ if (split_low12(disp)) ++ ori(AT, AT, split_low12(disp)); ++ ++ if (scale == 0) { ++ add_d(AT, AT, index); ++ } else { ++ alsl_d(AT, index, AT, scale - 1); ++ } ++ ldx_w(dst, base, AT); ++ } ++ } else { ++ if (is_simm(disp, 12)) { ++ ld_w(dst, base, disp); ++ } else if (is_simm(disp, 16) && !(disp & 3)) { ++ ldptr_w(dst, base, disp); ++ } else { ++ lu12i_w(AT, split_low20(disp >> 12)); ++ if (split_low12(disp)) ++ ori(AT, AT, split_low12(disp)); ++ ldx_w(dst, base, AT); ++ } ++ } ++} ++ ++void Assembler::ld_wu(Register rd, Address src){ ++ Register dst = rd; ++ Register base = src.base(); ++ Register index = src.index(); ++ ++ int scale = src.scale(); ++ int disp = src.disp(); ++ ++ if (index != noreg) { ++ if (is_simm(disp, 12)) { ++ if (scale == 0) { ++ if (disp == 0) { ++ ldx_wu(dst, base, index); ++ } else { ++ add_d(AT, base, index); ++ ld_wu(dst, AT, disp); ++ } ++ } else { ++ alsl_d(AT, index, base, scale - 1); ++ ld_wu(dst, AT, disp); ++ } ++ } else { ++ lu12i_w(AT, split_low20(disp >> 12)); ++ if (split_low12(disp)) ++ ori(AT, AT, split_low12(disp)); ++ ++ if (scale == 0) { ++ add_d(AT, AT, index); ++ } else { ++ alsl_d(AT, index, AT, 
scale - 1); ++ } ++ ldx_wu(dst, base, AT); ++ } ++ } else { ++ if (is_simm(disp, 12)) { ++ ld_wu(dst, base, disp); ++ } else { ++ lu12i_w(AT, split_low20(disp >> 12)); ++ if (split_low12(disp)) ++ ori(AT, AT, split_low12(disp)); ++ ldx_wu(dst, base, AT); ++ } ++ } ++} ++ ++void Assembler::st_b(Register rd, Address dst) { ++ Register src = rd; ++ Register base = dst.base(); ++ Register index = dst.index(); ++ ++ int scale = dst.scale(); ++ int disp = dst.disp(); ++ ++ if (index != noreg) { ++ assert_different_registers(src, AT); ++ if (is_simm(disp, 12)) { ++ if (scale == 0) { ++ if (disp == 0) { ++ stx_b(src, base, index); ++ } else { ++ add_d(AT, base, index); ++ st_b(src, AT, disp); ++ } ++ } else { ++ alsl_d(AT, index, base, scale - 1); ++ st_b(src, AT, disp); ++ } ++ } else { ++ lu12i_w(AT, split_low20(disp >> 12)); ++ if (split_low12(disp)) ++ ori(AT, AT, split_low12(disp)); ++ ++ if (scale == 0) { ++ add_d(AT, AT, index); ++ } else { ++ alsl_d(AT, index, AT, scale - 1); ++ } ++ stx_b(src, base, AT); ++ } ++ } else { ++ if (is_simm(disp, 12)) { ++ st_b(src, base, disp); ++ } else { ++ assert_different_registers(src, AT); ++ lu12i_w(AT, split_low20(disp >> 12)); ++ if (split_low12(disp)) ++ ori(AT, AT, split_low12(disp)); ++ stx_b(src, base, AT); ++ } ++ } ++} ++ ++void Assembler::sc_w(Register rd, Address dst) { ++ assert(dst.index() == NOREG, "index is unimplemented"); ++ sc_w(rd, dst.base(), dst.disp()); ++} ++ ++void Assembler::sc_d(Register rd, Address dst) { ++ assert(dst.index() == NOREG, "index is unimplemented"); ++ sc_d(rd, dst.base(), dst.disp()); ++} ++ ++void Assembler::st_d(Register rd, Address dst) { ++ Register src = rd; ++ Register base = dst.base(); ++ Register index = dst.index(); ++ ++ int scale = dst.scale(); ++ int disp = dst.disp(); ++ ++ if (index != noreg) { ++ assert_different_registers(src, AT); ++ if (is_simm(disp, 12)) { ++ if (scale == 0) { ++ if (disp == 0) { ++ stx_d(src, base, index); ++ } else { ++ add_d(AT, base, index); ++ st_d(src, AT, disp); ++ } ++ } else { ++ alsl_d(AT, index, base, scale - 1); ++ st_d(src, AT, disp); ++ } ++ } else if (is_simm(disp, 16) && !(disp & 3)) { ++ if (scale == 0) { ++ add_d(AT, base, index); ++ } else { ++ alsl_d(AT, index, base, scale - 1); ++ } ++ stptr_d(src, AT, disp); ++ } else { ++ lu12i_w(AT, split_low20(disp >> 12)); ++ if (split_low12(disp)) ++ ori(AT, AT, split_low12(disp)); ++ ++ if (scale == 0) { ++ add_d(AT, AT, index); ++ } else { ++ alsl_d(AT, index, AT, scale - 1); ++ } ++ stx_d(src, base, AT); ++ } ++ } else { ++ if (is_simm(disp, 12)) { ++ st_d(src, base, disp); ++ } else if (is_simm(disp, 16) && !(disp & 3)) { ++ stptr_d(src, base, disp); ++ } else { ++ assert_different_registers(src, AT); ++ lu12i_w(AT, split_low20(disp >> 12)); ++ if (split_low12(disp)) ++ ori(AT, AT, split_low12(disp)); ++ stx_d(src, base, AT); ++ } ++ } ++} ++ ++void Assembler::st_h(Register rd, Address dst) { ++ Register src = rd; ++ Register base = dst.base(); ++ Register index = dst.index(); ++ ++ int scale = dst.scale(); ++ int disp = dst.disp(); ++ ++ if (index != noreg) { ++ assert_different_registers(src, AT); ++ if (is_simm(disp, 12)) { ++ if (scale == 0) { ++ if (disp == 0) { ++ stx_h(src, base, index); ++ } else { ++ add_d(AT, base, index); ++ st_h(src, AT, disp); ++ } ++ } else { ++ alsl_d(AT, index, base, scale - 1); ++ st_h(src, AT, disp); ++ } ++ } else { ++ lu12i_w(AT, split_low20(disp >> 12)); ++ if (split_low12(disp)) ++ ori(AT, AT, split_low12(disp)); ++ ++ if (scale == 0) { ++ add_d(AT, AT, index); ++ } else { 
++ alsl_d(AT, index, AT, scale - 1); ++ } ++ stx_h(src, base, AT); ++ } ++ } else { ++ if (is_simm(disp, 12)) { ++ st_h(src, base, disp); ++ } else { ++ assert_different_registers(src, AT); ++ lu12i_w(AT, split_low20(disp >> 12)); ++ if (split_low12(disp)) ++ ori(AT, AT, split_low12(disp)); ++ stx_h(src, base, AT); ++ } ++ } ++} ++ ++void Assembler::st_w(Register rd, Address dst) { ++ Register src = rd; ++ Register base = dst.base(); ++ Register index = dst.index(); ++ ++ int scale = dst.scale(); ++ int disp = dst.disp(); ++ ++ if (index != noreg) { ++ assert_different_registers(src, AT); ++ if (is_simm(disp, 12)) { ++ if (scale == 0) { ++ if (disp == 0) { ++ stx_w(src, base, index); ++ } else { ++ add_d(AT, base, index); ++ st_w(src, AT, disp); ++ } ++ } else { ++ alsl_d(AT, index, base, scale - 1); ++ st_w(src, AT, disp); ++ } ++ } else if (is_simm(disp, 16) && !(disp & 3)) { ++ if (scale == 0) { ++ add_d(AT, base, index); ++ } else { ++ alsl_d(AT, index, base, scale - 1); ++ } ++ stptr_w(src, AT, disp); ++ } else { ++ lu12i_w(AT, split_low20(disp >> 12)); ++ if (split_low12(disp)) ++ ori(AT, AT, split_low12(disp)); ++ ++ if (scale == 0) { ++ add_d(AT, AT, index); ++ } else { ++ alsl_d(AT, index, AT, scale - 1); ++ } ++ stx_w(src, base, AT); ++ } ++ } else { ++ if (is_simm(disp, 12)) { ++ st_w(src, base, disp); ++ } else if (is_simm(disp, 16) && !(disp & 3)) { ++ stptr_w(src, base, disp); ++ } else { ++ assert_different_registers(src, AT); ++ lu12i_w(AT, split_low20(disp >> 12)); ++ if (split_low12(disp)) ++ ori(AT, AT, split_low12(disp)); ++ stx_w(src, base, AT); ++ } ++ } ++} ++ ++void Assembler::fld_s(FloatRegister fd, Address src) { ++ Register base = src.base(); ++ Register index = src.index(); ++ ++ int scale = src.scale(); ++ int disp = src.disp(); ++ ++ if (index != noreg) { ++ if (is_simm(disp, 12)) { ++ if (scale == 0) { ++ if (disp == 0) { ++ fldx_s(fd, base, index); ++ } else { ++ add_d(AT, base, index); ++ fld_s(fd, AT, disp); ++ } ++ } else { ++ alsl_d(AT, index, base, scale - 1); ++ fld_s(fd, AT, disp); ++ } ++ } else { ++ lu12i_w(AT, split_low20(disp >> 12)); ++ if (split_low12(disp)) ++ ori(AT, AT, split_low12(disp)); ++ ++ if (scale == 0) { ++ add_d(AT, AT, index); ++ } else { ++ alsl_d(AT, index, AT, scale - 1); ++ } ++ fldx_s(fd, base, AT); ++ } ++ } else { ++ if (is_simm(disp, 12)) { ++ fld_s(fd, base, disp); ++ } else { ++ lu12i_w(AT, split_low20(disp >> 12)); ++ if (split_low12(disp)) ++ ori(AT, AT, split_low12(disp)); ++ fldx_s(fd, base, AT); ++ } ++ } ++} ++ ++void Assembler::fld_d(FloatRegister fd, Address src) { ++ Register base = src.base(); ++ Register index = src.index(); ++ ++ int scale = src.scale(); ++ int disp = src.disp(); ++ ++ if (index != noreg) { ++ if (is_simm(disp, 12)) { ++ if (scale == 0) { ++ if (disp == 0) { ++ fldx_d(fd, base, index); ++ } else { ++ add_d(AT, base, index); ++ fld_d(fd, AT, disp); ++ } ++ } else { ++ alsl_d(AT, index, base, scale - 1); ++ fld_d(fd, AT, disp); ++ } ++ } else { ++ lu12i_w(AT, split_low20(disp >> 12)); ++ if (split_low12(disp)) ++ ori(AT, AT, split_low12(disp)); ++ ++ if (scale == 0) { ++ add_d(AT, AT, index); ++ } else { ++ alsl_d(AT, index, AT, scale - 1); ++ } ++ fldx_d(fd, base, AT); ++ } ++ } else { ++ if (is_simm(disp, 12)) { ++ fld_d(fd, base, disp); ++ } else { ++ lu12i_w(AT, split_low20(disp >> 12)); ++ if (split_low12(disp)) ++ ori(AT, AT, split_low12(disp)); ++ fldx_d(fd, base, AT); ++ } ++ } ++} ++ ++void Assembler::fst_s(FloatRegister fd, Address dst) { ++ Register base = dst.base(); ++ Register 
index = dst.index(); ++ ++ int scale = dst.scale(); ++ int disp = dst.disp(); ++ ++ if (index != noreg) { ++ if (is_simm(disp, 12)) { ++ if (scale == 0) { ++ if (disp == 0) { ++ fstx_s(fd, base, index); ++ } else { ++ add_d(AT, base, index); ++ fst_s(fd, AT, disp); ++ } ++ } else { ++ alsl_d(AT, index, base, scale - 1); ++ fst_s(fd, AT, disp); ++ } ++ } else { ++ lu12i_w(AT, split_low20(disp >> 12)); ++ if (split_low12(disp)) ++ ori(AT, AT, split_low12(disp)); ++ ++ if (scale == 0) { ++ add_d(AT, AT, index); ++ } else { ++ alsl_d(AT, index, AT, scale - 1); ++ } ++ fstx_s(fd, base, AT); ++ } ++ } else { ++ if (is_simm(disp, 12)) { ++ fst_s(fd, base, disp); ++ } else { ++ lu12i_w(AT, split_low20(disp >> 12)); ++ if (split_low12(disp)) ++ ori(AT, AT, split_low12(disp)); ++ fstx_s(fd, base, AT); ++ } ++ } ++} ++ ++void Assembler::fst_d(FloatRegister fd, Address dst) { ++ Register base = dst.base(); ++ Register index = dst.index(); ++ ++ int scale = dst.scale(); ++ int disp = dst.disp(); ++ ++ if (index != noreg) { ++ if (is_simm(disp, 12)) { ++ if (scale == 0) { ++ if (disp == 0) { ++ fstx_d(fd, base, index); ++ } else { ++ add_d(AT, base, index); ++ fst_d(fd, AT, disp); ++ } ++ } else { ++ alsl_d(AT, index, base, scale - 1); ++ fst_d(fd, AT, disp); ++ } ++ } else { ++ lu12i_w(AT, split_low20(disp >> 12)); ++ if (split_low12(disp)) ++ ori(AT, AT, split_low12(disp)); ++ ++ if (scale == 0) { ++ add_d(AT, AT, index); ++ } else { ++ alsl_d(AT, index, AT, scale - 1); ++ } ++ fstx_d(fd, base, AT); ++ } ++ } else { ++ if (is_simm(disp, 12)) { ++ fst_d(fd, base, disp); ++ } else { ++ lu12i_w(AT, split_low20(disp >> 12)); ++ if (split_low12(disp)) ++ ori(AT, AT, split_low12(disp)); ++ fstx_d(fd, base, AT); ++ } ++ } ++} +diff --git a/hotspot/src/cpu/loongarch/vm/assembler_loongarch.hpp b/hotspot/src/cpu/loongarch/vm/assembler_loongarch.hpp +new file mode 100644 +index 0000000000..46b57cfe76 +--- /dev/null ++++ b/hotspot/src/cpu/loongarch/vm/assembler_loongarch.hpp +@@ -0,0 +1,2810 @@ ++/* ++ * Copyright (c) 1997, 2013, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#ifndef CPU_LOONGARCH_VM_ASSEMBLER_LOONGARCH_HPP ++#define CPU_LOONGARCH_VM_ASSEMBLER_LOONGARCH_HPP ++ ++#include "asm/register.hpp" ++ ++class BiasedLockingCounters; ++ ++ ++// Note: A register location is represented via a Register, not ++// via an address for efficiency & simplicity reasons. 
++ ++class ArrayAddress; ++ ++class Address VALUE_OBJ_CLASS_SPEC { ++ public: ++ enum ScaleFactor { ++ no_scale = -1, ++ times_1 = 0, ++ times_2 = 1, ++ times_4 = 2, ++ times_8 = 3, ++ times_ptr = times_8 ++ }; ++ static ScaleFactor times(int size) { ++ assert(size >= 1 && size <= 8 && is_power_of_2(size), "bad scale size"); ++ if (size == 8) return times_8; ++ if (size == 4) return times_4; ++ if (size == 2) return times_2; ++ return times_1; ++ } ++ ++ private: ++ Register _base; ++ Register _index; ++ ScaleFactor _scale; ++ int _disp; ++ RelocationHolder _rspec; ++ ++ // Easily misused constructors make them private ++ Address(address loc, RelocationHolder spec); ++ Address(int disp, address loc, relocInfo::relocType rtype); ++ Address(int disp, address loc, RelocationHolder spec); ++ ++ public: ++ ++ // creation ++ Address() ++ : _base(noreg), ++ _index(noreg), ++ _scale(no_scale), ++ _disp(0) { ++ } ++ ++ // No default displacement otherwise Register can be implicitly ++ // converted to 0(Register) which is quite a different animal. ++ ++ Address(Register base, int disp = 0) ++ : _base(base), ++ _index(noreg), ++ _scale(no_scale), ++ _disp(disp) { ++ assert_different_registers(_base, AT); ++ } ++ ++ Address(Register base, Register index, ScaleFactor scale, int disp = 0) ++ : _base (base), ++ _index(index), ++ _scale(scale), ++ _disp (disp) { ++ assert(!index->is_valid() == (scale == Address::no_scale), "inconsistent address"); ++ assert_different_registers(_base, _index, AT); ++ } ++ ++ // The following two overloads are used in connection with the ++ // ByteSize type (see sizes.hpp). They simplify the use of ++ // ByteSize'd arguments in assembly code. Note that their equivalent ++ // for the optimized build are the member functions with int disp ++ // argument since ByteSize is mapped to an int type in that case. ++ // ++ // Note: DO NOT introduce similar overloaded functions for WordSize ++ // arguments as in the optimized mode, both ByteSize and WordSize ++ // are mapped to the same type and thus the compiler cannot make a ++ // distinction anymore (=> compiler errors). ++ ++#ifdef ASSERT ++ Address(Register base, ByteSize disp) ++ : _base(base), ++ _index(noreg), ++ _scale(no_scale), ++ _disp(in_bytes(disp)) { ++ assert_different_registers(_base, AT); ++ } ++ ++ Address(Register base, Register index, ScaleFactor scale, ByteSize disp) ++ : _base(base), ++ _index(index), ++ _scale(scale), ++ _disp(in_bytes(disp)) { ++ assert(!index->is_valid() == (scale == Address::no_scale), "inconsistent address"); ++ assert_different_registers(_base, _index, AT); ++ } ++#endif // ASSERT ++ ++ // accessors ++ bool uses(Register reg) const { return _base == reg || _index == reg; } ++ Register base() const { return _base; } ++ Register index() const { return _index; } ++ ScaleFactor scale() const { return _scale; } ++ int disp() const { return _disp; } ++ ++ static Address make_array(ArrayAddress); ++ ++ friend class Assembler; ++ friend class MacroAssembler; ++ friend class LIR_Assembler; // base/index/scale/disp ++}; ++ ++// Calling convention ++class Argument VALUE_OBJ_CLASS_SPEC { ++ public: ++ enum { ++ n_register_parameters = 8, // 8 integer registers used to pass parameters ++ n_float_register_parameters = 8 // 8 float registers used to pass parameters ++ }; ++}; ++ ++// ++// AddressLiteral has been split out from Address because operands of this type ++// need to be treated specially on 32bit vs. 64bit platforms. 
By splitting it out ++// the few instructions that need to deal with address literals are unique and the ++// MacroAssembler does not have to implement every instruction in the Assembler ++// in order to search for address literals that may need special handling depending ++// on the instruction and the platform. As small step on the way to merging i486/amd64 ++// directories. ++// ++class AddressLiteral VALUE_OBJ_CLASS_SPEC { ++ friend class ArrayAddress; ++ RelocationHolder _rspec; ++ // Typically we use AddressLiterals we want to use their rval ++ // However in some situations we want the lval (effect address) of the item. ++ // We provide a special factory for making those lvals. ++ bool _is_lval; ++ ++ // If the target is far we'll need to load the ea of this to ++ // a register to reach it. Otherwise if near we can do rip ++ // relative addressing. ++ ++ address _target; ++ ++ protected: ++ // creation ++ AddressLiteral() ++ : _is_lval(false), ++ _target(NULL) ++ {} ++ ++ public: ++ ++ ++ AddressLiteral(address target, relocInfo::relocType rtype); ++ ++ AddressLiteral(address target, RelocationHolder const& rspec) ++ : _rspec(rspec), ++ _is_lval(false), ++ _target(target) ++ {} ++ // 32-bit complains about a multiple declaration for int*. ++ AddressLiteral(intptr_t* addr, relocInfo::relocType rtype = relocInfo::none) ++ : _target((address) addr), ++ _rspec(rspec_from_rtype(rtype, (address) addr)) {} ++ ++ AddressLiteral addr() { ++ AddressLiteral ret = *this; ++ ret._is_lval = true; ++ return ret; ++ } ++ ++ ++ private: ++ ++ address target() { return _target; } ++ bool is_lval() { return _is_lval; } ++ ++ relocInfo::relocType reloc() const { return _rspec.type(); } ++ const RelocationHolder& rspec() const { return _rspec; } ++ ++ friend class Assembler; ++ friend class MacroAssembler; ++ friend class Address; ++ friend class LIR_Assembler; ++ RelocationHolder rspec_from_rtype(relocInfo::relocType rtype, address addr) { ++ switch (rtype) { ++ case relocInfo::external_word_type: ++ return external_word_Relocation::spec(addr); ++ case relocInfo::internal_word_type: ++ return internal_word_Relocation::spec(addr); ++ case relocInfo::opt_virtual_call_type: ++ return opt_virtual_call_Relocation::spec(); ++ case relocInfo::static_call_type: ++ return static_call_Relocation::spec(); ++ case relocInfo::runtime_call_type: ++ return runtime_call_Relocation::spec(); ++ case relocInfo::poll_type: ++ case relocInfo::poll_return_type: ++ return Relocation::spec_simple(rtype); ++ case relocInfo::none: ++ case relocInfo::oop_type: ++ // Oops are a special case. Normally they would be their own section ++ // but in cases like icBuffer they are literals in the code stream that ++ // we don't have a section for. We use none so that we get a literal address ++ // which is always patchable. 
++ return RelocationHolder(); ++ default: ++ ShouldNotReachHere(); ++ return RelocationHolder(); ++ } ++ } ++ ++}; ++ ++// Convience classes ++class RuntimeAddress: public AddressLiteral { ++ ++ public: ++ ++ RuntimeAddress(address target) : AddressLiteral(target, relocInfo::runtime_call_type) {} ++ ++}; ++ ++class OopAddress: public AddressLiteral { ++ ++ public: ++ ++ OopAddress(address target) : AddressLiteral(target, relocInfo::oop_type){} ++ ++}; ++ ++class ExternalAddress: public AddressLiteral { ++ ++ public: ++ ++ ExternalAddress(address target) : AddressLiteral(target, relocInfo::external_word_type){} ++ ++}; ++ ++class InternalAddress: public AddressLiteral { ++ ++ public: ++ ++ InternalAddress(address target) : AddressLiteral(target, relocInfo::internal_word_type) {} ++ ++}; ++ ++// x86 can do array addressing as a single operation since disp can be an absolute ++// address amd64 can't. We create a class that expresses the concept but does extra ++// magic on amd64 to get the final result ++ ++class ArrayAddress VALUE_OBJ_CLASS_SPEC { ++ private: ++ ++ AddressLiteral _base; ++ Address _index; ++ ++ public: ++ ++ ArrayAddress() {}; ++ ArrayAddress(AddressLiteral base, Address index): _base(base), _index(index) {}; ++ AddressLiteral base() { return _base; } ++ Address index() { return _index; } ++ ++}; ++ ++// The LoongArch Assembler: Pure assembler doing NO optimizations on the instruction ++// level ; i.e., what you write is what you get. The Assembler is generating code into ++// a CodeBuffer. ++ ++class Assembler : public AbstractAssembler { ++ friend class AbstractAssembler; // for the non-virtual hack ++ friend class LIR_Assembler; // as_Address() ++ friend class StubGenerator; ++ ++ public: ++ // 22-bit opcode, highest 22 bits: bits[31...10] ++ enum ops22 { ++ clo_w_op = 0b0000000000000000000100, ++ clz_w_op = 0b0000000000000000000101, ++ cto_w_op = 0b0000000000000000000110, ++ ctz_w_op = 0b0000000000000000000111, ++ clo_d_op = 0b0000000000000000001000, ++ clz_d_op = 0b0000000000000000001001, ++ cto_d_op = 0b0000000000000000001010, ++ ctz_d_op = 0b0000000000000000001011, ++ revb_2h_op = 0b0000000000000000001100, ++ revb_4h_op = 0b0000000000000000001101, ++ revb_2w_op = 0b0000000000000000001110, ++ revb_d_op = 0b0000000000000000001111, ++ revh_2w_op = 0b0000000000000000010000, ++ revh_d_op = 0b0000000000000000010001, ++ bitrev_4b_op = 0b0000000000000000010010, ++ bitrev_8b_op = 0b0000000000000000010011, ++ bitrev_w_op = 0b0000000000000000010100, ++ bitrev_d_op = 0b0000000000000000010101, ++ ext_w_h_op = 0b0000000000000000010110, ++ ext_w_b_op = 0b0000000000000000010111, ++ rdtimel_w_op = 0b0000000000000000011000, ++ rdtimeh_w_op = 0b0000000000000000011001, ++ rdtime_d_op = 0b0000000000000000011010, ++ cpucfg_op = 0b0000000000000000011011, ++ fabs_s_op = 0b0000000100010100000001, ++ fabs_d_op = 0b0000000100010100000010, ++ fneg_s_op = 0b0000000100010100000101, ++ fneg_d_op = 0b0000000100010100000110, ++ flogb_s_op = 0b0000000100010100001001, ++ flogb_d_op = 0b0000000100010100001010, ++ fclass_s_op = 0b0000000100010100001101, ++ fclass_d_op = 0b0000000100010100001110, ++ fsqrt_s_op = 0b0000000100010100010001, ++ fsqrt_d_op = 0b0000000100010100010010, ++ frecip_s_op = 0b0000000100010100010101, ++ frecip_d_op = 0b0000000100010100010110, ++ frsqrt_s_op = 0b0000000100010100011001, ++ frsqrt_d_op = 0b0000000100010100011010, ++ fmov_s_op = 0b0000000100010100100101, ++ fmov_d_op = 0b0000000100010100100110, ++ movgr2fr_w_op = 0b0000000100010100101001, ++ movgr2fr_d_op = 
0b0000000100010100101010, ++ movgr2frh_w_op = 0b0000000100010100101011, ++ movfr2gr_s_op = 0b0000000100010100101101, ++ movfr2gr_d_op = 0b0000000100010100101110, ++ movfrh2gr_s_op = 0b0000000100010100101111, ++ movgr2fcsr_op = 0b0000000100010100110000, ++ movfcsr2gr_op = 0b0000000100010100110010, ++ movfr2cf_op = 0b0000000100010100110100, ++ movcf2fr_op = 0b0000000100010100110101, ++ movgr2cf_op = 0b0000000100010100110110, ++ movcf2gr_op = 0b0000000100010100110111, ++ fcvt_s_d_op = 0b0000000100011001000110, ++ fcvt_d_s_op = 0b0000000100011001001001, ++ ftintrm_w_s_op = 0b0000000100011010000001, ++ ftintrm_w_d_op = 0b0000000100011010000010, ++ ftintrm_l_s_op = 0b0000000100011010001001, ++ ftintrm_l_d_op = 0b0000000100011010001010, ++ ftintrp_w_s_op = 0b0000000100011010010001, ++ ftintrp_w_d_op = 0b0000000100011010010010, ++ ftintrp_l_s_op = 0b0000000100011010011001, ++ ftintrp_l_d_op = 0b0000000100011010011010, ++ ftintrz_w_s_op = 0b0000000100011010100001, ++ ftintrz_w_d_op = 0b0000000100011010100010, ++ ftintrz_l_s_op = 0b0000000100011010101001, ++ ftintrz_l_d_op = 0b0000000100011010101010, ++ ftintrne_w_s_op = 0b0000000100011010110001, ++ ftintrne_w_d_op = 0b0000000100011010110010, ++ ftintrne_l_s_op = 0b0000000100011010111001, ++ ftintrne_l_d_op = 0b0000000100011010111010, ++ ftint_w_s_op = 0b0000000100011011000001, ++ ftint_w_d_op = 0b0000000100011011000010, ++ ftint_l_s_op = 0b0000000100011011001001, ++ ftint_l_d_op = 0b0000000100011011001010, ++ ffint_s_w_op = 0b0000000100011101000100, ++ ffint_s_l_op = 0b0000000100011101000110, ++ ffint_d_w_op = 0b0000000100011101001000, ++ ffint_d_l_op = 0b0000000100011101001010, ++ frint_s_op = 0b0000000100011110010001, ++ frint_d_op = 0b0000000100011110010010, ++ iocsrrd_b_op = 0b0000011001001000000000, ++ iocsrrd_h_op = 0b0000011001001000000001, ++ iocsrrd_w_op = 0b0000011001001000000010, ++ iocsrrd_d_op = 0b0000011001001000000011, ++ iocsrwr_b_op = 0b0000011001001000000100, ++ iocsrwr_h_op = 0b0000011001001000000101, ++ iocsrwr_w_op = 0b0000011001001000000110, ++ iocsrwr_d_op = 0b0000011001001000000111, ++ vpcnt_b_op = 0b0111001010011100001000, ++ vpcnt_h_op = 0b0111001010011100001001, ++ vpcnt_w_op = 0b0111001010011100001010, ++ vpcnt_d_op = 0b0111001010011100001011, ++ vneg_b_op = 0b0111001010011100001100, ++ vneg_h_op = 0b0111001010011100001101, ++ vneg_w_op = 0b0111001010011100001110, ++ vneg_d_op = 0b0111001010011100001111, ++ vfclass_s_op = 0b0111001010011100110101, ++ vfclass_d_op = 0b0111001010011100110110, ++ vfsqrt_s_op = 0b0111001010011100111001, ++ vfsqrt_d_op = 0b0111001010011100111010, ++ vfrint_s_op = 0b0111001010011101001101, ++ vfrint_d_op = 0b0111001010011101001110, ++ vfrintrm_s_op = 0b0111001010011101010001, ++ vfrintrm_d_op = 0b0111001010011101010010, ++ vfrintrp_s_op = 0b0111001010011101010101, ++ vfrintrp_d_op = 0b0111001010011101010110, ++ vfrintrz_s_op = 0b0111001010011101011001, ++ vfrintrz_d_op = 0b0111001010011101011010, ++ vfrintrne_s_op = 0b0111001010011101011101, ++ vfrintrne_d_op = 0b0111001010011101011110, ++ vfcvtl_s_h_op = 0b0111001010011101111010, ++ vfcvth_s_h_op = 0b0111001010011101111011, ++ vfcvtl_d_s_op = 0b0111001010011101111100, ++ vfcvth_d_s_op = 0b0111001010011101111101, ++ vffint_s_w_op = 0b0111001010011110000000, ++ vffint_s_wu_op = 0b0111001010011110000001, ++ vffint_d_l_op = 0b0111001010011110000010, ++ vffint_d_lu_op = 0b0111001010011110000011, ++ vffintl_d_w_op = 0b0111001010011110000100, ++ vffinth_d_w_op = 0b0111001010011110000101, ++ vftint_w_s_op = 0b0111001010011110001100, ++ vftint_l_d_op 
= 0b0111001010011110001101, ++ vftintrm_w_s_op = 0b0111001010011110001110, ++ vftintrm_l_d_op = 0b0111001010011110001111, ++ vftintrp_w_s_op = 0b0111001010011110010000, ++ vftintrp_l_d_op = 0b0111001010011110010001, ++ vftintrz_w_s_op = 0b0111001010011110010010, ++ vftintrz_l_d_op = 0b0111001010011110010011, ++ vftintrne_w_s_op = 0b0111001010011110010100, ++ vftintrne_l_d_op = 0b0111001010011110010101, ++ vftint_wu_s = 0b0111001010011110010110, ++ vftint_lu_d = 0b0111001010011110010111, ++ vftintrz_wu_f = 0b0111001010011110011100, ++ vftintrz_lu_d = 0b0111001010011110011101, ++ vftintl_l_s_op = 0b0111001010011110100000, ++ vftinth_l_s_op = 0b0111001010011110100001, ++ vftintrml_l_s_op = 0b0111001010011110100010, ++ vftintrmh_l_s_op = 0b0111001010011110100011, ++ vftintrpl_l_s_op = 0b0111001010011110100100, ++ vftintrph_l_s_op = 0b0111001010011110100101, ++ vftintrzl_l_s_op = 0b0111001010011110100110, ++ vftintrzh_l_s_op = 0b0111001010011110100111, ++ vftintrnel_l_s_op = 0b0111001010011110101000, ++ vftintrneh_l_s_op = 0b0111001010011110101001, ++ vreplgr2vr_b_op = 0b0111001010011111000000, ++ vreplgr2vr_h_op = 0b0111001010011111000001, ++ vreplgr2vr_w_op = 0b0111001010011111000010, ++ vreplgr2vr_d_op = 0b0111001010011111000011, ++ xvpcnt_b_op = 0b0111011010011100001000, ++ xvpcnt_h_op = 0b0111011010011100001001, ++ xvpcnt_w_op = 0b0111011010011100001010, ++ xvpcnt_d_op = 0b0111011010011100001011, ++ xvneg_b_op = 0b0111011010011100001100, ++ xvneg_h_op = 0b0111011010011100001101, ++ xvneg_w_op = 0b0111011010011100001110, ++ xvneg_d_op = 0b0111011010011100001111, ++ xvfclass_s_op = 0b0111011010011100110101, ++ xvfclass_d_op = 0b0111011010011100110110, ++ xvfsqrt_s_op = 0b0111011010011100111001, ++ xvfsqrt_d_op = 0b0111011010011100111010, ++ xvfrint_s_op = 0b0111011010011101001101, ++ xvfrint_d_op = 0b0111011010011101001110, ++ xvfrintrm_s_op = 0b0111011010011101010001, ++ xvfrintrm_d_op = 0b0111011010011101010010, ++ xvfrintrp_s_op = 0b0111011010011101010101, ++ xvfrintrp_d_op = 0b0111011010011101010110, ++ xvfrintrz_s_op = 0b0111011010011101011001, ++ xvfrintrz_d_op = 0b0111011010011101011010, ++ xvfrintrne_s_op = 0b0111011010011101011101, ++ xvfrintrne_d_op = 0b0111011010011101011110, ++ xvfcvtl_s_h_op = 0b0111011010011101111010, ++ xvfcvth_s_h_op = 0b0111011010011101111011, ++ xvfcvtl_d_s_op = 0b0111011010011101111100, ++ xvfcvth_d_s_op = 0b0111011010011101111101, ++ xvffint_s_w_op = 0b0111011010011110000000, ++ xvffint_s_wu_op = 0b0111011010011110000001, ++ xvffint_d_l_op = 0b0111011010011110000010, ++ xvffint_d_lu_op = 0b0111011010011110000011, ++ xvffintl_d_w_op = 0b0111011010011110000100, ++ xvffinth_d_w_op = 0b0111011010011110000101, ++ xvftint_w_s_op = 0b0111011010011110001100, ++ xvftint_l_d_op = 0b0111011010011110001101, ++ xvftintrm_w_s_op = 0b0111011010011110001110, ++ xvftintrm_l_d_op = 0b0111011010011110001111, ++ xvftintrp_w_s_op = 0b0111011010011110010000, ++ xvftintrp_l_d_op = 0b0111011010011110010001, ++ xvftintrz_w_s_op = 0b0111011010011110010010, ++ xvftintrz_l_d_op = 0b0111011010011110010011, ++ xvftintrne_w_s_op = 0b0111011010011110010100, ++ xvftintrne_l_d_op = 0b0111011010011110010101, ++ xvftint_wu_s = 0b0111011010011110010110, ++ xvftint_lu_d = 0b0111011010011110010111, ++ xvftintrz_wu_f = 0b0111011010011110011100, ++ xvftintrz_lu_d = 0b0111011010011110011101, ++ xvftintl_l_s_op = 0b0111011010011110100000, ++ xvftinth_l_s_op = 0b0111011010011110100001, ++ xvftintrml_l_s_op = 0b0111011010011110100010, ++ xvftintrmh_l_s_op = 0b0111011010011110100011, ++ 
xvftintrpl_l_s_op = 0b0111011010011110100100, ++ xvftintrph_l_s_op = 0b0111011010011110100101, ++ xvftintrzl_l_s_op = 0b0111011010011110100110, ++ xvftintrzh_l_s_op = 0b0111011010011110100111, ++ xvftintrnel_l_s_op = 0b0111011010011110101000, ++ xvftintrneh_l_s_op = 0b0111011010011110101001, ++ xvreplgr2vr_b_op = 0b0111011010011111000000, ++ xvreplgr2vr_h_op = 0b0111011010011111000001, ++ xvreplgr2vr_w_op = 0b0111011010011111000010, ++ xvreplgr2vr_d_op = 0b0111011010011111000011, ++ vext2xv_h_b_op = 0b0111011010011111000100, ++ vext2xv_w_b_op = 0b0111011010011111000101, ++ vext2xv_d_b_op = 0b0111011010011111000110, ++ vext2xv_w_h_op = 0b0111011010011111000111, ++ vext2xv_d_h_op = 0b0111011010011111001000, ++ vext2xv_d_w_op = 0b0111011010011111001001, ++ vext2xv_hu_bu_op = 0b0111011010011111001010, ++ vext2xv_wu_bu_op = 0b0111011010011111001011, ++ vext2xv_du_bu_op = 0b0111011010011111001100, ++ vext2xv_wu_hu_op = 0b0111011010011111001101, ++ vext2xv_du_hu_op = 0b0111011010011111001110, ++ vext2xv_du_wu_op = 0b0111011010011111001111, ++ xvreplve0_b_op = 0b0111011100000111000000, ++ xvreplve0_h_op = 0b0111011100000111100000, ++ xvreplve0_w_op = 0b0111011100000111110000, ++ xvreplve0_d_op = 0b0111011100000111111000, ++ xvreplve0_q_op = 0b0111011100000111111100, ++ ++ unknow_ops22 = 0b1111111111111111111111 ++ }; ++ ++ // 21-bit opcode, highest 21 bits: bits[31...11] ++ enum ops21 { ++ vinsgr2vr_d_op = 0b011100101110101111110, ++ vpickve2gr_d_op = 0b011100101110111111110, ++ vpickve2gr_du_op = 0b011100101111001111110, ++ vreplvei_d_op = 0b011100101111011111110, ++ ++ unknow_ops21 = 0b111111111111111111111 ++ }; ++ ++ // 20-bit opcode, highest 20 bits: bits[31...12] ++ enum ops20 { ++ vinsgr2vr_w_op = 0b01110010111010111110, ++ vpickve2gr_w_op = 0b01110010111011111110, ++ vpickve2gr_wu_op = 0b01110010111100111110, ++ vreplvei_w_op = 0b01110010111101111110, ++ xvinsgr2vr_d_op = 0b01110110111010111110, ++ xvpickve2gr_d_op = 0b01110110111011111110, ++ xvpickve2gr_du_op = 0b01110110111100111110, ++ xvinsve0_d_op = 0b01110110111111111110, ++ xvpickve_d_op = 0b01110111000000111110, ++ ++ unknow_ops20 = 0b11111111111111111111 ++ }; ++ ++ // 19-bit opcode, highest 19 bits: bits[31...13] ++ enum ops19 { ++ vrotri_b_op = 0b0111001010100000001, ++ vinsgr2vr_h_op = 0b0111001011101011110, ++ vpickve2gr_h_op = 0b0111001011101111110, ++ vpickve2gr_hu_op = 0b0111001011110011110, ++ vreplvei_h_op = 0b0111001011110111110, ++ vbitclri_b_op = 0b0111001100010000001, ++ vbitseti_b_op = 0b0111001100010100001, ++ vbitrevi_b_op = 0b0111001100011000001, ++ vslli_b_op = 0b0111001100101100001, ++ vsrli_b_op = 0b0111001100110000001, ++ vsrai_b_op = 0b0111001100110100001, ++ xvrotri_b_op = 0b0111011010100000001, ++ xvinsgr2vr_w_op = 0b0111011011101011110, ++ xvpickve2gr_w_op = 0b0111011011101111110, ++ xvpickve2gr_wu_op = 0b0111011011110011110, ++ xvinsve0_w_op = 0b0111011011111111110, ++ xvpickve_w_op = 0b0111011100000011110, ++ xvbitclri_b_op = 0b0111011100010000001, ++ xvbitseti_b_op = 0b0111011100010100001, ++ xvbitrevi_b_op = 0b0111011100011000001, ++ xvslli_b_op = 0b0111011100101100001, ++ xvsrli_b_op = 0b0111011100110000001, ++ xvsrai_b_op = 0b0111011100110100001, ++ ++ unknow_ops19 = 0b1111111111111111111 ++ }; ++ ++ // 18-bit opcode, highest 18 bits: bits[31...14] ++ enum ops18 { ++ vrotri_h_op = 0b011100101010000001, ++ vinsgr2vr_b_op = 0b011100101110101110, ++ vpickve2gr_b_op = 0b011100101110111110, ++ vpickve2gr_bu_op = 0b011100101111001110, ++ vreplvei_b_op = 0b011100101111011110, ++ vbitclri_h_op = 
0b011100110001000001, ++ vbitseti_h_op = 0b011100110001010001, ++ vbitrevi_h_op = 0b011100110001100001, ++ vslli_h_op = 0b011100110010110001, ++ vsrli_h_op = 0b011100110011000001, ++ vsrai_h_op = 0b011100110011010001, ++ vsrlni_b_h_op = 0b011100110100000001, ++ xvrotri_h_op = 0b011101101010000001, ++ xvbitclri_h_op = 0b011101110001000001, ++ xvbitseti_h_op = 0b011101110001010001, ++ xvbitrevi_h_op = 0b011101110001100001, ++ xvslli_h_op = 0b011101110010110001, ++ xvsrli_h_op = 0b011101110011000001, ++ xvsrai_h_op = 0b011101110011010001, ++ ++ unknow_ops18 = 0b111111111111111111 ++ }; ++ ++ // 17-bit opcode, highest 17 bits: bits[31...15] ++ enum ops17 { ++ asrtle_d_op = 0b00000000000000010, ++ asrtgt_d_op = 0b00000000000000011, ++ add_w_op = 0b00000000000100000, ++ add_d_op = 0b00000000000100001, ++ sub_w_op = 0b00000000000100010, ++ sub_d_op = 0b00000000000100011, ++ slt_op = 0b00000000000100100, ++ sltu_op = 0b00000000000100101, ++ maskeqz_op = 0b00000000000100110, ++ masknez_op = 0b00000000000100111, ++ nor_op = 0b00000000000101000, ++ and_op = 0b00000000000101001, ++ or_op = 0b00000000000101010, ++ xor_op = 0b00000000000101011, ++ orn_op = 0b00000000000101100, ++ andn_op = 0b00000000000101101, ++ sll_w_op = 0b00000000000101110, ++ srl_w_op = 0b00000000000101111, ++ sra_w_op = 0b00000000000110000, ++ sll_d_op = 0b00000000000110001, ++ srl_d_op = 0b00000000000110010, ++ sra_d_op = 0b00000000000110011, ++ rotr_w_op = 0b00000000000110110, ++ rotr_d_op = 0b00000000000110111, ++ mul_w_op = 0b00000000000111000, ++ mulh_w_op = 0b00000000000111001, ++ mulh_wu_op = 0b00000000000111010, ++ mul_d_op = 0b00000000000111011, ++ mulh_d_op = 0b00000000000111100, ++ mulh_du_op = 0b00000000000111101, ++ mulw_d_w_op = 0b00000000000111110, ++ mulw_d_wu_op = 0b00000000000111111, ++ div_w_op = 0b00000000001000000, ++ mod_w_op = 0b00000000001000001, ++ div_wu_op = 0b00000000001000010, ++ mod_wu_op = 0b00000000001000011, ++ div_d_op = 0b00000000001000100, ++ mod_d_op = 0b00000000001000101, ++ div_du_op = 0b00000000001000110, ++ mod_du_op = 0b00000000001000111, ++ crc_w_b_w_op = 0b00000000001001000, ++ crc_w_h_w_op = 0b00000000001001001, ++ crc_w_w_w_op = 0b00000000001001010, ++ crc_w_d_w_op = 0b00000000001001011, ++ crcc_w_b_w_op = 0b00000000001001100, ++ crcc_w_h_w_op = 0b00000000001001101, ++ crcc_w_w_w_op = 0b00000000001001110, ++ crcc_w_d_w_op = 0b00000000001001111, ++ break_op = 0b00000000001010100, ++ fadd_s_op = 0b00000001000000001, ++ fadd_d_op = 0b00000001000000010, ++ fsub_s_op = 0b00000001000000101, ++ fsub_d_op = 0b00000001000000110, ++ fmul_s_op = 0b00000001000001001, ++ fmul_d_op = 0b00000001000001010, ++ fdiv_s_op = 0b00000001000001101, ++ fdiv_d_op = 0b00000001000001110, ++ fmax_s_op = 0b00000001000010001, ++ fmax_d_op = 0b00000001000010010, ++ fmin_s_op = 0b00000001000010101, ++ fmin_d_op = 0b00000001000010110, ++ fmaxa_s_op = 0b00000001000011001, ++ fmaxa_d_op = 0b00000001000011010, ++ fmina_s_op = 0b00000001000011101, ++ fmina_d_op = 0b00000001000011110, ++ fscaleb_s_op = 0b00000001000100001, ++ fscaleb_d_op = 0b00000001000100010, ++ fcopysign_s_op = 0b00000001000100101, ++ fcopysign_d_op = 0b00000001000100110, ++ ldx_b_op = 0b00111000000000000, ++ ldx_h_op = 0b00111000000001000, ++ ldx_w_op = 0b00111000000010000, ++ ldx_d_op = 0b00111000000011000, ++ stx_b_op = 0b00111000000100000, ++ stx_h_op = 0b00111000000101000, ++ stx_w_op = 0b00111000000110000, ++ stx_d_op = 0b00111000000111000, ++ ldx_bu_op = 0b00111000001000000, ++ ldx_hu_op = 0b00111000001001000, ++ ldx_wu_op = 0b00111000001010000, 
++ fldx_s_op = 0b00111000001100000, ++ fldx_d_op = 0b00111000001101000, ++ fstx_s_op = 0b00111000001110000, ++ fstx_d_op = 0b00111000001111000, ++ vldx_op = 0b00111000010000000, ++ vstx_op = 0b00111000010001000, ++ xvldx_op = 0b00111000010010000, ++ xvstx_op = 0b00111000010011000, ++ amswap_w_op = 0b00111000011000000, ++ amswap_d_op = 0b00111000011000001, ++ amadd_w_op = 0b00111000011000010, ++ amadd_d_op = 0b00111000011000011, ++ amand_w_op = 0b00111000011000100, ++ amand_d_op = 0b00111000011000101, ++ amor_w_op = 0b00111000011000110, ++ amor_d_op = 0b00111000011000111, ++ amxor_w_op = 0b00111000011001000, ++ amxor_d_op = 0b00111000011001001, ++ ammax_w_op = 0b00111000011001010, ++ ammax_d_op = 0b00111000011001011, ++ ammin_w_op = 0b00111000011001100, ++ ammin_d_op = 0b00111000011001101, ++ ammax_wu_op = 0b00111000011001110, ++ ammax_du_op = 0b00111000011001111, ++ ammin_wu_op = 0b00111000011010000, ++ ammin_du_op = 0b00111000011010001, ++ amswap_db_w_op = 0b00111000011010010, ++ amswap_db_d_op = 0b00111000011010011, ++ amadd_db_w_op = 0b00111000011010100, ++ amadd_db_d_op = 0b00111000011010101, ++ amand_db_w_op = 0b00111000011010110, ++ amand_db_d_op = 0b00111000011010111, ++ amor_db_w_op = 0b00111000011011000, ++ amor_db_d_op = 0b00111000011011001, ++ amxor_db_w_op = 0b00111000011011010, ++ amxor_db_d_op = 0b00111000011011011, ++ ammax_db_w_op = 0b00111000011011100, ++ ammax_db_d_op = 0b00111000011011101, ++ ammin_db_w_op = 0b00111000011011110, ++ ammin_db_d_op = 0b00111000011011111, ++ ammax_db_wu_op = 0b00111000011100000, ++ ammax_db_du_op = 0b00111000011100001, ++ ammin_db_wu_op = 0b00111000011100010, ++ ammin_db_du_op = 0b00111000011100011, ++ dbar_op = 0b00111000011100100, ++ ibar_op = 0b00111000011100101, ++ fldgt_s_op = 0b00111000011101000, ++ fldgt_d_op = 0b00111000011101001, ++ fldle_s_op = 0b00111000011101010, ++ fldle_d_op = 0b00111000011101011, ++ fstgt_s_op = 0b00111000011101100, ++ fstgt_d_op = 0b00111000011101101, ++ fstle_s_op = 0b00111000011101110, ++ fstle_d_op = 0b00111000011101111, ++ ldgt_b_op = 0b00111000011110000, ++ ldgt_h_op = 0b00111000011110001, ++ ldgt_w_op = 0b00111000011110010, ++ ldgt_d_op = 0b00111000011110011, ++ ldle_b_op = 0b00111000011110100, ++ ldle_h_op = 0b00111000011110101, ++ ldle_w_op = 0b00111000011110110, ++ ldle_d_op = 0b00111000011110111, ++ stgt_b_op = 0b00111000011111000, ++ stgt_h_op = 0b00111000011111001, ++ stgt_w_op = 0b00111000011111010, ++ stgt_d_op = 0b00111000011111011, ++ stle_b_op = 0b00111000011111100, ++ stle_h_op = 0b00111000011111101, ++ stle_w_op = 0b00111000011111110, ++ stle_d_op = 0b00111000011111111, ++ vseq_b_op = 0b01110000000000000, ++ vseq_h_op = 0b01110000000000001, ++ vseq_w_op = 0b01110000000000010, ++ vseq_d_op = 0b01110000000000011, ++ vsle_b_op = 0b01110000000000100, ++ vsle_h_op = 0b01110000000000101, ++ vsle_w_op = 0b01110000000000110, ++ vsle_d_op = 0b01110000000000111, ++ vsle_bu_op = 0b01110000000001000, ++ vsle_hu_op = 0b01110000000001001, ++ vsle_wu_op = 0b01110000000001010, ++ vsle_du_op = 0b01110000000001011, ++ vslt_b_op = 0b01110000000001100, ++ vslt_h_op = 0b01110000000001101, ++ vslt_w_op = 0b01110000000001110, ++ vslt_d_op = 0b01110000000001111, ++ vslt_bu_op = 0b01110000000010000, ++ vslt_hu_op = 0b01110000000010001, ++ vslt_wu_op = 0b01110000000010010, ++ vslt_du_op = 0b01110000000010011, ++ vadd_b_op = 0b01110000000010100, ++ vadd_h_op = 0b01110000000010101, ++ vadd_w_op = 0b01110000000010110, ++ vadd_d_op = 0b01110000000010111, ++ vsub_b_op = 0b01110000000011000, ++ vsub_h_op = 
0b01110000000011001, ++ vsub_w_op = 0b01110000000011010, ++ vsub_d_op = 0b01110000000011011, ++ vabsd_b_op = 0b01110000011000000, ++ vabsd_h_op = 0b01110000011000001, ++ vabsd_w_op = 0b01110000011000010, ++ vabsd_d_op = 0b01110000011000011, ++ vmax_b_op = 0b01110000011100000, ++ vmax_h_op = 0b01110000011100001, ++ vmax_w_op = 0b01110000011100010, ++ vmax_d_op = 0b01110000011100011, ++ vmin_b_op = 0b01110000011100100, ++ vmin_h_op = 0b01110000011100101, ++ vmin_w_op = 0b01110000011100110, ++ vmin_d_op = 0b01110000011100111, ++ vmul_b_op = 0b01110000100001000, ++ vmul_h_op = 0b01110000100001001, ++ vmul_w_op = 0b01110000100001010, ++ vmul_d_op = 0b01110000100001011, ++ vmuh_b_op = 0b01110000100001100, ++ vmuh_h_op = 0b01110000100001101, ++ vmuh_w_op = 0b01110000100001110, ++ vmuh_d_op = 0b01110000100001111, ++ vmuh_bu_op = 0b01110000100010000, ++ vmuh_hu_op = 0b01110000100010001, ++ vmuh_wu_op = 0b01110000100010010, ++ vmuh_du_op = 0b01110000100010011, ++ vmulwev_h_b_op = 0b01110000100100000, ++ vmulwev_w_h_op = 0b01110000100100001, ++ vmulwev_d_w_op = 0b01110000100100010, ++ vmulwev_q_d_op = 0b01110000100100011, ++ vmulwod_h_b_op = 0b01110000100100100, ++ vmulwod_w_h_op = 0b01110000100100101, ++ vmulwod_d_w_op = 0b01110000100100110, ++ vmulwod_q_d_op = 0b01110000100100111, ++ vmadd_b_op = 0b01110000101010000, ++ vmadd_h_op = 0b01110000101010001, ++ vmadd_w_op = 0b01110000101010010, ++ vmadd_d_op = 0b01110000101010011, ++ vmsub_b_op = 0b01110000101010100, ++ vmsub_h_op = 0b01110000101010101, ++ vmsub_w_op = 0b01110000101010110, ++ vmsub_d_op = 0b01110000101010111, ++ vsll_b_op = 0b01110000111010000, ++ vsll_h_op = 0b01110000111010001, ++ vsll_w_op = 0b01110000111010010, ++ vsll_d_op = 0b01110000111010011, ++ vsrl_b_op = 0b01110000111010100, ++ vsrl_h_op = 0b01110000111010101, ++ vsrl_w_op = 0b01110000111010110, ++ vsrl_d_op = 0b01110000111010111, ++ vsra_b_op = 0b01110000111011000, ++ vsra_h_op = 0b01110000111011001, ++ vsra_w_op = 0b01110000111011010, ++ vsra_d_op = 0b01110000111011011, ++ vrotr_b_op = 0b01110000111011100, ++ vrotr_h_op = 0b01110000111011101, ++ vrotr_w_op = 0b01110000111011110, ++ vrotr_d_op = 0b01110000111011111, ++ vbitclr_b_op = 0b01110001000011000, ++ vbitclr_h_op = 0b01110001000011001, ++ vbitclr_w_op = 0b01110001000011010, ++ vbitclr_d_op = 0b01110001000011011, ++ vbitset_b_op = 0b01110001000011100, ++ vbitset_h_op = 0b01110001000011101, ++ vbitset_w_op = 0b01110001000011110, ++ vbitset_d_op = 0b01110001000011111, ++ vbitrev_b_op = 0b01110001000100000, ++ vbitrev_h_op = 0b01110001000100001, ++ vbitrev_w_op = 0b01110001000100010, ++ vbitrev_d_op = 0b01110001000100011, ++ vand_v_op = 0b01110001001001100, ++ vor_v_op = 0b01110001001001101, ++ vxor_v_op = 0b01110001001001110, ++ vnor_v_op = 0b01110001001001111, ++ vandn_v_op = 0b01110001001010000, ++ vorn_v_op = 0b01110001001010001, ++ vadd_q_op = 0b01110001001011010, ++ vsub_q_op = 0b01110001001011011, ++ vfadd_s_op = 0b01110001001100001, ++ vfadd_d_op = 0b01110001001100010, ++ vfsub_s_op = 0b01110001001100101, ++ vfsub_d_op = 0b01110001001100110, ++ vfmul_s_op = 0b01110001001110001, ++ vfmul_d_op = 0b01110001001110010, ++ vfdiv_s_op = 0b01110001001110101, ++ vfdiv_d_op = 0b01110001001110110, ++ vfmax_s_op = 0b01110001001111001, ++ vfmax_d_op = 0b01110001001111010, ++ vfmin_s_op = 0b01110001001111101, ++ vfmin_d_op = 0b01110001001111110, ++ vfcvt_h_s_op = 0b01110001010001100, ++ vfcvt_s_d_op = 0b01110001010001101, ++ vffint_s_l_op = 0b01110001010010000, ++ vftint_w_d_op = 0b01110001010010011, ++ vftintrm_w_d_op = 
0b01110001010010100, ++ vftintrp_w_d_op = 0b01110001010010101, ++ vftintrz_w_d_op = 0b01110001010010110, ++ vftintrne_w_d_op = 0b01110001010010111, ++ vshuf_h_op = 0b01110001011110101, ++ vshuf_w_op = 0b01110001011110110, ++ vshuf_d_op = 0b01110001011110111, ++ vslti_bu_op = 0b01110010100010000, ++ vslti_hu_op = 0b01110010100010001, ++ vslti_wu_op = 0b01110010100010010, ++ vslti_du_op = 0b01110010100010011, ++ vaddi_bu_op = 0b01110010100010100, ++ vaddi_hu_op = 0b01110010100010101, ++ vaddi_wu_op = 0b01110010100010110, ++ vaddi_du_op = 0b01110010100010111, ++ vsubi_bu_op = 0b01110010100011000, ++ vsubi_hu_op = 0b01110010100011001, ++ vsubi_wu_op = 0b01110010100011010, ++ vsubi_du_op = 0b01110010100011011, ++ vrotri_w_op = 0b01110010101000001, ++ vbitclri_w_op = 0b01110011000100001, ++ vbitseti_w_op = 0b01110011000101001, ++ vbitrevi_w_op = 0b01110011000110001, ++ vslli_w_op = 0b01110011001011001, ++ vsrli_w_op = 0b01110011001100001, ++ vsrai_w_op = 0b01110011001101001, ++ vsrlni_h_w_op = 0b01110011010000001, ++ xvseq_b_op = 0b01110100000000000, ++ xvseq_h_op = 0b01110100000000001, ++ xvseq_w_op = 0b01110100000000010, ++ xvseq_d_op = 0b01110100000000011, ++ xvsle_b_op = 0b01110100000000100, ++ xvsle_h_op = 0b01110100000000101, ++ xvsle_w_op = 0b01110100000000110, ++ xvsle_d_op = 0b01110100000000111, ++ xvsle_bu_op = 0b01110100000001000, ++ xvsle_hu_op = 0b01110100000001001, ++ xvsle_wu_op = 0b01110100000001010, ++ xvsle_du_op = 0b01110100000001011, ++ xvslt_b_op = 0b01110100000001100, ++ xvslt_h_op = 0b01110100000001101, ++ xvslt_w_op = 0b01110100000001110, ++ xvslt_d_op = 0b01110100000001111, ++ xvslt_bu_op = 0b01110100000010000, ++ xvslt_hu_op = 0b01110100000010001, ++ xvslt_wu_op = 0b01110100000010010, ++ xvslt_du_op = 0b01110100000010011, ++ xvadd_b_op = 0b01110100000010100, ++ xvadd_h_op = 0b01110100000010101, ++ xvadd_w_op = 0b01110100000010110, ++ xvadd_d_op = 0b01110100000010111, ++ xvsub_b_op = 0b01110100000011000, ++ xvsub_h_op = 0b01110100000011001, ++ xvsub_w_op = 0b01110100000011010, ++ xvsub_d_op = 0b01110100000011011, ++ xvabsd_b_op = 0b01110100011000000, ++ xvabsd_h_op = 0b01110100011000001, ++ xvabsd_w_op = 0b01110100011000010, ++ xvabsd_d_op = 0b01110100011000011, ++ xvmax_b_op = 0b01110100011100000, ++ xvmax_h_op = 0b01110100011100001, ++ xvmax_w_op = 0b01110100011100010, ++ xvmax_d_op = 0b01110100011100011, ++ xvmin_b_op = 0b01110100011100100, ++ xvmin_h_op = 0b01110100011100101, ++ xvmin_w_op = 0b01110100011100110, ++ xvmin_d_op = 0b01110100011100111, ++ xvmul_b_op = 0b01110100100001000, ++ xvmul_h_op = 0b01110100100001001, ++ xvmul_w_op = 0b01110100100001010, ++ xvmul_d_op = 0b01110100100001011, ++ xvmuh_b_op = 0b01110100100001100, ++ xvmuh_h_op = 0b01110100100001101, ++ xvmuh_w_op = 0b01110100100001110, ++ xvmuh_d_op = 0b01110100100001111, ++ xvmuh_bu_op = 0b01110100100010000, ++ xvmuh_hu_op = 0b01110100100010001, ++ xvmuh_wu_op = 0b01110100100010010, ++ xvmuh_du_op = 0b01110100100010011, ++ xvmulwev_h_b_op = 0b01110100100100000, ++ xvmulwev_w_h_op = 0b01110100100100001, ++ xvmulwev_d_w_op = 0b01110100100100010, ++ xvmulwev_q_d_op = 0b01110100100100011, ++ xvmulwod_h_b_op = 0b01110100100100100, ++ xvmulwod_w_h_op = 0b01110100100100101, ++ xvmulwod_d_w_op = 0b01110100100100110, ++ xvmulwod_q_d_op = 0b01110100100100111, ++ xvmadd_b_op = 0b01110100101010000, ++ xvmadd_h_op = 0b01110100101010001, ++ xvmadd_w_op = 0b01110100101010010, ++ xvmadd_d_op = 0b01110100101010011, ++ xvmsub_b_op = 0b01110100101010100, ++ xvmsub_h_op = 0b01110100101010101, ++ xvmsub_w_op = 
0b01110100101010110, ++ xvmsub_d_op = 0b01110100101010111, ++ xvsll_b_op = 0b01110100111010000, ++ xvsll_h_op = 0b01110100111010001, ++ xvsll_w_op = 0b01110100111010010, ++ xvsll_d_op = 0b01110100111010011, ++ xvsrl_b_op = 0b01110100111010100, ++ xvsrl_h_op = 0b01110100111010101, ++ xvsrl_w_op = 0b01110100111010110, ++ xvsrl_d_op = 0b01110100111010111, ++ xvsra_b_op = 0b01110100111011000, ++ xvsra_h_op = 0b01110100111011001, ++ xvsra_w_op = 0b01110100111011010, ++ xvsra_d_op = 0b01110100111011011, ++ xvrotr_b_op = 0b01110100111011100, ++ xvrotr_h_op = 0b01110100111011101, ++ xvrotr_w_op = 0b01110100111011110, ++ xvrotr_d_op = 0b01110100111011111, ++ xvbitclr_b_op = 0b01110101000011000, ++ xvbitclr_h_op = 0b01110101000011001, ++ xvbitclr_w_op = 0b01110101000011010, ++ xvbitclr_d_op = 0b01110101000011011, ++ xvbitset_b_op = 0b01110101000011100, ++ xvbitset_h_op = 0b01110101000011101, ++ xvbitset_w_op = 0b01110101000011110, ++ xvbitset_d_op = 0b01110101000011111, ++ xvbitrev_b_op = 0b01110101000100000, ++ xvbitrev_h_op = 0b01110101000100001, ++ xvbitrev_w_op = 0b01110101000100010, ++ xvbitrev_d_op = 0b01110101000100011, ++ xvand_v_op = 0b01110101001001100, ++ xvor_v_op = 0b01110101001001101, ++ xvxor_v_op = 0b01110101001001110, ++ xvnor_v_op = 0b01110101001001111, ++ xvandn_v_op = 0b01110101001010000, ++ xvorn_v_op = 0b01110101001010001, ++ xvadd_q_op = 0b01110101001011010, ++ xvsub_q_op = 0b01110101001011011, ++ xvfadd_s_op = 0b01110101001100001, ++ xvfadd_d_op = 0b01110101001100010, ++ xvfsub_s_op = 0b01110101001100101, ++ xvfsub_d_op = 0b01110101001100110, ++ xvfmul_s_op = 0b01110101001110001, ++ xvfmul_d_op = 0b01110101001110010, ++ xvfdiv_s_op = 0b01110101001110101, ++ xvfdiv_d_op = 0b01110101001110110, ++ xvfmax_s_op = 0b01110101001111001, ++ xvfmax_d_op = 0b01110101001111010, ++ xvfmin_s_op = 0b01110101001111101, ++ xvfmin_d_op = 0b01110101001111110, ++ xvfcvt_h_s_op = 0b01110101010001100, ++ xvfcvt_s_d_op = 0b01110101010001101, ++ xvffint_s_l_op = 0b01110101010010000, ++ xvftint_w_d_op = 0b01110101010010011, ++ xvftintrm_w_d_op = 0b01110101010010100, ++ xvftintrp_w_d_op = 0b01110101010010101, ++ xvftintrz_w_d_op = 0b01110101010010110, ++ xvftintrne_w_d_op = 0b01110101010010111, ++ xvshuf_h_op = 0b01110101011110101, ++ xvshuf_w_op = 0b01110101011110110, ++ xvshuf_d_op = 0b01110101011110111, ++ xvperm_w_op = 0b01110101011111010, ++ xvslti_bu_op = 0b01110110100010000, ++ xvslti_hu_op = 0b01110110100010001, ++ xvslti_wu_op = 0b01110110100010010, ++ xvslti_du_op = 0b01110110100010011, ++ xvaddi_bu_op = 0b01110110100010100, ++ xvaddi_hu_op = 0b01110110100010101, ++ xvaddi_wu_op = 0b01110110100010110, ++ xvaddi_du_op = 0b01110110100010111, ++ xvsubi_bu_op = 0b01110110100011000, ++ xvsubi_hu_op = 0b01110110100011001, ++ xvsubi_wu_op = 0b01110110100011010, ++ xvsubi_du_op = 0b01110110100011011, ++ xvrotri_w_op = 0b01110110101000001, ++ xvbitclri_w_op = 0b01110111000100001, ++ xvbitseti_w_op = 0b01110111000101001, ++ xvbitrevi_w_op = 0b01110111000110001, ++ xvslli_w_op = 0b01110111001011001, ++ xvsrli_w_op = 0b01110111001100001, ++ xvsrai_w_op = 0b01110111001101001, ++ ++ unknow_ops17 = 0b11111111111111111 ++ }; ++ ++ // 16-bit opcode, highest 16 bits: bits[31...16] ++ enum ops16 { ++ vrotri_d_op = 0b0111001010100001, ++ vbitclri_d_op = 0b0111001100010001, ++ vbitseti_d_op = 0b0111001100010101, ++ vbitrevi_d_op = 0b0111001100011001, ++ vslli_d_op = 0b0111001100101101, ++ vsrli_d_op = 0b0111001100110001, ++ vsrai_d_op = 0b0111001100110101, ++ vsrlni_w_d_op = 0b0111001101000001, ++ xvrotri_d_op = 
0b0111011010100001, ++ xvbitclri_d_op = 0b0111011100010001, ++ xvbitseti_d_op = 0b0111011100010101, ++ xvbitrevi_d_op = 0b0111011100011001, ++ xvslli_d_op = 0b0111011100101101, ++ xvsrli_d_op = 0b0111011100110001, ++ xvsrai_d_op = 0b0111011100110101, ++ ++ unknow_ops16 = 0b1111111111111111 ++ }; ++ ++ // 15-bit opcode, highest 15 bits: bits[31...17] ++ enum ops15 { ++ vsrlni_d_q_op = 0b011100110100001, ++ ++ unknow_ops15 = 0b111111111111111 ++ }; ++ ++ // 14-bit opcode, highest 14 bits: bits[31...18] ++ enum ops14 { ++ alsl_w_op = 0b00000000000001, ++ bytepick_w_op = 0b00000000000010, ++ bytepick_d_op = 0b00000000000011, ++ alsl_d_op = 0b00000000001011, ++ slli_op = 0b00000000010000, ++ srli_op = 0b00000000010001, ++ srai_op = 0b00000000010010, ++ rotri_op = 0b00000000010011, ++ lddir_op = 0b00000110010000, ++ ldpte_op = 0b00000110010001, ++ vshuf4i_b_op = 0b01110011100100, ++ vshuf4i_h_op = 0b01110011100101, ++ vshuf4i_w_op = 0b01110011100110, ++ vshuf4i_d_op = 0b01110011100111, ++ vandi_b_op = 0b01110011110100, ++ vori_b_op = 0b01110011110101, ++ vxori_b_op = 0b01110011110110, ++ vnori_b_op = 0b01110011110111, ++ vldi_op = 0b01110011111000, ++ vpermi_w_op = 0b01110011111001, ++ xvshuf4i_b_op = 0b01110111100100, ++ xvshuf4i_h_op = 0b01110111100101, ++ xvshuf4i_w_op = 0b01110111100110, ++ xvshuf4i_d_op = 0b01110111100111, ++ xvandi_b_op = 0b01110111110100, ++ xvori_b_op = 0b01110111110101, ++ xvxori_b_op = 0b01110111110110, ++ xvnori_b_op = 0b01110111110111, ++ xvldi_op = 0b01110111111000, ++ xvpermi_w_op = 0b01110111111001, ++ xvpermi_d_op = 0b01110111111010, ++ xvpermi_q_op = 0b01110111111011, ++ ++ unknow_ops14 = 0b11111111111111 ++ }; ++ ++ // 12-bit opcode, highest 12 bits: bits[31...20] ++ enum ops12 { ++ fmadd_s_op = 0b000010000001, ++ fmadd_d_op = 0b000010000010, ++ fmsub_s_op = 0b000010000101, ++ fmsub_d_op = 0b000010000110, ++ fnmadd_s_op = 0b000010001001, ++ fnmadd_d_op = 0b000010001010, ++ fnmsub_s_op = 0b000010001101, ++ fnmsub_d_op = 0b000010001110, ++ vfmadd_s_op = 0b000010010001, ++ vfmadd_d_op = 0b000010010010, ++ vfmsub_s_op = 0b000010010101, ++ vfmsub_d_op = 0b000010010110, ++ vfnmadd_s_op = 0b000010011001, ++ vfnmadd_d_op = 0b000010011010, ++ vfnmsub_s_op = 0b000010011101, ++ vfnmsub_d_op = 0b000010011110, ++ xvfmadd_s_op = 0b000010100001, ++ xvfmadd_d_op = 0b000010100010, ++ xvfmsub_s_op = 0b000010100101, ++ xvfmsub_d_op = 0b000010100110, ++ xvfnmadd_s_op = 0b000010101001, ++ xvfnmadd_d_op = 0b000010101010, ++ xvfnmsub_s_op = 0b000010101101, ++ xvfnmsub_d_op = 0b000010101110, ++ fcmp_cond_s_op = 0b000011000001, ++ fcmp_cond_d_op = 0b000011000010, ++ vfcmp_cond_s_op = 0b000011000101, ++ vfcmp_cond_d_op = 0b000011000110, ++ xvfcmp_cond_s_op = 0b000011001001, ++ xvfcmp_cond_d_op = 0b000011001010, ++ fsel_op = 0b000011010000, ++ vbitsel_v_op = 0b000011010001, ++ xvbitsel_v_op = 0b000011010010, ++ vshuf_b_op = 0b000011010101, ++ xvshuf_b_op = 0b000011010110, ++ ++ unknow_ops12 = 0b111111111111 ++ }; ++ ++ // 10-bit opcode, highest 10 bits: bits[31...22] ++ enum ops10 { ++ bstr_w_op = 0b0000000001, ++ bstrins_d_op = 0b0000000010, ++ bstrpick_d_op = 0b0000000011, ++ slti_op = 0b0000001000, ++ sltui_op = 0b0000001001, ++ addi_w_op = 0b0000001010, ++ addi_d_op = 0b0000001011, ++ lu52i_d_op = 0b0000001100, ++ andi_op = 0b0000001101, ++ ori_op = 0b0000001110, ++ xori_op = 0b0000001111, ++ ld_b_op = 0b0010100000, ++ ld_h_op = 0b0010100001, ++ ld_w_op = 0b0010100010, ++ ld_d_op = 0b0010100011, ++ st_b_op = 0b0010100100, ++ st_h_op = 0b0010100101, ++ st_w_op = 0b0010100110, ++ 
st_d_op = 0b0010100111, ++ ld_bu_op = 0b0010101000, ++ ld_hu_op = 0b0010101001, ++ ld_wu_op = 0b0010101010, ++ preld_op = 0b0010101011, ++ fld_s_op = 0b0010101100, ++ fst_s_op = 0b0010101101, ++ fld_d_op = 0b0010101110, ++ fst_d_op = 0b0010101111, ++ vld_op = 0b0010110000, ++ vst_op = 0b0010110001, ++ xvld_op = 0b0010110010, ++ xvst_op = 0b0010110011, ++ ldl_w_op = 0b0010111000, ++ ldr_w_op = 0b0010111001, ++ ++ unknow_ops10 = 0b1111111111 ++ }; ++ ++ // 8-bit opcode, highest 8 bits: bits[31...22] ++ enum ops8 { ++ ll_w_op = 0b00100000, ++ sc_w_op = 0b00100001, ++ ll_d_op = 0b00100010, ++ sc_d_op = 0b00100011, ++ ldptr_w_op = 0b00100100, ++ stptr_w_op = 0b00100101, ++ ldptr_d_op = 0b00100110, ++ stptr_d_op = 0b00100111, ++ ++ unknow_ops8 = 0b11111111 ++ }; ++ ++ // 7-bit opcode, highest 7 bits: bits[31...25] ++ enum ops7 { ++ lu12i_w_op = 0b0001010, ++ lu32i_d_op = 0b0001011, ++ pcaddi_op = 0b0001100, ++ pcalau12i_op = 0b0001101, ++ pcaddu12i_op = 0b0001110, ++ pcaddu18i_op = 0b0001111, ++ ++ unknow_ops7 = 0b1111111 ++ }; ++ ++ // 6-bit opcode, highest 6 bits: bits[31...25] ++ enum ops6 { ++ addu16i_d_op = 0b000100, ++ beqz_op = 0b010000, ++ bnez_op = 0b010001, ++ bccondz_op = 0b010010, ++ jirl_op = 0b010011, ++ b_op = 0b010100, ++ bl_op = 0b010101, ++ beq_op = 0b010110, ++ bne_op = 0b010111, ++ blt_op = 0b011000, ++ bge_op = 0b011001, ++ bltu_op = 0b011010, ++ bgeu_op = 0b011011, ++ ++ unknow_ops6 = 0b111111 ++ }; ++ ++ enum fcmp_cond { ++ fcmp_caf = 0x00, ++ fcmp_cun = 0x08, ++ fcmp_ceq = 0x04, ++ fcmp_cueq = 0x0c, ++ fcmp_clt = 0x02, ++ fcmp_cult = 0x0a, ++ fcmp_cle = 0x06, ++ fcmp_cule = 0x0e, ++ fcmp_cne = 0x10, ++ fcmp_cor = 0x14, ++ fcmp_cune = 0x18, ++ fcmp_saf = 0x01, ++ fcmp_sun = 0x09, ++ fcmp_seq = 0x05, ++ fcmp_sueq = 0x0d, ++ fcmp_slt = 0x03, ++ fcmp_sult = 0x0b, ++ fcmp_sle = 0x07, ++ fcmp_sule = 0x0f, ++ fcmp_sne = 0x11, ++ fcmp_sor = 0x15, ++ fcmp_sune = 0x19 ++ }; ++ ++ enum Condition { ++ zero , ++ notZero , ++ equal , ++ notEqual , ++ less , ++ lessEqual , ++ greater , ++ greaterEqual , ++ below , ++ belowEqual , ++ above , ++ aboveEqual ++ }; ++ ++ static const int LogInstructionSize = 2; ++ static const int InstructionSize = 1 << LogInstructionSize; ++ ++ enum WhichOperand { ++ // input to locate_operand, and format code for relocations ++ imm_operand = 0, // embedded 32-bit|64-bit immediate operand ++ disp32_operand = 1, // embedded 32-bit displacement or address ++ call32_operand = 2, // embedded 32-bit self-relative displacement ++ narrow_oop_operand = 3, // embedded 32-bit immediate narrow oop ++ _WhichOperand_limit = 4 ++ }; ++ ++ static int low (int x, int l) { return bitfield(x, 0, l); } ++ static int low16(int x) { return low(x, 16); } ++ static int low26(int x) { return low(x, 26); } ++ ++ static int high (int x, int l) { return bitfield(x, 32-l, l); } ++ static int high16(int x) { return high(x, 16); } ++ static int high6 (int x) { return high(x, 6); } ++ ++ ++ protected: ++ // help methods for instruction ejection ++ ++ // 2R-type ++ // 31 10 9 5 4 0 ++ // | opcode | rj | rd | ++ static inline int insn_RR (int op, int rj, int rd) { return (op<<10) | (rj<<5) | rd; } ++ ++ // 3R-type ++ // 31 15 14 10 9 5 4 0 ++ // | opcode | rk | rj | rd | ++ static inline int insn_RRR (int op, int rk, int rj, int rd) { return (op<<15) | (rk<<10) | (rj<<5) | rd; } ++ ++ // 4R-type ++ // 31 20 19 15 14 10 9 5 4 0 ++ // | opcode | ra | rk | rj | rd | ++ static inline int insn_RRRR (int op, int ra, int rk, int rj, int rd) { return (op<<20) | (ra << 15) | (rk<<10) | (rj<<5) | 
rd; }
++
++ // 2RI1-type
++ // 31 11 10 9 5 4 0
++ // | opcode | I1 | vj | rd |
++ static inline int insn_I1RR (int op, int ui1, int vj, int rd) { assert(is_uimm(ui1, 1), "not a unsigned 1-bit int"); return (op<<11) | (low(ui1, 1)<<10) | (vj<<5) | rd; }
++
++ // 2RI2-type
++ // 31 12 11 10 9 5 4 0
++ // | opcode | I2 | vj | rd |
++ static inline int insn_I2RR (int op, int ui2, int vj, int rd) { assert(is_uimm(ui2, 2), "not a unsigned 2-bit int"); return (op<<12) | (low(ui2, 2)<<10) | (vj<<5) | rd; }
++
++ // 2RI3-type
++ // 31 13 12 10 9 5 4 0
++ // | opcode | I3 | vj | vd |
++ static inline int insn_I3RR (int op, int ui3, int vj, int vd) { assert(is_uimm(ui3, 3), "not a unsigned 3-bit int"); return (op<<13) | (low(ui3, 3)<<10) | (vj<<5) | vd; }
++
++ // 2RI4-type
++ // 31 14 13 10 9 5 4 0
++ // | opcode | I4 | vj | vd |
++ static inline int insn_I4RR (int op, int ui4, int vj, int vd) { assert(is_uimm(ui4, 4), "not a unsigned 4-bit int"); return (op<<14) | (low(ui4, 4)<<10) | (vj<<5) | vd; }
++
++ // 2RI5-type
++ // 31 15 14 10 9 5 4 0
++ // | opcode | I5 | vj | vd |
++ static inline int insn_I5RR (int op, int ui5, int vj, int vd) { assert(is_uimm(ui5, 5), "not a unsigned 5-bit int"); return (op<<15) | (low(ui5, 5)<<10) | (vj<<5) | vd; }
++
++ // 2RI6-type
++ // 31 16 15 10 9 5 4 0
++ // | opcode | I6 | vj | vd |
++ static inline int insn_I6RR (int op, int ui6, int vj, int vd) { assert(is_uimm(ui6, 6), "not a unsigned 6-bit int"); return (op<<16) | (low(ui6, 6)<<10) | (vj<<5) | vd; }
++
++ // 2RI7-type
++ // 31 17 16 10 9 5 4 0
++ // | opcode | I7 | vj | vd |
++ static inline int insn_I7RR (int op, int ui7, int vj, int vd) { assert(is_uimm(ui7, 7), "not a unsigned 7-bit int"); return (op<<17) | (low(ui7, 7)<<10) | (vj<<5) | vd; }
++
++ // 2RI8-type
++ // 31 18 17 10 9 5 4 0
++ // | opcode | I8 | rj | rd |
++ static inline int insn_I8RR (int op, int imm8, int rj, int rd) { /*assert(is_simm(imm8, 8), "not a signed 8-bit int");*/ return (op<<18) | (low(imm8, 8)<<10) | (rj<<5) | rd; }
++
++ // 2RI12-type
++ // 31 22 21 10 9 5 4 0
++ // | opcode | I12 | rj | rd |
++ static inline int insn_I12RR(int op, int imm12, int rj, int rd) { /* assert(is_simm(imm12, 12), "not a signed 12-bit int");*/ return (op<<22) | (low(imm12, 12)<<10) | (rj<<5) | rd; }
++
++
++ // 2RI14-type
++ // 31 24 23 10 9 5 4 0
++ // | opcode | I14 | rj | rd |
++ static inline int insn_I14RR(int op, int imm14, int rj, int rd) { assert(is_simm(imm14, 14), "not a signed 14-bit int"); return (op<<24) | (low(imm14, 14)<<10) | (rj<<5) | rd; }
++
++ // 2RI16-type
++ // 31 26 25 10 9 5 4 0
++ // | opcode | I16 | rj | rd |
++ static inline int insn_I16RR(int op, int imm16, int rj, int rd) { assert(is_simm16(imm16), "not a signed 16-bit int"); return (op<<26) | (low16(imm16)<<10) | (rj<<5) | rd; }
++
++ // 1RI13-type (?)
++ // 31 18 17 5 4 0
++ // | opcode | I13 | vd |
++ static inline int insn_I13R (int op, int imm13, int vd) { assert(is_simm(imm13, 13), "not a signed 13-bit int"); return (op<<18) | (low(imm13, 13)<<5) | vd; }
++
++ // 1RI20-type (?)
++ // 31 25 24 5 4 0
++ // | opcode | I20 | rd |
++ static inline int insn_I20R (int op, int imm20, int rd) { assert(is_simm(imm20, 20), "not a signed 20-bit int"); return (op<<25) | (low(imm20, 20)<<5) | rd; }
++
++ // 1RI21-type
++ // 31 26 25 10 9 5 4 0
++ // | opcode | I21[15:0] | rj |I21[20:16]|
++ static inline int insn_IRI(int op, int imm21, int rj) { assert(is_simm(imm21, 21), "not a signed 21-bit int"); return (op << 26) | (low16(imm21) << 10) | (rj << 5) | low(imm21 >> 16, 5); }
++
++ // I26-type
++ // 31 26 25 10 9 0
++ // | opcode | I26[15:0] | I26[25:16] |
++ static inline int insn_I26(int op, int imm26) { assert(is_simm(imm26, 26), "not a signed 26-bit int"); return (op << 26) | (low16(imm26) << 10) | low(imm26 >> 16, 10); }
++
++ // imm15
++ // 31 15 14 0
++ // | opcode | I15 |
++ static inline int insn_I15 (int op, int imm15) { assert(is_uimm(imm15, 15), "not a unsigned 15-bit int"); return (op<<15) | low(imm15, 15); }
++
++
++ // get the offset field of beq, bne, blt[u], bge[u] instruction
++ int offset16(address entry) {
++ assert(is_simm16((entry - pc()) / 4), "change this code");
++ if (!is_simm16((entry - pc()) / 4)) {
++ tty->print_cr("!!! is_simm16: %lx", (entry - pc()) / 4);
++ }
++ return (entry - pc()) / 4;
++ }
++
++ // get the offset field of beqz, bnez instruction
++ int offset21(address entry) {
++ assert(is_simm((int)(entry - pc()) / 4, 21), "change this code");
++ if (!is_simm((int)(entry - pc()) / 4, 21)) {
++ tty->print_cr("!!! is_simm21: %lx", (entry - pc()) / 4);
++ }
++ return (entry - pc()) / 4;
++ }
++
++ // get the offset field of b instruction
++ int offset26(address entry) {
++ assert(is_simm((int)(entry - pc()) / 4, 26), "change this code");
++ if (!is_simm((int)(entry - pc()) / 4, 26)) {
++ tty->print_cr("!!! is_simm26: %lx", (entry - pc()) / 4);
++ }
++ return (entry - pc()) / 4;
++ }
++
++public:
++ using AbstractAssembler::offset;
++
++ //sign expand with the sign bit is h
++ static int expand(int x, int h) { return -(x & (1<<h)) | x; }
++
++ // Convert 16-bit x to a sign-extended 16-bit integer
++ static int simm16(int x) {
++ assert(x == (x & 0xFFFF), "must be 16-bit only");
++ return (x << 16) >> 16;
++ }
++
++ static int split_high16(int x) {
++ return ( (x >> 16) + ((x & 0x8000) != 0) ) & 0xffff;
++ }
++
++ static int split_low20(int x) {
++ return (x & 0xfffff);
++ }
++
++ // Convert 20-bit x to a sign-extended 20-bit integer
++ static int simm20(int x) {
++ assert(x == (x & 0xFFFFF), "must be 20-bit only");
++ return (x << 12) >> 12;
++ }
++
++ static int split_low12(int x) {
++ return (x & 0xfff);
++ }
++
++ // Split a signed 38-bit offset into a sign-extended low 18-bit part and the matching high 20-bit part (carry-adjusted)
++ static inline void split_simm38(jlong si38, jint& si18, jint& si20) {
++ si18 = ((jint)(si38 & 0x3ffff) << 14) >> 14;
++ si38 += (si38 & 0x20000) << 1;
++ si20 = si38 >> 18;
++ }
++
++ // Convert 12-bit x to a sign-extended 12-bit integer
++ static int simm12(int x) {
++ assert(x == (x & 0xFFF), "must be 12-bit only");
++ return (x << 20) >> 20;
++ }
++
++ // Convert 26-bit x to a sign-extended 26-bit integer
++ static int simm26(int x) {
++ assert(x == (x & 0x3FFFFFF), "must be 26-bit only");
++ return (x << 6) >> 6;
++ }
++
++ static intptr_t merge(intptr_t x0, intptr_t x12) {
++ //lu12i, ori
++ return (((x12 << 12) | x0) << 32) >> 32;
++ }
++
++ static intptr_t merge(intptr_t x0, intptr_t x12, intptr_t x32) {
++ //lu32i, lu12i, ori
++ return (((x32 << 32) | (x12 << 12) | x0) << 12) >> 12;
++ }
++
++ static intptr_t merge(intptr_t x0, intptr_t x12, intptr_t x32, intptr_t x52) {
++ //lu52i, lu32i, lu12i, ori
++ return (x52 << 52) | (x32 << 32) | (x12 << 12) | x0;
++ }
++
++ // Test if x is within signed immediate range for nbits.
++ static bool is_simm (int x, unsigned int nbits) { ++ assert(0 < nbits && nbits < 32, "out of bounds"); ++ const int min = -( ((int)1) << nbits-1 ); ++ const int maxplus1 = ( ((int)1) << nbits-1 ); ++ return min <= x && x < maxplus1; ++ } ++ ++ static bool is_simm(jlong x, unsigned int nbits) { ++ assert(0 < nbits && nbits < 64, "out of bounds"); ++ const jlong min = -( ((jlong)1) << nbits-1 ); ++ const jlong maxplus1 = ( ((jlong)1) << nbits-1 ); ++ return min <= x && x < maxplus1; ++ } ++ ++ static bool is_simm16(int x) { return is_simm(x, 16); } ++ static bool is_simm16(long x) { return is_simm((jlong)x, (unsigned int)16); } ++ ++ // Test if x is within unsigned immediate range for nbits ++ static bool is_uimm(int x, unsigned int nbits) { ++ assert(0 < nbits && nbits < 32, "out of bounds"); ++ const int maxplus1 = ( ((int)1) << nbits ); ++ return 0 <= x && x < maxplus1; ++ } ++ ++ static bool is_uimm(jlong x, unsigned int nbits) { ++ assert(0 < nbits && nbits < 64, "out of bounds"); ++ const jlong maxplus1 = ( ((jlong)1) << nbits ); ++ return 0 <= x && x < maxplus1; ++ } ++ ++public: ++ ++ void flush() { ++ AbstractAssembler::flush(); ++ } ++ ++ inline void emit_data(int x) { emit_int32(x); } ++ inline void emit_data(int x, relocInfo::relocType rtype) { ++ relocate(rtype); ++ emit_int32(x); ++ } ++ ++ inline void emit_data(int x, RelocationHolder const& rspec) { ++ relocate(rspec); ++ emit_int32(x); ++ } ++ ++ // Generic instructions ++ // Does 32bit or 64bit as needed for the platform. In some sense these ++ // belong in macro assembler but there is no need for both varieties to exist ++ ++ void clo_w (Register rd, Register rj) { emit_int32(insn_RR(clo_w_op, (int)rj->encoding(), (int)rd->encoding())); } ++ void clz_w (Register rd, Register rj) { emit_int32(insn_RR(clz_w_op, (int)rj->encoding(), (int)rd->encoding())); } ++ void cto_w (Register rd, Register rj) { emit_int32(insn_RR(cto_w_op, (int)rj->encoding(), (int)rd->encoding())); } ++ void ctz_w (Register rd, Register rj) { emit_int32(insn_RR(ctz_w_op, (int)rj->encoding(), (int)rd->encoding())); } ++ void clo_d (Register rd, Register rj) { emit_int32(insn_RR(clo_d_op, (int)rj->encoding(), (int)rd->encoding())); } ++ void clz_d (Register rd, Register rj) { emit_int32(insn_RR(clz_d_op, (int)rj->encoding(), (int)rd->encoding())); } ++ void cto_d (Register rd, Register rj) { emit_int32(insn_RR(cto_d_op, (int)rj->encoding(), (int)rd->encoding())); } ++ void ctz_d (Register rd, Register rj) { emit_int32(insn_RR(ctz_d_op, (int)rj->encoding(), (int)rd->encoding())); } ++ ++ void revb_2h(Register rd, Register rj) { emit_int32(insn_RR(revb_2h_op, (int)rj->encoding(), (int)rd->encoding())); } ++ void revb_4h(Register rd, Register rj) { emit_int32(insn_RR(revb_4h_op, (int)rj->encoding(), (int)rd->encoding())); } ++ void revb_2w(Register rd, Register rj) { emit_int32(insn_RR(revb_2w_op, (int)rj->encoding(), (int)rd->encoding())); } ++ void revb_d (Register rd, Register rj) { emit_int32(insn_RR( revb_d_op, (int)rj->encoding(), (int)rd->encoding())); } ++ void revh_2w(Register rd, Register rj) { emit_int32(insn_RR(revh_2w_op, (int)rj->encoding(), (int)rd->encoding())); } ++ void revh_d (Register rd, Register rj) { emit_int32(insn_RR( revh_d_op, (int)rj->encoding(), (int)rd->encoding())); } ++ ++ void bitrev_4b(Register rd, Register rj) { emit_int32(insn_RR(bitrev_4b_op, (int)rj->encoding(), (int)rd->encoding())); } ++ void bitrev_8b(Register rd, Register rj) { emit_int32(insn_RR(bitrev_8b_op, (int)rj->encoding(), (int)rd->encoding())); } ++ void 
bitrev_w (Register rd, Register rj) { emit_int32(insn_RR(bitrev_w_op, (int)rj->encoding(), (int)rd->encoding())); } ++ void bitrev_d (Register rd, Register rj) { emit_int32(insn_RR(bitrev_d_op, (int)rj->encoding(), (int)rd->encoding())); } ++ ++ void ext_w_h(Register rd, Register rj) { emit_int32(insn_RR(ext_w_h_op, (int)rj->encoding(), (int)rd->encoding())); } ++ void ext_w_b(Register rd, Register rj) { emit_int32(insn_RR(ext_w_b_op, (int)rj->encoding(), (int)rd->encoding())); } ++ ++ void rdtimel_w(Register rd, Register rj) { emit_int32(insn_RR(rdtimel_w_op, (int)rj->encoding(), (int)rd->encoding())); } ++ void rdtimeh_w(Register rd, Register rj) { emit_int32(insn_RR(rdtimeh_w_op, (int)rj->encoding(), (int)rd->encoding())); } ++ void rdtime_d(Register rd, Register rj) { emit_int32(insn_RR(rdtime_d_op, (int)rj->encoding(), (int)rd->encoding())); } ++ ++ void cpucfg(Register rd, Register rj) { emit_int32(insn_RR(cpucfg_op, (int)rj->encoding(), (int)rd->encoding())); } ++ ++ void asrtle_d (Register rj, Register rk) { emit_int32(insn_RRR(asrtle_d_op , (int)rk->encoding(), (int)rj->encoding(), 0)); } ++ void asrtgt_d (Register rj, Register rk) { emit_int32(insn_RRR(asrtgt_d_op , (int)rk->encoding(), (int)rj->encoding(), 0)); } ++ ++ void alsl_w(Register rd, Register rj, Register rk, int sa2) { assert(is_uimm(sa2, 2), "not a unsigned 2-bit int"); emit_int32(insn_I8RR(alsl_w_op, ( (0 << 7) | (sa2 << 5) | (int)rk->encoding() ), (int)rj->encoding(), (int)rd->encoding())); } ++ void alsl_wu(Register rd, Register rj, Register rk, int sa2) { assert(is_uimm(sa2, 2), "not a unsigned 2-bit int"); emit_int32(insn_I8RR(alsl_w_op, ( (1 << 7) | (sa2 << 5) | (int)rk->encoding() ), (int)rj->encoding(), (int)rd->encoding())); } ++ void bytepick_w(Register rd, Register rj, Register rk, int sa2) { assert(is_uimm(sa2, 2), "not a unsigned 2-bit int"); emit_int32(insn_I8RR(bytepick_w_op, ( (0 << 7) | (sa2 << 5) | (int)rk->encoding() ), (int)rj->encoding(), (int)rd->encoding())); } ++ void bytepick_d(Register rd, Register rj, Register rk, int sa3) { assert(is_uimm(sa3, 3), "not a unsigned 3-bit int"); emit_int32(insn_I8RR(bytepick_d_op, ( (sa3 << 5) | (int)rk->encoding() ), (int)rj->encoding(), (int)rd->encoding())); } ++ ++ void add_w (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(add_w_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } ++ void add_d (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(add_d_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } ++ void sub_w (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(sub_w_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } ++ void sub_d (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(sub_d_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } ++ void slt (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(slt_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } ++ void sltu (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(sltu_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } ++ ++ void maskeqz (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(maskeqz_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } ++ void masknez (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(masknez_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } ++ ++ void nor (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(nor_op, 
(int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } ++ void AND (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(and_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } ++ void OR (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(or_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } ++ void XOR (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(xor_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } ++ void orn (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(orn_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } ++ void andn(Register rd, Register rj, Register rk) { emit_int32(insn_RRR(andn_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } ++ ++ void sll_w (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(sll_w_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } ++ void srl_w (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(srl_w_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } ++ void sra_w (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(sra_w_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } ++ void sll_d (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(sll_d_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } ++ void srl_d (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(srl_d_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } ++ void sra_d (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(sra_d_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } ++ ++ void rotr_w (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(rotr_w_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } ++ void rotr_d (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(rotr_d_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } ++ ++ void mul_w (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(mul_w_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } ++ void mulh_w (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(mulh_w_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } ++ void mulh_wu (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(mulh_wu_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } ++ void mul_d (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(mul_d_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } ++ void mulh_d (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(mulh_d_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } ++ void mulh_du (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(mulh_du_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } ++ void mulw_d_w (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(mulw_d_w_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } ++ void mulw_d_wu (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(mulw_d_wu_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } ++ ++ void div_w (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(div_w_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } ++ void mod_w (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(mod_w_op, 
(int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } ++ void div_wu(Register rd, Register rj, Register rk) { emit_int32(insn_RRR(div_wu_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } ++ void mod_wu(Register rd, Register rj, Register rk) { emit_int32(insn_RRR(mod_wu_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } ++ void div_d (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(div_d_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } ++ void mod_d (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(mod_d_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } ++ void div_du(Register rd, Register rj, Register rk) { emit_int32(insn_RRR(div_du_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } ++ void mod_du(Register rd, Register rj, Register rk) { emit_int32(insn_RRR(mod_du_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } ++ ++ void crc_w_b_w (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(crc_w_b_w_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } ++ void crc_w_h_w (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(crc_w_h_w_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } ++ void crc_w_w_w (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(crc_w_w_w_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } ++ void crc_w_d_w (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(crc_w_d_w_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } ++ void crcc_w_b_w (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(crcc_w_b_w_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } ++ void crcc_w_h_w (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(crcc_w_h_w_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } ++ void crcc_w_w_w (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(crcc_w_w_w_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } ++ void crcc_w_d_w (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(crcc_w_d_w_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } ++ ++ void brk(int code) { assert(is_uimm(code, 15), "not a unsigned 15-bit int"); emit_int32(insn_I15(break_op, code)); } ++ ++ void alsl_d(Register rd, Register rj, Register rk, int sa2) { assert(is_uimm(sa2, 2), "not a unsigned 2-bit int"); emit_int32(insn_I8RR(alsl_d_op, ( (sa2 << 5) | (int)rk->encoding() ), (int)rj->encoding(), (int)rd->encoding())); } ++ ++ void slli_w(Register rd, Register rj, int ui5) { assert(is_uimm(ui5, 5), "not a unsigned 5-bit int"); emit_int32(insn_I8RR(slli_op, ( (0b001 << 5) | ui5 ), (int)rj->encoding(), (int)rd->encoding())); } ++ void slli_d(Register rd, Register rj, int ui6) { assert(is_uimm(ui6, 6), "not a unsigned 6-bit int"); emit_int32(insn_I8RR(slli_op, ( (0b01 << 6) | ui6 ), (int)rj->encoding(), (int)rd->encoding())); } ++ void srli_w(Register rd, Register rj, int ui5) { assert(is_uimm(ui5, 5), "not a unsigned 5-bit int"); emit_int32(insn_I8RR(srli_op, ( (0b001 << 5) | ui5 ), (int)rj->encoding(), (int)rd->encoding())); } ++ void srli_d(Register rd, Register rj, int ui6) { assert(is_uimm(ui6, 6), "not a unsigned 6-bit int"); emit_int32(insn_I8RR(srli_op, ( (0b01 << 6) | ui6 ), (int)rj->encoding(), (int)rd->encoding())); } ++ void srai_w(Register rd, Register rj, int ui5) { assert(is_uimm(ui5, 5), "not a 
unsigned 5-bit int"); emit_int32(insn_I8RR(srai_op, ( (0b001 << 5) | ui5 ), (int)rj->encoding(), (int)rd->encoding())); } ++ void srai_d(Register rd, Register rj, int ui6) { assert(is_uimm(ui6, 6), "not a unsigned 6-bit int"); emit_int32(insn_I8RR(srai_op, ( (0b01 << 6) | ui6 ), (int)rj->encoding(), (int)rd->encoding())); } ++ void rotri_w(Register rd, Register rj, int ui5) { assert(is_uimm(ui5, 5), "not a unsigned 5-bit int"); emit_int32(insn_I8RR(rotri_op, ( (0b001 << 5) | ui5 ), (int)rj->encoding(), (int)rd->encoding())); } ++ void rotri_d(Register rd, Register rj, int ui6) { assert(is_uimm(ui6, 6), "not a unsigned 6-bit int"); emit_int32(insn_I8RR(rotri_op, ( (0b01 << 6) | ui6 ), (int)rj->encoding(), (int)rd->encoding())); } ++ ++ void bstrins_w (Register rd, Register rj, int msbw, int lsbw) { assert(is_uimm(msbw, 5) && is_uimm(lsbw, 5), "not a unsigned 5-bit int"); emit_int32(insn_I12RR(bstr_w_op, ( (1<<11) | (low(msbw, 5)<<6) | (0<<5) | low(lsbw, 5) ), (int)rj->encoding(), (int)rd->encoding())); } ++ void bstrpick_w (Register rd, Register rj, int msbw, int lsbw) { assert(is_uimm(msbw, 5) && is_uimm(lsbw, 5), "not a unsigned 5-bit int"); emit_int32(insn_I12RR(bstr_w_op, ( (1<<11) | (low(msbw, 5)<<6) | (1<<5) | low(lsbw, 5) ), (int)rj->encoding(), (int)rd->encoding())); } ++ void bstrins_d (Register rd, Register rj, int msbd, int lsbd) { assert(is_uimm(msbd, 6) && is_uimm(lsbd, 6), "not a unsigned 6-bit int"); emit_int32(insn_I12RR(bstrins_d_op, ( (low(msbd, 6)<<6) | low(lsbd, 6) ), (int)rj->encoding(), (int)rd->encoding())); } ++ void bstrpick_d (Register rd, Register rj, int msbd, int lsbd) { assert(is_uimm(msbd, 6) && is_uimm(lsbd, 6), "not a unsigned 6-bit int"); emit_int32(insn_I12RR(bstrpick_d_op, ( (low(msbd, 6)<<6) | low(lsbd, 6) ), (int)rj->encoding(), (int)rd->encoding())); } ++ ++ void fadd_s (FloatRegister fd, FloatRegister fj, FloatRegister fk) { emit_int32(insn_RRR(fadd_s_op, (int)fk->encoding(), (int)fj->encoding(), (int)fd->encoding())); } ++ void fadd_d (FloatRegister fd, FloatRegister fj, FloatRegister fk) { emit_int32(insn_RRR(fadd_d_op, (int)fk->encoding(), (int)fj->encoding(), (int)fd->encoding())); } ++ void fsub_s (FloatRegister fd, FloatRegister fj, FloatRegister fk) { emit_int32(insn_RRR(fsub_s_op, (int)fk->encoding(), (int)fj->encoding(), (int)fd->encoding())); } ++ void fsub_d (FloatRegister fd, FloatRegister fj, FloatRegister fk) { emit_int32(insn_RRR(fsub_d_op, (int)fk->encoding(), (int)fj->encoding(), (int)fd->encoding())); } ++ void fmul_s (FloatRegister fd, FloatRegister fj, FloatRegister fk) { emit_int32(insn_RRR(fmul_s_op, (int)fk->encoding(), (int)fj->encoding(), (int)fd->encoding())); } ++ void fmul_d (FloatRegister fd, FloatRegister fj, FloatRegister fk) { emit_int32(insn_RRR(fmul_d_op, (int)fk->encoding(), (int)fj->encoding(), (int)fd->encoding())); } ++ void fdiv_s (FloatRegister fd, FloatRegister fj, FloatRegister fk) { emit_int32(insn_RRR(fdiv_s_op, (int)fk->encoding(), (int)fj->encoding(), (int)fd->encoding())); } ++ void fdiv_d (FloatRegister fd, FloatRegister fj, FloatRegister fk) { emit_int32(insn_RRR(fdiv_d_op, (int)fk->encoding(), (int)fj->encoding(), (int)fd->encoding())); } ++ void fmax_s (FloatRegister fd, FloatRegister fj, FloatRegister fk) { emit_int32(insn_RRR(fmax_s_op, (int)fk->encoding(), (int)fj->encoding(), (int)fd->encoding())); } ++ void fmax_d (FloatRegister fd, FloatRegister fj, FloatRegister fk) { emit_int32(insn_RRR(fmax_d_op, (int)fk->encoding(), (int)fj->encoding(), (int)fd->encoding())); } ++ void fmin_s (FloatRegister 
fd, FloatRegister fj, FloatRegister fk) { emit_int32(insn_RRR(fmin_s_op, (int)fk->encoding(), (int)fj->encoding(), (int)fd->encoding())); } ++ void fmin_d (FloatRegister fd, FloatRegister fj, FloatRegister fk) { emit_int32(insn_RRR(fmin_d_op, (int)fk->encoding(), (int)fj->encoding(), (int)fd->encoding())); } ++ void fmaxa_s (FloatRegister fd, FloatRegister fj, FloatRegister fk) { emit_int32(insn_RRR(fmaxa_s_op, (int)fk->encoding(), (int)fj->encoding(), (int)fd->encoding())); } ++ void fmaxa_d (FloatRegister fd, FloatRegister fj, FloatRegister fk) { emit_int32(insn_RRR(fmaxa_d_op, (int)fk->encoding(), (int)fj->encoding(), (int)fd->encoding())); } ++ void fmina_s (FloatRegister fd, FloatRegister fj, FloatRegister fk) { emit_int32(insn_RRR(fmina_s_op, (int)fk->encoding(), (int)fj->encoding(), (int)fd->encoding())); } ++ void fmina_d (FloatRegister fd, FloatRegister fj, FloatRegister fk) { emit_int32(insn_RRR(fmina_d_op, (int)fk->encoding(), (int)fj->encoding(), (int)fd->encoding())); } ++ ++ void fscaleb_s (FloatRegister fd, FloatRegister fj, FloatRegister fk) { emit_int32(insn_RRR(fscaleb_s_op, (int)fk->encoding(), (int)fj->encoding(), (int)fd->encoding())); } ++ void fscaleb_d (FloatRegister fd, FloatRegister fj, FloatRegister fk) { emit_int32(insn_RRR(fscaleb_d_op, (int)fk->encoding(), (int)fj->encoding(), (int)fd->encoding())); } ++ void fcopysign_s (FloatRegister fd, FloatRegister fj, FloatRegister fk) { emit_int32(insn_RRR(fcopysign_s_op, (int)fk->encoding(), (int)fj->encoding(), (int)fd->encoding())); } ++ void fcopysign_d (FloatRegister fd, FloatRegister fj, FloatRegister fk) { emit_int32(insn_RRR(fcopysign_d_op, (int)fk->encoding(), (int)fj->encoding(), (int)fd->encoding())); } ++ ++ void fabs_s(FloatRegister fd, FloatRegister fj) { emit_int32(insn_RR(fabs_s_op, (int)fj->encoding(), (int)fd->encoding())); } ++ void fabs_d(FloatRegister fd, FloatRegister fj) { emit_int32(insn_RR(fabs_d_op, (int)fj->encoding(), (int)fd->encoding())); } ++ void fneg_s(FloatRegister fd, FloatRegister fj) { emit_int32(insn_RR(fneg_s_op, (int)fj->encoding(), (int)fd->encoding())); } ++ void fneg_d(FloatRegister fd, FloatRegister fj) { emit_int32(insn_RR(fneg_d_op, (int)fj->encoding(), (int)fd->encoding())); } ++ void flogb_s(FloatRegister fd, FloatRegister fj) { emit_int32(insn_RR(flogb_s_op, (int)fj->encoding(), (int)fd->encoding())); } ++ void flogb_d(FloatRegister fd, FloatRegister fj) { emit_int32(insn_RR(flogb_d_op, (int)fj->encoding(), (int)fd->encoding())); } ++ void fclass_s(FloatRegister fd, FloatRegister fj) { emit_int32(insn_RR(fclass_s_op, (int)fj->encoding(), (int)fd->encoding())); } ++ void fclass_d(FloatRegister fd, FloatRegister fj) { emit_int32(insn_RR(fclass_d_op, (int)fj->encoding(), (int)fd->encoding())); } ++ void fsqrt_s(FloatRegister fd, FloatRegister fj) { emit_int32(insn_RR(fsqrt_s_op, (int)fj->encoding(), (int)fd->encoding())); } ++ void fsqrt_d(FloatRegister fd, FloatRegister fj) { emit_int32(insn_RR(fsqrt_d_op, (int)fj->encoding(), (int)fd->encoding())); } ++ void frecip_s(FloatRegister fd, FloatRegister fj) { emit_int32(insn_RR(frecip_s_op, (int)fj->encoding(), (int)fd->encoding())); } ++ void frecip_d(FloatRegister fd, FloatRegister fj) { emit_int32(insn_RR(frecip_d_op, (int)fj->encoding(), (int)fd->encoding())); } ++ void frsqrt_s(FloatRegister fd, FloatRegister fj) { emit_int32(insn_RR(frsqrt_s_op, (int)fj->encoding(), (int)fd->encoding())); } ++ void frsqrt_d(FloatRegister fd, FloatRegister fj) { emit_int32(insn_RR(frsqrt_d_op, (int)fj->encoding(), (int)fd->encoding())); } 
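
All of the emitters in this class reduce to the insn_* packing helpers defined above. As a quick cross-check (illustration only, not part of the patch), the 2RI12 layout can be mirrored in a few lines of standalone C++; the addi_d_op value is taken from the ops10 enum earlier in this header, and $sp is assumed to be general register r3 in the usual LoongArch numbering:

  // Standalone illustration that mirrors insn_I12RR() above (not part of the patch).
  #include <cstdint>
  #include <cstdio>

  static int low(int x, int l) { return x & ((1 << l) - 1); }   // same effect as Assembler::low()
  static int insn_I12RR(int op, int imm12, int rj, int rd) {    // 2RI12-type: | opcode | I12 | rj | rd |
    return (op << 22) | (low(imm12, 12) << 10) | (rj << 5) | rd;
  }

  int main() {
    const int addi_d_op = 0b0000001011;   // value from enum ops10 in this header
    const int sp = 3;                     // $sp is general register r3
    // addi.d $sp, $sp, -16 : low() masks the 12-bit immediate to 0xff0
    uint32_t insn = (uint32_t)insn_I12RR(addi_d_op, -16, sp, sp);
    printf("0x%08x\n", insn);             // prints 0x02ffc063
    return 0;
  }

Comparing the printed word against what an assembler emits for the same instruction is a convenient way to sanity-check new entries in the opcode enums.
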
++ void fmov_s(FloatRegister fd, FloatRegister fj) { emit_int32(insn_RR(fmov_s_op, (int)fj->encoding(), (int)fd->encoding())); } ++ void fmov_d(FloatRegister fd, FloatRegister fj) { emit_int32(insn_RR(fmov_d_op, (int)fj->encoding(), (int)fd->encoding())); } ++ ++ void movgr2fr_w (FloatRegister fd, Register rj) { emit_int32(insn_RR(movgr2fr_w_op, (int)rj->encoding(), (int)fd->encoding())); } ++ void movgr2fr_d (FloatRegister fd, Register rj) { emit_int32(insn_RR(movgr2fr_d_op, (int)rj->encoding(), (int)fd->encoding())); } ++ void movgr2frh_w(FloatRegister fd, Register rj) { emit_int32(insn_RR(movgr2frh_w_op, (int)rj->encoding(), (int)fd->encoding())); } ++ void movfr2gr_s (Register rd, FloatRegister fj) { emit_int32(insn_RR(movfr2gr_s_op, (int)fj->encoding(), (int)rd->encoding())); } ++ void movfr2gr_d (Register rd, FloatRegister fj) { emit_int32(insn_RR(movfr2gr_d_op, (int)fj->encoding(), (int)rd->encoding())); } ++ void movfrh2gr_s(Register rd, FloatRegister fj) { emit_int32(insn_RR(movfrh2gr_s_op, (int)fj->encoding(), (int)rd->encoding())); } ++ void movgr2fcsr (int fcsr, Register rj) { assert(is_uimm(fcsr, 2), "not a unsigned 2-bit init: fcsr0-fcsr3"); emit_int32(insn_RR(movgr2fcsr_op, (int)rj->encoding(), fcsr)); } ++ void movfcsr2gr (Register rd, int fcsr) { assert(is_uimm(fcsr, 2), "not a unsigned 2-bit init: fcsr0-fcsr3"); emit_int32(insn_RR(movfcsr2gr_op, fcsr, (int)rd->encoding())); } ++ void movfr2cf (ConditionalFlagRegister cd, FloatRegister fj) { emit_int32(insn_RR(movfr2cf_op, (int)fj->encoding(), (int)cd->encoding())); } ++ void movcf2fr (FloatRegister fd, ConditionalFlagRegister cj) { emit_int32(insn_RR(movcf2fr_op, (int)cj->encoding(), (int)fd->encoding())); } ++ void movgr2cf (ConditionalFlagRegister cd, Register rj) { emit_int32(insn_RR(movgr2cf_op, (int)rj->encoding(), (int)cd->encoding())); } ++ void movcf2gr (Register rd, ConditionalFlagRegister cj) { emit_int32(insn_RR(movcf2gr_op, (int)cj->encoding(), (int)rd->encoding())); } ++ ++ void fcvt_s_d(FloatRegister fd, FloatRegister fj) { emit_int32(insn_RR(fcvt_s_d_op, (int)fj->encoding(), (int)fd->encoding())); } ++ void fcvt_d_s(FloatRegister fd, FloatRegister fj) { emit_int32(insn_RR(fcvt_d_s_op, (int)fj->encoding(), (int)fd->encoding())); } ++ ++ void ftintrm_w_s(FloatRegister fd, FloatRegister fj) { emit_int32(insn_RR(ftintrm_w_s_op, (int)fj->encoding(), (int)fd->encoding())); } ++ void ftintrm_w_d(FloatRegister fd, FloatRegister fj) { emit_int32(insn_RR(ftintrm_w_d_op, (int)fj->encoding(), (int)fd->encoding())); } ++ void ftintrm_l_s(FloatRegister fd, FloatRegister fj) { emit_int32(insn_RR(ftintrm_l_s_op, (int)fj->encoding(), (int)fd->encoding())); } ++ void ftintrm_l_d(FloatRegister fd, FloatRegister fj) { emit_int32(insn_RR(ftintrm_l_d_op, (int)fj->encoding(), (int)fd->encoding())); } ++ void ftintrp_w_s(FloatRegister fd, FloatRegister fj) { emit_int32(insn_RR(ftintrp_w_s_op, (int)fj->encoding(), (int)fd->encoding())); } ++ void ftintrp_w_d(FloatRegister fd, FloatRegister fj) { emit_int32(insn_RR(ftintrp_w_d_op, (int)fj->encoding(), (int)fd->encoding())); } ++ void ftintrp_l_s(FloatRegister fd, FloatRegister fj) { emit_int32(insn_RR(ftintrp_l_s_op, (int)fj->encoding(), (int)fd->encoding())); } ++ void ftintrp_l_d(FloatRegister fd, FloatRegister fj) { emit_int32(insn_RR(ftintrp_l_d_op, (int)fj->encoding(), (int)fd->encoding())); } ++ void ftintrz_w_s(FloatRegister fd, FloatRegister fj) { emit_int32(insn_RR(ftintrz_w_s_op, (int)fj->encoding(), (int)fd->encoding())); } ++ void ftintrz_w_d(FloatRegister fd, 
FloatRegister fj) { emit_int32(insn_RR(ftintrz_w_d_op, (int)fj->encoding(), (int)fd->encoding())); } ++ void ftintrz_l_s(FloatRegister fd, FloatRegister fj) { emit_int32(insn_RR(ftintrz_l_s_op, (int)fj->encoding(), (int)fd->encoding())); } ++ void ftintrz_l_d(FloatRegister fd, FloatRegister fj) { emit_int32(insn_RR(ftintrz_l_d_op, (int)fj->encoding(), (int)fd->encoding())); } ++ void ftintrne_w_s(FloatRegister fd, FloatRegister fj) { emit_int32(insn_RR(ftintrne_w_s_op, (int)fj->encoding(), (int)fd->encoding())); } ++ void ftintrne_w_d(FloatRegister fd, FloatRegister fj) { emit_int32(insn_RR(ftintrne_w_d_op, (int)fj->encoding(), (int)fd->encoding())); } ++ void ftintrne_l_s(FloatRegister fd, FloatRegister fj) { emit_int32(insn_RR(ftintrne_l_s_op, (int)fj->encoding(), (int)fd->encoding())); } ++ void ftintrne_l_d(FloatRegister fd, FloatRegister fj) { emit_int32(insn_RR(ftintrne_l_d_op, (int)fj->encoding(), (int)fd->encoding())); } ++ void ftint_w_s(FloatRegister fd, FloatRegister fj) { emit_int32(insn_RR(ftint_w_s_op, (int)fj->encoding(), (int)fd->encoding())); } ++ void ftint_w_d(FloatRegister fd, FloatRegister fj) { emit_int32(insn_RR(ftint_w_d_op, (int)fj->encoding(), (int)fd->encoding())); } ++ void ftint_l_s(FloatRegister fd, FloatRegister fj) { emit_int32(insn_RR(ftint_l_s_op, (int)fj->encoding(), (int)fd->encoding())); } ++ void ftint_l_d(FloatRegister fd, FloatRegister fj) { emit_int32(insn_RR(ftint_l_d_op, (int)fj->encoding(), (int)fd->encoding())); } ++ void ffint_s_w(FloatRegister fd, FloatRegister fj) { emit_int32(insn_RR(ffint_s_w_op, (int)fj->encoding(), (int)fd->encoding())); } ++ void ffint_s_l(FloatRegister fd, FloatRegister fj) { emit_int32(insn_RR(ffint_s_l_op, (int)fj->encoding(), (int)fd->encoding())); } ++ void ffint_d_w(FloatRegister fd, FloatRegister fj) { emit_int32(insn_RR(ffint_d_w_op, (int)fj->encoding(), (int)fd->encoding())); } ++ void ffint_d_l(FloatRegister fd, FloatRegister fj) { emit_int32(insn_RR(ffint_d_l_op, (int)fj->encoding(), (int)fd->encoding())); } ++ void frint_s(FloatRegister fd, FloatRegister fj) { emit_int32(insn_RR(frint_s_op, (int)fj->encoding(), (int)fd->encoding())); } ++ void frint_d(FloatRegister fd, FloatRegister fj) { emit_int32(insn_RR(frint_d_op, (int)fj->encoding(), (int)fd->encoding())); } ++ ++ void slti (Register rd, Register rj, int si12) { assert(is_simm(si12, 12), "not a signed 12-bit int"); emit_int32(insn_I12RR(slti_op, si12, (int)rj->encoding(), (int)rd->encoding())); } ++ void sltui (Register rd, Register rj, int si12) { assert(is_simm(si12, 12), "not a signed 12-bit int"); emit_int32(insn_I12RR(sltui_op, si12, (int)rj->encoding(), (int)rd->encoding())); } ++ void addi_w(Register rd, Register rj, int si12) { assert(is_simm(si12, 12), "not a signed 12-bit int"); emit_int32(insn_I12RR(addi_w_op, si12, (int)rj->encoding(), (int)rd->encoding())); } ++ void addi_d(Register rd, Register rj, int si12) { assert(is_simm(si12, 12), "not a signed 12-bit int"); emit_int32(insn_I12RR(addi_d_op, si12, (int)rj->encoding(), (int)rd->encoding())); } ++ void lu52i_d(Register rd, Register rj, int si12) { /*assert(is_simm(si12, 12), "not a signed 12-bit int");*/ emit_int32(insn_I12RR(lu52i_d_op, simm12(si12), (int)rj->encoding(), (int)rd->encoding())); } ++ void andi (Register rd, Register rj, int ui12) { assert(is_uimm(ui12, 12), "not a unsigned 12-bit int"); emit_int32(insn_I12RR(andi_op, ui12, (int)rj->encoding(), (int)rd->encoding())); } ++ void ori (Register rd, Register rj, int ui12) { assert(is_uimm(ui12, 12), "not a unsigned 12-bit 
int"); emit_int32(insn_I12RR(ori_op, ui12, (int)rj->encoding(), (int)rd->encoding())); } ++ void xori (Register rd, Register rj, int ui12) { assert(is_uimm(ui12, 12), "not a unsigned 12-bit int"); emit_int32(insn_I12RR(xori_op, ui12, (int)rj->encoding(), (int)rd->encoding())); } ++ ++ void fmadd_s (FloatRegister fd, FloatRegister fj, FloatRegister fk, FloatRegister fa) { emit_int32(insn_RRRR(fmadd_s_op , (int)fa->encoding(), (int)fk->encoding(), (int)fj->encoding(), (int)fd->encoding())); } ++ void fmadd_d (FloatRegister fd, FloatRegister fj, FloatRegister fk, FloatRegister fa) { emit_int32(insn_RRRR(fmadd_d_op , (int)fa->encoding(), (int)fk->encoding(), (int)fj->encoding(), (int)fd->encoding())); } ++ void fmsub_s (FloatRegister fd, FloatRegister fj, FloatRegister fk, FloatRegister fa) { emit_int32(insn_RRRR(fmsub_s_op , (int)fa->encoding(), (int)fk->encoding(), (int)fj->encoding(), (int)fd->encoding())); } ++ void fmsub_d (FloatRegister fd, FloatRegister fj, FloatRegister fk, FloatRegister fa) { emit_int32(insn_RRRR(fmsub_d_op , (int)fa->encoding(), (int)fk->encoding(), (int)fj->encoding(), (int)fd->encoding())); } ++ void fnmadd_s (FloatRegister fd, FloatRegister fj, FloatRegister fk, FloatRegister fa) { emit_int32(insn_RRRR(fnmadd_s_op , (int)fa->encoding(), (int)fk->encoding(), (int)fj->encoding(), (int)fd->encoding())); } ++ void fnmadd_d (FloatRegister fd, FloatRegister fj, FloatRegister fk, FloatRegister fa) { emit_int32(insn_RRRR(fnmadd_d_op , (int)fa->encoding(), (int)fk->encoding(), (int)fj->encoding(), (int)fd->encoding())); } ++ void fnmsub_s (FloatRegister fd, FloatRegister fj, FloatRegister fk, FloatRegister fa) { emit_int32(insn_RRRR(fnmsub_s_op , (int)fa->encoding(), (int)fk->encoding(), (int)fj->encoding(), (int)fd->encoding())); } ++ void fnmsub_d (FloatRegister fd, FloatRegister fj, FloatRegister fk, FloatRegister fa) { emit_int32(insn_RRRR(fnmsub_d_op , (int)fa->encoding(), (int)fk->encoding(), (int)fj->encoding(), (int)fd->encoding())); } ++ ++ void fcmp_caf_s (ConditionalFlagRegister cd, FloatRegister fj, FloatRegister fk) { emit_int32(insn_RRRR(fcmp_cond_s_op, fcmp_caf, (int)fk->encoding(), (int)fj->encoding(), (int)cd->encoding())); } ++ void fcmp_cun_s (ConditionalFlagRegister cd, FloatRegister fj, FloatRegister fk) { emit_int32(insn_RRRR(fcmp_cond_s_op, fcmp_cun , (int)fk->encoding(), (int)fj->encoding(), (int)cd->encoding())); } ++ void fcmp_ceq_s (ConditionalFlagRegister cd, FloatRegister fj, FloatRegister fk) { emit_int32(insn_RRRR(fcmp_cond_s_op, fcmp_ceq , (int)fk->encoding(), (int)fj->encoding(), (int)cd->encoding())); } ++ void fcmp_cueq_s (ConditionalFlagRegister cd, FloatRegister fj, FloatRegister fk) { emit_int32(insn_RRRR(fcmp_cond_s_op, fcmp_cueq, (int)fk->encoding(), (int)fj->encoding(), (int)cd->encoding())); } ++ void fcmp_clt_s (ConditionalFlagRegister cd, FloatRegister fj, FloatRegister fk) { emit_int32(insn_RRRR(fcmp_cond_s_op, fcmp_clt , (int)fk->encoding(), (int)fj->encoding(), (int)cd->encoding())); } ++ void fcmp_cult_s (ConditionalFlagRegister cd, FloatRegister fj, FloatRegister fk) { emit_int32(insn_RRRR(fcmp_cond_s_op, fcmp_cult, (int)fk->encoding(), (int)fj->encoding(), (int)cd->encoding())); } ++ void fcmp_cle_s (ConditionalFlagRegister cd, FloatRegister fj, FloatRegister fk) { emit_int32(insn_RRRR(fcmp_cond_s_op, fcmp_cle , (int)fk->encoding(), (int)fj->encoding(), (int)cd->encoding())); } ++ void fcmp_cule_s (ConditionalFlagRegister cd, FloatRegister fj, FloatRegister fk) { emit_int32(insn_RRRR(fcmp_cond_s_op, fcmp_cule, 
(int)fk->encoding(), (int)fj->encoding(), (int)cd->encoding())); } ++ void fcmp_cne_s (ConditionalFlagRegister cd, FloatRegister fj, FloatRegister fk) { emit_int32(insn_RRRR(fcmp_cond_s_op, fcmp_cne , (int)fk->encoding(), (int)fj->encoding(), (int)cd->encoding())); } ++ void fcmp_cor_s (ConditionalFlagRegister cd, FloatRegister fj, FloatRegister fk) { emit_int32(insn_RRRR(fcmp_cond_s_op, fcmp_cor , (int)fk->encoding(), (int)fj->encoding(), (int)cd->encoding())); } ++ void fcmp_cune_s (ConditionalFlagRegister cd, FloatRegister fj, FloatRegister fk) { emit_int32(insn_RRRR(fcmp_cond_s_op, fcmp_cune, (int)fk->encoding(), (int)fj->encoding(), (int)cd->encoding())); } ++ void fcmp_saf_s (ConditionalFlagRegister cd, FloatRegister fj, FloatRegister fk) { emit_int32(insn_RRRR(fcmp_cond_s_op, fcmp_saf , (int)fk->encoding(), (int)fj->encoding(), (int)cd->encoding())); } ++ void fcmp_sun_s (ConditionalFlagRegister cd, FloatRegister fj, FloatRegister fk) { emit_int32(insn_RRRR(fcmp_cond_s_op, fcmp_sun , (int)fk->encoding(), (int)fj->encoding(), (int)cd->encoding())); } ++ void fcmp_seq_s (ConditionalFlagRegister cd, FloatRegister fj, FloatRegister fk) { emit_int32(insn_RRRR(fcmp_cond_s_op, fcmp_seq , (int)fk->encoding(), (int)fj->encoding(), (int)cd->encoding())); } ++ void fcmp_sueq_s (ConditionalFlagRegister cd, FloatRegister fj, FloatRegister fk) { emit_int32(insn_RRRR(fcmp_cond_s_op, fcmp_sueq, (int)fk->encoding(), (int)fj->encoding(), (int)cd->encoding())); } ++ void fcmp_slt_s (ConditionalFlagRegister cd, FloatRegister fj, FloatRegister fk) { emit_int32(insn_RRRR(fcmp_cond_s_op, fcmp_slt , (int)fk->encoding(), (int)fj->encoding(), (int)cd->encoding())); } ++ void fcmp_sult_s (ConditionalFlagRegister cd, FloatRegister fj, FloatRegister fk) { emit_int32(insn_RRRR(fcmp_cond_s_op, fcmp_sult, (int)fk->encoding(), (int)fj->encoding(), (int)cd->encoding())); } ++ void fcmp_sle_s (ConditionalFlagRegister cd, FloatRegister fj, FloatRegister fk) { emit_int32(insn_RRRR(fcmp_cond_s_op, fcmp_sle , (int)fk->encoding(), (int)fj->encoding(), (int)cd->encoding())); } ++ void fcmp_sule_s (ConditionalFlagRegister cd, FloatRegister fj, FloatRegister fk) { emit_int32(insn_RRRR(fcmp_cond_s_op, fcmp_sule, (int)fk->encoding(), (int)fj->encoding(), (int)cd->encoding())); } ++ void fcmp_sne_s (ConditionalFlagRegister cd, FloatRegister fj, FloatRegister fk) { emit_int32(insn_RRRR(fcmp_cond_s_op, fcmp_sne , (int)fk->encoding(), (int)fj->encoding(), (int)cd->encoding())); } ++ void fcmp_sor_s (ConditionalFlagRegister cd, FloatRegister fj, FloatRegister fk) { emit_int32(insn_RRRR(fcmp_cond_s_op, fcmp_sor , (int)fk->encoding(), (int)fj->encoding(), (int)cd->encoding())); } ++ void fcmp_sune_s (ConditionalFlagRegister cd, FloatRegister fj, FloatRegister fk) { emit_int32(insn_RRRR(fcmp_cond_s_op, fcmp_sune, (int)fk->encoding(), (int)fj->encoding(), (int)cd->encoding())); } ++ ++ void fcmp_caf_d (ConditionalFlagRegister cd, FloatRegister fj, FloatRegister fk) { emit_int32(insn_RRRR(fcmp_cond_d_op, fcmp_caf, (int)fk->encoding(), (int)fj->encoding(), (int)cd->encoding())); } ++ void fcmp_cun_d (ConditionalFlagRegister cd, FloatRegister fj, FloatRegister fk) { emit_int32(insn_RRRR(fcmp_cond_d_op, fcmp_cun , (int)fk->encoding(), (int)fj->encoding(), (int)cd->encoding())); } ++ void fcmp_ceq_d (ConditionalFlagRegister cd, FloatRegister fj, FloatRegister fk) { emit_int32(insn_RRRR(fcmp_cond_d_op, fcmp_ceq , (int)fk->encoding(), (int)fj->encoding(), (int)cd->encoding())); } ++ void fcmp_cueq_d (ConditionalFlagRegister cd, FloatRegister 
fj, FloatRegister fk) { emit_int32(insn_RRRR(fcmp_cond_d_op, fcmp_cueq, (int)fk->encoding(), (int)fj->encoding(), (int)cd->encoding())); } ++ void fcmp_clt_d (ConditionalFlagRegister cd, FloatRegister fj, FloatRegister fk) { emit_int32(insn_RRRR(fcmp_cond_d_op, fcmp_clt , (int)fk->encoding(), (int)fj->encoding(), (int)cd->encoding())); } ++ void fcmp_cult_d (ConditionalFlagRegister cd, FloatRegister fj, FloatRegister fk) { emit_int32(insn_RRRR(fcmp_cond_d_op, fcmp_cult, (int)fk->encoding(), (int)fj->encoding(), (int)cd->encoding())); } ++ void fcmp_cle_d (ConditionalFlagRegister cd, FloatRegister fj, FloatRegister fk) { emit_int32(insn_RRRR(fcmp_cond_d_op, fcmp_cle , (int)fk->encoding(), (int)fj->encoding(), (int)cd->encoding())); } ++ void fcmp_cule_d (ConditionalFlagRegister cd, FloatRegister fj, FloatRegister fk) { emit_int32(insn_RRRR(fcmp_cond_d_op, fcmp_cule, (int)fk->encoding(), (int)fj->encoding(), (int)cd->encoding())); } ++ void fcmp_cne_d (ConditionalFlagRegister cd, FloatRegister fj, FloatRegister fk) { emit_int32(insn_RRRR(fcmp_cond_d_op, fcmp_cne , (int)fk->encoding(), (int)fj->encoding(), (int)cd->encoding())); } ++ void fcmp_cor_d (ConditionalFlagRegister cd, FloatRegister fj, FloatRegister fk) { emit_int32(insn_RRRR(fcmp_cond_d_op, fcmp_cor , (int)fk->encoding(), (int)fj->encoding(), (int)cd->encoding())); } ++ void fcmp_cune_d (ConditionalFlagRegister cd, FloatRegister fj, FloatRegister fk) { emit_int32(insn_RRRR(fcmp_cond_d_op, fcmp_cune, (int)fk->encoding(), (int)fj->encoding(), (int)cd->encoding())); } ++ void fcmp_saf_d (ConditionalFlagRegister cd, FloatRegister fj, FloatRegister fk) { emit_int32(insn_RRRR(fcmp_cond_d_op, fcmp_saf , (int)fk->encoding(), (int)fj->encoding(), (int)cd->encoding())); } ++ void fcmp_sun_d (ConditionalFlagRegister cd, FloatRegister fj, FloatRegister fk) { emit_int32(insn_RRRR(fcmp_cond_d_op, fcmp_sun , (int)fk->encoding(), (int)fj->encoding(), (int)cd->encoding())); } ++ void fcmp_seq_d (ConditionalFlagRegister cd, FloatRegister fj, FloatRegister fk) { emit_int32(insn_RRRR(fcmp_cond_d_op, fcmp_seq , (int)fk->encoding(), (int)fj->encoding(), (int)cd->encoding())); } ++ void fcmp_sueq_d (ConditionalFlagRegister cd, FloatRegister fj, FloatRegister fk) { emit_int32(insn_RRRR(fcmp_cond_d_op, fcmp_sueq, (int)fk->encoding(), (int)fj->encoding(), (int)cd->encoding())); } ++ void fcmp_slt_d (ConditionalFlagRegister cd, FloatRegister fj, FloatRegister fk) { emit_int32(insn_RRRR(fcmp_cond_d_op, fcmp_slt , (int)fk->encoding(), (int)fj->encoding(), (int)cd->encoding())); } ++ void fcmp_sult_d (ConditionalFlagRegister cd, FloatRegister fj, FloatRegister fk) { emit_int32(insn_RRRR(fcmp_cond_d_op, fcmp_sult, (int)fk->encoding(), (int)fj->encoding(), (int)cd->encoding())); } ++ void fcmp_sle_d (ConditionalFlagRegister cd, FloatRegister fj, FloatRegister fk) { emit_int32(insn_RRRR(fcmp_cond_d_op, fcmp_sle , (int)fk->encoding(), (int)fj->encoding(), (int)cd->encoding())); } ++ void fcmp_sule_d (ConditionalFlagRegister cd, FloatRegister fj, FloatRegister fk) { emit_int32(insn_RRRR(fcmp_cond_d_op, fcmp_sule, (int)fk->encoding(), (int)fj->encoding(), (int)cd->encoding())); } ++ void fcmp_sne_d (ConditionalFlagRegister cd, FloatRegister fj, FloatRegister fk) { emit_int32(insn_RRRR(fcmp_cond_d_op, fcmp_sne , (int)fk->encoding(), (int)fj->encoding(), (int)cd->encoding())); } ++ void fcmp_sor_d (ConditionalFlagRegister cd, FloatRegister fj, FloatRegister fk) { emit_int32(insn_RRRR(fcmp_cond_d_op, fcmp_sor , (int)fk->encoding(), (int)fj->encoding(), 
(int)cd->encoding())); } ++ void fcmp_sune_d (ConditionalFlagRegister cd, FloatRegister fj, FloatRegister fk) { emit_int32(insn_RRRR(fcmp_cond_d_op, fcmp_sune, (int)fk->encoding(), (int)fj->encoding(), (int)cd->encoding())); } ++ ++ void fsel (FloatRegister fd, FloatRegister fj, FloatRegister fk, ConditionalFlagRegister ca) { emit_int32(insn_RRRR(fsel_op, (int)ca->encoding(), (int)fk->encoding(), (int)fj->encoding(), (int)fd->encoding())); } ++ ++ void addu16i_d(Register rj, Register rd, int si16) { assert(is_simm(si16, 16), "not a signed 16-bit int"); emit_int32(insn_I16RR(addu16i_d_op, si16, (int)rj->encoding(), (int)rd->encoding())); } ++ ++ void lu12i_w(Register rj, int si20) { /*assert(is_simm(si20, 20), "not a signed 20-bit int");*/ emit_int32(insn_I20R(lu12i_w_op, simm20(si20), (int)rj->encoding())); } ++ void lu32i_d(Register rj, int si20) { /*assert(is_simm(si20, 20), "not a signed 20-bit int");*/ emit_int32(insn_I20R(lu32i_d_op, simm20(si20), (int)rj->encoding())); } ++ void pcaddi(Register rj, int si20) { assert(is_simm(si20, 20), "not a signed 20-bit int"); emit_int32(insn_I20R(pcaddi_op, si20, (int)rj->encoding())); } ++ void pcalau12i(Register rj, int si20) { assert(is_simm(si20, 20), "not a signed 20-bit int"); emit_int32(insn_I20R(pcalau12i_op, si20, (int)rj->encoding())); } ++ void pcaddu12i(Register rj, int si20) { assert(is_simm(si20, 20), "not a signed 20-bit int"); emit_int32(insn_I20R(pcaddu12i_op, si20, (int)rj->encoding())); } ++ void pcaddu18i(Register rj, int si20) { assert(is_simm(si20, 20), "not a signed 20-bit int"); emit_int32(insn_I20R(pcaddu18i_op, si20, (int)rj->encoding())); } ++ ++ void ll_w (Register rd, Register rj, int si16) { assert(is_simm(si16, 16) && ((si16 & 0x3) == 0), "not a signed 16-bit int"); emit_int32(insn_I14RR(ll_w_op, si16>>2, (int)rj->encoding(), (int)rd->encoding())); } ++ void sc_w (Register rd, Register rj, int si16) { assert(is_simm(si16, 16) && ((si16 & 0x3) == 0), "not a signed 16-bit int"); emit_int32(insn_I14RR(sc_w_op, si16>>2, (int)rj->encoding(), (int)rd->encoding())); } ++ void ll_d (Register rd, Register rj, int si16) { assert(is_simm(si16, 16) && ((si16 & 0x3) == 0), "not a signed 16-bit int"); emit_int32(insn_I14RR(ll_d_op, si16>>2, (int)rj->encoding(), (int)rd->encoding())); } ++ void sc_d (Register rd, Register rj, int si16) { assert(is_simm(si16, 16) && ((si16 & 0x3) == 0), "not a signed 16-bit int"); emit_int32(insn_I14RR(sc_d_op, si16>>2, (int)rj->encoding(), (int)rd->encoding())); } ++ void ldptr_w (Register rd, Register rj, int si16) { assert(is_simm(si16, 16) && ((si16 & 0x3) == 0), "not a signed 16-bit int"); emit_int32(insn_I14RR(ldptr_w_op, si16>>2, (int)rj->encoding(), (int)rd->encoding())); } ++ void stptr_w (Register rd, Register rj, int si16) { assert(is_simm(si16, 16) && ((si16 & 0x3) == 0), "not a signed 16-bit int"); emit_int32(insn_I14RR(stptr_w_op, si16>>2, (int)rj->encoding(), (int)rd->encoding())); } ++ void ldptr_d (Register rd, Register rj, int si16) { assert(is_simm(si16, 16) && ((si16 & 0x3) == 0), "not a signed 16-bit int"); emit_int32(insn_I14RR(ldptr_d_op, si16>>2, (int)rj->encoding(), (int)rd->encoding())); } ++ void stptr_d (Register rd, Register rj, int si16) { assert(is_simm(si16, 16) && ((si16 & 0x3) == 0), "not a signed 16-bit int"); emit_int32(insn_I14RR(stptr_d_op, si16>>2, (int)rj->encoding(), (int)rd->encoding())); } ++ ++ void ld_b (Register rd, Register rj, int si12) { assert(is_simm(si12, 12), "not a signed 12-bit int"); emit_int32(insn_I12RR(ld_b_op, si12, (int)rj->encoding(), 
(int)rd->encoding())); } ++ void ld_h (Register rd, Register rj, int si12) { assert(is_simm(si12, 12), "not a signed 12-bit int"); emit_int32(insn_I12RR(ld_h_op, si12, (int)rj->encoding(), (int)rd->encoding())); } ++ void ld_w (Register rd, Register rj, int si12) { assert(is_simm(si12, 12), "not a signed 12-bit int"); emit_int32(insn_I12RR(ld_w_op, si12, (int)rj->encoding(), (int)rd->encoding())); } ++ void ld_d (Register rd, Register rj, int si12) { assert(is_simm(si12, 12), "not a signed 12-bit int"); emit_int32(insn_I12RR(ld_d_op, si12, (int)rj->encoding(), (int)rd->encoding())); } ++ void st_b (Register rd, Register rj, int si12) { assert(is_simm(si12, 12), "not a signed 12-bit int"); emit_int32(insn_I12RR(st_b_op, si12, (int)rj->encoding(), (int)rd->encoding())); } ++ void st_h (Register rd, Register rj, int si12) { assert(is_simm(si12, 12), "not a signed 12-bit int"); emit_int32(insn_I12RR(st_h_op, si12, (int)rj->encoding(), (int)rd->encoding())); } ++ void st_w (Register rd, Register rj, int si12) { assert(is_simm(si12, 12), "not a signed 12-bit int"); emit_int32(insn_I12RR(st_w_op, si12, (int)rj->encoding(), (int)rd->encoding())); } ++ void st_d (Register rd, Register rj, int si12) { assert(is_simm(si12, 12), "not a signed 12-bit int"); emit_int32(insn_I12RR(st_d_op, si12, (int)rj->encoding(), (int)rd->encoding())); } ++ void ld_bu (Register rd, Register rj, int si12) { assert(is_simm(si12, 12), "not a signed 12-bit int"); emit_int32(insn_I12RR(ld_bu_op, si12, (int)rj->encoding(), (int)rd->encoding())); } ++ void ld_hu (Register rd, Register rj, int si12) { assert(is_simm(si12, 12), "not a signed 12-bit int"); emit_int32(insn_I12RR(ld_hu_op, si12, (int)rj->encoding(), (int)rd->encoding())); } ++ void ld_wu (Register rd, Register rj, int si12) { assert(is_simm(si12, 12), "not a signed 12-bit int"); emit_int32(insn_I12RR(ld_wu_op, si12, (int)rj->encoding(), (int)rd->encoding())); } ++ void preld (int hint, Register rj, int si12) { assert(is_uimm(hint, 5), "not a unsigned 5-bit int"); assert(is_simm(si12, 12), "not a signed 12-bit int"); emit_int32(insn_I12RR(preld_op, si12, (int)rj->encoding(), hint)); } ++ void fld_s (FloatRegister fd, Register rj, int si12) { assert(is_simm(si12, 12), "not a signed 12-bit int"); emit_int32(insn_I12RR(fld_s_op, si12, (int)rj->encoding(), (int)fd->encoding())); } ++ void fst_s (FloatRegister fd, Register rj, int si12) { assert(is_simm(si12, 12), "not a signed 12-bit int"); emit_int32(insn_I12RR(fst_s_op, si12, (int)rj->encoding(), (int)fd->encoding())); } ++ void fld_d (FloatRegister fd, Register rj, int si12) { assert(is_simm(si12, 12), "not a signed 12-bit int"); emit_int32(insn_I12RR(fld_d_op, si12, (int)rj->encoding(), (int)fd->encoding())); } ++ void fst_d (FloatRegister fd, Register rj, int si12) { assert(is_simm(si12, 12), "not a signed 12-bit int"); emit_int32(insn_I12RR(fst_d_op, si12, (int)rj->encoding(), (int)fd->encoding())); } ++ void ldl_w (Register rd, Register rj, int si12) { assert(is_simm(si12, 12), "not a signed 12-bit int"); emit_int32(insn_I12RR(ldl_w_op, si12, (int)rj->encoding(), (int)rd->encoding())); } ++ void ldr_w (Register rd, Register rj, int si12) { assert(is_simm(si12, 12), "not a signed 12-bit int"); emit_int32(insn_I12RR(ldr_w_op, si12, (int)rj->encoding(), (int)rd->encoding())); } ++ ++ void ldx_b (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(ldx_b_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } ++ void ldx_h (Register rd, Register rj, Register rk) { 
emit_int32(insn_RRR(ldx_h_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } ++ void ldx_w (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(ldx_w_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } ++ void ldx_d (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(ldx_d_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } ++ void stx_b (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(stx_b_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } ++ void stx_h (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(stx_h_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } ++ void stx_w (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(stx_w_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } ++ void stx_d (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(stx_d_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } ++ void ldx_bu (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(ldx_bu_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } ++ void ldx_hu (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(ldx_hu_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } ++ void ldx_wu (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(ldx_wu_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } ++ void fldx_s (FloatRegister fd, Register rj, Register rk) { emit_int32(insn_RRR(fldx_s_op, (int)rk->encoding(), (int)rj->encoding(), (int)fd->encoding())); } ++ void fldx_d (FloatRegister fd, Register rj, Register rk) { emit_int32(insn_RRR(fldx_d_op, (int)rk->encoding(), (int)rj->encoding(), (int)fd->encoding())); } ++ void fstx_s (FloatRegister fd, Register rj, Register rk) { emit_int32(insn_RRR(fstx_s_op, (int)rk->encoding(), (int)rj->encoding(), (int)fd->encoding())); } ++ void fstx_d (FloatRegister fd, Register rj, Register rk) { emit_int32(insn_RRR(fstx_d_op, (int)rk->encoding(), (int)rj->encoding(), (int)fd->encoding())); } ++ ++ void ld_b (Register rd, Address src); ++ void ld_bu (Register rd, Address src); ++ void ld_d (Register rd, Address src); ++ void ld_h (Register rd, Address src); ++ void ld_hu (Register rd, Address src); ++ void ll_w (Register rd, Address src); ++ void ll_d (Register rd, Address src); ++ void ld_wu (Register rd, Address src); ++ void ld_w (Register rd, Address src); ++ void st_b (Register rd, Address dst); ++ void st_d (Register rd, Address dst); ++ void st_w (Register rd, Address dst); ++ void sc_w (Register rd, Address dst); ++ void sc_d (Register rd, Address dst); ++ void st_h (Register rd, Address dst); ++ void fld_s (FloatRegister fd, Address src); ++ void fld_d (FloatRegister fd, Address src); ++ void fst_s (FloatRegister fd, Address dst); ++ void fst_d (FloatRegister fd, Address dst); ++ ++ void amswap_w (Register rd, Register rk, Register rj) { assert_different_registers(rd, rj); assert_different_registers(rd, rk); emit_int32(insn_RRR(amswap_w_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } ++ void amswap_d (Register rd, Register rk, Register rj) { assert_different_registers(rd, rj); assert_different_registers(rd, rk); emit_int32(insn_RRR(amswap_d_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } ++ void amadd_w (Register rd, Register rk, Register rj) { assert_different_registers(rd, rj); assert_different_registers(rd, rk); 
emit_int32(insn_RRR(amadd_w_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } ++ void amadd_d (Register rd, Register rk, Register rj) { assert_different_registers(rd, rj); assert_different_registers(rd, rk); emit_int32(insn_RRR(amadd_d_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } ++ void amand_w (Register rd, Register rk, Register rj) { assert_different_registers(rd, rj); assert_different_registers(rd, rk); emit_int32(insn_RRR(amand_w_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } ++ void amand_d (Register rd, Register rk, Register rj) { assert_different_registers(rd, rj); assert_different_registers(rd, rk); emit_int32(insn_RRR(amand_d_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } ++ void amor_w (Register rd, Register rk, Register rj) { assert_different_registers(rd, rj); assert_different_registers(rd, rk); emit_int32(insn_RRR(amor_w_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } ++ void amor_d (Register rd, Register rk, Register rj) { assert_different_registers(rd, rj); assert_different_registers(rd, rk); emit_int32(insn_RRR(amor_d_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } ++ void amxor_w (Register rd, Register rk, Register rj) { assert_different_registers(rd, rj); assert_different_registers(rd, rk); emit_int32(insn_RRR(amxor_w_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } ++ void amxor_d (Register rd, Register rk, Register rj) { assert_different_registers(rd, rj); assert_different_registers(rd, rk); emit_int32(insn_RRR(amxor_d_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } ++ void ammax_w (Register rd, Register rk, Register rj) { assert_different_registers(rd, rj); assert_different_registers(rd, rk); emit_int32(insn_RRR(ammax_w_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } ++ void ammax_d (Register rd, Register rk, Register rj) { assert_different_registers(rd, rj); assert_different_registers(rd, rk); emit_int32(insn_RRR(ammax_d_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } ++ void ammin_w (Register rd, Register rk, Register rj) { assert_different_registers(rd, rj); assert_different_registers(rd, rk); emit_int32(insn_RRR(ammin_w_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } ++ void ammin_d (Register rd, Register rk, Register rj) { assert_different_registers(rd, rj); assert_different_registers(rd, rk); emit_int32(insn_RRR(ammin_d_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } ++ void ammax_wu (Register rd, Register rk, Register rj) { assert_different_registers(rd, rj); assert_different_registers(rd, rk); emit_int32(insn_RRR(ammax_wu_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } ++ void ammax_du (Register rd, Register rk, Register rj) { assert_different_registers(rd, rj); assert_different_registers(rd, rk); emit_int32(insn_RRR(ammax_du_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } ++ void ammin_wu (Register rd, Register rk, Register rj) { assert_different_registers(rd, rj); assert_different_registers(rd, rk); emit_int32(insn_RRR(ammin_wu_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } ++ void ammin_du (Register rd, Register rk, Register rj) { assert_different_registers(rd, rj); assert_different_registers(rd, rk); emit_int32(insn_RRR(ammin_du_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } ++ void
amswap_db_w(Register rd, Register rk, Register rj) { assert_different_registers(rd, rj); assert_different_registers(rd, rk); emit_int32(insn_RRR(amswap_db_w_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } ++ void amswap_db_d(Register rd, Register rk, Register rj) { assert_different_registers(rd, rj); assert_different_registers(rd, rk); emit_int32(insn_RRR(amswap_db_d_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } ++ void amadd_db_w (Register rd, Register rk, Register rj) { assert_different_registers(rd, rj); assert_different_registers(rd, rk); emit_int32(insn_RRR(amadd_db_w_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } ++ void amadd_db_d (Register rd, Register rk, Register rj) { assert_different_registers(rd, rj); assert_different_registers(rd, rk); emit_int32(insn_RRR(amadd_db_d_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } ++ void amand_db_w (Register rd, Register rk, Register rj) { assert_different_registers(rd, rj); assert_different_registers(rd, rk); emit_int32(insn_RRR(amand_db_w_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } ++ void amand_db_d (Register rd, Register rk, Register rj) { assert_different_registers(rd, rj); assert_different_registers(rd, rk); emit_int32(insn_RRR(amand_db_d_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } ++ void amor_db_w (Register rd, Register rk, Register rj) { assert_different_registers(rd, rj); assert_different_registers(rd, rk); emit_int32(insn_RRR(amor_db_w_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } ++ void amor_db_d (Register rd, Register rk, Register rj) { assert_different_registers(rd, rj); assert_different_registers(rd, rk); emit_int32(insn_RRR(amor_db_d_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } ++ void amxor_db_w (Register rd, Register rk, Register rj) { assert_different_registers(rd, rj); assert_different_registers(rd, rk); emit_int32(insn_RRR(amxor_db_w_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } ++ void amxor_db_d (Register rd, Register rk, Register rj) { assert_different_registers(rd, rj); assert_different_registers(rd, rk); emit_int32(insn_RRR(amxor_db_d_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } ++ void ammax_db_w (Register rd, Register rk, Register rj) { assert_different_registers(rd, rj); assert_different_registers(rd, rk); emit_int32(insn_RRR(ammax_db_w_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } ++ void ammax_db_d (Register rd, Register rk, Register rj) { assert_different_registers(rd, rj); assert_different_registers(rd, rk); emit_int32(insn_RRR(ammax_db_d_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } ++ void ammin_db_w (Register rd, Register rk, Register rj) { assert_different_registers(rd, rj); assert_different_registers(rd, rk); emit_int32(insn_RRR(ammin_db_w_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } ++ void ammin_db_d (Register rd, Register rk, Register rj) { assert_different_registers(rd, rj); assert_different_registers(rd, rk); emit_int32(insn_RRR(ammin_db_d_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } ++ void ammax_db_wu(Register rd, Register rk, Register rj) { assert_different_registers(rd, rj); assert_different_registers(rd, rk); emit_int32(insn_RRR(ammax_db_wu_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } ++ void ammax_db_du(Register rd, 
Register rk, Register rj) { assert_different_registers(rd, rj); assert_different_registers(rd, rk); emit_int32(insn_RRR(ammax_db_du_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } ++ void ammin_db_wu(Register rd, Register rk, Register rj) { assert_different_registers(rd, rj); assert_different_registers(rd, rk); emit_int32(insn_RRR(ammin_db_wu_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } ++ void ammin_db_du(Register rd, Register rk, Register rj) { assert_different_registers(rd, rj); assert_different_registers(rd, rk); emit_int32(insn_RRR(ammin_db_du_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } ++ ++ void dbar(int hint) { ++ assert(is_uimm(hint, 15), "not a unsigned 15-bit int"); ++ ++ if (os::is_ActiveCoresMP()) ++ andi(R0, R0, 0); ++ else ++ emit_int32(insn_I15(dbar_op, hint)); ++ } ++ void ibar(int hint) { assert(is_uimm(hint, 15), "not a unsigned 15-bit int"); emit_int32(insn_I15(ibar_op, hint)); } ++ ++ void fldgt_s (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(fldgt_s_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } ++ void fldgt_d (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(fldgt_d_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } ++ void fldle_s (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(fldle_s_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } ++ void fldle_d (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(fldle_d_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } ++ void fstgt_s (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(fstgt_s_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } ++ void fstgt_d (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(fstgt_d_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } ++ void fstle_s (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(fstle_s_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } ++ void fstle_d (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(fstle_d_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } ++ ++ void ldgt_b (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(ldgt_b_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } ++ void ldgt_h (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(ldgt_h_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } ++ void ldgt_w (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(ldgt_w_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } ++ void ldgt_d (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(ldgt_d_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } ++ void ldle_b (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(ldle_b_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } ++ void ldle_h (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(ldle_h_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } ++ void ldle_w (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(ldle_w_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } ++ void ldle_d (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(ldle_d_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } ++ void stgt_b (Register rd, Register rj, Register rk) { 
emit_int32(insn_RRR(stgt_b_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } ++ void stgt_h (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(stgt_h_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } ++ void stgt_w (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(stgt_w_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } ++ void stgt_d (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(stgt_d_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } ++ void stle_b (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(stle_b_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } ++ void stle_h (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(stle_h_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } ++ void stle_w (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(stle_w_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } ++ void stle_d (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(stle_d_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } ++ ++ void beqz(Register rj, int offs) { assert(is_simm(offs, 21), "not a signed 21-bit int"); emit_int32(insn_IRI(beqz_op, offs, (int)rj->encoding())); } ++ void bnez(Register rj, int offs) { assert(is_simm(offs, 21), "not a signed 21-bit int"); emit_int32(insn_IRI(bnez_op, offs, (int)rj->encoding())); } ++ void bceqz(ConditionalFlagRegister cj, int offs) { assert(is_simm(offs, 21), "not a signed 21-bit int"); emit_int32(insn_IRI(bccondz_op, offs, ( (0b00<<3) | (int)cj->encoding()))); } ++ void bcnez(ConditionalFlagRegister cj, int offs) { assert(is_simm(offs, 21), "not a signed 21-bit int"); emit_int32(insn_IRI(bccondz_op, offs, ( (0b01<<3) | (int)cj->encoding()))); } ++ ++ void jirl(Register rd, Register rj, int offs) { assert(is_simm(offs, 18) && ((offs & 3) == 0), "not a signed 18-bit int"); emit_int32(insn_I16RR(jirl_op, offs >> 2, (int)rj->encoding(), (int)rd->encoding())); } ++ ++ void b(int offs) { assert(is_simm(offs, 26), "not a signed 26-bit int"); emit_int32(insn_I26(b_op, offs)); } ++ void bl(int offs) { assert(is_simm(offs, 26), "not a signed 26-bit int"); emit_int32(insn_I26(bl_op, offs)); } ++ ++ ++ void beq(Register rj, Register rd, int offs) { assert(is_simm(offs, 16), "not a signed 16-bit int"); emit_int32(insn_I16RR(beq_op, offs, (int)rj->encoding(), (int)rd->encoding())); } ++ void bne(Register rj, Register rd, int offs) { assert(is_simm(offs, 16), "not a signed 16-bit int"); emit_int32(insn_I16RR(bne_op, offs, (int)rj->encoding(), (int)rd->encoding())); } ++ void blt(Register rj, Register rd, int offs) { assert(is_simm(offs, 16), "not a signed 16-bit int"); emit_int32(insn_I16RR(blt_op, offs, (int)rj->encoding(), (int)rd->encoding())); } ++ void bge(Register rj, Register rd, int offs) { assert(is_simm(offs, 16), "not a signed 16-bit int"); emit_int32(insn_I16RR(bge_op, offs, (int)rj->encoding(), (int)rd->encoding())); } ++ void bltu(Register rj, Register rd, int offs) { assert(is_simm(offs, 16), "not a signed 16-bit int"); emit_int32(insn_I16RR(bltu_op, offs, (int)rj->encoding(), (int)rd->encoding())); } ++ void bgeu(Register rj, Register rd, int offs) { assert(is_simm(offs, 16), "not a signed 16-bit int"); emit_int32(insn_I16RR(bgeu_op, offs, (int)rj->encoding(), (int)rd->encoding())); } ++ ++ void beq (Register rj, Register rd, address entry) { beq (rj, rd, offset16(entry)); } ++ void bne (Register rj, 
Register rd, address entry) { bne (rj, rd, offset16(entry)); } ++ void blt (Register rj, Register rd, address entry) { blt (rj, rd, offset16(entry)); } ++ void bge (Register rj, Register rd, address entry) { bge (rj, rd, offset16(entry)); } ++ void bltu (Register rj, Register rd, address entry) { bltu (rj, rd, offset16(entry)); } ++ void bgeu (Register rj, Register rd, address entry) { bgeu (rj, rd, offset16(entry)); } ++ void beqz (Register rj, address entry) { beqz (rj, offset21(entry)); } ++ void bnez (Register rj, address entry) { bnez (rj, offset21(entry)); } ++ void b(address entry) { b(offset26(entry)); } ++ void bl(address entry) { bl(offset26(entry)); } ++ void bceqz(ConditionalFlagRegister cj, address entry) { bceqz(cj, offset21(entry)); } ++ void bcnez(ConditionalFlagRegister cj, address entry) { bcnez(cj, offset21(entry)); } ++ ++ void beq (Register rj, Register rd, Label& L) { beq (rj, rd, target(L)); } ++ void bne (Register rj, Register rd, Label& L) { bne (rj, rd, target(L)); } ++ void blt (Register rj, Register rd, Label& L) { blt (rj, rd, target(L)); } ++ void bge (Register rj, Register rd, Label& L) { bge (rj, rd, target(L)); } ++ void bltu (Register rj, Register rd, Label& L) { bltu (rj, rd, target(L)); } ++ void bgeu (Register rj, Register rd, Label& L) { bgeu (rj, rd, target(L)); } ++ void beqz (Register rj, Label& L) { beqz (rj, target(L)); } ++ void bnez (Register rj, Label& L) { bnez (rj, target(L)); } ++ void b(Label& L) { b(target(L)); } ++ void bl(Label& L) { bl(target(L)); } ++ void bceqz(ConditionalFlagRegister cj, Label& L) { bceqz(cj, target(L)); } ++ void bcnez(ConditionalFlagRegister cj, Label& L) { bcnez(cj, target(L)); } ++ ++ typedef enum { ++ // hint[4] ++ Completion = 0, ++ Ordering = (1 << 4), ++ ++ // The bitwise-not of the constants below corresponds to the hint. This is convenient for OR operations.
++ // hint[3:2] and hint[1:0] ++ LoadLoad = ((1 << 3) | (1 << 1)), ++ LoadStore = ((1 << 3) | (1 << 0)), ++ StoreLoad = ((1 << 2) | (1 << 1)), ++ StoreStore = ((1 << 2) | (1 << 0)), ++ AnyAny = ((3 << 2) | (3 << 0)), ++ } Membar_mask_bits; ++ ++ // Serializes memory and blows flags ++ void membar(Membar_mask_bits hint) { ++ assert((hint & (3 << 0)) != 0, "membar mask unsupported!"); ++ assert((hint & (3 << 2)) != 0, "membar mask unsupported!"); ++ dbar(Ordering | (~hint & 0xf)); ++ } ++ ++ // LSX and LASX ++#define ASSERT_LSX assert(UseLSX, ""); ++#define ASSERT_LASX assert(UseLASX, ""); ++ ++ void vadd_b(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vadd_b_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vadd_h(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vadd_h_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vadd_w(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vadd_w_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vadd_d(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vadd_d_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vadd_q(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vadd_q_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void xvadd_b(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvadd_b_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvadd_h(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvadd_h_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvadd_w(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvadd_w_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvadd_d(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvadd_d_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvadd_q(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvadd_q_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void vsub_b(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vsub_b_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vsub_h(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vsub_h_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vsub_w(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vsub_w_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vsub_d(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vsub_d_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vsub_q(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vsub_q_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void xvsub_b(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvsub_b_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvsub_h(FloatRegister xd, 
FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvsub_h_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvsub_w(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvsub_w_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvsub_d(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvsub_d_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvsub_q(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvsub_q_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void vaddi_bu(FloatRegister vd, FloatRegister vj, int ui5) { ASSERT_LSX emit_int32(insn_I5RR( vaddi_bu_op, ui5, (int)vj->encoding(), (int)vd->encoding())); } ++ void vaddi_hu(FloatRegister vd, FloatRegister vj, int ui5) { ASSERT_LSX emit_int32(insn_I5RR( vaddi_hu_op, ui5, (int)vj->encoding(), (int)vd->encoding())); } ++ void vaddi_wu(FloatRegister vd, FloatRegister vj, int ui5) { ASSERT_LSX emit_int32(insn_I5RR( vaddi_wu_op, ui5, (int)vj->encoding(), (int)vd->encoding())); } ++ void vaddi_du(FloatRegister vd, FloatRegister vj, int ui5) { ASSERT_LSX emit_int32(insn_I5RR( vaddi_du_op, ui5, (int)vj->encoding(), (int)vd->encoding())); } ++ void xvaddi_bu(FloatRegister xd, FloatRegister xj, int ui5) { ASSERT_LASX emit_int32(insn_I5RR(xvaddi_bu_op, ui5, (int)xj->encoding(), (int)xd->encoding())); } ++ void xvaddi_hu(FloatRegister xd, FloatRegister xj, int ui5) { ASSERT_LASX emit_int32(insn_I5RR(xvaddi_hu_op, ui5, (int)xj->encoding(), (int)xd->encoding())); } ++ void xvaddi_wu(FloatRegister xd, FloatRegister xj, int ui5) { ASSERT_LASX emit_int32(insn_I5RR(xvaddi_wu_op, ui5, (int)xj->encoding(), (int)xd->encoding())); } ++ void xvaddi_du(FloatRegister xd, FloatRegister xj, int ui5) { ASSERT_LASX emit_int32(insn_I5RR(xvaddi_du_op, ui5, (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void vsubi_bu(FloatRegister vd, FloatRegister vj, int ui5) { ASSERT_LSX emit_int32(insn_I5RR( vsubi_bu_op, ui5, (int)vj->encoding(), (int)vd->encoding())); } ++ void vsubi_hu(FloatRegister vd, FloatRegister vj, int ui5) { ASSERT_LSX emit_int32(insn_I5RR( vsubi_hu_op, ui5, (int)vj->encoding(), (int)vd->encoding())); } ++ void vsubi_wu(FloatRegister vd, FloatRegister vj, int ui5) { ASSERT_LSX emit_int32(insn_I5RR( vsubi_wu_op, ui5, (int)vj->encoding(), (int)vd->encoding())); } ++ void vsubi_du(FloatRegister vd, FloatRegister vj, int ui5) { ASSERT_LSX emit_int32(insn_I5RR( vsubi_du_op, ui5, (int)vj->encoding(), (int)vd->encoding())); } ++ void xvsubi_bu(FloatRegister xd, FloatRegister xj, int ui5) { ASSERT_LASX emit_int32(insn_I5RR(xvsubi_bu_op, ui5, (int)xj->encoding(), (int)xd->encoding())); } ++ void xvsubi_hu(FloatRegister xd, FloatRegister xj, int ui5) { ASSERT_LASX emit_int32(insn_I5RR(xvsubi_hu_op, ui5, (int)xj->encoding(), (int)xd->encoding())); } ++ void xvsubi_wu(FloatRegister xd, FloatRegister xj, int ui5) { ASSERT_LASX emit_int32(insn_I5RR(xvsubi_wu_op, ui5, (int)xj->encoding(), (int)xd->encoding())); } ++ void xvsubi_du(FloatRegister xd, FloatRegister xj, int ui5) { ASSERT_LASX emit_int32(insn_I5RR(xvsubi_du_op, ui5, (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void vneg_b(FloatRegister vd, FloatRegister vj) { ASSERT_LSX emit_int32(insn_RR( vneg_b_op, (int)vj->encoding(), (int)vd->encoding())); } ++ void vneg_h(FloatRegister vd, FloatRegister vj) { ASSERT_LSX emit_int32(insn_RR( vneg_h_op, 
(int)vj->encoding(), (int)vd->encoding())); } ++ void vneg_w(FloatRegister vd, FloatRegister vj) { ASSERT_LSX emit_int32(insn_RR( vneg_w_op, (int)vj->encoding(), (int)vd->encoding())); } ++ void vneg_d(FloatRegister vd, FloatRegister vj) { ASSERT_LSX emit_int32(insn_RR( vneg_d_op, (int)vj->encoding(), (int)vd->encoding())); } ++ void xvneg_b(FloatRegister xd, FloatRegister xj) { ASSERT_LASX emit_int32(insn_RR(xvneg_b_op, (int)xj->encoding(), (int)xd->encoding())); } ++ void xvneg_h(FloatRegister xd, FloatRegister xj) { ASSERT_LASX emit_int32(insn_RR(xvneg_h_op, (int)xj->encoding(), (int)xd->encoding())); } ++ void xvneg_w(FloatRegister xd, FloatRegister xj) { ASSERT_LASX emit_int32(insn_RR(xvneg_w_op, (int)xj->encoding(), (int)xd->encoding())); } ++ void xvneg_d(FloatRegister xd, FloatRegister xj) { ASSERT_LASX emit_int32(insn_RR(xvneg_d_op, (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void vabsd_b(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vabsd_b_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vabsd_h(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vabsd_h_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vabsd_w(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vabsd_w_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vabsd_d(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vabsd_d_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void xvabsd_b(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvabsd_b_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvabsd_h(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvabsd_h_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvabsd_w(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvabsd_w_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvabsd_d(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvabsd_d_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void vmax_b(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vmax_b_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vmax_h(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vmax_h_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vmax_w(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vmax_w_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vmax_d(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vmax_d_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void xvmax_b(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvmax_b_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvmax_h(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvmax_h_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvmax_w(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX 
emit_int32(insn_RRR(xvmax_w_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvmax_d(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvmax_d_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void vmin_b(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vmin_b_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vmin_h(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vmin_h_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vmin_w(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vmin_w_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vmin_d(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vmin_d_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void xvmin_b(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvmin_b_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvmin_h(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvmin_h_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvmin_w(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvmin_w_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvmin_d(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvmin_d_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void vmul_b(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vmul_b_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vmul_h(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vmul_h_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vmul_w(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vmul_w_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vmul_d(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vmul_d_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void xvmul_b(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvmul_b_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvmul_h(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvmul_h_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvmul_w(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvmul_w_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvmul_d(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvmul_d_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void vmuh_b(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vmuh_b_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vmuh_h(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vmuh_h_op, (int)vk->encoding(), (int)vj->encoding(), 
(int)vd->encoding())); } ++ void vmuh_w(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vmuh_w_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vmuh_d(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vmuh_d_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void xvmuh_b(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvmuh_b_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvmuh_h(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvmuh_h_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvmuh_w(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvmuh_w_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvmuh_d(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvmuh_d_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void vmuh_bu(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vmuh_bu_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vmuh_hu(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vmuh_hu_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vmuh_wu(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vmuh_wu_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vmuh_du(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vmuh_du_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void xvmuh_bu(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvmuh_bu_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvmuh_hu(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvmuh_hu_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvmuh_wu(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvmuh_wu_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvmuh_du(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvmuh_du_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void vmulwev_h_b(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vmulwev_h_b_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vmulwev_w_h(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vmulwev_w_h_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vmulwev_d_w(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vmulwev_d_w_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vmulwev_q_d(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vmulwev_q_d_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void xvmulwev_h_b(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvmulwev_h_b_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } 
++ void xvmulwev_w_h(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvmulwev_w_h_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvmulwev_d_w(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvmulwev_d_w_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvmulwev_q_d(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvmulwev_q_d_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void vmulwod_h_b(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vmulwod_h_b_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vmulwod_w_h(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vmulwod_w_h_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vmulwod_d_w(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vmulwod_d_w_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vmulwod_q_d(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vmulwod_q_d_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void xvmulwod_h_b(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvmulwod_h_b_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvmulwod_w_h(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvmulwod_w_h_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvmulwod_d_w(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvmulwod_d_w_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvmulwod_q_d(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvmulwod_q_d_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void vmadd_b(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vmadd_b_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vmadd_h(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vmadd_h_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vmadd_w(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vmadd_w_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vmadd_d(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vmadd_d_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void xvmadd_b(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvmadd_b_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvmadd_h(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvmadd_h_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvmadd_w(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvmadd_w_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvmadd_d(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvmadd_d_op, (int)xk->encoding(), 
(int)xj->encoding(), (int)xd->encoding())); } ++ ++ void vmsub_b(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vmsub_b_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vmsub_h(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vmsub_h_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vmsub_w(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vmsub_w_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vmsub_d(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vmsub_d_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void xvmsub_b(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvmsub_b_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvmsub_h(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvmsub_h_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvmsub_w(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvmsub_w_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvmsub_d(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvmsub_d_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void vext2xv_h_b(FloatRegister xd, FloatRegister xj) { ASSERT_LASX emit_int32(insn_RR(vext2xv_h_b_op, (int)xj->encoding(), (int)xd->encoding())); } ++ void vext2xv_w_b(FloatRegister xd, FloatRegister xj) { ASSERT_LASX emit_int32(insn_RR(vext2xv_w_b_op, (int)xj->encoding(), (int)xd->encoding())); } ++ void vext2xv_d_b(FloatRegister xd, FloatRegister xj) { ASSERT_LASX emit_int32(insn_RR(vext2xv_d_b_op, (int)xj->encoding(), (int)xd->encoding())); } ++ void vext2xv_w_h(FloatRegister xd, FloatRegister xj) { ASSERT_LASX emit_int32(insn_RR(vext2xv_w_h_op, (int)xj->encoding(), (int)xd->encoding())); } ++ void vext2xv_d_h(FloatRegister xd, FloatRegister xj) { ASSERT_LASX emit_int32(insn_RR(vext2xv_d_h_op, (int)xj->encoding(), (int)xd->encoding())); } ++ void vext2xv_d_w(FloatRegister xd, FloatRegister xj) { ASSERT_LASX emit_int32(insn_RR(vext2xv_d_w_op, (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void vext2xv_hu_bu(FloatRegister xd, FloatRegister xj) { ASSERT_LASX emit_int32(insn_RR(vext2xv_hu_bu_op, (int)xj->encoding(), (int)xd->encoding())); } ++ void vext2xv_wu_bu(FloatRegister xd, FloatRegister xj) { ASSERT_LASX emit_int32(insn_RR(vext2xv_wu_bu_op, (int)xj->encoding(), (int)xd->encoding())); } ++ void vext2xv_du_bu(FloatRegister xd, FloatRegister xj) { ASSERT_LASX emit_int32(insn_RR(vext2xv_du_bu_op, (int)xj->encoding(), (int)xd->encoding())); } ++ void vext2xv_wu_hu(FloatRegister xd, FloatRegister xj) { ASSERT_LASX emit_int32(insn_RR(vext2xv_wu_hu_op, (int)xj->encoding(), (int)xd->encoding())); } ++ void vext2xv_du_hu(FloatRegister xd, FloatRegister xj) { ASSERT_LASX emit_int32(insn_RR(vext2xv_du_hu_op, (int)xj->encoding(), (int)xd->encoding())); } ++ void vext2xv_du_wu(FloatRegister xd, FloatRegister xj) { ASSERT_LASX emit_int32(insn_RR(vext2xv_du_wu_op, (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void vldi(FloatRegister vd, int i13) { ASSERT_LSX emit_int32(insn_I13R( vldi_op, i13, (int)vd->encoding())); } ++ void xvldi(FloatRegister xd, int i13) { ASSERT_LASX 
emit_int32(insn_I13R(xvldi_op, i13, (int)xd->encoding())); } ++ ++ void vand_v(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vand_v_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void xvand_v(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvand_v_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void vor_v(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vor_v_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void xvor_v(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvor_v_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void vxor_v(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vxor_v_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void xvxor_v(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvxor_v_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void vnor_v(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vnor_v_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void xvnor_v(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvnor_v_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void vandn_v(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vandn_v_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void xvandn_v(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvandn_v_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void vorn_v(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vorn_v_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void xvorn_v(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvorn_v_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void vandi_b(FloatRegister vd, FloatRegister vj, int ui8) { ASSERT_LSX assert(is_uimm(ui8, 8), "not a unsigned 8-bit int"); emit_int32(insn_I8RR( vandi_b_op, ui8, (int)vj->encoding(), (int)vd->encoding())); } ++ void xvandi_b(FloatRegister xd, FloatRegister xj, int ui8) { ASSERT_LASX assert(is_uimm(ui8, 8), "not a unsigned 8-bit int"); emit_int32(insn_I8RR(xvandi_b_op, ui8, (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void vori_b(FloatRegister vd, FloatRegister vj, int ui8) { ASSERT_LSX assert(is_uimm(ui8, 8), "not a unsigned 8-bit int"); emit_int32(insn_I8RR( vori_b_op, ui8, (int)vj->encoding(), (int)vd->encoding())); } ++ void xvori_b(FloatRegister xd, FloatRegister xj, int ui8) { ASSERT_LASX assert(is_uimm(ui8, 8), "not a unsigned 8-bit int"); emit_int32(insn_I8RR(xvori_b_op, ui8, (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void vxori_b(FloatRegister vd, FloatRegister vj, int ui8) { ASSERT_LSX assert(is_uimm(ui8, 8), "not a unsigned 8-bit int"); emit_int32(insn_I8RR( vxori_b_op, ui8, (int)vj->encoding(), (int)vd->encoding())); } ++ void xvxori_b(FloatRegister xd, FloatRegister xj, int ui8) { ASSERT_LASX assert(is_uimm(ui8, 8), "not a unsigned 8-bit int"); emit_int32(insn_I8RR(xvxori_b_op, ui8, (int)xj->encoding(), (int)xd->encoding())); } ++ ++ 
void vnori_b(FloatRegister vd, FloatRegister vj, int ui8) { ASSERT_LSX assert(is_uimm(ui8, 8), "not a unsigned 8-bit int"); emit_int32(insn_I8RR( vnori_b_op, ui8, (int)vj->encoding(), (int)vd->encoding())); } ++ void xvnori_b(FloatRegister xd, FloatRegister xj, int ui8) { ASSERT_LASX assert(is_uimm(ui8, 8), "not a unsigned 8-bit int"); emit_int32(insn_I8RR(xvnori_b_op, ui8, (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void vsll_b(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vsll_b_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vsll_h(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vsll_h_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vsll_w(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vsll_w_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vsll_d(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vsll_d_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void xvsll_b(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvsll_b_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvsll_h(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvsll_h_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvsll_w(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvsll_w_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvsll_d(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvsll_d_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void vslli_b(FloatRegister vd, FloatRegister vj, int ui3) { ASSERT_LSX emit_int32(insn_I3RR( vslli_b_op, ui3, (int)vj->encoding(), (int)vd->encoding())); } ++ void vslli_h(FloatRegister vd, FloatRegister vj, int ui4) { ASSERT_LSX emit_int32(insn_I4RR( vslli_h_op, ui4, (int)vj->encoding(), (int)vd->encoding())); } ++ void vslli_w(FloatRegister vd, FloatRegister vj, int ui5) { ASSERT_LSX emit_int32(insn_I5RR( vslli_w_op, ui5, (int)vj->encoding(), (int)vd->encoding())); } ++ void vslli_d(FloatRegister vd, FloatRegister vj, int ui6) { ASSERT_LSX emit_int32(insn_I6RR( vslli_d_op, ui6, (int)vj->encoding(), (int)vd->encoding())); } ++ void xvslli_b(FloatRegister xd, FloatRegister xj, int ui3) { ASSERT_LASX emit_int32(insn_I3RR(xvslli_b_op, ui3, (int)xj->encoding(), (int)xd->encoding())); } ++ void xvslli_h(FloatRegister xd, FloatRegister xj, int ui4) { ASSERT_LASX emit_int32(insn_I4RR(xvslli_h_op, ui4, (int)xj->encoding(), (int)xd->encoding())); } ++ void xvslli_w(FloatRegister xd, FloatRegister xj, int ui5) { ASSERT_LASX emit_int32(insn_I5RR(xvslli_w_op, ui5, (int)xj->encoding(), (int)xd->encoding())); } ++ void xvslli_d(FloatRegister xd, FloatRegister xj, int ui6) { ASSERT_LASX emit_int32(insn_I6RR(xvslli_d_op, ui6, (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void vsrl_b(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vsrl_b_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vsrl_h(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vsrl_h_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void 
vsrl_w(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vsrl_w_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vsrl_d(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vsrl_d_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void xvsrl_b(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvsrl_b_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvsrl_h(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvsrl_h_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvsrl_w(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvsrl_w_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvsrl_d(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvsrl_d_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void vsrli_b(FloatRegister vd, FloatRegister vj, int ui3) { ASSERT_LSX emit_int32(insn_I3RR( vsrli_b_op, ui3, (int)vj->encoding(), (int)vd->encoding())); } ++ void vsrli_h(FloatRegister vd, FloatRegister vj, int ui4) { ASSERT_LSX emit_int32(insn_I4RR( vsrli_h_op, ui4, (int)vj->encoding(), (int)vd->encoding())); } ++ void vsrli_w(FloatRegister vd, FloatRegister vj, int ui5) { ASSERT_LSX emit_int32(insn_I5RR( vsrli_w_op, ui5, (int)vj->encoding(), (int)vd->encoding())); } ++ void vsrli_d(FloatRegister vd, FloatRegister vj, int ui6) { ASSERT_LSX emit_int32(insn_I6RR( vsrli_d_op, ui6, (int)vj->encoding(), (int)vd->encoding())); } ++ void xvsrli_b(FloatRegister xd, FloatRegister xj, int ui3) { ASSERT_LASX emit_int32(insn_I3RR(xvsrli_b_op, ui3, (int)xj->encoding(), (int)xd->encoding())); } ++ void xvsrli_h(FloatRegister xd, FloatRegister xj, int ui4) { ASSERT_LASX emit_int32(insn_I4RR(xvsrli_h_op, ui4, (int)xj->encoding(), (int)xd->encoding())); } ++ void xvsrli_w(FloatRegister xd, FloatRegister xj, int ui5) { ASSERT_LASX emit_int32(insn_I5RR(xvsrli_w_op, ui5, (int)xj->encoding(), (int)xd->encoding())); } ++ void xvsrli_d(FloatRegister xd, FloatRegister xj, int ui6) { ASSERT_LASX emit_int32(insn_I6RR(xvsrli_d_op, ui6, (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void vsra_b(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vsra_b_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vsra_h(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vsra_h_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vsra_w(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vsra_w_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vsra_d(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vsra_d_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void xvsra_b(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvsra_b_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvsra_h(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvsra_h_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvsra_w(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX 
emit_int32(insn_RRR(xvsra_w_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvsra_d(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvsra_d_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void vsrai_b(FloatRegister vd, FloatRegister vj, int ui3) { ASSERT_LSX emit_int32(insn_I3RR( vsrai_b_op, ui3, (int)vj->encoding(), (int)vd->encoding())); } ++ void vsrai_h(FloatRegister vd, FloatRegister vj, int ui4) { ASSERT_LSX emit_int32(insn_I4RR( vsrai_h_op, ui4, (int)vj->encoding(), (int)vd->encoding())); } ++ void vsrai_w(FloatRegister vd, FloatRegister vj, int ui5) { ASSERT_LSX emit_int32(insn_I5RR( vsrai_w_op, ui5, (int)vj->encoding(), (int)vd->encoding())); } ++ void vsrai_d(FloatRegister vd, FloatRegister vj, int ui6) { ASSERT_LSX emit_int32(insn_I6RR( vsrai_d_op, ui6, (int)vj->encoding(), (int)vd->encoding())); } ++ void xvsrai_b(FloatRegister xd, FloatRegister xj, int ui3) { ASSERT_LASX emit_int32(insn_I3RR(xvsrai_b_op, ui3, (int)xj->encoding(), (int)xd->encoding())); } ++ void xvsrai_h(FloatRegister xd, FloatRegister xj, int ui4) { ASSERT_LASX emit_int32(insn_I4RR(xvsrai_h_op, ui4, (int)xj->encoding(), (int)xd->encoding())); } ++ void xvsrai_w(FloatRegister xd, FloatRegister xj, int ui5) { ASSERT_LASX emit_int32(insn_I5RR(xvsrai_w_op, ui5, (int)xj->encoding(), (int)xd->encoding())); } ++ void xvsrai_d(FloatRegister xd, FloatRegister xj, int ui6) { ASSERT_LASX emit_int32(insn_I6RR(xvsrai_d_op, ui6, (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void vrotr_b(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vrotr_b_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vrotr_h(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vrotr_h_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vrotr_w(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vrotr_w_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vrotr_d(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vrotr_d_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void xvrotr_b(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvrotr_b_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvrotr_h(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvrotr_h_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvrotr_w(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvrotr_w_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvrotr_d(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvrotr_d_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void vrotri_b(FloatRegister vd, FloatRegister vj, int ui3) { ASSERT_LSX emit_int32(insn_I3RR( vrotri_b_op, ui3, (int)vj->encoding(), (int)vd->encoding())); } ++ void vrotri_h(FloatRegister vd, FloatRegister vj, int ui4) { ASSERT_LSX emit_int32(insn_I4RR( vrotri_h_op, ui4, (int)vj->encoding(), (int)vd->encoding())); } ++ void vrotri_w(FloatRegister vd, FloatRegister vj, int ui5) { ASSERT_LSX emit_int32(insn_I5RR( vrotri_w_op, ui5, (int)vj->encoding(), (int)vd->encoding())); } ++ void 
vrotri_d(FloatRegister vd, FloatRegister vj, int ui6) { ASSERT_LSX emit_int32(insn_I6RR( vrotri_d_op, ui6, (int)vj->encoding(), (int)vd->encoding())); } ++ void xvrotri_b(FloatRegister xd, FloatRegister xj, int ui3) { ASSERT_LASX emit_int32(insn_I3RR(xvrotri_b_op, ui3, (int)xj->encoding(), (int)xd->encoding())); } ++ void xvrotri_h(FloatRegister xd, FloatRegister xj, int ui4) { ASSERT_LASX emit_int32(insn_I4RR(xvrotri_h_op, ui4, (int)xj->encoding(), (int)xd->encoding())); } ++ void xvrotri_w(FloatRegister xd, FloatRegister xj, int ui5) { ASSERT_LASX emit_int32(insn_I5RR(xvrotri_w_op, ui5, (int)xj->encoding(), (int)xd->encoding())); } ++ void xvrotri_d(FloatRegister xd, FloatRegister xj, int ui6) { ASSERT_LASX emit_int32(insn_I6RR(xvrotri_d_op, ui6, (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void vsrlni_b_h(FloatRegister vd, FloatRegister vj, int ui4) { ASSERT_LSX emit_int32(insn_I4RR( vsrlni_b_h_op, ui4, (int)vj->encoding(), (int)vd->encoding())); } ++ void vsrlni_h_w(FloatRegister vd, FloatRegister vj, int ui5) { ASSERT_LSX emit_int32(insn_I5RR( vsrlni_h_w_op, ui5, (int)vj->encoding(), (int)vd->encoding())); } ++ void vsrlni_w_d(FloatRegister vd, FloatRegister vj, int ui6) { ASSERT_LSX emit_int32(insn_I6RR( vsrlni_w_d_op, ui6, (int)vj->encoding(), (int)vd->encoding())); } ++ void vsrlni_d_q(FloatRegister vd, FloatRegister vj, int ui7) { ASSERT_LSX emit_int32(insn_I7RR( vsrlni_d_q_op, ui7, (int)vj->encoding(), (int)vd->encoding())); } ++ ++ void vpcnt_b(FloatRegister vd, FloatRegister vj) { ASSERT_LSX emit_int32(insn_RR( vpcnt_b_op, (int)vj->encoding(), (int)vd->encoding())); } ++ void vpcnt_h(FloatRegister vd, FloatRegister vj) { ASSERT_LSX emit_int32(insn_RR( vpcnt_h_op, (int)vj->encoding(), (int)vd->encoding())); } ++ void vpcnt_w(FloatRegister vd, FloatRegister vj) { ASSERT_LSX emit_int32(insn_RR( vpcnt_w_op, (int)vj->encoding(), (int)vd->encoding())); } ++ void vpcnt_d(FloatRegister vd, FloatRegister vj) { ASSERT_LSX emit_int32(insn_RR( vpcnt_d_op, (int)vj->encoding(), (int)vd->encoding())); } ++ void xvpcnt_b(FloatRegister xd, FloatRegister xj) { ASSERT_LASX emit_int32(insn_RR(xvpcnt_b_op, (int)xj->encoding(), (int)xd->encoding())); } ++ void xvpcnt_h(FloatRegister xd, FloatRegister xj) { ASSERT_LASX emit_int32(insn_RR(xvpcnt_h_op, (int)xj->encoding(), (int)xd->encoding())); } ++ void xvpcnt_w(FloatRegister xd, FloatRegister xj) { ASSERT_LASX emit_int32(insn_RR(xvpcnt_w_op, (int)xj->encoding(), (int)xd->encoding())); } ++ void xvpcnt_d(FloatRegister xd, FloatRegister xj) { ASSERT_LASX emit_int32(insn_RR(xvpcnt_d_op, (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void vbitclr_b(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vbitclr_b_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vbitclr_h(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vbitclr_h_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vbitclr_w(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vbitclr_w_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vbitclr_d(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vbitclr_d_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void xvbitclr_b(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvbitclr_b_op, (int)xk->encoding(), (int)xj->encoding(), 
(int)xd->encoding())); } ++ void xvbitclr_h(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvbitclr_h_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvbitclr_w(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvbitclr_w_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvbitclr_d(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvbitclr_d_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void vbitclri_b(FloatRegister vd, FloatRegister vj, int ui3) { ASSERT_LSX emit_int32(insn_I3RR( vbitclri_b_op, ui3, (int)vj->encoding(), (int)vd->encoding())); } ++ void vbitclri_h(FloatRegister vd, FloatRegister vj, int ui4) { ASSERT_LSX emit_int32(insn_I4RR( vbitclri_h_op, ui4, (int)vj->encoding(), (int)vd->encoding())); } ++ void vbitclri_w(FloatRegister vd, FloatRegister vj, int ui5) { ASSERT_LSX emit_int32(insn_I5RR( vbitclri_w_op, ui5, (int)vj->encoding(), (int)vd->encoding())); } ++ void vbitclri_d(FloatRegister vd, FloatRegister vj, int ui6) { ASSERT_LSX emit_int32(insn_I6RR( vbitclri_d_op, ui6, (int)vj->encoding(), (int)vd->encoding())); } ++ void xvbitclri_b(FloatRegister xd, FloatRegister xj, int ui3) { ASSERT_LASX emit_int32(insn_I3RR(xvbitclri_b_op, ui3, (int)xj->encoding(), (int)xd->encoding())); } ++ void xvbitclri_h(FloatRegister xd, FloatRegister xj, int ui4) { ASSERT_LASX emit_int32(insn_I4RR(xvbitclri_h_op, ui4, (int)xj->encoding(), (int)xd->encoding())); } ++ void xvbitclri_w(FloatRegister xd, FloatRegister xj, int ui5) { ASSERT_LASX emit_int32(insn_I5RR(xvbitclri_w_op, ui5, (int)xj->encoding(), (int)xd->encoding())); } ++ void xvbitclri_d(FloatRegister xd, FloatRegister xj, int ui6) { ASSERT_LASX emit_int32(insn_I6RR(xvbitclri_d_op, ui6, (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void vbitset_b(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vbitset_b_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vbitset_h(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vbitset_h_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vbitset_w(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vbitset_w_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vbitset_d(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vbitset_d_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void xvbitset_b(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvbitset_b_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvbitset_h(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvbitset_h_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvbitset_w(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvbitset_w_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvbitset_d(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvbitset_d_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void vbitseti_b(FloatRegister vd, FloatRegister vj, int ui3) { ASSERT_LSX emit_int32(insn_I3RR( vbitseti_b_op, ui3, 
(int)vj->encoding(), (int)vd->encoding())); } ++ void vbitseti_h(FloatRegister vd, FloatRegister vj, int ui4) { ASSERT_LSX emit_int32(insn_I4RR( vbitseti_h_op, ui4, (int)vj->encoding(), (int)vd->encoding())); } ++ void vbitseti_w(FloatRegister vd, FloatRegister vj, int ui5) { ASSERT_LSX emit_int32(insn_I5RR( vbitseti_w_op, ui5, (int)vj->encoding(), (int)vd->encoding())); } ++ void vbitseti_d(FloatRegister vd, FloatRegister vj, int ui6) { ASSERT_LSX emit_int32(insn_I6RR( vbitseti_d_op, ui6, (int)vj->encoding(), (int)vd->encoding())); } ++ void xvbitseti_b(FloatRegister xd, FloatRegister xj, int ui3) { ASSERT_LASX emit_int32(insn_I3RR(xvbitseti_b_op, ui3, (int)xj->encoding(), (int)xd->encoding())); } ++ void xvbitseti_h(FloatRegister xd, FloatRegister xj, int ui4) { ASSERT_LASX emit_int32(insn_I4RR(xvbitseti_h_op, ui4, (int)xj->encoding(), (int)xd->encoding())); } ++ void xvbitseti_w(FloatRegister xd, FloatRegister xj, int ui5) { ASSERT_LASX emit_int32(insn_I5RR(xvbitseti_w_op, ui5, (int)xj->encoding(), (int)xd->encoding())); } ++ void xvbitseti_d(FloatRegister xd, FloatRegister xj, int ui6) { ASSERT_LASX emit_int32(insn_I6RR(xvbitseti_d_op, ui6, (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void vbitrev_b(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vbitrev_b_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vbitrev_h(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vbitrev_h_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vbitrev_w(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vbitrev_w_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vbitrev_d(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vbitrev_d_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void xvbitrev_b(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvbitrev_b_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvbitrev_h(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvbitrev_h_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvbitrev_w(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvbitrev_w_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvbitrev_d(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvbitrev_d_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void vbitrevi_b(FloatRegister vd, FloatRegister vj, int ui3) { ASSERT_LSX emit_int32(insn_I3RR( vbitrevi_b_op, ui3, (int)vj->encoding(), (int)vd->encoding())); } ++ void vbitrevi_h(FloatRegister vd, FloatRegister vj, int ui4) { ASSERT_LSX emit_int32(insn_I4RR( vbitrevi_h_op, ui4, (int)vj->encoding(), (int)vd->encoding())); } ++ void vbitrevi_w(FloatRegister vd, FloatRegister vj, int ui5) { ASSERT_LSX emit_int32(insn_I5RR( vbitrevi_w_op, ui5, (int)vj->encoding(), (int)vd->encoding())); } ++ void vbitrevi_d(FloatRegister vd, FloatRegister vj, int ui6) { ASSERT_LSX emit_int32(insn_I6RR( vbitrevi_d_op, ui6, (int)vj->encoding(), (int)vd->encoding())); } ++ void xvbitrevi_b(FloatRegister xd, FloatRegister xj, int ui3) { ASSERT_LASX emit_int32(insn_I3RR(xvbitrevi_b_op, ui3, (int)xj->encoding(), (int)xd->encoding())); } ++ void 
xvbitrevi_h(FloatRegister xd, FloatRegister xj, int ui4) { ASSERT_LASX emit_int32(insn_I4RR(xvbitrevi_h_op, ui4, (int)xj->encoding(), (int)xd->encoding())); } ++ void xvbitrevi_w(FloatRegister xd, FloatRegister xj, int ui5) { ASSERT_LASX emit_int32(insn_I5RR(xvbitrevi_w_op, ui5, (int)xj->encoding(), (int)xd->encoding())); } ++ void xvbitrevi_d(FloatRegister xd, FloatRegister xj, int ui6) { ASSERT_LASX emit_int32(insn_I6RR(xvbitrevi_d_op, ui6, (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void vfadd_s(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vfadd_s_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vfadd_d(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vfadd_d_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void xvfadd_s(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvfadd_s_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvfadd_d(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvfadd_d_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void vfsub_s(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vfsub_s_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vfsub_d(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vfsub_d_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void xvfsub_s(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvfsub_s_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvfsub_d(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvfsub_d_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void vfmul_s(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vfmul_s_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vfmul_d(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vfmul_d_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void xvfmul_s(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvfmul_s_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvfmul_d(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvfmul_d_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void vfdiv_s(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vfdiv_s_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vfdiv_d(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vfdiv_d_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void xvfdiv_s(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvfdiv_s_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvfdiv_d(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvfdiv_d_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void vfmadd_s(FloatRegister vd, FloatRegister vj, FloatRegister vk, FloatRegister va) { ASSERT_LSX 
emit_int32(insn_RRRR( vfmadd_s_op, (int)va->encoding(), (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vfmadd_d(FloatRegister vd, FloatRegister vj, FloatRegister vk, FloatRegister va) { ASSERT_LSX emit_int32(insn_RRRR( vfmadd_d_op, (int)va->encoding(), (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void xvfmadd_s(FloatRegister xd, FloatRegister xj, FloatRegister xk, FloatRegister xa) { ASSERT_LASX emit_int32(insn_RRRR(xvfmadd_s_op, (int)xa->encoding(), (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvfmadd_d(FloatRegister xd, FloatRegister xj, FloatRegister xk, FloatRegister xa) { ASSERT_LASX emit_int32(insn_RRRR(xvfmadd_d_op, (int)xa->encoding(), (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void vfmsub_s(FloatRegister vd, FloatRegister vj, FloatRegister vk, FloatRegister va) { ASSERT_LSX emit_int32(insn_RRRR( vfmsub_s_op, (int)va->encoding(), (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vfmsub_d(FloatRegister vd, FloatRegister vj, FloatRegister vk, FloatRegister va) { ASSERT_LSX emit_int32(insn_RRRR( vfmsub_d_op, (int)va->encoding(), (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void xvfmsub_s(FloatRegister xd, FloatRegister xj, FloatRegister xk, FloatRegister xa) { ASSERT_LASX emit_int32(insn_RRRR(xvfmsub_s_op, (int)xa->encoding(), (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvfmsub_d(FloatRegister xd, FloatRegister xj, FloatRegister xk, FloatRegister xa) { ASSERT_LASX emit_int32(insn_RRRR(xvfmsub_d_op, (int)xa->encoding(), (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void vfnmadd_s(FloatRegister vd, FloatRegister vj, FloatRegister vk, FloatRegister va) { ASSERT_LSX emit_int32(insn_RRRR( vfnmadd_s_op, (int)va->encoding(), (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vfnmadd_d(FloatRegister vd, FloatRegister vj, FloatRegister vk, FloatRegister va) { ASSERT_LSX emit_int32(insn_RRRR( vfnmadd_d_op, (int)va->encoding(), (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void xvfnmadd_s(FloatRegister xd, FloatRegister xj, FloatRegister xk, FloatRegister xa) { ASSERT_LASX emit_int32(insn_RRRR(xvfnmadd_s_op, (int)xa->encoding(), (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvfnmadd_d(FloatRegister xd, FloatRegister xj, FloatRegister xk, FloatRegister xa) { ASSERT_LASX emit_int32(insn_RRRR(xvfnmadd_d_op, (int)xa->encoding(), (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void vfnmsub_s(FloatRegister vd, FloatRegister vj, FloatRegister vk, FloatRegister va) { ASSERT_LSX emit_int32(insn_RRRR( vfnmsub_s_op, (int)va->encoding(), (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vfnmsub_d(FloatRegister vd, FloatRegister vj, FloatRegister vk, FloatRegister va) { ASSERT_LSX emit_int32(insn_RRRR( vfnmsub_d_op, (int)va->encoding(), (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void xvfnmsub_s(FloatRegister xd, FloatRegister xj, FloatRegister xk, FloatRegister xa) { ASSERT_LASX emit_int32(insn_RRRR(xvfnmsub_s_op, (int)xa->encoding(), (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvfnmsub_d(FloatRegister xd, FloatRegister xj, FloatRegister xk, FloatRegister xa) { ASSERT_LASX emit_int32(insn_RRRR(xvfnmsub_d_op, (int)xa->encoding(), (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void 
vfmax_s(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vfmax_s_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vfmax_d(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vfmax_d_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void xvfmax_s(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvfmax_s_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvfmax_d(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvfmax_d_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void vfmin_s(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vfmin_s_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vfmin_d(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vfmin_d_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void xvfmin_s(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvfmin_s_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvfmin_d(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvfmin_d_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void vfclass_s(FloatRegister vd, FloatRegister vj) { ASSERT_LSX emit_int32(insn_RR( vfclass_s_op, (int)vj->encoding(), (int)vd->encoding())); } ++ void vfclass_d(FloatRegister vd, FloatRegister vj) { ASSERT_LSX emit_int32(insn_RR( vfclass_d_op, (int)vj->encoding(), (int)vd->encoding())); } ++ void xvfclass_s(FloatRegister xd, FloatRegister xj) { ASSERT_LASX emit_int32(insn_RR(xvfclass_s_op, (int)xj->encoding(), (int)xd->encoding())); } ++ void xvfclass_d(FloatRegister xd, FloatRegister xj) { ASSERT_LASX emit_int32(insn_RR(xvfclass_d_op, (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void vfsqrt_s(FloatRegister vd, FloatRegister vj) { ASSERT_LSX emit_int32(insn_RR( vfsqrt_s_op, (int)vj->encoding(), (int)vd->encoding())); } ++ void vfsqrt_d(FloatRegister vd, FloatRegister vj) { ASSERT_LSX emit_int32(insn_RR( vfsqrt_d_op, (int)vj->encoding(), (int)vd->encoding())); } ++ void xvfsqrt_s(FloatRegister xd, FloatRegister xj) { ASSERT_LASX emit_int32(insn_RR(xvfsqrt_s_op, (int)xj->encoding(), (int)xd->encoding())); } ++ void xvfsqrt_d(FloatRegister xd, FloatRegister xj) { ASSERT_LASX emit_int32(insn_RR(xvfsqrt_d_op, (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void vfcvtl_s_h(FloatRegister vd, FloatRegister rj) { ASSERT_LSX emit_int32(insn_RR( vfcvtl_s_h_op, (int)rj->encoding(), (int)vd->encoding())); } ++ void vfcvtl_d_s(FloatRegister vd, FloatRegister rj) { ASSERT_LSX emit_int32(insn_RR( vfcvtl_d_s_op, (int)rj->encoding(), (int)vd->encoding())); } ++ void xvfcvtl_s_h(FloatRegister xd, FloatRegister xj) { ASSERT_LASX emit_int32(insn_RR(xvfcvtl_s_h_op, (int)xj->encoding(), (int)xd->encoding())); } ++ void xvfcvtl_d_s(FloatRegister xd, FloatRegister xj) { ASSERT_LASX emit_int32(insn_RR(xvfcvtl_d_s_op, (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void vfcvth_s_h(FloatRegister vd, FloatRegister rj) { ASSERT_LSX emit_int32(insn_RR( vfcvth_s_h_op, (int)rj->encoding(), (int)vd->encoding())); } ++ void vfcvth_d_s(FloatRegister vd, FloatRegister rj) { ASSERT_LSX emit_int32(insn_RR( vfcvth_d_s_op, (int)rj->encoding(), (int)vd->encoding())); } 
++ void xvfcvth_s_h(FloatRegister xd, FloatRegister xj) { ASSERT_LASX emit_int32(insn_RR(xvfcvth_s_h_op, (int)xj->encoding(), (int)xd->encoding())); } ++ void xvfcvth_d_s(FloatRegister xd, FloatRegister xj) { ASSERT_LASX emit_int32(insn_RR(xvfcvth_d_s_op, (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void vfcvt_h_s(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vfcvt_h_s_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vfcvt_s_d(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vfcvt_s_d_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void xvfcvt_h_s(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvfcvt_h_s_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvfcvt_s_d(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvfcvt_s_d_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void vfrintrne_s(FloatRegister vd, FloatRegister vj) { ASSERT_LSX emit_int32(insn_RR( vfrintrne_s_op, (int)vj->encoding(), (int)vd->encoding())); } ++ void vfrintrne_d(FloatRegister vd, FloatRegister vj) { ASSERT_LSX emit_int32(insn_RR( vfrintrne_d_op, (int)vj->encoding(), (int)vd->encoding())); } ++ void xvfrintrne_s(FloatRegister xd, FloatRegister xj) { ASSERT_LASX emit_int32(insn_RR(xvfrintrne_s_op, (int)xj->encoding(), (int)xd->encoding())); } ++ void xvfrintrne_d(FloatRegister xd, FloatRegister xj) { ASSERT_LASX emit_int32(insn_RR(xvfrintrne_d_op, (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void vfrintrz_s(FloatRegister vd, FloatRegister vj) { ASSERT_LSX emit_int32(insn_RR( vfrintrz_s_op, (int)vj->encoding(), (int)vd->encoding())); } ++ void vfrintrz_d(FloatRegister vd, FloatRegister vj) { ASSERT_LSX emit_int32(insn_RR( vfrintrz_d_op, (int)vj->encoding(), (int)vd->encoding())); } ++ void xvfrintrz_s(FloatRegister xd, FloatRegister xj) { ASSERT_LASX emit_int32(insn_RR(xvfrintrz_s_op, (int)xj->encoding(), (int)xd->encoding())); } ++ void xvfrintrz_d(FloatRegister xd, FloatRegister xj) { ASSERT_LASX emit_int32(insn_RR(xvfrintrz_d_op, (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void vfrintrp_s(FloatRegister vd, FloatRegister vj) { ASSERT_LSX emit_int32(insn_RR( vfrintrp_s_op, (int)vj->encoding(), (int)vd->encoding())); } ++ void vfrintrp_d(FloatRegister vd, FloatRegister vj) { ASSERT_LSX emit_int32(insn_RR( vfrintrp_d_op, (int)vj->encoding(), (int)vd->encoding())); } ++ void xvfrintrp_s(FloatRegister xd, FloatRegister xj) { ASSERT_LASX emit_int32(insn_RR(xvfrintrp_s_op, (int)xj->encoding(), (int)xd->encoding())); } ++ void xvfrintrp_d(FloatRegister xd, FloatRegister xj) { ASSERT_LASX emit_int32(insn_RR(xvfrintrp_d_op, (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void vfrintrm_s(FloatRegister vd, FloatRegister vj) { ASSERT_LSX emit_int32(insn_RR( vfrintrm_s_op, (int)vj->encoding(), (int)vd->encoding())); } ++ void vfrintrm_d(FloatRegister vd, FloatRegister vj) { ASSERT_LSX emit_int32(insn_RR( vfrintrm_d_op, (int)vj->encoding(), (int)vd->encoding())); } ++ void xvfrintrm_s(FloatRegister xd, FloatRegister xj) { ASSERT_LASX emit_int32(insn_RR(xvfrintrm_s_op, (int)xj->encoding(), (int)xd->encoding())); } ++ void xvfrintrm_d(FloatRegister xd, FloatRegister xj) { ASSERT_LASX emit_int32(insn_RR(xvfrintrm_d_op, (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void vfrint_s(FloatRegister vd, FloatRegister vj) { ASSERT_LSX 
emit_int32(insn_RR( vfrint_s_op, (int)vj->encoding(), (int)vd->encoding())); } ++ void vfrint_d(FloatRegister vd, FloatRegister vj) { ASSERT_LSX emit_int32(insn_RR( vfrint_d_op, (int)vj->encoding(), (int)vd->encoding())); } ++ void xvfrint_s(FloatRegister xd, FloatRegister xj) { ASSERT_LASX emit_int32(insn_RR(xvfrint_s_op, (int)xj->encoding(), (int)xd->encoding())); } ++ void xvfrint_d(FloatRegister xd, FloatRegister xj) { ASSERT_LASX emit_int32(insn_RR(xvfrint_d_op, (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void vftintrne_w_s(FloatRegister vd, FloatRegister rj) { ASSERT_LSX emit_int32(insn_RR( vftintrne_w_s_op, (int)rj->encoding(), (int)vd->encoding())); } ++ void vftintrne_l_d(FloatRegister vd, FloatRegister rj) { ASSERT_LSX emit_int32(insn_RR( vftintrne_l_d_op, (int)rj->encoding(), (int)vd->encoding())); } ++ void xvftintrne_w_s(FloatRegister xd, FloatRegister xj) { ASSERT_LASX emit_int32(insn_RR(xvftintrne_w_s_op, (int)xj->encoding(), (int)xd->encoding())); } ++ void xvftintrne_l_d(FloatRegister xd, FloatRegister xj) { ASSERT_LASX emit_int32(insn_RR(xvftintrne_l_d_op, (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void vftintrz_w_s(FloatRegister vd, FloatRegister rj) { ASSERT_LSX emit_int32(insn_RR( vftintrz_w_s_op, (int)rj->encoding(), (int)vd->encoding())); } ++ void vftintrz_l_d(FloatRegister vd, FloatRegister rj) { ASSERT_LSX emit_int32(insn_RR( vftintrz_l_d_op, (int)rj->encoding(), (int)vd->encoding())); } ++ void xvftintrz_w_s(FloatRegister xd, FloatRegister xj) { ASSERT_LASX emit_int32(insn_RR(xvftintrz_w_s_op, (int)xj->encoding(), (int)xd->encoding())); } ++ void xvftintrz_l_d(FloatRegister xd, FloatRegister xj) { ASSERT_LASX emit_int32(insn_RR(xvftintrz_l_d_op, (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void vftintrp_w_s(FloatRegister vd, FloatRegister rj) { ASSERT_LSX emit_int32(insn_RR( vftintrp_w_s_op, (int)rj->encoding(), (int)vd->encoding())); } ++ void vftintrp_l_d(FloatRegister vd, FloatRegister rj) { ASSERT_LSX emit_int32(insn_RR( vftintrp_l_d_op, (int)rj->encoding(), (int)vd->encoding())); } ++ void xvftintrp_w_s(FloatRegister xd, FloatRegister xj) { ASSERT_LASX emit_int32(insn_RR(xvftintrp_w_s_op, (int)xj->encoding(), (int)xd->encoding())); } ++ void xvftintrp_l_d(FloatRegister xd, FloatRegister xj) { ASSERT_LASX emit_int32(insn_RR(xvftintrp_l_d_op, (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void vftintrm_w_s(FloatRegister vd, FloatRegister rj) { ASSERT_LSX emit_int32(insn_RR( vftintrm_w_s_op, (int)rj->encoding(), (int)vd->encoding())); } ++ void vftintrm_l_d(FloatRegister vd, FloatRegister rj) { ASSERT_LSX emit_int32(insn_RR( vftintrm_l_d_op, (int)rj->encoding(), (int)vd->encoding())); } ++ void xvftintrm_w_s(FloatRegister xd, FloatRegister xj) { ASSERT_LASX emit_int32(insn_RR(xvftintrm_w_s_op, (int)xj->encoding(), (int)xd->encoding())); } ++ void xvftintrm_l_d(FloatRegister xd, FloatRegister xj) { ASSERT_LASX emit_int32(insn_RR(xvftintrm_l_d_op, (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void vftint_w_s(FloatRegister vd, FloatRegister rj) { ASSERT_LSX emit_int32(insn_RR( vftint_w_s_op, (int)rj->encoding(), (int)vd->encoding())); } ++ void vftint_l_d(FloatRegister vd, FloatRegister rj) { ASSERT_LSX emit_int32(insn_RR( vftint_l_d_op, (int)rj->encoding(), (int)vd->encoding())); } ++ void xvftint_w_s(FloatRegister xd, FloatRegister xj) { ASSERT_LASX emit_int32(insn_RR(xvftint_w_s_op, (int)xj->encoding(), (int)xd->encoding())); } ++ void xvftint_l_d(FloatRegister xd, FloatRegister xj) { ASSERT_LASX 
emit_int32(insn_RR(xvftint_l_d_op, (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void vftintrne_w_d(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vftintrne_w_d_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void xvftintrne_w_d(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvftintrne_w_d_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void vftintrz_w_d(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vftintrz_w_d_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void xvftintrz_w_d(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvftintrz_w_d_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void vftintrp_w_d(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vftintrp_w_d_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void xvftintrp_w_d(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvftintrp_w_d_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void vftintrm_w_d(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vftintrm_w_d_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void xvftintrm_w_d(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvftintrm_w_d_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void vftint_w_d(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vftint_w_d_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void xvftint_w_d(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvftint_w_d_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void vftintrnel_l_s(FloatRegister vd, FloatRegister rj) { ASSERT_LSX emit_int32(insn_RR( vftintrnel_l_s_op, (int)rj->encoding(), (int)vd->encoding())); } ++ void xvftintrnel_l_s(FloatRegister xd, FloatRegister xj) { ASSERT_LASX emit_int32(insn_RR(xvftintrnel_l_s_op, (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void vftintrneh_l_s(FloatRegister vd, FloatRegister rj) { ASSERT_LSX emit_int32(insn_RR( vftintrneh_l_s_op, (int)rj->encoding(), (int)vd->encoding())); } ++ void xvftintrneh_l_s(FloatRegister xd, FloatRegister xj) { ASSERT_LASX emit_int32(insn_RR(xvftintrneh_l_s_op, (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void vftintrzl_l_s(FloatRegister vd, FloatRegister rj) { ASSERT_LSX emit_int32(insn_RR( vftintrzl_l_s_op, (int)rj->encoding(), (int)vd->encoding())); } ++ void xvftintrzl_l_s(FloatRegister xd, FloatRegister xj) { ASSERT_LASX emit_int32(insn_RR(xvftintrzl_l_s_op, (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void vftintrzh_l_s(FloatRegister vd, FloatRegister rj) { ASSERT_LSX emit_int32(insn_RR( vftintrzh_l_s_op, (int)rj->encoding(), (int)vd->encoding())); } ++ void xvftintrzh_l_s(FloatRegister xd, FloatRegister xj) { ASSERT_LASX emit_int32(insn_RR(xvftintrzh_l_s_op, (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void vftintrpl_l_s(FloatRegister vd, FloatRegister rj) { ASSERT_LSX emit_int32(insn_RR( vftintrpl_l_s_op, (int)rj->encoding(), (int)vd->encoding())); } ++ void xvftintrpl_l_s(FloatRegister xd, FloatRegister xj) { ASSERT_LASX 
emit_int32(insn_RR(xvftintrpl_l_s_op, (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void vftintrph_l_s(FloatRegister vd, FloatRegister rj) { ASSERT_LSX emit_int32(insn_RR( vftintrph_l_s_op, (int)rj->encoding(), (int)vd->encoding())); } ++ void xvftintrph_l_s(FloatRegister xd, FloatRegister xj) { ASSERT_LASX emit_int32(insn_RR(xvftintrph_l_s_op, (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void vftintrml_l_s(FloatRegister vd, FloatRegister rj) { ASSERT_LSX emit_int32(insn_RR( vftintrml_l_s_op, (int)rj->encoding(), (int)vd->encoding())); } ++ void xvftintrml_l_s(FloatRegister xd, FloatRegister xj) { ASSERT_LASX emit_int32(insn_RR(xvftintrml_l_s_op, (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void vftintrmh_l_s(FloatRegister vd, FloatRegister rj) { ASSERT_LSX emit_int32(insn_RR( vftintrmh_l_s_op, (int)rj->encoding(), (int)vd->encoding())); } ++ void xvftintrmh_l_s(FloatRegister xd, FloatRegister xj) { ASSERT_LASX emit_int32(insn_RR(xvftintrmh_l_s_op, (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void vftintl_l_s(FloatRegister vd, FloatRegister rj) { ASSERT_LSX emit_int32(insn_RR( vftintl_l_s_op, (int)rj->encoding(), (int)vd->encoding())); } ++ void xvftintl_l_s(FloatRegister xd, FloatRegister xj) { ASSERT_LASX emit_int32(insn_RR(xvftintl_l_s_op, (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void vftinth_l_s(FloatRegister vd, FloatRegister rj) { ASSERT_LSX emit_int32(insn_RR( vftinth_l_s_op, (int)rj->encoding(), (int)vd->encoding())); } ++ void xvftinth_l_s(FloatRegister xd, FloatRegister xj) { ASSERT_LASX emit_int32(insn_RR(xvftinth_l_s_op, (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void vffint_s_w(FloatRegister vd, FloatRegister rj) { ASSERT_LSX emit_int32(insn_RR( vffint_s_w_op, (int)rj->encoding(), (int)vd->encoding())); } ++ void vffint_d_l(FloatRegister vd, FloatRegister rj) { ASSERT_LSX emit_int32(insn_RR( vffint_d_l_op, (int)rj->encoding(), (int)vd->encoding())); } ++ void xvffint_s_w(FloatRegister xd, FloatRegister xj) { ASSERT_LASX emit_int32(insn_RR(xvffint_s_w_op, (int)xj->encoding(), (int)xd->encoding())); } ++ void xvffint_d_l(FloatRegister xd, FloatRegister xj) { ASSERT_LASX emit_int32(insn_RR(xvffint_d_l_op, (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void vffint_s_l(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vffint_s_l_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void xvffint_s_l(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvffint_s_l_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void vffintl_d_w(FloatRegister vd, FloatRegister rj) { ASSERT_LSX emit_int32(insn_RR( vffintl_d_w_op, (int)rj->encoding(), (int)vd->encoding())); } ++ void xvffintl_d_w(FloatRegister xd, FloatRegister xj) { ASSERT_LASX emit_int32(insn_RR(xvffintl_d_w_op, (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void vffinth_d_w(FloatRegister vd, FloatRegister rj) { ASSERT_LSX emit_int32(insn_RR( vffinth_d_w_op, (int)rj->encoding(), (int)vd->encoding())); } ++ void xvffinth_d_w(FloatRegister xd, FloatRegister xj) { ASSERT_LASX emit_int32(insn_RR(xvffinth_d_w_op, (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void vseq_b(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vseq_b_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vseq_h(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vseq_h_op, 
(int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vseq_w(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vseq_w_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vseq_d(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vseq_d_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void xvseq_b(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvseq_b_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvseq_h(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvseq_h_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvseq_w(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvseq_w_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvseq_d(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvseq_d_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void vsle_b(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vsle_b_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vsle_h(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vsle_h_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vsle_w(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vsle_w_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vsle_d(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vsle_d_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void xvsle_b(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvsle_b_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvsle_h(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvsle_h_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvsle_w(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvsle_w_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvsle_d(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvsle_d_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void vsle_bu(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vsle_bu_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vsle_hu(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vsle_hu_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vsle_wu(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vsle_wu_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vsle_du(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vsle_du_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void xvsle_bu(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvsle_bu_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void 
xvsle_hu(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvsle_hu_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvsle_wu(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvsle_wu_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvsle_du(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvsle_du_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void vslt_b(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vslt_b_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vslt_h(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vslt_h_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vslt_w(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vslt_w_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vslt_d(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vslt_d_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void xvslt_b(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvslt_b_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvslt_h(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvslt_h_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvslt_w(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvslt_w_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvslt_d(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvslt_d_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void vslt_bu(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vslt_bu_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vslt_hu(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vslt_hu_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vslt_wu(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vslt_wu_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vslt_du(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vslt_du_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void xvslt_bu(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvslt_bu_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvslt_hu(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvslt_hu_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvslt_wu(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvslt_wu_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvslt_du(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvslt_du_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void vslti_bu(FloatRegister vd, FloatRegister vj, int ui5) { 
ASSERT_LSX emit_int32(insn_I5RR( vslti_bu_op, ui5, (int)vj->encoding(), (int)vd->encoding())); } ++ void vslti_hu(FloatRegister vd, FloatRegister vj, int ui5) { ASSERT_LSX emit_int32(insn_I5RR( vslti_hu_op, ui5, (int)vj->encoding(), (int)vd->encoding())); } ++ void vslti_wu(FloatRegister vd, FloatRegister vj, int ui5) { ASSERT_LSX emit_int32(insn_I5RR( vslti_wu_op, ui5, (int)vj->encoding(), (int)vd->encoding())); } ++ void vslti_du(FloatRegister vd, FloatRegister vj, int ui5) { ASSERT_LSX emit_int32(insn_I5RR( vslti_du_op, ui5, (int)vj->encoding(), (int)vd->encoding())); } ++ void xvslti_bu(FloatRegister xd, FloatRegister xj, int ui5) { ASSERT_LASX emit_int32(insn_I5RR(xvslti_bu_op, ui5, (int)xj->encoding(), (int)xd->encoding())); } ++ void xvslti_hu(FloatRegister xd, FloatRegister xj, int ui5) { ASSERT_LASX emit_int32(insn_I5RR(xvslti_hu_op, ui5, (int)xj->encoding(), (int)xd->encoding())); } ++ void xvslti_wu(FloatRegister xd, FloatRegister xj, int ui5) { ASSERT_LASX emit_int32(insn_I5RR(xvslti_wu_op, ui5, (int)xj->encoding(), (int)xd->encoding())); } ++ void xvslti_du(FloatRegister xd, FloatRegister xj, int ui5) { ASSERT_LASX emit_int32(insn_I5RR(xvslti_du_op, ui5, (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void vfcmp_caf_s (FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRRR( vfcmp_cond_s_op, fcmp_caf , (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vfcmp_cun_s (FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRRR( vfcmp_cond_s_op, fcmp_cun , (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vfcmp_ceq_s (FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRRR( vfcmp_cond_s_op, fcmp_ceq , (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vfcmp_cueq_s (FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRRR( vfcmp_cond_s_op, fcmp_cueq, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vfcmp_clt_s (FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRRR( vfcmp_cond_s_op, fcmp_clt , (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vfcmp_cult_s (FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRRR( vfcmp_cond_s_op, fcmp_cult, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vfcmp_cle_s (FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRRR( vfcmp_cond_s_op, fcmp_cle , (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vfcmp_cule_s (FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRRR( vfcmp_cond_s_op, fcmp_cule, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vfcmp_cne_s (FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRRR( vfcmp_cond_s_op, fcmp_cne , (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vfcmp_cor_s (FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRRR( vfcmp_cond_s_op, fcmp_cor , (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vfcmp_cune_s (FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRRR( vfcmp_cond_s_op, fcmp_cune, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vfcmp_saf_s (FloatRegister vd, FloatRegister vj, 
FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRRR( vfcmp_cond_s_op, fcmp_saf , (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vfcmp_sun_s (FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRRR( vfcmp_cond_s_op, fcmp_sun , (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vfcmp_seq_s (FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRRR( vfcmp_cond_s_op, fcmp_seq , (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vfcmp_sueq_s (FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRRR( vfcmp_cond_s_op, fcmp_sueq, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vfcmp_slt_s (FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRRR( vfcmp_cond_s_op, fcmp_slt , (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vfcmp_sult_s (FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRRR( vfcmp_cond_s_op, fcmp_sult, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vfcmp_sle_s (FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRRR( vfcmp_cond_s_op, fcmp_sle , (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vfcmp_sule_s (FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRRR( vfcmp_cond_s_op, fcmp_sule, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vfcmp_sne_s (FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRRR( vfcmp_cond_s_op, fcmp_sne , (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vfcmp_sor_s (FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRRR( vfcmp_cond_s_op, fcmp_sor , (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vfcmp_sune_s (FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRRR( vfcmp_cond_s_op, fcmp_sune, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ ++ void vfcmp_caf_d (FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRRR( vfcmp_cond_d_op, fcmp_caf , (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vfcmp_cun_d (FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRRR( vfcmp_cond_d_op, fcmp_cun , (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vfcmp_ceq_d (FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRRR( vfcmp_cond_d_op, fcmp_ceq , (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vfcmp_cueq_d (FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRRR( vfcmp_cond_d_op, fcmp_cueq, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vfcmp_clt_d (FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRRR( vfcmp_cond_d_op, fcmp_clt , (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vfcmp_cult_d (FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRRR( vfcmp_cond_d_op, fcmp_cult, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vfcmp_cle_d (FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRRR( 
vfcmp_cond_d_op, fcmp_cle , (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vfcmp_cule_d (FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRRR( vfcmp_cond_d_op, fcmp_cule, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vfcmp_cne_d (FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRRR( vfcmp_cond_d_op, fcmp_cne , (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vfcmp_cor_d (FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRRR( vfcmp_cond_d_op, fcmp_cor , (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vfcmp_cune_d (FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRRR( vfcmp_cond_d_op, fcmp_cune, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vfcmp_saf_d (FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRRR( vfcmp_cond_d_op, fcmp_saf , (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vfcmp_sun_d (FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRRR( vfcmp_cond_d_op, fcmp_sun , (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vfcmp_seq_d (FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRRR( vfcmp_cond_d_op, fcmp_seq , (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vfcmp_sueq_d (FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRRR( vfcmp_cond_d_op, fcmp_sueq, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vfcmp_slt_d (FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRRR( vfcmp_cond_d_op, fcmp_slt , (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vfcmp_sult_d (FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRRR( vfcmp_cond_d_op, fcmp_sult, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vfcmp_sle_d (FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRRR( vfcmp_cond_d_op, fcmp_sle , (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vfcmp_sule_d (FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRRR( vfcmp_cond_d_op, fcmp_sule, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vfcmp_sne_d (FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRRR( vfcmp_cond_d_op, fcmp_sne , (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vfcmp_sor_d (FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRRR( vfcmp_cond_d_op, fcmp_sor , (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vfcmp_sune_d (FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRRR( vfcmp_cond_d_op, fcmp_sune, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ ++ void xvfcmp_caf_s (FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRRR(xvfcmp_cond_s_op, fcmp_caf , (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvfcmp_cun_s (FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRRR(xvfcmp_cond_s_op, fcmp_cun , (int)xk->encoding(), 
(int)xj->encoding(), (int)xd->encoding())); } ++ void xvfcmp_ceq_s (FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRRR(xvfcmp_cond_s_op, fcmp_ceq , (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvfcmp_cueq_s (FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRRR(xvfcmp_cond_s_op, fcmp_cueq, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvfcmp_clt_s (FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRRR(xvfcmp_cond_s_op, fcmp_clt , (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvfcmp_cult_s (FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRRR(xvfcmp_cond_s_op, fcmp_cult, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvfcmp_cle_s (FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRRR(xvfcmp_cond_s_op, fcmp_cle , (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvfcmp_cule_s (FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRRR(xvfcmp_cond_s_op, fcmp_cule, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvfcmp_cne_s (FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRRR(xvfcmp_cond_s_op, fcmp_cne , (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvfcmp_cor_s (FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRRR(xvfcmp_cond_s_op, fcmp_cor , (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvfcmp_cune_s (FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRRR(xvfcmp_cond_s_op, fcmp_cune, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvfcmp_saf_s (FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRRR(xvfcmp_cond_s_op, fcmp_saf , (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvfcmp_sun_s (FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRRR(xvfcmp_cond_s_op, fcmp_sun , (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvfcmp_seq_s (FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRRR(xvfcmp_cond_s_op, fcmp_seq , (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvfcmp_sueq_s (FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRRR(xvfcmp_cond_s_op, fcmp_sueq, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvfcmp_slt_s (FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRRR(xvfcmp_cond_s_op, fcmp_slt , (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvfcmp_sult_s (FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRRR(xvfcmp_cond_s_op, fcmp_sult, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvfcmp_sle_s (FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRRR(xvfcmp_cond_s_op, fcmp_sle , (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvfcmp_sule_s (FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRRR(xvfcmp_cond_s_op, fcmp_sule, (int)xk->encoding(), (int)xj->encoding(), 
(int)xd->encoding())); } ++ void xvfcmp_sne_s (FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRRR(xvfcmp_cond_s_op, fcmp_sne , (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvfcmp_sor_s (FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRRR(xvfcmp_cond_s_op, fcmp_sor , (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvfcmp_sune_s (FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRRR(xvfcmp_cond_s_op, fcmp_sune, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void xvfcmp_caf_d (FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRRR(xvfcmp_cond_d_op, fcmp_caf , (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvfcmp_cun_d (FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRRR(xvfcmp_cond_d_op, fcmp_cun , (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvfcmp_ceq_d (FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRRR(xvfcmp_cond_d_op, fcmp_ceq , (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvfcmp_cueq_d (FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRRR(xvfcmp_cond_d_op, fcmp_cueq, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvfcmp_clt_d (FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRRR(xvfcmp_cond_d_op, fcmp_clt , (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvfcmp_cult_d (FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRRR(xvfcmp_cond_d_op, fcmp_cult, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvfcmp_cle_d (FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRRR(xvfcmp_cond_d_op, fcmp_cle , (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvfcmp_cule_d (FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRRR(xvfcmp_cond_d_op, fcmp_cule, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvfcmp_cne_d (FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRRR(xvfcmp_cond_d_op, fcmp_cne , (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvfcmp_cor_d (FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRRR(xvfcmp_cond_d_op, fcmp_cor , (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvfcmp_cune_d (FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRRR(xvfcmp_cond_d_op, fcmp_cune, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvfcmp_saf_d (FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRRR(xvfcmp_cond_d_op, fcmp_saf , (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvfcmp_sun_d (FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRRR(xvfcmp_cond_d_op, fcmp_sun , (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvfcmp_seq_d (FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRRR(xvfcmp_cond_d_op, fcmp_seq , (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ 
void xvfcmp_sueq_d (FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRRR(xvfcmp_cond_d_op, fcmp_sueq, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvfcmp_slt_d (FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRRR(xvfcmp_cond_d_op, fcmp_slt , (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvfcmp_sult_d (FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRRR(xvfcmp_cond_d_op, fcmp_sult, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvfcmp_sle_d (FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRRR(xvfcmp_cond_d_op, fcmp_sle , (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvfcmp_sule_d (FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRRR(xvfcmp_cond_d_op, fcmp_sule, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvfcmp_sne_d (FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRRR(xvfcmp_cond_d_op, fcmp_sne , (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvfcmp_sor_d (FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRRR(xvfcmp_cond_d_op, fcmp_sor , (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvfcmp_sune_d (FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRRR(xvfcmp_cond_d_op, fcmp_sune, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void vbitsel_v(FloatRegister vd, FloatRegister vj, FloatRegister vk, FloatRegister va) { ASSERT_LSX emit_int32(insn_RRRR( vbitsel_v_op, (int)va->encoding(), (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void xvbitsel_v(FloatRegister xd, FloatRegister xj, FloatRegister xk, FloatRegister xa) { ASSERT_LASX emit_int32(insn_RRRR(xvbitsel_v_op, (int)xa->encoding(), (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void vinsgr2vr_b(FloatRegister vd, Register rj, int ui4) { ASSERT_LSX emit_int32(insn_I4RR( vinsgr2vr_b_op, ui4, (int)rj->encoding(), (int)vd->encoding())); } ++ void vinsgr2vr_h(FloatRegister vd, Register rj, int ui3) { ASSERT_LSX emit_int32(insn_I3RR( vinsgr2vr_h_op, ui3, (int)rj->encoding(), (int)vd->encoding())); } ++ void vinsgr2vr_w(FloatRegister vd, Register rj, int ui2) { ASSERT_LSX emit_int32(insn_I2RR( vinsgr2vr_w_op, ui2, (int)rj->encoding(), (int)vd->encoding())); } ++ void vinsgr2vr_d(FloatRegister vd, Register rj, int ui1) { ASSERT_LSX emit_int32(insn_I1RR( vinsgr2vr_d_op, ui1, (int)rj->encoding(), (int)vd->encoding())); } ++ ++ void xvinsgr2vr_w(FloatRegister xd, Register rj, int ui3) { ASSERT_LASX emit_int32(insn_I3RR(xvinsgr2vr_w_op, ui3, (int)rj->encoding(), (int)xd->encoding())); } ++ void xvinsgr2vr_d(FloatRegister xd, Register rj, int ui2) { ASSERT_LASX emit_int32(insn_I2RR(xvinsgr2vr_d_op, ui2, (int)rj->encoding(), (int)xd->encoding())); } ++ ++ void vpickve2gr_b(Register rd, FloatRegister vj, int ui4) { ASSERT_LSX emit_int32(insn_I4RR( vpickve2gr_b_op, ui4, (int)vj->encoding(), (int)rd->encoding())); } ++ void vpickve2gr_h(Register rd, FloatRegister vj, int ui3) { ASSERT_LSX emit_int32(insn_I3RR( vpickve2gr_h_op, ui3, (int)vj->encoding(), (int)rd->encoding())); } ++ void vpickve2gr_w(Register rd, FloatRegister vj, int ui2) { ASSERT_LSX emit_int32(insn_I2RR( vpickve2gr_w_op, ui2, 
(int)vj->encoding(), (int)rd->encoding())); } ++ void vpickve2gr_d(Register rd, FloatRegister vj, int ui1) { ASSERT_LSX emit_int32(insn_I1RR( vpickve2gr_d_op, ui1, (int)vj->encoding(), (int)rd->encoding())); } ++ ++ void vpickve2gr_bu(Register rd, FloatRegister vj, int ui4) { ASSERT_LSX emit_int32(insn_I4RR( vpickve2gr_bu_op, ui4, (int)vj->encoding(), (int)rd->encoding())); } ++ void vpickve2gr_hu(Register rd, FloatRegister vj, int ui3) { ASSERT_LSX emit_int32(insn_I3RR( vpickve2gr_hu_op, ui3, (int)vj->encoding(), (int)rd->encoding())); } ++ void vpickve2gr_wu(Register rd, FloatRegister vj, int ui2) { ASSERT_LSX emit_int32(insn_I2RR( vpickve2gr_wu_op, ui2, (int)vj->encoding(), (int)rd->encoding())); } ++ void vpickve2gr_du(Register rd, FloatRegister vj, int ui1) { ASSERT_LSX emit_int32(insn_I1RR( vpickve2gr_du_op, ui1, (int)vj->encoding(), (int)rd->encoding())); } ++ ++ void xvpickve2gr_w(Register rd, FloatRegister xj, int ui3) { ASSERT_LASX emit_int32(insn_I3RR(xvpickve2gr_w_op, ui3, (int)xj->encoding(), (int)rd->encoding())); } ++ void xvpickve2gr_d(Register rd, FloatRegister xj, int ui2) { ASSERT_LASX emit_int32(insn_I2RR(xvpickve2gr_d_op, ui2, (int)xj->encoding(), (int)rd->encoding())); } ++ ++ void xvpickve2gr_wu(Register rd, FloatRegister xj, int ui3) { ASSERT_LASX emit_int32(insn_I3RR(xvpickve2gr_wu_op, ui3, (int)xj->encoding(), (int)rd->encoding())); } ++ void xvpickve2gr_du(Register rd, FloatRegister xj, int ui2) { ASSERT_LASX emit_int32(insn_I2RR(xvpickve2gr_du_op, ui2, (int)xj->encoding(), (int)rd->encoding())); } ++ ++ void vreplgr2vr_b(FloatRegister vd, Register rj) { ASSERT_LSX emit_int32(insn_RR( vreplgr2vr_b_op, (int)rj->encoding(), (int)vd->encoding())); } ++ void vreplgr2vr_h(FloatRegister vd, Register rj) { ASSERT_LSX emit_int32(insn_RR( vreplgr2vr_h_op, (int)rj->encoding(), (int)vd->encoding())); } ++ void vreplgr2vr_w(FloatRegister vd, Register rj) { ASSERT_LSX emit_int32(insn_RR( vreplgr2vr_w_op, (int)rj->encoding(), (int)vd->encoding())); } ++ void vreplgr2vr_d(FloatRegister vd, Register rj) { ASSERT_LSX emit_int32(insn_RR( vreplgr2vr_d_op, (int)rj->encoding(), (int)vd->encoding())); } ++ void xvreplgr2vr_b(FloatRegister xd, Register rj) { ASSERT_LASX emit_int32(insn_RR(xvreplgr2vr_b_op, (int)rj->encoding(), (int)xd->encoding())); } ++ void xvreplgr2vr_h(FloatRegister xd, Register rj) { ASSERT_LASX emit_int32(insn_RR(xvreplgr2vr_h_op, (int)rj->encoding(), (int)xd->encoding())); } ++ void xvreplgr2vr_w(FloatRegister xd, Register rj) { ASSERT_LASX emit_int32(insn_RR(xvreplgr2vr_w_op, (int)rj->encoding(), (int)xd->encoding())); } ++ void xvreplgr2vr_d(FloatRegister xd, Register rj) { ASSERT_LASX emit_int32(insn_RR(xvreplgr2vr_d_op, (int)rj->encoding(), (int)xd->encoding())); } ++ ++ void vreplvei_b(FloatRegister vd, FloatRegister vj, int ui4) { ASSERT_LSX emit_int32(insn_I4RR(vreplvei_b_op, ui4, (int)vj->encoding(), (int)vd->encoding())); } ++ void vreplvei_h(FloatRegister vd, FloatRegister vj, int ui3) { ASSERT_LSX emit_int32(insn_I3RR(vreplvei_h_op, ui3, (int)vj->encoding(), (int)vd->encoding())); } ++ void vreplvei_w(FloatRegister vd, FloatRegister vj, int ui2) { ASSERT_LSX emit_int32(insn_I2RR(vreplvei_w_op, ui2, (int)vj->encoding(), (int)vd->encoding())); } ++ void vreplvei_d(FloatRegister vd, FloatRegister vj, int ui1) { ASSERT_LSX emit_int32(insn_I1RR(vreplvei_d_op, ui1, (int)vj->encoding(), (int)vd->encoding())); } ++ ++ void xvreplve0_b(FloatRegister xd, FloatRegister xj) { ASSERT_LASX emit_int32(insn_RR(xvreplve0_b_op, (int)xj->encoding(), 
(int)xd->encoding())); } ++ void xvreplve0_h(FloatRegister xd, FloatRegister xj) { ASSERT_LASX emit_int32(insn_RR(xvreplve0_h_op, (int)xj->encoding(), (int)xd->encoding())); } ++ void xvreplve0_w(FloatRegister xd, FloatRegister xj) { ASSERT_LASX emit_int32(insn_RR(xvreplve0_w_op, (int)xj->encoding(), (int)xd->encoding())); } ++ void xvreplve0_d(FloatRegister xd, FloatRegister xj) { ASSERT_LASX emit_int32(insn_RR(xvreplve0_d_op, (int)xj->encoding(), (int)xd->encoding())); } ++ void xvreplve0_q(FloatRegister xd, FloatRegister xj) { ASSERT_LASX emit_int32(insn_RR(xvreplve0_q_op, (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void xvinsve0_w(FloatRegister xd, FloatRegister xj, int ui3) { ASSERT_LASX emit_int32(insn_I3RR(xvinsve0_w_op, ui3, (int)xj->encoding(), (int)xd->encoding())); } ++ void xvinsve0_d(FloatRegister xd, FloatRegister xj, int ui2) { ASSERT_LASX emit_int32(insn_I2RR(xvinsve0_d_op, ui2, (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void xvpickve_w(FloatRegister xd, FloatRegister xj, int ui3) { ASSERT_LASX emit_int32(insn_I3RR(xvpickve_w_op, ui3, (int)xj->encoding(), (int)xd->encoding())); } ++ void xvpickve_d(FloatRegister xd, FloatRegister xj, int ui2) { ASSERT_LASX emit_int32(insn_I2RR(xvpickve_d_op, ui2, (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void vshuf_b(FloatRegister vd, FloatRegister vj, FloatRegister vk, FloatRegister va) { ASSERT_LSX emit_int32(insn_RRRR( vshuf_b_op, (int)va->encoding(), (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void xvshuf_b(FloatRegister xd, FloatRegister xj, FloatRegister xk, FloatRegister xa) { ASSERT_LASX emit_int32(insn_RRRR(xvshuf_b_op, (int)xa->encoding(), (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void vshuf_h(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vshuf_h_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vshuf_w(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vshuf_w_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ void vshuf_d(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vshuf_d_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } ++ ++ void xvshuf_h(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvshuf_h_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvshuf_w(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvshuf_w_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ void xvshuf_d(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvshuf_d_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void xvperm_w(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvperm_w_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void vshuf4i_b(FloatRegister vd, FloatRegister vj, int ui8) { ASSERT_LSX assert(is_uimm(ui8, 8), "not a unsigned 8-bit int"); emit_int32(insn_I8RR( vshuf4i_b_op, ui8, (int)vj->encoding(), (int)vd->encoding())); } ++ void vshuf4i_h(FloatRegister vd, FloatRegister vj, int ui8) { ASSERT_LSX assert(is_uimm(ui8, 8), "not a unsigned 8-bit int"); emit_int32(insn_I8RR( vshuf4i_h_op, ui8, (int)vj->encoding(), (int)vd->encoding())); } ++ void vshuf4i_w(FloatRegister vd, FloatRegister vj, int ui8) { ASSERT_LSX 
assert(is_uimm(ui8, 8), "not a unsigned 8-bit int"); emit_int32(insn_I8RR( vshuf4i_w_op, ui8, (int)vj->encoding(), (int)vd->encoding())); } ++ void xvshuf4i_b(FloatRegister xd, FloatRegister xj, int ui8) { ASSERT_LASX assert(is_uimm(ui8, 8), "not a unsigned 8-bit int"); emit_int32(insn_I8RR(xvshuf4i_b_op, ui8, (int)xj->encoding(), (int)xd->encoding())); } ++ void xvshuf4i_h(FloatRegister xd, FloatRegister xj, int ui8) { ASSERT_LASX assert(is_uimm(ui8, 8), "not a unsigned 8-bit int"); emit_int32(insn_I8RR(xvshuf4i_h_op, ui8, (int)xj->encoding(), (int)xd->encoding())); } ++ void xvshuf4i_w(FloatRegister xd, FloatRegister xj, int ui8) { ASSERT_LASX assert(is_uimm(ui8, 8), "not a unsigned 8-bit int"); emit_int32(insn_I8RR(xvshuf4i_w_op, ui8, (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void vshuf4i_d(FloatRegister vd, FloatRegister vj, int ui8) { ASSERT_LSX assert(is_uimm(ui8, 8), "not a unsigned 8-bit int"); emit_int32(insn_I8RR( vshuf4i_d_op, ui8, (int)vj->encoding(), (int)vd->encoding())); } ++ void xvshuf4i_d(FloatRegister xd, FloatRegister xj, int ui8) { ASSERT_LASX assert(is_uimm(ui8, 8), "not a unsigned 8-bit int"); emit_int32(insn_I8RR(xvshuf4i_d_op, ui8, (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void vpermi_w(FloatRegister vd, FloatRegister vj, int ui8) { ASSERT_LSX assert(is_uimm(ui8, 8), "not a unsigned 8-bit int"); emit_int32(insn_I8RR( vpermi_w_op, ui8, (int)vj->encoding(), (int)vd->encoding())); } ++ void xvpermi_w(FloatRegister xd, FloatRegister xj, int ui8) { ASSERT_LASX assert(is_uimm(ui8, 8), "not a unsigned 8-bit int"); emit_int32(insn_I8RR(xvpermi_w_op, ui8, (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void xvpermi_d(FloatRegister xd, FloatRegister xj, int ui8) { ASSERT_LASX assert(is_uimm(ui8, 8), "not a unsigned 8-bit int"); emit_int32(insn_I8RR(xvpermi_d_op, ui8, (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void xvpermi_q(FloatRegister xd, FloatRegister xj, int ui8) { ASSERT_LASX assert(is_uimm(ui8, 8), "not a unsigned 8-bit int"); emit_int32(insn_I8RR(xvpermi_q_op, ui8, (int)xj->encoding(), (int)xd->encoding())); } ++ ++ void vld(FloatRegister vd, Register rj, int si12) { ASSERT_LSX assert(is_simm(si12, 12), "not a signed 12-bit int"); emit_int32(insn_I12RR( vld_op, si12, (int)rj->encoding(), (int)vd->encoding()));} ++ void xvld(FloatRegister xd, Register rj, int si12) { ASSERT_LASX assert(is_simm(si12, 12), "not a signed 12-bit int"); emit_int32(insn_I12RR(xvld_op, si12, (int)rj->encoding(), (int)xd->encoding()));} ++ ++ void vst(FloatRegister vd, Register rj, int si12) { ASSERT_LSX assert(is_simm(si12, 12), "not a signed 12-bit int"); emit_int32(insn_I12RR( vst_op, si12, (int)rj->encoding(), (int)vd->encoding()));} ++ void xvst(FloatRegister xd, Register rj, int si12) { ASSERT_LASX assert(is_simm(si12, 12), "not a signed 12-bit int"); emit_int32(insn_I12RR(xvst_op, si12, (int)rj->encoding(), (int)xd->encoding()));} ++ ++ void vldx(FloatRegister vd, Register rj, Register rk) { ASSERT_LSX emit_int32(insn_RRR( vldx_op, (int)rk->encoding(), (int)rj->encoding(), (int)vd->encoding())); } ++ void xvldx(FloatRegister xd, Register rj, Register rk) { ASSERT_LASX emit_int32(insn_RRR(xvldx_op, (int)rk->encoding(), (int)rj->encoding(), (int)xd->encoding())); } ++ ++ void vstx(FloatRegister vd, Register rj, Register rk) { ASSERT_LSX emit_int32(insn_RRR( vstx_op, (int)rk->encoding(), (int)rj->encoding(), (int)vd->encoding())); } ++ void xvstx(FloatRegister xd, Register rj, Register rk) { ASSERT_LASX emit_int32(insn_RRR(xvstx_op, (int)rk->encoding(), 
(int)rj->encoding(), (int)xd->encoding())); } ++ ++#undef ASSERT_LSX ++#undef ASSERT_LASX ++ ++public: ++ // Creation ++ Assembler(CodeBuffer* code) : AbstractAssembler(code) {} ++ ++ // Decoding ++ static address locate_operand(address inst, WhichOperand which); ++ static address locate_next_instruction(address inst); ++}; ++ ++#endif // CPU_LOONGARCH_VM_ASSEMBLER_LOONGARCH_HPP +diff --git a/hotspot/src/cpu/loongarch/vm/assembler_loongarch.inline.hpp b/hotspot/src/cpu/loongarch/vm/assembler_loongarch.inline.hpp +new file mode 100644 +index 0000000000..601f4afe6f +--- /dev/null ++++ b/hotspot/src/cpu/loongarch/vm/assembler_loongarch.inline.hpp +@@ -0,0 +1,33 @@ ++/* ++ * Copyright (c) 1997, 2010, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#ifndef CPU_LOONGARCH_VM_ASSEMBLER_LOONGARCH_INLINE_HPP ++#define CPU_LOONGARCH_VM_ASSEMBLER_LOONGARCH_INLINE_HPP ++ ++#include "asm/assembler.inline.hpp" ++#include "asm/codeBuffer.hpp" ++#include "code/codeCache.hpp" ++ ++#endif // CPU_LOONGARCH_VM_ASSEMBLER_LOONGARCH_INLINE_HPP +diff --git a/hotspot/src/cpu/loongarch/vm/bytecodeInterpreter_loongarch.hpp b/hotspot/src/cpu/loongarch/vm/bytecodeInterpreter_loongarch.hpp +new file mode 100644 +index 0000000000..32775e9bc3 +--- /dev/null ++++ b/hotspot/src/cpu/loongarch/vm/bytecodeInterpreter_loongarch.hpp +@@ -0,0 +1,110 @@ ++/* ++ * Copyright (c) 2002, 2010, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2020, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 
++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#ifndef CPU_LOONGARCH_VM_BYTECODEINTERPRETER_LOONGARCH_HPP ++#define CPU_LOONGARCH_VM_BYTECODEINTERPRETER_LOONGARCH_HPP ++ ++// Platform specific for C++ based Interpreter ++#define LOTS_OF_REGS /* Lets interpreter use plenty of registers */ ++ ++private: ++ ++ // save the bottom of the stack after frame manager setup. For ease of restoration after return ++ // from recursive interpreter call ++ intptr_t* _frame_bottom; /* saved bottom of frame manager frame */ ++ intptr_t* _last_Java_pc; /* pc to return to in frame manager */ ++ intptr_t* _sender_sp; /* sender's sp before stack (locals) extension */ ++ interpreterState _self_link; /* Previous interpreter state */ /* sometimes points to self??? */ ++ double _native_fresult; /* save result of native calls that might return floats */ ++ intptr_t _native_lresult; /* save result of native calls that might return handle/longs */ ++public: ++ ++ static void pd_layout_interpreterState(interpreterState istate, address last_Java_pc, intptr_t* last_Java_fp); ++ inline intptr_t* sender_sp() { ++ return _sender_sp; ++ } ++ ++ ++#define SET_LAST_JAVA_FRAME() ++ ++#define RESET_LAST_JAVA_FRAME() THREAD->frame_anchor()->set_flags(0); ++ ++/* ++ * Macros for accessing the stack. ++ */ ++#undef STACK_INT ++#undef STACK_FLOAT ++#undef STACK_ADDR ++#undef STACK_OBJECT ++#undef STACK_DOUBLE ++#undef STACK_LONG ++ ++// JavaStack Implementation ++ ++#define GET_STACK_SLOT(offset) (*((intptr_t*) &topOfStack[-(offset)])) ++#define STACK_SLOT(offset) ((address) &topOfStack[-(offset)]) ++#define STACK_ADDR(offset) (*((address *) &topOfStack[-(offset)])) ++#define STACK_INT(offset) (*((jint*) &topOfStack[-(offset)])) ++#define STACK_FLOAT(offset) (*((jfloat *) &topOfStack[-(offset)])) ++#define STACK_OBJECT(offset) (*((oop *) &topOfStack [-(offset)])) ++#define STACK_DOUBLE(offset) (((VMJavaVal64*) &topOfStack[-(offset)])->d) ++#define STACK_LONG(offset) (((VMJavaVal64 *) &topOfStack[-(offset)])->l) ++ ++#define SET_STACK_SLOT(value, offset) (*(intptr_t*)&topOfStack[-(offset)] = *(intptr_t*)(value)) ++#define SET_STACK_ADDR(value, offset) (*((address *)&topOfStack[-(offset)]) = (value)) ++#define SET_STACK_INT(value, offset) (*((jint *)&topOfStack[-(offset)]) = (value)) ++#define SET_STACK_FLOAT(value, offset) (*((jfloat *)&topOfStack[-(offset)]) = (value)) ++#define SET_STACK_OBJECT(value, offset) (*((oop *)&topOfStack[-(offset)]) = (value)) ++#define SET_STACK_DOUBLE(value, offset) (((VMJavaVal64*)&topOfStack[-(offset)])->d = (value)) ++#define SET_STACK_DOUBLE_FROM_ADDR(addr, offset) (((VMJavaVal64*)&topOfStack[-(offset)])->d = \ ++ ((VMJavaVal64*)(addr))->d) ++#define SET_STACK_LONG(value, offset) (((VMJavaVal64*)&topOfStack[-(offset)])->l = (value)) ++#define SET_STACK_LONG_FROM_ADDR(addr, offset) (((VMJavaVal64*)&topOfStack[-(offset)])->l = \ ++ ((VMJavaVal64*)(addr))->l) ++// JavaLocals implementation ++ ++#define LOCALS_SLOT(offset) ((intptr_t*)&locals[-(offset)]) ++#define LOCALS_ADDR(offset) ((address)locals[-(offset)]) ++#define LOCALS_INT(offset) (*((jint*)&locals[-(offset)])) ++#define LOCALS_FLOAT(offset) (*((jfloat*)&locals[-(offset)])) ++#define LOCALS_OBJECT(offset) ((oop)locals[-(offset)]) ++#define LOCALS_DOUBLE(offset) (((VMJavaVal64*)&locals[-((offset) + 1)])->d) ++#define LOCALS_LONG(offset) (((VMJavaVal64*)&locals[-((offset) + 1)])->l) ++#define 
LOCALS_LONG_AT(offset) (((address)&locals[-((offset) + 1)])) ++#define LOCALS_DOUBLE_AT(offset) (((address)&locals[-((offset) + 1)])) ++ ++#define SET_LOCALS_SLOT(value, offset) (*(intptr_t*)&locals[-(offset)] = *(intptr_t *)(value)) ++#define SET_LOCALS_ADDR(value, offset) (*((address *)&locals[-(offset)]) = (value)) ++#define SET_LOCALS_INT(value, offset) (*((jint *)&locals[-(offset)]) = (value)) ++#define SET_LOCALS_FLOAT(value, offset) (*((jfloat *)&locals[-(offset)]) = (value)) ++#define SET_LOCALS_OBJECT(value, offset) (*((oop *)&locals[-(offset)]) = (value)) ++#define SET_LOCALS_DOUBLE(value, offset) (((VMJavaVal64*)&locals[-((offset)+1)])->d = (value)) ++#define SET_LOCALS_LONG(value, offset) (((VMJavaVal64*)&locals[-((offset)+1)])->l = (value)) ++#define SET_LOCALS_DOUBLE_FROM_ADDR(addr, offset) (((VMJavaVal64*)&locals[-((offset)+1)])->d = \ ++ ((VMJavaVal64*)(addr))->d) ++#define SET_LOCALS_LONG_FROM_ADDR(addr, offset) (((VMJavaVal64*)&locals[-((offset)+1)])->l = \ ++ ((VMJavaVal64*)(addr))->l) ++ ++#endif // CPU_LOONGARCH_VM_BYTECODEINTERPRETER_LOONGARCH_HPP +diff --git a/hotspot/src/cpu/loongarch/vm/bytecodeInterpreter_loongarch.inline.hpp b/hotspot/src/cpu/loongarch/vm/bytecodeInterpreter_loongarch.inline.hpp +new file mode 100644 +index 0000000000..07df527e94 +--- /dev/null ++++ b/hotspot/src/cpu/loongarch/vm/bytecodeInterpreter_loongarch.inline.hpp +@@ -0,0 +1,286 @@ ++/* ++ * Copyright (c) 2002, 2010, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2020, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#ifndef CPU_LOONGARCH_VM_BYTECODEINTERPRETER_LOONGARCH_INLINE_HPP ++#define CPU_LOONGARCH_VM_BYTECODEINTERPRETER_LOONGARCH_INLINE_HPP ++ ++// Inline interpreter functions for LoongArch ++ ++inline jfloat BytecodeInterpreter::VMfloatAdd(jfloat op1, jfloat op2) { return op1 + op2; } ++inline jfloat BytecodeInterpreter::VMfloatSub(jfloat op1, jfloat op2) { return op1 - op2; } ++inline jfloat BytecodeInterpreter::VMfloatMul(jfloat op1, jfloat op2) { return op1 * op2; } ++inline jfloat BytecodeInterpreter::VMfloatDiv(jfloat op1, jfloat op2) { return op1 / op2; } ++inline jfloat BytecodeInterpreter::VMfloatRem(jfloat op1, jfloat op2) { return fmod(op1, op2); } ++ ++inline jfloat BytecodeInterpreter::VMfloatNeg(jfloat op) { return -op; } ++ ++inline int32_t BytecodeInterpreter::VMfloatCompare(jfloat op1, jfloat op2, int32_t direction) { ++ return ( op1 < op2 ? -1 : ++ op1 > op2 ? 1 : ++ op1 == op2 ? 0 : ++ (direction == -1 || direction == 1) ? 
direction : 0); ++ ++} ++ ++inline void BytecodeInterpreter::VMmemCopy64(uint32_t to[2], const uint32_t from[2]) { ++ // x86 can do unaligned copies but not 64bits at a time ++ to[0] = from[0]; to[1] = from[1]; ++} ++ ++// The long operations depend on compiler support for "long long" on x86 ++ ++inline jlong BytecodeInterpreter::VMlongAdd(jlong op1, jlong op2) { ++ return op1 + op2; ++} ++ ++inline jlong BytecodeInterpreter::VMlongAnd(jlong op1, jlong op2) { ++ return op1 & op2; ++} ++ ++inline jlong BytecodeInterpreter::VMlongDiv(jlong op1, jlong op2) { ++ // QQQ what about check and throw... ++ return op1 / op2; ++} ++ ++inline jlong BytecodeInterpreter::VMlongMul(jlong op1, jlong op2) { ++ return op1 * op2; ++} ++ ++inline jlong BytecodeInterpreter::VMlongOr(jlong op1, jlong op2) { ++ return op1 | op2; ++} ++ ++inline jlong BytecodeInterpreter::VMlongSub(jlong op1, jlong op2) { ++ return op1 - op2; ++} ++ ++inline jlong BytecodeInterpreter::VMlongXor(jlong op1, jlong op2) { ++ return op1 ^ op2; ++} ++ ++inline jlong BytecodeInterpreter::VMlongRem(jlong op1, jlong op2) { ++ return op1 % op2; ++} ++ ++inline jlong BytecodeInterpreter::VMlongUshr(jlong op1, jint op2) { ++ // CVM did this 0x3f mask, is the really needed??? QQQ ++ return ((unsigned long long) op1) >> (op2 & 0x3F); ++} ++ ++inline jlong BytecodeInterpreter::VMlongShr(jlong op1, jint op2) { ++ return op1 >> (op2 & 0x3F); ++} ++ ++inline jlong BytecodeInterpreter::VMlongShl(jlong op1, jint op2) { ++ return op1 << (op2 & 0x3F); ++} ++ ++inline jlong BytecodeInterpreter::VMlongNeg(jlong op) { ++ return -op; ++} ++ ++inline jlong BytecodeInterpreter::VMlongNot(jlong op) { ++ return ~op; ++} ++ ++inline int32_t BytecodeInterpreter::VMlongLtz(jlong op) { ++ return (op <= 0); ++} ++ ++inline int32_t BytecodeInterpreter::VMlongGez(jlong op) { ++ return (op >= 0); ++} ++ ++inline int32_t BytecodeInterpreter::VMlongEqz(jlong op) { ++ return (op == 0); ++} ++ ++inline int32_t BytecodeInterpreter::VMlongEq(jlong op1, jlong op2) { ++ return (op1 == op2); ++} ++ ++inline int32_t BytecodeInterpreter::VMlongNe(jlong op1, jlong op2) { ++ return (op1 != op2); ++} ++ ++inline int32_t BytecodeInterpreter::VMlongGe(jlong op1, jlong op2) { ++ return (op1 >= op2); ++} ++ ++inline int32_t BytecodeInterpreter::VMlongLe(jlong op1, jlong op2) { ++ return (op1 <= op2); ++} ++ ++inline int32_t BytecodeInterpreter::VMlongLt(jlong op1, jlong op2) { ++ return (op1 < op2); ++} ++ ++inline int32_t BytecodeInterpreter::VMlongGt(jlong op1, jlong op2) { ++ return (op1 > op2); ++} ++ ++inline int32_t BytecodeInterpreter::VMlongCompare(jlong op1, jlong op2) { ++ return (VMlongLt(op1, op2) ? -1 : VMlongGt(op1, op2) ? 1 : 0); ++} ++ ++// Long conversions ++ ++inline jdouble BytecodeInterpreter::VMlong2Double(jlong val) { ++ return (jdouble) val; ++} ++ ++inline jfloat BytecodeInterpreter::VMlong2Float(jlong val) { ++ return (jfloat) val; ++} ++ ++inline jint BytecodeInterpreter::VMlong2Int(jlong val) { ++ return (jint) val; ++} ++ ++// Double Arithmetic ++ ++inline jdouble BytecodeInterpreter::VMdoubleAdd(jdouble op1, jdouble op2) { ++ return op1 + op2; ++} ++ ++inline jdouble BytecodeInterpreter::VMdoubleDiv(jdouble op1, jdouble op2) { ++ // Divide by zero... 
QQQ ++ return op1 / op2; ++} ++ ++inline jdouble BytecodeInterpreter::VMdoubleMul(jdouble op1, jdouble op2) { ++ return op1 * op2; ++} ++ ++inline jdouble BytecodeInterpreter::VMdoubleNeg(jdouble op) { ++ return -op; ++} ++ ++inline jdouble BytecodeInterpreter::VMdoubleRem(jdouble op1, jdouble op2) { ++ return fmod(op1, op2); ++} ++ ++inline jdouble BytecodeInterpreter::VMdoubleSub(jdouble op1, jdouble op2) { ++ return op1 - op2; ++} ++ ++inline int32_t BytecodeInterpreter::VMdoubleCompare(jdouble op1, jdouble op2, int32_t direction) { ++ return ( op1 < op2 ? -1 : ++ op1 > op2 ? 1 : ++ op1 == op2 ? 0 : ++ (direction == -1 || direction == 1) ? direction : 0); ++} ++ ++// Double Conversions ++ ++inline jfloat BytecodeInterpreter::VMdouble2Float(jdouble val) { ++ return (jfloat) val; ++} ++ ++// Float Conversions ++ ++inline jdouble BytecodeInterpreter::VMfloat2Double(jfloat op) { ++ return (jdouble) op; ++} ++ ++// Integer Arithmetic ++ ++inline jint BytecodeInterpreter::VMintAdd(jint op1, jint op2) { ++ return op1 + op2; ++} ++ ++inline jint BytecodeInterpreter::VMintAnd(jint op1, jint op2) { ++ return op1 & op2; ++} ++ ++inline jint BytecodeInterpreter::VMintDiv(jint op1, jint op2) { ++ // it's possible we could catch this special case implicitly ++ if ((juint)op1 == 0x80000000 && op2 == -1) return op1; ++ else return op1 / op2; ++} ++ ++inline jint BytecodeInterpreter::VMintMul(jint op1, jint op2) { ++ return op1 * op2; ++} ++ ++inline jint BytecodeInterpreter::VMintNeg(jint op) { ++ return -op; ++} ++ ++inline jint BytecodeInterpreter::VMintOr(jint op1, jint op2) { ++ return op1 | op2; ++} ++ ++inline jint BytecodeInterpreter::VMintRem(jint op1, jint op2) { ++ // it's possible we could catch this special case implicitly ++ if ((juint)op1 == 0x80000000 && op2 == -1) return 0; ++ else return op1 % op2; ++} ++ ++inline jint BytecodeInterpreter::VMintShl(jint op1, jint op2) { ++ return op1 << op2; ++} ++ ++inline jint BytecodeInterpreter::VMintShr(jint op1, jint op2) { ++ return op1 >> (op2 & 0x1f); // QQ op2 & 0x1f?? ++} ++ ++inline jint BytecodeInterpreter::VMintSub(jint op1, jint op2) { ++ return op1 - op2; ++} ++ ++inline jint BytecodeInterpreter::VMintUshr(jint op1, jint op2) { ++ return ((juint) op1) >> (op2 & 0x1f); // QQ op2 & 0x1f?? ++} ++ ++inline jint BytecodeInterpreter::VMintXor(jint op1, jint op2) { ++ return op1 ^ op2; ++} ++ ++inline jdouble BytecodeInterpreter::VMint2Double(jint val) { ++ return (jdouble) val; ++} ++ ++inline jfloat BytecodeInterpreter::VMint2Float(jint val) { ++ return (jfloat) val; ++} ++ ++inline jlong BytecodeInterpreter::VMint2Long(jint val) { ++ return (jlong) val; ++} ++ ++inline jchar BytecodeInterpreter::VMint2Char(jint val) { ++ return (jchar) val; ++} ++ ++inline jshort BytecodeInterpreter::VMint2Short(jint val) { ++ return (jshort) val; ++} ++ ++inline jbyte BytecodeInterpreter::VMint2Byte(jint val) { ++ return (jbyte) val; ++} ++ ++#endif // CPU_LOONGARCH_VM_BYTECODEINTERPRETER_LOONGARCH_INLINE_HPP +diff --git a/hotspot/src/cpu/loongarch/vm/bytecodes_loongarch.cpp b/hotspot/src/cpu/loongarch/vm/bytecodes_loongarch.cpp +new file mode 100644 +index 0000000000..8641090584 +--- /dev/null ++++ b/hotspot/src/cpu/loongarch/vm/bytecodes_loongarch.cpp +@@ -0,0 +1,38 @@ ++/* ++ * Copyright (c) 1998, 2010, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2020, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#include "precompiled.hpp" ++#include "interpreter/bytecodes.hpp" ++ ++ ++void Bytecodes::pd_initialize() { ++ // No LoongArch specific initialization ++} ++ ++ ++Bytecodes::Code Bytecodes::pd_base_code_for(Code code) { ++ // No LoongArch specific bytecodes ++ return code; ++} +diff --git a/hotspot/src/cpu/loongarch/vm/bytecodes_loongarch.hpp b/hotspot/src/cpu/loongarch/vm/bytecodes_loongarch.hpp +new file mode 100644 +index 0000000000..fbdf531996 +--- /dev/null ++++ b/hotspot/src/cpu/loongarch/vm/bytecodes_loongarch.hpp +@@ -0,0 +1,31 @@ ++/* ++ * Copyright (c) 1998, 2010, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2020, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#ifndef CPU_LOONGARCH_VM_BYTECODES_LOONGARCH_HPP ++#define CPU_LOONGARCH_VM_BYTECODES_LOONGARCH_HPP ++ ++// No Loongson specific bytecodes ++ ++#endif // CPU_LOONGARCH_VM_BYTECODES_LOONGARCH_HPP +diff --git a/hotspot/src/cpu/loongarch/vm/bytes_loongarch.hpp b/hotspot/src/cpu/loongarch/vm/bytes_loongarch.hpp +new file mode 100644 +index 0000000000..8f766a617e +--- /dev/null ++++ b/hotspot/src/cpu/loongarch/vm/bytes_loongarch.hpp +@@ -0,0 +1,75 @@ ++/* ++ * Copyright (c) 1997, 2010, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2021, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#ifndef CPU_LOONGARCH_VM_BYTES_LOONGARCH_HPP ++#define CPU_LOONGARCH_VM_BYTES_LOONGARCH_HPP ++ ++#include "memory/allocation.hpp" ++ ++class Bytes: AllStatic { ++ public: ++ // Returns true if the byte ordering used by Java is different from the native byte ordering ++ // of the underlying machine. For example, this is true for Intel x86, but false for Solaris ++ // on Sparc. ++ // we use LoongArch, so return true ++ static inline bool is_Java_byte_ordering_different(){ return true; } ++ ++ ++ // Efficient reading and writing of unaligned unsigned data in platform-specific byte ordering ++ // (no special code is needed since LoongArch CPUs can access unaligned data) ++ static inline u2 get_native_u2(address p) { return *(u2*)p; } ++ static inline u4 get_native_u4(address p) { return *(u4*)p; } ++ static inline u8 get_native_u8(address p) { return *(u8*)p; } ++ ++ static inline void put_native_u2(address p, u2 x) { *(u2*)p = x; } ++ static inline void put_native_u4(address p, u4 x) { *(u4*)p = x; } ++ static inline void put_native_u8(address p, u8 x) { *(u8*)p = x; } ++ ++ ++ // Efficient reading and writing of unaligned unsigned data in Java ++ // byte ordering (i.e. big-endian ordering). Byte-order reversal is ++ // needed since LoongArch64 CPUs use little-endian format. 
++ static inline u2 get_Java_u2(address p) { return swap_u2(get_native_u2(p)); } ++ static inline u4 get_Java_u4(address p) { return swap_u4(get_native_u4(p)); } ++ static inline u8 get_Java_u8(address p) { return swap_u8(get_native_u8(p)); } ++ ++ static inline void put_Java_u2(address p, u2 x) { put_native_u2(p, swap_u2(x)); } ++ static inline void put_Java_u4(address p, u4 x) { put_native_u4(p, swap_u4(x)); } ++ static inline void put_Java_u8(address p, u8 x) { put_native_u8(p, swap_u8(x)); } ++ ++ ++ // Efficient swapping of byte ordering ++ static inline u2 swap_u2(u2 x); // compiler-dependent implementation ++ static inline u4 swap_u4(u4 x); // compiler-dependent implementation ++ static inline u8 swap_u8(u8 x); ++}; ++ ++ ++// The following header contains the implementations of swap_u2, swap_u4, and swap_u8[_base] ++#ifdef TARGET_OS_ARCH_linux_loongarch ++# include "bytes_linux_loongarch.inline.hpp" ++#endif ++ ++#endif // CPU_LOONGARCH_VM_BYTES_LOONGARCH_HPP +diff --git a/hotspot/src/cpu/loongarch/vm/c1_CodeStubs_loongarch_64.cpp b/hotspot/src/cpu/loongarch/vm/c1_CodeStubs_loongarch_64.cpp +new file mode 100644 +index 0000000000..5166acfa2b +--- /dev/null ++++ b/hotspot/src/cpu/loongarch/vm/c1_CodeStubs_loongarch_64.cpp +@@ -0,0 +1,387 @@ ++/* ++ * Copyright (c) 1999, 2021, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2021, 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ * ++ */ ++ ++#include "precompiled.hpp" ++#include "asm/macroAssembler.inline.hpp" ++#include "c1/c1_CodeStubs.hpp" ++#include "c1/c1_FrameMap.hpp" ++#include "c1/c1_LIRAssembler.hpp" ++#include "c1/c1_MacroAssembler.hpp" ++#include "c1/c1_Runtime1.hpp" ++#include "classfile/javaClasses.hpp" ++#include "nativeInst_loongarch.hpp" ++#include "runtime/sharedRuntime.hpp" ++#include "vmreg_loongarch.inline.hpp" ++#if INCLUDE_ALL_GCS ++#include "gc_implementation/g1/g1SATBCardTableModRefBS.hpp" ++#endif ++ ++#define A0 RA0 ++#define A3 RA3 ++ ++#define __ ce->masm()-> ++ ++void CounterOverflowStub::emit_code(LIR_Assembler* ce) { ++ __ bind(_entry); ++ ce->store_parameter(_method->as_register(), 1); ++ ce->store_parameter(_bci, 0); ++ __ call(Runtime1::entry_for(Runtime1::counter_overflow_id), relocInfo::runtime_call_type); ++ ce->add_call_info_here(_info); ++ ce->verify_oop_map(_info); ++ __ b(_continuation); ++} ++ ++RangeCheckStub::RangeCheckStub(CodeEmitInfo* info, LIR_Opr index, ++ bool throw_index_out_of_bounds_exception) ++ : _throw_index_out_of_bounds_exception(throw_index_out_of_bounds_exception) ++ , _index(index) ++{ ++ assert(info != NULL, "must have info"); ++ _info = new CodeEmitInfo(info); ++} ++ ++void RangeCheckStub::emit_code(LIR_Assembler* ce) { ++ __ bind(_entry); ++ if (_info->deoptimize_on_exception()) { ++ address a = Runtime1::entry_for(Runtime1::predicate_failed_trap_id); ++ __ call(a, relocInfo::runtime_call_type); ++ ce->add_call_info_here(_info); ++ ce->verify_oop_map(_info); ++ debug_only(__ should_not_reach_here()); ++ return; ++ } ++ ++ if (_index->is_cpu_register()) { ++ __ move(SCR1, _index->as_register()); ++ } else { ++ __ li(SCR1, _index->as_jint()); ++ } ++ Runtime1::StubID stub_id; ++ if (_throw_index_out_of_bounds_exception) { ++ stub_id = Runtime1::throw_index_exception_id; ++ } else { ++ stub_id = Runtime1::throw_range_check_failed_id; ++ } ++ __ call(Runtime1::entry_for(stub_id), relocInfo::runtime_call_type); ++ ce->add_call_info_here(_info); ++ ce->verify_oop_map(_info); ++ debug_only(__ should_not_reach_here()); ++} ++ ++PredicateFailedStub::PredicateFailedStub(CodeEmitInfo* info) { ++ _info = new CodeEmitInfo(info); ++} ++ ++void PredicateFailedStub::emit_code(LIR_Assembler* ce) { ++ __ bind(_entry); ++ address a = Runtime1::entry_for(Runtime1::predicate_failed_trap_id); ++ __ call(a, relocInfo::runtime_call_type); ++ ce->add_call_info_here(_info); ++ ce->verify_oop_map(_info); ++ debug_only(__ should_not_reach_here()); ++} ++ ++void DivByZeroStub::emit_code(LIR_Assembler* ce) { ++ if (_offset != -1) { ++ ce->compilation()->implicit_exception_table()->append(_offset, __ offset()); ++ } ++ __ bind(_entry); ++ __ call(Runtime1::entry_for(Runtime1::throw_div0_exception_id), relocInfo::runtime_call_type); ++ ce->add_call_info_here(_info); ++ ce->verify_oop_map(_info); ++#ifdef ASSERT ++ __ should_not_reach_here(); ++#endif ++} ++ ++// Implementation of NewInstanceStub ++ ++NewInstanceStub::NewInstanceStub(LIR_Opr klass_reg, LIR_Opr result, ciInstanceKlass* klass, ++ CodeEmitInfo* info, Runtime1::StubID stub_id) { ++ _result = result; ++ _klass = klass; ++ _klass_reg = klass_reg; ++ _info = new CodeEmitInfo(info); ++ assert(stub_id == Runtime1::new_instance_id || ++ stub_id == Runtime1::fast_new_instance_id || ++ stub_id == Runtime1::fast_new_instance_init_check_id, ++ "need new_instance id"); ++ _stub_id = stub_id; ++} ++ ++void NewInstanceStub::emit_code(LIR_Assembler* ce) { ++ assert(__ rsp_offset() == 0, "frame size should be fixed"); ++ __ 
bind(_entry); ++ __ move(A3, _klass_reg->as_register()); ++ __ call(Runtime1::entry_for(_stub_id), relocInfo::runtime_call_type); ++ ce->add_call_info_here(_info); ++ ce->verify_oop_map(_info); ++ assert(_result->as_register() == A0, "result must in A0"); ++ __ b(_continuation); ++} ++ ++// Implementation of NewTypeArrayStub ++ ++NewTypeArrayStub::NewTypeArrayStub(LIR_Opr klass_reg, LIR_Opr length, LIR_Opr result, ++ CodeEmitInfo* info) { ++ _klass_reg = klass_reg; ++ _length = length; ++ _result = result; ++ _info = new CodeEmitInfo(info); ++} ++ ++void NewTypeArrayStub::emit_code(LIR_Assembler* ce) { ++ assert(__ rsp_offset() == 0, "frame size should be fixed"); ++ __ bind(_entry); ++ assert(_length->as_register() == S0, "length must in S0,"); ++ assert(_klass_reg->as_register() == A3, "klass_reg must in A3"); ++ __ call(Runtime1::entry_for(Runtime1::new_type_array_id), relocInfo::runtime_call_type); ++ ce->add_call_info_here(_info); ++ ce->verify_oop_map(_info); ++ assert(_result->as_register() == A0, "result must in A0"); ++ __ b(_continuation); ++} ++ ++// Implementation of NewObjectArrayStub ++ ++NewObjectArrayStub::NewObjectArrayStub(LIR_Opr klass_reg, LIR_Opr length, LIR_Opr result, ++ CodeEmitInfo* info) { ++ _klass_reg = klass_reg; ++ _result = result; ++ _length = length; ++ _info = new CodeEmitInfo(info); ++} ++ ++void NewObjectArrayStub::emit_code(LIR_Assembler* ce) { ++ assert(__ rsp_offset() == 0, "frame size should be fixed"); ++ __ bind(_entry); ++ assert(_length->as_register() == S0, "length must in S0,"); ++ assert(_klass_reg->as_register() == A3, "klass_reg must in A3"); ++ __ call(Runtime1::entry_for(Runtime1::new_object_array_id), relocInfo::runtime_call_type); ++ ce->add_call_info_here(_info); ++ ce->verify_oop_map(_info); ++ assert(_result->as_register() == A0, "result must in A0"); ++ __ b(_continuation); ++} ++ ++// Implementation of MonitorAccessStubs ++ ++MonitorEnterStub::MonitorEnterStub(LIR_Opr obj_reg, LIR_Opr lock_reg, CodeEmitInfo* info) ++ : MonitorAccessStub(obj_reg, lock_reg) { ++ _info = new CodeEmitInfo(info); ++} ++ ++void MonitorEnterStub::emit_code(LIR_Assembler* ce) { ++ assert(__ rsp_offset() == 0, "frame size should be fixed"); ++ __ bind(_entry); ++ ce->store_parameter(_obj_reg->as_register(), 1); ++ ce->store_parameter(_lock_reg->as_register(), 0); ++ Runtime1::StubID enter_id; ++ if (ce->compilation()->has_fpu_code()) { ++ enter_id = Runtime1::monitorenter_id; ++ } else { ++ enter_id = Runtime1::monitorenter_nofpu_id; ++ } ++ __ call(Runtime1::entry_for(enter_id), relocInfo::runtime_call_type); ++ ce->add_call_info_here(_info); ++ ce->verify_oop_map(_info); ++ __ b(_continuation); ++} ++ ++void MonitorExitStub::emit_code(LIR_Assembler* ce) { ++ __ bind(_entry); ++ if (_compute_lock) { ++ // lock_reg was destroyed by fast unlocking attempt => recompute it ++ ce->monitor_address(_monitor_ix, _lock_reg); ++ } ++ ce->store_parameter(_lock_reg->as_register(), 0); ++ // note: non-blocking leaf routine => no call info needed ++ Runtime1::StubID exit_id; ++ if (ce->compilation()->has_fpu_code()) { ++ exit_id = Runtime1::monitorexit_id; ++ } else { ++ exit_id = Runtime1::monitorexit_nofpu_id; ++ } ++ __ lipc(RA, _continuation); ++ __ jmp(Runtime1::entry_for(exit_id), relocInfo::runtime_call_type); ++} ++ ++// Implementation of patching: ++// - Copy the code at given offset to an inlined buffer (first the bytes, then the number of bytes) ++// - Replace original code with a call to the stub ++// At Runtime: ++// - call to stub, jump to runtime ++// - in 
runtime: preserve all registers (rspecially objects, i.e., source and destination object) ++// - in runtime: after initializing class, restore original code, reexecute instruction ++ ++int PatchingStub::_patch_info_offset = -NativeGeneralJump::instruction_size; ++ ++void PatchingStub::align_patch_site(MacroAssembler* masm) { ++} ++ ++void PatchingStub::emit_code(LIR_Assembler* ce) { ++ assert(false, "LoongArch64 should not use C1 runtime patching"); ++} ++ ++void DeoptimizeStub::emit_code(LIR_Assembler* ce) { ++ __ bind(_entry); ++ __ call(Runtime1::entry_for(Runtime1::deoptimize_id), relocInfo::runtime_call_type); ++ ce->add_call_info_here(_info); ++ DEBUG_ONLY(__ should_not_reach_here()); ++} ++ ++void ImplicitNullCheckStub::emit_code(LIR_Assembler* ce) { ++ address a; ++ if (_info->deoptimize_on_exception()) { ++ // Deoptimize, do not throw the exception, because it is probably wrong to do it here. ++ a = Runtime1::entry_for(Runtime1::predicate_failed_trap_id); ++ } else { ++ a = Runtime1::entry_for(Runtime1::throw_null_pointer_exception_id); ++ } ++ ++ ce->compilation()->implicit_exception_table()->append(_offset, __ offset()); ++ __ bind(_entry); ++ __ call(a, relocInfo::runtime_call_type); ++ ce->add_call_info_here(_info); ++ ce->verify_oop_map(_info); ++ debug_only(__ should_not_reach_here()); ++} ++ ++void SimpleExceptionStub::emit_code(LIR_Assembler* ce) { ++ assert(__ rsp_offset() == 0, "frame size should be fixed"); ++ ++ __ bind(_entry); ++ // pass the object in a scratch register because all other registers ++ // must be preserved ++ if (_obj->is_cpu_register()) { ++ __ move(SCR1, _obj->as_register()); ++ } ++ __ call(Runtime1::entry_for(_stub), relocInfo::runtime_call_type); ++ ce->add_call_info_here(_info); ++ debug_only(__ should_not_reach_here()); ++} ++ ++void ArrayCopyStub::emit_code(LIR_Assembler* ce) { ++ //---------------slow case: call to native----------------- ++ __ bind(_entry); ++ // Figure out where the args should go ++ // This should really convert the IntrinsicID to the Method* and signature ++ // but I don't know how to do that. 
++ // ++ VMRegPair args[5]; ++ BasicType signature[5] = { T_OBJECT, T_INT, T_OBJECT, T_INT, T_INT}; ++ SharedRuntime::java_calling_convention(signature, args, 5, true); ++ ++ // push parameters ++ // (src, src_pos, dest, destPos, length) ++ Register r[5]; ++ r[0] = src()->as_register(); ++ r[1] = src_pos()->as_register(); ++ r[2] = dst()->as_register(); ++ r[3] = dst_pos()->as_register(); ++ r[4] = length()->as_register(); ++ ++ // next registers will get stored on the stack ++ for (int i = 0; i < 5 ; i++ ) { ++ VMReg r_1 = args[i].first(); ++ if (r_1->is_stack()) { ++ int st_off = r_1->reg2stack() * wordSize; ++ __ stptr_d (r[i], SP, st_off); ++ } else { ++ assert(r[i] == args[i].first()->as_Register(), "Wrong register for arg "); ++ } ++ } ++ ++ ce->align_call(lir_static_call); ++ ++ ce->emit_static_call_stub(); ++ if (ce->compilation()->bailed_out()) { ++ return; // CodeCache is full ++ } ++ AddressLiteral resolve(SharedRuntime::get_resolve_static_call_stub(), ++ relocInfo::static_call_type); ++ address call = __ trampoline_call(resolve); ++ if (call == NULL) { ++ ce->bailout("trampoline stub overflow"); ++ return; ++ } ++ ce->add_call_info_here(info()); ++ ++#ifndef PRODUCT ++ __ li(SCR2, (address)&Runtime1::_arraycopy_slowcase_cnt); ++ __ increment(Address(SCR2)); ++#endif ++ ++ __ b(_continuation); ++} ++ ++///////////////////////////////////////////////////////////////////////////// ++#if INCLUDE_ALL_GCS ++ ++void G1PreBarrierStub::emit_code(LIR_Assembler* ce) { ++ // At this point we know that marking is in progress. ++ // If do_load() is true then we have to emit the ++ // load of the previous value; otherwise it has already ++ // been loaded into _pre_val. ++ ++ __ bind(_entry); ++ assert(pre_val()->is_register(), "Precondition."); ++ ++ Register pre_val_reg = pre_val()->as_register(); ++ ++ if (do_load()) { ++ ce->mem2reg(addr(), pre_val(), T_OBJECT, patch_code(), info(), false /*wide*/, false /*unaligned*/); ++ } ++ __ beqz(pre_val_reg, _continuation); ++ ce->store_parameter(pre_val()->as_register(), 0); ++ __ call(Runtime1::entry_for(Runtime1::g1_pre_barrier_slow_id), relocInfo::runtime_call_type); ++ __ b(_continuation); ++} ++ ++jbyte* G1PostBarrierStub::_byte_map_base = NULL; ++ ++jbyte* G1PostBarrierStub::byte_map_base_slow() { ++ BarrierSet* bs = Universe::heap()->barrier_set(); ++ assert(bs->is_a(BarrierSet::G1SATBCTLogging), ++ "Must be if we're using this."); ++ return ((G1SATBCardTableModRefBS*)bs)->byte_map_base; ++} ++ ++ ++void G1PostBarrierStub::emit_code(LIR_Assembler* ce) { ++ __ bind(_entry); ++ assert(addr()->is_register(), "Precondition."); ++ assert(new_val()->is_register(), "Precondition."); ++ Register new_val_reg = new_val()->as_register(); ++ __ beqz(new_val_reg, _continuation); ++ ce->store_parameter(addr()->as_pointer_register(), 0); ++ __ call(Runtime1::entry_for(Runtime1::g1_post_barrier_slow_id), relocInfo::runtime_call_type); ++ __ b(_continuation); ++} ++ ++#endif // INCLUDE_ALL_GCS ++///////////////////////////////////////////////////////////////////////////// ++ ++#undef __ +diff --git a/hotspot/src/cpu/loongarch/vm/c1_Defs_loongarch.hpp b/hotspot/src/cpu/loongarch/vm/c1_Defs_loongarch.hpp +new file mode 100644 +index 0000000000..1140e44431 +--- /dev/null ++++ b/hotspot/src/cpu/loongarch/vm/c1_Defs_loongarch.hpp +@@ -0,0 +1,79 @@ ++/* ++ * Copyright (c) 2000, 2021, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2021, Loongson Technology. All rights reserved. 
++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#ifndef CPU_LOONGARCH_C1_DEFS_LOONGARCH_HPP ++#define CPU_LOONGARCH_C1_DEFS_LOONGARCH_HPP ++ ++// native word offsets from memory address (little endian) ++enum { ++ pd_lo_word_offset_in_bytes = 0, ++ pd_hi_word_offset_in_bytes = BytesPerWord ++}; ++ ++// explicit rounding operations are required to implement the strictFP mode ++enum { ++ pd_strict_fp_requires_explicit_rounding = false ++}; ++ ++// FIXME: There are no callee-saved ++ ++// registers ++enum { ++ pd_nof_cpu_regs_frame_map = RegisterImpl::number_of_registers, // number of registers used during code emission ++ pd_nof_fpu_regs_frame_map = FloatRegisterImpl::number_of_registers, // number of registers used during code emission ++ ++ pd_nof_caller_save_cpu_regs_frame_map = 15, // number of registers killed by calls ++ pd_nof_caller_save_fpu_regs_frame_map = 32, // number of registers killed by calls ++ ++ pd_first_callee_saved_reg = pd_nof_caller_save_cpu_regs_frame_map, ++ pd_last_callee_saved_reg = 21, ++ ++ pd_last_allocatable_cpu_reg = pd_nof_caller_save_cpu_regs_frame_map - 1, ++ ++ pd_nof_cpu_regs_reg_alloc = pd_nof_caller_save_cpu_regs_frame_map, // number of registers that are visible to register allocator ++ pd_nof_fpu_regs_reg_alloc = 32, // number of registers that are visible to register allocator ++ ++ pd_nof_cpu_regs_linearscan = 32, // number of registers visible to linear scan ++ pd_nof_fpu_regs_linearscan = pd_nof_fpu_regs_frame_map, // number of registers visible to linear scan ++ pd_nof_xmm_regs_linearscan = 0, // don't have vector registers ++ pd_first_cpu_reg = 0, ++ pd_last_cpu_reg = pd_nof_cpu_regs_reg_alloc - 1, ++ pd_first_byte_reg = 0, ++ pd_last_byte_reg = pd_nof_cpu_regs_reg_alloc - 1, ++ pd_first_fpu_reg = pd_nof_cpu_regs_frame_map, ++ pd_last_fpu_reg = pd_first_fpu_reg + 31, ++ ++ pd_first_callee_saved_fpu_reg = 24 + pd_first_fpu_reg, ++ pd_last_callee_saved_fpu_reg = 31 + pd_first_fpu_reg, ++}; ++ ++// Encoding of float value in debug info. This is true on x86 where ++// floats are extended to doubles when stored in the stack, false for ++// LoongArch64 where floats and doubles are stored in their native form. 
++enum { ++ pd_float_saved_as_double = false ++}; ++ ++#endif // CPU_LOONGARCH_C1_DEFS_LOONGARCH_HPP +diff --git a/hotspot/src/cpu/loongarch/vm/c1_FpuStackSim_loongarch.hpp b/hotspot/src/cpu/loongarch/vm/c1_FpuStackSim_loongarch.hpp +new file mode 100644 +index 0000000000..bd8578c72a +--- /dev/null ++++ b/hotspot/src/cpu/loongarch/vm/c1_FpuStackSim_loongarch.hpp +@@ -0,0 +1,32 @@ ++/* ++ * Copyright (c) 2005, 2019, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2021, 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#ifndef CPU_LOONGARCH_C1_FPUSTACKSIM_LOONGARCH_HPP ++#define CPU_LOONGARCH_C1_FPUSTACKSIM_LOONGARCH_HPP ++ ++// No FPU stack on LoongArch ++class FpuStackSim; ++ ++#endif // CPU_LOONGARCH_C1_FPUSTACKSIM_LOONGARCH_HPP +diff --git a/hotspot/src/cpu/loongarch/vm/c1_FpuStackSim_loongarch_64.cpp b/hotspot/src/cpu/loongarch/vm/c1_FpuStackSim_loongarch_64.cpp +new file mode 100644 +index 0000000000..1a89c437a8 +--- /dev/null ++++ b/hotspot/src/cpu/loongarch/vm/c1_FpuStackSim_loongarch_64.cpp +@@ -0,0 +1,31 @@ ++/* ++ * Copyright (c) 2005, 2017, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2021, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ * ++ */ ++ ++//-------------------------------------------------------- ++// FpuStackSim ++//-------------------------------------------------------- ++ ++// No FPU stack on LoongArch64 ++#include "precompiled.hpp" +diff --git a/hotspot/src/cpu/loongarch/vm/c1_FrameMap_loongarch.hpp b/hotspot/src/cpu/loongarch/vm/c1_FrameMap_loongarch.hpp +new file mode 100644 +index 0000000000..4f0cf05361 +--- /dev/null ++++ b/hotspot/src/cpu/loongarch/vm/c1_FrameMap_loongarch.hpp +@@ -0,0 +1,143 @@ ++/* ++ * Copyright (c) 1999, 2019, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2021, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#ifndef CPU_LOONGARCH_C1_FRAMEMAP_LOONGARCH_HPP ++#define CPU_LOONGARCH_C1_FRAMEMAP_LOONGARCH_HPP ++ ++// On LoongArch64 the frame looks as follows: ++// ++// +-----------------------------+---------+----------------------------------------+----------------+----------- ++// | size_arguments-nof_reg_args | 2 words | size_locals-size_arguments+numreg_args | _size_monitors | spilling . 
++// +-----------------------------+---------+----------------------------------------+----------------+----------- ++ ++ public: ++ static const int pd_c_runtime_reserved_arg_size; ++ ++ enum { ++ first_available_sp_in_frame = 0, ++ frame_pad_in_bytes = 16, ++ nof_reg_args = 8 ++ }; ++ ++ public: ++ static LIR_Opr receiver_opr; ++ ++ static LIR_Opr r0_opr; ++ static LIR_Opr ra_opr; ++ static LIR_Opr tp_opr; ++ static LIR_Opr sp_opr; ++ static LIR_Opr a0_opr; ++ static LIR_Opr a1_opr; ++ static LIR_Opr a2_opr; ++ static LIR_Opr a3_opr; ++ static LIR_Opr a4_opr; ++ static LIR_Opr a5_opr; ++ static LIR_Opr a6_opr; ++ static LIR_Opr a7_opr; ++ static LIR_Opr t0_opr; ++ static LIR_Opr t1_opr; ++ static LIR_Opr t2_opr; ++ static LIR_Opr t3_opr; ++ static LIR_Opr t4_opr; ++ static LIR_Opr t5_opr; ++ static LIR_Opr t6_opr; ++ static LIR_Opr t7_opr; ++ static LIR_Opr t8_opr; ++ static LIR_Opr rx_opr; ++ static LIR_Opr fp_opr; ++ static LIR_Opr s0_opr; ++ static LIR_Opr s1_opr; ++ static LIR_Opr s2_opr; ++ static LIR_Opr s3_opr; ++ static LIR_Opr s4_opr; ++ static LIR_Opr s5_opr; ++ static LIR_Opr s6_opr; ++ static LIR_Opr s7_opr; ++ static LIR_Opr s8_opr; ++ ++ static LIR_Opr ra_oop_opr; ++ static LIR_Opr a0_oop_opr; ++ static LIR_Opr a1_oop_opr; ++ static LIR_Opr a2_oop_opr; ++ static LIR_Opr a3_oop_opr; ++ static LIR_Opr a4_oop_opr; ++ static LIR_Opr a5_oop_opr; ++ static LIR_Opr a6_oop_opr; ++ static LIR_Opr a7_oop_opr; ++ static LIR_Opr t0_oop_opr; ++ static LIR_Opr t1_oop_opr; ++ static LIR_Opr t2_oop_opr; ++ static LIR_Opr t3_oop_opr; ++ static LIR_Opr t4_oop_opr; ++ static LIR_Opr t5_oop_opr; ++ static LIR_Opr t6_oop_opr; ++ static LIR_Opr t7_oop_opr; ++ static LIR_Opr t8_oop_opr; ++ static LIR_Opr fp_oop_opr; ++ static LIR_Opr s0_oop_opr; ++ static LIR_Opr s1_oop_opr; ++ static LIR_Opr s2_oop_opr; ++ static LIR_Opr s3_oop_opr; ++ static LIR_Opr s4_oop_opr; ++ static LIR_Opr s5_oop_opr; ++ static LIR_Opr s6_oop_opr; ++ static LIR_Opr s7_oop_opr; ++ static LIR_Opr s8_oop_opr; ++ ++ static LIR_Opr scr1_opr; ++ static LIR_Opr scr2_opr; ++ static LIR_Opr scr1_long_opr; ++ static LIR_Opr scr2_long_opr; ++ ++ static LIR_Opr a0_metadata_opr; ++ static LIR_Opr a1_metadata_opr; ++ static LIR_Opr a2_metadata_opr; ++ static LIR_Opr a3_metadata_opr; ++ static LIR_Opr a4_metadata_opr; ++ static LIR_Opr a5_metadata_opr; ++ ++ static LIR_Opr long0_opr; ++ static LIR_Opr long1_opr; ++ static LIR_Opr fpu0_float_opr; ++ static LIR_Opr fpu0_double_opr; ++ ++ static LIR_Opr as_long_opr(Register r) { ++ return LIR_OprFact::double_cpu(cpu_reg2rnr(r), cpu_reg2rnr(r)); ++ } ++ static LIR_Opr as_pointer_opr(Register r) { ++ return LIR_OprFact::double_cpu(cpu_reg2rnr(r), cpu_reg2rnr(r)); ++ } ++ ++ // VMReg name for spilled physical FPU stack slot n ++ static VMReg fpu_regname (int n); ++ ++ static bool is_caller_save_register(LIR_Opr opr) { return true; } ++ static bool is_caller_save_register(Register r) { return true; } ++ ++ static int nof_caller_save_cpu_regs() { return pd_nof_caller_save_cpu_regs_frame_map; } ++ static int last_cpu_reg() { return pd_last_cpu_reg; } ++ static int last_byte_reg() { return pd_last_byte_reg; } ++ ++#endif // CPU_LOONGARCH_C1_FRAMEMAP_LOONGARCH_HPP +diff --git a/hotspot/src/cpu/loongarch/vm/c1_FrameMap_loongarch_64.cpp b/hotspot/src/cpu/loongarch/vm/c1_FrameMap_loongarch_64.cpp +new file mode 100644 +index 0000000000..25c90bcf98 +--- /dev/null ++++ b/hotspot/src/cpu/loongarch/vm/c1_FrameMap_loongarch_64.cpp +@@ -0,0 +1,362 @@ ++/* ++ * Copyright (c) 1999, 2019, Oracle and/or its 
affiliates. All rights reserved. ++ * Copyright (c) 2021, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#include "precompiled.hpp" ++#include "c1/c1_FrameMap.hpp" ++#include "c1/c1_LIR.hpp" ++#include "runtime/sharedRuntime.hpp" ++#include "vmreg_loongarch.inline.hpp" ++ ++#define A0 RA0 ++#define A1 RA1 ++#define A2 RA2 ++#define A3 RA3 ++#define A4 RA4 ++#define A5 RA5 ++#define A6 RA6 ++#define A7 RA7 ++#define T0 RT0 ++#define T1 RT1 ++#define T2 RT2 ++#define T3 RT3 ++#define T4 RT4 ++#define T5 RT5 ++#define T6 RT6 ++#define T7 RT7 ++#define T8 RT8 ++ ++LIR_Opr FrameMap::map_to_opr(BasicType type, VMRegPair* reg, bool) { ++ LIR_Opr opr = LIR_OprFact::illegalOpr; ++ VMReg r_1 = reg->first(); ++ VMReg r_2 = reg->second(); ++ if (r_1->is_stack()) { ++ // Convert stack slot to an SP offset ++ // The calling convention does not count the SharedRuntime::out_preserve_stack_slots() value ++ // so we must add it in here. 
++ int st_off = (r_1->reg2stack() + SharedRuntime::out_preserve_stack_slots()) * VMRegImpl::stack_slot_size; ++ opr = LIR_OprFact::address(new LIR_Address(sp_opr, st_off, type)); ++ } else if (r_1->is_Register()) { ++ Register reg = r_1->as_Register(); ++ if (r_2->is_Register() && (type == T_LONG || type == T_DOUBLE)) { ++ Register reg2 = r_2->as_Register(); ++ assert(reg2 == reg, "must be same register"); ++ opr = as_long_opr(reg); ++ } else if (is_reference_type(type)) { ++ opr = as_oop_opr(reg); ++ } else if (type == T_METADATA) { ++ opr = as_metadata_opr(reg); ++ } else if (type == T_ADDRESS) { ++ opr = as_address_opr(reg); ++ } else { ++ opr = as_opr(reg); ++ } ++ } else if (r_1->is_FloatRegister()) { ++ assert(type == T_DOUBLE || type == T_FLOAT, "wrong type"); ++ int num = r_1->as_FloatRegister()->encoding(); ++ if (type == T_FLOAT) { ++ opr = LIR_OprFact::single_fpu(num); ++ } else { ++ opr = LIR_OprFact::double_fpu(num); ++ } ++ } else { ++ ShouldNotReachHere(); ++ } ++ return opr; ++} ++ ++LIR_Opr FrameMap::r0_opr; ++LIR_Opr FrameMap::ra_opr; ++LIR_Opr FrameMap::tp_opr; ++LIR_Opr FrameMap::sp_opr; ++LIR_Opr FrameMap::a0_opr; ++LIR_Opr FrameMap::a1_opr; ++LIR_Opr FrameMap::a2_opr; ++LIR_Opr FrameMap::a3_opr; ++LIR_Opr FrameMap::a4_opr; ++LIR_Opr FrameMap::a5_opr; ++LIR_Opr FrameMap::a6_opr; ++LIR_Opr FrameMap::a7_opr; ++LIR_Opr FrameMap::t0_opr; ++LIR_Opr FrameMap::t1_opr; ++LIR_Opr FrameMap::t2_opr; ++LIR_Opr FrameMap::t3_opr; ++LIR_Opr FrameMap::t4_opr; ++LIR_Opr FrameMap::t5_opr; ++LIR_Opr FrameMap::t6_opr; ++LIR_Opr FrameMap::t7_opr; ++LIR_Opr FrameMap::t8_opr; ++LIR_Opr FrameMap::rx_opr; ++LIR_Opr FrameMap::fp_opr; ++LIR_Opr FrameMap::s0_opr; ++LIR_Opr FrameMap::s1_opr; ++LIR_Opr FrameMap::s2_opr; ++LIR_Opr FrameMap::s3_opr; ++LIR_Opr FrameMap::s4_opr; ++LIR_Opr FrameMap::s5_opr; ++LIR_Opr FrameMap::s6_opr; ++LIR_Opr FrameMap::s7_opr; ++LIR_Opr FrameMap::s8_opr; ++ ++LIR_Opr FrameMap::receiver_opr; ++ ++LIR_Opr FrameMap::ra_oop_opr; ++LIR_Opr FrameMap::a0_oop_opr; ++LIR_Opr FrameMap::a1_oop_opr; ++LIR_Opr FrameMap::a2_oop_opr; ++LIR_Opr FrameMap::a3_oop_opr; ++LIR_Opr FrameMap::a4_oop_opr; ++LIR_Opr FrameMap::a5_oop_opr; ++LIR_Opr FrameMap::a6_oop_opr; ++LIR_Opr FrameMap::a7_oop_opr; ++LIR_Opr FrameMap::t0_oop_opr; ++LIR_Opr FrameMap::t1_oop_opr; ++LIR_Opr FrameMap::t2_oop_opr; ++LIR_Opr FrameMap::t3_oop_opr; ++LIR_Opr FrameMap::t4_oop_opr; ++LIR_Opr FrameMap::t5_oop_opr; ++LIR_Opr FrameMap::t6_oop_opr; ++LIR_Opr FrameMap::t7_oop_opr; ++LIR_Opr FrameMap::t8_oop_opr; ++LIR_Opr FrameMap::fp_oop_opr; ++LIR_Opr FrameMap::s0_oop_opr; ++LIR_Opr FrameMap::s1_oop_opr; ++LIR_Opr FrameMap::s2_oop_opr; ++LIR_Opr FrameMap::s3_oop_opr; ++LIR_Opr FrameMap::s4_oop_opr; ++LIR_Opr FrameMap::s5_oop_opr; ++LIR_Opr FrameMap::s6_oop_opr; ++LIR_Opr FrameMap::s7_oop_opr; ++LIR_Opr FrameMap::s8_oop_opr; ++ ++LIR_Opr FrameMap::scr1_opr; ++LIR_Opr FrameMap::scr2_opr; ++LIR_Opr FrameMap::scr1_long_opr; ++LIR_Opr FrameMap::scr2_long_opr; ++ ++LIR_Opr FrameMap::a0_metadata_opr; ++LIR_Opr FrameMap::a1_metadata_opr; ++LIR_Opr FrameMap::a2_metadata_opr; ++LIR_Opr FrameMap::a3_metadata_opr; ++LIR_Opr FrameMap::a4_metadata_opr; ++LIR_Opr FrameMap::a5_metadata_opr; ++ ++LIR_Opr FrameMap::long0_opr; ++LIR_Opr FrameMap::long1_opr; ++LIR_Opr FrameMap::fpu0_float_opr; ++LIR_Opr FrameMap::fpu0_double_opr; ++ ++LIR_Opr FrameMap::_caller_save_cpu_regs[] = { 0 }; ++LIR_Opr FrameMap::_caller_save_fpu_regs[] = { 0 }; ++ ++//-------------------------------------------------------- ++// FrameMap 
++//-------------------------------------------------------- ++ ++void FrameMap::initialize() { ++ assert(!_init_done, "once"); ++ int i = 0; ++ ++ // caller save register ++ map_register(i, A0); a0_opr = LIR_OprFact::single_cpu(i); i++; ++ map_register(i, A1); a1_opr = LIR_OprFact::single_cpu(i); i++; ++ map_register(i, A2); a2_opr = LIR_OprFact::single_cpu(i); i++; ++ map_register(i, A3); a3_opr = LIR_OprFact::single_cpu(i); i++; ++ map_register(i, A4); a4_opr = LIR_OprFact::single_cpu(i); i++; ++ map_register(i, A5); a5_opr = LIR_OprFact::single_cpu(i); i++; ++ map_register(i, A6); a6_opr = LIR_OprFact::single_cpu(i); i++; ++ map_register(i, A7); a7_opr = LIR_OprFact::single_cpu(i); i++; ++ map_register(i, T0); t0_opr = LIR_OprFact::single_cpu(i); i++; ++ map_register(i, T1); t1_opr = LIR_OprFact::single_cpu(i); i++; ++ map_register(i, T2); t2_opr = LIR_OprFact::single_cpu(i); i++; ++ map_register(i, T3); t3_opr = LIR_OprFact::single_cpu(i); i++; ++ map_register(i, T5); t5_opr = LIR_OprFact::single_cpu(i); i++; ++ map_register(i, T6); t6_opr = LIR_OprFact::single_cpu(i); i++; ++ map_register(i, T8); t8_opr = LIR_OprFact::single_cpu(i); i++; ++ ++ // callee save register ++ map_register(i, S0); s0_opr = LIR_OprFact::single_cpu(i); i++; ++ map_register(i, S1); s1_opr = LIR_OprFact::single_cpu(i); i++; ++ map_register(i, S2); s2_opr = LIR_OprFact::single_cpu(i); i++; ++ map_register(i, S3); s3_opr = LIR_OprFact::single_cpu(i); i++; ++ map_register(i, S4); s4_opr = LIR_OprFact::single_cpu(i); i++; ++ map_register(i, S7); s7_opr = LIR_OprFact::single_cpu(i); i++; ++ map_register(i, S8); s8_opr = LIR_OprFact::single_cpu(i); i++; ++ ++ // special register ++ map_register(i, S5); s5_opr = LIR_OprFact::single_cpu(i); i++; // heapbase ++ map_register(i, S6); s6_opr = LIR_OprFact::single_cpu(i); i++; // thread ++ map_register(i, TP); tp_opr = LIR_OprFact::single_cpu(i); i++; // tp ++ map_register(i, FP); fp_opr = LIR_OprFact::single_cpu(i); i++; // fp ++ map_register(i, RA); ra_opr = LIR_OprFact::single_cpu(i); i++; // ra ++ map_register(i, SP); sp_opr = LIR_OprFact::single_cpu(i); i++; // sp ++ ++ // tmp register ++ map_register(i, T7); t7_opr = LIR_OprFact::single_cpu(i); i++; // scr1 ++ map_register(i, T4); t4_opr = LIR_OprFact::single_cpu(i); i++; // scr2 ++ ++ scr1_opr = t7_opr; ++ scr2_opr = t4_opr; ++ scr1_long_opr = LIR_OprFact::double_cpu(t7_opr->cpu_regnr(), t7_opr->cpu_regnr()); ++ scr2_long_opr = LIR_OprFact::double_cpu(t4_opr->cpu_regnr(), t4_opr->cpu_regnr()); ++ ++ long0_opr = LIR_OprFact::double_cpu(a0_opr->cpu_regnr(), a0_opr->cpu_regnr()); ++ long1_opr = LIR_OprFact::double_cpu(a1_opr->cpu_regnr(), a1_opr->cpu_regnr()); ++ ++ fpu0_float_opr = LIR_OprFact::single_fpu(0); ++ fpu0_double_opr = LIR_OprFact::double_fpu(0); ++ ++ // scr1, scr2 not included ++ _caller_save_cpu_regs[0] = a0_opr; ++ _caller_save_cpu_regs[1] = a1_opr; ++ _caller_save_cpu_regs[2] = a2_opr; ++ _caller_save_cpu_regs[3] = a3_opr; ++ _caller_save_cpu_regs[4] = a4_opr; ++ _caller_save_cpu_regs[5] = a5_opr; ++ _caller_save_cpu_regs[6] = a6_opr; ++ _caller_save_cpu_regs[7] = a7_opr; ++ _caller_save_cpu_regs[8] = t0_opr; ++ _caller_save_cpu_regs[9] = t1_opr; ++ _caller_save_cpu_regs[10] = t2_opr; ++ _caller_save_cpu_regs[11] = t3_opr; ++ _caller_save_cpu_regs[12] = t5_opr; ++ _caller_save_cpu_regs[13] = t6_opr; ++ _caller_save_cpu_regs[14] = t8_opr; ++ ++ for (int i = 0; i < 8; i++) { ++ _caller_save_fpu_regs[i] = LIR_OprFact::single_fpu(i); ++ } ++ ++ _init_done = true; ++ ++ ra_oop_opr = as_oop_opr(RA); ++ 
a0_oop_opr = as_oop_opr(A0); ++ a1_oop_opr = as_oop_opr(A1); ++ a2_oop_opr = as_oop_opr(A2); ++ a3_oop_opr = as_oop_opr(A3); ++ a4_oop_opr = as_oop_opr(A4); ++ a5_oop_opr = as_oop_opr(A5); ++ a6_oop_opr = as_oop_opr(A6); ++ a7_oop_opr = as_oop_opr(A7); ++ t0_oop_opr = as_oop_opr(T0); ++ t1_oop_opr = as_oop_opr(T1); ++ t2_oop_opr = as_oop_opr(T2); ++ t3_oop_opr = as_oop_opr(T3); ++ t4_oop_opr = as_oop_opr(T4); ++ t5_oop_opr = as_oop_opr(T5); ++ t6_oop_opr = as_oop_opr(T6); ++ t7_oop_opr = as_oop_opr(T7); ++ t8_oop_opr = as_oop_opr(T8); ++ fp_oop_opr = as_oop_opr(FP); ++ s0_oop_opr = as_oop_opr(S0); ++ s1_oop_opr = as_oop_opr(S1); ++ s2_oop_opr = as_oop_opr(S2); ++ s3_oop_opr = as_oop_opr(S3); ++ s4_oop_opr = as_oop_opr(S4); ++ s5_oop_opr = as_oop_opr(S5); ++ s6_oop_opr = as_oop_opr(S6); ++ s7_oop_opr = as_oop_opr(S7); ++ s8_oop_opr = as_oop_opr(S8); ++ ++ a0_metadata_opr = as_metadata_opr(A0); ++ a1_metadata_opr = as_metadata_opr(A1); ++ a2_metadata_opr = as_metadata_opr(A2); ++ a3_metadata_opr = as_metadata_opr(A3); ++ a4_metadata_opr = as_metadata_opr(A4); ++ a5_metadata_opr = as_metadata_opr(A5); ++ ++ sp_opr = as_pointer_opr(SP); ++ fp_opr = as_pointer_opr(FP); ++ ++ VMRegPair regs; ++ BasicType sig_bt = T_OBJECT; ++ SharedRuntime::java_calling_convention(&sig_bt, ®s, 1, true); ++ receiver_opr = as_oop_opr(regs.first()->as_Register()); ++ ++ for (int i = 0; i < nof_caller_save_fpu_regs; i++) { ++ _caller_save_fpu_regs[i] = LIR_OprFact::single_fpu(i); ++ } ++} ++ ++Address FrameMap::make_new_address(ByteSize sp_offset) const { ++ // for sp, based address use this: ++ // return Address(sp, in_bytes(sp_offset) - (framesize() - 2) * 4); ++ return Address(SP, in_bytes(sp_offset)); ++} ++ ++// ----------------mapping----------------------- ++// all mapping is based on fp addressing, except for simple leaf methods where we access ++// the locals sp based (and no frame is built) ++ ++// Frame for simple leaf methods (quick entries) ++// ++// +----------+ ++// | ret addr | <- TOS ++// +----------+ ++// | args | ++// | ...... | ++ ++// Frame for standard methods ++// ++// | .........| <- TOS ++// | locals | ++// +----------+ ++// | old fp, | <- RFP ++// +----------+ ++// | ret addr | ++// +----------+ ++// | args | ++// | .........| ++ ++// For OopMaps, map a local variable or spill index to an VMRegImpl name. ++// This is the offset from sp() in the frame of the slot for the index, ++// skewed by VMRegImpl::stack0 to indicate a stack location (vs.a register.) ++// ++// framesize + ++// stack0 stack0 0 <- VMReg ++// | | | ++// ...........|..............|.............| ++// 0 1 2 3 x x 4 5 6 ... | <- local indices ++// ^ ^ sp() ( x x indicate link ++// | | and return addr) ++// arguments non-argument locals ++ ++VMReg FrameMap::fpu_regname(int n) { ++ // Return the OptoReg name for the fpu stack slot "n" ++ // A spilled fpu stack slot comprises to two single-word OptoReg's. 
++ return as_FloatRegister(n)->as_VMReg(); ++} ++ ++LIR_Opr FrameMap::stack_pointer() { ++ return FrameMap::sp_opr; ++} ++ ++// JSR 292 ++LIR_Opr FrameMap::method_handle_invoke_SP_save_opr() { ++ return LIR_OprFact::illegalOpr; // Not needed on LoongArch64 ++} ++ ++bool FrameMap::validate_frame() { ++ return true; ++} +diff --git a/hotspot/src/cpu/loongarch/vm/c1_LIRAssembler_loongarch.hpp b/hotspot/src/cpu/loongarch/vm/c1_LIRAssembler_loongarch.hpp +new file mode 100644 +index 0000000000..38b0daa025 +--- /dev/null ++++ b/hotspot/src/cpu/loongarch/vm/c1_LIRAssembler_loongarch.hpp +@@ -0,0 +1,83 @@ ++/* ++ * Copyright (c) 2000, 2021, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2021, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#ifndef CPU_LOONGARCH_C1_LIRASSEMBLER_LOONGARCH_HPP ++#define CPU_LOONGARCH_C1_LIRASSEMBLER_LOONGARCH_HPP ++ ++// ArrayCopyStub needs access to bailout ++friend class ArrayCopyStub; ++ ++ private: ++ int array_element_size(BasicType type) const; ++ ++ void arith_fpu_implementation(LIR_Code code, int left_index, int right_index, ++ int dest_index, bool pop_fpu_stack); ++ ++ // helper functions which checks for overflow and sets bailout if it ++ // occurs. Always returns a valid embeddable pointer but in the ++ // bailout case the pointer won't be to unique storage. ++ address float_constant(float f); ++ address double_constant(double d); ++ ++ address int_constant(jlong n); ++ ++ bool is_literal_address(LIR_Address* addr); ++ ++ // Ensure we have a valid Address (base+offset) to a stack-slot. 
++ Address stack_slot_address(int index, uint shift, int adjust = 0); ++ ++ // Record the type of the receiver in ReceiverTypeData ++ void type_profile_helper(Register mdo, ciMethodData *md, ciProfileData *data, ++ Register recv, Label* update_done); ++ void add_debug_info_for_branch(address adr, CodeEmitInfo* info); ++ ++ void casw(Register addr, Register newval, Register cmpval, bool sign); ++ void casl(Register addr, Register newval, Register cmpval); ++ ++ void poll_for_safepoint(relocInfo::relocType rtype, CodeEmitInfo* info = NULL); ++ ++ static const int max_tableswitches = 20; ++ struct tableswitch switches[max_tableswitches]; ++ int tableswitch_count; ++ ++ void init() { tableswitch_count = 0; } ++ ++ void deoptimize_trap(CodeEmitInfo *info); ++ ++public: ++ void store_parameter(Register r, int offset_from_sp_in_words); ++ void store_parameter(jint c, int offset_from_sp_in_words); ++ void store_parameter(jobject c, int offset_from_sp_in_words); ++ ++ enum { ++ // call stub: CompiledStaticCall::to_interp_stub_size() + ++ // NativeInstruction::nop_instruction_size + ++ // NativeCallTrampolineStub::instruction_size ++ call_stub_size = 13 * NativeInstruction::nop_instruction_size, ++ exception_handler_size = DEBUG_ONLY(1*K) NOT_DEBUG(175), ++ deopt_handler_size = 7 * NativeInstruction::nop_instruction_size ++ }; ++ ++#endif // CPU_LOONGARCH_C1_LIRASSEMBLER_LOONGARCH_HPP +diff --git a/hotspot/src/cpu/loongarch/vm/c1_LIRAssembler_loongarch_64.cpp b/hotspot/src/cpu/loongarch/vm/c1_LIRAssembler_loongarch_64.cpp +new file mode 100644 +index 0000000000..ee48326bec +--- /dev/null ++++ b/hotspot/src/cpu/loongarch/vm/c1_LIRAssembler_loongarch_64.cpp +@@ -0,0 +1,3377 @@ ++/* ++ * Copyright (c) 2000, 2021, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2021, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ * ++ */ ++ ++#include "precompiled.hpp" ++#include "asm/macroAssembler.inline.hpp" ++#include "asm/assembler.hpp" ++#include "c1/c1_CodeStubs.hpp" ++#include "c1/c1_Compilation.hpp" ++#include "c1/c1_LIRAssembler.hpp" ++#include "c1/c1_MacroAssembler.hpp" ++#include "c1/c1_Runtime1.hpp" ++#include "c1/c1_ValueStack.hpp" ++#include "ci/ciArrayKlass.hpp" ++#include "ci/ciInstance.hpp" ++#include "code/compiledIC.hpp" ++#include "nativeInst_loongarch.hpp" ++#include "oops/objArrayKlass.hpp" ++#include "runtime/frame.inline.hpp" ++#include "runtime/sharedRuntime.hpp" ++#include "runtime/stubRoutines.hpp" ++#include "vmreg_loongarch.inline.hpp" ++ ++#define A0 RA0 ++#define A1 RA1 ++#define A2 RA2 ++#define A3 RA3 ++#define A4 RA4 ++#define A5 RA5 ++#define A6 RA6 ++#define A7 RA7 ++#define T0 RT0 ++#define T1 RT1 ++#define T2 RT2 ++#define T3 RT3 ++#define T5 RT5 ++#define T6 RT6 ++#define T7 RT7 ++#define T8 RT8 ++ ++#ifndef PRODUCT ++#define COMMENT(x) do { __ block_comment(x); } while (0) ++#else ++#define COMMENT(x) ++#endif ++ ++NEEDS_CLEANUP // remove this definitions? ++ ++#define __ _masm-> ++ ++static void select_different_registers(Register preserve, Register extra, ++ Register &tmp1, Register &tmp2) { ++ if (tmp1 == preserve) { ++ assert_different_registers(tmp1, tmp2, extra); ++ tmp1 = extra; ++ } else if (tmp2 == preserve) { ++ assert_different_registers(tmp1, tmp2, extra); ++ tmp2 = extra; ++ } ++ assert_different_registers(preserve, tmp1, tmp2); ++} ++ ++static void select_different_registers(Register preserve, Register extra, ++ Register &tmp1, Register &tmp2, ++ Register &tmp3) { ++ if (tmp1 == preserve) { ++ assert_different_registers(tmp1, tmp2, tmp3, extra); ++ tmp1 = extra; ++ } else if (tmp2 == preserve) { ++ assert_different_registers(tmp1, tmp2, tmp3, extra); ++ tmp2 = extra; ++ } else if (tmp3 == preserve) { ++ assert_different_registers(tmp1, tmp2, tmp3, extra); ++ tmp3 = extra; ++ } ++ assert_different_registers(preserve, tmp1, tmp2, tmp3); ++} ++ ++bool LIR_Assembler::is_small_constant(LIR_Opr opr) { Unimplemented(); return false; } ++ ++LIR_Opr LIR_Assembler::receiverOpr() { ++ return FrameMap::receiver_opr; ++} ++ ++LIR_Opr LIR_Assembler::osrBufferPointer() { ++ return FrameMap::as_pointer_opr(receiverOpr()->as_register()); ++} ++ ++//--------------fpu register translations----------------------- ++ ++address LIR_Assembler::float_constant(float f) { ++ address const_addr = __ float_constant(f); ++ if (const_addr == NULL) { ++ bailout("const section overflow"); ++ return __ code()->consts()->start(); ++ } else { ++ return const_addr; ++ } ++} ++ ++address LIR_Assembler::double_constant(double d) { ++ address const_addr = __ double_constant(d); ++ if (const_addr == NULL) { ++ bailout("const section overflow"); ++ return __ code()->consts()->start(); ++ } else { ++ return const_addr; ++ } ++} ++ ++void LIR_Assembler::set_24bit_FPU() { Unimplemented(); } ++ ++void LIR_Assembler::reset_FPU() { Unimplemented(); } ++ ++void LIR_Assembler::fpop() { Unimplemented(); } ++ ++void LIR_Assembler::fxch(int i) { Unimplemented(); } ++ ++void LIR_Assembler::fld(int i) { Unimplemented(); } ++ ++void LIR_Assembler::ffree(int i) { Unimplemented(); } ++ ++void LIR_Assembler::breakpoint() { Unimplemented(); } ++ ++void LIR_Assembler::push(LIR_Opr opr) { Unimplemented(); } ++ ++void LIR_Assembler::pop(LIR_Opr opr) { Unimplemented(); } ++ ++bool LIR_Assembler::is_literal_address(LIR_Address* addr) { Unimplemented(); return false; } ++ ++static Register as_reg(LIR_Opr op) { ++ return 
op->is_double_cpu() ? op->as_register_lo() : op->as_register(); ++} ++ ++static jlong as_long(LIR_Opr data) { ++ jlong result; ++ switch (data->type()) { ++ case T_INT: ++ result = (data->as_jint()); ++ break; ++ case T_LONG: ++ result = (data->as_jlong()); ++ break; ++ default: ++ ShouldNotReachHere(); ++ result = 0; // unreachable ++ } ++ return result; ++} ++ ++Address LIR_Assembler::as_Address(LIR_Address* addr) { ++ Register base = addr->base()->as_pointer_register(); ++ LIR_Opr opr = addr->index(); ++ if (opr->is_cpu_register()) { ++ Register index; ++ if (opr->is_single_cpu()) ++ index = opr->as_register(); ++ else ++ index = opr->as_register_lo(); ++ assert(addr->disp() == 0, "must be"); ++ return Address(base, index, Address::ScaleFactor(addr->scale())); ++ } else { ++ assert(addr->scale() == 0, "must be"); ++ return Address(base, addr->disp()); ++ } ++ return Address(); ++} ++ ++Address LIR_Assembler::as_Address_hi(LIR_Address* addr) { ++ ShouldNotReachHere(); ++ return Address(); ++} ++ ++Address LIR_Assembler::as_Address_lo(LIR_Address* addr) { ++ return as_Address(addr); // Ouch ++ // FIXME: This needs to be much more clever. See x86. ++} ++ ++// Ensure a valid Address (base + offset) to a stack-slot. If stack access is ++// not encodable as a base + (immediate) offset, generate an explicit address ++// calculation to hold the address in a temporary register. ++Address LIR_Assembler::stack_slot_address(int index, uint size, int adjust) { ++ precond(size == 4 || size == 8); ++ Address addr = frame_map()->address_for_slot(index, adjust); ++ precond(addr.index() == noreg); ++ precond(addr.base() == SP); ++ precond(addr.disp() > 0); ++ uint mask = size - 1; ++ assert((addr.disp() & mask) == 0, "scaled offsets only"); ++ return addr; ++} ++ ++void LIR_Assembler::osr_entry() { ++ offsets()->set_value(CodeOffsets::OSR_Entry, code_offset()); ++ BlockBegin* osr_entry = compilation()->hir()->osr_entry(); ++ ValueStack* entry_state = osr_entry->state(); ++ int number_of_locks = entry_state->locks_size(); ++ ++ // we jump here if osr happens with the interpreter ++ // state set up to continue at the beginning of the ++ // loop that triggered osr - in particular, we have ++ // the following registers setup: ++ // ++ // A2: osr buffer ++ // ++ ++ // build frame ++ ciMethod* m = compilation()->method(); ++ __ build_frame(initial_frame_size_in_bytes(), bang_size_in_bytes()); ++ ++ // OSR buffer is ++ // ++ // locals[nlocals-1..0] ++ // monitors[0..number_of_locks] ++ // ++ // locals is a direct copy of the interpreter frame so in the osr buffer ++ // so first slot in the local array is the last local from the interpreter ++ // and last slot is local[0] (receiver) from the interpreter ++ // ++ // Similarly with locks. The first lock slot in the osr buffer is the nth lock ++ // from the interpreter frame, the nth lock slot in the osr buffer is 0th lock ++ // in the interpreter frame (the method lock if a sync method) ++ ++ // Initialize monitors in the compiled activation. ++ // A2: pointer to osr buffer ++ // ++ // All other registers are dead at this point and the locals will be ++ // copied into place by code emitted in the IR. 
++ ++ Register OSR_buf = osrBufferPointer()->as_pointer_register(); ++ { ++ assert(frame::interpreter_frame_monitor_size() == BasicObjectLock::size(), "adjust code below"); ++ int monitor_offset = BytesPerWord * method()->max_locals() + (2 * BytesPerWord) * (number_of_locks - 1); ++ // SharedRuntime::OSR_migration_begin() packs BasicObjectLocks in ++ // the OSR buffer using 2 word entries: first the lock and then ++ // the oop. ++ for (int i = 0; i < number_of_locks; i++) { ++ int slot_offset = monitor_offset - ((i * 2) * BytesPerWord); ++#ifdef ASSERT ++ // verify the interpreter's monitor has a non-null object ++ { ++ Label L; ++ __ ld_ptr(SCR1, Address(OSR_buf, slot_offset + 1 * BytesPerWord)); ++ __ bnez(SCR1, L); ++ __ stop("locked object is NULL"); ++ __ bind(L); ++ } ++#endif ++ __ ld_ptr(S0, Address(OSR_buf, slot_offset + 0)); ++ __ st_ptr(S0, frame_map()->address_for_monitor_lock(i)); ++ __ ld_ptr(S0, Address(OSR_buf, slot_offset + 1*BytesPerWord)); ++ __ st_ptr(S0, frame_map()->address_for_monitor_object(i)); ++ } ++ } ++} ++ ++// inline cache check; done before the frame is built. ++int LIR_Assembler::check_icache() { ++ Register receiver = FrameMap::receiver_opr->as_register(); ++ Register ic_klass = IC_Klass; ++ int start_offset = __ offset(); ++ Label dont; ++ ++ __ verify_oop(receiver); ++ ++ // explicit NULL check not needed since load from [klass_offset] causes a trap ++ // check against inline cache ++ assert(!MacroAssembler::needs_explicit_null_check(oopDesc::klass_offset_in_bytes()), ++ "must add explicit null check"); ++ ++ __ load_klass(SCR2, receiver); ++ __ beq(SCR2, ic_klass, dont); ++ ++ // if icache check fails, then jump to runtime routine ++ // Note: RECEIVER must still contain the receiver! ++ __ jmp(SharedRuntime::get_ic_miss_stub(), relocInfo::runtime_call_type); ++ ++ // We align the verified entry point unless the method body ++ // (including its inline cache check) will fit in a single 64-byte ++ // icache line. ++ if (!method()->is_accessor() || __ offset() - start_offset > 4 * 4) { ++ // force alignment after the cache check. ++ __ align(CodeEntryAlignment); ++ } ++ ++ __ bind(dont); ++ return start_offset; ++} ++ ++void LIR_Assembler::jobject2reg(jobject o, Register reg) { ++ if (o == NULL) { ++ __ move(reg, R0); ++ } else { ++ int oop_index = __ oop_recorder()->find_index(o); ++ RelocationHolder rspec = oop_Relocation::spec(oop_index); ++ __ relocate(rspec); ++ __ patchable_li52(reg, (long)o); ++ } ++} ++ ++void LIR_Assembler::deoptimize_trap(CodeEmitInfo *info) { ++ address target = NULL; ++ ++ switch (patching_id(info)) { ++ case PatchingStub::access_field_id: ++ target = Runtime1::entry_for(Runtime1::access_field_patching_id); ++ break; ++ case PatchingStub::load_klass_id: ++ target = Runtime1::entry_for(Runtime1::load_klass_patching_id); ++ break; ++ case PatchingStub::load_mirror_id: ++ target = Runtime1::entry_for(Runtime1::load_mirror_patching_id); ++ break; ++ case PatchingStub::load_appendix_id: ++ target = Runtime1::entry_for(Runtime1::load_appendix_patching_id); ++ break; ++ default: ShouldNotReachHere(); ++ } ++ ++ __ call(target, relocInfo::runtime_call_type); ++ add_call_info_here(info); ++} ++ ++void LIR_Assembler::jobject2reg_with_patching(Register reg, CodeEmitInfo *info) { ++ deoptimize_trap(info); ++} ++ ++// This specifies the rsp decrement needed to build the frame ++int LIR_Assembler::initial_frame_size_in_bytes() const { ++ // if rounding, must let FrameMap know! 
++ return in_bytes(frame_map()->framesize_in_bytes()); ++} ++ ++int LIR_Assembler::emit_exception_handler() { ++ // if the last instruction is a call (typically to do a throw which ++ // is coming at the end after block reordering) the return address ++ // must still point into the code area in order to avoid assertion ++ // failures when searching for the corresponding bci => add a nop ++ // (was bug 5/14/1999 - gri) ++ __ nop(); ++ ++ // generate code for exception handler ++ address handler_base = __ start_a_stub(exception_handler_size); ++ if (handler_base == NULL) { ++ // not enough space left for the handler ++ bailout("exception handler overflow"); ++ return -1; ++ } ++ ++ int offset = code_offset(); ++ ++ // the exception oop and pc are in A0, and A1 ++ // no other registers need to be preserved, so invalidate them ++ __ invalidate_registers(false, true, true, true, true, true); ++ ++ // check that there is really an exception ++ __ verify_not_null_oop(A0); ++ ++ // search an exception handler (A0: exception oop, A1: throwing pc) ++ __ call(Runtime1::entry_for(Runtime1::handle_exception_from_callee_id), relocInfo::runtime_call_type); ++ __ should_not_reach_here(); ++ guarantee(code_offset() - offset <= exception_handler_size, "overflow"); ++ __ end_a_stub(); ++ ++ return offset; ++} ++ ++// Emit the code to remove the frame from the stack in the exception unwind path. ++int LIR_Assembler::emit_unwind_handler() { ++#ifndef PRODUCT ++ if (CommentedAssembly) { ++ _masm->block_comment("Unwind handler"); ++ } ++#endif ++ ++ int offset = code_offset(); ++ ++ // Fetch the exception from TLS and clear out exception related thread state ++ __ ld_ptr(A0, Address(TREG, JavaThread::exception_oop_offset())); ++ __ st_ptr(R0, Address(TREG, JavaThread::exception_oop_offset())); ++ __ st_ptr(R0, Address(TREG, JavaThread::exception_pc_offset())); ++ ++ __ bind(_unwind_handler_entry); ++ __ verify_not_null_oop(V0); ++ if (method()->is_synchronized() || compilation()->env()->dtrace_method_probes()) { ++ __ move(S0, V0); // Preserve the exception ++ } ++ ++ // Perform needed unlocking ++ MonitorExitStub* stub = NULL; ++ if (method()->is_synchronized()) { ++ monitor_address(0, FrameMap::a0_opr); ++ stub = new MonitorExitStub(FrameMap::a0_opr, true, 0); ++ __ unlock_object(A5, A4, A0, *stub->entry()); ++ __ bind(*stub->continuation()); ++ } ++ ++ if (compilation()->env()->dtrace_method_probes()) { ++ __ mov_metadata(A1, method()->constant_encoding()); ++ __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::dtrace_method_exit), TREG, A1); ++ } ++ ++ if (method()->is_synchronized() || compilation()->env()->dtrace_method_probes()) { ++ __ move(A0, S0); // Restore the exception ++ } ++ ++ // remove the activation and dispatch to the unwind handler ++ __ block_comment("remove_frame and dispatch to the unwind handler"); ++ __ remove_frame(initial_frame_size_in_bytes()); ++ __ jmp(Runtime1::entry_for(Runtime1::unwind_exception_id), relocInfo::runtime_call_type); ++ ++ // Emit the slow path assembly ++ if (stub != NULL) { ++ stub->emit_code(this); ++ } ++ ++ return offset; ++} ++ ++int LIR_Assembler::emit_deopt_handler() { ++ // if the last instruction is a call (typically to do a throw which ++ // is coming at the end after block reordering) the return address ++ // must still point into the code area in order to avoid assertion ++ // failures when searching for the corresponding bci => add a nop ++ // (was bug 5/14/1999 - gri) ++ __ nop(); ++ ++ // generate code for exception handler ++ address 
handler_base = __ start_a_stub(deopt_handler_size); ++ if (handler_base == NULL) { ++ // not enough space left for the handler ++ bailout("deopt handler overflow"); ++ return -1; ++ } ++ ++ int offset = code_offset(); ++ ++ __ call(SharedRuntime::deopt_blob()->unpack(), relocInfo::runtime_call_type); ++ guarantee(code_offset() - offset <= deopt_handler_size, "overflow"); ++ __ end_a_stub(); ++ ++ return offset; ++} ++ ++void LIR_Assembler::add_debug_info_for_branch(address adr, CodeEmitInfo* info) { ++ _masm->code_section()->relocate(adr, relocInfo::poll_type); ++ int pc_offset = code_offset(); ++ flush_debug_info(pc_offset); ++ info->record_debug_info(compilation()->debug_info_recorder(), pc_offset); ++ if (info->exception_handlers() != NULL) { ++ compilation()->add_exception_handlers_for_pco(pc_offset, info->exception_handlers()); ++ } ++} ++ ++void LIR_Assembler::return_op(LIR_Opr result) { ++ assert(result->is_illegal() || !result->is_single_cpu() || result->as_register() == V0, ++ "word returns are in V0,"); ++ ++ // Pop the stack before the safepoint code ++ __ remove_frame(initial_frame_size_in_bytes()); ++ ++ __ li(SCR2, os::get_polling_page()); ++ __ relocate(relocInfo::poll_return_type); ++ __ ld_w(SCR1, SCR2, 0); ++ __ jr(RA); ++} ++ ++int LIR_Assembler::safepoint_poll(LIR_Opr tmp, CodeEmitInfo* info) { ++ guarantee(info != NULL, "Shouldn't be NULL"); ++ __ li(SCR2, os::get_polling_page()); ++ add_debug_info_for_branch(info); // This isn't just debug info: it's the oop map ++ __ relocate(relocInfo::poll_type); ++ __ ld_w(SCR1, SCR2, 0); ++ return __ offset(); ++} ++ ++void LIR_Assembler::move_regs(Register from_reg, Register to_reg) { ++ __ move(to_reg, from_reg); ++} ++ ++void LIR_Assembler::swap_reg(Register a, Register b) { Unimplemented(); } ++ ++void LIR_Assembler::const2reg(LIR_Opr src, LIR_Opr dest, LIR_PatchCode patch_code, CodeEmitInfo* info) { ++ assert(src->is_constant(), "should not call otherwise"); ++ assert(dest->is_register(), "should not call otherwise"); ++ LIR_Const* c = src->as_constant_ptr(); ++ ++ switch (c->type()) { ++ case T_INT: ++ assert(patch_code == lir_patch_none, "no patching handled here"); ++ __ li(dest->as_register(), c->as_jint()); ++ break; ++ case T_ADDRESS: ++ assert(patch_code == lir_patch_none, "no patching handled here"); ++ __ li(dest->as_register(), c->as_jint()); ++ break; ++ case T_LONG: ++ assert(patch_code == lir_patch_none, "no patching handled here"); ++ __ li(dest->as_register_lo(), (intptr_t)c->as_jlong()); ++ break; ++ case T_OBJECT: ++ if (patch_code == lir_patch_none) { ++ jobject2reg(c->as_jobject(), dest->as_register()); ++ } else { ++ jobject2reg_with_patching(dest->as_register(), info); ++ } ++ break; ++ case T_METADATA: ++ if (patch_code != lir_patch_none) { ++ klass2reg_with_patching(dest->as_register(), info); ++ } else { ++ __ mov_metadata(dest->as_register(), c->as_metadata()); ++ } ++ break; ++ case T_FLOAT: ++ __ relocate(relocInfo::internal_word_type); ++ __ patchable_li52(SCR1, (jlong) float_constant(c->as_jfloat())); ++ __ fld_s(dest->as_float_reg(), SCR1, 0); ++ break; ++ case T_DOUBLE: ++ __ relocate(relocInfo::internal_word_type); ++ __ patchable_li52(SCR1, (jlong) double_constant(c->as_jdouble())); ++ __ fld_d(dest->as_double_reg(), SCR1, 0); ++ break; ++ default: ++ ShouldNotReachHere(); ++ } ++} ++ ++void LIR_Assembler::const2stack(LIR_Opr src, LIR_Opr dest) { ++ LIR_Const* c = src->as_constant_ptr(); ++ switch (c->type()) { ++ case T_OBJECT: ++ if (!c->as_jobject()) ++ __ st_ptr(R0, 
frame_map()->address_for_slot(dest->single_stack_ix())); ++ else { ++ const2reg(src, FrameMap::scr1_opr, lir_patch_none, NULL); ++ reg2stack(FrameMap::scr1_opr, dest, c->type(), false); ++ } ++ break; ++ case T_ADDRESS: ++ const2reg(src, FrameMap::scr1_opr, lir_patch_none, NULL); ++ reg2stack(FrameMap::scr1_opr, dest, c->type(), false); ++ case T_INT: ++ case T_FLOAT: ++ if (c->as_jint_bits() == 0) ++ __ st_w(R0, frame_map()->address_for_slot(dest->single_stack_ix())); ++ else { ++ __ li(SCR2, c->as_jint_bits()); ++ __ st_w(SCR2, frame_map()->address_for_slot(dest->single_stack_ix())); ++ } ++ break; ++ case T_LONG: ++ case T_DOUBLE: ++ if (c->as_jlong_bits() == 0) ++ __ st_ptr(R0, frame_map()->address_for_slot(dest->double_stack_ix(), ++ lo_word_offset_in_bytes)); ++ else { ++ __ li(SCR2, (intptr_t)c->as_jlong_bits()); ++ __ st_ptr(SCR2, frame_map()->address_for_slot(dest->double_stack_ix(), ++ lo_word_offset_in_bytes)); ++ } ++ break; ++ default: ++ ShouldNotReachHere(); ++ } ++} ++ ++void LIR_Assembler::const2mem(LIR_Opr src, LIR_Opr dest, BasicType type, ++ CodeEmitInfo* info, bool wide) { ++ assert(src->is_constant(), "should not call otherwise"); ++ LIR_Const* c = src->as_constant_ptr(); ++ LIR_Address* to_addr = dest->as_address_ptr(); ++ ++ void (Assembler::* insn)(Register Rt, Address adr); ++ ++ switch (type) { ++ case T_ADDRESS: ++ assert(c->as_jint() == 0, "should be"); ++ insn = &Assembler::st_d; ++ break; ++ case T_LONG: ++ assert(c->as_jlong() == 0, "should be"); ++ insn = &Assembler::st_d; ++ break; ++ case T_INT: ++ assert(c->as_jint() == 0, "should be"); ++ insn = &Assembler::st_w; ++ break; ++ case T_OBJECT: ++ case T_ARRAY: ++ assert(c->as_jobject() == 0, "should be"); ++ if (UseCompressedOops && !wide) { ++ insn = &Assembler::st_w; ++ } else { ++ insn = &Assembler::st_d; ++ } ++ break; ++ case T_CHAR: ++ case T_SHORT: ++ assert(c->as_jint() == 0, "should be"); ++ insn = &Assembler::st_h; ++ break; ++ case T_BOOLEAN: ++ case T_BYTE: ++ assert(c->as_jint() == 0, "should be"); ++ insn = &Assembler::st_b; ++ break; ++ default: ++ ShouldNotReachHere(); ++ insn = &Assembler::st_d; // unreachable ++ } ++ ++ if (info) add_debug_info_for_null_check_here(info); ++ (_masm->*insn)(R0, as_Address(to_addr)); ++} ++ ++void LIR_Assembler::reg2reg(LIR_Opr src, LIR_Opr dest) { ++ assert(src->is_register(), "should not call otherwise"); ++ assert(dest->is_register(), "should not call otherwise"); ++ ++ // move between cpu-registers ++ if (dest->is_single_cpu()) { ++ if (src->type() == T_LONG) { ++ // Can do LONG -> OBJECT ++ move_regs(src->as_register_lo(), dest->as_register()); ++ return; ++ } ++ assert(src->is_single_cpu(), "must match"); ++ if (src->type() == T_OBJECT) { ++ __ verify_oop(src->as_register()); ++ } ++ move_regs(src->as_register(), dest->as_register()); ++ } else if (dest->is_double_cpu()) { ++ if (is_reference_type(src->type())) { ++ // Surprising to me but we can see move of a long to t_object ++ __ verify_oop(src->as_register()); ++ move_regs(src->as_register(), dest->as_register_lo()); ++ return; ++ } ++ assert(src->is_double_cpu(), "must match"); ++ Register f_lo = src->as_register_lo(); ++ Register f_hi = src->as_register_hi(); ++ Register t_lo = dest->as_register_lo(); ++ Register t_hi = dest->as_register_hi(); ++ assert(f_hi == f_lo, "must be same"); ++ assert(t_hi == t_lo, "must be same"); ++ move_regs(f_lo, t_lo); ++ } else if (dest->is_single_fpu()) { ++ __ fmov_s(dest->as_float_reg(), src->as_float_reg()); ++ } else if (dest->is_double_fpu()) { ++ __ 
fmov_d(dest->as_double_reg(), src->as_double_reg()); ++ } else { ++ ShouldNotReachHere(); ++ } ++} ++ ++void LIR_Assembler::reg2stack(LIR_Opr src, LIR_Opr dest, BasicType type, bool pop_fpu_stack) { ++ precond(src->is_register() && dest->is_stack()); ++ ++ uint const c_sz32 = sizeof(uint32_t); ++ uint const c_sz64 = sizeof(uint64_t); ++ ++ if (src->is_single_cpu()) { ++ int index = dest->single_stack_ix(); ++ if (is_reference_type(type)) { ++ __ st_ptr(src->as_register(), stack_slot_address(index, c_sz64)); ++ __ verify_oop(src->as_register()); ++ } else if (type == T_METADATA || type == T_DOUBLE || type == T_ADDRESS) { ++ __ st_ptr(src->as_register(), stack_slot_address(index, c_sz64)); ++ } else { ++ __ st_w(src->as_register(), stack_slot_address(index, c_sz32)); ++ } ++ } else if (src->is_double_cpu()) { ++ int index = dest->double_stack_ix(); ++ Address dest_addr_LO = stack_slot_address(index, c_sz64, lo_word_offset_in_bytes); ++ __ st_ptr(src->as_register_lo(), dest_addr_LO); ++ } else if (src->is_single_fpu()) { ++ int index = dest->single_stack_ix(); ++ __ fst_s(src->as_float_reg(), stack_slot_address(index, c_sz32)); ++ } else if (src->is_double_fpu()) { ++ int index = dest->double_stack_ix(); ++ __ fst_d(src->as_double_reg(), stack_slot_address(index, c_sz64)); ++ } else { ++ ShouldNotReachHere(); ++ } ++} ++ ++void LIR_Assembler::reg2mem(LIR_Opr src, LIR_Opr dest, BasicType type, LIR_PatchCode patch_code, ++ CodeEmitInfo* info, bool pop_fpu_stack, bool wide, bool /* unaligned */) { ++ LIR_Address* to_addr = dest->as_address_ptr(); ++ PatchingStub* patch = NULL; ++ Register compressed_src = SCR2; ++ ++ if (patch_code != lir_patch_none) { ++ deoptimize_trap(info); ++ return; ++ } ++ ++ if (is_reference_type(type)) { ++ __ verify_oop(src->as_register()); ++ ++ if (UseCompressedOops && !wide) { ++ __ encode_heap_oop(compressed_src, src->as_register()); ++ } else { ++ compressed_src = src->as_register(); ++ } ++ } ++ ++ int null_check_here = code_offset(); ++ switch (type) { ++ case T_FLOAT: ++ __ fst_s(src->as_float_reg(), as_Address(to_addr)); ++ break; ++ case T_DOUBLE: ++ __ fst_d(src->as_double_reg(), as_Address(to_addr)); ++ break; ++ case T_ARRAY: // fall through ++ case T_OBJECT: // fall through ++ if (UseCompressedOops && !wide) { ++ __ st_w(compressed_src, as_Address(to_addr)); ++ } else { ++ __ st_ptr(compressed_src, as_Address(to_addr)); ++ } ++ break; ++ case T_METADATA: ++ // We get here to store a method pointer to the stack to pass to ++ // a dtrace runtime call. This can't work on 64 bit with ++ // compressed klass ptrs: T_METADATA can be a compressed klass ++ // ptr or a 64 bit method pointer. 
++ ShouldNotReachHere(); ++ __ st_ptr(src->as_register(), as_Address(to_addr)); ++ break; ++ case T_ADDRESS: ++ __ st_ptr(src->as_register(), as_Address(to_addr)); ++ break; ++ case T_INT: ++ __ st_w(src->as_register(), as_Address(to_addr)); ++ break; ++ case T_LONG: ++ __ st_ptr(src->as_register_lo(), as_Address_lo(to_addr)); ++ break; ++ case T_BYTE: // fall through ++ case T_BOOLEAN: ++ __ st_b(src->as_register(), as_Address(to_addr)); ++ break; ++ case T_CHAR: // fall through ++ case T_SHORT: ++ __ st_h(src->as_register(), as_Address(to_addr)); ++ break; ++ default: ++ ShouldNotReachHere(); ++ } ++ if (info != NULL) { ++ add_debug_info_for_null_check(null_check_here, info); ++ } ++} ++ ++void LIR_Assembler::stack2reg(LIR_Opr src, LIR_Opr dest, BasicType type) { ++ precond(src->is_stack() && dest->is_register()); ++ ++ uint const c_sz32 = sizeof(uint32_t); ++ uint const c_sz64 = sizeof(uint64_t); ++ ++ if (dest->is_single_cpu()) { ++ int index = src->single_stack_ix(); ++ if (is_reference_type(type)) { ++ __ ld_ptr(dest->as_register(), stack_slot_address(index, c_sz64)); ++ __ verify_oop(dest->as_register()); ++ } else if (type == T_METADATA || type == T_ADDRESS) { ++ __ ld_ptr(dest->as_register(), stack_slot_address(index, c_sz64)); ++ } else { ++ __ ld_w(dest->as_register(), stack_slot_address(index, c_sz32)); ++ } ++ } else if (dest->is_double_cpu()) { ++ int index = src->double_stack_ix(); ++ Address src_addr_LO = stack_slot_address(index, c_sz64, lo_word_offset_in_bytes); ++ __ ld_ptr(dest->as_register_lo(), src_addr_LO); ++ } else if (dest->is_single_fpu()) { ++ int index = src->single_stack_ix(); ++ __ fld_s(dest->as_float_reg(), stack_slot_address(index, c_sz32)); ++ } else if (dest->is_double_fpu()) { ++ int index = src->double_stack_ix(); ++ __ fld_d(dest->as_double_reg(), stack_slot_address(index, c_sz64)); ++ } else { ++ ShouldNotReachHere(); ++ } ++} ++ ++void LIR_Assembler::klass2reg_with_patching(Register reg, CodeEmitInfo* info) { ++ address target = NULL; ++ ++ switch (patching_id(info)) { ++ case PatchingStub::access_field_id: ++ target = Runtime1::entry_for(Runtime1::access_field_patching_id); ++ break; ++ case PatchingStub::load_klass_id: ++ target = Runtime1::entry_for(Runtime1::load_klass_patching_id); ++ break; ++ case PatchingStub::load_mirror_id: ++ target = Runtime1::entry_for(Runtime1::load_mirror_patching_id); ++ break; ++ case PatchingStub::load_appendix_id: ++ target = Runtime1::entry_for(Runtime1::load_appendix_patching_id); ++ break; ++ default: ShouldNotReachHere(); ++ } ++ ++ __ call(target, relocInfo::runtime_call_type); ++ add_call_info_here(info); ++} ++ ++void LIR_Assembler::stack2stack(LIR_Opr src, LIR_Opr dest, BasicType type) { ++ LIR_Opr temp; ++ ++ if (type == T_LONG || type == T_DOUBLE) ++ temp = FrameMap::scr1_long_opr; ++ else ++ temp = FrameMap::scr1_opr; ++ ++ stack2reg(src, temp, src->type()); ++ reg2stack(temp, dest, dest->type(), false); ++} ++ ++void LIR_Assembler::mem2reg(LIR_Opr src, LIR_Opr dest, BasicType type, LIR_PatchCode patch_code, CodeEmitInfo* info, bool wide, bool /* unaligned */) { ++ LIR_Address* addr = src->as_address_ptr(); ++ LIR_Address* from_addr = src->as_address_ptr(); ++ ++ if (addr->base()->type() == T_OBJECT) { ++ __ verify_oop(addr->base()->as_pointer_register()); ++ } ++ ++ if (patch_code != lir_patch_none) { ++ deoptimize_trap(info); ++ return; ++ } ++ ++ if (info != NULL) { ++ add_debug_info_for_null_check_here(info); ++ } ++ int null_check_here = code_offset(); ++ switch (type) { ++ case T_FLOAT: ++ __ 
fld_s(dest->as_float_reg(), as_Address(from_addr)); ++ break; ++ case T_DOUBLE: ++ __ fld_d(dest->as_double_reg(), as_Address(from_addr)); ++ break; ++ case T_ARRAY: // fall through ++ case T_OBJECT: // fall through ++ if (UseCompressedOops && !wide) { ++ __ ld_wu(dest->as_register(), as_Address(from_addr)); ++ } else { ++ __ ld_ptr(dest->as_register(), as_Address(from_addr)); ++ } ++ break; ++ case T_METADATA: ++ // We get here to store a method pointer to the stack to pass to ++ // a dtrace runtime call. This can't work on 64 bit with ++ // compressed klass ptrs: T_METADATA can be a compressed klass ++ // ptr or a 64 bit method pointer. ++ ShouldNotReachHere(); ++ __ ld_ptr(dest->as_register(), as_Address(from_addr)); ++ break; ++ case T_ADDRESS: ++ // FIXME: OMG this is a horrible kludge. Any offset from an ++ // address that matches klass_offset_in_bytes() will be loaded ++ // as a word, not a long. ++ if (UseCompressedClassPointers && addr->disp() == oopDesc::klass_offset_in_bytes()) { ++ __ ld_wu(dest->as_register(), as_Address(from_addr)); ++ } else { ++ __ ld_ptr(dest->as_register(), as_Address(from_addr)); ++ } ++ break; ++ case T_INT: ++ __ ld_w(dest->as_register(), as_Address(from_addr)); ++ break; ++ case T_LONG: ++ __ ld_ptr(dest->as_register_lo(), as_Address_lo(from_addr)); ++ break; ++ case T_BYTE: ++ __ ld_b(dest->as_register(), as_Address(from_addr)); ++ break; ++ case T_BOOLEAN: ++ __ ld_bu(dest->as_register(), as_Address(from_addr)); ++ break; ++ case T_CHAR: ++ __ ld_hu(dest->as_register(), as_Address(from_addr)); ++ break; ++ case T_SHORT: ++ __ ld_h(dest->as_register(), as_Address(from_addr)); ++ break; ++ default: ++ ShouldNotReachHere(); ++ } ++ ++ if (is_reference_type(type)) { ++ if (UseCompressedOops && !wide) { ++ __ decode_heap_oop(dest->as_register()); ++ } ++ ++ // Load barrier has not yet been applied, so ZGC can't verify the oop here ++ __ verify_oop(dest->as_register()); ++ } else if (type == T_ADDRESS && addr->disp() == oopDesc::klass_offset_in_bytes()) { ++ if (UseCompressedClassPointers) { ++ __ decode_klass_not_null(dest->as_register()); ++ } ++ } ++} ++ ++void LIR_Assembler::prefetchr(LIR_Opr src) { Unimplemented(); } ++ ++void LIR_Assembler::prefetchw(LIR_Opr src) { Unimplemented(); } ++ ++int LIR_Assembler::array_element_size(BasicType type) const { ++ int elem_size = type2aelembytes(type); ++ return exact_log2(elem_size); ++} ++ ++void LIR_Assembler::emit_op3(LIR_Op3* op) { ++ switch (op->code()) { ++ case lir_idiv: ++ case lir_irem: ++ arithmetic_idiv(op->code(), op->in_opr1(), op->in_opr2(), op->in_opr3(), ++ op->result_opr(), op->info()); ++ break; ++ default: ++ ShouldNotReachHere(); ++ break; ++ } ++} ++ ++void LIR_Assembler::emit_opBranch(LIR_OpBranch* op) { ++#ifdef ASSERT ++ assert(op->block() == NULL || op->block()->label() == op->label(), "wrong label"); ++ if (op->block() != NULL) _branch_target_blocks.append(op->block()); ++ assert(op->cond() == lir_cond_always, "must be"); ++#endif ++ ++ if (op->info() != NULL) ++ add_debug_info_for_branch(op->info()); ++ ++ __ b_far(*(op->label())); ++} ++ ++void LIR_Assembler::emit_opCmpBranch(LIR_OpCmpBranch* op) { ++#ifdef ASSERT ++ assert(op->block() == NULL || op->block()->label() == op->label(), "wrong label"); ++ if (op->block() != NULL) _branch_target_blocks.append(op->block()); ++ if (op->ublock() != NULL) _branch_target_blocks.append(op->ublock()); ++#endif ++ ++ if (op->info() != NULL) { ++ assert(op->in_opr1()->is_address() || op->in_opr2()->is_address(), ++ "shouldn't be codeemitinfo for 
non-address operands"); ++ add_debug_info_for_null_check_here(op->info()); // exception possible ++ } ++ ++ Label& L = *(op->label()); ++ Assembler::Condition acond; ++ LIR_Opr opr1 = op->in_opr1(); ++ LIR_Opr opr2 = op->in_opr2(); ++ assert(op->condition() != lir_cond_always, "must be"); ++ ++ if (op->code() == lir_cmp_float_branch) { ++ bool is_unordered = (op->ublock() == op->block()); ++ if (opr1->is_single_fpu()) { ++ FloatRegister reg1 = opr1->as_float_reg(); ++ assert(opr2->is_single_fpu(), "expect single float register"); ++ FloatRegister reg2 = opr2->as_float_reg(); ++ switch(op->condition()) { ++ case lir_cond_equal: ++ if (is_unordered) ++ __ fcmp_cueq_s(FCC0, reg1, reg2); ++ else ++ __ fcmp_ceq_s(FCC0, reg1, reg2); ++ break; ++ case lir_cond_notEqual: ++ if (is_unordered) ++ __ fcmp_cune_s(FCC0, reg1, reg2); ++ else ++ __ fcmp_cne_s(FCC0, reg1, reg2); ++ break; ++ case lir_cond_less: ++ if (is_unordered) ++ __ fcmp_cult_s(FCC0, reg1, reg2); ++ else ++ __ fcmp_clt_s(FCC0, reg1, reg2); ++ break; ++ case lir_cond_lessEqual: ++ if (is_unordered) ++ __ fcmp_cule_s(FCC0, reg1, reg2); ++ else ++ __ fcmp_cle_s(FCC0, reg1, reg2); ++ break; ++ case lir_cond_greaterEqual: ++ if (is_unordered) ++ __ fcmp_cule_s(FCC0, reg2, reg1); ++ else ++ __ fcmp_cle_s(FCC0, reg2, reg1); ++ break; ++ case lir_cond_greater: ++ if (is_unordered) ++ __ fcmp_cult_s(FCC0, reg2, reg1); ++ else ++ __ fcmp_clt_s(FCC0, reg2, reg1); ++ break; ++ default: ++ ShouldNotReachHere(); ++ } ++ } else if (opr1->is_double_fpu()) { ++ FloatRegister reg1 = opr1->as_double_reg(); ++ assert(opr2->is_double_fpu(), "expect double float register"); ++ FloatRegister reg2 = opr2->as_double_reg(); ++ switch(op->condition()) { ++ case lir_cond_equal: ++ if (is_unordered) ++ __ fcmp_cueq_d(FCC0, reg1, reg2); ++ else ++ __ fcmp_ceq_d(FCC0, reg1, reg2); ++ break; ++ case lir_cond_notEqual: ++ if (is_unordered) ++ __ fcmp_cune_d(FCC0, reg1, reg2); ++ else ++ __ fcmp_cne_d(FCC0, reg1, reg2); ++ break; ++ case lir_cond_less: ++ if (is_unordered) ++ __ fcmp_cult_d(FCC0, reg1, reg2); ++ else ++ __ fcmp_clt_d(FCC0, reg1, reg2); ++ break; ++ case lir_cond_lessEqual: ++ if (is_unordered) ++ __ fcmp_cule_d(FCC0, reg1, reg2); ++ else ++ __ fcmp_cle_d(FCC0, reg1, reg2); ++ break; ++ case lir_cond_greaterEqual: ++ if (is_unordered) ++ __ fcmp_cule_d(FCC0, reg2, reg1); ++ else ++ __ fcmp_cle_d(FCC0, reg2, reg1); ++ break; ++ case lir_cond_greater: ++ if (is_unordered) ++ __ fcmp_cult_d(FCC0, reg2, reg1); ++ else ++ __ fcmp_clt_d(FCC0, reg2, reg1); ++ break; ++ default: ++ ShouldNotReachHere(); ++ } ++ } else { ++ ShouldNotReachHere(); ++ } ++ __ bcnez(FCC0, L); ++ } else { ++ if (opr1->is_constant() && opr2->is_single_cpu()) { ++ // tableswitch ++ Unimplemented(); ++ } else if (opr1->is_single_cpu() || opr1->is_double_cpu()) { ++ Register reg1 = as_reg(opr1); ++ Register reg2 = noreg; ++ jlong imm2 = 0; ++ if (opr2->is_single_cpu()) { ++ // cpu register - cpu register ++ reg2 = opr2->as_register(); ++ } else if (opr2->is_double_cpu()) { ++ // cpu register - cpu register ++ reg2 = opr2->as_register_lo(); ++ } else if (opr2->is_constant()) { ++ switch(opr2->type()) { ++ case T_INT: ++ case T_ADDRESS: ++ imm2 = opr2->as_constant_ptr()->as_jint(); ++ break; ++ case T_LONG: ++ imm2 = opr2->as_constant_ptr()->as_jlong(); ++ break; ++ case T_METADATA: ++ imm2 = (intptr_t)opr2->as_constant_ptr()->as_metadata(); ++ break; ++ case T_OBJECT: ++ case T_ARRAY: ++ if (opr2->as_constant_ptr()->as_jobject() != NULL) { ++ reg2 = SCR1; ++ 
jobject2reg(opr2->as_constant_ptr()->as_jobject(), reg2); ++ } else { ++ reg2 = R0; ++ } ++ break; ++ default: ++ ShouldNotReachHere(); ++ break; ++ } ++ } else { ++ ShouldNotReachHere(); ++ } ++ if (reg2 == noreg) { ++ if (imm2 == 0) { ++ reg2 = R0; ++ } else { ++ reg2 = SCR1; ++ __ li(reg2, imm2); ++ } ++ } ++ switch (op->condition()) { ++ case lir_cond_equal: ++ __ beq_far(reg1, reg2, L); break; ++ case lir_cond_notEqual: ++ __ bne_far(reg1, reg2, L); break; ++ case lir_cond_less: ++ __ blt_far(reg1, reg2, L, true); break; ++ case lir_cond_lessEqual: ++ __ bge_far(reg2, reg1, L, true); break; ++ case lir_cond_greaterEqual: ++ __ bge_far(reg1, reg2, L, true); break; ++ case lir_cond_greater: ++ __ blt_far(reg2, reg1, L, true); break; ++ case lir_cond_belowEqual: ++ __ bge_far(reg2, reg1, L, false); break; ++ case lir_cond_aboveEqual: ++ __ bge_far(reg1, reg2, L, false); break; ++ default: ++ ShouldNotReachHere(); ++ } ++ } ++ } ++} ++ ++void LIR_Assembler::emit_opConvert(LIR_OpConvert* op) { ++ LIR_Opr src = op->in_opr(); ++ LIR_Opr dest = op->result_opr(); ++ LIR_Opr tmp = op->tmp(); ++ ++ switch (op->bytecode()) { ++ case Bytecodes::_i2f: ++ __ movgr2fr_w(dest->as_float_reg(), src->as_register()); ++ __ ffint_s_w(dest->as_float_reg(), dest->as_float_reg()); ++ break; ++ case Bytecodes::_i2d: ++ __ movgr2fr_w(dest->as_double_reg(), src->as_register()); ++ __ ffint_d_w(dest->as_double_reg(), dest->as_double_reg()); ++ break; ++ case Bytecodes::_l2d: ++ __ movgr2fr_d(dest->as_double_reg(), src->as_register_lo()); ++ __ ffint_d_l(dest->as_double_reg(), dest->as_double_reg()); ++ break; ++ case Bytecodes::_l2f: ++ __ movgr2fr_d(dest->as_float_reg(), src->as_register_lo()); ++ __ ffint_s_l(dest->as_float_reg(), dest->as_float_reg()); ++ break; ++ case Bytecodes::_f2d: ++ __ fcvt_d_s(dest->as_double_reg(), src->as_float_reg()); ++ break; ++ case Bytecodes::_d2f: ++ __ fcvt_s_d(dest->as_float_reg(), src->as_double_reg()); ++ break; ++ case Bytecodes::_i2c: ++ __ bstrpick_w(dest->as_register(), src->as_register(), 15, 0); ++ break; ++ case Bytecodes::_i2l: ++ _masm->block_comment("FIXME: This could be a no-op"); ++ __ slli_w(dest->as_register_lo(), src->as_register(), 0); ++ break; ++ case Bytecodes::_i2s: ++ __ ext_w_h(dest->as_register(), src->as_register()); ++ break; ++ case Bytecodes::_i2b: ++ __ ext_w_b(dest->as_register(), src->as_register()); ++ break; ++ case Bytecodes::_l2i: ++ __ slli_w(dest->as_register(), src->as_register_lo(), 0); ++ break; ++ case Bytecodes::_d2l: ++ __ ftintrz_l_d(tmp->as_double_reg(), src->as_double_reg()); ++ __ movfr2gr_d(dest->as_register_lo(), tmp->as_double_reg()); ++ break; ++ case Bytecodes::_f2i: ++ __ ftintrz_w_s(tmp->as_float_reg(), src->as_float_reg()); ++ __ movfr2gr_s(dest->as_register(), tmp->as_float_reg()); ++ break; ++ case Bytecodes::_f2l: ++ __ ftintrz_l_s(tmp->as_float_reg(), src->as_float_reg()); ++ __ movfr2gr_d(dest->as_register_lo(), tmp->as_float_reg()); ++ break; ++ case Bytecodes::_d2i: ++ __ ftintrz_w_d(tmp->as_double_reg(), src->as_double_reg()); ++ __ movfr2gr_s(dest->as_register(), tmp->as_double_reg()); ++ break; ++ default: ShouldNotReachHere(); ++ } ++} ++ ++void LIR_Assembler::emit_alloc_obj(LIR_OpAllocObj* op) { ++ if (op->init_check()) { ++ __ ld_bu(SCR1, Address(op->klass()->as_register(), InstanceKlass::init_state_offset())); ++ __ li(SCR2, InstanceKlass::fully_initialized); ++ add_debug_info_for_null_check_here(op->stub()->info()); ++ __ bne_far(SCR1, SCR2, *op->stub()->entry()); ++ } ++ __ 
allocate_object(op->obj()->as_register(), op->tmp1()->as_register(), ++ op->tmp2()->as_register(), op->header_size(), ++ op->object_size(), op->klass()->as_register(), ++ *op->stub()->entry()); ++ __ bind(*op->stub()->continuation()); ++} ++ ++void LIR_Assembler::emit_alloc_array(LIR_OpAllocArray* op) { ++ Register len = op->len()->as_register(); ++ if (UseSlowPath || ++ (!UseFastNewObjectArray && is_reference_type(op->type())) || ++ (!UseFastNewTypeArray && !is_reference_type(op->type()))) { ++ __ b(*op->stub()->entry()); ++ } else { ++ Register tmp1 = op->tmp1()->as_register(); ++ Register tmp2 = op->tmp2()->as_register(); ++ Register tmp3 = op->tmp3()->as_register(); ++ if (len == tmp1) { ++ tmp1 = tmp3; ++ } else if (len == tmp2) { ++ tmp2 = tmp3; ++ } else if (len == tmp3) { ++ // everything is ok ++ } else { ++ __ move(tmp3, len); ++ } ++ __ allocate_array(op->obj()->as_register(), len, tmp1, tmp2, ++ arrayOopDesc::header_size(op->type()), ++ array_element_size(op->type()), ++ op->klass()->as_register(), ++ *op->stub()->entry()); ++ } ++ __ bind(*op->stub()->continuation()); ++} ++ ++void LIR_Assembler::type_profile_helper(Register mdo, ciMethodData *md, ciProfileData *data, ++ Register recv, Label* update_done) { ++ for (uint i = 0; i < ReceiverTypeData::row_limit(); i++) { ++ Label next_test; ++ // See if the receiver is receiver[n]. ++ __ lea(SCR2, Address(mdo, md->byte_offset_of_slot(data, ReceiverTypeData::receiver_offset(i)))); ++ __ ld_ptr(SCR1, Address(SCR2)); ++ __ bne(recv, SCR1, next_test); ++ Address data_addr(mdo, md->byte_offset_of_slot(data, ReceiverTypeData::receiver_count_offset(i))); ++ __ ld_ptr(SCR2, data_addr); ++ __ addi_d(SCR2, SCR2, DataLayout::counter_increment); ++ __ st_ptr(SCR2, data_addr); ++ __ b(*update_done); ++ __ bind(next_test); ++ } ++ ++ // Didn't find receiver; find next empty slot and fill it in ++ for (uint i = 0; i < ReceiverTypeData::row_limit(); i++) { ++ Label next_test; ++ __ lea(SCR2, Address(mdo, md->byte_offset_of_slot(data, ReceiverTypeData::receiver_offset(i)))); ++ Address recv_addr(SCR2); ++ __ ld_ptr(SCR1, recv_addr); ++ __ bnez(SCR1, next_test); ++ __ st_ptr(recv, recv_addr); ++ __ li(SCR1, DataLayout::counter_increment); ++ __ lea(SCR2, Address(mdo, md->byte_offset_of_slot(data, ReceiverTypeData::receiver_count_offset(i)))); ++ __ st_ptr(SCR1, Address(SCR2)); ++ __ b(*update_done); ++ __ bind(next_test); ++ } ++} ++ ++void LIR_Assembler::emit_typecheck_helper(LIR_OpTypeCheck *op, Label* success, ++ Label* failure, Label* obj_is_null) { ++ // we always need a stub for the failure case. ++ CodeStub* stub = op->stub(); ++ Register obj = op->object()->as_register(); ++ Register k_RInfo = op->tmp1()->as_register(); ++ Register klass_RInfo = op->tmp2()->as_register(); ++ Register dst = op->result_opr()->as_register(); ++ ciKlass* k = op->klass(); ++ Register Rtmp1 = noreg; ++ ++ // check if it needs to be profiled ++ ciMethodData* md; ++ ciProfileData* data; ++ ++ const bool should_profile = op->should_profile(); ++ ++ if (should_profile) { ++ ciMethod* method = op->profiled_method(); ++ assert(method != NULL, "Should have method"); ++ int bci = op->profiled_bci(); ++ md = method->method_data_or_null(); ++ assert(md != NULL, "Sanity"); ++ data = md->bci_to_data(bci); ++ assert(data != NULL, "need data for type check"); ++ assert(data->is_ReceiverTypeData(), "need ReceiverTypeData for type check"); ++ } ++ ++ Label profile_cast_success, profile_cast_failure; ++ Label *success_target = should_profile ? 
&profile_cast_success : success; ++ Label *failure_target = should_profile ? &profile_cast_failure : failure; ++ ++ if (obj == k_RInfo) { ++ k_RInfo = dst; ++ } else if (obj == klass_RInfo) { ++ klass_RInfo = dst; ++ } ++ if (k->is_loaded() && !UseCompressedClassPointers) { ++ select_different_registers(obj, dst, k_RInfo, klass_RInfo); ++ } else { ++ Rtmp1 = op->tmp3()->as_register(); ++ select_different_registers(obj, dst, k_RInfo, klass_RInfo, Rtmp1); ++ } ++ ++ assert_different_registers(obj, k_RInfo, klass_RInfo); ++ ++ if (should_profile) { ++ Label not_null; ++ __ bnez(obj, not_null); ++ // Object is null; update MDO and exit ++ Register mdo = klass_RInfo; ++ __ mov_metadata(mdo, md->constant_encoding()); ++ Address data_addr = Address(mdo, md->byte_offset_of_slot(data, DataLayout::flags_offset())); ++ __ ld_bu(SCR2, data_addr); ++ __ ori(SCR2, SCR2, BitData::null_seen_byte_constant()); ++ __ st_b(SCR2, data_addr); ++ __ b(*obj_is_null); ++ __ bind(not_null); ++ } else { ++ __ beqz(obj, *obj_is_null); ++ } ++ ++ if (!k->is_loaded()) { ++ klass2reg_with_patching(k_RInfo, op->info_for_patch()); ++ } else { ++ __ mov_metadata(k_RInfo, k->constant_encoding()); ++ } ++ __ verify_oop(obj); ++ ++ if (op->fast_check()) { ++ // get object class ++ // not a safepoint as obj null check happens earlier ++ __ load_klass(SCR2, obj); ++ __ bne_far(SCR2, k_RInfo, *failure_target); ++ // successful cast, fall through to profile or jump ++ } else { ++ // get object class ++ // not a safepoint as obj null check happens earlier ++ __ load_klass(klass_RInfo, obj); ++ if (k->is_loaded()) { ++ // See if we get an immediate positive hit ++ __ ld_ptr(SCR1, Address(klass_RInfo, int64_t(k->super_check_offset()))); ++ if ((juint)in_bytes(Klass::secondary_super_cache_offset()) != k->super_check_offset()) { ++ __ bne_far(k_RInfo, SCR1, *failure_target); ++ // successful cast, fall through to profile or jump ++ } else { ++ // See if we get an immediate positive hit ++ __ beq_far(k_RInfo, SCR1, *success_target); ++ // check for self ++ __ beq_far(klass_RInfo, k_RInfo, *success_target); ++ ++ __ addi_d(SP, SP, -2 * wordSize); ++ __ st_ptr(k_RInfo, Address(SP, 0 * wordSize)); ++ __ st_ptr(klass_RInfo, Address(SP, 1 * wordSize)); ++ __ call(Runtime1::entry_for(Runtime1::slow_subtype_check_id), relocInfo::runtime_call_type); ++ __ ld_ptr(klass_RInfo, Address(SP, 0 * wordSize)); ++ __ addi_d(SP, SP, 2 * wordSize); ++ // result is a boolean ++ __ beqz(klass_RInfo, *failure_target); ++ // successful cast, fall through to profile or jump ++ } ++ } else { ++ // perform the fast part of the checking logic ++ __ check_klass_subtype_fast_path(klass_RInfo, k_RInfo, Rtmp1, success_target, failure_target, NULL); ++ // call out-of-line instance of __ check_klass_subtype_slow_path(...): ++ __ addi_d(SP, SP, -2 * wordSize); ++ __ st_ptr(k_RInfo, Address(SP, 0 * wordSize)); ++ __ st_ptr(klass_RInfo, Address(SP, 1 * wordSize)); ++ __ call(Runtime1::entry_for(Runtime1::slow_subtype_check_id), relocInfo::runtime_call_type); ++ __ ld_ptr(k_RInfo, Address(SP, 0 * wordSize)); ++ __ ld_ptr(klass_RInfo, Address(SP, 1 * wordSize)); ++ __ addi_d(SP, SP, 2 * wordSize); ++ // result is a boolean ++ __ beqz(k_RInfo, *failure_target); ++ // successful cast, fall through to profile or jump ++ } ++ } ++ if (should_profile) { ++ Register mdo = klass_RInfo, recv = k_RInfo; ++ __ bind(profile_cast_success); ++ __ mov_metadata(mdo, md->constant_encoding()); ++ __ load_klass(recv, obj); ++ Label update_done; ++ type_profile_helper(mdo, md, data, recv, 
success); ++ __ b(*success); ++ ++ __ bind(profile_cast_failure); ++ __ mov_metadata(mdo, md->constant_encoding()); ++ Address counter_addr = Address(mdo, md->byte_offset_of_slot(data, CounterData::count_offset())); ++ __ ld_ptr(SCR2, counter_addr); ++ __ addi_d(SCR2, SCR2, -DataLayout::counter_increment); ++ __ st_ptr(SCR2, counter_addr); ++ __ b(*failure); ++ } ++ __ b(*success); ++} ++ ++void LIR_Assembler::emit_opTypeCheck(LIR_OpTypeCheck* op) { ++ const bool should_profile = op->should_profile(); ++ ++ LIR_Code code = op->code(); ++ if (code == lir_store_check) { ++ Register value = op->object()->as_register(); ++ Register array = op->array()->as_register(); ++ Register k_RInfo = op->tmp1()->as_register(); ++ Register klass_RInfo = op->tmp2()->as_register(); ++ Register Rtmp1 = op->tmp3()->as_register(); ++ CodeStub* stub = op->stub(); ++ ++ // check if it needs to be profiled ++ ciMethodData* md; ++ ciProfileData* data; ++ ++ if (should_profile) { ++ ciMethod* method = op->profiled_method(); ++ assert(method != NULL, "Should have method"); ++ int bci = op->profiled_bci(); ++ md = method->method_data_or_null(); ++ assert(md != NULL, "Sanity"); ++ data = md->bci_to_data(bci); ++ assert(data != NULL, "need data for type check"); ++ assert(data->is_ReceiverTypeData(), "need ReceiverTypeData for type check"); ++ } ++ Label profile_cast_success, profile_cast_failure, done; ++ Label *success_target = should_profile ? &profile_cast_success : &done; ++ Label *failure_target = should_profile ? &profile_cast_failure : stub->entry(); ++ ++ if (should_profile) { ++ Label not_null; ++ __ bnez(value, not_null); ++ // Object is null; update MDO and exit ++ Register mdo = klass_RInfo; ++ __ mov_metadata(mdo, md->constant_encoding()); ++ Address data_addr = Address(mdo, md->byte_offset_of_slot(data, DataLayout::flags_offset())); ++ __ ld_bu(SCR2, data_addr); ++ __ ori(SCR2, SCR2, BitData::null_seen_byte_constant()); ++ __ st_b(SCR2, data_addr); ++ __ b(done); ++ __ bind(not_null); ++ } else { ++ __ beqz(value, done); ++ } ++ ++ add_debug_info_for_null_check_here(op->info_for_exception()); ++ __ load_klass(k_RInfo, array); ++ __ load_klass(klass_RInfo, value); ++ ++ // get instance klass (it's already uncompressed) ++ __ ld_ptr(k_RInfo, Address(k_RInfo, ObjArrayKlass::element_klass_offset())); ++ // perform the fast part of the checking logic ++ __ check_klass_subtype_fast_path(klass_RInfo, k_RInfo, Rtmp1, success_target, failure_target, NULL); ++ // call out-of-line instance of __ check_klass_subtype_slow_path(...): ++ __ addi_d(SP, SP, -2 * wordSize); ++ __ st_ptr(k_RInfo, Address(SP, 0 * wordSize)); ++ __ st_ptr(klass_RInfo, Address(SP, 1 * wordSize)); ++ __ call(Runtime1::entry_for(Runtime1::slow_subtype_check_id), relocInfo::runtime_call_type); ++ __ ld_ptr(k_RInfo, Address(SP, 0 * wordSize)); ++ __ ld_ptr(klass_RInfo, Address(SP, 1 * wordSize)); ++ __ addi_d(SP, SP, 2 * wordSize); ++ // result is a boolean ++ __ beqz(k_RInfo, *failure_target); ++ // fall through to the success case ++ ++ if (should_profile) { ++ Register mdo = klass_RInfo, recv = k_RInfo; ++ __ bind(profile_cast_success); ++ __ mov_metadata(mdo, md->constant_encoding()); ++ __ load_klass(recv, value); ++ Label update_done; ++ type_profile_helper(mdo, md, data, recv, &done); ++ __ b(done); ++ ++ __ bind(profile_cast_failure); ++ __ mov_metadata(mdo, md->constant_encoding()); ++ Address counter_addr(mdo, md->byte_offset_of_slot(data, CounterData::count_offset())); ++ __ lea(SCR2, counter_addr); ++ __ ld_ptr(SCR1, Address(SCR2)); ++ 
__ addi_d(SCR1, SCR1, -DataLayout::counter_increment); ++ __ st_ptr(SCR1, Address(SCR2)); ++ __ b(*stub->entry()); ++ } ++ ++ __ bind(done); ++ } else if (code == lir_checkcast) { ++ Register obj = op->object()->as_register(); ++ Register dst = op->result_opr()->as_register(); ++ Label success; ++ emit_typecheck_helper(op, &success, op->stub()->entry(), &success); ++ __ bind(success); ++ if (dst != obj) { ++ __ move(dst, obj); ++ } ++ } else if (code == lir_instanceof) { ++ Register obj = op->object()->as_register(); ++ Register dst = op->result_opr()->as_register(); ++ Label success, failure, done; ++ emit_typecheck_helper(op, &success, &failure, &failure); ++ __ bind(failure); ++ __ move(dst, R0); ++ __ b(done); ++ __ bind(success); ++ __ li(dst, 1); ++ __ bind(done); ++ } else { ++ ShouldNotReachHere(); ++ } ++} ++ ++void LIR_Assembler::casw(Register addr, Register newval, Register cmpval, bool sign) { ++ __ cmpxchg32(Address(addr, 0), cmpval, newval, SCR1, sign, ++ /* retold */ false, /* barrier */ true); ++} ++ ++void LIR_Assembler::casl(Register addr, Register newval, Register cmpval) { ++ __ cmpxchg(Address(addr, 0), cmpval, newval, SCR1, ++ /* retold */ false, /* barrier */ true); ++} ++ ++void LIR_Assembler::emit_compare_and_swap(LIR_OpCompareAndSwap* op) { ++ assert(VM_Version::supports_cx8(), "wrong machine"); ++ Register addr; ++ if (op->addr()->is_register()) { ++ addr = as_reg(op->addr()); ++ } else { ++ assert(op->addr()->is_address(), "what else?"); ++ LIR_Address* addr_ptr = op->addr()->as_address_ptr(); ++ assert(addr_ptr->disp() == 0, "need 0 disp"); ++ assert(addr_ptr->index() == LIR_OprDesc::illegalOpr(), "need 0 index"); ++ addr = as_reg(addr_ptr->base()); ++ } ++ Register newval = as_reg(op->new_value()); ++ Register cmpval = as_reg(op->cmp_value()); ++ ++ if (op->code() == lir_cas_obj) { ++ if (UseCompressedOops) { ++ Register t1 = op->tmp1()->as_register(); ++ assert(op->tmp1()->is_valid(), "must be"); ++ __ encode_heap_oop(t1, cmpval); ++ cmpval = t1; ++ __ encode_heap_oop(SCR2, newval); ++ newval = SCR2; ++ casw(addr, newval, cmpval, false); ++ } else { ++ casl(addr, newval, cmpval); ++ } ++ } else if (op->code() == lir_cas_int) { ++ casw(addr, newval, cmpval, true); ++ } else { ++ casl(addr, newval, cmpval); ++ } ++} ++ ++void LIR_Assembler::cmove(LIR_Condition condition, LIR_Opr opr1, LIR_Opr opr2, ++ LIR_Opr result, BasicType type) { ++ Unimplemented(); ++} ++ ++void LIR_Assembler::cmp_cmove(LIR_Condition condition, LIR_Opr left, LIR_Opr right, ++ LIR_Opr src1, LIR_Opr src2, LIR_Opr result, BasicType type) { ++ assert(result->is_single_cpu() || result->is_double_cpu(), "expect single register for result"); ++ assert(left->is_single_cpu() || left->is_double_cpu(), "must be"); ++ Register regd = (result->type() == T_LONG) ? 
result->as_register_lo() : result->as_register(); ++ Register regl = as_reg(left); ++ Register regr = noreg; ++ Register reg1 = noreg; ++ Register reg2 = noreg; ++ jlong immr = 0; ++ ++ // comparison operands ++ if (right->is_single_cpu()) { ++ // cpu register - cpu register ++ regr = right->as_register(); ++ } else if (right->is_double_cpu()) { ++ // cpu register - cpu register ++ regr = right->as_register_lo(); ++ } else if (right->is_constant()) { ++ switch(right->type()) { ++ case T_INT: ++ case T_ADDRESS: ++ immr = right->as_constant_ptr()->as_jint(); ++ break; ++ case T_LONG: ++ immr = right->as_constant_ptr()->as_jlong(); ++ break; ++ case T_METADATA: ++ immr = (intptr_t)right->as_constant_ptr()->as_metadata(); ++ break; ++ case T_OBJECT: ++ case T_ARRAY: ++ if (right->as_constant_ptr()->as_jobject() != NULL) { ++ regr = SCR1; ++ jobject2reg(right->as_constant_ptr()->as_jobject(), regr); ++ } else { ++ immr = 0; ++ } ++ break; ++ default: ++ ShouldNotReachHere(); ++ break; ++ } ++ } else { ++ ShouldNotReachHere(); ++ } ++ ++ if (regr == noreg) { ++ switch (condition) { ++ case lir_cond_equal: ++ case lir_cond_notEqual: ++ if (!Assembler::is_simm(-immr, 12)) { ++ regr = SCR1; ++ __ li(regr, immr); ++ } ++ break; ++ default: ++ if (!Assembler::is_simm(immr, 12)) { ++ regr = SCR1; ++ __ li(regr, immr); ++ } ++ } ++ } ++ ++ // special cases ++ if (src1->is_constant() && src2->is_constant()) { ++ jlong val1 = 0, val2 = 0; ++ if (src1->type() == T_INT && src2->type() == T_INT) { ++ val1 = src1->as_jint(); ++ val2 = src2->as_jint(); ++ } else if (src1->type() == T_LONG && src2->type() == T_LONG) { ++ val1 = src1->as_jlong(); ++ val2 = src2->as_jlong(); ++ } ++ if (val1 == 0 && val2 == 1) { ++ if (regr == noreg) { ++ switch (condition) { ++ case lir_cond_equal: ++ if (immr == 0) { ++ __ sltu(regd, R0, regl); ++ } else { ++ __ addi_d(SCR1, regl, -immr); ++ __ li(regd, 1); ++ __ maskeqz(regd, regd, SCR1); ++ } ++ break; ++ case lir_cond_notEqual: ++ if (immr == 0) { ++ __ sltu(regd, R0, regl); ++ __ xori(regd, regd, 1); ++ } else { ++ __ addi_d(SCR1, regl, -immr); ++ __ li(regd, 1); ++ __ masknez(regd, regd, SCR1); ++ } ++ break; ++ case lir_cond_less: ++ __ slti(regd, regl, immr); ++ __ xori(regd, regd, 1); ++ break; ++ case lir_cond_lessEqual: ++ if (immr == 0) { ++ __ slt(regd, R0, regl); ++ } else { ++ __ li(SCR1, immr); ++ __ slt(regd, SCR1, regl); ++ } ++ break; ++ case lir_cond_greater: ++ if (immr == 0) { ++ __ slt(regd, R0, regl); ++ } else { ++ __ li(SCR1, immr); ++ __ slt(regd, SCR1, regl); ++ } ++ __ xori(regd, regd, 1); ++ break; ++ case lir_cond_greaterEqual: ++ __ slti(regd, regl, immr); ++ break; ++ case lir_cond_belowEqual: ++ if (immr == 0) { ++ __ sltu(regd, R0, regl); ++ } else { ++ __ li(SCR1, immr); ++ __ sltu(regd, SCR1, regl); ++ } ++ break; ++ case lir_cond_aboveEqual: ++ __ sltui(regd, regl, immr); ++ break; ++ default: ++ ShouldNotReachHere(); ++ } ++ } else { ++ switch (condition) { ++ case lir_cond_equal: ++ __ sub_d(SCR1, regl, regr); ++ __ li(regd, 1); ++ __ maskeqz(regd, regd, SCR1); ++ break; ++ case lir_cond_notEqual: ++ __ sub_d(SCR1, regl, regr); ++ __ li(regd, 1); ++ __ masknez(regd, regd, SCR1); ++ break; ++ case lir_cond_less: ++ __ slt(regd, regl, regr); ++ __ xori(regd, regd, 1); ++ break; ++ case lir_cond_lessEqual: ++ __ slt(regd, regr, regl); ++ break; ++ case lir_cond_greater: ++ __ slt(regd, regr, regl); ++ __ xori(regd, regd, 1); ++ break; ++ case lir_cond_greaterEqual: ++ __ slt(regd, regl, regr); ++ break; ++ case lir_cond_belowEqual: ++ __ 
sltu(regd, regr, regl); ++ break; ++ case lir_cond_aboveEqual: ++ __ sltu(regd, regl, regr); ++ break; ++ default: ++ ShouldNotReachHere(); ++ } ++ } ++ return; ++ } else if (val1 == 1 && val2 == 0) { ++ if (regr == noreg) { ++ switch (condition) { ++ case lir_cond_equal: ++ if (immr == 0) { ++ __ sltu(regd, R0, regl); ++ __ xori(regd, regd, 1); ++ } else { ++ __ addi_d(SCR1, regl, -immr); ++ __ li(regd, 1); ++ __ masknez(regd, regd, SCR1); ++ } ++ break; ++ case lir_cond_notEqual: ++ if (immr == 0) { ++ __ sltu(regd, R0, regl); ++ } else { ++ __ addi_d(SCR1, regl, -immr); ++ __ li(regd, 1); ++ __ maskeqz(regd, regd, SCR1); ++ } ++ break; ++ case lir_cond_less: ++ __ slti(regd, regl, immr); ++ break; ++ case lir_cond_lessEqual: ++ if (immr == 0) { ++ __ slt(regd, R0, regl); ++ } else { ++ __ li(SCR1, immr); ++ __ slt(regd, SCR1, regl); ++ } ++ __ xori(regd, regd, 1); ++ break; ++ case lir_cond_greater: ++ if (immr == 0) { ++ __ slt(regd, R0, regl); ++ } else { ++ __ li(SCR1, immr); ++ __ slt(regd, SCR1, regl); ++ } ++ break; ++ case lir_cond_greaterEqual: ++ __ slti(regd, regl, immr); ++ __ xori(regd, regd, 1); ++ break; ++ case lir_cond_belowEqual: ++ if (immr == 0) { ++ __ sltu(regd, R0, regl); ++ } else { ++ __ li(SCR1, immr); ++ __ sltu(regd, SCR1, regl); ++ } ++ __ xori(regd, regd, 1); ++ break; ++ case lir_cond_aboveEqual: ++ __ sltui(regd, regl, immr); ++ __ xori(regd, regd, 1); ++ break; ++ default: ++ ShouldNotReachHere(); ++ } ++ } else { ++ switch (condition) { ++ case lir_cond_equal: ++ __ sub_d(SCR1, regl, regr); ++ __ li(regd, 1); ++ __ masknez(regd, regd, SCR1); ++ break; ++ case lir_cond_notEqual: ++ __ sub_d(SCR1, regl, regr); ++ __ li(regd, 1); ++ __ maskeqz(regd, regd, SCR1); ++ break; ++ case lir_cond_less: ++ __ slt(regd, regl, regr); ++ break; ++ case lir_cond_lessEqual: ++ __ slt(regd, regr, regl); ++ __ xori(regd, regd, 1); ++ break; ++ case lir_cond_greater: ++ __ slt(regd, regr, regl); ++ break; ++ case lir_cond_greaterEqual: ++ __ slt(regd, regl, regr); ++ __ xori(regd, regd, 1); ++ break; ++ case lir_cond_belowEqual: ++ __ sltu(regd, regr, regl); ++ __ xori(regd, regd, 1); ++ break; ++ case lir_cond_aboveEqual: ++ __ sltu(regd, regl, regr); ++ __ xori(regd, regd, 1); ++ break; ++ default: ++ ShouldNotReachHere(); ++ } ++ } ++ return; ++ } ++ } ++ ++ // cmp ++ if (regr == noreg) { ++ switch (condition) { ++ case lir_cond_equal: ++ __ addi_d(SCR2, regl, -immr); ++ break; ++ case lir_cond_notEqual: ++ __ addi_d(SCR2, regl, -immr); ++ break; ++ case lir_cond_less: ++ __ slti(SCR2, regl, immr); ++ break; ++ case lir_cond_lessEqual: ++ __ li(SCR1, immr); ++ __ slt(SCR2, SCR1, regl); ++ break; ++ case lir_cond_greater: ++ __ li(SCR1, immr); ++ __ slt(SCR2, SCR1, regl); ++ break; ++ case lir_cond_greaterEqual: ++ __ slti(SCR2, regl, immr); ++ break; ++ case lir_cond_belowEqual: ++ __ li(SCR1, immr); ++ __ sltu(SCR2, SCR1, regl); ++ break; ++ case lir_cond_aboveEqual: ++ __ sltui(SCR2, regl, immr); ++ break; ++ default: ++ ShouldNotReachHere(); ++ } ++ } else { ++ switch (condition) { ++ case lir_cond_equal: ++ __ sub_d(SCR2, regl, regr); ++ break; ++ case lir_cond_notEqual: ++ __ sub_d(SCR2, regl, regr); ++ break; ++ case lir_cond_less: ++ __ slt(SCR2, regl, regr); ++ break; ++ case lir_cond_lessEqual: ++ __ slt(SCR2, regr, regl); ++ break; ++ case lir_cond_greater: ++ __ slt(SCR2, regr, regl); ++ break; ++ case lir_cond_greaterEqual: ++ __ slt(SCR2, regl, regr); ++ break; ++ case lir_cond_belowEqual: ++ __ sltu(SCR2, regr, regl); ++ break; ++ case lir_cond_aboveEqual: 
++ __ sltu(SCR2, regl, regr); ++ break; ++ default: ++ ShouldNotReachHere(); ++ } ++ } ++ ++ // value operands ++ if (src1->is_stack()) { ++ stack2reg(src1, result, result->type()); ++ reg1 = regd; ++ } else if (src1->is_constant()) { ++ const2reg(src1, result, lir_patch_none, NULL); ++ reg1 = regd; ++ } else { ++ reg1 = (src1->type() == T_LONG) ? src1->as_register_lo() : src1->as_register(); ++ } ++ ++ if (src2->is_stack()) { ++ stack2reg(src2, FrameMap::scr1_opr, result->type()); ++ reg2 = SCR1; ++ } else if (src2->is_constant()) { ++ LIR_Opr tmp = src2->type() == T_LONG ? FrameMap::scr1_long_opr : FrameMap::scr1_opr; ++ const2reg(src2, tmp, lir_patch_none, NULL); ++ reg2 = SCR1; ++ } else { ++ reg2 = (src2->type() == T_LONG) ? src2->as_register_lo() : src2->as_register(); ++ } ++ ++ // cmove ++ switch (condition) { ++ case lir_cond_equal: ++ __ masknez(regd, reg1, SCR2); ++ __ maskeqz(SCR2, reg2, SCR2); ++ break; ++ case lir_cond_notEqual: ++ __ maskeqz(regd, reg1, SCR2); ++ __ masknez(SCR2, reg2, SCR2); ++ break; ++ case lir_cond_less: ++ __ maskeqz(regd, reg1, SCR2); ++ __ masknez(SCR2, reg2, SCR2); ++ break; ++ case lir_cond_lessEqual: ++ __ masknez(regd, reg1, SCR2); ++ __ maskeqz(SCR2, reg2, SCR2); ++ break; ++ case lir_cond_greater: ++ __ maskeqz(regd, reg1, SCR2); ++ __ masknez(SCR2, reg2, SCR2); ++ break; ++ case lir_cond_greaterEqual: ++ __ masknez(regd, reg1, SCR2); ++ __ maskeqz(SCR2, reg2, SCR2); ++ break; ++ case lir_cond_belowEqual: ++ __ masknez(regd, reg1, SCR2); ++ __ maskeqz(SCR2, reg2, SCR2); ++ break; ++ case lir_cond_aboveEqual: ++ __ masknez(regd, reg1, SCR2); ++ __ maskeqz(SCR2, reg2, SCR2); ++ break; ++ default: ++ ShouldNotReachHere(); ++ } ++ ++ __ OR(regd, regd, SCR2); ++} ++ ++void LIR_Assembler::arith_op(LIR_Code code, LIR_Opr left, LIR_Opr right, LIR_Opr dest, ++ CodeEmitInfo* info, bool pop_fpu_stack) { ++ assert(info == NULL, "should never be used, idiv/irem and ldiv/lrem not handled by this method"); ++ ++ if (left->is_single_cpu()) { ++ Register lreg = left->as_register(); ++ Register dreg = as_reg(dest); ++ ++ if (right->is_single_cpu()) { ++ // cpu register - cpu register ++ assert(left->type() == T_INT && right->type() == T_INT && dest->type() == T_INT, "should be"); ++ Register rreg = right->as_register(); ++ switch (code) { ++ case lir_add: __ add_w (dest->as_register(), lreg, rreg); break; ++ case lir_sub: __ sub_w (dest->as_register(), lreg, rreg); break; ++ case lir_mul: __ mul_w (dest->as_register(), lreg, rreg); break; ++ default: ShouldNotReachHere(); ++ } ++ } else if (right->is_double_cpu()) { ++ Register rreg = right->as_register_lo(); ++ // single_cpu + double_cpu: can happen with obj+long ++ assert(code == lir_add || code == lir_sub, "mismatched arithmetic op"); ++ switch (code) { ++ case lir_add: __ add_d(dreg, lreg, rreg); break; ++ case lir_sub: __ sub_d(dreg, lreg, rreg); break; ++ default: ShouldNotReachHere(); ++ } ++ } else if (right->is_constant()) { ++ // cpu register - constant ++ jlong c; ++ ++ // FIXME: This is fugly: we really need to factor all this logic. 
++ switch(right->type()) { ++ case T_LONG: ++ c = right->as_constant_ptr()->as_jlong(); ++ break; ++ case T_INT: ++ case T_ADDRESS: ++ c = right->as_constant_ptr()->as_jint(); ++ break; ++ default: ++ ShouldNotReachHere(); ++ c = 0; // unreachable ++ break; ++ } ++ ++ assert(code == lir_add || code == lir_sub, "mismatched arithmetic op"); ++ if (c == 0 && dreg == lreg) { ++ COMMENT("effective nop elided"); ++ return; ++ } ++ ++ switch(left->type()) { ++ case T_INT: ++ switch (code) { ++ case lir_add: __ addi_w(dreg, lreg, c); break; ++ case lir_sub: __ addi_w(dreg, lreg, -c); break; ++ default: ShouldNotReachHere(); ++ } ++ break; ++ case T_OBJECT: ++ case T_ADDRESS: ++ switch (code) { ++ case lir_add: __ addi_d(dreg, lreg, c); break; ++ case lir_sub: __ addi_d(dreg, lreg, -c); break; ++ default: ShouldNotReachHere(); ++ } ++ break; ++ default: ++ ShouldNotReachHere(); ++ } ++ } else { ++ ShouldNotReachHere(); ++ } ++ } else if (left->is_double_cpu()) { ++ Register lreg_lo = left->as_register_lo(); ++ ++ if (right->is_double_cpu()) { ++ // cpu register - cpu register ++ Register rreg_lo = right->as_register_lo(); ++ switch (code) { ++ case lir_add: __ add_d(dest->as_register_lo(), lreg_lo, rreg_lo); break; ++ case lir_sub: __ sub_d(dest->as_register_lo(), lreg_lo, rreg_lo); break; ++ case lir_mul: __ mul_d(dest->as_register_lo(), lreg_lo, rreg_lo); break; ++ case lir_div: __ div_d(dest->as_register_lo(), lreg_lo, rreg_lo); break; ++ case lir_rem: __ mod_d(dest->as_register_lo(), lreg_lo, rreg_lo); break; ++ default: ShouldNotReachHere(); ++ } ++ ++ } else if (right->is_constant()) { ++ jlong c = right->as_constant_ptr()->as_jlong(); ++ Register dreg = as_reg(dest); ++ switch (code) { ++ case lir_add: ++ case lir_sub: ++ if (c == 0 && dreg == lreg_lo) { ++ COMMENT("effective nop elided"); ++ return; ++ } ++ code == lir_add ? 
__ addi_d(dreg, lreg_lo, c) : __ addi_d(dreg, lreg_lo, -c); ++ break; ++ case lir_div: ++ assert(c > 0 && is_power_of_2(c), "divisor must be power-of-2 constant"); ++ if (c == 1) { ++ // move lreg_lo to dreg if divisor is 1 ++ __ move(dreg, lreg_lo); ++ } else { ++ unsigned int shift = exact_log2(c); ++ // use scr1 as intermediate result register ++ __ srai_d(SCR1, lreg_lo, 63); ++ __ srli_d(SCR1, SCR1, 64 - shift); ++ __ add_d(SCR1, lreg_lo, SCR1); ++ __ srai_d(dreg, SCR1, shift); ++ } ++ break; ++ case lir_rem: ++ assert(c > 0 && is_power_of_2(c), "divisor must be power-of-2 constant"); ++ if (c == 1) { ++ // move 0 to dreg if divisor is 1 ++ __ move(dreg, R0); ++ } else { ++ // use scr1/2 as intermediate result register ++ __ sub_d(SCR1, R0, lreg_lo); ++ __ slt(SCR2, SCR1, R0); ++ __ andi(dreg, lreg_lo, c - 1); ++ __ andi(SCR1, SCR1, c - 1); ++ __ sub_d(SCR1, R0, SCR1); ++ __ maskeqz(dreg, dreg, SCR2); ++ __ masknez(SCR1, SCR1, SCR2); ++ __ OR(dreg, dreg, SCR1); ++ } ++ break; ++ default: ++ ShouldNotReachHere(); ++ } ++ } else { ++ ShouldNotReachHere(); ++ } ++ } else if (left->is_single_fpu()) { ++ assert(right->is_single_fpu(), "right hand side of float arithmetics needs to be float register"); ++ switch (code) { ++ case lir_add: __ fadd_s (dest->as_float_reg(), left->as_float_reg(), right->as_float_reg()); break; ++ case lir_sub: __ fsub_s (dest->as_float_reg(), left->as_float_reg(), right->as_float_reg()); break; ++ case lir_mul_strictfp: // fall through ++ case lir_mul: __ fmul_s (dest->as_float_reg(), left->as_float_reg(), right->as_float_reg()); break; ++ case lir_div_strictfp: // fall through ++ case lir_div: __ fdiv_s (dest->as_float_reg(), left->as_float_reg(), right->as_float_reg()); break; ++ default: ShouldNotReachHere(); ++ } ++ } else if (left->is_double_fpu()) { ++ if (right->is_double_fpu()) { ++ // fpu register - fpu register ++ switch (code) { ++ case lir_add: __ fadd_d (dest->as_double_reg(), left->as_double_reg(), right->as_double_reg()); break; ++ case lir_sub: __ fsub_d (dest->as_double_reg(), left->as_double_reg(), right->as_double_reg()); break; ++ case lir_mul_strictfp: // fall through ++ case lir_mul: __ fmul_d (dest->as_double_reg(), left->as_double_reg(), right->as_double_reg()); break; ++ case lir_div_strictfp: // fall through ++ case lir_div: __ fdiv_d (dest->as_double_reg(), left->as_double_reg(), right->as_double_reg()); break; ++ default: ShouldNotReachHere(); ++ } ++ } else { ++ if (right->is_constant()) { ++ ShouldNotReachHere(); ++ } ++ ShouldNotReachHere(); ++ } ++ } else if (left->is_single_stack() || left->is_address()) { ++ assert(left == dest, "left and dest must be equal"); ++ ShouldNotReachHere(); ++ } else { ++ ShouldNotReachHere(); ++ } ++} ++ ++void LIR_Assembler::arith_fpu_implementation(LIR_Code code, int left_index, int right_index, ++ int dest_index, bool pop_fpu_stack) { ++ Unimplemented(); ++} ++ ++void LIR_Assembler::intrinsic_op(LIR_Code code, LIR_Opr value, LIR_Opr unused, LIR_Opr dest, LIR_Op* op) { ++ switch(code) { ++ case lir_abs : __ fabs_d(dest->as_double_reg(), value->as_double_reg()); break; ++ case lir_sqrt: __ fsqrt_d(dest->as_double_reg(), value->as_double_reg()); break; ++ default : ShouldNotReachHere(); ++ } ++} ++ ++void LIR_Assembler::logic_op(LIR_Code code, LIR_Opr left, LIR_Opr right, LIR_Opr dst) { ++ assert(left->is_single_cpu() || left->is_double_cpu(), "expect single or double register"); ++ Register Rleft = left->is_single_cpu() ? 
left->as_register() : left->as_register_lo(); ++ ++ if (dst->is_single_cpu()) { ++ Register Rdst = dst->as_register(); ++ if (right->is_constant()) { ++ switch (code) { ++ case lir_logic_and: ++ if (Assembler::is_uimm(right->as_jint(), 12)) { ++ __ andi(Rdst, Rleft, right->as_jint()); ++ } else { ++ __ li(AT, right->as_jint()); ++ __ AND(Rdst, Rleft, AT); ++ } ++ break; ++ case lir_logic_or: __ ori(Rdst, Rleft, right->as_jint()); break; ++ case lir_logic_xor: __ xori(Rdst, Rleft, right->as_jint()); break; ++ default: ShouldNotReachHere(); break; ++ } ++ } else { ++ Register Rright = right->is_single_cpu() ? right->as_register() : right->as_register_lo(); ++ switch (code) { ++ case lir_logic_and: __ AND(Rdst, Rleft, Rright); break; ++ case lir_logic_or: __ OR(Rdst, Rleft, Rright); break; ++ case lir_logic_xor: __ XOR(Rdst, Rleft, Rright); break; ++ default: ShouldNotReachHere(); break; ++ } ++ } ++ } else { ++ Register Rdst = dst->as_register_lo(); ++ if (right->is_constant()) { ++ switch (code) { ++ case lir_logic_and: ++ if (Assembler::is_uimm(right->as_jlong(), 12)) { ++ __ andi(Rdst, Rleft, right->as_jlong()); ++ } else { ++ // We can guarantee that transform from HIR LogicOp is in range of ++ // uimm(12), but the common code directly generates LIR LogicAnd, ++ // and the right-operand is mask with all ones in the high bits. ++ __ li(AT, right->as_jlong()); ++ __ AND(Rdst, Rleft, AT); ++ } ++ break; ++ case lir_logic_or: __ ori(Rdst, Rleft, right->as_jlong()); break; ++ case lir_logic_xor: __ xori(Rdst, Rleft, right->as_jlong()); break; ++ default: ShouldNotReachHere(); break; ++ } ++ } else { ++ Register Rright = right->is_single_cpu() ? right->as_register() : right->as_register_lo(); ++ switch (code) { ++ case lir_logic_and: __ AND(Rdst, Rleft, Rright); break; ++ case lir_logic_or: __ OR(Rdst, Rleft, Rright); break; ++ case lir_logic_xor: __ XOR(Rdst, Rleft, Rright); break; ++ default: ShouldNotReachHere(); break; ++ } ++ } ++ } ++} ++ ++void LIR_Assembler::arithmetic_idiv(LIR_Code code, LIR_Opr left, LIR_Opr right, ++ LIR_Opr illegal, LIR_Opr result, CodeEmitInfo* info) { ++ // opcode check ++ assert((code == lir_idiv) || (code == lir_irem), "opcode must be idiv or irem"); ++ bool is_irem = (code == lir_irem); ++ ++ // operand check ++ assert(left->is_single_cpu(), "left must be register"); ++ assert(right->is_single_cpu() || right->is_constant(), "right must be register or constant"); ++ assert(result->is_single_cpu(), "result must be register"); ++ Register lreg = left->as_register(); ++ Register dreg = result->as_register(); ++ ++ // power-of-2 constant check and codegen ++ if (right->is_constant()) { ++ int c = right->as_constant_ptr()->as_jint(); ++ assert(c > 0 && is_power_of_2(c), "divisor must be power-of-2 constant"); ++ if (is_irem) { ++ if (c == 1) { ++ // move 0 to dreg if divisor is 1 ++ __ move(dreg, R0); ++ } else { ++ // use scr1/2 as intermediate result register ++ __ sub_w(SCR1, R0, lreg); ++ __ slt(SCR2, SCR1, R0); ++ __ andi(dreg, lreg, c - 1); ++ __ andi(SCR1, SCR1, c - 1); ++ __ sub_w(SCR1, R0, SCR1); ++ __ maskeqz(dreg, dreg, SCR2); ++ __ masknez(SCR1, SCR1, SCR2); ++ __ OR(dreg, dreg, SCR1); ++ } ++ } else { ++ if (c == 1) { ++ // move lreg to dreg if divisor is 1 ++ __ move(dreg, lreg); ++ } else { ++ unsigned int shift = exact_log2(c); ++ // use scr1 as intermediate result register ++ __ srai_w(SCR1, lreg, 31); ++ __ srli_w(SCR1, SCR1, 32 - shift); ++ __ add_w(SCR1, lreg, SCR1); ++ __ srai_w(dreg, SCR1, shift); ++ } ++ } ++ } else { ++ Register rreg = 
right->as_register(); ++ if (is_irem) ++ __ mod_w(dreg, lreg, rreg); ++ else ++ __ div_w(dreg, lreg, rreg); ++ } ++} ++ ++void LIR_Assembler::comp_op(LIR_Condition condition, LIR_Opr opr1, LIR_Opr opr2, LIR_Op2* op) { ++ Unimplemented(); ++} ++ ++void LIR_Assembler::comp_fl2i(LIR_Code code, LIR_Opr left, LIR_Opr right, LIR_Opr dst, LIR_Op2* op){ ++ if (code == lir_cmp_fd2i || code == lir_ucmp_fd2i) { ++ bool is_unordered_less = (code == lir_ucmp_fd2i); ++ if (left->is_single_fpu()) { ++ if (is_unordered_less) { ++ __ fcmp_clt_s(FCC0, right->as_float_reg(), left->as_float_reg()); ++ __ fcmp_cult_s(FCC1, left->as_float_reg(), right->as_float_reg()); ++ } else { ++ __ fcmp_cult_s(FCC0, right->as_float_reg(), left->as_float_reg()); ++ __ fcmp_clt_s(FCC1, left->as_float_reg(), right->as_float_reg()); ++ } ++ } else if (left->is_double_fpu()) { ++ if (is_unordered_less) { ++ __ fcmp_clt_d(FCC0, right->as_double_reg(), left->as_double_reg()); ++ __ fcmp_cult_d(FCC1, left->as_double_reg(), right->as_double_reg()); ++ } else { ++ __ fcmp_cult_d(FCC0, right->as_double_reg(), left->as_double_reg()); ++ __ fcmp_clt_d(FCC1, left->as_double_reg(), right->as_double_reg()); ++ } ++ } else { ++ ShouldNotReachHere(); ++ } ++ __ movcf2gr(dst->as_register(), FCC0); ++ __ movcf2gr(SCR1, FCC1); ++ __ sub_d(dst->as_register(), dst->as_register(), SCR1); ++ } else if (code == lir_cmp_l2i) { ++ __ slt(SCR1, left->as_register_lo(), right->as_register_lo()); ++ __ slt(dst->as_register(), right->as_register_lo(), left->as_register_lo()); ++ __ sub_d(dst->as_register(), dst->as_register(), SCR1); ++ } else { ++ ShouldNotReachHere(); ++ } ++} ++ ++void LIR_Assembler::align_call(LIR_Code code) {} ++ ++void LIR_Assembler::call(LIR_OpJavaCall* op, relocInfo::relocType rtype) { ++ address call = __ trampoline_call(AddressLiteral(op->addr(), rtype)); ++ if (call == NULL) { ++ bailout("trampoline stub overflow"); ++ return; ++ } ++ add_call_info(code_offset(), op->info()); ++} ++ ++void LIR_Assembler::ic_call(LIR_OpJavaCall* op) { ++ address call = __ ic_call(op->addr()); ++ if (call == NULL) { ++ bailout("trampoline stub overflow"); ++ return; ++ } ++ add_call_info(code_offset(), op->info()); ++} ++ ++/* Currently, vtable-dispatch is only enabled for sparc platforms */ ++void LIR_Assembler::vtable_call(LIR_OpJavaCall* op) { ++ ShouldNotReachHere(); ++} ++ ++void LIR_Assembler::emit_static_call_stub() { ++ address call_pc = __ pc(); ++ address stub = __ start_a_stub(call_stub_size); ++ if (stub == NULL) { ++ bailout("static call stub overflow"); ++ return; ++ } ++ ++ int start = __ offset(); ++ ++ __ relocate(static_stub_Relocation::spec(call_pc)); ++ ++ // Code stream for loading method may be changed. ++ __ ibar(0); ++ ++ // Rmethod contains Method*, it should be relocated for GC ++ // static stub relocation also tags the Method* in the code-stream. 
++ __ mov_metadata(Rmethod, NULL); ++ // This is recognized as unresolved by relocs/nativeInst/ic code ++ __ patchable_jump(__ pc()); ++ ++ assert(__ offset() - start <= call_stub_size, "stub too big"); ++ __ end_a_stub(); ++} ++ ++void LIR_Assembler::throw_op(LIR_Opr exceptionPC, LIR_Opr exceptionOop, CodeEmitInfo* info) { ++ assert(exceptionOop->as_register() == A0, "must match"); ++ assert(exceptionPC->as_register() == A1, "must match"); ++ ++ // exception object is not added to oop map by LinearScan ++ // (LinearScan assumes that no oops are in fixed registers) ++ info->add_register_oop(exceptionOop); ++ Runtime1::StubID unwind_id; ++ ++ // get current pc information ++ // pc is only needed if the method has an exception handler, the unwind code does not need it. ++ if (compilation()->debug_info_recorder()->last_pc_offset() == __ offset()) { ++ // As no instructions have been generated yet for this LIR node it's ++ // possible that an oop map already exists for the current offset. ++ // In that case insert an dummy NOP here to ensure all oop map PCs ++ // are unique. See JDK-8237483. ++ __ nop(); ++ } ++ Label L; ++ int pc_for_athrow_offset = __ offset(); ++ __ bind(L); ++ __ lipc(exceptionPC->as_register(), L); ++ add_call_info(pc_for_athrow_offset, info); // for exception handler ++ ++ __ verify_not_null_oop(A0); ++ // search an exception handler (A0: exception oop, A1: throwing pc) ++ if (compilation()->has_fpu_code()) { ++ unwind_id = Runtime1::handle_exception_id; ++ } else { ++ unwind_id = Runtime1::handle_exception_nofpu_id; ++ } ++ __ call(Runtime1::entry_for(unwind_id), relocInfo::runtime_call_type); ++ ++ // FIXME: enough room for two byte trap ???? ++ __ nop(); ++} ++ ++void LIR_Assembler::unwind_op(LIR_Opr exceptionOop) { ++ assert(exceptionOop->as_register() == A0, "must match"); ++ __ b(_unwind_handler_entry); ++} ++ ++void LIR_Assembler::shift_op(LIR_Code code, LIR_Opr left, LIR_Opr count, LIR_Opr dest, LIR_Opr tmp) { ++ Register lreg = left->is_single_cpu() ? left->as_register() : left->as_register_lo(); ++ Register dreg = dest->is_single_cpu() ? dest->as_register() : dest->as_register_lo(); ++ ++ switch (left->type()) { ++ case T_INT: { ++ switch (code) { ++ case lir_shl: __ sll_w(dreg, lreg, count->as_register()); break; ++ case lir_shr: __ sra_w(dreg, lreg, count->as_register()); break; ++ case lir_ushr: __ srl_w(dreg, lreg, count->as_register()); break; ++ default: ShouldNotReachHere(); break; ++ } ++ break; ++ case T_LONG: ++ case T_ADDRESS: ++ case T_OBJECT: ++ switch (code) { ++ case lir_shl: __ sll_d(dreg, lreg, count->as_register()); break; ++ case lir_shr: __ sra_d(dreg, lreg, count->as_register()); break; ++ case lir_ushr: __ srl_d(dreg, lreg, count->as_register()); break; ++ default: ShouldNotReachHere(); break; ++ } ++ break; ++ default: ++ ShouldNotReachHere(); ++ break; ++ } ++ } ++} ++ ++void LIR_Assembler::shift_op(LIR_Code code, LIR_Opr left, jint count, LIR_Opr dest) { ++ Register dreg = dest->is_single_cpu() ? dest->as_register() : dest->as_register_lo(); ++ Register lreg = left->is_single_cpu() ? 
left->as_register() : left->as_register_lo(); ++ ++ switch (left->type()) { ++ case T_INT: { ++ switch (code) { ++ case lir_shl: __ slli_w(dreg, lreg, count); break; ++ case lir_shr: __ srai_w(dreg, lreg, count); break; ++ case lir_ushr: __ srli_w(dreg, lreg, count); break; ++ default: ShouldNotReachHere(); break; ++ } ++ break; ++ case T_LONG: ++ case T_ADDRESS: ++ case T_OBJECT: ++ switch (code) { ++ case lir_shl: __ slli_d(dreg, lreg, count); break; ++ case lir_shr: __ srai_d(dreg, lreg, count); break; ++ case lir_ushr: __ srli_d(dreg, lreg, count); break; ++ default: ShouldNotReachHere(); break; ++ } ++ break; ++ default: ++ ShouldNotReachHere(); ++ break; ++ } ++ } ++} ++ ++void LIR_Assembler::store_parameter(Register r, int offset_from_sp_in_words) { ++ assert(offset_from_sp_in_words >= 0, "invalid offset from sp"); ++ int offset_from_sp_in_bytes = offset_from_sp_in_words * BytesPerWord; ++ assert(offset_from_sp_in_bytes < frame_map()->reserved_argument_area_size(), "invalid offset"); ++ __ st_ptr(r, Address(SP, offset_from_sp_in_bytes)); ++} ++ ++void LIR_Assembler::store_parameter(jint c, int offset_from_sp_in_words) { ++ assert(offset_from_sp_in_words >= 0, "invalid offset from sp"); ++ int offset_from_sp_in_bytes = offset_from_sp_in_words * BytesPerWord; ++ assert(offset_from_sp_in_bytes < frame_map()->reserved_argument_area_size(), "invalid offset"); ++ __ li(SCR2, c); ++ __ st_ptr(SCR2, Address(SP, offset_from_sp_in_bytes)); ++} ++ ++void LIR_Assembler::store_parameter(jobject o, int offset_from_sp_in_words) { ++ ShouldNotReachHere(); ++} ++ ++// This code replaces a call to arraycopy; no exception may ++// be thrown in this code, they must be thrown in the System.arraycopy ++// activation frame; we could save some checks if this would not be the case ++void LIR_Assembler::emit_arraycopy(LIR_OpArrayCopy* op) { ++ Register j_rarg0 = T0; ++ Register j_rarg1 = A0; ++ Register j_rarg2 = A1; ++ Register j_rarg3 = A2; ++ Register j_rarg4 = A3; ++ ++ ciArrayKlass* default_type = op->expected_type(); ++ Register src = op->src()->as_register(); ++ Register dst = op->dst()->as_register(); ++ Register src_pos = op->src_pos()->as_register(); ++ Register dst_pos = op->dst_pos()->as_register(); ++ Register length = op->length()->as_register(); ++ Register tmp = op->tmp()->as_register(); ++ ++ CodeStub* stub = op->stub(); ++ int flags = op->flags(); ++ BasicType basic_type = default_type != NULL ? default_type->element_type()->basic_type() : T_ILLEGAL; ++ if (is_reference_type(basic_type)) ++ basic_type = T_OBJECT; ++ ++ // if we don't know anything, just go through the generic arraycopy ++ if (default_type == NULL) { ++ Label done; ++ assert(src == T0 && src_pos == A0, "mismatch in calling convention"); ++ ++ // Save the arguments in case the generic arraycopy fails and we ++ // have to fall back to the JNI stub ++ __ st_ptr(dst, Address(SP, 0 * BytesPerWord)); ++ __ st_ptr(dst_pos, Address(SP, 1 * BytesPerWord)); ++ __ st_ptr(length, Address(SP, 2 * BytesPerWord)); ++ __ st_ptr(src_pos, Address(SP, 3 * BytesPerWord)); ++ __ st_ptr(src, Address(SP, 4 * BytesPerWord)); ++ ++ address copyfunc_addr = StubRoutines::generic_arraycopy(); ++ ++ // FIXME: LA ++ if (copyfunc_addr == NULL) { ++ // Take a slow path for generic arraycopy. 
++ __ b(*stub->entry()); ++ __ bind(*stub->continuation()); ++ return; ++ } ++ ++ // The arguments are in java calling convention so we shift them ++ // to C convention ++ assert_different_registers(A0, j_rarg1, j_rarg2, j_rarg3, j_rarg4); ++ __ move(A0, j_rarg0); ++ assert_different_registers(A1, j_rarg2, j_rarg3, j_rarg4); ++ __ move(A1, j_rarg1); ++ assert_different_registers(A2, j_rarg3, j_rarg4); ++ __ move(A2, j_rarg2); ++ assert_different_registers(A3, j_rarg4); ++ __ move(A3, j_rarg3); ++ __ move(A4, j_rarg4); ++#ifndef PRODUCT ++ if (PrintC1Statistics) { ++ __ li(SCR2, (address)&Runtime1::_generic_arraycopystub_cnt); ++ __ increment(SCR2, 1); ++ } ++#endif ++ __ call(copyfunc_addr, relocInfo::runtime_call_type); ++ ++ __ beqz(A0, *stub->continuation()); ++ ++ // Reload values from the stack so they are where the stub ++ // expects them. ++ __ ld_ptr(dst, Address(SP, 0 * BytesPerWord)); ++ __ ld_ptr(dst_pos, Address(SP, 1 * BytesPerWord)); ++ __ ld_ptr(length, Address(SP, 2 * BytesPerWord)); ++ __ ld_ptr(src_pos, Address(SP, 3 * BytesPerWord)); ++ __ ld_ptr(src, Address(SP, 4 * BytesPerWord)); ++ ++ // A0 is -1^K where K == partial copied count ++ __ nor(SCR1, A0, R0); ++ __ slli_w(SCR1, SCR1, 0); ++ // adjust length down and src/end pos up by partial copied count ++ __ sub_w(length, length, SCR1); ++ __ add_w(src_pos, src_pos, SCR1); ++ __ add_w(dst_pos, dst_pos, SCR1); ++ __ b(*stub->entry()); ++ ++ __ bind(*stub->continuation()); ++ return; ++ } ++ ++ assert(default_type != NULL && default_type->is_array_klass() && default_type->is_loaded(), ++ "must be true at this point"); ++ ++ int elem_size = type2aelembytes(basic_type); ++ Address::ScaleFactor scale = Address::times(elem_size); ++ ++ Address src_length_addr = Address(src, arrayOopDesc::length_offset_in_bytes()); ++ Address dst_length_addr = Address(dst, arrayOopDesc::length_offset_in_bytes()); ++ Address src_klass_addr = Address(src, oopDesc::klass_offset_in_bytes()); ++ Address dst_klass_addr = Address(dst, oopDesc::klass_offset_in_bytes()); ++ ++ // test for NULL ++ if (flags & LIR_OpArrayCopy::src_null_check) { ++ __ beqz(src, *stub->entry()); ++ } ++ if (flags & LIR_OpArrayCopy::dst_null_check) { ++ __ beqz(dst, *stub->entry()); ++ } ++ ++ // If the compiler was not able to prove that exact type of the source or the destination ++ // of the arraycopy is an array type, check at runtime if the source or the destination is ++ // an instance type. 
++ if (flags & LIR_OpArrayCopy::type_check) { ++ if (!(flags & LIR_OpArrayCopy::LIR_OpArrayCopy::dst_objarray)) { ++ __ load_klass(tmp, dst); ++ __ ld_w(SCR1, Address(tmp, in_bytes(Klass::layout_helper_offset()))); ++ __ li(SCR2, Klass::_lh_neutral_value); ++ __ bge_far(SCR1, SCR2, *stub->entry(), true); ++ } ++ ++ if (!(flags & LIR_OpArrayCopy::LIR_OpArrayCopy::src_objarray)) { ++ __ load_klass(tmp, src); ++ __ ld_w(SCR1, Address(tmp, in_bytes(Klass::layout_helper_offset()))); ++ __ li(SCR2, Klass::_lh_neutral_value); ++ __ bge_far(SCR1, SCR2, *stub->entry(), true); ++ } ++ } ++ ++ // check if negative ++ if (flags & LIR_OpArrayCopy::src_pos_positive_check) { ++ __ blt_far(src_pos, R0, *stub->entry(), true); ++ } ++ if (flags & LIR_OpArrayCopy::dst_pos_positive_check) { ++ __ blt_far(dst_pos, R0, *stub->entry(), true); ++ } ++ ++ if (flags & LIR_OpArrayCopy::length_positive_check) { ++ __ blt_far(length, R0, *stub->entry(), true); ++ } ++ ++ if (flags & LIR_OpArrayCopy::src_range_check) { ++ __ add_w(tmp, src_pos, length); ++ __ ld_wu(SCR1, src_length_addr); ++ __ blt_far(SCR1, tmp, *stub->entry(), false); ++ } ++ if (flags & LIR_OpArrayCopy::dst_range_check) { ++ __ add_w(tmp, dst_pos, length); ++ __ ld_wu(SCR1, dst_length_addr); ++ __ blt_far(SCR1, tmp, *stub->entry(), false); ++ } ++ ++ if (flags & LIR_OpArrayCopy::type_check) { ++ // We don't know the array types are compatible ++ if (basic_type != T_OBJECT) { ++ // Simple test for basic type arrays ++ if (UseCompressedClassPointers) { ++ __ ld_wu(tmp, src_klass_addr); ++ __ ld_wu(SCR1, dst_klass_addr); ++ } else { ++ __ ld_ptr(tmp, src_klass_addr); ++ __ ld_ptr(SCR1, dst_klass_addr); ++ } ++ __ bne_far(tmp, SCR1, *stub->entry()); ++ } else { ++ // For object arrays, if src is a sub class of dst then we can ++ // safely do the copy. ++ Label cont, slow; ++ ++ __ addi_d(SP, SP, -2 * wordSize); ++ __ st_ptr(dst, Address(SP, 0 * wordSize)); ++ __ st_ptr(src, Address(SP, 1 * wordSize)); ++ ++ __ load_klass(src, src); ++ __ load_klass(dst, dst); ++ ++ __ check_klass_subtype_fast_path(src, dst, tmp, &cont, &slow, NULL); ++ ++ __ addi_d(SP, SP, -2 * wordSize); ++ __ st_ptr(dst, Address(SP, 0 * wordSize)); ++ __ st_ptr(src, Address(SP, 1 * wordSize)); ++ __ call(Runtime1::entry_for(Runtime1::slow_subtype_check_id), relocInfo::runtime_call_type); ++ __ ld_ptr(dst, Address(SP, 0 * wordSize)); ++ __ ld_ptr(src, Address(SP, 1 * wordSize)); ++ __ addi_d(SP, SP, 2 * wordSize); ++ ++ __ bnez(dst, cont); ++ ++ __ bind(slow); ++ __ ld_ptr(dst, Address(SP, 0 * wordSize)); ++ __ ld_ptr(src, Address(SP, 1 * wordSize)); ++ __ addi_d(SP, SP, 2 * wordSize); ++ ++ address copyfunc_addr = StubRoutines::checkcast_arraycopy(); ++ if (copyfunc_addr != NULL) { // use stub if available ++ // src is not a sub class of dst so we have to do a ++ // per-element check. ++ ++ int mask = LIR_OpArrayCopy::src_objarray|LIR_OpArrayCopy::dst_objarray; ++ if ((flags & mask) != mask) { ++ // Check that at least both of them object arrays. 
++ assert(flags & mask, "one of the two should be known to be an object array"); ++ ++ if (!(flags & LIR_OpArrayCopy::src_objarray)) { ++ __ load_klass(tmp, src); ++ } else if (!(flags & LIR_OpArrayCopy::dst_objarray)) { ++ __ load_klass(tmp, dst); ++ } ++ int lh_offset = in_bytes(Klass::layout_helper_offset()); ++ Address klass_lh_addr(tmp, lh_offset); ++ jint objArray_lh = Klass::array_layout_helper(T_OBJECT); ++ __ ld_w(SCR1, klass_lh_addr); ++ __ li(SCR2, objArray_lh); ++ __ XOR(SCR1, SCR1, SCR2); ++ __ bnez(SCR1, *stub->entry()); ++ } ++ ++ // Spill because stubs can use any register they like and it's ++ // easier to restore just those that we care about. ++ __ st_ptr(dst, Address(SP, 0 * BytesPerWord)); ++ __ st_ptr(dst_pos, Address(SP, 1 * BytesPerWord)); ++ __ st_ptr(length, Address(SP, 2 * BytesPerWord)); ++ __ st_ptr(src_pos, Address(SP, 3 * BytesPerWord)); ++ __ st_ptr(src, Address(SP, 4 * BytesPerWord)); ++ ++ __ lea(A0, Address(src, src_pos, scale)); ++ __ addi_d(A0, A0, arrayOopDesc::base_offset_in_bytes(basic_type)); ++ assert_different_registers(A0, dst, dst_pos, length); ++ __ lea(A1, Address(dst, dst_pos, scale)); ++ __ addi_d(A1, A1, arrayOopDesc::base_offset_in_bytes(basic_type)); ++ assert_different_registers(A1, dst, length); ++ __ bstrpick_d(A2, length, 31, 0); ++ assert_different_registers(A2, dst); ++ ++ __ load_klass(A4, dst); ++ __ ld_ptr(A4, Address(A4, ObjArrayKlass::element_klass_offset())); ++ __ ld_w(A3, Address(A4, Klass::super_check_offset_offset())); ++ __ call(copyfunc_addr, relocInfo::runtime_call_type); ++ ++#ifndef PRODUCT ++ if (PrintC1Statistics) { ++ Label failed; ++ __ bnez(A0, failed); ++ __ li(SCR2, (address)&Runtime1::_arraycopy_checkcast_cnt); ++ __ increment(SCR2, 1); ++ __ bind(failed); ++ } ++#endif ++ ++ __ beqz(A0, *stub->continuation()); ++ ++#ifndef PRODUCT ++ if (PrintC1Statistics) { ++ __ li(SCR2, (address)&Runtime1::_arraycopy_checkcast_attempt_cnt); ++ __ increment(SCR2, 1); ++ } ++#endif ++ assert_different_registers(dst, dst_pos, length, src_pos, src, A0, SCR1); ++ ++ // Restore previously spilled arguments ++ __ ld_ptr(dst, Address(SP, 0 * BytesPerWord)); ++ __ ld_ptr(dst_pos, Address(SP, 1 * BytesPerWord)); ++ __ ld_ptr(length, Address(SP, 2 * BytesPerWord)); ++ __ ld_ptr(src_pos, Address(SP, 3 * BytesPerWord)); ++ __ ld_ptr(src, Address(SP, 4 * BytesPerWord)); ++ ++ // return value is -1^K where K is partial copied count ++ __ nor(SCR1, A0, R0); ++ __ slli_w(SCR1, SCR1, 0); ++ // adjust length down and src/end pos up by partial copied count ++ __ sub_w(length, length, SCR1); ++ __ add_w(src_pos, src_pos, SCR1); ++ __ add_w(dst_pos, dst_pos, SCR1); ++ } ++ ++ __ b(*stub->entry()); ++ ++ __ bind(cont); ++ __ ld_ptr(dst, Address(SP, 0 * wordSize)); ++ __ ld_ptr(src, Address(SP, 1 * wordSize)); ++ __ addi_d(SP, SP, 2 * wordSize); ++ } ++ } ++ ++#ifdef ASSERT ++ if (basic_type != T_OBJECT || !(flags & LIR_OpArrayCopy::type_check)) { ++ // Sanity check the known type with the incoming class. For the ++ // primitive case the types must match exactly with src.klass and ++ // dst.klass each exactly matching the default type. For the ++ // object array case, if no type check is needed then either the ++ // dst type is exactly the expected type and the src type is a ++ // subtype which we can't check or src is the same array as dst ++ // but not necessarily exactly of type default_type. 
++ Label known_ok, halt; ++ __ mov_metadata(tmp, default_type->constant_encoding()); ++ if (UseCompressedClassPointers) { ++ __ encode_klass_not_null(tmp); ++ } ++ ++ if (basic_type != T_OBJECT) { ++ ++ if (UseCompressedClassPointers) { ++ __ ld_wu(SCR1, dst_klass_addr); ++ } else { ++ __ ld_ptr(SCR1, dst_klass_addr); ++ } ++ __ bne(tmp, SCR1, halt); ++ if (UseCompressedClassPointers) { ++ __ ld_wu(SCR1, src_klass_addr); ++ } else { ++ __ ld_ptr(SCR1, src_klass_addr); ++ } ++ __ beq(tmp, SCR1, known_ok); ++ } else { ++ if (UseCompressedClassPointers) { ++ __ ld_wu(SCR1, dst_klass_addr); ++ } else { ++ __ ld_ptr(SCR1, dst_klass_addr); ++ } ++ __ beq(tmp, SCR1, known_ok); ++ __ beq(src, dst, known_ok); ++ } ++ __ bind(halt); ++ __ stop("incorrect type information in arraycopy"); ++ __ bind(known_ok); ++ } ++#endif ++ ++#ifndef PRODUCT ++ if (PrintC1Statistics) { ++ __ li(SCR2, Runtime1::arraycopy_count_address(basic_type)); ++ __ increment(SCR2, 1); ++ } ++#endif ++ ++ __ lea(A0, Address(src, src_pos, scale)); ++ __ addi_d(A0, A0, arrayOopDesc::base_offset_in_bytes(basic_type)); ++ assert_different_registers(A0, dst, dst_pos, length); ++ __ lea(A1, Address(dst, dst_pos, scale)); ++ __ addi_d(A1, A1, arrayOopDesc::base_offset_in_bytes(basic_type)); ++ assert_different_registers(A1, length); ++ __ bstrpick_d(A2, length, 31, 0); ++ ++ bool disjoint = (flags & LIR_OpArrayCopy::overlapping) == 0; ++ bool aligned = (flags & LIR_OpArrayCopy::unaligned) == 0; ++ const char *name; ++ address entry = StubRoutines::select_arraycopy_function(basic_type, aligned, disjoint, name, false); ++ ++ CodeBlob *cb = CodeCache::find_blob(entry); ++ if (cb) { ++ __ call(entry, relocInfo::runtime_call_type); ++ } else { ++ __ call_VM_leaf(entry, 3); ++ } ++ ++ __ bind(*stub->continuation()); ++} ++ ++void LIR_Assembler::emit_lock(LIR_OpLock* op) { ++ Register obj = op->obj_opr()->as_register(); // may not be an oop ++ Register hdr = op->hdr_opr()->as_register(); ++ Register lock = op->lock_opr()->as_register(); ++ if (!UseFastLocking) { ++ __ b(*op->stub()->entry()); ++ } else if (op->code() == lir_lock) { ++ Register scratch = noreg; ++ if (UseBiasedLocking) { ++ scratch = op->scratch_opr()->as_register(); ++ } ++ assert(BasicLock::displaced_header_offset_in_bytes() == 0, ++ "lock_reg must point to the displaced header"); ++ // add debug info for NullPointerException only if one is possible ++ int null_check_offset = __ lock_object(hdr, obj, lock, scratch, *op->stub()->entry()); ++ if (op->info() != NULL) { ++ add_debug_info_for_null_check(null_check_offset, op->info()); ++ } ++ // done ++ } else if (op->code() == lir_unlock) { ++ assert(BasicLock::displaced_header_offset_in_bytes() == 0, ++ "lock_reg must point to the displaced header"); ++ __ unlock_object(hdr, obj, lock, *op->stub()->entry()); ++ } else { ++ Unimplemented(); ++ } ++ __ bind(*op->stub()->continuation()); ++} ++ ++void LIR_Assembler::emit_profile_call(LIR_OpProfileCall* op) { ++ ciMethod* method = op->profiled_method(); ++ ciMethod* callee = op->profiled_callee(); ++ int bci = op->profiled_bci(); ++ ++ // Update counter for all call types ++ ciMethodData* md = method->method_data_or_null(); ++ assert(md != NULL, "Sanity"); ++ ciProfileData* data = md->bci_to_data(bci); ++ assert(data != NULL && data->is_CounterData(), "need CounterData for calls"); ++ assert(op->mdo()->is_single_cpu(), "mdo must be allocated"); ++ Register mdo = op->mdo()->as_register(); ++ __ mov_metadata(mdo, md->constant_encoding()); ++ Address counter_addr(mdo, 
md->byte_offset_of_slot(data, CounterData::count_offset())); ++ Bytecodes::Code bc = method->java_code_at_bci(bci); ++ const bool callee_is_static = callee->is_loaded() && callee->is_static(); ++ // Perform additional virtual call profiling for invokevirtual and ++ // invokeinterface bytecodes ++ if ((bc == Bytecodes::_invokevirtual || bc == Bytecodes::_invokeinterface) && ++ !callee_is_static && // required for optimized MH invokes ++ C1ProfileVirtualCalls) { ++ assert(op->recv()->is_single_cpu(), "recv must be allocated"); ++ Register recv = op->recv()->as_register(); ++ assert_different_registers(mdo, recv); ++ assert(data->is_VirtualCallData(), "need VirtualCallData for virtual calls"); ++ ciKlass* known_klass = op->known_holder(); ++ if (C1OptimizeVirtualCallProfiling && known_klass != NULL) { ++ // We know the type that will be seen at this call site; we can ++ // statically update the MethodData* rather than needing to do ++ // dynamic tests on the receiver type ++ ++ // NOTE: we should probably put a lock around this search to ++ // avoid collisions by concurrent compilations ++ ciVirtualCallData* vc_data = (ciVirtualCallData*) data; ++ uint i; ++ for (i = 0; i < VirtualCallData::row_limit(); i++) { ++ ciKlass* receiver = vc_data->receiver(i); ++ if (known_klass->equals(receiver)) { ++ Address data_addr(mdo, md->byte_offset_of_slot(data, VirtualCallData::receiver_count_offset(i))); ++ __ ld_ptr(SCR2, data_addr); ++ __ addi_d(SCR2, SCR2, DataLayout::counter_increment); ++ __ st_ptr(SCR2, data_addr); ++ return; ++ } ++ } ++ ++ // Receiver type not found in profile data; select an empty slot ++ ++ // Note that this is less efficient than it should be because it ++ // always does a write to the receiver part of the ++ // VirtualCallData rather than just the first time ++ for (i = 0; i < VirtualCallData::row_limit(); i++) { ++ ciKlass* receiver = vc_data->receiver(i); ++ if (receiver == NULL) { ++ Address recv_addr(mdo, md->byte_offset_of_slot(data, VirtualCallData::receiver_offset(i))); ++ __ mov_metadata(SCR2, known_klass->constant_encoding()); ++ __ lea(SCR1, recv_addr); ++ __ st_ptr(SCR2, SCR1, 0); ++ Address data_addr(mdo, md->byte_offset_of_slot(data, VirtualCallData::receiver_count_offset(i))); ++ __ ld_ptr(SCR2, data_addr); ++ __ addi_d(SCR2, SCR2, DataLayout::counter_increment); ++ __ st_ptr(SCR2, data_addr); ++ return; ++ } ++ } ++ } else { ++ __ load_klass(recv, recv); ++ Label update_done; ++ type_profile_helper(mdo, md, data, recv, &update_done); ++ // Receiver did not match any saved receiver and there is no empty row for it. ++ // Increment total counter to indicate polymorphic case.
++ __ ld_ptr(SCR2, counter_addr); ++ __ addi_d(SCR2, SCR2, DataLayout::counter_increment); ++ __ st_ptr(SCR2, counter_addr); ++ ++ __ bind(update_done); ++ } ++ } else { ++ // Static call ++ __ ld_ptr(SCR2, counter_addr); ++ __ addi_d(SCR2, SCR2, DataLayout::counter_increment); ++ __ st_ptr(SCR2, counter_addr); ++ } ++} ++ ++void LIR_Assembler::emit_delay(LIR_OpDelay*) { ++ Unimplemented(); ++} ++ ++void LIR_Assembler::monitor_address(int monitor_no, LIR_Opr dst) { ++ __ lea(dst->as_register(), frame_map()->address_for_monitor_lock(monitor_no)); ++} ++ ++void LIR_Assembler::emit_updatecrc32(LIR_OpUpdateCRC32* op) { ++ assert(op->crc()->is_single_cpu(), "crc must be register"); ++ assert(op->val()->is_single_cpu(), "byte value must be register"); ++ assert(op->result_opr()->is_single_cpu(), "result must be register"); ++ Register crc = op->crc()->as_register(); ++ Register val = op->val()->as_register(); ++ Register res = op->result_opr()->as_register(); ++ ++ assert_different_registers(val, crc, res); ++ __ li(res, StubRoutines::crc_table_addr()); ++ __ nor(crc, crc, R0); // ~crc ++ __ update_byte_crc32(crc, val, res); ++ __ nor(res, crc, R0); // ~crc ++} ++ ++void LIR_Assembler::emit_profile_type(LIR_OpProfileType* op) { ++ COMMENT("emit_profile_type {"); ++ Register obj = op->obj()->as_register(); ++ Register tmp = op->tmp()->as_pointer_register(); ++ Address mdo_addr = as_Address(op->mdp()->as_address_ptr()); ++ ciKlass* exact_klass = op->exact_klass(); ++ intptr_t current_klass = op->current_klass(); ++ bool not_null = op->not_null(); ++ bool no_conflict = op->no_conflict(); ++ ++ Label update, next, none; ++ ++ bool do_null = !not_null; ++ bool exact_klass_set = exact_klass != NULL && ciTypeEntries::valid_ciklass(current_klass) == exact_klass; ++ bool do_update = !TypeEntries::is_type_unknown(current_klass) && !exact_klass_set; ++ ++ assert(do_null || do_update, "why are we here?"); ++ assert(!TypeEntries::was_null_seen(current_klass) || do_update, "why are we here?"); ++ assert(mdo_addr.base() != SCR1, "wrong register"); ++ ++ __ verify_oop(obj); ++ ++ if (tmp != obj) { ++ __ move(tmp, obj); ++ } ++ if (do_null) { ++ __ bnez(tmp, update); ++ if (!TypeEntries::was_null_seen(current_klass)) { ++ __ ld_ptr(SCR2, mdo_addr); ++ __ ori(SCR2, SCR2, TypeEntries::null_seen); ++ __ st_ptr(SCR2, mdo_addr); ++ } ++ if (do_update) { ++#ifndef ASSERT ++ __ b(next); ++ } ++#else ++ __ b(next); ++ } ++ } else { ++ __ bnez(tmp, update); ++ __ stop("unexpected null obj"); ++#endif ++ } ++ ++ __ bind(update); ++ ++ if (do_update) { ++#ifdef ASSERT ++ if (exact_klass != NULL) { ++ Label ok; ++ __ load_klass(tmp, tmp); ++ __ mov_metadata(SCR1, exact_klass->constant_encoding()); ++ __ XOR(SCR1, tmp, SCR1); ++ __ beqz(SCR1, ok); ++ __ stop("exact klass and actual klass differ"); ++ __ bind(ok); ++ } ++#endif ++ if (!no_conflict) { ++ if (exact_klass == NULL || TypeEntries::is_type_none(current_klass)) { ++ if (exact_klass != NULL) { ++ __ mov_metadata(tmp, exact_klass->constant_encoding()); ++ } else { ++ __ load_klass(tmp, tmp); ++ } ++ ++ __ ld_ptr(SCR2, mdo_addr); ++ __ XOR(tmp, tmp, SCR2); ++ assert(TypeEntries::type_klass_mask == -4, "must be"); ++ __ bstrpick_d(SCR1, tmp, 63, 2); ++ // klass seen before, nothing to do. The unknown bit may have been ++ // set already but no need to check. ++ __ beqz(SCR1, next); ++ ++ __ andi(SCR1, tmp, TypeEntries::type_unknown); ++ __ bnez(SCR1, next); // already unknown. Nothing to do anymore. 
++ ++ if (TypeEntries::is_type_none(current_klass)) { ++ __ beqz(SCR2, none); ++ __ li(SCR1, (u1)TypeEntries::null_seen); ++ __ beq(SCR2, SCR1, none); ++ // There is a chance that the checks above (re-reading profiling ++ // data from memory) fail if another thread has just set the ++ // profiling to this obj's klass ++ membar_acquire(); ++ __ ld_ptr(SCR2, mdo_addr); ++ __ XOR(tmp, tmp, SCR2); ++ assert(TypeEntries::type_klass_mask == -4, "must be"); ++ __ bstrpick_d(SCR1, tmp, 63, 2); ++ __ beqz(SCR1, next); ++ } ++ } else { ++ assert(ciTypeEntries::valid_ciklass(current_klass) != NULL && ++ ciTypeEntries::valid_ciklass(current_klass) != exact_klass, "conflict only"); ++ ++ __ ld_ptr(tmp, mdo_addr); ++ __ andi(SCR2, tmp, TypeEntries::type_unknown); ++ __ bnez(SCR2, next); // already unknown. Nothing to do anymore. ++ } ++ ++ // different than before. Cannot keep accurate profile. ++ __ ld_ptr(SCR2, mdo_addr); ++ __ ori(SCR2, SCR2, TypeEntries::type_unknown); ++ __ st_ptr(SCR2, mdo_addr); ++ ++ if (TypeEntries::is_type_none(current_klass)) { ++ __ b(next); ++ ++ __ bind(none); ++ // first time here. Set profile type. ++ __ st_ptr(tmp, mdo_addr); ++ } ++ } else { ++ // There's a single possible klass at this profile point ++ assert(exact_klass != NULL, "should be"); ++ if (TypeEntries::is_type_none(current_klass)) { ++ __ mov_metadata(tmp, exact_klass->constant_encoding()); ++ __ ld_ptr(SCR2, mdo_addr); ++ __ XOR(tmp, tmp, SCR2); ++ assert(TypeEntries::type_klass_mask == -4, "must be"); ++ __ bstrpick_d(SCR1, tmp, 63, 2); ++ __ beqz(SCR1, next); ++#ifdef ASSERT ++ { ++ Label ok; ++ __ ld_ptr(SCR1, mdo_addr); ++ __ beqz(SCR1, ok); ++ __ li(SCR2, (u1)TypeEntries::null_seen); ++ __ beq(SCR1, SCR2, ok); ++ // may have been set by another thread ++ membar_acquire(); ++ __ mov_metadata(SCR1, exact_klass->constant_encoding()); ++ __ ld_ptr(SCR2, mdo_addr); ++ __ XOR(SCR2, SCR1, SCR2); ++ assert(TypeEntries::type_mask == -2, "must be"); ++ __ bstrpick_d(SCR2, SCR2, 63, 1); ++ __ beqz(SCR2, ok); ++ ++ __ stop("unexpected profiling mismatch"); ++ __ bind(ok); ++ } ++#endif ++ // first time here. Set profile type. ++ __ st_ptr(tmp, mdo_addr); ++ } else { ++ assert(ciTypeEntries::valid_ciklass(current_klass) != NULL && ++ ciTypeEntries::valid_ciklass(current_klass) != exact_klass, "inconsistent"); ++ ++ __ ld_ptr(tmp, mdo_addr); ++ __ andi(SCR1, tmp, TypeEntries::type_unknown); ++ __ bnez(SCR1, next); // already unknown. Nothing to do anymore. ++ ++ __ ori(tmp, tmp, TypeEntries::type_unknown); ++ __ st_ptr(tmp, mdo_addr); ++ // FIXME: Write barrier needed here? 
++ } ++ } ++ ++ __ bind(next); ++ } ++ COMMENT("} emit_profile_type"); ++} ++ ++void LIR_Assembler::align_backward_branch_target() {} ++ ++void LIR_Assembler::negate(LIR_Opr left, LIR_Opr dest) { ++ if (left->is_single_cpu()) { ++ assert(dest->is_single_cpu(), "expect single result reg"); ++ __ sub_w(dest->as_register(), R0, left->as_register()); ++ } else if (left->is_double_cpu()) { ++ assert(dest->is_double_cpu(), "expect double result reg"); ++ __ sub_d(dest->as_register_lo(), R0, left->as_register_lo()); ++ } else if (left->is_single_fpu()) { ++ assert(dest->is_single_fpu(), "expect single float result reg"); ++ __ fneg_s(dest->as_float_reg(), left->as_float_reg()); ++ } else { ++ assert(left->is_double_fpu(), "expect double float operand reg"); ++ assert(dest->is_double_fpu(), "expect double float result reg"); ++ __ fneg_d(dest->as_double_reg(), left->as_double_reg()); ++ } ++} ++ ++void LIR_Assembler::leal(LIR_Opr addr, LIR_Opr dest) { ++ __ lea(dest->as_register_lo(), as_Address(addr->as_address_ptr())); ++} ++ ++void LIR_Assembler::rt_call(LIR_Opr result, address dest, const LIR_OprList* args, ++ LIR_Opr tmp, CodeEmitInfo* info) { ++ assert(!tmp->is_valid(), "don't need temporary"); ++ __ call(dest, relocInfo::runtime_call_type); ++ if (info != NULL) { ++ add_call_info_here(info); ++ } ++} ++ ++void LIR_Assembler::volatile_move_op(LIR_Opr src, LIR_Opr dest, BasicType type, ++ CodeEmitInfo* info) { ++ if (dest->is_address() || src->is_address()) { ++ move_op(src, dest, type, lir_patch_none, info, ++ /*pop_fpu_stack*/false, /*unaligned*/false, /*wide*/false); ++ } else { ++ ShouldNotReachHere(); ++ } ++} ++ ++#ifdef ASSERT ++// emit run-time assertion ++void LIR_Assembler::emit_assert(LIR_OpAssert* op) { ++ assert(op->code() == lir_assert, "must be"); ++ Label ok; ++ ++ if (op->in_opr1()->is_valid()) { ++ assert(op->in_opr2()->is_valid(), "both operands must be valid"); ++ assert(op->in_opr1()->is_cpu_register() || op->in_opr2()->is_cpu_register(), "must be"); ++ Register reg1 = as_reg(op->in_opr1()); ++ Register reg2 = as_reg(op->in_opr2()); ++ switch (op->condition()) { ++ case lir_cond_equal: __ beq(reg1, reg2, ok); break; ++ case lir_cond_notEqual: __ bne(reg1, reg2, ok); break; ++ case lir_cond_less: __ blt(reg1, reg2, ok); break; ++ case lir_cond_lessEqual: __ bge(reg2, reg1, ok); break; ++ case lir_cond_greaterEqual: __ bge(reg1, reg2, ok); break; ++ case lir_cond_greater: __ blt(reg2, reg1, ok); break; ++ case lir_cond_belowEqual: __ bgeu(reg2, reg1, ok); break; ++ case lir_cond_aboveEqual: __ bgeu(reg1, reg2, ok); break; ++ default: ShouldNotReachHere(); ++ } ++ } else { ++ assert(op->in_opr2()->is_illegal(), "both operands must be illegal"); ++ assert(op->condition() == lir_cond_always, "no other conditions allowed"); ++ } ++ if (op->halt()) { ++ const char* str = __ code_string(op->msg()); ++ __ stop(str); ++ } else { ++ breakpoint(); ++ } ++ __ bind(ok); ++} ++#endif ++ ++#ifndef PRODUCT ++#define COMMENT(x) do { __ block_comment(x); } while (0) ++#else ++#define COMMENT(x) ++#endif ++ ++void LIR_Assembler::membar() { ++ COMMENT("membar"); ++ __ membar(Assembler::AnyAny); ++} ++ ++void LIR_Assembler::membar_acquire() { ++ __ membar(Assembler::Membar_mask_bits(Assembler::LoadLoad | Assembler::LoadStore)); ++} ++ ++void LIR_Assembler::membar_release() { ++ __ membar(Assembler::Membar_mask_bits(Assembler::LoadStore|Assembler::StoreStore)); ++} ++ ++void LIR_Assembler::membar_loadload() { ++ __ membar(Assembler::LoadLoad); ++} ++ ++void LIR_Assembler::membar_storestore() { 
++ __ membar(MacroAssembler::StoreStore); ++} ++ ++void LIR_Assembler::membar_loadstore() { ++ __ membar(MacroAssembler::LoadStore); ++} ++ ++void LIR_Assembler::membar_storeload() { ++ __ membar(MacroAssembler::StoreLoad); ++} ++ ++void LIR_Assembler::get_thread(LIR_Opr result_reg) { ++ __ move(result_reg->as_register(), TREG); ++} ++ ++void LIR_Assembler::peephole(LIR_List *lir) { ++} ++ ++void LIR_Assembler::atomic_op(LIR_Code code, LIR_Opr src, LIR_Opr data, ++ LIR_Opr dest, LIR_Opr tmp_op) { ++ Address addr = as_Address(src->as_address_ptr()); ++ BasicType type = src->type(); ++ Register dst = as_reg(dest); ++ Register tmp = as_reg(tmp_op); ++ bool is_oop = is_reference_type(type); ++ ++ if (Assembler::is_simm(addr.disp(), 12)) { ++ __ addi_d(tmp, addr.base(), addr.disp()); ++ } else { ++ __ li(tmp, addr.disp()); ++ __ add_d(tmp, addr.base(), tmp); ++ } ++ if (addr.index() != noreg) { ++ if (addr.scale() > Address::times_1) ++ __ alsl_d(tmp, addr.index(), tmp, addr.scale() - 1); ++ else ++ __ add_d(tmp, tmp, addr.index()); ++ } ++ ++ switch(type) { ++ case T_INT: ++ break; ++ case T_LONG: ++ break; ++ case T_OBJECT: ++ case T_ARRAY: ++ if (UseCompressedOops) { ++ // unsigned int ++ } else { ++ // long ++ } ++ break; ++ default: ++ ShouldNotReachHere(); ++ } ++ ++ if (code == lir_xadd) { ++ Register inc = noreg; ++ if (data->is_constant()) { ++ inc = SCR1; ++ __ li(inc, as_long(data)); ++ } else { ++ inc = as_reg(data); ++ } ++ switch(type) { ++ case T_INT: ++ __ amadd_db_w(dst, inc, tmp); ++ break; ++ case T_LONG: ++ __ amadd_db_d(dst, inc, tmp); ++ break; ++ case T_OBJECT: ++ case T_ARRAY: ++ if (UseCompressedOops) { ++ __ amadd_db_w(dst, inc, tmp); ++ __ lu32i_d(dst, 0); ++ } else { ++ __ amadd_db_d(dst, inc, tmp); ++ } ++ break; ++ default: ++ ShouldNotReachHere(); ++ } ++ } else if (code == lir_xchg) { ++ Register obj = as_reg(data); ++ if (is_oop && UseCompressedOops) { ++ __ encode_heap_oop(SCR2, obj); ++ obj = SCR2; ++ } ++ switch(type) { ++ case T_INT: ++ __ amswap_db_w(dst, obj, tmp); ++ break; ++ case T_LONG: ++ __ amswap_db_d(dst, obj, tmp); ++ break; ++ case T_OBJECT: ++ case T_ARRAY: ++ if (UseCompressedOops) { ++ __ amswap_db_w(dst, obj, tmp); ++ __ lu32i_d(dst, 0); ++ } else { ++ __ amswap_db_d(dst, obj, tmp); ++ } ++ break; ++ default: ++ ShouldNotReachHere(); ++ } ++ if (is_oop && UseCompressedOops) { ++ __ decode_heap_oop(dst); ++ } ++ } else { ++ ShouldNotReachHere(); ++ } ++} ++ ++#undef __ +diff --git a/hotspot/src/cpu/loongarch/vm/c1_LIRGenerator_loongarch_64.cpp b/hotspot/src/cpu/loongarch/vm/c1_LIRGenerator_loongarch_64.cpp +new file mode 100644 +index 0000000000..7cb15f689f +--- /dev/null ++++ b/hotspot/src/cpu/loongarch/vm/c1_LIRGenerator_loongarch_64.cpp +@@ -0,0 +1,1442 @@ ++/* ++ * Copyright (c) 2005, 2021, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2021, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). 
++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#include "precompiled.hpp" ++#include "asm/macroAssembler.inline.hpp" ++#include "c1/c1_Compilation.hpp" ++#include "c1/c1_FrameMap.hpp" ++#include "c1/c1_Instruction.hpp" ++#include "c1/c1_LIRAssembler.hpp" ++#include "c1/c1_LIRGenerator.hpp" ++#include "c1/c1_Runtime1.hpp" ++#include "c1/c1_ValueStack.hpp" ++#include "ci/ciArray.hpp" ++#include "ci/ciObjArrayKlass.hpp" ++#include "ci/ciTypeArrayKlass.hpp" ++#include "runtime/sharedRuntime.hpp" ++#include "runtime/stubRoutines.hpp" ++#include "vmreg_loongarch.inline.hpp" ++ ++#ifdef ASSERT ++#define __ gen()->lir(__FILE__, __LINE__)-> ++#else ++#define __ gen()->lir()-> ++#endif ++ ++// Item will be loaded into a byte register; Intel only ++void LIRItem::load_byte_item() { ++ load_item(); ++} ++ ++void LIRItem::load_nonconstant() { ++ LIR_Opr r = value()->operand(); ++ if (r->is_constant()) { ++ _result = r; ++ } else { ++ load_item(); ++ } ++} ++ ++//-------------------------------------------------------------- ++// LIRGenerator ++//-------------------------------------------------------------- ++ ++LIR_Opr LIRGenerator::exceptionOopOpr() { return FrameMap::a0_oop_opr; } ++LIR_Opr LIRGenerator::exceptionPcOpr() { return FrameMap::a1_opr; } ++LIR_Opr LIRGenerator::divInOpr() { Unimplemented(); return LIR_OprFact::illegalOpr; } ++LIR_Opr LIRGenerator::divOutOpr() { Unimplemented(); return LIR_OprFact::illegalOpr; } ++LIR_Opr LIRGenerator::remOutOpr() { Unimplemented(); return LIR_OprFact::illegalOpr; } ++LIR_Opr LIRGenerator::shiftCountOpr() { Unimplemented(); return LIR_OprFact::illegalOpr; } ++LIR_Opr LIRGenerator::syncTempOpr() { return FrameMap::a0_opr; } ++LIR_Opr LIRGenerator::getThreadTemp() { return LIR_OprFact::illegalOpr; } ++ ++LIR_Opr LIRGenerator::result_register_for(ValueType* type, bool callee) { ++ LIR_Opr opr; ++ switch (type->tag()) { ++ case intTag: opr = FrameMap::a0_opr; break; ++ case objectTag: opr = FrameMap::a0_oop_opr; break; ++ case longTag: opr = FrameMap::long0_opr; break; ++ case floatTag: opr = FrameMap::fpu0_float_opr; break; ++ case doubleTag: opr = FrameMap::fpu0_double_opr; break; ++ case addressTag: ++ default: ShouldNotReachHere(); return LIR_OprFact::illegalOpr; ++ } ++ ++ assert(opr->type_field() == as_OprType(as_BasicType(type)), "type mismatch"); ++ return opr; ++} ++ ++LIR_Opr LIRGenerator::rlock_byte(BasicType type) { ++ LIR_Opr reg = new_register(T_INT); ++ set_vreg_flag(reg, LIRGenerator::byte_reg); ++ return reg; ++} ++ ++//--------- loading items into registers -------------------------------- ++ ++bool LIRGenerator::can_store_as_constant(Value v, BasicType type) const { ++ if (v->type()->as_IntConstant() != NULL) { ++ return v->type()->as_IntConstant()->value() == 0L; ++ } else if (v->type()->as_LongConstant() != NULL) { ++ return v->type()->as_LongConstant()->value() == 0L; ++ } else if (v->type()->as_ObjectConstant() != NULL) { ++ return v->type()->as_ObjectConstant()->value()->is_null_object(); ++ } else { ++ return false; ++ } ++} ++ ++bool LIRGenerator::can_inline_as_constant(Value v) const { ++ // FIXME: Just a guess ++ if (v->type()->as_IntConstant() != NULL) { ++ return 
Assembler::is_simm(v->type()->as_IntConstant()->value(), 12); ++ } else if (v->type()->as_LongConstant() != NULL) { ++ return v->type()->as_LongConstant()->value() == 0L; ++ } else if (v->type()->as_ObjectConstant() != NULL) { ++ return v->type()->as_ObjectConstant()->value()->is_null_object(); ++ } else { ++ return false; ++ } ++} ++ ++bool LIRGenerator::can_inline_as_constant(LIR_Const* c) const { return false; } ++ ++LIR_Opr LIRGenerator::safepoint_poll_register() { ++ return LIR_OprFact::illegalOpr; ++} ++ ++LIR_Address* LIRGenerator::generate_address(LIR_Opr base, LIR_Opr index, ++ int shift, int disp, BasicType type) { ++ assert(base->is_register(), "must be"); ++ intx large_disp = disp; ++ ++ // accumulate fixed displacements ++ if (index->is_constant()) { ++ LIR_Const *constant = index->as_constant_ptr(); ++ if (constant->type() == T_INT) { ++ large_disp += index->as_jint() << shift; ++ } else { ++ assert(constant->type() == T_LONG, "should be"); ++ jlong c = index->as_jlong() << shift; ++ if ((jlong)((jint)c) == c) { ++ large_disp += c; ++ index = LIR_OprFact::illegalOpr; ++ } else { ++ LIR_Opr tmp = new_register(T_LONG); ++ __ move(index, tmp); ++ index = tmp; ++ // apply shift and displacement below ++ } ++ } ++ } ++ ++ if (index->is_register()) { ++ // apply the shift and accumulate the displacement ++ if (shift > 0) { ++ LIR_Opr tmp = new_pointer_register(); ++ __ shift_left(index, shift, tmp); ++ index = tmp; ++ } ++ if (large_disp != 0) { ++ LIR_Opr tmp = new_pointer_register(); ++ if (Assembler::is_simm(large_disp, 12)) { ++ __ add(index, LIR_OprFact::intptrConst(large_disp), tmp); ++ index = tmp; ++ } else { ++ __ move(LIR_OprFact::intptrConst(large_disp), tmp); ++ __ add(tmp, index, tmp); ++ index = tmp; ++ } ++ large_disp = 0; ++ } ++ } else if (large_disp != 0 && !Assembler::is_simm(large_disp, 12)) { ++ // index is illegal so replace it with the displacement loaded into a register ++ index = new_pointer_register(); ++ __ move(LIR_OprFact::intptrConst(large_disp), index); ++ large_disp = 0; ++ } ++ ++ // at this point we either have base + index or base + displacement ++ if (large_disp == 0 && index->is_register()) { ++ return new LIR_Address(base, index, type); ++ } else { ++ assert(Assembler::is_simm(large_disp, 12), "must be"); ++ return new LIR_Address(base, large_disp, type); ++ } ++} ++ ++LIR_Address* LIRGenerator::emit_array_address(LIR_Opr array_opr, LIR_Opr index_opr, BasicType type, bool needs_card_mark) { ++ int offset_in_bytes = arrayOopDesc::base_offset_in_bytes(type); ++ int elem_size = type2aelembytes(type); ++ int shift = exact_log2(elem_size); ++ ++ LIR_Address* addr; ++ if (index_opr->is_constant()) { ++ addr = new LIR_Address(array_opr, offset_in_bytes + (intx)(index_opr->as_jint()) * elem_size, type); ++ } else { ++ if (offset_in_bytes) { ++ LIR_Opr tmp = new_pointer_register(); ++ __ add(array_opr, LIR_OprFact::intConst(offset_in_bytes), tmp); ++ array_opr = tmp; ++ offset_in_bytes = 0; ++ } ++ addr = new LIR_Address(array_opr, index_opr, LIR_Address::scale(type), offset_in_bytes, type); ++ } ++ if (needs_card_mark) { ++ // This store will need a precise card mark, so go ahead and ++ // compute the full adddres instead of computing once for the ++ // store and again for the card mark. 
++ LIR_Opr tmp = new_pointer_register(); ++ __ leal(LIR_OprFact::address(addr), tmp); ++ return new LIR_Address(tmp, type); ++ } else { ++ return addr; ++ } ++} ++ ++LIR_Opr LIRGenerator::load_immediate(int x, BasicType type) { ++ LIR_Opr r; ++ if (type == T_LONG) { ++ r = LIR_OprFact::longConst(x); ++ if (!Assembler::is_simm(x, 12)) { ++ LIR_Opr tmp = new_register(type); ++ __ move(r, tmp); ++ return tmp; ++ } ++ } else if (type == T_INT) { ++ r = LIR_OprFact::intConst(x); ++ if (!Assembler::is_simm(x, 12)) { ++ // This is all rather nasty. We don't know whether our constant ++ // is required for a logical or an arithmetic operation, so we ++ // don't know what the range of valid values is!! ++ LIR_Opr tmp = new_register(type); ++ __ move(r, tmp); ++ return tmp; ++ } ++ } else { ++ ShouldNotReachHere(); ++ r = NULL; // unreachable ++ } ++ return r; ++} ++ ++void LIRGenerator::increment_counter(address counter, BasicType type, int step) { ++ LIR_Opr pointer = new_pointer_register(); ++ __ move(LIR_OprFact::intptrConst(counter), pointer); ++ LIR_Address* addr = new LIR_Address(pointer, type); ++ increment_counter(addr, step); ++} ++ ++void LIRGenerator::increment_counter(LIR_Address* addr, int step) { ++ LIR_Opr imm = NULL; ++ switch(addr->type()) { ++ case T_INT: ++ imm = LIR_OprFact::intConst(step); ++ break; ++ case T_LONG: ++ imm = LIR_OprFact::longConst(step); ++ break; ++ default: ++ ShouldNotReachHere(); ++ } ++ LIR_Opr reg = new_register(addr->type()); ++ __ load(addr, reg); ++ __ add(reg, imm, reg); ++ __ store(reg, addr); ++} ++ ++template <typename T> ++void LIRGenerator::cmp_mem_int_branch(LIR_Condition condition, LIR_Opr base, ++ int disp, int c, T tgt, CodeEmitInfo* info) { ++ LIR_Opr reg = new_register(T_INT); ++ __ load(generate_address(base, disp, T_INT), reg, info); ++ __ cmp_branch(condition, reg, LIR_OprFact::intConst(c), T_INT, tgt); ++} ++ ++// Explicit instantiation for all supported types. ++template void LIRGenerator::cmp_mem_int_branch(LIR_Condition, LIR_Opr, int, int, Label*, CodeEmitInfo*); ++template void LIRGenerator::cmp_mem_int_branch(LIR_Condition, LIR_Opr, int, int, BlockBegin*, CodeEmitInfo*); ++template void LIRGenerator::cmp_mem_int_branch(LIR_Condition, LIR_Opr, int, int, CodeStub*, CodeEmitInfo*); ++ ++template <typename T> ++void LIRGenerator::cmp_reg_mem_branch(LIR_Condition condition, LIR_Opr reg, LIR_Opr base, ++ int disp, BasicType type, T tgt, CodeEmitInfo* info) { ++ LIR_Opr reg1 = new_register(T_INT); ++ __ load(generate_address(base, disp, type), reg1, info); ++ __ cmp_branch(condition, reg, reg1, type, tgt); ++} ++ ++// Explicit instantiation for all supported types.
++template void LIRGenerator::cmp_reg_mem_branch(LIR_Condition, LIR_Opr, LIR_Opr, int, BasicType, Label*, CodeEmitInfo*); ++template void LIRGenerator::cmp_reg_mem_branch(LIR_Condition, LIR_Opr, LIR_Opr, int, BasicType, BlockBegin*, CodeEmitInfo*); ++template void LIRGenerator::cmp_reg_mem_branch(LIR_Condition, LIR_Opr, LIR_Opr, int, BasicType, CodeStub*, CodeEmitInfo*); ++ ++bool LIRGenerator::strength_reduce_multiply(LIR_Opr left, jint c, LIR_Opr result, LIR_Opr tmp) { ++ if (is_power_of_2(c - 1)) { ++ __ shift_left(left, exact_log2(c - 1), tmp); ++ __ add(tmp, left, result); ++ return true; ++ } else if (is_power_of_2(c + 1)) { ++ __ shift_left(left, exact_log2(c + 1), tmp); ++ __ sub(tmp, left, result); ++ return true; ++ } else { ++ return false; ++ } ++} ++ ++void LIRGenerator::store_stack_parameter (LIR_Opr item, ByteSize offset_from_sp) { ++ BasicType type = item->type(); ++ __ store(item, new LIR_Address(FrameMap::sp_opr, in_bytes(offset_from_sp), type)); ++} ++ ++//---------------------------------------------------------------------- ++// visitor functions ++//---------------------------------------------------------------------- ++ ++void LIRGenerator::do_StoreIndexed(StoreIndexed* x) { ++ assert(x->is_pinned(),""); ++ bool needs_range_check = x->compute_needs_range_check(); ++ bool use_length = x->length() != NULL; ++ bool obj_store = x->elt_type() == T_ARRAY || x->elt_type() == T_OBJECT; ++ bool needs_store_check = obj_store && (x->value()->as_Constant() == NULL || ++ !get_jobject_constant(x->value())->is_null_object() || ++ x->should_profile()); ++ ++ LIRItem array(x->array(), this); ++ LIRItem index(x->index(), this); ++ LIRItem value(x->value(), this); ++ LIRItem length(this); ++ ++ array.load_item(); ++ index.load_nonconstant(); ++ ++ if (use_length && needs_range_check) { ++ length.set_instruction(x->length()); ++ length.load_item(); ++ ++ } ++ if (needs_store_check || x->check_boolean()) { ++ value.load_item(); ++ } else { ++ value.load_for_store(x->elt_type()); ++ } ++ ++ set_no_result(x); ++ ++ // the CodeEmitInfo must be duplicated for each different ++ // LIR-instruction because spilling can occur anywhere between two ++ // instructions and so the debug information must be different ++ CodeEmitInfo* range_check_info = state_for(x); ++ CodeEmitInfo* null_check_info = NULL; ++ if (x->needs_null_check()) { ++ null_check_info = new CodeEmitInfo(range_check_info); ++ } ++ ++ // emit array address setup early so it schedules better ++ // FIXME? No harm in this on aarch64, and it might help ++ LIR_Address* array_addr = emit_array_address(array.result(), index.result(), x->elt_type(), obj_store); ++ ++ if (GenerateRangeChecks && needs_range_check) { ++ if (use_length) { ++ __ cmp_branch(lir_cond_belowEqual, length.result(), index.result(), x->elt_type(), new RangeCheckStub(range_check_info, index.result())); ++ } else { ++ array_range_check(array.result(), index.result(), null_check_info, range_check_info); ++ // range_check also does the null check ++ null_check_info = NULL; ++ } ++ } ++ ++ if (GenerateArrayStoreCheck && needs_store_check) { ++ LIR_Opr tmp1 = new_register(objectType); ++ LIR_Opr tmp2 = new_register(objectType); ++ LIR_Opr tmp3 = new_register(objectType); ++ ++ CodeEmitInfo* store_check_info = new CodeEmitInfo(range_check_info); ++ __ store_check(value.result(), array.result(), tmp1, tmp2, tmp3, store_check_info, x->profiled_method(), x->profiled_bci()); ++ } ++ ++ if (obj_store) { ++ // Needs GC write barriers. 
++ pre_barrier(LIR_OprFact::address(array_addr), LIR_OprFact::illegalOpr /* pre_val */, ++ true /* do_load */, false /* patch */, NULL); ++ __ move(value.result(), array_addr, null_check_info); ++ // Seems to be a precise ++ post_barrier(LIR_OprFact::address(array_addr), value.result()); ++ } else { ++ LIR_Opr result = maybe_mask_boolean(x, array.result(), value.result(), null_check_info); ++ __ move(result, array_addr, null_check_info); ++ } ++} ++ ++void LIRGenerator::do_MonitorEnter(MonitorEnter* x) { ++ assert(x->is_pinned(),""); ++ LIRItem obj(x->obj(), this); ++ obj.load_item(); ++ ++ set_no_result(x); ++ ++ // "lock" stores the address of the monitor stack slot, so this is not an oop ++ LIR_Opr lock = new_register(T_INT); ++ // Need a scratch register for biased locking ++ LIR_Opr scratch = LIR_OprFact::illegalOpr; ++ if (UseBiasedLocking) { ++ scratch = new_register(T_INT); ++ } ++ ++ CodeEmitInfo* info_for_exception = NULL; ++ if (x->needs_null_check()) { ++ info_for_exception = state_for(x); ++ } ++ // this CodeEmitInfo must not have the xhandlers because here the ++ // object is already locked (xhandlers expect object to be unlocked) ++ CodeEmitInfo* info = state_for(x, x->state(), true); ++ monitor_enter(obj.result(), lock, syncTempOpr(), scratch, ++ x->monitor_no(), info_for_exception, info); ++} ++ ++void LIRGenerator::do_MonitorExit(MonitorExit* x) { ++ assert(x->is_pinned(),""); ++ ++ LIRItem obj(x->obj(), this); ++ obj.dont_load_item(); ++ ++ LIR_Opr lock = new_register(T_INT); ++ LIR_Opr obj_temp = new_register(T_INT); ++ set_no_result(x); ++ monitor_exit(obj_temp, lock, syncTempOpr(), LIR_OprFact::illegalOpr, x->monitor_no()); ++} ++ ++void LIRGenerator::do_NegateOp(NegateOp* x) { ++ LIRItem from(x->x(), this); ++ from.load_item(); ++ LIR_Opr result = rlock_result(x); ++ __ negate (from.result(), result); ++} ++ ++// for _fadd, _fmul, _fsub, _fdiv, _frem ++// _dadd, _dmul, _dsub, _ddiv, _drem ++void LIRGenerator::do_ArithmeticOp_FPU(ArithmeticOp* x) { ++ if (x->op() == Bytecodes::_frem || x->op() == Bytecodes::_drem) { ++ // float remainder is implemented as a direct call into the runtime ++ LIRItem right(x->x(), this); ++ LIRItem left(x->y(), this); ++ ++ BasicTypeList signature(2); ++ if (x->op() == Bytecodes::_frem) { ++ signature.append(T_FLOAT); ++ signature.append(T_FLOAT); ++ } else { ++ signature.append(T_DOUBLE); ++ signature.append(T_DOUBLE); ++ } ++ CallingConvention* cc = frame_map()->c_calling_convention(&signature); ++ ++ const LIR_Opr result_reg = result_register_for(x->type()); ++ left.load_item_force(cc->at(1)); ++ right.load_item(); ++ ++ __ move(right.result(), cc->at(0)); ++ ++ address entry; ++ if (x->op() == Bytecodes::_frem) { ++ entry = CAST_FROM_FN_PTR(address, SharedRuntime::frem); ++ } else { ++ entry = CAST_FROM_FN_PTR(address, SharedRuntime::drem); ++ } ++ ++ LIR_Opr result = rlock_result(x); ++ __ call_runtime_leaf(entry, getThreadTemp(), result_reg, cc->args()); ++ __ move(result_reg, result); ++ return; ++ } ++ ++ LIRItem left(x->x(), this); ++ LIRItem right(x->y(), this); ++ LIRItem* left_arg = &left; ++ LIRItem* right_arg = &right; ++ ++ // Always load right hand side. 
++ right.load_item(); ++ ++ if (!left.is_register()) ++ left.load_item(); ++ ++ LIR_Opr reg = rlock(x); ++ ++ arithmetic_op_fpu(x->op(), reg, left.result(), right.result(), x->is_strictfp()); ++ ++ set_result(x, round_item(reg)); ++} ++ ++// for _ladd, _lmul, _lsub, _ldiv, _lrem ++void LIRGenerator::do_ArithmeticOp_Long(ArithmeticOp* x) { ++ // missing test if instr is commutative and if we should swap ++ LIRItem left(x->x(), this); ++ LIRItem right(x->y(), this); ++ ++ if (x->op() == Bytecodes::_ldiv || x->op() == Bytecodes::_lrem) { ++ left.load_item(); ++ bool need_zero_check = true; ++ if (right.is_constant()) { ++ jlong c = right.get_jlong_constant(); ++ // no need to do div-by-zero check if the divisor is a non-zero constant ++ if (c != 0) need_zero_check = false; ++ // do not load right if the divisor is a power-of-2 constant ++ if (c > 0 && is_power_of_2(c) && Assembler::is_uimm(c - 1, 12)) { ++ right.dont_load_item(); ++ } else { ++ right.load_item(); ++ } ++ } else { ++ right.load_item(); ++ } ++ if (need_zero_check) { ++ CodeEmitInfo* info = state_for(x); ++ CodeStub* stub = new DivByZeroStub(info); ++ __ cmp_branch(lir_cond_equal, right.result(), LIR_OprFact::longConst(0), T_LONG, stub); ++ } ++ ++ rlock_result(x); ++ switch (x->op()) { ++ case Bytecodes::_lrem: ++ __ rem (left.result(), right.result(), x->operand()); ++ break; ++ case Bytecodes::_ldiv: ++ __ div (left.result(), right.result(), x->operand()); ++ break; ++ default: ++ ShouldNotReachHere(); ++ break; ++ } ++ } else { ++ assert(x->op() == Bytecodes::_lmul || x->op() == Bytecodes::_ladd || x->op() == Bytecodes::_lsub, ++ "expect lmul, ladd or lsub"); ++ // add, sub, mul ++ left.load_item(); ++ if (!right.is_register()) { ++ if (x->op() == Bytecodes::_lmul || !right.is_constant() || ++ (x->op() == Bytecodes::_ladd && !Assembler::is_simm(right.get_jlong_constant(), 12)) || ++ (x->op() == Bytecodes::_lsub && !Assembler::is_simm(-right.get_jlong_constant(), 12))) { ++ right.load_item(); ++ } else { // add, sub ++ assert(x->op() == Bytecodes::_ladd || x->op() == Bytecodes::_lsub, "expect ladd or lsub"); ++ // don't load constants to save register ++ right.load_nonconstant(); ++ } ++ } ++ rlock_result(x); ++ arithmetic_op_long(x->op(), x->operand(), left.result(), right.result(), NULL); ++ } ++} ++ ++// for: _iadd, _imul, _isub, _idiv, _irem ++void LIRGenerator::do_ArithmeticOp_Int(ArithmeticOp* x) { ++ // Test if instr is commutative and if we should swap ++ LIRItem left(x->x(), this); ++ LIRItem right(x->y(), this); ++ LIRItem* left_arg = &left; ++ LIRItem* right_arg = &right; ++ if (x->is_commutative() && left.is_stack() && right.is_register()) { ++ // swap them if left is real stack (or cached) and right is real register(not cached) ++ left_arg = &right; ++ right_arg = &left; ++ } ++ ++ left_arg->load_item(); ++ ++ // do not need to load right, as we can handle stack and constants ++ if (x->op() == Bytecodes::_idiv || x->op() == Bytecodes::_irem) { ++ rlock_result(x); ++ bool need_zero_check = true; ++ if (right.is_constant()) { ++ jint c = right.get_jint_constant(); ++ // no need to do div-by-zero check if the divisor is a non-zero constant ++ if (c != 0) need_zero_check = false; ++ // do not load right if the divisor is a power-of-2 constant ++ if (c > 0 && is_power_of_2(c) && Assembler::is_uimm(c - 1, 12)) { ++ right_arg->dont_load_item(); ++ } else { ++ right_arg->load_item(); ++ } ++ } else { ++ right_arg->load_item(); ++ } ++ if (need_zero_check) { ++ CodeEmitInfo* info = state_for(x); ++ CodeStub* stub = new 
DivByZeroStub(info); ++ __ cmp_branch(lir_cond_equal, right_arg->result(), LIR_OprFact::longConst(0), T_INT, stub); ++ } ++ ++ LIR_Opr ill = LIR_OprFact::illegalOpr; ++ if (x->op() == Bytecodes::_irem) { ++ __ irem(left_arg->result(), right_arg->result(), x->operand(), ill, NULL); ++ } else if (x->op() == Bytecodes::_idiv) { ++ __ idiv(left_arg->result(), right_arg->result(), x->operand(), ill, NULL); ++ } ++ } else if (x->op() == Bytecodes::_iadd || x->op() == Bytecodes::_isub) { ++ if (right.is_constant() && ++ ((x->op() == Bytecodes::_iadd && Assembler::is_simm(right.get_jint_constant(), 12)) || ++ (x->op() == Bytecodes::_isub && Assembler::is_simm(-right.get_jint_constant(), 12)))) { ++ right.load_nonconstant(); ++ } else { ++ right.load_item(); ++ } ++ rlock_result(x); ++ arithmetic_op_int(x->op(), x->operand(), left_arg->result(), right_arg->result(), LIR_OprFact::illegalOpr); ++ } else { ++ assert (x->op() == Bytecodes::_imul, "expect imul"); ++ if (right.is_constant()) { ++ jint c = right.get_jint_constant(); ++ if (c > 0 && c < max_jint && (is_power_of_2(c) || is_power_of_2(c - 1) || is_power_of_2(c + 1))) { ++ right_arg->dont_load_item(); ++ } else { ++ // Cannot use constant op. ++ right_arg->load_item(); ++ } ++ } else { ++ right.load_item(); ++ } ++ rlock_result(x); ++ arithmetic_op_int(x->op(), x->operand(), left_arg->result(), right_arg->result(), new_register(T_INT)); ++ } ++} ++ ++void LIRGenerator::do_ArithmeticOp(ArithmeticOp* x) { ++ // when an operand with use count 1 is the left operand, then it is ++ // likely that no move for 2-operand-LIR-form is necessary ++ if (x->is_commutative() && x->y()->as_Constant() == NULL && x->x()->use_count() > x->y()->use_count()) { ++ x->swap_operands(); ++ } ++ ++ ValueTag tag = x->type()->tag(); ++ assert(x->x()->type()->tag() == tag && x->y()->type()->tag() == tag, "wrong parameters"); ++ switch (tag) { ++ case floatTag: ++ case doubleTag: do_ArithmeticOp_FPU(x); return; ++ case longTag: do_ArithmeticOp_Long(x); return; ++ case intTag: do_ArithmeticOp_Int(x); return; ++ default: ShouldNotReachHere(); return; ++ } ++} ++ ++// _ishl, _lshl, _ishr, _lshr, _iushr, _lushr ++void LIRGenerator::do_ShiftOp(ShiftOp* x) { ++ LIRItem left(x->x(), this); ++ LIRItem right(x->y(), this); ++ ++ left.load_item(); ++ ++ rlock_result(x); ++ if (right.is_constant()) { ++ right.dont_load_item(); ++ int c; ++ switch (x->op()) { ++ case Bytecodes::_ishl: ++ c = right.get_jint_constant() & 0x1f; ++ __ shift_left(left.result(), c, x->operand()); ++ break; ++ case Bytecodes::_ishr: ++ c = right.get_jint_constant() & 0x1f; ++ __ shift_right(left.result(), c, x->operand()); ++ break; ++ case Bytecodes::_iushr: ++ c = right.get_jint_constant() & 0x1f; ++ __ unsigned_shift_right(left.result(), c, x->operand()); ++ break; ++ case Bytecodes::_lshl: ++ c = right.get_jint_constant() & 0x3f; ++ __ shift_left(left.result(), c, x->operand()); ++ break; ++ case Bytecodes::_lshr: ++ c = right.get_jint_constant() & 0x3f; ++ __ shift_right(left.result(), c, x->operand()); ++ break; ++ case Bytecodes::_lushr: ++ c = right.get_jint_constant() & 0x3f; ++ __ unsigned_shift_right(left.result(), c, x->operand()); ++ break; ++ default: ++ ShouldNotReachHere(); ++ } ++ } else { ++ right.load_item(); ++ LIR_Opr tmp = new_register(T_INT); ++ switch (x->op()) { ++ case Bytecodes::_ishl: ++ __ logical_and(right.result(), LIR_OprFact::intConst(0x1f), tmp); ++ __ shift_left(left.result(), tmp, x->operand(), tmp); ++ break; ++ case Bytecodes::_ishr: ++ __ logical_and(right.result(), 
LIR_OprFact::intConst(0x1f), tmp); ++ __ shift_right(left.result(), tmp, x->operand(), tmp); ++ break; ++ case Bytecodes::_iushr: ++ __ logical_and(right.result(), LIR_OprFact::intConst(0x1f), tmp); ++ __ unsigned_shift_right(left.result(), tmp, x->operand(), tmp); ++ break; ++ case Bytecodes::_lshl: ++ __ logical_and(right.result(), LIR_OprFact::intConst(0x3f), tmp); ++ __ shift_left(left.result(), tmp, x->operand(), tmp); ++ break; ++ case Bytecodes::_lshr: ++ __ logical_and(right.result(), LIR_OprFact::intConst(0x3f), tmp); ++ __ shift_right(left.result(), tmp, x->operand(), tmp); ++ break; ++ case Bytecodes::_lushr: ++ __ logical_and(right.result(), LIR_OprFact::intConst(0x3f), tmp); ++ __ unsigned_shift_right(left.result(), tmp, x->operand(), tmp); ++ break; ++ default: ++ ShouldNotReachHere(); ++ } ++ } ++} ++ ++// _iand, _land, _ior, _lor, _ixor, _lxor ++void LIRGenerator::do_LogicOp(LogicOp* x) { ++ LIRItem left(x->x(), this); ++ LIRItem right(x->y(), this); ++ ++ left.load_item(); ++ ++ rlock_result(x); ++ if (right.is_constant() ++ && ((right.type()->tag() == intTag ++ && Assembler::is_uimm(right.get_jint_constant(), 12)) ++ || (right.type()->tag() == longTag ++ && Assembler::is_uimm(right.get_jlong_constant(), 12)))) { ++ right.dont_load_item(); ++ } else { ++ right.load_item(); ++ } ++ switch (x->op()) { ++ case Bytecodes::_iand: ++ case Bytecodes::_land: ++ __ logical_and(left.result(), right.result(), x->operand()); break; ++ case Bytecodes::_ior: ++ case Bytecodes::_lor: ++ __ logical_or (left.result(), right.result(), x->operand()); break; ++ case Bytecodes::_ixor: ++ case Bytecodes::_lxor: ++ __ logical_xor(left.result(), right.result(), x->operand()); break; ++ default: Unimplemented(); ++ } ++} ++ ++// _lcmp, _fcmpl, _fcmpg, _dcmpl, _dcmpg ++void LIRGenerator::do_CompareOp(CompareOp* x) { ++ LIRItem left(x->x(), this); ++ LIRItem right(x->y(), this); ++ ValueTag tag = x->x()->type()->tag(); ++ if (tag == longTag) { ++ left.set_destroys_register(); ++ } ++ left.load_item(); ++ right.load_item(); ++ LIR_Opr reg = rlock_result(x); ++ ++ if (x->x()->type()->is_float_kind()) { ++ Bytecodes::Code code = x->op(); ++ __ fcmp2int(left.result(), right.result(), reg, ++ (code == Bytecodes::_fcmpl || code == Bytecodes::_dcmpl)); ++ } else if (x->x()->type()->tag() == longTag) { ++ __ lcmp2int(left.result(), right.result(), reg); ++ } else { ++ Unimplemented(); ++ } ++} ++ ++void LIRGenerator::do_LibmIntrinsic(Intrinsic* x) { ++ LIRItem value(x->argument_at(0), this); ++ value.set_destroys_register(); ++ ++ LIR_Opr calc_result = rlock_result(x); ++ LIR_Opr result_reg = result_register_for(x->type()); ++ ++ CallingConvention* cc = NULL; ++ ++ if (x->id() == vmIntrinsics::_dpow) { ++ LIRItem value1(x->argument_at(1), this); ++ ++ value1.set_destroys_register(); ++ ++ BasicTypeList signature(2); ++ signature.append(T_DOUBLE); ++ signature.append(T_DOUBLE); ++ cc = frame_map()->c_calling_convention(&signature); ++ value.load_item_force(cc->at(0)); ++ value1.load_item_force(cc->at(1)); ++ } else { ++ BasicTypeList signature(1); ++ signature.append(T_DOUBLE); ++ cc = frame_map()->c_calling_convention(&signature); ++ value.load_item_force(cc->at(0)); ++ } ++ ++ switch (x->id()) { ++ case vmIntrinsics::_dexp: ++ __ call_runtime_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::dexp), getThreadTemp(), result_reg, cc->args()); ++ break; ++ case vmIntrinsics::_dlog: ++ __ call_runtime_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::dlog), getThreadTemp(), result_reg, cc->args()); ++ break; ++ case 
vmIntrinsics::_dlog10: ++ __ call_runtime_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::dlog10), getThreadTemp(), result_reg, cc->args()); ++ break; ++ case vmIntrinsics::_dpow: ++ __ call_runtime_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::dpow), getThreadTemp(), result_reg, cc->args()); ++ break; ++ case vmIntrinsics::_dsin: ++ __ call_runtime_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::dsin), getThreadTemp(), result_reg, cc->args()); ++ break; ++ case vmIntrinsics::_dcos: ++ __ call_runtime_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::dcos), getThreadTemp(), result_reg, cc->args()); ++ break; ++ case vmIntrinsics::_dtan: ++ __ call_runtime_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::dtan), getThreadTemp(), result_reg, cc->args()); ++ break; ++ default: ShouldNotReachHere(); ++ } ++ __ move(result_reg, calc_result); ++} ++ ++void LIRGenerator::do_CompareAndSwap(Intrinsic* x, ValueType* type) { ++ assert(x->number_of_arguments() == 4, "wrong type"); ++ LIRItem obj (x->argument_at(0), this); // object ++ LIRItem offset(x->argument_at(1), this); // offset of field ++ LIRItem cmp (x->argument_at(2), this); // value to compare with field ++ LIRItem val (x->argument_at(3), this); // replace field with val if matches cmp ++ ++ assert(obj.type()->tag() == objectTag, "invalid type"); ++ ++ // In 64bit the type can be long, sparc doesn't have this assert ++ // assert(offset.type()->tag() == intTag, "invalid type"); ++ ++ assert(cmp.type()->tag() == type->tag(), "invalid type"); ++ assert(val.type()->tag() == type->tag(), "invalid type"); ++ ++ // get address of field ++ obj.load_item(); ++ offset.load_nonconstant(); ++ val.load_item(); ++ cmp.load_item(); ++ ++ LIR_Address* a; ++ if(offset.result()->is_constant()) { ++ jlong c = offset.result()->as_jlong(); ++ if ((jlong)((jint)c) == c) { ++ a = new LIR_Address(obj.result(), ++ (jint)c, ++ as_BasicType(type)); ++ } else { ++ LIR_Opr tmp = new_register(T_LONG); ++ __ move(offset.result(), tmp); ++ a = new LIR_Address(obj.result(), ++ tmp, ++ as_BasicType(type)); ++ } ++ } else { ++ a = new LIR_Address(obj.result(), ++ offset.result(), ++ LIR_Address::times_1, ++ 0, ++ as_BasicType(type)); ++ } ++ LIR_Opr addr = new_pointer_register(); ++ __ leal(LIR_OprFact::address(a), addr); ++ ++ if (type == objectType) { // Write-barrier needed for Object fields. ++ // Do the pre-write barrier, if any. ++ pre_barrier(addr, LIR_OprFact::illegalOpr /* pre_val */, ++ true /* do_load */, false /* patch */, NULL); ++ } ++ ++ LIR_Opr result = rlock_result(x); ++ ++ LIR_Opr ill = LIR_OprFact::illegalOpr; // for convenience ++ if (type == objectType) ++ __ cas_obj(addr, cmp.result(), val.result(), new_register(T_INT), new_register(T_INT), ++ result); ++ else if (type == intType) ++ __ cas_int(addr, cmp.result(), val.result(), ill, ill); ++ else if (type == longType) ++ __ cas_long(addr, cmp.result(), val.result(), ill, ill); ++ else { ++ ShouldNotReachHere(); ++ } ++ ++ __ move(FrameMap::scr1_opr, result); ++ ++ if (type == objectType) { // Write-barrier needed for Object fields. 
++ // Seems to be precise ++ post_barrier(addr, val.result()); ++ } ++} ++ ++void LIRGenerator::do_MathIntrinsic(Intrinsic* x) { ++ assert(x->number_of_arguments() == 1 || (x->number_of_arguments() == 2 && x->id() == vmIntrinsics::_dpow), ++ "wrong type"); ++ if (x->id() == vmIntrinsics::_dexp || x->id() == vmIntrinsics::_dlog || ++ x->id() == vmIntrinsics::_dpow || x->id() == vmIntrinsics::_dcos || ++ x->id() == vmIntrinsics::_dsin || x->id() == vmIntrinsics::_dtan || ++ x->id() == vmIntrinsics::_dlog10) { ++ do_LibmIntrinsic(x); ++ return; ++ } ++ switch (x->id()) { ++ case vmIntrinsics::_dabs: ++ case vmIntrinsics::_dsqrt: { ++ assert(x->number_of_arguments() == 1, "wrong type"); ++ LIRItem value(x->argument_at(0), this); ++ value.load_item(); ++ LIR_Opr dst = rlock_result(x); ++ ++ switch (x->id()) { ++ case vmIntrinsics::_dsqrt: ++ __ sqrt(value.result(), dst, LIR_OprFact::illegalOpr); ++ break; ++ case vmIntrinsics::_dabs: ++ __ abs(value.result(), dst, LIR_OprFact::illegalOpr); ++ break; ++ default: ++ ShouldNotReachHere(); ++ } ++ break; ++ } ++ default: ++ ShouldNotReachHere(); ++ } ++} ++ ++void LIRGenerator::do_ArrayCopy(Intrinsic* x) { ++ Register j_rarg0 = RT0; ++ Register j_rarg1 = RA0; ++ Register j_rarg2 = RA1; ++ Register j_rarg3 = RA2; ++ Register j_rarg4 = RA3; ++ Register j_rarg5 = RA4; ++ ++ assert(x->number_of_arguments() == 5, "wrong type"); ++ ++ // Make all state_for calls early since they can emit code ++ CodeEmitInfo* info = state_for(x, x->state()); ++ ++ LIRItem src(x->argument_at(0), this); ++ LIRItem src_pos(x->argument_at(1), this); ++ LIRItem dst(x->argument_at(2), this); ++ LIRItem dst_pos(x->argument_at(3), this); ++ LIRItem length(x->argument_at(4), this); ++ ++ // operands for arraycopy must use fixed registers, otherwise ++ // LinearScan will fail allocation (because arraycopy always needs a ++ // call) ++ ++ // The java calling convention will give us enough registers ++ // so that on the stub side the args will be perfect already. ++ // On the other slow/special case side we call C and the arg ++ // positions are not similar enough to pick one as the best. 
++ // Also because the java calling convention is a "shifted" version ++ // of the C convention we can process the java args trivially into C ++ // args without worry of overwriting during the xfer ++ ++ src.load_item_force (FrameMap::as_oop_opr(j_rarg0)); ++ src_pos.load_item_force (FrameMap::as_opr(j_rarg1)); ++ dst.load_item_force (FrameMap::as_oop_opr(j_rarg2)); ++ dst_pos.load_item_force (FrameMap::as_opr(j_rarg3)); ++ length.load_item_force (FrameMap::as_opr(j_rarg4)); ++ ++ LIR_Opr tmp = FrameMap::as_opr(j_rarg5); ++ ++ set_no_result(x); ++ ++ int flags; ++ ciArrayKlass* expected_type; ++ arraycopy_helper(x, &flags, &expected_type); ++ ++ __ arraycopy(src.result(), src_pos.result(), dst.result(), dst_pos.result(), ++ length.result(), tmp, expected_type, flags, info); // does add_safepoint ++} ++ ++void LIRGenerator::do_update_CRC32(Intrinsic* x) { ++ assert(UseCRC32Intrinsics, "why are we here?"); ++ // Make all state_for calls early since they can emit code ++ LIR_Opr result = rlock_result(x); ++ int flags = 0; ++ switch (x->id()) { ++ case vmIntrinsics::_updateCRC32: { ++ LIRItem crc(x->argument_at(0), this); ++ LIRItem val(x->argument_at(1), this); ++ // val is destroyed by update_crc32 ++ val.set_destroys_register(); ++ crc.load_item(); ++ val.load_item(); ++ __ update_crc32(crc.result(), val.result(), result); ++ break; ++ } ++ case vmIntrinsics::_updateBytesCRC32: ++ case vmIntrinsics::_updateByteBufferCRC32: { ++ bool is_updateBytes = (x->id() == vmIntrinsics::_updateBytesCRC32); ++ ++ LIRItem crc(x->argument_at(0), this); ++ LIRItem buf(x->argument_at(1), this); ++ LIRItem off(x->argument_at(2), this); ++ LIRItem len(x->argument_at(3), this); ++ buf.load_item(); ++ off.load_nonconstant(); ++ ++ LIR_Opr index = off.result(); ++ int offset = is_updateBytes ? 
arrayOopDesc::base_offset_in_bytes(T_BYTE) : 0; ++ if(off.result()->is_constant()) { ++ index = LIR_OprFact::illegalOpr; ++ offset += off.result()->as_jint(); ++ } ++ LIR_Opr base_op = buf.result(); ++ ++ if (index->is_valid()) { ++ LIR_Opr tmp = new_register(T_LONG); ++ __ convert(Bytecodes::_i2l, index, tmp); ++ index = tmp; ++ } ++ ++ if (offset) { ++ LIR_Opr tmp = new_pointer_register(); ++ __ add(base_op, LIR_OprFact::intConst(offset), tmp); ++ base_op = tmp; ++ offset = 0; ++ } ++ ++ LIR_Address* a = new LIR_Address(base_op, index, LIR_Address::times_1, offset, T_BYTE); ++ BasicTypeList signature(3); ++ signature.append(T_INT); ++ signature.append(T_ADDRESS); ++ signature.append(T_INT); ++ CallingConvention* cc = frame_map()->c_calling_convention(&signature); ++ const LIR_Opr result_reg = result_register_for(x->type()); ++ ++ LIR_Opr addr = new_pointer_register(); ++ __ leal(LIR_OprFact::address(a), addr); ++ ++ crc.load_item_force(cc->at(0)); ++ __ move(addr, cc->at(1)); ++ len.load_item_force(cc->at(2)); ++ ++ __ call_runtime_leaf(StubRoutines::updateBytesCRC32(), getThreadTemp(), result_reg, cc->args()); ++ __ move(result_reg, result); ++ ++ break; ++ } ++ default: { ++ ShouldNotReachHere(); ++ } ++ } ++} ++ ++// _i2l, _i2f, _i2d, _l2i, _l2f, _l2d, _f2i, _f2l, _f2d, _d2i, _d2l, _d2f ++// _i2b, _i2c, _i2s ++void LIRGenerator::do_Convert(Convert* x) { ++ LIRItem value(x->value(), this); ++ value.load_item(); ++ LIR_Opr input = value.result(); ++ LIR_Opr result = rlock(x); ++ ++ // arguments of lir_convert ++ LIR_Opr conv_input = input; ++ LIR_Opr conv_result = result; ++ ++ switch (x->op()) { ++ case Bytecodes::_f2i: ++ case Bytecodes::_f2l: ++ __ convert(x->op(), conv_input, conv_result, NULL, new_register(T_FLOAT)); ++ break; ++ case Bytecodes::_d2i: ++ case Bytecodes::_d2l: ++ __ convert(x->op(), conv_input, conv_result, NULL, new_register(T_DOUBLE)); ++ break; ++ default: ++ __ convert(x->op(), conv_input, conv_result); ++ break; ++ } ++ ++ assert(result->is_virtual(), "result must be virtual register"); ++ set_result(x, result); ++} ++ ++void LIRGenerator::do_NewInstance(NewInstance* x) { ++#ifndef PRODUCT ++ if (PrintNotLoaded && !x->klass()->is_loaded()) { ++ tty->print_cr(" ###class not loaded at new bci %d", x->printable_bci()); ++ } ++#endif ++ CodeEmitInfo* info = state_for(x, x->state()); ++ LIR_Opr reg = result_register_for(x->type()); ++ new_instance(reg, x->klass(), x->is_unresolved(), ++ FrameMap::t0_oop_opr, ++ FrameMap::t1_oop_opr, ++ FrameMap::a4_oop_opr, ++ LIR_OprFact::illegalOpr, ++ FrameMap::a3_metadata_opr, info); ++ LIR_Opr result = rlock_result(x); ++ __ move(reg, result); ++} ++ ++void LIRGenerator::do_NewTypeArray(NewTypeArray* x) { ++ CodeEmitInfo* info = state_for(x, x->state()); ++ ++ LIRItem length(x->length(), this); ++ length.load_item_force(FrameMap::s0_opr); ++ ++ LIR_Opr reg = result_register_for(x->type()); ++ LIR_Opr tmp1 = FrameMap::t0_oop_opr; ++ LIR_Opr tmp2 = FrameMap::t1_oop_opr; ++ LIR_Opr tmp3 = FrameMap::a5_oop_opr; ++ LIR_Opr tmp4 = reg; ++ LIR_Opr klass_reg = FrameMap::a3_metadata_opr; ++ LIR_Opr len = length.result(); ++ BasicType elem_type = x->elt_type(); ++ ++ __ metadata2reg(ciTypeArrayKlass::make(elem_type)->constant_encoding(), klass_reg); ++ ++ CodeStub* slow_path = new NewTypeArrayStub(klass_reg, len, reg, info); ++ __ allocate_array(reg, len, tmp1, tmp2, tmp3, tmp4, elem_type, klass_reg, slow_path); ++ ++ LIR_Opr result = rlock_result(x); ++ __ move(reg, result); ++} ++ ++void LIRGenerator::do_NewObjectArray(NewObjectArray* 
x) { ++ LIRItem length(x->length(), this); ++ // in case of patching (i.e., object class is not yet loaded), we need to reexecute the instruction ++ // and therefore provide the state before the parameters have been consumed ++ CodeEmitInfo* patching_info = NULL; ++ if (!x->klass()->is_loaded() || PatchALot) { ++ patching_info = state_for(x, x->state_before()); ++ } ++ ++ CodeEmitInfo* info = state_for(x, x->state()); ++ ++ LIR_Opr reg = result_register_for(x->type()); ++ LIR_Opr tmp1 = FrameMap::t0_oop_opr; ++ LIR_Opr tmp2 = FrameMap::t1_oop_opr; ++ LIR_Opr tmp3 = FrameMap::a5_oop_opr; ++ LIR_Opr tmp4 = reg; ++ LIR_Opr klass_reg = FrameMap::a3_metadata_opr; ++ ++ length.load_item_force(FrameMap::s0_opr); ++ LIR_Opr len = length.result(); ++ ++ CodeStub* slow_path = new NewObjectArrayStub(klass_reg, len, reg, info); ++ ciKlass* obj = (ciKlass*) ciObjArrayKlass::make(x->klass()); ++ if (obj == ciEnv::unloaded_ciobjarrayklass()) { ++ BAILOUT("encountered unloaded_ciobjarrayklass due to out of memory error"); ++ } ++ klass2reg_with_patching(klass_reg, obj, patching_info); ++ __ allocate_array(reg, len, tmp1, tmp2, tmp3, tmp4, T_OBJECT, klass_reg, slow_path); ++ ++ LIR_Opr result = rlock_result(x); ++ __ move(reg, result); ++} ++ ++void LIRGenerator::do_NewMultiArray(NewMultiArray* x) { ++ Values* dims = x->dims(); ++ int i = dims->length(); ++ LIRItemList* items = new LIRItemList(i, NULL); ++ while (i-- > 0) { ++ LIRItem* size = new LIRItem(dims->at(i), this); ++ items->at_put(i, size); ++ } ++ ++ // Evaluate state_for early since it may emit code. ++ CodeEmitInfo* patching_info = NULL; ++ if (!x->klass()->is_loaded() || PatchALot) { ++ patching_info = state_for(x, x->state_before()); ++ ++ // Cannot re-use same xhandlers for multiple CodeEmitInfos, so ++ // clone all handlers (NOTE: Usually this is handled transparently ++ // by the CodeEmitInfo cloning logic in CodeStub constructors but ++ // is done explicitly here because a stub isn't being used). 
++ x->set_exception_handlers(new XHandlers(x->exception_handlers())); ++ } ++ CodeEmitInfo* info = state_for(x, x->state()); ++ ++ i = dims->length(); ++ while (i-- > 0) { ++ LIRItem* size = items->at(i); ++ size->load_item(); ++ ++ store_stack_parameter(size->result(), in_ByteSize(i*4)); ++ } ++ ++ LIR_Opr klass_reg = FrameMap::a0_metadata_opr; ++ klass2reg_with_patching(klass_reg, x->klass(), patching_info); ++ ++ LIR_Opr rank = FrameMap::s0_opr; ++ __ move(LIR_OprFact::intConst(x->rank()), rank); ++ LIR_Opr varargs = FrameMap::a2_opr; ++ __ move(FrameMap::sp_opr, varargs); ++ LIR_OprList* args = new LIR_OprList(3); ++ args->append(klass_reg); ++ args->append(rank); ++ args->append(varargs); ++ LIR_Opr reg = result_register_for(x->type()); ++ __ call_runtime(Runtime1::entry_for(Runtime1::new_multi_array_id), ++ LIR_OprFact::illegalOpr, ++ reg, args, info); ++ ++ LIR_Opr result = rlock_result(x); ++ __ move(reg, result); ++} ++ ++void LIRGenerator::do_BlockBegin(BlockBegin* x) { ++ // nothing to do for now ++} ++ ++void LIRGenerator::do_CheckCast(CheckCast* x) { ++ LIRItem obj(x->obj(), this); ++ ++ CodeEmitInfo* patching_info = NULL; ++ if (!x->klass()->is_loaded() || ++ (PatchALot && !x->is_incompatible_class_change_check() && ++ !x->is_invokespecial_receiver_check())) { ++ // must do this before locking the destination register as an oop register, ++ // and before the obj is loaded (the latter is for deoptimization) ++ patching_info = state_for(x, x->state_before()); ++ } ++ obj.load_item(); ++ ++ // info for exceptions ++ CodeEmitInfo* info_for_exception = ++ (x->needs_exception_state() ? state_for(x) : ++ state_for(x, x->state_before(), true /*ignore_xhandler*/)); ++ ++ CodeStub* stub; ++ if (x->is_incompatible_class_change_check()) { ++ assert(patching_info == NULL, "can't patch this"); ++ stub = new SimpleExceptionStub(Runtime1::throw_incompatible_class_change_error_id, ++ LIR_OprFact::illegalOpr, info_for_exception); ++ } else if (x->is_invokespecial_receiver_check()) { ++ assert(patching_info == NULL, "can't patch this"); ++ stub = new DeoptimizeStub(info_for_exception); ++ } else { ++ stub = new SimpleExceptionStub(Runtime1::throw_class_cast_exception_id, ++ obj.result(), info_for_exception); ++ } ++ LIR_Opr reg = rlock_result(x); ++ LIR_Opr tmp3 = LIR_OprFact::illegalOpr; ++ if (!x->klass()->is_loaded() || UseCompressedClassPointers) { ++ tmp3 = new_register(objectType); ++ } ++ __ checkcast(reg, obj.result(), x->klass(), ++ new_register(objectType), new_register(objectType), tmp3, ++ x->direct_compare(), info_for_exception, patching_info, stub, ++ x->profiled_method(), x->profiled_bci()); ++} ++ ++void LIRGenerator::do_InstanceOf(InstanceOf* x) { ++ LIRItem obj(x->obj(), this); ++ ++ // result and test object may not be in same register ++ LIR_Opr reg = rlock_result(x); ++ CodeEmitInfo* patching_info = NULL; ++ if ((!x->klass()->is_loaded() || PatchALot)) { ++ // must do this before locking the destination register as an oop register ++ patching_info = state_for(x, x->state_before()); ++ } ++ obj.load_item(); ++ LIR_Opr tmp3 = LIR_OprFact::illegalOpr; ++ if (!x->klass()->is_loaded() || UseCompressedClassPointers) { ++ tmp3 = new_register(objectType); ++ } ++ __ instanceof(reg, obj.result(), x->klass(), ++ new_register(objectType), new_register(objectType), tmp3, ++ x->direct_compare(), patching_info, x->profiled_method(), x->profiled_bci()); ++} ++ ++void LIRGenerator::do_If(If* x) { ++ assert(x->number_of_sux() == 2, "inconsistency"); ++ ValueTag tag = x->x()->type()->tag(); 
++ bool is_safepoint = x->is_safepoint(); ++ ++ If::Condition cond = x->cond(); ++ ++ LIRItem xitem(x->x(), this); ++ LIRItem yitem(x->y(), this); ++ LIRItem* xin = &xitem; ++ LIRItem* yin = &yitem; ++ ++ if (tag == longTag) { ++ // for longs, only conditions "eql", "neq", "lss", "geq" are valid; ++ // mirror for other conditions ++ if (cond == If::gtr || cond == If::leq) { ++ cond = Instruction::mirror(cond); ++ xin = &yitem; ++ yin = &xitem; ++ } ++ xin->set_destroys_register(); ++ } ++ xin->load_item(); ++ ++ if (tag == longTag) { ++ if (yin->is_constant() && yin->get_jlong_constant() == 0) { ++ yin->dont_load_item(); ++ } else { ++ yin->load_item(); ++ } ++ } else if (tag == intTag) { ++ if (yin->is_constant() && yin->get_jint_constant() == 0) { ++ yin->dont_load_item(); ++ } else { ++ yin->load_item(); ++ } ++ } else { ++ yin->load_item(); ++ } ++ ++ set_no_result(x); ++ ++ LIR_Opr left = xin->result(); ++ LIR_Opr right = yin->result(); ++ ++ // add safepoint before generating condition code so it can be recomputed ++ if (x->is_safepoint()) { ++ // increment backedge counter if needed ++ increment_backedge_counter(state_for(x, x->state_before()), x->profiled_bci()); ++ __ safepoint(LIR_OprFact::illegalOpr, state_for(x, x->state_before())); ++ } ++ ++ // Generate branch profiling. Profiling code doesn't kill flags. ++ profile_branch(x, cond, left, right); ++ move_to_phi(x->state()); ++ if (x->x()->type()->is_float_kind()) { ++ __ cmp_branch(lir_cond(cond), left, right, right->type(), x->tsux(), x->usux()); ++ } else { ++ __ cmp_branch(lir_cond(cond), left, right, right->type(), x->tsux()); ++ } ++ assert(x->default_sux() == x->fsux(), "wrong destination above"); ++ __ jump(x->default_sux()); ++} ++ ++LIR_Opr LIRGenerator::getThreadPointer() { ++ return FrameMap::as_pointer_opr(TREG); ++} ++ ++void LIRGenerator::trace_block_entry(BlockBegin* block) { Unimplemented(); } ++ ++void LIRGenerator::volatile_field_store(LIR_Opr value, LIR_Address* address, ++ CodeEmitInfo* info) { ++ __ volatile_store_mem_reg(value, address, info); ++} ++ ++void LIRGenerator::volatile_field_load(LIR_Address* address, LIR_Opr result, ++ CodeEmitInfo* info) { ++ // 8179954: We need to make sure that the code generated for ++ // volatile accesses forms a sequentially-consistent set of ++ // operations when combined with STLR and LDAR. Without a leading ++ // membar it's possible for a simple Dekker test to fail if loads ++ // use LD;DMB but stores use STLR. This can happen if C2 compiles ++ // the stores in one method and C1 compiles the loads in another. ++ __ membar(); ++ __ volatile_load_mem_reg(address, result, info); ++} ++ ++void LIRGenerator::get_Object_unsafe(LIR_Opr dst, LIR_Opr src, LIR_Opr offset, ++ BasicType type, bool is_volatile) { ++ LIR_Address* addr = new LIR_Address(src, offset, type); ++ __ load(addr, dst); ++} ++ ++void LIRGenerator::put_Object_unsafe(LIR_Opr src, LIR_Opr offset, LIR_Opr data, ++ BasicType type, bool is_volatile) { ++ LIR_Address* addr = new LIR_Address(src, offset, type); ++ bool is_obj = (type == T_ARRAY || type == T_OBJECT); ++ if (is_obj) { ++ // Do the pre-write barrier, if any. 
++ pre_barrier(LIR_OprFact::address(addr), LIR_OprFact::illegalOpr /* pre_val */, ++ true /* do_load */, false /* patch */, NULL); ++ __ move(data, addr); ++ assert(src->is_register(), "must be register"); ++ // Seems to be a precise address ++ post_barrier(LIR_OprFact::address(addr), data); ++ } else { ++ __ move(data, addr); ++ } ++} ++ ++void LIRGenerator::do_UnsafeGetAndSetObject(UnsafeGetAndSetObject* x) { ++ BasicType type = x->basic_type(); ++ LIRItem src(x->object(), this); ++ LIRItem off(x->offset(), this); ++ LIRItem value(x->value(), this); ++ ++ src.load_item(); ++ off.load_nonconstant(); ++ ++ // We can cope with a constant increment in an xadd ++ if (! (x->is_add() ++ && value.is_constant() ++ && can_inline_as_constant(x->value()))) { ++ value.load_item(); ++ } ++ ++ LIR_Opr dst = rlock_result(x, type); ++ LIR_Opr data = value.result(); ++ bool is_obj = (type == T_ARRAY || type == T_OBJECT); ++ LIR_Opr offset = off.result(); ++ ++ if (data == dst) { ++ LIR_Opr tmp = new_register(data->type()); ++ __ move(data, tmp); ++ data = tmp; ++ } ++ ++ LIR_Address* addr; ++ if (offset->is_constant()) { ++ jlong l = offset->as_jlong(); ++ assert((jlong)((jint)l) == l, "offset too large for constant"); ++ jint c = (jint)l; ++ addr = new LIR_Address(src.result(), c, type); ++ } else { ++ addr = new LIR_Address(src.result(), offset, type); ++ } ++ ++ LIR_Opr tmp = new_register(T_INT); ++ LIR_Opr ptr = LIR_OprFact::illegalOpr; ++ ++ if (x->is_add()) { ++ __ xadd(LIR_OprFact::address(addr), data, dst, tmp); ++ } else { ++ if (is_obj) { ++ // Do the pre-write barrier, if any. ++ ptr = new_pointer_register(); ++ __ add(src.result(), off.result(), ptr); ++ pre_barrier(ptr, LIR_OprFact::illegalOpr /* pre_val */, ++ true /* do_load */, false /* patch */, NULL); ++ } ++ __ xchg(LIR_OprFact::address(addr), data, dst, tmp); ++ if (is_obj) { ++ post_barrier(ptr, data); ++ } ++ } ++} +diff --git a/hotspot/src/cpu/loongarch/vm/c1_LinearScan_loongarch.hpp b/hotspot/src/cpu/loongarch/vm/c1_LinearScan_loongarch.hpp +new file mode 100644 +index 0000000000..f15dacafeb +--- /dev/null ++++ b/hotspot/src/cpu/loongarch/vm/c1_LinearScan_loongarch.hpp +@@ -0,0 +1,70 @@ ++/* ++ * Copyright (c) 2005, 2021, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2021, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ * ++ */ ++ ++#ifndef CPU_LOONGARCH_C1_LINEARSCAN_LOONGARCH_HPP ++#define CPU_LOONGARCH_C1_LINEARSCAN_LOONGARCH_HPP ++ ++inline bool LinearScan::is_processed_reg_num(int reg_num) { ++ return reg_num <= FrameMap::last_cpu_reg() || reg_num >= pd_nof_cpu_regs_frame_map; ++} ++ ++inline int LinearScan::num_physical_regs(BasicType type) { ++ return 1; ++} ++ ++inline bool LinearScan::requires_adjacent_regs(BasicType type) { ++ return false; ++} ++ ++inline bool LinearScan::is_caller_save(int assigned_reg) { ++ assert(assigned_reg >= 0 && assigned_reg < nof_regs, "should call this only for registers"); ++ if (assigned_reg < pd_first_callee_saved_reg) ++ return true; ++ if (assigned_reg > pd_last_callee_saved_reg && assigned_reg < pd_first_callee_saved_fpu_reg) ++ return true; ++ if (assigned_reg > pd_last_callee_saved_fpu_reg && assigned_reg < pd_last_fpu_reg) ++ return true; ++ return false; ++} ++ ++inline void LinearScan::pd_add_temps(LIR_Op* op) {} ++ ++// Implementation of LinearScanWalker ++inline bool LinearScanWalker::pd_init_regs_for_alloc(Interval* cur) { ++ if (allocator()->gen()->is_vreg_flag_set(cur->reg_num(), LIRGenerator::callee_saved)) { ++ assert(cur->type() != T_FLOAT && cur->type() != T_DOUBLE, "cpu regs only"); ++ _first_reg = pd_first_callee_saved_reg; ++ _last_reg = pd_last_callee_saved_reg; ++ return true; ++ } else if (cur->type() == T_INT || cur->type() == T_LONG || cur->type() == T_OBJECT || ++ cur->type() == T_ADDRESS || cur->type() == T_METADATA) { ++ _first_reg = pd_first_cpu_reg; ++ _last_reg = pd_last_allocatable_cpu_reg; ++ return true; ++ } ++ return false; ++} ++ ++#endif // CPU_LOONGARCH_C1_LINEARSCAN_LOONGARCH_HPP +diff --git a/hotspot/src/cpu/loongarch/vm/c1_LinearScan_loongarch_64.cpp b/hotspot/src/cpu/loongarch/vm/c1_LinearScan_loongarch_64.cpp +new file mode 100644 +index 0000000000..219b2e3671 +--- /dev/null ++++ b/hotspot/src/cpu/loongarch/vm/c1_LinearScan_loongarch_64.cpp +@@ -0,0 +1,33 @@ ++/* ++ * Copyright (c) 2005, 2011, Oracle and/or its affiliates. All rights reserved. All rights reserved. ++ * Copyright (c) 2021, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ * ++ */ ++ ++#include "precompiled.hpp" ++#include "c1/c1_Instruction.hpp" ++#include "c1/c1_LinearScan.hpp" ++#include "utilities/bitMap.inline.hpp" ++ ++void LinearScan::allocate_fpu_stack() { ++ // No FPU stack on LoongArch64 ++} +diff --git a/hotspot/src/cpu/loongarch/vm/c1_MacroAssembler_loongarch.hpp b/hotspot/src/cpu/loongarch/vm/c1_MacroAssembler_loongarch.hpp +new file mode 100644 +index 0000000000..38ff4c5836 +--- /dev/null ++++ b/hotspot/src/cpu/loongarch/vm/c1_MacroAssembler_loongarch.hpp +@@ -0,0 +1,112 @@ ++/* ++ * Copyright (c) 1999, 2021, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2021, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#ifndef CPU_LOONGARCH_C1_MACROASSEMBLER_LOONGARCH_HPP ++#define CPU_LOONGARCH_C1_MACROASSEMBLER_LOONGARCH_HPP ++ ++using MacroAssembler::build_frame; ++using MacroAssembler::null_check; ++ ++// C1_MacroAssembler contains high-level macros for C1 ++ ++ private: ++ int _rsp_offset; // track rsp changes ++ // initialization ++ void pd_init() { _rsp_offset = 0; } ++ ++ public: ++ void try_allocate( ++ Register obj, // result: pointer to object after successful allocation ++ Register var_size_in_bytes, // object size in bytes if unknown at compile time; invalid otherwise ++ int con_size_in_bytes, // object size in bytes if known at compile time ++ Register t1, // temp register ++ Register t2, // temp register ++ Label& slow_case // continuation point if fast allocation fails ++ ); ++ ++ void initialize_header(Register obj, Register klass, Register len, Register t1, Register t2); ++ void initialize_body(Register obj, Register len_in_bytes, int hdr_size_in_bytes, Register t1, Register t2); ++ ++ // locking ++ // hdr : must be A0, contents destroyed ++ // obj : must point to the object to lock, contents preserved ++ // disp_hdr: must point to the displaced header location, contents preserved ++ // scratch : scratch register, contents destroyed ++ // returns code offset at which to add null check debug information ++ int lock_object (Register swap, Register obj, Register disp_hdr, Register scratch, Label& slow_case); ++ ++ // unlocking ++ // hdr : contents destroyed ++ // obj : must point to the object to lock, contents preserved ++ // disp_hdr: must be A0 & must point to the displaced header location, contents destroyed ++ void unlock_object(Register swap, Register obj, Register lock, Label& slow_case); ++ ++ void initialize_object( ++ Register obj, // result: pointer to object after successful allocation ++ Register klass, 
// object klass ++ Register var_size_in_bytes, // object size in bytes if unknown at compile time; invalid otherwise ++ int con_size_in_bytes, // object size in bytes if known at compile time ++ Register t1, // temp register ++ Register t2, // temp register ++ bool is_tlab_allocated // the object was allocated in a TLAB; relevant for the implementation of ZeroTLAB ++ ); ++ ++ // allocation of fixed-size objects ++ // (can also be used to allocate fixed-size arrays, by setting ++ // hdr_size correctly and storing the array length afterwards) ++ // obj : will contain pointer to allocated object ++ // t1, t2 : scratch registers - contents destroyed ++ // header_size: size of object header in words ++ // object_size: total size of object in words ++ // slow_case : exit to slow case implementation if fast allocation fails ++ void allocate_object(Register obj, Register t1, Register t2, int header_size, ++ int object_size, Register klass, Label& slow_case); ++ ++ enum { ++ max_array_allocation_length = 0x00FFFFFF ++ }; ++ ++ // allocation of arrays ++ // obj : will contain pointer to allocated object ++ // len : array length in number of elements ++ // t : scratch register - contents destroyed ++ // header_size: size of object header in words ++ // f : element scale factor ++ // slow_case : exit to slow case implementation if fast allocation fails ++ void allocate_array(Register obj, Register len, Register t, Register t2, int header_size, ++ int f, Register klass, Label& slow_case); ++ ++ int rsp_offset() const { return _rsp_offset; } ++ void set_rsp_offset(int n) { _rsp_offset = n; } ++ ++ void invalidate_registers(bool inv_a0, bool inv_s0, bool inv_a2, bool inv_a3, ++ bool inv_a4, bool inv_a5) PRODUCT_RETURN; ++ ++ // This platform only uses signal-based null checks. The Label is not needed. ++ void null_check(Register r, Label *Lnull = NULL) { MacroAssembler::null_check(r); } ++ ++ void load_parameter(int offset_in_words, Register reg); ++ ++#endif // CPU_LOONGARCH_C1_MACROASSEMBLER_LOONGARCH_HPP +diff --git a/hotspot/src/cpu/loongarch/vm/c1_MacroAssembler_loongarch_64.cpp b/hotspot/src/cpu/loongarch/vm/c1_MacroAssembler_loongarch_64.cpp +new file mode 100644 +index 0000000000..b75126fba4 +--- /dev/null ++++ b/hotspot/src/cpu/loongarch/vm/c1_MacroAssembler_loongarch_64.cpp +@@ -0,0 +1,346 @@ ++/* ++ * Copyright (c) 1999, 2021, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2021, 2023, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ * ++ */ ++ ++#include "precompiled.hpp" ++#include "c1/c1_MacroAssembler.hpp" ++#include "c1/c1_Runtime1.hpp" ++#include "interpreter/interpreter.hpp" ++#include "oops/arrayOop.hpp" ++#include "runtime/basicLock.hpp" ++#include "runtime/os.hpp" ++#include "runtime/sharedRuntime.hpp" ++#include "runtime/stubRoutines.hpp" ++ ++#define A0 RA0 ++#define A1 RA1 ++#define A2 RA2 ++#define A3 RA3 ++#define A4 RA4 ++#define A5 RA5 ++#define A6 RA6 ++#define A7 RA7 ++#define T0 RT0 ++#define T1 RT1 ++#define T4 RT4 ++ ++int C1_MacroAssembler::lock_object(Register hdr, Register obj, Register disp_hdr, Register scratch, Label& slow_case) { ++ const int aligned_mask = BytesPerWord -1; ++ const int hdr_offset = oopDesc::mark_offset_in_bytes(); ++ assert(hdr != obj && hdr != disp_hdr && obj != disp_hdr, "registers must be different"); ++ int null_check_offset = -1; ++ Label done; ++ ++ verify_oop(obj); ++ ++ // save object being locked into the BasicObjectLock ++ st_ptr(obj, Address(disp_hdr, BasicObjectLock::obj_offset_in_bytes())); ++ ++ if (UseBiasedLocking) { ++ assert(scratch != noreg, "should have scratch register at this point"); ++ null_check_offset = biased_locking_enter(disp_hdr, obj, hdr, scratch, false, done, &slow_case); ++ } else { ++ null_check_offset = offset(); ++ } ++ ++ // Load object header ++ ld_ptr(hdr, Address(obj, hdr_offset)); ++ // and mark it as unlocked ++ ori(hdr, hdr, markOopDesc::unlocked_value); ++ // save unlocked object header into the displaced header location on the stack ++ st_ptr(hdr, Address(disp_hdr, 0)); ++ // test if object header is still the same (i.e. unlocked), and if so, store the ++ // displaced header address in the object header - if it is not the same, get the ++ // object header instead ++ lea(SCR2, Address(obj, hdr_offset)); ++ cmpxchg(Address(SCR2, 0), hdr, disp_hdr, SCR1, true, false, done); ++ // if the object header was the same, we're done ++ // if the object header was not the same, it is now in the hdr register ++ // => test if it is a stack pointer into the same stack (recursive locking), i.e.: ++ // ++ // 1) (hdr & aligned_mask) == 0 ++ // 2) sp <= hdr ++ // 3) hdr <= sp + page_size ++ // ++ // these 3 tests can be done by evaluating the following expression: ++ // ++ // (hdr - sp) & (aligned_mask - page_size) ++ // ++ // assuming both the stack pointer and page_size have their least ++ // significant 2 bits cleared and page_size is a power of 2 ++ sub_d(hdr, hdr, SP); ++ li(SCR1, aligned_mask - os::vm_page_size()); ++ andr(hdr, hdr, SCR1); ++ // for recursive locking, the result is zero => save it in the displaced header ++ // location (NULL in the displaced hdr location indicates recursive locking) ++ st_ptr(hdr, Address(disp_hdr, 0)); ++ // otherwise we don't care about the result and handle locking via runtime call ++ bnez(hdr, slow_case); ++ // done ++ bind(done); ++ return null_check_offset; ++} ++ ++void C1_MacroAssembler::unlock_object(Register hdr, Register obj, Register disp_hdr, Label& slow_case) { ++ const int aligned_mask = BytesPerWord -1; ++ const int hdr_offset = oopDesc::mark_offset_in_bytes(); ++ assert(hdr != obj && hdr != disp_hdr && obj != disp_hdr, "registers must be different"); ++ Label done; ++ ++ if (UseBiasedLocking) { ++ // load object ++ ld_ptr(obj, Address(disp_hdr, BasicObjectLock::obj_offset_in_bytes())); ++ biased_locking_exit(obj, hdr, done); ++ } ++ ++ // load displaced header ++ ld_ptr(hdr, Address(disp_hdr, 0)); ++ // if the loaded hdr is NULL we had recursive locking ++ // if we had recursive locking, 
we are done ++ beqz(hdr, done); ++ if (!UseBiasedLocking) { ++ // load object ++ ld_ptr(obj, Address(disp_hdr, BasicObjectLock::obj_offset_in_bytes())); ++ } ++ verify_oop(obj); ++ // test if object header is pointing to the displaced header, and if so, restore ++ // the displaced header in the object - if the object header is not pointing to ++ // the displaced header, get the object header instead ++ // if the object header was not pointing to the displaced header, ++ // we do unlocking via runtime call ++ if (hdr_offset) { ++ lea(SCR1, Address(obj, hdr_offset)); ++ cmpxchg(Address(SCR1, 0), disp_hdr, hdr, SCR2, false, false, done, &slow_case); ++ } else { ++ cmpxchg(Address(obj, 0), disp_hdr, hdr, SCR2, false, false, done, &slow_case); ++ } ++ // done ++ bind(done); ++} ++ ++// Defines obj, preserves var_size_in_bytes ++void C1_MacroAssembler::try_allocate(Register obj, Register var_size_in_bytes, ++ int con_size_in_bytes, Register t1, Register t2, ++ Label& slow_case) { ++ if (UseTLAB) { ++ tlab_allocate(obj, var_size_in_bytes, con_size_in_bytes, t1, t2, slow_case); ++ } else { ++ eden_allocate(obj, var_size_in_bytes, con_size_in_bytes, t1, slow_case); ++ } ++} ++ ++void C1_MacroAssembler::initialize_header(Register obj, Register klass, Register len, ++ Register t1, Register t2) { ++ assert_different_registers(obj, klass, len); ++ if (UseBiasedLocking && !len->is_valid()) { ++ assert_different_registers(obj, klass, len, t1, t2); ++ ld_ptr(t1, Address(klass, Klass::prototype_header_offset())); ++ } else { ++ // This assumes that all prototype bits fit in an int32_t ++ li(t1, (int32_t)(intptr_t)markOopDesc::prototype()); ++ } ++ st_ptr(t1, Address(obj, oopDesc::mark_offset_in_bytes())); ++ ++ if (UseCompressedClassPointers) { // Take care not to kill klass ++ encode_klass_not_null(t1, klass); ++ st_w(t1, Address(obj, oopDesc::klass_offset_in_bytes())); ++ } else { ++ st_ptr(klass, Address(obj, oopDesc::klass_offset_in_bytes())); ++ } ++ ++ if (len->is_valid()) { ++ st_w(len, Address(obj, arrayOopDesc::length_offset_in_bytes())); ++ } else if (UseCompressedClassPointers) { ++ store_klass_gap(obj, R0); ++ } ++} ++ ++// preserves obj, destroys len_in_bytes ++// ++// Scratch registers: t1 = T0, t2 = T1 ++// ++void C1_MacroAssembler::initialize_body(Register obj, Register len_in_bytes, ++ int hdr_size_in_bytes, Register t1, Register t2) { ++ assert(hdr_size_in_bytes >= 0, "header size must be positive or 0"); ++ assert(t1 == T0 && t2 == T1, "must be"); ++ Label done; ++ ++ // len_in_bytes is positive and ptr sized ++ addi_d(len_in_bytes, len_in_bytes, -hdr_size_in_bytes); ++ beqz(len_in_bytes, done); ++ ++ // zero_words() takes ptr in t1 and count in bytes in t2 ++ lea(t1, Address(obj, hdr_size_in_bytes)); ++ addi_d(t2, len_in_bytes, -BytesPerWord); ++ ++ Label loop; ++ bind(loop); ++ stx_d(R0, t1, t2); ++ addi_d(t2, t2, -BytesPerWord); ++ bge(t2, R0, loop); ++ ++ bind(done); ++} ++ ++void C1_MacroAssembler::allocate_object(Register obj, Register t1, Register t2, int header_size, ++ int object_size, Register klass, Label& slow_case) { ++ assert_different_registers(obj, t1, t2); ++ assert(header_size >= 0 && object_size >= header_size, "illegal sizes"); ++ ++ try_allocate(obj, noreg, object_size * BytesPerWord, t1, t2, slow_case); ++ ++ initialize_object(obj, klass, noreg, object_size * HeapWordSize, t1, t2, UseTLAB); ++} ++ ++// Scratch registers: t1 = T0, t2 = T1 ++void C1_MacroAssembler::initialize_object(Register obj, Register klass, Register var_size_in_bytes, ++ int con_size_in_bytes, 
Register t1, Register t2, ++ bool is_tlab_allocated) { ++ assert((con_size_in_bytes & MinObjAlignmentInBytesMask) == 0, ++ "con_size_in_bytes is not multiple of alignment"); ++ const int hdr_size_in_bytes = instanceOopDesc::header_size() * HeapWordSize; ++ ++ initialize_header(obj, klass, noreg, t1, t2); ++ ++ if (!(UseTLAB && ZeroTLAB && is_tlab_allocated)) { ++ // clear rest of allocated space ++ const Register index = t2; ++ if (var_size_in_bytes != noreg) { ++ move(index, var_size_in_bytes); ++ initialize_body(obj, index, hdr_size_in_bytes, t1, t2); ++ } else if (con_size_in_bytes > hdr_size_in_bytes) { ++ con_size_in_bytes -= hdr_size_in_bytes; ++ lea(t1, Address(obj, hdr_size_in_bytes)); ++ Label loop; ++ li(SCR1, con_size_in_bytes - BytesPerWord); ++ bind(loop); ++ stx_d(R0, t1, SCR1); ++ addi_d(SCR1, SCR1, -BytesPerWord); ++ bge(SCR1, R0, loop); ++ } ++ } ++ ++ membar(StoreStore); ++ ++ if (CURRENT_ENV->dtrace_alloc_probes()) { ++ assert(obj == A0, "must be"); ++ call(Runtime1::entry_for(Runtime1::dtrace_object_alloc_id), relocInfo::runtime_call_type); ++ } ++ ++ verify_oop(obj); ++} ++ ++void C1_MacroAssembler::allocate_array(Register obj, Register len, Register t1, Register t2, ++ int header_size, int f, Register klass, Label& slow_case) { ++ assert_different_registers(obj, len, t1, t2, klass); ++ ++ // determine alignment mask ++ assert(!(BytesPerWord & 1), "must be a multiple of 2 for masking code to work"); ++ ++ // check for negative or excessive length ++ li(SCR1, (int32_t)max_array_allocation_length); ++ bge_far(len, SCR1, slow_case, false); ++ ++ const Register arr_size = t2; // okay to be the same ++ // align object end ++ li(arr_size, (int32_t)header_size * BytesPerWord + MinObjAlignmentInBytesMask); ++ slli_w(SCR1, len, f); ++ add_d(arr_size, arr_size, SCR1); ++ bstrins_d(arr_size, R0, exact_log2(MinObjAlignmentInBytesMask + 1) - 1, 0); ++ ++ try_allocate(obj, arr_size, 0, t1, t2, slow_case); ++ ++ initialize_header(obj, klass, len, t1, t2); ++ ++ // clear rest of allocated space ++ initialize_body(obj, arr_size, header_size * BytesPerWord, t1, t2); ++ ++ membar(StoreStore); ++ ++ if (CURRENT_ENV->dtrace_alloc_probes()) { ++ assert(obj == A0, "must be"); ++ call(Runtime1::entry_for(Runtime1::dtrace_object_alloc_id), relocInfo::runtime_call_type); ++ } ++ ++ verify_oop(obj); ++} ++ ++void C1_MacroAssembler::build_frame(int framesize, int bang_size_in_bytes) { ++ assert(bang_size_in_bytes >= framesize, "stack bang size incorrect"); ++ // Make sure there is enough stack space for this method's activation. ++ // Note that we do this before creating a frame. ++ generate_stack_overflow_check(bang_size_in_bytes); ++ MacroAssembler::build_frame(framesize); ++} ++ ++void C1_MacroAssembler::remove_frame(int framesize) { ++ MacroAssembler::remove_frame(framesize); ++} ++ ++void C1_MacroAssembler::verified_entry() { ++ // If we have to make this method not-entrant we'll overwrite its ++ // first instruction with a jump. For this action to be legal we ++ // must ensure that this first instruction is a b, bl, nop, break. ++ // Make it a NOP. ++ nop(); ++} ++ ++void C1_MacroAssembler::load_parameter(int offset_in_words, Register reg) { ++ // rbp, + 0: link ++ // + 1: return address ++ // + 2: argument with offset 0 ++ // + 3: argument with offset 1 ++ // + 4: ... 
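++ //
++ // so the argument at logical offset N is loaded from FP + (N + 2) words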
++ ++ ld_ptr(reg, Address(FP, (offset_in_words + 2) * BytesPerWord)); ++} ++ ++#ifndef PRODUCT ++void C1_MacroAssembler::verify_stack_oop(int stack_offset) { ++ if (!VerifyOops) return; ++ verify_oop_addr(Address(SP, stack_offset), "oop"); ++} ++ ++void C1_MacroAssembler::verify_not_null_oop(Register r) { ++ if (!VerifyOops) return; ++ Label not_null; ++ bnez(r, not_null); ++ stop("non-null oop required"); ++ bind(not_null); ++ verify_oop(r); ++} ++ ++void C1_MacroAssembler::invalidate_registers(bool inv_a0, bool inv_s0, bool inv_a2, ++ bool inv_a3, bool inv_a4, bool inv_a5) { ++#ifdef ASSERT ++ static int nn; ++ if (inv_a0) li(A0, 0xDEAD); ++ if (inv_s0) li(S0, 0xDEAD); ++ if (inv_a2) li(A2, nn++); ++ if (inv_a3) li(A3, 0xDEAD); ++ if (inv_a4) li(A4, 0xDEAD); ++ if (inv_a5) li(A5, 0xDEAD); ++#endif ++} ++#endif // ifndef PRODUCT +diff --git a/hotspot/src/cpu/loongarch/vm/c1_Runtime1_loongarch_64.cpp b/hotspot/src/cpu/loongarch/vm/c1_Runtime1_loongarch_64.cpp +new file mode 100644 +index 0000000000..a750dca323 +--- /dev/null ++++ b/hotspot/src/cpu/loongarch/vm/c1_Runtime1_loongarch_64.cpp +@@ -0,0 +1,1252 @@ ++/* ++ * Copyright (c) 1999, 2021, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2021, 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ * ++ */ ++ ++#include "precompiled.hpp" ++#include "asm/assembler.hpp" ++#include "c1/c1_CodeStubs.hpp" ++#include "c1/c1_Defs.hpp" ++#include "c1/c1_MacroAssembler.hpp" ++#include "c1/c1_Runtime1.hpp" ++#include "compiler/disassembler.hpp" ++#include "compiler/oopMap.hpp" ++#include "interpreter/interpreter.hpp" ++#include "memory/universe.hpp" ++#include "nativeInst_loongarch.hpp" ++#include "oops/compiledICHolder.hpp" ++#include "oops/oop.inline.hpp" ++#include "prims/jvmtiExport.hpp" ++#include "register_loongarch.hpp" ++#include "runtime/sharedRuntime.hpp" ++#include "runtime/signature.hpp" ++#include "runtime/stubRoutines.hpp" ++#include "runtime/vframe.hpp" ++#include "runtime/vframeArray.hpp" ++#include "vmreg_loongarch.inline.hpp" ++#if INCLUDE_ALL_GCS ++#include "gc_implementation/g1/g1SATBCardTableModRefBS.hpp" ++#endif ++ ++#define A0 RA0 ++#define A1 RA1 ++#define A2 RA2 ++#define A3 RA3 ++#define A4 RA4 ++#define A5 RA5 ++#define A6 RA6 ++#define A7 RA7 ++#define T0 RT0 ++#define T1 RT1 ++#define T2 RT2 ++#define T3 RT3 ++#define T5 RT5 ++#define T6 RT6 ++#define T8 RT8 ++ ++// Implementation of StubAssembler ++ ++int StubAssembler::call_RT(Register oop_result1, Register metadata_result, address entry, int args_size) { ++ // setup registers ++ assert(!(oop_result1->is_valid() || metadata_result->is_valid()) || oop_result1 != metadata_result, ++ "registers must be different"); ++ assert(oop_result1 != TREG && metadata_result != TREG, "registers must be different"); ++ assert(args_size >= 0, "illegal args_size"); ++ bool align_stack = false; ++ ++ move(A0, TREG); ++ set_num_rt_args(0); // Nothing on stack ++ ++ Label retaddr; ++ set_last_Java_frame(SP, FP, retaddr); ++ ++ // do the call ++ call(entry, relocInfo::runtime_call_type); ++ bind(retaddr); ++ int call_offset = offset(); ++ // verify callee-saved register ++#ifdef ASSERT ++ { Label L; ++ get_thread(SCR1); ++ beq(TREG, SCR1, L); ++ stop("StubAssembler::call_RT: TREG not callee saved?"); ++ bind(L); ++ } ++#endif ++ reset_last_Java_frame(true); ++ ++ // check for pending exceptions ++ { Label L; ++ // check for pending exceptions (java_thread is set upon return) ++ ld_ptr(SCR1, Address(TREG, in_bytes(Thread::pending_exception_offset()))); ++ beqz(SCR1, L); ++ // exception pending => remove activation and forward to exception handler ++ // make sure that the vm_results are cleared ++ if (oop_result1->is_valid()) { ++ st_ptr(R0, Address(TREG, JavaThread::vm_result_offset())); ++ } ++ if (metadata_result->is_valid()) { ++ st_ptr(R0, Address(TREG, JavaThread::vm_result_2_offset())); ++ } ++ if (frame_size() == no_frame_size) { ++ leave(); ++ jmp(StubRoutines::forward_exception_entry(), relocInfo::runtime_call_type); ++ } else if (_stub_id == Runtime1::forward_exception_id) { ++ should_not_reach_here(); ++ } else { ++ jmp(Runtime1::entry_for(Runtime1::forward_exception_id), relocInfo::runtime_call_type); ++ } ++ bind(L); ++ } ++ // get oop results if there are any and reset the values in the thread ++ if (oop_result1->is_valid()) { ++ get_vm_result(oop_result1, TREG); ++ } ++ if (metadata_result->is_valid()) { ++ get_vm_result_2(metadata_result, TREG); ++ } ++ return call_offset; ++} ++ ++int StubAssembler::call_RT(Register oop_result1, Register metadata_result, ++ address entry, Register arg1) { ++ move(A1, arg1); ++ return call_RT(oop_result1, metadata_result, entry, 1); ++} ++ ++int StubAssembler::call_RT(Register oop_result1, Register metadata_result, ++ address entry, Register arg1, Register arg2) { ++ if (A1 == arg2) 
{ ++ if (A2 == arg1) { ++ move(SCR1, arg1); ++ move(arg1, arg2); ++ move(arg2, SCR1); ++ } else { ++ move(A2, arg2); ++ move(A1, arg1); ++ } ++ } else { ++ move(A1, arg1); ++ move(A2, arg2); ++ } ++ return call_RT(oop_result1, metadata_result, entry, 2); ++} ++ ++int StubAssembler::call_RT(Register oop_result1, Register metadata_result, ++ address entry, Register arg1, Register arg2, Register arg3) { ++ // if there is any conflict use the stack ++ if (arg1 == A2 || arg1 == A3 || ++ arg2 == A1 || arg2 == A3 || ++ arg3 == A1 || arg3 == A2) { ++ addi_d(SP, SP, -4 * wordSize); ++ st_ptr(arg1, Address(SP, 0 * wordSize)); ++ st_ptr(arg2, Address(SP, 1 * wordSize)); ++ st_ptr(arg3, Address(SP, 2 * wordSize)); ++ ld_ptr(arg1, Address(SP, 0 * wordSize)); ++ ld_ptr(arg2, Address(SP, 1 * wordSize)); ++ ld_ptr(arg3, Address(SP, 2 * wordSize)); ++ addi_d(SP, SP, 4 * wordSize); ++ } else { ++ move(A1, arg1); ++ move(A2, arg2); ++ move(A3, arg3); ++ } ++ return call_RT(oop_result1, metadata_result, entry, 3); ++} ++ ++// Implementation of StubFrame ++ ++class StubFrame: public StackObj { ++ private: ++ StubAssembler* _sasm; ++ ++ public: ++ StubFrame(StubAssembler* sasm, const char* name, bool must_gc_arguments); ++ void load_argument(int offset_in_words, Register reg); ++ ++ ~StubFrame(); ++};; ++ ++#define __ _sasm-> ++ ++StubFrame::StubFrame(StubAssembler* sasm, const char* name, bool must_gc_arguments) { ++ _sasm = sasm; ++ __ set_info(name, must_gc_arguments); ++ __ enter(); ++} ++ ++// load parameters that were stored with LIR_Assembler::store_parameter ++// Note: offsets for store_parameter and load_argument must match ++void StubFrame::load_argument(int offset_in_words, Register reg) { ++ __ load_parameter(offset_in_words, reg); ++} ++ ++StubFrame::~StubFrame() { ++ __ leave(); ++ __ jr(RA); ++} ++ ++#undef __ ++ ++// Implementation of Runtime1 ++ ++#define __ sasm-> ++ ++const int float_regs_as_doubles_size_in_slots = pd_nof_fpu_regs_frame_map * 2; ++ ++// Stack layout for saving/restoring all the registers needed during a runtime ++// call (this includes deoptimization) ++// Note: note that users of this frame may well have arguments to some runtime ++// while these values are on the stack. These positions neglect those arguments ++// but the code in save_live_registers will take the argument count into ++// account. ++// ++ ++enum reg_save_layout { ++ reg_save_frame_size = 32 /* float */ + 30 /* integer, except zr, tp */ ++}; ++ ++// Save off registers which might be killed by calls into the runtime. ++// Tries to smart of about FP registers. In particular we separate ++// saving and describing the FPU registers for deoptimization since we ++// have to save the FPU registers twice if we describe them. The ++// deopt blob is the only thing which needs to describe FPU registers. ++// In all other cases it should be sufficient to simply save their ++// current value. 
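++//
++// The save offsets below are expressed in VMReg stack slots (two slots per
++// 64-bit word) and are filled in by Runtime1::initialize_pd().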
++ ++static int cpu_reg_save_offsets[FrameMap::nof_cpu_regs]; ++static int fpu_reg_save_offsets[FrameMap::nof_fpu_regs]; ++static int reg_save_size_in_words; ++static int frame_size_in_bytes = -1; ++ ++static OopMap* generate_oop_map(StubAssembler* sasm, bool save_fpu_registers) { ++ int frame_size_in_bytes = reg_save_frame_size * BytesPerWord; ++ sasm->set_frame_size(frame_size_in_bytes / BytesPerWord); ++ int frame_size_in_slots = frame_size_in_bytes / VMRegImpl::stack_slot_size; ++ OopMap* oop_map = new OopMap(frame_size_in_slots, 0); ++ ++ for (int i = A0->encoding(); i <= T8->encoding(); i++) { ++ Register r = as_Register(i); ++ if (i != SCR1->encoding() && i != SCR2->encoding()) { ++ int sp_offset = cpu_reg_save_offsets[i]; ++ oop_map->set_callee_saved(VMRegImpl::stack2reg(sp_offset), r->as_VMReg()); ++ } ++ } ++ ++ if (save_fpu_registers) { ++ for (int i = 0; i < FrameMap::nof_fpu_regs; i++) { ++ FloatRegister r = as_FloatRegister(i); ++ int sp_offset = fpu_reg_save_offsets[i]; ++ oop_map->set_callee_saved(VMRegImpl::stack2reg(sp_offset), r->as_VMReg()); ++ } ++ } ++ ++ return oop_map; ++} ++ ++static OopMap* save_live_registers(StubAssembler* sasm, ++ bool save_fpu_registers = true) { ++ __ block_comment("save_live_registers"); ++ ++ // integer registers except zr & ra & tp & sp ++ __ addi_d(SP, SP, -(32 - 4 + 32) * wordSize); ++ ++ for (int i = 4; i < 32; i++) ++ __ st_ptr(as_Register(i), Address(SP, (32 + i - 4) * wordSize)); ++ ++ if (save_fpu_registers) { ++ for (int i = 0; i < 32; i++) ++ __ fst_d(as_FloatRegister(i), Address(SP, i * wordSize)); ++ } ++ ++ return generate_oop_map(sasm, save_fpu_registers); ++} ++ ++static void restore_live_registers(StubAssembler* sasm, bool restore_fpu_registers = true) { ++ if (restore_fpu_registers) { ++ for (int i = 0; i < 32; i ++) ++ __ fld_d(as_FloatRegister(i), Address(SP, i * wordSize)); ++ } ++ ++ for (int i = 4; i < 32; i++) ++ __ ld_ptr(as_Register(i), Address(SP, (32 + i - 4) * wordSize)); ++ ++ __ addi_d(SP, SP, (32 - 4 + 32) * wordSize); ++} ++ ++static void restore_live_registers_except_a0(StubAssembler* sasm, bool restore_fpu_registers = true) { ++ if (restore_fpu_registers) { ++ for (int i = 0; i < 32; i ++) ++ __ fld_d(as_FloatRegister(i), Address(SP, i * wordSize)); ++ } ++ ++ for (int i = 5; i < 32; i++) ++ __ ld_ptr(as_Register(i), Address(SP, (32 + i - 4) * wordSize)); ++ ++ __ addi_d(SP, SP, (32 - 4 + 32) * wordSize); ++} ++ ++void Runtime1::initialize_pd() { ++ int sp_offset = 0; ++ int i; ++ ++ // all float registers are saved explicitly ++ assert(FrameMap::nof_fpu_regs == 32, "double registers not handled here"); ++ for (i = 0; i < FrameMap::nof_fpu_regs; i++) { ++ fpu_reg_save_offsets[i] = sp_offset; ++ sp_offset += 2; // SP offsets are in halfwords ++ } ++ ++ for (i = 4; i < FrameMap::nof_cpu_regs; i++) { ++ Register r = as_Register(i); ++ cpu_reg_save_offsets[i] = sp_offset; ++ sp_offset += 2; // SP offsets are in halfwords ++ } ++} ++ ++// target: the entry point of the method that creates and posts the exception oop ++// has_argument: true if the exception needs arguments (passed in SCR1 and SCR2) ++ ++OopMapSet* Runtime1::generate_exception_throw(StubAssembler* sasm, address target, ++ bool has_argument) { ++ // make a frame and preserve the caller's caller-save registers ++ OopMap* oop_map = save_live_registers(sasm); ++ int call_offset; ++ if (!has_argument) { ++ call_offset = __ call_RT(noreg, noreg, target); ++ } else { ++ __ move(A1, SCR1); ++ __ move(A2, SCR2); ++ call_offset = __ call_RT(noreg, noreg, 
target); ++ } ++ OopMapSet* oop_maps = new OopMapSet(); ++ oop_maps->add_gc_map(call_offset, oop_map); ++ return oop_maps; ++} ++ ++OopMapSet* Runtime1::generate_handle_exception(StubID id, StubAssembler *sasm) { ++ __ block_comment("generate_handle_exception"); ++ ++ // incoming parameters ++ const Register exception_oop = A0; ++ const Register exception_pc = A1; ++ // other registers used in this stub ++ ++ // Save registers, if required. ++ OopMapSet* oop_maps = new OopMapSet(); ++ OopMap* oop_map = NULL; ++ switch (id) { ++ case forward_exception_id: ++ // We're handling an exception in the context of a compiled frame. ++ // The registers have been saved in the standard places. Perform ++ // an exception lookup in the caller and dispatch to the handler ++ // if found. Otherwise unwind and dispatch to the callers ++ // exception handler. ++ oop_map = generate_oop_map(sasm, 1 /*thread*/); ++ ++ // load and clear pending exception oop into A0 ++ __ ld_ptr(exception_oop, Address(TREG, Thread::pending_exception_offset())); ++ __ st_ptr(R0, Address(TREG, Thread::pending_exception_offset())); ++ ++ // load issuing PC (the return address for this stub) into A1 ++ __ ld_ptr(exception_pc, Address(FP, 1 * BytesPerWord)); ++ ++ // make sure that the vm_results are cleared (may be unnecessary) ++ __ st_ptr(R0, Address(TREG, JavaThread::vm_result_offset())); ++ __ st_ptr(R0, Address(TREG, JavaThread::vm_result_2_offset())); ++ break; ++ case handle_exception_nofpu_id: ++ case handle_exception_id: ++ // At this point all registers MAY be live. ++ oop_map = save_live_registers(sasm, id != handle_exception_nofpu_id); ++ break; ++ case handle_exception_from_callee_id: { ++ // At this point all registers except exception oop (A0) and ++ // exception pc (RA) are dead. ++ const int frame_size = 2 /*fp, return address*/; ++ oop_map = new OopMap(frame_size * VMRegImpl::slots_per_word, 0); ++ sasm->set_frame_size(frame_size); ++ break; ++ } ++ default: ShouldNotReachHere(); ++ } ++ ++ // verify that only A0 and A1 are valid at this time ++ __ invalidate_registers(false, true, true, true, true, true); ++ // verify that A0 contains a valid exception ++ __ verify_not_null_oop(exception_oop); ++ ++#ifdef ASSERT ++ // check that fields in JavaThread for exception oop and issuing pc are ++ // empty before writing to them ++ Label oop_empty; ++ __ ld_ptr(SCR1, Address(TREG, JavaThread::exception_oop_offset())); ++ __ beqz(SCR1, oop_empty); ++ __ stop("exception oop already set"); ++ __ bind(oop_empty); ++ ++ Label pc_empty; ++ __ ld_ptr(SCR1, Address(TREG, JavaThread::exception_pc_offset())); ++ __ beqz(SCR1, pc_empty); ++ __ stop("exception pc already set"); ++ __ bind(pc_empty); ++#endif ++ ++ // save exception oop and issuing pc into JavaThread ++ // (exception handler will load it from here) ++ __ st_ptr(exception_oop, Address(TREG, JavaThread::exception_oop_offset())); ++ __ st_ptr(exception_pc, Address(TREG, JavaThread::exception_pc_offset())); ++ ++ // patch throwing pc into return address (has bci & oop map) ++ __ st_ptr(exception_pc, Address(FP, 1 * BytesPerWord)); ++ ++ // compute the exception handler. ++ // the exception oop and the throwing pc are read from the fields in JavaThread ++ int call_offset = __ call_RT(noreg, noreg, CAST_FROM_FN_PTR(address, exception_handler_for_pc)); ++ oop_maps->add_gc_map(call_offset, oop_map); ++ ++ // A0: handler address ++ // will be the deopt blob if nmethod was deoptimized while we looked up ++ // handler regardless of whether handler existed in the nmethod. 
++ ++ // only A0 is valid at this time, all other registers have been destroyed by the runtime call ++ __ invalidate_registers(false, true, true, true, true, true); ++ ++ // patch the return address, this stub will directly return to the exception handler ++ __ st_ptr(A0, Address(FP, 1 * BytesPerWord)); ++ ++ switch (id) { ++ case forward_exception_id: ++ case handle_exception_nofpu_id: ++ case handle_exception_id: ++ // Restore the registers that were saved at the beginning. ++ restore_live_registers(sasm, id != handle_exception_nofpu_id); ++ break; ++ case handle_exception_from_callee_id: ++ break; ++ default: ShouldNotReachHere(); ++ } ++ ++ return oop_maps; ++} ++ ++void Runtime1::generate_unwind_exception(StubAssembler *sasm) { ++ // incoming parameters ++ const Register exception_oop = A0; ++ // callee-saved copy of exception_oop during runtime call ++ const Register exception_oop_callee_saved = S0; ++ // other registers used in this stub ++ const Register exception_pc = A1; ++ const Register handler_addr = A3; ++ ++ // verify that only A0, is valid at this time ++ __ invalidate_registers(false, true, true, true, true, true); ++ ++#ifdef ASSERT ++ // check that fields in JavaThread for exception oop and issuing pc are empty ++ Label oop_empty; ++ __ ld_ptr(SCR1, Address(TREG, JavaThread::exception_oop_offset())); ++ __ beqz(SCR1, oop_empty); ++ __ stop("exception oop must be empty"); ++ __ bind(oop_empty); ++ ++ Label pc_empty; ++ __ ld_ptr(SCR1, Address(TREG, JavaThread::exception_pc_offset())); ++ __ beqz(SCR1, pc_empty); ++ __ stop("exception pc must be empty"); ++ __ bind(pc_empty); ++#endif ++ ++ // Save our return address because ++ // exception_handler_for_return_address will destroy it. We also ++ // save exception_oop ++ __ addi_d(SP, SP, -2 * wordSize); ++ __ st_ptr(RA, Address(SP, 0 * wordSize)); ++ __ st_ptr(exception_oop, Address(SP, 1 * wordSize)); ++ ++ // search the exception handler address of the caller (using the return address) ++ __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::exception_handler_for_return_address), TREG, RA); ++ // V0: exception handler address of the caller ++ ++ // Only V0 is valid at this time; all other registers have been ++ // destroyed by the call. ++ __ invalidate_registers(false, true, true, true, false, true); ++ ++ // move result of call into correct register ++ __ move(handler_addr, A0); ++ ++ // get throwing pc (= return address). ++ // RA has been destroyed by the call ++ __ ld_ptr(RA, Address(SP, 0 * wordSize)); ++ __ ld_ptr(exception_oop, Address(SP, 1 * wordSize)); ++ __ addi_d(SP, SP, 2 * wordSize); ++ __ move(A1, RA); ++ ++ __ verify_not_null_oop(exception_oop); ++ ++ // continue at exception handler (return address removed) ++ // note: do *not* remove arguments when unwinding the ++ // activation since the caller assumes having ++ // all arguments on the stack when entering the ++ // runtime to determine the exception handler ++ // (GC happens at call site with arguments!) ++ // A0: exception oop ++ // A1: throwing pc ++ // A3: exception handler ++ __ jr(handler_addr); ++} ++ ++OopMapSet* Runtime1::generate_patching(StubAssembler* sasm, address target) { ++ // use the maximum number of runtime-arguments here because it is difficult to ++ // distinguish each RT-Call. ++ // Note: This number affects also the RT-Call in generate_handle_exception because ++ // the oop-map is shared for all calls. 
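++ //
++ // Flow: save all live registers, call the patching runtime entry, then
++ // either re-execute through the deopt blob (if the runtime deoptimized
++ // the nmethod) or restore the registers and return to the caller.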
++ DeoptimizationBlob* deopt_blob = SharedRuntime::deopt_blob(); ++ assert(deopt_blob != NULL, "deoptimization blob must have been created"); ++ ++ OopMap* oop_map = save_live_registers(sasm); ++ ++ __ move(A0, TREG); ++ Label retaddr; ++ __ set_last_Java_frame(SP, FP, retaddr); ++ // do the call ++ __ call(target, relocInfo::runtime_call_type); ++ __ bind(retaddr); ++ OopMapSet* oop_maps = new OopMapSet(); ++ oop_maps->add_gc_map(__ offset(), oop_map); ++ // verify callee-saved register ++#ifdef ASSERT ++ { Label L; ++ __ get_thread(SCR1); ++ __ beq(TREG, SCR1, L); ++ __ stop("StubAssembler::call_RT: rthread not callee saved?"); ++ __ bind(L); ++ } ++#endif ++ ++ __ reset_last_Java_frame(true); ++ ++#ifdef ASSERT ++ // check that fields in JavaThread for exception oop and issuing pc are empty ++ Label oop_empty; ++ __ ld_ptr(SCR1, Address(TREG, Thread::pending_exception_offset())); ++ __ beqz(SCR1, oop_empty); ++ __ stop("exception oop must be empty"); ++ __ bind(oop_empty); ++ ++ Label pc_empty; ++ __ ld_ptr(SCR1, Address(TREG, JavaThread::exception_pc_offset())); ++ __ beqz(SCR1, pc_empty); ++ __ stop("exception pc must be empty"); ++ __ bind(pc_empty); ++#endif ++ ++ // Runtime will return true if the nmethod has been deoptimized, this is the ++ // expected scenario and anything else is an error. Note that we maintain a ++ // check on the result purely as a defensive measure. ++ Label no_deopt; ++ __ beqz(A0, no_deopt); // Have we deoptimized? ++ ++ // Perform a re-execute. The proper return address is already on the stack, ++ // we just need to restore registers, pop all of our frame but the return ++ // address and jump to the deopt blob. ++ restore_live_registers(sasm); ++ __ leave(); ++ __ jmp(deopt_blob->unpack_with_reexecution(), relocInfo::runtime_call_type); ++ ++ __ bind(no_deopt); ++ restore_live_registers(sasm); ++ __ leave(); ++ __ jr(RA); ++ ++ return oop_maps; ++} ++ ++OopMapSet* Runtime1::generate_code_for(StubID id, StubAssembler* sasm) { ++ // for better readability ++ const bool must_gc_arguments = true; ++ const bool dont_gc_arguments = false; ++ ++ // default value; overwritten for some optimized stubs that are called ++ // from methods that do not use the fpu ++ bool save_fpu_registers = true; ++ ++ // stub code & info for the different stubs ++ OopMapSet* oop_maps = NULL; ++ OopMap* oop_map = NULL; ++ switch (id) { ++ { ++ case forward_exception_id: ++ { ++ oop_maps = generate_handle_exception(id, sasm); ++ __ leave(); ++ __ jr(RA); ++ } ++ break; ++ ++ case throw_div0_exception_id: ++ { ++ StubFrame f(sasm, "throw_div0_exception", dont_gc_arguments); ++ oop_maps = generate_exception_throw(sasm, CAST_FROM_FN_PTR(address, throw_div0_exception), false); ++ } ++ break; ++ ++ case throw_null_pointer_exception_id: ++ { ++ StubFrame f(sasm, "throw_null_pointer_exception", dont_gc_arguments); ++ oop_maps = generate_exception_throw(sasm, CAST_FROM_FN_PTR(address, throw_null_pointer_exception), false); ++ } ++ break; ++ ++ case new_instance_id: ++ case fast_new_instance_id: ++ case fast_new_instance_init_check_id: ++ { ++ Register klass = A3; // Incoming ++ Register obj = A0; // Result ++ ++ if (id == new_instance_id) { ++ __ set_info("new_instance", dont_gc_arguments); ++ } else if (id == fast_new_instance_id) { ++ __ set_info("fast new_instance", dont_gc_arguments); ++ } else { ++ assert(id == fast_new_instance_init_check_id, "bad StubID"); ++ __ set_info("fast new_instance init check", dont_gc_arguments); ++ } ++ ++ // If TLAB is disabled, see if there is support for 
inlining contiguous ++ // allocations. ++ // Otherwise, just go to the slow path. ++ if ((id == fast_new_instance_id || id == fast_new_instance_init_check_id) && ++ !UseTLAB && Universe::heap()->supports_inline_contig_alloc()) { ++ Label slow_path; ++ Register obj_size = S0; ++ Register t1 = T0; ++ Register t2 = T1; ++ assert_different_registers(klass, obj, obj_size, t1, t2); ++ ++ __ addi_d(SP, SP, -2 * wordSize); ++ __ st_ptr(S0, Address(SP, 0)); ++ ++ if (id == fast_new_instance_init_check_id) { ++ // make sure the klass is initialized ++ __ ld_bu(SCR1, Address(klass, InstanceKlass::init_state_offset())); ++ __ li(SCR2, InstanceKlass::fully_initialized); ++ __ bne_far(SCR1, SCR2, slow_path); ++ } ++ ++#ifdef ASSERT ++ // assert object can be fast path allocated ++ { ++ Label ok, not_ok; ++ __ ld_w(obj_size, Address(klass, Klass::layout_helper_offset())); ++ __ bge(R0, obj_size, not_ok); // make sure it's an instance (LH > 0) ++ __ andi(SCR1, obj_size, Klass::_lh_instance_slow_path_bit); ++ __ beqz(SCR1, ok); ++ __ bind(not_ok); ++ __ stop("assert(can be fast path allocated)"); ++ __ should_not_reach_here(); ++ __ bind(ok); ++ } ++#endif // ASSERT ++ ++ // get the instance size (size is postive so movl is fine for 64bit) ++ __ ld_w(obj_size, Address(klass, Klass::layout_helper_offset())); ++ ++ __ eden_allocate(obj, obj_size, 0, t1, slow_path); ++ ++ __ initialize_object(obj, klass, obj_size, 0, t1, t2, /* is_tlab_allocated */ false); ++ __ verify_oop(obj); ++ __ ld_ptr(S0, Address(SP, 0)); ++ __ addi_d(SP, SP, 2 * wordSize); ++ __ jr(RA); ++ ++ __ bind(slow_path); ++ __ ld_ptr(S0, Address(SP, 0)); ++ __ addi_d(SP, SP, 2 * wordSize); ++ } ++ ++ __ enter(); ++ OopMap* map = save_live_registers(sasm); ++ int call_offset = __ call_RT(obj, noreg, CAST_FROM_FN_PTR(address, new_instance), klass); ++ oop_maps = new OopMapSet(); ++ oop_maps->add_gc_map(call_offset, map); ++ restore_live_registers_except_a0(sasm); ++ __ verify_oop(obj); ++ __ leave(); ++ __ jr(RA); ++ ++ // A0,: new instance ++ } ++ ++ break; ++ ++ case counter_overflow_id: ++ { ++ Register bci = A0, method = A1; ++ __ enter(); ++ OopMap* map = save_live_registers(sasm); ++ // Retrieve bci ++ __ ld_w(bci, Address(FP, 2 * BytesPerWord)); ++ // And a pointer to the Method* ++ __ ld_d(method, Address(FP, 3 * BytesPerWord)); ++ int call_offset = __ call_RT(noreg, noreg, CAST_FROM_FN_PTR(address, counter_overflow), bci, method); ++ oop_maps = new OopMapSet(); ++ oop_maps->add_gc_map(call_offset, map); ++ restore_live_registers(sasm); ++ __ leave(); ++ __ jr(RA); ++ } ++ break; ++ ++ case new_type_array_id: ++ case new_object_array_id: ++ { ++ Register length = S0; // Incoming ++ Register klass = A3; // Incoming ++ Register obj = A0; // Result ++ ++ if (id == new_type_array_id) { ++ __ set_info("new_type_array", dont_gc_arguments); ++ } else { ++ __ set_info("new_object_array", dont_gc_arguments); ++ } ++ ++#ifdef ASSERT ++ // assert object type is really an array of the proper kind ++ { ++ Label ok; ++ Register t0 = obj; ++ __ ld_w(t0, Address(klass, Klass::layout_helper_offset())); ++ __ srai_w(t0, t0, Klass::_lh_array_tag_shift); ++ int tag = ((id == new_type_array_id) ++ ? Klass::_lh_array_tag_type_value ++ : Klass::_lh_array_tag_obj_value); ++ __ li(SCR1, tag); ++ __ beq(t0, SCR1, ok); ++ __ stop("assert(is an array klass)"); ++ __ should_not_reach_here(); ++ __ bind(ok); ++ } ++#endif // ASSERT ++ ++ // If TLAB is disabled, see if there is support for inlining contiguous ++ // allocations. 
++ // Otherwise, just go to the slow path. ++ if (!UseTLAB && Universe::heap()->supports_inline_contig_alloc()) { ++ Register arr_size = A5; ++ Register t1 = T0; ++ Register t2 = T1; ++ Label slow_path; ++ assert_different_registers(length, klass, obj, arr_size, t1, t2); ++ ++ // check that array length is small enough for fast path. ++ __ li(SCR1, C1_MacroAssembler::max_array_allocation_length); ++ __ blt_far(SCR1, length, slow_path, false); ++ ++ // get the allocation size: round_up(hdr + length << (layout_helper & 0x1F)) ++ // since size is positive ldrw does right thing on 64bit ++ __ ld_w(t1, Address(klass, Klass::layout_helper_offset())); ++ // since size is positive movw does right thing on 64bit ++ __ move(arr_size, length); ++ __ sll_w(arr_size, length, t1); ++ __ bstrpick_d(t1, t1, Klass::_lh_header_size_shift + ++ exact_log2(Klass::_lh_header_size_mask + 1) - 1, ++ Klass::_lh_header_size_shift); ++ __ add_d(arr_size, arr_size, t1); ++ __ addi_d(arr_size, arr_size, MinObjAlignmentInBytesMask); // align up ++ __ bstrins_d(arr_size, R0, exact_log2(MinObjAlignmentInBytesMask + 1) - 1, 0); ++ ++ __ eden_allocate(obj, arr_size, 0, t1, slow_path); // preserves arr_size ++ ++ __ initialize_header(obj, klass, length, t1, t2); ++ __ ld_bu(t1, Address(klass, in_bytes(Klass::layout_helper_offset()) + (Klass::_lh_header_size_shift / BitsPerByte))); ++ assert(Klass::_lh_header_size_shift % BitsPerByte == 0, "bytewise"); ++ assert(Klass::_lh_header_size_mask <= 0xFF, "bytewise"); ++ __ andi(t1, t1, Klass::_lh_header_size_mask); ++ __ sub_d(arr_size, arr_size, t1); // body length ++ __ add_d(t1, t1, obj); // body start ++ __ initialize_body(t1, arr_size, 0, t1, t2); ++ __ membar(Assembler::StoreStore); ++ __ verify_oop(obj); ++ ++ __ jr(RA); ++ ++ __ bind(slow_path); ++ } ++ ++ __ enter(); ++ OopMap* map = save_live_registers(sasm); ++ int call_offset; ++ if (id == new_type_array_id) { ++ call_offset = __ call_RT(obj, noreg, CAST_FROM_FN_PTR(address, new_type_array), klass, length); ++ } else { ++ call_offset = __ call_RT(obj, noreg, CAST_FROM_FN_PTR(address, new_object_array), klass, length); ++ } ++ ++ oop_maps = new OopMapSet(); ++ oop_maps->add_gc_map(call_offset, map); ++ restore_live_registers_except_a0(sasm); ++ ++ __ verify_oop(obj); ++ __ leave(); ++ __ jr(RA); ++ ++ // A0: new array ++ } ++ break; ++ ++ case new_multi_array_id: ++ { ++ StubFrame f(sasm, "new_multi_array", dont_gc_arguments); ++ // A0,: klass ++ // S0,: rank ++ // A2: address of 1st dimension ++ OopMap* map = save_live_registers(sasm); ++ __ move(A1, A0); ++ __ move(A3, A2); ++ __ move(A2, S0); ++ int call_offset = __ call_RT(A0, noreg, CAST_FROM_FN_PTR(address, new_multi_array), A1, A2, A3); ++ ++ oop_maps = new OopMapSet(); ++ oop_maps->add_gc_map(call_offset, map); ++ restore_live_registers_except_a0(sasm); ++ ++ // A0,: new multi array ++ __ verify_oop(A0); ++ } ++ break; ++ ++ case register_finalizer_id: ++ { ++ __ set_info("register_finalizer", dont_gc_arguments); ++ ++ // This is called via call_runtime so the arguments ++ // will be place in C abi locations ++ ++ __ verify_oop(A0); ++ ++ // load the klass and check the has finalizer flag ++ Label register_finalizer; ++ Register t = A5; ++ __ load_klass(t, A0); ++ __ ld_w(t, Address(t, Klass::access_flags_offset())); ++ __ li(SCR1, JVM_ACC_HAS_FINALIZER); ++ __ andr(SCR1, t, SCR1); ++ __ bnez(SCR1, register_finalizer); ++ __ jr(RA); ++ ++ __ bind(register_finalizer); ++ __ enter(); ++ OopMap* oop_map = save_live_registers(sasm); ++ int call_offset = __ 
call_RT(noreg, noreg, CAST_FROM_FN_PTR(address, SharedRuntime::register_finalizer), A0); ++ oop_maps = new OopMapSet(); ++ oop_maps->add_gc_map(call_offset, oop_map); ++ ++ // Now restore all the live registers ++ restore_live_registers(sasm); ++ ++ __ leave(); ++ __ jr(RA); ++ } ++ break; ++ ++ case throw_class_cast_exception_id: ++ { ++ StubFrame f(sasm, "throw_class_cast_exception", dont_gc_arguments); ++ oop_maps = generate_exception_throw(sasm, CAST_FROM_FN_PTR(address, throw_class_cast_exception), true); ++ } ++ break; ++ ++ case throw_incompatible_class_change_error_id: ++ { ++ StubFrame f(sasm, "throw_incompatible_class_cast_exception", dont_gc_arguments); ++ oop_maps = generate_exception_throw(sasm, CAST_FROM_FN_PTR(address, throw_incompatible_class_change_error), false); ++ } ++ break; ++ ++ case slow_subtype_check_id: ++ { ++ // Typical calling sequence: ++ // __ push(klass_RInfo); // object klass or other subclass ++ // __ push(sup_k_RInfo); // array element klass or other superclass ++ // __ bl(slow_subtype_check); ++ // Note that the subclass is pushed first, and is therefore deepest. ++ enum layout { ++ a0_off, a0_off_hi, ++ a2_off, a2_off_hi, ++ a4_off, a4_off_hi, ++ a5_off, a5_off_hi, ++ sup_k_off, sup_k_off_hi, ++ klass_off, klass_off_hi, ++ framesize, ++ result_off = sup_k_off ++ }; ++ ++ __ set_info("slow_subtype_check", dont_gc_arguments); ++ __ addi_d(SP, SP, -4 * wordSize); ++ __ st_ptr(A0, Address(SP, a0_off * VMRegImpl::stack_slot_size)); ++ __ st_ptr(A2, Address(SP, a2_off * VMRegImpl::stack_slot_size)); ++ __ st_ptr(A4, Address(SP, a4_off * VMRegImpl::stack_slot_size)); ++ __ st_ptr(A5, Address(SP, a5_off * VMRegImpl::stack_slot_size)); ++ ++ // This is called by pushing args and not with C abi ++ __ ld_ptr(A4, Address(SP, klass_off * VMRegImpl::stack_slot_size)); // subclass ++ __ ld_ptr(A0, Address(SP, sup_k_off * VMRegImpl::stack_slot_size)); // superclass ++ ++ Label miss; ++ __ check_klass_subtype_slow_path(A4, A0, A2, A5, NULL, &miss); ++ ++ // fallthrough on success: ++ __ li(SCR1, 1); ++ __ st_ptr(SCR1, Address(SP, result_off * VMRegImpl::stack_slot_size)); // result ++ __ ld_ptr(A0, Address(SP, a0_off * VMRegImpl::stack_slot_size)); ++ __ ld_ptr(A2, Address(SP, a2_off * VMRegImpl::stack_slot_size)); ++ __ ld_ptr(A4, Address(SP, a4_off * VMRegImpl::stack_slot_size)); ++ __ ld_ptr(A5, Address(SP, a5_off * VMRegImpl::stack_slot_size)); ++ __ addi_d(SP, SP, 4 * wordSize); ++ __ jr(RA); ++ ++ __ bind(miss); ++ __ st_ptr(R0, Address(SP, result_off * VMRegImpl::stack_slot_size)); // result ++ __ ld_ptr(A0, Address(SP, a0_off * VMRegImpl::stack_slot_size)); ++ __ ld_ptr(A2, Address(SP, a2_off * VMRegImpl::stack_slot_size)); ++ __ ld_ptr(A4, Address(SP, a4_off * VMRegImpl::stack_slot_size)); ++ __ ld_ptr(A5, Address(SP, a5_off * VMRegImpl::stack_slot_size)); ++ __ addi_d(SP, SP, 4 * wordSize); ++ __ jr(RA); ++ } ++ break; ++ ++ case monitorenter_nofpu_id: ++ save_fpu_registers = false; ++ // fall through ++ case monitorenter_id: ++ { ++ StubFrame f(sasm, "monitorenter", dont_gc_arguments); ++ OopMap* map = save_live_registers(sasm, save_fpu_registers); ++ ++ // Called with store_parameter and not C abi ++ ++ f.load_argument(1, A0); // A0,: object ++ f.load_argument(0, A1); // A1,: lock address ++ ++ int call_offset = __ call_RT(noreg, noreg, CAST_FROM_FN_PTR(address, monitorenter), A0, A1); ++ ++ oop_maps = new OopMapSet(); ++ oop_maps->add_gc_map(call_offset, map); ++ restore_live_registers(sasm, save_fpu_registers); ++ } ++ break; ++ ++ case 
monitorexit_nofpu_id: ++ save_fpu_registers = false; ++ // fall through ++ case monitorexit_id: ++ { ++ StubFrame f(sasm, "monitorexit", dont_gc_arguments); ++ OopMap* map = save_live_registers(sasm, save_fpu_registers); ++ ++ // Called with store_parameter and not C abi ++ ++ f.load_argument(0, A0); // A0,: lock address ++ ++ // note: really a leaf routine but must setup last java sp ++ // => use call_RT for now (speed can be improved by ++ // doing last java sp setup manually) ++ int call_offset = __ call_RT(noreg, noreg, CAST_FROM_FN_PTR(address, monitorexit), A0); ++ ++ oop_maps = new OopMapSet(); ++ oop_maps->add_gc_map(call_offset, map); ++ restore_live_registers(sasm, save_fpu_registers); ++ } ++ break; ++ ++ case deoptimize_id: ++ { ++ StubFrame f(sasm, "deoptimize", dont_gc_arguments); ++ OopMap* oop_map = save_live_registers(sasm); ++ f.load_argument(0, A1); ++ int call_offset = __ call_RT(noreg, noreg, CAST_FROM_FN_PTR(address, deoptimize), A1); ++ ++ oop_maps = new OopMapSet(); ++ oop_maps->add_gc_map(call_offset, oop_map); ++ restore_live_registers(sasm); ++ DeoptimizationBlob* deopt_blob = SharedRuntime::deopt_blob(); ++ assert(deopt_blob != NULL, "deoptimization blob must have been created"); ++ __ leave(); ++ __ jmp(deopt_blob->unpack_with_reexecution(), relocInfo::runtime_call_type); ++ } ++ break; ++ ++ case throw_range_check_failed_id: ++ { ++ StubFrame f(sasm, "range_check_failed", dont_gc_arguments); ++ oop_maps = generate_exception_throw(sasm, CAST_FROM_FN_PTR(address, throw_range_check_exception), true); ++ } ++ break; ++ ++ case unwind_exception_id: ++ { ++ __ set_info("unwind_exception", dont_gc_arguments); ++ // note: no stubframe since we are about to leave the current ++ // activation and we are calling a leaf VM function only. 
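++ // generate_unwind_exception() emits the code that hands the pending
++ // exception back to the caller's exception handler (a sketch of intent;
++ // the helper itself is not shown in this hunk).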
++ generate_unwind_exception(sasm); ++ } ++ break; ++ ++ case access_field_patching_id: ++ { ++ StubFrame f(sasm, "access_field_patching", dont_gc_arguments); ++ // we should set up register map ++ oop_maps = generate_patching(sasm, CAST_FROM_FN_PTR(address, access_field_patching)); ++ } ++ break; ++ ++ case load_klass_patching_id: ++ { ++ StubFrame f(sasm, "load_klass_patching", dont_gc_arguments); ++ // we should set up register map ++ oop_maps = generate_patching(sasm, CAST_FROM_FN_PTR(address, move_klass_patching)); ++ } ++ break; ++ ++ case load_mirror_patching_id: ++ { ++ StubFrame f(sasm, "load_mirror_patching", dont_gc_arguments); ++ // we should set up register map ++ oop_maps = generate_patching(sasm, CAST_FROM_FN_PTR(address, move_mirror_patching)); ++ } ++ break; ++ ++ case load_appendix_patching_id: ++ { ++ StubFrame f(sasm, "load_appendix_patching", dont_gc_arguments); ++ // we should set up register map ++ oop_maps = generate_patching(sasm, CAST_FROM_FN_PTR(address, move_appendix_patching)); ++ } ++ break; ++ ++ case handle_exception_nofpu_id: ++ case handle_exception_id: ++ { ++ StubFrame f(sasm, "handle_exception", dont_gc_arguments); ++ oop_maps = generate_handle_exception(id, sasm); ++ } ++ break; ++ ++ case handle_exception_from_callee_id: ++ { ++ StubFrame f(sasm, "handle_exception_from_callee", dont_gc_arguments); ++ oop_maps = generate_handle_exception(id, sasm); ++ } ++ break; ++ ++ case throw_index_exception_id: ++ { ++ StubFrame f(sasm, "index_range_check_failed", dont_gc_arguments); ++ oop_maps = generate_exception_throw(sasm, CAST_FROM_FN_PTR(address, throw_index_exception), true); ++ } ++ break; ++ ++ case throw_array_store_exception_id: ++ { ++ StubFrame f(sasm, "throw_array_store_exception", dont_gc_arguments); ++ // tos + 0: link ++ // + 1: return address ++ oop_maps = generate_exception_throw(sasm, CAST_FROM_FN_PTR(address, throw_array_store_exception), true); ++ } ++ break; ++ ++#if INCLUDE_ALL_GCS ++ ++ case g1_pre_barrier_slow_id: ++ { ++ StubFrame f(sasm, "g1_pre_barrier", dont_gc_arguments); ++ // arg0 : previous value of memory ++ ++ BarrierSet* bs = Universe::heap()->barrier_set(); ++ if (bs->kind() != BarrierSet::G1SATBCTLogging) { ++ __ li(A0, (int)id); ++ __ call_RT(noreg, noreg, CAST_FROM_FN_PTR(address, unimplemented_entry), A0); ++ __ should_not_reach_here(); ++ break; ++ } ++ ++ const Register pre_val = A0; ++ const Register thread = TREG; ++ const Register tmp = SCR2; ++ ++ Address in_progress(thread, in_bytes(JavaThread::satb_mark_queue_offset() + ++ PtrQueue::byte_offset_of_active())); ++ ++ Address queue_index(thread, in_bytes(JavaThread::satb_mark_queue_offset() + ++ PtrQueue::byte_offset_of_index())); ++ Address buffer(thread, in_bytes(JavaThread::satb_mark_queue_offset() + ++ PtrQueue::byte_offset_of_buf())); ++ ++ Label done; ++ Label runtime; ++ ++ // Can we store original value in the thread's buffer? 
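++ // The SATB queue index counts down in bytes; zero means the thread-local
++ // buffer has no room left, so fall through to the runtime call below.
++ // Otherwise decrement the index by one word and store the previous value
++ // at buffer + index.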
++ __ ld_ptr(tmp, queue_index); ++ __ beqz(tmp, runtime); ++ ++ __ addi_d(tmp, tmp, -wordSize); ++ __ st_ptr(tmp, queue_index); ++ __ ld_ptr(SCR1, buffer); ++ __ add_d(tmp, tmp, SCR1); ++ f.load_argument(0, SCR1); ++ __ st_ptr(SCR1, Address(tmp, 0)); ++ __ b(done); ++ ++ __ bind(runtime); ++ __ pushad(); ++ f.load_argument(0, pre_val); ++ __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::g1_wb_pre), pre_val, thread); ++ __ popad(); ++ __ bind(done); ++ } ++ break; ++ case g1_post_barrier_slow_id: ++ { ++ StubFrame f(sasm, "g1_post_barrier", dont_gc_arguments); ++ ++ // arg0: store_address ++ Address store_addr(FP, 2*BytesPerWord); ++ ++ BarrierSet* bs = Universe::heap()->barrier_set(); ++ CardTableModRefBS* ct = (CardTableModRefBS*)bs; ++ assert(sizeof(*ct->byte_map_base) == sizeof(jbyte), "adjust this code"); ++ ++ Label done; ++ Label runtime; ++ ++ // At this point we know new_value is non-NULL and the new_value crosses regions. ++ // Must check to see if card is already dirty ++ ++ const Register thread = TREG; ++ ++ Address queue_index(thread, in_bytes(JavaThread::dirty_card_queue_offset() + ++ PtrQueue::byte_offset_of_index())); ++ Address buffer(thread, in_bytes(JavaThread::dirty_card_queue_offset() + ++ PtrQueue::byte_offset_of_buf())); ++ ++ const Register card_offset = SCR2; ++ // RA is free here, so we can use it to hold the byte_map_base. ++ const Register byte_map_base = RA; ++ ++ assert_different_registers(card_offset, byte_map_base, SCR1); ++ ++ f.load_argument(0, card_offset); ++ __ srli_d(card_offset, card_offset, CardTableModRefBS::card_shift); ++ __ load_byte_map_base(byte_map_base); ++ __ ldx_bu(SCR1, byte_map_base, card_offset); ++ __ addi_d(SCR1, SCR1, -(int)G1SATBCardTableModRefBS::g1_young_card_val()); ++ __ beqz(SCR1, done); ++ ++ assert((int)CardTableModRefBS::dirty_card_val() == 0, "must be 0"); ++ ++ __ membar(Assembler::StoreLoad); ++ __ ldx_bu(SCR1, byte_map_base, card_offset); ++ __ beqz(SCR1, done); ++ ++ // storing region crossing non-NULL, card is clean. ++ // dirty card and log. 
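++ // The card index is (store address >> card_shift); storing zero
++ // (dirty_card_val) at byte_map_base + index marks the card dirty. The
++ // dirty card address is then enqueued in the thread's dirty-card queue
++ // if there is room, otherwise the runtime path calls g1_wb_post.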
++ __ stx_b(R0, byte_map_base, card_offset); ++ ++ // Convert card offset into an address in card_addr ++ Register card_addr = card_offset; ++ __ add_d(card_addr, byte_map_base, card_addr); ++ ++ __ ld_ptr(SCR1, queue_index); ++ __ beqz(SCR1, runtime); ++ __ addi_d(SCR1, SCR1, -wordSize); ++ __ st_ptr(SCR1, queue_index); ++ ++ // Reuse RA to hold buffer_addr ++ const Register buffer_addr = RA; ++ ++ __ ld_ptr(buffer_addr, buffer); ++ __ stx_d(card_addr, buffer_addr, SCR1); ++ __ b(done); ++ ++ __ bind(runtime); ++ __ pushad(); ++ __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::g1_wb_post), card_addr, thread); ++ __ popad(); ++ __ bind(done); ++ ++ } ++ break; ++#endif ++ ++ case predicate_failed_trap_id: ++ { ++ StubFrame f(sasm, "predicate_failed_trap", dont_gc_arguments); ++ ++ OopMap* map = save_live_registers(sasm); ++ ++ int call_offset = __ call_RT(noreg, noreg, CAST_FROM_FN_PTR(address, predicate_failed_trap)); ++ oop_maps = new OopMapSet(); ++ oop_maps->add_gc_map(call_offset, map); ++ restore_live_registers(sasm); ++ __ leave(); ++ DeoptimizationBlob* deopt_blob = SharedRuntime::deopt_blob(); ++ assert(deopt_blob != NULL, "deoptimization blob must have been created"); ++ ++ __ jmp(deopt_blob->unpack_with_reexecution(), relocInfo::runtime_call_type); ++ } ++ break; ++ ++ case dtrace_object_alloc_id: ++ { ++ // A0: object ++ StubFrame f(sasm, "dtrace_object_alloc", dont_gc_arguments); ++ save_live_registers(sasm); ++ ++ __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::dtrace_object_alloc), A0); ++ ++ restore_live_registers(sasm); ++ } ++ break; ++ ++ default: ++ { ++ StubFrame f(sasm, "unimplemented entry", dont_gc_arguments); ++ __ li(A0, (int)id); ++ __ call_RT(noreg, noreg, CAST_FROM_FN_PTR(address, unimplemented_entry), A0); ++ __ should_not_reach_here(); ++ } ++ break; ++ } ++ } ++ return oop_maps; ++} ++ ++#undef __ ++ ++const char *Runtime1::pd_name_for_address(address entry) { ++ Unimplemented(); ++ return 0; ++} +diff --git a/hotspot/src/cpu/loongarch/vm/c1_globals_loongarch.hpp b/hotspot/src/cpu/loongarch/vm/c1_globals_loongarch.hpp +new file mode 100644 +index 0000000000..df052a058c +--- /dev/null ++++ b/hotspot/src/cpu/loongarch/vm/c1_globals_loongarch.hpp +@@ -0,0 +1,69 @@ ++/* ++ * Copyright (c) 2000, 2021, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2021, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ * ++ */ ++ ++#ifndef CPU_LOONGARCH_C1_GLOBALS_LOONGARCH_HPP ++#define CPU_LOONGARCH_C1_GLOBALS_LOONGARCH_HPP ++ ++#include "utilities/globalDefinitions.hpp" ++#include "utilities/macros.hpp" ++ ++// Sets the default values for platform dependent flags used by the client compiler. ++// (see c1_globals.hpp) ++ ++#ifndef COMPILER2 ++define_pd_global(bool, BackgroundCompilation, true ); ++define_pd_global(bool, InlineIntrinsics, true ); ++define_pd_global(bool, PreferInterpreterNativeStubs, false); ++define_pd_global(bool, ProfileTraps, false); ++define_pd_global(bool, UseOnStackReplacement, true ); ++define_pd_global(bool, TieredCompilation, false); ++define_pd_global(intx, CompileThreshold, 1500 ); ++ ++define_pd_global(intx, OnStackReplacePercentage, 933 ); ++define_pd_global(intx, NewSizeThreadIncrease, 4*K ); ++define_pd_global(intx, InitialCodeCacheSize, 160*K); ++define_pd_global(intx, ReservedCodeCacheSize, 32*M ); ++define_pd_global(intx, NonProfiledCodeHeapSize, 13*M ); ++define_pd_global(intx, ProfiledCodeHeapSize, 14*M ); ++define_pd_global(intx, NonNMethodCodeHeapSize, 5*M ); ++define_pd_global(bool, ProfileInterpreter, false); ++define_pd_global(intx, CodeCacheExpansionSize, 32*K ); ++define_pd_global(uintx, CodeCacheMinBlockLength, 1); ++define_pd_global(uintx, CodeCacheMinimumUseSpace, 400*K); ++define_pd_global(bool, NeverActAsServerClassMachine, true ); ++define_pd_global(uint64_t,MaxRAM, 1ULL*G); ++define_pd_global(bool, CICompileOSR, true ); ++#endif // !COMPILER2 ++define_pd_global(bool, UseTypeProfile, false); ++define_pd_global(bool, RoundFPResults, true ); ++ ++define_pd_global(bool, LIRFillDelaySlots, false); ++define_pd_global(bool, OptimizeSinglePrecision, true ); ++define_pd_global(bool, CSEArrayLength, false); ++define_pd_global(bool, TwoOperandLIRForm, false ); ++ ++define_pd_global(intx, SafepointPollOffset, 0 ); ++ ++#endif // CPU_LOONGARCH_C1_GLOBALS_LOONGARCH_HPP +diff --git a/hotspot/src/cpu/loongarch/vm/c2_globals_loongarch.hpp b/hotspot/src/cpu/loongarch/vm/c2_globals_loongarch.hpp +new file mode 100644 +index 0000000000..044b0d2536 +--- /dev/null ++++ b/hotspot/src/cpu/loongarch/vm/c2_globals_loongarch.hpp +@@ -0,0 +1,87 @@ ++/* ++ * Copyright (c) 2000, 2013, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2021, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ * ++ */ ++ ++#ifndef CPU_LOONGARCH_VM_C2_GLOBALS_LOONGARCH_HPP ++#define CPU_LOONGARCH_VM_C2_GLOBALS_LOONGARCH_HPP ++ ++#include "utilities/globalDefinitions.hpp" ++#include "utilities/macros.hpp" ++ ++// Sets the default values for platform dependent flags used by the server compiler. ++// (see c2_globals.hpp). Alpha-sorted. ++define_pd_global(bool, BackgroundCompilation, true); ++define_pd_global(bool, UseTLAB, true); ++define_pd_global(bool, ResizeTLAB, true); ++define_pd_global(bool, CICompileOSR, true); ++define_pd_global(bool, InlineIntrinsics, true); ++define_pd_global(bool, PreferInterpreterNativeStubs, false); ++define_pd_global(bool, ProfileTraps, true); ++define_pd_global(bool, UseOnStackReplacement, true); ++#ifdef CC_INTERP ++define_pd_global(bool, ProfileInterpreter, false); ++#else ++define_pd_global(bool, ProfileInterpreter, true); ++#endif // CC_INTERP ++define_pd_global(bool, TieredCompilation, true); ++define_pd_global(intx, CompileThreshold, 10000); ++define_pd_global(intx, BackEdgeThreshold, 100000); ++ ++define_pd_global(intx, OnStackReplacePercentage, 140); ++define_pd_global(intx, ConditionalMoveLimit, 3); ++define_pd_global(intx, FLOATPRESSURE, 6); ++define_pd_global(intx, FreqInlineSize, 325); ++define_pd_global(intx, MinJumpTableSize, 10); ++define_pd_global(intx, INTPRESSURE, 13); ++define_pd_global(intx, InteriorEntryAlignment, 16); ++define_pd_global(intx, NewSizeThreadIncrease, ScaleForWordSize(4*K)); ++define_pd_global(intx, LoopUnrollLimit, 60); ++// InitialCodeCacheSize derived from specjbb2000 run. ++define_pd_global(intx, InitialCodeCacheSize, 2496*K); // Integral multiple of CodeCacheExpansionSize ++define_pd_global(intx, CodeCacheExpansionSize, 64*K); ++ ++// Ergonomics related flags ++define_pd_global(uint64_t,MaxRAM, 128ULL*G); ++define_pd_global(intx, RegisterCostAreaRatio, 16000); ++ ++// Peephole and CISC spilling both break the graph, and so makes the ++// scheduler sick. ++define_pd_global(bool, OptoPeephole, false); ++define_pd_global(bool, UseCISCSpill, false); ++define_pd_global(bool, OptoScheduling, false); ++define_pd_global(bool, OptoBundling, false); ++ ++define_pd_global(intx, ReservedCodeCacheSize, 48*M); ++define_pd_global(uintx, CodeCacheMinBlockLength, 4); ++define_pd_global(uintx, CodeCacheMinimumUseSpace, 400*K); ++ ++define_pd_global(bool, TrapBasedRangeChecks, false); ++ ++// Heap related flags ++define_pd_global(uintx,MetaspaceSize, ScaleForWordSize(16*M)); ++ ++// Ergonomics related flags ++define_pd_global(bool, NeverActAsServerClassMachine, false); ++ ++#endif // CPU_LOONGARCH_VM_C2_GLOBALS_LOONGARCH_HPP +diff --git a/hotspot/src/cpu/loongarch/vm/c2_init_loongarch.cpp b/hotspot/src/cpu/loongarch/vm/c2_init_loongarch.cpp +new file mode 100644 +index 0000000000..c7bf590b60 +--- /dev/null ++++ b/hotspot/src/cpu/loongarch/vm/c2_init_loongarch.cpp +@@ -0,0 +1,34 @@ ++/* ++ * Copyright (c) 2000, 2010, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2020, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#include "precompiled.hpp" ++#include "opto/compile.hpp" ++#include "opto/node.hpp" ++ ++// processor dependent initialization for LoongArch ++ ++void Compile::pd_compiler2_init() { ++ guarantee(CodeEntryAlignment >= InteriorEntryAlignment, "" ); ++} +diff --git a/hotspot/src/cpu/loongarch/vm/codeBuffer_loongarch.hpp b/hotspot/src/cpu/loongarch/vm/codeBuffer_loongarch.hpp +new file mode 100644 +index 0000000000..652f6c1092 +--- /dev/null ++++ b/hotspot/src/cpu/loongarch/vm/codeBuffer_loongarch.hpp +@@ -0,0 +1,35 @@ ++/* ++ * Copyright (c) 2002, 2010, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2020, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#ifndef CPU_LOONGARCH_VM_CODEBUFFER_LOONGARCH_HPP ++#define CPU_LOONGARCH_VM_CODEBUFFER_LOONGARCH_HPP ++ ++private: ++ void pd_initialize() {} ++ ++public: ++ void flush_bundle(bool start_new_bundle) {} ++ ++#endif // CPU_LOONGARCH_VM_CODEBUFFER_LOONGARCH_HPP +diff --git a/hotspot/src/cpu/loongarch/vm/compiledIC_loongarch.cpp b/hotspot/src/cpu/loongarch/vm/compiledIC_loongarch.cpp +new file mode 100644 +index 0000000000..70a47fc772 +--- /dev/null ++++ b/hotspot/src/cpu/loongarch/vm/compiledIC_loongarch.cpp +@@ -0,0 +1,167 @@ ++/* ++ * Copyright (c) 1997, 2014, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2021, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU General Public License
++ * version 2 for more details (a copy is included in the LICENSE file that
++ * accompanied this code).
++ *
++ * You should have received a copy of the GNU General Public License version
++ * 2 along with this work; if not, write to the Free Software Foundation,
++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
++ *
++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
++ * or visit www.oracle.com if you need additional information or have any
++ * questions.
++ *
++ */
++
++#include "precompiled.hpp"
++#include "asm/macroAssembler.inline.hpp"
++#include "code/compiledIC.hpp"
++#include "code/icBuffer.hpp"
++#include "code/nmethod.hpp"
++#include "memory/resourceArea.hpp"
++#include "runtime/mutexLocker.hpp"
++#include "runtime/safepoint.hpp"
++
++// Release the CompiledICHolder* associated with this call site if there is one.
++void CompiledIC::cleanup_call_site(virtual_call_Relocation* call_site) {
++ // This call site might have become stale so inspect it carefully.
++ NativeCall* call = nativeCall_at(call_site->addr());
++ if (is_icholder_entry(call->destination())) {
++ NativeMovConstReg* value = nativeMovConstReg_at(call_site->cached_value());
++ InlineCacheBuffer::queue_for_release((CompiledICHolder*)value->data());
++ }
++}
++
++bool CompiledIC::is_icholder_call_site(virtual_call_Relocation* call_site) {
++ // This call site might have become stale so inspect it carefully.
++ NativeCall* call = nativeCall_at(call_site->addr());
++ return is_icholder_entry(call->destination());
++}
++
++// ----------------------------------------------------------------------------
++
++#define __ _masm.
++address CompiledStaticCall::emit_to_interp_stub(CodeBuffer &cbuf) {
++ address mark = cbuf.insts_mark(); // get mark within main instrs section
++
++ // Note that the code buffer's insts_mark is always relative to insts.
++ // That's why we must use the macroassembler to generate a stub.
++ MacroAssembler _masm(&cbuf);
++
++ address base = __ start_a_stub(CompiledStaticCall::to_interp_stub_size());
++ if (base == NULL) return NULL; // CodeBuffer::expand failed
++
++ // static stub relocation stores the instruction address of the call
++ __ relocate(static_stub_Relocation::spec(mark), 0);
++
++ // Code stream for loading method may be changed.
++ __ ibar(0);
++
++ // Rmethod contains the methodOop; it should be relocated for GC
++ // static stub relocation also tags the methodOop in the code-stream.
++ __ mov_metadata(Rmethod, NULL);
++ // This is recognized as unresolved by relocs/nativeInst/ic code
++
++ cbuf.set_insts_mark();
++ __ patchable_jump(__ pc());
++ // Update current stubs pointer and restore code_end.
++ __ end_a_stub();
++ return base;
++}
++#undef __
++
++int CompiledStaticCall::to_interp_stub_size() {
++ return NativeInstruction::nop_instruction_size + NativeMovConstReg::instruction_size + NativeGeneralJump::instruction_size;
++}
++
++// Relocation entries for call stub, compiled java to interpreter.
++int CompiledStaticCall::reloc_to_interp_stub() {
++ return 16;
++}
++
++void CompiledStaticCall::set_to_interpreted(methodHandle callee, address entry) {
++ address stub = find_stub();
++ guarantee(stub != NULL, "stub not found");
++
++ if (TraceICs) {
++ ResourceMark rm;
++ tty->print_cr("CompiledStaticCall@" INTPTR_FORMAT ": set_to_interpreted %s",
++ p2i(instruction_address()),
++ callee->name_and_sig_as_C_string());
++ }
++
++ // Creation also verifies the object.
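++ // The stub emitted by emit_to_interp_stub() above is: ibar, mov_metadata
++ // into Rmethod, patchable jump. The metadata load therefore starts
++ // nop_instruction_size bytes past the stub base.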
++ NativeMovConstReg* method_holder = nativeMovConstReg_at(stub + NativeInstruction::nop_instruction_size); ++#ifndef LOONGARCH64 ++ NativeJump* jump = nativeJump_at(method_holder->next_instruction_address()); ++#else ++ NativeGeneralJump* jump = nativeGeneralJump_at(method_holder->next_instruction_address()); ++#endif ++ ++ assert(method_holder->data() == 0 || method_holder->data() == (intptr_t)callee(), ++ "a) MT-unsafe modification of inline cache"); ++ assert(jump->jump_destination() == jump->instruction_address() || jump->jump_destination() == entry, ++ "b) MT-unsafe modification of inline cache"); ++ ++ // Update stub. ++ method_holder->set_data((intptr_t)callee()); ++ jump->set_jump_destination(entry); ++ ++ // Update jump to call. ++ set_destination_mt_safe(stub); ++} ++ ++void CompiledStaticCall::set_stub_to_clean(static_stub_Relocation* static_stub) { ++ assert (CompiledIC_lock->is_locked() || SafepointSynchronize::is_at_safepoint(), "mt unsafe call"); ++ // Reset stub. ++ address stub = static_stub->addr(); ++ assert(stub != NULL, "stub not found"); ++ // Creation also verifies the object. ++ NativeMovConstReg* method_holder = nativeMovConstReg_at(stub + NativeInstruction::nop_instruction_size); ++#ifndef LOONGARCH64 ++ NativeJump* jump = nativeJump_at(method_holder->next_instruction_address()); ++#else ++ NativeGeneralJump* jump = nativeGeneralJump_at(method_holder->next_instruction_address()); ++#endif ++ method_holder->set_data(0); ++ jump->set_jump_destination(jump->instruction_address()); ++} ++ ++//----------------------------------------------------------------------------- ++// Non-product mode code ++#ifndef PRODUCT ++ ++void CompiledStaticCall::verify() { ++ // Verify call. ++ NativeCall::verify(); ++ if (os::is_MP()) { ++ verify_alignment(); ++ } ++ ++ // Verify stub. ++ address stub = find_stub(); ++ assert(stub != NULL, "no stub found for static call"); ++ // Creation also verifies the object. ++ NativeMovConstReg* method_holder = nativeMovConstReg_at(stub + NativeInstruction::nop_instruction_size); ++#ifndef LOONGARCH64 ++ NativeJump* jump = nativeJump_at(method_holder->next_instruction_address()); ++#else ++ NativeGeneralJump* jump = nativeGeneralJump_at(method_holder->next_instruction_address()); ++#endif ++ ++ ++ // Verify state. ++ assert(is_clean() || is_call_to_compiled() || is_call_to_interpreted(), "sanity check"); ++} ++ ++#endif // !PRODUCT +diff --git a/hotspot/src/cpu/loongarch/vm/copy_loongarch.hpp b/hotspot/src/cpu/loongarch/vm/copy_loongarch.hpp +new file mode 100644 +index 0000000000..1b40eab95b +--- /dev/null ++++ b/hotspot/src/cpu/loongarch/vm/copy_loongarch.hpp +@@ -0,0 +1,72 @@ ++/* ++ * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2020, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). 
++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#ifndef CPU_LOONGARCH_VM_COPY_LOONGARCH_HPP ++#define CPU_LOONGARCH_VM_COPY_LOONGARCH_HPP ++ ++// Inline functions for memory copy and fill. ++ ++// Contains inline asm implementations ++#ifdef TARGET_OS_ARCH_linux_loongarch ++# include "copy_linux_loongarch.inline.hpp" ++#endif ++#ifdef TARGET_OS_ARCH_solaris_loongarch ++# include "copy_solaris_loongarch.inline.hpp" ++#endif ++#ifdef TARGET_OS_ARCH_windows_loongarch ++# include "copy_windows_loongarch.inline.hpp" ++#endif ++#ifdef TARGET_OS_ARCH_bsd_loongarch ++# include "copy_bsd_loongarch.inline.hpp" ++#endif ++// Inline functions for memory copy and fill. ++ ++// Contains inline asm implementations ++ ++static void pd_fill_to_words(HeapWord* tohw, size_t count, juint value) { ++ julong* to = (julong*) tohw; ++ julong v = ((julong) value << 32) | value; ++ while (count-- > 0) { ++ *to++ = v; ++ } ++} ++ ++static void pd_fill_to_aligned_words(HeapWord* tohw, size_t count, juint value) { ++ pd_fill_to_words(tohw, count, value); ++} ++ ++static void pd_fill_to_bytes(void* to, size_t count, jubyte value) { ++ (void)memset(to, value, count); ++} ++ ++static void pd_zero_to_words(HeapWord* tohw, size_t count) { ++ pd_fill_to_words(tohw, count, 0); ++} ++ ++static void pd_zero_to_bytes(void* to, size_t count) { ++ (void)memset(to, 0, count); ++} ++ ++#endif //CPU_LOONGARCH_VM_COPY_LOONGARCH_HPP +diff --git a/hotspot/src/cpu/loongarch/vm/cppInterpreterGenerator_loongarch.hpp b/hotspot/src/cpu/loongarch/vm/cppInterpreterGenerator_loongarch.hpp +new file mode 100644 +index 0000000000..45d86f5bfe +--- /dev/null ++++ b/hotspot/src/cpu/loongarch/vm/cppInterpreterGenerator_loongarch.hpp +@@ -0,0 +1,53 @@ ++/* ++ * Copyright (c) 1997, 2011, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2020, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ * ++ */ ++ ++#ifndef CPU_LOONGARCH_VM_CPPINTERPRETERGENERATOR_LOONGARCH_HPP ++#define CPU_LOONGARCH_VM_CPPINTERPRETERGENERATOR_LOONGARCH_HPP ++ ++ protected: ++ ++#if 0 ++ address generate_asm_interpreter_entry(bool synchronized); ++ address generate_native_entry(bool synchronized); ++ address generate_abstract_entry(void); ++ address generate_math_entry(AbstractInterpreter::MethodKind kind); ++ address generate_empty_entry(void); ++ address generate_accessor_entry(void); ++ void lock_method(void); ++ void generate_stack_overflow_check(void); ++ ++ void generate_counter_incr(Label* overflow, Label* profile_method, Label* profile_method_continue); ++ void generate_counter_overflow(Label* do_continue); ++#endif ++ ++ void generate_more_monitors(); ++ void generate_deopt_handling(); ++ address generate_interpreter_frame_manager(bool synchronized); // C++ interpreter only ++ void generate_compute_interpreter_state(const Register state, ++ const Register prev_state, ++ const Register sender_sp, ++ bool native); // C++ interpreter only ++ ++#endif // CPU_LOONGARCH_VM_CPPINTERPRETERGENERATOR_LOONGARCH_HPP +diff --git a/hotspot/src/cpu/loongarch/vm/cppInterpreter_loongarch.cpp b/hotspot/src/cpu/loongarch/vm/cppInterpreter_loongarch.cpp +new file mode 100644 +index 0000000000..d6c0df3b77 +--- /dev/null ++++ b/hotspot/src/cpu/loongarch/vm/cppInterpreter_loongarch.cpp +@@ -0,0 +1,215 @@ ++/* ++ * Copyright (c) 2007, 2013, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2020, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ * ++ */ ++ ++#include "precompiled.hpp" ++#include "asm/macroAssembler.hpp" ++#include "interpreter/bytecodeHistogram.hpp" ++#include "interpreter/cppInterpreter.hpp" ++#include "interpreter/interpreter.hpp" ++#include "interpreter/interpreterGenerator.hpp" ++#include "interpreter/interpreterRuntime.hpp" ++#include "oops/arrayOop.hpp" ++#include "oops/methodData.hpp" ++#include "oops/method.hpp" ++#include "oops/oop.inline.hpp" ++#include "prims/jvmtiExport.hpp" ++#include "prims/jvmtiThreadState.hpp" ++#include "runtime/arguments.hpp" ++#include "runtime/deoptimization.hpp" ++#include "runtime/frame.inline.hpp" ++#include "runtime/interfaceSupport.hpp" ++#include "runtime/sharedRuntime.hpp" ++#include "runtime/stubRoutines.hpp" ++#include "runtime/synchronizer.hpp" ++#include "runtime/timer.hpp" ++#include "runtime/vframeArray.hpp" ++#include "utilities/debug.hpp" ++#ifdef SHARK ++#include "shark/shark_globals.hpp" ++#endif ++ ++#ifdef CC_INTERP ++ ++// Routine exists to make tracebacks look decent in debugger ++// while "shadow" interpreter frames are on stack. It is also ++// used to distinguish interpreter frames. ++ ++extern "C" void RecursiveInterpreterActivation(interpreterState istate) { ++ ShouldNotReachHere(); ++} ++ ++bool CppInterpreter::contains(address pc) { ++ Unimplemented(); ++} ++ ++#define STATE(field_name) Lstate, in_bytes(byte_offset_of(BytecodeInterpreter, field_name)) ++#define __ _masm-> ++ ++Label frame_manager_entry; ++Label fast_accessor_slow_entry_path; // fast accessor methods need to be able to jmp to unsynchronized ++ // c++ interpreter entry point this holds that entry point label. ++ ++static address unctrap_frame_manager_entry = NULL; ++ ++static address interpreter_return_address = NULL; ++static address deopt_frame_manager_return_atos = NULL; ++static address deopt_frame_manager_return_btos = NULL; ++static address deopt_frame_manager_return_itos = NULL; ++static address deopt_frame_manager_return_ltos = NULL; ++static address deopt_frame_manager_return_ftos = NULL; ++static address deopt_frame_manager_return_dtos = NULL; ++static address deopt_frame_manager_return_vtos = NULL; ++ ++const Register prevState = G1_scratch; ++ ++void InterpreterGenerator::save_native_result(void) { ++ Unimplemented(); ++} ++ ++void InterpreterGenerator::restore_native_result(void) { ++ Unimplemented(); ++} ++ ++// A result handler converts/unboxes a native call result into ++// a java interpreter/compiler result. The current frame is an ++// interpreter frame. The activation frame unwind code must be ++// consistent with that of TemplateTable::_return(...). In the ++// case of native methods, the caller's SP was not modified. 
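++// The CC_INTERP (C++ interpreter) entry points below are all stubbed out
++// with Unimplemented(); this port relies on the template interpreter.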
++address CppInterpreterGenerator::generate_result_handler_for(BasicType type) { ++ Unimplemented(); ++} ++ ++address CppInterpreterGenerator::generate_tosca_to_stack_converter(BasicType type) { ++ Unimplemented(); ++} ++ ++address CppInterpreterGenerator::generate_stack_to_stack_converter(BasicType type) { ++ Unimplemented(); ++} ++ ++address CppInterpreterGenerator::generate_stack_to_native_abi_converter(BasicType type) { ++ Unimplemented(); ++} ++ ++address CppInterpreter::return_entry(TosState state, int length) { ++ Unimplemented(); ++} ++ ++address CppInterpreter::deopt_entry(TosState state, int length) { ++ Unimplemented(); ++} ++ ++void InterpreterGenerator::generate_counter_incr(Label* overflow, Label* profile_method, Label* profile_method_continue) { ++ Unimplemented(); ++} ++ ++address InterpreterGenerator::generate_empty_entry(void) { ++ Unimplemented(); ++} ++ ++address InterpreterGenerator::generate_accessor_entry(void) { ++ Unimplemented(); ++} ++ ++address InterpreterGenerator::generate_native_entry(bool synchronized) { ++ Unimplemented(); ++} ++ ++void CppInterpreterGenerator::generate_compute_interpreter_state(const Register state, ++ const Register prev_state, ++ bool native) { ++ Unimplemented(); ++} ++ ++void InterpreterGenerator::lock_method(void) { ++ Unimplemented(); ++} ++ ++void CppInterpreterGenerator::generate_deopt_handling() { ++ Unimplemented(); ++} ++ ++void CppInterpreterGenerator::generate_more_monitors() { ++ Unimplemented(); ++} ++ ++ ++static address interpreter_frame_manager = NULL; ++ ++void CppInterpreterGenerator::adjust_callers_stack(Register args) { ++ Unimplemented(); ++} ++ ++address InterpreterGenerator::generate_normal_entry(bool synchronized) { ++ Unimplemented(); ++} ++ ++InterpreterGenerator::InterpreterGenerator(StubQueue* code) ++ : CppInterpreterGenerator(code) { ++ Unimplemented(); ++} ++ ++ ++static int size_activation_helper(int callee_extra_locals, int max_stack, int monitor_size) { ++ Unimplemented(); ++} ++ ++int AbstractInterpreter::size_top_interpreter_activation(methodOop method) { ++ Unimplemented(); ++} ++ ++void BytecodeInterpreter::layout_interpreterState(interpreterState to_fill, ++ frame* caller, ++ frame* current, ++ methodOop method, ++ intptr_t* locals, ++ intptr_t* stack, ++ intptr_t* stack_base, ++ intptr_t* monitor_base, ++ intptr_t* frame_bottom, ++ bool is_top_frame ++ ) ++{ ++ Unimplemented(); ++} ++ ++void BytecodeInterpreter::pd_layout_interpreterState(interpreterState istate, address last_Java_pc, intptr_t* last_Java_fp) { ++ Unimplemented(); ++} ++ ++ ++int AbstractInterpreter::layout_activation(methodOop method, ++ int tempcount, // Number of slots on java expression stack in use ++ int popframe_extra_args, ++ int moncount, // Number of active monitors ++ int callee_param_size, ++ int callee_locals_size, ++ frame* caller, ++ frame* interpreter_frame, ++ bool is_top_frame) { ++ Unimplemented(); ++} ++ ++#endif // CC_INTERP +diff --git a/hotspot/src/cpu/loongarch/vm/debug_loongarch.cpp b/hotspot/src/cpu/loongarch/vm/debug_loongarch.cpp +new file mode 100644 +index 0000000000..50de03653b +--- /dev/null ++++ b/hotspot/src/cpu/loongarch/vm/debug_loongarch.cpp +@@ -0,0 +1,51 @@ ++/* ++ * Copyright (c) 1999, 2010, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2021, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#include "precompiled.hpp" ++#include "code/codeCache.hpp" ++#include "code/nmethod.hpp" ++#include "runtime/frame.hpp" ++#include "runtime/init.hpp" ++#include "runtime/os.hpp" ++#include "utilities/debug.hpp" ++#include "utilities/top.hpp" ++ ++#ifndef PRODUCT ++ ++void pd_ps(frame f) { ++ intptr_t* sp = f.sp(); ++ intptr_t* prev_sp = sp - 1; ++ intptr_t *pc = NULL; ++ intptr_t *next_pc = NULL; ++ int count = 0; ++ tty->print("register window backtrace from %#lx:\n", p2i(sp)); ++} ++ ++// This function is used to add platform specific info ++// to the error reporting code. ++ ++void pd_obfuscate_location(char *buf,int buflen) {} ++ ++#endif // PRODUCT +diff --git a/hotspot/src/cpu/loongarch/vm/depChecker_loongarch.cpp b/hotspot/src/cpu/loongarch/vm/depChecker_loongarch.cpp +new file mode 100644 +index 0000000000..62478be3dc +--- /dev/null ++++ b/hotspot/src/cpu/loongarch/vm/depChecker_loongarch.cpp +@@ -0,0 +1,30 @@ ++/* ++ * Copyright (c) 2002, 2010, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2020, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#include "precompiled.hpp" ++#include "compiler/disassembler.hpp" ++#include "depChecker_loongarch.hpp" ++ ++// Nothing to do on LoongArch +diff --git a/hotspot/src/cpu/loongarch/vm/depChecker_loongarch.hpp b/hotspot/src/cpu/loongarch/vm/depChecker_loongarch.hpp +new file mode 100644 +index 0000000000..598be0ee6f +--- /dev/null ++++ b/hotspot/src/cpu/loongarch/vm/depChecker_loongarch.hpp +@@ -0,0 +1,31 @@ ++/* ++ * Copyright (c) 2002, 2010, Oracle and/or its affiliates. 
All rights reserved. ++ * Copyright (c) 2015, 2020, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#ifndef CPU_LOONGARCH_VM_DEPCHECKER_LOONGARCH_HPP ++#define CPU_LOONGARCH_VM_DEPCHECKER_LOONGARCH_HPP ++ ++// Nothing to do on LoongArch ++ ++#endif // CPU_LOONGARCH_VM_DEPCHECKER_LOONGARCH_HPP +diff --git a/hotspot/src/cpu/loongarch/vm/disassembler_loongarch.hpp b/hotspot/src/cpu/loongarch/vm/disassembler_loongarch.hpp +new file mode 100644 +index 0000000000..ccd89e8d6d +--- /dev/null ++++ b/hotspot/src/cpu/loongarch/vm/disassembler_loongarch.hpp +@@ -0,0 +1,37 @@ ++/* ++ * Copyright (c) 1997, 2010, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2021, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#ifndef CPU_LOONGARCH_VM_DISASSEMBLER_LOONGARCH_HPP ++#define CPU_LOONGARCH_VM_DISASSEMBLER_LOONGARCH_HPP ++ ++ static int pd_instruction_alignment() { ++ return sizeof(int); ++ } ++ ++ static const char* pd_cpu_opts() { ++ return "gpr-names=64"; ++ } ++ ++#endif // CPU_LOONGARCH_VM_DISASSEMBLER_LOONGARCH_HPP +diff --git a/hotspot/src/cpu/loongarch/vm/frame_loongarch.cpp b/hotspot/src/cpu/loongarch/vm/frame_loongarch.cpp +new file mode 100644 +index 0000000000..0f50a5715d +--- /dev/null ++++ b/hotspot/src/cpu/loongarch/vm/frame_loongarch.cpp +@@ -0,0 +1,711 @@ ++/* ++ * Copyright (c) 1997, 2014, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2021, Loongson Technology. All rights reserved. 
++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#include "precompiled.hpp" ++#include "interpreter/interpreter.hpp" ++#include "memory/resourceArea.hpp" ++#include "oops/markOop.hpp" ++#include "oops/method.hpp" ++#include "oops/oop.inline.hpp" ++#include "prims/methodHandles.hpp" ++#include "runtime/frame.inline.hpp" ++#include "runtime/handles.inline.hpp" ++#include "runtime/javaCalls.hpp" ++#include "runtime/monitorChunk.hpp" ++#include "runtime/signature.hpp" ++#include "runtime/stubCodeGenerator.hpp" ++#include "runtime/stubRoutines.hpp" ++#include "vmreg_loongarch.inline.hpp" ++ ++#ifdef ASSERT ++void RegisterMap::check_location_valid() { ++} ++#endif ++ ++ ++// Profiling/safepoint support ++// for Profiling - acting on another frame. walks sender frames ++// if valid. ++// frame profile_find_Java_sender_frame(JavaThread *thread); ++ ++bool frame::safe_for_sender(JavaThread *thread) { ++ address sp = (address)_sp; ++ address fp = (address)_fp; ++ address unextended_sp = (address)_unextended_sp; ++ ++ // consider stack guards when trying to determine "safe" stack pointers ++ static size_t stack_guard_size = os::uses_stack_guard_pages() ? (StackYellowPages + StackRedPages) * os::vm_page_size() : 0; ++ size_t usable_stack_size = thread->stack_size() - stack_guard_size; ++ ++ // sp must be within the usable part of the stack (not in guards) ++ bool sp_safe = (sp < thread->stack_base()) && ++ (sp >= thread->stack_base() - usable_stack_size); ++ ++ ++ if (!sp_safe) { ++ return false; ++ } ++ ++ // unextended sp must be within the stack and above or equal sp ++ bool unextended_sp_safe = (unextended_sp < thread->stack_base()) && ++ (unextended_sp >= sp); ++ ++ if (!unextended_sp_safe) { ++ return false; ++ } ++ ++ // an fp must be within the stack and above (but not equal) sp ++ // second evaluation on fp+ is added to handle situation where fp is -1 ++ bool fp_safe = (fp < thread->stack_base() && (fp > sp) && (((fp + (return_addr_offset * sizeof(void*))) < thread->stack_base()))); ++ ++ // We know sp/unextended_sp are safe only fp is questionable here ++ ++ // If the current frame is known to the code cache then we can attempt to ++ // construct the sender and do some validation of it. This goes a long way ++ // toward eliminating issues when we get in frame construction code ++ ++ if (_cb != NULL ) { ++ ++ // First check if frame is complete and tester is reliable ++ // Unfortunately we can only check frame complete for runtime stubs and nmethod ++ // other generic buffer blobs are more problematic so we just assume they are ++ // ok. 
adapter blobs never have a frame complete and are never ok. ++ ++ if (!_cb->is_frame_complete_at(_pc)) { ++ if (_cb->is_nmethod() || _cb->is_adapter_blob() || _cb->is_runtime_stub()) { ++ return false; ++ } ++ } ++ ++ // Could just be some random pointer within the codeBlob ++ if (!_cb->code_contains(_pc)) { ++ return false; ++ } ++ ++ // Entry frame checks ++ if (is_entry_frame()) { ++ // an entry frame must have a valid fp. ++ return fp_safe && is_entry_frame_valid(thread); ++ } ++ ++ intptr_t* sender_sp = NULL; ++ intptr_t* sender_unextended_sp = NULL; ++ address sender_pc = NULL; ++ intptr_t* saved_fp = NULL; ++ ++ if (is_interpreted_frame()) { ++ // fp must be safe ++ if (!fp_safe) { ++ return false; ++ } ++ ++ sender_pc = (address) this->fp()[return_addr_offset]; ++ // for interpreted frames, the value below is the sender "raw" sp, ++ // which can be different from the sender unextended sp (the sp seen ++ // by the sender) because of current frame local variables ++ sender_sp = (intptr_t*) addr_at(sender_sp_offset); ++ sender_unextended_sp = (intptr_t*) this->fp()[interpreter_frame_sender_sp_offset]; ++ saved_fp = (intptr_t*) this->fp()[link_offset]; ++ ++ } else { ++ // must be some sort of compiled/runtime frame ++ // fp does not have to be safe (although it could be check for c1?) ++ ++ // check for a valid frame_size, otherwise we are unlikely to get a valid sender_pc ++ if (_cb->frame_size() <= 0) { ++ return false; ++ } ++ ++ sender_sp = _unextended_sp + _cb->frame_size(); ++ sender_unextended_sp = sender_sp; ++ // On LA the return_address is always the word on the stack ++ sender_pc = (address) *(sender_sp-1); ++ // Note: frame::sender_sp_offset is only valid for compiled frame ++ saved_fp = (intptr_t*) *(sender_sp - frame::sender_sp_offset); ++ } ++ ++ ++ // If the potential sender is the interpreter then we can do some more checking ++ if (Interpreter::contains(sender_pc)) { ++ ++ // FP is always saved in a recognizable place in any code we generate. However ++ // only if the sender is interpreted/call_stub (c1 too?) are we certain that the saved FP ++ // is really a frame pointer. 
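++ // A usable saved FP must lie strictly between the sender's SP and the
++ // base of the thread's stack.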
++ ++ bool saved_fp_safe = ((address)saved_fp < thread->stack_base()) && (saved_fp > sender_sp); ++ ++ if (!saved_fp_safe) { ++ return false; ++ } ++ ++ // construct the potential sender ++ ++ frame sender(sender_sp, sender_unextended_sp, saved_fp, sender_pc); ++ ++ return sender.is_interpreted_frame_valid(thread); ++ ++ } ++ ++ // We must always be able to find a recognizable pc ++ CodeBlob* sender_blob = CodeCache::find_blob_unsafe(sender_pc); ++ if (sender_pc == NULL || sender_blob == NULL) { ++ return false; ++ } ++ ++ // Could be a zombie method ++ if (sender_blob->is_zombie() || sender_blob->is_unloaded()) { ++ return false; ++ } ++ ++ // Could just be some random pointer within the codeBlob ++ if (!sender_blob->code_contains(sender_pc)) { ++ return false; ++ } ++ ++ // We should never be able to see an adapter if the current frame is something from code cache ++ if (sender_blob->is_adapter_blob()) { ++ return false; ++ } ++ ++ // Could be the call_stub ++ if (StubRoutines::returns_to_call_stub(sender_pc)) { ++ bool saved_fp_safe = ((address)saved_fp < thread->stack_base()) && (saved_fp > sender_sp); ++ ++ if (!saved_fp_safe) { ++ return false; ++ } ++ ++ // construct the potential sender ++ ++ frame sender(sender_sp, sender_unextended_sp, saved_fp, sender_pc); ++ ++ // Validate the JavaCallWrapper an entry frame must have ++ address jcw = (address)sender.entry_frame_call_wrapper(); ++ ++ bool jcw_safe = (jcw < thread->stack_base()) && ( jcw > (address)sender.fp()); ++ ++ return jcw_safe; ++ } ++ ++ if (sender_blob->is_nmethod()) { ++ nmethod* nm = sender_blob->as_nmethod_or_null(); ++ if (nm != NULL) { ++ if (nm->is_deopt_mh_entry(sender_pc) || nm->is_deopt_entry(sender_pc)) { ++ return false; ++ } ++ } ++ } ++ ++ // If the frame size is 0 something (or less) is bad because every nmethod has a non-zero frame size ++ // because the return address counts against the callee's frame. ++ ++ if (sender_blob->frame_size() <= 0) { ++ assert(!sender_blob->is_nmethod(), "should count return address at least"); ++ return false; ++ } ++ ++ // We should never be able to see anything here except an nmethod. If something in the ++ // code cache (current frame) is called by an entity within the code cache that entity ++ // should not be anything but the call stub (already covered), the interpreter (already covered) ++ // or an nmethod. ++ ++ if (!sender_blob->is_nmethod()) { ++ return false; ++ } ++ ++ // Could put some more validation for the potential non-interpreted sender ++ // frame we'd create by calling sender if I could think of any. Wait for next crash in forte... ++ ++ // One idea is seeing if the sender_pc we have is one that we'd expect to call to current cb ++ ++ // We've validated the potential sender that would be created ++ return true; ++ } ++ // Note: fp == NULL is not really a prerequisite for this to be safe to ++ // walk for c2. However we've modified the code such that if we get ++ // a failure with fp != NULL that we then try with FP == NULL. ++ // This is basically to mimic what a last_frame would look like if ++ // c2 had generated it. ++ ++ // Must be native-compiled frame. Since sender will try and use fp to find ++ // linkages it must be safe ++ ++ if (!fp_safe) { ++ return false; ++ } ++ ++ // Will the pc we fetch be non-zero (which we'll find at the oldest frame) ++ ++ if ( (address) this->fp()[return_addr_offset] == NULL) return false; ++ ++ ++ // could try and do some more potential verification of native frame if we could think of some... 
++ ++ return true; ++ ++} ++ ++void frame::patch_pc(Thread* thread, address pc) { ++ assert(_cb == CodeCache::find_blob(pc), "unexpected pc"); ++ address* pc_addr = &(((address*) sp())[-1]); ++ if (TracePcPatching) { ++ tty->print_cr("patch_pc at address " INTPTR_FORMAT " [" INTPTR_FORMAT " -> " INTPTR_FORMAT "]", ++ p2i(pc_addr), p2i(*pc_addr), p2i(pc)); ++ } ++ ++ // Either the return address is the original one or we are going to ++ // patch in the same address that's already there. ++ assert(_pc == *pc_addr || pc == *pc_addr, "must be"); ++ *pc_addr = pc; ++ _cb = CodeCache::find_blob(pc); ++ address original_pc = nmethod::get_deopt_original_pc(this); ++ if (original_pc != NULL) { ++ assert(original_pc == _pc, "expected original PC to be stored before patching"); ++ _deopt_state = is_deoptimized; ++ // leave _pc as is ++ } else { ++ _deopt_state = not_deoptimized; ++ _pc = pc; ++ } ++} ++ ++bool frame::is_interpreted_frame() const { ++ return Interpreter::contains(pc()); ++} ++ ++int frame::frame_size(RegisterMap* map) const { ++ frame sender = this->sender(map); ++ return sender.sp() - sp(); ++} ++ ++intptr_t* frame::entry_frame_argument_at(int offset) const { ++ // convert offset to index to deal with tsi ++ int index = (Interpreter::expr_offset_in_bytes(offset)/wordSize); ++ // Entry frame's arguments are always in relation to unextended_sp() ++ return &unextended_sp()[index]; ++} ++ ++// sender_sp ++#ifdef CC_INTERP ++intptr_t* frame::interpreter_frame_sender_sp() const { ++ assert(is_interpreted_frame(), "interpreted frame expected"); ++ // QQQ why does this specialize method exist if frame::sender_sp() does same thing? ++ // seems odd and if we always know interpreted vs. non then sender_sp() is really ++ // doing too much work. ++ return get_interpreterState()->sender_sp(); ++} ++ ++// monitor elements ++ ++BasicObjectLock* frame::interpreter_frame_monitor_begin() const { ++ return get_interpreterState()->monitor_base(); ++} ++ ++BasicObjectLock* frame::interpreter_frame_monitor_end() const { ++ return (BasicObjectLock*) get_interpreterState()->stack_base(); ++} ++ ++#else // CC_INTERP ++ ++intptr_t* frame::interpreter_frame_sender_sp() const { ++ assert(is_interpreted_frame(), "interpreted frame expected"); ++ return (intptr_t*) at(interpreter_frame_sender_sp_offset); ++} ++ ++void frame::set_interpreter_frame_sender_sp(intptr_t* sender_sp) { ++ assert(is_interpreted_frame(), "interpreted frame expected"); ++ int_at_put(interpreter_frame_sender_sp_offset, (intptr_t) sender_sp); ++} ++ ++ ++// monitor elements ++ ++BasicObjectLock* frame::interpreter_frame_monitor_begin() const { ++ return (BasicObjectLock*) addr_at(interpreter_frame_monitor_block_bottom_offset); ++} ++ ++BasicObjectLock* frame::interpreter_frame_monitor_end() const { ++ BasicObjectLock* result = (BasicObjectLock*) *addr_at(interpreter_frame_monitor_block_top_offset); ++ // make sure the pointer points inside the frame ++ assert((intptr_t) fp() > (intptr_t) result, "result must < than frame pointer"); ++ assert((intptr_t) sp() <= (intptr_t) result, "result must >= than stack pointer"); ++ return result; ++} ++ ++void frame::interpreter_frame_set_monitor_end(BasicObjectLock* value) { ++ *((BasicObjectLock**)addr_at(interpreter_frame_monitor_block_top_offset)) = value; ++} ++ ++// Used by template based interpreter deoptimization ++void frame::interpreter_frame_set_last_sp(intptr_t* sp) { ++ *((intptr_t**)addr_at(interpreter_frame_last_sp_offset)) = sp; ++} ++#endif // CC_INTERP ++ ++frame 
frame::sender_for_entry_frame(RegisterMap* map) const { ++ assert(map != NULL, "map must be set"); ++ // Java frame called from C; skip all C frames and return top C ++ // frame of that chunk as the sender ++ JavaFrameAnchor* jfa = entry_frame_call_wrapper()->anchor(); ++ assert(!entry_frame_is_first(), "next Java fp must be non zero"); ++ assert(jfa->last_Java_sp() > sp(), "must be above this frame on stack"); ++ map->clear(); ++ assert(map->include_argument_oops(), "should be set by clear"); ++ if (jfa->last_Java_pc() != NULL ) { ++ frame fr(jfa->last_Java_sp(), jfa->last_Java_fp(), jfa->last_Java_pc()); ++ return fr; ++ } ++ frame fr(jfa->last_Java_sp(), jfa->last_Java_fp()); ++ return fr; ++} ++ ++frame frame::sender_for_interpreter_frame(RegisterMap* map) const { ++ // sp is the raw sp from the sender after adapter or interpreter extension ++ intptr_t* sender_sp = this->sender_sp(); ++ ++ // This is the sp before any possible extension (adapter/locals). ++ intptr_t* unextended_sp = interpreter_frame_sender_sp(); ++ ++ // The interpreter and compiler(s) always save FP in a known ++ // location on entry. We must record where that location is ++ // so this if FP was live on callout from c2 we can find ++ // the saved copy no matter what it called. ++ ++ // Since the interpreter always saves FP if we record where it is then ++ // we don't have to always save FP on entry and exit to c2 compiled ++ // code, on entry will be enough. ++#ifdef COMPILER2 ++ if (map->update_map()) { ++ update_map_with_saved_link(map, (intptr_t**) addr_at(link_offset)); ++ } ++#endif /* COMPILER2 */ ++ return frame(sender_sp, unextended_sp, link(), sender_pc()); ++} ++ ++ ++//------------------------------------------------------------------------------ ++// frame::verify_deopt_original_pc ++// ++// Verifies the calculated original PC of a deoptimization PC for the ++// given unextended SP. The unextended SP might also be the saved SP ++// for MethodHandle call sites. ++#ifdef ASSERT ++void frame::verify_deopt_original_pc(nmethod* nm, intptr_t* unextended_sp, bool is_method_handle_return) { ++ frame fr; ++ ++ // This is ugly but it's better than to change {get,set}_original_pc ++ // to take an SP value as argument. And it's only a debugging ++ // method anyway. ++ fr._unextended_sp = unextended_sp; ++ ++ address original_pc = nm->get_original_pc(&fr); ++ assert(nm->insts_contains(original_pc), "original PC must be in nmethod"); ++ assert(nm->is_method_handle_return(original_pc) == is_method_handle_return, "must be"); ++} ++#endif ++ ++ ++//------------------------------------------------------------------------------ ++// frame::adjust_unextended_sp ++void frame::adjust_unextended_sp() { ++ // On LoongArch, sites calling method handle intrinsics and lambda forms are treated ++ // as any other call site. Therefore, no special action is needed when we are ++ // returning to any of these call sites. ++ ++ nmethod* sender_nm = (_cb == NULL) ? NULL : _cb->as_nmethod_or_null(); ++ if (sender_nm != NULL) { ++ // If the sender PC is a deoptimization point, get the original PC. 
++    if (sender_nm->is_deopt_entry(_pc) ||
++        sender_nm->is_deopt_mh_entry(_pc)) {
++      DEBUG_ONLY(verify_deopt_original_pc(sender_nm, _unextended_sp));
++    }
++  }
++}
++
++//------------------------------------------------------------------------------
++// frame::update_map_with_saved_link
++void frame::update_map_with_saved_link(RegisterMap* map, intptr_t** link_addr) {
++  // The interpreter and compiler(s) always save fp in a known
++  // location on entry. We must record where that location is
++  // so that if fp was live on callout from c2 we can find
++  // the saved copy no matter what it called.
++
++  // Since the interpreter always saves fp if we record where it is then
++  // we don't have to always save fp on entry and exit to c2 compiled
++  // code, on entry will be enough.
++  map->set_location(FP->as_VMReg(), (address) link_addr);
++  // this is weird "H" ought to be at a higher address however the
++  // oopMaps seems to have the "H" regs at the same address and the
++  // vanilla register.
++  // XXXX make this go away
++  if (true) {
++    map->set_location(FP->as_VMReg()->next(), (address) link_addr);
++  }
++}
++
++//------------------------------sender_for_compiled_frame-----------------------
++frame frame::sender_for_compiled_frame(RegisterMap* map) const {
++  assert(map != NULL, "map must be set");
++
++  // frame owned by optimizing compiler
++  assert(_cb->frame_size() >= 0, "must have non-zero frame size");
++
++  intptr_t* sender_sp = unextended_sp() + _cb->frame_size();
++  intptr_t* unextended_sp = sender_sp;
++
++#ifdef ASSERT
++  const bool c1_compiled = _cb->is_compiled_by_c1();
++  bool native = _cb->is_nmethod() && ((nmethod*)_cb)->is_native_method();
++  if (c1_compiled && native) {
++    assert(sender_sp == fp() + frame::sender_sp_offset, "incorrect frame size");
++  }
++#endif // ASSERT
++  // On LA the return_address is always the word on the stack
++  // the fp in compiler points to sender fp, but in interpreter, fp points to return address,
++  // so getting sender for compiled frame is not same as interpreter frame.
++  // we hard code here temporarily
++  // spark
++  address sender_pc = (address) *(sender_sp-1);
++
++  intptr_t** saved_fp_addr = (intptr_t**) (sender_sp - frame::sender_sp_offset);
++
++  if (map->update_map()) {
++    // Tell GC to use argument oopmaps for some runtime stubs that need it.
++    // For C1, the runtime stub might not have oop maps, so set this flag
++    // outside of update_register_map.
++    map->set_include_argument_oops(_cb->caller_must_gc_arguments(map->thread()));
++    if (_cb->oop_maps() != NULL) {
++      OopMapSet::update_register_map(this, map);
++    }
++
++    // Since the prolog does the save and restore of FP there is no oopmap
++    // for it so we must fill in its location as if there was an oopmap entry
++    // since if our caller was compiled code there could be live jvm state in it.
++    update_map_with_saved_link(map, saved_fp_addr);
++  }
++  assert(sender_sp != sp(), "must have changed");
++  return frame(sender_sp, unextended_sp, *saved_fp_addr, sender_pc);
++}
++
++frame frame::sender(RegisterMap* map) const {
++  // Default is we don't have to follow them. The sender_for_xxx will
++  // update it accordingly
++  map->set_include_argument_oops(false);
++
++  if (is_entry_frame()) return sender_for_entry_frame(map);
++  if (is_interpreted_frame()) return sender_for_interpreter_frame(map);
++  assert(_cb == CodeCache::find_blob(pc()),"Must be the same");
++
++  if (_cb != NULL) {
++    return sender_for_compiled_frame(map);
++  }
++  // Must be native-compiled frame, i.e.
the marshaling code for native ++ // methods that exists in the core system. ++ return frame(sender_sp(), link(), sender_pc()); ++} ++ ++ ++bool frame::interpreter_frame_equals_unpacked_fp(intptr_t* fp) { ++ assert(is_interpreted_frame(), "must be interpreter frame"); ++ Method* method = interpreter_frame_method(); ++ // When unpacking an optimized frame the frame pointer is ++ // adjusted with: ++ int diff = (method->max_locals() - method->size_of_parameters()) * ++ Interpreter::stackElementWords; ++ printf("^^^^^^^^^^^^^^^adjust fp in deopt fp = 0%lx \n", (intptr_t)(fp - diff)); ++ return _fp == (fp - diff); ++} ++ ++void frame::pd_gc_epilog() { ++ // nothing done here now ++} ++ ++bool frame::is_interpreted_frame_valid(JavaThread* thread) const { ++// QQQ ++#ifdef CC_INTERP ++#else ++ assert(is_interpreted_frame(), "Not an interpreted frame"); ++ // These are reasonable sanity checks ++ if (fp() == 0 || (intptr_t(fp()) & (wordSize-1)) != 0) { ++ return false; ++ } ++ if (sp() == 0 || (intptr_t(sp()) & (wordSize-1)) != 0) { ++ return false; ++ } ++ if (fp() + interpreter_frame_initial_sp_offset < sp()) { ++ return false; ++ } ++ // These are hacks to keep us out of trouble. ++ // The problem with these is that they mask other problems ++ if (fp() <= sp()) { // this attempts to deal with unsigned comparison above ++ return false; ++ } ++ ++ // do some validation of frame elements ++ ++ // first the method ++ ++ Method* m = *interpreter_frame_method_addr(); ++ ++ // validate the method we'd find in this potential sender ++ if (!m->is_valid_method()) return false; ++ ++ // stack frames shouldn't be much larger than max_stack elements ++ ++ //if (fp() - sp() > 1024 + m->max_stack()*Interpreter::stackElementSize()) { ++ if (fp() - sp() > 4096) { // stack frames shouldn't be large. ++ return false; ++ } ++ ++ // validate bci/bcx ++ ++ intptr_t bcx = interpreter_frame_bcx(); ++ if (m->validate_bci_from_bcx(bcx) < 0) { ++ return false; ++ } ++ ++ // validate ConstantPoolCache* ++ ++ ConstantPoolCache* cp = *interpreter_frame_cache_addr(); ++ ++ if (cp == NULL || !cp->is_metaspace_object()) return false; ++ ++ // validate locals ++ ++ address locals = (address) *interpreter_frame_locals_addr(); ++ ++ if (locals > thread->stack_base() || locals < (address) fp()) return false; ++ ++ // We'd have to be pretty unlucky to be mislead at this point ++ ++#endif // CC_INTERP ++ return true; ++} ++ ++BasicType frame::interpreter_frame_result(oop* oop_result, jvalue* value_result) { ++#ifdef CC_INTERP ++ // Needed for JVMTI. The result should always be in the interpreterState object ++ assert(false, "NYI"); ++ interpreterState istate = get_interpreterState(); ++#endif // CC_INTERP ++ assert(is_interpreted_frame(), "interpreted frame expected"); ++ Method* method = interpreter_frame_method(); ++ BasicType type = method->result_type(); ++ ++ intptr_t* tos_addr; ++ if (method->is_native()) { ++ // Prior to calling into the runtime to report the method_exit the possible ++ // return value is pushed to the native stack. If the result is a jfloat/jdouble ++ // then ST0 is saved. 
See the note in generate_native_result ++ tos_addr = (intptr_t*)sp(); ++ if (type == T_FLOAT || type == T_DOUBLE) { ++ tos_addr += 2; ++ } ++ } else { ++ tos_addr = (intptr_t*)interpreter_frame_tos_address(); ++ } ++ ++ switch (type) { ++ case T_OBJECT : ++ case T_ARRAY : { ++ oop obj; ++ if (method->is_native()) { ++#ifdef CC_INTERP ++ obj = istate->_oop_temp; ++#else ++ obj = cast_to_oop(at(interpreter_frame_oop_temp_offset)); ++#endif // CC_INTERP ++ } else { ++ oop* obj_p = (oop*)tos_addr; ++ obj = (obj_p == NULL) ? (oop)NULL : *obj_p; ++ } ++ assert(obj == NULL || Universe::heap()->is_in(obj), "sanity check"); ++ *oop_result = obj; ++ break; ++ } ++ case T_BOOLEAN : value_result->z = *(jboolean*)tos_addr; break; ++ case T_BYTE : value_result->b = *(jbyte*)tos_addr; break; ++ case T_CHAR : value_result->c = *(jchar*)tos_addr; break; ++ case T_SHORT : value_result->s = *(jshort*)tos_addr; break; ++ case T_INT : value_result->i = *(jint*)tos_addr; break; ++ case T_LONG : value_result->j = *(jlong*)tos_addr; break; ++ case T_FLOAT : value_result->f = *(jfloat*)tos_addr; break; ++ case T_DOUBLE : value_result->d = *(jdouble*)tos_addr; break; ++ case T_VOID : /* Nothing to do */ break; ++ default : ShouldNotReachHere(); ++ } ++ ++ return type; ++} ++ ++ ++intptr_t* frame::interpreter_frame_tos_at(jint offset) const { ++ int index = (Interpreter::expr_offset_in_bytes(offset)/wordSize); ++ return &interpreter_frame_tos_address()[index]; ++} ++ ++#ifndef PRODUCT ++ ++#define DESCRIBE_FP_OFFSET(name) \ ++ values.describe(frame_no, fp() + frame::name##_offset, #name) ++ ++void frame::describe_pd(FrameValues& values, int frame_no) { ++ if (is_interpreted_frame()) { ++ DESCRIBE_FP_OFFSET(interpreter_frame_sender_sp); ++ DESCRIBE_FP_OFFSET(interpreter_frame_last_sp); ++ DESCRIBE_FP_OFFSET(interpreter_frame_method); ++ DESCRIBE_FP_OFFSET(interpreter_frame_mdx); ++ DESCRIBE_FP_OFFSET(interpreter_frame_cache); ++ DESCRIBE_FP_OFFSET(interpreter_frame_locals); ++ DESCRIBE_FP_OFFSET(interpreter_frame_bcx); ++ DESCRIBE_FP_OFFSET(interpreter_frame_initial_sp); ++ } ++} ++#endif ++ ++intptr_t *frame::initial_deoptimization_info() { ++ // used to reset the saved FP ++ return fp(); ++} ++ ++intptr_t* frame::real_fp() const { ++ if (_cb != NULL) { ++ // use the frame size if valid ++ int size = _cb->frame_size(); ++ if (size > 0) { ++ return unextended_sp() + size; ++ } ++ } ++ // else rely on fp() ++ assert(! is_compiled_frame(), "unknown compiled frame size"); ++ return fp(); ++} ++ ++#ifndef PRODUCT ++// This is a generic constructor which is only used by pns() in debug.cpp. ++frame::frame(void* sp, void* fp, void* pc) { ++ init((intptr_t*)sp, (intptr_t*)fp, (address)pc); ++} ++#endif +diff --git a/hotspot/src/cpu/loongarch/vm/frame_loongarch.hpp b/hotspot/src/cpu/loongarch/vm/frame_loongarch.hpp +new file mode 100644 +index 0000000000..964026e621 +--- /dev/null ++++ b/hotspot/src/cpu/loongarch/vm/frame_loongarch.hpp +@@ -0,0 +1,229 @@ ++/* ++ * Copyright (c) 1997, 2013, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2020, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. 
++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#ifndef CPU_LOONGARCH_VM_FRAME_LOONGARCH_HPP ++#define CPU_LOONGARCH_VM_FRAME_LOONGARCH_HPP ++ ++#include "runtime/synchronizer.hpp" ++#include "utilities/top.hpp" ++ ++// A frame represents a physical stack frame (an activation). Frames can be ++// C or Java frames, and the Java frames can be interpreted or compiled. ++// In contrast, vframes represent source-level activations, so that one physical frame ++// can correspond to multiple source level frames because of inlining. ++// A frame is comprised of {pc, fp, sp} ++// ------------------------------ Asm interpreter ---------------------------------------- ++// Layout of asm interpreter frame: ++// [expression stack ] * <- sp ++// [monitors ] \ ++// ... | monitor block size ++// [monitors ] / ++// [monitor block size ] ++// [byte code index/pointr] = bcx() bcx_offset ++// [pointer to locals ] = locals() locals_offset ++// [constant pool cache ] = cache() cache_offset ++// [methodData ] = mdp() mdx_offset ++// [methodOop ] = method() method_offset ++// [last sp ] = last_sp() last_sp_offset ++// [old stack pointer ] (sender_sp) sender_sp_offset ++// [old frame pointer ] <- fp = link() ++// [return pc ] ++// [oop temp ] (only for native calls) ++// [locals and parameters ] ++// <- sender sp ++// ------------------------------ Asm interpreter ---------------------------------------- ++ ++// ------------------------------ C++ interpreter ---------------------------------------- ++// ++// Layout of C++ interpreter frame: (While executing in BytecodeInterpreter::run) ++// ++// <- SP (current sp) ++// [local variables ] BytecodeInterpreter::run local variables ++// ... BytecodeInterpreter::run local variables ++// [local variables ] BytecodeInterpreter::run local variables ++// [old frame pointer ] fp [ BytecodeInterpreter::run's fp ] ++// [return pc ] (return to frame manager) ++// [interpreter_state* ] (arg to BytecodeInterpreter::run) -------------- ++// [expression stack ] <- last_Java_sp | ++// [... ] * <- interpreter_state.stack | ++// [expression stack ] * <- interpreter_state.stack_base | ++// [monitors ] \ | ++// ... | monitor block size | ++// [monitors ] / <- interpreter_state.monitor_base | ++// [struct interpretState ] <-----------------------------------------| ++// [return pc ] (return to callee of frame manager [1] ++// [locals and parameters ] ++// <- sender sp ++ ++// [1] When the c++ interpreter calls a new method it returns to the frame ++// manager which allocates a new frame on the stack. In that case there ++// is no real callee of this newly allocated frame. The frame manager is ++// aware of the additional frame(s) and will pop them as nested calls ++// complete. 
However, to make it look good in the debugger the frame
++// manager actually installs a dummy pc pointing to RecursiveInterpreterActivation
++// with a fake interpreter_state* parameter to make it easy to debug
++// nested calls.
++
++// Note that contrary to the layout for the assembly interpreter the
++// expression stack allocated for the C++ interpreter is full sized.
++// However this is not as bad as it seems as the interpreter frame_manager
++// will truncate the unused space on successive method calls.
++//
++// ------------------------------ C++ interpreter ----------------------------------------
++
++// Layout of interpreter frame:
++//
++//    [ monitor entry            ] <--- sp
++//      ...
++//    [ monitor entry            ]
++// -9 [ monitor block top        ] ( the top monitor entry )
++// -8 [ byte code pointer        ] (if native, bcp = 0)
++// -7 [ constant pool cache      ]
++// -6 [ methodData               ] mdx_offset(not core only)
++// -5 [ mirror                   ]
++// -4 [ methodOop                ]
++// -3 [ locals offset            ]
++// -2 [ last_sp                  ]
++// -1 [ sender's sp              ]
++//  0 [ sender's fp              ] <--- fp
++//  1 [ return address           ]
++//  2 [ oop temp offset          ] (only for native calls)
++//  3 [ result handler offset    ] (only for native calls)
++//  4 [ result type info         ] (only for native calls)
++//    [ local var m-1            ]
++//      ...
++//    [ local var 0              ]
++//    [ argument word n-1        ] <--- ( sender's sp )
++//      ...
++//    [ argument word 0          ] <--- S7
++
++ public:
++  enum {
++    pc_return_offset = 0,
++    // All frames
++    link_offset = 0,
++    return_addr_offset = 1,
++    // non-interpreter frames
++    sender_sp_offset = 2,
++
++#ifndef CC_INTERP
++
++    // Interpreter frames
++    interpreter_frame_return_addr_offset = 1,
++    interpreter_frame_result_handler_offset = 3, // for native calls only
++    interpreter_frame_oop_temp_offset = 2, // for native calls only
++
++    interpreter_frame_sender_fp_offset = 0,
++    interpreter_frame_sender_sp_offset = -1,
++    // outgoing sp before a call to an invoked method
++    interpreter_frame_last_sp_offset = interpreter_frame_sender_sp_offset - 1,
++    interpreter_frame_locals_offset = interpreter_frame_last_sp_offset - 1,
++    interpreter_frame_method_offset = interpreter_frame_locals_offset - 1,
++    interpreter_frame_mdx_offset = interpreter_frame_method_offset - 1,
++    interpreter_frame_cache_offset = interpreter_frame_mdx_offset - 1,
++    interpreter_frame_bcx_offset = interpreter_frame_cache_offset - 1,
++    interpreter_frame_initial_sp_offset = interpreter_frame_bcx_offset - 1,
++
++    interpreter_frame_monitor_block_top_offset = interpreter_frame_initial_sp_offset,
++    interpreter_frame_monitor_block_bottom_offset = interpreter_frame_initial_sp_offset,
++
++#endif // CC_INTERP
++
++    // Entry frames
++    entry_frame_call_wrapper_offset = -9,
++
++    // Native frames
++
++    native_frame_initial_param_offset = 2
++
++  };
++
++  intptr_t ptr_at(int offset) const {
++    return *ptr_at_addr(offset);
++  }
++
++  void ptr_at_put(int offset, intptr_t value) {
++    *ptr_at_addr(offset) = value;
++  }
++
++ private:
++  // an additional field beyond _sp and _pc:
++  intptr_t* _fp; // frame pointer
++  // The interpreter and adapters will extend the frame of the caller.
++  // Since oopMaps are based on the sp of the caller before extension
++  // we need to know that value. However in order to compute the address
++  // of the return address we need the real "raw" sp. Since sparc already
++  // uses sp() to mean "raw" sp and unextended_sp() to mean the caller's
++  // original sp we use that convention.
++ ++ intptr_t* _unextended_sp; ++ void adjust_unextended_sp(); ++ ++ intptr_t* ptr_at_addr(int offset) const { ++ return (intptr_t*) addr_at(offset); ++ } ++#ifdef ASSERT ++ // Used in frame::sender_for_{interpreter,compiled}_frame ++ static void verify_deopt_original_pc( nmethod* nm, intptr_t* unextended_sp, bool is_method_handle_return = false); ++ static void verify_deopt_mh_original_pc(nmethod* nm, intptr_t* unextended_sp) { ++ verify_deopt_original_pc(nm, unextended_sp, true); ++ } ++#endif ++ ++ public: ++ // Constructors ++ ++ frame(intptr_t* sp, intptr_t* fp, address pc); ++ ++ frame(intptr_t* sp, intptr_t* unextended_sp, intptr_t* fp, address pc); ++ ++ frame(intptr_t* sp, intptr_t* fp); ++ ++ void init(intptr_t* sp, intptr_t* fp, address pc); ++ ++ // accessors for the instance variables ++ intptr_t* fp() const { return _fp; } ++ ++ inline address* sender_pc_addr() const; ++ ++ // return address of param, zero origin index. ++ inline address* native_param_addr(int idx) const; ++ ++ // expression stack tos if we are nested in a java call ++ intptr_t* interpreter_frame_last_sp() const; ++ ++ // helper to update a map with callee-saved FP ++ static void update_map_with_saved_link(RegisterMap* map, intptr_t** link_addr); ++ ++#ifndef CC_INTERP ++ // deoptimization support ++ void interpreter_frame_set_last_sp(intptr_t* sp); ++#endif // CC_INTERP ++ ++#ifdef CC_INTERP ++ inline interpreterState get_interpreterState() const; ++#endif // CC_INTERP ++ ++#endif // CPU_LOONGARCH_VM_FRAME_LOONGARCH_HPP +diff --git a/hotspot/src/cpu/loongarch/vm/frame_loongarch.inline.hpp b/hotspot/src/cpu/loongarch/vm/frame_loongarch.inline.hpp +new file mode 100644 +index 0000000000..3d22339ad7 +--- /dev/null ++++ b/hotspot/src/cpu/loongarch/vm/frame_loongarch.inline.hpp +@@ -0,0 +1,312 @@ ++/* ++ * Copyright (c) 1997, 2013, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2020, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ * ++ */ ++ ++#ifndef CPU_LOONGARCH_VM_FRAME_LOONGARCH_INLINE_HPP ++#define CPU_LOONGARCH_VM_FRAME_LOONGARCH_INLINE_HPP ++ ++#include "code/codeCache.hpp" ++ ++// Inline functions for Loongson frames: ++ ++// Constructors: ++ ++inline frame::frame() { ++ _pc = NULL; ++ _sp = NULL; ++ _unextended_sp = NULL; ++ _fp = NULL; ++ _cb = NULL; ++ _deopt_state = unknown; ++} ++ ++inline void frame::init(intptr_t* sp, intptr_t* fp, address pc) { ++ _sp = sp; ++ _unextended_sp = sp; ++ _fp = fp; ++ _pc = pc; ++ assert(pc != NULL, "no pc?"); ++ _cb = CodeCache::find_blob(pc); ++ adjust_unextended_sp(); ++ ++ address original_pc = nmethod::get_deopt_original_pc(this); ++ if (original_pc != NULL) { ++ _pc = original_pc; ++ _deopt_state = is_deoptimized; ++ } else { ++ _deopt_state = not_deoptimized; ++ } ++} ++ ++inline frame::frame(intptr_t* sp, intptr_t* fp, address pc) { ++ init(sp, fp, pc); ++} ++ ++inline frame::frame(intptr_t* sp, intptr_t* unextended_sp, intptr_t* fp, address pc) { ++ _sp = sp; ++ _unextended_sp = unextended_sp; ++ _fp = fp; ++ _pc = pc; ++ assert(pc != NULL, "no pc?"); ++ _cb = CodeCache::find_blob(pc); ++ adjust_unextended_sp(); ++ ++ address original_pc = nmethod::get_deopt_original_pc(this); ++ if (original_pc != NULL) { ++ _pc = original_pc; ++ _deopt_state = is_deoptimized; ++ } else { ++ _deopt_state = not_deoptimized; ++ } ++} ++ ++inline frame::frame(intptr_t* sp, intptr_t* fp) { ++ _sp = sp; ++ _unextended_sp = sp; ++ _fp = fp; ++ _pc = (address)(sp[-1]); ++ ++ // Here's a sticky one. This constructor can be called via AsyncGetCallTrace ++ // when last_Java_sp is non-null but the pc fetched is junk. If we are truly ++ // unlucky the junk value could be to a zombied method and we'll die on the ++ // find_blob call. This is also why we can have no asserts on the validity ++ // of the pc we find here. AsyncGetCallTrace -> pd_get_top_frame_for_signal_handler ++ // -> pd_last_frame should use a specialized version of pd_last_frame which could ++ // call a specilaized frame constructor instead of this one. ++ // Then we could use the assert below. However this assert is of somewhat dubious ++ // value. ++ // assert(_pc != NULL, "no pc?"); ++ ++ _cb = CodeCache::find_blob(_pc); ++ adjust_unextended_sp(); ++ address original_pc = nmethod::get_deopt_original_pc(this); ++ if (original_pc != NULL) { ++ _pc = original_pc; ++ _deopt_state = is_deoptimized; ++ } else { ++ _deopt_state = not_deoptimized; ++ } ++} ++ ++// Accessors ++ ++inline bool frame::equal(frame other) const { ++ bool ret = sp() == other.sp() ++ && unextended_sp() == other.unextended_sp() ++ && fp() == other.fp() ++ && pc() == other.pc(); ++ assert(!ret || ret && cb() == other.cb() && _deopt_state == other._deopt_state, "inconsistent construction"); ++ return ret; ++} ++ ++// Return unique id for this frame. The id must have a value where we can distinguish ++// identity and younger/older relationship. NULL represents an invalid (incomparable) ++// frame. 
++inline intptr_t* frame::id(void) const { return unextended_sp(); } ++ ++// Relationals on frames based ++// Return true if the frame is younger (more recent activation) than the frame represented by id ++inline bool frame::is_younger(intptr_t* id) const { assert(this->id() != NULL && id != NULL, "NULL frame id"); ++ return this->id() < id ; } ++ ++// Return true if the frame is older (less recent activation) than the frame represented by id ++inline bool frame::is_older(intptr_t* id) const { assert(this->id() != NULL && id != NULL, "NULL frame id"); ++ return this->id() > id ; } ++ ++ ++ ++inline intptr_t* frame::link() const { return (intptr_t*) *(intptr_t **)addr_at(link_offset); } ++inline void frame::set_link(intptr_t* addr) { *(intptr_t **)addr_at(link_offset) = addr; } ++ ++ ++inline intptr_t* frame::unextended_sp() const { return _unextended_sp; } ++ ++// Return address: ++ ++inline address* frame::sender_pc_addr() const { return (address*) addr_at( return_addr_offset); } ++inline address frame::sender_pc() const { return *sender_pc_addr(); } ++ ++// return address of param, zero origin index. ++inline address* frame::native_param_addr(int idx) const { return (address*) addr_at( native_frame_initial_param_offset+idx); } ++ ++#ifdef CC_INTERP ++ ++inline interpreterState frame::get_interpreterState() const { ++ return ((interpreterState)addr_at( -sizeof(BytecodeInterpreter)/wordSize )); ++} ++ ++inline intptr_t* frame::sender_sp() const { ++ // Hmm this seems awfully expensive QQQ, is this really called with interpreted frames? ++ if (is_interpreted_frame()) { ++ assert(false, "should never happen"); ++ return get_interpreterState()->sender_sp(); ++ } else { ++ return addr_at(sender_sp_offset); ++ } ++} ++ ++inline intptr_t** frame::interpreter_frame_locals_addr() const { ++ assert(is_interpreted_frame(), "must be interpreted"); ++ return &(get_interpreterState()->_locals); ++} ++ ++inline intptr_t* frame::interpreter_frame_bcx_addr() const { ++ assert(is_interpreted_frame(), "must be interpreted"); ++ return (intptr_t*) &(get_interpreterState()->_bcp); ++} ++ ++ ++// Constant pool cache ++ ++inline ConstantPoolCache** frame::interpreter_frame_cache_addr() const { ++ assert(is_interpreted_frame(), "must be interpreted"); ++ return &(get_interpreterState()->_constants); ++} ++ ++// Method ++ ++inline Method** frame::interpreter_frame_method_addr() const { ++ assert(is_interpreted_frame(), "must be interpreted"); ++ return &(get_interpreterState()->_method); ++} ++ ++inline intptr_t* frame::interpreter_frame_mdx_addr() const { ++ assert(is_interpreted_frame(), "must be interpreted"); ++ return (intptr_t*) &(get_interpreterState()->_mdx); ++} ++ ++// top of expression stack ++inline intptr_t* frame::interpreter_frame_tos_address() const { ++ assert(is_interpreted_frame(), "wrong frame type"); ++ return get_interpreterState()->_stack + 1; ++} ++ ++#else // asm interpreter ++inline intptr_t* frame::sender_sp() const { return addr_at( sender_sp_offset); } ++ ++inline intptr_t** frame::interpreter_frame_locals_addr() const { ++ return (intptr_t**)addr_at(interpreter_frame_locals_offset); ++} ++ ++inline intptr_t* frame::interpreter_frame_last_sp() const { ++ return *(intptr_t**)addr_at(interpreter_frame_last_sp_offset); ++} ++ ++inline intptr_t* frame::interpreter_frame_bcx_addr() const { ++ return (intptr_t*)addr_at(interpreter_frame_bcx_offset); ++} ++ ++ ++inline intptr_t* frame::interpreter_frame_mdx_addr() const { ++ return (intptr_t*)addr_at(interpreter_frame_mdx_offset); ++} ++ ++ 
++ ++// Constant pool cache ++ ++inline ConstantPoolCache** frame::interpreter_frame_cache_addr() const { ++ return (ConstantPoolCache**)addr_at(interpreter_frame_cache_offset); ++} ++ ++// Method ++ ++inline Method** frame::interpreter_frame_method_addr() const { ++ return (Method**)addr_at(interpreter_frame_method_offset); ++} ++ ++// top of expression stack ++inline intptr_t* frame::interpreter_frame_tos_address() const { ++ intptr_t* last_sp = interpreter_frame_last_sp(); ++ if (last_sp == NULL ) { ++ return sp(); ++ } else { ++ // sp() may have been extended by an adapter ++ assert(last_sp <= (intptr_t*)interpreter_frame_monitor_end(), "bad tos"); ++ return last_sp; ++ } ++} ++ ++inline oop* frame::interpreter_frame_temp_oop_addr() const { ++ return (oop *)(fp() + interpreter_frame_oop_temp_offset); ++} ++ ++#endif // CC_INTERP ++ ++inline int frame::pd_oop_map_offset_adjustment() const { ++ return 0; ++} ++ ++inline int frame::interpreter_frame_monitor_size() { ++ return BasicObjectLock::size(); ++} ++ ++ ++// expression stack ++// (the max_stack arguments are used by the GC; see class FrameClosure) ++ ++inline intptr_t* frame::interpreter_frame_expression_stack() const { ++ intptr_t* monitor_end = (intptr_t*) interpreter_frame_monitor_end(); ++ return monitor_end-1; ++} ++ ++ ++inline jint frame::interpreter_frame_expression_stack_direction() { return -1; } ++ ++ ++// Entry frames ++ ++inline JavaCallWrapper** frame::entry_frame_call_wrapper_addr() const { ++ return (JavaCallWrapper**)addr_at(entry_frame_call_wrapper_offset); ++} ++ ++// Compiled frames ++ ++inline int frame::local_offset_for_compiler(int local_index, int nof_args, int max_nof_locals, int max_nof_monitors) { ++ return (nof_args - local_index + (local_index < nof_args ? 1: -1)); ++} ++ ++inline int frame::monitor_offset_for_compiler(int local_index, int nof_args, int max_nof_locals, int max_nof_monitors) { ++ return local_offset_for_compiler(local_index, nof_args, max_nof_locals, max_nof_monitors); ++} ++ ++inline int frame::min_local_offset_for_compiler(int nof_args, int max_nof_locals, int max_nof_monitors) { ++ return (nof_args - (max_nof_locals + max_nof_monitors*2) - 1); ++} ++ ++inline bool frame::volatile_across_calls(Register reg) { ++ return true; ++} ++ ++ ++ ++inline oop frame::saved_oop_result(RegisterMap* map) const { ++ return *((oop*) map->location(V0->as_VMReg())); ++} ++ ++inline void frame::set_saved_oop_result(RegisterMap* map, oop obj) { ++ *((oop*) map->location(V0->as_VMReg())) = obj; ++} ++ ++#endif // CPU_LOONGARCH_VM_FRAME_LOONGARCH_INLINE_HPP +diff --git a/hotspot/src/cpu/loongarch/vm/globalDefinitions_loongarch.hpp b/hotspot/src/cpu/loongarch/vm/globalDefinitions_loongarch.hpp +new file mode 100644 +index 0000000000..f9f93b9e65 +--- /dev/null ++++ b/hotspot/src/cpu/loongarch/vm/globalDefinitions_loongarch.hpp +@@ -0,0 +1,41 @@ ++/* ++ * Copyright (c) 1999, 2013, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2020, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#ifndef CPU_LOONGARCH_VM_GLOBALDEFINITIONS_LOONGARCH_HPP ++#define CPU_LOONGARCH_VM_GLOBALDEFINITIONS_LOONGARCH_HPP ++// Size of LoongArch Instructions ++const int BytesPerInstWord = 4; ++ ++const int StackAlignmentInBytes = (2*wordSize); ++ ++// Indicates whether the C calling conventions require that ++// 32-bit integer argument values are properly extended to 64 bits. ++// If set, SharedRuntime::c_calling_convention() must adapt ++// signatures accordingly. ++const bool CCallingConventionRequiresIntsAsLongs = false; ++ ++#define SUPPORTS_NATIVE_CX8 ++ ++#endif // CPU_LOONGARCH_VM_GLOBALDEFINITIONS_LOONGARCH_HPP +diff --git a/hotspot/src/cpu/loongarch/vm/globals_loongarch.hpp b/hotspot/src/cpu/loongarch/vm/globals_loongarch.hpp +new file mode 100644 +index 0000000000..182be608a3 +--- /dev/null ++++ b/hotspot/src/cpu/loongarch/vm/globals_loongarch.hpp +@@ -0,0 +1,103 @@ ++/* ++ * Copyright (c) 2000, 2013, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#ifndef CPU_LOONGARCH_VM_GLOBALS_LOONGARCH_HPP ++#define CPU_LOONGARCH_VM_GLOBALS_LOONGARCH_HPP ++ ++#include "utilities/globalDefinitions.hpp" ++#include "utilities/macros.hpp" ++ ++// Sets the default values for platform dependent flags used by the runtime system. ++// (see globals.hpp) ++ ++#ifdef CORE ++define_pd_global(bool, UseSSE, 0); ++#endif /* CORE */ ++define_pd_global(bool, ConvertSleepToYield, true); ++define_pd_global(bool, ShareVtableStubs, true); ++define_pd_global(bool, CountInterpCalls, true); ++ ++define_pd_global(bool, ImplicitNullChecks, true); // Generate code for implicit null checks ++define_pd_global(bool, TrapBasedNullChecks, false); // Not needed on x86. 
++define_pd_global(bool, UncommonNullCast, true); // Uncommon-trap NULLs passed to check cast ++define_pd_global(bool, NeedsDeoptSuspend, false); // only register window machines need this ++ ++define_pd_global(intx, CodeEntryAlignment, 16); ++define_pd_global(intx, OptoLoopAlignment, 16); ++define_pd_global(intx, InlineFrequencyCount, 100); ++define_pd_global(intx, InlineSmallCode, 2000); ++ ++define_pd_global(uintx, TLABSize, 0); ++define_pd_global(uintx, NewSize, 1024 * K); ++define_pd_global(intx, PreInflateSpin, 10); ++ ++define_pd_global(intx, PrefetchFieldsAhead, -1); ++ ++define_pd_global(intx, StackYellowPages, 2); ++define_pd_global(intx, StackRedPages, 1); ++define_pd_global(intx, StackShadowPages, 3 DEBUG_ONLY(+1)); ++ ++define_pd_global(bool, RewriteBytecodes, true); ++define_pd_global(bool, RewriteFrequentPairs, true); ++define_pd_global(bool, UseMembar, true); ++// GC Ergo Flags ++define_pd_global(intx, CMSYoungGenPerWorker, 64*M); // default max size of CMS young gen, per GC worker thread ++ ++define_pd_global(uintx, TypeProfileLevel, 111); ++ ++define_pd_global(bool, PreserveFramePointer, false); ++// Only c2 cares about this at the moment ++define_pd_global(intx, AllocatePrefetchStyle, 2); ++define_pd_global(intx, AllocatePrefetchDistance, -1); ++ ++#define ARCH_FLAGS(develop, product, diagnostic, experimental, notproduct) \ ++ \ ++ product(bool, UseCodeCacheAllocOpt, true, \ ++ "Allocate code cache within 32-bit memory address space") \ ++ \ ++ product(bool, UseLSX, false, \ ++ "Use LSX 128-bit vector instructions") \ ++ \ ++ product(bool, UseLASX, false, \ ++ "Use LASX 256-bit vector instructions") \ ++ \ ++ product(intx, UseSyncLevel, 10000, \ ++ "The sync level on Loongson CPUs" \ ++ "UseSyncLevel == 10000, 111, for all Loongson CPUs, " \ ++ "UseSyncLevel == 4000, 101, maybe for GS464V" \ ++ "UseSyncLevel == 3000, 001, maybe for GS464V" \ ++ "UseSyncLevel == 2000, 011, maybe for GS464E/GS264" \ ++ "UseSyncLevel == 1000, 110, maybe for GS464") \ ++ \ ++ product(bool, UseUnalignedAccesses, false, \ ++ "Use unaligned memory accesses in Unsafe") \ ++ \ ++ product(bool, UseCRC32, false, \ ++ "Use CRC32 instructions for CRC32 computation") \ ++ \ ++ product(bool, UseActiveCoresMP, false, \ ++ "Eliminate barriers for single active cpu") ++ ++#endif // CPU_LOONGARCH_VM_GLOBALS_LOONGARCH_HPP +diff --git a/hotspot/src/cpu/loongarch/vm/icBuffer_loongarch.cpp b/hotspot/src/cpu/loongarch/vm/icBuffer_loongarch.cpp +new file mode 100644 +index 0000000000..8c78225346 +--- /dev/null ++++ b/hotspot/src/cpu/loongarch/vm/icBuffer_loongarch.cpp +@@ -0,0 +1,101 @@ ++/* ++ * Copyright (c) 1997, 2012, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2021, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). 
++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#include "precompiled.hpp" ++#include "asm/macroAssembler.hpp" ++#include "asm/macroAssembler.inline.hpp" ++#include "code/icBuffer.hpp" ++#include "gc_interface/collectedHeap.inline.hpp" ++#include "interpreter/bytecodes.hpp" ++#include "memory/resourceArea.hpp" ++#include "nativeInst_loongarch.hpp" ++#include "oops/oop.inline.hpp" ++#include "oops/oop.inline2.hpp" ++ ++#define A0 RA0 ++#define A1 RA1 ++#define A2 RA2 ++#define A3 RA3 ++#define A4 RA4 ++#define A5 RA5 ++#define A6 RA6 ++#define A7 RA7 ++#define T0 RT0 ++#define T1 RT1 ++#define T2 RT2 ++#define T3 RT3 ++#define T4 RT4 ++#define T5 RT5 ++#define T6 RT6 ++#define T7 RT7 ++#define T8 RT8 ++ ++int InlineCacheBuffer::ic_stub_code_size() { ++ return NativeMovConstReg::instruction_size + ++ NativeGeneralJump::instruction_size + ++ 1; ++ // so that code_end can be set in CodeBuffer ++ // 64bit 15 = 6 + 8 bytes + 1 byte ++ // 32bit 7 = 2 + 4 bytes + 1 byte ++} ++ ++ ++// we use T1 as cached oop(klass) now. this is the target of virtual call, ++// when reach here, the receiver in T0 ++// refer to shareRuntime_loongarch.cpp,gen_i2c2i_adapters ++void InlineCacheBuffer::assemble_ic_buffer_code(address code_begin, void* cached_value, ++ address entry_point) { ++ ResourceMark rm; ++ CodeBuffer code(code_begin, ic_stub_code_size()); ++ MacroAssembler* masm = new MacroAssembler(&code); ++ // note: even though the code contains an embedded oop, we do not need reloc info ++ // because ++ // (1) the oop is old (i.e., doesn't matter for scavenges) ++ // (2) these ICStubs are removed *before* a GC happens, so the roots disappear ++ // assert(cached_oop == NULL || cached_oop->is_perm(), "must be perm oop"); ++#define __ masm-> ++ __ patchable_li52(T1, (long)cached_value); ++ // TODO: confirm reloc ++ __ jmp(entry_point, relocInfo::runtime_call_type); ++ __ flush(); ++#undef __ ++} ++ ++ ++address InlineCacheBuffer::ic_buffer_entry_point(address code_begin) { ++ NativeMovConstReg* move = nativeMovConstReg_at(code_begin); // creation also verifies the object ++ NativeGeneralJump* jump = nativeGeneralJump_at(move->next_instruction_address()); ++ return jump->jump_destination(); ++} ++ ++ ++void* InlineCacheBuffer::ic_buffer_cached_value(address code_begin) { ++ // creation also verifies the object ++ NativeMovConstReg* move = nativeMovConstReg_at(code_begin); ++ // Verifies the jump ++ NativeGeneralJump* jump = nativeGeneralJump_at(move->next_instruction_address()); ++ void* o= (void*)move->data(); ++ return o; ++} +diff --git a/hotspot/src/cpu/loongarch/vm/icache_loongarch.cpp b/hotspot/src/cpu/loongarch/vm/icache_loongarch.cpp +new file mode 100644 +index 0000000000..d577e41f59 +--- /dev/null ++++ b/hotspot/src/cpu/loongarch/vm/icache_loongarch.cpp +@@ -0,0 +1,42 @@ ++/* ++ * Copyright (c) 1997, 2010, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2021, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#include "precompiled.hpp" ++#include "asm/macroAssembler.hpp" ++#include "runtime/icache.hpp" ++ ++void ICacheStubGenerator::generate_icache_flush(ICache::flush_icache_stub_t* flush_icache_stub) ++{ ++#define __ _masm-> ++ StubCodeMark mark(this, "ICache", "flush_icache_stub"); ++ address start = __ pc(); ++ ++ __ ibar(0); ++ __ ori(V0, RA2, 0); ++ __ jr(RA); ++ ++ *flush_icache_stub = (ICache::flush_icache_stub_t)start; ++#undef __ ++} +diff --git a/hotspot/src/cpu/loongarch/vm/icache_loongarch.hpp b/hotspot/src/cpu/loongarch/vm/icache_loongarch.hpp +new file mode 100644 +index 0000000000..15e45cb350 +--- /dev/null ++++ b/hotspot/src/cpu/loongarch/vm/icache_loongarch.hpp +@@ -0,0 +1,41 @@ ++/* ++ * Copyright (c) 1997, 2010, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2021, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#ifndef CPU_LOONGARCH_VM_ICACHE_LOONGARCH_HPP ++#define CPU_LOONGARCH_VM_ICACHE_LOONGARCH_HPP ++ ++// Interface for updating the instruction cache. Whenever the VM modifies ++// code, part of the processor instruction cache potentially has to be flushed. 
++ ++class ICache : public AbstractICache { ++ public: ++ enum { ++ stub_size = 3 * BytesPerInstWord, // Size of the icache flush stub in bytes ++ line_size = 32, // flush instruction affects a dword ++ log2_line_size = 5 // log2(line_size) ++ }; ++}; ++ ++#endif // CPU_LOONGARCH_VM_ICACHE_LOONGARCH_HPP +diff --git a/hotspot/src/cpu/loongarch/vm/interp_masm_loongarch_64.cpp b/hotspot/src/cpu/loongarch/vm/interp_masm_loongarch_64.cpp +new file mode 100644 +index 0000000000..8c84f21511 +--- /dev/null ++++ b/hotspot/src/cpu/loongarch/vm/interp_masm_loongarch_64.cpp +@@ -0,0 +1,1960 @@ ++/* ++ * Copyright (c) 2003, 2013, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2021, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#include "precompiled.hpp" ++#include "interp_masm_loongarch_64.hpp" ++#include "interpreter/interpreter.hpp" ++#include "interpreter/interpreterRuntime.hpp" ++#include "oops/arrayOop.hpp" ++#include "oops/markOop.hpp" ++#include "oops/methodData.hpp" ++#include "oops/method.hpp" ++#include "prims/jvmtiExport.hpp" ++#include "prims/jvmtiRedefineClassesTrace.hpp" ++#include "prims/jvmtiThreadState.hpp" ++#include "runtime/basicLock.hpp" ++#include "runtime/biasedLocking.hpp" ++#include "runtime/sharedRuntime.hpp" ++#include "runtime/thread.inline.hpp" ++ ++#define A0 RA0 ++#define A1 RA1 ++#define A2 RA2 ++#define A3 RA3 ++#define A4 RA4 ++#define A5 RA5 ++#define A6 RA6 ++#define A7 RA7 ++#define T0 RT0 ++#define T1 RT1 ++#define T2 RT2 ++#define T3 RT3 ++#define T4 RT4 ++#define T5 RT5 ++#define T6 RT6 ++#define T7 RT7 ++#define T8 RT8 ++ ++// Implementation of InterpreterMacroAssembler ++ ++#ifdef CC_INTERP ++void InterpreterMacroAssembler::get_method(Register reg) { ++} ++#endif // CC_INTERP ++ ++void InterpreterMacroAssembler::get_2_byte_integer_at_bcp(Register reg, Register tmp, int offset) { ++ if (UseUnalignedAccesses) { ++ ld_hu(reg, BCP, offset); ++ } else { ++ ld_bu(reg, BCP, offset); ++ ld_bu(tmp, BCP, offset + 1); ++ bstrins_d(reg, tmp, 15, 8); ++ } ++} ++ ++void InterpreterMacroAssembler::get_4_byte_integer_at_bcp(Register reg, int offset) { ++ if (UseUnalignedAccesses) { ++ ld_wu(reg, BCP, offset); ++ } else { ++ ldr_w(reg, BCP, offset); ++ ldl_w(reg, BCP, offset + 3); ++ lu32i_d(reg, 0); ++ } ++} ++ ++#ifndef CC_INTERP ++ ++void InterpreterMacroAssembler::call_VM_leaf_base(address entry_point, ++ int number_of_arguments) { ++ // interpreter specific ++ // ++ // Note: No need to save/restore bcp & locals pointer ++ // since these are 
callee saved registers and no blocking/ ++ // GC can happen in leaf calls. ++ // Further Note: DO NOT save/restore bcp/locals. If a caller has ++ // already saved them so that it can use BCP/LVP as temporaries ++ // then a save/restore here will DESTROY the copy the caller ++ // saved! There used to be a save_bcp() that only happened in ++ // the ASSERT path (no restore_bcp). Which caused bizarre failures ++ // when jvm built with ASSERTs. ++#ifdef ASSERT ++ save_bcp(); ++ { ++ Label L; ++ ld_d(AT,FP,frame::interpreter_frame_last_sp_offset * wordSize); ++ beq(AT,R0,L); ++ stop("InterpreterMacroAssembler::call_VM_leaf_base: last_sp != NULL"); ++ bind(L); ++ } ++#endif ++ // super call ++ MacroAssembler::call_VM_leaf_base(entry_point, number_of_arguments); ++ // interpreter specific ++ // Used to ASSERT that BCP/LVP were equal to frame's bcp/locals ++ // but since they may not have been saved (and we don't want to ++ // save them here (see note above) the assert is invalid. ++} ++ ++void InterpreterMacroAssembler::call_VM_base(Register oop_result, ++ Register java_thread, ++ Register last_java_sp, ++ address entry_point, ++ int number_of_arguments, ++ bool check_exceptions) { ++ // interpreter specific ++ // ++ // Note: Could avoid restoring locals ptr (callee saved) - however doesn't ++ // really make a difference for these runtime calls, since they are ++ // slow anyway. Btw., bcp must be saved/restored since it may change ++ // due to GC. ++ assert(java_thread == noreg , "not expecting a precomputed java thread"); ++ save_bcp(); ++#ifdef ASSERT ++ { ++ Label L; ++ ld_d(AT, FP, frame::interpreter_frame_last_sp_offset * wordSize); ++ beq(AT, R0, L); ++ stop("InterpreterMacroAssembler::call_VM_base: last_sp != NULL"); ++ bind(L); ++ } ++#endif /* ASSERT */ ++ // super call ++ MacroAssembler::call_VM_base(oop_result, java_thread, last_java_sp, ++ entry_point, number_of_arguments, ++ check_exceptions); ++ // interpreter specific ++ restore_bcp(); ++ restore_locals(); ++} ++ ++ ++void InterpreterMacroAssembler::check_and_handle_popframe(Register java_thread) { ++ if (JvmtiExport::can_pop_frame()) { ++ Label L; ++ // Initiate popframe handling only if it is not already being ++ // processed. If the flag has the popframe_processing bit set, it ++ // means that this code is called *during* popframe handling - we ++ // don't want to reenter. ++ // This method is only called just after the call into the vm in ++ // call_VM_base, so the arg registers are available. 
++ // Not clear if any other register is available, so load AT twice ++ assert(AT != java_thread, "check"); ++ ld_w(AT, java_thread, in_bytes(JavaThread::popframe_condition_offset())); ++ andi(AT, AT, JavaThread::popframe_pending_bit); ++ beq(AT, R0, L); ++ ++ ld_w(AT, java_thread, in_bytes(JavaThread::popframe_condition_offset())); ++ andi(AT, AT, JavaThread::popframe_processing_bit); ++ bne(AT, R0, L); ++ call_VM_leaf(CAST_FROM_FN_PTR(address, Interpreter::remove_activation_preserving_args_entry)); ++ jr(V0); ++ bind(L); ++ } ++} ++ ++ ++void InterpreterMacroAssembler::load_earlyret_value(TosState state) { ++ Register thread = T8; ++#ifndef OPT_THREAD ++ get_thread(thread); ++#else ++ move(T8, TREG); ++#endif ++ ld_ptr(thread, thread, in_bytes(JavaThread::jvmti_thread_state_offset())); ++ const Address tos_addr (thread, in_bytes(JvmtiThreadState::earlyret_tos_offset())); ++ const Address oop_addr (thread, in_bytes(JvmtiThreadState::earlyret_oop_offset())); ++ const Address val_addr (thread, in_bytes(JvmtiThreadState::earlyret_value_offset())); ++ //V0, oop_addr,V1,val_addr ++ switch (state) { ++ case atos: ++ ld_ptr(V0, oop_addr); ++ st_ptr(R0, oop_addr); ++ verify_oop(V0, state); ++ break; ++ case ltos: ++ ld_ptr(V0, val_addr); // fall through ++ break; ++ case btos: // fall through ++ case ztos: // fall through ++ case ctos: // fall through ++ case stos: // fall through ++ case itos: ++ ld_w(V0, val_addr); ++ break; ++ case ftos: ++ fld_s(F0, thread, in_bytes(JvmtiThreadState::earlyret_value_offset())); ++ break; ++ case dtos: ++ fld_d(F0, thread, in_bytes(JvmtiThreadState::earlyret_value_offset())); ++ break; ++ case vtos: /* nothing to do */ break; ++ default : ShouldNotReachHere(); ++ } ++ // Clean up tos value in the thread object ++ li(AT, (int)ilgl); ++ st_w(AT, tos_addr); ++ st_w(R0, thread, in_bytes(JvmtiThreadState::earlyret_value_offset())); ++} ++ ++ ++void InterpreterMacroAssembler::check_and_handle_earlyret(Register java_thread) { ++ if (JvmtiExport::can_force_early_return()) { ++ Label L; ++ Register tmp = T4; ++ ++ assert(java_thread != AT, "check"); ++ assert(java_thread != tmp, "check"); ++ ld_ptr(AT, java_thread, in_bytes(JavaThread::jvmti_thread_state_offset())); ++ beq(AT, R0, L); ++ ++ // Initiate earlyret handling only if it is not already being processed. ++ // If the flag has the earlyret_processing bit set, it means that this code ++ // is called *during* earlyret handling - we don't want to reenter. ++ ld_w(AT, AT, in_bytes(JvmtiThreadState::earlyret_state_offset())); ++ li(tmp, JvmtiThreadState::earlyret_pending); ++ bne(tmp, AT, L); ++ ++ // Call Interpreter::remove_activation_early_entry() to get the address of the ++ // same-named entrypoint in the generated interpreter code. 
++ ld_ptr(tmp, java_thread, in_bytes(JavaThread::jvmti_thread_state_offset())); ++ ld_w(AT, tmp, in_bytes(JvmtiThreadState::earlyret_tos_offset())); ++ move(A0, AT); ++ call_VM_leaf(CAST_FROM_FN_PTR(address, Interpreter::remove_activation_early_entry), A0); ++ jr(V0); ++ bind(L); ++ } ++} ++ ++ ++void InterpreterMacroAssembler::get_unsigned_2_byte_index_at_bcp(Register reg, ++ int bcp_offset) { ++ assert(bcp_offset >= 0, "bcp is still pointing to start of bytecode"); ++ ld_bu(AT, BCP, bcp_offset); ++ ld_bu(reg, BCP, bcp_offset + 1); ++ bstrins_w(reg, AT, 15, 8); ++} ++ ++ ++void InterpreterMacroAssembler::get_cache_index_at_bcp(Register index, ++ int bcp_offset, ++ size_t index_size) { ++ assert(bcp_offset > 0, "bcp is still pointing to start of bytecode"); ++ if (index_size == sizeof(u2)) { ++ get_2_byte_integer_at_bcp(index, AT, bcp_offset); ++ } else if (index_size == sizeof(u4)) { ++ assert(EnableInvokeDynamic, "giant index used only for JSR 292"); ++ get_4_byte_integer_at_bcp(index, bcp_offset); ++ // Check if the secondary index definition is still ~x, otherwise ++ // we have to change the following assembler code to calculate the ++ // plain index. ++ assert(ConstantPool::decode_invokedynamic_index(~123) == 123, "else change next line"); ++ nor(index, index, R0); ++ slli_w(index, index, 0); ++ } else if (index_size == sizeof(u1)) { ++ ld_bu(index, BCP, bcp_offset); ++ } else { ++ ShouldNotReachHere(); ++ } ++} ++ ++ ++void InterpreterMacroAssembler::get_cache_and_index_at_bcp(Register cache, ++ Register index, ++ int bcp_offset, ++ size_t index_size) { ++ assert_different_registers(cache, index); ++ get_cache_index_at_bcp(index, bcp_offset, index_size); ++ ld_d(cache, FP, frame::interpreter_frame_cache_offset * wordSize); ++ assert(sizeof(ConstantPoolCacheEntry) == 4 * wordSize, "adjust code below"); ++ assert(exact_log2(in_words(ConstantPoolCacheEntry::size())) == 2, "else change next line"); ++ shl(index, 2); ++} ++ ++ ++void InterpreterMacroAssembler::get_cache_and_index_and_bytecode_at_bcp(Register cache, ++ Register index, ++ Register bytecode, ++ int byte_no, ++ int bcp_offset, ++ size_t index_size) { ++ get_cache_and_index_at_bcp(cache, index, bcp_offset, index_size); ++ // We use a 32-bit load here since the layout of 64-bit words on ++ // little-endian machines allow us that. 
++ alsl_d(AT, index, cache, Address::times_ptr - 1); ++ ld_w(bytecode, AT, in_bytes(ConstantPoolCache::base_offset() + ConstantPoolCacheEntry::indices_offset())); ++ if(os::is_MP()) { ++ membar(Assembler::Membar_mask_bits(LoadLoad|LoadStore)); ++ } ++ ++ const int shift_count = (1 + byte_no) * BitsPerByte; ++ assert((byte_no == TemplateTable::f1_byte && shift_count == ConstantPoolCacheEntry::bytecode_1_shift) || ++ (byte_no == TemplateTable::f2_byte && shift_count == ConstantPoolCacheEntry::bytecode_2_shift), ++ "correct shift count"); ++ srli_d(bytecode, bytecode, shift_count); ++ assert(ConstantPoolCacheEntry::bytecode_1_mask == ConstantPoolCacheEntry::bytecode_2_mask, "common mask"); ++ li(AT, ConstantPoolCacheEntry::bytecode_1_mask); ++ andr(bytecode, bytecode, AT); ++} ++ ++void InterpreterMacroAssembler::get_cache_entry_pointer_at_bcp(Register cache, ++ Register tmp, ++ int bcp_offset, ++ size_t index_size) { ++ assert(bcp_offset > 0, "bcp is still pointing to start of bytecode"); ++ assert(cache != tmp, "must use different register"); ++ get_cache_index_at_bcp(tmp, bcp_offset, index_size); ++ assert(sizeof(ConstantPoolCacheEntry) == 4 * wordSize, "adjust code below"); ++ // convert from field index to ConstantPoolCacheEntry index ++ // and from word offset to byte offset ++ assert(exact_log2(in_bytes(ConstantPoolCacheEntry::size_in_bytes())) == 2 + LogBytesPerWord, "else change next line"); ++ shl(tmp, 2 + LogBytesPerWord); ++ ld_d(cache, FP, frame::interpreter_frame_cache_offset * wordSize); ++ // skip past the header ++ addi_d(cache, cache, in_bytes(ConstantPoolCache::base_offset())); ++ add_d(cache, cache, tmp); ++} ++ ++void InterpreterMacroAssembler::get_method_counters(Register method, ++ Register mcs, Label& skip) { ++ Label has_counters; ++ ld_d(mcs, method, in_bytes(Method::method_counters_offset())); ++ bne(mcs, R0, has_counters); ++ call_VM(noreg, CAST_FROM_FN_PTR(address, ++ InterpreterRuntime::build_method_counters), method); ++ ld_d(mcs, method, in_bytes(Method::method_counters_offset())); ++ beq(mcs, R0, skip); // No MethodCounters allocated, OutOfMemory ++ bind(has_counters); ++} ++ ++// Load object from cpool->resolved_references(index) ++void InterpreterMacroAssembler::load_resolved_reference_at_index( ++ Register result, Register index) { ++ assert_different_registers(result, index); ++ // convert from field index to resolved_references() index and from ++ // word index to byte offset. Since this is a java object, it can be compressed ++ Register tmp = index; // reuse ++ shl(tmp, LogBytesPerHeapOop); ++ ++ get_constant_pool(result); ++ // load pointer for resolved_references[] objArray ++ ld_d(result, result, ConstantPool::resolved_references_offset_in_bytes()); ++ // JNIHandles::resolve(obj); ++ ld_d(result, result, 0); //? is needed? ++ // Add in the index ++ add_d(result, result, tmp); ++ load_heap_oop(result, Address(result, arrayOopDesc::base_offset_in_bytes(T_OBJECT))); ++} ++ ++// Resets LVP to locals. Register sub_klass cannot be any of the above. ++void InterpreterMacroAssembler::gen_subtype_check( Register Rsup_klass, Register Rsub_klass, Label &ok_is_subtype ) { ++ ++ assert( Rsub_klass != Rsup_klass, "Rsup_klass holds superklass" ); ++ assert( Rsub_klass != T1, "T1 holds 2ndary super array length" ); ++ assert( Rsub_klass != T0, "T0 holds 2ndary super array scan ptr" ); ++ // Profile the not-null value's klass. ++ // Here T4 and T1 are used as temporary registers. ++ profile_typecheck(T4, Rsub_klass, T1); // blows T4, reloads T1 ++ ++ // Do the check. 
++ check_klass_subtype(Rsub_klass, Rsup_klass, T1, ok_is_subtype); // blows T1 ++ ++ // Profile the failure of the check. ++ profile_typecheck_failed(T4); // blows T4 ++ ++} ++ ++ ++ ++// Java Expression Stack ++ ++void InterpreterMacroAssembler::pop_ptr(Register r) { ++ ld_d(r, SP, 0); ++ addi_d(SP, SP, Interpreter::stackElementSize); ++} ++ ++void InterpreterMacroAssembler::pop_i(Register r) { ++ ld_w(r, SP, 0); ++ addi_d(SP, SP, Interpreter::stackElementSize); ++} ++ ++void InterpreterMacroAssembler::pop_l(Register r) { ++ ld_d(r, SP, 0); ++ addi_d(SP, SP, 2 * Interpreter::stackElementSize); ++} ++ ++void InterpreterMacroAssembler::pop_f(FloatRegister r) { ++ fld_s(r, SP, 0); ++ addi_d(SP, SP, Interpreter::stackElementSize); ++} ++ ++void InterpreterMacroAssembler::pop_d(FloatRegister r) { ++ fld_d(r, SP, 0); ++ addi_d(SP, SP, 2 * Interpreter::stackElementSize); ++} ++ ++void InterpreterMacroAssembler::push_ptr(Register r) { ++ addi_d(SP, SP, - Interpreter::stackElementSize); ++ st_d(r, SP, 0); ++} ++ ++void InterpreterMacroAssembler::push_i(Register r) { ++ // For compatibility reason, don't change to sw. ++ addi_d(SP, SP, - Interpreter::stackElementSize); ++ st_d(r, SP, 0); ++} ++ ++void InterpreterMacroAssembler::push_l(Register r) { ++ addi_d(SP, SP, -2 * Interpreter::stackElementSize); ++ st_d(r, SP, 0); ++ st_d(R0, SP, Interpreter::stackElementSize); ++} ++ ++void InterpreterMacroAssembler::push_f(FloatRegister r) { ++ addi_d(SP, SP, - Interpreter::stackElementSize); ++ fst_s(r, SP, 0); ++} ++ ++void InterpreterMacroAssembler::push_d(FloatRegister r) { ++ addi_d(SP, SP, -2 * Interpreter::stackElementSize); ++ fst_d(r, SP, 0); ++ st_d(R0, SP, Interpreter::stackElementSize); ++} ++ ++void InterpreterMacroAssembler::pop(TosState state) { ++ switch (state) { ++ case atos: pop_ptr(); break; ++ case btos: ++ case ztos: ++ case ctos: ++ case stos: ++ case itos: pop_i(); break; ++ case ltos: pop_l(); break; ++ case ftos: pop_f(); break; ++ case dtos: pop_d(); break; ++ case vtos: /* nothing to do */ break; ++ default: ShouldNotReachHere(); ++ } ++ verify_oop(FSR, state); ++} ++ ++//FSR=V0,SSR=V1 ++void InterpreterMacroAssembler::push(TosState state) { ++ verify_oop(FSR, state); ++ switch (state) { ++ case atos: push_ptr(); break; ++ case btos: ++ case ztos: ++ case ctos: ++ case stos: ++ case itos: push_i(); break; ++ case ltos: push_l(); break; ++ case ftos: push_f(); break; ++ case dtos: push_d(); break; ++ case vtos: /* nothing to do */ break; ++ default : ShouldNotReachHere(); ++ } ++} ++ ++void InterpreterMacroAssembler::load_ptr(int n, Register val) { ++ ld_d(val, SP, Interpreter::expr_offset_in_bytes(n)); ++} ++ ++void InterpreterMacroAssembler::store_ptr(int n, Register val) { ++ st_d(val, SP, Interpreter::expr_offset_in_bytes(n)); ++} ++ ++// Jump to from_interpreted entry of a call unless single stepping is possible ++// in this thread in which case we must call the i2i entry ++void InterpreterMacroAssembler::jump_from_interpreted(Register method, Register temp) { ++ // record last_sp ++ move(Rsender, SP); ++ st_d(SP, FP, frame::interpreter_frame_last_sp_offset * wordSize); ++ ++ if (JvmtiExport::can_post_interpreter_events()) { ++ Label run_compiled_code; ++ // JVMTI events, such as single-stepping, are implemented partly by avoiding running ++ // compiled code in threads for which the event is enabled. Check here for ++ // interp_only_mode if these events CAN be enabled. 
++#ifndef OPT_THREAD ++ get_thread(temp); ++#else ++ move(temp, TREG); ++#endif ++ // interp_only is an int, on little endian it is sufficient to test the byte only ++ // Is a cmpl faster? ++ ld_w(AT, temp, in_bytes(JavaThread::interp_only_mode_offset())); ++ beq(AT, R0, run_compiled_code); ++ ld_d(AT, method, in_bytes(Method::interpreter_entry_offset())); ++ jr(AT); ++ bind(run_compiled_code); ++ } ++ ++ ld_d(AT, method, in_bytes(Method::from_interpreted_offset())); ++ jr(AT); ++} ++ ++ ++// The following two routines provide a hook so that an implementation ++// can schedule the dispatch in two parts. LoongArch64 does not do this. ++void InterpreterMacroAssembler::dispatch_prolog(TosState state, int step) { ++ // Nothing LoongArch64 specific to be done here ++} ++ ++void InterpreterMacroAssembler::dispatch_epilog(TosState state, int step) { ++ dispatch_next(state, step); ++} ++ ++// assume the next bytecode in T8. ++void InterpreterMacroAssembler::dispatch_base(TosState state, ++ address* table, ++ bool verifyoop) { ++ if (VerifyActivationFrameSize) { ++ Label L; ++ ++ sub_d(T2, FP, SP); ++ int min_frame_size = (frame::link_offset - ++ frame::interpreter_frame_initial_sp_offset) * wordSize; ++ addi_d(T2, T2, -min_frame_size); ++ bge(T2, R0, L); ++ stop("broken stack frame"); ++ bind(L); ++ } ++ // FIXME: I do not know which register should pass to verify_oop ++ if (verifyoop) verify_oop(FSR, state); ++ ++ if((long)table >= (long)Interpreter::dispatch_table(btos) && ++ (long)table <= (long)Interpreter::dispatch_table(vtos)) { ++ int table_size = (long)Interpreter::dispatch_table(itos) - ++ (long)Interpreter::dispatch_table(stos); ++ int table_offset = ((int)state - (int)itos) * table_size; ++ ++ // S8 points to the starting address of Interpreter::dispatch_table(itos). ++ // See StubGenerator::generate_call_stub(address& return_address) for the initialization of S8. ++ if (table_offset != 0) { ++ if (is_simm(table_offset, 12)) { ++ alsl_d(T3, Rnext, S8, LogBytesPerWord - 1); ++ ld_d(T3, T3, table_offset); ++ } else { ++ li(T2, table_offset); ++ alsl_d(T3, Rnext, S8, LogBytesPerWord - 1); ++ ldx_d(T3, T2, T3); ++ } ++ } else { ++ slli_d(T2, Rnext, LogBytesPerWord); ++ ldx_d(T3, S8, T2); ++ } ++ } else { ++ li(T3, (long)table); ++ slli_d(T2, Rnext, LogBytesPerWord); ++ ldx_d(T3, T2, T3); ++ } ++ jr(T3); ++} ++ ++void InterpreterMacroAssembler::dispatch_only(TosState state) { ++ dispatch_base(state, Interpreter::dispatch_table(state)); ++} ++ ++void InterpreterMacroAssembler::dispatch_only_normal(TosState state) { ++ dispatch_base(state, Interpreter::normal_table(state)); ++} ++ ++void InterpreterMacroAssembler::dispatch_only_noverify(TosState state) { ++ dispatch_base(state, Interpreter::normal_table(state), false); ++} ++ ++ ++void InterpreterMacroAssembler::dispatch_next(TosState state, int step) { ++ // load next bytecode ++ ld_bu(Rnext, BCP, step); ++ increment(BCP, step); ++ dispatch_base(state, Interpreter::dispatch_table(state)); ++} ++ ++void InterpreterMacroAssembler::dispatch_via(TosState state, address* table) { ++ // load current bytecode ++ ld_bu(Rnext, BCP, 0); ++ dispatch_base(state, table); ++} ++ ++// remove activation ++// ++// Unlock the receiver if this is a synchronized method. ++// Unlock any Java monitors from syncronized blocks. ++// Remove the activation from the stack. 
++// ++// If there are locked Java monitors ++// If throw_monitor_exception ++// throws IllegalMonitorStateException ++// Else if install_monitor_exception ++// installs IllegalMonitorStateException ++// Else ++// no error processing ++// used registers : T1, T2, T3, T8 ++// T1 : thread, method access flags ++// T2 : monitor entry pointer ++// T3 : method, monitor top ++// T8 : unlock flag ++void InterpreterMacroAssembler::remove_activation( ++ TosState state, ++ Register ret_addr, ++ bool throw_monitor_exception, ++ bool install_monitor_exception, ++ bool notify_jvmdi) { ++ // Note: Registers V0, V1 and F0, F1 may be in use for the result ++ // check if synchronized method ++ Label unlocked, unlock, no_unlock; ++ ++ // get the value of _do_not_unlock_if_synchronized into T8 ++#ifndef OPT_THREAD ++ Register thread = T1; ++ get_thread(thread); ++#else ++ Register thread = TREG; ++#endif ++ ld_b(T8, thread, in_bytes(JavaThread::do_not_unlock_if_synchronized_offset())); ++ // reset the flag ++ st_b(R0, thread, in_bytes(JavaThread::do_not_unlock_if_synchronized_offset())); ++ // get method access flags ++ ld_d(T3, FP, frame::interpreter_frame_method_offset * wordSize); ++ ld_w(T1, T3, in_bytes(Method::access_flags_offset())); ++ andi(T1, T1, JVM_ACC_SYNCHRONIZED); ++ beq(T1, R0, unlocked); ++ ++ // Don't unlock anything if the _do_not_unlock_if_synchronized flag is set. ++ bne(T8, R0, no_unlock); ++ // unlock monitor ++ push(state); // save result ++ ++ // BasicObjectLock will be first in list, since this is a ++ // synchronized method. However, need to check that the object has ++ // not been unlocked by an explicit monitorexit bytecode. ++ addi_d(c_rarg0, FP, frame::interpreter_frame_initial_sp_offset * wordSize ++ - (int)sizeof(BasicObjectLock)); ++ // address of first monitor ++ ld_d(T1, c_rarg0, BasicObjectLock::obj_offset_in_bytes()); ++ bne(T1, R0, unlock); ++ pop(state); ++ if (throw_monitor_exception) { ++ // Entry already unlocked, need to throw exception ++ // I think LA do not need empty_FPU_stack ++ // remove possible return value from FPU-stack, otherwise stack could overflow ++ empty_FPU_stack(); ++ call_VM(NOREG, CAST_FROM_FN_PTR(address, ++ InterpreterRuntime::throw_illegal_monitor_state_exception)); ++ should_not_reach_here(); ++ } else { ++ // Monitor already unlocked during a stack unroll. If requested, ++ // install an illegal_monitor_state_exception. Continue with ++ // stack unrolling. 
++ if (install_monitor_exception) { ++ // remove possible return value from FPU-stack, ++ // otherwise stack could overflow ++ empty_FPU_stack(); ++ call_VM(NOREG, CAST_FROM_FN_PTR(address, ++ InterpreterRuntime::new_illegal_monitor_state_exception)); ++ ++ } ++ ++ b(unlocked); ++ } ++ ++ bind(unlock); ++ unlock_object(c_rarg0); ++ pop(state); ++ ++ // Check that for block-structured locking (i.e., that all locked ++ // objects has been unlocked) ++ bind(unlocked); ++ ++ // V0, V1: Might contain return value ++ ++ // Check that all monitors are unlocked ++ { ++ Label loop, exception, entry, restart; ++ const int entry_size = frame::interpreter_frame_monitor_size() * wordSize; ++ const Address monitor_block_top(FP, ++ frame::interpreter_frame_monitor_block_top_offset * wordSize); ++ ++ bind(restart); ++ // points to current entry, starting with top-most entry ++ ld_d(c_rarg0, monitor_block_top); ++ // points to word before bottom of monitor block ++ addi_d(T3, FP, frame::interpreter_frame_initial_sp_offset * wordSize); ++ b(entry); ++ ++ // Entry already locked, need to throw exception ++ bind(exception); ++ ++ if (throw_monitor_exception) { ++ // Throw exception ++ // remove possible return value from FPU-stack, ++ // otherwise stack could overflow ++ empty_FPU_stack(); ++ MacroAssembler::call_VM(NOREG, CAST_FROM_FN_PTR(address, ++ InterpreterRuntime::throw_illegal_monitor_state_exception)); ++ should_not_reach_here(); ++ } else { ++ // Stack unrolling. Unlock object and install illegal_monitor_exception ++ // Unlock does not block, so don't have to worry about the frame ++ // We don't have to preserve c_rarg0, since we are going to ++ // throw an exception ++ ++ push(state); ++ unlock_object(c_rarg0); ++ pop(state); ++ ++ if (install_monitor_exception) { ++ empty_FPU_stack(); ++ call_VM(NOREG, CAST_FROM_FN_PTR(address, ++ InterpreterRuntime::new_illegal_monitor_state_exception)); ++ } ++ ++ b(restart); ++ } ++ ++ bind(loop); ++ ld_d(T1, c_rarg0, BasicObjectLock::obj_offset_in_bytes()); ++ bne(T1, R0, exception);// check if current entry is used ++ ++ addi_d(c_rarg0, c_rarg0, entry_size);// otherwise advance to next entry ++ bind(entry); ++ bne(c_rarg0, T3, loop); // check if bottom reached ++ } ++ ++ bind(no_unlock); ++ ++ // jvmpi support (jvmdi does not generate MethodExit on exception / popFrame) ++ if (notify_jvmdi) { ++ notify_method_exit(state, NotifyJVMTI); // preserve TOSCA ++ } else { ++ notify_method_exit(state, SkipNotifyJVMTI); // preserve TOSCA ++ } ++ ++ // remove activation ++ ld_d(SP, FP, frame::interpreter_frame_sender_sp_offset * wordSize); ++ ld_d(ret_addr, FP, frame::interpreter_frame_return_addr_offset * wordSize); ++ ld_d(FP, FP, frame::interpreter_frame_sender_fp_offset * wordSize); ++} ++ ++#endif // C_INTERP ++ ++// Lock object ++// ++// Args: ++// c_rarg0: BasicObjectLock to be used for locking ++// ++// Kills: ++// T1 ++// T2 ++void InterpreterMacroAssembler::lock_object(Register lock_reg) { ++ assert(lock_reg == c_rarg0, "The argument is only for looks. 
It must be c_rarg0"); ++ ++ if (UseHeavyMonitors) { ++ call_VM(NOREG, CAST_FROM_FN_PTR(address, InterpreterRuntime::monitorenter), lock_reg); ++ } else { ++ Label done, slow_case; ++ const Register tmp_reg = T2; ++ const Register scr_reg = T1; ++ const int obj_offset = BasicObjectLock::obj_offset_in_bytes(); ++ const int lock_offset = BasicObjectLock::lock_offset_in_bytes (); ++ const int mark_offset = lock_offset + BasicLock::displaced_header_offset_in_bytes(); ++ ++ // Load object pointer into scr_reg ++ ld_d(scr_reg, lock_reg, obj_offset); ++ ++ if (UseBiasedLocking) { ++ // Note: we use noreg for the temporary register since it's hard ++ // to come up with a free register on all incoming code paths ++ biased_locking_enter(lock_reg, scr_reg, tmp_reg, noreg, false, done, &slow_case); ++ } ++ ++ // Load (object->mark() | 1) into tmp_reg ++ ld_d(AT, scr_reg, 0); ++ ori(tmp_reg, AT, 1); ++ ++ // Save (object->mark() | 1) into BasicLock's displaced header ++ st_d(tmp_reg, lock_reg, mark_offset); ++ ++ assert(lock_offset == 0, "displached header must be first word in BasicObjectLock"); ++ ++ if (PrintBiasedLockingStatistics) { ++ Label succ, fail; ++ cmpxchg(Address(scr_reg, 0), tmp_reg, lock_reg, AT, true, false, succ, &fail); ++ bind(succ); ++ atomic_inc32((address)BiasedLocking::fast_path_entry_count_addr(), 1, AT, scr_reg); ++ b(done); ++ bind(fail); ++ } else { ++ cmpxchg(Address(scr_reg, 0), tmp_reg, lock_reg, AT, true, false, done); ++ } ++ ++ // Test if the oopMark is an obvious stack pointer, i.e., ++ // 1) (mark & 3) == 0, and ++ // 2) SP <= mark < SP + os::pagesize() ++ // ++ // These 3 tests can be done by evaluating the following ++ // expression: ((mark - sp) & (3 - os::vm_page_size())), ++ // assuming both stack pointer and pagesize have their ++ // least significant 2 bits clear. ++ // NOTE: the oopMark is in tmp_reg as the result of cmpxchg ++ sub_d(tmp_reg, tmp_reg, SP); ++ li(AT, 7 - os::vm_page_size()); ++ andr(tmp_reg, tmp_reg, AT); ++ // Save the test result, for recursive case, the result is zero ++ st_d(tmp_reg, lock_reg, mark_offset); ++ if (PrintBiasedLockingStatistics) { ++ bnez(tmp_reg, slow_case); ++ atomic_inc32((address)BiasedLocking::fast_path_entry_count_addr(), 1, AT, scr_reg); ++ } ++ beqz(tmp_reg, done); ++ ++ bind(slow_case); ++ // Call the runtime routine for slow case ++ call_VM(NOREG, CAST_FROM_FN_PTR(address, InterpreterRuntime::monitorenter), lock_reg); ++ ++ bind(done); ++ } ++} ++ ++// Unlocks an object. Used in monitorexit bytecode and ++// remove_activation. Throws an IllegalMonitorException if object is ++// not locked by current thread. ++// ++// Args: ++// c_rarg0: BasicObjectLock for lock ++// ++// Kills: ++// T1 ++// T2 ++// T3 ++// Throw an IllegalMonitorException if object is not locked by current thread ++void InterpreterMacroAssembler::unlock_object(Register lock_reg) { ++ assert(lock_reg == c_rarg0, "The argument is only for looks. 
It must be c_rarg0"); ++ ++ if (UseHeavyMonitors) { ++ call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::monitorexit), lock_reg); ++ } else { ++ Label done; ++ const Register tmp_reg = T1; ++ const Register scr_reg = T2; ++ const Register hdr_reg = T3; ++ ++ save_bcp(); // Save in case of exception ++ ++ // Convert from BasicObjectLock structure to object and BasicLock structure ++ // Store the BasicLock address into tmp_reg ++ addi_d(tmp_reg, lock_reg, BasicObjectLock::lock_offset_in_bytes()); ++ ++ // Load oop into scr_reg ++ ld_d(scr_reg, lock_reg, BasicObjectLock::obj_offset_in_bytes()); ++ // free entry ++ st_d(R0, lock_reg, BasicObjectLock::obj_offset_in_bytes()); ++ if (UseBiasedLocking) { ++ biased_locking_exit(scr_reg, hdr_reg, done); ++ } ++ ++ // Load the old header from BasicLock structure ++ ld_d(hdr_reg, tmp_reg, BasicLock::displaced_header_offset_in_bytes()); ++ // zero for recursive case ++ beqz(hdr_reg, done); ++ ++ // Atomic swap back the old header ++ cmpxchg(Address(scr_reg, 0), tmp_reg, hdr_reg, AT, false, false, done); ++ ++ // Call the runtime routine for slow case. ++ st_d(scr_reg, lock_reg, BasicObjectLock::obj_offset_in_bytes()); // restore obj ++ call_VM(NOREG, ++ CAST_FROM_FN_PTR(address, InterpreterRuntime::monitorexit), ++ lock_reg); ++ ++ bind(done); ++ ++ restore_bcp(); ++ } ++} ++ ++#ifndef CC_INTERP ++ ++void InterpreterMacroAssembler::test_method_data_pointer(Register mdp, ++ Label& zero_continue) { ++ assert(ProfileInterpreter, "must be profiling interpreter"); ++ ld_d(mdp, Address(FP, frame::interpreter_frame_mdx_offset * wordSize)); ++ beq(mdp, R0, zero_continue); ++} ++ ++ ++// Set the method data pointer for the current bcp. ++void InterpreterMacroAssembler::set_method_data_pointer_for_bcp() { ++ assert(ProfileInterpreter, "must be profiling interpreter"); ++ Label set_mdp; ++ ++ // V0 and T0 will be used as two temporary registers. ++ push2(V0, T0); ++ ++ get_method(T0); ++ // Test MDO to avoid the call if it is NULL. ++ ld_d(V0, T0, in_bytes(Method::method_data_offset())); ++ beq(V0, R0, set_mdp); ++ ++ // method: T0 ++ // bcp: BCP --> S0 ++ call_VM_leaf(CAST_FROM_FN_PTR(address, InterpreterRuntime::bcp_to_di), T0, BCP); ++ // mdi: V0 ++ // mdo is guaranteed to be non-zero here, we checked for it before the call. ++ get_method(T0); ++ ld_d(T0, T0, in_bytes(Method::method_data_offset())); ++ addi_d(T0, T0, in_bytes(MethodData::data_offset())); ++ add_d(V0, T0, V0); ++ bind(set_mdp); ++ st_d(V0, FP, frame::interpreter_frame_mdx_offset * wordSize); ++ pop2(T0, V0); ++} ++ ++void InterpreterMacroAssembler::verify_method_data_pointer() { ++ assert(ProfileInterpreter, "must be profiling interpreter"); ++#ifdef ASSERT ++ Label verify_continue; ++ Register method = T5; ++ Register mdp = T6; ++ Register tmp = A0; ++ push(method); ++ push(mdp); ++ push(tmp); ++ test_method_data_pointer(mdp, verify_continue); // If mdp is zero, continue ++ get_method(method); ++ ++ // If the mdp is valid, it will point to a DataLayout header which is ++ // consistent with the bcp. The converse is highly probable also. 
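++  // Sketch of the check below: recompute the expected bcp from the bci stored
++  // in the DataLayout entry, i.e. bcp == ConstMethod* + codes_offset() + bci,
++  // and fall into InterpreterRuntime::verify_mdp() only if it does not match BCP.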
++ ld_hu(tmp, mdp, in_bytes(DataLayout::bci_offset())); ++ ld_d(AT, method, in_bytes(Method::const_offset())); ++ add_d(tmp, tmp, AT); ++ addi_d(tmp, tmp, in_bytes(ConstMethod::codes_offset())); ++ beq(tmp, BCP, verify_continue); ++ call_VM_leaf(CAST_FROM_FN_PTR(address, InterpreterRuntime::verify_mdp), method, BCP, mdp); ++ bind(verify_continue); ++ pop(tmp); ++ pop(mdp); ++ pop(method); ++#endif // ASSERT ++} ++ ++ ++void InterpreterMacroAssembler::set_mdp_data_at(Register mdp_in, ++ int constant, ++ Register value) { ++ assert(ProfileInterpreter, "must be profiling interpreter"); ++ Address data(mdp_in, constant); ++ st_d(value, data); ++} ++ ++ ++void InterpreterMacroAssembler::increment_mdp_data_at(Register mdp_in, ++ int constant, ++ bool decrement) { ++ // Counter address ++ Address data(mdp_in, constant); ++ ++ increment_mdp_data_at(data, decrement); ++} ++ ++void InterpreterMacroAssembler::increment_mdp_data_at(Address data, ++ bool decrement) { ++ assert(ProfileInterpreter, "must be profiling interpreter"); ++ // %%% this does 64bit counters at best it is wasting space ++ // at worst it is a rare bug when counters overflow ++ Register tmp = S0; ++ push(tmp); ++ if (decrement) { ++ // Decrement the register. ++ ld_d(AT, data); ++ addi_d(tmp, AT, (int32_t) -DataLayout::counter_increment); ++ // If the decrement causes the counter to overflow, stay negative ++ Label L; ++ blt(tmp, R0, L); ++ addi_d(tmp, tmp, (int32_t) DataLayout::counter_increment); ++ bind(L); ++ st_d(tmp, data); ++ } else { ++ assert(DataLayout::counter_increment == 1, ++ "flow-free idiom only works with 1"); ++ ld_d(AT, data); ++ // Increment the register. ++ addi_d(tmp, AT, DataLayout::counter_increment); ++ // If the increment causes the counter to overflow, pull back by 1. ++ slt(AT, tmp, R0); ++ sub_d(tmp, tmp, AT); ++ st_d(tmp, data); ++ } ++ pop(tmp); ++} ++ ++ ++void InterpreterMacroAssembler::increment_mdp_data_at(Register mdp_in, ++ Register reg, ++ int constant, ++ bool decrement) { ++ Register tmp = S0; ++ push(S0); ++ if (decrement) { ++ // Decrement the register. ++ add_d(AT, mdp_in, reg); ++ assert(Assembler::is_simm(constant, 12), "constant is not a simm12 !"); ++ ld_d(AT, AT, constant); ++ ++ addi_d(tmp, AT, (int32_t) -DataLayout::counter_increment); ++ // If the decrement causes the counter to overflow, stay negative ++ Label L; ++ blt(tmp, R0, L); ++ addi_d(tmp, tmp, (int32_t) DataLayout::counter_increment); ++ bind(L); ++ ++ add_d(AT, mdp_in, reg); ++ st_d(tmp, AT, constant); ++ } else { ++ add_d(AT, mdp_in, reg); ++ assert(Assembler::is_simm(constant, 12), "constant is not a simm12 !"); ++ ld_d(AT, AT, constant); ++ ++ // Increment the register. ++ addi_d(tmp, AT, DataLayout::counter_increment); ++ // If the increment causes the counter to overflow, pull back by 1. ++ slt(AT, tmp, R0); ++ sub_d(tmp, tmp, AT); ++ ++ add_d(AT, mdp_in, reg); ++ st_d(tmp, AT, constant); ++ } ++ pop(S0); ++} ++ ++void InterpreterMacroAssembler::set_mdp_flag_at(Register mdp_in, ++ int flag_byte_constant) { ++ assert(ProfileInterpreter, "must be profiling interpreter"); ++ int header_offset = in_bytes(DataLayout::header_offset()); ++ int header_bits = DataLayout::flag_mask_to_header_mask(flag_byte_constant); ++ // Set the flag ++ ld_w(AT, Address(mdp_in, header_offset)); ++ if(Assembler::is_simm(header_bits, 12)) { ++ ori(AT, AT, header_bits); ++ } else { ++ push(T8); ++ // T8 is used as a temporary register. 
++ li(T8, header_bits); ++ orr(AT, AT, T8); ++ pop(T8); ++ } ++ st_w(AT, Address(mdp_in, header_offset)); ++} ++ ++ ++void InterpreterMacroAssembler::test_mdp_data_at(Register mdp_in, ++ int offset, ++ Register value, ++ Register test_value_out, ++ Label& not_equal_continue) { ++ assert(ProfileInterpreter, "must be profiling interpreter"); ++ if (test_value_out == noreg) { ++ ld_d(AT, Address(mdp_in, offset)); ++ bne(AT, value, not_equal_continue); ++ } else { ++ // Put the test value into a register, so caller can use it: ++ ld_d(test_value_out, Address(mdp_in, offset)); ++ bne(value, test_value_out, not_equal_continue); ++ } ++} ++ ++ ++void InterpreterMacroAssembler::update_mdp_by_offset(Register mdp_in, ++ int offset_of_disp) { ++ assert(ProfileInterpreter, "must be profiling interpreter"); ++ assert(Assembler::is_simm(offset_of_disp, 12), "offset is not an simm12"); ++ ld_d(AT, mdp_in, offset_of_disp); ++ add_d(mdp_in, mdp_in, AT); ++ st_d(mdp_in, Address(FP, frame::interpreter_frame_mdx_offset * wordSize)); ++} ++ ++ ++void InterpreterMacroAssembler::update_mdp_by_offset(Register mdp_in, ++ Register reg, ++ int offset_of_disp) { ++ assert(ProfileInterpreter, "must be profiling interpreter"); ++ add_d(AT, reg, mdp_in); ++ assert(Assembler::is_simm(offset_of_disp, 12), "offset is not an simm12"); ++ ld_d(AT, AT, offset_of_disp); ++ add_d(mdp_in, mdp_in, AT); ++ st_d(mdp_in, Address(FP, frame::interpreter_frame_mdx_offset * wordSize)); ++} ++ ++ ++void InterpreterMacroAssembler::update_mdp_by_constant(Register mdp_in, ++ int constant) { ++ assert(ProfileInterpreter, "must be profiling interpreter"); ++ if(Assembler::is_simm(constant, 12)) { ++ addi_d(mdp_in, mdp_in, constant); ++ } else { ++ li(AT, constant); ++ add_d(mdp_in, mdp_in, AT); ++ } ++ st_d(mdp_in, Address(FP, frame::interpreter_frame_mdx_offset * wordSize)); ++} ++ ++ ++void InterpreterMacroAssembler::update_mdp_for_ret(Register return_bci) { ++ assert(ProfileInterpreter, "must be profiling interpreter"); ++ push(return_bci); // save/restore across call_VM ++ call_VM(noreg, ++ CAST_FROM_FN_PTR(address, InterpreterRuntime::update_mdp_for_ret), ++ return_bci); ++ pop(return_bci); ++} ++ ++ ++void InterpreterMacroAssembler::profile_taken_branch(Register mdp, ++ Register bumped_count) { ++ if (ProfileInterpreter) { ++ Label profile_continue; ++ ++ // If no method data exists, go to profile_continue. ++ // Otherwise, assign to mdp ++ test_method_data_pointer(mdp, profile_continue); ++ ++ // We are taking a branch. Increment the taken count. ++ // We inline increment_mdp_data_at to return bumped_count in a register ++ //increment_mdp_data_at(mdp, in_bytes(JumpData::taken_offset())); ++ ld_d(bumped_count, mdp, in_bytes(JumpData::taken_offset())); ++ assert(DataLayout::counter_increment == 1, ++ "flow-free idiom only works with 1"); ++ push(T8); ++ // T8 is used as a temporary register. ++ addi_d(T8, bumped_count, DataLayout::counter_increment); ++ slt(AT, T8, R0); ++ sub_d(bumped_count, T8, AT); ++ pop(T8); ++ st_d(bumped_count, mdp, in_bytes(JumpData::taken_offset())); // Store back out ++ // The method data pointer needs to be updated to reflect the new target. ++ update_mdp_by_offset(mdp, in_bytes(JumpData::displacement_offset())); ++ bind(profile_continue); ++ } ++} ++ ++ ++void InterpreterMacroAssembler::profile_not_taken_branch(Register mdp) { ++ if (ProfileInterpreter) { ++ Label profile_continue; ++ ++ // If no method data exists, go to profile_continue. 
++ test_method_data_pointer(mdp, profile_continue); ++ ++ // We are taking a branch. Increment the not taken count. ++ increment_mdp_data_at(mdp, in_bytes(BranchData::not_taken_offset())); ++ ++ // The method data pointer needs to be updated to correspond to ++ // the next bytecode ++ update_mdp_by_constant(mdp, in_bytes(BranchData::branch_data_size())); ++ bind(profile_continue); ++ } ++} ++ ++ ++void InterpreterMacroAssembler::profile_call(Register mdp) { ++ if (ProfileInterpreter) { ++ Label profile_continue; ++ ++ // If no method data exists, go to profile_continue. ++ test_method_data_pointer(mdp, profile_continue); ++ ++ // We are making a call. Increment the count. ++ increment_mdp_data_at(mdp, in_bytes(CounterData::count_offset())); ++ ++ // The method data pointer needs to be updated to reflect the new target. ++ update_mdp_by_constant(mdp, in_bytes(CounterData::counter_data_size())); ++ bind(profile_continue); ++ } ++} ++ ++ ++void InterpreterMacroAssembler::profile_final_call(Register mdp) { ++ if (ProfileInterpreter) { ++ Label profile_continue; ++ ++ // If no method data exists, go to profile_continue. ++ test_method_data_pointer(mdp, profile_continue); ++ ++ // We are making a call. Increment the count. ++ increment_mdp_data_at(mdp, in_bytes(CounterData::count_offset())); ++ ++ // The method data pointer needs to be updated to reflect the new target. ++ update_mdp_by_constant(mdp, ++ in_bytes(VirtualCallData:: ++ virtual_call_data_size())); ++ bind(profile_continue); ++ } ++} ++ ++ ++void InterpreterMacroAssembler::profile_virtual_call(Register receiver, ++ Register mdp, ++ Register reg2, ++ bool receiver_can_be_null) { ++ if (ProfileInterpreter) { ++ Label profile_continue; ++ ++ // If no method data exists, go to profile_continue. ++ test_method_data_pointer(mdp, profile_continue); ++ ++ Label skip_receiver_profile; ++ if (receiver_can_be_null) { ++ Label not_null; ++ bnez(receiver, not_null); ++ // We are making a call. Increment the count. ++ increment_mdp_data_at(mdp, in_bytes(CounterData::count_offset())); ++ b(skip_receiver_profile); ++ bind(not_null); ++ } ++ ++ // Record the receiver type. ++ record_klass_in_profile(receiver, mdp, reg2, true); ++ bind(skip_receiver_profile); ++ ++ // The method data pointer needs to be updated to reflect the new target. ++ update_mdp_by_constant(mdp, ++ in_bytes(VirtualCallData:: ++ virtual_call_data_size())); ++ bind(profile_continue); ++ } ++} ++ ++// This routine creates a state machine for updating the multi-row ++// type profile at a virtual call site (or other type-sensitive bytecode). ++// The machine visits each row (of receiver/count) until the receiver type ++// is found, or until it runs out of rows. At the same time, it remembers ++// the location of the first empty row. (An empty row records null for its ++// receiver, and can be allocated for a newly-observed receiver type.) ++// Because there are two degrees of freedom in the state, a simple linear ++// search will not work; it must be a decision tree. Hence this helper ++// function is recursive, to generate the required tree structured code. ++// It's the interpreter, so we are trading off code space for speed. ++// See below for example code. 
++void InterpreterMacroAssembler::record_klass_in_profile_helper( ++ Register receiver, Register mdp, ++ Register reg2, int start_row, ++ Label& done, bool is_virtual_call) { ++ if (TypeProfileWidth == 0) { ++ if (is_virtual_call) { ++ increment_mdp_data_at(mdp, in_bytes(CounterData::count_offset())); ++ } ++ return; ++ } ++ ++ int last_row = VirtualCallData::row_limit() - 1; ++ assert(start_row <= last_row, "must be work left to do"); ++ // Test this row for both the receiver and for null. ++ // Take any of three different outcomes: ++ // 1. found receiver => increment count and goto done ++ // 2. found null => keep looking for case 1, maybe allocate this cell ++ // 3. found something else => keep looking for cases 1 and 2 ++ // Case 3 is handled by a recursive call. ++ for (int row = start_row; row <= last_row; row++) { ++ Label next_test; ++ bool test_for_null_also = (row == start_row); ++ ++ // See if the receiver is receiver[n]. ++ int recvr_offset = in_bytes(VirtualCallData::receiver_offset(row)); ++ test_mdp_data_at(mdp, recvr_offset, receiver, ++ (test_for_null_also ? reg2 : noreg), ++ next_test); ++ // (Reg2 now contains the receiver from the CallData.) ++ ++ // The receiver is receiver[n]. Increment count[n]. ++ int count_offset = in_bytes(VirtualCallData::receiver_count_offset(row)); ++ increment_mdp_data_at(mdp, count_offset); ++ beq(R0, R0, done); ++ bind(next_test); ++ ++ if (test_for_null_also) { ++ Label found_null; ++ // Failed the equality check on receiver[n]... Test for null. ++ if (start_row == last_row) { ++ // The only thing left to do is handle the null case. ++ if (is_virtual_call) { ++ beq(reg2, R0, found_null); ++ // Receiver did not match any saved receiver and there is no empty row for it. ++ // Increment total counter to indicate polymorphic case. ++ increment_mdp_data_at(mdp, in_bytes(CounterData::count_offset())); ++ beq(R0, R0, done); ++ bind(found_null); ++ } else { ++ bne(reg2, R0, done); ++ } ++ break; ++ } ++ // Since null is rare, make it be the branch-taken case. ++ beq(reg2, R0, found_null); ++ ++ // Put all the "Case 3" tests here. ++ record_klass_in_profile_helper(receiver, mdp, reg2, start_row + 1, done, is_virtual_call); ++ ++ // Found a null. Keep searching for a matching receiver, ++ // but remember that this is an empty (unused) slot. ++ bind(found_null); ++ } ++ } ++ ++ // In the fall-through case, we found no matching receiver, but we ++ // observed the receiver[start_row] is NULL. ++ ++ // Fill in the receiver field and increment the count. 
++ int recvr_offset = in_bytes(VirtualCallData::receiver_offset(start_row)); ++ set_mdp_data_at(mdp, recvr_offset, receiver); ++ int count_offset = in_bytes(VirtualCallData::receiver_count_offset(start_row)); ++ li(reg2, DataLayout::counter_increment); ++ set_mdp_data_at(mdp, count_offset, reg2); ++ if (start_row > 0) { ++ beq(R0, R0, done); ++ } ++} ++ ++// Example state machine code for three profile rows: ++// // main copy of decision tree, rooted at row[1] ++// if (row[0].rec == rec) { row[0].incr(); goto done; } ++// if (row[0].rec != NULL) { ++// // inner copy of decision tree, rooted at row[1] ++// if (row[1].rec == rec) { row[1].incr(); goto done; } ++// if (row[1].rec != NULL) { ++// // degenerate decision tree, rooted at row[2] ++// if (row[2].rec == rec) { row[2].incr(); goto done; } ++// if (row[2].rec != NULL) { goto done; } // overflow ++// row[2].init(rec); goto done; ++// } else { ++// // remember row[1] is empty ++// if (row[2].rec == rec) { row[2].incr(); goto done; } ++// row[1].init(rec); goto done; ++// } ++// } else { ++// // remember row[0] is empty ++// if (row[1].rec == rec) { row[1].incr(); goto done; } ++// if (row[2].rec == rec) { row[2].incr(); goto done; } ++// row[0].init(rec); goto done; ++// } ++// done: ++ ++void InterpreterMacroAssembler::record_klass_in_profile(Register receiver, ++ Register mdp, Register reg2, ++ bool is_virtual_call) { ++ assert(ProfileInterpreter, "must be profiling"); ++ Label done; ++ ++ record_klass_in_profile_helper(receiver, mdp, reg2, 0, done, is_virtual_call); ++ ++ bind (done); ++} ++ ++void InterpreterMacroAssembler::profile_ret(Register return_bci, ++ Register mdp) { ++ if (ProfileInterpreter) { ++ Label profile_continue; ++ uint row; ++ ++ // If no method data exists, go to profile_continue. ++ test_method_data_pointer(mdp, profile_continue); ++ ++ // Update the total ret count. ++ increment_mdp_data_at(mdp, in_bytes(CounterData::count_offset())); ++ ++ for (row = 0; row < RetData::row_limit(); row++) { ++ Label next_test; ++ ++ // See if return_bci is equal to bci[n]: ++ test_mdp_data_at(mdp, ++ in_bytes(RetData::bci_offset(row)), ++ return_bci, noreg, ++ next_test); ++ ++ // return_bci is equal to bci[n]. Increment the count. ++ increment_mdp_data_at(mdp, in_bytes(RetData::bci_count_offset(row))); ++ ++ // The method data pointer needs to be updated to reflect the new target. ++ update_mdp_by_offset(mdp, ++ in_bytes(RetData::bci_displacement_offset(row))); ++ b(profile_continue); ++ bind(next_test); ++ } ++ ++ update_mdp_for_ret(return_bci); ++ ++ bind(profile_continue); ++ } ++} ++ ++ ++void InterpreterMacroAssembler::profile_null_seen(Register mdp) { ++ if (ProfileInterpreter) { ++ Label profile_continue; ++ ++ // If no method data exists, go to profile_continue. ++ test_method_data_pointer(mdp, profile_continue); ++ ++ set_mdp_flag_at(mdp, BitData::null_seen_byte_constant()); ++ ++ // The method data pointer needs to be updated. ++ int mdp_delta = in_bytes(BitData::bit_data_size()); ++ if (TypeProfileCasts) { ++ mdp_delta = in_bytes(VirtualCallData::virtual_call_data_size()); ++ } ++ update_mdp_by_constant(mdp, mdp_delta); ++ ++ bind(profile_continue); ++ } ++} ++ ++ ++void InterpreterMacroAssembler::profile_typecheck_failed(Register mdp) { ++ if (ProfileInterpreter && TypeProfileCasts) { ++ Label profile_continue; ++ ++ // If no method data exists, go to profile_continue. 
++ test_method_data_pointer(mdp, profile_continue); ++ ++ int count_offset = in_bytes(CounterData::count_offset()); ++ // Back up the address, since we have already bumped the mdp. ++ count_offset -= in_bytes(VirtualCallData::virtual_call_data_size()); ++ ++ // *Decrement* the counter. We expect to see zero or small negatives. ++ increment_mdp_data_at(mdp, count_offset, true); ++ ++ bind (profile_continue); ++ } ++} ++ ++ ++void InterpreterMacroAssembler::profile_typecheck(Register mdp, Register klass, Register reg2) { ++ if (ProfileInterpreter) { ++ Label profile_continue; ++ ++ // If no method data exists, go to profile_continue. ++ test_method_data_pointer(mdp, profile_continue); ++ ++ // The method data pointer needs to be updated. ++ int mdp_delta = in_bytes(BitData::bit_data_size()); ++ if (TypeProfileCasts) { ++ mdp_delta = in_bytes(VirtualCallData::virtual_call_data_size()); ++ ++ // Record the object type. ++ record_klass_in_profile(klass, mdp, reg2, false); ++ } ++ update_mdp_by_constant(mdp, mdp_delta); ++ ++ bind(profile_continue); ++ } ++} ++ ++ ++void InterpreterMacroAssembler::profile_switch_default(Register mdp) { ++ if (ProfileInterpreter) { ++ Label profile_continue; ++ ++ // If no method data exists, go to profile_continue. ++ test_method_data_pointer(mdp, profile_continue); ++ ++ // Update the default case count ++ increment_mdp_data_at(mdp, ++ in_bytes(MultiBranchData::default_count_offset())); ++ ++ // The method data pointer needs to be updated. ++ update_mdp_by_offset(mdp, ++ in_bytes(MultiBranchData:: ++ default_displacement_offset())); ++ ++ bind(profile_continue); ++ } ++} ++ ++ ++void InterpreterMacroAssembler::profile_switch_case(Register index, ++ Register mdp, ++ Register reg2) { ++ if (ProfileInterpreter) { ++ Label profile_continue; ++ ++ // If no method data exists, go to profile_continue. ++ test_method_data_pointer(mdp, profile_continue); ++ ++ // Build the base (index * per_case_size_in_bytes()) + ++ // case_array_offset_in_bytes() ++ li(reg2, in_bytes(MultiBranchData::per_case_size())); ++ mul_d(index, index, reg2); ++ addi_d(index, index, in_bytes(MultiBranchData::case_array_offset())); ++ ++ // Update the case count ++ increment_mdp_data_at(mdp, ++ index, ++ in_bytes(MultiBranchData::relative_count_offset())); ++ ++ // The method data pointer needs to be updated. ++ update_mdp_by_offset(mdp, ++ index, ++ in_bytes(MultiBranchData:: ++ relative_displacement_offset())); ++ ++ bind(profile_continue); ++ } ++} ++ ++ ++void InterpreterMacroAssembler::narrow(Register result) { ++ // Get method->_constMethod->_result_type ++ ld_d(T4, FP, frame::interpreter_frame_method_offset * wordSize); ++ ld_d(T4, T4, in_bytes(Method::const_offset())); ++ ld_bu(T4, T4, in_bytes(ConstMethod::result_type_offset())); ++ ++ Label done, notBool, notByte, notChar; ++ ++ // common case first ++ addi_d(AT, T4, -T_INT); ++ beq(AT, R0, done); ++ ++ // mask integer result to narrower return type. 
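++  // For example: a boolean result keeps only bit 0, byte and short results are
++  // sign-extended from 8/16 bits, and a char result is zero-extended to 16 bits;
++  // a T_INT result needs no adjustment.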
++ addi_d(AT, T4, -T_BOOLEAN); ++ bne(AT, R0, notBool); ++ andi(result, result, 0x1); ++ beq(R0, R0, done); ++ ++ bind(notBool); ++ addi_d(AT, T4, -T_BYTE); ++ bne(AT, R0, notByte); ++ ext_w_b(result, result); ++ beq(R0, R0, done); ++ ++ bind(notByte); ++ addi_d(AT, T4, -T_CHAR); ++ bne(AT, R0, notChar); ++ bstrpick_d(result, result, 15, 0); ++ beq(R0, R0, done); ++ ++ bind(notChar); ++ ext_w_h(result, result); ++ ++ // Nothing to do for T_INT ++ bind(done); ++} ++ ++ ++void InterpreterMacroAssembler::profile_obj_type(Register obj, const Address& mdo_addr) { ++ Label update, next, none; ++ ++ verify_oop(obj); ++ ++ if (mdo_addr.index() != noreg) { ++ guarantee(T0 != mdo_addr.base(), "The base register will be corrupted !"); ++ guarantee(T0 != mdo_addr.index(), "The index register will be corrupted !"); ++ push(T0); ++ alsl_d(T0, mdo_addr.index(), mdo_addr.base(), mdo_addr.scale() - 1); ++ } ++ ++ bnez(obj, update); ++ ++ if (mdo_addr.index() == noreg) { ++ ld_d(AT, mdo_addr); ++ } else { ++ ld_d(AT, T0, mdo_addr.disp()); ++ } ++ ori(AT, AT, TypeEntries::null_seen); ++ if (mdo_addr.index() == noreg) { ++ st_d(AT, mdo_addr); ++ } else { ++ st_d(AT, T0, mdo_addr.disp()); ++ } ++ ++ b(next); ++ ++ bind(update); ++ load_klass(obj, obj); ++ ++ if (mdo_addr.index() == noreg) { ++ ld_d(AT, mdo_addr); ++ } else { ++ ld_d(AT, T0, mdo_addr.disp()); ++ } ++ xorr(obj, obj, AT); ++ ++ assert(TypeEntries::type_klass_mask == -4, "must be"); ++ bstrpick_d(AT, obj, 63, 2); ++ beqz(AT, next); ++ ++ andi(AT, obj, TypeEntries::type_unknown); ++ bnez(AT, next); ++ ++ if (mdo_addr.index() == noreg) { ++ ld_d(AT, mdo_addr); ++ } else { ++ ld_d(AT, T0, mdo_addr.disp()); ++ } ++ beqz(AT, none); ++ ++ addi_d(AT, AT, -(TypeEntries::null_seen)); ++ beqz(AT, none); ++ ++ // There is a chance that the checks above (re-reading profiling ++ // data from memory) fail if another thread has just set the ++ // profiling to this obj's klass ++ if (mdo_addr.index() == noreg) { ++ ld_d(AT, mdo_addr); ++ } else { ++ ld_d(AT, T0, mdo_addr.disp()); ++ } ++ xorr(obj, obj, AT); ++ assert(TypeEntries::type_klass_mask == -4, "must be"); ++ bstrpick_d(AT, obj, 63, 2); ++ beqz(AT, next); ++ ++ // different than before. Cannot keep accurate profile. ++ if (mdo_addr.index() == noreg) { ++ ld_d(AT, mdo_addr); ++ } else { ++ ld_d(AT, T0, mdo_addr.disp()); ++ } ++ ori(AT, AT, TypeEntries::type_unknown); ++ if (mdo_addr.index() == noreg) { ++ st_d(AT, mdo_addr); ++ } else { ++ st_d(AT, T0, mdo_addr.disp()); ++ } ++ b(next); ++ ++ bind(none); ++ // first time here. Set profile type. ++ if (mdo_addr.index() == noreg) { ++ st_d(obj, mdo_addr); ++ } else { ++ st_d(obj, T0, mdo_addr.disp()); ++ } ++ ++ bind(next); ++ if (mdo_addr.index() != noreg) { ++ pop(T0); ++ } ++} ++ ++void InterpreterMacroAssembler::profile_arguments_type(Register mdp, Register callee, Register tmp, bool is_virtual) { ++ if (!ProfileInterpreter) { ++ return; ++ } ++ ++ if (MethodData::profile_arguments() || MethodData::profile_return()) { ++ Label profile_continue; ++ ++ test_method_data_pointer(mdp, profile_continue); ++ ++ int off_to_start = is_virtual ? in_bytes(VirtualCallData::virtual_call_data_size()) : in_bytes(CounterData::counter_data_size()); ++ ++ ld_b(AT, mdp, in_bytes(DataLayout::tag_offset()) - off_to_start); ++ li(tmp, is_virtual ? 
DataLayout::virtual_call_type_data_tag : DataLayout::call_type_data_tag); ++ bne(tmp, AT, profile_continue); ++ ++ ++ if (MethodData::profile_arguments()) { ++ Label done; ++ int off_to_args = in_bytes(TypeEntriesAtCall::args_data_offset()); ++ if (Assembler::is_simm(off_to_args, 12)) { ++ addi_d(mdp, mdp, off_to_args); ++ } else { ++ li(AT, off_to_args); ++ add_d(mdp, mdp, AT); ++ } ++ ++ ++ for (int i = 0; i < TypeProfileArgsLimit; i++) { ++ if (i > 0 || MethodData::profile_return()) { ++ // If return value type is profiled we may have no argument to profile ++ ld_d(tmp, mdp, in_bytes(TypeEntriesAtCall::cell_count_offset())-off_to_args); ++ ++ if (Assembler::is_simm(-1 * i * TypeStackSlotEntries::per_arg_count(), 12)) { ++ addi_w(tmp, tmp, -1 * i * TypeStackSlotEntries::per_arg_count()); ++ } else { ++ li(AT, i*TypeStackSlotEntries::per_arg_count()); ++ sub_w(tmp, tmp, AT); ++ } ++ ++ li(AT, TypeStackSlotEntries::per_arg_count()); ++ blt(tmp, AT, done); ++ } ++ ld_d(tmp, callee, in_bytes(Method::const_offset())); ++ ++ ld_hu(tmp, tmp, in_bytes(ConstMethod::size_of_parameters_offset())); ++ ++ // stack offset o (zero based) from the start of the argument ++ // list, for n arguments translates into offset n - o - 1 from ++ // the end of the argument list ++ ld_d(AT, mdp, in_bytes(TypeEntriesAtCall::stack_slot_offset(i))-off_to_args); ++ sub_d(tmp, tmp, AT); ++ ++ addi_w(tmp, tmp, -1); ++ ++ Address arg_addr = argument_address(tmp); ++ ld_d(tmp, arg_addr); ++ ++ Address mdo_arg_addr(mdp, in_bytes(TypeEntriesAtCall::argument_type_offset(i))-off_to_args); ++ profile_obj_type(tmp, mdo_arg_addr); ++ ++ int to_add = in_bytes(TypeStackSlotEntries::per_arg_size()); ++ if (Assembler::is_simm(to_add, 12)) { ++ addi_d(mdp, mdp, to_add); ++ } else { ++ li(AT, to_add); ++ add_d(mdp, mdp, AT); ++ } ++ ++ off_to_args += to_add; ++ } ++ ++ if (MethodData::profile_return()) { ++ ld_d(tmp, mdp, in_bytes(TypeEntriesAtCall::cell_count_offset())-off_to_args); ++ ++ int tmp_arg_counts = TypeProfileArgsLimit*TypeStackSlotEntries::per_arg_count(); ++ if (Assembler::is_simm(-1 * tmp_arg_counts, 12)) { ++ addi_w(tmp, tmp, -1 * tmp_arg_counts); ++ } else { ++ li(AT, tmp_arg_counts); ++ sub_w(mdp, mdp, AT); ++ } ++ } ++ ++ bind(done); ++ ++ if (MethodData::profile_return()) { ++ // We're right after the type profile for the last ++ // argument. tmp is the number of cells left in the ++ // CallTypeData/VirtualCallTypeData to reach its end. Non null ++ // if there's a return to profile. 
++        assert(ReturnTypeEntry::static_cell_count() < TypeStackSlotEntries::per_arg_count(), "can't move past ret type");
++        slli_w(tmp, tmp, exact_log2(DataLayout::cell_size));
++        add_d(mdp, mdp, tmp);
++      }
++      st_d(mdp, FP, frame::interpreter_frame_mdx_offset * wordSize);
++    } else {
++      assert(MethodData::profile_return(), "either profile call args or call ret");
++      update_mdp_by_constant(mdp, in_bytes(TypeEntriesAtCall::return_only_size()));
++    }
++
++    // mdp points right after the end of the
++    // CallTypeData/VirtualCallTypeData, right after the cells for the
++    // return value type if there's one
++
++    bind(profile_continue);
++  }
++}
++
++void InterpreterMacroAssembler::profile_return_type(Register mdp, Register ret, Register tmp) {
++  assert_different_registers(mdp, ret, tmp, _bcp_register);
++  if (ProfileInterpreter && MethodData::profile_return()) {
++    Label profile_continue, done;
++
++    test_method_data_pointer(mdp, profile_continue);
++
++    if (MethodData::profile_return_jsr292_only()) {
++      // If we don't profile all invoke bytecodes we must make sure
++      // it's a bytecode we indeed profile. We can't go back to the
++      // beginning of the ProfileData we intend to update to check its
++      // type because we're right after it and we don't know its
++      // length
++      Label do_profile;
++      ld_b(tmp, _bcp_register, 0);
++      addi_d(AT, tmp, -1 * Bytecodes::_invokedynamic);
++      beqz(AT, do_profile);
++      addi_d(AT, tmp, -1 * Bytecodes::_invokehandle);
++      beqz(AT, do_profile);
++
++      get_method(tmp);
++      ld_b(tmp, tmp, Method::intrinsic_id_offset_in_bytes());
++      li(AT, vmIntrinsics::_compiledLambdaForm);
++      bne(tmp, AT, profile_continue);
++
++      bind(do_profile);
++    }
++
++    Address mdo_ret_addr(mdp, -in_bytes(ReturnTypeEntry::size()));
++    add_d(tmp, ret, R0);
++    profile_obj_type(tmp, mdo_ret_addr);
++
++    bind(profile_continue);
++  }
++}
++
++void InterpreterMacroAssembler::profile_parameters_type(Register mdp, Register tmp1, Register tmp2) {
++  guarantee(T4 == tmp1, "You are required to use T4 as the index register for LoongArch !");
++
++  if (ProfileInterpreter && MethodData::profile_parameters()) {
++    Label profile_continue, done;
++
++    test_method_data_pointer(mdp, profile_continue);
++
++    // Load the offset of the area within the MDO used for
++    // parameters. If it's negative we're not profiling any parameters
++    ld_w(tmp1, mdp, in_bytes(MethodData::parameters_type_data_di_offset()) - in_bytes(MethodData::data_offset()));
++    blt(tmp1, R0, profile_continue);
++
++    // Compute a pointer to the area for parameters from the offset
++    // and move the pointer to the slot for the last
++    // parameters. Collect profiling from last parameter down.
++ // mdo start + parameters offset + array length - 1 ++ add_d(mdp, mdp, tmp1); ++ ld_d(tmp1, mdp, in_bytes(ArrayData::array_len_offset())); ++ decrement(tmp1, TypeStackSlotEntries::per_arg_count()); ++ ++ ++ Label loop; ++ bind(loop); ++ ++ int off_base = in_bytes(ParametersTypeData::stack_slot_offset(0)); ++ int type_base = in_bytes(ParametersTypeData::type_offset(0)); ++ Address::ScaleFactor per_arg_scale = Address::times(DataLayout::cell_size); ++ Address arg_type(mdp, tmp1, per_arg_scale, type_base); ++ ++ // load offset on the stack from the slot for this parameter ++ alsl_d(AT, tmp1, mdp, per_arg_scale - 1); ++ ld_d(tmp2, AT, off_base); ++ ++ sub_d(tmp2, R0, tmp2); ++ ++ // read the parameter from the local area ++ slli_d(AT, tmp2, Interpreter::stackElementScale()); ++ ldx_d(tmp2, AT, _locals_register); ++ ++ // profile the parameter ++ profile_obj_type(tmp2, arg_type); ++ ++ // go to next parameter ++ decrement(tmp1, TypeStackSlotEntries::per_arg_count()); ++ blt(R0, tmp1, loop); ++ ++ bind(profile_continue); ++ } ++} ++ ++void InterpreterMacroAssembler::verify_oop(Register reg, TosState state) { ++ if (state == atos) { ++ MacroAssembler::verify_oop(reg); ++ } ++} ++ ++void InterpreterMacroAssembler::verify_FPU(int stack_depth, TosState state) { ++} ++#endif // !CC_INTERP ++ ++ ++void InterpreterMacroAssembler::notify_method_entry() { ++ // Whenever JVMTI is interp_only_mode, method entry/exit events are sent to ++ // track stack depth. If it is possible to enter interp_only_mode we add ++ // the code to check if the event should be sent. ++ Register tempreg = T0; ++#ifndef OPT_THREAD ++ get_thread(T8); ++#else ++ move(T8, TREG); ++#endif ++ if (JvmtiExport::can_post_interpreter_events()) { ++ Label L; ++ ld_w(tempreg, T8, in_bytes(JavaThread::interp_only_mode_offset())); ++ beq(tempreg, R0, L); ++ call_VM(noreg, CAST_FROM_FN_PTR(address, ++ InterpreterRuntime::post_method_entry)); ++ bind(L); ++ } ++ ++ { ++ SkipIfEqual skip_if(this, &DTraceMethodProbes, 0); ++ get_method(S3); ++ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::dtrace_method_entry), ++ //Rthread, ++ T8, ++ //Rmethod); ++ S3); ++ } ++} ++ ++void InterpreterMacroAssembler::notify_method_exit( ++ TosState state, NotifyMethodExitMode mode) { ++ Register tempreg = T0; ++#ifndef OPT_THREAD ++ get_thread(T8); ++#else ++ move(T8, TREG); ++#endif ++ // Whenever JVMTI is interp_only_mode, method entry/exit events are sent to ++ // track stack depth. If it is possible to enter interp_only_mode we add ++ // the code to check if the event should be sent. ++ if (mode == NotifyJVMTI && JvmtiExport::can_post_interpreter_events()) { ++ Label skip; ++ // Note: frame::interpreter_frame_result has a dependency on how the ++ // method result is saved across the call to post_method_exit. If this ++ // is changed then the interpreter_frame_result implementation will ++ // need to be updated too. ++ ++ // For c++ interpreter the result is always stored at a known location in the frame ++ // template interpreter will leave it on the top of the stack. 
++ NOT_CC_INTERP(push(state);) ++ ld_w(tempreg, T8, in_bytes(JavaThread::interp_only_mode_offset())); ++ beq(tempreg, R0, skip); ++ call_VM(noreg, ++ CAST_FROM_FN_PTR(address, InterpreterRuntime::post_method_exit)); ++ bind(skip); ++ NOT_CC_INTERP(pop(state)); ++ } ++ ++ { ++ // Dtrace notification ++ SkipIfEqual skip_if(this, &DTraceMethodProbes, 0); ++ NOT_CC_INTERP(push(state)); ++ get_method(S3); ++ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::dtrace_method_exit), ++ //Rthread, Rmethod); ++ T8, S3); ++ NOT_CC_INTERP(pop(state)); ++ } ++} ++ ++// Jump if ((*counter_addr += increment) & mask) satisfies the condition. ++void InterpreterMacroAssembler::increment_mask_and_jump(Address counter_addr, ++ int increment, int mask, ++ Register scratch, bool preloaded, ++ Condition cond, Label* where) { ++ assert_different_registers(scratch, AT); ++ ++ if (!preloaded) { ++ ld_w(scratch, counter_addr); ++ } ++ addi_w(scratch, scratch, increment); ++ st_w(scratch, counter_addr); ++ ++ li(AT, mask); ++ andr(scratch, scratch, AT); ++ ++ if (cond == Assembler::zero) { ++ beq(scratch, R0, *where); ++ } else { ++ unimplemented(); ++ } ++} +diff --git a/hotspot/src/cpu/loongarch/vm/interp_masm_loongarch_64.hpp b/hotspot/src/cpu/loongarch/vm/interp_masm_loongarch_64.hpp +new file mode 100644 +index 0000000000..9113da54ff +--- /dev/null ++++ b/hotspot/src/cpu/loongarch/vm/interp_masm_loongarch_64.hpp +@@ -0,0 +1,269 @@ ++/* ++ * Copyright (c) 2003, 2013, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2021, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ * ++ */ ++ ++#ifndef CPU_LOONGARCH_VM_INTERP_MASM_LOONGARCH_64_HPP ++#define CPU_LOONGARCH_VM_INTERP_MASM_LOONGARCH_64_HPP ++ ++#include "asm/assembler.hpp" ++#include "asm/macroAssembler.hpp" ++#include "asm/macroAssembler.inline.hpp" ++#include "interpreter/invocationCounter.hpp" ++#include "runtime/frame.hpp" ++ ++// This file specializes the assember with interpreter-specific macros ++ ++ ++class InterpreterMacroAssembler: public MacroAssembler { ++#ifndef CC_INTERP ++ private: ++ ++ Register _locals_register; // register that contains the pointer to the locals ++ Register _bcp_register; // register that contains the bcp ++ ++ protected: ++ // Interpreter specific version of call_VM_base ++ virtual void call_VM_leaf_base(address entry_point, ++ int number_of_arguments); ++ ++ virtual void call_VM_base(Register oop_result, ++ Register java_thread, ++ Register last_java_sp, ++ address entry_point, ++ int number_of_arguments, ++ bool check_exceptions); ++ ++ virtual void check_and_handle_popframe(Register java_thread); ++ virtual void check_and_handle_earlyret(Register java_thread); ++ ++ // base routine for all dispatches ++ void dispatch_base(TosState state, address* table, bool verifyoop = true); ++#endif // CC_INTERP ++ ++ public: ++ // narrow int return value ++ void narrow(Register result); ++ ++ InterpreterMacroAssembler(CodeBuffer* code) : MacroAssembler(code), _locals_register(LVP), _bcp_register(BCP) {} ++ ++ void get_2_byte_integer_at_bcp(Register reg, Register tmp, int offset); ++ void get_4_byte_integer_at_bcp(Register reg, int offset); ++ ++ void load_earlyret_value(TosState state); ++ ++#ifdef CC_INTERP ++ void save_bcp() { /* not needed in c++ interpreter and harmless */ } ++ void restore_bcp() { /* not needed in c++ interpreter and harmless */ } ++ ++ // Helpers for runtime call arguments/results ++ void get_method(Register reg); ++ ++#else ++ ++ // Interpreter-specific registers ++ void save_bcp() { ++ st_d(BCP, FP, frame::interpreter_frame_bcx_offset * wordSize); ++ } ++ ++ void restore_bcp() { ++ ld_d(BCP, FP, frame::interpreter_frame_bcx_offset * wordSize); ++ } ++ ++ void restore_locals() { ++ ld_d(LVP, FP, frame::interpreter_frame_locals_offset * wordSize); ++ } ++ ++ // Helpers for runtime call arguments/results ++ void get_method(Register reg) { ++ ld_d(reg, FP, frame::interpreter_frame_method_offset * wordSize); ++ } ++ ++ void get_const(Register reg){ ++ get_method(reg); ++ ld_d(reg, reg, in_bytes(Method::const_offset())); ++ } ++ ++ void get_constant_pool(Register reg) { ++ get_const(reg); ++ ld_d(reg, reg, in_bytes(ConstMethod::constants_offset())); ++ } ++ ++ void get_constant_pool_cache(Register reg) { ++ get_constant_pool(reg); ++ ld_d(reg, reg, ConstantPool::cache_offset_in_bytes()); ++ } ++ ++ void get_cpool_and_tags(Register cpool, Register tags) { ++ get_constant_pool(cpool); ++ ld_d(tags, cpool, ConstantPool::tags_offset_in_bytes()); ++ } ++ ++ void get_unsigned_2_byte_index_at_bcp(Register reg, int bcp_offset); ++ void get_cache_and_index_at_bcp(Register cache, Register index, int bcp_offset, size_t index_size = sizeof(u2)); ++ void get_cache_and_index_and_bytecode_at_bcp(Register cache, Register index, Register bytecode, int byte_no, int bcp_offset, size_t index_size = sizeof(u2)); ++ void get_cache_entry_pointer_at_bcp(Register cache, Register tmp, int bcp_offset, size_t index_size = sizeof(u2)); ++ void get_cache_index_at_bcp(Register index, int bcp_offset, size_t index_size = sizeof(u2)); ++ void get_method_counters(Register method, Register 
mcs, Label& skip); ++ ++ // load cpool->resolved_references(index); ++ void load_resolved_reference_at_index(Register result, Register index); ++ ++ void pop_ptr( Register r = FSR); ++ void pop_i( Register r = FSR); ++ void pop_l( Register r = FSR); ++ void pop_f(FloatRegister r = FSF); ++ void pop_d(FloatRegister r = FSF); ++ ++ void push_ptr( Register r = FSR); ++ void push_i( Register r = FSR); ++ void push_l( Register r = FSR); ++ void push_f(FloatRegister r = FSF); ++ void push_d(FloatRegister r = FSF); ++ ++ void pop(Register r ) { ((MacroAssembler*)this)->pop(r); } ++ ++ void push(Register r ) { ((MacroAssembler*)this)->push(r); } ++ ++ void pop(TosState state); // transition vtos -> state ++ void push(TosState state); // transition state -> vtos ++ ++ void empty_expression_stack() { ++ ld_d(SP, FP, frame::interpreter_frame_monitor_block_top_offset * wordSize); ++ // NULL last_sp until next java call ++ st_d(R0, FP, frame::interpreter_frame_last_sp_offset * wordSize); ++ } ++ ++ // Super call_VM calls - correspond to MacroAssembler::call_VM(_leaf) calls ++ void load_ptr(int n, Register val); ++ void store_ptr(int n, Register val); ++ ++ // Generate a subtype check: branch to ok_is_subtype if sub_klass is ++ // a subtype of super_klass. ++ //void gen_subtype_check( Register sub_klass, Label &ok_is_subtype ); ++ void gen_subtype_check( Register Rsup_klass, Register sub_klass, Label &ok_is_subtype ); ++ ++ // Dispatching ++ void dispatch_prolog(TosState state, int step = 0); ++ void dispatch_epilog(TosState state, int step = 0); ++ void dispatch_only(TosState state); ++ void dispatch_only_normal(TosState state); ++ void dispatch_only_noverify(TosState state); ++ void dispatch_next(TosState state, int step = 0); ++ void dispatch_via (TosState state, address* table); ++ ++ // jump to an invoked target ++ void prepare_to_jump_from_interpreted(); ++ void jump_from_interpreted(Register method, Register temp); ++ ++ ++ // Returning from interpreted functions ++ // ++ // Removes the current activation (incl. unlocking of monitors) ++ // and sets up the return address. This code is also used for ++ // exception unwindwing. In that case, we do not want to throw ++ // IllegalMonitorStateExceptions, since that might get us into an ++ // infinite rethrow exception loop. ++ // Additionally this code is used for popFrame and earlyReturn. ++ // In popFrame case we want to skip throwing an exception, ++ // installing an exception, and notifying jvmdi. ++ // In earlyReturn case we only want to skip throwing an exception ++ // and installing an exception. 
++ void remove_activation(TosState state, Register ret_addr, ++ bool throw_monitor_exception = true, ++ bool install_monitor_exception = true, ++ bool notify_jvmdi = true); ++#endif // CC_INTERP ++ ++ // Object locking ++ void lock_object (Register lock_reg); ++ void unlock_object(Register lock_reg); ++ ++#ifndef CC_INTERP ++ ++ // Interpreter profiling operations ++ void set_method_data_pointer_for_bcp(); ++ void test_method_data_pointer(Register mdp, Label& zero_continue); ++ void verify_method_data_pointer(); ++ ++ void set_mdp_data_at(Register mdp_in, int constant, Register value); ++ void increment_mdp_data_at(Address data, bool decrement = false); ++ void increment_mdp_data_at(Register mdp_in, int constant, ++ bool decrement = false); ++ void increment_mdp_data_at(Register mdp_in, Register reg, int constant, ++ bool decrement = false); ++ void increment_mask_and_jump(Address counter_addr, ++ int increment, int mask, ++ Register scratch, bool preloaded, ++ Condition cond, Label* where); ++ void set_mdp_flag_at(Register mdp_in, int flag_constant); ++ void test_mdp_data_at(Register mdp_in, int offset, Register value, ++ Register test_value_out, ++ Label& not_equal_continue); ++ ++ void record_klass_in_profile(Register receiver, Register mdp, ++ Register reg2, bool is_virtual_call); ++ void record_klass_in_profile_helper(Register receiver, Register mdp, ++ Register reg2, int start_row, ++ Label& done, bool is_virtual_call); ++ ++ void update_mdp_by_offset(Register mdp_in, int offset_of_offset); ++ void update_mdp_by_offset(Register mdp_in, Register reg, int offset_of_disp); ++ void update_mdp_by_constant(Register mdp_in, int constant); ++ void update_mdp_for_ret(Register return_bci); ++ ++ void profile_taken_branch(Register mdp, Register bumped_count); ++ void profile_not_taken_branch(Register mdp); ++ void profile_call(Register mdp); ++ void profile_final_call(Register mdp); ++ void profile_virtual_call(Register receiver, Register mdp, ++ Register scratch2, ++ bool receiver_can_be_null = false); ++ void profile_ret(Register return_bci, Register mdp); ++ void profile_null_seen(Register mdp); ++ void profile_typecheck(Register mdp, Register klass, Register scratch); ++ void profile_typecheck_failed(Register mdp); ++ void profile_switch_default(Register mdp); ++ void profile_switch_case(Register index_in_scratch, Register mdp, ++ Register scratch2); ++ ++ // Debugging ++ // only if +VerifyOops && state == atos ++ void verify_oop(Register reg, TosState state = atos); ++ // only if +VerifyFPU && (state == ftos || state == dtos) ++ void verify_FPU(int stack_depth, TosState state = ftos); ++ ++ void profile_obj_type(Register obj, const Address& mdo_addr); ++ void profile_arguments_type(Register mdp, Register callee, Register tmp, bool is_virtual); ++ void profile_return_type(Register mdp, Register ret, Register tmp); ++ void profile_parameters_type(Register mdp, Register tmp1, Register tmp2); ++#endif // !CC_INTERP ++ ++ typedef enum { NotifyJVMTI, SkipNotifyJVMTI } NotifyMethodExitMode; ++ ++ // support for jvmti/dtrace ++ void notify_method_entry(); ++ void notify_method_exit(TosState state, NotifyMethodExitMode mode); ++}; ++ ++#endif // CPU_LOONGARCH_VM_INTERP_MASM_LOONGARCH_64_HPP +diff --git a/hotspot/src/cpu/loongarch/vm/interpreterGenerator_loongarch.hpp b/hotspot/src/cpu/loongarch/vm/interpreterGenerator_loongarch.hpp +new file mode 100644 +index 0000000000..7f253b2d51 +--- /dev/null ++++ b/hotspot/src/cpu/loongarch/vm/interpreterGenerator_loongarch.hpp +@@ -0,0 +1,51 @@ ++/* ++ * 
Copyright (c) 1997, 2013, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2020, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#ifndef CPU_LOONGARCH_VM_INTERPRETERGENERATOR_LOONGARCH_HPP ++#define CPU_LOONGARCH_VM_INTERPRETERGENERATOR_LOONGARCH_HPP ++ ++ ++// Generation of Interpreter ++// ++ friend class AbstractInterpreterGenerator; ++ ++ private: ++ ++ address generate_normal_entry(bool synchronized); ++ address generate_native_entry(bool synchronized); ++ address generate_abstract_entry(void); ++ address generate_math_entry(AbstractInterpreter::MethodKind kind); ++ address generate_empty_entry(void); ++ address generate_accessor_entry(void); ++ address generate_Reference_get_entry(); ++ address generate_CRC32_update_entry(); ++ address generate_CRC32_updateBytes_entry(AbstractInterpreter::MethodKind kind); ++ void lock_method(void); ++ void generate_stack_overflow_check(void); ++ ++ void generate_counter_incr(Label* overflow, Label* profile_method, Label* profile_method_continue); ++ void generate_counter_overflow(Label* do_continue); ++ ++#endif // CPU_LOONGARCH_VM_INTERPRETERGENERATOR_LOONGARCH_HPP +diff --git a/hotspot/src/cpu/loongarch/vm/interpreterRT_loongarch.hpp b/hotspot/src/cpu/loongarch/vm/interpreterRT_loongarch.hpp +new file mode 100644 +index 0000000000..052eb997e4 +--- /dev/null ++++ b/hotspot/src/cpu/loongarch/vm/interpreterRT_loongarch.hpp +@@ -0,0 +1,66 @@ ++/* ++ * Copyright (c) 1998, 2010, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2021, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 
++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#ifndef CPU_LOONGARCH_VM_INTERPRETERRT_LOONGARCH_HPP ++#define CPU_LOONGARCH_VM_INTERPRETERRT_LOONGARCH_HPP ++ ++#include "memory/allocation.hpp" ++ ++// native method calls ++ ++class SignatureHandlerGenerator: public NativeSignatureIterator { ++ private: ++ MacroAssembler* _masm; ++ unsigned int _num_fp_args; ++ unsigned int _num_int_args; ++ int _stack_offset; ++ ++ void move(int from_offset, int to_offset); ++ void box(int from_offset, int to_offset); ++ void pass_int(); ++ void pass_long(); ++ void pass_object(); ++ void pass_float(); ++ void pass_double(); ++ ++ public: ++ // Creation ++ SignatureHandlerGenerator(methodHandle method, CodeBuffer* buffer) : NativeSignatureIterator(method) { ++ _masm = new MacroAssembler(buffer); ++ _num_int_args = (method->is_static() ? 1 : 0); ++ _num_fp_args = 0; ++ _stack_offset = 0; ++ } ++ ++ // Code generation ++ void generate(uint64_t fingerprint); ++ ++ // Code generation support ++ static Register from(); ++ static Register to(); ++ static Register temp(); ++}; ++ ++#endif // CPU_LOONGARCH_VM_INTERPRETERRT_LOONGARCH_HPP +diff --git a/hotspot/src/cpu/loongarch/vm/interpreterRT_loongarch_64.cpp b/hotspot/src/cpu/loongarch/vm/interpreterRT_loongarch_64.cpp +new file mode 100644 +index 0000000000..0c9df4aa71 +--- /dev/null ++++ b/hotspot/src/cpu/loongarch/vm/interpreterRT_loongarch_64.cpp +@@ -0,0 +1,274 @@ ++/* ++ * Copyright (c) 2003, 2012, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2021, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ * ++ */ ++ ++#include "precompiled.hpp" ++#include "interpreter/interpreter.hpp" ++#include "interpreter/interpreterRuntime.hpp" ++#include "memory/allocation.inline.hpp" ++#include "memory/universe.inline.hpp" ++#include "oops/method.hpp" ++#include "oops/oop.inline.hpp" ++#include "runtime/handles.inline.hpp" ++#include "runtime/icache.hpp" ++#include "runtime/interfaceSupport.hpp" ++#include "runtime/signature.hpp" ++ ++#define __ _masm-> ++ ++#define A0 RA0 ++#define A1 RA1 ++#define A2 RA2 ++#define A3 RA3 ++#define A4 RA4 ++#define A5 RA5 ++#define A6 RA6 ++#define A7 RA7 ++#define T0 RT0 ++#define T1 RT1 ++#define T2 RT2 ++#define T3 RT3 ++#define T4 RT4 ++#define T5 RT5 ++#define T6 RT6 ++#define T7 RT7 ++#define T8 RT8 ++ ++// Implementation of SignatureHandlerGenerator ++ ++void InterpreterRuntime::SignatureHandlerGenerator::move(int from_offset, int to_offset) { ++ __ ld_d(temp(), from(), Interpreter::local_offset_in_bytes(from_offset)); ++ __ st_d(temp(), to(), to_offset * longSize); ++} ++ ++void InterpreterRuntime::SignatureHandlerGenerator::box(int from_offset, int to_offset) { ++ __ addi_d(temp(), from(),Interpreter::local_offset_in_bytes(from_offset) ); ++ __ ld_w(AT, from(), Interpreter::local_offset_in_bytes(from_offset) ); ++ ++ __ maskeqz(temp(), temp(), AT); ++ __ st_w(temp(), to(), to_offset * wordSize); ++} ++ ++void InterpreterRuntime::SignatureHandlerGenerator::generate(uint64_t fingerprint) { ++ // generate code to handle arguments ++ iterate(fingerprint); ++ // return result handler ++ __ li(V0, AbstractInterpreter::result_handler(method()->result_type())); ++ // return ++ __ jr(RA); ++ ++ __ flush(); ++} ++ ++void InterpreterRuntime::SignatureHandlerGenerator::pass_int() { ++ if (_num_int_args < Argument::n_register_parameters - 1) { ++ __ ld_w(as_Register(++_num_int_args + RA0->encoding()), from(), Interpreter::local_offset_in_bytes(offset())); ++ } else { ++ __ ld_w(AT, from(), Interpreter::local_offset_in_bytes(offset())); ++ __ st_w(AT, to(), _stack_offset); ++ _stack_offset += wordSize; ++ } ++} ++ ++// the jvm specifies that long type takes 2 stack spaces, so in do_long(), _offset += 2. 
++void InterpreterRuntime::SignatureHandlerGenerator::pass_long() { ++ if (_num_int_args < Argument::n_register_parameters - 1) { ++ __ ld_d(as_Register(++_num_int_args + RA0->encoding()), from(), Interpreter::local_offset_in_bytes(offset() + 1)); ++ } else { ++ __ ld_d(AT, from(), Interpreter::local_offset_in_bytes(offset() + 1)); ++ __ st_d(AT, to(), _stack_offset); ++ _stack_offset += wordSize; ++ } ++} ++ ++void InterpreterRuntime::SignatureHandlerGenerator::pass_object() { ++ if (_num_int_args < Argument::n_register_parameters - 1) { ++ Register reg = as_Register(++_num_int_args + RA0->encoding()); ++ if (_num_int_args == 1) { ++ assert(offset() == 0, "argument register 1 can only be (non-null) receiver"); ++ __ addi_d(reg, from(), Interpreter::local_offset_in_bytes(offset())); ++ } else { ++ __ ld_d(reg, from(), Interpreter::local_offset_in_bytes(offset())); ++ __ addi_d(AT, from(), Interpreter::local_offset_in_bytes(offset())); ++ __ maskeqz(reg, AT, reg); ++ } ++ } else { ++ __ ld_d(temp(), from(), Interpreter::local_offset_in_bytes(offset())); ++ __ addi_d(AT, from(), Interpreter::local_offset_in_bytes(offset())); ++ __ maskeqz(temp(), AT, temp()); ++ __ st_d(temp(), to(), _stack_offset); ++ _stack_offset += wordSize; ++ } ++} ++ ++void InterpreterRuntime::SignatureHandlerGenerator::pass_float() { ++ if (_num_fp_args < Argument::n_float_register_parameters) { ++ __ fld_s(as_FloatRegister(_num_fp_args++), from(), Interpreter::local_offset_in_bytes(offset())); ++ } else if (_num_int_args < Argument::n_register_parameters - 1) { ++ __ ld_w(as_Register(++_num_int_args + RA0->encoding()), from(), Interpreter::local_offset_in_bytes(offset())); ++ } else { ++ __ ld_w(AT, from(), Interpreter::local_offset_in_bytes(offset())); ++ __ st_w(AT, to(), _stack_offset); ++ _stack_offset += wordSize; ++ } ++} ++ ++// the jvm specifies that double type takes 2 stack spaces, so in do_double(), _offset += 2. 
++void InterpreterRuntime::SignatureHandlerGenerator::pass_double() { ++ if (_num_fp_args < Argument::n_float_register_parameters) { ++ __ fld_d(as_FloatRegister(_num_fp_args++), from(), Interpreter::local_offset_in_bytes(offset() + 1)); ++ } else if (_num_int_args < Argument::n_register_parameters - 1) { ++ __ ld_d(as_Register(++_num_int_args + RA0->encoding()), from(), Interpreter::local_offset_in_bytes(offset() + 1)); ++ } else { ++ __ ld_d(AT, from(), Interpreter::local_offset_in_bytes(offset() + 1)); ++ __ st_d(AT, to(), _stack_offset); ++ _stack_offset += wordSize; ++ } ++} ++ ++ ++Register InterpreterRuntime::SignatureHandlerGenerator::from() { return LVP; } ++Register InterpreterRuntime::SignatureHandlerGenerator::to() { return SP; } ++Register InterpreterRuntime::SignatureHandlerGenerator::temp() { return T8; } ++ ++// Implementation of SignatureHandlerLibrary ++ ++void SignatureHandlerLibrary::pd_set_handler(address handler) {} ++ ++ ++class SlowSignatureHandler ++ : public NativeSignatureIterator { ++ private: ++ address _from; ++ intptr_t* _to; ++ intptr_t* _int_args; ++ intptr_t* _fp_args; ++ intptr_t* _fp_identifiers; ++ unsigned int _num_int_args; ++ unsigned int _num_fp_args; ++ ++ virtual void pass_int() ++ { ++ jint from_obj = *(jint *)(_from+Interpreter::local_offset_in_bytes(0)); ++ _from -= Interpreter::stackElementSize; ++ ++ if (_num_int_args < Argument::n_register_parameters - 1) { ++ *_int_args++ = from_obj; ++ _num_int_args++; ++ } else { ++ *_to++ = from_obj; ++ } ++ } ++ ++ virtual void pass_long() ++ { ++ intptr_t from_obj = *(intptr_t*)(_from+Interpreter::local_offset_in_bytes(1)); ++ _from -= 2 * Interpreter::stackElementSize; ++ ++ if (_num_int_args < Argument::n_register_parameters - 1) { ++ *_int_args++ = from_obj; ++ _num_int_args++; ++ } else { ++ *_to++ = from_obj; ++ } ++ } ++ ++ virtual void pass_object() ++ { ++ intptr_t *from_addr = (intptr_t*)(_from + Interpreter::local_offset_in_bytes(0)); ++ _from -= Interpreter::stackElementSize; ++ ++ if (_num_int_args < Argument::n_register_parameters - 1) { ++ *_int_args++ = (*from_addr == 0) ? NULL : (intptr_t) from_addr; ++ _num_int_args++; ++ } else { ++ *_to++ = (*from_addr == 0) ? NULL : (intptr_t) from_addr; ++ } ++ } ++ ++ virtual void pass_float() ++ { ++ jint from_obj = *(jint *)(_from+Interpreter::local_offset_in_bytes(0)); ++ _from -= Interpreter::stackElementSize; ++ ++ if (_num_fp_args < Argument::n_float_register_parameters) { ++ *_fp_args++ = from_obj; ++ _num_fp_args++; ++ } else if (_num_int_args < Argument::n_register_parameters - 1) { ++ *_int_args++ = from_obj; ++ _num_int_args++; ++ } else { ++ *_to++ = from_obj; ++ } ++ } ++ ++ virtual void pass_double() ++ { ++ intptr_t from_obj = *(intptr_t*)(_from+Interpreter::local_offset_in_bytes(1)); ++ _from -= 2*Interpreter::stackElementSize; ++ ++ if (_num_fp_args < Argument::n_float_register_parameters) { ++ *_fp_args++ = from_obj; ++ *_fp_identifiers |= (1 << _num_fp_args); // mark as double ++ _num_fp_args++; ++ } else if (_num_int_args < Argument::n_register_parameters - 1) { ++ *_int_args++ = from_obj; ++ _num_int_args++; ++ } else { ++ *_to++ = from_obj; ++ } ++ } ++ ++ public: ++ SlowSignatureHandler(methodHandle method, address from, intptr_t* to) ++ : NativeSignatureIterator(method) ++ { ++ _from = from; ++ _to = to; ++ ++ // see TemplateInterpreterGenerator::generate_slow_signature_handler() ++ _int_args = to - (method->is_static() ? 
15 : 16); ++ _fp_args = to - 8; ++ _fp_identifiers = to - 9; ++ *(int*) _fp_identifiers = 0; ++ _num_int_args = (method->is_static() ? 1 : 0); ++ _num_fp_args = 0; ++ } ++}; ++ ++ ++IRT_ENTRY(address, ++ InterpreterRuntime::slow_signature_handler(JavaThread* thread, ++ Method* method, ++ intptr_t* from, ++ intptr_t* to)) ++ methodHandle m(thread, (Method*)method); ++ assert(m->is_native(), "sanity check"); ++ ++ // handle arguments ++ SlowSignatureHandler(m, (address)from, to).iterate(UCONST64(-1)); ++ ++ // return result handler ++ return Interpreter::result_handler(m->result_type()); ++IRT_END +diff --git a/hotspot/src/cpu/loongarch/vm/interpreter_loongarch.hpp b/hotspot/src/cpu/loongarch/vm/interpreter_loongarch.hpp +new file mode 100644 +index 0000000000..c83afbdaf0 +--- /dev/null ++++ b/hotspot/src/cpu/loongarch/vm/interpreter_loongarch.hpp +@@ -0,0 +1,50 @@ ++/* ++ * Copyright (c) 1997, 2011, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2020, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#ifndef CPU_LOONGARCH_VM_INTERPRETER_LOONGARCH_HPP ++#define CPU_LOONGARCH_VM_INTERPRETER_LOONGARCH_HPP ++ ++ public: ++ ++ // Sentinel placed in the code for interpreter returns so ++ // that i2c adapters and osr code can recognize an interpreter ++ // return address and convert the return to a specialized ++ // block of code to handle compiedl return values and cleaning ++ // the fpu stack. ++ static const int return_sentinel; ++ ++ static Address::ScaleFactor stackElementScale() { ++ return Address::times_8; ++ } ++ ++ // Offset from sp (which points to the last stack element) ++ static int expr_offset_in_bytes(int i) { return stackElementSize * i; } ++ // Size of interpreter code. Increase if too small. Interpreter will ++ // fail with a guarantee ("not enough space for interpreter generation"); ++ // if too small. ++ // Run with +PrintInterpreterSize to get the VM to print out the size. ++ // Max size with JVMTI and TaggedStackInterpreter ++ const static int InterpreterCodeSize = 168 * 1024; ++#endif // CPU_LOONGARCH_VM_INTERPRETER_LOONGARCH_HPP +diff --git a/hotspot/src/cpu/loongarch/vm/interpreter_loongarch_64.cpp b/hotspot/src/cpu/loongarch/vm/interpreter_loongarch_64.cpp +new file mode 100644 +index 0000000000..5a4f102cfd +--- /dev/null ++++ b/hotspot/src/cpu/loongarch/vm/interpreter_loongarch_64.cpp +@@ -0,0 +1,277 @@ ++/* ++ * Copyright (c) 2003, 2014, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2022, Loongson Technology. 
All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#include "precompiled.hpp" ++#include "asm/macroAssembler.hpp" ++#include "interpreter/bytecodeHistogram.hpp" ++#include "interpreter/interpreter.hpp" ++#include "interpreter/interpreterGenerator.hpp" ++#include "interpreter/interpreterRuntime.hpp" ++#include "interpreter/templateTable.hpp" ++#include "oops/arrayOop.hpp" ++#include "oops/methodData.hpp" ++#include "oops/method.hpp" ++#include "oops/oop.inline.hpp" ++#include "prims/jvmtiExport.hpp" ++#include "prims/jvmtiThreadState.hpp" ++#include "prims/methodHandles.hpp" ++#include "runtime/arguments.hpp" ++#include "runtime/deoptimization.hpp" ++#include "runtime/frame.inline.hpp" ++#include "runtime/sharedRuntime.hpp" ++#include "runtime/stubRoutines.hpp" ++#include "runtime/synchronizer.hpp" ++#include "runtime/timer.hpp" ++#include "runtime/vframeArray.hpp" ++#include "utilities/debug.hpp" ++ ++#define __ _masm-> ++ ++#define A0 RA0 ++#define A1 RA1 ++#define A2 RA2 ++#define A3 RA3 ++#define A4 RA4 ++#define A5 RA5 ++#define A6 RA6 ++#define A7 RA7 ++#define T0 RT0 ++#define T1 RT1 ++#define T2 RT2 ++#define T3 RT3 ++#define T4 RT4 ++#define T5 RT5 ++#define T6 RT6 ++#define T7 RT7 ++#define T8 RT8 ++ ++address AbstractInterpreterGenerator::generate_slow_signature_handler() { ++ address entry = __ pc(); ++ // Rmethod: method ++ // LVP: pointer to locals ++ // A3: first stack arg ++ __ move(A3, SP); ++ __ addi_d(SP, SP, -18 * wordSize); ++ __ st_d(RA, SP, 0); ++ __ call_VM(noreg, ++ CAST_FROM_FN_PTR(address, ++ InterpreterRuntime::slow_signature_handler), ++ Rmethod, LVP, A3); ++ ++ // V0: result handler ++ ++ // Stack layout: ++ // ... ++ // 18 stack arg0 <--- old sp ++ // 17 floatReg arg7 ++ // ... ++ // 10 floatReg arg0 ++ // 9 float/double identifiers ++ // 8 IntReg arg7 ++ // ... ++ // 2 IntReg arg1 ++ // 1 aligned slot ++ // SP: 0 return address ++ ++ // Do FP first so we can use A3 as temp ++ __ ld_d(A3, Address(SP, 9 * wordSize)); // float/double identifiers ++ ++ for (int i= 0; i < Argument::n_float_register_parameters; i++) { ++ FloatRegister floatreg = as_FloatRegister(i + FA0->encoding()); ++ Label isdouble, done; ++ ++ __ andi(AT, A3, 1 << i); ++ __ bnez(AT, isdouble); ++ __ fld_s(floatreg, SP, (10 + i) * wordSize); ++ __ b(done); ++ __ bind(isdouble); ++ __ fld_d(floatreg, SP, (10 + i) * wordSize); ++ __ bind(done); ++ } ++ ++ // A0 is for env. ++ // If the mothed is not static, A1 will be corrected in generate_native_entry. 
++ for (int i= 1; i < Argument::n_register_parameters; i++) { ++ Register reg = as_Register(i + A0->encoding()); ++ ++ __ ld_d(reg, SP, (1 + i) * wordSize); ++ } ++ ++ // A0/V0 contains the result from the call of ++ // InterpreterRuntime::slow_signature_handler so we don't touch it ++ // here. It will be loaded with the JNIEnv* later. ++ __ ld_d(RA, SP, 0); ++ __ addi_d(SP, SP, 18 * wordSize); ++ __ jr(RA); ++ return entry; ++} ++ ++ ++// ++// Various method entries ++// ++ ++address InterpreterGenerator::generate_math_entry(AbstractInterpreter::MethodKind kind) { ++ ++ // Rmethod: methodOop ++ // V0: scratrch ++ // Rsender: send 's sp ++ ++ if (!InlineIntrinsics) return NULL; // Generate a vanilla entry ++ ++ address entry_point = __ pc(); ++ //guarantee(0, "LA not implemented yet"); ++ // These don't need a safepoint check because they aren't virtually ++ // callable. We won't enter these intrinsics from compiled code. ++ // If in the future we added an intrinsic which was virtually callable ++ // we'd have to worry about how to safepoint so that this code is used. ++ ++ // mathematical functions inlined by compiler ++ // (interpreter must provide identical implementation ++ // in order to avoid monotonicity bugs when switching ++ // from interpreter to compiler in the middle of some ++ // computation) ++ // ++ // stack: [ lo(arg) ] <-- sp ++ // [ hi(arg) ] ++ { ++ // Note: For JDK 1.3 StrictMath exists and Math.sin/cos/sqrt are ++ // java methods. Interpreter::method_kind(...) will select ++ // this entry point for the corresponding methods in JDK 1.3. ++ __ fld_d(FA0, SP, 0 * wordSize); ++ __ fld_d(FA1, SP, 1 * wordSize); ++ __ push2(RA, FP); ++ __ addi_d(FP, SP, 2 * wordSize); ++ ++ // [ fp ] <-- sp ++ // [ ra ] ++ // [ lo ] <-- fp ++ // [ hi ] ++ //FIXME, need consider this ++ switch (kind) { ++ case Interpreter::java_lang_math_sin : ++ __ trigfunc('s'); ++ break; ++ case Interpreter::java_lang_math_cos : ++ __ trigfunc('c'); ++ break; ++ case Interpreter::java_lang_math_tan : ++ __ trigfunc('t'); ++ break; ++ case Interpreter::java_lang_math_sqrt: ++ __ fsqrt_d(F0, FA0); ++ break; ++ case Interpreter::java_lang_math_abs: ++ __ fabs_d(F0, FA0); ++ break; ++ case Interpreter::java_lang_math_log: ++ // Store to stack to convert 80bit precision back to 64bits ++ break; ++ case Interpreter::java_lang_math_log10: ++ // Store to stack to convert 80bit precision back to 64bits ++ break; ++ case Interpreter::java_lang_math_pow: ++ break; ++ case Interpreter::java_lang_math_exp: ++ break; ++ ++ default : ++ ShouldNotReachHere(); ++ } ++ ++ // must maintain return value in F0:F1 ++ __ ld_d(RA, FP, (-1) * wordSize); ++ //FIXME ++ __ ld_d(FP, FP, (-2) * wordSize); ++ __ move(SP, Rsender); ++ __ jr(RA); ++ } ++ return entry_point; ++} ++ ++ ++// Abstract method entry ++// Attempt to execute abstract method. Throw exception ++address InterpreterGenerator::generate_abstract_entry(void) { ++ ++ // Rmethod: methodOop ++ // V0: receiver (unused) ++ // Rsender : sender 's sp ++ address entry_point = __ pc(); ++ ++ // abstract method entry ++ // throw exception ++ // adjust stack to what a normal return would do ++ __ empty_expression_stack(); ++ __ restore_bcp(); ++ __ restore_locals(); ++ __ call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::throw_AbstractMethodError)); ++ // the call_VM checks for exception, so we should never return here. ++ __ should_not_reach_here(); ++ ++ return entry_point; ++} ++ ++ ++// Empty method, generate a very fast return. 
++
++address InterpreterGenerator::generate_empty_entry(void) {
++
++  // Rmethod: methodOop
++  // V0: receiver (unused)
++  // Rsender: sender's sp; SP must be set to this value on return. On LoongArch this currently uses T0; is that right?
++  if (!UseFastEmptyMethods) return NULL;
++
++  address entry_point = __ pc();
++  //TODO: LA
++  //guarantee(0, "LA not implemented yet");
++  Label slow_path;
++  __ li(RT0, SafepointSynchronize::address_of_state());
++  __ ld_w(AT, RT0, 0);
++  __ li(RT0, (SafepointSynchronize::_not_synchronized));
++  __ bne(AT, RT0, slow_path);
++  __ move(SP, Rsender);
++  __ jr(RA);
++  __ bind(slow_path);
++  (void) generate_normal_entry(false);
++  return entry_point;
++
++}
++
++void Deoptimization::unwind_callee_save_values(frame* f, vframeArray* vframe_array) {
++
++  // This code is sort of the equivalent of C2IAdapter::setup_stack_frame back in
++  // the days we had adapter frames. When we deoptimize a situation where a
++  // compiled caller calls a compiled callee, the caller will have registers it
++  // expects to survive the call to the callee. If we deoptimize the callee the only
++  // way we can restore these registers is to have the oldest interpreter
++  // frame that we create restore these values. That is what this routine
++  // will accomplish.
++
++  // At the moment we have modified c2 to not have any callee save registers
++  // so this problem does not exist and this routine is just a place holder.
++
++  assert(f->is_interpreted_frame(), "must be interpreted");
++}
+diff --git a/hotspot/src/cpu/loongarch/vm/javaFrameAnchor_loongarch.hpp b/hotspot/src/cpu/loongarch/vm/javaFrameAnchor_loongarch.hpp
+new file mode 100644
+index 0000000000..de97de5804
+--- /dev/null
++++ b/hotspot/src/cpu/loongarch/vm/javaFrameAnchor_loongarch.hpp
+@@ -0,0 +1,87 @@
++/*
++ * Copyright (c) 2002, 2010, Oracle and/or its affiliates. All rights reserved.
++ * Copyright (c) 2015, 2020, Loongson Technology. All rights reserved.
++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
++ *
++ * This code is free software; you can redistribute it and/or modify it
++ * under the terms of the GNU General Public License version 2 only, as
++ * published by the Free Software Foundation.
++ *
++ * This code is distributed in the hope that it will be useful, but WITHOUT
++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
++ * version 2 for more details (a copy is included in the LICENSE file that
++ * accompanied this code).
++ *
++ * You should have received a copy of the GNU General Public License version
++ * 2 along with this work; if not, write to the Free Software Foundation,
++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
++ *
++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
++ * or visit www.oracle.com if you need additional information or have any
++ * questions.
++ * ++ */ ++ ++#ifndef CPU_LOONGARCH_VM_JAVAFRAMEANCHOR_LOONGARCH_HPP ++#define CPU_LOONGARCH_VM_JAVAFRAMEANCHOR_LOONGARCH_HPP ++ ++private: ++ ++ // FP value associated with _last_Java_sp: ++ intptr_t* volatile _last_Java_fp; // pointer is volatile not what it points to ++ ++public: ++ // Each arch must define reset, save, restore ++ // These are used by objects that only care about: ++ // 1 - initializing a new state (thread creation, javaCalls) ++ // 2 - saving a current state (javaCalls) ++ // 3 - restoring an old state (javaCalls) ++ ++ void clear(void) { ++ // clearing _last_Java_sp must be first ++ _last_Java_sp = NULL; ++ // fence? ++ _last_Java_fp = NULL; ++ _last_Java_pc = NULL; ++ } ++ ++ void copy(JavaFrameAnchor* src) { ++ // In order to make sure the transition state is valid for "this" ++ // We must clear _last_Java_sp before copying the rest of the new data ++ // ++ // Hack Alert: Temporary bugfix for 4717480/4721647 ++ // To act like previous version (pd_cache_state) don't NULL _last_Java_sp ++ // unless the value is changing ++ // ++ if (_last_Java_sp != src->_last_Java_sp) ++ _last_Java_sp = NULL; ++ ++ _last_Java_fp = src->_last_Java_fp; ++ _last_Java_pc = src->_last_Java_pc; ++ // Must be last so profiler will always see valid frame if has_last_frame() is true ++ _last_Java_sp = src->_last_Java_sp; ++ } ++ ++ // Always walkable ++ bool walkable(void) { return true; } ++ // Never any thing to do since we are always walkable and can find address of return addresses ++ void make_walkable(JavaThread* thread) { } ++ ++ intptr_t* last_Java_sp(void) const { return _last_Java_sp; } ++ ++ address last_Java_pc(void) { return _last_Java_pc; } ++ ++private: ++ ++ static ByteSize last_Java_fp_offset() { return byte_offset_of(JavaFrameAnchor, _last_Java_fp); } ++ ++public: ++ ++ void set_last_Java_sp(intptr_t* sp) { _last_Java_sp = sp; } ++ ++ intptr_t* last_Java_fp(void) { return _last_Java_fp; } ++ // Assert (last_Java_sp == NULL || fp == NULL) ++ void set_last_Java_fp(intptr_t* fp) { _last_Java_fp = fp; } ++ ++#endif // CPU_LOONGARCH_VM_JAVAFRAMEANCHOR_LOONGARCH_HPP +diff --git a/hotspot/src/cpu/loongarch/vm/jniFastGetField_loongarch_64.cpp b/hotspot/src/cpu/loongarch/vm/jniFastGetField_loongarch_64.cpp +new file mode 100644 +index 0000000000..5b52e54e08 +--- /dev/null ++++ b/hotspot/src/cpu/loongarch/vm/jniFastGetField_loongarch_64.cpp +@@ -0,0 +1,169 @@ ++/* ++ * Copyright (c) 2004, 2010, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2021, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 
++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#include "precompiled.hpp" ++#include "asm/macroAssembler.hpp" ++#include "code/codeBlob.hpp" ++#include "memory/resourceArea.hpp" ++#include "prims/jniFastGetField.hpp" ++#include "prims/jvm_misc.hpp" ++#include "runtime/safepoint.hpp" ++ ++#define __ masm-> ++ ++#define A0 RA0 ++#define A1 RA1 ++#define A2 RA2 ++#define A3 RA3 ++#define A4 RA4 ++#define A5 RA5 ++#define A6 RA6 ++#define A7 RA7 ++#define T0 RT0 ++#define T1 RT1 ++#define T2 RT2 ++#define T3 RT3 ++#define T4 RT4 ++#define T5 RT5 ++#define T6 RT6 ++#define T7 RT7 ++#define T8 RT8 ++ ++#define BUFFER_SIZE 30*wordSize ++ ++// Instead of issuing lfence for LoadLoad barrier, we create data dependency ++// between loads, which is more efficient than lfence. ++ ++address JNI_FastGetField::generate_fast_get_int_field0(BasicType type) { ++ const char *name = NULL; ++ switch (type) { ++ case T_BOOLEAN: name = "jni_fast_GetBooleanField"; break; ++ case T_BYTE: name = "jni_fast_GetByteField"; break; ++ case T_CHAR: name = "jni_fast_GetCharField"; break; ++ case T_SHORT: name = "jni_fast_GetShortField"; break; ++ case T_INT: name = "jni_fast_GetIntField"; break; ++ case T_LONG: name = "jni_fast_GetLongField"; break; ++ case T_FLOAT: name = "jni_fast_GetFloatField"; break; ++ case T_DOUBLE: name = "jni_fast_GetDoubleField"; break; ++ default: ShouldNotReachHere(); ++ } ++ ResourceMark rm; ++ BufferBlob* blob = BufferBlob::create(name, BUFFER_SIZE); ++ CodeBuffer cbuf(blob); ++ MacroAssembler* masm = new MacroAssembler(&cbuf); ++ address fast_entry = __ pc(); ++ Label slow; ++ ++ // return pc RA ++ // jni env A0 ++ // obj A1 ++ // jfieldID A2 ++ ++ address counter_addr = SafepointSynchronize::safepoint_counter_addr(); ++ __ li(AT, (long)counter_addr); ++ __ ld_w(T1, AT, 0); ++ ++ // Parameters(A0~A3) should not be modified, since they will be used in slow path ++ __ andi(AT, T1, 1); ++ __ bne(AT, R0, slow); ++ ++ __ move(T0, A1); ++ __ clear_jweak_tag(T0); ++ ++ __ ld_d(T0, T0, 0); // unbox, *obj ++ __ srli_d(T2, A2, 2); // offset ++ __ add_d(T0, T0, T2); ++ ++ __ li(AT, (long)counter_addr); ++ __ ld_w(AT, AT, 0); ++ __ bne(T1, AT, slow); ++ ++ assert(count < LIST_CAPACITY, "LIST_CAPACITY too small"); ++ speculative_load_pclist[count] = __ pc(); ++ switch (type) { ++ case T_BOOLEAN: __ ld_bu (V0, T0, 0); break; ++ case T_BYTE: __ ld_b (V0, T0, 0); break; ++ case T_CHAR: __ ld_hu (V0, T0, 0); break; ++ case T_SHORT: __ ld_h (V0, T0, 0); break; ++ case T_INT: __ ld_w (V0, T0, 0); break; ++ case T_LONG: __ ld_d (V0, T0, 0); break; ++ case T_FLOAT: __ fld_s (F0, T0, 0); break; ++ case T_DOUBLE: __ fld_d (F0, T0, 0); break; ++ default: ShouldNotReachHere(); ++ } ++ ++ __ jr(RA); ++ ++ slowcase_entry_pclist[count++] = __ pc(); ++ __ bind (slow); ++ address slow_case_addr = NULL; ++ switch (type) { ++ case T_BOOLEAN: slow_case_addr = jni_GetBooleanField_addr(); break; ++ case T_BYTE: slow_case_addr = jni_GetByteField_addr(); break; ++ case T_CHAR: slow_case_addr = jni_GetCharField_addr(); break; ++ case T_SHORT: slow_case_addr = jni_GetShortField_addr(); break; ++ case T_INT: slow_case_addr = jni_GetIntField_addr(); break; ++ case T_LONG: slow_case_addr = jni_GetLongField_addr(); break; ++ case T_FLOAT: slow_case_addr = jni_GetFloatField_addr(); break; ++ case T_DOUBLE: slow_case_addr = jni_GetDoubleField_addr(); break; ++ default: ShouldNotReachHere(); ++ } 
++ __ jmp(slow_case_addr); ++ ++ __ flush (); ++ return fast_entry; ++} ++ ++address JNI_FastGetField::generate_fast_get_boolean_field() { ++ return generate_fast_get_int_field0(T_BOOLEAN); ++} ++ ++address JNI_FastGetField::generate_fast_get_byte_field() { ++ return generate_fast_get_int_field0(T_BYTE); ++} ++ ++address JNI_FastGetField::generate_fast_get_char_field() { ++ return generate_fast_get_int_field0(T_CHAR); ++} ++ ++address JNI_FastGetField::generate_fast_get_short_field() { ++ return generate_fast_get_int_field0(T_SHORT); ++} ++ ++address JNI_FastGetField::generate_fast_get_int_field() { ++ return generate_fast_get_int_field0(T_INT); ++} ++ ++address JNI_FastGetField::generate_fast_get_long_field() { ++ return generate_fast_get_int_field0(T_LONG); ++} ++ ++address JNI_FastGetField::generate_fast_get_float_field() { ++ return generate_fast_get_int_field0(T_FLOAT); ++} ++ ++address JNI_FastGetField::generate_fast_get_double_field() { ++ return generate_fast_get_int_field0(T_DOUBLE); ++} +diff --git a/hotspot/src/cpu/loongarch/vm/jniTypes_loongarch.hpp b/hotspot/src/cpu/loongarch/vm/jniTypes_loongarch.hpp +new file mode 100644 +index 0000000000..554ff216ac +--- /dev/null ++++ b/hotspot/src/cpu/loongarch/vm/jniTypes_loongarch.hpp +@@ -0,0 +1,144 @@ ++/* ++ * Copyright (c) 1998, 2010, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2020, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#ifndef CPU_LOONGARCH_VM_JNITYPES_LOONGARCH_HPP ++#define CPU_LOONGARCH_VM_JNITYPES_LOONGARCH_HPP ++ ++#include "memory/allocation.hpp" ++#include "oops/oop.hpp" ++#include "prims/jni.h" ++ ++// This file holds platform-dependent routines used to write primitive jni ++// types to the array of arguments passed into JavaCalls::call ++ ++class JNITypes : AllStatic { ++ // These functions write a java primitive type (in native format) ++ // to a java stack slot array to be passed as an argument to JavaCalls:calls. ++ // I.e., they are functionally 'push' operations if they have a 'pos' ++ // formal parameter. Note that jlong's and jdouble's are written ++ // _in reverse_ of the order in which they appear in the interpreter ++ // stack. This is because call stubs (see stubGenerator_sparc.cpp) ++ // reverse the argument list constructed by JavaCallArguments (see ++ // javaCalls.hpp). ++ ++private: ++ ++ // 32bit Helper routines. 
++  static inline void put_int2r(jint *from, intptr_t *to) { *(jint *)(to++) = from[1];
++                                                            *(jint *)(to ) = from[0]; }
++  static inline void put_int2r(jint *from, intptr_t *to, int& pos) { put_int2r(from, to + pos); pos += 2; }
++
++public:
++  // On LoongArch64, sizeof(intptr_t) is 8 bytes, and each unit in JavaCallArguments::_value_buffer[]
++  // is 8 bytes.
++  // If we only write the low 4 bytes with (jint *), the high 4 bytes are left with uncertain values.
++  // Then, in JavaCallArguments::parameters(), the whole 8 bytes of a T_INT parameter is loaded.
++  // This error occurs in ReflectInvoke.java:
++  // the parameter of DD(int) should be 4 instead of 0x550000004.
++  //
++  // See: [runtime/javaCalls.hpp]
++
++  static inline void put_int(jint from, intptr_t *to) { *(intptr_t *)(to + 0 ) = from; }
++  static inline void put_int(jint from, intptr_t *to, int& pos) { *(intptr_t *)(to + pos++) = from; }
++  static inline void put_int(jint *from, intptr_t *to, int& pos) { *(intptr_t *)(to + pos++) = *from; }
++
++  // Longs are stored in native format in one JavaCallArgument slot at
++  // *(to).
++  // In theory, *(to + 1) is an empty slot. But, for several Java2D testing programs (TestBorderLayout, SwingTest),
++  // *(to + 1) must contain a copy of the long value. Otherwise it corrupts.
++  static inline void put_long(jlong from, intptr_t *to) {
++    *(jlong*) (to + 1) = from;
++    *(jlong*) (to) = from;
++  }
++
++  // A long parameter occupies two slots.
++  // It must fit the layout rule in methodHandle.
++  //
++  // See: [runtime/reflection.cpp] Reflection::invoke()
++  //      assert(java_args.size_of_parameters() == method->size_of_parameters(), "just checking");
++
++  static inline void put_long(jlong from, intptr_t *to, int& pos) {
++    *(jlong*) (to + 1 + pos) = from;
++    *(jlong*) (to + pos) = from;
++    pos += 2;
++  }
++
++  static inline void put_long(jlong *from, intptr_t *to, int& pos) {
++    *(jlong*) (to + 1 + pos) = *from;
++    *(jlong*) (to + pos) = *from;
++    pos += 2;
++  }
++
++  // Oops are stored in native format in one JavaCallArgument slot at *to.
++  static inline void put_obj(oop from, intptr_t *to) { *(oop *)(to + 0 ) = from; }
++  static inline void put_obj(oop from, intptr_t *to, int& pos) { *(oop *)(to + pos++) = from; }
++  static inline void put_obj(oop *from, intptr_t *to, int& pos) { *(oop *)(to + pos++) = *from; }
++
++  // Floats are stored in native format in one JavaCallArgument slot at *to.
++  static inline void put_float(jfloat from, intptr_t *to) { *(jfloat *)(to + 0 ) = from; }
++  static inline void put_float(jfloat from, intptr_t *to, int& pos) { *(jfloat *)(to + pos++) = from; }
++  static inline void put_float(jfloat *from, intptr_t *to, int& pos) { *(jfloat *)(to + pos++) = *from; }
++
++#undef _JNI_SLOT_OFFSET
++#define _JNI_SLOT_OFFSET 0
++
++  // Doubles are stored in native format in one JavaCallArgument slot at
++  // *(to).
++  // In theory, *(to + 1) is an empty slot. But, for several Java2D testing programs (TestBorderLayout, SwingTest),
++  // *(to + 1) must contain a copy of the double value. Otherwise it corrupts.
++  static inline void put_double(jdouble from, intptr_t *to) {
++    *(jdouble*) (to + 1) = from;
++    *(jdouble*) (to) = from;
++  }
++
++  // A double parameter occupies two slots.
++  // It must fit the layout rule in methodHandle.
++ // ++ // See: [runtime/reflection.cpp] Reflection::invoke() ++ // assert(java_args.size_of_parameters() == method->size_of_parameters(), "just checking"); ++ ++ static inline void put_double(jdouble from, intptr_t *to, int& pos) { ++ *(jdouble*) (to + 1 + pos) = from; ++ *(jdouble*) (to + pos) = from; ++ pos += 2; ++ } ++ ++ static inline void put_double(jdouble *from, intptr_t *to, int& pos) { ++ *(jdouble*) (to + 1 + pos) = *from; ++ *(jdouble*) (to + pos) = *from; ++ pos += 2; ++ } ++ ++ // The get_xxx routines, on the other hand, actually _do_ fetch ++ // java primitive types from the interpreter stack. ++ static inline jint get_int (intptr_t *from) { return *(jint *) from; } ++ static inline jlong get_long (intptr_t *from) { return *(jlong *) (from + _JNI_SLOT_OFFSET); } ++ static inline oop get_obj (intptr_t *from) { return *(oop *) from; } ++ static inline jfloat get_float (intptr_t *from) { return *(jfloat *) from; } ++ static inline jdouble get_double(intptr_t *from) { return *(jdouble *)(from + _JNI_SLOT_OFFSET); } ++#undef _JNI_SLOT_OFFSET ++}; ++ ++#endif // CPU_LOONGARCH_VM_JNITYPES_LOONGARCH_HPP +diff --git a/hotspot/src/cpu/loongarch/vm/jni_loongarch.h b/hotspot/src/cpu/loongarch/vm/jni_loongarch.h +new file mode 100644 +index 0000000000..eb25cbc354 +--- /dev/null ++++ b/hotspot/src/cpu/loongarch/vm/jni_loongarch.h +@@ -0,0 +1,51 @@ ++/* ++ * Copyright (c) 1997, 2013, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2020, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. Oracle designates this ++ * particular file as subject to the "Classpath" exception as provided ++ * by Oracle in the LICENSE file that accompanied this code. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ */ ++ ++#ifndef _JAVASOFT_JNI_MD_H_ ++#define _JAVASOFT_JNI_MD_H_ ++ ++// Note: please do not change these without also changing jni_md.h in the JDK ++// repository ++#ifndef __has_attribute ++ #define __has_attribute(x) 0 ++#endif ++#if (defined(__GNUC__) && ((__GNUC__ > 4) || (__GNUC__ == 4) && (__GNUC_MINOR__ > 2))) || __has_attribute(visibility) ++ #define JNIEXPORT __attribute__((visibility("default"))) ++ #define JNIIMPORT __attribute__((visibility("default"))) ++#else ++ #define JNIEXPORT ++ #define JNIIMPORT ++#endif ++ ++#define JNICALL ++ ++typedef int jint; ++ ++ typedef long jlong; ++ ++typedef signed char jbyte; ++ ++#endif +diff --git a/hotspot/src/cpu/loongarch/vm/loongarch.ad b/hotspot/src/cpu/loongarch/vm/loongarch.ad +new file mode 100644 +index 0000000000..48c44779e7 +--- /dev/null ++++ b/hotspot/src/cpu/loongarch/vm/loongarch.ad +@@ -0,0 +1,24 @@ ++// ++// Copyright (c) 2011, 2012, Oracle and/or its affiliates. All rights reserved. ++// Copyright (c) 2015, 2020, Loongson Technology. All rights reserved. ++// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++// ++// This code is free software; you can redistribute it and/or modify it ++// under the terms of the GNU General Public License version 2 only, as ++// published by the Free Software Foundation. ++// ++// This code is distributed in the hope that it will be useful, but WITHOUT ++// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++// version 2 for more details (a copy is included in the LICENSE file that ++// accompanied this code). ++// ++// You should have received a copy of the GNU General Public License version ++// 2 along with this work; if not, write to the Free Software Foundation, ++// Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++// ++// Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++// or visit www.oracle.com if you need additional information or have any ++// questions. ++// ++// +diff --git a/hotspot/src/cpu/loongarch/vm/loongarch_64.ad b/hotspot/src/cpu/loongarch/vm/loongarch_64.ad +new file mode 100644 +index 0000000000..6db00bf642 +--- /dev/null ++++ b/hotspot/src/cpu/loongarch/vm/loongarch_64.ad +@@ -0,0 +1,12862 @@ ++// ++// Copyright (c) 2003, 2013, Oracle and/or its affiliates. All rights reserved. ++// Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. ++// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++// ++// This code is free software; you can redistribute it and/or modify it ++// under the terms of the GNU General Public License version 2 only, as ++// published by the Free Software Foundation. ++// ++// This code is distributed in the hope that it will be useful, but WITHOUT ++// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++// version 2 for more details (a copy is included in the LICENSE file that ++// accompanied this code). ++// ++// You should have received a copy of the GNU General Public License version ++// 2 along with this work; if not, write to the Free Software Foundation, ++// Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++// ++// Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++// or visit www.oracle.com if you need additional information or have any ++// questions. 
++// ++// ++ ++// GodSon3 Architecture Description File ++ ++//----------REGISTER DEFINITION BLOCK------------------------------------------ ++// This information is used by the matcher and the register allocator to ++// describe individual registers and classes of registers within the target ++// archtecture. ++ ++// format: ++// reg_def name (call convention, c-call convention, ideal type, encoding); ++// call convention : ++// NS = No-Save ++// SOC = Save-On-Call ++// SOE = Save-On-Entry ++// AS = Always-Save ++// ideal type : ++// see opto/opcodes.hpp for more info ++// reg_class name (reg, ...); ++// alloc_class name (reg, ...); ++register %{ ++ ++// General Registers ++// Integer Registers ++ reg_def R0 ( NS, NS, Op_RegI, 0, VMRegImpl::Bad()); ++ reg_def RA ( NS, NS, Op_RegI, 1, RA->as_VMReg()); ++ reg_def RA_H ( NS, NS, Op_RegI, 1, RA->as_VMReg()->next()); ++ // TODO: LA ++ reg_def TP ( NS, NS, Op_RegI, 2, TP->as_VMReg()); ++ reg_def TP_H ( NS, NS, Op_RegI, 2, TP->as_VMReg()->next()); ++ reg_def SP ( NS, NS, Op_RegI, 3, SP->as_VMReg()); ++ reg_def SP_H ( NS, NS, Op_RegI, 3, SP->as_VMReg()->next()); ++ reg_def A0 (SOC, SOC, Op_RegI, 4, A0->as_VMReg()); ++ reg_def A0_H (SOC, SOC, Op_RegI, 4, A0->as_VMReg()->next()); ++ reg_def A1 (SOC, SOC, Op_RegI, 5, A1->as_VMReg()); ++ reg_def A1_H (SOC, SOC, Op_RegI, 5, A1->as_VMReg()->next()); ++ reg_def A2 (SOC, SOC, Op_RegI, 6, A2->as_VMReg()); ++ reg_def A2_H (SOC, SOC, Op_RegI, 6, A2->as_VMReg()->next()); ++ reg_def A3 (SOC, SOC, Op_RegI, 7, A3->as_VMReg()); ++ reg_def A3_H (SOC, SOC, Op_RegI, 7, A3->as_VMReg()->next()); ++ reg_def A4 (SOC, SOC, Op_RegI, 8, A4->as_VMReg()); ++ reg_def A4_H (SOC, SOC, Op_RegI, 8, A4->as_VMReg()->next()); ++ reg_def A5 (SOC, SOC, Op_RegI, 9, A5->as_VMReg()); ++ reg_def A5_H (SOC, SOC, Op_RegI, 9, A5->as_VMReg()->next()); ++ reg_def A6 (SOC, SOC, Op_RegI, 10, A6->as_VMReg()); ++ reg_def A6_H (SOC, SOC, Op_RegI, 10, A6->as_VMReg()->next()); ++ reg_def A7 (SOC, SOC, Op_RegI, 11, A7->as_VMReg()); ++ reg_def A7_H (SOC, SOC, Op_RegI, 11, A7->as_VMReg()->next()); ++ reg_def T0 (SOC, SOC, Op_RegI, 12, T0->as_VMReg()); ++ reg_def T0_H (SOC, SOC, Op_RegI, 12, T0->as_VMReg()->next()); ++ reg_def T1 (SOC, SOC, Op_RegI, 13, T1->as_VMReg()); ++ reg_def T1_H (SOC, SOC, Op_RegI, 13, T1->as_VMReg()->next()); ++ reg_def T2 (SOC, SOC, Op_RegI, 14, T2->as_VMReg()); ++ reg_def T2_H (SOC, SOC, Op_RegI, 14, T2->as_VMReg()->next()); ++ reg_def T3 (SOC, SOC, Op_RegI, 15, T3->as_VMReg()); ++ reg_def T3_H (SOC, SOC, Op_RegI, 15, T3->as_VMReg()->next()); ++ reg_def T4 (SOC, SOC, Op_RegI, 16, T4->as_VMReg()); ++ reg_def T4_H (SOC, SOC, Op_RegI, 16, T4->as_VMReg()->next()); ++ reg_def T5 (SOC, SOC, Op_RegI, 17, T5->as_VMReg()); ++ reg_def T5_H (SOC, SOC, Op_RegI, 17, T5->as_VMReg()->next()); ++ reg_def T6 (SOC, SOC, Op_RegI, 18, T6->as_VMReg()); ++ reg_def T6_H (SOC, SOC, Op_RegI, 18, T6->as_VMReg()->next()); ++ reg_def T7 (SOC, SOC, Op_RegI, 19, T7->as_VMReg()); ++ reg_def T7_H (SOC, SOC, Op_RegI, 19, T7->as_VMReg()->next()); ++ reg_def T8 (SOC, SOC, Op_RegI, 20, T8->as_VMReg()); ++ reg_def T8_H (SOC, SOC, Op_RegI, 20, T8->as_VMReg()->next()); ++ reg_def RX ( NS, NS, Op_RegI, 21, RX->as_VMReg()); ++ reg_def RX_H ( NS, NS, Op_RegI, 21, RX->as_VMReg()->next()); ++ reg_def FP ( NS, NS, Op_RegI, 22, FP->as_VMReg()); ++ reg_def FP_H ( NS, NS, Op_RegI, 22, FP->as_VMReg()->next()); ++ reg_def S0 (SOC, SOE, Op_RegI, 23, S0->as_VMReg()); ++ reg_def S0_H (SOC, SOE, Op_RegI, 23, S0->as_VMReg()->next()); ++ reg_def S1 (SOC, SOE, Op_RegI, 24, 
S1->as_VMReg()); ++ reg_def S1_H (SOC, SOE, Op_RegI, 24, S1->as_VMReg()->next()); ++ reg_def S2 (SOC, SOE, Op_RegI, 25, S2->as_VMReg()); ++ reg_def S2_H (SOC, SOE, Op_RegI, 25, S2->as_VMReg()->next()); ++ reg_def S3 (SOC, SOE, Op_RegI, 26, S3->as_VMReg()); ++ reg_def S3_H (SOC, SOE, Op_RegI, 26, S3->as_VMReg()->next()); ++ reg_def S4 (SOC, SOE, Op_RegI, 27, S4->as_VMReg()); ++ reg_def S4_H (SOC, SOE, Op_RegI, 27, S4->as_VMReg()->next()); ++ reg_def S5 (SOC, SOE, Op_RegI, 28, S5->as_VMReg()); ++ reg_def S5_H (SOC, SOE, Op_RegI, 28, S5->as_VMReg()->next()); ++ reg_def S6 (SOC, SOE, Op_RegI, 29, S6->as_VMReg()); ++ reg_def S6_H (SOC, SOE, Op_RegI, 29, S6->as_VMReg()->next()); ++ reg_def S7 (SOC, SOE, Op_RegI, 30, S7->as_VMReg()); ++ reg_def S7_H (SOC, SOE, Op_RegI, 30, S7->as_VMReg()->next()); ++ // TODO: LA ++ reg_def S8 ( NS, NS, Op_RegI, 31, S8->as_VMReg()); ++ reg_def S8_H ( NS, NS, Op_RegI, 31, S8->as_VMReg()->next()); ++ ++ ++// Floating/Vector registers. ++reg_def F0 ( SOC, SOC, Op_RegF, 0, F0->as_VMReg() ); ++reg_def F0_H ( SOC, SOC, Op_RegF, 0, F0->as_VMReg()->next() ); ++reg_def F0_J ( SOC, SOC, Op_RegF, 0, F0->as_VMReg()->next(2) ); ++reg_def F0_K ( SOC, SOC, Op_RegF, 0, F0->as_VMReg()->next(3) ); ++reg_def F0_L ( SOC, SOC, Op_RegF, 0, F0->as_VMReg()->next(4) ); ++reg_def F0_M ( SOC, SOC, Op_RegF, 0, F0->as_VMReg()->next(5) ); ++reg_def F0_N ( SOC, SOC, Op_RegF, 0, F0->as_VMReg()->next(6) ); ++reg_def F0_O ( SOC, SOC, Op_RegF, 0, F0->as_VMReg()->next(7) ); ++ ++reg_def F1 ( SOC, SOC, Op_RegF, 1, F1->as_VMReg() ); ++reg_def F1_H ( SOC, SOC, Op_RegF, 1, F1->as_VMReg()->next() ); ++reg_def F1_J ( SOC, SOC, Op_RegF, 1, F1->as_VMReg()->next(2) ); ++reg_def F1_K ( SOC, SOC, Op_RegF, 1, F1->as_VMReg()->next(3) ); ++reg_def F1_L ( SOC, SOC, Op_RegF, 1, F1->as_VMReg()->next(4) ); ++reg_def F1_M ( SOC, SOC, Op_RegF, 1, F1->as_VMReg()->next(5) ); ++reg_def F1_N ( SOC, SOC, Op_RegF, 1, F1->as_VMReg()->next(6) ); ++reg_def F1_O ( SOC, SOC, Op_RegF, 1, F1->as_VMReg()->next(7) ); ++ ++reg_def F2 ( SOC, SOC, Op_RegF, 2, F2->as_VMReg() ); ++reg_def F2_H ( SOC, SOC, Op_RegF, 2, F2->as_VMReg()->next() ); ++reg_def F2_J ( SOC, SOC, Op_RegF, 2, F2->as_VMReg()->next(2) ); ++reg_def F2_K ( SOC, SOC, Op_RegF, 2, F2->as_VMReg()->next(3) ); ++reg_def F2_L ( SOC, SOC, Op_RegF, 2, F2->as_VMReg()->next(4) ); ++reg_def F2_M ( SOC, SOC, Op_RegF, 2, F2->as_VMReg()->next(5) ); ++reg_def F2_N ( SOC, SOC, Op_RegF, 2, F2->as_VMReg()->next(6) ); ++reg_def F2_O ( SOC, SOC, Op_RegF, 2, F2->as_VMReg()->next(7) ); ++ ++reg_def F3 ( SOC, SOC, Op_RegF, 3, F3->as_VMReg() ); ++reg_def F3_H ( SOC, SOC, Op_RegF, 3, F3->as_VMReg()->next() ); ++reg_def F3_J ( SOC, SOC, Op_RegF, 3, F3->as_VMReg()->next(2) ); ++reg_def F3_K ( SOC, SOC, Op_RegF, 3, F3->as_VMReg()->next(3) ); ++reg_def F3_L ( SOC, SOC, Op_RegF, 3, F3->as_VMReg()->next(4) ); ++reg_def F3_M ( SOC, SOC, Op_RegF, 3, F3->as_VMReg()->next(5) ); ++reg_def F3_N ( SOC, SOC, Op_RegF, 3, F3->as_VMReg()->next(6) ); ++reg_def F3_O ( SOC, SOC, Op_RegF, 3, F3->as_VMReg()->next(7) ); ++ ++reg_def F4 ( SOC, SOC, Op_RegF, 4, F4->as_VMReg() ); ++reg_def F4_H ( SOC, SOC, Op_RegF, 4, F4->as_VMReg()->next() ); ++reg_def F4_J ( SOC, SOC, Op_RegF, 4, F4->as_VMReg()->next(2) ); ++reg_def F4_K ( SOC, SOC, Op_RegF, 4, F4->as_VMReg()->next(3) ); ++reg_def F4_L ( SOC, SOC, Op_RegF, 4, F4->as_VMReg()->next(4) ); ++reg_def F4_M ( SOC, SOC, Op_RegF, 4, F4->as_VMReg()->next(5) ); ++reg_def F4_N ( SOC, SOC, Op_RegF, 4, F4->as_VMReg()->next(6) ); ++reg_def F4_O ( SOC, SOC, Op_RegF, 4, 
F4->as_VMReg()->next(7) ); ++ ++reg_def F5 ( SOC, SOC, Op_RegF, 5, F5->as_VMReg() ); ++reg_def F5_H ( SOC, SOC, Op_RegF, 5, F5->as_VMReg()->next() ); ++reg_def F5_J ( SOC, SOC, Op_RegF, 5, F5->as_VMReg()->next(2) ); ++reg_def F5_K ( SOC, SOC, Op_RegF, 5, F5->as_VMReg()->next(3) ); ++reg_def F5_L ( SOC, SOC, Op_RegF, 5, F5->as_VMReg()->next(4) ); ++reg_def F5_M ( SOC, SOC, Op_RegF, 5, F5->as_VMReg()->next(5) ); ++reg_def F5_N ( SOC, SOC, Op_RegF, 5, F5->as_VMReg()->next(6) ); ++reg_def F5_O ( SOC, SOC, Op_RegF, 5, F5->as_VMReg()->next(7) ); ++ ++reg_def F6 ( SOC, SOC, Op_RegF, 6, F6->as_VMReg() ); ++reg_def F6_H ( SOC, SOC, Op_RegF, 6, F6->as_VMReg()->next() ); ++reg_def F6_J ( SOC, SOC, Op_RegF, 6, F6->as_VMReg()->next(2) ); ++reg_def F6_K ( SOC, SOC, Op_RegF, 6, F6->as_VMReg()->next(3) ); ++reg_def F6_L ( SOC, SOC, Op_RegF, 6, F6->as_VMReg()->next(4) ); ++reg_def F6_M ( SOC, SOC, Op_RegF, 6, F6->as_VMReg()->next(5) ); ++reg_def F6_N ( SOC, SOC, Op_RegF, 6, F6->as_VMReg()->next(6) ); ++reg_def F6_O ( SOC, SOC, Op_RegF, 6, F6->as_VMReg()->next(7) ); ++ ++reg_def F7 ( SOC, SOC, Op_RegF, 7, F7->as_VMReg() ); ++reg_def F7_H ( SOC, SOC, Op_RegF, 7, F7->as_VMReg()->next() ); ++reg_def F7_J ( SOC, SOC, Op_RegF, 7, F7->as_VMReg()->next(2) ); ++reg_def F7_K ( SOC, SOC, Op_RegF, 7, F7->as_VMReg()->next(3) ); ++reg_def F7_L ( SOC, SOC, Op_RegF, 7, F7->as_VMReg()->next(4) ); ++reg_def F7_M ( SOC, SOC, Op_RegF, 7, F7->as_VMReg()->next(5) ); ++reg_def F7_N ( SOC, SOC, Op_RegF, 7, F7->as_VMReg()->next(6) ); ++reg_def F7_O ( SOC, SOC, Op_RegF, 7, F7->as_VMReg()->next(7) ); ++ ++reg_def F8 ( SOC, SOC, Op_RegF, 8, F8->as_VMReg() ); ++reg_def F8_H ( SOC, SOC, Op_RegF, 8, F8->as_VMReg()->next() ); ++reg_def F8_J ( SOC, SOC, Op_RegF, 8, F8->as_VMReg()->next(2) ); ++reg_def F8_K ( SOC, SOC, Op_RegF, 8, F8->as_VMReg()->next(3) ); ++reg_def F8_L ( SOC, SOC, Op_RegF, 8, F8->as_VMReg()->next(4) ); ++reg_def F8_M ( SOC, SOC, Op_RegF, 8, F8->as_VMReg()->next(5) ); ++reg_def F8_N ( SOC, SOC, Op_RegF, 8, F8->as_VMReg()->next(6) ); ++reg_def F8_O ( SOC, SOC, Op_RegF, 8, F8->as_VMReg()->next(7) ); ++ ++reg_def F9 ( SOC, SOC, Op_RegF, 9, F9->as_VMReg() ); ++reg_def F9_H ( SOC, SOC, Op_RegF, 9, F9->as_VMReg()->next() ); ++reg_def F9_J ( SOC, SOC, Op_RegF, 9, F9->as_VMReg()->next(2) ); ++reg_def F9_K ( SOC, SOC, Op_RegF, 9, F9->as_VMReg()->next(3) ); ++reg_def F9_L ( SOC, SOC, Op_RegF, 9, F9->as_VMReg()->next(4) ); ++reg_def F9_M ( SOC, SOC, Op_RegF, 9, F9->as_VMReg()->next(5) ); ++reg_def F9_N ( SOC, SOC, Op_RegF, 9, F9->as_VMReg()->next(6) ); ++reg_def F9_O ( SOC, SOC, Op_RegF, 9, F9->as_VMReg()->next(7) ); ++ ++reg_def F10 ( SOC, SOC, Op_RegF, 10, F10->as_VMReg() ); ++reg_def F10_H ( SOC, SOC, Op_RegF, 10, F10->as_VMReg()->next() ); ++reg_def F10_J ( SOC, SOC, Op_RegF, 10, F10->as_VMReg()->next(2) ); ++reg_def F10_K ( SOC, SOC, Op_RegF, 10, F10->as_VMReg()->next(3) ); ++reg_def F10_L ( SOC, SOC, Op_RegF, 10, F10->as_VMReg()->next(4) ); ++reg_def F10_M ( SOC, SOC, Op_RegF, 10, F10->as_VMReg()->next(5) ); ++reg_def F10_N ( SOC, SOC, Op_RegF, 10, F10->as_VMReg()->next(6) ); ++reg_def F10_O ( SOC, SOC, Op_RegF, 10, F10->as_VMReg()->next(7) ); ++ ++reg_def F11 ( SOC, SOC, Op_RegF, 11, F11->as_VMReg() ); ++reg_def F11_H ( SOC, SOC, Op_RegF, 11, F11->as_VMReg()->next() ); ++reg_def F11_J ( SOC, SOC, Op_RegF, 11, F11->as_VMReg()->next(2) ); ++reg_def F11_K ( SOC, SOC, Op_RegF, 11, F11->as_VMReg()->next(3) ); ++reg_def F11_L ( SOC, SOC, Op_RegF, 11, F11->as_VMReg()->next(4) ); ++reg_def F11_M ( SOC, SOC, Op_RegF, 11, 
F11->as_VMReg()->next(5) ); ++reg_def F11_N ( SOC, SOC, Op_RegF, 11, F11->as_VMReg()->next(6) ); ++reg_def F11_O ( SOC, SOC, Op_RegF, 11, F11->as_VMReg()->next(7) ); ++ ++reg_def F12 ( SOC, SOC, Op_RegF, 12, F12->as_VMReg() ); ++reg_def F12_H ( SOC, SOC, Op_RegF, 12, F12->as_VMReg()->next() ); ++reg_def F12_J ( SOC, SOC, Op_RegF, 12, F12->as_VMReg()->next(2) ); ++reg_def F12_K ( SOC, SOC, Op_RegF, 12, F12->as_VMReg()->next(3) ); ++reg_def F12_L ( SOC, SOC, Op_RegF, 12, F12->as_VMReg()->next(4) ); ++reg_def F12_M ( SOC, SOC, Op_RegF, 12, F12->as_VMReg()->next(5) ); ++reg_def F12_N ( SOC, SOC, Op_RegF, 12, F12->as_VMReg()->next(6) ); ++reg_def F12_O ( SOC, SOC, Op_RegF, 12, F12->as_VMReg()->next(7) ); ++ ++reg_def F13 ( SOC, SOC, Op_RegF, 13, F13->as_VMReg() ); ++reg_def F13_H ( SOC, SOC, Op_RegF, 13, F13->as_VMReg()->next() ); ++reg_def F13_J ( SOC, SOC, Op_RegF, 13, F13->as_VMReg()->next(2) ); ++reg_def F13_K ( SOC, SOC, Op_RegF, 13, F13->as_VMReg()->next(3) ); ++reg_def F13_L ( SOC, SOC, Op_RegF, 13, F13->as_VMReg()->next(4) ); ++reg_def F13_M ( SOC, SOC, Op_RegF, 13, F13->as_VMReg()->next(5) ); ++reg_def F13_N ( SOC, SOC, Op_RegF, 13, F13->as_VMReg()->next(6) ); ++reg_def F13_O ( SOC, SOC, Op_RegF, 13, F13->as_VMReg()->next(7) ); ++ ++reg_def F14 ( SOC, SOC, Op_RegF, 14, F14->as_VMReg() ); ++reg_def F14_H ( SOC, SOC, Op_RegF, 14, F14->as_VMReg()->next() ); ++reg_def F14_J ( SOC, SOC, Op_RegF, 14, F14->as_VMReg()->next(2) ); ++reg_def F14_K ( SOC, SOC, Op_RegF, 14, F14->as_VMReg()->next(3) ); ++reg_def F14_L ( SOC, SOC, Op_RegF, 14, F14->as_VMReg()->next(4) ); ++reg_def F14_M ( SOC, SOC, Op_RegF, 14, F14->as_VMReg()->next(5) ); ++reg_def F14_N ( SOC, SOC, Op_RegF, 14, F14->as_VMReg()->next(6) ); ++reg_def F14_O ( SOC, SOC, Op_RegF, 14, F14->as_VMReg()->next(7) ); ++ ++reg_def F15 ( SOC, SOC, Op_RegF, 15, F15->as_VMReg() ); ++reg_def F15_H ( SOC, SOC, Op_RegF, 15, F15->as_VMReg()->next() ); ++reg_def F15_J ( SOC, SOC, Op_RegF, 15, F15->as_VMReg()->next(2) ); ++reg_def F15_K ( SOC, SOC, Op_RegF, 15, F15->as_VMReg()->next(3) ); ++reg_def F15_L ( SOC, SOC, Op_RegF, 15, F15->as_VMReg()->next(4) ); ++reg_def F15_M ( SOC, SOC, Op_RegF, 15, F15->as_VMReg()->next(5) ); ++reg_def F15_N ( SOC, SOC, Op_RegF, 15, F15->as_VMReg()->next(6) ); ++reg_def F15_O ( SOC, SOC, Op_RegF, 15, F15->as_VMReg()->next(7) ); ++ ++reg_def F16 ( SOC, SOC, Op_RegF, 16, F16->as_VMReg() ); ++reg_def F16_H ( SOC, SOC, Op_RegF, 16, F16->as_VMReg()->next() ); ++reg_def F16_J ( SOC, SOC, Op_RegF, 16, F16->as_VMReg()->next(2) ); ++reg_def F16_K ( SOC, SOC, Op_RegF, 16, F16->as_VMReg()->next(3) ); ++reg_def F16_L ( SOC, SOC, Op_RegF, 16, F16->as_VMReg()->next(4) ); ++reg_def F16_M ( SOC, SOC, Op_RegF, 16, F16->as_VMReg()->next(5) ); ++reg_def F16_N ( SOC, SOC, Op_RegF, 16, F16->as_VMReg()->next(6) ); ++reg_def F16_O ( SOC, SOC, Op_RegF, 16, F16->as_VMReg()->next(7) ); ++ ++reg_def F17 ( SOC, SOC, Op_RegF, 17, F17->as_VMReg() ); ++reg_def F17_H ( SOC, SOC, Op_RegF, 17, F17->as_VMReg()->next() ); ++reg_def F17_J ( SOC, SOC, Op_RegF, 17, F17->as_VMReg()->next(2) ); ++reg_def F17_K ( SOC, SOC, Op_RegF, 17, F17->as_VMReg()->next(3) ); ++reg_def F17_L ( SOC, SOC, Op_RegF, 17, F17->as_VMReg()->next(4) ); ++reg_def F17_M ( SOC, SOC, Op_RegF, 17, F17->as_VMReg()->next(5) ); ++reg_def F17_N ( SOC, SOC, Op_RegF, 17, F17->as_VMReg()->next(6) ); ++reg_def F17_O ( SOC, SOC, Op_RegF, 17, F17->as_VMReg()->next(7) ); ++ ++reg_def F18 ( SOC, SOC, Op_RegF, 18, F18->as_VMReg() ); ++reg_def F18_H ( SOC, SOC, Op_RegF, 18, F18->as_VMReg()->next() ); 
++reg_def F18_J ( SOC, SOC, Op_RegF, 18, F18->as_VMReg()->next(2) ); ++reg_def F18_K ( SOC, SOC, Op_RegF, 18, F18->as_VMReg()->next(3) ); ++reg_def F18_L ( SOC, SOC, Op_RegF, 18, F18->as_VMReg()->next(4) ); ++reg_def F18_M ( SOC, SOC, Op_RegF, 18, F18->as_VMReg()->next(5) ); ++reg_def F18_N ( SOC, SOC, Op_RegF, 18, F18->as_VMReg()->next(6) ); ++reg_def F18_O ( SOC, SOC, Op_RegF, 18, F18->as_VMReg()->next(7) ); ++ ++reg_def F19 ( SOC, SOC, Op_RegF, 19, F19->as_VMReg() ); ++reg_def F19_H ( SOC, SOC, Op_RegF, 19, F19->as_VMReg()->next() ); ++reg_def F19_J ( SOC, SOC, Op_RegF, 19, F19->as_VMReg()->next(2) ); ++reg_def F19_K ( SOC, SOC, Op_RegF, 19, F19->as_VMReg()->next(3) ); ++reg_def F19_L ( SOC, SOC, Op_RegF, 19, F19->as_VMReg()->next(4) ); ++reg_def F19_M ( SOC, SOC, Op_RegF, 19, F19->as_VMReg()->next(5) ); ++reg_def F19_N ( SOC, SOC, Op_RegF, 19, F19->as_VMReg()->next(6) ); ++reg_def F19_O ( SOC, SOC, Op_RegF, 19, F19->as_VMReg()->next(7) ); ++ ++reg_def F20 ( SOC, SOC, Op_RegF, 20, F20->as_VMReg() ); ++reg_def F20_H ( SOC, SOC, Op_RegF, 20, F20->as_VMReg()->next() ); ++reg_def F20_J ( SOC, SOC, Op_RegF, 20, F20->as_VMReg()->next(2) ); ++reg_def F20_K ( SOC, SOC, Op_RegF, 20, F20->as_VMReg()->next(3) ); ++reg_def F20_L ( SOC, SOC, Op_RegF, 20, F20->as_VMReg()->next(4) ); ++reg_def F20_M ( SOC, SOC, Op_RegF, 20, F20->as_VMReg()->next(5) ); ++reg_def F20_N ( SOC, SOC, Op_RegF, 20, F20->as_VMReg()->next(6) ); ++reg_def F20_O ( SOC, SOC, Op_RegF, 20, F20->as_VMReg()->next(7) ); ++ ++reg_def F21 ( SOC, SOC, Op_RegF, 21, F21->as_VMReg() ); ++reg_def F21_H ( SOC, SOC, Op_RegF, 21, F21->as_VMReg()->next() ); ++reg_def F21_J ( SOC, SOC, Op_RegF, 21, F21->as_VMReg()->next(2) ); ++reg_def F21_K ( SOC, SOC, Op_RegF, 21, F21->as_VMReg()->next(3) ); ++reg_def F21_L ( SOC, SOC, Op_RegF, 21, F21->as_VMReg()->next(4) ); ++reg_def F21_M ( SOC, SOC, Op_RegF, 21, F21->as_VMReg()->next(5) ); ++reg_def F21_N ( SOC, SOC, Op_RegF, 21, F21->as_VMReg()->next(6) ); ++reg_def F21_O ( SOC, SOC, Op_RegF, 21, F21->as_VMReg()->next(7) ); ++ ++reg_def F22 ( SOC, SOC, Op_RegF, 22, F22->as_VMReg() ); ++reg_def F22_H ( SOC, SOC, Op_RegF, 22, F22->as_VMReg()->next() ); ++reg_def F22_J ( SOC, SOC, Op_RegF, 22, F22->as_VMReg()->next(2) ); ++reg_def F22_K ( SOC, SOC, Op_RegF, 22, F22->as_VMReg()->next(3) ); ++reg_def F22_L ( SOC, SOC, Op_RegF, 22, F22->as_VMReg()->next(4) ); ++reg_def F22_M ( SOC, SOC, Op_RegF, 22, F22->as_VMReg()->next(5) ); ++reg_def F22_N ( SOC, SOC, Op_RegF, 22, F22->as_VMReg()->next(6) ); ++reg_def F22_O ( SOC, SOC, Op_RegF, 22, F22->as_VMReg()->next(7) ); ++ ++reg_def F23 ( SOC, SOC, Op_RegF, 23, F23->as_VMReg() ); ++reg_def F23_H ( SOC, SOC, Op_RegF, 23, F23->as_VMReg()->next() ); ++reg_def F23_J ( SOC, SOC, Op_RegF, 23, F23->as_VMReg()->next(2) ); ++reg_def F23_K ( SOC, SOC, Op_RegF, 23, F23->as_VMReg()->next(3) ); ++reg_def F23_L ( SOC, SOC, Op_RegF, 23, F23->as_VMReg()->next(4) ); ++reg_def F23_M ( SOC, SOC, Op_RegF, 23, F23->as_VMReg()->next(5) ); ++reg_def F23_N ( SOC, SOC, Op_RegF, 23, F23->as_VMReg()->next(6) ); ++reg_def F23_O ( SOC, SOC, Op_RegF, 23, F23->as_VMReg()->next(7) ); ++ ++reg_def F24 ( SOC, SOC, Op_RegF, 24, F24->as_VMReg() ); ++reg_def F24_H ( SOC, SOC, Op_RegF, 24, F24->as_VMReg()->next() ); ++reg_def F24_J ( SOC, SOC, Op_RegF, 24, F24->as_VMReg()->next(2) ); ++reg_def F24_K ( SOC, SOC, Op_RegF, 24, F24->as_VMReg()->next(3) ); ++reg_def F24_L ( SOC, SOC, Op_RegF, 24, F24->as_VMReg()->next(4) ); ++reg_def F24_M ( SOC, SOC, Op_RegF, 24, F24->as_VMReg()->next(5) ); ++reg_def F24_N ( 
SOC, SOC, Op_RegF, 24, F24->as_VMReg()->next(6) ); ++reg_def F24_O ( SOC, SOC, Op_RegF, 24, F24->as_VMReg()->next(7) ); ++ ++reg_def F25 ( SOC, SOC, Op_RegF, 25, F25->as_VMReg() ); ++reg_def F25_H ( SOC, SOC, Op_RegF, 25, F25->as_VMReg()->next() ); ++reg_def F25_J ( SOC, SOC, Op_RegF, 25, F25->as_VMReg()->next(2) ); ++reg_def F25_K ( SOC, SOC, Op_RegF, 25, F25->as_VMReg()->next(3) ); ++reg_def F25_L ( SOC, SOC, Op_RegF, 25, F25->as_VMReg()->next(4) ); ++reg_def F25_M ( SOC, SOC, Op_RegF, 25, F25->as_VMReg()->next(5) ); ++reg_def F25_N ( SOC, SOC, Op_RegF, 25, F25->as_VMReg()->next(6) ); ++reg_def F25_O ( SOC, SOC, Op_RegF, 25, F25->as_VMReg()->next(7) ); ++ ++reg_def F26 ( SOC, SOC, Op_RegF, 26, F26->as_VMReg() ); ++reg_def F26_H ( SOC, SOC, Op_RegF, 26, F26->as_VMReg()->next() ); ++reg_def F26_J ( SOC, SOC, Op_RegF, 26, F26->as_VMReg()->next(2) ); ++reg_def F26_K ( SOC, SOC, Op_RegF, 26, F26->as_VMReg()->next(3) ); ++reg_def F26_L ( SOC, SOC, Op_RegF, 26, F26->as_VMReg()->next(4) ); ++reg_def F26_M ( SOC, SOC, Op_RegF, 26, F26->as_VMReg()->next(5) ); ++reg_def F26_N ( SOC, SOC, Op_RegF, 26, F26->as_VMReg()->next(6) ); ++reg_def F26_O ( SOC, SOC, Op_RegF, 26, F26->as_VMReg()->next(7) ); ++ ++reg_def F27 ( SOC, SOC, Op_RegF, 27, F27->as_VMReg() ); ++reg_def F27_H ( SOC, SOC, Op_RegF, 27, F27->as_VMReg()->next() ); ++reg_def F27_J ( SOC, SOC, Op_RegF, 27, F27->as_VMReg()->next(2) ); ++reg_def F27_K ( SOC, SOC, Op_RegF, 27, F27->as_VMReg()->next(3) ); ++reg_def F27_L ( SOC, SOC, Op_RegF, 27, F27->as_VMReg()->next(4) ); ++reg_def F27_M ( SOC, SOC, Op_RegF, 27, F27->as_VMReg()->next(5) ); ++reg_def F27_N ( SOC, SOC, Op_RegF, 27, F27->as_VMReg()->next(6) ); ++reg_def F27_O ( SOC, SOC, Op_RegF, 27, F27->as_VMReg()->next(7) ); ++ ++reg_def F28 ( SOC, SOC, Op_RegF, 28, F28->as_VMReg() ); ++reg_def F28_H ( SOC, SOC, Op_RegF, 28, F28->as_VMReg()->next() ); ++reg_def F28_J ( SOC, SOC, Op_RegF, 28, F28->as_VMReg()->next(2) ); ++reg_def F28_K ( SOC, SOC, Op_RegF, 28, F28->as_VMReg()->next(3) ); ++reg_def F28_L ( SOC, SOC, Op_RegF, 28, F28->as_VMReg()->next(4) ); ++reg_def F28_M ( SOC, SOC, Op_RegF, 28, F28->as_VMReg()->next(5) ); ++reg_def F28_N ( SOC, SOC, Op_RegF, 28, F28->as_VMReg()->next(6) ); ++reg_def F28_O ( SOC, SOC, Op_RegF, 28, F28->as_VMReg()->next(7) ); ++ ++reg_def F29 ( SOC, SOC, Op_RegF, 29, F29->as_VMReg() ); ++reg_def F29_H ( SOC, SOC, Op_RegF, 29, F29->as_VMReg()->next() ); ++reg_def F29_J ( SOC, SOC, Op_RegF, 29, F29->as_VMReg()->next(2) ); ++reg_def F29_K ( SOC, SOC, Op_RegF, 29, F29->as_VMReg()->next(3) ); ++reg_def F29_L ( SOC, SOC, Op_RegF, 29, F29->as_VMReg()->next(4) ); ++reg_def F29_M ( SOC, SOC, Op_RegF, 29, F29->as_VMReg()->next(5) ); ++reg_def F29_N ( SOC, SOC, Op_RegF, 29, F29->as_VMReg()->next(6) ); ++reg_def F29_O ( SOC, SOC, Op_RegF, 29, F29->as_VMReg()->next(7) ); ++ ++reg_def F30 ( SOC, SOC, Op_RegF, 30, F30->as_VMReg() ); ++reg_def F30_H ( SOC, SOC, Op_RegF, 30, F30->as_VMReg()->next() ); ++reg_def F30_J ( SOC, SOC, Op_RegF, 30, F30->as_VMReg()->next(2) ); ++reg_def F30_K ( SOC, SOC, Op_RegF, 30, F30->as_VMReg()->next(3) ); ++reg_def F30_L ( SOC, SOC, Op_RegF, 30, F30->as_VMReg()->next(4) ); ++reg_def F30_M ( SOC, SOC, Op_RegF, 30, F30->as_VMReg()->next(5) ); ++reg_def F30_N ( SOC, SOC, Op_RegF, 30, F30->as_VMReg()->next(6) ); ++reg_def F30_O ( SOC, SOC, Op_RegF, 30, F30->as_VMReg()->next(7) ); ++ ++reg_def F31 ( SOC, SOC, Op_RegF, 31, F31->as_VMReg() ); ++reg_def F31_H ( SOC, SOC, Op_RegF, 31, F31->as_VMReg()->next() ); ++reg_def F31_J ( SOC, SOC, Op_RegF, 31, 
F31->as_VMReg()->next(2) ); ++reg_def F31_K ( SOC, SOC, Op_RegF, 31, F31->as_VMReg()->next(3) ); ++reg_def F31_L ( SOC, SOC, Op_RegF, 31, F31->as_VMReg()->next(4) ); ++reg_def F31_M ( SOC, SOC, Op_RegF, 31, F31->as_VMReg()->next(5) ); ++reg_def F31_N ( SOC, SOC, Op_RegF, 31, F31->as_VMReg()->next(6) ); ++reg_def F31_O ( SOC, SOC, Op_RegF, 31, F31->as_VMReg()->next(7) ); ++ ++ ++// ---------------------------- ++// Special Registers ++//S6 is used for get_thread(S6) ++//S5 is uesd for heapbase of compressed oop ++alloc_class chunk0( ++ S7, S7_H, ++ S0, S0_H, ++ S1, S1_H, ++ S2, S2_H, ++ S4, S4_H, ++ S5, S5_H, ++ S6, S6_H, ++ S3, S3_H, ++ T2, T2_H, ++ T3, T3_H, ++ T8, T8_H, ++ T4, T4_H, ++ T1, T1_H, // inline_cache_reg ++ T6, T6_H, ++ A7, A7_H, ++ A6, A6_H, ++ A5, A5_H, ++ A4, A4_H, ++ T5, T5_H, ++ A3, A3_H, ++ A2, A2_H, ++ A1, A1_H, ++ A0, A0_H, ++ T0, T0_H, ++ S8, S8_H ++ RA, RA_H, ++ SP, SP_H, // stack_pointer ++ FP, FP_H // frame_pointer ++ ); ++ ++// F23 is scratch reg ++alloc_class chunk1( F0, F0_H, F0_J, F0_K, F0_L, F0_M, F0_N, F0_O, ++ F1, F1_H, F1_J, F1_K, F1_L, F1_M, F1_N, F1_O, ++ F2, F2_H, F2_J, F2_K, F2_L, F2_M, F2_N, F2_O, ++ F3, F3_H, F3_J, F3_K, F3_L, F3_M, F3_N, F3_O, ++ F4, F4_H, F4_J, F4_K, F4_L, F4_M, F4_N, F4_O, ++ F5, F5_H, F5_J, F5_K, F5_L, F5_M, F5_N, F5_O, ++ F6, F6_H, F6_J, F6_K, F6_L, F6_M, F6_N, F6_O, ++ F7, F7_H, F7_J, F7_K, F7_L, F7_M, F7_N, F7_O, ++ F8, F8_H, F8_J, F8_K, F8_L, F8_M, F8_N, F8_O, ++ F9, F9_H, F9_J, F9_K, F9_L, F9_M, F9_N, F9_O, ++ F10, F10_H, F10_J, F10_K, F10_L, F10_M, F10_N, F10_O, ++ F11, F11_H, F11_J, F11_K, F11_L, F11_M, F11_N, F11_O, ++ F12, F12_H, F12_J, F12_K, F12_L, F12_M, F12_N, F12_O, ++ F13, F13_H, F13_J, F13_K, F13_L, F13_M, F13_N, F13_O, ++ F14, F14_H, F14_J, F14_K, F14_L, F14_M, F14_N, F14_O, ++ F15, F15_H, F15_J, F15_K, F15_L, F15_M, F15_N, F15_O, ++ F16, F16_H, F16_J, F16_K, F16_L, F16_M, F16_N, F16_O, ++ F17, F17_H, F17_J, F17_K, F17_L, F17_M, F17_N, F17_O, ++ F18, F18_H, F18_J, F18_K, F18_L, F18_M, F18_N, F18_O, ++ F19, F19_H, F19_J, F19_K, F19_L, F19_M, F19_N, F19_O, ++ F20, F20_H, F20_J, F20_K, F20_L, F20_M, F20_N, F20_O, ++ F21, F21_H, F21_J, F21_K, F21_L, F21_M, F21_N, F21_O, ++ F22, F22_H, F22_J, F22_K, F22_L, F22_M, F22_N, F22_O, ++ F24, F24_H, F24_J, F24_K, F24_L, F24_M, F24_N, F24_O, ++ F25, F25_H, F25_J, F25_K, F25_L, F25_M, F25_N, F25_O, ++ F26, F26_H, F26_J, F26_K, F26_L, F26_M, F26_N, F26_O, ++ F27, F27_H, F27_J, F27_K, F27_L, F27_M, F27_N, F27_O, ++ F28, F28_H, F28_J, F28_K, F28_L, F28_M, F28_N, F28_O, ++ F29, F29_H, F29_J, F29_K, F29_L, F29_M, F29_N, F29_O, ++ F30, F30_H, F30_J, F30_K, F30_L, F30_M, F30_N, F30_O, ++ F31, F31_H, F31_J, F31_K, F31_L, F31_M, F31_N, F31_O); ++ ++reg_class s_reg( S0, S1, S2, S3, S4, S5, S6, S7 ); ++reg_class s0_reg( S0 ); ++reg_class s1_reg( S1 ); ++reg_class s2_reg( S2 ); ++reg_class s3_reg( S3 ); ++reg_class s4_reg( S4 ); ++reg_class s5_reg( S5 ); ++reg_class s6_reg( S6 ); ++reg_class s7_reg( S7 ); ++ ++reg_class t_reg( T0, T1, T2, T3, T8, T4 ); ++reg_class t0_reg( T0 ); ++reg_class t1_reg( T1 ); ++reg_class t2_reg( T2 ); ++reg_class t3_reg( T3 ); ++reg_class t8_reg( T8 ); ++reg_class t4_reg( T4 ); ++ ++reg_class a_reg( A0, A1, A2, A3, A4, A5, A6, A7 ); ++reg_class a0_reg( A0 ); ++reg_class a1_reg( A1 ); ++reg_class a2_reg( A2 ); ++reg_class a3_reg( A3 ); ++reg_class a4_reg( A4 ); ++reg_class a5_reg( A5 ); ++reg_class a6_reg( A6 ); ++reg_class a7_reg( A7 ); ++ ++// TODO: LA ++//reg_class v0_reg( A0 ); ++//reg_class v1_reg( A1 ); ++ ++reg_class sp_reg( SP, SP_H ); ++reg_class fp_reg( 
FP, FP_H ); ++ ++reg_class v0_long_reg( A0, A0_H ); ++reg_class v1_long_reg( A1, A1_H ); ++reg_class a0_long_reg( A0, A0_H ); ++reg_class a1_long_reg( A1, A1_H ); ++reg_class a2_long_reg( A2, A2_H ); ++reg_class a3_long_reg( A3, A3_H ); ++reg_class a4_long_reg( A4, A4_H ); ++reg_class a5_long_reg( A5, A5_H ); ++reg_class a6_long_reg( A6, A6_H ); ++reg_class a7_long_reg( A7, A7_H ); ++reg_class t0_long_reg( T0, T0_H ); ++reg_class t1_long_reg( T1, T1_H ); ++reg_class t2_long_reg( T2, T2_H ); ++reg_class t3_long_reg( T3, T3_H ); ++reg_class t8_long_reg( T8, T8_H ); ++reg_class t4_long_reg( T4, T4_H ); ++reg_class s0_long_reg( S0, S0_H ); ++reg_class s1_long_reg( S1, S1_H ); ++reg_class s2_long_reg( S2, S2_H ); ++reg_class s3_long_reg( S3, S3_H ); ++reg_class s4_long_reg( S4, S4_H ); ++reg_class s5_long_reg( S5, S5_H ); ++reg_class s6_long_reg( S6, S6_H ); ++reg_class s7_long_reg( S7, S7_H ); ++ ++reg_class int_reg( S7, S0, S1, S2, S4, S3, T8, T2, T3, T1, T6, A7, A6, A5, A4, T5, A3, A2, A1, A0, T0 ); ++ ++reg_class no_Ax_int_reg( S7, S0, S1, S2, S4, S3, T8, T2, T3, T1, T6, T5, T0 ); ++ ++reg_class p_reg( ++ S7, S7_H, ++ S0, S0_H, ++ S1, S1_H, ++ S2, S2_H, ++ S4, S4_H, ++ S3, S3_H, ++ T8, T8_H, ++ T2, T2_H, ++ T3, T3_H, ++ T1, T1_H, ++ A7, A7_H, ++ A6, A6_H, ++ A5, A5_H, ++ A4, A4_H, ++ A3, A3_H, ++ A2, A2_H, ++ A1, A1_H, ++ A0, A0_H, ++ T0, T0_H ++ ); ++ ++reg_class no_T8_p_reg( ++ S7, S7_H, ++ S0, S0_H, ++ S1, S1_H, ++ S2, S2_H, ++ S4, S4_H, ++ S3, S3_H, ++ T2, T2_H, ++ T3, T3_H, ++ T1, T1_H, ++ A7, A7_H, ++ A6, A6_H, ++ A5, A5_H, ++ A4, A4_H, ++ A3, A3_H, ++ A2, A2_H, ++ A1, A1_H, ++ A0, A0_H, ++ T0, T0_H ++ ); ++ ++reg_class no_Ax_p_reg( ++ S7, S7_H, ++ S0, S0_H, ++ S1, S1_H, ++ S2, S2_H, ++ S4, S4_H, ++ S3, S3_H, ++ T2, T2_H, ++ T3, T3_H, ++ T1, T1_H, ++ T0, T0_H ++ ); ++ ++reg_class long_reg( ++ S7, S7_H, ++ S0, S0_H, ++ S1, S1_H, ++ S2, S2_H, ++ S4, S4_H, ++ S3, S3_H, ++ T8, T8_H, ++ T2, T2_H, ++ T3, T3_H, ++ T1, T1_H, ++ A7, A7_H, ++ A6, A6_H, ++ A5, A5_H, ++ A4, A4_H, ++ A3, A3_H, ++ A2, A2_H, ++ A1, A1_H, ++ A0, A0_H, ++ T0, T0_H ++ ); ++ ++ ++// Floating point registers. 
++// F31 are not used as temporary registers in D2I ++reg_class flt_reg( F0, F1, F2, F3, F4, F5, F6, F7, F8, F9, F10, F11, F12, F13, F14, F15, F16, F17, F18, F19, F20, F21, F22, F24, F25, F26, F27, F28, F29, F30, F31); ++reg_class dbl_reg( F0, F0_H, ++ F1, F1_H, ++ F2, F2_H, ++ F3, F3_H, ++ F4, F4_H, ++ F5, F5_H, ++ F6, F6_H, ++ F7, F7_H, ++ F8, F8_H, ++ F9, F9_H, ++ F10, F10_H, ++ F11, F11_H, ++ F12, F12_H, ++ F13, F13_H, ++ F14, F14_H, ++ F15, F15_H, ++ F16, F16_H, ++ F17, F17_H, ++ F18, F18_H, ++ F19, F19_H, ++ F20, F20_H, ++ F21, F21_H, ++ F22, F22_H, ++ F24, F24_H, ++ F25, F25_H, ++ F26, F26_H, ++ F27, F27_H, ++ F28, F28_H, ++ F29, F29_H, ++ F30, F30_H, ++ F31, F31_H); ++ ++// Class for all 128bit vector registers ++reg_class vectorx_reg( F0, F0_H, F0_J, F0_K, ++ F1, F1_H, F1_J, F1_K, ++ F2, F2_H, F2_J, F2_K, ++ F3, F3_H, F3_J, F3_K, ++ F4, F4_H, F4_J, F4_K, ++ F5, F5_H, F5_J, F5_K, ++ F6, F6_H, F6_J, F6_K, ++ F7, F7_H, F7_J, F7_K, ++ F8, F8_H, F8_J, F8_K, ++ F9, F9_H, F9_J, F9_K, ++ F10, F10_H, F10_J, F10_K, ++ F11, F11_H, F11_J, F11_K, ++ F12, F12_H, F12_J, F12_K, ++ F13, F13_H, F13_J, F13_K, ++ F14, F14_H, F14_J, F14_K, ++ F15, F15_H, F15_J, F15_K, ++ F16, F16_H, F16_J, F16_K, ++ F17, F17_H, F17_J, F17_K, ++ F18, F18_H, F18_J, F18_K, ++ F19, F19_H, F19_J, F19_K, ++ F20, F20_H, F20_J, F20_K, ++ F21, F21_H, F21_J, F21_K, ++ F22, F22_H, F22_J, F22_K, ++ F24, F24_H, F24_J, F24_K, ++ F25, F25_H, F25_J, F25_K, ++ F26, F26_H, F26_J, F26_K, ++ F27, F27_H, F27_J, F27_K, ++ F28, F28_H, F28_J, F28_K, ++ F29, F29_H, F29_J, F29_K, ++ F30, F30_H, F30_J, F30_K, ++ F31, F31_H, F31_J, F31_K); ++ ++// Class for all 256bit vector registers ++reg_class vectory_reg( F0, F0_H, F0_J, F0_K, F0_L, F0_M, F0_N, F0_O, ++ F1, F1_H, F1_J, F1_K, F1_L, F1_M, F1_N, F1_O, ++ F2, F2_H, F2_J, F2_K, F2_L, F2_M, F2_N, F2_O, ++ F3, F3_H, F3_J, F3_K, F3_L, F3_M, F3_N, F3_O, ++ F4, F4_H, F4_J, F4_K, F4_L, F4_M, F4_N, F4_O, ++ F5, F5_H, F5_J, F5_K, F5_L, F5_M, F5_N, F5_O, ++ F6, F6_H, F6_J, F6_K, F6_L, F6_M, F6_N, F6_O, ++ F7, F7_H, F7_J, F7_K, F7_L, F7_M, F7_N, F7_O, ++ F8, F8_H, F8_J, F8_K, F8_L, F8_M, F8_N, F8_O, ++ F9, F9_H, F9_J, F9_K, F9_L, F9_M, F9_N, F9_O, ++ F10, F10_H, F10_J, F10_K, F10_L, F10_M, F10_N, F10_O, ++ F11, F11_H, F11_J, F11_K, F11_L, F11_M, F11_N, F11_O, ++ F12, F12_H, F12_J, F12_K, F12_L, F12_M, F12_N, F12_O, ++ F13, F13_H, F13_J, F13_K, F13_L, F13_M, F13_N, F13_O, ++ F14, F14_H, F14_J, F14_K, F14_L, F14_M, F14_N, F14_O, ++ F15, F15_H, F15_J, F15_K, F15_L, F15_M, F15_N, F15_O, ++ F16, F16_H, F16_J, F16_K, F16_L, F16_M, F16_N, F16_O, ++ F17, F17_H, F17_J, F17_K, F17_L, F17_M, F17_N, F17_O, ++ F18, F18_H, F18_J, F18_K, F18_L, F18_M, F18_N, F18_O, ++ F19, F19_H, F19_J, F19_K, F19_L, F19_M, F19_N, F19_O, ++ F20, F20_H, F20_J, F20_K, F20_L, F20_M, F20_N, F20_O, ++ F21, F21_H, F21_J, F21_K, F21_L, F21_M, F21_N, F21_O, ++ F22, F22_H, F22_J, F22_K, F22_L, F22_M, F22_N, F22_O, ++ F24, F24_H, F24_J, F24_K, F24_L, F24_M, F24_N, F24_O, ++ F25, F25_H, F25_J, F25_K, F25_L, F25_M, F25_N, F25_O, ++ F26, F26_H, F26_J, F26_K, F26_L, F26_M, F26_N, F26_O, ++ F27, F27_H, F27_J, F27_K, F27_L, F27_M, F27_N, F27_O, ++ F28, F28_H, F28_J, F28_K, F28_L, F28_M, F28_N, F28_O, ++ F29, F29_H, F29_J, F29_K, F29_L, F29_M, F29_N, F29_O, ++ F30, F30_H, F30_J, F30_K, F30_L, F30_M, F30_N, F30_O, ++ F31, F31_H, F31_J, F31_K, F31_L, F31_M, F31_N, F31_O); ++ ++// TODO: LA ++//reg_class flt_arg0( F0 ); ++//reg_class dbl_arg0( F0, F0_H ); ++//reg_class dbl_arg1( F1, F1_H ); ++ ++%} ++ ++//----------DEFINITION 
BLOCK--------------------------------------------------- ++// Define name --> value mappings to inform the ADLC of an integer valued name ++// Current support includes integer values in the range [0, 0x7FFFFFFF] ++// Format: ++// int_def ( , ); ++// Generated Code in ad_.hpp ++// #define () ++// // value == ++// Generated code in ad_.cpp adlc_verification() ++// assert( == , "Expect () to equal "); ++// ++definitions %{ ++ int_def DEFAULT_COST ( 100, 100); ++ int_def HUGE_COST (1000000, 1000000); ++ ++ // Memory refs are twice as expensive as run-of-the-mill. ++ int_def MEMORY_REF_COST ( 200, DEFAULT_COST * 2); ++ ++ // Branches are even more expensive. ++ int_def BRANCH_COST ( 300, DEFAULT_COST * 3); ++ // we use jr instruction to construct call, so more expensive ++ int_def CALL_COST ( 500, DEFAULT_COST * 5); ++/* ++ int_def EQUAL ( 1, 1 ); ++ int_def NOT_EQUAL ( 2, 2 ); ++ int_def GREATER ( 3, 3 ); ++ int_def GREATER_EQUAL ( 4, 4 ); ++ int_def LESS ( 5, 5 ); ++ int_def LESS_EQUAL ( 6, 6 ); ++*/ ++%} ++ ++ ++ ++//----------SOURCE BLOCK------------------------------------------------------- ++// This is a block of C++ code which provides values, functions, and ++// definitions necessary in the rest of the architecture description ++ ++source_hpp %{ ++// Header information of the source block. ++// Method declarations/definitions which are used outside ++// the ad-scope can conveniently be defined here. ++// ++// To keep related declarations/definitions/uses close together, ++// we switch between source %{ }% and source_hpp %{ }% freely as needed. ++ ++class CallStubImpl { ++ ++ //-------------------------------------------------------------- ++ //---< Used for optimization in Compile::shorten_branches >--- ++ //-------------------------------------------------------------- ++ ++ public: ++ // Size of call trampoline stub. ++ static uint size_call_trampoline() { ++ return 0; // no call trampolines on this platform ++ } ++ ++ // number of relocations needed by a call trampoline stub ++ static uint reloc_call_trampoline() { ++ return 0; // no call trampolines on this platform ++ } ++}; ++ ++class HandlerImpl { ++ ++ public: ++ ++ static int emit_exception_handler(CodeBuffer &cbuf); ++ static int emit_deopt_handler(CodeBuffer& cbuf); ++ ++ static uint size_exception_handler() { ++ // NativeCall instruction size is the same as NativeJump. ++ // exception handler starts out as jump and can be patched to ++ // a call be deoptimization. (4932387) ++ // Note that this value is also credited (in output.cpp) to ++ // the size of the code section. ++ int size = NativeFarCall::instruction_size; ++ return round_to(size, 16); ++ } ++ ++ static uint size_deopt_handler() { ++ int size = NativeFarCall::instruction_size; ++ return round_to(size, 16); ++ } ++}; ++ ++%} // end source_hpp ++ ++source %{ ++ ++#define NO_INDEX 0 ++#define RELOC_IMM64 Assembler::imm_operand ++#define RELOC_DISP32 Assembler::disp32_operand ++ ++#define V0_num A0_num ++#define V0_H_num A0_H_num ++ ++#define __ _masm. ++ ++#define A0 RA0 ++#define A1 RA1 ++#define A2 RA2 ++#define A3 RA3 ++#define A4 RA4 ++#define A5 RA5 ++#define A6 RA6 ++#define A7 RA7 ++#define T0 RT0 ++#define T1 RT1 ++#define T2 RT2 ++#define T3 RT3 ++#define T4 RT4 ++#define T5 RT5 ++#define T6 RT6 ++#define T7 RT7 ++#define T8 RT8 ++ ++// Emit exception handler code. ++// Stuff framesize into a register and call a VM stub routine. 
++int HandlerImpl::emit_exception_handler(CodeBuffer& cbuf) { ++ // Note that the code buffer's insts_mark is always relative to insts. ++ // That's why we must use the macroassembler to generate a handler. ++ MacroAssembler _masm(&cbuf); ++ address base = __ start_a_stub(size_exception_handler()); ++ if (base == NULL) { ++ ciEnv::current()->record_failure("CodeCache is full"); ++ return 0; // CodeBuffer::expand failed ++ } ++ ++ int offset = __ offset(); ++ ++ __ block_comment("; emit_exception_handler"); ++ ++ cbuf.set_insts_mark(); ++ __ relocate(relocInfo::runtime_call_type); ++ __ patchable_jump((address)OptoRuntime::exception_blob()->entry_point()); ++ assert(__ offset() - offset <= (int) size_exception_handler(), "overflow"); ++ __ end_a_stub(); ++ return offset; ++} ++ ++// Emit deopt handler code. ++int HandlerImpl::emit_deopt_handler(CodeBuffer& cbuf) { ++ // Note that the code buffer's insts_mark is always relative to insts. ++ // That's why we must use the macroassembler to generate a handler. ++ MacroAssembler _masm(&cbuf); ++ address base = __ start_a_stub(size_deopt_handler()); ++ if (base == NULL) { ++ ciEnv::current()->record_failure("CodeCache is full"); ++ return 0; // CodeBuffer::expand failed ++ } ++ ++ int offset = __ offset(); ++ ++ __ block_comment("; emit_deopt_handler"); ++ ++ cbuf.set_insts_mark(); ++ __ relocate(relocInfo::runtime_call_type); ++ __ patchable_call(SharedRuntime::deopt_blob()->unpack()); ++ assert(__ offset() - offset <= (int) size_deopt_handler(), "overflow"); ++ __ end_a_stub(); ++ return offset; ++} ++ ++ ++const bool Matcher::match_rule_supported(int opcode) { ++ if (!has_match_rule(opcode)) ++ return false; ++ ++ return true; // Per default match rules are supported. ++} ++ ++bool Matcher::is_short_branch_offset(int rule, int br_size, int offset) { ++ const int safety_zone = 3 * BytesPerInstWord; ++ int offs = offset - br_size + 4; ++ // To be conservative on LoongArch ++ // branch node should be end with: ++ // branch inst ++ offs = (offs < 0 ? offs - safety_zone : offs + safety_zone) >> 2; ++ switch (rule) { ++ case jmpDir_long_rule: ++ case jmpDir_short_rule: ++ return Assembler::is_simm(offs, 26); ++ case jmpCon_flags_long_rule: ++ case jmpCon_flags_short_rule: ++ case branchConP_0_long_rule: ++ case branchConP_0_short_rule: ++ case branchConN2P_0_long_rule: ++ case branchConN2P_0_short_rule: ++ case cmpN_null_branch_long_rule: ++ case cmpN_null_branch_short_rule: ++ case branchConIU_reg_immI_0_long_rule: ++ case branchConIU_reg_immI_0_short_rule: ++ case branchConF_reg_reg_long_rule: ++ case branchConF_reg_reg_short_rule: ++ case branchConD_reg_reg_long_rule: ++ case branchConD_reg_reg_short_rule: ++ return Assembler::is_simm(offs, 21); ++ default: ++ return Assembler::is_simm(offs, 16); ++ } ++ return false; ++} ++ ++ ++// No additional cost for CMOVL. ++const int Matcher::long_cmove_cost() { return 0; } ++ ++// No CMOVF/CMOVD with SSE2 ++const int Matcher::float_cmove_cost() { return ConditionalMoveLimit; } ++ ++// Does the CPU require late expand (see block.cpp for description of late expand)? ++const bool Matcher::require_postalloc_expand = false; ++ ++// Should the Matcher clone shifts on addressing modes, expecting them ++// to be subsumed into complex addressing expressions or compute them ++// into registers? True for Intel but false for most RISCs ++const bool Matcher::clone_shift_expressions = false; ++ ++// Do we need to mask the count passed to shift instructions or does ++// the cpu only look at the lower 5/6 bits anyway? 
++const bool Matcher::need_masked_shift_count = false; ++ ++bool Matcher::narrow_oop_use_complex_address() { ++ assert(UseCompressedOops, "only for compressed oops code"); ++ return false; ++} ++ ++bool Matcher::narrow_klass_use_complex_address() { ++ assert(UseCompressedClassPointers, "only for compressed klass code"); ++ return false; ++} ++ ++// This is UltraSparc specific, true just means we have fast l2f conversion ++const bool Matcher::convL2FSupported(void) { ++ return true; ++} ++ ++// Vector ideal reg ++const uint Matcher::vector_ideal_reg(int size) { ++ assert(MaxVectorSize == 16 || MaxVectorSize == 32, ""); ++ switch(size) { ++ case 16: return Op_VecX; ++ case 32: return Op_VecY; ++ } ++ ShouldNotReachHere(); ++ return 0; ++} ++ ++// Only lowest bits of xmm reg are used for vector shift count. ++const uint Matcher::vector_shift_count_ideal_reg(int size) { ++ assert(MaxVectorSize == 16 || MaxVectorSize == 32, ""); ++ switch(size) { ++ case 16: return Op_VecX; ++ case 32: return Op_VecY; ++ } ++ ShouldNotReachHere(); ++ return 0; ++} ++ ++// Max vector size in bytes. 0 if not supported. ++const int Matcher::vector_width_in_bytes(BasicType bt) { ++ return (int)MaxVectorSize; ++} ++ ++// Limits on vector size (number of elements) loaded into vector. ++const int Matcher::max_vector_size(const BasicType bt) { ++ assert(is_java_primitive(bt), "only primitive type vectors"); ++ return vector_width_in_bytes(bt)/type2aelembytes(bt); ++} ++ ++const int Matcher::min_vector_size(const BasicType bt) { ++ int max_size = max_vector_size(bt); ++ int size = 0; ++ ++ if (UseLSX) size = 16; ++ size = size / type2aelembytes(bt); ++ return MIN2(size,max_size); ++} ++ ++// LoongArch supports misaligned vectors store/load? ++const bool Matcher::misaligned_vectors_ok() { ++ return false; ++ //return !AlignVector; // can be changed by flag ++} ++ ++// Register for DIVI projection of divmodI ++RegMask Matcher::divI_proj_mask() { ++ ShouldNotReachHere(); ++ return RegMask(); ++} ++ ++// Register for MODI projection of divmodI ++RegMask Matcher::modI_proj_mask() { ++ ShouldNotReachHere(); ++ return RegMask(); ++} ++ ++// Register for DIVL projection of divmodL ++RegMask Matcher::divL_proj_mask() { ++ ShouldNotReachHere(); ++ return RegMask(); ++} ++ ++int Matcher::regnum_to_fpu_offset(int regnum) { ++ return regnum - 32; // The FP registers are in the second chunk ++} ++ ++ ++const bool Matcher::isSimpleConstant64(jlong value) { ++ // Will one (StoreL ConL) be cheaper than two (StoreI ConI)?. ++ return true; ++} ++ ++ ++// Return whether or not this register is ever used as an argument. This ++// function is used on startup to build the trampoline stubs in generateOptoStub. ++// Registers not mentioned will be killed by the VM call in the trampoline, and ++// arguments in those registers not be available to the callee. 
++bool Matcher::can_be_java_arg( int reg ) { ++ // Refer to: [sharedRuntime_loongarch_64.cpp] SharedRuntime::java_calling_convention() ++ if ( reg == T0_num || reg == T0_H_num ++ || reg == A0_num || reg == A0_H_num ++ || reg == A1_num || reg == A1_H_num ++ || reg == A2_num || reg == A2_H_num ++ || reg == A3_num || reg == A3_H_num ++ || reg == A4_num || reg == A4_H_num ++ || reg == A5_num || reg == A5_H_num ++ || reg == A6_num || reg == A6_H_num ++ || reg == A7_num || reg == A7_H_num ) ++ return true; ++ ++ if ( reg == F0_num || reg == F0_H_num ++ || reg == F1_num || reg == F1_H_num ++ || reg == F2_num || reg == F2_H_num ++ || reg == F3_num || reg == F3_H_num ++ || reg == F4_num || reg == F4_H_num ++ || reg == F5_num || reg == F5_H_num ++ || reg == F6_num || reg == F6_H_num ++ || reg == F7_num || reg == F7_H_num ) ++ return true; ++ ++ return false; ++} ++ ++bool Matcher::is_spillable_arg( int reg ) { ++ return can_be_java_arg(reg); ++} ++ ++bool Matcher::use_asm_for_ldiv_by_con( jlong divisor ) { ++ return false; ++} ++ ++// Register for MODL projection of divmodL ++RegMask Matcher::modL_proj_mask() { ++ ShouldNotReachHere(); ++ return RegMask(); ++} ++ ++const RegMask Matcher::method_handle_invoke_SP_save_mask() { ++ return FP_REG_mask(); ++} ++ ++// LoongArch doesn't support AES intrinsics ++const bool Matcher::pass_original_key_for_aes() { ++ return false; ++} ++ ++int CallStaticJavaDirectNode::compute_padding(int current_offset) const { ++ return round_to(current_offset, alignment_required()) - current_offset; ++} ++ ++int CallDynamicJavaDirectNode::compute_padding(int current_offset) const { ++ return round_to(current_offset, alignment_required()) - current_offset; ++} ++ ++int CallLeafNoFPDirectNode::compute_padding(int current_offset) const { ++ return round_to(current_offset, alignment_required()) - current_offset; ++} ++ ++int CallLeafDirectNode::compute_padding(int current_offset) const { ++ return round_to(current_offset, alignment_required()) - current_offset; ++} ++ ++int CallRuntimeDirectNode::compute_padding(int current_offset) const { ++ return round_to(current_offset, alignment_required()) - current_offset; ++} ++ ++// If CPU can load and store mis-aligned doubles directly then no fixup is ++// needed. Else we split the double into 2 integer pieces and move it ++// piece-by-piece. Only happens when passing doubles into C code as the ++// Java calling convention forces doubles to be aligned. ++const bool Matcher::misaligned_doubles_ok = false; ++// Do floats take an entire double register or just half? ++//const bool Matcher::float_in_double = true; ++bool Matcher::float_in_double() { return false; } ++// Threshold size for cleararray. ++const int Matcher::init_array_short_size = 8 * BytesPerLong; ++// Do ints take an entire long register or just half? ++const bool Matcher::int_in_long = true; ++// Is it better to copy float constants, or load them directly from memory? ++// Intel can load a float constant from a direct address, requiring no ++// extra registers. Most RISCs will have to materialize an address into a ++// register first, so they would do better to copy the constant from stack. ++const bool Matcher::rematerialize_float_constants = false; ++// Advertise here if the CPU requires explicit rounding operations ++// to implement the UseStrictFP mode. ++const bool Matcher::strict_fp_requires_explicit_rounding = false; ++// false => size gets scaled to BytesPerLong, ok. 
++const bool Matcher::init_array_count_is_in_bytes = false; ++ ++// Indicate if the safepoint node needs the polling page as an input. ++// Since LA doesn't have absolute addressing, it needs. ++bool SafePointNode::needs_polling_address_input() { ++ return false; ++} ++ ++// !!!!! Special hack to get all type of calls to specify the byte offset ++// from the start of the call to the point where the return address ++// will point. ++int MachCallStaticJavaNode::ret_addr_offset() { ++ // bl ++ return NativeCall::instruction_size; ++} ++ ++int MachCallDynamicJavaNode::ret_addr_offset() { ++ // lu12i_w IC_Klass, ++ // ori IC_Klass, ++ // lu32i_d IC_Klass ++ // lu52i_d IC_Klass ++ ++ // bl ++ return NativeMovConstReg::instruction_size + NativeCall::instruction_size; ++} ++ ++//============================================================================= ++ ++// Figure out which register class each belongs in: rc_int, rc_float, rc_stack ++enum RC { rc_bad, rc_int, rc_float, rc_stack }; ++static enum RC rc_class( OptoReg::Name reg ) { ++ if( !OptoReg::is_valid(reg) ) return rc_bad; ++ if (OptoReg::is_stack(reg)) return rc_stack; ++ VMReg r = OptoReg::as_VMReg(reg); ++ if (r->is_Register()) return rc_int; ++ assert(r->is_FloatRegister(), "must be"); ++ return rc_float; ++} ++ ++// Helper methods for MachSpillCopyNode::implementation(). ++static int vec_mov_helper(CodeBuffer *cbuf, bool do_size, int src_lo, int dst_lo, ++ int src_hi, int dst_hi, uint ireg, outputStream* st) { ++ int size = 0; ++ if (cbuf) { ++ MacroAssembler _masm(cbuf); ++ int offset = __ offset(); ++ switch (ireg) { ++ case Op_VecX: ++ __ vori_b(as_FloatRegister(Matcher::_regEncode[dst_lo]), as_FloatRegister(Matcher::_regEncode[src_lo]), 0); ++ break; ++ case Op_VecY: ++ __ xvori_b(as_FloatRegister(Matcher::_regEncode[dst_lo]), as_FloatRegister(Matcher::_regEncode[src_lo]), 0); ++ break; ++ default: ++ ShouldNotReachHere(); ++ } ++#ifndef PRODUCT ++ } else if (!do_size) { ++ switch (ireg) { ++ case Op_VecX: ++ st->print("vori.b %s, %s, 0\t# spill", Matcher::regName[dst_lo], Matcher::regName[src_lo]); ++ break; ++ case Op_VecY: ++ st->print("xvori.b %s, %s, 0\t# spill", Matcher::regName[dst_lo], Matcher::regName[src_lo]); ++ break; ++ default: ++ ShouldNotReachHere(); ++ } ++#endif ++ } ++ size += 4; ++ return size; ++} ++ ++static int vec_spill_helper(CodeBuffer *cbuf, bool do_size, bool is_load, ++ int stack_offset, int reg, uint ireg, outputStream* st) { ++ int size = 0; ++ if (cbuf) { ++ MacroAssembler _masm(cbuf); ++ int offset = __ offset(); ++ if (is_load) { ++ switch (ireg) { ++ case Op_VecX: ++ __ vld(as_FloatRegister(Matcher::_regEncode[reg]), SP, stack_offset); ++ break; ++ case Op_VecY: ++ __ xvld(as_FloatRegister(Matcher::_regEncode[reg]), SP, stack_offset); ++ break; ++ default: ++ ShouldNotReachHere(); ++ } ++ } else { // store ++ switch (ireg) { ++ case Op_VecX: ++ __ vst(as_FloatRegister(Matcher::_regEncode[reg]), SP, stack_offset); ++ break; ++ case Op_VecY: ++ __ xvst(as_FloatRegister(Matcher::_regEncode[reg]), SP, stack_offset); ++ break; ++ default: ++ ShouldNotReachHere(); ++ } ++ } ++#ifndef PRODUCT ++ } else if (!do_size) { ++ if (is_load) { ++ switch (ireg) { ++ case Op_VecX: ++ st->print("vld %s, [SP + %d]\t# spill", Matcher::regName[reg], stack_offset); ++ break; ++ case Op_VecY: ++ st->print("xvld %s, [SP + %d]\t# spill", Matcher::regName[reg], stack_offset); ++ break; ++ default: ++ ShouldNotReachHere(); ++ } ++ } else { // store ++ switch (ireg) { ++ case Op_VecX: ++ st->print("vst %s, [SP + %d]\t# 
spill", Matcher::regName[reg], stack_offset); ++ break; ++ case Op_VecY: ++ st->print("xvst %s, [SP + %d]\t# spill", Matcher::regName[reg], stack_offset); ++ break; ++ default: ++ ShouldNotReachHere(); ++ } ++ } ++#endif ++ } ++ size += 4; ++ return size; ++} ++ ++static int vec_stack_to_stack_helper(CodeBuffer *cbuf, int src_offset, ++ int dst_offset, uint ireg, outputStream* st) { ++ int size = 0; ++ if (cbuf) { ++ MacroAssembler _masm(cbuf); ++ switch (ireg) { ++ case Op_VecX: ++ __ vld(F23, SP, src_offset); ++ __ vst(F23, SP, dst_offset); ++ break; ++ case Op_VecY: ++ __ xvld(F23, SP, src_offset); ++ __ xvst(F23, SP, dst_offset); ++ break; ++ default: ++ ShouldNotReachHere(); ++ } ++#ifndef PRODUCT ++ } else { ++ switch (ireg) { ++ case Op_VecX: ++ st->print("vld f23, %d(sp)\n\t" ++ "vst f23, %d(sp)\t# 128-bit mem-mem spill", ++ src_offset, dst_offset); ++ break; ++ case Op_VecY: ++ st->print("xvld f23, %d(sp)\n\t" ++ "xvst f23, %d(sp)\t# 256-bit mem-mem spill", ++ src_offset, dst_offset); ++ break; ++ default: ++ ShouldNotReachHere(); ++ } ++#endif ++ } ++ size += 8; ++ return size; ++} ++ ++uint MachSpillCopyNode::implementation( CodeBuffer *cbuf, PhaseRegAlloc *ra_, bool do_size, outputStream* st ) const { ++ // Get registers to move ++ OptoReg::Name src_second = ra_->get_reg_second(in(1)); ++ OptoReg::Name src_first = ra_->get_reg_first(in(1)); ++ OptoReg::Name dst_second = ra_->get_reg_second(this ); ++ OptoReg::Name dst_first = ra_->get_reg_first(this ); ++ ++ enum RC src_second_rc = rc_class(src_second); ++ enum RC src_first_rc = rc_class(src_first); ++ enum RC dst_second_rc = rc_class(dst_second); ++ enum RC dst_first_rc = rc_class(dst_first); ++ ++ assert(OptoReg::is_valid(src_first) && OptoReg::is_valid(dst_first), "must move at least 1 register" ); ++ ++ // Generate spill code! 
++ ++ if( src_first == dst_first && src_second == dst_second ) ++ return 0; // Self copy, no move ++ ++ if (bottom_type()->isa_vect() != NULL) { ++ uint ireg = ideal_reg(); ++ assert((src_first_rc != rc_int && dst_first_rc != rc_int), "sanity"); ++ if (src_first_rc == rc_stack && dst_first_rc == rc_stack) { ++ // mem -> mem ++ int src_offset = ra_->reg2offset(src_first); ++ int dst_offset = ra_->reg2offset(dst_first); ++ vec_stack_to_stack_helper(cbuf, src_offset, dst_offset, ireg, st); ++ } else if (src_first_rc == rc_float && dst_first_rc == rc_float) { ++ vec_mov_helper(cbuf, do_size, src_first, dst_first, src_second, dst_second, ireg, st); ++ } else if (src_first_rc == rc_float && dst_first_rc == rc_stack) { ++ int stack_offset = ra_->reg2offset(dst_first); ++ vec_spill_helper(cbuf, do_size, false, stack_offset, src_first, ireg, st); ++ } else if (src_first_rc == rc_stack && dst_first_rc == rc_float) { ++ int stack_offset = ra_->reg2offset(src_first); ++ vec_spill_helper(cbuf, do_size, true, stack_offset, dst_first, ireg, st); ++ } else { ++ ShouldNotReachHere(); ++ } ++ return 0; ++ } ++ ++ if (src_first_rc == rc_stack) { ++ // mem -> ++ if (dst_first_rc == rc_stack) { ++ // mem -> mem ++ assert(src_second != dst_first, "overlap"); ++ if ((src_first & 1) == 0 && src_first + 1 == src_second && ++ (dst_first & 1) == 0 && dst_first + 1 == dst_second) { ++ // 64-bit ++ int src_offset = ra_->reg2offset(src_first); ++ int dst_offset = ra_->reg2offset(dst_first); ++ if (cbuf) { ++ MacroAssembler _masm(cbuf); ++ __ ld_d(AT, Address(SP, src_offset)); ++ __ st_d(AT, Address(SP, dst_offset)); ++#ifndef PRODUCT ++ } else { ++ st->print("\n\t"); ++ st->print("ld_d AT, [SP + #%d]\t# 64-bit mem-mem spill 1\n\t" ++ "st_d AT, [SP + #%d]", ++ src_offset, dst_offset); ++#endif ++ } ++ } else { ++ // 32-bit ++ assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform"); ++ assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform"); ++ // No pushl/popl, so: ++ int src_offset = ra_->reg2offset(src_first); ++ int dst_offset = ra_->reg2offset(dst_first); ++ if (cbuf) { ++ MacroAssembler _masm(cbuf); ++ __ ld_w(AT, Address(SP, src_offset)); ++ __ st_w(AT, Address(SP, dst_offset)); ++#ifndef PRODUCT ++ } else { ++ st->print("\n\t"); ++ st->print("ld_w AT, [SP + #%d] spill 2\n\t" ++ "st_w AT, [SP + #%d]\n\t", ++ src_offset, dst_offset); ++#endif ++ } ++ } ++ return 0; ++ } else if (dst_first_rc == rc_int) { ++ // mem -> gpr ++ if ((src_first & 1) == 0 && src_first + 1 == src_second && ++ (dst_first & 1) == 0 && dst_first + 1 == dst_second) { ++ // 64-bit ++ int offset = ra_->reg2offset(src_first); ++ if (cbuf) { ++ MacroAssembler _masm(cbuf); ++ __ ld_d(as_Register(Matcher::_regEncode[dst_first]), Address(SP, offset)); ++#ifndef PRODUCT ++ } else { ++ st->print("\n\t"); ++ st->print("ld_d %s, [SP + #%d]\t# spill 3", ++ Matcher::regName[dst_first], ++ offset); ++#endif ++ } ++ } else { ++ // 32-bit ++ assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform"); ++ assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform"); ++ int offset = ra_->reg2offset(src_first); ++ if (cbuf) { ++ MacroAssembler _masm(cbuf); ++ if (this->ideal_reg() == Op_RegI) ++ __ ld_w(as_Register(Matcher::_regEncode[dst_first]), Address(SP, offset)); ++ else { ++ if (Assembler::is_simm(offset, 12)) { ++ __ ld_wu(as_Register(Matcher::_regEncode[dst_first]), Address(SP, offset)); ++ } else { ++ __ li(AT, offset); ++ __ 
ldx_wu(as_Register(Matcher::_regEncode[dst_first]), SP, AT); ++ } ++ } ++#ifndef PRODUCT ++ } else { ++ st->print("\n\t"); ++ if (this->ideal_reg() == Op_RegI) ++ st->print("ld_w %s, [SP + #%d]\t# spill 4", ++ Matcher::regName[dst_first], ++ offset); ++ else ++ st->print("ld_wu %s, [SP + #%d]\t# spill 5", ++ Matcher::regName[dst_first], ++ offset); ++#endif ++ } ++ } ++ return 0; ++ } else if (dst_first_rc == rc_float) { ++ // mem-> xmm ++ if ((src_first & 1) == 0 && src_first + 1 == src_second && ++ (dst_first & 1) == 0 && dst_first + 1 == dst_second) { ++ // 64-bit ++ int offset = ra_->reg2offset(src_first); ++ if (cbuf) { ++ MacroAssembler _masm(cbuf); ++ __ fld_d( as_FloatRegister(Matcher::_regEncode[dst_first]), Address(SP, offset)); ++#ifndef PRODUCT ++ } else { ++ st->print("\n\t"); ++ st->print("fld_d %s, [SP + #%d]\t# spill 6", ++ Matcher::regName[dst_first], ++ offset); ++#endif ++ } ++ } else { ++ // 32-bit ++ assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform"); ++ assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform"); ++ int offset = ra_->reg2offset(src_first); ++ if (cbuf) { ++ MacroAssembler _masm(cbuf); ++ __ fld_s( as_FloatRegister(Matcher::_regEncode[dst_first]), Address(SP, offset)); ++#ifndef PRODUCT ++ } else { ++ st->print("\n\t"); ++ st->print("fld_s %s, [SP + #%d]\t# spill 7", ++ Matcher::regName[dst_first], ++ offset); ++#endif ++ } ++ } ++ } ++ return 0; ++ } else if (src_first_rc == rc_int) { ++ // gpr -> ++ if (dst_first_rc == rc_stack) { ++ // gpr -> mem ++ if ((src_first & 1) == 0 && src_first + 1 == src_second && ++ (dst_first & 1) == 0 && dst_first + 1 == dst_second) { ++ // 64-bit ++ int offset = ra_->reg2offset(dst_first); ++ if (cbuf) { ++ MacroAssembler _masm(cbuf); ++ __ st_d(as_Register(Matcher::_regEncode[src_first]), Address(SP, offset)); ++#ifndef PRODUCT ++ } else { ++ st->print("\n\t"); ++ st->print("st_d %s, [SP + #%d] # spill 8", ++ Matcher::regName[src_first], ++ offset); ++#endif ++ } ++ } else { ++ // 32-bit ++ assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform"); ++ assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform"); ++ int offset = ra_->reg2offset(dst_first); ++ if (cbuf) { ++ MacroAssembler _masm(cbuf); ++ __ st_w(as_Register(Matcher::_regEncode[src_first]), Address(SP, offset)); ++#ifndef PRODUCT ++ } else { ++ st->print("\n\t"); ++ st->print("st_w %s, [SP + #%d]\t# spill 9", ++ Matcher::regName[src_first], offset); ++#endif ++ } ++ } ++ return 0; ++ } else if (dst_first_rc == rc_int) { ++ // gpr -> gpr ++ if ((src_first & 1) == 0 && src_first + 1 == src_second && ++ (dst_first & 1) == 0 && dst_first + 1 == dst_second) { ++ // 64-bit ++ if (cbuf) { ++ MacroAssembler _masm(cbuf); ++ __ move(as_Register(Matcher::_regEncode[dst_first]), ++ as_Register(Matcher::_regEncode[src_first])); ++#ifndef PRODUCT ++ } else { ++ st->print("\n\t"); ++ st->print("move(64bit) %s <-- %s\t# spill 10", ++ Matcher::regName[dst_first], ++ Matcher::regName[src_first]); ++#endif ++ } ++ return 0; ++ } else { ++ // 32-bit ++ assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform"); ++ assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform"); ++ if (cbuf) { ++ MacroAssembler _masm(cbuf); ++ if (this->ideal_reg() == Op_RegI) ++ __ move_u32(as_Register(Matcher::_regEncode[dst_first]), as_Register(Matcher::_regEncode[src_first])); ++ else ++ __ add_d(as_Register(Matcher::_regEncode[dst_first]), 
as_Register(Matcher::_regEncode[src_first]), R0); ++#ifndef PRODUCT ++ } else { ++ st->print("\n\t"); ++ st->print("move(32-bit) %s <-- %s\t# spill 11", ++ Matcher::regName[dst_first], ++ Matcher::regName[src_first]); ++#endif ++ } ++ return 0; ++ } ++ } else if (dst_first_rc == rc_float) { ++ // gpr -> xmm ++ if ((src_first & 1) == 0 && src_first + 1 == src_second && ++ (dst_first & 1) == 0 && dst_first + 1 == dst_second) { ++ // 64-bit ++ if (cbuf) { ++ MacroAssembler _masm(cbuf); ++ __ movgr2fr_d(as_FloatRegister(Matcher::_regEncode[dst_first]), as_Register(Matcher::_regEncode[src_first])); ++#ifndef PRODUCT ++ } else { ++ st->print("\n\t"); ++ st->print("movgr2fr_d %s, %s\t# spill 12", ++ Matcher::regName[dst_first], ++ Matcher::regName[src_first]); ++#endif ++ } ++ } else { ++ // 32-bit ++ assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform"); ++ assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform"); ++ if (cbuf) { ++ MacroAssembler _masm(cbuf); ++ __ movgr2fr_w(as_FloatRegister(Matcher::_regEncode[dst_first]), as_Register(Matcher::_regEncode[src_first])); ++#ifndef PRODUCT ++ } else { ++ st->print("\n\t"); ++ st->print("movgr2fr_w %s, %s\t# spill 13", ++ Matcher::regName[dst_first], ++ Matcher::regName[src_first]); ++#endif ++ } ++ } ++ return 0; ++ } ++ } else if (src_first_rc == rc_float) { ++ // xmm -> ++ if (dst_first_rc == rc_stack) { ++ // xmm -> mem ++ if ((src_first & 1) == 0 && src_first + 1 == src_second && ++ (dst_first & 1) == 0 && dst_first + 1 == dst_second) { ++ // 64-bit ++ int offset = ra_->reg2offset(dst_first); ++ if (cbuf) { ++ MacroAssembler _masm(cbuf); ++ __ fst_d( as_FloatRegister(Matcher::_regEncode[src_first]), Address(SP, offset) ); ++#ifndef PRODUCT ++ } else { ++ st->print("\n\t"); ++ st->print("fst_d %s, [SP + #%d]\t# spill 14", ++ Matcher::regName[src_first], ++ offset); ++#endif ++ } ++ } else { ++ // 32-bit ++ assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform"); ++ assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform"); ++ int offset = ra_->reg2offset(dst_first); ++ if (cbuf) { ++ MacroAssembler _masm(cbuf); ++ __ fst_s(as_FloatRegister(Matcher::_regEncode[src_first]), Address(SP, offset)); ++#ifndef PRODUCT ++ } else { ++ st->print("\n\t"); ++ st->print("fst_s %s, [SP + #%d]\t# spill 15", ++ Matcher::regName[src_first], ++ offset); ++#endif ++ } ++ } ++ return 0; ++ } else if (dst_first_rc == rc_int) { ++ // xmm -> gpr ++ if ((src_first & 1) == 0 && src_first + 1 == src_second && ++ (dst_first & 1) == 0 && dst_first + 1 == dst_second) { ++ // 64-bit ++ if (cbuf) { ++ MacroAssembler _masm(cbuf); ++ __ movfr2gr_d( as_Register(Matcher::_regEncode[dst_first]), as_FloatRegister(Matcher::_regEncode[src_first])); ++#ifndef PRODUCT ++ } else { ++ st->print("\n\t"); ++ st->print("movfr2gr_d %s, %s\t# spill 16", ++ Matcher::regName[dst_first], ++ Matcher::regName[src_first]); ++#endif ++ } ++ } else { ++ // 32-bit ++ assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform"); ++ assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform"); ++ if (cbuf) { ++ MacroAssembler _masm(cbuf); ++ __ movfr2gr_s( as_Register(Matcher::_regEncode[dst_first]), as_FloatRegister(Matcher::_regEncode[src_first])); ++#ifndef PRODUCT ++ } else { ++ st->print("\n\t"); ++ st->print("movfr2gr_s %s, %s\t# spill 17", ++ Matcher::regName[dst_first], ++ Matcher::regName[src_first]); ++#endif ++ } ++ } ++ return 0; ++ } else if (dst_first_rc == 
rc_float) { ++ // xmm -> xmm ++ if ((src_first & 1) == 0 && src_first + 1 == src_second && ++ (dst_first & 1) == 0 && dst_first + 1 == dst_second) { ++ // 64-bit ++ if (cbuf) { ++ MacroAssembler _masm(cbuf); ++ __ fmov_d( as_FloatRegister(Matcher::_regEncode[dst_first]), as_FloatRegister(Matcher::_regEncode[src_first])); ++#ifndef PRODUCT ++ } else { ++ st->print("\n\t"); ++ st->print("fmov_d %s <-- %s\t# spill 18", ++ Matcher::regName[dst_first], ++ Matcher::regName[src_first]); ++#endif ++ } ++ } else { ++ // 32-bit ++ assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform"); ++ assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform"); ++ if (cbuf) { ++ MacroAssembler _masm(cbuf); ++ __ fmov_s( as_FloatRegister(Matcher::_regEncode[dst_first]), as_FloatRegister(Matcher::_regEncode[src_first])); ++#ifndef PRODUCT ++ } else { ++ st->print("\n\t"); ++ st->print("fmov_s %s <-- %s\t# spill 19", ++ Matcher::regName[dst_first], ++ Matcher::regName[src_first]); ++#endif ++ } ++ } ++ return 0; ++ } ++ } ++ ++ assert(0," foo "); ++ Unimplemented(); ++ return 0; ++} ++ ++#ifndef PRODUCT ++void MachSpillCopyNode::format( PhaseRegAlloc *ra_, outputStream* st ) const { ++ implementation( NULL, ra_, false, st ); ++} ++#endif ++ ++void MachSpillCopyNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const { ++ implementation( &cbuf, ra_, false, NULL ); ++} ++ ++uint MachSpillCopyNode::size(PhaseRegAlloc *ra_) const { ++ return MachNode::size(ra_); ++} ++ ++//============================================================================= ++# ++ ++#ifndef PRODUCT ++void MachBreakpointNode::format( PhaseRegAlloc *, outputStream* st ) const { ++ st->print("BRK"); ++} ++#endif ++ ++void MachBreakpointNode::emit(CodeBuffer &cbuf, PhaseRegAlloc* ra_) const { ++ MacroAssembler _masm(&cbuf); ++ __ brk(5); ++} ++ ++uint MachBreakpointNode::size(PhaseRegAlloc* ra_) const { ++ return MachNode::size(ra_); ++} ++ ++ ++//============================================================================= ++#ifndef PRODUCT ++void MachEpilogNode::format( PhaseRegAlloc *ra_, outputStream* st ) const { ++ Compile *C = ra_->C; ++ int framesize = C->frame_size_in_bytes(); ++ ++ assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned"); ++ ++ st->print_cr("addi_d SP, SP, %d # Rlease stack @ MachEpilogNode", framesize); ++ st->print("\t"); ++ st->print_cr("ld_d RA, SP, %d # Restore RA @ MachEpilogNode", -wordSize); ++ st->print("\t"); ++ st->print_cr("ld_d FP, SP, %d # Restore FP @ MachEpilogNode", -wordSize*2); ++ if( do_polling() && C->is_method_compilation() ) { ++ st->print("\t"); ++ st->print_cr("Poll Safepoint # MachEpilogNode"); ++ } ++} ++#endif ++ ++void MachEpilogNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const { ++ Compile *C = ra_->C; ++ MacroAssembler _masm(&cbuf); ++ int framesize = C->frame_size_in_bytes(); ++ ++ assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned"); ++ ++ __ ld_d(RA, Address(SP, framesize - wordSize)); ++ __ ld_d(FP, Address(SP, framesize - wordSize * 2)); ++ if (Assembler::is_simm(framesize, 12)) { ++ __ addi_d(SP, SP, framesize); ++ } else { ++ __ li(AT, framesize); ++ __ add_d(SP, SP, AT); ++ } ++ ++ if( do_polling() && C->is_method_compilation() ) { ++ __ li(AT, (long)os::get_polling_page()); ++ __ relocate(relocInfo::poll_return_type); ++ __ ld_w(AT, AT, 0); ++ } ++} ++ ++uint MachEpilogNode::size(PhaseRegAlloc *ra_) const { ++ return MachNode::size(ra_); // too many variables; just compute it the hard way 
++} ++ ++int MachEpilogNode::reloc() const { ++ return 0; // a large enough number ++} ++ ++const Pipeline * MachEpilogNode::pipeline() const { ++ return MachNode::pipeline_class(); ++} ++ ++int MachEpilogNode::safepoint_offset() const { return 0; } ++ ++//============================================================================= ++ ++#ifndef PRODUCT ++void BoxLockNode::format( PhaseRegAlloc *ra_, outputStream* st ) const { ++ int offset = ra_->reg2offset(in_RegMask(0).find_first_elem()); ++ int reg = ra_->get_reg_first(this); ++ st->print("ADDI_D %s, SP, %d @BoxLockNode",Matcher::regName[reg],offset); ++} ++#endif ++ ++ ++uint BoxLockNode::size(PhaseRegAlloc *ra_) const { ++ int offset = ra_->reg2offset(in_RegMask(0).find_first_elem()); ++ ++ if (Assembler::is_simm(offset, 12)) ++ return 4; ++ else ++ return 3 * 4; ++} ++ ++void BoxLockNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const { ++ MacroAssembler _masm(&cbuf); ++ int offset = ra_->reg2offset(in_RegMask(0).find_first_elem()); ++ int reg = ra_->get_encode(this); ++ ++ if (Assembler::is_simm(offset, 12)) { ++ __ addi_d(as_Register(reg), SP, offset); ++ } else { ++ __ lu12i_w(AT, Assembler::split_low20(offset >> 12)); ++ __ ori(AT, AT, Assembler::split_low12(offset)); ++ __ add_d(as_Register(reg), SP, AT); ++ } ++} ++ ++int MachCallRuntimeNode::ret_addr_offset() { ++ // pcaddu18i ++ // jirl ++ return NativeFarCall::instruction_size; ++} ++ ++ ++//============================================================================= ++#ifndef PRODUCT ++void MachNopNode::format( PhaseRegAlloc *, outputStream* st ) const { ++ st->print("NOP \t# %d bytes pad for loops and calls", 4 * _count); ++} ++#endif ++ ++void MachNopNode::emit(CodeBuffer &cbuf, PhaseRegAlloc * ) const { ++ MacroAssembler _masm(&cbuf); ++ int i = 0; ++ for(i = 0; i < _count; i++) ++ __ nop(); ++} ++ ++uint MachNopNode::size(PhaseRegAlloc *) const { ++ return 4 * _count; ++} ++const Pipeline* MachNopNode::pipeline() const { ++ return MachNode::pipeline_class(); ++} ++ ++//============================================================================= ++ ++//============================================================================= ++#ifndef PRODUCT ++void MachUEPNode::format( PhaseRegAlloc *ra_, outputStream* st ) const { ++ st->print_cr("load_klass(T4, T0)"); ++ st->print_cr("\tbeq(T4, iCache, L)"); ++ st->print_cr("\tjmp(SharedRuntime::get_ic_miss_stub(), relocInfo::runtime_call_type)"); ++ st->print_cr(" L:"); ++} ++#endif ++ ++ ++void MachUEPNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const { ++ MacroAssembler _masm(&cbuf); ++ int ic_reg = Matcher::inline_cache_reg_encode(); ++ Label L; ++ Register receiver = T0; ++ Register iCache = as_Register(ic_reg); ++ ++ __ load_klass(T4, receiver); ++ __ beq(T4, iCache, L); ++ __ jmp((address)SharedRuntime::get_ic_miss_stub(), relocInfo::runtime_call_type); ++ __ bind(L); ++} ++ ++uint MachUEPNode::size(PhaseRegAlloc *ra_) const { ++ return MachNode::size(ra_); ++} ++ ++ ++ ++//============================================================================= ++ ++const RegMask& MachConstantBaseNode::_out_RegMask = P_REG_mask(); ++ ++int Compile::ConstantTable::calculate_table_base_offset() const { ++ return 0; // absolute addressing, no offset ++} ++ ++bool MachConstantBaseNode::requires_postalloc_expand() const { return false; } ++void MachConstantBaseNode::postalloc_expand(GrowableArray *nodes, PhaseRegAlloc *ra_) { ++ ShouldNotReachHere(); ++} ++ ++void MachConstantBaseNode::emit(CodeBuffer& cbuf, PhaseRegAlloc* ra_) 
const { ++ Compile* C = ra_->C; ++ Compile::ConstantTable& constant_table = C->constant_table(); ++ MacroAssembler _masm(&cbuf); ++ ++ Register Rtoc = as_Register(ra_->get_encode(this)); ++ CodeSection* consts_section = cbuf.consts(); ++ int consts_size = consts_section->align_at_start(consts_section->size()); ++ assert(constant_table.size() == consts_size, "must be equal"); ++ ++ if (consts_section->size()) { ++ assert((CodeBuffer::SECT_CONSTS + 1) == CodeBuffer::SECT_INSTS, ++ "insts must immediately follow consts"); ++ // Materialize the constant table base. ++ address baseaddr = cbuf.insts()->start() - consts_size + -(constant_table.table_base_offset()); ++ jint offs = (baseaddr - __ pc()) >> 2; ++ guarantee(Assembler::is_simm(offs, 20), "Not signed 20-bit offset"); ++ __ pcaddi(Rtoc, offs); ++ } ++} ++ ++uint MachConstantBaseNode::size(PhaseRegAlloc* ra_) const { ++ // pcaddi ++ return 1 * BytesPerInstWord; ++} ++ ++#ifndef PRODUCT ++void MachConstantBaseNode::format(PhaseRegAlloc* ra_, outputStream* st) const { ++ Register r = as_Register(ra_->get_encode(this)); ++ st->print("pcaddi %s, &constanttable (constant table base) @ MachConstantBaseNode", r->name()); ++} ++#endif ++ ++ ++//============================================================================= ++#ifndef PRODUCT ++void MachPrologNode::format( PhaseRegAlloc *ra_, outputStream* st ) const { ++ Compile* C = ra_->C; ++ ++ int framesize = C->frame_size_in_bytes(); ++ int bangsize = C->bang_size_in_bytes(); ++ assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned"); ++ ++ // Calls to C2R adapters often do not accept exceptional returns. ++ // We require that their callers must bang for them. But be careful, because ++ // some VM calls (such as call site linkage) can use several kilobytes of ++ // stack. But the stack safety zone should account for that. ++ // See bugs 4446381, 4468289, 4497237. ++ if (C->need_stack_bang(bangsize)) { ++ st->print_cr("# stack bang"); st->print("\t"); ++ } ++ st->print("st_d RA, %d(SP) @ MachPrologNode\n\t", -wordSize); ++ st->print("st_d FP, %d(SP) @ MachPrologNode\n\t", -wordSize*2); ++ st->print("addi_d FP, SP, -%d \n\t", wordSize*2); ++ st->print("addi_d SP, SP, -%d \t",framesize); ++} ++#endif ++ ++ ++void MachPrologNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const { ++ Compile* C = ra_->C; ++ MacroAssembler _masm(&cbuf); ++ ++ int framesize = C->frame_size_in_bytes(); ++ int bangsize = C->bang_size_in_bytes(); ++ ++ assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned"); ++ ++#ifdef ASSERT ++ address start = __ pc(); ++#endif ++ ++ if (C->need_stack_bang(bangsize)) { ++ __ generate_stack_overflow_check(bangsize); ++ } ++ ++ if (Assembler::is_simm(-framesize, 12)) { ++ __ addi_d(SP, SP, -framesize); ++ } else { ++ __ li(AT, -framesize); ++ __ add_d(SP, SP, AT); ++ } ++ __ st_d(RA, Address(SP, framesize - wordSize)); ++ __ st_d(FP, Address(SP, framesize - wordSize * 2)); ++ if (Assembler::is_simm(framesize - wordSize * 2, 12)) { ++ __ addi_d(FP, SP, framesize - wordSize * 2); ++ } else { ++ __ li(AT, framesize - wordSize * 2); ++ __ add_d(FP, SP, AT); ++ } ++ ++ assert((__ pc() - start) >= 1 * BytesPerInstWord, "Not enough room for patch_verified_entry"); ++ ++ C->set_frame_complete(cbuf.insts_size()); ++ if (C->has_mach_constant_base_node()) { ++ // NOTE: We set the table base offset here because users might be ++ // emitted before MachConstantBaseNode.
++ Compile::ConstantTable& constant_table = C->constant_table(); ++ constant_table.set_table_base_offset(constant_table.calculate_table_base_offset()); ++ } ++} ++ ++ ++uint MachPrologNode::size(PhaseRegAlloc *ra_) const { ++ return MachNode::size(ra_); // too many variables; just compute it the hard way ++} ++ ++int MachPrologNode::reloc() const { ++ return 0; // a large enough number ++} ++ ++%} ++ ++//----------ENCODING BLOCK----------------------------------------------------- ++// This block specifies the encoding classes used by the compiler to output ++// byte streams. Encoding classes generate functions which are called by ++// Machine Instruction Nodes in order to generate the bit encoding of the ++// instruction. Operands specify their base encoding interface with the ++// interface keyword. There are currently supported four interfaces, ++// REG_INTER, CONST_INTER, MEMORY_INTER, & COND_INTER. REG_INTER causes an ++// operand to generate a function which returns its register number when ++// queried. CONST_INTER causes an operand to generate a function which ++// returns the value of the constant when queried. MEMORY_INTER causes an ++// operand to generate four functions which return the Base Register, the ++// Index Register, the Scale Value, and the Offset Value of the operand when ++// queried. COND_INTER causes an operand to generate six functions which ++// return the encoding code (ie - encoding bits for the instruction) ++// associated with each basic boolean condition for a conditional instruction. ++// Instructions specify two basic values for encoding. They use the ++// ins_encode keyword to specify their encoding class (which must be one of ++// the class names specified in the encoding block), and they use the ++// opcode keyword to specify, in order, their primary, secondary, and ++// tertiary opcode. Only the opcode sections which a particular instruction ++// needs for encoding need to be specified. ++encode %{ ++ ++ enc_class Java_To_Runtime (method meth) %{ // CALL Java_To_Runtime, Java_To_Runtime_Leaf ++ MacroAssembler _masm(&cbuf); ++ // This is the instruction starting address for relocation info. ++ __ block_comment("Java_To_Runtime"); ++ cbuf.set_insts_mark(); ++ __ relocate(relocInfo::runtime_call_type); ++ __ patchable_call((address)$meth$$method); ++ %} ++ ++ enc_class Java_Static_Call (method meth) %{ // JAVA STATIC CALL ++ // CALL to fixup routine. Fixup routine uses ScopeDesc info to determine ++ // who we intended to call. ++ MacroAssembler _masm(&cbuf); ++ address addr = (address)$meth$$method; ++ address call; ++ __ block_comment("Java_Static_Call"); ++ ++ if ( !_method ) { ++ // A call to a runtime wrapper, e.g. new, new_typeArray_Java, uncommon_trap. 
++ call = __ trampoline_call(AddressLiteral(addr, relocInfo::runtime_call_type), &cbuf); ++ } else if(_optimized_virtual) { ++ call = __ trampoline_call(AddressLiteral(addr, relocInfo::opt_virtual_call_type), &cbuf); ++ } else { ++ call = __ trampoline_call(AddressLiteral(addr, relocInfo::static_call_type), &cbuf); ++ } ++ ++ if (call == NULL) { ++ ciEnv::current()->record_failure("CodeCache is full"); ++ return; ++ } ++ ++ if( _method ) { // Emit stub for static call ++ address stub = CompiledStaticCall::emit_to_interp_stub(cbuf); ++ if (stub == NULL) { ++ ciEnv::current()->record_failure("CodeCache is full"); ++ return; ++ } ++ } ++ %} ++ ++ ++ // ++ // [Ref: LIR_Assembler::ic_call() ] ++ // ++ enc_class Java_Dynamic_Call (method meth) %{ // JAVA DYNAMIC CALL ++ MacroAssembler _masm(&cbuf); ++ __ block_comment("Java_Dynamic_Call"); ++ address call = __ ic_call((address)$meth$$method); ++ if (call == NULL) { ++ ciEnv::current()->record_failure("CodeCache is full"); ++ return; ++ } ++ %} ++ ++ ++ enc_class enc_PartialSubtypeCheck(mRegP result, mRegP sub, mRegP super, mRegI tmp) %{ ++ Register result = $result$$Register; ++ Register sub = $sub$$Register; ++ Register super = $super$$Register; ++ Register length = $tmp$$Register; ++ Register tmp = T4; ++ Label miss; ++ ++ // result may be the same as sub ++ // 47c B40: # B21 B41 <- B20 Freq: 0.155379 ++ // 47c partialSubtypeCheck result=S1, sub=S1, super=S3, length=S0 ++ // 4bc mov S2, NULL #@loadConP ++ // 4c0 beq S1, S2, B21 #@branchConP P=0.999999 C=-1.000000 ++ // ++ MacroAssembler _masm(&cbuf); ++ Label done; ++ __ check_klass_subtype_slow_path(sub, super, length, tmp, ++ NULL, &miss, ++ /*set_cond_codes:*/ true); ++ // Refer to X86_64's RDI ++ __ move(result, 0); ++ __ b(done); ++ ++ __ bind(miss); ++ __ li(result, 1); ++ __ bind(done); ++ %} ++ ++%} ++ ++ ++//---------LOONGARCH FRAME-------------------------------------------------------------- ++// Definition of frame structure and management information. ++// ++// S T A C K L A Y O U T Allocators stack-slot number ++// | (to get allocators register number ++// G Owned by | | v add SharedInfo::stack0) ++// r CALLER | | ++// o | +--------+ pad to even-align allocators stack-slot ++// w V | pad0 | numbers; owned by CALLER ++// t -----------+--------+----> Matcher::_in_arg_limit, unaligned ++// h ^ | in | 5 ++// | | args | 4 Holes in incoming args owned by SELF ++// | | old | | 3 ++// | | SP-+--------+----> Matcher::_old_SP, even aligned ++// v | | ret | 3 return address ++// Owned by +--------+ ++// Self | pad2 | 2 pad to align old SP ++// | +--------+ 1 ++// | | locks | 0 ++// | +--------+----> SharedInfo::stack0, even aligned ++// | | pad1 | 11 pad to align new SP ++// | +--------+ ++// | | | 10 ++// | | spills | 9 spills ++// V | | 8 (pad0 slot for callee) ++// -----------+--------+----> Matcher::_out_arg_limit, unaligned ++// ^ | out | 7 ++// | | args | 6 Holes in outgoing args owned by CALLEE ++// Owned by new | | ++// Callee SP-+--------+----> Matcher::_new_SP, even aligned ++// | | ++// ++// Note 1: Only region 8-11 is determined by the allocator. Region 0-5 is ++// known from SELF's arguments and the Java calling convention. ++// Region 6-7 is determined per call site. ++// Note 2: If the calling convention leaves holes in the incoming argument ++// area, those holes are owned by SELF. Holes in the outgoing area ++// are owned by the CALLEE. 
Holes should not be necessary in the ++// incoming area, as the Java calling convention is completely under ++// the control of the AD file. Doubles can be sorted and packed to ++// avoid holes. Holes in the outgoing arguments may be necessary for ++// varargs C calling conventions. ++// Note 3: Region 0-3 is even aligned, with pad2 as needed. Region 3-5 is ++// even aligned with pad0 as needed. ++// Region 6 is even aligned. Region 6-7 is NOT even aligned; ++// region 6-11 is even aligned; it may be padded out more so that ++// the region from SP to FP meets the minimum stack alignment. ++// Note 4: For I2C adapters, the incoming FP may not meet the minimum stack ++// alignment. Region 11, pad1, may be dynamically extended so that ++// SP meets the minimum alignment. ++ ++ ++frame %{ ++ ++ stack_direction(TOWARDS_LOW); ++ ++ // These two registers define part of the calling convention ++ // between compiled code and the interpreter. ++ // SEE StartI2CNode::calling_convention & StartC2INode::calling_convention & StartOSRNode::calling_convention ++ // for more information. ++ ++ inline_cache_reg(T1); // Inline Cache Register ++ interpreter_method_oop_reg(S3); // Method Oop Register when calling interpreter ++ ++ // Optional: name the operand used by cisc-spilling to access [stack_pointer + offset] ++ cisc_spilling_operand_name(indOffset32); ++ ++ // Number of stack slots consumed by locking an object ++ // generate Compile::sync_stack_slots ++ sync_stack_slots(2); ++ ++ frame_pointer(SP); ++ ++ // Interpreter stores its frame pointer in a register which is ++ // stored to the stack by I2CAdaptors. ++ // I2CAdaptors convert from interpreted java to compiled java. ++ ++ interpreter_frame_pointer(FP); ++ ++ // generate Matcher::stack_alignment ++ stack_alignment(StackAlignmentInBytes); //wordSize = sizeof(char*); ++ ++ // Number of stack slots between incoming argument block and the start of ++ // a new frame. The PROLOG must add this many slots to the stack. The ++ // EPILOG must remove this many slots. ++ in_preserve_stack_slots(4); //Now VerifyStackAtCalls is defined as false ! Leave two stack slots for ra and fp ++ ++ // Number of outgoing stack slots killed above the out_preserve_stack_slots ++ // for calls to C. Supports the var-args backing area for register parms. ++ varargs_C_out_slots_killed(0); ++ ++ // The after-PROLOG location of the return address. Location of ++ // return address specifies a type (REG or STACK) and a number ++ // representing the register number (i.e. - use a register name) or ++ // stack slot. ++ // Ret Addr is on stack in slot 0 if no locks or verification or alignment. ++ // Otherwise, it is above the locks and verification slot and alignment word ++ //return_addr(STACK -1+ round_to(1+VerifyStackAtCalls+Compile::current()->sync()*Compile::current()->sync_stack_slots(),WordsPerLong)); ++ return_addr(REG RA); ++ ++ // Body of function which returns an integer array locating ++ // arguments either in registers or in stack slots. Passed an array ++ // of ideal registers called "sig" and a "length" count. Stack-slot ++ // offsets are based on outgoing arguments, i.e. a CALLER setting up ++ // arguments for a CALLEE. Incoming stack arguments are ++ // automatically biased by the preserve_stack_slots field above. ++ ++ ++ // will be generated as Matcher::calling_convention(OptoRegPair *sig, uint length, bool is_outgoing) ++ // StartNode::calling_convention calls this.
++ calling_convention %{ ++ SharedRuntime::java_calling_convention(sig_bt, regs, length, false); ++ %} ++ ++ ++ ++ ++ // Body of function which returns an integer array locating ++ // arguments either in registers or in stack slots. Passed an array ++ // of ideal registers called "sig" and a "length" count. Stack-slot ++ // offsets are based on outgoing arguments, i.e. a CALLER setting up ++ // arguments for a CALLEE. Incoming stack arguments are ++ // automatically biased by the preserve_stack_slots field above. ++ ++ ++ // SEE CallRuntimeNode::calling_convention for more information. ++ c_calling_convention %{ ++ (void) SharedRuntime::c_calling_convention(sig_bt, regs, /*regs2=*/NULL, length); ++ %} ++ ++ ++ // Location of C & interpreter return values ++ // register(s) contain(s) return value for Op_StartI2C and Op_StartOSR. ++ // SEE Matcher::match. ++ c_return_value %{ ++ assert( ideal_reg >= Op_RegI && ideal_reg <= Op_RegL, "only return normal values" ); ++ /* -- , -- , Op_RegN, Op_RegI, Op_RegP, Op_RegF, Op_RegD, Op_RegL */ ++ static int lo[Op_RegL+1] = { 0, 0, V0_num, V0_num, V0_num, F0_num, F0_num, V0_num }; ++ static int hi[Op_RegL+1] = { 0, 0, OptoReg::Bad, OptoReg::Bad, V0_H_num, OptoReg::Bad, F0_H_num, V0_H_num }; ++ return OptoRegPair(hi[ideal_reg],lo[ideal_reg]); ++ %} ++ ++ // Location of return values ++ // register(s) contain(s) return value for Op_StartC2I and Op_Start. ++ // SEE Matcher::match. ++ ++ return_value %{ ++ assert( ideal_reg >= Op_RegI && ideal_reg <= Op_RegL, "only return normal values" ); ++ /* -- , -- , Op_RegN, Op_RegI, Op_RegP, Op_RegF, Op_RegD, Op_RegL */ ++ static int lo[Op_RegL+1] = { 0, 0, V0_num, V0_num, V0_num, F0_num, F0_num, V0_num }; ++ static int hi[Op_RegL+1] = { 0, 0, OptoReg::Bad, OptoReg::Bad, V0_H_num, OptoReg::Bad, F0_H_num, V0_H_num}; ++ return OptoRegPair(hi[ideal_reg],lo[ideal_reg]); ++ %} ++ ++%} ++ ++//----------ATTRIBUTES--------------------------------------------------------- ++//----------Operand Attributes------------------------------------------------- ++op_attrib op_cost(0); // Required cost attribute ++ ++//----------Instruction Attributes--------------------------------------------- ++ins_attrib ins_cost(100); // Required cost attribute ++ins_attrib ins_size(32); // Required size attribute (in bits) ++ins_attrib ins_pc_relative(0); // Required PC Relative flag ++ins_attrib ins_short_branch(0); // Required flag: is this instruction a ++ // non-matching short branch variant of some ++ // long branch? ++ins_attrib ins_alignment(4); // Required alignment attribute (must be a power of 2) ++ // specifies the alignment that some part of the instruction (not ++ // necessarily the start) requires. If > 1, a compute_padding() ++ // function must be provided for the instruction ++ ++//----------OPERANDS----------------------------------------------------------- ++// Operand definitions must precede instruction definitions for correct parsing ++// in the ADLC because operands constitute user defined types which are used in ++// instruction definitions. 
++ ++// Vectors ++ ++operand vecX() %{ ++ constraint(ALLOC_IN_RC(vectorx_reg)); ++ match(VecX); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand vecY() %{ ++ constraint(ALLOC_IN_RC(vectory_reg)); ++ match(VecY); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++// Flags register, used as output of compare instructions ++operand FlagsReg() %{ ++ constraint(ALLOC_IN_RC(t0_reg)); ++ match(RegFlags); ++ ++ format %{ "T0" %} ++ interface(REG_INTER); ++%} ++ ++//----------Simple Operands---------------------------------------------------- ++// TODO: Should we need to define some more special immediate number ? ++// Immediate Operands ++// Integer Immediate ++operand immI() %{ ++ match(ConI); ++ ++ op_cost(20); ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++operand immIU1() %{ ++ predicate((0 <= n->get_int()) && (n->get_int() <= 1)); ++ match(ConI); ++ ++ op_cost(5); ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++operand immIU2() %{ ++ predicate((0 <= n->get_int()) && (n->get_int() <= 3)); ++ match(ConI); ++ ++ op_cost(5); ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++operand immIU3() %{ ++ predicate((0 <= n->get_int()) && (n->get_int() <= 7)); ++ match(ConI); ++ ++ op_cost(5); ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++operand immIU4() %{ ++ predicate((0 <= n->get_int()) && (n->get_int() <= 15)); ++ match(ConI); ++ ++ op_cost(5); ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++operand immIU5() %{ ++ predicate((0 <= n->get_int()) && (n->get_int() <= 31)); ++ match(ConI); ++ ++ op_cost(5); ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++operand immIU6() %{ ++ predicate((0 <= n->get_int()) && (n->get_int() <= 63)); ++ match(ConI); ++ ++ op_cost(5); ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++operand immIU8() %{ ++ predicate((0 <= n->get_int()) && (n->get_int() <= 255)); ++ match(ConI); ++ ++ op_cost(5); ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++operand immI10() %{ ++ predicate((-512 <= n->get_int()) && (n->get_int() <= 511)); ++ match(ConI); ++ ++ op_cost(5); ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++operand immI12() %{ ++ predicate((-2048 <= n->get_int()) && (n->get_int() <= 2047)); ++ match(ConI); ++ ++ op_cost(5); ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++operand immI_M65536() %{ ++ predicate(n->get_int() == -65536); ++ match(ConI); ++ ++ op_cost(5); ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++// Constant for decrement ++operand immI_M1() %{ ++ predicate(n->get_int() == -1); ++ match(ConI); ++ ++ op_cost(0); ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++// Constant for zero ++operand immI_0() %{ ++ predicate(n->get_int() == 0); ++ match(ConI); ++ ++ op_cost(0); ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++operand immI_1() %{ ++ predicate(n->get_int() == 1); ++ match(ConI); ++ ++ op_cost(0); ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++operand immI_2() %{ ++ predicate(n->get_int() == 2); ++ match(ConI); ++ ++ op_cost(0); ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++operand immI_16() %{ ++ predicate(n->get_int() == 16); ++ match(ConI); ++ ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++operand immI_24() %{ ++ predicate(n->get_int() == 24); ++ match(ConI); ++ ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++// Constant for long shifts ++operand immI_32() %{ ++ predicate(n->get_int() == 32); ++ match(ConI); ++ ++ op_cost(0); ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++// Constant for byte-wide masking ++operand immI_255() %{ ++ 
predicate(n->get_int() == 255); ++ match(ConI); ++ ++ op_cost(0); ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++operand immI_65535() %{ ++ predicate(n->get_int() == 65535); ++ match(ConI); ++ ++ op_cost(5); ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++operand immI_MaxI() %{ ++ predicate(n->get_int() == 2147483647); ++ match(ConI); ++ ++ op_cost(0); ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++operand immI_M2047_2048() %{ ++ predicate((-2047 <= n->get_int()) && (n->get_int() <= 2048)); ++ match(ConI); ++ ++ op_cost(10); ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++// Valid scale values for addressing modes ++operand immI_0_3() %{ ++ predicate(0 <= n->get_int() && (n->get_int() <= 3)); ++ match(ConI); ++ ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++operand immI_0_31() %{ ++ predicate(n->get_int() >= 0 && n->get_int() <= 31); ++ match(ConI); ++ ++ op_cost(0); ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++operand immI_0_4095() %{ ++ predicate(n->get_int() >= 0 && n->get_int() <= 4095); ++ match(ConI); ++ op_cost(0); ++ ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++operand immI_1_4() %{ ++ predicate(1 <= n->get_int() && (n->get_int() <= 4)); ++ match(ConI); ++ ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++operand immI_32_63() %{ ++ predicate(n->get_int() >= 32 && n->get_int() <= 63); ++ match(ConI); ++ op_cost(0); ++ ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++operand immI_M128_255() %{ ++ predicate((-128 <= n->get_int()) && (n->get_int() <= 255)); ++ match(ConI); ++ ++ op_cost(5); ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++// Operand for non-negtive integer mask ++operand immI_nonneg_mask() %{ ++ predicate((n->get_int() >= 0) && (Assembler::is_int_mask(n->get_int()) != -1)); ++ match(ConI); ++ ++ op_cost(0); ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++// Long Immediate ++operand immL() %{ ++ match(ConL); ++ ++ op_cost(20); ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++operand immLU5() %{ ++ predicate((0 <= n->get_long()) && (n->get_long() <= 31)); ++ match(ConL); ++ ++ op_cost(5); ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++operand immL10() %{ ++ predicate((-512 <= n->get_long()) && (n->get_long() <= 511)); ++ match(ConL); ++ ++ op_cost(5); ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++operand immL12() %{ ++ predicate((-2048 <= n->get_long()) && (n->get_long() <= 2047)); ++ match(ConL); ++ ++ op_cost(10); ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++// Long Immediate 32-bit signed ++operand immL32() ++%{ ++ predicate(n->get_long() == (int)n->get_long()); ++ match(ConL); ++ ++ op_cost(15); ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++// bit 3..6 zero ++operand immL_M121() %{ ++ predicate(n->get_long() == -121L); ++ match(ConL); ++ op_cost(0); ++ ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++// bit 0..2 zero ++operand immL_M8() %{ ++ predicate(n->get_long() == -8L); ++ match(ConL); ++ op_cost(0); ++ ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++// bit 1..2 zero ++operand immL_M7() %{ ++ predicate(n->get_long() == -7L); ++ match(ConL); ++ op_cost(0); ++ ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++// bit 2 zero ++operand immL_M5() %{ ++ predicate(n->get_long() == -5L); ++ match(ConL); ++ op_cost(0); ++ ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++// bit 0..1 zero ++operand immL_M4() %{ ++ predicate(n->get_long() == -4L); ++ match(ConL); ++ op_cost(0); ++ ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++// Long Immediate zero ++operand 
immL_0() %{ ++ predicate(n->get_long() == 0L); ++ match(ConL); ++ op_cost(0); ++ ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++operand immL_7() %{ ++ predicate(n->get_long() == 7L); ++ match(ConL); ++ op_cost(0); ++ ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++operand immL_MaxUI() %{ ++ predicate(n->get_long() == 0xFFFFFFFFL); ++ match(ConL); ++ op_cost(20); ++ ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++operand immL_M2047_2048() %{ ++ predicate((-2047 <= n->get_long()) && (n->get_long() <= 2048)); ++ match(ConL); ++ ++ op_cost(10); ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++operand immL_0_4095() %{ ++ predicate(n->get_long() >= 0 && n->get_long() <= 4095); ++ match(ConL); ++ op_cost(0); ++ ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++// Operand for non-negtive long mask ++operand immL_nonneg_mask() %{ ++ predicate((n->get_long() >= 0) && (Assembler::is_jlong_mask(n->get_long()) != -1)); ++ match(ConL); ++ ++ op_cost(0); ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++// Pointer Immediate ++operand immP() %{ ++ match(ConP); ++ ++ op_cost(10); ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++// NULL Pointer Immediate ++operand immP_0() %{ ++ predicate(n->get_ptr() == 0); ++ match(ConP); ++ op_cost(0); ++ ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++// Pointer Immediate ++operand immP_no_oop_cheap() %{ ++ predicate(!n->bottom_type()->isa_oop_ptr()); ++ match(ConP); ++ ++ op_cost(5); ++ // formats are generated automatically for constants and base registers ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++// Pointer for polling page ++operand immP_poll() %{ ++ predicate(n->get_ptr() != 0 && n->get_ptr() == (intptr_t)os::get_polling_page()); ++ match(ConP); ++ op_cost(5); ++ ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++// Pointer Immediate ++operand immN() %{ ++ match(ConN); ++ ++ op_cost(10); ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++// NULL Pointer Immediate ++operand immN_0() %{ ++ predicate(n->get_narrowcon() == 0); ++ match(ConN); ++ ++ op_cost(5); ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++operand immNKlass() %{ ++ match(ConNKlass); ++ ++ op_cost(10); ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++// Single-precision floating-point immediate ++operand immF() %{ ++ match(ConF); ++ ++ op_cost(20); ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++// Single-precision floating-point zero ++operand immF_0() %{ ++ predicate(jint_cast(n->getf()) == 0); ++ match(ConF); ++ ++ op_cost(5); ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++// Double-precision floating-point immediate ++operand immD() %{ ++ match(ConD); ++ ++ op_cost(20); ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++// Double-precision floating-point zero ++operand immD_0() %{ ++ predicate(jlong_cast(n->getd()) == 0); ++ match(ConD); ++ ++ op_cost(5); ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++// Register Operands ++// Integer Register ++operand mRegI() %{ ++ constraint(ALLOC_IN_RC(int_reg)); ++ match(RegI); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand no_Ax_mRegI() %{ ++ constraint(ALLOC_IN_RC(no_Ax_int_reg)); ++ match(RegI); ++ match(mRegI); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand mS0RegI() %{ ++ constraint(ALLOC_IN_RC(s0_reg)); ++ match(RegI); ++ match(mRegI); ++ ++ format %{ "S0" %} ++ interface(REG_INTER); ++%} ++ ++operand mS1RegI() %{ ++ constraint(ALLOC_IN_RC(s1_reg)); ++ match(RegI); ++ match(mRegI); ++ ++ format %{ "S1" %} ++ interface(REG_INTER); ++%} ++ ++operand 
mS3RegI() %{ ++ constraint(ALLOC_IN_RC(s3_reg)); ++ match(RegI); ++ match(mRegI); ++ ++ format %{ "S3" %} ++ interface(REG_INTER); ++%} ++ ++operand mS4RegI() %{ ++ constraint(ALLOC_IN_RC(s4_reg)); ++ match(RegI); ++ match(mRegI); ++ ++ format %{ "S4" %} ++ interface(REG_INTER); ++%} ++ ++operand mS5RegI() %{ ++ constraint(ALLOC_IN_RC(s5_reg)); ++ match(RegI); ++ match(mRegI); ++ ++ format %{ "S5" %} ++ interface(REG_INTER); ++%} ++ ++operand mS6RegI() %{ ++ constraint(ALLOC_IN_RC(s6_reg)); ++ match(RegI); ++ match(mRegI); ++ ++ format %{ "S6" %} ++ interface(REG_INTER); ++%} ++ ++operand mS7RegI() %{ ++ constraint(ALLOC_IN_RC(s7_reg)); ++ match(RegI); ++ match(mRegI); ++ ++ format %{ "S7" %} ++ interface(REG_INTER); ++%} ++ ++ ++operand mT0RegI() %{ ++ constraint(ALLOC_IN_RC(t0_reg)); ++ match(RegI); ++ match(mRegI); ++ ++ format %{ "T0" %} ++ interface(REG_INTER); ++%} ++ ++operand mT1RegI() %{ ++ constraint(ALLOC_IN_RC(t1_reg)); ++ match(RegI); ++ match(mRegI); ++ ++ format %{ "T1" %} ++ interface(REG_INTER); ++%} ++ ++operand mT2RegI() %{ ++ constraint(ALLOC_IN_RC(t2_reg)); ++ match(RegI); ++ match(mRegI); ++ ++ format %{ "T2" %} ++ interface(REG_INTER); ++%} ++ ++operand mT3RegI() %{ ++ constraint(ALLOC_IN_RC(t3_reg)); ++ match(RegI); ++ match(mRegI); ++ ++ format %{ "T3" %} ++ interface(REG_INTER); ++%} ++ ++operand mT8RegI() %{ ++ constraint(ALLOC_IN_RC(t8_reg)); ++ match(RegI); ++ match(mRegI); ++ ++ format %{ "T8" %} ++ interface(REG_INTER); ++%} ++ ++operand mT4RegI() %{ ++ constraint(ALLOC_IN_RC(t4_reg)); ++ match(RegI); ++ match(mRegI); ++ ++ format %{ "T4" %} ++ interface(REG_INTER); ++%} ++ ++operand mA0RegI() %{ ++ constraint(ALLOC_IN_RC(a0_reg)); ++ match(RegI); ++ match(mRegI); ++ ++ format %{ "A0" %} ++ interface(REG_INTER); ++%} ++ ++operand mA1RegI() %{ ++ constraint(ALLOC_IN_RC(a1_reg)); ++ match(RegI); ++ match(mRegI); ++ ++ format %{ "A1" %} ++ interface(REG_INTER); ++%} ++ ++operand mA2RegI() %{ ++ constraint(ALLOC_IN_RC(a2_reg)); ++ match(RegI); ++ match(mRegI); ++ ++ format %{ "A2" %} ++ interface(REG_INTER); ++%} ++ ++operand mA3RegI() %{ ++ constraint(ALLOC_IN_RC(a3_reg)); ++ match(RegI); ++ match(mRegI); ++ ++ format %{ "A3" %} ++ interface(REG_INTER); ++%} ++ ++operand mA4RegI() %{ ++ constraint(ALLOC_IN_RC(a4_reg)); ++ match(RegI); ++ match(mRegI); ++ ++ format %{ "A4" %} ++ interface(REG_INTER); ++%} ++ ++operand mA5RegI() %{ ++ constraint(ALLOC_IN_RC(a5_reg)); ++ match(RegI); ++ match(mRegI); ++ ++ format %{ "A5" %} ++ interface(REG_INTER); ++%} ++ ++operand mA6RegI() %{ ++ constraint(ALLOC_IN_RC(a6_reg)); ++ match(RegI); ++ match(mRegI); ++ ++ format %{ "A6" %} ++ interface(REG_INTER); ++%} ++ ++operand mA7RegI() %{ ++ constraint(ALLOC_IN_RC(a7_reg)); ++ match(RegI); ++ match(mRegI); ++ ++ format %{ "A7" %} ++ interface(REG_INTER); ++%} ++ ++operand mRegN() %{ ++ constraint(ALLOC_IN_RC(int_reg)); ++ match(RegN); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand t0_RegN() %{ ++ constraint(ALLOC_IN_RC(t0_reg)); ++ match(RegN); ++ match(mRegN); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand t1_RegN() %{ ++ constraint(ALLOC_IN_RC(t1_reg)); ++ match(RegN); ++ match(mRegN); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand t3_RegN() %{ ++ constraint(ALLOC_IN_RC(t3_reg)); ++ match(RegN); ++ match(mRegN); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand t8_RegN() %{ ++ constraint(ALLOC_IN_RC(t8_reg)); ++ match(RegN); ++ match(mRegN); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand a0_RegN() %{ ++ 
constraint(ALLOC_IN_RC(a0_reg)); ++ match(RegN); ++ match(mRegN); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand a1_RegN() %{ ++ constraint(ALLOC_IN_RC(a1_reg)); ++ match(RegN); ++ match(mRegN); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand a2_RegN() %{ ++ constraint(ALLOC_IN_RC(a2_reg)); ++ match(RegN); ++ match(mRegN); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand a3_RegN() %{ ++ constraint(ALLOC_IN_RC(a3_reg)); ++ match(RegN); ++ match(mRegN); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand a4_RegN() %{ ++ constraint(ALLOC_IN_RC(a4_reg)); ++ match(RegN); ++ match(mRegN); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand a5_RegN() %{ ++ constraint(ALLOC_IN_RC(a5_reg)); ++ match(RegN); ++ match(mRegN); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand a6_RegN() %{ ++ constraint(ALLOC_IN_RC(a6_reg)); ++ match(RegN); ++ match(mRegN); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand a7_RegN() %{ ++ constraint(ALLOC_IN_RC(a7_reg)); ++ match(RegN); ++ match(mRegN); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand s0_RegN() %{ ++ constraint(ALLOC_IN_RC(s0_reg)); ++ match(RegN); ++ match(mRegN); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand s1_RegN() %{ ++ constraint(ALLOC_IN_RC(s1_reg)); ++ match(RegN); ++ match(mRegN); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand s2_RegN() %{ ++ constraint(ALLOC_IN_RC(s2_reg)); ++ match(RegN); ++ match(mRegN); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand s3_RegN() %{ ++ constraint(ALLOC_IN_RC(s3_reg)); ++ match(RegN); ++ match(mRegN); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand s4_RegN() %{ ++ constraint(ALLOC_IN_RC(s4_reg)); ++ match(RegN); ++ match(mRegN); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand s5_RegN() %{ ++ constraint(ALLOC_IN_RC(s5_reg)); ++ match(RegN); ++ match(mRegN); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand s6_RegN() %{ ++ constraint(ALLOC_IN_RC(s6_reg)); ++ match(RegN); ++ match(mRegN); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand s7_RegN() %{ ++ constraint(ALLOC_IN_RC(s7_reg)); ++ match(RegN); ++ match(mRegN); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++// Pointer Register ++operand mRegP() %{ ++ constraint(ALLOC_IN_RC(p_reg)); ++ match(RegP); ++ match(a0_RegP); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand no_T8_mRegP() %{ ++ constraint(ALLOC_IN_RC(no_T8_p_reg)); ++ match(RegP); ++ match(mRegP); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand no_Ax_mRegP() %{ ++ constraint(ALLOC_IN_RC(no_Ax_p_reg)); ++ match(RegP); ++ match(mRegP); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand s1_RegP() ++%{ ++ constraint(ALLOC_IN_RC(s1_long_reg)); ++ match(RegP); ++ match(mRegP); ++ match(no_T8_mRegP); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand s3_RegP() ++%{ ++ constraint(ALLOC_IN_RC(s3_long_reg)); ++ match(RegP); ++ match(mRegP); ++ match(no_T8_mRegP); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand s4_RegP() ++%{ ++ constraint(ALLOC_IN_RC(s4_long_reg)); ++ match(RegP); ++ match(mRegP); ++ match(no_T8_mRegP); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand s5_RegP() ++%{ ++ constraint(ALLOC_IN_RC(s5_long_reg)); ++ match(RegP); ++ match(mRegP); ++ match(no_T8_mRegP); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand s6_RegP() ++%{ ++ constraint(ALLOC_IN_RC(s6_long_reg)); ++ match(RegP); ++ match(mRegP); ++ 
match(no_T8_mRegP); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand s7_RegP() ++%{ ++ constraint(ALLOC_IN_RC(s7_long_reg)); ++ match(RegP); ++ match(mRegP); ++ match(no_T8_mRegP); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand t0_RegP() ++%{ ++ constraint(ALLOC_IN_RC(t0_long_reg)); ++ match(RegP); ++ match(mRegP); ++ match(no_T8_mRegP); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand t1_RegP() ++%{ ++ constraint(ALLOC_IN_RC(t1_long_reg)); ++ match(RegP); ++ match(mRegP); ++ match(no_T8_mRegP); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand t2_RegP() ++%{ ++ constraint(ALLOC_IN_RC(t2_long_reg)); ++ match(RegP); ++ match(mRegP); ++ match(no_T8_mRegP); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand t3_RegP() ++%{ ++ constraint(ALLOC_IN_RC(t3_long_reg)); ++ match(RegP); ++ match(mRegP); ++ match(no_T8_mRegP); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand t8_RegP() ++%{ ++ constraint(ALLOC_IN_RC(t8_long_reg)); ++ match(RegP); ++ match(mRegP); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand a0_RegP() ++%{ ++ constraint(ALLOC_IN_RC(a0_long_reg)); ++ match(RegP); ++ match(mRegP); ++ match(no_T8_mRegP); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand a1_RegP() ++%{ ++ constraint(ALLOC_IN_RC(a1_long_reg)); ++ match(RegP); ++ match(mRegP); ++ match(no_T8_mRegP); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand a2_RegP() ++%{ ++ constraint(ALLOC_IN_RC(a2_long_reg)); ++ match(RegP); ++ match(mRegP); ++ match(no_T8_mRegP); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand a3_RegP() ++%{ ++ constraint(ALLOC_IN_RC(a3_long_reg)); ++ match(RegP); ++ match(mRegP); ++ match(no_T8_mRegP); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand a4_RegP() ++%{ ++ constraint(ALLOC_IN_RC(a4_long_reg)); ++ match(RegP); ++ match(mRegP); ++ match(no_T8_mRegP); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++ ++operand a5_RegP() ++%{ ++ constraint(ALLOC_IN_RC(a5_long_reg)); ++ match(RegP); ++ match(mRegP); ++ match(no_T8_mRegP); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand a6_RegP() ++%{ ++ constraint(ALLOC_IN_RC(a6_long_reg)); ++ match(RegP); ++ match(mRegP); ++ match(no_T8_mRegP); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand a7_RegP() ++%{ ++ constraint(ALLOC_IN_RC(a7_long_reg)); ++ match(RegP); ++ match(mRegP); ++ match(no_T8_mRegP); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand v0_RegP() ++%{ ++ constraint(ALLOC_IN_RC(v0_long_reg)); ++ match(RegP); ++ match(mRegP); ++ match(no_T8_mRegP); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand v1_RegP() ++%{ ++ constraint(ALLOC_IN_RC(v1_long_reg)); ++ match(RegP); ++ match(mRegP); ++ match(no_T8_mRegP); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand mRegL() %{ ++ constraint(ALLOC_IN_RC(long_reg)); ++ match(RegL); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand mRegI2L(mRegI reg) %{ ++ match(ConvI2L reg); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand v0RegL() %{ ++ constraint(ALLOC_IN_RC(v0_long_reg)); ++ match(RegL); ++ match(mRegL); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand v1RegL() %{ ++ constraint(ALLOC_IN_RC(v1_long_reg)); ++ match(RegL); ++ match(mRegL); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand a0RegL() %{ ++ constraint(ALLOC_IN_RC(a0_long_reg)); ++ match(RegL); ++ match(mRegL); ++ ++ format %{ "A0" %} ++ interface(REG_INTER); ++%} ++ ++operand a1RegL() %{ ++ 
constraint(ALLOC_IN_RC(a1_long_reg)); ++ match(RegL); ++ match(mRegL); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand a2RegL() %{ ++ constraint(ALLOC_IN_RC(a2_long_reg)); ++ match(RegL); ++ match(mRegL); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand a3RegL() %{ ++ constraint(ALLOC_IN_RC(a3_long_reg)); ++ match(RegL); ++ match(mRegL); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand t0RegL() %{ ++ constraint(ALLOC_IN_RC(t0_long_reg)); ++ match(RegL); ++ match(mRegL); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand t1RegL() %{ ++ constraint(ALLOC_IN_RC(t1_long_reg)); ++ match(RegL); ++ match(mRegL); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand t3RegL() %{ ++ constraint(ALLOC_IN_RC(t3_long_reg)); ++ match(RegL); ++ match(mRegL); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand t8RegL() %{ ++ constraint(ALLOC_IN_RC(t8_long_reg)); ++ match(RegL); ++ match(mRegL); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand a4RegL() %{ ++ constraint(ALLOC_IN_RC(a4_long_reg)); ++ match(RegL); ++ match(mRegL); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand a5RegL() %{ ++ constraint(ALLOC_IN_RC(a5_long_reg)); ++ match(RegL); ++ match(mRegL); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand a6RegL() %{ ++ constraint(ALLOC_IN_RC(a6_long_reg)); ++ match(RegL); ++ match(mRegL); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand a7RegL() %{ ++ constraint(ALLOC_IN_RC(a7_long_reg)); ++ match(RegL); ++ match(mRegL); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand s0RegL() %{ ++ constraint(ALLOC_IN_RC(s0_long_reg)); ++ match(RegL); ++ match(mRegL); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand s1RegL() %{ ++ constraint(ALLOC_IN_RC(s1_long_reg)); ++ match(RegL); ++ match(mRegL); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand s3RegL() %{ ++ constraint(ALLOC_IN_RC(s3_long_reg)); ++ match(RegL); ++ match(mRegL); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand s4RegL() %{ ++ constraint(ALLOC_IN_RC(s4_long_reg)); ++ match(RegL); ++ match(mRegL); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand s7RegL() %{ ++ constraint(ALLOC_IN_RC(s7_long_reg)); ++ match(RegL); ++ match(mRegL); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++// Floating register operands ++operand regF() %{ ++ constraint(ALLOC_IN_RC(flt_reg)); ++ match(RegF); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++//Double Precision Floating register operands ++operand regD() %{ ++ constraint(ALLOC_IN_RC(dbl_reg)); ++ match(RegD); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++//----------Memory Operands---------------------------------------------------- ++// Indirect Memory Operand ++operand indirect(mRegP reg) %{ ++ constraint(ALLOC_IN_RC(p_reg)); ++ match(reg); ++ ++ format %{ "[$reg] @ indirect" %} ++ interface(MEMORY_INTER) %{ ++ base($reg); ++ index(0x0); /* NO_INDEX */ ++ scale(0x0); ++ disp(0x0); ++ %} ++%} ++ ++// Indirect Memory Plus Short Offset Operand ++operand indOffset12(mRegP reg, immL12 off) ++%{ ++ constraint(ALLOC_IN_RC(p_reg)); ++ match(AddP reg off); ++ ++ op_cost(10); ++ format %{ "[$reg + $off (12-bit)] @ indOffset12" %} ++ interface(MEMORY_INTER) %{ ++ base($reg); ++ index(0x0); /* NO_INDEX */ ++ scale(0x0); ++ disp($off); ++ %} ++%} ++ ++operand indOffset12I2L(mRegP reg, immI12 off) ++%{ ++ constraint(ALLOC_IN_RC(p_reg)); ++ match(AddP reg (ConvI2L off)); ++ ++ op_cost(10); ++ format %{ "[$reg + $off (12-bit)] @ 
indOffset12I2L" %} ++ interface(MEMORY_INTER) %{ ++ base($reg); ++ index(0x0); /* NO_INDEX */ ++ scale(0x0); ++ disp($off); ++ %} ++%} ++ ++// Indirect Memory Plus Index Register ++operand indIndex(mRegP addr, mRegL index) %{ ++ constraint(ALLOC_IN_RC(p_reg)); ++ match(AddP addr index); ++ ++ op_cost(20); ++ format %{"[$addr + $index] @ indIndex" %} ++ interface(MEMORY_INTER) %{ ++ base($addr); ++ index($index); ++ scale(0x0); ++ disp(0x0); ++ %} ++%} ++ ++operand indIndexI2L(mRegP reg, mRegI ireg) ++%{ ++ constraint(ALLOC_IN_RC(ptr_reg)); ++ match(AddP reg (ConvI2L ireg)); ++ op_cost(10); ++ format %{ "[$reg + $ireg] @ indIndexI2L" %} ++ interface(MEMORY_INTER) %{ ++ base($reg); ++ index($ireg); ++ scale(0x0); ++ disp(0x0); ++ %} ++%} ++ ++// Indirect Memory Operand ++operand indirectNarrow(mRegN reg) ++%{ ++ predicate(Universe::narrow_oop_shift() == 0); ++ constraint(ALLOC_IN_RC(p_reg)); ++ op_cost(10); ++ match(DecodeN reg); ++ ++ format %{ "[$reg] @ indirectNarrow" %} ++ interface(MEMORY_INTER) %{ ++ base($reg); ++ index(0x0); ++ scale(0x0); ++ disp(0x0); ++ %} ++%} ++ ++// Indirect Memory Plus Short Offset Operand ++operand indOffset12Narrow(mRegN reg, immL12 off) ++%{ ++ predicate(Universe::narrow_oop_shift() == 0); ++ constraint(ALLOC_IN_RC(p_reg)); ++ op_cost(10); ++ match(AddP (DecodeN reg) off); ++ ++ format %{ "[$reg + $off (12-bit)] @ indOffset12Narrow" %} ++ interface(MEMORY_INTER) %{ ++ base($reg); ++ index(0x0); ++ scale(0x0); ++ disp($off); ++ %} ++%} ++ ++//----------Conditional Branch Operands---------------------------------------- ++// Comparison Op - This is the operation of the comparison, and is limited to ++// the following set of codes: ++// L (<), LE (<=), G (>), GE (>=), E (==), NE (!=) ++// ++// Other attributes of the comparison, such as unsignedness, are specified ++// by the comparison instruction that sets a condition code flags register. ++// That result is represented by a flags operand whose subtype is appropriate ++// to the unsignedness (etc.) of the comparison. ++// ++// Later, the instruction which matches both the Comparison Op (a Bool) and ++// the flags (produced by the Cmp) specifies the coding of the comparison op ++// by matching a specific subtype of Bool operand below, such as cmpOpU. ++ ++// Comparison Code ++operand cmpOp() %{ ++ match(Bool); ++ ++ format %{ "" %} ++ interface(COND_INTER) %{ ++ equal(0x01); ++ not_equal(0x02); ++ greater(0x03); ++ greater_equal(0x04); ++ less(0x05); ++ less_equal(0x06); ++ overflow(0x7); ++ no_overflow(0x8); ++ %} ++%} ++ ++ ++// Comparison Code ++// Comparison Code, unsigned compare. Used by FP also, with ++// C2 (unordered) turned into GT or LT already. The other bits ++// C0 and C3 are turned into Carry & Zero flags. ++operand cmpOpU() %{ ++ match(Bool); ++ ++ format %{ "" %} ++ interface(COND_INTER) %{ ++ equal(0x01); ++ not_equal(0x02); ++ greater(0x03); ++ greater_equal(0x04); ++ less(0x05); ++ less_equal(0x06); ++ overflow(0x7); ++ no_overflow(0x8); ++ %} ++%} ++ ++ ++//----------Special Memory Operands-------------------------------------------- ++// Stack Slot Operand - This operand is used for loading and storing temporary ++// values on the stack where a match requires a value to ++// flow through memory.
++operand stackSlotP(sRegP reg) %{ ++ constraint(ALLOC_IN_RC(stack_slots)); ++ // No match rule because this operand is only generated in matching ++ op_cost(50); ++ format %{ "[$reg]" %} ++ interface(MEMORY_INTER) %{ ++ base(0x1d); // SP ++ index(0x0); // No Index ++ scale(0x0); // No Scale ++ disp($reg); // Stack Offset ++ %} ++%} ++ ++operand stackSlotI(sRegI reg) %{ ++ constraint(ALLOC_IN_RC(stack_slots)); ++ // No match rule because this operand is only generated in matching ++ op_cost(50); ++ format %{ "[$reg]" %} ++ interface(MEMORY_INTER) %{ ++ base(0x1d); // SP ++ index(0x0); // No Index ++ scale(0x0); // No Scale ++ disp($reg); // Stack Offset ++ %} ++%} ++ ++operand stackSlotF(sRegF reg) %{ ++ constraint(ALLOC_IN_RC(stack_slots)); ++ // No match rule because this operand is only generated in matching ++ op_cost(50); ++ format %{ "[$reg]" %} ++ interface(MEMORY_INTER) %{ ++ base(0x1d); // SP ++ index(0x0); // No Index ++ scale(0x0); // No Scale ++ disp($reg); // Stack Offset ++ %} ++%} ++ ++operand stackSlotD(sRegD reg) %{ ++ constraint(ALLOC_IN_RC(stack_slots)); ++ // No match rule because this operand is only generated in matching ++ op_cost(50); ++ format %{ "[$reg]" %} ++ interface(MEMORY_INTER) %{ ++ base(0x1d); // SP ++ index(0x0); // No Index ++ scale(0x0); // No Scale ++ disp($reg); // Stack Offset ++ %} ++%} ++ ++operand stackSlotL(sRegL reg) %{ ++ constraint(ALLOC_IN_RC(stack_slots)); ++ // No match rule because this operand is only generated in matching ++ op_cost(50); ++ format %{ "[$reg]" %} ++ interface(MEMORY_INTER) %{ ++ base(0x1d); // SP ++ index(0x0); // No Index ++ scale(0x0); // No Scale ++ disp($reg); // Stack Offset ++ %} ++%} ++ ++ ++//------------------------OPERAND CLASSES-------------------------------------- ++opclass memory( indirect, indOffset12, indOffset12I2L, indIndex, indIndexI2L, ++ indirectNarrow, indOffset12Narrow); ++opclass memory_loadRange(indOffset12, indirect); ++ ++opclass mRegLorI2L(mRegI2L, mRegL); ++//----------PIPELINE----------------------------------------------------------- ++// Rules which define the behavior of the target architectures pipeline. ++ ++pipeline %{ ++ ++ //----------ATTRIBUTES--------------------------------------------------------- ++ attributes %{ ++ fixed_size_instructions; // Fixed size instructions ++ max_instructions_per_bundle = 1; // 1 instruction per bundle ++ max_bundles_per_cycle = 4; // Up to 4 bundles per cycle ++ bundle_unit_size=4; ++ instruction_unit_size = 4; // An instruction is 4 bytes long ++ instruction_fetch_unit_size = 16; // The processor fetches one line ++ instruction_fetch_units = 1; // of 16 bytes ++ ++ // List of nop instructions ++ nops( MachNop ); ++ %} ++ ++ //----------RESOURCES---------------------------------------------------------- ++ // Resources are the functional units available to the machine ++ ++ resources(D1, D2, D3, D4, DECODE = D1 | D2 | D3| D4, ALU1, ALU2, ALU = ALU1 | ALU2, FPU1, FPU2, FPU = FPU1 | FPU2, MEM, BR); ++ ++ //----------PIPELINE DESCRIPTION----------------------------------------------- ++ // Pipeline Description specifies the stages in the machine's pipeline ++ ++ // IF: fetch ++ // ID: decode ++ // RD: read ++ // CA: caculate ++ // WB: write back ++ // CM: commit ++ ++ pipe_desc(IF, ID, RD, CA, WB, CM); ++ ++ ++ //----------PIPELINE CLASSES--------------------------------------------------- ++ // Pipeline Classes describe the stages in which input and output are ++ // referenced by the hardware pipeline. 
++ ++ //No.1 Integer ALU reg-reg operation : dst <-- reg1 op reg2 ++ pipe_class ialu_regI_regI(mRegI dst, mRegI src1, mRegI src2) %{ ++ single_instruction; ++ src1 : RD(read); ++ src2 : RD(read); ++ dst : WB(write)+1; ++ DECODE : ID; ++ ALU : CA; ++ %} ++ ++ //No.19 Integer mult operation : dst <-- reg1 mult reg2 ++ pipe_class ialu_mult(mRegI dst, mRegI src1, mRegI src2) %{ ++ src1 : RD(read); ++ src2 : RD(read); ++ dst : WB(write)+5; ++ DECODE : ID; ++ ALU2 : CA; ++ %} ++ ++ pipe_class mulL_reg_reg(mRegL dst, mRegL src1, mRegL src2) %{ ++ src1 : RD(read); ++ src2 : RD(read); ++ dst : WB(write)+10; ++ DECODE : ID; ++ ALU2 : CA; ++ %} ++ ++ //No.19 Integer div operation : dst <-- reg1 div reg2 ++ pipe_class ialu_div(mRegI dst, mRegI src1, mRegI src2) %{ ++ src1 : RD(read); ++ src2 : RD(read); ++ dst : WB(write)+10; ++ DECODE : ID; ++ ALU2 : CA; ++ %} ++ ++ //No.19 Integer mod operation : dst <-- reg1 mod reg2 ++ pipe_class ialu_mod(mRegI dst, mRegI src1, mRegI src2) %{ ++ instruction_count(2); ++ src1 : RD(read); ++ src2 : RD(read); ++ dst : WB(write)+10; ++ DECODE : ID; ++ ALU2 : CA; ++ %} ++ ++ //No.15 Long ALU reg-reg operation : dst <-- reg1 op reg2 ++ pipe_class ialu_regL_regL(mRegL dst, mRegL src1, mRegL src2) %{ ++ instruction_count(2); ++ src1 : RD(read); ++ src2 : RD(read); ++ dst : WB(write); ++ DECODE : ID; ++ ALU : CA; ++ %} ++ ++ //No.18 Long ALU reg-imm16 operation : dst <-- reg1 op imm16 ++ pipe_class ialu_regL_imm16(mRegL dst, mRegL src) %{ ++ instruction_count(2); ++ src : RD(read); ++ dst : WB(write); ++ DECODE : ID; ++ ALU : CA; ++ %} ++ ++ //no.16 load Long from memory : ++ pipe_class ialu_loadL(mRegL dst, memory mem) %{ ++ instruction_count(2); ++ mem : RD(read); ++ dst : WB(write)+5; ++ DECODE : ID; ++ MEM : RD; ++ %} ++ ++ //No.17 Store Long to Memory : ++ pipe_class ialu_storeL(mRegL src, memory mem) %{ ++ instruction_count(2); ++ mem : RD(read); ++ src : RD(read); ++ DECODE : ID; ++ MEM : RD; ++ %} ++ ++ //No.2 Integer ALU reg-imm16 operation : dst <-- reg1 op imm16 ++ pipe_class ialu_regI_imm16(mRegI dst, mRegI src) %{ ++ single_instruction; ++ src : RD(read); ++ dst : WB(write); ++ DECODE : ID; ++ ALU : CA; ++ %} ++ ++ //No.3 Integer move operation : dst <-- reg ++ pipe_class ialu_regI_mov(mRegI dst, mRegI src) %{ ++ src : RD(read); ++ dst : WB(write); ++ DECODE : ID; ++ ALU : CA; ++ %} ++ ++ //No.4 No instructions : do nothing ++ pipe_class empty( ) %{ ++ instruction_count(0); ++ %} ++ ++ //No.5 UnConditional branch : ++ pipe_class pipe_jump( label labl ) %{ ++ multiple_bundles; ++ DECODE : ID; ++ BR : RD; ++ %} ++ ++ //No.6 ALU Conditional branch : ++ pipe_class pipe_alu_branch(mRegI src1, mRegI src2, label labl ) %{ ++ multiple_bundles; ++ src1 : RD(read); ++ src2 : RD(read); ++ DECODE : ID; ++ BR : RD; ++ %} ++ ++ //no.7 load integer from memory : ++ pipe_class ialu_loadI(mRegI dst, memory mem) %{ ++ mem : RD(read); ++ dst : WB(write)+3; ++ DECODE : ID; ++ MEM : RD; ++ %} ++ ++ //No.8 Store Integer to Memory : ++ pipe_class ialu_storeI(mRegI src, memory mem) %{ ++ mem : RD(read); ++ src : RD(read); ++ DECODE : ID; ++ MEM : RD; ++ %} ++ ++ ++ //No.10 Floating FPU reg-reg operation : dst <-- reg1 op reg2 ++ pipe_class fpu_regF_regF(regF dst, regF src1, regF src2) %{ ++ src1 : RD(read); ++ src2 : RD(read); ++ dst : WB(write); ++ DECODE : ID; ++ FPU : CA; ++ %} ++ ++ //No.22 Floating div operation : dst <-- reg1 div reg2 ++ pipe_class fpu_div(regF dst, regF src1, regF src2) %{ ++ src1 : RD(read); ++ src2 : RD(read); ++ dst : WB(write); ++ DECODE : ID; ++ FPU2 : 
CA; ++ %} ++ ++ pipe_class fcvt_I2D(regD dst, mRegI src) %{ ++ src : RD(read); ++ dst : WB(write); ++ DECODE : ID; ++ FPU1 : CA; ++ %} ++ ++ pipe_class fcvt_D2I(mRegI dst, regD src) %{ ++ src : RD(read); ++ dst : WB(write); ++ DECODE : ID; ++ FPU1 : CA; ++ %} ++ ++ pipe_class pipe_mfc1(mRegI dst, regD src) %{ ++ src : RD(read); ++ dst : WB(write); ++ DECODE : ID; ++ MEM : RD; ++ %} ++ ++ pipe_class pipe_mtc1(regD dst, mRegI src) %{ ++ src : RD(read); ++ dst : WB(write); ++ DECODE : ID; ++ MEM : RD(5); ++ %} ++ ++ //No.23 Floating sqrt operation : dst <-- reg1 sqrt reg2 ++ pipe_class fpu_sqrt(regF dst, regF src1, regF src2) %{ ++ multiple_bundles; ++ src1 : RD(read); ++ src2 : RD(read); ++ dst : WB(write); ++ DECODE : ID; ++ FPU2 : CA; ++ %} ++ ++ //No.11 Load Floating from Memory : ++ pipe_class fpu_loadF(regF dst, memory mem) %{ ++ instruction_count(1); ++ mem : RD(read); ++ dst : WB(write)+3; ++ DECODE : ID; ++ MEM : RD; ++ %} ++ ++ //No.12 Store Floating to Memory : ++ pipe_class fpu_storeF(regF src, memory mem) %{ ++ instruction_count(1); ++ mem : RD(read); ++ src : RD(read); ++ DECODE : ID; ++ MEM : RD; ++ %} ++ ++ //No.13 FPU Conditional branch : ++ pipe_class pipe_fpu_branch(regF src1, regF src2, label labl ) %{ ++ multiple_bundles; ++ src1 : RD(read); ++ src2 : RD(read); ++ DECODE : ID; ++ BR : RD; ++ %} ++ ++//No.14 Floating FPU reg operation : dst <-- op reg ++ pipe_class fpu1_regF(regF dst, regF src) %{ ++ src : RD(read); ++ dst : WB(write); ++ DECODE : ID; ++ FPU : CA; ++ %} ++ ++ pipe_class long_memory_op() %{ ++ instruction_count(10); multiple_bundles; force_serialization; ++ fixed_latency(30); ++ %} ++ ++ pipe_class simple_call() %{ ++ instruction_count(10); multiple_bundles; force_serialization; ++ fixed_latency(200); ++ BR : RD; ++ %} ++ ++ pipe_class call() %{ ++ instruction_count(10); multiple_bundles; force_serialization; ++ fixed_latency(200); ++ %} ++ ++ //FIXME: ++ //No.9 Piple slow : for multi-instructions ++ pipe_class pipe_slow( ) %{ ++ instruction_count(20); ++ force_serialization; ++ multiple_bundles; ++ fixed_latency(50); ++ %} ++ ++%} ++ ++ ++ ++//----------INSTRUCTIONS------------------------------------------------------- ++// ++// match -- States which machine-independent subtree may be replaced ++// by this instruction. ++// ins_cost -- The estimated cost of this instruction is used by instruction ++// selection to identify a minimum cost tree of machine ++// instructions that matches a tree of machine-independent ++// instructions. ++// format -- A string providing the disassembly for this instruction. ++// The value of an instruction's operand may be inserted ++// by referring to it with a '$' prefix. ++// opcode -- Three instruction opcodes may be provided. These are referred ++// to within an encode class as $primary, $secondary, and $tertiary ++// respectively. The primary opcode is commonly used to ++// indicate the type of machine instruction, while secondary ++// and tertiary are often used for prefix options or addressing ++// modes. ++// ins_encode -- A list of encode classes with parameters. The encode class ++// name must have been defined in an 'enc_class' specification ++// in the encode section of the architecture description. 
++ ++ ++// Load Integer ++instruct loadI(mRegI dst, memory mem) %{ ++ match(Set dst (LoadI mem)); ++ ++ ins_cost(125); ++ format %{ "ld_w $dst, $mem #@loadI" %} ++ ins_encode %{ ++ __ loadstore_enc($dst$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::LOAD_INT); ++ %} ++ ins_pipe( ialu_loadI ); ++%} ++ ++instruct loadI_convI2L(mRegL dst, memory mem) %{ ++ match(Set dst (ConvI2L (LoadI mem))); ++ ++ ins_cost(125); ++ format %{ "ld_w $dst, $mem #@loadI_convI2L" %} ++ ins_encode %{ ++ __ loadstore_enc($dst$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::LOAD_INT); ++ %} ++ ins_pipe( ialu_loadI ); ++%} ++ ++// Load Integer (32 bit signed) to Byte (8 bit signed) ++instruct loadI2B(mRegI dst, memory mem, immI_24 twentyfour) %{ ++ match(Set dst (RShiftI (LShiftI (LoadI mem) twentyfour) twentyfour)); ++ ++ ins_cost(125); ++ format %{ "ld_b $dst, $mem\t# int -> byte #@loadI2B" %} ++ ins_encode %{ ++ __ loadstore_enc($dst$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::LOAD_BYTE); ++ %} ++ ins_pipe(ialu_loadI); ++%} ++ ++// Load Integer (32 bit signed) to Unsigned Byte (8 bit UNsigned) ++instruct loadI2UB(mRegI dst, memory mem, immI_255 mask) %{ ++ match(Set dst (AndI (LoadI mem) mask)); ++ ++ ins_cost(125); ++ format %{ "ld_bu $dst, $mem\t# int -> ubyte #@loadI2UB" %} ++ ins_encode %{ ++ __ loadstore_enc($dst$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::LOAD_U_BYTE); ++ %} ++ ins_pipe(ialu_loadI); ++%} ++ ++// Load Integer (32 bit signed) to Short (16 bit signed) ++instruct loadI2S(mRegI dst, memory mem, immI_16 sixteen) %{ ++ match(Set dst (RShiftI (LShiftI (LoadI mem) sixteen) sixteen)); ++ ++ ins_cost(125); ++ format %{ "ld_h $dst, $mem\t# int -> short #@loadI2S" %} ++ ins_encode %{ ++ __ loadstore_enc($dst$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::LOAD_SHORT); ++ %} ++ ins_pipe(ialu_loadI); ++%} ++ ++// Load Integer (32 bit signed) to Unsigned Short/Char (16 bit UNsigned) ++instruct loadI2US(mRegI dst, memory mem, immI_65535 mask) %{ ++ match(Set dst (AndI (LoadI mem) mask)); ++ ++ ins_cost(125); ++ format %{ "ld_hu $dst, $mem\t# int -> ushort/char #@loadI2US" %} ++ ins_encode %{ ++ __ loadstore_enc($dst$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::LOAD_U_SHORT); ++ %} ++ ins_pipe(ialu_loadI); ++%} ++ ++// Load Long. 
++instruct loadL(mRegL dst, memory mem) %{ ++// predicate(!((LoadLNode*)n)->require_atomic_access()); ++ match(Set dst (LoadL mem)); ++ ++ ins_cost(250); ++ format %{ "ld_d $dst, $mem #@loadL" %} ++ ins_encode %{ ++ __ loadstore_enc($dst$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::LOAD_LONG); ++ %} ++ ins_pipe( ialu_loadL ); ++%} ++ ++// Load Long - UNaligned ++instruct loadL_unaligned(mRegL dst, memory mem) %{ ++ match(Set dst (LoadL_unaligned mem)); ++ ++ // FIXME: Need more effective ldl/ldr ++ ins_cost(450); ++ format %{ "ld_d $dst, $mem #@loadL_unaligned\n\t" %} ++ ins_encode %{ ++ __ loadstore_enc($dst$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::LOAD_LONG); ++ %} ++ ins_pipe( ialu_loadL ); ++%} ++ ++// Store Long ++instruct storeL_reg(memory mem, mRegL src) %{ ++ match(Set mem (StoreL mem src)); ++ ++ ins_cost(200); ++ format %{ "st_d $mem, $src #@storeL_reg\n" %} ++ ins_encode %{ ++ __ loadstore_enc($src$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::STORE_LONG); ++ %} ++ ins_pipe( ialu_storeL ); ++%} ++ ++instruct storeL_immL_0(memory mem, immL_0 zero) %{ ++ match(Set mem (StoreL mem zero)); ++ ++ ins_cost(180); ++ format %{ "st_d zero, $mem #@storeL_immL_0" %} ++ ins_encode %{ ++ __ loadstore_enc(R0, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::STORE_LONG); ++ %} ++ ins_pipe( ialu_storeL ); ++%} ++ ++// Load Compressed Pointer ++instruct loadN(mRegN dst, memory mem) ++%{ ++ match(Set dst (LoadN mem)); ++ ++ ins_cost(125); // XXX ++ format %{ "ld_wu $dst, $mem\t# compressed ptr @ loadN" %} ++ ins_encode %{ ++ relocInfo::relocType disp_reloc = $mem->disp_reloc(); ++ assert(disp_reloc == relocInfo::none, "cannot have disp"); ++ __ loadstore_enc($dst$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::LOAD_U_INT); ++ %} ++ ins_pipe( ialu_loadI ); // XXX ++%} ++ ++instruct loadN2P(mRegP dst, memory mem) ++%{ ++ match(Set dst (DecodeN (LoadN mem))); ++ predicate(Universe::narrow_oop_base() == NULL && Universe::narrow_oop_shift() == 0); ++ ++ ins_cost(125); // XXX ++ format %{ "ld_wu $dst, $mem\t# @ loadN2P" %} ++ ins_encode %{ ++ relocInfo::relocType disp_reloc = $mem->disp_reloc(); ++ assert(disp_reloc == relocInfo::none, "cannot have disp"); ++ __ loadstore_enc($dst$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::LOAD_U_INT); ++ %} ++ ins_pipe( ialu_loadI ); // XXX ++%} ++ ++// Load Pointer ++instruct loadP(mRegP dst, memory mem) %{ ++ match(Set dst (LoadP mem)); ++ ++ ins_cost(125); ++ format %{ "ld_d $dst, $mem #@loadP" %} ++ ins_encode %{ ++ relocInfo::relocType disp_reloc = $mem->disp_reloc(); ++ assert(disp_reloc == relocInfo::none, "cannot have disp"); ++ __ loadstore_enc($dst$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::LOAD_LONG); ++ %} ++ ins_pipe( ialu_loadI ); ++%} ++ ++// Load Klass Pointer ++instruct loadKlass(mRegP dst, memory mem) %{ ++ match(Set dst (LoadKlass mem)); ++ ++ ins_cost(125); ++ format %{ "MOV $dst,$mem @ loadKlass" %} ++ ins_encode %{ ++ relocInfo::relocType disp_reloc = $mem->disp_reloc(); ++ assert(disp_reloc == relocInfo::none, "cannot have disp"); ++ __ loadstore_enc($dst$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::LOAD_LONG); ++ %} ++ ins_pipe( ialu_loadI ); ++%} ++ ++// Load narrow Klass Pointer ++instruct loadNKlass(mRegN dst, memory mem) ++%{ ++ match(Set dst (LoadNKlass mem)); ++ ++ ins_cost(125); // XXX ++ format 
%{ "ld_wu $dst, $mem\t# compressed klass ptr @ loadNKlass" %} ++ ins_encode %{ ++ relocInfo::relocType disp_reloc = $mem->disp_reloc(); ++ assert(disp_reloc == relocInfo::none, "cannot have disp"); ++ __ loadstore_enc($dst$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::LOAD_U_INT); ++ %} ++ ins_pipe( ialu_loadI ); // XXX ++%} ++ ++instruct loadN2PKlass(mRegP dst, memory mem) ++%{ ++ match(Set dst (DecodeNKlass (LoadNKlass mem))); ++ predicate(Universe::narrow_klass_base() == NULL && Universe::narrow_klass_shift() == 0); ++ ++ ins_cost(125); // XXX ++ format %{ "ld_wu $dst, $mem\t# compressed klass ptr @ loadN2PKlass" %} ++ ins_encode %{ ++ relocInfo::relocType disp_reloc = $mem->disp_reloc(); ++ assert(disp_reloc == relocInfo::none, "cannot have disp"); ++ __ loadstore_enc($dst$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::LOAD_U_INT); ++ %} ++ ins_pipe( ialu_loadI ); // XXX ++%} ++ ++// Load Constant ++instruct loadConI(mRegI dst, immI src) %{ ++ match(Set dst src); ++ ++ ins_cost(120); ++ format %{ "mov $dst, $src #@loadConI" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ int value = $src$$constant; ++ __ li(dst, value); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++ ++instruct loadConL(mRegL dst, immL src) %{ ++ match(Set dst src); ++ ins_cost(120); ++ format %{ "li $dst, $src @ loadConL" %} ++ ins_encode %{ ++ __ li($dst$$Register, $src$$constant); ++ %} ++ ins_pipe(ialu_regL_regL); ++%} ++ ++// Load Range ++instruct loadRange(mRegI dst, memory_loadRange mem) %{ ++ match(Set dst (LoadRange mem)); ++ ++ ins_cost(125); ++ format %{ "MOV $dst,$mem @ loadRange" %} ++ ins_encode %{ ++ __ loadstore_enc($dst$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::LOAD_INT); ++ %} ++ ins_pipe( ialu_loadI ); ++%} ++ ++ ++instruct storeP(memory mem, mRegP src ) %{ ++ match(Set mem (StoreP mem src)); ++ ++ ins_cost(125); ++ format %{ "st_d $src, $mem #@storeP" %} ++ ins_encode %{ ++ __ loadstore_enc($src$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::STORE_LONG); ++ %} ++ ins_pipe( ialu_storeI ); ++%} ++ ++// Store NULL Pointer, mark word, or other simple pointer constant. 
++instruct storeImmP_immP_0(memory mem, immP_0 zero) %{ ++ match(Set mem (StoreP mem zero)); ++ ++ ins_cost(125); ++ format %{ "mov $mem, $zero #@storeImmP_0" %} ++ ins_encode %{ ++ __ loadstore_enc(R0, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::STORE_LONG); ++ %} ++ ins_pipe( ialu_storeI ); ++%} ++ ++// Store Compressed Pointer ++instruct storeN(memory mem, mRegN src) ++%{ ++ match(Set mem (StoreN mem src)); ++ ++ ins_cost(125); // XXX ++ format %{ "st_w $mem, $src\t# compressed ptr @ storeN" %} ++ ins_encode %{ ++ __ loadstore_enc($src$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::STORE_INT); ++ %} ++ ins_pipe( ialu_storeI ); ++%} ++ ++instruct storeP2N(memory mem, mRegP src) ++%{ ++ match(Set mem (StoreN mem (EncodeP src))); ++ predicate(Universe::narrow_oop_base() == NULL && Universe::narrow_oop_shift() == 0); ++ ++ ins_cost(125); // XXX ++ format %{ "st_w $mem, $src\t# @ storeP2N" %} ++ ins_encode %{ ++ __ loadstore_enc($src$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::STORE_INT); ++ %} ++ ins_pipe( ialu_storeI ); ++%} ++ ++instruct storeNKlass(memory mem, mRegN src) ++%{ ++ match(Set mem (StoreNKlass mem src)); ++ ++ ins_cost(125); // XXX ++ format %{ "st_w $mem, $src\t# compressed klass ptr @ storeNKlass" %} ++ ins_encode %{ ++ __ loadstore_enc($src$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::STORE_INT); ++ %} ++ ins_pipe( ialu_storeI ); ++%} ++ ++instruct storeP2NKlass(memory mem, mRegP src) ++%{ ++ match(Set mem (StoreNKlass mem (EncodePKlass src))); ++ predicate(Universe::narrow_klass_base() == NULL && Universe::narrow_klass_shift() == 0); ++ ++ ins_cost(125); // XXX ++ format %{ "st_w $mem, $src\t# @ storeP2NKlass" %} ++ ins_encode %{ ++ __ loadstore_enc($src$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::STORE_INT); ++ %} ++ ins_pipe( ialu_storeI ); ++%} ++ ++instruct storeImmN_immN_0(memory mem, immN_0 zero) ++%{ ++ match(Set mem (StoreN mem zero)); ++ ++ ins_cost(125); // XXX ++ format %{ "storeN0 zero, $mem\t# compressed ptr" %} ++ ins_encode %{ ++ __ loadstore_enc(R0, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::STORE_INT); ++ %} ++ ins_pipe( ialu_storeI ); ++%} ++ ++// Store Byte ++instruct storeB_immB_0(memory mem, immI_0 zero) %{ ++ match(Set mem (StoreB mem zero)); ++ ++ format %{ "mov $mem, zero #@storeB_immB_0" %} ++ ins_encode %{ ++ __ loadstore_enc(R0, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::STORE_BYTE); ++ %} ++ ins_pipe( ialu_storeI ); ++%} ++ ++instruct storeB(memory mem, mRegI src) %{ ++ match(Set mem (StoreB mem src)); ++ ++ ins_cost(125); ++ format %{ "st_b $src, $mem #@storeB" %} ++ ins_encode %{ ++ __ loadstore_enc($src$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::STORE_BYTE); ++ %} ++ ins_pipe( ialu_storeI ); ++%} ++ ++instruct storeB_convL2I(memory mem, mRegL src) %{ ++ match(Set mem (StoreB mem (ConvL2I src))); ++ ++ ins_cost(125); ++ format %{ "st_b $src, $mem #@storeB_convL2I" %} ++ ins_encode %{ ++ __ loadstore_enc($src$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::STORE_BYTE); ++ %} ++ ins_pipe( ialu_storeI ); ++%} ++ ++// Load Byte (8bit signed) ++instruct loadB(mRegI dst, memory mem) %{ ++ match(Set dst (LoadB mem)); ++ ++ ins_cost(125); ++ format %{ "ld_b $dst, $mem #@loadB" %} ++ ins_encode %{ ++ __ loadstore_enc($dst$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, 
MacroAssembler::LOAD_BYTE); ++ %} ++ ins_pipe( ialu_loadI ); ++%} ++ ++instruct loadB_convI2L(mRegL dst, memory mem) %{ ++ match(Set dst (ConvI2L (LoadB mem))); ++ ++ ins_cost(125); ++ format %{ "ld_b $dst, $mem #@loadB_convI2L" %} ++ ins_encode %{ ++ __ loadstore_enc($dst$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::LOAD_BYTE); ++ %} ++ ins_pipe( ialu_loadI ); ++%} ++ ++// Load Byte (8bit UNsigned) ++instruct loadUB(mRegI dst, memory mem) %{ ++ match(Set dst (LoadUB mem)); ++ ++ ins_cost(125); ++ format %{ "ld_bu $dst, $mem #@loadUB" %} ++ ins_encode %{ ++ __ loadstore_enc($dst$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::LOAD_U_BYTE); ++ %} ++ ins_pipe( ialu_loadI ); ++%} ++ ++instruct loadUB_convI2L(mRegL dst, memory mem) %{ ++ match(Set dst (ConvI2L (LoadUB mem))); ++ ++ ins_cost(125); ++ format %{ "ld_bu $dst, $mem #@loadUB_convI2L" %} ++ ins_encode %{ ++ __ loadstore_enc($dst$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::LOAD_U_BYTE); ++ %} ++ ins_pipe( ialu_loadI ); ++%} ++ ++// Load Short (16bit signed) ++instruct loadS(mRegI dst, memory mem) %{ ++ match(Set dst (LoadS mem)); ++ ++ ins_cost(125); ++ format %{ "ld_h $dst, $mem #@loadS" %} ++ ins_encode %{ ++ __ loadstore_enc($dst$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::LOAD_SHORT); ++ %} ++ ins_pipe( ialu_loadI ); ++%} ++ ++// Load Short (16 bit signed) to Byte (8 bit signed) ++instruct loadS2B(mRegI dst, memory mem, immI_24 twentyfour) %{ ++ match(Set dst (RShiftI (LShiftI (LoadS mem) twentyfour) twentyfour)); ++ ++ ins_cost(125); ++ format %{ "ld_b $dst, $mem\t# short -> byte #@loadS2B" %} ++ ins_encode %{ ++ __ loadstore_enc($dst$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::LOAD_BYTE); ++ %} ++ ins_pipe(ialu_loadI); ++%} ++ ++instruct loadS_convI2L(mRegL dst, memory mem) %{ ++ match(Set dst (ConvI2L (LoadS mem))); ++ ++ ins_cost(125); ++ format %{ "ld_h $dst, $mem #@loadS_convI2L" %} ++ ins_encode %{ ++ __ loadstore_enc($dst$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::LOAD_SHORT); ++ %} ++ ins_pipe( ialu_loadI ); ++%} ++ ++// Store Integer Immediate ++instruct storeI_immI_0(memory mem, immI_0 zero) %{ ++ match(Set mem (StoreI mem zero)); ++ ++ format %{ "mov $mem, zero #@storeI_immI_0" %} ++ ins_encode %{ ++ __ loadstore_enc(R0, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::STORE_INT); ++ %} ++ ins_pipe( ialu_storeI ); ++%} ++ ++// Store Integer ++instruct storeI(memory mem, mRegI src) %{ ++ match(Set mem (StoreI mem src)); ++ ++ ins_cost(125); ++ format %{ "st_w $mem, $src #@storeI" %} ++ ins_encode %{ ++ __ loadstore_enc($src$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::STORE_INT); ++ %} ++ ins_pipe( ialu_storeI ); ++%} ++ ++instruct storeI_convL2I(memory mem, mRegL src) %{ ++ match(Set mem (StoreI mem (ConvL2I src))); ++ ++ ins_cost(125); ++ format %{ "st_w $mem, $src #@storeI_convL2I" %} ++ ins_encode %{ ++ __ loadstore_enc($src$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::STORE_INT); ++ %} ++ ins_pipe( ialu_storeI ); ++%} ++ ++// Load Float ++instruct loadF(regF dst, memory mem) %{ ++ match(Set dst (LoadF mem)); ++ ++ ins_cost(150); ++ format %{ "loadF $dst, $mem #@loadF" %} ++ ins_encode %{ ++ __ loadstore_enc($dst$$FloatRegister, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::LOAD_FLOAT); ++ %} ++ ins_pipe( ialu_loadI ); ++%} ++ 
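++// Note on the rules above: every plain load/store entry delegates to
++// MacroAssembler::loadstore_enc, passing the operand's base/index/scale/disp
++// addressing components plus an access-type constant (LOAD_INT, LOAD_U_BYTE,
++// STORE_LONG, ...) that selects the final ld_*/st_* encoding. The constant-load
++// rules that follow instead materialize immediates with li / patchable_li52 and
++// attach oop or metadata relocations when the constant requires them.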
++instruct loadConP_general(mRegP dst, immP src) %{ ++ match(Set dst src); ++ ++ ins_cost(120); ++ format %{ "li $dst, $src #@loadConP_general" %} ++ ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ long* value = (long*)$src$$constant; ++ ++ if($src->constant_reloc() == relocInfo::metadata_type){ ++ int klass_index = __ oop_recorder()->find_index((Klass*)value); ++ RelocationHolder rspec = metadata_Relocation::spec(klass_index); ++ ++ __ relocate(rspec); ++ __ patchable_li52(dst, (long)value); ++ } else if($src->constant_reloc() == relocInfo::oop_type){ ++ int oop_index = __ oop_recorder()->find_index((jobject)value); ++ RelocationHolder rspec = oop_Relocation::spec(oop_index); ++ ++ __ relocate(rspec); ++ __ patchable_li52(dst, (long)value); ++ } else if ($src->constant_reloc() == relocInfo::none) { ++ __ li(dst, (long)value); ++ } ++ %} ++ ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++instruct loadConP_no_oop_cheap(mRegP dst, immP_no_oop_cheap src) %{ ++ match(Set dst src); ++ ++ ins_cost(80); ++ format %{ "li $dst, $src @ loadConP_no_oop_cheap" %} ++ ++ ins_encode %{ ++ if ($src->constant_reloc() == relocInfo::metadata_type) { ++ __ mov_metadata($dst$$Register, (Metadata*)$src$$constant); ++ } else { ++ __ li($dst$$Register, $src$$constant); ++ } ++ %} ++ ++ ins_pipe(ialu_regI_regI); ++%} ++ ++ ++instruct loadConP_poll(mRegP dst, immP_poll src) %{ ++ match(Set dst src); ++ ++ ins_cost(50); ++ format %{ "li $dst, $src #@loadConP_poll" %} ++ ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ intptr_t value = (intptr_t)$src$$constant; ++ ++ __ li(dst, (jlong)value); ++ %} ++ ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++instruct loadConP_immP_0(mRegP dst, immP_0 src) ++%{ ++ match(Set dst src); ++ ++ ins_cost(50); ++ format %{ "mov $dst, R0\t# ptr" %} ++ ins_encode %{ ++ Register dst_reg = $dst$$Register; ++ __ add_d(dst_reg, R0, R0); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++instruct loadConN_immN_0(mRegN dst, immN_0 src) %{ ++ match(Set dst src); ++ format %{ "move $dst, R0\t# compressed NULL ptr" %} ++ ins_encode %{ ++ __ move($dst$$Register, R0); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++instruct loadConN(mRegN dst, immN src) %{ ++ match(Set dst src); ++ ++ ins_cost(125); ++ format %{ "li $dst, $src\t# compressed ptr @ loadConN" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ __ set_narrow_oop(dst, (jobject)$src$$constant); ++ %} ++ ins_pipe( ialu_regI_regI ); // XXX ++%} ++ ++instruct loadConNKlass(mRegN dst, immNKlass src) %{ ++ match(Set dst src); ++ ++ ins_cost(125); ++ format %{ "li $dst, $src\t# compressed klass ptr @ loadConNKlass" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ __ set_narrow_klass(dst, (Klass*)$src$$constant); ++ %} ++ ins_pipe( ialu_regI_regI ); // XXX ++%} ++ ++//FIXME ++// Tail Call; Jump from runtime stub to Java code. ++// Also known as an 'interprocedural jump'. ++// Target of jump will eventually return to caller. ++// TailJump below removes the return address. ++instruct TailCalljmpInd(mRegP jump_target, mRegP method_oop) %{ ++ match(TailCall jump_target method_oop ); ++ ins_cost(300); ++ format %{ "JMP $jump_target \t# @TailCalljmpInd" %} ++ ++ ins_encode %{ ++ Register target = $jump_target$$Register; ++ Register oop = $method_oop$$Register; ++ ++ // RA will be used in generate_forward_exception() ++ __ push(RA); ++ ++ __ move(S3, oop); ++ __ jr(target); ++ %} ++ ++ ins_pipe( pipe_jump ); ++%} ++ ++// Create exception oop: created by stack-crawling runtime code. 
++// Created exception is now available to this handler, and is setup ++// just prior to jumping to this handler. No code emitted. ++instruct CreateException( a0_RegP ex_oop ) ++%{ ++ match(Set ex_oop (CreateEx)); ++ ++ // use the following format syntax ++ format %{ "# exception oop is in A0; no code emitted @CreateException" %} ++ ins_encode %{ ++ // X86 leaves this function empty ++ __ block_comment("CreateException is empty in LA"); ++ %} ++ ins_pipe( empty ); ++// ins_pipe( pipe_jump ); ++%} ++ ++ ++/* The mechanism of exception handling is clear now. ++ ++- Common try/catch: ++ [stubGenerator_loongarch.cpp] generate_forward_exception() ++ |- V0, V1 are created ++ |- T4 <= SharedRuntime::exception_handler_for_return_address ++ `- jr T4 ++ `- the caller's exception_handler ++ `- jr OptoRuntime::exception_blob ++ `- here ++- Rethrow(e.g. 'unwind'): ++ * The callee: ++ |- an exception is triggered during execution ++ `- exits the callee method through RethrowException node ++ |- The callee pushes exception_oop(T0) and exception_pc(RA) ++ `- The callee jumps to OptoRuntime::rethrow_stub() ++ * In OptoRuntime::rethrow_stub: ++ |- The VM calls _rethrow_Java to determine the return address in the caller method ++ `- exits the stub with tailjmpInd ++ |- pops exception_oop(V0) and exception_pc(V1) ++ `- jumps to the return address(usually an exception_handler) ++ * The caller: ++ `- continues processing the exception_blob with V0/V1 ++*/ ++ ++// Rethrow exception: ++// The exception oop will come in the first argument position. ++// Then JUMP (not call) to the rethrow stub code. ++instruct RethrowException() ++%{ ++ match(Rethrow); ++ ++ // use the following format syntax ++ format %{ "JMP rethrow_stub #@RethrowException" %} ++ ins_encode %{ ++ __ block_comment("@ RethrowException"); ++ ++ cbuf.set_insts_mark(); ++ cbuf.relocate(cbuf.insts_mark(), runtime_call_Relocation::spec()); ++ ++ // call OptoRuntime::rethrow_stub to get the exception handler in parent method ++ __ patchable_jump((address)OptoRuntime::rethrow_stub()); ++ %} ++ ins_pipe( pipe_jump ); ++%} ++ ++// ============================================================================ ++// Branch Instructions --- long offset versions ++ ++// Jump Direct ++instruct jmpDir_long(label labl) %{ ++ match(Goto); ++ effect(USE labl); ++ ++ ins_cost(300); ++ format %{ "JMP $labl #@jmpDir_long" %} ++ ++ ins_encode %{ ++ Label* L = $labl$$label; ++ __ jmp_far(*L); ++ %} ++ ++ ins_pipe( pipe_jump ); ++ //ins_pc_relative(1); ++%} ++ ++// Jump Direct Conditional - Label defines a relative address from Jcc+1 ++instruct jmpLoopEnd_long(cmpOp cop, mRegI src1, mRegI src2, label labl) %{ ++ match(CountedLoopEnd cop (CmpI src1 src2)); ++ effect(USE labl); ++ ++ ins_cost(300); ++ format %{ "J$cop $src1, $src2, $labl\t# Loop end @ jmpLoopEnd_long" %} ++ ins_encode %{ ++ Register op1 = $src1$$Register; ++ Register op2 = $src2$$Register; ++ Label* L = $labl$$label; ++ int flag = $cop$$cmpcode; ++ ++ switch(flag) { ++ case 0x01: //equal ++ __ beq_long(op1, op2, *L); ++ break; ++ case 0x02: //not_equal ++ __ bne_long(op1, op2, *L); ++ break; ++ case 0x03: //above ++ __ blt_long(op2, op1, *L, true /* signed */); ++ break; ++ case 0x04: //above_equal ++ __ bge_long(op1, op2, *L, true /* signed */); ++ break; ++ case 0x05: //below ++ __ blt_long(op1, op2, *L, true /* signed */); ++ break; ++ case 0x06: //below_equal ++ __ bge_long(op2, op1, *L, true /* signed */); ++ break; ++ default: ++ Unimplemented(); ++ } ++ %} ++ ins_pipe( pipe_jump ); ++ 
ins_pc_relative(1); ++%} ++ ++instruct jmpLoopEnd_reg_immI_long(cmpOp cop, mRegI src1, immI src2, label labl) %{ ++ match(CountedLoopEnd cop (CmpI src1 src2)); ++ effect(USE labl); ++ ++ ins_cost(300); ++ format %{ "J$cop $src1, $src2, $labl\t# Loop end @ jmpLoopEnd_reg_immI_long" %} ++ ins_encode %{ ++ Register op1 = $src1$$Register; ++ Register op2 = AT; ++ Label* L = $labl$$label; ++ int flag = $cop$$cmpcode; ++ ++ __ li(op2, $src2$$constant); ++ ++ switch(flag) { ++ case 0x01: //equal ++ __ beq_long(op1, op2, *L); ++ break; ++ case 0x02: //not_equal ++ __ bne_long(op1, op2, *L); ++ break; ++ case 0x03: //above ++ __ blt_long(op2, op1, *L, true /* signed */); ++ break; ++ case 0x04: //above_equal ++ __ bge_long(op1, op2, *L, true /* signed */); ++ break; ++ case 0x05: //below ++ __ blt_long(op1, op2, *L, true /* signed */); ++ break; ++ case 0x06: //below_equal ++ __ bge_long(op2, op1, *L, true /* signed */); ++ break; ++ default: ++ Unimplemented(); ++ } ++ %} ++ ins_pipe( pipe_jump ); ++ ins_pc_relative(1); ++%} ++ ++ ++// This match pattern is created for StoreIConditional since I cannot match IfNode without a RegFlags! ++instruct jmpCon_flags_long(cmpOp cop, FlagsReg cr, label labl) %{ ++ match(If cop cr); ++ effect(USE labl); ++ ++ ins_cost(300); ++ format %{ "J$cop $labl #LoongArch uses T0 as equivalent to eflag @jmpCon_flags_long" %} ++ ++ ins_encode %{ ++ Label* L = $labl$$label; ++ switch($cop$$cmpcode) { ++ case 0x01: //equal ++ __ bne_long($cr$$Register, R0, *L); ++ break; ++ case 0x02: //not equal ++ __ beq_long($cr$$Register, R0, *L); ++ break; ++ default: ++ Unimplemented(); ++ } ++ %} ++ ++ ins_pipe( pipe_jump ); ++ ins_pc_relative(1); ++%} ++ ++// Conditional jumps ++instruct branchConP_0_long(cmpOpU cmp, mRegP op1, immP_0 zero, label labl) %{ ++ match(If cmp (CmpP op1 zero)); ++ effect(USE labl); ++ ++ ins_cost(180); ++ format %{ "b$cmp $op1, R0, $labl #@branchConP_0_long" %} ++ ++ ins_encode %{ ++ Register op1 = $op1$$Register; ++ Register op2 = R0; ++ Label* L = $labl$$label; ++ int flag = $cmp$$cmpcode; ++ ++ switch(flag) { ++ case 0x01: //equal ++ __ beq_long(op1, op2, *L); ++ break; ++ case 0x02: //not_equal ++ __ bne_long(op1, op2, *L); ++ break; ++ default: ++ Unimplemented(); ++ } ++ %} ++ ++ ins_pc_relative(1); ++ ins_pipe( pipe_alu_branch ); ++%} ++ ++instruct branchConN2P_0_long(cmpOpU cmp, mRegN op1, immP_0 zero, label labl) %{ ++ match(If cmp (CmpP (DecodeN op1) zero)); ++ predicate(Universe::narrow_oop_base() == NULL && Universe::narrow_oop_shift() == 0); ++ effect(USE labl); ++ ++ ins_cost(180); ++ format %{ "b$cmp $op1, R0, $labl #@branchConN2P_0_long" %} ++ ++ ins_encode %{ ++ Register op1 = $op1$$Register; ++ Register op2 = R0; ++ Label* L = $labl$$label; ++ int flag = $cmp$$cmpcode; ++ ++ switch(flag) ++ { ++ case 0x01: //equal ++ __ beq_long(op1, op2, *L); ++ break; ++ case 0x02: //not_equal ++ __ bne_long(op1, op2, *L); ++ break; ++ default: ++ Unimplemented(); ++ } ++ %} ++ ++ ins_pc_relative(1); ++ ins_pipe( pipe_alu_branch ); ++%} ++ ++ ++instruct branchConP_long(cmpOpU cmp, mRegP op1, mRegP op2, label labl) %{ ++ match(If cmp (CmpP op1 op2)); ++// predicate(can_branch_register(_kids[0]->_leaf, _kids[1]->_leaf)); ++ effect(USE labl); ++ ++ ins_cost(200); ++ format %{ "b$cmp $op1, $op2, $labl #@branchConP_long" %} ++ ++ ins_encode %{ ++ Register op1 = $op1$$Register; ++ Register op2 = $op2$$Register; ++ Label* L = $labl$$label; ++ int flag = $cmp$$cmpcode; ++ ++ switch(flag) { ++ case 0x01: //equal ++ __ beq_long(op1, op2, *L); ++ break; ++ case 
0x02: //not_equal ++ __ bne_long(op1, op2, *L); ++ break; ++ case 0x03: //above ++ __ blt_long(op2, op1, *L, false /* unsigned */); ++ break; ++ case 0x04: //above_equal ++ __ bge_long(op1, op2, *L, false /* unsigned */); ++ break; ++ case 0x05: //below ++ __ blt_long(op1, op2, *L, false /* unsigned */); ++ break; ++ case 0x06: //below_equal ++ __ bge_long(op2, op1, *L, false /* unsigned */); ++ break; ++ default: ++ Unimplemented(); ++ } ++ %} ++ ++ ins_pc_relative(1); ++ ins_pipe( pipe_alu_branch ); ++%} ++ ++instruct cmpN_null_branch_long(cmpOp cmp, mRegN op1, immN_0 null, label labl) %{ ++ match(If cmp (CmpN op1 null)); ++ effect(USE labl); ++ ++ ins_cost(180); ++ format %{ "CMP $op1,0\t! compressed ptr\n\t" ++ "BP$cmp $labl @ cmpN_null_branch_long" %} ++ ins_encode %{ ++ Register op1 = $op1$$Register; ++ Register op2 = R0; ++ Label* L = $labl$$label; ++ int flag = $cmp$$cmpcode; ++ ++ switch(flag) { ++ case 0x01: //equal ++ __ beq_long(op1, op2, *L); ++ break; ++ case 0x02: //not_equal ++ __ bne_long(op1, op2, *L); ++ break; ++ default: ++ Unimplemented(); ++ } ++ %} ++//TODO: pipe_branchP or create pipe_branchN LEE ++ ins_pc_relative(1); ++ ins_pipe( pipe_alu_branch ); ++%} ++ ++instruct cmpN_reg_branch_long(cmpOp cmp, mRegN op1, mRegN op2, label labl) %{ ++ match(If cmp (CmpN op1 op2)); ++ effect(USE labl); ++ ++ ins_cost(180); ++ format %{ "CMP $op1,$op2\t! compressed ptr\n\t" ++ "BP$cmp $labl @ cmpN_reg_branch_long" %} ++ ins_encode %{ ++ Register op1_reg = $op1$$Register; ++ Register op2_reg = $op2$$Register; ++ Label* L = $labl$$label; ++ int flag = $cmp$$cmpcode; ++ ++ switch(flag) { ++ case 0x01: //equal ++ __ beq_long(op1_reg, op2_reg, *L); ++ break; ++ case 0x02: //not_equal ++ __ bne_long(op1_reg, op2_reg, *L); ++ break; ++ case 0x03: //above ++ __ blt_long(op2_reg, op1_reg, *L, false /* unsigned */); ++ break; ++ case 0x04: //above_equal ++ __ bge_long(op1_reg, op2_reg, *L, false /* unsigned */); ++ break; ++ case 0x05: //below ++ __ blt_long(op1_reg, op2_reg, *L, false /* unsigned */); ++ break; ++ case 0x06: //below_equal ++ __ bge_long(op2_reg, op1_reg, *L, false /* unsigned */); ++ break; ++ default: ++ Unimplemented(); ++ } ++ %} ++ ins_pc_relative(1); ++ ins_pipe( pipe_alu_branch ); ++%} ++ ++instruct branchConIU_reg_reg_long(cmpOpU cmp, mRegI src1, mRegI src2, label labl) %{ ++ match( If cmp (CmpU src1 src2) ); ++ effect(USE labl); ++ format %{ "BR$cmp $src1, $src2, $labl #@branchConIU_reg_reg_long" %} ++ ++ ins_encode %{ ++ Register op1 = $src1$$Register; ++ Register op2 = $src2$$Register; ++ Label* L = $labl$$label; ++ int flag = $cmp$$cmpcode; ++ ++ switch(flag) { ++ case 0x01: //equal ++ __ beq_long(op1, op2, *L); ++ break; ++ case 0x02: //not_equal ++ __ bne_long(op1, op2, *L); ++ break; ++ case 0x03: //above ++ __ blt_long(op2, op1, *L, false /* unsigned */); ++ break; ++ case 0x04: //above_equal ++ __ bge_long(op1, op2, *L, false /* unsigned */); ++ break; ++ case 0x05: //below ++ __ blt_long(op1, op2, *L, false /* unsigned */); ++ break; ++ case 0x06: //below_equal ++ __ bge_long(op2, op1, *L, false /* unsigned */); ++ break; ++ default: ++ Unimplemented(); ++ } ++ %} ++ ++ ins_pc_relative(1); ++ ins_pipe( pipe_alu_branch ); ++%} ++ ++ ++instruct branchConIU_reg_imm_long(cmpOpU cmp, mRegI src1, immI src2, label labl) %{ ++ match( If cmp (CmpU src1 src2) ); ++ effect(USE labl); ++ format %{ "BR$cmp $src1, $src2, $labl #@branchConIU_reg_imm_long" %} ++ ++ ins_encode %{ ++ Register op1 = $src1$$Register; ++ int val = $src2$$constant; ++ Label* L = $labl$$label; 
++ int flag = $cmp$$cmpcode; ++ ++ __ li(AT, val); ++ switch(flag) { ++ case 0x01: //equal ++ __ beq_long(op1, AT, *L); ++ break; ++ case 0x02: //not_equal ++ __ bne_long(op1, AT, *L); ++ break; ++ case 0x03: //above ++ __ blt_long(AT, op1, *L, false /* unsigned */); ++ break; ++ case 0x04: //above_equal ++ __ bge_long(op1, AT, *L, false /* unsigned */); ++ break; ++ case 0x05: //below ++ __ blt_long(op1, AT, *L, false /* unsigned */); ++ break; ++ case 0x06: //below_equal ++ __ bge_long(AT, op1, *L, false /* unsigned */); ++ break; ++ default: ++ Unimplemented(); ++ } ++ %} ++ ++ ins_pc_relative(1); ++ ins_pipe( pipe_alu_branch ); ++%} ++ ++instruct branchConI_reg_reg_long(cmpOp cmp, mRegI src1, mRegI src2, label labl) %{ ++ match( If cmp (CmpI src1 src2) ); ++ effect(USE labl); ++ format %{ "BR$cmp $src1, $src2, $labl #@branchConI_reg_reg_long" %} ++ ++ ins_encode %{ ++ Register op1 = $src1$$Register; ++ Register op2 = $src2$$Register; ++ Label* L = $labl$$label; ++ int flag = $cmp$$cmpcode; ++ ++ switch(flag) { ++ case 0x01: //equal ++ __ beq_long(op1, op2, *L); ++ break; ++ case 0x02: //not_equal ++ __ bne_long(op1, op2, *L); ++ break; ++ case 0x03: //above ++ __ blt_long(op2, op1, *L, true /* signed */); ++ break; ++ case 0x04: //above_equal ++ __ bge_long(op1, op2, *L, true /* signed */); ++ break; ++ case 0x05: //below ++ __ blt_long(op1, op2, *L, true /* signed */); ++ break; ++ case 0x06: //below_equal ++ __ bge_long(op2, op1, *L, true /* signed */); ++ break; ++ default: ++ Unimplemented(); ++ } ++ %} ++ ++ ins_pc_relative(1); ++ ins_pipe( pipe_alu_branch ); ++%} ++ ++instruct branchConI_reg_immI_0_long(cmpOp cmp, mRegI src1, immI_0 src2, label labl) %{ ++ match( If cmp (CmpI src1 src2) ); ++ effect(USE labl); ++ ins_cost(170); ++ format %{ "BR$cmp $src1, $src2, $labl #@branchConI_reg_immI_0_long" %} ++ ++ ins_encode %{ ++ Register op1 = $src1$$Register; ++ Label* L = $labl$$label; ++ int flag = $cmp$$cmpcode; ++ ++ switch(flag) { ++ case 0x01: //equal ++ __ beq_long(op1, R0, *L); ++ break; ++ case 0x02: //not_equal ++ __ bne_long(op1, R0, *L); ++ break; ++ case 0x03: //greater ++ __ blt_long(R0, op1, *L, true /* signed */); ++ break; ++ case 0x04: //greater_equal ++ __ bge_long(op1, R0, *L, true /* signed */); ++ break; ++ case 0x05: //less ++ __ blt_long(op1, R0, *L, true /* signed */); ++ break; ++ case 0x06: //less_equal ++ __ bge_long(R0, op1, *L, true /* signed */); ++ break; ++ default: ++ Unimplemented(); ++ } ++ %} ++ ++ ins_pc_relative(1); ++ ins_pipe( pipe_alu_branch ); ++%} ++ ++instruct branchConI_reg_imm_long(cmpOp cmp, mRegI src1, immI src2, label labl) %{ ++ match( If cmp (CmpI src1 src2) ); ++ effect(USE labl); ++ ins_cost(200); ++ format %{ "BR$cmp $src1, $src2, $labl #@branchConI_reg_imm_long" %} ++ ++ ins_encode %{ ++ Register op1 = $src1$$Register; ++ int val = $src2$$constant; ++ Label* L = $labl$$label; ++ int flag = $cmp$$cmpcode; ++ ++ __ li(AT, val); ++ switch(flag) { ++ case 0x01: //equal ++ __ beq_long(op1, AT, *L); ++ break; ++ case 0x02: //not_equal ++ __ bne_long(op1, AT, *L); ++ break; ++ case 0x03: //greater ++ __ blt_long(AT, op1, *L, true /* signed */); ++ break; ++ case 0x04: //greater_equal ++ __ bge_long(op1, AT, *L, true /* signed */); ++ break; ++ case 0x05: //less ++ __ blt_long(op1, AT, *L, true /* signed */); ++ break; ++ case 0x06: //less_equal ++ __ bge_long(AT, op1, *L, true /* signed */); ++ break; ++ default: ++ Unimplemented(); ++ } ++ %} ++ ++ ins_pc_relative(1); ++ ins_pipe( pipe_alu_branch ); ++%} ++ ++instruct 
branchConIU_reg_immI_0_long(cmpOpU cmp, mRegI src1, immI_0 zero, label labl) %{ ++ match( If cmp (CmpU src1 zero) ); ++ effect(USE labl); ++ format %{ "BR$cmp $src1, zero, $labl #@branchConIU_reg_immI_0_long" %} ++ ++ ins_encode %{ ++ Register op1 = $src1$$Register; ++ Label* L = $labl$$label; ++ int flag = $cmp$$cmpcode; ++ ++ switch(flag) { ++ case 0x01: //equal ++ __ beq_long(op1, R0, *L); ++ break; ++ case 0x02: //not_equal ++ __ bne_long(op1, R0, *L); ++ break; ++ case 0x03: //above ++ __ bne_long(R0, op1, *L); ++ break; ++ case 0x04: //above_equal ++ __ beq_long(R0, R0, *L); ++ break; ++ case 0x05: //below ++ return; ++ break; ++ case 0x06: //below_equal ++ __ beq_long(op1, R0, *L); ++ break; ++ default: ++ Unimplemented(); ++ } ++ %} ++ ++ ins_pc_relative(1); ++ ins_pipe( pipe_alu_branch ); ++%} ++ ++ ++instruct branchConL_regL_regL_long(cmpOp cmp, mRegLorI2L src1, mRegLorI2L src2, label labl) %{ ++ match( If cmp (CmpL src1 src2) ); ++ effect(USE labl); ++ format %{ "BR$cmp $src1, $src2, $labl #@branchConL_regL_regL_long" %} ++ ins_cost(250); ++ ++ ins_encode %{ ++ Register opr1_reg = as_Register($src1$$reg); ++ Register opr2_reg = as_Register($src2$$reg); ++ ++ Label* target = $labl$$label; ++ int flag = $cmp$$cmpcode; ++ ++ switch(flag) { ++ case 0x01: //equal ++ __ beq_long(opr1_reg, opr2_reg, *target); ++ break; ++ ++ case 0x02: //not_equal ++ __ bne_long(opr1_reg, opr2_reg, *target); ++ break; ++ ++ case 0x03: //greater ++ __ blt_long(opr2_reg, opr1_reg, *target, true /* signed */); ++ break; ++ ++ case 0x04: //greater_equal ++ __ bge_long(opr1_reg, opr2_reg, *target, true /* signed */); ++ break; ++ ++ case 0x05: //less ++ __ blt_long(opr1_reg, opr2_reg, *target, true /* signed */); ++ break; ++ ++ case 0x06: //less_equal ++ __ bge_long(opr2_reg, opr1_reg, *target, true /* signed */); ++ break; ++ ++ default: ++ Unimplemented(); ++ } ++ %} ++ ++ ++ ins_pc_relative(1); ++ ins_pipe( pipe_alu_branch ); ++%} ++ ++instruct branchConUL_regL_regL_long(cmpOp cmp, mRegLorI2L src1, mRegLorI2L src2, label labl) %{ ++ match( If cmp (CmpUL src1 src2) ); ++ effect(USE labl); ++ format %{ "BR$cmp $src1, $src2, $labl #@branchConUL_regL_regL_long" %} ++ ins_cost(250); ++ ++ ins_encode %{ ++ Register opr1_reg = as_Register($src1$$reg); ++ Register opr2_reg = as_Register($src2$$reg); ++ ++ Label* target = $labl$$label; ++ int flag = $cmp$$cmpcode; ++ ++ switch(flag) { ++ case 0x01: //equal ++ __ beq_long(opr1_reg, opr2_reg, *target); ++ break; ++ ++ case 0x02: //not_equal ++ __ bne_long(opr1_reg, opr2_reg, *target); ++ break; ++ ++ case 0x03: //greater ++ __ blt_long(opr2_reg, opr1_reg, *target, false /* signed */); ++ break; ++ ++ case 0x04: //greater_equal ++ __ bge_long(opr1_reg, opr2_reg, *target, false /* signed */); ++ break; ++ ++ case 0x05: //less ++ __ blt_long(opr1_reg, opr2_reg, *target, false /* signed */); ++ break; ++ ++ case 0x06: //less_equal ++ __ bge_long(opr2_reg, opr1_reg, *target, false /* signed */); ++ break; ++ ++ default: ++ Unimplemented(); ++ } ++ %} ++ ++ ++ ins_pc_relative(1); ++ ins_pipe( pipe_alu_branch ); ++%} ++ ++instruct branchConL_regL_immL_0_long(cmpOp cmp, mRegL src1, immL_0 zero, label labl) %{ ++ match( If cmp (CmpL src1 zero) ); ++ effect(USE labl); ++ format %{ "BR$cmp $src1, zero, $labl #@branchConL_regL_immL_0_long" %} ++ ins_cost(150); ++ ++ ins_encode %{ ++ Register opr1_reg = as_Register($src1$$reg); ++ Register opr2_reg = R0; ++ ++ Label* target = $labl$$label; ++ int flag = $cmp$$cmpcode; ++ ++ switch(flag) { ++ case 0x01: //equal ++ __ 
beq_long(opr1_reg, opr2_reg, *target); ++ break; ++ ++ case 0x02: //not_equal ++ __ bne_long(opr1_reg, opr2_reg, *target); ++ break; ++ ++ case 0x03: //greater ++ __ blt_long(opr2_reg, opr1_reg, *target, true /* signed */); ++ break; ++ ++ case 0x04: //greater_equal ++ __ bge_long(opr1_reg, opr2_reg, *target, true /* signed */); ++ break; ++ ++ case 0x05: //less ++ __ blt_long(opr1_reg, opr2_reg, *target, true /* signed */); ++ break; ++ ++ case 0x06: //less_equal ++ __ bge_long(opr2_reg, opr1_reg, *target, true /* signed */); ++ break; ++ ++ default: ++ Unimplemented(); ++ } ++ %} ++ ++ ++ ins_pc_relative(1); ++ ins_pipe( pipe_alu_branch ); ++%} ++ ++instruct branchConUL_regL_immL_0_long(cmpOp cmp, mRegL src1, immL_0 zero, label labl) %{ ++ match( If cmp (CmpUL src1 zero) ); ++ effect(USE labl); ++ format %{ "BR$cmp $src1, zero, $labl #@branchConUL_regL_immL_0_long" %} ++ ins_cost(150); ++ ++ ins_encode %{ ++ Register opr1_reg = as_Register($src1$$reg); ++ Register opr2_reg = R0; ++ ++ Label* target = $labl$$label; ++ int flag = $cmp$$cmpcode; ++ ++ switch(flag) { ++ case 0x01: //equal ++ __ beq_long(opr1_reg, opr2_reg, *target); ++ break; ++ ++ case 0x02: //not_equal ++ __ bne_long(opr1_reg, opr2_reg, *target); ++ break; ++ ++ case 0x03: //greater ++ __ blt_long(opr2_reg, opr1_reg, *target, false /* signed */); ++ break; ++ ++ case 0x04: //greater_equal ++ __ bge_long(opr1_reg, opr2_reg, *target, false /* signed */); ++ break; ++ ++ case 0x05: //less ++ __ blt_long(opr1_reg, opr2_reg, *target, false /* signed */); ++ break; ++ ++ case 0x06: //less_equal ++ __ bge_long(opr2_reg, opr1_reg, *target, false /* signed */); ++ break; ++ ++ default: ++ Unimplemented(); ++ } ++ %} ++ ++ ++ ins_pc_relative(1); ++ ins_pipe( pipe_alu_branch ); ++%} ++ ++instruct branchConL_regL_immL_long(cmpOp cmp, mRegL src1, immL src2, label labl) %{ ++ match( If cmp (CmpL src1 src2) ); ++ effect(USE labl); ++ format %{ "BR$cmp $src1, $src2, $labl #@branchConL_regL_immL_long" %} ++ ins_cost(180); ++ ++ ins_encode %{ ++ Register opr1_reg = as_Register($src1$$reg); ++ Register opr2_reg = AT; ++ ++ Label* target = $labl$$label; ++ int flag = $cmp$$cmpcode; ++ ++ __ li(opr2_reg, $src2$$constant); ++ ++ switch(flag) { ++ case 0x01: //equal ++ __ beq_long(opr1_reg, opr2_reg, *target); ++ break; ++ ++ case 0x02: //not_equal ++ __ bne_long(opr1_reg, opr2_reg, *target); ++ break; ++ ++ case 0x03: //greater ++ __ blt_long(opr2_reg, opr1_reg, *target, true /* signed */); ++ break; ++ ++ case 0x04: //greater_equal ++ __ bge_long(opr1_reg, opr2_reg, *target, true /* signed */); ++ break; ++ ++ case 0x05: //less ++ __ blt_long(opr1_reg, opr2_reg, *target, true /* signed */); ++ break; ++ ++ case 0x06: //less_equal ++ __ bge_long(opr2_reg, opr1_reg, *target, true /* signed */); ++ break; ++ ++ default: ++ Unimplemented(); ++ } ++ %} ++ ++ ++ ins_pc_relative(1); ++ ins_pipe( pipe_alu_branch ); ++%} ++ ++instruct branchConUL_regL_immL_long(cmpOp cmp, mRegL src1, immL src2, label labl) %{ ++ match( If cmp (CmpUL src1 src2) ); ++ effect(USE labl); ++ format %{ "BR$cmp $src1, $src2, $labl #@branchConUL_regL_immL_long" %} ++ ins_cost(180); ++ ++ ins_encode %{ ++ Register opr1_reg = as_Register($src1$$reg); ++ Register opr2_reg = AT; ++ ++ Label* target = $labl$$label; ++ int flag = $cmp$$cmpcode; ++ ++ __ li(opr2_reg, $src2$$constant); ++ ++ switch(flag) { ++ case 0x01: //equal ++ __ beq_long(opr1_reg, opr2_reg, *target); ++ break; ++ ++ case 0x02: //not_equal ++ __ bne_long(opr1_reg, opr2_reg, *target); ++ break; ++ ++ case 0x03: 
//greater ++ __ blt_long(opr2_reg, opr1_reg, *target, false /* signed */); ++ break; ++ ++ case 0x04: //greater_equal ++ __ bge_long(opr1_reg, opr2_reg, *target, false /* signed */); ++ break; ++ ++ case 0x05: //less ++ __ blt_long(opr1_reg, opr2_reg, *target, false /* signed */); ++ break; ++ ++ case 0x06: //less_equal ++ __ bge_long(opr2_reg, opr1_reg, *target, false /* signed */); ++ break; ++ ++ default: ++ Unimplemented(); ++ } ++ %} ++ ++ ++ ins_pc_relative(1); ++ ins_pipe( pipe_alu_branch ); ++%} ++ ++//FIXME ++instruct branchConF_reg_reg_long(cmpOp cmp, regF src1, regF src2, label labl) %{ ++ match( If cmp (CmpF src1 src2) ); ++ effect(USE labl); ++ format %{ "BR$cmp $src1, $src2, $labl #@branchConF_reg_reg_long" %} ++ ++ ins_encode %{ ++ FloatRegister reg_op1 = $src1$$FloatRegister; ++ FloatRegister reg_op2 = $src2$$FloatRegister; ++ Label* L = $labl$$label; ++ int flag = $cmp$$cmpcode; ++ ++ switch(flag) { ++ case 0x01: //equal ++ __ fcmp_ceq_s(FCC0, reg_op1, reg_op2); ++ __ bc1t_long(*L); ++ break; ++ case 0x02: //not_equal ++ __ fcmp_ceq_s(FCC0, reg_op1, reg_op2); ++ __ bc1f_long(*L); ++ break; ++ case 0x03: //greater ++ __ fcmp_cule_s(FCC0, reg_op1, reg_op2); ++ __ bc1f_long(*L); ++ break; ++ case 0x04: //greater_equal ++ __ fcmp_cult_s(FCC0, reg_op1, reg_op2); ++ __ bc1f_long(*L); ++ break; ++ case 0x05: //less ++ __ fcmp_cult_s(FCC0, reg_op1, reg_op2); ++ __ bc1t_long(*L); ++ break; ++ case 0x06: //less_equal ++ __ fcmp_cule_s(FCC0, reg_op1, reg_op2); ++ __ bc1t_long(*L); ++ break; ++ default: ++ Unimplemented(); ++ } ++ %} ++ ++ ins_pc_relative(1); ++ ins_pipe(pipe_slow); ++%} ++ ++instruct branchConD_reg_reg_long(cmpOp cmp, regD src1, regD src2, label labl) %{ ++ match( If cmp (CmpD src1 src2) ); ++ effect(USE labl); ++ format %{ "BR$cmp $src1, $src2, $labl #@branchConD_reg_reg_long" %} ++ ++ ins_encode %{ ++ FloatRegister reg_op1 = $src1$$FloatRegister; ++ FloatRegister reg_op2 = $src2$$FloatRegister; ++ Label* L = $labl$$label; ++ int flag = $cmp$$cmpcode; ++ ++ switch(flag) { ++ case 0x01: //equal ++ __ fcmp_ceq_d(FCC0, reg_op1, reg_op2); ++ __ bc1t_long(*L); ++ break; ++ case 0x02: //not_equal ++ // c_ueq_d cannot distinguish NaN from equal. Double.isNaN(Double) is implemented by 'f != f', so the use of c_ueq_d causes bugs. 
++ __ fcmp_ceq_d(FCC0, reg_op1, reg_op2); ++ __ bc1f_long(*L); ++ break; ++ case 0x03: //greater ++ __ fcmp_cule_d(FCC0, reg_op1, reg_op2); ++ __ bc1f_long(*L); ++ break; ++ case 0x04: //greater_equal ++ __ fcmp_cult_d(FCC0, reg_op1, reg_op2); ++ __ bc1f_long(*L); ++ break; ++ case 0x05: //less ++ __ fcmp_cult_d(FCC0, reg_op1, reg_op2); ++ __ bc1t_long(*L); ++ break; ++ case 0x06: //less_equal ++ __ fcmp_cule_d(FCC0, reg_op1, reg_op2); ++ __ bc1t_long(*L); ++ break; ++ default: ++ Unimplemented(); ++ } ++ %} ++ ++ ins_pc_relative(1); ++ ins_pipe(pipe_slow); ++%} ++ ++ ++// ============================================================================ ++// Branch Instructions -- short offset versions ++ ++// Jump Direct ++instruct jmpDir_short(label labl) %{ ++ match(Goto); ++ effect(USE labl); ++ ++ ins_cost(300); ++ format %{ "JMP $labl #@jmpDir_short" %} ++ ++ ins_encode %{ ++ Label &L = *($labl$$label); ++ if(&L) ++ __ b(L); ++ else ++ __ b(int(0)); ++ %} ++ ++ ins_pipe( pipe_jump ); ++ ins_pc_relative(1); ++ ins_short_branch(1); ++%} ++ ++// Jump Direct Conditional - Label defines a relative address from Jcc+1 ++instruct jmpLoopEnd_short(cmpOp cop, mRegI src1, mRegI src2, label labl) %{ ++ match(CountedLoopEnd cop (CmpI src1 src2)); ++ effect(USE labl); ++ ++ ins_cost(300); ++ format %{ "J$cop $src1, $src2, $labl\t# Loop end @ jmpLoopEnd_short" %} ++ ins_encode %{ ++ Register op1 = $src1$$Register; ++ Register op2 = $src2$$Register; ++ Label &L = *($labl$$label); ++ int flag = $cop$$cmpcode; ++ ++ switch(flag) { ++ case 0x01: //equal ++ if (&L) ++ __ beq(op1, op2, L); ++ else ++ __ beq(op1, op2, (int)0); ++ break; ++ case 0x02: //not_equal ++ if (&L) ++ __ bne(op1, op2, L); ++ else ++ __ bne(op1, op2, (int)0); ++ break; ++ case 0x03: //above ++ if (&L) ++ __ blt(op2, op1, L); ++ else ++ __ blt(op2, op1, (int)0); ++ break; ++ case 0x04: //above_equal ++ if (&L) ++ __ bge(op1, op2, L); ++ else ++ __ bge(op1, op2, (int)0); ++ break; ++ case 0x05: //below ++ if (&L) ++ __ blt(op1, op2, L); ++ else ++ __ blt(op1, op2, (int)0); ++ break; ++ case 0x06: //below_equal ++ if (&L) ++ __ bge(op2, op1, L); ++ else ++ __ bge(op2, op1, (int)0); ++ break; ++ default: ++ Unimplemented(); ++ } ++ %} ++ ins_pipe( pipe_jump ); ++ ins_pc_relative(1); ++ ins_short_branch(1); ++%} ++ ++instruct jmpLoopEnd_reg_immI_short(cmpOp cop, mRegI src1, immI src2, label labl) %{ ++ match(CountedLoopEnd cop (CmpI src1 src2)); ++ effect(USE labl); ++ ++ ins_cost(300); ++ format %{ "J$cop $src1, $src2, $labl\t# Loop end @ jmpLoopEnd_reg_immI_short" %} ++ ins_encode %{ ++ Register op1 = $src1$$Register; ++ Register op2 = AT; ++ Label &L = *($labl$$label); ++ int flag = $cop$$cmpcode; ++ ++ __ li(op2, $src2$$constant); ++ ++ switch(flag) { ++ case 0x01: //equal ++ if (&L) ++ __ beq(op1, op2, L); ++ else ++ __ beq(op1, op2, (int)0); ++ break; ++ case 0x02: //not_equal ++ if (&L) ++ __ bne(op1, op2, L); ++ else ++ __ bne(op1, op2, (int)0); ++ break; ++ case 0x03: //above ++ if (&L) ++ __ blt(op2, op1, L); ++ else ++ __ blt(op2, op1, (int)0); ++ break; ++ case 0x04: //above_equal ++ if (&L) ++ __ bge(op1, op2, L); ++ else ++ __ bge(op1, op2, (int)0); ++ break; ++ case 0x05: //below ++ if (&L) ++ __ blt(op1, op2, L); ++ else ++ __ blt(op1, op2, (int)0); ++ break; ++ case 0x06: //below_equal ++ if (&L) ++ __ bge(op2, op1, L); ++ else ++ __ bge(op2, op1, (int)0); ++ break; ++ default: ++ Unimplemented(); ++ } ++ %} ++ ins_pipe( pipe_jump ); ++ ins_pc_relative(1); ++ ins_short_branch(1); ++%} ++ ++ ++// This match pattern is created 
for StoreIConditional since I cannot match IfNode without a RegFlags! ++instruct jmpCon_flags_short(cmpOp cop, FlagsReg cr, label labl) %{ ++ match(If cop cr); ++ effect(USE labl); ++ ++ ins_cost(300); ++ format %{ "J$cop $labl #LoongArch uses T0 as equivalent to eflag @jmpCon_flags_short" %} ++ ++ ins_encode %{ ++ Label &L = *($labl$$label); ++ switch($cop$$cmpcode) { ++ case 0x01: //equal ++ if (&L) ++ __ bnez($cr$$Register, L); ++ else ++ __ bnez($cr$$Register, (int)0); ++ break; ++ case 0x02: //not equal ++ if (&L) ++ __ beqz($cr$$Register, L); ++ else ++ __ beqz($cr$$Register, (int)0); ++ break; ++ default: ++ Unimplemented(); ++ } ++ %} ++ ++ ins_pipe( pipe_jump ); ++ ins_pc_relative(1); ++ ins_short_branch(1); ++%} ++ ++// Conditional jumps ++instruct branchConP_0_short(cmpOpU cmp, mRegP op1, immP_0 zero, label labl) %{ ++ match(If cmp (CmpP op1 zero)); ++ effect(USE labl); ++ ++ ins_cost(180); ++ format %{ "b$cmp $op1, R0, $labl #@branchConP_0_short" %} ++ ++ ins_encode %{ ++ Register op1 = $op1$$Register; ++ Label &L = *($labl$$label); ++ int flag = $cmp$$cmpcode; ++ ++ switch(flag) { ++ case 0x01: //equal ++ if (&L) ++ __ beqz(op1, L); ++ else ++ __ beqz(op1, (int)0); ++ break; ++ case 0x02: //not_equal ++ if (&L) ++ __ bnez(op1, L); ++ else ++ __ bnez(op1, (int)0); ++ break; ++ default: ++ Unimplemented(); ++ } ++ %} ++ ++ ins_pc_relative(1); ++ ins_pipe( pipe_alu_branch ); ++ ins_short_branch(1); ++%} ++ ++instruct branchConN2P_0_short(cmpOpU cmp, mRegN op1, immP_0 zero, label labl) %{ ++ match(If cmp (CmpP (DecodeN op1) zero)); ++ predicate(Universe::narrow_oop_base() == NULL && Universe::narrow_oop_shift() == 0); ++ effect(USE labl); ++ ++ ins_cost(180); ++ format %{ "b$cmp $op1, R0, $labl #@branchConN2P_0_short" %} ++ ++ ins_encode %{ ++ Register op1 = $op1$$Register; ++ Label &L = *($labl$$label); ++ int flag = $cmp$$cmpcode; ++ ++ switch(flag) ++ { ++ case 0x01: //equal ++ if (&L) ++ __ beqz(op1, L); ++ else ++ __ beqz(op1, (int)0); ++ break; ++ case 0x02: //not_equal ++ if (&L) ++ __ bnez(op1, L); ++ else ++ __ bnez(op1, (int)0); ++ break; ++ default: ++ Unimplemented(); ++ } ++ %} ++ ++ ins_pc_relative(1); ++ ins_pipe( pipe_alu_branch ); ++ ins_short_branch(1); ++%} ++ ++ ++instruct branchConP_short(cmpOpU cmp, mRegP op1, mRegP op2, label labl) %{ ++ match(If cmp (CmpP op1 op2)); ++// predicate(can_branch_register(_kids[0]->_leaf, _kids[1]->_leaf)); ++ effect(USE labl); ++ ++ ins_cost(200); ++ format %{ "b$cmp $op1, $op2, $labl #@branchConP_short" %} ++ ++ ins_encode %{ ++ Register op1 = $op1$$Register; ++ Register op2 = $op2$$Register; ++ Label &L = *($labl$$label); ++ int flag = $cmp$$cmpcode; ++ ++ switch(flag) { ++ case 0x01: //equal ++ if (&L) ++ __ beq(op1, op2, L); ++ else ++ __ beq(op1, op2, (int)0); ++ break; ++ case 0x02: //not_equal ++ if (&L) ++ __ bne(op1, op2, L); ++ else ++ __ bne(op1, op2, (int)0); ++ break; ++ case 0x03: //above ++ if (&L) ++ __ bltu(op2, op1, L); ++ else ++ __ bltu(op2, op1, (int)0); ++ break; ++ case 0x04: //above_equal ++ if (&L) ++ __ bgeu(op1, op2, L); ++ else ++ __ bgeu(op1, op2, (int)0); ++ break; ++ case 0x05: //below ++ if (&L) ++ __ bltu(op1, op2, L); ++ else ++ __ bltu(op1, op2, (int)0); ++ break; ++ case 0x06: //below_equal ++ if (&L) ++ __ bgeu(op2, op1, L); ++ else ++ __ bgeu(op2, op1, (int)0); ++ break; ++ default: ++ Unimplemented(); ++ } ++ %} ++ ++ ins_pc_relative(1); ++ ins_pipe( pipe_alu_branch ); ++ ins_short_branch(1); ++%} ++ ++instruct cmpN_null_branch_short(cmpOp cmp, mRegN op1, immN_0 null, label labl) %{ ++ 
match(If cmp (CmpN op1 null)); ++ effect(USE labl); ++ ++ ins_cost(180); ++ format %{ "CMP $op1,0\t! compressed ptr\n\t" ++ "BP$cmp $labl @ cmpN_null_branch_short" %} ++ ins_encode %{ ++ Register op1 = $op1$$Register; ++ Label &L = *($labl$$label); ++ int flag = $cmp$$cmpcode; ++ ++ switch(flag) { ++ case 0x01: //equal ++ if (&L) ++ __ beqz(op1, L); ++ else ++ __ beqz(op1, (int)0); ++ break; ++ case 0x02: //not_equal ++ if (&L) ++ __ bnez(op1, L); ++ else ++ __ bnez(op1, (int)0); ++ break; ++ default: ++ Unimplemented(); ++ } ++ %} ++//TODO: pipe_branchP or create pipe_branchN LEE ++ ins_pc_relative(1); ++ ins_pipe( pipe_alu_branch ); ++ ins_short_branch(1); ++%} ++ ++instruct cmpN_reg_branch_short(cmpOp cmp, mRegN op1, mRegN op2, label labl) %{ ++ match(If cmp (CmpN op1 op2)); ++ effect(USE labl); ++ ++ ins_cost(180); ++ format %{ "CMP $op1,$op2\t! compressed ptr\n\t" ++ "BP$cmp $labl @ cmpN_reg_branch_short" %} ++ ins_encode %{ ++ Register op1_reg = $op1$$Register; ++ Register op2_reg = $op2$$Register; ++ Label &L = *($labl$$label); ++ int flag = $cmp$$cmpcode; ++ ++ switch(flag) { ++ case 0x01: //equal ++ if (&L) ++ __ beq(op1_reg, op2_reg, L); ++ else ++ __ beq(op1_reg, op2_reg, (int)0); ++ break; ++ case 0x02: //not_equal ++ if (&L) ++ __ bne(op1_reg, op2_reg, L); ++ else ++ __ bne(op1_reg, op2_reg, (int)0); ++ break; ++ case 0x03: //above ++ if (&L) ++ __ bltu(op2_reg, op1_reg, L); ++ else ++ __ bltu(op2_reg, op1_reg, (int)0); ++ break; ++ case 0x04: //above_equal ++ if (&L) ++ __ bgeu(op1_reg, op2_reg, L); ++ else ++ __ bgeu(op1_reg, op2_reg, (int)0); ++ break; ++ case 0x05: //below ++ if (&L) ++ __ bltu(op1_reg, op2_reg, L); ++ else ++ __ bltu(op1_reg, op2_reg, (int)0); ++ break; ++ case 0x06: //below_equal ++ if (&L) ++ __ bgeu(op2_reg, op1_reg, L); ++ else ++ __ bgeu(op2_reg, op1_reg, (int)0); ++ break; ++ default: ++ Unimplemented(); ++ } ++ %} ++ ins_pc_relative(1); ++ ins_pipe( pipe_alu_branch ); ++ ins_short_branch(1); ++%} ++ ++instruct branchConIU_reg_reg_short(cmpOpU cmp, mRegI src1, mRegI src2, label labl) %{ ++ match( If cmp (CmpU src1 src2) ); ++ effect(USE labl); ++ format %{ "BR$cmp $src1, $src2, $labl #@branchConIU_reg_reg_short" %} ++ ++ ins_encode %{ ++ Register op1 = $src1$$Register; ++ Register op2 = $src2$$Register; ++ Label &L = *($labl$$label); ++ int flag = $cmp$$cmpcode; ++ ++ switch(flag) { ++ case 0x01: //equal ++ if (&L) ++ __ beq(op1, op2, L); ++ else ++ __ beq(op1, op2, (int)0); ++ break; ++ case 0x02: //not_equal ++ if (&L) ++ __ bne(op1, op2, L); ++ else ++ __ bne(op1, op2, (int)0); ++ break; ++ case 0x03: //above ++ if (&L) ++ __ bltu(op2, op1, L); ++ else ++ __ bltu(op2, op1, (int)0); ++ break; ++ case 0x04: //above_equal ++ if (&L) ++ __ bgeu(op1, op2, L); ++ else ++ __ bgeu(op1, op2, (int)0); ++ break; ++ case 0x05: //below ++ if (&L) ++ __ bltu(op1, op2, L); ++ else ++ __ bltu(op1, op2, (int)0); ++ break; ++ case 0x06: //below_equal ++ if (&L) ++ __ bgeu(op2, op1, L); ++ else ++ __ bgeu(op2, op1, (int)0); ++ break; ++ default: ++ Unimplemented(); ++ } ++ %} ++ ++ ins_pc_relative(1); ++ ins_pipe( pipe_alu_branch ); ++ ins_short_branch(1); ++%} ++ ++ ++instruct branchConIU_reg_imm_short(cmpOpU cmp, mRegI src1, immI src2, label labl) %{ ++ match( If cmp (CmpU src1 src2) ); ++ effect(USE labl); ++ format %{ "BR$cmp $src1, $src2, $labl #@branchConIU_reg_imm_short" %} ++ ++ ins_encode %{ ++ Register op1 = $src1$$Register; ++ int val = $src2$$constant; ++ Label &L = *($labl$$label); ++ int flag = $cmp$$cmpcode; ++ ++ __ li(AT, val); ++ switch(flag) { ++ 
case 0x01: //equal ++ if (&L) ++ __ beq(op1, AT, L); ++ else ++ __ beq(op1, AT, (int)0); ++ break; ++ case 0x02: //not_equal ++ if (&L) ++ __ bne(op1, AT, L); ++ else ++ __ bne(op1, AT, (int)0); ++ break; ++ case 0x03: //above ++ if (&L) ++ __ bltu(AT, op1, L); ++ else ++ __ bltu(AT, op1, (int)0); ++ break; ++ case 0x04: //above_equal ++ if (&L) ++ __ bgeu(op1, AT, L); ++ else ++ __ bgeu(op1, AT, (int)0); ++ break; ++ case 0x05: //below ++ if (&L) ++ __ bltu(op1, AT, L); ++ else ++ __ bltu(op1, AT, (int)0); ++ break; ++ case 0x06: //below_equal ++ if (&L) ++ __ bgeu(AT, op1, L); ++ else ++ __ bgeu(AT, op1, (int)0); ++ break; ++ default: ++ Unimplemented(); ++ } ++ %} ++ ++ ins_pc_relative(1); ++ ins_pipe( pipe_alu_branch ); ++ ins_short_branch(1); ++%} ++ ++instruct branchConI_reg_reg_short(cmpOp cmp, mRegI src1, mRegI src2, label labl) %{ ++ match( If cmp (CmpI src1 src2) ); ++ effect(USE labl); ++ format %{ "BR$cmp $src1, $src2, $labl #@branchConI_reg_reg_short" %} ++ ++ ins_encode %{ ++ Register op1 = $src1$$Register; ++ Register op2 = $src2$$Register; ++ Label &L = *($labl$$label); ++ int flag = $cmp$$cmpcode; ++ ++ switch(flag) { ++ case 0x01: //equal ++ if (&L) ++ __ beq(op1, op2, L); ++ else ++ __ beq(op1, op2, (int)0); ++ break; ++ case 0x02: //not_equal ++ if (&L) ++ __ bne(op1, op2, L); ++ else ++ __ bne(op1, op2, (int)0); ++ break; ++ case 0x03: //above ++ if (&L) ++ __ blt(op2, op1, L); ++ else ++ __ blt(op2, op1, (int)0); ++ break; ++ case 0x04: //above_equal ++ if (&L) ++ __ bge(op1, op2, L); ++ else ++ __ bge(op1, op2, (int)0); ++ break; ++ case 0x05: //below ++ if (&L) ++ __ blt(op1, op2, L); ++ else ++ __ blt(op1, op2, (int)0); ++ break; ++ case 0x06: //below_equal ++ if (&L) ++ __ bge(op2, op1, L); ++ else ++ __ bge(op2, op1, (int)0); ++ break; ++ default: ++ Unimplemented(); ++ } ++ %} ++ ++ ins_pc_relative(1); ++ ins_pipe( pipe_alu_branch ); ++ ins_short_branch(1); ++%} ++ ++instruct branchConI_reg_immI_0_short(cmpOp cmp, mRegI src1, immI_0 src2, label labl) %{ ++ match( If cmp (CmpI src1 src2) ); ++ effect(USE labl); ++ ins_cost(170); ++ format %{ "BR$cmp $src1, $src2, $labl #@branchConI_reg_immI_0_short" %} ++ ++ ins_encode %{ ++ Register op1 = $src1$$Register; ++ Label &L = *($labl$$label); ++ int flag = $cmp$$cmpcode; ++ ++ switch(flag) { ++ case 0x01: //equal ++ if (&L) ++ __ beqz(op1, L); ++ else ++ __ beqz(op1, (int)0); ++ break; ++ case 0x02: //not_equal ++ if (&L) ++ __ bnez(op1, L); ++ else ++ __ bnez(op1, (int)0); ++ break; ++ case 0x03: //greater ++ if (&L) ++ __ blt(R0, op1, L); ++ else ++ __ blt(R0, op1, (int)0); ++ break; ++ case 0x04: //greater_equal ++ if (&L) ++ __ bge(op1, R0, L); ++ else ++ __ bge(op1, R0, (int)0); ++ break; ++ case 0x05: //less ++ if (&L) ++ __ blt(op1, R0, L); ++ else ++ __ blt(op1, R0, (int)0); ++ break; ++ case 0x06: //less_equal ++ if (&L) ++ __ bge(R0, op1, L); ++ else ++ __ bge(R0, op1, (int)0); ++ break; ++ default: ++ Unimplemented(); ++ } ++ %} ++ ++ ins_pc_relative(1); ++ ins_pipe( pipe_alu_branch ); ++ ins_short_branch(1); ++%} ++ ++ ++instruct branchConI_reg_imm_short(cmpOp cmp, mRegI src1, immI src2, label labl) %{ ++ match( If cmp (CmpI src1 src2) ); ++ effect(USE labl); ++ ins_cost(200); ++ format %{ "BR$cmp $src1, $src2, $labl #@branchConI_reg_imm_short" %} ++ ++ ins_encode %{ ++ Register op1 = $src1$$Register; ++ int val = $src2$$constant; ++ Label &L = *($labl$$label); ++ int flag = $cmp$$cmpcode; ++ ++ __ li(AT, val); ++ switch(flag) { ++ case 0x01: //equal ++ if (&L) ++ __ beq(op1, AT, L); ++ else ++ __ beq(op1, 
AT, (int)0); ++ break; ++ case 0x02: //not_equal ++ if (&L) ++ __ bne(op1, AT, L); ++ else ++ __ bne(op1, AT, (int)0); ++ break; ++ case 0x03: //greater ++ if (&L) ++ __ blt(AT, op1, L); ++ else ++ __ blt(AT, op1, (int)0); ++ break; ++ case 0x04: //greater_equal ++ if (&L) ++ __ bge(op1, AT, L); ++ else ++ __ bge(op1, AT, (int)0); ++ break; ++ case 0x05: //less ++ if (&L) ++ __ blt(op1, AT, L); ++ else ++ __ blt(op1, AT, (int)0); ++ break; ++ case 0x06: //less_equal ++ if (&L) ++ __ bge(AT, op1, L); ++ else ++ __ bge(AT, op1, (int)0); ++ break; ++ default: ++ Unimplemented(); ++ } ++ %} ++ ++ ins_pc_relative(1); ++ ins_pipe( pipe_alu_branch ); ++ ins_short_branch(1); ++%} ++ ++instruct branchConIU_reg_immI_0_short(cmpOpU cmp, mRegI src1, immI_0 zero, label labl) %{ ++ match( If cmp (CmpU src1 zero) ); ++ effect(USE labl); ++ format %{ "BR$cmp $src1, zero, $labl #@branchConIU_reg_immI_0_short" %} ++ ++ ins_encode %{ ++ Register op1 = $src1$$Register; ++ Label &L = *($labl$$label); ++ int flag = $cmp$$cmpcode; ++ ++ switch(flag) { ++ case 0x01: //equal ++ if (&L) ++ __ beqz(op1, L); ++ else ++ __ beqz(op1, (int)0); ++ break; ++ case 0x02: //not_equal ++ if (&L) ++ __ bnez(op1, L); ++ else ++ __ bnez(op1, (int)0); ++ break; ++ case 0x03: //above ++ if (&L) ++ __ bnez(op1, L); ++ else ++ __ bnez(op1, (int)0); ++ break; ++ case 0x04: //above_equal ++ if (&L) ++ __ b(L); ++ else ++ __ b((int)0); ++ break; ++ case 0x05: //below ++ return; ++ break; ++ case 0x06: //below_equal ++ if (&L) ++ __ beqz(op1, L); ++ else ++ __ beqz(op1, (int)0); ++ break; ++ default: ++ Unimplemented(); ++ } ++ %} ++ ++ ins_pc_relative(1); ++ ins_pipe( pipe_alu_branch ); ++ ins_short_branch(1); ++%} ++ ++ ++instruct branchConL_regL_regL_short(cmpOp cmp, mRegLorI2L src1, mRegLorI2L src2, label labl) %{ ++ match( If cmp (CmpL src1 src2) ); ++ effect(USE labl); ++ format %{ "BR$cmp $src1, $src2, $labl #@branchConL_regL_regL_short" %} ++ ins_cost(250); ++ ++ ins_encode %{ ++ Register opr1_reg = as_Register($src1$$reg); ++ Register opr2_reg = as_Register($src2$$reg); ++ ++ Label &target = *($labl$$label); ++ int flag = $cmp$$cmpcode; ++ ++ switch(flag) { ++ case 0x01: //equal ++ if (&target) ++ __ beq(opr1_reg, opr2_reg, target); ++ else ++ __ beq(opr1_reg, opr2_reg, (int)0); ++ break; ++ case 0x02: //not_equal ++ if(&target) ++ __ bne(opr1_reg, opr2_reg, target); ++ else ++ __ bne(opr1_reg, opr2_reg, (int)0); ++ break; ++ case 0x03: //greater ++ if (&target) ++ __ blt(opr2_reg, opr1_reg, target); ++ else ++ __ blt(opr2_reg, opr1_reg, (int)0); ++ break; ++ case 0x04: //greater_equal ++ if (&target) ++ __ bge(opr1_reg, opr2_reg, target); ++ else ++ __ bge(opr1_reg, opr2_reg, (int)0); ++ break; ++ case 0x05: //less ++ if (&target) ++ __ blt(opr1_reg, opr2_reg, target); ++ else ++ __ blt(opr1_reg, opr2_reg, (int)0); ++ break; ++ case 0x06: //less_equal ++ if (&target) ++ __ bge(opr2_reg, opr1_reg, target); ++ else ++ __ bge(opr2_reg, opr1_reg, (int)0); ++ break; ++ default: ++ Unimplemented(); ++ } ++ %} ++ ++ ++ ins_pc_relative(1); ++ ins_pipe( pipe_alu_branch ); ++ ins_short_branch(1); ++%} ++ ++instruct branchConUL_regL_regL_short(cmpOp cmp, mRegLorI2L src1, mRegLorI2L src2, label labl) %{ ++ match( If cmp (CmpUL src1 src2) ); ++ effect(USE labl); ++ format %{ "BR$cmp $src1, $src2, $labl #@branchConUL_regL_regL_short" %} ++ ins_cost(250); ++ ++ ins_encode %{ ++ Register opr1_reg = as_Register($src1$$reg); ++ Register opr2_reg = as_Register($src2$$reg); ++ ++ Label &target = *($labl$$label); ++ int flag = $cmp$$cmpcode; ++ ++ 
switch(flag) { ++ case 0x01: //equal ++ if (&target) ++ __ beq(opr1_reg, opr2_reg, target); ++ else ++ __ beq(opr1_reg, opr2_reg, (int)0); ++ break; ++ case 0x02: //not_equal ++ if(&target) ++ __ bne(opr1_reg, opr2_reg, target); ++ else ++ __ bne(opr1_reg, opr2_reg, (int)0); ++ break; ++ case 0x03: //greater ++ if (&target) ++ __ bltu(opr2_reg, opr1_reg, target); ++ else ++ __ bltu(opr2_reg, opr1_reg, (int)0); ++ break; ++ case 0x04: //greater_equal ++ if (&target) ++ __ bgeu(opr1_reg, opr2_reg, target); ++ else ++ __ bgeu(opr1_reg, opr2_reg, (int)0); ++ break; ++ case 0x05: //less ++ if (&target) ++ __ bltu(opr1_reg, opr2_reg, target); ++ else ++ __ bltu(opr1_reg, opr2_reg, (int)0); ++ break; ++ case 0x06: //less_equal ++ if (&target) ++ __ bgeu(opr2_reg, opr1_reg, target); ++ else ++ __ bgeu(opr2_reg, opr1_reg, (int)0); ++ break; ++ default: ++ Unimplemented(); ++ } ++ %} ++ ++ ++ ins_pc_relative(1); ++ ins_pipe( pipe_alu_branch ); ++ ins_short_branch(1); ++%} ++ ++instruct branchConL_regL_immL_0_short(cmpOp cmp, mRegL src1, immL_0 zero, label labl) %{ ++ match( If cmp (CmpL src1 zero) ); ++ effect(USE labl); ++ format %{ "BR$cmp $src1, zero, $labl #@branchConL_regL_immL_0_short" %} ++ ins_cost(150); ++ ++ ins_encode %{ ++ Register opr1_reg = as_Register($src1$$reg); ++ Label &target = *($labl$$label); ++ int flag = $cmp$$cmpcode; ++ ++ switch(flag) { ++ case 0x01: //equal ++ if (&target) ++ __ beqz(opr1_reg, target); ++ else ++ __ beqz(opr1_reg, int(0)); ++ break; ++ ++ case 0x02: //not_equal ++ if (&target) ++ __ bnez(opr1_reg, target); ++ else ++ __ bnez(opr1_reg, (int)0); ++ break; ++ ++ case 0x03: //greater ++ if (&target) ++ __ blt(R0, opr1_reg, target); ++ else ++ __ blt(R0, opr1_reg, (int)0); ++ break; ++ ++ case 0x04: //greater_equal ++ if (&target) ++ __ bge(opr1_reg, R0, target); ++ else ++ __ bge(opr1_reg, R0, (int)0); ++ break; ++ ++ case 0x05: //less ++ if (&target) ++ __ blt(opr1_reg, R0, target); ++ else ++ __ blt(opr1_reg, R0, (int)0); ++ break; ++ ++ case 0x06: //less_equal ++ if (&target) ++ __ bge(R0, opr1_reg, target); ++ else ++ __ bge(R0, opr1_reg, int(0)); ++ break; ++ ++ default: ++ Unimplemented(); ++ } ++ %} ++ ++ ++ ins_pc_relative(1); ++ ins_pipe( pipe_alu_branch ); ++ ins_short_branch(1); ++%} ++ ++instruct branchConUL_regL_immL_0_short(cmpOp cmp, mRegL src1, immL_0 zero, label labl) %{ ++ match( If cmp (CmpUL src1 zero) ); ++ effect(USE labl); ++ format %{ "BR$cmp $src1, zero, $labl #@branchConUL_regL_immL_0_short" %} ++ ins_cost(150); ++ ++ ins_encode %{ ++ Register opr1_reg = as_Register($src1$$reg); ++ Label &target = *($labl$$label); ++ int flag = $cmp$$cmpcode; ++ ++ switch(flag) { ++ case 0x01: //equal ++ if (&target) ++ __ beqz(opr1_reg, target); ++ else ++ __ beqz(opr1_reg, int(0)); ++ break; ++ ++ case 0x02: //not_equal ++ if (&target) ++ __ bnez(opr1_reg, target); ++ else ++ __ bnez(opr1_reg, (int)0); ++ break; ++ ++ case 0x03: //greater ++ if (&target) ++ __ bltu(R0, opr1_reg, target); ++ else ++ __ bltu(R0, opr1_reg, (int)0); ++ break; ++ ++ case 0x04: //greater_equal ++ if (&target) ++ __ bgeu(opr1_reg, R0, target); ++ else ++ __ bgeu(opr1_reg, R0, (int)0); ++ break; ++ ++ case 0x05: //less ++ if (&target) ++ __ bltu(opr1_reg, R0, target); ++ else ++ __ bltu(opr1_reg, R0, (int)0); ++ break; ++ ++ case 0x06: //less_equal ++ if (&target) ++ __ bgeu(R0, opr1_reg, target); ++ else ++ __ bgeu(R0, opr1_reg, int(0)); ++ break; ++ ++ default: ++ Unimplemented(); ++ } ++ %} ++ ++ ++ ins_pc_relative(1); ++ ins_pipe( pipe_alu_branch ); ++ 
ins_short_branch(1); ++%} ++ ++instruct branchConL_regL_immL_short(cmpOp cmp, mRegL src1, immL src2, label labl) %{ ++ match( If cmp (CmpL src1 src2) ); ++ effect(USE labl); ++ format %{ "BR$cmp $src1, $src2, $labl #@branchConL_regL_immL_short" %} ++ ins_cost(180); ++ ++ ins_encode %{ ++ Register opr1_reg = as_Register($src1$$reg); ++ Register opr2_reg = AT; ++ ++ Label &target = *($labl$$label); ++ int flag = $cmp$$cmpcode; ++ ++ __ li(opr2_reg, $src2$$constant); ++ ++ switch(flag) { ++ case 0x01: //equal ++ if (&target) ++ __ beq(opr1_reg, opr2_reg, target); ++ else ++ __ beq(opr1_reg, opr2_reg, (int)0); ++ break; ++ ++ case 0x02: //not_equal ++ if(&target) ++ __ bne(opr1_reg, opr2_reg, target); ++ else ++ __ bne(opr1_reg, opr2_reg, (int)0); ++ break; ++ ++ case 0x03: //greater ++ if (&target) ++ __ blt(opr2_reg, opr1_reg, target); ++ else ++ __ blt(opr2_reg, opr1_reg, (int)0); ++ break; ++ ++ case 0x04: //greater_equal ++ if (&target) ++ __ bge(opr1_reg, opr2_reg, target); ++ else ++ __ bge(opr1_reg, opr2_reg, (int)0); ++ break; ++ ++ case 0x05: //less ++ if (&target) ++ __ blt(opr1_reg, opr2_reg, target); ++ else ++ __ blt(opr1_reg, opr2_reg, (int)0); ++ break; ++ ++ case 0x06: //less_equal ++ if (&target) ++ __ bge(opr2_reg, opr1_reg, target); ++ else ++ __ bge(opr2_reg, opr1_reg, (int)0); ++ break; ++ ++ default: ++ Unimplemented(); ++ } ++ %} ++ ++ ++ ins_pc_relative(1); ++ ins_pipe( pipe_alu_branch ); ++ ins_short_branch(1); ++%} ++ ++instruct branchConUL_regL_immL_short(cmpOp cmp, mRegL src1, immL src2, label labl) %{ ++ match( If cmp (CmpUL src1 src2) ); ++ effect(USE labl); ++ format %{ "BR$cmp $src1, $src2, $labl #@branchConUL_regL_immL_short" %} ++ ins_cost(180); ++ ++ ins_encode %{ ++ Register opr1_reg = as_Register($src1$$reg); ++ Register opr2_reg = AT; ++ ++ Label &target = *($labl$$label); ++ int flag = $cmp$$cmpcode; ++ ++ __ li(opr2_reg, $src2$$constant); ++ ++ switch(flag) { ++ case 0x01: //equal ++ if (&target) ++ __ beq(opr1_reg, opr2_reg, target); ++ else ++ __ beq(opr1_reg, opr2_reg, (int)0); ++ break; ++ ++ case 0x02: //not_equal ++ if(&target) ++ __ bne(opr1_reg, opr2_reg, target); ++ else ++ __ bne(opr1_reg, opr2_reg, (int)0); ++ break; ++ ++ case 0x03: //greater ++ if (&target) ++ __ bltu(opr2_reg, opr1_reg, target); ++ else ++ __ bltu(opr2_reg, opr1_reg, (int)0); ++ break; ++ ++ case 0x04: //greater_equal ++ if (&target) ++ __ bgeu(opr1_reg, opr2_reg, target); ++ else ++ __ bgeu(opr1_reg, opr2_reg, (int)0); ++ break; ++ ++ case 0x05: //less ++ if (&target) ++ __ bltu(opr1_reg, opr2_reg, target); ++ else ++ __ bltu(opr1_reg, opr2_reg, (int)0); ++ break; ++ ++ case 0x06: //less_equal ++ if (&target) ++ __ bgeu(opr2_reg, opr1_reg, target); ++ else ++ __ bgeu(opr2_reg, opr1_reg, (int)0); ++ break; ++ ++ default: ++ Unimplemented(); ++ } ++ %} ++ ++ ++ ins_pc_relative(1); ++ ins_pipe( pipe_alu_branch ); ++ ins_short_branch(1); ++%} ++ ++//FIXME ++instruct branchConF_reg_reg_short(cmpOp cmp, regF src1, regF src2, label labl) %{ ++ match( If cmp (CmpF src1 src2) ); ++ effect(USE labl); ++ format %{ "BR$cmp $src1, $src2, $labl #@branchConF_reg_reg_short" %} ++ ++ ins_encode %{ ++ FloatRegister reg_op1 = $src1$$FloatRegister; ++ FloatRegister reg_op2 = $src2$$FloatRegister; ++ Label &L = *($labl$$label); ++ int flag = $cmp$$cmpcode; ++ ++ switch(flag) { ++ case 0x01: //equal ++ __ fcmp_ceq_s(FCC0, reg_op1, reg_op2); ++ if (&L) ++ __ bcnez(FCC0, L); ++ else ++ __ bcnez(FCC0, (int)0); ++ break; ++ case 0x02: //not_equal ++ __ fcmp_ceq_s(FCC0, reg_op1, reg_op2); ++ if 
(&L) ++ __ bceqz(FCC0, L); ++ else ++ __ bceqz(FCC0, (int)0); ++ break; ++ case 0x03: //greater ++ __ fcmp_cule_s(FCC0, reg_op1, reg_op2); ++ if(&L) ++ __ bceqz(FCC0, L); ++ else ++ __ bceqz(FCC0, (int)0); ++ break; ++ case 0x04: //greater_equal ++ __ fcmp_cult_s(FCC0, reg_op1, reg_op2); ++ if(&L) ++ __ bceqz(FCC0, L); ++ else ++ __ bceqz(FCC0, (int)0); ++ break; ++ case 0x05: //less ++ __ fcmp_cult_s(FCC0, reg_op1, reg_op2); ++ if(&L) ++ __ bcnez(FCC0, L); ++ else ++ __ bcnez(FCC0, (int)0); ++ break; ++ case 0x06: //less_equal ++ __ fcmp_cule_s(FCC0, reg_op1, reg_op2); ++ if(&L) ++ __ bcnez(FCC0, L); ++ else ++ __ bcnez(FCC0, (int)0); ++ break; ++ default: ++ Unimplemented(); ++ } ++ %} ++ ++ ins_pc_relative(1); ++ ins_pipe(pipe_fpu_branch); ++ ins_short_branch(1); ++%} ++ ++instruct branchConD_reg_reg_short(cmpOp cmp, regD src1, regD src2, label labl) %{ ++ match( If cmp (CmpD src1 src2) ); ++ effect(USE labl); ++ format %{ "BR$cmp $src1, $src2, $labl #@branchConD_reg_reg_short" %} ++ ++ ins_encode %{ ++ FloatRegister reg_op1 = $src1$$FloatRegister; ++ FloatRegister reg_op2 = $src2$$FloatRegister; ++ Label &L = *($labl$$label); ++ int flag = $cmp$$cmpcode; ++ ++ switch(flag) { ++ case 0x01: //equal ++ __ fcmp_ceq_d(FCC0, reg_op1, reg_op2); ++ if (&L) ++ __ bcnez(FCC0, L); ++ else ++ __ bcnez(FCC0, (int)0); ++ break; ++ case 0x02: //not_equal ++ // c_ueq_d cannot distinguish NaN from equal. Double.isNaN(Double) is implemented by 'f != f', so the use of c_ueq_d causes bugs. ++ __ fcmp_ceq_d(FCC0, reg_op1, reg_op2); ++ if (&L) ++ __ bceqz(FCC0, L); ++ else ++ __ bceqz(FCC0, (int)0); ++ break; ++ case 0x03: //greater ++ __ fcmp_cule_d(FCC0, reg_op1, reg_op2); ++ if(&L) ++ __ bceqz(FCC0, L); ++ else ++ __ bceqz(FCC0, (int)0); ++ break; ++ case 0x04: //greater_equal ++ __ fcmp_cult_d(FCC0, reg_op1, reg_op2); ++ if(&L) ++ __ bceqz(FCC0, L); ++ else ++ __ bceqz(FCC0, (int)0); ++ break; ++ case 0x05: //less ++ __ fcmp_cult_d(FCC0, reg_op1, reg_op2); ++ if(&L) ++ __ bcnez(FCC0, L); ++ else ++ __ bcnez(FCC0, (int)0); ++ break; ++ case 0x06: //less_equal ++ __ fcmp_cule_d(FCC0, reg_op1, reg_op2); ++ if(&L) ++ __ bcnez(FCC0, L); ++ else ++ __ bcnez(FCC0, (int)0); ++ break; ++ default: ++ Unimplemented(); ++ } ++ %} ++ ++ ins_pc_relative(1); ++ ins_pipe(pipe_fpu_branch); ++ ins_short_branch(1); ++%} ++ ++// =================== End of branch instructions ========================== ++ ++// Call Runtime Instruction ++instruct CallRuntimeDirect(method meth) %{ ++ match(CallRuntime ); ++ effect(USE meth); ++ ++ ins_cost(300); ++ format %{ "CALL,runtime #@CallRuntimeDirect" %} ++ ins_encode( Java_To_Runtime( meth ) ); ++ ins_pipe( pipe_slow ); ++ ins_alignment(4); ++%} ++ ++ ++ ++//------------------------MemBar Instructions------------------------------- ++//Memory barrier flavors ++ ++instruct membar_acquire() %{ ++ match(MemBarAcquire); ++ ins_cost(400); ++ ++ format %{ "MEMBAR-acquire @ membar_acquire" %} ++ ins_encode %{ ++ __ membar(Assembler::Membar_mask_bits(__ LoadLoad|__ LoadStore)); ++ %} ++ ins_pipe(empty); ++%} ++ ++instruct load_fence() %{ ++ match(LoadFence); ++ ins_cost(400); ++ ++ format %{ "MEMBAR @ load_fence" %} ++ ins_encode %{ ++ __ membar(Assembler::Membar_mask_bits(__ LoadLoad|__ LoadStore)); ++ %} ++ ins_pipe(pipe_slow); ++%} ++ ++instruct membar_acquire_lock() ++%{ ++ match(MemBarAcquireLock); ++ ins_cost(0); ++ ++ size(0); ++ format %{ "MEMBAR-acquire (acquire as part of CAS in prior FastLock so empty encoding) @ membar_acquire_lock" %} ++ ins_encode(); ++ ins_pipe(empty); ++%} ++ 
++instruct membar_release() %{ ++ match(MemBarRelease); ++ ins_cost(400); ++ ++ format %{ "MEMBAR-release @ membar_release" %} ++ ++ ins_encode %{ ++ // Attention: DO NOT DELETE THIS GUY! ++ __ membar(Assembler::Membar_mask_bits(__ LoadStore|__ StoreStore)); ++ %} ++ ++ ins_pipe(pipe_slow); ++%} ++ ++instruct store_fence() %{ ++ match(StoreFence); ++ ins_cost(400); ++ ++ format %{ "MEMBAR @ store_fence" %} ++ ++ ins_encode %{ ++ __ membar(Assembler::Membar_mask_bits(__ LoadStore|__ StoreStore)); ++ %} ++ ++ ins_pipe(pipe_slow); ++%} ++ ++instruct membar_release_lock() ++%{ ++ match(MemBarReleaseLock); ++ ins_cost(0); ++ ++ size(0); ++ format %{ "MEMBAR-release-lock (release in FastUnlock so empty) @ membar_release_lock" %} ++ ins_encode(); ++ ins_pipe(empty); ++%} ++ ++ ++instruct membar_volatile() %{ ++ match(MemBarVolatile); ++ ins_cost(400); ++ ++ format %{ "MEMBAR-volatile" %} ++ ins_encode %{ ++ if( !os::is_MP() ) return; // Not needed on single CPU ++ __ membar(__ StoreLoad); ++ ++ %} ++ ins_pipe(pipe_slow); ++%} ++ ++instruct unnecessary_membar_volatile() %{ ++ match(MemBarVolatile); ++ predicate(Matcher::post_store_load_barrier(n)); ++ ins_cost(0); ++ ++ size(0); ++ format %{ "MEMBAR-volatile (unnecessary so empty encoding) @ unnecessary_membar_volatile" %} ++ ins_encode( ); ++ ins_pipe(empty); ++%} ++ ++instruct membar_storestore() %{ ++ match(MemBarStoreStore); ++ ++ ins_cost(400); ++ format %{ "MEMBAR-storestore @ membar_storestore" %} ++ ins_encode %{ ++ __ membar(__ StoreStore); ++ %} ++ ins_pipe(empty); ++%} ++ ++//----------Move Instructions-------------------------------------------------- ++instruct castX2P(mRegP dst, mRegL src) %{ ++ match(Set dst (CastX2P src)); ++ format %{ "castX2P $dst, $src @ castX2P" %} ++ ins_encode %{ ++ Register src = $src$$Register; ++ Register dst = $dst$$Register; ++ ++ if(src != dst) ++ __ move(dst, src); ++ %} ++ ins_cost(10); ++ ins_pipe( ialu_regI_mov ); ++%} ++ ++instruct castP2X(mRegL dst, mRegP src ) %{ ++ match(Set dst (CastP2X src)); ++ ++ format %{ "mov $dst, $src\t #@castP2X" %} ++ ins_encode %{ ++ Register src = $src$$Register; ++ Register dst = $dst$$Register; ++ ++ if(src != dst) ++ __ move(dst, src); ++ %} ++ ins_pipe( ialu_regI_mov ); ++%} ++ ++instruct MoveF2I_reg_reg(mRegI dst, regF src) %{ ++ match(Set dst (MoveF2I src)); ++ effect(DEF dst, USE src); ++ ins_cost(85); ++ format %{ "MoveF2I $dst, $src @ MoveF2I_reg_reg" %} ++ ins_encode %{ ++ Register dst = as_Register($dst$$reg); ++ FloatRegister src = as_FloatRegister($src$$reg); ++ ++ __ movfr2gr_s(dst, src); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct MoveI2F_reg_reg(regF dst, mRegI src) %{ ++ match(Set dst (MoveI2F src)); ++ effect(DEF dst, USE src); ++ ins_cost(85); ++ format %{ "MoveI2F $dst, $src @ MoveI2F_reg_reg" %} ++ ins_encode %{ ++ Register src = as_Register($src$$reg); ++ FloatRegister dst = as_FloatRegister($dst$$reg); ++ ++ __ movgr2fr_w(dst, src); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct MoveD2L_reg_reg(mRegL dst, regD src) %{ ++ match(Set dst (MoveD2L src)); ++ effect(DEF dst, USE src); ++ ins_cost(85); ++ format %{ "MoveD2L $dst, $src @ MoveD2L_reg_reg" %} ++ ins_encode %{ ++ Register dst = as_Register($dst$$reg); ++ FloatRegister src = as_FloatRegister($src$$reg); ++ ++ __ movfr2gr_d(dst, src); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct MoveL2D_reg_reg(regD dst, mRegL src) %{ ++ match(Set dst (MoveL2D src)); ++ effect(DEF dst, USE src); ++ ins_cost(85); ++ format %{ "MoveL2D $dst, $src @ MoveL2D_reg_reg" %} ++ ins_encode %{ ++ 
FloatRegister dst = as_FloatRegister($dst$$reg); ++ Register src = as_Register($src$$reg); ++ ++ __ movgr2fr_d(dst, src); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++//----------Conditional Move--------------------------------------------------- ++// Conditional move ++instruct cmovI_cmpI_reg_reg(mRegI dst, mRegI src, mRegI tmp1, mRegI tmp2, cmpOp cop ) %{ ++ match(Set dst (CMoveI (Binary cop (CmpI tmp1 tmp2)) (Binary dst src))); ++ ins_cost(80); ++ format %{ ++ "CMP$cop $tmp1, $tmp2\t @cmovI_cmpI_reg_reg\n" ++ "\tCMOV $dst,$src \t @cmovI_cmpI_reg_reg" ++ %} ++ ++ ins_encode %{ ++ Register op1 = $tmp1$$Register; ++ Register op2 = $tmp2$$Register; ++ Register dst = $dst$$Register; ++ Register src = $src$$Register; ++ int flag = $cop$$cmpcode; ++ ++ __ cmp_cmov(op1, op2, dst, src, (MacroAssembler::CMCompare) flag, true /* is_signed */); ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct cmovI_cmpP_reg_reg(mRegI dst, mRegI src, mRegP tmp1, mRegP tmp2, cmpOpU cop ) %{ ++ match(Set dst (CMoveI (Binary cop (CmpP tmp1 tmp2)) (Binary dst src))); ++ ins_cost(80); ++ format %{ ++ "CMPU$cop $tmp1,$tmp2\t @cmovI_cmpP_reg_reg\n\t" ++ "CMOV $dst,$src\t @cmovI_cmpP_reg_reg" ++ %} ++ ins_encode %{ ++ Register op1 = $tmp1$$Register; ++ Register op2 = $tmp2$$Register; ++ Register dst = $dst$$Register; ++ Register src = $src$$Register; ++ int flag = $cop$$cmpcode; ++ ++ __ cmp_cmov(op1, op2, dst, src, (MacroAssembler::CMCompare) flag, false /* is_signed */); ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct cmovI_cmpN_reg_reg(mRegI dst, mRegI src, mRegN tmp1, mRegN tmp2, cmpOpU cop ) %{ ++ match(Set dst (CMoveI (Binary cop (CmpN tmp1 tmp2)) (Binary dst src))); ++ ins_cost(80); ++ format %{ ++ "CMPU$cop $tmp1,$tmp2\t @cmovI_cmpN_reg_reg\n\t" ++ "CMOV $dst,$src\t @cmovI_cmpN_reg_reg" ++ %} ++ ins_encode %{ ++ Register op1 = $tmp1$$Register; ++ Register op2 = $tmp2$$Register; ++ Register dst = $dst$$Register; ++ Register src = $src$$Register; ++ int flag = $cop$$cmpcode; ++ ++ __ cmp_cmov(op1, op2, dst, src, (MacroAssembler::CMCompare) flag, false /* is_signed */); ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct cmovP_cmpU_reg_reg(mRegP dst, mRegP src, mRegI tmp1, mRegI tmp2, cmpOpU cop ) %{ ++ match(Set dst (CMoveP (Binary cop (CmpU tmp1 tmp2)) (Binary dst src))); ++ ins_cost(80); ++ format %{ ++ "CMPU$cop $tmp1,$tmp2\t @cmovP_cmpU_reg_reg\n\t" ++ "CMOV $dst,$src\t @cmovP_cmpU_reg_reg" ++ %} ++ ins_encode %{ ++ Register op1 = $tmp1$$Register; ++ Register op2 = $tmp2$$Register; ++ Register dst = $dst$$Register; ++ Register src = $src$$Register; ++ int flag = $cop$$cmpcode; ++ ++ __ cmp_cmov(op1, op2, dst, src, (MacroAssembler::CMCompare) flag, false /* is_signed */); ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct cmovP_cmpF_reg_reg(mRegP dst, mRegP src, regF tmp1, regF tmp2, cmpOp cop, regD tmp3, regD tmp4) %{ ++ match(Set dst (CMoveP (Binary cop (CmpF tmp1 tmp2)) (Binary dst src))); ++ effect(TEMP tmp3, TEMP tmp4); ++ ins_cost(80); ++ format %{ ++ "CMP$cop $tmp1, $tmp2\t @cmovP_cmpF_reg_reg\n" ++ "\tCMOV $dst,$src \t @cmovP_cmpF_reg_reg" ++ %} ++ ++ ins_encode %{ ++ FloatRegister reg_op1 = $tmp1$$FloatRegister; ++ FloatRegister reg_op2 = $tmp2$$FloatRegister; ++ FloatRegister tmp1 = $tmp3$$FloatRegister; ++ FloatRegister tmp2 = $tmp4$$FloatRegister; ++ Register dst = $dst$$Register; ++ Register src = $src$$Register; ++ int flag = $cop$$cmpcode; ++ ++ __ cmp_cmov(reg_op1, reg_op2, dst, src, tmp1, tmp2, (MacroAssembler::CMCompare) flag, true /* is_float */); ++ %} ++ ins_pipe( pipe_slow ); ++%} 
++ ++instruct cmovP_cmpN_reg_reg(mRegP dst, mRegP src, mRegN tmp1, mRegN tmp2, cmpOpU cop ) %{ ++ match(Set dst (CMoveP (Binary cop (CmpN tmp1 tmp2)) (Binary dst src))); ++ ins_cost(80); ++ format %{ ++ "CMPU$cop $tmp1,$tmp2\t @cmovP_cmpN_reg_reg\n\t" ++ "CMOV $dst,$src\t @cmovP_cmpN_reg_reg" ++ %} ++ ins_encode %{ ++ Register op1 = $tmp1$$Register; ++ Register op2 = $tmp2$$Register; ++ Register dst = $dst$$Register; ++ Register src = $src$$Register; ++ int flag = $cop$$cmpcode; ++ ++ __ cmp_cmov(op1, op2, dst, src, (MacroAssembler::CMCompare) flag, false /* is_signed */); ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct cmovN_cmpP_reg_reg(mRegN dst, mRegN src, mRegP tmp1, mRegP tmp2, cmpOpU cop ) %{ ++ match(Set dst (CMoveN (Binary cop (CmpP tmp1 tmp2)) (Binary dst src))); ++ ins_cost(80); ++ format %{ ++ "CMPU$cop $tmp1,$tmp2\t @cmovN_cmpP_reg_reg\n\t" ++ "CMOV $dst,$src\t @cmovN_cmpP_reg_reg" ++ %} ++ ins_encode %{ ++ Register op1 = $tmp1$$Register; ++ Register op2 = $tmp2$$Register; ++ Register dst = $dst$$Register; ++ Register src = $src$$Register; ++ int flag = $cop$$cmpcode; ++ ++ __ cmp_cmov(op1, op2, dst, src, (MacroAssembler::CMCompare) flag, false /* is_signed */); ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct cmovP_cmpD_reg_reg(mRegP dst, mRegP src, regD tmp1, regD tmp2, cmpOp cop, regD tmp3, regD tmp4) %{ ++ match(Set dst (CMoveP (Binary cop (CmpD tmp1 tmp2)) (Binary dst src))); ++ effect(TEMP tmp3, TEMP tmp4); ++ ins_cost(80); ++ format %{ ++ "CMP$cop $tmp1, $tmp2\t @cmovP_cmpD_reg_reg\n" ++ "\tCMOV $dst,$src \t @cmovP_cmpD_reg_reg" ++ %} ++ ins_encode %{ ++ FloatRegister reg_op1 = as_FloatRegister($tmp1$$reg); ++ FloatRegister reg_op2 = as_FloatRegister($tmp2$$reg); ++ FloatRegister tmp1 = $tmp3$$FloatRegister; ++ FloatRegister tmp2 = $tmp4$$FloatRegister; ++ Register dst = as_Register($dst$$reg); ++ Register src = as_Register($src$$reg); ++ int flag = $cop$$cmpcode; ++ ++ __ cmp_cmov(reg_op1, reg_op2, dst, src, tmp1, tmp2, (MacroAssembler::CMCompare) flag, false /* is_float */); ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++ ++instruct cmovN_cmpN_reg_reg(mRegN dst, mRegN src, mRegN tmp1, mRegN tmp2, cmpOpU cop ) %{ ++ match(Set dst (CMoveN (Binary cop (CmpN tmp1 tmp2)) (Binary dst src))); ++ ins_cost(80); ++ format %{ ++ "CMPU$cop $tmp1,$tmp2\t @cmovN_cmpN_reg_reg\n\t" ++ "CMOV $dst,$src\t @cmovN_cmpN_reg_reg" ++ %} ++ ins_encode %{ ++ Register op1 = $tmp1$$Register; ++ Register op2 = $tmp2$$Register; ++ Register dst = $dst$$Register; ++ Register src = $src$$Register; ++ int flag = $cop$$cmpcode; ++ ++ __ cmp_cmov(op1, op2, dst, src, (MacroAssembler::CMCompare) flag, false /* is_signed */); ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++ ++instruct cmovI_cmpU_reg_reg(mRegI dst, mRegI src, mRegI tmp1, mRegI tmp2, cmpOpU cop ) %{ ++ match(Set dst (CMoveI (Binary cop (CmpU tmp1 tmp2)) (Binary dst src))); ++ ins_cost(80); ++ format %{ ++ "CMPU$cop $tmp1,$tmp2\t @cmovI_cmpU_reg_reg\n\t" ++ "CMOV $dst,$src\t @cmovI_cmpU_reg_reg" ++ %} ++ ins_encode %{ ++ Register op1 = $tmp1$$Register; ++ Register op2 = $tmp2$$Register; ++ Register dst = $dst$$Register; ++ Register src = $src$$Register; ++ int flag = $cop$$cmpcode; ++ ++ __ cmp_cmov(op1, op2, dst, src, (MacroAssembler::CMCompare) flag, false /* is_signed */); ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct cmovI_cmpL_reg_reg(mRegI dst, mRegI src, mRegLorI2L tmp1, mRegLorI2L tmp2, cmpOp cop ) %{ ++ match(Set dst (CMoveI (Binary cop (CmpL tmp1 tmp2)) (Binary dst src))); ++ ins_cost(80); ++ format %{ ++ "CMP$cop 
$tmp1, $tmp2\t @cmovI_cmpL_reg_reg\n" ++ "\tCMOV $dst,$src \t @cmovI_cmpL_reg_reg" ++ %} ++ ins_encode %{ ++ Register opr1 = as_Register($tmp1$$reg); ++ Register opr2 = as_Register($tmp2$$reg); ++ Register dst = $dst$$Register; ++ Register src = $src$$Register; ++ int flag = $cop$$cmpcode; ++ ++ __ cmp_cmov(opr1, opr2, dst, src, (MacroAssembler::CMCompare) flag, true /* is_signed */); ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct cmovI_cmpUL_reg_reg(mRegI dst, mRegI src, mRegLorI2L tmp1, mRegLorI2L tmp2, cmpOp cop ) %{ ++ match(Set dst (CMoveI (Binary cop (CmpUL tmp1 tmp2)) (Binary dst src))); ++ ins_cost(80); ++ format %{ ++ "CMP$cop $tmp1, $tmp2\t @cmovI_cmpUL_reg_reg\n" ++ "\tCMOV $dst,$src \t @cmovI_cmpUL_reg_reg" ++ %} ++ ins_encode %{ ++ Register opr1 = as_Register($tmp1$$reg); ++ Register opr2 = as_Register($tmp2$$reg); ++ Register dst = $dst$$Register; ++ Register src = $src$$Register; ++ int flag = $cop$$cmpcode; ++ ++ __ cmp_cmov(opr1, opr2, dst, src, (MacroAssembler::CMCompare) flag, false /* is_signed */); ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct cmovP_cmpL_reg_reg(mRegP dst, mRegP src, mRegLorI2L tmp1, mRegLorI2L tmp2, cmpOp cop ) %{ ++ match(Set dst (CMoveP (Binary cop (CmpL tmp1 tmp2)) (Binary dst src))); ++ ins_cost(80); ++ format %{ ++ "CMP$cop $tmp1, $tmp2\t @cmovP_cmpL_reg_reg\n" ++ "\tCMOV $dst,$src \t @cmovP_cmpL_reg_reg" ++ %} ++ ins_encode %{ ++ Register opr1 = as_Register($tmp1$$reg); ++ Register opr2 = as_Register($tmp2$$reg); ++ Register dst = $dst$$Register; ++ Register src = $src$$Register; ++ int flag = $cop$$cmpcode; ++ ++ __ cmp_cmov(opr1, opr2, dst, src, (MacroAssembler::CMCompare) flag, true /* is_signed */); ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct cmovP_cmpUL_reg_reg(mRegP dst, mRegP src, mRegLorI2L tmp1, mRegLorI2L tmp2, cmpOp cop ) %{ ++ match(Set dst (CMoveP (Binary cop (CmpUL tmp1 tmp2)) (Binary dst src))); ++ ins_cost(80); ++ format %{ ++ "CMP$cop $tmp1, $tmp2\t @cmovP_cmpUL_reg_reg\n" ++ "\tCMOV $dst,$src \t @cmovP_cmpUL_reg_reg" ++ %} ++ ins_encode %{ ++ Register opr1 = as_Register($tmp1$$reg); ++ Register opr2 = as_Register($tmp2$$reg); ++ Register dst = $dst$$Register; ++ Register src = $src$$Register; ++ int flag = $cop$$cmpcode; ++ ++ __ cmp_cmov(opr1, opr2, dst, src, (MacroAssembler::CMCompare) flag, false /* is_signed */); ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct cmovI_cmpD_reg_reg(mRegI dst, mRegI src, regD tmp1, regD tmp2, cmpOp cop, regD tmp3, regD tmp4) %{ ++ match(Set dst (CMoveI (Binary cop (CmpD tmp1 tmp2)) (Binary dst src))); ++ effect(TEMP tmp3, TEMP tmp4); ++ ins_cost(80); ++ format %{ ++ "CMP$cop $tmp1, $tmp2\t @cmovI_cmpD_reg_reg\n" ++ "\tCMOV $dst,$src \t @cmovI_cmpD_reg_reg" ++ %} ++ ins_encode %{ ++ FloatRegister reg_op1 = as_FloatRegister($tmp1$$reg); ++ FloatRegister reg_op2 = as_FloatRegister($tmp2$$reg); ++ FloatRegister tmp1 = $tmp3$$FloatRegister; ++ FloatRegister tmp2 = $tmp4$$FloatRegister; ++ Register dst = as_Register($dst$$reg); ++ Register src = as_Register($src$$reg); ++ int flag = $cop$$cmpcode; ++ ++ __ cmp_cmov(reg_op1, reg_op2, dst, src, tmp1, tmp2, (MacroAssembler::CMCompare) flag, false /* is_float */); ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++ ++instruct cmovP_cmpP_reg_reg(mRegP dst, mRegP src, mRegP tmp1, mRegP tmp2, cmpOpU cop ) %{ ++ match(Set dst (CMoveP (Binary cop (CmpP tmp1 tmp2)) (Binary dst src))); ++ ins_cost(80); ++ format %{ ++ "CMPU$cop $tmp1,$tmp2\t @cmovP_cmpP_reg_reg\n\t" ++ "CMOV $dst,$src\t @cmovP_cmpP_reg_reg" ++ %} ++ ins_encode %{ ++ 
Register op1 = $tmp1$$Register; ++ Register op2 = $tmp2$$Register; ++ Register dst = $dst$$Register; ++ Register src = $src$$Register; ++ int flag = $cop$$cmpcode; ++ ++ __ cmp_cmov(op1, op2, dst, src, (MacroAssembler::CMCompare) flag, false /* is_signed */); ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct cmovP_cmpI_reg_reg(mRegP dst, mRegP src, mRegI tmp1, mRegI tmp2, cmpOp cop ) %{ ++ match(Set dst (CMoveP (Binary cop (CmpI tmp1 tmp2)) (Binary dst src))); ++ ins_cost(80); ++ format %{ ++ "CMP$cop $tmp1,$tmp2\t @cmovP_cmpI_reg_reg\n\t" ++ "CMOV $dst,$src\t @cmovP_cmpI_reg_reg" ++ %} ++ ins_encode %{ ++ Register op1 = $tmp1$$Register; ++ Register op2 = $tmp2$$Register; ++ Register dst = $dst$$Register; ++ Register src = $src$$Register; ++ int flag = $cop$$cmpcode; ++ ++ __ cmp_cmov(op1, op2, dst, src, (MacroAssembler::CMCompare) flag, true /* is_signed */); ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct cmovL_cmpP_reg_reg(mRegL dst, mRegL src, mRegP tmp1, mRegP tmp2, cmpOpU cop ) %{ ++ match(Set dst (CMoveL (Binary cop (CmpP tmp1 tmp2)) (Binary dst src))); ++ ins_cost(80); ++ format %{ ++ "CMPU$cop $tmp1,$tmp2\t @cmovL_cmpP_reg_reg\n\t" ++ "CMOV $dst,$src\t @cmovL_cmpP_reg_reg" ++ %} ++ ins_encode %{ ++ Register op1 = $tmp1$$Register; ++ Register op2 = $tmp2$$Register; ++ Register dst = $dst$$Register; ++ Register src = $src$$Register; ++ int flag = $cop$$cmpcode; ++ Label L; ++ ++ __ cmp_cmov(op1, op2, dst, src, (MacroAssembler::CMCompare) flag, false /* is_signed */); ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct cmovN_cmpU_reg_reg(mRegN dst, mRegN src, mRegI tmp1, mRegI tmp2, cmpOpU cop ) %{ ++ match(Set dst (CMoveN (Binary cop (CmpU tmp1 tmp2)) (Binary dst src))); ++ ins_cost(80); ++ format %{ ++ "CMPU$cop $tmp1,$tmp2\t @cmovN_cmpU_reg_reg\n\t" ++ "CMOV $dst,$src\t @cmovN_cmpU_reg_reg" ++ %} ++ ins_encode %{ ++ Register op1 = $tmp1$$Register; ++ Register op2 = $tmp2$$Register; ++ Register dst = $dst$$Register; ++ Register src = $src$$Register; ++ int flag = $cop$$cmpcode; ++ ++ __ cmp_cmov(op1, op2, dst, src, (MacroAssembler::CMCompare) flag, false /* is_signed */); ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct cmovN_cmpL_reg_reg(mRegN dst, mRegN src, mRegL tmp1, mRegL tmp2, cmpOp cop) %{ ++ match(Set dst (CMoveN (Binary cop (CmpL tmp1 tmp2)) (Binary dst src))); ++ ins_cost(80); ++ format %{ ++ "CMP$cop $tmp1, $tmp2\t @cmovN_cmpL_reg_reg\n" ++ "\tCMOV $dst,$src \t @cmovN_cmpL_reg_reg" ++ %} ++ ins_encode %{ ++ Register opr1 = as_Register($tmp1$$reg); ++ Register opr2 = as_Register($tmp2$$reg); ++ Register dst = $dst$$Register; ++ Register src = $src$$Register; ++ int flag = $cop$$cmpcode; ++ ++ __ cmp_cmov(opr1, opr2, dst, src, (MacroAssembler::CMCompare) flag, true /* is_signed */); ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct cmovN_cmpUL_reg_reg(mRegN dst, mRegN src, mRegL tmp1, mRegL tmp2, cmpOp cop) %{ ++ match(Set dst (CMoveN (Binary cop (CmpUL tmp1 tmp2)) (Binary dst src))); ++ ins_cost(80); ++ format %{ ++ "CMP$cop $tmp1, $tmp2\t @cmovN_cmpUL_reg_reg\n" ++ "\tCMOV $dst,$src \t @cmovN_cmpUL_reg_reg" ++ %} ++ ins_encode %{ ++ Register opr1 = as_Register($tmp1$$reg); ++ Register opr2 = as_Register($tmp2$$reg); ++ Register dst = $dst$$Register; ++ Register src = $src$$Register; ++ int flag = $cop$$cmpcode; ++ ++ __ cmp_cmov(opr1, opr2, dst, src, (MacroAssembler::CMCompare) flag, false /* is_signed */); ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct cmovN_cmpI_reg_reg(mRegN dst, mRegN src, mRegI tmp1, mRegI tmp2, cmpOp cop ) %{ ++ 
match(Set dst (CMoveN (Binary cop (CmpI tmp1 tmp2)) (Binary dst src))); ++ ins_cost(80); ++ format %{ ++ "CMP$cop $tmp1,$tmp2\t @cmovN_cmpI_reg_reg\n\t" ++ "CMOV $dst,$src\t @cmovN_cmpI_reg_reg" ++ %} ++ ins_encode %{ ++ Register op1 = $tmp1$$Register; ++ Register op2 = $tmp2$$Register; ++ Register dst = $dst$$Register; ++ Register src = $src$$Register; ++ int flag = $cop$$cmpcode; ++ ++ __ cmp_cmov(op1, op2, dst, src, (MacroAssembler::CMCompare) flag, true /* is_signed */); ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct cmovL_cmpU_reg_reg(mRegL dst, mRegL src, mRegI tmp1, mRegI tmp2, cmpOpU cop ) %{ ++ match(Set dst (CMoveL (Binary cop (CmpU tmp1 tmp2)) (Binary dst src))); ++ ins_cost(80); ++ format %{ ++ "CMPU$cop $tmp1,$tmp2\t @cmovL_cmpU_reg_reg\n\t" ++ "CMOV $dst,$src\t @cmovL_cmpU_reg_reg" ++ %} ++ ins_encode %{ ++ Register op1 = $tmp1$$Register; ++ Register op2 = $tmp2$$Register; ++ Register dst = $dst$$Register; ++ Register src = $src$$Register; ++ int flag = $cop$$cmpcode; ++ ++ __ cmp_cmov(op1, op2, dst, src, (MacroAssembler::CMCompare) flag, false /* is_signed */); ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct cmovL_cmpF_reg_reg(mRegL dst, mRegL src, regF tmp1, regF tmp2, cmpOp cop, regD tmp3, regD tmp4) %{ ++ match(Set dst (CMoveL (Binary cop (CmpF tmp1 tmp2)) (Binary dst src))); ++ effect(TEMP tmp3, TEMP tmp4); ++ ins_cost(80); ++ format %{ ++ "CMP$cop $tmp1, $tmp2\t @cmovL_cmpF_reg_reg\n" ++ "\tCMOV $dst,$src \t @cmovL_cmpF_reg_reg" ++ %} ++ ++ ins_encode %{ ++ FloatRegister reg_op1 = $tmp1$$FloatRegister; ++ FloatRegister reg_op2 = $tmp2$$FloatRegister; ++ FloatRegister tmp1 = $tmp3$$FloatRegister; ++ FloatRegister tmp2 = $tmp4$$FloatRegister; ++ Register dst = $dst$$Register; ++ Register src = $src$$Register; ++ int flag = $cop$$cmpcode; ++ ++ __ cmp_cmov(reg_op1, reg_op2, dst, src, tmp1, tmp2, (MacroAssembler::CMCompare) flag, true /* is_float */); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct cmovL_cmpI_reg_reg(mRegL dst, mRegL src, mRegI tmp1, mRegI tmp2, cmpOp cop ) %{ ++ match(Set dst (CMoveL (Binary cop (CmpI tmp1 tmp2)) (Binary dst src))); ++ ins_cost(80); ++ format %{ ++ "CMP$cop $tmp1, $tmp2\t @cmovL_cmpI_reg_reg\n" ++ "\tCMOV $dst,$src \t @cmovL_cmpI_reg_reg" ++ %} ++ ++ ins_encode %{ ++ Register op1 = $tmp1$$Register; ++ Register op2 = $tmp2$$Register; ++ Register dst = as_Register($dst$$reg); ++ Register src = as_Register($src$$reg); ++ int flag = $cop$$cmpcode; ++ ++ __ cmp_cmov(op1, op2, dst, src, (MacroAssembler::CMCompare) flag, true /* is_signed */); ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct cmovL_cmpL_reg_reg(mRegL dst, mRegL src, mRegL tmp1, mRegL tmp2, cmpOp cop ) %{ ++ match(Set dst (CMoveL (Binary cop (CmpL tmp1 tmp2)) (Binary dst src))); ++ ins_cost(80); ++ format %{ ++ "CMP$cop $tmp1, $tmp2\t @cmovL_cmpL_reg_reg\n" ++ "\tCMOV $dst,$src \t @cmovL_cmpL_reg_reg" ++ %} ++ ins_encode %{ ++ Register opr1 = as_Register($tmp1$$reg); ++ Register opr2 = as_Register($tmp2$$reg); ++ Register dst = as_Register($dst$$reg); ++ Register src = as_Register($src$$reg); ++ int flag = $cop$$cmpcode; ++ ++ __ cmp_cmov(opr1, opr2, dst, src, (MacroAssembler::CMCompare) flag, true /* is_signed */); ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct cmovL_cmpUL_reg_reg(mRegL dst, mRegL src, mRegL tmp1, mRegL tmp2, cmpOp cop ) %{ ++ match(Set dst (CMoveL (Binary cop (CmpUL tmp1 tmp2)) (Binary dst src))); ++ ins_cost(80); ++ format %{ ++ "CMP$cop $tmp1, $tmp2\t @cmovL_cmpUL_reg_reg\n" ++ "\tCMOV $dst,$src \t @cmovL_cmpUL_reg_reg" ++ %} ++ 
ins_encode %{ ++ Register opr1 = as_Register($tmp1$$reg); ++ Register opr2 = as_Register($tmp2$$reg); ++ Register dst = as_Register($dst$$reg); ++ Register src = as_Register($src$$reg); ++ int flag = $cop$$cmpcode; ++ ++ __ cmp_cmov(opr1, opr2, dst, src, (MacroAssembler::CMCompare) flag, false /* is_signed */); ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct cmovL_cmpN_reg_reg(mRegL dst, mRegL src, mRegN tmp1, mRegN tmp2, cmpOpU cop ) %{ ++ match(Set dst (CMoveL (Binary cop (CmpN tmp1 tmp2)) (Binary dst src))); ++ ins_cost(80); ++ format %{ ++ "CMPU$cop $tmp1,$tmp2\t @cmovL_cmpN_reg_reg\n\t" ++ "CMOV $dst,$src\t @cmovL_cmpN_reg_reg" ++ %} ++ ins_encode %{ ++ Register op1 = $tmp1$$Register; ++ Register op2 = $tmp2$$Register; ++ Register dst = $dst$$Register; ++ Register src = $src$$Register; ++ int flag = $cop$$cmpcode; ++ ++ __ cmp_cmov(op1, op2, dst, src, (MacroAssembler::CMCompare) flag, false /* is_signed */); ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++ ++instruct cmovL_cmpD_reg_reg(mRegL dst, mRegL src, regD tmp1, regD tmp2, cmpOp cop, regD tmp3, regD tmp4) %{ ++ match(Set dst (CMoveL (Binary cop (CmpD tmp1 tmp2)) (Binary dst src))); ++ effect(TEMP tmp3, TEMP tmp4); ++ ins_cost(80); ++ format %{ ++ "CMP$cop $tmp1, $tmp2\t @cmovL_cmpD_reg_reg\n" ++ "\tCMOV $dst,$src \t @cmovL_cmpD_reg_reg" ++ %} ++ ins_encode %{ ++ FloatRegister reg_op1 = as_FloatRegister($tmp1$$reg); ++ FloatRegister reg_op2 = as_FloatRegister($tmp2$$reg); ++ FloatRegister tmp1 = $tmp3$$FloatRegister; ++ FloatRegister tmp2 = $tmp4$$FloatRegister; ++ Register dst = as_Register($dst$$reg); ++ Register src = as_Register($src$$reg); ++ int flag = $cop$$cmpcode; ++ ++ __ cmp_cmov(reg_op1, reg_op2, dst, src, tmp1, tmp2, (MacroAssembler::CMCompare) flag, false /* is_float */); ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct cmovD_cmpD_reg_reg(regD dst, regD src, regD tmp1, regD tmp2, cmpOp cop ) %{ ++ match(Set dst (CMoveD (Binary cop (CmpD tmp1 tmp2)) (Binary dst src))); ++ ins_cost(200); ++ format %{ ++ "CMP$cop $tmp1, $tmp2\t @cmovD_cmpD_reg_reg\n" ++ "\tCMOV $dst,$src \t @cmovD_cmpD_reg_reg" ++ %} ++ ins_encode %{ ++ FloatRegister reg_op1 = as_FloatRegister($tmp1$$reg); ++ FloatRegister reg_op2 = as_FloatRegister($tmp2$$reg); ++ FloatRegister dst = as_FloatRegister($dst$$reg); ++ FloatRegister src = as_FloatRegister($src$$reg); ++ int flag = $cop$$cmpcode; ++ ++ __ cmp_cmov(reg_op1, reg_op2, dst, src, (MacroAssembler::CMCompare) flag, false /* is_float */); ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct cmovF_cmpI_reg_reg(regF dst, regF src, mRegI tmp1, mRegI tmp2, cmpOp cop, regF tmp3, regF tmp4) %{ ++ match(Set dst (CMoveF (Binary cop (CmpI tmp1 tmp2)) (Binary dst src))); ++ effect(TEMP tmp3, TEMP tmp4); ++ ins_cost(200); ++ format %{ ++ "CMP$cop $tmp1, $tmp2\t @cmovF_cmpI_reg_reg\n" ++ "\tCMOV $dst, $src \t @cmovF_cmpI_reg_reg" ++ %} ++ ++ ins_encode %{ ++ Register op1 = $tmp1$$Register; ++ Register op2 = $tmp2$$Register; ++ FloatRegister dst = as_FloatRegister($dst$$reg); ++ FloatRegister src = as_FloatRegister($src$$reg); ++ FloatRegister tmp1 = as_FloatRegister($tmp3$$reg); ++ FloatRegister tmp2 = as_FloatRegister($tmp4$$reg); ++ int flag = $cop$$cmpcode; ++ ++ __ cmp_cmov(op1, op2, dst, src, tmp1, tmp2, (MacroAssembler::CMCompare) flag); ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct cmovD_cmpI_reg_reg(regD dst, regD src, mRegI tmp1, mRegI tmp2, cmpOp cop, regF tmp3, regF tmp4) %{ ++ match(Set dst (CMoveD (Binary cop (CmpI tmp1 tmp2)) (Binary dst src))); ++ effect(TEMP tmp3, TEMP tmp4); 
++ ins_cost(200); ++ format %{ ++ "CMP$cop $tmp1, $tmp2\t @cmovD_cmpI_reg_reg\n" ++ "\tCMOV $dst, $src \t @cmovD_cmpI_reg_reg" ++ %} ++ ++ ins_encode %{ ++ Register op1 = $tmp1$$Register; ++ Register op2 = $tmp2$$Register; ++ FloatRegister dst = as_FloatRegister($dst$$reg); ++ FloatRegister src = as_FloatRegister($src$$reg); ++ FloatRegister tmp1 = as_FloatRegister($tmp3$$reg); ++ FloatRegister tmp2 = as_FloatRegister($tmp4$$reg); ++ int flag = $cop$$cmpcode; ++ ++ __ cmp_cmov(op1, op2, dst, src, tmp1, tmp2, (MacroAssembler::CMCompare) flag); ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct cmovD_cmpP_reg_reg(regD dst, regD src, mRegP tmp1, mRegP tmp2, cmpOp cop, regF tmp3, regF tmp4) %{ ++ match(Set dst (CMoveD (Binary cop (CmpP tmp1 tmp2)) (Binary dst src))); ++ effect(TEMP tmp3, TEMP tmp4); ++ ins_cost(200); ++ format %{ ++ "CMP$cop $tmp1, $tmp2\t @cmovD_cmpP_reg_reg\n" ++ "\tCMOV $dst, $src \t @cmovD_cmpP_reg_reg" ++ %} ++ ++ ins_encode %{ ++ Register op1 = $tmp1$$Register; ++ Register op2 = $tmp2$$Register; ++ FloatRegister dst = as_FloatRegister($dst$$reg); ++ FloatRegister src = as_FloatRegister($src$$reg); ++ FloatRegister tmp1 = as_FloatRegister($tmp3$$reg); ++ FloatRegister tmp2 = as_FloatRegister($tmp4$$reg); ++ int flag = $cop$$cmpcode; ++ ++ // Use signed comparison here, because the most significant bit of the ++ // user-space virtual address must be 0. ++ __ cmp_cmov(op1, op2, dst, src, tmp1, tmp2, (MacroAssembler::CMCompare) flag); ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++//FIXME ++instruct cmovI_cmpF_reg_reg(mRegI dst, mRegI src, regF tmp1, regF tmp2, cmpOp cop, regD tmp3, regD tmp4) %{ ++ match(Set dst (CMoveI (Binary cop (CmpF tmp1 tmp2)) (Binary dst src))); ++ effect(TEMP tmp3, TEMP tmp4); ++ ins_cost(80); ++ format %{ ++ "CMP$cop $tmp1, $tmp2\t @cmovI_cmpF_reg_reg\n" ++ "\tCMOV $dst,$src \t @cmovI_cmpF_reg_reg" ++ %} ++ ++ ins_encode %{ ++ FloatRegister reg_op1 = $tmp1$$FloatRegister; ++ FloatRegister reg_op2 = $tmp2$$FloatRegister; ++ FloatRegister tmp1 = $tmp3$$FloatRegister; ++ FloatRegister tmp2 = $tmp4$$FloatRegister; ++ Register dst = $dst$$Register; ++ Register src = $src$$Register; ++ int flag = $cop$$cmpcode; ++ ++ __ cmp_cmov(reg_op1, reg_op2, dst, src, tmp1, tmp2, (MacroAssembler::CMCompare) flag, true /* is_float */); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct cmovF_cmpF_reg_reg(regF dst, regF src, regF tmp1, regF tmp2, cmpOp cop ) %{ ++ match(Set dst (CMoveF (Binary cop (CmpF tmp1 tmp2)) (Binary dst src))); ++ ins_cost(200); ++ format %{ ++ "CMP$cop $tmp1, $tmp2\t @cmovF_cmpF_reg_reg\n" ++ "\tCMOV $dst,$src \t @cmovF_cmpF_reg_reg" ++ %} ++ ++ ins_encode %{ ++ FloatRegister reg_op1 = $tmp1$$FloatRegister; ++ FloatRegister reg_op2 = $tmp2$$FloatRegister; ++ FloatRegister dst = $dst$$FloatRegister; ++ FloatRegister src = $src$$FloatRegister; ++ int flag = $cop$$cmpcode; ++ ++ __ cmp_cmov(reg_op1, reg_op2, dst, src, (MacroAssembler::CMCompare) flag, true /* is_float */); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++// Manifest a CmpL result in an integer register. Very painful. ++// This is the test to avoid. 
++instruct cmpL3_reg_reg(mRegI dst, mRegL src1, mRegL src2) %{ ++ match(Set dst (CmpL3 src1 src2)); ++ ins_cost(1000); ++ format %{ "cmpL3 $dst, $src1, $src2 @ cmpL3_reg_reg" %} ++ ins_encode %{ ++ Register opr1 = as_Register($src1$$reg); ++ Register opr2 = as_Register($src2$$reg); ++ Register dst = as_Register($dst$$reg); ++ ++ __ slt(AT, opr1, opr2); ++ __ slt(dst, opr2, opr1); ++ __ sub_d(dst, dst, AT); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++// ++// less_rsult = -1 ++// greater_result = 1 ++// equal_result = 0 ++// nan_result = -1 ++// ++instruct cmpF3_reg_reg(mRegI dst, regF src1, regF src2) %{ ++ match(Set dst (CmpF3 src1 src2)); ++ ins_cost(1000); ++ format %{ "cmpF3 $dst, $src1, $src2 @ cmpF3_reg_reg" %} ++ ins_encode %{ ++ FloatRegister src1 = as_FloatRegister($src1$$reg); ++ FloatRegister src2 = as_FloatRegister($src2$$reg); ++ Register dst = as_Register($dst$$reg); ++ ++ __ fcmp_clt_s(FCC0, src2, src1); ++ __ fcmp_cult_s(FCC1, src1, src2); ++ __ movcf2gr(dst, FCC0); ++ __ movcf2gr(AT, FCC1); ++ __ sub_d(dst, dst, AT); ++ ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct cmpD3_reg_reg(mRegI dst, regD src1, regD src2) %{ ++ match(Set dst (CmpD3 src1 src2)); ++ ins_cost(1000); ++ format %{ "cmpD3 $dst, $src1, $src2 @ cmpD3_reg_reg" %} ++ ins_encode %{ ++ FloatRegister src1 = as_FloatRegister($src1$$reg); ++ FloatRegister src2 = as_FloatRegister($src2$$reg); ++ Register dst = as_Register($dst$$reg); ++ ++ __ fcmp_clt_d(FCC0, src2, src1); ++ __ fcmp_cult_d(FCC1, src1, src2); ++ __ movcf2gr(dst, FCC0); ++ __ movcf2gr(AT, FCC1); ++ __ sub_d(dst, dst, AT); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct clear_array(mRegL cnt, mRegP base, Universe dummy) %{ ++ match(Set dummy (ClearArray cnt base)); ++ format %{ "CLEAR_ARRAY base = $base, cnt = $cnt # Clear doublewords" %} ++ ins_encode %{ ++ //Assume cnt is the number of bytes in an array to be cleared, ++ //and base points to the starting address of the array. 
++ Register base = $base$$Register; ++ Register num = $cnt$$Register; ++ Label Loop, done; ++ ++ __ add_d(AT, base, R0); ++ __ beq(num, R0, done); ++ ++ __ move(T4, num); /* T4 = words */ ++ ++ __ bind(Loop); ++ __ st_d(R0, AT, 0); ++ __ addi_d(T4, T4, -1); ++ __ addi_d(AT, AT, wordSize); ++ __ bne(T4, R0, Loop); ++ ++ __ bind(done); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct string_compare(a4_RegP str1, mA5RegI cnt1, a6_RegP str2, mA7RegI cnt2, no_Ax_mRegI result) %{ ++ match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2))); ++ effect(USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2); ++ ++ format %{ "String Compare $str1[len: $cnt1], $str2[len: $cnt2] -> $result @ string_compare" %} ++ ins_encode %{ ++ // Get the first character position in both strings ++ // [8] char array, [12] offset, [16] count ++ Register str1 = $str1$$Register; ++ Register str2 = $str2$$Register; ++ Register cnt1 = $cnt1$$Register; ++ Register cnt2 = $cnt2$$Register; ++ Register result = $result$$Register; ++ ++ Label L, Loop, haveResult, done; ++ ++ // compute the and difference of lengths (in result) ++ __ sub_d(result, cnt1, cnt2); // result holds the difference of two lengths ++ ++ // compute the shorter length (in cnt1) ++ __ bge(cnt2, cnt1, Loop); ++ __ move(cnt1, cnt2); ++ ++ // Now the shorter length is in cnt1 and cnt2 can be used as a tmp register ++ __ bind(Loop); // Loop begin ++ __ ld_hu(AT, str1, 0); ++ __ beq(cnt1, R0, done); ++ ++ // compare current character ++ __ ld_hu(cnt2, str2, 0); ++ __ addi_d(str1, str1, 2); ++ __ bne(AT, cnt2, haveResult); ++ __ addi_d(str2, str2, 2); ++ __ addi_d(cnt1, cnt1, -1); // Loop end ++ __ b(Loop); ++ ++ __ bind(haveResult); ++ __ sub_d(result, AT, cnt2); ++ ++ __ bind(done); ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++// intrinsic optimization ++instruct string_equals(a4_RegP str1, a5_RegP str2, mA6RegI cnt, mA7RegI temp, no_Ax_mRegI result) %{ ++ match(Set result (StrEquals (Binary str1 str2) cnt)); ++ effect(USE_KILL str1, USE_KILL str2, USE_KILL cnt, KILL temp); ++ ++ format %{ "String Equal $str1, $str2, len:$cnt tmp:$temp -> $result @ string_equals" %} ++ ins_encode %{ ++ // Get the first character position in both strings ++ // [8] char array, [12] offset, [16] count ++ Register str1 = $str1$$Register; ++ Register str2 = $str2$$Register; ++ Register cnt = $cnt$$Register; ++ Register tmp = $temp$$Register; ++ Register result = $result$$Register; ++ ++ Label Loop, True, False; ++ ++ __ addi_d(result, R0, 1); ++ __ beq(str1, str2, True); // same char[] ? 
++ ++ __ beq(cnt, R0, True); ++ ++ __ bind(Loop); ++ ++ // compare current character ++ __ ld_hu(AT, str1, 0); ++ __ ld_hu(tmp, str2, 0); ++ __ addi_d(str1, str1, 2); ++ __ bne(AT, tmp, False); ++ __ addi_d(cnt, cnt, -1); ++ __ addi_d(str2, str2, 2); ++ __ bne(cnt, R0, Loop); ++ ++ __ b(True); ++ ++ __ bind(False); ++ __ addi_d(result, R0, 0); ++ ++ __ bind(True); ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++//----------Arithmetic Instructions------------------------------------------- ++//----------Addition Instructions--------------------------------------------- ++instruct addI_Reg_Reg(mRegI dst, mRegI src1, mRegI src2) %{ ++ match(Set dst (AddI src1 src2)); ++ ++ format %{ "add $dst, $src1, $src2 #@addI_Reg_Reg" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src1 = $src1$$Register; ++ Register src2 = $src2$$Register; ++ __ add_w(dst, src1, src2); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++instruct addI_Reg_imm(mRegI dst, mRegI src1, immI12 src2) %{ ++ match(Set dst (AddI src1 src2)); ++ ++ format %{ "add $dst, $src1, $src2 #@addI_Reg_imm12" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src1 = $src1$$Register; ++ int imm = $src2$$constant; ++ ++ __ addi_w(dst, src1, imm); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++instruct addI_salI_Reg_Reg_immI_1_4(mRegI dst, mRegI src1, mRegI src2, immI_1_4 shift) %{ ++ match(Set dst (AddI src1 (LShiftI src2 shift))); ++ ++ format %{ "alsl $dst, $src1, $src2, $shift #@addI_salI_Reg_Reg_immI_1_4" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src1 = $src1$$Register; ++ Register src2 = $src2$$Register; ++ int sh = $shift$$constant; ++ __ alsl_w(dst, src2, src1, sh - 1); ++ %} ++ ins_pipe(ialu_regI_regI); ++%} ++ ++instruct addP_reg_reg(mRegP dst, mRegP src1, mRegLorI2L src2) %{ ++ match(Set dst (AddP src1 src2)); ++ ++ format %{ "ADD $dst, $src1, $src2 #@addP_reg_reg" %} ++ ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src1 = $src1$$Register; ++ Register src2 = $src2$$Register; ++ __ add_d(dst, src1, src2); ++ %} ++ ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++instruct addP_reg_reg_M8(mRegP dst, mRegP src1, mRegLorI2L src2, immL_M8 M8) %{ ++ match(Set dst (AddP src1 (AndL src2 M8))); ++ format %{ "ADD $dst, $src1, $src2 #@addP_reg_reg_M8" %} ++ ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src1 = $src1$$Register; ++ Register src2 = $src2$$Register; ++ __ bstrins_d(src2, R0, 2, 0); ++ __ add_d(dst, src1, src2); ++ %} ++ ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++instruct addP_reg_imm12(mRegP dst, mRegP src1, immL12 src2) %{ ++ match(Set dst (AddP src1 src2)); ++ ++ format %{ "ADD $dst, $src1, $src2 #@addP_reg_imm12" %} ++ ins_encode %{ ++ Register src1 = $src1$$Register; ++ long src2 = $src2$$constant; ++ Register dst = $dst$$Register; ++ ++ __ addi_d(dst, src1, src2); ++ %} ++ ins_pipe( ialu_regI_imm16 ); ++%} ++ ++instruct addP_salL_Reg_RegI2L_immI_1_4(mRegP dst, mRegP src1, mRegI src2, immI_1_4 shift) %{ ++ match(Set dst (AddP src1 (LShiftL (ConvI2L src2) shift))); ++ ++ format %{ "alsl $dst, $src1, $src2, $shift #@addP_salL_Reg_RegI2L_immI_1_4" %} ++ ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src1 = $src1$$Register; ++ Register src2 = $src2$$Register; ++ int sh = $shift$$constant; ++ __ alsl_d(dst, src2, src1, sh - 1); ++ %} ++ ++ ins_pipe(ialu_regI_regI); ++%} ++ ++// Add Long Register with Register ++instruct addL_Reg_Reg(mRegL dst, mRegLorI2L src1, mRegLorI2L src2) %{ ++ match(Set dst (AddL src1 src2)); ++ ins_cost(200); ++ format %{ 
"ADD $dst, $src1, $src2 #@addL_Reg_Reg\t" %} ++ ++ ins_encode %{ ++ Register dst_reg = as_Register($dst$$reg); ++ Register src1_reg = as_Register($src1$$reg); ++ Register src2_reg = as_Register($src2$$reg); ++ ++ __ add_d(dst_reg, src1_reg, src2_reg); ++ %} ++ ++ ins_pipe( ialu_regL_regL ); ++%} ++ ++instruct addL_Reg_imm(mRegL dst, mRegLorI2L src1, immL12 src2) ++%{ ++ match(Set dst (AddL src1 src2)); ++ ++ format %{ "ADD $dst, $src1, $src2 #@addL_Reg_imm " %} ++ ins_encode %{ ++ Register dst_reg = as_Register($dst$$reg); ++ Register src1_reg = as_Register($src1$$reg); ++ int src2_imm = $src2$$constant; ++ ++ __ addi_d(dst_reg, src1_reg, src2_imm); ++ %} ++ ++ ins_pipe( ialu_regL_regL ); ++%} ++ ++//----------Subtraction Instructions------------------------------------------- ++// Integer Subtraction Instructions ++instruct subI_Reg_Reg(mRegI dst, mRegI src1, mRegI src2) %{ ++ match(Set dst (SubI src1 src2)); ++ ins_cost(100); ++ ++ format %{ "sub $dst, $src1, $src2 #@subI_Reg_Reg" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src1 = $src1$$Register; ++ Register src2 = $src2$$Register; ++ __ sub_w(dst, src1, src2); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++instruct subI_Reg_immI_M2047_2048(mRegI dst, mRegI src1, immI_M2047_2048 src2) %{ ++ match(Set dst (SubI src1 src2)); ++ ins_cost(80); ++ ++ format %{ "sub $dst, $src1, $src2 #@subI_Reg_immI_M2047_2048" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src1 = $src1$$Register; ++ __ addi_w(dst, src1, -1 * $src2$$constant); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++instruct negI_Reg(mRegI dst, immI_0 zero, mRegI src) %{ ++ match(Set dst (SubI zero src)); ++ ins_cost(80); ++ ++ format %{ "neg $dst, $src #@negI_Reg" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src = $src$$Register; ++ __ sub_w(dst, R0, src); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++instruct negL_Reg(mRegL dst, immL_0 zero, mRegLorI2L src) %{ ++ match(Set dst (SubL zero src)); ++ ins_cost(80); ++ ++ format %{ "neg $dst, $src #@negL_Reg" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src = $src$$Register; ++ __ sub_d(dst, R0, src); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++instruct subL_Reg_immL_M2047_2048(mRegL dst, mRegL src1, immL_M2047_2048 src2) %{ ++ match(Set dst (SubL src1 src2)); ++ ins_cost(80); ++ ++ format %{ "sub $dst, $src1, $src2 #@subL_Reg_immL_M2047_2048" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src1 = $src1$$Register; ++ __ addi_d(dst, src1, -1 * $src2$$constant); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++// Subtract Long Register with Register. 
++instruct subL_Reg_Reg(mRegL dst, mRegLorI2L src1, mRegLorI2L src2) %{ ++ match(Set dst (SubL src1 src2)); ++ ins_cost(100); ++ format %{ "SubL $dst, $src1, $src2 @ subL_Reg_Reg" %} ++ ins_encode %{ ++ Register dst = as_Register($dst$$reg); ++ Register src1 = as_Register($src1$$reg); ++ Register src2 = as_Register($src2$$reg); ++ ++ __ sub_d(dst, src1, src2); ++ %} ++ ins_pipe( ialu_regL_regL ); ++%} ++ ++// Integer MOD with Register ++instruct modI_Reg_Reg(mRegI dst, mRegI src1, mRegI src2) %{ ++ match(Set dst (ModI src1 src2)); ++ ins_cost(300); ++ format %{ "modi $dst, $src1, $src2 @ modI_Reg_Reg" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src1 = $src1$$Register; ++ Register src2 = $src2$$Register; ++ ++ __ mod_w(dst, src1, src2); ++ %} ++ ++ //ins_pipe( ialu_mod ); ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++instruct modL_reg_reg(mRegL dst, mRegLorI2L src1, mRegLorI2L src2) %{ ++ match(Set dst (ModL src1 src2)); ++ format %{ "modL $dst, $src1, $src2 @modL_reg_reg" %} ++ ++ ins_encode %{ ++ Register dst = as_Register($dst$$reg); ++ Register op1 = as_Register($src1$$reg); ++ Register op2 = as_Register($src2$$reg); ++ ++ __ mod_d(dst, op1, op2); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct mulI_Reg_Reg(mRegI dst, mRegI src1, mRegI src2) %{ ++ match(Set dst (MulI src1 src2)); ++ ++ ins_cost(300); ++ format %{ "mul $dst, $src1, $src2 @ mulI_Reg_Reg" %} ++ ins_encode %{ ++ Register src1 = $src1$$Register; ++ Register src2 = $src2$$Register; ++ Register dst = $dst$$Register; ++ ++ __ mul_w(dst, src1, src2); ++ %} ++ ins_pipe( ialu_mult ); ++%} ++ ++instruct divI_Reg_Reg(mRegI dst, mRegI src1, mRegI src2) %{ ++ match(Set dst (DivI src1 src2)); ++ ++ ins_cost(300); ++ format %{ "div $dst, $src1, $src2 @ divI_Reg_Reg" %} ++ ins_encode %{ ++ Register src1 = $src1$$Register; ++ Register src2 = $src2$$Register; ++ Register dst = $dst$$Register; ++ ++ __ div_w(dst, src1, src2); ++ ++ %} ++ ins_pipe( ialu_mod ); ++%} ++ ++instruct divF_Reg_Reg(regF dst, regF src1, regF src2) %{ ++ match(Set dst (DivF src1 src2)); ++ ++ ins_cost(300); ++ format %{ "divF $dst, $src1, $src2 @ divF_Reg_Reg" %} ++ ins_encode %{ ++ FloatRegister src1 = $src1$$FloatRegister; ++ FloatRegister src2 = $src2$$FloatRegister; ++ FloatRegister dst = $dst$$FloatRegister; ++ ++ __ fdiv_s(dst, src1, src2); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct divD_Reg_Reg(regD dst, regD src1, regD src2) %{ ++ match(Set dst (DivD src1 src2)); ++ ++ ins_cost(300); ++ format %{ "divD $dst, $src1, $src2 @ divD_Reg_Reg" %} ++ ins_encode %{ ++ FloatRegister src1 = $src1$$FloatRegister; ++ FloatRegister src2 = $src2$$FloatRegister; ++ FloatRegister dst = $dst$$FloatRegister; ++ ++ __ fdiv_d(dst, src1, src2); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct mulL_reg_reg(mRegL dst, mRegLorI2L src1, mRegLorI2L src2) %{ ++ match(Set dst (MulL src1 src2)); ++ format %{ "mulL $dst, $src1, $src2 @mulL_reg_reg" %} ++ ins_encode %{ ++ Register dst = as_Register($dst$$reg); ++ Register op1 = as_Register($src1$$reg); ++ Register op2 = as_Register($src2$$reg); ++ ++ __ mul_d(dst, op1, op2); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct mulHiL_reg_reg(mRegL dst, mRegL src1, mRegL src2) %{ ++ match(Set dst (MulHiL src1 src2)); ++ format %{ "mulHiL $dst, $src1, $src2 @mulL_reg_reg" %} ++ ins_encode %{ ++ Register dst = as_Register($dst$$reg); ++ Register op1 = as_Register($src1$$reg); ++ Register op2 = as_Register($src2$$reg); ++ ++ __ mulh_d(dst, op1, op2); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct 
divL_reg_reg(mRegL dst, mRegL src1, mRegL src2) %{ ++ match(Set dst (DivL src1 src2)); ++ format %{ "divL $dst, $src1, $src2 @divL_reg_reg" %} ++ ++ ins_encode %{ ++ Register dst = as_Register($dst$$reg); ++ Register op1 = as_Register($src1$$reg); ++ Register op2 = as_Register($src2$$reg); ++ ++ __ div_d(dst, op1, op2); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct addF_reg_reg(regF dst, regF src1, regF src2) %{ ++ match(Set dst (AddF src1 src2)); ++ format %{ "AddF $dst, $src1, $src2 @addF_reg_reg" %} ++ ins_encode %{ ++ FloatRegister src1 = as_FloatRegister($src1$$reg); ++ FloatRegister src2 = as_FloatRegister($src2$$reg); ++ FloatRegister dst = as_FloatRegister($dst$$reg); ++ ++ __ fadd_s(dst, src1, src2); ++ %} ++ ins_pipe( fpu_regF_regF ); ++%} ++ ++instruct subF_reg_reg(regF dst, regF src1, regF src2) %{ ++ match(Set dst (SubF src1 src2)); ++ format %{ "SubF $dst, $src1, $src2 @subF_reg_reg" %} ++ ins_encode %{ ++ FloatRegister src1 = as_FloatRegister($src1$$reg); ++ FloatRegister src2 = as_FloatRegister($src2$$reg); ++ FloatRegister dst = as_FloatRegister($dst$$reg); ++ ++ __ fsub_s(dst, src1, src2); ++ %} ++ ins_pipe( fpu_regF_regF ); ++%} ++instruct addD_reg_reg(regD dst, regD src1, regD src2) %{ ++ match(Set dst (AddD src1 src2)); ++ format %{ "AddD $dst, $src1, $src2 @addD_reg_reg" %} ++ ins_encode %{ ++ FloatRegister src1 = as_FloatRegister($src1$$reg); ++ FloatRegister src2 = as_FloatRegister($src2$$reg); ++ FloatRegister dst = as_FloatRegister($dst$$reg); ++ ++ __ fadd_d(dst, src1, src2); ++ %} ++ ins_pipe( fpu_regF_regF ); ++%} ++ ++instruct subD_reg_reg(regD dst, regD src1, regD src2) %{ ++ match(Set dst (SubD src1 src2)); ++ format %{ "SubD $dst, $src1, $src2 @subD_reg_reg" %} ++ ins_encode %{ ++ FloatRegister src1 = as_FloatRegister($src1$$reg); ++ FloatRegister src2 = as_FloatRegister($src2$$reg); ++ FloatRegister dst = as_FloatRegister($dst$$reg); ++ ++ __ fsub_d(dst, src1, src2); ++ %} ++ ins_pipe( fpu_regF_regF ); ++%} ++ ++instruct negF_reg(regF dst, regF src) %{ ++ match(Set dst (NegF src)); ++ format %{ "negF $dst, $src @negF_reg" %} ++ ins_encode %{ ++ FloatRegister src = as_FloatRegister($src$$reg); ++ FloatRegister dst = as_FloatRegister($dst$$reg); ++ ++ __ fneg_s(dst, src); ++ %} ++ ins_pipe( fpu_regF_regF ); ++%} ++ ++instruct negD_reg(regD dst, regD src) %{ ++ match(Set dst (NegD src)); ++ format %{ "negD $dst, $src @negD_reg" %} ++ ins_encode %{ ++ FloatRegister src = as_FloatRegister($src$$reg); ++ FloatRegister dst = as_FloatRegister($dst$$reg); ++ ++ __ fneg_d(dst, src); ++ %} ++ ins_pipe( fpu_regF_regF ); ++%} ++ ++ ++instruct mulF_reg_reg(regF dst, regF src1, regF src2) %{ ++ match(Set dst (MulF src1 src2)); ++ format %{ "MULF $dst, $src1, $src2 @mulF_reg_reg" %} ++ ins_encode %{ ++ FloatRegister src1 = $src1$$FloatRegister; ++ FloatRegister src2 = $src2$$FloatRegister; ++ FloatRegister dst = $dst$$FloatRegister; ++ ++ __ fmul_s(dst, src1, src2); ++ %} ++ ins_pipe( fpu_regF_regF ); ++%} ++ ++instruct maddF_reg_reg(regF dst, regF src1, regF src2, regF src3) %{ ++ match(Set dst (AddF (MulF src1 src2) src3)); ++ // For compatibility reason (e.g. on the Loongson platform), disable this guy. 
++ ins_cost(44444); ++ format %{ "maddF $dst, $src1, $src2, $src3 @maddF_reg_reg" %} ++ ins_encode %{ ++ FloatRegister src1 = $src1$$FloatRegister; ++ FloatRegister src2 = $src2$$FloatRegister; ++ FloatRegister src3 = $src3$$FloatRegister; ++ FloatRegister dst = $dst$$FloatRegister; ++ ++ __ fmadd_s(dst, src1, src2, src3); ++ %} ++ ins_pipe( fpu_regF_regF ); ++%} ++ ++// Mul two double precision floating piont number ++instruct mulD_reg_reg(regD dst, regD src1, regD src2) %{ ++ match(Set dst (MulD src1 src2)); ++ format %{ "MULD $dst, $src1, $src2 @mulD_reg_reg" %} ++ ins_encode %{ ++ FloatRegister src1 = $src1$$FloatRegister; ++ FloatRegister src2 = $src2$$FloatRegister; ++ FloatRegister dst = $dst$$FloatRegister; ++ ++ __ fmul_d(dst, src1, src2); ++ %} ++ ins_pipe( fpu_regF_regF ); ++%} ++ ++instruct maddD_reg_reg(regD dst, regD src1, regD src2, regD src3) %{ ++ match(Set dst (AddD (MulD src1 src2) src3)); ++ // For compatibility reason (e.g. on the Loongson platform), disable this guy. ++ ins_cost(44444); ++ format %{ "maddD $dst, $src1, $src2, $src3 @maddD_reg_reg" %} ++ ins_encode %{ ++ FloatRegister src1 = $src1$$FloatRegister; ++ FloatRegister src2 = $src2$$FloatRegister; ++ FloatRegister src3 = $src3$$FloatRegister; ++ FloatRegister dst = $dst$$FloatRegister; ++ ++ __ fmadd_d(dst, src1, src2, src3); ++ %} ++ ins_pipe( fpu_regF_regF ); ++%} ++ ++instruct absF_reg(regF dst, regF src) %{ ++ match(Set dst (AbsF src)); ++ ins_cost(100); ++ format %{ "absF $dst, $src @absF_reg" %} ++ ins_encode %{ ++ FloatRegister src = as_FloatRegister($src$$reg); ++ FloatRegister dst = as_FloatRegister($dst$$reg); ++ ++ __ fabs_s(dst, src); ++ %} ++ ins_pipe( fpu_regF_regF ); ++%} ++ ++ ++// intrinsics for math_native. ++// AbsD SqrtD CosD SinD TanD LogD Log10D ++ ++instruct absD_reg(regD dst, regD src) %{ ++ match(Set dst (AbsD src)); ++ ins_cost(100); ++ format %{ "absD $dst, $src @absD_reg" %} ++ ins_encode %{ ++ FloatRegister src = as_FloatRegister($src$$reg); ++ FloatRegister dst = as_FloatRegister($dst$$reg); ++ ++ __ fabs_d(dst, src); ++ %} ++ ins_pipe( fpu_regF_regF ); ++%} ++ ++instruct sqrtD_reg(regD dst, regD src) %{ ++ match(Set dst (SqrtD src)); ++ ins_cost(100); ++ format %{ "SqrtD $dst, $src @sqrtD_reg" %} ++ ins_encode %{ ++ FloatRegister src = as_FloatRegister($src$$reg); ++ FloatRegister dst = as_FloatRegister($dst$$reg); ++ ++ __ fsqrt_d(dst, src); ++ %} ++ ins_pipe( fpu_regF_regF ); ++%} ++ ++instruct sqrtF_reg(regF dst, regF src) %{ ++ match(Set dst (ConvD2F (SqrtD (ConvF2D src)))); ++ ins_cost(100); ++ format %{ "SqrtF $dst, $src @sqrtF_reg" %} ++ ins_encode %{ ++ FloatRegister src = as_FloatRegister($src$$reg); ++ FloatRegister dst = as_FloatRegister($dst$$reg); ++ ++ __ fsqrt_s(dst, src); ++ %} ++ ins_pipe( fpu_regF_regF ); ++%} ++//----------------------------------Logical Instructions---------------------- ++//__________________________________Integer Logical Instructions------------- ++ ++//And Instuctions ++// And Register with Immediate ++instruct andI_Reg_imm_0_4095(mRegI dst, mRegI src1, immI_0_4095 src2) %{ ++ match(Set dst (AndI src1 src2)); ++ ins_cost(60); ++ ++ format %{ "and $dst, $src1, $src2 #@andI_Reg_imm_0_4095" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src = $src1$$Register; ++ int val = $src2$$constant; ++ ++ __ andi(dst, src, val); ++ ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++instruct andI_Reg_immI_nonneg_mask(mRegI dst, mRegI src1, immI_nonneg_mask mask) %{ ++ match(Set dst (AndI src1 mask)); ++ ins_cost(60); ++ ++ format %{ "and 
$dst, $src1, $mask #@andI_Reg_immI_nonneg_mask" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src = $src1$$Register; ++ int size = Assembler::is_int_mask($mask$$constant); ++ ++ __ bstrpick_w(dst, src, size-1, 0); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++instruct andL_Reg_immL_nonneg_mask(mRegL dst, mRegL src1, immL_nonneg_mask mask) %{ ++ match(Set dst (AndL src1 mask)); ++ ins_cost(60); ++ ++ format %{ "and $dst, $src1, $mask #@andL_Reg_immL_nonneg_mask" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src = $src1$$Register; ++ int size = Assembler::is_jlong_mask($mask$$constant); ++ ++ __ bstrpick_d(dst, src, size-1, 0); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++instruct xorI_Reg_imm_0_4095(mRegI dst, mRegI src1, immI_0_4095 src2) %{ ++ match(Set dst (XorI src1 src2)); ++ ins_cost(60); ++ ++ format %{ "xori $dst, $src1, $src2 #@xorI_Reg_imm_0_4095" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src = $src1$$Register; ++ int val = $src2$$constant; ++ ++ __ xori(dst, src, val); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++instruct xorI_Reg_immI_M1(mRegI dst, mRegI src1, immI_M1 M1) %{ ++ match(Set dst (XorI src1 M1)); ++ ins_cost(60); ++ ++ format %{ "xor $dst, $src1, $M1 #@xorI_Reg_immI_M1" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src = $src1$$Register; ++ ++ __ orn(dst, R0, src); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++instruct xorL2I_Reg_immI_M1(mRegI dst, mRegL src1, immI_M1 M1) %{ ++ match(Set dst (XorI (ConvL2I src1) M1)); ++ ins_cost(60); ++ ++ format %{ "xor $dst, $src1, $M1 #@xorL2I_Reg_immI_M1" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src = $src1$$Register; ++ ++ __ orn(dst, R0, src); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++instruct xorL_Reg_imm_0_4095(mRegL dst, mRegL src1, immL_0_4095 src2) %{ ++ match(Set dst (XorL src1 src2)); ++ ins_cost(60); ++ ++ format %{ "xori $dst, $src1, $src2 #@xorL_Reg_imm_0_4095" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src = $src1$$Register; ++ int val = $src2$$constant; ++ ++ __ xori(dst, src, val); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++ ++instruct lbu_and_lmask(mRegI dst, memory mem, immI_255 mask) %{ ++ match(Set dst (AndI mask (LoadB mem))); ++ ins_cost(60); ++ ++ format %{ "lhu $dst, $mem #@lbu_and_lmask" %} ++ ins_encode %{ ++ __ loadstore_enc($dst$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::LOAD_U_BYTE); ++ %} ++ ins_pipe( ialu_loadI ); ++%} ++ ++instruct lbu_and_rmask(mRegI dst, memory mem, immI_255 mask) %{ ++ match(Set dst (AndI (LoadB mem) mask)); ++ ins_cost(60); ++ ++ format %{ "lhu $dst, $mem #@lbu_and_rmask" %} ++ ins_encode %{ ++ __ loadstore_enc($dst$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::LOAD_U_BYTE); ++ %} ++ ins_pipe( ialu_loadI ); ++%} ++ ++instruct andI_Reg_Reg(mRegI dst, mRegI src1, mRegI src2) %{ ++ match(Set dst (AndI src1 src2)); ++ ++ format %{ "and $dst, $src1, $src2 #@andI_Reg_Reg" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src1 = $src1$$Register; ++ Register src2 = $src2$$Register; ++ ++ __ andr(dst, src1, src2); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++instruct andnI_Reg_nReg(mRegI dst, mRegI src1, mRegI src2, immI_M1 M1) %{ ++ match(Set dst (AndI src1 (XorI src2 M1))); ++ ++ format %{ "andn $dst, $src1, $src2 #@andnI_Reg_nReg" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src1 = $src1$$Register; ++ Register src2 = $src2$$Register; ++ 
++ __ andn(dst, src1, src2); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++instruct ornI_Reg_nReg(mRegI dst, mRegI src1, mRegI src2, immI_M1 M1) %{ ++ match(Set dst (OrI src1 (XorI src2 M1))); ++ ++ format %{ "orn $dst, $src1, $src2 #@ornI_Reg_nReg" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src1 = $src1$$Register; ++ Register src2 = $src2$$Register; ++ ++ __ orn(dst, src1, src2); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++instruct andnI_nReg_Reg(mRegI dst, mRegI src1, mRegI src2, immI_M1 M1) %{ ++ match(Set dst (AndI (XorI src1 M1) src2)); ++ ++ format %{ "andn $dst, $src2, $src1 #@andnI_nReg_Reg" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src1 = $src1$$Register; ++ Register src2 = $src2$$Register; ++ ++ __ andn(dst, src2, src1); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++instruct ornI_nReg_Reg(mRegI dst, mRegI src1, mRegI src2, immI_M1 M1) %{ ++ match(Set dst (OrI (XorI src1 M1) src2)); ++ ++ format %{ "orn $dst, $src2, $src1 #@ornI_nReg_Reg" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src1 = $src1$$Register; ++ Register src2 = $src2$$Register; ++ ++ __ orn(dst, src2, src1); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++// And Long Register with Register ++instruct andL_Reg_Reg(mRegL dst, mRegL src1, mRegLorI2L src2) %{ ++ match(Set dst (AndL src1 src2)); ++ format %{ "AND $dst, $src1, $src2 @ andL_Reg_Reg\n\t" %} ++ ins_encode %{ ++ Register dst_reg = as_Register($dst$$reg); ++ Register src1_reg = as_Register($src1$$reg); ++ Register src2_reg = as_Register($src2$$reg); ++ ++ __ andr(dst_reg, src1_reg, src2_reg); ++ %} ++ ins_pipe( ialu_regL_regL ); ++%} ++ ++instruct andL_Reg_imm_0_4095(mRegL dst, mRegL src1, immL_0_4095 src2) %{ ++ match(Set dst (AndL src1 src2)); ++ ins_cost(60); ++ ++ format %{ "and $dst, $src1, $src2 #@andL_Reg_imm_0_4095" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src = $src1$$Register; ++ long val = $src2$$constant; ++ ++ __ andi(dst, src, val); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++instruct andL2I_Reg_imm_0_4095(mRegI dst, mRegL src1, immL_0_4095 src2) %{ ++ match(Set dst (ConvL2I (AndL src1 src2))); ++ ins_cost(60); ++ ++ format %{ "and $dst, $src1, $src2 #@andL2I_Reg_imm_0_4095" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src = $src1$$Register; ++ long val = $src2$$constant; ++ ++ __ andi(dst, src, val); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++ ++instruct andL_Reg_immL_M8(mRegL dst, immL_M8 M8) %{ ++ match(Set dst (AndL dst M8)); ++ ins_cost(60); ++ ++ format %{ "and $dst, $dst, $M8 #@andL_Reg_immL_M8" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ ++ __ bstrins_d(dst, R0, 2, 0); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++instruct andL_Reg_immL_M5(mRegL dst, immL_M5 M5) %{ ++ match(Set dst (AndL dst M5)); ++ ins_cost(60); ++ ++ format %{ "and $dst, $dst, $M5 #@andL_Reg_immL_M5" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ ++ __ bstrins_d(dst, R0, 2, 2); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++instruct andL_Reg_immL_M7(mRegL dst, immL_M7 M7) %{ ++ match(Set dst (AndL dst M7)); ++ ins_cost(60); ++ ++ format %{ "and $dst, $dst, $M7 #@andL_Reg_immL_M7" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ ++ __ bstrins_d(dst, R0, 2, 1); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++instruct andL_Reg_immL_M4(mRegL dst, immL_M4 M4) %{ ++ match(Set dst (AndL dst M4)); ++ ins_cost(60); ++ ++ format %{ "and $dst, $dst, $M4 #@andL_Reg_immL_M4" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; 
++
++ __ bstrins_d(dst, R0, 1, 0);
++ %}
++ ins_pipe( ialu_regI_regI );
++%}
++
++instruct andL_Reg_immL_M121(mRegL dst, immL_M121 M121) %{
++ match(Set dst (AndL dst M121));
++ ins_cost(60);
++
++ format %{ "and $dst, $dst, $M121 #@andL_Reg_immL_M121" %}
++ ins_encode %{
++ Register dst = $dst$$Register;
++
++ __ bstrins_d(dst, R0, 6, 3);
++ %}
++ ins_pipe( ialu_regI_regI );
++%}
++
++// Or Long Register with Register
++instruct orL_Reg_Reg(mRegL dst, mRegLorI2L src1, mRegLorI2L src2) %{
++ match(Set dst (OrL src1 src2));
++ format %{ "OR $dst, $src1, $src2 @ orL_Reg_Reg\t" %}
++ ins_encode %{
++ Register dst_reg = $dst$$Register;
++ Register src1_reg = $src1$$Register;
++ Register src2_reg = $src2$$Register;
++
++ __ orr(dst_reg, src1_reg, src2_reg);
++ %}
++ ins_pipe( ialu_regL_regL );
++%}
++
++instruct orL_Reg_P2XReg(mRegL dst, mRegP src1, mRegLorI2L src2) %{
++ match(Set dst (OrL (CastP2X src1) src2));
++ format %{ "OR $dst, $src1, $src2 @ orL_Reg_P2XReg\t" %}
++ ins_encode %{
++ Register dst_reg = $dst$$Register;
++ Register src1_reg = $src1$$Register;
++ Register src2_reg = $src2$$Register;
++
++ __ orr(dst_reg, src1_reg, src2_reg);
++ %}
++ ins_pipe( ialu_regL_regL );
++%}
++
++// Xor Long Register with Register
++instruct xorL_Reg_Reg(mRegL dst, mRegL src1, mRegL src2) %{
++ match(Set dst (XorL src1 src2));
++ format %{ "XOR $dst, $src1, $src2 @ xorL_Reg_Reg\t" %}
++ ins_encode %{
++ Register dst_reg = as_Register($dst$$reg);
++ Register src1_reg = as_Register($src1$$reg);
++ Register src2_reg = as_Register($src2$$reg);
++
++ __ xorr(dst_reg, src1_reg, src2_reg);
++ %}
++ ins_pipe( ialu_regL_regL );
++%}
++
++// Shift Left by 5-bit immediate
++instruct salI_Reg_imm(mRegI dst, mRegI src, immIU5 shift) %{
++ match(Set dst (LShiftI src shift));
++
++ format %{ "SHL $dst, $src, $shift #@salI_Reg_imm" %}
++ ins_encode %{
++ Register src = $src$$Register;
++ Register dst = $dst$$Register;
++ int shamt = $shift$$constant;
++
++ __ slli_w(dst, src, shamt);
++ %}
++ ins_pipe( ialu_regI_regI );
++%}
++
++instruct salL2I_Reg_imm(mRegI dst, mRegL src, immIU5 shift) %{
++ match(Set dst (LShiftI (ConvL2I src) shift));
++
++ format %{ "SHL $dst, $src, $shift #@salL2I_Reg_imm" %}
++ ins_encode %{
++ Register src = $src$$Register;
++ Register dst = $dst$$Register;
++ int shamt = $shift$$constant;
++
++ __ slli_w(dst, src, shamt);
++ %}
++ ins_pipe( ialu_regI_regI );
++%}
++
++instruct salI_Reg_imm_and_M65536(mRegI dst, mRegI src, immI_16 shift, immI_M65536 mask) %{
++ match(Set dst (AndI (LShiftI src shift) mask));
++
++ format %{ "SHL $dst, $src, $shift #@salI_Reg_imm_and_M65536" %}
++ ins_encode %{
++ Register src = $src$$Register;
++ Register dst = $dst$$Register;
++
++ __ slli_w(dst, src, 16);
++ %}
++ ins_pipe( ialu_regI_regI );
++%}
++
++instruct land7_2_s(mRegI dst, mRegL src, immL_7 seven, immI_16 sixteen)
++%{
++ match(Set dst (RShiftI (LShiftI (ConvL2I (AndL src seven)) sixteen) sixteen));
++
++ format %{ "andi $dst, $src, 7\t# @land7_2_s" %}
++ ins_encode %{
++ Register src = $src$$Register;
++ Register dst = $dst$$Register;
++
++ __ andi(dst, src, 7);
++ %}
++ ins_pipe(ialu_regI_regI);
++%}
++
++// Logical Shift Right by 16, followed by Arithmetic Shift Left by 16.
++// This idiom is used by the compiler for the i2s bytecode.
++instruct i2s(mRegI dst, mRegI src, immI_16 sixteen) ++%{ ++ match(Set dst (RShiftI (LShiftI src sixteen) sixteen)); ++ ++ format %{ "i2s $dst, $src\t# @i2s" %} ++ ins_encode %{ ++ Register src = $src$$Register; ++ Register dst = $dst$$Register; ++ ++ __ ext_w_h(dst, src); ++ %} ++ ins_pipe(ialu_regI_regI); ++%} ++ ++// Logical Shift Right by 24, followed by Arithmetic Shift Left by 24. ++// This idiom is used by the compiler for the i2b bytecode. ++instruct i2b(mRegI dst, mRegI src, immI_24 twentyfour) ++%{ ++ match(Set dst (RShiftI (LShiftI src twentyfour) twentyfour)); ++ ++ format %{ "i2b $dst, $src\t# @i2b" %} ++ ins_encode %{ ++ Register src = $src$$Register; ++ Register dst = $dst$$Register; ++ ++ __ ext_w_b(dst, src); ++ %} ++ ins_pipe(ialu_regI_regI); ++%} ++ ++ ++instruct salI_RegL2I_imm(mRegI dst, mRegL src, immIU5 shift) %{ ++ match(Set dst (LShiftI (ConvL2I src) shift)); ++ ++ format %{ "SHL $dst, $src, $shift #@salI_RegL2I_imm" %} ++ ins_encode %{ ++ Register src = $src$$Register; ++ Register dst = $dst$$Register; ++ int shamt = $shift$$constant; ++ ++ __ slli_w(dst, src, shamt); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++// Shift Left by 8-bit immediate ++instruct salI_Reg_Reg(mRegI dst, mRegI src, mRegI shift) %{ ++ match(Set dst (LShiftI src shift)); ++ ++ format %{ "SHL $dst, $src, $shift #@salI_Reg_Reg" %} ++ ins_encode %{ ++ Register src = $src$$Register; ++ Register dst = $dst$$Register; ++ Register shamt = $shift$$Register; ++ __ sll_w(dst, src, shamt); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++ ++// Shift Left Long 6-bit immI ++instruct salL_Reg_imm(mRegL dst, mRegLorI2L src, immIU6 shift) %{ ++ match(Set dst (LShiftL src shift)); ++ ins_cost(100); ++ format %{ "salL $dst, $src, $shift @ salL_Reg_imm" %} ++ ins_encode %{ ++ Register src_reg = as_Register($src$$reg); ++ Register dst_reg = as_Register($dst$$reg); ++ int shamt = $shift$$constant; ++ ++ __ slli_d(dst_reg, src_reg, shamt); ++ %} ++ ins_pipe( ialu_regL_regL ); ++%} ++ ++// Shift Left Long ++instruct salL_Reg_Reg(mRegL dst, mRegLorI2L src, mRegI shift) %{ ++ match(Set dst (LShiftL src shift)); ++ ins_cost(100); ++ format %{ "salL $dst, $src, $shift @ salL_Reg_Reg" %} ++ ins_encode %{ ++ Register src_reg = as_Register($src$$reg); ++ Register dst_reg = as_Register($dst$$reg); ++ ++ __ sll_d(dst_reg, src_reg, $shift$$Register); ++ %} ++ ins_pipe( ialu_regL_regL ); ++%} ++ ++// Shift Right Long 6-bit ++instruct sarL_Reg_imm(mRegL dst, mRegLorI2L src, immIU6 shift) %{ ++ match(Set dst (RShiftL src shift)); ++ ins_cost(100); ++ format %{ "sarL $dst, $src, $shift @ sarL_Reg_imm" %} ++ ins_encode %{ ++ Register src_reg = as_Register($src$$reg); ++ Register dst_reg = as_Register($dst$$reg); ++ int shamt = $shift$$constant; ++ ++ __ srai_d(dst_reg, src_reg, shamt); ++ %} ++ ins_pipe( ialu_regL_regL ); ++%} ++ ++instruct sarL2I_Reg_immI_32_63(mRegI dst, mRegLorI2L src, immI_32_63 shift) %{ ++ match(Set dst (ConvL2I (RShiftL src shift))); ++ ins_cost(100); ++ format %{ "sarL $dst, $src, $shift @ sarL2I_Reg_immI_32_63" %} ++ ins_encode %{ ++ Register src_reg = as_Register($src$$reg); ++ Register dst_reg = as_Register($dst$$reg); ++ int shamt = $shift$$constant; ++ ++ __ srai_d(dst_reg, src_reg, shamt); ++ %} ++ ins_pipe( ialu_regL_regL ); ++%} ++ ++// Shift Right Long arithmetically ++instruct sarL_Reg_Reg(mRegL dst, mRegLorI2L src, mRegI shift) %{ ++ match(Set dst (RShiftL src shift)); ++ ins_cost(100); ++ format %{ "sarL $dst, $src, $shift @ sarL_Reg_Reg" %} ++ ins_encode %{ ++ Register src_reg = 
as_Register($src$$reg); ++ Register dst_reg = as_Register($dst$$reg); ++ ++ __ sra_d(dst_reg, src_reg, $shift$$Register); ++ %} ++ ins_pipe( ialu_regL_regL ); ++%} ++ ++// Shift Right Long logically ++instruct slrL_Reg_Reg(mRegL dst, mRegL src, mRegI shift) %{ ++ match(Set dst (URShiftL src shift)); ++ ins_cost(100); ++ format %{ "slrL $dst, $src, $shift @ slrL_Reg_Reg" %} ++ ins_encode %{ ++ Register src_reg = as_Register($src$$reg); ++ Register dst_reg = as_Register($dst$$reg); ++ ++ __ srl_d(dst_reg, src_reg, $shift$$Register); ++ %} ++ ins_pipe( ialu_regL_regL ); ++%} ++ ++instruct slrL_Reg_immI_0_31(mRegL dst, mRegLorI2L src, immI_0_31 shift) %{ ++ match(Set dst (URShiftL src shift)); ++ ins_cost(80); ++ format %{ "slrL $dst, $src, $shift @ slrL_Reg_immI_0_31" %} ++ ins_encode %{ ++ Register src_reg = as_Register($src$$reg); ++ Register dst_reg = as_Register($dst$$reg); ++ int shamt = $shift$$constant; ++ ++ __ srli_d(dst_reg, src_reg, shamt); ++ %} ++ ins_pipe( ialu_regL_regL ); ++%} ++ ++instruct slrL_Reg_immI_0_31_and_max_int(mRegI dst, mRegLorI2L src, immI_0_31 shift, immI_MaxI max_int) %{ ++ match(Set dst (AndI (ConvL2I (URShiftL src shift)) max_int)); ++ ins_cost(80); ++ format %{ "bstrpick_d $dst, $src, $shift+30, shift @ slrL_Reg_immI_0_31_and_max_int" %} ++ ins_encode %{ ++ Register src_reg = as_Register($src$$reg); ++ Register dst_reg = as_Register($dst$$reg); ++ int shamt = $shift$$constant; ++ ++ __ bstrpick_d(dst_reg, src_reg, shamt+30, shamt); ++ %} ++ ins_pipe( ialu_regL_regL ); ++%} ++ ++instruct slrL_P2XReg_immI_0_31(mRegL dst, mRegP src, immI_0_31 shift) %{ ++ match(Set dst (URShiftL (CastP2X src) shift)); ++ ins_cost(80); ++ format %{ "slrL $dst, $src, $shift @ slrL_P2XReg_immI_0_31" %} ++ ins_encode %{ ++ Register src_reg = as_Register($src$$reg); ++ Register dst_reg = as_Register($dst$$reg); ++ int shamt = $shift$$constant; ++ ++ __ srli_d(dst_reg, src_reg, shamt); ++ %} ++ ins_pipe( ialu_regL_regL ); ++%} ++ ++instruct slrL_Reg_immI_32_63(mRegL dst, mRegLorI2L src, immI_32_63 shift) %{ ++ match(Set dst (URShiftL src shift)); ++ ins_cost(80); ++ format %{ "slrL $dst, $src, $shift @ slrL_Reg_immI_32_63" %} ++ ins_encode %{ ++ Register src_reg = as_Register($src$$reg); ++ Register dst_reg = as_Register($dst$$reg); ++ int shamt = $shift$$constant; ++ ++ __ srli_d(dst_reg, src_reg, shamt); ++ %} ++ ins_pipe( ialu_regL_regL ); ++%} ++ ++instruct slrL_Reg_immI_convL2I(mRegI dst, mRegLorI2L src, immI_32_63 shift) %{ ++ match(Set dst (ConvL2I (URShiftL src shift))); ++ predicate(n->in(1)->in(2)->get_int() > 32); ++ ins_cost(80); ++ format %{ "slrL $dst, $src, $shift @ slrL_Reg_immI_convL2I" %} ++ ins_encode %{ ++ Register src_reg = as_Register($src$$reg); ++ Register dst_reg = as_Register($dst$$reg); ++ int shamt = $shift$$constant; ++ ++ __ srli_d(dst_reg, src_reg, shamt); ++ %} ++ ins_pipe( ialu_regL_regL ); ++%} ++ ++instruct slrL_P2XReg_immI_32_63(mRegL dst, mRegP src, immI_32_63 shift) %{ ++ match(Set dst (URShiftL (CastP2X src) shift)); ++ ins_cost(80); ++ format %{ "slrL $dst, $src, $shift @ slrL_P2XReg_immI_32_63" %} ++ ins_encode %{ ++ Register src_reg = as_Register($src$$reg); ++ Register dst_reg = as_Register($dst$$reg); ++ int shamt = $shift$$constant; ++ ++ __ srli_d(dst_reg, src_reg, shamt); ++ %} ++ ins_pipe( ialu_regL_regL ); ++%} ++ ++// Xor Instructions ++// Xor Register with Register ++instruct xorI_Reg_Reg(mRegI dst, mRegI src1, mRegI src2) %{ ++ match(Set dst (XorI src1 src2)); ++ ++ format %{ "XOR $dst, $src1, $src2 #@xorI_Reg_Reg" %} ++ ++ 
ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src1 = $src1$$Register; ++ Register src2 = $src2$$Register; ++ __ xorr(dst, src1, src2); ++ %} ++ ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++// Or Instructions ++instruct orI_Reg_imm(mRegI dst, mRegI src1, immI_0_4095 src2) %{ ++ match(Set dst (OrI src1 src2)); ++ ++ format %{ "OR $dst, $src1, $src2 #@orI_Reg_imm" %} ++ ins_encode %{ ++ __ ori($dst$$Register, $src1$$Register, $src2$$constant); ++ %} ++ ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++// Or Register with Register ++instruct orI_Reg_Reg(mRegI dst, mRegI src1, mRegI src2) %{ ++ match(Set dst (OrI src1 src2)); ++ ++ format %{ "OR $dst, $src1, $src2 #@orI_Reg_Reg" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src1 = $src1$$Register; ++ Register src2 = $src2$$Register; ++ __ orr(dst, src1, src2); ++ %} ++ ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++instruct rotI_shr_logical_Reg(mRegI dst, mRegI src, immI_0_31 rshift, immI_0_31 lshift, immI_1 one) %{ ++ match(Set dst (OrI (URShiftI src rshift) (LShiftI (AndI src one) lshift))); ++ predicate(32 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()))); ++ ++ format %{ "rotri_w $dst, $src, 1 ...\n\t" ++ "srli_w $dst, $dst, ($rshift-1) @ rotI_shr_logical_Reg" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src = $src$$Register; ++ int rshift = $rshift$$constant; ++ ++ __ rotri_w(dst, src, 1); ++ if (rshift - 1) { ++ __ srli_w(dst, dst, rshift - 1); ++ } ++ %} ++ ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++instruct orI_Reg_castP2X(mRegL dst, mRegL src1, mRegP src2) %{ ++ match(Set dst (OrI src1 (CastP2X src2))); ++ ++ format %{ "OR $dst, $src1, $src2 #@orI_Reg_castP2X" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src1 = $src1$$Register; ++ Register src2 = $src2$$Register; ++ __ orr(dst, src1, src2); ++ %} ++ ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++// Logical Shift Right by 5-bit immediate ++instruct shr_logical_Reg_imm(mRegI dst, mRegI src, immIU5 shift) %{ ++ match(Set dst (URShiftI src shift)); ++ //effect(KILL cr); ++ ++ format %{ "SRLI_W $dst, $src, $shift #@shr_logical_Reg_imm" %} ++ ins_encode %{ ++ Register src = $src$$Register; ++ Register dst = $dst$$Register; ++ int shift = $shift$$constant; ++ ++ __ srli_w(dst, src, shift); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++instruct shr_logical_Reg_imm_nonneg_mask(mRegI dst, mRegI src, immI_0_31 shift, immI_nonneg_mask mask) %{ ++ match(Set dst (AndI (URShiftI src shift) mask)); ++ ++ format %{ "bstrpick_w $dst, $src, $shift+one-bits($mask)-1, shift #@shr_logical_Reg_imm_nonneg_mask" %} ++ ins_encode %{ ++ Register src = $src$$Register; ++ Register dst = $dst$$Register; ++ int pos = $shift$$constant; ++ int size = Assembler::is_int_mask($mask$$constant); ++ ++ __ bstrpick_w(dst, src, pos+size-1, pos); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++instruct rolI_Reg_immI_0_31(mRegI dst, mRegI src, immI_0_31 lshift, immI_0_31 rshift) ++%{ ++ predicate(0 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()) & 0x1f)); ++ match(Set dst (OrI (LShiftI src lshift) (URShiftI src rshift))); ++ ++ ins_cost(100); ++ format %{ "rotri_w $dst, $src, $rshift #@rolI_Reg_immI_0_31" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src = $src$$Register; ++ int sa = $rshift$$constant; ++ ++ __ rotri_w(dst, src, sa); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++instruct rolL_Reg_immI_0_31(mRegL dst, mRegLorI2L src, immI_32_63 lshift, immI_0_31 rshift) ++%{ ++ predicate(0 == ((n->in(1)->in(2)->get_int() + 
n->in(2)->in(2)->get_int()) & 0x3f)); ++ match(Set dst (OrL (LShiftL src lshift) (URShiftL src rshift))); ++ ++ ins_cost(100); ++ format %{ "rotri_d $dst, $src, $rshift #@rolL_Reg_immI_0_31" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src = $src$$Register; ++ int sa = $rshift$$constant; ++ ++ __ rotri_d(dst, src, sa); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++instruct rolL_Reg_immI_32_63(mRegL dst, mRegLorI2L src, immI_0_31 lshift, immI_32_63 rshift) ++%{ ++ predicate(0 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()) & 0x3f)); ++ match(Set dst (OrL (LShiftL src lshift) (URShiftL src rshift))); ++ ++ ins_cost(100); ++ format %{ "rotri_d $dst, $src, $rshift #@rolL_Reg_immI_32_63" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src = $src$$Register; ++ int sa = $rshift$$constant; ++ ++ __ rotri_d(dst, src, sa); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++instruct rorI_Reg_immI_0_31(mRegI dst, mRegI src, immI_0_31 rshift, immI_0_31 lshift) ++%{ ++ predicate(0 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()) & 0x1f)); ++ match(Set dst (OrI (URShiftI src rshift) (LShiftI src lshift))); ++ ++ ins_cost(100); ++ format %{ "rotri_w $dst, $src, $rshift #@rorI_Reg_immI_0_31" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src = $src$$Register; ++ int sa = $rshift$$constant; ++ ++ __ rotri_w(dst, src, sa); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++instruct rorL_Reg_immI_0_31(mRegL dst, mRegLorI2L src, immI_0_31 rshift, immI_32_63 lshift) ++%{ ++ predicate(0 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()) & 0x3f)); ++ match(Set dst (OrL (URShiftL src rshift) (LShiftL src lshift))); ++ ++ ins_cost(100); ++ format %{ "rotri_d $dst, $src, $rshift #@rorL_Reg_immI_0_31" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src = $src$$Register; ++ int sa = $rshift$$constant; ++ ++ __ rotri_d(dst, src, sa); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++instruct rorL_Reg_immI_32_63(mRegL dst, mRegLorI2L src, immI_32_63 rshift, immI_0_31 lshift) ++%{ ++ predicate(0 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()) & 0x3f)); ++ match(Set dst (OrL (URShiftL src rshift) (LShiftL src lshift))); ++ ++ ins_cost(100); ++ format %{ "rotri_d $dst, $src, $rshift #@rorL_Reg_immI_32_63" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src = $src$$Register; ++ int sa = $rshift$$constant; ++ ++ __ rotri_d(dst, src, sa); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++// Logical Shift Right ++instruct shr_logical_Reg_Reg(mRegI dst, mRegI src, mRegI shift) %{ ++ match(Set dst (URShiftI src shift)); ++ ++ format %{ "SRL_W $dst, $src, $shift #@shr_logical_Reg_Reg" %} ++ ins_encode %{ ++ Register src = $src$$Register; ++ Register dst = $dst$$Register; ++ Register shift = $shift$$Register; ++ __ srl_w(dst, src, shift); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++ ++instruct shr_arith_Reg_imm(mRegI dst, mRegI src, immIU5 shift) %{ ++ match(Set dst (RShiftI src shift)); ++ // effect(KILL cr); ++ ++ format %{ "SRAI_W $dst, $src, $shift #@shr_arith_Reg_imm" %} ++ ins_encode %{ ++ Register src = $src$$Register; ++ Register dst = $dst$$Register; ++ int shift = $shift$$constant; ++ __ srai_w(dst, src, shift); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++instruct shr_arith_Reg_Reg(mRegI dst, mRegI src, mRegI shift) %{ ++ match(Set dst (RShiftI src shift)); ++ // effect(KILL cr); ++ ++ format %{ "SRA_W $dst, $src, $shift #@shr_arith_Reg_Reg" %} ++ ins_encode %{ ++ Register src = 
$src$$Register; ++ Register dst = $dst$$Register; ++ Register shift = $shift$$Register; ++ __ sra_w(dst, src, shift); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++//----------Convert Int to Boolean--------------------------------------------- ++ ++instruct convI2B(mRegI dst, mRegI src) %{ ++ match(Set dst (Conv2B src)); ++ ++ ins_cost(100); ++ format %{ "convI2B $dst, $src @ convI2B" %} ++ ins_encode %{ ++ Register dst = as_Register($dst$$reg); ++ Register src = as_Register($src$$reg); ++ ++ if (dst != src) { ++ __ addi_d(dst, R0, 1); ++ __ maskeqz(dst, dst, src); ++ } else { ++ __ move(AT, src); ++ __ addi_d(dst, R0, 1); ++ __ maskeqz(dst, dst, AT); ++ } ++ %} ++ ++ ins_pipe( ialu_regL_regL ); ++%} ++ ++instruct convI2L_reg( mRegL dst, mRegI src) %{ ++ match(Set dst (ConvI2L src)); ++ ++ ins_cost(100); ++ format %{ "SLLI_W $dst, $src @ convI2L_reg\t" %} ++ ++ ins_encode %{ ++ Register dst = as_Register($dst$$reg); ++ Register src = as_Register($src$$reg); ++ ++ if(dst != src) __ slli_w(dst, src, 0); ++ %} ++ ins_pipe( ialu_regL_regL ); ++%} ++ ++instruct convL2I_reg( mRegI dst, mRegLorI2L src ) %{ ++ match(Set dst (ConvL2I src)); ++ ++ format %{ "MOV $dst, $src @ convL2I_reg" %} ++ ins_encode %{ ++ Register dst = as_Register($dst$$reg); ++ Register src = as_Register($src$$reg); ++ ++ __ slli_w(dst, src, 0); ++ %} ++ ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++instruct convL2D_reg( regD dst, mRegL src ) %{ ++ match(Set dst (ConvL2D src)); ++ format %{ "convL2D $dst, $src @ convL2D_reg" %} ++ ins_encode %{ ++ Register src = as_Register($src$$reg); ++ FloatRegister dst = as_FloatRegister($dst$$reg); ++ ++ __ movgr2fr_d(dst, src); ++ __ ffint_d_l(dst, dst); ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++ ++// Convert double to int. ++// If the double is NaN, stuff a zero in instead. ++instruct convD2I_reg_reg(mRegI dst, regD src, regD tmp) %{ ++ match(Set dst (ConvD2I src)); ++ effect(USE src, TEMP tmp); ++ ++ format %{ "convd2i $dst, $src, using $tmp as TEMP @ convD2I_reg_reg" %} ++ ++ ins_encode %{ ++ __ ftintrz_w_d($tmp$$FloatRegister, $src$$FloatRegister); ++ __ movfr2gr_s($dst$$Register, $tmp$$FloatRegister); ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct convD2L_reg_reg(mRegL dst, regD src, regD tmp) %{ ++ match(Set dst (ConvD2L src)); ++ effect(USE src, TEMP tmp); ++ ++ format %{ "convd2l $dst, $src, using $tmp as TEMP @ convD2L_reg_reg" %} ++ ++ ins_encode %{ ++ __ ftintrz_l_d($tmp$$FloatRegister, $src$$FloatRegister); ++ __ movfr2gr_d($dst$$Register, $tmp$$FloatRegister); ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++// Convert float to int. ++// If the float is NaN, stuff a zero in instead. 
++instruct convF2I_reg_reg(mRegI dst, regF src, regF tmp) %{ ++ match(Set dst (ConvF2I src)); ++ effect(USE src, TEMP tmp); ++ ++ format %{ "convf2i $dst, $src, using $tmp as TEMP @ convF2I_reg_reg" %} ++ ++ ins_encode %{ ++ __ ftintrz_w_s($tmp$$FloatRegister, $src$$FloatRegister); ++ __ movfr2gr_s($dst$$Register, $tmp$$FloatRegister); ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct convF2L_reg_reg(mRegL dst, regF src, regF tmp) %{ ++ match(Set dst (ConvF2L src)); ++ effect(USE src, TEMP tmp); ++ ++ format %{ "convf2l $dst, $src, using $tmp as TEMP @ convF2L_reg_reg" %} ++ ++ ins_encode %{ ++ __ ftintrz_l_s($tmp$$FloatRegister, $src$$FloatRegister); ++ __ movfr2gr_d($dst$$Register, $tmp$$FloatRegister); ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++ ++instruct convL2F_reg( regF dst, mRegL src ) %{ ++ match(Set dst (ConvL2F src)); ++ format %{ "convl2f $dst, $src @ convL2F_reg" %} ++ ins_encode %{ ++ FloatRegister dst = $dst$$FloatRegister; ++ Register src = as_Register($src$$reg); ++ Label L; ++ ++ __ movgr2fr_d(dst, src); ++ __ ffint_s_l(dst, dst); ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct convI2F_reg( regF dst, mRegI src ) %{ ++ match(Set dst (ConvI2F src)); ++ format %{ "convi2f $dst, $src @ convI2F_reg" %} ++ ins_encode %{ ++ Register src = $src$$Register; ++ FloatRegister dst = $dst$$FloatRegister; ++ ++ __ movgr2fr_w(dst, src); ++ __ ffint_s_w(dst, dst); ++ %} ++ ++ ins_pipe( fpu_regF_regF ); ++%} ++ ++instruct cmpLTMask_immI_0( mRegI dst, mRegI p, immI_0 zero ) %{ ++ match(Set dst (CmpLTMask p zero)); ++ ins_cost(100); ++ ++ format %{ "srai_w $dst, $p, 31 @ cmpLTMask_immI_0" %} ++ ins_encode %{ ++ Register src = $p$$Register; ++ Register dst = $dst$$Register; ++ ++ __ srai_w(dst, src, 31); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++ ++instruct cmpLTMask( mRegI dst, mRegI p, mRegI q ) %{ ++ match(Set dst (CmpLTMask p q)); ++ ins_cost(400); ++ ++ format %{ "cmpLTMask $dst, $p, $q @ cmpLTMask" %} ++ ins_encode %{ ++ Register p = $p$$Register; ++ Register q = $q$$Register; ++ Register dst = $dst$$Register; ++ ++ __ slt(dst, p, q); ++ __ sub_d(dst, R0, dst); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct convP2B(mRegI dst, mRegP src) %{ ++ match(Set dst (Conv2B src)); ++ ++ ins_cost(100); ++ format %{ "convP2B $dst, $src @ convP2B" %} ++ ins_encode %{ ++ Register dst = as_Register($dst$$reg); ++ Register src = as_Register($src$$reg); ++ ++ if (dst != src) { ++ __ addi_d(dst, R0, 1); ++ __ maskeqz(dst, dst, src); ++ } else { ++ __ move(AT, src); ++ __ addi_d(dst, R0, 1); ++ __ maskeqz(dst, dst, AT); ++ } ++ %} ++ ++ ins_pipe( ialu_regL_regL ); ++%} ++ ++ ++instruct convI2D_reg_reg(regD dst, mRegI src) %{ ++ match(Set dst (ConvI2D src)); ++ format %{ "conI2D $dst, $src @convI2D_reg" %} ++ ins_encode %{ ++ Register src = $src$$Register; ++ FloatRegister dst = $dst$$FloatRegister; ++ __ movgr2fr_w(dst ,src); ++ __ ffint_d_w(dst, dst); ++ %} ++ ins_pipe( fpu_regF_regF ); ++%} ++ ++instruct convF2D_reg_reg(regD dst, regF src) %{ ++ match(Set dst (ConvF2D src)); ++ format %{ "convF2D $dst, $src\t# @convF2D_reg_reg" %} ++ ins_encode %{ ++ FloatRegister dst = $dst$$FloatRegister; ++ FloatRegister src = $src$$FloatRegister; ++ ++ __ fcvt_d_s(dst, src); ++ %} ++ ins_pipe( fpu_regF_regF ); ++%} ++ ++instruct convD2F_reg_reg(regF dst, regD src) %{ ++ match(Set dst (ConvD2F src)); ++ format %{ "convD2F $dst, $src\t# @convD2F_reg_reg" %} ++ ins_encode %{ ++ FloatRegister dst = $dst$$FloatRegister; ++ FloatRegister src = $src$$FloatRegister; ++ ++ __ fcvt_s_d(dst, src); ++ %} ++ 
ins_pipe( fpu_regF_regF ); ++%} ++ ++ ++// Convert oop pointer into compressed form ++instruct encodeHeapOop(mRegN dst, mRegP src) %{ ++ predicate(n->bottom_type()->make_ptr()->ptr() != TypePtr::NotNull); ++ match(Set dst (EncodeP src)); ++ format %{ "encode_heap_oop $dst,$src" %} ++ ins_encode %{ ++ Register src = $src$$Register; ++ Register dst = $dst$$Register; ++ ++ __ encode_heap_oop(dst, src); ++ %} ++ ins_pipe( ialu_regL_regL ); ++%} ++ ++instruct encodeHeapOop_not_null(mRegN dst, mRegP src) %{ ++ predicate(n->bottom_type()->make_ptr()->ptr() == TypePtr::NotNull); ++ match(Set dst (EncodeP src)); ++ format %{ "encode_heap_oop_not_null $dst,$src @ encodeHeapOop_not_null" %} ++ ins_encode %{ ++ __ encode_heap_oop_not_null($dst$$Register, $src$$Register); ++ %} ++ ins_pipe( ialu_regL_regL ); ++%} ++ ++instruct decodeHeapOop(mRegP dst, mRegN src) %{ ++ predicate(n->bottom_type()->is_ptr()->ptr() != TypePtr::NotNull && ++ n->bottom_type()->is_ptr()->ptr() != TypePtr::Constant); ++ match(Set dst (DecodeN src)); ++ format %{ "decode_heap_oop $dst,$src @ decodeHeapOop" %} ++ ins_encode %{ ++ Register s = $src$$Register; ++ Register d = $dst$$Register; ++ ++ __ decode_heap_oop(d, s); ++ %} ++ ins_pipe( ialu_regL_regL ); ++%} ++ ++instruct decodeHeapOop_not_null(mRegP dst, mRegN src) %{ ++ predicate(n->bottom_type()->is_ptr()->ptr() == TypePtr::NotNull || ++ n->bottom_type()->is_ptr()->ptr() == TypePtr::Constant); ++ match(Set dst (DecodeN src)); ++ format %{ "decode_heap_oop_not_null $dst,$src @ decodeHeapOop_not_null" %} ++ ins_encode %{ ++ Register s = $src$$Register; ++ Register d = $dst$$Register; ++ if (s != d) { ++ __ decode_heap_oop_not_null(d, s); ++ } else { ++ __ decode_heap_oop_not_null(d); ++ } ++ %} ++ ins_pipe( ialu_regL_regL ); ++%} ++ ++instruct encodeKlass_not_null(mRegN dst, mRegP src) %{ ++ match(Set dst (EncodePKlass src)); ++ format %{ "encode_heap_oop_not_null $dst,$src @ encodeKlass_not_null" %} ++ ins_encode %{ ++ __ encode_klass_not_null($dst$$Register, $src$$Register); ++ %} ++ ins_pipe( ialu_regL_regL ); ++%} ++ ++instruct decodeKlass_not_null(mRegP dst, mRegN src) %{ ++ match(Set dst (DecodeNKlass src)); ++ format %{ "decode_heap_klass_not_null $dst,$src" %} ++ ins_encode %{ ++ Register s = $src$$Register; ++ Register d = $dst$$Register; ++ if (s != d) { ++ __ decode_klass_not_null(d, s); ++ } else { ++ __ decode_klass_not_null(d); ++ } ++ %} ++ ins_pipe( ialu_regL_regL ); ++%} ++ ++//FIXME ++instruct tlsLoadP(mRegP dst) %{ ++ match(Set dst (ThreadLocal)); ++ ++ ins_cost(0); ++ format %{ " get_thread in $dst #@tlsLoadP" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++#ifdef OPT_THREAD ++ __ move(dst, TREG); ++#else ++ __ get_thread(dst); ++#endif ++ %} ++ ++ ins_pipe( ialu_loadI ); ++%} ++ ++ ++instruct checkCastPP( mRegP dst ) %{ ++ match(Set dst (CheckCastPP dst)); ++ ++ format %{ "#checkcastPP of $dst (empty encoding) #@chekCastPP" %} ++ ins_encode( /*empty encoding*/ ); ++ ins_pipe( empty ); ++%} ++ ++instruct castPP(mRegP dst) ++%{ ++ match(Set dst (CastPP dst)); ++ ++ size(0); ++ format %{ "# castPP of $dst" %} ++ ins_encode(/* empty encoding */); ++ ins_pipe(empty); ++%} ++ ++instruct castII( mRegI dst ) %{ ++ match(Set dst (CastII dst)); ++ format %{ "#castII of $dst empty encoding" %} ++ ins_encode( /*empty encoding*/ ); ++ ins_cost(0); ++ ins_pipe( empty ); ++%} ++ ++// Return Instruction ++// Remove the return address & jump to it. 
++instruct Ret() %{ ++ match(Return); ++ format %{ "RET #@Ret" %} ++ ++ ins_encode %{ ++ __ jr(RA); ++ %} ++ ++ ins_pipe( pipe_jump ); ++%} ++ ++ ++ ++// Tail Jump; remove the return address; jump to target. ++// TailCall above leaves the return address around. ++// TailJump is used in only one place, the rethrow_Java stub (fancy_jump=2). ++// ex_oop (Exception Oop) is needed in %o0 at the jump. As there would be a ++// "restore" before this instruction (in Epilogue), we need to materialize it ++// in %i0. ++//FIXME ++instruct tailjmpInd(no_Ax_mRegP jump_target, mRegP ex_oop) %{ ++ match( TailJump jump_target ex_oop ); ++ ins_cost(200); ++ format %{ "Jmp $jump_target ; ex_oop = $ex_oop #@tailjmpInd" %} ++ ins_encode %{ ++ Register target = $jump_target$$Register; ++ ++ // V0, V1 are indicated in: ++ // [stubGenerator_loongarch.cpp] generate_forward_exception() ++ // [runtime_loongarch.cpp] OptoRuntime::generate_exception_blob() ++ // ++ Register oop = $ex_oop$$Register; ++ Register exception_oop = V0; ++ Register exception_pc = V1; ++ ++ __ move(exception_pc, RA); ++ __ move(exception_oop, oop); ++ ++ __ jr(target); ++ %} ++ ins_pipe( pipe_jump ); ++%} ++ ++// ============================================================================ ++// Procedure Call/Return Instructions ++// Call Java Static Instruction ++// Note: If this code changes, the corresponding ret_addr_offset() and ++// compute_padding() functions will have to be adjusted. ++instruct CallStaticJavaDirect(method meth) %{ ++ match(CallStaticJava); ++ effect(USE meth); ++ ++ ins_cost(300); ++ format %{ "CALL,static #@CallStaticJavaDirect " %} ++ ins_encode( Java_Static_Call( meth ) ); ++ ins_pipe( pipe_slow ); ++ ins_pc_relative(1); ++ ins_alignment(4); ++%} ++ ++// Call Java Dynamic Instruction ++// Note: If this code changes, the corresponding ret_addr_offset() and ++// compute_padding() functions will have to be adjusted. ++instruct CallDynamicJavaDirect(method meth) %{ ++ match(CallDynamicJava); ++ effect(USE meth); ++ ++ ins_cost(300); ++ format %{"MOV IC_Klass, #Universe::non_oop_word()\n\t" ++ "CallDynamic @ CallDynamicJavaDirect" %} ++ ins_encode( Java_Dynamic_Call( meth ) ); ++ ins_pipe( pipe_slow ); ++ ins_pc_relative(1); ++ ins_alignment(4); ++%} ++ ++instruct CallLeafNoFPDirect(method meth) %{ ++ match(CallLeafNoFP); ++ effect(USE meth); ++ ++ ins_cost(300); ++ format %{ "CALL_LEAF_NOFP,runtime " %} ++ ins_encode(Java_To_Runtime(meth)); ++ ins_pipe( pipe_slow ); ++ ins_pc_relative(1); ++ ins_alignment(4); ++%} ++ ++// Prefetch instructions. 
++ ++instruct prefetchr( memory mem ) %{ ++ match(PrefetchRead mem); ++ ins_cost(125); ++ ++ format %{ "pref $mem\t# Prefetch into temporal cache for read @ prefetchr" %} ++ ins_encode %{ ++ int base = $mem$$base; ++ int index = $mem$$index; ++ int scale = $mem$$scale; ++ int disp = $mem$$disp; ++ ++ if( index != 0 ) { ++ if (scale == 0) { ++ __ add_d(AT, as_Register(base), as_Register(index)); ++ } else { ++ __ alsl_d(AT, as_Register(index), as_Register(base), scale - 1); ++ } ++ } else { ++ __ move(AT, as_Register(base)); ++ } ++ if( Assembler::is_simm(disp, 12) ) { ++ __ addi_d(AT, AT, disp); ++ } else { ++ __ li(T4, disp); ++ __ add_d(AT, AT, T4); ++ } ++ __ preld(0, AT, 0); //hint: 0:load ++ %} ++ ins_pipe(pipe_slow); ++%} ++ ++instruct prefetchw( memory mem ) %{ ++ match(PrefetchWrite mem); ++ ins_cost(125); ++ format %{ "pref $mem\t# Prefetch to temporal cache for write @ prefetchw" %} ++ ins_encode %{ ++ int base = $mem$$base; ++ int index = $mem$$index; ++ int scale = $mem$$scale; ++ int disp = $mem$$disp; ++ ++ if( index != 0 ) { ++ if (scale == 0) { ++ __ add_d(AT, as_Register(base), as_Register(index)); ++ } else { ++ __ alsl_d(AT, as_Register(index), as_Register(base), scale - 1); ++ } ++ } else { ++ __ move(AT, as_Register(base)); ++ } ++ if( Assembler::is_simm(disp, 12) ) { ++ __ addi_d(AT, AT, disp); ++ } else { ++ __ li(T4, disp); ++ __ add_d(AT, AT, T4); ++ } ++ __ preld(8, AT, 0); //hint: 8:store ++ %} ++ ins_pipe(pipe_slow); ++%} ++ ++// Prefetch instructions for allocation. ++ ++instruct prefetchAlloc(memory mem) %{ ++ match(PrefetchAllocation mem); ++ ins_cost(125); ++ format %{ "preld $mem\t# Prefetch allocation @ prefetchAlloc" %} ++ ins_encode %{ ++ int base = $mem$$base; ++ int index = $mem$$index; ++ int scale = $mem$$scale; ++ int disp = $mem$$disp; ++ ++ if (index != 0) { ++ if (scale == 0) { ++ __ add_d(AT, as_Register(base), as_Register(index)); ++ } else { ++ __ alsl_d(AT, as_Register(index), as_Register(base), scale - 1); ++ } ++ ++ if (Assembler::is_simm(disp, 12)) { ++ __ preld(8, AT, disp); ++ } else { ++ __ li(T4, disp); ++ __ add_d(AT, AT, T4); ++ __ preld(8, AT, 0); ++ } ++ } else { ++ if (Assembler::is_simm(disp, 12)) { ++ __ preld(8, as_Register(base), disp); ++ } else { ++ __ li(T4, disp); ++ __ add_d(AT, as_Register(base), T4); ++ __ preld(8, AT, 0); ++ } ++ } ++ %} ++ ins_pipe(pipe_slow); ++%} ++ ++ ++// Call runtime without safepoint ++instruct CallLeafDirect(method meth) %{ ++ match(CallLeaf); ++ effect(USE meth); ++ ++ ins_cost(300); ++ format %{ "CALL_LEAF,runtime #@CallLeafDirect " %} ++ ins_encode(Java_To_Runtime(meth)); ++ ins_pipe( pipe_slow ); ++ ins_pc_relative(1); ++ ins_alignment(4); ++%} ++ ++// Load Char (16bit unsigned) ++instruct loadUS(mRegI dst, memory mem) %{ ++ match(Set dst (LoadUS mem)); ++ ++ ins_cost(125); ++ format %{ "loadUS $dst,$mem @ loadC" %} ++ ins_encode %{ ++ __ loadstore_enc($dst$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::LOAD_U_SHORT); ++ %} ++ ins_pipe( ialu_loadI ); ++%} ++ ++instruct loadUS_convI2L(mRegL dst, memory mem) %{ ++ match(Set dst (ConvI2L (LoadUS mem))); ++ ++ ins_cost(125); ++ format %{ "loadUS $dst,$mem @ loadUS_convI2L" %} ++ ins_encode %{ ++ __ loadstore_enc($dst$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::LOAD_U_SHORT); ++ %} ++ ins_pipe( ialu_loadI ); ++%} ++ ++// Store Char (16bit unsigned) ++instruct storeC(memory mem, mRegI src) %{ ++ match(Set mem (StoreC mem src)); ++ ++ ins_cost(125); ++ format %{ "storeC $src, $mem @ 
storeC" %} ++ ins_encode %{ ++ __ loadstore_enc($src$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::STORE_CHAR); ++ %} ++ ins_pipe( ialu_loadI ); ++%} ++ ++instruct storeC_0(memory mem, immI_0 zero) %{ ++ match(Set mem (StoreC mem zero)); ++ ++ ins_cost(125); ++ format %{ "storeC $zero, $mem @ storeC_0" %} ++ ins_encode %{ ++ __ loadstore_enc(R0, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::STORE_SHORT); ++ %} ++ ins_pipe( ialu_loadI ); ++%} ++ ++ ++instruct loadConF_immF_0(regF dst, immF_0 zero) %{ ++ match(Set dst zero); ++ ins_cost(100); ++ ++ format %{ "mov $dst, zero @ loadConF_immF_0\n"%} ++ ins_encode %{ ++ FloatRegister dst = $dst$$FloatRegister; ++ ++ __ movgr2fr_w(dst, R0); ++ %} ++ ins_pipe( fpu_loadF ); ++%} ++ ++ ++instruct loadConF(regF dst, immF src) %{ ++ match(Set dst src); ++ ins_cost(125); ++ ++ format %{ "fld_s $dst, $constantoffset[$constanttablebase] # load FLOAT $src from table @ loadConF" %} ++ ins_encode %{ ++ int con_offset = $constantoffset($src); ++ ++ if (Assembler::is_simm(con_offset, 12)) { ++ __ fld_s($dst$$FloatRegister, $constanttablebase, con_offset); ++ } else { ++ __ li(AT, con_offset); ++ __ fldx_s($dst$$FloatRegister, $constanttablebase, AT); ++ } ++ %} ++ ins_pipe( fpu_loadF ); ++%} ++ ++ ++instruct loadConD_immD_0(regD dst, immD_0 zero) %{ ++ match(Set dst zero); ++ ins_cost(100); ++ ++ format %{ "mov $dst, zero @ loadConD_immD_0"%} ++ ins_encode %{ ++ FloatRegister dst = as_FloatRegister($dst$$reg); ++ ++ __ movgr2fr_d(dst, R0); ++ %} ++ ins_pipe( fpu_loadF ); ++%} ++ ++instruct loadConD(regD dst, immD src) %{ ++ match(Set dst src); ++ ins_cost(125); ++ ++ format %{ "fld_d $dst, $constantoffset[$constanttablebase] # load DOUBLE $src from table @ loadConD" %} ++ ins_encode %{ ++ int con_offset = $constantoffset($src); ++ ++ if (Assembler::is_simm(con_offset, 12)) { ++ __ fld_d($dst$$FloatRegister, $constanttablebase, con_offset); ++ } else { ++ __ li(AT, con_offset); ++ __ fldx_d($dst$$FloatRegister, $constanttablebase, AT); ++ } ++ %} ++ ins_pipe( fpu_loadF ); ++%} ++ ++// Store register Float value (it is faster than store from FPU register) ++instruct storeF_reg( memory mem, regF src) %{ ++ match(Set mem (StoreF mem src)); ++ ++ ins_cost(50); ++ format %{ "store $mem, $src\t# store float @ storeF_reg" %} ++ ins_encode %{ ++ __ loadstore_enc($src$$FloatRegister, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::STORE_FLOAT); ++ %} ++ ins_pipe( fpu_storeF ); ++%} ++ ++instruct storeF_immF_0( memory mem, immF_0 zero) %{ ++ match(Set mem (StoreF mem zero)); ++ ++ ins_cost(40); ++ format %{ "store $mem, zero\t# store float @ storeF_immF_0" %} ++ ins_encode %{ ++ __ loadstore_enc(R0, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::STORE_INT); ++ %} ++ ins_pipe( ialu_storeI ); ++%} ++ ++// Load Double ++instruct loadD(regD dst, memory mem) %{ ++ match(Set dst (LoadD mem)); ++ ++ ins_cost(150); ++ format %{ "loadD $dst, $mem #@loadD" %} ++ ins_encode %{ ++ __ loadstore_enc($dst$$FloatRegister, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::LOAD_DOUBLE); ++ %} ++ ins_pipe( ialu_loadI ); ++%} ++ ++// Load Double - UNaligned ++instruct loadD_unaligned(regD dst, memory mem ) %{ ++ match(Set dst (LoadD_unaligned mem)); ++ ins_cost(250); ++ // FIXME: Need more effective ldl/ldr ++ format %{ "loadD_unaligned $dst, $mem #@loadD_unaligned" %} ++ ins_encode %{ ++ __ loadstore_enc($dst$$FloatRegister, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, 
MacroAssembler::LOAD_DOUBLE); ++ %} ++ ins_pipe( ialu_loadI ); ++%} ++ ++instruct storeD_reg( memory mem, regD src) %{ ++ match(Set mem (StoreD mem src)); ++ ++ ins_cost(50); ++ format %{ "store $mem, $src\t# store float @ storeD_reg" %} ++ ins_encode %{ ++ __ loadstore_enc($src$$FloatRegister, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::STORE_DOUBLE); ++ %} ++ ins_pipe( fpu_storeF ); ++%} ++ ++instruct storeD_immD_0( memory mem, immD_0 zero) %{ ++ match(Set mem (StoreD mem zero)); ++ ++ ins_cost(40); ++ format %{ "store $mem, zero\t# store float @ storeD_immD_0" %} ++ ins_encode %{ ++ __ loadstore_enc(R0, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::STORE_LONG); ++ %} ++ ins_pipe( ialu_storeI ); ++%} ++ ++instruct loadSSI(mRegI dst, stackSlotI src) ++%{ ++ match(Set dst src); ++ ++ ins_cost(125); ++ format %{ "ld_w $dst, $src\t# int stk @ loadSSI" %} ++ ins_encode %{ ++ guarantee( Assembler::is_simm($src$$disp, 12), "disp too long (loadSSI) !"); ++ __ ld_w($dst$$Register, SP, $src$$disp); ++ %} ++ ins_pipe(ialu_loadI); ++%} ++ ++instruct storeSSI(stackSlotI dst, mRegI src) ++%{ ++ match(Set dst src); ++ ++ ins_cost(100); ++ format %{ "st_w $dst, $src\t# int stk @ storeSSI" %} ++ ins_encode %{ ++ guarantee( Assembler::is_simm($dst$$disp, 12), "disp too long (storeSSI) !"); ++ __ st_w($src$$Register, SP, $dst$$disp); ++ %} ++ ins_pipe(ialu_storeI); ++%} ++ ++instruct loadSSL(mRegL dst, stackSlotL src) ++%{ ++ match(Set dst src); ++ ++ ins_cost(125); ++ format %{ "ld_d $dst, $src\t# long stk @ loadSSL" %} ++ ins_encode %{ ++ guarantee( Assembler::is_simm($src$$disp, 12), "disp too long (loadSSL) !"); ++ __ ld_d($dst$$Register, SP, $src$$disp); ++ %} ++ ins_pipe(ialu_loadI); ++%} ++ ++instruct storeSSL(stackSlotL dst, mRegL src) ++%{ ++ match(Set dst src); ++ ++ ins_cost(100); ++ format %{ "st_d $dst, $src\t# long stk @ storeSSL" %} ++ ins_encode %{ ++ guarantee( Assembler::is_simm($dst$$disp, 12), "disp too long (storeSSL) !"); ++ __ st_d($src$$Register, SP, $dst$$disp); ++ %} ++ ins_pipe(ialu_storeI); ++%} ++ ++instruct loadSSP(mRegP dst, stackSlotP src) ++%{ ++ match(Set dst src); ++ ++ ins_cost(125); ++ format %{ "ld_d $dst, $src\t# ptr stk @ loadSSP" %} ++ ins_encode %{ ++ guarantee( Assembler::is_simm($src$$disp, 12), "disp too long (loadSSP) !"); ++ __ ld_d($dst$$Register, SP, $src$$disp); ++ %} ++ ins_pipe(ialu_loadI); ++%} ++ ++instruct storeSSP(stackSlotP dst, mRegP src) ++%{ ++ match(Set dst src); ++ ++ ins_cost(100); ++ format %{ "sd $dst, $src\t# ptr stk @ storeSSP" %} ++ ins_encode %{ ++ guarantee( Assembler::is_simm($dst$$disp, 12), "disp too long (storeSSP) !"); ++ __ st_d($src$$Register, SP, $dst$$disp); ++ %} ++ ins_pipe(ialu_storeI); ++%} ++ ++instruct loadSSF(regF dst, stackSlotF src) ++%{ ++ match(Set dst src); ++ ++ ins_cost(125); ++ format %{ "fld_s $dst, $src\t# float stk @ loadSSF" %} ++ ins_encode %{ ++ guarantee( Assembler::is_simm($src$$disp, 12), "disp too long (loadSSF) !"); ++ __ fld_s($dst$$FloatRegister, SP, $src$$disp); ++ %} ++ ins_pipe(ialu_loadI); ++%} ++ ++instruct storeSSF(stackSlotF dst, regF src) ++%{ ++ match(Set dst src); ++ ++ ins_cost(100); ++ format %{ "fst_s $dst, $src\t# float stk @ storeSSF" %} ++ ins_encode %{ ++ guarantee( Assembler::is_simm($dst$$disp, 12), "disp too long (storeSSF) !"); ++ __ fst_s($src$$FloatRegister, SP, $dst$$disp); ++ %} ++ ins_pipe(fpu_storeF); ++%} ++ ++// Use the same format since predicate() can not be used here. 
++instruct loadSSD(regD dst, stackSlotD src) ++%{ ++ match(Set dst src); ++ ++ ins_cost(125); ++ format %{ "fld_d $dst, $src\t# double stk @ loadSSD" %} ++ ins_encode %{ ++ guarantee( Assembler::is_simm($src$$disp, 12), "disp too long (loadSSD) !"); ++ __ fld_d($dst$$FloatRegister, SP, $src$$disp); ++ %} ++ ins_pipe(ialu_loadI); ++%} ++ ++instruct storeSSD(stackSlotD dst, regD src) ++%{ ++ match(Set dst src); ++ ++ ins_cost(100); ++ format %{ "sdc1 $dst, $src\t# double stk @ storeSSD" %} ++ ins_encode %{ ++ guarantee( Assembler::is_simm($dst$$disp, 12), "disp too long (storeSSD) !"); ++ __ fst_d($src$$FloatRegister, SP, $dst$$disp); ++ %} ++ ins_pipe(fpu_storeF); ++%} ++ ++instruct cmpFastLock(FlagsReg cr, mRegP object, mRegP box, mRegI tmp, mRegI scr) %{ ++ match(Set cr (FastLock object box)); ++ effect(TEMP tmp, TEMP scr); ++ ins_cost(300); ++ format %{ "FASTLOCK $cr <-- $object, $box, $tmp, $scr #@ cmpFastLock" %} ++ ins_encode %{ ++ __ fast_lock($object$$Register, $box$$Register, $cr$$Register, $tmp$$Register, $scr$$Register); ++ %} ++ ++ ins_pipe( pipe_slow ); ++ ins_pc_relative(1); ++%} ++ ++instruct cmpFastUnlock(FlagsReg cr, mRegP object, mRegP box, mRegI tmp, mRegI scr) %{ ++ match(Set cr (FastUnlock object box)); ++ effect(TEMP tmp, TEMP scr); ++ ins_cost(300); ++ format %{ "FASTUNLOCK $cr <-- $object, $box, $tmp #@cmpFastUnlock" %} ++ ins_encode %{ ++ __ fast_unlock($object$$Register, $box$$Register, $cr$$Register, $tmp$$Register, $scr$$Register); ++ %} ++ ++ ins_pipe( pipe_slow ); ++ ins_pc_relative(1); ++%} ++ ++// Store CMS card-mark Immediate 0 ++instruct storeImmCM_order(memory mem, immI_0 zero) %{ ++ match(Set mem (StoreCM mem zero)); ++ predicate(UseConcMarkSweepGC && !UseCondCardMark); ++ ins_cost(100); ++ format %{ "StoreCM MEMBAR storestore\n\t" ++ "st_b $mem, zero\t! card-mark imm0" %} ++ ins_encode %{ ++ __ membar(__ StoreStore); ++ __ loadstore_enc(R0, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::STORE_BYTE); ++ %} ++ ins_pipe( ialu_storeI ); ++%} ++ ++instruct storeImmCM(memory mem, immI_0 zero) %{ ++ match(Set mem (StoreCM mem zero)); ++ ++ ins_cost(150); ++ format %{ "st_b $mem, zero\t! card-mark imm0" %} ++ ins_encode %{ ++ __ loadstore_enc(R0, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::STORE_BYTE); ++ %} ++ ins_pipe( ialu_storeI ); ++%} ++ ++// Die now ++instruct ShouldNotReachHere( ) ++%{ ++ match(Halt); ++ ins_cost(300); ++ ++ // Use the following format syntax ++ format %{ "ILLTRAP ;#@ShouldNotReachHere" %} ++ ins_encode %{ ++ // Here we should emit illtrap! 
++ __ brk(18); ++ %} ++ ins_pipe( pipe_jump ); ++%} ++ ++instruct leaP12Narrow(mRegP dst, indOffset12Narrow mem) ++%{ ++ predicate(Universe::narrow_oop_shift() == 0); ++ match(Set dst mem); ++ ++ ins_cost(110); ++ format %{ "leaq $dst, $mem\t# ptr off12narrow @ leaP12Narrow" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register base = as_Register($mem$$base); ++ int disp = $mem$$disp; ++ ++ __ addi_d(dst, base, disp); ++ %} ++ ins_pipe( ialu_regI_imm16 ); ++%} ++ ++instruct leaPIdxScale(mRegP dst, mRegP reg, mRegLorI2L lreg, immI_0_3 scale) ++%{ ++ match(Set dst (AddP reg (LShiftL lreg scale))); ++ ++ ins_cost(110); ++ format %{ "leaq $dst, [$reg + $lreg << $scale]\t# @ leaPIdxScale" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register base = $reg$$Register; ++ Register index = $lreg$$Register; ++ int scale = $scale$$constant; ++ ++ if (scale == 0) { ++ __ add_d($dst$$Register, $reg$$Register, index); ++ } else { ++ __ alsl_d(dst, index, base, scale - 1); ++ } ++ %} ++ ++ ins_pipe( ialu_regI_imm16 ); ++%} ++ ++ ++// ============================================================================ ++// The 2nd slow-half of a subtype check. Scan the subklass's 2ndary superklass ++// array for an instance of the superklass. Set a hidden internal cache on a ++// hit (cache is checked with exposed code in gen_subtype_check()). Return ++// NZ for a miss or zero for a hit. The encoding ALSO sets flags. ++instruct partialSubtypeCheck( mRegP result, no_T8_mRegP sub, no_T8_mRegP super, mT8RegI tmp ) %{ ++ match(Set result (PartialSubtypeCheck sub super)); ++ effect(KILL tmp); ++ ins_cost(1100); // slightly larger than the next version ++ format %{ "partialSubtypeCheck result=$result, sub=$sub, super=$super, tmp=$tmp " %} ++ ++ ins_encode( enc_PartialSubtypeCheck(result, sub, super, tmp) ); ++ ins_pipe( pipe_slow ); ++%} ++ ++// Conditional-store of the updated heap-top. ++// Used during allocation of the shared heap. ++ ++instruct storePConditional(memory heap_top_ptr, mRegP oldval, mRegP newval, FlagsReg cr) %{ ++ match(Set cr (StorePConditional heap_top_ptr (Binary oldval newval))); ++ ++ format %{ "move AT, $newval\n\t" ++ "sc_d $heap_top_ptr, AT\t# (ptr) @storePConditional \n\t" ++ "move $cr, AT\n" %} ++ ins_encode%{ ++ Register oldval = $oldval$$Register; ++ Register newval = $newval$$Register; ++ Address addr(as_Register($heap_top_ptr$$base), $heap_top_ptr$$disp); ++ ++ int index = $heap_top_ptr$$index; ++ int scale = $heap_top_ptr$$scale; ++ int disp = $heap_top_ptr$$disp; ++ ++ guarantee(Assembler::is_simm(disp, 12), ""); ++ ++ if (index != 0) { ++ __ stop("in storePConditional: index != 0"); ++ } else { ++ __ move(AT, newval); ++ __ sc_d(AT, addr); ++ __ move($cr$$Register, AT); ++ } ++ %} ++ ins_pipe(long_memory_op); ++%} ++ ++// Conditional-store of an int value. ++// AT flag is set on success, reset otherwise. 
++instruct storeIConditional(memory mem, mRegI oldval, mRegI newval, FlagsReg cr) %{ ++ match(Set cr (StoreIConditional mem (Binary oldval newval))); ++ format %{ "CMPXCHG $newval, $mem, $oldval \t# @storeIConditional" %} ++ ++ ins_encode %{ ++ Register oldval = $oldval$$Register; ++ Register newval = $newval$$Register; ++ Register cr = $cr$$Register; ++ Address addr(as_Register($mem$$base), $mem$$disp); ++ ++ int index = $mem$$index; ++ int scale = $mem$$scale; ++ int disp = $mem$$disp; ++ ++ guarantee(Assembler::is_simm(disp, 12), ""); ++ ++ if (index != 0) { ++ __ stop("in storeIConditional: index != 0"); ++ } else { ++ if (cr != addr.base() && cr != oldval && cr != newval) { ++ __ cmpxchg32(addr, oldval, newval, cr, true, false, true); ++ } else { ++ __ cmpxchg32(addr, oldval, newval, AT, true, false, true); ++ __ move(cr, AT); ++ } ++ } ++ %} ++ ++ ins_pipe(long_memory_op); ++%} ++ ++// Conditional-store of a long value. ++// ZF flag is set on success, reset otherwise. Implemented with a CMPXCHG. ++instruct storeLConditional(memory mem, mRegL oldval, mRegL newval, FlagsReg cr) ++%{ ++ match(Set cr (StoreLConditional mem (Binary oldval newval))); ++ ++ format %{ "cmpxchg $mem, $newval\t# If $oldval == $mem then store $newval into $mem" %} ++ ins_encode%{ ++ Register oldval = $oldval$$Register; ++ Register newval = $newval$$Register; ++ Register cr = $cr$$Register; ++ Address addr(as_Register($mem$$base), $mem$$disp); ++ ++ int index = $mem$$index; ++ int scale = $mem$$scale; ++ int disp = $mem$$disp; ++ ++ guarantee(Assembler::is_simm(disp, 12), ""); ++ ++ if (index != 0) { ++ __ stop("in storeIConditional: index != 0"); ++ } else { ++ if (cr != addr.base() && cr != oldval && cr != newval) { ++ __ cmpxchg(addr, oldval, newval, cr, false, true); ++ } else { ++ __ cmpxchg(addr, oldval, newval, AT, false, true); ++ __ move(cr, AT); ++ } ++ } ++ %} ++ ins_pipe(long_memory_op); ++%} ++ ++// Implement LoadPLocked. Must be ordered against changes of the memory location ++// by storePConditional. 
++instruct loadPLocked(mRegP dst, memory mem) %{ ++ match(Set dst (LoadPLocked mem)); ++ ins_cost(MEMORY_REF_COST); ++ ++ format %{ "ll_d $dst, $mem #@loadPLocked\n\t" %} ++ size(12); ++ ins_encode %{ ++ relocInfo::relocType disp_reloc = $mem->disp_reloc(); ++ assert(disp_reloc == relocInfo::none, "cannot have disp"); ++ __ loadstore_enc($dst$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::LOAD_LINKED_LONG); ++ %} ++ ins_pipe( ialu_loadI ); ++%} ++ ++ ++instruct compareAndSwapI(mRegI res, mRegP mem_ptr, mRegI oldval, mRegI newval) %{ ++ match(Set res (CompareAndSwapI mem_ptr (Binary oldval newval))); ++ format %{ "CMPXCHG $newval, [$mem_ptr], $oldval @ compareAndSwapI" %} ++ ins_encode %{ ++ Register newval = $newval$$Register; ++ Register oldval = $oldval$$Register; ++ Register res = $res$$Register; ++ Address addr($mem_ptr$$Register, 0); ++ ++ if (res != addr.base() && res != oldval && res != newval) { ++ __ cmpxchg32(addr, oldval, newval, res, true, false, true); ++ } else { ++ __ cmpxchg32(addr, oldval, newval, AT, true, false, true); ++ __ move(res, AT); ++ } ++ %} ++ ins_pipe(long_memory_op); ++%} ++ ++instruct compareAndSwapL(mRegI res, mRegP mem_ptr, mRegL oldval, mRegL newval) %{ ++ predicate(VM_Version::supports_cx8()); ++ match(Set res (CompareAndSwapL mem_ptr (Binary oldval newval))); ++ format %{ "CMPXCHG $newval, [$mem_ptr], $oldval @ compareAndSwapL" %} ++ ins_encode %{ ++ Register newval = $newval$$Register; ++ Register oldval = $oldval$$Register; ++ Register res = $res$$Register; ++ Address addr($mem_ptr$$Register, 0); ++ ++ if (res != addr.base() && res != oldval && res != newval) { ++ __ cmpxchg(addr, oldval, newval, res, false, true); ++ } else { ++ __ cmpxchg(addr, oldval, newval, AT, false, true); ++ __ move(res, AT); ++ } ++ %} ++ ins_pipe(long_memory_op); ++%} ++ ++instruct compareAndSwapP(mRegI res, mRegP mem_ptr, mRegP oldval, mRegP newval) %{ ++ match(Set res (CompareAndSwapP mem_ptr (Binary oldval newval))); ++ format %{ "CMPXCHG $newval, [$mem_ptr], $oldval @ compareAndSwapP" %} ++ ins_encode %{ ++ Register newval = $newval$$Register; ++ Register oldval = $oldval$$Register; ++ Register res = $res$$Register; ++ Address addr($mem_ptr$$Register, 0); ++ ++ if (res != addr.base() && res != oldval && res != newval) { ++ __ cmpxchg(addr, oldval, newval, res, false, true); ++ } else { ++ __ cmpxchg(addr, oldval, newval, AT, false, true); ++ __ move(res, AT); ++ } ++ %} ++ ins_pipe(long_memory_op); ++%} ++ ++instruct compareAndSwapN(mRegI res, mRegP mem_ptr, mRegN oldval, mRegN newval) %{ ++ match(Set res (CompareAndSwapN mem_ptr (Binary oldval newval))); ++ format %{ "CMPXCHG $newval, [$mem_ptr], $oldval @ compareAndSwapN" %} ++ ins_encode %{ ++ Register newval = $newval$$Register; ++ Register oldval = $oldval$$Register; ++ Register res = $res$$Register; ++ Address addr($mem_ptr$$Register, 0); ++ ++ if (res != addr.base() && res != oldval && res != newval) { ++ __ cmpxchg32(addr, oldval, newval, res, false, false, true); ++ } else { ++ __ cmpxchg32(addr, oldval, newval, AT, false, false, true); ++ __ move(res, AT); ++ } ++ %} ++ ins_pipe(long_memory_op); ++%} ++ ++//----------Max and Min-------------------------------------------------------- ++ ++// Min Register with Register (generic version) ++instruct minI_Reg_Reg(mRegI dst, mRegI src) %{ ++ match(Set dst (MinI dst src)); ++ //effect(KILL flags); ++ ins_cost(80); ++ ++ format %{ "MIN $dst, $src @minI_Reg_Reg" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src = 
$src$$Register; ++ ++ __ slt(AT, src, dst); ++ __ masknez(dst, dst, AT); ++ __ maskeqz(AT, src, AT); ++ __ OR(dst, dst, AT); ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++// Max Register with Register (generic version) ++instruct maxI_Reg_Reg(mRegI dst, mRegI src) %{ ++ match(Set dst (MaxI dst src)); ++ ins_cost(80); ++ ++ format %{ "MAX $dst, $src @maxI_Reg_Reg" %} ++ ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src = $src$$Register; ++ ++ __ slt(AT, dst, src); ++ __ masknez(dst, dst, AT); ++ __ maskeqz(AT, src, AT); ++ __ OR(dst, dst, AT); ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct maxI_Reg_zero(mRegI dst, immI_0 zero) %{ ++ match(Set dst (MaxI dst zero)); ++ ins_cost(50); ++ ++ format %{ "MAX $dst, 0 @maxI_Reg_zero" %} ++ ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ ++ __ slt(AT, dst, R0); ++ __ masknez(dst, dst, AT); ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct zerox_long_reg_reg(mRegL dst, mRegL src, immL_MaxUI mask) ++%{ ++ match(Set dst (AndL src mask)); ++ ++ format %{ "movl $dst, $src\t# zero-extend long @ zerox_long_reg_reg" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src = $src$$Register; ++ ++ __ bstrpick_d(dst, src, 31, 0); ++ %} ++ ins_pipe(ialu_regI_regI); ++%} ++ ++instruct combine_i2l(mRegL dst, mRegI src1, immL_MaxUI mask, mRegI src2, immI_32 shift32) ++%{ ++ match(Set dst (OrL (AndL (ConvI2L src1) mask) (LShiftL (ConvI2L src2) shift32))); ++ ++ format %{ "combine_i2l $dst, $src2(H), $src1(L) @ combine_i2l" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src1 = $src1$$Register; ++ Register src2 = $src2$$Register; ++ ++ if (src1 == dst) { ++ __ bstrins_d(dst, src2, 63, 32); ++ } else if (src2 == dst) { ++ __ slli_d(dst, dst, 32); ++ __ bstrins_d(dst, src1, 31, 0); ++ } else { ++ __ bstrpick_d(dst, src1, 31, 0); ++ __ bstrins_d(dst, src2, 63, 32); ++ } ++ %} ++ ins_pipe(ialu_regI_regI); ++%} ++ ++// Zero-extend convert int to long ++instruct convI2L_reg_reg_zex(mRegL dst, mRegI src, immL_MaxUI mask) ++%{ ++ match(Set dst (AndL (ConvI2L src) mask)); ++ ++ format %{ "movl $dst, $src\t# i2l zero-extend @ convI2L_reg_reg_zex" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src = $src$$Register; ++ ++ __ bstrpick_d(dst, src, 31, 0); ++ %} ++ ins_pipe(ialu_regI_regI); ++%} ++ ++instruct convL2I2L_reg_reg_zex(mRegL dst, mRegL src, immL_MaxUI mask) ++%{ ++ match(Set dst (AndL (ConvI2L (ConvL2I src)) mask)); ++ ++ format %{ "movl $dst, $src\t# i2l zero-extend @ convL2I2L_reg_reg_zex" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src = $src$$Register; ++ ++ __ bstrpick_d(dst, src, 31, 0); ++ %} ++ ins_pipe(ialu_regI_regI); ++%} ++ ++// Match loading integer and casting it to unsigned int in long register. ++// LoadI + ConvI2L + AndL 0xffffffff. 
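The MinI/MaxI rules above pick between the two registers without a branch: slt leaves a 0/1 predicate in AT, masknez/maskeqz zero out the losing value, and the final OR merges the survivor. A small stand-alone C++ model of those three LoongArch instructions (illustrative only, not part of the patch) shows why the four-instruction sequence in minI_Reg_Reg computes min(dst, src):

  #include <cstdint>
  #include <cassert>

  // Models of the instructions used by minI_Reg_Reg:
  //   slt     rd, rj, rk : rd = (rj < rk) ? 1 : 0   (signed compare)
  //   masknez rd, rj, rk : rd = (rk != 0) ? 0 : rj
  //   maskeqz rd, rj, rk : rd = (rk == 0) ? 0 : rj
  static int64_t slt(int64_t rj, int64_t rk)     { return rj < rk ? 1 : 0; }
  static int64_t masknez(int64_t rj, int64_t rk) { return rk != 0 ? 0 : rj; }
  static int64_t maskeqz(int64_t rj, int64_t rk) { return rk == 0 ? 0 : rj; }

  static int64_t min_reg_reg(int64_t dst, int64_t src) {
    int64_t at = slt(src, dst);   // at = 1 iff src < dst
    dst = masknez(dst, at);       // keep dst only when src >= dst
    at  = maskeqz(src, at);       // keep src only when src <  dst
    return dst | at;              // one operand survives, the other is zeroed
  }

  int main() {
    assert(min_reg_reg(3, 7)  == 3);
    assert(min_reg_reg(7, 3)  == 3);
    assert(min_reg_reg(-5, 2) == -5);
    return 0;
  }

maxI_Reg_Reg is the same construction with the slt operands swapped, and maxI_Reg_zero drops the merge entirely because the "other" value is the zero register.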
++instruct loadUI2L_rmask(mRegL dst, memory mem, immL_MaxUI mask) %{ ++ match(Set dst (AndL (ConvI2L (LoadI mem)) mask)); ++ ++ format %{ "ld_wu $dst, $mem \t// zero-extend to long @ loadUI2L_rmask" %} ++ ins_encode %{ ++ relocInfo::relocType disp_reloc = $mem->disp_reloc(); ++ assert(disp_reloc == relocInfo::none, "cannot have disp"); ++ __ loadstore_enc($dst$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::LOAD_U_INT); ++ %} ++ ins_pipe(ialu_loadI); ++%} ++ ++instruct loadUI2L_lmask(mRegL dst, memory mem, immL_MaxUI mask) %{ ++ match(Set dst (AndL mask (ConvI2L (LoadI mem)))); ++ ++ format %{ "ld_wu $dst, $mem \t// zero-extend to long @ loadUI2L_lmask" %} ++ ins_encode %{ ++ relocInfo::relocType disp_reloc = $mem->disp_reloc(); ++ assert(disp_reloc == relocInfo::none, "cannot have disp"); ++ __ loadstore_enc($dst$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::LOAD_U_INT); ++ %} ++ ins_pipe(ialu_loadI); ++%} ++ ++ ++// ============================================================================ ++// Safepoint Instruction ++instruct safePoint_poll_reg(mRegP poll) %{ ++ match(SafePoint poll); ++ predicate(false); ++ effect(USE poll); ++ ++ ins_cost(125); ++ format %{ "Safepoint @ [$poll] : poll for GC @ safePoint_poll_reg" %} ++ ++ ins_encode %{ ++ Register poll_reg = $poll$$Register; ++ ++ __ block_comment("Safepoint:"); ++ __ relocate(relocInfo::poll_type); ++ __ ld_w(AT, poll_reg, 0); ++ %} ++ ++ ins_pipe( ialu_storeI ); ++%} ++ ++instruct safePoint_poll() %{ ++ match(SafePoint); ++ ++ ins_cost(105); ++ format %{ "poll for GC @ safePoint_poll" %} ++ ++ ins_encode %{ ++ __ block_comment("Safepoint:"); ++ __ li(T4, (long)os::get_polling_page()); ++ __ relocate(relocInfo::poll_type); ++ __ ld_w(AT, T4, 0); ++ %} ++ ++ ins_pipe( ialu_storeI ); ++%} ++ ++//----------Arithmetic Conversion Instructions--------------------------------- ++ ++instruct roundFloat_nop(regF dst) ++%{ ++ match(Set dst (RoundFloat dst)); ++ ++ ins_cost(0); ++ ins_encode(); ++ ins_pipe(empty); ++%} ++ ++instruct roundDouble_nop(regD dst) ++%{ ++ match(Set dst (RoundDouble dst)); ++ ++ ins_cost(0); ++ ins_encode(); ++ ins_pipe(empty); ++%} ++ ++//---------- Zeros Count Instructions ------------------------------------------ ++// CountLeadingZerosINode CountTrailingZerosINode ++instruct countLeadingZerosI(mRegI dst, mRegI src) %{ ++ match(Set dst (CountLeadingZerosI src)); ++ ++ format %{ "clz_w $dst, $src\t# count leading zeros (int)" %} ++ ins_encode %{ ++ __ clz_w($dst$$Register, $src$$Register); ++ %} ++ ins_pipe( ialu_regL_regL ); ++%} ++ ++instruct countLeadingZerosL(mRegI dst, mRegL src) %{ ++ match(Set dst (CountLeadingZerosL src)); ++ ++ format %{ "clz_d $dst, $src\t# count leading zeros (long)" %} ++ ins_encode %{ ++ __ clz_d($dst$$Register, $src$$Register); ++ %} ++ ins_pipe( ialu_regL_regL ); ++%} ++ ++instruct countTrailingZerosI(mRegI dst, mRegI src) %{ ++ match(Set dst (CountTrailingZerosI src)); ++ ++ format %{ "ctz_w $dst, $src\t# count trailing zeros (int)" %} ++ ins_encode %{ ++ __ ctz_w($dst$$Register, $src$$Register); ++ %} ++ ins_pipe( ialu_regL_regL ); ++%} ++ ++instruct countTrailingZerosL(mRegI dst, mRegL src) %{ ++ match(Set dst (CountTrailingZerosL src)); ++ ++ format %{ "ctz_d $dst, $src\t# count trailing zeros (long)" %} ++ ins_encode %{ ++ __ ctz_d($dst$$Register, $src$$Register); ++ %} ++ ins_pipe( ialu_regL_regL ); ++%} ++ ++// ====================VECTOR INSTRUCTIONS===================================== ++ ++// 
--------------------------------- Load ------------------------------------- ++ ++instruct loadV16(vecX dst, memory mem) %{ ++ predicate(n->as_LoadVector()->memory_size() == 16); ++ match(Set dst (LoadVector mem)); ++ format %{ "vload $dst, $mem\t# @loadV16" %} ++ ins_encode %{ ++ __ loadstore_enc($dst$$FloatRegister, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::LOAD_VECTORX); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct loadV32(vecY dst, memory mem) %{ ++ predicate(n->as_LoadVector()->memory_size() == 32); ++ match(Set dst (LoadVector mem)); ++ format %{ "xvload $dst, $mem\t# @loadV32" %} ++ ins_encode %{ ++ __ loadstore_enc($dst$$FloatRegister, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::LOAD_VECTORY); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++// --------------------------------- Store ------------------------------------ ++ ++instruct storeV16(memory mem, vecX src) %{ ++ predicate(n->as_StoreVector()->memory_size() == 16); ++ match(Set mem (StoreVector mem src)); ++ format %{ "vstore $src, $mem\t# @storeV16" %} ++ ins_encode %{ ++ __ loadstore_enc($src$$FloatRegister, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::STORE_VECTORX); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct storeV32(memory mem, vecY src) %{ ++ predicate(n->as_StoreVector()->memory_size() == 32); ++ match(Set mem (StoreVector mem src)); ++ format %{ "xvstore $src, $mem\t# @storeV32" %} ++ ins_encode %{ ++ __ loadstore_enc($src$$FloatRegister, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::STORE_VECTORY); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++// ------------------------------- Replicate ---------------------------------- ++ ++instruct repl16B(vecX dst, mRegI src) %{ ++ predicate(n->as_Vector()->length() == 16); ++ match(Set dst (ReplicateB src)); ++ format %{ "vreplgr2vr.b $dst, $src\t# @repl16B" %} ++ ins_encode %{ ++ __ vreplgr2vr_b($dst$$FloatRegister, $src$$Register); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct repl16B_imm(vecX dst, immI_M128_255 imm) %{ ++ predicate(n->as_Vector()->length() == 16); ++ match(Set dst (ReplicateB imm)); ++ format %{ "vldi $dst, $imm\t# @repl16B_imm" %} ++ ins_encode %{ ++ __ vldi($dst$$FloatRegister, ($imm$$constant & 0xff)); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct repl8S(vecX dst, mRegI src) %{ ++ predicate(n->as_Vector()->length() == 8); ++ match(Set dst (ReplicateS src)); ++ format %{ "vreplgr2vr.h $dst, $src\t# @repl8S" %} ++ ins_encode %{ ++ __ vreplgr2vr_h($dst$$FloatRegister, $src$$Register); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct repl8S_imm(vecX dst, immI10 imm) %{ ++ predicate(n->as_Vector()->length() == 8); ++ match(Set dst (ReplicateS imm)); ++ format %{ "vldi $dst, $imm\t# @repl8S_imm" %} ++ ins_encode %{ ++ __ vldi($dst$$FloatRegister, (0b001 << 10 ) | ($imm$$constant & 0x3ff)); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct repl4I(vecX dst, mRegI src) %{ ++ predicate(n->as_Vector()->length() == 4); ++ match(Set dst (ReplicateI src)); ++ format %{ "vreplgr2vr.w $dst, $src\t# @repl4I" %} ++ ins_encode %{ ++ __ vreplgr2vr_w($dst$$FloatRegister, $src$$Register); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct repl4I_imm(vecX dst, immI10 imm) %{ ++ predicate(n->as_Vector()->length() == 4); ++ match(Set dst (ReplicateI imm)); ++ format %{ "vldi $dst, $imm\t# @repl4I_imm" %} ++ ins_encode %{ ++ __ vldi($dst$$FloatRegister, (0b010 << 10 ) | ($imm$$constant & 0x3ff)); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct repl2L(vecX dst, mRegL src) %{ 
++ predicate(n->as_Vector()->length() == 2); ++ match(Set dst (ReplicateL src)); ++ format %{ "vreplgr2vr.d $dst, $src\t# @repl2L" %} ++ ins_encode %{ ++ __ vreplgr2vr_d($dst$$FloatRegister, $src$$Register); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct repl2L_imm(vecX dst, immL10 imm) %{ ++ predicate(n->as_Vector()->length() == 2); ++ match(Set dst (ReplicateL imm)); ++ format %{ "vldi $dst, $imm\t# @repl2L_imm" %} ++ ins_encode %{ ++ __ vldi($dst$$FloatRegister, (0b011 << 10 ) | ($imm$$constant & 0x3ff)); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct repl4F(vecX dst, regF src) %{ ++ predicate(n->as_Vector()->length() == 4); ++ match(Set dst (ReplicateF src)); ++ format %{ "vreplvei.w $dst, $src, 0\t# @repl4F" %} ++ ins_encode %{ ++ __ vreplvei_w($dst$$FloatRegister, $src$$FloatRegister, 0); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct repl2D(vecX dst, regD src) %{ ++ predicate(n->as_Vector()->length() == 2); ++ match(Set dst (ReplicateD src)); ++ format %{ "vreplvei.d $dst, $src, 0\t# @repl2D" %} ++ ins_encode %{ ++ __ vreplvei_d($dst$$FloatRegister, $src$$FloatRegister, 0); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct repl32B(vecY dst, mRegI src) %{ ++ predicate(n->as_Vector()->length() == 32); ++ match(Set dst (ReplicateB src)); ++ format %{ "xvreplgr2vr.b $dst, $src\t# @repl32B" %} ++ ins_encode %{ ++ __ xvreplgr2vr_b($dst$$FloatRegister, $src$$Register); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct repl32B_imm(vecY dst, immI_M128_255 imm) %{ ++ predicate(n->as_Vector()->length() == 32); ++ match(Set dst (ReplicateB imm)); ++ format %{ "xvldi $dst, $imm\t# @repl32B_imm" %} ++ ins_encode %{ ++ __ xvldi($dst$$FloatRegister, ($imm$$constant & 0xff)); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct repl16S(vecY dst, mRegI src) %{ ++ predicate(n->as_Vector()->length() == 16); ++ match(Set dst (ReplicateS src)); ++ format %{ "xvreplgr2vr.h $dst, $src\t# @repl16S" %} ++ ins_encode %{ ++ __ xvreplgr2vr_h($dst$$FloatRegister, $src$$Register); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct repl16S_imm(vecY dst, immI10 imm) %{ ++ predicate(n->as_Vector()->length() == 16); ++ match(Set dst (ReplicateS imm)); ++ format %{ "xvldi $dst, $imm\t# @repl16S_imm" %} ++ ins_encode %{ ++ __ xvldi($dst$$FloatRegister, (0b001 << 10 ) | ($imm$$constant & 0x3ff)); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct repl8I(vecY dst, mRegI src) %{ ++ predicate(n->as_Vector()->length() == 8); ++ match(Set dst (ReplicateI src)); ++ format %{ "xvreplgr2vr.w $dst, $src\t# @repl8I" %} ++ ins_encode %{ ++ __ xvreplgr2vr_w($dst$$FloatRegister, $src$$Register); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct repl8I_imm(vecY dst, immI10 imm) %{ ++ predicate(n->as_Vector()->length() == 8); ++ match(Set dst (ReplicateI imm)); ++ format %{ "xvldi $dst, $imm\t# @repl8I_imm" %} ++ ins_encode %{ ++ __ xvldi($dst$$FloatRegister, (0b010 << 10 ) | ($imm$$constant & 0x3ff)); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct repl4L(vecY dst, mRegL src) %{ ++ predicate(n->as_Vector()->length() == 4); ++ match(Set dst (ReplicateL src)); ++ format %{ "xvreplgr2vr.d $dst, $src\t# @repl4L" %} ++ ins_encode %{ ++ __ xvreplgr2vr_d($dst$$FloatRegister, $src$$Register); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct repl4L_imm(vecY dst, immL10 imm) %{ ++ predicate(n->as_Vector()->length() == 4); ++ match(Set dst (ReplicateL imm)); ++ format %{ "xvldi $dst, $imm\t# @repl4L_imm" %} ++ ins_encode %{ ++ __ xvldi($dst$$FloatRegister, (0b011 << 10 ) | ($imm$$constant & 0x3ff)); ++ %} ++ ins_pipe( 
pipe_slow ); ++%} ++ ++instruct repl8F(vecY dst, regF src) %{ ++ predicate(n->as_Vector()->length() == 8); ++ match(Set dst (ReplicateF src)); ++ format %{ "xvreplve0.w $dst, $src\t# @repl8F" %} ++ ins_encode %{ ++ __ xvreplve0_w($dst$$FloatRegister, $src$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct repl4D(vecY dst, regD src) %{ ++ predicate(n->as_Vector()->length() == 4); ++ match(Set dst (ReplicateD src)); ++ format %{ "xvreplve0.d $dst, $src\t# @repl4D" %} ++ ins_encode %{ ++ __ xvreplve0_d($dst$$FloatRegister, $src$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++// --------------------------------- ADD -------------------------------------- ++ ++instruct add16B(vecX dst, vecX src1, vecX src2) %{ ++ predicate(n->as_Vector()->length() == 16); ++ match(Set dst (AddVB src1 src2)); ++ format %{ "vadd.b $dst, $src1, $src2\t# @add16B" %} ++ ins_encode %{ ++ __ vadd_b($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct add16B_imm(vecX dst, vecX src, immIU5 imm) %{ ++ predicate(n->as_Vector()->length() == 16); ++ match(Set dst (AddVB src (ReplicateB imm))); ++ format %{ "vaddi.bu $dst, $src, $imm\t# @add16B_imm" %} ++ ins_encode %{ ++ __ vaddi_bu($dst$$FloatRegister, $src$$FloatRegister, $imm$$constant); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct add8S(vecX dst, vecX src1, vecX src2) %{ ++ predicate(n->as_Vector()->length() == 8); ++ match(Set dst (AddVS src1 src2)); ++ format %{ "vadd.h $dst, $src1, $src2\t# @add8S" %} ++ ins_encode %{ ++ __ vadd_h($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct add8S_imm(vecX dst, vecX src, immIU5 imm) %{ ++ predicate(n->as_Vector()->length() == 8); ++ match(Set dst (AddVS src (ReplicateS imm))); ++ format %{ "vaddi.hu $dst, $src, $imm\t# @add8S_imm" %} ++ ins_encode %{ ++ __ vaddi_hu($dst$$FloatRegister, $src$$FloatRegister, $imm$$constant); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct add4I(vecX dst, vecX src1, vecX src2) %{ ++ predicate(n->as_Vector()->length() == 4); ++ match(Set dst (AddVI src1 src2)); ++ format %{ "vadd.w $dst, $src1, src2\t# @add4I" %} ++ ins_encode %{ ++ __ vadd_w($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct add4I_imm(vecX dst, vecX src, immIU5 imm) %{ ++ predicate(n->as_Vector()->length() == 4); ++ match(Set dst (AddVI src (ReplicateI imm))); ++ format %{ "vaddi.wu $dst, $src, $imm\t# @add4I_imm" %} ++ ins_encode %{ ++ __ vaddi_wu($dst$$FloatRegister, $src$$FloatRegister, $imm$$constant); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct add2L(vecX dst, vecX src1, vecX src2) %{ ++ predicate(n->as_Vector()->length() == 2); ++ match(Set dst (AddVL src1 src2)); ++ format %{ "vadd.d $dst, $src1, $src2\t# @add2L" %} ++ ins_encode %{ ++ __ vadd_d($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct add2L_imm(vecX dst, vecX src, immLU5 imm) %{ ++ predicate(n->as_Vector()->length() == 2); ++ match(Set dst (AddVL src (ReplicateL imm))); ++ format %{ "vaddi.du $dst, $src, $imm\t# @add2L_imm" %} ++ ins_encode %{ ++ __ vaddi_du($dst$$FloatRegister, $src$$FloatRegister, $imm$$constant); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct add4F(vecX dst, vecX src1, vecX src2) %{ ++ predicate(n->as_Vector()->length() == 4); ++ match(Set dst (AddVF src1 src2)); ++ format %{ "vfadd.s $dst, $src1, $src2\t# @add4F" %} ++ ins_encode %{ ++ __ 
vfadd_s($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct add2D(vecX dst, vecX src1, vecX src2) %{ ++ predicate(n->as_Vector()->length() == 2); ++ match(Set dst (AddVD src1 src2)); ++ format %{ "vfadd.d $dst, $src1, $src2\t# @add2D" %} ++ ins_encode %{ ++ __ vfadd_d($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct add32B(vecY dst, vecY src1, vecY src2) %{ ++ predicate(n->as_Vector()->length() == 32); ++ match(Set dst (AddVB src1 src2)); ++ format %{ "xvadd.b $dst, $src1, $src2\t# @add32B" %} ++ ins_encode %{ ++ __ xvadd_b($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct add32B_imm(vecY dst, vecY src, immIU5 imm) %{ ++ predicate(n->as_Vector()->length() == 32); ++ match(Set dst (AddVB src (ReplicateB imm))); ++ format %{ "xvaddi.bu $dst, $src, $imm\t# @add32B_imm" %} ++ ins_encode %{ ++ __ xvaddi_bu($dst$$FloatRegister, $src$$FloatRegister, $imm$$constant); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct add16S(vecY dst, vecY src1, vecY src2) %{ ++ predicate(n->as_Vector()->length() == 16); ++ match(Set dst (AddVS src1 src2)); ++ format %{ "xvadd.h $dst, $src1, $src2\t# @add16S" %} ++ ins_encode %{ ++ __ xvadd_h($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct add16S_imm(vecY dst, vecY src, immIU5 imm) %{ ++ predicate(n->as_Vector()->length() == 16); ++ match(Set dst (AddVS src (ReplicateS imm))); ++ format %{ "xvaddi.hu $dst, $src, $imm\t# @add16S_imm" %} ++ ins_encode %{ ++ __ xvaddi_hu($dst$$FloatRegister, $src$$FloatRegister, $imm$$constant); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct add8I(vecY dst, vecY src1, vecY src2) %{ ++ predicate(n->as_Vector()->length() == 8); ++ match(Set dst (AddVI src1 src2)); ++ format %{ "xvadd.wu $dst, $src1, $src2\t# @add8I" %} ++ ins_encode %{ ++ __ xvadd_w($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct add8I_imm(vecY dst, vecY src, immIU5 imm) %{ ++ predicate(n->as_Vector()->length() == 8); ++ match(Set dst (AddVI src (ReplicateI imm))); ++ format %{ "xvaddi.wu $dst, $src, $imm\t# @add8I_imm" %} ++ ins_encode %{ ++ __ xvaddi_wu($dst$$FloatRegister, $src$$FloatRegister, $imm$$constant); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct add4L(vecY dst, vecY src1, vecY src2) %{ ++ predicate(n->as_Vector()->length() == 4); ++ match(Set dst (AddVL src1 src2)); ++ format %{ "xvadd.d $dst, $src1, $src2\t# @add4L" %} ++ ins_encode %{ ++ __ xvadd_d($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct add4L_imm(vecY dst, vecY src, immLU5 imm) %{ ++ predicate(n->as_Vector()->length() == 4); ++ match(Set dst (AddVL src (ReplicateL imm))); ++ format %{ "xvaddi.du $dst, $src, $imm\t# @add4L_imm" %} ++ ins_encode %{ ++ __ xvaddi_du($dst$$FloatRegister, $src$$FloatRegister, $imm$$constant); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct add8F(vecY dst, vecY src1, vecY src2) %{ ++ predicate(n->as_Vector()->length() == 8); ++ match(Set dst (AddVF src1 src2)); ++ format %{ "xvfadd.s $dst, $src1, $src2\t# @add8F" %} ++ ins_encode %{ ++ __ xvfadd_s($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct add4D(vecY dst, vecY src1, vecY src2) %{ ++ predicate(n->as_Vector()->length() == 4); ++ match(Set 
dst (AddVD src1 src2)); ++ format %{ "xvfadd.d $dst, $src1, $src2\t# @add4D" %} ++ ins_encode %{ ++ __ xvfadd_d($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++// --------------------------------- SUB -------------------------------------- ++ ++instruct sub16B(vecX dst, vecX src1, vecX src2) %{ ++ predicate(n->as_Vector()->length() == 16); ++ match(Set dst (SubVB src1 src2)); ++ format %{ "vsub.b $dst, $src1, $src2\t# @sub16B" %} ++ ins_encode %{ ++ __ vsub_b($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct sub16B_imm(vecX dst, vecX src, immIU5 imm) %{ ++ predicate(n->as_Vector()->length() == 16); ++ match(Set dst (SubVB src (ReplicateB imm))); ++ format %{ "vsubi.bu $dst, $src, $imm\t# @sub16B_imm" %} ++ ins_encode %{ ++ __ vsubi_bu($dst$$FloatRegister, $src$$FloatRegister, $imm$$constant); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct sub8S(vecX dst, vecX src1, vecX src2) %{ ++ predicate(n->as_Vector()->length() == 8); ++ match(Set dst (SubVS src1 src2)); ++ format %{ "vsub.h $dst, $src1, $src2\t# @sub8S" %} ++ ins_encode %{ ++ __ vsub_h($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct sub8S_imm(vecX dst, vecX src, immIU5 imm) %{ ++ predicate(n->as_Vector()->length() == 8); ++ match(Set dst (SubVS src (ReplicateS imm))); ++ format %{ "vsubi.hu $dst, $src, $imm\t# @sub8S_imm" %} ++ ins_encode %{ ++ __ vsubi_hu($dst$$FloatRegister, $src$$FloatRegister, $imm$$constant); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct sub4I(vecX dst, vecX src1, vecX src2) %{ ++ predicate(n->as_Vector()->length() == 4); ++ match(Set dst (SubVI src1 src2)); ++ format %{ "vsub.w $dst, $src1, src2\t# @sub4I" %} ++ ins_encode %{ ++ __ vsub_w($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct sub4I_imm(vecX dst, vecX src, immIU5 imm) %{ ++ predicate(n->as_Vector()->length() == 4); ++ match(Set dst (SubVI src (ReplicateI imm))); ++ format %{ "vsubi.wu $dst, $src, $imm\t# @sub4I_imm" %} ++ ins_encode %{ ++ __ vsubi_wu($dst$$FloatRegister, $src$$FloatRegister, $imm$$constant); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct sub2L(vecX dst, vecX src1, vecX src2) %{ ++ predicate(n->as_Vector()->length() == 2); ++ match(Set dst (SubVL src1 src2)); ++ format %{ "vsub.d $dst, $src1, $src2\t# @sub2L" %} ++ ins_encode %{ ++ __ vsub_d($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct sub2L_imm(vecX dst, vecX src, immLU5 imm) %{ ++ predicate(n->as_Vector()->length() == 2); ++ match(Set dst (SubVL src (ReplicateL imm))); ++ format %{ "vsubi.du $dst, $src, $imm\t# @sub2L_imm" %} ++ ins_encode %{ ++ __ vsubi_du($dst$$FloatRegister, $src$$FloatRegister, $imm$$constant); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct sub4F(vecX dst, vecX src1, vecX src2) %{ ++ predicate(n->as_Vector()->length() == 4); ++ match(Set dst (SubVF src1 src2)); ++ format %{ "vfsub.s $dst, $src1, $src2\t# @sub4F" %} ++ ins_encode %{ ++ __ vfsub_s($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct sub2D(vecX dst, vecX src1, vecX src2) %{ ++ predicate(n->as_Vector()->length() == 2); ++ match(Set dst (SubVD src1 src2)); ++ format %{ "vfsub.d $dst, $src1, $src2\t# @sub2D" %} ++ ins_encode %{ ++ __ vfsub_d($dst$$FloatRegister, $src1$$FloatRegister, 
$src2$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct sub32B(vecY dst, vecY src1, vecY src2) %{ ++ predicate(n->as_Vector()->length() == 32); ++ match(Set dst (SubVB src1 src2)); ++ format %{ "xvsub.b $dst, $src1, $src2\t# @sub32B" %} ++ ins_encode %{ ++ __ xvsub_b($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct sub32B_imm(vecY dst, vecY src, immIU5 imm) %{ ++ predicate(n->as_Vector()->length() == 32); ++ match(Set dst (SubVB src (ReplicateB imm))); ++ format %{ "xvsubi.bu $dst, $src, $imm\t# @sub32B_imm" %} ++ ins_encode %{ ++ __ xvsubi_bu($dst$$FloatRegister, $src$$FloatRegister, $imm$$constant); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct sub16S(vecY dst, vecY src1, vecY src2) %{ ++ predicate(n->as_Vector()->length() == 16); ++ match(Set dst (SubVS src1 src2)); ++ format %{ "xvsub.h $dst, $src1, $src2\t# @sub16S" %} ++ ins_encode %{ ++ __ xvsub_h($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct sub16S_imm(vecY dst, vecY src, immIU5 imm) %{ ++ predicate(n->as_Vector()->length() == 16); ++ match(Set dst (SubVS src (ReplicateS imm))); ++ format %{ "xvsubi.hu $dst, $src, $imm\t# @sub16S_imm" %} ++ ins_encode %{ ++ __ xvsubi_hu($dst$$FloatRegister, $src$$FloatRegister, $imm$$constant); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct sub8I(vecY dst, vecY src1, vecY src2) %{ ++ predicate(n->as_Vector()->length() == 8); ++ match(Set dst (SubVI src1 src2)); ++ format %{ "xvsub.w $dst, $src1, $src2\t# @sub8I" %} ++ ins_encode %{ ++ __ xvsub_w($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct sub8I_imm(vecY dst, vecY src, immIU5 imm) %{ ++ predicate(n->as_Vector()->length() == 8); ++ match(Set dst (SubVI src (ReplicateI imm))); ++ format %{ "xvsubi.wu $dst, $src, $imm\t# @sub8I_imm" %} ++ ins_encode %{ ++ __ xvsubi_wu($dst$$FloatRegister, $src$$FloatRegister, $imm$$constant); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct sub4L(vecY dst, vecY src1, vecY src2) %{ ++ predicate(n->as_Vector()->length() == 4); ++ match(Set dst (SubVL src1 src2)); ++ format %{ "xvsub.d $dst, $src1, $src2\t# @sub4L" %} ++ ins_encode %{ ++ __ xvsub_d($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct sub4L_imm(vecY dst, vecY src, immLU5 imm) %{ ++ predicate(n->as_Vector()->length() == 4); ++ match(Set dst (SubVL src (ReplicateL imm))); ++ format %{ "xvsubi.du $dst, $src, $imm\t# @sub4L_imm" %} ++ ins_encode %{ ++ __ xvsubi_du($dst$$FloatRegister, $src$$FloatRegister, $imm$$constant); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct sub8F(vecY dst, vecY src1, vecY src2) %{ ++ predicate(n->as_Vector()->length() == 8); ++ match(Set dst (SubVF src1 src2)); ++ format %{ "xvfsub.s $dst, $src1, $src2\t# @sub8F" %} ++ ins_encode %{ ++ __ xvfsub_s($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct sub4D(vecY dst, vecY src1, vecY src2) %{ ++ predicate(n->as_Vector()->length() == 4); ++ match(Set dst (SubVD src1 src2)); ++ format %{ "xvfsub.d $dst,$src1,$src2\t# @sub4D" %} ++ ins_encode %{ ++ __ xvfsub_d($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++// --------------------------------- MUL -------------------------------------- ++instruct mul8S(vecX dst, vecX src1, vecX src2) %{ ++ 
predicate(n->as_Vector()->length() == 8); ++ match(Set dst (MulVS src1 src2)); ++ format %{ "vmul.h $dst, $src1, $src2\t# @mul8S" %} ++ ins_encode %{ ++ __ vmul_h($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct mul4I(vecX dst, vecX src1, vecX src2) %{ ++ predicate(n->as_Vector()->length() == 4); ++ match(Set dst (MulVI src1 src2)); ++ format %{ "vmul.w $dst, $src1, $src2\t# @mul4I" %} ++ ins_encode %{ ++ __ vmul_w($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct mul4F(vecX dst, vecX src1, vecX src2) %{ ++ predicate(n->as_Vector()->length() == 4); ++ match(Set dst (MulVF src1 src2)); ++ format %{ "vfmul.s $dst, $src1, $src2\t# @mul4F" %} ++ ins_encode %{ ++ __ vfmul_s($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct mul2D(vecX dst, vecX src1, vecX src2) %{ ++ predicate(n->as_Vector()->length() == 2); ++ match(Set dst (MulVD src1 src2)); ++ format %{ "vfmul.d $dst, $src1, $src2\t# @mul2D" %} ++ ins_encode %{ ++ __ vfmul_d($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct mul16S(vecY dst, vecY src1, vecY src2) %{ ++ predicate(n->as_Vector()->length() == 16); ++ match(Set dst (MulVS src1 src2)); ++ format %{ "xvmul.h $dst, $src1, $src2\t# @mul16S" %} ++ ins_encode %{ ++ __ xvmul_h($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct mul8I(vecY dst, vecY src1, vecY src2) %{ ++ predicate(n->as_Vector()->length() == 8); ++ match(Set dst (MulVI src1 src2)); ++ format %{ "xvmul.w $dst, $src1, $src2\t# @mul8I" %} ++ ins_encode %{ ++ __ xvmul_w($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct mul8F(vecY dst, vecY src1, vecY src2) %{ ++ predicate(n->as_Vector()->length() == 8); ++ match(Set dst (MulVF src1 src2)); ++ format %{ "xvfmul.s $dst, $src1, $src2\t# @mul8F" %} ++ ins_encode %{ ++ __ xvfmul_s($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct mul4D(vecY dst, vecY src1, vecY src2) %{ ++ predicate(n->as_Vector()->length() == 4); ++ match(Set dst (MulVD src1 src2)); ++ format %{ "xvfmul.d $dst, $src1, $src2\t# @mul4D" %} ++ ins_encode %{ ++ __ xvfmul_d($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++// --------------------------------- DIV -------------------------------------- ++instruct div4F(vecX dst, vecX src1, vecX src2) %{ ++ predicate(n->as_Vector()->length() == 4); ++ match(Set dst (DivVF src1 src2)); ++ format %{ "vfdiv.s $dst, $src1, $src2\t# @div4F" %} ++ ins_encode %{ ++ __ vfdiv_s($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct div2D(vecX dst, vecX src1, vecX src2) %{ ++ predicate(n->as_Vector()->length() == 2); ++ match(Set dst (DivVD src1 src2)); ++ format %{ "vfdiv.d $dst, $src1, $src2\t# @div2D" %} ++ ins_encode %{ ++ __ vfdiv_d($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct div8F(vecY dst, vecY src1, vecY src2) %{ ++ predicate(n->as_Vector()->length() == 8); ++ match(Set dst (DivVF src1 src2)); ++ format %{ "xvfdiv.s $dst, $src1, $src2\t# @div8F" %} ++ ins_encode %{ ++ __ xvfdiv_s($dst$$FloatRegister, $src1$$FloatRegister, 
$src2$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct div4D(vecY dst, vecY src1, vecY src2) %{ ++ predicate(n->as_Vector()->length() == 4); ++ match(Set dst (DivVD src1 src2)); ++ format %{ "xvfdiv.d $dst, $src1, $src2\t# @div4D" %} ++ ins_encode %{ ++ __ xvfdiv_d($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++// ------------------------------ Shift --------------------------------------- ++ ++instruct shiftcntX(vecX dst, mRegI cnt) %{ ++ predicate(n->as_Vector()->length_in_bytes() == 16); ++ match(Set dst (LShiftCntV cnt)); ++ match(Set dst (RShiftCntV cnt)); ++ format %{ "vreplgr2vr.b $dst, $cnt\t# @shiftcntX" %} ++ ins_encode %{ ++ __ vreplgr2vr_b($dst$$FloatRegister, $cnt$$Register); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct shiftcntY(vecY dst, mRegI cnt) %{ ++ predicate(n->as_Vector()->length_in_bytes() == 32); ++ match(Set dst (LShiftCntV cnt)); ++ match(Set dst (RShiftCntV cnt)); ++ format %{ "xvreplgr2vr.b $dst, $cnt\t# @shiftcntY" %} ++ ins_encode %{ ++ __ xvreplgr2vr_b($dst$$FloatRegister, $cnt$$Register); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++// ------------------------------ LeftShift ----------------------------------- ++ ++instruct sll16B(vecX dst, vecX src, vecX shift, vecX tmp) %{ ++ predicate(n->as_Vector()->length() == 16); ++ match(Set dst (LShiftVB src shift)); ++ effect(TEMP dst, TEMP tmp); ++ format %{ "vsll $dst, $src, $shift\t# TEMP($tmp) @sll16B" %} ++ ins_encode %{ ++ __ vsll_b($tmp$$FloatRegister, $src$$FloatRegister, $shift$$FloatRegister); ++ __ vslti_bu($dst$$FloatRegister, $shift$$FloatRegister, 0x8); ++ __ vand_v($dst$$FloatRegister, $dst$$FloatRegister, $tmp$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct sll16B_imm(vecX dst, vecX src, immI shift) %{ ++ predicate(n->as_Vector()->length() == 16); ++ match(Set dst (LShiftVB src shift)); ++ format %{ "vslli.b $dst, $src, $shift\t# @sll16B_imm" %} ++ ins_encode %{ ++ if ($shift$$constant >= 8) { ++ __ vxor_v($dst$$FloatRegister, $dst$$FloatRegister, $dst$$FloatRegister); ++ } else { ++ __ vslli_b($dst$$FloatRegister, $src$$FloatRegister, $shift$$constant); ++ } ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct sll8S(vecX dst, vecX src, vecX shift, vecX tmp) %{ ++ predicate(n->as_Vector()->length() == 8); ++ match(Set dst (LShiftVS src shift)); ++ effect(TEMP dst, TEMP tmp); ++ format %{ "vsll $dst, $src, $shift\t# TEMP($tmp) @sll8S" %} ++ ins_encode %{ ++ __ vsll_h($tmp$$FloatRegister, $src$$FloatRegister, $shift$$FloatRegister); ++ __ vslti_bu($dst$$FloatRegister, $shift$$FloatRegister, 0x10); ++ __ vand_v($dst$$FloatRegister, $dst$$FloatRegister, $tmp$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct sll8S_imm(vecX dst, vecX src, immI shift) %{ ++ predicate(n->as_Vector()->length() == 8); ++ match(Set dst (LShiftVS src shift)); ++ format %{ "vslli.h $dst, $src, $shift\t# @sll8S_imm" %} ++ ins_encode %{ ++ if ($shift$$constant >= 16) { ++ __ vxor_v($dst$$FloatRegister, $dst$$FloatRegister, $dst$$FloatRegister); ++ } else { ++ __ vslli_h($dst$$FloatRegister, $src$$FloatRegister, $shift$$constant); ++ } ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct sll4I(vecX dst, vecX src, vecX shift) %{ ++ predicate(n->as_Vector()->length() == 4); ++ match(Set dst (LShiftVI src shift)); ++ format %{ "vsll.w $dst, $src, $shift\t# @sll4I" %} ++ ins_encode %{ ++ __ vsll_w($dst$$FloatRegister, $src$$FloatRegister, $shift$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct 
sll4I_imm(vecX dst, vecX src, immI shift) %{ ++ predicate(n->as_Vector()->length() == 4); ++ match(Set dst (LShiftVI src shift)); ++ format %{ "vslli.w $dst, $src, $shift\t# @sll4I_imm" %} ++ ins_encode %{ ++ __ vslli_w($dst$$FloatRegister, $src$$FloatRegister, $shift$$constant); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct sll2L(vecX dst, vecX src, vecX shift) %{ ++ predicate(n->as_Vector()->length() == 2); ++ match(Set dst (LShiftVL src shift)); ++ format %{ "vsll.d $dst, $src, $shift\t# @sll2L" %} ++ ins_encode %{ ++ __ vsll_d($dst$$FloatRegister, $src$$FloatRegister, $shift$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct sll2L_imm(vecX dst, vecX src, immI shift) %{ ++ predicate(n->as_Vector()->length() == 2); ++ match(Set dst (LShiftVL src shift)); ++ format %{ "vslli.d $dst, $src, $shift\t# @sll2L_imm" %} ++ ins_encode %{ ++ __ vslli_d($dst$$FloatRegister, $src$$FloatRegister, $shift$$constant); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct sll32B(vecY dst, vecY src, vecY shift, vecY tmp) %{ ++ predicate(n->as_Vector()->length() == 32); ++ match(Set dst (LShiftVB src shift)); ++ effect(TEMP dst, TEMP tmp); ++ format %{ "xvsll $dst, $src, $shift\t# TEMP($tmp) @sll32B" %} ++ ins_encode %{ ++ __ xvsll_b($tmp$$FloatRegister, $src$$FloatRegister, $shift$$FloatRegister); ++ __ xvslti_bu($dst$$FloatRegister, $shift$$FloatRegister, 0x8); ++ __ xvand_v($dst$$FloatRegister, $dst$$FloatRegister, $tmp$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct sll32B_imm(vecY dst, vecY src, immI shift) %{ ++ predicate(n->as_Vector()->length() == 32); ++ match(Set dst (LShiftVB src shift)); ++ format %{ "xvslli.b $dst, $src, $shift\t# @sll32B_imm" %} ++ ins_encode %{ ++ if ($shift$$constant >= 8) { ++ __ xvxor_v($dst$$FloatRegister, $dst$$FloatRegister, $dst$$FloatRegister); ++ } else { ++ __ xvslli_b($dst$$FloatRegister, $src$$FloatRegister, $shift$$constant); ++ } ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct sll16S(vecY dst, vecY src, vecY shift, vecY tmp) %{ ++ predicate(n->as_Vector()->length() == 16); ++ match(Set dst (LShiftVS src shift)); ++ effect(TEMP dst, TEMP tmp); ++ format %{ "xvsll $dst, $src, $shift\t# TEMP($tmp) @sll16S" %} ++ ins_encode %{ ++ __ xvsll_h($tmp$$FloatRegister, $src$$FloatRegister, $shift$$FloatRegister); ++ __ xvslti_bu($dst$$FloatRegister, $shift$$FloatRegister, 0x10); ++ __ xvand_v($dst$$FloatRegister, $dst$$FloatRegister, $tmp$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct sll16S_imm(vecY dst, vecY src, immI shift) %{ ++ predicate(n->as_Vector()->length() == 16); ++ match(Set dst (LShiftVS src shift)); ++ format %{ "xvslli.h $dst, $src, $shift\t# @sll16S_imm" %} ++ ins_encode %{ ++ if ($shift$$constant >= 16) { ++ __ xvxor_v($dst$$FloatRegister, $dst$$FloatRegister, $dst$$FloatRegister); ++ } else { ++ __ xvslli_h($dst$$FloatRegister, $src$$FloatRegister, $shift$$constant); ++ } ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct sll8I(vecY dst, vecY src, vecY shift) %{ ++ predicate(n->as_Vector()->length() == 8); ++ match(Set dst (LShiftVI src shift)); ++ format %{ "xvsll.w $dst, $src, $shift\t# @sll8I" %} ++ ins_encode %{ ++ __ xvsll_w($dst$$FloatRegister, $src$$FloatRegister, $shift$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct sll8I_imm(vecY dst, vecY src, immI shift) %{ ++ predicate(n->as_Vector()->length() == 8); ++ match(Set dst (LShiftVI src shift)); ++ format %{ "xvslli.w $dst, $src, $shift\t# @sll8I_imm" %} ++ ins_encode %{ ++ __ xvslli_w($dst$$FloatRegister, 
$src$$FloatRegister, $shift$$constant); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct sll4L(vecY dst, vecY src, vecY shift) %{ ++ predicate(n->as_Vector()->length() == 4); ++ match(Set dst (LShiftVL src shift)); ++ format %{ "xvsll.d $dst, $src, $shift\t# @sll4L" %} ++ ins_encode %{ ++ __ xvsll_d($dst$$FloatRegister, $src$$FloatRegister, $shift$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct sll4L_imm(vecY dst, vecY src, immI shift) %{ ++ predicate(n->as_Vector()->length() == 4); ++ match(Set dst (LShiftVL src shift)); ++ format %{ "xvslli.d $dst, $src, $shift\t# @sll4L_imm" %} ++ ins_encode %{ ++ __ xvslli_d($dst$$FloatRegister, $src$$FloatRegister, $shift$$constant); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++// ----------------------- LogicalRightShift ---------------------------------- ++ ++instruct srl16B(vecX dst, vecX src, vecX shift, vecX tmp) %{ ++ predicate(n->as_Vector()->length() == 16); ++ match(Set dst (URShiftVB src shift)); ++ effect(TEMP dst, TEMP tmp); ++ format %{ "vsrl $dst, $src, $shift\t# TEMP($tmp) @srl16B" %} ++ ins_encode %{ ++ __ vsrl_b($tmp$$FloatRegister, $src$$FloatRegister, $shift$$FloatRegister); ++ __ vslti_bu($dst$$FloatRegister, $shift$$FloatRegister, 0x8); ++ __ vand_v($dst$$FloatRegister, $dst$$FloatRegister, $tmp$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct srl16B_imm(vecX dst, vecX src, immI shift) %{ ++ predicate(n->as_Vector()->length() == 16); ++ match(Set dst (URShiftVB src shift)); ++ format %{ "vsrli.b $dst, $src, $shift\t# @srl16B_imm" %} ++ ins_encode %{ ++ if ($shift$$constant >= 8) { ++ __ vxor_v($dst$$FloatRegister, $dst$$FloatRegister, $dst$$FloatRegister); ++ } else { ++ __ vsrli_b($dst$$FloatRegister, $src$$FloatRegister, $shift$$constant); ++ } ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct srl8S(vecX dst, vecX src, vecX shift, vecX tmp) %{ ++ predicate(n->as_Vector()->length() == 8); ++ match(Set dst (URShiftVS src shift)); ++ effect(TEMP dst, TEMP tmp); ++ format %{ "vsrl $dst, $src, $shift\t# TEMP($tmp) @srl8S" %} ++ ins_encode %{ ++ __ vsrl_h($tmp$$FloatRegister, $src$$FloatRegister, $shift$$FloatRegister); ++ __ vslti_bu($dst$$FloatRegister, $shift$$FloatRegister, 0x10); ++ __ vand_v($dst$$FloatRegister, $dst$$FloatRegister, $tmp$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct srl8S_imm(vecX dst, vecX src, immI shift) %{ ++ predicate(n->as_Vector()->length() == 8); ++ match(Set dst (URShiftVS src shift)); ++ format %{ "vsrli.h $dst, $src, $shift\t# @srl8S_imm" %} ++ ins_encode %{ ++ if ($shift$$constant >= 16) { ++ __ vxor_v($dst$$FloatRegister, $dst$$FloatRegister, $dst$$FloatRegister); ++ } else { ++ __ vsrli_h($dst$$FloatRegister, $src$$FloatRegister, $shift$$constant); ++ } ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct srl4I(vecX dst, vecX src, vecX shift) %{ ++ predicate(n->as_Vector()->length() == 4); ++ match(Set dst (URShiftVI src shift)); ++ format %{ "vsrl.w $dst, $src, $shift\t# @srl4I" %} ++ ins_encode %{ ++ __ vsrl_w($dst$$FloatRegister, $src$$FloatRegister, $shift$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct srl4I_imm(vecX dst, vecX src, immI shift) %{ ++ predicate(n->as_Vector()->length() == 4); ++ match(Set dst (URShiftVI src shift)); ++ format %{ "vsrli.w $dst, $src, $shift\t# @srl4I_imm" %} ++ ins_encode %{ ++ __ vsrli_w($dst$$FloatRegister, $src$$FloatRegister, $shift$$constant); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct srl2L(vecX dst, vecX src, vecX shift) %{ ++ predicate(n->as_Vector()->length() == 2); ++ 
match(Set dst (URShiftVL src shift)); ++ format %{ "vsrl.d $dst, $src, $shift\t# @srl2L" %} ++ ins_encode %{ ++ __ vsrl_d($dst$$FloatRegister, $src$$FloatRegister, $shift$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct srl2L_imm(vecX dst, vecX src, immI shift) %{ ++ predicate(n->as_Vector()->length() == 2); ++ match(Set dst (URShiftVL src shift)); ++ format %{ "vsrli.d $dst, $src, $shift\t# @srl2L_imm" %} ++ ins_encode %{ ++ __ vsrli_d($dst$$FloatRegister, $src$$FloatRegister, $shift$$constant); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct srl32B(vecY dst, vecY src, vecY shift, vecY tmp) %{ ++ predicate(n->as_Vector()->length() == 32); ++ match(Set dst (URShiftVB src shift)); ++ effect(TEMP dst, TEMP tmp); ++ format %{ "xvsrl $dst, $src, $shift\t# TEMP($tmp) @srl32B" %} ++ ins_encode %{ ++ __ xvsrl_b($tmp$$FloatRegister, $src$$FloatRegister, $shift$$FloatRegister); ++ __ xvslti_bu($dst$$FloatRegister, $shift$$FloatRegister, 0x8); ++ __ xvand_v($dst$$FloatRegister, $dst$$FloatRegister, $tmp$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct srl32B_imm(vecY dst, vecY src, immI shift) %{ ++ predicate(n->as_Vector()->length() == 32); ++ match(Set dst (URShiftVB src shift)); ++ format %{ "xvsrli.b $dst, $src, $shift\t# @srl32B_imm" %} ++ ins_encode %{ ++ if ($shift$$constant >= 8) { ++ __ xvxor_v($dst$$FloatRegister, $dst$$FloatRegister, $dst$$FloatRegister); ++ } else { ++ __ xvsrli_b($dst$$FloatRegister, $src$$FloatRegister, $shift$$constant); ++ } ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct srl16S(vecY dst, vecY src, vecY shift, vecY tmp) %{ ++ predicate(n->as_Vector()->length() == 16); ++ match(Set dst (URShiftVS src shift)); ++ effect(TEMP dst, TEMP tmp); ++ format %{ "xvsrl $dst, $src, $shift\t# TEMP($tmp) @srl16S" %} ++ ins_encode %{ ++ __ xvsrl_h($tmp$$FloatRegister, $src$$FloatRegister, $shift$$FloatRegister); ++ __ xvslti_bu($dst$$FloatRegister, $shift$$FloatRegister, 0x10); ++ __ xvand_v($dst$$FloatRegister, $dst$$FloatRegister, $tmp$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct srl16S_imm(vecY dst, vecY src, immI shift) %{ ++ predicate(n->as_Vector()->length() == 16); ++ match(Set dst (URShiftVS src shift)); ++ format %{ "xvsrli.h $dst, $src, $shift\t# @srl16S_imm" %} ++ ins_encode %{ ++ if ($shift$$constant >= 16) { ++ __ xvxor_v($dst$$FloatRegister, $dst$$FloatRegister, $dst$$FloatRegister); ++ } else { ++ __ xvsrli_h($dst$$FloatRegister, $src$$FloatRegister, $shift$$constant); ++ } ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct srl8I(vecY dst, vecY src, vecY shift) %{ ++ predicate(n->as_Vector()->length() == 8); ++ match(Set dst (URShiftVI src shift)); ++ format %{ "xvsrl.w $dst, $src, $shift\t# @srl8I" %} ++ ins_encode %{ ++ __ xvsrl_w($dst$$FloatRegister, $src$$FloatRegister, $shift$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct srl8I_imm(vecY dst, vecY src, immI shift) %{ ++ predicate(n->as_Vector()->length() == 8); ++ match(Set dst (URShiftVI src shift)); ++ format %{ "xvsrli.w $dst, $src, $shift\t# @srl8I_imm" %} ++ ins_encode %{ ++ __ xvsrli_w($dst$$FloatRegister, $src$$FloatRegister, $shift$$constant); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct srl4L(vecY dst, vecY src, vecY shift) %{ ++ predicate(n->as_Vector()->length() == 4); ++ match(Set dst (URShiftVL src shift)); ++ format %{ "xvsrl.d $dst, $src, $shift\t# @srl4L" %} ++ ins_encode %{ ++ __ xvsrl_d($dst$$FloatRegister, $src$$FloatRegister, $shift$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct 
srl4L_imm(vecY dst, vecY src, immI shift) %{ ++ predicate(n->as_Vector()->length() == 4); ++ match(Set dst (URShiftVL src shift)); ++ format %{ "xvsrli.d $dst, $src, $shift\t# @srl4L_imm" %} ++ ins_encode %{ ++ __ xvsrli_d($dst$$FloatRegister, $src$$FloatRegister, $shift$$constant); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++// ------------------------- ArithmeticRightShift ----------------------------- ++ ++instruct sra16B(vecX dst, vecX src, vecX shift, vecX tmp) %{ ++ predicate(n->as_Vector()->length() == 16); ++ match(Set dst (RShiftVB src shift)); ++ effect(TEMP tmp); ++ format %{ "vsra $dst, $src, $shift\t# TEMP($tmp) @sra16B" %} ++ ins_encode %{ ++ __ vslti_bu($tmp$$FloatRegister, $shift$$FloatRegister, 0x8); ++ __ vorn_v($tmp$$FloatRegister, $shift$$FloatRegister, $tmp$$FloatRegister); ++ __ vsra_b($dst$$FloatRegister, $src$$FloatRegister, $tmp$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct sra16B_imm(vecX dst, vecX src, immI shift) %{ ++ predicate(n->as_Vector()->length() == 16); ++ match(Set dst (RShiftVB src shift)); ++ format %{ "vsrai.b $dst, $src, $shift\t# @sra16B_imm" %} ++ ins_encode %{ ++ if ($shift$$constant >= 8) { ++ __ vsrai_b($dst$$FloatRegister, $src$$FloatRegister, 7); ++ } else { ++ __ vsrai_b($dst$$FloatRegister, $src$$FloatRegister, $shift$$constant); ++ } ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct sra8S(vecX dst, vecX src, vecX shift, vecX tmp) %{ ++ predicate(n->as_Vector()->length() == 8); ++ match(Set dst (RShiftVS src shift)); ++ effect(TEMP tmp); ++ format %{ "vsra $dst, $src, $shift\t# TEMP($tmp) @sra8S" %} ++ ins_encode %{ ++ __ vslti_bu($tmp$$FloatRegister, $shift$$FloatRegister, 0x10); ++ __ vorn_v($tmp$$FloatRegister, $shift$$FloatRegister, $tmp$$FloatRegister); ++ __ vsra_h($dst$$FloatRegister, $src$$FloatRegister, $tmp$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct sra8S_imm(vecX dst, vecX src, immI shift) %{ ++ predicate(n->as_Vector()->length() == 8); ++ match(Set dst (RShiftVS src shift)); ++ format %{ "vsrai.h $dst, $src, $shift\t# @sra8S_imm" %} ++ ins_encode %{ ++ if ($shift$$constant >= 16) { ++ __ vsrai_h($dst$$FloatRegister, $src$$FloatRegister, 15); ++ } else { ++ __ vsrai_h($dst$$FloatRegister, $src$$FloatRegister, $shift$$constant); ++ } ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct sra4I(vecX dst, vecX src, vecX shift) %{ ++ predicate(n->as_Vector()->length() == 4); ++ match(Set dst (RShiftVI src shift)); ++ format %{ "vsra.w $dst, $src, $shift\t# @sra4I" %} ++ ins_encode %{ ++ __ vsra_w($dst$$FloatRegister, $src$$FloatRegister, $shift$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct sra4I_imm(vecX dst, vecX src, immI shift) %{ ++ predicate(n->as_Vector()->length() == 4); ++ match(Set dst (RShiftVI src shift)); ++ format %{ "vsrai.w $dst, $src, $shift\t# @sra4I_imm" %} ++ ins_encode %{ ++ __ vsrai_w($dst$$FloatRegister, $src$$FloatRegister, $shift$$constant); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct sra2L(vecX dst, vecX src, vecX shift) %{ ++ predicate(n->as_Vector()->length() == 2); ++ match(Set dst (RShiftVL src shift)); ++ format %{ "vsra.d $dst, $src, $shift\t# @sra2L" %} ++ ins_encode %{ ++ __ vsra_d($dst$$FloatRegister, $src$$FloatRegister, $shift$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct sra2L_imm(vecX dst, vecX src, immI shift) %{ ++ predicate(n->as_Vector()->length() == 2); ++ match(Set dst (RShiftVL src shift)); ++ format %{ "vsrai.d $dst, $src, $shift\t# @sra2L_imm" %} ++ ins_encode %{ ++ __ 
vsrai_d($dst$$FloatRegister, $src$$FloatRegister, $shift$$constant); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct sra32B(vecY dst, vecY src, vecY shift, vecY tmp) %{ ++ predicate(n->as_Vector()->length() == 32); ++ match(Set dst (RShiftVB src shift)); ++ effect(TEMP tmp); ++ format %{ "xvsra $dst, $src, $shift\t# TEMP($tmp) @sra32B" %} ++ ins_encode %{ ++ __ xvslti_bu($tmp$$FloatRegister, $shift$$FloatRegister, 0x8); ++ __ xvorn_v($tmp$$FloatRegister, $shift$$FloatRegister, $tmp$$FloatRegister); ++ __ xvsra_b($dst$$FloatRegister, $src$$FloatRegister, $tmp$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct sra32B_imm(vecY dst, vecY src, immI shift) %{ ++ predicate(n->as_Vector()->length() == 32); ++ match(Set dst (RShiftVB src shift)); ++ format %{ "xvsrai.b $dst, $src, $shift\t# @sra32B_imm" %} ++ ins_encode %{ ++ if ($shift$$constant >= 8) { ++ __ xvsrai_b($dst$$FloatRegister, $src$$FloatRegister, 7); ++ } else { ++ __ xvsrai_b($dst$$FloatRegister, $src$$FloatRegister, $shift$$constant); ++ } ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct sra16S(vecY dst, vecY src, vecY shift, vecY tmp) %{ ++ predicate(n->as_Vector()->length() == 16); ++ match(Set dst (RShiftVS src shift)); ++ effect(TEMP tmp); ++ format %{ "xvsra $dst, $src, $shift\t# TEMP($tmp) @sra16S" %} ++ ins_encode %{ ++ __ xvslti_bu($tmp$$FloatRegister, $shift$$FloatRegister, 0x10); ++ __ xvorn_v($tmp$$FloatRegister, $shift$$FloatRegister, $tmp$$FloatRegister); ++ __ xvsra_h($dst$$FloatRegister, $src$$FloatRegister, $tmp$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct sra16S_imm(vecY dst, vecY src, immI shift) %{ ++ predicate(n->as_Vector()->length() == 16); ++ match(Set dst (RShiftVS src shift)); ++ format %{ "xvsrai.h $dst, $src, $shift\t# @sra16S_imm" %} ++ ins_encode %{ ++ if ($shift$$constant >= 16) { ++ __ xvsrai_h($dst$$FloatRegister, $src$$FloatRegister, 15); ++ } else { ++ __ xvsrai_h($dst$$FloatRegister, $src$$FloatRegister, $shift$$constant); ++ } ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct sra8I(vecY dst, vecY src, vecY shift) %{ ++ predicate(n->as_Vector()->length() == 8); ++ match(Set dst (RShiftVI src shift)); ++ format %{ "xvsra.w $dst, $src, $shift\t# @sra8I" %} ++ ins_encode %{ ++ __ xvsra_w($dst$$FloatRegister, $src$$FloatRegister, $shift$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct sra8I_imm(vecY dst, vecY src, immI shift) %{ ++ predicate(n->as_Vector()->length() == 8); ++ match(Set dst (RShiftVI src shift)); ++ format %{ "xvsrai.w $dst, $src, $shift\t# @sra8I_imm" %} ++ ins_encode %{ ++ __ xvsrai_w($dst$$FloatRegister, $src$$FloatRegister, $shift$$constant); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct sra4L(vecY dst, vecY src, vecY shift) %{ ++ predicate(n->as_Vector()->length() == 4); ++ match(Set dst (RShiftVL src shift)); ++ format %{ "xvsra.d $dst, $src, $shift\t# @sra4L" %} ++ ins_encode %{ ++ __ xvsra_d($dst$$FloatRegister, $src$$FloatRegister, $shift$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct sra4L_imm(vecY dst, vecY src, immI shift) %{ ++ predicate(n->as_Vector()->length() == 4); ++ match(Set dst (RShiftVL src shift)); ++ format %{ "xvsrai.d $dst, $src, $shift\t# @sra4L_imm" %} ++ ins_encode %{ ++ __ xvsrai_d($dst$$FloatRegister, $src$$FloatRegister, $shift$$constant); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++// --------------------------------- AND -------------------------------------- ++ ++instruct andV16(vecX dst, vecX src1, vecX src2) %{ ++ predicate(n->as_Vector()->length_in_bytes() == 
16); ++ match(Set dst (AndV src1 src2)); ++ format %{ "vand.v $dst, $src1, $src2\t# @andV16" %} ++ ins_encode %{ ++ __ vand_v($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct and16B_imm(vecX dst, vecX src, immIU8 imm) %{ ++ predicate(n->as_Vector()->length() == 16); ++ match(Set dst (AndV src (ReplicateB imm))); ++ format %{ "vandi.b $dst, $src, $imm\t# @and16B_imm" %} ++ ins_encode %{ ++ __ vandi_b($dst$$FloatRegister, $src$$FloatRegister, $imm$$constant); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct andV32(vecY dst, vecY src1, vecY src2) %{ ++ predicate(n->as_Vector()->length_in_bytes() == 32); ++ match(Set dst (AndV src1 src2)); ++ format %{ "xvand.v $dst, $src1, $src2\t# @andV32" %} ++ ins_encode %{ ++ __ xvand_v($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct and32B_imm(vecY dst, vecY src, immIU8 imm) %{ ++ predicate(n->as_Vector()->length() == 32); ++ match(Set dst (AndV src (ReplicateB imm))); ++ format %{ "xvandi.b $dst, $src, $imm\t# @and32B_imm" %} ++ ins_encode %{ ++ __ xvandi_b($dst$$FloatRegister, $src$$FloatRegister, $imm$$constant); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++// --------------------------------- OR --------------------------------------- ++ ++instruct orV16(vecX dst, vecX src1, vecX src2) %{ ++ predicate(n->as_Vector()->length_in_bytes() == 16); ++ match(Set dst (OrV src1 src2)); ++ format %{ "vor.v $dst, $src1, $src2\t# @orV16" %} ++ ins_encode %{ ++ __ vor_v($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct or16B_imm(vecX dst, vecX src, immIU8 imm) %{ ++ predicate(n->as_Vector()->length() == 16); ++ match(Set dst (OrV src (ReplicateB imm))); ++ format %{ "vori.b $dst, $src, $imm\t# @or16B_imm" %} ++ ins_encode %{ ++ __ vori_b($dst$$FloatRegister, $src$$FloatRegister, $imm$$constant); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct orV32(vecY dst, vecY src1, vecY src2) %{ ++ predicate(n->as_Vector()->length_in_bytes() == 32); ++ match(Set dst (OrV src1 src2)); ++ format %{ "xvor.v $dst, $src1, $src2\t# @orV32" %} ++ ins_encode %{ ++ __ xvor_v($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct or32B_imm(vecY dst, vecY src, immIU8 imm) %{ ++ predicate(n->as_Vector()->length() == 32); ++ match(Set dst (OrV src (ReplicateB imm))); ++ format %{ "xvori.b $dst, $src, $imm\t# @or32B_imm" %} ++ ins_encode %{ ++ __ xvori_b($dst$$FloatRegister, $src$$FloatRegister, $imm$$constant); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++// --------------------------------- XOR -------------------------------------- ++ ++instruct xorV16(vecX dst, vecX src1, vecX src2) %{ ++ predicate(n->as_Vector()->length_in_bytes() == 16); ++ match(Set dst (XorV src1 src2)); ++ format %{ "vxor.v $dst, $src1, $src2\t# @xorV16" %} ++ ins_encode %{ ++ __ vxor_v($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct xor16B_imm(vecX dst, vecX src, immIU8 imm) %{ ++ predicate(n->as_Vector()->length() == 16); ++ match(Set dst (XorV src (ReplicateB imm))); ++ format %{ "vxori.b $dst, $src, $imm\t# @xor16B_imm" %} ++ ins_encode %{ ++ __ vxori_b($dst$$FloatRegister, $src$$FloatRegister, $imm$$constant); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct xorV32(vecY dst, vecY src1, vecY src2) %{ ++ predicate(n->as_Vector()->length_in_bytes() == 32); ++ match(Set dst (XorV 
src1 src2)); ++ format %{ "xvxor.v $dst, $src1, $src2\t# @xorV32" %} ++ ins_encode %{ ++ __ xvxor_v($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct xor32B_imm(vecX dst, vecX src, immIU8 imm) %{ ++ predicate(n->as_Vector()->length() == 32); ++ match(Set dst (XorV src (ReplicateB imm))); ++ format %{ "xvxori.b $dst, $src, $imm\t# @xor32B_imm" %} ++ ins_encode %{ ++ __ xvxori_b($dst$$FloatRegister, $src$$FloatRegister, $imm$$constant); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++// --------------------------------- NOR -------------------------------------- ++ ++instruct norV16(vecX dst, vecX src1, vecX src2, immI_M1 m1) %{ ++ predicate(n->as_Vector()->length_in_bytes() == 16); ++ match(Set dst (XorV (OrV src1 src2) (ReplicateB m1))); ++ match(Set dst (XorV (OrV src1 src2) (ReplicateS m1))); ++ match(Set dst (XorV (OrV src1 src2) (ReplicateI m1))); ++ format %{ "vnor.v $dst, $src1, $src2\t# @norV16" %} ++ ins_encode %{ ++ __ vnor_v($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct nor16B_imm(vecX dst, vecX src, immIU8 imm, immI_M1 m1) %{ ++ predicate(n->as_Vector()->length() == 16); ++ match(Set dst (XorV (OrV src (ReplicateB imm)) (ReplicateB m1))); ++ format %{ "vnori.b $dst, $src, $imm\t# @nor16B_imm" %} ++ ins_encode %{ ++ __ vnori_b($dst$$FloatRegister, $src$$FloatRegister, $imm$$constant); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct norV32(vecY dst, vecY src1, vecY src2, immI_M1 m1) %{ ++ predicate(n->as_Vector()->length_in_bytes() == 32); ++ match(Set dst (XorV (OrV src1 src2) (ReplicateB m1))); ++ match(Set dst (XorV (OrV src1 src2) (ReplicateS m1))); ++ match(Set dst (XorV (OrV src1 src2) (ReplicateI m1))); ++ format %{ "xvnor.v $dst, $src1, $src2\t# @norV32" %} ++ ins_encode %{ ++ __ xvnor_v($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct nor32B_imm(vecY dst, vecY src, immIU8 imm, immI_M1 m1) %{ ++ predicate(n->as_Vector()->length() == 32); ++ match(Set dst (XorV (OrV src (ReplicateB imm)) (ReplicateB m1))); ++ format %{ "xvnori.b $dst, $src, $imm\t# @nor32B_imm" %} ++ ins_encode %{ ++ __ xvnori_b($dst$$FloatRegister, $src$$FloatRegister, $imm$$constant); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++// --------------------------------- ANDN ------------------------------------- ++ ++instruct andnV16(vecX dst, vecX src1, vecX src2, immI_M1 m1) %{ ++ predicate(n->as_Vector()->length_in_bytes() == 16); ++ match(Set dst (AndV src2 (XorV src1 (ReplicateB m1)))); ++ match(Set dst (AndV src2 (XorV src1 (ReplicateS m1)))); ++ match(Set dst (AndV src2 (XorV src1 (ReplicateI m1)))); ++ format %{ "vandn.v $dst, $src1, $src2\t# @andnV16" %} ++ ins_encode %{ ++ __ vandn_v($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct andnV32(vecY dst, vecY src1, vecY src2, immI_M1 m1) %{ ++ predicate(n->as_Vector()->length_in_bytes() == 32); ++ match(Set dst (AndV src2 (XorV src1 (ReplicateB m1)))); ++ match(Set dst (AndV src2 (XorV src1 (ReplicateS m1)))); ++ match(Set dst (AndV src2 (XorV src1 (ReplicateI m1)))); ++ format %{ "xvandn.v $dst, $src1, $src2\t# @andnV32" %} ++ ins_encode %{ ++ __ xvandn_v($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++// --------------------------------- ORN -------------------------------------- ++ ++instruct ornV16(vecX dst, vecX src1, vecX src2, 
immI_M1 m1) %{ ++ predicate(n->as_Vector()->length_in_bytes() == 16); ++ match(Set dst (OrV src1 (XorV src2 (ReplicateB m1)))); ++ match(Set dst (OrV src1 (XorV src2 (ReplicateS m1)))); ++ match(Set dst (OrV src1 (XorV src2 (ReplicateI m1)))); ++ format %{ "vorn.v $dst, $src1, $src2\t# @ornV16" %} ++ ins_encode %{ ++ __ vorn_v($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct ornV32(vecY dst, vecY src1, vecY src2, immI_M1 m1) %{ ++ predicate(n->as_Vector()->length_in_bytes() == 32); ++ match(Set dst (OrV src1 (XorV src2 (ReplicateB m1)))); ++ match(Set dst (OrV src1 (XorV src2 (ReplicateS m1)))); ++ match(Set dst (OrV src1 (XorV src2 (ReplicateI m1)))); ++ format %{ "xvorn.v $dst, $src1, $src2\t# @ornV32" %} ++ ins_encode %{ ++ __ xvorn_v($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++ ++//----------PEEPHOLE RULES----------------------------------------------------- ++// These must follow all instruction definitions as they use the names ++// defined in the instructions definitions. ++// ++// peepmatch ( root_instr_name [preceeding_instruction]* ); ++// ++// peepconstraint %{ ++// (instruction_number.operand_name relational_op instruction_number.operand_name ++// [, ...] ); ++// // instruction numbers are zero-based using left to right order in peepmatch ++// ++// peepreplace ( instr_name ( [instruction_number.operand_name]* ) ); ++// // provide an instruction_number.operand_name for each operand that appears ++// // in the replacement instruction's match rule ++// ++// ---------VM FLAGS--------------------------------------------------------- ++// ++// All peephole optimizations can be turned off using -XX:-OptoPeephole ++// ++// Each peephole rule is given an identifying number starting with zero and ++// increasing by one in the order seen by the parser. An individual peephole ++// can be enabled, and all others disabled, by using -XX:OptoPeepholeAt=# ++// on the command-line. ++// ++// ---------CURRENT LIMITATIONS---------------------------------------------- ++// ++// Only match adjacent instructions in same basic block ++// Only equality constraints ++// Only constraints between operands, not (0.dest_reg == EAX_enc) ++// Only one replacement instruction ++// ++// ---------EXAMPLE---------------------------------------------------------- ++// ++// // pertinent parts of existing instructions in architecture description ++// instruct movI(eRegI dst, eRegI src) %{ ++// match(Set dst (CopyI src)); ++// %} ++// ++// instruct incI_eReg(eRegI dst, immI_1 src, eFlagsReg cr) %{ ++// match(Set dst (AddI dst src)); ++// effect(KILL cr); ++// %} ++// ++// // Change (inc mov) to lea ++// peephole %{ ++// // increment preceeded by register-register move ++// peepmatch ( incI_eReg movI ); ++// // require that the destination register of the increment ++// // match the destination register of the move ++// peepconstraint ( 0.dst == 1.dst ); ++// // construct a replacement instruction that sets ++// // the destination to ( move's source register + one ) ++// peepreplace ( leaI_eReg_immI( 0.dst 1.src 0.src ) ); ++// %} ++// ++// Implementation no longer uses movX instructions since ++// machine-independent system no longer uses CopyX nodes. 
++// ++// peephole %{ ++// peepmatch ( incI_eReg movI ); ++// peepconstraint ( 0.dst == 1.dst ); ++// peepreplace ( leaI_eReg_immI( 0.dst 1.src 0.src ) ); ++// %} ++// ++// peephole %{ ++// peepmatch ( decI_eReg movI ); ++// peepconstraint ( 0.dst == 1.dst ); ++// peepreplace ( leaI_eReg_immI( 0.dst 1.src 0.src ) ); ++// %} ++// ++// peephole %{ ++// peepmatch ( addI_eReg_imm movI ); ++// peepconstraint ( 0.dst == 1.dst ); ++// peepreplace ( leaI_eReg_immI( 0.dst 1.src 0.src ) ); ++// %} ++// ++// peephole %{ ++// peepmatch ( addP_eReg_imm movP ); ++// peepconstraint ( 0.dst == 1.dst ); ++// peepreplace ( leaP_eReg_immI( 0.dst 1.src 0.src ) ); ++// %} ++ ++// // Change load of spilled value to only a spill ++// instruct storeI(memory mem, eRegI src) %{ ++// match(Set mem (StoreI mem src)); ++// %} ++// ++// instruct loadI(eRegI dst, memory mem) %{ ++// match(Set dst (LoadI mem)); ++// %} ++// ++//peephole %{ ++// peepmatch ( loadI storeI ); ++// peepconstraint ( 1.src == 0.dst, 1.mem == 0.mem ); ++// peepreplace ( storeI( 1.mem 1.mem 1.src ) ); ++//%} ++ ++//----------SMARTSPILL RULES--------------------------------------------------- ++// These must follow all instruction definitions as they use the names ++// defined in the instructions definitions. ++ +diff --git a/hotspot/src/cpu/loongarch/vm/macroAssembler_loongarch.cpp b/hotspot/src/cpu/loongarch/vm/macroAssembler_loongarch.cpp +new file mode 100644 +index 0000000000..89295343ce +--- /dev/null ++++ b/hotspot/src/cpu/loongarch/vm/macroAssembler_loongarch.cpp +@@ -0,0 +1,3895 @@ ++/* ++ * Copyright (c) 1997, 2014, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2017, 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ * ++ */ ++ ++#include "precompiled.hpp" ++#include "asm/assembler.hpp" ++#include "asm/assembler.inline.hpp" ++#include "asm/macroAssembler.inline.hpp" ++#include "compiler/disassembler.hpp" ++#include "gc_interface/collectedHeap.inline.hpp" ++#include "interpreter/interpreter.hpp" ++#include "memory/cardTableModRefBS.hpp" ++#include "memory/resourceArea.hpp" ++#include "memory/universe.hpp" ++#include "prims/methodHandles.hpp" ++#include "runtime/biasedLocking.hpp" ++#include "runtime/interfaceSupport.hpp" ++#include "runtime/objectMonitor.hpp" ++#include "runtime/os.hpp" ++#include "runtime/sharedRuntime.hpp" ++#include "runtime/stubRoutines.hpp" ++#include "utilities/macros.hpp" ++#if INCLUDE_ALL_GCS ++#include "gc_implementation/g1/g1CollectedHeap.inline.hpp" ++#include "gc_implementation/g1/g1SATBCardTableModRefBS.hpp" ++#include "gc_implementation/g1/heapRegion.hpp" ++#endif // INCLUDE_ALL_GCS ++ ++#ifdef COMPILER2 ++#include "opto/compile.hpp" ++#include "opto/node.hpp" ++#endif ++ ++#define A0 RA0 ++#define A1 RA1 ++#define A2 RA2 ++#define A3 RA3 ++#define A4 RA4 ++#define A5 RA5 ++#define A6 RA6 ++#define A7 RA7 ++#define T0 RT0 ++#define T1 RT1 ++#define T2 RT2 ++#define T3 RT3 ++#define T4 RT4 ++#define T5 RT5 ++#define T6 RT6 ++#define T7 RT7 ++#define T8 RT8 ++ ++// Implementation of MacroAssembler ++ ++intptr_t MacroAssembler::i[32] = {0}; ++float MacroAssembler::f[32] = {0.0}; ++ ++void MacroAssembler::print(outputStream *s) { ++ unsigned int k; ++ for(k=0; k<32; k++) { ++ s->print_cr("i%d = 0x%.16lx", k, i[k]); ++ } ++ s->cr(); ++ ++ for(k=0; k<32; k++) { ++ s->print_cr("f%d = %f", k, f[k]); ++ } ++ s->cr(); ++} ++ ++int MacroAssembler::i_offset(unsigned int k) { return (intptr_t)&((MacroAssembler*)0)->i[k]; } ++int MacroAssembler::f_offset(unsigned int k) { return (intptr_t)&((MacroAssembler*)0)->f[k]; } ++ ++void MacroAssembler::save_registers(MacroAssembler *masm) { ++#define __ masm-> ++ for(int k=0; k<32; k++) { ++ __ st_w (as_Register(k), A0, i_offset(k)); ++ } ++ ++ for(int k=0; k<32; k++) { ++ __ fst_s (as_FloatRegister(k), A0, f_offset(k)); ++ } ++#undef __ ++} ++ ++void MacroAssembler::restore_registers(MacroAssembler *masm) { ++#define __ masm-> ++ for(int k=0; k<32; k++) { ++ __ ld_w (as_Register(k), A0, i_offset(k)); ++ } ++ ++ for(int k=0; k<32; k++) { ++ __ fld_s (as_FloatRegister(k), A0, f_offset(k)); ++ } ++#undef __ ++} ++ ++ ++void MacroAssembler::pd_patch_instruction(address branch, address target) { ++ jint& stub_inst = *(jint*)branch; ++ jint* pc = (jint*)branch; ++ ++ if (high(stub_inst, 7) == pcaddu18i_op) { ++ // far: ++ // pcaddu18i reg, si20 ++ // jirl r0, reg, si18 ++ ++ assert(high(pc[1], 6) == jirl_op, "Not a branch label patch"); ++ jlong offs = target - branch; ++ CodeBuffer cb(branch, 2 * BytesPerInstWord); ++ MacroAssembler masm(&cb); ++ if (reachable_from_branch_short(offs)) { ++ // convert far to short ++#define __ masm.
++ __ b(target); ++ __ nop(); ++#undef __ ++ } else { ++ masm.patchable_jump_far(R0, offs); ++ } ++ return; ++ } else if (high(stub_inst, 7) == pcaddi_op) { ++ // see MacroAssembler::set_last_Java_frame: ++ // pcaddi reg, si20 ++ ++ jint offs = (target - branch) >> 2; ++ guarantee(is_simm(offs, 20), "Not signed 20-bit offset"); ++ CodeBuffer cb(branch, 1 * BytesPerInstWord); ++ MacroAssembler masm(&cb); ++ masm.pcaddi(as_Register(low(stub_inst, 5)), offs); ++ return; ++ } ++ ++ stub_inst = patched_branch(target - branch, stub_inst, 0); ++} ++ ++bool MacroAssembler::reachable_from_branch_short(jlong offs) { ++ if (ForceUnreachable) { ++ return false; ++ } ++ return is_simm(offs >> 2, 26); ++} ++ ++void MacroAssembler::patchable_jump_far(Register ra, jlong offs) { ++ jint si18, si20; ++ guarantee(is_simm(offs, 38), "Not signed 38-bit offset"); ++ split_simm38(offs, si18, si20); ++ pcaddu18i(T4, si20); ++ jirl(ra, T4, si18); ++} ++ ++void MacroAssembler::patchable_jump(address target, bool force_patchable) { ++ assert(ReservedCodeCacheSize < 4*G, "branch out of range"); ++ assert(CodeCache::find_blob(target) != NULL, ++ "destination of jump not found in code cache"); ++ if (force_patchable || patchable_branches()) { ++ jlong offs = target - pc(); ++ if (reachable_from_branch_short(offs)) { // Short jump ++ b(offset26(target)); ++ nop(); ++ } else { // Far jump ++ patchable_jump_far(R0, offs); ++ } ++ } else { // Real short jump ++ b(offset26(target)); ++ } ++} ++ ++void MacroAssembler::patchable_call(address target, address call_site) { ++ jlong offs = target - (call_site ? call_site : pc()); ++ if (reachable_from_branch_short(offs - BytesPerInstWord)) { // Short call ++ nop(); ++ bl((offs - BytesPerInstWord) >> 2); ++ } else { // Far call ++ patchable_jump_far(RA, offs); ++ } ++} ++ ++// Maybe emit a call via a trampoline. If the code cache is small ++// trampolines won't be emitted. ++ ++address MacroAssembler::trampoline_call(AddressLiteral entry, CodeBuffer *cbuf) { ++ assert(JavaThread::current()->is_Compiler_thread(), "just checking"); ++ assert(entry.rspec().type() == relocInfo::runtime_call_type ++ || entry.rspec().type() == relocInfo::opt_virtual_call_type ++ || entry.rspec().type() == relocInfo::static_call_type ++ || entry.rspec().type() == relocInfo::virtual_call_type, "wrong reloc type"); ++ ++ // We need a trampoline if branches are far. ++ if (far_branches()) { ++ bool in_scratch_emit_size = false; ++#ifdef COMPILER2 ++ // We don't want to emit a trampoline if C2 is generating dummy ++ // code during its branch shortening phase. ++ CompileTask* task = ciEnv::current()->task(); ++ in_scratch_emit_size = ++ (task != NULL && is_c2_compile(task->comp_level()) && ++ Compile::current()->in_scratch_emit_size()); ++#endif ++ if (!in_scratch_emit_size) { ++ address stub = emit_trampoline_stub(offset(), entry.target()); ++ if (stub == NULL) { ++ return NULL; // CodeCache is full ++ } ++ } ++ } ++ ++ if (cbuf) cbuf->set_insts_mark(); ++ relocate(entry.rspec()); ++ if (!far_branches()) { ++ bl(entry.target()); ++ } else { ++ bl(pc()); ++ } ++ // just need to return a non-null address ++ return pc(); ++} ++ ++// Emit a trampoline stub for a call to a target which is too far away. 
++// ++// code sequences: ++// ++// call-site: ++// branch-and-link to <destination> or <trampoline stub> ++// ++// Related trampoline stub for this call site in the stub section: ++// load the call target from the constant pool ++// branch (RA still points to the call site above) ++ ++address MacroAssembler::emit_trampoline_stub(int insts_call_instruction_offset, ++ address dest) { ++ // Start the stub ++ address stub = start_a_stub(NativeInstruction::nop_instruction_size ++ + NativeCallTrampolineStub::instruction_size); ++ if (stub == NULL) { ++ return NULL; // CodeBuffer::expand failed ++ } ++ ++ // Create a trampoline stub relocation which relates this trampoline stub ++ // with the call instruction at insts_call_instruction_offset in the ++ // instructions code-section. ++ align(wordSize); ++ relocate(trampoline_stub_Relocation::spec(code()->insts()->start() ++ + insts_call_instruction_offset)); ++ const int stub_start_offset = offset(); ++ ++ // Now, create the trampoline stub's code: ++ // - load the call ++ // - call ++ pcaddi(T4, 0); ++ ld_d(T4, T4, 16); ++ jr(T4); ++ nop(); //align ++ assert(offset() - stub_start_offset == NativeCallTrampolineStub::data_offset, ++ "should be"); ++ emit_int64((int64_t)dest); ++ ++ const address stub_start_addr = addr_at(stub_start_offset); ++ ++ NativeInstruction* ni = nativeInstruction_at(stub_start_addr); ++ assert(ni->is_NativeCallTrampolineStub_at(), "doesn't look like a trampoline"); ++ ++ end_a_stub(); ++ return stub_start_addr; ++} ++ ++void MacroAssembler::beq_far(Register rs, Register rt, address entry) { ++ if (is_simm16((entry - pc()) >> 2)) { // Short jump ++ beq(rs, rt, offset16(entry)); ++ } else { // Far jump ++ Label not_jump; ++ bne(rs, rt, not_jump); ++ b_far(entry); ++ bind(not_jump); ++ } ++} ++ ++void MacroAssembler::beq_far(Register rs, Register rt, Label& L) { ++ if (L.is_bound()) { ++ beq_far(rs, rt, target(L)); ++ } else { ++ Label not_jump; ++ bne(rs, rt, not_jump); ++ b_far(L); ++ bind(not_jump); ++ } ++} ++ ++void MacroAssembler::bne_far(Register rs, Register rt, address entry) { ++ if (is_simm16((entry - pc()) >> 2)) { // Short jump ++ bne(rs, rt, offset16(entry)); ++ } else { // Far jump ++ Label not_jump; ++ beq(rs, rt, not_jump); ++ b_far(entry); ++ bind(not_jump); ++ } ++} ++ ++void MacroAssembler::bne_far(Register rs, Register rt, Label& L) { ++ if (L.is_bound()) { ++ bne_far(rs, rt, target(L)); ++ } else { ++ Label not_jump; ++ beq(rs, rt, not_jump); ++ b_far(L); ++ bind(not_jump); ++ } ++} ++ ++void MacroAssembler::blt_far(Register rs, Register rt, address entry, bool is_signed) { ++ if (is_simm16((entry - pc()) >> 2)) { // Short jump ++ if (is_signed) { ++ blt(rs, rt, offset16(entry)); ++ } else { ++ bltu(rs, rt, offset16(entry)); ++ } ++ } else { // Far jump ++ Label not_jump; ++ if (is_signed) { ++ bge(rs, rt, not_jump); ++ } else { ++ bgeu(rs, rt, not_jump); ++ } ++ b_far(entry); ++ bind(not_jump); ++ } ++} ++ ++void MacroAssembler::blt_far(Register rs, Register rt, Label& L, bool is_signed) { ++ if (L.is_bound()) { ++ blt_far(rs, rt, target(L), is_signed); ++ } else { ++ Label not_jump; ++ if (is_signed) { ++ bge(rs, rt, not_jump); ++ } else { ++ bgeu(rs, rt, not_jump); ++ } ++ b_far(L); ++ bind(not_jump); ++ } ++} ++ ++void MacroAssembler::bge_far(Register rs, Register rt, address entry, bool is_signed) { ++ if (is_simm16((entry - pc()) >> 2)) { // Short jump ++ if (is_signed) { ++ bge(rs, rt, offset16(entry)); ++ } else { ++ bgeu(rs, rt, offset16(entry)); ++ } ++ } else { // Far jump ++ Label not_jump; ++ if (is_signed) { ++ blt(rs, rt,
not_jump); ++ } else { ++ bltu(rs, rt, not_jump); ++ } ++ b_far(entry); ++ bind(not_jump); ++ } ++} ++ ++void MacroAssembler::bge_far(Register rs, Register rt, Label& L, bool is_signed) { ++ if (L.is_bound()) { ++ bge_far(rs, rt, target(L), is_signed); ++ } else { ++ Label not_jump; ++ if (is_signed) { ++ blt(rs, rt, not_jump); ++ } else { ++ bltu(rs, rt, not_jump); ++ } ++ b_far(L); ++ bind(not_jump); ++ } ++} ++ ++void MacroAssembler::beq_long(Register rs, Register rt, Label& L) { ++ Label not_taken; ++ bne(rs, rt, not_taken); ++ jmp_far(L); ++ bind(not_taken); ++} ++ ++void MacroAssembler::bne_long(Register rs, Register rt, Label& L) { ++ Label not_taken; ++ beq(rs, rt, not_taken); ++ jmp_far(L); ++ bind(not_taken); ++} ++ ++void MacroAssembler::blt_long(Register rs, Register rt, Label& L, bool is_signed) { ++ Label not_taken; ++ if (is_signed) { ++ bge(rs, rt, not_taken); ++ } else { ++ bgeu(rs, rt, not_taken); ++ } ++ jmp_far(L); ++ bind(not_taken); ++} ++ ++void MacroAssembler::bge_long(Register rs, Register rt, Label& L, bool is_signed) { ++ Label not_taken; ++ if (is_signed) { ++ blt(rs, rt, not_taken); ++ } else { ++ bltu(rs, rt, not_taken); ++ } ++ jmp_far(L); ++ bind(not_taken); ++} ++ ++void MacroAssembler::bc1t_long(Label& L) { ++ Label not_taken; ++ bceqz(FCC0, not_taken); ++ jmp_far(L); ++ bind(not_taken); ++} ++ ++void MacroAssembler::bc1f_long(Label& L) { ++ Label not_taken; ++ bcnez(FCC0, not_taken); ++ jmp_far(L); ++ bind(not_taken); ++} ++ ++void MacroAssembler::b_far(Label& L) { ++ if (L.is_bound()) { ++ b_far(target(L)); ++ } else { ++ L.add_patch_at(code(), locator()); ++ if (ForceUnreachable) { ++ patchable_jump_far(R0, 0); ++ } else { ++ b(0); ++ } ++ } ++} ++ ++void MacroAssembler::b_far(address entry) { ++ jlong offs = entry - pc(); ++ if (reachable_from_branch_short(offs)) { // Short jump ++ b(offset26(entry)); ++ } else { // Far jump ++ patchable_jump_far(R0, offs); ++ } ++} ++ ++void MacroAssembler::ld_ptr(Register rt, Register base, Register offset) { ++ ldx_d(rt, base, offset); ++} ++ ++void MacroAssembler::st_ptr(Register rt, Register base, Register offset) { ++ stx_d(rt, base, offset); ++} ++ ++void MacroAssembler::ld_long(Register rt, Register offset, Register base) { ++ //TODO: LA ++ guarantee(0, "LA not implemented yet"); ++#if 0 ++ add_d(AT, base, offset); ++ ld_long(rt, 0, AT); ++#endif ++} ++ ++void MacroAssembler::st_long(Register rt, Register offset, Register base) { ++ //TODO: LA ++ guarantee(0, "LA not implemented yet"); ++#if 0 ++ add_d(AT, base, offset); ++ st_long(rt, 0, AT); ++#endif ++} ++ ++Address MacroAssembler::as_Address(AddressLiteral adr) { ++ return Address(adr.target(), adr.rspec()); ++} ++ ++Address MacroAssembler::as_Address(ArrayAddress adr) { ++ return Address::make_array(adr); ++} ++ ++// tmp_reg1 and tmp_reg2 should be saved outside of atomic_inc32 (caller saved). 
++void MacroAssembler::atomic_inc32(address counter_addr, int inc, Register tmp_reg1, Register tmp_reg2) { ++ li(tmp_reg1, inc); ++ li(tmp_reg2, counter_addr); ++ amadd_w(R0, tmp_reg1, tmp_reg2); ++} ++ ++int MacroAssembler::biased_locking_enter(Register lock_reg, ++ Register obj_reg, ++ Register swap_reg, ++ Register tmp_reg, ++ bool swap_reg_contains_mark, ++ Label& done, ++ Label* slow_case, ++ BiasedLockingCounters* counters) { ++ assert(UseBiasedLocking, "why call this otherwise?"); ++ bool need_tmp_reg = false; ++ if (tmp_reg == noreg) { ++ need_tmp_reg = true; ++ tmp_reg = T4; ++ } ++ assert_different_registers(lock_reg, obj_reg, swap_reg, tmp_reg, AT); ++ assert(markOopDesc::age_shift == markOopDesc::lock_bits + markOopDesc::biased_lock_bits, "biased locking makes assumptions about bit layout"); ++ Address mark_addr (obj_reg, oopDesc::mark_offset_in_bytes()); ++ Address saved_mark_addr(lock_reg, 0); ++ ++ // Biased locking ++ // See whether the lock is currently biased toward our thread and ++ // whether the epoch is still valid ++ // Note that the runtime guarantees sufficient alignment of JavaThread ++ // pointers to allow age to be placed into low bits ++ // First check to see whether biasing is even enabled for this object ++ Label cas_label; ++ int null_check_offset = -1; ++ if (!swap_reg_contains_mark) { ++ null_check_offset = offset(); ++ ld_ptr(swap_reg, mark_addr); ++ } ++ ++ if (need_tmp_reg) { ++ push(tmp_reg); ++ } ++ move(tmp_reg, swap_reg); ++ andi(tmp_reg, tmp_reg, markOopDesc::biased_lock_mask_in_place); ++ addi_d(AT, R0, markOopDesc::biased_lock_pattern); ++ sub_d(AT, AT, tmp_reg); ++ if (need_tmp_reg) { ++ pop(tmp_reg); ++ } ++ ++ bne(AT, R0, cas_label); ++ ++ ++ // The bias pattern is present in the object's header. Need to check ++ // whether the bias owner and the epoch are both still current. ++ // Note that because there is no current thread register on LA we ++ // need to store off the mark word we read out of the object to ++ // avoid reloading it and needing to recheck invariants below. This ++ // store is unfortunate but it makes the overall code shorter and ++ // simpler. ++ st_ptr(swap_reg, saved_mark_addr); ++ if (need_tmp_reg) { ++ push(tmp_reg); ++ } ++ if (swap_reg_contains_mark) { ++ null_check_offset = offset(); ++ } ++ load_prototype_header(tmp_reg, obj_reg); ++ xorr(tmp_reg, tmp_reg, swap_reg); ++ get_thread(swap_reg); ++ xorr(swap_reg, swap_reg, tmp_reg); ++ ++ li(AT, ~((int) markOopDesc::age_mask_in_place)); ++ andr(swap_reg, swap_reg, AT); ++ ++ if (PrintBiasedLockingStatistics) { ++ Label L; ++ bne(swap_reg, R0, L); ++ push(tmp_reg); ++ push(A0); ++ atomic_inc32((address)BiasedLocking::biased_lock_entry_count_addr(), 1, A0, tmp_reg); ++ pop(A0); ++ pop(tmp_reg); ++ bind(L); ++ } ++ if (need_tmp_reg) { ++ pop(tmp_reg); ++ } ++ beq(swap_reg, R0, done); ++ Label try_revoke_bias; ++ Label try_rebias; ++ ++ // At this point we know that the header has the bias pattern and ++ // that we are not the bias owner in the current epoch. We need to ++ // figure out more details about the state of the header in order to ++ // know what operations can be legally performed on the object's ++ // header. ++ ++ // If the low three bits in the xor result aren't clear, that means ++ // the prototype header is no longer biased and we have to revoke ++ // the bias on this object. ++ ++ li(AT, markOopDesc::biased_lock_mask_in_place); ++ andr(AT, swap_reg, AT); ++ bne(AT, R0, try_revoke_bias); ++ // Biasing is still enabled for this data type. 
See whether the ++ // epoch of the current bias is still valid, meaning that the epoch ++ // bits of the mark word are equal to the epoch bits of the ++ // prototype header. (Note that the prototype header's epoch bits ++ // only change at a safepoint.) If not, attempt to rebias the object ++ // toward the current thread. Note that we must be absolutely sure ++ // that the current epoch is invalid in order to do this because ++ // otherwise the manipulations it performs on the mark word are ++ // illegal. ++ ++ li(AT, markOopDesc::epoch_mask_in_place); ++ andr(AT,swap_reg, AT); ++ bne(AT, R0, try_rebias); ++ // The epoch of the current bias is still valid but we know nothing ++ // about the owner; it might be set or it might be clear. Try to ++ // acquire the bias of the object using an atomic operation. If this ++ // fails we will go in to the runtime to revoke the object's bias. ++ // Note that we first construct the presumed unbiased header so we ++ // don't accidentally blow away another thread's valid bias. ++ ++ ld_ptr(swap_reg, saved_mark_addr); ++ ++ li(AT, markOopDesc::biased_lock_mask_in_place | markOopDesc::age_mask_in_place | markOopDesc::epoch_mask_in_place); ++ andr(swap_reg, swap_reg, AT); ++ ++ if (need_tmp_reg) { ++ push(tmp_reg); ++ } ++ get_thread(tmp_reg); ++ orr(tmp_reg, tmp_reg, swap_reg); ++ cmpxchg(Address(obj_reg, 0), swap_reg, tmp_reg, AT, false, false); ++ if (need_tmp_reg) { ++ pop(tmp_reg); ++ } ++ // If the biasing toward our thread failed, this means that ++ // another thread succeeded in biasing it toward itself and we ++ // need to revoke that bias. The revocation will occur in the ++ // interpreter runtime in the slow case. ++ if (PrintBiasedLockingStatistics) { ++ Label L; ++ bne(AT, R0, L); ++ push(tmp_reg); ++ push(A0); ++ atomic_inc32((address)BiasedLocking::anonymously_biased_lock_entry_count_addr(), 1, A0, tmp_reg); ++ pop(A0); ++ pop(tmp_reg); ++ bind(L); ++ } ++ if (slow_case != NULL) { ++ beq_far(AT, R0, *slow_case); ++ } ++ b(done); ++ ++ bind(try_rebias); ++ // At this point we know the epoch has expired, meaning that the ++ // current "bias owner", if any, is actually invalid. Under these ++ // circumstances _only_, we are allowed to use the current header's ++ // value as the comparison value when doing the cas to acquire the ++ // bias in the current epoch. In other words, we allow transfer of ++ // the bias from one thread to another directly in this situation. ++ // ++ // FIXME: due to a lack of registers we currently blow away the age ++ // bits in this situation. Should attempt to preserve them. ++ if (need_tmp_reg) { ++ push(tmp_reg); ++ } ++ load_prototype_header(tmp_reg, obj_reg); ++ get_thread(swap_reg); ++ orr(tmp_reg, tmp_reg, swap_reg); ++ ld_ptr(swap_reg, saved_mark_addr); ++ ++ cmpxchg(Address(obj_reg, 0), swap_reg, tmp_reg, AT, false, false); ++ if (need_tmp_reg) { ++ pop(tmp_reg); ++ } ++ // If the biasing toward our thread failed, then another thread ++ // succeeded in biasing it toward itself and we need to revoke that ++ // bias. The revocation will occur in the runtime in the slow case. 
++ if (PrintBiasedLockingStatistics) { ++ Label L; ++ bne(AT, R0, L); ++ push(AT); ++ push(tmp_reg); ++ atomic_inc32((address)BiasedLocking::rebiased_lock_entry_count_addr(), 1, AT, tmp_reg); ++ pop(tmp_reg); ++ pop(AT); ++ bind(L); ++ } ++ if (slow_case != NULL) { ++ beq_far(AT, R0, *slow_case); ++ } ++ ++ b(done); ++ bind(try_revoke_bias); ++ // The prototype mark in the klass doesn't have the bias bit set any ++ // more, indicating that objects of this data type are not supposed ++ // to be biased any more. We are going to try to reset the mark of ++ // this object to the prototype value and fall through to the ++ // CAS-based locking scheme. Note that if our CAS fails, it means ++ // that another thread raced us for the privilege of revoking the ++ // bias of this particular object, so it's okay to continue in the ++ // normal locking code. ++ // ++ // FIXME: due to a lack of registers we currently blow away the age ++ // bits in this situation. Should attempt to preserve them. ++ ld_ptr(swap_reg, saved_mark_addr); ++ ++ if (need_tmp_reg) { ++ push(tmp_reg); ++ } ++ load_prototype_header(tmp_reg, obj_reg); ++ cmpxchg(Address(obj_reg, 0), swap_reg, tmp_reg, AT, false, false); ++ if (need_tmp_reg) { ++ pop(tmp_reg); ++ } ++ // Fall through to the normal CAS-based lock, because no matter what ++ // the result of the above CAS, some thread must have succeeded in ++ // removing the bias bit from the object's header. ++ if (PrintBiasedLockingStatistics) { ++ Label L; ++ bne(AT, R0, L); ++ push(AT); ++ push(tmp_reg); ++ atomic_inc32((address)BiasedLocking::revoked_lock_entry_count_addr(), 1, AT, tmp_reg); ++ pop(tmp_reg); ++ pop(AT); ++ bind(L); ++ } ++ ++ bind(cas_label); ++ return null_check_offset; ++} ++ ++void MacroAssembler::biased_locking_exit(Register obj_reg, Register temp_reg, Label& done) { ++ assert(UseBiasedLocking, "why call this otherwise?"); ++ ++ // Check for biased locking unlock case, which is a no-op ++ // Note: we do not have to check the thread ID for two reasons. ++ // First, the interpreter checks for IllegalMonitorStateException at ++ // a higher level. Second, if the bias was revoked while we held the ++ // lock, the object could not be rebiased toward another thread, so ++ // the bias bit would be clear. ++ ld_d(temp_reg, Address(obj_reg, oopDesc::mark_offset_in_bytes())); ++ andi(temp_reg, temp_reg, markOopDesc::biased_lock_mask_in_place); ++ addi_d(AT, R0, markOopDesc::biased_lock_pattern); ++ ++ beq(AT, temp_reg, done); ++} ++ ++// the stack pointer adjustment is needed. 
see InterpreterMacroAssembler::super_call_VM_leaf ++// this method will handle the stack problem, you need not to preserve the stack space for the argument now ++void MacroAssembler::call_VM_leaf_base(address entry_point, int number_of_arguments) { ++ Label L, E; ++ ++ assert(number_of_arguments <= 4, "just check"); ++ ++ andi(AT, SP, 0xf); ++ beq(AT, R0, L); ++ addi_d(SP, SP, -8); ++ call(entry_point, relocInfo::runtime_call_type); ++ addi_d(SP, SP, 8); ++ b(E); ++ ++ bind(L); ++ call(entry_point, relocInfo::runtime_call_type); ++ bind(E); ++} ++ ++ ++void MacroAssembler::jmp(address entry) { ++ jlong offs = entry - pc(); ++ if (reachable_from_branch_short(offs)) { // Short jump ++ b(offset26(entry)); ++ } else { // Far jump ++ patchable_jump_far(R0, offs); ++ } ++} ++ ++void MacroAssembler::jmp(address entry, relocInfo::relocType rtype) { ++ switch (rtype) { ++ case relocInfo::none: ++ jmp(entry); ++ break; ++ default: ++ { ++ InstructionMark im(this); ++ relocate(rtype); ++ patchable_jump(entry); ++ } ++ break; ++ } ++} ++ ++void MacroAssembler::jmp_far(Label& L) { ++ if (L.is_bound()) { ++ assert(target(L) != NULL, "jmp most probably wrong"); ++ patchable_jump(target(L), true /* force patchable */); ++ } else { ++ L.add_patch_at(code(), locator()); ++ patchable_jump_far(R0, 0); ++ } ++} ++ ++void MacroAssembler::mov_metadata(Address dst, Metadata* obj) { ++ int oop_index; ++ if (obj) { ++ oop_index = oop_recorder()->find_index(obj); ++ } else { ++ oop_index = oop_recorder()->allocate_metadata_index(obj); ++ } ++ relocate(metadata_Relocation::spec(oop_index)); ++ patchable_li52(AT, (long)obj); ++ st_d(AT, dst); ++} ++ ++void MacroAssembler::mov_metadata(Register dst, Metadata* obj) { ++ int oop_index; ++ if (obj) { ++ oop_index = oop_recorder()->find_index(obj); ++ } else { ++ oop_index = oop_recorder()->allocate_metadata_index(obj); ++ } ++ relocate(metadata_Relocation::spec(oop_index)); ++ patchable_li52(dst, (long)obj); ++} ++ ++void MacroAssembler::call(address entry) { ++ jlong offs = entry - pc(); ++ if (reachable_from_branch_short(offs)) { // Short call (pc-rel) ++ bl(offset26(entry)); ++ } else if (is_simm(offs, 38)) { // Far call (pc-rel) ++ patchable_jump_far(RA, offs); ++ } else { // Long call (absolute) ++ call_long(entry); ++ } ++} ++ ++void MacroAssembler::call(address entry, relocInfo::relocType rtype) { ++ switch (rtype) { ++ case relocInfo::none: ++ call(entry); ++ break; ++ case relocInfo::runtime_call_type: ++ if (!is_simm(entry - pc(), 38)) { ++ call_long(entry); ++ break; ++ } ++ // fallthrough ++ default: ++ { ++ InstructionMark im(this); ++ relocate(rtype); ++ patchable_call(entry); ++ } ++ break; ++ } ++} ++ ++void MacroAssembler::call(address entry, RelocationHolder& rh) { ++ switch (rh.type()) { ++ case relocInfo::none: ++ call(entry); ++ break; ++ case relocInfo::runtime_call_type: ++ if (!is_simm(entry - pc(), 38)) { ++ call_long(entry); ++ break; ++ } ++ // fallthrough ++ default: ++ { ++ InstructionMark im(this); ++ relocate(rh); ++ patchable_call(entry); ++ } ++ break; ++ } ++} ++ ++void MacroAssembler::call_long(address entry) { ++ jlong value = (jlong)entry; ++ lu12i_w(T4, split_low20(value >> 12)); ++ lu32i_d(T4, split_low20(value >> 32)); ++ jirl(RA, T4, split_low12(value)); ++} ++ ++address MacroAssembler::ic_call(address entry) { ++ RelocationHolder rh = virtual_call_Relocation::spec(pc()); ++ patchable_li52(IC_Klass, (long)Universe::non_oop_word()); ++ assert(entry != NULL, "call most probably wrong"); ++ InstructionMark im(this); ++ return 
trampoline_call(AddressLiteral(entry, rh)); ++} ++ ++void MacroAssembler::c2bool(Register r) { ++ sltu(r, R0, r); ++} ++ ++#ifndef PRODUCT ++extern "C" void findpc(intptr_t x); ++#endif ++ ++void MacroAssembler::debug(char* msg/*, RegistersForDebugging* regs*/) { ++ if ( ShowMessageBoxOnError ) { ++ JavaThreadState saved_state = JavaThread::current()->thread_state(); ++ JavaThread::current()->set_thread_state(_thread_in_vm); ++ { ++ // In order to get locks work, we need to fake a in_VM state ++ ttyLocker ttyl; ++ ::tty->print_cr("EXECUTION STOPPED: %s\n", msg); ++ if (CountBytecodes || TraceBytecodes || StopInterpreterAt) { ++ BytecodeCounter::print(); ++ } ++ ++ } ++ ThreadStateTransition::transition(JavaThread::current(), _thread_in_vm, saved_state); ++ } ++ else ++ ::tty->print_cr("=============== DEBUG MESSAGE: %s ================\n", msg); ++} ++ ++ ++void MacroAssembler::stop(const char* msg) { ++ li(A0, (long)msg); ++ call(CAST_FROM_FN_PTR(address, MacroAssembler::debug), relocInfo::runtime_call_type); ++ brk(17); ++} ++ ++void MacroAssembler::warn(const char* msg) { ++ pushad(); ++ li(A0, (long)msg); ++ push(S2); ++ li(AT, -(StackAlignmentInBytes)); ++ move(S2, SP); // use S2 as a sender SP holder ++ andr(SP, SP, AT); // align stack as required by ABI ++ call(CAST_FROM_FN_PTR(address, MacroAssembler::debug), relocInfo::runtime_call_type); ++ move(SP, S2); // use S2 as a sender SP holder ++ pop(S2); ++ popad(); ++} ++ ++void MacroAssembler::increment(Register reg, int imm) { ++ if (!imm) return; ++ if (is_simm(imm, 12)) { ++ addi_d(reg, reg, imm); ++ } else { ++ li(AT, imm); ++ add_d(reg, reg, AT); ++ } ++} ++ ++void MacroAssembler::decrement(Register reg, int imm) { ++ increment(reg, -imm); ++} ++ ++void MacroAssembler::increment(Address addr, int imm) { ++ if (!imm) return; ++ assert(is_simm(imm, 12), "must be"); ++ ld_ptr(AT, addr); ++ addi_d(AT, AT, imm); ++ st_ptr(AT, addr); ++} ++ ++void MacroAssembler::decrement(Address addr, int imm) { ++ increment(addr, -imm); ++} ++ ++void MacroAssembler::call_VM(Register oop_result, ++ address entry_point, ++ bool check_exceptions) { ++ call_VM_helper(oop_result, entry_point, 0, check_exceptions); ++} ++ ++void MacroAssembler::call_VM(Register oop_result, ++ address entry_point, ++ Register arg_1, ++ bool check_exceptions) { ++ if (arg_1!=A1) move(A1, arg_1); ++ call_VM_helper(oop_result, entry_point, 1, check_exceptions); ++} ++ ++void MacroAssembler::call_VM(Register oop_result, ++ address entry_point, ++ Register arg_1, ++ Register arg_2, ++ bool check_exceptions) { ++ if (arg_1!=A1) move(A1, arg_1); ++ if (arg_2!=A2) move(A2, arg_2); ++ assert(arg_2 != A1, "smashed argument"); ++ call_VM_helper(oop_result, entry_point, 2, check_exceptions); ++} ++ ++void MacroAssembler::call_VM(Register oop_result, ++ address entry_point, ++ Register arg_1, ++ Register arg_2, ++ Register arg_3, ++ bool check_exceptions) { ++ if (arg_1!=A1) move(A1, arg_1); ++ if (arg_2!=A2) move(A2, arg_2); assert(arg_2 != A1, "smashed argument"); ++ if (arg_3!=A3) move(A3, arg_3); assert(arg_3 != A1 && arg_3 != A2, "smashed argument"); ++ call_VM_helper(oop_result, entry_point, 3, check_exceptions); ++} ++ ++void MacroAssembler::call_VM(Register oop_result, ++ Register last_java_sp, ++ address entry_point, ++ int number_of_arguments, ++ bool check_exceptions) { ++ call_VM_base(oop_result, NOREG, last_java_sp, entry_point, number_of_arguments, check_exceptions); ++} ++ ++void MacroAssembler::call_VM(Register oop_result, ++ Register last_java_sp, ++ address 
entry_point, ++ Register arg_1, ++ bool check_exceptions) { ++ if (arg_1 != A1) move(A1, arg_1); ++ call_VM(oop_result, last_java_sp, entry_point, 1, check_exceptions); ++} ++ ++void MacroAssembler::call_VM(Register oop_result, ++ Register last_java_sp, ++ address entry_point, ++ Register arg_1, ++ Register arg_2, ++ bool check_exceptions) { ++ if (arg_1 != A1) move(A1, arg_1); ++ if (arg_2 != A2) move(A2, arg_2); assert(arg_2 != A1, "smashed argument"); ++ call_VM(oop_result, last_java_sp, entry_point, 2, check_exceptions); ++} ++ ++void MacroAssembler::call_VM(Register oop_result, ++ Register last_java_sp, ++ address entry_point, ++ Register arg_1, ++ Register arg_2, ++ Register arg_3, ++ bool check_exceptions) { ++ if (arg_1 != A1) move(A1, arg_1); ++ if (arg_2 != A2) move(A2, arg_2); assert(arg_2 != A1, "smashed argument"); ++ if (arg_3 != A3) move(A3, arg_3); assert(arg_3 != A1 && arg_3 != A2, "smashed argument"); ++ call_VM(oop_result, last_java_sp, entry_point, 3, check_exceptions); ++} ++ ++void MacroAssembler::call_VM_base(Register oop_result, ++ Register java_thread, ++ Register last_java_sp, ++ address entry_point, ++ int number_of_arguments, ++ bool check_exceptions) { ++ // determine java_thread register ++ if (!java_thread->is_valid()) { ++#ifndef OPT_THREAD ++ java_thread = T2; ++ get_thread(java_thread); ++#else ++ java_thread = TREG; ++#endif ++ } ++ // determine last_java_sp register ++ if (!last_java_sp->is_valid()) { ++ last_java_sp = SP; ++ } ++ // debugging support ++ assert(number_of_arguments >= 0 , "cannot have negative number of arguments"); ++ assert(number_of_arguments <= 4 , "cannot have negative number of arguments"); ++ assert(java_thread != oop_result , "cannot use the same register for java_thread & oop_result"); ++ assert(java_thread != last_java_sp, "cannot use the same register for java_thread & last_java_sp"); ++ ++ assert(last_java_sp != FP, "this code doesn't work for last_java_sp == fp, which currently can't portably work anyway since C2 doesn't save fp"); ++ ++ // set last Java frame before call ++ Label before_call; ++ bind(before_call); ++ set_last_Java_frame(java_thread, last_java_sp, FP, before_call); ++ ++ // do the call ++ move(A0, java_thread); ++ call(entry_point, relocInfo::runtime_call_type); ++ ++ // restore the thread (cannot use the pushed argument since arguments ++ // may be overwritten by C code generated by an optimizing compiler); ++ // however can use the register value directly if it is callee saved. 
++#ifndef OPT_THREAD ++ get_thread(java_thread); ++#else ++#ifdef ASSERT ++ { ++ Label L; ++ get_thread(AT); ++ beq(java_thread, AT, L); ++ stop("MacroAssembler::call_VM_base: TREG not callee saved?"); ++ bind(L); ++ } ++#endif ++#endif ++ ++ // discard thread and arguments ++ ld_ptr(SP, java_thread, in_bytes(JavaThread::last_Java_sp_offset())); ++ // reset last Java frame ++ reset_last_Java_frame(java_thread, false); ++ ++ check_and_handle_popframe(java_thread); ++ check_and_handle_earlyret(java_thread); ++ if (check_exceptions) { ++ // check for pending exceptions (java_thread is set upon return) ++ Label L; ++ ld_d(AT, java_thread, in_bytes(Thread::pending_exception_offset())); ++ beq(AT, R0, L); ++ li(AT, target(before_call)); ++ push(AT); ++ jmp(StubRoutines::forward_exception_entry(), relocInfo::runtime_call_type); ++ bind(L); ++ } ++ ++ // get oop result if there is one and reset the value in the thread ++ if (oop_result->is_valid()) { ++ ld_d(oop_result, java_thread, in_bytes(JavaThread::vm_result_offset())); ++ st_d(R0, java_thread, in_bytes(JavaThread::vm_result_offset())); ++ verify_oop(oop_result); ++ } ++} ++ ++void MacroAssembler::call_VM_helper(Register oop_result, address entry_point, int number_of_arguments, bool check_exceptions) { ++ move(V0, SP); ++ //we also reserve space for java_thread here ++ li(AT, -(StackAlignmentInBytes)); ++ andr(SP, SP, AT); ++ call_VM_base(oop_result, NOREG, V0, entry_point, number_of_arguments, check_exceptions); ++} ++ ++void MacroAssembler::call_VM_leaf(address entry_point, int number_of_arguments) { ++ call_VM_leaf_base(entry_point, number_of_arguments); ++} ++ ++void MacroAssembler::call_VM_leaf(address entry_point, Register arg_0) { ++ if (arg_0 != A0) move(A0, arg_0); ++ call_VM_leaf(entry_point, 1); ++} ++ ++void MacroAssembler::call_VM_leaf(address entry_point, Register arg_0, Register arg_1) { ++ if (arg_0 != A0) move(A0, arg_0); ++ if (arg_1 != A1) move(A1, arg_1); assert(arg_1 != A0, "smashed argument"); ++ call_VM_leaf(entry_point, 2); ++} ++ ++void MacroAssembler::call_VM_leaf(address entry_point, Register arg_0, Register arg_1, Register arg_2) { ++ if (arg_0 != A0) move(A0, arg_0); ++ if (arg_1 != A1) move(A1, arg_1); assert(arg_1 != A0, "smashed argument"); ++ if (arg_2 != A2) move(A2, arg_2); assert(arg_2 != A0 && arg_2 != A1, "smashed argument"); ++ call_VM_leaf(entry_point, 3); ++} ++ ++void MacroAssembler::super_call_VM_leaf(address entry_point) { ++ MacroAssembler::call_VM_leaf_base(entry_point, 0); ++} ++ ++void MacroAssembler::super_call_VM_leaf(address entry_point, ++ Register arg_1) { ++ if (arg_1 != A0) move(A0, arg_1); ++ MacroAssembler::call_VM_leaf_base(entry_point, 1); ++} ++ ++void MacroAssembler::super_call_VM_leaf(address entry_point, ++ Register arg_1, ++ Register arg_2) { ++ if (arg_1 != A0) move(A0, arg_1); ++ if (arg_2 != A1) move(A1, arg_2); assert(arg_2 != A0, "smashed argument"); ++ MacroAssembler::call_VM_leaf_base(entry_point, 2); ++} ++ ++void MacroAssembler::super_call_VM_leaf(address entry_point, ++ Register arg_1, ++ Register arg_2, ++ Register arg_3) { ++ if (arg_1 != A0) move(A0, arg_1); ++ if (arg_2 != A1) move(A1, arg_2); assert(arg_2 != A0, "smashed argument"); ++ if (arg_3 != A2) move(A2, arg_3); assert(arg_3 != A0 && arg_3 != A1, "smashed argument"); ++ MacroAssembler::call_VM_leaf_base(entry_point, 3); ++} ++ ++void MacroAssembler::check_and_handle_earlyret(Register java_thread) { ++} ++ ++void MacroAssembler::check_and_handle_popframe(Register java_thread) { ++} ++ ++void 
MacroAssembler::null_check(Register reg, int offset) { ++ if (needs_explicit_null_check(offset)) { ++ // provoke OS NULL exception if reg = NULL by ++ // accessing M[reg] w/o changing any (non-CC) registers ++ // NOTE: cmpl is plenty here to provoke a segv ++ ld_w(AT, reg, 0); ++ } else { ++ // nothing to do, (later) access of M[reg + offset] ++ // will provoke OS NULL exception if reg = NULL ++ } ++} ++ ++void MacroAssembler::enter() { ++ push2(RA, FP); ++ move(FP, SP); ++} ++ ++void MacroAssembler::leave() { ++ move(SP, FP); ++ pop2(RA, FP); ++} ++ ++void MacroAssembler::build_frame(int framesize) { ++ assert(framesize >= 2 * wordSize, "framesize must include space for FP/RA"); ++ assert(framesize % (2 * wordSize) == 0, "must preserve 2 * wordSize alignment"); ++ if (Assembler::is_simm(-framesize, 12)) { ++ addi_d(SP, SP, -framesize); ++ st_ptr(FP, Address(SP, framesize - 2 * wordSize)); ++ st_ptr(RA, Address(SP, framesize - 1 * wordSize)); ++ if (PreserveFramePointer) ++ addi_d(FP, SP, framesize - 2 * wordSize); ++ } else { ++ addi_d(SP, SP, -2 * wordSize); ++ st_ptr(FP, Address(SP, 0 * wordSize)); ++ st_ptr(RA, Address(SP, 1 * wordSize)); ++ if (PreserveFramePointer) ++ move(FP, SP); ++ li(SCR1, framesize - 2 * wordSize); ++ sub_d(SP, SP, SCR1); ++ } ++} ++ ++void MacroAssembler::remove_frame(int framesize) { ++ assert(framesize >= 2 * wordSize, "framesize must include space for FP/RA"); ++ assert(framesize % (2*wordSize) == 0, "must preserve 2*wordSize alignment"); ++ if (Assembler::is_simm(framesize, 12)) { ++ ld_ptr(FP, Address(SP, framesize - 2 * wordSize)); ++ ld_ptr(RA, Address(SP, framesize - 1 * wordSize)); ++ addi_d(SP, SP, framesize); ++ } else { ++ li(SCR1, framesize - 2 * wordSize); ++ add_d(SP, SP, SCR1); ++ ld_ptr(FP, Address(SP, 0 * wordSize)); ++ ld_ptr(RA, Address(SP, 1 * wordSize)); ++ addi_d(SP, SP, 2 * wordSize); ++ } ++} ++ ++void MacroAssembler::reset_last_Java_frame(Register java_thread, bool clear_fp) { ++ // determine java_thread register ++ if (!java_thread->is_valid()) { ++#ifndef OPT_THREAD ++ java_thread = T1; ++ get_thread(java_thread); ++#else ++ java_thread = TREG; ++#endif ++ } ++ // we must set sp to zero to clear frame ++ st_ptr(R0, java_thread, in_bytes(JavaThread::last_Java_sp_offset())); ++ // must clear fp, so that compiled frames are not confused; it is possible ++ // that we need it only for debugging ++ if(clear_fp) { ++ st_ptr(R0, java_thread, in_bytes(JavaThread::last_Java_fp_offset())); ++ } ++ ++ // Always clear the pc because it could have been set by make_walkable() ++ st_ptr(R0, java_thread, in_bytes(JavaThread::last_Java_pc_offset())); ++} ++ ++void MacroAssembler::reset_last_Java_frame(bool clear_fp) { ++ Register thread = TREG; ++#ifndef OPT_THREAD ++ get_thread(thread); ++#endif ++ // we must set sp to zero to clear frame ++ st_d(R0, thread, in_bytes(JavaThread::last_Java_sp_offset())); ++ // must clear fp, so that compiled frames are not confused; it is ++ // possible that we need it only for debugging ++ if (clear_fp) { ++ st_d(R0, thread, in_bytes(JavaThread::last_Java_fp_offset())); ++ } ++ ++ // Always clear the pc because it could have been set by make_walkable() ++ st_d(R0, thread, in_bytes(JavaThread::last_Java_pc_offset())); ++} ++ ++// Write serialization page so VM thread can do a pseudo remote membar. ++// We use the current thread pointer to calculate a thread specific ++// offset to write to within the page. This minimizes bus traffic ++// due to cache line collision. 
++void MacroAssembler::serialize_memory(Register thread, Register tmp) { ++ assert_different_registers(AT, tmp); ++ juint sps = os::get_serialize_page_shift_count(); ++ juint lsb = sps + 2; ++ juint msb = sps + log2_uint(os::vm_page_size()) - 1; ++ bstrpick_w(AT, thread, msb, lsb); ++ li(tmp, os::get_memory_serialize_page()); ++ alsl_d(tmp, AT, tmp, Address::times_2 - 1); ++ st_w(R0, tmp, 0); ++} ++ ++// Calls to C land ++// ++// When entering C land, the fp, & sp of the last Java frame have to be recorded ++// in the (thread-local) JavaThread object. When leaving C land, the last Java fp ++// has to be reset to 0. This is required to allow proper stack traversal. ++void MacroAssembler::set_last_Java_frame(Register java_thread, ++ Register last_java_sp, ++ Register last_java_fp, ++ Label& last_java_pc) { ++ // determine java_thread register ++ if (!java_thread->is_valid()) { ++#ifndef OPT_THREAD ++ java_thread = T2; ++ get_thread(java_thread); ++#else ++ java_thread = TREG; ++#endif ++ } ++ ++ // determine last_java_sp register ++ if (!last_java_sp->is_valid()) { ++ last_java_sp = SP; ++ } ++ ++ // last_java_fp is optional ++ if (last_java_fp->is_valid()) { ++ st_ptr(last_java_fp, java_thread, in_bytes(JavaThread::last_Java_fp_offset())); ++ } ++ ++ // last_java_pc ++ lipc(AT, last_java_pc); ++ st_ptr(AT, java_thread, in_bytes(JavaThread::frame_anchor_offset() + ++ JavaFrameAnchor::last_Java_pc_offset())); ++ ++ st_ptr(last_java_sp, java_thread, in_bytes(JavaThread::last_Java_sp_offset())); ++} ++ ++void MacroAssembler::set_last_Java_frame(Register last_java_sp, ++ Register last_java_fp, ++ Label& last_java_pc) { ++ set_last_Java_frame(NOREG, last_java_sp, last_java_fp, last_java_pc); ++} ++ ++////////////////////////////////////////////////////////////////////////////////// ++#if INCLUDE_ALL_GCS ++ ++void MacroAssembler::g1_write_barrier_pre(Register obj, ++ Register pre_val, ++ Register thread, ++ Register tmp, ++ bool tosca_live, ++ bool expand_call) { ++ ++ // If expand_call is true then we expand the call_VM_leaf macro ++ // directly to skip generating the check by ++ // InterpreterMacroAssembler::call_VM_leaf_base that checks _last_sp. ++ ++ assert(thread == TREG, "must be"); ++ ++ Label done; ++ Label runtime; ++ ++ assert(pre_val != noreg, "check this code"); ++ ++ if (obj != noreg) { ++ assert_different_registers(obj, pre_val, tmp); ++ assert(pre_val != V0, "check this code"); ++ } ++ ++ Address in_progress(thread, in_bytes(JavaThread::satb_mark_queue_offset() + ++ PtrQueue::byte_offset_of_active())); ++ Address index(thread, in_bytes(JavaThread::satb_mark_queue_offset() + ++ PtrQueue::byte_offset_of_index())); ++ Address buffer(thread, in_bytes(JavaThread::satb_mark_queue_offset() + ++ PtrQueue::byte_offset_of_buf())); ++ ++ // Is marking active? ++ if (in_bytes(PtrQueue::byte_width_of_active()) == 4) { ++ ld_w(AT, in_progress); ++ } else { ++ assert(in_bytes(PtrQueue::byte_width_of_active()) == 1, "Assumption"); ++ ld_b(AT, in_progress); ++ } ++ beqz(AT, done); ++ ++ // Do we need to load the previous value? ++ if (obj != noreg) { ++ load_heap_oop(pre_val, Address(obj, 0)); ++ } ++ ++ // Is the previous value null? ++ beqz(pre_val, done); ++ ++ // Can we store original value in the thread's buffer? ++ // Is index == 0? ++ // (The index field is typed as size_t.) 
++ ++ ld_d(tmp, index); ++ beqz(tmp, runtime); ++ ++ addi_d(tmp, tmp, -1 * wordSize); ++ st_d(tmp, index); ++ ld_d(AT, buffer); ++ ++ // Record the previous value ++ stx_d(pre_val, tmp, AT); ++ b(done); ++ ++ bind(runtime); ++ // save the live input values ++ if (tosca_live) push(V0); ++ ++ if (obj != noreg && obj != V0) push(obj); ++ ++ if (pre_val != V0) push(pre_val); ++ ++ // Calling the runtime using the regular call_VM_leaf mechanism generates ++ // code (generated by InterpreterMacroAssember::call_VM_leaf_base) ++ // that checks that the *(fp+frame::interpreter_frame_last_sp) == NULL. ++ // ++ // If we care generating the pre-barrier without a frame (e.g. in the ++ // intrinsified Reference.get() routine) then fp might be pointing to ++ // the caller frame and so this check will most likely fail at runtime. ++ // ++ // Expanding the call directly bypasses the generation of the check. ++ // So when we do not have have a full interpreter frame on the stack ++ // expand_call should be passed true. ++ ++ if (expand_call) { ++ assert(pre_val != A1, "smashed arg"); ++ if (thread != A1) move(A1, thread); ++ if (pre_val != A0) move(A0, pre_val); ++ MacroAssembler::call_VM_leaf_base(CAST_FROM_FN_PTR(address, SharedRuntime::g1_wb_pre), 2); ++ } else { ++ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::g1_wb_pre), pre_val, thread); ++ } ++ ++ // save the live input values ++ if (pre_val != V0) ++ pop(pre_val); ++ ++ if (obj != noreg && obj != V0) ++ pop(obj); ++ ++ if(tosca_live) pop(V0); ++ ++ bind(done); ++} ++ ++void MacroAssembler::g1_write_barrier_post(Register store_addr, ++ Register new_val, ++ Register thread, ++ Register tmp, ++ Register tmp2) { ++ assert(tmp != AT, "must be"); ++ assert(tmp2 != AT, "must be"); ++ assert(thread == TREG, "must be"); ++ ++ Address queue_index(thread, in_bytes(JavaThread::dirty_card_queue_offset() + ++ PtrQueue::byte_offset_of_index())); ++ Address buffer(thread, in_bytes(JavaThread::dirty_card_queue_offset() + ++ PtrQueue::byte_offset_of_buf())); ++ ++ BarrierSet* bs = Universe::heap()->barrier_set(); ++ CardTableModRefBS* ct = (CardTableModRefBS*)bs; ++ assert(sizeof(*ct->byte_map_base) == sizeof(jbyte), "adjust this code"); ++ Label done; ++ Label runtime; ++ ++ // Does store cross heap regions? ++ xorr(AT, store_addr, new_val); ++ srli_d(AT, AT, HeapRegion::LogOfHRGrainBytes); ++ beqz(AT, done); ++ ++ ++ // crosses regions, storing NULL? ++ beq(new_val, R0, done); ++ ++ // storing region crossing non-NULL, is card already dirty? ++ const Register card_addr = tmp; ++ const Register cardtable = tmp2; ++ ++ move(card_addr, store_addr); ++ srli_d(card_addr, card_addr, CardTableModRefBS::card_shift); ++ // Do not use ExternalAddress to load 'byte_map_base', since 'byte_map_base' is NOT ++ // a valid address and therefore is not properly handled by the relocation code. ++ li(cardtable, (intptr_t)ct->byte_map_base); ++ add_d(card_addr, card_addr, cardtable); ++ ++ ld_b(AT, card_addr, 0); ++ addi_d(AT, AT, -1 * (int)G1SATBCardTableModRefBS::g1_young_card_val()); ++ beqz(AT, done); ++ ++ membar(StoreLoad); ++ ld_b(AT, card_addr, 0); ++ addi_d(AT, AT, -1 * (int)(int)CardTableModRefBS::dirty_card_val()); ++ beqz(AT, done); ++ ++ ++ // storing a region crossing, non-NULL oop, card is clean. ++ // dirty card and log. 
++ li(AT, (int)CardTableModRefBS::dirty_card_val()); ++ st_b(AT, card_addr, 0); ++ ++ ld_w(AT, queue_index); ++ beqz(AT, runtime); ++ addi_d(AT, AT, -1 * wordSize); ++ st_w(AT, queue_index); ++ ld_d(tmp2, buffer); ++ ld_d(AT, queue_index); ++ stx_d(card_addr, tmp2, AT); ++ b(done); ++ ++ bind(runtime); ++ // save the live input values ++ push(store_addr); ++ push(new_val); ++ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::g1_wb_post), card_addr, TREG); ++ pop(new_val); ++ pop(store_addr); ++ ++ bind(done); ++} ++ ++#endif // INCLUDE_ALL_GCS ++////////////////////////////////////////////////////////////////////////////////// ++ ++ ++void MacroAssembler::store_check(Register obj) { ++ // Does a store check for the oop in register obj. The content of ++ // register obj is destroyed afterwards. ++ store_check_part_1(obj); ++ store_check_part_2(obj); ++} ++ ++void MacroAssembler::store_check(Register obj, Address dst) { ++ store_check(obj); ++} ++ ++ ++// split the store check operation so that other instructions can be scheduled inbetween ++void MacroAssembler::store_check_part_1(Register obj) { ++ BarrierSet* bs = Universe::heap()->barrier_set(); ++ assert(bs->kind() == BarrierSet::CardTableModRef, "Wrong barrier set kind"); ++ srli_d(obj, obj, CardTableModRefBS::card_shift); ++} ++ ++void MacroAssembler::store_check_part_2(Register obj) { ++ BarrierSet* bs = Universe::heap()->barrier_set(); ++ assert(bs->kind() == BarrierSet::CardTableModRef, "Wrong barrier set kind"); ++ CardTableModRefBS* ct = (CardTableModRefBS*)bs; ++ assert(sizeof(*ct->byte_map_base) == sizeof(jbyte), "adjust this code"); ++ li(AT, (long)ct->byte_map_base); ++ add_d(AT, AT, obj); ++ if (UseConcMarkSweepGC) membar(StoreStore); ++ st_b(R0, AT, 0); ++} ++ ++// Defines obj, preserves var_size_in_bytes, okay for t2 == var_size_in_bytes. ++void MacroAssembler::tlab_allocate(Register obj, Register var_size_in_bytes, int con_size_in_bytes, ++ Register t1, Register t2, Label& slow_case) { ++ assert_different_registers(obj, t2); ++ assert_different_registers(obj, var_size_in_bytes); ++ ++ Register end = t2; ++ // verify_tlab(); ++ ++ ld_ptr(obj, Address(TREG, JavaThread::tlab_top_offset())); ++ if (var_size_in_bytes == noreg) { ++ lea(end, Address(obj, con_size_in_bytes)); ++ } else { ++ lea(end, Address(obj, var_size_in_bytes, Address::times_1, 0)); ++ } ++ ++ ld_ptr(SCR1, Address(TREG, JavaThread::tlab_end_offset())); ++ blt_far(SCR1, end, slow_case, false); ++ ++ // update the tlab top pointer ++ st_ptr(end, Address(TREG, JavaThread::tlab_top_offset())); ++ ++ // recover var_size_in_bytes if necessary ++ if (var_size_in_bytes == end) { ++ sub_d(var_size_in_bytes, var_size_in_bytes, obj); ++ } ++ // verify_tlab(); ++} ++ ++// Defines obj, preserves var_size_in_bytes ++void MacroAssembler::eden_allocate(Register obj, Register var_size_in_bytes, int con_size_in_bytes, ++ Register t1, Label& slow_case) { ++ assert_different_registers(obj, var_size_in_bytes, t1, AT); ++ if (CMSIncrementalMode || !Universe::heap()->supports_inline_contig_alloc()) { ++ // No allocation in the shared eden. 
++ b_far(slow_case); ++ } else { ++ Register end = t1; ++ Register heap_end = SCR2; ++ Label retry; ++ bind(retry); ++ ++ li(SCR1, (address)Universe::heap()->end_addr()); ++ ld_d(heap_end, SCR1, 0); ++ ++ // Get the current top of the heap ++ li(SCR1, (address) Universe::heap()->top_addr()); ++ ll_d(obj, SCR1, 0); ++ ++ // Adjust it my the size of our new object ++ if (var_size_in_bytes == noreg) ++ addi_d(end, obj, con_size_in_bytes); ++ else ++ add_d(end, obj, var_size_in_bytes); ++ ++ // if end < obj then we wrapped around high memory ++ blt_far(end, obj, slow_case, false); ++ blt_far(heap_end, end, slow_case, false); ++ ++ // If heap top hasn't been changed by some other thread, update it. ++ sc_d(end, SCR1, 0); ++ beqz(end, retry); ++ ++ incr_allocated_bytes(TREG, var_size_in_bytes, con_size_in_bytes, t1); ++ } ++} ++ ++void MacroAssembler::incr_allocated_bytes(Register thread, ++ Register var_size_in_bytes, ++ int con_size_in_bytes, ++ Register t1) { ++ if (!thread->is_valid()) { ++#ifndef OPT_THREAD ++ assert(t1->is_valid(), "need temp reg"); ++ thread = t1; ++ get_thread(thread); ++#else ++ thread = TREG; ++#endif ++ } ++ ++ ld_ptr(AT, thread, in_bytes(JavaThread::allocated_bytes_offset())); ++ if (var_size_in_bytes->is_valid()) { ++ add_d(AT, AT, var_size_in_bytes); ++ } else { ++ addi_d(AT, AT, con_size_in_bytes); ++ } ++ st_ptr(AT, thread, in_bytes(JavaThread::allocated_bytes_offset())); ++} ++ ++static const double pi_4 = 0.7853981633974483; ++ ++// must get argument(a double) in FA0/FA1 ++//void MacroAssembler::trigfunc(char trig, bool preserve_cpu_regs, int num_fpu_regs_in_use) { ++//We need to preseve the register which maybe modified during the Call ++void MacroAssembler::trigfunc(char trig, int num_fpu_regs_in_use) { ++ // save all modified register here ++ // FIXME, in the disassembly of tirgfunc, only used V0, V1, T4, SP, RA, so we ony save V0, V1, T4 ++ guarantee(0, "LA not implemented yet"); ++#if 0 ++ pushad(); ++ // we should preserve the stack space before we call ++ addi_d(SP, SP, -wordSize * 2); ++ switch (trig){ ++ case 's' : ++ call( CAST_FROM_FN_PTR(address, SharedRuntime::dsin), relocInfo::runtime_call_type ); ++ break; ++ case 'c': ++ call( CAST_FROM_FN_PTR(address, SharedRuntime::dcos), relocInfo::runtime_call_type ); ++ break; ++ case 't': ++ call( CAST_FROM_FN_PTR(address, SharedRuntime::dtan), relocInfo::runtime_call_type ); ++ break; ++ default:assert (false, "bad intrinsic"); ++ break; ++ ++ } ++ ++ addi_d(SP, SP, wordSize * 2); ++ popad(); ++#endif ++} ++ ++void MacroAssembler::li(Register rd, jlong value) { ++ jlong hi12 = bitfield(value, 52, 12); ++ jlong lo52 = bitfield(value, 0, 52); ++ ++ if (hi12 != 0 && lo52 == 0) { ++ lu52i_d(rd, R0, hi12); ++ } else { ++ jlong hi20 = bitfield(value, 32, 20); ++ jlong lo20 = bitfield(value, 12, 20); ++ jlong lo12 = bitfield(value, 0, 12); ++ ++ if (lo20 == 0) { ++ ori(rd, R0, lo12); ++ } else if (bitfield(simm12(lo12), 12, 20) == lo20) { ++ addi_w(rd, R0, simm12(lo12)); ++ } else { ++ lu12i_w(rd, lo20); ++ if (lo12 != 0) ++ ori(rd, rd, lo12); ++ } ++ if (hi20 != bitfield(simm20(lo20), 20, 20)) ++ lu32i_d(rd, hi20); ++ if (hi12 != bitfield(simm20(hi20), 20, 12)) ++ lu52i_d(rd, rd, hi12); ++ } ++} ++ ++void MacroAssembler::patchable_li52(Register rd, jlong value) { ++ int count = 0; ++ ++ if (value <= max_jint && value >= min_jint) { ++ if (is_simm(value, 12)) { ++ addi_d(rd, R0, value); ++ count++; ++ } else { ++ lu12i_w(rd, split_low20(value >> 12)); ++ count++; ++ if (split_low12(value)) { ++ ori(rd, rd, 
split_low12(value)); ++ count++; ++ } ++ } ++ } else if (is_simm(value, 52)) { ++ lu12i_w(rd, split_low20(value >> 12)); ++ count++; ++ if (split_low12(value)) { ++ ori(rd, rd, split_low12(value)); ++ count++; ++ } ++ lu32i_d(rd, split_low20(value >> 32)); ++ count++; ++ } else { ++ tty->print_cr("value = 0x%lx", value); ++ guarantee(false, "Not supported yet !"); ++ } ++ ++ while (count < 3) { ++ nop(); ++ count++; ++ } ++} ++ ++void MacroAssembler::set_narrow_klass(Register dst, Klass* k) { ++ assert(UseCompressedClassPointers, "should only be used for compressed header"); ++ assert(oop_recorder() != NULL, "this assembler needs an OopRecorder"); ++ ++ int klass_index = oop_recorder()->find_index(k); ++ RelocationHolder rspec = metadata_Relocation::spec(klass_index); ++ long narrowKlass = (long)Klass::encode_klass(k); ++ ++ relocate(rspec, Assembler::narrow_oop_operand); ++ patchable_li52(dst, narrowKlass); ++} ++ ++void MacroAssembler::set_narrow_oop(Register dst, jobject obj) { ++ assert(UseCompressedOops, "should only be used for compressed header"); ++ assert(oop_recorder() != NULL, "this assembler needs an OopRecorder"); ++ ++ int oop_index = oop_recorder()->find_index(obj); ++ RelocationHolder rspec = oop_Relocation::spec(oop_index); ++ ++ relocate(rspec, Assembler::narrow_oop_operand); ++ patchable_li52(dst, oop_index); ++} ++ ++void MacroAssembler::lipc(Register rd, Label& L) { ++ if (L.is_bound()) { ++ jint offs = (target(L) - pc()) >> 2; ++ guarantee(is_simm(offs, 20), "Not signed 20-bit offset"); ++ pcaddi(rd, offs); ++ } else { ++ InstructionMark im(this); ++ L.add_patch_at(code(), locator()); ++ pcaddi(rd, 0); ++ } ++} ++ ++void MacroAssembler::verify_oop(Register reg, const char* s) { ++ if (!VerifyOops) return; ++ const char * b = NULL; ++ stringStream ss; ++ ss.print("verify_oop: %s: %s", reg->name(), s); ++ b = code_string(ss.as_string()); ++ pushad(); ++ move(A1, reg); ++ patchable_li52(A0, (long)b); ++ li(AT, (long)StubRoutines::verify_oop_subroutine_entry_address()); ++ ld_d(T4, AT, 0); ++ jalr(T4); ++ popad(); ++} ++ ++void MacroAssembler::verify_oop_addr(Address addr, const char* s) { ++ //TODO: LA ++ guarantee(0, "LA not implemented yet"); ++#if 0 ++ if (!VerifyOops) { ++ nop(); ++ return; ++ } ++ // Pass register number to verify_oop_subroutine ++ const char * b = NULL; ++ stringStream ss; ++ ss.print("verify_oop_addr: %s", s); ++ b = code_string(ss.as_string()); ++ ++ st_ptr(T0, SP, - wordSize); ++ st_ptr(T1, SP, - 2*wordSize); ++ st_ptr(RA, SP, - 3*wordSize); ++ st_ptr(A0, SP, - 4*wordSize); ++ st_ptr(A1, SP, - 5*wordSize); ++ st_ptr(AT, SP, - 6*wordSize); ++ st_ptr(T9, SP, - 7*wordSize); ++ ld_ptr(A1, addr); // addr may use SP, so load from it before change SP ++ addiu(SP, SP, - 7 * wordSize); ++ ++ patchable_li52(A0, (long)b); ++ // call indirectly to solve generation ordering problem ++ li(AT, (long)StubRoutines::verify_oop_subroutine_entry_address()); ++ ld_ptr(T9, AT, 0); ++ jalr(T9); ++ delayed()->nop(); ++ ld_ptr(T0, SP, 6* wordSize); ++ ld_ptr(T1, SP, 5* wordSize); ++ ld_ptr(RA, SP, 4* wordSize); ++ ld_ptr(A0, SP, 3* wordSize); ++ ld_ptr(A1, SP, 2* wordSize); ++ ld_ptr(AT, SP, 1* wordSize); ++ ld_ptr(T9, SP, 0* wordSize); ++ addiu(SP, SP, 7 * wordSize); ++#endif ++} ++ ++// used registers : T0, T1 ++void MacroAssembler::verify_oop_subroutine() { ++ // RA: ra ++ // A0: char* error message ++ // A1: oop object to verify ++ Label exit, error; ++ // increment counter ++ li(T0, (long)StubRoutines::verify_oop_count_addr()); ++ ld_w(AT, T0, 0); ++ addi_d(AT, AT, 
1); ++ st_w(AT, T0, 0); ++ ++ // make sure object is 'reasonable' ++ beq(A1, R0, exit); // if obj is NULL it is ok ++ ++ // Check if the oop is in the right area of memory ++ // const int oop_mask = Universe::verify_oop_mask(); ++ // const int oop_bits = Universe::verify_oop_bits(); ++ const uintptr_t oop_mask = Universe::verify_oop_mask(); ++ const uintptr_t oop_bits = Universe::verify_oop_bits(); ++ li(AT, oop_mask); ++ andr(T0, A1, AT); ++ li(AT, oop_bits); ++ bne(T0, AT, error); ++ ++ // make sure klass is 'reasonable' ++ // add for compressedoops ++ reinit_heapbase(); ++ // add for compressedoops ++ load_klass(T0, A1); ++ beq(T0, R0, error); // if klass is NULL it is broken ++ // return if everything seems ok ++ bind(exit); ++ ++ jr(RA); ++ ++ // handle errors ++ bind(error); ++ pushad(); ++ call(CAST_FROM_FN_PTR(address, MacroAssembler::debug), relocInfo::runtime_call_type); ++ popad(); ++ jr(RA); ++} ++ ++void MacroAssembler::verify_tlab(Register t1, Register t2) { ++#ifdef ASSERT ++ assert_different_registers(t1, t2, AT); ++ if (UseTLAB && VerifyOops) { ++ Label next, ok; ++ ++ get_thread(t1); ++ ++ ld_ptr(t2, t1, in_bytes(JavaThread::tlab_top_offset())); ++ ld_ptr(AT, t1, in_bytes(JavaThread::tlab_start_offset())); ++ bgeu(t2, AT, next); ++ ++ stop("assert(top >= start)"); ++ ++ bind(next); ++ ld_ptr(AT, t1, in_bytes(JavaThread::tlab_end_offset())); ++ bgeu(AT, t2, ok); ++ ++ stop("assert(top <= end)"); ++ ++ bind(ok); ++ ++ } ++#endif ++} ++ ++RegisterOrConstant MacroAssembler::delayed_value_impl(intptr_t* delayed_value_addr, ++ Register tmp, ++ int offset) { ++ //TODO: LA ++ guarantee(0, "LA not implemented yet"); ++ return RegisterOrConstant(tmp); ++} ++ ++void MacroAssembler::hswap(Register reg) { ++ // TODO LA opt ++ //short ++ srli_w(AT, reg, 8); ++ slli_w(reg, reg, 24); ++ srai_w(reg, reg, 16); ++ orr(reg, reg, AT); ++} ++ ++void MacroAssembler::huswap(Register reg) { ++ // TODO LA opt ++ srli_d(AT, reg, 8); ++ slli_d(reg, reg, 24); ++ srli_d(reg, reg, 16); ++ orr(reg, reg, AT); ++ bstrpick_d(reg, reg, 15, 0); ++} ++ ++// something funny to do this will only one more register AT ++// 32 bits ++void MacroAssembler::swap(Register reg) { ++ //TODO: LA opt ++ srli_w(AT, reg, 8); ++ slli_w(reg, reg, 24); ++ orr(reg, reg, AT); ++ //reg : 4 1 2 3 ++ srli_w(AT, AT, 16); ++ xorr(AT, AT, reg); ++ andi(AT, AT, 0xff); ++ //AT : 0 0 0 1^3); ++ xorr(reg, reg, AT); ++ //reg : 4 1 2 1 ++ slli_w(AT, AT, 16); ++ xorr(reg, reg, AT); ++ //reg : 4 3 2 1 ++} ++ ++void MacroAssembler::cmpxchg(Address addr, Register oldval, Register newval, ++ Register resflag, bool retold, bool barrier) { ++ assert(oldval != resflag, "oldval != resflag"); ++ assert(newval != resflag, "newval != resflag"); ++ Label again, succ, fail; ++ ++ bind(again); ++ ll_d(resflag, addr); ++ bne(resflag, oldval, fail); ++ move(resflag, newval); ++ sc_d(resflag, addr); ++ beqz(resflag, again); ++ b(succ); ++ ++ bind(fail); ++ if (barrier) ++ dbar(0x700); ++ if (retold && oldval != R0) ++ move(oldval, resflag); ++ move(resflag, R0); ++ bind(succ); ++} ++ ++void MacroAssembler::cmpxchg(Address addr, Register oldval, Register newval, ++ Register tmp, bool retold, bool barrier, Label& succ, Label* fail) { ++ assert(oldval != tmp, "oldval != tmp"); ++ assert(newval != tmp, "newval != tmp"); ++ Label again, neq; ++ ++ bind(again); ++ ll_d(tmp, addr); ++ bne(tmp, oldval, neq); ++ move(tmp, newval); ++ sc_d(tmp, addr); ++ beqz(tmp, again); ++ b(succ); ++ ++ bind(neq); ++ if (barrier) ++ dbar(0x700); ++ if (retold && oldval != R0) ++ 
move(oldval, tmp); ++ if (fail) ++ b(*fail); ++} ++ ++void MacroAssembler::cmpxchg32(Address addr, Register oldval, Register newval, ++ Register resflag, bool sign, bool retold, bool barrier) { ++ assert(oldval != resflag, "oldval != resflag"); ++ assert(newval != resflag, "newval != resflag"); ++ Label again, succ, fail; ++ ++ bind(again); ++ ll_w(resflag, addr); ++ if (!sign) ++ lu32i_d(resflag, 0); ++ bne(resflag, oldval, fail); ++ move(resflag, newval); ++ sc_w(resflag, addr); ++ beqz(resflag, again); ++ b(succ); ++ ++ bind(fail); ++ if (barrier) ++ dbar(0x700); ++ if (retold && oldval != R0) ++ move(oldval, resflag); ++ move(resflag, R0); ++ bind(succ); ++} ++ ++void MacroAssembler::cmpxchg32(Address addr, Register oldval, Register newval, Register tmp, ++ bool sign, bool retold, bool barrier, Label& succ, Label* fail) { ++ assert(oldval != tmp, "oldval != tmp"); ++ assert(newval != tmp, "newval != tmp"); ++ Label again, neq; ++ ++ bind(again); ++ ll_w(tmp, addr); ++ if (!sign) ++ lu32i_d(tmp, 0); ++ bne(tmp, oldval, neq); ++ move(tmp, newval); ++ sc_w(tmp, addr); ++ beqz(tmp, again); ++ b(succ); ++ ++ bind(neq); ++ if (barrier) ++ dbar(0x700); ++ if (retold && oldval != R0) ++ move(oldval, tmp); ++ if (fail) ++ b(*fail); ++} ++ ++// be sure the three register is different ++void MacroAssembler::rem_s(FloatRegister fd, FloatRegister fs, FloatRegister ft, FloatRegister tmp) { ++ //TODO: LA ++ guarantee(0, "LA not implemented yet"); ++} ++ ++// be sure the three register is different ++void MacroAssembler::rem_d(FloatRegister fd, FloatRegister fs, FloatRegister ft, FloatRegister tmp) { ++ //TODO: LA ++ guarantee(0, "LA not implemented yet"); ++} ++ ++// Fast_Lock and Fast_Unlock used by C2 ++ ++// Because the transitions from emitted code to the runtime ++// monitorenter/exit helper stubs are so slow it's critical that ++// we inline both the stack-locking fast-path and the inflated fast path. ++// ++// See also: cmpFastLock and cmpFastUnlock. ++// ++// What follows is a specialized inline transliteration of the code ++// in slow_enter() and slow_exit(). If we're concerned about I$ bloat ++// another option would be to emit TrySlowEnter and TrySlowExit methods ++// at startup-time. These methods would accept arguments as ++// (Obj, Self, box, Scratch) and return success-failure ++// indications in the icc.ZFlag. Fast_Lock and Fast_Unlock would simply ++// marshal the arguments and emit calls to TrySlowEnter and TrySlowExit. ++// In practice, however, the # of lock sites is bounded and is usually small. ++// Besides the call overhead, TrySlowEnter and TrySlowExit might suffer ++// if the processor uses simple bimodal branch predictors keyed by EIP ++// Since the helper routines would be called from multiple synchronization ++// sites. ++// ++// An even better approach would be write "MonitorEnter()" and "MonitorExit()" ++// in java - using j.u.c and unsafe - and just bind the lock and unlock sites ++// to those specialized methods. That'd give us a mostly platform-independent ++// implementation that the JITs could optimize and inline at their pleasure. ++// Done correctly, the only time we'd need to cross to native could would be ++// to park() or unpark() threads. We'd also need a few more unsafe operators ++// to (a) prevent compiler-JIT reordering of non-volatile accesses, and ++// (b) explicit barriers or fence operations. ++// ++// TODO: ++// ++// * Arrange for C2 to pass "Self" into Fast_Lock and Fast_Unlock in one of the registers (scr). 
++// This avoids manifesting the Self pointer in the Fast_Lock and Fast_Unlock terminals. ++// Given TLAB allocation, Self is usually manifested in a register, so passing it into ++// the lock operators would typically be faster than reifying Self. ++// ++// * Ideally I'd define the primitives as: ++// fast_lock (nax Obj, nax box, res, tmp, nax scr) where tmp and scr are KILLED. ++// fast_unlock (nax Obj, box, res, nax tmp) where tmp are KILLED ++// Unfortunately ADLC bugs prevent us from expressing the ideal form. ++// Instead, we're stuck with a rather awkward and brittle register assignments below. ++// Furthermore the register assignments are overconstrained, possibly resulting in ++// sub-optimal code near the synchronization site. ++// ++// * Eliminate the sp-proximity tests and just use "== Self" tests instead. ++// Alternately, use a better sp-proximity test. ++// ++// * Currently ObjectMonitor._Owner can hold either an sp value or a (THREAD *) value. ++// Either one is sufficient to uniquely identify a thread. ++// TODO: eliminate use of sp in _owner and use get_thread(tr) instead. ++// ++// * Intrinsify notify() and notifyAll() for the common cases where the ++// object is locked by the calling thread but the waitlist is empty. ++// avoid the expensive JNI call to JVM_Notify() and JVM_NotifyAll(). ++// ++// * use jccb and jmpb instead of jcc and jmp to improve code density. ++// But beware of excessive branch density on AMD Opterons. ++// ++// * Both Fast_Lock and Fast_Unlock set the ICC.ZF to indicate success ++// or failure of the fast-path. If the fast-path fails then we pass ++// control to the slow-path, typically in C. In Fast_Lock and ++// Fast_Unlock we often branch to DONE_LABEL, just to find that C2 ++// will emit a conditional branch immediately after the node. ++// So we have branches to branches and lots of ICC.ZF games. ++// Instead, it might be better to have C2 pass a "FailureLabel" ++// into Fast_Lock and Fast_Unlock. In the case of success, control ++// will drop through the node. ICC.ZF is undefined at exit. ++// In the case of failure, the node will branch directly to the ++// FailureLabel ++ ++// obj: object to lock ++// box: on-stack box address (displaced header location) ++// tmp: tmp -- KILLED ++// scr: tmp -- KILLED ++void MacroAssembler::fast_lock(Register objReg, Register boxReg, Register resReg, ++ Register tmpReg, Register scrReg) { ++ Label IsInflated, DONE, DONE_SET; ++ ++ // Ensure the register assignents are disjoint ++ guarantee(objReg != boxReg, ""); ++ guarantee(objReg != tmpReg, ""); ++ guarantee(objReg != scrReg, ""); ++ guarantee(boxReg != tmpReg, ""); ++ guarantee(boxReg != scrReg, ""); ++ ++ block_comment("FastLock"); ++ ++ if (PrintBiasedLockingStatistics) { ++ atomic_inc32((address)BiasedLocking::total_entry_count_addr(), 1, tmpReg, scrReg); ++ } ++ ++ if (EmitSync & 1) { ++ move(AT, R0); ++ return; ++ } else ++ if (EmitSync & 2) { ++ Label DONE_LABEL ; ++ if (UseBiasedLocking) { ++ // Note: tmpReg maps to the swap_reg argument and scrReg to the tmp_reg argument. 
++ biased_locking_enter(boxReg, objReg, tmpReg, scrReg, false, DONE_LABEL, NULL); ++ } ++ ++ ld_d(tmpReg, Address(objReg, 0)) ; // fetch markword ++ ori(tmpReg, tmpReg, 0x1); ++ st_d(tmpReg, Address(boxReg, 0)); // Anticipate successful CAS ++ ++ cmpxchg(Address(objReg, 0), tmpReg, boxReg, scrReg, true, false, DONE_LABEL); // Updates tmpReg ++ ++ // Recursive locking ++ sub_d(tmpReg, tmpReg, SP); ++ li(AT, (7 - os::vm_page_size() )); ++ andr(tmpReg, tmpReg, AT); ++ st_d(tmpReg, Address(boxReg, 0)); ++ bind(DONE_LABEL) ; ++ } else { ++ // Possible cases that we'll encounter in fast_lock ++ // ------------------------------------------------ ++ // * Inflated ++ // -- unlocked ++ // -- Locked ++ // = by self ++ // = by other ++ // * biased ++ // -- by Self ++ // -- by other ++ // * neutral ++ // * stack-locked ++ // -- by self ++ // = sp-proximity test hits ++ // = sp-proximity test generates false-negative ++ // -- by other ++ // ++ ++ // TODO: optimize away redundant LDs of obj->mark and improve the markword triage ++ // order to reduce the number of conditional branches in the most common cases. ++ // Beware -- there's a subtle invariant that fetch of the markword ++ // at [FETCH], below, will never observe a biased encoding (*101b). ++ // If this invariant is not held we risk exclusion (safety) failure. ++ if (UseBiasedLocking && !UseOptoBiasInlining) { ++ Label succ, fail; ++ biased_locking_enter(boxReg, objReg, tmpReg, scrReg, false, succ, NULL); ++ b(fail); ++ bind(succ); ++ li(resReg, 1); ++ b(DONE); ++ bind(fail); ++ } ++ ++ ld_d(tmpReg, Address(objReg, 0)); //Fetch the markword of the object. ++ andi(AT, tmpReg, markOopDesc::monitor_value); ++ bnez(AT, IsInflated); // inflated vs stack-locked|neutral|bias ++ ++ // Attempt stack-locking ... ++ ori(tmpReg, tmpReg, markOopDesc::unlocked_value); ++ st_d(tmpReg, Address(boxReg, 0)); // Anticipate successful CAS ++ ++ if (PrintBiasedLockingStatistics) { ++ Label SUCC, FAIL; ++ cmpxchg(Address(objReg, 0), tmpReg, boxReg, scrReg, true, false, SUCC, &FAIL); // Updates tmpReg ++ bind(SUCC); ++ atomic_inc32((address)BiasedLocking::fast_path_entry_count_addr(), 1, AT, scrReg); ++ li(resReg, 1); ++ b(DONE); ++ bind(FAIL); ++ } else { ++ // If cmpxchg is succ, then scrReg = 1 ++ cmpxchg(Address(objReg, 0), tmpReg, boxReg, scrReg, true, false, DONE_SET); // Updates tmpReg ++ } ++ ++ // Recursive locking ++ // The object is stack-locked: markword contains stack pointer to BasicLock. ++ // Locked by current thread if difference with current SP is less than one page. ++ sub_d(tmpReg, tmpReg, SP); ++ li(AT, 7 - os::vm_page_size()); ++ andr(tmpReg, tmpReg, AT); ++ st_d(tmpReg, Address(boxReg, 0)); ++ ++ if (PrintBiasedLockingStatistics) { ++ Label L; ++ // tmpReg == 0 => BiasedLocking::_fast_path_entry_count++ ++ bnez(tmpReg, L); ++ atomic_inc32((address)BiasedLocking::fast_path_entry_count_addr(), 1, AT, scrReg); ++ bind(L); ++ } ++ ++ sltui(resReg, tmpReg, 1); // resReg = (tmpReg == 0) ? 1 : 0 ++ b(DONE); ++ ++ bind(IsInflated); ++ // The object's monitor m is unlocked iff m->owner == NULL, ++ // otherwise m->owner may contain a thread or a stack address. ++ ++ // TODO: someday avoid the ST-before-CAS penalty by ++ // relocating (deferring) the following ST. ++ // We should also think about trying a CAS without having ++ // fetched _owner. If the CAS is successful we may ++ // avoid an RTO->RTS upgrade on the $line. 
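The instructions that follow attempt the inflated-monitor fast path: if the monitor's _owner field is NULL, a single CAS from NULL to the current thread acquires the lock. A minimal sketch of that idea, assuming a hypothetical monitor type rather than the real ObjectMonitor layout:

    #include <atomic>

    // Hypothetical monitor: "unlocked" means owner == nullptr (not the real layout).
    struct MonitorEnterSketch { std::atomic<void*> owner{nullptr}; };

    // Try to take the inflated lock with one CAS; false sends the caller to the
    // runtime slow path, matching the cmpxchg on the owner field below.
    bool try_enter_inflated(MonitorEnterSketch* m, void* self) {
        void* expected = nullptr;
        return m->owner.compare_exchange_strong(expected, self,
                                                std::memory_order_acquire);
    }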
++ // Without cast to int32_t a movptr will destroy r10 which is typically obj ++ li(AT, (int32_t)intptr_t(markOopDesc::unused_mark())); ++ st_d(AT, Address(boxReg, 0)); ++ ++ ld_d(AT, Address(tmpReg, ObjectMonitor::owner_offset_in_bytes() - 2)); ++ // if (m->owner != 0) => AT = 0, goto slow path. ++ move(scrReg, R0); ++ bnez(AT, DONE_SET); ++ ++#ifndef OPT_THREAD ++ get_thread(TREG) ; ++#endif ++ // It's inflated and appears unlocked ++ addi_d(tmpReg, tmpReg, ObjectMonitor::owner_offset_in_bytes() - 2); ++ cmpxchg(Address(tmpReg, 0), R0, TREG, scrReg, false, false); ++ // Intentional fall-through into DONE ... ++ ++ bind(DONE_SET); ++ move(resReg, scrReg); ++ ++ // DONE is a hot target - we'd really like to place it at the ++ // start of cache line by padding with NOPs. ++ // See the AMD and Intel software optimization manuals for the ++ // most efficient "long" NOP encodings. ++ // Unfortunately none of our alignment mechanisms suffice. ++ bind(DONE); ++ // At DONE the resReg is set as follows ... ++ // Fast_Unlock uses the same protocol. ++ // resReg == 1 -> Success ++ // resREg == 0 -> Failure - force control through the slow-path ++ ++ // Avoid branch-to-branch on AMD processors ++ // This appears to be superstition. ++ if (EmitSync & 32) nop() ; ++ ++ } ++} ++ ++// obj: object to unlock ++// box: box address (displaced header location), killed. ++// tmp: killed tmp; cannot be obj nor box. ++// ++// Some commentary on balanced locking: ++// ++// Fast_Lock and Fast_Unlock are emitted only for provably balanced lock sites. ++// Methods that don't have provably balanced locking are forced to run in the ++// interpreter - such methods won't be compiled to use fast_lock and fast_unlock. ++// The interpreter provides two properties: ++// I1: At return-time the interpreter automatically and quietly unlocks any ++// objects acquired the current activation (frame). Recall that the ++// interpreter maintains an on-stack list of locks currently held by ++// a frame. ++// I2: If a method attempts to unlock an object that is not held by the ++// the frame the interpreter throws IMSX. ++// ++// Lets say A(), which has provably balanced locking, acquires O and then calls B(). ++// B() doesn't have provably balanced locking so it runs in the interpreter. ++// Control returns to A() and A() unlocks O. By I1 and I2, above, we know that O ++// is still locked by A(). ++// ++// The only other source of unbalanced locking would be JNI. The "Java Native Interface: ++// Programmer's Guide and Specification" claims that an object locked by jni_monitorenter ++// should not be unlocked by "normal" java-level locking and vice-versa. The specification ++// doesn't specify what will occur if a program engages in such mixed-mode locking, however. ++ ++void MacroAssembler::fast_unlock(Register objReg, Register boxReg, Register resReg, ++ Register tmpReg, Register scrReg) { ++ Label DONE, DONE_SET, Stacked, Inflated; ++ ++ guarantee(objReg != boxReg, ""); ++ guarantee(objReg != tmpReg, ""); ++ guarantee(objReg != scrReg, ""); ++ guarantee(boxReg != tmpReg, ""); ++ guarantee(boxReg != scrReg, ""); ++ ++ block_comment("FastUnlock"); ++ ++ if (EmitSync & 4) { ++ // Disable - inhibit all inlining. Force control through the slow-path ++ move(AT, R0); ++ return; ++ } else ++ if (EmitSync & 8) { ++ Label DONE_LABEL ; ++ if (UseBiasedLocking) { ++ biased_locking_exit(objReg, tmpReg, DONE_LABEL); ++ } ++ // classic stack-locking code ... 
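As a rough analogue of the classic stack-unlock sequence that follows (hypothetical encoding; the real markword bits differ): a zero displaced header means the lock was taken recursively and nothing is written back; otherwise the displaced header is CASed back into the object header, failing over to the slow path if another thread inflated the lock in the meantime.

    #include <atomic>
    #include <cstdint>

    // Sketch only: obj_mark currently points at box_on_stack if we hold the stack-lock.
    bool fast_unlock_sketch(std::atomic<std::uintptr_t>* obj_mark,
                            std::uintptr_t* box_on_stack) {
        std::uintptr_t displaced = *box_on_stack;
        if (displaced == 0)
            return true;                              // recursive stack-lock: nothing to do
        std::uintptr_t expected = reinterpret_cast<std::uintptr_t>(box_on_stack);
        return obj_mark->compare_exchange_strong(expected, displaced,
                                                 std::memory_order_release);
    }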
++ ld_d(tmpReg, Address(boxReg, 0)) ; ++ assert_different_registers(AT, tmpReg); ++ li(AT, 0x1); ++ beq(tmpReg, R0, DONE_LABEL) ; ++ ++ cmpxchg(Address(objReg, 0), boxReg, tmpReg, AT, false, false); ++ bind(DONE_LABEL); ++ } else { ++ Label CheckSucc; ++ ++ // Critically, the biased locking test must have precedence over ++ // and appear before the (box->dhw == 0) recursive stack-lock test. ++ if (UseBiasedLocking && !UseOptoBiasInlining) { ++ Label succ, fail; ++ biased_locking_exit(objReg, tmpReg, succ); ++ b(fail); ++ bind(succ); ++ li(resReg, 1); ++ b(DONE); ++ bind(fail); ++ } ++ ++ ld_d(tmpReg, Address(boxReg, 0)); // Examine the displaced header ++ sltui(AT, tmpReg, 1); ++ beqz(tmpReg, DONE_SET); // 0 indicates recursive stack-lock ++ ++ ld_d(tmpReg, Address(objReg, 0)); // Examine the object's markword ++ andi(AT, tmpReg, markOopDesc::monitor_value); ++ beqz(AT, Stacked); // Inflated? ++ ++ bind(Inflated); ++ // It's inflated. ++ // Despite our balanced locking property we still check that m->_owner == Self ++ // as java routines or native JNI code called by this thread might ++ // have released the lock. ++ // Refer to the comments in synchronizer.cpp for how we might encode extra ++ // state in _succ so we can avoid fetching EntryList|cxq. ++ // ++ // I'd like to add more cases in fast_lock() and fast_unlock() -- ++ // such as recursive enter and exit -- but we have to be wary of ++ // I$ bloat, T$ effects and BP$ effects. ++ // ++ // If there's no contention try a 1-0 exit. That is, exit without ++ // a costly MEMBAR or CAS. See synchronizer.cpp for details on how ++ // we detect and recover from the race that the 1-0 exit admits. ++ // ++ // Conceptually Fast_Unlock() must execute a STST|LDST "release" barrier ++ // before it STs null into _owner, releasing the lock. Updates ++ // to data protected by the critical section must be visible before ++ // we drop the lock (and thus before any other thread could acquire ++ // the lock and observe the fields protected by the lock). 
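A compact sketch of the 1-0 exit described above, using stand-in fields rather than the real ObjectMonitor offsets: when no recursion and no queued waiters are observed, the monitor is released with a plain release-store of NULL into the owner field, avoiding a CAS or full fence on the fast path.

    #include <atomic>

    struct MonitorExitSketch {
        std::atomic<void*> owner{nullptr};
        std::atomic<int>   recursions{0};
        std::atomic<void*> cxq{nullptr};          // stand-ins for the contention queues
        std::atomic<void*> entry_list{nullptr};
    };

    // Returns false when the slow path (runtime monitorexit) must run instead.
    bool try_exit_inflated(MonitorExitSketch* m, void* self) {
        if (m->owner.load(std::memory_order_relaxed) != self)         return false;
        if (m->recursions.load(std::memory_order_relaxed) != 0)       return false;
        if (m->cxq.load(std::memory_order_relaxed) != nullptr)        return false;
        if (m->entry_list.load(std::memory_order_relaxed) != nullptr) return false;
        m->owner.store(nullptr, std::memory_order_release);           // the 1-0 exit
        return true;
    }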
++#ifndef OPT_THREAD ++ get_thread(TREG); ++#endif ++ ++ // It's inflated ++ ld_d(scrReg, Address(tmpReg, ObjectMonitor::owner_offset_in_bytes() - 2)); ++ xorr(scrReg, scrReg, TREG); ++ ++ ld_d(AT, Address(tmpReg, ObjectMonitor::recursions_offset_in_bytes() - 2)); ++ orr(scrReg, scrReg, AT); ++ ++ move(AT, R0); ++ bnez(scrReg, DONE_SET); ++ ++ ld_d(scrReg, Address(tmpReg, ObjectMonitor::cxq_offset_in_bytes() - 2)); ++ ld_d(AT, Address(tmpReg, ObjectMonitor::EntryList_offset_in_bytes() - 2)); ++ orr(scrReg, scrReg, AT); ++ ++ move(AT, R0); ++ bnez(scrReg, DONE_SET); ++ ++ membar(Assembler::Membar_mask_bits(LoadStore|StoreStore)); // release-store ++ st_d(R0, Address(tmpReg, ObjectMonitor::owner_offset_in_bytes() - 2)); ++ li(resReg, 1); ++ b(DONE); ++ ++ bind(Stacked); ++ ld_d(tmpReg, Address(boxReg, 0)); ++ cmpxchg(Address(objReg, 0), boxReg, tmpReg, AT, false, false); ++ ++ bind(DONE_SET); ++ move(resReg, AT); ++ ++ if (EmitSync & 65536) { ++ bind (CheckSucc); ++ } ++ ++ bind(DONE); ++ ++ // Avoid branch to branch on AMD processors ++ if (EmitSync & 32768) { nop() ; } ++ } ++} ++ ++void MacroAssembler::align(int modulus) { ++ while (offset() % modulus != 0) nop(); ++} ++ ++ ++void MacroAssembler::verify_FPU(int stack_depth, const char* s) { ++ //Unimplemented(); ++} ++ ++Register caller_saved_registers[] = {T7, T5, T6, A0, A1, A2, A3, A4, A5, A6, A7, T0, T1, T2, T3, T8, T4, S8, RA, FP}; ++Register caller_saved_registers_except_v0[] = {T7, T5, T6, A1, A2, A3, A4, A5, A6, A7, T0, T1, T2, T3, T8, T4, S8, RA, FP}; ++ ++ //TODO: LA ++//In LA, F0~23 are all caller-saved registers ++FloatRegister caller_saved_fpu_registers[] = {F0, F12, F13}; ++ ++// We preserve all caller-saved register ++void MacroAssembler::pushad(){ ++ int i; ++ // Fixed-point registers ++ int len = sizeof(caller_saved_registers) / sizeof(caller_saved_registers[0]); ++ addi_d(SP, SP, -1 * len * wordSize); ++ for (i = 0; i < len; i++) { ++ st_d(caller_saved_registers[i], SP, (len - i - 1) * wordSize); ++ } ++ ++ // Floating-point registers ++ len = sizeof(caller_saved_fpu_registers) / sizeof(caller_saved_fpu_registers[0]); ++ addi_d(SP, SP, -1 * len * wordSize); ++ for (i = 0; i < len; i++) { ++ fst_d(caller_saved_fpu_registers[i], SP, (len - i - 1) * wordSize); ++ } ++}; ++ ++void MacroAssembler::popad(){ ++ int i; ++ // Floating-point registers ++ int len = sizeof(caller_saved_fpu_registers) / sizeof(caller_saved_fpu_registers[0]); ++ for (i = 0; i < len; i++) ++ { ++ fld_d(caller_saved_fpu_registers[i], SP, (len - i - 1) * wordSize); ++ } ++ addi_d(SP, SP, len * wordSize); ++ ++ // Fixed-point registers ++ len = sizeof(caller_saved_registers) / sizeof(caller_saved_registers[0]); ++ for (i = 0; i < len; i++) ++ { ++ ld_d(caller_saved_registers[i], SP, (len - i - 1) * wordSize); ++ } ++ addi_d(SP, SP, len * wordSize); ++}; ++ ++// We preserve all caller-saved register except V0 ++void MacroAssembler::pushad_except_v0() { ++ int i; ++ // Fixed-point registers ++ int len = sizeof(caller_saved_registers_except_v0) / sizeof(caller_saved_registers_except_v0[0]); ++ addi_d(SP, SP, -1 * len * wordSize); ++ for (i = 0; i < len; i++) { ++ st_d(caller_saved_registers_except_v0[i], SP, (len - i - 1) * wordSize); ++ } ++ ++ // Floating-point registers ++ len = sizeof(caller_saved_fpu_registers) / sizeof(caller_saved_fpu_registers[0]); ++ addi_d(SP, SP, -1 * len * wordSize); ++ for (i = 0; i < len; i++) { ++ fst_d(caller_saved_fpu_registers[i], SP, (len - i - 1) * wordSize); ++ } ++} ++ ++void MacroAssembler::popad_except_v0() { ++ int 
i; ++ // Floating-point registers ++ int len = sizeof(caller_saved_fpu_registers) / sizeof(caller_saved_fpu_registers[0]); ++ for (i = 0; i < len; i++) { ++ fld_d(caller_saved_fpu_registers[i], SP, (len - i - 1) * wordSize); ++ } ++ addi_d(SP, SP, len * wordSize); ++ ++ // Fixed-point registers ++ len = sizeof(caller_saved_registers_except_v0) / sizeof(caller_saved_registers_except_v0[0]); ++ for (i = 0; i < len; i++) { ++ ld_d(caller_saved_registers_except_v0[i], SP, (len - i - 1) * wordSize); ++ } ++ addi_d(SP, SP, len * wordSize); ++} ++ ++void MacroAssembler::push2(Register reg1, Register reg2) { ++ addi_d(SP, SP, -16); ++ st_d(reg1, SP, 8); ++ st_d(reg2, SP, 0); ++} ++ ++void MacroAssembler::pop2(Register reg1, Register reg2) { ++ ld_d(reg1, SP, 8); ++ ld_d(reg2, SP, 0); ++ addi_d(SP, SP, 16); ++} ++ ++// for UseCompressedOops Option ++void MacroAssembler::load_klass(Register dst, Register src) { ++ if(UseCompressedClassPointers){ ++ ld_wu(dst, Address(src, oopDesc::klass_offset_in_bytes())); ++ decode_klass_not_null(dst); ++ } else { ++ ld_d(dst, src, oopDesc::klass_offset_in_bytes()); ++ } ++} ++ ++void MacroAssembler::store_klass(Register dst, Register src) { ++ if(UseCompressedClassPointers){ ++ encode_klass_not_null(src); ++ st_w(src, dst, oopDesc::klass_offset_in_bytes()); ++ } else { ++ st_d(src, dst, oopDesc::klass_offset_in_bytes()); ++ } ++} ++ ++void MacroAssembler::load_prototype_header(Register dst, Register src) { ++ load_klass(dst, src); ++ ld_d(dst, Address(dst, Klass::prototype_header_offset())); ++} ++ ++void MacroAssembler::store_klass_gap(Register dst, Register src) { ++ if (UseCompressedClassPointers) { ++ st_w(src, dst, oopDesc::klass_gap_offset_in_bytes()); ++ } ++} ++ ++void MacroAssembler::load_heap_oop(Register dst, Address src) { ++ if(UseCompressedOops){ ++ ld_wu(dst, src); ++ decode_heap_oop(dst); ++ } else { ++ ld_d(dst, src); ++ } ++} ++ ++void MacroAssembler::store_heap_oop(Address dst, Register src){ ++ if(UseCompressedOops){ ++ assert(!dst.uses(src), "not enough registers"); ++ encode_heap_oop(src); ++ st_w(src, dst); ++ } else { ++ st_d(src, dst); ++ } ++} ++ ++void MacroAssembler::store_heap_oop_null(Address dst){ ++ if(UseCompressedOops){ ++ st_w(R0, dst); ++ } else { ++ st_d(R0, dst); ++ } ++} ++ ++#ifdef ASSERT ++void MacroAssembler::verify_heapbase(const char* msg) { ++ assert (UseCompressedOops || UseCompressedClassPointers, "should be compressed"); ++ assert (Universe::heap() != NULL, "java heap should be initialized"); ++} ++#endif ++ ++// Algorithm must match oop.inline.hpp encode_heap_oop. 
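The encode/decode routines that follow implement the usual compressed-oop arithmetic. A minimal scalar sketch of what they compute (base and shift stand in for Universe::narrow_oop_base()/narrow_oop_shift(); NULL must stay NULL in both directions, which is what the maskeqz trick preserves):

    #include <cstdint>

    // encode: narrow = (oop == 0) ? 0 : (oop - base) >> shift
    std::uint32_t encode_oop_sketch(std::uintptr_t oop, std::uintptr_t base, unsigned shift) {
        return oop == 0 ? 0u : static_cast<std::uint32_t>((oop - base) >> shift);
    }

    // decode: oop = (narrow == 0) ? 0 : base + ((uintptr_t)narrow << shift)
    std::uintptr_t decode_oop_sketch(std::uint32_t narrow, std::uintptr_t base, unsigned shift) {
        return narrow == 0 ? 0 : base + (static_cast<std::uintptr_t>(narrow) << shift);
    }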
++void MacroAssembler::encode_heap_oop(Register r) { ++#ifdef ASSERT ++ verify_heapbase("MacroAssembler::encode_heap_oop:heap base corrupted?"); ++#endif ++ verify_oop(r, "broken oop in encode_heap_oop"); ++ if (Universe::narrow_oop_base() == NULL) { ++ if (Universe::narrow_oop_shift() != 0) { ++ assert(LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong"); ++ shr(r, LogMinObjAlignmentInBytes); ++ } ++ return; ++ } ++ ++ sub_d(AT, r, S5_heapbase); ++ maskeqz(r, AT, r); ++ if (Universe::narrow_oop_shift() != 0) { ++ assert(LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong"); ++ shr(r, LogMinObjAlignmentInBytes); ++ } ++} ++ ++void MacroAssembler::encode_heap_oop(Register dst, Register src) { ++#ifdef ASSERT ++ verify_heapbase("MacroAssembler::encode_heap_oop:heap base corrupted?"); ++#endif ++ verify_oop(src, "broken oop in encode_heap_oop"); ++ if (Universe::narrow_oop_base() == NULL) { ++ if (Universe::narrow_oop_shift() != 0) { ++ assert(LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong"); ++ srli_d(dst, src, LogMinObjAlignmentInBytes); ++ } else { ++ if (dst != src) { ++ move(dst, src); ++ } ++ } ++ return; ++ } ++ ++ sub_d(AT, src, S5_heapbase); ++ maskeqz(dst, AT, src); ++ if (Universe::narrow_oop_shift() != 0) { ++ assert(LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong"); ++ shr(dst, LogMinObjAlignmentInBytes); ++ } ++} ++ ++void MacroAssembler::encode_heap_oop_not_null(Register r) { ++ assert (UseCompressedOops, "should be compressed"); ++#ifdef ASSERT ++ if (CheckCompressedOops) { ++ Label ok; ++ bne(r, R0, ok); ++ stop("null oop passed to encode_heap_oop_not_null"); ++ bind(ok); ++ } ++#endif ++ verify_oop(r, "broken oop in encode_heap_oop_not_null"); ++ if (Universe::narrow_oop_base() != NULL) { ++ sub_d(r, r, S5_heapbase); ++ } ++ if (Universe::narrow_oop_shift() != 0) { ++ assert(LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong"); ++ shr(r, LogMinObjAlignmentInBytes); ++ } ++} ++ ++void MacroAssembler::encode_heap_oop_not_null(Register dst, Register src) { ++ assert (UseCompressedOops, "should be compressed"); ++#ifdef ASSERT ++ if (CheckCompressedOops) { ++ Label ok; ++ bne(src, R0, ok); ++ stop("null oop passed to encode_heap_oop_not_null2"); ++ bind(ok); ++ } ++#endif ++ verify_oop(src, "broken oop in encode_heap_oop_not_null2"); ++ if (Universe::narrow_oop_base() == NULL) { ++ if (Universe::narrow_oop_shift() != 0) { ++ assert(LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong"); ++ srli_d(dst, src, LogMinObjAlignmentInBytes); ++ } else { ++ if (dst != src) { ++ move(dst, src); ++ } ++ } ++ return; ++ } ++ sub_d(dst, src, S5_heapbase); ++ if (Universe::narrow_oop_shift() != 0) { ++ assert(LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong"); ++ shr(dst, LogMinObjAlignmentInBytes); ++ } ++} ++ ++void MacroAssembler::decode_heap_oop(Register r) { ++#ifdef ASSERT ++ verify_heapbase("MacroAssembler::decode_heap_oop corrupted?"); ++#endif ++ if (Universe::narrow_oop_base() == NULL) { ++ if (Universe::narrow_oop_shift() != 0) { ++ assert(LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong"); ++ shl(r, LogMinObjAlignmentInBytes); ++ } ++ return; ++ } ++ ++ move(AT, r); ++ if (Universe::narrow_oop_shift() != 0) { ++ assert(LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong"); ++ if (LogMinObjAlignmentInBytes <= 4) { ++ alsl_d(r, r, S5_heapbase, 
LogMinObjAlignmentInBytes - 1); ++ } else { ++ shl(r, LogMinObjAlignmentInBytes); ++ add_d(r, r, S5_heapbase); ++ } ++ } else { ++ add_d(r, r, S5_heapbase); ++ } ++ maskeqz(r, r, AT); ++ verify_oop(r, "broken oop in decode_heap_oop"); ++} ++ ++void MacroAssembler::decode_heap_oop(Register dst, Register src) { ++#ifdef ASSERT ++ verify_heapbase("MacroAssembler::decode_heap_oop corrupted?"); ++#endif ++ if (Universe::narrow_oop_base() == NULL) { ++ if (Universe::narrow_oop_shift() != 0) { ++ assert(LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong"); ++ slli_d(dst, src, LogMinObjAlignmentInBytes); ++ } else { ++ if (dst != src) { ++ move(dst, src); ++ } ++ } ++ return; ++ } ++ ++ Register cond; ++ if (dst == src) { ++ cond = AT; ++ move(cond, src); ++ } else { ++ cond = src; ++ } ++ if (Universe::narrow_oop_shift() != 0) { ++ assert(LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong"); ++ if (LogMinObjAlignmentInBytes <= 4) { ++ alsl_d(dst, src, S5_heapbase, LogMinObjAlignmentInBytes - 1); ++ } else { ++ slli_d(dst, src, LogMinObjAlignmentInBytes); ++ add_d(dst, dst, S5_heapbase); ++ } ++ } else { ++ add_d(dst, src, S5_heapbase); ++ } ++ maskeqz(dst, dst, cond); ++ verify_oop(dst, "broken oop in decode_heap_oop"); ++} ++ ++void MacroAssembler::decode_heap_oop_not_null(Register r) { ++ // Note: it will change flags ++ assert(UseCompressedOops, "should only be used for compressed headers"); ++ assert(Universe::heap() != NULL, "java heap should be initialized"); ++ // Cannot assert, unverified entry point counts instructions (see .ad file) ++ // vtableStubs also counts instructions in pd_code_size_limit. ++ // Also do not verify_oop as this is called by verify_oop. ++ if (Universe::narrow_oop_shift() != 0) { ++ assert(LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong"); ++ if (Universe::narrow_oop_base() != NULL) { ++ if (LogMinObjAlignmentInBytes <= 4) { ++ alsl_d(r, r, S5_heapbase, LogMinObjAlignmentInBytes - 1); ++ } else { ++ shl(r, LogMinObjAlignmentInBytes); ++ add_d(r, r, S5_heapbase); ++ } ++ } else { ++ shl(r, LogMinObjAlignmentInBytes); ++ } ++ } else { ++ assert(Universe::narrow_oop_base() == NULL, "sanity"); ++ } ++} ++ ++void MacroAssembler::decode_heap_oop_not_null(Register dst, Register src) { ++ assert(UseCompressedOops, "should only be used for compressed headers"); ++ assert(Universe::heap() != NULL, "java heap should be initialized"); ++ // Cannot assert, unverified entry point counts instructions (see .ad file) ++ // vtableStubs also counts instructions in pd_code_size_limit. ++ // Also do not verify_oop as this is called by verify_oop. 
++ if (Universe::narrow_oop_shift() != 0) { ++ assert(LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong"); ++ if (Universe::narrow_oop_base() != NULL) { ++ if (LogMinObjAlignmentInBytes <= 4) { ++ alsl_d(dst, src, S5_heapbase, LogMinObjAlignmentInBytes - 1); ++ } else { ++ slli_d(dst, src, LogMinObjAlignmentInBytes); ++ add_d(dst, dst, S5_heapbase); ++ } ++ } else { ++ slli_d(dst, src, LogMinObjAlignmentInBytes); ++ } ++ } else { ++ assert (Universe::narrow_oop_base() == NULL, "sanity"); ++ if (dst != src) { ++ move(dst, src); ++ } ++ } ++} ++ ++void MacroAssembler::encode_klass_not_null(Register r) { ++ if (Universe::narrow_klass_base() != NULL) { ++ assert(r != AT, "Encoding a klass in AT"); ++ li(AT, (int64_t)Universe::narrow_klass_base()); ++ sub_d(r, r, AT); ++ } ++ if (Universe::narrow_klass_shift() != 0) { ++ assert (LogKlassAlignmentInBytes == Universe::narrow_klass_shift(), "decode alg wrong"); ++ shr(r, LogKlassAlignmentInBytes); ++ } ++} ++ ++void MacroAssembler::encode_klass_not_null(Register dst, Register src) { ++ if (dst == src) { ++ encode_klass_not_null(src); ++ } else { ++ if (Universe::narrow_klass_base() != NULL) { ++ li(dst, (int64_t)Universe::narrow_klass_base()); ++ sub_d(dst, src, dst); ++ if (Universe::narrow_klass_shift() != 0) { ++ assert (LogKlassAlignmentInBytes == Universe::narrow_klass_shift(), "decode alg wrong"); ++ shr(dst, LogKlassAlignmentInBytes); ++ } ++ } else { ++ if (Universe::narrow_klass_shift() != 0) { ++ assert (LogKlassAlignmentInBytes == Universe::narrow_klass_shift(), "decode alg wrong"); ++ srli_d(dst, src, LogKlassAlignmentInBytes); ++ } else { ++ move(dst, src); ++ } ++ } ++ } ++} ++ ++// Function instr_size_for_decode_klass_not_null() counts the instructions ++// generated by decode_klass_not_null(register r) and reinit_heapbase(), ++// when (Universe::heap() != NULL). Hence, if the instructions they ++// generate change, then this method needs to be updated. ++int MacroAssembler::instr_size_for_decode_klass_not_null() { ++ assert (UseCompressedClassPointers, "only for compressed klass ptrs"); ++ if (Universe::narrow_klass_base() != NULL) { ++ // mov64 + addq + shlq? + mov64 (for reinit_heapbase()). ++ return (Universe::narrow_klass_shift() == 0 ? 4 * 9 : 4 * 10); ++ } else { ++ // longest load decode klass function, mov64, leaq ++ return (Universe::narrow_klass_shift() == 0 ? 4 * 0 : 4 * 1); ++ } ++} ++ ++void MacroAssembler::decode_klass_not_null(Register r) { ++ assert(UseCompressedClassPointers, "should only be used for compressed headers"); ++ assert(r != AT, "Decoding a klass in AT"); ++ // Cannot assert, unverified entry point counts instructions (see .ad file) ++ // vtableStubs also counts instructions in pd_code_size_limit. ++ // Also do not verify_oop as this is called by verify_oop. ++ if (Universe::narrow_klass_shift() != 0) { ++ assert(LogKlassAlignmentInBytes == Universe::narrow_klass_shift(), "decode alg wrong"); ++ shl(r, LogKlassAlignmentInBytes); ++ } ++ if (Universe::narrow_klass_base() != NULL) { ++ li(AT, (int64_t)Universe::narrow_klass_base()); ++ add_d(r, r, AT); ++ } ++} ++ ++void MacroAssembler::decode_klass_not_null(Register dst, Register src) { ++ assert(UseCompressedClassPointers, "should only be used for compressed headers"); ++ if (dst == src) { ++ decode_klass_not_null(dst); ++ } else { ++ // Cannot assert, unverified entry point counts instructions (see .ad file) ++ // vtableStubs also counts instructions in pd_code_size_limit. 
++ // Also do not verify_oop as this is called by verify_oop. ++ li(dst, (int64_t)Universe::narrow_klass_base()); ++ if (Universe::narrow_klass_shift() != 0) { ++ assert(LogKlassAlignmentInBytes == Universe::narrow_klass_shift(), "decode alg wrong"); ++ assert(LogKlassAlignmentInBytes == Address::times_8, "klass not aligned on 64bits?"); ++ alsl_d(dst, src, dst, Address::times_8 - 1); ++ } else { ++ add_d(dst, src, dst); ++ } ++ } ++} ++ ++void MacroAssembler::reinit_heapbase() { ++ if (UseCompressedOops || UseCompressedClassPointers) { ++ if (Universe::heap() != NULL) { ++ if (Universe::narrow_oop_base() == NULL) { ++ move(S5_heapbase, R0); ++ } else { ++ li(S5_heapbase, (int64_t)Universe::narrow_ptrs_base()); ++ } ++ } else { ++ li(S5_heapbase, (intptr_t)Universe::narrow_ptrs_base_addr()); ++ ld_d(S5_heapbase, S5_heapbase, 0); ++ } ++ } ++} ++ ++void MacroAssembler::check_klass_subtype(Register sub_klass, ++ Register super_klass, ++ Register temp_reg, ++ Label& L_success) { ++//implement ind gen_subtype_check ++ Label L_failure; ++ check_klass_subtype_fast_path(sub_klass, super_klass, temp_reg, &L_success, &L_failure, NULL); ++ check_klass_subtype_slow_path(sub_klass, super_klass, temp_reg, noreg, &L_success, NULL); ++ bind(L_failure); ++} ++ ++SkipIfEqual::SkipIfEqual( ++ MacroAssembler* masm, const bool* flag_addr, bool value) { ++ _masm = masm; ++ _masm->li(AT, (address)flag_addr); ++ _masm->ld_b(AT, AT, 0); ++ _masm->addi_d(AT, AT, -value); ++ _masm->beq(AT, R0, _label); ++} ++ ++void MacroAssembler::check_klass_subtype_fast_path(Register sub_klass, ++ Register super_klass, ++ Register temp_reg, ++ Label* L_success, ++ Label* L_failure, ++ Label* L_slow_path, ++ RegisterOrConstant super_check_offset) { ++ assert_different_registers(sub_klass, super_klass, temp_reg); ++ bool must_load_sco = (super_check_offset.constant_or_zero() == -1); ++ if (super_check_offset.is_register()) { ++ assert_different_registers(sub_klass, super_klass, ++ super_check_offset.as_register()); ++ } else if (must_load_sco) { ++ assert(temp_reg != noreg, "supply either a temp or a register offset"); ++ } ++ ++ Label L_fallthrough; ++ int label_nulls = 0; ++ if (L_success == NULL) { L_success = &L_fallthrough; label_nulls++; } ++ if (L_failure == NULL) { L_failure = &L_fallthrough; label_nulls++; } ++ if (L_slow_path == NULL) { L_slow_path = &L_fallthrough; label_nulls++; } ++ assert(label_nulls <= 1, "at most one NULL in the batch"); ++ ++ int sc_offset = in_bytes(Klass::secondary_super_cache_offset()); ++ int sco_offset = in_bytes(Klass::super_check_offset_offset()); ++ // If the pointers are equal, we are done (e.g., String[] elements). ++ // This self-check enables sharing of secondary supertype arrays among ++ // non-primary types such as array-of-interface. Otherwise, each such ++ // type would need its own customized SSA. ++ // We move this check to the front of the fast path because many ++ // type checks are in fact trivially successful in this manner, ++ // so we get a nicely predicted branch right at the start of the check. ++ beq(sub_klass, super_klass, *L_success); ++ // Check the supertype display: ++ if (must_load_sco) { ++ ld_wu(temp_reg, super_klass, sco_offset); ++ super_check_offset = RegisterOrConstant(temp_reg); ++ } ++ add_d(AT, sub_klass, super_check_offset.register_or_noreg()); ++ ld_d(AT, AT, super_check_offset.constant_or_zero()); ++ ++ // This check has worked decisively for primary supers. ++ // Secondary supers are sought in the super_cache ('super_cache_addr'). 
++ // (Secondary supers are interfaces and very deeply nested subtypes.) ++ // This works in the same check above because of a tricky aliasing ++ // between the super_cache and the primary super display elements. ++ // (The 'super_check_addr' can address either, as the case requires.) ++ // Note that the cache is updated below if it does not help us find ++ // what we need immediately. ++ // So if it was a primary super, we can just fail immediately. ++ // Otherwise, it's the slow path for us (no success at this point). ++ ++ if (super_check_offset.is_register()) { ++ beq(super_klass, AT, *L_success); ++ addi_d(AT, super_check_offset.as_register(), -sc_offset); ++ if (L_failure == &L_fallthrough) { ++ beq(AT, R0, *L_slow_path); ++ } else { ++ bne_far(AT, R0, *L_failure); ++ b(*L_slow_path); ++ } ++ } else if (super_check_offset.as_constant() == sc_offset) { ++ // Need a slow path; fast failure is impossible. ++ if (L_slow_path == &L_fallthrough) { ++ beq(super_klass, AT, *L_success); ++ } else { ++ bne(super_klass, AT, *L_slow_path); ++ b(*L_success); ++ } ++ } else { ++ // No slow path; it's a fast decision. ++ if (L_failure == &L_fallthrough) { ++ beq(super_klass, AT, *L_success); ++ } else { ++ bne_far(super_klass, AT, *L_failure); ++ b(*L_success); ++ } ++ } ++ ++ bind(L_fallthrough); ++} ++ ++void MacroAssembler::check_klass_subtype_slow_path(Register sub_klass, ++ Register super_klass, ++ Register temp_reg, ++ Register temp2_reg, ++ Label* L_success, ++ Label* L_failure, ++ bool set_cond_codes) { ++ if (temp2_reg == noreg) ++ temp2_reg = TSR; ++ assert_different_registers(sub_klass, super_klass, temp_reg, temp2_reg); ++#define IS_A_TEMP(reg) ((reg) == temp_reg || (reg) == temp2_reg) ++ ++ Label L_fallthrough; ++ int label_nulls = 0; ++ if (L_success == NULL) { L_success = &L_fallthrough; label_nulls++; } ++ if (L_failure == NULL) { L_failure = &L_fallthrough; label_nulls++; } ++ assert(label_nulls <= 1, "at most one NULL in the batch"); ++ ++ // a couple of useful fields in sub_klass: ++ int ss_offset = in_bytes(Klass::secondary_supers_offset()); ++ int sc_offset = in_bytes(Klass::secondary_super_cache_offset()); ++ Address secondary_supers_addr(sub_klass, ss_offset); ++ Address super_cache_addr( sub_klass, sc_offset); ++ ++ // Do a linear scan of the secondary super-klass chain. ++ // This code is rarely used, so simplicity is a virtue here. ++ // The repne_scan instruction uses fixed registers, which we must spill. ++ // Don't worry too much about pre-existing connections with the input regs. ++ ++#ifndef PRODUCT ++ int* pst_counter = &SharedRuntime::_partial_subtype_ctr; ++ ExternalAddress pst_counter_addr((address) pst_counter); ++#endif //PRODUCT ++ ++ // We will consult the secondary-super array. ++ ld_d(temp_reg, secondary_supers_addr); ++ // Load the array length. ++ ld_w(temp2_reg, Address(temp_reg, Array::length_offset_in_bytes())); ++ // Skip to start of data. ++ addi_d(temp_reg, temp_reg, Array::base_offset_in_bytes()); ++ ++ Label Loop, subtype; ++ bind(Loop); ++ beq(temp2_reg, R0, *L_failure); ++ ld_d(AT, temp_reg, 0); ++ addi_d(temp_reg, temp_reg, 1 * wordSize); ++ beq(AT, super_klass, subtype); ++ addi_d(temp2_reg, temp2_reg, -1); ++ b(Loop); ++ ++ bind(subtype); ++ st_d(super_klass, super_cache_addr); ++ if (L_success != &L_fallthrough) { ++ b(*L_success); ++ } ++ ++ // Success. Cache the super we found and proceed in triumph. 
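The loop above is the slow-path subtype check: a linear scan of the secondary-supers array that, on a hit, records the result in the secondary_super_cache slot so the next fast-path probe succeeds immediately. A plain C++ sketch with hypothetical types:

    struct KlassSketch {
        const KlassSketch** secondary_supers;     // array of supertypes
        int                 secondary_count;      // number of entries
        const KlassSketch*  secondary_super_cache;
    };

    bool is_subtype_slow_sketch(KlassSketch* sub, const KlassSketch* super) {
        for (int i = 0; i < sub->secondary_count; i++) {
            if (sub->secondary_supers[i] == super) {
                sub->secondary_super_cache = super;   // remembered for the fast path
                return true;
            }
        }
        return false;
    }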
++#undef IS_A_TEMP ++ ++ bind(L_fallthrough); ++} ++ ++void MacroAssembler::get_vm_result(Register oop_result, Register java_thread) { ++ ld_d(oop_result, Address(java_thread, JavaThread::vm_result_offset())); ++ st_d(R0, Address(java_thread, JavaThread::vm_result_offset())); ++ verify_oop(oop_result, "broken oop in call_VM_base"); ++} ++ ++void MacroAssembler::get_vm_result_2(Register metadata_result, Register java_thread) { ++ ld_d(metadata_result, Address(java_thread, JavaThread::vm_result_2_offset())); ++ st_d(R0, Address(java_thread, JavaThread::vm_result_2_offset())); ++} ++ ++Address MacroAssembler::argument_address(RegisterOrConstant arg_slot, ++ int extra_slot_offset) { ++ // cf. TemplateTable::prepare_invoke(), if (load_receiver). ++ int stackElementSize = Interpreter::stackElementSize; ++ int offset = Interpreter::expr_offset_in_bytes(extra_slot_offset+0); ++#ifdef ASSERT ++ int offset1 = Interpreter::expr_offset_in_bytes(extra_slot_offset+1); ++ assert(offset1 - offset == stackElementSize, "correct arithmetic"); ++#endif ++ Register scale_reg = NOREG; ++ Address::ScaleFactor scale_factor = Address::no_scale; ++ if (arg_slot.is_constant()) { ++ offset += arg_slot.as_constant() * stackElementSize; ++ } else { ++ scale_reg = arg_slot.as_register(); ++ scale_factor = Address::times_8; ++ } ++ // We don't push RA on stack in prepare_invoke. ++ // offset += wordSize; // return PC is on stack ++ if(scale_reg==NOREG) return Address(SP, offset); ++ else { ++ alsl_d(scale_reg, scale_reg, SP, scale_factor - 1); ++ return Address(scale_reg, offset); ++ } ++} ++ ++SkipIfEqual::~SkipIfEqual() { ++ _masm->bind(_label); ++} ++ ++void MacroAssembler::load_sized_value(Register dst, Address src, size_t size_in_bytes, bool is_signed, Register dst2) { ++ switch (size_in_bytes) { ++ case 8: ld_d(dst, src); break; ++ case 4: ld_w(dst, src); break; ++ case 2: is_signed ? ld_h(dst, src) : ld_hu(dst, src); break; ++ case 1: is_signed ? ld_b( dst, src) : ld_bu( dst, src); break; ++ default: ShouldNotReachHere(); ++ } ++} ++ ++void MacroAssembler::store_sized_value(Address dst, Register src, size_t size_in_bytes, Register src2) { ++ switch (size_in_bytes) { ++ case 8: st_d(src, dst); break; ++ case 4: st_w(src, dst); break; ++ case 2: st_h(src, dst); break; ++ case 1: st_b(src, dst); break; ++ default: ShouldNotReachHere(); ++ } ++} ++ ++// Look up the method for a megamorphic invokeinterface call. ++// The target method is determined by . ++// The receiver klass is in recv_klass. ++// On success, the result will be in method_result, and execution falls through. ++// On failure, execution transfers to the given label. 
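A rough C++ sketch of what lookup_interface_method (below) computes, using a hypothetical flattened layout (a null-terminated array of offset entries plus a flat method-slot array) instead of the real InstanceKlass/itable structures:

    struct ItableOffsetEntrySketch {
        const void* interface_klass;   // nullptr terminates the table
        int         offset;            // slot offset of this interface's method block
    };

    const void* lookup_interface_sketch(const ItableOffsetEntrySketch* entries,
                                        const void* const* method_slots,
                                        const void* interface_klass,
                                        int itable_index) {
        for (const ItableOffsetEntrySketch* e = entries; ; ++e) {
            if (e->interface_klass == nullptr)
                return nullptr;                           // receiver lacks the interface
            if (e->interface_klass == interface_klass)
                return method_slots[e->offset + itable_index];
        }
    }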
++void MacroAssembler::lookup_interface_method(Register recv_klass, ++ Register intf_klass, ++ RegisterOrConstant itable_index, ++ Register method_result, ++ Register scan_temp, ++ Label& L_no_such_interface, ++ bool return_method) { ++ assert_different_registers(recv_klass, intf_klass, scan_temp, AT); ++ assert_different_registers(method_result, intf_klass, scan_temp, AT); ++ assert(recv_klass != method_result || !return_method, ++ "recv_klass can be destroyed when method isn't needed"); ++ ++ assert(itable_index.is_constant() || itable_index.as_register() == method_result, ++ "caller must use same register for non-constant itable index as for method"); ++ ++ // Compute start of first itableOffsetEntry (which is at the end of the vtable) ++ int vtable_base = InstanceKlass::vtable_start_offset() * wordSize; ++ int itentry_off = itableMethodEntry::method_offset_in_bytes(); ++ int scan_step = itableOffsetEntry::size() * wordSize; ++ int vte_size = vtableEntry::size() * wordSize; ++ Address::ScaleFactor times_vte_scale = Address::times_ptr; ++ assert(vte_size == wordSize, "else adjust times_vte_scale"); ++ ++ ld_w(scan_temp, Address(recv_klass, InstanceKlass::vtable_length_offset() * wordSize)); ++ ++ // %%% Could store the aligned, prescaled offset in the klassoop. ++ alsl_d(scan_temp, scan_temp, recv_klass, times_vte_scale - 1); ++ addi_d(scan_temp, scan_temp, vtable_base); ++ if (HeapWordsPerLong > 1) { ++ // Round up to align_object_offset boundary ++ // see code for InstanceKlass::start_of_itable! ++ round_to(scan_temp, BytesPerLong); ++ } ++ ++ if (return_method) { ++ // Adjust recv_klass by scaled itable_index, so we can free itable_index. ++ assert(itableMethodEntry::size() * wordSize == wordSize, "adjust the scaling in the code below"); ++ if (itable_index.is_constant()) { ++ li(AT, (int)itable_index.is_constant()); ++ alsl_d(AT, AT, recv_klass, (int)Address::times_ptr - 1); ++ } else { ++ alsl_d(AT, itable_index.as_register(), recv_klass, (int)Address::times_ptr - 1); ++ } ++ addi_d(recv_klass, AT, itentry_off); ++ } ++ ++ Label search, found_method; ++ ++ for (int peel = 1; peel >= 0; peel--) { ++ ld_d(method_result, Address(scan_temp, itableOffsetEntry::interface_offset_in_bytes())); ++ ++ if (peel) { ++ beq(intf_klass, method_result, found_method); ++ } else { ++ bne(intf_klass, method_result, search); ++ // (invert the test to fall through to found_method...) ++ } ++ ++ if (!peel) break; ++ ++ bind(search); ++ ++ // Check that the previous entry is non-null. A null entry means that ++ // the receiver class doesn't implement the interface, and wasn't the ++ // same as when the caller was compiled. ++ beq(method_result, R0, L_no_such_interface); ++ addi_d(scan_temp, scan_temp, scan_step); ++ } ++ ++ bind(found_method); ++ ++ if (return_method) { ++ // Got a hit. 
++ ld_w(scan_temp, Address(scan_temp, itableOffsetEntry::offset_offset_in_bytes())); ++ ldx_d(method_result, recv_klass, scan_temp); ++ } ++} ++ ++// virtual method calling ++void MacroAssembler::lookup_virtual_method(Register recv_klass, ++ RegisterOrConstant vtable_index, ++ Register method_result) { ++ Register tmp = S8; ++ push(tmp); ++ ++ if (vtable_index.is_constant()) { ++ assert_different_registers(recv_klass, method_result, tmp); ++ } else { ++ assert_different_registers(recv_klass, method_result, vtable_index.as_register(), tmp); ++ } ++ const int base = InstanceKlass::vtable_start_offset() * wordSize; ++ assert(vtableEntry::size() * wordSize == wordSize, "else adjust the scaling in the code below"); ++ if (vtable_index.is_constant()) { ++ li(AT, vtable_index.as_constant()); ++ slli_d(AT, AT, (int)Address::times_ptr); ++ } else { ++ slli_d(AT, vtable_index.as_register(), (int)Address::times_ptr); ++ } ++ li(tmp, base + vtableEntry::method_offset_in_bytes()); ++ add_d(tmp, tmp, AT); ++ add_d(tmp, tmp, recv_klass); ++ ld_d(method_result, tmp, 0); ++ ++ pop(tmp); ++} ++ ++void MacroAssembler::load_byte_map_base(Register reg) { ++ jbyte *byte_map_base = ++ ((CardTableModRefBS*)(Universe::heap()->barrier_set()))->byte_map_base; ++ ++ // Strictly speaking the byte_map_base isn't an address at all, and it might ++ // even be negative. It is thus materialised as a constant. ++ li(reg, (uint64_t)byte_map_base); ++} ++ ++void MacroAssembler::clear_jweak_tag(Register possibly_jweak) { ++ const int32_t inverted_jweak_mask = ~static_cast(JNIHandles::weak_tag_mask); ++ STATIC_ASSERT(inverted_jweak_mask == -2); // otherwise check this code ++ // The inverted mask is sign-extended ++ li(AT, inverted_jweak_mask); ++ andr(possibly_jweak, AT, possibly_jweak); ++} ++ ++void MacroAssembler::resolve_jobject(Register value, ++ Register thread, ++ Register tmp) { ++ assert_different_registers(value, thread, tmp); ++ Label done, not_weak; ++ beq(value, R0, done); // Use NULL as-is. ++ li(AT, JNIHandles::weak_tag_mask); // Test for jweak tag. ++ andr(AT, value, AT); ++ beq(AT, R0, not_weak); ++ // Resolve jweak. ++ ld_d(value, value, -JNIHandles::weak_tag_value); ++ verify_oop(value); ++ #if INCLUDE_ALL_GCS ++ if (UseG1GC) { ++ g1_write_barrier_pre(noreg /* obj */, ++ value /* pre_val */, ++ thread /* thread */, ++ tmp /* tmp */, ++ true /* tosca_live */, ++ true /* expand_call */); ++ } ++ #endif // INCLUDE_ALL_GCS ++ b(done); ++ bind(not_weak); ++ // Resolve (untagged) jobject. 
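The resolve_jobject sequence here boils down to: NULL stays NULL, the low bit of the handle distinguishes weak from strong, and the oop is loaded through the untagged pointer (the G1 pre-barrier taken on the weak case is omitted from this sketch). A minimal sketch, assuming the tag is bit 0 as with JNIHandles::weak_tag_mask:

    #include <cstdint>

    std::uintptr_t resolve_handle_sketch(std::uintptr_t handle) {
        const std::uintptr_t weak_tag_mask = 1;
        if (handle == 0)
            return 0;                                        // NULL handle used as-is
        std::uintptr_t* slot =
            reinterpret_cast<std::uintptr_t*>(handle & ~weak_tag_mask);
        return *slot;                                        // load the referenced oop
    }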
++ ld_d(value, value, 0); ++ verify_oop(value); ++ bind(done); ++} ++ ++void MacroAssembler::lea(Register rd, Address src) { ++ Register dst = rd; ++ Register base = src.base(); ++ Register index = src.index(); ++ ++ int scale = src.scale(); ++ int disp = src.disp(); ++ ++ if (index == noreg) { ++ if (is_simm(disp, 12)) { ++ addi_d(dst, base, disp); ++ } else { ++ lu12i_w(AT, split_low20(disp >> 12)); ++ if (split_low12(disp)) ++ ori(AT, AT, split_low12(disp)); ++ add_d(dst, base, AT); ++ } ++ } else { ++ if (scale == 0) { ++ if (is_simm(disp, 12)) { ++ add_d(AT, base, index); ++ addi_d(dst, AT, disp); ++ } else { ++ lu12i_w(AT, split_low20(disp >> 12)); ++ if (split_low12(disp)) ++ ori(AT, AT, split_low12(disp)); ++ add_d(AT, base, AT); ++ add_d(dst, AT, index); ++ } ++ } else { ++ if (is_simm(disp, 12)) { ++ alsl_d(AT, index, base, scale - 1); ++ addi_d(dst, AT, disp); ++ } else { ++ lu12i_w(AT, split_low20(disp >> 12)); ++ if (split_low12(disp)) ++ ori(AT, AT, split_low12(disp)); ++ add_d(AT, AT, base); ++ alsl_d(dst, index, AT, scale - 1); ++ } ++ } ++ } ++} ++ ++void MacroAssembler::lea(Register dst, AddressLiteral adr) { ++ code_section()->relocate(pc(), adr.rspec()); ++ pcaddi(dst, (adr.target() - pc()) >> 2); ++} ++ ++int MacroAssembler::patched_branch(int dest_pos, int inst, int inst_pos) { ++ int v = (dest_pos - inst_pos) >> 2; ++ switch(high(inst, 6)) { ++ case beq_op: ++ case bne_op: ++ case blt_op: ++ case bge_op: ++ case bltu_op: ++ case bgeu_op: ++ assert(is_simm16(v), "must be simm16"); ++#ifndef PRODUCT ++ if(!is_simm16(v)) ++ { ++ tty->print_cr("must be simm16"); ++ tty->print_cr("Inst: %x", inst); ++ } ++#endif ++ ++ inst &= 0xfc0003ff; ++ inst |= ((v & 0xffff) << 10); ++ break; ++ case beqz_op: ++ case bnez_op: ++ case bccondz_op: ++ assert(is_simm(v, 21), "must be simm21"); ++#ifndef PRODUCT ++ if(!is_simm(v, 21)) ++ { ++ tty->print_cr("must be simm21"); ++ tty->print_cr("Inst: %x", inst); ++ } ++#endif ++ ++ inst &= 0xfc0003e0; ++ inst |= ( ((v & 0xffff) << 10) | ((v >> 16) & 0x1f) ); ++ break; ++ case b_op: ++ case bl_op: ++ assert(is_simm(v, 26), "must be simm26"); ++#ifndef PRODUCT ++ if(!is_simm(v, 26)) ++ { ++ tty->print_cr("must be simm26"); ++ tty->print_cr("Inst: %x", inst); ++ } ++#endif ++ ++ inst &= 0xfc000000; ++ inst |= ( ((v & 0xffff) << 10) | ((v >> 16) & 0x3ff) ); ++ break; ++ default: ++ ShouldNotReachHere(); ++ break; ++ } ++ return inst; ++} ++ ++void MacroAssembler::cmp_cmov(Register op1, ++ Register op2, ++ Register dst, ++ Register src, ++ CMCompare cmp, ++ bool is_signed) { ++ switch (cmp) { ++ case EQ: ++ sub_d(AT, op1, op2); ++ maskeqz(dst, dst, AT); ++ masknez(AT, src, AT); ++ break; ++ ++ case NE: ++ sub_d(AT, op1, op2); ++ masknez(dst, dst, AT); ++ maskeqz(AT, src, AT); ++ break; ++ ++ case GT: ++ if (is_signed) { ++ slt(AT, op2, op1); ++ } else { ++ sltu(AT, op2, op1); ++ } ++ masknez(dst, dst, AT); ++ maskeqz(AT, src, AT); ++ break; ++ ++ case GE: ++ if (is_signed) { ++ slt(AT, op1, op2); ++ } else { ++ sltu(AT, op1, op2); ++ } ++ maskeqz(dst, dst, AT); ++ masknez(AT, src, AT); ++ break; ++ ++ case LT: ++ if (is_signed) { ++ slt(AT, op1, op2); ++ } else { ++ sltu(AT, op1, op2); ++ } ++ masknez(dst, dst, AT); ++ maskeqz(AT, src, AT); ++ break; ++ ++ case LE: ++ if (is_signed) { ++ slt(AT, op2, op1); ++ } else { ++ sltu(AT, op2, op1); ++ } ++ maskeqz(dst, dst, AT); ++ masknez(AT, src, AT); ++ break; ++ ++ default: ++ Unimplemented(); ++ } ++ OR(dst, dst, AT); ++} ++ ++ ++void MacroAssembler::cmp_cmov(FloatRegister op1, ++ FloatRegister op2, 
++ Register dst, ++ Register src, ++ FloatRegister tmp1, ++ FloatRegister tmp2, ++ CMCompare cmp, ++ bool is_float) { ++ movgr2fr_d(tmp1, dst); ++ movgr2fr_d(tmp2, src); ++ ++ switch(cmp) { ++ case EQ: ++ if (is_float) { ++ fcmp_ceq_s(FCC0, op1, op2); ++ } else { ++ fcmp_ceq_d(FCC0, op1, op2); ++ } ++ fsel(tmp1, tmp1, tmp2, FCC0); ++ break; ++ ++ case NE: ++ if (is_float) { ++ fcmp_ceq_s(FCC0, op1, op2); ++ } else { ++ fcmp_ceq_d(FCC0, op1, op2); ++ } ++ fsel(tmp1, tmp2, tmp1, FCC0); ++ break; ++ ++ case GT: ++ if (is_float) { ++ fcmp_cule_s(FCC0, op1, op2); ++ } else { ++ fcmp_cule_d(FCC0, op1, op2); ++ } ++ fsel(tmp1, tmp2, tmp1, FCC0); ++ break; ++ ++ case GE: ++ if (is_float) { ++ fcmp_cult_s(FCC0, op1, op2); ++ } else { ++ fcmp_cult_d(FCC0, op1, op2); ++ } ++ fsel(tmp1, tmp2, tmp1, FCC0); ++ break; ++ ++ case LT: ++ if (is_float) { ++ fcmp_cult_s(FCC0, op1, op2); ++ } else { ++ fcmp_cult_d(FCC0, op1, op2); ++ } ++ fsel(tmp1, tmp1, tmp2, FCC0); ++ break; ++ ++ case LE: ++ if (is_float) { ++ fcmp_cule_s(FCC0, op1, op2); ++ } else { ++ fcmp_cule_d(FCC0, op1, op2); ++ } ++ fsel(tmp1, tmp1, tmp2, FCC0); ++ break; ++ ++ default: ++ Unimplemented(); ++ } ++ ++ movfr2gr_d(dst, tmp1); ++} ++ ++void MacroAssembler::cmp_cmov(FloatRegister op1, ++ FloatRegister op2, ++ FloatRegister dst, ++ FloatRegister src, ++ CMCompare cmp, ++ bool is_float) { ++ switch(cmp) { ++ case EQ: ++ if (!is_float) { ++ fcmp_ceq_d(FCC0, op1, op2); ++ } else { ++ fcmp_ceq_s(FCC0, op1, op2); ++ } ++ fsel(dst, dst, src, FCC0); ++ break; ++ ++ case NE: ++ if (!is_float) { ++ fcmp_ceq_d(FCC0, op1, op2); ++ } else { ++ fcmp_ceq_s(FCC0, op1, op2); ++ } ++ fsel(dst, src, dst, FCC0); ++ break; ++ ++ case GT: ++ if (!is_float) { ++ fcmp_cule_d(FCC0, op1, op2); ++ } else { ++ fcmp_cule_s(FCC0, op1, op2); ++ } ++ fsel(dst, src, dst, FCC0); ++ break; ++ ++ case GE: ++ if (!is_float) { ++ fcmp_cult_d(FCC0, op1, op2); ++ } else { ++ fcmp_cult_s(FCC0, op1, op2); ++ } ++ fsel(dst, src, dst, FCC0); ++ break; ++ ++ case LT: ++ if (!is_float) { ++ fcmp_cult_d(FCC0, op1, op2); ++ } else { ++ fcmp_cult_s(FCC0, op1, op2); ++ } ++ fsel(dst, dst, src, FCC0); ++ break; ++ ++ case LE: ++ if (!is_float) { ++ fcmp_cule_d(FCC0, op1, op2); ++ } else { ++ fcmp_cule_s(FCC0, op1, op2); ++ } ++ fsel(dst, dst, src, FCC0); ++ break; ++ ++ default: ++ Unimplemented(); ++ } ++} ++ ++void MacroAssembler::cmp_cmov(Register op1, ++ Register op2, ++ FloatRegister dst, ++ FloatRegister src, ++ FloatRegister tmp1, ++ FloatRegister tmp2, ++ CMCompare cmp) { ++ movgr2fr_w(tmp1, R0); ++ ++ switch (cmp) { ++ case EQ: ++ sub_d(AT, op1, op2); ++ movgr2fr_w(tmp2, AT); ++ fcmp_ceq_s(FCC0, tmp1, tmp2); ++ fsel(dst, dst, src, FCC0); ++ break; ++ ++ case NE: ++ sub_d(AT, op1, op2); ++ movgr2fr_w(tmp2, AT); ++ fcmp_ceq_s(FCC0, tmp1, tmp2); ++ fsel(dst, src, dst, FCC0); ++ break; ++ ++ case GT: ++ slt(AT, op2, op1); ++ movgr2fr_w(tmp2, AT); ++ fcmp_ceq_s(FCC0, tmp1, tmp2); ++ fsel(dst, src, dst, FCC0); ++ break; ++ ++ case GE: ++ slt(AT, op1, op2); ++ movgr2fr_w(tmp2, AT); ++ fcmp_ceq_s(FCC0, tmp1, tmp2); ++ fsel(dst, dst, src, FCC0); ++ break; ++ ++ case LT: ++ slt(AT, op1, op2); ++ movgr2fr_w(tmp2, AT); ++ fcmp_ceq_s(FCC0, tmp1, tmp2); ++ fsel(dst, src, dst, FCC0); ++ break; ++ ++ case LE: ++ slt(AT, op2, op1); ++ movgr2fr_w(tmp2, AT); ++ fcmp_ceq_s(FCC0, tmp1, tmp2); ++ fsel(dst, dst, src, FCC0); ++ break; ++ ++ default: ++ Unimplemented(); ++ } ++} ++ ++void MacroAssembler::loadstore(Register reg, Register base, int disp, int type) { ++ switch (type) { ++ case STORE_BYTE: 
st_b (reg, base, disp); break; ++ case STORE_CHAR: ++ case STORE_SHORT: st_h (reg, base, disp); break; ++ case STORE_INT: st_w (reg, base, disp); break; ++ case STORE_LONG: st_d (reg, base, disp); break; ++ case LOAD_BYTE: ld_b (reg, base, disp); break; ++ case LOAD_U_BYTE: ld_bu(reg, base, disp); break; ++ case LOAD_SHORT: ld_h (reg, base, disp); break; ++ case LOAD_U_SHORT: ld_hu(reg, base, disp); break; ++ case LOAD_INT: ld_w (reg, base, disp); break; ++ case LOAD_U_INT: ld_wu(reg, base, disp); break; ++ case LOAD_LONG: ld_d (reg, base, disp); break; ++ case LOAD_LINKED_LONG: ++ ll_d(reg, base, disp); ++ break; ++ default: ++ ShouldNotReachHere(); ++ } ++} ++ ++void MacroAssembler::loadstore(Register reg, Register base, Register disp, int type) { ++ switch (type) { ++ case STORE_BYTE: stx_b (reg, base, disp); break; ++ case STORE_CHAR: ++ case STORE_SHORT: stx_h (reg, base, disp); break; ++ case STORE_INT: stx_w (reg, base, disp); break; ++ case STORE_LONG: stx_d (reg, base, disp); break; ++ case LOAD_BYTE: ldx_b (reg, base, disp); break; ++ case LOAD_U_BYTE: ldx_bu(reg, base, disp); break; ++ case LOAD_SHORT: ldx_h (reg, base, disp); break; ++ case LOAD_U_SHORT: ldx_hu(reg, base, disp); break; ++ case LOAD_INT: ldx_w (reg, base, disp); break; ++ case LOAD_U_INT: ldx_wu(reg, base, disp); break; ++ case LOAD_LONG: ldx_d (reg, base, disp); break; ++ case LOAD_LINKED_LONG: ++ add_d(AT, base, disp); ++ ll_d(reg, AT, 0); ++ break; ++ default: ++ ShouldNotReachHere(); ++ } ++} ++ ++void MacroAssembler::loadstore(FloatRegister reg, Register base, int disp, int type) { ++ switch (type) { ++ case STORE_FLOAT: fst_s(reg, base, disp); break; ++ case STORE_DOUBLE: fst_d(reg, base, disp); break; ++ case STORE_VECTORX: vst (reg, base, disp); break; ++ case STORE_VECTORY: xvst (reg, base, disp); break; ++ case LOAD_FLOAT: fld_s(reg, base, disp); break; ++ case LOAD_DOUBLE: fld_d(reg, base, disp); break; ++ case LOAD_VECTORX: vld (reg, base, disp); break; ++ case LOAD_VECTORY: xvld (reg, base, disp); break; ++ default: ++ ShouldNotReachHere(); ++ } ++} ++ ++void MacroAssembler::loadstore(FloatRegister reg, Register base, Register disp, int type) { ++ switch (type) { ++ case STORE_FLOAT: fstx_s(reg, base, disp); break; ++ case STORE_DOUBLE: fstx_d(reg, base, disp); break; ++ case STORE_VECTORX: vstx (reg, base, disp); break; ++ case STORE_VECTORY: xvstx (reg, base, disp); break; ++ case LOAD_FLOAT: fldx_s(reg, base, disp); break; ++ case LOAD_DOUBLE: fldx_d(reg, base, disp); break; ++ case LOAD_VECTORX: vldx (reg, base, disp); break; ++ case LOAD_VECTORY: xvldx (reg, base, disp); break; ++ default: ++ ShouldNotReachHere(); ++ } ++} ++ ++/** ++ * Emits code to update CRC-32 with a byte value according to constants in table ++ * ++ * @param [in,out]crc Register containing the crc. ++ * @param [in]val Register containing the byte to fold into the CRC. ++ * @param [in]table Register containing the table of crc constants. 
++ * ++ * uint32_t crc; ++ * val = crc_table[(val ^ crc) & 0xFF]; ++ * crc = val ^ (crc >> 8); ++**/ ++void MacroAssembler::update_byte_crc32(Register crc, Register val, Register table) { ++ xorr(val, val, crc); ++ andi(val, val, 0xff); ++ ld_w(val, Address(table, val, Address::times_4, 0)); ++ srli_w(crc, crc, 8); ++ xorr(crc, val, crc); ++} ++ ++/** ++ * @param crc register containing existing CRC (32-bit) ++ * @param buf register pointing to input byte buffer (byte*) ++ * @param len register containing number of bytes ++ * @param tmp scratch register ++**/ ++void MacroAssembler::kernel_crc32(Register crc, Register buf, Register len, Register tmp) { ++ Label CRC_by64_loop, CRC_by4_loop, CRC_by1_loop, CRC_less64, CRC_by64_pre, CRC_by32_loop, CRC_less32, L_exit; ++ assert_different_registers(crc, buf, len, tmp); ++ ++ nor(crc, crc, R0); ++ ++ addi_d(len, len, -64); ++ bge(len, R0, CRC_by64_loop); ++ addi_d(len, len, 64-4); ++ bge(len, R0, CRC_by4_loop); ++ addi_d(len, len, 4); ++ blt(R0, len, CRC_by1_loop); ++ b(L_exit); ++ ++ bind(CRC_by64_loop); ++ ld_d(tmp, buf, 0); ++ crc_w_d_w(crc, tmp, crc); ++ ld_d(tmp, buf, 8); ++ crc_w_d_w(crc, tmp, crc); ++ ld_d(tmp, buf, 16); ++ crc_w_d_w(crc, tmp, crc); ++ ld_d(tmp, buf, 24); ++ crc_w_d_w(crc, tmp, crc); ++ ld_d(tmp, buf, 32); ++ crc_w_d_w(crc, tmp, crc); ++ ld_d(tmp, buf, 40); ++ crc_w_d_w(crc, tmp, crc); ++ ld_d(tmp, buf, 48); ++ crc_w_d_w(crc, tmp, crc); ++ ld_d(tmp, buf, 56); ++ crc_w_d_w(crc, tmp, crc); ++ addi_d(buf, buf, 64); ++ addi_d(len, len, -64); ++ bge(len, R0, CRC_by64_loop); ++ addi_d(len, len, 64-4); ++ bge(len, R0, CRC_by4_loop); ++ addi_d(len, len, 4); ++ blt(R0, len, CRC_by1_loop); ++ b(L_exit); ++ ++ bind(CRC_by4_loop); ++ ld_w(tmp, buf, 0); ++ crc_w_w_w(crc, tmp, crc); ++ addi_d(buf, buf, 4); ++ addi_d(len, len, -4); ++ bge(len, R0, CRC_by4_loop); ++ addi_d(len, len, 4); ++ bge(R0, len, L_exit); ++ ++ bind(CRC_by1_loop); ++ ld_b(tmp, buf, 0); ++ crc_w_b_w(crc, tmp, crc); ++ addi_d(buf, buf, 1); ++ addi_d(len, len, -1); ++ blt(R0, len, CRC_by1_loop); ++ ++ bind(L_exit); ++ nor(crc, crc, R0); ++} ++ ++/** ++ * @param crc register containing existing CRC (32-bit) ++ * @param buf register pointing to input byte buffer (byte*) ++ * @param len register containing number of bytes ++ * @param tmp scratch register ++**/ ++void MacroAssembler::kernel_crc32c(Register crc, Register buf, Register len, Register tmp) { ++ Label CRC_by64_loop, CRC_by4_loop, CRC_by1_loop, CRC_less64, CRC_by64_pre, CRC_by32_loop, CRC_less32, L_exit; ++ assert_different_registers(crc, buf, len, tmp); ++ ++ addi_d(len, len, -64); ++ bge(len, R0, CRC_by64_loop); ++ addi_d(len, len, 64-4); ++ bge(len, R0, CRC_by4_loop); ++ addi_d(len, len, 4); ++ blt(R0, len, CRC_by1_loop); ++ b(L_exit); ++ ++ bind(CRC_by64_loop); ++ ld_d(tmp, buf, 0); ++ crcc_w_d_w(crc, tmp, crc); ++ ld_d(tmp, buf, 8); ++ crcc_w_d_w(crc, tmp, crc); ++ ld_d(tmp, buf, 16); ++ crcc_w_d_w(crc, tmp, crc); ++ ld_d(tmp, buf, 24); ++ crcc_w_d_w(crc, tmp, crc); ++ ld_d(tmp, buf, 32); ++ crcc_w_d_w(crc, tmp, crc); ++ ld_d(tmp, buf, 40); ++ crcc_w_d_w(crc, tmp, crc); ++ ld_d(tmp, buf, 48); ++ crcc_w_d_w(crc, tmp, crc); ++ ld_d(tmp, buf, 56); ++ crcc_w_d_w(crc, tmp, crc); ++ addi_d(buf, buf, 64); ++ addi_d(len, len, -64); ++ bge(len, R0, CRC_by64_loop); ++ addi_d(len, len, 64-4); ++ bge(len, R0, CRC_by4_loop); ++ addi_d(len, len, 4); ++ blt(R0, len, CRC_by1_loop); ++ b(L_exit); ++ ++ bind(CRC_by4_loop); ++ ld_w(tmp, buf, 0); ++ crcc_w_w_w(crc, tmp, crc); ++ addi_d(buf, buf, 4); ++ addi_d(len, len, -4); ++ 
bge(len, R0, CRC_by4_loop); ++ addi_d(len, len, 4); ++ bge(R0, len, L_exit); ++ ++ bind(CRC_by1_loop); ++ ld_b(tmp, buf, 0); ++ crcc_w_b_w(crc, tmp, crc); ++ addi_d(buf, buf, 1); ++ addi_d(len, len, -1); ++ blt(R0, len, CRC_by1_loop); ++ ++ bind(L_exit); ++} +diff --git a/hotspot/src/cpu/loongarch/vm/macroAssembler_loongarch.hpp b/hotspot/src/cpu/loongarch/vm/macroAssembler_loongarch.hpp +new file mode 100644 +index 0000000000..8b123c2906 +--- /dev/null ++++ b/hotspot/src/cpu/loongarch/vm/macroAssembler_loongarch.hpp +@@ -0,0 +1,771 @@ ++/* ++ * Copyright (c) 1997, 2013, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#ifndef CPU_LOONGARCH_VM_MACROASSEMBLER_LOONGARCH_HPP ++#define CPU_LOONGARCH_VM_MACROASSEMBLER_LOONGARCH_HPP ++ ++#include "asm/assembler.hpp" ++#include "utilities/macros.hpp" ++#include "runtime/rtmLocking.hpp" ++ ++ ++// MacroAssembler extends Assembler by frequently used macros. ++// ++// Instructions for which a 'better' code sequence exists depending ++// on arguments should also go in here. ++ ++class MacroAssembler: public Assembler { ++ friend class LIR_Assembler; ++ friend class Runtime1; // as_Address() ++ ++ public: ++ // Compare code ++ typedef enum { ++ EQ = 0x01, ++ NE = 0x02, ++ GT = 0x03, ++ GE = 0x04, ++ LT = 0x05, ++ LE = 0x06 ++ } CMCompare; ++ ++ protected: ++ ++ Address as_Address(AddressLiteral adr); ++ Address as_Address(ArrayAddress adr); ++ ++ // Support for VM calls ++ // ++ // This is the base routine called by the different versions of call_VM_leaf. The interpreter ++ // may customize this version by overriding it for its purposes (e.g., to save/restore ++ // additional registers when doing a VM call). ++#ifdef CC_INTERP ++ // c++ interpreter never wants to use interp_masm version of call_VM ++ #define VIRTUAL ++#else ++ #define VIRTUAL virtual ++#endif ++ ++ VIRTUAL void call_VM_leaf_base( ++ address entry_point, // the entry point ++ int number_of_arguments // the number of arguments to pop after the call ++ ); ++ ++ // This is the base routine called by the different versions of call_VM. The interpreter ++ // may customize this version by overriding it for its purposes (e.g., to save/restore ++ // additional registers when doing a VM call). ++ // ++ // If no java_thread register is specified (noreg) than TREG will be used instead. call_VM_base ++ // returns the register which contains the thread upon return. 
If a thread register has been ++ // specified, the return value will correspond to that register. If no last_java_sp is specified ++ // (noreg) than sp will be used instead. ++ VIRTUAL void call_VM_base( // returns the register containing the thread upon return ++ Register oop_result, // where an oop-result ends up if any; use noreg otherwise ++ Register java_thread, // the thread if computed before ; use noreg otherwise ++ Register last_java_sp, // to set up last_Java_frame in stubs; use noreg otherwise ++ address entry_point, // the entry point ++ int number_of_arguments, // the number of arguments (w/o thread) to pop after the call ++ bool check_exceptions // whether to check for pending exceptions after return ++ ); ++ ++ // These routines should emit JVMTI PopFrame and ForceEarlyReturn handling code. ++ // The implementation is only non-empty for the InterpreterMacroAssembler, ++ // as only the interpreter handles PopFrame and ForceEarlyReturn requests. ++ virtual void check_and_handle_popframe(Register java_thread); ++ virtual void check_and_handle_earlyret(Register java_thread); ++ ++ void call_VM_helper(Register oop_result, address entry_point, int number_of_arguments, bool check_exceptions = true); ++ ++ // helpers for FPU flag access ++ // tmp is a temporary register, if none is available use noreg ++ ++ public: ++ static intptr_t i[32]; ++ static float f[32]; ++ static void print(outputStream *s); ++ ++ static int i_offset(unsigned int k); ++ static int f_offset(unsigned int k); ++ ++ static void save_registers(MacroAssembler *masm); ++ static void restore_registers(MacroAssembler *masm); ++ ++ MacroAssembler(CodeBuffer* code) : Assembler(code) {} ++ ++ // Support for NULL-checks ++ // ++ // Generates code that causes a NULL OS exception if the content of reg is NULL. ++ // If the accessed location is M[reg + offset] and the offset is known, provide the ++ // offset. No explicit code generation is needed if the offset is within a certain ++ // range (0 <= offset <= page_size). ++ ++ void null_check(Register reg, int offset = -1); ++ static bool needs_explicit_null_check(intptr_t offset); ++ ++ // Required platform-specific helpers for Label::patch_instructions. ++ // They _shadow_ the declarations in AbstractAssembler, which are undefined. ++ void pd_patch_instruction(address branch, address target); ++ ++ address emit_trampoline_stub(int insts_call_instruction_offset, address target); ++ ++ // Support for inc/dec with optimal instruction selection depending on value ++ // void incrementl(Register reg, int value = 1); ++ // void decrementl(Register reg, int value = 1); ++ ++ ++ // Alignment ++ void align(int modulus); ++ ++ ++ // Stack frame creation/removal ++ void enter(); ++ void leave(); ++ ++ // Frame creation and destruction shared between JITs. ++ void build_frame(int framesize); ++ void remove_frame(int framesize); ++ ++ // Support for getting the JavaThread pointer (i.e.; a reference to thread-local information) ++ // The pointer will be loaded into the thread register. ++ void get_thread(Register thread); ++ ++ ++ // Support for VM calls ++ // ++ // It is imperative that all calls into the VM are handled via the call_VM macros. ++ // They make sure that the stack linkage is setup correctly. call_VM's correspond ++ // to ENTRY/ENTRY_X entry points while call_VM_leaf's correspond to LEAF entry points. 
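To make the defaulting convention above concrete, here is a minimal standalone sketch (illustrative only, not taken from the patch; the enum values, struct, and helper name are stand-ins) of the rule the comment describes: a noreg thread argument falls back to TREG and a noreg last_java_sp falls back to SP.

    #include <cassert>

    // Stand-in register encodings for illustration only.
    enum Register { noreg = -1, TREG = 2, SP = 3 };

    struct VMCallArgs {
      Register java_thread;
      Register last_java_sp;
    };

    // Mirrors the defaulting rule described in the comment above.
    VMCallArgs resolve_defaults(Register java_thread, Register last_java_sp) {
      if (java_thread == noreg)  java_thread  = TREG;  // thread lives in TREG by default
      if (last_java_sp == noreg) last_java_sp = SP;    // current sp is the default anchor
      return VMCallArgs{java_thread, last_java_sp};
    }

    int main() {
      VMCallArgs a = resolve_defaults(noreg, noreg);
      assert(a.java_thread == TREG && a.last_java_sp == SP);
      return 0;
    }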
++ ++ ++ void call_VM(Register oop_result, ++ address entry_point, ++ bool check_exceptions = true); ++ void call_VM(Register oop_result, ++ address entry_point, ++ Register arg_1, ++ bool check_exceptions = true); ++ void call_VM(Register oop_result, ++ address entry_point, ++ Register arg_1, Register arg_2, ++ bool check_exceptions = true); ++ void call_VM(Register oop_result, ++ address entry_point, ++ Register arg_1, Register arg_2, Register arg_3, ++ bool check_exceptions = true); ++ ++ // Overloadings with last_Java_sp ++ void call_VM(Register oop_result, ++ Register last_java_sp, ++ address entry_point, ++ int number_of_arguments = 0, ++ bool check_exceptions = true); ++ void call_VM(Register oop_result, ++ Register last_java_sp, ++ address entry_point, ++ Register arg_1, bool ++ check_exceptions = true); ++ void call_VM(Register oop_result, ++ Register last_java_sp, ++ address entry_point, ++ Register arg_1, Register arg_2, ++ bool check_exceptions = true); ++ void call_VM(Register oop_result, ++ Register last_java_sp, ++ address entry_point, ++ Register arg_1, Register arg_2, Register arg_3, ++ bool check_exceptions = true); ++ ++ void get_vm_result (Register oop_result, Register thread); ++ void get_vm_result_2(Register metadata_result, Register thread); ++ void call_VM_leaf(address entry_point, ++ int number_of_arguments = 0); ++ void call_VM_leaf(address entry_point, ++ Register arg_1); ++ void call_VM_leaf(address entry_point, ++ Register arg_1, Register arg_2); ++ void call_VM_leaf(address entry_point, ++ Register arg_1, Register arg_2, Register arg_3); ++ ++ // Super call_VM calls - correspond to MacroAssembler::call_VM(_leaf) calls ++ void super_call_VM_leaf(address entry_point); ++ void super_call_VM_leaf(address entry_point, Register arg_1); ++ void super_call_VM_leaf(address entry_point, Register arg_1, Register arg_2); ++ void super_call_VM_leaf(address entry_point, Register arg_1, Register arg_2, Register arg_3); ++ ++ // last Java Frame (fills frame anchor) ++ void set_last_Java_frame(Register thread, ++ Register last_java_sp, ++ Register last_java_fp, ++ Label& last_java_pc); ++ ++ // thread in the default location (S6) ++ void set_last_Java_frame(Register last_java_sp, ++ Register last_java_fp, ++ Label& last_java_pc); ++ ++ void reset_last_Java_frame(Register thread, bool clear_fp); ++ ++ // thread in the default location (S6) ++ void reset_last_Java_frame(bool clear_fp); ++ ++ // Stores ++ void store_check(Register obj); // store check for obj - register is destroyed afterwards ++ void store_check(Register obj, Address dst); // same as above, dst is exact store location (reg. is destroyed) ++ ++ void resolve_jobject(Register value, Register thread, Register tmp); ++ void clear_jweak_tag(Register possibly_jweak); ++ ++#if INCLUDE_ALL_GCS ++ ++ void g1_write_barrier_pre(Register obj, ++ Register pre_val, ++ Register thread, ++ Register tmp, ++ bool tosca_live, ++ bool expand_call); ++ ++ void g1_write_barrier_post(Register store_addr, ++ Register new_val, ++ Register thread, ++ Register tmp, ++ Register tmp2); ++ ++#endif // INCLUDE_ALL_GCS ++ ++ // split store_check(Register obj) to enhance instruction interleaving ++ void store_check_part_1(Register obj); ++ void store_check_part_2(Register obj); ++ ++ // C 'boolean' to Java boolean: x == 0 ? 
0 : 1 ++ void c2bool(Register x); ++ //add for compressedoops ++ void load_klass(Register dst, Register src); ++ void store_klass(Register dst, Register src); ++ void load_prototype_header(Register dst, Register src); ++ ++ void store_klass_gap(Register dst, Register src); ++ ++ void load_heap_oop(Register dst, Address src); ++ void store_heap_oop(Address dst, Register src); ++ void store_heap_oop_null(Address dst); ++ void encode_heap_oop(Register r); ++ void encode_heap_oop(Register dst, Register src); ++ void decode_heap_oop(Register r); ++ void decode_heap_oop(Register dst, Register src); ++ void encode_heap_oop_not_null(Register r); ++ void decode_heap_oop_not_null(Register r); ++ void encode_heap_oop_not_null(Register dst, Register src); ++ void decode_heap_oop_not_null(Register dst, Register src); ++ ++ void encode_klass_not_null(Register r); ++ void decode_klass_not_null(Register r); ++ void encode_klass_not_null(Register dst, Register src); ++ void decode_klass_not_null(Register dst, Register src); ++ ++ // Returns the byte size of the instructions generated by decode_klass_not_null() ++ // when compressed klass pointers are being used. ++ static int instr_size_for_decode_klass_not_null(); ++ ++ // if heap base register is used - reinit it with the correct value ++ void reinit_heapbase(); ++ ++ DEBUG_ONLY(void verify_heapbase(const char* msg);) ++ ++ void set_narrow_klass(Register dst, Klass* k); ++ void set_narrow_oop(Register dst, jobject obj); ++ ++ // Sign extension ++ void sign_extend_short(Register reg) { ext_w_h(reg, reg); } ++ void sign_extend_byte(Register reg) { ext_w_b(reg, reg); } ++ void rem_s(FloatRegister fd, FloatRegister fs, FloatRegister ft, FloatRegister tmp); ++ void rem_d(FloatRegister fd, FloatRegister fs, FloatRegister ft, FloatRegister tmp); ++ ++ void trigfunc(char trig, int num_fpu_regs_in_use = 1); ++ // allocation ++ void eden_allocate( ++ Register obj, // result: pointer to object after successful allocation ++ Register var_size_in_bytes, // object size in bytes if unknown at compile time; invalid otherwise ++ int con_size_in_bytes, // object size in bytes if known at compile time ++ Register t1, // temp register ++ Label& slow_case // continuation point if fast allocation fails ++ ); ++ void tlab_allocate( ++ Register obj, // result: pointer to object after successful allocation ++ Register var_size_in_bytes, // object size in bytes if unknown at compile time; invalid otherwise ++ int con_size_in_bytes, // object size in bytes if known at compile time ++ Register t1, // temp register ++ Register t2, // temp register ++ Label& slow_case // continuation point if fast allocation fails ++ ); ++ void incr_allocated_bytes(Register thread, ++ Register var_size_in_bytes, int con_size_in_bytes, ++ Register t1 = noreg); ++ // interface method calling ++ void lookup_interface_method(Register recv_klass, ++ Register intf_klass, ++ RegisterOrConstant itable_index, ++ Register method_result, ++ Register scan_temp, ++ Label& no_such_interface, ++ bool return_method = true); ++ ++ // virtual method calling ++ void lookup_virtual_method(Register recv_klass, ++ RegisterOrConstant vtable_index, ++ Register method_result); ++ ++ // Test sub_klass against super_klass, with fast and slow paths. ++ ++ // The fast path produces a tri-state answer: yes / no / maybe-slow. ++ // One of the three labels can be NULL, meaning take the fall-through. ++ // If super_check_offset is -1, the value is loaded up from super_klass. ++ // No registers are killed, except temp_reg. 
++ void check_klass_subtype_fast_path(Register sub_klass, ++ Register super_klass, ++ Register temp_reg, ++ Label* L_success, ++ Label* L_failure, ++ Label* L_slow_path, ++ RegisterOrConstant super_check_offset = RegisterOrConstant(-1)); ++ ++ // The rest of the type check; must be wired to a corresponding fast path. ++ // It does not repeat the fast path logic, so don't use it standalone. ++ // The temp_reg and temp2_reg can be noreg, if no temps are available. ++ // Updates the sub's secondary super cache as necessary. ++ // If set_cond_codes, condition codes will be Z on success, NZ on failure. ++ void check_klass_subtype_slow_path(Register sub_klass, ++ Register super_klass, ++ Register temp_reg, ++ Register temp2_reg, ++ Label* L_success, ++ Label* L_failure, ++ bool set_cond_codes = false); ++ ++ // Simplified, combined version, good for typical uses. ++ // Falls through on failure. ++ void check_klass_subtype(Register sub_klass, ++ Register super_klass, ++ Register temp_reg, ++ Label& L_success); ++ ++ ++ // Debugging ++ ++ // only if +VerifyOops ++ void verify_oop(Register reg, const char* s = "broken oop"); ++ void verify_oop_addr(Address addr, const char * s = "broken oop addr"); ++ void verify_oop_subroutine(); ++ // TODO: verify method and klass metadata (compare against vptr?) ++ void _verify_method_ptr(Register reg, const char * msg, const char * file, int line) {} ++ void _verify_klass_ptr(Register reg, const char * msg, const char * file, int line){} ++ ++ #define verify_method_ptr(reg) _verify_method_ptr(reg, "broken method " #reg, __FILE__, __LINE__) ++ #define verify_klass_ptr(reg) _verify_klass_ptr(reg, "broken klass " #reg, __FILE__, __LINE__) ++ ++ // only if +VerifyFPU ++ void verify_FPU(int stack_depth, const char* s = "illegal FPU state"); ++ ++ // prints msg, dumps registers and stops execution ++ void stop(const char* msg); ++ ++ // prints msg and continues ++ void warn(const char* msg); ++ ++ static void debug(char* msg/*, RegistersForDebugging* regs*/); ++ static void debug64(char* msg, int64_t pc, int64_t regs[]); ++ ++ void untested() { stop("untested"); } ++ ++ void unimplemented(const char* what = "") { char* b = new char[1024]; jio_snprintf(b, sizeof(b), "unimplemented: %s", what); stop(b); } ++ ++ void should_not_reach_here() { stop("should not reach here"); } ++ ++ void print_CPU_state(); ++ ++ // Stack overflow checking ++ void bang_stack_with_offset(int offset) { ++ // stack grows down, caller passes positive offset ++ assert(offset > 0, "must bang with negative offset"); ++ if (offset <= 2048) { ++ st_w(RA0, SP, -offset); ++ } else if (offset <= 32768 && !(offset & 3)) { ++ stptr_w(RA0, SP, -offset); ++ } else { ++ li(AT, offset); ++ sub_d(AT, SP, AT); ++ st_w(RA0, AT, 0); ++ } ++ } ++ ++ // Writes to stack successive pages until offset reached to check for ++ // stack overflow + shadow pages. Also, clobbers tmp ++ void bang_stack_size(Register size, Register tmp); ++ ++ virtual RegisterOrConstant delayed_value_impl(intptr_t* delayed_value_addr, ++ Register tmp, ++ int offset); ++ ++ // Support for serializing memory accesses between threads ++ void serialize_memory(Register thread, Register tmp); ++ ++ //void verify_tlab(); ++ void verify_tlab(Register t1, Register t2); ++ ++ // Biased locking support ++ // lock_reg and obj_reg must be loaded up with the appropriate values. ++ // tmp_reg is optional. 
If it is supplied (i.e., != noreg) it will
++ // be killed; if not supplied, push/pop will be used internally to
++ // allocate a temporary (inefficient, avoid if possible).
++ // Optional slow case is for implementations (interpreter and C1) which branch to
++ // slow case directly. Leaves condition codes set for C2's Fast_Lock node.
++ // Returns offset of first potentially-faulting instruction for null
++ // check info (currently consumed only by C1). If
++ // swap_reg_contains_mark is true then returns -1 as it is assumed
++ // the calling code has already passed any potential faults.
++ int biased_locking_enter(Register lock_reg, Register obj_reg,
++ Register swap_reg, Register tmp_reg,
++ bool swap_reg_contains_mark,
++ Label& done, Label* slow_case = NULL,
++ BiasedLockingCounters* counters = NULL);
++ void biased_locking_exit (Register obj_reg, Register temp_reg, Label& done);
++#ifdef COMPILER2
++ void fast_lock(Register obj, Register box, Register res, Register tmp, Register scr);
++ void fast_unlock(Register obj, Register box, Register res, Register tmp, Register scr);
++#endif
++
++ void round_to(Register reg, int modulus) {
++ //TODO: LA
++ guarantee(0, "LA not implemented yet");
++#if 0
++ assert_different_registers(reg, AT);
++ increment(reg, modulus - 1);
++ move(AT, - modulus);
++ andr(reg, reg, AT);
++#endif
++ }
++
++ // the following two might use the AT register; be sure you have no meaningful data in AT before you call them
++ void increment(Register reg, int imm);
++ void decrement(Register reg, int imm);
++ void increment(Address addr, int imm = 1);
++ void decrement(Address addr, int imm = 1);
++ void shl(Register reg, int sa) { slli_d(reg, reg, sa); }
++ void shr(Register reg, int sa) { srli_d(reg, reg, sa); }
++ void sar(Register reg, int sa) { srai_d(reg, reg, sa); }
++ // Helper functions for statistics gathering.
++ void atomic_inc32(address counter_addr, int inc, Register tmp_reg1, Register tmp_reg2); ++ ++ // Calls ++ void call(address entry); ++ void call(address entry, relocInfo::relocType rtype); ++ void call(address entry, RelocationHolder& rh); ++ void call_long(address entry); ++ ++ address trampoline_call(AddressLiteral entry, CodeBuffer *cbuf = NULL); ++ ++ static const unsigned long branch_range = NOT_DEBUG(128 * M) DEBUG_ONLY(2 * M); ++ ++ static bool far_branches() { ++ if (ForceUnreachable) { ++ return true; ++ } else { ++ return ReservedCodeCacheSize > branch_range; ++ } ++ } ++ ++ // Emit the CompiledIC call idiom ++ address ic_call(address entry); ++ ++ // Jumps ++ void jmp(address entry); ++ void jmp(address entry, relocInfo::relocType rtype); ++ void jmp_far(Label& L); // patchable ++ ++ /* branches may exceed 16-bit offset */ ++ void b_far(address entry); ++ void b_far(Label& L); ++ ++ void bne_far (Register rs, Register rt, address entry); ++ void bne_far (Register rs, Register rt, Label& L); ++ ++ void beq_far (Register rs, Register rt, address entry); ++ void beq_far (Register rs, Register rt, Label& L); ++ ++ void blt_far (Register rs, Register rt, address entry, bool is_signed); ++ void blt_far (Register rs, Register rt, Label& L, bool is_signed); ++ ++ void bge_far (Register rs, Register rt, address entry, bool is_signed); ++ void bge_far (Register rs, Register rt, Label& L, bool is_signed); ++ ++ // For C2 to support long branches ++ void beq_long (Register rs, Register rt, Label& L); ++ void bne_long (Register rs, Register rt, Label& L); ++ void blt_long (Register rs, Register rt, Label& L, bool is_signed); ++ void bge_long (Register rs, Register rt, Label& L, bool is_signed); ++ void bc1t_long (Label& L); ++ void bc1f_long (Label& L); ++ ++ static bool patchable_branches() { ++ const unsigned long branch_range = NOT_DEBUG(128 * M) DEBUG_ONLY(2 * M); ++ return ReservedCodeCacheSize > branch_range; ++ } ++ ++ static bool reachable_from_branch_short(jlong offs); ++ ++ void patchable_jump_far(Register ra, jlong offs); ++ void patchable_jump(address target, bool force_patchable = false); ++ void patchable_call(address target, address call_size = 0); ++ ++ // Floating ++ // Data ++ ++ // Load and store values by size and signed-ness ++ void load_sized_value(Register dst, Address src, size_t size_in_bytes, bool is_signed, Register dst2 = noreg); ++ void store_sized_value(Address dst, Register src, size_t size_in_bytes, Register src2 = noreg); ++ ++ // ld_ptr will perform lw for 32 bit VMs and ld for 64 bit VMs ++ inline void ld_ptr(Register rt, Address a) { ++ ld_d(rt, a); ++ } ++ ++ inline void ld_ptr(Register rt, Register base, int offset16) { ++ ld_d(rt, base, offset16); ++ } ++ ++ // st_ptr will perform sw for 32 bit VMs and sd for 64 bit VMs ++ inline void st_ptr(Register rt, Address a) { ++ st_d(rt, a); ++ } ++ ++ inline void st_ptr(Register rt, Register base, int offset16) { ++ st_d(rt, base, offset16); ++ } ++ ++ void ld_ptr(Register rt, Register base, Register offset); ++ void st_ptr(Register rt, Register base, Register offset); ++ ++ // ld_long will perform lw for 32 bit VMs and ld for 64 bit VMs ++ // st_long will perform sw for 32 bit VMs and sd for 64 bit VMs ++ inline void ld_long(Register rt, Register base, int offset16); ++ inline void st_long(Register rt, Register base, int offset16); ++ inline void ld_long(Register rt, Address a); ++ inline void st_long(Register rt, Address a); ++ void ld_long(Register rt, Register offset, Register base); ++ void 
st_long(Register rt, Register offset, Register base);
++
++ // swap the two bytes of the low 16-bit halfword
++ // this directive will use AT; be sure the high 16 bits of reg are zero
++ void hswap(Register reg);
++ void huswap(Register reg);
++
++ // convert a big-endian integer to a little-endian integer
++ void swap(Register reg);
++
++ void cmpxchg(Address addr, Register oldval, Register newval, Register resflag,
++ bool retold, bool barrier);
++ void cmpxchg(Address addr, Register oldval, Register newval, Register tmp,
++ bool retold, bool barrier, Label& succ, Label* fail = NULL);
++ void cmpxchg32(Address addr, Register oldval, Register newval, Register resflag,
++ bool sign, bool retold, bool barrier);
++ void cmpxchg32(Address addr, Register oldval, Register newval, Register tmp,
++ bool sign, bool retold, bool barrier, Label& succ, Label* fail = NULL);
++
++ void extend_sign(Register rh, Register rl) { /*stop("extend_sign");*/ guarantee(0, "LA not implemented yet");}
++ void neg(Register reg) { /*dsubu(reg, R0, reg);*/ guarantee(0, "LA not implemented yet");}
++ void push (Register reg) { addi_d(SP, SP, -8); st_d (reg, SP, 0); }
++ void push (FloatRegister reg) { addi_d(SP, SP, -8); fst_d (reg, SP, 0); }
++ void pop (Register reg) { ld_d (reg, SP, 0); addi_d(SP, SP, 8); }
++ void pop (FloatRegister reg) { fld_d (reg, SP, 0); addi_d(SP, SP, 8); }
++ void pop () { addi_d(SP, SP, 8); }
++ void pop2 () { addi_d(SP, SP, 16); }
++ void push2(Register reg1, Register reg2);
++ void pop2 (Register reg1, Register reg2);
++ // we need two functions to save and restore the general registers
++ void pushad();
++ void popad();
++ void pushad_except_v0();
++ void popad_except_v0();
++
++ void li(Register rd, jlong value);
++ void li(Register rd, address addr) { li(rd, (long)addr); }
++ void patchable_li52(Register rd, jlong value);
++ void lipc(Register rd, Label& L);
++ void move(Register rd, Register rs) { orr(rd, rs, R0); }
++ void move_u32(Register rd, Register rs) { add_w(rd, rs, R0); }
++ void mov_metadata(Register dst, Metadata* obj);
++ void mov_metadata(Address dst, Metadata* obj);
++
++ // Load the base of the cardtable byte map into reg.
++ void load_byte_map_base(Register reg); ++ ++ //FIXME ++ void empty_FPU_stack(){/*need implemented*/}; ++ ++ ++ // method handles (JSR 292) ++ Address argument_address(RegisterOrConstant arg_slot, int extra_slot_offset = 0); ++ ++ ++ // LA added: ++ void jr (Register reg) { jirl(R0, reg, 0); } ++ void jalr(Register reg) { jirl(RA, reg, 0); } ++ void nop () { andi(R0, R0, 0); } ++ void andr(Register rd, Register rj, Register rk) { AND(rd, rj, rk); } ++ void xorr(Register rd, Register rj, Register rk) { XOR(rd, rj, rk); } ++ void orr (Register rd, Register rj, Register rk) { OR(rd, rj, rk); } ++ void lea (Register rd, Address src); ++ void lea (Register dst, AddressLiteral adr); ++ static int patched_branch(int dest_pos, int inst, int inst_pos); ++ ++ // Conditional move ++ void cmp_cmov(Register op1, ++ Register op2, ++ Register dst, ++ Register src, ++ CMCompare cmp = EQ, ++ bool is_signed = true); ++ void cmp_cmov(FloatRegister op1, ++ FloatRegister op2, ++ Register dst, ++ Register src, ++ FloatRegister tmp1, ++ FloatRegister tmp2, ++ CMCompare cmp = EQ, ++ bool is_float = true); ++ void cmp_cmov(FloatRegister op1, ++ FloatRegister op2, ++ FloatRegister dst, ++ FloatRegister src, ++ CMCompare cmp = EQ, ++ bool is_float = true); ++ void cmp_cmov(Register op1, ++ Register op2, ++ FloatRegister dst, ++ FloatRegister src, ++ FloatRegister tmp1, ++ FloatRegister tmp2, ++ CMCompare cmp = EQ); ++ ++ // CRC32 code for java.util.zip.CRC32::update() instrinsic. ++ void update_byte_crc32(Register crc, Register val, Register table); ++ ++ // CRC32 code for java.util.zip.CRC32::updateBytes() instrinsic. ++ void kernel_crc32(Register crc, Register buf, Register len, Register tmp); ++ ++ // CRC32C code for java.util.zip.CRC32C::updateBytes() instrinsic. ++ void kernel_crc32c(Register crc, Register buf, Register len, Register tmp); ++ ++#undef VIRTUAL ++ ++ public: ++// Memory Data Type ++#define INT_TYPE 0x100 ++#define FLOAT_TYPE 0x200 ++#define SIGNED_TYPE 0x10 ++#define UNSIGNED_TYPE 0x20 ++ ++ typedef enum { ++ LOAD_BYTE = INT_TYPE | SIGNED_TYPE | 0x1, ++ LOAD_CHAR = INT_TYPE | SIGNED_TYPE | 0x2, ++ LOAD_SHORT = INT_TYPE | SIGNED_TYPE | 0x3, ++ LOAD_INT = INT_TYPE | SIGNED_TYPE | 0x4, ++ LOAD_LONG = INT_TYPE | SIGNED_TYPE | 0x5, ++ STORE_BYTE = INT_TYPE | SIGNED_TYPE | 0x6, ++ STORE_CHAR = INT_TYPE | SIGNED_TYPE | 0x7, ++ STORE_SHORT = INT_TYPE | SIGNED_TYPE | 0x8, ++ STORE_INT = INT_TYPE | SIGNED_TYPE | 0x9, ++ STORE_LONG = INT_TYPE | SIGNED_TYPE | 0xa, ++ LOAD_LINKED_LONG = INT_TYPE | SIGNED_TYPE | 0xb, ++ ++ LOAD_U_BYTE = INT_TYPE | UNSIGNED_TYPE | 0x1, ++ LOAD_U_SHORT = INT_TYPE | UNSIGNED_TYPE | 0x2, ++ LOAD_U_INT = INT_TYPE | UNSIGNED_TYPE | 0x3, ++ ++ LOAD_FLOAT = FLOAT_TYPE | SIGNED_TYPE | 0x1, ++ LOAD_DOUBLE = FLOAT_TYPE | SIGNED_TYPE | 0x2, ++ LOAD_VECTORX = FLOAT_TYPE | SIGNED_TYPE | 0x3, ++ LOAD_VECTORY = FLOAT_TYPE | SIGNED_TYPE | 0x4, ++ STORE_FLOAT = FLOAT_TYPE | SIGNED_TYPE | 0x5, ++ STORE_DOUBLE = FLOAT_TYPE | SIGNED_TYPE | 0x6, ++ STORE_VECTORX = FLOAT_TYPE | SIGNED_TYPE | 0x7, ++ STORE_VECTORY = FLOAT_TYPE | SIGNED_TYPE | 0x8 ++ } CMLoadStoreDataType; ++ ++ void loadstore_enc(Register reg, int base, int index, int scale, int disp, int type) { ++ assert((type & INT_TYPE), "must be General reg type"); ++ loadstore_t(reg, base, index, scale, disp, type); ++ } ++ ++ void loadstore_enc(FloatRegister reg, int base, int index, int scale, int disp, int type) { ++ assert((type & FLOAT_TYPE), "must be Float reg type"); ++ loadstore_t(reg, base, index, scale, disp, type); ++ } ++ 
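The loadstore_enc entry points above key off the bit layout of CMLoadStoreDataType: the 0x100/0x200 bits say which register file the access targets and the 0x10/0x20 bits distinguish signed from unsigned accesses. A small self-contained sketch of that predicate logic (the constants are copied from the encoding above; the helper names are illustrative only, not part of the patch):

    #include <cassert>

    // Constants copied from the CMLoadStoreDataType encoding above.
    const int INT_TYPE      = 0x100;
    const int FLOAT_TYPE    = 0x200;
    const int SIGNED_TYPE   = 0x10;
    const int UNSIGNED_TYPE = 0x20;

    const int LOAD_INT    = INT_TYPE   | SIGNED_TYPE   | 0x4;
    const int LOAD_U_BYTE = INT_TYPE   | UNSIGNED_TYPE | 0x1;
    const int LOAD_FLOAT  = FLOAT_TYPE | SIGNED_TYPE   | 0x1;

    // Mirrors the asserts in loadstore_enc(): general-register accesses must
    // carry INT_TYPE, floating-point/vector accesses must carry FLOAT_TYPE.
    bool is_gpr_access(int type)    { return (type & INT_TYPE)      != 0; }
    bool is_fpr_access(int type)    { return (type & FLOAT_TYPE)    != 0; }
    bool is_unsigned_load(int type) { return (type & UNSIGNED_TYPE) != 0; }

    int main() {
      assert(is_gpr_access(LOAD_INT)      && !is_fpr_access(LOAD_INT));
      assert(is_fpr_access(LOAD_FLOAT)    && !is_gpr_access(LOAD_FLOAT));
      assert(is_unsigned_load(LOAD_U_BYTE) && !is_unsigned_load(LOAD_INT));
      return 0;
    }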
++private: ++ template ++ void loadstore_t(T reg, int base, int index, int scale, int disp, int type) { ++ if (index != 0) { ++ assert(((scale==0)&&(disp==0)), "only support base+index"); ++ loadstore(reg, as_Register(base), as_Register(index), type); ++ } else { ++ loadstore(reg, as_Register(base), disp, type); ++ } ++ } ++ void loadstore(Register reg, Register base, int disp, int type); ++ void loadstore(Register reg, Register base, Register disp, int type); ++ void loadstore(FloatRegister reg, Register base, int disp, int type); ++ void loadstore(FloatRegister reg, Register base, Register disp, int type); ++}; ++ ++/** ++ * class SkipIfEqual: ++ * ++ * Instantiating this class will result in assembly code being output that will ++ * jump around any code emitted between the creation of the instance and it's ++ * automatic destruction at the end of a scope block, depending on the value of ++ * the flag passed to the constructor, which will be checked at run-time. ++ */ ++class SkipIfEqual { ++ private: ++ MacroAssembler* _masm; ++ Label _label; ++ ++ public: ++ SkipIfEqual(MacroAssembler*, const bool* flag_addr, bool value); ++ ~SkipIfEqual(); ++}; ++ ++#ifdef ASSERT ++inline bool AbstractAssembler::pd_check_instruction_mark() { return true; } ++#endif ++ ++struct tableswitch { ++ Register _reg; ++ int _insn_index; jint _first_key; jint _last_key; ++ Label _after; ++ Label _branches; ++}; ++ ++#endif // CPU_LOONGARCH_VM_MACROASSEMBLER_LOONGARCH_HPP +diff --git a/hotspot/src/cpu/loongarch/vm/macroAssembler_loongarch.inline.hpp b/hotspot/src/cpu/loongarch/vm/macroAssembler_loongarch.inline.hpp +new file mode 100644 +index 0000000000..0b265a4def +--- /dev/null ++++ b/hotspot/src/cpu/loongarch/vm/macroAssembler_loongarch.inline.hpp +@@ -0,0 +1,34 @@ ++/* ++ * Copyright (c) 1997, 2013, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2017, 2020, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ *
++ */
++
++#ifndef CPU_LOONGARCH_VM_MACROASSEMBLER_LOONGARCH_INLINE_HPP
++#define CPU_LOONGARCH_VM_MACROASSEMBLER_LOONGARCH_INLINE_HPP
++
++#include "asm/assembler.inline.hpp"
++#include "asm/macroAssembler.hpp"
++#include "asm/codeBuffer.hpp"
++#include "code/codeCache.hpp"
++
++#endif // CPU_LOONGARCH_VM_MACROASSEMBLER_LOONGARCH_INLINE_HPP
+diff --git a/hotspot/src/cpu/loongarch/vm/metaspaceShared_loongarch_64.cpp b/hotspot/src/cpu/loongarch/vm/metaspaceShared_loongarch_64.cpp
+new file mode 100644
+index 0000000000..b36216c533
+--- /dev/null
++++ b/hotspot/src/cpu/loongarch/vm/metaspaceShared_loongarch_64.cpp
+@@ -0,0 +1,120 @@
++/*
++ * Copyright (c) 2004, 2012, Oracle and/or its affiliates. All rights reserved.
++ * Copyright (c) 2015, 2020, Loongson Technology. All rights reserved.
++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
++ *
++ * This code is free software; you can redistribute it and/or modify it
++ * under the terms of the GNU General Public License version 2 only, as
++ * published by the Free Software Foundation.
++ *
++ * This code is distributed in the hope that it will be useful, but WITHOUT
++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
++ * version 2 for more details (a copy is included in the LICENSE file that
++ * accompanied this code).
++ *
++ * You should have received a copy of the GNU General Public License version
++ * 2 along with this work; if not, write to the Free Software Foundation,
++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
++ *
++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
++ * or visit www.oracle.com if you need additional information or have any
++ * questions.
++ *
++ */
++
++#include "precompiled.hpp"
++#include "asm/macroAssembler.hpp"
++#include "asm/codeBuffer.hpp"
++#include "memory/metaspaceShared.hpp"
++
++// Generate the self-patching vtable method:
++//
++// This method will be called (as any other Klass virtual method) with
++// the Klass itself as the first argument. Example:
++//
++// oop obj;
++// int size = obj->klass()->klass_part()->oop_size(this);
++//
++// for which the virtual method call is Klass::oop_size();
++//
++// The dummy method is called with the Klass object as the first
++// operand, and an object as the second argument.
++//
++
++//=====================================================================
++
++// All of the dummy methods in the vtable are essentially identical,
++// differing only by an ordinal constant, and they bear no relationship
++// to the original method which the caller intended. Also, there needs
++// to be 'vtbl_list_size' instances of the vtable in order to
++// differentiate between the 'vtable_list_size' original Klass objects.
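The generator that follows packs a (vtable index, method index) pair into one small immediate, bits [12..8] selecting the vtable and bits [7..0] the method, loads it into T5, and decodes it again in the shared stub. A self-contained sketch of just that packing arithmetic (function names are illustrative only, not part of the patch):

    #include <cassert>

    // Mirrors the encoding used by the generated dummy methods below:
    //   li(T5, (i << 8) + j)   -- pack vtable index i and method index j
    //   srli_d(T4, T5, 8)      -- recover the vtable index
    //   andi(T5, T5, 0x00ff)   -- recover the method index
    int pack(int vtable_index, int method_index) { return (vtable_index << 8) + method_index; }
    int vtable_of(int packed) { return packed >> 8; }
    int method_of(int packed) { return packed & 0x00ff; }

    int main() {
      int packed = pack(3, 17);            // hypothetical indices
      assert(vtable_of(packed) == 3);
      assert(method_of(packed) == 17);
      assert(pack(31, 255) < (1 << 13));   // stays within the 13-bit immediate noted below
      return 0;
    }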
++ ++#define __ masm-> ++ ++#define A0 RA0 ++#define A1 RA1 ++#define A2 RA2 ++#define A3 RA3 ++#define A4 RA4 ++#define A5 RA5 ++#define A6 RA6 ++#define A7 RA7 ++#define T0 RT0 ++#define T1 RT1 ++#define T2 RT2 ++#define T3 RT3 ++#define T4 RT4 ++#define T5 RT5 ++#define T6 RT6 ++#define T7 RT7 ++#define T8 RT8 ++ ++void MetaspaceShared::generate_vtable_methods(void** vtbl_list, ++ void** vtable, ++ char** md_top, ++ char* md_end, ++ char** mc_top, ++ char* mc_end) { ++ intptr_t vtable_bytes = (num_virtuals * vtbl_list_size) * sizeof(void*); ++ *(intptr_t *)(*md_top) = vtable_bytes; ++ *md_top += sizeof(intptr_t); ++ void** dummy_vtable = (void**)*md_top; ++ *vtable = dummy_vtable; ++ *md_top += vtable_bytes; ++ ++ // Get ready to generate dummy methods. ++ ++ CodeBuffer cb((unsigned char*)*mc_top, mc_end - *mc_top); ++ MacroAssembler* masm = new MacroAssembler(&cb); ++ Label common_code; ++ for (int i = 0; i < vtbl_list_size; ++i) { ++ for (int j = 0; j < num_virtuals; ++j) { ++ dummy_vtable[num_virtuals * i + j] = (void*)masm->pc(); ++ ++ // Load T5 with a value indicating vtable/offset pair. ++ // -- bits[ 7..0] (8 bits) which virtual method in table? ++ // -- bits[12..8] (5 bits) which virtual method table? ++ // -- must fit in 13-bit instruction immediate field. ++ __ li(T5, (i << 8) + j); ++ __ b(common_code); ++ } ++ } ++ ++ __ bind(common_code); ++ ++ __ srli_d(T4, T5, 8); // isolate vtable identifier. ++ __ shl(T4, LogBytesPerWord); ++ __ li(AT, (long)vtbl_list); ++ __ ldx_d(T4, AT, T4); // get correct vtable address. ++ __ st_d(T4, A0, 0); // update vtable pointer. ++ ++ __ andi(T5, T5, 0x00ff); // isolate vtable method index ++ __ shl(T5, LogBytesPerWord); ++ __ ldx_d(T4, T4, T5); // address of real method pointer. ++ __ jr(T4); // get real method pointer. ++ ++ __ flush(); ++ ++ *mc_top = (char*)__ pc(); ++} +diff --git a/hotspot/src/cpu/loongarch/vm/methodHandles_loongarch.cpp b/hotspot/src/cpu/loongarch/vm/methodHandles_loongarch.cpp +new file mode 100644 +index 0000000000..cb31ca5ad5 +--- /dev/null ++++ b/hotspot/src/cpu/loongarch/vm/methodHandles_loongarch.cpp +@@ -0,0 +1,566 @@ ++/* ++ * Copyright (c) 1997, 2014, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2021, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ * ++ */ ++ ++#include "precompiled.hpp" ++#include "asm/macroAssembler.hpp" ++#include "interpreter/interpreter.hpp" ++#include "interpreter/interpreterRuntime.hpp" ++#include "memory/allocation.inline.hpp" ++#include "prims/methodHandles.hpp" ++ ++#define __ _masm-> ++ ++#define A0 RA0 ++#define A1 RA1 ++#define A2 RA2 ++#define A3 RA3 ++#define A4 RA4 ++#define A5 RA5 ++#define A6 RA6 ++#define A7 RA7 ++#define T0 RT0 ++#define T1 RT1 ++#define T2 RT2 ++#define T3 RT3 ++#define T4 RT4 ++#define T5 RT5 ++#define T6 RT6 ++#define T7 RT7 ++#define T8 RT8 ++ ++#ifdef PRODUCT ++#define BLOCK_COMMENT(str) /* nothing */ ++#define STOP(error) stop(error) ++#else ++#define BLOCK_COMMENT(str) __ block_comment(str) ++#define STOP(error) block_comment(error); __ stop(error) ++#endif ++ ++#define BIND(label) bind(label); BLOCK_COMMENT(#label ":") ++ ++void MethodHandles::load_klass_from_Class(MacroAssembler* _masm, Register klass_reg) { ++ if (VerifyMethodHandles) ++ verify_klass(_masm, klass_reg, SystemDictionary::WK_KLASS_ENUM_NAME(java_lang_Class), ++ "MH argument is a Class"); ++ __ ld_d(klass_reg, Address(klass_reg, java_lang_Class::klass_offset_in_bytes())); ++} ++ ++#ifdef ASSERT ++static int check_nonzero(const char* xname, int x) { ++ assert(x != 0, err_msg("%s should be nonzero", xname)); ++ return x; ++} ++#define NONZERO(x) check_nonzero(#x, x) ++#else //ASSERT ++#define NONZERO(x) (x) ++#endif //ASSERT ++ ++#ifdef ASSERT ++void MethodHandles::verify_klass(MacroAssembler* _masm, ++ Register obj, SystemDictionary::WKID klass_id, ++ const char* error_message) { ++} ++ ++void MethodHandles::verify_ref_kind(MacroAssembler* _masm, int ref_kind, Register member_reg, Register temp) { ++ Label L; ++ BLOCK_COMMENT("verify_ref_kind {"); ++ __ ld_w(temp, Address(member_reg, NONZERO(java_lang_invoke_MemberName::flags_offset_in_bytes()))); ++ __ srai_w(temp, temp, java_lang_invoke_MemberName::MN_REFERENCE_KIND_SHIFT); ++ __ li(AT, java_lang_invoke_MemberName::MN_REFERENCE_KIND_MASK); ++ __ andr(temp, temp, AT); ++ __ li(AT, ref_kind); ++ __ beq(temp, AT, L); ++ { char* buf = NEW_C_HEAP_ARRAY(char, 100, mtInternal); ++ jio_snprintf(buf, 100, "verify_ref_kind expected %x", ref_kind); ++ if (ref_kind == JVM_REF_invokeVirtual || ++ ref_kind == JVM_REF_invokeSpecial) ++ // could do this for all ref_kinds, but would explode assembly code size ++ trace_method_handle(_masm, buf); ++ __ STOP(buf); ++ } ++ BLOCK_COMMENT("} verify_ref_kind"); ++ __ bind(L); ++} ++ ++#endif //ASSERT ++ ++void MethodHandles::jump_from_method_handle(MacroAssembler* _masm, Register method, Register temp, ++ bool for_compiler_entry) { ++ assert(method == Rmethod, "interpreter calling convention"); ++ ++ Label L_no_such_method; ++ __ beq(method, R0, L_no_such_method); ++ ++ __ verify_method_ptr(method); ++ ++ if (!for_compiler_entry && JvmtiExport::can_post_interpreter_events()) { ++ Label run_compiled_code; ++ // JVMTI events, such as single-stepping, are implemented partly by avoiding running ++ // compiled code in threads for which the event is enabled. Check here for ++ // interp_only_mode if these events CAN be enabled. ++ Register rthread = TREG; ++ // interp_only is an int, on little endian it is sufficient to test the byte only ++ // Is a cmpl faster? 
++ __ ld_bu(AT, rthread, in_bytes(JavaThread::interp_only_mode_offset())); ++ __ beq(AT, R0, run_compiled_code); ++ __ ld_d(T4, method, in_bytes(Method::interpreter_entry_offset())); ++ __ jr(T4); ++ __ BIND(run_compiled_code); ++ } ++ ++ const ByteSize entry_offset = for_compiler_entry ? Method::from_compiled_offset() : ++ Method::from_interpreted_offset(); ++ __ ld_d(T4, method, in_bytes(entry_offset)); ++ __ jr(T4); ++ ++ __ bind(L_no_such_method); ++ address wrong_method = StubRoutines::throw_AbstractMethodError_entry(); ++ __ jmp(wrong_method, relocInfo::runtime_call_type); ++} ++ ++void MethodHandles::jump_to_lambda_form(MacroAssembler* _masm, ++ Register recv, Register method_temp, ++ Register temp2, ++ bool for_compiler_entry) { ++ BLOCK_COMMENT("jump_to_lambda_form {"); ++ // This is the initial entry point of a lazy method handle. ++ // After type checking, it picks up the invoker from the LambdaForm. ++ assert_different_registers(recv, method_temp, temp2); ++ assert(recv != noreg, "required register"); ++ assert(method_temp == Rmethod, "required register for loading method"); ++ ++ //NOT_PRODUCT({ FlagSetting fs(TraceMethodHandles, true); trace_method_handle(_masm, "LZMH"); }); ++ ++ // Load the invoker, as MH -> MH.form -> LF.vmentry ++ __ verify_oop(recv); ++ __ load_heap_oop(method_temp, Address(recv, NONZERO(java_lang_invoke_MethodHandle::form_offset_in_bytes()))); ++ __ verify_oop(method_temp); ++ __ load_heap_oop(method_temp, Address(method_temp, NONZERO(java_lang_invoke_LambdaForm::vmentry_offset_in_bytes()))); ++ __ verify_oop(method_temp); ++ // the following assumes that a Method* is normally compressed in the vmtarget field: ++ __ ld_d(method_temp, Address(method_temp, NONZERO(java_lang_invoke_MemberName::vmtarget_offset_in_bytes()))); ++ ++ if (VerifyMethodHandles && !for_compiler_entry) { ++ // make sure recv is already on stack ++ __ ld_d(temp2, Address(method_temp, Method::const_offset())); ++ __ load_sized_value(temp2, ++ Address(temp2, ConstMethod::size_of_parameters_offset()), ++ sizeof(u2), false); ++ // assert(sizeof(u2) == sizeof(Method::_size_of_parameters), ""); ++ Label L; ++ Address recv_addr = __ argument_address(temp2, -1); ++ __ ld_d(AT, recv_addr); ++ __ beq(recv, AT, L); ++ ++ recv_addr = __ argument_address(temp2, -1); ++ __ ld_d(V0, recv_addr); ++ __ STOP("receiver not on stack"); ++ __ BIND(L); ++ } ++ ++ jump_from_method_handle(_masm, method_temp, temp2, for_compiler_entry); ++ BLOCK_COMMENT("} jump_to_lambda_form"); ++} ++ ++ ++// Code generation ++address MethodHandles::generate_method_handle_interpreter_entry(MacroAssembler* _masm, ++ vmIntrinsics::ID iid) { ++ const bool not_for_compiler_entry = false; // this is the interpreter entry ++ assert(is_signature_polymorphic(iid), "expected invoke iid"); ++ if (iid == vmIntrinsics::_invokeGeneric || ++ iid == vmIntrinsics::_compiledLambdaForm) { ++ // Perhaps surprisingly, the symbolic references visible to Java are not directly used. ++ // They are linked to Java-generated adapters via MethodHandleNatives.linkMethod. ++ // They all allow an appendix argument. 
++ __ stop("empty stubs make SG sick"); ++ return NULL; ++ } ++ ++ // Rmethod: Method* ++ // T4: argument locator (parameter slot count, added to sp) ++ // S7: used as temp to hold mh or receiver ++ Register t4_argp = T4; // argument list ptr, live on error paths ++ Register s7_mh = S7; // MH receiver; dies quickly and is recycled ++ Register rm_method = Rmethod; // eventual target of this invocation ++ ++ // here's where control starts out: ++ __ align(CodeEntryAlignment); ++ address entry_point = __ pc(); ++ ++ if (VerifyMethodHandles) { ++ Label L; ++ BLOCK_COMMENT("verify_intrinsic_id {"); ++ __ ld_bu(AT, rm_method, Method::intrinsic_id_offset_in_bytes()); ++ guarantee(Assembler::is_simm(iid, 12), "Oops, iid is not simm16! Change the instructions."); ++ __ addi_d(AT, AT, -1 * (int) iid); ++ __ beq(AT, R0, L); ++ if (iid == vmIntrinsics::_linkToVirtual || ++ iid == vmIntrinsics::_linkToSpecial) { ++ // could do this for all kinds, but would explode assembly code size ++ trace_method_handle(_masm, "bad Method*::intrinsic_id"); ++ } ++ __ STOP("bad Method*::intrinsic_id"); ++ __ bind(L); ++ BLOCK_COMMENT("} verify_intrinsic_id"); ++ } ++ ++ // First task: Find out how big the argument list is. ++ Address t4_first_arg_addr; ++ int ref_kind = signature_polymorphic_intrinsic_ref_kind(iid); ++ assert(ref_kind != 0 || iid == vmIntrinsics::_invokeBasic, "must be _invokeBasic or a linkTo intrinsic"); ++ if (ref_kind == 0 || MethodHandles::ref_kind_has_receiver(ref_kind)) { ++ __ ld_d(t4_argp, Address(rm_method, Method::const_offset())); ++ __ load_sized_value(t4_argp, ++ Address(t4_argp, ConstMethod::size_of_parameters_offset()), ++ sizeof(u2), false); ++ // assert(sizeof(u2) == sizeof(Method::_size_of_parameters), ""); ++ t4_first_arg_addr = __ argument_address(t4_argp, -1); ++ } else { ++ DEBUG_ONLY(t4_argp = noreg); ++ } ++ ++ if (!is_signature_polymorphic_static(iid)) { ++ __ ld_d(s7_mh, t4_first_arg_addr); ++ DEBUG_ONLY(t4_argp = noreg); ++ } ++ ++ // t4_first_arg_addr is live! ++ ++ trace_method_handle_interpreter_entry(_masm, iid); ++ ++ if (iid == vmIntrinsics::_invokeBasic) { ++ generate_method_handle_dispatch(_masm, iid, s7_mh, noreg, not_for_compiler_entry); ++ ++ } else { ++ // Adjust argument list by popping the trailing MemberName argument. ++ Register r_recv = noreg; ++ if (MethodHandles::ref_kind_has_receiver(ref_kind)) { ++ // Load the receiver (not the MH; the actual MemberName's receiver) up from the interpreter stack. 
++ __ ld_d(r_recv = T2, t4_first_arg_addr); ++ } ++ DEBUG_ONLY(t4_argp = noreg); ++ Register rm_member = rm_method; // MemberName ptr; incoming method ptr is dead now ++ __ pop(rm_member); // extract last argument ++ generate_method_handle_dispatch(_masm, iid, r_recv, rm_member, not_for_compiler_entry); ++ } ++ ++ return entry_point; ++} ++ ++void MethodHandles::generate_method_handle_dispatch(MacroAssembler* _masm, ++ vmIntrinsics::ID iid, ++ Register receiver_reg, ++ Register member_reg, ++ bool for_compiler_entry) { ++ assert(is_signature_polymorphic(iid), "expected invoke iid"); ++ Register rm_method = Rmethod; // eventual target of this invocation ++ // temps used in this code are not used in *either* compiled or interpreted calling sequences ++ Register j_rarg0 = T0; ++ Register j_rarg1 = A0; ++ Register j_rarg2 = A1; ++ Register j_rarg3 = A2; ++ Register j_rarg4 = A3; ++ Register j_rarg5 = A4; ++ ++ Register temp1 = T8; ++ Register temp2 = T4; ++ Register temp3 = T5; ++ if (for_compiler_entry) { ++ assert(receiver_reg == (iid == vmIntrinsics::_linkToStatic ? noreg : j_rarg0), "only valid assignment"); ++ assert_different_registers(temp1, j_rarg0, j_rarg1, j_rarg2, j_rarg3, j_rarg4, j_rarg5); ++ assert_different_registers(temp2, j_rarg0, j_rarg1, j_rarg2, j_rarg3, j_rarg4, j_rarg5); ++ assert_different_registers(temp3, j_rarg0, j_rarg1, j_rarg2, j_rarg3, j_rarg4, j_rarg5); ++ } ++ else { ++ assert_different_registers(temp1, temp2, temp3, saved_last_sp_register()); // don't trash lastSP ++ } ++ assert_different_registers(temp1, temp2, temp3, receiver_reg); ++ assert_different_registers(temp1, temp2, temp3, member_reg); ++ ++ if (iid == vmIntrinsics::_invokeBasic) { ++ // indirect through MH.form.vmentry.vmtarget ++ jump_to_lambda_form(_masm, receiver_reg, rm_method, temp1, for_compiler_entry); ++ ++ } else { ++ // The method is a member invoker used by direct method handles. ++ if (VerifyMethodHandles) { ++ // make sure the trailing argument really is a MemberName (caller responsibility) ++ verify_klass(_masm, member_reg, SystemDictionary::WK_KLASS_ENUM_NAME(java_lang_invoke_MemberName), ++ "MemberName required for invokeVirtual etc."); ++ } ++ ++ Address member_clazz( member_reg, NONZERO(java_lang_invoke_MemberName::clazz_offset_in_bytes())); ++ Address member_vmindex( member_reg, NONZERO(java_lang_invoke_MemberName::vmindex_offset_in_bytes())); ++ Address member_vmtarget( member_reg, NONZERO(java_lang_invoke_MemberName::vmtarget_offset_in_bytes())); ++ ++ Register temp1_recv_klass = temp1; ++ if (iid != vmIntrinsics::_linkToStatic) { ++ __ verify_oop(receiver_reg); ++ if (iid == vmIntrinsics::_linkToSpecial) { ++ // Don't actually load the klass; just null-check the receiver. ++ __ null_check(receiver_reg); ++ } else { ++ // load receiver klass itself ++ __ null_check(receiver_reg, oopDesc::klass_offset_in_bytes()); ++ __ load_klass(temp1_recv_klass, receiver_reg); ++ __ verify_klass_ptr(temp1_recv_klass); ++ } ++ BLOCK_COMMENT("check_receiver {"); ++ // The receiver for the MemberName must be in receiver_reg. ++ // Check the receiver against the MemberName.clazz ++ if (VerifyMethodHandles && iid == vmIntrinsics::_linkToSpecial) { ++ // Did not load it above... 
++ __ load_klass(temp1_recv_klass, receiver_reg); ++ __ verify_klass_ptr(temp1_recv_klass); ++ } ++ if (VerifyMethodHandles && iid != vmIntrinsics::_linkToInterface) { ++ Label L_ok; ++ Register temp2_defc = temp2; ++ __ load_heap_oop(temp2_defc, member_clazz); ++ load_klass_from_Class(_masm, temp2_defc); ++ __ verify_klass_ptr(temp2_defc); ++ __ check_klass_subtype(temp1_recv_klass, temp2_defc, temp3, L_ok); ++ // If we get here, the type check failed! ++ __ STOP("receiver class disagrees with MemberName.clazz"); ++ __ bind(L_ok); ++ } ++ BLOCK_COMMENT("} check_receiver"); ++ } ++ if (iid == vmIntrinsics::_linkToSpecial || ++ iid == vmIntrinsics::_linkToStatic) { ++ DEBUG_ONLY(temp1_recv_klass = noreg); // these guys didn't load the recv_klass ++ } ++ ++ // Live registers at this point: ++ // member_reg - MemberName that was the trailing argument ++ // temp1_recv_klass - klass of stacked receiver, if needed ++ ++ Label L_incompatible_class_change_error; ++ switch (iid) { ++ case vmIntrinsics::_linkToSpecial: ++ if (VerifyMethodHandles) { ++ verify_ref_kind(_masm, JVM_REF_invokeSpecial, member_reg, temp3); ++ } ++ __ ld_d(rm_method, member_vmtarget); ++ break; ++ ++ case vmIntrinsics::_linkToStatic: ++ if (VerifyMethodHandles) { ++ verify_ref_kind(_masm, JVM_REF_invokeStatic, member_reg, temp3); ++ } ++ __ ld_d(rm_method, member_vmtarget); ++ break; ++ ++ case vmIntrinsics::_linkToVirtual: ++ { ++ // same as TemplateTable::invokevirtual, ++ // minus the CP setup and profiling: ++ ++ if (VerifyMethodHandles) { ++ verify_ref_kind(_masm, JVM_REF_invokeVirtual, member_reg, temp3); ++ } ++ ++ // pick out the vtable index from the MemberName, and then we can discard it: ++ Register temp2_index = temp2; ++ __ ld_d(temp2_index, member_vmindex); ++ ++ if (VerifyMethodHandles) { ++ Label L_index_ok; ++ __ blt(R0, temp2_index, L_index_ok); ++ __ STOP("no virtual index"); ++ __ BIND(L_index_ok); ++ } ++ ++ // Note: The verifier invariants allow us to ignore MemberName.clazz and vmtarget ++ // at this point. And VerifyMethodHandles has already checked clazz, if needed. ++ ++ // get target Method* & entry point ++ __ lookup_virtual_method(temp1_recv_klass, temp2_index, rm_method); ++ break; ++ } ++ ++ case vmIntrinsics::_linkToInterface: ++ { ++ // same as TemplateTable::invokeinterface ++ // (minus the CP setup and profiling, with different argument motion) ++ if (VerifyMethodHandles) { ++ verify_ref_kind(_masm, JVM_REF_invokeInterface, member_reg, temp3); ++ } ++ ++ Register temp3_intf = temp3; ++ __ load_heap_oop(temp3_intf, member_clazz); ++ load_klass_from_Class(_masm, temp3_intf); ++ __ verify_klass_ptr(temp3_intf); ++ ++ Register rm_index = rm_method; ++ __ ld_d(rm_index, member_vmindex); ++ if (VerifyMethodHandles) { ++ Label L; ++ __ bge(rm_index, R0, L); ++ __ STOP("invalid vtable index for MH.invokeInterface"); ++ __ bind(L); ++ } ++ ++ // given intf, index, and recv klass, dispatch to the implementation method ++ __ lookup_interface_method(temp1_recv_klass, temp3_intf, ++ // note: next two args must be the same: ++ rm_index, rm_method, ++ temp2, ++ L_incompatible_class_change_error); ++ break; ++ } ++ ++ default: ++ fatal(err_msg_res("unexpected intrinsic %d: %s", iid, vmIntrinsics::name_at(iid))); ++ break; ++ } ++ ++ // Live at this point: ++ // rm_method ++ ++ // After figuring out which concrete method to call, jump into it. ++ // Note that this works in the interpreter with no data motion. ++ // But the compiled version will require that r_recv be shifted out. 
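++    // (rm_method is Rmethod (S3), the register jump_from_method_handle() below
++    //  dispatches through, hence "no data motion" in the interpreted case.)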
++ __ verify_method_ptr(rm_method); ++ jump_from_method_handle(_masm, rm_method, temp1, for_compiler_entry); ++ ++ if (iid == vmIntrinsics::_linkToInterface) { ++ __ bind(L_incompatible_class_change_error); ++ address icce_entry= StubRoutines::throw_IncompatibleClassChangeError_entry(); ++ __ jmp(icce_entry, relocInfo::runtime_call_type); ++ } ++ } ++} ++ ++#ifndef PRODUCT ++void trace_method_handle_stub(const char* adaptername, ++ oop mh, ++ intptr_t* saved_regs, ++ intptr_t* entry_sp) { ++ // called as a leaf from native code: do not block the JVM! ++ bool has_mh = (strstr(adaptername, "/static") == NULL && ++ strstr(adaptername, "linkTo") == NULL); // static linkers don't have MH ++ const char* mh_reg_name = has_mh ? "s7_mh" : "s7"; ++ tty->print_cr("MH %s %s="PTR_FORMAT" sp="PTR_FORMAT, ++ adaptername, mh_reg_name, ++ p2i(mh), p2i(entry_sp)); ++ ++ if (Verbose) { ++ tty->print_cr("Registers:"); ++ const int saved_regs_count = RegisterImpl::number_of_registers; ++ for (int i = 0; i < saved_regs_count; i++) { ++ Register r = as_Register(i); ++ // The registers are stored in reverse order on the stack (by pusha). ++ tty->print("%3s=" PTR_FORMAT, r->name(), saved_regs[((saved_regs_count - 1) - i)]); ++ if ((i + 1) % 4 == 0) { ++ tty->cr(); ++ } else { ++ tty->print(", "); ++ } ++ } ++ tty->cr(); ++ ++ { ++ // dumping last frame with frame::describe ++ ++ JavaThread* p = JavaThread::active(); ++ ++ ResourceMark rm; ++ PRESERVE_EXCEPTION_MARK; // may not be needed by safer and unexpensive here ++ FrameValues values; ++ ++ // Note: We want to allow trace_method_handle from any call site. ++ // While trace_method_handle creates a frame, it may be entered ++ // without a PC on the stack top (e.g. not just after a call). ++ // Walking that frame could lead to failures due to that invalid PC. ++ // => carefully detect that frame when doing the stack walking ++ ++ // Current C frame ++ frame cur_frame = os::current_frame(); ++ ++ // Robust search of trace_calling_frame (independant of inlining). ++ // Assumes saved_regs comes from a pusha in the trace_calling_frame. ++ assert(cur_frame.sp() < saved_regs, "registers not saved on stack ?"); ++ frame trace_calling_frame = os::get_sender_for_C_frame(&cur_frame); ++ while (trace_calling_frame.fp() < saved_regs) { ++ trace_calling_frame = os::get_sender_for_C_frame(&trace_calling_frame); ++ } ++ ++ // safely create a frame and call frame::describe ++ intptr_t *dump_sp = trace_calling_frame.sender_sp(); ++ intptr_t *dump_fp = trace_calling_frame.link(); ++ ++ bool walkable = has_mh; // whether the traced frame shoud be walkable ++ ++ if (walkable) { ++ // The previous definition of walkable may have to be refined ++ // if new call sites cause the next frame constructor to start ++ // failing. Alternatively, frame constructors could be ++ // modified to support the current or future non walkable ++ // frames (but this is more intrusive and is not considered as ++ // part of this RFE, which will instead use a simpler output). 
++ frame dump_frame = frame(dump_sp, dump_fp); ++ dump_frame.describe(values, 1); ++ } else { ++ // Stack may not be walkable (invalid PC above FP): ++ // Add descriptions without building a Java frame to avoid issues ++ values.describe(-1, dump_fp, "fp for #1 "); ++ values.describe(-1, dump_sp, "sp for #1"); ++ } ++ values.describe(-1, entry_sp, "raw top of stack"); ++ ++ tty->print_cr("Stack layout:"); ++ values.print(p); ++ } ++ if (has_mh && mh->is_oop()) { ++ mh->print(); ++ if (java_lang_invoke_MethodHandle::is_instance(mh)) { ++ if (java_lang_invoke_MethodHandle::form_offset_in_bytes() != 0) ++ java_lang_invoke_MethodHandle::form(mh)->print(); ++ } ++ } ++ } ++} ++ ++// The stub wraps the arguments in a struct on the stack to avoid ++// dealing with the different calling conventions for passing 6 ++// arguments. ++struct MethodHandleStubArguments { ++ const char* adaptername; ++ oopDesc* mh; ++ intptr_t* saved_regs; ++ intptr_t* entry_sp; ++}; ++void trace_method_handle_stub_wrapper(MethodHandleStubArguments* args) { ++ trace_method_handle_stub(args->adaptername, ++ args->mh, ++ args->saved_regs, ++ args->entry_sp); ++} ++ ++void MethodHandles::trace_method_handle(MacroAssembler* _masm, const char* adaptername) { ++} ++#endif //PRODUCT +diff --git a/hotspot/src/cpu/loongarch/vm/methodHandles_loongarch.hpp b/hotspot/src/cpu/loongarch/vm/methodHandles_loongarch.hpp +new file mode 100644 +index 0000000000..f84337424b +--- /dev/null ++++ b/hotspot/src/cpu/loongarch/vm/methodHandles_loongarch.hpp +@@ -0,0 +1,62 @@ ++/* ++ * Copyright (c) 2010, 2012, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2021, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++// Platform-specific definitions for method handles. ++// These definitions are inlined into class MethodHandles. 
++ ++// Adapters ++enum /* platform_dependent_constants */ { ++ adapter_code_size = 32000 DEBUG_ONLY(+ 150000) ++}; ++ ++// Additional helper methods for MethodHandles code generation: ++public: ++ static void load_klass_from_Class(MacroAssembler* _masm, Register klass_reg); ++ ++ static void verify_klass(MacroAssembler* _masm, ++ Register obj, SystemDictionary::WKID klass_id, ++ const char* error_message = "wrong klass") NOT_DEBUG_RETURN; ++ ++ static void verify_method_handle(MacroAssembler* _masm, Register mh_reg) { ++ verify_klass(_masm, mh_reg, SystemDictionary::WK_KLASS_ENUM_NAME(java_lang_invoke_MethodHandle), ++ "reference is a MH"); ++ } ++ ++ static void verify_ref_kind(MacroAssembler* _masm, int ref_kind, Register member_reg, Register temp) NOT_DEBUG_RETURN; ++ ++ // Similar to InterpreterMacroAssembler::jump_from_interpreted. ++ // Takes care of special dispatch from single stepping too. ++ static void jump_from_method_handle(MacroAssembler* _masm, Register method, Register temp, ++ bool for_compiler_entry); ++ ++ static void jump_to_lambda_form(MacroAssembler* _masm, ++ Register recv, Register method_temp, ++ Register temp2, ++ bool for_compiler_entry); ++ ++ static Register saved_last_sp_register() { ++ // Should be in sharedRuntime, not here. ++ return R3; ++ } +diff --git a/hotspot/src/cpu/loongarch/vm/nativeInst_loongarch.cpp b/hotspot/src/cpu/loongarch/vm/nativeInst_loongarch.cpp +new file mode 100644 +index 0000000000..639ac6cd3e +--- /dev/null ++++ b/hotspot/src/cpu/loongarch/vm/nativeInst_loongarch.cpp +@@ -0,0 +1,485 @@ ++/* ++ * Copyright (c) 1997, 2014, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ * ++ */ ++ ++#include "precompiled.hpp" ++#include "asm/macroAssembler.hpp" ++#include "compiler/disassembler.hpp" ++#include "memory/resourceArea.hpp" ++#include "nativeInst_loongarch.hpp" ++#include "oops/oop.inline.hpp" ++#include "runtime/handles.hpp" ++#include "runtime/sharedRuntime.hpp" ++#include "runtime/stubRoutines.hpp" ++#include "utilities/ostream.hpp" ++#ifdef COMPILER1 ++#include "c1/c1_Runtime1.hpp" ++#endif ++ ++#ifndef PRODUCT ++#include "compiler/disassembler.hpp" ++#endif ++ ++#include ++ ++#define A0 RA0 ++#define A1 RA1 ++#define A2 RA2 ++#define A3 RA3 ++#define A4 RA4 ++#define A5 RA5 ++#define A6 RA6 ++#define A7 RA7 ++#define T0 RT0 ++#define T1 RT1 ++#define T2 RT2 ++#define T3 RT3 ++#define T4 RT4 ++#define T5 RT5 ++#define T6 RT6 ++#define T7 RT7 ++#define T8 RT8 ++ ++void NativeInstruction::wrote(int offset) { ++ ICache::invalidate_word(addr_at(offset)); ++} ++ ++void NativeInstruction::set_long_at(int offset, long i) { ++ address addr = addr_at(offset); ++ *(long*)addr = i; ++ ICache::invalidate_range(addr, 8); ++} ++ ++bool NativeInstruction::is_int_branch() { ++ int op = Assembler::high(insn_word(), 6); ++ return op == Assembler::beqz_op || op == Assembler::bnez_op || ++ op == Assembler::beq_op || op == Assembler::bne_op || ++ op == Assembler::blt_op || op == Assembler::bge_op || ++ op == Assembler::bltu_op || op == Assembler::bgeu_op; ++} ++ ++bool NativeInstruction::is_float_branch() { ++ return Assembler::high(insn_word(), 6) == Assembler::bccondz_op; ++} ++ ++bool NativeCall::is_bl() const { ++ return Assembler::high(int_at(0), 6) == Assembler::bl_op; ++} ++ ++void NativeCall::verify() { ++ assert(is_bl(), "not a NativeCall"); ++} ++ ++address NativeCall::target_addr_for_bl(address orig_addr) const { ++ address addr = orig_addr ? orig_addr : addr_at(0); ++ ++ // bl ++ if (is_bl()) { ++ return addr + (Assembler::simm26(((int_at(0) & 0x3ff) << 16) | ++ ((int_at(0) >> 10) & 0xffff)) << 2); ++ } ++ ++ fatal("not a NativeCall"); ++ return NULL; ++} ++ ++address NativeCall::destination() const { ++ address addr = (address)this; ++ address destination = target_addr_for_bl(); ++ // Do we use a trampoline stub for this call? ++ // Trampoline stubs are located behind the main code. ++ if (destination > addr) { ++ // Filter out recursive method invocation (call to verified/unverified entry point). ++ CodeBlob* cb = CodeCache::find_blob_unsafe(addr); // Else we get assertion if nmethod is zombie. ++ assert(cb && cb->is_nmethod(), "sanity"); ++ nmethod *nm = (nmethod *)cb; ++ NativeInstruction* ni = nativeInstruction_at(destination); ++ if (nm->stub_contains(destination) && ni->is_NativeCallTrampolineStub_at()) { ++ // Yes we do, so get the destination from the trampoline stub. ++ const address trampoline_stub_addr = destination; ++ destination = nativeCallTrampolineStub_at(trampoline_stub_addr)->destination(); ++ } ++ } ++ return destination; ++} ++ ++// Similar to replace_mt_safe, but just changes the destination. The ++// important thing is that free-running threads are able to execute this ++// call instruction at all times. ++// ++// Used in the runtime linkage of calls; see class CompiledIC. ++// ++// Add parameter assert_lock to switch off assertion ++// during code generation, where no patching lock is needed. 
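++// If the new destination is within the reach of a single bl, the call is
++// patched directly; otherwise the bl is pointed at the nmethod's trampoline
++// stub and the 64-bit target is written into the stub's data slot instead.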
++void NativeCall::set_destination_mt_safe(address dest, bool assert_lock) { ++ assert(!assert_lock || ++ (Patching_lock->is_locked() || SafepointSynchronize::is_at_safepoint()), ++ "concurrent code patching"); ++ ++ ResourceMark rm; ++ address addr_call = addr_at(0); ++ bool reachable = MacroAssembler::reachable_from_branch_short(dest - addr_call); ++ assert(NativeCall::is_call_at(addr_call), "unexpected code at call site"); ++ ++ // Patch the call. ++ if (!reachable) { ++ address trampoline_stub_addr = get_trampoline(); ++ assert (trampoline_stub_addr != NULL, "we need a trampoline"); ++ guarantee(Assembler::is_simm((trampoline_stub_addr - addr_call) >> 2, 26), "cannot reach trampoline stub"); ++ ++ // Patch the constant in the call's trampoline stub. ++ NativeInstruction* ni = nativeInstruction_at(dest); ++ assert (! ni->is_NativeCallTrampolineStub_at(), "chained trampolines"); ++ nativeCallTrampolineStub_at(trampoline_stub_addr)->set_destination(dest); ++ dest = trampoline_stub_addr; ++ } ++ set_destination(dest); ++} ++ ++address NativeCall::get_trampoline() { ++ address call_addr = addr_at(0); ++ ++ CodeBlob *code = CodeCache::find_blob(call_addr); ++ assert(code != NULL, "Could not find the containing code blob"); ++ ++ address bl_destination ++ = nativeCall_at(call_addr)->target_addr_for_bl(); ++ NativeInstruction* ni = nativeInstruction_at(bl_destination); ++ if (code->contains(bl_destination) && ++ ni->is_NativeCallTrampolineStub_at()) ++ return bl_destination; ++ ++ // If the codeBlob is not a nmethod, this is because we get here from the ++ // CodeBlob constructor, which is called within the nmethod constructor. ++ return trampoline_stub_Relocation::get_trampoline_for(call_addr, (nmethod*)code); ++} ++ ++void NativeCall::set_destination(address dest) { ++ address addr_call = addr_at(0); ++ CodeBuffer cb(addr_call, instruction_size); ++ MacroAssembler masm(&cb); ++ assert(is_call_at(addr_call), "unexpected call type"); ++ jlong offs = dest - addr_call; ++ masm.bl(offs >> 2); ++ ICache::invalidate_range(addr_call, instruction_size); ++} ++ ++void NativeCall::print() { ++ tty->print_cr(PTR_FORMAT ": call " PTR_FORMAT, ++ p2i(instruction_address()), p2i(destination())); ++} ++ ++// Inserts a native call instruction at a given pc ++void NativeCall::insert(address code_pos, address entry) { ++ //TODO: LA ++ guarantee(0, "LA not implemented yet"); ++} ++ ++// MT-safe patching of a call instruction. ++// First patches first word of instruction to two jmp's that jmps to them ++// selfs (spinlock). Then patches the last byte, and then atomicly replaces ++// the jmp's with the first 4 byte of the new instruction. ++void NativeCall::replace_mt_safe(address instr_addr, address code_buffer) { ++ Unimplemented(); ++} ++ ++bool NativeFarCall::is_short() const { ++ return Assembler::high(int_at(0), 10) == Assembler::andi_op && ++ Assembler::low(int_at(0), 22) == 0 && ++ Assembler::high(int_at(4), 6) == Assembler::bl_op; ++} ++ ++bool NativeFarCall::is_far() const { ++ return Assembler::high(int_at(0), 7) == Assembler::pcaddu18i_op && ++ Assembler::high(int_at(4), 6) == Assembler::jirl_op && ++ Assembler::low(int_at(4), 5) == RA->encoding(); ++} ++ ++address NativeFarCall::destination(address orig_addr) const { ++ address addr = orig_addr ? 
orig_addr : addr_at(0); ++ ++ if (is_short()) { ++ // short ++ return addr + BytesPerInstWord + ++ (Assembler::simm26(((int_at(4) & 0x3ff) << 16) | ++ ((int_at(4) >> 10) & 0xffff)) << 2); ++ } ++ ++ if (is_far()) { ++ // far ++ return addr + ((intptr_t)Assembler::simm20(int_at(0) >> 5 & 0xfffff) << 18) + ++ (Assembler::simm16(int_at(4) >> 10 & 0xffff) << 2); ++ } ++ ++ fatal("not a NativeFarCall"); ++ return NULL; ++} ++ ++void NativeFarCall::set_destination(address dest) { ++ address addr_call = addr_at(0); ++ CodeBuffer cb(addr_call, instruction_size); ++ MacroAssembler masm(&cb); ++ assert(is_far_call_at(addr_call), "unexpected call type"); ++ masm.patchable_call(dest, addr_call); ++ ICache::invalidate_range(addr_call, instruction_size); ++} ++ ++void NativeFarCall::verify() { ++ assert(is_short() || is_far(), "not a NativeFarcall"); ++} ++ ++//------------------------------------------------------------------- ++ ++bool NativeMovConstReg::is_lu12iw_ori_lu32id() const { ++ return Assembler::high(int_at(0), 7) == Assembler::lu12i_w_op && ++ Assembler::high(int_at(4), 10) == Assembler::ori_op && ++ Assembler::high(int_at(8), 7) == Assembler::lu32i_d_op; ++} ++ ++bool NativeMovConstReg::is_lu12iw_lu32id_nop() const { ++ return Assembler::high(int_at(0), 7) == Assembler::lu12i_w_op && ++ Assembler::high(int_at(4), 7) == Assembler::lu32i_d_op && ++ Assembler::high(int_at(8), 10) == Assembler::andi_op; ++} ++ ++bool NativeMovConstReg::is_lu12iw_2nop() const { ++ return Assembler::high(int_at(0), 7) == Assembler::lu12i_w_op && ++ Assembler::high(int_at(4), 10) == Assembler::andi_op && ++ Assembler::high(int_at(8), 10) == Assembler::andi_op; ++} ++ ++bool NativeMovConstReg::is_lu12iw_ori_nop() const { ++ return Assembler::high(int_at(0), 7) == Assembler::lu12i_w_op && ++ Assembler::high(int_at(4), 10) == Assembler::ori_op && ++ Assembler::high(int_at(8), 10) == Assembler::andi_op; ++} ++ ++bool NativeMovConstReg::is_addid_2nop() const { ++ return Assembler::high(int_at(0), 10) == Assembler::addi_d_op && ++ Assembler::high(int_at(4), 10) == Assembler::andi_op && ++ Assembler::high(int_at(8), 10) == Assembler::andi_op; ++} ++ ++void NativeMovConstReg::verify() { ++ assert(is_li52(), "not a mov reg, imm52"); ++} ++ ++void NativeMovConstReg::print() { ++ tty->print_cr(PTR_FORMAT ": mov reg, " INTPTR_FORMAT, ++ p2i(instruction_address()), data()); ++} ++ ++intptr_t NativeMovConstReg::data() const { ++ if (is_lu12iw_ori_lu32id()) { ++ return Assembler::merge((intptr_t)((int_at(4) >> 10) & 0xfff), ++ (intptr_t)((int_at(0) >> 5) & 0xfffff), ++ (intptr_t)((int_at(8) >> 5) & 0xfffff)); ++ } ++ ++ if (is_lu12iw_lu32id_nop()) { ++ return Assembler::merge((intptr_t)0, ++ (intptr_t)((int_at(0) >> 5) & 0xfffff), ++ (intptr_t)((int_at(4) >> 5) & 0xfffff)); ++ } ++ ++ if (is_lu12iw_2nop()) { ++ return Assembler::merge((intptr_t)0, ++ (intptr_t)((int_at(0) >> 5) & 0xfffff)); ++ } ++ ++ if (is_lu12iw_ori_nop()) { ++ return Assembler::merge((intptr_t)((int_at(4) >> 10) & 0xfff), ++ (intptr_t)((int_at(0) >> 5) & 0xfffff)); ++ } ++ ++ if (is_addid_2nop()) { ++ return Assembler::simm12((int_at(0) >> 10) & 0xfff); ++ } ++ ++#ifndef PRODUCT ++ Disassembler::decode(addr_at(0), addr_at(0) + 16, tty); ++#endif ++ fatal("not a mov reg, imm52"); ++ return 0; // unreachable ++} ++ ++void NativeMovConstReg::set_data(intptr_t x, intptr_t o) { ++ CodeBuffer cb(addr_at(0), instruction_size); ++ MacroAssembler masm(&cb); ++ masm.patchable_li52(as_Register(int_at(0) & 0x1f), x); ++ ICache::invalidate_range(addr_at(0), 
instruction_size); ++ ++ // Find and replace the oop/metadata corresponding to this ++ // instruction in oops section. ++ CodeBlob* blob = CodeCache::find_blob_unsafe(instruction_address()); ++ nmethod* nm = blob->as_nmethod_or_null(); ++ if (nm != NULL) { ++ o = o ? o : x; ++ RelocIterator iter(nm, instruction_address(), next_instruction_address()); ++ while (iter.next()) { ++ if (iter.type() == relocInfo::oop_type) { ++ oop* oop_addr = iter.oop_reloc()->oop_addr(); ++ *oop_addr = cast_to_oop(o); ++ break; ++ } else if (iter.type() == relocInfo::metadata_type) { ++ Metadata** metadata_addr = iter.metadata_reloc()->metadata_addr(); ++ *metadata_addr = (Metadata*)o; ++ break; ++ } ++ } ++ } ++} ++ ++//------------------------------------------------------------------- ++ ++int NativeMovRegMem::offset() const{ ++ //TODO: LA ++ guarantee(0, "LA not implemented yet"); ++ return 0; // mute compiler ++} ++ ++void NativeMovRegMem::set_offset(int x) { ++ //TODO: LA ++ guarantee(0, "LA not implemented yet"); ++} ++ ++void NativeMovRegMem::verify() { ++ //TODO: LA ++ guarantee(0, "LA not implemented yet"); ++} ++ ++ ++void NativeMovRegMem::print() { ++ //TODO: LA ++ guarantee(0, "LA not implemented yet"); ++} ++ ++bool NativeInstruction::is_sigill_zombie_not_entrant() { ++ return uint_at(0) == NativeIllegalInstruction::instruction_code; ++} ++ ++void NativeIllegalInstruction::insert(address code_pos) { ++ *(juint*)code_pos = instruction_code; ++ ICache::invalidate_range(code_pos, instruction_size); ++} ++ ++void NativeJump::verify() { ++ assert(is_short() || is_far(), "not a general jump instruction"); ++} ++ ++bool NativeJump::is_short() { ++ return Assembler::high(insn_word(), 6) == Assembler::b_op; ++} ++ ++bool NativeJump::is_far() { ++ return Assembler::high(int_at(0), 7) == Assembler::pcaddu18i_op && ++ Assembler::high(int_at(4), 6) == Assembler::jirl_op && ++ Assembler::low(int_at(4), 5) == R0->encoding(); ++} ++ ++address NativeJump::jump_destination(address orig_addr) { ++ address addr = orig_addr ? orig_addr : addr_at(0); ++ ++ // short ++ if (is_short()) { ++ return addr + (Assembler::simm26(((int_at(0) & 0x3ff) << 16) | ++ ((int_at(0) >> 10) & 0xffff)) << 2); ++ } ++ ++ // far ++ if (is_far()) { ++ return addr + ((intptr_t)Assembler::simm20(int_at(0) >> 5 & 0xfffff) << 18) + ++ (Assembler::simm16(int_at(4) >> 10 & 0xffff) << 2); ++ } ++ ++ fatal("not a jump"); ++ return NULL; ++} ++ ++void NativeJump::set_jump_destination(address dest) { ++ OrderAccess::fence(); ++ ++ CodeBuffer cb(addr_at(0), instruction_size); ++ MacroAssembler masm(&cb); ++ masm.patchable_jump(dest); ++ ICache::invalidate_range(addr_at(0), instruction_size); ++} ++ ++void NativeGeneralJump::insert_unconditional(address code_pos, address entry) { ++ //TODO: LA ++ guarantee(0, "LA not implemented yet"); ++} ++ ++// MT-safe patching of a long jump instruction. ++// First patches first word of instruction to two jmp's that jmps to them ++// selfs (spinlock). Then patches the last byte, and then atomicly replaces ++// the jmp's with the first 4 byte of the new instruction. 
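++// (On LoongArch this is still unimplemented; the body below only calls
++// guarantee(0), like the other NativeGeneralJump helpers above.)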
++void NativeGeneralJump::replace_mt_safe(address instr_addr, address code_buffer) { ++ //TODO: LA ++ guarantee(0, "LA not implemented yet"); ++} ++ ++// Must ensure atomicity ++void NativeJump::patch_verified_entry(address entry, address verified_entry, address dest) { ++ assert(dest == SharedRuntime::get_handle_wrong_method_stub(), "expected fixed destination of patch"); ++ jlong offs = dest - verified_entry; ++ ++ if (MacroAssembler::reachable_from_branch_short(offs)) { ++ CodeBuffer cb(verified_entry, 1 * BytesPerInstWord); ++ MacroAssembler masm(&cb); ++ masm.b(dest); ++ } else { ++ // We use an illegal instruction for marking a method as ++ // not_entrant or zombie ++ NativeIllegalInstruction::insert(verified_entry); ++ } ++ ICache::invalidate_range(verified_entry, 1 * BytesPerInstWord); ++} ++ ++bool NativeInstruction::is_dtrace_trap() { ++ //return (*(int32_t*)this & 0xff) == 0xcc; ++ Unimplemented(); ++ return false; ++} ++ ++bool NativeInstruction::is_safepoint_poll() { ++ // ++ // 390 li T2, 0x0000000000400000 #@loadConP ++ // 394 st_w [SP + #12], V1 # spill 9 ++ // 398 Safepoint @ [T2] : poll for GC @ safePoint_poll # spec.benchmarks.compress.Decompressor::decompress @ bci:224 L[0]=A6 L[1]=_ L[2]=sp + #28 L[3]=_ L[4]=V1 ++ // ++ // 0x000000ffe5815130: lu12i_w t2, 0x400 ++ // 0x000000ffe5815134: st_w v1, 0xc(sp) ; OopMap{a6=Oop off=920} ++ // ;*goto ++ // ; - spec.benchmarks.compress.Decompressor::decompress@224 (line 584) ++ // ++ // 0x000000ffe5815138: ld_w at, 0x0(t2) ;*goto <--- PC ++ // ; - spec.benchmarks.compress.Decompressor::decompress@224 (line 584) ++ // ++ ++ // Since there may be some spill instructions between the safePoint_poll and loadConP, ++ // we check the safepoint instruction like this. ++ return Assembler::high(insn_word(), 10) == Assembler::ld_w_op && ++ Assembler::low(insn_word(), 5) == AT->encoding(); ++} +diff --git a/hotspot/src/cpu/loongarch/vm/nativeInst_loongarch.hpp b/hotspot/src/cpu/loongarch/vm/nativeInst_loongarch.hpp +new file mode 100644 +index 0000000000..493239923b +--- /dev/null ++++ b/hotspot/src/cpu/loongarch/vm/nativeInst_loongarch.hpp +@@ -0,0 +1,513 @@ ++/* ++ * Copyright (c) 1997, 2011, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2021, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ * ++ */ ++ ++#ifndef CPU_LOONGARCH_VM_NATIVEINST_LOONGARCH_HPP ++#define CPU_LOONGARCH_VM_NATIVEINST_LOONGARCH_HPP ++ ++#include "asm/assembler.hpp" ++#include "memory/allocation.hpp" ++#include "runtime/icache.hpp" ++#include "runtime/os.hpp" ++#include "utilities/top.hpp" ++ ++// We have interfaces for the following instructions: ++// - NativeInstruction ++// - - NativeCall ++// - - NativeMovConstReg ++// - - NativeMovConstRegPatching ++// - - NativeMovRegMem ++// - - NativeMovRegMemPatching ++// - - NativeIllegalOpCode ++// - - NativeGeneralJump ++// - - NativePushConst ++// - - NativeTstRegMem ++ ++// The base class for different kinds of native instruction abstractions. ++// Provides the primitive operations to manipulate code relative to this. ++ ++class NativeInstruction VALUE_OBJ_CLASS_SPEC { ++ friend class Relocation; ++ ++ public: ++ enum loongarch_specific_constants { ++ nop_instruction_code = 0, ++ nop_instruction_size = 4, ++ sync_instruction_code = 0xf ++ }; ++ ++ bool is_nop() { guarantee(0, "LA not implemented yet"); return long_at(0) == nop_instruction_code; } ++ bool is_sync() { return Assembler::high(insn_word(), 17) == Assembler::dbar_op; } ++ bool is_dtrace_trap(); ++ inline bool is_call(); ++ inline bool is_far_call(); ++ inline bool is_illegal(); ++ bool is_jump(); ++ bool is_safepoint_poll(); ++ ++ // LoongArch has no instruction to generate a illegal instrucion exception? ++ // But `break 11` is not illegal instruction for LoongArch. ++ static int illegal_instruction(); ++ ++ bool is_int_branch(); ++ bool is_float_branch(); ++ ++ inline bool is_NativeCallTrampolineStub_at(); ++ //We use an illegal instruction for marking a method as not_entrant or zombie. ++ bool is_sigill_zombie_not_entrant(); ++ ++ protected: ++ address addr_at(int offset) const { return address(this) + offset; } ++ address instruction_address() const { return addr_at(0); } ++ address next_instruction_address() const { return addr_at(BytesPerInstWord); } ++ address prev_instruction_address() const { return addr_at(-BytesPerInstWord); } ++ ++ s_char sbyte_at(int offset) const { return *(s_char*) addr_at(offset); } ++ u_char ubyte_at(int offset) const { return *(u_char*) addr_at(offset); } ++ ++ jint int_at(int offset) const { return *(jint*) addr_at(offset); } ++ juint uint_at(int offset) const { return *(juint*) addr_at(offset); } ++ ++ intptr_t ptr_at(int offset) const { return *(intptr_t*) addr_at(offset); } ++ ++ oop oop_at (int offset) const { return *(oop*) addr_at(offset); } ++ int long_at(int offset) const { return *(jint*)addr_at(offset); } ++ ++ ++ void set_char_at(int offset, char c) { *addr_at(offset) = (u_char)c; wrote(offset); } ++ void set_int_at(int offset, jint i) { *(jint*)addr_at(offset) = i; wrote(offset); } ++ void set_ptr_at (int offset, intptr_t ptr) { *(intptr_t*) addr_at(offset) = ptr; wrote(offset); } ++ void set_oop_at (int offset, oop o) { *(oop*) addr_at(offset) = o; wrote(offset); } ++ void set_long_at(int offset, long i); ++ ++ int insn_word() const { return long_at(0); } ++ ++ void wrote(int offset); ++ ++ public: ++ ++ // unit test stuff ++ static void test() {} // override for testing ++ ++ inline friend NativeInstruction* nativeInstruction_at(address address); ++}; ++ ++inline NativeInstruction* nativeInstruction_at(address address) { ++ NativeInstruction* inst = (NativeInstruction*)address; ++#ifdef ASSERT ++ //inst->verify(); ++#endif ++ return inst; ++} ++ ++inline NativeCall* nativeCall_at(address address); ++ ++// The NativeCall is an abstraction for 
accessing/manipulating native call ++// instructions (used to manipulate inline caches, primitive & dll calls, etc.). ++class NativeCall: public NativeInstruction { ++ public: ++ enum loongarch_specific_constants { ++ instruction_offset = 0, ++ instruction_size = 1 * BytesPerInstWord, ++ return_address_offset = 1 * BytesPerInstWord, ++ displacement_offset = 0 ++ }; ++ ++ // We have only bl. ++ bool is_bl() const; ++ ++ address instruction_address() const { return addr_at(instruction_offset); } ++ ++ address next_instruction_address() const { ++ return addr_at(return_address_offset); ++ } ++ ++ address return_address() const { ++ return next_instruction_address(); ++ } ++ ++ address target_addr_for_bl(address orig_addr = 0) const; ++ address destination() const; ++ void set_destination(address dest); ++ ++ void verify_alignment() {} ++ void verify(); ++ void print(); ++ ++ // Creation ++ inline friend NativeCall* nativeCall_at(address address); ++ inline friend NativeCall* nativeCall_before(address return_address); ++ ++ static bool is_call_at(address instr) { ++ return nativeInstruction_at(instr)->is_call(); ++ } ++ ++ static bool is_call_before(address return_address) { ++ return is_call_at(return_address - return_address_offset); ++ } ++ ++ // MT-safe patching of a call instruction. ++ static void insert(address code_pos, address entry); ++ static void replace_mt_safe(address instr_addr, address code_buffer); ++ ++ // Similar to replace_mt_safe, but just changes the destination. The ++ // important thing is that free-running threads are able to execute ++ // this call instruction at all times. If the call is an immediate bl ++ // instruction we can simply rely on atomicity of 32-bit writes to ++ // make sure other threads will see no intermediate states. ++ ++ // We cannot rely on locks here, since the free-running threads must run at ++ // full speed. ++ // ++ // Used in the runtime linkage of calls; see class CompiledIC. ++ ++ // The parameter assert_lock disables the assertion during code generation. ++ void set_destination_mt_safe(address dest, bool assert_lock = true); ++ ++ address get_trampoline(); ++ ++}; ++ ++inline NativeCall* nativeCall_at(address address) { ++ NativeCall* call = (NativeCall*)(address - NativeCall::instruction_offset); ++#ifdef ASSERT ++ call->verify(); ++#endif ++ return call; ++} ++ ++inline NativeCall* nativeCall_before(address return_address) { ++ NativeCall* call = (NativeCall*)(return_address - NativeCall::return_address_offset); ++#ifdef ASSERT ++ call->verify(); ++#endif ++ return call; ++} ++ ++// The NativeFarCall is an abstraction for accessing/manipulating native ++// call-anywhere instructions. ++// Used to call native methods which may be loaded anywhere in the address ++// space, possibly out of reach of a call instruction. ++class NativeFarCall: public NativeInstruction { ++ public: ++ enum loongarch_specific_constants { ++ instruction_size = 2 * BytesPerInstWord, ++ }; ++ ++ // We use MacroAssembler::patchable_call() for implementing a ++ // call-anywhere instruction. ++ bool is_short() const; ++ bool is_far() const; ++ ++ // Checks whether instr points at a NativeFarCall instruction. ++ static bool is_far_call_at(address address) { ++ return nativeInstruction_at(address)->is_far_call(); ++ } ++ ++ // Returns the NativeFarCall's destination. ++ address destination(address orig_addr = 0) const; ++ ++ // Sets the NativeFarCall's destination, not necessarily mt-safe. ++ // Used when relocating code. 
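++  // (A NativeFarCall is either the short form "nop; bl offs26" or the far form
++  //  "pcaddu18i reg, si20; jirl ra, reg, si18"; set_destination() re-emits the
++  //  sequence via MacroAssembler::patchable_call().)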
++ void set_destination(address dest); ++ ++ void verify(); ++}; ++ ++// Instantiates a NativeFarCall object starting at the given instruction ++// address and returns the NativeFarCall object. ++inline NativeFarCall* nativeFarCall_at(address address) { ++ NativeFarCall* call = (NativeFarCall*)address; ++#ifdef ASSERT ++ call->verify(); ++#endif ++ return call; ++} ++ ++// An interface for accessing/manipulating native set_oop imm, reg instructions ++// (used to manipulate inlined data references, etc.). ++class NativeMovConstReg: public NativeInstruction { ++ public: ++ enum loongarch_specific_constants { ++ instruction_offset = 0, ++ instruction_size = 3 * BytesPerInstWord, ++ next_instruction_offset = 3 * BytesPerInstWord, ++ }; ++ ++ int insn_word() const { return long_at(instruction_offset); } ++ address instruction_address() const { return addr_at(0); } ++ address next_instruction_address() const { return addr_at(next_instruction_offset); } ++ intptr_t data() const; ++ void set_data(intptr_t x, intptr_t o = 0); ++ ++ bool is_li52() const { ++ return is_lu12iw_ori_lu32id() || ++ is_lu12iw_lu32id_nop() || ++ is_lu12iw_2nop() || ++ is_lu12iw_ori_nop() || ++ is_addid_2nop(); ++ } ++ bool is_lu12iw_ori_lu32id() const; ++ bool is_lu12iw_lu32id_nop() const; ++ bool is_lu12iw_2nop() const; ++ bool is_lu12iw_ori_nop() const; ++ bool is_addid_2nop() const; ++ void verify(); ++ void print(); ++ ++ // unit test stuff ++ static void test() {} ++ ++ // Creation ++ inline friend NativeMovConstReg* nativeMovConstReg_at(address address); ++ inline friend NativeMovConstReg* nativeMovConstReg_before(address address); ++}; ++ ++inline NativeMovConstReg* nativeMovConstReg_at(address address) { ++ NativeMovConstReg* test = (NativeMovConstReg*)(address - NativeMovConstReg::instruction_offset); ++#ifdef ASSERT ++ test->verify(); ++#endif ++ return test; ++} ++ ++inline NativeMovConstReg* nativeMovConstReg_before(address address) { ++ NativeMovConstReg* test = (NativeMovConstReg*)(address - NativeMovConstReg::instruction_size - NativeMovConstReg::instruction_offset); ++#ifdef ASSERT ++ test->verify(); ++#endif ++ return test; ++} ++ ++class NativeMovConstRegPatching: public NativeMovConstReg { ++ private: ++ friend NativeMovConstRegPatching* nativeMovConstRegPatching_at(address address) { ++ NativeMovConstRegPatching* test = (NativeMovConstRegPatching*)(address - instruction_offset); ++ #ifdef ASSERT ++ test->verify(); ++ #endif ++ return test; ++ } ++}; ++ ++class NativeMovRegMem: public NativeInstruction { ++ public: ++ enum loongarch_specific_constants { ++ instruction_offset = 0, ++ instruction_size = 4, ++ hiword_offset = 4, ++ ldst_offset = 12, ++ immediate_size = 4, ++ ldst_size = 16 ++ }; ++ ++ address instruction_address() const { return addr_at(instruction_offset); } ++ ++ int num_bytes_to_end_of_patch() const { return instruction_offset + instruction_size; } ++ ++ int offset() const; ++ ++ void set_offset(int x); ++ ++ void add_offset_in_bytes(int add_offset) { set_offset ( ( offset() + add_offset ) ); } ++ ++ void verify(); ++ void print (); ++ ++ // unit test stuff ++ static void test() {} ++ ++ private: ++ inline friend NativeMovRegMem* nativeMovRegMem_at (address address); ++}; ++ ++inline NativeMovRegMem* nativeMovRegMem_at (address address) { ++ NativeMovRegMem* test = (NativeMovRegMem*)(address - NativeMovRegMem::instruction_offset); ++#ifdef ASSERT ++ test->verify(); ++#endif ++ return test; ++} ++ ++class NativeMovRegMemPatching: public NativeMovRegMem { ++ private: ++ friend 
NativeMovRegMemPatching* nativeMovRegMemPatching_at (address address) { ++ NativeMovRegMemPatching* test = (NativeMovRegMemPatching*)(address - instruction_offset); ++ #ifdef ASSERT ++ test->verify(); ++ #endif ++ return test; ++ } ++}; ++ ++ ++// Handles all kinds of jump on Loongson. ++// short: ++// b offs26 ++// nop ++// ++// far: ++// pcaddu18i reg, si20 ++// jirl r0, reg, si18 ++// ++class NativeJump: public NativeInstruction { ++ public: ++ enum loongarch_specific_constants { ++ instruction_offset = 0, ++ instruction_size = 2 * BytesPerInstWord ++ }; ++ ++ bool is_short(); ++ bool is_far(); ++ ++ address instruction_address() const { return addr_at(instruction_offset); } ++ address jump_destination(address orig_addr = 0); ++ void set_jump_destination(address dest); ++ ++ // Creation ++ inline friend NativeJump* nativeJump_at(address address); ++ ++ // Insertion of native jump instruction ++ static void insert(address code_pos, address entry) { Unimplemented(); } ++ // MT-safe insertion of native jump at verified method entry ++ static void check_verified_entry_alignment(address entry, address verified_entry){} ++ static void patch_verified_entry(address entry, address verified_entry, address dest); ++ ++ void verify(); ++}; ++ ++inline NativeJump* nativeJump_at(address address) { ++ NativeJump* jump = (NativeJump*)(address - NativeJump::instruction_offset); ++ debug_only(jump->verify();) ++ return jump; ++} ++ ++class NativeGeneralJump: public NativeJump { ++ public: ++ // Creation ++ inline friend NativeGeneralJump* nativeGeneralJump_at(address address); ++ ++ // Insertion of native general jump instruction ++ static void insert_unconditional(address code_pos, address entry); ++ static void replace_mt_safe(address instr_addr, address code_buffer); ++}; ++ ++inline NativeGeneralJump* nativeGeneralJump_at(address address) { ++ NativeGeneralJump* jump = (NativeGeneralJump*)(address); ++ debug_only(jump->verify();) ++ return jump; ++} ++ ++class NativeIllegalInstruction: public NativeInstruction { ++public: ++ enum loongarch_specific_constants { ++ instruction_code = 0xbadc0de0, // TODO: LA ++ // Temporary LoongArch reserved instruction ++ instruction_size = 4, ++ instruction_offset = 0, ++ next_instruction_offset = 4 ++ }; ++ ++ // Insert illegal opcode as specific address ++ static void insert(address code_pos); ++}; ++ ++inline bool NativeInstruction::is_illegal() { return insn_word() == illegal_instruction(); } ++ ++inline bool NativeInstruction::is_call() { ++ NativeCall *call = (NativeCall*)instruction_address(); ++ return call->is_bl(); ++} ++ ++inline bool NativeInstruction::is_far_call() { ++ NativeFarCall *call = (NativeFarCall*)instruction_address(); ++ ++ // short ++ if (call->is_short()) { ++ return true; ++ } ++ ++ // far ++ if (call->is_far()) { ++ return true; ++ } ++ ++ return false; ++} ++ ++inline bool NativeInstruction::is_jump() ++{ ++ NativeGeneralJump *jump = (NativeGeneralJump*)instruction_address(); ++ ++ // short ++ if (jump->is_short()) { ++ return true; ++ } ++ ++ // far ++ if (jump->is_far()) { ++ return true; ++ } ++ ++ return false; ++} ++ ++// Call trampoline stubs. 
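++// Layout: three instructions (pcaddi / ld_d / jirl, recognised by
++// is_NativeCallTrampolineStub_at()), a padding word, then an 8-byte
++// destination slot at data_offset.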
++class NativeCallTrampolineStub : public NativeInstruction { ++ public: ++ ++ enum la_specific_constants { ++ instruction_size = 6 * 4, ++ instruction_offset = 0, ++ data_offset = 4 * 4, ++ next_instruction_offset = 6 * 4 ++ }; ++ ++ address destination() const { ++ return (address)ptr_at(data_offset); ++ } ++ ++ void set_destination(address new_destination) { ++ set_ptr_at(data_offset, (intptr_t)new_destination); ++ OrderAccess::fence(); ++ } ++}; ++ ++// Note: Other stubs must not begin with this pattern. ++inline bool NativeInstruction::is_NativeCallTrampolineStub_at() { ++ // pcaddi ++ // ld_d ++ // jirl ++ return Assembler::high(int_at(0), 7) == Assembler::pcaddi_op && ++ Assembler::high(int_at(4), 10) == Assembler::ld_d_op && ++ Assembler::high(int_at(8), 6) == Assembler::jirl_op && ++ Assembler::low(int_at(8), 5) == R0->encoding(); ++} ++ ++inline NativeCallTrampolineStub* nativeCallTrampolineStub_at(address addr) { ++ NativeInstruction* ni = nativeInstruction_at(addr); ++ assert(ni->is_NativeCallTrampolineStub_at(), "no call trampoline found"); ++ return (NativeCallTrampolineStub*)addr; ++} ++#endif // CPU_LOONGARCH_VM_NATIVEINST_LOONGARCH_HPP +diff --git a/hotspot/src/cpu/loongarch/vm/registerMap_loongarch.hpp b/hotspot/src/cpu/loongarch/vm/registerMap_loongarch.hpp +new file mode 100644 +index 0000000000..5ff7555d2f +--- /dev/null ++++ b/hotspot/src/cpu/loongarch/vm/registerMap_loongarch.hpp +@@ -0,0 +1,45 @@ ++/* ++ * Copyright (c) 1998, 2010, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2020, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#ifndef CPU_LOONGARCH_VM_REGISTERMAP_LOONGARCH_HPP ++#define CPU_LOONGARCH_VM_REGISTERMAP_LOONGARCH_HPP ++ ++// machine-dependent implemention for register maps ++ friend class frame; ++ ++ private: ++ // This is the hook for finding a register in an "well-known" location, ++ // such as a register block of a predetermined format. ++ // Since there is none, we just return NULL. ++ // See registerMap_sparc.hpp for an example of grabbing registers ++ // from register save areas of a standard layout. 
++ address pd_location(VMReg reg) const {return NULL;} ++ ++ // no PD state to clear or copy: ++ void pd_clear() {} ++ void pd_initialize() {} ++ void pd_initialize_from(const RegisterMap* map) {} ++ ++#endif // CPU_LOONGARCH_VM_REGISTERMAP_LOONGARCH_HPP +diff --git a/hotspot/src/cpu/loongarch/vm/register_definitions_loongarch.cpp b/hotspot/src/cpu/loongarch/vm/register_definitions_loongarch.cpp +new file mode 100644 +index 0000000000..c6424c321f +--- /dev/null ++++ b/hotspot/src/cpu/loongarch/vm/register_definitions_loongarch.cpp +@@ -0,0 +1,103 @@ ++/* ++ * Copyright (c) 2002, 2013, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2020, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#include "precompiled.hpp" ++#include "asm/assembler.hpp" ++#include "asm/register.hpp" ++#include "register_loongarch.hpp" ++#ifdef TARGET_ARCH_MODEL_loongarch_32 ++# include "interp_masm_loongarch_32.hpp" ++#endif ++#ifdef TARGET_ARCH_MODEL_loongarch_64 ++# include "interp_masm_loongarch_64.hpp" ++#endif ++ ++REGISTER_DEFINITION(Register, noreg); ++REGISTER_DEFINITION(Register, r0); ++REGISTER_DEFINITION(Register, r1); ++REGISTER_DEFINITION(Register, r2); ++REGISTER_DEFINITION(Register, r3); ++REGISTER_DEFINITION(Register, r4); ++REGISTER_DEFINITION(Register, r5); ++REGISTER_DEFINITION(Register, r6); ++REGISTER_DEFINITION(Register, r7); ++REGISTER_DEFINITION(Register, r8); ++REGISTER_DEFINITION(Register, r9); ++REGISTER_DEFINITION(Register, r10); ++REGISTER_DEFINITION(Register, r11); ++REGISTER_DEFINITION(Register, r12); ++REGISTER_DEFINITION(Register, r13); ++REGISTER_DEFINITION(Register, r14); ++REGISTER_DEFINITION(Register, r15); ++REGISTER_DEFINITION(Register, r16); ++REGISTER_DEFINITION(Register, r17); ++REGISTER_DEFINITION(Register, r18); ++REGISTER_DEFINITION(Register, r19); ++REGISTER_DEFINITION(Register, r20); ++REGISTER_DEFINITION(Register, r21); ++REGISTER_DEFINITION(Register, r22); ++REGISTER_DEFINITION(Register, r23); ++REGISTER_DEFINITION(Register, r24); ++REGISTER_DEFINITION(Register, r25); ++REGISTER_DEFINITION(Register, r26); ++REGISTER_DEFINITION(Register, r27); ++REGISTER_DEFINITION(Register, r28); ++REGISTER_DEFINITION(Register, r29); ++REGISTER_DEFINITION(Register, r30); ++REGISTER_DEFINITION(Register, r31); ++ ++REGISTER_DEFINITION(FloatRegister, fnoreg); ++REGISTER_DEFINITION(FloatRegister, f0); ++REGISTER_DEFINITION(FloatRegister, f1); ++REGISTER_DEFINITION(FloatRegister, f2); ++REGISTER_DEFINITION(FloatRegister, f3); ++REGISTER_DEFINITION(FloatRegister, f4); 
++REGISTER_DEFINITION(FloatRegister, f5); ++REGISTER_DEFINITION(FloatRegister, f6); ++REGISTER_DEFINITION(FloatRegister, f7); ++REGISTER_DEFINITION(FloatRegister, f8); ++REGISTER_DEFINITION(FloatRegister, f9); ++REGISTER_DEFINITION(FloatRegister, f10); ++REGISTER_DEFINITION(FloatRegister, f11); ++REGISTER_DEFINITION(FloatRegister, f12); ++REGISTER_DEFINITION(FloatRegister, f13); ++REGISTER_DEFINITION(FloatRegister, f14); ++REGISTER_DEFINITION(FloatRegister, f15); ++REGISTER_DEFINITION(FloatRegister, f16); ++REGISTER_DEFINITION(FloatRegister, f17); ++REGISTER_DEFINITION(FloatRegister, f18); ++REGISTER_DEFINITION(FloatRegister, f19); ++REGISTER_DEFINITION(FloatRegister, f20); ++REGISTER_DEFINITION(FloatRegister, f21); ++REGISTER_DEFINITION(FloatRegister, f22); ++REGISTER_DEFINITION(FloatRegister, f23); ++REGISTER_DEFINITION(FloatRegister, f24); ++REGISTER_DEFINITION(FloatRegister, f25); ++REGISTER_DEFINITION(FloatRegister, f26); ++REGISTER_DEFINITION(FloatRegister, f27); ++REGISTER_DEFINITION(FloatRegister, f28); ++REGISTER_DEFINITION(FloatRegister, f29); ++REGISTER_DEFINITION(FloatRegister, f30); ++REGISTER_DEFINITION(FloatRegister, f31); +diff --git a/hotspot/src/cpu/loongarch/vm/register_loongarch.cpp b/hotspot/src/cpu/loongarch/vm/register_loongarch.cpp +new file mode 100644 +index 0000000000..3104cd1cc5 +--- /dev/null ++++ b/hotspot/src/cpu/loongarch/vm/register_loongarch.cpp +@@ -0,0 +1,59 @@ ++/* ++ * Copyright (c) 2000, 2012, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2020, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#include "precompiled.hpp" ++#include "register_loongarch.hpp" ++ ++const int ConcreteRegisterImpl::max_gpr = RegisterImpl::number_of_registers << 1; ++const int ConcreteRegisterImpl::max_fpr = ConcreteRegisterImpl::max_gpr + ++ 2 * FloatRegisterImpl::number_of_registers; ++ ++ ++const char* RegisterImpl::name() const { ++ const char* names[number_of_registers] = { ++ "zero", "ra", "tp", "sp", "a0/v0", "a1/v1", "a2", "a3", ++ "a4", "a5", "a6", "a7", "t0", "t1", "t2", "t3", ++ "t4", "t5", "t6", "t7", "t8", "x", "fp", "s0", ++ "s1", "s2", "s3", "s4", "s5", "s6", "s7", "s8" ++ }; ++ return is_valid() ? 
names[encoding()] : "noreg"; ++} ++ ++const char* FloatRegisterImpl::name() const { ++ const char* names[number_of_registers] = { ++ "f0", "f1", "f2", "f3", "f4", "f5", "f6", "f7", ++ "f8", "f9", "f10", "f11", "f12", "f13", "f14", "f15", ++ "f16", "f17", "f18", "f19", "f20", "f21", "f22", "f23", ++ "f24", "f25", "f26", "f27", "f28", "f29", "f30", "f31", ++ }; ++ return is_valid() ? names[encoding()] : "fnoreg"; ++} ++ ++const char* ConditionalFlagRegisterImpl::name() const { ++ const char* names[number_of_registers] = { ++ "fcc0", "fcc1", "fcc2", "fcc3", "fcc4", "fcc5", "fcc6", "fcc7", ++ }; ++ return is_valid() ? names[encoding()] : "fccnoreg"; ++} +diff --git a/hotspot/src/cpu/loongarch/vm/register_loongarch.hpp b/hotspot/src/cpu/loongarch/vm/register_loongarch.hpp +new file mode 100644 +index 0000000000..37b39f9129 +--- /dev/null ++++ b/hotspot/src/cpu/loongarch/vm/register_loongarch.hpp +@@ -0,0 +1,436 @@ ++/* ++ * Copyright (c) 2000, 2012, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2020, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ * ++ */ ++ ++#ifndef CPU_LOONGARCH_VM_REGISTER_LOONGARCH_HPP ++#define CPU_LOONGARCH_VM_REGISTER_LOONGARCH_HPP ++ ++#include "asm/register.hpp" ++#include "vm_version_loongarch.hpp" ++ ++class VMRegImpl; ++typedef VMRegImpl* VMReg; ++ ++// Use Register as shortcut ++class RegisterImpl; ++typedef RegisterImpl* Register; ++ ++ ++// The implementation of integer registers for the LoongArch architecture ++inline Register as_Register(int encoding) { ++ return (Register)(intptr_t) encoding; ++} ++ ++class RegisterImpl: public AbstractRegisterImpl { ++ public: ++ enum { ++ number_of_registers = 32, ++ max_slots_per_register = 2 ++ }; ++ ++ // derived registers, offsets, and addresses ++ Register successor() const { return as_Register(encoding() + 1); } ++ ++ // construction ++ inline friend Register as_Register(int encoding); ++ ++ VMReg as_VMReg(); ++ ++ // accessors ++ int encoding() const { assert(is_valid(),err_msg( "invalid register (%d)", (int)(intptr_t)this)); return (intptr_t)this; } ++ bool is_valid() const { return 0 <= (intptr_t)this && (intptr_t)this < number_of_registers; } ++ const char* name() const; ++}; ++ ++ ++// The integer registers of the LoongArch architecture ++CONSTANT_REGISTER_DECLARATION(Register, noreg, (-1)); ++ ++ ++CONSTANT_REGISTER_DECLARATION(Register, r0, (0)); ++CONSTANT_REGISTER_DECLARATION(Register, r1, (1)); ++CONSTANT_REGISTER_DECLARATION(Register, r2, (2)); ++CONSTANT_REGISTER_DECLARATION(Register, r3, (3)); ++CONSTANT_REGISTER_DECLARATION(Register, r4, (4)); ++CONSTANT_REGISTER_DECLARATION(Register, r5, (5)); ++CONSTANT_REGISTER_DECLARATION(Register, r6, (6)); ++CONSTANT_REGISTER_DECLARATION(Register, r7, (7)); ++CONSTANT_REGISTER_DECLARATION(Register, r8, (8)); ++CONSTANT_REGISTER_DECLARATION(Register, r9, (9)); ++CONSTANT_REGISTER_DECLARATION(Register, r10, (10)); ++CONSTANT_REGISTER_DECLARATION(Register, r11, (11)); ++CONSTANT_REGISTER_DECLARATION(Register, r12, (12)); ++CONSTANT_REGISTER_DECLARATION(Register, r13, (13)); ++CONSTANT_REGISTER_DECLARATION(Register, r14, (14)); ++CONSTANT_REGISTER_DECLARATION(Register, r15, (15)); ++CONSTANT_REGISTER_DECLARATION(Register, r16, (16)); ++CONSTANT_REGISTER_DECLARATION(Register, r17, (17)); ++CONSTANT_REGISTER_DECLARATION(Register, r18, (18)); ++CONSTANT_REGISTER_DECLARATION(Register, r19, (19)); ++CONSTANT_REGISTER_DECLARATION(Register, r20, (20)); ++CONSTANT_REGISTER_DECLARATION(Register, r21, (21)); ++CONSTANT_REGISTER_DECLARATION(Register, r22, (22)); ++CONSTANT_REGISTER_DECLARATION(Register, r23, (23)); ++CONSTANT_REGISTER_DECLARATION(Register, r24, (24)); ++CONSTANT_REGISTER_DECLARATION(Register, r25, (25)); ++CONSTANT_REGISTER_DECLARATION(Register, r26, (26)); ++CONSTANT_REGISTER_DECLARATION(Register, r27, (27)); ++CONSTANT_REGISTER_DECLARATION(Register, r28, (28)); ++CONSTANT_REGISTER_DECLARATION(Register, r29, (29)); ++CONSTANT_REGISTER_DECLARATION(Register, r30, (30)); ++CONSTANT_REGISTER_DECLARATION(Register, r31, (31)); ++ ++#ifndef DONT_USE_REGISTER_DEFINES ++#define NOREG ((Register)(noreg_RegisterEnumValue)) ++ ++#define R0 ((Register)(r0_RegisterEnumValue)) ++#define R1 ((Register)(r1_RegisterEnumValue)) ++#define R2 ((Register)(r2_RegisterEnumValue)) ++#define R3 ((Register)(r3_RegisterEnumValue)) ++#define R4 ((Register)(r4_RegisterEnumValue)) ++#define R5 ((Register)(r5_RegisterEnumValue)) ++#define R6 ((Register)(r6_RegisterEnumValue)) ++#define R7 ((Register)(r7_RegisterEnumValue)) ++#define R8 ((Register)(r8_RegisterEnumValue)) ++#define R9 ((Register)(r9_RegisterEnumValue)) ++#define 
R10 ((Register)(r10_RegisterEnumValue)) ++#define R11 ((Register)(r11_RegisterEnumValue)) ++#define R12 ((Register)(r12_RegisterEnumValue)) ++#define R13 ((Register)(r13_RegisterEnumValue)) ++#define R14 ((Register)(r14_RegisterEnumValue)) ++#define R15 ((Register)(r15_RegisterEnumValue)) ++#define R16 ((Register)(r16_RegisterEnumValue)) ++#define R17 ((Register)(r17_RegisterEnumValue)) ++#define R18 ((Register)(r18_RegisterEnumValue)) ++#define R19 ((Register)(r19_RegisterEnumValue)) ++#define R20 ((Register)(r20_RegisterEnumValue)) ++#define R21 ((Register)(r21_RegisterEnumValue)) ++#define R22 ((Register)(r22_RegisterEnumValue)) ++#define R23 ((Register)(r23_RegisterEnumValue)) ++#define R24 ((Register)(r24_RegisterEnumValue)) ++#define R25 ((Register)(r25_RegisterEnumValue)) ++#define R26 ((Register)(r26_RegisterEnumValue)) ++#define R27 ((Register)(r27_RegisterEnumValue)) ++#define R28 ((Register)(r28_RegisterEnumValue)) ++#define R29 ((Register)(r29_RegisterEnumValue)) ++#define R30 ((Register)(r30_RegisterEnumValue)) ++#define R31 ((Register)(r31_RegisterEnumValue)) ++ ++ ++#define RA R1 ++#define TP R2 ++#define SP R3 ++#define RA0 R4 ++#define RA1 R5 ++#define RA2 R6 ++#define RA3 R7 ++#define RA4 R8 ++#define RA5 R9 ++#define RA6 R10 ++#define RA7 R11 ++#define RT0 R12 ++#define RT1 R13 ++#define RT2 R14 ++#define RT3 R15 ++#define RT4 R16 ++#define RT5 R17 ++#define RT6 R18 ++#define RT7 R19 ++#define RT8 R20 ++#define RX R21 ++#define FP R22 ++#define S0 R23 ++#define S1 R24 ++#define S2 R25 ++#define S3 R26 ++#define S4 R27 ++#define S5 R28 ++#define S6 R29 ++#define S7 R30 ++#define S8 R31 ++ ++#define c_rarg0 RT0 ++#define c_rarg1 RT1 ++#define Rmethod S3 ++#define Rsender S4 ++#define Rnext S1 ++ ++#define V0 RA0 ++#define V1 RA1 ++ ++#define SCR1 RT7 ++#define SCR2 RT4 ++ ++//for interpreter frame ++// bytecode pointer register ++#define BCP S0 ++// local variable pointer register ++#define LVP S7 ++// temperary callee saved register, we use this register to save the register maybe blowed cross call_VM ++// be sure to save and restore its value in call_stub ++#define TSR S2 ++ ++//OPT_SAFEPOINT not supported yet ++#define OPT_SAFEPOINT 1 ++ ++#define OPT_THREAD 1 ++ ++#define TREG S6 ++ ++#define S5_heapbase S5 ++ ++#define FSR V0 ++#define SSR T6 ++#define FSF FV0 ++ ++#define RECEIVER T0 ++#define IC_Klass T1 ++ ++#define SHIFT_count T3 ++ ++// ---------- Scratch Register ---------- ++#define AT RT7 ++#define fscratch F23 ++ ++#endif // DONT_USE_REGISTER_DEFINES ++ ++// Use FloatRegister as shortcut ++class FloatRegisterImpl; ++typedef FloatRegisterImpl* FloatRegister; ++ ++inline FloatRegister as_FloatRegister(int encoding) { ++ return (FloatRegister)(intptr_t) encoding; ++} ++ ++// The implementation of floating point registers for the LoongArch architecture ++class FloatRegisterImpl: public AbstractRegisterImpl { ++ public: ++ enum { ++ number_of_registers = 32, ++ save_slots_per_register = 2, ++ slots_per_lsx_register = 4, ++ slots_per_lasx_register = 8, ++ max_slots_per_register = 8 ++ }; ++ ++ // construction ++ inline friend FloatRegister as_FloatRegister(int encoding); ++ ++ VMReg as_VMReg(); ++ ++ // derived registers, offsets, and addresses ++ FloatRegister successor() const { return as_FloatRegister(encoding() + 1); } ++ ++ // accessors ++ int encoding() const { assert(is_valid(), "invalid register"); return (intptr_t)this; } ++ bool is_valid() const { return 0 <= (intptr_t)this && (intptr_t)this < number_of_registers; } ++ const char* name() const; ++ ++}; ++ 
++CONSTANT_REGISTER_DECLARATION(FloatRegister, fnoreg , (-1)); ++ ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f0 , ( 0)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f1 , ( 1)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f2 , ( 2)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f3 , ( 3)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f4 , ( 4)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f5 , ( 5)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f6 , ( 6)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f7 , ( 7)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f8 , ( 8)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f9 , ( 9)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f10 , (10)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f11 , (11)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f12 , (12)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f13 , (13)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f14 , (14)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f15 , (15)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f16 , (16)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f17 , (17)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f18 , (18)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f19 , (19)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f20 , (20)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f21 , (21)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f22 , (22)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f23 , (23)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f24 , (24)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f25 , (25)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f26 , (26)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f27 , (27)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f28 , (28)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f29 , (29)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f30 , (30)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f31 , (31)); ++ ++#ifndef DONT_USE_REGISTER_DEFINES ++#define FNOREG ((FloatRegister)(fnoreg_FloatRegisterEnumValue)) ++#define F0 ((FloatRegister)( f0_FloatRegisterEnumValue)) ++#define F1 ((FloatRegister)( f1_FloatRegisterEnumValue)) ++#define F2 ((FloatRegister)( f2_FloatRegisterEnumValue)) ++#define F3 ((FloatRegister)( f3_FloatRegisterEnumValue)) ++#define F4 ((FloatRegister)( f4_FloatRegisterEnumValue)) ++#define F5 ((FloatRegister)( f5_FloatRegisterEnumValue)) ++#define F6 ((FloatRegister)( f6_FloatRegisterEnumValue)) ++#define F7 ((FloatRegister)( f7_FloatRegisterEnumValue)) ++#define F8 ((FloatRegister)( f8_FloatRegisterEnumValue)) ++#define F9 ((FloatRegister)( f9_FloatRegisterEnumValue)) ++#define F10 ((FloatRegister)( f10_FloatRegisterEnumValue)) ++#define F11 ((FloatRegister)( f11_FloatRegisterEnumValue)) ++#define F12 ((FloatRegister)( f12_FloatRegisterEnumValue)) ++#define F13 ((FloatRegister)( f13_FloatRegisterEnumValue)) ++#define F14 ((FloatRegister)( f14_FloatRegisterEnumValue)) ++#define F15 ((FloatRegister)( f15_FloatRegisterEnumValue)) ++#define F16 ((FloatRegister)( f16_FloatRegisterEnumValue)) ++#define F17 ((FloatRegister)( f17_FloatRegisterEnumValue)) ++#define F18 ((FloatRegister)( f18_FloatRegisterEnumValue)) ++#define F19 ((FloatRegister)( f19_FloatRegisterEnumValue)) ++#define F20 ((FloatRegister)( f20_FloatRegisterEnumValue)) ++#define F21 ((FloatRegister)( f21_FloatRegisterEnumValue)) ++#define F22 ((FloatRegister)( f22_FloatRegisterEnumValue)) ++#define F23 ((FloatRegister)( f23_FloatRegisterEnumValue)) ++#define F24 
((FloatRegister)( f24_FloatRegisterEnumValue)) ++#define F25 ((FloatRegister)( f25_FloatRegisterEnumValue)) ++#define F26 ((FloatRegister)( f26_FloatRegisterEnumValue)) ++#define F27 ((FloatRegister)( f27_FloatRegisterEnumValue)) ++#define F28 ((FloatRegister)( f28_FloatRegisterEnumValue)) ++#define F29 ((FloatRegister)( f29_FloatRegisterEnumValue)) ++#define F30 ((FloatRegister)( f30_FloatRegisterEnumValue)) ++#define F31 ((FloatRegister)( f31_FloatRegisterEnumValue)) ++ ++#define FA0 F0 ++#define FA1 F1 ++#define FA2 F2 ++#define FA3 F3 ++#define FA4 F4 ++#define FA5 F5 ++#define FA6 F6 ++#define FA7 F7 ++ ++#define FV0 F0 ++#define FV1 F1 ++ ++#define FT0 F8 ++#define FT1 F9 ++#define FT2 F10 ++#define FT3 F11 ++#define FT4 F12 ++#define FT5 F13 ++#define FT6 F14 ++#define FT7 F15 ++#define FT8 F16 ++#define FT9 F17 ++#define FT10 F18 ++#define FT11 F19 ++#define FT12 F20 ++#define FT13 F21 ++#define FT14 F22 ++#define FT15 F23 ++ ++#define FS0 F24 ++#define FS1 F25 ++#define FS2 F26 ++#define FS3 F27 ++#define FS4 F28 ++#define FS5 F29 ++#define FS6 F30 ++#define FS7 F31 ++ ++#endif // DONT_USE_REGISTER_DEFINES ++ ++// Use ConditionalFlagRegister as shortcut ++class ConditionalFlagRegisterImpl; ++typedef ConditionalFlagRegisterImpl* ConditionalFlagRegister; ++ ++inline ConditionalFlagRegister as_ConditionalFlagRegister(int encoding) { ++ return (ConditionalFlagRegister)(intptr_t) encoding; ++} ++ ++// The implementation of floating point registers for the LoongArch architecture ++class ConditionalFlagRegisterImpl: public AbstractRegisterImpl { ++ public: ++ enum { ++// conditionalflag_arg_base = 12, ++ number_of_registers = 8 ++ }; ++ ++ // construction ++ inline friend ConditionalFlagRegister as_ConditionalFlagRegister(int encoding); ++ ++ VMReg as_VMReg(); ++ ++ // derived registers, offsets, and addresses ++ ConditionalFlagRegister successor() const { return as_ConditionalFlagRegister(encoding() + 1); } ++ ++ // accessors ++ int encoding() const { assert(is_valid(), "invalid register"); return (intptr_t)this; } ++ bool is_valid() const { return 0 <= (intptr_t)this && (intptr_t)this < number_of_registers; } ++ const char* name() const; ++ ++}; ++ ++CONSTANT_REGISTER_DECLARATION(ConditionalFlagRegister, fccnoreg , (-1)); ++ ++CONSTANT_REGISTER_DECLARATION(ConditionalFlagRegister, fcc0 , ( 0)); ++CONSTANT_REGISTER_DECLARATION(ConditionalFlagRegister, fcc1 , ( 1)); ++CONSTANT_REGISTER_DECLARATION(ConditionalFlagRegister, fcc2 , ( 2)); ++CONSTANT_REGISTER_DECLARATION(ConditionalFlagRegister, fcc3 , ( 3)); ++CONSTANT_REGISTER_DECLARATION(ConditionalFlagRegister, fcc4 , ( 4)); ++CONSTANT_REGISTER_DECLARATION(ConditionalFlagRegister, fcc5 , ( 5)); ++CONSTANT_REGISTER_DECLARATION(ConditionalFlagRegister, fcc6 , ( 6)); ++CONSTANT_REGISTER_DECLARATION(ConditionalFlagRegister, fcc7 , ( 7)); ++ ++#ifndef DONT_USE_REGISTER_DEFINES ++#define FCCNOREG ((ConditionalFlagRegister)(fccnoreg_ConditionalFlagRegisterEnumValue)) ++#define FCC0 ((ConditionalFlagRegister)( fcc0_ConditionalFlagRegisterEnumValue)) ++#define FCC1 ((ConditionalFlagRegister)( fcc1_ConditionalFlagRegisterEnumValue)) ++#define FCC2 ((ConditionalFlagRegister)( fcc2_ConditionalFlagRegisterEnumValue)) ++#define FCC3 ((ConditionalFlagRegister)( fcc3_ConditionalFlagRegisterEnumValue)) ++#define FCC4 ((ConditionalFlagRegister)( fcc4_ConditionalFlagRegisterEnumValue)) ++#define FCC5 ((ConditionalFlagRegister)( fcc5_ConditionalFlagRegisterEnumValue)) ++#define FCC6 ((ConditionalFlagRegister)( fcc6_ConditionalFlagRegisterEnumValue)) 
++#define FCC7 ((ConditionalFlagRegister)( fcc7_ConditionalFlagRegisterEnumValue)) ++ ++#endif // DONT_USE_REGISTER_DEFINES ++ ++// Need to know the total number of registers of all sorts for SharedInfo. ++// Define a class that exports it. ++class ConcreteRegisterImpl : public AbstractRegisterImpl { ++ public: ++ enum { ++ // A big enough number for C2: all the registers plus flags ++ // This number must be large enough to cover REG_COUNT (defined by c2) registers. ++ // There is no requirement that any ordering here matches any ordering c2 gives ++ // it's optoregs. ++ number_of_registers = RegisterImpl::max_slots_per_register * RegisterImpl::number_of_registers + ++ FloatRegisterImpl::max_slots_per_register * FloatRegisterImpl::number_of_registers ++ }; ++ ++ static const int max_gpr; ++ static const int max_fpr; ++ ++ ++}; ++ ++#endif //CPU_LOONGARCH_VM_REGISTER_LOONGARCH_HPP +diff --git a/hotspot/src/cpu/loongarch/vm/relocInfo_loongarch.cpp b/hotspot/src/cpu/loongarch/vm/relocInfo_loongarch.cpp +new file mode 100644 +index 0000000000..bf4498dc62 +--- /dev/null ++++ b/hotspot/src/cpu/loongarch/vm/relocInfo_loongarch.cpp +@@ -0,0 +1,130 @@ ++/* ++ * Copyright (c) 1998, 2013, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ * ++ */ ++ ++#include "precompiled.hpp" ++#include "asm/macroAssembler.hpp" ++#include "code/relocInfo.hpp" ++#include "compiler/disassembler.hpp" ++#include "nativeInst_loongarch.hpp" ++#include "oops/oop.inline.hpp" ++#include "runtime/safepoint.hpp" ++ ++ ++void Relocation::pd_set_data_value(address x, intptr_t o, bool verify_only) { ++ x += o; ++ typedef Assembler::WhichOperand WhichOperand; ++ WhichOperand which = (WhichOperand) format(); // that is, disp32 or imm, call32, narrow oop ++ assert(which == Assembler::disp32_operand || ++ which == Assembler::narrow_oop_operand || ++ which == Assembler::imm_operand, "format unpacks ok"); ++ if (which == Assembler::imm_operand) { ++ if (verify_only) { ++ assert(nativeMovConstReg_at(addr())->data() == (long)x, "instructions must match"); ++ } else { ++ nativeMovConstReg_at(addr())->set_data((intptr_t)(x)); ++ } ++ } else if (which == Assembler::narrow_oop_operand) { ++ // both compressed oops and compressed classes look the same ++ if (Universe::heap()->is_in_reserved((oop)x)) { ++ if (verify_only) { ++ assert(nativeMovConstReg_at(addr())->data() == (long)oopDesc::encode_heap_oop((oop)x), "instructions must match"); ++ } else { ++ nativeMovConstReg_at(addr())->set_data((intptr_t)(oopDesc::encode_heap_oop((oop)x)), (intptr_t)(x)); ++ } ++ } else { ++ if (verify_only) { ++ assert(nativeMovConstReg_at(addr())->data() == (long)Klass::encode_klass((Klass*)x), "instructions must match"); ++ } else { ++ nativeMovConstReg_at(addr())->set_data((intptr_t)(Klass::encode_klass((Klass*)x)), (intptr_t)(x)); ++ } ++ } ++ } else { ++ // Note: Use runtime_call_type relocations for call32_operand. ++ assert(0, "call32_operand not supported in LoongArch64"); ++ } ++} ++ ++ ++address Relocation::pd_call_destination(address orig_addr) { ++ NativeInstruction* ni = nativeInstruction_at(addr()); ++ if (ni->is_far_call()) { ++ return nativeFarCall_at(addr())->destination(orig_addr); ++ } else if (ni->is_call()) { ++ address trampoline = nativeCall_at(addr())->get_trampoline(); ++ if (trampoline) { ++ return nativeCallTrampolineStub_at(trampoline)->destination(); ++ } else { ++ address new_addr = nativeCall_at(addr())->target_addr_for_bl(orig_addr); ++ // If call is branch to self, don't try to relocate it, just leave it ++ // as branch to self. This happens during code generation if the code ++ // buffer expands. It will be relocated to the trampoline above once ++ // code generation is complete. ++ return (new_addr == orig_addr) ? 
addr() : new_addr; ++ } ++ } else if (ni->is_jump()) { ++ return nativeGeneralJump_at(addr())->jump_destination(orig_addr); ++ } else { ++ tty->print_cr("\nError!\ncall destination: 0x%lx", p2i(addr())); ++ Disassembler::decode(addr() - 10 * BytesPerInstWord, addr() + 10 * BytesPerInstWord, tty); ++ ShouldNotReachHere(); ++ return NULL; ++ } ++} ++ ++void Relocation::pd_set_call_destination(address x) { ++ NativeInstruction* ni = nativeInstruction_at(addr()); ++ if (ni->is_far_call()) { ++ nativeFarCall_at(addr())->set_destination(x); ++ } else if (ni->is_call()) { ++ address trampoline = nativeCall_at(addr())->get_trampoline(); ++ if (trampoline) { ++ nativeCall_at(addr())->set_destination_mt_safe(x, false); ++ } else { ++ nativeCall_at(addr())->set_destination(x); ++ } ++ } else if (ni->is_jump()) { ++ nativeGeneralJump_at(addr())->set_jump_destination(x); ++ } else { ++ ShouldNotReachHere(); ++ } ++} ++ ++address* Relocation::pd_address_in_code() { ++ return (address*)addr(); ++} ++ ++address Relocation::pd_get_address_from_code() { ++ NativeMovConstReg* ni = nativeMovConstReg_at(addr()); ++ return (address)ni->data(); ++} ++ ++void poll_Relocation::fix_relocation_after_move(const CodeBuffer* src, CodeBuffer* dest) { ++} ++ ++void poll_return_Relocation::fix_relocation_after_move(const CodeBuffer* src, CodeBuffer* dest) { ++} ++ ++void metadata_Relocation::pd_fix_value(address x) { ++} +diff --git a/hotspot/src/cpu/loongarch/vm/relocInfo_loongarch.hpp b/hotspot/src/cpu/loongarch/vm/relocInfo_loongarch.hpp +new file mode 100644 +index 0000000000..211242f3fb +--- /dev/null ++++ b/hotspot/src/cpu/loongarch/vm/relocInfo_loongarch.hpp +@@ -0,0 +1,40 @@ ++/* ++ * Copyright (c) 1997, 2010, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2021, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#ifndef CPU_LOONGARCH_VM_RELOCINFO_LOONGARCH_HPP ++#define CPU_LOONGARCH_VM_RELOCINFO_LOONGARCH_HPP ++ ++ // machine-dependent parts of class relocInfo ++ private: ++ enum { ++ // Since LoongArch instructions are whole words, ++ // the two low-order offset bits can always be discarded. ++ offset_unit = 4, ++ ++ // imm_oop_operand vs. 
narrow_oop_operand ++ format_width = 2 ++ }; ++ ++#endif // CPU_LOONGARCH_VM_RELOCINFO_LOONGARCH_HPP +diff --git a/hotspot/src/cpu/loongarch/vm/runtime_loongarch_64.cpp b/hotspot/src/cpu/loongarch/vm/runtime_loongarch_64.cpp +new file mode 100644 +index 0000000000..e6ee65f367 +--- /dev/null ++++ b/hotspot/src/cpu/loongarch/vm/runtime_loongarch_64.cpp +@@ -0,0 +1,199 @@ ++/* ++ * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2021, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#include "precompiled.hpp" ++#ifdef COMPILER2 ++#include "asm/macroAssembler.hpp" ++#include "asm/macroAssembler.inline.hpp" ++#include "classfile/systemDictionary.hpp" ++#include "code/vmreg.hpp" ++#include "interpreter/interpreter.hpp" ++#include "opto/runtime.hpp" ++#include "runtime/interfaceSupport.hpp" ++#include "runtime/sharedRuntime.hpp" ++#include "runtime/stubRoutines.hpp" ++#include "runtime/vframeArray.hpp" ++#include "utilities/globalDefinitions.hpp" ++#include "vmreg_loongarch.inline.hpp" ++#endif ++ ++#define __ masm-> ++ ++#define A0 RA0 ++#define A1 RA1 ++#define A2 RA2 ++#define A3 RA3 ++#define A4 RA4 ++#define A5 RA5 ++#define A6 RA6 ++#define A7 RA7 ++#define T0 RT0 ++#define T1 RT1 ++#define T2 RT2 ++#define T3 RT3 ++#define T4 RT4 ++#define T5 RT5 ++#define T6 RT6 ++#define T7 RT7 ++#define T8 RT8 ++ ++//-------------- generate_exception_blob ----------- ++// creates _exception_blob. ++// The exception blob is jumped to from a compiled method. ++// (see emit_exception_handler in sparc.ad file) ++// ++// Given an exception pc at a call we call into the runtime for the ++// handler in this method. This handler might merely restore state ++// (i.e. callee save registers) unwind the frame and jump to the ++// exception handler for the nmethod if there is no Java level handler ++// for the nmethod. ++// ++// This code is entered with a jump, and left with a jump. ++// ++// Arguments: ++// V0: exception oop ++// V1: exception pc ++// ++// Results: ++// A0: exception oop ++// A1: exception pc in caller or ??? 
++// jumps to: exception handler of caller ++// ++// Note: the exception pc MUST be at a call (precise debug information) ++// ++// [stubGenerator_loongarch_64.cpp] generate_forward_exception() ++// |- V0, V1 are created ++// |- T4 <= SharedRuntime::exception_handler_for_return_address ++// `- jr T4 ++// `- the caller's exception_handler ++// `- jr OptoRuntime::exception_blob ++// `- here ++// ++void OptoRuntime::generate_exception_blob() { ++ // Capture info about frame layout ++ enum layout { ++ fp_off, ++ return_off, // slot for return address ++ framesize ++ }; ++ ++ // allocate space for the code ++ ResourceMark rm; ++ // setup code generation tools ++ CodeBuffer buffer("exception_blob", 5120, 5120); ++ MacroAssembler* masm = new MacroAssembler(&buffer); ++ ++ address start = __ pc(); ++ ++ __ addi_d(SP, SP, -1 * framesize * wordSize); // Prolog! ++ ++ // this frame will be treated as the original caller method. ++ // So, the return pc should be filled with the original exception pc. ++ // ref: X86's implementation ++ __ st_d(V1, SP, return_off * wordSize); // return address ++ __ st_d(FP, SP, fp_off * wordSize); ++ ++ // Save callee saved registers. None for UseSSE=0, ++ // floats-only for UseSSE=1, and doubles for UseSSE=2. ++ ++ __ addi_d(FP, SP, fp_off * wordSize); ++ ++ // Store exception in Thread object. We cannot pass any arguments to the ++ // handle_exception call, since we do not want to make any assumption ++ // about the size of the frame where the exception happened in. ++ Register thread = TREG; ++ ++#ifndef OPT_THREAD ++ __ get_thread(thread); ++#endif ++ ++ __ st_d(V0, Address(thread, JavaThread::exception_oop_offset())); ++ __ st_d(V1, Address(thread, JavaThread::exception_pc_offset())); ++ ++ // This call does all the hard work. It checks if an exception handler ++ // exists in the method. ++ // If so, it returns the handler address. ++ // If not, it prepares for stack-unwinding, restoring the callee-save ++ // registers of the frame being removed. ++ Label L; ++ address the_pc = __ pc(); ++ __ bind(L); ++ __ set_last_Java_frame(thread, NOREG, NOREG, L); ++ ++ __ li(AT, -(StackAlignmentInBytes)); ++ __ andr(SP, SP, AT); // Fix stack alignment as required by ABI ++ ++ __ move(A0, thread); ++ // TODO: confirm reloc ++ __ call((address)OptoRuntime::handle_exception_C, relocInfo::runtime_call_type); ++ ++ // Set an oopmap for the call site ++ OopMapSet *oop_maps = new OopMapSet(); ++ ++ oop_maps->add_gc_map(the_pc - start, new OopMap(framesize, 0)); ++ ++#ifndef OPT_THREAD ++ __ get_thread(thread); ++#endif ++ __ reset_last_Java_frame(thread, true); ++ ++ // Pop self-frame. ++ __ leave(); // Epilog! ++ ++ // V0: exception handler ++ ++ // We have a handler in V0, (could be deopt blob) ++ __ move(T4, V0); ++ ++#ifndef OPT_THREAD ++ __ get_thread(thread); ++#endif ++ // Get the exception ++ __ ld_d(A0, Address(thread, JavaThread::exception_oop_offset())); ++ // Get the exception pc in case we are deoptimized ++ __ ld_d(A1, Address(thread, JavaThread::exception_pc_offset())); ++#ifdef ASSERT ++ __ st_d(R0, Address(thread, JavaThread::exception_handler_pc_offset())); ++ __ st_d(R0, Address(thread, JavaThread::exception_pc_offset())); ++#endif ++ // Clear the exception oop so GC no longer processes it as a root. 
++ __ st_d(R0, Address(thread, JavaThread::exception_oop_offset())); ++ ++ // Fix seg fault when running: ++ // Eclipse + Plugin + Debug As ++ // This is the only condition where C2 calls SharedRuntime::generate_deopt_blob() ++ // ++ __ move(V0, A0); ++ __ move(V1, A1); ++ ++ // V0: exception oop ++ // T4: exception handler ++ // A1: exception pc ++ __ jr(T4); ++ ++ // make sure all code is generated ++ masm->flush(); ++ _exception_blob = ExceptionBlob::create(&buffer, oop_maps, framesize); ++} +diff --git a/hotspot/src/cpu/loongarch/vm/sharedRuntime_loongarch_64.cpp b/hotspot/src/cpu/loongarch/vm/sharedRuntime_loongarch_64.cpp +new file mode 100644 +index 0000000000..9efcd2ce52 +--- /dev/null ++++ b/hotspot/src/cpu/loongarch/vm/sharedRuntime_loongarch_64.cpp +@@ -0,0 +1,3453 @@ ++/* ++ * Copyright (c) 2003, 2013, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2023, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ * ++ */ ++ ++#include "precompiled.hpp" ++#include "asm/macroAssembler.hpp" ++#include "asm/macroAssembler.inline.hpp" ++#include "code/debugInfoRec.hpp" ++#include "code/icBuffer.hpp" ++#include "code/vtableStubs.hpp" ++#include "interpreter/interpreter.hpp" ++#include "oops/compiledICHolder.hpp" ++#include "prims/jvmtiRedefineClassesTrace.hpp" ++#include "runtime/sharedRuntime.hpp" ++#include "runtime/vframeArray.hpp" ++#include "vmreg_loongarch.inline.hpp" ++#ifdef COMPILER2 ++#include "opto/runtime.hpp" ++#endif ++ ++#include ++ ++#define __ masm-> ++ ++#define A0 RA0 ++#define A1 RA1 ++#define A2 RA2 ++#define A3 RA3 ++#define A4 RA4 ++#define A5 RA5 ++#define A6 RA6 ++#define A7 RA7 ++#define T0 RT0 ++#define T1 RT1 ++#define T2 RT2 ++#define T3 RT3 ++#define T4 RT4 ++#define T5 RT5 ++#define T6 RT6 ++#define T7 RT7 ++#define T8 RT8 ++ ++const int StackAlignmentInSlots = StackAlignmentInBytes / VMRegImpl::stack_slot_size; ++ ++class RegisterSaver { ++ // Capture info about frame layout ++ enum layout { ++ fpr0_off = 0, ++ fpr1_off, ++ fpr2_off, ++ fpr3_off, ++ fpr4_off, ++ fpr5_off, ++ fpr6_off, ++ fpr7_off, ++ fpr8_off, ++ fpr9_off, ++ fpr10_off, ++ fpr11_off, ++ fpr12_off, ++ fpr13_off, ++ fpr14_off, ++ fpr15_off, ++ fpr16_off, ++ fpr17_off, ++ fpr18_off, ++ fpr19_off, ++ fpr20_off, ++ fpr21_off, ++ fpr22_off, ++ fpr23_off, ++ fpr24_off, ++ fpr25_off, ++ fpr26_off, ++ fpr27_off, ++ fpr28_off, ++ fpr29_off, ++ fpr30_off, ++ fpr31_off, ++ a0_off, ++ a1_off, ++ a2_off, ++ a3_off, ++ a4_off, ++ a5_off, ++ a6_off, ++ a7_off, ++ t0_off, ++ t1_off, ++ t2_off, ++ t3_off, ++ t4_off, ++ t5_off, ++ t6_off, ++ t7_off, ++ t8_off, ++ s0_off, ++ s1_off, ++ s2_off, ++ s3_off, ++ s4_off, ++ s5_off, ++ s6_off, ++ s7_off, ++ s8_off, ++ fp_off, ++ ra_off, ++ fpr_size = fpr31_off - fpr0_off + 1, ++ gpr_size = ra_off - a0_off + 1, ++ }; ++ ++ const bool _save_vectors; ++ public: ++ RegisterSaver(bool save_vectors) : _save_vectors(save_vectors) {} ++ ++ OopMap* save_live_registers(MacroAssembler* masm, int additional_frame_words, int* total_frame_words); ++ void restore_live_registers(MacroAssembler* masm); ++ ++ int slots_save() { ++ int slots = gpr_size * VMRegImpl::slots_per_word; ++ ++ if (_save_vectors && UseLASX) ++ slots += FloatRegisterImpl::slots_per_lasx_register * fpr_size; ++ else if (_save_vectors && UseLSX) ++ slots += FloatRegisterImpl::slots_per_lsx_register * fpr_size; ++ else ++ slots += FloatRegisterImpl::save_slots_per_register * fpr_size; ++ ++ return slots; ++ } ++ ++ int gpr_offset(int off) { ++ int slots_per_fpr = FloatRegisterImpl::save_slots_per_register; ++ int slots_per_gpr = VMRegImpl::slots_per_word; ++ ++ if (_save_vectors && UseLASX) ++ slots_per_fpr = FloatRegisterImpl::slots_per_lasx_register; ++ else if (_save_vectors && UseLSX) ++ slots_per_fpr = FloatRegisterImpl::slots_per_lsx_register; ++ ++ return (fpr_size * slots_per_fpr + (off - a0_off) * slots_per_gpr) * VMRegImpl::stack_slot_size; ++ } ++ ++ int fpr_offset(int off) { ++ int slots_per_fpr = FloatRegisterImpl::save_slots_per_register; ++ ++ if (_save_vectors && UseLASX) ++ slots_per_fpr = FloatRegisterImpl::slots_per_lasx_register; ++ else if (_save_vectors && UseLSX) ++ slots_per_fpr = FloatRegisterImpl::slots_per_lsx_register; ++ ++ return off * slots_per_fpr * VMRegImpl::stack_slot_size; ++ } ++ ++ int ra_offset() { return gpr_offset(ra_off); } ++ int t5_offset() { return gpr_offset(t5_off); } ++ int s3_offset() { return gpr_offset(s3_off); } ++ int v0_offset() { return gpr_offset(a0_off); } ++ int 
v1_offset() { return gpr_offset(a1_off); } ++ ++ int fpr0_offset() { return fpr_offset(fpr0_off); } ++ int fpr1_offset() { return fpr_offset(fpr1_off); } ++ ++ // During deoptimization only the result register need to be restored ++ // all the other values have already been extracted. ++ void restore_result_registers(MacroAssembler* masm); ++}; ++ ++OopMap* RegisterSaver::save_live_registers(MacroAssembler* masm, int additional_frame_words, int* total_frame_words) { ++ ++ // Always make the frame size 16-byte aligned ++ int frame_size_in_bytes = round_to(additional_frame_words * wordSize + slots_save() * VMRegImpl::stack_slot_size, StackAlignmentInBytes); ++ // OopMap frame size is in compiler stack slots (jint's) not bytes or words ++ int frame_size_in_slots = frame_size_in_bytes / VMRegImpl::stack_slot_size; ++ // The caller will allocate additional_frame_words ++ int additional_frame_slots = additional_frame_words * wordSize / VMRegImpl::stack_slot_size; ++ // CodeBlob frame size is in words. ++ int frame_size_in_words = frame_size_in_bytes / wordSize; ++ ++ *total_frame_words = frame_size_in_words; ++ ++ OopMapSet *oop_maps = new OopMapSet(); ++ OopMap* map = new OopMap(frame_size_in_slots, 0); ++ ++ // save registers ++ __ addi_d(SP, SP, -slots_save() * VMRegImpl::stack_slot_size); ++ ++ for (int i = 0; i < fpr_size; i++) { ++ FloatRegister fpr = as_FloatRegister(i); ++ int off = fpr_offset(i); ++ ++ if (_save_vectors && UseLASX) ++ __ xvst(fpr, SP, off); ++ else if (_save_vectors && UseLSX) ++ __ vst(fpr, SP, off); ++ else ++ __ fst_d(fpr, SP, off); ++ map->set_callee_saved(VMRegImpl::stack2reg(off / VMRegImpl::stack_slot_size + additional_frame_slots), fpr->as_VMReg()); ++ } ++ ++ for (int i = a0_off; i <= a7_off; i++) { ++ Register gpr = as_Register(A0->encoding() + (i - a0_off)); ++ int off = gpr_offset(i); ++ ++ __ st_d(gpr, SP, gpr_offset(i)); ++ map->set_callee_saved(VMRegImpl::stack2reg(off / VMRegImpl::stack_slot_size + additional_frame_slots), gpr->as_VMReg()); ++ } ++ ++ for (int i = t0_off; i <= t6_off; i++) { ++ Register gpr = as_Register(T0->encoding() + (i - t0_off)); ++ int off = gpr_offset(i); ++ ++ __ st_d(gpr, SP, gpr_offset(i)); ++ map->set_callee_saved(VMRegImpl::stack2reg(off / VMRegImpl::stack_slot_size + additional_frame_slots), gpr->as_VMReg()); ++ } ++ __ st_d(T8, SP, gpr_offset(t8_off)); ++ map->set_callee_saved(VMRegImpl::stack2reg(gpr_offset(t8_off) / VMRegImpl::stack_slot_size + additional_frame_slots), T8->as_VMReg()); ++ ++ for (int i = s0_off; i <= s8_off; i++) { ++ Register gpr = as_Register(S0->encoding() + (i - s0_off)); ++ int off = gpr_offset(i); ++ ++ __ st_d(gpr, SP, gpr_offset(i)); ++ map->set_callee_saved(VMRegImpl::stack2reg(off / VMRegImpl::stack_slot_size + additional_frame_slots), gpr->as_VMReg()); ++ } ++ ++ __ st_d(FP, SP, gpr_offset(fp_off)); ++ map->set_callee_saved(VMRegImpl::stack2reg(gpr_offset(fp_off) / VMRegImpl::stack_slot_size + additional_frame_slots), FP->as_VMReg()); ++ __ st_d(RA, SP, gpr_offset(ra_off)); ++ map->set_callee_saved(VMRegImpl::stack2reg(gpr_offset(ra_off) / VMRegImpl::stack_slot_size + additional_frame_slots), RA->as_VMReg()); ++ ++ __ addi_d(FP, SP, gpr_offset(fp_off)); ++ ++ return map; ++} ++ ++ ++// Pop the current frame and restore all the registers that we ++// saved. 
++void RegisterSaver::restore_live_registers(MacroAssembler* masm) { ++ for (int i = 0; i < fpr_size; i++) { ++ FloatRegister fpr = as_FloatRegister(i); ++ int off = fpr_offset(i); ++ ++ if (_save_vectors && UseLASX) ++ __ xvld(fpr, SP, off); ++ else if (_save_vectors && UseLSX) ++ __ vld(fpr, SP, off); ++ else ++ __ fld_d(fpr, SP, off); ++ } ++ ++ for (int i = a0_off; i <= a7_off; i++) { ++ Register gpr = as_Register(A0->encoding() + (i - a0_off)); ++ int off = gpr_offset(i); ++ ++ __ ld_d(gpr, SP, gpr_offset(i)); ++ } ++ ++ for (int i = t0_off; i <= t6_off; i++) { ++ Register gpr = as_Register(T0->encoding() + (i - t0_off)); ++ int off = gpr_offset(i); ++ ++ __ ld_d(gpr, SP, gpr_offset(i)); ++ } ++ __ ld_d(T8, SP, gpr_offset(t8_off)); ++ ++ for (int i = s0_off; i <= s8_off; i++) { ++ Register gpr = as_Register(S0->encoding() + (i - s0_off)); ++ int off = gpr_offset(i); ++ ++ __ ld_d(gpr, SP, gpr_offset(i)); ++ } ++ ++ __ ld_d(FP, SP, gpr_offset(fp_off)); ++ __ ld_d(RA, SP, gpr_offset(ra_off)); ++ ++ __ addi_d(SP, SP, slots_save() * VMRegImpl::stack_slot_size); ++} ++ ++// Pop the current frame and restore the registers that might be holding ++// a result. ++void RegisterSaver::restore_result_registers(MacroAssembler* masm) { ++ // Just restore result register. Only used by deoptimization. By ++ // now any callee save register that needs to be restore to a c2 ++ // caller of the deoptee has been extracted into the vframeArray ++ // and will be stuffed into the c2i adapter we create for later ++ // restoration so only result registers need to be restored here. ++ ++ __ ld_d(V0, SP, gpr_offset(a0_off)); ++ __ ld_d(V1, SP, gpr_offset(a1_off)); ++ ++ __ fld_d(F0, SP, fpr_offset(fpr0_off)); ++ __ fld_d(F1, SP, fpr_offset(fpr1_off)); ++ ++ __ addi_d(SP, SP, gpr_offset(ra_off)); ++} ++ ++// Is vector's size (in bytes) bigger than a size saved by default? ++// 16 bytes XMM registers are saved by default using fxsave/fxrstor instructions. ++bool SharedRuntime::is_wide_vector(int size) { ++ return size > 16; ++} ++ ++// The java_calling_convention describes stack locations as ideal slots on ++// a frame with no abi restrictions. Since we must observe abi restrictions ++// (like the placement of the register window) the slots must be biased by ++// the following value. ++ ++static int reg2offset_in(VMReg r) { ++ // Account for saved fp and return address ++ // This should really be in_preserve_stack_slots ++ return (r->reg2stack() + 2 * VMRegImpl::slots_per_word) * VMRegImpl::stack_slot_size; // + 2 * VMRegImpl::stack_slot_size); ++} ++ ++static int reg2offset_out(VMReg r) { ++ return (r->reg2stack() + SharedRuntime::out_preserve_stack_slots()) * VMRegImpl::stack_slot_size; ++} ++ ++// --------------------------------------------------------------------------- ++// Read the array of BasicTypes from a signature, and compute where the ++// arguments should go. Values in the VMRegPair regs array refer to 4-byte ++// quantities. Values less than SharedInfo::stack0 are registers, those above ++// refer to 4-byte stack slots. All stack slots are based off of the stack pointer ++// as framesizes are fixed. ++// VMRegImpl::stack0 refers to the first slot 0(sp). ++// and VMRegImpl::stack0+1 refers to the memory word 4-byes higher. Register ++// up to RegisterImpl::number_of_registers) are the 32-bit ++// integer registers. ++ ++// Pass first five oop/int args in registers T0, A0 - A3. ++// Pass float/double/long args in stack. 
++// Doubles have precedence, so if you pass a mix of floats and doubles ++// the doubles will grab the registers before the floats will. ++ ++// Note: the INPUTS in sig_bt are in units of Java argument words, which are ++// either 32-bit or 64-bit depending on the build. The OUTPUTS are in 32-bit ++// units regardless of build. ++ ++ ++// --------------------------------------------------------------------------- ++// The compiled Java calling convention. ++// Pass first five oop/int args in registers T0, A0 - A3. ++// Pass float/double/long args in stack. ++// Doubles have precedence, so if you pass a mix of floats and doubles ++// the doubles will grab the registers before the floats will. ++ ++int SharedRuntime::java_calling_convention(const BasicType *sig_bt, ++ VMRegPair *regs, ++ int total_args_passed, ++ int is_outgoing) { ++ ++ // Create the mapping between argument positions and registers. ++ static const Register INT_ArgReg[Argument::n_register_parameters + 1] = { ++ T0, A0, A1, A2, A3, A4, A5, A6, A7 ++ }; ++ static const FloatRegister FP_ArgReg[Argument::n_float_register_parameters] = { ++ FA0, FA1, FA2, FA3, FA4, FA5, FA6, FA7 ++ }; ++ ++ uint int_args = 0; ++ uint fp_args = 0; ++ uint stk_args = 0; // inc by 2 each time ++ ++ for (int i = 0; i < total_args_passed; i++) { ++ switch (sig_bt[i]) { ++ case T_VOID: ++ // halves of T_LONG or T_DOUBLE ++ assert(i != 0 && (sig_bt[i - 1] == T_LONG || sig_bt[i - 1] == T_DOUBLE), "expecting half"); ++ regs[i].set_bad(); ++ break; ++ case T_BOOLEAN: ++ case T_CHAR: ++ case T_BYTE: ++ case T_SHORT: ++ case T_INT: ++ if (int_args < Argument::n_register_parameters + 1) { ++ regs[i].set1(INT_ArgReg[int_args++]->as_VMReg()); ++ } else { ++ regs[i].set1(VMRegImpl::stack2reg(stk_args)); ++ stk_args += 2; ++ } ++ break; ++ case T_LONG: ++ assert(sig_bt[i + 1] == T_VOID, "expecting half"); ++ // fall through ++ case T_OBJECT: ++ case T_ARRAY: ++ case T_ADDRESS: ++ if (int_args < Argument::n_register_parameters + 1) { ++ regs[i].set2(INT_ArgReg[int_args++]->as_VMReg()); ++ } else { ++ regs[i].set2(VMRegImpl::stack2reg(stk_args)); ++ stk_args += 2; ++ } ++ break; ++ case T_FLOAT: ++ if (fp_args < Argument::n_float_register_parameters) { ++ regs[i].set1(FP_ArgReg[fp_args++]->as_VMReg()); ++ } else { ++ regs[i].set1(VMRegImpl::stack2reg(stk_args)); ++ stk_args += 2; ++ } ++ break; ++ case T_DOUBLE: ++ assert(sig_bt[i + 1] == T_VOID, "expecting half"); ++ if (fp_args < Argument::n_float_register_parameters) { ++ regs[i].set2(FP_ArgReg[fp_args++]->as_VMReg()); ++ } else { ++ regs[i].set2(VMRegImpl::stack2reg(stk_args)); ++ stk_args += 2; ++ } ++ break; ++ default: ++ ShouldNotReachHere(); ++ break; ++ } ++ } ++ ++ return round_to(stk_args, 2); ++} ++ ++// Patch the callers callsite with entry to compiled code if it exists. ++static void patch_callers_callsite(MacroAssembler *masm) { ++ Label L; ++ __ verify_oop(Rmethod); ++ __ ld_ptr(AT, Rmethod, in_bytes(Method::code_offset())); ++ __ beq(AT, R0, L); ++ // Schedule the branch target address early. 
++ // Call into the VM to patch the caller, then jump to compiled callee ++ // T5 isn't live so capture return address while we easily can ++ __ move(T5, RA); ++ ++ __ pushad(); ++#ifdef COMPILER2 ++ // C2 may leave the stack dirty if not in SSE2+ mode ++ __ empty_FPU_stack(); ++#endif ++ ++ // VM needs caller's callsite ++ // VM needs target method ++ ++ __ move(A0, Rmethod); ++ __ move(A1, T5); ++ // we should preserve the return address ++ __ verify_oop(Rmethod); ++ __ move(S0, SP); ++ __ li(AT, -(StackAlignmentInBytes)); // align the stack ++ __ andr(SP, SP, AT); ++ __ call(CAST_FROM_FN_PTR(address, SharedRuntime::fixup_callers_callsite), ++ relocInfo::runtime_call_type); ++ ++ __ move(SP, S0); ++ __ popad(); ++ __ bind(L); ++} ++ ++static void gen_c2i_adapter(MacroAssembler *masm, ++ int total_args_passed, ++ int comp_args_on_stack, ++ const BasicType *sig_bt, ++ const VMRegPair *regs, ++ Label& skip_fixup) { ++ ++ // Before we get into the guts of the C2I adapter, see if we should be here ++ // at all. We've come from compiled code and are attempting to jump to the ++ // interpreter, which means the caller made a static call to get here ++ // (vcalls always get a compiled target if there is one). Check for a ++ // compiled target. If there is one, we need to patch the caller's call. ++ // However we will run interpreted if we come thru here. The next pass ++ // thru the call site will run compiled. If we ran compiled here then ++ // we can (theorectically) do endless i2c->c2i->i2c transitions during ++ // deopt/uncommon trap cycles. If we always go interpreted here then ++ // we can have at most one and don't need to play any tricks to keep ++ // from endlessly growing the stack. ++ // ++ // Actually if we detected that we had an i2c->c2i transition here we ++ // ought to be able to reset the world back to the state of the interpreted ++ // call and not bother building another interpreter arg area. We don't ++ // do that at this point. ++ ++ patch_callers_callsite(masm); ++ __ bind(skip_fixup); ++ ++#ifdef COMPILER2 ++ __ empty_FPU_stack(); ++#endif ++ //this is for native ? ++ // Since all args are passed on the stack, total_args_passed * interpreter_ ++ // stack_element_size is the ++ // space we need. ++ int extraspace = total_args_passed * Interpreter::stackElementSize; ++ ++ // stack is aligned, keep it that way ++ extraspace = round_to(extraspace, 2*wordSize); ++ ++ // Get return address ++ __ move(T5, RA); ++ // set senderSP value ++ //refer to interpreter_loongarch.cpp:generate_asm_entry ++ __ move(Rsender, SP); ++ __ addi_d(SP, SP, -extraspace); ++ ++ // Now write the args into the outgoing interpreter space ++ for (int i = 0; i < total_args_passed; i++) { ++ if (sig_bt[i] == T_VOID) { ++ assert(i > 0 && (sig_bt[i-1] == T_LONG || sig_bt[i-1] == T_DOUBLE), "missing half"); ++ continue; ++ } ++ ++ // st_off points to lowest address on stack. 
++ int st_off = ((total_args_passed - 1) - i) * Interpreter::stackElementSize; ++ // Say 4 args: ++ // i st_off ++ // 0 12 T_LONG ++ // 1 8 T_VOID ++ // 2 4 T_OBJECT ++ // 3 0 T_BOOL ++ VMReg r_1 = regs[i].first(); ++ VMReg r_2 = regs[i].second(); ++ if (!r_1->is_valid()) { ++ assert(!r_2->is_valid(), ""); ++ continue; ++ } ++ if (r_1->is_stack()) { ++ // memory to memory use fpu stack top ++ int ld_off = r_1->reg2stack() * VMRegImpl::stack_slot_size + extraspace; ++ if (!r_2->is_valid()) { ++ __ ld_ptr(AT, Address(SP, ld_off)); ++ __ st_ptr(AT, Address(SP, st_off)); ++ ++ } else { ++ ++ ++ int next_off = st_off - Interpreter::stackElementSize; ++ __ ld_ptr(AT, Address(SP, ld_off)); ++ __ st_ptr(AT, Address(SP, st_off)); ++ ++ // Ref to is_Register condition ++ if(sig_bt[i] == T_LONG || sig_bt[i] == T_DOUBLE) ++ __ st_ptr(AT, SP, st_off - 8); ++ } ++ } else if (r_1->is_Register()) { ++ Register r = r_1->as_Register(); ++ if (!r_2->is_valid()) { ++ __ st_d(r, SP, st_off); ++ } else { ++ //FIXME, LA will not enter here ++ // long/double in gpr ++ __ st_d(r, SP, st_off); ++ // In [java/util/zip/ZipFile.java] ++ // ++ // private static native long open(String name, int mode, long lastModified); ++ // private static native int getTotal(long jzfile); ++ // ++ // We need to transfer T_LONG paramenters from a compiled method to a native method. ++ // It's a complex process: ++ // ++ // Caller -> lir_static_call -> gen_resolve_stub ++ // -> -- resolve_static_call_C ++ // `- gen_c2i_adapter() [*] ++ // | ++ // `- AdapterHandlerLibrary::get_create_apapter_index ++ // -> generate_native_entry ++ // -> InterpreterRuntime::SignatureHandlerGenerator::pass_long [**] ++ // ++ // In [**], T_Long parameter is stored in stack as: ++ // ++ // (high) ++ // | | ++ // ----------- ++ // | 8 bytes | ++ // | (void) | ++ // ----------- ++ // | 8 bytes | ++ // | (long) | ++ // ----------- ++ // | | ++ // (low) ++ // ++ // However, the sequence is reversed here: ++ // ++ // (high) ++ // | | ++ // ----------- ++ // | 8 bytes | ++ // | (long) | ++ // ----------- ++ // | 8 bytes | ++ // | (void) | ++ // ----------- ++ // | | ++ // (low) ++ // ++ // So I stored another 8 bytes in the T_VOID slot. It then can be accessed from generate_native_entry(). ++ // ++ if (sig_bt[i] == T_LONG) ++ __ st_d(r, SP, st_off - 8); ++ } ++ } else if (r_1->is_FloatRegister()) { ++ assert(sig_bt[i] == T_FLOAT || sig_bt[i] == T_DOUBLE, "Must be a float register"); ++ ++ FloatRegister fr = r_1->as_FloatRegister(); ++ if (sig_bt[i] == T_FLOAT) ++ __ fst_s(fr, SP, st_off); ++ else { ++ __ fst_d(fr, SP, st_off); ++ __ fst_d(fr, SP, st_off - 8); // T_DOUBLE needs two slots ++ } ++ } ++ } ++ ++ // Schedule the branch target address early. ++ __ ld_ptr(AT, Rmethod, in_bytes(Method::interpreter_entry_offset()) ); ++ // And repush original return address ++ __ move(RA, T5); ++ __ jr (AT); ++} ++ ++static void gen_i2c_adapter(MacroAssembler *masm, ++ int total_args_passed, ++ int comp_args_on_stack, ++ const BasicType *sig_bt, ++ const VMRegPair *regs) { ++ ++ // Generate an I2C adapter: adjust the I-frame to make space for the C-frame ++ // layout. Lesp was saved by the calling I-frame and will be restored on ++ // return. Meanwhile, outgoing arg space is all owned by the callee ++ // C-frame, so we can mangle it at will. After adjusting the frame size, ++ // hoist register arguments and repack other args according to the compiled ++ // code convention. Finally, end in a jump to the compiled code. 
The entry ++ // point address is the start of the buffer. ++ ++ // We will only enter here from an interpreted frame and never from after ++ // passing thru a c2i. Azul allowed this but we do not. If we lose the ++ // race and use a c2i we will remain interpreted for the race loser(s). ++ // This removes all sorts of headaches on the LA side and also eliminates ++ // the possibility of having c2i -> i2c -> c2i -> ... endless transitions. ++ ++ __ move(T4, SP); ++ ++ // Cut-out for having no stack args. Since up to 2 int/oop args are passed ++ // in registers, we will occasionally have no stack args. ++ int comp_words_on_stack = 0; ++ if (comp_args_on_stack) { ++ // Sig words on the stack are greater-than VMRegImpl::stack0. Those in ++ // registers are below. By subtracting stack0, we either get a negative ++ // number (all values in registers) or the maximum stack slot accessed. ++ // int comp_args_on_stack = VMRegImpl::reg2stack(max_arg); ++ // Convert 4-byte stack slots to words. ++ // did LA need round? FIXME ++ comp_words_on_stack = round_to(comp_args_on_stack*4, wordSize)>>LogBytesPerWord; ++ // Round up to miminum stack alignment, in wordSize ++ comp_words_on_stack = round_to(comp_words_on_stack, 2); ++ __ addi_d(SP, SP, -comp_words_on_stack * wordSize); ++ } ++ ++ // Align the outgoing SP ++ __ li(AT, -(StackAlignmentInBytes)); ++ __ andr(SP, SP, AT); ++ // push the return address on the stack (note that pushing, rather ++ // than storing it, yields the correct frame alignment for the callee) ++ // Put saved SP in another register ++ const Register saved_sp = T5; ++ __ move(saved_sp, T4); ++ ++ ++ // Will jump to the compiled code just as if compiled code was doing it. ++ // Pre-load the register-jump target early, to schedule it better. ++ __ ld_d(T4, Rmethod, in_bytes(Method::from_compiled_offset())); ++ ++ // Now generate the shuffle code. Pick up all register args and move the ++ // rest through the floating point stack top. ++ for (int i = 0; i < total_args_passed; i++) { ++ if (sig_bt[i] == T_VOID) { ++ // Longs and doubles are passed in native word order, but misaligned ++ // in the 32-bit build. ++ assert(i > 0 && (sig_bt[i-1] == T_LONG || sig_bt[i-1] == T_DOUBLE), "missing half"); ++ continue; ++ } ++ ++ // Pick up 0, 1 or 2 words from SP+offset. ++ ++ assert(!regs[i].second()->is_valid() || regs[i].first()->next() == regs[i].second(), "scrambled load targets?"); ++ // Load in argument order going down. ++ int ld_off = (total_args_passed -1 - i)*Interpreter::stackElementSize; ++ // Point to interpreter value (vs. tag) ++ int next_off = ld_off - Interpreter::stackElementSize; ++ VMReg r_1 = regs[i].first(); ++ VMReg r_2 = regs[i].second(); ++ if (!r_1->is_valid()) { ++ assert(!r_2->is_valid(), ""); ++ continue; ++ } ++ if (r_1->is_stack()) { ++ // Convert stack slot to an SP offset (+ wordSize to ++ // account for return address ) ++ // NOTICE HERE!!!! I sub a wordSize here ++ int st_off = regs[i].first()->reg2stack()*VMRegImpl::stack_slot_size; ++ //+ wordSize; ++ ++ if (!r_2->is_valid()) { ++ __ ld_d(AT, saved_sp, ld_off); ++ __ st_d(AT, SP, st_off); ++ } else { ++ // Interpreter local[n] == MSW, local[n+1] == LSW however locals ++ // are accessed as negative so LSW is at LOW address ++ ++ // ld_off is MSW so get LSW ++ // st_off is LSW (i.e. 
reg.first()) ++ ++ // [./org/eclipse/swt/graphics/GC.java] ++ // void drawImageXRender(Image srcImage, int srcX, int srcY, int srcWidth, int srcHeight, ++ // int destX, int destY, int destWidth, int destHeight, ++ // boolean simple, ++ // int imgWidth, int imgHeight, ++ // long maskPixmap, <-- Pass T_LONG in stack ++ // int maskType); ++ // Before this modification, Eclipse displays icons with solid black background. ++ // ++ __ ld_d(AT, saved_sp, ld_off); ++ if (sig_bt[i] == T_LONG || sig_bt[i] == T_DOUBLE) ++ __ ld_d(AT, saved_sp, ld_off - 8); ++ __ st_d(AT, SP, st_off); ++ } ++ } else if (r_1->is_Register()) { // Register argument ++ Register r = r_1->as_Register(); ++ if (r_2->is_valid()) { ++ // Remember r_1 is low address (and LSB on LA) ++ // So r_2 gets loaded from high address regardless of the platform ++ assert(r_2->as_Register() == r_1->as_Register(), ""); ++ __ ld_d(r, saved_sp, ld_off); ++ ++ // ++ // For T_LONG type, the real layout is as below: ++ // ++ // (high) ++ // | | ++ // ----------- ++ // | 8 bytes | ++ // | (void) | ++ // ----------- ++ // | 8 bytes | ++ // | (long) | ++ // ----------- ++ // | | ++ // (low) ++ // ++ // We should load the low-8 bytes. ++ // ++ if (sig_bt[i] == T_LONG) ++ __ ld_d(r, saved_sp, ld_off - 8); ++ } else { ++ __ ld_w(r, saved_sp, ld_off); ++ } ++ } else if (r_1->is_FloatRegister()) { // Float Register ++ assert(sig_bt[i] == T_FLOAT || sig_bt[i] == T_DOUBLE, "Must be a float register"); ++ ++ FloatRegister fr = r_1->as_FloatRegister(); ++ if (sig_bt[i] == T_FLOAT) ++ __ fld_s(fr, saved_sp, ld_off); ++ else { ++ __ fld_d(fr, saved_sp, ld_off); ++ __ fld_d(fr, saved_sp, ld_off - 8); ++ } ++ } ++ } ++ ++ // 6243940 We might end up in handle_wrong_method if ++ // the callee is deoptimized as we race thru here. If that ++ // happens we don't want to take a safepoint because the ++ // caller frame will look interpreted and arguments are now ++ // "compiled" so it is much better to make this transition ++ // invisible to the stack walking code. Unfortunately if ++ // we try and find the callee by normal means a safepoint ++ // is possible. So we stash the desired callee in the thread ++ // and the vm will find there should this case occur. ++ __ get_thread(T8); ++ __ st_d(Rmethod, T8, in_bytes(JavaThread::callee_target_offset())); ++ ++ // move methodOop to T5 in case we end up in an c2i adapter. ++ // the c2i adapters expect methodOop in T5 (c2) because c2's ++ // resolve stubs return the result (the method) in T5. ++ // I'd love to fix this. ++ __ move(T5, Rmethod); ++ __ jr(T4); ++} ++ ++// --------------------------------------------------------------- ++AdapterHandlerEntry* SharedRuntime::generate_i2c2i_adapters(MacroAssembler *masm, ++ int total_args_passed, ++ int comp_args_on_stack, ++ const BasicType *sig_bt, ++ const VMRegPair *regs, ++ AdapterFingerPrint* fingerprint) { ++ address i2c_entry = __ pc(); ++ ++ gen_i2c_adapter(masm, total_args_passed, comp_args_on_stack, sig_bt, regs); ++ ++ // ------------------------------------------------------------------------- ++ // Generate a C2I adapter. On entry we know G5 holds the methodOop. The ++ // args start out packed in the compiled layout. They need to be unpacked ++ // into the interpreter layout. This will almost always require some stack ++ // space. We grow the current (compiled) stack, then repack the args. We ++ // finally end in a jump to the generic interpreter entry point. 
On exit ++ // from the interpreter, the interpreter will restore our SP (lest the ++ // compiled code, which relys solely on SP and not FP, get sick). ++ ++ address c2i_unverified_entry = __ pc(); ++ Label skip_fixup; ++ { ++ Register holder = T1; ++ Register receiver = T0; ++ Register temp = T8; ++ address ic_miss = SharedRuntime::get_ic_miss_stub(); ++ ++ Label missed; ++ ++ __ verify_oop(holder); ++ //add for compressedoops ++ __ load_klass(temp, receiver); ++ __ verify_oop(temp); ++ ++ __ ld_ptr(AT, holder, CompiledICHolder::holder_klass_offset()); ++ __ ld_ptr(Rmethod, holder, CompiledICHolder::holder_metadata_offset()); ++ __ bne(AT, temp, missed); ++ // Method might have been compiled since the call site was patched to ++ // interpreted if that is the case treat it as a miss so we can get ++ // the call site corrected. ++ __ ld_ptr(AT, Rmethod, in_bytes(Method::code_offset())); ++ __ beq(AT, R0, skip_fixup); ++ __ bind(missed); ++ ++ __ jmp(ic_miss, relocInfo::runtime_call_type); ++ } ++ address c2i_entry = __ pc(); ++ ++ gen_c2i_adapter(masm, total_args_passed, comp_args_on_stack, sig_bt, regs, skip_fixup); ++ ++ __ flush(); ++ return AdapterHandlerLibrary::new_entry(fingerprint, i2c_entry, c2i_entry, c2i_unverified_entry); ++} ++ ++int SharedRuntime::c_calling_convention(const BasicType *sig_bt, ++ VMRegPair *regs, ++ VMRegPair *regs2, ++ int total_args_passed) { ++ assert(regs2 == NULL, "not needed on LA"); ++ // Return the number of VMReg stack_slots needed for the args. ++ // This value does not include an abi space (like register window ++ // save area). ++ ++ // We return the amount of VMReg stack slots we need to reserve for all ++ // the arguments NOT counting out_preserve_stack_slots. Since we always ++ // have space for storing at least 6 registers to memory we start with that. ++ // See int_stk_helper for a further discussion. ++ // We return the amount of VMRegImpl stack slots we need to reserve for all ++ // the arguments NOT counting out_preserve_stack_slots. 
++ static const Register INT_ArgReg[Argument::n_register_parameters] = { ++ A0, A1, A2, A3, A4, A5, A6, A7 ++ }; ++ static const FloatRegister FP_ArgReg[Argument::n_float_register_parameters] = { ++ FA0, FA1, FA2, FA3, FA4, FA5, FA6, FA7 ++ }; ++ uint int_args = 0; ++ uint fp_args = 0; ++ uint stk_args = 0; // inc by 2 each time ++ ++// Example: ++// n java.lang.UNIXProcess::forkAndExec ++// private native int forkAndExec(byte[] prog, ++// byte[] argBlock, int argc, ++// byte[] envBlock, int envc, ++// byte[] dir, ++// boolean redirectErrorStream, ++// FileDescriptor stdin_fd, ++// FileDescriptor stdout_fd, ++// FileDescriptor stderr_fd) ++// JNIEXPORT jint JNICALL ++// Java_java_lang_UNIXProcess_forkAndExec(JNIEnv *env, ++// jobject process, ++// jbyteArray prog, ++// jbyteArray argBlock, jint argc, ++// jbyteArray envBlock, jint envc, ++// jbyteArray dir, ++// jboolean redirectErrorStream, ++// jobject stdin_fd, ++// jobject stdout_fd, ++// jobject stderr_fd) ++// ++// ::c_calling_convention ++// 0: // env <-- a0 ++// 1: L // klass/obj <-- t0 => a1 ++// 2: [ // prog[] <-- a0 => a2 ++// 3: [ // argBlock[] <-- a1 => a3 ++// 4: I // argc <-- a2 => a4 ++// 5: [ // envBlock[] <-- a3 => a5 ++// 6: I // envc <-- a4 => a5 ++// 7: [ // dir[] <-- a5 => a7 ++// 8: Z // redirectErrorStream <-- a6 => sp[0] ++// 9: L // stdin <-- a7 => sp[8] ++// 10: L // stdout fp[16] => sp[16] ++// 11: L // stderr fp[24] => sp[24] ++// ++ for (int i = 0; i < total_args_passed; i++) { ++ switch (sig_bt[i]) { ++ case T_VOID: // Halves of longs and doubles ++ assert(i != 0 && (sig_bt[i - 1] == T_LONG || sig_bt[i - 1] == T_DOUBLE), "expecting half"); ++ regs[i].set_bad(); ++ break; ++ case T_BOOLEAN: ++ case T_CHAR: ++ case T_BYTE: ++ case T_SHORT: ++ case T_INT: ++ if (int_args < Argument::n_register_parameters) { ++ regs[i].set1(INT_ArgReg[int_args++]->as_VMReg()); ++ } else { ++ regs[i].set1(VMRegImpl::stack2reg(stk_args)); ++ stk_args += 2; ++ } ++ break; ++ case T_LONG: ++ assert(sig_bt[i + 1] == T_VOID, "expecting half"); ++ // fall through ++ case T_OBJECT: ++ case T_ARRAY: ++ case T_ADDRESS: ++ case T_METADATA: ++ if (int_args < Argument::n_register_parameters) { ++ regs[i].set2(INT_ArgReg[int_args++]->as_VMReg()); ++ } else { ++ regs[i].set2(VMRegImpl::stack2reg(stk_args)); ++ stk_args += 2; ++ } ++ break; ++ case T_FLOAT: ++ if (fp_args < Argument::n_float_register_parameters) { ++ regs[i].set1(FP_ArgReg[fp_args++]->as_VMReg()); ++ } else if (int_args < Argument::n_register_parameters) { ++ regs[i].set1(INT_ArgReg[int_args++]->as_VMReg()); ++ } else { ++ regs[i].set1(VMRegImpl::stack2reg(stk_args)); ++ stk_args += 2; ++ } ++ break; ++ case T_DOUBLE: ++ assert(sig_bt[i + 1] == T_VOID, "expecting half"); ++ if (fp_args < Argument::n_float_register_parameters) { ++ regs[i].set2(FP_ArgReg[fp_args++]->as_VMReg()); ++ } else if (int_args < Argument::n_register_parameters) { ++ regs[i].set2(INT_ArgReg[int_args++]->as_VMReg()); ++ } else { ++ regs[i].set2(VMRegImpl::stack2reg(stk_args)); ++ stk_args += 2; ++ } ++ break; ++ default: ++ ShouldNotReachHere(); ++ break; ++ } ++ } ++ ++ return round_to(stk_args, 2); ++} ++ ++// --------------------------------------------------------------------------- ++void SharedRuntime::save_native_result(MacroAssembler *masm, BasicType ret_type, int frame_slots) { ++ // We always ignore the frame_slots arg and just use the space just below frame pointer ++ // which by this time is free to use ++ switch (ret_type) { ++ case T_FLOAT: ++ __ fst_s(FSF, FP, -wordSize); ++ break; ++ case 
T_DOUBLE: ++ __ fst_d(FSF, FP, -wordSize ); ++ break; ++ case T_VOID: break; ++ case T_LONG: ++ __ st_d(V0, FP, -wordSize); ++ break; ++ case T_OBJECT: ++ case T_ARRAY: ++ __ st_d(V0, FP, -wordSize); ++ break; ++ default: { ++ __ st_w(V0, FP, -wordSize); ++ } ++ } ++} ++ ++void SharedRuntime::restore_native_result(MacroAssembler *masm, BasicType ret_type, int frame_slots) { ++ // We always ignore the frame_slots arg and just use the space just below frame pointer ++ // which by this time is free to use ++ switch (ret_type) { ++ case T_FLOAT: ++ __ fld_s(FSF, FP, -wordSize); ++ break; ++ case T_DOUBLE: ++ __ fld_d(FSF, FP, -wordSize ); ++ break; ++ case T_LONG: ++ __ ld_d(V0, FP, -wordSize); ++ break; ++ case T_VOID: break; ++ case T_OBJECT: ++ case T_ARRAY: ++ __ ld_d(V0, FP, -wordSize); ++ break; ++ default: { ++ __ ld_w(V0, FP, -wordSize); ++ } ++ } ++} ++ ++static void save_args(MacroAssembler *masm, int arg_count, int first_arg, VMRegPair *args) { ++ for ( int i = first_arg ; i < arg_count ; i++ ) { ++ if (args[i].first()->is_Register()) { ++ __ push(args[i].first()->as_Register()); ++ } else if (args[i].first()->is_FloatRegister()) { ++ __ push(args[i].first()->as_FloatRegister()); ++ } ++ } ++} ++ ++static void restore_args(MacroAssembler *masm, int arg_count, int first_arg, VMRegPair *args) { ++ for ( int i = arg_count - 1 ; i >= first_arg ; i-- ) { ++ if (args[i].first()->is_Register()) { ++ __ pop(args[i].first()->as_Register()); ++ } else if (args[i].first()->is_FloatRegister()) { ++ __ pop(args[i].first()->as_FloatRegister()); ++ } ++ } ++} ++ ++// A simple move of integer like type ++static void simple_move32(MacroAssembler* masm, VMRegPair src, VMRegPair dst) { ++ if (src.first()->is_stack()) { ++ if (dst.first()->is_stack()) { ++ // stack to stack ++ __ ld_w(AT, FP, reg2offset_in(src.first())); ++ __ st_d(AT, SP, reg2offset_out(dst.first())); ++ } else { ++ // stack to reg ++ __ ld_w(dst.first()->as_Register(), FP, reg2offset_in(src.first())); ++ } ++ } else if (dst.first()->is_stack()) { ++ // reg to stack ++ __ st_d(src.first()->as_Register(), SP, reg2offset_out(dst.first())); ++ } else { ++ if (dst.first() != src.first()){ ++ __ move(dst.first()->as_Register(), src.first()->as_Register()); ++ } ++ } ++} ++ ++// An oop arg. Must pass a handle not the oop itself ++static void object_move(MacroAssembler* masm, ++ OopMap* map, ++ int oop_handle_offset, ++ int framesize_in_slots, ++ VMRegPair src, ++ VMRegPair dst, ++ bool is_receiver, ++ int* receiver_offset) { ++ ++ // must pass a handle. 
First figure out the location we use as a handle ++ ++ if (src.first()->is_stack()) { ++ // Oop is already on the stack as an argument ++ Register rHandle = T5; ++ Label nil; ++ __ xorr(rHandle, rHandle, rHandle); ++ __ ld_d(AT, FP, reg2offset_in(src.first())); ++ __ beq(AT, R0, nil); ++ __ lea(rHandle, Address(FP, reg2offset_in(src.first()))); ++ __ bind(nil); ++ if(dst.first()->is_stack())__ st_d( rHandle, SP, reg2offset_out(dst.first())); ++ else __ move( (dst.first())->as_Register(), rHandle); ++ ++ int offset_in_older_frame = src.first()->reg2stack() + SharedRuntime::out_preserve_stack_slots(); ++ map->set_oop(VMRegImpl::stack2reg(offset_in_older_frame + framesize_in_slots)); ++ if (is_receiver) { ++ *receiver_offset = (offset_in_older_frame + framesize_in_slots) * VMRegImpl::stack_slot_size; ++ } ++ } else { ++ // Oop is in an a register we must store it to the space we reserve ++ // on the stack for oop_handles ++ const Register rOop = src.first()->as_Register(); ++ assert( (rOop->encoding() >= A0->encoding()) && (rOop->encoding() <= T0->encoding()),"wrong register"); ++ const Register rHandle = T5; ++ //Important: refer to java_calling_convertion ++ int oop_slot = (rOop->encoding() - A0->encoding()) * VMRegImpl::slots_per_word + oop_handle_offset; ++ int offset = oop_slot*VMRegImpl::stack_slot_size; ++ Label skip; ++ __ st_d( rOop , SP, offset ); ++ map->set_oop(VMRegImpl::stack2reg(oop_slot)); ++ __ xorr( rHandle, rHandle, rHandle); ++ __ beq(rOop, R0, skip); ++ __ lea(rHandle, Address(SP, offset)); ++ __ bind(skip); ++ // Store the handle parameter ++ if(dst.first()->is_stack())__ st_d( rHandle, SP, reg2offset_out(dst.first())); ++ else __ move((dst.first())->as_Register(), rHandle); ++ ++ if (is_receiver) { ++ *receiver_offset = offset; ++ } ++ } ++} ++ ++// A float arg may have to do float reg int reg conversion ++static void float_move(MacroAssembler* masm, VMRegPair src, VMRegPair dst) { ++ assert(!src.second()->is_valid() && !dst.second()->is_valid(), "bad float_move"); ++ if (src.first()->is_stack()) { ++ // stack to stack/reg ++ if (dst.first()->is_stack()) { ++ __ ld_w(AT, FP, reg2offset_in(src.first())); ++ __ st_w(AT, SP, reg2offset_out(dst.first())); ++ } else if (dst.first()->is_FloatRegister()) { ++ __ fld_s(dst.first()->as_FloatRegister(), FP, reg2offset_in(src.first())); ++ } else { ++ __ ld_w(dst.first()->as_Register(), FP, reg2offset_in(src.first())); ++ } ++ } else { ++ // reg to stack/reg ++ if(dst.first()->is_stack()) { ++ __ fst_s(src.first()->as_FloatRegister(), SP, reg2offset_out(dst.first())); ++ } else if (dst.first()->is_FloatRegister()) { ++ __ fmov_s(dst.first()->as_FloatRegister(), src.first()->as_FloatRegister()); ++ } else { ++ __ movfr2gr_s(dst.first()->as_Register(), src.first()->as_FloatRegister()); ++ } ++ } ++} ++ ++// A long move ++static void long_move(MacroAssembler* masm, VMRegPair src, VMRegPair dst) { ++ ++ // The only legal possibility for a long_move VMRegPair is: ++ // 1: two stack slots (possibly unaligned) ++ // as neither the java or C calling convention will use registers ++ // for longs. 
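object_move above is the "handlizing" step the earlier comments refer to: the native callee never sees a raw oop, it sees either NULL or the address of a stack slot that holds the oop, which is what lets JNI code treat the value as an opaque handle. A minimal standalone model of that rule (oop, jobject_t and handlize are invented stand-ins, not HotSpot types):

#include <cassert>
#include <cstdint>

typedef intptr_t oop;        // stand-in for a raw object pointer
typedef oop*     jobject_t;  // a handle is the address of a slot holding the oop

// Mirrors the two branches of object_move(): park the oop in a reserved slot,
// then give the callee either NULL (for a NULL oop) or the slot's address.
jobject_t handlize(oop value, oop* reserved_slot) {
  *reserved_slot = value;
  return (value == 0) ? (jobject_t)0 : reserved_slot;
}

int main() {
  oop slot;
  assert(handlize(0, &slot) == 0);             // NULL oop -> NULL handle
  assert(*handlize(0x1234, &slot) == 0x1234);  // real oop -> pointer to the slot
  return 0;
}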
++ if (src.first()->is_stack()) { ++ assert(src.second()->is_stack() && dst.second()->is_stack(), "must be all stack"); ++ if( dst.first()->is_stack()){ ++ __ ld_d(AT, FP, reg2offset_in(src.first())); ++ __ st_d(AT, SP, reg2offset_out(dst.first())); ++ } else { ++ __ ld_d(dst.first()->as_Register(), FP, reg2offset_in(src.first())); ++ } ++ } else { ++ if( dst.first()->is_stack()){ ++ __ st_d(src.first()->as_Register(), SP, reg2offset_out(dst.first())); ++ } else { ++ __ move(dst.first()->as_Register(), src.first()->as_Register()); ++ } ++ } ++} ++ ++// A double move ++static void double_move(MacroAssembler* masm, VMRegPair src, VMRegPair dst) { ++ ++ // The only legal possibilities for a double_move VMRegPair are: ++ // The painful thing here is that like long_move a VMRegPair might be ++ ++ // Because of the calling convention we know that src is either ++ // 1: a single physical register (xmm registers only) ++ // 2: two stack slots (possibly unaligned) ++ // dst can only be a pair of stack slots. ++ ++ if (src.first()->is_stack()) { ++ // source is all stack ++ if( dst.first()->is_stack()){ ++ __ ld_d(AT, FP, reg2offset_in(src.first())); ++ __ st_d(AT, SP, reg2offset_out(dst.first())); ++ } else if (dst.first()->is_FloatRegister()) { ++ __ fld_d(dst.first()->as_FloatRegister(), FP, reg2offset_in(src.first())); ++ } else { ++ __ ld_d(dst.first()->as_Register(), FP, reg2offset_in(src.first())); ++ } ++ } else { ++ // reg to stack/reg ++ // No worries about stack alignment ++ if( dst.first()->is_stack()){ ++ __ fst_d(src.first()->as_FloatRegister(), SP, reg2offset_out(dst.first())); ++ } else if (dst.first()->is_FloatRegister()) { ++ __ fmov_d(dst.first()->as_FloatRegister(), src.first()->as_FloatRegister()); ++ } else { ++ __ movfr2gr_d(dst.first()->as_Register(), src.first()->as_FloatRegister()); ++ } ++ } ++} ++ ++static void verify_oop_args(MacroAssembler* masm, ++ methodHandle method, ++ const BasicType* sig_bt, ++ const VMRegPair* regs) { ++ Register temp_reg = T4; // not part of any compiled calling seq ++ if (VerifyOops) { ++ for (int i = 0; i < method->size_of_parameters(); i++) { ++ if (sig_bt[i] == T_OBJECT || ++ sig_bt[i] == T_ARRAY) { ++ VMReg r = regs[i].first(); ++ assert(r->is_valid(), "bad oop arg"); ++ if (r->is_stack()) { ++ __ ld_d(temp_reg, Address(SP, r->reg2stack() * VMRegImpl::stack_slot_size + wordSize)); ++ __ verify_oop(temp_reg); ++ } else { ++ __ verify_oop(r->as_Register()); ++ } ++ } ++ } ++ } ++} ++ ++static void gen_special_dispatch(MacroAssembler* masm, ++ methodHandle method, ++ const BasicType* sig_bt, ++ const VMRegPair* regs) { ++ verify_oop_args(masm, method, sig_bt, regs); ++ vmIntrinsics::ID iid = method->intrinsic_id(); ++ ++ // Now write the args into the outgoing interpreter space ++ bool has_receiver = false; ++ Register receiver_reg = noreg; ++ int member_arg_pos = -1; ++ Register member_reg = noreg; ++ int ref_kind = MethodHandles::signature_polymorphic_intrinsic_ref_kind(iid); ++ if (ref_kind != 0) { ++ member_arg_pos = method->size_of_parameters() - 1; // trailing MemberName argument ++ member_reg = S3; // known to be free at this point ++ has_receiver = MethodHandles::ref_kind_has_receiver(ref_kind); ++ } else if (iid == vmIntrinsics::_invokeBasic) { ++ has_receiver = true; ++ } else { ++ fatal(err_msg_res("unexpected intrinsic id %d", iid)); ++ } ++ ++ if (member_reg != noreg) { ++ // Load the member_arg into register, if necessary. 
++ SharedRuntime::check_member_name_argument_is_last_argument(method, sig_bt, regs); ++ VMReg r = regs[member_arg_pos].first(); ++ if (r->is_stack()) { ++ __ ld_d(member_reg, Address(SP, r->reg2stack() * VMRegImpl::stack_slot_size)); ++ } else { ++ // no data motion is needed ++ member_reg = r->as_Register(); ++ } ++ } ++ ++ if (has_receiver) { ++ // Make sure the receiver is loaded into a register. ++ assert(method->size_of_parameters() > 0, "oob"); ++ assert(sig_bt[0] == T_OBJECT, "receiver argument must be an object"); ++ VMReg r = regs[0].first(); ++ assert(r->is_valid(), "bad receiver arg"); ++ if (r->is_stack()) { ++ // Porting note: This assumes that compiled calling conventions always ++ // pass the receiver oop in a register. If this is not true on some ++ // platform, pick a temp and load the receiver from stack. ++ fatal("receiver always in a register"); ++ receiver_reg = SSR; // known to be free at this point ++ __ ld_d(receiver_reg, Address(SP, r->reg2stack() * VMRegImpl::stack_slot_size)); ++ } else { ++ // no data motion is needed ++ receiver_reg = r->as_Register(); ++ } ++ } ++ ++ // Figure out which address we are really jumping to: ++ MethodHandles::generate_method_handle_dispatch(masm, iid, ++ receiver_reg, member_reg, /*for_compiler_entry:*/ true); ++} ++ ++// --------------------------------------------------------------------------- ++// Generate a native wrapper for a given method. The method takes arguments ++// in the Java compiled code convention, marshals them to the native ++// convention (handlizes oops, etc), transitions to native, makes the call, ++// returns to java state (possibly blocking), unhandlizes any result and ++// returns. ++nmethod *SharedRuntime::generate_native_wrapper(MacroAssembler* masm, ++ methodHandle method, ++ int compile_id, ++ BasicType* in_sig_bt, ++ VMRegPair* in_regs, ++ BasicType ret_type) { ++ if (method->is_method_handle_intrinsic()) { ++ vmIntrinsics::ID iid = method->intrinsic_id(); ++ intptr_t start = (intptr_t)__ pc(); ++ int vep_offset = ((intptr_t)__ pc()) - start; ++ gen_special_dispatch(masm, ++ method, ++ in_sig_bt, ++ in_regs); ++ assert(((intptr_t)__ pc() - start - vep_offset) >= 1 * BytesPerInstWord, ++ "valid size for make_non_entrant"); ++ int frame_complete = ((intptr_t)__ pc()) - start; // not complete, period ++ __ flush(); ++ int stack_slots = SharedRuntime::out_preserve_stack_slots(); // no out slots at all, actually ++ return nmethod::new_native_nmethod(method, ++ compile_id, ++ masm->code(), ++ vep_offset, ++ frame_complete, ++ stack_slots / VMRegImpl::slots_per_word, ++ in_ByteSize(-1), ++ in_ByteSize(-1), ++ (OopMapSet*)NULL); ++ } ++ ++ bool is_critical_native = true; ++ address native_func = method->critical_native_function(); ++ if (native_func == NULL) { ++ native_func = method->native_function(); ++ is_critical_native = false; ++ } ++ assert(native_func != NULL, "must have function"); ++ ++ // Native nmethod wrappers never take possesion of the oop arguments. ++ // So the caller will gc the arguments. The only thing we need an ++ // oopMap for is if the call is static ++ // ++ // An OopMap for lock (and class if static), and one for the VM call itself ++ OopMapSet *oop_maps = new OopMapSet(); ++ ++ // We have received a description of where all the java arg are located ++ // on entry to the wrapper. We need to convert these args to where ++ // the jni function will expect them. 
To figure out where they go ++ // we convert the java signature to a C signature by inserting ++ // the hidden arguments as arg[0] and possibly arg[1] (static method) ++ ++ const int total_in_args = method->size_of_parameters(); ++ int total_c_args = total_in_args; ++ if (!is_critical_native) { ++ total_c_args += 1; ++ if (method->is_static()) { ++ total_c_args++; ++ } ++ } else { ++ for (int i = 0; i < total_in_args; i++) { ++ if (in_sig_bt[i] == T_ARRAY) { ++ total_c_args++; ++ } ++ } ++ } ++ ++ BasicType* out_sig_bt = NEW_RESOURCE_ARRAY(BasicType, total_c_args); ++ VMRegPair* out_regs = NEW_RESOURCE_ARRAY(VMRegPair, total_c_args); ++ BasicType* in_elem_bt = NULL; ++ ++ int argc = 0; ++ if (!is_critical_native) { ++ out_sig_bt[argc++] = T_ADDRESS; ++ if (method->is_static()) { ++ out_sig_bt[argc++] = T_OBJECT; ++ } ++ ++ for (int i = 0; i < total_in_args ; i++ ) { ++ out_sig_bt[argc++] = in_sig_bt[i]; ++ } ++ } else { ++ Thread* THREAD = Thread::current(); ++ in_elem_bt = NEW_RESOURCE_ARRAY(BasicType, total_in_args); ++ SignatureStream ss(method->signature()); ++ for (int i = 0; i < total_in_args ; i++ ) { ++ if (in_sig_bt[i] == T_ARRAY) { ++ // Arrays are passed as int, elem* pair ++ out_sig_bt[argc++] = T_INT; ++ out_sig_bt[argc++] = T_ADDRESS; ++ Symbol* atype = ss.as_symbol(CHECK_NULL); ++ const char* at = atype->as_C_string(); ++ if (strlen(at) == 2) { ++ assert(at[0] == '[', "must be"); ++ switch (at[1]) { ++ case 'B': in_elem_bt[i] = T_BYTE; break; ++ case 'C': in_elem_bt[i] = T_CHAR; break; ++ case 'D': in_elem_bt[i] = T_DOUBLE; break; ++ case 'F': in_elem_bt[i] = T_FLOAT; break; ++ case 'I': in_elem_bt[i] = T_INT; break; ++ case 'J': in_elem_bt[i] = T_LONG; break; ++ case 'S': in_elem_bt[i] = T_SHORT; break; ++ case 'Z': in_elem_bt[i] = T_BOOLEAN; break; ++ default: ShouldNotReachHere(); ++ } ++ } ++ } else { ++ out_sig_bt[argc++] = in_sig_bt[i]; ++ in_elem_bt[i] = T_VOID; ++ } ++ if (in_sig_bt[i] != T_VOID) { ++ assert(in_sig_bt[i] == ss.type(), "must match"); ++ ss.next(); ++ } ++ } ++ } ++ ++ // Now figure out where the args must be stored and how much stack space ++ // they require (neglecting out_preserve_stack_slots but space for storing ++ // the 1st six register arguments). It's weird see int_stk_helper. ++ // ++ int out_arg_slots; ++ out_arg_slots = c_calling_convention(out_sig_bt, out_regs, NULL, total_c_args); ++ ++ // Compute framesize for the wrapper. We need to handlize all oops in ++ // registers. We must create space for them here that is disjoint from ++ // the windowed save area because we have no control over when we might ++ // flush the window again and overwrite values that gc has since modified. ++ // (The live window race) ++ // ++ // We always just allocate 6 word for storing down these object. This allow ++ // us to simply record the base and use the Ireg number to decide which ++ // slot to use. (Note that the reg number is the inbound number not the ++ // outbound number). ++ // We must shuffle args to match the native convention, and include var-args space. ++ ++ // Calculate the total number of stack slots we will need. ++ ++ // First count the abi requirement plus all of the outgoing args ++ int stack_slots = SharedRuntime::out_preserve_stack_slots() + out_arg_slots; ++ ++ // Now the space for the inbound oop handle area ++ int total_save_slots = 9 * VMRegImpl::slots_per_word; // 9 arguments passed in registers ++ if (is_critical_native) { ++ // Critical natives may have to call out so they need a save area ++ // for register arguments. 
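The signature rewrite above can be summarised as: a regular JNI call grows a leading JNIEnv* slot, plus a class-mirror slot when the method is static, while a critical native keeps the raw signature but expands each array argument into a (length, element pointer) pair. A small standalone sketch of that expansion (the BT enum and to_c_signature are invented for the example):

#include <cstdio>
#include <vector>

enum BT { BT_ADDRESS, BT_OBJECT, BT_INT, BT_ARRAY };

// Rewrites a Java signature into the C signature the wrapper will call with:
// hidden JNIEnv*/mirror arguments for a normal native, or (length, pointer)
// pairs per array for a critical native.
std::vector<BT> to_c_signature(const std::vector<BT>& in_sig,
                               bool is_static, bool is_critical) {
  std::vector<BT> out;
  if (!is_critical) {
    out.push_back(BT_ADDRESS);                // JNIEnv*
    if (is_static) out.push_back(BT_OBJECT);  // class mirror
    out.insert(out.end(), in_sig.begin(), in_sig.end());
  } else {
    for (BT bt : in_sig) {
      if (bt == BT_ARRAY) {
        out.push_back(BT_INT);                // array length
        out.push_back(BT_ADDRESS);            // element pointer
      } else {
        out.push_back(bt);
      }
    }
  }
  return out;
}

int main() {
  std::vector<BT> java_sig = { BT_ARRAY, BT_INT };   // e.g. static foo(byte[], int)
  printf("normal JNI: %zu C args, critical: %zu C args\n",
         to_c_signature(java_sig, /*is_static=*/true, false).size(),   // 4
         to_c_signature(java_sig, true, true).size());                 // 3
  return 0;
}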
++ int double_slots = 0; ++ int single_slots = 0; ++ for ( int i = 0; i < total_in_args; i++) { ++ if (in_regs[i].first()->is_Register()) { ++ const Register reg = in_regs[i].first()->as_Register(); ++ switch (in_sig_bt[i]) { ++ case T_BOOLEAN: ++ case T_BYTE: ++ case T_SHORT: ++ case T_CHAR: ++ case T_INT: single_slots++; break; ++ case T_ARRAY: ++ case T_LONG: double_slots++; break; ++ default: ShouldNotReachHere(); ++ } ++ } else if (in_regs[i].first()->is_FloatRegister()) { ++ switch (in_sig_bt[i]) { ++ case T_FLOAT: single_slots++; break; ++ case T_DOUBLE: double_slots++; break; ++ default: ShouldNotReachHere(); ++ } ++ } ++ } ++ total_save_slots = double_slots * 2 + single_slots; ++ // align the save area ++ if (double_slots != 0) { ++ stack_slots = round_to(stack_slots, 2); ++ } ++ } ++ ++ int oop_handle_offset = stack_slots; ++ stack_slots += total_save_slots; ++ ++ // Now any space we need for handlizing a klass if static method ++ ++ int klass_slot_offset = 0; ++ int klass_offset = -1; ++ int lock_slot_offset = 0; ++ bool is_static = false; ++ ++ if (method->is_static()) { ++ klass_slot_offset = stack_slots; ++ stack_slots += VMRegImpl::slots_per_word; ++ klass_offset = klass_slot_offset * VMRegImpl::stack_slot_size; ++ is_static = true; ++ } ++ ++ // Plus a lock if needed ++ ++ if (method->is_synchronized()) { ++ lock_slot_offset = stack_slots; ++ stack_slots += VMRegImpl::slots_per_word; ++ } ++ ++ // Now a place to save return value or as a temporary for any gpr -> fpr moves ++ // + 2 for return address (which we own) and saved fp ++ stack_slots += 2 + 9 * VMRegImpl::slots_per_word; // (T0, A0, A1, A2, A3, A4, A5, A6, A7) ++ ++ // Ok The space we have allocated will look like: ++ // ++ // ++ // FP-> | | ++ // |---------------------| ++ // | 2 slots for moves | ++ // |---------------------| ++ // | lock box (if sync) | ++ // |---------------------| <- lock_slot_offset ++ // | klass (if static) | ++ // |---------------------| <- klass_slot_offset ++ // | oopHandle area | ++ // |---------------------| <- oop_handle_offset ++ // | outbound memory | ++ // | based arguments | ++ // | | ++ // |---------------------| ++ // | vararg area | ++ // |---------------------| ++ // | | ++ // SP-> | out_preserved_slots | ++ // ++ // ++ ++ ++ // Now compute actual number of stack words we need rounding to make ++ // stack properly aligned. ++ stack_slots = round_to(stack_slots, StackAlignmentInSlots); ++ ++ int stack_size = stack_slots * VMRegImpl::stack_slot_size; ++ ++ intptr_t start = (intptr_t)__ pc(); ++ ++ ++ ++ // First thing make an ic check to see if we should even be here ++ address ic_miss = SharedRuntime::get_ic_miss_stub(); ++ ++ // We are free to use all registers as temps without saving them and ++ // restoring them except fp. fp is the only callee save register ++ // as far as the interpreter and the compiler(s) are concerned. 
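All of the sizing above is done in 32-bit VMReg slots and only converted to bytes at the end, after rounding the slot count up to the stack alignment; round_to here is the usual round-up-to-a-power-of-two-multiple helper. A worked example with plausible numbers (the alignment and the per-item counts below are assumptions chosen for illustration, not values taken from the port):

#include <cstdio>

// HotSpot's round_to(): round x up to a multiple of a power-of-two alignment.
static int round_to(int x, int align) {
  return (x + align - 1) & ~(align - 1);
}

int main() {
  const int stack_slot_size       = 4;    // bytes per 32-bit VMReg slot
  const int stack_alignment_bytes = 16;   // assumed ABI stack alignment
  const int alignment_slots       = stack_alignment_bytes / stack_slot_size;

  int stack_slots = 0;
  stack_slots += 0;          // out_preserve_stack_slots() is 0 on this port
  stack_slots += 14;         // outgoing C arguments (example value)
  stack_slots += 9 * 2;      // oop handle area: 9 register args, 2 slots per word
  stack_slots += 2;          // klass slot (static method), 1 word
  stack_slots += 2;          // lock slot (synchronized method), 1 word
  stack_slots += 2 + 9 * 2;  // result temp plus the 2 + 9-word area noted above

  stack_slots = round_to(stack_slots, alignment_slots);
  printf("frame: %d slots = %d bytes\n",
         stack_slots, stack_slots * stack_slot_size);   // 56 slots = 224 bytes
  return 0;
}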
++ ++ //refer to register_loongarch.hpp:IC_Klass ++ const Register ic_reg = T1; ++ const Register receiver = T0; ++ ++ Label hit; ++ Label exception_pending; ++ ++ __ verify_oop(receiver); ++ //add for compressedoops ++ __ load_klass(T4, receiver); ++ __ beq(T4, ic_reg, hit); ++ __ jmp(ic_miss, relocInfo::runtime_call_type); ++ __ bind(hit); ++ ++ int vep_offset = ((intptr_t)__ pc()) - start; ++ ++ // Generate stack overflow check ++ if (UseStackBanging) { ++ __ bang_stack_with_offset(StackShadowPages*os::vm_page_size()); ++ } ++ ++ // The instruction at the verified entry point must be 4 bytes or longer ++ // because it can be patched on the fly by make_non_entrant. ++ if (((intptr_t)__ pc() - start - vep_offset) < 1 * BytesPerInstWord) { ++ __ nop(); ++ } ++ ++ // Generate a new frame for the wrapper. ++ // do LA need this ? ++#ifndef OPT_THREAD ++ __ get_thread(TREG); ++#endif ++ __ st_ptr(SP, TREG, in_bytes(JavaThread::last_Java_sp_offset())); ++ __ li(AT, -(StackAlignmentInBytes)); ++ __ andr(SP, SP, AT); ++ ++ __ enter(); ++ // -2 because return address is already present and so is saved fp ++ __ addi_d(SP, SP, -1 * (stack_size - 2*wordSize)); ++ ++ // Frame is now completed as far a size and linkage. ++ ++ int frame_complete = ((intptr_t)__ pc()) - start; ++ ++ // Calculate the difference between sp and fp. We need to know it ++ // after the native call because on windows Java Natives will pop ++ // the arguments and it is painful to do sp relative addressing ++ // in a platform independent way. So after the call we switch to ++ // fp relative addressing. ++ //FIXME actually , the fp_adjustment may not be the right, because andr(sp, sp, at) may change ++ //the SP ++ int fp_adjustment = stack_size - 2*wordSize; ++ ++#ifdef COMPILER2 ++ // C2 may leave the stack dirty if not in SSE2+ mode ++ __ empty_FPU_stack(); ++#endif ++ ++ // Compute the fp offset for any slots used after the jni call ++ ++ int lock_slot_fp_offset = (lock_slot_offset*VMRegImpl::stack_slot_size) - fp_adjustment; ++ // We use TREG as a thread pointer because it is callee save and ++ // if we load it once it is usable thru the entire wrapper ++ const Register thread = TREG; ++ ++ // We use S4 as the oop handle for the receiver/klass ++ // It is callee save so it survives the call to native ++ ++ const Register oop_handle_reg = S4; ++ if (is_critical_native) { ++ Unimplemented(); ++ // check_needs_gc_for_critical_native(masm, stack_slots, total_c_args, total_in_args, ++ // oop_handle_offset, oop_maps, in_regs, in_sig_bt); ++ } ++ ++#ifndef OPT_THREAD ++ __ get_thread(thread); ++#endif ++ ++ // ++ // We immediately shuffle the arguments so that any vm call we have to ++ // make from here on out (sync slow path, jvmpi, etc.) we will have ++ // captured the oops from our caller and have a valid oopMap for ++ // them. ++ ++ // ----------------- ++ // The Grand Shuffle ++ // ++ // Natives require 1 or 2 extra arguments over the normal ones: the JNIEnv* ++ // and, if static, the class mirror instead of a receiver. This pretty much ++ // guarantees that register layout will not match (and LA doesn't use reg ++ // parms though amd does). Since the native abi doesn't use register args ++ // and the java conventions does we don't have to worry about collisions. ++ // All of our moved are reg->stack or stack->stack. ++ // We ignore the extra arguments during the shuffle and handle them at the ++ // last moment. The shuffle is described by the two calling convention ++ // vectors we have in our possession. 
We simply walk the java vector to ++ // get the source locations and the c vector to get the destinations. ++ ++ int c_arg = method->is_static() ? 2 : 1 ; ++ ++ // Record sp-based slot for receiver on stack for non-static methods ++ int receiver_offset = -1; ++ ++ // This is a trick. We double the stack slots so we can claim ++ // the oops in the caller's frame. Since we are sure to have ++ // more args than the caller doubling is enough to make ++ // sure we can capture all the incoming oop args from the ++ // caller. ++ // ++ OopMap* map = new OopMap(stack_slots * 2, 0 /* arg_slots*/); ++ ++ // Mark location of fp (someday) ++ // map->set_callee_saved(VMRegImpl::stack2reg( stack_slots - 2), stack_slots * 2, 0, vmreg(fp)); ++ ++#ifdef ASSERT ++ bool reg_destroyed[RegisterImpl::number_of_registers]; ++ bool freg_destroyed[FloatRegisterImpl::number_of_registers]; ++ for ( int r = 0 ; r < RegisterImpl::number_of_registers ; r++ ) { ++ reg_destroyed[r] = false; ++ } ++ for ( int f = 0 ; f < FloatRegisterImpl::number_of_registers ; f++ ) { ++ freg_destroyed[f] = false; ++ } ++ ++#endif /* ASSERT */ ++ ++ // This may iterate in two different directions depending on the ++ // kind of native it is. The reason is that for regular JNI natives ++ // the incoming and outgoing registers are offset upwards and for ++ // critical natives they are offset down. ++ GrowableArray arg_order(2 * total_in_args); ++ VMRegPair tmp_vmreg; ++ tmp_vmreg.set2(T8->as_VMReg()); ++ ++ if (!is_critical_native) { ++ for (int i = total_in_args - 1, c_arg = total_c_args - 1; i >= 0; i--, c_arg--) { ++ arg_order.push(i); ++ arg_order.push(c_arg); ++ } ++ } else { ++ // Compute a valid move order, using tmp_vmreg to break any cycles ++ Unimplemented(); ++ // ComputeMoveOrder cmo(total_in_args, in_regs, total_c_args, out_regs, in_sig_bt, arg_order, tmp_vmreg); ++ } ++ ++ int temploc = -1; ++ for (int ai = 0; ai < arg_order.length(); ai += 2) { ++ int i = arg_order.at(ai); ++ int c_arg = arg_order.at(ai + 1); ++ __ block_comment(err_msg("move %d -> %d", i, c_arg)); ++ if (c_arg == -1) { ++ assert(is_critical_native, "should only be required for critical natives"); ++ // This arg needs to be moved to a temporary ++ __ move(tmp_vmreg.first()->as_Register(), in_regs[i].first()->as_Register()); ++ in_regs[i] = tmp_vmreg; ++ temploc = i; ++ continue; ++ } else if (i == -1) { ++ assert(is_critical_native, "should only be required for critical natives"); ++ // Read from the temporary location ++ assert(temploc != -1, "must be valid"); ++ i = temploc; ++ temploc = -1; ++ } ++#ifdef ASSERT ++ if (in_regs[i].first()->is_Register()) { ++ assert(!reg_destroyed[in_regs[i].first()->as_Register()->encoding()], "destroyed reg!"); ++ } else if (in_regs[i].first()->is_FloatRegister()) { ++ assert(!freg_destroyed[in_regs[i].first()->as_FloatRegister()->encoding()], "destroyed reg!"); ++ } ++ if (out_regs[c_arg].first()->is_Register()) { ++ reg_destroyed[out_regs[c_arg].first()->as_Register()->encoding()] = true; ++ } else if (out_regs[c_arg].first()->is_FloatRegister()) { ++ freg_destroyed[out_regs[c_arg].first()->as_FloatRegister()->encoding()] = true; ++ } ++#endif /* ASSERT */ ++ switch (in_sig_bt[i]) { ++ case T_ARRAY: ++ if (is_critical_native) { ++ Unimplemented(); ++ // unpack_array_argument(masm, in_regs[i], in_elem_bt[i], out_regs[c_arg + 1], out_regs[c_arg]); ++ c_arg++; ++#ifdef ASSERT ++ if (out_regs[c_arg].first()->is_Register()) { ++ reg_destroyed[out_regs[c_arg].first()->as_Register()->encoding()] = true; ++ } else if 
(out_regs[c_arg].first()->is_FloatRegister()) { ++ freg_destroyed[out_regs[c_arg].first()->as_FloatRegister()->encoding()] = true; ++ } ++#endif ++ break; ++ } ++ case T_OBJECT: ++ assert(!is_critical_native, "no oop arguments"); ++ object_move(masm, map, oop_handle_offset, stack_slots, in_regs[i], out_regs[c_arg], ++ ((i == 0) && (!is_static)), ++ &receiver_offset); ++ break; ++ case T_VOID: ++ break; ++ ++ case T_FLOAT: ++ float_move(masm, in_regs[i], out_regs[c_arg]); ++ break; ++ ++ case T_DOUBLE: ++ assert( i + 1 < total_in_args && ++ in_sig_bt[i + 1] == T_VOID && ++ out_sig_bt[c_arg+1] == T_VOID, "bad arg list"); ++ double_move(masm, in_regs[i], out_regs[c_arg]); ++ break; ++ ++ case T_LONG : ++ long_move(masm, in_regs[i], out_regs[c_arg]); ++ break; ++ ++ case T_ADDRESS: assert(false, "found T_ADDRESS in java args"); ++ ++ default: ++ simple_move32(masm, in_regs[i], out_regs[c_arg]); ++ } ++ } ++ ++ // point c_arg at the first arg that is already loaded in case we ++ // need to spill before we call out ++ c_arg = total_c_args - total_in_args; ++ // Pre-load a static method's oop. Used both by locking code and ++ // the normal JNI call code. ++ ++ __ move(oop_handle_reg, A1); ++ ++ if (method->is_static() && !is_critical_native) { ++ ++ // load opp into a register ++ int oop_index = __ oop_recorder()->find_index(JNIHandles::make_local( ++ (method->method_holder())->java_mirror())); ++ ++ ++ RelocationHolder rspec = oop_Relocation::spec(oop_index); ++ __ relocate(rspec); ++ __ patchable_li52(oop_handle_reg, (long)JNIHandles::make_local((method->method_holder())->java_mirror())); ++ // Now handlize the static class mirror it's known not-null. ++ __ st_d( oop_handle_reg, SP, klass_offset); ++ map->set_oop(VMRegImpl::stack2reg(klass_slot_offset)); ++ ++ // Now get the handle ++ __ lea(oop_handle_reg, Address(SP, klass_offset)); ++ // store the klass handle as second argument ++ __ move(A1, oop_handle_reg); ++ // and protect the arg if we must spill ++ c_arg--; ++ } ++ ++ // Change state to native (we save the return address in the thread, since it might not ++ // be pushed on the stack when we do a a stack traversal). It is enough that the pc() ++ // points into the right code segment. It does not have to be the correct return pc. ++ // We use the same pc/oopMap repeatedly when we call out ++ ++ Label native_return; ++ __ set_last_Java_frame(SP, noreg, native_return); ++ ++ // We have all of the arguments setup at this point. We must not touch any register ++ // argument registers at this point (what if we save/restore them there are no oop? 
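The shuffle above therefore uses a very simple index mapping for regular JNI natives: incoming Java argument i becomes outgoing C argument i + 1, or i + 2 for a static method, because JNIEnv* (and, for static methods, the class mirror) occupy the first C argument positions. A tiny sketch of just that mapping (c_index_for_java_arg is an invented name):

#include <cstdio>

int c_index_for_java_arg(int i, bool is_static) {
  return i + (is_static ? 2 : 1);
}

int main() {
  // A static native with three declared parameters: Java args 0..2 become
  // C args 2..4, because C args 0 and 1 are JNIEnv* and the class mirror.
  for (int i = 0; i < 3; i++) {
    printf("java arg %d -> c arg %d\n", i, c_index_for_java_arg(i, true));
  }
  return 0;
}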
++ { ++ SkipIfEqual skip_if(masm, &DTraceMethodProbes, 0); ++ save_args(masm, total_c_args, c_arg, out_regs); ++ int metadata_index = __ oop_recorder()->find_index(method()); ++ RelocationHolder rspec = metadata_Relocation::spec(metadata_index); ++ __ relocate(rspec); ++ __ patchable_li52(AT, (long)(method())); ++ ++ __ call_VM_leaf( ++ CAST_FROM_FN_PTR(address, SharedRuntime::dtrace_method_entry), ++ thread, AT); ++ ++ restore_args(masm, total_c_args, c_arg, out_regs); ++ } ++ ++ // These are register definitions we need for locking/unlocking ++ const Register swap_reg = T8; // Must use T8 for cmpxchg instruction ++ const Register obj_reg = T4; // Will contain the oop ++ //const Register lock_reg = T6; // Address of compiler lock object (BasicLock) ++ const Register lock_reg = c_rarg0; // Address of compiler lock object (BasicLock) ++ ++ ++ ++ Label slow_path_lock; ++ Label lock_done; ++ ++ // Lock a synchronized method ++ if (method->is_synchronized()) { ++ assert(!is_critical_native, "unhandled"); ++ ++ const int mark_word_offset = BasicLock::displaced_header_offset_in_bytes(); ++ ++ // Get the handle (the 2nd argument) ++ __ move(oop_handle_reg, A1); ++ ++ // Get address of the box ++ __ lea(lock_reg, Address(FP, lock_slot_fp_offset)); ++ ++ // Load the oop from the handle ++ __ ld_d(obj_reg, oop_handle_reg, 0); ++ ++ if (UseBiasedLocking) { ++ // Note that oop_handle_reg is trashed during this call ++ __ biased_locking_enter(lock_reg, obj_reg, swap_reg, A1, false, lock_done, &slow_path_lock); ++ } ++ ++ // Load immediate 1 into swap_reg %T8 ++ __ li(swap_reg, 1); ++ ++ __ ld_d(AT, obj_reg, 0); ++ __ orr(swap_reg, swap_reg, AT); ++ ++ __ st_d(swap_reg, lock_reg, mark_word_offset); ++ __ cmpxchg(Address(obj_reg, 0), swap_reg, lock_reg, AT, true, false, lock_done); ++ // Test if the oopMark is an obvious stack pointer, i.e., ++ // 1) (mark & 3) == 0, and ++ // 2) sp <= mark < mark + os::pagesize() ++ // These 3 tests can be done by evaluating the following ++ // expression: ((mark - sp) & (3 - os::vm_page_size())), ++ // assuming both stack pointer and pagesize have their ++ // least significant 2 bits clear. 
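The single AND described above really does fold both conditions into one test: with a power-of-two page size, ((mark - sp) & (3 - page_size)) is zero exactly when the displaced mark is 4-byte aligned and lies within one page at or above SP, i.e. it plausibly points into our own stack frame, which is the recursive-lock case. A small standalone check of that identity with concrete numbers (looks_like_own_stack_lock is an invented name):

#include <cassert>
#include <cstdint>

// ((mark - sp) & (3 - page_size)) == 0 holds exactly when (mark & 3) == 0 and
// sp <= mark < sp + page_size, provided sp is at least 4-byte aligned and
// page_size is a power of two.
static bool looks_like_own_stack_lock(intptr_t mark, intptr_t sp, intptr_t page_size) {
  return ((mark - sp) & (3 - page_size)) == 0;
}

int main() {
  const intptr_t page = 4096;
  const intptr_t sp   = 0x10000;
  assert( looks_like_own_stack_lock(sp + 0x40, sp, page));   // aligned, within one page
  assert(!looks_like_own_stack_lock(sp + 0x41, sp, page));   // low bits set
  assert(!looks_like_own_stack_lock(sp + page, sp, page));   // a full page away
  assert(!looks_like_own_stack_lock(sp - 0x40, sp, page));   // below sp
  return 0;
}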
++ // NOTE: the oopMark is in swap_reg %T8 as the result of cmpxchg ++ ++ __ sub_d(swap_reg, swap_reg, SP); ++ __ li(AT, 3 - os::vm_page_size()); ++ __ andr(swap_reg , swap_reg, AT); ++ // Save the test result, for recursive case, the result is zero ++ __ st_d(swap_reg, lock_reg, mark_word_offset); ++ __ bne(swap_reg, R0, slow_path_lock); ++ // Slow path will re-enter here ++ __ bind(lock_done); ++ ++ if (UseBiasedLocking) { ++ // Re-fetch oop_handle_reg as we trashed it above ++ __ move(A1, oop_handle_reg); ++ } ++ } ++ ++ ++ // Finally just about ready to make the JNI call ++ ++ ++ // get JNIEnv* which is first argument to native ++ if (!is_critical_native) { ++ __ addi_d(A0, thread, in_bytes(JavaThread::jni_environment_offset())); ++ } ++ ++ // Example: Java_java_lang_ref_Finalizer_invokeFinalizeMethod(JNIEnv *env, jclass clazz, jobject ob) ++ // Load the second arguments into A1 ++ //__ ld(A1, SP , wordSize ); // klass ++ ++ // Now set thread in native ++ __ addi_d(AT, R0, _thread_in_native); ++ if (os::is_MP()) { ++ __ membar(Assembler::Membar_mask_bits(__ LoadStore|__ StoreStore)); // store release ++ } ++ __ st_w(AT, thread, in_bytes(JavaThread::thread_state_offset())); ++ // do the call ++ __ call(native_func, relocInfo::runtime_call_type); ++ __ bind(native_return); ++ ++ oop_maps->add_gc_map(((intptr_t)__ pc()) - start, map); ++ ++ // WARNING - on Windows Java Natives use pascal calling convention and pop the ++ // arguments off of the stack. We could just re-adjust the stack pointer here ++ // and continue to do SP relative addressing but we instead switch to FP ++ // relative addressing. ++ ++ // Unpack native results. ++ switch (ret_type) { ++ case T_BOOLEAN: __ c2bool(V0); break; ++ case T_CHAR : __ bstrpick_d(V0, V0, 15, 0); break; ++ case T_BYTE : __ sign_extend_byte (V0); break; ++ case T_SHORT : __ sign_extend_short(V0); break; ++ case T_INT : // nothing to do break; ++ case T_DOUBLE : ++ case T_FLOAT : ++ // Result is in st0 we'll save as needed ++ break; ++ case T_ARRAY: // Really a handle ++ case T_OBJECT: // Really a handle ++ break; // can't de-handlize until after safepoint check ++ case T_VOID: break; ++ case T_LONG: break; ++ default : ShouldNotReachHere(); ++ } ++ // Switch thread to "native transition" state before reading the synchronization state. ++ // This additional state is necessary because reading and testing the synchronization ++ // state is not atomic w.r.t. GC, as this scenario demonstrates: ++ // Java thread A, in _thread_in_native state, loads _not_synchronized and is preempted. ++ // VM thread changes sync state to synchronizing and suspends threads for GC. ++ // Thread A is resumed to finish this native method, but doesn't block here since it ++ // didn't see any synchronization is progress, and escapes. ++ __ addi_d(AT, R0, _thread_in_native_trans); ++ if (os::is_MP()) { ++ __ membar(Assembler::Membar_mask_bits(__ LoadStore|__ StoreStore)); // store release ++ } ++ __ st_w(AT, thread, in_bytes(JavaThread::thread_state_offset())); ++ ++ if(os::is_MP()) { ++ if (UseMembar) { ++ // Force this write out before the read below ++ __ membar(__ AnyAny); ++ } else { ++ // Write serialization page so VM thread can do a pseudo remote membar. ++ // We use the current thread pointer to calculate a thread specific ++ // offset to write to within the page. This minimizes bus traffic ++ // due to cache line collision. 
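Stripped of the assembler details, the transition above is a publish-then-check protocol: store the transitional thread state, make that store visible to the VM thread before anything else, then look at the safepoint state and the suspend flags. A conceptual sketch of what the UseMembar path enforces, written with C++ atomics; this models the idea only, not how HotSpot implements it (the non-UseMembar case uses the serialization page written just below instead of a fence):

#include <atomic>

// Names mirror the HotSpot states (_thread_in_native_trans and friends).
enum ThreadState    { thread_in_native, thread_in_native_trans, thread_in_Java };
enum SafepointState { not_synchronized, synchronizing, synchronized_state };

std::atomic<int> thread_state{thread_in_native};
std::atomic<int> safepoint_state{not_synchronized};
std::atomic<int> suspend_flags{0};

// Returning from native: publish the transitional state, force the store to
// be visible before the check, then decide between the slow path (block at a
// safepoint / handle suspension) and continuing into Java.
bool must_take_slow_path() {
  thread_state.store(thread_in_native_trans, std::memory_order_release);
  std::atomic_thread_fence(std::memory_order_seq_cst);   // "force this write out"
  return safepoint_state.load() != not_synchronized || suspend_flags.load() != 0;
}

int main() { return must_take_slow_path() ? 1 : 0; }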
++ __ serialize_memory(thread, T5); ++ } ++ } ++ ++ Label after_transition; ++ ++ // check for safepoint operation in progress and/or pending suspend requests ++ { ++ Label Continue; ++ __ li(AT, SafepointSynchronize::address_of_state()); ++ __ ld_w(T5, AT, 0); ++ __ addi_d(AT, T5, -SafepointSynchronize::_not_synchronized); ++ Label L; ++ __ bne(AT, R0, L); ++ __ ld_w(AT, thread, in_bytes(JavaThread::suspend_flags_offset())); ++ __ beq(AT, R0, Continue); ++ __ bind(L); ++ ++ // Don't use call_VM as it will see a possible pending exception and forward it ++ // and never return here preventing us from clearing _last_native_pc down below. ++ // ++ save_native_result(masm, ret_type, stack_slots); ++ __ move(A0, thread); ++ __ addi_d(SP, SP, -wordSize); ++ __ push(S2); ++ __ li(AT, -(StackAlignmentInBytes)); ++ __ move(S2, SP); // use S2 as a sender SP holder ++ __ andr(SP, SP, AT); // align stack as required by ABI ++ if (!is_critical_native) { ++ __ call(CAST_FROM_FN_PTR(address, JavaThread::check_special_condition_for_native_trans), relocInfo::runtime_call_type); ++ } else { ++ __ call(CAST_FROM_FN_PTR(address, JavaThread::check_special_condition_for_native_trans_and_transition), relocInfo::runtime_call_type); ++ } ++ __ move(SP, S2); // use S2 as a sender SP holder ++ __ pop(S2); ++ __ addi_d(SP, SP, wordSize); ++ //add for compressedoops ++ __ reinit_heapbase(); ++ // Restore any method result value ++ restore_native_result(masm, ret_type, stack_slots); ++ ++ if (is_critical_native) { ++ // The call above performed the transition to thread_in_Java so ++ // skip the transition logic below. ++ __ beq(R0, R0, after_transition); ++ } ++ ++ __ bind(Continue); ++ } ++ ++ // change thread state ++ __ addi_d(AT, R0, _thread_in_Java); ++ if (os::is_MP()) { ++ __ membar(Assembler::Membar_mask_bits(__ LoadStore|__ StoreStore)); // store release ++ } ++ __ st_w(AT, thread, in_bytes(JavaThread::thread_state_offset())); ++ __ bind(after_transition); ++ Label reguard; ++ Label reguard_done; ++ __ ld_w(AT, thread, in_bytes(JavaThread::stack_guard_state_offset())); ++ __ addi_d(AT, AT, -JavaThread::stack_guard_yellow_disabled); ++ __ beq(AT, R0, reguard); ++ // slow path reguard re-enters here ++ __ bind(reguard_done); ++ ++ // Handle possible exception (will unlock if necessary) ++ ++ // native result if any is live ++ ++ // Unlock ++ Label slow_path_unlock; ++ Label unlock_done; ++ if (method->is_synchronized()) { ++ ++ Label done; ++ ++ // Get locked oop from the handle we passed to jni ++ __ ld_d( obj_reg, oop_handle_reg, 0); ++ if (UseBiasedLocking) { ++ __ biased_locking_exit(obj_reg, T8, done); ++ ++ } ++ ++ // Simple recursive lock? 
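The unlock fast path that follows relies on the displaced header saved during locking doubling as a recursion flag: a nested acquire stored zero in the on-stack BasicLock, so releasing it needs no atomic operation at all, while a non-zero value has to be compare-exchanged back into the object's mark word, falling into the runtime slow path if that fails. A compact standalone model of that decision (BasicLockModel and fast_unlock are invented; std::atomic stands in for the cmpxchg on the mark word):

#include <atomic>
#include <cstdint>

struct BasicLockModel { intptr_t displaced_header; };   // the on-stack lock slot

// Zero displaced header -> recursive case, nothing to undo.  Otherwise try to
// swing the object's mark word (which currently points at our BasicLock) back
// to the displaced header; if that fails the runtime slow path must run.
bool fast_unlock(std::atomic<intptr_t>& mark_word, BasicLockModel* lock) {
  if (lock->displaced_header == 0) {
    return true;
  }
  intptr_t expected = reinterpret_cast<intptr_t>(lock);
  return mark_word.compare_exchange_strong(expected, lock->displaced_header);
}

int main() {
  BasicLockModel lock;
  lock.displaced_header = 0x5;                                    // mark saved at lock time
  std::atomic<intptr_t> mark{reinterpret_cast<intptr_t>(&lock)};  // we own the thin lock
  return fast_unlock(mark, &lock) ? 0 : 1;
}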
++ ++ __ ld_d(AT, FP, lock_slot_fp_offset); ++ __ beq(AT, R0, done); ++ // Must save FSF if if it is live now because cmpxchg must use it ++ if (ret_type != T_FLOAT && ret_type != T_DOUBLE && ret_type != T_VOID) { ++ save_native_result(masm, ret_type, stack_slots); ++ } ++ ++ // get old displaced header ++ __ ld_d (T8, FP, lock_slot_fp_offset); ++ // get address of the stack lock ++ __ addi_d (c_rarg0, FP, lock_slot_fp_offset); ++ // Atomic swap old header if oop still contains the stack lock ++ __ cmpxchg(Address(obj_reg, 0), c_rarg0, T8, AT, false, false, unlock_done, &slow_path_unlock); ++ ++ // slow path re-enters here ++ __ bind(unlock_done); ++ if (ret_type != T_FLOAT && ret_type != T_DOUBLE && ret_type != T_VOID) { ++ restore_native_result(masm, ret_type, stack_slots); ++ } ++ ++ __ bind(done); ++ ++ } ++ { ++ SkipIfEqual skip_if(masm, &DTraceMethodProbes, 0); ++ // Tell dtrace about this method exit ++ save_native_result(masm, ret_type, stack_slots); ++ int metadata_index = __ oop_recorder()->find_index( (method())); ++ RelocationHolder rspec = metadata_Relocation::spec(metadata_index); ++ __ relocate(rspec); ++ __ patchable_li52(AT, (long)(method())); ++ ++ __ call_VM_leaf( ++ CAST_FROM_FN_PTR(address, SharedRuntime::dtrace_method_exit), ++ thread, AT); ++ restore_native_result(masm, ret_type, stack_slots); ++ } ++ ++ // We can finally stop using that last_Java_frame we setup ages ago ++ ++ __ reset_last_Java_frame(false); ++ ++ // Unpack oop result, e.g. JNIHandles::resolve value. ++ if (ret_type == T_OBJECT || ret_type == T_ARRAY) { ++ __ resolve_jobject(V0, thread, T4); ++ } ++ ++ if (!is_critical_native) { ++ // reset handle block ++ __ ld_d(AT, thread, in_bytes(JavaThread::active_handles_offset())); ++ __ st_w(R0, AT, JNIHandleBlock::top_offset_in_bytes()); ++ } ++ ++ if (!is_critical_native) { ++ // Any exception pending? ++ __ ld_d(AT, thread, in_bytes(Thread::pending_exception_offset())); ++ __ bne(AT, R0, exception_pending); ++ } ++ // no exception, we're almost done ++ ++ // check that only result value is on FPU stack ++ __ verify_FPU(ret_type == T_FLOAT || ret_type == T_DOUBLE ? 1 : 0, "native_wrapper normal exit"); ++ ++ // Return ++#ifndef OPT_THREAD ++ __ get_thread(TREG); ++#endif ++ //__ ld_ptr(SP, TREG, in_bytes(JavaThread::last_Java_sp_offset())); ++ __ leave(); ++ ++ __ jr(RA); ++ // Unexpected paths are out of line and go here ++ // Slow path locking & unlocking ++ if (method->is_synchronized()) { ++ ++ // BEGIN Slow path lock ++ __ bind(slow_path_lock); ++ ++ // protect the args we've loaded ++ save_args(masm, total_c_args, c_arg, out_regs); ++ ++ // has last_Java_frame setup. 
No exceptions so do vanilla call not call_VM ++ // args are (oop obj, BasicLock* lock, JavaThread* thread) ++ ++ __ move(A0, obj_reg); ++ __ move(A1, lock_reg); ++ __ move(A2, thread); ++ __ addi_d(SP, SP, - 3*wordSize); ++ ++ __ li(AT, -(StackAlignmentInBytes)); ++ __ move(S2, SP); // use S2 as a sender SP holder ++ __ andr(SP, SP, AT); // align stack as required by ABI ++ ++ __ call(CAST_FROM_FN_PTR(address, SharedRuntime::complete_monitor_locking_C), relocInfo::runtime_call_type); ++ __ move(SP, S2); ++ __ addi_d(SP, SP, 3*wordSize); ++ ++ restore_args(masm, total_c_args, c_arg, out_regs); ++ ++#ifdef ASSERT ++ { Label L; ++ __ ld_d(AT, thread, in_bytes(Thread::pending_exception_offset())); ++ __ beq(AT, R0, L); ++ __ stop("no pending exception allowed on exit from monitorenter"); ++ __ bind(L); ++ } ++#endif ++ __ b(lock_done); ++ // END Slow path lock ++ ++ // BEGIN Slow path unlock ++ __ bind(slow_path_unlock); ++ ++ // Slow path unlock ++ ++ if (ret_type == T_FLOAT || ret_type == T_DOUBLE ) { ++ save_native_result(masm, ret_type, stack_slots); ++ } ++ // Save pending exception around call to VM (which contains an EXCEPTION_MARK) ++ ++ __ ld_d(AT, thread, in_bytes(Thread::pending_exception_offset())); ++ __ push(AT); ++ __ st_d(R0, thread, in_bytes(Thread::pending_exception_offset())); ++ ++ __ li(AT, -(StackAlignmentInBytes)); ++ __ move(S2, SP); // use S2 as a sender SP holder ++ __ andr(SP, SP, AT); // align stack as required by ABI ++ ++ // should be a peal ++ // +wordSize because of the push above ++ __ addi_d(A1, FP, lock_slot_fp_offset); ++ ++ __ move(A0, obj_reg); ++ __ addi_d(SP,SP, -2*wordSize); ++ __ call(CAST_FROM_FN_PTR(address, SharedRuntime::complete_monitor_unlocking_C), ++ relocInfo::runtime_call_type); ++ __ addi_d(SP, SP, 2*wordSize); ++ __ move(SP, S2); ++ //add for compressedoops ++ __ reinit_heapbase(); ++#ifdef ASSERT ++ { ++ Label L; ++ __ ld_d( AT, thread, in_bytes(Thread::pending_exception_offset())); ++ __ beq(AT, R0, L); ++ __ stop("no pending exception allowed on exit complete_monitor_unlocking_C"); ++ __ bind(L); ++ } ++#endif /* ASSERT */ ++ ++ __ pop(AT); ++ __ st_d(AT, thread, in_bytes(Thread::pending_exception_offset())); ++ if (ret_type == T_FLOAT || ret_type == T_DOUBLE ) { ++ restore_native_result(masm, ret_type, stack_slots); ++ } ++ __ b(unlock_done); ++ // END Slow path unlock ++ ++ } ++ ++ // SLOW PATH Reguard the stack if needed ++ ++ __ bind(reguard); ++ save_native_result(masm, ret_type, stack_slots); ++ __ call(CAST_FROM_FN_PTR(address, SharedRuntime::reguard_yellow_pages), ++ relocInfo::runtime_call_type); ++ //add for compressedoops ++ __ reinit_heapbase(); ++ restore_native_result(masm, ret_type, stack_slots); ++ __ b(reguard_done); ++ ++ // BEGIN EXCEPTION PROCESSING ++ if (!is_critical_native) { ++ // Forward the exception ++ __ bind(exception_pending); ++ ++ // remove possible return value from FPU register stack ++ __ empty_FPU_stack(); ++ ++ // pop our frame ++ //forward_exception_entry need return address on stack ++ __ move(SP, FP); ++ __ pop(FP); ++ ++ // and forward the exception ++ __ jmp(StubRoutines::forward_exception_entry(), relocInfo::runtime_call_type); ++ } ++ __ flush(); ++ ++ nmethod *nm = nmethod::new_native_nmethod(method, ++ compile_id, ++ masm->code(), ++ vep_offset, ++ frame_complete, ++ stack_slots / VMRegImpl::slots_per_word, ++ (is_static ? 
in_ByteSize(klass_offset) : in_ByteSize(receiver_offset)), ++ in_ByteSize(lock_slot_offset*VMRegImpl::stack_slot_size), ++ oop_maps); ++ ++ if (is_critical_native) { ++ nm->set_lazy_critical_native(true); ++ } ++ return nm; ++} ++ ++#ifdef HAVE_DTRACE_H ++// --------------------------------------------------------------------------- ++// Generate a dtrace nmethod for a given signature. The method takes arguments ++// in the Java compiled code convention, marshals them to the native ++// abi and then leaves nops at the position you would expect to call a native ++// function. When the probe is enabled the nops are replaced with a trap ++// instruction that dtrace inserts and the trace will cause a notification ++// to dtrace. ++// ++// The probes are only able to take primitive types and java/lang/String as ++// arguments. No other java types are allowed. Strings are converted to utf8 ++// strings so that from dtrace point of view java strings are converted to C ++// strings. There is an arbitrary fixed limit on the total space that a method ++// can use for converting the strings. (256 chars per string in the signature). ++// So any java string larger then this is truncated. ++ ++static int fp_offset[ConcreteRegisterImpl::number_of_registers] = { 0 }; ++static bool offsets_initialized = false; ++ ++static VMRegPair reg64_to_VMRegPair(Register r) { ++ VMRegPair ret; ++ if (wordSize == 8) { ++ ret.set2(r->as_VMReg()); ++ } else { ++ ret.set_pair(r->successor()->as_VMReg(), r->as_VMReg()); ++ } ++ return ret; ++} ++ ++ ++nmethod *SharedRuntime::generate_dtrace_nmethod(MacroAssembler *masm, ++ methodHandle method) { ++ ++ ++ // generate_dtrace_nmethod is guarded by a mutex so we are sure to ++ // be single threaded in this method. ++ assert(AdapterHandlerLibrary_lock->owned_by_self(), "must be"); ++ ++ // Fill in the signature array, for the calling-convention call. ++ int total_args_passed = method->size_of_parameters(); ++ ++ BasicType* in_sig_bt = NEW_RESOURCE_ARRAY(BasicType, total_args_passed); ++ VMRegPair *in_regs = NEW_RESOURCE_ARRAY(VMRegPair, total_args_passed); ++ ++ // The signature we are going to use for the trap that dtrace will see ++ // java/lang/String is converted. We drop "this" and any other object ++ // is converted to NULL. (A one-slot java/lang/Long object reference ++ // is converted to a two-slot long, which is why we double the allocation). 
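The conversion loop that follows flattens the Java signature into the primitive-only form dtrace can consume: primitives pass through unchanged except float, which widens to int, and long/double, which occupy a padded two-slot long; String becomes a UTF-8 pointer, the boxed wrappers unbox to their primitive, and any other object is simply passed as NULL. A standalone sketch of that per-argument mapping (dtrace_out_type and its string return values are invented for the example):

#include <string>

// Out-signature element chosen for one Java argument; "+pad" marks types that
// also consume a trailing T_VOID slot.  The strings stand in for BasicTypes.
std::string dtrace_out_type(char desc, const std::string& klass = "") {
  switch (desc) {
    case 'Z': case 'B': case 'C': case 'S': case 'I':
      return std::string(1, desc);                    // primitives pass through
    case 'F': return "I";                             // float is converted to int
    case 'J': case 'D': return "J +pad";              // long/double -> two-slot long
    case 'L':
      if (klass == "java/lang/String")  return "utf8*";
      if (klass == "java/lang/Boolean" || klass == "java/lang/Byte")    return "B";
      if (klass == "java/lang/Character" || klass == "java/lang/Short") return "S";
      if (klass == "java/lang/Integer" || klass == "java/lang/Float")   return "I";
      if (klass == "java/lang/Long" || klass == "java/lang/Double")     return "J +pad";
      return "NULL";                                  // any other object is passed as NULL
    default:  return "?";
  }
}

int main() { return dtrace_out_type('F') == "I" ? 0 : 1; }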
++ BasicType* out_sig_bt = NEW_RESOURCE_ARRAY(BasicType, total_args_passed * 2); ++ VMRegPair* out_regs = NEW_RESOURCE_ARRAY(VMRegPair, total_args_passed * 2); ++ ++ int i=0; ++ int total_strings = 0; ++ int first_arg_to_pass = 0; ++ int total_c_args = 0; ++ ++ // Skip the receiver as dtrace doesn't want to see it ++ if( !method->is_static() ) { ++ in_sig_bt[i++] = T_OBJECT; ++ first_arg_to_pass = 1; ++ } ++ ++ SignatureStream ss(method->signature()); ++ for ( ; !ss.at_return_type(); ss.next()) { ++ BasicType bt = ss.type(); ++ in_sig_bt[i++] = bt; // Collect remaining bits of signature ++ out_sig_bt[total_c_args++] = bt; ++ if( bt == T_OBJECT) { ++ symbolOop s = ss.as_symbol_or_null(); ++ if (s == vmSymbols::java_lang_String()) { ++ total_strings++; ++ out_sig_bt[total_c_args-1] = T_ADDRESS; ++ } else if (s == vmSymbols::java_lang_Boolean() || ++ s == vmSymbols::java_lang_Byte()) { ++ out_sig_bt[total_c_args-1] = T_BYTE; ++ } else if (s == vmSymbols::java_lang_Character() || ++ s == vmSymbols::java_lang_Short()) { ++ out_sig_bt[total_c_args-1] = T_SHORT; ++ } else if (s == vmSymbols::java_lang_Integer() || ++ s == vmSymbols::java_lang_Float()) { ++ out_sig_bt[total_c_args-1] = T_INT; ++ } else if (s == vmSymbols::java_lang_Long() || ++ s == vmSymbols::java_lang_Double()) { ++ out_sig_bt[total_c_args-1] = T_LONG; ++ out_sig_bt[total_c_args++] = T_VOID; ++ } ++ } else if ( bt == T_LONG || bt == T_DOUBLE ) { ++ in_sig_bt[i++] = T_VOID; // Longs & doubles take 2 Java slots ++ // We convert double to long ++ out_sig_bt[total_c_args-1] = T_LONG; ++ out_sig_bt[total_c_args++] = T_VOID; ++ } else if ( bt == T_FLOAT) { ++ // We convert float to int ++ out_sig_bt[total_c_args-1] = T_INT; ++ } ++ } ++ ++ assert(i==total_args_passed, "validly parsed signature"); ++ ++ // Now get the compiled-Java layout as input arguments ++ int comp_args_on_stack; ++ comp_args_on_stack = SharedRuntime::java_calling_convention( ++ in_sig_bt, in_regs, total_args_passed, false); ++ ++ // We have received a description of where all the java arg are located ++ // on entry to the wrapper. We need to convert these args to where ++ // the a native (non-jni) function would expect them. To figure out ++ // where they go we convert the java signature to a C signature and remove ++ // T_VOID for any long/double we might have received. ++ ++ ++ // Now figure out where the args must be stored and how much stack space ++ // they require (neglecting out_preserve_stack_slots but space for storing ++ // the 1st six register arguments). It's weird see int_stk_helper. ++ ++ int out_arg_slots; ++ out_arg_slots = c_calling_convention(out_sig_bt, out_regs, NULL, total_c_args); ++ ++ // Calculate the total number of stack slots we will need. ++ ++ // First count the abi requirement plus all of the outgoing args ++ int stack_slots = SharedRuntime::out_preserve_stack_slots() + out_arg_slots; ++ ++ // Plus a temp for possible converion of float/double/long register args ++ ++ int conversion_temp = stack_slots; ++ stack_slots += 2; ++ ++ ++ // Now space for the string(s) we must convert ++ ++ int string_locs = stack_slots; ++ stack_slots += total_strings * ++ (max_dtrace_string_size / VMRegImpl::stack_slot_size); ++ ++ // Ok The space we have allocated will look like: ++ // ++ // ++ // FP-> | | ++ // |---------------------| ++ // | string[n] | ++ // |---------------------| <- string_locs[n] ++ // | string[n-1] | ++ // |---------------------| <- string_locs[n-1] ++ // | ... | ++ // | ... 
| ++ // |---------------------| <- string_locs[1] ++ // | string[0] | ++ // |---------------------| <- string_locs[0] ++ // | temp | ++ // |---------------------| <- conversion_temp ++ // | outbound memory | ++ // | based arguments | ++ // | | ++ // |---------------------| ++ // | | ++ // SP-> | out_preserved_slots | ++ // ++ // ++ ++ // Now compute actual number of stack words we need rounding to make ++ // stack properly aligned. ++ stack_slots = round_to(stack_slots, 4 * VMRegImpl::slots_per_word); ++ ++ int stack_size = stack_slots * VMRegImpl::stack_slot_size; ++ intptr_t start = (intptr_t)__ pc(); ++ ++ // First thing make an ic check to see if we should even be here ++ ++ { ++ Label L; ++ const Register temp_reg = G3_scratch; ++ Address ic_miss(temp_reg, SharedRuntime::get_ic_miss_stub()); ++ __ verify_oop(O0); ++ __ ld_ptr(O0, oopDesc::klass_offset_in_bytes(), temp_reg); ++ __ cmp(temp_reg, G5_inline_cache_reg); ++ __ brx(Assembler::equal, true, Assembler::pt, L); ++ ++ __ jump_to(ic_miss, 0); ++ __ align(CodeEntryAlignment); ++ __ bind(L); ++ } ++ ++ int vep_offset = ((intptr_t)__ pc()) - start; ++ ++ // The instruction at the verified entry point must be 4 bytes or longer ++ // because it can be patched on the fly by make_non_entrant. The stack bang ++ // instruction fits that requirement. ++ ++ // Generate stack overflow check before creating frame ++ __ generate_stack_overflow_check(stack_size); ++ ++ assert(((intptr_t)__ pc() - start - vep_offset) >= 1 * BytesPerInstWord, ++ "valid size for make_non_entrant"); ++ ++ // Generate a new frame for the wrapper. ++ __ save(SP, -stack_size, SP); ++ ++ // Frame is now completed as far a size and linkage. ++ ++ int frame_complete = ((intptr_t)__ pc()) - start; ++ ++#ifdef ASSERT ++ bool reg_destroyed[RegisterImpl::number_of_registers]; ++ bool freg_destroyed[FloatRegisterImpl::number_of_registers]; ++ for ( int r = 0 ; r < RegisterImpl::number_of_registers ; r++ ) { ++ reg_destroyed[r] = false; ++ } ++ for ( int f = 0 ; f < FloatRegisterImpl::number_of_registers ; f++ ) { ++ freg_destroyed[f] = false; ++ } ++ ++#endif /* ASSERT */ ++ ++ VMRegPair zero; ++ const Register g0 = G0; // without this we get a compiler warning (why??) 
++ zero.set2(g0->as_VMReg()); ++ ++ int c_arg, j_arg; ++ ++ Register conversion_off = noreg; ++ ++ for (j_arg = first_arg_to_pass, c_arg = 0 ; ++ j_arg < total_args_passed ; j_arg++, c_arg++ ) { ++ ++ VMRegPair src = in_regs[j_arg]; ++ VMRegPair dst = out_regs[c_arg]; ++ ++#ifdef ASSERT ++ if (src.first()->is_Register()) { ++ assert(!reg_destroyed[src.first()->as_Register()->encoding()], "ack!"); ++ } else if (src.first()->is_FloatRegister()) { ++ assert(!freg_destroyed[src.first()->as_FloatRegister()->encoding( ++ FloatRegisterImpl::S)], "ack!"); ++ } ++ if (dst.first()->is_Register()) { ++ reg_destroyed[dst.first()->as_Register()->encoding()] = true; ++ } else if (dst.first()->is_FloatRegister()) { ++ freg_destroyed[dst.first()->as_FloatRegister()->encoding( ++ FloatRegisterImpl::S)] = true; ++ } ++#endif /* ASSERT */ ++ ++ switch (in_sig_bt[j_arg]) { ++ case T_ARRAY: ++ case T_OBJECT: ++ { ++ if (out_sig_bt[c_arg] == T_BYTE || out_sig_bt[c_arg] == T_SHORT || ++ out_sig_bt[c_arg] == T_INT || out_sig_bt[c_arg] == T_LONG) { ++ // need to unbox a one-slot value ++ Register in_reg = L0; ++ Register tmp = L2; ++ if ( src.first()->is_reg() ) { ++ in_reg = src.first()->as_Register(); ++ } else { ++ assert(Assembler::is_simm13(reg2offset(src.first()) + STACK_BIAS), ++ "must be"); ++ __ ld_ptr(FP, reg2offset(src.first()) + STACK_BIAS, in_reg); ++ } ++ // If the final destination is an acceptable register ++ if ( dst.first()->is_reg() ) { ++ if ( dst.is_single_phys_reg() || out_sig_bt[c_arg] != T_LONG ) { ++ tmp = dst.first()->as_Register(); ++ } ++ } ++ ++ Label skipUnbox; ++ if ( wordSize == 4 && out_sig_bt[c_arg] == T_LONG ) { ++ __ mov(G0, tmp->successor()); ++ } ++ __ mov(G0, tmp); ++ __ br_null(in_reg, true, Assembler::pn, skipUnbox); ++ ++ BasicType bt = out_sig_bt[c_arg]; ++ int box_offset = java_lang_boxing_object::value_offset_in_bytes(bt); ++ switch (bt) { ++ case T_BYTE: ++ __ ldub(in_reg, box_offset, tmp); break; ++ case T_SHORT: ++ __ lduh(in_reg, box_offset, tmp); break; ++ case T_INT: ++ __ ld(in_reg, box_offset, tmp); break; ++ case T_LONG: ++ __ ld_long(in_reg, box_offset, tmp); break; ++ default: ShouldNotReachHere(); ++ } ++ ++ __ bind(skipUnbox); ++ // If tmp wasn't final destination copy to final destination ++ if (tmp == L2) { ++ VMRegPair tmp_as_VM = reg64_to_VMRegPair(L2); ++ if (out_sig_bt[c_arg] == T_LONG) { ++ long_move(masm, tmp_as_VM, dst); ++ } else { ++ move32_64(masm, tmp_as_VM, out_regs[c_arg]); ++ } ++ } ++ if (out_sig_bt[c_arg] == T_LONG) { ++ assert(out_sig_bt[c_arg+1] == T_VOID, "must be"); ++ ++c_arg; // move over the T_VOID to keep the loop indices in sync ++ } ++ } else if (out_sig_bt[c_arg] == T_ADDRESS) { ++ Register s = ++ src.first()->is_reg() ? src.first()->as_Register() : L2; ++ Register d = ++ dst.first()->is_reg() ? dst.first()->as_Register() : L2; ++ ++ // We store the oop now so that the conversion pass can reach ++ // while in the inner frame. This will be the only store if ++ // the oop is NULL. 
++ if (s != L2) { ++ // src is register ++ if (d != L2) { ++ // dst is register ++ __ mov(s, d); ++ } else { ++ assert(Assembler::is_simm13(reg2offset(dst.first()) + ++ STACK_BIAS), "must be"); ++ __ st_ptr(s, SP, reg2offset(dst.first()) + STACK_BIAS); ++ } ++ } else { ++ // src not a register ++ assert(Assembler::is_simm13(reg2offset(src.first()) + ++ STACK_BIAS), "must be"); ++ __ ld_ptr(FP, reg2offset(src.first()) + STACK_BIAS, d); ++ if (d == L2) { ++ assert(Assembler::is_simm13(reg2offset(dst.first()) + ++ STACK_BIAS), "must be"); ++ __ st_ptr(d, SP, reg2offset(dst.first()) + STACK_BIAS); ++ } ++ } ++ } else if (out_sig_bt[c_arg] != T_VOID) { ++ // Convert the arg to NULL ++ if (dst.first()->is_reg()) { ++ __ mov(G0, dst.first()->as_Register()); ++ } else { ++ assert(Assembler::is_simm13(reg2offset(dst.first()) + ++ STACK_BIAS), "must be"); ++ __ st_ptr(G0, SP, reg2offset(dst.first()) + STACK_BIAS); ++ } ++ } ++ } ++ break; ++ case T_VOID: ++ break; ++ ++ case T_FLOAT: ++ if (src.first()->is_stack()) { ++ // Stack to stack/reg is simple ++ move32_64(masm, src, dst); ++ } else { ++ if (dst.first()->is_reg()) { ++ // freg -> reg ++ int off = ++ STACK_BIAS + conversion_temp * VMRegImpl::stack_slot_size; ++ Register d = dst.first()->as_Register(); ++ if (Assembler::is_simm13(off)) { ++ __ stf(FloatRegisterImpl::S, src.first()->as_FloatRegister(), ++ SP, off); ++ __ ld(SP, off, d); ++ } else { ++ if (conversion_off == noreg) { ++ __ set(off, L6); ++ conversion_off = L6; ++ } ++ __ stf(FloatRegisterImpl::S, src.first()->as_FloatRegister(), ++ SP, conversion_off); ++ __ ld(SP, conversion_off , d); ++ } ++ } else { ++ // freg -> mem ++ int off = STACK_BIAS + reg2offset(dst.first()); ++ if (Assembler::is_simm13(off)) { ++ __ stf(FloatRegisterImpl::S, src.first()->as_FloatRegister(), ++ SP, off); ++ } else { ++ if (conversion_off == noreg) { ++ __ set(off, L6); ++ conversion_off = L6; ++ } ++ __ stf(FloatRegisterImpl::S, src.first()->as_FloatRegister(), ++ SP, conversion_off); ++ } ++ } ++ } ++ break; ++ ++ case T_DOUBLE: ++ assert( j_arg + 1 < total_args_passed && ++ in_sig_bt[j_arg + 1] == T_VOID && ++ out_sig_bt[c_arg+1] == T_VOID, "bad arg list"); ++ if (src.first()->is_stack()) { ++ // Stack to stack/reg is simple ++ long_move(masm, src, dst); ++ } else { ++ Register d = dst.first()->is_reg() ? dst.first()->as_Register() : L2; ++ ++ // Destination could be an odd reg on 32bit in which case ++ // we can't load direct to the destination. 
++ ++ if (!d->is_even() && wordSize == 4) { ++ d = L2; ++ } ++ int off = STACK_BIAS + conversion_temp * VMRegImpl::stack_slot_size; ++ if (Assembler::is_simm13(off)) { ++ __ stf(FloatRegisterImpl::D, src.first()->as_FloatRegister(), ++ SP, off); ++ __ ld_long(SP, off, d); ++ } else { ++ if (conversion_off == noreg) { ++ __ set(off, L6); ++ conversion_off = L6; ++ } ++ __ stf(FloatRegisterImpl::D, src.first()->as_FloatRegister(), ++ SP, conversion_off); ++ __ ld_long(SP, conversion_off, d); ++ } ++ if (d == L2) { ++ long_move(masm, reg64_to_VMRegPair(L2), dst); ++ } ++ } ++ break; ++ ++ case T_LONG : ++ // 32bit can't do a split move of something like g1 -> O0, O1 ++ // so use a memory temp ++ if (src.is_single_phys_reg() && wordSize == 4) { ++ Register tmp = L2; ++ if (dst.first()->is_reg() && ++ (wordSize == 8 || dst.first()->as_Register()->is_even())) { ++ tmp = dst.first()->as_Register(); ++ } ++ ++ int off = STACK_BIAS + conversion_temp * VMRegImpl::stack_slot_size; ++ if (Assembler::is_simm13(off)) { ++ __ stx(src.first()->as_Register(), SP, off); ++ __ ld_long(SP, off, tmp); ++ } else { ++ if (conversion_off == noreg) { ++ __ set(off, L6); ++ conversion_off = L6; ++ } ++ __ stx(src.first()->as_Register(), SP, conversion_off); ++ __ ld_long(SP, conversion_off, tmp); ++ } ++ ++ if (tmp == L2) { ++ long_move(masm, reg64_to_VMRegPair(L2), dst); ++ } ++ } else { ++ long_move(masm, src, dst); ++ } ++ break; ++ ++ case T_ADDRESS: assert(false, "found T_ADDRESS in java args"); ++ ++ default: ++ move32_64(masm, src, dst); ++ } ++ } ++ ++ ++ // If we have any strings we must store any register based arg to the stack ++ // This includes any still live xmm registers too. ++ ++ if (total_strings > 0 ) { ++ ++ // protect all the arg registers ++ __ save_frame(0); ++ __ mov(G2_thread, L7_thread_cache); ++ const Register L2_string_off = L2; ++ ++ // Get first string offset ++ __ set(string_locs * VMRegImpl::stack_slot_size, L2_string_off); ++ ++ for (c_arg = 0 ; c_arg < total_c_args ; c_arg++ ) { ++ if (out_sig_bt[c_arg] == T_ADDRESS) { ++ ++ VMRegPair dst = out_regs[c_arg]; ++ const Register d = dst.first()->is_reg() ? ++ dst.first()->as_Register()->after_save() : noreg; ++ ++ // It's a string the oop and it was already copied to the out arg ++ // position ++ if (d != noreg) { ++ __ mov(d, O0); ++ } else { ++ assert(Assembler::is_simm13(reg2offset(dst.first()) + STACK_BIAS), ++ "must be"); ++ __ ld_ptr(FP, reg2offset(dst.first()) + STACK_BIAS, O0); ++ } ++ Label skip; ++ ++ __ add_d(FP, L2_string_off, O1); ++ __ br_null(O0, false, Assembler::pn, skip); ++ ++ if (d != noreg) { ++ __ mov(O1, d); ++ } else { ++ assert(Assembler::is_simm13(reg2offset(dst.first()) + STACK_BIAS), ++ "must be"); ++ __ st_ptr(O1, FP, reg2offset(dst.first()) + STACK_BIAS); ++ } ++ ++ __ addi_d(L2_string_off, max_dtrace_string_size, L2_string_off); ++ __ call(CAST_FROM_FN_PTR(address, SharedRuntime::get_utf), ++ relocInfo::runtime_call_type); ++ ++ __ bind(skip); ++ ++ } ++ ++ } ++ __ mov(L7_thread_cache, G2_thread); ++ __ restore(); ++ ++ } ++ ++ ++ // Ok now we are done. 
Need to place the nop that dtrace wants in order to ++ // patch in the trap ++ ++ int patch_offset = ((intptr_t)__ pc()) - start; ++ ++ __ nop(); ++ ++ ++ // Return ++ ++ __ restore(); ++ __ ret(); ++ ++ __ flush(); ++ nmethod *nm = nmethod::new_dtrace_nmethod( ++ method, masm->code(), vep_offset, patch_offset, frame_complete, ++ stack_slots / VMRegImpl::slots_per_word); ++ return nm; ++} ++ ++#endif // HAVE_DTRACE_H ++ ++// this function returns the adjust size (in number of words) to a c2i adapter ++// activation for use during deoptimization ++int Deoptimization::last_frame_adjust(int callee_parameters, int callee_locals) { ++ return (callee_locals - callee_parameters) * Interpreter::stackElementWords; ++} ++ ++// "Top of Stack" slots that may be unused by the calling convention but must ++// otherwise be preserved. ++// On Intel these are not necessary and the value can be zero. ++// On Sparc this describes the words reserved for storing a register window ++// when an interrupt occurs. ++uint SharedRuntime::out_preserve_stack_slots() { ++ return 0; ++} ++ ++//------------------------------generate_deopt_blob---------------------------- ++// Ought to generate an ideal graph & compile, but here's some SPARC ASM ++// instead. ++void SharedRuntime::generate_deopt_blob() { ++ // allocate space for the code ++ ResourceMark rm; ++ // setup code generation tools ++ //CodeBuffer buffer ("deopt_blob", 4000, 2048); ++ CodeBuffer buffer ("deopt_blob", 8000, 2048); // FIXME for debug ++ MacroAssembler* masm = new MacroAssembler( & buffer); ++ int frame_size_in_words; ++ OopMap* map = NULL; ++ // Account for the extra args we place on the stack ++ // by the time we call fetch_unroll_info ++ const int additional_words = 2; // deopt kind, thread ++ ++ OopMapSet *oop_maps = new OopMapSet(); ++ RegisterSaver reg_save(false); ++ ++ address start = __ pc(); ++ Label cont; ++ // we use S3 for DeOpt reason register ++ Register reason = S3; ++ // use S6 for thread register ++ Register thread = TREG; ++ // use S7 for fetch_unroll_info returned UnrollBlock ++ Register unroll = S7; ++ // Prolog for non exception case! ++ ++ // We have been called from the deopt handler of the deoptee. ++ // ++ // deoptee: ++ // ... ++ // call X ++ // ... ++ // deopt_handler: call_deopt_stub ++ // cur. return pc --> ... ++ // ++ // So currently RA points behind the call in the deopt handler. ++ // We adjust it such that it points to the start of the deopt handler. ++ // The return_pc has been stored in the frame of the deoptee and ++ // will replace the address of the deopt_handler in the call ++ // to Deoptimization::fetch_unroll_info below. ++ ++ // HandlerImpl::size_deopt_handler() ++ __ addi_d(RA, RA, - NativeFarCall::instruction_size); ++ // Save everything in sight. 
++ map = reg_save.save_live_registers(masm, additional_words, &frame_size_in_words); ++ // Normal deoptimization ++ __ li(reason, Deoptimization::Unpack_deopt); ++ __ b(cont); ++ ++ int reexecute_offset = __ pc() - start; ++ ++ // Reexecute case ++ // return address is the pc describes what bci to do re-execute at ++ ++ // No need to update map as each call to save_live_registers will produce identical oopmap ++ (void) reg_save.save_live_registers(masm, additional_words, &frame_size_in_words); ++ __ li(reason, Deoptimization::Unpack_reexecute); ++ __ b(cont); ++ ++ int exception_offset = __ pc() - start; ++ // Prolog for exception case ++ ++ // all registers are dead at this entry point, except for V0 and ++ // V1 which contain the exception oop and exception pc ++ // respectively. Set them in TLS and fall thru to the ++ // unpack_with_exception_in_tls entry point. ++ ++ __ get_thread(thread); ++ __ st_ptr(V1, thread, in_bytes(JavaThread::exception_pc_offset())); ++ __ st_ptr(V0, thread, in_bytes(JavaThread::exception_oop_offset())); ++ int exception_in_tls_offset = __ pc() - start; ++ // new implementation because exception oop is now passed in JavaThread ++ ++ // Prolog for exception case ++ // All registers must be preserved because they might be used by LinearScan ++ // Exceptiop oop and throwing PC are passed in JavaThread ++ // tos: stack at point of call to method that threw the exception (i.e. only ++ // args are on the stack, no return address) ++ ++ // Return address will be patched later with the throwing pc. The correct value is not ++ // available now because loading it from memory would destroy registers. ++ // Save everything in sight. ++ // No need to update map as each call to save_live_registers will produce identical oopmap ++ (void) reg_save.save_live_registers(masm, additional_words, &frame_size_in_words); ++ ++ // Now it is safe to overwrite any register ++ // store the correct deoptimization type ++ __ li(reason, Deoptimization::Unpack_exception); ++ // load throwing pc from JavaThread and patch it as the return address ++ // of the current frame. Then clear the field in JavaThread ++ __ get_thread(thread); ++ __ ld_ptr(V1, thread, in_bytes(JavaThread::exception_pc_offset())); ++ __ st_ptr(V1, SP, reg_save.ra_offset()); //save ra ++ __ st_ptr(R0, thread, in_bytes(JavaThread::exception_pc_offset())); ++ ++ ++#ifdef ASSERT ++ // verify that there is really an exception oop in JavaThread ++ __ ld_ptr(AT, thread, in_bytes(JavaThread::exception_oop_offset())); ++ __ verify_oop(AT); ++ // verify that there is no pending exception ++ Label no_pending_exception; ++ __ ld_ptr(AT, thread, in_bytes(Thread::pending_exception_offset())); ++ __ beq(AT, R0, no_pending_exception); ++ __ stop("must not have pending exception here"); ++ __ bind(no_pending_exception); ++#endif ++ __ bind(cont); ++ // Compiled code leaves the floating point stack dirty, empty it. ++ __ empty_FPU_stack(); ++ ++ ++ // Call C code. Need thread and this frame, but NOT official VM entry ++ // crud. We cannot block on this call, no GC can happen. ++#ifndef OPT_THREAD ++ __ get_thread(thread); ++#endif ++ ++ __ move(A0, thread); ++ __ addi_d(SP, SP, -additional_words * wordSize); ++ ++ Label retaddr; ++ __ set_last_Java_frame(NOREG, NOREG, retaddr); ++ ++ // Call fetch_unroll_info(). Need thread and this frame, but NOT official VM entry - cannot block on ++ // this call, no GC can happen. Call should capture return values. 
++ ++ // TODO: confirm reloc ++ __ call(CAST_FROM_FN_PTR(address, Deoptimization::fetch_unroll_info), relocInfo::runtime_call_type); ++ __ bind(retaddr); ++ oop_maps->add_gc_map(__ pc() - start, map); ++ __ addi_d(SP, SP, additional_words * wordSize); ++ __ get_thread(thread); ++ __ reset_last_Java_frame(false); ++ ++ // Load UnrollBlock into S7 ++ __ move(unroll, V0); ++ ++ ++ // Move the unpack kind to a safe place in the UnrollBlock because ++ // we are very short of registers ++ ++ Address unpack_kind(unroll, Deoptimization::UnrollBlock::unpack_kind_offset_in_bytes()); ++ __ st_w(reason, unpack_kind); ++ // save the unpack_kind value ++ // Retrieve the possible live values (return values) ++ // All callee save registers representing jvm state ++ // are now in the vframeArray. ++ ++ Label noException; ++ __ li(AT, Deoptimization::Unpack_exception); ++ __ bne(AT, reason, noException);// Was exception pending? ++ __ ld_ptr(V0, thread, in_bytes(JavaThread::exception_oop_offset())); ++ __ ld_ptr(V1, thread, in_bytes(JavaThread::exception_pc_offset())); ++ __ st_ptr(R0, thread, in_bytes(JavaThread::exception_pc_offset())); ++ __ st_ptr(R0, thread, in_bytes(JavaThread::exception_oop_offset())); ++ ++ __ verify_oop(V0); ++ ++ // Overwrite the result registers with the exception results. ++ __ st_ptr(V0, SP, reg_save.v0_offset()); ++ __ st_ptr(V1, SP, reg_save.v1_offset()); ++ ++ __ bind(noException); ++ ++ ++ // Stack is back to only having register save data on the stack. ++ // Now restore the result registers. Everything else is either dead or captured ++ // in the vframeArray. ++ ++ reg_save.restore_result_registers(masm); ++ // All of the register save area has been popped off the stack. Only the ++ // return address remains. ++ // Pop all the frames we must move/replace. ++ // Frame picture (youngest to oldest) ++ // 1: self-frame (no frame link) ++ // 2: deopting frame (no frame link) ++ // 3: caller of deopting frame (could be compiled/interpreted). ++ // ++ // Note: by leaving the return address of self-frame on the stack ++ // and using the size of frame 2 to adjust the stack ++ // when we are done the return to frame 3 will still be on the stack. ++ ++ // register for the sender's sp ++ Register sender_sp = Rsender; ++ // register for frame pcs ++ Register pcs = T0; ++ // register for frame sizes ++ Register sizes = T1; ++ // register for frame count ++ Register count = T3; ++ ++ // Pop deoptimized frame ++ __ ld_w(AT, unroll, Deoptimization::UnrollBlock::size_of_deoptimized_frame_offset_in_bytes()); ++ __ add_d(SP, SP, AT); ++ // sp should be pointing at the return address to the caller (3) ++ ++ // Load array of frame pcs into pcs ++ __ ld_ptr(pcs, unroll, Deoptimization::UnrollBlock::frame_pcs_offset_in_bytes()); ++ __ addi_d(SP, SP, wordSize); // trash the old pc ++ // Load array of frame sizes into T6 ++ __ ld_ptr(sizes, unroll, Deoptimization::UnrollBlock::frame_sizes_offset_in_bytes()); ++ ++ ++ ++ // Load count of frames into T3 ++ __ ld_w(count, unroll, Deoptimization::UnrollBlock::number_of_frames_offset_in_bytes()); ++ // Pick up the initial fp we should save ++ __ ld_d(FP, unroll, Deoptimization::UnrollBlock::initial_info_offset_in_bytes()); ++ // Now adjust the caller's stack to make up for the extra locals ++ // but record the original sp so that we can save it in the skeletal interpreter ++ // frame and the stack walking of interpreter_sender will get the unextended sp ++ // value and not the "real" sp value. 
++ __ move(sender_sp, SP); ++ __ ld_w(AT, unroll, Deoptimization::UnrollBlock::caller_adjustment_offset_in_bytes()); ++ __ sub_d(SP, SP, AT); ++ ++ Label loop; ++ __ bind(loop); ++ __ ld_d(T2, sizes, 0); // Load frame size ++ __ ld_ptr(AT, pcs, 0); // save return address ++ __ addi_d(T2, T2, -2 * wordSize); // we'll push pc and fp, by hand ++ __ push2(AT, FP); ++ __ move(FP, SP); ++ __ sub_d(SP, SP, T2); // Prolog! ++ // This value is corrected by layout_activation_impl ++ __ st_d(R0, FP, frame::interpreter_frame_last_sp_offset * wordSize); ++ __ st_d(sender_sp, FP, frame::interpreter_frame_sender_sp_offset * wordSize);// Make it walkable ++ __ move(sender_sp, SP); // pass to next frame ++ __ addi_d(count, count, -1); // decrement counter ++ __ addi_d(sizes, sizes, wordSize); // Bump array pointer (sizes) ++ __ addi_d(pcs, pcs, wordSize); // Bump array pointer (pcs) ++ __ bne(count, R0, loop); ++ __ ld_d(AT, pcs, 0); // frame_pcs[number_of_frames] = Interpreter::deopt_entry(vtos, 0); ++ // Re-push self-frame ++ __ push2(AT, FP); ++ __ move(FP, SP); ++ __ st_d(R0, FP, frame::interpreter_frame_last_sp_offset * wordSize); ++ __ st_d(sender_sp, FP, frame::interpreter_frame_sender_sp_offset * wordSize); ++ __ addi_d(SP, SP, -(frame_size_in_words - 2 - additional_words) * wordSize); ++ ++ // Restore frame locals after moving the frame ++ __ st_d(V0, SP, reg_save.v0_offset()); ++ __ st_d(V1, SP, reg_save.v1_offset()); ++ __ fst_d(F0, SP, reg_save.fpr0_offset()); ++ __ fst_d(F1, SP, reg_save.fpr1_offset()); ++ ++ // Call unpack_frames(). Need thread and this frame, but NOT official VM entry - cannot block on ++ // this call, no GC can happen. ++ __ move(A1, reason); // exec_mode ++ __ get_thread(thread); ++ __ move(A0, thread); // thread ++ __ addi_d(SP, SP, (-additional_words) *wordSize); ++ ++ // set last_Java_sp, last_Java_fp ++ Label L; ++ address the_pc = __ pc(); ++ __ bind(L); ++ __ set_last_Java_frame(NOREG, FP, L); ++ ++ __ li(AT, -(StackAlignmentInBytes)); ++ __ andr(SP, SP, AT); // Fix stack alignment as required by ABI ++ ++ __ call(CAST_FROM_FN_PTR(address, Deoptimization::unpack_frames), relocInfo::runtime_call_type); ++ // Revert SP alignment after call since we're going to do some SP relative addressing below ++ __ ld_d(SP, thread, in_bytes(JavaThread::last_Java_sp_offset())); ++ // Set an oopmap for the call site ++ oop_maps->add_gc_map(the_pc - start, new OopMap(frame_size_in_words, 0)); ++ ++ __ push(V0); ++ ++ __ get_thread(thread); ++ __ reset_last_Java_frame(true); ++ ++ // Collect return values ++ __ ld_d(V0, SP, reg_save.v0_offset() + (additional_words + 1) * wordSize); ++ __ ld_d(V1, SP, reg_save.v1_offset() + (additional_words + 1) * wordSize); ++ // Pop float stack and store in local ++ __ fld_d(F0, SP, reg_save.fpr0_offset() + (additional_words + 1) * wordSize); ++ __ fld_d(F1, SP, reg_save.fpr1_offset() + (additional_words + 1) * wordSize); ++ ++ //FIXME, ++ // Clear floating point stack before returning to interpreter ++ __ empty_FPU_stack(); ++ //FIXME, we should consider about float and double ++ // Push a float or double return value if necessary. 
++ __ leave(); ++ ++ // Jump to interpreter ++ __ jr(RA); ++ ++ masm->flush(); ++ _deopt_blob = DeoptimizationBlob::create(&buffer, oop_maps, 0, exception_offset, reexecute_offset, frame_size_in_words); ++ _deopt_blob->set_unpack_with_exception_in_tls_offset(exception_in_tls_offset); ++} ++ ++#ifdef COMPILER2 ++ ++//------------------------------generate_uncommon_trap_blob-------------------- ++// Ought to generate an ideal graph & compile, but here's some SPARC ASM ++// instead. ++void SharedRuntime::generate_uncommon_trap_blob() { ++ // allocate space for the code ++ ResourceMark rm; ++ // setup code generation tools ++ CodeBuffer buffer ("uncommon_trap_blob", 512*80 , 512*40 ); ++ MacroAssembler* masm = new MacroAssembler(&buffer); ++ ++ enum frame_layout { ++ fp_off, fp_off2, ++ return_off, return_off2, ++ framesize ++ }; ++ assert(framesize % 4 == 0, "sp not 16-byte aligned"); ++ address start = __ pc(); ++ ++ // Push self-frame. ++ __ addi_d(SP, SP, -framesize * BytesPerInt); ++ ++ __ st_d(RA, SP, return_off * BytesPerInt); ++ __ st_d(FP, SP, fp_off * BytesPerInt); ++ ++ __ addi_d(FP, SP, fp_off * BytesPerInt); ++ ++ // Clear the floating point exception stack ++ __ empty_FPU_stack(); ++ ++ Register thread = TREG; ++ ++#ifndef OPT_THREAD ++ __ get_thread(thread); ++#endif ++ // set last_Java_sp ++ Label retaddr; ++ __ set_last_Java_frame(NOREG, FP, retaddr); ++ // Call C code. Need thread but NOT official VM entry ++ // crud. We cannot block on this call, no GC can happen. Call should ++ // capture callee-saved registers as well as return values. ++ __ move(A0, thread); ++ // argument already in T0 ++ __ move(A1, T0); ++ __ call((address)Deoptimization::uncommon_trap, relocInfo::runtime_call_type); ++ __ bind(retaddr); ++ ++ // Set an oopmap for the call site ++ OopMapSet *oop_maps = new OopMapSet(); ++ OopMap* map = new OopMap( framesize, 0 ); ++ ++ oop_maps->add_gc_map(__ pc() - start, map); ++ ++#ifndef OPT_THREAD ++ __ get_thread(thread); ++#endif ++ __ reset_last_Java_frame(false); ++ ++ // Load UnrollBlock into S7 ++ Register unroll = S7; ++ __ move(unroll, V0); ++ ++ // Pop all the frames we must move/replace. ++ // ++ // Frame picture (youngest to oldest) ++ // 1: self-frame (no frame link) ++ // 2: deopting frame (no frame link) ++ // 3: possible-i2c-adapter-frame ++ // 4: caller of deopting frame (could be compiled/interpreted. 
If interpreted we will create an ++ // and c2i here) ++ ++ __ addi_d(SP, SP, framesize * BytesPerInt); ++ ++ // Pop deoptimized frame ++ __ ld_w(AT, unroll, Deoptimization::UnrollBlock::size_of_deoptimized_frame_offset_in_bytes()); ++ __ add_d(SP, SP, AT); ++ ++ // register for frame pcs ++ Register pcs = T8; ++ // register for frame sizes ++ Register sizes = T4; ++ // register for frame count ++ Register count = T3; ++ // register for the sender's sp ++ Register sender_sp = T1; ++ ++ // sp should be pointing at the return address to the caller (4) ++ // Load array of frame pcs ++ __ ld_d(pcs, unroll, Deoptimization::UnrollBlock::frame_pcs_offset_in_bytes()); ++ ++ // Load array of frame sizes ++ __ ld_d(sizes, unroll, Deoptimization::UnrollBlock::frame_sizes_offset_in_bytes()); ++ __ ld_wu(count, unroll, Deoptimization::UnrollBlock::number_of_frames_offset_in_bytes()); ++ ++ // Pick up the initial fp we should save ++ __ ld_d(FP, unroll, Deoptimization::UnrollBlock::initial_info_offset_in_bytes()); ++ // Now adjust the caller's stack to make up for the extra locals ++ // but record the original sp so that we can save it in the skeletal interpreter ++ // frame and the stack walking of interpreter_sender will get the unextended sp ++ // value and not the "real" sp value. ++ ++ __ move(sender_sp, SP); ++ __ ld_w(AT, unroll, Deoptimization::UnrollBlock::caller_adjustment_offset_in_bytes()); ++ __ sub_d(SP, SP, AT); ++ // Push interpreter frames in a loop ++ Label loop; ++ __ bind(loop); ++ __ ld_d(T2, sizes, 0); // Load frame size ++ __ ld_d(AT, pcs, 0); // save return address ++ __ addi_d(T2, T2, -2*wordSize); // we'll push pc and fp, by hand ++ __ push2(AT, FP); ++ __ move(FP, SP); ++ __ sub_d(SP, SP, T2); // Prolog! ++ // This value is corrected by layout_activation_impl ++ __ st_d(R0, FP, frame::interpreter_frame_last_sp_offset * wordSize); ++ __ st_d(sender_sp, FP, frame::interpreter_frame_sender_sp_offset * wordSize);// Make it walkable ++ __ move(sender_sp, SP); // pass to next frame ++ __ addi_d(count, count, -1); // decrement counter ++ __ addi_d(sizes, sizes, wordSize); // Bump array pointer (sizes) ++ __ addi_d(pcs, pcs, wordSize); // Bump array pointer (pcs) ++ __ bne(count, R0, loop); ++ ++ __ ld_d(RA, pcs, 0); ++ ++ // Re-push self-frame ++ // save old & set new FP ++ // save final return address ++ __ enter(); ++ ++ // Use FP because the frames look interpreted now ++ // Save "the_pc" since it cannot easily be retrieved using the last_java_SP after we aligned SP. ++ // Don't need the precise return PC here, just precise enough to point into this code blob. ++ Label L; ++ address the_pc = __ pc(); ++ __ bind(L); ++ __ set_last_Java_frame(NOREG, FP, L); ++ ++ __ li(AT, -(StackAlignmentInBytes)); ++ __ andr(SP, SP, AT); // Fix stack alignment as required by ABI ++ ++ // Call C code. Need thread but NOT official VM entry ++ // crud. We cannot block on this call, no GC can happen. Call should ++ // restore return values to their stack-slots with the new SP. ++ __ move(A0, thread); ++ __ li(A1, Deoptimization::Unpack_uncommon_trap); ++ __ call((address)Deoptimization::unpack_frames, relocInfo::runtime_call_type); ++ // Set an oopmap for the call site ++ oop_maps->add_gc_map(the_pc - start, new OopMap(framesize, 0)); ++ ++ __ reset_last_Java_frame(true); ++ ++ // Pop self-frame. ++ __ leave(); // Epilog! 
++ ++ // Jump to interpreter ++ __ jr(RA); ++ // ------------- ++ // make sure all code is generated ++ masm->flush(); ++ _uncommon_trap_blob = UncommonTrapBlob::create(&buffer, oop_maps, framesize / 2); ++} ++ ++#endif // COMPILER2 ++ ++//------------------------------generate_handler_blob------------------- ++// ++// Generate a special Compile2Runtime blob that saves all registers, and sets ++// up an OopMap and calls safepoint code to stop the compiled code for ++// a safepoint. ++// ++// This blob is jumped to (via a breakpoint and the signal handler) from a ++// safepoint in compiled code. ++ ++SafepointBlob* SharedRuntime::generate_handler_blob(address call_ptr, int poll_type) { ++ ++ // Account for thread arg in our frame ++ const int additional_words = 0; ++ int frame_size_in_words; ++ ++ assert (StubRoutines::forward_exception_entry() != NULL, "must be generated before"); ++ ++ ResourceMark rm; ++ OopMapSet *oop_maps = new OopMapSet(); ++ OopMap* map; ++ ++ // allocate space for the code ++ // setup code generation tools ++ CodeBuffer buffer ("handler_blob", 2048, 512); ++ MacroAssembler* masm = new MacroAssembler( &buffer); ++ ++ const Register thread = TREG; ++ address start = __ pc(); ++ bool cause_return = (poll_type == POLL_AT_RETURN); ++ RegisterSaver reg_save(poll_type == POLL_AT_VECTOR_LOOP /* save_vectors */); ++ ++ // If cause_return is true we are at a poll_return and there is ++ // the return address in RA to the caller on the nmethod ++ // that is safepoint. We can leave this return in RA and ++ // effectively complete the return and safepoint in the caller. ++ // Otherwise we load exception pc to RA. ++ __ push(thread); ++#ifndef OPT_THREAD ++ __ get_thread(thread); ++#endif ++ ++ if(!cause_return) { ++ __ ld_ptr(RA, Address(thread, JavaThread::saved_exception_pc_offset())); ++ } ++ ++ __ pop(thread); ++ map = reg_save.save_live_registers(masm, additional_words, &frame_size_in_words); ++ ++#ifndef OPT_THREAD ++ __ get_thread(thread); ++#endif ++ ++ // The following is basically a call_VM. However, we need the precise ++ // address of the call in order to generate an oopmap. Hence, we do all the ++ // work outselvs. ++ ++ __ move(A0, thread); ++ Label retaddr; ++ __ set_last_Java_frame(NOREG, NOREG, retaddr); ++ ++ // Do the call ++ // TODO: confirm reloc ++ __ call(call_ptr, relocInfo::runtime_call_type); ++ __ bind(retaddr); ++ ++ // Set an oopmap for the call site. This oopmap will map all ++ // oop-registers and debug-info registers as callee-saved. This ++ // will allow deoptimization at this safepoint to find all possible ++ // debug-info recordings, as well as let GC find all oops. 
++ oop_maps->add_gc_map(__ pc() - start, map); ++ ++ Label noException; ++ ++ // Clear last_Java_sp again ++ __ reset_last_Java_frame(false); ++ ++ __ ld_ptr(AT, thread, in_bytes(Thread::pending_exception_offset())); ++ __ beq(AT, R0, noException); ++ ++ // Exception pending ++ ++ reg_save.restore_live_registers(masm); ++ //forward_exception_entry need return address on the stack ++ __ push(RA); ++ // TODO: confirm reloc ++ __ jmp((address)StubRoutines::forward_exception_entry(), relocInfo::runtime_call_type); ++ ++ // No exception case ++ __ bind(noException); ++ // Normal exit, register restoring and exit ++ reg_save.restore_live_registers(masm); ++ __ jr(RA); ++ ++ masm->flush(); ++ // Fill-out other meta info ++ return SafepointBlob::create(&buffer, oop_maps, frame_size_in_words); ++} ++ ++// ++// generate_resolve_blob - call resolution (static/virtual/opt-virtual/ic-miss ++// ++// Generate a stub that calls into vm to find out the proper destination ++// of a java call. All the argument registers are live at this point ++// but since this is generic code we don't know what they are and the caller ++// must do any gc of the args. ++// ++RuntimeStub* SharedRuntime::generate_resolve_blob(address destination, const char* name) { ++ assert (StubRoutines::forward_exception_entry() != NULL, "must be generated before"); ++ ++ // allocate space for the code ++ ResourceMark rm; ++ ++ //CodeBuffer buffer(name, 1000, 512); ++ //FIXME. code_size ++ CodeBuffer buffer(name, 2000, 2048); ++ MacroAssembler* masm = new MacroAssembler(&buffer); ++ ++ int frame_size_words; ++ RegisterSaver reg_save(false /* save_vectors */); ++ //we put the thread in A0 ++ ++ OopMapSet *oop_maps = new OopMapSet(); ++ OopMap* map = NULL; ++ ++ address start = __ pc(); ++ map = reg_save.save_live_registers(masm, 0, &frame_size_words); ++ ++ ++ int frame_complete = __ offset(); ++ const Register thread = T8; ++ __ get_thread(thread); ++ ++ __ move(A0, thread); ++ Label retaddr; ++ __ set_last_Java_frame(noreg, FP, retaddr); ++ // align the stack before invoke native ++ __ li(AT, -(StackAlignmentInBytes)); ++ __ andr(SP, SP, AT); ++ ++ // TODO: confirm reloc ++ __ call(destination, relocInfo::runtime_call_type); ++ __ bind(retaddr); ++ ++ // Set an oopmap for the call site. ++ // We need this not only for callee-saved registers, but also for volatile ++ // registers that the compiler might be keeping live across a safepoint. ++ oop_maps->add_gc_map(__ pc() - start, map); ++ // V0 contains the address we are going to jump to assuming no exception got installed ++ __ get_thread(thread); ++ __ ld_ptr(SP, thread, in_bytes(JavaThread::last_Java_sp_offset())); ++ // clear last_Java_sp ++ __ reset_last_Java_frame(true); ++ // check for pending exceptions ++ Label pending; ++ __ ld_ptr(AT, thread, in_bytes(Thread::pending_exception_offset())); ++ __ bne(AT, R0, pending); ++ // get the returned Method* ++ __ get_vm_result_2(Rmethod, thread); ++ __ st_ptr(Rmethod, SP, reg_save.s3_offset()); ++ __ st_ptr(V0, SP, reg_save.t5_offset()); ++ reg_save.restore_live_registers(masm); ++ ++ // We are back to the original state on entry and ready to go to the callee method. 
++ __ jr(T5); ++ // Pending exception after the safepoint ++ ++ __ bind(pending); ++ ++ reg_save.restore_live_registers(masm); ++ ++ // exception pending => remove activation and forward to exception handler ++ //forward_exception_entry need return address on the stack ++ __ push(RA); ++ __ get_thread(thread); ++ __ st_ptr(R0, thread, in_bytes(JavaThread::vm_result_offset())); ++ __ ld_ptr(V0, thread, in_bytes(Thread::pending_exception_offset())); ++ __ jmp(StubRoutines::forward_exception_entry(), relocInfo::runtime_call_type); ++ // ++ // make sure all code is generated ++ masm->flush(); ++ RuntimeStub* tmp= RuntimeStub::new_runtime_stub(name, &buffer, frame_complete, frame_size_words, oop_maps, true); ++ return tmp; ++} ++ ++extern "C" int SpinPause() {return 0;} +diff --git a/hotspot/src/cpu/loongarch/vm/stubGenerator_loongarch_64.cpp b/hotspot/src/cpu/loongarch/vm/stubGenerator_loongarch_64.cpp +new file mode 100644 +index 0000000000..361b775144 +--- /dev/null ++++ b/hotspot/src/cpu/loongarch/vm/stubGenerator_loongarch_64.cpp +@@ -0,0 +1,3445 @@ ++/* ++ * Copyright (c) 2003, 2013, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2021, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#include "precompiled.hpp" ++#include "asm/macroAssembler.hpp" ++#include "asm/macroAssembler.inline.hpp" ++#include "interpreter/interpreter.hpp" ++#include "nativeInst_loongarch.hpp" ++#include "oops/instanceOop.hpp" ++#include "oops/method.hpp" ++#include "oops/objArrayKlass.hpp" ++#include "oops/oop.inline.hpp" ++#include "prims/methodHandles.hpp" ++#include "runtime/frame.inline.hpp" ++#include "runtime/handles.inline.hpp" ++#include "runtime/sharedRuntime.hpp" ++#include "runtime/stubCodeGenerator.hpp" ++#include "runtime/stubRoutines.hpp" ++#include "runtime/thread.inline.hpp" ++#include "utilities/top.hpp" ++#ifdef COMPILER2 ++#include "opto/runtime.hpp" ++#endif ++ ++// Declaration and definition of StubGenerator (no .hpp file). ++// For a more detailed description of the stub routine structure ++// see the comment in stubRoutines.hpp ++ ++#define __ _masm-> ++ ++#define A0 RA0 ++#define A1 RA1 ++#define A2 RA2 ++#define A3 RA3 ++#define A4 RA4 ++#define A5 RA5 ++#define A6 RA6 ++#define A7 RA7 ++#define T0 RT0 ++#define T1 RT1 ++#define T2 RT2 ++#define T3 RT3 ++#define T4 RT4 ++#define T5 RT5 ++#define T6 RT6 ++#define T7 RT7 ++#define T8 RT8 ++ ++#define TIMES_OOP (UseCompressedOops ? 
Address::times_4 : Address::times_8) ++ ++//#ifdef PRODUCT ++//#define BLOCK_COMMENT(str) /* nothing */ ++//#else ++//#define BLOCK_COMMENT(str) __ block_comment(str) ++//#endif ++ ++//#define BIND(label) bind(label); BLOCK_COMMENT(#label ":") ++const int MXCSR_MASK = 0xFFC0; // Mask out any pending exceptions ++ ++// Stub Code definitions ++ ++static address handle_unsafe_access() { ++ JavaThread* thread = JavaThread::current(); ++ address pc = thread->saved_exception_pc(); ++ // pc is the instruction which we must emulate ++ // doing a no-op is fine: return garbage from the load ++ // therefore, compute npc ++ address npc = (address)((unsigned long)pc + sizeof(unsigned int)); ++ ++ // request an async exception ++ thread->set_pending_unsafe_access_error(); ++ ++ // return address of next instruction to execute ++ return npc; ++} ++ ++class StubGenerator: public StubCodeGenerator { ++ private: ++ ++ // This fig is not LA ABI. It is call Java from C ABI. ++ // Call stubs are used to call Java from C ++ // ++ // [ return_from_Java ] ++ // [ argument word n-1 ] <--- sp ++ // ... ++ // [ argument word 0 ] ++ // ... ++ // -8 [ S6 ] ++ // -7 [ S5 ] ++ // -6 [ S4 ] ++ // -5 [ S3 ] ++ // -4 [ S1 ] ++ // -3 [ TSR(S2) ] ++ // -2 [ LVP(S7) ] ++ // -1 [ BCP(S1) ] ++ // 0 [ saved fp ] <--- fp_after_call ++ // 1 [ return address ] ++ // 2 [ ptr. to call wrapper ] <--- a0 (old sp -->)fp ++ // 3 [ result ] <--- a1 ++ // 4 [ result_type ] <--- a2 ++ // 5 [ method ] <--- a3 ++ // 6 [ entry_point ] <--- a4 ++ // 7 [ parameters ] <--- a5 ++ // 8 [ parameter_size ] <--- a6 ++ // 9 [ thread ] <--- a7 ++ ++ // ++ // LA ABI does not save paras in sp. ++ // ++ // [ return_from_Java ] ++ // [ argument word n-1 ] <--- sp ++ // ... ++ // [ argument word 0 ] ++ // ... ++ //-13 [ thread ] ++ //-12 [ result_type ] <--- a2 ++ //-11 [ result ] <--- a1 ++ //-10 [ ] ++ // -9 [ ptr. to call wrapper ] <--- a0 ++ // -8 [ S6 ] ++ // -7 [ S5 ] ++ // -6 [ S4 ] ++ // -5 [ S3 ] ++ // -4 [ S1 ] ++ // -3 [ TSR(S2) ] ++ // -2 [ LVP(S7) ] ++ // -1 [ BCP(S1) ] ++ // 0 [ saved fp ] <--- fp_after_call ++ // 1 [ return address ] ++ // 2 [ ] <--- old sp ++ // ++ // Find a right place in the call_stub for S8. ++ // S8 will point to the starting point of Interpreter::dispatch_table(itos). ++ // It should be saved/restored before/after Java calls. ++ // ++ enum call_stub_layout { ++ RA_off = 1, ++ FP_off = 0, ++ BCP_off = -1, ++ LVP_off = -2, ++ TSR_off = -3, ++ S1_off = -4, ++ S3_off = -5, ++ S4_off = -6, ++ S5_off = -7, ++ S6_off = -8, ++ call_wrapper_off = -9, ++ result_off = -11, ++ result_type_off = -12, ++ thread_off = -13, ++ total_off = thread_off - 1, ++ S8_off = -14, ++ }; ++ ++ address generate_call_stub(address& return_address) { ++ assert((int)frame::entry_frame_call_wrapper_offset == (int)call_wrapper_off, "adjust this code"); ++ StubCodeMark mark(this, "StubRoutines", "call_stub"); ++ address start = __ pc(); ++ ++ // same as in generate_catch_exception()! 
++ ++ // stub code ++ // save ra and fp ++ __ enter(); ++ // I think 14 is the max gap between argument and callee saved register ++ __ addi_d(SP, SP, total_off * wordSize); ++ __ st_d(BCP, FP, BCP_off * wordSize); ++ __ st_d(LVP, FP, LVP_off * wordSize); ++ __ st_d(TSR, FP, TSR_off * wordSize); ++ __ st_d(S1, FP, S1_off * wordSize); ++ __ st_d(S3, FP, S3_off * wordSize); ++ __ st_d(S4, FP, S4_off * wordSize); ++ __ st_d(S5, FP, S5_off * wordSize); ++ __ st_d(S6, FP, S6_off * wordSize); ++ __ st_d(A0, FP, call_wrapper_off * wordSize); ++ __ st_d(A1, FP, result_off * wordSize); ++ __ st_d(A2, FP, result_type_off * wordSize); ++ __ st_d(A7, FP, thread_off * wordSize); ++ __ st_d(S8, FP, S8_off * wordSize); ++ ++ __ li(S8, (long)Interpreter::dispatch_table(itos)); ++ ++#ifdef OPT_THREAD ++ __ move(TREG, A7); ++#endif ++ //add for compressedoops ++ __ reinit_heapbase(); ++ ++#ifdef ASSERT ++ // make sure we have no pending exceptions ++ { ++ Label L; ++ __ ld_d(AT, A7, in_bytes(Thread::pending_exception_offset())); ++ __ beq(AT, R0, L); ++ /* FIXME: I do not know how to realize stop in LA, do it in the future */ ++ __ stop("StubRoutines::call_stub: entered with pending exception"); ++ __ bind(L); ++ } ++#endif ++ ++ // pass parameters if any ++ // A5: parameter ++ // A6: parameter_size ++ // T0: parameter_size_tmp(--) ++ // T2: offset(++) ++ // T3: tmp ++ Label parameters_done; ++ // judge if the parameter_size equals 0 ++ __ beq(A6, R0, parameters_done); ++ __ slli_d(AT, A6, Interpreter::logStackElementSize); ++ __ sub_d(SP, SP, AT); ++ __ li(AT, -StackAlignmentInBytes); ++ __ andr(SP, SP, AT); ++ // Copy Java parameters in reverse order (receiver last) ++ // Note that the argument order is inverted in the process ++ Label loop; ++ __ move(T0, A6); ++ __ move(T2, R0); ++ __ bind(loop); ++ ++ // get parameter ++ __ alsl_d(T3, T0, A5, LogBytesPerWord - 1); ++ __ ld_d(AT, T3, -wordSize); ++ __ alsl_d(T3, T2, SP, LogBytesPerWord - 1); ++ __ st_d(AT, T3, Interpreter::expr_offset_in_bytes(0)); ++ __ addi_d(T2, T2, 1); ++ __ addi_d(T0, T0, -1); ++ __ bne(T0, R0, loop); ++ // advance to next parameter ++ ++ // call Java function ++ __ bind(parameters_done); ++ ++ // receiver in V0, methodOop in Rmethod ++ ++ __ move(Rmethod, A3); ++ __ move(Rsender, SP); //set sender sp ++ __ jalr(A4); ++ return_address = __ pc(); ++ ++ Label common_return; ++ __ bind(common_return); ++ ++ // store result depending on type ++ // (everything that is not T_LONG, T_FLOAT or T_DOUBLE is treated as T_INT) ++ __ ld_d(T0, FP, result_off * wordSize); // result --> T0 ++ Label is_long, is_float, is_double, exit; ++ __ ld_d(T2, FP, result_type_off * wordSize); // result_type --> T2 ++ __ addi_d(T3, T2, (-1) * T_LONG); ++ __ beq(T3, R0, is_long); ++ __ addi_d(T3, T2, (-1) * T_FLOAT); ++ __ beq(T3, R0, is_float); ++ __ addi_d(T3, T2, (-1) * T_DOUBLE); ++ __ beq(T3, R0, is_double); ++ ++ // handle T_INT case ++ __ st_d(V0, T0, 0 * wordSize); ++ __ bind(exit); ++ ++ // restore ++ __ ld_d(BCP, FP, BCP_off * wordSize); ++ __ ld_d(LVP, FP, LVP_off * wordSize); ++ __ ld_d(S8, FP, S8_off * wordSize); ++ __ ld_d(TSR, FP, TSR_off * wordSize); ++ ++ __ ld_d(S1, FP, S1_off * wordSize); ++ __ ld_d(S3, FP, S3_off * wordSize); ++ __ ld_d(S4, FP, S4_off * wordSize); ++ __ ld_d(S5, FP, S5_off * wordSize); ++ __ ld_d(S6, FP, S6_off * wordSize); ++ ++ __ leave(); ++ ++ // return ++ __ jr(RA); ++ ++ // handle return types different from T_INT ++ __ bind(is_long); ++ __ st_d(V0, T0, 0 * wordSize); ++ __ b(exit); ++ ++ __ bind(is_float); ++ __ 
fst_s(FV0, T0, 0 * wordSize); ++ __ b(exit); ++ ++ __ bind(is_double); ++ __ fst_d(FV0, T0, 0 * wordSize); ++ __ b(exit); ++ StubRoutines::la::set_call_stub_compiled_return(__ pc()); ++ __ b(common_return); ++ return start; ++ } ++ ++ // Return point for a Java call if there's an exception thrown in ++ // Java code. The exception is caught and transformed into a ++ // pending exception stored in JavaThread that can be tested from ++ // within the VM. ++ // ++ // Note: Usually the parameters are removed by the callee. In case ++ // of an exception crossing an activation frame boundary, that is ++ // not the case if the callee is compiled code => need to setup the ++ // sp. ++ // ++ // V0: exception oop ++ ++ address generate_catch_exception() { ++ StubCodeMark mark(this, "StubRoutines", "catch_exception"); ++ address start = __ pc(); ++ ++ Register thread = TREG; ++ ++ // get thread directly ++#ifndef OPT_THREAD ++ __ ld_d(thread, FP, thread_off * wordSize); ++#endif ++ ++#ifdef ASSERT ++ // verify that threads correspond ++ { Label L; ++ __ get_thread(T8); ++ __ beq(T8, thread, L); ++ __ stop("StubRoutines::catch_exception: threads must correspond"); ++ __ bind(L); ++ } ++#endif ++ // set pending exception ++ __ verify_oop(V0); ++ __ st_d(V0, thread, in_bytes(Thread::pending_exception_offset())); ++ __ li(AT, (long)__FILE__); ++ __ st_d(AT, thread, in_bytes(Thread::exception_file_offset ())); ++ __ li(AT, (long)__LINE__); ++ __ st_d(AT, thread, in_bytes(Thread::exception_line_offset ())); ++ ++ // complete return to VM ++ assert(StubRoutines::_call_stub_return_address != NULL, "_call_stub_return_address must have been generated before"); ++ __ jmp(StubRoutines::_call_stub_return_address, relocInfo::none); ++ return start; ++ } ++ ++ // Continuation point for runtime calls returning with a pending ++ // exception. The pending exception check happened in the runtime ++ // or native call stub. The pending exception in Thread is ++ // converted into a Java-level exception. ++ // ++ // Contract with Java-level exception handlers: ++ // V0: exception ++ // V1: throwing pc ++ // ++ // NOTE: At entry of this stub, exception-pc must be on stack !! ++ ++ address generate_forward_exception() { ++ StubCodeMark mark(this, "StubRoutines", "forward exception"); ++ //Register thread = TREG; ++ Register thread = TREG; ++ address start = __ pc(); ++ ++ // Upon entry, the sp points to the return address returning into ++ // Java (interpreted or compiled) code; i.e., the return address ++ // throwing pc. ++ // ++ // Arguments pushed before the runtime call are still on the stack ++ // but the exception handler will reset the stack pointer -> ++ // ignore them. A potential result in registers can be ignored as ++ // well. 
++ ++#ifndef OPT_THREAD ++ __ get_thread(thread); ++#endif ++#ifdef ASSERT ++ // make sure this code is only executed if there is a pending exception ++ { ++ Label L; ++ __ ld_d(AT, thread, in_bytes(Thread::pending_exception_offset())); ++ __ bne(AT, R0, L); ++ __ stop("StubRoutines::forward exception: no pending exception (1)"); ++ __ bind(L); ++ } ++#endif ++ ++ // compute exception handler into T4 ++ __ ld_d(A1, SP, 0); ++ __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::exception_handler_for_return_address), thread, A1); ++ __ move(T4, V0); ++ __ pop(V1); ++ ++#ifndef OPT_THREAD ++ __ get_thread(thread); ++#endif ++ __ ld_d(V0, thread, in_bytes(Thread::pending_exception_offset())); ++ __ st_d(R0, thread, in_bytes(Thread::pending_exception_offset())); ++ ++#ifdef ASSERT ++ // make sure exception is set ++ { ++ Label L; ++ __ bne(V0, R0, L); ++ __ stop("StubRoutines::forward exception: no pending exception (2)"); ++ __ bind(L); ++ } ++#endif ++ ++ // continue at exception handler (return address removed) ++ // V0: exception ++ // T4: exception handler ++ // V1: throwing pc ++ __ verify_oop(V0); ++ __ jr(T4); ++ return start; ++ } ++ ++ // The following routine generates a subroutine to throw an ++ // asynchronous UnknownError when an unsafe access gets a fault that ++ // could not be reasonably prevented by the programmer. (Example: ++ // SIGBUS/OBJERR.) ++ address generate_handler_for_unsafe_access() { ++ StubCodeMark mark(this, "StubRoutines", "handler_for_unsafe_access"); ++ address start = __ pc(); ++ __ push(V0); ++ __ pushad_except_v0(); // push registers ++ __ call(CAST_FROM_FN_PTR(address, handle_unsafe_access), relocInfo::runtime_call_type); ++ __ popad_except_v0(); ++ __ move(RA, V0); ++ __ pop(V0); ++ __ jr(RA); ++ return start; ++ } ++ ++ // Non-destructive plausibility checks for oops ++ // ++ address generate_verify_oop() { ++ StubCodeMark mark(this, "StubRoutines", "verify_oop"); ++ address start = __ pc(); ++ __ reinit_heapbase(); ++ __ verify_oop_subroutine(); ++ address end = __ pc(); ++ return start; ++ } ++ ++ // ++ // Generate stub for array fill. If "aligned" is true, the ++ // "to" address is assumed to be heapword aligned. 
++ // ++ // Arguments for generated stub: ++ // to: A0 ++ // value: A1 ++ // count: A2 treated as signed ++ // ++ address generate_fill(BasicType t, bool aligned, const char *name) { ++ __ align(CodeEntryAlignment); ++ StubCodeMark mark(this, "StubRoutines", name); ++ address start = __ pc(); ++ ++ const Register to = A0; // source array address ++ const Register value = A1; // value ++ const Register count = A2; // elements count ++ ++ const Register end = T5; // source array address end ++ const Register tmp = T8; // temp register ++ ++ Label L_fill_elements; ++ ++ int shift = -1; ++ switch (t) { ++ case T_BYTE: ++ shift = 0; ++ __ slti(AT, count, 9); // Short arrays (<= 8 bytes) fill by element ++ __ bstrins_d(value, value, 15, 8); // 8 bit -> 16 bit ++ __ bstrins_d(value, value, 31, 16); // 16 bit -> 32 bit ++ __ bstrins_d(value, value, 63, 32); // 32 bit -> 64 bit ++ __ bnez(AT, L_fill_elements); ++ break; ++ case T_SHORT: ++ shift = 1; ++ __ slti(AT, count, 5); // Short arrays (<= 8 bytes) fill by element ++ __ bstrins_d(value, value, 31, 16); // 16 bit -> 32 bit ++ __ bstrins_d(value, value, 63, 32); // 32 bit -> 64 bit ++ __ bnez(AT, L_fill_elements); ++ break; ++ case T_INT: ++ shift = 2; ++ __ slti(AT, count, 3); // Short arrays (<= 8 bytes) fill by element ++ __ bstrins_d(value, value, 63, 32); // 32 bit -> 64 bit ++ __ bnez(AT, L_fill_elements); ++ break; ++ default: ShouldNotReachHere(); ++ } ++ ++ switch (t) { ++ case T_BYTE: ++ __ add_d(end, to, count); ++ break; ++ case T_SHORT: ++ case T_INT: ++ __ alsl_d(end, count, to, shift-1); ++ break; ++ default: ShouldNotReachHere(); ++ } ++ if (!aligned) { ++ __ st_d(value, to, 0); ++ __ bstrins_d(to, R0, 2, 0); ++ __ addi_d(to, to, 8); ++ } ++ __ st_d(value, end, -8); ++ __ bstrins_d(end, R0, 2, 0); ++ ++ // ++ // Fill large chunks ++ // ++ Label L_loop_begin, L_not_64bytes_fill, L_loop_end; ++ __ addi_d(AT, to, 64); ++ __ blt(end, AT, L_not_64bytes_fill); ++ __ addi_d(to, to, 64); ++ __ bind(L_loop_begin); ++ __ st_d(value, to, -8); ++ __ st_d(value, to, -16); ++ __ st_d(value, to, -24); ++ __ st_d(value, to, -32); ++ __ st_d(value, to, -40); ++ __ st_d(value, to, -48); ++ __ st_d(value, to, -56); ++ __ st_d(value, to, -64); ++ __ addi_d(to, to, 64); ++ __ bge(end, to, L_loop_begin); ++ __ addi_d(to, to, -64); ++ __ beq(to, end, L_loop_end); ++ ++ __ bind(L_not_64bytes_fill); ++ // There are 0 - 7 words ++ __ pcaddi(AT, 4); ++ __ sub_d(tmp, end, to); ++ __ alsl_d(AT, tmp, AT, 1); ++ __ jr(AT); ++ ++ // 0: ++ __ jr(RA); ++ __ nop(); ++ __ nop(); ++ __ nop(); ++ __ nop(); ++ __ nop(); ++ __ nop(); ++ __ nop(); ++ ++ // 1: ++ __ st_d(value, to, 0); ++ __ jr(RA); ++ __ nop(); ++ __ nop(); ++ __ nop(); ++ __ nop(); ++ __ nop(); ++ __ nop(); ++ ++ // 2: ++ __ st_d(value, to, 0); ++ __ st_d(value, to, 8); ++ __ jr(RA); ++ __ nop(); ++ __ nop(); ++ __ nop(); ++ __ nop(); ++ __ nop(); ++ ++ // 3: ++ __ st_d(value, to, 0); ++ __ st_d(value, to, 8); ++ __ st_d(value, to, 16); ++ __ jr(RA); ++ __ nop(); ++ __ nop(); ++ __ nop(); ++ __ nop(); ++ ++ // 4: ++ __ st_d(value, to, 0); ++ __ st_d(value, to, 8); ++ __ st_d(value, to, 16); ++ __ st_d(value, to, 24); ++ __ jr(RA); ++ __ nop(); ++ __ nop(); ++ __ nop(); ++ ++ // 5: ++ __ st_d(value, to, 0); ++ __ st_d(value, to, 8); ++ __ st_d(value, to, 16); ++ __ st_d(value, to, 24); ++ __ st_d(value, to, 32); ++ __ jr(RA); ++ __ nop(); ++ __ nop(); ++ ++ // 6: ++ __ st_d(value, to, 0); ++ __ st_d(value, to, 8); ++ __ st_d(value, to, 16); ++ __ st_d(value, to, 24); ++ __ st_d(value, to, 32); ++ __ 
st_d(value, to, 40); ++ __ jr(RA); ++ __ nop(); ++ ++ // 7: ++ __ st_d(value, to, 0); ++ __ st_d(value, to, 8); ++ __ st_d(value, to, 16); ++ __ st_d(value, to, 24); ++ __ st_d(value, to, 32); ++ __ st_d(value, to, 40); ++ __ st_d(value, to, 48); ++ ++ __ bind(L_loop_end); ++ __ jr(RA); ++ ++ // Short arrays (<= 8 bytes) ++ __ bind(L_fill_elements); ++ __ pcaddi(AT, 4); ++ __ slli_d(tmp, count, 4 + shift); ++ __ add_d(AT, AT, tmp); ++ __ jr(AT); ++ ++ // 0: ++ __ jr(RA); ++ __ nop(); ++ __ nop(); ++ __ nop(); ++ ++ // 1: ++ __ st_b(value, to, 0); ++ __ jr(RA); ++ __ nop(); ++ __ nop(); ++ ++ // 2: ++ __ st_h(value, to, 0); ++ __ jr(RA); ++ __ nop(); ++ __ nop(); ++ ++ // 3: ++ __ st_h(value, to, 0); ++ __ st_b(value, to, 2); ++ __ jr(RA); ++ __ nop(); ++ ++ // 4: ++ __ st_w(value, to, 0); ++ __ jr(RA); ++ __ nop(); ++ __ nop(); ++ ++ // 5: ++ __ st_w(value, to, 0); ++ __ st_b(value, to, 4); ++ __ jr(RA); ++ __ nop(); ++ ++ // 6: ++ __ st_w(value, to, 0); ++ __ st_h(value, to, 4); ++ __ jr(RA); ++ __ nop(); ++ ++ // 7: ++ __ st_w(value, to, 0); ++ __ st_w(value, to, 3); ++ __ jr(RA); ++ __ nop(); ++ ++ // 8: ++ __ st_d(value, to, 0); ++ __ jr(RA); ++ return start; ++ } ++ ++ // ++ // Generate overlap test for array copy stubs ++ // ++ // Input: ++ // A0 - source array address ++ // A1 - destination array address ++ // A2 - element count ++ // ++ // Temp: ++ // AT - destination array address - source array address ++ // T4 - element count * element size ++ // ++ void array_overlap_test(address no_overlap_target, int log2_elem_size) { ++ __ slli_d(T4, A2, log2_elem_size); ++ __ sub_d(AT, A1, A0); ++ __ bgeu(AT, T4, no_overlap_target); ++ } ++ ++ // Generate code for an array write pre barrier ++ // ++ // Input: ++ // addr - starting address ++ // count - element count ++ // ++ // Temp: ++ // AT - used to swap addr and count ++ // ++ void gen_write_ref_array_pre_barrier(Register addr, Register count, bool dest_uninitialized) { ++ BarrierSet* bs = Universe::heap()->barrier_set(); ++ switch (bs->kind()) { ++ case BarrierSet::G1SATBCT: ++ case BarrierSet::G1SATBCTLogging: ++ // With G1, don't generate the call if we statically know that the target in uninitialized ++ if (!dest_uninitialized) { ++ if (count == A0) { ++ if (addr == A1) { ++ // exactly backwards!! ++ __ move(AT, A0); ++ __ move(A0, A1); ++ __ move(A1, AT); ++ } else { ++ __ move(A1, count); ++ __ move(A0, addr); ++ } ++ } else { ++ __ move(A0, addr); ++ __ move(A1, count); ++ } ++ __ call_VM_leaf(CAST_FROM_FN_PTR(address, BarrierSet::static_write_ref_array_pre), 2); ++ } ++ break; ++ case BarrierSet::CardTableModRef: ++ case BarrierSet::CardTableExtension: ++ case BarrierSet::ModRef: ++ break; ++ default: ++ ShouldNotReachHere(); ++ } ++ } ++ ++ // ++ // Generate code for an array write post barrier ++ // ++ // Input: ++ // start - register containing starting address of destination array ++ // count - elements count ++ // scratch - scratch register ++ // ++ // Temp: ++ // AT - used to swap addr and count ++ // ++ // The input registers are overwritten. ++ // ++ void gen_write_ref_array_post_barrier(Register start, Register count, Register scratch) { ++ assert_different_registers(start, count, scratch, AT); ++ BarrierSet* bs = Universe::heap()->barrier_set(); ++ switch (bs->kind()) { ++ case BarrierSet::G1SATBCT: ++ case BarrierSet::G1SATBCTLogging: ++ { ++ if (count == A0) { ++ if (start == A1) { ++ // exactly backwards!! 
++ __ move(AT, A0); ++ __ move(A0, A1); ++ __ move(A1, AT); ++ } else { ++ __ move(A1, count); ++ __ move(A0, start); ++ } ++ } else { ++ __ move(A0, start); ++ __ move(A1, count); ++ } ++ __ call_VM_leaf(CAST_FROM_FN_PTR(address, BarrierSet::static_write_ref_array_post), 2); ++ } ++ break; ++ case BarrierSet::CardTableModRef: ++ case BarrierSet::CardTableExtension: ++ { ++ CardTableModRefBS* ct = (CardTableModRefBS*)bs; ++ assert(sizeof(*ct->byte_map_base) == sizeof(jbyte), "adjust this code"); ++ ++ Label L_loop; ++ const Register end = count; ++ ++ if (UseConcMarkSweepGC) { ++ __ membar(__ StoreStore); ++ } ++ ++ int64_t disp = (int64_t) ct->byte_map_base; ++ __ li(scratch, disp); ++ ++ __ lea(end, Address(start, count, TIMES_OOP, 0)); // end == start + count * oop_size ++ __ addi_d(end, end, -BytesPerHeapOop); // end - 1 to make inclusive ++ __ shr(start, CardTableModRefBS::card_shift); ++ __ shr(end, CardTableModRefBS::card_shift); ++ __ sub_d(end, end, start); // end --> cards count ++ ++ __ add_d(start, start, scratch); ++ ++ __ bind(L_loop); ++ __ stx_b(R0, start, count); ++ __ addi_d(count, count, -1); ++ __ bge(count, R0, L_loop); ++ } ++ break; ++ default: ++ ShouldNotReachHere(); ++ } ++ } ++ ++ // disjoint large copy ++ void generate_disjoint_large_copy(Label &entry, const char *name) { ++ StubCodeMark mark(this, "StubRoutines", name); ++ __ align(CodeEntryAlignment); ++ ++ Label loop, le32, le16, le8, lt8; ++ ++ __ bind(entry); ++ __ add_d(A3, A1, A2); ++ __ add_d(A2, A0, A2); ++ __ ld_d(A6, A0, 0); ++ __ ld_d(A7, A2, -8); ++ ++ __ andi(T1, A0, 7); ++ __ sub_d(T0, R0, T1); ++ __ addi_d(T0, T0, 8); ++ ++ __ add_d(A0, A0, T0); ++ __ add_d(A5, A1, T0); ++ ++ __ addi_d(A4, A2, -64); ++ __ bgeu(A0, A4, le32); ++ ++ __ bind(loop); ++ __ ld_d(T0, A0, 0); ++ __ ld_d(T1, A0, 8); ++ __ ld_d(T2, A0, 16); ++ __ ld_d(T3, A0, 24); ++ __ ld_d(T4, A0, 32); ++ __ ld_d(T5, A0, 40); ++ __ ld_d(T6, A0, 48); ++ __ ld_d(T7, A0, 56); ++ __ addi_d(A0, A0, 64); ++ __ st_d(T0, A5, 0); ++ __ st_d(T1, A5, 8); ++ __ st_d(T2, A5, 16); ++ __ st_d(T3, A5, 24); ++ __ st_d(T4, A5, 32); ++ __ st_d(T5, A5, 40); ++ __ st_d(T6, A5, 48); ++ __ st_d(T7, A5, 56); ++ __ addi_d(A5, A5, 64); ++ __ bltu(A0, A4, loop); ++ ++ __ bind(le32); ++ __ addi_d(A4, A2, -32); ++ __ bgeu(A0, A4, le16); ++ __ ld_d(T0, A0, 0); ++ __ ld_d(T1, A0, 8); ++ __ ld_d(T2, A0, 16); ++ __ ld_d(T3, A0, 24); ++ __ addi_d(A0, A0, 32); ++ __ st_d(T0, A5, 0); ++ __ st_d(T1, A5, 8); ++ __ st_d(T2, A5, 16); ++ __ st_d(T3, A5, 24); ++ __ addi_d(A5, A5, 32); ++ ++ __ bind(le16); ++ __ addi_d(A4, A2, -16); ++ __ bgeu(A0, A4, le8); ++ __ ld_d(T0, A0, 0); ++ __ ld_d(T1, A0, 8); ++ __ addi_d(A0, A0, 16); ++ __ st_d(T0, A5, 0); ++ __ st_d(T1, A5, 8); ++ __ addi_d(A5, A5, 16); ++ ++ __ bind(le8); ++ __ addi_d(A4, A2, -8); ++ __ bgeu(A0, A4, lt8); ++ __ ld_d(T0, A0, 0); ++ __ st_d(T0, A5, 0); ++ ++ __ bind(lt8); ++ __ st_d(A6, A1, 0); ++ __ st_d(A7, A3, -8); ++ __ jr(RA); ++ } ++ ++ // conjoint large copy ++ void generate_conjoint_large_copy(Label &entry, const char *name) { ++ StubCodeMark mark(this, "StubRoutines", name); ++ __ align(CodeEntryAlignment); ++ ++ Label loop, le32, le16, le8, lt8; ++ ++ __ bind(entry); ++ __ add_d(A3, A1, A2); ++ __ add_d(A2, A0, A2); ++ __ ld_d(A6, A0, 0); ++ __ ld_d(A7, A2, -8); ++ ++ __ andi(T1, A0, 7); ++ __ sub_d(A2, A2, T1); ++ __ sub_d(A5, A3, T1); ++ ++ __ addi_d(A4, A0, 64); ++ __ bgeu(A4, A2, le32); ++ ++ __ bind(loop); ++ __ ld_d(T0, A2, -8); ++ __ ld_d(T1, A2, -16); ++ __ ld_d(T2, A2, -24); ++ __ ld_d(T3, A2, -32); 
++ __ ld_d(T4, A2, -40); ++ __ ld_d(T5, A2, -48); ++ __ ld_d(T6, A2, -56); ++ __ ld_d(T7, A2, -64); ++ __ addi_d(A2, A2, -64); ++ __ st_d(T0, A5, -8); ++ __ st_d(T1, A5, -16); ++ __ st_d(T2, A5, -24); ++ __ st_d(T3, A5, -32); ++ __ st_d(T4, A5, -40); ++ __ st_d(T5, A5, -48); ++ __ st_d(T6, A5, -56); ++ __ st_d(T7, A5, -64); ++ __ addi_d(A5, A5, -64); ++ __ bltu(A4, A2, loop); ++ ++ __ bind(le32); ++ __ addi_d(A4, A0, 32); ++ __ bgeu(A4, A2, le16); ++ __ ld_d(T0, A2, -8); ++ __ ld_d(T1, A2, -16); ++ __ ld_d(T2, A2, -24); ++ __ ld_d(T3, A2, -32); ++ __ addi_d(A2, A2, -32); ++ __ st_d(T0, A5, -8); ++ __ st_d(T1, A5, -16); ++ __ st_d(T2, A5, -24); ++ __ st_d(T3, A5, -32); ++ __ addi_d(A5, A5, -32); ++ ++ __ bind(le16); ++ __ addi_d(A4, A0, 16); ++ __ bgeu(A4, A2, le8); ++ __ ld_d(T0, A2, -8); ++ __ ld_d(T1, A2, -16); ++ __ addi_d(A2, A2, -16); ++ __ st_d(T0, A5, -8); ++ __ st_d(T1, A5, -16); ++ __ addi_d(A5, A5, -16); ++ ++ __ bind(le8); ++ __ addi_d(A4, A0, 8); ++ __ bgeu(A4, A2, lt8); ++ __ ld_d(T0, A2, -8); ++ __ st_d(T0, A5, -8); ++ ++ __ bind(lt8); ++ __ st_d(A6, A1, 0); ++ __ st_d(A7, A3, -8); ++ __ jr(RA); ++ } ++ ++ // Byte small copy: less than 9 elements. ++ void generate_byte_small_copy(Label &entry, const char *name) { ++ StubCodeMark mark(this, "StubRoutines", name); ++ __ align(CodeEntryAlignment); ++ ++ Label L; ++ __ bind(entry); ++ __ lipc(AT, L); ++ __ slli_d(A2, A2, 5); ++ __ add_d(AT, AT, A2); ++ __ jr(AT); ++ ++ __ bind(L); ++ // 0: ++ __ jr(RA); ++ __ nop(); ++ __ nop(); ++ __ nop(); ++ __ nop(); ++ __ nop(); ++ __ nop(); ++ __ nop(); ++ ++ // 1: ++ __ ld_b(AT, A0, 0); ++ __ st_b(AT, A1, 0); ++ __ jr(RA); ++ __ nop(); ++ __ nop(); ++ __ nop(); ++ __ nop(); ++ __ nop(); ++ ++ // 2: ++ __ ld_h(AT, A0, 0); ++ __ st_h(AT, A1, 0); ++ __ jr(RA); ++ __ nop(); ++ __ nop(); ++ __ nop(); ++ __ nop(); ++ __ nop(); ++ ++ // 3: ++ __ ld_h(AT, A0, 0); ++ __ ld_b(A2, A0, 2); ++ __ st_h(AT, A1, 0); ++ __ st_b(A2, A1, 2); ++ __ jr(RA); ++ __ nop(); ++ __ nop(); ++ __ nop(); ++ ++ // 4: ++ __ ld_w(AT, A0, 0); ++ __ st_w(AT, A1, 0); ++ __ jr(RA); ++ __ nop(); ++ __ nop(); ++ __ nop(); ++ __ nop(); ++ __ nop(); ++ ++ // 5: ++ __ ld_w(AT, A0, 0); ++ __ ld_b(A2, A0, 4); ++ __ st_w(AT, A1, 0); ++ __ st_b(A2, A1, 4); ++ __ jr(RA); ++ __ nop(); ++ __ nop(); ++ __ nop(); ++ ++ // 6: ++ __ ld_w(AT, A0, 0); ++ __ ld_h(A2, A0, 4); ++ __ st_w(AT, A1, 0); ++ __ st_h(A2, A1, 4); ++ __ jr(RA); ++ __ nop(); ++ __ nop(); ++ __ nop(); ++ ++ // 7: ++ __ ld_w(AT, A0, 0); ++ __ ld_w(A2, A0, 3); ++ __ st_w(AT, A1, 0); ++ __ st_w(A2, A1, 3); ++ __ jr(RA); ++ __ nop(); ++ __ nop(); ++ __ nop(); ++ ++ // 8: ++ __ ld_d(AT, A0, 0); ++ __ st_d(AT, A1, 0); ++ __ jr(RA); ++ } ++ ++ // Arguments: ++ // aligned - true => Input and output aligned on a HeapWord == 8-byte boundary ++ // ignored ++ // name - stub name string ++ // ++ // Inputs: ++ // A0 - source array address ++ // A1 - destination array address ++ // A2 - element count, treated as ssize_t, can be zero ++ // ++ // If 'from' and/or 'to' are aligned on 4-, 2-, or 1-byte boundaries, ++ // we let the hardware handle it. The one to eight bytes within words, ++ // dwords or qwords that span cache line boundaries will still be loaded ++ // and stored atomically. ++ // ++ // Side Effects: ++ // disjoint_byte_copy_entry is set to the no-overlap entry point ++ // used by generate_conjoint_byte_copy(). 
++ // ++ address generate_disjoint_byte_copy(bool aligned, Label &small, Label &large, ++ const char * name) { ++ StubCodeMark mark(this, "StubRoutines", name); ++ __ align(CodeEntryAlignment); ++ address start = __ pc(); ++ ++ __ sltui(T0, A2, 9); ++ __ bnez(T0, small); ++ ++ __ b(large); ++ ++ return start; ++ } ++ ++ // Arguments: ++ // aligned - true => Input and output aligned on a HeapWord == 8-byte boundary ++ // ignored ++ // name - stub name string ++ // ++ // Inputs: ++ // A0 - source array address ++ // A1 - destination array address ++ // A2 - element count, treated as ssize_t, can be zero ++ // ++ // If 'from' and/or 'to' are aligned on 4-, 2-, or 1-byte boundaries, ++ // we let the hardware handle it. The one to eight bytes within words, ++ // dwords or qwords that span cache line boundaries will still be loaded ++ // and stored atomically. ++ // ++ address generate_conjoint_byte_copy(bool aligned, Label &small, Label &large, ++ const char *name) { ++ StubCodeMark mark(this, "StubRoutines", name); ++ __ align(CodeEntryAlignment); ++ address start = __ pc(); ++ ++ array_overlap_test(StubRoutines::jbyte_disjoint_arraycopy(), 0); ++ ++ __ sltui(T0, A2, 9); ++ __ bnez(T0, small); ++ ++ __ b(large); ++ ++ return start; ++ } ++ ++ // Short small copy: less than 9 elements. ++ void generate_short_small_copy(Label &entry, const char *name) { ++ StubCodeMark mark(this, "StubRoutines", name); ++ __ align(CodeEntryAlignment); ++ ++ Label L; ++ __ bind(entry); ++ __ lipc(AT, L); ++ __ slli_d(A2, A2, 5); ++ __ add_d(AT, AT, A2); ++ __ jr(AT); ++ ++ __ bind(L); ++ // 0: ++ __ jr(RA); ++ __ nop(); ++ __ nop(); ++ __ nop(); ++ __ nop(); ++ __ nop(); ++ __ nop(); ++ __ nop(); ++ ++ // 1: ++ __ ld_h(AT, A0, 0); ++ __ st_h(AT, A1, 0); ++ __ jr(RA); ++ __ nop(); ++ __ nop(); ++ __ nop(); ++ __ nop(); ++ __ nop(); ++ ++ // 2: ++ __ ld_w(AT, A0, 0); ++ __ st_w(AT, A1, 0); ++ __ jr(RA); ++ __ nop(); ++ __ nop(); ++ __ nop(); ++ __ nop(); ++ __ nop(); ++ ++ // 3: ++ __ ld_w(AT, A0, 0); ++ __ ld_h(A2, A0, 4); ++ __ st_w(AT, A1, 0); ++ __ st_h(A2, A1, 4); ++ __ jr(RA); ++ __ nop(); ++ __ nop(); ++ __ nop(); ++ ++ // 4: ++ __ ld_d(AT, A0, 0); ++ __ st_d(AT, A1, 0); ++ __ jr(RA); ++ __ nop(); ++ __ nop(); ++ __ nop(); ++ __ nop(); ++ __ nop(); ++ ++ // 5: ++ __ ld_d(AT, A0, 0); ++ __ ld_h(A2, A0, 8); ++ __ st_d(AT, A1, 0); ++ __ st_h(A2, A1, 8); ++ __ jr(RA); ++ __ nop(); ++ __ nop(); ++ __ nop(); ++ ++ // 6: ++ __ ld_d(AT, A0, 0); ++ __ ld_w(A2, A0, 8); ++ __ st_d(AT, A1, 0); ++ __ st_w(A2, A1, 8); ++ __ jr(RA); ++ __ nop(); ++ __ nop(); ++ __ nop(); ++ ++ // 7: ++ __ ld_d(AT, A0, 0); ++ __ ld_d(A2, A0, 6); ++ __ st_d(AT, A1, 0); ++ __ st_d(A2, A1, 6); ++ __ jr(RA); ++ __ nop(); ++ __ nop(); ++ __ nop(); ++ ++ // 8: ++ __ ld_d(AT, A0, 0); ++ __ ld_d(A2, A0, 8); ++ __ st_d(AT, A1, 0); ++ __ st_d(A2, A1, 8); ++ __ jr(RA); ++ } ++ ++ // Arguments: ++ // aligned - true => Input and output aligned on a HeapWord == 8-byte boundary ++ // ignored ++ // name - stub name string ++ // ++ // Inputs: ++ // A0 - source array address ++ // A1 - destination array address ++ // A2 - element count, treated as ssize_t, can be zero ++ // ++ // If 'from' and/or 'to' are aligned on 4-, 2-, or 1-byte boundaries, ++ // we let the hardware handle it. The one to eight bytes within words, ++ // dwords or qwords that span cache line boundaries will still be loaded ++ // and stored atomically. 
++ // ++ // Side Effects: ++ // disjoint_short_copy_entry is set to the no-overlap entry point ++ // used by generate_conjoint_short_copy(). ++ // ++ address generate_disjoint_short_copy(bool aligned, Label &small, Label &large, ++ const char * name) { ++ StubCodeMark mark(this, "StubRoutines", name); ++ __ align(CodeEntryAlignment); ++ address start = __ pc(); ++ ++ __ sltui(T0, A2, 9); ++ __ bnez(T0, small); ++ ++ __ slli_d(A2, A2, 1); ++ __ b(large); ++ ++ return start; ++ } ++ ++ // Arguments: ++ // aligned - true => Input and output aligned on a HeapWord == 8-byte boundary ++ // ignored ++ // name - stub name string ++ // ++ // Inputs: ++ // A0 - source array address ++ // A1 - destination array address ++ // A2 - element count, treated as ssize_t, can be zero ++ // ++ // If 'from' and/or 'to' are aligned on 4- or 2-byte boundaries, we ++ // let the hardware handle it. The two or four words within dwords ++ // or qwords that span cache line boundaries will still be loaded ++ // and stored atomically. ++ // ++ address generate_conjoint_short_copy(bool aligned, Label &small, Label &large, ++ const char *name) { ++ StubCodeMark mark(this, "StubRoutines", name); ++ __ align(CodeEntryAlignment); ++ address start = __ pc(); ++ ++ array_overlap_test(StubRoutines::jshort_disjoint_arraycopy(), 1); ++ ++ __ sltui(T0, A2, 9); ++ __ bnez(T0, small); ++ ++ __ slli_d(A2, A2, 1); ++ __ b(large); ++ ++ return start; ++ } ++ ++ // Short small copy: less than 7 elements. ++ void generate_int_small_copy(Label &entry, const char *name) { ++ StubCodeMark mark(this, "StubRoutines", name); ++ __ align(CodeEntryAlignment); ++ ++ Label L; ++ __ bind(entry); ++ __ lipc(AT, L); ++ __ slli_d(A2, A2, 5); ++ __ add_d(AT, AT, A2); ++ __ jr(AT); ++ ++ __ bind(L); ++ // 0: ++ __ jr(RA); ++ __ nop(); ++ __ nop(); ++ __ nop(); ++ __ nop(); ++ __ nop(); ++ __ nop(); ++ __ nop(); ++ ++ // 1: ++ __ ld_w(AT, A0, 0); ++ __ st_w(AT, A1, 0); ++ __ jr(RA); ++ __ nop(); ++ __ nop(); ++ __ nop(); ++ __ nop(); ++ __ nop(); ++ ++ // 2: ++ __ ld_d(AT, A0, 0); ++ __ st_d(AT, A1, 0); ++ __ jr(RA); ++ __ nop(); ++ __ nop(); ++ __ nop(); ++ __ nop(); ++ __ nop(); ++ ++ // 3: ++ __ ld_d(AT, A0, 0); ++ __ ld_w(A2, A0, 8); ++ __ st_d(AT, A1, 0); ++ __ st_w(A2, A1, 8); ++ __ jr(RA); ++ __ nop(); ++ __ nop(); ++ __ nop(); ++ ++ // 4: ++ __ ld_d(AT, A0, 0); ++ __ ld_d(A2, A0, 8); ++ __ st_d(AT, A1, 0); ++ __ st_d(A2, A1, 8); ++ __ jr(RA); ++ __ nop(); ++ __ nop(); ++ __ nop(); ++ ++ // 5: ++ __ ld_d(AT, A0, 0); ++ __ ld_d(A2, A0, 8); ++ __ ld_w(A3, A0, 16); ++ __ st_d(AT, A1, 0); ++ __ st_d(A2, A1, 8); ++ __ st_w(A3, A1, 16); ++ __ jr(RA); ++ __ nop(); ++ ++ // 6: ++ __ ld_d(AT, A0, 0); ++ __ ld_d(A2, A0, 8); ++ __ ld_d(A3, A0, 16); ++ __ st_d(AT, A1, 0); ++ __ st_d(A2, A1, 8); ++ __ st_d(A3, A1, 16); ++ __ jr(RA); ++ } ++ ++ // Generate maybe oop copy ++ void gen_maybe_oop_copy(bool is_oop, Label &small, Label &large, ++ const char *name, int small_limit, int log2_elem_size, ++ bool dest_uninitialized = false) { ++ Label post, _large; ++ ++ if (is_oop) { ++ __ addi_d(SP, SP, -4 * wordSize); ++ __ st_d(A2, SP, 3 * wordSize); ++ __ st_d(A1, SP, 2 * wordSize); ++ __ st_d(A0, SP, 1 * wordSize); ++ __ st_d(RA, SP, 0 * wordSize); ++ ++ gen_write_ref_array_pre_barrier(A1, A2, dest_uninitialized); ++ ++ __ ld_d(A2, SP, 3 * wordSize); ++ __ ld_d(A1, SP, 2 * wordSize); ++ __ ld_d(A0, SP, 1 * wordSize); ++ } ++ ++ __ sltui(T0, A2, small_limit); ++ if (is_oop) { ++ __ beqz(T0, _large); ++ __ bl(small); ++ __ b(post); ++ } else { ++ __ bnez(T0, small); 
++ } ++ ++ __ bind(_large); ++ __ slli_d(A2, A2, log2_elem_size); ++ ++ if (is_oop) { ++ __ bl(large); ++ } else { ++ __ b(large); ++ } ++ ++ if (is_oop) { ++ __ bind(post); ++ __ ld_d(A2, SP, 3 * wordSize); ++ __ ld_d(A1, SP, 2 * wordSize); ++ ++ gen_write_ref_array_post_barrier(A1, A2, T1); ++ ++ __ ld_d(RA, SP, 0 * wordSize); ++ __ addi_d(SP, SP, 4 * wordSize); ++ __ jr(RA); ++ } ++ } ++ ++ // Arguments: ++ // aligned - true => Input and output aligned on a HeapWord == 8-byte boundary ++ // ignored ++ // is_oop - true => oop array, so generate store check code ++ // name - stub name string ++ // ++ // Inputs: ++ // A0 - source array address ++ // A1 - destination array address ++ // A2 - element count, treated as ssize_t, can be zero ++ // ++ // If 'from' and/or 'to' are aligned on 4-byte boundaries, we let ++ // the hardware handle it. The two dwords within qwords that span ++ // cache line boundaries will still be loaded and stored atomicly. ++ // ++ // Side Effects: ++ // disjoint_int_copy_entry is set to the no-overlap entry point ++ // used by generate_conjoint_int_oop_copy(). ++ // ++ address generate_disjoint_int_oop_copy(bool aligned, bool is_oop, Label &small, ++ Label &large, const char *name, ++ bool dest_uninitialized = false) { ++ StubCodeMark mark(this, "StubRoutines", name); ++ __ align(CodeEntryAlignment); ++ address start = __ pc(); ++ ++ gen_maybe_oop_copy(is_oop, small, large, name, 7, 2, dest_uninitialized); ++ ++ return start; ++ } ++ ++ // Arguments: ++ // aligned - true => Input and output aligned on a HeapWord == 8-byte boundary ++ // ignored ++ // is_oop - true => oop array, so generate store check code ++ // name - stub name string ++ // ++ // Inputs: ++ // A0 - source array address ++ // A1 - destination array address ++ // A2 - element count, treated as ssize_t, can be zero ++ // ++ // If 'from' and/or 'to' are aligned on 4-byte boundaries, we let ++ // the hardware handle it. The two dwords within qwords that span ++ // cache line boundaries will still be loaded and stored atomicly. ++ // ++ address generate_conjoint_int_oop_copy(bool aligned, bool is_oop, ++ Label &small, Label &large, const char *name, ++ bool dest_uninitialized = false) { ++ StubCodeMark mark(this, "StubRoutines", name); ++ __ align(CodeEntryAlignment); ++ address start = __ pc(); ++ ++ if (is_oop) { ++ array_overlap_test(StubRoutines::oop_disjoint_arraycopy(), 2); ++ } else { ++ array_overlap_test(StubRoutines::jint_disjoint_arraycopy(), 2); ++ } ++ ++ gen_maybe_oop_copy(is_oop, small, large, name, 7, 2, dest_uninitialized); ++ ++ return start; ++ } ++ ++ // Long small copy: less than 4 elements. 
++ void generate_long_small_copy(Label &entry, const char *name) { ++ StubCodeMark mark(this, "StubRoutines", name); ++ __ align(CodeEntryAlignment); ++ ++ Label L; ++ __ bind(entry); ++ __ lipc(AT, L); ++ __ slli_d(A2, A2, 5); ++ __ add_d(AT, AT, A2); ++ __ jr(AT); ++ ++ __ bind(L); ++ // 0: ++ __ jr(RA); ++ __ nop(); ++ __ nop(); ++ __ nop(); ++ __ nop(); ++ __ nop(); ++ __ nop(); ++ __ nop(); ++ ++ // 1: ++ __ ld_d(AT, A0, 0); ++ __ st_d(AT, A1, 0); ++ __ jr(RA); ++ __ nop(); ++ __ nop(); ++ __ nop(); ++ __ nop(); ++ __ nop(); ++ ++ // 2: ++ __ ld_d(AT, A0, 0); ++ __ ld_d(A2, A0, 8); ++ __ st_d(AT, A1, 0); ++ __ st_d(A2, A1, 8); ++ __ jr(RA); ++ __ nop(); ++ __ nop(); ++ __ nop(); ++ ++ // 3: ++ __ ld_d(AT, A0, 0); ++ __ ld_d(A2, A0, 8); ++ __ ld_d(A3, A0, 16); ++ __ st_d(AT, A1, 0); ++ __ st_d(A2, A1, 8); ++ __ st_d(A3, A1, 16); ++ __ jr(RA); ++ __ nop(); ++ } ++ ++ // Arguments: ++ // aligned - true => Input and output aligned on a HeapWord == 8-byte boundary ++ // ignored ++ // is_oop - true => oop array, so generate store check code ++ // name - stub name string ++ // ++ // Inputs: ++ // A0 - source array address ++ // A1 - destination array address ++ // A2 - element count, treated as ssize_t, can be zero ++ // ++ // If 'from' and/or 'to' are aligned on 4-byte boundaries, we let ++ // the hardware handle it. The two dwords within qwords that span ++ // cache line boundaries will still be loaded and stored atomicly. ++ // ++ // Side Effects: ++ // disjoint_int_copy_entry is set to the no-overlap entry point ++ // used by generate_conjoint_int_oop_copy(). ++ // ++ address generate_disjoint_long_oop_copy(bool aligned, bool is_oop, Label &small, ++ Label &large, const char *name, ++ bool dest_uninitialized = false) { ++ StubCodeMark mark(this, "StubRoutines", name); ++ __ align(CodeEntryAlignment); ++ address start = __ pc(); ++ ++ gen_maybe_oop_copy(is_oop, small, large, name, 4, 3, dest_uninitialized); ++ ++ return start; ++ } ++ ++ // Arguments: ++ // aligned - true => Input and output aligned on a HeapWord == 8-byte boundary ++ // ignored ++ // is_oop - true => oop array, so generate store check code ++ // name - stub name string ++ // ++ // Inputs: ++ // A0 - source array address ++ // A1 - destination array address ++ // A2 - element count, treated as ssize_t, can be zero ++ // ++ // If 'from' and/or 'to' are aligned on 4-byte boundaries, we let ++ // the hardware handle it. The two dwords within qwords that span ++ // cache line boundaries will still be loaded and stored atomicly. 
++ // ++ address generate_conjoint_long_oop_copy(bool aligned, bool is_oop, Label &small, ++ Label &large, const char *name, ++ bool dest_uninitialized = false) { ++ StubCodeMark mark(this, "StubRoutines", name); ++ __ align(CodeEntryAlignment); ++ address start = __ pc(); ++ ++ if (is_oop) { ++ array_overlap_test(StubRoutines::oop_disjoint_arraycopy(), 3); ++ } else { ++ array_overlap_test(StubRoutines::jlong_disjoint_arraycopy(), 3); ++ } ++ ++ gen_maybe_oop_copy(is_oop, small, large, name, 4, 3, dest_uninitialized); ++ ++ return start; ++ } ++ ++ void generate_arraycopy_stubs() { ++ Label disjoint_large_copy, conjoint_large_copy; ++ Label byte_small_copy, short_small_copy, int_small_copy, long_small_copy; ++ ++ generate_disjoint_large_copy(disjoint_large_copy, "disjoint_large_copy"); ++ generate_conjoint_large_copy(conjoint_large_copy, "conjoint_large_copy"); ++ generate_byte_small_copy(byte_small_copy, "jbyte_small_copy"); ++ generate_short_small_copy(short_small_copy, "jshort_small_copy"); ++ generate_int_small_copy(int_small_copy, "jint_small_copy"); ++ generate_long_small_copy(long_small_copy, "jlong_small_copy"); ++ ++ if (UseCompressedOops) { ++ StubRoutines::_oop_disjoint_arraycopy = generate_disjoint_int_oop_copy(false, true, int_small_copy, disjoint_large_copy, "oop_disjoint_arraycopy"); ++ StubRoutines::_oop_arraycopy = generate_conjoint_int_oop_copy(false, true, int_small_copy, conjoint_large_copy, "oop_arraycopy"); ++ StubRoutines::_oop_disjoint_arraycopy_uninit = generate_disjoint_int_oop_copy(false, true, int_small_copy, disjoint_large_copy, "oop_disjoint_arraycopy_uninit", true); ++ StubRoutines::_oop_arraycopy_uninit = generate_conjoint_int_oop_copy(false, true, int_small_copy, conjoint_large_copy, "oop_arraycopy_uninit", true); ++ } else { ++ StubRoutines::_oop_disjoint_arraycopy = generate_disjoint_long_oop_copy(false, true, long_small_copy, disjoint_large_copy, "oop_disjoint_arraycopy"); ++ StubRoutines::_oop_arraycopy = generate_conjoint_long_oop_copy(false, true, long_small_copy, conjoint_large_copy, "oop_arraycopy"); ++ StubRoutines::_oop_disjoint_arraycopy_uninit = generate_disjoint_long_oop_copy(false, true, long_small_copy, disjoint_large_copy, "oop_disjoint_arraycopy_uninit", true); ++ StubRoutines::_oop_arraycopy_uninit = generate_conjoint_long_oop_copy(false, true, long_small_copy, conjoint_large_copy, "oop_arraycopy_uninit", true); ++ } ++ ++ StubRoutines::_jbyte_disjoint_arraycopy = generate_disjoint_byte_copy(false, byte_small_copy, disjoint_large_copy, "jbyte_disjoint_arraycopy"); ++ StubRoutines::_jshort_disjoint_arraycopy = generate_disjoint_short_copy(false, short_small_copy, disjoint_large_copy, "jshort_disjoint_arraycopy"); ++ StubRoutines::_jint_disjoint_arraycopy = generate_disjoint_int_oop_copy(false, false, int_small_copy, disjoint_large_copy, "jint_disjoint_arraycopy"); ++ StubRoutines::_jlong_disjoint_arraycopy = generate_disjoint_long_oop_copy(false, false, long_small_copy, disjoint_large_copy, "jlong_disjoint_arraycopy", false); ++ ++ StubRoutines::_jbyte_arraycopy = generate_conjoint_byte_copy(false, byte_small_copy, conjoint_large_copy, "jbyte_arraycopy"); ++ StubRoutines::_jshort_arraycopy = generate_conjoint_short_copy(false, short_small_copy, conjoint_large_copy, "jshort_arraycopy"); ++ StubRoutines::_jint_arraycopy = generate_conjoint_int_oop_copy(false, false, int_small_copy, conjoint_large_copy, "jint_arraycopy"); ++ StubRoutines::_jlong_arraycopy = generate_conjoint_long_oop_copy(false, false, long_small_copy, conjoint_large_copy, 
"jlong_arraycopy", false); ++ ++ // We don't generate specialized code for HeapWord-aligned source ++ // arrays, so just use the code we've already generated ++ StubRoutines::_arrayof_jbyte_disjoint_arraycopy = StubRoutines::_jbyte_disjoint_arraycopy; ++ StubRoutines::_arrayof_jbyte_arraycopy = StubRoutines::_jbyte_arraycopy; ++ ++ StubRoutines::_arrayof_jshort_disjoint_arraycopy = StubRoutines::_jshort_disjoint_arraycopy; ++ StubRoutines::_arrayof_jshort_arraycopy = StubRoutines::_jshort_arraycopy; ++ ++ StubRoutines::_arrayof_jint_disjoint_arraycopy = StubRoutines::_jint_disjoint_arraycopy; ++ StubRoutines::_arrayof_jint_arraycopy = StubRoutines::_jint_arraycopy; ++ ++ StubRoutines::_arrayof_jlong_disjoint_arraycopy = StubRoutines::_jlong_disjoint_arraycopy; ++ StubRoutines::_arrayof_jlong_arraycopy = StubRoutines::_jlong_arraycopy; ++ ++ StubRoutines::_arrayof_oop_disjoint_arraycopy = StubRoutines::_oop_disjoint_arraycopy; ++ StubRoutines::_arrayof_oop_arraycopy = StubRoutines::_oop_arraycopy; ++ ++ StubRoutines::_arrayof_oop_disjoint_arraycopy_uninit = StubRoutines::_oop_disjoint_arraycopy_uninit; ++ StubRoutines::_arrayof_oop_arraycopy_uninit = StubRoutines::_oop_arraycopy_uninit; ++ ++ StubRoutines::_jbyte_fill = generate_fill(T_BYTE, false, "jbyte_fill"); ++ StubRoutines::_jshort_fill = generate_fill(T_SHORT, false, "jshort_fill"); ++ StubRoutines::_jint_fill = generate_fill(T_INT, false, "jint_fill"); ++ StubRoutines::_arrayof_jbyte_fill = generate_fill(T_BYTE, true, "arrayof_jbyte_fill"); ++ StubRoutines::_arrayof_jshort_fill = generate_fill(T_SHORT, true, "arrayof_jshort_fill"); ++ StubRoutines::_arrayof_jint_fill = generate_fill(T_INT, true, "arrayof_jint_fill"); ++ } ++ ++ // Arguments: ++ // ++ // Inputs: ++ // A0 - source byte array address ++ // A1 - destination byte array address ++ // A2 - K (key) in little endian int array ++ // A3 - r vector byte array address ++ // A4 - input length ++ // ++ // Output: ++ // A0 - input length ++ // ++ address generate_aescrypt_encryptBlock(bool cbc) { ++ static const uint32_t ft_consts[256] = { ++ 0xc66363a5, 0xf87c7c84, 0xee777799, 0xf67b7b8d, ++ 0xfff2f20d, 0xd66b6bbd, 0xde6f6fb1, 0x91c5c554, ++ 0x60303050, 0x02010103, 0xce6767a9, 0x562b2b7d, ++ 0xe7fefe19, 0xb5d7d762, 0x4dababe6, 0xec76769a, ++ 0x8fcaca45, 0x1f82829d, 0x89c9c940, 0xfa7d7d87, ++ 0xeffafa15, 0xb25959eb, 0x8e4747c9, 0xfbf0f00b, ++ 0x41adadec, 0xb3d4d467, 0x5fa2a2fd, 0x45afafea, ++ 0x239c9cbf, 0x53a4a4f7, 0xe4727296, 0x9bc0c05b, ++ 0x75b7b7c2, 0xe1fdfd1c, 0x3d9393ae, 0x4c26266a, ++ 0x6c36365a, 0x7e3f3f41, 0xf5f7f702, 0x83cccc4f, ++ 0x6834345c, 0x51a5a5f4, 0xd1e5e534, 0xf9f1f108, ++ 0xe2717193, 0xabd8d873, 0x62313153, 0x2a15153f, ++ 0x0804040c, 0x95c7c752, 0x46232365, 0x9dc3c35e, ++ 0x30181828, 0x379696a1, 0x0a05050f, 0x2f9a9ab5, ++ 0x0e070709, 0x24121236, 0x1b80809b, 0xdfe2e23d, ++ 0xcdebeb26, 0x4e272769, 0x7fb2b2cd, 0xea75759f, ++ 0x1209091b, 0x1d83839e, 0x582c2c74, 0x341a1a2e, ++ 0x361b1b2d, 0xdc6e6eb2, 0xb45a5aee, 0x5ba0a0fb, ++ 0xa45252f6, 0x763b3b4d, 0xb7d6d661, 0x7db3b3ce, ++ 0x5229297b, 0xdde3e33e, 0x5e2f2f71, 0x13848497, ++ 0xa65353f5, 0xb9d1d168, 0x00000000, 0xc1eded2c, ++ 0x40202060, 0xe3fcfc1f, 0x79b1b1c8, 0xb65b5bed, ++ 0xd46a6abe, 0x8dcbcb46, 0x67bebed9, 0x7239394b, ++ 0x944a4ade, 0x984c4cd4, 0xb05858e8, 0x85cfcf4a, ++ 0xbbd0d06b, 0xc5efef2a, 0x4faaaae5, 0xedfbfb16, ++ 0x864343c5, 0x9a4d4dd7, 0x66333355, 0x11858594, ++ 0x8a4545cf, 0xe9f9f910, 0x04020206, 0xfe7f7f81, ++ 0xa05050f0, 0x783c3c44, 0x259f9fba, 0x4ba8a8e3, ++ 0xa25151f3, 0x5da3a3fe, 0x804040c0, 
0x058f8f8a, ++ 0x3f9292ad, 0x219d9dbc, 0x70383848, 0xf1f5f504, ++ 0x63bcbcdf, 0x77b6b6c1, 0xafdada75, 0x42212163, ++ 0x20101030, 0xe5ffff1a, 0xfdf3f30e, 0xbfd2d26d, ++ 0x81cdcd4c, 0x180c0c14, 0x26131335, 0xc3ecec2f, ++ 0xbe5f5fe1, 0x359797a2, 0x884444cc, 0x2e171739, ++ 0x93c4c457, 0x55a7a7f2, 0xfc7e7e82, 0x7a3d3d47, ++ 0xc86464ac, 0xba5d5de7, 0x3219192b, 0xe6737395, ++ 0xc06060a0, 0x19818198, 0x9e4f4fd1, 0xa3dcdc7f, ++ 0x44222266, 0x542a2a7e, 0x3b9090ab, 0x0b888883, ++ 0x8c4646ca, 0xc7eeee29, 0x6bb8b8d3, 0x2814143c, ++ 0xa7dede79, 0xbc5e5ee2, 0x160b0b1d, 0xaddbdb76, ++ 0xdbe0e03b, 0x64323256, 0x743a3a4e, 0x140a0a1e, ++ 0x924949db, 0x0c06060a, 0x4824246c, 0xb85c5ce4, ++ 0x9fc2c25d, 0xbdd3d36e, 0x43acacef, 0xc46262a6, ++ 0x399191a8, 0x319595a4, 0xd3e4e437, 0xf279798b, ++ 0xd5e7e732, 0x8bc8c843, 0x6e373759, 0xda6d6db7, ++ 0x018d8d8c, 0xb1d5d564, 0x9c4e4ed2, 0x49a9a9e0, ++ 0xd86c6cb4, 0xac5656fa, 0xf3f4f407, 0xcfeaea25, ++ 0xca6565af, 0xf47a7a8e, 0x47aeaee9, 0x10080818, ++ 0x6fbabad5, 0xf0787888, 0x4a25256f, 0x5c2e2e72, ++ 0x381c1c24, 0x57a6a6f1, 0x73b4b4c7, 0x97c6c651, ++ 0xcbe8e823, 0xa1dddd7c, 0xe874749c, 0x3e1f1f21, ++ 0x964b4bdd, 0x61bdbddc, 0x0d8b8b86, 0x0f8a8a85, ++ 0xe0707090, 0x7c3e3e42, 0x71b5b5c4, 0xcc6666aa, ++ 0x904848d8, 0x06030305, 0xf7f6f601, 0x1c0e0e12, ++ 0xc26161a3, 0x6a35355f, 0xae5757f9, 0x69b9b9d0, ++ 0x17868691, 0x99c1c158, 0x3a1d1d27, 0x279e9eb9, ++ 0xd9e1e138, 0xebf8f813, 0x2b9898b3, 0x22111133, ++ 0xd26969bb, 0xa9d9d970, 0x078e8e89, 0x339494a7, ++ 0x2d9b9bb6, 0x3c1e1e22, 0x15878792, 0xc9e9e920, ++ 0x87cece49, 0xaa5555ff, 0x50282878, 0xa5dfdf7a, ++ 0x038c8c8f, 0x59a1a1f8, 0x09898980, 0x1a0d0d17, ++ 0x65bfbfda, 0xd7e6e631, 0x844242c6, 0xd06868b8, ++ 0x824141c3, 0x299999b0, 0x5a2d2d77, 0x1e0f0f11, ++ 0x7bb0b0cb, 0xa85454fc, 0x6dbbbbd6, 0x2c16163a ++ }; ++ static const uint8_t fsb_consts[256] = { ++ 0x63, 0x7c, 0x77, 0x7b, 0xf2, 0x6b, 0x6f, 0xc5, ++ 0x30, 0x01, 0x67, 0x2b, 0xfe, 0xd7, 0xab, 0x76, ++ 0xca, 0x82, 0xc9, 0x7d, 0xfa, 0x59, 0x47, 0xf0, ++ 0xad, 0xd4, 0xa2, 0xaf, 0x9c, 0xa4, 0x72, 0xc0, ++ 0xb7, 0xfd, 0x93, 0x26, 0x36, 0x3f, 0xf7, 0xcc, ++ 0x34, 0xa5, 0xe5, 0xf1, 0x71, 0xd8, 0x31, 0x15, ++ 0x04, 0xc7, 0x23, 0xc3, 0x18, 0x96, 0x05, 0x9a, ++ 0x07, 0x12, 0x80, 0xe2, 0xeb, 0x27, 0xb2, 0x75, ++ 0x09, 0x83, 0x2c, 0x1a, 0x1b, 0x6e, 0x5a, 0xa0, ++ 0x52, 0x3b, 0xd6, 0xb3, 0x29, 0xe3, 0x2f, 0x84, ++ 0x53, 0xd1, 0x00, 0xed, 0x20, 0xfc, 0xb1, 0x5b, ++ 0x6a, 0xcb, 0xbe, 0x39, 0x4a, 0x4c, 0x58, 0xcf, ++ 0xd0, 0xef, 0xaa, 0xfb, 0x43, 0x4d, 0x33, 0x85, ++ 0x45, 0xf9, 0x02, 0x7f, 0x50, 0x3c, 0x9f, 0xa8, ++ 0x51, 0xa3, 0x40, 0x8f, 0x92, 0x9d, 0x38, 0xf5, ++ 0xbc, 0xb6, 0xda, 0x21, 0x10, 0xff, 0xf3, 0xd2, ++ 0xcd, 0x0c, 0x13, 0xec, 0x5f, 0x97, 0x44, 0x17, ++ 0xc4, 0xa7, 0x7e, 0x3d, 0x64, 0x5d, 0x19, 0x73, ++ 0x60, 0x81, 0x4f, 0xdc, 0x22, 0x2a, 0x90, 0x88, ++ 0x46, 0xee, 0xb8, 0x14, 0xde, 0x5e, 0x0b, 0xdb, ++ 0xe0, 0x32, 0x3a, 0x0a, 0x49, 0x06, 0x24, 0x5c, ++ 0xc2, 0xd3, 0xac, 0x62, 0x91, 0x95, 0xe4, 0x79, ++ 0xe7, 0xc8, 0x37, 0x6d, 0x8d, 0xd5, 0x4e, 0xa9, ++ 0x6c, 0x56, 0xf4, 0xea, 0x65, 0x7a, 0xae, 0x08, ++ 0xba, 0x78, 0x25, 0x2e, 0x1c, 0xa6, 0xb4, 0xc6, ++ 0xe8, 0xdd, 0x74, 0x1f, 0x4b, 0xbd, 0x8b, 0x8a, ++ 0x70, 0x3e, 0xb5, 0x66, 0x48, 0x03, 0xf6, 0x0e, ++ 0x61, 0x35, 0x57, 0xb9, 0x86, 0xc1, 0x1d, 0x9e, ++ 0xe1, 0xf8, 0x98, 0x11, 0x69, 0xd9, 0x8e, 0x94, ++ 0x9b, 0x1e, 0x87, 0xe9, 0xce, 0x55, 0x28, 0xdf, ++ 0x8c, 0xa1, 0x89, 0x0d, 0xbf, 0xe6, 0x42, 0x68, ++ 0x41, 0x99, 0x2d, 0x0f, 0xb0, 0x54, 0xbb, 0x16 ++ }; ++ ++ __ align(CodeEntryAlignment); ++ StubCodeMark mark(this, 
"StubRoutines", "aescrypt_encryptBlock"); ++ ++ // Allocate registers ++ Register src = A0; ++ Register dst = A1; ++ Register key = A2; ++ Register rve = A3; ++ Register srclen = A4; ++ Register keylen = T8; ++ Register srcend = A5; ++ Register keyold = A6; ++ Register t0 = A7; ++ Register t1, t2, t3, ftp; ++ Register xa[4] = { T0, T1, T2, T3 }; ++ Register ya[4] = { T4, T5, T6, T7 }; ++ ++ Label loop, tail, done; ++ address start = __ pc(); ++ ++ if (cbc) { ++ t1 = S0; ++ t2 = S1; ++ t3 = S2; ++ ftp = S3; ++ ++ __ beqz(srclen, done); ++ ++ __ addi_d(SP, SP, -4 * wordSize); ++ __ st_d(S3, SP, 3 * wordSize); ++ __ st_d(S2, SP, 2 * wordSize); ++ __ st_d(S1, SP, 1 * wordSize); ++ __ st_d(S0, SP, 0 * wordSize); ++ ++ __ add_d(srcend, src, srclen); ++ __ move(keyold, key); ++ } else { ++ t1 = A3; ++ t2 = A4; ++ t3 = A5; ++ ftp = A6; ++ } ++ ++ __ ld_w(keylen, key, arrayOopDesc::length_offset_in_bytes() - arrayOopDesc::base_offset_in_bytes(T_INT)); ++ ++ // Round 1 ++ if (cbc) { ++ for (int i = 0; i < 4; i++) { ++ __ ld_w(xa[i], rve, 4 * i); ++ } ++ ++ __ bind(loop); ++ ++ for (int i = 0; i < 4; i++) { ++ __ ld_w(ya[i], src, 4 * i); ++ } ++ for (int i = 0; i < 4; i++) { ++ __ XOR(xa[i], xa[i], ya[i]); ++ } ++ } else { ++ for (int i = 0; i < 4; i++) { ++ __ ld_w(xa[i], src, 4 * i); ++ } ++ } ++ for (int i = 0; i < 4; i++) { ++ __ ld_w(ya[i], key, 4 * i); ++ } ++ for (int i = 0; i < 4; i++) { ++ __ revb_2h(xa[i], xa[i]); ++ } ++ for (int i = 0; i < 4; i++) { ++ __ rotri_w(xa[i], xa[i], 16); ++ } ++ for (int i = 0; i < 4; i++) { ++ __ XOR(xa[i], xa[i], ya[i]); ++ } ++ ++ __ li(ftp, (intptr_t)ft_consts); ++ ++ // Round 2 - (N-1) ++ for (int r = 0; r < 14; r++) { ++ Register *xp; ++ Register *yp; ++ ++ if (r & 1) { ++ xp = xa; ++ yp = ya; ++ } else { ++ xp = ya; ++ yp = xa; ++ } ++ ++ for (int i = 0; i < 4; i++) { ++ __ ld_w(xp[i], key, 4 * (4 * (r + 1) + i)); ++ } ++ ++ for (int i = 0; i < 4; i++) { ++ __ bstrpick_d(t0, yp[(i + 3) & 3], 7, 0); ++ __ bstrpick_d(t1, yp[(i + 2) & 3], 15, 8); ++ __ bstrpick_d(t2, yp[(i + 1) & 3], 23, 16); ++ __ bstrpick_d(t3, yp[(i + 0) & 3], 31, 24); ++ __ slli_w(t0, t0, 2); ++ __ slli_w(t1, t1, 2); ++ __ slli_w(t2, t2, 2); ++ __ slli_w(t3, t3, 2); ++ __ ldx_w(t0, ftp, t0); ++ __ ldx_w(t1, ftp, t1); ++ __ ldx_w(t2, ftp, t2); ++ __ ldx_w(t3, ftp, t3); ++ __ rotri_w(t0, t0, 24); ++ __ rotri_w(t1, t1, 16); ++ __ rotri_w(t2, t2, 8); ++ __ XOR(xp[i], xp[i], t0); ++ __ XOR(t0, t1, t2); ++ __ XOR(xp[i], xp[i], t3); ++ __ XOR(xp[i], xp[i], t0); ++ } ++ ++ if (r == 8) { ++ // AES 128 ++ __ li(t0, 44); ++ __ beq(t0, keylen, tail); ++ } else if (r == 10) { ++ // AES 192 ++ __ li(t0, 52); ++ __ beq(t0, keylen, tail); ++ } ++ } ++ ++ __ bind(tail); ++ __ li(ftp, (intptr_t)fsb_consts); ++ __ alsl_d(key, keylen, key, 2 - 1); ++ ++ // Round N ++ for (int i = 0; i < 4; i++) { ++ __ bstrpick_d(t0, ya[(i + 3) & 3], 7, 0); ++ __ bstrpick_d(t1, ya[(i + 2) & 3], 15, 8); ++ __ bstrpick_d(t2, ya[(i + 1) & 3], 23, 16); ++ __ bstrpick_d(t3, ya[(i + 0) & 3], 31, 24); ++ __ ldx_bu(t0, ftp, t0); ++ __ ldx_bu(t1, ftp, t1); ++ __ ldx_bu(t2, ftp, t2); ++ __ ldx_bu(t3, ftp, t3); ++ __ ld_w(xa[i], key, 4 * i - 16); ++ __ slli_w(t1, t1, 8); ++ __ slli_w(t2, t2, 16); ++ __ slli_w(t3, t3, 24); ++ __ XOR(xa[i], xa[i], t0); ++ __ XOR(t0, t1, t2); ++ __ XOR(xa[i], xa[i], t3); ++ __ XOR(xa[i], xa[i], t0); ++ } ++ ++ for (int i = 0; i < 4; i++) { ++ __ revb_2h(xa[i], xa[i]); ++ } ++ for (int i = 0; i < 4; i++) { ++ __ rotri_w(xa[i], xa[i], 16); ++ } ++ for (int i = 0; i < 4; i++) { ++ __ st_w(xa[i], dst, 4 * 
i); ++ } ++ ++ if (cbc) { ++ __ move(key, keyold); ++ __ addi_d(src, src, 16); ++ __ addi_d(dst, dst, 16); ++ __ blt(src, srcend, loop); ++ ++ for (int i = 0; i < 4; i++) { ++ __ st_w(xa[i], rve, 4 * i); ++ } ++ ++ __ ld_d(S3, SP, 3 * wordSize); ++ __ ld_d(S2, SP, 2 * wordSize); ++ __ ld_d(S1, SP, 1 * wordSize); ++ __ ld_d(S0, SP, 0 * wordSize); ++ __ addi_d(SP, SP, 4 * wordSize); ++ ++ __ bind(done); ++ __ move(A0, srclen); ++ } ++ ++ __ jr(RA); ++ ++ return start; ++ } ++ ++ // Arguments: ++ // ++ // Inputs: ++ // A0 - source byte array address ++ // A1 - destination byte array address ++ // A2 - K (key) in little endian int array ++ // A3 - r vector byte array address ++ // A4 - input length ++ // ++ // Output: ++ // A0 - input length ++ // ++ address generate_aescrypt_decryptBlock(bool cbc) { ++ static const uint32_t rt_consts[256] = { ++ 0x51f4a750, 0x7e416553, 0x1a17a4c3, 0x3a275e96, ++ 0x3bab6bcb, 0x1f9d45f1, 0xacfa58ab, 0x4be30393, ++ 0x2030fa55, 0xad766df6, 0x88cc7691, 0xf5024c25, ++ 0x4fe5d7fc, 0xc52acbd7, 0x26354480, 0xb562a38f, ++ 0xdeb15a49, 0x25ba1b67, 0x45ea0e98, 0x5dfec0e1, ++ 0xc32f7502, 0x814cf012, 0x8d4697a3, 0x6bd3f9c6, ++ 0x038f5fe7, 0x15929c95, 0xbf6d7aeb, 0x955259da, ++ 0xd4be832d, 0x587421d3, 0x49e06929, 0x8ec9c844, ++ 0x75c2896a, 0xf48e7978, 0x99583e6b, 0x27b971dd, ++ 0xbee14fb6, 0xf088ad17, 0xc920ac66, 0x7dce3ab4, ++ 0x63df4a18, 0xe51a3182, 0x97513360, 0x62537f45, ++ 0xb16477e0, 0xbb6bae84, 0xfe81a01c, 0xf9082b94, ++ 0x70486858, 0x8f45fd19, 0x94de6c87, 0x527bf8b7, ++ 0xab73d323, 0x724b02e2, 0xe31f8f57, 0x6655ab2a, ++ 0xb2eb2807, 0x2fb5c203, 0x86c57b9a, 0xd33708a5, ++ 0x302887f2, 0x23bfa5b2, 0x02036aba, 0xed16825c, ++ 0x8acf1c2b, 0xa779b492, 0xf307f2f0, 0x4e69e2a1, ++ 0x65daf4cd, 0x0605bed5, 0xd134621f, 0xc4a6fe8a, ++ 0x342e539d, 0xa2f355a0, 0x058ae132, 0xa4f6eb75, ++ 0x0b83ec39, 0x4060efaa, 0x5e719f06, 0xbd6e1051, ++ 0x3e218af9, 0x96dd063d, 0xdd3e05ae, 0x4de6bd46, ++ 0x91548db5, 0x71c45d05, 0x0406d46f, 0x605015ff, ++ 0x1998fb24, 0xd6bde997, 0x894043cc, 0x67d99e77, ++ 0xb0e842bd, 0x07898b88, 0xe7195b38, 0x79c8eedb, ++ 0xa17c0a47, 0x7c420fe9, 0xf8841ec9, 0x00000000, ++ 0x09808683, 0x322bed48, 0x1e1170ac, 0x6c5a724e, ++ 0xfd0efffb, 0x0f853856, 0x3daed51e, 0x362d3927, ++ 0x0a0fd964, 0x685ca621, 0x9b5b54d1, 0x24362e3a, ++ 0x0c0a67b1, 0x9357e70f, 0xb4ee96d2, 0x1b9b919e, ++ 0x80c0c54f, 0x61dc20a2, 0x5a774b69, 0x1c121a16, ++ 0xe293ba0a, 0xc0a02ae5, 0x3c22e043, 0x121b171d, ++ 0x0e090d0b, 0xf28bc7ad, 0x2db6a8b9, 0x141ea9c8, ++ 0x57f11985, 0xaf75074c, 0xee99ddbb, 0xa37f60fd, ++ 0xf701269f, 0x5c72f5bc, 0x44663bc5, 0x5bfb7e34, ++ 0x8b432976, 0xcb23c6dc, 0xb6edfc68, 0xb8e4f163, ++ 0xd731dcca, 0x42638510, 0x13972240, 0x84c61120, ++ 0x854a247d, 0xd2bb3df8, 0xaef93211, 0xc729a16d, ++ 0x1d9e2f4b, 0xdcb230f3, 0x0d8652ec, 0x77c1e3d0, ++ 0x2bb3166c, 0xa970b999, 0x119448fa, 0x47e96422, ++ 0xa8fc8cc4, 0xa0f03f1a, 0x567d2cd8, 0x223390ef, ++ 0x87494ec7, 0xd938d1c1, 0x8ccaa2fe, 0x98d40b36, ++ 0xa6f581cf, 0xa57ade28, 0xdab78e26, 0x3fadbfa4, ++ 0x2c3a9de4, 0x5078920d, 0x6a5fcc9b, 0x547e4662, ++ 0xf68d13c2, 0x90d8b8e8, 0x2e39f75e, 0x82c3aff5, ++ 0x9f5d80be, 0x69d0937c, 0x6fd52da9, 0xcf2512b3, ++ 0xc8ac993b, 0x10187da7, 0xe89c636e, 0xdb3bbb7b, ++ 0xcd267809, 0x6e5918f4, 0xec9ab701, 0x834f9aa8, ++ 0xe6956e65, 0xaaffe67e, 0x21bccf08, 0xef15e8e6, ++ 0xbae79bd9, 0x4a6f36ce, 0xea9f09d4, 0x29b07cd6, ++ 0x31a4b2af, 0x2a3f2331, 0xc6a59430, 0x35a266c0, ++ 0x744ebc37, 0xfc82caa6, 0xe090d0b0, 0x33a7d815, ++ 0xf104984a, 0x41ecdaf7, 0x7fcd500e, 0x1791f62f, ++ 0x764dd68d, 0x43efb04d, 0xccaa4d54, 0xe49604df, 
++ 0x9ed1b5e3, 0x4c6a881b, 0xc12c1fb8, 0x4665517f, ++ 0x9d5eea04, 0x018c355d, 0xfa877473, 0xfb0b412e, ++ 0xb3671d5a, 0x92dbd252, 0xe9105633, 0x6dd64713, ++ 0x9ad7618c, 0x37a10c7a, 0x59f8148e, 0xeb133c89, ++ 0xcea927ee, 0xb761c935, 0xe11ce5ed, 0x7a47b13c, ++ 0x9cd2df59, 0x55f2733f, 0x1814ce79, 0x73c737bf, ++ 0x53f7cdea, 0x5ffdaa5b, 0xdf3d6f14, 0x7844db86, ++ 0xcaaff381, 0xb968c43e, 0x3824342c, 0xc2a3405f, ++ 0x161dc372, 0xbce2250c, 0x283c498b, 0xff0d9541, ++ 0x39a80171, 0x080cb3de, 0xd8b4e49c, 0x6456c190, ++ 0x7bcb8461, 0xd532b670, 0x486c5c74, 0xd0b85742 ++ }; ++ static const uint8_t rsb_consts[256] = { ++ 0x52, 0x09, 0x6a, 0xd5, 0x30, 0x36, 0xa5, 0x38, ++ 0xbf, 0x40, 0xa3, 0x9e, 0x81, 0xf3, 0xd7, 0xfb, ++ 0x7c, 0xe3, 0x39, 0x82, 0x9b, 0x2f, 0xff, 0x87, ++ 0x34, 0x8e, 0x43, 0x44, 0xc4, 0xde, 0xe9, 0xcb, ++ 0x54, 0x7b, 0x94, 0x32, 0xa6, 0xc2, 0x23, 0x3d, ++ 0xee, 0x4c, 0x95, 0x0b, 0x42, 0xfa, 0xc3, 0x4e, ++ 0x08, 0x2e, 0xa1, 0x66, 0x28, 0xd9, 0x24, 0xb2, ++ 0x76, 0x5b, 0xa2, 0x49, 0x6d, 0x8b, 0xd1, 0x25, ++ 0x72, 0xf8, 0xf6, 0x64, 0x86, 0x68, 0x98, 0x16, ++ 0xd4, 0xa4, 0x5c, 0xcc, 0x5d, 0x65, 0xb6, 0x92, ++ 0x6c, 0x70, 0x48, 0x50, 0xfd, 0xed, 0xb9, 0xda, ++ 0x5e, 0x15, 0x46, 0x57, 0xa7, 0x8d, 0x9d, 0x84, ++ 0x90, 0xd8, 0xab, 0x00, 0x8c, 0xbc, 0xd3, 0x0a, ++ 0xf7, 0xe4, 0x58, 0x05, 0xb8, 0xb3, 0x45, 0x06, ++ 0xd0, 0x2c, 0x1e, 0x8f, 0xca, 0x3f, 0x0f, 0x02, ++ 0xc1, 0xaf, 0xbd, 0x03, 0x01, 0x13, 0x8a, 0x6b, ++ 0x3a, 0x91, 0x11, 0x41, 0x4f, 0x67, 0xdc, 0xea, ++ 0x97, 0xf2, 0xcf, 0xce, 0xf0, 0xb4, 0xe6, 0x73, ++ 0x96, 0xac, 0x74, 0x22, 0xe7, 0xad, 0x35, 0x85, ++ 0xe2, 0xf9, 0x37, 0xe8, 0x1c, 0x75, 0xdf, 0x6e, ++ 0x47, 0xf1, 0x1a, 0x71, 0x1d, 0x29, 0xc5, 0x89, ++ 0x6f, 0xb7, 0x62, 0x0e, 0xaa, 0x18, 0xbe, 0x1b, ++ 0xfc, 0x56, 0x3e, 0x4b, 0xc6, 0xd2, 0x79, 0x20, ++ 0x9a, 0xdb, 0xc0, 0xfe, 0x78, 0xcd, 0x5a, 0xf4, ++ 0x1f, 0xdd, 0xa8, 0x33, 0x88, 0x07, 0xc7, 0x31, ++ 0xb1, 0x12, 0x10, 0x59, 0x27, 0x80, 0xec, 0x5f, ++ 0x60, 0x51, 0x7f, 0xa9, 0x19, 0xb5, 0x4a, 0x0d, ++ 0x2d, 0xe5, 0x7a, 0x9f, 0x93, 0xc9, 0x9c, 0xef, ++ 0xa0, 0xe0, 0x3b, 0x4d, 0xae, 0x2a, 0xf5, 0xb0, ++ 0xc8, 0xeb, 0xbb, 0x3c, 0x83, 0x53, 0x99, 0x61, ++ 0x17, 0x2b, 0x04, 0x7e, 0xba, 0x77, 0xd6, 0x26, ++ 0xe1, 0x69, 0x14, 0x63, 0x55, 0x21, 0x0c, 0x7d ++ }; ++ ++ __ align(CodeEntryAlignment); ++ StubCodeMark mark(this, "StubRoutines", "aescrypt_decryptBlock"); ++ ++ // Allocate registers ++ Register src = A0; ++ Register dst = A1; ++ Register key = A2; ++ Register rve = A3; ++ Register srclen = A4; ++ Register keylen = T8; ++ Register srcend = A5; ++ Register t0 = A6; ++ Register t1 = A7; ++ Register t2, t3, rtp, rvp; ++ Register xa[4] = { T0, T1, T2, T3 }; ++ Register ya[4] = { T4, T5, T6, T7 }; ++ ++ Label loop, tail, done; ++ address start = __ pc(); ++ ++ if (cbc) { ++ t2 = S0; ++ t3 = S1; ++ rtp = S2; ++ rvp = S3; ++ ++ __ beqz(srclen, done); ++ ++ __ addi_d(SP, SP, -4 * wordSize); ++ __ st_d(S3, SP, 3 * wordSize); ++ __ st_d(S2, SP, 2 * wordSize); ++ __ st_d(S1, SP, 1 * wordSize); ++ __ st_d(S0, SP, 0 * wordSize); ++ ++ __ add_d(srcend, src, srclen); ++ __ move(rvp, rve); ++ } else { ++ t2 = A3; ++ t3 = A4; ++ rtp = A5; ++ } ++ ++ __ ld_w(keylen, key, arrayOopDesc::length_offset_in_bytes() - arrayOopDesc::base_offset_in_bytes(T_INT)); ++ ++ __ bind(loop); ++ ++ // Round 1 ++ for (int i = 0; i < 4; i++) { ++ __ ld_w(xa[i], src, 4 * i); ++ } ++ for (int i = 0; i < 4; i++) { ++ __ ld_w(ya[i], key, 4 * (4 + i)); ++ } ++ for (int i = 0; i < 4; i++) { ++ __ revb_2h(xa[i], xa[i]); ++ } ++ for (int i = 0; i < 4; i++) { ++ __ rotri_w(xa[i], 
xa[i], 16); ++ } ++ for (int i = 0; i < 4; i++) { ++ __ XOR(xa[i], xa[i], ya[i]); ++ } ++ ++ __ li(rtp, (intptr_t)rt_consts); ++ ++ // Round 2 - (N-1) ++ for (int r = 0; r < 14; r++) { ++ Register *xp; ++ Register *yp; ++ ++ if (r & 1) { ++ xp = xa; ++ yp = ya; ++ } else { ++ xp = ya; ++ yp = xa; ++ } ++ ++ for (int i = 0; i < 4; i++) { ++ __ ld_w(xp[i], key, 4 * (4 * (r + 1) + 4 + i)); ++ } ++ ++ for (int i = 0; i < 4; i++) { ++ __ bstrpick_d(t0, yp[(i + 1) & 3], 7, 0); ++ __ bstrpick_d(t1, yp[(i + 2) & 3], 15, 8); ++ __ bstrpick_d(t2, yp[(i + 3) & 3], 23, 16); ++ __ bstrpick_d(t3, yp[(i + 0) & 3], 31, 24); ++ __ slli_w(t0, t0, 2); ++ __ slli_w(t1, t1, 2); ++ __ slli_w(t2, t2, 2); ++ __ slli_w(t3, t3, 2); ++ __ ldx_w(t0, rtp, t0); ++ __ ldx_w(t1, rtp, t1); ++ __ ldx_w(t2, rtp, t2); ++ __ ldx_w(t3, rtp, t3); ++ __ rotri_w(t0, t0, 24); ++ __ rotri_w(t1, t1, 16); ++ __ rotri_w(t2, t2, 8); ++ __ XOR(xp[i], xp[i], t0); ++ __ XOR(t0, t1, t2); ++ __ XOR(xp[i], xp[i], t3); ++ __ XOR(xp[i], xp[i], t0); ++ } ++ ++ if (r == 8) { ++ // AES 128 ++ __ li(t0, 44); ++ __ beq(t0, keylen, tail); ++ } else if (r == 10) { ++ // AES 192 ++ __ li(t0, 52); ++ __ beq(t0, keylen, tail); ++ } ++ } ++ ++ __ bind(tail); ++ __ li(rtp, (intptr_t)rsb_consts); ++ ++ // Round N ++ for (int i = 0; i < 4; i++) { ++ __ bstrpick_d(t0, ya[(i + 1) & 3], 7, 0); ++ __ bstrpick_d(t1, ya[(i + 2) & 3], 15, 8); ++ __ bstrpick_d(t2, ya[(i + 3) & 3], 23, 16); ++ __ bstrpick_d(t3, ya[(i + 0) & 3], 31, 24); ++ __ ldx_bu(t0, rtp, t0); ++ __ ldx_bu(t1, rtp, t1); ++ __ ldx_bu(t2, rtp, t2); ++ __ ldx_bu(t3, rtp, t3); ++ __ ld_w(xa[i], key, 4 * i); ++ __ slli_w(t1, t1, 8); ++ __ slli_w(t2, t2, 16); ++ __ slli_w(t3, t3, 24); ++ __ XOR(xa[i], xa[i], t0); ++ __ XOR(t0, t1, t2); ++ __ XOR(xa[i], xa[i], t3); ++ __ XOR(xa[i], xa[i], t0); ++ } ++ ++ if (cbc) { ++ for (int i = 0; i < 4; i++) { ++ __ ld_w(ya[i], rvp, 4 * i); ++ } ++ } ++ for (int i = 0; i < 4; i++) { ++ __ revb_2h(xa[i], xa[i]); ++ } ++ for (int i = 0; i < 4; i++) { ++ __ rotri_w(xa[i], xa[i], 16); ++ } ++ if (cbc) { ++ for (int i = 0; i < 4; i++) { ++ __ XOR(xa[i], xa[i], ya[i]); ++ } ++ } ++ for (int i = 0; i < 4; i++) { ++ __ st_w(xa[i], dst, 4 * i); ++ } ++ ++ if (cbc) { ++ __ move(rvp, src); ++ __ addi_d(src, src, 16); ++ __ addi_d(dst, dst, 16); ++ __ blt(src, srcend, loop); ++ ++ __ ld_d(t0, src, -16); ++ __ ld_d(t1, src, -8); ++ __ st_d(t0, rve, 0); ++ __ st_d(t1, rve, 8); ++ ++ __ ld_d(S3, SP, 3 * wordSize); ++ __ ld_d(S2, SP, 2 * wordSize); ++ __ ld_d(S1, SP, 1 * wordSize); ++ __ ld_d(S0, SP, 0 * wordSize); ++ __ addi_d(SP, SP, 4 * wordSize); ++ ++ __ bind(done); ++ __ move(A0, srclen); ++ } ++ ++ __ jr(RA); ++ ++ return start; ++ } ++ ++ // Arguments: ++ // ++ // Inputs: ++ // A0 - byte[] source+offset ++ // A1 - int[] SHA.state ++ // A2 - int offset ++ // A3 - int limit ++ // ++ void generate_sha1_implCompress(const char *name, address &entry, address &entry_mb) { ++ __ align(CodeEntryAlignment); ++ StubCodeMark mark(this, "StubRoutines", name); ++ Label keys, loop; ++ ++ // Keys ++ __ bind(keys); ++ __ emit_int32(0x5a827999); ++ __ emit_int32(0x6ed9eba1); ++ __ emit_int32(0x8f1bbcdc); ++ __ emit_int32(0xca62c1d6); ++ ++ // Allocate registers ++ Register t0 = T5; ++ Register t1 = T6; ++ Register t2 = T7; ++ Register t3 = T8; ++ Register buf = A0; ++ Register state = A1; ++ Register ofs = A2; ++ Register limit = A3; ++ Register ka[4] = { A4, A5, A6, A7 }; ++ Register sa[5] = { T0, T1, T2, T3, T4 }; ++ ++ // Entry ++ entry = __ pc(); ++ __ move(ofs, R0); ++ __ move(limit, 
R0); ++ ++ // Entry MB ++ entry_mb = __ pc(); ++ ++ // Allocate scratch space ++ __ addi_d(SP, SP, -64); ++ ++ // Load keys ++ __ lipc(t0, keys); ++ __ ld_w(ka[0], t0, 0); ++ __ ld_w(ka[1], t0, 4); ++ __ ld_w(ka[2], t0, 8); ++ __ ld_w(ka[3], t0, 12); ++ ++ __ bind(loop); ++ // Load arguments ++ __ ld_w(sa[0], state, 0); ++ __ ld_w(sa[1], state, 4); ++ __ ld_w(sa[2], state, 8); ++ __ ld_w(sa[3], state, 12); ++ __ ld_w(sa[4], state, 16); ++ ++ // 80 rounds of hashing ++ for (int i = 0; i < 80; i++) { ++ Register a = sa[(5 - (i % 5)) % 5]; ++ Register b = sa[(6 - (i % 5)) % 5]; ++ Register c = sa[(7 - (i % 5)) % 5]; ++ Register d = sa[(8 - (i % 5)) % 5]; ++ Register e = sa[(9 - (i % 5)) % 5]; ++ ++ if (i < 16) { ++ __ ld_w(t0, buf, i * 4); ++ __ revb_2h(t0, t0); ++ __ rotri_w(t0, t0, 16); ++ __ add_w(e, e, t0); ++ __ st_w(t0, SP, i * 4); ++ __ XOR(t0, c, d); ++ __ AND(t0, t0, b); ++ __ XOR(t0, t0, d); ++ } else { ++ __ ld_w(t0, SP, ((i - 3) & 0xF) * 4); ++ __ ld_w(t1, SP, ((i - 8) & 0xF) * 4); ++ __ ld_w(t2, SP, ((i - 14) & 0xF) * 4); ++ __ ld_w(t3, SP, ((i - 16) & 0xF) * 4); ++ __ XOR(t0, t0, t1); ++ __ XOR(t0, t0, t2); ++ __ XOR(t0, t0, t3); ++ __ rotri_w(t0, t0, 31); ++ __ add_w(e, e, t0); ++ __ st_w(t0, SP, (i & 0xF) * 4); ++ ++ if (i < 20) { ++ __ XOR(t0, c, d); ++ __ AND(t0, t0, b); ++ __ XOR(t0, t0, d); ++ } else if (i < 40 || i >= 60) { ++ __ XOR(t0, b, c); ++ __ XOR(t0, t0, d); ++ } else if (i < 60) { ++ __ OR(t0, c, d); ++ __ AND(t0, t0, b); ++ __ AND(t2, c, d); ++ __ OR(t0, t0, t2); ++ } ++ } ++ ++ __ rotri_w(b, b, 2); ++ __ add_w(e, e, t0); ++ __ add_w(e, e, ka[i / 20]); ++ __ rotri_w(t0, a, 27); ++ __ add_w(e, e, t0); ++ } ++ ++ // Save updated state ++ __ ld_w(t0, state, 0); ++ __ ld_w(t1, state, 4); ++ __ ld_w(t2, state, 8); ++ __ ld_w(t3, state, 12); ++ __ add_w(sa[0], sa[0], t0); ++ __ ld_w(t0, state, 16); ++ __ add_w(sa[1], sa[1], t1); ++ __ add_w(sa[2], sa[2], t2); ++ __ add_w(sa[3], sa[3], t3); ++ __ add_w(sa[4], sa[4], t0); ++ __ st_w(sa[0], state, 0); ++ __ st_w(sa[1], state, 4); ++ __ st_w(sa[2], state, 8); ++ __ st_w(sa[3], state, 12); ++ __ st_w(sa[4], state, 16); ++ ++ __ addi_w(ofs, ofs, 64); ++ __ addi_d(buf, buf, 64); ++ __ bge(limit, ofs, loop); ++ __ move(V0, ofs); // return ofs ++ ++ __ addi_d(SP, SP, 64); ++ __ jr(RA); ++ } ++ ++ // Arguments: ++ // ++ // Inputs: ++ // A0 - byte[] source+offset ++ // A1 - int[] SHA.state ++ // A2 - int offset ++ // A3 - int limit ++ // ++ void generate_sha256_implCompress(const char *name, address &entry, address &entry_mb) { ++ static const uint32_t round_consts[64] = { ++ 0x428a2f98, 0x71374491, 0xb5c0fbcf, 0xe9b5dba5, ++ 0x3956c25b, 0x59f111f1, 0x923f82a4, 0xab1c5ed5, ++ 0xd807aa98, 0x12835b01, 0x243185be, 0x550c7dc3, ++ 0x72be5d74, 0x80deb1fe, 0x9bdc06a7, 0xc19bf174, ++ 0xe49b69c1, 0xefbe4786, 0x0fc19dc6, 0x240ca1cc, ++ 0x2de92c6f, 0x4a7484aa, 0x5cb0a9dc, 0x76f988da, ++ 0x983e5152, 0xa831c66d, 0xb00327c8, 0xbf597fc7, ++ 0xc6e00bf3, 0xd5a79147, 0x06ca6351, 0x14292967, ++ 0x27b70a85, 0x2e1b2138, 0x4d2c6dfc, 0x53380d13, ++ 0x650a7354, 0x766a0abb, 0x81c2c92e, 0x92722c85, ++ 0xa2bfe8a1, 0xa81a664b, 0xc24b8b70, 0xc76c51a3, ++ 0xd192e819, 0xd6990624, 0xf40e3585, 0x106aa070, ++ 0x19a4c116, 0x1e376c08, 0x2748774c, 0x34b0bcb5, ++ 0x391c0cb3, 0x4ed8aa4a, 0x5b9cca4f, 0x682e6ff3, ++ 0x748f82ee, 0x78a5636f, 0x84c87814, 0x8cc70208, ++ 0x90befffa, 0xa4506ceb, 0xbef9a3f7, 0xc67178f2, ++ }; ++ __ align(CodeEntryAlignment); ++ StubCodeMark mark(this, "StubRoutines", name); ++ Label loop; ++ ++ // Allocate registers ++ Register t0 = A4; ++ 
Register t1 = A5; ++ Register t2 = A6; ++ Register t3 = A7; ++ Register buf = A0; ++ Register state = A1; ++ Register ofs = A2; ++ Register limit = A3; ++ Register kptr = T8; ++ Register sa[8] = { T0, T1, T2, T3, T4, T5, T6, T7 }; ++ ++ // Entry ++ entry = __ pc(); ++ __ move(ofs, R0); ++ __ move(limit, R0); ++ ++ // Entry MB ++ entry_mb = __ pc(); ++ ++ // Allocate scratch space ++ __ addi_d(SP, SP, -64); ++ ++ // Load keys base address ++ __ li(kptr, (intptr_t)round_consts); ++ ++ __ bind(loop); ++ // Load state ++ __ ld_w(sa[0], state, 0); ++ __ ld_w(sa[1], state, 4); ++ __ ld_w(sa[2], state, 8); ++ __ ld_w(sa[3], state, 12); ++ __ ld_w(sa[4], state, 16); ++ __ ld_w(sa[5], state, 20); ++ __ ld_w(sa[6], state, 24); ++ __ ld_w(sa[7], state, 28); ++ ++ // Do 64 rounds of hashing ++ for (int i = 0; i < 64; i++) { ++ Register a = sa[(0 - i) & 7]; ++ Register b = sa[(1 - i) & 7]; ++ Register c = sa[(2 - i) & 7]; ++ Register d = sa[(3 - i) & 7]; ++ Register e = sa[(4 - i) & 7]; ++ Register f = sa[(5 - i) & 7]; ++ Register g = sa[(6 - i) & 7]; ++ Register h = sa[(7 - i) & 7]; ++ ++ if (i < 16) { ++ __ ld_w(t1, buf, i * 4); ++ __ revb_2h(t1, t1); ++ __ rotri_w(t1, t1, 16); ++ } else { ++ __ ld_w(t0, SP, ((i - 15) & 0xF) * 4); ++ __ ld_w(t1, SP, ((i - 16) & 0xF) * 4); ++ __ ld_w(t2, SP, ((i - 7) & 0xF) * 4); ++ __ add_w(t1, t1, t2); ++ __ rotri_w(t2, t0, 18); ++ __ srli_w(t3, t0, 3); ++ __ rotri_w(t0, t0, 7); ++ __ XOR(t2, t2, t3); ++ __ XOR(t0, t0, t2); ++ __ add_w(t1, t1, t0); ++ __ ld_w(t0, SP, ((i - 2) & 0xF) * 4); ++ __ rotri_w(t2, t0, 19); ++ __ srli_w(t3, t0, 10); ++ __ rotri_w(t0, t0, 17); ++ __ XOR(t2, t2, t3); ++ __ XOR(t0, t0, t2); ++ __ add_w(t1, t1, t0); ++ } ++ ++ __ rotri_w(t2, e, 11); ++ __ rotri_w(t3, e, 25); ++ __ rotri_w(t0, e, 6); ++ __ XOR(t2, t2, t3); ++ __ XOR(t0, t0, t2); ++ __ XOR(t2, g, f); ++ __ ld_w(t3, kptr, i * 4); ++ __ AND(t2, t2, e); ++ __ XOR(t2, t2, g); ++ __ add_w(t0, t0, t2); ++ __ add_w(t0, t0, t3); ++ __ add_w(h, h, t1); ++ __ add_w(h, h, t0); ++ __ add_w(d, d, h); ++ __ rotri_w(t2, a, 13); ++ __ rotri_w(t3, a, 22); ++ __ rotri_w(t0, a, 2); ++ __ XOR(t2, t2, t3); ++ __ XOR(t0, t0, t2); ++ __ add_w(h, h, t0); ++ __ OR(t0, c, b); ++ __ AND(t2, c, b); ++ __ AND(t0, t0, a); ++ __ OR(t0, t0, t2); ++ __ add_w(h, h, t0); ++ __ st_w(t1, SP, (i & 0xF) * 4); ++ } ++ ++ // Add to state ++ __ ld_w(t0, state, 0); ++ __ ld_w(t1, state, 4); ++ __ ld_w(t2, state, 8); ++ __ ld_w(t3, state, 12); ++ __ add_w(sa[0], sa[0], t0); ++ __ add_w(sa[1], sa[1], t1); ++ __ add_w(sa[2], sa[2], t2); ++ __ add_w(sa[3], sa[3], t3); ++ __ ld_w(t0, state, 16); ++ __ ld_w(t1, state, 20); ++ __ ld_w(t2, state, 24); ++ __ ld_w(t3, state, 28); ++ __ add_w(sa[4], sa[4], t0); ++ __ add_w(sa[5], sa[5], t1); ++ __ add_w(sa[6], sa[6], t2); ++ __ add_w(sa[7], sa[7], t3); ++ __ st_w(sa[0], state, 0); ++ __ st_w(sa[1], state, 4); ++ __ st_w(sa[2], state, 8); ++ __ st_w(sa[3], state, 12); ++ __ st_w(sa[4], state, 16); ++ __ st_w(sa[5], state, 20); ++ __ st_w(sa[6], state, 24); ++ __ st_w(sa[7], state, 28); ++ ++ __ addi_w(ofs, ofs, 64); ++ __ addi_d(buf, buf, 64); ++ __ bge(limit, ofs, loop); ++ __ move(V0, ofs); // return ofs ++ ++ __ addi_d(SP, SP, 64); ++ __ jr(RA); ++ } ++ ++ // Do NOT delete this node which stands for stub routine placeholder ++ address generate_updateBytesCRC32() { ++ assert(UseCRC32Intrinsics, "need CRC32 instructions support"); ++ ++ __ align(CodeEntryAlignment); ++ StubCodeMark mark(this, "StubRoutines", "updateBytesCRC32"); ++ ++ address start = __ pc(); ++ ++ const Register crc 
= A0; // crc ++ const Register buf = A1; // source java byte array address ++ const Register len = A2; // length ++ const Register tmp = A3; ++ ++ __ enter(); // required for proper stackwalking of RuntimeStub frame ++ ++ __ kernel_crc32(crc, buf, len, tmp); ++ ++ __ leave(); // required for proper stackwalking of RuntimeStub frame ++ __ jr(RA); ++ ++ return start; ++ } ++ ++ // add a function to implement SafeFetch32 and SafeFetchN ++ void generate_safefetch(const char* name, int size, address* entry, ++ address* fault_pc, address* continuation_pc) { ++ // safefetch signatures: ++ // int SafeFetch32(int* adr, int errValue); ++ // intptr_t SafeFetchN (intptr_t* adr, intptr_t errValue); ++ // ++ // arguments: ++ // A0 = adr ++ // A1 = errValue ++ // ++ // result: ++ // PPC_RET = *adr or errValue ++ StubCodeMark mark(this, "StubRoutines", name); ++ ++ // Entry point, pc or function descriptor. ++ *entry = __ pc(); ++ ++ // Load *adr into A1, may fault. ++ *fault_pc = __ pc(); ++ switch (size) { ++ case 4: ++ // int32_t ++ __ ld_w(A1, A0, 0); ++ break; ++ case 8: ++ // int64_t ++ __ ld_d(A1, A0, 0); ++ break; ++ default: ++ ShouldNotReachHere(); ++ } ++ ++ // return errValue or *adr ++ *continuation_pc = __ pc(); ++ __ add_d(V0, A1, R0); ++ __ jr(RA); ++ } ++ ++ ++#undef __ ++#define __ masm-> ++ ++ // Continuation point for throwing of implicit exceptions that are ++ // not handled in the current activation. Fabricates an exception ++ // oop and initiates normal exception dispatching in this ++ // frame. Since we need to preserve callee-saved values (currently ++ // only for C2, but done for C1 as well) we need a callee-saved oop ++ // map and therefore have to make these stubs into RuntimeStubs ++ // rather than BufferBlobs. If the compiler needs all registers to ++ // be preserved between the fault point and the exception handler ++ // then it must assume responsibility for that in ++ // AbstractCompiler::continuation_for_implicit_null_exception or ++ // continuation_for_implicit_division_by_zero_exception. All other ++ // implicit exceptions (e.g., NullPointerException or ++ // AbstractMethodError on entry) are either at call sites or ++ // otherwise assume that stack unwinding will be initiated, so ++ // caller saved registers were assumed volatile in the compiler. ++ address generate_throw_exception(const char* name, ++ address runtime_entry, ++ bool restore_saved_exception_pc) { ++ // Information about frame layout at time of blocking runtime call. ++ // Note that we only have to preserve callee-saved registers since ++ // the compilers are responsible for supplying a continuation point ++ // if they expect all registers to be preserved. 
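++    // Frame slots from the bottom up: the saved thread (last_java_sp), S7..S0, FP and the return address; 'framesize' counts them.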
++ enum layout { ++ thread_off, // last_java_sp ++ S7_off, // callee saved register sp + 1 ++ S6_off, // callee saved register sp + 2 ++ S5_off, // callee saved register sp + 3 ++ S4_off, // callee saved register sp + 4 ++ S3_off, // callee saved register sp + 5 ++ S2_off, // callee saved register sp + 6 ++ S1_off, // callee saved register sp + 7 ++ S0_off, // callee saved register sp + 8 ++ FP_off, ++ ret_address, ++ framesize ++ }; ++ ++ int insts_size = 2048; ++ int locs_size = 32; ++ ++ // CodeBuffer* code = new CodeBuffer(insts_size, locs_size, 0, 0, 0, false, ++ // NULL, NULL, NULL, false, NULL, name, false); ++ CodeBuffer code (name , insts_size, locs_size); ++ OopMapSet* oop_maps = new OopMapSet(); ++ MacroAssembler* masm = new MacroAssembler(&code); ++ ++ address start = __ pc(); ++ ++ // This is an inlined and slightly modified version of call_VM ++ // which has the ability to fetch the return PC out of ++ // thread-local storage and also sets up last_Java_sp slightly ++ // differently than the real call_VM ++#ifndef OPT_THREAD ++ Register java_thread = TREG; ++ __ get_thread(java_thread); ++#else ++ Register java_thread = TREG; ++#endif ++ if (restore_saved_exception_pc) { ++ __ ld_d(RA, java_thread, in_bytes(JavaThread::saved_exception_pc_offset())); ++ } ++ __ enter(); // required for proper stackwalking of RuntimeStub frame ++ ++ __ addi_d(SP, SP, (-1) * (framesize-2) * wordSize); // prolog ++ __ st_d(S0, SP, S0_off * wordSize); ++ __ st_d(S1, SP, S1_off * wordSize); ++ __ st_d(S2, SP, S2_off * wordSize); ++ __ st_d(S3, SP, S3_off * wordSize); ++ __ st_d(S4, SP, S4_off * wordSize); ++ __ st_d(S5, SP, S5_off * wordSize); ++ __ st_d(S6, SP, S6_off * wordSize); ++ __ st_d(S7, SP, S7_off * wordSize); ++ ++ int frame_complete = __ pc() - start; ++ // push java thread (becomes first argument of C function) ++ __ st_d(java_thread, SP, thread_off * wordSize); ++ if (java_thread != A0) ++ __ move(A0, java_thread); ++ ++ // Set up last_Java_sp and last_Java_fp ++ Label before_call; ++ address the_pc = __ pc(); ++ __ bind(before_call); ++ __ set_last_Java_frame(java_thread, SP, FP, before_call); ++ // Align stack ++ __ li(AT, -(StackAlignmentInBytes)); ++ __ andr(SP, SP, AT); ++ ++ // Call runtime ++ // TODO: confirm reloc ++ __ call(runtime_entry, relocInfo::runtime_call_type); ++ // Generate oop map ++ OopMap* map = new OopMap(framesize, 0); ++ oop_maps->add_gc_map(the_pc - start, map); ++ ++ // restore the thread (cannot use the pushed argument since arguments ++ // may be overwritten by C code generated by an optimizing compiler); ++ // however can use the register value directly if it is callee saved. ++#ifndef OPT_THREAD ++ __ get_thread(java_thread); ++#endif ++ ++ __ ld_d(SP, java_thread, in_bytes(JavaThread::last_Java_sp_offset())); ++ __ reset_last_Java_frame(java_thread, true); ++ ++ // Restore callee save registers. 
This must be done after resetting the Java frame ++ __ ld_d(S0, SP, S0_off * wordSize); ++ __ ld_d(S1, SP, S1_off * wordSize); ++ __ ld_d(S2, SP, S2_off * wordSize); ++ __ ld_d(S3, SP, S3_off * wordSize); ++ __ ld_d(S4, SP, S4_off * wordSize); ++ __ ld_d(S5, SP, S5_off * wordSize); ++ __ ld_d(S6, SP, S6_off * wordSize); ++ __ ld_d(S7, SP, S7_off * wordSize); ++ ++ // discard arguments ++ __ move(SP, FP); // epilog ++ __ pop(FP); ++ // check for pending exceptions ++#ifdef ASSERT ++ Label L; ++ __ ld_d(AT, java_thread, in_bytes(Thread::pending_exception_offset())); ++ __ bne(AT, R0, L); ++ __ should_not_reach_here(); ++ __ bind(L); ++#endif //ASSERT ++ __ jmp(StubRoutines::forward_exception_entry(), relocInfo::runtime_call_type); ++ ++ RuntimeStub* stub = RuntimeStub::new_runtime_stub(name, ++ &code, ++ frame_complete, ++ framesize, ++ oop_maps, false); ++ return stub->entry_point(); ++ } ++ ++ class MontgomeryMultiplyGenerator : public MacroAssembler { ++ ++ Register Pa_base, Pb_base, Pn_base, Pm_base, inv, Rlen, Rlen2, Ra, Rb, Rm, ++ Rn, Iam, Ibn, Rhi_ab, Rlo_ab, Rhi_mn, Rlo_mn, t0, t1, t2, Ri, Rj; ++ ++ bool _squaring; ++ ++ public: ++ MontgomeryMultiplyGenerator (Assembler *as, bool squaring) ++ : MacroAssembler(as->code()), _squaring(squaring) { ++ ++ // Register allocation ++ ++ Register reg = A0; ++ Pa_base = reg; // Argument registers: ++ if (squaring) ++ Pb_base = Pa_base; ++ else ++ Pb_base = ++reg; ++ Pn_base = ++reg; ++ Rlen = ++reg; ++ inv = ++reg; ++ Rlen2 = inv; // Reuse inv ++ Pm_base = ++reg; ++ ++ // Working registers: ++ Ra = ++reg; // The current digit of a, b, n, and m. ++ Rb = ++reg; ++ Rm = ++reg; ++ Rn = ++reg; ++ ++ Iam = ++reg; // Index to the current/next digit of a, b, n, and m. ++ Ibn = ++reg; ++ ++ t0 = ++reg; // Three registers which form a ++ t1 = ++reg; // triple-precision accumuator. ++ t2 = ++reg; ++ ++ Ri = ++reg; // Inner and outer loop indexes. ++ Rj = ++reg; ++ ++ if (squaring) { ++ Rhi_ab = ++reg; // Product registers: low and high parts ++ reg = S0; ++ Rlo_ab = ++reg; // of a*b and m*n. ++ } else { ++ reg = S0; ++ Rhi_ab = reg; // Product registers: low and high parts ++ Rlo_ab = ++reg; // of a*b and m*n. 
++      }
++
++      Rhi_mn = ++reg;
++      Rlo_mn = ++reg;
++    }
++
++  private:
++    void enter() {
++      addi_d(SP, SP, -6 * wordSize);
++      st_d(FP, SP, 0 * wordSize);
++      move(FP, SP);
++    }
++
++    void leave() {
++      addi_d(T0, FP, 6 * wordSize);
++      ld_d(FP, FP, 0 * wordSize);
++      move(SP, T0);
++    }
++
++    void save_regs() {
++      if (!_squaring)
++        st_d(Rhi_ab, FP, 5 * wordSize);
++      st_d(Rlo_ab, FP, 4 * wordSize);
++      st_d(Rhi_mn, FP, 3 * wordSize);
++      st_d(Rlo_mn, FP, 2 * wordSize);
++      st_d(Pm_base, FP, 1 * wordSize);
++    }
++
++    void restore_regs() {
++      if (!_squaring)
++        ld_d(Rhi_ab, FP, 5 * wordSize);
++      ld_d(Rlo_ab, FP, 4 * wordSize);
++      ld_d(Rhi_mn, FP, 3 * wordSize);
++      ld_d(Rlo_mn, FP, 2 * wordSize);
++      ld_d(Pm_base, FP, 1 * wordSize);
++    }
++
++    template <typename T>
++    void unroll_2(Register count, T block, Register tmp) {
++      Label loop, end, odd;
++      andi(tmp, count, 1);
++      bnez(tmp, odd);
++      beqz(count, end);
++      align(16);
++      bind(loop);
++      (this->*block)();
++      bind(odd);
++      (this->*block)();
++      addi_w(count, count, -2);
++      blt(R0, count, loop);
++      bind(end);
++    }
++
++    template <typename T>
++    void unroll_2(Register count, T block, Register d, Register s, Register tmp) {
++      Label loop, end, odd;
++      andi(tmp, count, 1);
++      bnez(tmp, odd);
++      beqz(count, end);
++      align(16);
++      bind(loop);
++      (this->*block)(d, s, tmp);
++      bind(odd);
++      (this->*block)(d, s, tmp);
++      addi_w(count, count, -2);
++      blt(R0, count, loop);
++      bind(end);
++    }
++
++    void acc(Register Rhi, Register Rlo,
++             Register t0, Register t1, Register t2, Register t, Register c) {
++      add_d(t0, t0, Rlo);
++      OR(t, t1, Rhi);
++      sltu(c, t0, Rlo);
++      add_d(t1, t1, Rhi);
++      add_d(t1, t1, c);
++      sltu(c, t1, t);
++      add_d(t2, t2, c);
++    }
++
++    void pre1(Register i) {
++      block_comment("pre1");
++      // Iam = 0;
++      // Ibn = i;
++
++      slli_w(Ibn, i, LogBytesPerWord);
++
++      // Ra = Pa_base[Iam];
++      // Rb = Pb_base[Ibn];
++      // Rm = Pm_base[Iam];
++      // Rn = Pn_base[Ibn];
++
++      ld_d(Ra, Pa_base, 0);
++      ldx_d(Rb, Pb_base, Ibn);
++      ld_d(Rm, Pm_base, 0);
++      ldx_d(Rn, Pn_base, Ibn);
++
++      move(Iam, R0);
++
++      // Zero the m*n result.
++      move(Rhi_mn, R0);
++      move(Rlo_mn, R0);
++    }
++
++    // The core multiply-accumulate step of a Montgomery
++    // multiplication. The idea is to schedule operations as a
++    // pipeline so that instructions with long latencies (loads and
++    // multiplies) have time to complete before their results are
++    // used. This most benefits in-order implementations of the
++    // architecture but out-of-order ones also benefit.
++    void step() {
++      block_comment("step");
++      // MACC(Ra, Rb, t0, t1, t2);
++      // Ra = Pa_base[++Iam];
++      // Rb = Pb_base[--Ibn];
++      addi_d(Iam, Iam, wordSize);
++      addi_d(Ibn, Ibn, -wordSize);
++      mul_d(Rlo_ab, Ra, Rb);
++      mulh_du(Rhi_ab, Ra, Rb);
++      acc(Rhi_mn, Rlo_mn, t0, t1, t2, Ra, Rb); // The pending m*n from the
++                                               // previous iteration.
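++      // load the next digits of a and b now so the loads overlap with the m*n multiply below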
++ ldx_d(Ra, Pa_base, Iam); ++ ldx_d(Rb, Pb_base, Ibn); ++ ++ // MACC(Rm, Rn, t0, t1, t2); ++ // Rm = Pm_base[Iam]; ++ // Rn = Pn_base[Ibn]; ++ mul_d(Rlo_mn, Rm, Rn); ++ mulh_du(Rhi_mn, Rm, Rn); ++ acc(Rhi_ab, Rlo_ab, t0, t1, t2, Rm, Rn); ++ ldx_d(Rm, Pm_base, Iam); ++ ldx_d(Rn, Pn_base, Ibn); ++ } ++ ++ void post1() { ++ block_comment("post1"); ++ ++ // MACC(Ra, Rb, t0, t1, t2); ++ mul_d(Rlo_ab, Ra, Rb); ++ mulh_du(Rhi_ab, Ra, Rb); ++ acc(Rhi_mn, Rlo_mn, t0, t1, t2, Ra, Rb); // The pending m*n ++ acc(Rhi_ab, Rlo_ab, t0, t1, t2, Ra, Rb); ++ ++ // Pm_base[Iam] = Rm = t0 * inv; ++ mul_d(Rm, t0, inv); ++ stx_d(Rm, Pm_base, Iam); ++ ++ // MACC(Rm, Rn, t0, t1, t2); ++ // t0 = t1; t1 = t2; t2 = 0; ++ mulh_du(Rhi_mn, Rm, Rn); ++ ++#ifndef PRODUCT ++ // assert(m[i] * n[0] + t0 == 0, "broken Montgomery multiply"); ++ { ++ mul_d(Rlo_mn, Rm, Rn); ++ add_d(Rlo_mn, t0, Rlo_mn); ++ Label ok; ++ beqz(Rlo_mn, ok); { ++ stop("broken Montgomery multiply"); ++ } bind(ok); ++ } ++#endif ++ ++ // We have very carefully set things up so that ++ // m[i]*n[0] + t0 == 0 (mod b), so we don't have to calculate ++ // the lower half of Rm * Rn because we know the result already: ++ // it must be -t0. t0 + (-t0) must generate a carry iff ++ // t0 != 0. So, rather than do a mul and an adds we just set ++ // the carry flag iff t0 is nonzero. ++ // ++ // mul_d(Rlo_mn, Rm, Rn); ++ // add_d(t0, t0, Rlo_mn); ++ OR(Ra, t1, Rhi_mn); ++ sltu(Rb, R0, t0); ++ add_d(t0, t1, Rhi_mn); ++ add_d(t0, t0, Rb); ++ sltu(Rb, t0, Ra); ++ add_d(t1, t2, Rb); ++ move(t2, R0); ++ } ++ ++ void pre2(Register i, Register len) { ++ block_comment("pre2"); ++ ++ // Rj == i-len ++ sub_w(Rj, i, len); ++ ++ // Iam = i - len; ++ // Ibn = len; ++ slli_w(Iam, Rj, LogBytesPerWord); ++ slli_w(Ibn, len, LogBytesPerWord); ++ ++ // Ra = Pa_base[++Iam]; ++ // Rb = Pb_base[--Ibn]; ++ // Rm = Pm_base[++Iam]; ++ // Rn = Pn_base[--Ibn]; ++ addi_d(Iam, Iam, wordSize); ++ addi_d(Ibn, Ibn, -wordSize); ++ ++ ldx_d(Ra, Pa_base, Iam); ++ ldx_d(Rb, Pb_base, Ibn); ++ ldx_d(Rm, Pm_base, Iam); ++ ldx_d(Rn, Pn_base, Ibn); ++ ++ move(Rhi_mn, R0); ++ move(Rlo_mn, R0); ++ } ++ ++ void post2(Register i, Register len) { ++ block_comment("post2"); ++ ++ sub_w(Rj, i, len); ++ slli_w(Iam, Rj, LogBytesPerWord); ++ ++ add_d(t0, t0, Rlo_mn); // The pending m*n, low part ++ ++ // As soon as we know the least significant digit of our result, ++ // store it. ++ // Pm_base[i-len] = t0; ++ stx_d(t0, Pm_base, Iam); ++ ++ // t0 = t1; t1 = t2; t2 = 0; ++ OR(Ra, t1, Rhi_mn); ++ sltu(Rb, t0, Rlo_mn); ++ add_d(t0, t1, Rhi_mn); // The pending m*n, high part ++ add_d(t0, t0, Rb); ++ sltu(Rb, t0, Ra); ++ add_d(t1, t2, Rb); ++ move(t2, R0); ++ } ++ ++ // A carry in t0 after Montgomery multiplication means that we ++ // should subtract multiples of n from our result in m. We'll ++ // keep doing that until there is no carry. 
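++    // Each pass of the loop below subtracts n from m once, word by word with borrow, and repeats while the carry word t0 is still non-zero.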
++ void normalize(Register len) { ++ block_comment("normalize"); ++ // while (t0) ++ // t0 = sub(Pm_base, Pn_base, t0, len); ++ Label loop, post, again; ++ Register cnt = t1, i = t2, b = Ra, t = Rb; // Re-use registers; we're done with them now ++ beqz(t0, post); { ++ bind(again); { ++ move(i, R0); ++ move(b, R0); ++ slli_w(cnt, len, LogBytesPerWord); ++ align(16); ++ bind(loop); { ++ ldx_d(Rm, Pm_base, i); ++ ldx_d(Rn, Pn_base, i); ++ sltu(t, Rm, b); ++ sub_d(Rm, Rm, b); ++ sltu(b, Rm, Rn); ++ sub_d(Rm, Rm, Rn); ++ OR(b, b, t); ++ stx_d(Rm, Pm_base, i); ++ addi_w(i, i, BytesPerWord); ++ } blt(i, cnt, loop); ++ sub_d(t0, t0, b); ++ } bnez(t0, again); ++ } bind(post); ++ } ++ ++ // Move memory at s to d, reversing words. ++ // Increments d to end of copied memory ++ // Destroys tmp1, tmp2, tmp3 ++ // Preserves len ++ // Leaves s pointing to the address which was in d at start ++ void reverse(Register d, Register s, Register len, Register tmp1, Register tmp2) { ++ assert(tmp1 < S0 && tmp2 < S0, "register corruption"); ++ ++ alsl_d(s, len, s, LogBytesPerWord - 1); ++ move(tmp1, len); ++ unroll_2(tmp1, &MontgomeryMultiplyGenerator::reverse1, d, s, tmp2); ++ slli_w(s, len, LogBytesPerWord); ++ sub_d(s, d, s); ++ } ++ ++ // where ++ void reverse1(Register d, Register s, Register tmp) { ++ ld_d(tmp, s, -wordSize); ++ addi_d(s, s, -wordSize); ++ addi_d(d, d, wordSize); ++ rotri_d(tmp, tmp, 32); ++ st_d(tmp, d, -wordSize); ++ } ++ ++ public: ++ /** ++ * Fast Montgomery multiplication. The derivation of the ++ * algorithm is in A Cryptographic Library for the Motorola ++ * DSP56000, Dusse and Kaliski, Proc. EUROCRYPT 90, pp. 230-237. ++ * ++ * Arguments: ++ * ++ * Inputs for multiplication: ++ * A0 - int array elements a ++ * A1 - int array elements b ++ * A2 - int array elements n (the modulus) ++ * A3 - int length ++ * A4 - int inv ++ * A5 - int array elements m (the result) ++ * ++ * Inputs for squaring: ++ * A0 - int array elements a ++ * A1 - int array elements n (the modulus) ++ * A2 - int length ++ * A3 - int inv ++ * A4 - int array elements m (the result) ++ * ++ */ ++ address generate_multiply() { ++ Label argh, nothing; ++ bind(argh); ++ stop("MontgomeryMultiply total_allocation must be <= 8192"); ++ ++ align(CodeEntryAlignment); ++ address entry = pc(); ++ ++ beqz(Rlen, nothing); ++ ++ enter(); ++ ++ // Make room. ++ sltui(Ra, Rlen, 513); ++ beqz(Ra, argh); ++ slli_w(Ra, Rlen, exact_log2(4 * sizeof (jint))); ++ sub_d(Ra, SP, Ra); ++ ++ srli_w(Rlen, Rlen, 1); // length in longwords = len/2 ++ ++ { ++ // Copy input args, reversing as we go. We use Ra as a ++ // temporary variable. ++ reverse(Ra, Pa_base, Rlen, t0, t1); ++ if (!_squaring) ++ reverse(Ra, Pb_base, Rlen, t0, t1); ++ reverse(Ra, Pn_base, Rlen, t0, t1); ++ } ++ ++ // Push all call-saved registers and also Pm_base which we'll need ++ // at the end. 
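++      // (Pm_base is overwritten below while the result is built in the scratch area, so the caller's pointer must be restored from the frame before copying back.)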
++ save_regs(); ++ ++#ifndef PRODUCT ++ // assert(inv * n[0] == -1UL, "broken inverse in Montgomery multiply"); ++ { ++ ld_d(Rn, Pn_base, 0); ++ li(t0, -1); ++ mul_d(Rlo_mn, Rn, inv); ++ Label ok; ++ beq(Rlo_mn, t0, ok); { ++ stop("broken inverse in Montgomery multiply"); ++ } bind(ok); ++ } ++#endif ++ ++ move(Pm_base, Ra); ++ ++ move(t0, R0); ++ move(t1, R0); ++ move(t2, R0); ++ ++ block_comment("for (int i = 0; i < len; i++) {"); ++ move(Ri, R0); { ++ Label loop, end; ++ bge(Ri, Rlen, end); ++ ++ bind(loop); ++ pre1(Ri); ++ ++ block_comment(" for (j = i; j; j--) {"); { ++ move(Rj, Ri); ++ unroll_2(Rj, &MontgomeryMultiplyGenerator::step, Rlo_ab); ++ } block_comment(" } // j"); ++ ++ post1(); ++ addi_w(Ri, Ri, 1); ++ blt(Ri, Rlen, loop); ++ bind(end); ++ block_comment("} // i"); ++ } ++ ++ block_comment("for (int i = len; i < 2*len; i++) {"); ++ move(Ri, Rlen); ++ slli_w(Rlen2, Rlen, 1); { ++ Label loop, end; ++ bge(Ri, Rlen2, end); ++ ++ bind(loop); ++ pre2(Ri, Rlen); ++ ++ block_comment(" for (j = len*2-i-1; j; j--) {"); { ++ sub_w(Rj, Rlen2, Ri); ++ addi_w(Rj, Rj, -1); ++ unroll_2(Rj, &MontgomeryMultiplyGenerator::step, Rlo_ab); ++ } block_comment(" } // j"); ++ ++ post2(Ri, Rlen); ++ addi_w(Ri, Ri, 1); ++ blt(Ri, Rlen2, loop); ++ bind(end); ++ } ++ block_comment("} // i"); ++ ++ normalize(Rlen); ++ ++ move(Ra, Pm_base); // Save Pm_base in Ra ++ restore_regs(); // Restore caller's Pm_base ++ ++ // Copy our result into caller's Pm_base ++ reverse(Pm_base, Ra, Rlen, t0, t1); ++ ++ leave(); ++ bind(nothing); ++ jr(RA); ++ ++ return entry; ++ } ++ // In C, approximately: ++ ++ // void ++ // montgomery_multiply(unsigned long Pa_base[], unsigned long Pb_base[], ++ // unsigned long Pn_base[], unsigned long Pm_base[], ++ // unsigned long inv, int len) { ++ // unsigned long t0 = 0, t1 = 0, t2 = 0; // Triple-precision accumulator ++ // unsigned long Ra, Rb, Rn, Rm; ++ // int i, Iam, Ibn; ++ ++ // assert(inv * Pn_base[0] == -1UL, "broken inverse in Montgomery multiply"); ++ ++ // for (i = 0; i < len; i++) { ++ // int j; ++ ++ // Iam = 0; ++ // Ibn = i; ++ ++ // Ra = Pa_base[Iam]; ++ // Rb = Pb_base[Iam]; ++ // Rm = Pm_base[Ibn]; ++ // Rn = Pn_base[Ibn]; ++ ++ // int iters = i; ++ // for (j = 0; iters--; j++) { ++ // assert(Ra == Pa_base[j] && Rb == Pb_base[i-j], "must be"); ++ // MACC(Ra, Rb, t0, t1, t2); ++ // Ra = Pa_base[++Iam]; ++ // Rb = pb_base[--Ibn]; ++ // assert(Rm == Pm_base[j] && Rn == Pn_base[i-j], "must be"); ++ // MACC(Rm, Rn, t0, t1, t2); ++ // Rm = Pm_base[++Iam]; ++ // Rn = Pn_base[--Ibn]; ++ // } ++ ++ // assert(Ra == Pa_base[i] && Rb == Pb_base[0], "must be"); ++ // MACC(Ra, Rb, t0, t1, t2); ++ // Pm_base[Iam] = Rm = t0 * inv; ++ // assert(Rm == Pm_base[i] && Rn == Pn_base[0], "must be"); ++ // MACC(Rm, Rn, t0, t1, t2); ++ ++ // assert(t0 == 0, "broken Montgomery multiply"); ++ ++ // t0 = t1; t1 = t2; t2 = 0; ++ // } ++ ++ // for (i = len; i < 2*len; i++) { ++ // int j; ++ ++ // Iam = i - len; ++ // Ibn = len; ++ ++ // Ra = Pa_base[++Iam]; ++ // Rb = Pb_base[--Ibn]; ++ // Rm = Pm_base[++Iam]; ++ // Rn = Pn_base[--Ibn]; ++ ++ // int iters = len*2-i-1; ++ // for (j = i-len+1; iters--; j++) { ++ // assert(Ra == Pa_base[j] && Rb == Pb_base[i-j], "must be"); ++ // MACC(Ra, Rb, t0, t1, t2); ++ // Ra = Pa_base[++Iam]; ++ // Rb = Pb_base[--Ibn]; ++ // assert(Rm == Pm_base[j] && Rn == Pn_base[i-j], "must be"); ++ // MACC(Rm, Rn, t0, t1, t2); ++ // Rm = Pm_base[++Iam]; ++ // Rn = Pn_base[--Ibn]; ++ // } ++ ++ // Pm_base[i-len] = t0; ++ // t0 = t1; t1 = t2; t2 = 0; ++ // } ++ ++ // while 
(t0) ++ // t0 = sub(Pm_base, Pn_base, t0, len); ++ // } ++ }; ++ ++ // Initialization ++ void generate_initial() { ++ // Generates all stubs and initializes the entry points ++ ++ //------------------------------------------------------------- ++ //----------------------------------------------------------- ++ // entry points that exist in all platforms ++ // Note: This is code that could be shared among different platforms - however the benefit seems to be smaller ++ // than the disadvantage of having a much more complicated generator structure. ++ // See also comment in stubRoutines.hpp. ++ StubRoutines::_forward_exception_entry = generate_forward_exception(); ++ StubRoutines::_call_stub_entry = generate_call_stub(StubRoutines::_call_stub_return_address); ++ // is referenced by megamorphic call ++ StubRoutines::_catch_exception_entry = generate_catch_exception(); ++ ++ StubRoutines::_handler_for_unsafe_access_entry = generate_handler_for_unsafe_access(); ++ ++ StubRoutines::_throw_StackOverflowError_entry = generate_throw_exception("StackOverflowError throw_exception", ++ CAST_FROM_FN_PTR(address, SharedRuntime::throw_StackOverflowError), false); ++ } ++ ++ void generate_all() { ++ // Generates all stubs and initializes the entry points ++ ++ // These entry points require SharedInfo::stack0 to be set up in ++ // non-core builds and need to be relocatable, so they each ++ // fabricate a RuntimeStub internally. ++ StubRoutines::_throw_AbstractMethodError_entry = generate_throw_exception("AbstractMethodError throw_exception", ++ CAST_FROM_FN_PTR(address, SharedRuntime::throw_AbstractMethodError), false); ++ ++ StubRoutines::_throw_IncompatibleClassChangeError_entry = generate_throw_exception("IncompatibleClassChangeError throw_exception", ++ CAST_FROM_FN_PTR(address, SharedRuntime:: throw_IncompatibleClassChangeError), false); ++ ++ StubRoutines::_throw_NullPointerException_at_call_entry = generate_throw_exception("NullPointerException at call throw_exception", ++ CAST_FROM_FN_PTR(address, SharedRuntime::throw_NullPointerException_at_call), false); ++ ++ // entry points that are platform specific ++ ++ // support for verify_oop (must happen after universe_init) ++ StubRoutines::_verify_oop_subroutine_entry = generate_verify_oop(); ++#ifndef CORE ++ // arraycopy stubs used by compilers ++ generate_arraycopy_stubs(); ++#endif ++ ++ // Safefetch stubs. ++ generate_safefetch("SafeFetch32", sizeof(int), &StubRoutines::_safefetch32_entry, ++ &StubRoutines::_safefetch32_fault_pc, ++ &StubRoutines::_safefetch32_continuation_pc); ++ generate_safefetch("SafeFetchN", sizeof(intptr_t), &StubRoutines::_safefetchN_entry, ++ &StubRoutines::_safefetchN_fault_pc, ++ &StubRoutines::_safefetchN_continuation_pc); ++ ++ if (UseMontgomeryMultiplyIntrinsic) { ++ StubCodeMark mark(this, "StubRoutines", "montgomeryMultiply"); ++ MontgomeryMultiplyGenerator g(_masm, false /* squaring */); ++ StubRoutines::_montgomeryMultiply = g.generate_multiply(); ++ } ++ ++ if (UseMontgomerySquareIntrinsic) { ++ StubCodeMark mark(this, "StubRoutines", "montgomerySquare"); ++ MontgomeryMultiplyGenerator g(_masm, true /* squaring */); ++ // We use generate_multiply() rather than generate_square() ++ // because it's faster for the sizes of modulus we care about. 
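For reference, the column-scanning algorithm spelled out in the C pseudocode above can be written as ordinary C. The sketch below is illustrative only: the helper names (macc, mont_mul) and the use of GCC/Clang's unsigned __int128 are assumptions made for this example, not declarations from the patch, but the triple-precision accumulate matches the MACC step and the two loops match the stub's low and high column passes.

#include <stdint.h>

typedef unsigned __int128 u128;

/* t2:t1:t0 += a * b, the MACC step from the pseudocode above. */
static void macc(uint64_t a, uint64_t b, uint64_t *t0, uint64_t *t1, uint64_t *t2) {
  u128 p = (u128)a * b + *t0;
  *t0 = (uint64_t)p;
  u128 c = (p >> 64) + *t1;
  *t1 = (uint64_t)c;
  *t2 += (uint64_t)(c >> 64);
}

/* m = a * b * R^-1 mod n, with R = 2^(64*len) and inv * n[0] == -1 mod 2^64. */
static void mont_mul(const uint64_t *a, const uint64_t *b, const uint64_t *n,
                     uint64_t *m, uint64_t inv, int len) {
  uint64_t t0 = 0, t1 = 0, t2 = 0;
  for (int i = 0; i < len; i++) {                 /* low columns: one reduction word each */
    for (int j = 0; j < i; j++) {
      macc(a[j], b[i - j], &t0, &t1, &t2);
      macc(m[j], n[i - j], &t0, &t1, &t2);
    }
    macc(a[i], b[0], &t0, &t1, &t2);
    m[i] = t0 * inv;                              /* chosen so the column's low word cancels */
    macc(m[i], n[0], &t0, &t1, &t2);
    t0 = t1; t1 = t2; t2 = 0;                     /* shift the accumulator one word right */
  }
  for (int i = len; i < 2 * len; i++) {           /* high columns produce the result words */
    for (int j = i - len + 1; j < len; j++) {
      macc(a[j], b[i - j], &t0, &t1, &t2);
      macc(m[j], n[i - j], &t0, &t1, &t2);
    }
    m[i - len] = t0;
    t0 = t1; t1 = t2; t2 = 0;
  }
  /* like the pseudocode's trailing while (t0) loop, a final conditional
     subtraction of n is still needed to bring the result below n */
}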
++ StubRoutines::_montgomerySquare = g.generate_multiply(); ++ } ++ ++ if (UseAESIntrinsics) { ++ StubRoutines::_aescrypt_encryptBlock = generate_aescrypt_encryptBlock(false); ++ StubRoutines::_aescrypt_decryptBlock = generate_aescrypt_decryptBlock(false); ++ StubRoutines::_cipherBlockChaining_encryptAESCrypt = generate_aescrypt_encryptBlock(true); ++ StubRoutines::_cipherBlockChaining_decryptAESCrypt = generate_aescrypt_decryptBlock(true); ++ } ++ ++ if (UseSHA1Intrinsics) { ++ generate_sha1_implCompress("sha1_implCompress", StubRoutines::_sha1_implCompress, StubRoutines::_sha1_implCompressMB); ++ } ++ ++ if (UseSHA256Intrinsics) { ++ generate_sha256_implCompress("sha256_implCompress", StubRoutines::_sha256_implCompress, StubRoutines::_sha256_implCompressMB); ++ } ++ ++ if (UseCRC32Intrinsics) { ++ // set table address before stub generation which use it ++ StubRoutines::_crc_table_adr = (address)StubRoutines::la::_crc_table; ++ StubRoutines::_updateBytesCRC32 = generate_updateBytesCRC32(); ++ } ++ } ++ ++ public: ++ StubGenerator(CodeBuffer* code, bool all) : StubCodeGenerator(code) { ++ if (all) { ++ generate_all(); ++ } else { ++ generate_initial(); ++ } ++ } ++}; // end class declaration ++ ++void StubGenerator_generate(CodeBuffer* code, bool all) { ++ StubGenerator g(code, all); ++} +diff --git a/hotspot/src/cpu/loongarch/vm/stubRoutines_loongarch_64.cpp b/hotspot/src/cpu/loongarch/vm/stubRoutines_loongarch_64.cpp +new file mode 100644 +index 0000000000..f0f3d55a4e +--- /dev/null ++++ b/hotspot/src/cpu/loongarch/vm/stubRoutines_loongarch_64.cpp +@@ -0,0 +1,264 @@ ++/* ++ * Copyright (c) 2003, 2013, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2020, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#include "precompiled.hpp" ++#include "runtime/deoptimization.hpp" ++#include "runtime/frame.inline.hpp" ++#include "runtime/stubRoutines.hpp" ++#include "runtime/thread.inline.hpp" ++ ++// a description of how to extend it, see the stubRoutines.hpp file. 
++ ++//find the last fp value ++address StubRoutines::la::_call_stub_compiled_return = NULL; ++ ++/** ++ * crc_table[] from jdk/src/share/native/java/util/zip/zlib-1.2.5/crc32.h ++ */ ++juint StubRoutines::la::_crc_table[] = ++{ ++ // Table 0 ++ 0x00000000UL, 0x77073096UL, 0xee0e612cUL, 0x990951baUL, 0x076dc419UL, ++ 0x706af48fUL, 0xe963a535UL, 0x9e6495a3UL, 0x0edb8832UL, 0x79dcb8a4UL, ++ 0xe0d5e91eUL, 0x97d2d988UL, 0x09b64c2bUL, 0x7eb17cbdUL, 0xe7b82d07UL, ++ 0x90bf1d91UL, 0x1db71064UL, 0x6ab020f2UL, 0xf3b97148UL, 0x84be41deUL, ++ 0x1adad47dUL, 0x6ddde4ebUL, 0xf4d4b551UL, 0x83d385c7UL, 0x136c9856UL, ++ 0x646ba8c0UL, 0xfd62f97aUL, 0x8a65c9ecUL, 0x14015c4fUL, 0x63066cd9UL, ++ 0xfa0f3d63UL, 0x8d080df5UL, 0x3b6e20c8UL, 0x4c69105eUL, 0xd56041e4UL, ++ 0xa2677172UL, 0x3c03e4d1UL, 0x4b04d447UL, 0xd20d85fdUL, 0xa50ab56bUL, ++ 0x35b5a8faUL, 0x42b2986cUL, 0xdbbbc9d6UL, 0xacbcf940UL, 0x32d86ce3UL, ++ 0x45df5c75UL, 0xdcd60dcfUL, 0xabd13d59UL, 0x26d930acUL, 0x51de003aUL, ++ 0xc8d75180UL, 0xbfd06116UL, 0x21b4f4b5UL, 0x56b3c423UL, 0xcfba9599UL, ++ 0xb8bda50fUL, 0x2802b89eUL, 0x5f058808UL, 0xc60cd9b2UL, 0xb10be924UL, ++ 0x2f6f7c87UL, 0x58684c11UL, 0xc1611dabUL, 0xb6662d3dUL, 0x76dc4190UL, ++ 0x01db7106UL, 0x98d220bcUL, 0xefd5102aUL, 0x71b18589UL, 0x06b6b51fUL, ++ 0x9fbfe4a5UL, 0xe8b8d433UL, 0x7807c9a2UL, 0x0f00f934UL, 0x9609a88eUL, ++ 0xe10e9818UL, 0x7f6a0dbbUL, 0x086d3d2dUL, 0x91646c97UL, 0xe6635c01UL, ++ 0x6b6b51f4UL, 0x1c6c6162UL, 0x856530d8UL, 0xf262004eUL, 0x6c0695edUL, ++ 0x1b01a57bUL, 0x8208f4c1UL, 0xf50fc457UL, 0x65b0d9c6UL, 0x12b7e950UL, ++ 0x8bbeb8eaUL, 0xfcb9887cUL, 0x62dd1ddfUL, 0x15da2d49UL, 0x8cd37cf3UL, ++ 0xfbd44c65UL, 0x4db26158UL, 0x3ab551ceUL, 0xa3bc0074UL, 0xd4bb30e2UL, ++ 0x4adfa541UL, 0x3dd895d7UL, 0xa4d1c46dUL, 0xd3d6f4fbUL, 0x4369e96aUL, ++ 0x346ed9fcUL, 0xad678846UL, 0xda60b8d0UL, 0x44042d73UL, 0x33031de5UL, ++ 0xaa0a4c5fUL, 0xdd0d7cc9UL, 0x5005713cUL, 0x270241aaUL, 0xbe0b1010UL, ++ 0xc90c2086UL, 0x5768b525UL, 0x206f85b3UL, 0xb966d409UL, 0xce61e49fUL, ++ 0x5edef90eUL, 0x29d9c998UL, 0xb0d09822UL, 0xc7d7a8b4UL, 0x59b33d17UL, ++ 0x2eb40d81UL, 0xb7bd5c3bUL, 0xc0ba6cadUL, 0xedb88320UL, 0x9abfb3b6UL, ++ 0x03b6e20cUL, 0x74b1d29aUL, 0xead54739UL, 0x9dd277afUL, 0x04db2615UL, ++ 0x73dc1683UL, 0xe3630b12UL, 0x94643b84UL, 0x0d6d6a3eUL, 0x7a6a5aa8UL, ++ 0xe40ecf0bUL, 0x9309ff9dUL, 0x0a00ae27UL, 0x7d079eb1UL, 0xf00f9344UL, ++ 0x8708a3d2UL, 0x1e01f268UL, 0x6906c2feUL, 0xf762575dUL, 0x806567cbUL, ++ 0x196c3671UL, 0x6e6b06e7UL, 0xfed41b76UL, 0x89d32be0UL, 0x10da7a5aUL, ++ 0x67dd4accUL, 0xf9b9df6fUL, 0x8ebeeff9UL, 0x17b7be43UL, 0x60b08ed5UL, ++ 0xd6d6a3e8UL, 0xa1d1937eUL, 0x38d8c2c4UL, 0x4fdff252UL, 0xd1bb67f1UL, ++ 0xa6bc5767UL, 0x3fb506ddUL, 0x48b2364bUL, 0xd80d2bdaUL, 0xaf0a1b4cUL, ++ 0x36034af6UL, 0x41047a60UL, 0xdf60efc3UL, 0xa867df55UL, 0x316e8eefUL, ++ 0x4669be79UL, 0xcb61b38cUL, 0xbc66831aUL, 0x256fd2a0UL, 0x5268e236UL, ++ 0xcc0c7795UL, 0xbb0b4703UL, 0x220216b9UL, 0x5505262fUL, 0xc5ba3bbeUL, ++ 0xb2bd0b28UL, 0x2bb45a92UL, 0x5cb36a04UL, 0xc2d7ffa7UL, 0xb5d0cf31UL, ++ 0x2cd99e8bUL, 0x5bdeae1dUL, 0x9b64c2b0UL, 0xec63f226UL, 0x756aa39cUL, ++ 0x026d930aUL, 0x9c0906a9UL, 0xeb0e363fUL, 0x72076785UL, 0x05005713UL, ++ 0x95bf4a82UL, 0xe2b87a14UL, 0x7bb12baeUL, 0x0cb61b38UL, 0x92d28e9bUL, ++ 0xe5d5be0dUL, 0x7cdcefb7UL, 0x0bdbdf21UL, 0x86d3d2d4UL, 0xf1d4e242UL, ++ 0x68ddb3f8UL, 0x1fda836eUL, 0x81be16cdUL, 0xf6b9265bUL, 0x6fb077e1UL, ++ 0x18b74777UL, 0x88085ae6UL, 0xff0f6a70UL, 0x66063bcaUL, 0x11010b5cUL, ++ 0x8f659effUL, 0xf862ae69UL, 0x616bffd3UL, 0x166ccf45UL, 0xa00ae278UL, ++ 0xd70dd2eeUL, 
0x4e048354UL, 0x3903b3c2UL, 0xa7672661UL, 0xd06016f7UL, ++ 0x4969474dUL, 0x3e6e77dbUL, 0xaed16a4aUL, 0xd9d65adcUL, 0x40df0b66UL, ++ 0x37d83bf0UL, 0xa9bcae53UL, 0xdebb9ec5UL, 0x47b2cf7fUL, 0x30b5ffe9UL, ++ 0xbdbdf21cUL, 0xcabac28aUL, 0x53b39330UL, 0x24b4a3a6UL, 0xbad03605UL, ++ 0xcdd70693UL, 0x54de5729UL, 0x23d967bfUL, 0xb3667a2eUL, 0xc4614ab8UL, ++ 0x5d681b02UL, 0x2a6f2b94UL, 0xb40bbe37UL, 0xc30c8ea1UL, 0x5a05df1bUL, ++ 0x2d02ef8dUL, ++ ++ // Table 1 ++ 0x00000000UL, 0x191b3141UL, 0x32366282UL, 0x2b2d53c3UL, 0x646cc504UL, ++ 0x7d77f445UL, 0x565aa786UL, 0x4f4196c7UL, 0xc8d98a08UL, 0xd1c2bb49UL, ++ 0xfaefe88aUL, 0xe3f4d9cbUL, 0xacb54f0cUL, 0xb5ae7e4dUL, 0x9e832d8eUL, ++ 0x87981ccfUL, 0x4ac21251UL, 0x53d92310UL, 0x78f470d3UL, 0x61ef4192UL, ++ 0x2eaed755UL, 0x37b5e614UL, 0x1c98b5d7UL, 0x05838496UL, 0x821b9859UL, ++ 0x9b00a918UL, 0xb02dfadbUL, 0xa936cb9aUL, 0xe6775d5dUL, 0xff6c6c1cUL, ++ 0xd4413fdfUL, 0xcd5a0e9eUL, 0x958424a2UL, 0x8c9f15e3UL, 0xa7b24620UL, ++ 0xbea97761UL, 0xf1e8e1a6UL, 0xe8f3d0e7UL, 0xc3de8324UL, 0xdac5b265UL, ++ 0x5d5daeaaUL, 0x44469febUL, 0x6f6bcc28UL, 0x7670fd69UL, 0x39316baeUL, ++ 0x202a5aefUL, 0x0b07092cUL, 0x121c386dUL, 0xdf4636f3UL, 0xc65d07b2UL, ++ 0xed705471UL, 0xf46b6530UL, 0xbb2af3f7UL, 0xa231c2b6UL, 0x891c9175UL, ++ 0x9007a034UL, 0x179fbcfbUL, 0x0e848dbaUL, 0x25a9de79UL, 0x3cb2ef38UL, ++ 0x73f379ffUL, 0x6ae848beUL, 0x41c51b7dUL, 0x58de2a3cUL, 0xf0794f05UL, ++ 0xe9627e44UL, 0xc24f2d87UL, 0xdb541cc6UL, 0x94158a01UL, 0x8d0ebb40UL, ++ 0xa623e883UL, 0xbf38d9c2UL, 0x38a0c50dUL, 0x21bbf44cUL, 0x0a96a78fUL, ++ 0x138d96ceUL, 0x5ccc0009UL, 0x45d73148UL, 0x6efa628bUL, 0x77e153caUL, ++ 0xbabb5d54UL, 0xa3a06c15UL, 0x888d3fd6UL, 0x91960e97UL, 0xded79850UL, ++ 0xc7cca911UL, 0xece1fad2UL, 0xf5facb93UL, 0x7262d75cUL, 0x6b79e61dUL, ++ 0x4054b5deUL, 0x594f849fUL, 0x160e1258UL, 0x0f152319UL, 0x243870daUL, ++ 0x3d23419bUL, 0x65fd6ba7UL, 0x7ce65ae6UL, 0x57cb0925UL, 0x4ed03864UL, ++ 0x0191aea3UL, 0x188a9fe2UL, 0x33a7cc21UL, 0x2abcfd60UL, 0xad24e1afUL, ++ 0xb43fd0eeUL, 0x9f12832dUL, 0x8609b26cUL, 0xc94824abUL, 0xd05315eaUL, ++ 0xfb7e4629UL, 0xe2657768UL, 0x2f3f79f6UL, 0x362448b7UL, 0x1d091b74UL, ++ 0x04122a35UL, 0x4b53bcf2UL, 0x52488db3UL, 0x7965de70UL, 0x607eef31UL, ++ 0xe7e6f3feUL, 0xfefdc2bfUL, 0xd5d0917cUL, 0xcccba03dUL, 0x838a36faUL, ++ 0x9a9107bbUL, 0xb1bc5478UL, 0xa8a76539UL, 0x3b83984bUL, 0x2298a90aUL, ++ 0x09b5fac9UL, 0x10aecb88UL, 0x5fef5d4fUL, 0x46f46c0eUL, 0x6dd93fcdUL, ++ 0x74c20e8cUL, 0xf35a1243UL, 0xea412302UL, 0xc16c70c1UL, 0xd8774180UL, ++ 0x9736d747UL, 0x8e2de606UL, 0xa500b5c5UL, 0xbc1b8484UL, 0x71418a1aUL, ++ 0x685abb5bUL, 0x4377e898UL, 0x5a6cd9d9UL, 0x152d4f1eUL, 0x0c367e5fUL, ++ 0x271b2d9cUL, 0x3e001cddUL, 0xb9980012UL, 0xa0833153UL, 0x8bae6290UL, ++ 0x92b553d1UL, 0xddf4c516UL, 0xc4eff457UL, 0xefc2a794UL, 0xf6d996d5UL, ++ 0xae07bce9UL, 0xb71c8da8UL, 0x9c31de6bUL, 0x852aef2aUL, 0xca6b79edUL, ++ 0xd37048acUL, 0xf85d1b6fUL, 0xe1462a2eUL, 0x66de36e1UL, 0x7fc507a0UL, ++ 0x54e85463UL, 0x4df36522UL, 0x02b2f3e5UL, 0x1ba9c2a4UL, 0x30849167UL, ++ 0x299fa026UL, 0xe4c5aeb8UL, 0xfdde9ff9UL, 0xd6f3cc3aUL, 0xcfe8fd7bUL, ++ 0x80a96bbcUL, 0x99b25afdUL, 0xb29f093eUL, 0xab84387fUL, 0x2c1c24b0UL, ++ 0x350715f1UL, 0x1e2a4632UL, 0x07317773UL, 0x4870e1b4UL, 0x516bd0f5UL, ++ 0x7a468336UL, 0x635db277UL, 0xcbfad74eUL, 0xd2e1e60fUL, 0xf9ccb5ccUL, ++ 0xe0d7848dUL, 0xaf96124aUL, 0xb68d230bUL, 0x9da070c8UL, 0x84bb4189UL, ++ 0x03235d46UL, 0x1a386c07UL, 0x31153fc4UL, 0x280e0e85UL, 0x674f9842UL, ++ 0x7e54a903UL, 0x5579fac0UL, 0x4c62cb81UL, 0x8138c51fUL, 0x9823f45eUL, ++ 0xb30ea79dUL, 0xaa1596dcUL, 
0xe554001bUL, 0xfc4f315aUL, 0xd7626299UL, ++ 0xce7953d8UL, 0x49e14f17UL, 0x50fa7e56UL, 0x7bd72d95UL, 0x62cc1cd4UL, ++ 0x2d8d8a13UL, 0x3496bb52UL, 0x1fbbe891UL, 0x06a0d9d0UL, 0x5e7ef3ecUL, ++ 0x4765c2adUL, 0x6c48916eUL, 0x7553a02fUL, 0x3a1236e8UL, 0x230907a9UL, ++ 0x0824546aUL, 0x113f652bUL, 0x96a779e4UL, 0x8fbc48a5UL, 0xa4911b66UL, ++ 0xbd8a2a27UL, 0xf2cbbce0UL, 0xebd08da1UL, 0xc0fdde62UL, 0xd9e6ef23UL, ++ 0x14bce1bdUL, 0x0da7d0fcUL, 0x268a833fUL, 0x3f91b27eUL, 0x70d024b9UL, ++ 0x69cb15f8UL, 0x42e6463bUL, 0x5bfd777aUL, 0xdc656bb5UL, 0xc57e5af4UL, ++ 0xee530937UL, 0xf7483876UL, 0xb809aeb1UL, 0xa1129ff0UL, 0x8a3fcc33UL, ++ 0x9324fd72UL, ++ ++ // Table 2 ++ 0x00000000UL, 0x01c26a37UL, 0x0384d46eUL, 0x0246be59UL, 0x0709a8dcUL, ++ 0x06cbc2ebUL, 0x048d7cb2UL, 0x054f1685UL, 0x0e1351b8UL, 0x0fd13b8fUL, ++ 0x0d9785d6UL, 0x0c55efe1UL, 0x091af964UL, 0x08d89353UL, 0x0a9e2d0aUL, ++ 0x0b5c473dUL, 0x1c26a370UL, 0x1de4c947UL, 0x1fa2771eUL, 0x1e601d29UL, ++ 0x1b2f0bacUL, 0x1aed619bUL, 0x18abdfc2UL, 0x1969b5f5UL, 0x1235f2c8UL, ++ 0x13f798ffUL, 0x11b126a6UL, 0x10734c91UL, 0x153c5a14UL, 0x14fe3023UL, ++ 0x16b88e7aUL, 0x177ae44dUL, 0x384d46e0UL, 0x398f2cd7UL, 0x3bc9928eUL, ++ 0x3a0bf8b9UL, 0x3f44ee3cUL, 0x3e86840bUL, 0x3cc03a52UL, 0x3d025065UL, ++ 0x365e1758UL, 0x379c7d6fUL, 0x35dac336UL, 0x3418a901UL, 0x3157bf84UL, ++ 0x3095d5b3UL, 0x32d36beaUL, 0x331101ddUL, 0x246be590UL, 0x25a98fa7UL, ++ 0x27ef31feUL, 0x262d5bc9UL, 0x23624d4cUL, 0x22a0277bUL, 0x20e69922UL, ++ 0x2124f315UL, 0x2a78b428UL, 0x2bbade1fUL, 0x29fc6046UL, 0x283e0a71UL, ++ 0x2d711cf4UL, 0x2cb376c3UL, 0x2ef5c89aUL, 0x2f37a2adUL, 0x709a8dc0UL, ++ 0x7158e7f7UL, 0x731e59aeUL, 0x72dc3399UL, 0x7793251cUL, 0x76514f2bUL, ++ 0x7417f172UL, 0x75d59b45UL, 0x7e89dc78UL, 0x7f4bb64fUL, 0x7d0d0816UL, ++ 0x7ccf6221UL, 0x798074a4UL, 0x78421e93UL, 0x7a04a0caUL, 0x7bc6cafdUL, ++ 0x6cbc2eb0UL, 0x6d7e4487UL, 0x6f38fadeUL, 0x6efa90e9UL, 0x6bb5866cUL, ++ 0x6a77ec5bUL, 0x68315202UL, 0x69f33835UL, 0x62af7f08UL, 0x636d153fUL, ++ 0x612bab66UL, 0x60e9c151UL, 0x65a6d7d4UL, 0x6464bde3UL, 0x662203baUL, ++ 0x67e0698dUL, 0x48d7cb20UL, 0x4915a117UL, 0x4b531f4eUL, 0x4a917579UL, ++ 0x4fde63fcUL, 0x4e1c09cbUL, 0x4c5ab792UL, 0x4d98dda5UL, 0x46c49a98UL, ++ 0x4706f0afUL, 0x45404ef6UL, 0x448224c1UL, 0x41cd3244UL, 0x400f5873UL, ++ 0x4249e62aUL, 0x438b8c1dUL, 0x54f16850UL, 0x55330267UL, 0x5775bc3eUL, ++ 0x56b7d609UL, 0x53f8c08cUL, 0x523aaabbUL, 0x507c14e2UL, 0x51be7ed5UL, ++ 0x5ae239e8UL, 0x5b2053dfUL, 0x5966ed86UL, 0x58a487b1UL, 0x5deb9134UL, ++ 0x5c29fb03UL, 0x5e6f455aUL, 0x5fad2f6dUL, 0xe1351b80UL, 0xe0f771b7UL, ++ 0xe2b1cfeeUL, 0xe373a5d9UL, 0xe63cb35cUL, 0xe7fed96bUL, 0xe5b86732UL, ++ 0xe47a0d05UL, 0xef264a38UL, 0xeee4200fUL, 0xeca29e56UL, 0xed60f461UL, ++ 0xe82fe2e4UL, 0xe9ed88d3UL, 0xebab368aUL, 0xea695cbdUL, 0xfd13b8f0UL, ++ 0xfcd1d2c7UL, 0xfe976c9eUL, 0xff5506a9UL, 0xfa1a102cUL, 0xfbd87a1bUL, ++ 0xf99ec442UL, 0xf85cae75UL, 0xf300e948UL, 0xf2c2837fUL, 0xf0843d26UL, ++ 0xf1465711UL, 0xf4094194UL, 0xf5cb2ba3UL, 0xf78d95faUL, 0xf64fffcdUL, ++ 0xd9785d60UL, 0xd8ba3757UL, 0xdafc890eUL, 0xdb3ee339UL, 0xde71f5bcUL, ++ 0xdfb39f8bUL, 0xddf521d2UL, 0xdc374be5UL, 0xd76b0cd8UL, 0xd6a966efUL, ++ 0xd4efd8b6UL, 0xd52db281UL, 0xd062a404UL, 0xd1a0ce33UL, 0xd3e6706aUL, ++ 0xd2241a5dUL, 0xc55efe10UL, 0xc49c9427UL, 0xc6da2a7eUL, 0xc7184049UL, ++ 0xc25756ccUL, 0xc3953cfbUL, 0xc1d382a2UL, 0xc011e895UL, 0xcb4dafa8UL, ++ 0xca8fc59fUL, 0xc8c97bc6UL, 0xc90b11f1UL, 0xcc440774UL, 0xcd866d43UL, ++ 0xcfc0d31aUL, 0xce02b92dUL, 0x91af9640UL, 0x906dfc77UL, 0x922b422eUL, ++ 0x93e92819UL, 0x96a63e9cUL, 0x976454abUL, 
0x9522eaf2UL, 0x94e080c5UL, ++ 0x9fbcc7f8UL, 0x9e7eadcfUL, 0x9c381396UL, 0x9dfa79a1UL, 0x98b56f24UL, ++ 0x99770513UL, 0x9b31bb4aUL, 0x9af3d17dUL, 0x8d893530UL, 0x8c4b5f07UL, ++ 0x8e0de15eUL, 0x8fcf8b69UL, 0x8a809decUL, 0x8b42f7dbUL, 0x89044982UL, ++ 0x88c623b5UL, 0x839a6488UL, 0x82580ebfUL, 0x801eb0e6UL, 0x81dcdad1UL, ++ 0x8493cc54UL, 0x8551a663UL, 0x8717183aUL, 0x86d5720dUL, 0xa9e2d0a0UL, ++ 0xa820ba97UL, 0xaa6604ceUL, 0xaba46ef9UL, 0xaeeb787cUL, 0xaf29124bUL, ++ 0xad6fac12UL, 0xacadc625UL, 0xa7f18118UL, 0xa633eb2fUL, 0xa4755576UL, ++ 0xa5b73f41UL, 0xa0f829c4UL, 0xa13a43f3UL, 0xa37cfdaaUL, 0xa2be979dUL, ++ 0xb5c473d0UL, 0xb40619e7UL, 0xb640a7beUL, 0xb782cd89UL, 0xb2cddb0cUL, ++ 0xb30fb13bUL, 0xb1490f62UL, 0xb08b6555UL, 0xbbd72268UL, 0xba15485fUL, ++ 0xb853f606UL, 0xb9919c31UL, 0xbcde8ab4UL, 0xbd1ce083UL, 0xbf5a5edaUL, ++ 0xbe9834edUL, ++ ++ // Table 3 ++ 0x00000000UL, 0xb8bc6765UL, 0xaa09c88bUL, 0x12b5afeeUL, 0x8f629757UL, ++ 0x37def032UL, 0x256b5fdcUL, 0x9dd738b9UL, 0xc5b428efUL, 0x7d084f8aUL, ++ 0x6fbde064UL, 0xd7018701UL, 0x4ad6bfb8UL, 0xf26ad8ddUL, 0xe0df7733UL, ++ 0x58631056UL, 0x5019579fUL, 0xe8a530faUL, 0xfa109f14UL, 0x42acf871UL, ++ 0xdf7bc0c8UL, 0x67c7a7adUL, 0x75720843UL, 0xcdce6f26UL, 0x95ad7f70UL, ++ 0x2d111815UL, 0x3fa4b7fbUL, 0x8718d09eUL, 0x1acfe827UL, 0xa2738f42UL, ++ 0xb0c620acUL, 0x087a47c9UL, 0xa032af3eUL, 0x188ec85bUL, 0x0a3b67b5UL, ++ 0xb28700d0UL, 0x2f503869UL, 0x97ec5f0cUL, 0x8559f0e2UL, 0x3de59787UL, ++ 0x658687d1UL, 0xdd3ae0b4UL, 0xcf8f4f5aUL, 0x7733283fUL, 0xeae41086UL, ++ 0x525877e3UL, 0x40edd80dUL, 0xf851bf68UL, 0xf02bf8a1UL, 0x48979fc4UL, ++ 0x5a22302aUL, 0xe29e574fUL, 0x7f496ff6UL, 0xc7f50893UL, 0xd540a77dUL, ++ 0x6dfcc018UL, 0x359fd04eUL, 0x8d23b72bUL, 0x9f9618c5UL, 0x272a7fa0UL, ++ 0xbafd4719UL, 0x0241207cUL, 0x10f48f92UL, 0xa848e8f7UL, 0x9b14583dUL, ++ 0x23a83f58UL, 0x311d90b6UL, 0x89a1f7d3UL, 0x1476cf6aUL, 0xaccaa80fUL, ++ 0xbe7f07e1UL, 0x06c36084UL, 0x5ea070d2UL, 0xe61c17b7UL, 0xf4a9b859UL, ++ 0x4c15df3cUL, 0xd1c2e785UL, 0x697e80e0UL, 0x7bcb2f0eUL, 0xc377486bUL, ++ 0xcb0d0fa2UL, 0x73b168c7UL, 0x6104c729UL, 0xd9b8a04cUL, 0x446f98f5UL, ++ 0xfcd3ff90UL, 0xee66507eUL, 0x56da371bUL, 0x0eb9274dUL, 0xb6054028UL, ++ 0xa4b0efc6UL, 0x1c0c88a3UL, 0x81dbb01aUL, 0x3967d77fUL, 0x2bd27891UL, ++ 0x936e1ff4UL, 0x3b26f703UL, 0x839a9066UL, 0x912f3f88UL, 0x299358edUL, ++ 0xb4446054UL, 0x0cf80731UL, 0x1e4da8dfUL, 0xa6f1cfbaUL, 0xfe92dfecUL, ++ 0x462eb889UL, 0x549b1767UL, 0xec277002UL, 0x71f048bbUL, 0xc94c2fdeUL, ++ 0xdbf98030UL, 0x6345e755UL, 0x6b3fa09cUL, 0xd383c7f9UL, 0xc1366817UL, ++ 0x798a0f72UL, 0xe45d37cbUL, 0x5ce150aeUL, 0x4e54ff40UL, 0xf6e89825UL, ++ 0xae8b8873UL, 0x1637ef16UL, 0x048240f8UL, 0xbc3e279dUL, 0x21e91f24UL, ++ 0x99557841UL, 0x8be0d7afUL, 0x335cb0caUL, 0xed59b63bUL, 0x55e5d15eUL, ++ 0x47507eb0UL, 0xffec19d5UL, 0x623b216cUL, 0xda874609UL, 0xc832e9e7UL, ++ 0x708e8e82UL, 0x28ed9ed4UL, 0x9051f9b1UL, 0x82e4565fUL, 0x3a58313aUL, ++ 0xa78f0983UL, 0x1f336ee6UL, 0x0d86c108UL, 0xb53aa66dUL, 0xbd40e1a4UL, ++ 0x05fc86c1UL, 0x1749292fUL, 0xaff54e4aUL, 0x322276f3UL, 0x8a9e1196UL, ++ 0x982bbe78UL, 0x2097d91dUL, 0x78f4c94bUL, 0xc048ae2eUL, 0xd2fd01c0UL, ++ 0x6a4166a5UL, 0xf7965e1cUL, 0x4f2a3979UL, 0x5d9f9697UL, 0xe523f1f2UL, ++ 0x4d6b1905UL, 0xf5d77e60UL, 0xe762d18eUL, 0x5fdeb6ebUL, 0xc2098e52UL, ++ 0x7ab5e937UL, 0x680046d9UL, 0xd0bc21bcUL, 0x88df31eaUL, 0x3063568fUL, ++ 0x22d6f961UL, 0x9a6a9e04UL, 0x07bda6bdUL, 0xbf01c1d8UL, 0xadb46e36UL, ++ 0x15080953UL, 0x1d724e9aUL, 0xa5ce29ffUL, 0xb77b8611UL, 0x0fc7e174UL, ++ 0x9210d9cdUL, 0x2aacbea8UL, 0x38191146UL, 0x80a57623UL, 
0xd8c66675UL, ++ 0x607a0110UL, 0x72cfaefeUL, 0xca73c99bUL, 0x57a4f122UL, 0xef189647UL, ++ 0xfdad39a9UL, 0x45115eccUL, 0x764dee06UL, 0xcef18963UL, 0xdc44268dUL, ++ 0x64f841e8UL, 0xf92f7951UL, 0x41931e34UL, 0x5326b1daUL, 0xeb9ad6bfUL, ++ 0xb3f9c6e9UL, 0x0b45a18cUL, 0x19f00e62UL, 0xa14c6907UL, 0x3c9b51beUL, ++ 0x842736dbUL, 0x96929935UL, 0x2e2efe50UL, 0x2654b999UL, 0x9ee8defcUL, ++ 0x8c5d7112UL, 0x34e11677UL, 0xa9362eceUL, 0x118a49abUL, 0x033fe645UL, ++ 0xbb838120UL, 0xe3e09176UL, 0x5b5cf613UL, 0x49e959fdUL, 0xf1553e98UL, ++ 0x6c820621UL, 0xd43e6144UL, 0xc68bceaaUL, 0x7e37a9cfUL, 0xd67f4138UL, ++ 0x6ec3265dUL, 0x7c7689b3UL, 0xc4caeed6UL, 0x591dd66fUL, 0xe1a1b10aUL, ++ 0xf3141ee4UL, 0x4ba87981UL, 0x13cb69d7UL, 0xab770eb2UL, 0xb9c2a15cUL, ++ 0x017ec639UL, 0x9ca9fe80UL, 0x241599e5UL, 0x36a0360bUL, 0x8e1c516eUL, ++ 0x866616a7UL, 0x3eda71c2UL, 0x2c6fde2cUL, 0x94d3b949UL, 0x090481f0UL, ++ 0xb1b8e695UL, 0xa30d497bUL, 0x1bb12e1eUL, 0x43d23e48UL, 0xfb6e592dUL, ++ 0xe9dbf6c3UL, 0x516791a6UL, 0xccb0a91fUL, 0x740cce7aUL, 0x66b96194UL, ++ 0xde0506f1UL, ++ // Constants for Neon CRC232 implementation ++ // k3 = 0x78ED02D5 = x^288 mod poly - bit reversed ++ // k4 = 0xED627DAE = x^256 mod poly - bit reversed ++ 0x78ED02D5UL, 0xED627DAEUL, // k4:k3 ++ 0xED78D502UL, 0x62EDAE7DUL, // byte swap ++ 0x02D578EDUL, 0x7DAEED62UL, // word swap ++ 0xD502ED78UL, 0xAE7D62EDUL, // byte swap of word swap ++}; +diff --git a/hotspot/src/cpu/loongarch/vm/stubRoutines_loongarch_64.hpp b/hotspot/src/cpu/loongarch/vm/stubRoutines_loongarch_64.hpp +new file mode 100644 +index 0000000000..d020a527e4 +--- /dev/null ++++ b/hotspot/src/cpu/loongarch/vm/stubRoutines_loongarch_64.hpp +@@ -0,0 +1,60 @@ ++/* ++ * Copyright (c) 2003, 2013, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2020, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#ifndef CPU_LOONGARCH_VM_STUBROUTINES_LOONGARCH_64_HPP ++#define CPU_LOONGARCH_VM_STUBROUTINES_LOONGARCH_64_HPP ++ ++// This file holds the platform specific parts of the StubRoutines ++// definition. See stubRoutines.hpp for a description on how to ++// extend it. 
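The four 256-entry tables and the trailing fold constants that make up _crc_table above are taken from zlib. As a hedged illustration (the function name below is invented for this example, and it shows the standard zlib construction rather than code from this patch), table 0 is the plain byte-at-a-time table for the reflected polynomial 0xEDB88320, and tables 1 through 3 are obtained by pushing each entry through eight more zero bits:

#include <stdint.h>

static uint32_t crc_table[4][256];

static void make_crc_tables(void) {
  for (uint32_t n = 0; n < 256; n++) {            /* table 0: one input byte */
    uint32_t c = n;
    for (int k = 0; k < 8; k++)
      c = (c & 1) ? 0xEDB88320u ^ (c >> 1) : (c >> 1);
    crc_table[0][n] = c;
  }
  for (uint32_t n = 0; n < 256; n++) {            /* tables 1..3: the slicing tables */
    uint32_t c = crc_table[0][n];
    for (int t = 1; t < 4; t++) {
      c = crc_table[0][c & 0xff] ^ (c >> 8);      /* feed eight more zero bits through */
      crc_table[t][n] = c;
    }
  }
}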
++ ++static bool returns_to_call_stub(address return_pc){ ++ return return_pc == _call_stub_return_address||return_pc == la::get_call_stub_compiled_return(); ++} ++ ++enum platform_dependent_constants { ++ code_size1 = 20000, // simply increase if too small (assembler will crash if too small) ++ code_size2 = 60000 // simply increase if too small (assembler will crash if too small) ++}; ++ ++class la { ++ friend class StubGenerator; ++ friend class VMStructs; ++ private: ++ // If we call compiled code directly from the call stub we will ++ // need to adjust the return back to the call stub to a specialized ++ // piece of code that can handle compiled results and cleaning the fpu ++ // stack. The variable holds that location. ++ static address _call_stub_compiled_return; ++ static juint _crc_table[]; ++ ++public: ++ // Call back points for traps in compiled code ++ static address get_call_stub_compiled_return() { return _call_stub_compiled_return; } ++ static void set_call_stub_compiled_return(address ret){ _call_stub_compiled_return = ret; } ++ ++}; ++ ++#endif // CPU_LOONGARCH_VM_STUBROUTINES_LOONGARCH_64_HPP +diff --git a/hotspot/src/cpu/loongarch/vm/templateInterpreterGenerator_loongarch.hpp b/hotspot/src/cpu/loongarch/vm/templateInterpreterGenerator_loongarch.hpp +new file mode 100644 +index 0000000000..213e69b0b2 +--- /dev/null ++++ b/hotspot/src/cpu/loongarch/vm/templateInterpreterGenerator_loongarch.hpp +@@ -0,0 +1,35 @@ ++/* ++ * Copyright (c) 1997, 2010, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2020, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#ifndef CPU_LOONGARCH_VM_TEMPLATEINTERPRETERGENERATOR_LOONGARCH_HPP ++#define CPU_LOONGARCH_VM_TEMPLATEINTERPRETERGENERATOR_LOONGARCH_HPP ++ ++ protected: ++ ++ void generate_fixed_frame(bool native_call); ++ ++ // address generate_asm_interpreter_entry(bool synchronized); ++ ++#endif // CPU_LOONGARCH_VM_TEMPLATEINTERPRETERGENERATOR_LOONGARCH_HPP +diff --git a/hotspot/src/cpu/loongarch/vm/templateInterpreter_loongarch.hpp b/hotspot/src/cpu/loongarch/vm/templateInterpreter_loongarch.hpp +new file mode 100644 +index 0000000000..39e3ad7bb5 +--- /dev/null ++++ b/hotspot/src/cpu/loongarch/vm/templateInterpreter_loongarch.hpp +@@ -0,0 +1,41 @@ ++/* ++ * Copyright (c) 1997, 2013, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2020, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#ifndef CPU_LOONGARCH_VM_TEMPLATEINTERPRETER_LOONGARCH_HPP ++#define CPU_LOONGARCH_VM_TEMPLATEINTERPRETER_LOONGARCH_HPP ++ ++ ++ protected: ++ ++ // Size of interpreter code. Increase if too small. Interpreter will ++ // fail with a guarantee ("not enough space for interpreter generation"); ++ // if too small. ++ // Run with +PrintInterpreter to get the VM to print out the size. ++ // Max size with JVMTI ++ // The sethi() instruction generates lots more instructions when shell ++ // stack limit is unlimited, so that's why this is much bigger. ++ const static int InterpreterCodeSize = 500 * K; ++ ++#endif // CPU_LOONGARCH_VM_TEMPLATEINTERPRETER_LOONGARCH_HPP +diff --git a/hotspot/src/cpu/loongarch/vm/templateInterpreter_loongarch_64.cpp b/hotspot/src/cpu/loongarch/vm/templateInterpreter_loongarch_64.cpp +new file mode 100644 +index 0000000000..b25086a399 +--- /dev/null ++++ b/hotspot/src/cpu/loongarch/vm/templateInterpreter_loongarch_64.cpp +@@ -0,0 +1,2335 @@ ++/* ++ * Copyright (c) 2003, 2013, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ * ++ */ ++ ++#include "precompiled.hpp" ++#include "asm/macroAssembler.hpp" ++#include "interpreter/bytecodeHistogram.hpp" ++#include "interpreter/interpreter.hpp" ++#include "interpreter/interpreterGenerator.hpp" ++#include "interpreter/interpreterRuntime.hpp" ++#include "interpreter/templateTable.hpp" ++#include "oops/arrayOop.hpp" ++#include "oops/methodData.hpp" ++#include "oops/method.hpp" ++#include "oops/oop.inline.hpp" ++#include "prims/jvmtiExport.hpp" ++#include "prims/jvmtiThreadState.hpp" ++#include "runtime/arguments.hpp" ++#include "runtime/deoptimization.hpp" ++#include "runtime/frame.inline.hpp" ++#include "runtime/sharedRuntime.hpp" ++#include "runtime/stubRoutines.hpp" ++#include "runtime/synchronizer.hpp" ++#include "runtime/timer.hpp" ++#include "runtime/vframeArray.hpp" ++#include "utilities/debug.hpp" ++ ++#define __ _masm-> ++ ++#define A0 RA0 ++#define A1 RA1 ++#define A2 RA2 ++#define A3 RA3 ++#define A4 RA4 ++#define A5 RA5 ++#define A6 RA6 ++#define A7 RA7 ++#define T0 RT0 ++#define T1 RT1 ++#define T2 RT2 ++#define T3 RT3 ++#define T4 RT4 ++#define T5 RT5 ++#define T6 RT6 ++#define T7 RT7 ++#define T8 RT8 ++ ++#ifndef CC_INTERP ++ ++// asm based interpreter deoptimization helpers ++int AbstractInterpreter::size_activation(int max_stack, ++ int temps, ++ int extra_args, ++ int monitors, ++ int callee_params, ++ int callee_locals, ++ bool is_top_frame) { ++ // Note: This calculation must exactly parallel the frame setup ++ // in AbstractInterpreterGenerator::generate_method_entry. ++ ++ // fixed size of an interpreter frame: ++ int overhead = frame::sender_sp_offset - ++ frame::interpreter_frame_initial_sp_offset; ++ // Our locals were accounted for by the caller (or last_frame_adjust ++ // on the transistion) Since the callee parameters already account ++ // for the callee's params we only need to account for the extra ++ // locals. ++ int size = overhead + ++ (callee_locals - callee_params)*Interpreter::stackElementWords + ++ monitors * frame::interpreter_frame_monitor_size() + ++ temps* Interpreter::stackElementWords + extra_args; ++ ++ return size; ++} ++ ++ ++const int Interpreter::return_sentinel = 0xfeedbeed; ++const int method_offset = frame::interpreter_frame_method_offset * wordSize; ++const int bci_offset = frame::interpreter_frame_bcx_offset * wordSize; ++const int locals_offset = frame::interpreter_frame_locals_offset * wordSize; ++ ++//----------------------------------------------------------------------------- ++ ++address TemplateInterpreterGenerator::generate_StackOverflowError_handler() { ++ address entry = __ pc(); ++ ++#ifdef ASSERT ++ { ++ Label L; ++ __ addi_d(T1, FP, frame::interpreter_frame_monitor_block_top_offset * wordSize); ++ __ sub_d(T1, T1, SP); // T1 = maximal sp for current fp ++ __ bge(T1, R0, L); // check if frame is complete ++ __ stop("interpreter frame not set up"); ++ __ bind(L); ++ } ++#endif // ASSERT ++ // Restore bcp under the assumption that the current frame is still ++ // interpreted ++ // FIXME: please change the func restore_bcp ++ // S0 is the conventional register for bcp ++ __ restore_bcp(); ++ ++ // expression stack must be empty before entering the VM if an ++ // exception happened ++ __ empty_expression_stack(); ++ // throw exception ++ // FIXME: why do not pass parameter thread ? 
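Written out as plain arithmetic, the size_activation() calculation above adds a few independent word counts. The helper below is only a restating sketch: the names are shortened and the platform constants are left as parameters instead of repeating their LoongArch values.

/* Words needed for one interpreter activation; mirrors size_activation() above. */
static int interpreter_frame_words(int fixed_overhead,       /* sender_sp_offset - initial_sp_offset */
                                   int callee_locals, int callee_params,
                                   int monitors, int monitor_words,
                                   int temps, int stack_element_words,
                                   int extra_args) {
  return fixed_overhead
       + (callee_locals - callee_params) * stack_element_words   /* only the extra locals */
       + monitors * monitor_words                                /* monitor block entries */
       + temps * stack_element_words                             /* expression-stack temps */
       + extra_args;                                             /* extra argument slots */
}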
++ __ call_VM(NOREG, CAST_FROM_FN_PTR(address, InterpreterRuntime::throw_StackOverflowError)); ++ return entry; ++} ++ ++address TemplateInterpreterGenerator::generate_ArrayIndexOutOfBounds_handler( ++ const char* name) { ++ address entry = __ pc(); ++ // expression stack must be empty before entering the VM if an ++ // exception happened ++ __ empty_expression_stack(); ++ __ li(A1, (long)name); ++ __ call_VM(noreg, CAST_FROM_FN_PTR(address, ++ InterpreterRuntime::throw_ArrayIndexOutOfBoundsException), A1, A2); ++ return entry; ++} ++ ++address TemplateInterpreterGenerator::generate_ClassCastException_handler() { ++ address entry = __ pc(); ++ // expression stack must be empty before entering the VM if an ++ // exception happened ++ __ empty_expression_stack(); ++ __ empty_FPU_stack(); ++ __ call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::throw_ClassCastException), FSR); ++ return entry; ++} ++ ++address TemplateInterpreterGenerator::generate_exception_handler_common( ++ const char* name, const char* message, bool pass_oop) { ++ assert(!pass_oop || message == NULL, "either oop or message but not both"); ++ address entry = __ pc(); ++ ++ // expression stack must be empty before entering the VM if an exception happened ++ __ empty_expression_stack(); ++ // setup parameters ++ __ li(A1, (long)name); ++ if (pass_oop) { ++ __ call_VM(V0, ++ CAST_FROM_FN_PTR(address, InterpreterRuntime::create_klass_exception), A1, FSR); ++ } else { ++ __ li(A2, (long)message); ++ __ call_VM(V0, ++ CAST_FROM_FN_PTR(address, InterpreterRuntime::create_exception), A1, A2); ++ } ++ // throw exception ++ __ jmp(Interpreter::throw_exception_entry(), relocInfo::none); ++ return entry; ++} ++ ++ ++address TemplateInterpreterGenerator::generate_continuation_for(TosState state) { ++ address entry = __ pc(); ++ // NULL last_sp until next java call ++ __ st_d(R0,Address(FP, frame::interpreter_frame_last_sp_offset * wordSize)); ++ __ dispatch_next(state); ++ return entry; ++} ++ ++ ++address TemplateInterpreterGenerator::generate_return_entry_for(TosState state, int step, size_t index_size) { ++ address entry = __ pc(); ++ ++ // Restore stack bottom in case i2c adjusted stack ++ __ ld_d(SP, Address(FP, frame::interpreter_frame_last_sp_offset * wordSize)); ++ // and NULL it as marker that sp is now tos until next java call ++ __ st_d(R0, FP, frame::interpreter_frame_last_sp_offset * wordSize); ++ ++ __ restore_bcp(); ++ __ restore_locals(); ++ ++ // mdp: T8 ++ // ret: FSR ++ // tmp: T4 ++ if (state == atos) { ++ Register mdp = T8; ++ Register tmp = T4; ++ __ profile_return_type(mdp, FSR, tmp); ++ } ++ ++ ++ const Register cache = T4; ++ const Register index = T3; ++ __ get_cache_and_index_at_bcp(cache, index, 1, index_size); ++ ++ const Register flags = cache; ++ __ alsl_d(AT, index, cache, Address::times_ptr - 1); ++ __ ld_w(flags, AT, in_bytes(ConstantPoolCache::base_offset() + ConstantPoolCacheEntry::flags_offset())); ++ __ andi(flags, flags, ConstantPoolCacheEntry::parameter_size_mask); ++ __ alsl_d(SP, flags, SP, Interpreter::stackElementScale() - 1); ++ ++ __ dispatch_next(state, step); ++ ++ return entry; ++} ++ ++ ++address TemplateInterpreterGenerator::generate_deopt_entry_for(TosState state, ++ int step) { ++ address entry = __ pc(); ++ // NULL last_sp until next java call ++ __ st_d(R0, FP, frame::interpreter_frame_last_sp_offset * wordSize); ++ __ restore_bcp(); ++ __ restore_locals(); ++ // handle exceptions ++ { ++ Label L; ++ const Register thread = TREG; ++#ifndef OPT_THREAD ++ __ 
get_thread(thread); ++#endif ++ __ ld_d(AT, thread, in_bytes(Thread::pending_exception_offset())); ++ __ beq(AT, R0, L); ++ __ call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::throw_pending_exception)); ++ __ should_not_reach_here(); ++ __ bind(L); ++ } ++ __ dispatch_next(state, step); ++ return entry; ++} ++ ++int AbstractInterpreter::BasicType_as_index(BasicType type) { ++ int i = 0; ++ switch (type) { ++ case T_BOOLEAN: i = 0; break; ++ case T_CHAR : i = 1; break; ++ case T_BYTE : i = 2; break; ++ case T_SHORT : i = 3; break; ++ case T_INT : // fall through ++ case T_LONG : // fall through ++ case T_VOID : i = 4; break; ++ case T_FLOAT : i = 5; break; ++ case T_DOUBLE : i = 6; break; ++ case T_OBJECT : // fall through ++ case T_ARRAY : i = 7; break; ++ default : ShouldNotReachHere(); ++ } ++ assert(0 <= i && i < AbstractInterpreter::number_of_result_handlers, ++ "index out of bounds"); ++ return i; ++} ++ ++ ++address TemplateInterpreterGenerator::generate_result_handler_for( ++ BasicType type) { ++ address entry = __ pc(); ++ switch (type) { ++ case T_BOOLEAN: __ c2bool(V0); break; ++ case T_CHAR : __ bstrpick_d(V0, V0, 15, 0); break; ++ case T_BYTE : __ sign_extend_byte (V0); break; ++ case T_SHORT : __ sign_extend_short(V0); break; ++ case T_INT : /* nothing to do */ break; ++ case T_FLOAT : /* nothing to do */ break; ++ case T_DOUBLE : /* nothing to do */ break; ++ case T_OBJECT : ++ { ++ __ ld_d(V0, FP, frame::interpreter_frame_oop_temp_offset * wordSize); ++ __ verify_oop(V0); // and verify it ++ } ++ break; ++ default : ShouldNotReachHere(); ++ } ++ __ jr(RA); // return from result handler ++ return entry; ++} ++ ++address TemplateInterpreterGenerator::generate_safept_entry_for( ++ TosState state, ++ address runtime_entry) { ++ address entry = __ pc(); ++ __ push(state); ++ __ call_VM(noreg, runtime_entry); ++ __ dispatch_via(vtos, Interpreter::_normal_table.table_for(vtos)); ++ return entry; ++} ++ ++ ++ ++// Helpers for commoning out cases in the various type of method entries. ++// ++ ++ ++// increment invocation count & check for overflow ++// ++// Note: checking for negative value instead of overflow ++// so we have a 'sticky' overflow test ++// ++// Rmethod: method ++// T3 : invocation counter ++// ++void InterpreterGenerator::generate_counter_incr( ++ Label* overflow, ++ Label* profile_method, ++ Label* profile_method_continue) { ++ Label done; ++ if (TieredCompilation) { ++ int increment = InvocationCounter::count_increment; ++ int mask = ((1 << Tier0InvokeNotifyFreqLog) - 1) << InvocationCounter::count_shift; ++ Label no_mdo; ++ if (ProfileInterpreter) { ++ // Are we profiling? 
++ __ ld_d(FSR, Address(Rmethod, Method::method_data_offset())); ++ __ beq(FSR, R0, no_mdo); ++ // Increment counter in the MDO ++ const Address mdo_invocation_counter(FSR, in_bytes(MethodData::invocation_counter_offset()) + ++ in_bytes(InvocationCounter::counter_offset())); ++ __ increment_mask_and_jump(mdo_invocation_counter, increment, mask, T3, false, Assembler::zero, overflow); ++ __ beq(R0, R0, done); ++ } ++ __ bind(no_mdo); ++ // Increment counter in MethodCounters ++ const Address invocation_counter(FSR, ++ MethodCounters::invocation_counter_offset() + ++ InvocationCounter::counter_offset()); ++ __ get_method_counters(Rmethod, FSR, done); ++ __ increment_mask_and_jump(invocation_counter, increment, mask, T3, false, Assembler::zero, overflow); ++ __ bind(done); ++ } else { ++ const Address invocation_counter(FSR, in_bytes(MethodCounters::invocation_counter_offset()) ++ + in_bytes(InvocationCounter::counter_offset())); ++ const Address backedge_counter (FSR, in_bytes(MethodCounters::backedge_counter_offset()) ++ + in_bytes(InvocationCounter::counter_offset())); ++ ++ __ get_method_counters(Rmethod, FSR, done); ++ ++ if (ProfileInterpreter) { // %%% Merge this into methodDataOop ++ __ ld_w(T4, FSR, in_bytes(MethodCounters::interpreter_invocation_counter_offset())); ++ __ addi_d(T4, T4, 1); ++ __ st_w(T4, FSR, in_bytes(MethodCounters::interpreter_invocation_counter_offset())); ++ } ++ // Update standard invocation counters ++ __ ld_w(T3, invocation_counter); ++ __ increment(T3, InvocationCounter::count_increment); ++ __ st_w(T3, invocation_counter); // save invocation count ++ ++ __ ld_w(FSR, backedge_counter); // load backedge counter ++ __ li(AT, InvocationCounter::count_mask_value); // mask out the status bits ++ __ andr(FSR, FSR, AT); ++ ++ __ add_d(T3, T3, FSR); // add both counters ++ ++ if (ProfileInterpreter && profile_method != NULL) { ++ // Test to see if we should create a method data oop ++ if (Assembler::is_simm(InvocationCounter::InterpreterProfileLimit, 12)) { ++ __ slti(AT, T3, InvocationCounter::InterpreterProfileLimit); ++ __ bne_far(AT, R0, *profile_method_continue); ++ } else { ++ __ li(AT, (long)&InvocationCounter::InterpreterProfileLimit); ++ __ ld_w(AT, AT, 0); ++ __ blt_far(T3, AT, *profile_method_continue, true /* signed */); ++ } ++ ++ // if no method data exists, go to profile_method ++ __ test_method_data_pointer(FSR, *profile_method); ++ } ++ ++ if (Assembler::is_simm(CompileThreshold, 12)) { ++ __ srli_w(AT, T3, InvocationCounter::count_shift); ++ __ slti(AT, AT, CompileThreshold); ++ __ beq_far(AT, R0, *overflow); ++ } else { ++ __ li(AT, (long)&InvocationCounter::InterpreterInvocationLimit); ++ __ ld_w(AT, AT, 0); ++ __ bge_far(T3, AT, *overflow, true /* signed */); ++ } ++ ++ __ bind(done); ++ } ++} ++ ++void InterpreterGenerator::generate_counter_overflow(Label* do_continue) { ++ ++ // Asm interpreter on entry ++ // S7 - locals ++ // S0 - bcp ++ // Rmethod - method ++ // FP - interpreter frame ++ ++ // On return (i.e. jump to entry_point) ++ // Rmethod - method ++ // RA - return address of interpreter caller ++ // tos - the last parameter to Java method ++ // SP - sender_sp ++ ++ // the bcp is valid if and only if it's not null ++ __ call_VM(NOREG, CAST_FROM_FN_PTR(address, ++ InterpreterRuntime::frequency_counter_overflow), R0); ++ __ ld_d(Rmethod, FP, method_offset); ++ // Preserve invariant that S0/S7 contain bcp/locals of sender frame ++ __ b_far(*do_continue); ++} ++ ++// See if we've got enough room on the stack for locals plus overhead. 
++// The expression stack grows down incrementally, so the normal guard ++// page mechanism will work for that. ++// ++// NOTE: Since the additional locals are also always pushed (wasn't ++// obvious in generate_method_entry) so the guard should work for them ++// too. ++// ++// Args: ++// T2: number of additional locals this frame needs (what we must check) ++// T0: Method* ++// ++void InterpreterGenerator::generate_stack_overflow_check(void) { ++ // see if we've got enough room on the stack for locals plus overhead. ++ // the expression stack grows down incrementally, so the normal guard ++ // page mechanism will work for that. ++ // ++ // Registers live on entry: ++ // ++ // T0: Method* ++ // T2: number of additional locals this frame needs (what we must check) ++ ++ // NOTE: since the additional locals are also always pushed (wasn't obvious in ++ // generate_method_entry) so the guard should work for them too. ++ // ++ ++ const int entry_size = frame::interpreter_frame_monitor_size() * wordSize; ++ ++ // total overhead size: entry_size + (saved fp thru expr stack bottom). ++ // be sure to change this if you add/subtract anything to/from the overhead area ++ const int overhead_size = -(frame::interpreter_frame_initial_sp_offset*wordSize) ++ + entry_size; ++ ++ const int page_size = os::vm_page_size(); ++ Label after_frame_check; ++ ++ // see if the frame is greater than one page in size. If so, ++ // then we need to verify there is enough stack space remaining ++ // for the additional locals. ++ __ li(AT, (page_size - overhead_size) / Interpreter::stackElementSize); ++ __ bge(AT, T2, after_frame_check); ++ ++ // compute sp as if this were going to be the last frame on ++ // the stack before the red zone ++#ifndef OPT_THREAD ++ Register thread = T1; ++ __ get_thread(thread); ++#else ++ Register thread = TREG; ++#endif ++ ++ // locals + overhead, in bytes ++ __ slli_d(T3, T2, Interpreter::stackElementScale()); ++ __ addi_d(T3, T3, overhead_size); // locals * 4 + overhead_size --> T3 ++ ++#ifdef ASSERT ++ Label stack_base_okay, stack_size_okay; ++ // verify that thread stack base is non-zero ++ __ ld_d(AT, thread, in_bytes(Thread::stack_base_offset())); ++ __ bne(AT, R0, stack_base_okay); ++ __ stop("stack base is zero"); ++ __ bind(stack_base_okay); ++ // verify that thread stack size is non-zero ++ __ ld_d(AT, thread, in_bytes(Thread::stack_size_offset())); ++ __ bne(AT, R0, stack_size_okay); ++ __ stop("stack size is zero"); ++ __ bind(stack_size_okay); ++#endif ++ ++ // Add stack base to locals and subtract stack size ++ __ ld_d(AT, thread, in_bytes(Thread::stack_base_offset())); // stack_base --> AT ++ __ add_d(T3, T3, AT); // locals * 4 + overhead_size + stack_base--> T3 ++ __ ld_d(AT, thread, in_bytes(Thread::stack_size_offset())); // stack_size --> AT ++ __ sub_d(T3, T3, AT); // locals * 4 + overhead_size + stack_base - stack_size --> T3 ++ ++ ++ // add in the redzone and yellow size ++ __ li(AT, (StackRedPages+StackYellowPages) * page_size); ++ __ add_d(T3, T3, AT); ++ ++ // check against the current stack bottom ++ __ blt(T3, SP, after_frame_check); ++ ++ // Note: the restored frame is not necessarily interpreted. ++ // Use the shared runtime version of the StackOverflowError. 
++ __ move(SP, Rsender); ++ assert(StubRoutines::throw_StackOverflowError_entry() != NULL, "stub not yet generated"); ++ __ jmp(StubRoutines::throw_StackOverflowError_entry(), relocInfo::runtime_call_type); ++ ++ // all done with frame size check ++ __ bind(after_frame_check); ++} ++ ++// Allocate monitor and lock method (asm interpreter) ++// Rmethod - Method* ++void InterpreterGenerator::lock_method(void) { ++ // synchronize method ++ const int entry_size = frame::interpreter_frame_monitor_size() * wordSize; ++ ++#ifdef ASSERT ++ { Label L; ++ __ ld_w(T0, Rmethod, in_bytes(Method::access_flags_offset())); ++ __ andi(T0, T0, JVM_ACC_SYNCHRONIZED); ++ __ bne(T0, R0, L); ++ __ stop("method doesn't need synchronization"); ++ __ bind(L); ++ } ++#endif // ASSERT ++ // get synchronization object ++ { ++ Label done; ++ const int mirror_offset = in_bytes(Klass::java_mirror_offset()); ++ __ ld_w(T0, Rmethod, in_bytes(Method::access_flags_offset())); ++ __ andi(T2, T0, JVM_ACC_STATIC); ++ __ ld_d(T0, LVP, Interpreter::local_offset_in_bytes(0)); ++ __ beq(T2, R0, done); ++ __ ld_d(T0, Rmethod, in_bytes(Method::const_offset())); ++ __ ld_d(T0, T0, in_bytes(ConstMethod::constants_offset())); ++ __ ld_d(T0, T0, ConstantPool::pool_holder_offset_in_bytes()); ++ __ ld_d(T0, T0, mirror_offset); ++ __ bind(done); ++ } ++ // add space for monitor & lock ++ __ addi_d(SP, SP, (-1) * entry_size); // add space for a monitor entry ++ __ st_d(SP, FP, frame::interpreter_frame_monitor_block_top_offset * wordSize); ++ // set new monitor block top ++ __ st_d(T0, SP, BasicObjectLock::obj_offset_in_bytes()); // store object ++ // FIXME: I do not know what lock_object will do and what it will need ++ __ move(c_rarg0, SP); // object address ++ __ lock_object(c_rarg0); ++} ++ ++/** ++ * Method entry for static native methods: ++ * int java.util.zip.CRC32.update(int crc, int b) ++ */ ++address InterpreterGenerator::generate_CRC32_update_entry() { ++ if (UseCRC32Intrinsics) { ++ address entry = __ pc(); ++ ++ // rmethod: Method* ++ // Rsender: senderSP must preserved for slow path ++ // SP: args ++ ++ Label slow_path; ++ // If we need a safepoint check, generate full interpreter entry. ++ __ li(AT, SafepointSynchronize::_not_synchronized); ++ __ li(T8, (long)SafepointSynchronize::address_of_state()); ++ __ bne(T8, AT, slow_path); ++ ++ // We don't generate local frame and don't align stack because ++ // we call stub code and there is no safepoint on this path. 
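In Java terms, the fast path below implements java.util.zip.CRC32.update(int crc, int b) for a single byte, reading its two operands off the interpreter expression stack in reverse order (the byte value at SP + 0, the incoming crc one word higher). A rough C equivalent of what the stub computes, assuming crc_table stands for table 0 of the _crc_table defined earlier:

#include <stdint.h>

extern const uint32_t crc_table[256];              /* table 0 of StubRoutines::la::_crc_table */

static uint32_t crc32_update_one(uint32_t crc, uint8_t b) {
  crc = ~crc;                                      /* the first nor(crc, crc, R0) */
  crc = crc_table[(crc ^ b) & 0xff] ^ (crc >> 8);  /* update_byte_crc32 */
  return ~crc;                                     /* the second nor(crc, crc, R0) */
}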
++ ++ const Register crc = A0; // crc ++ const Register val = A1; // source java byte value ++ const Register tbl = A2; // scratch ++ ++ // Arguments are reversed on java expression stack ++ __ ld_w(val, SP, 0); // byte value ++ __ ld_w(crc, SP, wordSize); // Initial CRC ++ ++ __ li(tbl, (long)StubRoutines::crc_table_addr()); ++ ++ __ nor(crc, crc, R0); // ~crc ++ __ update_byte_crc32(crc, val, tbl); ++ __ nor(crc, crc, R0); // ~crc ++ ++ // restore caller SP ++ __ move(SP, Rsender); ++ __ jr(RA); ++ ++ // generate a vanilla native entry as the slow path ++ __ bind(slow_path); ++ ++ (void) generate_native_entry(false); ++ ++ return entry; ++ } ++ return generate_native_entry(false); ++} ++ ++/** ++ * Method entry for static native methods: ++ * int java.util.zip.CRC32.updateBytes(int crc, byte[] b, int off, int len) ++ * int java.util.zip.CRC32.updateByteBuffer(int crc, long buf, int off, int len) ++ */ ++address InterpreterGenerator::generate_CRC32_updateBytes_entry(AbstractInterpreter::MethodKind kind) { ++ if (UseCRC32Intrinsics) { ++ address entry = __ pc(); ++ ++ // rmethod: Method* ++ // Rsender: senderSP must preserved for slow path ++ // SP: args ++ ++ Label slow_path; ++ // If we need a safepoint check, generate full interpreter entry. ++ __ li(AT, SafepointSynchronize::_not_synchronized); ++ __ li(T8, (long)SafepointSynchronize::address_of_state()); ++ __ bne(T8, AT, slow_path); ++ ++ // We don't generate local frame and don't align stack because ++ // we call stub code and there is no safepoint on this path. ++ ++ const Register crc = A0; // crc ++ const Register buf = A1; // source java byte array address ++ const Register len = A2; // length ++ const Register tmp = A3; ++ ++ const Register off = len; // offset (never overlaps with 'len') ++ ++ // Arguments are reversed on java expression stack ++ // Calculate address of start element ++ __ ld_w(off, SP, wordSize); // int offset ++ __ ld_d(buf, SP, 2 * wordSize); // byte[] buf | long buf ++ __ add_d(buf, buf, off); // + offset ++ if (kind == Interpreter::java_util_zip_CRC32_updateByteBuffer) { ++ __ ld_w(crc, SP, 4 * wordSize); // long crc ++ } else { ++ __ addi_d(buf, buf, arrayOopDesc::base_offset_in_bytes(T_BYTE)); // + header size ++ __ ld_w(crc, SP, 3 * wordSize); // long crc ++ } ++ ++ // Can now load 'len' since we're finished with 'off' ++ __ ld_w(len, SP, 0); // length ++ ++ __ kernel_crc32(crc, buf, len, tmp); ++ ++ // restore caller SP ++ __ move(SP, Rsender); ++ __ jr(RA); ++ ++ // generate a vanilla native entry as the slow path ++ __ bind(slow_path); ++ ++ (void) generate_native_entry(false); ++ ++ return entry; ++ } ++ return generate_native_entry(false); ++} ++ ++// Generate a fixed interpreter frame. This is identical setup for ++// interpreted methods and for native methods hence the shared code. ++void TemplateInterpreterGenerator::generate_fixed_frame(bool native_call) { ++ ++ // [ local var m-1 ] <--- sp ++ // ... ++ // [ local var 0 ] ++ // [ argumnet word n-1 ] <--- T0(sender's sp) ++ // ... 
++ // [ argument word 0 ] <--- S7 ++ ++ // initialize fixed part of activation frame ++ // sender's sp in Rsender ++ int i = 0; ++ int frame_size = 9; ++#ifndef CORE ++ ++frame_size; ++#endif ++ __ addi_d(SP, SP, (-frame_size) * wordSize); ++ __ st_d(RA, SP, (frame_size - 1) * wordSize); // save return address ++ __ st_d(FP, SP, (frame_size - 2) * wordSize); // save sender's fp ++ __ addi_d(FP, SP, (frame_size - 2) * wordSize); ++ __ st_d(Rsender, FP, (-++i) * wordSize); // save sender's sp ++ __ st_d(R0, FP,(-++i) * wordSize); //save last_sp as null ++ __ st_d(LVP, FP, (-++i) * wordSize); // save locals offset ++ __ ld_d(BCP, Rmethod, in_bytes(Method::const_offset())); // get constMethodOop ++ __ addi_d(BCP, BCP, in_bytes(ConstMethod::codes_offset())); // get codebase ++ __ st_d(Rmethod, FP, (-++i) * wordSize); // save Method* ++#ifndef CORE ++ if (ProfileInterpreter) { ++ Label method_data_continue; ++ __ ld_d(AT, Rmethod, in_bytes(Method::method_data_offset())); ++ __ beq(AT, R0, method_data_continue); ++ __ addi_d(AT, AT, in_bytes(MethodData::data_offset())); ++ __ bind(method_data_continue); ++ __ st_d(AT, FP, (-++i) * wordSize); ++ } else { ++ __ st_d(R0, FP, (-++i) * wordSize); ++ } ++#endif // !CORE ++ ++ __ ld_d(T2, Rmethod, in_bytes(Method::const_offset())); ++ __ ld_d(T2, T2, in_bytes(ConstMethod::constants_offset())); ++ __ ld_d(T2, T2, ConstantPool::cache_offset_in_bytes()); ++ __ st_d(T2, FP, (-++i) * wordSize); // set constant pool cache ++ if (native_call) { ++ __ st_d(R0, FP, (-++i) * wordSize); // no bcp ++ } else { ++ __ st_d(BCP, FP, (-++i) * wordSize); // set bcp ++ } ++ __ st_d(SP, FP, (-++i) * wordSize); // reserve word for pointer to expression stack bottom ++ assert(i + 2 == frame_size, "i + 2 should be equal to frame_size"); ++} ++ ++// End of helpers ++ ++// Various method entries ++//------------------------------------------------------------------------------------------------------------------------ ++// ++// ++ ++// Call an accessor method (assuming it is resolved, otherwise drop ++// into vanilla (slow path) entry ++address InterpreterGenerator::generate_accessor_entry(void) { ++ // Rmethod: Method* ++ // V0: receiver (preserve for slow entry into asm interpreter) ++ // Rsender: senderSP must preserved for slow path, set SP to it on fast path ++ ++ address entry_point = __ pc(); ++ Label xreturn_path; ++ // do fastpath for resolved accessor methods ++ if (UseFastAccessorMethods) { ++ Label slow_path; ++ __ li(T2, SafepointSynchronize::address_of_state()); ++ __ ld_w(AT, T2, 0); ++ __ addi_d(AT, AT, -(SafepointSynchronize::_not_synchronized)); ++ __ bne(AT, R0, slow_path); ++ // Code: _aload_0, _(i|a)getfield, _(i|a)return or any rewrites thereof; ++ // parameter size = 1 ++ // Note: We can only use this code if the getfield has been resolved ++ // and if we don't have a null-pointer exception => check for ++ // these conditions first and use slow path if necessary. ++ // Rmethod: method ++ // V0: receiver ++ ++ // [ receiver ] <-- sp ++ __ ld_d(T0, SP, 0); ++ ++ // check if local 0 != NULL and read field ++ __ beq(T0, R0, slow_path); ++ __ ld_d(T2, Rmethod, in_bytes(Method::const_offset())); ++ __ ld_d(T2, T2, in_bytes(ConstMethod::constants_offset())); ++ // read first instruction word and extract bytecode @ 1 and index @ 2 ++ __ ld_d(T3, Rmethod, in_bytes(Method::const_offset())); ++ __ ld_w(T3, T3, in_bytes(ConstMethod::codes_offset())); ++ // Shift codes right to get the index on the right. 
++ // The bytecode fetched looks like <0xb4><0x2a> ++ __ srli_d(T3, T3, 2 * BitsPerByte); ++ // FIXME: maybe it's wrong ++ __ slli_d(T3, T3, exact_log2(in_words(ConstantPoolCacheEntry::size()))); ++ __ ld_d(T2, T2, ConstantPool::cache_offset_in_bytes()); ++ ++ // T0: local 0 ++ // Rmethod: method ++ // V0: receiver - do not destroy since it is needed for slow path! ++ // T1: scratch use which register instead ? ++ // T3: constant pool cache index ++ // T2: constant pool cache ++ // Rsender: send's sp ++ // check if getfield has been resolved and read constant pool cache entry ++ // check the validity of the cache entry by testing whether _indices field ++ // contains Bytecode::_getfield in b1 byte. ++ assert(in_words(ConstantPoolCacheEntry::size()) == 4, "adjust shift below"); ++ ++ __ slli_d(T8, T3, Address::times_8); ++ __ li(T1, in_bytes(ConstantPoolCache::base_offset() ++ + ConstantPoolCacheEntry::indices_offset())); ++ __ add_d(T1, T8, T1); ++ __ ldx_w(T1, T1, T2); ++ __ srli_d(T1, T1, 2 * BitsPerByte); ++ __ andi(T1, T1, 0xFF); ++ __ addi_d(T1, T1, (-1) * Bytecodes::_getfield); ++ __ bne(T1, R0, slow_path); ++ ++ // Note: constant pool entry is not valid before bytecode is resolved ++ ++ __ li(T1, in_bytes(ConstantPoolCache::base_offset() + ConstantPoolCacheEntry::f2_offset())); ++ __ add_d(T1, T1, T8); ++ __ ldx_w(AT, T1, T2); ++ ++ __ li(T1, in_bytes(ConstantPoolCache::base_offset() + ConstantPoolCacheEntry::flags_offset())); ++ __ add_d(T1, T1, T8); ++ __ ldx_w(T3, T1, T2); ++ ++ Label notByte, notBool, notShort, notChar, notObj; ++ ++ // Need to differentiate between igetfield, agetfield, bgetfield etc. ++ // because they are different sizes. ++ // Use the type from the constant pool cache ++ __ srli_w(T3, T3, ConstantPoolCacheEntry::tos_state_shift); ++ // Make sure we don't need to mask T3 for tosBits after the above shift ++ ConstantPoolCacheEntry::verify_tos_state_shift(); ++ // btos = 0 ++ __ add_d(T0, T0, AT); ++ __ bne(T3, R0, notByte); ++ ++ __ ld_b(V0, T0, 0); ++ __ b(xreturn_path); ++ ++ //ztos ++ __ bind(notByte); ++ __ addi_d(T1, T3, (-1) * ztos); ++ __ bne(T1, R0, notBool); ++ __ ld_b(V0, T0, 0); ++ __ b(xreturn_path); ++ ++ //stos ++ __ bind(notBool); ++ __ addi_d(T1, T3, (-1) * stos); ++ __ bne(T1, R0, notShort); ++ __ ld_h(V0, T0, 0); ++ __ b(xreturn_path); ++ ++ //ctos ++ __ bind(notShort); ++ __ addi_d(T1, T3, (-1) * ctos); ++ __ bne(T1, R0, notChar); ++ __ ld_hu(V0, T0, 0); ++ __ b(xreturn_path); ++ ++ //atos ++ __ bind(notChar); ++ __ addi_d(T1, T3, (-1) * atos); ++ __ bne(T1, R0, notObj); ++ //add for compressedoops ++ __ load_heap_oop(V0, Address(T0, 0)); ++ __ b(xreturn_path); ++ ++ //itos ++ __ bind(notObj); ++#ifdef ASSERT ++ Label okay; ++ __ addi_d(T1, T3, (-1) * itos); ++ __ beq(T1, R0, okay); ++ __ stop("what type is this?"); ++ __ bind(okay); ++#endif // ASSERT ++ __ ld_w(V0, T0, 0); ++ ++ __ bind(xreturn_path); ++ ++ // _ireturn/_areturn ++ //FIXME ++ __ move(SP, Rsender);//FIXME, set sender's fp to SP ++ __ jr(RA); ++ ++ // generate a vanilla interpreter entry as the slow path ++ __ bind(slow_path); ++ (void) generate_normal_entry(false); ++ } else { ++ (void) generate_normal_entry(false); ++ } ++ return entry_point; ++} ++ ++// Method entry for java.lang.ref.Reference.get. ++address InterpreterGenerator::generate_Reference_get_entry(void) { ++#if INCLUDE_ALL_GCS ++ // Code: _aload_0, _getfield, _areturn ++ // parameter size = 1 ++ // ++ // The code that gets generated by this routine is split into 2 parts: ++ // 1. 
The "intrinsified" code for G1 (or any SATB based GC), ++ // 2. The slow path - which is an expansion of the regular method entry. ++ // ++ // Notes:- ++ // * In the G1 code we do not check whether we need to block for ++ // a safepoint. If G1 is enabled then we must execute the specialized ++ // code for Reference.get (except when the Reference object is null) ++ // so that we can log the value in the referent field with an SATB ++ // update buffer. ++ // If the code for the getfield template is modified so that the ++ // G1 pre-barrier code is executed when the current method is ++ // Reference.get() then going through the normal method entry ++ // will be fine. ++ // * The G1 code can, however, check the receiver object (the instance ++ // of java.lang.Reference) and jump to the slow path if null. If the ++ // Reference object is null then we obviously cannot fetch the referent ++ // and so we don't need to call the G1 pre-barrier. Thus we can use the ++ // regular method entry code to generate the NPE. ++ // ++ // This code is based on generate_accessor_enty. ++ // ++ // Rmethod: Method* ++ ++ // Rsender: senderSP must preserve for slow path, set SP to it on fast path (Rsender) ++ ++ address entry = __ pc(); ++ ++ const int referent_offset = java_lang_ref_Reference::referent_offset; ++ guarantee(referent_offset > 0, "referent offset not initialized"); ++ if (UseG1GC) { ++ Label slow_path; ++ ++ // Check if local 0 != NULL ++ // If the receiver is null then it is OK to jump to the slow path. ++ __ ld_d(V0, SP, 0); ++ ++ __ beq(V0, R0, slow_path); ++ ++ // Generate the G1 pre-barrier code to log the value of ++ // the referent field in an SATB buffer. ++ ++ // Load the value of the referent field. ++ const Address field_address(V0, referent_offset); ++ __ load_heap_oop(V0, field_address); ++ ++ __ push(RA); ++ // Generate the G1 pre-barrier code to log the value of ++ // the referent field in an SATB buffer. ++ __ g1_write_barrier_pre(noreg /* obj */, ++ V0 /* pre_val */, ++ TREG /* thread */, ++ Rmethod /* tmp */, ++ true /* tosca_live */, ++ true /* expand_call */); ++ __ pop(RA); ++ ++ __ add_d(SP, Rsender, R0); // set sp to sender sp ++ __ jr(RA); ++ ++ // generate a vanilla interpreter entry as the slow path ++ __ bind(slow_path); ++ (void) generate_normal_entry(false); ++ ++ return entry; ++ } ++#endif // INCLUDE_ALL_GCS ++ ++ // If G1 is not enabled then attempt to go through the accessor entry point ++ // Reference.get is an accessor ++ return generate_accessor_entry(); ++} ++ ++// Interpreter stub for calling a native method. (asm interpreter) ++// This sets up a somewhat different looking stack for calling the ++// native method than the typical interpreter frame setup. 
++address InterpreterGenerator::generate_native_entry(bool synchronized) { ++ // determine code generation flags ++ bool inc_counter = UseCompiler || CountCompiledCalls; ++ // Rsender: sender's sp ++ // Rmethod: Method* ++ address entry_point = __ pc(); ++ ++#ifndef CORE ++ const Address invocation_counter(Rmethod,in_bytes(MethodCounters::invocation_counter_offset() + ++ InvocationCounter::counter_offset())); ++#endif ++ // get parameter size (always needed) ++ // the size in the java stack ++ __ ld_d(V0, Rmethod, in_bytes(Method::const_offset())); ++ __ ld_hu(V0, V0, in_bytes(ConstMethod::size_of_parameters_offset())); ++ ++ // native calls don't need the stack size check since they have no expression stack ++ // and the arguments are already on the stack and we only add a handful of words ++ // to the stack ++ ++ // Rmethod: Method* ++ // V0: size of parameters ++ // Layout of frame at this point ++ // ++ // [ argument word n-1 ] <--- sp ++ // ... ++ // [ argument word 0 ] ++ ++ // for natives the size of locals is zero ++ ++ // compute beginning of parameters (S7) ++ __ slli_d(LVP, V0, Address::times_8); ++ __ addi_d(LVP, LVP, (-1) * wordSize); ++ __ add_d(LVP, LVP, SP); ++ ++ ++ // add 2 zero-initialized slots for native calls ++ // 1 slot for native oop temp offset (setup via runtime) ++ // 1 slot for static native result handler3 (setup via runtime) ++ __ push2(R0, R0); ++ ++ // Layout of frame at this point ++ // [ method holder mirror ] <--- sp ++ // [ result type info ] ++ // [ argument word n-1 ] <--- T0 ++ // ... ++ // [ argument word 0 ] <--- LVP ++ ++ ++#ifndef CORE ++ if (inc_counter) __ ld_w(T3, invocation_counter); // (pre-)fetch invocation count ++#endif ++ ++ // initialize fixed part of activation frame ++ generate_fixed_frame(true); ++ // after this function, the layout of frame is as following ++ // ++ // [ monitor block top ] <--- sp ( the top monitor entry ) ++ // [ byte code pointer (0) ] (if native, bcp = 0) ++ // [ constant pool cache ] ++ // [ Method* ] ++ // [ locals offset ] ++ // [ sender's sp ] ++ // [ sender's fp ] ++ // [ return address ] <--- fp ++ // [ method holder mirror ] ++ // [ result type info ] ++ // [ argumnet word n-1 ] <--- sender's sp ++ // ... ++ // [ argument word 0 ] <--- S7 ++ ++ ++ // make sure method is native & not abstract ++#ifdef ASSERT ++ __ ld_w(T0, Rmethod, in_bytes(Method::access_flags_offset())); ++ { ++ Label L; ++ __ andi(AT, T0, JVM_ACC_NATIVE); ++ __ bne(AT, R0, L); ++ __ stop("tried to execute native method as non-native"); ++ __ bind(L); ++ } ++ { ++ Label L; ++ __ andi(AT, T0, JVM_ACC_ABSTRACT); ++ __ beq(AT, R0, L); ++ __ stop("tried to execute abstract method in interpreter"); ++ __ bind(L); ++ } ++#endif ++ ++ // Since at this point in the method invocation the exception handler ++ // would try to exit the monitor of synchronized methods which hasn't ++ // been entered yet, we set the thread local variable ++ // _do_not_unlock_if_synchronized to true. The remove_activation will ++ // check this flag. 
++ Register thread = TREG; ++#ifndef OPT_THREAD ++ __ get_thread(thread); ++#endif ++ __ li(AT, (int)true); ++ __ st_b(AT, thread, in_bytes(JavaThread::do_not_unlock_if_synchronized_offset())); ++ ++#ifndef CORE ++ // increment invocation count & check for overflow ++ Label invocation_counter_overflow; ++ if (inc_counter) { ++ generate_counter_incr(&invocation_counter_overflow, NULL, NULL); ++ } ++ ++ Label continue_after_compile; ++ __ bind(continue_after_compile); ++#endif // CORE ++ ++ bang_stack_shadow_pages(true); ++ ++ // reset the _do_not_unlock_if_synchronized flag ++#ifndef OPT_THREAD ++ __ get_thread(thread); ++#endif ++ __ st_b(R0, thread, in_bytes(JavaThread::do_not_unlock_if_synchronized_offset())); ++ ++ // check for synchronized methods ++ // Must happen AFTER invocation_counter check and stack overflow check, ++ // so method is not locked if overflows. ++ if (synchronized) { ++ lock_method(); ++ } else { ++ // no synchronization necessary ++#ifdef ASSERT ++ { ++ Label L; ++ __ ld_w(T0, Rmethod, in_bytes(Method::access_flags_offset())); ++ __ andi(AT, T0, JVM_ACC_SYNCHRONIZED); ++ __ beq(AT, R0, L); ++ __ stop("method needs synchronization"); ++ __ bind(L); ++ } ++#endif ++ } ++ ++ // after method_lock, the layout of frame is as following ++ // ++ // [ monitor entry ] <--- sp ++ // ... ++ // [ monitor entry ] ++ // [ monitor block top ] ( the top monitor entry ) ++ // [ byte code pointer (0) ] (if native, bcp = 0) ++ // [ constant pool cache ] ++ // [ Method* ] ++ // [ locals offset ] ++ // [ sender's sp ] ++ // [ sender's fp ] ++ // [ return address ] <--- fp ++ // [ method holder mirror ] ++ // [ result type info ] ++ // [ argumnet word n-1 ] <--- ( sender's sp ) ++ // ... ++ // [ argument word 0 ] <--- S7 ++ ++ // start execution ++#ifdef ASSERT ++ { ++ Label L; ++ __ ld_d(AT, FP, frame::interpreter_frame_monitor_block_top_offset * wordSize); ++ __ beq(AT, SP, L); ++ __ stop("broken stack frame setup in interpreter in asm"); ++ __ bind(L); ++ } ++#endif ++ ++ // jvmti/jvmpi support ++ __ notify_method_entry(); ++ ++ // work registers ++ const Register method = Rmethod; ++ //const Register thread = T2; ++ const Register t = T8; ++ ++ __ get_method(method); ++ __ verify_oop(method); ++ { ++ Label L, Lstatic; ++ __ ld_d(t,method,in_bytes(Method::const_offset())); ++ __ ld_hu(t, t, in_bytes(ConstMethod::size_of_parameters_offset())); ++ // LoongArch ABI: caller does not reserve space for the register auguments. ++ // A0 and A1(if needed) ++ __ ld_w(AT, Rmethod, in_bytes(Method::access_flags_offset())); ++ __ andi(AT, AT, JVM_ACC_STATIC); ++ __ beq(AT, R0, Lstatic); ++ __ addi_d(t, t, 1); ++ __ bind(Lstatic); ++ __ addi_d(t, t, -7); ++ __ bge(R0, t, L); ++ __ slli_d(t, t, Address::times_8); ++ __ sub_d(SP, SP, t); ++ __ bind(L); ++ } ++ __ li(AT, -(StackAlignmentInBytes)); ++ __ andr(SP, SP, AT); ++ __ move(AT, SP); ++ // [ ] <--- sp ++ // ... (size of parameters - 8 ) ++ // [ monitor entry ] ++ // ... ++ // [ monitor entry ] ++ // [ monitor block top ] ( the top monitor entry ) ++ // [ byte code pointer (0) ] (if native, bcp = 0) ++ // [ constant pool cache ] ++ // [ Method* ] ++ // [ locals offset ] ++ // [ sender's sp ] ++ // [ sender's fp ] ++ // [ return address ] <--- fp ++ // [ method holder mirror ] ++ // [ result type info ] ++ // [ argumnet word n-1 ] <--- ( sender's sp ) ++ // ... 
++ // [ argument word 0 ] <--- LVP ++ ++ // get signature handler ++ { ++ Label L; ++ __ ld_d(T4, method, in_bytes(Method::signature_handler_offset())); ++ __ bne(T4, R0, L); ++ __ call_VM(NOREG, CAST_FROM_FN_PTR(address, ++ InterpreterRuntime::prepare_native_call), method); ++ __ get_method(method); ++ __ ld_d(T4, method, in_bytes(Method::signature_handler_offset())); ++ __ bind(L); ++ } ++ ++ // call signature handler ++ // FIXME: when change codes in InterpreterRuntime, note this point ++ // from: begin of parameters ++ assert(InterpreterRuntime::SignatureHandlerGenerator::from() == LVP, "adjust this code"); ++ // to: current sp ++ assert(InterpreterRuntime::SignatureHandlerGenerator::to () == SP, "adjust this code"); ++ // temp: T3 ++ assert(InterpreterRuntime::SignatureHandlerGenerator::temp() == t , "adjust this code"); ++ ++ __ jalr(T4); ++ __ get_method(method); ++ ++ // ++ // if native function is static, and its second parameter has type length of double word, ++ // and first parameter has type length of word, we have to reserve one word ++ // for the first parameter, according to LoongArch abi. ++ // if native function is not static, and its third parameter has type length of double word, ++ // and second parameter has type length of word, we have to reserve one word for the second ++ // parameter. ++ // ++ ++ ++ // result handler is in V0 ++ // set result handler ++ __ st_d(V0, FP, (frame::interpreter_frame_result_handler_offset)*wordSize); ++ ++#define FIRSTPARA_SHIFT_COUNT 5 ++#define SECONDPARA_SHIFT_COUNT 9 ++#define THIRDPARA_SHIFT_COUNT 13 ++#define PARA_MASK 0xf ++ ++ // pass mirror handle if static call ++ { ++ Label L; ++ const int mirror_offset = in_bytes(Klass::java_mirror_offset()); ++ __ ld_w(t, method, in_bytes(Method::access_flags_offset())); ++ __ andi(AT, t, JVM_ACC_STATIC); ++ __ beq(AT, R0, L); ++ ++ // get mirror ++ __ ld_d(t, method, in_bytes(Method:: const_offset())); ++ __ ld_d(t, t, in_bytes(ConstMethod::constants_offset())); //?? ++ __ ld_d(t, t, ConstantPool::pool_holder_offset_in_bytes()); ++ __ ld_d(t, t, mirror_offset); ++ // copy mirror into activation frame ++ //__ st_w(t, FP, frame::interpreter_frame_oop_temp_offset * wordSize); ++ // pass handle to mirror ++ __ st_d(t, FP, frame::interpreter_frame_oop_temp_offset * wordSize); ++ __ addi_d(t, FP, frame::interpreter_frame_oop_temp_offset * wordSize); ++ __ move(A1, t); ++ __ bind(L); ++ } ++ ++ // [ mthd holder mirror ptr ] <--- sp --------------------| (only for static method) ++ // [ ] | ++ // ... size of parameters(or +1) | ++ // [ monitor entry ] | ++ // ... | ++ // [ monitor entry ] | ++ // [ monitor block top ] ( the top monitor entry ) | ++ // [ byte code pointer (0) ] (if native, bcp = 0) | ++ // [ constant pool cache ] | ++ // [ Method* ] | ++ // [ locals offset ] | ++ // [ sender's sp ] | ++ // [ sender's fp ] | ++ // [ return address ] <--- fp | ++ // [ method holder mirror ] <----------------------------| ++ // [ result type info ] ++ // [ argumnet word n-1 ] <--- ( sender's sp ) ++ // ... 
++ // [ argument word 0 ] <--- S7 ++ ++ // get native function entry point ++ { Label L; ++ __ ld_d(T4, method, in_bytes(Method::native_function_offset())); ++ __ li(T6, SharedRuntime::native_method_throw_unsatisfied_link_error_entry()); ++ __ bne(T6, T4, L); ++ __ call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::prepare_native_call), method); ++ __ get_method(method); ++ __ verify_oop(method); ++ __ ld_d(T4, method, in_bytes(Method::native_function_offset())); ++ __ bind(L); ++ } ++ ++ // pass JNIEnv ++ // native function in T4 ++#ifndef OPT_THREAD ++ __ get_thread(thread); ++#endif ++ __ addi_d(t, thread, in_bytes(JavaThread::jni_environment_offset())); ++ __ move(A0, t); ++ // [ jni environment ] <--- sp ++ // [ mthd holder mirror ptr ] ---------------------------->| (only for static method) ++ // [ ] | ++ // ... size of parameters | ++ // [ monitor entry ] | ++ // ... | ++ // [ monitor entry ] | ++ // [ monitor block top ] ( the top monitor entry ) | ++ // [ byte code pointer (0) ] (if native, bcp = 0) | ++ // [ constant pool cache ] | ++ // [ Method* ] | ++ // [ locals offset ] | ++ // [ sender's sp ] | ++ // [ sender's fp ] | ++ // [ return address ] <--- fp | ++ // [ method holder mirror ] <----------------------------| ++ // [ result type info ] ++ // [ argumnet word n-1 ] <--- ( sender's sp ) ++ // ... ++ // [ argument word 0 ] <--- S7 ++ ++ // Set the last Java PC in the frame anchor to be the return address from ++ // the call to the native method: this will allow the debugger to ++ // generate an accurate stack trace. ++ Label native_return; ++ __ set_last_Java_frame(thread, SP, FP, native_return); ++ ++ // change thread state ++#ifdef ASSERT ++ { ++ Label L; ++ __ ld_w(t, thread, in_bytes(JavaThread::thread_state_offset())); ++ __ addi_d(t, t, (-1) * _thread_in_Java); ++ __ beq(t, R0, L); ++ __ stop("Wrong thread state in native stub"); ++ __ bind(L); ++ } ++#endif ++ ++ __ li(t, _thread_in_native); ++ if (os::is_MP()) { ++ __ membar(Assembler::Membar_mask_bits(__ LoadStore|__ StoreStore)); // store release ++ } ++ __ st_w(t, thread, in_bytes(JavaThread::thread_state_offset())); ++ ++ // call native method ++ __ jalr(T4); ++ __ bind(native_return); ++ // result potentially in V0 or F0 ++ ++ ++ // via _last_native_pc and not via _last_jave_sp ++ // NOTE: the order of theses push(es) is known to frame::interpreter_frame_result. ++ // If the order changes or anything else is added to the stack the code in ++ // interpreter_frame_result will have to be changed. ++ //FIXME, should modify here ++ // save return value to keep the value from being destroyed by other calls ++ __ push(dtos); ++ __ push(ltos); ++ ++ // change thread state ++ __ get_thread(thread); ++ __ li(t, _thread_in_native_trans); ++ if (os::is_MP()) { ++ __ membar(Assembler::Membar_mask_bits(__ LoadStore|__ StoreStore)); // store release ++ } ++ __ st_w(t, thread, in_bytes(JavaThread::thread_state_offset())); ++ ++ if(os::is_MP()) { ++ if (UseMembar) { ++ // Force this write out before the read below ++ __ membar(__ AnyAny); ++ } else { ++ // Write serialization page so VM thread can do a pseudo remote membar. ++ // We use the current thread pointer to calculate a thread specific ++ // offset to write to within the page. This minimizes bus traffic ++ // due to cache line collision. 
++ __ serialize_memory(thread, A0); ++ } ++ } ++ ++ // check for safepoint operation in progress and/or pending suspend requests ++ { Label Continue; ++ ++ // Don't use call_VM as it will see a possible pending exception and forward it ++ // and never return here preventing us from clearing _last_native_pc down below. ++ // Also can't use call_VM_leaf either as it will check to see if BCP & LVP are ++ // preserved and correspond to the bcp/locals pointers. So we do a runtime call ++ // by hand. ++ // ++ Label L; ++ __ li(AT, SafepointSynchronize::address_of_state()); ++ __ ld_w(AT, AT, 0); ++ __ bne(AT, R0, L); ++ __ ld_w(AT, thread, in_bytes(JavaThread::suspend_flags_offset())); ++ __ beq(AT, R0, Continue); ++ __ bind(L); ++ __ move(A0, thread); ++ __ call(CAST_FROM_FN_PTR(address, JavaThread::check_special_condition_for_native_trans), ++ relocInfo::runtime_call_type); ++ ++#ifndef OPT_THREAD ++ __ get_thread(thread); ++#endif ++ //add for compressedoops ++ __ reinit_heapbase(); ++ __ bind(Continue); ++ } ++ ++ // change thread state ++ __ li(t, _thread_in_Java); ++ if (os::is_MP()) { ++ __ membar(Assembler::Membar_mask_bits(__ LoadStore|__ StoreStore)); // store release ++ } ++ __ st_w(t, thread, in_bytes(JavaThread::thread_state_offset())); ++ __ reset_last_Java_frame(thread, true); ++ ++ // reset handle block ++ __ ld_d(t, thread, in_bytes(JavaThread::active_handles_offset())); ++ __ st_w(R0, t, JNIHandleBlock::top_offset_in_bytes()); ++ ++ // If result was an oop then unbox and save it in the frame ++ { ++ Label no_oop; ++ //FIXME, addi only support 12-bit imeditate ++ __ ld_d(AT, FP, frame::interpreter_frame_result_handler_offset*wordSize); ++ __ li(T0, AbstractInterpreter::result_handler(T_OBJECT)); ++ __ bne(AT, T0, no_oop); ++ __ pop(ltos); ++ // Unbox oop result, e.g. JNIHandles::resolve value. ++ __ resolve_jobject(V0, thread, T4); ++ __ st_d(V0, FP, (frame::interpreter_frame_oop_temp_offset)*wordSize); ++ // keep stack depth as expected by pushing oop which will eventually be discarded ++ __ push(ltos); ++ __ bind(no_oop); ++ } ++ { ++ Label no_reguard; ++ __ ld_w(t, thread, in_bytes(JavaThread::stack_guard_state_offset())); ++ __ li(AT,(int) JavaThread::stack_guard_yellow_disabled); ++ __ bne(t, AT, no_reguard); ++ __ pushad(); ++ __ move(S5_heapbase, SP); ++ __ li(AT, -StackAlignmentInBytes); ++ __ andr(SP, SP, AT); ++ __ call(CAST_FROM_FN_PTR(address, SharedRuntime::reguard_yellow_pages), relocInfo::runtime_call_type); ++ __ move(SP, S5_heapbase); ++ __ popad(); ++ //add for compressedoops ++ __ reinit_heapbase(); ++ __ bind(no_reguard); ++ } ++ // restore BCP to have legal interpreter frame, ++ // i.e., bci == 0 <=> BCP == code_base() ++ // Can't call_VM until bcp is within reasonable. ++ __ get_method(method); // method is junk from thread_in_native to now. ++ __ verify_oop(method); ++ __ ld_d(BCP, method, in_bytes(Method::const_offset())); ++ __ lea(BCP, Address(BCP, in_bytes(ConstMethod::codes_offset()))); ++ // handle exceptions (exception handling will handle unlocking!) ++ { ++ Label L; ++ __ ld_d(t, thread, in_bytes(Thread::pending_exception_offset())); ++ __ beq(t, R0, L); ++ // Note: At some point we may want to unify this with the code used in ++ // call_VM_base(); ++ // i.e., we should use the StubRoutines::forward_exception code. For now this ++ // doesn't work here because the sp is not correctly set at this point. 
++ __ MacroAssembler::call_VM(noreg, ++ CAST_FROM_FN_PTR(address, ++ InterpreterRuntime::throw_pending_exception)); ++ __ should_not_reach_here(); ++ __ bind(L); ++ } ++ ++ // do unlocking if necessary ++ { ++ Label L; ++ __ ld_w(t, method, in_bytes(Method::access_flags_offset())); ++ __ andi(t, t, JVM_ACC_SYNCHRONIZED); ++ __ addi_d(c_rarg0, FP, frame::interpreter_frame_initial_sp_offset * wordSize - (int)sizeof(BasicObjectLock)); ++ __ beq(t, R0, L); ++ // the code below should be shared with interpreter macro assembler implementation ++ { ++ Label unlock; ++ // BasicObjectLock will be first in list, ++ // since this is a synchronized method. However, need ++ // to check that the object has not been unlocked by ++ // an explicit monitorexit bytecode. ++ // address of first monitor ++ ++ __ ld_d(t, c_rarg0, BasicObjectLock::obj_offset_in_bytes()); ++ __ bne(t, R0, unlock); ++ ++ // Entry already unlocked, need to throw exception ++ __ MacroAssembler::call_VM(NOREG, CAST_FROM_FN_PTR(address, ++ InterpreterRuntime::throw_illegal_monitor_state_exception)); ++ __ should_not_reach_here(); ++ ++ __ bind(unlock); ++ __ unlock_object(c_rarg0); ++ } ++ __ bind(L); ++ } ++ ++ // jvmti/jvmpi support ++ // Note: This must happen _after_ handling/throwing any exceptions since ++ // the exception handler code notifies the runtime of method exits ++ // too. If this happens before, method entry/exit notifications are ++ // not properly paired (was bug - gri 11/22/99). ++ __ notify_method_exit(vtos, InterpreterMacroAssembler::NotifyJVMTI); ++ ++ // restore potential result in V0, ++ // call result handler to restore potential result in ST0 & handle result ++ ++ __ pop(ltos); ++ __ pop(dtos); ++ ++ __ ld_d(t, FP, (frame::interpreter_frame_result_handler_offset) * wordSize); ++ __ jalr(t); ++ ++ ++ // remove activation ++ __ ld_d(SP, FP, frame::interpreter_frame_sender_sp_offset * wordSize); // get sender sp ++ __ ld_d(RA, FP, frame::interpreter_frame_return_addr_offset * wordSize); // get return address ++ __ ld_d(FP, FP, frame::interpreter_frame_sender_fp_offset * wordSize); // restore sender's fp ++ __ jr(RA); ++ ++#ifndef CORE ++ if (inc_counter) { ++ // Handle overflow of counter and compile method ++ __ bind(invocation_counter_overflow); ++ generate_counter_overflow(&continue_after_compile); ++ // entry_point is the beginning of this ++ // function and checks again for compiled code ++ } ++#endif ++ return entry_point; ++} ++ ++// ++// Generic interpreted method entry to (asm) interpreter ++// ++// Layout of frame just at the entry ++// ++// [ argument word n-1 ] <--- sp ++// ... ++// [ argument word 0 ] ++// assume Method* in Rmethod before call this method. 
++// prerequisites to the generated stub : the callee Method* in Rmethod ++// note you must save the caller bcp before call the generated stub ++// ++address InterpreterGenerator::generate_normal_entry(bool synchronized) { ++ // determine code generation flags ++ bool inc_counter = UseCompiler || CountCompiledCalls; ++ ++ // Rmethod: Method* ++ // Rsender: sender 's sp ++ address entry_point = __ pc(); ++ ++ const Address invocation_counter(Rmethod, ++ in_bytes(MethodCounters::invocation_counter_offset() + InvocationCounter::counter_offset())); ++ ++ // get parameter size (always needed) ++ __ ld_d(T3, Rmethod, in_bytes(Method::const_offset())); //T3 --> Rmethod._constMethod ++ __ ld_hu(V0, T3, in_bytes(ConstMethod::size_of_parameters_offset())); ++ ++ // Rmethod: Method* ++ // V0: size of parameters ++ // Rsender: sender 's sp ,could be different frome sp+ wordSize if we call via c2i ++ // get size of locals in words to T2 ++ __ ld_hu(T2, T3, in_bytes(ConstMethod::size_of_locals_offset())); ++ // T2 = no. of additional locals, locals include parameters ++ __ sub_d(T2, T2, V0); ++ ++ // see if we've got enough room on the stack for locals plus overhead. ++ // Layout of frame at this point ++ // ++ // [ argument word n-1 ] <--- sp ++ // ... ++ // [ argument word 0 ] ++ generate_stack_overflow_check(); ++ // after this function, the layout of frame does not change ++ ++ // compute beginning of parameters (LVP) ++ __ slli_d(LVP, V0, LogBytesPerWord); ++ __ addi_d(LVP, LVP, (-1) * wordSize); ++ __ add_d(LVP, LVP, SP); ++ ++ // T2 - # of additional locals ++ // allocate space for locals ++ // explicitly initialize locals ++ { ++ Label exit, loop; ++ __ beq(T2, R0, exit); ++ ++ __ bind(loop); ++ __ addi_d(SP, SP, (-1) * wordSize); ++ __ addi_d(T2, T2, -1); // until everything initialized ++ __ st_d(R0, SP, 0); // initialize local variables ++ __ bne(T2, R0, loop); ++ ++ __ bind(exit); ++ } ++ ++ // ++ // [ local var m-1 ] <--- sp ++ // ... ++ // [ local var 0 ] ++ // [ argument word n-1 ] <--- T0? ++ // ... ++ // [ argument word 0 ] <--- LVP ++ ++ // initialize fixed part of activation frame ++ ++ generate_fixed_frame(false); ++ ++ ++ // after this function, the layout of frame is as following ++ // ++ // [ monitor block top ] <--- sp ( the top monitor entry ) ++ // [ byte code pointer ] (if native, bcp = 0) ++ // [ constant pool cache ] ++ // [ Method* ] ++ // [ locals offset ] ++ // [ sender's sp ] ++ // [ sender's fp ] <--- fp ++ // [ return address ] ++ // [ local var m-1 ] ++ // ... ++ // [ local var 0 ] ++ // [ argumnet word n-1 ] <--- ( sender's sp ) ++ // ... ++ // [ argument word 0 ] <--- LVP ++ ++ ++ // make sure method is not native & not abstract ++#ifdef ASSERT ++ __ ld_d(AT, Rmethod, in_bytes(Method::access_flags_offset())); ++ { ++ Label L; ++ __ andi(T2, AT, JVM_ACC_NATIVE); ++ __ beq(T2, R0, L); ++ __ stop("tried to execute native method as non-native"); ++ __ bind(L); ++ } ++ { ++ Label L; ++ __ andi(T2, AT, JVM_ACC_ABSTRACT); ++ __ beq(T2, R0, L); ++ __ stop("tried to execute abstract method in interpreter"); ++ __ bind(L); ++ } ++#endif ++ ++ // Since at this point in the method invocation the exception handler ++ // would try to exit the monitor of synchronized methods which hasn't ++ // been entered yet, we set the thread local variable ++ // _do_not_unlock_if_synchronized to true. The remove_activation will ++ // check this flag. 
++ ++#ifndef OPT_THREAD ++ Register thread = T8; ++ __ get_thread(thread); ++#else ++ Register thread = TREG; ++#endif ++ __ li(AT, (int)true); ++ __ st_b(AT, thread, in_bytes(JavaThread::do_not_unlock_if_synchronized_offset())); ++ ++#ifndef CORE ++ ++ // mdp : T8 ++ // tmp1: T4 ++ // tmp2: T2 ++ __ profile_parameters_type(T8, T4, T2); ++ ++ // increment invocation count & check for overflow ++ Label invocation_counter_overflow; ++ Label profile_method; ++ Label profile_method_continue; ++ if (inc_counter) { ++ generate_counter_incr(&invocation_counter_overflow, ++ &profile_method, ++ &profile_method_continue); ++ if (ProfileInterpreter) { ++ __ bind(profile_method_continue); ++ } ++ } ++ ++ Label continue_after_compile; ++ __ bind(continue_after_compile); ++ ++#endif // CORE ++ ++ bang_stack_shadow_pages(false); ++ ++ // reset the _do_not_unlock_if_synchronized flag ++#ifndef OPT_THREAD ++ __ get_thread(thread); ++#endif ++ __ st_b(R0, thread, in_bytes(JavaThread::do_not_unlock_if_synchronized_offset())); ++ ++ // check for synchronized methods ++ // Must happen AFTER invocation_counter check and stack overflow check, ++ // so method is not locked if overflows. ++ // ++ if (synchronized) { ++ // Allocate monitor and lock method ++ lock_method(); ++ } else { ++ // no synchronization necessary ++#ifdef ASSERT ++ { Label L; ++ __ ld_w(AT, Rmethod, in_bytes(Method::access_flags_offset())); ++ __ andi(T2, AT, JVM_ACC_SYNCHRONIZED); ++ __ beq(T2, R0, L); ++ __ stop("method needs synchronization"); ++ __ bind(L); ++ } ++#endif ++ } ++ ++ // layout of frame after lock_method ++ // [ monitor entry ] <--- sp ++ // ... ++ // [ monitor entry ] ++ // [ monitor block top ] ( the top monitor entry ) ++ // [ byte code pointer ] (if native, bcp = 0) ++ // [ constant pool cache ] ++ // [ Method* ] ++ // [ locals offset ] ++ // [ sender's sp ] ++ // [ sender's fp ] ++ // [ return address ] <--- fp ++ // [ local var m-1 ] ++ // ... ++ // [ local var 0 ] ++ // [ argumnet word n-1 ] <--- ( sender's sp ) ++ // ... ++ // [ argument word 0 ] <--- LVP ++ ++ ++ // start execution ++#ifdef ASSERT ++ { ++ Label L; ++ __ ld_d(AT, FP, frame::interpreter_frame_monitor_block_top_offset * wordSize); ++ __ beq(AT, SP, L); ++ __ stop("broken stack frame setup in interpreter in native"); ++ __ bind(L); ++ } ++#endif ++ ++ // jvmti/jvmpi support ++ __ notify_method_entry(); ++ ++ __ dispatch_next(vtos); ++ ++ // invocation counter overflow ++ if (inc_counter) { ++ if (ProfileInterpreter) { ++ // We have decided to profile this method in the interpreter ++ __ bind(profile_method); ++ __ call_VM(noreg, CAST_FROM_FN_PTR(address, ++ InterpreterRuntime::profile_method)); ++ __ set_method_data_pointer_for_bcp(); ++ __ get_method(Rmethod); ++ __ b(profile_method_continue); ++ } ++ // Handle overflow of counter and compile method ++ __ bind(invocation_counter_overflow); ++ generate_counter_overflow(&continue_after_compile); ++ } ++ ++ return entry_point; ++} ++ ++// Entry points ++// ++// Here we generate the various kind of entries into the interpreter. ++// The two main entry type are generic bytecode methods and native ++// call method. These both come in synchronized and non-synchronized ++// versions but the frame layout they create is very similar. The ++// other method entry types are really just special purpose entries ++// that are really entry and interpretation all in one. These are for ++// trivial methods like accessor, empty, or special math methods. 
++// ++// When control flow reaches any of the entry types for the interpreter ++// the following holds -> ++// ++// Arguments: ++// ++// Rmethod: Method* ++// V0: receiver ++// ++// ++// Stack layout immediately at entry ++// ++// [ parameter n-1 ] <--- sp ++// ... ++// [ parameter 0 ] ++// [ expression stack ] (caller's java expression stack) ++ ++// Assuming that we don't go to one of the trivial specialized entries ++// the stack will look like below when we are ready to execute the ++// first bytecode (or call the native routine). The register usage ++// will be as the template based interpreter expects (see ++// interpreter_loongarch_64.hpp). ++// ++// local variables follow incoming parameters immediately; i.e. ++// the return address is moved to the end of the locals). ++// ++// [ monitor entry ] <--- sp ++// ... ++// [ monitor entry ] ++// [ monitor block top ] ( the top monitor entry ) ++// [ byte code pointer ] (if native, bcp = 0) ++// [ constant pool cache ] ++// [ Method* ] ++// [ locals offset ] ++// [ sender's sp ] ++// [ sender's fp ] ++// [ return address ] <--- fp ++// [ local var m-1 ] ++// ... ++// [ local var 0 ] ++// [ argumnet word n-1 ] <--- ( sender's sp ) ++// ... ++// [ argument word 0 ] <--- S7 ++ ++address AbstractInterpreterGenerator::generate_method_entry( ++ AbstractInterpreter::MethodKind kind) { ++ // determine code generation flags ++ bool synchronized = false; ++ address entry_point = NULL; ++ switch (kind) { ++ case Interpreter::zerolocals : ++ break; ++ case Interpreter::zerolocals_synchronized: ++ synchronized = true; ++ break; ++ case Interpreter::native : ++ entry_point = ((InterpreterGenerator*)this)->generate_native_entry(false); ++ break; ++ case Interpreter::native_synchronized : ++ entry_point = ((InterpreterGenerator*)this)->generate_native_entry(true); ++ break; ++ case Interpreter::empty : ++ entry_point = ((InterpreterGenerator*)this)->generate_empty_entry(); ++ break; ++ case Interpreter::accessor : ++ entry_point = ((InterpreterGenerator*)this)->generate_accessor_entry(); ++ break; ++ case Interpreter::abstract : ++ entry_point = ((InterpreterGenerator*)this)->generate_abstract_entry(); ++ break; ++ ++ case Interpreter::java_lang_math_sin : // fall thru ++ case Interpreter::java_lang_math_cos : // fall thru ++ case Interpreter::java_lang_math_tan : // fall thru ++ case Interpreter::java_lang_math_log : // fall thru ++ case Interpreter::java_lang_math_log10 : // fall thru ++ case Interpreter::java_lang_math_pow : // fall thru ++ case Interpreter::java_lang_math_exp : break; ++ case Interpreter::java_lang_math_abs : // fall thru ++ case Interpreter::java_lang_math_sqrt : ++ entry_point = ((InterpreterGenerator*)this)->generate_math_entry(kind); break; ++ case Interpreter::java_lang_ref_reference_get: ++ entry_point = ((InterpreterGenerator*)this)->generate_Reference_get_entry(); break; ++ case Interpreter::java_util_zip_CRC32_update: ++ entry_point = ((InterpreterGenerator*)this)->generate_CRC32_update_entry(); break; ++ case Interpreter::java_util_zip_CRC32_updateBytes: // fall thru ++ case Interpreter::java_util_zip_CRC32_updateByteBuffer: ++ entry_point = ((InterpreterGenerator*)this)->generate_CRC32_updateBytes_entry(kind); break; ++ default: ++ fatal(err_msg("unexpected method kind: %d", kind)); ++ break; ++ } ++ if (entry_point) return entry_point; ++ ++ return ((InterpreterGenerator*)this)->generate_normal_entry(synchronized); ++} ++ ++// These should never be compiled since the interpreter will prefer ++// the compiled version to 
the intrinsic version. ++bool AbstractInterpreter::can_be_compiled(methodHandle m) { ++ switch (method_kind(m)) { ++ case Interpreter::java_lang_math_sin : // fall thru ++ case Interpreter::java_lang_math_cos : // fall thru ++ case Interpreter::java_lang_math_tan : // fall thru ++ case Interpreter::java_lang_math_abs : // fall thru ++ case Interpreter::java_lang_math_log : // fall thru ++ case Interpreter::java_lang_math_log10 : // fall thru ++ case Interpreter::java_lang_math_sqrt : // fall thru ++ case Interpreter::java_lang_math_pow : // fall thru ++ case Interpreter::java_lang_math_exp : ++ return false; ++ default: ++ return true; ++ } ++} ++ ++// How much stack a method activation needs in words. ++int AbstractInterpreter::size_top_interpreter_activation(Method* method) { ++ ++ const int entry_size = frame::interpreter_frame_monitor_size(); ++ ++ // total overhead size: entry_size + (saved fp thru expr stack bottom). ++ // be sure to change this if you add/subtract anything to/from the overhead area ++ const int overhead_size = -(frame::interpreter_frame_initial_sp_offset) + entry_size; ++ ++ const int stub_code = 6; // see generate_call_stub ++ // return overhead_size + method->max_locals() + method->max_stack() + stub_code; ++ const int method_stack = (method->max_locals() + method->max_stack()) * ++ Interpreter::stackElementWords; ++ return overhead_size + method_stack + stub_code; ++} ++ ++void AbstractInterpreter::layout_activation(Method* method, ++ int tempcount, ++ int popframe_extra_args, ++ int moncount, ++ int caller_actual_parameters, ++ int callee_param_count, ++ int callee_locals, ++ frame* caller, ++ frame* interpreter_frame, ++ bool is_top_frame, ++ bool is_bottom_frame) { ++ // Note: This calculation must exactly parallel the frame setup ++ // in AbstractInterpreterGenerator::generate_method_entry. ++ // If interpreter_frame!=NULL, set up the method, locals, and monitors. ++ // The frame interpreter_frame, if not NULL, is guaranteed to be the ++ // right size, as determined by a previous call to this method. ++ // It is also guaranteed to be walkable even though it is in a skeletal state ++ ++ // fixed size of an interpreter frame: ++ ++ int max_locals = method->max_locals() * Interpreter::stackElementWords; ++ int extra_locals = (method->max_locals() - method->size_of_parameters()) * Interpreter::stackElementWords; ++ ++#ifdef ASSERT ++ if (!EnableInvokeDynamic) { ++ // @@@ FIXME: Should we correct interpreter_frame_sender_sp in the calling sequences? ++ // Probably, since deoptimization doesn't work yet. 
++ assert(caller->unextended_sp() == interpreter_frame->interpreter_frame_sender_sp(), "Frame not properly walkable"); ++ } ++ assert(caller->sp() == interpreter_frame->sender_sp(), "Frame not properly walkable(2)"); ++#endif ++ ++ interpreter_frame->interpreter_frame_set_method(method); ++ // NOTE the difference in using sender_sp and interpreter_frame_sender_sp ++ // interpreter_frame_sender_sp is the original sp of the caller (the unextended_sp) ++ // and sender_sp is fp+8 ++ intptr_t* locals = interpreter_frame->sender_sp() + max_locals - 1; ++ ++#ifdef ASSERT ++ if (caller->is_interpreted_frame()) { ++ assert(locals < caller->fp() + frame::interpreter_frame_initial_sp_offset, "bad placement"); ++ } ++#endif ++ ++ interpreter_frame->interpreter_frame_set_locals(locals); ++ BasicObjectLock* montop = interpreter_frame->interpreter_frame_monitor_begin(); ++ BasicObjectLock* monbot = montop - moncount; ++ interpreter_frame->interpreter_frame_set_monitor_end(montop - moncount); ++ ++ //set last sp; ++ intptr_t* sp = (intptr_t*) monbot - tempcount*Interpreter::stackElementWords - ++ popframe_extra_args; ++ interpreter_frame->interpreter_frame_set_last_sp(sp); ++ // All frames but the initial interpreter frame we fill in have a ++ // value for sender_sp that allows walking the stack but isn't ++ // truly correct. Correct the value here. ++ // ++ if (extra_locals != 0 && ++ interpreter_frame->sender_sp() == interpreter_frame->interpreter_frame_sender_sp() ) { ++ interpreter_frame->set_interpreter_frame_sender_sp(caller->sp() + extra_locals); ++ } ++ *interpreter_frame->interpreter_frame_cache_addr() = method->constants()->cache(); ++} ++ ++//----------------------------------------------------------------------------- ++// Exceptions ++ ++void TemplateInterpreterGenerator::generate_throw_exception() { ++ // Entry point in previous activation (i.e., if the caller was ++ // interpreted) ++ Interpreter::_rethrow_exception_entry = __ pc(); ++ // Restore sp to interpreter_frame_last_sp even though we are going ++ // to empty the expression stack for the exception processing. ++ __ st_d(R0,FP, frame::interpreter_frame_last_sp_offset * wordSize); ++ ++ // V0: exception ++ // V1: return address/pc that threw exception ++ __ restore_bcp(); // BCP points to call/send ++ __ restore_locals(); ++ ++ //add for compressedoops ++ __ reinit_heapbase(); ++ // Entry point for exceptions thrown within interpreter code ++ Interpreter::_throw_exception_entry = __ pc(); ++ // expression stack is undefined here ++ // V0: exception ++ // BCP: exception bcp ++ __ verify_oop(V0); ++ ++ // expression stack must be empty before entering the VM in case of an exception ++ __ empty_expression_stack(); ++ // find exception handler address and preserve exception oop ++ __ move(A1, V0); ++ __ call_VM(V1, CAST_FROM_FN_PTR(address, InterpreterRuntime::exception_handler_for_exception), A1); ++ // V0: exception handler entry point ++ // V1: preserved exception oop ++ // S0: bcp for exception handler ++ __ push(V1); // push exception which is now the only value on the stack ++ __ jr(V0); // jump to exception handler (may be _remove_activation_entry!) ++ ++ // If the exception is not handled in the current frame the frame is removed and ++ // the exception is rethrown (i.e. exception continuation is _rethrow_exception). ++ // ++ // Note: At this point the bci is still the bxi for the instruction which caused ++ // the exception and the expression stack is empty. 
Thus, for any VM calls ++ // at this point, GC will find a legal oop map (with empty expression stack). ++ ++ // In current activation ++ // V0: exception ++ // BCP: exception bcp ++ ++ // ++ // JVMTI PopFrame support ++ // ++ ++ Interpreter::_remove_activation_preserving_args_entry = __ pc(); ++ __ empty_expression_stack(); ++ // Set the popframe_processing bit in pending_popframe_condition indicating that we are ++ // currently handling popframe, so that call_VMs that may happen later do not trigger new ++ // popframe handling cycles. ++#ifndef OPT_THREAD ++ Register thread = T2; ++ __ get_thread(T2); ++#else ++ Register thread = TREG; ++#endif ++ __ ld_w(T3, thread, in_bytes(JavaThread::popframe_condition_offset())); ++ __ ori(T3, T3, JavaThread::popframe_processing_bit); ++ __ st_w(T3, thread, in_bytes(JavaThread::popframe_condition_offset())); ++ ++#ifndef CORE ++ { ++ // Check to see whether we are returning to a deoptimized frame. ++ // (The PopFrame call ensures that the caller of the popped frame is ++ // either interpreted or compiled and deoptimizes it if compiled.) ++ // In this case, we can't call dispatch_next() after the frame is ++ // popped, but instead must save the incoming arguments and restore ++ // them after deoptimization has occurred. ++ // ++ // Note that we don't compare the return PC against the ++ // deoptimization blob's unpack entry because of the presence of ++ // adapter frames in C2. ++ Label caller_not_deoptimized; ++ __ ld_d(A0, FP, frame::return_addr_offset * wordSize); ++ __ super_call_VM_leaf(CAST_FROM_FN_PTR(address, InterpreterRuntime::interpreter_contains), A0); ++ __ bne(V0, R0, caller_not_deoptimized); ++ ++ // Compute size of arguments for saving when returning to deoptimized caller ++ __ get_method(A1); ++ __ verify_oop(A1); ++ __ ld_d(A1, A1, in_bytes(Method::const_offset())); ++ __ ld_hu(A1, A1, in_bytes(ConstMethod::size_of_parameters_offset())); ++ __ shl(A1, Interpreter::logStackElementSize); ++ __ restore_locals(); ++ __ sub_d(A2, LVP, A1); ++ __ addi_d(A2, A2, wordSize); ++ // Save these arguments ++#ifndef OPT_THREAD ++ __ get_thread(A0); ++#else ++ __ move(A0, TREG); ++#endif ++ __ super_call_VM_leaf(CAST_FROM_FN_PTR(address, Deoptimization::popframe_preserve_args), A0, A1, A2); ++ ++ __ remove_activation(vtos, T4, false, false, false); ++ ++ // Inform deoptimization that it is responsible for restoring these arguments ++#ifndef OPT_THREAD ++ __ get_thread(thread); ++#endif ++ __ li(AT, JavaThread::popframe_force_deopt_reexecution_bit); ++ __ st_w(AT, thread, in_bytes(JavaThread::popframe_condition_offset())); ++ // Continue in deoptimization handler ++ __ jr(T4); ++ ++ __ bind(caller_not_deoptimized); ++ } ++#endif /* !CORE */ ++ ++ __ remove_activation(vtos, T3, ++ /* throw_monitor_exception */ false, ++ /* install_monitor_exception */ false, ++ /* notify_jvmdi */ false); ++ ++ // Clear the popframe condition flag ++ // Finish with popframe handling ++ // A previous I2C followed by a deoptimization might have moved the ++ // outgoing arguments further up the stack. PopFrame expects the ++ // mutations to those outgoing arguments to be preserved and other ++ // constraints basically require this frame to look exactly as ++ // though it had previously invoked an interpreted activation with ++ // no space between the top of the expression stack (current ++ // last_sp) and the top of stack. 
Rather than force deopt to ++ // maintain this kind of invariant all the time we call a small ++ // fixup routine to move the mutated arguments onto the top of our ++ // expression stack if necessary. ++ __ move(T8, SP); ++ __ ld_d(A2, FP, frame::interpreter_frame_last_sp_offset * wordSize); ++#ifndef OPT_THREAD ++ __ get_thread(thread); ++#endif ++ // PC must point into interpreter here ++ Label L; ++ __ bind(L); ++ __ set_last_Java_frame(thread, noreg, FP, L); ++ __ super_call_VM_leaf(CAST_FROM_FN_PTR(address, InterpreterRuntime::popframe_move_outgoing_args), thread, T8, A2); ++ __ reset_last_Java_frame(thread, true); ++ // Restore the last_sp and null it out ++ __ ld_d(SP, FP, frame::interpreter_frame_last_sp_offset * wordSize); ++ __ st_d(R0, FP, frame::interpreter_frame_last_sp_offset * wordSize); ++ ++ ++ ++ __ li(AT, JavaThread::popframe_inactive); ++ __ st_w(AT, thread, in_bytes(JavaThread::popframe_condition_offset())); ++ ++ // Finish with popframe handling ++ __ restore_bcp(); ++ __ restore_locals(); ++#ifndef CORE ++ // The method data pointer was incremented already during ++ // call profiling. We have to restore the mdp for the current bcp. ++ if (ProfileInterpreter) { ++ __ set_method_data_pointer_for_bcp(); ++ } ++#endif // !CORE ++ // Clear the popframe condition flag ++#ifndef OPT_THREAD ++ __ get_thread(thread); ++#endif ++ __ li(AT, JavaThread::popframe_inactive); ++ __ st_w(AT, thread, in_bytes(JavaThread::popframe_condition_offset())); ++ ++#if INCLUDE_JVMTI ++ { ++ Label L_done; ++ ++ __ ld_bu(AT, BCP, 0); ++ __ addi_d(AT, AT, -1 * Bytecodes::_invokestatic); ++ __ bne(AT, R0, L_done); ++ ++ // The member name argument must be restored if _invokestatic is re-executed after a PopFrame call. ++ // Detect such a case in the InterpreterRuntime function and return the member name argument, or NULL. 
++ ++ __ get_method(T4); ++ __ ld_d(T8, LVP, 0); ++ __ call_VM(T8, CAST_FROM_FN_PTR(address, InterpreterRuntime::member_name_arg_or_null), T8, T4, BCP); ++ ++ __ beq(T8, R0, L_done); ++ ++ __ st_d(T8, SP, 0); ++ __ bind(L_done); ++ } ++#endif // INCLUDE_JVMTI ++ ++ __ dispatch_next(vtos); ++ // end of PopFrame support ++ ++ Interpreter::_remove_activation_entry = __ pc(); ++ ++ // preserve exception over this code sequence ++ __ pop(T0); ++#ifndef OPT_THREAD ++ __ get_thread(thread); ++#endif ++ __ st_d(T0, thread, in_bytes(JavaThread::vm_result_offset())); ++ // remove the activation (without doing throws on illegalMonitorExceptions) ++ __ remove_activation(vtos, T3, false, true, false); ++ // restore exception ++ __ get_vm_result(T0, thread); ++ __ verify_oop(T0); ++ ++ // In between activations - previous activation type unknown yet ++ // compute continuation point - the continuation point expects ++ // the following registers set up: ++ // ++ // T0: exception ++ // T1: return address/pc that threw exception ++ // SP: expression stack of caller ++ // FP: fp of caller ++ __ push2(T0, T3); // save exception and return address ++ __ move(A1, T3); ++ __ super_call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::exception_handler_for_return_address), thread, A1); ++ __ move(T4, V0); // save exception handler ++ __ pop2(V0, V1); // restore return address and exception ++ ++ // Note that an "issuing PC" is actually the next PC after the call ++ __ jr(T4); // jump to exception handler of caller ++} ++ ++ ++// ++// JVMTI ForceEarlyReturn support ++// ++address TemplateInterpreterGenerator::generate_earlyret_entry_for(TosState state) { ++ address entry = __ pc(); ++ __ restore_bcp(); ++ __ restore_locals(); ++ __ empty_expression_stack(); ++ __ empty_FPU_stack(); ++ __ load_earlyret_value(state); ++ ++#ifndef OPT_THREAD ++ __ get_thread(TREG); ++#endif ++ __ ld_ptr(T4, TREG, in_bytes(JavaThread::jvmti_thread_state_offset())); ++ const Address cond_addr(T4, in_bytes(JvmtiThreadState::earlyret_state_offset())); ++ // Clear the earlyret state ++ __ li(AT, JvmtiThreadState::earlyret_inactive); ++ __ st_w(AT, cond_addr); ++ __ membar(__ AnyAny);//no membar here for aarch64 ++ ++ ++ __ remove_activation(state, T0, ++ false, /* throw_monitor_exception */ ++ false, /* install_monitor_exception */ ++ true); /* notify_jvmdi */ ++ __ membar(__ AnyAny); ++ __ jr(T0); ++ ++ return entry; ++} // end of ForceEarlyReturn support ++ ++ ++//----------------------------------------------------------------------------- ++// Helper for vtos entry point generation ++ ++void TemplateInterpreterGenerator::set_vtos_entry_points(Template* t, ++ address& bep, ++ address& cep, ++ address& sep, ++ address& aep, ++ address& iep, ++ address& lep, ++ address& fep, ++ address& dep, ++ address& vep) { ++ assert(t->is_valid() && t->tos_in() == vtos, "illegal template"); ++ Label L; ++ fep = __ pc(); __ push(ftos); __ b(L); ++ dep = __ pc(); __ push(dtos); __ b(L); ++ lep = __ pc(); __ push(ltos); __ b(L); ++ aep =__ pc(); __ push(atos); __ b(L); ++ bep = cep = sep = ++ iep = __ pc(); __ push(itos); ++ vep = __ pc(); ++ __ bind(L); ++ generate_and_dispatch(t); ++} ++ ++ ++//----------------------------------------------------------------------------- ++// Generation of individual instructions ++ ++// helpers for generate_and_dispatch ++ ++ ++InterpreterGenerator::InterpreterGenerator(StubQueue* code) ++ : TemplateInterpreterGenerator(code) { ++ generate_all(); // down here so it can be "virtual" ++} ++ 
++//----------------------------------------------------------------------------- ++ ++// Non-product code ++#ifndef PRODUCT ++address TemplateInterpreterGenerator::generate_trace_code(TosState state) { ++ address entry = __ pc(); ++ ++ // prepare expression stack ++ __ push(state); // save tosca ++ ++ // tos & tos2 ++ // trace_bytecode need actually 4 args, the last two is tos&tos2 ++ // this work fine for x86. but LA ABI calling convention will store A2-A3 ++ // to the stack position it think is the tos&tos2 ++ // when the expression stack have no more than 2 data, error occur. ++ __ ld_d(A2, SP, 0); ++ __ ld_d(A3, SP, 1 * wordSize); ++ ++ // pass arguments & call tracer ++ __ call_VM(noreg, CAST_FROM_FN_PTR(address, SharedRuntime::trace_bytecode), RA, A2, A3); ++ __ move(RA, V0); // make sure return address is not destroyed by pop(state) ++ ++ // restore expression stack ++ __ pop(state); // restore tosca ++ ++ // return ++ __ jr(RA); ++ return entry; ++} ++ ++void TemplateInterpreterGenerator::count_bytecode() { ++ __ li(T8, (long)&BytecodeCounter::_counter_value); ++ __ ld_w(AT, T8, 0); ++ __ addi_d(AT, AT, 1); ++ __ st_w(AT, T8, 0); ++} ++ ++void TemplateInterpreterGenerator::histogram_bytecode(Template* t) { ++ __ li(T8, (long)&BytecodeHistogram::_counters[t->bytecode()]); ++ __ ld_w(AT, T8, 0); ++ __ addi_d(AT, AT, 1); ++ __ st_w(AT, T8, 0); ++} ++ ++void TemplateInterpreterGenerator::histogram_bytecode_pair(Template* t) { ++ __ li(T8, (long)&BytecodePairHistogram::_index); ++ __ ld_w(T4, T8, 0); ++ __ srli_d(T4, T4, BytecodePairHistogram::log2_number_of_codes); ++ __ li(T8, ((long)t->bytecode()) << BytecodePairHistogram::log2_number_of_codes); ++ __ orr(T4, T4, T8); ++ __ li(T8, (long)&BytecodePairHistogram::_index); ++ __ st_w(T4, T8, 0); ++ __ slli_d(T4, T4, 2); ++ __ li(T8, (long)BytecodePairHistogram::_counters); ++ __ add_d(T8, T8, T4); ++ __ ld_w(AT, T8, 0); ++ __ addi_d(AT, AT, 1); ++ __ st_w(AT, T8, 0); ++} ++ ++ ++void TemplateInterpreterGenerator::trace_bytecode(Template* t) { ++ // Call a little run-time stub to avoid blow-up for each bytecode. ++ // The run-time runtime saves the right registers, depending on ++ // the tosca in-state for the given template. ++ address entry = Interpreter::trace_code(t->tos_in()); ++ assert(entry != NULL, "entry must have been generated"); ++ __ call(entry, relocInfo::none); ++ //add for compressedoops ++ __ reinit_heapbase(); ++} ++ ++ ++void TemplateInterpreterGenerator::stop_interpreter_at() { ++ Label L; ++ __ li(T8, long(&BytecodeCounter::_counter_value)); ++ __ ld_w(T8, T8, 0); ++ __ li(AT, StopInterpreterAt); ++ __ bne(T8, AT, L); ++ __ brk(5); ++ __ bind(L); ++} ++#endif // !PRODUCT ++#endif // ! CC_INTERP +diff --git a/hotspot/src/cpu/loongarch/vm/templateTable_loongarch_64.cpp b/hotspot/src/cpu/loongarch/vm/templateTable_loongarch_64.cpp +new file mode 100644 +index 0000000000..228217f001 +--- /dev/null ++++ b/hotspot/src/cpu/loongarch/vm/templateTable_loongarch_64.cpp +@@ -0,0 +1,4024 @@ ++/* ++ * Copyright (c) 2003, 2013, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. 
++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#include "precompiled.hpp" ++#include "asm/macroAssembler.hpp" ++#include "interpreter/interpreter.hpp" ++#include "interpreter/interpreterRuntime.hpp" ++#include "interpreter/templateTable.hpp" ++#include "memory/universe.inline.hpp" ++#include "oops/methodData.hpp" ++#include "oops/objArrayKlass.hpp" ++#include "oops/oop.inline.hpp" ++#include "prims/methodHandles.hpp" ++#include "runtime/sharedRuntime.hpp" ++#include "runtime/stubRoutines.hpp" ++#include "runtime/synchronizer.hpp" ++#include "utilities/macros.hpp" ++ ++ ++#ifndef CC_INTERP ++ ++#define __ _masm-> ++ ++#define A0 RA0 ++#define A1 RA1 ++#define A2 RA2 ++#define A3 RA3 ++#define A4 RA4 ++#define A5 RA5 ++#define A6 RA6 ++#define A7 RA7 ++#define T0 RT0 ++#define T1 RT1 ++#define T2 RT2 ++#define T3 RT3 ++#define T4 RT4 ++#define T5 RT5 ++#define T6 RT6 ++#define T7 RT7 ++#define T8 RT8 ++ ++// Platform-dependent initialization ++ ++void TemplateTable::pd_initialize() { ++ // No LoongArch specific initialization ++} ++ ++// Address computation: local variables ++ ++static inline Address iaddress(int n) { ++ return Address(LVP, Interpreter::local_offset_in_bytes(n)); ++} ++ ++static inline Address laddress(int n) { ++ return iaddress(n + 1); ++} ++ ++static inline Address faddress(int n) { ++ return iaddress(n); ++} ++ ++static inline Address daddress(int n) { ++ return laddress(n); ++} ++ ++static inline Address aaddress(int n) { ++ return iaddress(n); ++} ++static inline Address haddress(int n) { return iaddress(n + 0); } ++ ++ ++static inline Address at_sp() { return Address(SP, 0); } ++static inline Address at_sp_p1() { return Address(SP, 1 * wordSize); } ++static inline Address at_sp_p2() { return Address(SP, 2 * wordSize); } ++ ++// At top of Java expression stack which may be different than sp(). ++// It isn't for category 1 objects. ++static inline Address at_tos () { ++ Address tos = Address(SP, Interpreter::expr_offset_in_bytes(0)); ++ return tos; ++} ++ ++static inline Address at_tos_p1() { ++ return Address(SP, Interpreter::expr_offset_in_bytes(1)); ++} ++ ++static inline Address at_tos_p2() { ++ return Address(SP, Interpreter::expr_offset_in_bytes(2)); ++} ++ ++static inline Address at_tos_p3() { ++ return Address(SP, Interpreter::expr_offset_in_bytes(3)); ++} ++ ++// we use S0 as bcp, be sure you have bcp in S0 before you call any of the Template generator ++Address TemplateTable::at_bcp(int offset) { ++ assert(_desc->uses_bcp(), "inconsistent uses_bcp information"); ++ return Address(BCP, offset); ++} ++ ++// Miscelaneous helper routines ++// Store an oop (or NULL) at the address described by obj. 
++// If val == noreg this means store a NULL ++ ++static void do_oop_store(InterpreterMacroAssembler* _masm, ++ Address obj, ++ Register val, ++ BarrierSet::Name barrier, ++ bool precise) { ++ assert(val == noreg || val == FSR, "parameter is just for looks"); ++ switch (barrier) { ++#if INCLUDE_ALL_GCS ++ case BarrierSet::G1SATBCT: ++ case BarrierSet::G1SATBCTLogging: ++ { ++ // flatten object address if needed ++ if (obj.index() == noreg && obj.disp() == 0) { ++ if (obj.base() != T3) { ++ __ move(T3, obj.base()); ++ } ++ } else { ++ __ lea(T3, obj); ++ } ++ __ g1_write_barrier_pre(T3 /* obj */, ++ T1 /* pre_val */, ++ TREG /* thread */, ++ T4 /* tmp */, ++ val != noreg /* tosca_live */, ++ false /* expand_call */); ++ if (val == noreg) { ++ __ store_heap_oop_null(Address(T3, 0)); ++ } else { ++ // G1 barrier needs uncompressed oop for region cross check. ++ Register new_val = val; ++ if (UseCompressedOops) { ++ new_val = T1; ++ __ move(new_val, val); ++ } ++ __ store_heap_oop(Address(T3, 0), val); ++ __ g1_write_barrier_post(T3 /* store_adr */, ++ new_val /* new_val */, ++ TREG /* thread */, ++ T4 /* tmp */, ++ T1 /* tmp2 */); ++ } ++ } ++ break; ++#endif // INCLUDE_ALL_GCS ++ case BarrierSet::CardTableModRef: ++ case BarrierSet::CardTableExtension: ++ { ++ if (val == noreg) { ++ __ store_heap_oop_null(obj); ++ } else { ++ __ store_heap_oop(obj, val); ++ // flatten object address if needed ++ if (!precise || (obj.index() == noreg && obj.disp() == 0)) { ++ __ store_check(obj.base()); ++ } else { ++ //TODO: LA ++ __ lea(T4, obj); ++ __ store_check(T4); ++ } ++ } ++ } ++ break; ++ case BarrierSet::ModRef: ++ case BarrierSet::Other: ++ if (val == noreg) { ++ __ store_heap_oop_null(obj); ++ } else { ++ __ store_heap_oop(obj, val); ++ } ++ break; ++ default : ++ ShouldNotReachHere(); ++ ++ } ++} ++ ++// bytecode folding ++void TemplateTable::patch_bytecode(Bytecodes::Code bc, Register bc_reg, ++ Register tmp_reg, bool load_bc_into_bc_reg/*=true*/, ++ int byte_no) { ++ if (!RewriteBytecodes) return; ++ Label L_patch_done; ++ ++ switch (bc) { ++ case Bytecodes::_fast_aputfield: ++ case Bytecodes::_fast_bputfield: ++ case Bytecodes::_fast_zputfield: ++ case Bytecodes::_fast_cputfield: ++ case Bytecodes::_fast_dputfield: ++ case Bytecodes::_fast_fputfield: ++ case Bytecodes::_fast_iputfield: ++ case Bytecodes::_fast_lputfield: ++ case Bytecodes::_fast_sputfield: ++ { ++ // We skip bytecode quickening for putfield instructions when ++ // the put_code written to the constant pool cache is zero. ++ // This is required so that every execution of this instruction ++ // calls out to InterpreterRuntime::resolve_get_put to do ++ // additional, required work. ++ assert(byte_no == f1_byte || byte_no == f2_byte, "byte_no out of range"); ++ assert(load_bc_into_bc_reg, "we use bc_reg as temp"); ++ __ get_cache_and_index_and_bytecode_at_bcp(tmp_reg, bc_reg, tmp_reg, byte_no, 1); ++ __ addi_d(bc_reg, R0, bc); ++ __ beq(tmp_reg, R0, L_patch_done); ++ } ++ break; ++ default: ++ assert(byte_no == -1, "sanity"); ++ // the pair bytecodes have already done the load. 
++ if (load_bc_into_bc_reg) { ++ __ li(bc_reg, bc); ++ } ++ } ++ ++ if (JvmtiExport::can_post_breakpoint()) { ++ Label L_fast_patch; ++ // if a breakpoint is present we can't rewrite the stream directly ++ __ ld_bu(tmp_reg, at_bcp(0)); ++ __ li(AT, Bytecodes::_breakpoint); ++ __ bne(tmp_reg, AT, L_fast_patch); ++ ++ __ get_method(tmp_reg); ++ // Let breakpoint table handling rewrite to quicker bytecode ++ __ call_VM(NOREG, CAST_FROM_FN_PTR(address, ++ InterpreterRuntime::set_original_bytecode_at), tmp_reg, BCP, bc_reg); ++ ++ __ b(L_patch_done); ++ __ bind(L_fast_patch); ++ } ++ ++#ifdef ASSERT ++ Label L_okay; ++ __ ld_bu(tmp_reg, at_bcp(0)); ++ __ li(AT, (int)Bytecodes::java_code(bc)); ++ __ beq(tmp_reg, AT, L_okay); ++ __ beq(tmp_reg, bc_reg, L_patch_done); ++ __ stop("patching the wrong bytecode"); ++ __ bind(L_okay); ++#endif ++ ++ // patch bytecode ++ __ st_b(bc_reg, at_bcp(0)); ++ __ bind(L_patch_done); ++} ++ ++ ++// Individual instructions ++ ++void TemplateTable::nop() { ++ transition(vtos, vtos); ++ // nothing to do ++} ++ ++void TemplateTable::shouldnotreachhere() { ++ transition(vtos, vtos); ++ __ stop("shouldnotreachhere bytecode"); ++} ++ ++void TemplateTable::aconst_null() { ++ transition(vtos, atos); ++ __ move(FSR, R0); ++} ++ ++void TemplateTable::iconst(int value) { ++ transition(vtos, itos); ++ if (value == 0) { ++ __ move(FSR, R0); ++ } else { ++ __ li(FSR, value); ++ } ++} ++ ++void TemplateTable::lconst(int value) { ++ transition(vtos, ltos); ++ if (value == 0) { ++ __ move(FSR, R0); ++ } else { ++ __ li(FSR, value); ++ } ++} ++ ++void TemplateTable::fconst(int value) { ++ transition(vtos, ftos); ++ switch( value ) { ++ case 0: __ movgr2fr_w(FSF, R0); return; ++ case 1: __ addi_d(AT, R0, 1); break; ++ case 2: __ addi_d(AT, R0, 2); break; ++ default: ShouldNotReachHere(); ++ } ++ __ movgr2fr_w(FSF, AT); ++ __ ffint_s_w(FSF, FSF); ++} ++ ++void TemplateTable::dconst(int value) { ++ transition(vtos, dtos); ++ switch( value ) { ++ case 0: __ movgr2fr_d(FSF, R0); ++ return; ++ case 1: __ addi_d(AT, R0, 1); ++ __ movgr2fr_d(FSF, AT); ++ __ ffint_d_w(FSF, FSF); ++ break; ++ default: ShouldNotReachHere(); ++ } ++} ++ ++void TemplateTable::bipush() { ++ transition(vtos, itos); ++ __ ld_b(FSR, at_bcp(1)); ++} ++ ++void TemplateTable::sipush() { ++ transition(vtos, itos); ++ __ ld_b(FSR, BCP, 1); ++ __ ld_bu(AT, BCP, 2); ++ __ slli_d(FSR, FSR, 8); ++ __ orr(FSR, FSR, AT); ++} ++ ++// T1 : tags ++// T2 : index ++// T3 : cpool ++// T8 : tag ++void TemplateTable::ldc(bool wide) { ++ transition(vtos, vtos); ++ Label call_ldc, notFloat, notClass, Done; ++ // get index in cpool ++ if (wide) { ++ __ get_unsigned_2_byte_index_at_bcp(T2, 1); ++ } else { ++ __ ld_bu(T2, at_bcp(1)); ++ } ++ ++ __ get_cpool_and_tags(T3, T1); ++ ++ const int base_offset = ConstantPool::header_size() * wordSize; ++ const int tags_offset = Array::base_offset_in_bytes(); ++ ++ // get type ++ __ add_d(AT, T1, T2); ++ __ ld_b(T1, AT, tags_offset); ++ if(os::is_MP()) { ++ __ membar(Assembler::Membar_mask_bits(__ LoadLoad | __ LoadStore)); ++ } ++ //now T1 is the tag ++ ++ // unresolved class - get the resolved class ++ __ addi_d(AT, T1, - JVM_CONSTANT_UnresolvedClass); ++ __ beq(AT, R0, call_ldc); ++ ++ // unresolved class in error (resolution failed) - call into runtime ++ // so that the same error from first resolution attempt is thrown. 
++ __ addi_d(AT, T1, -JVM_CONSTANT_UnresolvedClassInError); ++ __ beq(AT, R0, call_ldc); ++ ++ // resolved class - need to call vm to get java mirror of the class ++ __ addi_d(AT, T1, - JVM_CONSTANT_Class); ++ __ slli_d(T2, T2, Address::times_8); ++ __ bne(AT, R0, notClass); ++ ++ __ bind(call_ldc); ++ __ li(A1, wide); ++ call_VM(FSR, CAST_FROM_FN_PTR(address, InterpreterRuntime::ldc), A1); ++ //__ push(atos); ++ __ addi_d(SP, SP, - Interpreter::stackElementSize); ++ __ st_d(FSR, SP, 0); ++ __ b(Done); ++ ++ __ bind(notClass); ++ __ addi_d(AT, T1, -JVM_CONSTANT_Float); ++ __ bne(AT, R0, notFloat); ++ // ftos ++ __ add_d(AT, T3, T2); ++ __ fld_s(FSF, AT, base_offset); ++ //__ push_f(); ++ __ addi_d(SP, SP, - Interpreter::stackElementSize); ++ __ fst_s(FSF, SP, 0); ++ __ b(Done); ++ ++ __ bind(notFloat); ++#ifdef ASSERT ++ { ++ Label L; ++ __ addi_d(AT, T1, -JVM_CONSTANT_Integer); ++ __ beq(AT, R0, L); ++ __ stop("unexpected tag type in ldc"); ++ __ bind(L); ++ } ++#endif ++ // itos JVM_CONSTANT_Integer only ++ __ add_d(T0, T3, T2); ++ __ ld_w(FSR, T0, base_offset); ++ __ push(itos); ++ __ bind(Done); ++} ++ ++// Fast path for caching oop constants. ++void TemplateTable::fast_aldc(bool wide) { ++ transition(vtos, atos); ++ ++ Register result = FSR; ++ Register tmp = SSR; ++ int index_size = wide ? sizeof(u2) : sizeof(u1); ++ ++ Label resolved; ++ ++ // We are resolved if the resolved reference cache entry contains a ++ // non-null object (String, MethodType, etc.) ++ assert_different_registers(result, tmp); ++ __ get_cache_index_at_bcp(tmp, 1, index_size); ++ __ load_resolved_reference_at_index(result, tmp); ++ __ bne(result, R0, resolved); ++ ++ address entry = CAST_FROM_FN_PTR(address, InterpreterRuntime::resolve_ldc); ++ // first time invocation - must resolve first ++ int i = (int)bytecode(); ++ __ li(tmp, i); ++ __ call_VM(result, entry, tmp); ++ ++ __ bind(resolved); ++ ++ if (VerifyOops) { ++ __ verify_oop(result); ++ } ++} ++ ++ ++// used register: T2, T3, T1 ++// T2 : index ++// T3 : cpool ++// T1 : tag ++void TemplateTable::ldc2_w() { ++ transition(vtos, vtos); ++ Label Long, Done; ++ ++ // get index in cpool ++ __ get_unsigned_2_byte_index_at_bcp(T2, 1); ++ ++ __ get_cpool_and_tags(T3, T1); ++ ++ const int base_offset = ConstantPool::header_size() * wordSize; ++ const int tags_offset = Array::base_offset_in_bytes(); ++ ++ // get type in T1 ++ __ add_d(AT, T1, T2); ++ __ ld_b(T1, AT, tags_offset); ++ ++ __ addi_d(AT, T1, - JVM_CONSTANT_Double); ++ __ slli_d(T2, T2, Address::times_8); ++ __ bne(AT, R0, Long); ++ ++ // dtos ++ __ add_d(AT, T3, T2); ++ __ fld_d(FSF, AT, base_offset); ++ __ push(dtos); ++ __ b(Done); ++ ++ // ltos ++ __ bind(Long); ++ __ add_d(AT, T3, T2); ++ __ ld_d(FSR, AT, base_offset); ++ __ push(ltos); ++ ++ __ bind(Done); ++} ++ ++// we compute the actual local variable address here ++void TemplateTable::locals_index(Register reg, int offset) { ++ __ ld_bu(reg, at_bcp(offset)); ++ __ slli_d(reg, reg, Address::times_8); ++ __ sub_d(reg, LVP, reg); ++} ++ ++// this method will do bytecode folding of the two form: ++// iload iload iload caload ++// used register : T2, T3 ++// T2 : bytecode ++// T3 : folded code ++void TemplateTable::iload() { ++ transition(vtos, itos); ++ if (RewriteFrequentPairs) { ++ Label rewrite, done; ++ // get the next bytecode in T2 ++ __ ld_bu(T2, at_bcp(Bytecodes::length_for(Bytecodes::_iload))); ++ // if _iload, wait to rewrite to iload2. We only want to rewrite the ++ // last two iloads in a pair. 
Comparing against fast_iload means that ++ // the next bytecode is neither an iload or a caload, and therefore ++ // an iload pair. ++ __ li(AT, Bytecodes::_iload); ++ __ beq(AT, T2, done); ++ ++ __ li(T3, Bytecodes::_fast_iload2); ++ __ li(AT, Bytecodes::_fast_iload); ++ __ beq(AT, T2, rewrite); ++ ++ // if _caload, rewrite to fast_icaload ++ __ li(T3, Bytecodes::_fast_icaload); ++ __ li(AT, Bytecodes::_caload); ++ __ beq(AT, T2, rewrite); ++ ++ // rewrite so iload doesn't check again. ++ __ li(T3, Bytecodes::_fast_iload); ++ ++ // rewrite ++ // T3 : fast bytecode ++ __ bind(rewrite); ++ patch_bytecode(Bytecodes::_iload, T3, T2, false); ++ __ bind(done); ++ } ++ ++ // Get the local value into tos ++ locals_index(T2); ++ __ ld_w(FSR, T2, 0); ++} ++ ++// used register T2 ++// T2 : index ++void TemplateTable::fast_iload2() { ++ transition(vtos, itos); ++ locals_index(T2); ++ __ ld_w(FSR, T2, 0); ++ __ push(itos); ++ locals_index(T2, 3); ++ __ ld_w(FSR, T2, 0); ++} ++ ++// used register T2 ++// T2 : index ++void TemplateTable::fast_iload() { ++ transition(vtos, itos); ++ locals_index(T2); ++ __ ld_w(FSR, T2, 0); ++} ++ ++// used register T2 ++// T2 : index ++void TemplateTable::lload() { ++ transition(vtos, ltos); ++ locals_index(T2); ++ __ ld_d(FSR, T2, -wordSize); ++} ++ ++// used register T2 ++// T2 : index ++void TemplateTable::fload() { ++ transition(vtos, ftos); ++ locals_index(T2); ++ __ fld_s(FSF, T2, 0); ++} ++ ++// used register T2 ++// T2 : index ++void TemplateTable::dload() { ++ transition(vtos, dtos); ++ locals_index(T2); ++ __ fld_d(FSF, T2, -wordSize); ++} ++ ++// used register T2 ++// T2 : index ++void TemplateTable::aload() { ++ transition(vtos, atos); ++ locals_index(T2); ++ __ ld_d(FSR, T2, 0); ++} ++ ++void TemplateTable::locals_index_wide(Register reg) { ++ __ get_unsigned_2_byte_index_at_bcp(reg, 2); ++ __ slli_d(reg, reg, Address::times_8); ++ __ sub_d(reg, LVP, reg); ++} ++ ++// used register T2 ++// T2 : index ++void TemplateTable::wide_iload() { ++ transition(vtos, itos); ++ locals_index_wide(T2); ++ __ ld_d(FSR, T2, 0); ++} ++ ++// used register T2 ++// T2 : index ++void TemplateTable::wide_lload() { ++ transition(vtos, ltos); ++ locals_index_wide(T2); ++ __ ld_d(FSR, T2, -wordSize); ++} ++ ++// used register T2 ++// T2 : index ++void TemplateTable::wide_fload() { ++ transition(vtos, ftos); ++ locals_index_wide(T2); ++ __ fld_s(FSF, T2, 0); ++} ++ ++// used register T2 ++// T2 : index ++void TemplateTable::wide_dload() { ++ transition(vtos, dtos); ++ locals_index_wide(T2); ++ __ fld_d(FSF, T2, -wordSize); ++} ++ ++// used register T2 ++// T2 : index ++void TemplateTable::wide_aload() { ++ transition(vtos, atos); ++ locals_index_wide(T2); ++ __ ld_d(FSR, T2, 0); ++} ++ ++// we use A2 as the regiser for index, BE CAREFUL! 
++// we don't use our tge 29 now, for later optimization
++void TemplateTable::index_check(Register array, Register index) {
++  // Pop ptr into array
++  __ pop_ptr(array);
++  index_check_without_pop(array, index);
++}
++
++void TemplateTable::index_check_without_pop(Register array, Register index) {
++  // destroys A2
++  // check array
++  __ null_check(array, arrayOopDesc::length_offset_in_bytes());
++
++  // sign extend since tos (index) might contain garbage in upper bits
++  __ slli_w(index, index, 0);
++
++  // check index
++  Label ok;
++  __ ld_w(AT, array, arrayOopDesc::length_offset_in_bytes());
++  __ bltu(index, AT, ok);
++
++  // throw_ArrayIndexOutOfBoundsException assumes the aberrant index is in A2
++  if (A2 != index) __ move(A2, index);
++  __ jmp(Interpreter::_throw_ArrayIndexOutOfBoundsException_entry);
++  __ bind(ok);
++}
++
++void TemplateTable::iaload() {
++  transition(itos, itos);
++  index_check(SSR, FSR);
++  __ alsl_d(FSR, FSR, SSR, 1);
++  __ ld_w(FSR, FSR, arrayOopDesc::base_offset_in_bytes(T_INT));
++}
++
++void TemplateTable::laload() {
++  transition(itos, ltos);
++  index_check(SSR, FSR);
++  __ alsl_d(AT, FSR, SSR, Address::times_8 - 1);
++  __ ld_d(FSR, AT, arrayOopDesc::base_offset_in_bytes(T_LONG));
++}
++
++void TemplateTable::faload() {
++  transition(itos, ftos);
++  index_check(SSR, FSR);
++  __ shl(FSR, 2);
++  __ add_d(FSR, SSR, FSR);
++  __ fld_s(FSF, FSR, arrayOopDesc::base_offset_in_bytes(T_FLOAT));
++}
++
++void TemplateTable::daload() {
++  transition(itos, dtos);
++  index_check(SSR, FSR);
++  __ alsl_d(AT, FSR, SSR, 2);
++  __ fld_d(FSF, AT, arrayOopDesc::base_offset_in_bytes(T_DOUBLE));
++}
++
++void TemplateTable::aaload() {
++  transition(itos, atos);
++  index_check(SSR, FSR);
++  __ alsl_d(FSR, FSR, SSR, (UseCompressedOops ? Address::times_4 : Address::times_8) - 1);
++  //add for compressedoops
++  __ load_heap_oop(FSR, Address(FSR, arrayOopDesc::base_offset_in_bytes(T_OBJECT)));
++}
++
++void TemplateTable::baload() {
++  transition(itos, itos);
++  index_check(SSR, FSR);
++  __ add_d(FSR, SSR, FSR);
++  __ ld_b(FSR, FSR, arrayOopDesc::base_offset_in_bytes(T_BYTE));
++}
++
++void TemplateTable::caload() {
++  transition(itos, itos);
++  index_check(SSR, FSR);
++  __ alsl_d(FSR, FSR, SSR, Address::times_2 - 1);
++  __ ld_hu(FSR, FSR, arrayOopDesc::base_offset_in_bytes(T_CHAR));
++}
++
++// iload followed by caload frequent pair
++// used register : T2
++// T2 : index
++void TemplateTable::fast_icaload() {
++  transition(vtos, itos);
++  // load index out of locals
++  locals_index(T2);
++  __ ld_w(FSR, T2, 0);
++  index_check(SSR, FSR);
++  __ alsl_d(FSR, FSR, SSR, 0);
++  __ ld_hu(FSR, FSR, arrayOopDesc::base_offset_in_bytes(T_CHAR));
++}
++
++void TemplateTable::saload() {
++  transition(itos, itos);
++  index_check(SSR, FSR);
++  __ alsl_d(FSR, FSR, SSR, Address::times_2 - 1);
++  __ ld_h(FSR, FSR, arrayOopDesc::base_offset_in_bytes(T_SHORT));
++}
++
++void TemplateTable::iload(int n) {
++  transition(vtos, itos);
++  __ ld_w(FSR, iaddress(n));
++}
++
++void TemplateTable::lload(int n) {
++  transition(vtos, ltos);
++  __ ld_d(FSR, laddress(n));
++}
++
++void TemplateTable::fload(int n) {
++  transition(vtos, ftos);
++  __ fld_s(FSF, faddress(n));
++}
++
++void TemplateTable::dload(int n) {
++  transition(vtos, dtos);
++  __ fld_d(FSF, laddress(n));
++}
++
++void TemplateTable::aload(int n) {
++  transition(vtos, atos);
++  __ ld_d(FSR, aaddress(n));
++}
++
++// used register : T2, T3
++// T2 : bytecode
++// T3 : folded code
++void TemplateTable::aload_0() {
++  transition(vtos, atos);
++  // According
++  // to bytecode histograms, the pairs:
++  //
++  //   _aload_0, _fast_igetfield
++  //   _aload_0, _fast_agetfield
++  //   _aload_0, _fast_fgetfield
++  //
++  // occur frequently. If RewriteFrequentPairs is set, the (slow)
++  // _aload_0 bytecode checks if the next bytecode is either
++  // _fast_igetfield, _fast_agetfield or _fast_fgetfield and then
++  // rewrites the current bytecode into a pair bytecode; otherwise it
++  // rewrites the current bytecode into _fast_aload_0 that doesn't do
++  // the pair check anymore.
++  //
++  // Note: If the next bytecode is _getfield, the rewrite must be
++  // delayed, otherwise we may miss an opportunity for a pair.
++  //
++  // Also rewrite frequent pairs
++  //   aload_0, aload_1
++  //   aload_0, iload_1
++  // These bytecodes with a small amount of code are most profitable
++  // to rewrite
++  if (RewriteFrequentPairs) {
++    Label rewrite, done;
++    // get the next bytecode in T2
++    __ ld_bu(T2, at_bcp(Bytecodes::length_for(Bytecodes::_aload_0)));
++
++    // do actual aload_0
++    aload(0);
++
++    // if _getfield then wait with rewrite
++    __ li(AT, Bytecodes::_getfield);
++    __ beq(AT, T2, done);
++
++    // if _igetfield then rewrite to _fast_iaccess_0
++    assert(Bytecodes::java_code(Bytecodes::_fast_iaccess_0) ==
++           Bytecodes::_aload_0,
++           "fix bytecode definition");
++    __ li(T3, Bytecodes::_fast_iaccess_0);
++    __ li(AT, Bytecodes::_fast_igetfield);
++    __ beq(AT, T2, rewrite);
++
++    // if _agetfield then rewrite to _fast_aaccess_0
++    assert(Bytecodes::java_code(Bytecodes::_fast_aaccess_0) ==
++           Bytecodes::_aload_0,
++           "fix bytecode definition");
++    __ li(T3, Bytecodes::_fast_aaccess_0);
++    __ li(AT, Bytecodes::_fast_agetfield);
++    __ beq(AT, T2, rewrite);
++
++    // if _fgetfield then rewrite to _fast_faccess_0
++    assert(Bytecodes::java_code(Bytecodes::_fast_faccess_0) ==
++           Bytecodes::_aload_0,
++           "fix bytecode definition");
++    __ li(T3, Bytecodes::_fast_faccess_0);
++    __ li(AT, Bytecodes::_fast_fgetfield);
++    __ beq(AT, T2, rewrite);
++
++    // else rewrite to _fast_aload0
++    assert(Bytecodes::java_code(Bytecodes::_fast_aload_0) ==
++           Bytecodes::_aload_0,
++           "fix bytecode definition");
++    __ li(T3, Bytecodes::_fast_aload_0);
++
++    // rewrite
++    __ bind(rewrite);
++    patch_bytecode(Bytecodes::_aload_0, T3, T2, false);
++
++    __ bind(done);
++  } else {
++    aload(0);
++  }
++}
++
++void TemplateTable::istore() {
++  transition(itos, vtos);
++  locals_index(T2);
++  __ st_w(FSR, T2, 0);
++}
++
++void TemplateTable::lstore() {
++  transition(ltos, vtos);
++  locals_index(T2);
++  __ st_d(FSR, T2, -wordSize);
++}
++
++void TemplateTable::fstore() {
++  transition(ftos, vtos);
++  locals_index(T2);
++  __ fst_s(FSF, T2, 0);
++}
++
++void TemplateTable::dstore() {
++  transition(dtos, vtos);
++  locals_index(T2);
++  __ fst_d(FSF, T2, -wordSize);
++}
++
++void TemplateTable::astore() {
++  transition(vtos, vtos);
++  __ pop_ptr(FSR);
++  locals_index(T2);
++  __ st_d(FSR, T2, 0);
++}
++
++void TemplateTable::wide_istore() {
++  transition(vtos, vtos);
++  __ pop_i(FSR);
++  locals_index_wide(T2);
++  __ st_d(FSR, T2, 0);
++}
++
++void TemplateTable::wide_lstore() {
++  transition(vtos, vtos);
++  __ pop_l(FSR);
++  locals_index_wide(T2);
++  __ st_d(FSR, T2, -wordSize);
++}
++
++void TemplateTable::wide_fstore() {
++  wide_istore();
++}
++
++void TemplateTable::wide_dstore() {
++  wide_lstore();
++}
++
++void TemplateTable::wide_astore() {
++  transition(vtos, vtos);
++  __ pop_ptr(FSR);
++  locals_index_wide(T2);
++  __ st_d(FSR, T2, 0);
++}
++
++// used register : T2
++void TemplateTable::iastore() {
++  transition(itos,
vtos); ++ __ pop_i(SSR); // T2: array SSR: index ++ index_check(T2, SSR); // prefer index in SSR ++ __ slli_d(SSR, SSR, Address::times_4); ++ __ add_d(T2, T2, SSR); ++ __ st_w(FSR, T2, arrayOopDesc::base_offset_in_bytes(T_INT)); ++} ++ ++ ++ ++// used register T2, T3 ++void TemplateTable::lastore() { ++ transition(ltos, vtos); ++ __ pop_i (T2); ++ index_check(T3, T2); ++ __ slli_d(T2, T2, Address::times_8); ++ __ add_d(T3, T3, T2); ++ __ st_d(FSR, T3, arrayOopDesc::base_offset_in_bytes(T_LONG)); ++} ++ ++// used register T2 ++void TemplateTable::fastore() { ++ transition(ftos, vtos); ++ __ pop_i(SSR); ++ index_check(T2, SSR); ++ __ slli_d(SSR, SSR, Address::times_4); ++ __ add_d(T2, T2, SSR); ++ __ fst_s(FSF, T2, arrayOopDesc::base_offset_in_bytes(T_FLOAT)); ++} ++ ++// used register T2, T3 ++void TemplateTable::dastore() { ++ transition(dtos, vtos); ++ __ pop_i (T2); ++ index_check(T3, T2); ++ __ slli_d(T2, T2, Address::times_8); ++ __ add_d(T3, T3, T2); ++ __ fst_d(FSF, T3, arrayOopDesc::base_offset_in_bytes(T_DOUBLE)); ++} ++ ++// used register : T2, T3, T8 ++// T2 : array ++// T3 : subklass ++// T8 : supklass ++void TemplateTable::aastore() { ++ Label is_null, ok_is_subtype, done; ++ transition(vtos, vtos); ++ // stack: ..., array, index, value ++ __ ld_d(FSR, at_tos()); // Value ++ __ ld_w(SSR, at_tos_p1()); // Index ++ __ ld_d(T2, at_tos_p2()); // Array ++ ++ // index_check(T2, SSR); ++ index_check_without_pop(T2, SSR); ++ // do array store check - check for NULL value first ++ __ beq(FSR, R0, is_null); ++ ++ // Move subklass into T3 ++ //add for compressedoops ++ __ load_klass(T3, FSR); ++ // Move superklass into T8 ++ //add for compressedoops ++ __ load_klass(T8, T2); ++ __ ld_d(T8, Address(T8, ObjArrayKlass::element_klass_offset())); ++ // Compress array+index*4+12 into a single register. T2 ++ __ alsl_d(T2, SSR, T2, (UseCompressedOops? Address::times_4 : Address::times_8) - 1); ++ __ addi_d(T2, T2, arrayOopDesc::base_offset_in_bytes(T_OBJECT)); ++ ++ // Generate subtype check. ++ // Superklass in T8. Subklass in T3. ++ __ gen_subtype_check(T8, T3, ok_is_subtype); ++ // Come here on failure ++ // object is at FSR ++ __ jmp(Interpreter::_throw_ArrayStoreException_entry); ++ // Come here on success ++ __ bind(ok_is_subtype); ++ do_oop_store(_masm, Address(T2, 0), FSR, _bs->kind(), true); ++ __ b(done); ++ ++ // Have a NULL in FSR, T2=array, SSR=index. Store NULL at ary[idx] ++ __ bind(is_null); ++ __ profile_null_seen(T4); ++ __ alsl_d(T2, SSR, T2, (UseCompressedOops? Address::times_4 : Address::times_8) - 1); ++ do_oop_store(_masm, Address(T2, arrayOopDesc::base_offset_in_bytes(T_OBJECT)), noreg, _bs->kind(), true); ++ ++ __ bind(done); ++ __ addi_d(SP, SP, 3 * Interpreter::stackElementSize); ++} ++ ++void TemplateTable::bastore() { ++ transition(itos, vtos); ++ __ pop_i(SSR); ++ index_check(T2, SSR); ++ ++ // Need to check whether array is boolean or byte ++ // since both types share the bastore bytecode. 
++ __ load_klass(T4, T2); ++ __ ld_w(T4, T4, in_bytes(Klass::layout_helper_offset())); ++ ++ int diffbit = Klass::layout_helper_boolean_diffbit(); ++ __ li(AT, diffbit); ++ ++ Label L_skip; ++ __ andr(AT, T4, AT); ++ __ beq(AT, R0, L_skip); ++ __ andi(FSR, FSR, 0x1); ++ __ bind(L_skip); ++ ++ __ add_d(SSR, T2, SSR); ++ __ st_b(FSR, SSR, arrayOopDesc::base_offset_in_bytes(T_BYTE)); ++} ++ ++void TemplateTable::castore() { ++ transition(itos, vtos); ++ __ pop_i(SSR); ++ index_check(T2, SSR); ++ __ alsl_d(SSR, SSR, T2, Address::times_2 - 1); ++ __ st_h(FSR, SSR, arrayOopDesc::base_offset_in_bytes(T_CHAR)); ++} ++ ++void TemplateTable::sastore() { ++ castore(); ++} ++ ++void TemplateTable::istore(int n) { ++ transition(itos, vtos); ++ __ st_w(FSR, iaddress(n)); ++} ++ ++void TemplateTable::lstore(int n) { ++ transition(ltos, vtos); ++ __ st_d(FSR, laddress(n)); ++} ++ ++void TemplateTable::fstore(int n) { ++ transition(ftos, vtos); ++ __ fst_s(FSF, faddress(n)); ++} ++ ++void TemplateTable::dstore(int n) { ++ transition(dtos, vtos); ++ __ fst_d(FSF, laddress(n)); ++} ++ ++void TemplateTable::astore(int n) { ++ transition(vtos, vtos); ++ __ pop_ptr(FSR); ++ __ st_d(FSR, aaddress(n)); ++} ++ ++void TemplateTable::pop() { ++ transition(vtos, vtos); ++ __ addi_d(SP, SP, Interpreter::stackElementSize); ++} ++ ++void TemplateTable::pop2() { ++ transition(vtos, vtos); ++ __ addi_d(SP, SP, 2 * Interpreter::stackElementSize); ++} ++ ++void TemplateTable::dup() { ++ transition(vtos, vtos); ++ // stack: ..., a ++ __ load_ptr(0, FSR); ++ __ push_ptr(FSR); ++ // stack: ..., a, a ++} ++ ++// blows FSR ++void TemplateTable::dup_x1() { ++ transition(vtos, vtos); ++ // stack: ..., a, b ++ __ load_ptr(0, FSR); // load b ++ __ load_ptr(1, A5); // load a ++ __ store_ptr(1, FSR); // store b ++ __ store_ptr(0, A5); // store a ++ __ push_ptr(FSR); // push b ++ // stack: ..., b, a, b ++} ++ ++// blows FSR ++void TemplateTable::dup_x2() { ++ transition(vtos, vtos); ++ // stack: ..., a, b, c ++ __ load_ptr(0, FSR); // load c ++ __ load_ptr(2, A5); // load a ++ __ store_ptr(2, FSR); // store c in a ++ __ push_ptr(FSR); // push c ++ // stack: ..., c, b, c, c ++ __ load_ptr(2, FSR); // load b ++ __ store_ptr(2, A5); // store a in b ++ // stack: ..., c, a, c, c ++ __ store_ptr(1, FSR); // store b in c ++ // stack: ..., c, a, b, c ++} ++ ++// blows FSR ++void TemplateTable::dup2() { ++ transition(vtos, vtos); ++ // stack: ..., a, b ++ __ load_ptr(1, FSR); // load a ++ __ push_ptr(FSR); // push a ++ __ load_ptr(1, FSR); // load b ++ __ push_ptr(FSR); // push b ++ // stack: ..., a, b, a, b ++} ++ ++// blows FSR ++void TemplateTable::dup2_x1() { ++ transition(vtos, vtos); ++ // stack: ..., a, b, c ++ __ load_ptr(0, T2); // load c ++ __ load_ptr(1, FSR); // load b ++ __ push_ptr(FSR); // push b ++ __ push_ptr(T2); // push c ++ // stack: ..., a, b, c, b, c ++ __ store_ptr(3, T2); // store c in b ++ // stack: ..., a, c, c, b, c ++ __ load_ptr(4, T2); // load a ++ __ store_ptr(2, T2); // store a in 2nd c ++ // stack: ..., a, c, a, b, c ++ __ store_ptr(4, FSR); // store b in a ++ // stack: ..., b, c, a, b, c ++ ++ // stack: ..., b, c, a, b, c ++} ++ ++// blows FSR, SSR ++void TemplateTable::dup2_x2() { ++ transition(vtos, vtos); ++ // stack: ..., a, b, c, d ++ // stack: ..., a, b, c, d ++ __ load_ptr(0, T2); // load d ++ __ load_ptr(1, FSR); // load c ++ __ push_ptr(FSR); // push c ++ __ push_ptr(T2); // push d ++ // stack: ..., a, b, c, d, c, d ++ __ load_ptr(4, FSR); // load b ++ __ store_ptr(2, FSR); // store b in d ++ __ 
store_ptr(4, T2); // store d in b ++ // stack: ..., a, d, c, b, c, d ++ __ load_ptr(5, T2); // load a ++ __ load_ptr(3, FSR); // load c ++ __ store_ptr(3, T2); // store a in c ++ __ store_ptr(5, FSR); // store c in a ++ // stack: ..., c, d, a, b, c, d ++ ++ // stack: ..., c, d, a, b, c, d ++} ++ ++// blows FSR ++void TemplateTable::swap() { ++ transition(vtos, vtos); ++ // stack: ..., a, b ++ ++ __ load_ptr(1, A5); // load a ++ __ load_ptr(0, FSR); // load b ++ __ store_ptr(0, A5); // store a in b ++ __ store_ptr(1, FSR); // store b in a ++ ++ // stack: ..., b, a ++} ++ ++void TemplateTable::iop2(Operation op) { ++ transition(itos, itos); ++ ++ __ pop_i(SSR); ++ switch (op) { ++ case add : __ add_w(FSR, SSR, FSR); break; ++ case sub : __ sub_w(FSR, SSR, FSR); break; ++ case mul : __ mul_w(FSR, SSR, FSR); break; ++ case _and : __ andr(FSR, SSR, FSR); break; ++ case _or : __ orr(FSR, SSR, FSR); break; ++ case _xor : __ xorr(FSR, SSR, FSR); break; ++ case shl : __ sll_w(FSR, SSR, FSR); break; ++ case shr : __ sra_w(FSR, SSR, FSR); break; ++ case ushr : __ srl_w(FSR, SSR, FSR); break; ++ default : ShouldNotReachHere(); ++ } ++} ++ ++// the result stored in FSR, SSR, ++// used registers : T2, T3 ++void TemplateTable::lop2(Operation op) { ++ transition(ltos, ltos); ++ __ pop_l(T2); ++ ++ switch (op) { ++ case add : __ add_d(FSR, T2, FSR); break; ++ case sub : __ sub_d(FSR, T2, FSR); break; ++ case _and: __ andr(FSR, T2, FSR); break; ++ case _or : __ orr(FSR, T2, FSR); break; ++ case _xor: __ xorr(FSR, T2, FSR); break; ++ default : ShouldNotReachHere(); ++ } ++} ++ ++// java require this bytecode could handle 0x80000000/-1, dont cause a overflow exception, ++// the result is 0x80000000 ++// the godson2 cpu do the same, so we need not handle this specially like x86 ++void TemplateTable::idiv() { ++ transition(itos, itos); ++ Label not_zero; ++ ++ __ bne(FSR, R0, not_zero); ++ __ jmp(Interpreter::_throw_ArithmeticException_entry); ++ __ bind(not_zero); ++ ++ __ pop_i(SSR); ++ __ div_w(FSR, SSR, FSR); ++} ++ ++void TemplateTable::irem() { ++ transition(itos, itos); ++ Label not_zero; ++ __ pop_i(SSR); ++ ++ __ bne(FSR, R0, not_zero); ++ //__ brk(7); ++ __ jmp(Interpreter::_throw_ArithmeticException_entry); ++ ++ __ bind(not_zero); ++ __ mod_w(FSR, SSR, FSR); ++} ++ ++void TemplateTable::lmul() { ++ transition(ltos, ltos); ++ __ pop_l(T2); ++ __ mul_d(FSR, T2, FSR); ++} ++ ++// NOTE: i DONT use the Interpreter::_throw_ArithmeticException_entry ++void TemplateTable::ldiv() { ++ transition(ltos, ltos); ++ Label normal; ++ ++ __ bne(FSR, R0, normal); ++ ++ //__ brk(7); //generate FPE ++ __ jmp(Interpreter::_throw_ArithmeticException_entry); ++ ++ __ bind(normal); ++ __ pop_l(A2); ++ __ div_d(FSR, A2, FSR); ++} ++ ++// NOTE: i DONT use the Interpreter::_throw_ArithmeticException_entry ++void TemplateTable::lrem() { ++ transition(ltos, ltos); ++ Label normal; ++ ++ __ bne(FSR, R0, normal); ++ ++ __ jmp(Interpreter::_throw_ArithmeticException_entry); ++ ++ __ bind(normal); ++ __ pop_l (A2); ++ ++ __ mod_d(FSR, A2, FSR); ++} ++ ++// result in FSR ++// used registers : T0 ++void TemplateTable::lshl() { ++ transition(itos, ltos); ++ __ pop_l(T0); ++ __ sll_d(FSR, T0, FSR); ++} ++ ++// used registers : T0 ++void TemplateTable::lshr() { ++ transition(itos, ltos); ++ __ pop_l(T0); ++ __ sra_d(FSR, T0, FSR); ++} ++ ++// used registers : T0 ++void TemplateTable::lushr() { ++ transition(itos, ltos); ++ __ pop_l(T0); ++ __ srl_d(FSR, T0, FSR); ++} ++ ++// result in FSF ++void TemplateTable::fop2(Operation op) { ++ 
transition(ftos, ftos); ++ switch (op) { ++ case add: ++ __ fld_s(fscratch, at_sp()); ++ __ fadd_s(FSF, fscratch, FSF); ++ break; ++ case sub: ++ __ fld_s(fscratch, at_sp()); ++ __ fsub_s(FSF, fscratch, FSF); ++ break; ++ case mul: ++ __ fld_s(fscratch, at_sp()); ++ __ fmul_s(FSF, fscratch, FSF); ++ break; ++ case div: ++ __ fld_s(fscratch, at_sp()); ++ __ fdiv_s(FSF, fscratch, FSF); ++ break; ++ case rem: ++ __ fmov_s(FA1, FSF); ++ __ fld_s(FA0, at_sp()); ++ __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::frem), 2); ++ break; ++ default : ShouldNotReachHere(); ++ } ++ ++ __ addi_d(SP, SP, 1 * wordSize); ++} ++ ++// result in SSF||FSF ++// i dont handle the strict flags ++void TemplateTable::dop2(Operation op) { ++ transition(dtos, dtos); ++ switch (op) { ++ case add: ++ __ fld_d(fscratch, at_sp()); ++ __ fadd_d(FSF, fscratch, FSF); ++ break; ++ case sub: ++ __ fld_d(fscratch, at_sp()); ++ __ fsub_d(FSF, fscratch, FSF); ++ break; ++ case mul: ++ __ fld_d(fscratch, at_sp()); ++ __ fmul_d(FSF, fscratch, FSF); ++ break; ++ case div: ++ __ fld_d(fscratch, at_sp()); ++ __ fdiv_d(FSF, fscratch, FSF); ++ break; ++ case rem: ++ __ fmov_d(FA1, FSF); ++ __ fld_d(FA0, at_sp()); ++ __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::drem), 2); ++ break; ++ default : ShouldNotReachHere(); ++ } ++ ++ __ addi_d(SP, SP, 2 * wordSize); ++} ++ ++void TemplateTable::ineg() { ++ transition(itos, itos); ++ __ sub_w(FSR, R0, FSR); ++} ++ ++void TemplateTable::lneg() { ++ transition(ltos, ltos); ++ __ sub_d(FSR, R0, FSR); ++} ++ ++void TemplateTable::fneg() { ++ transition(ftos, ftos); ++ __ fneg_s(FSF, FSF); ++} ++ ++void TemplateTable::dneg() { ++ transition(dtos, dtos); ++ __ fneg_d(FSF, FSF); ++} ++ ++// used registers : T2 ++void TemplateTable::iinc() { ++ transition(vtos, vtos); ++ locals_index(T2); ++ __ ld_w(FSR, T2, 0); ++ __ ld_b(AT, at_bcp(2)); // get constant ++ __ add_d(FSR, FSR, AT); ++ __ st_w(FSR, T2, 0); ++} ++ ++// used register : T2 ++void TemplateTable::wide_iinc() { ++ transition(vtos, vtos); ++ locals_index_wide(T2); ++ __ get_2_byte_integer_at_bcp(FSR, AT, 4); ++ __ hswap(FSR); ++ __ ld_w(AT, T2, 0); ++ __ add_d(FSR, AT, FSR); ++ __ st_w(FSR, T2, 0); ++} ++ ++void TemplateTable::convert() { ++ // Checking ++#ifdef ASSERT ++ { ++ TosState tos_in = ilgl; ++ TosState tos_out = ilgl; ++ switch (bytecode()) { ++ case Bytecodes::_i2l: // fall through ++ case Bytecodes::_i2f: // fall through ++ case Bytecodes::_i2d: // fall through ++ case Bytecodes::_i2b: // fall through ++ case Bytecodes::_i2c: // fall through ++ case Bytecodes::_i2s: tos_in = itos; break; ++ case Bytecodes::_l2i: // fall through ++ case Bytecodes::_l2f: // fall through ++ case Bytecodes::_l2d: tos_in = ltos; break; ++ case Bytecodes::_f2i: // fall through ++ case Bytecodes::_f2l: // fall through ++ case Bytecodes::_f2d: tos_in = ftos; break; ++ case Bytecodes::_d2i: // fall through ++ case Bytecodes::_d2l: // fall through ++ case Bytecodes::_d2f: tos_in = dtos; break; ++ default : ShouldNotReachHere(); ++ } ++ switch (bytecode()) { ++ case Bytecodes::_l2i: // fall through ++ case Bytecodes::_f2i: // fall through ++ case Bytecodes::_d2i: // fall through ++ case Bytecodes::_i2b: // fall through ++ case Bytecodes::_i2c: // fall through ++ case Bytecodes::_i2s: tos_out = itos; break; ++ case Bytecodes::_i2l: // fall through ++ case Bytecodes::_f2l: // fall through ++ case Bytecodes::_d2l: tos_out = ltos; break; ++ case Bytecodes::_i2f: // fall through ++ case Bytecodes::_l2f: // fall through ++ case Bytecodes::_d2f: 
tos_out = ftos; break; ++ case Bytecodes::_i2d: // fall through ++ case Bytecodes::_l2d: // fall through ++ case Bytecodes::_f2d: tos_out = dtos; break; ++ default : ShouldNotReachHere(); ++ } ++ transition(tos_in, tos_out); ++ } ++#endif // ASSERT ++ // Conversion ++ switch (bytecode()) { ++ case Bytecodes::_i2l: ++ __ slli_w(FSR, FSR, 0); ++ break; ++ case Bytecodes::_i2f: ++ __ movgr2fr_w(FSF, FSR); ++ __ ffint_s_w(FSF, FSF); ++ break; ++ case Bytecodes::_i2d: ++ __ movgr2fr_w(FSF, FSR); ++ __ ffint_d_w(FSF, FSF); ++ break; ++ case Bytecodes::_i2b: ++ __ ext_w_b(FSR, FSR); ++ break; ++ case Bytecodes::_i2c: ++ __ bstrpick_d(FSR, FSR, 15, 0); // truncate upper 56 bits ++ break; ++ case Bytecodes::_i2s: ++ __ ext_w_h(FSR, FSR); ++ break; ++ case Bytecodes::_l2i: ++ __ slli_w(FSR, FSR, 0); ++ break; ++ case Bytecodes::_l2f: ++ __ movgr2fr_d(FSF, FSR); ++ __ ffint_s_l(FSF, FSF); ++ break; ++ case Bytecodes::_l2d: ++ __ movgr2fr_d(FSF, FSR); ++ __ ffint_d_l(FSF, FSF); ++ break; ++ case Bytecodes::_f2i: ++ __ ftintrz_w_s(fscratch, FSF); ++ __ movfr2gr_s(FSR, fscratch); ++ break; ++ case Bytecodes::_f2l: ++ __ ftintrz_l_s(fscratch, FSF); ++ __ movfr2gr_d(FSR, fscratch); ++ break; ++ case Bytecodes::_f2d: ++ __ fcvt_d_s(FSF, FSF); ++ break; ++ case Bytecodes::_d2i: ++ __ ftintrz_w_d(fscratch, FSF); ++ __ movfr2gr_s(FSR, fscratch); ++ break; ++ case Bytecodes::_d2l: ++ __ ftintrz_l_d(fscratch, FSF); ++ __ movfr2gr_d(FSR, fscratch); ++ break; ++ case Bytecodes::_d2f: ++ __ fcvt_s_d(FSF, FSF); ++ break; ++ default : ++ ShouldNotReachHere(); ++ } ++} ++ ++void TemplateTable::lcmp() { ++ transition(ltos, itos); ++ ++ __ pop(T0); ++ __ pop(R0); ++ ++ __ slt(AT, T0, FSR); ++ __ slt(FSR, FSR, T0); ++ __ sub_d(FSR, FSR, AT); ++} ++ ++void TemplateTable::float_cmp(bool is_float, int unordered_result) { ++ if (is_float) { ++ __ fld_s(fscratch, at_sp()); ++ __ addi_d(SP, SP, 1 * wordSize); ++ ++ if (unordered_result < 0) { ++ __ fcmp_clt_s(FCC0, FSF, fscratch); ++ __ fcmp_cult_s(FCC1, fscratch, FSF); ++ } else { ++ __ fcmp_cult_s(FCC0, FSF, fscratch); ++ __ fcmp_clt_s(FCC1, fscratch, FSF); ++ } ++ } else { ++ __ fld_d(fscratch, at_sp()); ++ __ addi_d(SP, SP, 2 * wordSize); ++ ++ if (unordered_result < 0) { ++ __ fcmp_clt_d(FCC0, FSF, fscratch); ++ __ fcmp_cult_d(FCC1, fscratch, FSF); ++ } else { ++ __ fcmp_cult_d(FCC0, FSF, fscratch); ++ __ fcmp_clt_d(FCC1, fscratch, FSF); ++ } ++ } ++ ++ __ movcf2gr(FSR, FCC0); ++ __ movcf2gr(AT, FCC1); ++ __ sub_d(FSR, FSR, AT); ++} ++ ++ ++// used registers : T3, A7, Rnext ++// FSR : return bci, this is defined by the vm specification ++// T2 : MDO taken count ++// T3 : method ++// A7 : offset ++// Rnext : next bytecode, this is required by dispatch_base ++void TemplateTable::branch(bool is_jsr, bool is_wide) { ++ __ get_method(T3); ++ __ profile_taken_branch(A7, T2); // only C2 meaningful ++ ++ const ByteSize be_offset = MethodCounters::backedge_counter_offset() + ++ InvocationCounter::counter_offset(); ++ const ByteSize inv_offset = MethodCounters::invocation_counter_offset() + ++ InvocationCounter::counter_offset(); ++ ++ // Load up T4 with the branch displacement ++ if (!is_wide) { ++ __ ld_b(A7, BCP, 1); ++ __ ld_bu(AT, BCP, 2); ++ __ slli_d(A7, A7, 8); ++ __ orr(A7, A7, AT); ++ } else { ++ __ get_4_byte_integer_at_bcp(A7, 1); ++ __ swap(A7); ++ } ++ ++ // Handle all the JSR stuff here, then exit. ++ // It's much shorter and cleaner than intermingling with the non-JSR ++ // normal-branch stuff occuring below. 
++ if (is_jsr) { ++ // Pre-load the next target bytecode into Rnext ++ __ ldx_bu(Rnext, BCP, A7); ++ ++ // compute return address as bci in FSR ++ __ addi_d(FSR, BCP, (is_wide?5:3) - in_bytes(ConstMethod::codes_offset())); ++ __ ld_d(AT, T3, in_bytes(Method::const_offset())); ++ __ sub_d(FSR, FSR, AT); ++ // Adjust the bcp in BCP by the displacement in A7 ++ __ add_d(BCP, BCP, A7); ++ // jsr returns atos that is not an oop ++ // Push return address ++ __ push_i(FSR); ++ // jsr returns vtos ++ __ dispatch_only_noverify(vtos); ++ ++ return; ++ } ++ ++ // Normal (non-jsr) branch handling ++ ++ // Adjust the bcp in S0 by the displacement in T4 ++ __ add_d(BCP, BCP, A7); ++ ++ assert(UseLoopCounter || !UseOnStackReplacement, "on-stack-replacement requires loop counters"); ++ Label backedge_counter_overflow; ++ Label profile_method; ++ Label dispatch; ++ if (UseLoopCounter) { ++ // increment backedge counter for backward branches ++ // T3: method ++ // T4: target offset ++ // BCP: target bcp ++ // LVP: locals pointer ++ __ blt(R0, A7, dispatch); // check if forward or backward branch ++ ++ // check if MethodCounters exists ++ Label has_counters; ++ __ ld_d(AT, T3, in_bytes(Method::method_counters_offset())); // use AT as MDO, TEMP ++ __ bne(AT, R0, has_counters); ++ __ push2(T3, A7); ++ __ call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::build_method_counters), ++ T3); ++ __ pop2(T3, A7); ++ __ ld_d(AT, T3, in_bytes(Method::method_counters_offset())); // use AT as MDO, TEMP ++ __ beq(AT, R0, dispatch); ++ __ bind(has_counters); ++ ++ if (TieredCompilation) { ++ Label no_mdo; ++ int increment = InvocationCounter::count_increment; ++ int mask = ((1 << Tier0BackedgeNotifyFreqLog) - 1) << InvocationCounter::count_shift; ++ if (ProfileInterpreter) { ++ // Are we profiling? 
++ __ ld_d(T0, Address(T3, in_bytes(Method::method_data_offset()))); ++ __ beq(T0, R0, no_mdo); ++ // Increment the MDO backedge counter ++ const Address mdo_backedge_counter(T0, in_bytes(MethodData::backedge_counter_offset()) + ++ in_bytes(InvocationCounter::counter_offset())); ++ __ increment_mask_and_jump(mdo_backedge_counter, increment, mask, ++ T1, false, Assembler::zero, &backedge_counter_overflow); ++ __ beq(R0, R0, dispatch); ++ } ++ __ bind(no_mdo); ++ // Increment backedge counter in MethodCounters* ++ __ ld_d(T0, Address(T3, Method::method_counters_offset())); ++ __ increment_mask_and_jump(Address(T0, be_offset), increment, mask, ++ T1, false, Assembler::zero, &backedge_counter_overflow); ++ if (!UseOnStackReplacement) { ++ __ bind(backedge_counter_overflow); ++ } ++ } else { ++ // increment back edge counter ++ __ ld_d(T1, T3, in_bytes(Method::method_counters_offset())); ++ __ ld_w(T0, T1, in_bytes(be_offset)); ++ __ increment(T0, InvocationCounter::count_increment); ++ __ st_w(T0, T1, in_bytes(be_offset)); ++ ++ // load invocation counter ++ __ ld_w(T1, T1, in_bytes(inv_offset)); ++ // buffer bit added, mask no needed ++ ++ // dadd backedge counter & invocation counter ++ __ add_d(T1, T1, T0); ++ ++ if (ProfileInterpreter) { ++ // Test to see if we should create a method data oop ++ // T1 : backedge counter & invocation counter ++ if (Assembler::is_simm(InvocationCounter::InterpreterProfileLimit, 12)) { ++ __ slti(AT, T1, InvocationCounter::InterpreterProfileLimit); ++ __ bne(AT, R0, dispatch); ++ } else { ++ __ li(AT, (long)&InvocationCounter::InterpreterProfileLimit); ++ __ ld_w(AT, AT, 0); ++ __ blt(T1, AT, dispatch); ++ } ++ ++ // if no method data exists, go to profile method ++ __ test_method_data_pointer(T1, profile_method); ++ ++ if (UseOnStackReplacement) { ++ if (Assembler::is_simm(InvocationCounter::InterpreterBackwardBranchLimit, 12)) { ++ __ slti(AT, T2, InvocationCounter::InterpreterBackwardBranchLimit); ++ __ bne(AT, R0, dispatch); ++ } else { ++ __ li(AT, (long)&InvocationCounter::InterpreterBackwardBranchLimit); ++ __ ld_w(AT, AT, 0); ++ __ blt(T2, AT, dispatch); ++ } ++ ++ // When ProfileInterpreter is on, the backedge_count comes ++ // from the methodDataOop, which value does not get reset on ++ // the call to frequency_counter_overflow(). ++ // To avoid excessive calls to the overflow routine while ++ // the method is being compiled, dadd a second test to make ++ // sure the overflow function is called only once every ++ // overflow_frequency. ++ const int overflow_frequency = 1024; ++ __ andi(AT, T2, overflow_frequency-1); ++ __ beq(AT, R0, backedge_counter_overflow); ++ } ++ } else { ++ if (UseOnStackReplacement) { ++ // check for overflow against AT, which is the sum of the counters ++ __ li(AT, (long)&InvocationCounter::InterpreterBackwardBranchLimit); ++ __ ld_w(AT, AT, 0); ++ __ bge(T1, AT, backedge_counter_overflow); ++ } ++ } ++ } ++ __ bind(dispatch); ++ } ++ ++ // Pre-load the next target bytecode into Rnext ++ __ ld_bu(Rnext, BCP, 0); ++ ++ // continue with the bytecode @ target ++ // FSR: return bci for jsr's, unused otherwise ++ // Rnext: target bytecode ++ // BCP: target bcp ++ __ dispatch_only(vtos); ++ ++ if (UseLoopCounter) { ++ if (ProfileInterpreter) { ++ // Out-of-line code to allocate method data oop. 
++ __ bind(profile_method); ++ __ call_VM(NOREG, CAST_FROM_FN_PTR(address, InterpreterRuntime::profile_method)); ++ __ ld_bu(Rnext, BCP, 0); ++ __ set_method_data_pointer_for_bcp(); ++ __ b(dispatch); ++ } ++ ++ if (UseOnStackReplacement) { ++ // invocation counter overflow ++ __ bind(backedge_counter_overflow); ++ __ sub_d(A7, BCP, A7); // branch bcp ++ call_VM(NOREG, CAST_FROM_FN_PTR(address, ++ InterpreterRuntime::frequency_counter_overflow), A7); ++ __ ld_bu(Rnext, BCP, 0); ++ ++ // V0: osr nmethod (osr ok) or NULL (osr not possible) ++ // V1: osr adapter frame return address ++ // Rnext: target bytecode ++ // LVP: locals pointer ++ // BCP: bcp ++ __ beq(V0, R0, dispatch); ++ // nmethod may have been invalidated (VM may block upon call_VM return) ++ __ ld_w(T3, V0, nmethod::entry_bci_offset()); ++ __ li(AT, InvalidOSREntryBci); ++ __ beq(AT, T3, dispatch); ++ // We need to prepare to execute the OSR method. First we must ++ // migrate the locals and monitors off of the stack. ++ //V0: osr nmethod (osr ok) or NULL (osr not possible) ++ //V1: osr adapter frame return address ++ //Rnext: target bytecode ++ //LVP: locals pointer ++ //BCP: bcp ++ __ move(BCP, V0); ++ const Register thread = TREG; ++#ifndef OPT_THREAD ++ __ get_thread(thread); ++#endif ++ call_VM(noreg, CAST_FROM_FN_PTR(address, SharedRuntime::OSR_migration_begin)); ++ ++ // V0 is OSR buffer, move it to expected parameter location ++ // refer to osrBufferPointer in c1_LIRAssembler_loongarch.cpp ++ __ move(T0, V0); ++ ++ // pop the interpreter frame ++ __ ld_d(A7, Address(FP, frame::interpreter_frame_sender_sp_offset * wordSize)); ++ // remove frame anchor ++ __ leave(); ++ __ move(LVP, RA); ++ __ move(SP, A7); ++ ++ __ li(AT, -(StackAlignmentInBytes)); ++ __ andr(SP , SP , AT); ++ ++ // push the (possibly adjusted) return address ++ // refer to osr_entry in c1_LIRAssembler_loongarch.cpp ++ __ ld_d(AT, BCP, nmethod::osr_entry_point_offset()); ++ __ jr(AT); ++ } ++ } ++} ++ ++ ++void TemplateTable::if_0cmp(Condition cc) { ++ transition(itos, vtos); ++ // assume branch is more often taken than not (loops use backward branches) ++ Label not_taken; ++ switch(cc) { ++ case not_equal: ++ __ beq(FSR, R0, not_taken); ++ break; ++ case equal: ++ __ bne(FSR, R0, not_taken); ++ break; ++ case less: ++ __ bge(FSR, R0, not_taken); ++ break; ++ case less_equal: ++ __ blt(R0, FSR, not_taken); ++ break; ++ case greater: ++ __ bge(R0, FSR, not_taken); ++ break; ++ case greater_equal: ++ __ blt(FSR, R0, not_taken); ++ break; ++ } ++ ++ branch(false, false); ++ ++ __ bind(not_taken); ++ __ profile_not_taken_branch(FSR); ++} ++ ++void TemplateTable::if_icmp(Condition cc) { ++ transition(itos, vtos); ++ // assume branch is more often taken than not (loops use backward branches) ++ Label not_taken; ++ ++ __ pop_i(SSR); ++ switch(cc) { ++ case not_equal: ++ __ beq(SSR, FSR, not_taken); ++ break; ++ case equal: ++ __ bne(SSR, FSR, not_taken); ++ break; ++ case less: ++ __ bge(SSR, FSR, not_taken); ++ break; ++ case less_equal: ++ __ blt(FSR, SSR, not_taken); ++ break; ++ case greater: ++ __ bge(FSR, SSR, not_taken); ++ break; ++ case greater_equal: ++ __ blt(SSR, FSR, not_taken); ++ break; ++ } ++ ++ branch(false, false); ++ __ bind(not_taken); ++ __ profile_not_taken_branch(FSR); ++} ++ ++void TemplateTable::if_nullcmp(Condition cc) { ++ transition(atos, vtos); ++ // assume branch is more often taken than not (loops use backward branches) ++ Label not_taken; ++ switch(cc) { ++ case not_equal: ++ __ beq(FSR, R0, not_taken); ++ break; ++ case equal: 
++ __ bne(FSR, R0, not_taken); ++ break; ++ default: ++ ShouldNotReachHere(); ++ } ++ ++ branch(false, false); ++ __ bind(not_taken); ++ __ profile_not_taken_branch(FSR); ++} ++ ++ ++void TemplateTable::if_acmp(Condition cc) { ++ transition(atos, vtos); ++ // assume branch is more often taken than not (loops use backward branches) ++ Label not_taken; ++ // __ ld_w(SSR, SP, 0); ++ __ pop_ptr(SSR); ++ switch(cc) { ++ case not_equal: ++ __ beq(SSR, FSR, not_taken); ++ break; ++ case equal: ++ __ bne(SSR, FSR, not_taken); ++ break; ++ default: ++ ShouldNotReachHere(); ++ } ++ ++ branch(false, false); ++ ++ __ bind(not_taken); ++ __ profile_not_taken_branch(FSR); ++} ++ ++// used registers : T1, T2, T3 ++// T1 : method ++// T2 : returb bci ++void TemplateTable::ret() { ++ transition(vtos, vtos); ++ ++ locals_index(T2); ++ __ ld_d(T2, T2, 0); ++ __ profile_ret(T2, T3); ++ ++ __ get_method(T1); ++ __ ld_d(BCP, T1, in_bytes(Method::const_offset())); ++ __ add_d(BCP, BCP, T2); ++ __ addi_d(BCP, BCP, in_bytes(ConstMethod::codes_offset())); ++ ++ __ dispatch_next(vtos); ++} ++ ++// used registers : T1, T2, T3 ++// T1 : method ++// T2 : returb bci ++void TemplateTable::wide_ret() { ++ transition(vtos, vtos); ++ ++ locals_index_wide(T2); ++ __ ld_d(T2, T2, 0); // get return bci, compute return bcp ++ __ profile_ret(T2, T3); ++ ++ __ get_method(T1); ++ __ ld_d(BCP, T1, in_bytes(Method::const_offset())); ++ __ add_d(BCP, BCP, T2); ++ __ addi_d(BCP, BCP, in_bytes(ConstMethod::codes_offset())); ++ ++ __ dispatch_next(vtos); ++} ++ ++// used register T2, T3, A7, Rnext ++// T2 : bytecode pointer ++// T3 : low ++// A7 : high ++// Rnext : dest bytecode, required by dispatch_base ++void TemplateTable::tableswitch() { ++ Label default_case, continue_execution; ++ transition(itos, vtos); ++ ++ // align BCP ++ __ addi_d(T2, BCP, BytesPerInt); ++ __ li(AT, -BytesPerInt); ++ __ andr(T2, T2, AT); ++ ++ // load lo & hi ++ __ ld_w(T3, T2, 1 * BytesPerInt); ++ __ swap(T3); ++ __ ld_w(A7, T2, 2 * BytesPerInt); ++ __ swap(A7); ++ ++ // check against lo & hi ++ __ blt(FSR, T3, default_case); ++ __ blt(A7, FSR, default_case); ++ ++ // lookup dispatch offset, in A7 big endian ++ __ sub_d(FSR, FSR, T3); ++ __ alsl_d(AT, FSR, T2, Address::times_4 - 1); ++ __ ld_w(A7, AT, 3 * BytesPerInt); ++ __ profile_switch_case(FSR, T4, T3); ++ ++ __ bind(continue_execution); ++ __ swap(A7); ++ __ add_d(BCP, BCP, A7); ++ __ ld_bu(Rnext, BCP, 0); ++ __ dispatch_only(vtos); ++ ++ // handle default ++ __ bind(default_case); ++ __ profile_switch_default(FSR); ++ __ ld_w(A7, T2, 0); ++ __ b(continue_execution); ++} ++ ++void TemplateTable::lookupswitch() { ++ transition(itos, itos); ++ __ stop("lookupswitch bytecode should have been rewritten"); ++} ++ ++// used registers : T2, T3, A7, Rnext ++// T2 : bytecode pointer ++// T3 : pair index ++// A7 : offset ++// Rnext : dest bytecode ++// the data after the opcode is the same as lookupswitch ++// see Rewriter::rewrite_method for more information ++void TemplateTable::fast_linearswitch() { ++ transition(itos, vtos); ++ Label loop_entry, loop, found, continue_execution; ++ ++ // swap FSR so we can avoid swapping the table entries ++ __ swap(FSR); ++ ++ // align BCP ++ __ addi_d(T2, BCP, BytesPerInt); ++ __ li(AT, -BytesPerInt); ++ __ andr(T2, T2, AT); ++ ++ // set counter ++ __ ld_w(T3, T2, BytesPerInt); ++ __ swap(T3); ++ __ b(loop_entry); ++ ++ // table search ++ __ bind(loop); ++ // get the entry value ++ __ alsl_d(AT, T3, T2, Address::times_8 - 1); ++ __ ld_w(AT, AT, 2 * BytesPerInt); ++ ++ // 
found? ++ __ beq(FSR, AT, found); ++ ++ __ bind(loop_entry); ++ Label L1; ++ __ bge(R0, T3, L1); ++ __ addi_d(T3, T3, -1); ++ __ b(loop); ++ __ bind(L1); ++ __ addi_d(T3, T3, -1); ++ ++ // default case ++ __ profile_switch_default(FSR); ++ __ ld_w(A7, T2, 0); ++ __ b(continue_execution); ++ ++ // entry found -> get offset ++ __ bind(found); ++ __ alsl_d(AT, T3, T2, Address::times_8 - 1); ++ __ ld_w(A7, AT, 3 * BytesPerInt); ++ __ profile_switch_case(T3, FSR, T2); ++ ++ // continue execution ++ __ bind(continue_execution); ++ __ swap(A7); ++ __ add_d(BCP, BCP, A7); ++ __ ld_bu(Rnext, BCP, 0); ++ __ dispatch_only(vtos); ++} ++ ++// used registers : T0, T1, T2, T3, A7, Rnext ++// T2 : pairs address(array) ++// Rnext : dest bytecode ++// the data after the opcode is the same as lookupswitch ++// see Rewriter::rewrite_method for more information ++void TemplateTable::fast_binaryswitch() { ++ transition(itos, vtos); ++ // Implementation using the following core algorithm: ++ // ++ // int binary_search(int key, LookupswitchPair* array, int n) { ++ // // Binary search according to "Methodik des Programmierens" by ++ // // Edsger W. Dijkstra and W.H.J. Feijen, Addison Wesley Germany 1985. ++ // int i = 0; ++ // int j = n; ++ // while (i+1 < j) { ++ // // invariant P: 0 <= i < j <= n and (a[i] <= key < a[j] or Q) ++ // // with Q: for all i: 0 <= i < n: key < a[i] ++ // // where a stands for the array and assuming that the (inexisting) ++ // // element a[n] is infinitely big. ++ // int h = (i + j) >> 1; ++ // // i < h < j ++ // if (key < array[h].fast_match()) { ++ // j = h; ++ // } else { ++ // i = h; ++ // } ++ // } ++ // // R: a[i] <= key < a[i+1] or Q ++ // // (i.e., if key is within array, i is the correct index) ++ // return i; ++ // } ++ ++ // register allocation ++ const Register array = T2; ++ const Register i = T3, j = A7; ++ const Register h = T1; ++ const Register temp = T0; ++ const Register key = FSR; ++ ++ // setup array ++ __ addi_d(array, BCP, 3*BytesPerInt); ++ __ li(AT, -BytesPerInt); ++ __ andr(array, array, AT); ++ ++ // initialize i & j ++ __ move(i, R0); ++ __ ld_w(j, array, - 1 * BytesPerInt); ++ // Convert j into native byteordering ++ __ swap(j); ++ ++ // and start ++ Label entry; ++ __ b(entry); ++ ++ // binary search loop ++ { ++ Label loop; ++ __ bind(loop); ++ // int h = (i + j) >> 1; ++ __ add_d(h, i, j); ++ __ srli_d(h, h, 1); ++ // if (key < array[h].fast_match()) { ++ // j = h; ++ // } else { ++ // i = h; ++ // } ++ // Convert array[h].match to native byte-ordering before compare ++ __ alsl_d(AT, h, array, Address::times_8 - 1); ++ __ ld_w(temp, AT, 0 * BytesPerInt); ++ __ swap(temp); ++ ++ __ slt(AT, key, temp); ++ __ maskeqz(i, i, AT); ++ __ masknez(temp, h, AT); ++ __ OR(i, i, temp); ++ __ masknez(j, j, AT); ++ __ maskeqz(temp, h, AT); ++ __ OR(j, j, temp); ++ ++ // while (i+1 < j) ++ __ bind(entry); ++ __ addi_d(h, i, 1); ++ __ blt(h, j, loop); ++ } ++ ++ // end of binary search, result index is i (must check again!) 
++  Label default_case;
++  // Convert array[i].match to native byte-ordering before compare
++  __ alsl_d(AT, i, array, Address::times_8 - 1);
++  __ ld_w(temp, AT, 0 * BytesPerInt);
++  __ swap(temp);
++  __ bne(key, temp, default_case);
++
++  // entry found -> j = offset
++  __ alsl_d(AT, i, array, Address::times_8 - 1);
++  __ ld_w(j, AT, 1 * BytesPerInt);
++  __ profile_switch_case(i, key, array);
++  __ swap(j);
++
++  __ add_d(BCP, BCP, j);
++  __ ld_bu(Rnext, BCP, 0);
++  __ dispatch_only(vtos);
++
++  // default case -> j = default offset
++  __ bind(default_case);
++  __ profile_switch_default(i);
++  __ ld_w(j, array, - 2 * BytesPerInt);
++  __ swap(j);
++  __ add_d(BCP, BCP, j);
++  __ ld_bu(Rnext, BCP, 0);
++  __ dispatch_only(vtos);
++}
++
++void TemplateTable::_return(TosState state) {
++  transition(state, state);
++  assert(_desc->calls_vm(),
++         "inconsistent calls_vm information"); // call in remove_activation
++
++  if (_desc->bytecode() == Bytecodes::_return_register_finalizer) {
++    assert(state == vtos, "only valid state");
++    __ ld_d(T1, aaddress(0));
++    __ load_klass(LVP, T1);
++    __ ld_w(LVP, LVP, in_bytes(Klass::access_flags_offset()));
++    __ li(AT, JVM_ACC_HAS_FINALIZER);
++    __ andr(AT, AT, LVP);
++    Label skip_register_finalizer;
++    __ beq(AT, R0, skip_register_finalizer);
++    __ call_VM(noreg, CAST_FROM_FN_PTR(address,
++               InterpreterRuntime::register_finalizer), T1);
++    __ bind(skip_register_finalizer);
++  }
++
++  // Narrow result if state is itos but result type is smaller.
++  // Need to narrow in the return bytecode rather than in generate_return_entry
++  // since compiled code callers expect the result to already be narrowed.
++  if (state == itos) {
++    __ narrow(FSR);
++  }
++
++  __ remove_activation(state, T4);
++  __ membar(__ StoreStore);
++
++  __ jr(T4);
++}
++
++// we don't shift the index left by 2 bits in get_cache_and_index_at_bcp,
++// because we always need to shift the index before we use it. A
++// ConstantPoolCacheEntry is 16 bytes long, and index is the index into the
++// ConstantPoolCache, so cache + base_offset() + index * 16 is
++// the corresponding ConstantPoolCacheEntry
++// used registers : T2
++// NOTE : the returned index still needs to be shifted left by 4 to get the address!
++void TemplateTable::resolve_cache_and_index(int byte_no,
++                                            Register Rcache,
++                                            Register index,
++                                            size_t index_size) {
++  assert(byte_no == f1_byte || byte_no == f2_byte, "byte_no out of range");
++  const Register temp = A1;
++  assert_different_registers(Rcache, index);
++
++  Label resolved;
++  __ get_cache_and_index_and_bytecode_at_bcp(Rcache, index, temp, byte_no, 1, index_size);
++  // is resolved?
++ int i = (int)bytecode(); ++ __ addi_d(temp, temp, -i); ++ __ beq(temp, R0, resolved); ++ // resolve first time through ++ address entry; ++ switch (bytecode()) { ++ case Bytecodes::_getstatic : // fall through ++ case Bytecodes::_putstatic : // fall through ++ case Bytecodes::_getfield : // fall through ++ case Bytecodes::_putfield : ++ entry = CAST_FROM_FN_PTR(address, InterpreterRuntime::resolve_get_put); ++ break; ++ case Bytecodes::_invokevirtual : // fall through ++ case Bytecodes::_invokespecial : // fall through ++ case Bytecodes::_invokestatic : // fall through ++ case Bytecodes::_invokeinterface: ++ entry = CAST_FROM_FN_PTR(address, InterpreterRuntime::resolve_invoke); ++ break; ++ case Bytecodes::_invokehandle: ++ entry = CAST_FROM_FN_PTR(address, InterpreterRuntime::resolve_invokehandle); ++ break; ++ case Bytecodes::_invokedynamic: ++ entry = CAST_FROM_FN_PTR(address, InterpreterRuntime::resolve_invokedynamic); ++ break; ++ default : ++ fatal(err_msg("unexpected bytecode: %s", Bytecodes::name(bytecode()))); ++ break; ++ } ++ ++ __ li(temp, i); ++ __ call_VM(NOREG, entry, temp); ++ ++ // Update registers with resolved info ++ __ get_cache_and_index_at_bcp(Rcache, index, 1, index_size); ++ __ bind(resolved); ++} ++ ++// The Rcache and index registers must be set before call ++void TemplateTable::load_field_cp_cache_entry(Register obj, ++ Register cache, ++ Register index, ++ Register off, ++ Register flags, ++ bool is_static = false) { ++ assert_different_registers(cache, index, flags, off); ++ ++ ByteSize cp_base_offset = ConstantPoolCache::base_offset(); ++ // Field offset ++ __ alsl_d(AT, index, cache, Address::times_ptr - 1); ++ __ ld_d(off, AT, in_bytes(cp_base_offset + ConstantPoolCacheEntry::f2_offset())); ++ // Flags ++ __ ld_d(flags, AT, in_bytes(cp_base_offset + ConstantPoolCacheEntry::flags_offset())); ++ ++ // klass overwrite register ++ if (is_static) { ++ __ ld_d(obj, AT, in_bytes(cp_base_offset + ConstantPoolCacheEntry::f1_offset())); ++ const int mirror_offset = in_bytes(Klass::java_mirror_offset()); ++ __ ld_d(obj, Address(obj, mirror_offset)); ++ ++ __ verify_oop(obj); ++ } ++} ++ ++// get the method, itable_index and flags of the current invoke ++void TemplateTable::load_invoke_cp_cache_entry(int byte_no, ++ Register method, ++ Register itable_index, ++ Register flags, ++ bool is_invokevirtual, ++ bool is_invokevfinal, /*unused*/ ++ bool is_invokedynamic) { ++ // setup registers ++ const Register cache = T3; ++ const Register index = T1; ++ assert_different_registers(method, flags); ++ assert_different_registers(method, cache, index); ++ assert_different_registers(itable_index, flags); ++ assert_different_registers(itable_index, cache, index); ++ assert(is_invokevirtual == (byte_no == f2_byte), "is invokevirtual flag redundant"); ++ // determine constant pool cache field offsets ++ const int method_offset = in_bytes( ++ ConstantPoolCache::base_offset() + ++ ((byte_no == f2_byte) ++ ? ConstantPoolCacheEntry::f2_offset() ++ : ConstantPoolCacheEntry::f1_offset())); ++ const int flags_offset = in_bytes(ConstantPoolCache::base_offset() + ++ ConstantPoolCacheEntry::flags_offset()); ++ // access constant pool cache fields ++ const int index_offset = in_bytes(ConstantPoolCache::base_offset() + ++ ConstantPoolCacheEntry::f2_offset()); ++ ++ size_t index_size = (is_invokedynamic ? 
sizeof(u4): sizeof(u2)); ++ resolve_cache_and_index(byte_no, cache, index, index_size); ++ ++ __ alsl_d(AT, index, cache, Address::times_ptr - 1); ++ __ ld_d(method, AT, method_offset); ++ ++ if (itable_index != NOREG) { ++ __ ld_d(itable_index, AT, index_offset); ++ } ++ __ ld_d(flags, AT, flags_offset); ++} ++ ++// The registers cache and index expected to be set before call. ++// Correct values of the cache and index registers are preserved. ++void TemplateTable::jvmti_post_field_access(Register cache, Register index, ++ bool is_static, bool has_tos) { ++ // do the JVMTI work here to avoid disturbing the register state below ++ // We use c_rarg registers here because we want to use the register used in ++ // the call to the VM ++ if (JvmtiExport::can_post_field_access()) { ++ // Check to see if a field access watch has been set before we ++ // take the time to call into the VM. ++ Label L1; ++ // kill FSR ++ Register tmp1 = T2; ++ Register tmp2 = T1; ++ Register tmp3 = T3; ++ assert_different_registers(cache, index, AT); ++ __ li(AT, (intptr_t)JvmtiExport::get_field_access_count_addr()); ++ __ ld_w(AT, AT, 0); ++ __ beq(AT, R0, L1); ++ ++ __ get_cache_and_index_at_bcp(tmp2, tmp3, 1); ++ ++ // cache entry pointer ++ __ addi_d(tmp2, tmp2, in_bytes(ConstantPoolCache::base_offset())); ++ __ shl(tmp3, LogBytesPerWord); ++ __ add_d(tmp2, tmp2, tmp3); ++ if (is_static) { ++ __ move(tmp1, R0); ++ } else { ++ __ ld_d(tmp1, SP, 0); ++ __ verify_oop(tmp1); ++ } ++ // tmp1: object pointer or NULL ++ // tmp2: cache entry pointer ++ // tmp3: jvalue object on the stack ++ __ call_VM(NOREG, CAST_FROM_FN_PTR(address, ++ InterpreterRuntime::post_field_access), ++ tmp1, tmp2, tmp3); ++ __ get_cache_and_index_at_bcp(cache, index, 1); ++ __ bind(L1); ++ } ++} ++ ++void TemplateTable::pop_and_check_object(Register r) { ++ __ pop_ptr(r); ++ __ null_check(r); // for field access must check obj. 
++ __ verify_oop(r); ++} ++ ++// used registers : T1, T2, T3, T1 ++// T1 : flags ++// T2 : off ++// T3 : obj ++// T1 : field address ++// The flags 31, 30, 29, 28 together build a 4 bit number 0 to 8 with the ++// following mapping to the TosState states: ++// btos: 0 ++// ctos: 1 ++// stos: 2 ++// itos: 3 ++// ltos: 4 ++// ftos: 5 ++// dtos: 6 ++// atos: 7 ++// vtos: 8 ++// see ConstantPoolCacheEntry::set_field for more info ++void TemplateTable::getfield_or_static(int byte_no, bool is_static) { ++ transition(vtos, vtos); ++ ++ const Register cache = T3; ++ const Register index = T0; ++ ++ const Register obj = T3; ++ const Register off = T2; ++ const Register flags = T1; ++ ++ const Register scratch = T8; ++ ++ resolve_cache_and_index(byte_no, cache, index, sizeof(u2)); ++ jvmti_post_field_access(cache, index, is_static, false); ++ load_field_cp_cache_entry(obj, cache, index, off, flags, is_static); ++ ++ { ++ __ li(scratch, 1 << ConstantPoolCacheEntry::is_volatile_shift); ++ __ andr(scratch, scratch, flags); ++ ++ Label notVolatile; ++ __ beq(scratch, R0, notVolatile); ++ __ membar(MacroAssembler::AnyAny); ++ __ bind(notVolatile); ++ } ++ ++ if (!is_static) pop_and_check_object(obj); ++ __ add_d(index, obj, off); ++ ++ ++ Label Done, notByte, notBool, notInt, notShort, notChar, ++ notLong, notFloat, notObj, notDouble; ++ ++ assert(btos == 0, "change code, btos != 0"); ++ __ srli_d(flags, flags, ConstantPoolCacheEntry::tos_state_shift); ++ __ andi(flags, flags, ConstantPoolCacheEntry::tos_state_mask); ++ __ bne(flags, R0, notByte); ++ ++ // btos ++ __ ld_b(FSR, index, 0); ++ __ push(btos); ++ ++ // Rewrite bytecode to be faster ++ if (!is_static) { ++ patch_bytecode(Bytecodes::_fast_bgetfield, T3, T2); ++ } ++ __ b(Done); ++ ++ __ bind(notByte); ++ __ li(AT, ztos); ++ __ bne(flags, AT, notBool); ++ ++ // ztos ++ __ ld_b(FSR, index, 0); ++ __ push(ztos); ++ ++ // Rewrite bytecode to be faster ++ if (!is_static) { ++ patch_bytecode(Bytecodes::_fast_bgetfield, T3, T2); ++ } ++ __ b(Done); ++ ++ __ bind(notBool); ++ __ li(AT, itos); ++ __ bne(flags, AT, notInt); ++ ++ // itos ++ __ ld_w(FSR, index, 0); ++ __ push(itos); ++ ++ // Rewrite bytecode to be faster ++ if (!is_static) { ++ patch_bytecode(Bytecodes::_fast_igetfield, T3, T2); ++ } ++ __ b(Done); ++ ++ __ bind(notInt); ++ __ li(AT, atos); ++ __ bne(flags, AT, notObj); ++ ++ // atos ++ //add for compressedoops ++ __ load_heap_oop(FSR, Address(index, 0)); ++ __ push(atos); ++ ++ if (!is_static) { ++ patch_bytecode(Bytecodes::_fast_agetfield, T3, T2); ++ } ++ __ b(Done); ++ ++ __ bind(notObj); ++ __ li(AT, ctos); ++ __ bne(flags, AT, notChar); ++ ++ // ctos ++ __ ld_hu(FSR, index, 0); ++ __ push(ctos); ++ ++ if (!is_static) { ++ patch_bytecode(Bytecodes::_fast_cgetfield, T3, T2); ++ } ++ __ b(Done); ++ ++ __ bind(notChar); ++ __ li(AT, stos); ++ __ bne(flags, AT, notShort); ++ ++ // stos ++ __ ld_h(FSR, index, 0); ++ __ push(stos); ++ ++ if (!is_static) { ++ patch_bytecode(Bytecodes::_fast_sgetfield, T3, T2); ++ } ++ __ b(Done); ++ ++ __ bind(notShort); ++ __ li(AT, ltos); ++ __ bne(flags, AT, notLong); ++ ++ // ltos ++ __ ld_d(FSR, index, 0 * wordSize); ++ __ push(ltos); ++ ++ // Don't rewrite to _fast_lgetfield for potential volatile case. 
++ __ b(Done); ++ ++ __ bind(notLong); ++ __ li(AT, ftos); ++ __ bne(flags, AT, notFloat); ++ ++ // ftos ++ __ fld_s(FSF, index, 0); ++ __ push(ftos); ++ ++ if (!is_static) { ++ patch_bytecode(Bytecodes::_fast_fgetfield, T3, T2); ++ } ++ __ b(Done); ++ ++ __ bind(notFloat); ++ __ li(AT, dtos); ++#ifdef ASSERT ++ __ bne(flags, AT, notDouble); ++#endif ++ ++ // dtos ++ __ fld_d(FSF, index, 0 * wordSize); ++ __ push(dtos); ++ ++ if (!is_static) { ++ patch_bytecode(Bytecodes::_fast_dgetfield, T3, T2); ++ } ++ ++#ifdef ASSERT ++ __ b(Done); ++ __ bind(notDouble); ++ __ stop("Bad state"); ++#endif ++ ++ __ bind(Done); ++ ++ { ++ Label notVolatile; ++ __ beq(scratch, R0, notVolatile); ++ __ membar(Assembler::Membar_mask_bits(__ LoadLoad | __ LoadStore)); ++ __ bind(notVolatile); ++ } ++} ++ ++ ++void TemplateTable::getfield(int byte_no) { ++ getfield_or_static(byte_no, false); ++} ++ ++void TemplateTable::getstatic(int byte_no) { ++ getfield_or_static(byte_no, true); ++} ++ ++// The registers cache and index expected to be set before call. ++// The function may destroy various registers, just not the cache and index registers. ++void TemplateTable::jvmti_post_field_mod(Register cache, Register index, bool is_static) { ++ transition(vtos, vtos); ++ ++ ByteSize cp_base_offset = ConstantPoolCache::base_offset(); ++ ++ if (JvmtiExport::can_post_field_modification()) { ++ // Check to see if a field modification watch has been set before ++ // we take the time to call into the VM. ++ Label L1; ++ //kill AT, T1, T2, T3, T4 ++ Register tmp1 = T2; ++ Register tmp2 = T1; ++ Register tmp3 = T3; ++ Register tmp4 = T4; ++ assert_different_registers(cache, index, tmp4); ++ ++ __ li(AT, JvmtiExport::get_field_modification_count_addr()); ++ __ ld_w(AT, AT, 0); ++ __ beq(AT, R0, L1); ++ ++ __ get_cache_and_index_at_bcp(tmp2, tmp4, 1); ++ ++ if (is_static) { ++ __ move(tmp1, R0); ++ } else { ++ // Life is harder. The stack holds the value on top, followed by ++ // the object. We don't know the size of the value, though; it ++ // could be one or two words depending on its type. As a result, ++ // we must find the type to determine where the object is. 
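++ // Long and double values occupy two expression-stack slots; every other
++ // type takes one. The tos state in the cache entry flags tells us which.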
++ Label two_word, valsize_known; ++ __ alsl_d(AT, tmp4, tmp2, Address::times_8 - 1); ++ __ ld_d(tmp3, AT, in_bytes(cp_base_offset + ++ ConstantPoolCacheEntry::flags_offset())); ++ __ shr(tmp3, ConstantPoolCacheEntry::tos_state_shift); ++ ++ ConstantPoolCacheEntry::verify_tos_state_shift(); ++ __ move(tmp1, SP); ++ __ li(AT, ltos); ++ __ beq(tmp3, AT, two_word); ++ __ li(AT, dtos); ++ __ beq(tmp3, AT, two_word); ++ __ addi_d(tmp1, tmp1, Interpreter::expr_offset_in_bytes(1) ); ++ __ b(valsize_known); ++ ++ __ bind(two_word); ++ __ addi_d(tmp1, tmp1, Interpreter::expr_offset_in_bytes(2)); ++ ++ __ bind(valsize_known); ++ // setup object pointer ++ __ ld_d(tmp1, tmp1, 0 * wordSize); ++ } ++ // cache entry pointer ++ __ addi_d(tmp2, tmp2, in_bytes(cp_base_offset)); ++ __ shl(tmp4, LogBytesPerWord); ++ __ add_d(tmp2, tmp2, tmp4); ++ // object (tos) ++ __ move(tmp3, SP); ++ // tmp1: object pointer set up above (NULL if static) ++ // tmp2: cache entry pointer ++ // tmp3: jvalue object on the stack ++ __ call_VM(NOREG, ++ CAST_FROM_FN_PTR(address, ++ InterpreterRuntime::post_field_modification), ++ tmp1, tmp2, tmp3); ++ __ get_cache_and_index_at_bcp(cache, index, 1); ++ __ bind(L1); ++ } ++} ++ ++// used registers : T0, T1, T2, T3, T8 ++// T1 : flags ++// T2 : off ++// T3 : obj ++// T8 : volatile bit ++// see ConstantPoolCacheEntry::set_field for more info ++void TemplateTable::putfield_or_static(int byte_no, bool is_static) { ++ transition(vtos, vtos); ++ ++ const Register cache = T3; ++ const Register index = T0; ++ const Register obj = T3; ++ const Register off = T2; ++ const Register flags = T1; ++ const Register bc = T3; ++ ++ const Register scratch = T8; ++ ++ resolve_cache_and_index(byte_no, cache, index, sizeof(u2)); ++ jvmti_post_field_mod(cache, index, is_static); ++ load_field_cp_cache_entry(obj, cache, index, off, flags, is_static); ++ ++ Label Done; ++ { ++ __ li(scratch, 1 << ConstantPoolCacheEntry::is_volatile_shift); ++ __ andr(scratch, scratch, flags); ++ ++ Label notVolatile; ++ __ beq(scratch, R0, notVolatile); ++ __ membar(Assembler::Membar_mask_bits(__ StoreStore | __ LoadStore)); ++ __ bind(notVolatile); ++ } ++ ++ Label notByte, notBool, notInt, notShort, notChar, notLong, notFloat, notObj, notDouble; ++ ++ assert(btos == 0, "change code, btos != 0"); ++ ++ // btos ++ __ srli_d(flags, flags, ConstantPoolCacheEntry::tos_state_shift); ++ __ andi(flags, flags, ConstantPoolCacheEntry::tos_state_mask); ++ __ bne(flags, R0, notByte); ++ ++ __ pop(btos); ++ if (!is_static) { ++ pop_and_check_object(obj); ++ } ++ __ add_d(AT, obj, off); ++ __ st_b(FSR, AT, 0); ++ ++ if (!is_static) { ++ patch_bytecode(Bytecodes::_fast_bputfield, bc, off, true, byte_no); ++ } ++ __ b(Done); ++ ++ // ztos ++ __ bind(notByte); ++ __ li(AT, ztos); ++ __ bne(flags, AT, notBool); ++ ++ __ pop(ztos); ++ if (!is_static) { ++ pop_and_check_object(obj); ++ } ++ __ add_d(AT, obj, off); ++ __ andi(FSR, FSR, 0x1); ++ __ st_b(FSR, AT, 0); ++ ++ if (!is_static) { ++ patch_bytecode(Bytecodes::_fast_zputfield, bc, off, true, byte_no); ++ } ++ __ b(Done); ++ ++ // itos ++ __ bind(notBool); ++ __ li(AT, itos); ++ __ bne(flags, AT, notInt); ++ ++ __ pop(itos); ++ if (!is_static) { ++ pop_and_check_object(obj); ++ } ++ __ add_d(AT, obj, off); ++ __ st_w(FSR, AT, 0); ++ ++ if (!is_static) { ++ patch_bytecode(Bytecodes::_fast_iputfield, bc, off, true, byte_no); ++ } ++ __ b(Done); ++ ++ // atos ++ __ bind(notInt); ++ __ li(AT, atos); ++ __ bne(flags, AT, notObj); ++ ++ __ pop(atos); ++ if (!is_static) { ++ 
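++ // the reference value was already popped into FSR above; the receiver
++ // is the next slot on the expression stack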
pop_and_check_object(obj); ++ } ++ ++ do_oop_store(_masm, Address(obj, off, Address::times_1, 0), FSR, _bs->kind(), false); ++ ++ if (!is_static) { ++ patch_bytecode(Bytecodes::_fast_aputfield, bc, off, true, byte_no); ++ } ++ __ b(Done); ++ ++ // ctos ++ __ bind(notObj); ++ __ li(AT, ctos); ++ __ bne(flags, AT, notChar); ++ ++ __ pop(ctos); ++ if (!is_static) { ++ pop_and_check_object(obj); ++ } ++ __ add_d(AT, obj, off); ++ __ st_h(FSR, AT, 0); ++ if (!is_static) { ++ patch_bytecode(Bytecodes::_fast_cputfield, bc, off, true, byte_no); ++ } ++ __ b(Done); ++ ++ // stos ++ __ bind(notChar); ++ __ li(AT, stos); ++ __ bne(flags, AT, notShort); ++ ++ __ pop(stos); ++ if (!is_static) { ++ pop_and_check_object(obj); ++ } ++ __ add_d(AT, obj, off); ++ __ st_h(FSR, AT, 0); ++ if (!is_static) { ++ patch_bytecode(Bytecodes::_fast_sputfield, bc, off, true, byte_no); ++ } ++ __ b(Done); ++ ++ // ltos ++ __ bind(notShort); ++ __ li(AT, ltos); ++ __ bne(flags, AT, notLong); ++ ++ __ pop(ltos); ++ if (!is_static) { ++ pop_and_check_object(obj); ++ } ++ __ add_d(AT, obj, off); ++ __ st_d(FSR, AT, 0); ++ if (!is_static) { ++ patch_bytecode(Bytecodes::_fast_lputfield, bc, off, true, byte_no); ++ } ++ __ b(Done); ++ ++ // ftos ++ __ bind(notLong); ++ __ li(AT, ftos); ++ __ bne(flags, AT, notFloat); ++ ++ __ pop(ftos); ++ if (!is_static) { ++ pop_and_check_object(obj); ++ } ++ __ add_d(AT, obj, off); ++ __ fst_s(FSF, AT, 0); ++ if (!is_static) { ++ patch_bytecode(Bytecodes::_fast_fputfield, bc, off, true, byte_no); ++ } ++ __ b(Done); ++ ++ ++ // dtos ++ __ bind(notFloat); ++ __ li(AT, dtos); ++#ifdef ASSERT ++ __ bne(flags, AT, notDouble); ++#endif ++ ++ __ pop(dtos); ++ if (!is_static) { ++ pop_and_check_object(obj); ++ } ++ __ add_d(AT, obj, off); ++ __ fst_d(FSF, AT, 0); ++ if (!is_static) { ++ patch_bytecode(Bytecodes::_fast_dputfield, bc, off, true, byte_no); ++ } ++ ++#ifdef ASSERT ++ __ b(Done); ++ ++ __ bind(notDouble); ++ __ stop("Bad state"); ++#endif ++ ++ __ bind(Done); ++ ++ { ++ Label notVolatile; ++ __ beq(scratch, R0, notVolatile); ++ __ membar(Assembler::Membar_mask_bits(__ StoreLoad | __ StoreStore)); ++ __ bind(notVolatile); ++ } ++} ++ ++void TemplateTable::putfield(int byte_no) { ++ putfield_or_static(byte_no, false); ++} ++ ++void TemplateTable::putstatic(int byte_no) { ++ putfield_or_static(byte_no, true); ++} ++ ++// used registers : T1, T2, T3 ++// T1 : cp_entry ++// T2 : obj ++// T3 : value pointer ++void TemplateTable::jvmti_post_fast_field_mod() { ++ if (JvmtiExport::can_post_field_modification()) { ++ // Check to see if a field modification watch has been set before ++ // we take the time to call into the VM. 
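++ // The bytecode has already been rewritten to a _fast_xputfield form here,
++ // so the value still sits in FSR/FSF and is pushed back onto the stack
++ // below to build the jvalue passed to the VM.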
++ Label L2; ++ //kill AT, T1, T2, T3, T4 ++ Register tmp1 = T2; ++ Register tmp2 = T1; ++ Register tmp3 = T3; ++ Register tmp4 = T4; ++ __ li(AT, JvmtiExport::get_field_modification_count_addr()); ++ __ ld_w(tmp3, AT, 0); ++ __ beq(tmp3, R0, L2); ++ __ pop_ptr(tmp1); ++ __ verify_oop(tmp1); ++ __ push_ptr(tmp1); ++ switch (bytecode()) { // load values into the jvalue object ++ case Bytecodes::_fast_aputfield: __ push_ptr(FSR); break; ++ case Bytecodes::_fast_bputfield: // fall through ++ case Bytecodes::_fast_zputfield: // fall through ++ case Bytecodes::_fast_sputfield: // fall through ++ case Bytecodes::_fast_cputfield: // fall through ++ case Bytecodes::_fast_iputfield: __ push_i(FSR); break; ++ case Bytecodes::_fast_dputfield: __ push_d(FSF); break; ++ case Bytecodes::_fast_fputfield: __ push_f(); break; ++ case Bytecodes::_fast_lputfield: __ push_l(FSR); break; ++ default: ShouldNotReachHere(); ++ } ++ __ move(tmp3, SP); ++ // access constant pool cache entry ++ __ get_cache_entry_pointer_at_bcp(tmp2, FSR, 1); ++ __ verify_oop(tmp1); ++ // tmp1: object pointer copied above ++ // tmp2: cache entry pointer ++ // tmp3: jvalue object on the stack ++ __ call_VM(NOREG, ++ CAST_FROM_FN_PTR(address, ++ InterpreterRuntime::post_field_modification), ++ tmp1, tmp2, tmp3); ++ ++ switch (bytecode()) { // restore tos values ++ case Bytecodes::_fast_aputfield: __ pop_ptr(FSR); break; ++ case Bytecodes::_fast_bputfield: // fall through ++ case Bytecodes::_fast_zputfield: // fall through ++ case Bytecodes::_fast_sputfield: // fall through ++ case Bytecodes::_fast_cputfield: // fall through ++ case Bytecodes::_fast_iputfield: __ pop_i(FSR); break; ++ case Bytecodes::_fast_dputfield: __ pop_d(); break; ++ case Bytecodes::_fast_fputfield: __ pop_f(); break; ++ case Bytecodes::_fast_lputfield: __ pop_l(FSR); break; ++ } ++ __ bind(L2); ++ } ++} ++ ++// used registers : T2, T3, T1 ++// T2 : index & off & field address ++// T3 : cache & obj ++// T1 : flags ++void TemplateTable::fast_storefield(TosState state) { ++ transition(state, vtos); ++ ++ const Register scratch = T8; ++ ++ ByteSize base = ConstantPoolCache::base_offset(); ++ ++ jvmti_post_fast_field_mod(); ++ ++ // access constant pool cache ++ __ get_cache_and_index_at_bcp(T3, T2, 1); ++ ++ // Must prevent reordering of the following cp cache loads with bytecode load ++ __ membar(__ LoadLoad); ++ ++ // test for volatile with T1 ++ __ alsl_d(AT, T2, T3, Address::times_8 - 1); ++ __ ld_d(T1, AT, in_bytes(base + ConstantPoolCacheEntry::flags_offset())); ++ ++ // replace index with field offset from cache entry ++ __ ld_d(T2, AT, in_bytes(base + ConstantPoolCacheEntry::f2_offset())); ++ ++ Label Done; ++ { ++ __ li(scratch, 1 << ConstantPoolCacheEntry::is_volatile_shift); ++ __ andr(scratch, scratch, T1); ++ ++ Label notVolatile; ++ __ beq(scratch, R0, notVolatile); ++ __ membar(Assembler::Membar_mask_bits(__ StoreStore | __ LoadStore)); ++ __ bind(notVolatile); ++ } ++ ++ // Get object from stack ++ pop_and_check_object(T3); ++ ++ if (bytecode() != Bytecodes::_fast_aputfield) { ++ // field address ++ __ add_d(T2, T3, T2); ++ } ++ ++ // access field ++ switch (bytecode()) { ++ case Bytecodes::_fast_zputfield: ++ __ andi(FSR, FSR, 0x1); // boolean is true if LSB is 1 ++ // fall through to bputfield ++ case Bytecodes::_fast_bputfield: ++ __ st_b(FSR, T2, 0); ++ break; ++ case Bytecodes::_fast_sputfield: // fall through ++ case Bytecodes::_fast_cputfield: ++ __ st_h(FSR, T2, 0); ++ break; ++ case Bytecodes::_fast_iputfield: ++ __ st_w(FSR, T2, 0); ++ 
break; ++ case Bytecodes::_fast_lputfield: ++ __ st_d(FSR, T2, 0 * wordSize); ++ break; ++ case Bytecodes::_fast_fputfield: ++ __ fst_s(FSF, T2, 0); ++ break; ++ case Bytecodes::_fast_dputfield: ++ __ fst_d(FSF, T2, 0 * wordSize); ++ break; ++ case Bytecodes::_fast_aputfield: ++ do_oop_store(_masm, Address(T3, T2, Address::times_1, 0), FSR, _bs->kind(), false); ++ break; ++ default: ++ ShouldNotReachHere(); ++ } ++ ++ { ++ Label notVolatile; ++ __ beq(scratch, R0, notVolatile); ++ __ membar(Assembler::Membar_mask_bits(__ StoreLoad | __ StoreStore)); ++ __ bind(notVolatile); ++ } ++} ++ ++// used registers : T2, T3, T1 ++// T3 : cp_entry & cache ++// T2 : index & offset ++void TemplateTable::fast_accessfield(TosState state) { ++ transition(atos, state); ++ ++ const Register scratch = T8; ++ ++ // do the JVMTI work here to avoid disturbing the register state below ++ if (JvmtiExport::can_post_field_access()) { ++ // Check to see if a field access watch has been set before we take ++ // the time to call into the VM. ++ Label L1; ++ __ li(AT, (intptr_t)JvmtiExport::get_field_access_count_addr()); ++ __ ld_w(T3, AT, 0); ++ __ beq(T3, R0, L1); ++ // access constant pool cache entry ++ __ get_cache_entry_pointer_at_bcp(T3, T1, 1); ++ __ move(TSR, FSR); ++ __ verify_oop(FSR); ++ // FSR: object pointer copied above ++ // T3: cache entry pointer ++ __ call_VM(NOREG, ++ CAST_FROM_FN_PTR(address, InterpreterRuntime::post_field_access), ++ FSR, T3); ++ __ move(FSR, TSR); ++ __ bind(L1); ++ } ++ ++ // access constant pool cache ++ __ get_cache_and_index_at_bcp(T3, T2, 1); ++ ++ // Must prevent reordering of the following cp cache loads with bytecode load ++ __ membar(__ LoadLoad); ++ ++ // replace index with field offset from cache entry ++ __ alsl_d(AT, T2, T3, Address::times_8 - 1); ++ __ ld_d(T2, AT, in_bytes(ConstantPoolCache::base_offset() + ConstantPoolCacheEntry::f2_offset())); ++ ++ { ++ __ ld_d(AT, AT, in_bytes(ConstantPoolCache::base_offset() + ConstantPoolCacheEntry::flags_offset())); ++ __ li(scratch, 1 << ConstantPoolCacheEntry::is_volatile_shift); ++ __ andr(scratch, scratch, AT); ++ ++ Label notVolatile; ++ __ beq(scratch, R0, notVolatile); ++ __ membar(MacroAssembler::AnyAny); ++ __ bind(notVolatile); ++ } ++ ++ // FSR: object ++ __ verify_oop(FSR); ++ __ null_check(FSR); ++ // field addresses ++ __ add_d(FSR, FSR, T2); ++ ++ // access field ++ switch (bytecode()) { ++ case Bytecodes::_fast_bgetfield: ++ __ ld_b(FSR, FSR, 0); ++ break; ++ case Bytecodes::_fast_sgetfield: ++ __ ld_h(FSR, FSR, 0); ++ break; ++ case Bytecodes::_fast_cgetfield: ++ __ ld_hu(FSR, FSR, 0); ++ break; ++ case Bytecodes::_fast_igetfield: ++ __ ld_w(FSR, FSR, 0); ++ break; ++ case Bytecodes::_fast_lgetfield: ++ __ stop("should not be rewritten"); ++ break; ++ case Bytecodes::_fast_fgetfield: ++ __ fld_s(FSF, FSR, 0); ++ break; ++ case Bytecodes::_fast_dgetfield: ++ __ fld_d(FSF, FSR, 0); ++ break; ++ case Bytecodes::_fast_agetfield: ++ __ load_heap_oop(FSR, Address(FSR, 0)); ++ __ verify_oop(FSR); ++ break; ++ default: ++ ShouldNotReachHere(); ++ } ++ ++ { ++ Label notVolatile; ++ __ beq(scratch, R0, notVolatile); ++ __ membar(Assembler::Membar_mask_bits(__ LoadLoad | __ LoadStore)); ++ __ bind(notVolatile); ++ } ++} ++ ++// generator for _fast_iaccess_0, _fast_aaccess_0, _fast_faccess_0 ++// used registers : T1, T2, T3, T1 ++// T1 : obj & field address ++// T2 : off ++// T3 : cache ++// T1 : index ++void TemplateTable::fast_xaccess(TosState state) { ++ transition(vtos, state); ++ ++ const Register scratch = T8; ++ 
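++ // These templates fuse an aload_0 with the following getfield: the receiver
++ // is local slot 0 and the constant pool cache index sits at bcp + 2.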
++ // get receiver ++ __ ld_d(T1, aaddress(0)); ++ // access constant pool cache ++ __ get_cache_and_index_at_bcp(T3, T2, 2); ++ __ alsl_d(AT, T2, T3, Address::times_8 - 1); ++ __ ld_d(T2, AT, in_bytes(ConstantPoolCache::base_offset() + ConstantPoolCacheEntry::f2_offset())); ++ ++ { ++ __ ld_d(AT, AT, in_bytes(ConstantPoolCache::base_offset() + ConstantPoolCacheEntry::flags_offset())); ++ __ li(scratch, 1 << ConstantPoolCacheEntry::is_volatile_shift); ++ __ andr(scratch, scratch, AT); ++ ++ Label notVolatile; ++ __ beq(scratch, R0, notVolatile); ++ __ membar(MacroAssembler::AnyAny); ++ __ bind(notVolatile); ++ } ++ ++ // make sure exception is reported in correct bcp range (getfield is ++ // next instruction) ++ __ addi_d(BCP, BCP, 1); ++ __ null_check(T1); ++ __ add_d(T1, T1, T2); ++ ++ if (state == itos) { ++ __ ld_w(FSR, T1, 0); ++ } else if (state == atos) { ++ __ load_heap_oop(FSR, Address(T1, 0)); ++ __ verify_oop(FSR); ++ } else if (state == ftos) { ++ __ fld_s(FSF, T1, 0); ++ } else { ++ ShouldNotReachHere(); ++ } ++ __ addi_d(BCP, BCP, -1); ++ ++ { ++ Label notVolatile; ++ __ beq(scratch, R0, notVolatile); ++ __ membar(Assembler::Membar_mask_bits(__ LoadLoad | __ LoadStore)); ++ __ bind(notVolatile); ++ } ++} ++ ++ ++ ++//----------------------------------------------------------------------------- ++// Calls ++ ++void TemplateTable::count_calls(Register method, Register temp) { ++ // implemented elsewhere ++ ShouldNotReachHere(); ++} ++ ++// method, index, recv, flags: T1, T2, T3, T1 ++// byte_no = 2 for _invokevirtual, 1 else ++// T0 : return address ++// get the method & index of the invoke, and push the return address of ++// the invoke(first word in the frame) ++// this address is where the return code jmp to. ++// NOTE : this method will set T3&T1 as recv&flags ++void TemplateTable::prepare_invoke(int byte_no, ++ Register method, // linked method (or i-klass) ++ Register index, // itable index, MethodType, etc. ++ Register recv, // if caller wants to see it ++ Register flags // if caller wants to test it ++ ) { ++ ++ ++ // determine flags ++ const Bytecodes::Code code = bytecode(); ++ const bool is_invokeinterface = code == Bytecodes::_invokeinterface; ++ const bool is_invokedynamic = code == Bytecodes::_invokedynamic; ++ const bool is_invokehandle = code == Bytecodes::_invokehandle; ++ const bool is_invokevirtual = code == Bytecodes::_invokevirtual; ++ const bool is_invokespecial = code == Bytecodes::_invokespecial; ++ const bool load_receiver = (recv != noreg); ++ const bool save_flags = (flags != noreg); ++ assert(load_receiver == (code != Bytecodes::_invokestatic && code != Bytecodes::_invokedynamic),""); ++ assert(save_flags == (is_invokeinterface || is_invokevirtual), "need flags for vfinal"); ++ assert(flags == noreg || flags == T1, "error flags reg."); ++ assert(recv == noreg || recv == T3, "error recv reg."); ++ ++ // setup registers & access constant pool cache ++ if(recv == noreg) recv = T3; ++ if(flags == noreg) flags = T1; ++ assert_different_registers(method, index, recv, flags); ++ ++ // save 'interpreter return address' ++ __ save_bcp(); ++ ++ load_invoke_cp_cache_entry(byte_no, method, index, flags, is_invokevirtual, false, is_invokedynamic); ++ ++ if (is_invokedynamic || is_invokehandle) { ++ Label L_no_push; ++ __ li(AT, (1 << ConstantPoolCacheEntry::has_appendix_shift)); ++ __ andr(AT, AT, flags); ++ __ beq(AT, R0, L_no_push); ++ // Push the appendix as a trailing parameter. 
++ // This must be done before we get the receiver, ++ // since the parameter_size includes it. ++ Register tmp = SSR; ++ __ push(tmp); ++ __ move(tmp, index); ++ assert(ConstantPoolCacheEntry::_indy_resolved_references_appendix_offset == 0, "appendix expected at index+0"); ++ __ load_resolved_reference_at_index(index, tmp); ++ __ pop(tmp); ++ __ push(index); // push appendix (MethodType, CallSite, etc.) ++ __ bind(L_no_push); ++ } ++ ++ // load receiver if needed (after appendix is pushed so parameter size is correct) ++ // Note: no return address pushed yet ++ if (load_receiver) { ++ __ li(AT, ConstantPoolCacheEntry::parameter_size_mask); ++ __ andr(recv, flags, AT); ++ // Since we won't push RA on stack, no_return_pc_pushed_yet should be 0. ++ const int no_return_pc_pushed_yet = 0; // argument slot correction before we push return address ++ const int receiver_is_at_end = -1; // back off one slot to get receiver ++ Address recv_addr = __ argument_address(recv, no_return_pc_pushed_yet + receiver_is_at_end); ++ __ ld_d(recv, recv_addr); ++ __ verify_oop(recv); ++ } ++ if(save_flags) { ++ __ move(BCP, flags); ++ } ++ ++ // compute return type ++ __ srli_d(flags, flags, ConstantPoolCacheEntry::tos_state_shift); ++ __ andi(flags, flags, 0xf); ++ ++ // Make sure we don't need to mask flags for tos_state_shift after the above shift ++ ConstantPoolCacheEntry::verify_tos_state_shift(); ++ // load return address ++ { ++ const address table = (address) Interpreter::invoke_return_entry_table_for(code); ++ __ li(AT, (long)table); ++ __ slli_d(flags, flags, LogBytesPerWord); ++ __ add_d(AT, AT, flags); ++ __ ld_d(RA, AT, 0); ++ } ++ ++ if (save_flags) { ++ __ move(flags, BCP); ++ __ restore_bcp(); ++ } ++} ++ ++// used registers : T0, T3, T1, T2 ++// T3 : recv, this two register using convention is by prepare_invoke ++// T1 : flags, klass ++// Rmethod : method, index must be Rmethod ++void TemplateTable::invokevirtual_helper(Register index, ++ Register recv, ++ Register flags) { ++ ++ assert_different_registers(index, recv, flags, T2); ++ ++ // Test for an invoke of a final method ++ Label notFinal; ++ __ li(AT, (1 << ConstantPoolCacheEntry::is_vfinal_shift)); ++ __ andr(AT, flags, AT); ++ __ beq(AT, R0, notFinal); ++ ++ Register method = index; // method must be Rmethod ++ assert(method == Rmethod, "methodOop must be Rmethod for interpreter calling convention"); ++ ++ // do the call - the index is actually the method to call ++ // the index is indeed methodOop, for this is vfinal, ++ // see ConstantPoolCacheEntry::set_method for more info ++ ++ __ verify_oop(method); ++ ++ // It's final, need a null check here! 
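++ // (a vfinal call never loads the receiver klass, so the implicit null
++ //  check that load would provide must be done explicitly)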
++ __ null_check(recv); ++ ++ // profile this call ++ __ profile_final_call(T2); ++ ++ // T2: tmp, used for mdp ++ // method: callee ++ // T4: tmp ++ // is_virtual: true ++ __ profile_arguments_type(T2, method, T4, true); ++ ++ __ jump_from_interpreted(method, T2); ++ ++ __ bind(notFinal); ++ ++ // get receiver klass ++ __ null_check(recv, oopDesc::klass_offset_in_bytes()); ++ __ load_klass(T2, recv); ++ __ verify_oop(T2); ++ ++ // profile this call ++ __ profile_virtual_call(T2, T0, T1); ++ ++ // get target methodOop & entry point ++ const int base = InstanceKlass::vtable_start_offset() * wordSize; ++ assert(vtableEntry::size() * wordSize == wordSize, "adjust the scaling in the code below"); ++ // T2: receiver ++ __ alsl_d(AT, index, T2, Address::times_ptr - 1); ++ //this is a ualign read ++ __ ld_d(method, AT, base + vtableEntry::method_offset_in_bytes()); ++ __ profile_arguments_type(T2, method, T4, true); ++ __ jump_from_interpreted(method, T2); ++} ++ ++void TemplateTable::invokevirtual(int byte_no) { ++ transition(vtos, vtos); ++ assert(byte_no == f2_byte, "use this argument"); ++ prepare_invoke(byte_no, Rmethod, NOREG, T3, T1); ++ // now recv & flags in T3, T1 ++ invokevirtual_helper(Rmethod, T3, T1); ++} ++ ++// T4 : entry ++// Rmethod : method ++void TemplateTable::invokespecial(int byte_no) { ++ transition(vtos, vtos); ++ assert(byte_no == f1_byte, "use this argument"); ++ prepare_invoke(byte_no, Rmethod, NOREG, T3); ++ // now recv & flags in T3, T1 ++ __ verify_oop(T3); ++ __ null_check(T3); ++ __ profile_call(T4); ++ ++ // T8: tmp, used for mdp ++ // Rmethod: callee ++ // T4: tmp ++ // is_virtual: false ++ __ profile_arguments_type(T8, Rmethod, T4, false); ++ ++ __ jump_from_interpreted(Rmethod, T4); ++ __ move(T0, T3); ++} ++ ++void TemplateTable::invokestatic(int byte_no) { ++ transition(vtos, vtos); ++ assert(byte_no == f1_byte, "use this argument"); ++ prepare_invoke(byte_no, Rmethod, NOREG); ++ __ verify_oop(Rmethod); ++ ++ __ profile_call(T4); ++ ++ // T8: tmp, used for mdp ++ // Rmethod: callee ++ // T4: tmp ++ // is_virtual: false ++ __ profile_arguments_type(T8, Rmethod, T4, false); ++ ++ __ jump_from_interpreted(Rmethod, T4); ++} ++ ++// i have no idea what to do here, now. for future change. FIXME. ++void TemplateTable::fast_invokevfinal(int byte_no) { ++ transition(vtos, vtos); ++ assert(byte_no == f2_byte, "use this argument"); ++ __ stop("fast_invokevfinal not used on LoongArch64"); ++} ++ ++// used registers : T0, T1, T2, T3, T1, A7 ++// T0 : itable, vtable, entry ++// T1 : interface ++// T3 : receiver ++// T1 : flags, klass ++// Rmethod : index, method, this is required by interpreter_entry ++void TemplateTable::invokeinterface(int byte_no) { ++ transition(vtos, vtos); ++ //this method will use T1-T4 and T0 ++ assert(byte_no == f1_byte, "use this argument"); ++ prepare_invoke(byte_no, T2, Rmethod, T3, T1); ++ // T2: reference klass ++ // Rmethod: method ++ // T3: receiver ++ // T1: flags ++ ++ // Special case of invokeinterface called for virtual method of ++ // java.lang.Object. See cpCacheOop.cpp for details. ++ // This code isn't produced by javac, but could be produced by ++ // another compliant java compiler. 
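++ // Such calls are marked is_forced_virtual in the cache entry and are
++ // dispatched through the vtable (invokevirtual_helper) rather than the itable.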
++ Label notMethod; ++ __ li(AT, (1 << ConstantPoolCacheEntry::is_forced_virtual_shift)); ++ __ andr(AT, T1, AT); ++ __ beq(AT, R0, notMethod); ++ ++ invokevirtual_helper(Rmethod, T3, T1); ++ __ bind(notMethod); ++ // Get receiver klass into T1 - also a null check ++ //add for compressedoops ++ __ load_klass(T1, T3); ++ __ verify_oop(T1); ++ ++ Label no_such_interface, no_such_method; ++ ++ // Receiver subtype check against REFC. ++ // Superklass in T2. Subklass in T1. ++ __ lookup_interface_method(// inputs: rec. class, interface, itable index ++ T1, T2, noreg, ++ // outputs: scan temp. reg, scan temp. reg ++ T0, FSR, ++ no_such_interface, ++ /*return_method=*/false); ++ ++ // profile this call ++ __ profile_virtual_call(T1, T0, FSR); ++ ++ // Get declaring interface class from method, and itable index ++ __ ld_ptr(T2, Rmethod, in_bytes(Method::const_offset())); ++ __ ld_ptr(T2, T2, in_bytes(ConstMethod::constants_offset())); ++ __ ld_ptr(T2, T2, ConstantPool::pool_holder_offset_in_bytes()); ++ __ ld_w(Rmethod, Rmethod, in_bytes(Method::itable_index_offset())); ++ __ addi_d(Rmethod, Rmethod, (-1) * Method::itable_index_max); ++ __ sub_w(Rmethod, R0, Rmethod); ++ ++ __ lookup_interface_method(// inputs: rec. class, interface, itable index ++ T1, T2, Rmethod, ++ // outputs: method, scan temp. reg ++ Rmethod, T0, ++ no_such_interface); ++ ++ // Rmethod: Method* to call ++ // T3: receiver ++ // Check for abstract method error ++ // Note: This should be done more efficiently via a throw_abstract_method_error ++ // interpreter entry point and a conditional jump to it in case of a null ++ // method. ++ __ beq(Rmethod, R0, no_such_method); ++ ++ __ profile_arguments_type(T1, Rmethod, T0, true); ++ ++ // do the call ++ // T3: receiver ++ // Rmethod: Method* ++ __ jump_from_interpreted(Rmethod, T1); ++ __ should_not_reach_here(); ++ ++ // exception handling code follows... ++ // note: must restore interpreter registers to canonical ++ // state for exception handling to work correctly! ++ ++ __ bind(no_such_method); ++ // throw exception ++ __ call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::throw_AbstractMethodError)); ++ // the call_VM checks for exception, so we should never return here. ++ __ should_not_reach_here(); ++ ++ __ bind(no_such_interface); ++ // throw exception ++ __ call_VM(noreg, CAST_FROM_FN_PTR(address, ++ InterpreterRuntime::throw_IncompatibleClassChangeError)); ++ // the call_VM checks for exception, so we should never return here. 
++ __ should_not_reach_here(); ++} ++ ++ ++void TemplateTable::invokehandle(int byte_no) { ++ transition(vtos, vtos); ++ assert(byte_no == f1_byte, "use this argument"); ++ const Register T2_method = Rmethod; ++ const Register FSR_mtype = FSR; ++ const Register T3_recv = T3; ++ ++ if (!EnableInvokeDynamic) { ++ // rewriter does not generate this bytecode ++ __ should_not_reach_here(); ++ return; ++ } ++ ++ prepare_invoke(byte_no, T2_method, FSR_mtype, T3_recv); ++ //??__ verify_method_ptr(T2_method); ++ __ verify_oop(T3_recv); ++ __ null_check(T3_recv); ++ ++ // T4: MethodType object (from cpool->resolved_references[f1], if necessary) ++ // T2_method: MH.invokeExact_MT method (from f2) ++ ++ // Note: T4 is already pushed (if necessary) by prepare_invoke ++ ++ // FIXME: profile the LambdaForm also ++ __ profile_final_call(T4); ++ ++ // T8: tmp, used for mdp ++ // T2_method: callee ++ // T4: tmp ++ // is_virtual: true ++ __ profile_arguments_type(T8, T2_method, T4, true); ++ ++ __ jump_from_interpreted(T2_method, T4); ++} ++ ++ void TemplateTable::invokedynamic(int byte_no) { ++ transition(vtos, vtos); ++ assert(byte_no == f1_byte, "use this argument"); ++ ++ if (!EnableInvokeDynamic) { ++ // We should not encounter this bytecode if !EnableInvokeDynamic. ++ // The verifier will stop it. However, if we get past the verifier, ++ // this will stop the thread in a reasonable way, without crashing the JVM. ++ __ call_VM(noreg, CAST_FROM_FN_PTR(address, ++ InterpreterRuntime::throw_IncompatibleClassChangeError)); ++ // the call_VM checks for exception, so we should never return here. ++ __ should_not_reach_here(); ++ return; ++ } ++ ++ const Register T2_callsite = T2; ++ ++ prepare_invoke(byte_no, Rmethod, T2_callsite); ++ ++ // T2: CallSite object (from cpool->resolved_references[f1]) ++ // Rmethod: MH.linkToCallSite method (from f2) ++ ++ // Note: T2_callsite is already pushed by prepare_invoke ++ // %%% should make a type profile for any invokedynamic that takes a ref argument ++ // profile this call ++ __ profile_call(T4); ++ ++ // T8: tmp, used for mdp ++ // Rmethod: callee ++ // T4: tmp ++ // is_virtual: false ++ __ profile_arguments_type(T8, Rmethod, T4, false); ++ ++ __ verify_oop(T2_callsite); ++ ++ __ jump_from_interpreted(Rmethod, T4); ++ } ++ ++//----------------------------------------------------------------------------- ++// Allocation ++// T1 : tags & buffer end & thread ++// T2 : object end ++// T3 : klass ++// T1 : object size ++// A1 : cpool ++// A2 : cp index ++// return object in FSR ++void TemplateTable::_new() { ++ transition(vtos, atos); ++ __ get_unsigned_2_byte_index_at_bcp(A2, 1); ++ ++ Label slow_case; ++ Label done; ++ Label initialize_header; ++ Label initialize_object; // including clearing the fields ++ Label allocate_shared; ++ ++ // get InstanceKlass in T3 ++ __ get_cpool_and_tags(A1, T1); ++ ++ __ alsl_d(AT, A2, A1, Address::times_8 - 1); ++ __ ld_d(T3, AT, sizeof(ConstantPool)); ++ ++ // make sure the class we're about to instantiate has been resolved. 
++ // Note: slow_case does a pop of stack, which is why we loaded class/pushed above ++ const int tags_offset = Array::base_offset_in_bytes(); ++ __ add_d(T1, T1, A2); ++ __ ld_b(AT, T1, tags_offset); ++ if(os::is_MP()) { ++ __ membar(Assembler::Membar_mask_bits(__ LoadLoad | __ LoadStore)); ++ } ++ __ addi_d(AT, AT, -(int)JVM_CONSTANT_Class); ++ __ bne(AT, R0, slow_case); ++ ++ // make sure klass is initialized & doesn't have finalizer ++ // make sure klass is fully initialized ++ __ ld_hu(T1, T3, in_bytes(InstanceKlass::init_state_offset())); ++ __ addi_d(AT, T1, - (int)InstanceKlass::fully_initialized); ++ __ bne(AT, R0, slow_case); ++ ++ // has_finalizer ++ __ ld_w(T0, T3, in_bytes(Klass::layout_helper_offset()) ); ++ __ andi(AT, T0, Klass::_lh_instance_slow_path_bit); ++ __ bne(AT, R0, slow_case); ++ ++ // Allocate the instance ++ // 1) Try to allocate in the TLAB ++ // 2) if fail and the object is large allocate in the shared Eden ++ // 3) if the above fails (or is not applicable), go to a slow case ++ // (creates a new TLAB, etc.) ++ ++ const bool allow_shared_alloc = ++ Universe::heap()->supports_inline_contig_alloc() && !CMSIncrementalMode; ++ ++#ifndef OPT_THREAD ++ const Register thread = T8; ++ if (UseTLAB || allow_shared_alloc) { ++ __ get_thread(thread); ++ } ++#else ++ const Register thread = TREG; ++#endif ++ ++ if (UseTLAB) { ++ // get tlab_top ++ __ ld_d(FSR, thread, in_bytes(JavaThread::tlab_top_offset())); ++ // get tlab_end ++ __ ld_d(AT, thread, in_bytes(JavaThread::tlab_end_offset())); ++ __ add_d(T2, FSR, T0); ++ __ blt(AT, T2, allow_shared_alloc ? allocate_shared : slow_case); ++ __ st_d(T2, thread, in_bytes(JavaThread::tlab_top_offset())); ++ ++ if (ZeroTLAB) { ++ // the fields have been already cleared ++ __ beq(R0, R0, initialize_header); ++ } else { ++ // initialize both the header and fields ++ __ beq(R0, R0, initialize_object); ++ } ++ } ++ ++ // Allocation in the shared Eden , if allowed ++ // T0 : instance size in words ++ if(allow_shared_alloc){ ++ __ bind(allocate_shared); ++ ++ Label done, retry; ++ Address heap_top(T1); ++ __ li(T1, (long)Universe::heap()->top_addr()); ++ __ ld_d(FSR, heap_top); ++ ++ __ bind(retry); ++ __ li(AT, (long)Universe::heap()->end_addr()); ++ __ ld_d(AT, AT, 0); ++ __ add_d(T2, FSR, T0); ++ __ blt(AT, T2, slow_case); ++ ++ // Compare FSR with the top addr, and if still equal, store the new ++ // top addr in T2 at the address of the top addr pointer. Sets AT if was ++ // equal, and clears it otherwise. Use lock prefix for atomicity on MPs. ++ // ++ // FSR: object begin ++ // T2: object end ++ // T0: instance size in words ++ ++ // if someone beat us on the allocation, try again, otherwise continue ++ __ cmpxchg(heap_top, FSR, T2, AT, true, true, done, &retry); ++ ++ __ bind(done); ++ __ incr_allocated_bytes(thread, T0, 0); ++ } ++ ++ if (UseTLAB || Universe::heap()->supports_inline_contig_alloc()) { ++ // The object is initialized before the header. If the object size is ++ // zero, go directly to the header initialization. 
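++ // T0 holds the instance size; subtracting sizeof(oopDesc) below leaves just
++ // the field area that still has to be zeroed.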
++ __ bind(initialize_object); ++ __ li(AT, - sizeof(oopDesc)); ++ __ add_d(T0, T0, AT); ++ __ beq(T0, R0, initialize_header); ++ ++ // initialize remaining object fields: T0 is a multiple of 2 ++ { ++ Label loop; ++ __ add_d(T1, FSR, T0); ++ __ addi_d(T1, T1, -oopSize); ++ ++ __ bind(loop); ++ __ st_d(R0, T1, sizeof(oopDesc) + 0 * oopSize); ++ Label L1; ++ __ beq(T1, FSR, L1); //dont clear header ++ __ addi_d(T1, T1, -oopSize); ++ __ b(loop); ++ __ bind(L1); ++ __ addi_d(T1, T1, -oopSize); ++ } ++ ++ // klass in T3, ++ // initialize object header only. ++ __ bind(initialize_header); ++ if (UseBiasedLocking) { ++ __ ld_d(AT, T3, in_bytes(Klass::prototype_header_offset())); ++ __ st_d(AT, FSR, oopDesc::mark_offset_in_bytes ()); ++ } else { ++ __ li(AT, (long)markOopDesc::prototype()); ++ __ st_d(AT, FSR, oopDesc::mark_offset_in_bytes()); ++ } ++ ++ __ store_klass_gap(FSR, R0); ++ __ store_klass(FSR, T3); ++ ++ { ++ SkipIfEqual skip_if(_masm, &DTraceAllocProbes, 0); ++ // Trigger dtrace event for fastpath ++ __ push(atos); ++ __ call_VM_leaf( ++ CAST_FROM_FN_PTR(address, SharedRuntime::dtrace_object_alloc), FSR); ++ __ pop(atos); ++ ++ } ++ __ b(done); ++ } ++ ++ // slow case ++ __ bind(slow_case); ++ call_VM(FSR, CAST_FROM_FN_PTR(address, InterpreterRuntime::_new), A1, A2); ++ ++ // continue ++ __ bind(done); ++ __ membar(__ StoreStore); ++} ++ ++void TemplateTable::newarray() { ++ transition(itos, atos); ++ __ ld_bu(A1, at_bcp(1)); ++ // type, count ++ call_VM(FSR, CAST_FROM_FN_PTR(address, InterpreterRuntime::newarray), A1, FSR); ++ __ membar(__ StoreStore); ++} ++ ++void TemplateTable::anewarray() { ++ transition(itos, atos); ++ __ get_2_byte_integer_at_bcp(A2, AT, 1); ++ __ huswap(A2); ++ __ get_constant_pool(A1); ++ // cp, index, count ++ call_VM(FSR, CAST_FROM_FN_PTR(address, InterpreterRuntime::anewarray), A1, A2, FSR); ++ __ membar(__ StoreStore); ++} ++ ++void TemplateTable::arraylength() { ++ transition(atos, itos); ++ __ null_check(FSR, arrayOopDesc::length_offset_in_bytes()); ++ __ ld_w(FSR, FSR, arrayOopDesc::length_offset_in_bytes()); ++} ++ ++// when invoke gen_subtype_check, super in T3, sub in T2, object in FSR(it's always) ++// T2 : sub klass ++// T3 : cpool ++// T3 : super klass ++void TemplateTable::checkcast() { ++ transition(atos, atos); ++ Label done, is_null, ok_is_subtype, quicked, resolved; ++ __ beq(FSR, R0, is_null); ++ ++ // Get cpool & tags index ++ __ get_cpool_and_tags(T3, T1); ++ __ get_2_byte_integer_at_bcp(T2, AT, 1); ++ __ huswap(T2); ++ ++ // See if bytecode has already been quicked ++ __ add_d(AT, T1, T2); ++ __ ld_b(AT, AT, Array::base_offset_in_bytes()); ++ if(os::is_MP()) { ++ __ membar(Assembler::Membar_mask_bits(__ LoadLoad | __ LoadStore)); ++ } ++ __ addi_d(AT, AT, - (int)JVM_CONSTANT_Class); ++ __ beq(AT, R0, quicked); ++ ++ // In InterpreterRuntime::quicken_io_cc, lots of new classes may be loaded. ++ // Then, GC will move the object in V0 to another places in heap. ++ // Therefore, We should never save such an object in register. ++ // Instead, we should save it in the stack. It can be modified automatically by the GC thread. ++ // After GC, the object address in FSR is changed to a new place. 
++ // ++ __ push(atos); ++ const Register thread = TREG; ++#ifndef OPT_THREAD ++ __ get_thread(thread); ++#endif ++ call_VM(NOREG, CAST_FROM_FN_PTR(address, InterpreterRuntime::quicken_io_cc)); ++ __ get_vm_result_2(T3, thread); ++ __ pop_ptr(FSR); ++ __ b(resolved); ++ ++ // klass already in cp, get superklass in T3 ++ __ bind(quicked); ++ __ alsl_d(AT, T2, T3, Address::times_8 - 1); ++ __ ld_d(T3, AT, sizeof(ConstantPool)); ++ ++ __ bind(resolved); ++ ++ // get subklass in T2 ++ __ load_klass(T2, FSR); ++ // Superklass in T3. Subklass in T2. ++ __ gen_subtype_check(T3, T2, ok_is_subtype); ++ ++ // Come here on failure ++ // object is at FSR ++ __ jmp(Interpreter::_throw_ClassCastException_entry); ++ ++ // Come here on success ++ __ bind(ok_is_subtype); ++ ++ // Collect counts on whether this check-cast sees NULLs a lot or not. ++ if (ProfileInterpreter) { ++ __ b(done); ++ __ bind(is_null); ++ __ profile_null_seen(T3); ++ } else { ++ __ bind(is_null); ++ } ++ __ bind(done); ++} ++ ++// T3 as cpool, T1 as tags, T2 as index ++// object always in FSR, superklass in T3, subklass in T2 ++void TemplateTable::instanceof() { ++ transition(atos, itos); ++ Label done, is_null, ok_is_subtype, quicked, resolved; ++ ++ __ beq(FSR, R0, is_null); ++ ++ // Get cpool & tags index ++ __ get_cpool_and_tags(T3, T1); ++ // get index ++ __ get_2_byte_integer_at_bcp(T2, AT, 1); ++ __ hswap(T2); ++ ++ // See if bytecode has already been quicked ++ // quicked ++ __ add_d(AT, T1, T2); ++ __ ld_b(AT, AT, Array::base_offset_in_bytes()); ++ if(os::is_MP()) { ++ __ membar(Assembler::Membar_mask_bits(__ LoadLoad | __ LoadStore)); ++ } ++ __ addi_d(AT, AT, -(int)JVM_CONSTANT_Class); ++ __ beq(AT, R0, quicked); ++ ++ __ push(atos); ++ const Register thread = TREG; ++#ifndef OPT_THREAD ++ __ get_thread(thread); ++#endif ++ call_VM(NOREG, CAST_FROM_FN_PTR(address, InterpreterRuntime::quicken_io_cc)); ++ __ get_vm_result_2(T3, thread); ++ __ pop_ptr(FSR); ++ __ b(resolved); ++ ++ // get superklass in T3, subklass in T2 ++ __ bind(quicked); ++ __ alsl_d(AT, T2, T3, Address::times_8 - 1); ++ __ ld_d(T3, AT, sizeof(ConstantPool)); ++ ++ __ bind(resolved); ++ // get subklass in T2 ++ __ load_klass(T2, FSR); ++ ++ // Superklass in T3. Subklass in T2. ++ __ gen_subtype_check(T3, T2, ok_is_subtype); ++ // Come here on failure ++ __ move(FSR, R0); ++ __ b(done); ++ ++ // Come here on success ++ __ bind(ok_is_subtype); ++ __ li(FSR, 1); ++ ++ // Collect counts on whether this test sees NULLs a lot or not. ++ if (ProfileInterpreter) { ++ __ beq(R0, R0, done); ++ __ bind(is_null); ++ __ profile_null_seen(T3); ++ } else { ++ __ bind(is_null); // same as 'done' ++ } ++ __ bind(done); ++ // FSR = 0: obj == NULL or obj is not an instanceof the specified klass ++ // FSR = 1: obj != NULL and obj is an instanceof the specified klass ++} ++ ++//-------------------------------------------------------- ++//-------------------------------------------- ++// Breakpoints ++void TemplateTable::_breakpoint() { ++ // Note: We get here even if we are single stepping.. ++ // jbug inists on setting breakpoints at every bytecode ++ // even if we are in single step mode. 
++ ++ transition(vtos, vtos); ++ ++ // get the unpatched byte code ++ __ get_method(A1); ++ __ call_VM(NOREG, ++ CAST_FROM_FN_PTR(address, ++ InterpreterRuntime::get_original_bytecode_at), ++ A1, BCP); ++ __ move(Rnext, V0); // Rnext will be used in dispatch_only_normal ++ ++ // post the breakpoint event ++ __ get_method(A1); ++ __ call_VM(NOREG, CAST_FROM_FN_PTR(address, InterpreterRuntime::_breakpoint), A1, BCP); ++ ++ // complete the execution of original bytecode ++ __ dispatch_only_normal(vtos); ++} ++ ++//----------------------------------------------------------------------------- ++// Exceptions ++ ++void TemplateTable::athrow() { ++ transition(atos, vtos); ++ __ null_check(FSR); ++ __ jmp(Interpreter::throw_exception_entry()); ++} ++ ++//----------------------------------------------------------------------------- ++// Synchronization ++// ++// Note: monitorenter & exit are symmetric routines; which is reflected ++// in the assembly code structure as well ++// ++// Stack layout: ++// ++// [expressions ] <--- SP = expression stack top ++// .. ++// [expressions ] ++// [monitor entry] <--- monitor block top = expression stack bot ++// .. ++// [monitor entry] ++// [frame data ] <--- monitor block bot ++// ... ++// [return addr ] <--- FP ++ ++// we use T2 as monitor entry pointer, T3 as monitor top pointer, c_rarg0 as free slot pointer ++// object always in FSR ++void TemplateTable::monitorenter() { ++ transition(atos, vtos); ++ ++ // check for NULL object ++ __ null_check(FSR); ++ ++ const Address monitor_block_top(FP, frame::interpreter_frame_monitor_block_top_offset ++ * wordSize); ++ const int entry_size = (frame::interpreter_frame_monitor_size()* wordSize); ++ Label allocated; ++ ++ // initialize entry pointer ++ __ move(c_rarg0, R0); ++ ++ // find a free slot in the monitor block (result in c_rarg0) ++ { ++ Label entry, loop, exit, next; ++ __ ld_d(T2, monitor_block_top); ++ __ addi_d(T3, FP, frame::interpreter_frame_initial_sp_offset * wordSize); ++ __ b(entry); ++ ++ // free slot? ++ __ bind(loop); ++ __ ld_d(AT, T2, BasicObjectLock::obj_offset_in_bytes()); ++ __ bne(AT, R0, next); ++ __ move(c_rarg0, T2); ++ ++ __ bind(next); ++ __ beq(FSR, AT, exit); ++ __ addi_d(T2, T2, entry_size); ++ ++ __ bind(entry); ++ __ bne(T3, T2, loop); ++ __ bind(exit); ++ } ++ ++ __ bne(c_rarg0, R0, allocated); ++ ++ // allocate one if there's no free slot ++ { ++ Label entry, loop; ++ // 1. compute new pointers // SP: old expression stack top ++ __ ld_d(c_rarg0, monitor_block_top); ++ __ addi_d(SP, SP, -entry_size); ++ __ addi_d(c_rarg0, c_rarg0, -entry_size); ++ __ st_d(c_rarg0, monitor_block_top); ++ __ move(T3, SP); ++ __ b(entry); ++ ++ // 2. move expression stack contents ++ __ bind(loop); ++ __ ld_d(AT, T3, entry_size); ++ __ st_d(AT, T3, 0); ++ __ addi_d(T3, T3, wordSize); ++ __ bind(entry); ++ __ bne(T3, c_rarg0, loop); ++ } ++ ++ __ bind(allocated); ++ // Increment bcp to point to the next bytecode, ++ // so exception handling for async. exceptions work correctly. ++ // The object has already been poped from the stack, so the ++ // expression stack looks correct. ++ __ addi_d(BCP, BCP, 1); ++ __ st_d(FSR, c_rarg0, BasicObjectLock::obj_offset_in_bytes()); ++ __ lock_object(c_rarg0); ++ // check to make sure this monitor doesn't cause stack overflow after locking ++ __ save_bcp(); // in case of exception ++ __ generate_stack_overflow_check(0); ++ // The bcp has already been incremented. Just need to dispatch to next instruction. 
++ ++ __ dispatch_next(vtos); ++} ++ ++// T2 : top ++// c_rarg0 : entry ++void TemplateTable::monitorexit() { ++ transition(atos, vtos); ++ ++ __ null_check(FSR); ++ ++ const int entry_size =(frame::interpreter_frame_monitor_size()* wordSize); ++ Label found; ++ ++ // find matching slot ++ { ++ Label entry, loop; ++ __ ld_d(c_rarg0, FP, frame::interpreter_frame_monitor_block_top_offset * wordSize); ++ __ addi_d(T2, FP, frame::interpreter_frame_initial_sp_offset * wordSize); ++ __ b(entry); ++ ++ __ bind(loop); ++ __ ld_d(AT, c_rarg0, BasicObjectLock::obj_offset_in_bytes()); ++ __ beq(FSR, AT, found); ++ __ addi_d(c_rarg0, c_rarg0, entry_size); ++ __ bind(entry); ++ __ bne(T2, c_rarg0, loop); ++ } ++ ++ // error handling. Unlocking was not block-structured ++ Label end; ++ __ call_VM(NOREG, CAST_FROM_FN_PTR(address, ++ InterpreterRuntime::throw_illegal_monitor_state_exception)); ++ __ should_not_reach_here(); ++ ++ // call run-time routine ++ // c_rarg0: points to monitor entry ++ __ bind(found); ++ __ move(TSR, FSR); ++ __ unlock_object(c_rarg0); ++ __ move(FSR, TSR); ++ __ bind(end); ++} ++ ++ ++// Wide instructions ++void TemplateTable::wide() { ++ transition(vtos, vtos); ++ __ ld_bu(Rnext, at_bcp(1)); ++ __ slli_d(T4, Rnext, Address::times_8); ++ __ li(AT, (long)Interpreter::_wentry_point); ++ __ add_d(AT, T4, AT); ++ __ ld_d(T4, AT, 0); ++ __ jr(T4); ++} ++ ++ ++void TemplateTable::multianewarray() { ++ transition(vtos, atos); ++ // last dim is on top of stack; we want address of first one: ++ // first_addr = last_addr + (ndims - 1) * wordSize ++ __ ld_bu(A1, at_bcp(3)); // dimension ++ __ addi_d(A1, A1, -1); ++ __ slli_d(A1, A1, Address::times_8); ++ __ add_d(A1, SP, A1); // now A1 pointer to the count array on the stack ++ call_VM(FSR, CAST_FROM_FN_PTR(address, InterpreterRuntime::multianewarray), A1); ++ __ ld_bu(AT, at_bcp(3)); ++ __ slli_d(AT, AT, Address::times_8); ++ __ add_d(SP, SP, AT); ++ __ membar(__ AnyAny);//no membar here for aarch64 ++} ++#endif // !CC_INTERP +diff --git a/hotspot/src/cpu/loongarch/vm/templateTable_loongarch_64.hpp b/hotspot/src/cpu/loongarch/vm/templateTable_loongarch_64.hpp +new file mode 100644 +index 0000000000..c48d76e0a2 +--- /dev/null ++++ b/hotspot/src/cpu/loongarch/vm/templateTable_loongarch_64.hpp +@@ -0,0 +1,44 @@ ++/* ++ * Copyright (c) 2003, 2012, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2020, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ * ++ */ ++ ++#ifndef CPU_LOONGARCH_VM_TEMPLATETABLE_LOONGARCH_64_HPP ++#define CPU_LOONGARCH_VM_TEMPLATETABLE_LOONGARCH_64_HPP ++ ++ static void prepare_invoke(int byte_no, ++ Register method, ++ Register index = noreg, ++ Register recv = noreg, ++ Register flags = noreg ++ ); ++ static void invokevirtual_helper(Register index, Register recv, ++ Register flags); ++ //static void volatile_barrier(Assembler::Membar_mask_bits order_constraint); ++ static void volatile_barrier(); ++ ++ // Helpers ++ static void index_check(Register array, Register index); ++ static void index_check_without_pop(Register array, Register index); ++ ++#endif // CPU_LOONGARCH_VM_TEMPLATETABLE_LOONGARCH_64_HPP +diff --git a/hotspot/src/cpu/loongarch/vm/vmStructs_loongarch.hpp b/hotspot/src/cpu/loongarch/vm/vmStructs_loongarch.hpp +new file mode 100644 +index 0000000000..7c3ce68010 +--- /dev/null ++++ b/hotspot/src/cpu/loongarch/vm/vmStructs_loongarch.hpp +@@ -0,0 +1,68 @@ ++/* ++ * Copyright (c) 2001, 2013, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2020, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#ifndef CPU_LOONGARCH_VM_VMSTRUCTS_LOONGARCH_HPP ++#define CPU_LOONGARCH_VM_VMSTRUCTS_LOONGARCH_HPP ++ ++// These are the CPU-specific fields, types and integer ++// constants required by the Serviceability Agent. This file is ++// referenced by vmStructs.cpp. 
++ ++#define VM_STRUCTS_CPU(nonstatic_field, static_field, unchecked_nonstatic_field, volatile_nonstatic_field, nonproduct_nonstatic_field, c2_nonstatic_field, unchecked_c1_static_field, unchecked_c2_static_field) \ ++ \ ++ /******************************/ \ ++ /* JavaCallWrapper */ \ ++ /******************************/ \ ++ /******************************/ \ ++ /* JavaFrameAnchor */ \ ++ /******************************/ \ ++ volatile_nonstatic_field(JavaFrameAnchor, _last_Java_fp, intptr_t*) \ ++ \ ++ ++ /* NOTE that we do not use the last_entry() macro here; it is used */ ++ /* in vmStructs__.hpp's VM_STRUCTS_OS_CPU macro (and must */ ++ /* be present there) */ ++ ++ ++#define VM_TYPES_CPU(declare_type, declare_toplevel_type, declare_oop_type, declare_integer_type, declare_unsigned_integer_type, declare_c1_toplevel_type, declare_c2_type, declare_c2_toplevel_type) \ ++ ++ /* NOTE that we do not use the last_entry() macro here; it is used */ ++ /* in vmStructs__.hpp's VM_TYPES_OS_CPU macro (and must */ ++ /* be present there) */ ++ ++ ++#define VM_INT_CONSTANTS_CPU(declare_constant, declare_preprocessor_constant, declare_c1_constant, declare_c2_constant, declare_c2_preprocessor_constant) \ ++ ++ /* NOTE that we do not use the last_entry() macro here; it is used */ ++ /* in vmStructs__.hpp's VM_INT_CONSTANTS_OS_CPU macro (and must */ ++ /* be present there) */ ++ ++#define VM_LONG_CONSTANTS_CPU(declare_constant, declare_preprocessor_constant, declare_c1_constant, declare_c2_constant, declare_c2_preprocessor_constant) \ ++ ++ /* NOTE that we do not use the last_entry() macro here; it is used */ ++ /* in vmStructs__.hpp's VM_LONG_CONSTANTS_OS_CPU macro (and must */ ++ /* be present there) */ ++ ++#endif // CPU_LOONGARCH_VM_VMSTRUCTS_LOONGARCH_HPP +diff --git a/hotspot/src/cpu/loongarch/vm/vm_version_ext_loongarch.cpp b/hotspot/src/cpu/loongarch/vm/vm_version_ext_loongarch.cpp +new file mode 100644 +index 0000000000..c71f64e132 +--- /dev/null ++++ b/hotspot/src/cpu/loongarch/vm/vm_version_ext_loongarch.cpp +@@ -0,0 +1,84 @@ ++/* ++ * Copyright (c) 2013, 2018, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2018, 2020, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ * ++ */ ++ ++#include "memory/allocation.inline.hpp" ++#include "vm_version_ext_loongarch.hpp" ++ ++// VM_Version_Ext statics ++int VM_Version_Ext::_no_of_threads = 0; ++int VM_Version_Ext::_no_of_cores = 0; ++int VM_Version_Ext::_no_of_sockets = 0; ++bool VM_Version_Ext::_initialized = false; ++char VM_Version_Ext::_cpu_name[CPU_TYPE_DESC_BUF_SIZE] = {0}; ++char VM_Version_Ext::_cpu_desc[CPU_DETAILED_DESC_BUF_SIZE] = {0}; ++ ++void VM_Version_Ext::initialize_cpu_information(void) { ++ // do nothing if cpu info has been initialized ++ if (_initialized) { ++ return; ++ } ++ ++ _no_of_cores = os::processor_count(); ++ _no_of_threads = _no_of_cores; ++ _no_of_sockets = _no_of_cores; ++ snprintf(_cpu_name, CPU_TYPE_DESC_BUF_SIZE - 1, "LoongArch"); ++ snprintf(_cpu_desc, CPU_DETAILED_DESC_BUF_SIZE, "LoongArch %s", cpu_features()); ++ _initialized = true; ++} ++ ++int VM_Version_Ext::number_of_threads(void) { ++ initialize_cpu_information(); ++ return _no_of_threads; ++} ++ ++int VM_Version_Ext::number_of_cores(void) { ++ initialize_cpu_information(); ++ return _no_of_cores; ++} ++ ++int VM_Version_Ext::number_of_sockets(void) { ++ initialize_cpu_information(); ++ return _no_of_sockets; ++} ++ ++const char* VM_Version_Ext::cpu_name(void) { ++ initialize_cpu_information(); ++ char* tmp = NEW_C_HEAP_ARRAY_RETURN_NULL(char, CPU_TYPE_DESC_BUF_SIZE, mtTracing); ++ if (NULL == tmp) { ++ return NULL; ++ } ++ strncpy(tmp, _cpu_name, CPU_TYPE_DESC_BUF_SIZE); ++ return tmp; ++} ++ ++const char* VM_Version_Ext::cpu_description(void) { ++ initialize_cpu_information(); ++ char* tmp = NEW_C_HEAP_ARRAY_RETURN_NULL(char, CPU_DETAILED_DESC_BUF_SIZE, mtTracing); ++ if (NULL == tmp) { ++ return NULL; ++ } ++ strncpy(tmp, _cpu_desc, CPU_DETAILED_DESC_BUF_SIZE); ++ return tmp; ++} +diff --git a/hotspot/src/cpu/loongarch/vm/vm_version_ext_loongarch.hpp b/hotspot/src/cpu/loongarch/vm/vm_version_ext_loongarch.hpp +new file mode 100644 +index 0000000000..682dd9c78f +--- /dev/null ++++ b/hotspot/src/cpu/loongarch/vm/vm_version_ext_loongarch.hpp +@@ -0,0 +1,54 @@ ++/* ++ * Copyright (c) 2016, 2018, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2018, 2020, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ * ++ */ ++ ++#ifndef CPU_LOONGARCH_VM_VM_VERSION_EXT_LOONGARCH_HPP ++#define CPU_LOONGARCH_VM_VM_VERSION_EXT_LOONGARCH_HPP ++ ++#include "runtime/vm_version.hpp" ++#include "utilities/macros.hpp" ++ ++class VM_Version_Ext : public VM_Version { ++ private: ++ static const size_t CPU_TYPE_DESC_BUF_SIZE = 256; ++ static const size_t CPU_DETAILED_DESC_BUF_SIZE = 4096; ++ ++ static int _no_of_threads; ++ static int _no_of_cores; ++ static int _no_of_sockets; ++ static bool _initialized; ++ static char _cpu_name[CPU_TYPE_DESC_BUF_SIZE]; ++ static char _cpu_desc[CPU_DETAILED_DESC_BUF_SIZE]; ++ ++ public: ++ static int number_of_threads(void); ++ static int number_of_cores(void); ++ static int number_of_sockets(void); ++ ++ static const char* cpu_name(void); ++ static const char* cpu_description(void); ++ static void initialize_cpu_information(void); ++}; ++ ++#endif // CPU_LOONGARCH_VM_VM_VERSION_EXT_LOONGARCH_HPP +diff --git a/hotspot/src/cpu/loongarch/vm/vm_version_loongarch.cpp b/hotspot/src/cpu/loongarch/vm/vm_version_loongarch.cpp +new file mode 100644 +index 0000000000..81ea3b230c +--- /dev/null ++++ b/hotspot/src/cpu/loongarch/vm/vm_version_loongarch.cpp +@@ -0,0 +1,443 @@ ++/* ++ * Copyright (c) 1997, 2014, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2023, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ * ++ */ ++ ++#include "precompiled.hpp" ++#include "asm/macroAssembler.hpp" ++#include "asm/macroAssembler.inline.hpp" ++#include "memory/resourceArea.hpp" ++#include "runtime/java.hpp" ++#include "runtime/stubCodeGenerator.hpp" ++#include "vm_version_loongarch.hpp" ++#ifdef TARGET_OS_FAMILY_linux ++# include "os_linux.inline.hpp" ++#endif ++ ++#include ++#include ++ ++#define A0 RA0 ++#define A1 RA1 ++#define A2 RA2 ++#define A3 RA3 ++#define A4 RA4 ++#define A5 RA5 ++#define A6 RA6 ++#define A7 RA7 ++#define T0 RT0 ++#define T1 RT1 ++#define T2 RT2 ++#define T3 RT3 ++#define T4 RT4 ++#define T5 RT5 ++#define T6 RT6 ++#define T7 RT7 ++#define T8 RT8 ++ ++int VM_Version::_cpuFeatures; ++unsigned long VM_Version::auxv; ++const char* VM_Version::_features_str = ""; ++VM_Version::CpuidInfo VM_Version::_cpuid_info = { 0, }; ++bool VM_Version::_cpu_info_is_initialized = false; ++ ++static BufferBlob* stub_blob; ++static const int stub_size = 600; ++ ++extern "C" { ++ typedef void (*get_cpu_info_stub_t)(void*); ++} ++static get_cpu_info_stub_t get_cpu_info_stub = NULL; ++ ++ ++class VM_Version_StubGenerator: public StubCodeGenerator { ++ public: ++ ++ VM_Version_StubGenerator(CodeBuffer *c) : StubCodeGenerator(c) {} ++ ++ address generate_get_cpu_info() { ++ assert(!VM_Version::cpu_info_is_initialized(), "VM_Version should not be initialized"); ++ StubCodeMark mark(this, "VM_Version", "get_cpu_info_stub"); ++# define __ _masm-> ++ ++ address start = __ pc(); ++ ++ __ enter(); ++ __ push(AT); ++ __ push(T5); ++ ++ __ li(AT, (long)0); ++ __ cpucfg(T5, AT); ++ __ st_w(T5, A0, in_bytes(VM_Version::Loongson_Cpucfg_id0_offset())); ++ ++ __ li(AT, 1); ++ __ cpucfg(T5, AT); ++ __ st_w(T5, A0, in_bytes(VM_Version::Loongson_Cpucfg_id1_offset())); ++ ++ __ li(AT, 2); ++ __ cpucfg(T5, AT); ++ __ st_w(T5, A0, in_bytes(VM_Version::Loongson_Cpucfg_id2_offset())); ++ ++ __ li(AT, 3); ++ __ cpucfg(T5, AT); ++ __ st_w(T5, A0, in_bytes(VM_Version::Loongson_Cpucfg_id3_offset())); ++ ++ __ li(AT, 4); ++ __ cpucfg(T5, AT); ++ __ st_w(T5, A0, in_bytes(VM_Version::Loongson_Cpucfg_id4_offset())); ++ ++ __ li(AT, 5); ++ __ cpucfg(T5, AT); ++ __ st_w(T5, A0, in_bytes(VM_Version::Loongson_Cpucfg_id5_offset())); ++ ++ __ li(AT, 6); ++ __ cpucfg(T5, AT); ++ __ st_w(T5, A0, in_bytes(VM_Version::Loongson_Cpucfg_id6_offset())); ++ ++ __ li(AT, 10); ++ __ cpucfg(T5, AT); ++ __ st_w(T5, A0, in_bytes(VM_Version::Loongson_Cpucfg_id10_offset())); ++ ++ __ li(AT, 11); ++ __ cpucfg(T5, AT); ++ __ st_w(T5, A0, in_bytes(VM_Version::Loongson_Cpucfg_id11_offset())); ++ ++ __ li(AT, 12); ++ __ cpucfg(T5, AT); ++ __ st_w(T5, A0, in_bytes(VM_Version::Loongson_Cpucfg_id12_offset())); ++ ++ __ li(AT, 13); ++ __ cpucfg(T5, AT); ++ __ st_w(T5, A0, in_bytes(VM_Version::Loongson_Cpucfg_id13_offset())); ++ ++ __ li(AT, 14); ++ __ cpucfg(T5, AT); ++ __ st_w(T5, A0, in_bytes(VM_Version::Loongson_Cpucfg_id14_offset())); ++ ++ __ pop(T5); ++ __ pop(AT); ++ __ leave(); ++ __ jr(RA); ++# undef __ ++ return start; ++ }; ++}; ++ ++uint32_t VM_Version::get_feature_flags_by_cpucfg() { ++ uint32_t result = 0; ++ if (_cpuid_info.cpucfg_info_id1.bits.ARCH == 0b00 || _cpuid_info.cpucfg_info_id1.bits.ARCH == 0b01 ) { ++ result |= CPU_LA32; ++ } else if (_cpuid_info.cpucfg_info_id1.bits.ARCH == 0b10 ) { ++ result |= CPU_LA64; ++ } ++ if (_cpuid_info.cpucfg_info_id1.bits.UAL != 0) ++ result |= CPU_UAL; ++ ++ if (_cpuid_info.cpucfg_info_id2.bits.FP_CFG != 0) ++ result |= CPU_FP; ++ if (_cpuid_info.cpucfg_info_id2.bits.COMPLEX != 0) ++ result |= CPU_COMPLEX; ++ if 
(_cpuid_info.cpucfg_info_id2.bits.CRYPTO != 0) ++ result |= CPU_CRYPTO; ++ if (_cpuid_info.cpucfg_info_id2.bits.LBT_X86 != 0) ++ result |= CPU_LBT_X86; ++ if (_cpuid_info.cpucfg_info_id2.bits.LBT_ARM != 0) ++ result |= CPU_LBT_ARM; ++ if (_cpuid_info.cpucfg_info_id2.bits.LBT_MIPS != 0) ++ result |= CPU_LBT_MIPS; ++ if (_cpuid_info.cpucfg_info_id2.bits.LAM != 0) ++ result |= CPU_LAM; ++ ++ if (_cpuid_info.cpucfg_info_id3.bits.CCDMA != 0) ++ result |= CPU_CCDMA; ++ if (_cpuid_info.cpucfg_info_id3.bits.LLDBAR != 0) ++ result |= CPU_LLDBAR; ++ if (_cpuid_info.cpucfg_info_id3.bits.SCDLY != 0) ++ result |= CPU_SCDLY; ++ if (_cpuid_info.cpucfg_info_id3.bits.LLEXC != 0) ++ result |= CPU_LLEXC; ++ ++ result |= CPU_ULSYNC; ++ ++ return result; ++} ++ ++void VM_Version::get_processor_features() { ++ ++ clean_cpuFeatures(); ++ ++ get_cpu_info_stub(&_cpuid_info); ++ _cpuFeatures = get_feature_flags_by_cpucfg(); ++ ++ _supports_cx8 = true; ++ ++ if (UseG1GC && FLAG_IS_DEFAULT(MaxGCPauseMillis)) { ++ FLAG_SET_CMDLINE(uintx, MaxGCPauseMillis, 650); ++ } ++ ++ auxv = getauxval(AT_HWCAP); ++ ++ if (supports_lsx()) { ++ if (FLAG_IS_DEFAULT(UseLSX)) { ++ FLAG_SET_DEFAULT(UseLSX, true); ++ } ++ } else if (UseLSX) { ++ warning("LSX instructions are not available on this CPU"); ++ FLAG_SET_DEFAULT(UseLSX, false); ++ } ++ ++ if (supports_lasx()) { ++ if (FLAG_IS_DEFAULT(UseLASX)) { ++ FLAG_SET_DEFAULT(UseLASX, true); ++ } ++ } else if (UseLASX) { ++ warning("LASX instructions are not available on this CPU"); ++ FLAG_SET_DEFAULT(UseLASX, false); ++ } ++ ++ if (UseLASX && !UseLSX) { ++ warning("LASX instructions depends on LSX, setting UseLASX to false"); ++ FLAG_SET_DEFAULT(UseLASX, false); ++ } ++ ++#ifdef COMPILER2 ++ int max_vector_size = 0; ++ int min_vector_size = 0; ++ if (UseLASX) { ++ max_vector_size = 32; ++ min_vector_size = 16; ++ } ++ else if (UseLSX) { ++ max_vector_size = 16; ++ min_vector_size = 16; ++ } ++ ++ if (!FLAG_IS_DEFAULT(MaxVectorSize)) { ++ if (MaxVectorSize == 0) { ++ // do nothing ++ } else if (MaxVectorSize > max_vector_size) { ++ warning("MaxVectorSize must be at most %i on this platform", max_vector_size); ++ FLAG_SET_DEFAULT(MaxVectorSize, max_vector_size); ++ } else if (MaxVectorSize < min_vector_size) { ++ warning("MaxVectorSize must be at least %i or 0 on this platform, setting to: %i", min_vector_size, min_vector_size); ++ FLAG_SET_DEFAULT(MaxVectorSize, min_vector_size); ++ } else if (!is_power_of_2(MaxVectorSize)) { ++ warning("MaxVectorSize must be a power of 2, setting to default: %i", max_vector_size); ++ FLAG_SET_DEFAULT(MaxVectorSize, max_vector_size); ++ } ++ } else { ++ // If default, use highest supported configuration ++ FLAG_SET_DEFAULT(MaxVectorSize, max_vector_size); ++ } ++#endif ++ ++ if (needs_llsync() && needs_tgtsync() && !needs_ulsync()) { ++ if (FLAG_IS_DEFAULT(UseSyncLevel)) { ++ FLAG_SET_DEFAULT(UseSyncLevel, 1000); ++ } ++ } else if (!needs_llsync() && needs_tgtsync() && needs_ulsync()) { ++ if (FLAG_IS_DEFAULT(UseSyncLevel)) { ++ FLAG_SET_DEFAULT(UseSyncLevel, 2000); ++ } ++ } else if (!needs_llsync() && !needs_tgtsync() && needs_ulsync()) { ++ if (FLAG_IS_DEFAULT(UseSyncLevel)) { ++ FLAG_SET_DEFAULT(UseSyncLevel, 3000); ++ } ++ } else if (needs_llsync() && !needs_tgtsync() && needs_ulsync()) { ++ if (FLAG_IS_DEFAULT(UseSyncLevel)) { ++ FLAG_SET_DEFAULT(UseSyncLevel, 4000); ++ } ++ } else if (needs_llsync() && needs_tgtsync() && needs_ulsync()) { ++ if (FLAG_IS_DEFAULT(UseSyncLevel)) { ++ FLAG_SET_DEFAULT(UseSyncLevel, 10000); ++ } ++ } else { ++ 
assert(false, "Should Not Reach Here, what is the cpu type?"); ++ if (FLAG_IS_DEFAULT(UseSyncLevel)) { ++ FLAG_SET_DEFAULT(UseSyncLevel, 10000); ++ } ++ } ++ ++ char buf[256]; ++ ++ // A note on the _features_string format: ++ // There are jtreg tests checking the _features_string for various properties. ++ // For some strange reason, these tests require the string to contain ++ // only _lowercase_ characters. Keep that in mind when being surprised ++ // about the unusual notation of features - and when adding new ones. ++ // Features may have one comma at the end. ++ // Furthermore, use one, and only one, separator space between features. ++ // Multiple spaces are considered separate tokens, messing up everything. ++ jio_snprintf(buf, sizeof(buf), "%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s, " ++ "0x%lx, fp_ver: %d, lvz_ver: %d, " ++ "usesynclevel:%d", ++ (is_la64() ? "la64" : ""), ++ (is_la32() ? "la32" : ""), ++ (supports_lsx() ? ", lsx" : ""), ++ (supports_lasx() ? ", lasx" : ""), ++ (supports_crypto() ? ", crypto" : ""), ++ (supports_lam() ? ", am" : ""), ++ (supports_ual() ? ", ual" : ""), ++ (supports_lldbar() ? ", lldbar" : ""), ++ (supports_scdly() ? ", scdly" : ""), ++ (supports_llexc() ? ", llexc" : ""), ++ (supports_lbt_x86() ? ", lbt_x86" : ""), ++ (supports_lbt_arm() ? ", lbt_arm" : ""), ++ (supports_lbt_mips() ? ", lbt_mips" : ""), ++ (needs_llsync() ? ", needs_llsync" : ""), ++ (needs_tgtsync() ? ", needs_tgtsync": ""), ++ (needs_ulsync() ? ", needs_ulsync": ""), ++ _cpuid_info.cpucfg_info_id0.bits.PRID, ++ _cpuid_info.cpucfg_info_id2.bits.FP_VER, ++ _cpuid_info.cpucfg_info_id2.bits.LVZ_VER, ++ UseSyncLevel); ++ _features_str = strdup(buf); ++ ++ assert(!is_la32(), "Should Not Reach Here, what is the cpu type?"); ++ assert( is_la64(), "Should be LoongArch64"); ++ ++ if (FLAG_IS_DEFAULT(AllocatePrefetchStyle)) { ++ FLAG_SET_DEFAULT(AllocatePrefetchStyle, 1); ++ } ++ ++ if (FLAG_IS_DEFAULT(AllocatePrefetchLines)) { ++ FLAG_SET_DEFAULT(AllocatePrefetchLines, 3); ++ } ++ ++ if (FLAG_IS_DEFAULT(AllocatePrefetchStepSize)) { ++ FLAG_SET_DEFAULT(AllocatePrefetchStepSize, 64); ++ } ++ ++ if (FLAG_IS_DEFAULT(AllocatePrefetchDistance)) { ++ FLAG_SET_DEFAULT(AllocatePrefetchDistance, 192); ++ } ++ ++ if (FLAG_IS_DEFAULT(AllocateInstancePrefetchLines)) { ++ FLAG_SET_DEFAULT(AllocateInstancePrefetchLines, 1); ++ } ++ ++ // Basic instructions are used to implement SHA Intrinsics on LA, so sha ++ // instructions support is not needed. 
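++  // That is why the guard below is hard-wired to 1 and UseSHA defaults to
++  // enabled on every LoongArch CPU. The same FLAG_IS_DEFAULT / FLAG_SET_DEFAULT
++  // pattern is used for all of the feature flags in this function; "SomeFlag"
++  // here is only a placeholder for UseSHA, UseAES, UseCRC32, etc.:
++  //
++  //   if (FLAG_IS_DEFAULT(SomeFlag)) {      // not set via -XX:+/-SomeFlag
++  //     FLAG_SET_DEFAULT(SomeFlag, true);   // pick the platform default
++  //   } else if (SomeFlag) {
++  //     warning("...not available on this CPU");
++  //     FLAG_SET_DEFAULT(SomeFlag, false);  // reject an explicit request
++  //   }
++  //
++  // An explicit -XX:-SomeFlag is therefore always honored, while an explicit
++  // -XX:+SomeFlag for an unsupported feature is downgraded with a warning.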
++ if (/*supports_crypto()*/ 1) { ++ if (FLAG_IS_DEFAULT(UseSHA)) { ++ FLAG_SET_DEFAULT(UseSHA, true); ++ } ++ } else if (UseSHA) { ++ warning("SHA instructions are not available on this CPU"); ++ FLAG_SET_DEFAULT(UseSHA, false); ++ } ++ ++ if (UseSHA/* && supports_crypto()*/) { ++ if (FLAG_IS_DEFAULT(UseSHA1Intrinsics)) { ++ FLAG_SET_DEFAULT(UseSHA1Intrinsics, true); ++ } ++ } else if (UseSHA1Intrinsics) { ++ warning("Intrinsics for SHA-1 crypto hash functions not available on this CPU."); ++ FLAG_SET_DEFAULT(UseSHA1Intrinsics, false); ++ } ++ ++ if (UseSHA/* && supports_crypto()*/) { ++ if (FLAG_IS_DEFAULT(UseSHA256Intrinsics)) { ++ FLAG_SET_DEFAULT(UseSHA256Intrinsics, true); ++ } ++ } else if (UseSHA256Intrinsics) { ++ warning("Intrinsics for SHA-224 and SHA-256 crypto hash functions not available on this CPU."); ++ FLAG_SET_DEFAULT(UseSHA256Intrinsics, false); ++ } ++ ++ if (UseSHA512Intrinsics) { ++ warning("Intrinsics for SHA-384 and SHA-512 crypto hash functions not available on this CPU."); ++ FLAG_SET_DEFAULT(UseSHA512Intrinsics, false); ++ } ++ ++ if (!(UseSHA1Intrinsics || UseSHA256Intrinsics || UseSHA512Intrinsics)) { ++ FLAG_SET_DEFAULT(UseSHA, false); ++ } ++ ++ // Basic instructions are used to implement AES Intrinsics on LA, so AES ++ // instructions support is not needed. ++ if (/*supports_crypto()*/ 1) { ++ if (FLAG_IS_DEFAULT(UseAES)) { ++ FLAG_SET_DEFAULT(UseAES, true); ++ } ++ } else if (UseAES) { ++ if (!FLAG_IS_DEFAULT(UseAES)) ++ warning("AES instructions are not available on this CPU"); ++ FLAG_SET_DEFAULT(UseAES, false); ++ } ++ ++ if (UseAES/* && supports_crypto()*/) { ++ if (FLAG_IS_DEFAULT(UseAESIntrinsics)) { ++ FLAG_SET_DEFAULT(UseAESIntrinsics, true); ++ } ++ } else if (UseAESIntrinsics) { ++ if (!FLAG_IS_DEFAULT(UseAESIntrinsics)) ++ warning("AES intrinsics are not available on this CPU"); ++ FLAG_SET_DEFAULT(UseAESIntrinsics, false); ++ } ++ ++ if (FLAG_IS_DEFAULT(UseCRC32)) { ++ FLAG_SET_DEFAULT(UseCRC32, true); ++ } ++ ++ if (UseCRC32) { ++ if (FLAG_IS_DEFAULT(UseCRC32Intrinsics)) { ++ UseCRC32Intrinsics = true; ++ } ++ } ++ ++ if (FLAG_IS_DEFAULT(UseMontgomeryMultiplyIntrinsic)) { ++ UseMontgomeryMultiplyIntrinsic = true; ++ } ++ if (FLAG_IS_DEFAULT(UseMontgomerySquareIntrinsic)) { ++ UseMontgomerySquareIntrinsic = true; ++ } ++ ++ // This machine allows unaligned memory accesses ++ if (FLAG_IS_DEFAULT(UseUnalignedAccesses)) { ++ FLAG_SET_DEFAULT(UseUnalignedAccesses, true); ++ } ++ ++ if (CriticalJNINatives) { ++ if (FLAG_IS_CMDLINE(CriticalJNINatives)) { ++ warning("CriticalJNINatives specified, but not supported in this VM"); ++ } ++ FLAG_SET_DEFAULT(CriticalJNINatives, false); ++ } ++} ++ ++void VM_Version::initialize() { ++ ResourceMark rm; ++ // Making this stub must be FIRST use of assembler ++ ++ stub_blob = BufferBlob::create("get_cpu_info_stub", stub_size); ++ if (stub_blob == NULL) { ++ vm_exit_during_initialization("Unable to allocate get_cpu_info_stub"); ++ } ++ CodeBuffer c(stub_blob); ++ VM_Version_StubGenerator g(&c); ++ get_cpu_info_stub = CAST_TO_FN_PTR(get_cpu_info_stub_t, ++ g.generate_get_cpu_info()); ++ ++ get_processor_features(); ++} +diff --git a/hotspot/src/cpu/loongarch/vm/vm_version_loongarch.hpp b/hotspot/src/cpu/loongarch/vm/vm_version_loongarch.hpp +new file mode 100644 +index 0000000000..3b5f907a79 +--- /dev/null ++++ b/hotspot/src/cpu/loongarch/vm/vm_version_loongarch.hpp +@@ -0,0 +1,299 @@ ++/* ++ * Copyright (c) 1997, 2013, Oracle and/or its affiliates. All rights reserved. 
++ * Copyright (c) 2015, 2023, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#ifndef CPU_LOONGARCH_VM_VM_VERSION_LOONGARCH_HPP ++#define CPU_LOONGARCH_VM_VM_VERSION_LOONGARCH_HPP ++ ++#include "runtime/globals_extension.hpp" ++#include "runtime/vm_version.hpp" ++ ++#ifndef HWCAP_LOONGARCH_LSX ++#define HWCAP_LOONGARCH_LSX (1 << 4) ++#endif ++ ++#ifndef HWCAP_LOONGARCH_LASX ++#define HWCAP_LOONGARCH_LASX (1 << 5) ++#endif ++ ++class VM_Version: public Abstract_VM_Version { ++public: ++ ++ union LoongArch_Cpucfg_Id0 { ++ uint32_t value; ++ struct { ++ uint32_t PRID : 32; ++ } bits; ++ }; ++ ++ union LoongArch_Cpucfg_Id1 { ++ uint32_t value; ++ struct { ++ uint32_t ARCH : 2, ++ PGMMU : 1, ++ IOCSR : 1, ++ PALEN : 8, ++ VALEN : 8, ++ UAL : 1, // unaligned access ++ RI : 1, ++ EP : 1, ++ RPLV : 1, ++ HP : 1, ++ IOCSR_BRD : 1, ++ MSG_INT : 1, ++ : 5; ++ } bits; ++ }; ++ ++ union LoongArch_Cpucfg_Id2 { ++ uint32_t value; ++ struct { ++ uint32_t FP_CFG : 1, // FP is used, use FP_CFG instead ++ FP_SP : 1, ++ FP_DP : 1, ++ FP_VER : 3, ++ LSX : 1, ++ LASX : 1, ++ COMPLEX : 1, ++ CRYPTO : 1, ++ LVZ : 1, ++ LVZ_VER : 3, ++ LLFTP : 1, ++ LLFTP_VER : 3, ++ LBT_X86 : 1, ++ LBT_ARM : 1, ++ LBT_MIPS : 1, ++ LSPW : 1, ++ LAM : 1, ++ : 9; ++ } bits; ++ }; ++ ++ union LoongArch_Cpucfg_Id3 { ++ uint32_t value; ++ struct { ++ uint32_t CCDMA : 1, ++ SFB : 1, ++ UCACC : 1, ++ LLEXC : 1, ++ SCDLY : 1, ++ LLDBAR : 1, ++ ITLBHMC : 1, ++ ICHMC : 1, ++ SPW_LVL : 3, ++ SPW_HP_HF : 1, ++ RVA : 1, ++ RVAMAXM1 : 4, ++ : 15; ++ } bits; ++ }; ++ ++ union LoongArch_Cpucfg_Id4 { ++ uint32_t value; ++ struct { ++ uint32_t CC_FREQ : 32; ++ } bits; ++ }; ++ ++ union LoongArch_Cpucfg_Id5 { ++ uint32_t value; ++ struct { ++ uint32_t CC_MUL : 16, ++ CC_DIV : 16; ++ } bits; ++ }; ++ ++ union LoongArch_Cpucfg_Id6 { ++ uint32_t value; ++ struct { ++ uint32_t PMP : 1, ++ PMVER : 3, ++ PMNUM : 4, ++ PMBITS : 6, ++ UPM : 1, ++ : 17; ++ } bits; ++ }; ++ ++ union LoongArch_Cpucfg_Id10 { ++ uint32_t value; ++ struct { ++ uint32_t L1IU_PRESENT : 1, ++ L1IU_UNIFY : 1, ++ L1D_PRESENT : 1, ++ L2IU_PRESENT : 1, ++ L2IU_UNIFY : 1, ++ L2IU_PRIVATE : 1, ++ L2IU_INCLUSIVE : 1, ++ L2D_PRESENT : 1, ++ L2D_PRIVATE : 1, ++ L2D_INCLUSIVE : 1, ++ L3IU_PRESENT : 1, ++ L3IU_UNIFY : 1, ++ L3IU_PRIVATE : 1, ++ L3IU_INCLUSIVE : 1, ++ L3D_PRESENT : 1, ++ L3D_PRIVATE : 1, ++ L3D_INCLUSIVE : 1, ++ : 15; ++ } bits; ++ }; ++ ++ union LoongArch_Cpucfg_Id11 { ++ uint32_t value; ++ struct { ++ uint32_t WAYM1 : 16, ++ INDEXMLOG2 : 8, ++ LINESIZELOG2 : 7, ++ : 1; ++ } 
bits; ++ }; ++ ++ union LoongArch_Cpucfg_Id12 { ++ uint32_t value; ++ struct { ++ uint32_t WAYM1 : 16, ++ INDEXMLOG2 : 8, ++ LINESIZELOG2 : 7, ++ : 1; ++ } bits; ++ }; ++ ++ union LoongArch_Cpucfg_Id13 { ++ uint32_t value; ++ struct { ++ uint32_t WAYM1 : 16, ++ INDEXMLOG2 : 8, ++ LINESIZELOG2 : 7, ++ : 1; ++ } bits; ++ }; ++ ++ union LoongArch_Cpucfg_Id14 { ++ uint32_t value; ++ struct { ++ uint32_t WAYM1 : 16, ++ INDEXMLOG2 : 8, ++ LINESIZELOG2 : 7, ++ : 1; ++ } bits; ++ }; ++ ++protected: ++ ++ enum { ++ CPU_LAM = (1 << 1), ++ CPU_UAL = (1 << 2), ++ CPU_LSX = (1 << 4), ++ CPU_LASX = (1 << 5), ++ CPU_COMPLEX = (1 << 7), ++ CPU_CRYPTO = (1 << 8), ++ CPU_LBT_X86 = (1 << 10), ++ CPU_LBT_ARM = (1 << 11), ++ CPU_LBT_MIPS = (1 << 12), ++ /* flags above must follow Linux HWCAP */ ++ CPU_LA32 = (1 << 13), ++ CPU_LA64 = (1 << 14), ++ CPU_FP = (1 << 15), ++ CPU_LLEXC = (1 << 16), ++ CPU_SCDLY = (1 << 17), ++ CPU_LLDBAR = (1 << 18), ++ CPU_CCDMA = (1 << 19), ++ CPU_LLSYNC = (1 << 20), ++ CPU_TGTSYNC = (1 << 21), ++ CPU_ULSYNC = (1 << 22), ++ ++ //////////////////////add some other feature here////////////////// ++ } cpuFeatureFlags; ++ ++ static int _cpuFeatures; ++ static unsigned long auxv; ++ static const char* _features_str; ++ static bool _cpu_info_is_initialized; ++ ++ struct CpuidInfo { ++ LoongArch_Cpucfg_Id0 cpucfg_info_id0; ++ LoongArch_Cpucfg_Id1 cpucfg_info_id1; ++ LoongArch_Cpucfg_Id2 cpucfg_info_id2; ++ LoongArch_Cpucfg_Id3 cpucfg_info_id3; ++ LoongArch_Cpucfg_Id4 cpucfg_info_id4; ++ LoongArch_Cpucfg_Id5 cpucfg_info_id5; ++ LoongArch_Cpucfg_Id6 cpucfg_info_id6; ++ LoongArch_Cpucfg_Id10 cpucfg_info_id10; ++ LoongArch_Cpucfg_Id11 cpucfg_info_id11; ++ LoongArch_Cpucfg_Id12 cpucfg_info_id12; ++ LoongArch_Cpucfg_Id13 cpucfg_info_id13; ++ LoongArch_Cpucfg_Id14 cpucfg_info_id14; ++ }; ++ ++ // The actual cpuid info block ++ static CpuidInfo _cpuid_info; ++ ++ static uint32_t get_feature_flags_by_cpucfg(); ++ static int get_feature_flags_by_cpuinfo(int features); ++ static void get_processor_features(); ++ ++public: ++ // Offsets for cpuid asm stub ++ static ByteSize Loongson_Cpucfg_id0_offset() { return byte_offset_of(CpuidInfo, cpucfg_info_id0); } ++ static ByteSize Loongson_Cpucfg_id1_offset() { return byte_offset_of(CpuidInfo, cpucfg_info_id1); } ++ static ByteSize Loongson_Cpucfg_id2_offset() { return byte_offset_of(CpuidInfo, cpucfg_info_id2); } ++ static ByteSize Loongson_Cpucfg_id3_offset() { return byte_offset_of(CpuidInfo, cpucfg_info_id3); } ++ static ByteSize Loongson_Cpucfg_id4_offset() { return byte_offset_of(CpuidInfo, cpucfg_info_id4); } ++ static ByteSize Loongson_Cpucfg_id5_offset() { return byte_offset_of(CpuidInfo, cpucfg_info_id5); } ++ static ByteSize Loongson_Cpucfg_id6_offset() { return byte_offset_of(CpuidInfo, cpucfg_info_id6); } ++ static ByteSize Loongson_Cpucfg_id10_offset() { return byte_offset_of(CpuidInfo, cpucfg_info_id10); } ++ static ByteSize Loongson_Cpucfg_id11_offset() { return byte_offset_of(CpuidInfo, cpucfg_info_id11); } ++ static ByteSize Loongson_Cpucfg_id12_offset() { return byte_offset_of(CpuidInfo, cpucfg_info_id12); } ++ static ByteSize Loongson_Cpucfg_id13_offset() { return byte_offset_of(CpuidInfo, cpucfg_info_id13); } ++ static ByteSize Loongson_Cpucfg_id14_offset() { return byte_offset_of(CpuidInfo, cpucfg_info_id14); } ++ ++ static void clean_cpuFeatures() { _cpuFeatures = 0; } ++ ++ // Initialization ++ static void initialize(); ++ ++ static bool cpu_info_is_initialized() { return _cpu_info_is_initialized; } ++ ++ static bool is_la32() { 
return _cpuFeatures & CPU_LA32; } ++ static bool is_la64() { return _cpuFeatures & CPU_LA64; } ++ static bool supports_crypto() { return _cpuFeatures & CPU_CRYPTO; } ++ static bool supports_lsx() { return auxv & HWCAP_LOONGARCH_LSX; } ++ static bool supports_lasx() { return auxv & HWCAP_LOONGARCH_LASX; } ++ static bool supports_lam() { return _cpuFeatures & CPU_LAM; } ++ static bool supports_llexc() { return _cpuFeatures & CPU_LLEXC; } ++ static bool supports_scdly() { return _cpuFeatures & CPU_SCDLY; } ++ static bool supports_lldbar() { return _cpuFeatures & CPU_LLDBAR; } ++ static bool supports_ual() { return _cpuFeatures & CPU_UAL; } ++ static bool supports_lbt_x86() { return _cpuFeatures & CPU_LBT_X86; } ++ static bool supports_lbt_arm() { return _cpuFeatures & CPU_LBT_ARM; } ++ static bool supports_lbt_mips() { return _cpuFeatures & CPU_LBT_MIPS; } ++ static bool needs_llsync() { return !supports_lldbar(); } ++ static bool needs_tgtsync() { return 1; } ++ static bool needs_ulsync() { return 1; } ++ ++ static const char* cpu_features() { return _features_str; } ++}; ++ ++#endif // CPU_LOONGARCH_VM_VM_VERSION_LOONGARCH_HPP +diff --git a/hotspot/src/cpu/loongarch/vm/vmreg_loongarch.cpp b/hotspot/src/cpu/loongarch/vm/vmreg_loongarch.cpp +new file mode 100644 +index 0000000000..52bccfc183 +--- /dev/null ++++ b/hotspot/src/cpu/loongarch/vm/vmreg_loongarch.cpp +@@ -0,0 +1,51 @@ ++/* ++ * Copyright (c) 2006, 2012, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2020, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#include "precompiled.hpp" ++#include "asm/assembler.hpp" ++#include "code/vmreg.hpp" ++ ++ ++ ++void VMRegImpl::set_regName() { ++ Register reg = ::as_Register(0); ++ int i; ++ for (i = 0; i < ConcreteRegisterImpl::max_gpr ; ) { ++ regName[i++] = reg->name(); ++ regName[i++] = reg->name(); ++ reg = reg->successor(); ++ } ++ ++ FloatRegister freg = ::as_FloatRegister(0); ++ for ( ; i < ConcreteRegisterImpl::max_fpr ; ) { ++ regName[i++] = freg->name(); ++ regName[i++] = freg->name(); ++ freg = freg->successor(); ++ } ++ ++ for ( ; i < ConcreteRegisterImpl::number_of_registers ; i ++ ) { ++ regName[i] = "NON-GPR-FPR"; ++ } ++} +diff --git a/hotspot/src/cpu/loongarch/vm/vmreg_loongarch.hpp b/hotspot/src/cpu/loongarch/vm/vmreg_loongarch.hpp +new file mode 100644 +index 0000000000..80a1fc57de +--- /dev/null ++++ b/hotspot/src/cpu/loongarch/vm/vmreg_loongarch.hpp +@@ -0,0 +1,35 @@ ++/* ++ * Copyright (c) 2006, 2010, Oracle and/or its affiliates. 
All rights reserved. ++ * Copyright (c) 2015, 2020, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#ifndef CPU_LOONGARCH_VM_VMREG_LOONGARCH_HPP ++#define CPU_LOONGARCH_VM_VMREG_LOONGARCH_HPP ++ ++bool is_Register(); ++Register as_Register(); ++ ++bool is_FloatRegister(); ++FloatRegister as_FloatRegister(); ++ ++#endif // CPU_LOONGARCH_VM_VMREG_LOONGARCH_HPP +diff --git a/hotspot/src/cpu/loongarch/vm/vmreg_loongarch.inline.hpp b/hotspot/src/cpu/loongarch/vm/vmreg_loongarch.inline.hpp +new file mode 100644 +index 0000000000..f822d4c355 +--- /dev/null ++++ b/hotspot/src/cpu/loongarch/vm/vmreg_loongarch.inline.hpp +@@ -0,0 +1,66 @@ ++/* ++ * Copyright (c) 2006, 2012, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2020, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ * ++ */ ++ ++#ifndef CPU_LOONGARCH_VM_VMREG_LOONGARCH_INLINE_HPP ++#define CPU_LOONGARCH_VM_VMREG_LOONGARCH_INLINE_HPP ++ ++inline VMReg RegisterImpl::as_VMReg() { ++ if( this==noreg ) return VMRegImpl::Bad(); ++ return VMRegImpl::as_VMReg(encoding() << 1 ); ++} ++ ++inline VMReg FloatRegisterImpl::as_VMReg() { ++ return VMRegImpl::as_VMReg((encoding() << 1) + ConcreteRegisterImpl::max_gpr); ++} ++ ++inline bool VMRegImpl::is_Register() { ++ return (unsigned int) value() < (unsigned int) ConcreteRegisterImpl::max_gpr; ++} ++ ++inline bool VMRegImpl::is_FloatRegister() { ++ return value() >= ConcreteRegisterImpl::max_gpr && value() < ConcreteRegisterImpl::max_fpr; ++} ++ ++inline Register VMRegImpl::as_Register() { ++ ++ assert( is_Register(), "must be"); ++ return ::as_Register(value() >> 1); ++} ++ ++inline FloatRegister VMRegImpl::as_FloatRegister() { ++ assert( is_FloatRegister(), "must be" ); ++ assert( is_even(value()), "must be" ); ++ return ::as_FloatRegister((value() - ConcreteRegisterImpl::max_gpr) >> 1); ++} ++ ++inline bool VMRegImpl::is_concrete() { ++ assert(is_reg(), "must be"); ++ if(is_Register()) return true; ++ if(is_FloatRegister()) return true; ++ assert(false, "what register?"); ++ return false; ++} ++ ++#endif // CPU_LOONGARCH_VM_VMREG_LOONGARCH_INLINE_HPP +diff --git a/hotspot/src/cpu/loongarch/vm/vtableStubs_loongarch_64.cpp b/hotspot/src/cpu/loongarch/vm/vtableStubs_loongarch_64.cpp +new file mode 100644 +index 0000000000..df0d176b8b +--- /dev/null ++++ b/hotspot/src/cpu/loongarch/vm/vtableStubs_loongarch_64.cpp +@@ -0,0 +1,300 @@ ++/* ++ * Copyright (c) 2003, 2014, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2021, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ * ++ */ ++ ++#include "precompiled.hpp" ++#include "asm/macroAssembler.hpp" ++#include "code/vtableStubs.hpp" ++#include "interp_masm_loongarch_64.hpp" ++#include "memory/resourceArea.hpp" ++#include "oops/compiledICHolder.hpp" ++#include "oops/klassVtable.hpp" ++#include "runtime/sharedRuntime.hpp" ++#include "vmreg_loongarch.inline.hpp" ++#ifdef COMPILER2 ++#include "opto/runtime.hpp" ++#endif ++ ++ ++// machine-dependent part of VtableStubs: create VtableStub of correct size and ++// initialize its code ++ ++#define __ masm-> ++ ++#define A0 RA0 ++#define A1 RA1 ++#define A2 RA2 ++#define A3 RA3 ++#define A4 RA4 ++#define A5 RA5 ++#define A6 RA6 ++#define A7 RA7 ++#define T0 RT0 ++#define T1 RT1 ++#define T2 RT2 ++#define T3 RT3 ++#define T4 RT4 ++#define T5 RT5 ++#define T6 RT6 ++#define T7 RT7 ++#define T8 RT8 ++ ++#ifndef PRODUCT ++extern "C" void bad_compiled_vtable_index(JavaThread* thread, ++ oop receiver, ++ int index); ++#endif ++ ++// used by compiler only; reciever in T0. ++// used registers : ++// Rmethod : receiver klass & method ++// NOTE: If this code is used by the C1, the receiver_location is always 0. ++// when reach here, receiver in T0, klass in T8 ++VtableStub* VtableStubs::create_vtable_stub(int vtable_index) { ++ const int la_code_length = VtableStub::pd_code_size_limit(true); ++ VtableStub* s = new(la_code_length) VtableStub(true, vtable_index); ++ ResourceMark rm; ++ CodeBuffer cb(s->entry_point(), la_code_length); ++ MacroAssembler* masm = new MacroAssembler(&cb); ++ Register t1 = T8, t2 = Rmethod; ++#ifndef PRODUCT ++ if (CountCompiledCalls) { ++ __ li(AT, SharedRuntime::nof_megamorphic_calls_addr()); ++ __ ld_w(t1, AT , 0); ++ __ addi_w(t1, t1, 1); ++ __ st_w(t1, AT,0); ++ } ++#endif ++ ++ // get receiver (need to skip return address on top of stack) ++ //assert(receiver_location == T0->as_VMReg(), "receiver expected in T0"); ++ ++ // get receiver klass ++ address npe_addr = __ pc(); ++ __ load_klass(t1, T0); ++ // compute entry offset (in words) ++ int entry_offset = InstanceKlass::vtable_start_offset() + vtable_index*vtableEntry::size(); ++#ifndef PRODUCT ++ if (DebugVtables) { ++ Label L; ++ // check offset vs vtable length ++ __ ld_w(t2, t1, InstanceKlass::vtable_length_offset()*wordSize); ++ assert(Assembler::is_simm16(vtable_index*vtableEntry::size()), "change this code"); ++ __ li(AT, vtable_index*vtableEntry::size()); ++ __ blt(AT, t2, L); ++ __ li(A2, vtable_index); ++ __ move(A1, A0); ++ __ call_VM(noreg, CAST_FROM_FN_PTR(address, bad_compiled_vtable_index), A1, A2); ++ __ bind(L); ++ } ++#endif // PRODUCT ++ // load methodOop and target address ++ const Register method = Rmethod; ++ int offset = entry_offset*wordSize + vtableEntry::method_offset_in_bytes(); ++ if (Assembler::is_simm(offset, 12)) { ++ __ ld_ptr(method, t1, offset); ++ } else { ++ __ li(AT, offset); ++ __ ld_ptr(method, t1, AT); ++ } ++ if (DebugVtables) { ++ Label L; ++ __ beq(method, R0, L); ++ __ ld_d(AT, method,in_bytes(Method::from_compiled_offset())); ++ __ bne(AT, R0, L); ++ __ stop("Vtable entry is NULL"); ++ __ bind(L); ++ } ++ // T8: receiver klass ++ // T0: receiver ++ // Rmethod: methodOop ++ // T4: entry ++ address ame_addr = __ pc(); ++ __ ld_ptr(T4, method,in_bytes(Method::from_compiled_offset())); ++ __ jr(T4); ++ masm->flush(); ++ s->set_exception_points(npe_addr, ame_addr); ++ return s; ++} ++ ++ ++// used registers : ++// T1 T2 ++// when reach here, the receiver in T0, klass in T1 ++VtableStub* VtableStubs::create_itable_stub(int itable_index) { ++ // Note well: 
pd_code_size_limit is the absolute minimum we can get ++ // away with. If you add code here, bump the code stub size ++ // returned by pd_code_size_limit! ++ const int la_code_length = VtableStub::pd_code_size_limit(false); ++ VtableStub* s = new(la_code_length) VtableStub(false, itable_index); ++ ResourceMark rm; ++ CodeBuffer cb(s->entry_point(), la_code_length); ++ MacroAssembler* masm = new MacroAssembler(&cb); ++ // we T8,T4 as temparary register, they are free from register allocator ++ Register t1 = T8, t2 = T2; ++ // Entry arguments: ++ // T1: Interface ++ // T0: Receiver ++ ++#ifndef PRODUCT ++ if (CountCompiledCalls) { ++ __ li(AT, SharedRuntime::nof_megamorphic_calls_addr()); ++ __ ld_w(T8, AT, 0); ++ __ addi_w(T8, T8, 1); ++ __ st_w(T8, AT, 0); ++ } ++#endif /* PRODUCT */ ++ const Register holder_klass_reg = T1; // declaring interface klass (DECC) ++ const Register resolved_klass_reg = Rmethod; // resolved interface klass (REFC) ++ const Register icholder_reg = T1; ++ __ ld_ptr(resolved_klass_reg, icholder_reg, CompiledICHolder::holder_klass_offset()); ++ __ ld_ptr(holder_klass_reg, icholder_reg, CompiledICHolder::holder_metadata_offset()); ++ ++ // get receiver klass (also an implicit null-check) ++ address npe_addr = __ pc(); ++ __ load_klass(t1, T0); ++ { ++ // x86 use lookup_interface_method, but lookup_interface_method does not work on LoongArch. ++ const int base = InstanceKlass::vtable_start_offset() * wordSize; ++ assert(vtableEntry::size() * wordSize == 8, "adjust the scaling in the code below"); ++ assert(Assembler::is_simm16(base), "change this code"); ++ __ addi_d(t2, t1, base); ++ assert(Assembler::is_simm16(InstanceKlass::vtable_length_offset() * wordSize), "change this code"); ++ __ ld_w(AT, t1, InstanceKlass::vtable_length_offset() * wordSize); ++ __ alsl_d(t2, AT, t2, Address::times_8 - 1); ++ if (HeapWordsPerLong > 1) { ++ __ round_to(t2, BytesPerLong); ++ } ++ ++ Label hit, entry; ++ assert(Assembler::is_simm16(itableOffsetEntry::size() * wordSize), "change this code"); ++ __ bind(entry); ++ ++#ifdef ASSERT ++ // Check that the entry is non-null ++ if (DebugVtables) { ++ Label L; ++ assert(Assembler::is_simm16(itableOffsetEntry::interface_offset_in_bytes()), "change this code"); ++ __ ld_w(AT, t1, itableOffsetEntry::interface_offset_in_bytes()); ++ __ bne(AT, R0, L); ++ __ stop("null entry point found in itable's offset table"); ++ __ bind(L); ++ } ++#endif ++ assert(Assembler::is_simm16(itableOffsetEntry::interface_offset_in_bytes()), "change this code"); ++ __ ld_ptr(AT, t2, itableOffsetEntry::interface_offset_in_bytes()); ++ __ addi_d(t2, t2, itableOffsetEntry::size() * wordSize); ++ __ bne(AT, resolved_klass_reg, entry); ++ ++ } ++ ++ // add for compressedoops ++ __ load_klass(t1, T0); ++ // compute itable entry offset (in words) ++ const int base = InstanceKlass::vtable_start_offset() * wordSize; ++ assert(vtableEntry::size() * wordSize == 8, "adjust the scaling in the code below"); ++ assert(Assembler::is_simm16(base), "change this code"); ++ __ addi_d(t2, t1, base); ++ assert(Assembler::is_simm16(InstanceKlass::vtable_length_offset() * wordSize), "change this code"); ++ __ ld_w(AT, t1, InstanceKlass::vtable_length_offset() * wordSize); ++ __ alsl_d(t2, AT, t2, Address::times_8 - 1); ++ if (HeapWordsPerLong > 1) { ++ __ round_to(t2, BytesPerLong); ++ } ++ ++ Label hit, entry; ++ assert(Assembler::is_simm16(itableOffsetEntry::size() * wordSize), "change this code"); ++ __ bind(entry); ++ ++#ifdef ASSERT ++ // Check that the entry is non-null ++ if 
(DebugVtables) { ++ Label L; ++ assert(Assembler::is_simm16(itableOffsetEntry::interface_offset_in_bytes()), "change this code"); ++ __ ld_w(AT, t1, itableOffsetEntry::interface_offset_in_bytes()); ++ __ bne(AT, R0, L); ++ __ stop("null entry point found in itable's offset table"); ++ __ bind(L); ++ } ++#endif ++ assert(Assembler::is_simm16(itableOffsetEntry::interface_offset_in_bytes()), "change this code"); ++ __ ld_ptr(AT, t2, itableOffsetEntry::interface_offset_in_bytes()); ++ __ addi_d(t2, t2, itableOffsetEntry::size() * wordSize); ++ __ bne(AT, holder_klass_reg, entry); ++ ++ // We found a hit, move offset into T4 ++ __ ld_ptr(t2, t2, itableOffsetEntry::offset_offset_in_bytes() - itableOffsetEntry::size() * wordSize); ++ ++ // Compute itableMethodEntry. ++ const int method_offset = (itableMethodEntry::size() * wordSize * itable_index) + ++ itableMethodEntry::method_offset_in_bytes(); ++ ++ // Get methodOop and entrypoint for compiler ++ const Register method = Rmethod; ++ ++ __ slli_d(AT, t2, Address::times_1); ++ __ add_d(AT, AT, t1 ); ++ if (Assembler::is_simm(method_offset, 12)) { ++ __ ld_ptr(method, AT, method_offset); ++ } else { ++ __ li(t1, method_offset); ++ __ ld_ptr(method, AT, t1); ++ } ++ ++#ifdef ASSERT ++ if (DebugVtables) { ++ Label L1; ++ __ beq(method, R0, L1); ++ __ ld_d(AT, method,in_bytes(Method::from_compiled_offset())); ++ __ bne(AT, R0, L1); ++ __ stop("methodOop is null"); ++ __ bind(L1); ++ } ++#endif // ASSERT ++ ++ // Rmethod: methodOop ++ // T0: receiver ++ // T4: entry point ++ address ame_addr = __ pc(); ++ __ ld_ptr(T4, method,in_bytes(Method::from_compiled_offset())); ++ __ jr(T4); ++ masm->flush(); ++ s->set_exception_points(npe_addr, ame_addr); ++ return s; ++} ++ ++// NOTE : whenever you change the code above, dont forget to change the const here ++int VtableStub::pd_code_size_limit(bool is_vtable_stub) { ++ if (is_vtable_stub) { ++ return ( DebugVtables ? 600 : 28) + (CountCompiledCalls ? 24 : 0)+ ++ (UseCompressedOops ? 16 : 0); ++ } else { ++ return ( DebugVtables ? 636 : 152) + (CountCompiledCalls ? 24 : 0)+ ++ (UseCompressedOops ? 32 : 0); ++ } ++} ++ ++int VtableStub::pd_code_alignment() { ++ return wordSize; ++} +diff --git a/hotspot/src/cpu/mips/vm/assembler_mips.cpp b/hotspot/src/cpu/mips/vm/assembler_mips.cpp +new file mode 100644 +index 0000000000..6c720972ad +--- /dev/null ++++ b/hotspot/src/cpu/mips/vm/assembler_mips.cpp +@@ -0,0 +1,774 @@ ++/* ++ * Copyright (c) 1997, 2014, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 
++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#include "precompiled.hpp" ++#include "asm/assembler.hpp" ++#include "asm/assembler.inline.hpp" ++#include "gc_interface/collectedHeap.inline.hpp" ++#include "interpreter/interpreter.hpp" ++#include "memory/cardTableModRefBS.hpp" ++#include "memory/resourceArea.hpp" ++#include "prims/methodHandles.hpp" ++#include "runtime/biasedLocking.hpp" ++#include "runtime/interfaceSupport.hpp" ++#include "runtime/objectMonitor.hpp" ++#include "runtime/os.hpp" ++#include "runtime/sharedRuntime.hpp" ++#include "runtime/stubRoutines.hpp" ++#ifndef PRODUCT ++#include "compiler/disassembler.hpp" ++#endif ++#if INCLUDE_ALL_GCS ++#include "gc_implementation/g1/g1CollectedHeap.inline.hpp" ++#include "gc_implementation/g1/g1SATBCardTableModRefBS.hpp" ++#include "gc_implementation/g1/heapRegion.hpp" ++#endif // INCLUDE_ALL_GCS ++ ++#ifdef PRODUCT ++#define BLOCK_COMMENT(str) /* nothing */ ++#define STOP(error) stop(error) ++#else ++#define BLOCK_COMMENT(str) block_comment(str) ++#define STOP(error) block_comment(error); stop(error) ++#endif ++ ++#define BIND(label) bind(label); BLOCK_COMMENT(#label ":") ++ ++// Implementation of AddressLiteral ++ ++AddressLiteral::AddressLiteral(address target, relocInfo::relocType rtype) { ++ _is_lval = false; ++ _target = target; ++ _rspec = rspec_from_rtype(rtype, target); ++} ++ ++// Implementation of Address ++ ++Address Address::make_array(ArrayAddress adr) { ++ AddressLiteral base = adr.base(); ++ Address index = adr.index(); ++ assert(index._disp == 0, "must not have disp"); // maybe it can? ++ Address array(index._base, index._index, index._scale, (intptr_t) base.target()); ++ array._rspec = base._rspec; ++ return array; ++} ++ ++// exceedingly dangerous constructor ++Address::Address(address loc, RelocationHolder spec) { ++ _base = noreg; ++ _index = noreg; ++ _scale = no_scale; ++ _disp = (intptr_t) loc; ++ _rspec = spec; ++} ++ ++ ++#define A0 RA0 ++#define A1 RA1 ++#define A2 RA2 ++#define A3 RA3 ++#define A4 RA4 ++#define A5 RA5 ++#define A6 RA6 ++#define A7 RA7 ++#define T0 RT0 ++#define T1 RT1 ++#define T2 RT2 ++#define T3 RT3 ++#define T8 RT8 ++#define T9 RT9 ++ ++// Implementation of Assembler ++const char *Assembler::ops_name[] = { ++ "special", "regimm", "j", "jal", "beq", "bne", "blez", "bgtz", ++ "addi", "addiu", "slti", "sltiu", "andi", "ori", "xori", "lui", ++ "cop0", "cop1", "cop2", "cop3", "beql", "bnel", "bleql", "bgtzl", ++ "daddi", "daddiu", "ldl", "ldr", "", "", "", "", ++ "lb", "lh", "lwl", "lw", "lbu", "lhu", "lwr", "lwu", ++ "sb", "sh", "swl", "sw", "sdl", "sdr", "swr", "cache", ++ "ll", "lwc1", "", "", "lld", "ldc1", "", "ld", ++ "sc", "swc1", "", "", "scd", "sdc1", "", "sd" ++}; ++ ++const char* Assembler::special_name[] = { ++ "sll", "", "srl", "sra", "sllv", "", "srlv", "srav", ++ "jr", "jalr", "movz", "movn", "syscall", "break", "", "sync", ++ "mfhi", "mthi", "mflo", "mtlo", "dsll", "", "dsrl", "dsra", ++ "mult", "multu", "div", "divu", "dmult", "dmultu", "ddiv", "ddivu", ++ "add", "addu", "sub", "subu", "and", "or", "xor", "nor", ++ "", "", "slt", "sltu", "dadd", "daddu", "dsub", "dsubu", ++ "tge", "tgeu", "tlt", "tltu", "teq", "", "tne", "", ++ "dsll", "", "dsrl", "dsra", "dsll32", "", "dsrl32", "dsra32" ++}; ++ ++const char* Assembler::cop1_name[] = { ++ "add", "sub", "mul", "div", "sqrt", "abs", "mov", "neg", ++ "round.l", "trunc.l", "ceil.l", 
"floor.l", "round.w", "trunc.w", "ceil.w", "floor.w", ++ "", "", "", "", "", "", "", "", ++ "", "", "", "", "", "", "", "", ++ "", "", "", "", "", "", "", "", ++ "", "", "", "", "", "", "", "", ++ "c.f", "c.un", "c.eq", "c.ueq", "c.olt", "c.ult", "c.ole", "c.ule", ++ "c.sf", "c.ngle", "c.seq", "c.ngl", "c.lt", "c.nge", "c.le", "c.ngt" ++}; ++ ++const char* Assembler::cop1x_name[] = { ++ "lwxc1", "ldxc1", "", "", "", "luxc1", "", "", ++ "swxc1", "sdxc1", "", "", "", "suxc1", "", "prefx", ++ "", "", "", "", "", "", "alnv.ps", "", ++ "", "", "", "", "", "", "", "", ++ "madd.s", "madd.d", "", "", "", "", "madd.ps", "", ++ "msub.s", "msub.d", "", "", "", "", "msub.ps", "", ++ "nmadd.s", "nmadd.d", "", "", "", "", "nmadd.ps", "", ++ "nmsub.s", "nmsub.d", "", "", "", "", "nmsub.ps", "" ++}; ++ ++const char* Assembler::special2_name[] = { ++ "madd", "", "mul", "", "msub", "", "", "", ++ "", "", "", "", "", "", "", "", ++ "", "gsdmult", "", "", "gsdiv", "gsddiv", "", "", ++ "", "", "", "", "gsmod", "gsdmod", "", "", ++ "", "", "", "", "", "", "", "", ++ "", "", "", "", "", "", "", "", ++ "", "", "", "", "", "", "", "", ++ "", "", "", "", "", "", "", "" ++}; ++ ++const char* Assembler::special3_name[] = { ++ "ext", "", "", "", "ins", "dinsm", "dinsu", "dins", ++ "", "", "", "", "", "", "", "", ++ "", "", "", "", "", "", "", "", ++ "", "", "", "", "", "", "", "", ++ "bshfl", "", "", "", "", "", "", "", ++ "", "", "", "", "", "", "", "", ++ "", "", "", "", "", "", "", "", ++ "", "", "", "", "", "", "", "", ++}; ++ ++const char* Assembler::regimm_name[] = { ++ "bltz", "bgez", "bltzl", "bgezl", "", "", "", "", ++ "tgei", "tgeiu", "tlti", "tltiu", "teqi", "", "tnei", "", ++ "bltzal", "bgezal", "bltzall", "bgezall" ++}; ++ ++const char* Assembler::gs_ldc2_name[] = { ++ "gslbx", "gslhx", "gslwx", "gsldx", "", "", "gslwxc1", "gsldxc1" ++}; ++ ++ ++const char* Assembler::gs_lwc2_name[] = { ++ "", "", "", "", "", "", "", "", ++ "", "", "", "", "", "", "", "", ++ "gslble", "gslbgt", "gslhle", "gslhgt", "gslwle", "gslwgt", "gsldle", "gsldgt", ++ "", "", "", "gslwlec1", "gslwgtc1", "gsldlec1", "gsldgtc1", "",/*LWDIR, LWPTE, LDDIR and LDPTE have the same low 6 bits.*/ ++ "gslq", "" ++}; ++ ++const char* Assembler::gs_sdc2_name[] = { ++ "gssbx", "gsshx", "gsswx", "gssdx", "", "", "gsswxc1", "gssdxc1" ++}; ++ ++const char* Assembler::gs_swc2_name[] = { ++ "", "", "", "", "", "", "", "", ++ "", "", "", "", "", "", "", "", ++ "gssble", "gssbgt", "gsshle", "gsshgt", "gsswle", "gsswgt", "gssdle", "gssdgt", ++ "", "", "", "", "gsswlec1", "gsswgtc1", "gssdlec1", "gssdgtc1", ++ "gssq", "" ++}; ++ ++//misleading name, print only branch/jump instruction ++void Assembler::print_instruction(int inst) { ++ const char *s; ++ switch( opcode(inst) ) { ++ default: ++ s = ops_name[opcode(inst)]; ++ break; ++ case special_op: ++ s = special_name[special(inst)]; ++ break; ++ case regimm_op: ++ s = special_name[rt(inst)]; ++ break; ++ } ++ ++ ::tty->print("%s", s); ++} ++ ++int Assembler::is_int_mask(int x) { ++ int xx = x; ++ int count = 0; ++ ++ while (x != 0) { ++ x &= (x - 1); ++ count++; ++ } ++ ++ if ((1<>2; ++ switch(opcode(inst)) { ++ case j_op: ++ case jal_op: ++ case lui_op: ++ case ori_op: ++ case daddiu_op: ++ ShouldNotReachHere(); ++ break; ++ default: ++ assert(is_simm16(v), "must be simm16"); ++#ifndef PRODUCT ++ if(!is_simm16(v)) ++ { ++ tty->print_cr("must be simm16"); ++ tty->print_cr("Inst: %x", inst); ++ } ++#endif ++ ++ v = low16(v); ++ inst &= 0xffff0000; ++ break; ++ } ++ ++ return inst | v; ++} ++ ++int 
Assembler::branch_destination(int inst, int pos) { ++ int off; ++ ++ switch(opcode(inst)) { ++ case j_op: ++ case jal_op: ++ assert(false, "should not use j/jal here"); ++ break; ++ default: ++ off = expand(low16(inst), 15); ++ break; ++ } ++ ++ return off ? pos + 4 + (off<<2) : 0; ++} ++ ++int AbstractAssembler::code_fill_byte() { ++ return 0x00; // illegal instruction 0x00000000 ++} ++ ++// Now the Assembler instruction (identical for 32/64 bits) ++ ++void Assembler::lb(Register rt, Address src) { ++ assert(src.index() == NOREG, "index is unimplemented"); ++ lb(rt, src.base(), src.disp()); ++} ++ ++void Assembler::lbu(Register rt, Address src) { ++ assert(src.index() == NOREG, "index is unimplemented"); ++ lbu(rt, src.base(), src.disp()); ++} ++ ++void Assembler::ld(Register rt, Address dst){ ++ Register src = rt; ++ Register base = dst.base(); ++ Register index = dst.index(); ++ ++ int scale = dst.scale(); ++ int disp = dst.disp(); ++ ++ if (index != noreg) { ++ if (Assembler::is_simm16(disp)) { ++ if ( UseLEXT1 && Assembler::is_simm(disp, 8) ) { ++ if (scale == 0) { ++ gsldx(src, base, index, disp); ++ } else { ++ dsll(AT, index, scale); ++ gsldx(src, base, AT, disp); ++ } ++ } else { ++ if (scale == 0) { ++ daddu(AT, base, index); ++ } else { ++ dsll(AT, index, scale); ++ daddu(AT, base, AT); ++ } ++ ld(src, AT, disp); ++ } ++ } else { ++ if (scale == 0) { ++ lui(AT, split_low(disp >> 16)); ++ if (split_low(disp)) ori(AT, AT, split_low(disp)); ++ daddu(AT, AT, base); ++ if (UseLEXT1) { ++ gsldx(src, AT, index, 0); ++ } else { ++ daddu(AT, AT, index); ++ ld(src, AT, 0); ++ } ++ } else { ++ assert_different_registers(src, AT); ++ dsll(AT, index, scale); ++ daddu(AT, base, AT); ++ lui(src, split_low(disp >> 16)); ++ if (split_low(disp)) ori(src, src, split_low(disp)); ++ if (UseLEXT1) { ++ gsldx(src, AT, src, 0); ++ } else { ++ daddu(AT, AT, src); ++ ld(src, AT, 0); ++ } ++ } ++ } ++ } else { ++ if (Assembler::is_simm16(disp)) { ++ ld(src, base, disp); ++ } else { ++ lui(AT, split_low(disp >> 16)); ++ if (split_low(disp)) ori(AT, AT, split_low(disp)); ++ ++ if (UseLEXT1) { ++ gsldx(src, base, AT, 0); ++ } else { ++ daddu(AT, base, AT); ++ ld(src, AT, 0); ++ } ++ } ++ } ++} ++ ++void Assembler::ldl(Register rt, Address src){ ++ assert(src.index() == NOREG, "index is unimplemented"); ++ ldl(rt, src.base(), src.disp()); ++} ++ ++void Assembler::ldr(Register rt, Address src){ ++ assert(src.index() == NOREG, "index is unimplemented"); ++ ldr(rt, src.base(), src.disp()); ++} ++ ++void Assembler::lh(Register rt, Address src){ ++ assert(src.index() == NOREG, "index is unimplemented"); ++ lh(rt, src.base(), src.disp()); ++} ++ ++void Assembler::lhu(Register rt, Address src){ ++ assert(src.index() == NOREG, "index is unimplemented"); ++ lhu(rt, src.base(), src.disp()); ++} ++ ++void Assembler::ll(Register rt, Address src){ ++ assert(src.index() == NOREG, "index is unimplemented"); ++ ll(rt, src.base(), src.disp()); ++} ++ ++void Assembler::lld(Register rt, Address src){ ++ assert(src.index() == NOREG, "index is unimplemented"); ++ lld(rt, src.base(), src.disp()); ++} ++ ++void Assembler::lw(Register rt, Address dst){ ++ Register src = rt; ++ Register base = dst.base(); ++ Register index = dst.index(); ++ ++ int scale = dst.scale(); ++ int disp = dst.disp(); ++ ++ if (index != noreg) { ++ if (Assembler::is_simm16(disp)) { ++ if ( UseLEXT1 && Assembler::is_simm(disp, 8) ) { ++ if (scale == 0) { ++ gslwx(src, base, index, disp); ++ } else { ++ dsll(AT, index, scale); ++ gslwx(src, base, AT, disp); ++ 
} ++ } else { ++ if (scale == 0) { ++ daddu(AT, base, index); ++ } else { ++ dsll(AT, index, scale); ++ daddu(AT, base, AT); ++ } ++ lw(src, AT, disp); ++ } ++ } else { ++ if (scale == 0) { ++ lui(AT, split_low(disp >> 16)); ++ if (split_low(disp)) ori(AT, AT, split_low(disp)); ++ daddu(AT, AT, base); ++ if (UseLEXT1) { ++ gslwx(src, AT, index, 0); ++ } else { ++ daddu(AT, AT, index); ++ lw(src, AT, 0); ++ } ++ } else { ++ assert_different_registers(src, AT); ++ dsll(AT, index, scale); ++ daddu(AT, base, AT); ++ lui(src, split_low(disp >> 16)); ++ if (split_low(disp)) ori(src, src, split_low(disp)); ++ if (UseLEXT1) { ++ gslwx(src, AT, src, 0); ++ } else { ++ daddu(AT, AT, src); ++ lw(src, AT, 0); ++ } ++ } ++ } ++ } else { ++ if (Assembler::is_simm16(disp)) { ++ lw(src, base, disp); ++ } else { ++ lui(AT, split_low(disp >> 16)); ++ if (split_low(disp)) ori(AT, AT, split_low(disp)); ++ ++ if (UseLEXT1) { ++ gslwx(src, base, AT, 0); ++ } else { ++ daddu(AT, base, AT); ++ lw(src, AT, 0); ++ } ++ } ++ } ++} ++ ++void Assembler::lea(Register rt, Address src) { ++ Register dst = rt; ++ Register base = src.base(); ++ Register index = src.index(); ++ ++ int scale = src.scale(); ++ int disp = src.disp(); ++ ++ if (index == noreg) { ++ if (is_simm16(disp)) { ++ daddiu(dst, base, disp); ++ } else { ++ lui(AT, split_low(disp >> 16)); ++ if (split_low(disp)) ori(AT, AT, split_low(disp)); ++ daddu(dst, base, AT); ++ } ++ } else { ++ if (scale == 0) { ++ if (is_simm16(disp)) { ++ daddu(AT, base, index); ++ daddiu(dst, AT, disp); ++ } else { ++ lui(AT, split_low(disp >> 16)); ++ if (split_low(disp)) ori(AT, AT, split_low(disp)); ++ daddu(AT, base, AT); ++ daddu(dst, AT, index); ++ } ++ } else { ++ if (is_simm16(disp)) { ++ dsll(AT, index, scale); ++ daddu(AT, AT, base); ++ daddiu(dst, AT, disp); ++ } else { ++ assert_different_registers(dst, AT); ++ lui(AT, split_low(disp >> 16)); ++ if (split_low(disp)) ori(AT, AT, split_low(disp)); ++ daddu(AT, AT, base); ++ dsll(dst, index, scale); ++ daddu(dst, dst, AT); ++ } ++ } ++ } ++} ++ ++void Assembler::lwl(Register rt, Address src){ ++ assert(src.index() == NOREG, "index is unimplemented"); ++ lwl(rt, src.base(), src.disp()); ++} ++ ++void Assembler::lwr(Register rt, Address src){ ++ assert(src.index() == NOREG, "index is unimplemented"); ++ lwr(rt, src.base(), src.disp()); ++} ++ ++void Assembler::lwu(Register rt, Address src){ ++ assert(src.index() == NOREG, "index is unimplemented"); ++ lwu(rt, src.base(), src.disp()); ++} ++ ++void Assembler::sb(Register rt, Address dst) { ++ assert(dst.index() == NOREG, "index is unimplemented"); ++ sb(rt, dst.base(), dst.disp()); ++} ++ ++void Assembler::sc(Register rt, Address dst) { ++ assert(dst.index() == NOREG, "index is unimplemented"); ++ sc(rt, dst.base(), dst.disp()); ++} ++ ++void Assembler::scd(Register rt, Address dst) { ++ assert(dst.index() == NOREG, "index is unimplemented"); ++ scd(rt, dst.base(), dst.disp()); ++} ++ ++void Assembler::sd(Register rt, Address dst) { ++ Register src = rt; ++ Register base = dst.base(); ++ Register index = dst.index(); ++ ++ int scale = dst.scale(); ++ int disp = dst.disp(); ++ ++ if (index != noreg) { ++ if (is_simm16(disp)) { ++ if ( UseLEXT1 && is_simm(disp, 8)) { ++ if (scale == 0) { ++ gssdx(src, base, index, disp); ++ } else { ++ assert_different_registers(rt, AT); ++ dsll(AT, index, scale); ++ gssdx(src, base, AT, disp); ++ } ++ } else { ++ assert_different_registers(rt, AT); ++ if (scale == 0) { ++ daddu(AT, base, index); ++ } else { ++ dsll(AT, index, scale); ++ 
daddu(AT, base, AT); ++ } ++ sd(src, AT, disp); ++ } ++ } else { ++ assert_different_registers(rt, AT); ++ if (scale == 0) { ++ lui(AT, split_low(disp >> 16)); ++ if (split_low(disp)) ori(AT, AT, split_low(disp)); ++ daddu(AT, AT, base); ++ if (UseLEXT1) { ++ gssdx(src, AT, index, 0); ++ } else { ++ daddu(AT, AT, index); ++ sd(src, AT, 0); ++ } ++ } else { ++ daddiu(SP, SP, -wordSize); ++ sd(T9, SP, 0); ++ ++ dsll(AT, index, scale); ++ daddu(AT, base, AT); ++ lui(T9, split_low(disp >> 16)); ++ if (split_low(disp)) ori(T9, T9, split_low(disp)); ++ daddu(AT, AT, T9); ++ ld(T9, SP, 0); ++ daddiu(SP, SP, wordSize); ++ sd(src, AT, 0); ++ } ++ } ++ } else { ++ if (is_simm16(disp)) { ++ sd(src, base, disp); ++ } else { ++ assert_different_registers(rt, AT); ++ lui(AT, split_low(disp >> 16)); ++ if (split_low(disp)) ori(AT, AT, split_low(disp)); ++ ++ if (UseLEXT1) { ++ gssdx(src, base, AT, 0); ++ } else { ++ daddu(AT, base, AT); ++ sd(src, AT, 0); ++ } ++ } ++ } ++} ++ ++void Assembler::sdl(Register rt, Address dst) { ++ assert(dst.index() == NOREG, "index is unimplemented"); ++ sdl(rt, dst.base(), dst.disp()); ++} ++ ++void Assembler::sdr(Register rt, Address dst) { ++ assert(dst.index() == NOREG, "index is unimplemented"); ++ sdr(rt, dst.base(), dst.disp()); ++} ++ ++void Assembler::sh(Register rt, Address dst) { ++ assert(dst.index() == NOREG, "index is unimplemented"); ++ sh(rt, dst.base(), dst.disp()); ++} ++ ++void Assembler::sw(Register rt, Address dst) { ++ Register src = rt; ++ Register base = dst.base(); ++ Register index = dst.index(); ++ ++ int scale = dst.scale(); ++ int disp = dst.disp(); ++ ++ if (index != noreg) { ++ if ( Assembler::is_simm16(disp) ) { ++ if ( UseLEXT1 && Assembler::is_simm(disp, 8) ) { ++ if (scale == 0) { ++ gsswx(src, base, index, disp); ++ } else { ++ assert_different_registers(rt, AT); ++ dsll(AT, index, scale); ++ gsswx(src, base, AT, disp); ++ } ++ } else { ++ assert_different_registers(rt, AT); ++ if (scale == 0) { ++ daddu(AT, base, index); ++ } else { ++ dsll(AT, index, scale); ++ daddu(AT, base, AT); ++ } ++ sw(src, AT, disp); ++ } ++ } else { ++ assert_different_registers(rt, AT); ++ if (scale == 0) { ++ lui(AT, split_low(disp >> 16)); ++ if (split_low(disp)) ori(AT, AT, split_low(disp)); ++ daddu(AT, AT, base); ++ if (UseLEXT1) { ++ gsswx(src, AT, index, 0); ++ } else { ++ daddu(AT, AT, index); ++ sw(src, AT, 0); ++ } ++ } else { ++ daddiu(SP, SP, -wordSize); ++ sd(T9, SP, 0); ++ ++ dsll(AT, index, scale); ++ daddu(AT, base, AT); ++ lui(T9, split_low(disp >> 16)); ++ if (split_low(disp)) ori(T9, T9, split_low(disp)); ++ daddu(AT, AT, T9); ++ ld(T9, SP, 0); ++ daddiu(SP, SP, wordSize); ++ sw(src, AT, 0); ++ } ++ } ++ } else { ++ if (Assembler::is_simm16(disp)) { ++ sw(src, base, disp); ++ } else { ++ assert_different_registers(rt, AT); ++ lui(AT, split_low(disp >> 16)); ++ if (split_low(disp)) ori(AT, AT, split_low(disp)); ++ ++ if (UseLEXT1) { ++ gsswx(src, base, AT, 0); ++ } else { ++ daddu(AT, base, AT); ++ sw(src, AT, 0); ++ } ++ } ++ } ++} ++ ++void Assembler::swl(Register rt, Address dst) { ++ assert(dst.index() == NOREG, "index is unimplemented"); ++ swl(rt, dst.base(), dst.disp()); ++} ++ ++void Assembler::swr(Register rt, Address dst) { ++ assert(dst.index() == NOREG, "index is unimplemented"); ++ swr(rt, dst.base(), dst.disp()); ++} ++ ++void Assembler::lwc1(FloatRegister rt, Address src) { ++ assert(src.index() == NOREG, "index is unimplemented"); ++ lwc1(rt, src.base(), src.disp()); ++} ++ ++void Assembler::ldc1(FloatRegister rt, Address 
src) { ++ assert(src.index() == NOREG, "index is unimplemented"); ++ ldc1(rt, src.base(), src.disp()); ++} ++ ++void Assembler::swc1(FloatRegister rt, Address dst) { ++ assert(dst.index() == NOREG, "index is unimplemented"); ++ swc1(rt, dst.base(), dst.disp()); ++} ++ ++void Assembler::sdc1(FloatRegister rt, Address dst) { ++ assert(dst.index() == NOREG, "index is unimplemented"); ++ sdc1(rt, dst.base(), dst.disp()); ++} ++ ++void Assembler::j(address entry) { ++ int dest = ((intptr_t)entry & (intptr_t)0xfffffff)>>2; ++ emit_long((j_op<<26) | dest); ++ has_delay_slot(); ++} ++ ++void Assembler::jal(address entry) { ++ int dest = ((intptr_t)entry & (intptr_t)0xfffffff)>>2; ++ emit_long((jal_op<<26) | dest); ++ has_delay_slot(); ++} ++ ++void Assembler::emit_long(int x) { // shadows AbstractAssembler::emit_long ++ check_delay(); ++ AbstractAssembler::emit_int32(x); ++} ++ ++inline void Assembler::emit_data(int x) { emit_long(x); } ++inline void Assembler::emit_data(int x, relocInfo::relocType rtype) { ++ relocate(rtype); ++ emit_long(x); ++} ++ ++inline void Assembler::emit_data(int x, RelocationHolder const& rspec) { ++ relocate(rspec); ++ emit_long(x); ++} ++ ++inline void Assembler::check_delay() { ++#ifdef CHECK_DELAY ++ guarantee(delay_state != at_delay_slot, "must say delayed() when filling delay slot"); ++ delay_state = no_delay; ++#endif ++} +diff --git a/hotspot/src/cpu/mips/vm/assembler_mips.hpp b/hotspot/src/cpu/mips/vm/assembler_mips.hpp +new file mode 100644 +index 0000000000..e91b9db222 +--- /dev/null ++++ b/hotspot/src/cpu/mips/vm/assembler_mips.hpp +@@ -0,0 +1,1789 @@ ++/* ++ * Copyright (c) 1997, 2013, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#ifndef CPU_MIPS_VM_ASSEMBLER_MIPS_HPP ++#define CPU_MIPS_VM_ASSEMBLER_MIPS_HPP ++ ++#include "asm/register.hpp" ++ ++class BiasedLockingCounters; ++ ++ ++// Note: A register location is represented via a Register, not ++// via an address for efficiency & simplicity reasons. 
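
The Address-form emitters above all reduce an operand of the shape base + (index << scale) + disp to the only memory form MIPS loads/stores accept natively, base + signed 16-bit offset, spilling the rest into AT (or into a gs*x indexed access when UseLEXT1 is set). A minimal standalone sketch of that decomposition, with purely illustrative values:

    #include <cstdint>
    #include <cstdio>

    // Same range test as Assembler::is_simm16(): a signed 16-bit immediate.
    static bool is_simm16(int32_t x) { return x >= -32768 && x < 32768; }

    int main() {
      int64_t base  = 0x120000000;   // hypothetical base register value
      int64_t index = 5;             // hypothetical index register value
      int     scale = 3;             // times_8, i.e. 8-byte elements
      int32_t disp  = 24;            // small constant offset
      int64_t ea = base + (index << scale) + disp;
      // disp fits simm16, so a single load/store with that offset suffices;
      // otherwise the emitters above build the high half with lui/ori into AT first.
      printf("ea = %#llx, simm16(disp) = %d\n", (unsigned long long)ea, is_simm16(disp));
      return 0;
    }
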
++ ++class ArrayAddress; ++ ++class Address VALUE_OBJ_CLASS_SPEC { ++ public: ++ enum ScaleFactor { ++ no_scale = -1, ++ times_1 = 0, ++ times_2 = 1, ++ times_4 = 2, ++ times_8 = 3, ++ times_ptr = times_8 ++ }; ++ static ScaleFactor times(int size) { ++ assert(size >= 1 && size <= 8 && is_power_of_2(size), "bad scale size"); ++ if (size == 8) return times_8; ++ if (size == 4) return times_4; ++ if (size == 2) return times_2; ++ return times_1; ++ } ++ ++ private: ++ Register _base; ++ Register _index; ++ ScaleFactor _scale; ++ int _disp; ++ RelocationHolder _rspec; ++ ++ // Easily misused constructors make them private ++ Address(address loc, RelocationHolder spec); ++ Address(int disp, address loc, relocInfo::relocType rtype); ++ Address(int disp, address loc, RelocationHolder spec); ++ ++ public: ++ ++ // creation ++ Address() ++ : _base(noreg), ++ _index(noreg), ++ _scale(no_scale), ++ _disp(0) { ++ } ++ ++ // No default displacement otherwise Register can be implicitly ++ // converted to 0(Register) which is quite a different animal. ++ ++ Address(Register base, int disp = 0) ++ : _base(base), ++ _index(noreg), ++ _scale(no_scale), ++ _disp(disp) { ++ assert_different_registers(_base, AT); ++ } ++ ++ Address(Register base, Register index, ScaleFactor scale, int disp = 0) ++ : _base (base), ++ _index(index), ++ _scale(scale), ++ _disp (disp) { ++ assert(!index->is_valid() == (scale == Address::no_scale), "inconsistent address"); ++ assert_different_registers(_base, _index, AT); ++ } ++ ++ // The following two overloads are used in connection with the ++ // ByteSize type (see sizes.hpp). They simplify the use of ++ // ByteSize'd arguments in assembly code. Note that their equivalent ++ // for the optimized build are the member functions with int disp ++ // argument since ByteSize is mapped to an int type in that case. ++ // ++ // Note: DO NOT introduce similar overloaded functions for WordSize ++ // arguments as in the optimized mode, both ByteSize and WordSize ++ // are mapped to the same type and thus the compiler cannot make a ++ // distinction anymore (=> compiler errors). 
++ ++#ifdef ASSERT ++ Address(Register base, ByteSize disp) ++ : _base(base), ++ _index(noreg), ++ _scale(no_scale), ++ _disp(in_bytes(disp)) { ++ assert_different_registers(_base, AT); ++ } ++ ++ Address(Register base, Register index, ScaleFactor scale, ByteSize disp) ++ : _base(base), ++ _index(index), ++ _scale(scale), ++ _disp(in_bytes(disp)) { ++ assert(!index->is_valid() == (scale == Address::no_scale), "inconsistent address"); ++ assert_different_registers(_base, _index, AT); ++ } ++#endif // ASSERT ++ ++ // accessors ++ bool uses(Register reg) const { return _base == reg || _index == reg; } ++ Register base() const { return _base; } ++ Register index() const { return _index; } ++ ScaleFactor scale() const { return _scale; } ++ int disp() const { return _disp; } ++ ++ static Address make_array(ArrayAddress); ++ ++ friend class Assembler; ++ friend class MacroAssembler; ++ friend class LIR_Assembler; // base/index/scale/disp ++}; ++ ++// Calling convention ++class Argument VALUE_OBJ_CLASS_SPEC { ++ private: ++ int _number; ++ public: ++ enum { ++ n_register_parameters = 8, // 8 integer registers used to pass parameters ++ n_float_register_parameters = 8 // 8 float registers used to pass parameters ++ }; ++ ++ Argument(int number):_number(number){ } ++ Argument successor() {return Argument(number() + 1);} ++ ++ int number()const {return _number;} ++ bool is_Register()const {return _number < n_register_parameters;} ++ bool is_FloatRegister()const {return _number < n_float_register_parameters;} ++ ++ Register as_Register()const { ++ assert(is_Register(), "must be a register argument"); ++ return ::as_Register(RA0->encoding() + _number); ++ } ++ FloatRegister as_FloatRegister()const { ++ assert(is_FloatRegister(), "must be a float register argument"); ++ return ::as_FloatRegister(F12->encoding() + _number); ++ } ++ ++ Address as_caller_address()const {return Address(SP, (number() - n_register_parameters) * wordSize);} ++}; ++ ++// ++// AddressLiteral has been split out from Address because operands of this type ++// need to be treated specially on 32bit vs. 64bit platforms. By splitting it out ++// the few instructions that need to deal with address literals are unique and the ++// MacroAssembler does not have to implement every instruction in the Assembler ++// in order to search for address literals that may need special handling depending ++// on the instruction and the platform. As small step on the way to merging i486/amd64 ++// directories. ++// ++class AddressLiteral VALUE_OBJ_CLASS_SPEC { ++ friend class ArrayAddress; ++ RelocationHolder _rspec; ++ // Typically we use AddressLiterals we want to use their rval ++ // However in some situations we want the lval (effect address) of the item. ++ // We provide a special factory for making those lvals. ++ bool _is_lval; ++ ++ // If the target is far we'll need to load the ea of this to ++ // a register to reach it. Otherwise if near we can do rip ++ // relative addressing. ++ ++ address _target; ++ ++ protected: ++ // creation ++ AddressLiteral() ++ : _is_lval(false), ++ _target(NULL) ++ {} ++ ++ public: ++ ++ ++ AddressLiteral(address target, relocInfo::relocType rtype); ++ ++ AddressLiteral(address target, RelocationHolder const& rspec) ++ : _rspec(rspec), ++ _is_lval(false), ++ _target(target) ++ {} ++ // 32-bit complains about a multiple declaration for int*. 
++ AddressLiteral(intptr_t* addr, relocInfo::relocType rtype = relocInfo::none) ++ : _target((address) addr), ++ _rspec(rspec_from_rtype(rtype, (address) addr)) {} ++ ++ AddressLiteral addr() { ++ AddressLiteral ret = *this; ++ ret._is_lval = true; ++ return ret; ++ } ++ ++ ++ private: ++ ++ address target() { return _target; } ++ bool is_lval() { return _is_lval; } ++ ++ relocInfo::relocType reloc() const { return _rspec.type(); } ++ const RelocationHolder& rspec() const { return _rspec; } ++ ++ friend class Assembler; ++ friend class MacroAssembler; ++ friend class Address; ++ friend class LIR_Assembler; ++ RelocationHolder rspec_from_rtype(relocInfo::relocType rtype, address addr) { ++ switch (rtype) { ++ case relocInfo::external_word_type: ++ return external_word_Relocation::spec(addr); ++ case relocInfo::internal_word_type: ++ return internal_word_Relocation::spec(addr); ++ case relocInfo::opt_virtual_call_type: ++ return opt_virtual_call_Relocation::spec(); ++ case relocInfo::static_call_type: ++ return static_call_Relocation::spec(); ++ case relocInfo::runtime_call_type: ++ return runtime_call_Relocation::spec(); ++ case relocInfo::poll_type: ++ case relocInfo::poll_return_type: ++ return Relocation::spec_simple(rtype); ++ case relocInfo::none: ++ case relocInfo::oop_type: ++ // Oops are a special case. Normally they would be their own section ++ // but in cases like icBuffer they are literals in the code stream that ++ // we don't have a section for. We use none so that we get a literal address ++ // which is always patchable. ++ return RelocationHolder(); ++ default: ++ ShouldNotReachHere(); ++ return RelocationHolder(); ++ } ++ } ++ ++}; ++ ++// Convience classes ++class RuntimeAddress: public AddressLiteral { ++ ++ public: ++ ++ RuntimeAddress(address target) : AddressLiteral(target, relocInfo::runtime_call_type) {} ++ ++}; ++ ++class OopAddress: public AddressLiteral { ++ ++ public: ++ ++ OopAddress(address target) : AddressLiteral(target, relocInfo::oop_type){} ++ ++}; ++ ++class ExternalAddress: public AddressLiteral { ++ ++ public: ++ ++ ExternalAddress(address target) : AddressLiteral(target, relocInfo::external_word_type){} ++ ++}; ++ ++class InternalAddress: public AddressLiteral { ++ ++ public: ++ ++ InternalAddress(address target) : AddressLiteral(target, relocInfo::internal_word_type) {} ++ ++}; ++ ++// x86 can do array addressing as a single operation since disp can be an absolute ++// address amd64 can't. We create a class that expresses the concept but does extra ++// magic on amd64 to get the final result ++ ++class ArrayAddress VALUE_OBJ_CLASS_SPEC { ++ private: ++ ++ AddressLiteral _base; ++ Address _index; ++ ++ public: ++ ++ ArrayAddress() {}; ++ ArrayAddress(AddressLiteral base, Address index): _base(base), _index(index) {}; ++ AddressLiteral base() { return _base; } ++ Address index() { return _index; } ++ ++}; ++ ++const int FPUStateSizeInWords = 512 / wordSize; ++ ++// The MIPS LOONGSON Assembler: Pure assembler doing NO optimizations on the instruction ++// level ; i.e., what you write is what you get. The Assembler is generating code into ++// a CodeBuffer. 
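
Every instruction this assembler emits is a single 32-bit word built by shifting fields into place; insn_ORRI() inside the class below packs the I-type layout |opcode(6)|rs(5)|rt(5)|imm(16)|. A small self-contained sketch of that packing (register numbers and the immediate are made-up values):

    #include <cstdint>
    #include <cstdio>

    // I-type packing as done by insn_ORRI(): op<<26 | rs<<21 | rt<<16 | low 16 bits of imm.
    static uint32_t pack_itype(uint32_t op, uint32_t rs, uint32_t rt, int32_t imm) {
      return (op << 26) | (rs << 21) | (rt << 16) | (uint32_t)(imm & 0xffff);
    }

    int main() {
      const uint32_t daddiu_op = 0x19;                  // value from the ops enum below
      uint32_t insn = pack_itype(daddiu_op, 4, 2, 16);  // daddiu r2, r4, 16
      printf("0x%08x\n", insn);                         // prints 0x64820010
      return 0;
    }
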
++ ++class Assembler : public AbstractAssembler { ++ friend class AbstractAssembler; // for the non-virtual hack ++ friend class LIR_Assembler; // as_Address() ++ friend class StubGenerator; ++ ++ public: ++ enum Condition { ++ zero , ++ notZero , ++ equal , ++ notEqual , ++ less , ++ lessEqual , ++ greater , ++ greaterEqual , ++ below , ++ belowEqual , ++ above , ++ aboveEqual ++ }; ++ ++ static const int LogInstructionSize = 2; ++ static const int InstructionSize = 1 << LogInstructionSize; ++ ++ // opcode, highest 6 bits: bits[31...26] ++ enum ops { ++ special_op = 0x00, // special_ops ++ regimm_op = 0x01, // regimm_ops ++ j_op = 0x02, ++ jal_op = 0x03, ++ beq_op = 0x04, ++ bne_op = 0x05, ++ blez_op = 0x06, ++ bgtz_op = 0x07, ++ addiu_op = 0x09, ++ slti_op = 0x0a, ++ sltiu_op = 0x0b, ++ andi_op = 0x0c, ++ ori_op = 0x0d, ++ xori_op = 0x0e, ++ lui_op = 0x0f, ++ cop0_op = 0x10, // cop0_ops ++ cop1_op = 0x11, // cop1_ops ++ gs_cop2_op = 0x12, // gs_cop2_ops ++ cop1x_op = 0x13, // cop1x_ops ++ beql_op = 0x14, ++ bnel_op = 0x15, ++ blezl_op = 0x16, ++ bgtzl_op = 0x17, ++ daddiu_op = 0x19, ++ ldl_op = 0x1a, ++ ldr_op = 0x1b, ++ special2_op = 0x1c, // special2_ops ++ msa_op = 0x1e, // msa_ops ++ special3_op = 0x1f, // special3_ops ++ lb_op = 0x20, ++ lh_op = 0x21, ++ lwl_op = 0x22, ++ lw_op = 0x23, ++ lbu_op = 0x24, ++ lhu_op = 0x25, ++ lwr_op = 0x26, ++ lwu_op = 0x27, ++ sb_op = 0x28, ++ sh_op = 0x29, ++ swl_op = 0x2a, ++ sw_op = 0x2b, ++ sdl_op = 0x2c, ++ sdr_op = 0x2d, ++ swr_op = 0x2e, ++ cache_op = 0x2f, ++ ll_op = 0x30, ++ lwc1_op = 0x31, ++ gs_lwc2_op = 0x32, //gs_lwc2_ops ++ pref_op = 0x33, ++ lld_op = 0x34, ++ ldc1_op = 0x35, ++ gs_ldc2_op = 0x36, //gs_ldc2_ops ++ ld_op = 0x37, ++ sc_op = 0x38, ++ swc1_op = 0x39, ++ gs_swc2_op = 0x3a, //gs_swc2_ops ++ scd_op = 0x3c, ++ sdc1_op = 0x3d, ++ gs_sdc2_op = 0x3e, //gs_sdc2_ops ++ sd_op = 0x3f ++ }; ++ ++ static const char *ops_name[]; ++ ++ //special family, the opcode is in low 6 bits. 
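
For this SPECIAL (R-type) family the primary opcode field is 0x00 and the distinguishing function code sits in the low 6 bits, which is exactly what the opcode()/special() extractors above pull back out. A hedged standalone sketch, using daddu_op = 0x2d from the enum that follows and made-up register numbers:

    #include <cstdint>
    #include <cstdio>

    // R-type packing as in insn_RRRO(): rs<<21 | rt<<16 | rd<<11 | funct,
    // with the primary opcode (bits 31..26) left as 0 for the SPECIAL family.
    static uint32_t pack_rtype(uint32_t rs, uint32_t rt, uint32_t rd, uint32_t funct) {
      return (rs << 21) | (rt << 16) | (rd << 11) | funct;
    }

    int main() {
      const uint32_t daddu_op = 0x2d;                 // value from the enum below
      uint32_t insn = pack_rtype(5, 6, 4, daddu_op);  // daddu r4, r5, r6
      printf("opcode=%u funct=0x%02x\n", insn >> 26, insn & 0x3f);  // opcode=0 funct=0x2d
      return 0;
    }
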
++ enum special_ops { ++ sll_op = 0x00, ++ movci_op = 0x01, ++ srl_op = 0x02, ++ sra_op = 0x03, ++ sllv_op = 0x04, ++ srlv_op = 0x06, ++ srav_op = 0x07, ++ jr_op = 0x08, ++ jalr_op = 0x09, ++ movz_op = 0x0a, ++ movn_op = 0x0b, ++ syscall_op = 0x0c, ++ break_op = 0x0d, ++ sync_op = 0x0f, ++ mfhi_op = 0x10, ++ mthi_op = 0x11, ++ mflo_op = 0x12, ++ mtlo_op = 0x13, ++ dsllv_op = 0x14, ++ dsrlv_op = 0x16, ++ dsrav_op = 0x17, ++ mult_op = 0x18, ++ multu_op = 0x19, ++ div_op = 0x1a, ++ divu_op = 0x1b, ++ dmult_op = 0x1c, ++ dmultu_op = 0x1d, ++ ddiv_op = 0x1e, ++ ddivu_op = 0x1f, ++ addu_op = 0x21, ++ subu_op = 0x23, ++ and_op = 0x24, ++ or_op = 0x25, ++ xor_op = 0x26, ++ nor_op = 0x27, ++ slt_op = 0x2a, ++ sltu_op = 0x2b, ++ daddu_op = 0x2d, ++ dsubu_op = 0x2f, ++ tge_op = 0x30, ++ tgeu_op = 0x31, ++ tlt_op = 0x32, ++ tltu_op = 0x33, ++ teq_op = 0x34, ++ tne_op = 0x36, ++ dsll_op = 0x38, ++ dsrl_op = 0x3a, ++ dsra_op = 0x3b, ++ dsll32_op = 0x3c, ++ dsrl32_op = 0x3e, ++ dsra32_op = 0x3f ++ }; ++ ++ static const char* special_name[]; ++ ++ //regimm family, the opcode is in rt[16...20], 5 bits ++ enum regimm_ops { ++ bltz_op = 0x00, ++ bgez_op = 0x01, ++ bltzl_op = 0x02, ++ bgezl_op = 0x03, ++ tgei_op = 0x08, ++ tgeiu_op = 0x09, ++ tlti_op = 0x0a, ++ tltiu_op = 0x0b, ++ teqi_op = 0x0c, ++ tnei_op = 0x0e, ++ bltzal_op = 0x10, ++ bgezal_op = 0x11, ++ bltzall_op = 0x12, ++ bgezall_op = 0x13, ++ bposge32_op = 0x1c, ++ bposge64_op = 0x1d, ++ synci_op = 0x1f, ++ }; ++ ++ static const char* regimm_name[]; ++ ++ //cop0 family, the ops is in bits[25...21], 5 bits ++ enum cop0_ops { ++ mfc0_op = 0x00, ++ dmfc0_op = 0x01, ++ // ++ mxgc0_op = 0x03, //MFGC0, DMFGC0, MTGC0 ++ mtc0_op = 0x04, ++ dmtc0_op = 0x05, ++ rdpgpr_op = 0x0a, ++ inter_op = 0x0b, ++ wrpgpr_op = 0x0c ++ }; ++ ++ //cop1 family, the ops is in bits[25...21], 5 bits ++ enum cop1_ops { ++ mfc1_op = 0x00, ++ dmfc1_op = 0x01, ++ cfc1_op = 0x02, ++ mfhc1_op = 0x03, ++ mtc1_op = 0x04, ++ dmtc1_op = 0x05, ++ ctc1_op = 0x06, ++ mthc1_op = 0x07, ++ bc1f_op = 0x08, ++ single_fmt = 0x10, ++ double_fmt = 0x11, ++ word_fmt = 0x14, ++ long_fmt = 0x15, ++ ps_fmt = 0x16 ++ }; ++ ++ ++ //2 bist (bits[17...16]) of bc1x instructions (cop1) ++ enum bc_ops { ++ bcf_op = 0x0, ++ bct_op = 0x1, ++ bcfl_op = 0x2, ++ bctl_op = 0x3, ++ }; ++ ++ // low 6 bits of c_x_fmt instructions (cop1) ++ enum c_conds { ++ f_cond = 0x30, ++ un_cond = 0x31, ++ eq_cond = 0x32, ++ ueq_cond = 0x33, ++ olt_cond = 0x34, ++ ult_cond = 0x35, ++ ole_cond = 0x36, ++ ule_cond = 0x37, ++ sf_cond = 0x38, ++ ngle_cond = 0x39, ++ seq_cond = 0x3a, ++ ngl_cond = 0x3b, ++ lt_cond = 0x3c, ++ nge_cond = 0x3d, ++ le_cond = 0x3e, ++ ngt_cond = 0x3f ++ }; ++ ++ // low 6 bits of cop1 instructions ++ enum float_ops { ++ fadd_op = 0x00, ++ fsub_op = 0x01, ++ fmul_op = 0x02, ++ fdiv_op = 0x03, ++ fsqrt_op = 0x04, ++ fabs_op = 0x05, ++ fmov_op = 0x06, ++ fneg_op = 0x07, ++ froundl_op = 0x08, ++ ftruncl_op = 0x09, ++ fceill_op = 0x0a, ++ ffloorl_op = 0x0b, ++ froundw_op = 0x0c, ++ ftruncw_op = 0x0d, ++ fceilw_op = 0x0e, ++ ffloorw_op = 0x0f, ++ movf_f_op = 0x11, ++ movt_f_op = 0x11, ++ movz_f_op = 0x12, ++ movn_f_op = 0x13, ++ frecip_op = 0x15, ++ frsqrt_op = 0x16, ++ fcvts_op = 0x20, ++ fcvtd_op = 0x21, ++ fcvtw_op = 0x24, ++ fcvtl_op = 0x25, ++ fcvtps_op = 0x26, ++ fcvtspl_op = 0x28, ++ fpll_op = 0x2c, ++ fplu_op = 0x2d, ++ fpul_op = 0x2e, ++ fpuu_op = 0x2f ++ }; ++ ++ static const char* cop1_name[]; ++ ++ //cop1x family, the opcode is in low 6 bits. 
++ enum cop1x_ops { ++ lwxc1_op = 0x00, ++ ldxc1_op = 0x01, ++ luxc1_op = 0x05, ++ swxc1_op = 0x08, ++ sdxc1_op = 0x09, ++ suxc1_op = 0x0d, ++ prefx_op = 0x0f, ++ ++ alnv_ps_op = 0x1e, ++ madd_s_op = 0x20, ++ madd_d_op = 0x21, ++ madd_ps_op = 0x26, ++ msub_s_op = 0x28, ++ msub_d_op = 0x29, ++ msub_ps_op = 0x2e, ++ nmadd_s_op = 0x30, ++ nmadd_d_op = 0x31, ++ nmadd_ps_op = 0x36, ++ nmsub_s_op = 0x38, ++ nmsub_d_op = 0x39, ++ nmsub_ps_op = 0x3e ++ }; ++ ++ static const char* cop1x_name[]; ++ ++ //special2 family, the opcode is in low 6 bits. ++ enum special2_ops { ++ madd_op = 0x00, ++ maddu_op = 0x01, ++ mul_op = 0x02, ++ gs0x03_op = 0x03, ++ msub_op = 0x04, ++ msubu_op = 0x05, ++ gs0x06_op = 0x06, ++ gsemul2_op = 0x07, ++ gsemul3_op = 0x08, ++ gsemul4_op = 0x09, ++ gsemul5_op = 0x0a, ++ gsemul6_op = 0x0b, ++ gsemul7_op = 0x0c, ++ gsemul8_op = 0x0d, ++ gsemul9_op = 0x0e, ++ gsemul10_op = 0x0f, ++ gsmult_op = 0x10, ++ gsdmult_op = 0x11, ++ gsmultu_op = 0x12, ++ gsdmultu_op = 0x13, ++ gsdiv_op = 0x14, ++ gsddiv_op = 0x15, ++ gsdivu_op = 0x16, ++ gsddivu_op = 0x17, ++ gsmod_op = 0x1c, ++ gsdmod_op = 0x1d, ++ gsmodu_op = 0x1e, ++ gsdmodu_op = 0x1f, ++ clz_op = 0x20, ++ clo_op = 0x21, ++ xctx_op = 0x22, //ctz, cto, dctz, dcto, gsX ++ gsrxr_x_op = 0x23, //gsX ++ dclz_op = 0x24, ++ dclo_op = 0x25, ++ gsle_op = 0x26, ++ gsgt_op = 0x27, ++ gs86j_op = 0x28, ++ gsloop_op = 0x29, ++ gsaj_op = 0x2a, ++ gsldpc_op = 0x2b, ++ gs86set_op = 0x30, ++ gstm_op = 0x31, ++ gscvt_ld_op = 0x32, ++ gscvt_ud_op = 0x33, ++ gseflag_op = 0x34, ++ gscam_op = 0x35, ++ gstop_op = 0x36, ++ gssettag_op = 0x37, ++ gssdbbp_op = 0x38 ++ }; ++ ++ static const char* special2_name[]; ++ ++ // special3 family, the opcode is in low 6 bits. ++ enum special3_ops { ++ ext_op = 0x00, ++ dextm_op = 0x01, ++ dextu_op = 0x02, ++ dext_op = 0x03, ++ ins_op = 0x04, ++ dinsm_op = 0x05, ++ dinsu_op = 0x06, ++ dins_op = 0x07, ++ lxx_op = 0x0a, //lwx, lhx, lbux, ldx ++ insv_op = 0x0c, ++ dinsv_op = 0x0d, ++ ar1_op = 0x10, //MIPS DSP ++ cmp1_op = 0x11, //MIPS DSP ++ re1_op = 0x12, //MIPS DSP, re1_ops ++ sh1_op = 0x13, //MIPS DSP ++ ar2_op = 0x14, //MIPS DSP ++ cmp2_op = 0x15, //MIPS DSP ++ re2_op = 0x16, //MIPS DSP, re2_ops ++ sh2_op = 0x17, //MIPS DSP ++ ar3_op = 0x18, //MIPS DSP ++ bshfl_op = 0x20 //seb, seh ++ }; ++ ++ // re1_ops ++ enum re1_ops { ++ absq_s_qb_op = 0x01, ++ repl_qb_op = 0x02, ++ replv_qb_op = 0x03, ++ absq_s_ph_op = 0x09, ++ repl_ph_op = 0x0a, ++ replv_ph_op = 0x0b, ++ absq_s_w_op = 0x11, ++ bitrev_op = 0x1b ++ }; ++ ++ // re2_ops ++ enum re2_ops { ++ repl_ob_op = 0x02, ++ replv_ob_op = 0x03, ++ absq_s_qh_op = 0x09, ++ repl_qh_op = 0x0a, ++ replv_qh_op = 0x0b, ++ absq_s_pw_op = 0x11, ++ repl_pw_op = 0x12, ++ replv_pw_op = 0x13, ++ }; ++ ++ static const char* special3_name[]; ++ ++ // lwc2/gs_lwc2 family, the opcode is in low 6 bits. ++ enum gs_lwc2_ops { ++ gslble_op = 0x10, ++ gslbgt_op = 0x11, ++ gslhle_op = 0x12, ++ gslhgt_op = 0x13, ++ gslwle_op = 0x14, ++ gslwgt_op = 0x15, ++ gsldle_op = 0x16, ++ gsldgt_op = 0x17, ++ gslwlec1_op = 0x1c, ++ gslwgtc1_op = 0x1d, ++ gsldlec1_op = 0x1e, ++ gsldgtc1_op = 0x1f, ++ gslq_op = 0x20 ++ }; ++ ++ static const char* gs_lwc2_name[]; ++ ++ // ldc2/gs_ldc2 family, the opcode is in low 3 bits. ++ enum gs_ldc2_ops { ++ gslbx_op = 0x0, ++ gslhx_op = 0x1, ++ gslwx_op = 0x2, ++ gsldx_op = 0x3, ++ gslwxc1_op = 0x6, ++ gsldxc1_op = 0x7 ++ }; ++ ++ static const char* gs_ldc2_name[]; ++ ++ // swc2/gs_swc2 family, the opcode is in low 6 bits. 
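
These gs* (Loongson EXT) load/store families are what the Address-form emitters earlier in this file fall back to when UseLEXT1 is set; note that the indexed gs*x forms (gslwx, gssdx, gsswx, ...) only carry an 8-bit signed offset, which is why those emitters guard on "UseLEXT1 && is_simm(disp, 8)" before using them. A small sketch of that range test, with arbitrary sample offsets:

    #include <cstdio>

    // Same check as Assembler::is_simm(x, nbits): signed nbits-bit range.
    static bool is_simm(int x, int nbits) {
      const int min      = -(1 << (nbits - 1));
      const int maxplus1 =  (1 << (nbits - 1));
      return min <= x && x < maxplus1;
    }

    int main() {
      printf("%d %d %d\n", is_simm(100, 8), is_simm(200, 8), is_simm(-128, 8));
      // prints 1 0 1: only the first and last fit the 8-bit offset of gssdx/gsswx.
      return 0;
    }
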
++ enum gs_swc2_ops { ++ gssble_op = 0x10, ++ gssbgt_op = 0x11, ++ gsshle_op = 0x12, ++ gsshgt_op = 0x13, ++ gsswle_op = 0x14, ++ gsswgt_op = 0x15, ++ gssdle_op = 0x16, ++ gssdgt_op = 0x17, ++ gsswlec1_op = 0x1c, ++ gsswgtc1_op = 0x1d, ++ gssdlec1_op = 0x1e, ++ gssdgtc1_op = 0x1f, ++ gssq_op = 0x20 ++ }; ++ ++ static const char* gs_swc2_name[]; ++ ++ // sdc2/gs_sdc2 family, the opcode is in low 3 bits. ++ enum gs_sdc2_ops { ++ gssbx_op = 0x0, ++ gsshx_op = 0x1, ++ gsswx_op = 0x2, ++ gssdx_op = 0x3, ++ gsswxc1_op = 0x6, ++ gssdxc1_op = 0x7 ++ }; ++ ++ static const char* gs_sdc2_name[]; ++ ++ enum WhichOperand { ++ // input to locate_operand, and format code for relocations ++ imm_operand = 0, // embedded 32-bit|64-bit immediate operand ++ disp32_operand = 1, // embedded 32-bit displacement or address ++ call32_operand = 2, // embedded 32-bit self-relative displacement ++ narrow_oop_operand = 3, // embedded 32-bit immediate narrow oop ++ _WhichOperand_limit = 4 ++ }; ++ ++ static int opcode(int insn) { return (insn>>26)&0x3f; } ++ static int rs(int insn) { return (insn>>21)&0x1f; } ++ static int rt(int insn) { return (insn>>16)&0x1f; } ++ static int rd(int insn) { return (insn>>11)&0x1f; } ++ static int sa(int insn) { return (insn>>6)&0x1f; } ++ static int special(int insn) { return insn&0x3f; } ++ static int imm_off(int insn) { return (short)low16(insn); } ++ ++ static int low (int x, int l) { return bitfield(x, 0, l); } ++ static int low16(int x) { return low(x, 16); } ++ static int low26(int x) { return low(x, 26); } ++ ++ protected: ++ //help methods for instruction ejection ++ ++ // I-Type (Immediate) ++ // 31 26 25 21 20 16 15 0 ++ //| opcode | rs | rt | immediat | ++ //| | | | | ++ // 6 5 5 16 ++ static int insn_ORRI(int op, int rs, int rt, int imm) { assert(is_simm16(imm), "not a signed 16-bit int"); return (op<<26) | (rs<<21) | (rt<<16) | low16(imm); } ++ ++ // R-Type (Register) ++ // 31 26 25 21 20 16 15 11 10 6 5 0 ++ //| special | rs | rt | rd | 0 | opcode | ++ //| 0 0 0 0 0 0 | | | | 0 0 0 0 0 | | ++ // 6 5 5 5 5 6 ++ static int insn_RRRO(int rs, int rt, int rd, int op) { return (rs<<21) | (rt<<16) | (rd<<11) | op; } ++ static int insn_RRSO(int rt, int rd, int sa, int op) { return (rt<<16) | (rd<<11) | (sa<<6) | op; } ++ static int insn_RRCO(int rs, int rt, int code, int op) { return (rs<<21) | (rt<<16) | (code<<6) | op; } ++ ++ static int insn_COP0(int op, int rt, int rd) { return (cop0_op<<26) | (op<<21) | (rt<<16) | (rd<<11); } ++ static int insn_COP1(int op, int rt, int fs) { return (cop1_op<<26) | (op<<21) | (rt<<16) | (fs<<11); } ++ ++ static int insn_F3RO(int fmt, int ft, int fs, int fd, int func) { ++ return (cop1_op<<26) | (fmt<<21) | (ft<<16) | (fs<<11) | (fd<<6) | func; ++ } ++ static int insn_F3ROX(int fmt, int ft, int fs, int fd, int func) { ++ return (cop1x_op<<26) | (fmt<<21) | (ft<<16) | (fs<<11) | (fd<<6) | func; ++ } ++ ++ static int high (int x, int l) { return bitfield(x, 32-l, l); } ++ static int high16(int x) { return high(x, 16); } ++ static int high6 (int x) { return high(x, 6); } ++ ++ //get the offset field of jump/branch instruction ++ int offset(address entry) { ++ assert(is_simm16((entry - pc() - 4) / 4), "change this code"); ++ if (!is_simm16((entry - pc() - 4) / 4)) { ++ tty->print_cr("!!! 
is_simm16: %lx", (entry - pc() - 4) / 4); ++ } ++ return (entry - pc() - 4) / 4; ++ } ++ ++ ++public: ++ using AbstractAssembler::offset; ++ ++ //sign expand with the sign bit is h ++ static int expand(int x, int h) { return -(x & (1<> 16; ++ } ++ ++ static int split_high(int x) { ++ return ( (x >> 16) + ((x & 0x8000) != 0) ) & 0xffff; ++ } ++ ++ static int merge(int low, int high) { ++ return expand(low, 15) + (high<<16); ++ } ++ ++ static intptr_t merge(intptr_t x0, intptr_t x16, intptr_t x32, intptr_t x48) { ++ return (x48 << 48) | (x32 << 32) | (x16 << 16) | x0; ++ } ++ ++ // Test if x is within signed immediate range for nbits. ++ static bool is_simm (int x, int nbits) { ++ assert(0 < nbits && nbits < 32, "out of bounds"); ++ const int min = -( ((int)1) << nbits-1 ); ++ const int maxplus1 = ( ((int)1) << nbits-1 ); ++ return min <= x && x < maxplus1; ++ } ++ ++ static bool is_simm(jlong x, unsigned int nbits) { ++ assert(0 < nbits && nbits < 64, "out of bounds"); ++ const jlong min = -( ((jlong)1) << nbits-1 ); ++ const jlong maxplus1 = ( ((jlong)1) << nbits-1 ); ++ return min <= x && x < maxplus1; ++ } ++ ++ // Test if x is within unsigned immediate range for nbits ++ static bool is_uimm(int x, unsigned int nbits) { ++ assert(0 < nbits && nbits < 32, "out of bounds"); ++ const int maxplus1 = ( ((int)1) << nbits ); ++ return 0 <= x && x < maxplus1; ++ } ++ ++ static bool is_uimm(jlong x, unsigned int nbits) { ++ assert(0 < nbits && nbits < 64, "out of bounds"); ++ const jlong maxplus1 = ( ((jlong)1) << nbits ); ++ return 0 <= x && x < maxplus1; ++ } ++ ++ static bool is_simm16(int x) { return is_simm(x, 16); } ++ static bool is_simm16(long x) { return is_simm((jlong)x, (unsigned int)16); } ++ ++ static bool fit_in_jal(address target, address pc) { ++ intptr_t mask = 0xfffffffff0000000; ++ return ((intptr_t)(pc + 4) & mask) == ((intptr_t)target & mask); ++ } ++ ++ bool fit_int_branch(address entry) { ++ return is_simm16(offset(entry)); ++ } ++ ++protected: ++#ifdef ASSERT ++ #define CHECK_DELAY ++#endif ++#ifdef CHECK_DELAY ++ enum Delay_state { no_delay, at_delay_slot, filling_delay_slot } delay_state; ++#endif ++ ++public: ++ void assert_not_delayed() { ++#ifdef CHECK_DELAY ++ assert_not_delayed("next instruction should not be a delay slot"); ++#endif ++ } ++ ++ void assert_not_delayed(const char* msg) { ++#ifdef CHECK_DELAY ++ assert(delay_state == no_delay, msg); ++#endif ++ } ++ ++protected: ++ // Delay slot helpers ++ // cti is called when emitting control-transfer instruction, ++ // BEFORE doing the emitting. ++ // Only effective when assertion-checking is enabled. ++ ++ // called when emitting cti with a delay slot, AFTER emitting ++ void has_delay_slot() { ++#ifdef CHECK_DELAY ++ assert_not_delayed("just checking"); ++ delay_state = at_delay_slot; ++#endif ++ } ++ ++public: ++ Assembler* delayed() { ++#ifdef CHECK_DELAY ++ guarantee( delay_state == at_delay_slot, "delayed instructition is not in delay slot"); ++ delay_state = filling_delay_slot; ++#endif ++ return this; ++ } ++ ++ void flush() { ++#ifdef CHECK_DELAY ++ guarantee( delay_state == no_delay, "ending code with a delay slot"); ++#endif ++ AbstractAssembler::flush(); ++ } ++ ++ void emit_long(int); // shadows AbstractAssembler::emit_long ++ void emit_data(int); ++ void emit_data(int, RelocationHolder const&); ++ void emit_data(int, relocInfo::relocType rtype); ++ void check_delay(); ++ ++ ++ // Generic instructions ++ // Does 32bit or 64bit as needed for the platform. 
In some sense these ++ // belong in macro assembler but there is no need for both varieties to exist ++ ++ void addu32(Register rd, Register rs, Register rt){ emit_long(insn_RRRO((int)rs->encoding(), (int)rt->encoding(), (int)rd->encoding(), addu_op)); } ++ void addiu32(Register rt, Register rs, int imm) { emit_long(insn_ORRI(addiu_op, (int)rs->encoding(), (int)rt->encoding(), imm)); } ++ void addiu(Register rt, Register rs, int imm) { daddiu (rt, rs, imm);} ++ void addu(Register rd, Register rs, Register rt) { daddu (rd, rs, rt); } ++ ++ void andr(Register rd, Register rs, Register rt) { emit_long(insn_RRRO((int)rs->encoding(), (int)rt->encoding(), (int)rd->encoding(), and_op)); } ++ void andi(Register rt, Register rs, int imm) { emit_long(insn_ORRI(andi_op, (int)rs->encoding(), (int)rt->encoding(), simm16(imm))); } ++ ++ void beq (Register rs, Register rt, int off) { emit_long(insn_ORRI(beq_op, (int)rs->encoding(), (int)rt->encoding(), off)); has_delay_slot(); } ++ void beql (Register rs, Register rt, int off) { emit_long(insn_ORRI(beql_op, (int)rs->encoding(), (int)rt->encoding(), off)); has_delay_slot(); } ++ void bgez (Register rs, int off) { emit_long(insn_ORRI(regimm_op, (int)rs->encoding(), bgez_op, off)); has_delay_slot(); } ++ void bgezal (Register rs, int off) { emit_long(insn_ORRI(regimm_op, (int)rs->encoding(), bgezal_op, off)); has_delay_slot(); } ++ void bgezall(Register rs, int off) { emit_long(insn_ORRI(regimm_op, (int)rs->encoding(), bgezall_op, off)); has_delay_slot(); } ++ void bgezl (Register rs, int off) { emit_long(insn_ORRI(regimm_op, (int)rs->encoding(), bgezl_op, off)); has_delay_slot(); } ++ void bgtz (Register rs, int off) { emit_long(insn_ORRI(bgtz_op, (int)rs->encoding(), 0, off)); has_delay_slot(); } ++ void bgtzl (Register rs, int off) { emit_long(insn_ORRI(bgtzl_op, (int)rs->encoding(), 0, off)); has_delay_slot(); } ++ void blez (Register rs, int off) { emit_long(insn_ORRI(blez_op, (int)rs->encoding(), 0, off)); has_delay_slot(); } ++ void blezl (Register rs, int off) { emit_long(insn_ORRI(blezl_op, (int)rs->encoding(), 0, off)); has_delay_slot(); } ++ void bltz (Register rs, int off) { emit_long(insn_ORRI(regimm_op, (int)rs->encoding(), bltz_op, off)); has_delay_slot(); } ++ void bltzal (Register rs, int off) { emit_long(insn_ORRI(regimm_op, (int)rs->encoding(), bltzal_op, off)); has_delay_slot(); } ++ void bltzall(Register rs, int off) { emit_long(insn_ORRI(regimm_op, (int)rs->encoding(), bltzall_op, off)); has_delay_slot(); } ++ void bltzl (Register rs, int off) { emit_long(insn_ORRI(regimm_op, (int)rs->encoding(), bltzl_op, off)); has_delay_slot(); } ++ void bne (Register rs, Register rt, int off) { emit_long(insn_ORRI(bne_op, (int)rs->encoding(), (int)rt->encoding(), off)); has_delay_slot(); } ++ void bnel (Register rs, Register rt, int off) { emit_long(insn_ORRI(bnel_op, (int)rs->encoding(), (int)rt->encoding(), off)); has_delay_slot(); } ++ // two versions of brk: ++ // the brk(code) version is according to MIPS64 Architecture For Programmers Volume II: The MIPS64 Instruction Set ++ // the brk(code1, code2) is according to disassembler of hsdis (binutils-2.27) ++ // both versions work ++ void brk (int code) { assert(is_uimm(code, 20), "code is 20 bits"); emit_long( (low(code, 20)<<6) | break_op ); } ++ void brk (int code1, int code2) { assert(is_uimm(code1, 10) && is_uimm(code2, 10), "code is 20 bits"); emit_long( (low(code1, 10)<<16) | (low(code2, 10)<<6) | break_op ); } ++ ++ void beq (Register rs, Register rt, address entry) { beq(rs, rt, 
offset(entry)); } ++ void beql (Register rs, Register rt, address entry) { beql(rs, rt, offset(entry));} ++ void bgez (Register rs, address entry) { bgez (rs, offset(entry)); } ++ void bgezal (Register rs, address entry) { bgezal (rs, offset(entry)); } ++ void bgezall(Register rs, address entry) { bgezall(rs, offset(entry)); } ++ void bgezl (Register rs, address entry) { bgezl (rs, offset(entry)); } ++ void bgtz (Register rs, address entry) { bgtz (rs, offset(entry)); } ++ void bgtzl (Register rs, address entry) { bgtzl (rs, offset(entry)); } ++ void blez (Register rs, address entry) { blez (rs, offset(entry)); } ++ void blezl (Register rs, address entry) { blezl (rs, offset(entry)); } ++ void bltz (Register rs, address entry) { bltz (rs, offset(entry)); } ++ void bltzal (Register rs, address entry) { bltzal (rs, offset(entry)); } ++ void bltzall(Register rs, address entry) { bltzall(rs, offset(entry)); } ++ void bltzl (Register rs, address entry) { bltzl (rs, offset(entry)); } ++ void bne (Register rs, Register rt, address entry) { bne(rs, rt, offset(entry)); } ++ void bnel (Register rs, Register rt, address entry) { bnel(rs, rt, offset(entry)); } ++ ++ void beq (Register rs, Register rt, Label& L) { beq(rs, rt, target(L)); } ++ void beql (Register rs, Register rt, Label& L) { beql(rs, rt, target(L)); } ++ void bgez (Register rs, Label& L){ bgez (rs, target(L)); } ++ void bgezal (Register rs, Label& L){ bgezal (rs, target(L)); } ++ void bgezall(Register rs, Label& L){ bgezall(rs, target(L)); } ++ void bgezl (Register rs, Label& L){ bgezl (rs, target(L)); } ++ void bgtz (Register rs, Label& L){ bgtz (rs, target(L)); } ++ void bgtzl (Register rs, Label& L){ bgtzl (rs, target(L)); } ++ void blez (Register rs, Label& L){ blez (rs, target(L)); } ++ void blezl (Register rs, Label& L){ blezl (rs, target(L)); } ++ void bltz (Register rs, Label& L){ bltz (rs, target(L)); } ++ void bltzal (Register rs, Label& L){ bltzal (rs, target(L)); } ++ void bltzall(Register rs, Label& L){ bltzall(rs, target(L)); } ++ void bltzl (Register rs, Label& L){ bltzl (rs, target(L)); } ++ void bne (Register rs, Register rt, Label& L){ bne(rs, rt, target(L)); } ++ void bnel (Register rs, Register rt, Label& L){ bnel(rs, rt, target(L)); } ++ ++ void daddiu(Register rt, Register rs, int imm) { emit_long(insn_ORRI(daddiu_op, (int)rs->encoding(), (int)rt->encoding(), imm)); } ++ void daddu (Register rd, Register rs, Register rt) { emit_long(insn_RRRO((int)rs->encoding(), (int)rt->encoding(), (int)rd->encoding(), daddu_op)); } ++ void ddiv (Register rs, Register rt) { emit_long(insn_RRRO((int)rs->encoding(), (int)rt->encoding(), 0, ddiv_op)); } ++ void ddivu (Register rs, Register rt) { emit_long(insn_RRRO((int)rs->encoding(), (int)rt->encoding(), 0, ddivu_op)); } ++ ++ void movz (Register rd, Register rs, Register rt) { emit_long(insn_RRRO((int)rs->encoding(), (int)rt->encoding(), (int)rd->encoding(), movz_op)); } ++ void movn (Register rd, Register rs, Register rt) { emit_long(insn_RRRO((int)rs->encoding(), (int)rt->encoding(), (int)rd->encoding(), movn_op)); } ++ ++ void movt (Register rd, Register rs) { emit_long(((int)rs->encoding() << 21) | (1 << 16) | ((int)rd->encoding() << 11) | movci_op); } ++ void movf (Register rd, Register rs) { emit_long(((int)rs->encoding() << 21) | ((int)rd->encoding() << 11) | movci_op); } ++ ++ enum bshfl_ops { ++ seb_op = 0x10, ++ seh_op = 0x18 ++ }; ++ void seb (Register rd, Register rt) { emit_long((special3_op << 26) | ((int)rt->encoding() << 16) | ((int)rd->encoding() << 11) | (seb_op 
<< 6) | bshfl_op); } ++ void seh (Register rd, Register rt) { emit_long((special3_op << 26) | ((int)rt->encoding() << 16) | ((int)rd->encoding() << 11) | (seh_op << 6) | bshfl_op); } ++ ++ void ext (Register rt, Register rs, int pos, int size) { ++ guarantee((0 <= pos) && (pos < 32), "pos must be in [0, 32)"); ++ guarantee((0 < size) && (size <= 32), "size must be in (0, 32]"); ++ guarantee((0 < pos + size) && (pos + size <= 32), "pos + size must be in (0, 32]"); ++ ++ int lsb = pos; ++ int msbd = size - 1; ++ ++ emit_long((special3_op << 26) | ((int)rs->encoding() << 21) | ((int)rt->encoding() << 16) | (msbd << 11) | (lsb << 6) | ext_op); ++ } ++ ++ void dext (Register rt, Register rs, int pos, int size) { ++ guarantee((0 <= pos) && (pos < 32), "pos must be in [0, 32)"); ++ guarantee((0 < size) && (size <= 32), "size must be in (0, 32]"); ++ guarantee((0 < pos + size) && (pos + size <= 63), "pos + size must be in (0, 63]"); ++ ++ int lsb = pos; ++ int msbd = size - 1; ++ ++ emit_long((special3_op << 26) | ((int)rs->encoding() << 21) | ((int)rt->encoding() << 16) | (msbd << 11) | (lsb << 6) | dext_op); ++ } ++ ++ void dextm (Register rt, Register rs, int pos, int size) { ++ guarantee((0 <= pos) && (pos < 32), "pos must be in [0, 32)"); ++ guarantee((32 < size) && (size <= 64), "size must be in (32, 64]"); ++ guarantee((32 < pos + size) && (pos + size <= 64), "pos + size must be in (32, 64]"); ++ ++ int lsb = pos; ++ int msbd = size - 1 - 32; ++ ++ emit_long((special3_op << 26) | ((int)rs->encoding() << 21) | ((int)rt->encoding() << 16) | (msbd << 11) | (lsb << 6) | dextm_op); ++ } ++ ++ void rotr (Register rd, Register rt, int sa) { ++ emit_long((special_op << 26) | (1 << 21) | ((int)rt->encoding() << 16) | ((int)rd->encoding() << 11) | (low(sa, 5) << 6) | srl_op); ++ } ++ ++ void drotr (Register rd, Register rt, int sa) { ++ emit_long((special_op << 26) | (1 << 21) | ((int)rt->encoding() << 16) | ((int)rd->encoding() << 11) | (low(sa, 5) << 6) | dsrl_op); ++ } ++ ++ void drotr32 (Register rd, Register rt, int sa) { ++ emit_long((special_op << 26) | (1 << 21) | ((int)rt->encoding() << 16) | ((int)rd->encoding() << 11) | (low(sa, 5) << 6) | dsrl32_op); ++ } ++ ++ void rotrv (Register rd, Register rt, Register rs) { ++ emit_long((special_op << 26) | ((int)rs->encoding() << 21) | ((int)rt->encoding() << 16) | ((int)rd->encoding() << 11) | (1 << 6) | srlv_op); ++ } ++ ++ void drotrv (Register rd, Register rt, Register rs) { ++ emit_long((special_op << 26) | ((int)rs->encoding() << 21) | ((int)rt->encoding() << 16) | ((int)rd->encoding() << 11) | (1 << 6) | dsrlv_op); ++ } ++ ++ void div (Register rs, Register rt) { emit_long(insn_RRRO((int)rs->encoding(), (int)rt->encoding(), 0, div_op)); } ++ void divu (Register rs, Register rt) { emit_long(insn_RRRO((int)rs->encoding(), (int)rt->encoding(), 0, divu_op)); } ++ void dmult (Register rs, Register rt) { emit_long(insn_RRRO((int)rs->encoding(), (int)rt->encoding(), 0, dmult_op)); } ++ void dmultu(Register rs, Register rt) { emit_long(insn_RRRO((int)rs->encoding(), (int)rt->encoding(), 0, dmultu_op)); } ++ void dsll (Register rd, Register rt , int sa) { emit_long(insn_RRSO((int)rt->encoding(), (int)rd->encoding(), low(sa, 5), dsll_op)); } ++ void dsllv (Register rd, Register rt, Register rs) { emit_long(insn_RRRO((int)rs->encoding(), (int)rt->encoding(), (int)rd->encoding(), dsllv_op)); } ++ void dsll32(Register rd, Register rt , int sa) { emit_long(insn_RRSO((int)rt->encoding(), (int)rd->encoding(), low(sa, 5), dsll32_op)); } ++ void dsra 
(Register rd, Register rt , int sa) { emit_long(insn_RRSO((int)rt->encoding(), (int)rd->encoding(), low(sa, 5), dsra_op)); } ++ void dsrav (Register rd, Register rt, Register rs) { emit_long(insn_RRRO((int)rs->encoding(), (int)rt->encoding(), (int)rd->encoding(), dsrav_op)); } ++ void dsra32(Register rd, Register rt , int sa) { emit_long(insn_RRSO((int)rt->encoding(), (int)rd->encoding(), low(sa, 5), dsra32_op)); } ++ void dsrl (Register rd, Register rt , int sa) { emit_long(insn_RRSO((int)rt->encoding(), (int)rd->encoding(), low(sa, 5), dsrl_op)); } ++ void dsrlv (Register rd, Register rt, Register rs) { emit_long(insn_RRRO((int)rs->encoding(), (int)rt->encoding(), (int)rd->encoding(), dsrlv_op)); } ++ void dsrl32(Register rd, Register rt , int sa) { emit_long(insn_RRSO((int)rt->encoding(), (int)rd->encoding(), low(sa, 5), dsrl32_op)); } ++ void dsubu (Register rd, Register rs, Register rt) { emit_long(insn_RRRO((int)rs->encoding(), (int)rt->encoding(), (int)rd->encoding(), dsubu_op)); } ++ ++ void b(int off) { beq(R0, R0, off); } ++ void b(address entry) { b(offset(entry)); } ++ void b(Label& L) { b(target(L)); } ++ ++ void j(address entry); ++ void jal(address entry); ++ ++ void jalr(Register rd, Register rs) { emit_long( ((int)rs->encoding()<<21) | ((int)rd->encoding()<<11) | jalr_op); has_delay_slot(); } ++ void jalr(Register rs) { jalr(RA, rs); } ++ void jalr() { jalr(RT9); } ++ ++ void jr(Register rs) { emit_long(((int)rs->encoding()<<21) | jr_op); has_delay_slot(); } ++ void jr_hb(Register rs) { emit_long(((int)rs->encoding()<<21) | (1 << 10) | jr_op); has_delay_slot(); } ++ ++ void lb (Register rt, Register base, int off) { emit_long(insn_ORRI(lb_op, (int)base->encoding(), (int)rt->encoding(), off)); } ++ void lbu(Register rt, Register base, int off) { emit_long(insn_ORRI(lbu_op, (int)base->encoding(), (int)rt->encoding(), off)); } ++ void ld (Register rt, Register base, int off) { emit_long(insn_ORRI(ld_op, (int)base->encoding(), (int)rt->encoding(), off)); } ++ void ldl(Register rt, Register base, int off) { emit_long(insn_ORRI(ldl_op, (int)base->encoding(), (int)rt->encoding(), off)); } ++ void ldr(Register rt, Register base, int off) { emit_long(insn_ORRI(ldr_op, (int)base->encoding(), (int)rt->encoding(), off)); } ++ void lh (Register rt, Register base, int off) { emit_long(insn_ORRI(lh_op, (int)base->encoding(), (int)rt->encoding(), off)); } ++ void lhu(Register rt, Register base, int off) { emit_long(insn_ORRI(lhu_op, (int)base->encoding(), (int)rt->encoding(), off)); } ++ void ll (Register rt, Register base, int off) { emit_long(insn_ORRI(ll_op, (int)base->encoding(), (int)rt->encoding(), off)); } ++ void lld(Register rt, Register base, int off) { emit_long(insn_ORRI(lld_op, (int)base->encoding(), (int)rt->encoding(), off)); } ++ void lui(Register rt, int imm) { emit_long(insn_ORRI(lui_op, 0, (int)rt->encoding(), simm16(imm))); } ++ void lw (Register rt, Register base, int off) { emit_long(insn_ORRI(lw_op, (int)base->encoding(), (int)rt->encoding(), off)); } ++ void lwl(Register rt, Register base, int off) { emit_long(insn_ORRI(lwl_op, (int)base->encoding(), (int)rt->encoding(), off)); } ++ void lwr(Register rt, Register base, int off) { emit_long(insn_ORRI(lwr_op, (int)base->encoding(), (int)rt->encoding(), off)); } ++ void lwu(Register rt, Register base, int off) { emit_long(insn_ORRI(lwu_op, (int)base->encoding(), (int)rt->encoding(), off)); } ++ ++ void lb (Register rt, Address src); ++ void lbu(Register rt, Address src); ++ void ld (Register rt, Address src); ++ void 
ldl(Register rt, Address src); ++ void ldr(Register rt, Address src); ++ void lh (Register rt, Address src); ++ void lhu(Register rt, Address src); ++ void ll (Register rt, Address src); ++ void lld(Register rt, Address src); ++ void lw (Register rt, Address src); ++ void lwl(Register rt, Address src); ++ void lwr(Register rt, Address src); ++ void lwu(Register rt, Address src); ++ void lea(Register rt, Address src); ++ void pref(int hint, Register base, int off) { emit_long(insn_ORRI(pref_op, (int)base->encoding(), low(hint, 5), low(off, 16))); } ++ ++ void mfhi (Register rd) { emit_long( ((int)rd->encoding()<<11) | mfhi_op ); } ++ void mflo (Register rd) { emit_long( ((int)rd->encoding()<<11) | mflo_op ); } ++ void mthi (Register rs) { emit_long( ((int)rs->encoding()<<21) | mthi_op ); } ++ void mtlo (Register rs) { emit_long( ((int)rs->encoding()<<21) | mtlo_op ); } ++ ++ void mult (Register rs, Register rt) { emit_long(insn_RRRO((int)rs->encoding(), (int)rt->encoding(), 0, mult_op)); } ++ void multu(Register rs, Register rt) { emit_long(insn_RRRO((int)rs->encoding(), (int)rt->encoding(), 0, multu_op)); } ++ ++ void nor(Register rd, Register rs, Register rt) { emit_long(insn_RRRO((int)rs->encoding(), (int)rt->encoding(), (int)rd->encoding(), nor_op)); } ++ ++ void orr(Register rd, Register rs, Register rt) { emit_long(insn_RRRO((int)rs->encoding(), (int)rt->encoding(), (int)rd->encoding(), or_op)); } ++ void ori(Register rt, Register rs, int imm) { emit_long(insn_ORRI(ori_op, (int)rs->encoding(), (int)rt->encoding(), simm16(imm))); } ++ ++ void sb (Register rt, Register base, int off) { emit_long(insn_ORRI(sb_op, (int)base->encoding(), (int)rt->encoding(), off)); } ++ void sc (Register rt, Register base, int off) { emit_long(insn_ORRI(sc_op, (int)base->encoding(), (int)rt->encoding(), off)); } ++ void scd (Register rt, Register base, int off) { emit_long(insn_ORRI(scd_op, (int)base->encoding(), (int)rt->encoding(), off)); } ++ void sd (Register rt, Register base, int off) { emit_long(insn_ORRI(sd_op, (int)base->encoding(), (int)rt->encoding(), off)); } ++ void sdl (Register rt, Register base, int off) { emit_long(insn_ORRI(sdl_op, (int)base->encoding(), (int)rt->encoding(), off)); } ++ void sdr (Register rt, Register base, int off) { emit_long(insn_ORRI(sdr_op, (int)base->encoding(), (int)rt->encoding(), off)); } ++ void sh (Register rt, Register base, int off) { emit_long(insn_ORRI(sh_op, (int)base->encoding(), (int)rt->encoding(), off)); } ++ void sll (Register rd, Register rt , int sa) { emit_long(insn_RRSO((int)rt->encoding(), (int)rd->encoding(), low(sa, 5), sll_op)); } ++ void sllv (Register rd, Register rt, Register rs) { emit_long(insn_RRRO((int)rs->encoding(), (int)rt->encoding(), (int)rd->encoding(), sllv_op)); } ++ void slt (Register rd, Register rs, Register rt) { emit_long(insn_RRRO((int)rs->encoding(), (int)rt->encoding(), (int)rd->encoding(), slt_op)); } ++ void slti (Register rt, Register rs, int imm) { emit_long(insn_ORRI(slti_op, (int)rs->encoding(), (int)rt->encoding(), imm)); } ++ void sltiu(Register rt, Register rs, int imm) { emit_long(insn_ORRI(sltiu_op, (int)rs->encoding(), (int)rt->encoding(), imm)); } ++ void sltu (Register rd, Register rs, Register rt) { emit_long(insn_RRRO((int)rs->encoding(), (int)rt->encoding(), (int)rd->encoding(), sltu_op)); } ++ void sra (Register rd, Register rt , int sa) { emit_long(insn_RRSO((int)rt->encoding(), (int)rd->encoding(), low(sa, 5), sra_op)); } ++ void srav (Register rd, Register rt, Register rs) { 
emit_long(insn_RRRO((int)rs->encoding(), (int)rt->encoding(), (int)rd->encoding(), srav_op)); } ++ void srl (Register rd, Register rt , int sa) { emit_long(insn_RRSO((int)rt->encoding(), (int)rd->encoding(), low(sa, 5), srl_op)); } ++ void srlv (Register rd, Register rt, Register rs) { emit_long(insn_RRRO((int)rs->encoding(), (int)rt->encoding(), (int)rd->encoding(), srlv_op)); } ++ ++ void subu (Register rd, Register rs, Register rt) { dsubu (rd, rs, rt); } ++ void subu32 (Register rd, Register rs, Register rt) { emit_long(insn_RRRO((int)rs->encoding(), (int)rt->encoding(), (int)rd->encoding(), subu_op)); } ++ void sw (Register rt, Register base, int off) { emit_long(insn_ORRI(sw_op, (int)base->encoding(), (int)rt->encoding(), off)); } ++ void swl (Register rt, Register base, int off) { emit_long(insn_ORRI(swl_op, (int)base->encoding(), (int)rt->encoding(), off)); } ++ void swr (Register rt, Register base, int off) { emit_long(insn_ORRI(swr_op, (int)base->encoding(), (int)rt->encoding(), off)); } ++ void synci(Register base, int off) { emit_long(insn_ORRI(regimm_op, (int)base->encoding(), synci_op, off)); } ++ void sync () { ++ if (os::is_ActiveCoresMP()) ++ emit_long(0); ++ else ++ emit_long(sync_op); ++ } ++ void syscall(int code) { emit_long( (code<<6) | syscall_op ); } ++ ++ void sb(Register rt, Address dst); ++ void sc(Register rt, Address dst); ++ void scd(Register rt, Address dst); ++ void sd(Register rt, Address dst); ++ void sdl(Register rt, Address dst); ++ void sdr(Register rt, Address dst); ++ void sh(Register rt, Address dst); ++ void sw(Register rt, Address dst); ++ void swl(Register rt, Address dst); ++ void swr(Register rt, Address dst); ++ ++ void teq (Register rs, Register rt, int code) { emit_long(insn_RRCO((int)rs->encoding(), (int)rt->encoding(), code, teq_op)); } ++ void teqi (Register rs, int imm) { emit_long(insn_ORRI(regimm_op, (int)rs->encoding(), teqi_op, imm)); } ++ void tge (Register rs, Register rt, int code) { emit_long(insn_RRCO((int)rs->encoding(), (int)rt->encoding(), code, tge_op)); } ++ void tgei (Register rs, int imm) { emit_long(insn_ORRI(regimm_op, (int)rs->encoding(), tgei_op, imm)); } ++ void tgeiu(Register rs, int imm) { emit_long(insn_ORRI(regimm_op, (int)rs->encoding(), tgeiu_op, imm)); } ++ void tgeu (Register rs, Register rt, int code) { emit_long(insn_RRCO((int)rs->encoding(), (int)rt->encoding(), code, tgeu_op)); } ++ void tlt (Register rs, Register rt, int code) { emit_long(insn_RRCO((int)rs->encoding(), (int)rt->encoding(), code, tlt_op)); } ++ void tlti (Register rs, int imm) { emit_long(insn_ORRI(regimm_op, (int)rs->encoding(), tlti_op, imm)); } ++ void tltiu(Register rs, int imm) { emit_long(insn_ORRI(regimm_op, (int)rs->encoding(), tltiu_op, imm)); } ++ void tltu (Register rs, Register rt, int code) { emit_long(insn_RRCO((int)rs->encoding(), (int)rt->encoding(), code, tltu_op)); } ++ void tne (Register rs, Register rt, int code) { emit_long(insn_RRCO((int)rs->encoding(), (int)rt->encoding(), code, tne_op)); } ++ void tnei (Register rs, int imm) { emit_long(insn_ORRI(regimm_op, (int)rs->encoding(), tnei_op, imm)); } ++ ++ void xorr(Register rd, Register rs, Register rt) { emit_long(insn_RRRO((int)rs->encoding(), (int)rt->encoding(), (int)rd->encoding(), xor_op)); } ++ void xori(Register rt, Register rs, int imm) { emit_long(insn_ORRI(xori_op, (int)rs->encoding(), (int)rt->encoding(), simm16(imm))); } ++ ++ void nop() { emit_long(0); } ++ ++ ++ ++ void ldc1(FloatRegister ft, Register base, int off) { emit_long(insn_ORRI(ldc1_op, 
(int)base->encoding(), (int)ft->encoding(), off)); } ++ void lwc1(FloatRegister ft, Register base, int off) { emit_long(insn_ORRI(lwc1_op, (int)base->encoding(), (int)ft->encoding(), off)); } ++ void ldc1(FloatRegister ft, Address src); ++ void lwc1(FloatRegister ft, Address src); ++ ++ //COP0 ++ void mfc0 (Register rt, Register rd) { emit_long(insn_COP0( mfc0_op, (int)rt->encoding(), (int)rd->encoding())); } ++ void dmfc0 (Register rt, FloatRegister rd) { emit_long(insn_COP0(dmfc0_op, (int)rt->encoding(), (int)rd->encoding())); } ++ // MFGC0, DMFGC0, MTGC0, DMTGC0 not implemented yet ++ void mtc0 (Register rt, Register rd) { emit_long(insn_COP0( mtc0_op, (int)rt->encoding(), (int)rd->encoding())); } ++ void dmtc0 (Register rt, FloatRegister rd) { emit_long(insn_COP0(dmtc0_op, (int)rt->encoding(), (int)rd->encoding())); } ++ //COP0 end ++ ++ ++ //COP1 ++ void mfc1 (Register rt, FloatRegister fs) { emit_long(insn_COP1 (mfc1_op, (int)rt->encoding(), (int)fs->encoding())); } ++ void dmfc1(Register rt, FloatRegister fs) { emit_long(insn_COP1(dmfc1_op, (int)rt->encoding(), (int)fs->encoding())); } ++ void cfc1 (Register rt, int fs) { emit_long(insn_COP1( cfc1_op, (int)rt->encoding(), fs)); } ++ void mfhc1(Register rt, int fs) { emit_long(insn_COP1(mfhc1_op, (int)rt->encoding(), fs)); } ++ void mtc1 (Register rt, FloatRegister fs) { emit_long(insn_COP1( mtc1_op, (int)rt->encoding(), (int)fs->encoding())); } ++ void dmtc1(Register rt, FloatRegister fs) { emit_long(insn_COP1(dmtc1_op, (int)rt->encoding(), (int)fs->encoding())); } ++ void ctc1 (Register rt, FloatRegister fs) { emit_long(insn_COP1( ctc1_op, (int)rt->encoding(), (int)fs->encoding())); } ++ void ctc1 (Register rt, int fs) { emit_long(insn_COP1(ctc1_op, (int)rt->encoding(), fs)); } ++ void mthc1(Register rt, int fs) { emit_long(insn_COP1(mthc1_op, (int)rt->encoding(), fs)); } ++ ++ void bc1f (int off) { emit_long(insn_ORRI(cop1_op, bc1f_op, bcf_op, off)); has_delay_slot(); } ++ void bc1fl(int off) { emit_long(insn_ORRI(cop1_op, bc1f_op, bcfl_op, off)); has_delay_slot(); } ++ void bc1t (int off) { emit_long(insn_ORRI(cop1_op, bc1f_op, bct_op, off)); has_delay_slot(); } ++ void bc1tl(int off) { emit_long(insn_ORRI(cop1_op, bc1f_op, bctl_op, off)); has_delay_slot(); } ++ ++ void bc1f (address entry) { bc1f(offset(entry)); } ++ void bc1fl(address entry) { bc1fl(offset(entry)); } ++ void bc1t (address entry) { bc1t(offset(entry)); } ++ void bc1tl(address entry) { bc1tl(offset(entry)); } ++ ++ void bc1f (Label& L) { bc1f(target(L)); } ++ void bc1fl(Label& L) { bc1fl(target(L)); } ++ void bc1t (Label& L) { bc1t(target(L)); } ++ void bc1tl(Label& L) { bc1tl(target(L)); } ++ ++//R0->encoding() is 0; INSN_SINGLE is enclosed by {} for ctags. 
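
The floating-point arithmetic that follows is all one fixed COP1 layout: primary opcode cop1_op, a format field (single_fmt here, double_fmt and ps_fmt further down), then ft/fs/fd and a 6-bit function code, which is what insn_F3RO() assembles. A standalone sketch with made-up register numbers:

    #include <cstdint>
    #include <cstdio>

    // COP1 packing as in insn_F3RO(): cop1<<26 | fmt<<21 | ft<<16 | fs<<11 | fd<<6 | funct.
    static uint32_t pack_cop1(uint32_t fmt, uint32_t ft, uint32_t fs, uint32_t fd, uint32_t funct) {
      const uint32_t cop1_op = 0x11;   // value from the ops enum above
      return (cop1_op << 26) | (fmt << 21) | (ft << 16) | (fs << 11) | (fd << 6) | funct;
    }

    int main() {
      const uint32_t single_fmt = 0x10, fadd_op = 0x00;        // values from the enums above
      uint32_t insn = pack_cop1(single_fmt, 2, 1, 0, fadd_op); // add.s f0, f1, f2
      printf("0x%08x\n", insn);                                // prints 0x46020800
      return 0;
    }
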
++#define INSN_SINGLE(r1, r2, r3, op) \ ++ { emit_long(insn_F3RO(single_fmt, (int)r1->encoding(), (int)r2->encoding(), (int)r3->encoding(), op));} ++ void add_s (FloatRegister fd, FloatRegister fs, FloatRegister ft) {INSN_SINGLE(ft, fs, fd, fadd_op)} ++ void sub_s (FloatRegister fd, FloatRegister fs, FloatRegister ft) {INSN_SINGLE(ft, fs, fd, fsub_op)} ++ void mul_s (FloatRegister fd, FloatRegister fs, FloatRegister ft) {INSN_SINGLE(ft, fs, fd, fmul_op)} ++ void div_s (FloatRegister fd, FloatRegister fs, FloatRegister ft) {INSN_SINGLE(ft, fs, fd, fdiv_op)} ++ void sqrt_s (FloatRegister fd, FloatRegister fs) {INSN_SINGLE(R0, fs, fd, fsqrt_op)} ++ void abs_s (FloatRegister fd, FloatRegister fs) {INSN_SINGLE(R0, fs, fd, fabs_op)} ++ void mov_s (FloatRegister fd, FloatRegister fs) {INSN_SINGLE(R0, fs, fd, fmov_op)} ++ void neg_s (FloatRegister fd, FloatRegister fs) {INSN_SINGLE(R0, fs, fd, fneg_op)} ++ void round_l_s(FloatRegister fd, FloatRegister fs) {INSN_SINGLE(R0, fs, fd, froundl_op)} ++ void trunc_l_s(FloatRegister fd, FloatRegister fs) {INSN_SINGLE(R0, fs, fd, ftruncl_op)} ++ void ceil_l_s (FloatRegister fd, FloatRegister fs) {INSN_SINGLE(R0, fs, fd, fceill_op)} ++ void floor_l_s(FloatRegister fd, FloatRegister fs) {INSN_SINGLE(R0, fs, fd, ffloorl_op)} ++ void round_w_s(FloatRegister fd, FloatRegister fs) {INSN_SINGLE(R0, fs, fd, froundw_op)} ++ void trunc_w_s(FloatRegister fd, FloatRegister fs) {INSN_SINGLE(R0, fs, fd, ftruncw_op)} ++ void ceil_w_s (FloatRegister fd, FloatRegister fs) {INSN_SINGLE(R0, fs, fd, fceilw_op)} ++ void floor_w_s(FloatRegister fd, FloatRegister fs) {INSN_SINGLE(R0, fs, fd, ffloorw_op)} ++ //null ++ void movf_s(FloatRegister fs, FloatRegister fd, int cc = 0) { ++ assert(cc >= 0 && cc <= 7, "cc is 3 bits"); ++ emit_long((cop1_op<<26) | (single_fmt<<21) | (cc<<18) | ((int)fs->encoding()<<11) | ((int)fd->encoding()<<6) | movf_f_op );} ++ void movt_s(FloatRegister fs, FloatRegister fd, int cc = 0) { ++ assert(cc >= 0 && cc <= 7, "cc is 3 bits"); ++ emit_long((cop1_op<<26) | (single_fmt<<21) | (cc<<18) | 1<<16 | ((int)fs->encoding()<<11) | ((int)fd->encoding()<<6) | movf_f_op );} ++ void movz_s (FloatRegister fd, FloatRegister fs, Register rt) {INSN_SINGLE(rt, fs, fd, movz_f_op)} ++ void movn_s (FloatRegister fd, FloatRegister fs, Register rt) {INSN_SINGLE(rt, fs, fd, movn_f_op)} ++ //null ++ void recip_s (FloatRegister fd, FloatRegister fs) {INSN_SINGLE(R0, fs, fd, frecip_op)} ++ void rsqrt_s (FloatRegister fd, FloatRegister fs) {INSN_SINGLE(R0, fs, fd, frsqrt_op)} ++ //null ++ void cvt_d_s (FloatRegister fd, FloatRegister fs) {INSN_SINGLE(R0, fs, fd, fcvtd_op)} ++ //null ++ void cvt_w_s (FloatRegister fd, FloatRegister fs) {INSN_SINGLE(R0, fs, fd, fcvtw_op)} ++ void cvt_l_s (FloatRegister fd, FloatRegister fs) {INSN_SINGLE(R0, fs, fd, fcvtl_op)} ++ void cvt_ps_s(FloatRegister fd, FloatRegister fs, FloatRegister ft) {INSN_SINGLE(ft, fs, fd, fcvtps_op)} ++ //null ++ void c_f_s (FloatRegister fs, FloatRegister ft) {INSN_SINGLE(ft, fs, R0, f_cond)} ++ void c_un_s (FloatRegister fs, FloatRegister ft) {INSN_SINGLE(ft, fs, R0, un_cond)} ++ void c_eq_s (FloatRegister fs, FloatRegister ft) {INSN_SINGLE(ft, fs, R0, eq_cond)} ++ void c_ueq_s (FloatRegister fs, FloatRegister ft) {INSN_SINGLE(ft, fs, R0, ueq_cond)} ++ void c_olt_s (FloatRegister fs, FloatRegister ft) {INSN_SINGLE(ft, fs, R0, olt_cond)} ++ void c_ult_s (FloatRegister fs, FloatRegister ft) {INSN_SINGLE(ft, fs, R0, ult_cond)} ++ void c_ole_s (FloatRegister fs, FloatRegister ft) {INSN_SINGLE(ft, fs, R0, ole_cond)} 
++ void c_ule_s (FloatRegister fs, FloatRegister ft) {INSN_SINGLE(ft, fs, R0, ule_cond)} ++ void c_sf_s (FloatRegister fs, FloatRegister ft) {INSN_SINGLE(ft, fs, R0, sf_cond)} ++ void c_ngle_s(FloatRegister fs, FloatRegister ft) {INSN_SINGLE(ft, fs, R0, ngle_cond)} ++ void c_seq_s (FloatRegister fs, FloatRegister ft) {INSN_SINGLE(ft, fs, R0, seq_cond)} ++ void c_ngl_s (FloatRegister fs, FloatRegister ft) {INSN_SINGLE(ft, fs, R0, ngl_cond)} ++ void c_lt_s (FloatRegister fs, FloatRegister ft) {INSN_SINGLE(ft, fs, R0, lt_cond)} ++ void c_nge_s (FloatRegister fs, FloatRegister ft) {INSN_SINGLE(ft, fs, R0, nge_cond)} ++ void c_le_s (FloatRegister fs, FloatRegister ft) {INSN_SINGLE(ft, fs, R0, le_cond)} ++ void c_ngt_s (FloatRegister fs, FloatRegister ft) {INSN_SINGLE(ft, fs, R0, ngt_cond)} ++ ++#undef INSN_SINGLE ++ ++ ++//R0->encoding() is 0; INSN_DOUBLE is enclosed by {} for ctags. ++#define INSN_DOUBLE(r1, r2, r3, op) \ ++ { emit_long(insn_F3RO(double_fmt, (int)r1->encoding(), (int)r2->encoding(), (int)r3->encoding(), op));} ++ ++ void add_d (FloatRegister fd, FloatRegister fs, FloatRegister ft) {INSN_DOUBLE(ft, fs, fd, fadd_op)} ++ void sub_d (FloatRegister fd, FloatRegister fs, FloatRegister ft) {INSN_DOUBLE(ft, fs, fd, fsub_op)} ++ void mul_d (FloatRegister fd, FloatRegister fs, FloatRegister ft) {INSN_DOUBLE(ft, fs, fd, fmul_op)} ++ void div_d (FloatRegister fd, FloatRegister fs, FloatRegister ft) {INSN_DOUBLE(ft, fs, fd, fdiv_op)} ++ void sqrt_d (FloatRegister fd, FloatRegister fs) {INSN_DOUBLE(R0, fs, fd, fsqrt_op)} ++ void abs_d (FloatRegister fd, FloatRegister fs) {INSN_DOUBLE(R0, fs, fd, fabs_op)} ++ void mov_d (FloatRegister fd, FloatRegister fs) {INSN_DOUBLE(R0, fs, fd, fmov_op)} ++ void neg_d (FloatRegister fd, FloatRegister fs) {INSN_DOUBLE(R0, fs, fd, fneg_op)} ++ void round_l_d(FloatRegister fd, FloatRegister fs) {INSN_DOUBLE(R0, fs, fd, froundl_op)} ++ void trunc_l_d(FloatRegister fd, FloatRegister fs) {INSN_DOUBLE(R0, fs, fd, ftruncl_op)} ++ void ceil_l_d (FloatRegister fd, FloatRegister fs) {INSN_DOUBLE(R0, fs, fd, fceill_op)} ++ void floor_l_d(FloatRegister fd, FloatRegister fs) {INSN_DOUBLE(R0, fs, fd, ffloorl_op)} ++ void round_w_d(FloatRegister fd, FloatRegister fs) {INSN_DOUBLE(R0, fs, fd, froundw_op)} ++ void trunc_w_d(FloatRegister fd, FloatRegister fs) {INSN_DOUBLE(R0, fs, fd, ftruncw_op)} ++ void ceil_w_d (FloatRegister fd, FloatRegister fs) {INSN_DOUBLE(R0, fs, fd, fceilw_op)} ++ void floor_w_d(FloatRegister fd, FloatRegister fs) {INSN_DOUBLE(R0, fs, fd, ffloorw_op)} ++ //null ++ void movf_d(FloatRegister fs, FloatRegister fd, int cc = 0) { ++ assert(cc >= 0 && cc <= 7, "cc is 3 bits"); ++ emit_long((cop1_op<<26) | (double_fmt<<21) | (cc<<18) | ((int)fs->encoding()<<11) | ((int)fd->encoding()<<6) | movf_f_op );} ++ void movt_d(FloatRegister fs, FloatRegister fd, int cc = 0) { ++ assert(cc >= 0 && cc <= 7, "cc is 3 bits"); ++ emit_long((cop1_op<<26) | (double_fmt<<21) | (cc<<18) | 1<<16 | ((int)fs->encoding()<<11) | ((int)fd->encoding()<<6) | movf_f_op );} ++ void movz_d (FloatRegister fd, FloatRegister fs, Register rt) {INSN_DOUBLE(rt, fs, fd, movz_f_op)} ++ void movn_d (FloatRegister fd, FloatRegister fs, Register rt) {INSN_DOUBLE(rt, fs, fd, movn_f_op)} ++ //null ++ void recip_d (FloatRegister fd, FloatRegister fs) {INSN_DOUBLE(R0, fs, fd, frecip_op)} ++ void rsqrt_d (FloatRegister fd, FloatRegister fs) {INSN_DOUBLE(R0, fs, fd, frsqrt_op)} ++ //null ++ void cvt_s_d (FloatRegister fd, FloatRegister fs) {INSN_DOUBLE(R0, fs, fd, fcvts_op)} ++ void cvt_l_d 
(FloatRegister fd, FloatRegister fs) {INSN_DOUBLE(R0, fs, fd, fcvtl_op)} ++ //null ++ void cvt_w_d (FloatRegister fd, FloatRegister fs) {INSN_DOUBLE(R0, fs, fd, fcvtw_op)} ++ //null ++ void c_f_d (FloatRegister fs, FloatRegister ft) {INSN_DOUBLE(ft, fs, R0, f_cond)} ++ void c_un_d (FloatRegister fs, FloatRegister ft) {INSN_DOUBLE(ft, fs, R0, un_cond)} ++ void c_eq_d (FloatRegister fs, FloatRegister ft) {INSN_DOUBLE(ft, fs, R0, eq_cond)} ++ void c_ueq_d (FloatRegister fs, FloatRegister ft) {INSN_DOUBLE(ft, fs, R0, ueq_cond)} ++ void c_olt_d (FloatRegister fs, FloatRegister ft) {INSN_DOUBLE(ft, fs, R0, olt_cond)} ++ void c_ult_d (FloatRegister fs, FloatRegister ft) {INSN_DOUBLE(ft, fs, R0, ult_cond)} ++ void c_ole_d (FloatRegister fs, FloatRegister ft) {INSN_DOUBLE(ft, fs, R0, ole_cond)} ++ void c_ule_d (FloatRegister fs, FloatRegister ft) {INSN_DOUBLE(ft, fs, R0, ule_cond)} ++ void c_sf_d (FloatRegister fs, FloatRegister ft) {INSN_DOUBLE(ft, fs, R0, sf_cond)} ++ void c_ngle_d(FloatRegister fs, FloatRegister ft) {INSN_DOUBLE(ft, fs, R0, ngle_cond)} ++ void c_seq_d (FloatRegister fs, FloatRegister ft) {INSN_DOUBLE(ft, fs, R0, seq_cond)} ++ void c_ngl_d (FloatRegister fs, FloatRegister ft) {INSN_DOUBLE(ft, fs, R0, ngl_cond)} ++ void c_lt_d (FloatRegister fs, FloatRegister ft) {INSN_DOUBLE(ft, fs, R0, lt_cond)} ++ void c_nge_d (FloatRegister fs, FloatRegister ft) {INSN_DOUBLE(ft, fs, R0, nge_cond)} ++ void c_le_d (FloatRegister fs, FloatRegister ft) {INSN_DOUBLE(ft, fs, R0, le_cond)} ++ void c_ngt_d (FloatRegister fs, FloatRegister ft) {INSN_DOUBLE(ft, fs, R0, ngt_cond)} ++ ++#undef INSN_DOUBLE ++ ++ ++ //null ++ void cvt_s_w(FloatRegister fd, FloatRegister fs) { emit_long(insn_F3RO(word_fmt, 0, (int)fs->encoding(), (int)fd->encoding(), fcvts_op)); } ++ void cvt_d_w(FloatRegister fd, FloatRegister fs) { emit_long(insn_F3RO(word_fmt, 0, (int)fs->encoding(), (int)fd->encoding(), fcvtd_op)); } ++ //null ++ void cvt_s_l(FloatRegister fd, FloatRegister fs) { emit_long(insn_F3RO(long_fmt, 0, (int)fs->encoding(), (int)fd->encoding(), fcvts_op)); } ++ void cvt_d_l(FloatRegister fd, FloatRegister fs) { emit_long(insn_F3RO(long_fmt, 0, (int)fs->encoding(), (int)fd->encoding(), fcvtd_op)); } ++ //null ++ ++ ++//R0->encoding() is 0; INSN_PS is enclosed by {} for ctags. 
++#define INSN_PS(r1, r2, r3, op) \ ++ { emit_long(insn_F3RO(ps_fmt, (int)r1->encoding(), (int)r2->encoding(), (int)r3->encoding(), op));} ++ ++ void add_ps (FloatRegister fd, FloatRegister fs, FloatRegister ft) {INSN_PS(ft, fs, fd, fadd_op)} ++ void sub_ps (FloatRegister fd, FloatRegister fs, FloatRegister ft) {INSN_PS(ft, fs, fd, fsub_op)} ++ void mul_ps (FloatRegister fd, FloatRegister fs, FloatRegister ft) {INSN_PS(ft, fs, fd, fmul_op)} ++ //null ++ void abs_ps (FloatRegister fd, FloatRegister fs) {INSN_PS(R0, fs, fd, fabs_op)} ++ void mov_ps (FloatRegister fd, FloatRegister fs) {INSN_PS(R0, fs, fd, fmov_op)} ++ void neg_ps (FloatRegister fd, FloatRegister fs) {INSN_PS(R0, fs, fd, fneg_op)} ++ //null ++ //void movf_ps(FloatRegister rd, FloatRegister rs, FPConditionCode cc) { unimplemented(" movf_ps")} ++ //void movt_ps(FloatRegister rd, FloatRegister rs, FPConditionCode cc) { unimplemented(" movt_ps") } ++ void movz_ps (FloatRegister fd, FloatRegister fs, Register rt) {INSN_PS(rt, fs, fd, movz_f_op)} ++ void movn_ps (FloatRegister fd, FloatRegister fs, Register rt) {INSN_PS(rt, fs, fd, movn_f_op)} ++ //null ++ void cvt_s_pu (FloatRegister fd, FloatRegister fs) {INSN_PS(R0, fs, fd, fcvts_op)} ++ //null ++ void cvt_s_pl (FloatRegister fd, FloatRegister fs) {INSN_PS(R0, fs, fd, fcvtspl_op)} ++ //null ++ void pll_ps (FloatRegister fd, FloatRegister fs, FloatRegister ft) {INSN_PS(ft, fs, fd, fpll_op)} ++ void plu_ps (FloatRegister fd, FloatRegister fs, FloatRegister ft) {INSN_PS(ft, fs, fd, fplu_op)} ++ void pul_ps (FloatRegister fd, FloatRegister fs, FloatRegister ft) {INSN_PS(ft, fs, fd, fpul_op)} ++ void puu_ps (FloatRegister fd, FloatRegister fs, FloatRegister ft) {INSN_PS(ft, fs, fd, fpuu_op)} ++ void c_f_ps (FloatRegister fs, FloatRegister ft) {INSN_PS(ft, fs, R0, f_cond)} ++ void c_un_ps (FloatRegister fs, FloatRegister ft) {INSN_PS(ft, fs, R0, un_cond)} ++ void c_eq_ps (FloatRegister fs, FloatRegister ft) {INSN_PS(ft, fs, R0, eq_cond)} ++ void c_ueq_ps (FloatRegister fs, FloatRegister ft) {INSN_PS(ft, fs, R0, ueq_cond)} ++ void c_olt_ps (FloatRegister fs, FloatRegister ft) {INSN_PS(ft, fs, R0, olt_cond)} ++ void c_ult_ps (FloatRegister fs, FloatRegister ft) {INSN_PS(ft, fs, R0, ult_cond)} ++ void c_ole_ps (FloatRegister fs, FloatRegister ft) {INSN_PS(ft, fs, R0, ole_cond)} ++ void c_ule_ps (FloatRegister fs, FloatRegister ft) {INSN_PS(ft, fs, R0, ule_cond)} ++ void c_sf_ps (FloatRegister fs, FloatRegister ft) {INSN_PS(ft, fs, R0, sf_cond)} ++ void c_ngle_ps(FloatRegister fs, FloatRegister ft) {INSN_PS(ft, fs, R0, ngle_cond)} ++ void c_seq_ps (FloatRegister fs, FloatRegister ft) {INSN_PS(ft, fs, R0, seq_cond)} ++ void c_ngl_ps (FloatRegister fs, FloatRegister ft) {INSN_PS(ft, fs, R0, ngl_cond)} ++ void c_lt_ps (FloatRegister fs, FloatRegister ft) {INSN_PS(ft, fs, R0, lt_cond)} ++ void c_nge_ps (FloatRegister fs, FloatRegister ft) {INSN_PS(ft, fs, R0, nge_cond)} ++ void c_le_ps (FloatRegister fs, FloatRegister ft) {INSN_PS(ft, fs, R0, le_cond)} ++ void c_ngt_ps (FloatRegister fs, FloatRegister ft) {INSN_PS(ft, fs, R0, ngt_cond)} ++ //null ++#undef INSN_PS ++ //COP1 end ++ ++ ++ //COP1X ++//R0->encoding() is 0; INSN_SINGLE is enclosed by {} for ctags. 
++#define INSN_COP1X(r0, r1, r2, r3, op) \ ++ { emit_long(insn_F3ROX((int)r0->encoding(), (int)r1->encoding(), (int)r2->encoding(), (int)r3->encoding(), op));} ++ void madd_s(FloatRegister fd, FloatRegister fr, FloatRegister fs, FloatRegister ft) {INSN_COP1X(fr, ft, fs, fd, madd_s_op) } ++ void madd_d(FloatRegister fd, FloatRegister fr, FloatRegister fs, FloatRegister ft) {INSN_COP1X(fr, ft, fs, fd, madd_d_op) } ++ void madd_ps(FloatRegister fd, FloatRegister fr, FloatRegister fs, FloatRegister ft){INSN_COP1X(fr, ft, fs, fd, madd_ps_op) } ++ void msub_s(FloatRegister fd, FloatRegister fr, FloatRegister fs, FloatRegister ft) {INSN_COP1X(fr, ft, fs, fd, msub_s_op) } ++ void msub_d(FloatRegister fd, FloatRegister fr, FloatRegister fs, FloatRegister ft) {INSN_COP1X(fr, ft, fs, fd, msub_d_op) } ++ void msub_ps(FloatRegister fd, FloatRegister fr, FloatRegister fs, FloatRegister ft){INSN_COP1X(fr, ft, fs, fd, msub_ps_op) } ++ void nmadd_s(FloatRegister fd, FloatRegister fr, FloatRegister fs, FloatRegister ft) {INSN_COP1X(fr, ft, fs, fd, nmadd_s_op) } ++ void nmadd_d(FloatRegister fd, FloatRegister fr, FloatRegister fs, FloatRegister ft) {INSN_COP1X(fr, ft, fs, fd, nmadd_d_op) } ++ void nmadd_ps(FloatRegister fd, FloatRegister fr, FloatRegister fs, FloatRegister ft){INSN_COP1X(fr, ft, fs, fd, nmadd_ps_op) } ++ void nmsub_s(FloatRegister fd, FloatRegister fr, FloatRegister fs, FloatRegister ft) {INSN_COP1X(fr, ft, fs, fd, nmsub_s_op) } ++ void nmsub_d(FloatRegister fd, FloatRegister fr, FloatRegister fs, FloatRegister ft) {INSN_COP1X(fr, ft, fs, fd, nmsub_d_op) } ++ void nmsub_ps(FloatRegister fd, FloatRegister fr, FloatRegister fs, FloatRegister ft){INSN_COP1X(fr, ft, fs, fd, nmsub_ps_op) } ++#undef INSN_COP1X ++ //COP1X end ++ ++ //SPECIAL2 ++//R0->encoding() is 0; INSN_PS is enclosed by {} for ctags. 
++#define INSN_S2(op) \ ++ { emit_long((special2_op << 26) | ((int)rs->encoding() << 21) | ((int)rt->encoding() << 16) | ((int)rd->encoding() << 11) | op);} ++ ++ void madd (Register rs, Register rt) { emit_long((special2_op << 26) | ((int)rs->encoding() << 21) | ((int)rt->encoding() << 16) | madd_op); } ++ void maddu (Register rs, Register rt) { emit_long((special2_op << 26) | ((int)rs->encoding() << 21) | ((int)rt->encoding() << 16) | maddu_op); } ++ void mul (Register rd, Register rs, Register rt) { INSN_S2(mul_op) } ++ void gsandn (Register rd, Register rs, Register rt) { INSN_S2((0x12 << 6) | gs0x03_op) } ++ void msub (Register rs, Register rt) { emit_long((special2_op << 26) | ((int)rs->encoding() << 21) | ((int)rt->encoding() << 16) | msub_op); } ++ void msubu (Register rs, Register rt) { emit_long((special2_op << 26) | ((int)rs->encoding() << 21) | ((int)rt->encoding() << 16) | msubu_op); } ++ void gsorn (Register rd, Register rs, Register rt) { INSN_S2((0x12 << 6) | gs0x06_op) } ++ ++ void gsmult (Register rd, Register rs, Register rt) { INSN_S2(gsmult_op) } ++ void gsdmult (Register rd, Register rs, Register rt) { INSN_S2(gsdmult_op) } ++ void gsmultu (Register rd, Register rs, Register rt) { INSN_S2(gsmultu_op) } ++ void gsdmultu(Register rd, Register rs, Register rt) { INSN_S2(gsdmultu_op)} ++ void gsdiv (Register rd, Register rs, Register rt) { INSN_S2(gsdiv_op) } ++ void gsddiv (Register rd, Register rs, Register rt) { INSN_S2(gsddiv_op) } ++ void gsdivu (Register rd, Register rs, Register rt) { INSN_S2(gsdivu_op) } ++ void gsddivu (Register rd, Register rs, Register rt) { INSN_S2(gsddivu_op) } ++ void gsmod (Register rd, Register rs, Register rt) { INSN_S2(gsmod_op) } ++ void gsdmod (Register rd, Register rs, Register rt) { INSN_S2(gsdmod_op) } ++ void gsmodu (Register rd, Register rs, Register rt) { INSN_S2(gsmodu_op) } ++ void gsdmodu (Register rd, Register rs, Register rt) { INSN_S2(gsdmodu_op) } ++ void clz (Register rd, Register rs) { emit_long((special2_op << 26) | ((int)rs->encoding() << 21) | ((int)rd->encoding() << 16) | ((int)rd->encoding() << 11) | clz_op); } ++ void clo (Register rd, Register rs) { emit_long((special2_op << 26) | ((int)rs->encoding() << 21) | ((int)rd->encoding() << 16) | ((int)rd->encoding() << 11) | clo_op); } ++ void ctz (Register rd, Register rs) { emit_long((special2_op << 26) | ((int)rs->encoding() << 21) | ((int)rd->encoding() << 16) | ((int)rd->encoding() << 11) | 0 << 6| xctx_op); } ++ void cto (Register rd, Register rs) { emit_long((special2_op << 26) | ((int)rs->encoding() << 21) | ((int)rd->encoding() << 16) | ((int)rd->encoding() << 11) | 1 << 6| xctx_op); } ++ void dctz(Register rd, Register rs) { emit_long((special2_op << 26) | ((int)rs->encoding() << 21) | ((int)rd->encoding() << 16) | ((int)rd->encoding() << 11) | 2 << 6| xctx_op); } ++ void dcto(Register rd, Register rs) { emit_long((special2_op << 26) | ((int)rs->encoding() << 21) | ((int)rd->encoding() << 16) | ((int)rd->encoding() << 11) | 3 << 6| xctx_op); } ++ void dclz(Register rd, Register rs) { emit_long((special2_op << 26) | ((int)rs->encoding() << 21) | ((int)rd->encoding() << 16) | ((int)rd->encoding() << 11) | dclz_op); } ++ void dclo(Register rd, Register rs) { emit_long((special2_op << 26) | ((int)rs->encoding() << 21) | ((int)rd->encoding() << 16) | ((int)rd->encoding() << 11) | dclo_op); } ++ ++#undef INSN_S2 ++ ++ //SPECIAL3 ++/* ++// FIXME ++#define is_0_to_32(a, b) \ ++ assert (a >= 0, " just a check"); \ ++ assert (a <= 0, " just a check"); \ ++ assert (b >= 
0, " just a check"); \ ++ assert (b <= 0, " just a check"); \ ++ assert (a+b >= 0, " just a check"); \ ++ assert (a+b <= 0, " just a check"); ++ */ ++#define is_0_to_32(a, b) ++ ++ void ins (Register rt, Register rs, int pos, int size) { is_0_to_32(pos, size); emit_long((special3_op << 26) | ((int)rs->encoding() << 21) | ((int)rt->encoding() << 16) | (low(pos+size-1, 5) << 11) | (low(pos, 5) << 6) | ins_op); } ++ void dinsm(Register rt, Register rs, int pos, int size) { is_0_to_32(pos, size); emit_long((special3_op << 26) | ((int)rs->encoding() << 21) | ((int)rt->encoding() << 16) | (low(pos+size-33, 5) << 11) | (low(pos, 5) << 6) | dinsm_op); } ++ void dinsu(Register rt, Register rs, int pos, int size) { is_0_to_32(pos, size); emit_long((special3_op << 26) | ((int)rs->encoding() << 21) | ((int)rt->encoding() << 16) | (low(pos+size-33, 5) << 11) | (low(pos-32, 5) << 6) | dinsu_op); } ++ void dins (Register rt, Register rs, int pos, int size) { ++ guarantee((0 <= pos) && (pos < 32), "pos must be in [0, 32)"); ++ guarantee((0 < size) && (size <= 32), "size must be in (0, 32]"); ++ guarantee((0 < pos + size) && (pos + size <= 32), "pos + size must be in (0, 32]"); ++ ++ emit_long((special3_op << 26) | ((int)rs->encoding() << 21) | ((int)rt->encoding() << 16) | (low(pos+size-1, 5) << 11) | (low(pos, 5) << 6) | dins_op); ++ } ++ ++ void repl_qb (Register rd, int const8) { assert(VM_Version::supports_dsp(), ""); emit_long((special3_op << 26) | (low(const8, 8) << 16) | ((int)rd->encoding() << 11) | repl_qb_op << 6 | re1_op); } ++ void replv_qb(Register rd, Register rt) { assert(VM_Version::supports_dsp(), ""); emit_long((special3_op << 26) | ((int)rt->encoding() << 16) | ((int)rd->encoding() << 11) | replv_qb_op << 6 | re1_op ); } ++ void repl_ph (Register rd, int const10) { assert(VM_Version::supports_dsp(), ""); emit_long((special3_op << 26) | (low(const10, 10) << 16) | ((int)rd->encoding() << 11) | repl_ph_op << 6 | re1_op); } ++ void replv_ph(Register rd, Register rt) { assert(VM_Version::supports_dsp(), ""); emit_long((special3_op << 26) | ((int)rt->encoding() << 16) | ((int)rd->encoding() << 11) | replv_ph_op << 6 | re1_op ); } ++ ++ void repl_ob (Register rd, int const8) { assert(VM_Version::supports_dsp(), ""); emit_long((special3_op << 26) | (low(const8, 8) << 16) | ((int)rd->encoding() << 11) | repl_ob_op << 6 | re2_op); } ++ void replv_ob(Register rd, Register rt) { assert(VM_Version::supports_dsp(), ""); emit_long((special3_op << 26) | ((int)rt->encoding() << 16) | ((int)rd->encoding() << 11) | replv_ob_op << 6 | re2_op ); } ++ void repl_qh (Register rd, int const10) { assert(VM_Version::supports_dsp(), ""); emit_long((special3_op << 26) | (low(const10, 10) << 16) | ((int)rd->encoding() << 11) | repl_qh_op << 6 | re2_op); } ++ void replv_qh(Register rd, Register rt) { assert(VM_Version::supports_dsp(), ""); emit_long((special3_op << 26) | ((int)rt->encoding() << 16) | ((int)rd->encoding() << 11) | replv_qh_op << 6 | re2_op ); } ++ void repl_pw (Register rd, int const10) { assert(VM_Version::supports_dsp(), ""); emit_long((special3_op << 26) | (low(const10, 10) << 16) | ((int)rd->encoding() << 11) | repl_pw_op << 6 | re2_op); } ++ void replv_pw(Register rd, Register rt) { assert(VM_Version::supports_dsp(), ""); emit_long((special3_op << 26) | ((int)rt->encoding() << 16) | ((int)rd->encoding() << 11) | replv_pw_op << 6 | re2_op ); } ++ ++ void sdc1(FloatRegister ft, Register base, int off) { emit_long(insn_ORRI(sdc1_op, (int)base->encoding(), (int)ft->encoding(), off)); } ++ void 
sdc1(FloatRegister ft, Address dst); ++ void swc1(FloatRegister ft, Register base, int off) { emit_long(insn_ORRI(swc1_op, (int)base->encoding(), (int)ft->encoding(), off)); } ++ void swc1(FloatRegister ft, Address dst); ++ ++ ++ static void print_instruction(int); ++ int patched_branch(int dest_pos, int inst, int inst_pos); ++ int branch_destination(int inst, int pos); ++ ++ // Loongson extension ++ ++ // gssq/gslq/gssqc1/gslqc1: vAddr = sign_extend(offset << 4 ) + GPR[base]. Therefore, the off should be ">> 4". ++ void gslble(Register rt, Register base, Register bound) { ++ emit_long((gs_lwc2_op << 26) | ((int)base->encoding() << 21) | ((int)rt->encoding() << 16) | ((int)bound->encoding() << 11) | 0 << 6 | gslble_op); ++ } ++ ++ void gslbgt(Register rt, Register base, Register bound) { ++ emit_long((gs_lwc2_op << 26) | ((int)base->encoding() << 21) | ((int)rt->encoding() << 16) | ((int)bound->encoding() << 11) | 0 << 6 | gslbgt_op); ++ } ++ ++ void gslhle(Register rt, Register base, Register bound) { ++ emit_long((gs_lwc2_op << 26) | ((int)base->encoding() << 21) | ((int)rt->encoding() << 16) | ((int)bound->encoding() << 11) | 0 << 6 | gslhle_op); ++ } ++ ++ void gslhgt(Register rt, Register base, Register bound) { ++ emit_long((gs_lwc2_op << 26) | ((int)base->encoding() << 21) | ((int)rt->encoding() << 16) | ((int)bound->encoding() << 11) | 0 << 6 | gslhgt_op); ++ } ++ ++ void gslwle(Register rt, Register base, Register bound) { ++ emit_long((gs_lwc2_op << 26) | ((int)base->encoding() << 21) | ((int)rt->encoding() << 16) | ((int)bound->encoding() << 11) | 0 << 6 | gslwle_op); ++ } ++ ++ void gslwgt(Register rt, Register base, Register bound) { ++ emit_long((gs_lwc2_op << 26) | ((int)base->encoding() << 21) | ((int)rt->encoding() << 16) | ((int)bound->encoding() << 11) | 0 << 6 | gslwgt_op); ++ } ++ ++ void gsldle(Register rt, Register base, Register bound) { ++ emit_long((gs_lwc2_op << 26) | ((int)base->encoding() << 21) | ((int)rt->encoding() << 16) | ((int)bound->encoding() << 11) | 0 << 6 | gsldle_op); ++ } ++ ++ void gsldgt(Register rt, Register base, Register bound) { ++ emit_long((gs_lwc2_op << 26) | ((int)base->encoding() << 21) | ((int)rt->encoding() << 16) | ((int)bound->encoding() << 11) | 0 << 6 | gsldgt_op); ++ } ++ ++ void gslwlec1(FloatRegister rt, Register base, Register bound) { ++ emit_long((gs_lwc2_op << 26) | ((int)base->encoding() << 21) | ((int)rt->encoding() << 16) | ((int)bound->encoding() << 11) | 0 << 6 | gslwlec1_op); ++ } ++ ++ void gslwgtc1(FloatRegister rt, Register base, Register bound) { ++ emit_long((gs_lwc2_op << 26) | ((int)base->encoding() << 21) | ((int)rt->encoding() << 16) | ((int)bound->encoding() << 11) | 0 << 6 | gslwgtc1_op); ++ } ++ ++ void gsldlec1(FloatRegister rt, Register base, Register bound) { ++ emit_long((gs_lwc2_op << 26) | ((int)base->encoding() << 21) | ((int)rt->encoding() << 16) | ((int)bound->encoding() << 11) | 0 << 6 | gsldlec1_op); ++ } ++ ++ void gsldgtc1(FloatRegister rt, Register base, Register bound) { ++ emit_long((gs_lwc2_op << 26) | ((int)base->encoding() << 21) | ((int)rt->encoding() << 16) | ((int)bound->encoding() << 11) | 0 << 6 | gsldgtc1_op); ++ } ++ ++ void gslq(Register rq, Register rt, Register base, int off) { ++ assert(!(off & 0xF), "gslq: the low 4 bits of off must be 0"); ++ off = off >> 4; ++ assert(is_simm(off, 9),"gslq: off exceeds 9 bits"); ++ emit_long((gs_lwc2_op << 26) | ((int)base->encoding() << 21) | ((int)rt->encoding() << 16) | 0 << 15 | (low(off, 9) << 6) | gslq_op | (int)rq->encoding() ); ++ } 
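// --- A minimal standalone sketch (not part of the patch) of the offset handling that
// --- gslq above and gslqc1/gssq/gssqc1 below rely on: per the comment earlier in this
// --- header, vAddr = sign_extend(offset << 4) + GPR[base], so the byte offset must be
// --- 16-byte aligned, is stored as offset >> 4, and the stored value must fit in a
// --- signed 9-bit field placed at bits 6..14 (the "low(off, 9) << 6" term). The helper
// --- names fits_simm/encode_quadword_offset are hypothetical, not HotSpot APIs.
#include <cassert>
#include <cstdint>

static inline bool fits_simm(int32_t value, int bits) {
  // True if 'value' is representable as a signed immediate of width 'bits'.
  const int32_t lo = -(1 << (bits - 1));
  const int32_t hi =  (1 << (bits - 1)) - 1;
  return value >= lo && value <= hi;
}

static inline uint32_t encode_quadword_offset(int32_t byte_off) {
  assert((byte_off & 0xF) == 0 && "quadword offset must be 16-byte aligned");
  const int32_t scaled = byte_off >> 4;            // hardware sees offset / 16
  assert(fits_simm(scaled, 9) && "scaled offset must fit in 9 signed bits");
  return (uint32_t)(scaled & 0x1FF) << 6;          // low(off, 9) << 6, as in the patch
}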
++ ++ void gslqc1(FloatRegister rq, FloatRegister rt, Register base, int off) { ++ assert(!(off & 0xF), "gslqc1: the low 4 bits of off must be 0"); ++ off = off >> 4; ++ assert(is_simm(off, 9),"gslqc1: off exceeds 9 bits"); ++ emit_long((gs_lwc2_op << 26) | ((int)base->encoding() << 21) | ((int)rt->encoding() << 16) | 1 << 15 | (low(off, 9) << 6) | gslq_op | (int)rq->encoding() ); ++ } ++ ++ void gssble(Register rt, Register base, Register bound) { ++ emit_long((gs_swc2_op << 26) | ((int)base->encoding() << 21) | ((int)rt->encoding() << 16) | ((int)bound->encoding() << 11) | 0 << 6 | gssble_op); ++ } ++ ++ void gssbgt(Register rt, Register base, Register bound) { ++ emit_long((gs_swc2_op << 26) | ((int)base->encoding() << 21) | ((int)rt->encoding() << 16) | ((int)bound->encoding() << 11) | 0 << 6 | gssbgt_op); ++ } ++ ++ void gsshle(Register rt, Register base, Register bound) { ++ emit_long((gs_swc2_op << 26) | ((int)base->encoding() << 21) | ((int)rt->encoding() << 16) | ((int)bound->encoding() << 11) | 0 << 6 | gsshle_op); ++ } ++ ++ void gsshgt(Register rt, Register base, Register bound) { ++ emit_long((gs_swc2_op << 26) | ((int)base->encoding() << 21) | ((int)rt->encoding() << 16) | ((int)bound->encoding() << 11) | 0 << 6 | gsshgt_op); ++ } ++ ++ void gsswle(Register rt, Register base, Register bound) { ++ emit_long((gs_swc2_op << 26) | ((int)base->encoding() << 21) | ((int)rt->encoding() << 16) | ((int)bound->encoding() << 11) | 0 << 6 | gsswle_op); ++ } ++ ++ void gsswgt(Register rt, Register base, Register bound) { ++ emit_long((gs_swc2_op << 26) | ((int)base->encoding() << 21) | ((int)rt->encoding() << 16) | ((int)bound->encoding() << 11) | 0 << 6 | gsswgt_op); ++ } ++ ++ void gssdle(Register rt, Register base, Register bound) { ++ emit_long((gs_swc2_op << 26) | ((int)base->encoding() << 21) | ((int)rt->encoding() << 16) | ((int)bound->encoding() << 11) | 0 << 6 | gssdle_op); ++ } ++ ++ void gssdgt(Register rt, Register base, Register bound) { ++ emit_long((gs_swc2_op << 26) | ((int)base->encoding() << 21) | ((int)rt->encoding() << 16) | ((int)bound->encoding() << 11) | 0 << 6 | gssdgt_op); ++ } ++ ++ void gsswlec1(FloatRegister rt, Register base, Register bound) { ++ emit_long((gs_swc2_op << 26) | ((int)base->encoding() << 21) | ((int)rt->encoding() << 16) | ((int)bound->encoding() << 11) | 0 << 6 | gsswlec1_op); ++ } ++ ++ void gsswgtc1(FloatRegister rt, Register base, Register bound) { ++ emit_long((gs_swc2_op << 26) | ((int)base->encoding() << 21) | ((int)rt->encoding() << 16) | ((int)bound->encoding() << 11) | 0 << 6 | gsswgtc1_op); ++ } ++ ++ void gssdlec1(FloatRegister rt, Register base, Register bound) { ++ emit_long((gs_swc2_op << 26) | ((int)base->encoding() << 21) | ((int)rt->encoding() << 16) | ((int)bound->encoding() << 11) | 0 << 6 | gssdlec1_op); ++ } ++ ++ void gssdgtc1(FloatRegister rt, Register base, Register bound) { ++ emit_long((gs_swc2_op << 26) | ((int)base->encoding() << 21) | ((int)rt->encoding() << 16) | ((int)bound->encoding() << 11) | 0 << 6 | gssdgtc1_op); ++ } ++ ++ void gssq(Register rq, Register rt, Register base, int off) { ++ assert(!(off & 0xF), "gssq: the low 4 bits of off must be 0"); ++ off = off >> 4; ++ assert(is_simm(off, 9),"gssq: off exceeds 9 bits"); ++ emit_long((gs_swc2_op << 26) | ((int)base->encoding() << 21) | ((int)rt->encoding() << 16) | 0 << 15 | (low(off, 9) << 6) | gssq_op | (int)rq->encoding() ); ++ } ++ ++ void gssqc1(FloatRegister rq, FloatRegister rt, Register base, int off) { ++ assert(!(off & 0xF), "gssqc1: the low 4 bits 
of off must be 0"); ++ off = off >> 4; ++ assert(is_simm(off, 9),"gssqc1: off exceeds 9 bits"); ++ emit_long((gs_swc2_op << 26) | ((int)base->encoding() << 21) | ((int)rt->encoding() << 16) | 1 << 15 | (low(off, 9) << 6) | gssq_op | (int)rq->encoding() ); ++ } ++ ++ //LDC2 & SDC2 ++#define INSN(OPS, OP) \ ++ assert(is_simm(off, 8), "NAME: off exceeds 8 bits"); \ ++ assert(UseLEXT1, "check UseLEXT1"); \ ++ emit_long( (OPS << 26) | ((int)base->encoding() << 21) | ((int)rt->encoding() << 16) | \ ++ ((int)index->encoding() << 11) | (low(off, 8) << 3) | OP); ++ ++#define INSN_LDC2(NAME, op) \ ++ void NAME(Register rt, Register base, Register index, int off) { \ ++ INSN(gs_ldc2_op, op) \ ++ } ++ ++#define INSN_LDC2_F(NAME, op) \ ++ void NAME(FloatRegister rt, Register base, Register index, int off) { \ ++ INSN(gs_ldc2_op, op) \ ++ } ++ ++#define INSN_SDC2(NAME, op) \ ++ void NAME(Register rt, Register base, Register index, int off) { \ ++ INSN(gs_sdc2_op, op) \ ++ } ++ ++#define INSN_SDC2_F(NAME, op) \ ++ void NAME(FloatRegister rt, Register base, Register index, int off) { \ ++ INSN(gs_sdc2_op, op) \ ++ } ++ ++/* ++ void gslbx(Register rt, Register base, Register index, int off) { ++ assert(is_simm(off, 8), "gslbx: off exceeds 8 bits"); ++ assert(UseLEXT1, "check UseLEXT1"); ++ emit_long( (gs_ldc2_op << 26) | ((int)base->encoding() << 21) | ((int)rt->encoding() << 16) | ++ ((int)index->encoding() << 11) | (low(off, 8) << 3) | gslbx_op); ++ void gslbx(Register rt, Register base, Register index, int off) {INSN(gs_ldc2_op, gslbx_op);} ++ ++ INSN_LDC2(gslbx, gslbx_op) ++ INSN_LDC2(gslhx, gslhx_op) ++ INSN_LDC2(gslwx, gslwx_op) ++ INSN_LDC2(gsldx, gsldx_op) ++ INSN_LDC2_F(gslwxc1, gslwxc1_op) ++ INSN_LDC2_F(gsldxc1, gsldxc1_op) ++ ++ INSN_SDC2(gssbx, gssbx_op) ++ INSN_SDC2(gsshx, gsshx_op) ++ INSN_SDC2(gsswx, gsswx_op) ++ INSN_SDC2(gssdx, gssdx_op) ++ INSN_SDC2_F(gsswxc1, gsswxc1_op) ++ INSN_SDC2_F(gssdxc1, gssdxc1_op) ++*/ ++ void gslbx(Register rt, Register base, Register index, int off) {INSN(gs_ldc2_op, gslbx_op) } ++ void gslhx(Register rt, Register base, Register index, int off) {INSN(gs_ldc2_op, gslhx_op) } ++ void gslwx(Register rt, Register base, Register index, int off) {INSN(gs_ldc2_op, gslwx_op) } ++ void gsldx(Register rt, Register base, Register index, int off) {INSN(gs_ldc2_op, gsldx_op) } ++ void gslwxc1(FloatRegister rt, Register base, Register index, int off) {INSN(gs_ldc2_op, gslwxc1_op) } ++ void gsldxc1(FloatRegister rt, Register base, Register index, int off) {INSN(gs_ldc2_op, gsldxc1_op) } ++ ++ void gssbx(Register rt, Register base, Register index, int off) {INSN(gs_sdc2_op, gssbx_op) } ++ void gsshx(Register rt, Register base, Register index, int off) {INSN(gs_sdc2_op, gsshx_op) } ++ void gsswx(Register rt, Register base, Register index, int off) {INSN(gs_sdc2_op, gsswx_op) } ++ void gssdx(Register rt, Register base, Register index, int off) {INSN(gs_sdc2_op, gssdx_op) } ++ void gsswxc1(FloatRegister rt, Register base, Register index, int off) {INSN(gs_sdc2_op, gsswxc1_op) } ++ void gssdxc1(FloatRegister rt, Register base, Register index, int off) {INSN(gs_sdc2_op, gssdxc1_op) } ++ ++#undef INSN ++#undef INSN_LDC2 ++#undef INSN_LDC2_F ++#undef INSN_SDC2 ++#undef INSN_SDC2_F ++ ++ // cpucfg on Loongson CPUs above 3A4000 ++ void cpucfg(Register rd, Register rs) { emit_long((gs_lwc2_op << 26) | ((int)rs->encoding() << 21) | (0b01000 << 16) | ((int)rd->encoding() << 11) | ( 0b00100 << 6) | 0b011000);} ++ ++ ++public: ++ // Creation ++ Assembler(CodeBuffer* code) : 
AbstractAssembler(code) { ++#ifdef CHECK_DELAY ++ delay_state = no_delay; ++#endif ++ } ++ ++ // Decoding ++ static address locate_operand(address inst, WhichOperand which); ++ static address locate_next_instruction(address inst); ++}; ++ ++ ++ ++#endif // CPU_MIPS_VM_ASSEMBLER_MIPS_HPP +diff --git a/hotspot/src/cpu/mips/vm/assembler_mips.inline.hpp b/hotspot/src/cpu/mips/vm/assembler_mips.inline.hpp +new file mode 100644 +index 0000000000..39aeb5509a +--- /dev/null ++++ b/hotspot/src/cpu/mips/vm/assembler_mips.inline.hpp +@@ -0,0 +1,33 @@ ++/* ++ * Copyright (c) 1997, 2010, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2021, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#ifndef CPU_MIPS_VM_ASSEMBLER_MIPS_INLINE_HPP ++#define CPU_MIPS_VM_ASSEMBLER_MIPS_INLINE_HPP ++ ++#include "asm/assembler.inline.hpp" ++#include "asm/codeBuffer.hpp" ++#include "code/codeCache.hpp" ++ ++#endif // CPU_MIPS_VM_ASSEMBLER_MIPS_INLINE_HPP +diff --git a/hotspot/src/cpu/mips/vm/bytecodeInterpreter_mips.cpp b/hotspot/src/cpu/mips/vm/bytecodeInterpreter_mips.cpp +new file mode 100644 +index 0000000000..a4a1b28c2d +--- /dev/null ++++ b/hotspot/src/cpu/mips/vm/bytecodeInterpreter_mips.cpp +@@ -0,0 +1,53 @@ ++/* ++ * Copyright (c) 2007, 2013, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2016, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ * ++ */ ++ ++#include "precompiled.hpp" ++#include "asm/assembler.hpp" ++#include "interpreter/bytecodeInterpreter.hpp" ++#include "interpreter/bytecodeInterpreter.inline.hpp" ++#include "interpreter/interpreter.hpp" ++#include "interpreter/interpreterRuntime.hpp" ++#include "oops/methodData.hpp" ++#include "oops/method.hpp" ++#include "oops/oop.inline.hpp" ++#include "prims/jvmtiExport.hpp" ++#include "prims/jvmtiThreadState.hpp" ++#include "runtime/deoptimization.hpp" ++#include "runtime/frame.inline.hpp" ++#include "runtime/sharedRuntime.hpp" ++#include "runtime/stubRoutines.hpp" ++#include "runtime/synchronizer.hpp" ++#include "runtime/vframeArray.hpp" ++#include "utilities/debug.hpp" ++#ifdef TARGET_ARCH_MODEL_mips_32 ++# include "interp_masm_mips_32.hpp" ++#endif ++#ifdef TARGET_ARCH_MODEL_mips_64 ++# include "interp_masm_mips_64.hpp" ++#endif ++ ++#ifdef CC_INTERP ++ ++#endif // CC_INTERP (all) +diff --git a/hotspot/src/cpu/mips/vm/bytecodeInterpreter_mips.hpp b/hotspot/src/cpu/mips/vm/bytecodeInterpreter_mips.hpp +new file mode 100644 +index 0000000000..aac8b7a2b7 +--- /dev/null ++++ b/hotspot/src/cpu/mips/vm/bytecodeInterpreter_mips.hpp +@@ -0,0 +1,110 @@ ++/* ++ * Copyright (c) 2002, 2010, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2016, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#ifndef CPU_MIPS_VM_BYTECODEINTERPRETER_MIPS_HPP ++#define CPU_MIPS_VM_BYTECODEINTERPRETER_MIPS_HPP ++ ++// Platform specific for C++ based Interpreter ++#define LOTS_OF_REGS /* Lets interpreter use plenty of registers */ ++ ++private: ++ ++ // save the bottom of the stack after frame manager setup. For ease of restoration after return ++ // from recursive interpreter call ++ intptr_t* _frame_bottom; /* saved bottom of frame manager frame */ ++ intptr_t* _last_Java_pc; /* pc to return to in frame manager */ ++ intptr_t* _sender_sp; /* sender's sp before stack (locals) extension */ ++ interpreterState _self_link; /* Previous interpreter state */ /* sometimes points to self??? 
*/ ++ double _native_fresult; /* save result of native calls that might return floats */ ++ intptr_t _native_lresult; /* save result of native calls that might return handle/longs */ ++public: ++ ++ static void pd_layout_interpreterState(interpreterState istate, address last_Java_pc, intptr_t* last_Java_fp); ++ inline intptr_t* sender_sp() { ++ return _sender_sp; ++ } ++ ++ ++#define SET_LAST_JAVA_FRAME() ++ ++#define RESET_LAST_JAVA_FRAME() THREAD->frame_anchor()->set_flags(0); ++ ++/* ++ * Macros for accessing the stack. ++ */ ++#undef STACK_INT ++#undef STACK_FLOAT ++#undef STACK_ADDR ++#undef STACK_OBJECT ++#undef STACK_DOUBLE ++#undef STACK_LONG ++ ++// JavaStack Implementation ++ ++#define GET_STACK_SLOT(offset) (*((intptr_t*) &topOfStack[-(offset)])) ++#define STACK_SLOT(offset) ((address) &topOfStack[-(offset)]) ++#define STACK_ADDR(offset) (*((address *) &topOfStack[-(offset)])) ++#define STACK_INT(offset) (*((jint*) &topOfStack[-(offset)])) ++#define STACK_FLOAT(offset) (*((jfloat *) &topOfStack[-(offset)])) ++#define STACK_OBJECT(offset) (*((oop *) &topOfStack [-(offset)])) ++#define STACK_DOUBLE(offset) (((VMJavaVal64*) &topOfStack[-(offset)])->d) ++#define STACK_LONG(offset) (((VMJavaVal64 *) &topOfStack[-(offset)])->l) ++ ++#define SET_STACK_SLOT(value, offset) (*(intptr_t*)&topOfStack[-(offset)] = *(intptr_t*)(value)) ++#define SET_STACK_ADDR(value, offset) (*((address *)&topOfStack[-(offset)]) = (value)) ++#define SET_STACK_INT(value, offset) (*((jint *)&topOfStack[-(offset)]) = (value)) ++#define SET_STACK_FLOAT(value, offset) (*((jfloat *)&topOfStack[-(offset)]) = (value)) ++#define SET_STACK_OBJECT(value, offset) (*((oop *)&topOfStack[-(offset)]) = (value)) ++#define SET_STACK_DOUBLE(value, offset) (((VMJavaVal64*)&topOfStack[-(offset)])->d = (value)) ++#define SET_STACK_DOUBLE_FROM_ADDR(addr, offset) (((VMJavaVal64*)&topOfStack[-(offset)])->d = \ ++ ((VMJavaVal64*)(addr))->d) ++#define SET_STACK_LONG(value, offset) (((VMJavaVal64*)&topOfStack[-(offset)])->l = (value)) ++#define SET_STACK_LONG_FROM_ADDR(addr, offset) (((VMJavaVal64*)&topOfStack[-(offset)])->l = \ ++ ((VMJavaVal64*)(addr))->l) ++// JavaLocals implementation ++ ++#define LOCALS_SLOT(offset) ((intptr_t*)&locals[-(offset)]) ++#define LOCALS_ADDR(offset) ((address)locals[-(offset)]) ++#define LOCALS_INT(offset) (*((jint*)&locals[-(offset)])) ++#define LOCALS_FLOAT(offset) (*((jfloat*)&locals[-(offset)])) ++#define LOCALS_OBJECT(offset) ((oop)locals[-(offset)]) ++#define LOCALS_DOUBLE(offset) (((VMJavaVal64*)&locals[-((offset) + 1)])->d) ++#define LOCALS_LONG(offset) (((VMJavaVal64*)&locals[-((offset) + 1)])->l) ++#define LOCALS_LONG_AT(offset) (((address)&locals[-((offset) + 1)])) ++#define LOCALS_DOUBLE_AT(offset) (((address)&locals[-((offset) + 1)])) ++ ++#define SET_LOCALS_SLOT(value, offset) (*(intptr_t*)&locals[-(offset)] = *(intptr_t *)(value)) ++#define SET_LOCALS_ADDR(value, offset) (*((address *)&locals[-(offset)]) = (value)) ++#define SET_LOCALS_INT(value, offset) (*((jint *)&locals[-(offset)]) = (value)) ++#define SET_LOCALS_FLOAT(value, offset) (*((jfloat *)&locals[-(offset)]) = (value)) ++#define SET_LOCALS_OBJECT(value, offset) (*((oop *)&locals[-(offset)]) = (value)) ++#define SET_LOCALS_DOUBLE(value, offset) (((VMJavaVal64*)&locals[-((offset)+1)])->d = (value)) ++#define SET_LOCALS_LONG(value, offset) (((VMJavaVal64*)&locals[-((offset)+1)])->l = (value)) ++#define SET_LOCALS_DOUBLE_FROM_ADDR(addr, offset) (((VMJavaVal64*)&locals[-((offset)+1)])->d = \ ++ ((VMJavaVal64*)(addr))->d) ++#define 
SET_LOCALS_LONG_FROM_ADDR(addr, offset) (((VMJavaVal64*)&locals[-((offset)+1)])->l = \ ++ ((VMJavaVal64*)(addr))->l) ++ ++#endif // CPU_MIPS_VM_BYTECODEINTERPRETER_MIPS_HPP +diff --git a/hotspot/src/cpu/mips/vm/bytecodeInterpreter_mips.inline.hpp b/hotspot/src/cpu/mips/vm/bytecodeInterpreter_mips.inline.hpp +new file mode 100644 +index 0000000000..8ce77ab92f +--- /dev/null ++++ b/hotspot/src/cpu/mips/vm/bytecodeInterpreter_mips.inline.hpp +@@ -0,0 +1,286 @@ ++/* ++ * Copyright (c) 2002, 2010, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2019, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#ifndef CPU_MIPS_VM_BYTECODEINTERPRETER_MIPS_INLINE_HPP ++#define CPU_MIPS_VM_BYTECODEINTERPRETER_MIPS_INLINE_HPP ++ ++// Inline interpreter functions for MIPS ++ ++inline jfloat BytecodeInterpreter::VMfloatAdd(jfloat op1, jfloat op2) { return op1 + op2; } ++inline jfloat BytecodeInterpreter::VMfloatSub(jfloat op1, jfloat op2) { return op1 - op2; } ++inline jfloat BytecodeInterpreter::VMfloatMul(jfloat op1, jfloat op2) { return op1 * op2; } ++inline jfloat BytecodeInterpreter::VMfloatDiv(jfloat op1, jfloat op2) { return op1 / op2; } ++inline jfloat BytecodeInterpreter::VMfloatRem(jfloat op1, jfloat op2) { return fmod(op1, op2); } ++ ++inline jfloat BytecodeInterpreter::VMfloatNeg(jfloat op) { return -op; } ++ ++inline int32_t BytecodeInterpreter::VMfloatCompare(jfloat op1, jfloat op2, int32_t direction) { ++ return ( op1 < op2 ? -1 : ++ op1 > op2 ? 1 : ++ op1 == op2 ? 0 : ++ (direction == -1 || direction == 1) ? direction : 0); ++ ++} ++ ++inline void BytecodeInterpreter::VMmemCopy64(uint32_t to[2], const uint32_t from[2]) { ++ // x86 can do unaligned copies but not 64bits at a time ++ to[0] = from[0]; to[1] = from[1]; ++} ++ ++// The long operations depend on compiler support for "long long" on x86 ++ ++inline jlong BytecodeInterpreter::VMlongAdd(jlong op1, jlong op2) { ++ return op1 + op2; ++} ++ ++inline jlong BytecodeInterpreter::VMlongAnd(jlong op1, jlong op2) { ++ return op1 & op2; ++} ++ ++inline jlong BytecodeInterpreter::VMlongDiv(jlong op1, jlong op2) { ++ // QQQ what about check and throw... 
++ return op1 / op2; ++} ++ ++inline jlong BytecodeInterpreter::VMlongMul(jlong op1, jlong op2) { ++ return op1 * op2; ++} ++ ++inline jlong BytecodeInterpreter::VMlongOr(jlong op1, jlong op2) { ++ return op1 | op2; ++} ++ ++inline jlong BytecodeInterpreter::VMlongSub(jlong op1, jlong op2) { ++ return op1 - op2; ++} ++ ++inline jlong BytecodeInterpreter::VMlongXor(jlong op1, jlong op2) { ++ return op1 ^ op2; ++} ++ ++inline jlong BytecodeInterpreter::VMlongRem(jlong op1, jlong op2) { ++ return op1 % op2; ++} ++ ++inline jlong BytecodeInterpreter::VMlongUshr(jlong op1, jint op2) { ++ // CVM did this 0x3f mask, is the really needed??? QQQ ++ return ((unsigned long long) op1) >> (op2 & 0x3F); ++} ++ ++inline jlong BytecodeInterpreter::VMlongShr(jlong op1, jint op2) { ++ return op1 >> (op2 & 0x3F); ++} ++ ++inline jlong BytecodeInterpreter::VMlongShl(jlong op1, jint op2) { ++ return op1 << (op2 & 0x3F); ++} ++ ++inline jlong BytecodeInterpreter::VMlongNeg(jlong op) { ++ return -op; ++} ++ ++inline jlong BytecodeInterpreter::VMlongNot(jlong op) { ++ return ~op; ++} ++ ++inline int32_t BytecodeInterpreter::VMlongLtz(jlong op) { ++ return (op <= 0); ++} ++ ++inline int32_t BytecodeInterpreter::VMlongGez(jlong op) { ++ return (op >= 0); ++} ++ ++inline int32_t BytecodeInterpreter::VMlongEqz(jlong op) { ++ return (op == 0); ++} ++ ++inline int32_t BytecodeInterpreter::VMlongEq(jlong op1, jlong op2) { ++ return (op1 == op2); ++} ++ ++inline int32_t BytecodeInterpreter::VMlongNe(jlong op1, jlong op2) { ++ return (op1 != op2); ++} ++ ++inline int32_t BytecodeInterpreter::VMlongGe(jlong op1, jlong op2) { ++ return (op1 >= op2); ++} ++ ++inline int32_t BytecodeInterpreter::VMlongLe(jlong op1, jlong op2) { ++ return (op1 <= op2); ++} ++ ++inline int32_t BytecodeInterpreter::VMlongLt(jlong op1, jlong op2) { ++ return (op1 < op2); ++} ++ ++inline int32_t BytecodeInterpreter::VMlongGt(jlong op1, jlong op2) { ++ return (op1 > op2); ++} ++ ++inline int32_t BytecodeInterpreter::VMlongCompare(jlong op1, jlong op2) { ++ return (VMlongLt(op1, op2) ? -1 : VMlongGt(op1, op2) ? 1 : 0); ++} ++ ++// Long conversions ++ ++inline jdouble BytecodeInterpreter::VMlong2Double(jlong val) { ++ return (jdouble) val; ++} ++ ++inline jfloat BytecodeInterpreter::VMlong2Float(jlong val) { ++ return (jfloat) val; ++} ++ ++inline jint BytecodeInterpreter::VMlong2Int(jlong val) { ++ return (jint) val; ++} ++ ++// Double Arithmetic ++ ++inline jdouble BytecodeInterpreter::VMdoubleAdd(jdouble op1, jdouble op2) { ++ return op1 + op2; ++} ++ ++inline jdouble BytecodeInterpreter::VMdoubleDiv(jdouble op1, jdouble op2) { ++ // Divide by zero... QQQ ++ return op1 / op2; ++} ++ ++inline jdouble BytecodeInterpreter::VMdoubleMul(jdouble op1, jdouble op2) { ++ return op1 * op2; ++} ++ ++inline jdouble BytecodeInterpreter::VMdoubleNeg(jdouble op) { ++ return -op; ++} ++ ++inline jdouble BytecodeInterpreter::VMdoubleRem(jdouble op1, jdouble op2) { ++ return fmod(op1, op2); ++} ++ ++inline jdouble BytecodeInterpreter::VMdoubleSub(jdouble op1, jdouble op2) { ++ return op1 - op2; ++} ++ ++inline int32_t BytecodeInterpreter::VMdoubleCompare(jdouble op1, jdouble op2, int32_t direction) { ++ return ( op1 < op2 ? -1 : ++ op1 > op2 ? 1 : ++ op1 == op2 ? 0 : ++ (direction == -1 || direction == 1) ? 
direction : 0); ++} ++ ++// Double Conversions ++ ++inline jfloat BytecodeInterpreter::VMdouble2Float(jdouble val) { ++ return (jfloat) val; ++} ++ ++// Float Conversions ++ ++inline jdouble BytecodeInterpreter::VMfloat2Double(jfloat op) { ++ return (jdouble) op; ++} ++ ++// Integer Arithmetic ++ ++inline jint BytecodeInterpreter::VMintAdd(jint op1, jint op2) { ++ return op1 + op2; ++} ++ ++inline jint BytecodeInterpreter::VMintAnd(jint op1, jint op2) { ++ return op1 & op2; ++} ++ ++inline jint BytecodeInterpreter::VMintDiv(jint op1, jint op2) { ++ // it's possible we could catch this special case implicitly ++ if ((juint)op1 == 0x80000000 && op2 == -1) return op1; ++ else return op1 / op2; ++} ++ ++inline jint BytecodeInterpreter::VMintMul(jint op1, jint op2) { ++ return op1 * op2; ++} ++ ++inline jint BytecodeInterpreter::VMintNeg(jint op) { ++ return -op; ++} ++ ++inline jint BytecodeInterpreter::VMintOr(jint op1, jint op2) { ++ return op1 | op2; ++} ++ ++inline jint BytecodeInterpreter::VMintRem(jint op1, jint op2) { ++ // it's possible we could catch this special case implicitly ++ if ((juint)op1 == 0x80000000 && op2 == -1) return 0; ++ else return op1 % op2; ++} ++ ++inline jint BytecodeInterpreter::VMintShl(jint op1, jint op2) { ++ return op1 << op2; ++} ++ ++inline jint BytecodeInterpreter::VMintShr(jint op1, jint op2) { ++ return op1 >> (op2 & 0x1f); // QQ op2 & 0x1f?? ++} ++ ++inline jint BytecodeInterpreter::VMintSub(jint op1, jint op2) { ++ return op1 - op2; ++} ++ ++inline jint BytecodeInterpreter::VMintUshr(jint op1, jint op2) { ++ return ((juint) op1) >> (op2 & 0x1f); // QQ op2 & 0x1f?? ++} ++ ++inline jint BytecodeInterpreter::VMintXor(jint op1, jint op2) { ++ return op1 ^ op2; ++} ++ ++inline jdouble BytecodeInterpreter::VMint2Double(jint val) { ++ return (jdouble) val; ++} ++ ++inline jfloat BytecodeInterpreter::VMint2Float(jint val) { ++ return (jfloat) val; ++} ++ ++inline jlong BytecodeInterpreter::VMint2Long(jint val) { ++ return (jlong) val; ++} ++ ++inline jchar BytecodeInterpreter::VMint2Char(jint val) { ++ return (jchar) val; ++} ++ ++inline jshort BytecodeInterpreter::VMint2Short(jint val) { ++ return (jshort) val; ++} ++ ++inline jbyte BytecodeInterpreter::VMint2Byte(jint val) { ++ return (jbyte) val; ++} ++ ++#endif // CPU_MIPS_VM_BYTECODEINTERPRETER_MIPS_INLINE_HPP +diff --git a/hotspot/src/cpu/mips/vm/bytecodes_mips.cpp b/hotspot/src/cpu/mips/vm/bytecodes_mips.cpp +new file mode 100644 +index 0000000000..61efd1f561 +--- /dev/null ++++ b/hotspot/src/cpu/mips/vm/bytecodes_mips.cpp +@@ -0,0 +1,38 @@ ++/* ++ * Copyright (c) 1998, 2010, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2016, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). 
++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#include "precompiled.hpp" ++#include "interpreter/bytecodes.hpp" ++ ++ ++void Bytecodes::pd_initialize() { ++ // No mips specific initialization ++} ++ ++ ++Bytecodes::Code Bytecodes::pd_base_code_for(Code code) { ++ // No mips specific bytecodes ++ return code; ++} +diff --git a/hotspot/src/cpu/mips/vm/bytecodes_mips.hpp b/hotspot/src/cpu/mips/vm/bytecodes_mips.hpp +new file mode 100644 +index 0000000000..25a9562acd +--- /dev/null ++++ b/hotspot/src/cpu/mips/vm/bytecodes_mips.hpp +@@ -0,0 +1,31 @@ ++/* ++ * Copyright (c) 1998, 2010, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2016, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#ifndef CPU_MIPS_VM_BYTECODES_MIPS_HPP ++#define CPU_MIPS_VM_BYTECODES_MIPS_HPP ++ ++// No Loongson specific bytecodes ++ ++#endif // CPU_MIPS_VM_BYTECODES_MIPS_HPP +diff --git a/hotspot/src/cpu/mips/vm/bytes_mips.hpp b/hotspot/src/cpu/mips/vm/bytes_mips.hpp +new file mode 100644 +index 0000000000..515ffad4b0 +--- /dev/null ++++ b/hotspot/src/cpu/mips/vm/bytes_mips.hpp +@@ -0,0 +1,193 @@ ++/* ++ * Copyright (c) 1997, 2010, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2021, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 
++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#ifndef CPU_MIPS_VM_BYTES_MIPS_HPP ++#define CPU_MIPS_VM_BYTES_MIPS_HPP ++ ++#include "memory/allocation.hpp" ++ ++class Bytes: AllStatic { ++ public: ++ // Returns true if the byte ordering used by Java is different from the native byte ordering ++ // of the underlying machine. For example, this is true for Intel x86, but false for Solaris ++ // on Sparc. ++ // we use mipsel, so return true ++ static inline bool is_Java_byte_ordering_different(){ return true; } ++ ++ ++ // Efficient reading and writing of unaligned unsigned data in platform-specific byte ordering ++ // (no special code is needed since x86 CPUs can access unaligned data) ++ static inline u2 get_native_u2(address p) { ++ if ((intptr_t)p & 0x1) { ++ return ((u2)p[1] << 8) | (u2)p[0]; ++ } else { ++ return *(u2*)p; ++ } ++ } ++ ++ static inline u4 get_native_u4(address p) { ++ if ((intptr_t)p & 3) { ++ u4 res; ++ __asm__ __volatile__ ( ++ " .set push\n" ++ " .set mips64\n" ++ " .set noreorder\n" ++ ++ " lwr %[res], 0(%[addr]) \n" ++ " lwl %[res], 3(%[addr]) \n" ++ ++ " .set pop" ++ : [res] "=&r" (res) ++ : [addr] "r" (p) ++ : "memory" ++ ); ++ return res; ++ } else { ++ return *(u4*)p; ++ } ++ } ++ ++ static inline u8 get_native_u8(address p) { ++ u8 res; ++ u8 temp; ++ // u4 tp;//tmp register ++ __asm__ __volatile__ ( ++ " .set push\n" ++ " .set mips64\n" ++ " .set noreorder\n" ++ " .set noat\n" ++ " andi $1,%[addr],0x7 \n" ++ " beqz $1,1f \n" ++ " nop \n" ++ " ldr %[temp], 0(%[addr]) \n" ++ " ldl %[temp], 7(%[addr]) \n" ++ " b 2f \n" ++ " nop \n" ++ " 1:\t ld %[temp],0(%[addr]) \n" ++ " 2:\t sd %[temp], %[res] \n" ++ ++ " .set at\n" ++ " .set pop\n" ++ : [addr]"=r"(p), [temp]"=r" (temp) ++ : "[addr]"(p), "[temp]" (temp), [res]"m" (*(volatile jint*)&res) ++ : "memory" ++ ); ++ ++ return res; ++ } ++ ++ //use mips unaligned load instructions ++ static inline void put_native_u2(address p, u2 x) { ++ if((intptr_t)p & 0x1) { ++ p[0] = (u_char)(x); ++ p[1] = (u_char)(x>>8); ++ } else { ++ *(u2*)p = x; ++ } ++ } ++ ++ static inline void put_native_u4(address p, u4 x) { ++ // refer to sparc implementation. ++ // Note that sparc is big-endian, while mips is little-endian ++ switch ( intptr_t(p) & 3 ) { ++ case 0: *(u4*)p = x; ++ break; ++ ++ case 2: ((u2*)p)[1] = x >> 16; ++ ((u2*)p)[0] = x; ++ break; ++ ++ default: ((u1*)p)[3] = x >> 24; ++ ((u1*)p)[2] = x >> 16; ++ ((u1*)p)[1] = x >> 8; ++ ((u1*)p)[0] = x; ++ break; ++ } ++ } ++ ++ static inline void put_native_u8(address p, u8 x) { ++ // refer to sparc implementation. ++ // Note that sparc is big-endian, while mips is little-endian ++ switch ( intptr_t(p) & 7 ) { ++ case 0: *(u8*)p = x; ++ break; ++ ++ case 4: ((u4*)p)[1] = x >> 32; ++ ((u4*)p)[0] = x; ++ break; ++ ++ case 2: ((u2*)p)[3] = x >> 48; ++ ((u2*)p)[2] = x >> 32; ++ ((u2*)p)[1] = x >> 16; ++ ((u2*)p)[0] = x; ++ break; ++ ++ default: ((u1*)p)[7] = x >> 56; ++ ((u1*)p)[6] = x >> 48; ++ ((u1*)p)[5] = x >> 40; ++ ((u1*)p)[4] = x >> 32; ++ ((u1*)p)[3] = x >> 24; ++ ((u1*)p)[2] = x >> 16; ++ ((u1*)p)[1] = x >> 8; ++ ((u1*)p)[0] = x; ++ } ++ } ++ ++ ++ // Efficient reading and writing of unaligned unsigned data in Java ++ // byte ordering (i.e. big-endian ordering). Byte-order reversal is ++ // needed since MIPS64EL CPUs use little-endian format. 
++ static inline u2 get_Java_u2(address p) { return swap_u2(get_native_u2(p)); } ++ static inline u4 get_Java_u4(address p) { return swap_u4(get_native_u4(p)); } ++ static inline u8 get_Java_u8(address p) { return swap_u8(get_native_u8(p)); } ++ ++ static inline void put_Java_u2(address p, u2 x) { put_native_u2(p, swap_u2(x)); } ++ static inline void put_Java_u4(address p, u4 x) { put_native_u4(p, swap_u4(x)); } ++ static inline void put_Java_u8(address p, u8 x) { put_native_u8(p, swap_u8(x)); } ++ ++ ++ // Efficient swapping of byte ordering ++ static inline u2 swap_u2(u2 x); // compiler-dependent implementation ++ static inline u4 swap_u4(u4 x); // compiler-dependent implementation ++ static inline u8 swap_u8(u8 x); ++}; ++ ++ ++// The following header contains the implementations of swap_u2, swap_u4, and swap_u8[_base] ++#ifdef TARGET_OS_ARCH_linux_mips ++# include "bytes_linux_mips.inline.hpp" ++#endif ++#ifdef TARGET_OS_ARCH_solaris_mips ++# include "bytes_solaris_mips.inline.hpp" ++#endif ++#ifdef TARGET_OS_ARCH_windows_mips ++# include "bytes_windows_mips.inline.hpp" ++#endif ++#ifdef TARGET_OS_ARCH_bsd_mips ++# include "bytes_bsd_mips.inline.hpp" ++#endif ++ ++ ++#endif // CPU_MIPS_VM_BYTES_MIPS_HPP +diff --git a/hotspot/src/cpu/mips/vm/c2_globals_mips.hpp b/hotspot/src/cpu/mips/vm/c2_globals_mips.hpp +new file mode 100644 +index 0000000000..f254e07abd +--- /dev/null ++++ b/hotspot/src/cpu/mips/vm/c2_globals_mips.hpp +@@ -0,0 +1,100 @@ ++/* ++ * Copyright (c) 2000, 2013, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#ifndef CPU_MIPS_VM_C2_GLOBALS_MIPS_HPP ++#define CPU_MIPS_VM_C2_GLOBALS_MIPS_HPP ++ ++#include "utilities/globalDefinitions.hpp" ++#include "utilities/macros.hpp" ++ ++// Sets the default values for platform dependent flags used by the server compiler. ++// (see c2_globals.hpp). Alpha-sorted. 
++define_pd_global(bool, BackgroundCompilation, true); ++define_pd_global(bool, UseTLAB, true); ++define_pd_global(bool, ResizeTLAB, true); ++define_pd_global(bool, CICompileOSR, true); ++define_pd_global(bool, InlineIntrinsics, true); ++define_pd_global(bool, PreferInterpreterNativeStubs, false); ++define_pd_global(bool, ProfileTraps, true); ++define_pd_global(bool, UseOnStackReplacement, true); ++#ifdef CC_INTERP ++define_pd_global(bool, ProfileInterpreter, false); ++#else ++define_pd_global(bool, ProfileInterpreter, true); ++#endif // CC_INTERP ++define_pd_global(bool, TieredCompilation, false); // Disable C1 in server JIT ++define_pd_global(intx, CompileThreshold, 10000); ++define_pd_global(intx, BackEdgeThreshold, 100000); ++ ++define_pd_global(intx, OnStackReplacePercentage, 140); ++define_pd_global(intx, ConditionalMoveLimit, 3); ++define_pd_global(intx, FLOATPRESSURE, 6); ++define_pd_global(intx, FreqInlineSize, 325); ++define_pd_global(intx, MinJumpTableSize, 10); ++#ifdef MIPS64 ++define_pd_global(intx, INTPRESSURE, 13); ++define_pd_global(intx, InteriorEntryAlignment, 16); ++define_pd_global(intx, NewSizeThreadIncrease, ScaleForWordSize(4*K)); ++define_pd_global(intx, LoopUnrollLimit, 60); ++// InitialCodeCacheSize derived from specjbb2000 run. ++define_pd_global(intx, InitialCodeCacheSize, 2496*K); // Integral multiple of CodeCacheExpansionSize ++define_pd_global(intx, CodeCacheExpansionSize, 64*K); ++ ++// Ergonomics related flags ++define_pd_global(uint64_t,MaxRAM, 128ULL*G); ++#else ++define_pd_global(intx, INTPRESSURE, 6); ++define_pd_global(intx, InteriorEntryAlignment, 4); ++define_pd_global(intx, NewSizeThreadIncrease, 4*K); ++define_pd_global(intx, LoopUnrollLimit, 50); // Design center runs on 1.3.1 ++// InitialCodeCacheSize derived from specjbb2000 run. ++define_pd_global(intx, InitialCodeCacheSize, 2304*K); // Integral multiple of CodeCacheExpansionSize ++define_pd_global(intx, CodeCacheExpansionSize, 32*K); ++ ++// Ergonomics related flags ++define_pd_global(uint64_t,MaxRAM, 4ULL*G); ++#endif // MIPS64 ++define_pd_global(intx, RegisterCostAreaRatio, 16000); ++ ++// Peephole and CISC spilling both break the graph, and so makes the ++// scheduler sick. ++define_pd_global(bool, OptoPeephole, false); ++define_pd_global(bool, UseCISCSpill, false); ++define_pd_global(bool, OptoScheduling, false); ++define_pd_global(bool, OptoBundling, false); ++ ++define_pd_global(intx, ReservedCodeCacheSize, 120*M); ++define_pd_global(uintx, CodeCacheMinBlockLength, 4); ++define_pd_global(uintx, CodeCacheMinimumUseSpace, 400*K); ++ ++define_pd_global(bool, TrapBasedRangeChecks, false); // Not needed on x86. ++ ++// Heap related flags ++define_pd_global(uintx,MetaspaceSize, ScaleForWordSize(16*M)); ++ ++// Ergonomics related flags ++define_pd_global(bool, NeverActAsServerClassMachine, false); ++ ++#endif // CPU_MIPS_VM_C2_GLOBALS_MIPS_HPP +diff --git a/hotspot/src/cpu/mips/vm/c2_init_mips.cpp b/hotspot/src/cpu/mips/vm/c2_init_mips.cpp +new file mode 100644 +index 0000000000..e6d5815f42 +--- /dev/null ++++ b/hotspot/src/cpu/mips/vm/c2_init_mips.cpp +@@ -0,0 +1,34 @@ ++/* ++ * Copyright (c) 2000, 2010, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2019, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. 
++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#include "precompiled.hpp" ++#include "opto/compile.hpp" ++#include "opto/node.hpp" ++ ++// processor dependent initialization for mips ++ ++void Compile::pd_compiler2_init() { ++ guarantee(CodeEntryAlignment >= InteriorEntryAlignment, "" ); ++} +diff --git a/hotspot/src/cpu/mips/vm/codeBuffer_mips.hpp b/hotspot/src/cpu/mips/vm/codeBuffer_mips.hpp +new file mode 100644 +index 0000000000..1836b7a921 +--- /dev/null ++++ b/hotspot/src/cpu/mips/vm/codeBuffer_mips.hpp +@@ -0,0 +1,35 @@ ++/* ++ * Copyright (c) 2002, 2010, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2017, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#ifndef CPU_MIPS_VM_CODEBUFFER_MIPS_HPP ++#define CPU_MIPS_VM_CODEBUFFER_MIPS_HPP ++ ++private: ++ void pd_initialize() {} ++ ++public: ++ void flush_bundle(bool start_new_bundle) {} ++ ++#endif // CPU_MIPS_VM_CODEBUFFER_MIPS_HPP +diff --git a/hotspot/src/cpu/mips/vm/compiledIC_mips.cpp b/hotspot/src/cpu/mips/vm/compiledIC_mips.cpp +new file mode 100644 +index 0000000000..8ffaaaf841 +--- /dev/null ++++ b/hotspot/src/cpu/mips/vm/compiledIC_mips.cpp +@@ -0,0 +1,173 @@ ++/* ++ * Copyright (c) 1997, 2014, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2021, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#include "precompiled.hpp" ++#include "asm/macroAssembler.inline.hpp" ++#include "code/compiledIC.hpp" ++#include "code/icBuffer.hpp" ++#include "code/nmethod.hpp" ++#include "memory/resourceArea.hpp" ++#include "runtime/mutexLocker.hpp" ++#include "runtime/safepoint.hpp" ++ ++// Release the CompiledICHolder* associated with this call site is there is one. ++void CompiledIC::cleanup_call_site(virtual_call_Relocation* call_site) { ++ // This call site might have become stale so inspect it carefully. ++ NativeCall* call = nativeCall_at(call_site->addr()); ++ if (is_icholder_entry(call->destination())) { ++ NativeMovConstReg* value = nativeMovConstReg_at(call_site->cached_value()); ++ InlineCacheBuffer::queue_for_release((CompiledICHolder*)value->data()); ++ } ++} ++ ++bool CompiledIC::is_icholder_call_site(virtual_call_Relocation* call_site) { ++ // This call site might have become stale so inspect it carefully. ++ NativeCall* call = nativeCall_at(call_site->addr()); ++ return is_icholder_entry(call->destination()); ++} ++ ++// ---------------------------------------------------------------------------- ++ ++#define __ _masm. ++address CompiledStaticCall::emit_to_interp_stub(CodeBuffer &cbuf) { ++ ++ address mark = cbuf.insts_mark(); // get mark within main instrs section ++ ++ // Note that the code buffer's insts_mark is always relative to insts. ++ // That's why we must use the macroassembler to generate a stub. ++ MacroAssembler _masm(&cbuf); ++ ++ address base = __ start_a_stub(CompiledStaticCall::to_interp_stub_size()); ++ if (base == NULL) return NULL; // CodeBuffer::expand failed ++ // static stub relocation stores the instruction address of the call ++ ++ __ relocate(static_stub_Relocation::spec(mark), 0); ++ ++ // Code stream for loading method may be changed. ++ __ synci(R0, 0); ++ ++ // Rmethod contains methodOop, it should be relocated for GC ++ // static stub relocation also tags the methodOop in the code-stream. ++ __ mov_metadata(Rmethod, NULL); ++ // This is recognized as unresolved by relocs/nativeInst/ic code ++ ++ __ relocate(relocInfo::runtime_call_type); ++ ++ cbuf.set_insts_mark(); ++ address call_pc = (address)-1; ++ __ patchable_jump(call_pc); ++ __ align(16); ++ // Update current stubs pointer and restore code_end. ++ __ end_a_stub(); ++ return base; ++} ++#undef __ ++ ++int CompiledStaticCall::to_interp_stub_size() { ++ int size = NativeInstruction::nop_instruction_size + NativeMovConstReg::instruction_size + NativeCall::instruction_size; ++ return round_to(size, 16); ++} ++ ++// Relocation entries for call stub, compiled java to interpreter. 
++int CompiledStaticCall::reloc_to_interp_stub() { ++ return 16; ++} ++ ++void CompiledStaticCall::set_to_interpreted(methodHandle callee, address entry) { ++ address stub = find_stub(); ++ guarantee(stub != NULL, "stub not found"); ++ ++ if (TraceICs) { ++ ResourceMark rm; ++ tty->print_cr("CompiledStaticCall@" INTPTR_FORMAT ": set_to_interpreted %s", ++ p2i(instruction_address()), ++ callee->name_and_sig_as_C_string()); ++ } ++ ++ // Creation also verifies the object. ++ NativeMovConstReg* method_holder = nativeMovConstReg_at(stub + NativeInstruction::nop_instruction_size); ++#ifndef MIPS64 ++ NativeJump* jump = nativeJump_at(method_holder->next_instruction_address()); ++#else ++ NativeGeneralJump* jump = nativeGeneralJump_at(method_holder->next_instruction_address()); ++#endif ++ ++ assert(method_holder->data() == 0 || method_holder->data() == (intptr_t)callee(), ++ "a) MT-unsafe modification of inline cache"); ++ assert(jump->jump_destination() == (address)-1 || jump->jump_destination() == entry, ++ "b) MT-unsafe modification of inline cache"); ++ ++ // Update stub. ++ method_holder->set_data((intptr_t)callee()); ++ jump->set_jump_destination(entry); ++ ++ // Update jump to call. ++ set_destination_mt_safe(stub); ++} ++ ++void CompiledStaticCall::set_stub_to_clean(static_stub_Relocation* static_stub) { ++ assert (CompiledIC_lock->is_locked() || SafepointSynchronize::is_at_safepoint(), "mt unsafe call"); ++ // Reset stub. ++ address stub = static_stub->addr(); ++ assert(stub != NULL, "stub not found"); ++ // Creation also verifies the object. ++ NativeMovConstReg* method_holder = nativeMovConstReg_at(stub + NativeInstruction::nop_instruction_size); ++#ifndef MIPS64 ++ NativeJump* jump = nativeJump_at(method_holder->next_instruction_address()); ++#else ++ NativeGeneralJump* jump = nativeGeneralJump_at(method_holder->next_instruction_address()); ++#endif ++ method_holder->set_data(0); ++ jump->set_jump_destination((address)-1); ++} ++ ++//----------------------------------------------------------------------------- ++// Non-product mode code ++#ifndef PRODUCT ++ ++void CompiledStaticCall::verify() { ++ // Verify call. ++ NativeCall::verify(); ++ if (os::is_MP()) { ++ verify_alignment(); ++ } ++ ++ // Verify stub. ++ address stub = find_stub(); ++ assert(stub != NULL, "no stub found for static call"); ++ // Creation also verifies the object. ++ NativeMovConstReg* method_holder = nativeMovConstReg_at(stub + NativeInstruction::nop_instruction_size); ++#ifndef MIPS64 ++ NativeJump* jump = nativeJump_at(method_holder->next_instruction_address()); ++#else ++ NativeGeneralJump* jump = nativeGeneralJump_at(method_holder->next_instruction_address()); ++#endif ++ ++ ++ // Verify state. ++ assert(is_clean() || is_call_to_compiled() || is_call_to_interpreted(), "sanity check"); ++} ++ ++#endif // !PRODUCT +diff --git a/hotspot/src/cpu/mips/vm/copy_mips.hpp b/hotspot/src/cpu/mips/vm/copy_mips.hpp +new file mode 100644 +index 0000000000..49fde17923 +--- /dev/null ++++ b/hotspot/src/cpu/mips/vm/copy_mips.hpp +@@ -0,0 +1,72 @@ ++/* ++ * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2016, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. 
++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#ifndef CPU_MIPS_VM_COPY_MIPS_HPP ++#define CPU_MIPS_VM_COPY_MIPS_HPP ++ ++// Inline functions for memory copy and fill. ++ ++// Contains inline asm implementations ++#ifdef TARGET_OS_ARCH_linux_mips ++# include "copy_linux_mips.inline.hpp" ++#endif ++#ifdef TARGET_OS_ARCH_solaris_mips ++# include "copy_solaris_mips.inline.hpp" ++#endif ++#ifdef TARGET_OS_ARCH_windows_mips ++# include "copy_windows_mips.inline.hpp" ++#endif ++#ifdef TARGET_OS_ARCH_bsd_mips ++# include "copy_bsd_mips.inline.hpp" ++#endif ++// Inline functions for memory copy and fill. ++ ++// Contains inline asm implementations ++ ++static void pd_fill_to_words(HeapWord* tohw, size_t count, juint value) { ++ julong* to = (julong*) tohw; ++ julong v = ((julong) value << 32) | value; ++ while (count-- > 0) { ++ *to++ = v; ++ } ++} ++ ++static void pd_fill_to_aligned_words(HeapWord* tohw, size_t count, juint value) { ++ pd_fill_to_words(tohw, count, value); ++} ++ ++static void pd_fill_to_bytes(void* to, size_t count, jubyte value) { ++ (void)memset(to, value, count); ++} ++ ++static void pd_zero_to_words(HeapWord* tohw, size_t count) { ++ pd_fill_to_words(tohw, count, 0); ++} ++ ++static void pd_zero_to_bytes(void* to, size_t count) { ++ (void)memset(to, 0, count); ++} ++ ++#endif //CPU_MIPS_VM_COPY_MIPS_HPP +diff --git a/hotspot/src/cpu/mips/vm/cppInterpreterGenerator_mips.hpp b/hotspot/src/cpu/mips/vm/cppInterpreterGenerator_mips.hpp +new file mode 100644 +index 0000000000..37bd03b00b +--- /dev/null ++++ b/hotspot/src/cpu/mips/vm/cppInterpreterGenerator_mips.hpp +@@ -0,0 +1,53 @@ ++/* ++ * Copyright (c) 1997, 2011, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2016, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ * ++ */ ++ ++#ifndef CPU_MIPS_VM_CPPINTERPRETERGENERATOR_MIPS_HPP ++#define CPU_MIPS_VM_CPPINTERPRETERGENERATOR_MIPS_HPP ++ ++ protected: ++ ++#if 0 ++ address generate_asm_interpreter_entry(bool synchronized); ++ address generate_native_entry(bool synchronized); ++ address generate_abstract_entry(void); ++ address generate_math_entry(AbstractInterpreter::MethodKind kind); ++ address generate_empty_entry(void); ++ address generate_accessor_entry(void); ++ void lock_method(void); ++ void generate_stack_overflow_check(void); ++ ++ void generate_counter_incr(Label* overflow, Label* profile_method, Label* profile_method_continue); ++ void generate_counter_overflow(Label* do_continue); ++#endif ++ ++ void generate_more_monitors(); ++ void generate_deopt_handling(); ++ address generate_interpreter_frame_manager(bool synchronized); // C++ interpreter only ++ void generate_compute_interpreter_state(const Register state, ++ const Register prev_state, ++ const Register sender_sp, ++ bool native); // C++ interpreter only ++ ++#endif // CPU_MIPS_VM_CPPINTERPRETERGENERATOR_MIPS_HPP +diff --git a/hotspot/src/cpu/mips/vm/cppInterpreter_mips.cpp b/hotspot/src/cpu/mips/vm/cppInterpreter_mips.cpp +new file mode 100644 +index 0000000000..1f8d75d593 +--- /dev/null ++++ b/hotspot/src/cpu/mips/vm/cppInterpreter_mips.cpp +@@ -0,0 +1,215 @@ ++/* ++ * Copyright (c) 2007, 2013, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2016, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ * ++ */ ++ ++#include "precompiled.hpp" ++#include "asm/macroAssembler.hpp" ++#include "interpreter/bytecodeHistogram.hpp" ++#include "interpreter/cppInterpreter.hpp" ++#include "interpreter/interpreter.hpp" ++#include "interpreter/interpreterGenerator.hpp" ++#include "interpreter/interpreterRuntime.hpp" ++#include "oops/arrayOop.hpp" ++#include "oops/methodData.hpp" ++#include "oops/method.hpp" ++#include "oops/oop.inline.hpp" ++#include "prims/jvmtiExport.hpp" ++#include "prims/jvmtiThreadState.hpp" ++#include "runtime/arguments.hpp" ++#include "runtime/deoptimization.hpp" ++#include "runtime/frame.inline.hpp" ++#include "runtime/interfaceSupport.hpp" ++#include "runtime/sharedRuntime.hpp" ++#include "runtime/stubRoutines.hpp" ++#include "runtime/synchronizer.hpp" ++#include "runtime/timer.hpp" ++#include "runtime/vframeArray.hpp" ++#include "utilities/debug.hpp" ++#ifdef SHARK ++#include "shark/shark_globals.hpp" ++#endif ++ ++#ifdef CC_INTERP ++ ++// Routine exists to make tracebacks look decent in debugger ++// while "shadow" interpreter frames are on stack. It is also ++// used to distinguish interpreter frames. ++ ++extern "C" void RecursiveInterpreterActivation(interpreterState istate) { ++ ShouldNotReachHere(); ++} ++ ++bool CppInterpreter::contains(address pc) { ++ Unimplemented(); ++} ++ ++#define STATE(field_name) Lstate, in_bytes(byte_offset_of(BytecodeInterpreter, field_name)) ++#define __ _masm-> ++ ++Label frame_manager_entry; ++Label fast_accessor_slow_entry_path; // fast accessor methods need to be able to jmp to unsynchronized ++ // c++ interpreter entry point this holds that entry point label. ++ ++static address unctrap_frame_manager_entry = NULL; ++ ++static address interpreter_return_address = NULL; ++static address deopt_frame_manager_return_atos = NULL; ++static address deopt_frame_manager_return_btos = NULL; ++static address deopt_frame_manager_return_itos = NULL; ++static address deopt_frame_manager_return_ltos = NULL; ++static address deopt_frame_manager_return_ftos = NULL; ++static address deopt_frame_manager_return_dtos = NULL; ++static address deopt_frame_manager_return_vtos = NULL; ++ ++const Register prevState = G1_scratch; ++ ++void InterpreterGenerator::save_native_result(void) { ++ Unimplemented(); ++} ++ ++void InterpreterGenerator::restore_native_result(void) { ++ Unimplemented(); ++} ++ ++// A result handler converts/unboxes a native call result into ++// a java interpreter/compiler result. The current frame is an ++// interpreter frame. The activation frame unwind code must be ++// consistent with that of TemplateTable::_return(...). In the ++// case of native methods, the caller's SP was not modified. 
++address CppInterpreterGenerator::generate_result_handler_for(BasicType type) { ++ Unimplemented(); ++} ++ ++address CppInterpreterGenerator::generate_tosca_to_stack_converter(BasicType type) { ++ Unimplemented(); ++} ++ ++address CppInterpreterGenerator::generate_stack_to_stack_converter(BasicType type) { ++ Unimplemented(); ++} ++ ++address CppInterpreterGenerator::generate_stack_to_native_abi_converter(BasicType type) { ++ Unimplemented(); ++} ++ ++address CppInterpreter::return_entry(TosState state, int length) { ++ Unimplemented(); ++} ++ ++address CppInterpreter::deopt_entry(TosState state, int length) { ++ Unimplemented(); ++} ++ ++void InterpreterGenerator::generate_counter_incr(Label* overflow, Label* profile_method, Label* profile_method_continue) { ++ Unimplemented(); ++} ++ ++address InterpreterGenerator::generate_empty_entry(void) { ++ Unimplemented(); ++} ++ ++address InterpreterGenerator::generate_accessor_entry(void) { ++ Unimplemented(); ++} ++ ++address InterpreterGenerator::generate_native_entry(bool synchronized) { ++ Unimplemented(); ++} ++ ++void CppInterpreterGenerator::generate_compute_interpreter_state(const Register state, ++ const Register prev_state, ++ bool native) { ++ Unimplemented(); ++} ++ ++void InterpreterGenerator::lock_method(void) { ++ Unimplemented(); ++} ++ ++void CppInterpreterGenerator::generate_deopt_handling() { ++ Unimplemented(); ++} ++ ++void CppInterpreterGenerator::generate_more_monitors() { ++ Unimplemented(); ++} ++ ++ ++static address interpreter_frame_manager = NULL; ++ ++void CppInterpreterGenerator::adjust_callers_stack(Register args) { ++ Unimplemented(); ++} ++ ++address InterpreterGenerator::generate_normal_entry(bool synchronized) { ++ Unimplemented(); ++} ++ ++InterpreterGenerator::InterpreterGenerator(StubQueue* code) ++ : CppInterpreterGenerator(code) { ++ Unimplemented(); ++} ++ ++ ++static int size_activation_helper(int callee_extra_locals, int max_stack, int monitor_size) { ++ Unimplemented(); ++} ++ ++int AbstractInterpreter::size_top_interpreter_activation(methodOop method) { ++ Unimplemented(); ++} ++ ++void BytecodeInterpreter::layout_interpreterState(interpreterState to_fill, ++ frame* caller, ++ frame* current, ++ methodOop method, ++ intptr_t* locals, ++ intptr_t* stack, ++ intptr_t* stack_base, ++ intptr_t* monitor_base, ++ intptr_t* frame_bottom, ++ bool is_top_frame ++ ) ++{ ++ Unimplemented(); ++} ++ ++void BytecodeInterpreter::pd_layout_interpreterState(interpreterState istate, address last_Java_pc, intptr_t* last_Java_fp) { ++ Unimplemented(); ++} ++ ++ ++int AbstractInterpreter::layout_activation(methodOop method, ++ int tempcount, // Number of slots on java expression stack in use ++ int popframe_extra_args, ++ int moncount, // Number of active monitors ++ int callee_param_size, ++ int callee_locals_size, ++ frame* caller, ++ frame* interpreter_frame, ++ bool is_top_frame) { ++ Unimplemented(); ++} ++ ++#endif // CC_INTERP +diff --git a/hotspot/src/cpu/mips/vm/cppInterpreter_mips.hpp b/hotspot/src/cpu/mips/vm/cppInterpreter_mips.hpp +new file mode 100644 +index 0000000000..49c4733049 +--- /dev/null ++++ b/hotspot/src/cpu/mips/vm/cppInterpreter_mips.hpp +@@ -0,0 +1,40 @@ ++/* ++ * Copyright (c) 1997, 2010, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#ifndef CPU_MIPS_VM_CPPINTERPRETER_MIPS_HPP ++#define CPU_MIPS_VM_CPPINTERPRETER_MIPS_HPP ++ // Size of interpreter code. Increase if too small. Interpreter will ++ // fail with a guarantee ("not enough space for interpreter generation"); ++ // if too small. ++ // Run with +PrintInterpreter to get the VM to print out the size. ++ // Max size with JVMTI and TaggedStackInterpreter ++ ++ // QQQ this is proably way too large for c++ interpreter ++ ++ // The sethi() instruction generates lots more instructions when shell ++ // stack limit is unlimited, so that's why this is much bigger. ++ const static int InterpreterCodeSize = 210 * K; ++ ++#endif // CPU_MIPS_VM_CPPINTERPRETER_MIPS_HPP +diff --git a/hotspot/src/cpu/mips/vm/debug_mips.cpp b/hotspot/src/cpu/mips/vm/debug_mips.cpp +new file mode 100644 +index 0000000000..50de03653b +--- /dev/null ++++ b/hotspot/src/cpu/mips/vm/debug_mips.cpp +@@ -0,0 +1,51 @@ ++/* ++ * Copyright (c) 1999, 2010, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2021, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ * ++ */ ++ ++#include "precompiled.hpp" ++#include "code/codeCache.hpp" ++#include "code/nmethod.hpp" ++#include "runtime/frame.hpp" ++#include "runtime/init.hpp" ++#include "runtime/os.hpp" ++#include "utilities/debug.hpp" ++#include "utilities/top.hpp" ++ ++#ifndef PRODUCT ++ ++void pd_ps(frame f) { ++ intptr_t* sp = f.sp(); ++ intptr_t* prev_sp = sp - 1; ++ intptr_t *pc = NULL; ++ intptr_t *next_pc = NULL; ++ int count = 0; ++ tty->print("register window backtrace from %#lx:\n", p2i(sp)); ++} ++ ++// This function is used to add platform specific info ++// to the error reporting code. ++ ++void pd_obfuscate_location(char *buf,int buflen) {} ++ ++#endif // PRODUCT +diff --git a/hotspot/src/cpu/mips/vm/depChecker_mips.cpp b/hotspot/src/cpu/mips/vm/depChecker_mips.cpp +new file mode 100644 +index 0000000000..756ccb68f9 +--- /dev/null ++++ b/hotspot/src/cpu/mips/vm/depChecker_mips.cpp +@@ -0,0 +1,30 @@ ++/* ++ * Copyright (c) 2002, 2010, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2016, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#include "precompiled.hpp" ++#include "compiler/disassembler.hpp" ++#include "depChecker_mips.hpp" ++ ++// Nothing to do on mips +diff --git a/hotspot/src/cpu/mips/vm/depChecker_mips.hpp b/hotspot/src/cpu/mips/vm/depChecker_mips.hpp +new file mode 100644 +index 0000000000..11e52b4e8f +--- /dev/null ++++ b/hotspot/src/cpu/mips/vm/depChecker_mips.hpp +@@ -0,0 +1,31 @@ ++/* ++ * Copyright (c) 2002, 2010, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2016, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 
++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#ifndef CPU_MIPS_VM_DEPCHECKER_MIPS_HPP ++#define CPU_MIPS_VM_DEPCHECKER_MIPS_HPP ++ ++// Nothing to do on MIPS ++ ++#endif // CPU_MIPS_VM_DEPCHECKER_MIPS_HPP +diff --git a/hotspot/src/cpu/mips/vm/disassembler_mips.hpp b/hotspot/src/cpu/mips/vm/disassembler_mips.hpp +new file mode 100644 +index 0000000000..c5f3a8888d +--- /dev/null ++++ b/hotspot/src/cpu/mips/vm/disassembler_mips.hpp +@@ -0,0 +1,37 @@ ++/* ++ * Copyright (c) 1997, 2010, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#ifndef CPU_MIPS_VM_DISASSEMBLER_MIPS_HPP ++#define CPU_MIPS_VM_DISASSEMBLER_MIPS_HPP ++ ++ static int pd_instruction_alignment() { ++ return sizeof(int); ++ } ++ ++ static const char* pd_cpu_opts() { ++ return "gpr-names=64"; ++ } ++ ++#endif // CPU_MIPS_VM_DISASSEMBLER_MIPS_HPP +diff --git a/hotspot/src/cpu/mips/vm/frame_mips.cpp b/hotspot/src/cpu/mips/vm/frame_mips.cpp +new file mode 100644 +index 0000000000..1c928976fc +--- /dev/null ++++ b/hotspot/src/cpu/mips/vm/frame_mips.cpp +@@ -0,0 +1,711 @@ ++/* ++ * Copyright (c) 1997, 2014, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ * ++ */ ++ ++#include "precompiled.hpp" ++#include "interpreter/interpreter.hpp" ++#include "memory/resourceArea.hpp" ++#include "oops/markOop.hpp" ++#include "oops/method.hpp" ++#include "oops/oop.inline.hpp" ++#include "prims/methodHandles.hpp" ++#include "runtime/frame.inline.hpp" ++#include "runtime/handles.inline.hpp" ++#include "runtime/javaCalls.hpp" ++#include "runtime/monitorChunk.hpp" ++#include "runtime/signature.hpp" ++#include "runtime/stubCodeGenerator.hpp" ++#include "runtime/stubRoutines.hpp" ++#include "vmreg_mips.inline.hpp" ++ ++#ifdef ASSERT ++void RegisterMap::check_location_valid() { ++} ++#endif ++ ++ ++// Profiling/safepoint support ++// for Profiling - acting on another frame. walks sender frames ++// if valid. ++// frame profile_find_Java_sender_frame(JavaThread *thread); ++ ++bool frame::safe_for_sender(JavaThread *thread) { ++ address sp = (address)_sp; ++ address fp = (address)_fp; ++ address unextended_sp = (address)_unextended_sp; ++ ++ // consider stack guards when trying to determine "safe" stack pointers ++ static size_t stack_guard_size = os::uses_stack_guard_pages() ? (StackYellowPages + StackRedPages) * os::vm_page_size() : 0; ++ size_t usable_stack_size = thread->stack_size() - stack_guard_size; ++ ++ // sp must be within the usable part of the stack (not in guards) ++ bool sp_safe = (sp < thread->stack_base()) && ++ (sp >= thread->stack_base() - usable_stack_size); ++ ++ ++ if (!sp_safe) { ++ return false; ++ } ++ ++ // unextended sp must be within the stack and above or equal sp ++ bool unextended_sp_safe = (unextended_sp < thread->stack_base()) && ++ (unextended_sp >= sp); ++ ++ if (!unextended_sp_safe) { ++ return false; ++ } ++ ++ // an fp must be within the stack and above (but not equal) sp ++ // second evaluation on fp+ is added to handle situation where fp is -1 ++ bool fp_safe = (fp < thread->stack_base() && (fp > sp) && (((fp + (return_addr_offset * sizeof(void*))) < thread->stack_base()))); ++ ++ // We know sp/unextended_sp are safe only fp is questionable here ++ ++ // If the current frame is known to the code cache then we can attempt to ++ // construct the sender and do some validation of it. This goes a long way ++ // toward eliminating issues when we get in frame construction code ++ ++ if (_cb != NULL ) { ++ ++ // First check if frame is complete and tester is reliable ++ // Unfortunately we can only check frame complete for runtime stubs and nmethod ++ // other generic buffer blobs are more problematic so we just assume they are ++ // ok. adapter blobs never have a frame complete and are never ok. ++ ++ if (!_cb->is_frame_complete_at(_pc)) { ++ if (_cb->is_nmethod() || _cb->is_adapter_blob() || _cb->is_runtime_stub()) { ++ return false; ++ } ++ } ++ ++ // Could just be some random pointer within the codeBlob ++ if (!_cb->code_contains(_pc)) { ++ return false; ++ } ++ ++ // Entry frame checks ++ if (is_entry_frame()) { ++ // an entry frame must have a valid fp. 
++ return fp_safe && is_entry_frame_valid(thread); ++ } ++ ++ intptr_t* sender_sp = NULL; ++ intptr_t* sender_unextended_sp = NULL; ++ address sender_pc = NULL; ++ intptr_t* saved_fp = NULL; ++ ++ if (is_interpreted_frame()) { ++ // fp must be safe ++ if (!fp_safe) { ++ return false; ++ } ++ ++ sender_pc = (address) this->fp()[return_addr_offset]; ++ // for interpreted frames, the value below is the sender "raw" sp, ++ // which can be different from the sender unextended sp (the sp seen ++ // by the sender) because of current frame local variables ++ sender_sp = (intptr_t*) addr_at(sender_sp_offset); ++ sender_unextended_sp = (intptr_t*) this->fp()[interpreter_frame_sender_sp_offset]; ++ saved_fp = (intptr_t*) this->fp()[link_offset]; ++ ++ } else { ++ // must be some sort of compiled/runtime frame ++ // fp does not have to be safe (although it could be check for c1?) ++ ++ // check for a valid frame_size, otherwise we are unlikely to get a valid sender_pc ++ if (_cb->frame_size() <= 0) { ++ return false; ++ } ++ ++ sender_sp = _unextended_sp + _cb->frame_size(); ++ sender_unextended_sp = sender_sp; ++ // On MIPS the return_address is always the word on the stack ++ sender_pc = (address) *(sender_sp-1); ++ // Note: frame::sender_sp_offset is only valid for compiled frame ++ saved_fp = (intptr_t*) *(sender_sp - frame::sender_sp_offset); ++ } ++ ++ ++ // If the potential sender is the interpreter then we can do some more checking ++ if (Interpreter::contains(sender_pc)) { ++ ++ // FP is always saved in a recognizable place in any code we generate. However ++ // only if the sender is interpreted/call_stub (c1 too?) are we certain that the saved FP ++ // is really a frame pointer. ++ ++ bool saved_fp_safe = ((address)saved_fp < thread->stack_base()) && (saved_fp > sender_sp); ++ ++ if (!saved_fp_safe) { ++ return false; ++ } ++ ++ // construct the potential sender ++ ++ frame sender(sender_sp, sender_unextended_sp, saved_fp, sender_pc); ++ ++ return sender.is_interpreted_frame_valid(thread); ++ ++ } ++ ++ // We must always be able to find a recognizable pc ++ CodeBlob* sender_blob = CodeCache::find_blob_unsafe(sender_pc); ++ if (sender_pc == NULL || sender_blob == NULL) { ++ return false; ++ } ++ ++ // Could be a zombie method ++ if (sender_blob->is_zombie() || sender_blob->is_unloaded()) { ++ return false; ++ } ++ ++ // Could just be some random pointer within the codeBlob ++ if (!sender_blob->code_contains(sender_pc)) { ++ return false; ++ } ++ ++ // We should never be able to see an adapter if the current frame is something from code cache ++ if (sender_blob->is_adapter_blob()) { ++ return false; ++ } ++ ++ // Could be the call_stub ++ if (StubRoutines::returns_to_call_stub(sender_pc)) { ++ bool saved_fp_safe = ((address)saved_fp < thread->stack_base()) && (saved_fp > sender_sp); ++ ++ if (!saved_fp_safe) { ++ return false; ++ } ++ ++ // construct the potential sender ++ ++ frame sender(sender_sp, sender_unextended_sp, saved_fp, sender_pc); ++ ++ // Validate the JavaCallWrapper an entry frame must have ++ address jcw = (address)sender.entry_frame_call_wrapper(); ++ ++ bool jcw_safe = (jcw < thread->stack_base()) && ( jcw > (address)sender.fp()); ++ ++ return jcw_safe; ++ } ++ ++ if (sender_blob->is_nmethod()) { ++ nmethod* nm = sender_blob->as_nmethod_or_null(); ++ if (nm != NULL) { ++ if (nm->is_deopt_mh_entry(sender_pc) || nm->is_deopt_entry(sender_pc)) { ++ return false; ++ } ++ } ++ } ++ ++ // If the frame size is 0 something (or less) is bad because every nmethod has a non-zero frame 
size ++ // because the return address counts against the callee's frame. ++ ++ if (sender_blob->frame_size() <= 0) { ++ assert(!sender_blob->is_nmethod(), "should count return address at least"); ++ return false; ++ } ++ ++ // We should never be able to see anything here except an nmethod. If something in the ++ // code cache (current frame) is called by an entity within the code cache that entity ++ // should not be anything but the call stub (already covered), the interpreter (already covered) ++ // or an nmethod. ++ ++ if (!sender_blob->is_nmethod()) { ++ return false; ++ } ++ ++ // Could put some more validation for the potential non-interpreted sender ++ // frame we'd create by calling sender if I could think of any. Wait for next crash in forte... ++ ++ // One idea is seeing if the sender_pc we have is one that we'd expect to call to current cb ++ ++ // We've validated the potential sender that would be created ++ return true; ++ } ++ // Note: fp == NULL is not really a prerequisite for this to be safe to ++ // walk for c2. However we've modified the code such that if we get ++ // a failure with fp != NULL that we then try with FP == NULL. ++ // This is basically to mimic what a last_frame would look like if ++ // c2 had generated it. ++ ++ // Must be native-compiled frame. Since sender will try and use fp to find ++ // linkages it must be safe ++ ++ if (!fp_safe) { ++ return false; ++ } ++ ++ // Will the pc we fetch be non-zero (which we'll find at the oldest frame) ++ ++ if ( (address) this->fp()[return_addr_offset] == NULL) return false; ++ ++ ++ // could try and do some more potential verification of native frame if we could think of some... ++ ++ return true; ++ ++} ++ ++void frame::patch_pc(Thread* thread, address pc) { ++ assert(_cb == CodeCache::find_blob(pc), "unexpected pc"); ++ address* pc_addr = &(((address*) sp())[-1]); ++ if (TracePcPatching) { ++ tty->print_cr("patch_pc at address " INTPTR_FORMAT " [" INTPTR_FORMAT " -> " INTPTR_FORMAT "]", ++ p2i(pc_addr), p2i(*pc_addr), p2i(pc)); ++ } ++ ++ // Either the return address is the original one or we are going to ++ // patch in the same address that's already there. ++ assert(_pc == *pc_addr || pc == *pc_addr, "must be"); ++ *pc_addr = pc; ++ _cb = CodeCache::find_blob(pc); ++ address original_pc = nmethod::get_deopt_original_pc(this); ++ if (original_pc != NULL) { ++ assert(original_pc == _pc, "expected original PC to be stored before patching"); ++ _deopt_state = is_deoptimized; ++ // leave _pc as is ++ } else { ++ _deopt_state = not_deoptimized; ++ _pc = pc; ++ } ++} ++ ++bool frame::is_interpreted_frame() const { ++ return Interpreter::contains(pc()); ++} ++ ++int frame::frame_size(RegisterMap* map) const { ++ frame sender = this->sender(map); ++ return sender.sp() - sp(); ++} ++ ++intptr_t* frame::entry_frame_argument_at(int offset) const { ++ // convert offset to index to deal with tsi ++ int index = (Interpreter::expr_offset_in_bytes(offset)/wordSize); ++ // Entry frame's arguments are always in relation to unextended_sp() ++ return &unextended_sp()[index]; ++} ++ ++// sender_sp ++#ifdef CC_INTERP ++intptr_t* frame::interpreter_frame_sender_sp() const { ++ assert(is_interpreted_frame(), "interpreted frame expected"); ++ // QQQ why does this specialize method exist if frame::sender_sp() does same thing? ++ // seems odd and if we always know interpreted vs. non then sender_sp() is really ++ // doing too much work. 
++ return get_interpreterState()->sender_sp(); ++} ++ ++// monitor elements ++ ++BasicObjectLock* frame::interpreter_frame_monitor_begin() const { ++ return get_interpreterState()->monitor_base(); ++} ++ ++BasicObjectLock* frame::interpreter_frame_monitor_end() const { ++ return (BasicObjectLock*) get_interpreterState()->stack_base(); ++} ++ ++#else // CC_INTERP ++ ++intptr_t* frame::interpreter_frame_sender_sp() const { ++ assert(is_interpreted_frame(), "interpreted frame expected"); ++ return (intptr_t*) at(interpreter_frame_sender_sp_offset); ++} ++ ++void frame::set_interpreter_frame_sender_sp(intptr_t* sender_sp) { ++ assert(is_interpreted_frame(), "interpreted frame expected"); ++ int_at_put(interpreter_frame_sender_sp_offset, (intptr_t) sender_sp); ++} ++ ++ ++// monitor elements ++ ++BasicObjectLock* frame::interpreter_frame_monitor_begin() const { ++ return (BasicObjectLock*) addr_at(interpreter_frame_monitor_block_bottom_offset); ++} ++ ++BasicObjectLock* frame::interpreter_frame_monitor_end() const { ++ BasicObjectLock* result = (BasicObjectLock*) *addr_at(interpreter_frame_monitor_block_top_offset); ++ // make sure the pointer points inside the frame ++ assert((intptr_t) fp() > (intptr_t) result, "result must < than frame pointer"); ++ assert((intptr_t) sp() <= (intptr_t) result, "result must >= than stack pointer"); ++ return result; ++} ++ ++void frame::interpreter_frame_set_monitor_end(BasicObjectLock* value) { ++ *((BasicObjectLock**)addr_at(interpreter_frame_monitor_block_top_offset)) = value; ++} ++ ++// Used by template based interpreter deoptimization ++void frame::interpreter_frame_set_last_sp(intptr_t* sp) { ++ *((intptr_t**)addr_at(interpreter_frame_last_sp_offset)) = sp; ++} ++#endif // CC_INTERP ++ ++frame frame::sender_for_entry_frame(RegisterMap* map) const { ++ assert(map != NULL, "map must be set"); ++ // Java frame called from C; skip all C frames and return top C ++ // frame of that chunk as the sender ++ JavaFrameAnchor* jfa = entry_frame_call_wrapper()->anchor(); ++ assert(!entry_frame_is_first(), "next Java fp must be non zero"); ++ assert(jfa->last_Java_sp() > sp(), "must be above this frame on stack"); ++ map->clear(); ++ assert(map->include_argument_oops(), "should be set by clear"); ++ if (jfa->last_Java_pc() != NULL ) { ++ frame fr(jfa->last_Java_sp(), jfa->last_Java_fp(), jfa->last_Java_pc()); ++ return fr; ++ } ++ frame fr(jfa->last_Java_sp(), jfa->last_Java_fp()); ++ return fr; ++} ++ ++frame frame::sender_for_interpreter_frame(RegisterMap* map) const { ++ // sp is the raw sp from the sender after adapter or interpreter extension ++ intptr_t* sender_sp = this->sender_sp(); ++ ++ // This is the sp before any possible extension (adapter/locals). ++ intptr_t* unextended_sp = interpreter_frame_sender_sp(); ++ ++ // The interpreter and compiler(s) always save FP in a known ++ // location on entry. We must record where that location is ++ // so this if FP was live on callout from c2 we can find ++ // the saved copy no matter what it called. ++ ++ // Since the interpreter always saves FP if we record where it is then ++ // we don't have to always save FP on entry and exit to c2 compiled ++ // code, on entry will be enough. 
++#ifdef COMPILER2 ++ if (map->update_map()) { ++ update_map_with_saved_link(map, (intptr_t**) addr_at(link_offset)); ++ } ++#endif /* COMPILER2 */ ++ return frame(sender_sp, unextended_sp, link(), sender_pc()); ++} ++ ++ ++//------------------------------------------------------------------------------ ++// frame::verify_deopt_original_pc ++// ++// Verifies the calculated original PC of a deoptimization PC for the ++// given unextended SP. The unextended SP might also be the saved SP ++// for MethodHandle call sites. ++#ifdef ASSERT ++void frame::verify_deopt_original_pc(nmethod* nm, intptr_t* unextended_sp, bool is_method_handle_return) { ++ frame fr; ++ ++ // This is ugly but it's better than to change {get,set}_original_pc ++ // to take an SP value as argument. And it's only a debugging ++ // method anyway. ++ fr._unextended_sp = unextended_sp; ++ ++ address original_pc = nm->get_original_pc(&fr); ++ assert(nm->insts_contains(original_pc), "original PC must be in nmethod"); ++ assert(nm->is_method_handle_return(original_pc) == is_method_handle_return, "must be"); ++} ++#endif ++ ++ ++//------------------------------------------------------------------------------ ++// frame::adjust_unextended_sp ++void frame::adjust_unextended_sp() { ++ // On MIPS, sites calling method handle intrinsics and lambda forms are treated ++ // as any other call site. Therefore, no special action is needed when we are ++ // returning to any of these call sites. ++ ++ nmethod* sender_nm = (_cb == NULL) ? NULL : _cb->as_nmethod_or_null(); ++ if (sender_nm != NULL) { ++ // If the sender PC is a deoptimization point, get the original PC. ++ if (sender_nm->is_deopt_entry(_pc) || ++ sender_nm->is_deopt_mh_entry(_pc)) { ++ DEBUG_ONLY(verify_deopt_original_pc(sender_nm, _unextended_sp)); ++ } ++ } ++} ++ ++//------------------------------------------------------------------------------ ++// frame::update_map_with_saved_link ++void frame::update_map_with_saved_link(RegisterMap* map, intptr_t** link_addr) { ++ // The interpreter and compiler(s) always save fp in a known ++ // location on entry. We must record where that location is ++ // so that if fp was live on callout from c2 we can find ++ // the saved copy no matter what it called. ++ ++ // Since the interpreter always saves fp if we record where it is then ++ // we don't have to always save fp on entry and exit to c2 compiled ++ // code, on entry will be enough. ++ map->set_location(FP->as_VMReg(), (address) link_addr); ++ // this is weird "H" ought to be at a higher address however the ++ // oopMaps seems to have the "H" regs at the same address and the ++ // vanilla register. 
++ // XXXX make this go away ++ if (true) { ++ map->set_location(FP->as_VMReg()->next(), (address) link_addr); ++ } ++} ++ ++//------------------------------sender_for_compiled_frame----------------------- ++frame frame::sender_for_compiled_frame(RegisterMap* map) const { ++ assert(map != NULL, "map must be set"); ++ ++ // frame owned by optimizing compiler ++ assert(_cb->frame_size() >= 0, "must have non-zero frame size"); ++ ++ intptr_t* sender_sp = unextended_sp() + _cb->frame_size(); ++ intptr_t* unextended_sp = sender_sp; ++ ++#ifdef ASSERT ++ const bool c1_compiled = _cb->is_compiled_by_c1(); ++ bool native = _cb->is_nmethod() && ((nmethod*)_cb)->is_native_method(); ++ if (c1_compiled && native) { ++ assert(sender_sp == fp() + frame::sender_sp_offset, "incorrect frame size"); ++ } ++#endif // ASSERT ++ // On Intel the return_address is always the word on the stack ++ // the fp in compiler points to sender fp, but in interpreter, fp points to return address, ++ // so getting sender for compiled frame is not same as interpreter frame. ++ // we hard code here temporarily ++ // spark ++ address sender_pc = (address) *(sender_sp-1); ++ ++ intptr_t** saved_fp_addr = (intptr_t**) (sender_sp - frame::sender_sp_offset); ++ ++ if (map->update_map()) { ++ // Tell GC to use argument oopmaps for some runtime stubs that need it. ++ // For C1, the runtime stub might not have oop maps, so set this flag ++ // outside of update_register_map. ++ map->set_include_argument_oops(_cb->caller_must_gc_arguments(map->thread())); ++ if (_cb->oop_maps() != NULL) { ++ OopMapSet::update_register_map(this, map); ++ } ++ ++ // Since the prolog does the save and restore of epb there is no oopmap ++ // for it so we must fill in its location as if there was an oopmap entry ++ // since if our caller was compiled code there could be live jvm state in it. ++ update_map_with_saved_link(map, saved_fp_addr); ++ } ++ assert(sender_sp != sp(), "must have changed"); ++ return frame(sender_sp, unextended_sp, *saved_fp_addr, sender_pc); ++} ++ ++frame frame::sender(RegisterMap* map) const { ++ // Default is we done have to follow them. The sender_for_xxx will ++ // update it accordingly ++ map->set_include_argument_oops(false); ++ ++ if (is_entry_frame()) return sender_for_entry_frame(map); ++ if (is_interpreted_frame()) return sender_for_interpreter_frame(map); ++ assert(_cb == CodeCache::find_blob(pc()),"Must be the same"); ++ ++ if (_cb != NULL) { ++ return sender_for_compiled_frame(map); ++ } ++ // Must be native-compiled frame, i.e. the marshaling code for native ++ // methods that exists in the core system. 
++ return frame(sender_sp(), link(), sender_pc()); ++} ++ ++ ++bool frame::interpreter_frame_equals_unpacked_fp(intptr_t* fp) { ++ assert(is_interpreted_frame(), "must be interpreter frame"); ++ Method* method = interpreter_frame_method(); ++ // When unpacking an optimized frame the frame pointer is ++ // adjusted with: ++ int diff = (method->max_locals() - method->size_of_parameters()) * ++ Interpreter::stackElementWords; ++ printf("^^^^^^^^^^^^^^^adjust fp in deopt fp = 0%lx \n", (intptr_t)(fp - diff)); ++ return _fp == (fp - diff); ++} ++ ++void frame::pd_gc_epilog() { ++ // nothing done here now ++} ++ ++bool frame::is_interpreted_frame_valid(JavaThread* thread) const { ++// QQQ ++#ifdef CC_INTERP ++#else ++ assert(is_interpreted_frame(), "Not an interpreted frame"); ++ // These are reasonable sanity checks ++ if (fp() == 0 || (intptr_t(fp()) & (wordSize-1)) != 0) { ++ return false; ++ } ++ if (sp() == 0 || (intptr_t(sp()) & (wordSize-1)) != 0) { ++ return false; ++ } ++ if (fp() + interpreter_frame_initial_sp_offset < sp()) { ++ return false; ++ } ++ // These are hacks to keep us out of trouble. ++ // The problem with these is that they mask other problems ++ if (fp() <= sp()) { // this attempts to deal with unsigned comparison above ++ return false; ++ } ++ ++ // do some validation of frame elements ++ ++ // first the method ++ ++ Method* m = *interpreter_frame_method_addr(); ++ ++ // validate the method we'd find in this potential sender ++ if (!m->is_valid_method()) return false; ++ ++ // stack frames shouldn't be much larger than max_stack elements ++ ++ //if (fp() - sp() > 1024 + m->max_stack()*Interpreter::stackElementSize()) { ++ if (fp() - sp() > 4096) { // stack frames shouldn't be large. ++ return false; ++ } ++ ++ // validate bci/bcx ++ ++ intptr_t bcx = interpreter_frame_bcx(); ++ if (m->validate_bci_from_bcx(bcx) < 0) { ++ return false; ++ } ++ ++ // validate ConstantPoolCache* ++ ++ ConstantPoolCache* cp = *interpreter_frame_cache_addr(); ++ ++ if (cp == NULL || !cp->is_metaspace_object()) return false; ++ ++ // validate locals ++ ++ address locals = (address) *interpreter_frame_locals_addr(); ++ ++ if (locals > thread->stack_base() || locals < (address) fp()) return false; ++ ++ // We'd have to be pretty unlucky to be mislead at this point ++ ++#endif // CC_INTERP ++ return true; ++} ++ ++BasicType frame::interpreter_frame_result(oop* oop_result, jvalue* value_result) { ++#ifdef CC_INTERP ++ // Needed for JVMTI. The result should always be in the interpreterState object ++ assert(false, "NYI"); ++ interpreterState istate = get_interpreterState(); ++#endif // CC_INTERP ++ assert(is_interpreted_frame(), "interpreted frame expected"); ++ Method* method = interpreter_frame_method(); ++ BasicType type = method->result_type(); ++ ++ intptr_t* tos_addr; ++ if (method->is_native()) { ++ // Prior to calling into the runtime to report the method_exit the possible ++ // return value is pushed to the native stack. If the result is a jfloat/jdouble ++ // then ST0 is saved. 
See the note in generate_native_result ++ tos_addr = (intptr_t*)sp(); ++ if (type == T_FLOAT || type == T_DOUBLE) { ++ tos_addr += 2; ++ } ++ } else { ++ tos_addr = (intptr_t*)interpreter_frame_tos_address(); ++ } ++ ++ switch (type) { ++ case T_OBJECT : ++ case T_ARRAY : { ++ oop obj; ++ if (method->is_native()) { ++#ifdef CC_INTERP ++ obj = istate->_oop_temp; ++#else ++ obj = cast_to_oop(at(interpreter_frame_oop_temp_offset)); ++#endif // CC_INTERP ++ } else { ++ oop* obj_p = (oop*)tos_addr; ++ obj = (obj_p == NULL) ? (oop)NULL : *obj_p; ++ } ++ assert(obj == NULL || Universe::heap()->is_in(obj), "sanity check"); ++ *oop_result = obj; ++ break; ++ } ++ case T_BOOLEAN : value_result->z = *(jboolean*)tos_addr; break; ++ case T_BYTE : value_result->b = *(jbyte*)tos_addr; break; ++ case T_CHAR : value_result->c = *(jchar*)tos_addr; break; ++ case T_SHORT : value_result->s = *(jshort*)tos_addr; break; ++ case T_INT : value_result->i = *(jint*)tos_addr; break; ++ case T_LONG : value_result->j = *(jlong*)tos_addr; break; ++ case T_FLOAT : value_result->f = *(jfloat*)tos_addr; break; ++ case T_DOUBLE : value_result->d = *(jdouble*)tos_addr; break; ++ case T_VOID : /* Nothing to do */ break; ++ default : ShouldNotReachHere(); ++ } ++ ++ return type; ++} ++ ++ ++intptr_t* frame::interpreter_frame_tos_at(jint offset) const { ++ int index = (Interpreter::expr_offset_in_bytes(offset)/wordSize); ++ return &interpreter_frame_tos_address()[index]; ++} ++ ++#ifndef PRODUCT ++ ++#define DESCRIBE_FP_OFFSET(name) \ ++ values.describe(frame_no, fp() + frame::name##_offset, #name) ++ ++void frame::describe_pd(FrameValues& values, int frame_no) { ++ if (is_interpreted_frame()) { ++ DESCRIBE_FP_OFFSET(interpreter_frame_sender_sp); ++ DESCRIBE_FP_OFFSET(interpreter_frame_last_sp); ++ DESCRIBE_FP_OFFSET(interpreter_frame_method); ++ DESCRIBE_FP_OFFSET(interpreter_frame_mdx); ++ DESCRIBE_FP_OFFSET(interpreter_frame_cache); ++ DESCRIBE_FP_OFFSET(interpreter_frame_locals); ++ DESCRIBE_FP_OFFSET(interpreter_frame_bcx); ++ DESCRIBE_FP_OFFSET(interpreter_frame_initial_sp); ++ } ++} ++#endif ++ ++intptr_t *frame::initial_deoptimization_info() { ++ // used to reset the saved FP ++ return fp(); ++} ++ ++intptr_t* frame::real_fp() const { ++ if (_cb != NULL) { ++ // use the frame size if valid ++ int size = _cb->frame_size(); ++ if (size > 0) { ++ return unextended_sp() + size; ++ } ++ } ++ // else rely on fp() ++ assert(! is_compiled_frame(), "unknown compiled frame size"); ++ return fp(); ++} ++ ++#ifndef PRODUCT ++// This is a generic constructor which is only used by pns() in debug.cpp. ++frame::frame(void* sp, void* fp, void* pc) { ++ init((intptr_t*)sp, (intptr_t*)fp, (address)pc); ++} ++#endif +diff --git a/hotspot/src/cpu/mips/vm/frame_mips.hpp b/hotspot/src/cpu/mips/vm/frame_mips.hpp +new file mode 100644 +index 0000000000..9e684a8dc3 +--- /dev/null ++++ b/hotspot/src/cpu/mips/vm/frame_mips.hpp +@@ -0,0 +1,229 @@ ++/* ++ * Copyright (c) 1997, 2013, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. 
++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#ifndef CPU_MIPS_VM_FRAME_MIPS_HPP ++#define CPU_MIPS_VM_FRAME_MIPS_HPP ++ ++#include "runtime/synchronizer.hpp" ++#include "utilities/top.hpp" ++ ++// A frame represents a physical stack frame (an activation). Frames can be ++// C or Java frames, and the Java frames can be interpreted or compiled. ++// In contrast, vframes represent source-level activations, so that one physical frame ++// can correspond to multiple source level frames because of inlining. ++// A frame is comprised of {pc, fp, sp} ++// ------------------------------ Asm interpreter ---------------------------------------- ++// Layout of asm interpreter frame: ++// [expression stack ] * <- sp ++// [monitors ] \ ++// ... | monitor block size ++// [monitors ] / ++// [monitor block size ] ++// [byte code index/pointr] = bcx() bcx_offset ++// [pointer to locals ] = locals() locals_offset ++// [constant pool cache ] = cache() cache_offset ++// [methodData ] = mdp() mdx_offset ++// [methodOop ] = method() method_offset ++// [last sp ] = last_sp() last_sp_offset ++// [old stack pointer ] (sender_sp) sender_sp_offset ++// [old frame pointer ] <- fp = link() ++// [return pc ] ++// [oop temp ] (only for native calls) ++// [locals and parameters ] ++// <- sender sp ++// ------------------------------ Asm interpreter ---------------------------------------- ++ ++// ------------------------------ C++ interpreter ---------------------------------------- ++// ++// Layout of C++ interpreter frame: (While executing in BytecodeInterpreter::run) ++// ++// <- SP (current sp) ++// [local variables ] BytecodeInterpreter::run local variables ++// ... BytecodeInterpreter::run local variables ++// [local variables ] BytecodeInterpreter::run local variables ++// [old frame pointer ] fp [ BytecodeInterpreter::run's fp ] ++// [return pc ] (return to frame manager) ++// [interpreter_state* ] (arg to BytecodeInterpreter::run) -------------- ++// [expression stack ] <- last_Java_sp | ++// [... ] * <- interpreter_state.stack | ++// [expression stack ] * <- interpreter_state.stack_base | ++// [monitors ] \ | ++// ... | monitor block size | ++// [monitors ] / <- interpreter_state.monitor_base | ++// [struct interpretState ] <-----------------------------------------| ++// [return pc ] (return to callee of frame manager [1] ++// [locals and parameters ] ++// <- sender sp ++ ++// [1] When the c++ interpreter calls a new method it returns to the frame ++// manager which allocates a new frame on the stack. In that case there ++// is no real callee of this newly allocated frame. The frame manager is ++// aware of the additional frame(s) and will pop them as nested calls ++// complete. 
Howevers tTo make it look good in the debugger the frame ++// manager actually installs a dummy pc pointing to RecursiveInterpreterActivation ++// with a fake interpreter_state* parameter to make it easy to debug ++// nested calls. ++ ++// Note that contrary to the layout for the assembly interpreter the ++// expression stack allocated for the C++ interpreter is full sized. ++// However this is not as bad as it seems as the interpreter frame_manager ++// will truncate the unused space on succesive method calls. ++// ++// ------------------------------ C++ interpreter ---------------------------------------- ++ ++// Layout of interpreter frame: ++// ++// [ monitor entry ] <--- sp ++// ... ++// [ monitor entry ] ++// -9 [ monitor block top ] ( the top monitor entry ) ++// -8 [ byte code pointer ] (if native, bcp = 0) ++// -7 [ constant pool cache ] ++// -6 [ methodData ] mdx_offset(not core only) ++// -5 [ mirror ] ++// -4 [ methodOop ] ++// -3 [ locals offset ] ++// -2 [ last_sp ] ++// -1 [ sender's sp ] ++// 0 [ sender's fp ] <--- fp ++// 1 [ return address ] ++// 2 [ oop temp offset ] (only for native calls) ++// 3 [ result handler offset ] (only for native calls) ++// 4 [ result type info ] (only for native calls) ++// [ local var m-1 ] ++// ... ++// [ local var 0 ] ++// [ argumnet word n-1 ] <--- ( sender's sp ) ++// ... ++// [ argument word 0 ] <--- S7 ++ ++ public: ++ enum { ++ pc_return_offset = 0, ++ // All frames ++ link_offset = 0, ++ return_addr_offset = 1, ++ // non-interpreter frames ++ sender_sp_offset = 2, ++ ++#ifndef CC_INTERP ++ ++ // Interpreter frames ++ interpreter_frame_return_addr_offset = 1, ++ interpreter_frame_result_handler_offset = 3, // for native calls only ++ interpreter_frame_oop_temp_offset = 2, // for native calls only ++ ++ interpreter_frame_sender_fp_offset = 0, ++ interpreter_frame_sender_sp_offset = -1, ++ // outgoing sp before a call to an invoked method ++ interpreter_frame_last_sp_offset = interpreter_frame_sender_sp_offset - 1, ++ interpreter_frame_locals_offset = interpreter_frame_last_sp_offset - 1, ++ interpreter_frame_method_offset = interpreter_frame_locals_offset - 1, ++ interpreter_frame_mdx_offset = interpreter_frame_method_offset - 1, ++ interpreter_frame_cache_offset = interpreter_frame_mdx_offset - 1, ++ interpreter_frame_bcx_offset = interpreter_frame_cache_offset - 1, ++ interpreter_frame_initial_sp_offset = interpreter_frame_bcx_offset - 1, ++ ++ interpreter_frame_monitor_block_top_offset = interpreter_frame_initial_sp_offset, ++ interpreter_frame_monitor_block_bottom_offset = interpreter_frame_initial_sp_offset, ++ ++#endif // CC_INTERP ++ ++ // Entry frames ++ entry_frame_call_wrapper_offset = -9, ++ ++ // Native frames ++ ++ native_frame_initial_param_offset = 2 ++ ++ }; ++ ++ intptr_t ptr_at(int offset) const { ++ return *ptr_at_addr(offset); ++ } ++ ++ void ptr_at_put(int offset, intptr_t value) { ++ *ptr_at_addr(offset) = value; ++ } ++ ++ private: ++ // an additional field beyond _sp and _pc: ++ intptr_t* _fp; // frame pointer ++ // The interpreter and adapters will extend the frame of the caller. ++ // Since oopMaps are based on the sp of the caller before extension ++ // we need to know that value. However in order to compute the address ++ // of the return address we need the real "raw" sp. Since sparc already ++ // uses sp() to mean "raw" sp and unextended_sp() to mean the caller's ++ // original sp we use that convention. 
++ ++ intptr_t* _unextended_sp; ++ void adjust_unextended_sp(); ++ ++ intptr_t* ptr_at_addr(int offset) const { ++ return (intptr_t*) addr_at(offset); ++ } ++#ifdef ASSERT ++ // Used in frame::sender_for_{interpreter,compiled}_frame ++ static void verify_deopt_original_pc( nmethod* nm, intptr_t* unextended_sp, bool is_method_handle_return = false); ++ static void verify_deopt_mh_original_pc(nmethod* nm, intptr_t* unextended_sp) { ++ verify_deopt_original_pc(nm, unextended_sp, true); ++ } ++#endif ++ ++ public: ++ // Constructors ++ ++ frame(intptr_t* sp, intptr_t* fp, address pc); ++ ++ frame(intptr_t* sp, intptr_t* unextended_sp, intptr_t* fp, address pc); ++ ++ frame(intptr_t* sp, intptr_t* fp); ++ ++ void init(intptr_t* sp, intptr_t* fp, address pc); ++ ++ // accessors for the instance variables ++ intptr_t* fp() const { return _fp; } ++ ++ inline address* sender_pc_addr() const; ++ ++ // return address of param, zero origin index. ++ inline address* native_param_addr(int idx) const; ++ ++ // expression stack tos if we are nested in a java call ++ intptr_t* interpreter_frame_last_sp() const; ++ ++ // helper to update a map with callee-saved FP ++ static void update_map_with_saved_link(RegisterMap* map, intptr_t** link_addr); ++ ++#ifndef CC_INTERP ++ // deoptimization support ++ void interpreter_frame_set_last_sp(intptr_t* sp); ++#endif // CC_INTERP ++ ++#ifdef CC_INTERP ++ inline interpreterState get_interpreterState() const; ++#endif // CC_INTERP ++ ++#endif // CPU_MIPS_VM_FRAME_MIPS_HPP +diff --git a/hotspot/src/cpu/mips/vm/frame_mips.inline.hpp b/hotspot/src/cpu/mips/vm/frame_mips.inline.hpp +new file mode 100644 +index 0000000000..60e56ac7ab +--- /dev/null ++++ b/hotspot/src/cpu/mips/vm/frame_mips.inline.hpp +@@ -0,0 +1,312 @@ ++/* ++ * Copyright (c) 1997, 2013, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2019, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ * ++ */ ++ ++#ifndef CPU_MIPS_VM_FRAME_MIPS_INLINE_HPP ++#define CPU_MIPS_VM_FRAME_MIPS_INLINE_HPP ++ ++#include "code/codeCache.hpp" ++ ++// Inline functions for Loongson frames: ++ ++// Constructors: ++ ++inline frame::frame() { ++ _pc = NULL; ++ _sp = NULL; ++ _unextended_sp = NULL; ++ _fp = NULL; ++ _cb = NULL; ++ _deopt_state = unknown; ++} ++ ++inline void frame::init(intptr_t* sp, intptr_t* fp, address pc) { ++ _sp = sp; ++ _unextended_sp = sp; ++ _fp = fp; ++ _pc = pc; ++ assert(pc != NULL, "no pc?"); ++ _cb = CodeCache::find_blob(pc); ++ adjust_unextended_sp(); ++ ++ address original_pc = nmethod::get_deopt_original_pc(this); ++ if (original_pc != NULL) { ++ _pc = original_pc; ++ _deopt_state = is_deoptimized; ++ } else { ++ _deopt_state = not_deoptimized; ++ } ++} ++ ++inline frame::frame(intptr_t* sp, intptr_t* fp, address pc) { ++ init(sp, fp, pc); ++} ++ ++inline frame::frame(intptr_t* sp, intptr_t* unextended_sp, intptr_t* fp, address pc) { ++ _sp = sp; ++ _unextended_sp = unextended_sp; ++ _fp = fp; ++ _pc = pc; ++ assert(pc != NULL, "no pc?"); ++ _cb = CodeCache::find_blob(pc); ++ adjust_unextended_sp(); ++ ++ address original_pc = nmethod::get_deopt_original_pc(this); ++ if (original_pc != NULL) { ++ _pc = original_pc; ++ _deopt_state = is_deoptimized; ++ } else { ++ _deopt_state = not_deoptimized; ++ } ++} ++ ++inline frame::frame(intptr_t* sp, intptr_t* fp) { ++ _sp = sp; ++ _unextended_sp = sp; ++ _fp = fp; ++ _pc = (address)(sp[-1]); ++ ++ // Here's a sticky one. This constructor can be called via AsyncGetCallTrace ++ // when last_Java_sp is non-null but the pc fetched is junk. If we are truly ++ // unlucky the junk value could be to a zombied method and we'll die on the ++ // find_blob call. This is also why we can have no asserts on the validity ++ // of the pc we find here. AsyncGetCallTrace -> pd_get_top_frame_for_signal_handler ++ // -> pd_last_frame should use a specialized version of pd_last_frame which could ++ // call a specilaized frame constructor instead of this one. ++ // Then we could use the assert below. However this assert is of somewhat dubious ++ // value. ++ // assert(_pc != NULL, "no pc?"); ++ ++ _cb = CodeCache::find_blob(_pc); ++ adjust_unextended_sp(); ++ address original_pc = nmethod::get_deopt_original_pc(this); ++ if (original_pc != NULL) { ++ _pc = original_pc; ++ _deopt_state = is_deoptimized; ++ } else { ++ _deopt_state = not_deoptimized; ++ } ++} ++ ++// Accessors ++ ++inline bool frame::equal(frame other) const { ++ bool ret = sp() == other.sp() ++ && unextended_sp() == other.unextended_sp() ++ && fp() == other.fp() ++ && pc() == other.pc(); ++ assert(!ret || ret && cb() == other.cb() && _deopt_state == other._deopt_state, "inconsistent construction"); ++ return ret; ++} ++ ++// Return unique id for this frame. The id must have a value where we can distinguish ++// identity and younger/older relationship. NULL represents an invalid (incomparable) ++// frame. 
++inline intptr_t* frame::id(void) const { return unextended_sp(); } ++ ++// Relationals on frames based ++// Return true if the frame is younger (more recent activation) than the frame represented by id ++inline bool frame::is_younger(intptr_t* id) const { assert(this->id() != NULL && id != NULL, "NULL frame id"); ++ return this->id() < id ; } ++ ++// Return true if the frame is older (less recent activation) than the frame represented by id ++inline bool frame::is_older(intptr_t* id) const { assert(this->id() != NULL && id != NULL, "NULL frame id"); ++ return this->id() > id ; } ++ ++ ++ ++inline intptr_t* frame::link() const { return (intptr_t*) *(intptr_t **)addr_at(link_offset); } ++inline void frame::set_link(intptr_t* addr) { *(intptr_t **)addr_at(link_offset) = addr; } ++ ++ ++inline intptr_t* frame::unextended_sp() const { return _unextended_sp; } ++ ++// Return address: ++ ++inline address* frame::sender_pc_addr() const { return (address*) addr_at( return_addr_offset); } ++inline address frame::sender_pc() const { return *sender_pc_addr(); } ++ ++// return address of param, zero origin index. ++inline address* frame::native_param_addr(int idx) const { return (address*) addr_at( native_frame_initial_param_offset+idx); } ++ ++#ifdef CC_INTERP ++ ++inline interpreterState frame::get_interpreterState() const { ++ return ((interpreterState)addr_at( -sizeof(BytecodeInterpreter)/wordSize )); ++} ++ ++inline intptr_t* frame::sender_sp() const { ++ // Hmm this seems awfully expensive QQQ, is this really called with interpreted frames? ++ if (is_interpreted_frame()) { ++ assert(false, "should never happen"); ++ return get_interpreterState()->sender_sp(); ++ } else { ++ return addr_at(sender_sp_offset); ++ } ++} ++ ++inline intptr_t** frame::interpreter_frame_locals_addr() const { ++ assert(is_interpreted_frame(), "must be interpreted"); ++ return &(get_interpreterState()->_locals); ++} ++ ++inline intptr_t* frame::interpreter_frame_bcx_addr() const { ++ assert(is_interpreted_frame(), "must be interpreted"); ++ return (intptr_t*) &(get_interpreterState()->_bcp); ++} ++ ++ ++// Constant pool cache ++ ++inline ConstantPoolCache** frame::interpreter_frame_cache_addr() const { ++ assert(is_interpreted_frame(), "must be interpreted"); ++ return &(get_interpreterState()->_constants); ++} ++ ++// Method ++ ++inline Method** frame::interpreter_frame_method_addr() const { ++ assert(is_interpreted_frame(), "must be interpreted"); ++ return &(get_interpreterState()->_method); ++} ++ ++inline intptr_t* frame::interpreter_frame_mdx_addr() const { ++ assert(is_interpreted_frame(), "must be interpreted"); ++ return (intptr_t*) &(get_interpreterState()->_mdx); ++} ++ ++// top of expression stack ++inline intptr_t* frame::interpreter_frame_tos_address() const { ++ assert(is_interpreted_frame(), "wrong frame type"); ++ return get_interpreterState()->_stack + 1; ++} ++ ++#else // asm interpreter ++inline intptr_t* frame::sender_sp() const { return addr_at( sender_sp_offset); } ++ ++inline intptr_t** frame::interpreter_frame_locals_addr() const { ++ return (intptr_t**)addr_at(interpreter_frame_locals_offset); ++} ++ ++inline intptr_t* frame::interpreter_frame_last_sp() const { ++ return *(intptr_t**)addr_at(interpreter_frame_last_sp_offset); ++} ++ ++inline intptr_t* frame::interpreter_frame_bcx_addr() const { ++ return (intptr_t*)addr_at(interpreter_frame_bcx_offset); ++} ++ ++ ++inline intptr_t* frame::interpreter_frame_mdx_addr() const { ++ return (intptr_t*)addr_at(interpreter_frame_mdx_offset); ++} ++ ++ 
++ ++// Constant pool cache ++ ++inline ConstantPoolCache** frame::interpreter_frame_cache_addr() const { ++ return (ConstantPoolCache**)addr_at(interpreter_frame_cache_offset); ++} ++ ++// Method ++ ++inline Method** frame::interpreter_frame_method_addr() const { ++ return (Method**)addr_at(interpreter_frame_method_offset); ++} ++ ++// top of expression stack ++inline intptr_t* frame::interpreter_frame_tos_address() const { ++ intptr_t* last_sp = interpreter_frame_last_sp(); ++ if (last_sp == NULL ) { ++ return sp(); ++ } else { ++ // sp() may have been extended by an adapter ++ assert(last_sp <= (intptr_t*)interpreter_frame_monitor_end(), "bad tos"); ++ return last_sp; ++ } ++} ++ ++inline oop* frame::interpreter_frame_temp_oop_addr() const { ++ return (oop *)(fp() + interpreter_frame_oop_temp_offset); ++} ++ ++#endif // CC_INTERP ++ ++inline int frame::pd_oop_map_offset_adjustment() const { ++ return 0; ++} ++ ++inline int frame::interpreter_frame_monitor_size() { ++ return BasicObjectLock::size(); ++} ++ ++ ++// expression stack ++// (the max_stack arguments are used by the GC; see class FrameClosure) ++ ++inline intptr_t* frame::interpreter_frame_expression_stack() const { ++ intptr_t* monitor_end = (intptr_t*) interpreter_frame_monitor_end(); ++ return monitor_end-1; ++} ++ ++ ++inline jint frame::interpreter_frame_expression_stack_direction() { return -1; } ++ ++ ++// Entry frames ++ ++inline JavaCallWrapper** frame::entry_frame_call_wrapper_addr() const { ++ return (JavaCallWrapper**)addr_at(entry_frame_call_wrapper_offset); ++} ++ ++// Compiled frames ++ ++inline int frame::local_offset_for_compiler(int local_index, int nof_args, int max_nof_locals, int max_nof_monitors) { ++ return (nof_args - local_index + (local_index < nof_args ? 1: -1)); ++} ++ ++inline int frame::monitor_offset_for_compiler(int local_index, int nof_args, int max_nof_locals, int max_nof_monitors) { ++ return local_offset_for_compiler(local_index, nof_args, max_nof_locals, max_nof_monitors); ++} ++ ++inline int frame::min_local_offset_for_compiler(int nof_args, int max_nof_locals, int max_nof_monitors) { ++ return (nof_args - (max_nof_locals + max_nof_monitors*2) - 1); ++} ++ ++inline bool frame::volatile_across_calls(Register reg) { ++ return true; ++} ++ ++ ++ ++inline oop frame::saved_oop_result(RegisterMap* map) const { ++ return *((oop*) map->location(V0->as_VMReg())); ++} ++ ++inline void frame::set_saved_oop_result(RegisterMap* map, oop obj) { ++ *((oop*) map->location(V0->as_VMReg())) = obj; ++} ++ ++#endif // CPU_MIPS_VM_FRAME_MIPS_INLINE_HPP +diff --git a/hotspot/src/cpu/mips/vm/globalDefinitions_mips.hpp b/hotspot/src/cpu/mips/vm/globalDefinitions_mips.hpp +new file mode 100644 +index 0000000000..bd00a8d473 +--- /dev/null ++++ b/hotspot/src/cpu/mips/vm/globalDefinitions_mips.hpp +@@ -0,0 +1,41 @@ ++/* ++ * Copyright (c) 1999, 2013, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2020, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#ifndef CPU_MIPS_VM_GLOBALDEFINITIONS_MIPS_HPP ++#define CPU_MIPS_VM_GLOBALDEFINITIONS_MIPS_HPP ++// Size of MIPS Instructions ++const int BytesPerInstWord = 4; ++ ++const int StackAlignmentInBytes = (2*wordSize); ++ ++// Indicates whether the C calling conventions require that ++// 32-bit integer argument values are properly extended to 64 bits. ++// If set, SharedRuntime::c_calling_convention() must adapt ++// signatures accordingly. ++const bool CCallingConventionRequiresIntsAsLongs = false; ++ ++#define SUPPORTS_NATIVE_CX8 ++ ++#endif // CPU_MIPS_VM_GLOBALDEFINITIONS_MIPS_HPP +diff --git a/hotspot/src/cpu/mips/vm/globals_mips.hpp b/hotspot/src/cpu/mips/vm/globals_mips.hpp +new file mode 100644 +index 0000000000..988bc35137 +--- /dev/null ++++ b/hotspot/src/cpu/mips/vm/globals_mips.hpp +@@ -0,0 +1,124 @@ ++/* ++ * Copyright (c) 2000, 2013, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#ifndef CPU_MIPS_VM_GLOBALS_MIPS_HPP ++#define CPU_MIPS_VM_GLOBALS_MIPS_HPP ++ ++#include "utilities/globalDefinitions.hpp" ++#include "utilities/macros.hpp" ++ ++// Sets the default values for platform dependent flags used by the runtime system. ++// (see globals.hpp) ++ ++#ifdef CORE ++define_pd_global(bool, UseSSE, 0); ++#endif /* CORE */ ++define_pd_global(bool, ConvertSleepToYield, true); ++define_pd_global(bool, ShareVtableStubs, true); ++define_pd_global(bool, CountInterpCalls, true); ++ ++define_pd_global(bool, ImplicitNullChecks, true); // Generate code for implicit null checks ++define_pd_global(bool, TrapBasedNullChecks, false); // Not needed on x86. ++define_pd_global(bool, UncommonNullCast, true); // Uncommon-trap NULLs passed to check cast ++define_pd_global(bool, NeedsDeoptSuspend, false); // only register window machines need this ++ ++// See 4827828 for this change. There is no globals_core_i486.hpp. 
I can't ++// assign a different value for C2 without touching a number of files. Use ++// #ifdef to minimize the change as it's late in Mantis. -- FIXME. ++// c1 doesn't have this problem because the fix to 4858033 assures us ++// the the vep is aligned at CodeEntryAlignment whereas c2 only aligns ++// the uep and the vep doesn't get real alignment but just slops on by ++// only assured that the entry instruction meets the 5 byte size requirement. ++define_pd_global(intx, CodeEntryAlignment, 16); ++define_pd_global(intx, OptoLoopAlignment, 16); ++define_pd_global(intx, InlineFrequencyCount, 100); ++define_pd_global(intx, InlineSmallCode, 4000); // MIPS generates 3x instructions than X86 ++ ++define_pd_global(uintx, TLABSize, 0); ++define_pd_global(uintx, NewSize, 1024 * K); ++define_pd_global(intx, PreInflateSpin, 10); ++ ++define_pd_global(intx, PrefetchCopyIntervalInBytes, -1); ++define_pd_global(intx, PrefetchScanIntervalInBytes, -1); ++define_pd_global(intx, PrefetchFieldsAhead, -1); ++ ++define_pd_global(intx, StackYellowPages, 2); ++define_pd_global(intx, StackRedPages, 1); ++define_pd_global(intx, StackShadowPages, 3 DEBUG_ONLY(+1)); ++ ++define_pd_global(bool, RewriteBytecodes, true); ++define_pd_global(bool, RewriteFrequentPairs, true); ++define_pd_global(bool, UseMembar, true); ++// GC Ergo Flags ++define_pd_global(intx, CMSYoungGenPerWorker, 64*M); // default max size of CMS young gen, per GC worker thread ++ ++define_pd_global(uintx, TypeProfileLevel, 111); ++ ++define_pd_global(bool, PreserveFramePointer, false); ++// Only c2 cares about this at the moment ++define_pd_global(intx, AllocatePrefetchStyle, 2); ++define_pd_global(intx, AllocatePrefetchDistance, -1); ++ ++#define ARCH_FLAGS(develop, product, diagnostic, experimental, notproduct) \ ++ \ ++ product(bool, UseLEXT1, false, \ ++ "Use LoongISA general EXTensions 1") \ ++ \ ++ product(bool, UseLEXT2, false, \ ++ "Use LoongISA general EXTensions 2") \ ++ \ ++ product(bool, UseLEXT3, false, \ ++ "Use LoongISA general EXTensions 3") \ ++ \ ++ product(bool, UseCodeCacheAllocOpt, true, \ ++ "Allocate code cache within 32-bit memory address space") \ ++ \ ++ product(intx, UseSyncLevel, 10000, \ ++ "The sync level on Loongson CPUs" \ ++ "UseSyncLevel == 10000, 111, for all Loongson CPUs, " \ ++ "UseSyncLevel == 4000, 101, maybe for GS464V" \ ++ "UseSyncLevel == 3000, 001, maybe for GS464V" \ ++ "UseSyncLevel == 2000, 011, maybe for GS464E/GS264" \ ++ "UseSyncLevel == 1000, 110, maybe for GS464") \ ++ \ ++ develop(bool, UseBoundCheckInstruction, false, \ ++ "Use bound check instruction") \ ++ \ ++ product(intx, SetFSFOFN, 999, \ ++ "Set the FS/FO/FN bits in FCSR" \ ++ "999 means FS/FO/FN will not be changed" \ ++ "=XYZ, with X:FS, Y:FO, Z:FN, X, Y and Z in 0=off, 1=on") \ ++ \ ++ /* assembler */ \ ++ product(bool, UseCountLeadingZerosInstructionMIPS64, true, \ ++ "Use count leading zeros instruction") \ ++ \ ++ product(bool, UseCountTrailingZerosInstructionMIPS64, false, \ ++ "Use count trailing zeros instruction") \ ++ \ ++ product(bool, UseActiveCoresMP, false, \ ++ "Eliminate barriers for single active cpu") ++ ++#endif // CPU_MIPS_VM_GLOBALS_MIPS_HPP +diff --git a/hotspot/src/cpu/mips/vm/icBuffer_mips.cpp b/hotspot/src/cpu/mips/vm/icBuffer_mips.cpp +new file mode 100644 +index 0000000000..96ea345360 +--- /dev/null ++++ b/hotspot/src/cpu/mips/vm/icBuffer_mips.cpp +@@ -0,0 +1,97 @@ ++/* ++ * Copyright (c) 1997, 2012, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2022, Loongson Technology. 
All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#include "precompiled.hpp" ++#include "asm/macroAssembler.hpp" ++#include "asm/macroAssembler.inline.hpp" ++#include "code/icBuffer.hpp" ++#include "gc_interface/collectedHeap.inline.hpp" ++#include "interpreter/bytecodes.hpp" ++#include "memory/resourceArea.hpp" ++#include "nativeInst_mips.hpp" ++#include "oops/oop.inline.hpp" ++#include "oops/oop.inline2.hpp" ++ ++#define A0 RA0 ++#define A1 RA1 ++#define A2 RA2 ++#define A3 RA3 ++#define A4 RA4 ++#define A5 RA5 ++#define A6 RA6 ++#define A7 RA7 ++#define T0 RT0 ++#define T1 RT1 ++#define T2 RT2 ++#define T3 RT3 ++#define T8 RT8 ++#define T9 RT9 ++ ++int InlineCacheBuffer::ic_stub_code_size() { ++ return NativeMovConstReg::instruction_size + ++ NativeGeneralJump::instruction_size + ++ 1; ++ // so that code_end can be set in CodeBuffer ++ // 64bit 15 = 6 + 8 bytes + 1 byte ++ // 32bit 7 = 2 + 4 bytes + 1 byte ++} ++ ++ ++// we use T1 as cached oop(klass) now. 
this is the target of virtual call, ++// when reach here, the receiver in T0 ++// refer to shareRuntime_mips.cpp,gen_i2c2i_adapters ++void InlineCacheBuffer::assemble_ic_buffer_code(address code_begin, void* cached_value, address entry_point) { ++ ResourceMark rm; ++ CodeBuffer code(code_begin, ic_stub_code_size()); ++ MacroAssembler* masm = new MacroAssembler(&code); ++ // note: even though the code contains an embedded oop, we do not need reloc info ++ // because ++ // (1) the oop is old (i.e., doesn't matter for scavenges) ++ // (2) these ICStubs are removed *before* a GC happens, so the roots disappear ++// assert(cached_oop == NULL || cached_oop->is_perm(), "must be perm oop"); ++#define __ masm-> ++ __ patchable_set48(T1, (long)cached_value); ++ ++ __ patchable_jump(entry_point); ++ __ flush(); ++#undef __ ++} ++ ++ ++address InlineCacheBuffer::ic_buffer_entry_point(address code_begin) { ++ NativeMovConstReg* move = nativeMovConstReg_at(code_begin); // creation also verifies the object ++ NativeGeneralJump* jump = nativeGeneralJump_at(move->next_instruction_address()); ++ return jump->jump_destination(); ++} ++ ++ ++void* InlineCacheBuffer::ic_buffer_cached_value(address code_begin) { ++ // creation also verifies the object ++ NativeMovConstReg* move = nativeMovConstReg_at(code_begin); ++ // Verifies the jump ++ NativeGeneralJump* jump = nativeGeneralJump_at(move->next_instruction_address()); ++ void* o= (void*)move->data(); ++ return o; ++} +diff --git a/hotspot/src/cpu/mips/vm/icache_mips.cpp b/hotspot/src/cpu/mips/vm/icache_mips.cpp +new file mode 100644 +index 0000000000..848964b63f +--- /dev/null ++++ b/hotspot/src/cpu/mips/vm/icache_mips.cpp +@@ -0,0 +1,41 @@ ++/* ++ * Copyright (c) 1997, 2010, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2021, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ * ++ */ ++ ++#include "precompiled.hpp" ++#include "asm/macroAssembler.hpp" ++#include "runtime/icache.hpp" ++ ++void ICacheStubGenerator::generate_icache_flush(ICache::flush_icache_stub_t* flush_icache_stub) ++{ ++#define __ _masm-> ++ StubCodeMark mark(this, "ICache", "flush_icache_stub"); ++ address start = __ pc(); ++ ++ __ jr_hb(RA); ++ __ delayed()->ori(V0, RA2, 0); ++ ++ *flush_icache_stub = (ICache::flush_icache_stub_t)start; ++#undef __ ++} +diff --git a/hotspot/src/cpu/mips/vm/icache_mips.hpp b/hotspot/src/cpu/mips/vm/icache_mips.hpp +new file mode 100644 +index 0000000000..78ee11cc73 +--- /dev/null ++++ b/hotspot/src/cpu/mips/vm/icache_mips.hpp +@@ -0,0 +1,41 @@ ++/* ++ * Copyright (c) 1997, 2010, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2021, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#ifndef CPU_MIPS_VM_ICACHE_MIPS_HPP ++#define CPU_MIPS_VM_ICACHE_MIPS_HPP ++ ++// Interface for updating the instruction cache. Whenever the VM modifies ++// code, part of the processor instruction cache potentially has to be flushed. ++ ++class ICache : public AbstractICache { ++ public: ++ enum { ++ stub_size = 2 * BytesPerInstWord, // Size of the icache flush stub in bytes ++ line_size = 32, // flush instruction affects a dword ++ log2_line_size = 5 // log2(line_size) ++ }; ++}; ++ ++#endif // CPU_MIPS_VM_ICACHE_MIPS_HPP +diff --git a/hotspot/src/cpu/mips/vm/interp_masm_mips_64.cpp b/hotspot/src/cpu/mips/vm/interp_masm_mips_64.cpp +new file mode 100644 +index 0000000000..ed2d931e94 +--- /dev/null ++++ b/hotspot/src/cpu/mips/vm/interp_masm_mips_64.cpp +@@ -0,0 +1,2084 @@ ++/* ++ * Copyright (c) 2003, 2013, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). 
++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#include "precompiled.hpp" ++#include "interp_masm_mips_64.hpp" ++#include "interpreter/interpreter.hpp" ++#include "interpreter/interpreterRuntime.hpp" ++#include "oops/arrayOop.hpp" ++#include "oops/markOop.hpp" ++#include "oops/methodData.hpp" ++#include "oops/method.hpp" ++#include "prims/jvmtiExport.hpp" ++#include "prims/jvmtiRedefineClassesTrace.hpp" ++#include "prims/jvmtiThreadState.hpp" ++#include "runtime/basicLock.hpp" ++#include "runtime/biasedLocking.hpp" ++#include "runtime/sharedRuntime.hpp" ++#include "runtime/thread.inline.hpp" ++ ++#define A0 RA0 ++#define A1 RA1 ++#define A2 RA2 ++#define A3 RA3 ++#define A4 RA4 ++#define A5 RA5 ++#define A6 RA6 ++#define A7 RA7 ++#define T0 RT0 ++#define T1 RT1 ++#define T2 RT2 ++#define T3 RT3 ++#define T8 RT8 ++#define T9 RT9 ++ ++// Implementation of InterpreterMacroAssembler ++ ++#ifdef CC_INTERP ++void InterpreterMacroAssembler::get_method(Register reg) { ++} ++#endif // CC_INTERP ++ ++void InterpreterMacroAssembler::get_2_byte_integer_at_bcp(Register reg, Register tmp, int offset) { ++ // The runtime address of BCP may be unaligned. ++ // Refer to the SPARC implementation. ++ lbu(reg, BCP, offset+1); ++ lbu(tmp, BCP, offset); ++ dsll(reg, reg, 8); ++ daddu(reg, tmp, reg); ++} ++ ++void InterpreterMacroAssembler::get_4_byte_integer_at_bcp(Register reg, Register tmp, int offset) { ++ assert(reg != tmp, "need separate temp register"); ++ if (offset & 3) { // Offset unaligned? ++ lbu(reg, BCP, offset+3); ++ lbu(tmp, BCP, offset+2); ++ dsll(reg, reg, 8); ++ daddu(reg, tmp, reg); ++ lbu(tmp, BCP, offset+1); ++ dsll(reg, reg, 8); ++ daddu(reg, tmp, reg); ++ lbu(tmp, BCP, offset); ++ dsll(reg, reg, 8); ++ daddu(reg, tmp, reg); ++ } else { ++ lwu(reg, BCP, offset); ++ } ++} ++ ++#ifndef CC_INTERP ++ ++void InterpreterMacroAssembler::call_VM_leaf_base(address entry_point, ++ int number_of_arguments) { ++ // interpreter specific ++ // ++ // Note: No need to save/restore bcp & locals (r13 & r14) pointer ++ // since these are callee saved registers and no blocking/ ++ // GC can happen in leaf calls. ++ // Further Note: DO NOT save/restore bcp/locals. If a caller has ++ // already saved them so that it can use BCP/LVP as temporaries ++ // then a save/restore here will DESTROY the copy the caller ++ // saved! There used to be a save_bcp() that only happened in ++ // the ASSERT path (no restore_bcp). Which caused bizarre failures ++ // when jvm built with ASSERTs. ++#ifdef ASSERT ++ save_bcp(); ++ { ++ Label L; ++ ld(AT,FP,frame::interpreter_frame_last_sp_offset * wordSize); ++ beq(AT,R0,L); ++ delayed()->nop(); ++ stop("InterpreterMacroAssembler::call_VM_leaf_base: last_sp != NULL"); ++ bind(L); ++ } ++#endif ++ // super call ++ MacroAssembler::call_VM_leaf_base(entry_point, number_of_arguments); ++ // interpreter specific ++ // Used to ASSERT that BCP/LVP were equal to frame's bcp/locals ++ // but since they may not have been saved (and we don't want to ++ // save them here (see note above) the assert is invalid. 
++} ++ ++void InterpreterMacroAssembler::call_VM_base(Register oop_result, ++ Register java_thread, ++ Register last_java_sp, ++ address entry_point, ++ int number_of_arguments, ++ bool check_exceptions) { ++ // interpreter specific ++ // ++ // Note: Could avoid restoring locals ptr (callee saved) - however doesn't ++ // really make a difference for these runtime calls, since they are ++ // slow anyway. Btw., bcp must be saved/restored since it may change ++ // due to GC. ++ assert(java_thread == noreg , "not expecting a precomputed java thread"); ++ save_bcp(); ++#ifdef ASSERT ++ { ++ Label L; ++ ld(AT, FP, frame::interpreter_frame_last_sp_offset * wordSize); ++ beq(AT, R0, L); ++ delayed()->nop(); ++ stop("InterpreterMacroAssembler::call_VM_base: last_sp != NULL"); ++ bind(L); ++ } ++#endif /* ASSERT */ ++ // super call ++ MacroAssembler::call_VM_base(oop_result, java_thread, last_java_sp, ++ entry_point, number_of_arguments, ++ check_exceptions); ++ // interpreter specific ++ restore_bcp(); ++ restore_locals(); ++} ++ ++ ++void InterpreterMacroAssembler::check_and_handle_popframe(Register java_thread) { ++ if (JvmtiExport::can_pop_frame()) { ++ Label L; ++ // Initiate popframe handling only if it is not already being ++ // processed. If the flag has the popframe_processing bit set, it ++ // means that this code is called *during* popframe handling - we ++ // don't want to reenter. ++ // This method is only called just after the call into the vm in ++ // call_VM_base, so the arg registers are available. ++ // Not clear if any other register is available, so load AT twice ++ assert(AT != java_thread, "check"); ++ lw(AT, java_thread, in_bytes(JavaThread::popframe_condition_offset())); ++ andi(AT, AT, JavaThread::popframe_pending_bit); ++ beq(AT, R0, L); ++ delayed()->nop(); ++ ++ lw(AT, java_thread, in_bytes(JavaThread::popframe_condition_offset())); ++ andi(AT, AT, JavaThread::popframe_processing_bit); ++ bne(AT, R0, L); ++ delayed()->nop(); ++ call_VM_leaf(CAST_FROM_FN_PTR(address, Interpreter::remove_activation_preserving_args_entry)); ++ jr(V0); ++ delayed()->nop(); ++ bind(L); ++ } ++} ++ ++ ++void InterpreterMacroAssembler::load_earlyret_value(TosState state) { ++ Register thread = T8; ++#ifndef OPT_THREAD ++ get_thread(thread); ++#else ++ move(T8, TREG); ++#endif ++ ld_ptr(thread, thread, in_bytes(JavaThread::jvmti_thread_state_offset())); ++ const Address tos_addr (thread, in_bytes(JvmtiThreadState::earlyret_tos_offset())); ++ const Address oop_addr (thread, in_bytes(JvmtiThreadState::earlyret_oop_offset())); ++ const Address val_addr (thread, in_bytes(JvmtiThreadState::earlyret_value_offset())); ++ //V0, oop_addr,V1,val_addr ++ switch (state) { ++ case atos: ++ ld_ptr(V0, oop_addr); ++ st_ptr(R0, oop_addr); ++ verify_oop(V0, state); ++ break; ++ case ltos: ++ ld_ptr(V0, val_addr); // fall through ++ break; ++ case btos: // fall through ++ case ztos: // fall through ++ case ctos: // fall through ++ case stos: // fall through ++ case itos: ++ lw(V0, val_addr); ++ break; ++ case ftos: ++ lwc1(F0, thread, in_bytes(JvmtiThreadState::earlyret_value_offset())); ++ break; ++ case dtos: ++ ldc1(F0, thread, in_bytes(JvmtiThreadState::earlyret_value_offset())); ++ break; ++ case vtos: /* nothing to do */ break; ++ default : ShouldNotReachHere(); ++ } ++ // Clean up tos value in the thread object ++ move(AT, (int)ilgl); ++ sw(AT, tos_addr); ++ sw(R0, thread, in_bytes(JvmtiThreadState::earlyret_value_offset())); ++} ++ ++ ++void InterpreterMacroAssembler::check_and_handle_earlyret(Register 
java_thread) { ++ if (JvmtiExport::can_force_early_return()) { ++ Label L; ++ Register tmp = T9; ++ ++ assert(java_thread != AT, "check"); ++ assert(java_thread != tmp, "check"); ++ ld_ptr(AT, java_thread, in_bytes(JavaThread::jvmti_thread_state_offset())); ++ beq(AT, R0, L); ++ delayed()->nop(); ++ ++ // Initiate earlyret handling only if it is not already being processed. ++ // If the flag has the earlyret_processing bit set, it means that this code ++ // is called *during* earlyret handling - we don't want to reenter. ++ lw(AT, AT, in_bytes(JvmtiThreadState::earlyret_state_offset())); ++ move(tmp, JvmtiThreadState::earlyret_pending); ++ bne(tmp, AT, L); ++ delayed()->nop(); ++ ++ // Call Interpreter::remove_activation_early_entry() to get the address of the ++ // same-named entrypoint in the generated interpreter code. ++ ld_ptr(tmp, java_thread, in_bytes(JavaThread::jvmti_thread_state_offset())); ++ lw(AT, tmp, in_bytes(JvmtiThreadState::earlyret_tos_offset())); ++ move(A0, AT); ++ call_VM_leaf(CAST_FROM_FN_PTR(address, Interpreter::remove_activation_early_entry), A0); ++ jr(V0); ++ delayed()->nop(); ++ bind(L); ++ } ++} ++ ++ ++void InterpreterMacroAssembler::get_unsigned_2_byte_index_at_bcp(Register reg, ++ int bcp_offset) { ++ assert(bcp_offset >= 0, "bcp is still pointing to start of bytecode"); ++ lbu(AT, BCP, bcp_offset); ++ lbu(reg, BCP, bcp_offset + 1); ++ ins(reg, AT, 8, 8); ++} ++ ++ ++void InterpreterMacroAssembler::get_cache_index_at_bcp(Register index, ++ int bcp_offset, ++ size_t index_size) { ++ assert(bcp_offset > 0, "bcp is still pointing to start of bytecode"); ++ if (index_size == sizeof(u2)) { ++ get_2_byte_integer_at_bcp(index, AT, bcp_offset); ++ } else if (index_size == sizeof(u4)) { ++ assert(EnableInvokeDynamic, "giant index used only for JSR 292"); ++ get_4_byte_integer_at_bcp(index, AT, bcp_offset); ++ // Check if the secondary index definition is still ~x, otherwise ++ // we have to change the following assembler code to calculate the ++ // plain index. ++ assert(ConstantPool::decode_invokedynamic_index(~123) == 123, "else change next line"); ++ nor(index, index, R0); ++ sll(index, index, 0); ++ } else if (index_size == sizeof(u1)) { ++ lbu(index, BCP, bcp_offset); ++ } else { ++ ShouldNotReachHere(); ++ } ++} ++ ++ ++void InterpreterMacroAssembler::get_cache_and_index_at_bcp(Register cache, ++ Register index, ++ int bcp_offset, ++ size_t index_size) { ++ assert_different_registers(cache, index); ++ get_cache_index_at_bcp(index, bcp_offset, index_size); ++ ld(cache, FP, frame::interpreter_frame_cache_offset * wordSize); ++ assert(sizeof(ConstantPoolCacheEntry) == 4 * wordSize, "adjust code below"); ++ assert(exact_log2(in_words(ConstantPoolCacheEntry::size())) == 2, "else change next line"); ++ shl(index, 2); ++} ++ ++ ++void InterpreterMacroAssembler::get_cache_and_index_and_bytecode_at_bcp(Register cache, ++ Register index, ++ Register bytecode, ++ int byte_no, ++ int bcp_offset, ++ size_t index_size) { ++ get_cache_and_index_at_bcp(cache, index, bcp_offset, index_size); ++ // We use a 32-bit load here since the layout of 64-bit words on ++ // little-endian machines allow us that. 
++ dsll(AT, index, Address::times_ptr); ++ daddu(AT, cache, AT); ++ lw(bytecode, AT, in_bytes(ConstantPoolCache::base_offset() + ConstantPoolCacheEntry::indices_offset())); ++ if(os::is_MP()) { ++ sync(); // load acquire ++ } ++ ++ const int shift_count = (1 + byte_no) * BitsPerByte; ++ assert((byte_no == TemplateTable::f1_byte && shift_count == ConstantPoolCacheEntry::bytecode_1_shift) || ++ (byte_no == TemplateTable::f2_byte && shift_count == ConstantPoolCacheEntry::bytecode_2_shift), ++ "correct shift count"); ++ dsrl(bytecode, bytecode, shift_count); ++ assert(ConstantPoolCacheEntry::bytecode_1_mask == ConstantPoolCacheEntry::bytecode_2_mask, "common mask"); ++ move(AT, ConstantPoolCacheEntry::bytecode_1_mask); ++ andr(bytecode, bytecode, AT); ++} ++ ++void InterpreterMacroAssembler::get_cache_entry_pointer_at_bcp(Register cache, ++ Register tmp, ++ int bcp_offset, ++ size_t index_size) { ++ assert(bcp_offset > 0, "bcp is still pointing to start of bytecode"); ++ assert(cache != tmp, "must use different register"); ++ get_cache_index_at_bcp(tmp, bcp_offset, index_size); ++ assert(sizeof(ConstantPoolCacheEntry) == 4 * wordSize, "adjust code below"); ++ // convert from field index to ConstantPoolCacheEntry index ++ // and from word offset to byte offset ++ assert(exact_log2(in_bytes(ConstantPoolCacheEntry::size_in_bytes())) == 2 + LogBytesPerWord, "else change next line"); ++ shl(tmp, 2 + LogBytesPerWord); ++ ld(cache, FP, frame::interpreter_frame_cache_offset * wordSize); ++ // skip past the header ++ daddiu(cache, cache, in_bytes(ConstantPoolCache::base_offset())); ++ daddu(cache, cache, tmp); ++} ++ ++void InterpreterMacroAssembler::get_method_counters(Register method, ++ Register mcs, Label& skip) { ++ Label has_counters; ++ ld(mcs, method, in_bytes(Method::method_counters_offset())); ++ bne(mcs, R0, has_counters); ++ delayed()->nop(); ++ call_VM(noreg, CAST_FROM_FN_PTR(address, ++ InterpreterRuntime::build_method_counters), method); ++ ld(mcs, method, in_bytes(Method::method_counters_offset())); ++ beq(mcs, R0, skip); // No MethodCounters allocated, OutOfMemory ++ delayed()->nop(); ++ bind(has_counters); ++} ++ ++// Load object from cpool->resolved_references(index) ++void InterpreterMacroAssembler::load_resolved_reference_at_index( ++ Register result, Register index) { ++ assert_different_registers(result, index); ++ // convert from field index to resolved_references() index and from ++ // word index to byte offset. Since this is a java object, it can be compressed ++ Register tmp = index; // reuse ++ shl(tmp, LogBytesPerHeapOop); ++ ++ get_constant_pool(result); ++ // load pointer for resolved_references[] objArray ++ ld(result, result, ConstantPool::resolved_references_offset_in_bytes()); ++ // JNIHandles::resolve(obj); ++ ld(result, result, 0); //? is needed? ++ // Add in the index ++ daddu(result, result, tmp); ++ load_heap_oop(result, Address(result, arrayOopDesc::base_offset_in_bytes(T_OBJECT))); ++} ++ ++// Resets LVP to locals. Register sub_klass cannot be any of the above. ++void InterpreterMacroAssembler::gen_subtype_check( Register Rsup_klass, Register Rsub_klass, Label &ok_is_subtype ) { ++ assert( Rsub_klass != Rsup_klass, "Rsup_klass holds superklass" ); ++ assert( Rsub_klass != T1, "T1 holds 2ndary super array length" ); ++ assert( Rsub_klass != T0, "T0 holds 2ndary super array scan ptr" ); ++ // Profile the not-null value's klass. ++ // Here T9 and T1 are used as temporary registers. 
++ profile_typecheck(T9, Rsub_klass, T1); // blows T9, reloads T1 ++ ++ // Do the check. ++ check_klass_subtype(Rsub_klass, Rsup_klass, T1, ok_is_subtype); // blows T1 ++ ++ // Profile the failure of the check. ++ profile_typecheck_failed(T9); // blows T9 ++} ++ ++ ++ ++// Java Expression Stack ++ ++void InterpreterMacroAssembler::pop_ptr(Register r) { ++ ld(r, SP, 0); ++ daddiu(SP, SP, Interpreter::stackElementSize); ++} ++ ++void InterpreterMacroAssembler::pop_i(Register r) { ++ lw(r, SP, 0); ++ daddiu(SP, SP, Interpreter::stackElementSize); ++} ++ ++void InterpreterMacroAssembler::pop_l(Register r) { ++ ld(r, SP, 0); ++ daddiu(SP, SP, 2 * Interpreter::stackElementSize); ++} ++ ++void InterpreterMacroAssembler::pop_f(FloatRegister r) { ++ lwc1(r, SP, 0); ++ daddiu(SP, SP, Interpreter::stackElementSize); ++} ++ ++void InterpreterMacroAssembler::pop_d(FloatRegister r) { ++ ldc1(r, SP, 0); ++ daddiu(SP, SP, 2 * Interpreter::stackElementSize); ++} ++ ++void InterpreterMacroAssembler::push_ptr(Register r) { ++ daddiu(SP, SP, - Interpreter::stackElementSize); ++ sd(r, SP, 0); ++} ++ ++void InterpreterMacroAssembler::push_i(Register r) { ++ // For compatibility reason, don't change to sw. ++ daddiu(SP, SP, - Interpreter::stackElementSize); ++ sd(r, SP, 0); ++} ++ ++void InterpreterMacroAssembler::push_l(Register r) { ++ daddiu(SP, SP, -2 * Interpreter::stackElementSize); ++ sd(r, SP, 0); ++ sd(R0, SP, Interpreter::stackElementSize); ++} ++ ++void InterpreterMacroAssembler::push_f(FloatRegister r) { ++ daddiu(SP, SP, - Interpreter::stackElementSize); ++ swc1(r, SP, 0); ++} ++ ++void InterpreterMacroAssembler::push_d(FloatRegister r) { ++ daddiu(SP, SP, -2 * Interpreter::stackElementSize); ++ sdc1(r, SP, 0); ++ sd(R0, SP, Interpreter::stackElementSize); ++} ++ ++void InterpreterMacroAssembler::pop(TosState state) { ++ switch (state) { ++ case atos: pop_ptr(); break; ++ case btos: ++ case ztos: ++ case ctos: ++ case stos: ++ case itos: pop_i(); break; ++ case ltos: pop_l(); break; ++ case ftos: pop_f(); break; ++ case dtos: pop_d(); break; ++ case vtos: /* nothing to do */ break; ++ default: ShouldNotReachHere(); ++ } ++ verify_oop(FSR, state); ++} ++ ++//FSR=V0,SSR=V1 ++void InterpreterMacroAssembler::push(TosState state) { ++ verify_oop(FSR, state); ++ switch (state) { ++ case atos: push_ptr(); break; ++ case btos: ++ case ztos: ++ case ctos: ++ case stos: ++ case itos: push_i(); break; ++ case ltos: push_l(); break; ++ case ftos: push_f(); break; ++ case dtos: push_d(); break; ++ case vtos: /* nothing to do */ break; ++ default : ShouldNotReachHere(); ++ } ++} ++ ++ ++ ++void InterpreterMacroAssembler::load_ptr(int n, Register val) { ++ ld(val, SP, Interpreter::expr_offset_in_bytes(n)); ++} ++ ++void InterpreterMacroAssembler::store_ptr(int n, Register val) { ++ sd(val, SP, Interpreter::expr_offset_in_bytes(n)); ++} ++ ++// Jump to from_interpreted entry of a call unless single stepping is possible ++// in this thread in which case we must call the i2i entry ++void InterpreterMacroAssembler::jump_from_interpreted(Register method, Register temp) { ++ // record last_sp ++ move(Rsender, SP); ++ sd(SP, FP, frame::interpreter_frame_last_sp_offset * wordSize); ++ ++ if (JvmtiExport::can_post_interpreter_events()) { ++ Label run_compiled_code; ++ // JVMTI events, such as single-stepping, are implemented partly by avoiding running ++ // compiled code in threads for which the event is enabled. Check here for ++ // interp_only_mode if these events CAN be enabled. 
++#ifndef OPT_THREAD ++ get_thread(temp); ++#else ++ move(temp, TREG); ++#endif ++ // interp_only is an int, on little endian it is sufficient to test the byte only ++ // Is a cmpl faster? ++ lw(AT, temp, in_bytes(JavaThread::interp_only_mode_offset())); ++ beq(AT, R0, run_compiled_code); ++ delayed()->nop(); ++ ld(AT, method, in_bytes(Method::interpreter_entry_offset())); ++ jr(AT); ++ delayed()->nop(); ++ bind(run_compiled_code); ++ } ++ ++ ld(AT, method, in_bytes(Method::from_interpreted_offset())); ++ jr(AT); ++ delayed()->nop(); ++} ++ ++ ++// The following two routines provide a hook so that an implementation ++// can schedule the dispatch in two parts. mips64 does not do this. ++void InterpreterMacroAssembler::dispatch_prolog(TosState state, int step) { ++ // Nothing mips64 specific to be done here ++} ++ ++void InterpreterMacroAssembler::dispatch_epilog(TosState state, int step) { ++ dispatch_next(state, step); ++} ++ ++// assume the next bytecode in T8. ++void InterpreterMacroAssembler::dispatch_base(TosState state, ++ address* table, ++ bool verifyoop) { ++ if (VerifyActivationFrameSize) { ++ Label L; ++ ++ dsubu(T2, FP, SP); ++ int min_frame_size = (frame::link_offset - ++ frame::interpreter_frame_initial_sp_offset) * wordSize; ++ daddiu(T2, T2,- min_frame_size); ++ bgez(T2, L); ++ delayed()->nop(); ++ stop("broken stack frame"); ++ bind(L); ++ } ++ // FIXME: I do not know which register should pass to verify_oop ++ if (verifyoop) verify_oop(FSR, state); ++ dsll(T2, Rnext, LogBytesPerWord); ++ ++ if((long)table >= (long)Interpreter::dispatch_table(btos) && ++ (long)table <= (long)Interpreter::dispatch_table(vtos) ++ ) { ++ int table_size = (long)Interpreter::dispatch_table(itos) - (long)Interpreter::dispatch_table(stos); ++ int table_offset = ((int)state - (int)itos) * table_size; ++ ++ // GP points to the starting address of Interpreter::dispatch_table(itos). ++ // See StubGenerator::generate_call_stub(address& return_address) for the initialization of GP. ++ if(table_offset != 0) { ++ daddiu(T3, GP, table_offset); ++ if (UseLEXT1) { ++ gsldx(T3, T2, T3, 0); ++ } else { ++ daddu(T3, T2, T3); ++ ld(T3, T3, 0); ++ } ++ } else { ++ if (UseLEXT1) { ++ gsldx(T3, T2, GP, 0); ++ } else { ++ daddu(T3, T2, GP); ++ ld(T3, T3, 0); ++ } ++ } ++ } else { ++ li(T3, (long)table); ++ if (UseLEXT1) { ++ gsldx(T3, T2, T3, 0); ++ } else { ++ daddu(T3, T2, T3); ++ ld(T3, T3, 0); ++ } ++ } ++ jr(T3); ++ delayed()->nop(); ++} ++ ++void InterpreterMacroAssembler::dispatch_only(TosState state) { ++ dispatch_base(state, Interpreter::dispatch_table(state)); ++} ++ ++void InterpreterMacroAssembler::dispatch_only_normal(TosState state) { ++ dispatch_base(state, Interpreter::normal_table(state)); ++} ++ ++void InterpreterMacroAssembler::dispatch_only_noverify(TosState state) { ++ dispatch_base(state, Interpreter::normal_table(state), false); ++} ++ ++ ++void InterpreterMacroAssembler::dispatch_next(TosState state, int step) { ++ // load next bytecode (load before advancing r13 to prevent AGI) ++ lbu(Rnext, BCP, step); ++ increment(BCP, step); ++ dispatch_base(state, Interpreter::dispatch_table(state)); ++} ++ ++void InterpreterMacroAssembler::dispatch_via(TosState state, address* table) { ++ // load current bytecode ++ lbu(Rnext, BCP, 0); ++ dispatch_base(state, table); ++} ++ ++// remove activation ++// ++// Unlock the receiver if this is a synchronized method. ++// Unlock any Java monitors from syncronized blocks. ++// Remove the activation from the stack. 
++// ++// If there are locked Java monitors ++// If throw_monitor_exception ++// throws IllegalMonitorStateException ++// Else if install_monitor_exception ++// installs IllegalMonitorStateException ++// Else ++// no error processing ++// used registers : T1, T2, T3, T8 ++// T1 : thread, method access flags ++// T2 : monitor entry pointer ++// T3 : method, monitor top ++// T8 : unlock flag ++void InterpreterMacroAssembler::remove_activation( ++ TosState state, ++ Register ret_addr, ++ bool throw_monitor_exception, ++ bool install_monitor_exception, ++ bool notify_jvmdi) { ++ // Note: Registers V0, V1 and F0, F1 may be in use for the result ++ // check if synchronized method ++ Label unlocked, unlock, no_unlock; ++ ++ // get the value of _do_not_unlock_if_synchronized into T8 ++#ifndef OPT_THREAD ++ Register thread = T1; ++ get_thread(thread); ++#else ++ Register thread = TREG; ++#endif ++ lb(T8, thread, in_bytes(JavaThread::do_not_unlock_if_synchronized_offset())); ++ // reset the flag ++ sb(R0, thread, in_bytes(JavaThread::do_not_unlock_if_synchronized_offset())); ++ // get method access flags ++ ld(T3, FP, frame::interpreter_frame_method_offset * wordSize); ++ lw(T1, T3, in_bytes(Method::access_flags_offset())); ++ andi(T1, T1, JVM_ACC_SYNCHRONIZED); ++ beq(T1, R0, unlocked); ++ delayed()->nop(); ++ ++ // Don't unlock anything if the _do_not_unlock_if_synchronized flag is set. ++ bne(T8, R0, no_unlock); ++ delayed()->nop(); ++ // unlock monitor ++ push(state); // save result ++ ++ // BasicObjectLock will be first in list, since this is a ++ // synchronized method. However, need to check that the object has ++ // not been unlocked by an explicit monitorexit bytecode. ++ daddiu(c_rarg0, FP, frame::interpreter_frame_initial_sp_offset * wordSize ++ - (int)sizeof(BasicObjectLock)); ++ // address of first monitor ++ ld(T1, c_rarg0, BasicObjectLock::obj_offset_in_bytes()); ++ bne(T1, R0, unlock); ++ delayed()->nop(); ++ pop(state); ++ if (throw_monitor_exception) { ++ // Entry already unlocked, need to throw exception ++ // I think mips do not need empty_FPU_stack ++ // remove possible return value from FPU-stack, otherwise stack could overflow ++ empty_FPU_stack(); ++ call_VM(NOREG, CAST_FROM_FN_PTR(address, ++ InterpreterRuntime::throw_illegal_monitor_state_exception)); ++ should_not_reach_here(); ++ } else { ++ // Monitor already unlocked during a stack unroll. If requested, ++ // install an illegal_monitor_state_exception. Continue with ++ // stack unrolling. 
++ if (install_monitor_exception) { ++ // remove possible return value from FPU-stack, ++ // otherwise stack could overflow ++ empty_FPU_stack(); ++ call_VM(NOREG, CAST_FROM_FN_PTR(address, ++ InterpreterRuntime::new_illegal_monitor_state_exception)); ++ ++ } ++ ++ b(unlocked); ++ delayed()->nop(); ++ } ++ ++ bind(unlock); ++ unlock_object(c_rarg0); ++ pop(state); ++ ++ // Check that for block-structured locking (i.e., that all locked ++ // objects has been unlocked) ++ bind(unlocked); ++ ++ // V0, V1: Might contain return value ++ ++ // Check that all monitors are unlocked ++ { ++ Label loop, exception, entry, restart; ++ const int entry_size = frame::interpreter_frame_monitor_size() * wordSize; ++ const Address monitor_block_top(FP, ++ frame::interpreter_frame_monitor_block_top_offset * wordSize); ++ ++ bind(restart); ++ // points to current entry, starting with top-most entry ++ ld(c_rarg0, monitor_block_top); ++ // points to word before bottom of monitor block ++ daddiu(T3, FP, frame::interpreter_frame_initial_sp_offset * wordSize); ++ b(entry); ++ delayed()->nop(); ++ ++ // Entry already locked, need to throw exception ++ bind(exception); ++ ++ if (throw_monitor_exception) { ++ // Throw exception ++ // remove possible return value from FPU-stack, ++ // otherwise stack could overflow ++ empty_FPU_stack(); ++ MacroAssembler::call_VM(NOREG, CAST_FROM_FN_PTR(address, ++ InterpreterRuntime::throw_illegal_monitor_state_exception)); ++ should_not_reach_here(); ++ } else { ++ // Stack unrolling. Unlock object and install illegal_monitor_exception ++ // Unlock does not block, so don't have to worry about the frame ++ // We don't have to preserve c_rarg0, since we are going to ++ // throw an exception ++ ++ push(state); ++ unlock_object(c_rarg0); ++ pop(state); ++ ++ if (install_monitor_exception) { ++ empty_FPU_stack(); ++ call_VM(NOREG, CAST_FROM_FN_PTR(address, ++ InterpreterRuntime::new_illegal_monitor_state_exception)); ++ } ++ ++ b(restart); ++ delayed()->nop(); ++ } ++ ++ bind(loop); ++ ld(T1, c_rarg0, BasicObjectLock::obj_offset_in_bytes()); ++ bne(T1, R0, exception);// check if current entry is used ++ delayed()->nop(); ++ ++ daddiu(c_rarg0, c_rarg0, entry_size);// otherwise advance to next entry ++ bind(entry); ++ bne(c_rarg0, T3, loop); // check if bottom reached ++ delayed()->nop(); // if not at bottom then check this entry ++ } ++ ++ bind(no_unlock); ++ ++ // jvmpi support (jvmdi does not generate MethodExit on exception / popFrame) ++ if (notify_jvmdi) { ++ notify_method_exit(state, NotifyJVMTI); // preserve TOSCA ++ } else { ++ notify_method_exit(state, SkipNotifyJVMTI); // preserve TOSCA ++ } ++ ++ // remove activation ++ ld(SP, FP, frame::interpreter_frame_sender_sp_offset * wordSize); ++ ld(ret_addr, FP, frame::interpreter_frame_return_addr_offset * wordSize); ++ ld(FP, FP, frame::interpreter_frame_sender_fp_offset * wordSize); ++} ++ ++#endif // C_INTERP ++ ++// Lock object ++// ++// Args: ++// c_rarg1: BasicObjectLock to be used for locking ++// ++// Kills: ++// c_rarg0, c_rarg1, c_rarg2, c_rarg3, .. (param regs) ++// rscratch1, rscratch2 (scratch regs) ++void InterpreterMacroAssembler::lock_object(Register lock_reg) { ++ assert(lock_reg == c_rarg0, "The argument is only for looks. 
It must be c_rarg0"); ++ ++ if (UseHeavyMonitors) { ++ call_VM(NOREG, CAST_FROM_FN_PTR(address, InterpreterRuntime::monitorenter), ++ lock_reg); ++ } else { ++ Label done; ++ ++ const Register swap_reg = T2; // Must use T2 for cmpxchg instruction ++ const Register obj_reg = T1; // Will contain the oop ++ ++ const int obj_offset = BasicObjectLock::obj_offset_in_bytes(); ++ const int lock_offset = BasicObjectLock::lock_offset_in_bytes (); ++ const int mark_offset = lock_offset + ++ BasicLock::displaced_header_offset_in_bytes(); ++ ++ Label slow_case; ++ ++ // Load object pointer into obj_reg %T1 ++ ld(obj_reg, lock_reg, obj_offset); ++ ++ if (UseBiasedLocking) { ++ // Note: we use noreg for the temporary register since it's hard ++ // to come up with a free register on all incoming code paths ++ biased_locking_enter(lock_reg, obj_reg, swap_reg, noreg, false, done, &slow_case); ++ } ++ ++ ++ // Load (object->mark() | 1) into swap_reg %T2 ++ ld(AT, obj_reg, 0); ++ ori(swap_reg, AT, 1); ++ ++ ++ // Save (object->mark() | 1) into BasicLock's displaced header ++ sd(swap_reg, lock_reg, mark_offset); ++ ++ assert(lock_offset == 0, "displached header must be first word in BasicObjectLock"); ++ //if (os::is_MP()) { ++ // lock(); ++ //} ++ cmpxchg(lock_reg, Address(obj_reg, 0), swap_reg); ++ ++ if (PrintBiasedLockingStatistics) { ++ Label L; ++ beq(AT, R0, L); ++ delayed()->nop(); ++ push(T0); ++ push(T1); ++ atomic_inc32((address)BiasedLocking::fast_path_entry_count_addr(), 1, T0, T1); ++ pop(T1); ++ pop(T0); ++ bind(L); ++ } ++ ++ bne(AT, R0, done); ++ delayed()->nop(); ++ ++ // Test if the oopMark is an obvious stack pointer, i.e., ++ // 1) (mark & 3) == 0, and ++ // 2) SP <= mark < SP + os::pagesize() ++ // ++ // These 3 tests can be done by evaluating the following ++ // expression: ((mark - sp) & (3 - os::vm_page_size())), ++ // assuming both stack pointer and pagesize have their ++ // least significant 2 bits clear. ++ // NOTE: the oopMark is in swap_reg %T2 as the result of cmpxchg ++ ++ dsubu(swap_reg, swap_reg, SP); ++ move(AT, 3 - os::vm_page_size()); ++ andr(swap_reg, swap_reg, AT); ++ // Save the test result, for recursive case, the result is zero ++ sd(swap_reg, lock_reg, mark_offset); ++ if (PrintBiasedLockingStatistics) { ++ Label L; ++ bne(swap_reg, R0, L); ++ delayed()->nop(); ++ push(T0); ++ push(T1); ++ atomic_inc32((address)BiasedLocking::fast_path_entry_count_addr(), 1, T0, T1); ++ pop(T1); ++ pop(T0); ++ bind(L); ++ } ++ ++ beq(swap_reg, R0, done); ++ delayed()->nop(); ++ bind(slow_case); ++ // Call the runtime routine for slow case ++ call_VM(NOREG, CAST_FROM_FN_PTR(address, InterpreterRuntime::monitorenter), lock_reg); ++ ++ bind(done); ++ } ++} ++ ++ ++// Unlocks an object. Used in monitorexit bytecode and ++// remove_activation. Throws an IllegalMonitorException if object is ++// not locked by current thread. ++// ++// Args: ++// c_rarg1: BasicObjectLock for lock ++// ++// Kills: ++// c_rarg0, c_rarg1, c_rarg2, c_rarg3, ... (param regs) ++// rscratch1, rscratch2 (scratch regs) ++// Argument: T6 : Points to BasicObjectLock structure for lock ++// Argument: c_rarg0 : Points to BasicObjectLock structure for lock ++// Throw an IllegalMonitorException if object is not locked by current thread ++void InterpreterMacroAssembler::unlock_object(Register lock_reg) { ++ assert(lock_reg == c_rarg0, "The argument is only for looks. 
It must be c_rarg0"); ++ ++ if (UseHeavyMonitors) { ++ call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::monitorexit), lock_reg); ++ } else { ++ Label done; ++ ++ const Register swap_reg = T2; // Must use T2 for cmpxchg instruction ++ const Register header_reg = T3; // Will contain the old oopMark ++ const Register obj_reg = T1; // Will contain the oop ++ ++ save_bcp(); // Save in case of exception ++ ++ // Convert from BasicObjectLock structure to object and BasicLock structure ++ // Store the BasicLock address into %T2 ++ daddiu(swap_reg, lock_reg, BasicObjectLock::lock_offset_in_bytes()); ++ ++ // Load oop into obj_reg(%T1) ++ ld(obj_reg, lock_reg, BasicObjectLock::obj_offset_in_bytes ()); ++ //free entry ++ sd(R0, lock_reg, BasicObjectLock::obj_offset_in_bytes()); ++ if (UseBiasedLocking) { ++ biased_locking_exit(obj_reg, header_reg, done); ++ } ++ ++ // Load the old header from BasicLock structure ++ ld(header_reg, swap_reg, BasicLock::displaced_header_offset_in_bytes()); ++ // zero for recursive case ++ beq(header_reg, R0, done); ++ delayed()->nop(); ++ ++ // Atomic swap back the old header ++ if (os::is_MP()); //lock(); ++ cmpxchg(header_reg, Address(obj_reg, 0), swap_reg); ++ ++ // zero for recursive case ++ bne(AT, R0, done); ++ delayed()->nop(); ++ ++ // Call the runtime routine for slow case. ++ sd(obj_reg, lock_reg, BasicObjectLock::obj_offset_in_bytes()); // restore obj ++ call_VM(NOREG, ++ CAST_FROM_FN_PTR(address, InterpreterRuntime::monitorexit), ++ lock_reg); ++ ++ bind(done); ++ ++ restore_bcp(); ++ } ++} ++ ++#ifndef CC_INTERP ++ ++void InterpreterMacroAssembler::test_method_data_pointer(Register mdp, ++ Label& zero_continue) { ++ assert(ProfileInterpreter, "must be profiling interpreter"); ++ ld(mdp, Address(FP, frame::interpreter_frame_mdx_offset * wordSize)); ++ beq(mdp, R0, zero_continue); ++ delayed()->nop(); ++} ++ ++ ++// Set the method data pointer for the current bcp. ++void InterpreterMacroAssembler::set_method_data_pointer_for_bcp() { ++ assert(ProfileInterpreter, "must be profiling interpreter"); ++ Label set_mdp; ++ ++ // V0 and T0 will be used as two temporary registers. ++ push2(V0, T0); ++ ++ get_method(T0); ++ // Test MDO to avoid the call if it is NULL. ++ ld(V0, T0, in_bytes(Method::method_data_offset())); ++ beq(V0, R0, set_mdp); ++ delayed()->nop(); ++ ++ // method: T0 ++ // bcp: BCP --> S0 ++ call_VM_leaf(CAST_FROM_FN_PTR(address, InterpreterRuntime::bcp_to_di), T0, BCP); ++ // mdi: V0 ++ // mdo is guaranteed to be non-zero here, we checked for it before the call. ++ get_method(T0); ++ ld(T0, T0, in_bytes(Method::method_data_offset())); ++ daddiu(T0, T0, in_bytes(MethodData::data_offset())); ++ daddu(V0, T0, V0); ++ bind(set_mdp); ++ sd(V0, FP, frame::interpreter_frame_mdx_offset * wordSize); ++ pop2(V0, T0); ++} ++ ++void InterpreterMacroAssembler::verify_method_data_pointer() { ++ assert(ProfileInterpreter, "must be profiling interpreter"); ++#ifdef ASSERT ++ Label verify_continue; ++ Register method = V0; ++ Register mdp = V1; ++ Register tmp = A0; ++ push(method); ++ push(mdp); ++ push(tmp); ++ test_method_data_pointer(mdp, verify_continue); // If mdp is zero, continue ++ get_method(method); ++ ++ // If the mdp is valid, it will point to a DataLayout header which is ++ // consistent with the bcp. The converse is highly probable also. 
++ lhu(tmp, mdp, in_bytes(DataLayout::bci_offset())); ++ ld(AT, method, in_bytes(Method::const_offset())); ++ daddu(tmp, tmp, AT); ++ daddiu(tmp, tmp, in_bytes(ConstMethod::codes_offset())); ++ beq(tmp, BCP, verify_continue); ++ delayed()->nop(); ++ call_VM_leaf(CAST_FROM_FN_PTR(address, InterpreterRuntime::verify_mdp), method, BCP, mdp); ++ bind(verify_continue); ++ pop(tmp); ++ pop(mdp); ++ pop(method); ++#endif // ASSERT ++} ++ ++ ++void InterpreterMacroAssembler::set_mdp_data_at(Register mdp_in, ++ int constant, ++ Register value) { ++ assert(ProfileInterpreter, "must be profiling interpreter"); ++ Address data(mdp_in, constant); ++ sd(value, data); ++} ++ ++ ++void InterpreterMacroAssembler::increment_mdp_data_at(Register mdp_in, ++ int constant, ++ bool decrement) { ++ // Counter address ++ Address data(mdp_in, constant); ++ ++ increment_mdp_data_at(data, decrement); ++} ++ ++void InterpreterMacroAssembler::increment_mdp_data_at(Address data, ++ bool decrement) { ++ assert(ProfileInterpreter, "must be profiling interpreter"); ++ // %%% this does 64bit counters at best it is wasting space ++ // at worst it is a rare bug when counters overflow ++ Register tmp = S0; ++ push(tmp); ++ if (decrement) { ++ // Decrement the register. ++ ld(AT, data); ++ daddiu(tmp, AT, (int32_t) -DataLayout::counter_increment); ++ // If the decrement causes the counter to overflow, stay negative ++ Label L; ++ slt(AT, tmp, R0); ++ bne(AT, R0, L); ++ delayed()->nop(); ++ daddiu(tmp, tmp, (int32_t) DataLayout::counter_increment); ++ bind(L); ++ sd(tmp, data); ++ } else { ++ assert(DataLayout::counter_increment == 1, ++ "flow-free idiom only works with 1"); ++ ld(AT, data); ++ // Increment the register. ++ daddiu(tmp, AT, DataLayout::counter_increment); ++ // If the increment causes the counter to overflow, pull back by 1. ++ slt(AT, tmp, R0); ++ dsubu(tmp, tmp, AT); ++ sd(tmp, data); ++ } ++ pop(tmp); ++} ++ ++ ++void InterpreterMacroAssembler::increment_mdp_data_at(Register mdp_in, ++ Register reg, ++ int constant, ++ bool decrement) { ++ Register tmp = S0; ++ push(S0); ++ if (decrement) { ++ // Decrement the register. ++ daddu(AT, mdp_in, reg); ++ assert(Assembler::is_simm16(constant), "constant is not a simm16 !"); ++ ld(AT, AT, constant); ++ ++ daddiu(tmp, AT, (int32_t) -DataLayout::counter_increment); ++ // If the decrement causes the counter to overflow, stay negative ++ Label L; ++ slt(AT, tmp, R0); ++ bne(AT, R0, L); ++ delayed()->nop(); ++ daddiu(tmp, tmp, (int32_t) DataLayout::counter_increment); ++ bind(L); ++ ++ daddu(AT, mdp_in, reg); ++ sd(tmp, AT, constant); ++ } else { ++ daddu(AT, mdp_in, reg); ++ assert(Assembler::is_simm16(constant), "constant is not a simm16 !"); ++ ld(AT, AT, constant); ++ ++ // Increment the register. ++ daddiu(tmp, AT, DataLayout::counter_increment); ++ // If the increment causes the counter to overflow, pull back by 1. ++ slt(AT, tmp, R0); ++ dsubu(tmp, tmp, AT); ++ ++ daddu(AT, mdp_in, reg); ++ sd(tmp, AT, constant); ++ } ++ pop(S0); ++} ++ ++void InterpreterMacroAssembler::set_mdp_flag_at(Register mdp_in, ++ int flag_byte_constant) { ++ assert(ProfileInterpreter, "must be profiling interpreter"); ++ int header_offset = in_bytes(DataLayout::header_offset()); ++ int header_bits = DataLayout::flag_mask_to_header_mask(flag_byte_constant); ++ // Set the flag ++ lw(AT, Address(mdp_in, header_offset)); ++ if(Assembler::is_simm16(header_bits)) { ++ ori(AT, AT, header_bits); ++ } else { ++ push(T8); ++ // T8 is used as a temporary register. 
++ move(T8, header_bits); ++ orr(AT, AT, T8); ++ pop(T8); ++ } ++ sw(AT, Address(mdp_in, header_offset)); ++} ++ ++ ++ ++void InterpreterMacroAssembler::test_mdp_data_at(Register mdp_in, ++ int offset, ++ Register value, ++ Register test_value_out, ++ Label& not_equal_continue) { ++ assert(ProfileInterpreter, "must be profiling interpreter"); ++ if (test_value_out == noreg) { ++ ld(AT, Address(mdp_in, offset)); ++ bne(AT, value, not_equal_continue); ++ delayed()->nop(); ++ } else { ++ // Put the test value into a register, so caller can use it: ++ ld(test_value_out, Address(mdp_in, offset)); ++ bne(value, test_value_out, not_equal_continue); ++ delayed()->nop(); ++ } ++} ++ ++ ++void InterpreterMacroAssembler::update_mdp_by_offset(Register mdp_in, ++ int offset_of_disp) { ++ assert(ProfileInterpreter, "must be profiling interpreter"); ++ assert(Assembler::is_simm16(offset_of_disp), "offset is not an simm16"); ++ ld(AT, mdp_in, offset_of_disp); ++ daddu(mdp_in, mdp_in, AT); ++ sd(mdp_in, Address(FP, frame::interpreter_frame_mdx_offset * wordSize)); ++} ++ ++ ++void InterpreterMacroAssembler::update_mdp_by_offset(Register mdp_in, ++ Register reg, ++ int offset_of_disp) { ++ assert(ProfileInterpreter, "must be profiling interpreter"); ++ daddu(AT, reg, mdp_in); ++ assert(Assembler::is_simm16(offset_of_disp), "offset is not an simm16"); ++ ld(AT, AT, offset_of_disp); ++ daddu(mdp_in, mdp_in, AT); ++ sd(mdp_in, Address(FP, frame::interpreter_frame_mdx_offset * wordSize)); ++} ++ ++ ++void InterpreterMacroAssembler::update_mdp_by_constant(Register mdp_in, ++ int constant) { ++ assert(ProfileInterpreter, "must be profiling interpreter"); ++ if(Assembler::is_simm16(constant)) { ++ daddiu(mdp_in, mdp_in, constant); ++ } else { ++ move(AT, constant); ++ daddu(mdp_in, mdp_in, AT); ++ } ++ sd(mdp_in, Address(FP, frame::interpreter_frame_mdx_offset * wordSize)); ++} ++ ++ ++void InterpreterMacroAssembler::update_mdp_for_ret(Register return_bci) { ++ assert(ProfileInterpreter, "must be profiling interpreter"); ++ push(return_bci); // save/restore across call_VM ++ call_VM(noreg, ++ CAST_FROM_FN_PTR(address, InterpreterRuntime::update_mdp_for_ret), ++ return_bci); ++ pop(return_bci); ++} ++ ++ ++void InterpreterMacroAssembler::profile_taken_branch(Register mdp, ++ Register bumped_count) { ++ if (ProfileInterpreter) { ++ Label profile_continue; ++ ++ // If no method data exists, go to profile_continue. ++ // Otherwise, assign to mdp ++ test_method_data_pointer(mdp, profile_continue); ++ ++ // We are taking a branch. Increment the taken count. ++ // We inline increment_mdp_data_at to return bumped_count in a register ++ //increment_mdp_data_at(mdp, in_bytes(JumpData::taken_offset())); ++ ld(bumped_count, mdp, in_bytes(JumpData::taken_offset())); ++ assert(DataLayout::counter_increment == 1, ++ "flow-free idiom only works with 1"); ++ push(T8); ++ // T8 is used as a temporary register. ++ daddiu(T8, bumped_count, DataLayout::counter_increment); ++ slt(AT, T8, R0); ++ dsubu(bumped_count, T8, AT); ++ pop(T8); ++ sd(bumped_count, mdp, in_bytes(JumpData::taken_offset())); // Store back out ++ // The method data pointer needs to be updated to reflect the new target. ++ update_mdp_by_offset(mdp, in_bytes(JumpData::displacement_offset())); ++ bind(profile_continue); ++ } ++} ++ ++ ++void InterpreterMacroAssembler::profile_not_taken_branch(Register mdp) { ++ if (ProfileInterpreter) { ++ Label profile_continue; ++ ++ // If no method data exists, go to profile_continue. 
++ test_method_data_pointer(mdp, profile_continue); ++ ++ // We are taking a branch. Increment the not taken count. ++ increment_mdp_data_at(mdp, in_bytes(BranchData::not_taken_offset())); ++ ++ // The method data pointer needs to be updated to correspond to ++ // the next bytecode ++ update_mdp_by_constant(mdp, in_bytes(BranchData::branch_data_size())); ++ bind(profile_continue); ++ } ++} ++ ++ ++void InterpreterMacroAssembler::profile_call(Register mdp) { ++ if (ProfileInterpreter) { ++ Label profile_continue; ++ ++ // If no method data exists, go to profile_continue. ++ test_method_data_pointer(mdp, profile_continue); ++ ++ // We are making a call. Increment the count. ++ increment_mdp_data_at(mdp, in_bytes(CounterData::count_offset())); ++ ++ // The method data pointer needs to be updated to reflect the new target. ++ update_mdp_by_constant(mdp, in_bytes(CounterData::counter_data_size())); ++ bind(profile_continue); ++ } ++} ++ ++ ++void InterpreterMacroAssembler::profile_final_call(Register mdp) { ++ if (ProfileInterpreter) { ++ Label profile_continue; ++ ++ // If no method data exists, go to profile_continue. ++ test_method_data_pointer(mdp, profile_continue); ++ ++ // We are making a call. Increment the count. ++ increment_mdp_data_at(mdp, in_bytes(CounterData::count_offset())); ++ ++ // The method data pointer needs to be updated to reflect the new target. ++ update_mdp_by_constant(mdp, ++ in_bytes(VirtualCallData:: ++ virtual_call_data_size())); ++ bind(profile_continue); ++ } ++} ++ ++ ++void InterpreterMacroAssembler::profile_virtual_call(Register receiver, ++ Register mdp, ++ Register reg2, ++ bool receiver_can_be_null) { ++ if (ProfileInterpreter) { ++ Label profile_continue; ++ ++ // If no method data exists, go to profile_continue. ++ test_method_data_pointer(mdp, profile_continue); ++ ++ Label skip_receiver_profile; ++ if (receiver_can_be_null) { ++ Label not_null; ++ bne(receiver, R0, not_null); ++ delayed()->nop(); ++ // We are making a call. Increment the count. ++ increment_mdp_data_at(mdp, in_bytes(CounterData::count_offset())); ++ beq(R0, R0, skip_receiver_profile); ++ delayed()->nop(); ++ bind(not_null); ++ } ++ ++ // Record the receiver type. ++ record_klass_in_profile(receiver, mdp, reg2, true); ++ bind(skip_receiver_profile); ++ ++ // The method data pointer needs to be updated to reflect the new target. ++ update_mdp_by_constant(mdp, ++ in_bytes(VirtualCallData:: ++ virtual_call_data_size())); ++ bind(profile_continue); ++ } ++} ++ ++// This routine creates a state machine for updating the multi-row ++// type profile at a virtual call site (or other type-sensitive bytecode). ++// The machine visits each row (of receiver/count) until the receiver type ++// is found, or until it runs out of rows. At the same time, it remembers ++// the location of the first empty row. (An empty row records null for its ++// receiver, and can be allocated for a newly-observed receiver type.) ++// Because there are two degrees of freedom in the state, a simple linear ++// search will not work; it must be a decision tree. Hence this helper ++// function is recursive, to generate the required tree structured code. ++// It's the interpreter, so we are trading off code space for speed. ++// See below for example code. 
++void InterpreterMacroAssembler::record_klass_in_profile_helper( ++ Register receiver, Register mdp, ++ Register reg2, int start_row, ++ Label& done, bool is_virtual_call) { ++ if (TypeProfileWidth == 0) { ++ if (is_virtual_call) { ++ increment_mdp_data_at(mdp, in_bytes(CounterData::count_offset())); ++ } ++ return; ++ } ++ ++ int last_row = VirtualCallData::row_limit() - 1; ++ assert(start_row <= last_row, "must be work left to do"); ++ // Test this row for both the receiver and for null. ++ // Take any of three different outcomes: ++ // 1. found receiver => increment count and goto done ++ // 2. found null => keep looking for case 1, maybe allocate this cell ++ // 3. found something else => keep looking for cases 1 and 2 ++ // Case 3 is handled by a recursive call. ++ for (int row = start_row; row <= last_row; row++) { ++ Label next_test; ++ bool test_for_null_also = (row == start_row); ++ ++ // See if the receiver is receiver[n]. ++ int recvr_offset = in_bytes(VirtualCallData::receiver_offset(row)); ++ test_mdp_data_at(mdp, recvr_offset, receiver, ++ (test_for_null_also ? reg2 : noreg), ++ next_test); ++ // (Reg2 now contains the receiver from the CallData.) ++ ++ // The receiver is receiver[n]. Increment count[n]. ++ int count_offset = in_bytes(VirtualCallData::receiver_count_offset(row)); ++ increment_mdp_data_at(mdp, count_offset); ++ beq(R0, R0, done); ++ delayed()->nop(); ++ bind(next_test); ++ ++ if (test_for_null_also) { ++ Label found_null; ++ // Failed the equality check on receiver[n]... Test for null. ++ if (start_row == last_row) { ++ // The only thing left to do is handle the null case. ++ if (is_virtual_call) { ++ beq(reg2, R0, found_null); ++ delayed()->nop(); ++ // Receiver did not match any saved receiver and there is no empty row for it. ++ // Increment total counter to indicate polymorphic case. ++ increment_mdp_data_at(mdp, in_bytes(CounterData::count_offset())); ++ beq(R0, R0, done); ++ delayed()->nop(); ++ bind(found_null); ++ } else { ++ bne(reg2, R0, done); ++ delayed()->nop(); ++ } ++ break; ++ } ++ // Since null is rare, make it be the branch-taken case. ++ beq(reg2, R0, found_null); ++ delayed()->nop(); ++ ++ // Put all the "Case 3" tests here. ++ record_klass_in_profile_helper(receiver, mdp, reg2, start_row + 1, done, is_virtual_call); ++ ++ // Found a null. Keep searching for a matching receiver, ++ // but remember that this is an empty (unused) slot. ++ bind(found_null); ++ } ++ } ++ ++ // In the fall-through case, we found no matching receiver, but we ++ // observed the receiver[start_row] is NULL. ++ ++ // Fill in the receiver field and increment the count. 
++ int recvr_offset = in_bytes(VirtualCallData::receiver_offset(start_row)); ++ set_mdp_data_at(mdp, recvr_offset, receiver); ++ int count_offset = in_bytes(VirtualCallData::receiver_count_offset(start_row)); ++ move(reg2, DataLayout::counter_increment); ++ set_mdp_data_at(mdp, count_offset, reg2); ++ if (start_row > 0) { ++ beq(R0, R0, done); ++ delayed()->nop(); ++ } ++} ++ ++// Example state machine code for three profile rows: ++// // main copy of decision tree, rooted at row[1] ++// if (row[0].rec == rec) { row[0].incr(); goto done; } ++// if (row[0].rec != NULL) { ++// // inner copy of decision tree, rooted at row[1] ++// if (row[1].rec == rec) { row[1].incr(); goto done; } ++// if (row[1].rec != NULL) { ++// // degenerate decision tree, rooted at row[2] ++// if (row[2].rec == rec) { row[2].incr(); goto done; } ++// if (row[2].rec != NULL) { goto done; } // overflow ++// row[2].init(rec); goto done; ++// } else { ++// // remember row[1] is empty ++// if (row[2].rec == rec) { row[2].incr(); goto done; } ++// row[1].init(rec); goto done; ++// } ++// } else { ++// // remember row[0] is empty ++// if (row[1].rec == rec) { row[1].incr(); goto done; } ++// if (row[2].rec == rec) { row[2].incr(); goto done; } ++// row[0].init(rec); goto done; ++// } ++// done: ++ ++void InterpreterMacroAssembler::record_klass_in_profile(Register receiver, ++ Register mdp, Register reg2, ++ bool is_virtual_call) { ++ assert(ProfileInterpreter, "must be profiling"); ++ Label done; ++ ++ record_klass_in_profile_helper(receiver, mdp, reg2, 0, done, is_virtual_call); ++ ++ bind (done); ++} ++ ++void InterpreterMacroAssembler::profile_ret(Register return_bci, ++ Register mdp) { ++ if (ProfileInterpreter) { ++ Label profile_continue; ++ uint row; ++ ++ // If no method data exists, go to profile_continue. ++ test_method_data_pointer(mdp, profile_continue); ++ ++ // Update the total ret count. ++ increment_mdp_data_at(mdp, in_bytes(CounterData::count_offset())); ++ ++ for (row = 0; row < RetData::row_limit(); row++) { ++ Label next_test; ++ ++ // See if return_bci is equal to bci[n]: ++ test_mdp_data_at(mdp, ++ in_bytes(RetData::bci_offset(row)), ++ return_bci, noreg, ++ next_test); ++ ++ // return_bci is equal to bci[n]. Increment the count. ++ increment_mdp_data_at(mdp, in_bytes(RetData::bci_count_offset(row))); ++ ++ // The method data pointer needs to be updated to reflect the new target. ++ update_mdp_by_offset(mdp, ++ in_bytes(RetData::bci_displacement_offset(row))); ++ beq(R0, R0, profile_continue); ++ delayed()->nop(); ++ bind(next_test); ++ } ++ ++ update_mdp_for_ret(return_bci); ++ ++ bind(profile_continue); ++ } ++} ++ ++ ++void InterpreterMacroAssembler::profile_null_seen(Register mdp) { ++ if (ProfileInterpreter) { ++ Label profile_continue; ++ ++ // If no method data exists, go to profile_continue. ++ test_method_data_pointer(mdp, profile_continue); ++ ++ set_mdp_flag_at(mdp, BitData::null_seen_byte_constant()); ++ ++ // The method data pointer needs to be updated. ++ int mdp_delta = in_bytes(BitData::bit_data_size()); ++ if (TypeProfileCasts) { ++ mdp_delta = in_bytes(VirtualCallData::virtual_call_data_size()); ++ } ++ update_mdp_by_constant(mdp, mdp_delta); ++ ++ bind(profile_continue); ++ } ++} ++ ++ ++void InterpreterMacroAssembler::profile_typecheck_failed(Register mdp) { ++ if (ProfileInterpreter && TypeProfileCasts) { ++ Label profile_continue; ++ ++ // If no method data exists, go to profile_continue. 
++ test_method_data_pointer(mdp, profile_continue); ++ ++ int count_offset = in_bytes(CounterData::count_offset()); ++ // Back up the address, since we have already bumped the mdp. ++ count_offset -= in_bytes(VirtualCallData::virtual_call_data_size()); ++ ++ // *Decrement* the counter. We expect to see zero or small negatives. ++ increment_mdp_data_at(mdp, count_offset, true); ++ ++ bind (profile_continue); ++ } ++} ++ ++ ++void InterpreterMacroAssembler::profile_typecheck(Register mdp, Register klass, Register reg2) { ++ if (ProfileInterpreter) { ++ Label profile_continue; ++ ++ // If no method data exists, go to profile_continue. ++ test_method_data_pointer(mdp, profile_continue); ++ ++ // The method data pointer needs to be updated. ++ int mdp_delta = in_bytes(BitData::bit_data_size()); ++ if (TypeProfileCasts) { ++ mdp_delta = in_bytes(VirtualCallData::virtual_call_data_size()); ++ ++ // Record the object type. ++ record_klass_in_profile(klass, mdp, reg2, false); ++ } ++ update_mdp_by_constant(mdp, mdp_delta); ++ ++ bind(profile_continue); ++ } ++} ++ ++ ++void InterpreterMacroAssembler::profile_switch_default(Register mdp) { ++ if (ProfileInterpreter) { ++ Label profile_continue; ++ ++ // If no method data exists, go to profile_continue. ++ test_method_data_pointer(mdp, profile_continue); ++ ++ // Update the default case count ++ increment_mdp_data_at(mdp, ++ in_bytes(MultiBranchData::default_count_offset())); ++ ++ // The method data pointer needs to be updated. ++ update_mdp_by_offset(mdp, ++ in_bytes(MultiBranchData:: ++ default_displacement_offset())); ++ ++ bind(profile_continue); ++ } ++} ++ ++ ++void InterpreterMacroAssembler::profile_switch_case(Register index, ++ Register mdp, ++ Register reg2) { ++ if (ProfileInterpreter) { ++ Label profile_continue; ++ ++ // If no method data exists, go to profile_continue. ++ test_method_data_pointer(mdp, profile_continue); ++ ++ // Build the base (index * per_case_size_in_bytes()) + ++ // case_array_offset_in_bytes() ++ move(reg2, in_bytes(MultiBranchData::per_case_size())); ++ if (UseLEXT1) { ++ gsdmult(index, index, reg2); ++ } else { ++ dmult(index, reg2); ++ mflo(index); ++ } ++ daddiu(index, index, in_bytes(MultiBranchData::case_array_offset())); ++ ++ // Update the case count ++ increment_mdp_data_at(mdp, ++ index, ++ in_bytes(MultiBranchData::relative_count_offset())); ++ ++ // The method data pointer needs to be updated. ++ update_mdp_by_offset(mdp, ++ index, ++ in_bytes(MultiBranchData:: ++ relative_displacement_offset())); ++ ++ bind(profile_continue); ++ } ++} ++ ++ ++void InterpreterMacroAssembler::narrow(Register result) { ++ ++ // Get method->_constMethod->_result_type ++ ld(T9, FP, frame::interpreter_frame_method_offset * wordSize); ++ ld(T9, T9, in_bytes(Method::const_offset())); ++ lbu(T9, T9, in_bytes(ConstMethod::result_type_offset())); ++ ++ Label done, notBool, notByte, notChar; ++ ++ // common case first ++ addiu(AT, T9, -T_INT); ++ beq(AT, R0, done); ++ delayed()->nop(); ++ ++ // mask integer result to narrower return type. 
++ addiu(AT, T9, -T_BOOLEAN); ++ bne(AT, R0, notBool); ++ delayed()->nop(); ++ andi(result, result, 0x1); ++ beq(R0, R0, done); ++ delayed()->nop(); ++ ++ bind(notBool); ++ addiu(AT, T9, -T_BYTE); ++ bne(AT, R0, notByte); ++ delayed()->nop(); ++ seb(result, result); ++ beq(R0, R0, done); ++ delayed()->nop(); ++ ++ bind(notByte); ++ addiu(AT, T9, -T_CHAR); ++ bne(AT, R0, notChar); ++ delayed()->nop(); ++ andi(result, result, 0xFFFF); ++ beq(R0, R0, done); ++ delayed()->nop(); ++ ++ bind(notChar); ++ seh(result, result); ++ ++ // Nothing to do for T_INT ++ bind(done); ++} ++ ++ ++void InterpreterMacroAssembler::profile_obj_type(Register obj, const Address& mdo_addr) { ++ Label update, next, none; ++ ++ verify_oop(obj); ++ ++ if (mdo_addr.index() != noreg) { ++ guarantee(T0 != mdo_addr.base(), "The base register will be corrupted !"); ++ guarantee(T0 != mdo_addr.index(), "The index register will be corrupted !"); ++ push(T0); ++ dsll(T0, mdo_addr.index(), mdo_addr.scale()); ++ daddu(T0, T0, mdo_addr.base()); ++ } ++ ++ bne(obj, R0, update); ++ delayed()->nop(); ++ ++ if (mdo_addr.index() == noreg) { ++ ld(AT, mdo_addr); ++ } else { ++ ld(AT, T0, mdo_addr.disp()); ++ } ++ ori(AT, AT, TypeEntries::null_seen); ++ if (mdo_addr.index() == noreg) { ++ sd(AT, mdo_addr); ++ } else { ++ sd(AT, T0, mdo_addr.disp()); ++ } ++ ++ beq(R0, R0, next); ++ delayed()->nop(); ++ ++ bind(update); ++ load_klass(obj, obj); ++ ++ if (mdo_addr.index() == noreg) { ++ ld(AT, mdo_addr); ++ } else { ++ ld(AT, T0, mdo_addr.disp()); ++ } ++ xorr(obj, obj, AT); ++ ++ assert(TypeEntries::type_klass_mask == -4, "must be"); ++ dextm(AT, obj, 2, 62); ++ beq(AT, R0, next); ++ delayed()->nop(); ++ ++ andi(AT, obj, TypeEntries::type_unknown); ++ bne(AT, R0, next); ++ delayed()->nop(); ++ ++ if (mdo_addr.index() == noreg) { ++ ld(AT, mdo_addr); ++ } else { ++ ld(AT, T0, mdo_addr.disp()); ++ } ++ beq(AT, R0, none); ++ delayed()->nop(); ++ ++ daddiu(AT, AT, -(TypeEntries::null_seen)); ++ beq(AT, R0, none); ++ delayed()->nop(); ++ ++ // There is a chance that the checks above (re-reading profiling ++ // data from memory) fail if another thread has just set the ++ // profiling to this obj's klass ++ if (mdo_addr.index() == noreg) { ++ ld(AT, mdo_addr); ++ } else { ++ ld(AT, T0, mdo_addr.disp()); ++ } ++ xorr(obj, obj, AT); ++ assert(TypeEntries::type_klass_mask == -4, "must be"); ++ dextm(AT, obj, 2, 62); ++ beq(AT, R0, next); ++ delayed()->nop(); ++ ++ // different than before. Cannot keep accurate profile. ++ if (mdo_addr.index() == noreg) { ++ ld(AT, mdo_addr); ++ } else { ++ ld(AT, T0, mdo_addr.disp()); ++ } ++ ori(AT, AT, TypeEntries::type_unknown); ++ if (mdo_addr.index() == noreg) { ++ sd(AT, mdo_addr); ++ } else { ++ sd(AT, T0, mdo_addr.disp()); ++ } ++ beq(R0, R0, next); ++ delayed()->nop(); ++ ++ bind(none); ++ // first time here. Set profile type. ++ if (mdo_addr.index() == noreg) { ++ sd(obj, mdo_addr); ++ } else { ++ sd(obj, T0, mdo_addr.disp()); ++ } ++ ++ bind(next); ++ if (mdo_addr.index() != noreg) { ++ pop(T0); ++ } ++} ++ ++void InterpreterMacroAssembler::profile_arguments_type(Register mdp, Register callee, Register tmp, bool is_virtual) { ++ if (!ProfileInterpreter) { ++ return; ++ } ++ ++ if (MethodData::profile_arguments() || MethodData::profile_return()) { ++ Label profile_continue; ++ ++ test_method_data_pointer(mdp, profile_continue); ++ ++ int off_to_start = is_virtual ? 
in_bytes(VirtualCallData::virtual_call_data_size()) : in_bytes(CounterData::counter_data_size()); ++ ++ lb(AT, mdp, in_bytes(DataLayout::tag_offset()) - off_to_start); ++ li(tmp, is_virtual ? DataLayout::virtual_call_type_data_tag : DataLayout::call_type_data_tag); ++ bne(tmp, AT, profile_continue); ++ delayed()->nop(); ++ ++ ++ if (MethodData::profile_arguments()) { ++ Label done; ++ int off_to_args = in_bytes(TypeEntriesAtCall::args_data_offset()); ++ if (Assembler::is_simm16(off_to_args)) { ++ daddiu(mdp, mdp, off_to_args); ++ } else { ++ move(AT, off_to_args); ++ daddu(mdp, mdp, AT); ++ } ++ ++ ++ for (int i = 0; i < TypeProfileArgsLimit; i++) { ++ if (i > 0 || MethodData::profile_return()) { ++ // If return value type is profiled we may have no argument to profile ++ ld(tmp, mdp, in_bytes(TypeEntriesAtCall::cell_count_offset())-off_to_args); ++ ++ if (Assembler::is_simm16(-1 * i * TypeStackSlotEntries::per_arg_count())) { ++ addiu32(tmp, tmp, -1 * i * TypeStackSlotEntries::per_arg_count()); ++ } else { ++ li(AT, i*TypeStackSlotEntries::per_arg_count()); ++ subu32(tmp, tmp, AT); ++ } ++ ++ li(AT, TypeStackSlotEntries::per_arg_count()); ++ slt(AT, tmp, AT); ++ bne(AT, R0, done); ++ delayed()->nop(); ++ } ++ ld(tmp, callee, in_bytes(Method::const_offset())); ++ ++ lhu(tmp, tmp, in_bytes(ConstMethod::size_of_parameters_offset())); ++ ++ // stack offset o (zero based) from the start of the argument ++ // list, for n arguments translates into offset n - o - 1 from ++ // the end of the argument list ++ ld(AT, mdp, in_bytes(TypeEntriesAtCall::stack_slot_offset(i))-off_to_args); ++ subu(tmp, tmp, AT); ++ ++ addiu32(tmp, tmp, -1); ++ ++ Address arg_addr = argument_address(tmp); ++ ld(tmp, arg_addr); ++ ++ Address mdo_arg_addr(mdp, in_bytes(TypeEntriesAtCall::argument_type_offset(i))-off_to_args); ++ profile_obj_type(tmp, mdo_arg_addr); ++ ++ int to_add = in_bytes(TypeStackSlotEntries::per_arg_size()); ++ if (Assembler::is_simm16(to_add)) { ++ daddiu(mdp, mdp, to_add); ++ } else { ++ move(AT, to_add); ++ daddu(mdp, mdp, AT); ++ } ++ ++ off_to_args += to_add; ++ } ++ ++ if (MethodData::profile_return()) { ++ ld(tmp, mdp, in_bytes(TypeEntriesAtCall::cell_count_offset())-off_to_args); ++ ++ int tmp_arg_counts = TypeProfileArgsLimit*TypeStackSlotEntries::per_arg_count(); ++ if (Assembler::is_simm16(-1 * tmp_arg_counts)) { ++ addiu32(tmp, tmp, -1 * tmp_arg_counts); ++ } else { ++ move(AT, tmp_arg_counts); ++ subu32(mdp, mdp, AT); ++ } ++ } ++ ++ bind(done); ++ ++ if (MethodData::profile_return()) { ++ // We're right after the type profile for the last ++ // argument. tmp is the number of cells left in the ++ // CallTypeData/VirtualCallTypeData to reach its end. Non null ++ // if there's a return to profile. 
++ assert(ReturnTypeEntry::static_cell_count() < TypeStackSlotEntries::per_arg_count(), "can't move past ret type"); ++ sll(tmp, tmp, exact_log2(DataLayout::cell_size)); ++ daddu(mdp, mdp, tmp); ++ } ++ sd(mdp, FP, frame::interpreter_frame_mdx_offset * wordSize); ++ } else { ++ assert(MethodData::profile_return(), "either profile call args or call ret"); ++ update_mdp_by_constant(mdp, in_bytes(TypeEntriesAtCall::return_only_size())); ++ } ++ ++ // mdp points right after the end of the ++ // CallTypeData/VirtualCallTypeData, right after the cells for the ++ // return value type if there's one ++ ++ bind(profile_continue); ++ } ++} ++ ++void InterpreterMacroAssembler::profile_return_type(Register mdp, Register ret, Register tmp) { ++ assert_different_registers(mdp, ret, tmp, _bcp_register); ++ if (ProfileInterpreter && MethodData::profile_return()) { ++ Label profile_continue, done; ++ ++ test_method_data_pointer(mdp, profile_continue); ++ ++ if (MethodData::profile_return_jsr292_only()) { ++ // If we don't profile all invoke bytecodes we must make sure ++ // it's a bytecode we indeed profile. We can't go back to the ++ // begining of the ProfileData we intend to update to check its ++ // type because we're right after it and we don't known its ++ // length ++ Label do_profile; ++ lb(tmp, _bcp_register, 0); ++ daddiu(AT, tmp, -1 * Bytecodes::_invokedynamic); ++ beq(AT, R0, do_profile); ++ delayed()->daddiu(AT, tmp, -1 * Bytecodes::_invokehandle); ++ beq(AT, R0, do_profile); ++ delayed()->nop(); ++ ++ get_method(tmp); ++ lb(tmp, tmp, Method::intrinsic_id_offset_in_bytes()); ++ li(AT, vmIntrinsics::_compiledLambdaForm); ++ bne(tmp, AT, profile_continue); ++ delayed()->nop(); ++ ++ bind(do_profile); ++ } ++ ++ Address mdo_ret_addr(mdp, -in_bytes(ReturnTypeEntry::size())); ++ daddu(tmp, ret, R0); ++ profile_obj_type(tmp, mdo_ret_addr); ++ ++ bind(profile_continue); ++ } ++} ++ ++void InterpreterMacroAssembler::profile_parameters_type(Register mdp, Register tmp1, Register tmp2) { ++ guarantee(T9 == tmp1, "You are reqired to use T9 as the index register for MIPS !"); ++ ++ if (ProfileInterpreter && MethodData::profile_parameters()) { ++ Label profile_continue, done; ++ ++ test_method_data_pointer(mdp, profile_continue); ++ ++ // Load the offset of the area within the MDO used for ++ // parameters. If it's negative we're not profiling any parameters ++ lw(tmp1, mdp, in_bytes(MethodData::parameters_type_data_di_offset()) - in_bytes(MethodData::data_offset())); ++ bltz(tmp1, profile_continue); ++ delayed()->nop(); ++ ++ // Compute a pointer to the area for parameters from the offset ++ // and move the pointer to the slot for the last ++ // parameters. Collect profiling from last parameter down. 
++ // mdo start + parameters offset + array length - 1 ++ daddu(mdp, mdp, tmp1); ++ ld(tmp1, mdp, in_bytes(ArrayData::array_len_offset())); ++ decrement(tmp1, TypeStackSlotEntries::per_arg_count()); ++ ++ ++ Label loop; ++ bind(loop); ++ ++ int off_base = in_bytes(ParametersTypeData::stack_slot_offset(0)); ++ int type_base = in_bytes(ParametersTypeData::type_offset(0)); ++ Address::ScaleFactor per_arg_scale = Address::times(DataLayout::cell_size); ++ Address arg_type(mdp, tmp1, per_arg_scale, type_base); ++ ++ // load offset on the stack from the slot for this parameter ++ dsll(AT, tmp1, per_arg_scale); ++ daddu(AT, AT, mdp); ++ ld(tmp2, AT, off_base); ++ ++ subu(tmp2, R0, tmp2); ++ ++ // read the parameter from the local area ++ dsll(AT, tmp2, Interpreter::stackElementScale()); ++ daddu(AT, AT, _locals_register); ++ ld(tmp2, AT, 0); ++ ++ // profile the parameter ++ profile_obj_type(tmp2, arg_type); ++ ++ // go to next parameter ++ decrement(tmp1, TypeStackSlotEntries::per_arg_count()); ++ bgtz(tmp1, loop); ++ delayed()->nop(); ++ ++ bind(profile_continue); ++ } ++} ++ ++void InterpreterMacroAssembler::verify_oop(Register reg, TosState state) { ++ if (state == atos) { ++ MacroAssembler::verify_oop(reg); ++ } ++} ++ ++void InterpreterMacroAssembler::verify_FPU(int stack_depth, TosState state) { ++} ++#endif // !CC_INTERP ++ ++ ++void InterpreterMacroAssembler::notify_method_entry() { ++ // Whenever JVMTI is interp_only_mode, method entry/exit events are sent to ++ // track stack depth. If it is possible to enter interp_only_mode we add ++ // the code to check if the event should be sent. ++ Register tempreg = T0; ++#ifndef OPT_THREAD ++ get_thread(T8); ++#else ++ move(T8, TREG); ++#endif ++ if (JvmtiExport::can_post_interpreter_events()) { ++ Label L; ++ lw(tempreg, T8, in_bytes(JavaThread::interp_only_mode_offset())); ++ beq(tempreg, R0, L); ++ delayed()->nop(); ++ call_VM(noreg, CAST_FROM_FN_PTR(address, ++ InterpreterRuntime::post_method_entry)); ++ bind(L); ++ } ++ ++ { ++ SkipIfEqual skip_if(this, &DTraceMethodProbes, 0); ++ get_method(S3); ++ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::dtrace_method_entry), ++ //Rthread, ++ T8, ++ //Rmethod); ++ S3); ++ } ++ ++} ++ ++void InterpreterMacroAssembler::notify_method_exit( ++ TosState state, NotifyMethodExitMode mode) { ++ Register tempreg = T0; ++#ifndef OPT_THREAD ++ get_thread(T8); ++#else ++ move(T8, TREG); ++#endif ++ // Whenever JVMTI is interp_only_mode, method entry/exit events are sent to ++ // track stack depth. If it is possible to enter interp_only_mode we add ++ // the code to check if the event should be sent. ++ if (mode == NotifyJVMTI && JvmtiExport::can_post_interpreter_events()) { ++ Label skip; ++ // Note: frame::interpreter_frame_result has a dependency on how the ++ // method result is saved across the call to post_method_exit. If this ++ // is changed then the interpreter_frame_result implementation will ++ // need to be updated too. ++ ++ // For c++ interpreter the result is always stored at a known location in the frame ++ // template interpreter will leave it on the top of the stack. 
++ NOT_CC_INTERP(push(state);) ++ lw(tempreg, T8, in_bytes(JavaThread::interp_only_mode_offset())); ++ beq(tempreg, R0, skip); ++ delayed()->nop(); ++ call_VM(noreg, ++ CAST_FROM_FN_PTR(address, InterpreterRuntime::post_method_exit)); ++ bind(skip); ++ NOT_CC_INTERP(pop(state)); ++ } ++ ++ { ++ // Dtrace notification ++ SkipIfEqual skip_if(this, &DTraceMethodProbes, 0); ++ NOT_CC_INTERP(push(state);) ++ get_method(S3); ++ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::dtrace_method_exit), ++ //Rthread, Rmethod); ++ T8, S3); ++ NOT_CC_INTERP(pop(state)); ++ } ++} ++ ++// Jump if ((*counter_addr += increment) & mask) satisfies the condition. ++void InterpreterMacroAssembler::increment_mask_and_jump(Address counter_addr, ++ int increment, int mask, ++ Register scratch, bool preloaded, ++ Condition cond, Label* where) { ++ assert_different_registers(scratch, AT); ++ ++ if (!preloaded) { ++ lw(scratch, counter_addr); ++ } ++ addiu32(scratch, scratch, increment); ++ sw(scratch, counter_addr); ++ ++ move(AT, mask); ++ andr(scratch, scratch, AT); ++ ++ if (cond == Assembler::zero) { ++ beq(scratch, R0, *where); ++ delayed()->nop(); ++ } else { ++ unimplemented(); ++ } ++} +diff --git a/hotspot/src/cpu/mips/vm/interp_masm_mips_64.hpp b/hotspot/src/cpu/mips/vm/interp_masm_mips_64.hpp +new file mode 100644 +index 0000000000..a2ebdec3ad +--- /dev/null ++++ b/hotspot/src/cpu/mips/vm/interp_masm_mips_64.hpp +@@ -0,0 +1,269 @@ ++/* ++ * Copyright (c) 2003, 2013, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2019, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ * ++ */ ++ ++#ifndef CPU_MIPS_VM_INTERP_MASM_MIPS_64_HPP ++#define CPU_MIPS_VM_INTERP_MASM_MIPS_64_HPP ++ ++#include "asm/assembler.hpp" ++#include "asm/macroAssembler.hpp" ++#include "asm/macroAssembler.inline.hpp" ++#include "interpreter/invocationCounter.hpp" ++#include "runtime/frame.hpp" ++ ++// This file specializes the assember with interpreter-specific macros ++ ++ ++class InterpreterMacroAssembler: public MacroAssembler { ++#ifndef CC_INTERP ++ private: ++ ++ Register _locals_register; // register that contains the pointer to the locals ++ Register _bcp_register; // register that contains the bcp ++ ++ protected: ++ // Interpreter specific version of call_VM_base ++ virtual void call_VM_leaf_base(address entry_point, ++ int number_of_arguments); ++ ++ virtual void call_VM_base(Register oop_result, ++ Register java_thread, ++ Register last_java_sp, ++ address entry_point, ++ int number_of_arguments, ++ bool check_exceptions); ++ ++ virtual void check_and_handle_popframe(Register java_thread); ++ virtual void check_and_handle_earlyret(Register java_thread); ++ ++ // base routine for all dispatches ++ void dispatch_base(TosState state, address* table, bool verifyoop = true); ++#endif // CC_INTERP ++ ++ public: ++ // narrow int return value ++ void narrow(Register result); ++ ++ InterpreterMacroAssembler(CodeBuffer* code) : MacroAssembler(code), _locals_register(LVP), _bcp_register(BCP) {} ++ ++ void get_2_byte_integer_at_bcp(Register reg, Register tmp, int offset); ++ void get_4_byte_integer_at_bcp(Register reg, Register tmp, int offset); ++ ++ void load_earlyret_value(TosState state); ++ ++#ifdef CC_INTERP ++ void save_bcp() { /* not needed in c++ interpreter and harmless */ } ++ void restore_bcp() { /* not needed in c++ interpreter and harmless */ } ++ ++ // Helpers for runtime call arguments/results ++ void get_method(Register reg); ++ ++#else ++ ++ // Interpreter-specific registers ++ void save_bcp() { ++ sd(BCP, FP, frame::interpreter_frame_bcx_offset * wordSize); ++ } ++ ++ void restore_bcp() { ++ ld(BCP, FP, frame::interpreter_frame_bcx_offset * wordSize); ++ } ++ ++ void restore_locals() { ++ ld(LVP, FP, frame::interpreter_frame_locals_offset * wordSize); ++ } ++ ++ // Helpers for runtime call arguments/results ++ void get_method(Register reg) { ++ ld(reg, FP, frame::interpreter_frame_method_offset * wordSize); ++ } ++ ++ void get_const(Register reg){ ++ get_method(reg); ++ ld(reg, reg, in_bytes(Method::const_offset())); ++ } ++ ++ void get_constant_pool(Register reg) { ++ get_const(reg); ++ ld(reg, reg, in_bytes(ConstMethod::constants_offset())); ++ } ++ ++ void get_constant_pool_cache(Register reg) { ++ get_constant_pool(reg); ++ ld(reg, reg, ConstantPool::cache_offset_in_bytes()); ++ } ++ ++ void get_cpool_and_tags(Register cpool, Register tags) { ++ get_constant_pool(cpool); ++ ld(tags, cpool, ConstantPool::tags_offset_in_bytes()); ++ } ++ ++ void get_unsigned_2_byte_index_at_bcp(Register reg, int bcp_offset); ++ void get_cache_and_index_at_bcp(Register cache, Register index, int bcp_offset, size_t index_size = sizeof(u2)); ++ void get_cache_and_index_and_bytecode_at_bcp(Register cache, Register index, Register bytecode, int byte_no, int bcp_offset, size_t index_size = sizeof(u2)); ++ void get_cache_entry_pointer_at_bcp(Register cache, Register tmp, int bcp_offset, size_t index_size = sizeof(u2)); ++ void get_cache_index_at_bcp(Register index, int bcp_offset, size_t index_size = sizeof(u2)); ++ void get_method_counters(Register method, Register mcs, Label& skip); ++ ++ 
// load cpool->resolved_references(index); ++ void load_resolved_reference_at_index(Register result, Register index); ++ ++ void pop_ptr( Register r = FSR); ++ void pop_i( Register r = FSR); ++ void pop_l( Register r = FSR); ++ void pop_f(FloatRegister r = FSF); ++ void pop_d(FloatRegister r = FSF); ++ ++ void push_ptr( Register r = FSR); ++ void push_i( Register r = FSR); ++ void push_l( Register r = FSR); ++ void push_f(FloatRegister r = FSF); ++ void push_d(FloatRegister r = FSF); ++ ++ void pop(Register r ) { ((MacroAssembler*)this)->pop(r); } ++ ++ void push(Register r ) { ((MacroAssembler*)this)->push(r); } ++ ++ void pop(TosState state); // transition vtos -> state ++ void push(TosState state); // transition state -> vtos ++ ++ void empty_expression_stack() { ++ ld(SP, FP, frame::interpreter_frame_monitor_block_top_offset * wordSize); ++ // NULL last_sp until next java call ++ sd(R0, FP, frame::interpreter_frame_last_sp_offset * wordSize); ++ } ++ ++ // Super call_VM calls - correspond to MacroAssembler::call_VM(_leaf) calls ++ void load_ptr(int n, Register val); ++ void store_ptr(int n, Register val); ++ ++ // Generate a subtype check: branch to ok_is_subtype if sub_klass is ++ // a subtype of super_klass. ++ //void gen_subtype_check( Register sub_klass, Label &ok_is_subtype ); ++ void gen_subtype_check( Register Rsup_klass, Register sub_klass, Label &ok_is_subtype ); ++ ++ // Dispatching ++ void dispatch_prolog(TosState state, int step = 0); ++ void dispatch_epilog(TosState state, int step = 0); ++ void dispatch_only(TosState state); ++ void dispatch_only_normal(TosState state); ++ void dispatch_only_noverify(TosState state); ++ void dispatch_next(TosState state, int step = 0); ++ void dispatch_via (TosState state, address* table); ++ ++ // jump to an invoked target ++ void prepare_to_jump_from_interpreted(); ++ void jump_from_interpreted(Register method, Register temp); ++ ++ ++ // Returning from interpreted functions ++ // ++ // Removes the current activation (incl. unlocking of monitors) ++ // and sets up the return address. This code is also used for ++ // exception unwindwing. In that case, we do not want to throw ++ // IllegalMonitorStateExceptions, since that might get us into an ++ // infinite rethrow exception loop. ++ // Additionally this code is used for popFrame and earlyReturn. ++ // In popFrame case we want to skip throwing an exception, ++ // installing an exception, and notifying jvmdi. ++ // In earlyReturn case we only want to skip throwing an exception ++ // and installing an exception. 
++ void remove_activation(TosState state, Register ret_addr, ++ bool throw_monitor_exception = true, ++ bool install_monitor_exception = true, ++ bool notify_jvmdi = true); ++#endif // CC_INTERP ++ ++ // Object locking ++ void lock_object (Register lock_reg); ++ void unlock_object(Register lock_reg); ++ ++#ifndef CC_INTERP ++ ++ // Interpreter profiling operations ++ void set_method_data_pointer_for_bcp(); ++ void test_method_data_pointer(Register mdp, Label& zero_continue); ++ void verify_method_data_pointer(); ++ ++ void set_mdp_data_at(Register mdp_in, int constant, Register value); ++ void increment_mdp_data_at(Address data, bool decrement = false); ++ void increment_mdp_data_at(Register mdp_in, int constant, ++ bool decrement = false); ++ void increment_mdp_data_at(Register mdp_in, Register reg, int constant, ++ bool decrement = false); ++ void increment_mask_and_jump(Address counter_addr, ++ int increment, int mask, ++ Register scratch, bool preloaded, ++ Condition cond, Label* where); ++ void set_mdp_flag_at(Register mdp_in, int flag_constant); ++ void test_mdp_data_at(Register mdp_in, int offset, Register value, ++ Register test_value_out, ++ Label& not_equal_continue); ++ ++ void record_klass_in_profile(Register receiver, Register mdp, ++ Register reg2, bool is_virtual_call); ++ void record_klass_in_profile_helper(Register receiver, Register mdp, ++ Register reg2, int start_row, ++ Label& done, bool is_virtual_call); ++ ++ void update_mdp_by_offset(Register mdp_in, int offset_of_offset); ++ void update_mdp_by_offset(Register mdp_in, Register reg, int offset_of_disp); ++ void update_mdp_by_constant(Register mdp_in, int constant); ++ void update_mdp_for_ret(Register return_bci); ++ ++ void profile_taken_branch(Register mdp, Register bumped_count); ++ void profile_not_taken_branch(Register mdp); ++ void profile_call(Register mdp); ++ void profile_final_call(Register mdp); ++ void profile_virtual_call(Register receiver, Register mdp, ++ Register scratch2, ++ bool receiver_can_be_null = false); ++ void profile_ret(Register return_bci, Register mdp); ++ void profile_null_seen(Register mdp); ++ void profile_typecheck(Register mdp, Register klass, Register scratch); ++ void profile_typecheck_failed(Register mdp); ++ void profile_switch_default(Register mdp); ++ void profile_switch_case(Register index_in_scratch, Register mdp, ++ Register scratch2); ++ ++ // Debugging ++ // only if +VerifyOops && state == atos ++ void verify_oop(Register reg, TosState state = atos); ++ // only if +VerifyFPU && (state == ftos || state == dtos) ++ void verify_FPU(int stack_depth, TosState state = ftos); ++ ++ void profile_obj_type(Register obj, const Address& mdo_addr); ++ void profile_arguments_type(Register mdp, Register callee, Register tmp, bool is_virtual); ++ void profile_return_type(Register mdp, Register ret, Register tmp); ++ void profile_parameters_type(Register mdp, Register tmp1, Register tmp2); ++#endif // !CC_INTERP ++ ++ typedef enum { NotifyJVMTI, SkipNotifyJVMTI } NotifyMethodExitMode; ++ ++ // support for jvmti/dtrace ++ void notify_method_entry(); ++ void notify_method_exit(TosState state, NotifyMethodExitMode mode); ++}; ++ ++#endif // CPU_MIPS_VM_INTERP_MASM_MIPS_64_HPP +diff --git a/hotspot/src/cpu/mips/vm/interpreterGenerator_mips.hpp b/hotspot/src/cpu/mips/vm/interpreterGenerator_mips.hpp +new file mode 100644 +index 0000000000..26fced492a +--- /dev/null ++++ b/hotspot/src/cpu/mips/vm/interpreterGenerator_mips.hpp +@@ -0,0 +1,49 @@ ++/* ++ * Copyright (c) 1997, 2013, Oracle and/or its 
affiliates. All rights reserved. ++ * Copyright (c) 2015, 2016, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#ifndef CPU_MIPS_VM_INTERPRETERGENERATOR_MIPS_HPP ++#define CPU_MIPS_VM_INTERPRETERGENERATOR_MIPS_HPP ++ ++ ++// Generation of Interpreter ++// ++ friend class AbstractInterpreterGenerator; ++ ++ private: ++ ++ address generate_normal_entry(bool synchronized); ++ address generate_native_entry(bool synchronized); ++ address generate_abstract_entry(void); ++ address generate_math_entry(AbstractInterpreter::MethodKind kind); ++ address generate_empty_entry(void); ++ address generate_accessor_entry(void); ++ address generate_Reference_get_entry(); ++ void lock_method(void); ++ void generate_stack_overflow_check(void); ++ ++ void generate_counter_incr(Label* overflow, Label* profile_method, Label* profile_method_continue); ++ void generate_counter_overflow(Label* do_continue); ++ ++#endif // CPU_MIPS_VM_INTERPRETERGENERATOR_MIPS_HPP +diff --git a/hotspot/src/cpu/mips/vm/interpreterRT_mips.hpp b/hotspot/src/cpu/mips/vm/interpreterRT_mips.hpp +new file mode 100644 +index 0000000000..8dec2007c6 +--- /dev/null ++++ b/hotspot/src/cpu/mips/vm/interpreterRT_mips.hpp +@@ -0,0 +1,61 @@ ++/* ++ * Copyright (c) 1998, 2010, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ * ++ */ ++ ++#ifndef CPU_MIPS_VM_INTERPRETERRT_MIPS_HPP ++#define CPU_MIPS_VM_INTERPRETERRT_MIPS_HPP ++ ++#include "memory/allocation.hpp" ++ ++// native method calls ++ ++class SignatureHandlerGenerator: public NativeSignatureIterator { ++ private: ++ MacroAssembler* _masm; ++ ++ void move(int from_offset, int to_offset); ++ ++ void box(int from_offset, int to_offset); ++ void pass_int(); ++ void pass_long(); ++ void pass_object(); ++ void pass_float(); ++ void pass_double(); ++ ++ public: ++ // Creation ++ SignatureHandlerGenerator(methodHandle method, CodeBuffer* buffer) : NativeSignatureIterator(method) { ++ _masm = new MacroAssembler(buffer); ++ } ++ ++ // Code generation ++ void generate(uint64_t fingerprint); ++ ++ // Code generation support ++ static Register from(); ++ static Register to(); ++ static Register temp(); ++}; ++ ++#endif // CPU_MIPS_VM_INTERPRETERRT_MIPS_HPP +diff --git a/hotspot/src/cpu/mips/vm/interpreterRT_mips_64.cpp b/hotspot/src/cpu/mips/vm/interpreterRT_mips_64.cpp +new file mode 100644 +index 0000000000..14b7e39af7 +--- /dev/null ++++ b/hotspot/src/cpu/mips/vm/interpreterRT_mips_64.cpp +@@ -0,0 +1,259 @@ ++/* ++ * Copyright (c) 2003, 2012, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ * ++ */ ++ ++#include "precompiled.hpp" ++#include "interpreter/interpreter.hpp" ++#include "interpreter/interpreterRuntime.hpp" ++#include "memory/allocation.inline.hpp" ++#include "memory/universe.inline.hpp" ++#include "oops/method.hpp" ++#include "oops/oop.inline.hpp" ++#include "runtime/handles.inline.hpp" ++#include "runtime/icache.hpp" ++#include "runtime/interfaceSupport.hpp" ++#include "runtime/signature.hpp" ++ ++#define __ _masm-> ++ ++#define A0 RA0 ++#define A1 RA1 ++#define A2 RA2 ++#define A3 RA3 ++#define A4 RA4 ++#define A5 RA5 ++#define A6 RA6 ++#define A7 RA7 ++#define T0 RT0 ++#define T1 RT1 ++#define T2 RT2 ++#define T3 RT3 ++#define T8 RT8 ++#define T9 RT9 ++ ++// Implementation of SignatureHandlerGenerator ++ ++void InterpreterRuntime::SignatureHandlerGenerator::move(int from_offset, int to_offset) { ++ __ ld(temp(), from(), Interpreter::local_offset_in_bytes(from_offset)); ++ __ sd(temp(), to(), to_offset * longSize); ++} ++ ++void InterpreterRuntime::SignatureHandlerGenerator::box(int from_offset, int to_offset) { ++ __ addiu(temp(), from(),Interpreter::local_offset_in_bytes(from_offset) ); ++ __ lw(AT, from(), Interpreter::local_offset_in_bytes(from_offset) ); ++ ++ Label L; ++ __ bne(AT, R0, L); ++ __ delayed()->nop(); ++ __ move(temp(), R0); ++ __ bind(L); ++ __ sw(temp(), to(), to_offset * wordSize); ++} ++ ++void InterpreterRuntime::SignatureHandlerGenerator::generate(uint64_t fingerprint) { ++ // generate code to handle arguments ++ iterate(fingerprint); ++ // return result handler ++ __ li(V0, AbstractInterpreter::result_handler(method()->result_type())); ++ // return ++ __ jr(RA); ++ __ delayed()->nop(); ++ ++ __ flush(); ++} ++ ++void InterpreterRuntime::SignatureHandlerGenerator::pass_int() { ++ Argument jni_arg(jni_offset()); ++ if(jni_arg.is_Register()) { ++ __ lw(jni_arg.as_Register(), from(), Interpreter::local_offset_in_bytes(offset())); ++ } else { ++ __ lw(temp(), from(), Interpreter::local_offset_in_bytes(offset())); ++ __ sw(temp(), jni_arg.as_caller_address()); ++ } ++} ++ ++// the jvm specifies that long type takes 2 stack spaces, so in do_long(), _offset += 2. ++void InterpreterRuntime::SignatureHandlerGenerator::pass_long() { ++ Argument jni_arg(jni_offset()); ++ if(jni_arg.is_Register()) { ++ __ ld(jni_arg.as_Register(), from(), Interpreter::local_offset_in_bytes(offset() + 1)); ++ } else { ++ __ ld(temp(), from(), Interpreter::local_offset_in_bytes(offset() + 1)); ++ __ sd(temp(), jni_arg.as_caller_address()); ++ } ++} ++ ++void InterpreterRuntime::SignatureHandlerGenerator::pass_object() { ++ Argument jni_arg(jni_offset()); ++ ++ // the handle for a receiver will never be null ++ bool do_NULL_check = offset() != 0 || is_static(); ++ if (do_NULL_check) { ++ __ ld(AT, from(), Interpreter::local_offset_in_bytes(offset())); ++ __ daddiu((jni_arg.is_Register() ? jni_arg.as_Register() : temp()), from(), Interpreter::local_offset_in_bytes(offset())); ++ __ movz((jni_arg.is_Register() ? 
jni_arg.as_Register() : temp()), R0, AT); ++ } else { ++ __ daddiu(jni_arg.as_Register(), from(), Interpreter::local_offset_in_bytes(offset())); ++ } ++ ++ if (!jni_arg.is_Register()) ++ __ sd(temp(), jni_arg.as_caller_address()); ++} ++ ++void InterpreterRuntime::SignatureHandlerGenerator::pass_float() { ++ Argument jni_arg(jni_offset()); ++ if(jni_arg.is_Register()) { ++ __ lwc1(jni_arg.as_FloatRegister(), from(), Interpreter::local_offset_in_bytes(offset())); ++ } else { ++ __ lw(temp(), from(), Interpreter::local_offset_in_bytes(offset())); ++ __ sw(temp(), jni_arg.as_caller_address()); ++ } ++} ++ ++// the jvm specifies that double type takes 2 stack spaces, so in do_double(), _offset += 2. ++void InterpreterRuntime::SignatureHandlerGenerator::pass_double() { ++ Argument jni_arg(jni_offset()); ++ if(jni_arg.is_Register()) { ++ __ ldc1(jni_arg.as_FloatRegister(), from(), Interpreter::local_offset_in_bytes(offset() + 1)); ++ } else { ++ __ ld(temp(), from(), Interpreter::local_offset_in_bytes(offset() + 1)); ++ __ sd(temp(), jni_arg.as_caller_address()); ++ } ++} ++ ++ ++Register InterpreterRuntime::SignatureHandlerGenerator::from() { return LVP; } ++Register InterpreterRuntime::SignatureHandlerGenerator::to() { return SP; } ++Register InterpreterRuntime::SignatureHandlerGenerator::temp() { return T8; } ++ ++// Implementation of SignatureHandlerLibrary ++ ++void SignatureHandlerLibrary::pd_set_handler(address handler) {} ++ ++ ++class SlowSignatureHandler ++ : public NativeSignatureIterator { ++ private: ++ address _from; ++ intptr_t* _to; ++ intptr_t* _reg_args; ++ intptr_t* _fp_identifiers; ++ unsigned int _num_args; ++ ++ virtual void pass_int() ++ { ++ jint from_obj = *(jint *)(_from+Interpreter::local_offset_in_bytes(0)); ++ _from -= Interpreter::stackElementSize; ++ ++ if (_num_args < Argument::n_register_parameters) { ++ *_reg_args++ = from_obj; ++ _num_args++; ++ } else { ++ *_to++ = from_obj; ++ } ++ } ++ ++ virtual void pass_long() ++ { ++ intptr_t from_obj = *(intptr_t*)(_from+Interpreter::local_offset_in_bytes(1)); ++ _from -= 2 * Interpreter::stackElementSize; ++ ++ if (_num_args < Argument::n_register_parameters) { ++ *_reg_args++ = from_obj; ++ _num_args++; ++ } else { ++ *_to++ = from_obj; ++ } ++ } ++ ++ virtual void pass_object() ++ { ++ intptr_t *from_addr = (intptr_t*)(_from + Interpreter::local_offset_in_bytes(0)); ++ _from -= Interpreter::stackElementSize; ++ if (_num_args < Argument::n_register_parameters) { ++ *_reg_args++ = (*from_addr == 0) ? NULL : (intptr_t) from_addr; ++ _num_args++; ++ } else { ++ *_to++ = (*from_addr == 0) ? 
NULL : (intptr_t) from_addr; ++ } ++ } ++ ++ virtual void pass_float() ++ { ++ jint from_obj = *(jint *)(_from+Interpreter::local_offset_in_bytes(0)); ++ _from -= Interpreter::stackElementSize; ++ ++ if (_num_args < Argument::n_float_register_parameters) { ++ *_reg_args++ = from_obj; ++ *_fp_identifiers |= (0x01 << (_num_args*2)); // mark as float ++ _num_args++; ++ } else { ++ *_to++ = from_obj; ++ } ++ } ++ ++ virtual void pass_double() ++ { ++ intptr_t from_obj = *(intptr_t*)(_from+Interpreter::local_offset_in_bytes(1)); ++ _from -= 2*Interpreter::stackElementSize; ++ ++ if (_num_args < Argument::n_float_register_parameters) { ++ *_reg_args++ = from_obj; ++ *_fp_identifiers |= (0x3 << (_num_args*2)); // mark as double ++ _num_args++; ++ } else { ++ *_to++ = from_obj; ++ } ++ } ++ ++ public: ++ SlowSignatureHandler(methodHandle method, address from, intptr_t* to) ++ : NativeSignatureIterator(method) ++ { ++ _from = from; ++ _to = to; ++ ++ // see TemplateInterpreterGenerator::generate_slow_signature_handler() ++ _reg_args = to - Argument::n_register_parameters + jni_offset() - 1; ++ _fp_identifiers = to - 1; ++ *(int*) _fp_identifiers = 0; ++ _num_args = jni_offset(); ++ } ++}; ++ ++ ++IRT_ENTRY(address, ++ InterpreterRuntime::slow_signature_handler(JavaThread* thread, ++ Method* method, ++ intptr_t* from, ++ intptr_t* to)) ++ methodHandle m(thread, (Method*)method); ++ assert(m->is_native(), "sanity check"); ++ ++ // handle arguments ++ SlowSignatureHandler(m, (address)from, to).iterate(UCONST64(-1)); ++ ++ // return result handler ++ return Interpreter::result_handler(m->result_type()); ++IRT_END +diff --git a/hotspot/src/cpu/mips/vm/interpreter_mips.hpp b/hotspot/src/cpu/mips/vm/interpreter_mips.hpp +new file mode 100644 +index 0000000000..9a21d704fa +--- /dev/null ++++ b/hotspot/src/cpu/mips/vm/interpreter_mips.hpp +@@ -0,0 +1,50 @@ ++/* ++ * Copyright (c) 1997, 2011, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#ifndef CPU_MIPS_VM_INTERPRETER_MIPS_HPP ++#define CPU_MIPS_VM_INTERPRETER_MIPS_HPP ++ ++ public: ++ ++ // Sentinel placed in the code for interpreter returns so ++ // that i2c adapters and osr code can recognize an interpreter ++ // return address and convert the return to a specialized ++ // block of code to handle compiedl return values and cleaning ++ // the fpu stack. 
++ static const int return_sentinel; ++ ++ static Address::ScaleFactor stackElementScale() { ++ return Address::times_8; ++ } ++ ++ // Offset from sp (which points to the last stack element) ++ static int expr_offset_in_bytes(int i) { return stackElementSize * i; } ++ // Size of interpreter code. Increase if too small. Interpreter will ++ // fail with a guarantee ("not enough space for interpreter generation"); ++ // if too small. ++ // Run with +PrintInterpreterSize to get the VM to print out the size. ++ // Max size with JVMTI and TaggedStackInterpreter ++ const static int InterpreterCodeSize = 168 * 1024; ++#endif // CPU_MIPS_VM_INTERPRETER_MIPS_HPP +diff --git a/hotspot/src/cpu/mips/vm/interpreter_mips_64.cpp b/hotspot/src/cpu/mips/vm/interpreter_mips_64.cpp +new file mode 100644 +index 0000000000..014c812713 +--- /dev/null ++++ b/hotspot/src/cpu/mips/vm/interpreter_mips_64.cpp +@@ -0,0 +1,286 @@ ++/* ++ * Copyright (c) 2003, 2014, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ * ++ */ ++ ++#include "precompiled.hpp" ++#include "asm/macroAssembler.hpp" ++#include "interpreter/bytecodeHistogram.hpp" ++#include "interpreter/interpreter.hpp" ++#include "interpreter/interpreterGenerator.hpp" ++#include "interpreter/interpreterRuntime.hpp" ++#include "interpreter/templateTable.hpp" ++#include "oops/arrayOop.hpp" ++#include "oops/methodData.hpp" ++#include "oops/method.hpp" ++#include "oops/oop.inline.hpp" ++#include "prims/jvmtiExport.hpp" ++#include "prims/jvmtiThreadState.hpp" ++#include "prims/methodHandles.hpp" ++#include "runtime/arguments.hpp" ++#include "runtime/deoptimization.hpp" ++#include "runtime/frame.inline.hpp" ++#include "runtime/sharedRuntime.hpp" ++#include "runtime/stubRoutines.hpp" ++#include "runtime/synchronizer.hpp" ++#include "runtime/timer.hpp" ++#include "runtime/vframeArray.hpp" ++#include "utilities/debug.hpp" ++ ++#define __ _masm-> ++ ++#define A0 RA0 ++#define A1 RA1 ++#define A2 RA2 ++#define A3 RA3 ++#define A4 RA4 ++#define A5 RA5 ++#define A6 RA6 ++#define A7 RA7 ++#define T0 RT0 ++#define T1 RT1 ++#define T2 RT2 ++#define T3 RT3 ++#define T8 RT8 ++#define T9 RT9 ++ ++ ++address AbstractInterpreterGenerator::generate_slow_signature_handler() { ++ address entry = __ pc(); ++ ++ // Rmethod: method ++ // LVP: pointer to locals ++ // A3: first stack arg ++ __ move(A3, SP); ++ __ daddiu(SP, SP, -10 * wordSize); ++ __ sd(RA, SP, 0); ++ __ call_VM(noreg, ++ CAST_FROM_FN_PTR(address, ++ InterpreterRuntime::slow_signature_handler), ++ Rmethod, LVP, A3); ++ ++ // V0: result handler ++ ++ // Stack layout: ++ // ... ++ // 10 stack arg0 <--- old sp ++ // 9 float/double identifiers ++ // 8 register arg7 ++ // ... ++ // 2 register arg1 ++ // 1 aligned slot ++ // SP: 0 return address ++ ++ // Do FP first so we can use T3 as temp ++ __ ld(T3, Address(SP, 9 * wordSize)); // float/double identifiers ++ ++ // A0 is for env. ++ // If the mothed is not static, A1 will be corrected in generate_native_entry. ++ for ( int i = 1; i < Argument::n_register_parameters; i++ ) { ++ Register reg = as_Register(i + A0->encoding()); ++ FloatRegister floatreg = as_FloatRegister(i + F12->encoding()); ++ Label isfloatordouble, isdouble, next; ++ ++ __ andi(AT, T3, 1 << (i*2)); // Float or Double? ++ __ bne(AT, R0, isfloatordouble); ++ __ delayed()->nop(); ++ ++ // Do Int register here ++ __ ld(reg, SP, (1 + i) * wordSize); ++ __ b (next); ++ __ delayed()->nop(); ++ ++ __ bind(isfloatordouble); ++ __ andi(AT, T3, 1 << ((i*2)+1)); // Double? ++ __ bne(AT, R0, isdouble); ++ __ delayed()->nop(); ++ ++ // Do Float Here ++ __ lwc1(floatreg, SP, (1 + i) * wordSize); ++ __ b(next); ++ __ delayed()->nop(); ++ ++ // Do Double here ++ __ bind(isdouble); ++ __ ldc1(floatreg, SP, (1 + i) * wordSize); ++ ++ __ bind(next); ++ } ++ ++ __ ld(RA, SP, 0); ++ __ daddiu(SP, SP, 10 * wordSize); ++ __ jr(RA); ++ __ delayed()->nop(); ++ return entry; ++} ++ ++ ++// ++// Various method entries ++// ++ ++address InterpreterGenerator::generate_math_entry(AbstractInterpreter::MethodKind kind) { ++ ++ // Rmethod: methodOop ++ // V0: scratrch ++ // Rsender: send 's sp ++ ++ if (!InlineIntrinsics) return NULL; // Generate a vanilla entry ++ ++ address entry_point = __ pc(); ++ ++ // These don't need a safepoint check because they aren't virtually ++ // callable. We won't enter these intrinsics from compiled code. ++ // If in the future we added an intrinsic which was virtually callable ++ // we'd have to worry about how to safepoint so that this code is used. 
++ ++ // mathematical functions inlined by compiler ++ // (interpreter must provide identical implementation ++ // in order to avoid monotonicity bugs when switching ++ // from interpreter to compiler in the middle of some ++ // computation) ++ // ++ // stack: [ lo(arg) ] <-- sp ++ // [ hi(arg) ] ++ { ++ // Note: For JDK 1.3 StrictMath exists and Math.sin/cos/sqrt are ++ // java methods. Interpreter::method_kind(...) will select ++ // this entry point for the corresponding methods in JDK 1.3. ++ __ ldc1(F12, SP, 0 * wordSize); ++ __ ldc1(F13, SP, 1 * wordSize); ++ __ push2(RA, FP); ++ __ daddiu(FP, SP, 2 * wordSize); ++ ++ // [ fp ] <-- sp ++ // [ ra ] ++ // [ lo ] <-- fp ++ // [ hi ] ++ //FIXME, need consider this ++ switch (kind) { ++ case Interpreter::java_lang_math_sin : ++ __ trigfunc('s'); ++ break; ++ case Interpreter::java_lang_math_cos : ++ __ trigfunc('c'); ++ break; ++ case Interpreter::java_lang_math_tan : ++ __ trigfunc('t'); ++ break; ++ case Interpreter::java_lang_math_sqrt: ++ __ sqrt_d(F0, F12); ++ break; ++ case Interpreter::java_lang_math_abs: ++ __ abs_d(F0, F12); ++ break; ++ case Interpreter::java_lang_math_log: ++ // Store to stack to convert 80bit precision back to 64bits ++ break; ++ case Interpreter::java_lang_math_log10: ++ // Store to stack to convert 80bit precision back to 64bits ++ break; ++ case Interpreter::java_lang_math_pow: ++ break; ++ case Interpreter::java_lang_math_exp: ++ break; ++ ++ default : ++ ShouldNotReachHere(); ++ } ++ ++ // must maintain return value in F0:F1 ++ __ ld(RA, FP, (-1) * wordSize); ++ //FIXME ++ __ ld(FP, FP, (-2) * wordSize); ++ __ move(SP, Rsender); ++ __ jr(RA); ++ __ delayed()->nop(); ++ } ++ return entry_point; ++} ++ ++ ++// Abstract method entry ++// Attempt to execute abstract method. Throw exception ++address InterpreterGenerator::generate_abstract_entry(void) { ++ ++ // Rmethod: methodOop ++ // V0: receiver (unused) ++ // Rsender : sender 's sp ++ address entry_point = __ pc(); ++ ++ // abstract method entry ++ // throw exception ++ // adjust stack to what a normal return would do ++ __ empty_expression_stack(); ++ __ restore_bcp(); ++ __ restore_locals(); ++ __ call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::throw_AbstractMethodError)); ++ // the call_VM checks for exception, so we should never return here. ++ __ should_not_reach_here(); ++ ++ return entry_point; ++} ++ ++ ++// Empty method, generate a very fast return. ++ ++address InterpreterGenerator::generate_empty_entry(void) { ++ ++ // Rmethod: methodOop ++ // V0: receiver (unused) ++ // Rsender: sender 's sp , must set sp to this value on return , on mips ,now use T0,as it right? ++ if (!UseFastEmptyMethods) return NULL; ++ ++ address entry_point = __ pc(); ++ ++ Label slow_path; ++ __ li(RT0, SafepointSynchronize::address_of_state()); ++ __ lw(AT, RT0, 0); ++ __ move(RT0, (SafepointSynchronize::_not_synchronized)); ++ __ bne(AT, RT0,slow_path); ++ __ delayed()->nop(); ++ __ move(SP, Rsender); ++ __ jr(RA); ++ __ delayed()->nop(); ++ __ bind(slow_path); ++ (void) generate_normal_entry(false); ++ ++ return entry_point; ++ ++} ++ ++void Deoptimization::unwind_callee_save_values(frame* f, vframeArray* vframe_array) { ++ ++ // This code is sort of the equivalent of C2IAdapter::setup_stack_frame back in ++ // the days we had adapter frames. When we deoptimize a situation where a ++ // compiled caller calls a compiled caller will have registers it expects ++ // to survive the call to the callee. 
If we deoptimize the callee the only ++ // way we can restore these registers is to have the oldest interpreter ++ // frame that we create restore these values. That is what this routine ++ // will accomplish. ++ ++ // At the moment we have modified c2 to not have any callee save registers ++ // so this problem does not exist and this routine is just a place holder. ++ ++ assert(f->is_interpreted_frame(), "must be interpreted"); ++} +diff --git a/hotspot/src/cpu/mips/vm/javaFrameAnchor_mips.hpp b/hotspot/src/cpu/mips/vm/javaFrameAnchor_mips.hpp +new file mode 100644 +index 0000000000..dccdf6a019 +--- /dev/null ++++ b/hotspot/src/cpu/mips/vm/javaFrameAnchor_mips.hpp +@@ -0,0 +1,87 @@ ++/* ++ * Copyright (c) 2002, 2010, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2016, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#ifndef CPU_MIPS_VM_JAVAFRAMEANCHOR_MIPS_HPP ++#define CPU_MIPS_VM_JAVAFRAMEANCHOR_MIPS_HPP ++ ++private: ++ ++ // FP value associated with _last_Java_sp: ++ intptr_t* volatile _last_Java_fp; // pointer is volatile not what it points to ++ ++public: ++ // Each arch must define reset, save, restore ++ // These are used by objects that only care about: ++ // 1 - initializing a new state (thread creation, javaCalls) ++ // 2 - saving a current state (javaCalls) ++ // 3 - restoring an old state (javaCalls) ++ ++ void clear(void) { ++ // clearing _last_Java_sp must be first ++ _last_Java_sp = NULL; ++ // fence? 
++ _last_Java_fp = NULL; ++ _last_Java_pc = NULL; ++ } ++ ++ void copy(JavaFrameAnchor* src) { ++ // In order to make sure the transition state is valid for "this" ++ // We must clear _last_Java_sp before copying the rest of the new data ++ // ++ // Hack Alert: Temporary bugfix for 4717480/4721647 ++ // To act like previous version (pd_cache_state) don't NULL _last_Java_sp ++ // unless the value is changing ++ // ++ if (_last_Java_sp != src->_last_Java_sp) ++ _last_Java_sp = NULL; ++ ++ _last_Java_fp = src->_last_Java_fp; ++ _last_Java_pc = src->_last_Java_pc; ++ // Must be last so profiler will always see valid frame if has_last_frame() is true ++ _last_Java_sp = src->_last_Java_sp; ++ } ++ ++ // Always walkable ++ bool walkable(void) { return true; } ++ // Never any thing to do since we are always walkable and can find address of return addresses ++ void make_walkable(JavaThread* thread) { } ++ ++ intptr_t* last_Java_sp(void) const { return _last_Java_sp; } ++ ++ address last_Java_pc(void) { return _last_Java_pc; } ++ ++private: ++ ++ static ByteSize last_Java_fp_offset() { return byte_offset_of(JavaFrameAnchor, _last_Java_fp); } ++ ++public: ++ ++ void set_last_Java_sp(intptr_t* sp) { _last_Java_sp = sp; } ++ ++ intptr_t* last_Java_fp(void) { return _last_Java_fp; } ++ // Assert (last_Java_sp == NULL || fp == NULL) ++ void set_last_Java_fp(intptr_t* fp) { _last_Java_fp = fp; } ++ ++#endif // CPU_MIPS_VM_JAVAFRAMEANCHOR_MIPS_HPP +diff --git a/hotspot/src/cpu/mips/vm/jniFastGetField_mips_64.cpp b/hotspot/src/cpu/mips/vm/jniFastGetField_mips_64.cpp +new file mode 100644 +index 0000000000..0f7dd9424a +--- /dev/null ++++ b/hotspot/src/cpu/mips/vm/jniFastGetField_mips_64.cpp +@@ -0,0 +1,172 @@ ++/* ++ * Copyright (c) 2004, 2010, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2021, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ * ++ */ ++ ++#include "precompiled.hpp" ++#include "asm/macroAssembler.hpp" ++#include "code/codeBlob.hpp" ++#include "memory/resourceArea.hpp" ++#include "prims/jniFastGetField.hpp" ++#include "prims/jvm_misc.hpp" ++#include "runtime/safepoint.hpp" ++ ++#define __ masm-> ++ ++#define A0 RA0 ++#define A1 RA1 ++#define A2 RA2 ++#define A3 RA3 ++#define A4 RA4 ++#define A5 RA5 ++#define A6 RA6 ++#define A7 RA7 ++#define T0 RT0 ++#define T1 RT1 ++#define T2 RT2 ++#define T3 RT3 ++#define T8 RT8 ++#define T9 RT9 ++ ++#define BUFFER_SIZE 30*wordSize ++ ++// Instead of issuing lfence for LoadLoad barrier, we create data dependency ++// between loads, which is more efficient than lfence. ++ ++address JNI_FastGetField::generate_fast_get_int_field0(BasicType type) { ++ const char *name = NULL; ++ switch (type) { ++ case T_BOOLEAN: name = "jni_fast_GetBooleanField"; break; ++ case T_BYTE: name = "jni_fast_GetByteField"; break; ++ case T_CHAR: name = "jni_fast_GetCharField"; break; ++ case T_SHORT: name = "jni_fast_GetShortField"; break; ++ case T_INT: name = "jni_fast_GetIntField"; break; ++ case T_LONG: name = "jni_fast_GetLongField"; break; ++ case T_FLOAT: name = "jni_fast_GetFloatField"; break; ++ case T_DOUBLE: name = "jni_fast_GetDoubleField"; break; ++ default: ShouldNotReachHere(); ++ } ++ ResourceMark rm; ++ BufferBlob* blob = BufferBlob::create(name, BUFFER_SIZE); ++ CodeBuffer cbuf(blob); ++ MacroAssembler* masm = new MacroAssembler(&cbuf); ++ address fast_entry = __ pc(); ++ ++ Label slow; ++ ++ // return pc RA ++ // jni env A0 ++ // obj A1 ++ // jfieldID A2 ++ ++ address counter_addr = SafepointSynchronize::safepoint_counter_addr(); ++ __ set64(AT, (long)counter_addr); ++ __ lw(T1, AT, 0); ++ ++ // Parameters(A0~A3) should not be modified, since they will be used in slow path ++ __ andi(AT, T1, 1); ++ __ bne(AT, R0, slow); ++ __ delayed()->nop(); ++ ++ __ move(T0, A1); ++ __ clear_jweak_tag(T0); ++ ++ __ ld(T0, T0, 0); // unbox, *obj ++ __ dsrl(T2, A2, 2); // offset ++ __ daddu(T0, T0, T2); ++ ++ assert(count < LIST_CAPACITY, "LIST_CAPACITY too small"); ++ speculative_load_pclist[count] = __ pc(); ++ switch (type) { ++ case T_BOOLEAN: __ lbu (V0, T0, 0); break; ++ case T_BYTE: __ lb (V0, T0, 0); break; ++ case T_CHAR: __ lhu (V0, T0, 0); break; ++ case T_SHORT: __ lh (V0, T0, 0); break; ++ case T_INT: __ lw (V0, T0, 0); break; ++ case T_LONG: __ ld (V0, T0, 0); break; ++ case T_FLOAT: __ lwc1(F0, T0, 0); break; ++ case T_DOUBLE: __ ldc1(F0, T0, 0); break; ++ default: ShouldNotReachHere(); ++ } ++ ++ __ set64(AT, (long)counter_addr); ++ __ lw(AT, AT, 0); ++ __ bne(T1, AT, slow); ++ __ delayed()->nop(); ++ ++ __ jr(RA); ++ __ delayed()->nop(); ++ ++ slowcase_entry_pclist[count++] = __ pc(); ++ __ bind (slow); ++ address slow_case_addr = NULL; ++ switch (type) { ++ case T_BOOLEAN: slow_case_addr = jni_GetBooleanField_addr(); break; ++ case T_BYTE: slow_case_addr = jni_GetByteField_addr(); break; ++ case T_CHAR: slow_case_addr = jni_GetCharField_addr(); break; ++ case T_SHORT: slow_case_addr = jni_GetShortField_addr(); break; ++ case T_INT: slow_case_addr = jni_GetIntField_addr(); break; ++ case T_LONG: slow_case_addr = jni_GetLongField_addr(); break; ++ case T_FLOAT: slow_case_addr = jni_GetFloatField_addr(); break; ++ case T_DOUBLE: slow_case_addr = jni_GetDoubleField_addr(); break; ++ default: ShouldNotReachHere(); ++ } ++ __ jmp(slow_case_addr); ++ __ delayed()->nop(); ++ ++ __ flush (); ++ ++ return fast_entry; ++} ++ ++address JNI_FastGetField::generate_fast_get_boolean_field() { 
++ return generate_fast_get_int_field0(T_BOOLEAN); ++} ++ ++address JNI_FastGetField::generate_fast_get_byte_field() { ++ return generate_fast_get_int_field0(T_BYTE); ++} ++ ++address JNI_FastGetField::generate_fast_get_char_field() { ++ return generate_fast_get_int_field0(T_CHAR); ++} ++ ++address JNI_FastGetField::generate_fast_get_short_field() { ++ return generate_fast_get_int_field0(T_SHORT); ++} ++ ++address JNI_FastGetField::generate_fast_get_int_field() { ++ return generate_fast_get_int_field0(T_INT); ++} ++ ++address JNI_FastGetField::generate_fast_get_long_field() { ++ return generate_fast_get_int_field0(T_LONG); ++} ++ ++address JNI_FastGetField::generate_fast_get_float_field() { ++ return generate_fast_get_int_field0(T_FLOAT); ++} ++ ++address JNI_FastGetField::generate_fast_get_double_field() { ++ return generate_fast_get_int_field0(T_DOUBLE); ++} +diff --git a/hotspot/src/cpu/mips/vm/jniTypes_mips.hpp b/hotspot/src/cpu/mips/vm/jniTypes_mips.hpp +new file mode 100644 +index 0000000000..dfcd47b478 +--- /dev/null ++++ b/hotspot/src/cpu/mips/vm/jniTypes_mips.hpp +@@ -0,0 +1,144 @@ ++/* ++ * Copyright (c) 1998, 2010, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#ifndef CPU_MIPS_VM_JNITYPES_MIPS_HPP ++#define CPU_MIPS_VM_JNITYPES_MIPS_HPP ++ ++#include "memory/allocation.hpp" ++#include "oops/oop.hpp" ++#include "prims/jni.h" ++ ++// This file holds platform-dependent routines used to write primitive jni ++// types to the array of arguments passed into JavaCalls::call ++ ++class JNITypes : AllStatic { ++ // These functions write a java primitive type (in native format) ++ // to a java stack slot array to be passed as an argument to JavaCalls:calls. ++ // I.e., they are functionally 'push' operations if they have a 'pos' ++ // formal parameter. Note that jlong's and jdouble's are written ++ // _in reverse_ of the order in which they appear in the interpreter ++ // stack. This is because call stubs (see stubGenerator_sparc.cpp) ++ // reverse the argument list constructed by JavaCallArguments (see ++ // javaCalls.hpp). ++ ++private: ++ ++ // 32bit Helper routines. 
++ static inline void put_int2r(jint *from, intptr_t *to) { *(jint *)(to++) = from[1]; ++ *(jint *)(to ) = from[0]; } ++ static inline void put_int2r(jint *from, intptr_t *to, int& pos) { put_int2r(from, to + pos); pos += 2; } ++ ++public: ++ // In MIPS64, the sizeof intptr_t is 8 bytes, and each unit in JavaCallArguments::_value_buffer[] ++ // is 8 bytes. ++ // If we only write the low 4 bytes with (jint *), the high 4-bits will be left with uncertain values. ++ // Then, in JavaCallArguments::parameters(), the whole 8 bytes of a T_INT parameter is loaded. ++ // This error occurs in ReflectInvoke.java ++ // The parameter of DD(int) should be 4 instead of 0x550000004. ++ // ++ // See: [runtime/javaCalls.hpp] ++ ++ static inline void put_int(jint from, intptr_t *to) { *(intptr_t *)(to + 0 ) = from; } ++ static inline void put_int(jint from, intptr_t *to, int& pos) { *(intptr_t *)(to + pos++) = from; } ++ static inline void put_int(jint *from, intptr_t *to, int& pos) { *(intptr_t *)(to + pos++) = *from; } ++ ++ // Longs are stored in native format in one JavaCallArgument slot at ++ // *(to). ++ // In theory, *(to + 1) is an empty slot. But, for several Java2D testing programs (TestBorderLayout, SwingTest), ++ // *(to + 1) must contains a copy of the long value. Otherwise it will corrupts. ++ static inline void put_long(jlong from, intptr_t *to) { ++ *(jlong*) (to + 1) = from; ++ *(jlong*) (to) = from; ++ } ++ ++ // A long parameter occupies two slot. ++ // It must fit the layout rule in methodHandle. ++ // ++ // See: [runtime/reflection.cpp] Reflection::invoke() ++ // assert(java_args.size_of_parameters() == method->size_of_parameters(), "just checking"); ++ ++ static inline void put_long(jlong from, intptr_t *to, int& pos) { ++ *(jlong*) (to + 1 + pos) = from; ++ *(jlong*) (to + pos) = from; ++ pos += 2; ++ } ++ ++ static inline void put_long(jlong *from, intptr_t *to, int& pos) { ++ *(jlong*) (to + 1 + pos) = *from; ++ *(jlong*) (to + pos) = *from; ++ pos += 2; ++ } ++ ++ // Oops are stored in native format in one JavaCallArgument slot at *to. ++ static inline void put_obj(oop from, intptr_t *to) { *(oop *)(to + 0 ) = from; } ++ static inline void put_obj(oop from, intptr_t *to, int& pos) { *(oop *)(to + pos++) = from; } ++ static inline void put_obj(oop *from, intptr_t *to, int& pos) { *(oop *)(to + pos++) = *from; } ++ ++ // Floats are stored in native format in one JavaCallArgument slot at *to. ++ static inline void put_float(jfloat from, intptr_t *to) { *(jfloat *)(to + 0 ) = from; } ++ static inline void put_float(jfloat from, intptr_t *to, int& pos) { *(jfloat *)(to + pos++) = from; } ++ static inline void put_float(jfloat *from, intptr_t *to, int& pos) { *(jfloat *)(to + pos++) = *from; } ++ ++#undef _JNI_SLOT_OFFSET ++#define _JNI_SLOT_OFFSET 0 ++ ++ // Longs are stored in native format in one JavaCallArgument slot at ++ // *(to). ++ // In theory, *(to + 1) is an empty slot. But, for several Java2D testing programs (TestBorderLayout, SwingTest), ++ // *(to + 1) must contains a copy of the long value. Otherwise it will corrupts. ++ static inline void put_double(jdouble from, intptr_t *to) { ++ *(jdouble*) (to + 1) = from; ++ *(jdouble*) (to) = from; ++ } ++ ++ // A long parameter occupies two slot. ++ // It must fit the layout rule in methodHandle. 
++ // ++ // See: [runtime/reflection.cpp] Reflection::invoke() ++ // assert(java_args.size_of_parameters() == method->size_of_parameters(), "just checking"); ++ ++ static inline void put_double(jdouble from, intptr_t *to, int& pos) { ++ *(jdouble*) (to + 1 + pos) = from; ++ *(jdouble*) (to + pos) = from; ++ pos += 2; ++ } ++ ++ static inline void put_double(jdouble *from, intptr_t *to, int& pos) { ++ *(jdouble*) (to + 1 + pos) = *from; ++ *(jdouble*) (to + pos) = *from; ++ pos += 2; ++ } ++ ++ // The get_xxx routines, on the other hand, actually _do_ fetch ++ // java primitive types from the interpreter stack. ++ static inline jint get_int (intptr_t *from) { return *(jint *) from; } ++ static inline jlong get_long (intptr_t *from) { return *(jlong *) (from + _JNI_SLOT_OFFSET); } ++ static inline oop get_obj (intptr_t *from) { return *(oop *) from; } ++ static inline jfloat get_float (intptr_t *from) { return *(jfloat *) from; } ++ static inline jdouble get_double(intptr_t *from) { return *(jdouble *)(from + _JNI_SLOT_OFFSET); } ++#undef _JNI_SLOT_OFFSET ++}; ++ ++#endif // CPU_MIPS_VM_JNITYPES_MIPS_HPP +diff --git a/hotspot/src/cpu/mips/vm/jni_mips.h b/hotspot/src/cpu/mips/vm/jni_mips.h +new file mode 100644 +index 0000000000..6714f51d5d +--- /dev/null ++++ b/hotspot/src/cpu/mips/vm/jni_mips.h +@@ -0,0 +1,51 @@ ++/* ++ * Copyright (c) 1997, 2013, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. Oracle designates this ++ * particular file as subject to the "Classpath" exception as provided ++ * by Oracle in the LICENSE file that accompanied this code. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ */ ++ ++#ifndef _JAVASOFT_JNI_MD_H_ ++#define _JAVASOFT_JNI_MD_H_ ++ ++// Note: please do not change these without also changing jni_md.h in the JDK ++// repository ++#ifndef __has_attribute ++ #define __has_attribute(x) 0 ++#endif ++#if (defined(__GNUC__) && ((__GNUC__ > 4) || (__GNUC__ == 4) && (__GNUC_MINOR__ > 2))) || __has_attribute(visibility) ++ #define JNIEXPORT __attribute__((visibility("default"))) ++ #define JNIIMPORT __attribute__((visibility("default"))) ++#else ++ #define JNIEXPORT ++ #define JNIIMPORT ++#endif ++ ++#define JNICALL ++ ++typedef int jint; ++ ++typedef long jlong; ++ ++typedef signed char jbyte; ++ ++#endif +diff --git a/hotspot/src/cpu/mips/vm/macroAssembler_mips.cpp b/hotspot/src/cpu/mips/vm/macroAssembler_mips.cpp +new file mode 100644 +index 0000000000..2b8840ae10 +--- /dev/null ++++ b/hotspot/src/cpu/mips/vm/macroAssembler_mips.cpp +@@ -0,0 +1,4332 @@ ++/* ++ * Copyright (c) 1997, 2014, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2017, 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ *
++ */
++
++#include "precompiled.hpp"
++#include "asm/assembler.hpp"
++#include "asm/assembler.inline.hpp"
++#include "asm/macroAssembler.inline.hpp"
++#include "compiler/disassembler.hpp"
++#include "gc_interface/collectedHeap.inline.hpp"
++#include "interpreter/interpreter.hpp"
++#include "memory/cardTableModRefBS.hpp"
++#include "memory/resourceArea.hpp"
++#include "memory/universe.hpp"
++#include "prims/methodHandles.hpp"
++#include "runtime/biasedLocking.hpp"
++#include "runtime/interfaceSupport.hpp"
++#include "runtime/objectMonitor.hpp"
++#include "runtime/os.hpp"
++#include "runtime/sharedRuntime.hpp"
++#include "runtime/stubRoutines.hpp"
++#include "utilities/macros.hpp"
++#if INCLUDE_ALL_GCS
++#include "gc_implementation/g1/g1CollectedHeap.inline.hpp"
++#include "gc_implementation/g1/g1SATBCardTableModRefBS.hpp"
++#include "gc_implementation/g1/heapRegion.hpp"
++#endif // INCLUDE_ALL_GCS
++
++#define A0 RA0
++#define A1 RA1
++#define A2 RA2
++#define A3 RA3
++#define A4 RA4
++#define A5 RA5
++#define A6 RA6
++#define A7 RA7
++#define T0 RT0
++#define T1 RT1
++#define T2 RT2
++#define T3 RT3
++#define T8 RT8
++#define T9 RT9
++
++// Implementation of MacroAssembler
++
++intptr_t MacroAssembler::i[32] = {0};
++float MacroAssembler::f[32] = {0.0};
++
++void MacroAssembler::print(outputStream *s) {
++ unsigned int k;
++ for(k=0; k<32; k++) {
++ s->print_cr("i%d = 0x%.16lx", k, i[k]);
++ }
++ s->cr();
++
++ for(k=0; k<32; k++) {
++ s->print_cr("f%d = %f", k, f[k]);
++ }
++ s->cr();
++}
++
++int MacroAssembler::i_offset(unsigned int k) { return (intptr_t)&((MacroAssembler*)0)->i[k]; }
++int MacroAssembler::f_offset(unsigned int k) { return (intptr_t)&((MacroAssembler*)0)->f[k]; }
++
++void MacroAssembler::save_registers(MacroAssembler *masm) {
++#define __ masm->
++ for(int k=0; k<32; k++) {
++ __ sw (as_Register(k), A0, i_offset(k));
++ }
++
++ for(int k=0; k<32; k++) {
++ __ swc1 (as_FloatRegister(k), A0, f_offset(k));
++ }
++#undef __
++}
++
++void MacroAssembler::restore_registers(MacroAssembler *masm) {
++#define __ masm->
++ for(int k=0; k<32; k++) {
++ __ lw (as_Register(k), A0, i_offset(k));
++ }
++
++ for(int k=0; k<32; k++) {
++ __ lwc1 (as_FloatRegister(k), A0, f_offset(k));
++ }
++#undef __
++}
++
++
++void MacroAssembler::pd_patch_instruction(address branch, address target) {
++ jint& stub_inst = *(jint*) branch;
++ jint *pc = (jint *)branch;
++
++ if((opcode(stub_inst) == special_op) && (special(stub_inst) == daddu_op)) {
++ //b_far:
++ // move(AT, RA); // daddu
++ // emit_long(insn_ORRI(regimm_op, 0, bgezal_op, 1));
++ // nop();
++ // lui(T9, 0); // to be patched
++ // ori(T9, 0);
++ // daddu(T9, T9, RA);
++ // move(RA, AT);
++ // jr(T9);
++
++ assert(opcode(pc[3]) == lui_op
++ && opcode(pc[4]) == ori_op
++ && special(pc[5]) == daddu_op, "Not a branch label patch");
++ if(!(opcode(pc[3]) == lui_op
++ && opcode(pc[4]) == ori_op
++ && special(pc[5]) == daddu_op)) { tty->print_cr("Not a branch label patch"); }
++
++ int offset = target - branch;
++ if (!is_simm16(offset)) {
++ pc[3] = (pc[3] & 0xffff0000) | high16(offset - 12);
++ pc[4] = (pc[4] & 0xffff0000) | low16(offset - 12);
++ } else {
++ // revert to "beq + nop"
++ CodeBuffer cb(branch, 4 * 10);
++ MacroAssembler masm(&cb);
++#define __ masm.
++ __ b(target); ++ __ delayed()->nop(); ++ __ nop(); ++ __ nop(); ++ __ nop(); ++ __ nop(); ++ __ nop(); ++ __ nop(); ++ } ++ return; ++ } else if (special(pc[4]) == jr_op ++ && opcode(pc[4]) == special_op ++ && (((opcode(pc[0]) == lui_op) || opcode(pc[0]) == daddiu_op) || (opcode(pc[0]) == ori_op))) { ++ //jmp_far: ++ // patchable_set48(T9, target); ++ // jr(T9); ++ // nop(); ++ ++ CodeBuffer cb(branch, 4 * 4); ++ MacroAssembler masm(&cb); ++ masm.patchable_set48(T9, (long)(target)); ++ return; ++ } ++ ++#ifndef PRODUCT ++ if (!is_simm16((target - branch - 4) >> 2)) { ++ tty->print_cr("Illegal patching: branch = " INTPTR_FORMAT ", target = " INTPTR_FORMAT, p2i(branch), p2i(target)); ++ tty->print_cr("======= Start decoding at branch = " INTPTR_FORMAT " =======", p2i(branch)); ++ Disassembler::decode(branch - 4 * 16, branch + 4 * 16, tty); ++ tty->print_cr("======= End of decoding ======="); ++ } ++#endif ++ ++ stub_inst = patched_branch(target - branch, stub_inst, 0); ++} ++ ++static inline address first_cache_address() { ++ return CodeCache::low_bound() + sizeof(HeapBlock::Header); ++} ++ ++static inline address last_cache_address() { ++ return CodeCache::high_bound() - Assembler::InstructionSize; ++} ++ ++int MacroAssembler::call_size(address target, bool far, bool patchable) { ++ if (patchable) return 6 << Assembler::LogInstructionSize; ++ if (!far) return 2 << Assembler::LogInstructionSize; // jal + nop ++ return (insts_for_set64((jlong)target) + 2) << Assembler::LogInstructionSize; ++} ++ ++// Can we reach target using jal/j from anywhere ++// in the code cache (because code can be relocated)? ++bool MacroAssembler::reachable_from_cache(address target) { ++ address cl = first_cache_address(); ++ address ch = last_cache_address(); ++ ++ return (cl <= target) && (target <= ch) && fit_in_jal(cl, ch); ++} ++ ++bool MacroAssembler::reachable_from_cache() { ++ if (ForceUnreachable) { ++ return false; ++ } else { ++ address cl = first_cache_address(); ++ address ch = last_cache_address(); ++ ++ return fit_in_jal(cl, ch); ++ } ++} ++ ++void MacroAssembler::general_jump(address target) { ++ if (reachable_from_cache(target)) { ++ j(target); ++ delayed()->nop(); ++ } else { ++ set64(T9, (long)target); ++ jr(T9); ++ delayed()->nop(); ++ } ++} ++ ++int MacroAssembler::insts_for_general_jump(address target) { ++ if (reachable_from_cache(target)) { ++ //j(target); ++ //nop(); ++ return 2; ++ } else { ++ //set64(T9, (long)target); ++ //jr(T9); ++ //nop(); ++ return insts_for_set64((jlong)target) + 2; ++ } ++} ++ ++void MacroAssembler::patchable_jump(address target) { ++ if (reachable_from_cache(target)) { ++ nop(); ++ nop(); ++ nop(); ++ nop(); ++ j(target); ++ delayed()->nop(); ++ } else { ++ patchable_set48(T9, (long)target); ++ jr(T9); ++ delayed()->nop(); ++ } ++} ++ ++int MacroAssembler::insts_for_patchable_jump(address target) { ++ return 6; ++} ++ ++void MacroAssembler::general_call(address target) { ++ if (reachable_from_cache(target)) { ++ jal(target); ++ delayed()->nop(); ++ } else { ++ set64(T9, (long)target); ++ jalr(T9); ++ delayed()->nop(); ++ } ++} ++ ++int MacroAssembler::insts_for_general_call(address target) { ++ if (reachable_from_cache(target)) { ++ //jal(target); ++ //nop(); ++ return 2; ++ } else { ++ //set64(T9, (long)target); ++ //jalr(T9); ++ //nop(); ++ return insts_for_set64((jlong)target) + 2; ++ } ++} ++ ++void MacroAssembler::patchable_call(address target) { ++ if (reachable_from_cache(target)) { ++ nop(); ++ nop(); ++ nop(); ++ nop(); ++ jal(target); ++ 
delayed()->nop(); ++ } else { ++ patchable_set48(T9, (long)target); ++ jalr(T9); ++ delayed()->nop(); ++ } ++} ++ ++int MacroAssembler::insts_for_patchable_call(address target) { ++ return 6; ++} ++ ++// Maybe emit a call via a trampoline. If the code cache is small ++// trampolines won't be emitted. ++ ++address MacroAssembler::trampoline_call(AddressLiteral entry, CodeBuffer *cbuf) { ++ assert(JavaThread::current()->is_Compiler_thread(), "just checking"); ++ assert(entry.rspec().type() == relocInfo::runtime_call_type ++ || entry.rspec().type() == relocInfo::opt_virtual_call_type ++ || entry.rspec().type() == relocInfo::static_call_type ++ || entry.rspec().type() == relocInfo::virtual_call_type, "wrong reloc type"); ++ ++ address target = entry.target(); ++ if (!reachable_from_cache()) { ++ address stub = emit_trampoline_stub(offset(), target); ++ if (stub == NULL) { ++ return NULL; // CodeCache is full ++ } ++ } ++ ++ if (cbuf) cbuf->set_insts_mark(); ++ relocate(entry.rspec()); ++ ++ if (reachable_from_cache()) { ++ nop(); ++ nop(); ++ nop(); ++ nop(); ++ jal(target); ++ delayed()->nop(); ++ } else { ++ // load the call target from the trampoline stub ++ // branch ++ long dest = (long)pc(); ++ dest += (dest & 0x8000) << 1; ++ lui(T9, dest >> 32); ++ ori(T9, T9, split_low(dest >> 16)); ++ dsll(T9, T9, 16); ++ ld(T9, T9, simm16(split_low(dest))); ++ jalr(T9); ++ delayed()->nop(); ++ } ++ return pc(); ++} ++ ++// Emit a trampoline stub for a call to a target which is too far away. ++address MacroAssembler::emit_trampoline_stub(int insts_call_instruction_offset, ++ address dest) { ++ // Max stub size: alignment nop, TrampolineStub. ++ address stub = start_a_stub(NativeInstruction::nop_instruction_size ++ + NativeCallTrampolineStub::instruction_size); ++ if (stub == NULL) { ++ return NULL; // CodeBuffer::expand failed ++ } ++ ++ // Create a trampoline stub relocation which relates this trampoline stub ++ // with the call instruction at insts_call_instruction_offset in the ++ // instructions code-section. 
++ align(wordSize); ++ relocate(trampoline_stub_Relocation::spec(code()->insts()->start() ++ + insts_call_instruction_offset)); ++ emit_int64((int64_t)dest); ++ end_a_stub(); ++ return stub; ++} ++ ++void MacroAssembler::beq_far(Register rs, Register rt, address entry) { ++ u_char * cur_pc = pc(); ++ ++ // Near/Far jump ++ if(is_simm16((entry - pc() - 4) / 4)) { ++ Assembler::beq(rs, rt, offset(entry)); ++ } else { ++ Label not_jump; ++ bne(rs, rt, not_jump); ++ delayed()->nop(); ++ ++ b_far(entry); ++ delayed()->nop(); ++ ++ bind(not_jump); ++ has_delay_slot(); ++ } ++} ++ ++void MacroAssembler::beq_far(Register rs, Register rt, Label& L) { ++ if (L.is_bound()) { ++ beq_far(rs, rt, target(L)); ++ } else { ++ u_char * cur_pc = pc(); ++ Label not_jump; ++ bne(rs, rt, not_jump); ++ delayed()->nop(); ++ ++ b_far(L); ++ delayed()->nop(); ++ ++ bind(not_jump); ++ has_delay_slot(); ++ } ++} ++ ++void MacroAssembler::bne_far(Register rs, Register rt, address entry) { ++ u_char * cur_pc = pc(); ++ ++ //Near/Far jump ++ if(is_simm16((entry - pc() - 4) / 4)) { ++ Assembler::bne(rs, rt, offset(entry)); ++ } else { ++ Label not_jump; ++ beq(rs, rt, not_jump); ++ delayed()->nop(); ++ ++ b_far(entry); ++ delayed()->nop(); ++ ++ bind(not_jump); ++ has_delay_slot(); ++ } ++} ++ ++void MacroAssembler::bne_far(Register rs, Register rt, Label& L) { ++ if (L.is_bound()) { ++ bne_far(rs, rt, target(L)); ++ } else { ++ u_char * cur_pc = pc(); ++ Label not_jump; ++ beq(rs, rt, not_jump); ++ delayed()->nop(); ++ ++ b_far(L); ++ delayed()->nop(); ++ ++ bind(not_jump); ++ has_delay_slot(); ++ } ++} ++ ++void MacroAssembler::beq_long(Register rs, Register rt, Label& L) { ++ Label not_taken; ++ ++ bne(rs, rt, not_taken); ++ delayed()->nop(); ++ ++ jmp_far(L); ++ ++ bind(not_taken); ++} ++ ++void MacroAssembler::bne_long(Register rs, Register rt, Label& L) { ++ Label not_taken; ++ ++ beq(rs, rt, not_taken); ++ delayed()->nop(); ++ ++ jmp_far(L); ++ ++ bind(not_taken); ++} ++ ++void MacroAssembler::bc1t_long(Label& L) { ++ Label not_taken; ++ ++ bc1f(not_taken); ++ delayed()->nop(); ++ ++ jmp_far(L); ++ ++ bind(not_taken); ++} ++ ++void MacroAssembler::bc1f_long(Label& L) { ++ Label not_taken; ++ ++ bc1t(not_taken); ++ delayed()->nop(); ++ ++ jmp_far(L); ++ ++ bind(not_taken); ++} ++ ++void MacroAssembler::b_far(Label& L) { ++ if (L.is_bound()) { ++ b_far(target(L)); ++ } else { ++ volatile address dest = target(L); ++// ++// MacroAssembler::pd_patch_instruction branch=55651ed514, target=55651ef6d8 ++// 0x00000055651ed514: daddu at, ra, zero ++// 0x00000055651ed518: [4110001]bgezal zero, 0x00000055651ed520 ++// ++// 0x00000055651ed51c: sll zero, zero, 0 ++// 0x00000055651ed520: lui t9, 0x0 ++// 0x00000055651ed524: ori t9, t9, 0x21b8 ++// 0x00000055651ed528: daddu t9, t9, ra ++// 0x00000055651ed52c: daddu ra, at, zero ++// 0x00000055651ed530: jr t9 ++// 0x00000055651ed534: sll zero, zero, 0 ++// ++ move(AT, RA); ++ emit_long(insn_ORRI(regimm_op, 0, bgezal_op, 1)); ++ nop(); ++ lui(T9, 0); // to be patched ++ ori(T9, T9, 0); ++ daddu(T9, T9, RA); ++ move(RA, AT); ++ jr(T9); ++ } ++} ++ ++void MacroAssembler::b_far(address entry) { ++ u_char * cur_pc = pc(); ++ ++ // Near/Far jump ++ if(is_simm16((entry - pc() - 4) / 4)) { ++ b(offset(entry)); ++ } else { ++ // address must be bounded ++ move(AT, RA); ++ emit_long(insn_ORRI(regimm_op, 0, bgezal_op, 1)); ++ nop(); ++ li32(T9, entry - pc()); ++ daddu(T9, T9, RA); ++ move(RA, AT); ++ jr(T9); ++ } ++} ++ ++void MacroAssembler::ld_ptr(Register rt, Register base, Register 
offset) { ++ addu_long(AT, base, offset); ++ ld_ptr(rt, AT, 0); ++} ++ ++void MacroAssembler::st_ptr(Register rt, Register base, Register offset) { ++ guarantee(AT != rt, "AT must not equal rt"); ++ addu_long(AT, base, offset); ++ st_ptr(rt, AT, 0); ++} ++ ++Address MacroAssembler::as_Address(AddressLiteral adr) { ++ return Address(adr.target(), adr.rspec()); ++} ++ ++Address MacroAssembler::as_Address(ArrayAddress adr) { ++ return Address::make_array(adr); ++} ++ ++// tmp_reg1 and tmp_reg2 should be saved outside of atomic_inc32 (caller saved). ++void MacroAssembler::atomic_inc32(address counter_addr, int inc, Register tmp_reg1, Register tmp_reg2) { ++ Label again; ++ ++ li(tmp_reg1, counter_addr); ++ bind(again); ++ if (UseSyncLevel >= 10000 || UseSyncLevel == 1000 || UseSyncLevel == 4000) sync(); ++ ll(tmp_reg2, tmp_reg1, 0); ++ addiu(tmp_reg2, tmp_reg2, inc); ++ sc(tmp_reg2, tmp_reg1, 0); ++ beq(tmp_reg2, R0, again); ++ delayed()->nop(); ++} ++ ++int MacroAssembler::biased_locking_enter(Register lock_reg, ++ Register obj_reg, ++ Register swap_reg, ++ Register tmp_reg, ++ bool swap_reg_contains_mark, ++ Label& done, ++ Label* slow_case, ++ BiasedLockingCounters* counters) { ++ assert(UseBiasedLocking, "why call this otherwise?"); ++ bool need_tmp_reg = false; ++ if (tmp_reg == noreg) { ++ need_tmp_reg = true; ++ tmp_reg = T9; ++ } ++ assert_different_registers(lock_reg, obj_reg, swap_reg, tmp_reg, AT); ++ assert(markOopDesc::age_shift == markOopDesc::lock_bits + markOopDesc::biased_lock_bits, "biased locking makes assumptions about bit layout"); ++ Address mark_addr (obj_reg, oopDesc::mark_offset_in_bytes()); ++ Address saved_mark_addr(lock_reg, 0); ++ ++ // Biased locking ++ // See whether the lock is currently biased toward our thread and ++ // whether the epoch is still valid ++ // Note that the runtime guarantees sufficient alignment of JavaThread ++ // pointers to allow age to be placed into low bits ++ // First check to see whether biasing is even enabled for this object ++ Label cas_label; ++ int null_check_offset = -1; ++ if (!swap_reg_contains_mark) { ++ null_check_offset = offset(); ++ ld_ptr(swap_reg, mark_addr); ++ } ++ ++ if (need_tmp_reg) { ++ push(tmp_reg); ++ } ++ move(tmp_reg, swap_reg); ++ andi(tmp_reg, tmp_reg, markOopDesc::biased_lock_mask_in_place); ++ daddiu(AT, R0, markOopDesc::biased_lock_pattern); ++ dsubu(AT, AT, tmp_reg); ++ if (need_tmp_reg) { ++ pop(tmp_reg); ++ } ++ ++ bne(AT, R0, cas_label); ++ delayed()->nop(); ++ ++ ++ // The bias pattern is present in the object's header. Need to check ++ // whether the bias owner and the epoch are both still current. ++ // Note that because there is no current thread register on MIPS we ++ // need to store off the mark word we read out of the object to ++ // avoid reloading it and needing to recheck invariants below. This ++ // store is unfortunate but it makes the overall code shorter and ++ // simpler. 
++ st_ptr(swap_reg, saved_mark_addr); ++ if (need_tmp_reg) { ++ push(tmp_reg); ++ } ++ if (swap_reg_contains_mark) { ++ null_check_offset = offset(); ++ } ++ load_prototype_header(tmp_reg, obj_reg); ++ xorr(tmp_reg, tmp_reg, swap_reg); ++ get_thread(swap_reg); ++ xorr(swap_reg, swap_reg, tmp_reg); ++ ++ move(AT, ~((int) markOopDesc::age_mask_in_place)); ++ andr(swap_reg, swap_reg, AT); ++ ++ if (PrintBiasedLockingStatistics) { ++ Label L; ++ bne(swap_reg, R0, L); ++ delayed()->nop(); ++ push(tmp_reg); ++ push(A0); ++ atomic_inc32((address)BiasedLocking::biased_lock_entry_count_addr(), 1, A0, tmp_reg); ++ pop(A0); ++ pop(tmp_reg); ++ bind(L); ++ } ++ if (need_tmp_reg) { ++ pop(tmp_reg); ++ } ++ beq(swap_reg, R0, done); ++ delayed()->nop(); ++ Label try_revoke_bias; ++ Label try_rebias; ++ ++ // At this point we know that the header has the bias pattern and ++ // that we are not the bias owner in the current epoch. We need to ++ // figure out more details about the state of the header in order to ++ // know what operations can be legally performed on the object's ++ // header. ++ ++ // If the low three bits in the xor result aren't clear, that means ++ // the prototype header is no longer biased and we have to revoke ++ // the bias on this object. ++ ++ move(AT, markOopDesc::biased_lock_mask_in_place); ++ andr(AT, swap_reg, AT); ++ bne(AT, R0, try_revoke_bias); ++ delayed()->nop(); ++ // Biasing is still enabled for this data type. See whether the ++ // epoch of the current bias is still valid, meaning that the epoch ++ // bits of the mark word are equal to the epoch bits of the ++ // prototype header. (Note that the prototype header's epoch bits ++ // only change at a safepoint.) If not, attempt to rebias the object ++ // toward the current thread. Note that we must be absolutely sure ++ // that the current epoch is invalid in order to do this because ++ // otherwise the manipulations it performs on the mark word are ++ // illegal. ++ ++ move(AT, markOopDesc::epoch_mask_in_place); ++ andr(AT,swap_reg, AT); ++ bne(AT, R0, try_rebias); ++ delayed()->nop(); ++ // The epoch of the current bias is still valid but we know nothing ++ // about the owner; it might be set or it might be clear. Try to ++ // acquire the bias of the object using an atomic operation. If this ++ // fails we will go in to the runtime to revoke the object's bias. ++ // Note that we first construct the presumed unbiased header so we ++ // don't accidentally blow away another thread's valid bias. ++ ++ ld_ptr(swap_reg, saved_mark_addr); ++ ++ move(AT, markOopDesc::biased_lock_mask_in_place | markOopDesc::age_mask_in_place | markOopDesc::epoch_mask_in_place); ++ andr(swap_reg, swap_reg, AT); ++ ++ if (need_tmp_reg) { ++ push(tmp_reg); ++ } ++ get_thread(tmp_reg); ++ orr(tmp_reg, tmp_reg, swap_reg); ++ //if (os::is_MP()) { ++ // sync(); ++ //} ++ cmpxchg(tmp_reg, Address(obj_reg, 0), swap_reg); ++ if (need_tmp_reg) { ++ pop(tmp_reg); ++ } ++ // If the biasing toward our thread failed, this means that ++ // another thread succeeded in biasing it toward itself and we ++ // need to revoke that bias. The revocation will occur in the ++ // interpreter runtime in the slow case. 
++ if (PrintBiasedLockingStatistics) { ++ Label L; ++ bne(AT, R0, L); ++ delayed()->nop(); ++ push(tmp_reg); ++ push(A0); ++ atomic_inc32((address)BiasedLocking::anonymously_biased_lock_entry_count_addr(), 1, A0, tmp_reg); ++ pop(A0); ++ pop(tmp_reg); ++ bind(L); ++ } ++ if (slow_case != NULL) { ++ beq_far(AT, R0, *slow_case); ++ delayed()->nop(); ++ } ++ b(done); ++ delayed()->nop(); ++ ++ bind(try_rebias); ++ // At this point we know the epoch has expired, meaning that the ++ // current "bias owner", if any, is actually invalid. Under these ++ // circumstances _only_, we are allowed to use the current header's ++ // value as the comparison value when doing the cas to acquire the ++ // bias in the current epoch. In other words, we allow transfer of ++ // the bias from one thread to another directly in this situation. ++ // ++ // FIXME: due to a lack of registers we currently blow away the age ++ // bits in this situation. Should attempt to preserve them. ++ if (need_tmp_reg) { ++ push(tmp_reg); ++ } ++ load_prototype_header(tmp_reg, obj_reg); ++ get_thread(swap_reg); ++ orr(tmp_reg, tmp_reg, swap_reg); ++ ld_ptr(swap_reg, saved_mark_addr); ++ ++ //if (os::is_MP()) { ++ // sync(); ++ //} ++ cmpxchg(tmp_reg, Address(obj_reg, 0), swap_reg); ++ if (need_tmp_reg) { ++ pop(tmp_reg); ++ } ++ // If the biasing toward our thread failed, then another thread ++ // succeeded in biasing it toward itself and we need to revoke that ++ // bias. The revocation will occur in the runtime in the slow case. ++ if (PrintBiasedLockingStatistics) { ++ Label L; ++ bne(AT, R0, L); ++ delayed()->nop(); ++ push(AT); ++ push(tmp_reg); ++ atomic_inc32((address)BiasedLocking::rebiased_lock_entry_count_addr(), 1, AT, tmp_reg); ++ pop(tmp_reg); ++ pop(AT); ++ bind(L); ++ } ++ if (slow_case != NULL) { ++ beq_far(AT, R0, *slow_case); ++ delayed()->nop(); ++ } ++ ++ b(done); ++ delayed()->nop(); ++ bind(try_revoke_bias); ++ // The prototype mark in the klass doesn't have the bias bit set any ++ // more, indicating that objects of this data type are not supposed ++ // to be biased any more. We are going to try to reset the mark of ++ // this object to the prototype value and fall through to the ++ // CAS-based locking scheme. Note that if our CAS fails, it means ++ // that another thread raced us for the privilege of revoking the ++ // bias of this particular object, so it's okay to continue in the ++ // normal locking code. ++ // ++ // FIXME: due to a lack of registers we currently blow away the age ++ // bits in this situation. Should attempt to preserve them. ++ ld_ptr(swap_reg, saved_mark_addr); ++ ++ if (need_tmp_reg) { ++ push(tmp_reg); ++ } ++ load_prototype_header(tmp_reg, obj_reg); ++ //if (os::is_MP()) { ++ // lock(); ++ //} ++ cmpxchg(tmp_reg, Address(obj_reg, 0), swap_reg); ++ if (need_tmp_reg) { ++ pop(tmp_reg); ++ } ++ // Fall through to the normal CAS-based lock, because no matter what ++ // the result of the above CAS, some thread must have succeeded in ++ // removing the bias bit from the object's header. 
++ if (PrintBiasedLockingStatistics) { ++ Label L; ++ bne(AT, R0, L); ++ delayed()->nop(); ++ push(AT); ++ push(tmp_reg); ++ atomic_inc32((address)BiasedLocking::revoked_lock_entry_count_addr(), 1, AT, tmp_reg); ++ pop(tmp_reg); ++ pop(AT); ++ bind(L); ++ } ++ ++ bind(cas_label); ++ return null_check_offset; ++} ++ ++void MacroAssembler::biased_locking_exit(Register obj_reg, Register temp_reg, Label& done) { ++ assert(UseBiasedLocking, "why call this otherwise?"); ++ ++ // Check for biased locking unlock case, which is a no-op ++ // Note: we do not have to check the thread ID for two reasons. ++ // First, the interpreter checks for IllegalMonitorStateException at ++ // a higher level. Second, if the bias was revoked while we held the ++ // lock, the object could not be rebiased toward another thread, so ++ // the bias bit would be clear. ++ ld(temp_reg, Address(obj_reg, oopDesc::mark_offset_in_bytes())); ++ andi(temp_reg, temp_reg, markOopDesc::biased_lock_mask_in_place); ++ daddiu(AT, R0, markOopDesc::biased_lock_pattern); ++ ++ beq(AT, temp_reg, done); ++ delayed()->nop(); ++} ++ ++// the stack pointer adjustment is needed. see InterpreterMacroAssembler::super_call_VM_leaf ++// this method will handle the stack problem, you need not to preserve the stack space for the argument now ++void MacroAssembler::call_VM_leaf_base(address entry_point, int number_of_arguments) { ++ Label L, E; ++ ++ assert(number_of_arguments <= 4, "just check"); ++ ++ andi(AT, SP, 0xf); ++ beq(AT, R0, L); ++ delayed()->nop(); ++ daddiu(SP, SP, -8); ++ call(entry_point, relocInfo::runtime_call_type); ++ delayed()->nop(); ++ daddiu(SP, SP, 8); ++ b(E); ++ delayed()->nop(); ++ ++ bind(L); ++ call(entry_point, relocInfo::runtime_call_type); ++ delayed()->nop(); ++ bind(E); ++} ++ ++ ++void MacroAssembler::jmp(address entry) { ++ patchable_set48(T9, (long)entry); ++ jr(T9); ++} ++ ++void MacroAssembler::jmp(address entry, relocInfo::relocType rtype) { ++ switch (rtype) { ++ case relocInfo::runtime_call_type: ++ case relocInfo::none: ++ jmp(entry); ++ break; ++ default: ++ { ++ InstructionMark im(this); ++ relocate(rtype); ++ patchable_set48(T9, (long)entry); ++ jr(T9); ++ } ++ break; ++ } ++} ++ ++void MacroAssembler::jmp_far(Label& L) { ++ if (L.is_bound()) { ++ address entry = target(L); ++ assert(entry != NULL, "jmp most probably wrong"); ++ InstructionMark im(this); ++ ++ relocate(relocInfo::internal_word_type); ++ patchable_set48(T9, (long)entry); ++ } else { ++ InstructionMark im(this); ++ L.add_patch_at(code(), locator()); ++ ++ relocate(relocInfo::internal_word_type); ++ patchable_set48(T9, (long)pc()); ++ } ++ ++ jr(T9); ++ delayed()->nop(); ++} ++void MacroAssembler::mov_metadata(Address dst, Metadata* obj) { ++ int oop_index; ++ if (obj) { ++ oop_index = oop_recorder()->find_index(obj); ++ } else { ++ oop_index = oop_recorder()->allocate_metadata_index(obj); ++ } ++ relocate(metadata_Relocation::spec(oop_index)); ++ patchable_set48(AT, (long)obj); ++ sd(AT, dst); ++} ++ ++void MacroAssembler::mov_metadata(Register dst, Metadata* obj) { ++ int oop_index; ++ if (obj) { ++ oop_index = oop_recorder()->find_index(obj); ++ } else { ++ oop_index = oop_recorder()->allocate_metadata_index(obj); ++ } ++ relocate(metadata_Relocation::spec(oop_index)); ++ patchable_set48(dst, (long)obj); ++} ++ ++void MacroAssembler::call(address entry) { ++// c/c++ code assume T9 is entry point, so we just always move entry to t9 ++// maybe there is some more graceful method to handle this. 
FIXME ++// For more info, see class NativeCall. ++ patchable_set48(T9, (long)entry); ++ jalr(T9); ++} ++ ++void MacroAssembler::call(address entry, relocInfo::relocType rtype) { ++ switch (rtype) { ++ case relocInfo::runtime_call_type: ++ case relocInfo::none: ++ call(entry); ++ break; ++ default: ++ { ++ InstructionMark im(this); ++ relocate(rtype); ++ call(entry); ++ } ++ break; ++ } ++} ++ ++void MacroAssembler::call(address entry, RelocationHolder& rh) ++{ ++ switch (rh.type()) { ++ case relocInfo::runtime_call_type: ++ case relocInfo::none: ++ call(entry); ++ break; ++ default: ++ { ++ InstructionMark im(this); ++ relocate(rh); ++ call(entry); ++ } ++ break; ++ } ++} ++ ++void MacroAssembler::ic_call(address entry) { ++ RelocationHolder rh = virtual_call_Relocation::spec(pc()); ++ patchable_set48(IC_Klass, (long)Universe::non_oop_word()); ++ assert(entry != NULL, "call most probably wrong"); ++ InstructionMark im(this); ++ trampoline_call(AddressLiteral(entry, rh)); ++} ++ ++void MacroAssembler::c2bool(Register r) { ++ Label L; ++ Assembler::beq(r, R0, L); ++ delayed()->nop(); ++ move(r, 1); ++ bind(L); ++} ++ ++#ifndef PRODUCT ++extern "C" void findpc(intptr_t x); ++#endif ++ ++void MacroAssembler::debug(char* msg/*, RegistersForDebugging* regs*/) { ++ if ( ShowMessageBoxOnError ) { ++ JavaThreadState saved_state = JavaThread::current()->thread_state(); ++ JavaThread::current()->set_thread_state(_thread_in_vm); ++ { ++ // In order to get locks work, we need to fake a in_VM state ++ ttyLocker ttyl; ++ ::tty->print_cr("EXECUTION STOPPED: %s\n", msg); ++ if (CountBytecodes || TraceBytecodes || StopInterpreterAt) { ++ BytecodeCounter::print(); ++ } ++ ++ } ++ ThreadStateTransition::transition(JavaThread::current(), _thread_in_vm, saved_state); ++ } ++ else ++ ::tty->print_cr("=============== DEBUG MESSAGE: %s ================\n", msg); ++} ++ ++ ++void MacroAssembler::stop(const char* msg) { ++ li(A0, (long)msg); ++ call(CAST_FROM_FN_PTR(address, MacroAssembler::debug), relocInfo::runtime_call_type); ++ delayed()->nop(); ++ brk(17); ++} ++ ++void MacroAssembler::warn(const char* msg) { ++ pushad(); ++ li(A0, (long)msg); ++ push(S2); ++ move(AT, -(StackAlignmentInBytes)); ++ move(S2, SP); // use S2 as a sender SP holder ++ andr(SP, SP, AT); // align stack as required by ABI ++ call(CAST_FROM_FN_PTR(address, MacroAssembler::debug), relocInfo::runtime_call_type); ++ delayed()->nop(); ++ move(SP, S2); // use S2 as a sender SP holder ++ pop(S2); ++ popad(); ++} ++ ++void MacroAssembler::increment(Register reg, int imm) { ++ if (!imm) return; ++ if (is_simm16(imm)) { ++ daddiu(reg, reg, imm); ++ } else { ++ move(AT, imm); ++ daddu(reg, reg, AT); ++ } ++} ++ ++void MacroAssembler::decrement(Register reg, int imm) { ++ increment(reg, -imm); ++} ++ ++ ++void MacroAssembler::call_VM(Register oop_result, ++ address entry_point, ++ bool check_exceptions) { ++ call_VM_helper(oop_result, entry_point, 0, check_exceptions); ++} ++ ++void MacroAssembler::call_VM(Register oop_result, ++ address entry_point, ++ Register arg_1, ++ bool check_exceptions) { ++ if (arg_1!=A1) move(A1, arg_1); ++ call_VM_helper(oop_result, entry_point, 1, check_exceptions); ++} ++ ++void MacroAssembler::call_VM(Register oop_result, ++ address entry_point, ++ Register arg_1, ++ Register arg_2, ++ bool check_exceptions) { ++ if (arg_1!=A1) move(A1, arg_1); ++ if (arg_2!=A2) move(A2, arg_2); ++ assert(arg_2 != A1, "smashed argument"); ++ call_VM_helper(oop_result, entry_point, 2, check_exceptions); ++} ++ ++void 
MacroAssembler::call_VM(Register oop_result, ++ address entry_point, ++ Register arg_1, ++ Register arg_2, ++ Register arg_3, ++ bool check_exceptions) { ++ if (arg_1!=A1) move(A1, arg_1); ++ if (arg_2!=A2) move(A2, arg_2); assert(arg_2 != A1, "smashed argument"); ++ if (arg_3!=A3) move(A3, arg_3); assert(arg_3 != A1 && arg_3 != A2, "smashed argument"); ++ call_VM_helper(oop_result, entry_point, 3, check_exceptions); ++} ++ ++void MacroAssembler::call_VM(Register oop_result, ++ Register last_java_sp, ++ address entry_point, ++ int number_of_arguments, ++ bool check_exceptions) { ++ call_VM_base(oop_result, NOREG, last_java_sp, entry_point, number_of_arguments, check_exceptions); ++} ++ ++void MacroAssembler::call_VM(Register oop_result, ++ Register last_java_sp, ++ address entry_point, ++ Register arg_1, ++ bool check_exceptions) { ++ if (arg_1 != A1) move(A1, arg_1); ++ call_VM(oop_result, last_java_sp, entry_point, 1, check_exceptions); ++} ++ ++void MacroAssembler::call_VM(Register oop_result, ++ Register last_java_sp, ++ address entry_point, ++ Register arg_1, ++ Register arg_2, ++ bool check_exceptions) { ++ if (arg_1 != A1) move(A1, arg_1); ++ if (arg_2 != A2) move(A2, arg_2); assert(arg_2 != A1, "smashed argument"); ++ call_VM(oop_result, last_java_sp, entry_point, 2, check_exceptions); ++} ++ ++void MacroAssembler::call_VM(Register oop_result, ++ Register last_java_sp, ++ address entry_point, ++ Register arg_1, ++ Register arg_2, ++ Register arg_3, ++ bool check_exceptions) { ++ if (arg_1 != A1) move(A1, arg_1); ++ if (arg_2 != A2) move(A2, arg_2); assert(arg_2 != A1, "smashed argument"); ++ if (arg_3 != A3) move(A3, arg_3); assert(arg_3 != A1 && arg_3 != A2, "smashed argument"); ++ call_VM(oop_result, last_java_sp, entry_point, 3, check_exceptions); ++} ++ ++void MacroAssembler::call_VM_base(Register oop_result, ++ Register java_thread, ++ Register last_java_sp, ++ address entry_point, ++ int number_of_arguments, ++ bool check_exceptions) { ++ ++ address before_call_pc; ++ // determine java_thread register ++ if (!java_thread->is_valid()) { ++#ifndef OPT_THREAD ++ java_thread = T2; ++ get_thread(java_thread); ++#else ++ java_thread = TREG; ++#endif ++ } ++ // determine last_java_sp register ++ if (!last_java_sp->is_valid()) { ++ last_java_sp = SP; ++ } ++ // debugging support ++ assert(number_of_arguments >= 0 , "cannot have negative number of arguments"); ++ assert(number_of_arguments <= 4 , "cannot have negative number of arguments"); ++ assert(java_thread != oop_result , "cannot use the same register for java_thread & oop_result"); ++ assert(java_thread != last_java_sp, "cannot use the same register for java_thread & last_java_sp"); ++ ++ assert(last_java_sp != FP, "this code doesn't work for last_java_sp == fp, which currently can't portably work anyway since C2 doesn't save fp"); ++ ++ // set last Java frame before call ++ before_call_pc = (address)pc(); ++ set_last_Java_frame(java_thread, last_java_sp, FP, before_call_pc); ++ ++ // do the call ++ move(A0, java_thread); ++ call(entry_point, relocInfo::runtime_call_type); ++ delayed()->nop(); ++ ++ // restore the thread (cannot use the pushed argument since arguments ++ // may be overwritten by C code generated by an optimizing compiler); ++ // however can use the register value directly if it is callee saved. 
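The call_VM wrappers above shuffle incoming values into the fixed argument registers A1..A3 in order, and the smashed-argument asserts guard against a later move reading a register that an earlier move already overwrote. A small illustrative check of that ordering rule (register identities are modeled as plain enumerators here, not the real register set):

#include <cassert>

enum Reg { A1, A2, A3, T5, T6 };   // illustrative register names only

// Models the rule behind the "smashed argument" asserts: once an earlier
// argument has been moved into A1 or A2, a later source must not alias those
// destinations, or its value has already been lost.
void marshal_three_args(Reg arg_1, Reg arg_2, Reg arg_3) {
  (void)arg_1;                                    // move(A1, arg_1) is always safe
  assert(arg_2 != A1 && "arg_2 was clobbered by the move into A1");
  (void)arg_2;                                    // move(A2, arg_2)
  assert(arg_3 != A1 && arg_3 != A2 && "arg_3 was clobbered by an earlier move");
  (void)arg_3;                                    // move(A3, arg_3)
}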
++#ifndef OPT_THREAD ++ get_thread(java_thread); ++#else ++#ifdef ASSERT ++ { ++ Label L; ++ get_thread(AT); ++ beq(java_thread, AT, L); ++ delayed()->nop(); ++ stop("MacroAssembler::call_VM_base: TREG not callee saved?"); ++ bind(L); ++ } ++#endif ++#endif ++ ++ // discard thread and arguments ++ ld_ptr(SP, java_thread, in_bytes(JavaThread::last_Java_sp_offset())); ++ // reset last Java frame ++ reset_last_Java_frame(java_thread, false); ++ ++ check_and_handle_popframe(java_thread); ++ check_and_handle_earlyret(java_thread); ++ if (check_exceptions) { ++ // check for pending exceptions (java_thread is set upon return) ++ Label L; ++ ld(AT, java_thread, in_bytes(Thread::pending_exception_offset())); ++ beq(AT, R0, L); ++ delayed()->nop(); ++ li(AT, before_call_pc); ++ push(AT); ++ jmp(StubRoutines::forward_exception_entry(), relocInfo::runtime_call_type); ++ delayed()->nop(); ++ bind(L); ++ } ++ ++ // get oop result if there is one and reset the value in the thread ++ if (oop_result->is_valid()) { ++ ld(oop_result, java_thread, in_bytes(JavaThread::vm_result_offset())); ++ sd(R0, java_thread, in_bytes(JavaThread::vm_result_offset())); ++ verify_oop(oop_result); ++ } ++} ++ ++void MacroAssembler::call_VM_helper(Register oop_result, address entry_point, int number_of_arguments, bool check_exceptions) { ++ ++ move(V0, SP); ++ //we also reserve space for java_thread here ++ move(AT, -(StackAlignmentInBytes)); ++ andr(SP, SP, AT); ++ call_VM_base(oop_result, NOREG, V0, entry_point, number_of_arguments, check_exceptions); ++ ++} ++ ++void MacroAssembler::call_VM_leaf(address entry_point, int number_of_arguments) { ++ call_VM_leaf_base(entry_point, number_of_arguments); ++} ++ ++void MacroAssembler::call_VM_leaf(address entry_point, Register arg_0) { ++ if (arg_0 != A0) move(A0, arg_0); ++ call_VM_leaf(entry_point, 1); ++} ++ ++void MacroAssembler::call_VM_leaf(address entry_point, Register arg_0, Register arg_1) { ++ if (arg_0 != A0) move(A0, arg_0); ++ if (arg_1 != A1) move(A1, arg_1); assert(arg_1 != A0, "smashed argument"); ++ call_VM_leaf(entry_point, 2); ++} ++ ++void MacroAssembler::call_VM_leaf(address entry_point, Register arg_0, Register arg_1, Register arg_2) { ++ if (arg_0 != A0) move(A0, arg_0); ++ if (arg_1 != A1) move(A1, arg_1); assert(arg_1 != A0, "smashed argument"); ++ if (arg_2 != A2) move(A2, arg_2); assert(arg_2 != A0 && arg_2 != A1, "smashed argument"); ++ call_VM_leaf(entry_point, 3); ++} ++void MacroAssembler::super_call_VM_leaf(address entry_point) { ++ MacroAssembler::call_VM_leaf_base(entry_point, 0); ++} ++ ++ ++void MacroAssembler::super_call_VM_leaf(address entry_point, ++ Register arg_1) { ++ if (arg_1 != A0) move(A0, arg_1); ++ MacroAssembler::call_VM_leaf_base(entry_point, 1); ++} ++ ++ ++void MacroAssembler::super_call_VM_leaf(address entry_point, ++ Register arg_1, ++ Register arg_2) { ++ if (arg_1 != A0) move(A0, arg_1); ++ if (arg_2 != A1) move(A1, arg_2); assert(arg_2 != A0, "smashed argument"); ++ MacroAssembler::call_VM_leaf_base(entry_point, 2); ++} ++void MacroAssembler::super_call_VM_leaf(address entry_point, ++ Register arg_1, ++ Register arg_2, ++ Register arg_3) { ++ if (arg_1 != A0) move(A0, arg_1); ++ if (arg_2 != A1) move(A1, arg_2); assert(arg_2 != A0, "smashed argument"); ++ if (arg_3 != A2) move(A2, arg_3); assert(arg_3 != A0 && arg_3 != A1, "smashed argument"); ++ MacroAssembler::call_VM_leaf_base(entry_point, 3); ++} ++ ++void MacroAssembler::check_and_handle_earlyret(Register java_thread) { ++} ++ ++void 
MacroAssembler::check_and_handle_popframe(Register java_thread) { ++} ++ ++void MacroAssembler::null_check(Register reg, int offset) { ++ if (needs_explicit_null_check(offset)) { ++ // provoke OS NULL exception if reg = NULL by ++ // accessing M[reg] w/o changing any (non-CC) registers ++ // NOTE: cmpl is plenty here to provoke a segv ++ lw(AT, reg, 0); ++ } else { ++ // nothing to do, (later) access of M[reg + offset] ++ // will provoke OS NULL exception if reg = NULL ++ } ++} ++ ++void MacroAssembler::enter() { ++ push2(RA, FP); ++ move(FP, SP); ++} ++ ++void MacroAssembler::leave() { ++ move(SP, FP); ++ pop2(RA, FP); ++} ++ ++void MacroAssembler::reset_last_Java_frame(Register java_thread, bool clear_fp) { ++ // determine java_thread register ++ if (!java_thread->is_valid()) { ++#ifndef OPT_THREAD ++ java_thread = T1; ++ get_thread(java_thread); ++#else ++ java_thread = TREG; ++#endif ++ } ++ // we must set sp to zero to clear frame ++ st_ptr(R0, java_thread, in_bytes(JavaThread::last_Java_sp_offset())); ++ // must clear fp, so that compiled frames are not confused; it is possible ++ // that we need it only for debugging ++ if(clear_fp) { ++ st_ptr(R0, java_thread, in_bytes(JavaThread::last_Java_fp_offset())); ++ } ++ ++ // Always clear the pc because it could have been set by make_walkable() ++ st_ptr(R0, java_thread, in_bytes(JavaThread::last_Java_pc_offset())); ++} ++ ++void MacroAssembler::reset_last_Java_frame(bool clear_fp) { ++ Register thread = TREG; ++#ifndef OPT_THREAD ++ get_thread(thread); ++#endif ++ // we must set sp to zero to clear frame ++ sd(R0, Address(thread, JavaThread::last_Java_sp_offset())); ++ // must clear fp, so that compiled frames are not confused; it is ++ // possible that we need it only for debugging ++ if (clear_fp) { ++ sd(R0, Address(thread, JavaThread::last_Java_fp_offset())); ++ } ++ ++ // Always clear the pc because it could have been set by make_walkable() ++ sd(R0, Address(thread, JavaThread::last_Java_pc_offset())); ++} ++ ++// Write serialization page so VM thread can do a pseudo remote membar. ++// We use the current thread pointer to calculate a thread specific ++// offset to write to within the page. This minimizes bus traffic ++// due to cache line collision. ++void MacroAssembler::serialize_memory(Register thread, Register tmp) { ++ int mask = os::vm_page_size() - sizeof(int); ++ assert_different_registers(AT, tmp); ++ assert(is_uimm(mask, 16), "Not a unsigned 16-bit"); ++ srl(AT, thread, os::get_serialize_page_shift_count()); ++ andi(AT, AT, mask); ++ li(tmp, os::get_memory_serialize_page()); ++ addu(tmp, tmp, AT); ++ sw(R0, tmp, 0); ++} ++ ++// Calls to C land ++// ++// When entering C land, the fp, & sp of the last Java frame have to be recorded ++// in the (thread-local) JavaThread object. When leaving C land, the last Java fp ++// has to be reset to 0. This is required to allow proper stack traversal. 
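serialize_memory above hashes the thread pointer down to a per-thread, int-aligned slot inside the serialization page, so concurrent writers tend to land on different cache lines. In isolation the index arithmetic looks like this (page size and shift count are parameters here, not the real os:: values):

#include <cstddef>
#include <cstdint>

// Sketch of the offset computation in serialize_memory.
size_t serialize_slot(uintptr_t thread_ptr, size_t page_size, unsigned shift) {
  size_t mask = page_size - sizeof(int);      // keeps the slot int-aligned and inside the page
  return (thread_ptr >> shift) & mask;        // srl + andi in the code above
}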
++void MacroAssembler::set_last_Java_frame(Register java_thread, ++ Register last_java_sp, ++ Register last_java_fp, ++ address last_java_pc) { ++ // determine java_thread register ++ if (!java_thread->is_valid()) { ++#ifndef OPT_THREAD ++ java_thread = T2; ++ get_thread(java_thread); ++#else ++ java_thread = TREG; ++#endif ++ } ++ // determine last_java_sp register ++ if (!last_java_sp->is_valid()) { ++ last_java_sp = SP; ++ } ++ ++ // last_java_fp is optional ++ if (last_java_fp->is_valid()) { ++ st_ptr(last_java_fp, java_thread, in_bytes(JavaThread::last_Java_fp_offset())); ++ } ++ ++ // last_java_pc is optional ++ if (last_java_pc != NULL) { ++ relocate(relocInfo::internal_word_type); ++ patchable_set48(AT, (long)last_java_pc); ++ st_ptr(AT, java_thread, in_bytes(JavaThread::frame_anchor_offset() + JavaFrameAnchor::last_Java_pc_offset())); ++ } ++ st_ptr(last_java_sp, java_thread, in_bytes(JavaThread::last_Java_sp_offset())); ++} ++ ++void MacroAssembler::set_last_Java_frame(Register last_java_sp, ++ Register last_java_fp, ++ address last_java_pc) { ++ // determine last_java_sp register ++ if (!last_java_sp->is_valid()) { ++ last_java_sp = SP; ++ } ++ ++ Register thread = TREG; ++#ifndef OPT_THREAD ++ get_thread(thread); ++#endif ++ // last_java_fp is optional ++ if (last_java_fp->is_valid()) { ++ sd(last_java_fp, Address(thread, JavaThread::last_Java_fp_offset())); ++ } ++ ++ // last_java_pc is optional ++ if (last_java_pc != NULL) { ++ relocate(relocInfo::internal_word_type); ++ patchable_set48(AT, (long)last_java_pc); ++ st_ptr(AT, thread, in_bytes(JavaThread::frame_anchor_offset() + JavaFrameAnchor::last_Java_pc_offset())); ++ } ++ ++ sd(last_java_sp, Address(thread, JavaThread::last_Java_sp_offset())); ++} ++ ++////////////////////////////////////////////////////////////////////////////////// ++#if INCLUDE_ALL_GCS ++ ++void MacroAssembler::g1_write_barrier_pre(Register obj, ++ Register pre_val, ++ Register thread, ++ Register tmp, ++ bool tosca_live, ++ bool expand_call) { ++ ++ // If expand_call is true then we expand the call_VM_leaf macro ++ // directly to skip generating the check by ++ // InterpreterMacroAssembler::call_VM_leaf_base that checks _last_sp. ++ ++ assert(thread == TREG, "must be"); ++ ++ Label done; ++ Label runtime; ++ ++ assert(pre_val != noreg, "check this code"); ++ ++ if (obj != noreg) { ++ assert_different_registers(obj, pre_val, tmp); ++ assert(pre_val != V0, "check this code"); ++ } ++ ++ Address in_progress(thread, in_bytes(JavaThread::satb_mark_queue_offset() + ++ PtrQueue::byte_offset_of_active())); ++ Address index(thread, in_bytes(JavaThread::satb_mark_queue_offset() + ++ PtrQueue::byte_offset_of_index())); ++ Address buffer(thread, in_bytes(JavaThread::satb_mark_queue_offset() + ++ PtrQueue::byte_offset_of_buf())); ++ ++ ++ // Is marking active? ++ if (in_bytes(PtrQueue::byte_width_of_active()) == 4) { ++ lw(AT, in_progress); ++ } else { ++ assert(in_bytes(PtrQueue::byte_width_of_active()) == 1, "Assumption"); ++ lb(AT, in_progress); ++ } ++ beq(AT, R0, done); ++ delayed()->nop(); ++ ++ // Do we need to load the previous value? ++ if (obj != noreg) { ++ load_heap_oop(pre_val, Address(obj, 0)); ++ } ++ ++ // Is the previous value null? ++ beq(pre_val, R0, done); ++ delayed()->nop(); ++ ++ // Can we store original value in the thread's buffer? ++ // Is index == 0? ++ // (The index field is typed as size_t.) 
++ ++ ld(tmp, index); ++ beq(tmp, R0, runtime); ++ delayed()->nop(); ++ ++ daddiu(tmp, tmp, -1 * wordSize); ++ sd(tmp, index); ++ ld(AT, buffer); ++ daddu(tmp, tmp, AT); ++ ++ // Record the previous value ++ sd(pre_val, tmp, 0); ++ beq(R0, R0, done); ++ delayed()->nop(); ++ ++ bind(runtime); ++ // save the live input values ++ if (tosca_live) push(V0); ++ ++ if (obj != noreg && obj != V0) push(obj); ++ ++ if (pre_val != V0) push(pre_val); ++ ++ // Calling the runtime using the regular call_VM_leaf mechanism generates ++ // code (generated by InterpreterMacroAssember::call_VM_leaf_base) ++ // that checks that the *(fp+frame::interpreter_frame_last_sp) == NULL. ++ // ++ // If we care generating the pre-barrier without a frame (e.g. in the ++ // intrinsified Reference.get() routine) then fp might be pointing to ++ // the caller frame and so this check will most likely fail at runtime. ++ // ++ // Expanding the call directly bypasses the generation of the check. ++ // So when we do not have have a full interpreter frame on the stack ++ // expand_call should be passed true. ++ ++ ++ if (expand_call) { ++ assert(pre_val != A1, "smashed arg"); ++ if (thread != A1) move(A1, thread); ++ if (pre_val != A0) move(A0, pre_val); ++ MacroAssembler::call_VM_leaf_base(CAST_FROM_FN_PTR(address, SharedRuntime::g1_wb_pre), 2); ++ } else { ++ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::g1_wb_pre), pre_val, thread); ++ } ++ ++ // save the live input values ++ if (pre_val != V0) ++ pop(pre_val); ++ ++ if (obj != noreg && obj != V0) ++ pop(obj); ++ ++ if(tosca_live) pop(V0); ++ ++ bind(done); ++} ++ ++void MacroAssembler::g1_write_barrier_post(Register store_addr, ++ Register new_val, ++ Register thread, ++ Register tmp, ++ Register tmp2) { ++ assert(tmp != AT, "must be"); ++ assert(tmp2 != AT, "must be"); ++ assert(thread == TREG, "must be"); ++ ++ Address queue_index(thread, in_bytes(JavaThread::dirty_card_queue_offset() + ++ PtrQueue::byte_offset_of_index())); ++ Address buffer(thread, in_bytes(JavaThread::dirty_card_queue_offset() + ++ PtrQueue::byte_offset_of_buf())); ++ ++ BarrierSet* bs = Universe::heap()->barrier_set(); ++ CardTableModRefBS* ct = (CardTableModRefBS*)bs; ++ assert(sizeof(*ct->byte_map_base) == sizeof(jbyte), "adjust this code"); ++ ++ Label done; ++ Label runtime; ++ ++ // Does store cross heap regions? ++ xorr(AT, store_addr, new_val); ++ dsrl(AT, AT, HeapRegion::LogOfHRGrainBytes); ++ beq(AT, R0, done); ++ delayed()->nop(); ++ ++ ++ // crosses regions, storing NULL? ++ beq(new_val, R0, done); ++ delayed()->nop(); ++ ++ // storing region crossing non-NULL, is card already dirty? ++ const Register card_addr = tmp; ++ const Register cardtable = tmp2; ++ ++ move(card_addr, store_addr); ++ dsrl(card_addr, card_addr, CardTableModRefBS::card_shift); ++ // Do not use ExternalAddress to load 'byte_map_base', since 'byte_map_base' is NOT ++ // a valid address and therefore is not properly handled by the relocation code. ++ set64(cardtable, (intptr_t)ct->byte_map_base); ++ daddu(card_addr, card_addr, cardtable); ++ ++ lb(AT, card_addr, 0); ++ daddiu(AT, AT, -1 * (int)G1SATBCardTableModRefBS::g1_young_card_val()); ++ beq(AT, R0, done); ++ delayed()->nop(); ++ ++ sync(); ++ lb(AT, card_addr, 0); ++ daddiu(AT, AT, -1 * (int)(int)CardTableModRefBS::dirty_card_val()); ++ beq(AT, R0, done); ++ delayed()->nop(); ++ ++ ++ // storing a region crossing, non-NULL oop, card is clean. ++ // dirty card and log. 
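g1_write_barrier_post above needs two pieces of address arithmetic before it dirties anything: a cross-region test (xor of the old and new addresses, shifted by the region-size log) and the card lookup (store address shifted by the card shift, added to byte_map_base). Both, written out standalone with placeholder shift values rather than the real HotSpot constants:

#include <cstdint>

// True when the store crosses heap regions and the post-barrier must go on.
bool crosses_regions(uintptr_t store_addr, uintptr_t new_val, unsigned log_region_bytes) {
  return ((store_addr ^ new_val) >> log_region_bytes) != 0;   // xorr + dsrl, beq if zero
}

// Locates the card byte covering store_addr.
uint8_t* card_for(uintptr_t store_addr, uint8_t* byte_map_base, unsigned card_shift) {
  return byte_map_base + (store_addr >> card_shift);          // dsrl + daddu
}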
++ move(AT, (int)CardTableModRefBS::dirty_card_val()); ++ sb(AT, card_addr, 0); ++ ++ lw(AT, queue_index); ++ beq(AT, R0, runtime); ++ delayed()->nop(); ++ daddiu(AT, AT, -1 * wordSize); ++ sw(AT, queue_index); ++ ld(tmp2, buffer); ++ ld(AT, queue_index); ++ daddu(tmp2, tmp2, AT); ++ sd(card_addr, tmp2, 0); ++ beq(R0, R0, done); ++ delayed()->nop(); ++ ++ bind(runtime); ++ // save the live input values ++ push(store_addr); ++ push(new_val); ++ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::g1_wb_post), card_addr, TREG); ++ pop(new_val); ++ pop(store_addr); ++ ++ bind(done); ++} ++ ++#endif // INCLUDE_ALL_GCS ++////////////////////////////////////////////////////////////////////////////////// ++ ++ ++void MacroAssembler::store_check(Register obj) { ++ // Does a store check for the oop in register obj. The content of ++ // register obj is destroyed afterwards. ++ store_check_part_1(obj); ++ store_check_part_2(obj); ++} ++ ++void MacroAssembler::store_check(Register obj, Address dst) { ++ store_check(obj); ++} ++ ++ ++// split the store check operation so that other instructions can be scheduled inbetween ++void MacroAssembler::store_check_part_1(Register obj) { ++ BarrierSet* bs = Universe::heap()->barrier_set(); ++ assert(bs->kind() == BarrierSet::CardTableModRef, "Wrong barrier set kind"); ++ dsrl(obj, obj, CardTableModRefBS::card_shift); ++} ++ ++void MacroAssembler::store_check_part_2(Register obj) { ++ BarrierSet* bs = Universe::heap()->barrier_set(); ++ assert(bs->kind() == BarrierSet::CardTableModRef, "Wrong barrier set kind"); ++ CardTableModRefBS* ct = (CardTableModRefBS*)bs; ++ assert(sizeof(*ct->byte_map_base) == sizeof(jbyte), "adjust this code"); ++ ++ set64(AT, (long)ct->byte_map_base); ++ daddu(AT, AT, obj); ++ if (UseConcMarkSweepGC) sync(); ++ sb(R0, AT, 0); ++} ++ ++// Defines obj, preserves var_size_in_bytes, okay for t2 == var_size_in_bytes. ++void MacroAssembler::tlab_allocate(Register obj, Register var_size_in_bytes, int con_size_in_bytes, ++ Register t1, Register t2, Label& slow_case) { ++ assert_different_registers(obj, var_size_in_bytes, t1, t2, AT); ++ ++ Register end = t2; ++#ifndef OPT_THREAD ++ Register thread = t1; ++ get_thread(thread); ++#else ++ Register thread = TREG; ++#endif ++ verify_tlab(t1, t2);//blows t1&t2 ++ ++ ld_ptr(obj, thread, in_bytes(JavaThread::tlab_top_offset())); ++ ++ if (var_size_in_bytes == NOREG) { ++ set64(AT, con_size_in_bytes); ++ addu(end, obj, AT); ++ } else { ++ addu(end, obj, var_size_in_bytes); ++ } ++ ++ ld_ptr(AT, thread, in_bytes(JavaThread::tlab_end_offset())); ++ sltu(AT, AT, end); ++ bne_far(AT, R0, slow_case); ++ delayed()->nop(); ++ ++ ++ // update the tlab top pointer ++ st_ptr(end, thread, in_bytes(JavaThread::tlab_top_offset())); ++ ++ verify_tlab(t1, t2); ++} ++ ++// Defines obj, preserves var_size_in_bytes ++void MacroAssembler::eden_allocate(Register obj, Register var_size_in_bytes, int con_size_in_bytes, ++ Register t1, Register t2, Label& slow_case) { ++ assert_different_registers(obj, var_size_in_bytes, t1, AT); ++ if (CMSIncrementalMode || !Universe::heap()->supports_inline_contig_alloc()) { ++ // No allocation in the shared eden. 
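tlab_allocate above is a plain bump-pointer allocation against the thread-local allocation buffer: read the current top, add the size, bail to the slow path if the new top passes the TLAB end, otherwise publish the new top. A minimal single-threaded sketch of that shape (the struct is a hypothetical stand-in for the JavaThread TLAB fields):

#include <cstddef>

struct TlabSketch {        // hypothetical stand-in for the thread's TLAB fields
  char* top;
  char* end;
};

// Returns the object start on success, nullptr when the caller must take the
// slow path (mirrors the bne_far(AT, R0, slow_case) branch above).
void* tlab_allocate_sketch(TlabSketch& tlab, size_t size_in_bytes) {
  char* obj = tlab.top;
  char* new_top = obj + size_in_bytes;
  if (new_top > tlab.end) return nullptr;   // sltu + bne_far -> slow_case
  tlab.top = new_top;                       // st_ptr(end, thread, tlab_top_offset)
  return obj;
}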
++ b_far(slow_case); ++ delayed()->nop(); ++ } else { ++ ++ Address heap_top(t1); ++ li(t1, (long)Universe::heap()->top_addr()); ++ ld_ptr(obj, heap_top); ++ ++ Register end = t2; ++ Label retry; ++ ++ bind(retry); ++ if (var_size_in_bytes == NOREG) { ++ set64(AT, con_size_in_bytes); ++ addu(end, obj, AT); ++ } else { ++ addu(end, obj, var_size_in_bytes); ++ } ++ // if end < obj then we wrapped around => object too long => slow case ++ sltu(AT, end, obj); ++ bne_far(AT, R0, slow_case); ++ delayed()->nop(); ++ ++ li(AT, (long)Universe::heap()->end_addr()); ++ ld_ptr(AT, AT, 0); ++ sltu(AT, AT, end); ++ bne_far(AT, R0, slow_case); ++ delayed()->nop(); ++ // Compare obj with the top addr, and if still equal, store the new top addr in ++ // end at the address of the top addr pointer. Sets ZF if was equal, and clears ++ // it otherwise. Use lock prefix for atomicity on MPs. ++ //if (os::is_MP()) { ++ // sync(); ++ //} ++ ++ // if someone beat us on the allocation, try again, otherwise continue ++ cmpxchg(end, heap_top, obj); ++ beq_far(AT, R0, retry); ++ delayed()->nop(); ++ } ++} ++ ++// C2 doesn't invoke this one. ++void MacroAssembler::tlab_refill(Label& retry, Label& try_eden, Label& slow_case) { ++ Register top = T0; ++ Register t1 = T1; ++ Register t2 = T9; ++ Register t3 = T3; ++ Register thread_reg = T8; ++ assert_different_registers(top, thread_reg, t1, t2, /* preserve: */ T2, A4); ++ Label do_refill, discard_tlab; ++ ++ if (CMSIncrementalMode || !Universe::heap()->supports_inline_contig_alloc()) { ++ // No allocation in the shared eden. ++ b(slow_case); ++ delayed()->nop(); ++ } ++ ++ get_thread(thread_reg); ++ ++ ld_ptr(top, thread_reg, in_bytes(JavaThread::tlab_top_offset())); ++ ld_ptr(t1, thread_reg, in_bytes(JavaThread::tlab_end_offset())); ++ ++ // calculate amount of free space ++ subu(t1, t1, top); ++ shr(t1, LogHeapWordSize); ++ ++ // Retain tlab and allocate object in shared space if ++ // the amount free in the tlab is too large to discard. 
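eden_allocate above races other threads for the shared heap top with a CAS retry loop: recompute the end, reject wrap-around and passing the heap end, then try to install the new top and loop if another allocator got there first. The same loop in portable C++ (the heap limits are parameters here):

#include <atomic>
#include <cstddef>

// Illustrative shared-eden bump allocation with a CAS retry loop.
char* eden_allocate_sketch(std::atomic<char*>& heap_top, char* heap_end, size_t size) {
  char* obj = heap_top.load();
  for (;;) {
    char* end = obj + size;
    if (end < obj) return nullptr;        // wrapped around: object too long, slow path
    if (end > heap_end) return nullptr;   // would pass the heap end: slow path
    // cmpxchg(end, heap_top, obj): on failure obj is reloaded and we retry.
    if (heap_top.compare_exchange_weak(obj, end)) return obj;
  }
}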
++ ld_ptr(t2, thread_reg, in_bytes(JavaThread::tlab_refill_waste_limit_offset())); ++ slt(AT, t2, t1); ++ beq(AT, R0, discard_tlab); ++ delayed()->nop(); ++ ++ // Retain ++ li(AT, ThreadLocalAllocBuffer::refill_waste_limit_increment()); ++ addu(t2, t2, AT); ++ st_ptr(t2, thread_reg, in_bytes(JavaThread::tlab_refill_waste_limit_offset())); ++ ++ if (TLABStats) { ++ // increment number of slow_allocations ++ lw(AT, thread_reg, in_bytes(JavaThread::tlab_slow_allocations_offset())); ++ addiu(AT, AT, 1); ++ sw(AT, thread_reg, in_bytes(JavaThread::tlab_slow_allocations_offset())); ++ } ++ b(try_eden); ++ delayed()->nop(); ++ ++ bind(discard_tlab); ++ if (TLABStats) { ++ // increment number of refills ++ lw(AT, thread_reg, in_bytes(JavaThread::tlab_number_of_refills_offset())); ++ addiu(AT, AT, 1); ++ sw(AT, thread_reg, in_bytes(JavaThread::tlab_number_of_refills_offset())); ++ // accumulate wastage -- t1 is amount free in tlab ++ lw(AT, thread_reg, in_bytes(JavaThread::tlab_fast_refill_waste_offset())); ++ addu(AT, AT, t1); ++ sw(AT, thread_reg, in_bytes(JavaThread::tlab_fast_refill_waste_offset())); ++ } ++ ++ // if tlab is currently allocated (top or end != null) then ++ // fill [top, end + alignment_reserve) with array object ++ beq(top, R0, do_refill); ++ delayed()->nop(); ++ ++ // set up the mark word ++ li(AT, (long)markOopDesc::prototype()->copy_set_hash(0x2)); ++ st_ptr(AT, top, oopDesc::mark_offset_in_bytes()); ++ ++ // set the length to the remaining space ++ addiu(t1, t1, - typeArrayOopDesc::header_size(T_INT)); ++ addiu(t1, t1, ThreadLocalAllocBuffer::alignment_reserve()); ++ shl(t1, log2_intptr(HeapWordSize/sizeof(jint))); ++ sw(t1, top, arrayOopDesc::length_offset_in_bytes()); ++ ++ // set klass to intArrayKlass ++ li(AT, (intptr_t)Universe::intArrayKlassObj_addr()); ++ ld_ptr(t1, AT, 0); ++ //st_ptr(t1, top, oopDesc::klass_offset_in_bytes()); ++ store_klass(top, t1); ++ ++ ld_ptr(t1, thread_reg, in_bytes(JavaThread::tlab_start_offset())); ++ subu(t1, top, t1); ++ incr_allocated_bytes(thread_reg, t1, 0); ++ ++ // refill the tlab with an eden allocation ++ bind(do_refill); ++ ld_ptr(t1, thread_reg, in_bytes(JavaThread::tlab_size_offset())); ++ shl(t1, LogHeapWordSize); ++ // add object_size ?? ++ eden_allocate(top, t1, 0, t2, t3, slow_case); ++ ++ // Check that t1 was preserved in eden_allocate. 
++#ifdef ASSERT ++ if (UseTLAB) { ++ Label ok; ++ assert_different_registers(thread_reg, t1); ++ ld_ptr(AT, thread_reg, in_bytes(JavaThread::tlab_size_offset())); ++ shl(AT, LogHeapWordSize); ++ beq(AT, t1, ok); ++ delayed()->nop(); ++ stop("assert(t1 != tlab size)"); ++ should_not_reach_here(); ++ ++ bind(ok); ++ } ++#endif ++ st_ptr(top, thread_reg, in_bytes(JavaThread::tlab_start_offset())); ++ st_ptr(top, thread_reg, in_bytes(JavaThread::tlab_top_offset())); ++ addu(top, top, t1); ++ addiu(top, top, - ThreadLocalAllocBuffer::alignment_reserve_in_bytes()); ++ st_ptr(top, thread_reg, in_bytes(JavaThread::tlab_end_offset())); ++ verify_tlab(t1, t2); ++ b(retry); ++ delayed()->nop(); ++} ++ ++void MacroAssembler::incr_allocated_bytes(Register thread, ++ Register var_size_in_bytes, ++ int con_size_in_bytes, ++ Register t1) { ++ if (!thread->is_valid()) { ++#ifndef OPT_THREAD ++ assert(t1->is_valid(), "need temp reg"); ++ thread = t1; ++ get_thread(thread); ++#else ++ thread = TREG; ++#endif ++ } ++ ++ ld_ptr(AT, thread, in_bytes(JavaThread::allocated_bytes_offset())); ++ if (var_size_in_bytes->is_valid()) { ++ addu(AT, AT, var_size_in_bytes); ++ } else { ++ addiu(AT, AT, con_size_in_bytes); ++ } ++ st_ptr(AT, thread, in_bytes(JavaThread::allocated_bytes_offset())); ++} ++ ++static const double pi_4 = 0.7853981633974483; ++ ++// must get argument(a double) in F12/F13 ++//void MacroAssembler::trigfunc(char trig, bool preserve_cpu_regs, int num_fpu_regs_in_use) { ++//We need to preseve the register which maybe modified during the Call ++void MacroAssembler::trigfunc(char trig, int num_fpu_regs_in_use) { ++ // save all modified register here ++ // FIXME, in the disassembly of tirgfunc, only used V0, V1, T9, SP, RA, so we ony save V0, V1, T9 ++ pushad(); ++ // we should preserve the stack space before we call ++ addiu(SP, SP, -wordSize * 2); ++ switch (trig){ ++ case 's' : ++ call( CAST_FROM_FN_PTR(address, SharedRuntime::dsin), relocInfo::runtime_call_type ); ++ delayed()->nop(); ++ break; ++ case 'c': ++ call( CAST_FROM_FN_PTR(address, SharedRuntime::dcos), relocInfo::runtime_call_type ); ++ delayed()->nop(); ++ break; ++ case 't': ++ call( CAST_FROM_FN_PTR(address, SharedRuntime::dtan), relocInfo::runtime_call_type ); ++ delayed()->nop(); ++ break; ++ default:assert (false, "bad intrinsic"); ++ break; ++ ++ } ++ ++ addiu(SP, SP, wordSize * 2); ++ popad(); ++} ++ ++void MacroAssembler::li(Register rd, long imm) { ++ if (imm <= max_jint && imm >= min_jint) { ++ li32(rd, (int)imm); ++ } else if (julong(imm) <= 0xFFFFFFFF) { ++ assert_not_delayed(); ++ // lui sign-extends, so we can't use that. 
++ ori(rd, R0, julong(imm) >> 16); ++ dsll(rd, rd, 16); ++ ori(rd, rd, split_low(imm)); ++ } else if ((imm > 0) && is_simm16(imm >> 32)) { ++ // A 48-bit address ++ li48(rd, imm); ++ } else { ++ li64(rd, imm); ++ } ++} ++ ++void MacroAssembler::li32(Register reg, int imm) { ++ if (is_simm16(imm)) { ++ addiu(reg, R0, imm); ++ } else { ++ lui(reg, split_low(imm >> 16)); ++ if (split_low(imm)) ++ ori(reg, reg, split_low(imm)); ++ } ++} ++ ++void MacroAssembler::set64(Register d, jlong value) { ++ assert_not_delayed(); ++ ++ int hi = (int)(value >> 32); ++ int lo = (int)(value & ~0); ++ ++ if (value == lo) { // 32-bit integer ++ if (is_simm16(value)) { ++ daddiu(d, R0, value); ++ } else { ++ lui(d, split_low(value >> 16)); ++ if (split_low(value)) { ++ ori(d, d, split_low(value)); ++ } ++ } ++ } else if (hi == 0) { // hardware zero-extends to upper 32 ++ ori(d, R0, julong(value) >> 16); ++ dsll(d, d, 16); ++ if (split_low(value)) { ++ ori(d, d, split_low(value)); ++ } ++ } else if ((value> 0) && is_simm16(value >> 32)) { // li48 ++ // 4 insts ++ li48(d, value); ++ } else { // li64 ++ // 6 insts ++ li64(d, value); ++ } ++} ++ ++ ++int MacroAssembler::insts_for_set64(jlong value) { ++ int hi = (int)(value >> 32); ++ int lo = (int)(value & ~0); ++ ++ int count = 0; ++ ++ if (value == lo) { // 32-bit integer ++ if (is_simm16(value)) { ++ //daddiu(d, R0, value); ++ count++; ++ } else { ++ //lui(d, split_low(value >> 16)); ++ count++; ++ if (split_low(value)) { ++ //ori(d, d, split_low(value)); ++ count++; ++ } ++ } ++ } else if (hi == 0) { // hardware zero-extends to upper 32 ++ //ori(d, R0, julong(value) >> 16); ++ //dsll(d, d, 16); ++ count += 2; ++ if (split_low(value)) { ++ //ori(d, d, split_low(value)); ++ count++; ++ } ++ } else if ((value> 0) && is_simm16(value >> 32)) { // li48 ++ // 4 insts ++ //li48(d, value); ++ count += 4; ++ } else { // li64 ++ // 6 insts ++ //li64(d, value); ++ count += 6; ++ } ++ ++ return count; ++} ++ ++void MacroAssembler::patchable_set48(Register d, jlong value) { ++ assert_not_delayed(); ++ ++ int hi = (int)(value >> 32); ++ int lo = (int)(value & ~0); ++ ++ int count = 0; ++ ++ if (value == lo) { // 32-bit integer ++ if (is_simm16(value)) { ++ daddiu(d, R0, value); ++ count += 1; ++ } else { ++ lui(d, split_low(value >> 16)); ++ count += 1; ++ if (split_low(value)) { ++ ori(d, d, split_low(value)); ++ count += 1; ++ } ++ } ++ } else if (hi == 0) { // hardware zero-extends to upper 32 ++ ori(d, R0, julong(value) >> 16); ++ dsll(d, d, 16); ++ count += 2; ++ if (split_low(value)) { ++ ori(d, d, split_low(value)); ++ count += 1; ++ } ++ } else if ((value> 0) && is_simm16(value >> 32)) { // li48 ++ // 4 insts ++ li48(d, value); ++ count += 4; ++ } else { // li64 ++ tty->print_cr("value = 0x%lx", value); ++ guarantee(false, "Not supported yet !"); ++ } ++ ++ while (count < 4) { ++ nop(); ++ count++; ++ } ++} ++ ++void MacroAssembler::patchable_set32(Register d, jlong value) { ++ assert_not_delayed(); ++ ++ int hi = (int)(value >> 32); ++ int lo = (int)(value & ~0); ++ ++ int count = 0; ++ ++ if (value == lo) { // 32-bit integer ++ if (is_simm16(value)) { ++ daddiu(d, R0, value); ++ count += 1; ++ } else { ++ lui(d, split_low(value >> 16)); ++ count += 1; ++ if (split_low(value)) { ++ ori(d, d, split_low(value)); ++ count += 1; ++ } ++ } ++ } else if (hi == 0) { // hardware zero-extends to upper 32 ++ ori(d, R0, julong(value) >> 16); ++ dsll(d, d, 16); ++ count += 2; ++ if (split_low(value)) { ++ ori(d, d, split_low(value)); ++ count += 1; ++ } ++ } else { ++ 
tty->print_cr("value = 0x%lx", value); ++ guarantee(false, "Not supported yet !"); ++ } ++ ++ while (count < 3) { ++ nop(); ++ count++; ++ } ++} ++ ++void MacroAssembler::patchable_call32(Register d, jlong value) { ++ assert_not_delayed(); ++ ++ int hi = (int)(value >> 32); ++ int lo = (int)(value & ~0); ++ ++ int count = 0; ++ ++ if (value == lo) { // 32-bit integer ++ if (is_simm16(value)) { ++ daddiu(d, R0, value); ++ count += 1; ++ } else { ++ lui(d, split_low(value >> 16)); ++ count += 1; ++ if (split_low(value)) { ++ ori(d, d, split_low(value)); ++ count += 1; ++ } ++ } ++ } else { ++ tty->print_cr("value = 0x%lx", value); ++ guarantee(false, "Not supported yet !"); ++ } ++ ++ while (count < 2) { ++ nop(); ++ count++; ++ } ++} ++ ++void MacroAssembler::set_narrow_klass(Register dst, Klass* k) { ++ assert(UseCompressedClassPointers, "should only be used for compressed header"); ++ assert(oop_recorder() != NULL, "this assembler needs an OopRecorder"); ++ ++ int klass_index = oop_recorder()->find_index(k); ++ RelocationHolder rspec = metadata_Relocation::spec(klass_index); ++ long narrowKlass = (long)Klass::encode_klass(k); ++ ++ relocate(rspec, Assembler::narrow_oop_operand); ++ patchable_set48(dst, narrowKlass); ++} ++ ++ ++void MacroAssembler::set_narrow_oop(Register dst, jobject obj) { ++ assert(UseCompressedOops, "should only be used for compressed header"); ++ assert(oop_recorder() != NULL, "this assembler needs an OopRecorder"); ++ ++ int oop_index = oop_recorder()->find_index(obj); ++ RelocationHolder rspec = oop_Relocation::spec(oop_index); ++ ++ relocate(rspec, Assembler::narrow_oop_operand); ++ patchable_set48(dst, oop_index); ++} ++ ++void MacroAssembler::li64(Register rd, long imm) { ++ assert_not_delayed(); ++ lui(rd, split_low(imm >> 48)); ++ ori(rd, rd, split_low(imm >> 32)); ++ dsll(rd, rd, 16); ++ ori(rd, rd, split_low(imm >> 16)); ++ dsll(rd, rd, 16); ++ ori(rd, rd, split_low(imm)); ++} ++ ++void MacroAssembler::li48(Register rd, long imm) { ++ assert_not_delayed(); ++ assert(is_simm16(imm >> 32), "Not a 48-bit address"); ++ lui(rd, imm >> 32); ++ ori(rd, rd, split_low(imm >> 16)); ++ dsll(rd, rd, 16); ++ ori(rd, rd, split_low(imm)); ++} ++ ++void MacroAssembler::verify_oop(Register reg, const char* s) { ++ if (!VerifyOops) return; ++ const char * b = NULL; ++ stringStream ss; ++ ss.print("verify_oop: %s: %s", reg->name(), s); ++ b = code_string(ss.as_string()); ++ pushad(); ++ move(A1, reg); ++ li(A0, (long)b); ++ li(AT, (long)StubRoutines::verify_oop_subroutine_entry_address()); ++ ld(T9, AT, 0); ++ jalr(T9); ++ delayed()->nop(); ++ popad(); ++} ++ ++ ++void MacroAssembler::verify_oop_addr(Address addr, const char* s) { ++ if (!VerifyOops) { ++ nop(); ++ return; ++ } ++ // Pass register number to verify_oop_subroutine ++ const char * b = NULL; ++ stringStream ss; ++ ss.print("verify_oop_addr: %s", s); ++ b = code_string(ss.as_string()); ++ ++ addiu(SP, SP, - 7 * wordSize); ++ st_ptr(T0, SP, 6 * wordSize); ++ st_ptr(T1, SP, 5 * wordSize); ++ st_ptr(RA, SP, 4 * wordSize); ++ st_ptr(A0, SP, 3 * wordSize); ++ st_ptr(A1, SP, 2 * wordSize); ++ st_ptr(AT, SP, 1 * wordSize); ++ st_ptr(T9, SP, 0); ++ ++ // addr may contain sp so we will have to adjust it based on the ++ // pushes that we just did. 
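li48 above materializes a 48-bit constant from three 16-bit chunks: lui places the top chunk, an ori merges the middle chunk, a 16-bit shift makes room, and a final ori adds the low chunk; li and set64 pick this four-instruction form only when the value is positive and its top 16 bits fit a signed immediate. Recomposing the chunks arithmetically shows the sequence reproduces the original value (assuming a positive 48-bit input, as those call sites guarantee):

#include <cstdint>

// Recomposes a positive 48-bit value the way li48 does, one 16-bit chunk at a time.
int64_t li48_sketch(int64_t imm) {
  uint16_t c2 = static_cast<uint16_t>(imm >> 32);   // lui(rd, imm >> 32)
  uint16_t c1 = static_cast<uint16_t>(imm >> 16);   // ori(rd, rd, split_low(imm >> 16))
  uint16_t c0 = static_cast<uint16_t>(imm);         // ori(rd, rd, split_low(imm))
  int64_t rd = (static_cast<int64_t>(c2) << 16) | c1;
  rd <<= 16;                                        // dsll(rd, rd, 16)
  rd |= c0;
  return rd;                                        // equals imm for positive 48-bit values
}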
++ if (addr.uses(SP)) { ++ lea(A1, addr); ++ ld_ptr(A1, Address(A1, 7 * wordSize)); ++ } else { ++ ld_ptr(A1, addr); ++ } ++ li(A0, (long)b); ++ // call indirectly to solve generation ordering problem ++ li(AT, (long)StubRoutines::verify_oop_subroutine_entry_address()); ++ ld_ptr(T9, AT, 0); ++ jalr(T9); ++ delayed()->nop(); ++ ld_ptr(T0, SP, 6* wordSize); ++ ld_ptr(T1, SP, 5* wordSize); ++ ld_ptr(RA, SP, 4* wordSize); ++ ld_ptr(A0, SP, 3* wordSize); ++ ld_ptr(A1, SP, 2* wordSize); ++ ld_ptr(AT, SP, 1* wordSize); ++ ld_ptr(T9, SP, 0* wordSize); ++ addiu(SP, SP, 7 * wordSize); ++} ++ ++// used registers : T0, T1 ++void MacroAssembler::verify_oop_subroutine() { ++ // RA: ra ++ // A0: char* error message ++ // A1: oop object to verify ++ ++ Label exit, error; ++ // increment counter ++ li(T0, (long)StubRoutines::verify_oop_count_addr()); ++ lw(AT, T0, 0); ++ daddiu(AT, AT, 1); ++ sw(AT, T0, 0); ++ ++ // make sure object is 'reasonable' ++ beq(A1, R0, exit); // if obj is NULL it is ok ++ delayed()->nop(); ++ ++ // Check if the oop is in the right area of memory ++ // const int oop_mask = Universe::verify_oop_mask(); ++ // const int oop_bits = Universe::verify_oop_bits(); ++ const uintptr_t oop_mask = Universe::verify_oop_mask(); ++ const uintptr_t oop_bits = Universe::verify_oop_bits(); ++ li(AT, oop_mask); ++ andr(T0, A1, AT); ++ li(AT, oop_bits); ++ bne(T0, AT, error); ++ delayed()->nop(); ++ ++ // make sure klass is 'reasonable' ++ // add for compressedoops ++ reinit_heapbase(); ++ // add for compressedoops ++ load_klass(T0, A1); ++ beq(T0, R0, error); // if klass is NULL it is broken ++ delayed()->nop(); ++ // return if everything seems ok ++ bind(exit); ++ ++ jr(RA); ++ delayed()->nop(); ++ ++ // handle errors ++ bind(error); ++ pushad(); ++ call(CAST_FROM_FN_PTR(address, MacroAssembler::debug), relocInfo::runtime_call_type); ++ delayed()->nop(); ++ popad(); ++ jr(RA); ++ delayed()->nop(); ++} ++ ++void MacroAssembler::verify_tlab(Register t1, Register t2) { ++#ifdef ASSERT ++ assert_different_registers(t1, t2, AT); ++ if (UseTLAB && VerifyOops) { ++ Label next, ok; ++ ++ get_thread(t1); ++ ++ ld_ptr(t2, t1, in_bytes(JavaThread::tlab_top_offset())); ++ ld_ptr(AT, t1, in_bytes(JavaThread::tlab_start_offset())); ++ sltu(AT, t2, AT); ++ beq(AT, R0, next); ++ delayed()->nop(); ++ ++ stop("assert(top >= start)"); ++ ++ bind(next); ++ ld_ptr(AT, t1, in_bytes(JavaThread::tlab_end_offset())); ++ sltu(AT, AT, t2); ++ beq(AT, R0, ok); ++ delayed()->nop(); ++ ++ stop("assert(top <= end)"); ++ ++ bind(ok); ++ ++ } ++#endif ++} ++ ++RegisterOrConstant MacroAssembler::delayed_value_impl(intptr_t* delayed_value_addr, ++ Register tmp, ++ int offset) { ++ intptr_t value = *delayed_value_addr; ++ if (value != 0) ++ return RegisterOrConstant(value + offset); ++ AddressLiteral a(delayed_value_addr); ++ // load indirectly to solve generation ordering problem ++ //movptr(tmp, ExternalAddress((address) delayed_value_addr)); ++ //ld(tmp, a); ++ if (offset != 0) ++ daddiu(tmp,tmp, offset); ++ ++ return RegisterOrConstant(tmp); ++} ++ ++void MacroAssembler::hswap(Register reg) { ++ //short ++ //andi(reg, reg, 0xffff); ++ srl(AT, reg, 8); ++ sll(reg, reg, 24); ++ sra(reg, reg, 16); ++ orr(reg, reg, AT); ++} ++ ++void MacroAssembler::huswap(Register reg) { ++ dsrl(AT, reg, 8); ++ dsll(reg, reg, 24); ++ dsrl(reg, reg, 16); ++ orr(reg, reg, AT); ++ andi(reg, reg, 0xffff); ++} ++ ++// something funny to do this will only one more register AT ++// 32 bits ++void MacroAssembler::swap(Register reg) { ++ srl(AT, reg, 8); 
++ sll(reg, reg, 24); ++ orr(reg, reg, AT); ++ //reg : 4 1 2 3 ++ srl(AT, AT, 16); ++ xorr(AT, AT, reg); ++ andi(AT, AT, 0xff); ++ //AT : 0 0 0 1^3); ++ xorr(reg, reg, AT); ++ //reg : 4 1 2 1 ++ sll(AT, AT, 16); ++ xorr(reg, reg, AT); ++ //reg : 4 3 2 1 ++} ++ ++// do 32-bit CAS using MIPS64 lld/scd ++// ++// cas_int should only compare 32-bits of the memory value. ++// However, lld/scd will do 64-bit operation, which violates the intention of cas_int. ++// To simulate a 32-bit atomic operation, the value loaded with LLD should be split into ++// tow halves, and only the low-32 bits is compared. If equals, the low-32 bits of newval, ++// plus the high-32 bits or memory value, are stored togethor with SCD. ++// ++//Example: ++// ++// double d = 3.1415926; ++// System.err.println("hello" + d); ++// ++// sun.misc.FloatingDecimal$1.() ++// | ++// `- java.util.concurrent.atomic.AtomicInteger::compareAndSet() ++// ++// 38 cas_int [a7a7|J] [a0|I] [a6|I] ++// a0: 0xffffffffe8ea9f63 pc: 0x55647f3354 ++// a6: 0x4ab325aa ++// ++//again: ++// 0x00000055647f3c5c: lld at, 0x0(a7) ; 64-bit load, "0xe8ea9f63" ++// ++// 0x00000055647f3c60: sll t9, at, 0 ; t9: low-32 bits (sign extended) ++// 0x00000055647f3c64: dsrl32 t8, at, 0 ; t8: high-32 bits ++// 0x00000055647f3c68: dsll32 t8, t8, 0 ++// 0x00000055647f3c6c: bne t9, a0, 0x00000055647f3c9c ; goto nequal ++// 0x00000055647f3c70: sll zero, zero, 0 ++// ++// 0x00000055647f3c74: ori v1, zero, 0xffffffff ; v1: low-32 bits of newval (sign unextended) ++// 0x00000055647f3c78: dsll v1, v1, 16 ; v1 = a6 & 0xFFFFFFFF; ++// 0x00000055647f3c7c: ori v1, v1, 0xffffffff ++// 0x00000055647f3c80: and v1, a6, v1 ++// 0x00000055647f3c84: or at, t8, v1 ++// 0x00000055647f3c88: scd at, 0x0(a7) ++// 0x00000055647f3c8c: beq at, zero, 0x00000055647f3c5c ; goto again ++// 0x00000055647f3c90: sll zero, zero, 0 ++// 0x00000055647f3c94: beq zero, zero, 0x00000055647f45ac ; goto done ++// 0x00000055647f3c98: sll zero, zero, 0 ++//nequal: ++// 0x00000055647f45a4: daddu a0, t9, zero ++// 0x00000055647f45a8: daddu at, zero, zero ++//done: ++// ++ ++void MacroAssembler::cmpxchg32(Register x_reg, Address dest, Register c_reg) { ++ // MIPS64 can use ll/sc for 32-bit atomic memory access ++ Label done, again, nequal; ++ ++ bind(again); ++ ++ if (UseSyncLevel >= 10000 || UseSyncLevel == 1000 || UseSyncLevel == 4000) sync(); ++ ll(AT, dest); ++ bne(AT, c_reg, nequal); ++ delayed()->nop(); ++ ++ move(AT, x_reg); ++ sc(AT, dest); ++ beq(AT, R0, again); ++ delayed()->nop(); ++ b(done); ++ delayed()->nop(); ++ ++ // not xchged ++ bind(nequal); ++ sync(); ++ move(c_reg, AT); ++ move(AT, R0); ++ ++ bind(done); ++} ++ ++void MacroAssembler::cmpxchg(Register x_reg, Address dest, Register c_reg) { ++ Label done, again, nequal; ++ ++ bind(again); ++ if (UseSyncLevel >= 10000 || UseSyncLevel == 1000 || UseSyncLevel == 4000) sync(); ++ lld(AT, dest); ++ bne(AT, c_reg, nequal); ++ delayed()->nop(); ++ ++ move(AT, x_reg); ++ scd(AT, dest); ++ beq(AT, R0, again); ++ delayed()->nop(); ++ b(done); ++ delayed()->nop(); ++ ++ // not xchged ++ bind(nequal); ++ sync(); ++ move(c_reg, AT); ++ move(AT, R0); ++ ++ bind(done); ++} ++ ++void MacroAssembler::cmpxchg8(Register x_regLo, Register x_regHi, Address dest, Register c_regLo, Register c_regHi) { ++ Label done, again, nequal; ++ ++ Register x_reg = x_regLo; ++ dsll32(x_regHi, x_regHi, 0); ++ dsll32(x_regLo, x_regLo, 0); ++ dsrl32(x_regLo, x_regLo, 0); ++ orr(x_reg, x_regLo, x_regHi); ++ ++ Register c_reg = c_regLo; ++ dsll32(c_regHi, c_regHi, 0); ++ 
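The long comment above cmpxchg32 describes how a 32-bit compare-and-swap can be layered on a 64-bit lld/scd: split the loaded word, compare only the low half, and store the low half of the new value merged with the untouched high half. A portable sketch of exactly that merge-and-retry step, with std::atomic standing in for lld/scd:

#include <atomic>
#include <cstdint>

// Sketch of a 32-bit CAS performed on the low half of a 64-bit ll/sc word,
// preserving the high half that the 64-bit store would otherwise clobber.
bool cas32_in_word_sketch(std::atomic<uint64_t>* word, uint32_t expected, uint32_t newval) {
  uint64_t old_word = word->load();                                   // lld
  for (;;) {
    if (static_cast<uint32_t>(old_word) != expected)                  // compare low 32 bits only
      return false;                                                   // goto nequal
    uint64_t new_word = (old_word & 0xFFFFFFFF00000000ull) | newval;  // keep the high half
    if (word->compare_exchange_weak(old_word, new_word))              // scd; retry on failure
      return true;
  }
}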
dsll32(c_regLo, c_regLo, 0); ++ dsrl32(c_regLo, c_regLo, 0); ++ orr(c_reg, c_regLo, c_regHi); ++ ++ bind(again); ++ ++ if (UseSyncLevel >= 10000 || UseSyncLevel == 1000 || UseSyncLevel == 4000) sync(); ++ lld(AT, dest); ++ bne(AT, c_reg, nequal); ++ delayed()->nop(); ++ ++ //move(AT, x_reg); ++ daddu(AT, x_reg, R0); ++ scd(AT, dest); ++ beq(AT, R0, again); ++ delayed()->nop(); ++ b(done); ++ delayed()->nop(); ++ ++ // not xchged ++ bind(nequal); ++ sync(); ++ //move(c_reg, AT); ++ //move(AT, R0); ++ daddu(c_reg, AT, R0); ++ daddu(AT, R0, R0); ++ bind(done); ++} ++ ++// be sure the three register is different ++void MacroAssembler::rem_s(FloatRegister fd, FloatRegister fs, FloatRegister ft, FloatRegister tmp) { ++ assert_different_registers(tmp, fs, ft); ++ div_s(tmp, fs, ft); ++ trunc_l_s(tmp, tmp); ++ cvt_s_l(tmp, tmp); ++ mul_s(tmp, tmp, ft); ++ sub_s(fd, fs, tmp); ++} ++ ++// be sure the three register is different ++void MacroAssembler::rem_d(FloatRegister fd, FloatRegister fs, FloatRegister ft, FloatRegister tmp) { ++ assert_different_registers(tmp, fs, ft); ++ div_d(tmp, fs, ft); ++ trunc_l_d(tmp, tmp); ++ cvt_d_l(tmp, tmp); ++ mul_d(tmp, tmp, ft); ++ sub_d(fd, fs, tmp); ++} ++ ++// Fast_Lock and Fast_Unlock used by C2 ++ ++// Because the transitions from emitted code to the runtime ++// monitorenter/exit helper stubs are so slow it's critical that ++// we inline both the stack-locking fast-path and the inflated fast path. ++// ++// See also: cmpFastLock and cmpFastUnlock. ++// ++// What follows is a specialized inline transliteration of the code ++// in slow_enter() and slow_exit(). If we're concerned about I$ bloat ++// another option would be to emit TrySlowEnter and TrySlowExit methods ++// at startup-time. These methods would accept arguments as ++// (Obj, Self, box, Scratch) and return success-failure ++// indications in the icc.ZFlag. Fast_Lock and Fast_Unlock would simply ++// marshal the arguments and emit calls to TrySlowEnter and TrySlowExit. ++// In practice, however, the # of lock sites is bounded and is usually small. ++// Besides the call overhead, TrySlowEnter and TrySlowExit might suffer ++// if the processor uses simple bimodal branch predictors keyed by EIP ++// Since the helper routines would be called from multiple synchronization ++// sites. ++// ++// An even better approach would be write "MonitorEnter()" and "MonitorExit()" ++// in java - using j.u.c and unsafe - and just bind the lock and unlock sites ++// to those specialized methods. That'd give us a mostly platform-independent ++// implementation that the JITs could optimize and inline at their pleasure. ++// Done correctly, the only time we'd need to cross to native could would be ++// to park() or unpark() threads. We'd also need a few more unsafe operators ++// to (a) prevent compiler-JIT reordering of non-volatile accesses, and ++// (b) explicit barriers or fence operations. ++// ++// TODO: ++// ++// * Arrange for C2 to pass "Self" into Fast_Lock and Fast_Unlock in one of the registers (scr). ++// This avoids manifesting the Self pointer in the Fast_Lock and Fast_Unlock terminals. ++// Given TLAB allocation, Self is usually manifested in a register, so passing it into ++// the lock operators would typically be faster than reifying Self. ++// ++// * Ideally I'd define the primitives as: ++// fast_lock (nax Obj, nax box, tmp, nax scr) where box, tmp and scr are KILLED. 
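rem_s and rem_d above compute a floating-point remainder as fs - trunc(fs / ft) * ft, truncating the quotient toward zero before multiplying back. The same computation in plain C++:

#include <cmath>

// Standalone restatement of rem_d: remainder = fs - trunc(fs / ft) * ft.
double rem_d_sketch(double fs, double ft) {
  double q = std::trunc(fs / ft);   // div_d + trunc_l_d + cvt_d_l
  return fs - q * ft;               // mul_d + sub_d
}

For comparison, std::fmod yields the same quantity computed exactly; the transliteration is also why the helper insists the temporary register differs from both inputs, since the truncated quotient must survive until the final multiply and subtract.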
++// fast_unlock (nax Obj, box, nax tmp) where box and tmp are KILLED ++// Unfortunately ADLC bugs prevent us from expressing the ideal form. ++// Instead, we're stuck with a rather awkward and brittle register assignments below. ++// Furthermore the register assignments are overconstrained, possibly resulting in ++// sub-optimal code near the synchronization site. ++// ++// * Eliminate the sp-proximity tests and just use "== Self" tests instead. ++// Alternately, use a better sp-proximity test. ++// ++// * Currently ObjectMonitor._Owner can hold either an sp value or a (THREAD *) value. ++// Either one is sufficient to uniquely identify a thread. ++// TODO: eliminate use of sp in _owner and use get_thread(tr) instead. ++// ++// * Intrinsify notify() and notifyAll() for the common cases where the ++// object is locked by the calling thread but the waitlist is empty. ++// avoid the expensive JNI call to JVM_Notify() and JVM_NotifyAll(). ++// ++// * use jccb and jmpb instead of jcc and jmp to improve code density. ++// But beware of excessive branch density on AMD Opterons. ++// ++// * Both Fast_Lock and Fast_Unlock set the ICC.ZF to indicate success ++// or failure of the fast-path. If the fast-path fails then we pass ++// control to the slow-path, typically in C. In Fast_Lock and ++// Fast_Unlock we often branch to DONE_LABEL, just to find that C2 ++// will emit a conditional branch immediately after the node. ++// So we have branches to branches and lots of ICC.ZF games. ++// Instead, it might be better to have C2 pass a "FailureLabel" ++// into Fast_Lock and Fast_Unlock. In the case of success, control ++// will drop through the node. ICC.ZF is undefined at exit. ++// In the case of failure, the node will branch directly to the ++// FailureLabel ++ ++ ++// obj: object to lock ++// box: on-stack box address (displaced header location) - KILLED ++// tmp: tmp -- KILLED ++// scr: tmp -- KILLED ++void MacroAssembler::fast_lock(Register objReg, Register boxReg, Register tmpReg, Register scrReg) { ++ ++ // Ensure the register assignents are disjoint ++ guarantee (objReg != boxReg, "") ; ++ guarantee (objReg != tmpReg, "") ; ++ guarantee (objReg != scrReg, "") ; ++ guarantee (boxReg != tmpReg, "") ; ++ guarantee (boxReg != scrReg, "") ; ++ ++ ++ block_comment("FastLock"); ++ if (PrintBiasedLockingStatistics) { ++ push(tmpReg); ++ atomic_inc32((address)BiasedLocking::total_entry_count_addr(), 1, AT, tmpReg); ++ pop(tmpReg); ++ } ++ ++ if (EmitSync & 1) { ++ move(AT, 0x0); ++ return; ++ } else ++ if (EmitSync & 2) { ++ Label DONE_LABEL ; ++ if (UseBiasedLocking) { ++ // Note: tmpReg maps to the swap_reg argument and scrReg to the tmp_reg argument. 
++ biased_locking_enter(boxReg, objReg, tmpReg, scrReg, false, DONE_LABEL, NULL); ++ } ++ ++ ld(tmpReg, Address(objReg, 0)) ; // fetch markword ++ ori(tmpReg, tmpReg, 0x1); ++ sd(tmpReg, Address(boxReg, 0)); // Anticipate successful CAS ++ ++ cmpxchg(boxReg, Address(objReg, 0), tmpReg); // Updates tmpReg ++ bne(AT, R0, DONE_LABEL); ++ delayed()->nop(); ++ ++ // Recursive locking ++ dsubu(tmpReg, tmpReg, SP); ++ li(AT, (7 - os::vm_page_size() )); ++ andr(tmpReg, tmpReg, AT); ++ sd(tmpReg, Address(boxReg, 0)); ++ bind(DONE_LABEL) ; ++ } else { ++ // Possible cases that we'll encounter in fast_lock ++ // ------------------------------------------------ ++ // * Inflated ++ // -- unlocked ++ // -- Locked ++ // = by self ++ // = by other ++ // * biased ++ // -- by Self ++ // -- by other ++ // * neutral ++ // * stack-locked ++ // -- by self ++ // = sp-proximity test hits ++ // = sp-proximity test generates false-negative ++ // -- by other ++ // ++ ++ Label IsInflated, DONE_LABEL, PopDone ; ++ ++ // TODO: optimize away redundant LDs of obj->mark and improve the markword triage ++ // order to reduce the number of conditional branches in the most common cases. ++ // Beware -- there's a subtle invariant that fetch of the markword ++ // at [FETCH], below, will never observe a biased encoding (*101b). ++ // If this invariant is not held we risk exclusion (safety) failure. ++ if (UseBiasedLocking && !UseOptoBiasInlining) { ++ biased_locking_enter(boxReg, objReg, tmpReg, scrReg, false, DONE_LABEL, NULL); ++ } ++ ++ ld(tmpReg, Address(objReg, 0)) ; //Fetch the markword of the object. ++ andi(AT, tmpReg, markOopDesc::monitor_value); ++ bne(AT, R0, IsInflated); // inflated vs stack-locked|neutral|bias ++ delayed()->nop(); ++ ++ // Attempt stack-locking ... ++ ori (tmpReg, tmpReg, markOopDesc::unlocked_value); ++ sd(tmpReg, Address(boxReg, 0)); // Anticipate successful CAS ++ //if (os::is_MP()) { ++ // sync(); ++ //} ++ ++ cmpxchg(boxReg, Address(objReg, 0), tmpReg); // Updates tmpReg ++ //AT == 1: unlocked ++ ++ if (PrintBiasedLockingStatistics) { ++ Label L; ++ beq(AT, R0, L); ++ delayed()->nop(); ++ push(T0); ++ push(T1); ++ atomic_inc32((address)BiasedLocking::fast_path_entry_count_addr(), 1, T0, T1); ++ pop(T1); ++ pop(T0); ++ bind(L); ++ } ++ bne(AT, R0, DONE_LABEL); ++ delayed()->nop(); ++ ++ // Recursive locking ++ // The object is stack-locked: markword contains stack pointer to BasicLock. ++ // Locked by current thread if difference with current SP is less than one page. ++ dsubu(tmpReg, tmpReg, SP); ++ li(AT, 7 - os::vm_page_size() ); ++ andr(tmpReg, tmpReg, AT); ++ sd(tmpReg, Address(boxReg, 0)); ++ if (PrintBiasedLockingStatistics) { ++ Label L; ++ // tmpReg == 0 => BiasedLocking::_fast_path_entry_count++ ++ bne(tmpReg, R0, L); ++ delayed()->nop(); ++ push(T0); ++ push(T1); ++ atomic_inc32((address)BiasedLocking::fast_path_entry_count_addr(), 1, T0, T1); ++ pop(T1); ++ pop(T0); ++ bind(L); ++ } ++ sltiu(AT, tmpReg, 1); // AT = (tmpReg == 0) ? 1 : 0 ++ ++ b(DONE_LABEL) ; ++ delayed()->nop(); ++ ++ bind(IsInflated) ; ++ // The object's monitor m is unlocked iff m->owner == NULL, ++ // otherwise m->owner may contain a thread or a stack address. ++ ++ // TODO: someday avoid the ST-before-CAS penalty by ++ // relocating (deferring) the following ST. ++ // We should also think about trying a CAS without having ++ // fetched _owner. If the CAS is successful we may ++ // avoid an RTO->RTS upgrade on the $line. 
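Viewed at a higher level, the stack-locking fast path emitted above stores the anticipated unlocked mark word into the on-stack BasicLock ("box"), CASes the object header to point at the box, and treats a CAS failure as a recursive lock when the observed header already points into the current thread's stack. A rough, platform-independent sketch of that protocol follows; it uses std::atomic and hypothetical names (try_stack_lock, page_size) and is not part of the MIPS/LoongArch port itself.

    #include <atomic>
    #include <cstdint>

    // Illustrative sketch only; the real header layout and recursion test differ.
    bool try_stack_lock(std::atomic<uintptr_t>* header,  // object mark word
                        uintptr_t* box,                   // on-stack BasicLock slot
                        uintptr_t sp, uintptr_t page_size) {
      uintptr_t expected = header->load(std::memory_order_relaxed) | 1;  // unlocked form
      *box = expected;                          // anticipate a successful CAS
      if (header->compare_exchange_strong(expected, (uintptr_t)box)) {
        return true;                            // stack lock acquired
      }
      // CAS failed: "expected" now holds the current header. If it already points
      // into our own stack frame (crudely: within one page of SP), this is a
      // recursive lock; record that with a zero displaced header.
      if (expected - sp < page_size) {
        *box = 0;
        return true;
      }
      return false;                             // fall through to the slow path
    }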
++ // Without cast to int32_t a movptr will destroy r10 which is typically obj ++ li(AT, (int32_t)intptr_t(markOopDesc::unused_mark())); ++ sd(AT, Address(boxReg, 0)); ++ ++ move(boxReg, tmpReg) ; ++ ld(tmpReg, Address(tmpReg, ObjectMonitor::owner_offset_in_bytes()-2)) ; ++ // if (m->owner != 0) => AT = 0, goto slow path. ++ move(AT, R0); ++ bne(tmpReg, R0, DONE_LABEL); ++ delayed()->nop(); ++ ++#ifndef OPT_THREAD ++ get_thread (TREG) ; ++#endif ++ // It's inflated and appears unlocked ++ //if (os::is_MP()) { ++ // sync(); ++ //} ++ cmpxchg(TREG, Address(boxReg, ObjectMonitor::owner_offset_in_bytes()-2), tmpReg) ; ++ // Intentional fall-through into DONE_LABEL ... ++ ++ ++ // DONE_LABEL is a hot target - we'd really like to place it at the ++ // start of cache line by padding with NOPs. ++ // See the AMD and Intel software optimization manuals for the ++ // most efficient "long" NOP encodings. ++ // Unfortunately none of our alignment mechanisms suffice. ++ bind(DONE_LABEL); ++ ++ // At DONE_LABEL the AT is set as follows ... ++ // Fast_Unlock uses the same protocol. ++ // AT == 1 -> Success ++ // AT == 0 -> Failure - force control through the slow-path ++ ++ // Avoid branch-to-branch on AMD processors ++ // This appears to be superstition. ++ if (EmitSync & 32) nop() ; ++ ++ } ++} ++ ++// obj: object to unlock ++// box: box address (displaced header location), killed. ++// tmp: killed tmp; cannot be obj nor box. ++// ++// Some commentary on balanced locking: ++// ++// Fast_Lock and Fast_Unlock are emitted only for provably balanced lock sites. ++// Methods that don't have provably balanced locking are forced to run in the ++// interpreter - such methods won't be compiled to use fast_lock and fast_unlock. ++// The interpreter provides two properties: ++// I1: At return-time the interpreter automatically and quietly unlocks any ++// objects acquired the current activation (frame). Recall that the ++// interpreter maintains an on-stack list of locks currently held by ++// a frame. ++// I2: If a method attempts to unlock an object that is not held by the ++// the frame the interpreter throws IMSX. ++// ++// Lets say A(), which has provably balanced locking, acquires O and then calls B(). ++// B() doesn't have provably balanced locking so it runs in the interpreter. ++// Control returns to A() and A() unlocks O. By I1 and I2, above, we know that O ++// is still locked by A(). ++// ++// The only other source of unbalanced locking would be JNI. The "Java Native Interface: ++// Programmer's Guide and Specification" claims that an object locked by jni_monitorenter ++// should not be unlocked by "normal" java-level locking and vice-versa. The specification ++// doesn't specify what will occur if a program engages in such mixed-mode locking, however. ++ ++void MacroAssembler::fast_unlock(Register objReg, Register boxReg, Register tmpReg) { ++ ++ guarantee (objReg != boxReg, "") ; ++ guarantee (objReg != tmpReg, "") ; ++ guarantee (boxReg != tmpReg, "") ; ++ ++ block_comment("FastUnlock"); ++ ++ ++ if (EmitSync & 4) { ++ // Disable - inhibit all inlining. Force control through the slow-path ++ move(AT, 0x0); ++ return; ++ } else ++ if (EmitSync & 8) { ++ Label DONE_LABEL ; ++ if (UseBiasedLocking) { ++ biased_locking_exit(objReg, tmpReg, DONE_LABEL); ++ } ++ // classic stack-locking code ... 
++ ld(tmpReg, Address(boxReg, 0)) ; ++ beq(tmpReg, R0, DONE_LABEL) ; ++ move(AT, 0x1); // delay slot ++ ++ cmpxchg(tmpReg, Address(objReg, 0), boxReg); ++ bind(DONE_LABEL); ++ } else { ++ Label DONE_LABEL, Stacked, CheckSucc, Inflated ; ++ ++ // Critically, the biased locking test must have precedence over ++ // and appear before the (box->dhw == 0) recursive stack-lock test. ++ if (UseBiasedLocking && !UseOptoBiasInlining) { ++ biased_locking_exit(objReg, tmpReg, DONE_LABEL); ++ } ++ ++ ld(AT, Address(boxReg, 0)) ; // Examine the displaced header ++ beq(AT, R0, DONE_LABEL) ; // 0 indicates recursive stack-lock ++ delayed()->daddiu(AT, R0, 0x1); ++ ++ ld(tmpReg, Address(objReg, 0)) ; // Examine the object's markword ++ andi(AT, tmpReg, markOopDesc::monitor_value) ; // Inflated? ++ beq(AT, R0, Stacked) ; // Inflated? ++ delayed()->nop(); ++ ++ bind(Inflated) ; ++ // It's inflated. ++ // Despite our balanced locking property we still check that m->_owner == Self ++ // as java routines or native JNI code called by this thread might ++ // have released the lock. ++ // Refer to the comments in synchronizer.cpp for how we might encode extra ++ // state in _succ so we can avoid fetching EntryList|cxq. ++ // ++ // I'd like to add more cases in fast_lock() and fast_unlock() -- ++ // such as recursive enter and exit -- but we have to be wary of ++ // I$ bloat, T$ effects and BP$ effects. ++ // ++ // If there's no contention try a 1-0 exit. That is, exit without ++ // a costly MEMBAR or CAS. See synchronizer.cpp for details on how ++ // we detect and recover from the race that the 1-0 exit admits. ++ // ++ // Conceptually Fast_Unlock() must execute a STST|LDST "release" barrier ++ // before it STs null into _owner, releasing the lock. Updates ++ // to data protected by the critical section must be visible before ++ // we drop the lock (and thus before any other thread could acquire ++ // the lock and observe the fields protected by the lock). 
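The comment above describes the "1-0" exit that the inflated path below implements: verify that the current thread owns the monitor with no recursions and no queued waiters, then drop the lock with a plain store preceded by a barrier. A hedged sketch with std::atomic and a simplified, hypothetical monitor layout (not HotSpot's ObjectMonitor):

    #include <atomic>
    #include <cstdint>

    // Hypothetical monitor layout, for illustration only.
    struct Monitor {
      std::atomic<void*>    owner;
      std::atomic<intptr_t> recursions;
      std::atomic<void*>    cxq;        // recently arrived waiters
      std::atomic<void*>    EntryList;  // threads ready to contend
    };

    // Returns false when the slow-path exit (with wakeups) is required.
    bool try_fast_exit(Monitor* m, void* self) {
      if (m->owner.load(std::memory_order_relaxed) != self)          return false;
      if (m->recursions.load(std::memory_order_relaxed) != 0)        return false;
      if (m->cxq.load(std::memory_order_relaxed) != nullptr)         return false;
      if (m->EntryList.load(std::memory_order_relaxed) != nullptr)   return false;
      // The release store plays the role of the sync() before sd(R0, owner) above:
      // writes made in the critical section become visible before the lock is
      // observed as free.
      m->owner.store(nullptr, std::memory_order_release);
      return true;
    }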
++#ifndef OPT_THREAD ++ get_thread (TREG) ; ++#endif ++ ++ // It's inflated ++ ld(boxReg, Address (tmpReg, ObjectMonitor::owner_offset_in_bytes()-2)) ; ++ xorr(boxReg, boxReg, TREG); ++ ++ ld(AT, Address (tmpReg, ObjectMonitor::recursions_offset_in_bytes()-2)) ; ++ orr(boxReg, boxReg, AT); ++ ++ move(AT, R0); ++ bne(boxReg, R0, DONE_LABEL); ++ delayed()->nop(); ++ ++ ld(boxReg, Address (tmpReg, ObjectMonitor::cxq_offset_in_bytes()-2)) ; ++ ld(AT, Address (tmpReg, ObjectMonitor::EntryList_offset_in_bytes()-2)) ; ++ orr(boxReg, boxReg, AT); ++ ++ move(AT, R0); ++ bne(boxReg, R0, DONE_LABEL); ++ delayed()->nop(); ++ ++ sync(); ++ sd(R0, Address (tmpReg, ObjectMonitor::owner_offset_in_bytes()-2)) ; ++ move(AT, 0x1); ++ b(DONE_LABEL); ++ delayed()->nop(); ++ ++ bind (Stacked); ++ ld(tmpReg, Address(boxReg, 0)) ; ++ //if (os::is_MP()) { sync(); } ++ cmpxchg(tmpReg, Address(objReg, 0), boxReg); ++ ++ if (EmitSync & 65536) { ++ bind (CheckSucc); ++ } ++ ++ bind(DONE_LABEL); ++ ++ // Avoid branch to branch on AMD processors ++ if (EmitSync & 32768) { nop() ; } ++ } ++} ++ ++void MacroAssembler::align(int modulus) { ++ while (offset() % modulus != 0) nop(); ++} ++ ++ ++void MacroAssembler::verify_FPU(int stack_depth, const char* s) { ++ //Unimplemented(); ++} ++ ++Register caller_saved_registers[] = {AT, V0, V1, A0, A1, A2, A3, A4, A5, A6, A7, T0, T1, T2, T3, T8, T9, GP, RA, FP}; ++Register caller_saved_registers_except_v0[] = {AT, V1, A0, A1, A2, A3, A4, A5, A6, A7, T0, T1, T2, T3, T8, T9, GP, RA, FP}; ++ ++//In MIPS64, F0~23 are all caller-saved registers ++FloatRegister caller_saved_fpu_registers[] = {F0, F12, F13}; ++ ++// We preserve all caller-saved register ++void MacroAssembler::pushad(){ ++ int i; ++ ++ // Fixed-point registers ++ int len = sizeof(caller_saved_registers) / sizeof(caller_saved_registers[0]); ++ daddiu(SP, SP, -1 * len * wordSize); ++ for (i = 0; i < len; i++) ++ { ++ sd(caller_saved_registers[i], SP, (len - i - 1) * wordSize); ++ } ++ ++ // Floating-point registers ++ len = sizeof(caller_saved_fpu_registers) / sizeof(caller_saved_fpu_registers[0]); ++ daddiu(SP, SP, -1 * len * wordSize); ++ for (i = 0; i < len; i++) ++ { ++ sdc1(caller_saved_fpu_registers[i], SP, (len - i - 1) * wordSize); ++ } ++}; ++ ++void MacroAssembler::popad(){ ++ int i; ++ ++ // Floating-point registers ++ int len = sizeof(caller_saved_fpu_registers) / sizeof(caller_saved_fpu_registers[0]); ++ for (i = 0; i < len; i++) ++ { ++ ldc1(caller_saved_fpu_registers[i], SP, (len - i - 1) * wordSize); ++ } ++ daddiu(SP, SP, len * wordSize); ++ ++ // Fixed-point registers ++ len = sizeof(caller_saved_registers) / sizeof(caller_saved_registers[0]); ++ for (i = 0; i < len; i++) ++ { ++ ld(caller_saved_registers[i], SP, (len - i - 1) * wordSize); ++ } ++ daddiu(SP, SP, len * wordSize); ++}; ++ ++// We preserve all caller-saved register except V0 ++void MacroAssembler::pushad_except_v0() { ++ int i; ++ ++ // Fixed-point registers ++ int len = sizeof(caller_saved_registers_except_v0) / sizeof(caller_saved_registers_except_v0[0]); ++ daddiu(SP, SP, -1 * len * wordSize); ++ for (i = 0; i < len; i++) { ++ sd(caller_saved_registers_except_v0[i], SP, (len - i - 1) * wordSize); ++ } ++ ++ // Floating-point registers ++ len = sizeof(caller_saved_fpu_registers) / sizeof(caller_saved_fpu_registers[0]); ++ daddiu(SP, SP, -1 * len * wordSize); ++ for (i = 0; i < len; i++) { ++ sdc1(caller_saved_fpu_registers[i], SP, (len - i - 1) * wordSize); ++ } ++} ++ ++void MacroAssembler::popad_except_v0() { ++ int i; ++ ++ // 
Floating-point registers ++ int len = sizeof(caller_saved_fpu_registers) / sizeof(caller_saved_fpu_registers[0]); ++ for (i = 0; i < len; i++) { ++ ldc1(caller_saved_fpu_registers[i], SP, (len - i - 1) * wordSize); ++ } ++ daddiu(SP, SP, len * wordSize); ++ ++ // Fixed-point registers ++ len = sizeof(caller_saved_registers_except_v0) / sizeof(caller_saved_registers_except_v0[0]); ++ for (i = 0; i < len; i++) { ++ ld(caller_saved_registers_except_v0[i], SP, (len - i - 1) * wordSize); ++ } ++ daddiu(SP, SP, len * wordSize); ++} ++ ++void MacroAssembler::push2(Register reg1, Register reg2) { ++ daddiu(SP, SP, -16); ++ sd(reg1, SP, 8); ++ sd(reg2, SP, 0); ++} ++ ++void MacroAssembler::pop2(Register reg1, Register reg2) { ++ ld(reg1, SP, 8); ++ ld(reg2, SP, 0); ++ daddiu(SP, SP, 16); ++} ++ ++// for UseCompressedOops Option ++void MacroAssembler::load_klass(Register dst, Register src) { ++ if(UseCompressedClassPointers){ ++ lwu(dst, Address(src, oopDesc::klass_offset_in_bytes())); ++ decode_klass_not_null(dst); ++ } else ++ ld(dst, src, oopDesc::klass_offset_in_bytes()); ++} ++ ++void MacroAssembler::store_klass(Register dst, Register src) { ++ if(UseCompressedClassPointers){ ++ encode_klass_not_null(src); ++ sw(src, dst, oopDesc::klass_offset_in_bytes()); ++ } else { ++ sd(src, dst, oopDesc::klass_offset_in_bytes()); ++ } ++} ++ ++void MacroAssembler::load_prototype_header(Register dst, Register src) { ++ load_klass(dst, src); ++ ld(dst, Address(dst, Klass::prototype_header_offset())); ++} ++ ++void MacroAssembler::store_klass_gap(Register dst, Register src) { ++ if (UseCompressedClassPointers) { ++ sw(src, dst, oopDesc::klass_gap_offset_in_bytes()); ++ } ++} ++ ++void MacroAssembler::load_heap_oop(Register dst, Address src) { ++ if(UseCompressedOops){ ++ lwu(dst, src); ++ decode_heap_oop(dst); ++ } else { ++ ld(dst, src); ++ } ++} ++ ++void MacroAssembler::store_heap_oop(Address dst, Register src){ ++ if(UseCompressedOops){ ++ assert(!dst.uses(src), "not enough registers"); ++ encode_heap_oop(src); ++ sw(src, dst); ++ } else { ++ sd(src, dst); ++ } ++} ++ ++void MacroAssembler::store_heap_oop_null(Address dst){ ++ if(UseCompressedOops){ ++ sw(R0, dst); ++ } else { ++ sd(R0, dst); ++ } ++} ++ ++#ifdef ASSERT ++void MacroAssembler::verify_heapbase(const char* msg) { ++ assert (UseCompressedOops || UseCompressedClassPointers, "should be compressed"); ++ assert (Universe::heap() != NULL, "java heap should be initialized"); ++} ++#endif ++ ++ ++// Algorithm must match oop.inline.hpp encode_heap_oop. 
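Before the assembler implementations that follow, here is a scalar sketch of the compression scheme they must agree with. The variables narrow_base and narrow_shift stand in for Universe::narrow_oop_base() and Universe::narrow_oop_shift(); they are assumptions of the sketch, not HotSpot API.

    #include <cstdint>

    static uintptr_t narrow_base;   // heap base used for compressed oops (may be 0)
    static int       narrow_shift;  // log2 of object alignment (may be 0)

    uint32_t encode_heap_oop_model(uintptr_t oop) {
      if (oop == 0) return 0;       // NULL must stay NULL (the movz in the code below)
      return (uint32_t)((oop - narrow_base) >> narrow_shift);
    }

    uintptr_t decode_heap_oop_model(uint32_t narrow) {
      if (narrow == 0) return 0;
      return narrow_base + ((uintptr_t)narrow << narrow_shift);
    }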
++void MacroAssembler::encode_heap_oop(Register r) { ++#ifdef ASSERT ++ verify_heapbase("MacroAssembler::encode_heap_oop:heap base corrupted?"); ++#endif ++ verify_oop(r, "broken oop in encode_heap_oop"); ++ if (Universe::narrow_oop_base() == NULL) { ++ if (Universe::narrow_oop_shift() != 0) { ++ assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong"); ++ shr(r, LogMinObjAlignmentInBytes); ++ } ++ return; ++ } ++ ++ movz(r, S5_heapbase, r); ++ dsubu(r, r, S5_heapbase); ++ if (Universe::narrow_oop_shift() != 0) { ++ assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong"); ++ shr(r, LogMinObjAlignmentInBytes); ++ } ++} ++ ++void MacroAssembler::encode_heap_oop(Register dst, Register src) { ++#ifdef ASSERT ++ verify_heapbase("MacroAssembler::encode_heap_oop:heap base corrupted?"); ++#endif ++ verify_oop(src, "broken oop in encode_heap_oop"); ++ if (Universe::narrow_oop_base() == NULL) { ++ if (Universe::narrow_oop_shift() != 0) { ++ assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong"); ++ dsrl(dst, src, LogMinObjAlignmentInBytes); ++ } else { ++ if (dst != src) move(dst, src); ++ } ++ } else { ++ if (dst == src) { ++ movz(dst, S5_heapbase, dst); ++ dsubu(dst, dst, S5_heapbase); ++ if (Universe::narrow_oop_shift() != 0) { ++ assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong"); ++ shr(dst, LogMinObjAlignmentInBytes); ++ } ++ } else { ++ dsubu(dst, src, S5_heapbase); ++ if (Universe::narrow_oop_shift() != 0) { ++ assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong"); ++ shr(dst, LogMinObjAlignmentInBytes); ++ } ++ movz(dst, R0, src); ++ } ++ } ++} ++ ++void MacroAssembler::encode_heap_oop_not_null(Register r) { ++ assert (UseCompressedOops, "should be compressed"); ++#ifdef ASSERT ++ if (CheckCompressedOops) { ++ Label ok; ++ bne(r, R0, ok); ++ delayed()->nop(); ++ stop("null oop passed to encode_heap_oop_not_null"); ++ bind(ok); ++ } ++#endif ++ verify_oop(r, "broken oop in encode_heap_oop_not_null"); ++ if (Universe::narrow_oop_base() != NULL) { ++ dsubu(r, r, S5_heapbase); ++ } ++ if (Universe::narrow_oop_shift() != 0) { ++ assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong"); ++ shr(r, LogMinObjAlignmentInBytes); ++ } ++ ++} ++ ++void MacroAssembler::encode_heap_oop_not_null(Register dst, Register src) { ++ assert (UseCompressedOops, "should be compressed"); ++#ifdef ASSERT ++ if (CheckCompressedOops) { ++ Label ok; ++ bne(src, R0, ok); ++ delayed()->nop(); ++ stop("null oop passed to encode_heap_oop_not_null2"); ++ bind(ok); ++ } ++#endif ++ verify_oop(src, "broken oop in encode_heap_oop_not_null2"); ++ ++ if (Universe::narrow_oop_base() != NULL) { ++ dsubu(dst, src, S5_heapbase); ++ if (Universe::narrow_oop_shift() != 0) { ++ assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong"); ++ shr(dst, LogMinObjAlignmentInBytes); ++ } ++ } else { ++ if (Universe::narrow_oop_shift() != 0) { ++ assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong"); ++ dsrl(dst, src, LogMinObjAlignmentInBytes); ++ } else { ++ if (dst != src) move(dst, src); ++ } ++ } ++} ++ ++void MacroAssembler::decode_heap_oop(Register r) { ++#ifdef ASSERT ++ verify_heapbase("MacroAssembler::decode_heap_oop corrupted?"); ++#endif ++ if (Universe::narrow_oop_base() == NULL) { ++ if (Universe::narrow_oop_shift() != 0) { ++ assert (LogMinObjAlignmentInBytes == 
Universe::narrow_oop_shift(), "decode alg wrong"); ++ shl(r, LogMinObjAlignmentInBytes); ++ } ++ } else { ++ move(AT, r); ++ if (Universe::narrow_oop_shift() != 0) { ++ assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong"); ++ shl(r, LogMinObjAlignmentInBytes); ++ } ++ daddu(r, r, S5_heapbase); ++ movz(r, R0, AT); ++ } ++ verify_oop(r, "broken oop in decode_heap_oop"); ++} ++ ++void MacroAssembler::decode_heap_oop(Register dst, Register src) { ++#ifdef ASSERT ++ verify_heapbase("MacroAssembler::decode_heap_oop corrupted?"); ++#endif ++ if (Universe::narrow_oop_base() == NULL) { ++ if (Universe::narrow_oop_shift() != 0) { ++ assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong"); ++ if (dst != src) nop(); // DON'T DELETE THIS GUY. ++ dsll(dst, src, LogMinObjAlignmentInBytes); ++ } else { ++ if (dst != src) move(dst, src); ++ } ++ } else { ++ if (dst == src) { ++ move(AT, dst); ++ if (Universe::narrow_oop_shift() != 0) { ++ assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong"); ++ shl(dst, LogMinObjAlignmentInBytes); ++ } ++ daddu(dst, dst, S5_heapbase); ++ movz(dst, R0, AT); ++ } else { ++ if (Universe::narrow_oop_shift() != 0) { ++ assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong"); ++ dsll(dst, src, LogMinObjAlignmentInBytes); ++ daddu(dst, dst, S5_heapbase); ++ } else { ++ daddu(dst, src, S5_heapbase); ++ } ++ movz(dst, R0, src); ++ } ++ } ++ verify_oop(dst, "broken oop in decode_heap_oop"); ++} ++ ++void MacroAssembler::decode_heap_oop_not_null(Register r) { ++ // Note: it will change flags ++ assert (UseCompressedOops, "should only be used for compressed headers"); ++ assert (Universe::heap() != NULL, "java heap should be initialized"); ++ // Cannot assert, unverified entry point counts instructions (see .ad file) ++ // vtableStubs also counts instructions in pd_code_size_limit. ++ // Also do not verify_oop as this is called by verify_oop. ++ if (Universe::narrow_oop_shift() != 0) { ++ assert(LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong"); ++ shl(r, LogMinObjAlignmentInBytes); ++ if (Universe::narrow_oop_base() != NULL) { ++ daddu(r, r, S5_heapbase); ++ } ++ } else { ++ assert (Universe::narrow_oop_base() == NULL, "sanity"); ++ } ++} ++ ++void MacroAssembler::decode_heap_oop_not_null(Register dst, Register src) { ++ assert (UseCompressedOops, "should only be used for compressed headers"); ++ assert (Universe::heap() != NULL, "java heap should be initialized"); ++ ++ // Cannot assert, unverified entry point counts instructions (see .ad file) ++ // vtableStubs also counts instructions in pd_code_size_limit. ++ // Also do not verify_oop as this is called by verify_oop. 
++ //lea(dst, Address(S5_heapbase, src, Address::times_8, 0)); ++ if (Universe::narrow_oop_shift() != 0) { ++ assert(LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong"); ++ if (LogMinObjAlignmentInBytes == Address::times_8) { ++ dsll(dst, src, LogMinObjAlignmentInBytes); ++ daddu(dst, dst, S5_heapbase); ++ } else { ++ dsll(dst, src, LogMinObjAlignmentInBytes); ++ if (Universe::narrow_oop_base() != NULL) { ++ daddu(dst, dst, S5_heapbase); ++ } ++ } ++ } else { ++ assert (Universe::narrow_oop_base() == NULL, "sanity"); ++ if (dst != src) { ++ move(dst, src); ++ } ++ } ++} ++ ++void MacroAssembler::encode_klass_not_null(Register r) { ++ if (Universe::narrow_klass_base() != NULL) { ++ assert(r != AT, "Encoding a klass in AT"); ++ set64(AT, (int64_t)Universe::narrow_klass_base()); ++ dsubu(r, r, AT); ++ } ++ if (Universe::narrow_klass_shift() != 0) { ++ assert (LogKlassAlignmentInBytes == Universe::narrow_klass_shift(), "decode alg wrong"); ++ shr(r, LogKlassAlignmentInBytes); ++ } ++} ++ ++void MacroAssembler::encode_klass_not_null(Register dst, Register src) { ++ if (dst == src) { ++ encode_klass_not_null(src); ++ } else { ++ if (Universe::narrow_klass_base() != NULL) { ++ set64(dst, (int64_t)Universe::narrow_klass_base()); ++ dsubu(dst, src, dst); ++ if (Universe::narrow_klass_shift() != 0) { ++ assert (LogKlassAlignmentInBytes == Universe::narrow_klass_shift(), "decode alg wrong"); ++ shr(dst, LogKlassAlignmentInBytes); ++ } ++ } else { ++ if (Universe::narrow_klass_shift() != 0) { ++ assert (LogKlassAlignmentInBytes == Universe::narrow_klass_shift(), "decode alg wrong"); ++ dsrl(dst, src, LogKlassAlignmentInBytes); ++ } else { ++ move(dst, src); ++ } ++ } ++ } ++} ++ ++// Function instr_size_for_decode_klass_not_null() counts the instructions ++// generated by decode_klass_not_null(register r) and reinit_heapbase(), ++// when (Universe::heap() != NULL). Hence, if the instructions they ++// generate change, then this method needs to be updated. ++int MacroAssembler::instr_size_for_decode_klass_not_null() { ++ assert (UseCompressedClassPointers, "only for compressed klass ptrs"); ++ if (Universe::narrow_klass_base() != NULL) { ++ // mov64 + addq + shlq? + mov64 (for reinit_heapbase()). ++ return (Universe::narrow_klass_shift() == 0 ? 4 * 9 : 4 * 10); ++ } else { ++ // longest load decode klass function, mov64, leaq ++ return (Universe::narrow_klass_shift() == 0 ? 4 * 0 : 4 * 1); ++ } ++} ++ ++void MacroAssembler::decode_klass_not_null(Register r) { ++ assert (UseCompressedClassPointers, "should only be used for compressed headers"); ++ assert(r != AT, "Decoding a klass in AT"); ++ // Cannot assert, unverified entry point counts instructions (see .ad file) ++ // vtableStubs also counts instructions in pd_code_size_limit. ++ // Also do not verify_oop as this is called by verify_oop. ++ if (Universe::narrow_klass_shift() != 0) { ++ assert(LogKlassAlignmentInBytes == Universe::narrow_klass_shift(), "decode alg wrong"); ++ shl(r, LogKlassAlignmentInBytes); ++ } ++ if (Universe::narrow_klass_base() != NULL) { ++ set64(AT, (int64_t)Universe::narrow_klass_base()); ++ daddu(r, r, AT); ++ //Not neccessary for MIPS at all. 
++ //reinit_heapbase(); ++ } ++} ++ ++void MacroAssembler::decode_klass_not_null(Register dst, Register src) { ++ assert (UseCompressedClassPointers, "should only be used for compressed headers"); ++ ++ if (dst == src) { ++ decode_klass_not_null(dst); ++ } else { ++ // Cannot assert, unverified entry point counts instructions (see .ad file) ++ // vtableStubs also counts instructions in pd_code_size_limit. ++ // Also do not verify_oop as this is called by verify_oop. ++ set64(dst, (int64_t)Universe::narrow_klass_base()); ++ if (Universe::narrow_klass_shift() != 0) { ++ assert(LogKlassAlignmentInBytes == Universe::narrow_klass_shift(), "decode alg wrong"); ++ assert(LogKlassAlignmentInBytes == Address::times_8, "klass not aligned on 64bits?"); ++ dsll(AT, src, Address::times_8); ++ daddu(dst, dst, AT); ++ } else { ++ daddu(dst, src, dst); ++ } ++ } ++} ++ ++void MacroAssembler::incrementl(Register reg, int value) { ++ if (value == min_jint) { ++ move(AT, value); ++ addu32(reg, reg, AT); ++ return; ++ } ++ if (value < 0) { decrementl(reg, -value); return; } ++ if (value == 0) { ; return; } ++ ++ move(AT, value); ++ addu32(reg, reg, AT); ++} ++ ++void MacroAssembler::decrementl(Register reg, int value) { ++ if (value == min_jint) { ++ move(AT, value); ++ subu32(reg, reg, AT); ++ return; ++ } ++ if (value < 0) { incrementl(reg, -value); return; } ++ if (value == 0) { ; return; } ++ ++ move(AT, value); ++ subu32(reg, reg, AT); ++} ++ ++void MacroAssembler::reinit_heapbase() { ++ if (UseCompressedOops || UseCompressedClassPointers) { ++ if (Universe::heap() != NULL) { ++ if (Universe::narrow_oop_base() == NULL) { ++ move(S5_heapbase, R0); ++ } else { ++ set64(S5_heapbase, (int64_t)Universe::narrow_ptrs_base()); ++ } ++ } else { ++ set64(S5_heapbase, (intptr_t)Universe::narrow_ptrs_base_addr()); ++ ld(S5_heapbase, S5_heapbase, 0); ++ } ++ } ++} ++ ++void MacroAssembler::check_klass_subtype(Register sub_klass, ++ Register super_klass, ++ Register temp_reg, ++ Label& L_success) { ++//implement ind gen_subtype_check ++ Label L_failure; ++ check_klass_subtype_fast_path(sub_klass, super_klass, temp_reg, &L_success, &L_failure, NULL); ++ check_klass_subtype_slow_path(sub_klass, super_klass, temp_reg, noreg, &L_success, NULL); ++ bind(L_failure); ++} ++ ++SkipIfEqual::SkipIfEqual( ++ MacroAssembler* masm, const bool* flag_addr, bool value) { ++ _masm = masm; ++ _masm->li(AT, (address)flag_addr); ++ _masm->lb(AT, AT, 0); ++ _masm->addiu(AT, AT, -value); ++ _masm->beq(AT, R0, _label); ++ _masm->delayed()->nop(); ++} ++void MacroAssembler::check_klass_subtype_fast_path(Register sub_klass, ++ Register super_klass, ++ Register temp_reg, ++ Label* L_success, ++ Label* L_failure, ++ Label* L_slow_path, ++ RegisterOrConstant super_check_offset) { ++ assert_different_registers(sub_klass, super_klass, temp_reg); ++ bool must_load_sco = (super_check_offset.constant_or_zero() == -1); ++ if (super_check_offset.is_register()) { ++ assert_different_registers(sub_klass, super_klass, ++ super_check_offset.as_register()); ++ } else if (must_load_sco) { ++ assert(temp_reg != noreg, "supply either a temp or a register offset"); ++ } ++ ++ Label L_fallthrough; ++ int label_nulls = 0; ++ if (L_success == NULL) { L_success = &L_fallthrough; label_nulls++; } ++ if (L_failure == NULL) { L_failure = &L_fallthrough; label_nulls++; } ++ if (L_slow_path == NULL) { L_slow_path = &L_fallthrough; label_nulls++; } ++ assert(label_nulls <= 1, "at most one NULL in the batch"); ++ ++ int sc_offset = 
in_bytes(Klass::secondary_super_cache_offset()); ++ int sco_offset = in_bytes(Klass::super_check_offset_offset()); ++ // If the pointers are equal, we are done (e.g., String[] elements). ++ // This self-check enables sharing of secondary supertype arrays among ++ // non-primary types such as array-of-interface. Otherwise, each such ++ // type would need its own customized SSA. ++ // We move this check to the front of the fast path because many ++ // type checks are in fact trivially successful in this manner, ++ // so we get a nicely predicted branch right at the start of the check. ++ beq(sub_klass, super_klass, *L_success); ++ delayed()->nop(); ++ // Check the supertype display: ++ if (must_load_sco) { ++ lwu(temp_reg, super_klass, sco_offset); ++ super_check_offset = RegisterOrConstant(temp_reg); ++ } ++ daddu(AT, sub_klass, super_check_offset.register_or_noreg()); ++ ld(AT, AT, super_check_offset.constant_or_zero()); ++ ++ // This check has worked decisively for primary supers. ++ // Secondary supers are sought in the super_cache ('super_cache_addr'). ++ // (Secondary supers are interfaces and very deeply nested subtypes.) ++ // This works in the same check above because of a tricky aliasing ++ // between the super_cache and the primary super display elements. ++ // (The 'super_check_addr' can address either, as the case requires.) ++ // Note that the cache is updated below if it does not help us find ++ // what we need immediately. ++ // So if it was a primary super, we can just fail immediately. ++ // Otherwise, it's the slow path for us (no success at this point). ++ ++ if (super_check_offset.is_register()) { ++ beq(super_klass, AT, *L_success); ++ delayed()->nop(); ++ addiu(AT, super_check_offset.as_register(), -sc_offset); ++ if (L_failure == &L_fallthrough) { ++ beq(AT, R0, *L_slow_path); ++ delayed()->nop(); ++ } else { ++ bne_far(AT, R0, *L_failure); ++ delayed()->nop(); ++ b(*L_slow_path); ++ delayed()->nop(); ++ } ++ } else if (super_check_offset.as_constant() == sc_offset) { ++ // Need a slow path; fast failure is impossible. ++ if (L_slow_path == &L_fallthrough) { ++ beq(super_klass, AT, *L_success); ++ delayed()->nop(); ++ } else { ++ bne(super_klass, AT, *L_slow_path); ++ delayed()->nop(); ++ b(*L_success); ++ delayed()->nop(); ++ } ++ } else { ++ // No slow path; it's a fast decision. 
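Abstracting away from the branch plumbing, the fast path above together with the slow path that follows implement the check sketched below. This is a hedged scalar model with simplified field names, not the real Klass layout; in HotSpot the probed offset may also land in the primary-supers display.

    #include <cstddef>

    struct Klass {
      size_t  super_check_offset;      // byte offset the fast path probes in the subtype
      Klass*  secondary_super_cache;   // one-element cache updated by the slow path
      Klass** secondary_supers;        // interfaces and deeply nested supertypes
      int     secondary_supers_len;
    };

    bool is_subtype_of(Klass* sub, Klass* super) {
      if (sub == super) return true;                        // trivial self check
      // Fast path: load the word that super says to probe inside sub.
      Klass* probe = *(Klass**)((char*)sub + super->super_check_offset);
      if (probe == super) return true;
      // A miss is decisive unless the probe landed on the secondary-super cache.
      if (super->super_check_offset != offsetof(Klass, secondary_super_cache))
        return false;
      // Slow path: linear scan of the secondary supers, caching a hit for next time.
      for (int i = 0; i < sub->secondary_supers_len; i++) {
        if (sub->secondary_supers[i] == super) {
          sub->secondary_super_cache = super;
          return true;
        }
      }
      return false;
    }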
++ if (L_failure == &L_fallthrough) { ++ beq(super_klass, AT, *L_success); ++ delayed()->nop(); ++ } else { ++ bne_far(super_klass, AT, *L_failure); ++ delayed()->nop(); ++ b(*L_success); ++ delayed()->nop(); ++ } ++ } ++ ++ bind(L_fallthrough); ++ ++} ++ ++ ++void MacroAssembler::check_klass_subtype_slow_path(Register sub_klass, ++ Register super_klass, ++ Register temp_reg, ++ Register temp2_reg, ++ Label* L_success, ++ Label* L_failure, ++ bool set_cond_codes) { ++ if (temp2_reg == noreg) ++ temp2_reg = TSR; ++ assert_different_registers(sub_klass, super_klass, temp_reg, temp2_reg); ++#define IS_A_TEMP(reg) ((reg) == temp_reg || (reg) == temp2_reg) ++ ++ Label L_fallthrough; ++ int label_nulls = 0; ++ if (L_success == NULL) { L_success = &L_fallthrough; label_nulls++; } ++ if (L_failure == NULL) { L_failure = &L_fallthrough; label_nulls++; } ++ assert(label_nulls <= 1, "at most one NULL in the batch"); ++ ++ // a couple of useful fields in sub_klass: ++ int ss_offset = in_bytes(Klass::secondary_supers_offset()); ++ int sc_offset = in_bytes(Klass::secondary_super_cache_offset()); ++ Address secondary_supers_addr(sub_klass, ss_offset); ++ Address super_cache_addr( sub_klass, sc_offset); ++ ++ // Do a linear scan of the secondary super-klass chain. ++ // This code is rarely used, so simplicity is a virtue here. ++ // The repne_scan instruction uses fixed registers, which we must spill. ++ // Don't worry too much about pre-existing connections with the input regs. ++ ++#ifndef PRODUCT ++ int* pst_counter = &SharedRuntime::_partial_subtype_ctr; ++ ExternalAddress pst_counter_addr((address) pst_counter); ++#endif //PRODUCT ++ ++ // We will consult the secondary-super array. ++ ld(temp_reg, secondary_supers_addr); ++ // Load the array length. ++ lw(temp2_reg, Address(temp_reg, Array::length_offset_in_bytes())); ++ // Skip to start of data. ++ daddiu(temp_reg, temp_reg, Array::base_offset_in_bytes()); ++ ++ // OpenJDK8 never compresses klass pointers in secondary-super array. ++ Label Loop, subtype; ++ bind(Loop); ++ beq(temp2_reg, R0, *L_failure); ++ delayed()->nop(); ++ ld(AT, temp_reg, 0); ++ beq(AT, super_klass, subtype); ++ delayed()->daddiu(temp_reg, temp_reg, 1 * wordSize); ++ b(Loop); ++ delayed()->daddiu(temp2_reg, temp2_reg, -1); ++ ++ bind(subtype); ++ sd(super_klass, super_cache_addr); ++ if (L_success != &L_fallthrough) { ++ b(*L_success); ++ delayed()->nop(); ++ } ++ ++ // Success. Cache the super we found and proceed in triumph. ++#undef IS_A_TEMP ++ ++ bind(L_fallthrough); ++} ++ ++void MacroAssembler::get_vm_result(Register oop_result, Register java_thread) { ++ ld(oop_result, Address(java_thread, JavaThread::vm_result_offset())); ++ sd(R0, Address(java_thread, JavaThread::vm_result_offset())); ++ verify_oop(oop_result, "broken oop in call_VM_base"); ++} ++ ++void MacroAssembler::get_vm_result_2(Register metadata_result, Register java_thread) { ++ ld(metadata_result, Address(java_thread, JavaThread::vm_result_2_offset())); ++ sd(R0, Address(java_thread, JavaThread::vm_result_2_offset())); ++} ++ ++Address MacroAssembler::argument_address(RegisterOrConstant arg_slot, ++ int extra_slot_offset) { ++ // cf. TemplateTable::prepare_invoke(), if (load_receiver). 
++ int stackElementSize = Interpreter::stackElementSize; ++ int offset = Interpreter::expr_offset_in_bytes(extra_slot_offset+0); ++#ifdef ASSERT ++ int offset1 = Interpreter::expr_offset_in_bytes(extra_slot_offset+1); ++ assert(offset1 - offset == stackElementSize, "correct arithmetic"); ++#endif ++ Register scale_reg = NOREG; ++ Address::ScaleFactor scale_factor = Address::no_scale; ++ if (arg_slot.is_constant()) { ++ offset += arg_slot.as_constant() * stackElementSize; ++ } else { ++ scale_reg = arg_slot.as_register(); ++ scale_factor = Address::times_8; ++ } ++ // We don't push RA on stack in prepare_invoke. ++ // offset += wordSize; // return PC is on stack ++ if(scale_reg==NOREG) return Address(SP, offset); ++ else { ++ dsll(scale_reg, scale_reg, scale_factor); ++ daddu(scale_reg, SP, scale_reg); ++ return Address(scale_reg, offset); ++ } ++} ++ ++SkipIfEqual::~SkipIfEqual() { ++ _masm->bind(_label); ++} ++ ++void MacroAssembler::load_sized_value(Register dst, Address src, size_t size_in_bytes, bool is_signed, Register dst2) { ++ switch (size_in_bytes) { ++ case 8: ld(dst, src); break; ++ case 4: lw(dst, src); break; ++ case 2: is_signed ? lh(dst, src) : lhu(dst, src); break; ++ case 1: is_signed ? lb( dst, src) : lbu( dst, src); break; ++ default: ShouldNotReachHere(); ++ } ++} ++ ++void MacroAssembler::store_sized_value(Address dst, Register src, size_t size_in_bytes, Register src2) { ++ switch (size_in_bytes) { ++ case 8: sd(src, dst); break; ++ case 4: sw(src, dst); break; ++ case 2: sh(src, dst); break; ++ case 1: sb(src, dst); break; ++ default: ShouldNotReachHere(); ++ } ++} ++ ++// Look up the method for a megamorphic invokeinterface call. ++// The target method is determined by . ++// The receiver klass is in recv_klass. ++// On success, the result will be in method_result, and execution falls through. ++// On failure, execution transfers to the given label. ++void MacroAssembler::lookup_interface_method(Register recv_klass, ++ Register intf_klass, ++ RegisterOrConstant itable_index, ++ Register method_result, ++ Register scan_temp, ++ Label& L_no_such_interface, ++ bool return_method) { ++ assert_different_registers(recv_klass, intf_klass, scan_temp, AT); ++ assert_different_registers(method_result, intf_klass, scan_temp, AT); ++ assert(recv_klass != method_result || !return_method, ++ "recv_klass can be destroyed when method isn't needed"); ++ ++ assert(itable_index.is_constant() || itable_index.as_register() == method_result, ++ "caller must use same register for non-constant itable index as for method"); ++ ++ // Compute start of first itableOffsetEntry (which is at the end of the vtable) ++ int vtable_base = InstanceKlass::vtable_start_offset() * wordSize; ++ int itentry_off = itableMethodEntry::method_offset_in_bytes(); ++ int scan_step = itableOffsetEntry::size() * wordSize; ++ int vte_size = vtableEntry::size() * wordSize; ++ Address::ScaleFactor times_vte_scale = Address::times_ptr; ++ assert(vte_size == wordSize, "else adjust times_vte_scale"); ++ ++ lw(scan_temp, Address(recv_klass, InstanceKlass::vtable_length_offset() * wordSize)); ++ ++ // %%% Could store the aligned, prescaled offset in the klassoop. ++ dsll(scan_temp, scan_temp, times_vte_scale); ++ daddu(scan_temp, recv_klass, scan_temp); ++ daddiu(scan_temp, scan_temp, vtable_base); ++ if (HeapWordsPerLong > 1) { ++ // Round up to align_object_offset boundary ++ // see code for InstanceKlass::start_of_itable! 
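The loop that the remainder of this function emits walks the itable described in the comments above: past the vtable lie (interface, offset) pairs, and a match yields the offset of that interface's method array inside the receiver's klass. A hedged scalar model with hypothetical types, not HotSpot's itableOffsetEntry/itableMethodEntry:

    struct ItableOffsetEntry { void* interface; int offset; };

    // Scan the (interface, offset) pairs; a null interface terminates the table.
    void** lookup_itable_method(char* recv_klass, void* intf,
                                ItableOffsetEntry* entries, int itable_index) {
      for (ItableOffsetEntry* e = entries; ; e++) {
        if (e->interface == nullptr) return nullptr;   // receiver does not implement intf
        if (e->interface == intf) {
          void** methods = (void**)(recv_klass + e->offset);
          return &methods[itable_index];               // slot of the target method
        }
      }
    }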
++ round_to(scan_temp, BytesPerLong); ++ } ++ ++ if (return_method) { ++ // Adjust recv_klass by scaled itable_index, so we can free itable_index. ++ assert(itableMethodEntry::size() * wordSize == wordSize, "adjust the scaling in the code below"); ++ if (itable_index.is_constant()) { ++ set64(AT, (int)itable_index.is_constant()); ++ dsll(AT, AT, (int)Address::times_ptr); ++ } else { ++ dsll(AT, itable_index.as_register(), (int)Address::times_ptr); ++ } ++ daddu(AT, AT, recv_klass); ++ daddiu(recv_klass, AT, itentry_off); ++ } ++ ++ Label search, found_method; ++ ++ for (int peel = 1; peel >= 0; peel--) { ++ ld(method_result, Address(scan_temp, itableOffsetEntry::interface_offset_in_bytes())); ++ ++ if (peel) { ++ beq(intf_klass, method_result, found_method); ++ delayed()->nop(); ++ } else { ++ bne(intf_klass, method_result, search); ++ delayed()->nop(); ++ // (invert the test to fall through to found_method...) ++ } ++ ++ if (!peel) break; ++ ++ bind(search); ++ ++ // Check that the previous entry is non-null. A null entry means that ++ // the receiver class doesn't implement the interface, and wasn't the ++ // same as when the caller was compiled. ++ beq(method_result, R0, L_no_such_interface); ++ delayed()->nop(); ++ daddiu(scan_temp, scan_temp, scan_step); ++ } ++ ++ bind(found_method); ++ ++ if (return_method) { ++ // Got a hit. ++ lw(scan_temp, Address(scan_temp, itableOffsetEntry::offset_offset_in_bytes())); ++ if (UseLEXT1) { ++ gsldx(method_result, recv_klass, scan_temp, 0); ++ } else { ++ daddu(AT, recv_klass, scan_temp); ++ ld(method_result, AT, 0); ++ } ++ } ++} ++ ++// virtual method calling ++void MacroAssembler::lookup_virtual_method(Register recv_klass, ++ RegisterOrConstant vtable_index, ++ Register method_result) { ++ Register tmp = GP; ++ push(tmp); ++ ++ if (vtable_index.is_constant()) { ++ assert_different_registers(recv_klass, method_result, tmp); ++ } else { ++ assert_different_registers(recv_klass, method_result, vtable_index.as_register(), tmp); ++ } ++ const int base = InstanceKlass::vtable_start_offset() * wordSize; ++ assert(vtableEntry::size() * wordSize == wordSize, "else adjust the scaling in the code below"); ++ if (vtable_index.is_constant()) { ++ set64(AT, vtable_index.as_constant()); ++ dsll(AT, AT, (int)Address::times_ptr); ++ } else { ++ dsll(AT, vtable_index.as_register(), (int)Address::times_ptr); ++ } ++ set64(tmp, base + vtableEntry::method_offset_in_bytes()); ++ daddu(tmp, tmp, AT); ++ daddu(tmp, tmp, recv_klass); ++ ld(method_result, tmp, 0); ++ ++ pop(tmp); ++} ++ ++void MacroAssembler::store_for_type_by_register(Register src_reg, Register tmp_reg, int disp, BasicType type, bool wide) { ++ switch (type) { ++ case T_LONG: ++ st_ptr(src_reg, tmp_reg, disp); ++ break; ++ case T_ARRAY: ++ case T_OBJECT: ++ if (UseCompressedOops && !wide) { ++ sw(src_reg, tmp_reg, disp); ++ } else { ++ st_ptr(src_reg, tmp_reg, disp); ++ } ++ break; ++ case T_ADDRESS: ++ st_ptr(src_reg, tmp_reg, disp); ++ break; ++ case T_INT: ++ sw(src_reg, tmp_reg, disp); ++ break; ++ case T_CHAR: ++ case T_SHORT: ++ sh(src_reg, tmp_reg, disp); ++ break; ++ case T_BYTE: ++ case T_BOOLEAN: ++ sb(src_reg, tmp_reg, disp); ++ break; ++ default: ++ ShouldNotReachHere(); ++ } ++} ++ ++void MacroAssembler::store_for_type(Register src_reg, Address addr, BasicType type, bool wide) { ++ Register tmp_reg = T9; ++ Register index_reg = addr.index(); ++ if (index_reg == NOREG) { ++ tmp_reg = NOREG; ++ } ++ ++ int scale = addr.scale(); ++ if (tmp_reg != NOREG && scale >= 0) { ++ dsll(tmp_reg, 
index_reg, scale); ++ } ++ ++ int disp = addr.disp(); ++ bool disp_is_simm16 = true; ++ if (!Assembler::is_simm16(disp)) { ++ disp_is_simm16 = false; ++ } ++ ++ Register base_reg = addr.base(); ++ if (tmp_reg != NOREG) { ++ assert_different_registers(tmp_reg, base_reg, index_reg); ++ } ++ ++ if (tmp_reg != NOREG) { ++ daddu(tmp_reg, base_reg, tmp_reg); ++ if (!disp_is_simm16) { ++ move(tmp_reg, disp); ++ daddu(tmp_reg, base_reg, tmp_reg); ++ } ++ store_for_type_by_register(src_reg, tmp_reg, disp_is_simm16 ? disp : 0, type, wide); ++ } else { ++ if (!disp_is_simm16) { ++ tmp_reg = T9; ++ assert_different_registers(tmp_reg, base_reg); ++ move(tmp_reg, disp); ++ daddu(tmp_reg, base_reg, tmp_reg); ++ } ++ store_for_type_by_register(src_reg, disp_is_simm16 ? base_reg : tmp_reg, disp_is_simm16 ? disp : 0, type, wide); ++ } ++} ++ ++void MacroAssembler::store_for_type_by_register(FloatRegister src_reg, Register tmp_reg, int disp, BasicType type) { ++ switch (type) { ++ case T_DOUBLE: ++ sdc1(src_reg, tmp_reg, disp); ++ break; ++ case T_FLOAT: ++ swc1(src_reg, tmp_reg, disp); ++ break; ++ default: ++ ShouldNotReachHere(); ++ } ++} ++ ++void MacroAssembler::store_for_type(FloatRegister src_reg, Address addr, BasicType type) { ++ Register tmp_reg = T9; ++ Register index_reg = addr.index(); ++ if (index_reg == NOREG) { ++ tmp_reg = NOREG; ++ } ++ ++ int scale = addr.scale(); ++ if (tmp_reg != NOREG && scale >= 0) { ++ dsll(tmp_reg, index_reg, scale); ++ } ++ ++ int disp = addr.disp(); ++ bool disp_is_simm16 = true; ++ if (!Assembler::is_simm16(disp)) { ++ disp_is_simm16 = false; ++ } ++ ++ Register base_reg = addr.base(); ++ if (tmp_reg != NOREG) { ++ assert_different_registers(tmp_reg, base_reg, index_reg); ++ } ++ ++ if (tmp_reg != NOREG) { ++ daddu(tmp_reg, base_reg, tmp_reg); ++ if (!disp_is_simm16) { ++ move(tmp_reg, disp); ++ daddu(tmp_reg, base_reg, tmp_reg); ++ } ++ store_for_type_by_register(src_reg, tmp_reg, disp_is_simm16 ? disp : 0, type); ++ } else { ++ if (!disp_is_simm16) { ++ tmp_reg = T9; ++ assert_different_registers(tmp_reg, base_reg); ++ move(tmp_reg, disp); ++ daddu(tmp_reg, base_reg, tmp_reg); ++ } ++ store_for_type_by_register(src_reg, disp_is_simm16 ? base_reg : tmp_reg, disp_is_simm16 ? 
disp : 0, type); ++ } ++} ++ ++void MacroAssembler::load_for_type_by_register(Register dst_reg, Register tmp_reg, int disp, BasicType type, bool wide) { ++ switch (type) { ++ case T_LONG: ++ ld_ptr(dst_reg, tmp_reg, disp); ++ break; ++ case T_ARRAY: ++ case T_OBJECT: ++ if (UseCompressedOops && !wide) { ++ lwu(dst_reg, tmp_reg, disp); ++ } else { ++ ld_ptr(dst_reg, tmp_reg, disp); ++ } ++ break; ++ case T_ADDRESS: ++ if (UseCompressedClassPointers && disp == oopDesc::klass_offset_in_bytes()) { ++ lwu(dst_reg, tmp_reg, disp); ++ } else { ++ ld_ptr(dst_reg, tmp_reg, disp); ++ } ++ break; ++ case T_INT: ++ lw(dst_reg, tmp_reg, disp); ++ break; ++ case T_CHAR: ++ lhu(dst_reg, tmp_reg, disp); ++ break; ++ case T_SHORT: ++ lh(dst_reg, tmp_reg, disp); ++ break; ++ case T_BYTE: ++ case T_BOOLEAN: ++ lb(dst_reg, tmp_reg, disp); ++ break; ++ default: ++ ShouldNotReachHere(); ++ } ++} ++ ++int MacroAssembler::load_for_type(Register dst_reg, Address addr, BasicType type, bool wide) { ++ int code_offset = 0; ++ Register tmp_reg = T9; ++ Register index_reg = addr.index(); ++ if (index_reg == NOREG) { ++ tmp_reg = NOREG; ++ } ++ ++ int scale = addr.scale(); ++ if (tmp_reg != NOREG && scale >= 0) { ++ dsll(tmp_reg, index_reg, scale); ++ } ++ ++ int disp = addr.disp(); ++ bool disp_is_simm16 = true; ++ if (!Assembler::is_simm16(disp)) { ++ disp_is_simm16 = false; ++ } ++ ++ Register base_reg = addr.base(); ++ if (tmp_reg != NOREG) { ++ assert_different_registers(tmp_reg, base_reg, index_reg); ++ } ++ ++ if (tmp_reg != NOREG) { ++ daddu(tmp_reg, base_reg, tmp_reg); ++ if (!disp_is_simm16) { ++ move(tmp_reg, disp); ++ daddu(tmp_reg, base_reg, tmp_reg); ++ } ++ code_offset = offset(); ++ load_for_type_by_register(dst_reg, tmp_reg, disp_is_simm16 ? disp : 0, type, wide); ++ } else { ++ if (!disp_is_simm16) { ++ tmp_reg = T9; ++ assert_different_registers(tmp_reg, base_reg); ++ move(tmp_reg, disp); ++ daddu(tmp_reg, base_reg, tmp_reg); ++ } ++ code_offset = offset(); ++ load_for_type_by_register(dst_reg, disp_is_simm16 ? base_reg : tmp_reg, disp_is_simm16 ? disp : 0, type, wide); ++ } ++ ++ return code_offset; ++} ++ ++void MacroAssembler::load_for_type_by_register(FloatRegister dst_reg, Register tmp_reg, int disp, BasicType type) { ++ switch (type) { ++ case T_DOUBLE: ++ ldc1(dst_reg, tmp_reg, disp); ++ break; ++ case T_FLOAT: ++ lwc1(dst_reg, tmp_reg, disp); ++ break; ++ default: ++ ShouldNotReachHere(); ++ } ++} ++ ++int MacroAssembler::load_for_type(FloatRegister dst_reg, Address addr, BasicType type) { ++ int code_offset = 0; ++ Register tmp_reg = T9; ++ Register index_reg = addr.index(); ++ if (index_reg == NOREG) { ++ tmp_reg = NOREG; ++ } ++ ++ int scale = addr.scale(); ++ if (tmp_reg != NOREG && scale >= 0) { ++ dsll(tmp_reg, index_reg, scale); ++ } ++ ++ int disp = addr.disp(); ++ bool disp_is_simm16 = true; ++ if (!Assembler::is_simm16(disp)) { ++ disp_is_simm16 = false; ++ } ++ ++ Register base_reg = addr.base(); ++ if (tmp_reg != NOREG) { ++ assert_different_registers(tmp_reg, base_reg, index_reg); ++ } ++ ++ if (tmp_reg != NOREG) { ++ daddu(tmp_reg, base_reg, tmp_reg); ++ if (!disp_is_simm16) { ++ move(tmp_reg, disp); ++ daddu(tmp_reg, base_reg, tmp_reg); ++ } ++ code_offset = offset(); ++ load_for_type_by_register(dst_reg, tmp_reg, disp_is_simm16 ? 
disp : 0, type); ++ } else { ++ if (!disp_is_simm16) { ++ tmp_reg = T9; ++ assert_different_registers(tmp_reg, base_reg); ++ move(tmp_reg, disp); ++ daddu(tmp_reg, base_reg, tmp_reg); ++ } ++ code_offset = offset(); ++ load_for_type_by_register(dst_reg, disp_is_simm16 ? base_reg : tmp_reg, disp_is_simm16 ? disp : 0, type); ++ } ++ ++ return code_offset; ++} ++ ++void MacroAssembler::clear_jweak_tag(Register possibly_jweak) { ++ const int32_t inverted_jweak_mask = ~static_cast(JNIHandles::weak_tag_mask); ++ STATIC_ASSERT(inverted_jweak_mask == -2); // otherwise check this code ++ // The inverted mask is sign-extended ++ move(AT, inverted_jweak_mask); ++ andr(possibly_jweak, AT, possibly_jweak); ++} ++ ++void MacroAssembler::resolve_jobject(Register value, ++ Register thread, ++ Register tmp) { ++ assert_different_registers(value, thread, tmp); ++ Label done, not_weak; ++ beq(value, R0, done); // Use NULL as-is. ++ delayed()->nop(); ++ move(AT, JNIHandles::weak_tag_mask); // Test for jweak tag. ++ andr(AT, value, AT); ++ beq(AT, R0, not_weak); ++ delayed()->nop(); ++ // Resolve jweak. ++ ld(value, value, -JNIHandles::weak_tag_value); ++ verify_oop(value); ++ #if INCLUDE_ALL_GCS ++ if (UseG1GC) { ++ g1_write_barrier_pre(noreg /* obj */, ++ value /* pre_val */, ++ thread /* thread */, ++ tmp /* tmp */, ++ true /* tosca_live */, ++ true /* expand_call */); ++ } ++ #endif // INCLUDE_ALL_GCS ++ b(done); ++ delayed()->nop(); ++ bind(not_weak); ++ // Resolve (untagged) jobject. ++ ld(value, value, 0); ++ verify_oop(value); ++ bind(done); ++} ++ ++void MacroAssembler::cmp_cmov(Register op1, ++ Register op2, ++ Register dst, ++ Register src, ++ CMCompare cmp, ++ bool is_signed) { ++ switch (cmp) { ++ case EQ: ++ subu(AT, op1, op2); ++ movz(dst, src, AT); ++ break; ++ ++ case NE: ++ subu(AT, op1, op2); ++ movn(dst, src, AT); ++ break; ++ ++ case GT: ++ if (is_signed) { ++ slt(AT, op2, op1); ++ } else { ++ sltu(AT, op2, op1); ++ } ++ movn(dst, src, AT); ++ break; ++ ++ case GE: ++ if (is_signed) { ++ slt(AT, op1, op2); ++ } else { ++ sltu(AT, op1, op2); ++ } ++ movz(dst, src, AT); ++ break; ++ ++ case LT: ++ if (is_signed) { ++ slt(AT, op1, op2); ++ } else { ++ sltu(AT, op1, op2); ++ } ++ movn(dst, src, AT); ++ break; ++ ++ case LE: ++ if (is_signed) { ++ slt(AT, op2, op1); ++ } else { ++ sltu(AT, op2, op1); ++ } ++ movz(dst, src, AT); ++ break; ++ ++ default: ++ Unimplemented(); ++ } ++} ++ ++void MacroAssembler::cmp_cmov(FloatRegister op1, ++ FloatRegister op2, ++ Register dst, ++ Register src, ++ CMCompare cmp, ++ bool is_float) { ++ switch(cmp) { ++ case EQ: ++ if (is_float) { ++ c_eq_s(op1, op2); ++ } else { ++ c_eq_d(op1, op2); ++ } ++ movt(dst, src); ++ break; ++ ++ case NE: ++ if (is_float) { ++ c_eq_s(op1, op2); ++ } else { ++ c_eq_d(op1, op2); ++ } ++ movf(dst, src); ++ break; ++ ++ case GT: ++ if (is_float) { ++ c_ule_s(op1, op2); ++ } else { ++ c_ule_d(op1, op2); ++ } ++ movf(dst, src); ++ break; ++ ++ case GE: ++ if (is_float) { ++ c_ult_s(op1, op2); ++ } else { ++ c_ult_d(op1, op2); ++ } ++ movf(dst, src); ++ break; ++ ++ case LT: ++ if (is_float) { ++ c_ult_s(op1, op2); ++ } else { ++ c_ult_d(op1, op2); ++ } ++ movt(dst, src); ++ break; ++ ++ case LE: ++ if (is_float) { ++ c_ule_s(op1, op2); ++ } else { ++ c_ule_d(op1, op2); ++ } ++ movt(dst, src); ++ break; ++ ++ default: ++ Unimplemented(); ++ } ++} ++ ++void MacroAssembler::cmp_cmov(FloatRegister op1, ++ FloatRegister op2, ++ FloatRegister dst, ++ FloatRegister src, ++ CMCompare cmp, ++ bool is_float) { ++ switch(cmp) { ++ case EQ: ++ 
if (!is_float) { ++ c_eq_d(op1, op2); ++ movt_d(dst, src); ++ } else { ++ c_eq_s(op1, op2); ++ movt_s(dst, src); ++ } ++ break; ++ ++ case NE: ++ if (!is_float) { ++ c_eq_d(op1, op2); ++ movf_d(dst, src); ++ } else { ++ c_eq_s(op1, op2); ++ movf_s(dst, src); ++ } ++ break; ++ ++ case GT: ++ if (!is_float) { ++ c_ule_d(op1, op2); ++ movf_d(dst, src); ++ } else { ++ c_ule_s(op1, op2); ++ movf_s(dst, src); ++ } ++ break; ++ ++ case GE: ++ if (!is_float) { ++ c_ult_d(op1, op2); ++ movf_d(dst, src); ++ } else { ++ c_ult_s(op1, op2); ++ movf_s(dst, src); ++ } ++ break; ++ ++ case LT: ++ if (!is_float) { ++ c_ult_d(op1, op2); ++ movt_d(dst, src); ++ } else { ++ c_ult_s(op1, op2); ++ movt_s(dst, src); ++ } ++ break; ++ ++ case LE: ++ if (!is_float) { ++ c_ule_d(op1, op2); ++ movt_d(dst, src); ++ } else { ++ c_ule_s(op1, op2); ++ movt_s(dst, src); ++ } ++ break; ++ ++ default: ++ Unimplemented(); ++ } ++} ++ ++void MacroAssembler::cmp_cmov(Register op1, ++ Register op2, ++ FloatRegister dst, ++ FloatRegister src, ++ CMCompare cmp, ++ bool is_float) { ++ Label L; ++ ++ switch(cmp) { ++ case EQ: ++ bne(op1, op2, L); ++ delayed()->nop(); ++ if (is_float) { ++ mov_s(dst, src); ++ } else { ++ mov_d(dst, src); ++ } ++ bind(L); ++ break; ++ ++ case NE: ++ beq(op1, op2, L); ++ delayed()->nop(); ++ if (is_float) { ++ mov_s(dst, src); ++ } else { ++ mov_d(dst, src); ++ } ++ bind(L); ++ break; ++ ++ case GT: ++ slt(AT, op2, op1); ++ beq(AT, R0, L); ++ delayed()->nop(); ++ if (is_float) { ++ mov_s(dst, src); ++ } else { ++ mov_d(dst, src); ++ } ++ bind(L); ++ break; ++ ++ case GE: ++ slt(AT, op1, op2); ++ bne(AT, R0, L); ++ delayed()->nop(); ++ if (is_float) { ++ mov_s(dst, src); ++ } else { ++ mov_d(dst, src); ++ } ++ bind(L); ++ break; ++ ++ case LT: ++ slt(AT, op1, op2); ++ beq(AT, R0, L); ++ delayed()->nop(); ++ if (is_float) { ++ mov_s(dst, src); ++ } else { ++ mov_d(dst, src); ++ } ++ bind(L); ++ break; ++ ++ case LE: ++ slt(AT, op2, op1); ++ bne(AT, R0, L); ++ delayed()->nop(); ++ if (is_float) { ++ mov_s(dst, src); ++ } else { ++ mov_d(dst, src); ++ } ++ bind(L); ++ break; ++ ++ default: ++ Unimplemented(); ++ } ++} +diff --git a/hotspot/src/cpu/mips/vm/macroAssembler_mips.hpp b/hotspot/src/cpu/mips/vm/macroAssembler_mips.hpp +new file mode 100644 +index 0000000000..ab9727793f +--- /dev/null ++++ b/hotspot/src/cpu/mips/vm/macroAssembler_mips.hpp +@@ -0,0 +1,701 @@ ++/* ++ * Copyright (c) 1997, 2013, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 
++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#ifndef CPU_MIPS_VM_MACROASSEMBLER_MIPS_HPP ++#define CPU_MIPS_VM_MACROASSEMBLER_MIPS_HPP ++ ++#include "asm/assembler.hpp" ++#include "utilities/macros.hpp" ++#include "runtime/rtmLocking.hpp" ++ ++// MacroAssembler extends Assembler by frequently used macros. ++// ++// Instructions for which a 'better' code sequence exists depending ++// on arguments should also go in here. ++ ++class MacroAssembler: public Assembler { ++ friend class LIR_Assembler; ++ friend class Runtime1; // as_Address() ++ ++ public: ++ // Compare code ++ typedef enum { ++ EQ = 0x01, ++ NE = 0x02, ++ GT = 0x03, ++ GE = 0x04, ++ LT = 0x05, ++ LE = 0x06 ++ } CMCompare; ++ ++ protected: ++ ++ Address as_Address(AddressLiteral adr); ++ Address as_Address(ArrayAddress adr); ++ ++ // Support for VM calls ++ // ++ // This is the base routine called by the different versions of call_VM_leaf. The interpreter ++ // may customize this version by overriding it for its purposes (e.g., to save/restore ++ // additional registers when doing a VM call). ++#ifdef CC_INTERP ++ // c++ interpreter never wants to use interp_masm version of call_VM ++ #define VIRTUAL ++#else ++ #define VIRTUAL virtual ++#endif ++ ++ VIRTUAL void call_VM_leaf_base( ++ address entry_point, // the entry point ++ int number_of_arguments // the number of arguments to pop after the call ++ ); ++ ++ // This is the base routine called by the different versions of call_VM. The interpreter ++ // may customize this version by overriding it for its purposes (e.g., to save/restore ++ // additional registers when doing a VM call). ++ // ++ // If no java_thread register is specified (noreg) than TREG will be used instead. call_VM_base ++ // returns the register which contains the thread upon return. If a thread register has been ++ // specified, the return value will correspond to that register. If no last_java_sp is specified ++ // (noreg) than sp will be used instead. ++ VIRTUAL void call_VM_base( // returns the register containing the thread upon return ++ Register oop_result, // where an oop-result ends up if any; use noreg otherwise ++ Register java_thread, // the thread if computed before ; use noreg otherwise ++ Register last_java_sp, // to set up last_Java_frame in stubs; use noreg otherwise ++ address entry_point, // the entry point ++ int number_of_arguments, // the number of arguments (w/o thread) to pop after the call ++ bool check_exceptions // whether to check for pending exceptions after return ++ ); ++ ++ // These routines should emit JVMTI PopFrame and ForceEarlyReturn handling code. ++ // The implementation is only non-empty for the InterpreterMacroAssembler, ++ // as only the interpreter handles PopFrame and ForceEarlyReturn requests. 
++ virtual void check_and_handle_popframe(Register java_thread); ++ virtual void check_and_handle_earlyret(Register java_thread); ++ ++ void call_VM_helper(Register oop_result, address entry_point, int number_of_arguments, bool check_exceptions = true); ++ ++ // helpers for FPU flag access ++ // tmp is a temporary register, if none is available use noreg ++ ++ public: ++ static intptr_t i[32]; ++ static float f[32]; ++ static void print(outputStream *s); ++ ++ static int i_offset(unsigned int k); ++ static int f_offset(unsigned int k); ++ ++ static void save_registers(MacroAssembler *masm); ++ static void restore_registers(MacroAssembler *masm); ++ ++ MacroAssembler(CodeBuffer* code) : Assembler(code) {} ++ ++ // Support for NULL-checks ++ // ++ // Generates code that causes a NULL OS exception if the content of reg is NULL. ++ // If the accessed location is M[reg + offset] and the offset is known, provide the ++ // offset. No explicit code generation is needed if the offset is within a certain ++ // range (0 <= offset <= page_size). ++ ++ void null_check(Register reg, int offset = -1); ++ static bool needs_explicit_null_check(intptr_t offset); ++ ++ // Required platform-specific helpers for Label::patch_instructions. ++ // They _shadow_ the declarations in AbstractAssembler, which are undefined. ++ void pd_patch_instruction(address branch, address target); ++ ++ address emit_trampoline_stub(int insts_call_instruction_offset, address target); ++ ++ // Support for inc/dec with optimal instruction selection depending on value ++ void incrementl(Register reg, int value = 1); ++ void decrementl(Register reg, int value = 1); ++ ++ ++ // Alignment ++ void align(int modulus); ++ ++ ++ // Stack frame creation/removal ++ void enter(); ++ void leave(); ++ ++ // Support for getting the JavaThread pointer (i.e.; a reference to thread-local information) ++ // The pointer will be loaded into the thread register. ++ void get_thread(Register thread); ++ ++ ++ // Support for VM calls ++ // ++ // It is imperative that all calls into the VM are handled via the call_VM macros. ++ // They make sure that the stack linkage is setup correctly. call_VM's correspond ++ // to ENTRY/ENTRY_X entry points while call_VM_leaf's correspond to LEAF entry points. 
++ ++ ++ void call_VM(Register oop_result, ++ address entry_point, ++ bool check_exceptions = true); ++ void call_VM(Register oop_result, ++ address entry_point, ++ Register arg_1, ++ bool check_exceptions = true); ++ void call_VM(Register oop_result, ++ address entry_point, ++ Register arg_1, Register arg_2, ++ bool check_exceptions = true); ++ void call_VM(Register oop_result, ++ address entry_point, ++ Register arg_1, Register arg_2, Register arg_3, ++ bool check_exceptions = true); ++ ++ // Overloadings with last_Java_sp ++ void call_VM(Register oop_result, ++ Register last_java_sp, ++ address entry_point, ++ int number_of_arguments = 0, ++ bool check_exceptions = true); ++ void call_VM(Register oop_result, ++ Register last_java_sp, ++ address entry_point, ++ Register arg_1, bool ++ check_exceptions = true); ++ void call_VM(Register oop_result, ++ Register last_java_sp, ++ address entry_point, ++ Register arg_1, Register arg_2, ++ bool check_exceptions = true); ++ void call_VM(Register oop_result, ++ Register last_java_sp, ++ address entry_point, ++ Register arg_1, Register arg_2, Register arg_3, ++ bool check_exceptions = true); ++ ++ void get_vm_result (Register oop_result, Register thread); ++ void get_vm_result_2(Register metadata_result, Register thread); ++ void call_VM_leaf(address entry_point, ++ int number_of_arguments = 0); ++ void call_VM_leaf(address entry_point, ++ Register arg_1); ++ void call_VM_leaf(address entry_point, ++ Register arg_1, Register arg_2); ++ void call_VM_leaf(address entry_point, ++ Register arg_1, Register arg_2, Register arg_3); ++ ++ // Super call_VM calls - correspond to MacroAssembler::call_VM(_leaf) calls ++ void super_call_VM_leaf(address entry_point); ++ void super_call_VM_leaf(address entry_point, Register arg_1); ++ void super_call_VM_leaf(address entry_point, Register arg_1, Register arg_2); ++ void super_call_VM_leaf(address entry_point, Register arg_1, Register arg_2, Register arg_3); ++ ++ // last Java Frame (fills frame anchor) ++ void set_last_Java_frame(Register thread, ++ Register last_java_sp, ++ Register last_java_fp, ++ address last_java_pc); ++ ++ // thread in the default location (S6) ++ void set_last_Java_frame(Register last_java_sp, ++ Register last_java_fp, ++ address last_java_pc); ++ ++ void reset_last_Java_frame(Register thread, bool clear_fp); ++ ++ // thread in the default location (S6) ++ void reset_last_Java_frame(bool clear_fp); ++ ++ // Stores ++ void store_check(Register obj); // store check for obj - register is destroyed afterwards ++ void store_check(Register obj, Address dst); // same as above, dst is exact store location (reg. is destroyed) ++ ++ void resolve_jobject(Register value, Register thread, Register tmp); ++ void clear_jweak_tag(Register possibly_jweak); ++ ++#if INCLUDE_ALL_GCS ++ ++ void g1_write_barrier_pre(Register obj, ++ Register pre_val, ++ Register thread, ++ Register tmp, ++ bool tosca_live, ++ bool expand_call); ++ ++ void g1_write_barrier_post(Register store_addr, ++ Register new_val, ++ Register thread, ++ Register tmp, ++ Register tmp2); ++ ++#endif // INCLUDE_ALL_GCS ++ ++ // split store_check(Register obj) to enhance instruction interleaving ++ void store_check_part_1(Register obj); ++ void store_check_part_2(Register obj); ++ ++ // C 'boolean' to Java boolean: x == 0 ? 
0 : 1 ++ void c2bool(Register x); ++ //add for compressedoops ++ void load_klass(Register dst, Register src); ++ void store_klass(Register dst, Register src); ++ void load_prototype_header(Register dst, Register src); ++ ++ void store_klass_gap(Register dst, Register src); ++ ++ void load_heap_oop(Register dst, Address src); ++ void store_heap_oop(Address dst, Register src); ++ void store_heap_oop_null(Address dst); ++ void encode_heap_oop(Register r); ++ void encode_heap_oop(Register dst, Register src); ++ void decode_heap_oop(Register r); ++ void decode_heap_oop(Register dst, Register src); ++ void encode_heap_oop_not_null(Register r); ++ void decode_heap_oop_not_null(Register r); ++ void encode_heap_oop_not_null(Register dst, Register src); ++ void decode_heap_oop_not_null(Register dst, Register src); ++ ++ void encode_klass_not_null(Register r); ++ void decode_klass_not_null(Register r); ++ void encode_klass_not_null(Register dst, Register src); ++ void decode_klass_not_null(Register dst, Register src); ++ ++ // Returns the byte size of the instructions generated by decode_klass_not_null() ++ // when compressed klass pointers are being used. ++ static int instr_size_for_decode_klass_not_null(); ++ ++ // if heap base register is used - reinit it with the correct value ++ void reinit_heapbase(); ++ ++ DEBUG_ONLY(void verify_heapbase(const char* msg);) ++ ++ void set_narrow_klass(Register dst, Klass* k); ++ void set_narrow_oop(Register dst, jobject obj); ++ ++ ++ ++ ++ // Sign extension ++ void sign_extend_short(Register reg) { /*dsll32(reg, reg, 16); dsra32(reg, reg, 16);*/ seh(reg, reg); } ++ void sign_extend_byte(Register reg) { /*dsll32(reg, reg, 24); dsra32(reg, reg, 24);*/ seb(reg, reg); } ++ void rem_s(FloatRegister fd, FloatRegister fs, FloatRegister ft, FloatRegister tmp); ++ void rem_d(FloatRegister fd, FloatRegister fs, FloatRegister ft, FloatRegister tmp); ++ ++ void trigfunc(char trig, int num_fpu_regs_in_use = 1); ++ // allocation ++ void eden_allocate( ++ Register obj, // result: pointer to object after successful allocation ++ Register var_size_in_bytes, // object size in bytes if unknown at compile time; invalid otherwise ++ int con_size_in_bytes, // object size in bytes if known at compile time ++ Register t1, // temp register ++ Register t2, ++ Label& slow_case // continuation point if fast allocation fails ++ ); ++ void tlab_allocate( ++ Register obj, // result: pointer to object after successful allocation ++ Register var_size_in_bytes, // object size in bytes if unknown at compile time; invalid otherwise ++ int con_size_in_bytes, // object size in bytes if known at compile time ++ Register t1, // temp register ++ Register t2, // temp register ++ Label& slow_case // continuation point if fast allocation fails ++ ); ++ void tlab_refill(Label& retry_tlab, Label& try_eden, Label& slow_case); ++ void incr_allocated_bytes(Register thread, ++ Register var_size_in_bytes, int con_size_in_bytes, ++ Register t1 = noreg); ++ // interface method calling ++ void lookup_interface_method(Register recv_klass, ++ Register intf_klass, ++ RegisterOrConstant itable_index, ++ Register method_result, ++ Register scan_temp, ++ Label& no_such_interface, ++ bool return_method = true); ++ ++ // virtual method calling ++ void lookup_virtual_method(Register recv_klass, ++ RegisterOrConstant vtable_index, ++ Register method_result); ++ ++ // Test sub_klass against super_klass, with fast and slow paths. ++ ++ // The fast path produces a tri-state answer: yes / no / maybe-slow. 
++ // One of the three labels can be NULL, meaning take the fall-through. ++ // If super_check_offset is -1, the value is loaded up from super_klass. ++ // No registers are killed, except temp_reg. ++ void check_klass_subtype_fast_path(Register sub_klass, ++ Register super_klass, ++ Register temp_reg, ++ Label* L_success, ++ Label* L_failure, ++ Label* L_slow_path, ++ RegisterOrConstant super_check_offset = RegisterOrConstant(-1)); ++ ++ // The rest of the type check; must be wired to a corresponding fast path. ++ // It does not repeat the fast path logic, so don't use it standalone. ++ // The temp_reg and temp2_reg can be noreg, if no temps are available. ++ // Updates the sub's secondary super cache as necessary. ++ // If set_cond_codes, condition codes will be Z on success, NZ on failure. ++ void check_klass_subtype_slow_path(Register sub_klass, ++ Register super_klass, ++ Register temp_reg, ++ Register temp2_reg, ++ Label* L_success, ++ Label* L_failure, ++ bool set_cond_codes = false); ++ ++ // Simplified, combined version, good for typical uses. ++ // Falls through on failure. ++ void check_klass_subtype(Register sub_klass, ++ Register super_klass, ++ Register temp_reg, ++ Label& L_success); ++ ++ ++ // Debugging ++ ++ // only if +VerifyOops ++ void verify_oop(Register reg, const char* s = "broken oop"); ++ void verify_oop_addr(Address addr, const char * s = "broken oop addr"); ++ void verify_oop_subroutine(); ++ // TODO: verify method and klass metadata (compare against vptr?) ++ void _verify_method_ptr(Register reg, const char * msg, const char * file, int line) {} ++ void _verify_klass_ptr(Register reg, const char * msg, const char * file, int line){} ++ ++ #define verify_method_ptr(reg) _verify_method_ptr(reg, "broken method " #reg, __FILE__, __LINE__) ++ #define verify_klass_ptr(reg) _verify_klass_ptr(reg, "broken klass " #reg, __FILE__, __LINE__) ++ ++ // only if +VerifyFPU ++ void verify_FPU(int stack_depth, const char* s = "illegal FPU state"); ++ ++ // prints msg, dumps registers and stops execution ++ void stop(const char* msg); ++ ++ // prints msg and continues ++ void warn(const char* msg); ++ ++ static void debug(char* msg/*, RegistersForDebugging* regs*/); ++ static void debug64(char* msg, int64_t pc, int64_t regs[]); ++ ++ void print_reg(Register reg); ++ void print_reg(FloatRegister reg); ++ ++ void untested() { stop("untested"); } ++ ++ void unimplemented(const char* what = "") { char* b = new char[1024]; jio_snprintf(b, sizeof(b), "unimplemented: %s", what); stop(b); } ++ ++ void should_not_reach_here() { stop("should not reach here"); } ++ ++ void print_CPU_state(); ++ ++ // Stack overflow checking ++ void bang_stack_with_offset(int offset) { ++ // stack grows down, caller passes positive offset ++ assert(offset > 0, "must bang with negative offset"); ++ if (offset <= 32768) { ++ sw(RA0, SP, -offset); ++ } else { ++ li(AT, offset); ++ dsubu(AT, SP, AT); ++ sw(RA0, AT, 0); ++ } ++ } ++ ++ // Writes to stack successive pages until offset reached to check for ++ // stack overflow + shadow pages. 
Also, clobbers tmp ++ void bang_stack_size(Register size, Register tmp); ++ ++ virtual RegisterOrConstant delayed_value_impl(intptr_t* delayed_value_addr, ++ Register tmp, ++ int offset); ++ ++ // Support for serializing memory accesses between threads ++ void serialize_memory(Register thread, Register tmp); ++ ++ //void verify_tlab(); ++ void verify_tlab(Register t1, Register t2); ++ ++ // Biased locking support ++ // lock_reg and obj_reg must be loaded up with the appropriate values. ++ // tmp_reg is optional. If it is supplied (i.e., != noreg) it will ++ // be killed; if not supplied, push/pop will be used internally to ++ // allocate a temporary (inefficient, avoid if possible). ++ // Optional slow case is for implementations (interpreter and C1) which branch to ++ // slow case directly. Leaves condition codes set for C2's Fast_Lock node. ++ // Returns offset of first potentially-faulting instruction for null ++ // check info (currently consumed only by C1). If ++ // swap_reg_contains_mark is true then returns -1 as it is assumed ++ // the calling code has already passed any potential faults. ++ int biased_locking_enter(Register lock_reg, Register obj_reg, ++ Register swap_reg, Register tmp_reg, ++ bool swap_reg_contains_mark, ++ Label& done, Label* slow_case = NULL, ++ BiasedLockingCounters* counters = NULL); ++ void biased_locking_exit (Register obj_reg, Register temp_reg, Label& done); ++#ifdef COMPILER2 ++ void fast_lock(Register obj, Register box, Register tmp, Register scr); ++ void fast_unlock(Register obj, Register box, Register tmp); ++#endif ++ ++ ++ // Arithmetics ++ // Regular vs. d* versions ++ inline void addu_long(Register rd, Register rs, Register rt) { ++ daddu(rd, rs, rt); ++ } ++ inline void addu_long(Register rd, Register rs, long imm32_64) { ++ daddiu(rd, rs, imm32_64); ++ } ++ ++ void round_to(Register reg, int modulus) { ++ assert_different_registers(reg, AT); ++ increment(reg, modulus - 1); ++ move(AT, - modulus); ++ andr(reg, reg, AT); ++ } ++ ++ // the follow two might use AT register, be sure you have no meanful data in AT before you call them ++ void increment(Register reg, int imm); ++ void decrement(Register reg, int imm); ++ ++ void shl(Register reg, int sa) { dsll(reg, reg, sa); } ++ void shr(Register reg, int sa) { dsrl(reg, reg, sa); } ++ void sar(Register reg, int sa) { dsra(reg, reg, sa); } ++ ++ // Helper functions for statistics gathering. 
++ void atomic_inc32(address counter_addr, int inc, Register tmp_reg1, Register tmp_reg2); ++ ++ // Calls ++ void call(address entry); ++ void call(address entry, relocInfo::relocType rtype); ++ void call(address entry, RelocationHolder& rh); ++ ++ address trampoline_call(AddressLiteral entry, CodeBuffer *cbuf = NULL); ++ ++ // Emit the CompiledIC call idiom ++ void ic_call(address entry); ++ ++ // Jumps ++ void jmp(address entry); ++ void jmp(address entry, relocInfo::relocType rtype); ++ void jmp_far(Label& L); // always long jumps ++ ++ /* branches may exceed 16-bit offset */ ++ void b_far(address entry); ++ void b_far(Label& L); ++ ++ void bne_far (Register rs, Register rt, address entry); ++ void bne_far (Register rs, Register rt, Label& L); ++ ++ void beq_far (Register rs, Register rt, address entry); ++ void beq_far (Register rs, Register rt, Label& L); ++ ++ // For C2 to support long branches ++ void beq_long (Register rs, Register rt, Label& L); ++ void bne_long (Register rs, Register rt, Label& L); ++ void bc1t_long (Label& L); ++ void bc1f_long (Label& L); ++ ++ void patchable_call(address target); ++ void general_call(address target); ++ ++ void patchable_jump(address target); ++ void general_jump(address target); ++ ++ static int insts_for_patchable_call(address target); ++ static int insts_for_general_call(address target); ++ ++ static int insts_for_patchable_jump(address target); ++ static int insts_for_general_jump(address target); ++ ++ // Floating ++ // Data ++ ++ // Load and store values by size and signed-ness ++ void load_sized_value(Register dst, Address src, size_t size_in_bytes, bool is_signed, Register dst2 = noreg); ++ void store_sized_value(Address dst, Register src, size_t size_in_bytes, Register src2 = noreg); ++ ++ // ld_ptr will perform lw for 32 bit VMs and ld for 64 bit VMs ++ inline void ld_ptr(Register rt, Address a) { ++ ld(rt, a); ++ } ++ ++ inline void ld_ptr(Register rt, Register base, int offset16) { ++ ld(rt, base, offset16); ++ } ++ ++ // st_ptr will perform sw for 32 bit VMs and sd for 64 bit VMs ++ inline void st_ptr(Register rt, Address a) { ++ sd(rt, a); ++ } ++ ++ inline void st_ptr(Register rt, Register base, int offset16) { ++ sd(rt, base, offset16); ++ } ++ ++ void ld_ptr(Register rt, Register base, Register offset); ++ void st_ptr(Register rt, Register base, Register offset); ++ ++ // swap the two byte of the low 16-bit halfword ++ // this directive will use AT, be sure the high 16-bit of reg is zero ++ void hswap(Register reg); ++ void huswap(Register reg); ++ ++ // convert big endian integer to little endian integer ++ void swap(Register reg); ++ ++ // implement the x86 instruction semantic ++ // if c_reg == *dest then *dest <= x_reg ++ // else c_reg <= *dest ++ // the AT indicate if xchg occurred, 1 for xchged, else 0 ++ void cmpxchg(Register x_reg, Address dest, Register c_reg); ++ void cmpxchg32(Register x_reg, Address dest, Register c_reg); ++ void cmpxchg8(Register x_regLo, Register x_regHi, Address dest, Register c_regLo, Register c_regHi); ++ ++ //pop & push ++ void extend_sign(Register rh, Register rl) { stop("extend_sign"); } ++ void neg(Register reg) { dsubu(reg, R0, reg); } ++ void push (Register reg) { daddiu(SP, SP, -8); sd (reg, SP, 0); } ++ void push (FloatRegister reg) { daddiu(SP, SP, -8); sdc1(reg, SP, 0); } ++ void pop (Register reg) { ld (reg, SP, 0); daddiu(SP, SP, 8); } ++ void pop (FloatRegister reg) { ldc1(reg, SP, 0); daddiu(SP, SP, 8); } ++ void pop () { daddiu(SP, SP, 8); } ++ void pop2 () { daddiu(SP, SP, 16); 
} ++ void push2(Register reg1, Register reg2); ++ void pop2 (Register reg1, Register reg2); ++ void dpush (Register reg) { daddiu(SP, SP, -8); sd (reg, SP, 0); } ++ void dpop (Register reg) { ld (reg, SP, 0); daddiu(SP, SP, 8); } ++ //we need 2 fun to save and resotre general register ++ void pushad(); ++ void popad(); ++ void pushad_except_v0(); ++ void popad_except_v0(); ++ ++ //move an 32-bit immediate to Register ++ void move(Register reg, int imm32) { li32(reg, imm32); } ++ void li (Register rd, long imm); ++ void li (Register rd, address addr) { li(rd, (long)addr); } ++ //replace move(Register reg, int imm) ++ void li32(Register rd, int imm32); // sign-extends to 64 bits on mips64 ++ void set64(Register d, jlong value); ++ static int insts_for_set64(jlong value); ++ ++ void patchable_set48(Register d, jlong value); ++ void patchable_set32(Register d, jlong value); ++ ++ void patchable_call32(Register d, jlong value); ++ ++ static int call_size(address target, bool far, bool patchable); ++ ++ static bool reachable_from_cache(address target); ++ static bool reachable_from_cache(); ++ ++ ++ void dli(Register rd, long imm) { li(rd, imm); } ++ void li64(Register rd, long imm); ++ void li48(Register rd, long imm); ++ ++ void move(Register rd, Register rs) { daddu(rd, rs, R0); } ++ void move_u32(Register rd, Register rs) { addu32(rd, rs, R0); } ++ void dmove(Register rd, Register rs) { daddu(rd, rs, R0); } ++ void mov_metadata(Register dst, Metadata* obj); ++ void mov_metadata(Address dst, Metadata* obj); ++ ++ void store_for_type_by_register(Register src_reg, Register tmp_reg, int disp, BasicType type, bool wide); ++ void store_for_type_by_register(FloatRegister src_reg, Register tmp_reg, int disp, BasicType type); ++ void store_for_type(Register src_reg, Address addr, BasicType type = T_INT, bool wide = false); ++ void store_for_type(FloatRegister src_reg, Address addr, BasicType type = T_INT); ++ void load_for_type_by_register(Register dst_reg, Register tmp_reg, int disp, BasicType type, bool wide); ++ void load_for_type_by_register(FloatRegister dst_reg, Register tmp_reg, int disp, BasicType type); ++ int load_for_type(Register dst_reg, Address addr, BasicType type = T_INT, bool wide = false); ++ int load_for_type(FloatRegister dst_reg, Address addr, BasicType type = T_INT); ++ ++#ifndef PRODUCT ++ static void pd_print_patched_instruction(address branch) { ++ jint stub_inst = *(jint*) branch; ++ print_instruction(stub_inst); ++ ::tty->print("%s", " (unresolved)"); ++ ++ } ++#endif ++ ++ //FIXME ++ void empty_FPU_stack(){/*need implemented*/}; ++ ++ ++ // method handles (JSR 292) ++ Address argument_address(RegisterOrConstant arg_slot, int extra_slot_offset = 0); ++ ++ // Conditional move ++ void cmp_cmov(Register op1, ++ Register op2, ++ Register dst, ++ Register src, ++ CMCompare cmp = EQ, ++ bool is_signed = true); ++ void cmp_cmov(FloatRegister op1, ++ FloatRegister op2, ++ Register dst, ++ Register src, ++ CMCompare cmp = EQ, ++ bool is_float = true); ++ void cmp_cmov(FloatRegister op1, ++ FloatRegister op2, ++ FloatRegister dst, ++ FloatRegister src, ++ CMCompare cmp = EQ, ++ bool is_float = true); ++ void cmp_cmov(Register op1, ++ Register op2, ++ FloatRegister dst, ++ FloatRegister src, ++ CMCompare cmp = EQ, ++ bool is_float = true); ++ ++#undef VIRTUAL ++ ++}; ++ ++/** ++ * class SkipIfEqual: ++ * ++ * Instantiating this class will result in assembly code being output that will ++ * jump around any code emitted between the creation of the instance and it's ++ * automatic 
destruction at the end of a scope block, depending on the value of ++ * the flag passed to the constructor, which will be checked at run-time. ++ */ ++class SkipIfEqual { ++ private: ++ MacroAssembler* _masm; ++ Label _label; ++ ++ public: ++ SkipIfEqual(MacroAssembler*, const bool* flag_addr, bool value); ++ ~SkipIfEqual(); ++}; ++ ++#ifdef ASSERT ++inline bool AbstractAssembler::pd_check_instruction_mark() { return true; } ++#endif ++ ++ ++#endif // CPU_MIPS_VM_MACROASSEMBLER_MIPS_HPP +diff --git a/hotspot/src/cpu/mips/vm/macroAssembler_mips.inline.hpp b/hotspot/src/cpu/mips/vm/macroAssembler_mips.inline.hpp +new file mode 100644 +index 0000000000..92c05fb726 +--- /dev/null ++++ b/hotspot/src/cpu/mips/vm/macroAssembler_mips.inline.hpp +@@ -0,0 +1,34 @@ ++/* ++ * Copyright (c) 1997, 2013, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2017, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#ifndef CPU_MIPS_VM_MACROASSEMBLER_MIPS_INLINE_HPP ++#define CPU_MIPS_VM_MACROASSEMBLER_MIPS_INLINE_HPP ++ ++#include "asm/assembler.inline.hpp" ++#include "asm/macroAssembler.hpp" ++#include "asm/codeBuffer.hpp" ++#include "code/codeCache.hpp" ++ ++#endif // CPU_MIPS_VM_MACROASSEMBLER_MIPS_INLINE_HPP +diff --git a/hotspot/src/cpu/mips/vm/metaspaceShared_mips_64.cpp b/hotspot/src/cpu/mips/vm/metaspaceShared_mips_64.cpp +new file mode 100644 +index 0000000000..0c467df2f3 +--- /dev/null ++++ b/hotspot/src/cpu/mips/vm/metaspaceShared_mips_64.cpp +@@ -0,0 +1,123 @@ ++/* ++ * Copyright (c) 2004, 2012, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2021, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 
++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#include "precompiled.hpp" ++#include "asm/macroAssembler.hpp" ++#include "asm/codeBuffer.hpp" ++#include "memory/metaspaceShared.hpp" ++ ++// Generate the self-patching vtable method: ++// ++// This method will be called (as any other Klass virtual method) with ++// the Klass itself as the first argument. Example: ++// ++// oop obj; ++// int size = obj->klass()->klass_part()->oop_size(this); ++// ++// for which the virtual method call is Klass::oop_size(); ++// ++// The dummy method is called with the Klass object as the first ++// operand, and an object as the second argument. ++// ++ ++//===================================================================== ++ ++// All of the dummy methods in the vtable are essentially identical, ++// differing only by an ordinal constant, and they bear no releationship ++// to the original method which the caller intended. Also, there needs ++// to be 'vtbl_list_size' instances of the vtable in order to ++// differentiate between the 'vtable_list_size' original Klass objects. ++ ++#define __ masm-> ++ ++#define A0 RA0 ++#define A1 RA1 ++#define A2 RA2 ++#define A3 RA3 ++#define A4 RA4 ++#define A5 RA5 ++#define A6 RA6 ++#define A7 RA7 ++#define T0 RT0 ++#define T1 RT1 ++#define T2 RT2 ++#define T3 RT3 ++#define T8 RT8 ++#define T9 RT9 ++ ++void MetaspaceShared::generate_vtable_methods(void** vtbl_list, ++ void** vtable, ++ char** md_top, ++ char* md_end, ++ char** mc_top, ++ char* mc_end) { ++ ++ intptr_t vtable_bytes = (num_virtuals * vtbl_list_size) * sizeof(void*); ++ *(intptr_t *)(*md_top) = vtable_bytes; ++ *md_top += sizeof(intptr_t); ++ void** dummy_vtable = (void**)*md_top; ++ *vtable = dummy_vtable; ++ *md_top += vtable_bytes; ++ ++ // Get ready to generate dummy methods. ++ ++ CodeBuffer cb((unsigned char*)*mc_top, mc_end - *mc_top); ++ MacroAssembler* masm = new MacroAssembler(&cb); ++ ++ Label common_code; ++ for (int i = 0; i < vtbl_list_size; ++i) { ++ for (int j = 0; j < num_virtuals; ++j) { ++ dummy_vtable[num_virtuals * i + j] = (void*)masm->pc(); ++ ++ // Load V0 with a value indicating vtable/offset pair. ++ // -- bits[ 7..0] (8 bits) which virtual method in table? ++ // -- bits[12..8] (5 bits) which virtual method table? ++ // -- must fit in 13-bit instruction immediate field. ++ __ move(V0, (i << 8) + j); ++ __ b(common_code); ++ __ delayed()->nop(); ++ } ++ } ++ ++ __ bind(common_code); ++ ++ __ srl(T9, V0, 8); // isolate vtable identifier. ++ __ shl(T9, LogBytesPerWord); ++ __ li(AT, (long)vtbl_list); ++ __ addu(T9, AT, T9); ++ __ ld(T9, T9, 0); // get correct vtable address. ++ __ sd(T9, A0, 0); // update vtable pointer. ++ ++ __ andi(V0, V0, 0x00ff); // isolate vtable method index ++ __ shl(V0, LogBytesPerWord); ++ __ addu(T9, T9, V0); ++ __ ld(T9, T9, 0); // address of real method pointer. ++ __ jr(T9); // get real method pointer. ++ __ delayed()->nop(); ++ ++ __ flush(); ++ ++ *mc_top = (char*)__ pc(); ++} +diff --git a/hotspot/src/cpu/mips/vm/methodHandles_mips.cpp b/hotspot/src/cpu/mips/vm/methodHandles_mips.cpp +new file mode 100644 +index 0000000000..428c271362 +--- /dev/null ++++ b/hotspot/src/cpu/mips/vm/methodHandles_mips.cpp +@@ -0,0 +1,576 @@ ++/* ++ * Copyright (c) 1997, 2014, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. 
++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#include "precompiled.hpp" ++#include "asm/macroAssembler.hpp" ++#include "interpreter/interpreter.hpp" ++#include "interpreter/interpreterRuntime.hpp" ++#include "memory/allocation.inline.hpp" ++#include "prims/methodHandles.hpp" ++ ++#define __ _masm-> ++ ++#define A0 RA0 ++#define A1 RA1 ++#define A2 RA2 ++#define A3 RA3 ++#define A4 RA4 ++#define A5 RA5 ++#define A6 RA6 ++#define A7 RA7 ++#define T0 RT0 ++#define T1 RT1 ++#define T2 RT2 ++#define T3 RT3 ++#define T8 RT8 ++#define T9 RT9 ++ ++#ifdef PRODUCT ++#define BLOCK_COMMENT(str) /* nothing */ ++#define STOP(error) stop(error) ++#else ++#define BLOCK_COMMENT(str) __ block_comment(str) ++#define STOP(error) block_comment(error); __ stop(error) ++#endif ++ ++#define BIND(label) bind(label); BLOCK_COMMENT(#label ":") ++ ++void MethodHandles::load_klass_from_Class(MacroAssembler* _masm, Register klass_reg) { ++ if (VerifyMethodHandles) ++ verify_klass(_masm, klass_reg, SystemDictionary::WK_KLASS_ENUM_NAME(java_lang_Class), ++ "MH argument is a Class"); ++ __ ld(klass_reg, Address(klass_reg, java_lang_Class::klass_offset_in_bytes())); ++} ++ ++#ifdef ASSERT ++static int check_nonzero(const char* xname, int x) { ++ assert(x != 0, err_msg("%s should be nonzero", xname)); ++ return x; ++} ++#define NONZERO(x) check_nonzero(#x, x) ++#else //ASSERT ++#define NONZERO(x) (x) ++#endif //ASSERT ++ ++#ifdef ASSERT ++void MethodHandles::verify_klass(MacroAssembler* _masm, ++ Register obj, SystemDictionary::WKID klass_id, ++ const char* error_message) { ++} ++ ++void MethodHandles::verify_ref_kind(MacroAssembler* _masm, int ref_kind, Register member_reg, Register temp) { ++ Label L; ++ BLOCK_COMMENT("verify_ref_kind {"); ++ __ lw(temp, Address(member_reg, NONZERO(java_lang_invoke_MemberName::flags_offset_in_bytes()))); ++ __ sra(temp, temp, java_lang_invoke_MemberName::MN_REFERENCE_KIND_SHIFT); ++ __ move(AT, java_lang_invoke_MemberName::MN_REFERENCE_KIND_MASK); ++ __ andr(temp, temp, AT); ++ __ move(AT, ref_kind); ++ __ beq(temp, AT, L); ++ __ delayed()->nop(); ++ { char* buf = NEW_C_HEAP_ARRAY(char, 100, mtInternal); ++ jio_snprintf(buf, 100, "verify_ref_kind expected %x", ref_kind); ++ if (ref_kind == JVM_REF_invokeVirtual || ++ ref_kind == JVM_REF_invokeSpecial) ++ // could do this for all ref_kinds, but would explode assembly code size ++ trace_method_handle(_masm, buf); ++ __ STOP(buf); ++ } ++ BLOCK_COMMENT("} verify_ref_kind"); ++ __ bind(L); ++} ++ ++#endif //ASSERT ++ ++void MethodHandles::jump_from_method_handle(MacroAssembler* 
_masm, Register method, Register temp, ++ bool for_compiler_entry) { ++ assert(method == Rmethod, "interpreter calling convention"); ++ ++ Label L_no_such_method; ++ __ beq(method, R0, L_no_such_method); ++ __ delayed()->nop(); ++ ++ __ verify_method_ptr(method); ++ ++ if (!for_compiler_entry && JvmtiExport::can_post_interpreter_events()) { ++ Label run_compiled_code; ++ // JVMTI events, such as single-stepping, are implemented partly by avoiding running ++ // compiled code in threads for which the event is enabled. Check here for ++ // interp_only_mode if these events CAN be enabled. ++ Register rthread = TREG; ++ // interp_only is an int, on little endian it is sufficient to test the byte only ++ // Is a cmpl faster? ++ __ lbu(AT, rthread, in_bytes(JavaThread::interp_only_mode_offset())); ++ __ beq(AT, R0, run_compiled_code); ++ __ delayed()->nop(); ++ __ ld(T9, method, in_bytes(Method::interpreter_entry_offset())); ++ __ jr(T9); ++ __ delayed()->nop(); ++ __ BIND(run_compiled_code); ++ } ++ ++ const ByteSize entry_offset = for_compiler_entry ? Method::from_compiled_offset() : ++ Method::from_interpreted_offset(); ++ __ ld(T9, method, in_bytes(entry_offset)); ++ __ jr(T9); ++ __ delayed()->nop(); ++ ++ __ bind(L_no_such_method); ++ address wrong_method = StubRoutines::throw_AbstractMethodError_entry(); ++ __ jmp(wrong_method, relocInfo::runtime_call_type); ++ __ delayed()->nop(); ++} ++ ++void MethodHandles::jump_to_lambda_form(MacroAssembler* _masm, ++ Register recv, Register method_temp, ++ Register temp2, ++ bool for_compiler_entry) { ++ BLOCK_COMMENT("jump_to_lambda_form {"); ++ // This is the initial entry point of a lazy method handle. ++ // After type checking, it picks up the invoker from the LambdaForm. ++ assert_different_registers(recv, method_temp, temp2); ++ assert(recv != noreg, "required register"); ++ assert(method_temp == Rmethod, "required register for loading method"); ++ ++ //NOT_PRODUCT({ FlagSetting fs(TraceMethodHandles, true); trace_method_handle(_masm, "LZMH"); }); ++ ++ // Load the invoker, as MH -> MH.form -> LF.vmentry ++ __ verify_oop(recv); ++ __ load_heap_oop(method_temp, Address(recv, NONZERO(java_lang_invoke_MethodHandle::form_offset_in_bytes()))); ++ __ verify_oop(method_temp); ++ __ load_heap_oop(method_temp, Address(method_temp, NONZERO(java_lang_invoke_LambdaForm::vmentry_offset_in_bytes()))); ++ __ verify_oop(method_temp); ++ // the following assumes that a Method* is normally compressed in the vmtarget field: ++ __ ld(method_temp, Address(method_temp, NONZERO(java_lang_invoke_MemberName::vmtarget_offset_in_bytes()))); ++ ++ if (VerifyMethodHandles && !for_compiler_entry) { ++ // make sure recv is already on stack ++ __ ld(temp2, Address(method_temp, Method::const_offset())); ++ __ load_sized_value(temp2, ++ Address(temp2, ConstMethod::size_of_parameters_offset()), ++ sizeof(u2), false); ++ // assert(sizeof(u2) == sizeof(Method::_size_of_parameters), ""); ++ Label L; ++ Address recv_addr = __ argument_address(temp2, -1); ++ __ ld(AT, recv_addr); ++ __ beq(recv, AT, L); ++ __ delayed()->nop(); ++ ++ recv_addr = __ argument_address(temp2, -1); ++ __ ld(V0, recv_addr); ++ __ STOP("receiver not on stack"); ++ __ BIND(L); ++ } ++ ++ jump_from_method_handle(_masm, method_temp, temp2, for_compiler_entry); ++ BLOCK_COMMENT("} jump_to_lambda_form"); ++} ++ ++ ++// Code generation ++address MethodHandles::generate_method_handle_interpreter_entry(MacroAssembler* _masm, ++ vmIntrinsics::ID iid) { ++ const bool not_for_compiler_entry = false; // this is the 
interpreter entry ++ assert(is_signature_polymorphic(iid), "expected invoke iid"); ++ if (iid == vmIntrinsics::_invokeGeneric || ++ iid == vmIntrinsics::_compiledLambdaForm) { ++ // Perhaps surprisingly, the symbolic references visible to Java are not directly used. ++ // They are linked to Java-generated adapters via MethodHandleNatives.linkMethod. ++ // They all allow an appendix argument. ++ __ stop("empty stubs make SG sick"); ++ return NULL; ++ } ++ ++ // Rmethod: Method* ++ // T9: argument locator (parameter slot count, added to sp) ++ // S7: used as temp to hold mh or receiver ++ Register t9_argp = T9; // argument list ptr, live on error paths ++ Register s7_mh = S7; // MH receiver; dies quickly and is recycled ++ Register rm_method = Rmethod; // eventual target of this invocation ++ ++ // here's where control starts out: ++ __ align(CodeEntryAlignment); ++ address entry_point = __ pc(); ++ ++ if (VerifyMethodHandles) { ++ Label L; ++ BLOCK_COMMENT("verify_intrinsic_id {"); ++ __ lbu(AT, rm_method, Method::intrinsic_id_offset_in_bytes()); ++ guarantee(Assembler::is_simm16(iid), "Oops, iid is not simm16! Change the instructions."); ++ __ addiu(AT, AT, -1 * (int) iid); ++ __ beq(AT, R0, L); ++ __ delayed()->nop(); ++ if (iid == vmIntrinsics::_linkToVirtual || ++ iid == vmIntrinsics::_linkToSpecial) { ++ // could do this for all kinds, but would explode assembly code size ++ trace_method_handle(_masm, "bad Method*::intrinsic_id"); ++ } ++ __ STOP("bad Method*::intrinsic_id"); ++ __ bind(L); ++ BLOCK_COMMENT("} verify_intrinsic_id"); ++ } ++ ++ // First task: Find out how big the argument list is. ++ Address t9_first_arg_addr; ++ int ref_kind = signature_polymorphic_intrinsic_ref_kind(iid); ++ assert(ref_kind != 0 || iid == vmIntrinsics::_invokeBasic, "must be _invokeBasic or a linkTo intrinsic"); ++ if (ref_kind == 0 || MethodHandles::ref_kind_has_receiver(ref_kind)) { ++ __ ld(t9_argp, Address(rm_method, Method::const_offset())); ++ __ load_sized_value(t9_argp, ++ Address(t9_argp, ConstMethod::size_of_parameters_offset()), ++ sizeof(u2), false); ++ // assert(sizeof(u2) == sizeof(Method::_size_of_parameters), ""); ++ t9_first_arg_addr = __ argument_address(t9_argp, -1); ++ } else { ++ DEBUG_ONLY(t9_argp = noreg); ++ } ++ ++ if (!is_signature_polymorphic_static(iid)) { ++ __ ld(s7_mh, t9_first_arg_addr); ++ DEBUG_ONLY(t9_argp = noreg); ++ } ++ ++ // t9_first_arg_addr is live! ++ ++ trace_method_handle_interpreter_entry(_masm, iid); ++ ++ if (iid == vmIntrinsics::_invokeBasic) { ++ generate_method_handle_dispatch(_masm, iid, s7_mh, noreg, not_for_compiler_entry); ++ ++ } else { ++ // Adjust argument list by popping the trailing MemberName argument. ++ Register r_recv = noreg; ++ if (MethodHandles::ref_kind_has_receiver(ref_kind)) { ++ // Load the receiver (not the MH; the actual MemberName's receiver) up from the interpreter stack. 
++ __ ld(r_recv = T2, t9_first_arg_addr); ++ } ++ DEBUG_ONLY(t9_argp = noreg); ++ Register rm_member = rm_method; // MemberName ptr; incoming method ptr is dead now ++ __ pop(rm_member); // extract last argument ++ generate_method_handle_dispatch(_masm, iid, r_recv, rm_member, not_for_compiler_entry); ++ } ++ ++ return entry_point; ++} ++ ++void MethodHandles::generate_method_handle_dispatch(MacroAssembler* _masm, ++ vmIntrinsics::ID iid, ++ Register receiver_reg, ++ Register member_reg, ++ bool for_compiler_entry) { ++ assert(is_signature_polymorphic(iid), "expected invoke iid"); ++ Register rm_method = Rmethod; // eventual target of this invocation ++ // temps used in this code are not used in *either* compiled or interpreted calling sequences ++ Register j_rarg0 = T0; ++ Register j_rarg1 = A0; ++ Register j_rarg2 = A1; ++ Register j_rarg3 = A2; ++ Register j_rarg4 = A3; ++ Register j_rarg5 = A4; ++ ++ Register temp1 = T8; ++ Register temp2 = T9; ++ Register temp3 = V0; ++ if (for_compiler_entry) { ++ assert(receiver_reg == (iid == vmIntrinsics::_linkToStatic ? noreg : j_rarg0), "only valid assignment"); ++ assert_different_registers(temp1, j_rarg0, j_rarg1, j_rarg2, j_rarg3, j_rarg4, j_rarg5); ++ assert_different_registers(temp2, j_rarg0, j_rarg1, j_rarg2, j_rarg3, j_rarg4, j_rarg5); ++ assert_different_registers(temp3, j_rarg0, j_rarg1, j_rarg2, j_rarg3, j_rarg4, j_rarg5); ++ } ++ else { ++ assert_different_registers(temp1, temp2, temp3, saved_last_sp_register()); // don't trash lastSP ++ } ++ assert_different_registers(temp1, temp2, temp3, receiver_reg); ++ assert_different_registers(temp1, temp2, temp3, member_reg); ++ ++ if (iid == vmIntrinsics::_invokeBasic) { ++ // indirect through MH.form.vmentry.vmtarget ++ jump_to_lambda_form(_masm, receiver_reg, rm_method, temp1, for_compiler_entry); ++ ++ } else { ++ // The method is a member invoker used by direct method handles. ++ if (VerifyMethodHandles) { ++ // make sure the trailing argument really is a MemberName (caller responsibility) ++ verify_klass(_masm, member_reg, SystemDictionary::WK_KLASS_ENUM_NAME(java_lang_invoke_MemberName), ++ "MemberName required for invokeVirtual etc."); ++ } ++ ++ Address member_clazz( member_reg, NONZERO(java_lang_invoke_MemberName::clazz_offset_in_bytes())); ++ Address member_vmindex( member_reg, NONZERO(java_lang_invoke_MemberName::vmindex_offset_in_bytes())); ++ Address member_vmtarget( member_reg, NONZERO(java_lang_invoke_MemberName::vmtarget_offset_in_bytes())); ++ ++ Register temp1_recv_klass = temp1; ++ if (iid != vmIntrinsics::_linkToStatic) { ++ __ verify_oop(receiver_reg); ++ if (iid == vmIntrinsics::_linkToSpecial) { ++ // Don't actually load the klass; just null-check the receiver. ++ __ null_check(receiver_reg); ++ } else { ++ // load receiver klass itself ++ __ null_check(receiver_reg, oopDesc::klass_offset_in_bytes()); ++ __ load_klass(temp1_recv_klass, receiver_reg); ++ __ verify_klass_ptr(temp1_recv_klass); ++ } ++ BLOCK_COMMENT("check_receiver {"); ++ // The receiver for the MemberName must be in receiver_reg. ++ // Check the receiver against the MemberName.clazz ++ if (VerifyMethodHandles && iid == vmIntrinsics::_linkToSpecial) { ++ // Did not load it above... 
++ __ load_klass(temp1_recv_klass, receiver_reg); ++ __ verify_klass_ptr(temp1_recv_klass); ++ } ++ if (VerifyMethodHandles && iid != vmIntrinsics::_linkToInterface) { ++ Label L_ok; ++ Register temp2_defc = temp2; ++ __ load_heap_oop(temp2_defc, member_clazz); ++ load_klass_from_Class(_masm, temp2_defc); ++ __ verify_klass_ptr(temp2_defc); ++ __ check_klass_subtype(temp1_recv_klass, temp2_defc, temp3, L_ok); ++ // If we get here, the type check failed! ++ __ STOP("receiver class disagrees with MemberName.clazz"); ++ __ bind(L_ok); ++ } ++ BLOCK_COMMENT("} check_receiver"); ++ } ++ if (iid == vmIntrinsics::_linkToSpecial || ++ iid == vmIntrinsics::_linkToStatic) { ++ DEBUG_ONLY(temp1_recv_klass = noreg); // these guys didn't load the recv_klass ++ } ++ ++ // Live registers at this point: ++ // member_reg - MemberName that was the trailing argument ++ // temp1_recv_klass - klass of stacked receiver, if needed ++ ++ Label L_incompatible_class_change_error; ++ switch (iid) { ++ case vmIntrinsics::_linkToSpecial: ++ if (VerifyMethodHandles) { ++ verify_ref_kind(_masm, JVM_REF_invokeSpecial, member_reg, temp3); ++ } ++ __ ld(rm_method, member_vmtarget); ++ break; ++ ++ case vmIntrinsics::_linkToStatic: ++ if (VerifyMethodHandles) { ++ verify_ref_kind(_masm, JVM_REF_invokeStatic, member_reg, temp3); ++ } ++ __ ld(rm_method, member_vmtarget); ++ break; ++ ++ case vmIntrinsics::_linkToVirtual: ++ { ++ // same as TemplateTable::invokevirtual, ++ // minus the CP setup and profiling: ++ ++ if (VerifyMethodHandles) { ++ verify_ref_kind(_masm, JVM_REF_invokeVirtual, member_reg, temp3); ++ } ++ ++ // pick out the vtable index from the MemberName, and then we can discard it: ++ Register temp2_index = temp2; ++ __ ld(temp2_index, member_vmindex); ++ ++ if (VerifyMethodHandles) { ++ Label L_index_ok; ++ __ slt(AT, R0, temp2_index); ++ __ bne(AT, R0, L_index_ok); ++ __ delayed()->nop(); ++ __ STOP("no virtual index"); ++ __ BIND(L_index_ok); ++ } ++ ++ // Note: The verifier invariants allow us to ignore MemberName.clazz and vmtarget ++ // at this point. And VerifyMethodHandles has already checked clazz, if needed. ++ ++ // get target Method* & entry point ++ __ lookup_virtual_method(temp1_recv_klass, temp2_index, rm_method); ++ break; ++ } ++ ++ case vmIntrinsics::_linkToInterface: ++ { ++ // same as TemplateTable::invokeinterface ++ // (minus the CP setup and profiling, with different argument motion) ++ if (VerifyMethodHandles) { ++ verify_ref_kind(_masm, JVM_REF_invokeInterface, member_reg, temp3); ++ } ++ ++ Register temp3_intf = temp3; ++ __ load_heap_oop(temp3_intf, member_clazz); ++ load_klass_from_Class(_masm, temp3_intf); ++ __ verify_klass_ptr(temp3_intf); ++ ++ Register rm_index = rm_method; ++ __ ld(rm_index, member_vmindex); ++ if (VerifyMethodHandles) { ++ Label L; ++ __ slt(AT, rm_index, R0); ++ __ beq(AT, R0, L); ++ __ delayed()->nop(); ++ __ STOP("invalid vtable index for MH.invokeInterface"); ++ __ bind(L); ++ } ++ ++ // given intf, index, and recv klass, dispatch to the implementation method ++ __ lookup_interface_method(temp1_recv_klass, temp3_intf, ++ // note: next two args must be the same: ++ rm_index, rm_method, ++ temp2, ++ L_incompatible_class_change_error); ++ break; ++ } ++ ++ default: ++ fatal(err_msg_res("unexpected intrinsic %d: %s", iid, vmIntrinsics::name_at(iid))); ++ break; ++ } ++ ++ // Live at this point: ++ // rm_method ++ ++ // After figuring out which concrete method to call, jump into it. ++ // Note that this works in the interpreter with no data motion. 
++ // But the compiled version will require that r_recv be shifted out. ++ __ verify_method_ptr(rm_method); ++ jump_from_method_handle(_masm, rm_method, temp1, for_compiler_entry); ++ ++ if (iid == vmIntrinsics::_linkToInterface) { ++ __ bind(L_incompatible_class_change_error); ++ address icce_entry= StubRoutines::throw_IncompatibleClassChangeError_entry(); ++ __ jmp(icce_entry, relocInfo::runtime_call_type); ++ __ delayed()->nop(); ++ } ++ } ++} ++ ++#ifndef PRODUCT ++void trace_method_handle_stub(const char* adaptername, ++ oop mh, ++ intptr_t* saved_regs, ++ intptr_t* entry_sp) { ++ // called as a leaf from native code: do not block the JVM! ++ bool has_mh = (strstr(adaptername, "/static") == NULL && ++ strstr(adaptername, "linkTo") == NULL); // static linkers don't have MH ++ const char* mh_reg_name = has_mh ? "s7_mh" : "s7"; ++ tty->print_cr("MH %s %s=" PTR_FORMAT " sp=" PTR_FORMAT, ++ adaptername, mh_reg_name, ++ p2i(mh), p2i(entry_sp)); ++ ++ if (Verbose) { ++ tty->print_cr("Registers:"); ++ const int saved_regs_count = RegisterImpl::number_of_registers; ++ for (int i = 0; i < saved_regs_count; i++) { ++ Register r = as_Register(i); ++ // The registers are stored in reverse order on the stack (by pusha). ++ tty->print("%3s=" PTR_FORMAT, r->name(), saved_regs[((saved_regs_count - 1) - i)]); ++ if ((i + 1) % 4 == 0) { ++ tty->cr(); ++ } else { ++ tty->print(", "); ++ } ++ } ++ tty->cr(); ++ ++ { ++ // dumping last frame with frame::describe ++ ++ JavaThread* p = JavaThread::active(); ++ ++ ResourceMark rm; ++ PRESERVE_EXCEPTION_MARK; // may not be needed by safer and unexpensive here ++ FrameValues values; ++ ++ // Note: We want to allow trace_method_handle from any call site. ++ // While trace_method_handle creates a frame, it may be entered ++ // without a PC on the stack top (e.g. not just after a call). ++ // Walking that frame could lead to failures due to that invalid PC. ++ // => carefully detect that frame when doing the stack walking ++ ++ // Current C frame ++ frame cur_frame = os::current_frame(); ++ ++ // Robust search of trace_calling_frame (independant of inlining). ++ // Assumes saved_regs comes from a pusha in the trace_calling_frame. ++ assert(cur_frame.sp() < saved_regs, "registers not saved on stack ?"); ++ frame trace_calling_frame = os::get_sender_for_C_frame(&cur_frame); ++ while (trace_calling_frame.fp() < saved_regs) { ++ trace_calling_frame = os::get_sender_for_C_frame(&trace_calling_frame); ++ } ++ ++ // safely create a frame and call frame::describe ++ intptr_t *dump_sp = trace_calling_frame.sender_sp(); ++ intptr_t *dump_fp = trace_calling_frame.link(); ++ ++ bool walkable = has_mh; // whether the traced frame shoud be walkable ++ ++ if (walkable) { ++ // The previous definition of walkable may have to be refined ++ // if new call sites cause the next frame constructor to start ++ // failing. Alternatively, frame constructors could be ++ // modified to support the current or future non walkable ++ // frames (but this is more intrusive and is not considered as ++ // part of this RFE, which will instead use a simpler output). 
++ frame dump_frame = frame(dump_sp, dump_fp); ++ dump_frame.describe(values, 1); ++ } else { ++ // Stack may not be walkable (invalid PC above FP): ++ // Add descriptions without building a Java frame to avoid issues ++ values.describe(-1, dump_fp, "fp for #1 "); ++ values.describe(-1, dump_sp, "sp for #1"); ++ } ++ values.describe(-1, entry_sp, "raw top of stack"); ++ ++ tty->print_cr("Stack layout:"); ++ values.print(p); ++ } ++ if (has_mh && mh->is_oop()) { ++ mh->print(); ++ if (java_lang_invoke_MethodHandle::is_instance(mh)) { ++ if (java_lang_invoke_MethodHandle::form_offset_in_bytes() != 0) ++ java_lang_invoke_MethodHandle::form(mh)->print(); ++ } ++ } ++ } ++} ++ ++// The stub wraps the arguments in a struct on the stack to avoid ++// dealing with the different calling conventions for passing 6 ++// arguments. ++struct MethodHandleStubArguments { ++ const char* adaptername; ++ oopDesc* mh; ++ intptr_t* saved_regs; ++ intptr_t* entry_sp; ++}; ++void trace_method_handle_stub_wrapper(MethodHandleStubArguments* args) { ++ trace_method_handle_stub(args->adaptername, ++ args->mh, ++ args->saved_regs, ++ args->entry_sp); ++} ++ ++void MethodHandles::trace_method_handle(MacroAssembler* _masm, const char* adaptername) { ++} ++#endif //PRODUCT +diff --git a/hotspot/src/cpu/mips/vm/methodHandles_mips.hpp b/hotspot/src/cpu/mips/vm/methodHandles_mips.hpp +new file mode 100644 +index 0000000000..03b65fc8ef +--- /dev/null ++++ b/hotspot/src/cpu/mips/vm/methodHandles_mips.hpp +@@ -0,0 +1,62 @@ ++/* ++ * Copyright (c) 2010, 2012, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++// Platform-specific definitions for method handles. ++// These definitions are inlined into class MethodHandles. 
++ ++// Adapters ++enum /* platform_dependent_constants */ { ++ adapter_code_size = 32000 DEBUG_ONLY(+ 150000) ++}; ++ ++// Additional helper methods for MethodHandles code generation: ++public: ++ static void load_klass_from_Class(MacroAssembler* _masm, Register klass_reg); ++ ++ static void verify_klass(MacroAssembler* _masm, ++ Register obj, SystemDictionary::WKID klass_id, ++ const char* error_message = "wrong klass") NOT_DEBUG_RETURN; ++ ++ static void verify_method_handle(MacroAssembler* _masm, Register mh_reg) { ++ verify_klass(_masm, mh_reg, SystemDictionary::WK_KLASS_ENUM_NAME(java_lang_invoke_MethodHandle), ++ "reference is a MH"); ++ } ++ ++ static void verify_ref_kind(MacroAssembler* _masm, int ref_kind, Register member_reg, Register temp) NOT_DEBUG_RETURN; ++ ++ // Similar to InterpreterMacroAssembler::jump_from_interpreted. ++ // Takes care of special dispatch from single stepping too. ++ static void jump_from_method_handle(MacroAssembler* _masm, Register method, Register temp, ++ bool for_compiler_entry); ++ ++ static void jump_to_lambda_form(MacroAssembler* _masm, ++ Register recv, Register method_temp, ++ Register temp2, ++ bool for_compiler_entry); ++ ++ static Register saved_last_sp_register() { ++ // Should be in sharedRuntime, not here. ++ return I29; ++ } +diff --git a/hotspot/src/cpu/mips/vm/mips.ad b/hotspot/src/cpu/mips/vm/mips.ad +new file mode 100644 +index 0000000000..3563bbe0e5 +--- /dev/null ++++ b/hotspot/src/cpu/mips/vm/mips.ad +@@ -0,0 +1,25 @@ ++// ++// Copyright (c) 2011, 2012, Oracle and/or its affiliates. All rights reserved. ++// Copyright (c) 2015, 2016, Loongson Technology. All rights reserved. ++// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++// ++// This code is free software; you can redistribute it and/or modify it ++// under the terms of the GNU General Public License version 2 only, as ++// published by the Free Software Foundation. ++// ++// This code is distributed in the hope that it will be useful, but WITHOUT ++// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++// version 2 for more details (a copy is included in the LICENSE file that ++// accompanied this code). ++// ++// You should have received a copy of the GNU General Public License version ++// 2 along with this work; if not, write to the Free Software Foundation, ++// Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++// ++// Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++// or visit www.oracle.com if you need additional information or have any ++// questions. ++// ++// ++ +diff --git a/hotspot/src/cpu/mips/vm/mips_64.ad b/hotspot/src/cpu/mips/vm/mips_64.ad +new file mode 100644 +index 0000000000..2d714c8be1 +--- /dev/null ++++ b/hotspot/src/cpu/mips/vm/mips_64.ad +@@ -0,0 +1,14035 @@ ++// ++// Copyright (c) 2003, 2013, Oracle and/or its affiliates. All rights reserved. ++// Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. ++// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++// ++// This code is free software; you can redistribute it and/or modify it ++// under the terms of the GNU General Public License version 2 only, as ++// published by the Free Software Foundation. ++// ++// This code is distributed in the hope that it will be useful, but WITHOUT ++// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++// FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU General Public License ++// version 2 for more details (a copy is included in the LICENSE file that ++// accompanied this code). ++// ++// You should have received a copy of the GNU General Public License version ++// 2 along with this work; if not, write to the Free Software Foundation, ++// Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++// ++// Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++// or visit www.oracle.com if you need additional information or have any ++// questions. ++// ++// ++ ++// GodSon3 Architecture Description File ++ ++//----------REGISTER DEFINITION BLOCK------------------------------------------ ++// This information is used by the matcher and the register allocator to ++// describe individual registers and classes of registers within the target ++// archtecture. ++ ++// format: ++// reg_def name (call convention, c-call convention, ideal type, encoding); ++// call convention : ++// NS = No-Save ++// SOC = Save-On-Call ++// SOE = Save-On-Entry ++// AS = Always-Save ++// ideal type : ++// see opto/opcodes.hpp for more info ++// reg_class name (reg, ...); ++// alloc_class name (reg, ...); ++register %{ ++ ++// General Registers ++// Integer Registers ++ reg_def R0 ( NS, NS, Op_RegI, 0, VMRegImpl::Bad()); ++ reg_def AT ( NS, NS, Op_RegI, 1, AT->as_VMReg()); ++ reg_def AT_H ( NS, NS, Op_RegI, 1, AT->as_VMReg()->next()); ++ reg_def V0 (SOC, SOC, Op_RegI, 2, V0->as_VMReg()); ++ reg_def V0_H (SOC, SOC, Op_RegI, 2, V0->as_VMReg()->next()); ++ reg_def V1 (SOC, SOC, Op_RegI, 3, V1->as_VMReg()); ++ reg_def V1_H (SOC, SOC, Op_RegI, 3, V1->as_VMReg()->next()); ++ reg_def A0 (SOC, SOC, Op_RegI, 4, A0->as_VMReg()); ++ reg_def A0_H (SOC, SOC, Op_RegI, 4, A0->as_VMReg()->next()); ++ reg_def A1 (SOC, SOC, Op_RegI, 5, A1->as_VMReg()); ++ reg_def A1_H (SOC, SOC, Op_RegI, 5, A1->as_VMReg()->next()); ++ reg_def A2 (SOC, SOC, Op_RegI, 6, A2->as_VMReg()); ++ reg_def A2_H (SOC, SOC, Op_RegI, 6, A2->as_VMReg()->next()); ++ reg_def A3 (SOC, SOC, Op_RegI, 7, A3->as_VMReg()); ++ reg_def A3_H (SOC, SOC, Op_RegI, 7, A3->as_VMReg()->next()); ++ reg_def A4 (SOC, SOC, Op_RegI, 8, A4->as_VMReg()); ++ reg_def A4_H (SOC, SOC, Op_RegI, 8, A4->as_VMReg()->next()); ++ reg_def A5 (SOC, SOC, Op_RegI, 9, A5->as_VMReg()); ++ reg_def A5_H (SOC, SOC, Op_RegI, 9, A5->as_VMReg()->next()); ++ reg_def A6 (SOC, SOC, Op_RegI, 10, A6->as_VMReg()); ++ reg_def A6_H (SOC, SOC, Op_RegI, 10, A6->as_VMReg()->next()); ++ reg_def A7 (SOC, SOC, Op_RegI, 11, A7->as_VMReg()); ++ reg_def A7_H (SOC, SOC, Op_RegI, 11, A7->as_VMReg()->next()); ++ reg_def T0 (SOC, SOC, Op_RegI, 12, T0->as_VMReg()); ++ reg_def T0_H (SOC, SOC, Op_RegI, 12, T0->as_VMReg()->next()); ++ reg_def T1 (SOC, SOC, Op_RegI, 13, T1->as_VMReg()); ++ reg_def T1_H (SOC, SOC, Op_RegI, 13, T1->as_VMReg()->next()); ++ reg_def T2 (SOC, SOC, Op_RegI, 14, T2->as_VMReg()); ++ reg_def T2_H (SOC, SOC, Op_RegI, 14, T2->as_VMReg()->next()); ++ reg_def T3 (SOC, SOC, Op_RegI, 15, T3->as_VMReg()); ++ reg_def T3_H (SOC, SOC, Op_RegI, 15, T3->as_VMReg()->next()); ++ reg_def S0 (SOC, SOE, Op_RegI, 16, S0->as_VMReg()); ++ reg_def S0_H (SOC, SOE, Op_RegI, 16, S0->as_VMReg()->next()); ++ reg_def S1 (SOC, SOE, Op_RegI, 17, S1->as_VMReg()); ++ reg_def S1_H (SOC, SOE, Op_RegI, 17, S1->as_VMReg()->next()); ++ reg_def S2 (SOC, SOE, Op_RegI, 18, S2->as_VMReg()); ++ reg_def S2_H (SOC, SOE, Op_RegI, 18, S2->as_VMReg()->next()); ++ reg_def S3 (SOC, SOE, Op_RegI, 19, S3->as_VMReg()); ++ reg_def S3_H (SOC, SOE, Op_RegI, 19, 
S3->as_VMReg()->next()); ++ reg_def S4 (SOC, SOE, Op_RegI, 20, S4->as_VMReg()); ++ reg_def S4_H (SOC, SOE, Op_RegI, 20, S4->as_VMReg()->next()); ++ reg_def S5 (SOC, SOE, Op_RegI, 21, S5->as_VMReg()); ++ reg_def S5_H (SOC, SOE, Op_RegI, 21, S5->as_VMReg()->next()); ++ reg_def S6 (SOC, SOE, Op_RegI, 22, S6->as_VMReg()); ++ reg_def S6_H (SOC, SOE, Op_RegI, 22, S6->as_VMReg()->next()); ++ reg_def S7 (SOC, SOE, Op_RegI, 23, S7->as_VMReg()); ++ reg_def S7_H (SOC, SOE, Op_RegI, 23, S7->as_VMReg()->next()); ++ reg_def T8 (SOC, SOC, Op_RegI, 24, T8->as_VMReg()); ++ reg_def T8_H (SOC, SOC, Op_RegI, 24, T8->as_VMReg()->next()); ++ reg_def T9 (SOC, SOC, Op_RegI, 25, T9->as_VMReg()); ++ reg_def T9_H (SOC, SOC, Op_RegI, 25, T9->as_VMReg()->next()); ++ ++// Special Registers ++ reg_def K0 ( NS, NS, Op_RegI, 26, K0->as_VMReg()); ++ reg_def K1 ( NS, NS, Op_RegI, 27, K1->as_VMReg()); ++ reg_def GP ( NS, NS, Op_RegI, 28, GP->as_VMReg()); ++ reg_def GP_H ( NS, NS, Op_RegI, 28, GP->as_VMReg()->next()); ++ reg_def SP ( NS, NS, Op_RegI, 29, SP->as_VMReg()); ++ reg_def SP_H ( NS, NS, Op_RegI, 29, SP->as_VMReg()->next()); ++ reg_def FP ( NS, NS, Op_RegI, 30, FP->as_VMReg()); ++ reg_def FP_H ( NS, NS, Op_RegI, 30, FP->as_VMReg()->next()); ++ reg_def RA ( NS, NS, Op_RegI, 31, RA->as_VMReg()); ++ reg_def RA_H ( NS, NS, Op_RegI, 31, RA->as_VMReg()->next()); ++ ++// Floating registers. ++reg_def F0 ( SOC, SOC, Op_RegF, 0, F0->as_VMReg()); ++reg_def F0_H ( SOC, SOC, Op_RegF, 0, F0->as_VMReg()->next()); ++reg_def F1 ( SOC, SOC, Op_RegF, 1, F1->as_VMReg()); ++reg_def F1_H ( SOC, SOC, Op_RegF, 1, F1->as_VMReg()->next()); ++reg_def F2 ( SOC, SOC, Op_RegF, 2, F2->as_VMReg()); ++reg_def F2_H ( SOC, SOC, Op_RegF, 2, F2->as_VMReg()->next()); ++reg_def F3 ( SOC, SOC, Op_RegF, 3, F3->as_VMReg()); ++reg_def F3_H ( SOC, SOC, Op_RegF, 3, F3->as_VMReg()->next()); ++reg_def F4 ( SOC, SOC, Op_RegF, 4, F4->as_VMReg()); ++reg_def F4_H ( SOC, SOC, Op_RegF, 4, F4->as_VMReg()->next()); ++reg_def F5 ( SOC, SOC, Op_RegF, 5, F5->as_VMReg()); ++reg_def F5_H ( SOC, SOC, Op_RegF, 5, F5->as_VMReg()->next()); ++reg_def F6 ( SOC, SOC, Op_RegF, 6, F6->as_VMReg()); ++reg_def F6_H ( SOC, SOC, Op_RegF, 6, F6->as_VMReg()->next()); ++reg_def F7 ( SOC, SOC, Op_RegF, 7, F7->as_VMReg()); ++reg_def F7_H ( SOC, SOC, Op_RegF, 7, F7->as_VMReg()->next()); ++reg_def F8 ( SOC, SOC, Op_RegF, 8, F8->as_VMReg()); ++reg_def F8_H ( SOC, SOC, Op_RegF, 8, F8->as_VMReg()->next()); ++reg_def F9 ( SOC, SOC, Op_RegF, 9, F9->as_VMReg()); ++reg_def F9_H ( SOC, SOC, Op_RegF, 9, F9->as_VMReg()->next()); ++reg_def F10 ( SOC, SOC, Op_RegF, 10, F10->as_VMReg()); ++reg_def F10_H ( SOC, SOC, Op_RegF, 10, F10->as_VMReg()->next()); ++reg_def F11 ( SOC, SOC, Op_RegF, 11, F11->as_VMReg()); ++reg_def F11_H ( SOC, SOC, Op_RegF, 11, F11->as_VMReg()->next()); ++reg_def F12 ( SOC, SOC, Op_RegF, 12, F12->as_VMReg()); ++reg_def F12_H ( SOC, SOC, Op_RegF, 12, F12->as_VMReg()->next()); ++reg_def F13 ( SOC, SOC, Op_RegF, 13, F13->as_VMReg()); ++reg_def F13_H ( SOC, SOC, Op_RegF, 13, F13->as_VMReg()->next()); ++reg_def F14 ( SOC, SOC, Op_RegF, 14, F14->as_VMReg()); ++reg_def F14_H ( SOC, SOC, Op_RegF, 14, F14->as_VMReg()->next()); ++reg_def F15 ( SOC, SOC, Op_RegF, 15, F15->as_VMReg()); ++reg_def F15_H ( SOC, SOC, Op_RegF, 15, F15->as_VMReg()->next()); ++reg_def F16 ( SOC, SOC, Op_RegF, 16, F16->as_VMReg()); ++reg_def F16_H ( SOC, SOC, Op_RegF, 16, F16->as_VMReg()->next()); ++reg_def F17 ( SOC, SOC, Op_RegF, 17, F17->as_VMReg()); ++reg_def F17_H ( SOC, SOC, Op_RegF, 17, F17->as_VMReg()->next()); 
++reg_def F18 ( SOC, SOC, Op_RegF, 18, F18->as_VMReg()); ++reg_def F18_H ( SOC, SOC, Op_RegF, 18, F18->as_VMReg()->next()); ++reg_def F19 ( SOC, SOC, Op_RegF, 19, F19->as_VMReg()); ++reg_def F19_H ( SOC, SOC, Op_RegF, 19, F19->as_VMReg()->next()); ++reg_def F20 ( SOC, SOC, Op_RegF, 20, F20->as_VMReg()); ++reg_def F20_H ( SOC, SOC, Op_RegF, 20, F20->as_VMReg()->next()); ++reg_def F21 ( SOC, SOC, Op_RegF, 21, F21->as_VMReg()); ++reg_def F21_H ( SOC, SOC, Op_RegF, 21, F21->as_VMReg()->next()); ++reg_def F22 ( SOC, SOC, Op_RegF, 22, F22->as_VMReg()); ++reg_def F22_H ( SOC, SOC, Op_RegF, 22, F22->as_VMReg()->next()); ++reg_def F23 ( SOC, SOC, Op_RegF, 23, F23->as_VMReg()); ++reg_def F23_H ( SOC, SOC, Op_RegF, 23, F23->as_VMReg()->next()); ++reg_def F24 ( SOC, SOC, Op_RegF, 24, F24->as_VMReg()); ++reg_def F24_H ( SOC, SOC, Op_RegF, 24, F24->as_VMReg()->next()); ++reg_def F25 ( SOC, SOC, Op_RegF, 25, F25->as_VMReg()); ++reg_def F25_H ( SOC, SOC, Op_RegF, 25, F25->as_VMReg()->next()); ++reg_def F26 ( SOC, SOC, Op_RegF, 26, F26->as_VMReg()); ++reg_def F26_H ( SOC, SOC, Op_RegF, 26, F26->as_VMReg()->next()); ++reg_def F27 ( SOC, SOC, Op_RegF, 27, F27->as_VMReg()); ++reg_def F27_H ( SOC, SOC, Op_RegF, 27, F27->as_VMReg()->next()); ++reg_def F28 ( SOC, SOC, Op_RegF, 28, F28->as_VMReg()); ++reg_def F28_H ( SOC, SOC, Op_RegF, 28, F28->as_VMReg()->next()); ++reg_def F29 ( SOC, SOC, Op_RegF, 29, F29->as_VMReg()); ++reg_def F29_H ( SOC, SOC, Op_RegF, 29, F29->as_VMReg()->next()); ++reg_def F30 ( SOC, SOC, Op_RegF, 30, F30->as_VMReg()); ++reg_def F30_H ( SOC, SOC, Op_RegF, 30, F30->as_VMReg()->next()); ++reg_def F31 ( SOC, SOC, Op_RegF, 31, F31->as_VMReg()); ++reg_def F31_H ( SOC, SOC, Op_RegF, 31, F31->as_VMReg()->next()); ++ ++ ++// ---------------------------- ++// Special Registers ++//S6 is used for get_thread(S6) ++//S5 is uesd for heapbase of compressed oop ++alloc_class chunk0( ++ S7, S7_H, ++ S0, S0_H, ++ S1, S1_H, ++ S2, S2_H, ++ S4, S4_H, ++ S5, S5_H, ++ S6, S6_H, ++ S3, S3_H, ++ T2, T2_H, ++ T3, T3_H, ++ T8, T8_H, ++ T9, T9_H, ++ T1, T1_H, // inline_cache_reg ++ V1, V1_H, ++ A7, A7_H, ++ A6, A6_H, ++ A5, A5_H, ++ A4, A4_H, ++ V0, V0_H, ++ A3, A3_H, ++ A2, A2_H, ++ A1, A1_H, ++ A0, A0_H, ++ T0, T0_H, ++ GP, GP_H ++ RA, RA_H, ++ SP, SP_H, // stack_pointer ++ FP, FP_H // frame_pointer ++ ); ++ ++alloc_class chunk1( F0, F0_H, ++ F1, F1_H, ++ F2, F2_H, ++ F3, F3_H, ++ F4, F4_H, ++ F5, F5_H, ++ F6, F6_H, ++ F7, F7_H, ++ F8, F8_H, ++ F9, F9_H, ++ F10, F10_H, ++ F11, F11_H, ++ F20, F20_H, ++ F21, F21_H, ++ F22, F22_H, ++ F23, F23_H, ++ F24, F24_H, ++ F25, F25_H, ++ F26, F26_H, ++ F27, F27_H, ++ F28, F28_H, ++ F19, F19_H, ++ F18, F18_H, ++ F17, F17_H, ++ F16, F16_H, ++ F15, F15_H, ++ F14, F14_H, ++ F13, F13_H, ++ F12, F12_H, ++ F29, F29_H, ++ F30, F30_H, ++ F31, F31_H); ++ ++reg_class s_reg( S0, S1, S2, S3, S4, S5, S6, S7 ); ++reg_class s0_reg( S0 ); ++reg_class s1_reg( S1 ); ++reg_class s2_reg( S2 ); ++reg_class s3_reg( S3 ); ++reg_class s4_reg( S4 ); ++reg_class s5_reg( S5 ); ++reg_class s6_reg( S6 ); ++reg_class s7_reg( S7 ); ++ ++reg_class t_reg( T0, T1, T2, T3, T8, T9 ); ++reg_class t0_reg( T0 ); ++reg_class t1_reg( T1 ); ++reg_class t2_reg( T2 ); ++reg_class t3_reg( T3 ); ++reg_class t8_reg( T8 ); ++reg_class t9_reg( T9 ); ++ ++reg_class a_reg( A0, A1, A2, A3, A4, A5, A6, A7 ); ++reg_class a0_reg( A0 ); ++reg_class a1_reg( A1 ); ++reg_class a2_reg( A2 ); ++reg_class a3_reg( A3 ); ++reg_class a4_reg( A4 ); ++reg_class a5_reg( A5 ); ++reg_class a6_reg( A6 ); ++reg_class a7_reg( A7 ); ++ ++reg_class 
v0_reg( V0 ); ++reg_class v1_reg( V1 ); ++ ++reg_class sp_reg( SP, SP_H ); ++reg_class fp_reg( FP, FP_H ); ++ ++reg_class v0_long_reg( V0, V0_H ); ++reg_class v1_long_reg( V1, V1_H ); ++reg_class a0_long_reg( A0, A0_H ); ++reg_class a1_long_reg( A1, A1_H ); ++reg_class a2_long_reg( A2, A2_H ); ++reg_class a3_long_reg( A3, A3_H ); ++reg_class a4_long_reg( A4, A4_H ); ++reg_class a5_long_reg( A5, A5_H ); ++reg_class a6_long_reg( A6, A6_H ); ++reg_class a7_long_reg( A7, A7_H ); ++reg_class t0_long_reg( T0, T0_H ); ++reg_class t1_long_reg( T1, T1_H ); ++reg_class t2_long_reg( T2, T2_H ); ++reg_class t3_long_reg( T3, T3_H ); ++reg_class t8_long_reg( T8, T8_H ); ++reg_class t9_long_reg( T9, T9_H ); ++reg_class s0_long_reg( S0, S0_H ); ++reg_class s1_long_reg( S1, S1_H ); ++reg_class s2_long_reg( S2, S2_H ); ++reg_class s3_long_reg( S3, S3_H ); ++reg_class s4_long_reg( S4, S4_H ); ++reg_class s5_long_reg( S5, S5_H ); ++reg_class s6_long_reg( S6, S6_H ); ++reg_class s7_long_reg( S7, S7_H ); ++ ++reg_class int_reg( S7, S0, S1, S2, S4, S3, T8, T2, T3, T1, V1, A7, A6, A5, A4, V0, A3, A2, A1, A0, T0 ); ++ ++reg_class no_Ax_int_reg( S7, S0, S1, S2, S4, S3, T8, T2, T3, T1, V1, V0, T0 ); ++ ++reg_class p_reg( ++ S7, S7_H, ++ S0, S0_H, ++ S1, S1_H, ++ S2, S2_H, ++ S4, S4_H, ++ S3, S3_H, ++ T8, T8_H, ++ T2, T2_H, ++ T3, T3_H, ++ T1, T1_H, ++ A7, A7_H, ++ A6, A6_H, ++ A5, A5_H, ++ A4, A4_H, ++ A3, A3_H, ++ A2, A2_H, ++ A1, A1_H, ++ A0, A0_H, ++ T0, T0_H ++ ); ++ ++reg_class no_T8_p_reg( ++ S7, S7_H, ++ S0, S0_H, ++ S1, S1_H, ++ S2, S2_H, ++ S4, S4_H, ++ S3, S3_H, ++ T2, T2_H, ++ T3, T3_H, ++ T1, T1_H, ++ A7, A7_H, ++ A6, A6_H, ++ A5, A5_H, ++ A4, A4_H, ++ A3, A3_H, ++ A2, A2_H, ++ A1, A1_H, ++ A0, A0_H, ++ T0, T0_H ++ ); ++ ++reg_class long_reg( ++ S7, S7_H, ++ S0, S0_H, ++ S1, S1_H, ++ S2, S2_H, ++ S4, S4_H, ++ S3, S3_H, ++ T8, T8_H, ++ T2, T2_H, ++ T3, T3_H, ++ T1, T1_H, ++ A7, A7_H, ++ A6, A6_H, ++ A5, A5_H, ++ A4, A4_H, ++ A3, A3_H, ++ A2, A2_H, ++ A1, A1_H, ++ A0, A0_H, ++ T0, T0_H ++ ); ++ ++ ++// Floating point registers. ++// F31 are not used as temporary registers in D2I ++reg_class flt_reg( F0, F1, F2, F3, F4, F5, F6, F7, F8, F9, F10, F11, F12, F13, F14, F15, F16, F17, F18, F19, F20, F21, F22, F23, F24, F25, F26, F27, F28, F29, F31); ++reg_class dbl_reg( F0, F0_H, ++ F1, F1_H, ++ F2, F2_H, ++ F3, F3_H, ++ F4, F4_H, ++ F5, F5_H, ++ F6, F6_H, ++ F7, F7_H, ++ F8, F8_H, ++ F9, F9_H, ++ F10, F10_H, ++ F11, F11_H, ++ F12, F12_H, ++ F13, F13_H, ++ F14, F14_H, ++ F15, F15_H, ++ F16, F16_H, ++ F17, F17_H, ++ F18, F18_H, ++ F19, F19_H, ++ F20, F20_H, ++ F21, F21_H, ++ F22, F22_H, ++ F23, F23_H, ++ F24, F24_H, ++ F25, F25_H, ++ F26, F26_H, ++ F27, F27_H, ++ F28, F28_H, ++ F29, F29_H, ++ F31, F31_H); ++ ++reg_class flt_arg0( F12 ); ++reg_class dbl_arg0( F12, F12_H ); ++reg_class dbl_arg1( F14, F14_H ); ++ ++%} ++ ++//----------DEFINITION BLOCK--------------------------------------------------- ++// Define name --> value mappings to inform the ADLC of an integer valued name ++// Current support includes integer values in the range [0, 0x7FFFFFFF] ++// Format: ++// int_def ( , ); ++// Generated Code in ad_.hpp ++// #define () ++// // value == ++// Generated code in ad_.cpp adlc_verification() ++// assert( == , "Expect () to equal "); ++// ++definitions %{ ++ int_def DEFAULT_COST ( 100, 100); ++ int_def HUGE_COST (1000000, 1000000); ++ ++ // Memory refs are twice as expensive as run-of-the-mill. ++ int_def MEMORY_REF_COST ( 200, DEFAULT_COST * 2); ++ ++ // Branches are even more expensive. 
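// Each reg_class above is just a named set of allocatable registers; the ADLC
// turns it into a register mask that the allocator tests. A minimal model of
// that idea, using the encodings from the reg_def entries above (T0=12 ... T3=15,
// T8=24, T9=25); everything else here is illustrative, not the real ADLC output.
#include <cstdint>
#include <cstdio>
#include <initializer_list>

static uint64_t make_mask(std::initializer_list<int> encodings) {
  uint64_t m = 0;
  for (int e : encodings) m |= (uint64_t)1 << e;
  return m;
}

int main() {
  uint64_t t_reg = make_mask({12, 13, 14, 15, 24, 25});  // like reg_class t_reg(T0,T1,T2,T3,T8,T9)
  std::printf("T2 (enc 14) in t_reg: %d\n", (int)((t_reg >> 14) & 1));  // 1
  std::printf("S0 (enc 16) in t_reg: %d\n", (int)((t_reg >> 16) & 1));  // 0
  return 0;
}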
++ int_def BRANCH_COST ( 300, DEFAULT_COST * 3); ++ // we use jr instruction to construct call, so more expensive ++ int_def CALL_COST ( 500, DEFAULT_COST * 5); ++/* ++ int_def EQUAL ( 1, 1 ); ++ int_def NOT_EQUAL ( 2, 2 ); ++ int_def GREATER ( 3, 3 ); ++ int_def GREATER_EQUAL ( 4, 4 ); ++ int_def LESS ( 5, 5 ); ++ int_def LESS_EQUAL ( 6, 6 ); ++*/ ++%} ++ ++ ++ ++//----------SOURCE BLOCK------------------------------------------------------- ++// This is a block of C++ code which provides values, functions, and ++// definitions necessary in the rest of the architecture description ++ ++source_hpp %{ ++// Header information of the source block. ++// Method declarations/definitions which are used outside ++// the ad-scope can conveniently be defined here. ++// ++// To keep related declarations/definitions/uses close together, ++// we switch between source %{ }% and source_hpp %{ }% freely as needed. ++ ++class CallStubImpl { ++ ++ //-------------------------------------------------------------- ++ //---< Used for optimization in Compile::shorten_branches >--- ++ //-------------------------------------------------------------- ++ ++ public: ++ // Size of call trampoline stub. ++ static uint size_call_trampoline() { ++ return 0; // no call trampolines on this platform ++ } ++ ++ // number of relocations needed by a call trampoline stub ++ static uint reloc_call_trampoline() { ++ return 0; // no call trampolines on this platform ++ } ++}; ++ ++class HandlerImpl { ++ ++ public: ++ ++ static int emit_exception_handler(CodeBuffer &cbuf); ++ static int emit_deopt_handler(CodeBuffer& cbuf); ++ ++ static uint size_exception_handler() { ++ // NativeCall instruction size is the same as NativeJump. ++ // exception handler starts out as jump and can be patched to ++ // a call be deoptimization. (4932387) ++ // Note that this value is also credited (in output.cpp) to ++ // the size of the code section. ++ int size = NativeCall::instruction_size; ++ return round_to(size, 16); ++ } ++ ++ static uint size_deopt_handler() { ++ int size = NativeCall::instruction_size; ++ return round_to(size, 16); ++ } ++}; ++ ++%} // end source_hpp ++ ++source %{ ++ ++#define NO_INDEX 0 ++#define RELOC_IMM64 Assembler::imm_operand ++#define RELOC_DISP32 Assembler::disp32_operand ++ ++ ++#define __ _masm. ++ ++#define A0 RA0 ++#define A1 RA1 ++#define A2 RA2 ++#define A3 RA3 ++#define A4 RA4 ++#define A5 RA5 ++#define A6 RA6 ++#define A7 RA7 ++#define T0 RT0 ++#define T1 RT1 ++#define T2 RT2 ++#define T3 RT3 ++#define T8 RT8 ++#define T9 RT9 ++ ++ ++// Emit exception handler code. ++// Stuff framesize into a register and call a VM stub routine. ++int HandlerImpl::emit_exception_handler(CodeBuffer& cbuf) { ++ // Note that the code buffer's insts_mark is always relative to insts. ++ // That's why we must use the macroassembler to generate a handler. ++ MacroAssembler _masm(&cbuf); ++ address base = __ start_a_stub(size_exception_handler()); ++ if (base == NULL) { ++ ciEnv::current()->record_failure("CodeCache is full"); ++ return 0; // CodeBuffer::expand failed ++ } ++ ++ int offset = __ offset(); ++ ++ __ block_comment("; emit_exception_handler"); ++ ++ cbuf.set_insts_mark(); ++ __ relocate(relocInfo::runtime_call_type); ++ __ patchable_jump((address)OptoRuntime::exception_blob()->entry_point()); ++ __ align(16); ++ assert(__ offset() - offset <= (int) size_exception_handler(), "overflow"); ++ __ end_a_stub(); ++ return offset; ++} ++ ++// Emit deopt handler code. 
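// size_exception_handler() and size_deopt_handler() above reserve space for a
// single NativeCall rounded up to a 16-byte boundary. A stand-alone sketch of
// that arithmetic; the 24-byte call size is taken from the assert on
// NativeCall::instruction_size further down, and round_to here assumes a
// power-of-two alignment.
#include <cstdio>

static unsigned round_to(unsigned x, unsigned align) {
  return (x + align - 1) & ~(align - 1);   // align must be a power of two
}

int main() {
  unsigned native_call_size = 24;                                          // six 4-byte instructions
  std::printf("handler stub: %u bytes\n", round_to(native_call_size, 16)); // 32
  return 0;
}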
++int HandlerImpl::emit_deopt_handler(CodeBuffer& cbuf) { ++ // Note that the code buffer's insts_mark is always relative to insts. ++ // That's why we must use the macroassembler to generate a handler. ++ MacroAssembler _masm(&cbuf); ++ address base = __ start_a_stub(size_deopt_handler()); ++ if (base == NULL) { ++ ciEnv::current()->record_failure("CodeCache is full"); ++ return 0; // CodeBuffer::expand failed ++ } ++ ++ int offset = __ offset(); ++ ++ __ block_comment("; emit_deopt_handler"); ++ ++ cbuf.set_insts_mark(); ++ __ relocate(relocInfo::runtime_call_type); ++ __ patchable_call(SharedRuntime::deopt_blob()->unpack()); ++ __ align(16); ++ assert(__ offset() - offset <= (int) size_deopt_handler(), "overflow"); ++ __ end_a_stub(); ++ return offset; ++} ++ ++ ++const bool Matcher::match_rule_supported(int opcode) { ++ if (!has_match_rule(opcode)) ++ return false; ++ ++ switch (opcode) { ++ //Op_CountLeadingZerosI Op_CountLeadingZerosL can be deleted, all MIPS CPUs support clz & dclz. ++ case Op_CountLeadingZerosI: ++ case Op_CountLeadingZerosL: ++ if (!UseCountLeadingZerosInstructionMIPS64) ++ return false; ++ break; ++ case Op_CountTrailingZerosI: ++ case Op_CountTrailingZerosL: ++ if (!UseCountTrailingZerosInstructionMIPS64) ++ return false; ++ break; ++ } ++ ++ return true; // Per default match rules are supported. ++} ++ ++bool Matcher::is_short_branch_offset(int rule, int br_size, int offset) { ++ int offs = offset - br_size + 4; ++ // To be conservative on MIPS ++ // branch node should be end with: ++ // branch inst ++ // delay slot ++ const int safety_zone = 3 * BytesPerInstWord; ++ return Assembler::is_simm16((offs<0 ? offs-safety_zone : offs+safety_zone) >> 2); ++} ++ ++ ++// No additional cost for CMOVL. ++const int Matcher::long_cmove_cost() { return 0; } ++ ++// No CMOVF/CMOVD with SSE2 ++const int Matcher::float_cmove_cost() { return ConditionalMoveLimit; } ++ ++// Does the CPU require late expand (see block.cpp for description of late expand)? ++const bool Matcher::require_postalloc_expand = false; ++ ++// Should the Matcher clone shifts on addressing modes, expecting them ++// to be subsumed into complex addressing expressions or compute them ++// into registers? True for Intel but false for most RISCs ++const bool Matcher::clone_shift_expressions = false; ++ ++// Do we need to mask the count passed to shift instructions or does ++// the cpu only look at the lower 5/6 bits anyway? ++const bool Matcher::need_masked_shift_count = false; ++ ++bool Matcher::narrow_oop_use_complex_address() { ++ assert(UseCompressedOops, "only for compressed oops code"); ++ return false; ++} ++ ++bool Matcher::narrow_klass_use_complex_address() { ++ assert(UseCompressedClassPointers, "only for compressed klass code"); ++ return false; ++} ++ ++// This is UltraSparc specific, true just means we have fast l2f conversion ++const bool Matcher::convL2FSupported(void) { ++ return true; ++} ++ ++// Max vector size in bytes. 0 if not supported. ++const int Matcher::vector_width_in_bytes(BasicType bt) { ++ if (MaxVectorSize == 0) ++ return 0; ++ assert(MaxVectorSize == 8, ""); ++ return 8; ++} ++ ++// Vector ideal reg ++const uint Matcher::vector_ideal_reg(int size) { ++ assert(MaxVectorSize == 8, ""); ++ switch(size) { ++ case 8: return Op_VecD; ++ } ++ ShouldNotReachHere(); ++ return 0; ++} ++ ++// Only lowest bits of xmm reg are used for vector shift count. 
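// is_short_branch_offset() above decides whether a branch can use the short
// form by checking that the word offset, padded by a small safety zone for the
// delay slot, still fits a signed 16-bit field. A stand-alone sketch of that
// test, assuming is_simm16(x) means "x fits in 16 signed bits".
#include <cstdint>
#include <cstdio>

static bool is_simm16(int64_t x) { return x >= -32768 && x <= 32767; }

static bool short_branch_reachable(int offset, int br_size) {
  const int BytesPerInstWord = 4;
  int offs = offset - br_size + 4;               // offset measured from the branch itself
  const int safety_zone = 3 * BytesPerInstWord;  // conservative slack, as in the code above
  return is_simm16((offs < 0 ? offs - safety_zone : offs + safety_zone) >> 2);
}

int main() {
  std::printf("%d\n", short_branch_reachable(1000, 8));    // 1: well inside +/-128 KiB
  std::printf("%d\n", short_branch_reachable(1 << 20, 8)); // 0: too far for a 16-bit word offset
  return 0;
}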
++const uint Matcher::vector_shift_count_ideal_reg(int size) { ++ fatal("vector shift is not supported"); ++ return Node::NotAMachineReg; ++} ++ ++// Limits on vector size (number of elements) loaded into vector. ++const int Matcher::max_vector_size(const BasicType bt) { ++ assert(is_java_primitive(bt), "only primitive type vectors"); ++ return vector_width_in_bytes(bt)/type2aelembytes(bt); ++} ++ ++const int Matcher::min_vector_size(const BasicType bt) { ++ return max_vector_size(bt); // Same as max. ++} ++ ++// MIPS supports misaligned vectors store/load? FIXME ++const bool Matcher::misaligned_vectors_ok() { ++ return false; ++ //return !AlignVector; // can be changed by flag ++} ++ ++// Register for DIVI projection of divmodI ++RegMask Matcher::divI_proj_mask() { ++ ShouldNotReachHere(); ++ return RegMask(); ++} ++ ++// Register for MODI projection of divmodI ++RegMask Matcher::modI_proj_mask() { ++ ShouldNotReachHere(); ++ return RegMask(); ++} ++ ++// Register for DIVL projection of divmodL ++RegMask Matcher::divL_proj_mask() { ++ ShouldNotReachHere(); ++ return RegMask(); ++} ++ ++int Matcher::regnum_to_fpu_offset(int regnum) { ++ return regnum - 32; // The FP registers are in the second chunk ++} ++ ++ ++const bool Matcher::isSimpleConstant64(jlong value) { ++ // Will one (StoreL ConL) be cheaper than two (StoreI ConI)?. ++ return true; ++} ++ ++ ++// Return whether or not this register is ever used as an argument. This ++// function is used on startup to build the trampoline stubs in generateOptoStub. ++// Registers not mentioned will be killed by the VM call in the trampoline, and ++// arguments in those registers not be available to the callee. ++bool Matcher::can_be_java_arg( int reg ) { ++ // Refer to: [sharedRuntime_mips_64.cpp] SharedRuntime::java_calling_convention() ++ if ( reg == T0_num || reg == T0_H_num ++ || reg == A0_num || reg == A0_H_num ++ || reg == A1_num || reg == A1_H_num ++ || reg == A2_num || reg == A2_H_num ++ || reg == A3_num || reg == A3_H_num ++ || reg == A4_num || reg == A4_H_num ++ || reg == A5_num || reg == A5_H_num ++ || reg == A6_num || reg == A6_H_num ++ || reg == A7_num || reg == A7_H_num ) ++ return true; ++ ++ if ( reg == F12_num || reg == F12_H_num ++ || reg == F13_num || reg == F13_H_num ++ || reg == F14_num || reg == F14_H_num ++ || reg == F15_num || reg == F15_H_num ++ || reg == F16_num || reg == F16_H_num ++ || reg == F17_num || reg == F17_H_num ++ || reg == F18_num || reg == F18_H_num ++ || reg == F19_num || reg == F19_H_num ) ++ return true; ++ ++ return false; ++} ++ ++bool Matcher::is_spillable_arg( int reg ) { ++ return can_be_java_arg(reg); ++} ++ ++bool Matcher::use_asm_for_ldiv_by_con( jlong divisor ) { ++ return false; ++} ++ ++// Register for MODL projection of divmodL ++RegMask Matcher::modL_proj_mask() { ++ ShouldNotReachHere(); ++ return RegMask(); ++} ++ ++const RegMask Matcher::method_handle_invoke_SP_save_mask() { ++ return FP_REG_mask(); ++} ++ ++// MIPS doesn't support AES intrinsics ++const bool Matcher::pass_original_key_for_aes() { ++ return false; ++} ++ ++int CallStaticJavaDirectNode::compute_padding(int current_offset) const { ++ return round_to(current_offset, alignment_required()) - current_offset; ++} ++ ++int CallDynamicJavaDirectNode::compute_padding(int current_offset) const { ++ return round_to(current_offset, alignment_required()) - current_offset; ++} ++ ++int CallLeafNoFPDirectNode::compute_padding(int current_offset) const { ++ return round_to(current_offset, alignment_required()) - current_offset; ++} ++ 
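// max_vector_size() above is plain arithmetic: with the 8-byte vector width
// asserted by vector_width_in_bytes(), the lane count is 8 divided by the
// element size, and min_vector_size() pins the minimum to the same value, so
// only one vector length is supported. A small sketch of that arithmetic.
#include <cstdio>

static int max_vector_elems(int vector_width_in_bytes, int elem_bytes) {
  return vector_width_in_bytes / elem_bytes;
}

int main() {
  std::printf("byte lanes: %d\n", max_vector_elems(8, 1));  // 8
  std::printf("int  lanes: %d\n", max_vector_elems(8, 4));  // 2
  std::printf("long lanes: %d\n", max_vector_elems(8, 8));  // 1
  return 0;
}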
++int CallLeafDirectNode::compute_padding(int current_offset) const { ++ return round_to(current_offset, alignment_required()) - current_offset; ++} ++ ++int CallRuntimeDirectNode::compute_padding(int current_offset) const { ++ return round_to(current_offset, alignment_required()) - current_offset; ++} ++ ++// If CPU can load and store mis-aligned doubles directly then no fixup is ++// needed. Else we split the double into 2 integer pieces and move it ++// piece-by-piece. Only happens when passing doubles into C code as the ++// Java calling convention forces doubles to be aligned. ++const bool Matcher::misaligned_doubles_ok = false; ++// Do floats take an entire double register or just half? ++//const bool Matcher::float_in_double = true; ++bool Matcher::float_in_double() { return false; } ++// Threshold size for cleararray. ++const int Matcher::init_array_short_size = 8 * BytesPerLong; ++// Do ints take an entire long register or just half? ++const bool Matcher::int_in_long = true; ++// Is it better to copy float constants, or load them directly from memory? ++// Intel can load a float constant from a direct address, requiring no ++// extra registers. Most RISCs will have to materialize an address into a ++// register first, so they would do better to copy the constant from stack. ++const bool Matcher::rematerialize_float_constants = false; ++// Advertise here if the CPU requires explicit rounding operations ++// to implement the UseStrictFP mode. ++const bool Matcher::strict_fp_requires_explicit_rounding = false; ++// false => size gets scaled to BytesPerLong, ok. ++const bool Matcher::init_array_count_is_in_bytes = false; ++ ++// Indicate if the safepoint node needs the polling page as an input. ++// Since MIPS doesn't have absolute addressing, it needs. ++bool SafePointNode::needs_polling_address_input() { ++ return false; ++} ++ ++// !!!!! Special hack to get all type of calls to specify the byte offset ++// from the start of the call to the point where the return address ++// will point. 
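// The compute_padding() overrides above all compute the same thing: the number
// of bytes needed to round the current offset up to the node's required
// alignment. A stand-alone sketch of that calculation.
#include <cstdio>

static int compute_padding_sketch(int current_offset, int alignment) {
  int rounded = (current_offset + alignment - 1) / alignment * alignment;  // round up
  return rounded - current_offset;                                         // bytes of padding
}

int main() {
  std::printf("%d\n", compute_padding_sketch(20, 16));  // 12
  std::printf("%d\n", compute_padding_sketch(32, 16));  // 0
  return 0;
}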
++int MachCallStaticJavaNode::ret_addr_offset() { ++ //lui ++ //ori ++ //nop ++ //nop ++ //jalr ++ //nop ++ return 24; ++} ++ ++int MachCallDynamicJavaNode::ret_addr_offset() { ++ //lui IC_Klass, ++ //ori IC_Klass, ++ //dsll IC_Klass ++ //ori IC_Klass ++ ++ //lui T9 ++ //ori T9 ++ //nop ++ //nop ++ //jalr T9 ++ //nop ++ return 4 * 4 + 4 * 6; ++} ++ ++//============================================================================= ++ ++// Figure out which register class each belongs in: rc_int, rc_float, rc_stack ++enum RC { rc_bad, rc_int, rc_float, rc_stack }; ++static enum RC rc_class( OptoReg::Name reg ) { ++ if( !OptoReg::is_valid(reg) ) return rc_bad; ++ if (OptoReg::is_stack(reg)) return rc_stack; ++ VMReg r = OptoReg::as_VMReg(reg); ++ if (r->is_Register()) return rc_int; ++ assert(r->is_FloatRegister(), "must be"); ++ return rc_float; ++} ++ ++uint MachSpillCopyNode::implementation( CodeBuffer *cbuf, PhaseRegAlloc *ra_, bool do_size, outputStream* st ) const { ++ // Get registers to move ++ OptoReg::Name src_second = ra_->get_reg_second(in(1)); ++ OptoReg::Name src_first = ra_->get_reg_first(in(1)); ++ OptoReg::Name dst_second = ra_->get_reg_second(this ); ++ OptoReg::Name dst_first = ra_->get_reg_first(this ); ++ ++ enum RC src_second_rc = rc_class(src_second); ++ enum RC src_first_rc = rc_class(src_first); ++ enum RC dst_second_rc = rc_class(dst_second); ++ enum RC dst_first_rc = rc_class(dst_first); ++ ++ assert(OptoReg::is_valid(src_first) && OptoReg::is_valid(dst_first), "must move at least 1 register" ); ++ ++ // Generate spill code! ++ int size = 0; ++ ++ if( src_first == dst_first && src_second == dst_second ) ++ return 0; // Self copy, no move ++ ++ if (src_first_rc == rc_stack) { ++ // mem -> ++ if (dst_first_rc == rc_stack) { ++ // mem -> mem ++ assert(src_second != dst_first, "overlap"); ++ if ((src_first & 1) == 0 && src_first + 1 == src_second && ++ (dst_first & 1) == 0 && dst_first + 1 == dst_second) { ++ // 64-bit ++ int src_offset = ra_->reg2offset(src_first); ++ int dst_offset = ra_->reg2offset(dst_first); ++ if (cbuf) { ++ MacroAssembler _masm(cbuf); ++ __ ld(AT, Address(SP, src_offset)); ++ __ sd(AT, Address(SP, dst_offset)); ++#ifndef PRODUCT ++ } else { ++ if(!do_size){ ++ if (size != 0) st->print("\n\t"); ++ st->print("ld AT, [SP + #%d]\t# 64-bit mem-mem spill 1\n\t" ++ "sd AT, [SP + #%d]", ++ src_offset, dst_offset); ++ } ++#endif ++ } ++ size += 8; ++ } else { ++ // 32-bit ++ assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform"); ++ assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform"); ++ // No pushl/popl, so: ++ int src_offset = ra_->reg2offset(src_first); ++ int dst_offset = ra_->reg2offset(dst_first); ++ if (cbuf) { ++ MacroAssembler _masm(cbuf); ++ __ lw(AT, Address(SP, src_offset)); ++ __ sw(AT, Address(SP, dst_offset)); ++#ifndef PRODUCT ++ } else { ++ if(!do_size){ ++ if (size != 0) st->print("\n\t"); ++ st->print("lw AT, [SP + #%d] spill 2\n\t" ++ "sw AT, [SP + #%d]\n\t", ++ src_offset, dst_offset); ++ } ++#endif ++ } ++ size += 8; ++ } ++ return size; ++ } else if (dst_first_rc == rc_int) { ++ // mem -> gpr ++ if ((src_first & 1) == 0 && src_first + 1 == src_second && ++ (dst_first & 1) == 0 && dst_first + 1 == dst_second) { ++ // 64-bit ++ int offset = ra_->reg2offset(src_first); ++ if (cbuf) { ++ MacroAssembler _masm(cbuf); ++ __ ld(as_Register(Matcher::_regEncode[dst_first]), Address(SP, offset)); ++#ifndef PRODUCT ++ } else { ++ if(!do_size){ ++ if (size != 0) st->print("\n\t"); ++ st->print("ld 
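// The spill-copy code above (and below) repeatedly tests whether two allocator
// slot numbers name an aligned 64-bit register pair: the low half must sit on
// an even slot and the high half immediately after it. A stand-alone sketch of
// that predicate.
#include <cstdio>

static bool is_aligned_64bit_pair(int first, int second) {
  return (first & 1) == 0 && first + 1 == second;
}

int main() {
  std::printf("%d\n", is_aligned_64bit_pair(4, 5));  // 1: a 64-bit value in an aligned pair
  std::printf("%d\n", is_aligned_64bit_pair(5, 6));  // 0: not an aligned pair
  return 0;
}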
%s, [SP + #%d]\t# spill 3", ++ Matcher::regName[dst_first], ++ offset); ++ } ++#endif ++ } ++ size += 4; ++ } else { ++ // 32-bit ++ assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform"); ++ assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform"); ++ int offset = ra_->reg2offset(src_first); ++ if (cbuf) { ++ MacroAssembler _masm(cbuf); ++ if (this->ideal_reg() == Op_RegI) ++ __ lw(as_Register(Matcher::_regEncode[dst_first]), Address(SP, offset)); ++ else ++ __ lwu(as_Register(Matcher::_regEncode[dst_first]), Address(SP, offset)); ++#ifndef PRODUCT ++ } else { ++ if(!do_size){ ++ if (size != 0) st->print("\n\t"); ++ if (this->ideal_reg() == Op_RegI) ++ st->print("lw %s, [SP + #%d]\t# spill 4", ++ Matcher::regName[dst_first], ++ offset); ++ else ++ st->print("lwu %s, [SP + #%d]\t# spill 5", ++ Matcher::regName[dst_first], ++ offset); ++ } ++#endif ++ } ++ size += 4; ++ } ++ return size; ++ } else if (dst_first_rc == rc_float) { ++ // mem-> xmm ++ if ((src_first & 1) == 0 && src_first + 1 == src_second && ++ (dst_first & 1) == 0 && dst_first + 1 == dst_second) { ++ // 64-bit ++ int offset = ra_->reg2offset(src_first); ++ if (cbuf) { ++ MacroAssembler _masm(cbuf); ++ __ ldc1( as_FloatRegister(Matcher::_regEncode[dst_first]), Address(SP, offset)); ++#ifndef PRODUCT ++ } else { ++ if (!do_size) { ++ if (size != 0) st->print("\n\t"); ++ st->print("ldc1 %s, [SP + #%d]\t# spill 6", ++ Matcher::regName[dst_first], ++ offset); ++ } ++#endif ++ } ++ size += 4; ++ } else { ++ // 32-bit ++ assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform"); ++ assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform"); ++ int offset = ra_->reg2offset(src_first); ++ if (cbuf) { ++ MacroAssembler _masm(cbuf); ++ __ lwc1( as_FloatRegister(Matcher::_regEncode[dst_first]), Address(SP, offset)); ++#ifndef PRODUCT ++ } else { ++ if(!do_size){ ++ if (size != 0) st->print("\n\t"); ++ st->print("lwc1 %s, [SP + #%d]\t# spill 7", ++ Matcher::regName[dst_first], ++ offset); ++ } ++#endif ++ } ++ size += 4; ++ } ++ return size; ++ } ++ } else if (src_first_rc == rc_int) { ++ // gpr -> ++ if (dst_first_rc == rc_stack) { ++ // gpr -> mem ++ if ((src_first & 1) == 0 && src_first + 1 == src_second && ++ (dst_first & 1) == 0 && dst_first + 1 == dst_second) { ++ // 64-bit ++ int offset = ra_->reg2offset(dst_first); ++ if (cbuf) { ++ MacroAssembler _masm(cbuf); ++ __ sd(as_Register(Matcher::_regEncode[src_first]), Address(SP, offset)); ++#ifndef PRODUCT ++ } else { ++ if(!do_size){ ++ if (size != 0) st->print("\n\t"); ++ st->print("sd %s, [SP + #%d] # spill 8", ++ Matcher::regName[src_first], ++ offset); ++ } ++#endif ++ } ++ size += 4; ++ } else { ++ // 32-bit ++ assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform"); ++ assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform"); ++ int offset = ra_->reg2offset(dst_first); ++ if (cbuf) { ++ MacroAssembler _masm(cbuf); ++ __ sw(as_Register(Matcher::_regEncode[src_first]), Address(SP, offset)); ++#ifndef PRODUCT ++ } else { ++ if (!do_size) { ++ if (size != 0) st->print("\n\t"); ++ st->print("sw %s, [SP + #%d]\t# spill 9", ++ Matcher::regName[src_first], offset); ++ } ++#endif ++ } ++ size += 4; ++ } ++ return size; ++ } else if (dst_first_rc == rc_int) { ++ // gpr -> gpr ++ if ((src_first & 1) == 0 && src_first + 1 == src_second && ++ (dst_first & 1) == 0 && dst_first + 1 == dst_second) { ++ // 64-bit ++ if (cbuf) { ++ MacroAssembler _masm(cbuf); ++ __ 
move(as_Register(Matcher::_regEncode[dst_first]), ++ as_Register(Matcher::_regEncode[src_first])); ++#ifndef PRODUCT ++ } else { ++ if(!do_size){ ++ if (size != 0) st->print("\n\t"); ++ st->print("move(64bit) %s <-- %s\t# spill 10", ++ Matcher::regName[dst_first], ++ Matcher::regName[src_first]); ++ } ++#endif ++ } ++ size += 4; ++ return size; ++ } else { ++ // 32-bit ++ assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform"); ++ assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform"); ++ if (cbuf) { ++ MacroAssembler _masm(cbuf); ++ if (this->ideal_reg() == Op_RegI) ++ __ move_u32(as_Register(Matcher::_regEncode[dst_first]), as_Register(Matcher::_regEncode[src_first])); ++ else ++ __ daddu(as_Register(Matcher::_regEncode[dst_first]), as_Register(Matcher::_regEncode[src_first]), R0); ++#ifndef PRODUCT ++ } else { ++ if (!do_size) { ++ if (size != 0) st->print("\n\t"); ++ st->print("move(32-bit) %s <-- %s\t# spill 11", ++ Matcher::regName[dst_first], ++ Matcher::regName[src_first]); ++ } ++#endif ++ } ++ size += 4; ++ return size; ++ } ++ } else if (dst_first_rc == rc_float) { ++ // gpr -> xmm ++ if ((src_first & 1) == 0 && src_first + 1 == src_second && ++ (dst_first & 1) == 0 && dst_first + 1 == dst_second) { ++ // 64-bit ++ if (cbuf) { ++ MacroAssembler _masm(cbuf); ++ __ dmtc1(as_Register(Matcher::_regEncode[src_first]), as_FloatRegister(Matcher::_regEncode[dst_first])); ++#ifndef PRODUCT ++ } else { ++ if(!do_size){ ++ if (size != 0) st->print("\n\t"); ++ st->print("dmtc1 %s, %s\t# spill 12", ++ Matcher::regName[dst_first], ++ Matcher::regName[src_first]); ++ } ++#endif ++ } ++ size += 4; ++ } else { ++ // 32-bit ++ assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform"); ++ assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform"); ++ if (cbuf) { ++ MacroAssembler _masm(cbuf); ++ __ mtc1( as_Register(Matcher::_regEncode[src_first]), as_FloatRegister(Matcher::_regEncode[dst_first]) ); ++#ifndef PRODUCT ++ } else { ++ if(!do_size){ ++ if (size != 0) st->print("\n\t"); ++ st->print("mtc1 %s, %s\t# spill 13", ++ Matcher::regName[dst_first], ++ Matcher::regName[src_first]); ++ } ++#endif ++ } ++ size += 4; ++ } ++ return size; ++ } ++ } else if (src_first_rc == rc_float) { ++ // xmm -> ++ if (dst_first_rc == rc_stack) { ++ // xmm -> mem ++ if ((src_first & 1) == 0 && src_first + 1 == src_second && ++ (dst_first & 1) == 0 && dst_first + 1 == dst_second) { ++ // 64-bit ++ int offset = ra_->reg2offset(dst_first); ++ if (cbuf) { ++ MacroAssembler _masm(cbuf); ++ __ sdc1( as_FloatRegister(Matcher::_regEncode[src_first]), Address(SP, offset) ); ++#ifndef PRODUCT ++ } else { ++ if(!do_size){ ++ if (size != 0) st->print("\n\t"); ++ st->print("sdc1 %s, [SP + #%d]\t# spill 14", ++ Matcher::regName[src_first], ++ offset); ++ } ++#endif ++ } ++ size += 4; ++ } else { ++ // 32-bit ++ assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform"); ++ assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform"); ++ int offset = ra_->reg2offset(dst_first); ++ if (cbuf) { ++ MacroAssembler _masm(cbuf); ++ __ swc1(as_FloatRegister(Matcher::_regEncode[src_first]), Address(SP, offset)); ++#ifndef PRODUCT ++ } else { ++ if(!do_size){ ++ if (size != 0) st->print("\n\t"); ++ st->print("swc1 %s, [SP + #%d]\t# spill 15", ++ Matcher::regName[src_first], ++ offset); ++ } ++#endif ++ } ++ size += 4; ++ } ++ return size; ++ } else if (dst_first_rc == rc_int) { ++ // xmm -> gpr ++ if ((src_first 
& 1) == 0 && src_first + 1 == src_second && ++ (dst_first & 1) == 0 && dst_first + 1 == dst_second) { ++ // 64-bit ++ if (cbuf) { ++ MacroAssembler _masm(cbuf); ++ __ dmfc1( as_Register(Matcher::_regEncode[dst_first]), as_FloatRegister(Matcher::_regEncode[src_first])); ++#ifndef PRODUCT ++ } else { ++ if(!do_size){ ++ if (size != 0) st->print("\n\t"); ++ st->print("dmfc1 %s, %s\t# spill 16", ++ Matcher::regName[dst_first], ++ Matcher::regName[src_first]); ++ } ++#endif ++ } ++ size += 4; ++ } else { ++ // 32-bit ++ assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform"); ++ assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform"); ++ if (cbuf) { ++ MacroAssembler _masm(cbuf); ++ __ mfc1( as_Register(Matcher::_regEncode[dst_first]), as_FloatRegister(Matcher::_regEncode[src_first])); ++#ifndef PRODUCT ++ } else { ++ if(!do_size){ ++ if (size != 0) st->print("\n\t"); ++ st->print("mfc1 %s, %s\t# spill 17", ++ Matcher::regName[dst_first], ++ Matcher::regName[src_first]); ++ } ++#endif ++ } ++ size += 4; ++ } ++ return size; ++ } else if (dst_first_rc == rc_float) { ++ // xmm -> xmm ++ if ((src_first & 1) == 0 && src_first + 1 == src_second && ++ (dst_first & 1) == 0 && dst_first + 1 == dst_second) { ++ // 64-bit ++ if (cbuf) { ++ MacroAssembler _masm(cbuf); ++ __ mov_d( as_FloatRegister(Matcher::_regEncode[dst_first]), as_FloatRegister(Matcher::_regEncode[src_first])); ++#ifndef PRODUCT ++ } else { ++ if(!do_size){ ++ if (size != 0) st->print("\n\t"); ++ st->print("mov_d %s <-- %s\t# spill 18", ++ Matcher::regName[dst_first], ++ Matcher::regName[src_first]); ++ } ++#endif ++ } ++ size += 4; ++ } else { ++ // 32-bit ++ assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform"); ++ assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform"); ++ if (cbuf) { ++ MacroAssembler _masm(cbuf); ++ __ mov_s( as_FloatRegister(Matcher::_regEncode[dst_first]), as_FloatRegister(Matcher::_regEncode[src_first])); ++#ifndef PRODUCT ++ } else { ++ if(!do_size){ ++ if (size != 0) st->print("\n\t"); ++ st->print("mov_s %s <-- %s\t# spill 19", ++ Matcher::regName[dst_first], ++ Matcher::regName[src_first]); ++ } ++#endif ++ } ++ size += 4; ++ } ++ return size; ++ } ++ } ++ ++ assert(0," foo "); ++ Unimplemented(); ++ return size; ++ ++} ++ ++#ifndef PRODUCT ++void MachSpillCopyNode::format( PhaseRegAlloc *ra_, outputStream* st ) const { ++ implementation( NULL, ra_, false, st ); ++} ++#endif ++ ++void MachSpillCopyNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const { ++ implementation( &cbuf, ra_, false, NULL ); ++} ++ ++uint MachSpillCopyNode::size(PhaseRegAlloc *ra_) const { ++ return MachNode::size(ra_); ++} ++ ++//============================================================================= ++# ++ ++#ifndef PRODUCT ++void MachBreakpointNode::format( PhaseRegAlloc *, outputStream* st ) const { ++ st->print("BRK"); ++} ++#endif ++ ++void MachBreakpointNode::emit(CodeBuffer &cbuf, PhaseRegAlloc* ra_) const { ++ MacroAssembler _masm(&cbuf); ++ __ brk(5); ++} ++ ++uint MachBreakpointNode::size(PhaseRegAlloc* ra_) const { ++ return MachNode::size(ra_); ++} ++ ++ ++//============================================================================= ++#ifndef PRODUCT ++void MachEpilogNode::format( PhaseRegAlloc *ra_, outputStream* st ) const { ++ Compile *C = ra_->C; ++ int framesize = C->frame_size_in_bytes(); ++ ++ assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned"); ++ ++ st->print_cr("daddiu SP, SP, %d # 
Rlease stack @ MachEpilogNode", framesize); ++ st->print("\t"); ++ if (UseLEXT1) { ++ st->print_cr("gslq RA, FP, SP, %d # Restore FP & RA @ MachEpilogNode", -wordSize*2); ++ } else { ++ st->print_cr("ld RA, SP, %d # Restore RA @ MachEpilogNode", -wordSize); ++ st->print("\t"); ++ st->print_cr("ld FP, SP, %d # Restore FP @ MachEpilogNode", -wordSize*2); ++ } ++ ++ if( do_polling() && C->is_method_compilation() ) { ++ st->print("\t"); ++ st->print_cr("Poll Safepoint # MachEpilogNode"); ++ } ++} ++#endif ++ ++void MachEpilogNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const { ++ Compile *C = ra_->C; ++ MacroAssembler _masm(&cbuf); ++ int framesize = C->frame_size_in_bytes(); ++ ++ assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned"); ++ ++ if (UseLEXT1) { ++ __ gslq(RA, FP, SP, framesize - wordSize * 2); ++ } else { ++ __ ld(RA, SP, framesize - wordSize ); ++ __ ld(FP, SP, framesize - wordSize * 2); ++ } ++ __ daddiu(SP, SP, framesize); ++ ++ if( do_polling() && C->is_method_compilation() ) { ++ __ set64(AT, (long)os::get_polling_page()); ++ __ relocate(relocInfo::poll_return_type); ++ __ lw(AT, AT, 0); ++ } ++} ++ ++uint MachEpilogNode::size(PhaseRegAlloc *ra_) const { ++ return MachNode::size(ra_); // too many variables; just compute it the hard way fujie debug ++} ++ ++int MachEpilogNode::reloc() const { ++ return 0; // a large enough number ++} ++ ++const Pipeline * MachEpilogNode::pipeline() const { ++ return MachNode::pipeline_class(); ++} ++ ++int MachEpilogNode::safepoint_offset() const { return 0; } ++ ++//============================================================================= ++ ++#ifndef PRODUCT ++void BoxLockNode::format( PhaseRegAlloc *ra_, outputStream* st ) const { ++ int offset = ra_->reg2offset(in_RegMask(0).find_first_elem()); ++ int reg = ra_->get_reg_first(this); ++ st->print("ADDI %s, SP, %d @BoxLockNode",Matcher::regName[reg],offset); ++} ++#endif ++ ++ ++uint BoxLockNode::size(PhaseRegAlloc *ra_) const { ++ return 4; ++} ++ ++void BoxLockNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const { ++ MacroAssembler _masm(&cbuf); ++ int offset = ra_->reg2offset(in_RegMask(0).find_first_elem()); ++ int reg = ra_->get_encode(this); ++ ++ __ addiu(as_Register(reg), SP, offset); ++} ++ ++ ++//static int sizeof_FFree_Float_Stack_All = -1; ++ ++int MachCallRuntimeNode::ret_addr_offset() { ++ //lui ++ //ori ++ //dsll ++ //ori ++ //jalr ++ //nop ++ assert(NativeCall::instruction_size == 24, "in MachCallRuntimeNode::ret_addr_offset()"); ++ return NativeCall::instruction_size; ++} ++ ++ ++//============================================================================= ++#ifndef PRODUCT ++void MachNopNode::format( PhaseRegAlloc *, outputStream* st ) const { ++ st->print("NOP \t# %d bytes pad for loops and calls", 4 * _count); ++} ++#endif ++ ++void MachNopNode::emit(CodeBuffer &cbuf, PhaseRegAlloc * ) const { ++ MacroAssembler _masm(&cbuf); ++ int i = 0; ++ for(i = 0; i < _count; i++) ++ __ nop(); ++} ++ ++uint MachNopNode::size(PhaseRegAlloc *) const { ++ return 4 * _count; ++} ++const Pipeline* MachNopNode::pipeline() const { ++ return MachNode::pipeline_class(); ++} ++ ++//============================================================================= ++ ++//============================================================================= ++#ifndef PRODUCT ++void MachUEPNode::format( PhaseRegAlloc *ra_, outputStream* st ) const { ++ st->print_cr("load_klass(T9, T0)"); ++ st->print_cr("\tbeq(T9, iCache, L)"); ++ st->print_cr("\tnop"); ++ 
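// The epilogue above reloads RA and FP from the two slots just below the top
// of the fixed frame before releasing it, and the prologue further down stores
// them at the same offsets. A small sketch of those slot offsets, assuming
// wordSize == 8.
#include <cstdio>

struct FrameSketch {
  int framesize;                                       // total frame size in bytes
  int ra_offset() const { return framesize - 8; }      // framesize - wordSize
  int fp_offset() const { return framesize - 2 * 8; }  // framesize - 2 * wordSize
};

int main() {
  FrameSketch f{80};
  std::printf("RA at SP+%d, FP at SP+%d\n", f.ra_offset(), f.fp_offset());  // 72 and 64
  return 0;
}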
st->print_cr("\tjmp(SharedRuntime::get_ic_miss_stub(), relocInfo::runtime_call_type)"); ++ st->print_cr("\tnop"); ++ st->print_cr("\tnop"); ++ st->print_cr(" L:"); ++} ++#endif ++ ++ ++void MachUEPNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const { ++ MacroAssembler _masm(&cbuf); ++ int ic_reg = Matcher::inline_cache_reg_encode(); ++ Label L; ++ Register receiver = T0; ++ Register iCache = as_Register(ic_reg); ++ ++ __ load_klass(T9, receiver); ++ __ beq(T9, iCache, L); ++ __ delayed()->nop(); ++ __ jmp((address)SharedRuntime::get_ic_miss_stub(), relocInfo::runtime_call_type); ++ __ delayed()->nop(); ++ __ bind(L); ++} ++ ++uint MachUEPNode::size(PhaseRegAlloc *ra_) const { ++ return MachNode::size(ra_); ++} ++ ++ ++ ++//============================================================================= ++ ++const RegMask& MachConstantBaseNode::_out_RegMask = P_REG_mask(); ++ ++int Compile::ConstantTable::calculate_table_base_offset() const { ++ return 0; // absolute addressing, no offset ++} ++ ++bool MachConstantBaseNode::requires_postalloc_expand() const { return false; } ++void MachConstantBaseNode::postalloc_expand(GrowableArray *nodes, PhaseRegAlloc *ra_) { ++ ShouldNotReachHere(); ++} ++ ++void MachConstantBaseNode::emit(CodeBuffer& cbuf, PhaseRegAlloc* ra_) const { ++ Compile* C = ra_->C; ++ Compile::ConstantTable& constant_table = C->constant_table(); ++ MacroAssembler _masm(&cbuf); ++ ++ Register Rtoc = as_Register(ra_->get_encode(this)); ++ CodeSection* consts_section = __ code()->consts(); ++ int consts_size = consts_section->align_at_start(consts_section->size()); ++ assert(constant_table.size() == consts_size, "must be equal"); ++ ++ if (consts_section->size()) { ++ // Materialize the constant table base. ++ address baseaddr = consts_section->start() + -(constant_table.table_base_offset()); ++ // RelocationHolder rspec = internal_word_Relocation::spec(baseaddr); ++ __ relocate(relocInfo::internal_word_type); ++ __ patchable_set48(Rtoc, (long)baseaddr); ++ } ++} ++ ++uint MachConstantBaseNode::size(PhaseRegAlloc* ra_) const { ++ // patchable_set48 (4 insts) ++ return 4 * 4; ++} ++ ++#ifndef PRODUCT ++void MachConstantBaseNode::format(PhaseRegAlloc* ra_, outputStream* st) const { ++ Register r = as_Register(ra_->get_encode(this)); ++ st->print("patchable_set48 %s, &constanttable (constant table base) @ MachConstantBaseNode", r->name()); ++} ++#endif ++ ++ ++//============================================================================= ++#ifndef PRODUCT ++void MachPrologNode::format( PhaseRegAlloc *ra_, outputStream* st ) const { ++ Compile* C = ra_->C; ++ ++ int framesize = C->frame_size_in_bytes(); ++ int bangsize = C->bang_size_in_bytes(); ++ assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned"); ++ ++ // Calls to C2R adapters often do not accept exceptional returns. ++ // We require that their callers must bang for them. But be careful, because ++ // some VM calls (such as call site linkage) can use several kilobytes of ++ // stack. But the stack safety zone should account for that. ++ // See bugs 4446381, 4468289, 4497237. 
++ if (C->need_stack_bang(bangsize)) { ++ st->print_cr("# stack bang"); st->print("\t"); ++ } ++ if (UseLEXT1) { ++ st->print("gssq RA, FP, %d(SP) @ MachPrologNode\n\t", -wordSize*2); ++ } else { ++ st->print("sd RA, %d(SP) @ MachPrologNode\n\t", -wordSize); ++ st->print("sd FP, %d(SP) @ MachPrologNode\n\t", -wordSize*2); ++ } ++ st->print("daddiu FP, SP, -%d \n\t", wordSize*2); ++ st->print("daddiu SP, SP, -%d \t",framesize); ++} ++#endif ++ ++ ++void MachPrologNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const { ++ Compile* C = ra_->C; ++ MacroAssembler _masm(&cbuf); ++ ++ int framesize = C->frame_size_in_bytes(); ++ int bangsize = C->bang_size_in_bytes(); ++ ++ assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned"); ++ ++ // Make enough room for patch_verified_entry ++ __ nop(); ++ __ nop(); ++ ++ if (C->need_stack_bang(bangsize)) { ++ __ generate_stack_overflow_check(bangsize); ++ } ++ ++ __ daddiu(SP, SP, -framesize); ++ if (UseLEXT1) { ++ __ gssq(RA, FP, SP, framesize - wordSize * 2); ++ } else { ++ __ sd(RA, SP, framesize - wordSize); ++ __ sd(FP, SP, framesize - wordSize * 2); ++ } ++ __ daddiu(FP, SP, framesize - wordSize * 2); ++ ++ C->set_frame_complete(cbuf.insts_size()); ++ if (C->has_mach_constant_base_node()) { ++ // NOTE: We set the table base offset here because users might be ++ // emitted before MachConstantBaseNode. ++ Compile::ConstantTable& constant_table = C->constant_table(); ++ constant_table.set_table_base_offset(constant_table.calculate_table_base_offset()); ++ } ++} ++ ++ ++uint MachPrologNode::size(PhaseRegAlloc *ra_) const { ++ return MachNode::size(ra_); // too many variables; just compute it the hard way ++} ++ ++int MachPrologNode::reloc() const { ++ return 0; // a large enough number ++} ++ ++%} ++ ++//----------ENCODING BLOCK----------------------------------------------------- ++// This block specifies the encoding classes used by the compiler to output ++// byte streams. Encoding classes generate functions which are called by ++// Machine Instruction Nodes in order to generate the bit encoding of the ++// instruction. Operands specify their base encoding interface with the ++// interface keyword. There are currently supported four interfaces, ++// REG_INTER, CONST_INTER, MEMORY_INTER, & COND_INTER. REG_INTER causes an ++// operand to generate a function which returns its register number when ++// queried. CONST_INTER causes an operand to generate a function which ++// returns the value of the constant when queried. MEMORY_INTER causes an ++// operand to generate four functions which return the Base Register, the ++// Index Register, the Scale Value, and the Offset Value of the operand when ++// queried. COND_INTER causes an operand to generate six functions which ++// return the encoding code (ie - encoding bits for the instruction) ++// associated with each basic boolean condition for a conditional instruction. ++// Instructions specify two basic values for encoding. They use the ++// ins_encode keyword to specify their encoding class (which must be one of ++// the class names specified in the encoding block), and they use the ++// opcode keyword to specify, in order, their primary, secondary, and ++// tertiary opcode. Only the opcode sections which a particular instruction ++// needs for encoding need to be specified. 
++encode %{ ++ ++ //Load byte signed ++ enc_class load_B_enc (mRegI dst, memory mem) %{ ++ MacroAssembler _masm(&cbuf); ++ int dst = $dst$$reg; ++ int base = $mem$$base; ++ int index = $mem$$index; ++ int scale = $mem$$scale; ++ int disp = $mem$$disp; ++ ++ if( index != 0 ) { ++ if( Assembler::is_simm16(disp) ) { ++ if (UseLEXT1) { ++ if (scale == 0) { ++ __ gslbx(as_Register(dst), as_Register(base), as_Register(index), disp); ++ } else { ++ __ dsll(AT, as_Register(index), scale); ++ __ gslbx(as_Register(dst), as_Register(base), AT, disp); ++ } ++ } else { ++ if (scale == 0) { ++ __ addu(AT, as_Register(base), as_Register(index)); ++ } else { ++ __ dsll(AT, as_Register(index), scale); ++ __ addu(AT, as_Register(base), AT); ++ } ++ __ lb(as_Register(dst), AT, disp); ++ } ++ } else { ++ if (scale == 0) { ++ __ addu(AT, as_Register(base), as_Register(index)); ++ } else { ++ __ dsll(AT, as_Register(index), scale); ++ __ addu(AT, as_Register(base), AT); ++ } ++ __ move(T9, disp); ++ if (UseLEXT1) { ++ __ gslbx(as_Register(dst), AT, T9, 0); ++ } else { ++ __ addu(AT, AT, T9); ++ __ lb(as_Register(dst), AT, 0); ++ } ++ } ++ } else { ++ if( Assembler::is_simm16(disp) ) { ++ __ lb(as_Register(dst), as_Register(base), disp); ++ } else { ++ __ move(T9, disp); ++ if (UseLEXT1) { ++ __ gslbx(as_Register(dst), as_Register(base), T9, 0); ++ } else { ++ __ addu(AT, as_Register(base), T9); ++ __ lb(as_Register(dst), AT, 0); ++ } ++ } ++ } ++ %} ++ ++ //Load byte unsigned ++ enc_class load_UB_enc (mRegI dst, memory mem) %{ ++ MacroAssembler _masm(&cbuf); ++ int dst = $dst$$reg; ++ int base = $mem$$base; ++ int index = $mem$$index; ++ int scale = $mem$$scale; ++ int disp = $mem$$disp; ++ ++ if( index != 0 ) { ++ if (scale == 0) { ++ __ daddu(AT, as_Register(base), as_Register(index)); ++ } else { ++ __ dsll(AT, as_Register(index), scale); ++ __ daddu(AT, as_Register(base), AT); ++ } ++ if( Assembler::is_simm16(disp) ) { ++ __ lbu(as_Register(dst), AT, disp); ++ } else { ++ __ move(T9, disp); ++ __ daddu(AT, AT, T9); ++ __ lbu(as_Register(dst), AT, 0); ++ } ++ } else { ++ if( Assembler::is_simm16(disp) ) { ++ __ lbu(as_Register(dst), as_Register(base), disp); ++ } else { ++ __ move(T9, disp); ++ __ daddu(AT, as_Register(base), T9); ++ __ lbu(as_Register(dst), AT, 0); ++ } ++ } ++ %} ++ ++ enc_class store_B_reg_enc (memory mem, mRegI src) %{ ++ MacroAssembler _masm(&cbuf); ++ int src = $src$$reg; ++ int base = $mem$$base; ++ int index = $mem$$index; ++ int scale = $mem$$scale; ++ int disp = $mem$$disp; ++ ++ if( index != 0 ) { ++ if (scale == 0) { ++ if( Assembler::is_simm(disp, 8) ) { ++ if (UseLEXT1) { ++ __ gssbx(as_Register(src), as_Register(base), as_Register(index), disp); ++ } else { ++ __ addu(AT, as_Register(base), as_Register(index)); ++ __ sb(as_Register(src), AT, disp); ++ } ++ } else if( Assembler::is_simm16(disp) ) { ++ __ addu(AT, as_Register(base), as_Register(index)); ++ __ sb(as_Register(src), AT, disp); ++ } else { ++ __ addu(AT, as_Register(base), as_Register(index)); ++ __ move(T9, disp); ++ if (UseLEXT1) { ++ __ gssbx(as_Register(src), AT, T9, 0); ++ } else { ++ __ addu(AT, AT, T9); ++ __ sb(as_Register(src), AT, 0); ++ } ++ } ++ } else { ++ __ dsll(AT, as_Register(index), scale); ++ if( Assembler::is_simm(disp, 8) ) { ++ if (UseLEXT1) { ++ __ gssbx(as_Register(src), AT, as_Register(base), disp); ++ } else { ++ __ addu(AT, as_Register(base), AT); ++ __ sb(as_Register(src), AT, disp); ++ } ++ } else if( Assembler::is_simm16(disp) ) { ++ __ addu(AT, as_Register(base), AT); ++ __ 
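// Each enc_class above lowers a memory operand of the form
//   base + (index << scale) + disp
// in the same basic way: compute the address into the scratch register AT when
// an index is present, then either fold disp into the load/store if it fits a
// signed 16-bit immediate or materialize it in T9 first. A compact sketch of
// the non-LEXT1 decision path, printing the byte-load sequence it would pick
// (register names and mnemonics follow the code above; the sketch is
// illustrative only).
#include <cstdint>
#include <cstdio>

static bool is_simm16(int64_t x) { return x >= -32768 && x <= 32767; }

static void lower_byte_load(int base, int index, int scale, int64_t disp) {
  if (index != 0) {
    if (scale == 0) std::printf("addu AT, R%d, R%d\n", base, index);
    else            std::printf("dsll AT, R%d, %d\naddu AT, R%d, AT\n", index, scale, base);
    if (is_simm16(disp)) std::printf("lb dst, AT, %lld\n", (long long)disp);
    else                 std::printf("move T9, %lld\naddu AT, AT, T9\nlb dst, AT, 0\n", (long long)disp);
  } else {
    if (is_simm16(disp)) std::printf("lb dst, R%d, %lld\n", base, (long long)disp);
    else                 std::printf("move T9, %lld\naddu AT, R%d, T9\nlb dst, AT, 0\n", (long long)disp, base);
  }
}

int main() {
  lower_byte_load(4, 5, 3, 16);       // small displacement: folded into the lb
  lower_byte_load(4, 0, 0, 1 << 20);  // large displacement: materialized in T9
  return 0;
}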
sb(as_Register(src), AT, disp); ++ } else { ++ __ addu(AT, as_Register(base), AT); ++ __ move(T9, disp); ++ if (UseLEXT1) { ++ __ gssbx(as_Register(src), AT, T9, 0); ++ } else { ++ __ addu(AT, AT, T9); ++ __ sb(as_Register(src), AT, 0); ++ } ++ } ++ } ++ } else { ++ if( Assembler::is_simm16(disp) ) { ++ __ sb(as_Register(src), as_Register(base), disp); ++ } else { ++ __ move(T9, disp); ++ if (UseLEXT1) { ++ __ gssbx(as_Register(src), as_Register(base), T9, 0); ++ } else { ++ __ addu(AT, as_Register(base), T9); ++ __ sb(as_Register(src), AT, 0); ++ } ++ } ++ } ++ %} ++ ++ enc_class store_B_immI_enc (memory mem, immI8 src) %{ ++ MacroAssembler _masm(&cbuf); ++ int base = $mem$$base; ++ int index = $mem$$index; ++ int scale = $mem$$scale; ++ int disp = $mem$$disp; ++ int value = $src$$constant; ++ ++ if( index != 0 ) { ++ if (!UseLEXT1) { ++ if (scale == 0) { ++ __ daddu(AT, as_Register(base), as_Register(index)); ++ } else { ++ __ dsll(AT, as_Register(index), scale); ++ __ daddu(AT, as_Register(base), AT); ++ } ++ if( Assembler::is_simm16(disp) ) { ++ if (value == 0) { ++ __ sb(R0, AT, disp); ++ } else { ++ __ move(T9, value); ++ __ sb(T9, AT, disp); ++ } ++ } else { ++ if (value == 0) { ++ __ move(T9, disp); ++ __ daddu(AT, AT, T9); ++ __ sb(R0, AT, 0); ++ } else { ++ __ move(T9, disp); ++ __ daddu(AT, AT, T9); ++ __ move(T9, value); ++ __ sb(T9, AT, 0); ++ } ++ } ++ } else { ++ ++ if (scale == 0) { ++ if( Assembler::is_simm(disp, 8) ) { ++ if (value == 0) { ++ __ gssbx(R0, as_Register(base), as_Register(index), disp); ++ } else { ++ __ move(T9, value); ++ __ gssbx(T9, as_Register(base), as_Register(index), disp); ++ } ++ } else if( Assembler::is_simm16(disp) ) { ++ __ daddu(AT, as_Register(base), as_Register(index)); ++ if (value == 0) { ++ __ sb(R0, AT, disp); ++ } else { ++ __ move(T9, value); ++ __ sb(T9, AT, disp); ++ } ++ } else { ++ if (value == 0) { ++ __ daddu(AT, as_Register(base), as_Register(index)); ++ __ move(T9, disp); ++ __ gssbx(R0, AT, T9, 0); ++ } else { ++ __ move(AT, disp); ++ __ move(T9, value); ++ __ daddu(AT, as_Register(base), AT); ++ __ gssbx(T9, AT, as_Register(index), 0); ++ } ++ } ++ ++ } else { ++ ++ if( Assembler::is_simm(disp, 8) ) { ++ __ dsll(AT, as_Register(index), scale); ++ if (value == 0) { ++ __ gssbx(R0, as_Register(base), AT, disp); ++ } else { ++ __ move(T9, value); ++ __ gssbx(T9, as_Register(base), AT, disp); ++ } ++ } else if( Assembler::is_simm16(disp) ) { ++ __ dsll(AT, as_Register(index), scale); ++ __ daddu(AT, as_Register(base), AT); ++ if (value == 0) { ++ __ sb(R0, AT, disp); ++ } else { ++ __ move(T9, value); ++ __ sb(T9, AT, disp); ++ } ++ } else { ++ __ dsll(AT, as_Register(index), scale); ++ if (value == 0) { ++ __ daddu(AT, as_Register(base), AT); ++ __ move(T9, disp); ++ __ gssbx(R0, AT, T9, 0); ++ } else { ++ __ move(T9, disp); ++ __ daddu(AT, AT, T9); ++ __ move(T9, value); ++ __ gssbx(T9, as_Register(base), AT, 0); ++ } ++ } ++ } ++ } ++ } else { ++ if( Assembler::is_simm16(disp) ) { ++ if (value == 0) { ++ __ sb(R0, as_Register(base), disp); ++ } else { ++ __ move(AT, value); ++ __ sb(AT, as_Register(base), disp); ++ } ++ } else { ++ if (value == 0) { ++ __ move(T9, disp); ++ if (UseLEXT1) { ++ __ gssbx(R0, as_Register(base), T9, 0); ++ } else { ++ __ daddu(AT, as_Register(base), T9); ++ __ sb(R0, AT, 0); ++ } ++ } else { ++ __ move(T9, disp); ++ if (UseLEXT1) { ++ __ move(AT, value); ++ __ gssbx(AT, as_Register(base), T9, 0); ++ } else { ++ __ daddu(AT, as_Register(base), T9); ++ __ move(T9, value); ++ __ sb(T9, AT, 0); ++ } ++ 
} ++ } ++ } ++ %} ++ ++ ++ enc_class store_B_immI_enc_sync (memory mem, immI8 src) %{ ++ MacroAssembler _masm(&cbuf); ++ int base = $mem$$base; ++ int index = $mem$$index; ++ int scale = $mem$$scale; ++ int disp = $mem$$disp; ++ int value = $src$$constant; ++ ++ if( index != 0 ) { ++ if (UseLEXT1) { ++ if ( Assembler::is_simm(disp,8) ) { ++ if ( scale == 0 ) { ++ if ( value == 0 ) { ++ __ gssbx(R0, as_Register(base), as_Register(index), disp); ++ } else { ++ __ move(AT, value); ++ __ gssbx(AT, as_Register(base), as_Register(index), disp); ++ } ++ } else { ++ __ dsll(AT, as_Register(index), scale); ++ if ( value == 0 ) { ++ __ gssbx(R0, as_Register(base), AT, disp); ++ } else { ++ __ move(T9, value); ++ __ gssbx(T9, as_Register(base), AT, disp); ++ } ++ } ++ } else if ( Assembler::is_simm16(disp) ) { ++ if ( scale == 0 ) { ++ __ daddu(AT, as_Register(base), as_Register(index)); ++ if ( value == 0 ){ ++ __ sb(R0, AT, disp); ++ } else { ++ __ move(T9, value); ++ __ sb(T9, AT, disp); ++ } ++ } else { ++ __ dsll(AT, as_Register(index), scale); ++ __ daddu(AT, as_Register(base), AT); ++ if ( value == 0 ) { ++ __ sb(R0, AT, disp); ++ } else { ++ __ move(T9, value); ++ __ sb(T9, AT, disp); ++ } ++ } ++ } else { ++ if ( scale == 0 ) { ++ __ move(AT, disp); ++ __ daddu(AT, as_Register(index), AT); ++ if ( value == 0 ) { ++ __ gssbx(R0, as_Register(base), AT, 0); ++ } else { ++ __ move(T9, value); ++ __ gssbx(T9, as_Register(base), AT, 0); ++ } ++ } else { ++ __ dsll(AT, as_Register(index), scale); ++ __ move(T9, disp); ++ __ daddu(AT, AT, T9); ++ if ( value == 0 ) { ++ __ gssbx(R0, as_Register(base), AT, 0); ++ } else { ++ __ move(T9, value); ++ __ gssbx(T9, as_Register(base), AT, 0); ++ } ++ } ++ } ++ } else { //not use loongson isa ++ if (scale == 0) { ++ __ daddu(AT, as_Register(base), as_Register(index)); ++ } else { ++ __ dsll(AT, as_Register(index), scale); ++ __ daddu(AT, as_Register(base), AT); ++ } ++ if( Assembler::is_simm16(disp) ) { ++ if (value == 0) { ++ __ sb(R0, AT, disp); ++ } else { ++ __ move(T9, value); ++ __ sb(T9, AT, disp); ++ } ++ } else { ++ if (value == 0) { ++ __ move(T9, disp); ++ __ daddu(AT, AT, T9); ++ __ sb(R0, AT, 0); ++ } else { ++ __ move(T9, disp); ++ __ daddu(AT, AT, T9); ++ __ move(T9, value); ++ __ sb(T9, AT, 0); ++ } ++ } ++ } ++ } else { ++ if (UseLEXT1){ ++ if ( Assembler::is_simm16(disp) ){ ++ if ( value == 0 ) { ++ __ sb(R0, as_Register(base), disp); ++ } else { ++ __ move(AT, value); ++ __ sb(AT, as_Register(base), disp); ++ } ++ } else { ++ __ move(AT, disp); ++ if ( value == 0 ) { ++ __ gssbx(R0, as_Register(base), AT, 0); ++ } else { ++ __ move(T9, value); ++ __ gssbx(T9, as_Register(base), AT, 0); ++ } ++ } ++ } else { ++ if( Assembler::is_simm16(disp) ) { ++ if (value == 0) { ++ __ sb(R0, as_Register(base), disp); ++ } else { ++ __ move(AT, value); ++ __ sb(AT, as_Register(base), disp); ++ } ++ } else { ++ if (value == 0) { ++ __ move(T9, disp); ++ __ daddu(AT, as_Register(base), T9); ++ __ sb(R0, AT, 0); ++ } else { ++ __ move(T9, disp); ++ __ daddu(AT, as_Register(base), T9); ++ __ move(T9, value); ++ __ sb(T9, AT, 0); ++ } ++ } ++ } ++ } ++ ++ __ sync(); ++ %} ++ ++ // Load Short (16bit signed) ++ enc_class load_S_enc (mRegI dst, memory mem) %{ ++ MacroAssembler _masm(&cbuf); ++ int dst = $dst$$reg; ++ int base = $mem$$base; ++ int index = $mem$$index; ++ int scale = $mem$$scale; ++ int disp = $mem$$disp; ++ ++ if( index != 0 ) { ++ if (UseLEXT1) { ++ if ( Assembler::is_simm(disp, 8) ) { ++ if (scale == 0) { ++ __ gslhx(as_Register(dst), 
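// The immediate-store encodings above (store_B_immI_enc and its _sync variant)
// special-case a zero constant: the hard-wired zero register R0 is stored
// directly, and only non-zero values are first materialized into a scratch
// register (T9 or AT); the _sync variant additionally emits a sync() barrier
// after the store. A tiny sketch of the source-register choice.
#include <cstdio>

static const char* store_source_reg(int value) {
  return value == 0 ? "R0"    // store the zero register, no materialization
                    : "T9";   // move T9, value; then store T9
}

int main() {
  std::printf("store 0  from %s\n", store_source_reg(0));
  std::printf("store 42 from %s\n", store_source_reg(42));
  return 0;
}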
as_Register(base), as_Register(index), disp); ++ } else { ++ __ dsll(AT, as_Register(index), scale); ++ __ gslhx(as_Register(dst), as_Register(base), AT, disp); ++ } ++ } else if ( Assembler::is_simm16(disp) ) { ++ if (scale == 0) { ++ __ daddu(AT, as_Register(base), as_Register(index)); ++ __ lh(as_Register(dst), AT, disp); ++ } else { ++ __ dsll(AT, as_Register(index), scale); ++ __ daddu(AT, as_Register(base), AT); ++ __ lh(as_Register(dst), AT, disp); ++ } ++ } else { ++ if (scale == 0) { ++ __ move(AT, disp); ++ __ daddu(AT, as_Register(index), AT); ++ __ gslhx(as_Register(dst), as_Register(base), AT, 0); ++ } else { ++ __ dsll(AT, as_Register(index), scale); ++ __ move(T9, disp); ++ __ daddu(AT, AT, T9); ++ __ gslhx(as_Register(dst), as_Register(base), AT, 0); ++ } ++ } ++ } else { // not use loongson isa ++ if (scale == 0) { ++ __ daddu(AT, as_Register(base), as_Register(index)); ++ } else { ++ __ dsll(AT, as_Register(index), scale); ++ __ daddu(AT, as_Register(base), AT); ++ } ++ if( Assembler::is_simm16(disp) ) { ++ __ lh(as_Register(dst), AT, disp); ++ } else { ++ __ move(T9, disp); ++ __ daddu(AT, AT, T9); ++ __ lh(as_Register(dst), AT, 0); ++ } ++ } ++ } else { // index is 0 ++ if (UseLEXT1) { ++ if ( Assembler::is_simm16(disp) ) { ++ __ lh(as_Register(dst), as_Register(base), disp); ++ } else { ++ __ move(T9, disp); ++ __ gslhx(as_Register(dst), as_Register(base), T9, 0); ++ } ++ } else { //not use loongson isa ++ if( Assembler::is_simm16(disp) ) { ++ __ lh(as_Register(dst), as_Register(base), disp); ++ } else { ++ __ move(T9, disp); ++ __ daddu(AT, as_Register(base), T9); ++ __ lh(as_Register(dst), AT, 0); ++ } ++ } ++ } ++ %} ++ ++ // Load Char (16bit unsigned) ++ enc_class load_C_enc (mRegI dst, memory mem) %{ ++ MacroAssembler _masm(&cbuf); ++ int dst = $dst$$reg; ++ int base = $mem$$base; ++ int index = $mem$$index; ++ int scale = $mem$$scale; ++ int disp = $mem$$disp; ++ ++ if( index != 0 ) { ++ if (scale == 0) { ++ __ daddu(AT, as_Register(base), as_Register(index)); ++ } else { ++ __ dsll(AT, as_Register(index), scale); ++ __ daddu(AT, as_Register(base), AT); ++ } ++ if( Assembler::is_simm16(disp) ) { ++ __ lhu(as_Register(dst), AT, disp); ++ } else { ++ __ move(T9, disp); ++ __ addu(AT, AT, T9); ++ __ lhu(as_Register(dst), AT, 0); ++ } ++ } else { ++ if( Assembler::is_simm16(disp) ) { ++ __ lhu(as_Register(dst), as_Register(base), disp); ++ } else { ++ __ move(T9, disp); ++ __ daddu(AT, as_Register(base), T9); ++ __ lhu(as_Register(dst), AT, 0); ++ } ++ } ++ %} ++ ++ // Store Char (16bit unsigned) ++ enc_class store_C_reg_enc (memory mem, mRegI src) %{ ++ MacroAssembler _masm(&cbuf); ++ int src = $src$$reg; ++ int base = $mem$$base; ++ int index = $mem$$index; ++ int scale = $mem$$scale; ++ int disp = $mem$$disp; ++ ++ if( index != 0 ) { ++ if( Assembler::is_simm16(disp) ) { ++ if ( UseLEXT1 && Assembler::is_simm(disp, 8) ) { ++ if (scale == 0) { ++ __ gsshx(as_Register(src), as_Register(base), as_Register(index), disp); ++ } else { ++ __ dsll(AT, as_Register(index), scale); ++ __ gsshx(as_Register(src), as_Register(base), AT, disp); ++ } ++ } else { ++ if (scale == 0) { ++ __ addu(AT, as_Register(base), as_Register(index)); ++ } else { ++ __ dsll(AT, as_Register(index), scale); ++ __ addu(AT, as_Register(base), AT); ++ } ++ __ sh(as_Register(src), AT, disp); ++ } ++ } else { ++ if (scale == 0) { ++ __ addu(AT, as_Register(base), as_Register(index)); ++ } else { ++ __ dsll(AT, as_Register(index), scale); ++ __ addu(AT, as_Register(base), AT); ++ } ++ __ move(T9, 
disp); ++ if (UseLEXT1) { ++ __ gsshx(as_Register(src), AT, T9, 0); ++ } else { ++ __ addu(AT, AT, T9); ++ __ sh(as_Register(src), AT, 0); ++ } ++ } ++ } else { ++ if( Assembler::is_simm16(disp) ) { ++ __ sh(as_Register(src), as_Register(base), disp); ++ } else { ++ __ move(T9, disp); ++ if (UseLEXT1) { ++ __ gsshx(as_Register(src), as_Register(base), T9, 0); ++ } else { ++ __ addu(AT, as_Register(base), T9); ++ __ sh(as_Register(src), AT, 0); ++ } ++ } ++ } ++ %} ++ ++ enc_class store_C0_enc (memory mem) %{ ++ MacroAssembler _masm(&cbuf); ++ int base = $mem$$base; ++ int index = $mem$$index; ++ int scale = $mem$$scale; ++ int disp = $mem$$disp; ++ ++ if( index != 0 ) { ++ if ( Assembler::is_simm16(disp) ) { ++ if ( UseLEXT1 && Assembler::is_simm(disp, 8) ) { ++ if (scale == 0) { ++ __ gsshx(R0, as_Register(base), as_Register(index), disp); ++ } else { ++ __ dsll(AT, as_Register(index), scale); ++ __ gsshx(R0, as_Register(base), AT, disp); ++ } ++ } else { ++ if (scale == 0) { ++ __ addu(AT, as_Register(base), as_Register(index)); ++ } else { ++ __ dsll(AT, as_Register(index), scale); ++ __ addu(AT, as_Register(base), AT); ++ } ++ __ sh(R0, AT, disp); ++ } ++ } else { ++ if (scale == 0) { ++ __ addu(AT, as_Register(base), as_Register(index)); ++ } else { ++ __ dsll(AT, as_Register(index), scale); ++ __ addu(AT, as_Register(base), AT); ++ } ++ __ move(T9, disp); ++ if (UseLEXT1) { ++ __ gsshx(R0, AT, T9, 0); ++ } else { ++ __ addu(AT, AT, T9); ++ __ sh(R0, AT, 0); ++ } ++ } ++ } else { ++ if( Assembler::is_simm16(disp) ) { ++ __ sh(R0, as_Register(base), disp); ++ } else { ++ __ move(T9, disp); ++ if (UseLEXT1) { ++ __ gsshx(R0, as_Register(base), T9, 0); ++ } else { ++ __ addu(AT, as_Register(base), T9); ++ __ sh(R0, AT, 0); ++ } ++ } ++ } ++ %} ++ ++ enc_class load_I_enc (mRegI dst, memory mem) %{ ++ MacroAssembler _masm(&cbuf); ++ int dst = $dst$$reg; ++ int base = $mem$$base; ++ int index = $mem$$index; ++ int scale = $mem$$scale; ++ int disp = $mem$$disp; ++ ++ if( index != 0 ) { ++ if( Assembler::is_simm16(disp) ) { ++ if ( UseLEXT1 && Assembler::is_simm(disp, 8) ) { ++ if (scale == 0) { ++ __ gslwx(as_Register(dst), as_Register(base), as_Register(index), disp); ++ } else { ++ __ dsll(AT, as_Register(index), scale); ++ __ gslwx(as_Register(dst), as_Register(base), AT, disp); ++ } ++ } else { ++ if (scale == 0) { ++ __ addu(AT, as_Register(base), as_Register(index)); ++ } else { ++ __ dsll(AT, as_Register(index), scale); ++ __ addu(AT, as_Register(base), AT); ++ } ++ __ lw(as_Register(dst), AT, disp); ++ } ++ } else { ++ if (scale == 0) { ++ __ addu(AT, as_Register(base), as_Register(index)); ++ } else { ++ __ dsll(AT, as_Register(index), scale); ++ __ addu(AT, as_Register(base), AT); ++ } ++ __ move(T9, disp); ++ if (UseLEXT1) { ++ __ gslwx(as_Register(dst), AT, T9, 0); ++ } else { ++ __ addu(AT, AT, T9); ++ __ lw(as_Register(dst), AT, 0); ++ } ++ } ++ } else { ++ if( Assembler::is_simm16(disp) ) { ++ __ lw(as_Register(dst), as_Register(base), disp); ++ } else { ++ __ move(T9, disp); ++ if (UseLEXT1) { ++ __ gslwx(as_Register(dst), as_Register(base), T9, 0); ++ } else { ++ __ addu(AT, as_Register(base), T9); ++ __ lw(as_Register(dst), AT, 0); ++ } ++ } ++ } ++ %} ++ ++ enc_class store_I_reg_enc (memory mem, mRegI src) %{ ++ MacroAssembler _masm(&cbuf); ++ int src = $src$$reg; ++ int base = $mem$$base; ++ int index = $mem$$index; ++ int scale = $mem$$scale; ++ int disp = $mem$$disp; ++ ++ if( index != 0 ) { ++ if( Assembler::is_simm16(disp) ) { ++ if ( UseLEXT1 && 
Assembler::is_simm(disp, 8) ) { ++ if (scale == 0) { ++ __ gsswx(as_Register(src), as_Register(base), as_Register(index), disp); ++ } else { ++ __ dsll(AT, as_Register(index), scale); ++ __ gsswx(as_Register(src), as_Register(base), AT, disp); ++ } ++ } else { ++ if (scale == 0) { ++ __ addu(AT, as_Register(base), as_Register(index)); ++ } else { ++ __ dsll(AT, as_Register(index), scale); ++ __ addu(AT, as_Register(base), AT); ++ } ++ __ sw(as_Register(src), AT, disp); ++ } ++ } else { ++ if (scale == 0) { ++ __ addu(AT, as_Register(base), as_Register(index)); ++ } else { ++ __ dsll(AT, as_Register(index), scale); ++ __ addu(AT, as_Register(base), AT); ++ } ++ __ move(T9, disp); ++ if (UseLEXT1) { ++ __ gsswx(as_Register(src), AT, T9, 0); ++ } else { ++ __ addu(AT, AT, T9); ++ __ sw(as_Register(src), AT, 0); ++ } ++ } ++ } else { ++ if( Assembler::is_simm16(disp) ) { ++ __ sw(as_Register(src), as_Register(base), disp); ++ } else { ++ __ move(T9, disp); ++ if (UseLEXT1) { ++ __ gsswx(as_Register(src), as_Register(base), T9, 0); ++ } else { ++ __ addu(AT, as_Register(base), T9); ++ __ sw(as_Register(src), AT, 0); ++ } ++ } ++ } ++ %} ++ ++ enc_class store_I_immI_enc (memory mem, immI src) %{ ++ MacroAssembler _masm(&cbuf); ++ int base = $mem$$base; ++ int index = $mem$$index; ++ int scale = $mem$$scale; ++ int disp = $mem$$disp; ++ int value = $src$$constant; ++ ++ if( index != 0 ) { ++ if (UseLEXT1) { ++ if ( Assembler::is_simm(disp, 8) ) { ++ if ( scale == 0 ) { ++ if ( value == 0 ) { ++ __ gsswx(R0, as_Register(base), as_Register(index), disp); ++ } else { ++ __ move(T9, value); ++ __ gsswx(T9, as_Register(base), as_Register(index), disp); ++ } ++ } else { ++ __ dsll(AT, as_Register(index), scale); ++ if ( value == 0 ) { ++ __ gsswx(R0, as_Register(base), AT, disp); ++ } else { ++ __ move(T9, value); ++ __ gsswx(T9, as_Register(base), AT, disp); ++ } ++ } ++ } else if ( Assembler::is_simm16(disp) ) { ++ if ( scale == 0 ) { ++ __ daddu(AT, as_Register(base), as_Register(index)); ++ if ( value == 0 ) { ++ __ sw(R0, AT, disp); ++ } else { ++ __ move(T9, value); ++ __ sw(T9, AT, disp); ++ } ++ } else { ++ __ dsll(AT, as_Register(index), scale); ++ __ daddu(AT, as_Register(base), AT); ++ if ( value == 0 ) { ++ __ sw(R0, AT, disp); ++ } else { ++ __ move(T9, value); ++ __ sw(T9, AT, disp); ++ } ++ } ++ } else { ++ if ( scale == 0 ) { ++ __ move(T9, disp); ++ __ daddu(AT, as_Register(index), T9); ++ if ( value ==0 ) { ++ __ gsswx(R0, as_Register(base), AT, 0); ++ } else { ++ __ move(T9, value); ++ __ gsswx(T9, as_Register(base), AT, 0); ++ } ++ } else { ++ __ dsll(AT, as_Register(index), scale); ++ __ move(T9, disp); ++ __ daddu(AT, AT, T9); ++ if ( value == 0 ) { ++ __ gsswx(R0, as_Register(base), AT, 0); ++ } else { ++ __ move(T9, value); ++ __ gsswx(T9, as_Register(base), AT, 0); ++ } ++ } ++ } ++ } else { //not use loongson isa ++ if (scale == 0) { ++ __ daddu(AT, as_Register(base), as_Register(index)); ++ } else { ++ __ dsll(AT, as_Register(index), scale); ++ __ daddu(AT, as_Register(base), AT); ++ } ++ if( Assembler::is_simm16(disp) ) { ++ if (value == 0) { ++ __ sw(R0, AT, disp); ++ } else { ++ __ move(T9, value); ++ __ sw(T9, AT, disp); ++ } ++ } else { ++ if (value == 0) { ++ __ move(T9, disp); ++ __ daddu(AT, AT, T9); ++ __ sw(R0, AT, 0); ++ } else { ++ __ move(T9, disp); ++ __ daddu(AT, AT, T9); ++ __ move(T9, value); ++ __ sw(T9, AT, 0); ++ } ++ } ++ } ++ } else { ++ if (UseLEXT1) { ++ if ( Assembler::is_simm16(disp) ) { ++ if ( value == 0 ) { ++ __ sw(R0, as_Register(base), disp); 
++ } else { ++ __ move(AT, value); ++ __ sw(AT, as_Register(base), disp); ++ } ++ } else { ++ __ move(T9, disp); ++ if ( value == 0 ) { ++ __ gsswx(R0, as_Register(base), T9, 0); ++ } else { ++ __ move(AT, value); ++ __ gsswx(AT, as_Register(base), T9, 0); ++ } ++ } ++ } else { ++ if( Assembler::is_simm16(disp) ) { ++ if (value == 0) { ++ __ sw(R0, as_Register(base), disp); ++ } else { ++ __ move(AT, value); ++ __ sw(AT, as_Register(base), disp); ++ } ++ } else { ++ if (value == 0) { ++ __ move(T9, disp); ++ __ daddu(AT, as_Register(base), T9); ++ __ sw(R0, AT, 0); ++ } else { ++ __ move(T9, disp); ++ __ daddu(AT, as_Register(base), T9); ++ __ move(T9, value); ++ __ sw(T9, AT, 0); ++ } ++ } ++ } ++ } ++ %} ++ ++ enc_class load_N_enc (mRegN dst, memory mem) %{ ++ MacroAssembler _masm(&cbuf); ++ int dst = $dst$$reg; ++ int base = $mem$$base; ++ int index = $mem$$index; ++ int scale = $mem$$scale; ++ int disp = $mem$$disp; ++ relocInfo::relocType disp_reloc = $mem->disp_reloc(); ++ assert(disp_reloc == relocInfo::none, "cannot have disp"); ++ ++ if( index != 0 ) { ++ if (scale == 0) { ++ __ daddu(AT, as_Register(base), as_Register(index)); ++ } else { ++ __ dsll(AT, as_Register(index), scale); ++ __ daddu(AT, as_Register(base), AT); ++ } ++ if( Assembler::is_simm16(disp) ) { ++ __ lwu(as_Register(dst), AT, disp); ++ } else { ++ __ set64(T9, disp); ++ __ daddu(AT, AT, T9); ++ __ lwu(as_Register(dst), AT, 0); ++ } ++ } else { ++ if( Assembler::is_simm16(disp) ) { ++ __ lwu(as_Register(dst), as_Register(base), disp); ++ } else { ++ __ set64(T9, disp); ++ __ daddu(AT, as_Register(base), T9); ++ __ lwu(as_Register(dst), AT, 0); ++ } ++ } ++ %} ++ ++ ++ enc_class load_P_enc (mRegP dst, memory mem) %{ ++ MacroAssembler _masm(&cbuf); ++ int dst = $dst$$reg; ++ int base = $mem$$base; ++ int index = $mem$$index; ++ int scale = $mem$$scale; ++ int disp = $mem$$disp; ++ relocInfo::relocType disp_reloc = $mem->disp_reloc(); ++ assert(disp_reloc == relocInfo::none, "cannot have disp"); ++ ++ if( index != 0 ) { ++ if (UseLEXT1) { ++ if ( Assembler::is_simm(disp, 8) ) { ++ if ( scale != 0 ) { ++ __ dsll(AT, as_Register(index), scale); ++ __ gsldx(as_Register(dst), as_Register(base), AT, disp); ++ } else { ++ __ gsldx(as_Register(dst), as_Register(base), as_Register(index), disp); ++ } ++ } else if ( Assembler::is_simm16(disp) ){ ++ if ( scale != 0 ) { ++ __ dsll(AT, as_Register(index), scale); ++ __ daddu(AT, AT, as_Register(base)); ++ } else { ++ __ daddu(AT, as_Register(index), as_Register(base)); ++ } ++ __ ld(as_Register(dst), AT, disp); ++ } else { ++ if ( scale != 0 ) { ++ __ dsll(AT, as_Register(index), scale); ++ __ move(T9, disp); ++ __ daddu(AT, AT, T9); ++ } else { ++ __ move(T9, disp); ++ __ daddu(AT, as_Register(index), T9); ++ } ++ __ gsldx(as_Register(dst), as_Register(base), AT, 0); ++ } ++ } else { //not use loongson isa ++ if (scale == 0) { ++ __ daddu(AT, as_Register(base), as_Register(index)); ++ } else { ++ __ dsll(AT, as_Register(index), scale); ++ __ daddu(AT, as_Register(base), AT); ++ } ++ if( Assembler::is_simm16(disp) ) { ++ __ ld(as_Register(dst), AT, disp); ++ } else { ++ __ set64(T9, disp); ++ __ daddu(AT, AT, T9); ++ __ ld(as_Register(dst), AT, 0); ++ } ++ } ++ } else { ++ if (UseLEXT1) { ++ if ( Assembler::is_simm16(disp) ){ ++ __ ld(as_Register(dst), as_Register(base), disp); ++ } else { ++ __ set64(T9, disp); ++ __ gsldx(as_Register(dst), as_Register(base), T9, 0); ++ } ++ } else { //not use loongson isa ++ if( Assembler::is_simm16(disp) ) { ++ __ ld(as_Register(dst), 
as_Register(base), disp); ++ } else { ++ __ set64(T9, disp); ++ __ daddu(AT, as_Register(base), T9); ++ __ ld(as_Register(dst), AT, 0); ++ } ++ } ++ } ++ %} ++ ++ // Load acquire. ++ // load_P_enc + sync ++ enc_class load_P_enc_ac (mRegP dst, memory mem) %{ ++ MacroAssembler _masm(&cbuf); ++ int dst = $dst$$reg; ++ int base = $mem$$base; ++ int index = $mem$$index; ++ int scale = $mem$$scale; ++ int disp = $mem$$disp; ++ relocInfo::relocType disp_reloc = $mem->disp_reloc(); ++ assert(disp_reloc == relocInfo::none, "cannot have disp"); ++ ++ if( index != 0 ) { ++ if (UseLEXT1) { ++ if ( Assembler::is_simm(disp, 8) ) { ++ if ( scale != 0 ) { ++ __ dsll(AT, as_Register(index), scale); ++ __ gsldx(as_Register(dst), as_Register(base), AT, disp); ++ } else { ++ __ gsldx(as_Register(dst), as_Register(base), as_Register(index), disp); ++ } ++ } else if ( Assembler::is_simm16(disp) ){ ++ if ( scale != 0 ) { ++ __ dsll(AT, as_Register(index), scale); ++ __ daddu(AT, AT, as_Register(base)); ++ } else { ++ __ daddu(AT, as_Register(index), as_Register(base)); ++ } ++ __ ld(as_Register(dst), AT, disp); ++ } else { ++ if ( scale != 0 ) { ++ __ dsll(AT, as_Register(index), scale); ++ __ move(T9, disp); ++ __ daddu(AT, AT, T9); ++ } else { ++ __ move(T9, disp); ++ __ daddu(AT, as_Register(index), T9); ++ } ++ __ gsldx(as_Register(dst), as_Register(base), AT, 0); ++ } ++ } else { //not use loongson isa ++ if (scale == 0) { ++ __ daddu(AT, as_Register(base), as_Register(index)); ++ } else { ++ __ dsll(AT, as_Register(index), scale); ++ __ daddu(AT, as_Register(base), AT); ++ } ++ if( Assembler::is_simm16(disp) ) { ++ __ ld(as_Register(dst), AT, disp); ++ } else { ++ __ set64(T9, disp); ++ __ daddu(AT, AT, T9); ++ __ ld(as_Register(dst), AT, 0); ++ } ++ } ++ } else { ++ if (UseLEXT1) { ++ if ( Assembler::is_simm16(disp) ){ ++ __ ld(as_Register(dst), as_Register(base), disp); ++ } else { ++ __ set64(T9, disp); ++ __ gsldx(as_Register(dst), as_Register(base), T9, 0); ++ } ++ } else { //not use loongson isa ++ if( Assembler::is_simm16(disp) ) { ++ __ ld(as_Register(dst), as_Register(base), disp); ++ } else { ++ __ set64(T9, disp); ++ __ daddu(AT, as_Register(base), T9); ++ __ ld(as_Register(dst), AT, 0); ++ } ++ } ++ } ++ __ sync(); ++ %} ++ ++ enc_class store_P_reg_enc (memory mem, mRegP src) %{ ++ MacroAssembler _masm(&cbuf); ++ int src = $src$$reg; ++ int base = $mem$$base; ++ int index = $mem$$index; ++ int scale = $mem$$scale; ++ int disp = $mem$$disp; ++ ++ if( index != 0 ) { ++ if (UseLEXT1){ ++ if ( Assembler::is_simm(disp, 8) ) { ++ if ( scale == 0 ) { ++ __ gssdx(as_Register(src), as_Register(base), as_Register(index), disp); ++ } else { ++ __ dsll(AT, as_Register(index), scale); ++ __ gssdx(as_Register(src), as_Register(base), AT, disp); ++ } ++ } else if ( Assembler::is_simm16(disp) ) { ++ if ( scale == 0 ) { ++ __ daddu(AT, as_Register(base), as_Register(index)); ++ } else { ++ __ dsll(AT, as_Register(index), scale); ++ __ daddu(AT, as_Register(base), AT); ++ } ++ __ sd(as_Register(src), AT, disp); ++ } else { ++ if ( scale == 0 ) { ++ __ move(T9, disp); ++ __ daddu(AT, as_Register(index), T9); ++ } else { ++ __ dsll(AT, as_Register(index), scale); ++ __ move(T9, disp); ++ __ daddu(AT, AT, T9); ++ } ++ __ gssdx(as_Register(src), as_Register(base), AT, 0); ++ } ++ } else { //not use loongson isa ++ if (scale == 0) { ++ __ daddu(AT, as_Register(base), as_Register(index)); ++ } else { ++ __ dsll(AT, as_Register(index), scale); ++ __ daddu(AT, as_Register(base), AT); ++ } ++ if( 
Assembler::is_simm16(disp) ) { ++ __ sd(as_Register(src), AT, disp); ++ } else { ++ __ move(T9, disp); ++ __ daddu(AT, AT, T9); ++ __ sd(as_Register(src), AT, 0); ++ } ++ } ++ } else { ++ if (UseLEXT1) { ++ if ( Assembler::is_simm16(disp) ) { ++ __ sd(as_Register(src), as_Register(base), disp); ++ } else { ++ __ move(T9, disp); ++ __ gssdx(as_Register(src), as_Register(base), T9, 0); ++ } ++ } else { ++ if( Assembler::is_simm16(disp) ) { ++ __ sd(as_Register(src), as_Register(base), disp); ++ } else { ++ __ move(T9, disp); ++ __ daddu(AT, as_Register(base), T9); ++ __ sd(as_Register(src), AT, 0); ++ } ++ } ++ } ++ %} ++ ++ enc_class store_N_reg_enc (memory mem, mRegN src) %{ ++ MacroAssembler _masm(&cbuf); ++ int src = $src$$reg; ++ int base = $mem$$base; ++ int index = $mem$$index; ++ int scale = $mem$$scale; ++ int disp = $mem$$disp; ++ ++ if( index != 0 ) { ++ if (UseLEXT1){ ++ if ( Assembler::is_simm(disp, 8) ) { ++ if ( scale == 0 ) { ++ __ gsswx(as_Register(src), as_Register(base), as_Register(index), disp); ++ } else { ++ __ dsll(AT, as_Register(index), scale); ++ __ gsswx(as_Register(src), as_Register(base), AT, disp); ++ } ++ } else if ( Assembler::is_simm16(disp) ) { ++ if ( scale == 0 ) { ++ __ daddu(AT, as_Register(base), as_Register(index)); ++ } else { ++ __ dsll(AT, as_Register(index), scale); ++ __ daddu(AT, as_Register(base), AT); ++ } ++ __ sw(as_Register(src), AT, disp); ++ } else { ++ if ( scale == 0 ) { ++ __ move(T9, disp); ++ __ daddu(AT, as_Register(index), T9); ++ } else { ++ __ dsll(AT, as_Register(index), scale); ++ __ move(T9, disp); ++ __ daddu(AT, AT, T9); ++ } ++ __ gsswx(as_Register(src), as_Register(base), AT, 0); ++ } ++ } else { //not use loongson isa ++ if (scale == 0) { ++ __ daddu(AT, as_Register(base), as_Register(index)); ++ } else { ++ __ dsll(AT, as_Register(index), scale); ++ __ daddu(AT, as_Register(base), AT); ++ } ++ if( Assembler::is_simm16(disp) ) { ++ __ sw(as_Register(src), AT, disp); ++ } else { ++ __ move(T9, disp); ++ __ daddu(AT, AT, T9); ++ __ sw(as_Register(src), AT, 0); ++ } ++ } ++ } else { ++ if (UseLEXT1) { ++ if ( Assembler::is_simm16(disp) ) { ++ __ sw(as_Register(src), as_Register(base), disp); ++ } else { ++ __ move(T9, disp); ++ __ gsswx(as_Register(src), as_Register(base), T9, 0); ++ } ++ } else { ++ if( Assembler::is_simm16(disp) ) { ++ __ sw(as_Register(src), as_Register(base), disp); ++ } else { ++ __ move(T9, disp); ++ __ daddu(AT, as_Register(base), T9); ++ __ sw(as_Register(src), AT, 0); ++ } ++ } ++ } ++ %} ++ ++ enc_class store_P_immP0_enc (memory mem) %{ ++ MacroAssembler _masm(&cbuf); ++ int base = $mem$$base; ++ int index = $mem$$index; ++ int scale = $mem$$scale; ++ int disp = $mem$$disp; ++ ++ if( index != 0 ) { ++ if (scale == 0) { ++ if ( Assembler::is_simm16(disp) ) { ++ if (UseLEXT1 && Assembler::is_simm(disp, 8)) { ++ __ gssdx(R0, as_Register(base), as_Register(index), disp); ++ } else { ++ __ daddu(AT, as_Register(base), as_Register(index)); ++ __ sd(R0, AT, disp); ++ } ++ } else { ++ __ daddu(AT, as_Register(base), as_Register(index)); ++ __ move(T9, disp); ++ if (UseLEXT1) { ++ __ gssdx(R0, AT, T9, 0); ++ } else { ++ __ daddu(AT, AT, T9); ++ __ sd(R0, AT, 0); ++ } ++ } ++ } else { ++ __ dsll(AT, as_Register(index), scale); ++ if( Assembler::is_simm16(disp) ) { ++ if (UseLEXT1 && Assembler::is_simm(disp, 8)) { ++ __ gssdx(R0, as_Register(base), AT, disp); ++ } else { ++ __ daddu(AT, as_Register(base), AT); ++ __ sd(R0, AT, disp); ++ } ++ } else { ++ __ daddu(AT, as_Register(base), AT); ++ __ move(T9, 
disp); ++ if (UseLEXT1) { ++ __ gssdx(R0, AT, T9, 0); ++ } else { ++ __ daddu(AT, AT, T9); ++ __ sd(R0, AT, 0); ++ } ++ } ++ } ++ } else { ++ if( Assembler::is_simm16(disp) ) { ++ __ sd(R0, as_Register(base), disp); ++ } else { ++ __ move(T9, disp); ++ if (UseLEXT1) { ++ __ gssdx(R0, as_Register(base), T9, 0); ++ } else { ++ __ daddu(AT, as_Register(base), T9); ++ __ sd(R0, AT, 0); ++ } ++ } ++ } ++ %} ++ ++ enc_class storeImmN0_enc(memory mem, ImmN0 src) %{ ++ MacroAssembler _masm(&cbuf); ++ int base = $mem$$base; ++ int index = $mem$$index; ++ int scale = $mem$$scale; ++ int disp = $mem$$disp; ++ ++ if(index!=0){ ++ if (scale == 0) { ++ __ daddu(AT, as_Register(base), as_Register(index)); ++ } else { ++ __ dsll(AT, as_Register(index), scale); ++ __ daddu(AT, as_Register(base), AT); ++ } ++ ++ if( Assembler::is_simm16(disp) ) { ++ __ sw(R0, AT, disp); ++ } else { ++ __ move(T9, disp); ++ __ daddu(AT, AT, T9); ++ __ sw(R0, AT, 0); ++ } ++ } else { ++ if( Assembler::is_simm16(disp) ) { ++ __ sw(R0, as_Register(base), disp); ++ } else { ++ __ move(T9, disp); ++ __ daddu(AT, as_Register(base), T9); ++ __ sw(R0, AT, 0); ++ } ++ } ++ %} ++ ++ enc_class load_L_enc (mRegL dst, memory mem) %{ ++ MacroAssembler _masm(&cbuf); ++ int base = $mem$$base; ++ int index = $mem$$index; ++ int scale = $mem$$scale; ++ int disp = $mem$$disp; ++ Register dst_reg = as_Register($dst$$reg); ++ ++ if( index != 0 ) { ++ if (scale == 0) { ++ __ daddu(AT, as_Register(base), as_Register(index)); ++ } else { ++ __ dsll(AT, as_Register(index), scale); ++ __ daddu(AT, as_Register(base), AT); ++ } ++ if( Assembler::is_simm16(disp) ) { ++ __ ld(dst_reg, AT, disp); ++ } else { ++ __ move(T9, disp); ++ __ daddu(AT, AT, T9); ++ __ ld(dst_reg, AT, 0); ++ } ++ } else { ++ if( Assembler::is_simm16(disp) ) { ++ __ ld(dst_reg, as_Register(base), disp); ++ } else { ++ __ move(T9, disp); ++ __ daddu(AT, as_Register(base), T9); ++ __ ld(dst_reg, AT, 0); ++ } ++ } ++ %} ++ ++ enc_class store_L_reg_enc (memory mem, mRegL src) %{ ++ MacroAssembler _masm(&cbuf); ++ int base = $mem$$base; ++ int index = $mem$$index; ++ int scale = $mem$$scale; ++ int disp = $mem$$disp; ++ Register src_reg = as_Register($src$$reg); ++ ++ if( index != 0 ) { ++ if (scale == 0) { ++ __ daddu(AT, as_Register(base), as_Register(index)); ++ } else { ++ __ dsll(AT, as_Register(index), scale); ++ __ daddu(AT, as_Register(base), AT); ++ } ++ if( Assembler::is_simm16(disp) ) { ++ __ sd(src_reg, AT, disp); ++ } else { ++ __ move(T9, disp); ++ __ daddu(AT, AT, T9); ++ __ sd(src_reg, AT, 0); ++ } ++ } else { ++ if( Assembler::is_simm16(disp) ) { ++ __ sd(src_reg, as_Register(base), disp); ++ } else { ++ __ move(T9, disp); ++ __ daddu(AT, as_Register(base), T9); ++ __ sd(src_reg, AT, 0); ++ } ++ } ++ %} ++ ++ enc_class store_L_immL_0_enc (memory mem, immL_0 src) %{ ++ MacroAssembler _masm(&cbuf); ++ int base = $mem$$base; ++ int index = $mem$$index; ++ int scale = $mem$$scale; ++ int disp = $mem$$disp; ++ ++ if( index != 0 ) { ++ if (scale == 0) { ++ __ daddu(AT, as_Register(base), as_Register(index)); ++ } else { ++ __ dsll(AT, as_Register(index), scale); ++ __ daddu(AT, as_Register(base), AT); ++ } ++ if( Assembler::is_simm16(disp) ) { ++ __ sd(R0, AT, disp); ++ } else { ++ __ move(T9, disp); ++ __ addu(AT, AT, T9); ++ __ sd(R0, AT, 0); ++ } ++ } else { ++ if( Assembler::is_simm16(disp) ) { ++ __ sd(R0, as_Register(base), disp); ++ } else { ++ __ move(T9, disp); ++ __ addu(AT, as_Register(base), T9); ++ __ sd(R0, AT, 0); ++ } ++ } ++ %} ++ ++ enc_class 
store_L_immL_enc (memory mem, immL src) %{ ++ MacroAssembler _masm(&cbuf); ++ int base = $mem$$base; ++ int index = $mem$$index; ++ int scale = $mem$$scale; ++ int disp = $mem$$disp; ++ long imm = $src$$constant; ++ ++ if( index != 0 ) { ++ if (scale == 0) { ++ __ daddu(AT, as_Register(base), as_Register(index)); ++ } else { ++ __ dsll(AT, as_Register(index), scale); ++ __ daddu(AT, as_Register(base), AT); ++ } ++ if( Assembler::is_simm16(disp) ) { ++ __ set64(T9, imm); ++ __ sd(T9, AT, disp); ++ } else { ++ __ move(T9, disp); ++ __ addu(AT, AT, T9); ++ __ set64(T9, imm); ++ __ sd(T9, AT, 0); ++ } ++ } else { ++ if( Assembler::is_simm16(disp) ) { ++ __ move(AT, as_Register(base)); ++ __ set64(T9, imm); ++ __ sd(T9, AT, disp); ++ } else { ++ __ move(T9, disp); ++ __ addu(AT, as_Register(base), T9); ++ __ set64(T9, imm); ++ __ sd(T9, AT, 0); ++ } ++ } ++ %} ++ ++ enc_class load_F_enc (regF dst, memory mem) %{ ++ MacroAssembler _masm(&cbuf); ++ int base = $mem$$base; ++ int index = $mem$$index; ++ int scale = $mem$$scale; ++ int disp = $mem$$disp; ++ FloatRegister dst = $dst$$FloatRegister; ++ ++ if( index != 0 ) { ++ if( Assembler::is_simm16(disp) ) { ++ if ( UseLEXT1 && Assembler::is_simm(disp, 8) ) { ++ if (scale == 0) { ++ __ gslwxc1(dst, as_Register(base), as_Register(index), disp); ++ } else { ++ __ dsll(AT, as_Register(index), scale); ++ __ gslwxc1(dst, as_Register(base), AT, disp); ++ } ++ } else { ++ if (scale == 0) { ++ __ daddu(AT, as_Register(base), as_Register(index)); ++ } else { ++ __ dsll(AT, as_Register(index), scale); ++ __ daddu(AT, as_Register(base), AT); ++ } ++ __ lwc1(dst, AT, disp); ++ } ++ } else { ++ if (scale == 0) { ++ __ daddu(AT, as_Register(base), as_Register(index)); ++ } else { ++ __ dsll(AT, as_Register(index), scale); ++ __ daddu(AT, as_Register(base), AT); ++ } ++ __ move(T9, disp); ++ if (UseLEXT1) { ++ __ gslwxc1(dst, AT, T9, 0); ++ } else { ++ __ daddu(AT, AT, T9); ++ __ lwc1(dst, AT, 0); ++ } ++ } ++ } else { ++ if( Assembler::is_simm16(disp) ) { ++ __ lwc1(dst, as_Register(base), disp); ++ } else { ++ __ move(T9, disp); ++ if (UseLEXT1) { ++ __ gslwxc1(dst, as_Register(base), T9, 0); ++ } else { ++ __ daddu(AT, as_Register(base), T9); ++ __ lwc1(dst, AT, 0); ++ } ++ } ++ } ++ %} ++ ++ enc_class store_F_reg_enc (memory mem, regF src) %{ ++ MacroAssembler _masm(&cbuf); ++ int base = $mem$$base; ++ int index = $mem$$index; ++ int scale = $mem$$scale; ++ int disp = $mem$$disp; ++ FloatRegister src = $src$$FloatRegister; ++ ++ if( index != 0 ) { ++ if ( Assembler::is_simm16(disp) ) { ++ if ( UseLEXT1 && Assembler::is_simm(disp, 8) ) { ++ if (scale == 0) { ++ __ gsswxc1(src, as_Register(base), as_Register(index), disp); ++ } else { ++ __ dsll(AT, as_Register(index), scale); ++ __ gsswxc1(src, as_Register(base), AT, disp); ++ } ++ } else { ++ if (scale == 0) { ++ __ daddu(AT, as_Register(base), as_Register(index)); ++ } else { ++ __ dsll(AT, as_Register(index), scale); ++ __ daddu(AT, as_Register(base), AT); ++ } ++ __ swc1(src, AT, disp); ++ } ++ } else { ++ if (scale == 0) { ++ __ daddu(AT, as_Register(base), as_Register(index)); ++ } else { ++ __ dsll(AT, as_Register(index), scale); ++ __ daddu(AT, as_Register(base), AT); ++ } ++ __ move(T9, disp); ++ if (UseLEXT1) { ++ __ gsswxc1(src, AT, T9, 0); ++ } else { ++ __ daddu(AT, AT, T9); ++ __ swc1(src, AT, 0); ++ } ++ } ++ } else { ++ if( Assembler::is_simm16(disp) ) { ++ __ swc1(src, as_Register(base), disp); ++ } else { ++ __ move(T9, disp); ++ if (UseLEXT1) { ++ __ gsswxc1(src, as_Register(base), T9, 0); 
++ } else { ++ __ daddu(AT, as_Register(base), T9); ++ __ swc1(src, AT, 0); ++ } ++ } ++ } ++ %} ++ ++ enc_class load_D_enc (regD dst, memory mem) %{ ++ MacroAssembler _masm(&cbuf); ++ int base = $mem$$base; ++ int index = $mem$$index; ++ int scale = $mem$$scale; ++ int disp = $mem$$disp; ++ FloatRegister dst_reg = as_FloatRegister($dst$$reg); ++ ++ if ( index != 0 ) { ++ if ( Assembler::is_simm16(disp) ) { ++ if ( UseLEXT1 && Assembler::is_simm(disp, 8) ) { ++ if (scale == 0) { ++ __ gsldxc1(dst_reg, as_Register(base), as_Register(index), disp); ++ } else { ++ __ dsll(AT, as_Register(index), scale); ++ __ gsldxc1(dst_reg, as_Register(base), AT, disp); ++ } ++ } else { ++ if (scale == 0) { ++ __ daddu(AT, as_Register(base), as_Register(index)); ++ } else { ++ __ dsll(AT, as_Register(index), scale); ++ __ daddu(AT, as_Register(base), AT); ++ } ++ __ ldc1(dst_reg, AT, disp); ++ } ++ } else { ++ if (scale == 0) { ++ __ daddu(AT, as_Register(base), as_Register(index)); ++ } else { ++ __ dsll(AT, as_Register(index), scale); ++ __ daddu(AT, as_Register(base), AT); ++ } ++ __ move(T9, disp); ++ if (UseLEXT1) { ++ __ gsldxc1(dst_reg, AT, T9, 0); ++ } else { ++ __ addu(AT, AT, T9); ++ __ ldc1(dst_reg, AT, 0); ++ } ++ } ++ } else { ++ if( Assembler::is_simm16(disp) ) { ++ __ ldc1(dst_reg, as_Register(base), disp); ++ } else { ++ __ move(T9, disp); ++ if (UseLEXT1) { ++ __ gsldxc1(dst_reg, as_Register(base), T9, 0); ++ } else { ++ __ addu(AT, as_Register(base), T9); ++ __ ldc1(dst_reg, AT, 0); ++ } ++ } ++ } ++ %} ++ ++ enc_class store_D_reg_enc (memory mem, regD src) %{ ++ MacroAssembler _masm(&cbuf); ++ int base = $mem$$base; ++ int index = $mem$$index; ++ int scale = $mem$$scale; ++ int disp = $mem$$disp; ++ FloatRegister src_reg = as_FloatRegister($src$$reg); ++ ++ if ( index != 0 ) { ++ if ( Assembler::is_simm16(disp) ) { ++ if ( UseLEXT1 && Assembler::is_simm(disp, 8) ) { ++ if (scale == 0) { ++ __ gssdxc1(src_reg, as_Register(base), as_Register(index), disp); ++ } else { ++ __ dsll(AT, as_Register(index), scale); ++ __ gssdxc1(src_reg, as_Register(base), AT, disp); ++ } ++ } else { ++ if (scale == 0) { ++ __ daddu(AT, as_Register(base), as_Register(index)); ++ } else { ++ __ dsll(AT, as_Register(index), scale); ++ __ daddu(AT, as_Register(base), AT); ++ } ++ __ sdc1(src_reg, AT, disp); ++ } ++ } else { ++ if (scale == 0) { ++ __ daddu(AT, as_Register(base), as_Register(index)); ++ } else { ++ __ dsll(AT, as_Register(index), scale); ++ __ daddu(AT, as_Register(base), AT); ++ } ++ __ move(T9, disp); ++ if (UseLEXT1) { ++ __ gssdxc1(src_reg, AT, T9, 0); ++ } else { ++ __ addu(AT, AT, T9); ++ __ sdc1(src_reg, AT, 0); ++ } ++ } ++ } else { ++ if ( Assembler::is_simm16(disp) ) { ++ __ sdc1(src_reg, as_Register(base), disp); ++ } else { ++ __ move(T9, disp); ++ if (UseLEXT1) { ++ __ gssdxc1(src_reg, as_Register(base), T9, 0); ++ } else { ++ __ addu(AT, as_Register(base), T9); ++ __ sdc1(src_reg, AT, 0); ++ } ++ } ++ } ++ %} ++ ++ enc_class Java_To_Runtime (method meth) %{ // CALL Java_To_Runtime, Java_To_Runtime_Leaf ++ MacroAssembler _masm(&cbuf); ++ // This is the instruction starting address for relocation info. ++ __ block_comment("Java_To_Runtime"); ++ cbuf.set_insts_mark(); ++ __ relocate(relocInfo::runtime_call_type); ++ __ patchable_call((address)$meth$$method); ++ %} ++ ++ enc_class Java_Static_Call (method meth) %{ // JAVA STATIC CALL ++ // CALL to fixup routine. Fixup routine uses ScopeDesc info to determine ++ // who we intended to call. 
++ MacroAssembler _masm(&cbuf); ++ address addr = (address)$meth$$method; ++ address call; ++ __ block_comment("Java_Static_Call"); ++ ++ if ( !_method ) { ++ // A call to a runtime wrapper, e.g. new, new_typeArray_Java, uncommon_trap. ++ call = __ trampoline_call(AddressLiteral(addr, relocInfo::runtime_call_type), &cbuf); ++ } else if(_optimized_virtual) { ++ call = __ trampoline_call(AddressLiteral(addr, relocInfo::opt_virtual_call_type), &cbuf); ++ } else { ++ call = __ trampoline_call(AddressLiteral(addr, relocInfo::static_call_type), &cbuf); ++ } ++ ++ if (call == NULL) { ++ ciEnv::current()->record_failure("CodeCache is full"); ++ return; ++ } ++ ++ if( _method ) { // Emit stub for static call ++ address stub = CompiledStaticCall::emit_to_interp_stub(cbuf); ++ if (stub == NULL) { ++ ciEnv::current()->record_failure("CodeCache is full"); ++ return; ++ } ++ } ++ %} ++ ++ ++ // ++ // [Ref: LIR_Assembler::ic_call() ] ++ // ++ enc_class Java_Dynamic_Call (method meth) %{ // JAVA DYNAMIC CALL ++ MacroAssembler _masm(&cbuf); ++ __ block_comment("Java_Dynamic_Call"); ++ __ ic_call((address)$meth$$method); ++ %} ++ ++ ++ enc_class enc_PartialSubtypeCheck(mRegP result, mRegP sub, mRegP super, mRegI tmp) %{ ++ Register result = $result$$Register; ++ Register sub = $sub$$Register; ++ Register super = $super$$Register; ++ Register length = $tmp$$Register; ++ Register tmp = T9; ++ Label miss; ++ ++ // result may be the same as sub ++ // 47c B40: # B21 B41 <- B20 Freq: 0.155379 ++ // 47c partialSubtypeCheck result=S1, sub=S1, super=S3, length=S0 ++ // 4bc mov S2, NULL #@loadConP ++ // 4c0 beq S1, S2, B21 #@branchConP P=0.999999 C=-1.000000 ++ // ++ MacroAssembler _masm(&cbuf); ++ Label done; ++ __ check_klass_subtype_slow_path(sub, super, length, tmp, ++ NULL, &miss, ++ /*set_cond_codes:*/ true); ++ // Refer to X86_64's RDI ++ __ move(result, 0); ++ __ b(done); ++ __ delayed()->nop(); ++ ++ __ bind(miss); ++ __ move(result, 1); ++ __ bind(done); ++ %} ++ ++%} ++ ++ ++//---------MIPS FRAME-------------------------------------------------------------- ++// Definition of frame structure and management information. ++// ++// S T A C K L A Y O U T Allocators stack-slot number ++// | (to get allocators register number ++// G Owned by | | v add SharedInfo::stack0) ++// r CALLER | | ++// o | +--------+ pad to even-align allocators stack-slot ++// w V | pad0 | numbers; owned by CALLER ++// t -----------+--------+----> Matcher::_in_arg_limit, unaligned ++// h ^ | in | 5 ++// | | args | 4 Holes in incoming args owned by SELF ++// | | old | | 3 ++// | | SP-+--------+----> Matcher::_old_SP, even aligned ++// v | | ret | 3 return address ++// Owned by +--------+ ++// Self | pad2 | 2 pad to align old SP ++// | +--------+ 1 ++// | | locks | 0 ++// | +--------+----> SharedInfo::stack0, even aligned ++// | | pad1 | 11 pad to align new SP ++// | +--------+ ++// | | | 10 ++// | | spills | 9 spills ++// V | | 8 (pad0 slot for callee) ++// -----------+--------+----> Matcher::_out_arg_limit, unaligned ++// ^ | out | 7 ++// | | args | 6 Holes in outgoing args owned by CALLEE ++// Owned by new | | ++// Callee SP-+--------+----> Matcher::_new_SP, even aligned ++// | | ++// ++// Note 1: Only region 8-11 is determined by the allocator. Region 0-5 is ++// known from SELF's arguments and the Java calling convention. ++// Region 6-7 is determined per call site. ++// Note 2: If the calling convention leaves holes in the incoming argument ++// area, those holes are owned by SELF. 
Holes in the outgoing area
++// are owned by the CALLEE. Holes should not be necessary in the
++// incoming area, as the Java calling convention is completely under
++// the control of the AD file. Doubles can be sorted and packed to
++// avoid holes. Holes in the outgoing arguments may be necessary for
++// varargs C calling conventions.
++// Note 3: Region 0-3 is even aligned, with pad2 as needed. Region 3-5 is
++// even aligned with pad0 as needed.
++// Region 6 is even aligned. Region 6-7 is NOT even aligned;
++// region 6-11 is even aligned; it may be padded out more so that
++// the region from SP to FP meets the minimum stack alignment.
++// Note 4: For I2C adapters, the incoming FP may not meet the minimum stack
++// alignment. Region 11, pad1, may be dynamically extended so that
++// SP meets the minimum alignment.
++
++
++frame %{
++
++  stack_direction(TOWARDS_LOW);
++
++  // These two registers define part of the calling convention
++  // between compiled code and the interpreter.
++  // SEE StartI2CNode::calling_convention & StartC2INode::calling_convention & StartOSRNode::calling_convention
++  // for more information.
++
++  inline_cache_reg(T1); // Inline Cache Register
++  interpreter_method_oop_reg(S3); // Method Oop Register when calling interpreter
++
++  // Optional: name the operand used by cisc-spilling to access [stack_pointer + offset]
++  cisc_spilling_operand_name(indOffset32);
++
++  // Number of stack slots consumed by locking an object
++  // generate Compile::sync_stack_slots
++  sync_stack_slots(2);
++
++  frame_pointer(SP);
++
++  // Interpreter stores its frame pointer in a register which is
++  // stored to the stack by I2CAdaptors.
++  // I2CAdaptors convert from interpreted java to compiled java.
++
++  interpreter_frame_pointer(FP);
++
++  // generate Matcher::stack_alignment
++  stack_alignment(StackAlignmentInBytes); //wordSize = sizeof(char*);
++
++  // Number of stack slots between incoming argument block and the start of
++  // a new frame. The PROLOG must add this many slots to the stack. The
++  // EPILOG must remove this many slots.
++  in_preserve_stack_slots(4); //Now VerifyStackAtCalls is defined as false ! Leave two stack slots for ra and fp
++
++  // Number of outgoing stack slots killed above the out_preserve_stack_slots
++  // for calls to C. Supports the var-args backing area for register parms.
++  varargs_C_out_slots_killed(0);
++
++  // The after-PROLOG location of the return address. Location of
++  // return address specifies a type (REG or STACK) and a number
++  // representing the register number (i.e. - use a register name) or
++  // stack slot.
++  // Ret Addr is on stack in slot 0 if no locks or verification or alignment.
++  // Otherwise, it is above the locks and verification slot and alignment word
++  //return_addr(STACK -1+ round_to(1+VerifyStackAtCalls+Compile::current()->sync()*Compile::current()->sync_stack_slots(),WordsPerLong));
++  return_addr(REG RA);
++
++  // Body of function which returns an integer array locating
++  // arguments either in registers or in stack slots. Passed an array
++  // of ideal registers called "sig" and a "length" count. Stack-slot
++  // offsets are based on outgoing arguments, i.e. a CALLER setting up
++  // arguments for a CALLEE. Incoming stack arguments are
++  // automatically biased by the preserve_stack_slots field above.
++
++
++  // will be generated as Matcher::calling_convention(OptoRegPair *sig, uint length, bool is_outgoing)
++  // StartNode::calling_convention calls this.
++ calling_convention %{ ++ SharedRuntime::java_calling_convention(sig_bt, regs, length, false); ++ %} ++ ++ ++ ++ ++ // Body of function which returns an integer array locating ++ // arguments either in registers or in stack slots. Passed an array ++ // of ideal registers called "sig" and a "length" count. Stack-slot ++ // offsets are based on outgoing arguments, i.e. a CALLER setting up ++ // arguments for a CALLEE. Incoming stack arguments are ++ // automatically biased by the preserve_stack_slots field above. ++ ++ ++ // SEE CallRuntimeNode::calling_convention for more information. ++ c_calling_convention %{ ++ (void) SharedRuntime::c_calling_convention(sig_bt, regs, /*regs2=*/NULL, length); ++ %} ++ ++ ++ // Location of C & interpreter return values ++ // register(s) contain(s) return value for Op_StartI2C and Op_StartOSR. ++ // SEE Matcher::match. ++ c_return_value %{ ++ assert( ideal_reg >= Op_RegI && ideal_reg <= Op_RegL, "only return normal values" ); ++ /* -- , -- , Op_RegN, Op_RegI, Op_RegP, Op_RegF, Op_RegD, Op_RegL */ ++ static int lo[Op_RegL+1] = { 0, 0, V0_num, V0_num, V0_num, F0_num, F0_num, V0_num }; ++ static int hi[Op_RegL+1] = { 0, 0, OptoReg::Bad, OptoReg::Bad, V0_H_num, OptoReg::Bad, F0_H_num, V0_H_num }; ++ return OptoRegPair(hi[ideal_reg],lo[ideal_reg]); ++ %} ++ ++ // Location of return values ++ // register(s) contain(s) return value for Op_StartC2I and Op_Start. ++ // SEE Matcher::match. ++ ++ return_value %{ ++ assert( ideal_reg >= Op_RegI && ideal_reg <= Op_RegL, "only return normal values" ); ++ /* -- , -- , Op_RegN, Op_RegI, Op_RegP, Op_RegF, Op_RegD, Op_RegL */ ++ static int lo[Op_RegL+1] = { 0, 0, V0_num, V0_num, V0_num, F0_num, F0_num, V0_num }; ++ static int hi[Op_RegL+1] = { 0, 0, OptoReg::Bad, OptoReg::Bad, V0_H_num, OptoReg::Bad, F0_H_num, V0_H_num}; ++ return OptoRegPair(hi[ideal_reg],lo[ideal_reg]); ++ %} ++ ++%} ++ ++//----------ATTRIBUTES--------------------------------------------------------- ++//----------Operand Attributes------------------------------------------------- ++op_attrib op_cost(0); // Required cost attribute ++ ++//----------Instruction Attributes--------------------------------------------- ++ins_attrib ins_cost(100); // Required cost attribute ++ins_attrib ins_size(32); // Required size attribute (in bits) ++ins_attrib ins_pc_relative(0); // Required PC Relative flag ++ins_attrib ins_short_branch(0); // Required flag: is this instruction a ++ // non-matching short branch variant of some ++ // long branch? ++ins_attrib ins_alignment(4); // Required alignment attribute (must be a power of 2) ++ // specifies the alignment that some part of the instruction (not ++ // necessarily the start) requires. If > 1, a compute_padding() ++ // function must be provided for the instruction ++ ++//----------OPERANDS----------------------------------------------------------- ++// Operand definitions must precede instruction definitions for correct parsing ++// in the ADLC because operands constitute user defined types which are used in ++// instruction definitions. 
++ ++// Vectors ++operand vecD() %{ ++ constraint(ALLOC_IN_RC(dbl_reg)); ++ match(VecD); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++// Flags register, used as output of compare instructions ++operand FlagsReg() %{ ++ constraint(ALLOC_IN_RC(t0_reg)); ++ match(RegFlags); ++ ++ format %{ "T0" %} ++ interface(REG_INTER); ++%} ++ ++//----------Simple Operands---------------------------------------------------- ++// TODO: Should we need to define some more special immediate number ? ++// Immediate Operands ++// Integer Immediate ++operand immI() %{ ++ match(ConI); ++ // TODO: should not match immI8 here LEE ++ match(immI8); ++ ++ op_cost(20); ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++operand immI8() %{ ++ predicate((-128 <= n->get_int()) && (n->get_int() <= 127)); ++ match(ConI); ++ ++ op_cost(5); ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++operand immI16() %{ ++ predicate((-32768 <= n->get_int()) && (n->get_int() <= 32767)); ++ match(ConI); ++ ++ op_cost(10); ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++operand immI_M65536() %{ ++ predicate(n->get_int() == -65536); ++ match(ConI); ++ ++ op_cost(5); ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++// Constant for decrement ++operand immI_M1() %{ ++ predicate(n->get_int() == -1); ++ match(ConI); ++ ++ op_cost(0); ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++// Constant for test vs zero ++operand immI_0() %{ ++ predicate(n->get_int() == 0); ++ match(ConI); ++ ++ op_cost(0); ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++// Constant for increment ++operand immI_1() %{ ++ predicate(n->get_int() == 1); ++ match(ConI); ++ ++ op_cost(0); ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++// Constants for increment ++operand immI_16() %{ ++ predicate(n->get_int() == 16); ++ match(ConI); ++ ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++operand immI_24() %{ ++ predicate(n->get_int() == 24); ++ match(ConI); ++ ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++// Constant for long shifts ++operand immI_32() %{ ++ predicate(n->get_int() == 32); ++ match(ConI); ++ ++ op_cost(0); ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++// Constant for byte-wide masking ++operand immI_255() %{ ++ predicate(n->get_int() == 255); ++ match(ConI); ++ ++ op_cost(0); ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++operand immI_65535() %{ ++ predicate(n->get_int() == 65535); ++ match(ConI); ++ ++ op_cost(5); ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++operand immI_MaxI() %{ ++ predicate(n->get_int() == 2147483647); ++ match(ConI); ++ ++ op_cost(0); ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++operand immI_M32767_32768() %{ ++ predicate((-32767 <= n->get_int()) && (n->get_int() <= 32768)); ++ match(ConI); ++ ++ op_cost(10); ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++// Valid scale values for addressing modes ++operand immI_0_3() %{ ++ predicate(0 <= n->get_int() && (n->get_int() <= 3)); ++ match(ConI); ++ ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++operand immI_0_31() %{ ++ predicate(n->get_int() >= 0 && n->get_int() <= 31); ++ match(ConI); ++ ++ op_cost(0); ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++operand immI_0_32767() %{ ++ predicate(n->get_int() >= 0 && n->get_int() <= 32767); ++ match(ConI); ++ op_cost(0); ++ ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++operand immI_0_65535() %{ ++ predicate(n->get_int() >= 0 && n->get_int() <= 65535); ++ match(ConI); ++ op_cost(0); ++ ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++operand 
immI_32_63() %{ ++ predicate(n->get_int() >= 32 && n->get_int() <= 63); ++ match(ConI); ++ op_cost(0); ++ ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++// Operand for non-negtive integer mask ++operand immI_nonneg_mask() %{ ++ predicate((n->get_int() >= 0) && (Assembler::is_int_mask(n->get_int()) != -1)); ++ match(ConI); ++ ++ op_cost(0); ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++// Long Immediate ++operand immL() %{ ++ match(ConL); ++ ++ op_cost(20); ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++// Long Immediate 8-bit ++operand immL8() %{ ++ predicate(-0x80L <= n->get_long() && n->get_long() < 0x80L); ++ match(ConL); ++ ++ op_cost(5); ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++operand immL16() %{ ++ predicate((-32768 <= n->get_long()) && (n->get_long() <= 32767)); ++ match(ConL); ++ ++ op_cost(10); ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++// Long Immediate 32-bit signed ++operand immL32() %{ ++ predicate(n->get_long() == (int)(n->get_long())); ++ match(ConL); ++ ++ op_cost(15); ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++// bit 3..6 zero ++operand immL_M121() %{ ++ predicate(n->get_long() == -121L); ++ match(ConL); ++ op_cost(0); ++ ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++// bit 0..2 zero ++operand immL_M8() %{ ++ predicate(n->get_long() == -8L); ++ match(ConL); ++ op_cost(0); ++ ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++// bit 1..2 zero ++operand immL_M7() %{ ++ predicate(n->get_long() == -7L); ++ match(ConL); ++ op_cost(0); ++ ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++// bit 2 zero ++operand immL_M5() %{ ++ predicate(n->get_long() == -5L); ++ match(ConL); ++ op_cost(0); ++ ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++// bit 0..1 zero ++operand immL_M4() %{ ++ predicate(n->get_long() == -4L); ++ match(ConL); ++ op_cost(0); ++ ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++operand immL_M1() %{ ++ predicate(n->get_long() == -1L); ++ match(ConL); ++ op_cost(0); ++ ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++// Long Immediate zero ++operand immL_0() %{ ++ predicate(n->get_long() == 0L); ++ match(ConL); ++ op_cost(0); ++ ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++operand immL_7() %{ ++ predicate(n->get_long() == 7L); ++ match(ConL); ++ op_cost(0); ++ ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++// Long Immediate: low 32-bit mask ++operand immL_MaxUI() %{ ++ predicate(n->get_long() == 0xFFFFFFFFL); ++ match(ConL); ++ op_cost(20); ++ ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++operand immL_M32767_32768() %{ ++ predicate((-32767 <= n->get_long()) && (n->get_long() <= 32768)); ++ match(ConL); ++ ++ op_cost(10); ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++operand immL_0_65535() %{ ++ predicate(n->get_long() >= 0 && n->get_long() <= 65535); ++ match(ConL); ++ op_cost(0); ++ ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++// Operand for non-negtive long mask ++operand immL_nonneg_mask() %{ ++ predicate((n->get_long() >= 0) && (Assembler::is_jlong_mask(n->get_long()) != -1)); ++ match(ConL); ++ ++ op_cost(0); ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++// Pointer Immediate ++operand immP() %{ ++ match(ConP); ++ ++ op_cost(10); ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++// NULL Pointer Immediate ++operand immP_0() %{ ++ predicate(n->get_ptr() == 0); ++ match(ConP); ++ op_cost(0); ++ ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++// Pointer Immediate: 64-bit ++operand immP_no_oop_cheap() %{ ++ 
predicate(!n->bottom_type()->isa_oop_ptr() && (MacroAssembler::insts_for_set64(n->get_ptr()) <= 3)); ++ match(ConP); ++ ++ op_cost(5); ++ // formats are generated automatically for constants and base registers ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++// Pointer for polling page ++operand immP_poll() %{ ++ predicate(n->get_ptr() != 0 && n->get_ptr() == (intptr_t)os::get_polling_page()); ++ match(ConP); ++ op_cost(5); ++ ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++// Pointer Immediate ++operand immN() %{ ++ match(ConN); ++ ++ op_cost(10); ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++operand immNKlass() %{ ++ match(ConNKlass); ++ ++ op_cost(10); ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++// NULL Pointer Immediate ++operand immN_0() %{ ++ predicate(n->get_narrowcon() == 0); ++ match(ConN); ++ ++ op_cost(5); ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++// Single-precision floating-point immediate ++operand immF() %{ ++ match(ConF); ++ ++ op_cost(20); ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++// Single-precision floating-point zero ++operand immF_0() %{ ++ predicate(jint_cast(n->getf()) == 0); ++ match(ConF); ++ ++ op_cost(5); ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++// Double-precision floating-point immediate ++operand immD() %{ ++ match(ConD); ++ ++ op_cost(20); ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++// Double-precision floating-point zero ++operand immD_0() %{ ++ predicate(jlong_cast(n->getd()) == 0); ++ match(ConD); ++ ++ op_cost(5); ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++// Register Operands ++// Integer Register ++operand mRegI() %{ ++ constraint(ALLOC_IN_RC(int_reg)); ++ match(RegI); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand no_Ax_mRegI() %{ ++ constraint(ALLOC_IN_RC(no_Ax_int_reg)); ++ match(RegI); ++ match(mRegI); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand mS0RegI() %{ ++ constraint(ALLOC_IN_RC(s0_reg)); ++ match(RegI); ++ match(mRegI); ++ ++ format %{ "S0" %} ++ interface(REG_INTER); ++%} ++ ++operand mS1RegI() %{ ++ constraint(ALLOC_IN_RC(s1_reg)); ++ match(RegI); ++ match(mRegI); ++ ++ format %{ "S1" %} ++ interface(REG_INTER); ++%} ++ ++operand mS2RegI() %{ ++ constraint(ALLOC_IN_RC(s2_reg)); ++ match(RegI); ++ match(mRegI); ++ ++ format %{ "S2" %} ++ interface(REG_INTER); ++%} ++ ++operand mS3RegI() %{ ++ constraint(ALLOC_IN_RC(s3_reg)); ++ match(RegI); ++ match(mRegI); ++ ++ format %{ "S3" %} ++ interface(REG_INTER); ++%} ++ ++operand mS4RegI() %{ ++ constraint(ALLOC_IN_RC(s4_reg)); ++ match(RegI); ++ match(mRegI); ++ ++ format %{ "S4" %} ++ interface(REG_INTER); ++%} ++ ++operand mS5RegI() %{ ++ constraint(ALLOC_IN_RC(s5_reg)); ++ match(RegI); ++ match(mRegI); ++ ++ format %{ "S5" %} ++ interface(REG_INTER); ++%} ++ ++operand mS6RegI() %{ ++ constraint(ALLOC_IN_RC(s6_reg)); ++ match(RegI); ++ match(mRegI); ++ ++ format %{ "S6" %} ++ interface(REG_INTER); ++%} ++ ++operand mS7RegI() %{ ++ constraint(ALLOC_IN_RC(s7_reg)); ++ match(RegI); ++ match(mRegI); ++ ++ format %{ "S7" %} ++ interface(REG_INTER); ++%} ++ ++ ++operand mT0RegI() %{ ++ constraint(ALLOC_IN_RC(t0_reg)); ++ match(RegI); ++ match(mRegI); ++ ++ format %{ "T0" %} ++ interface(REG_INTER); ++%} ++ ++operand mT1RegI() %{ ++ constraint(ALLOC_IN_RC(t1_reg)); ++ match(RegI); ++ match(mRegI); ++ ++ format %{ "T1" %} ++ interface(REG_INTER); ++%} ++ ++operand mT2RegI() %{ ++ constraint(ALLOC_IN_RC(t2_reg)); ++ match(RegI); ++ match(mRegI); ++ ++ format %{ "T2" %} ++ interface(REG_INTER); 
++%} ++ ++operand mT3RegI() %{ ++ constraint(ALLOC_IN_RC(t3_reg)); ++ match(RegI); ++ match(mRegI); ++ ++ format %{ "T3" %} ++ interface(REG_INTER); ++%} ++ ++operand mT8RegI() %{ ++ constraint(ALLOC_IN_RC(t8_reg)); ++ match(RegI); ++ match(mRegI); ++ ++ format %{ "T8" %} ++ interface(REG_INTER); ++%} ++ ++operand mT9RegI() %{ ++ constraint(ALLOC_IN_RC(t9_reg)); ++ match(RegI); ++ match(mRegI); ++ ++ format %{ "T9" %} ++ interface(REG_INTER); ++%} ++ ++operand mA0RegI() %{ ++ constraint(ALLOC_IN_RC(a0_reg)); ++ match(RegI); ++ match(mRegI); ++ ++ format %{ "A0" %} ++ interface(REG_INTER); ++%} ++ ++operand mA1RegI() %{ ++ constraint(ALLOC_IN_RC(a1_reg)); ++ match(RegI); ++ match(mRegI); ++ ++ format %{ "A1" %} ++ interface(REG_INTER); ++%} ++ ++operand mA2RegI() %{ ++ constraint(ALLOC_IN_RC(a2_reg)); ++ match(RegI); ++ match(mRegI); ++ ++ format %{ "A2" %} ++ interface(REG_INTER); ++%} ++ ++operand mA3RegI() %{ ++ constraint(ALLOC_IN_RC(a3_reg)); ++ match(RegI); ++ match(mRegI); ++ ++ format %{ "A3" %} ++ interface(REG_INTER); ++%} ++ ++operand mA4RegI() %{ ++ constraint(ALLOC_IN_RC(a4_reg)); ++ match(RegI); ++ match(mRegI); ++ ++ format %{ "A4" %} ++ interface(REG_INTER); ++%} ++ ++operand mA5RegI() %{ ++ constraint(ALLOC_IN_RC(a5_reg)); ++ match(RegI); ++ match(mRegI); ++ ++ format %{ "A5" %} ++ interface(REG_INTER); ++%} ++ ++operand mA6RegI() %{ ++ constraint(ALLOC_IN_RC(a6_reg)); ++ match(RegI); ++ match(mRegI); ++ ++ format %{ "A6" %} ++ interface(REG_INTER); ++%} ++ ++operand mA7RegI() %{ ++ constraint(ALLOC_IN_RC(a7_reg)); ++ match(RegI); ++ match(mRegI); ++ ++ format %{ "A7" %} ++ interface(REG_INTER); ++%} ++ ++operand mV0RegI() %{ ++ constraint(ALLOC_IN_RC(v0_reg)); ++ match(RegI); ++ match(mRegI); ++ ++ format %{ "V0" %} ++ interface(REG_INTER); ++%} ++ ++operand mV1RegI() %{ ++ constraint(ALLOC_IN_RC(v1_reg)); ++ match(RegI); ++ match(mRegI); ++ ++ format %{ "V1" %} ++ interface(REG_INTER); ++%} ++ ++operand mRegN() %{ ++ constraint(ALLOC_IN_RC(int_reg)); ++ match(RegN); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand t0_RegN() %{ ++ constraint(ALLOC_IN_RC(t0_reg)); ++ match(RegN); ++ match(mRegN); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand t1_RegN() %{ ++ constraint(ALLOC_IN_RC(t1_reg)); ++ match(RegN); ++ match(mRegN); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand t2_RegN() %{ ++ constraint(ALLOC_IN_RC(t2_reg)); ++ match(RegN); ++ match(mRegN); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand t3_RegN() %{ ++ constraint(ALLOC_IN_RC(t3_reg)); ++ match(RegN); ++ match(mRegN); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand t8_RegN() %{ ++ constraint(ALLOC_IN_RC(t8_reg)); ++ match(RegN); ++ match(mRegN); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand t9_RegN() %{ ++ constraint(ALLOC_IN_RC(t9_reg)); ++ match(RegN); ++ match(mRegN); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand a0_RegN() %{ ++ constraint(ALLOC_IN_RC(a0_reg)); ++ match(RegN); ++ match(mRegN); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand a1_RegN() %{ ++ constraint(ALLOC_IN_RC(a1_reg)); ++ match(RegN); ++ match(mRegN); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand a2_RegN() %{ ++ constraint(ALLOC_IN_RC(a2_reg)); ++ match(RegN); ++ match(mRegN); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand a3_RegN() %{ ++ constraint(ALLOC_IN_RC(a3_reg)); ++ match(RegN); ++ match(mRegN); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand a4_RegN() %{ ++ 
constraint(ALLOC_IN_RC(a4_reg)); ++ match(RegN); ++ match(mRegN); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand a5_RegN() %{ ++ constraint(ALLOC_IN_RC(a5_reg)); ++ match(RegN); ++ match(mRegN); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand a6_RegN() %{ ++ constraint(ALLOC_IN_RC(a6_reg)); ++ match(RegN); ++ match(mRegN); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand a7_RegN() %{ ++ constraint(ALLOC_IN_RC(a7_reg)); ++ match(RegN); ++ match(mRegN); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand s0_RegN() %{ ++ constraint(ALLOC_IN_RC(s0_reg)); ++ match(RegN); ++ match(mRegN); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand s1_RegN() %{ ++ constraint(ALLOC_IN_RC(s1_reg)); ++ match(RegN); ++ match(mRegN); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand s2_RegN() %{ ++ constraint(ALLOC_IN_RC(s2_reg)); ++ match(RegN); ++ match(mRegN); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand s3_RegN() %{ ++ constraint(ALLOC_IN_RC(s3_reg)); ++ match(RegN); ++ match(mRegN); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand s4_RegN() %{ ++ constraint(ALLOC_IN_RC(s4_reg)); ++ match(RegN); ++ match(mRegN); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand s5_RegN() %{ ++ constraint(ALLOC_IN_RC(s5_reg)); ++ match(RegN); ++ match(mRegN); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand s6_RegN() %{ ++ constraint(ALLOC_IN_RC(s6_reg)); ++ match(RegN); ++ match(mRegN); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand s7_RegN() %{ ++ constraint(ALLOC_IN_RC(s7_reg)); ++ match(RegN); ++ match(mRegN); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand v0_RegN() %{ ++ constraint(ALLOC_IN_RC(v0_reg)); ++ match(RegN); ++ match(mRegN); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand v1_RegN() %{ ++ constraint(ALLOC_IN_RC(v1_reg)); ++ match(RegN); ++ match(mRegN); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++// Pointer Register ++operand mRegP() %{ ++ constraint(ALLOC_IN_RC(p_reg)); ++ match(RegP); ++ match(a0_RegP); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand no_T8_mRegP() %{ ++ constraint(ALLOC_IN_RC(no_T8_p_reg)); ++ match(RegP); ++ match(mRegP); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand s0_RegP() ++%{ ++ constraint(ALLOC_IN_RC(s0_long_reg)); ++ match(RegP); ++ match(mRegP); ++ match(no_T8_mRegP); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand s1_RegP() ++%{ ++ constraint(ALLOC_IN_RC(s1_long_reg)); ++ match(RegP); ++ match(mRegP); ++ match(no_T8_mRegP); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand s2_RegP() ++%{ ++ constraint(ALLOC_IN_RC(s2_long_reg)); ++ match(RegP); ++ match(mRegP); ++ match(no_T8_mRegP); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand s3_RegP() ++%{ ++ constraint(ALLOC_IN_RC(s3_long_reg)); ++ match(RegP); ++ match(mRegP); ++ match(no_T8_mRegP); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand s4_RegP() ++%{ ++ constraint(ALLOC_IN_RC(s4_long_reg)); ++ match(RegP); ++ match(mRegP); ++ match(no_T8_mRegP); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand s5_RegP() ++%{ ++ constraint(ALLOC_IN_RC(s5_long_reg)); ++ match(RegP); ++ match(mRegP); ++ match(no_T8_mRegP); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand s6_RegP() ++%{ ++ constraint(ALLOC_IN_RC(s6_long_reg)); ++ match(RegP); ++ match(mRegP); ++ match(no_T8_mRegP); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand s7_RegP() ++%{ ++ 
constraint(ALLOC_IN_RC(s7_long_reg)); ++ match(RegP); ++ match(mRegP); ++ match(no_T8_mRegP); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand t0_RegP() ++%{ ++ constraint(ALLOC_IN_RC(t0_long_reg)); ++ match(RegP); ++ match(mRegP); ++ match(no_T8_mRegP); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand t1_RegP() ++%{ ++ constraint(ALLOC_IN_RC(t1_long_reg)); ++ match(RegP); ++ match(mRegP); ++ match(no_T8_mRegP); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand t2_RegP() ++%{ ++ constraint(ALLOC_IN_RC(t2_long_reg)); ++ match(RegP); ++ match(mRegP); ++ match(no_T8_mRegP); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand t3_RegP() ++%{ ++ constraint(ALLOC_IN_RC(t3_long_reg)); ++ match(RegP); ++ match(mRegP); ++ match(no_T8_mRegP); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand t8_RegP() ++%{ ++ constraint(ALLOC_IN_RC(t8_long_reg)); ++ match(RegP); ++ match(mRegP); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand t9_RegP() ++%{ ++ constraint(ALLOC_IN_RC(t9_long_reg)); ++ match(RegP); ++ match(mRegP); ++ match(no_T8_mRegP); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand a0_RegP() ++%{ ++ constraint(ALLOC_IN_RC(a0_long_reg)); ++ match(RegP); ++ match(mRegP); ++ match(no_T8_mRegP); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand a1_RegP() ++%{ ++ constraint(ALLOC_IN_RC(a1_long_reg)); ++ match(RegP); ++ match(mRegP); ++ match(no_T8_mRegP); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand a2_RegP() ++%{ ++ constraint(ALLOC_IN_RC(a2_long_reg)); ++ match(RegP); ++ match(mRegP); ++ match(no_T8_mRegP); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand a3_RegP() ++%{ ++ constraint(ALLOC_IN_RC(a3_long_reg)); ++ match(RegP); ++ match(mRegP); ++ match(no_T8_mRegP); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand a4_RegP() ++%{ ++ constraint(ALLOC_IN_RC(a4_long_reg)); ++ match(RegP); ++ match(mRegP); ++ match(no_T8_mRegP); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++ ++operand a5_RegP() ++%{ ++ constraint(ALLOC_IN_RC(a5_long_reg)); ++ match(RegP); ++ match(mRegP); ++ match(no_T8_mRegP); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand a6_RegP() ++%{ ++ constraint(ALLOC_IN_RC(a6_long_reg)); ++ match(RegP); ++ match(mRegP); ++ match(no_T8_mRegP); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand a7_RegP() ++%{ ++ constraint(ALLOC_IN_RC(a7_long_reg)); ++ match(RegP); ++ match(mRegP); ++ match(no_T8_mRegP); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand v0_RegP() ++%{ ++ constraint(ALLOC_IN_RC(v0_long_reg)); ++ match(RegP); ++ match(mRegP); ++ match(no_T8_mRegP); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand v1_RegP() ++%{ ++ constraint(ALLOC_IN_RC(v1_long_reg)); ++ match(RegP); ++ match(mRegP); ++ match(no_T8_mRegP); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++/* ++operand mSPRegP(mRegP reg) %{ ++ constraint(ALLOC_IN_RC(sp_reg)); ++ match(reg); ++ ++ format %{ "SP" %} ++ interface(REG_INTER); ++%} ++ ++operand mFPRegP(mRegP reg) %{ ++ constraint(ALLOC_IN_RC(fp_reg)); ++ match(reg); ++ ++ format %{ "FP" %} ++ interface(REG_INTER); ++%} ++*/ ++ ++operand mRegL() %{ ++ constraint(ALLOC_IN_RC(long_reg)); ++ match(RegL); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand v0RegL() %{ ++ constraint(ALLOC_IN_RC(v0_long_reg)); ++ match(RegL); ++ match(mRegL); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand v1RegL() %{ ++ constraint(ALLOC_IN_RC(v1_long_reg)); ++ match(RegL); 
++ match(mRegL); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand a0RegL() %{ ++ constraint(ALLOC_IN_RC(a0_long_reg)); ++ match(RegL); ++ match(mRegL); ++ ++ format %{ "A0" %} ++ interface(REG_INTER); ++%} ++ ++operand a1RegL() %{ ++ constraint(ALLOC_IN_RC(a1_long_reg)); ++ match(RegL); ++ match(mRegL); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand a2RegL() %{ ++ constraint(ALLOC_IN_RC(a2_long_reg)); ++ match(RegL); ++ match(mRegL); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand a3RegL() %{ ++ constraint(ALLOC_IN_RC(a3_long_reg)); ++ match(RegL); ++ match(mRegL); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand t0RegL() %{ ++ constraint(ALLOC_IN_RC(t0_long_reg)); ++ match(RegL); ++ match(mRegL); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand t1RegL() %{ ++ constraint(ALLOC_IN_RC(t1_long_reg)); ++ match(RegL); ++ match(mRegL); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand t2RegL() %{ ++ constraint(ALLOC_IN_RC(t2_long_reg)); ++ match(RegL); ++ match(mRegL); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand t3RegL() %{ ++ constraint(ALLOC_IN_RC(t3_long_reg)); ++ match(RegL); ++ match(mRegL); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand t8RegL() %{ ++ constraint(ALLOC_IN_RC(t8_long_reg)); ++ match(RegL); ++ match(mRegL); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand a4RegL() %{ ++ constraint(ALLOC_IN_RC(a4_long_reg)); ++ match(RegL); ++ match(mRegL); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand a5RegL() %{ ++ constraint(ALLOC_IN_RC(a5_long_reg)); ++ match(RegL); ++ match(mRegL); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand a6RegL() %{ ++ constraint(ALLOC_IN_RC(a6_long_reg)); ++ match(RegL); ++ match(mRegL); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand a7RegL() %{ ++ constraint(ALLOC_IN_RC(a7_long_reg)); ++ match(RegL); ++ match(mRegL); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand s0RegL() %{ ++ constraint(ALLOC_IN_RC(s0_long_reg)); ++ match(RegL); ++ match(mRegL); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand s1RegL() %{ ++ constraint(ALLOC_IN_RC(s1_long_reg)); ++ match(RegL); ++ match(mRegL); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand s2RegL() %{ ++ constraint(ALLOC_IN_RC(s2_long_reg)); ++ match(RegL); ++ match(mRegL); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand s3RegL() %{ ++ constraint(ALLOC_IN_RC(s3_long_reg)); ++ match(RegL); ++ match(mRegL); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand s4RegL() %{ ++ constraint(ALLOC_IN_RC(s4_long_reg)); ++ match(RegL); ++ match(mRegL); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand s7RegL() %{ ++ constraint(ALLOC_IN_RC(s7_long_reg)); ++ match(RegL); ++ match(mRegL); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++// Floating register operands ++operand regF() %{ ++ constraint(ALLOC_IN_RC(flt_reg)); ++ match(RegF); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++//Double Precision Floating register operands ++operand regD() %{ ++ constraint(ALLOC_IN_RC(dbl_reg)); ++ match(RegD); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++//----------Memory Operands---------------------------------------------------- ++// Indirect Memory Operand ++operand indirect(mRegP reg) %{ ++ constraint(ALLOC_IN_RC(p_reg)); ++ match(reg); ++ ++ format %{ "[$reg] @ indirect" %} ++ interface(MEMORY_INTER) %{ ++ base($reg); ++ index(0x0); /* NO_INDEX */ ++ scale(0x0); ++ disp(0x0); 
++ %}
++%}
++
++// Indirect Memory Plus Short Offset Operand
++operand indOffset8(mRegP reg, immL8 off)
++%{
++ constraint(ALLOC_IN_RC(p_reg));
++ match(AddP reg off);
++
++ op_cost(10);
++ format %{ "[$reg + $off (8-bit)] @ indOffset8" %}
++ interface(MEMORY_INTER) %{
++ base($reg);
++ index(0x0); /* NO_INDEX */
++ scale(0x0);
++ disp($off);
++ %}
++%}
++
++// Indirect Memory Times Scale Plus Index Register
++operand indIndexScale(mRegP reg, mRegL lreg, immI_0_3 scale)
++%{
++ predicate(UseLEXT1);
++ constraint(ALLOC_IN_RC(p_reg));
++ match(AddP reg (LShiftL lreg scale));
++
++ op_cost(10);
++ format %{"[$reg + $lreg << $scale] @ indIndexScale" %}
++ interface(MEMORY_INTER) %{
++ base($reg);
++ index($lreg);
++ scale($scale);
++ disp(0x0);
++ %}
++%}
++
++
++// [base + index + offset]
++operand baseIndexOffset8(mRegP base, mRegL index, immL8 off)
++%{
++ predicate(UseLEXT1);
++ constraint(ALLOC_IN_RC(p_reg));
++ op_cost(5);
++ match(AddP (AddP base index) off);
++
++ format %{ "[$base + $index + $off (8-bit)] @ baseIndexOffset8" %}
++ interface(MEMORY_INTER) %{
++ base($base);
++ index($index);
++ scale(0x0);
++ disp($off);
++ %}
++%}
++
++// [base + index + offset]
++operand baseIndexOffset8_convI2L(mRegP base, mRegI index, immL8 off)
++%{
++ predicate(UseLEXT1);
++ constraint(ALLOC_IN_RC(p_reg));
++ op_cost(5);
++ match(AddP (AddP base (ConvI2L index)) off);
++
++ format %{ "[$base + $index + $off (8-bit)] @ baseIndexOffset8_convI2L" %}
++ interface(MEMORY_INTER) %{
++ base($base);
++ index($index);
++ scale(0x0);
++ disp($off);
++ %}
++%}
++
++// [base + index<<scale + offset]
++operand basePosIndexScaleOffset8(mRegP base, mRegI index, immL8 off, immI_0_3 scale)
++%{
++ constraint(ALLOC_IN_RC(p_reg));
++ predicate(UseLEXT1 && n->in(2)->in(3)->in(1)->as_Type()->type()->is_long()->_lo >= 0);
++ op_cost(10);
++ match(AddP (AddP base (LShiftL (ConvI2L index) scale)) off);
++
++ format %{ "[$base + $index << $scale + $off (8-bit)] @ basePosIndexScaleOffset8" %}
++ interface(MEMORY_INTER) %{
++ base($base);
++ index($index);
++ scale($scale);
++ disp($off);
++ %}
++%}
++
++//FIXME: I think it's better to limit the immI to be 16-bit at most!
++// Indirect Memory Plus Long Offset Operand ++operand indOffset32(mRegP reg, immL32 off) %{ ++ constraint(ALLOC_IN_RC(p_reg)); ++ op_cost(20); ++ match(AddP reg off); ++ ++ format %{ "[$reg + $off (32-bit)] @ indOffset32" %} ++ interface(MEMORY_INTER) %{ ++ base($reg); ++ index(0x0); /* NO_INDEX */ ++ scale(0x0); ++ disp($off); ++ %} ++%} ++ ++// Indirect Memory Plus Index Register ++operand indIndex(mRegP addr, mRegL index) %{ ++ constraint(ALLOC_IN_RC(p_reg)); ++ match(AddP addr index); ++ ++ op_cost(20); ++ format %{"[$addr + $index] @ indIndex" %} ++ interface(MEMORY_INTER) %{ ++ base($addr); ++ index($index); ++ scale(0x0); ++ disp(0x0); ++ %} ++%} ++ ++operand indirectNarrowKlass(mRegN reg) ++%{ ++ predicate(Universe::narrow_klass_shift() == 0); ++ constraint(ALLOC_IN_RC(p_reg)); ++ op_cost(10); ++ match(DecodeNKlass reg); ++ ++ format %{ "[$reg] @ indirectNarrowKlass" %} ++ interface(MEMORY_INTER) %{ ++ base($reg); ++ index(0x0); ++ scale(0x0); ++ disp(0x0); ++ %} ++%} ++ ++operand indOffset8NarrowKlass(mRegN reg, immL8 off) ++%{ ++ predicate(Universe::narrow_klass_shift() == 0); ++ constraint(ALLOC_IN_RC(p_reg)); ++ op_cost(10); ++ match(AddP (DecodeNKlass reg) off); ++ ++ format %{ "[$reg + $off (8-bit)] @ indOffset8NarrowKlass" %} ++ interface(MEMORY_INTER) %{ ++ base($reg); ++ index(0x0); ++ scale(0x0); ++ disp($off); ++ %} ++%} ++ ++operand indOffset32NarrowKlass(mRegN reg, immL32 off) ++%{ ++ predicate(Universe::narrow_klass_shift() == 0); ++ constraint(ALLOC_IN_RC(p_reg)); ++ op_cost(10); ++ match(AddP (DecodeNKlass reg) off); ++ ++ format %{ "[$reg + $off (32-bit)] @ indOffset32NarrowKlass" %} ++ interface(MEMORY_INTER) %{ ++ base($reg); ++ index(0x0); ++ scale(0x0); ++ disp($off); ++ %} ++%} ++ ++operand indIndexOffsetNarrowKlass(mRegN reg, mRegL lreg, immL32 off) ++%{ ++ predicate(UseLEXT1); ++ predicate(Universe::narrow_klass_shift() == 0); ++ constraint(ALLOC_IN_RC(p_reg)); ++ match(AddP (AddP (DecodeNKlass reg) lreg) off); ++ ++ op_cost(10); ++ format %{"[$reg + $off + $lreg] @ indIndexOffsetNarrowKlass" %} ++ interface(MEMORY_INTER) %{ ++ base($reg); ++ index($lreg); ++ scale(0x0); ++ disp($off); ++ %} ++%} ++ ++operand indIndexNarrowKlass(mRegN reg, mRegL lreg) ++%{ ++ predicate(Universe::narrow_klass_shift() == 0); ++ constraint(ALLOC_IN_RC(p_reg)); ++ match(AddP (DecodeNKlass reg) lreg); ++ ++ op_cost(10); ++ format %{"[$reg + $lreg] @ indIndexNarrowKlass" %} ++ interface(MEMORY_INTER) %{ ++ base($reg); ++ index($lreg); ++ scale(0x0); ++ disp(0x0); ++ %} ++%} ++ ++// Indirect Memory Operand ++operand indirectNarrow(mRegN reg) ++%{ ++ predicate(Universe::narrow_oop_shift() == 0); ++ constraint(ALLOC_IN_RC(p_reg)); ++ op_cost(10); ++ match(DecodeN reg); ++ ++ format %{ "[$reg] @ indirectNarrow" %} ++ interface(MEMORY_INTER) %{ ++ base($reg); ++ index(0x0); ++ scale(0x0); ++ disp(0x0); ++ %} ++%} ++ ++// Indirect Memory Plus Short Offset Operand ++operand indOffset8Narrow(mRegN reg, immL8 off) ++%{ ++ predicate(Universe::narrow_oop_shift() == 0); ++ constraint(ALLOC_IN_RC(p_reg)); ++ op_cost(10); ++ match(AddP (DecodeN reg) off); ++ ++ format %{ "[$reg + $off (8-bit)] @ indOffset8Narrow" %} ++ interface(MEMORY_INTER) %{ ++ base($reg); ++ index(0x0); ++ scale(0x0); ++ disp($off); ++ %} ++%} ++ ++// Indirect Memory Plus Index Register Plus Offset Operand ++operand indIndexOffset8Narrow(mRegN reg, mRegL lreg, immL8 off) ++%{ ++ predicate((Universe::narrow_oop_shift() == 0) && UseLEXT1); ++ constraint(ALLOC_IN_RC(p_reg)); ++ match(AddP (AddP (DecodeN reg) lreg) off); ++ 
++ op_cost(10); ++ format %{"[$reg + $off + $lreg] @ indIndexOffset8Narrow" %} ++ interface(MEMORY_INTER) %{ ++ base($reg); ++ index($lreg); ++ scale(0x0); ++ disp($off); ++ %} ++%} ++ ++//----------Conditional Branch Operands---------------------------------------- ++// Comparison Op - This is the operation of the comparison, and is limited to ++// the following set of codes: ++// L (<), LE (<=), G (>), GE (>=), E (==), NE (!=) ++// ++// Other attributes of the comparison, such as unsignedness, are specified ++// by the comparison instruction that sets a condition code flags register. ++// That result is represented by a flags operand whose subtype is appropriate ++// to the unsignedness (etc.) of the comparison. ++// ++// Later, the instruction which matches both the Comparison Op (a Bool) and ++// the flags (produced by the Cmp) specifies the coding of the comparison op ++// by matching a specific subtype of Bool operand below, such as cmpOpU. ++ ++// Comparision Code ++operand cmpOp() %{ ++ match(Bool); ++ ++ format %{ "" %} ++ interface(COND_INTER) %{ ++ equal(0x01); ++ not_equal(0x02); ++ greater(0x03); ++ greater_equal(0x04); ++ less(0x05); ++ less_equal(0x06); ++ overflow(0x7); ++ no_overflow(0x8); ++ %} ++%} ++ ++ ++// Comparision Code ++// Comparison Code, unsigned compare. Used by FP also, with ++// C2 (unordered) turned into GT or LT already. The other bits ++// C0 and C3 are turned into Carry & Zero flags. ++operand cmpOpU() %{ ++ match(Bool); ++ ++ format %{ "" %} ++ interface(COND_INTER) %{ ++ equal(0x01); ++ not_equal(0x02); ++ greater(0x03); ++ greater_equal(0x04); ++ less(0x05); ++ less_equal(0x06); ++ overflow(0x7); ++ no_overflow(0x8); ++ %} ++%} ++ ++ ++//----------Special Memory Operands-------------------------------------------- ++// Stack Slot Operand - This operand is used for loading and storing temporary ++// values on the stack where a match requires a value to ++// flow through memory. 
++operand stackSlotP(sRegP reg) %{ ++ constraint(ALLOC_IN_RC(stack_slots)); ++ // No match rule because this operand is only generated in matching ++ op_cost(50); ++ format %{ "[$reg]" %} ++ interface(MEMORY_INTER) %{ ++ base(0x1d); // SP ++ index(0x0); // No Index ++ scale(0x0); // No Scale ++ disp($reg); // Stack Offset ++ %} ++%} ++ ++operand stackSlotI(sRegI reg) %{ ++ constraint(ALLOC_IN_RC(stack_slots)); ++ // No match rule because this operand is only generated in matching ++ op_cost(50); ++ format %{ "[$reg]" %} ++ interface(MEMORY_INTER) %{ ++ base(0x1d); // SP ++ index(0x0); // No Index ++ scale(0x0); // No Scale ++ disp($reg); // Stack Offset ++ %} ++%} ++ ++operand stackSlotF(sRegF reg) %{ ++ constraint(ALLOC_IN_RC(stack_slots)); ++ // No match rule because this operand is only generated in matching ++ op_cost(50); ++ format %{ "[$reg]" %} ++ interface(MEMORY_INTER) %{ ++ base(0x1d); // SP ++ index(0x0); // No Index ++ scale(0x0); // No Scale ++ disp($reg); // Stack Offset ++ %} ++%} ++ ++operand stackSlotD(sRegD reg) %{ ++ constraint(ALLOC_IN_RC(stack_slots)); ++ // No match rule because this operand is only generated in matching ++ op_cost(50); ++ format %{ "[$reg]" %} ++ interface(MEMORY_INTER) %{ ++ base(0x1d); // SP ++ index(0x0); // No Index ++ scale(0x0); // No Scale ++ disp($reg); // Stack Offset ++ %} ++%} ++ ++operand stackSlotL(sRegL reg) %{ ++ constraint(ALLOC_IN_RC(stack_slots)); ++ // No match rule because this operand is only generated in matching ++ op_cost(50); ++ format %{ "[$reg]" %} ++ interface(MEMORY_INTER) %{ ++ base(0x1d); // SP ++ index(0x0); // No Index ++ scale(0x0); // No Scale ++ disp($reg); // Stack Offset ++ %} ++%} ++ ++ ++//------------------------OPERAND CLASSES-------------------------------------- ++//opclass memory( direct, indirect, indOffset16, indOffset32, indOffset32X, indIndexOffset ); ++opclass memory( indirect, indirectNarrow, indOffset8, indOffset32, indIndex, indIndexScale, baseIndexOffset8, baseIndexOffset8_convI2L, indOffset8Narrow, indIndexOffset8Narrow); ++ ++ ++//----------PIPELINE----------------------------------------------------------- ++// Rules which define the behavior of the target architectures pipeline. 
++ ++pipeline %{ ++ ++ //----------ATTRIBUTES--------------------------------------------------------- ++ attributes %{ ++ fixed_size_instructions; // Fixed size instructions ++ branch_has_delay_slot; // branch have delay slot in gs2 ++ max_instructions_per_bundle = 1; // 1 instruction per bundle ++ max_bundles_per_cycle = 4; // Up to 4 bundles per cycle ++ bundle_unit_size=4; ++ instruction_unit_size = 4; // An instruction is 4 bytes long ++ instruction_fetch_unit_size = 16; // The processor fetches one line ++ instruction_fetch_units = 1; // of 16 bytes ++ ++ // List of nop instructions ++ nops( MachNop ); ++ %} ++ ++ //----------RESOURCES---------------------------------------------------------- ++ // Resources are the functional units available to the machine ++ ++ resources(D1, D2, D3, D4, DECODE = D1 | D2 | D3| D4, ALU1, ALU2, ALU = ALU1 | ALU2, FPU1, FPU2, FPU = FPU1 | FPU2, MEM, BR); ++ ++ //----------PIPELINE DESCRIPTION----------------------------------------------- ++ // Pipeline Description specifies the stages in the machine's pipeline ++ ++ // IF: fetch ++ // ID: decode ++ // RD: read ++ // CA: caculate ++ // WB: write back ++ // CM: commit ++ ++ pipe_desc(IF, ID, RD, CA, WB, CM); ++ ++ ++ //----------PIPELINE CLASSES--------------------------------------------------- ++ // Pipeline Classes describe the stages in which input and output are ++ // referenced by the hardware pipeline. ++ ++ //No.1 Integer ALU reg-reg operation : dst <-- reg1 op reg2 ++ pipe_class ialu_regI_regI(mRegI dst, mRegI src1, mRegI src2) %{ ++ single_instruction; ++ src1 : RD(read); ++ src2 : RD(read); ++ dst : WB(write)+1; ++ DECODE : ID; ++ ALU : CA; ++ %} ++ ++ //No.19 Integer mult operation : dst <-- reg1 mult reg2 ++ pipe_class ialu_mult(mRegI dst, mRegI src1, mRegI src2) %{ ++ src1 : RD(read); ++ src2 : RD(read); ++ dst : WB(write)+5; ++ DECODE : ID; ++ ALU2 : CA; ++ %} ++ ++ pipe_class mulL_reg_reg(mRegL dst, mRegL src1, mRegL src2) %{ ++ src1 : RD(read); ++ src2 : RD(read); ++ dst : WB(write)+10; ++ DECODE : ID; ++ ALU2 : CA; ++ %} ++ ++ //No.19 Integer div operation : dst <-- reg1 div reg2 ++ pipe_class ialu_div(mRegI dst, mRegI src1, mRegI src2) %{ ++ src1 : RD(read); ++ src2 : RD(read); ++ dst : WB(write)+10; ++ DECODE : ID; ++ ALU2 : CA; ++ %} ++ ++ //No.19 Integer mod operation : dst <-- reg1 mod reg2 ++ pipe_class ialu_mod(mRegI dst, mRegI src1, mRegI src2) %{ ++ instruction_count(2); ++ src1 : RD(read); ++ src2 : RD(read); ++ dst : WB(write)+10; ++ DECODE : ID; ++ ALU2 : CA; ++ %} ++ ++ //No.15 Long ALU reg-reg operation : dst <-- reg1 op reg2 ++ pipe_class ialu_regL_regL(mRegL dst, mRegL src1, mRegL src2) %{ ++ instruction_count(2); ++ src1 : RD(read); ++ src2 : RD(read); ++ dst : WB(write); ++ DECODE : ID; ++ ALU : CA; ++ %} ++ ++ //No.18 Long ALU reg-imm16 operation : dst <-- reg1 op imm16 ++ pipe_class ialu_regL_imm16(mRegL dst, mRegL src) %{ ++ instruction_count(2); ++ src : RD(read); ++ dst : WB(write); ++ DECODE : ID; ++ ALU : CA; ++ %} ++ ++ //no.16 load Long from memory : ++ pipe_class ialu_loadL(mRegL dst, memory mem) %{ ++ instruction_count(2); ++ mem : RD(read); ++ dst : WB(write)+5; ++ DECODE : ID; ++ MEM : RD; ++ %} ++ ++ //No.17 Store Long to Memory : ++ pipe_class ialu_storeL(mRegL src, memory mem) %{ ++ instruction_count(2); ++ mem : RD(read); ++ src : RD(read); ++ DECODE : ID; ++ MEM : RD; ++ %} ++ ++ //No.2 Integer ALU reg-imm16 operation : dst <-- reg1 op imm16 ++ pipe_class ialu_regI_imm16(mRegI dst, mRegI src) %{ ++ single_instruction; ++ src : RD(read); ++ dst : 
WB(write); ++ DECODE : ID; ++ ALU : CA; ++ %} ++ ++ //No.3 Integer move operation : dst <-- reg ++ pipe_class ialu_regI_mov(mRegI dst, mRegI src) %{ ++ src : RD(read); ++ dst : WB(write); ++ DECODE : ID; ++ ALU : CA; ++ %} ++ ++ //No.4 No instructions : do nothing ++ pipe_class empty( ) %{ ++ instruction_count(0); ++ %} ++ ++ //No.5 UnConditional branch : ++ pipe_class pipe_jump( label labl ) %{ ++ multiple_bundles; ++ DECODE : ID; ++ BR : RD; ++ %} ++ ++ //No.6 ALU Conditional branch : ++ pipe_class pipe_alu_branch(mRegI src1, mRegI src2, label labl ) %{ ++ multiple_bundles; ++ src1 : RD(read); ++ src2 : RD(read); ++ DECODE : ID; ++ BR : RD; ++ %} ++ ++ //no.7 load integer from memory : ++ pipe_class ialu_loadI(mRegI dst, memory mem) %{ ++ mem : RD(read); ++ dst : WB(write)+3; ++ DECODE : ID; ++ MEM : RD; ++ %} ++ ++ //No.8 Store Integer to Memory : ++ pipe_class ialu_storeI(mRegI src, memory mem) %{ ++ mem : RD(read); ++ src : RD(read); ++ DECODE : ID; ++ MEM : RD; ++ %} ++ ++ ++ //No.10 Floating FPU reg-reg operation : dst <-- reg1 op reg2 ++ pipe_class fpu_regF_regF(regF dst, regF src1, regF src2) %{ ++ src1 : RD(read); ++ src2 : RD(read); ++ dst : WB(write); ++ DECODE : ID; ++ FPU : CA; ++ %} ++ ++ //No.22 Floating div operation : dst <-- reg1 div reg2 ++ pipe_class fpu_div(regF dst, regF src1, regF src2) %{ ++ src1 : RD(read); ++ src2 : RD(read); ++ dst : WB(write); ++ DECODE : ID; ++ FPU2 : CA; ++ %} ++ ++ pipe_class fcvt_I2D(regD dst, mRegI src) %{ ++ src : RD(read); ++ dst : WB(write); ++ DECODE : ID; ++ FPU1 : CA; ++ %} ++ ++ pipe_class fcvt_D2I(mRegI dst, regD src) %{ ++ src : RD(read); ++ dst : WB(write); ++ DECODE : ID; ++ FPU1 : CA; ++ %} ++ ++ pipe_class pipe_mfc1(mRegI dst, regD src) %{ ++ src : RD(read); ++ dst : WB(write); ++ DECODE : ID; ++ MEM : RD; ++ %} ++ ++ pipe_class pipe_mtc1(regD dst, mRegI src) %{ ++ src : RD(read); ++ dst : WB(write); ++ DECODE : ID; ++ MEM : RD(5); ++ %} ++ ++ //No.23 Floating sqrt operation : dst <-- reg1 sqrt reg2 ++ pipe_class fpu_sqrt(regF dst, regF src1, regF src2) %{ ++ multiple_bundles; ++ src1 : RD(read); ++ src2 : RD(read); ++ dst : WB(write); ++ DECODE : ID; ++ FPU2 : CA; ++ %} ++ ++ //No.11 Load Floating from Memory : ++ pipe_class fpu_loadF(regF dst, memory mem) %{ ++ instruction_count(1); ++ mem : RD(read); ++ dst : WB(write)+3; ++ DECODE : ID; ++ MEM : RD; ++ %} ++ ++ //No.12 Store Floating to Memory : ++ pipe_class fpu_storeF(regF src, memory mem) %{ ++ instruction_count(1); ++ mem : RD(read); ++ src : RD(read); ++ DECODE : ID; ++ MEM : RD; ++ %} ++ ++ //No.13 FPU Conditional branch : ++ pipe_class pipe_fpu_branch(regF src1, regF src2, label labl ) %{ ++ multiple_bundles; ++ src1 : RD(read); ++ src2 : RD(read); ++ DECODE : ID; ++ BR : RD; ++ %} ++ ++//No.14 Floating FPU reg operation : dst <-- op reg ++ pipe_class fpu1_regF(regF dst, regF src) %{ ++ src : RD(read); ++ dst : WB(write); ++ DECODE : ID; ++ FPU : CA; ++ %} ++ ++ pipe_class long_memory_op() %{ ++ instruction_count(10); multiple_bundles; force_serialization; ++ fixed_latency(30); ++ %} ++ ++ pipe_class simple_call() %{ ++ instruction_count(10); multiple_bundles; force_serialization; ++ fixed_latency(200); ++ BR : RD; ++ %} ++ ++ pipe_class call() %{ ++ instruction_count(10); multiple_bundles; force_serialization; ++ fixed_latency(200); ++ %} ++ ++ //FIXME: ++ //No.9 Piple slow : for multi-instructions ++ pipe_class pipe_slow( ) %{ ++ instruction_count(20); ++ force_serialization; ++ multiple_bundles; ++ fixed_latency(50); ++ %} ++ ++%} ++ ++ ++ 
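++// ---------------------------------------------------------------------------
++// Editorial note (illustrative, not part of the upstream port): every
++// pipe_class declared above is consumed by the instruct rules that follow
++// through their ins_pipe() attribute; that is how the ADLC-generated
++// scheduler learns an instruction's latency and resource usage. For example,
++// the loadI rule defined below ends with ins_pipe( ialu_loadI ), so its
++// result is modeled as written back at WB+3 while the access occupies the
++// MEM resource during RD. Roughly (ins_cost and format omitted):
++//
++//   instruct loadI(mRegI dst, memory mem) %{
++//     match(Set dst (LoadI mem));        // replaces a machine-independent LoadI
++//     ins_encode(load_I_enc(dst, mem));  // emits the lw sequence
++//     ins_pipe( ialu_loadI );            // latency/resources come from the pipe_class
++//   %}
++// ---------------------------------------------------------------------------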
++//----------INSTRUCTIONS------------------------------------------------------- ++// ++// match -- States which machine-independent subtree may be replaced ++// by this instruction. ++// ins_cost -- The estimated cost of this instruction is used by instruction ++// selection to identify a minimum cost tree of machine ++// instructions that matches a tree of machine-independent ++// instructions. ++// format -- A string providing the disassembly for this instruction. ++// The value of an instruction's operand may be inserted ++// by referring to it with a '$' prefix. ++// opcode -- Three instruction opcodes may be provided. These are referred ++// to within an encode class as $primary, $secondary, and $tertiary ++// respectively. The primary opcode is commonly used to ++// indicate the type of machine instruction, while secondary ++// and tertiary are often used for prefix options or addressing ++// modes. ++// ins_encode -- A list of encode classes with parameters. The encode class ++// name must have been defined in an 'enc_class' specification ++// in the encode section of the architecture description. ++ ++ ++// Load Integer ++instruct loadI(mRegI dst, memory mem) %{ ++ match(Set dst (LoadI mem)); ++ ++ ins_cost(125); ++ format %{ "lw $dst, $mem #@loadI" %} ++ ins_encode (load_I_enc(dst, mem)); ++ ins_pipe( ialu_loadI ); ++%} ++ ++instruct loadI_convI2L(mRegL dst, memory mem) %{ ++ match(Set dst (ConvI2L (LoadI mem))); ++ ++ ins_cost(125); ++ format %{ "lw $dst, $mem #@loadI_convI2L" %} ++ ins_encode (load_I_enc(dst, mem)); ++ ins_pipe( ialu_loadI ); ++%} ++ ++// Load Integer (32 bit signed) to Byte (8 bit signed) ++instruct loadI2B(mRegI dst, memory mem, immI_24 twentyfour) %{ ++ match(Set dst (RShiftI (LShiftI (LoadI mem) twentyfour) twentyfour)); ++ ++ ins_cost(125); ++ format %{ "lb $dst, $mem\t# int -> byte #@loadI2B" %} ++ ins_encode(load_B_enc(dst, mem)); ++ ins_pipe(ialu_loadI); ++%} ++ ++// Load Integer (32 bit signed) to Unsigned Byte (8 bit UNsigned) ++instruct loadI2UB(mRegI dst, memory mem, immI_255 mask) %{ ++ match(Set dst (AndI (LoadI mem) mask)); ++ ++ ins_cost(125); ++ format %{ "lbu $dst, $mem\t# int -> ubyte #@loadI2UB" %} ++ ins_encode(load_UB_enc(dst, mem)); ++ ins_pipe(ialu_loadI); ++%} ++ ++// Load Integer (32 bit signed) to Short (16 bit signed) ++instruct loadI2S(mRegI dst, memory mem, immI_16 sixteen) %{ ++ match(Set dst (RShiftI (LShiftI (LoadI mem) sixteen) sixteen)); ++ ++ ins_cost(125); ++ format %{ "lh $dst, $mem\t# int -> short #@loadI2S" %} ++ ins_encode(load_S_enc(dst, mem)); ++ ins_pipe(ialu_loadI); ++%} ++ ++// Load Integer (32 bit signed) to Unsigned Short/Char (16 bit UNsigned) ++instruct loadI2US(mRegI dst, memory mem, immI_65535 mask) %{ ++ match(Set dst (AndI (LoadI mem) mask)); ++ ++ ins_cost(125); ++ format %{ "lhu $dst, $mem\t# int -> ushort/char #@loadI2US" %} ++ ins_encode(load_C_enc(dst, mem)); ++ ins_pipe(ialu_loadI); ++%} ++ ++// Load Long. 
++instruct loadL(mRegL dst, memory mem) %{ ++// predicate(!((LoadLNode*)n)->require_atomic_access()); ++ match(Set dst (LoadL mem)); ++ ++ ins_cost(250); ++ format %{ "ld $dst, $mem #@loadL" %} ++ ins_encode(load_L_enc(dst, mem)); ++ ins_pipe( ialu_loadL ); ++%} ++ ++// Load Long - UNaligned ++instruct loadL_unaligned(mRegL dst, memory mem) %{ ++ match(Set dst (LoadL_unaligned mem)); ++ ++ // FIXME: Need more effective ldl/ldr ++ ins_cost(450); ++ format %{ "ld $dst, $mem #@loadL_unaligned\n\t" %} ++ ins_encode(load_L_enc(dst, mem)); ++ ins_pipe( ialu_loadL ); ++%} ++ ++// Store Long ++instruct storeL_reg(memory mem, mRegL src) %{ ++ match(Set mem (StoreL mem src)); ++ ++ ins_cost(200); ++ format %{ "sd $mem, $src #@storeL_reg\n" %} ++ ins_encode(store_L_reg_enc(mem, src)); ++ ins_pipe( ialu_storeL ); ++%} ++ ++instruct storeL_immL_0(memory mem, immL_0 zero) %{ ++ match(Set mem (StoreL mem zero)); ++ ++ ins_cost(180); ++ format %{ "sd zero, $mem #@storeL_immL_0" %} ++ ins_encode(store_L_immL_0_enc(mem, zero)); ++ ins_pipe( ialu_storeL ); ++%} ++ ++instruct storeL_imm(memory mem, immL src) %{ ++ match(Set mem (StoreL mem src)); ++ ++ ins_cost(200); ++ format %{ "sd $src, $mem #@storeL_imm" %} ++ ins_encode(store_L_immL_enc(mem, src)); ++ ins_pipe( ialu_storeL ); ++%} ++ ++// Load Compressed Pointer ++instruct loadN(mRegN dst, memory mem) ++%{ ++ match(Set dst (LoadN mem)); ++ ++ ins_cost(125); // XXX ++ format %{ "lwu $dst, $mem\t# compressed ptr @ loadN" %} ++ ins_encode (load_N_enc(dst, mem)); ++ ins_pipe( ialu_loadI ); // XXX ++%} ++ ++instruct loadN2P(mRegP dst, memory mem) ++%{ ++ match(Set dst (DecodeN (LoadN mem))); ++ predicate(Universe::narrow_oop_base() == NULL && Universe::narrow_oop_shift() == 0); ++ ++ ins_cost(125); // XXX ++ format %{ "lwu $dst, $mem\t# @ loadN2P" %} ++ ins_encode (load_N_enc(dst, mem)); ++ ins_pipe( ialu_loadI ); // XXX ++%} ++ ++// Load Pointer ++instruct loadP(mRegP dst, memory mem) %{ ++ match(Set dst (LoadP mem)); ++ ++ ins_cost(125); ++ format %{ "ld $dst, $mem #@loadP" %} ++ ins_encode (load_P_enc(dst, mem)); ++ ins_pipe( ialu_loadI ); ++%} ++ ++// Load Klass Pointer ++instruct loadKlass(mRegP dst, memory mem) %{ ++ match(Set dst (LoadKlass mem)); ++ ++ ins_cost(125); ++ format %{ "MOV $dst,$mem @ loadKlass" %} ++ ins_encode (load_P_enc(dst, mem)); ++ ins_pipe( ialu_loadI ); ++%} ++ ++// Load narrow Klass Pointer ++instruct loadNKlass(mRegN dst, memory mem) ++%{ ++ match(Set dst (LoadNKlass mem)); ++ ++ ins_cost(125); // XXX ++ format %{ "lwu $dst, $mem\t# compressed klass ptr @ loadNKlass" %} ++ ins_encode (load_N_enc(dst, mem)); ++ ins_pipe( ialu_loadI ); // XXX ++%} ++ ++instruct loadN2PKlass(mRegP dst, memory mem) ++%{ ++ match(Set dst (DecodeNKlass (LoadNKlass mem))); ++ predicate(Universe::narrow_klass_base() == NULL && Universe::narrow_klass_shift() == 0); ++ ++ ins_cost(125); // XXX ++ format %{ "lwu $dst, $mem\t# compressed klass ptr @ loadN2PKlass" %} ++ ins_encode (load_N_enc(dst, mem)); ++ ins_pipe( ialu_loadI ); // XXX ++%} ++ ++// Load Constant ++instruct loadConI(mRegI dst, immI src) %{ ++ match(Set dst src); ++ ++ ins_cost(150); ++ format %{ "mov $dst, $src #@loadConI" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ int value = $src$$constant; ++ __ move(dst, value); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++ ++instruct loadConL_set64(mRegL dst, immL src) %{ ++ match(Set dst src); ++ ins_cost(120); ++ format %{ "li $dst, $src @ loadConL_set64" %} ++ ins_encode %{ ++ __ set64($dst$$Register, $src$$constant); ++ %} ++ 
ins_pipe(ialu_regL_regL); ++%} ++ ++instruct loadConL16(mRegL dst, immL16 src) %{ ++ match(Set dst src); ++ ins_cost(105); ++ format %{ "mov $dst, $src #@loadConL16" %} ++ ins_encode %{ ++ Register dst_reg = as_Register($dst$$reg); ++ int value = $src$$constant; ++ __ daddiu(dst_reg, R0, value); ++ %} ++ ins_pipe( ialu_regL_regL ); ++%} ++ ++ ++instruct loadConL_immL_0(mRegL dst, immL_0 src) %{ ++ match(Set dst src); ++ ins_cost(100); ++ format %{ "mov $dst, zero #@loadConL_immL_0" %} ++ ins_encode %{ ++ Register dst_reg = as_Register($dst$$reg); ++ __ daddu(dst_reg, R0, R0); ++ %} ++ ins_pipe( ialu_regL_regL ); ++%} ++ ++// Load Range ++instruct loadRange(mRegI dst, memory mem) %{ ++ match(Set dst (LoadRange mem)); ++ ++ ins_cost(125); ++ format %{ "MOV $dst,$mem @ loadRange" %} ++ ins_encode(load_I_enc(dst, mem)); ++ ins_pipe( ialu_loadI ); ++%} ++ ++ ++instruct storeP(memory mem, mRegP src ) %{ ++ match(Set mem (StoreP mem src)); ++ ++ ins_cost(125); ++ format %{ "sd $src, $mem #@storeP" %} ++ ins_encode(store_P_reg_enc(mem, src)); ++ ins_pipe( ialu_storeI ); ++%} ++ ++// Store NULL Pointer, mark word, or other simple pointer constant. ++instruct storeImmP_immP_0(memory mem, immP_0 zero) %{ ++ match(Set mem (StoreP mem zero)); ++ ++ ins_cost(125); ++ format %{ "mov $mem, $zero #@storeImmP_immP_0" %} ++ ins_encode(store_P_immP0_enc(mem)); ++ ins_pipe( ialu_storeI ); ++%} ++ ++// Store Byte Immediate ++instruct storeImmB(memory mem, immI8 src) %{ ++ match(Set mem (StoreB mem src)); ++ ++ ins_cost(150); ++ format %{ "movb $mem, $src #@storeImmB" %} ++ ins_encode(store_B_immI_enc(mem, src)); ++ ins_pipe( ialu_storeI ); ++%} ++ ++// Store Compressed Pointer ++instruct storeN(memory mem, mRegN src) ++%{ ++ match(Set mem (StoreN mem src)); ++ ++ ins_cost(125); // XXX ++ format %{ "sw $mem, $src\t# compressed ptr @ storeN" %} ++ ins_encode(store_N_reg_enc(mem, src)); ++ ins_pipe( ialu_storeI ); ++%} ++ ++instruct storeP2N(memory mem, mRegP src) ++%{ ++ match(Set mem (StoreN mem (EncodeP src))); ++ predicate(Universe::narrow_oop_base() == NULL && Universe::narrow_oop_shift() == 0); ++ ++ ins_cost(125); // XXX ++ format %{ "sw $mem, $src\t# @ storeP2N" %} ++ ins_encode(store_N_reg_enc(mem, src)); ++ ins_pipe( ialu_storeI ); ++%} ++ ++instruct storeNKlass(memory mem, mRegN src) ++%{ ++ match(Set mem (StoreNKlass mem src)); ++ ++ ins_cost(125); // XXX ++ format %{ "sw $mem, $src\t# compressed klass ptr @ storeNKlass" %} ++ ins_encode(store_N_reg_enc(mem, src)); ++ ins_pipe( ialu_storeI ); ++%} ++ ++instruct storeP2NKlass(memory mem, mRegP src) ++%{ ++ match(Set mem (StoreNKlass mem (EncodePKlass src))); ++ predicate(Universe::narrow_klass_base() == NULL && Universe::narrow_klass_shift() == 0); ++ ++ ins_cost(125); // XXX ++ format %{ "sw $mem, $src\t# @ storeP2NKlass" %} ++ ins_encode(store_N_reg_enc(mem, src)); ++ ins_pipe( ialu_storeI ); ++%} ++ ++instruct storeImmN_immN_0(memory mem, immN_0 zero) ++%{ ++ match(Set mem (StoreN mem zero)); ++ ++ ins_cost(125); // XXX ++ format %{ "storeN0 zero, $mem\t# compressed ptr" %} ++ ins_encode(storeImmN0_enc(mem, zero)); ++ ins_pipe( ialu_storeI ); ++%} ++ ++// Store Byte ++instruct storeB(memory mem, mRegI src) %{ ++ match(Set mem (StoreB mem src)); ++ ++ ins_cost(125); ++ format %{ "sb $src, $mem #@storeB" %} ++ ins_encode(store_B_reg_enc(mem, src)); ++ ins_pipe( ialu_storeI ); ++%} ++ ++instruct storeB_convL2I(memory mem, mRegL src) %{ ++ match(Set mem (StoreB mem (ConvL2I src))); ++ ++ ins_cost(125); ++ format %{ "sb $src, $mem #@storeB_convL2I" %} ++ 
ins_encode(store_B_reg_enc(mem, src)); ++ ins_pipe( ialu_storeI ); ++%} ++ ++// Load Byte (8bit signed) ++instruct loadB(mRegI dst, memory mem) %{ ++ match(Set dst (LoadB mem)); ++ ++ ins_cost(125); ++ format %{ "lb $dst, $mem #@loadB" %} ++ ins_encode(load_B_enc(dst, mem)); ++ ins_pipe( ialu_loadI ); ++%} ++ ++instruct loadB_convI2L(mRegL dst, memory mem) %{ ++ match(Set dst (ConvI2L (LoadB mem))); ++ ++ ins_cost(125); ++ format %{ "lb $dst, $mem #@loadB_convI2L" %} ++ ins_encode(load_B_enc(dst, mem)); ++ ins_pipe( ialu_loadI ); ++%} ++ ++// Load Byte (8bit UNsigned) ++instruct loadUB(mRegI dst, memory mem) %{ ++ match(Set dst (LoadUB mem)); ++ ++ ins_cost(125); ++ format %{ "lbu $dst, $mem #@loadUB" %} ++ ins_encode(load_UB_enc(dst, mem)); ++ ins_pipe( ialu_loadI ); ++%} ++ ++instruct loadUB_convI2L(mRegL dst, memory mem) %{ ++ match(Set dst (ConvI2L (LoadUB mem))); ++ ++ ins_cost(125); ++ format %{ "lbu $dst, $mem #@loadUB_convI2L" %} ++ ins_encode(load_UB_enc(dst, mem)); ++ ins_pipe( ialu_loadI ); ++%} ++ ++// Load Short (16bit signed) ++instruct loadS(mRegI dst, memory mem) %{ ++ match(Set dst (LoadS mem)); ++ ++ ins_cost(125); ++ format %{ "lh $dst, $mem #@loadS" %} ++ ins_encode(load_S_enc(dst, mem)); ++ ins_pipe( ialu_loadI ); ++%} ++ ++// Load Short (16 bit signed) to Byte (8 bit signed) ++instruct loadS2B(mRegI dst, memory mem, immI_24 twentyfour) %{ ++ match(Set dst (RShiftI (LShiftI (LoadS mem) twentyfour) twentyfour)); ++ ++ ins_cost(125); ++ format %{ "lb $dst, $mem\t# short -> byte #@loadS2B" %} ++ ins_encode(load_B_enc(dst, mem)); ++ ins_pipe(ialu_loadI); ++%} ++ ++instruct loadS_convI2L(mRegL dst, memory mem) %{ ++ match(Set dst (ConvI2L (LoadS mem))); ++ ++ ins_cost(125); ++ format %{ "lh $dst, $mem #@loadS_convI2L" %} ++ ins_encode(load_S_enc(dst, mem)); ++ ins_pipe( ialu_loadI ); ++%} ++ ++// Store Integer Immediate ++instruct storeImmI(memory mem, immI src) %{ ++ match(Set mem (StoreI mem src)); ++ ++ ins_cost(150); ++ format %{ "mov $mem, $src #@storeImmI" %} ++ ins_encode(store_I_immI_enc(mem, src)); ++ ins_pipe( ialu_storeI ); ++%} ++ ++// Store Integer ++instruct storeI(memory mem, mRegI src) %{ ++ match(Set mem (StoreI mem src)); ++ ++ ins_cost(125); ++ format %{ "sw $mem, $src #@storeI" %} ++ ins_encode(store_I_reg_enc(mem, src)); ++ ins_pipe( ialu_storeI ); ++%} ++ ++instruct storeI_convL2I(memory mem, mRegL src) %{ ++ match(Set mem (StoreI mem (ConvL2I src))); ++ ++ ins_cost(125); ++ format %{ "sw $mem, $src #@storeI_convL2I" %} ++ ins_encode(store_I_reg_enc(mem, src)); ++ ins_pipe( ialu_storeI ); ++%} ++ ++// Load Float ++instruct loadF(regF dst, memory mem) %{ ++ match(Set dst (LoadF mem)); ++ ++ ins_cost(150); ++ format %{ "loadF $dst, $mem #@loadF" %} ++ ins_encode(load_F_enc(dst, mem)); ++ ins_pipe( ialu_loadI ); ++%} ++ ++instruct loadConP_general(mRegP dst, immP src) %{ ++ match(Set dst src); ++ ++ ins_cost(120); ++ format %{ "li $dst, $src #@loadConP_general" %} ++ ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ long* value = (long*)$src$$constant; ++ ++ if($src->constant_reloc() == relocInfo::metadata_type){ ++ int klass_index = __ oop_recorder()->find_index((Klass*)value); ++ RelocationHolder rspec = metadata_Relocation::spec(klass_index); ++ ++ __ relocate(rspec); ++ __ patchable_set48(dst, (long)value); ++ } else if($src->constant_reloc() == relocInfo::oop_type){ ++ int oop_index = __ oop_recorder()->find_index((jobject)value); ++ RelocationHolder rspec = oop_Relocation::spec(oop_index); ++ ++ __ relocate(rspec); ++ __ patchable_set48(dst, 
(long)value); ++ } else if ($src->constant_reloc() == relocInfo::none) { ++ __ set64(dst, (long)value); ++ } ++ %} ++ ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++instruct loadConP_no_oop_cheap(mRegP dst, immP_no_oop_cheap src) %{ ++ match(Set dst src); ++ ++ ins_cost(80); ++ format %{ "li $dst, $src @ loadConP_no_oop_cheap" %} ++ ++ ins_encode %{ ++ __ set64($dst$$Register, $src$$constant); ++ %} ++ ++ ins_pipe(ialu_regI_regI); ++%} ++ ++ ++instruct loadConP_poll(mRegP dst, immP_poll src) %{ ++ match(Set dst src); ++ ++ ins_cost(50); ++ format %{ "li $dst, $src #@loadConP_poll" %} ++ ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ intptr_t value = (intptr_t)$src$$constant; ++ ++ __ set64(dst, (jlong)value); ++ %} ++ ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++instruct loadConP_immP_0(mRegP dst, immP_0 src) ++%{ ++ match(Set dst src); ++ ++ ins_cost(50); ++ format %{ "mov $dst, R0\t# ptr" %} ++ ins_encode %{ ++ Register dst_reg = $dst$$Register; ++ __ daddu(dst_reg, R0, R0); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++instruct loadConN_immN_0(mRegN dst, immN_0 src) %{ ++ match(Set dst src); ++ format %{ "move $dst, R0\t# compressed NULL ptr" %} ++ ins_encode %{ ++ __ move($dst$$Register, R0); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++instruct loadConN(mRegN dst, immN src) %{ ++ match(Set dst src); ++ ++ ins_cost(125); ++ format %{ "li $dst, $src\t# compressed ptr @ loadConN" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ __ set_narrow_oop(dst, (jobject)$src$$constant); ++ %} ++ ins_pipe( ialu_regI_regI ); // XXX ++%} ++ ++instruct loadConNKlass(mRegN dst, immNKlass src) %{ ++ match(Set dst src); ++ ++ ins_cost(125); ++ format %{ "li $dst, $src\t# compressed klass ptr @ loadConNKlass" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ __ set_narrow_klass(dst, (Klass*)$src$$constant); ++ %} ++ ins_pipe( ialu_regI_regI ); // XXX ++%} ++ ++//FIXME ++// Tail Call; Jump from runtime stub to Java code. ++// Also known as an 'interprocedural jump'. ++// Target of jump will eventually return to caller. ++// TailJump below removes the return address. ++instruct TailCalljmpInd(mRegP jump_target, mRegP method_oop) %{ ++ match(TailCall jump_target method_oop ); ++ ins_cost(300); ++ format %{ "JMP $jump_target \t# @TailCalljmpInd" %} ++ ++ ins_encode %{ ++ Register target = $jump_target$$Register; ++ Register oop = $method_oop$$Register; ++ ++ // RA will be used in generate_forward_exception() ++ __ push(RA); ++ ++ __ move(S3, oop); ++ __ jr(target); ++ __ delayed()->nop(); ++ %} ++ ++ ins_pipe( pipe_jump ); ++%} ++ ++// Create exception oop: created by stack-crawling runtime code. ++// Created exception is now available to this handler, and is setup ++// just prior to jumping to this handler. No code emitted. ++instruct CreateException( a0_RegP ex_oop ) ++%{ ++ match(Set ex_oop (CreateEx)); ++ ++ // use the following format syntax ++ format %{ "# exception oop is in A0; no code emitted @CreateException" %} ++ ins_encode %{ ++ // X86 leaves this function empty ++ __ block_comment("CreateException is empty in MIPS"); ++ %} ++ ins_pipe( empty ); ++// ins_pipe( pipe_jump ); ++%} ++ ++ ++/* The mechanism of exception handling is clear now. ++ ++- Common try/catch: ++ [stubGenerator_mips.cpp] generate_forward_exception() ++ |- V0, V1 are created ++ |- T9 <= SharedRuntime::exception_handler_for_return_address ++ `- jr T9 ++ `- the caller's exception_handler ++ `- jr OptoRuntime::exception_blob ++ `- here ++- Rethrow(e.g. 
'unwind'): ++ * The callee: ++ |- an exception is triggered during execution ++ `- exits the callee method through RethrowException node ++ |- The callee pushes exception_oop(T0) and exception_pc(RA) ++ `- The callee jumps to OptoRuntime::rethrow_stub() ++ * In OptoRuntime::rethrow_stub: ++ |- The VM calls _rethrow_Java to determine the return address in the caller method ++ `- exits the stub with tailjmpInd ++ |- pops exception_oop(V0) and exception_pc(V1) ++ `- jumps to the return address(usually an exception_handler) ++ * The caller: ++ `- continues processing the exception_blob with V0/V1 ++*/ ++ ++// Rethrow exception: ++// The exception oop will come in the first argument position. ++// Then JUMP (not call) to the rethrow stub code. ++instruct RethrowException() ++%{ ++ match(Rethrow); ++ ++ // use the following format syntax ++ format %{ "JMP rethrow_stub #@RethrowException" %} ++ ins_encode %{ ++ __ block_comment("@ RethrowException"); ++ ++ cbuf.set_insts_mark(); ++ cbuf.relocate(cbuf.insts_mark(), runtime_call_Relocation::spec()); ++ ++ // call OptoRuntime::rethrow_stub to get the exception handler in parent method ++ __ patchable_jump((address)OptoRuntime::rethrow_stub()); ++ %} ++ ins_pipe( pipe_jump ); ++%} ++ ++// ============================================================================ ++// Branch Instructions --- long offset versions ++ ++// Jump Direct ++instruct jmpDir_long(label labl) %{ ++ match(Goto); ++ effect(USE labl); ++ ++ ins_cost(300); ++ format %{ "JMP $labl #@jmpDir_long" %} ++ ++ ins_encode %{ ++ Label* L = $labl$$label; ++ __ jmp_far(*L); ++ %} ++ ++ ins_pipe( pipe_jump ); ++ //ins_pc_relative(1); ++%} ++ ++// Jump Direct Conditional - Label defines a relative address from Jcc+1 ++instruct jmpLoopEnd_long(cmpOp cop, mRegI src1, mRegI src2, label labl) %{ ++ match(CountedLoopEnd cop (CmpI src1 src2)); ++ effect(USE labl); ++ ++ ins_cost(300); ++ format %{ "J$cop $src1, $src2, $labl\t# Loop end @ jmpLoopEnd_long" %} ++ ins_encode %{ ++ Register op1 = $src1$$Register; ++ Register op2 = $src2$$Register; ++ Label* L = $labl$$label; ++ int flag = $cop$$cmpcode; ++ ++ switch(flag) { ++ case 0x01: //equal ++ __ beq_long(op1, op2, *L); ++ break; ++ case 0x02: //not_equal ++ __ bne_long(op1, op2, *L); ++ break; ++ case 0x03: //above ++ __ slt(AT, op2, op1); ++ __ bne_long(AT, R0, *L); ++ break; ++ case 0x04: //above_equal ++ __ slt(AT, op1, op2); ++ __ beq_long(AT, R0, *L); ++ break; ++ case 0x05: //below ++ __ slt(AT, op1, op2); ++ __ bne_long(AT, R0, *L); ++ break; ++ case 0x06: //below_equal ++ __ slt(AT, op2, op1); ++ __ beq_long(AT, R0, *L); ++ break; ++ default: ++ Unimplemented(); ++ } ++ %} ++ ins_pipe( pipe_jump ); ++ ins_pc_relative(1); ++%} ++ ++instruct jmpLoopEnd_reg_immI_long(cmpOp cop, mRegI src1, immI src2, label labl) %{ ++ match(CountedLoopEnd cop (CmpI src1 src2)); ++ effect(USE labl); ++ ++ ins_cost(300); ++ format %{ "J$cop $src1, $src2, $labl\t# Loop end @ jmpLoopEnd_reg_immI_long" %} ++ ins_encode %{ ++ Register op1 = $src1$$Register; ++ Register op2 = AT; ++ Label* L = $labl$$label; ++ int flag = $cop$$cmpcode; ++ ++ __ move(op2, $src2$$constant); ++ ++ switch(flag) { ++ case 0x01: //equal ++ __ beq_long(op1, op2, *L); ++ break; ++ case 0x02: //not_equal ++ __ bne_long(op1, op2, *L); ++ break; ++ case 0x03: //above ++ __ slt(AT, op2, op1); ++ __ bne_long(AT, R0, *L); ++ break; ++ case 0x04: //above_equal ++ __ slt(AT, op1, op2); ++ __ beq_long(AT, R0, *L); ++ break; ++ case 0x05: //below ++ __ slt(AT, op1, op2); ++ __ bne_long(AT, R0, *L); 
++ break; ++ case 0x06: //below_equal ++ __ slt(AT, op2, op1); ++ __ beq_long(AT, R0, *L); ++ break; ++ default: ++ Unimplemented(); ++ } ++ %} ++ ins_pipe( pipe_jump ); ++ ins_pc_relative(1); ++%} ++ ++ ++// This match pattern is created for StoreIConditional since I cannot match IfNode without a RegFlags! ++instruct jmpCon_flags_long(cmpOp cop, FlagsReg cr, label labl) %{ ++ match(If cop cr); ++ effect(USE labl); ++ ++ ins_cost(300); ++ format %{ "J$cop $labl #mips uses T0 as equivalent to eflag @jmpCon_flags_long" %} ++ ++ ins_encode %{ ++ Label* L = $labl$$label; ++ switch($cop$$cmpcode) { ++ case 0x01: //equal ++ __ bne_long($cr$$Register, R0, *L); ++ break; ++ case 0x02: //not equal ++ __ beq_long($cr$$Register, R0, *L); ++ break; ++ default: ++ Unimplemented(); ++ } ++ %} ++ ++ ins_pipe( pipe_jump ); ++ ins_pc_relative(1); ++%} ++ ++// Conditional jumps ++instruct branchConP_zero_long(cmpOpU cmp, mRegP op1, immP_0 zero, label labl) %{ ++ match(If cmp (CmpP op1 zero)); ++ effect(USE labl); ++ ++ ins_cost(180); ++ format %{ "b$cmp $op1, R0, $labl #@branchConP_zero_long" %} ++ ++ ins_encode %{ ++ Register op1 = $op1$$Register; ++ Register op2 = R0; ++ Label* L = $labl$$label; ++ int flag = $cmp$$cmpcode; ++ ++ switch(flag) { ++ case 0x01: //equal ++ __ beq_long(op1, op2, *L); ++ break; ++ case 0x02: //not_equal ++ __ bne_long(op1, op2, *L); ++ break; ++ default: ++ Unimplemented(); ++ } ++ %} ++ ++ ins_pc_relative(1); ++ ins_pipe( pipe_alu_branch ); ++%} ++ ++instruct branchConN2P_zero_long(cmpOpU cmp, mRegN op1, immP_0 zero, label labl) %{ ++ match(If cmp (CmpP (DecodeN op1) zero)); ++ predicate(Universe::narrow_oop_base() == NULL && Universe::narrow_oop_shift() == 0); ++ effect(USE labl); ++ ++ ins_cost(180); ++ format %{ "b$cmp $op1, R0, $labl #@branchConN2P_zero_long" %} ++ ++ ins_encode %{ ++ Register op1 = $op1$$Register; ++ Register op2 = R0; ++ Label* L = $labl$$label; ++ int flag = $cmp$$cmpcode; ++ ++ switch(flag) ++ { ++ case 0x01: //equal ++ __ beq_long(op1, op2, *L); ++ break; ++ case 0x02: //not_equal ++ __ bne_long(op1, op2, *L); ++ break; ++ default: ++ Unimplemented(); ++ } ++ %} ++ ++ ins_pc_relative(1); ++ ins_pipe( pipe_alu_branch ); ++%} ++ ++ ++instruct branchConP_long(cmpOpU cmp, mRegP op1, mRegP op2, label labl) %{ ++ match(If cmp (CmpP op1 op2)); ++// predicate(can_branch_register(_kids[0]->_leaf, _kids[1]->_leaf)); ++ effect(USE labl); ++ ++ ins_cost(200); ++ format %{ "b$cmp $op1, $op2, $labl #@branchConP_long" %} ++ ++ ins_encode %{ ++ Register op1 = $op1$$Register; ++ Register op2 = $op2$$Register; ++ Label* L = $labl$$label; ++ int flag = $cmp$$cmpcode; ++ ++ switch(flag) { ++ case 0x01: //equal ++ __ beq_long(op1, op2, *L); ++ break; ++ case 0x02: //not_equal ++ __ bne_long(op1, op2, *L); ++ break; ++ case 0x03: //above ++ __ sltu(AT, op2, op1); ++ __ bne_long(R0, AT, *L); ++ break; ++ case 0x04: //above_equal ++ __ sltu(AT, op1, op2); ++ __ beq_long(AT, R0, *L); ++ break; ++ case 0x05: //below ++ __ sltu(AT, op1, op2); ++ __ bne_long(R0, AT, *L); ++ break; ++ case 0x06: //below_equal ++ __ sltu(AT, op2, op1); ++ __ beq_long(AT, R0, *L); ++ break; ++ default: ++ Unimplemented(); ++ } ++ %} ++ ++ ins_pc_relative(1); ++ ins_pipe( pipe_alu_branch ); ++%} ++ ++instruct cmpN_null_branch_long(cmpOp cmp, mRegN op1, immN_0 null, label labl) %{ ++ match(If cmp (CmpN op1 null)); ++ effect(USE labl); ++ ++ ins_cost(180); ++ format %{ "CMP $op1,0\t! 
compressed ptr\n\t" ++ "BP$cmp $labl @ cmpN_null_branch_long" %} ++ ins_encode %{ ++ Register op1 = $op1$$Register; ++ Register op2 = R0; ++ Label* L = $labl$$label; ++ int flag = $cmp$$cmpcode; ++ ++ switch(flag) { ++ case 0x01: //equal ++ __ beq_long(op1, op2, *L); ++ break; ++ case 0x02: //not_equal ++ __ bne_long(op1, op2, *L); ++ break; ++ default: ++ Unimplemented(); ++ } ++ %} ++//TODO: pipe_branchP or create pipe_branchN LEE ++ ins_pc_relative(1); ++ ins_pipe( pipe_alu_branch ); ++%} ++ ++instruct cmpN_reg_branch_long(cmpOp cmp, mRegN op1, mRegN op2, label labl) %{ ++ match(If cmp (CmpN op1 op2)); ++ effect(USE labl); ++ ++ ins_cost(180); ++ format %{ "CMP $op1,$op2\t! compressed ptr\n\t" ++ "BP$cmp $labl @ cmpN_reg_branch_long" %} ++ ins_encode %{ ++ Register op1_reg = $op1$$Register; ++ Register op2_reg = $op2$$Register; ++ Label* L = $labl$$label; ++ int flag = $cmp$$cmpcode; ++ ++ switch(flag) { ++ case 0x01: //equal ++ __ beq_long(op1_reg, op2_reg, *L); ++ break; ++ case 0x02: //not_equal ++ __ bne_long(op1_reg, op2_reg, *L); ++ break; ++ case 0x03: //above ++ __ sltu(AT, op2_reg, op1_reg); ++ __ bne_long(R0, AT, *L); ++ break; ++ case 0x04: //above_equal ++ __ sltu(AT, op1_reg, op2_reg); ++ __ beq_long(AT, R0, *L); ++ break; ++ case 0x05: //below ++ __ sltu(AT, op1_reg, op2_reg); ++ __ bne_long(R0, AT, *L); ++ break; ++ case 0x06: //below_equal ++ __ sltu(AT, op2_reg, op1_reg); ++ __ beq_long(AT, R0, *L); ++ break; ++ default: ++ Unimplemented(); ++ } ++ %} ++ ins_pc_relative(1); ++ ins_pipe( pipe_alu_branch ); ++%} ++ ++instruct branchConIU_reg_reg_long(cmpOpU cmp, mRegI src1, mRegI src2, label labl) %{ ++ match( If cmp (CmpU src1 src2) ); ++ effect(USE labl); ++ format %{ "BR$cmp $src1, $src2, $labl #@branchConIU_reg_reg_long" %} ++ ++ ins_encode %{ ++ Register op1 = $src1$$Register; ++ Register op2 = $src2$$Register; ++ Label* L = $labl$$label; ++ int flag = $cmp$$cmpcode; ++ ++ switch(flag) { ++ case 0x01: //equal ++ __ beq_long(op1, op2, *L); ++ break; ++ case 0x02: //not_equal ++ __ bne_long(op1, op2, *L); ++ break; ++ case 0x03: //above ++ __ sltu(AT, op2, op1); ++ __ bne_long(AT, R0, *L); ++ break; ++ case 0x04: //above_equal ++ __ sltu(AT, op1, op2); ++ __ beq_long(AT, R0, *L); ++ break; ++ case 0x05: //below ++ __ sltu(AT, op1, op2); ++ __ bne_long(AT, R0, *L); ++ break; ++ case 0x06: //below_equal ++ __ sltu(AT, op2, op1); ++ __ beq_long(AT, R0, *L); ++ break; ++ default: ++ Unimplemented(); ++ } ++ %} ++ ++ ins_pc_relative(1); ++ ins_pipe( pipe_alu_branch ); ++%} ++ ++ ++instruct branchConIU_reg_imm_long(cmpOpU cmp, mRegI src1, immI src2, label labl) %{ ++ match( If cmp (CmpU src1 src2) ); ++ effect(USE labl); ++ format %{ "BR$cmp $src1, $src2, $labl #@branchConIU_reg_imm_long" %} ++ ++ ins_encode %{ ++ Register op1 = $src1$$Register; ++ int val = $src2$$constant; ++ Label* L = $labl$$label; ++ int flag = $cmp$$cmpcode; ++ ++ __ move(AT, val); ++ switch(flag) { ++ case 0x01: //equal ++ __ beq_long(op1, AT, *L); ++ break; ++ case 0x02: //not_equal ++ __ bne_long(op1, AT, *L); ++ break; ++ case 0x03: //above ++ __ sltu(AT, AT, op1); ++ __ bne_long(R0, AT, *L); ++ break; ++ case 0x04: //above_equal ++ __ sltu(AT, op1, AT); ++ __ beq_long(AT, R0, *L); ++ break; ++ case 0x05: //below ++ __ sltu(AT, op1, AT); ++ __ bne_long(R0, AT, *L); ++ break; ++ case 0x06: //below_equal ++ __ sltu(AT, AT, op1); ++ __ beq_long(AT, R0, *L); ++ break; ++ default: ++ Unimplemented(); ++ } ++ %} ++ ++ ins_pc_relative(1); ++ ins_pipe( pipe_alu_branch ); ++%} ++ ++instruct 
branchConI_reg_reg_long(cmpOp cmp, mRegI src1, mRegI src2, label labl) %{ ++ match( If cmp (CmpI src1 src2) ); ++ effect(USE labl); ++ format %{ "BR$cmp $src1, $src2, $labl #@branchConI_reg_reg_long" %} ++ ++ ins_encode %{ ++ Register op1 = $src1$$Register; ++ Register op2 = $src2$$Register; ++ Label* L = $labl$$label; ++ int flag = $cmp$$cmpcode; ++ ++ switch(flag) { ++ case 0x01: //equal ++ __ beq_long(op1, op2, *L); ++ break; ++ case 0x02: //not_equal ++ __ bne_long(op1, op2, *L); ++ break; ++ case 0x03: //above ++ __ slt(AT, op2, op1); ++ __ bne_long(R0, AT, *L); ++ break; ++ case 0x04: //above_equal ++ __ slt(AT, op1, op2); ++ __ beq_long(AT, R0, *L); ++ break; ++ case 0x05: //below ++ __ slt(AT, op1, op2); ++ __ bne_long(R0, AT, *L); ++ break; ++ case 0x06: //below_equal ++ __ slt(AT, op2, op1); ++ __ beq_long(AT, R0, *L); ++ break; ++ default: ++ Unimplemented(); ++ } ++ %} ++ ++ ins_pc_relative(1); ++ ins_pipe( pipe_alu_branch ); ++%} ++ ++instruct branchConI_reg_immI_0_long(cmpOp cmp, mRegI src1, immI_0 src2, label labl) %{ ++ match( If cmp (CmpI src1 src2) ); ++ effect(USE labl); ++ ins_cost(170); ++ format %{ "BR$cmp $src1, $src2, $labl #@branchConI_reg_immI_0_long" %} ++ ++ ins_encode %{ ++ Register op1 = $src1$$Register; ++ Label* L = $labl$$label; ++ int flag = $cmp$$cmpcode; ++ ++ switch(flag) { ++ case 0x01: //equal ++ __ beq_long(op1, R0, *L); ++ break; ++ case 0x02: //not_equal ++ __ bne_long(op1, R0, *L); ++ break; ++ case 0x03: //greater ++ __ slt(AT, R0, op1); ++ __ bne_long(R0, AT, *L); ++ break; ++ case 0x04: //greater_equal ++ __ slt(AT, op1, R0); ++ __ beq_long(AT, R0, *L); ++ break; ++ case 0x05: //less ++ __ slt(AT, op1, R0); ++ __ bne_long(R0, AT, *L); ++ break; ++ case 0x06: //less_equal ++ __ slt(AT, R0, op1); ++ __ beq_long(AT, R0, *L); ++ break; ++ default: ++ Unimplemented(); ++ } ++ %} ++ ++ ins_pc_relative(1); ++ ins_pipe( pipe_alu_branch ); ++%} ++ ++instruct branchConI_reg_imm_long(cmpOp cmp, mRegI src1, immI src2, label labl) %{ ++ match( If cmp (CmpI src1 src2) ); ++ effect(USE labl); ++ ins_cost(200); ++ format %{ "BR$cmp $src1, $src2, $labl #@branchConI_reg_imm_long" %} ++ ++ ins_encode %{ ++ Register op1 = $src1$$Register; ++ int val = $src2$$constant; ++ Label* L = $labl$$label; ++ int flag = $cmp$$cmpcode; ++ ++ __ move(AT, val); ++ switch(flag) { ++ case 0x01: //equal ++ __ beq_long(op1, AT, *L); ++ break; ++ case 0x02: //not_equal ++ __ bne_long(op1, AT, *L); ++ break; ++ case 0x03: //greater ++ __ slt(AT, AT, op1); ++ __ bne_long(R0, AT, *L); ++ break; ++ case 0x04: //greater_equal ++ __ slt(AT, op1, AT); ++ __ beq_long(AT, R0, *L); ++ break; ++ case 0x05: //less ++ __ slt(AT, op1, AT); ++ __ bne_long(R0, AT, *L); ++ break; ++ case 0x06: //less_equal ++ __ slt(AT, AT, op1); ++ __ beq_long(AT, R0, *L); ++ break; ++ default: ++ Unimplemented(); ++ } ++ %} ++ ++ ins_pc_relative(1); ++ ins_pipe( pipe_alu_branch ); ++%} ++ ++instruct branchConIU_reg_immI_0_long(cmpOpU cmp, mRegI src1, immI_0 zero, label labl) %{ ++ match( If cmp (CmpU src1 zero) ); ++ effect(USE labl); ++ format %{ "BR$cmp $src1, zero, $labl #@branchConIU_reg_immI_0_long" %} ++ ++ ins_encode %{ ++ Register op1 = $src1$$Register; ++ Label* L = $labl$$label; ++ int flag = $cmp$$cmpcode; ++ ++ switch(flag) { ++ case 0x01: //equal ++ __ beq_long(op1, R0, *L); ++ break; ++ case 0x02: //not_equal ++ __ bne_long(op1, R0, *L); ++ break; ++ case 0x03: //above ++ __ bne_long(R0, op1, *L); ++ break; ++ case 0x04: //above_equal ++ __ beq_long(R0, R0, *L); ++ break; ++ case 0x05: //below ++ 
return; ++ break; ++ case 0x06: //below_equal ++ __ beq_long(op1, R0, *L); ++ break; ++ default: ++ Unimplemented(); ++ } ++ %} ++ ++ ins_pc_relative(1); ++ ins_pipe( pipe_alu_branch ); ++%} ++ ++ ++instruct branchConIU_reg_immI16_long(cmpOpU cmp, mRegI src1, immI16 src2, label labl) %{ ++ match( If cmp (CmpU src1 src2) ); ++ effect(USE labl); ++ ins_cost(180); ++ format %{ "BR$cmp $src1, $src2, $labl #@branchConIU_reg_immI16_long" %} ++ ++ ins_encode %{ ++ Register op1 = $src1$$Register; ++ int val = $src2$$constant; ++ Label* L = $labl$$label; ++ int flag = $cmp$$cmpcode; ++ ++ switch(flag) { ++ case 0x01: //equal ++ __ move(AT, val); ++ __ beq_long(op1, AT, *L); ++ break; ++ case 0x02: //not_equal ++ __ move(AT, val); ++ __ bne_long(op1, AT, *L); ++ break; ++ case 0x03: //above ++ __ move(AT, val); ++ __ sltu(AT, AT, op1); ++ __ bne_long(R0, AT, *L); ++ break; ++ case 0x04: //above_equal ++ __ sltiu(AT, op1, val); ++ __ beq_long(AT, R0, *L); ++ break; ++ case 0x05: //below ++ __ sltiu(AT, op1, val); ++ __ bne_long(R0, AT, *L); ++ break; ++ case 0x06: //below_equal ++ __ move(AT, val); ++ __ sltu(AT, AT, op1); ++ __ beq_long(AT, R0, *L); ++ break; ++ default: ++ Unimplemented(); ++ } ++ %} ++ ++ ins_pc_relative(1); ++ ins_pipe( pipe_alu_branch ); ++%} ++ ++ ++instruct branchConL_regL_regL_long(cmpOp cmp, mRegL src1, mRegL src2, label labl) %{ ++ match( If cmp (CmpL src1 src2) ); ++ effect(USE labl); ++ format %{ "BR$cmp $src1, $src2, $labl #@branchConL_regL_regL_long" %} ++ ins_cost(250); ++ ++ ins_encode %{ ++ Register opr1_reg = as_Register($src1$$reg); ++ Register opr2_reg = as_Register($src2$$reg); ++ ++ Label* target = $labl$$label; ++ int flag = $cmp$$cmpcode; ++ ++ switch(flag) { ++ case 0x01: //equal ++ __ beq_long(opr1_reg, opr2_reg, *target); ++ break; ++ ++ case 0x02: //not_equal ++ __ bne_long(opr1_reg, opr2_reg, *target); ++ break; ++ ++ case 0x03: //greater ++ __ slt(AT, opr2_reg, opr1_reg); ++ __ bne_long(AT, R0, *target); ++ break; ++ ++ case 0x04: //greater_equal ++ __ slt(AT, opr1_reg, opr2_reg); ++ __ beq_long(AT, R0, *target); ++ break; ++ ++ case 0x05: //less ++ __ slt(AT, opr1_reg, opr2_reg); ++ __ bne_long(AT, R0, *target); ++ break; ++ ++ case 0x06: //less_equal ++ __ slt(AT, opr2_reg, opr1_reg); ++ __ beq_long(AT, R0, *target); ++ break; ++ ++ default: ++ Unimplemented(); ++ } ++ %} ++ ++ ++ ins_pc_relative(1); ++ ins_pipe( pipe_alu_branch ); ++%} ++ ++instruct branchConUL_regL_regL_long(cmpOp cmp, mRegL src1, mRegL src2, label labl) %{ ++ match(If cmp (CmpUL src1 src2)); ++ effect(USE labl); ++ format %{ "BR$cmp $src1, $src2, $labl #@branchConUL_regL_regL_long" %} ++ ins_cost(250); ++ ++ ins_encode %{ ++ Register opr1_reg = as_Register($src1$$reg); ++ Register opr2_reg = as_Register($src2$$reg); ++ Label* target = $labl$$label; ++ int flag = $cmp$$cmpcode; ++ ++ switch(flag) { ++ case 0x01: // equal ++ __ beq_long(opr1_reg, opr2_reg, *target); ++ break; ++ ++ case 0x02: // not_equal ++ __ bne_long(opr1_reg, opr2_reg, *target); ++ break; ++ ++ case 0x03: // greater ++ __ sltu(AT, opr2_reg, opr1_reg); ++ __ bne_long(AT, R0, *target); ++ break; ++ ++ case 0x04: // greater_equal ++ __ sltu(AT, opr1_reg, opr2_reg); ++ __ beq_long(AT, R0, *target); ++ break; ++ ++ case 0x05: // less ++ __ sltu(AT, opr1_reg, opr2_reg); ++ __ bne_long(AT, R0, *target); ++ break; ++ ++ case 0x06: // less_equal ++ __ sltu(AT, opr2_reg, opr1_reg); ++ __ beq_long(AT, R0, *target); ++ break; ++ ++ default: ++ Unimplemented(); ++ } ++ %} ++ ++ ins_pc_relative(1); ++ ins_pipe(pipe_alu_branch); 
++%} ++ ++instruct branchConL_regL_immL_0_long(cmpOp cmp, mRegL src1, immL_0 zero, label labl) %{ ++ match( If cmp (CmpL src1 zero) ); ++ effect(USE labl); ++ format %{ "BR$cmp $src1, zero, $labl #@branchConL_regL_immL_0_long" %} ++ ins_cost(150); ++ ++ ins_encode %{ ++ Register opr1_reg = as_Register($src1$$reg); ++ Register opr2_reg = R0; ++ ++ Label* target = $labl$$label; ++ int flag = $cmp$$cmpcode; ++ ++ switch(flag) { ++ case 0x01: //equal ++ __ beq_long(opr1_reg, opr2_reg, *target); ++ break; ++ ++ case 0x02: //not_equal ++ __ bne_long(opr1_reg, opr2_reg, *target); ++ break; ++ ++ case 0x03: //greater ++ __ slt(AT, opr2_reg, opr1_reg); ++ __ bne_long(AT, R0, *target); ++ break; ++ ++ case 0x04: //greater_equal ++ __ slt(AT, opr1_reg, opr2_reg); ++ __ beq_long(AT, R0, *target); ++ break; ++ ++ case 0x05: //less ++ __ slt(AT, opr1_reg, opr2_reg); ++ __ bne_long(AT, R0, *target); ++ break; ++ ++ case 0x06: //less_equal ++ __ slt(AT, opr2_reg, opr1_reg); ++ __ beq_long(AT, R0, *target); ++ break; ++ ++ default: ++ Unimplemented(); ++ } ++ %} ++ ++ ++ ins_pc_relative(1); ++ ins_pipe( pipe_alu_branch ); ++%} ++ ++instruct branchConUL_regL_immL_0_long(cmpOp cmp, mRegL src1, immL_0 zero, label labl) %{ ++ match(If cmp (CmpUL src1 zero)); ++ effect(USE labl); ++ format %{ "BR$cmp $src1, zero, $labl #@branchConUL_regL_immL_0_long" %} ++ ins_cost(150); ++ ++ ins_encode %{ ++ Register opr1_reg = as_Register($src1$$reg); ++ Register opr2_reg = R0; ++ Label* target = $labl$$label; ++ int flag = $cmp$$cmpcode; ++ ++ switch(flag) { ++ case 0x01: // equal ++ case 0x04: // greater_equal ++ case 0x06: // less_equal ++ __ beq_long(opr1_reg, opr2_reg, *target); ++ break; ++ ++ case 0x02: // not_equal ++ case 0x03: // greater ++ __ bne_long(opr1_reg, opr2_reg, *target); ++ break; ++ ++ case 0x05: // less ++ __ beq_long(R0, R0, *target); ++ break; ++ ++ default: ++ Unimplemented(); ++ } ++ %} ++ ++ ins_pc_relative(1); ++ ins_pipe(pipe_alu_branch); ++%} ++ ++instruct branchConL_regL_immL_long(cmpOp cmp, mRegL src1, immL src2, label labl) %{ ++ match( If cmp (CmpL src1 src2) ); ++ effect(USE labl); ++ format %{ "BR$cmp $src1, $src2, $labl #@branchConL_regL_immL_long" %} ++ ins_cost(180); ++ ++ ins_encode %{ ++ Register opr1_reg = as_Register($src1$$reg); ++ Register opr2_reg = AT; ++ ++ Label* target = $labl$$label; ++ int flag = $cmp$$cmpcode; ++ ++ __ set64(opr2_reg, $src2$$constant); ++ ++ switch(flag) { ++ case 0x01: //equal ++ __ beq_long(opr1_reg, opr2_reg, *target); ++ break; ++ ++ case 0x02: //not_equal ++ __ bne_long(opr1_reg, opr2_reg, *target); ++ break; ++ ++ case 0x03: //greater ++ __ slt(AT, opr2_reg, opr1_reg); ++ __ bne_long(AT, R0, *target); ++ break; ++ ++ case 0x04: //greater_equal ++ __ slt(AT, opr1_reg, opr2_reg); ++ __ beq_long(AT, R0, *target); ++ break; ++ ++ case 0x05: //less ++ __ slt(AT, opr1_reg, opr2_reg); ++ __ bne_long(AT, R0, *target); ++ break; ++ ++ case 0x06: //less_equal ++ __ slt(AT, opr2_reg, opr1_reg); ++ __ beq_long(AT, R0, *target); ++ break; ++ ++ default: ++ Unimplemented(); ++ } ++ %} ++ ++ ++ ins_pc_relative(1); ++ ins_pipe( pipe_alu_branch ); ++%} ++ ++instruct branchConUL_regL_immL_long(cmpOp cmp, mRegL src1, immL src2, label labl) %{ ++ match(If cmp (CmpUL src1 src2)); ++ effect(USE labl); ++ format %{ "BR$cmp $src1, $src2, $labl #@branchConUL_regL_immL_long" %} ++ ins_cost(180); ++ ++ ins_encode %{ ++ Register opr1_reg = as_Register($src1$$reg); ++ Register opr2_reg = AT; ++ Label* target = $labl$$label; ++ int flag = $cmp$$cmpcode; ++ ++ __ set64(opr2_reg, 
$src2$$constant); ++ ++ switch(flag) { ++ case 0x01: // equal ++ __ beq_long(opr1_reg, opr2_reg, *target); ++ break; ++ ++ case 0x02: // not_equal ++ __ bne_long(opr1_reg, opr2_reg, *target); ++ break; ++ ++ case 0x03: // greater ++ __ sltu(AT, opr2_reg, opr1_reg); ++ __ bne_long(AT, R0, *target); ++ break; ++ ++ case 0x04: // greater_equal ++ __ sltu(AT, opr1_reg, opr2_reg); ++ __ beq_long(AT, R0, *target); ++ break; ++ ++ case 0x05: // less ++ __ sltu(AT, opr1_reg, opr2_reg); ++ __ bne_long(AT, R0, *target); ++ break; ++ ++ case 0x06: // less_equal ++ __ sltu(AT, opr2_reg, opr1_reg); ++ __ beq_long(AT, R0, *target); ++ break; ++ ++ default: ++ Unimplemented(); ++ } ++ %} ++ ++ ins_pc_relative(1); ++ ins_pipe(pipe_alu_branch); ++%} ++ ++//FIXME ++instruct branchConF_reg_reg_long(cmpOp cmp, regF src1, regF src2, label labl) %{ ++ match( If cmp (CmpF src1 src2) ); ++ effect(USE labl); ++ format %{ "BR$cmp $src1, $src2, $labl #@branchConF_reg_reg_long" %} ++ ++ ins_encode %{ ++ FloatRegister reg_op1 = $src1$$FloatRegister; ++ FloatRegister reg_op2 = $src2$$FloatRegister; ++ Label* L = $labl$$label; ++ int flag = $cmp$$cmpcode; ++ ++ switch(flag) { ++ case 0x01: // equal ++ __ c_eq_s(reg_op1, reg_op2); ++ __ bc1t_long(*L); ++ break; ++ case 0x02: // not_equal ++ __ c_eq_s(reg_op1, reg_op2); ++ __ bc1f_long(*L); ++ break; ++ case 0x03: // greater ++ __ c_ule_s(reg_op1, reg_op2); ++ __ bc1f_long(*L); ++ break; ++ case 0x04: // greater_equal ++ __ c_ult_s(reg_op1, reg_op2); ++ __ bc1f_long(*L); ++ break; ++ case 0x05: // less ++ __ c_ult_s(reg_op1, reg_op2); ++ __ bc1t_long(*L); ++ break; ++ case 0x06: // less_equal ++ __ c_ule_s(reg_op1, reg_op2); ++ __ bc1t_long(*L); ++ break; ++ default: ++ Unimplemented(); ++ } ++ %} ++ ++ ins_pc_relative(1); ++ ins_pipe(pipe_slow); ++%} ++ ++instruct branchConD_reg_reg_long(cmpOp cmp, regD src1, regD src2, label labl) %{ ++ match( If cmp (CmpD src1 src2) ); ++ effect(USE labl); ++ format %{ "BR$cmp $src1, $src2, $labl #@branchConD_reg_reg_long" %} ++ ++ ins_encode %{ ++ FloatRegister reg_op1 = $src1$$FloatRegister; ++ FloatRegister reg_op2 = $src2$$FloatRegister; ++ Label* L = $labl$$label; ++ int flag = $cmp$$cmpcode; ++ ++ switch(flag) { ++ case 0x01: // equal ++ __ c_eq_d(reg_op1, reg_op2); ++ __ bc1t_long(*L); ++ break; ++ case 0x02: // not_equal ++ // c_ueq_d cannot distinguish NaN from equal. Double.isNaN(Double) is implemented by 'f != f', so the use of c_ueq_d causes bugs. 
++ __ c_eq_d(reg_op1, reg_op2); ++ __ bc1f_long(*L); ++ break; ++ case 0x03: // greater ++ __ c_ule_d(reg_op1, reg_op2); ++ __ bc1f_long(*L); ++ break; ++ case 0x04: // greater_equal ++ __ c_ult_d(reg_op1, reg_op2); ++ __ bc1f_long(*L); ++ break; ++ case 0x05: // less ++ __ c_ult_d(reg_op1, reg_op2); ++ __ bc1t_long(*L); ++ break; ++ case 0x06: // less_equal ++ __ c_ule_d(reg_op1, reg_op2); ++ __ bc1t_long(*L); ++ break; ++ default: ++ Unimplemented(); ++ } ++ %} ++ ++ ins_pc_relative(1); ++ ins_pipe(pipe_slow); ++%} ++ ++ ++// ============================================================================ ++// Branch Instructions -- short offset versions ++ ++// Jump Direct ++instruct jmpDir_short(label labl) %{ ++ match(Goto); ++ effect(USE labl); ++ ++ ins_cost(300); ++ format %{ "JMP $labl #@jmpDir_short" %} ++ ++ ins_encode %{ ++ Label &L = *($labl$$label); ++ if(&L) ++ __ b(L); ++ else ++ __ b(int(0)); ++ __ delayed()->nop(); ++ %} ++ ++ ins_pipe( pipe_jump ); ++ ins_pc_relative(1); ++ ins_short_branch(1); ++%} ++ ++// Jump Direct Conditional - Label defines a relative address from Jcc+1 ++instruct jmpLoopEnd_short(cmpOp cop, mRegI src1, mRegI src2, label labl) %{ ++ match(CountedLoopEnd cop (CmpI src1 src2)); ++ effect(USE labl); ++ ++ ins_cost(300); ++ format %{ "J$cop $src1, $src2, $labl\t# Loop end @ jmpLoopEnd_short" %} ++ ins_encode %{ ++ Register op1 = $src1$$Register; ++ Register op2 = $src2$$Register; ++ Label &L = *($labl$$label); ++ int flag = $cop$$cmpcode; ++ ++ switch(flag) { ++ case 0x01: //equal ++ if (&L) ++ __ beq(op1, op2, L); ++ else ++ __ beq(op1, op2, (int)0); ++ break; ++ case 0x02: //not_equal ++ if (&L) ++ __ bne(op1, op2, L); ++ else ++ __ bne(op1, op2, (int)0); ++ break; ++ case 0x03: //above ++ __ slt(AT, op2, op1); ++ if(&L) ++ __ bne(AT, R0, L); ++ else ++ __ bne(AT, R0, (int)0); ++ break; ++ case 0x04: //above_equal ++ __ slt(AT, op1, op2); ++ if(&L) ++ __ beq(AT, R0, L); ++ else ++ __ beq(AT, R0, (int)0); ++ break; ++ case 0x05: //below ++ __ slt(AT, op1, op2); ++ if(&L) ++ __ bne(AT, R0, L); ++ else ++ __ bne(AT, R0, (int)0); ++ break; ++ case 0x06: //below_equal ++ __ slt(AT, op2, op1); ++ if(&L) ++ __ beq(AT, R0, L); ++ else ++ __ beq(AT, R0, (int)0); ++ break; ++ default: ++ Unimplemented(); ++ } ++ __ delayed()->nop(); ++ %} ++ ins_pipe( pipe_jump ); ++ ins_pc_relative(1); ++ ins_short_branch(1); ++%} ++ ++instruct jmpLoopEnd_reg_immI_short(cmpOp cop, mRegI src1, immI src2, label labl) %{ ++ match(CountedLoopEnd cop (CmpI src1 src2)); ++ effect(USE labl); ++ ++ ins_cost(300); ++ format %{ "J$cop $src1, $src2, $labl\t# Loop end @ jmpLoopEnd_reg_immI_short" %} ++ ins_encode %{ ++ Register op1 = $src1$$Register; ++ Register op2 = AT; ++ Label &L = *($labl$$label); ++ int flag = $cop$$cmpcode; ++ ++ __ move(op2, $src2$$constant); ++ ++ switch(flag) { ++ case 0x01: //equal ++ if (&L) ++ __ beq(op1, op2, L); ++ else ++ __ beq(op1, op2, (int)0); ++ break; ++ case 0x02: //not_equal ++ if (&L) ++ __ bne(op1, op2, L); ++ else ++ __ bne(op1, op2, (int)0); ++ break; ++ case 0x03: //above ++ __ slt(AT, op2, op1); ++ if(&L) ++ __ bne(AT, R0, L); ++ else ++ __ bne(AT, R0, (int)0); ++ break; ++ case 0x04: //above_equal ++ __ slt(AT, op1, op2); ++ if(&L) ++ __ beq(AT, R0, L); ++ else ++ __ beq(AT, R0, (int)0); ++ break; ++ case 0x05: //below ++ __ slt(AT, op1, op2); ++ if(&L) ++ __ bne(AT, R0, L); ++ else ++ __ bne(AT, R0, (int)0); ++ break; ++ case 0x06: //below_equal ++ __ slt(AT, op2, op1); ++ if(&L) ++ __ beq(AT, R0, L); ++ else ++ __ beq(AT, R0, (int)0); ++ 
break; ++ default: ++ Unimplemented(); ++ } ++ __ delayed()->nop(); ++ %} ++ ins_pipe( pipe_jump ); ++ ins_pc_relative(1); ++ ins_short_branch(1); ++%} ++ ++ ++// This match pattern is created for StoreIConditional since I cannot match IfNode without a RegFlags! ++instruct jmpCon_flags_short(cmpOp cop, FlagsReg cr, label labl) %{ ++ match(If cop cr); ++ effect(USE labl); ++ ++ ins_cost(300); ++ format %{ "J$cop $labl #mips uses T0 as equivalent to eflag @jmpCon_flags_short" %} ++ ++ ins_encode %{ ++ Label &L = *($labl$$label); ++ switch($cop$$cmpcode) { ++ case 0x01: //equal ++ if (&L) ++ __ bne($cr$$Register, R0, L); ++ else ++ __ bne($cr$$Register, R0, (int)0); ++ break; ++ case 0x02: //not equal ++ if (&L) ++ __ beq($cr$$Register, R0, L); ++ else ++ __ beq($cr$$Register, R0, (int)0); ++ break; ++ default: ++ Unimplemented(); ++ } ++ __ delayed()->nop(); ++ %} ++ ++ ins_pipe( pipe_jump ); ++ ins_pc_relative(1); ++ ins_short_branch(1); ++%} ++ ++// Conditional jumps ++instruct branchConP_zero_short(cmpOpU cmp, mRegP op1, immP_0 zero, label labl) %{ ++ match(If cmp (CmpP op1 zero)); ++ effect(USE labl); ++ ++ ins_cost(180); ++ format %{ "b$cmp $op1, R0, $labl #@branchConP_zero_short" %} ++ ++ ins_encode %{ ++ Register op1 = $op1$$Register; ++ Register op2 = R0; ++ Label &L = *($labl$$label); ++ int flag = $cmp$$cmpcode; ++ ++ switch(flag) { ++ case 0x01: //equal ++ if (&L) ++ __ beq(op1, op2, L); ++ else ++ __ beq(op1, op2, (int)0); ++ break; ++ case 0x02: //not_equal ++ if (&L) ++ __ bne(op1, op2, L); ++ else ++ __ bne(op1, op2, (int)0); ++ break; ++ default: ++ Unimplemented(); ++ } ++ __ delayed()->nop(); ++ %} ++ ++ ins_pc_relative(1); ++ ins_pipe( pipe_alu_branch ); ++ ins_short_branch(1); ++%} ++ ++instruct branchConN2P_zero_short(cmpOpU cmp, mRegN op1, immP_0 zero, label labl) %{ ++ match(If cmp (CmpP (DecodeN op1) zero)); ++ predicate(Universe::narrow_oop_base() == NULL && Universe::narrow_oop_shift() == 0); ++ effect(USE labl); ++ ++ ins_cost(180); ++ format %{ "b$cmp $op1, R0, $labl #@branchConN2P_zero_short" %} ++ ++ ins_encode %{ ++ Register op1 = $op1$$Register; ++ Register op2 = R0; ++ Label &L = *($labl$$label); ++ int flag = $cmp$$cmpcode; ++ ++ switch(flag) ++ { ++ case 0x01: //equal ++ if (&L) ++ __ beq(op1, op2, L); ++ else ++ __ beq(op1, op2, (int)0); ++ break; ++ case 0x02: //not_equal ++ if (&L) ++ __ bne(op1, op2, L); ++ else ++ __ bne(op1, op2, (int)0); ++ break; ++ default: ++ Unimplemented(); ++ } ++ __ delayed()->nop(); ++ %} ++ ++ ins_pc_relative(1); ++ ins_pipe( pipe_alu_branch ); ++ ins_short_branch(1); ++%} ++ ++ ++instruct branchConP_short(cmpOpU cmp, mRegP op1, mRegP op2, label labl) %{ ++ match(If cmp (CmpP op1 op2)); ++// predicate(can_branch_register(_kids[0]->_leaf, _kids[1]->_leaf)); ++ effect(USE labl); ++ ++ ins_cost(200); ++ format %{ "b$cmp $op1, $op2, $labl #@branchConP_short" %} ++ ++ ins_encode %{ ++ Register op1 = $op1$$Register; ++ Register op2 = $op2$$Register; ++ Label &L = *($labl$$label); ++ int flag = $cmp$$cmpcode; ++ ++ switch(flag) { ++ case 0x01: //equal ++ if (&L) ++ __ beq(op1, op2, L); ++ else ++ __ beq(op1, op2, (int)0); ++ break; ++ case 0x02: //not_equal ++ if (&L) ++ __ bne(op1, op2, L); ++ else ++ __ bne(op1, op2, (int)0); ++ break; ++ case 0x03: //above ++ __ sltu(AT, op2, op1); ++ if(&L) ++ __ bne(R0, AT, L); ++ else ++ __ bne(R0, AT, (int)0); ++ break; ++ case 0x04: //above_equal ++ __ sltu(AT, op1, op2); ++ if(&L) ++ __ beq(AT, R0, L); ++ else ++ __ beq(AT, R0, (int)0); ++ break; ++ case 0x05: //below ++ __ sltu(AT, op1, 
op2); ++ if(&L) ++ __ bne(R0, AT, L); ++ else ++ __ bne(R0, AT, (int)0); ++ break; ++ case 0x06: //below_equal ++ __ sltu(AT, op2, op1); ++ if(&L) ++ __ beq(AT, R0, L); ++ else ++ __ beq(AT, R0, (int)0); ++ break; ++ default: ++ Unimplemented(); ++ } ++ __ delayed()->nop(); ++ %} ++ ++ ins_pc_relative(1); ++ ins_pipe( pipe_alu_branch ); ++ ins_short_branch(1); ++%} ++ ++instruct cmpN_null_branch_short(cmpOp cmp, mRegN op1, immN_0 null, label labl) %{ ++ match(If cmp (CmpN op1 null)); ++ effect(USE labl); ++ ++ ins_cost(180); ++ format %{ "CMP $op1,0\t! compressed ptr\n\t" ++ "BP$cmp $labl @ cmpN_null_branch_short" %} ++ ins_encode %{ ++ Register op1 = $op1$$Register; ++ Register op2 = R0; ++ Label &L = *($labl$$label); ++ int flag = $cmp$$cmpcode; ++ ++ switch(flag) { ++ case 0x01: //equal ++ if (&L) ++ __ beq(op1, op2, L); ++ else ++ __ beq(op1, op2, (int)0); ++ break; ++ case 0x02: //not_equal ++ if (&L) ++ __ bne(op1, op2, L); ++ else ++ __ bne(op1, op2, (int)0); ++ break; ++ default: ++ Unimplemented(); ++ } ++ __ delayed()->nop(); ++ %} ++//TODO: pipe_branchP or create pipe_branchN LEE ++ ins_pc_relative(1); ++ ins_pipe( pipe_alu_branch ); ++ ins_short_branch(1); ++%} ++ ++instruct cmpN_reg_branch_short(cmpOp cmp, mRegN op1, mRegN op2, label labl) %{ ++ match(If cmp (CmpN op1 op2)); ++ effect(USE labl); ++ ++ ins_cost(180); ++ format %{ "CMP $op1,$op2\t! compressed ptr\n\t" ++ "BP$cmp $labl @ cmpN_reg_branch_short" %} ++ ins_encode %{ ++ Register op1_reg = $op1$$Register; ++ Register op2_reg = $op2$$Register; ++ Label &L = *($labl$$label); ++ int flag = $cmp$$cmpcode; ++ ++ switch(flag) { ++ case 0x01: //equal ++ if (&L) ++ __ beq(op1_reg, op2_reg, L); ++ else ++ __ beq(op1_reg, op2_reg, (int)0); ++ break; ++ case 0x02: //not_equal ++ if (&L) ++ __ bne(op1_reg, op2_reg, L); ++ else ++ __ bne(op1_reg, op2_reg, (int)0); ++ break; ++ case 0x03: //above ++ __ sltu(AT, op2_reg, op1_reg); ++ if(&L) ++ __ bne(R0, AT, L); ++ else ++ __ bne(R0, AT, (int)0); ++ break; ++ case 0x04: //above_equal ++ __ sltu(AT, op1_reg, op2_reg); ++ if(&L) ++ __ beq(AT, R0, L); ++ else ++ __ beq(AT, R0, (int)0); ++ break; ++ case 0x05: //below ++ __ sltu(AT, op1_reg, op2_reg); ++ if(&L) ++ __ bne(R0, AT, L); ++ else ++ __ bne(R0, AT, (int)0); ++ break; ++ case 0x06: //below_equal ++ __ sltu(AT, op2_reg, op1_reg); ++ if(&L) ++ __ beq(AT, R0, L); ++ else ++ __ beq(AT, R0, (int)0); ++ break; ++ default: ++ Unimplemented(); ++ } ++ __ delayed()->nop(); ++ %} ++ ins_pc_relative(1); ++ ins_pipe( pipe_alu_branch ); ++ ins_short_branch(1); ++%} ++ ++instruct branchConIU_reg_reg_short(cmpOpU cmp, mRegI src1, mRegI src2, label labl) %{ ++ match( If cmp (CmpU src1 src2) ); ++ effect(USE labl); ++ format %{ "BR$cmp $src1, $src2, $labl #@branchConIU_reg_reg_short" %} ++ ++ ins_encode %{ ++ Register op1 = $src1$$Register; ++ Register op2 = $src2$$Register; ++ Label &L = *($labl$$label); ++ int flag = $cmp$$cmpcode; ++ ++ switch(flag) { ++ case 0x01: //equal ++ if (&L) ++ __ beq(op1, op2, L); ++ else ++ __ beq(op1, op2, (int)0); ++ break; ++ case 0x02: //not_equal ++ if (&L) ++ __ bne(op1, op2, L); ++ else ++ __ bne(op1, op2, (int)0); ++ break; ++ case 0x03: //above ++ __ sltu(AT, op2, op1); ++ if(&L) ++ __ bne(AT, R0, L); ++ else ++ __ bne(AT, R0, (int)0); ++ break; ++ case 0x04: //above_equal ++ __ sltu(AT, op1, op2); ++ if(&L) ++ __ beq(AT, R0, L); ++ else ++ __ beq(AT, R0, (int)0); ++ break; ++ case 0x05: //below ++ __ sltu(AT, op1, op2); ++ if(&L) ++ __ bne(AT, R0, L); ++ else ++ __ bne(AT, R0, (int)0); ++ break; ++ case 
0x06: //below_equal ++ __ sltu(AT, op2, op1); ++ if(&L) ++ __ beq(AT, R0, L); ++ else ++ __ beq(AT, R0, (int)0); ++ break; ++ default: ++ Unimplemented(); ++ } ++ __ delayed()->nop(); ++ %} ++ ++ ins_pc_relative(1); ++ ins_pipe( pipe_alu_branch ); ++ ins_short_branch(1); ++%} ++ ++ ++instruct branchConIU_reg_imm_short(cmpOpU cmp, mRegI src1, immI src2, label labl) %{ ++ match( If cmp (CmpU src1 src2) ); ++ effect(USE labl); ++ format %{ "BR$cmp $src1, $src2, $labl #@branchConIU_reg_imm_short" %} ++ ++ ins_encode %{ ++ Register op1 = $src1$$Register; ++ int val = $src2$$constant; ++ Label &L = *($labl$$label); ++ int flag = $cmp$$cmpcode; ++ ++ __ move(AT, val); ++ switch(flag) { ++ case 0x01: //equal ++ if (&L) ++ __ beq(op1, AT, L); ++ else ++ __ beq(op1, AT, (int)0); ++ break; ++ case 0x02: //not_equal ++ if (&L) ++ __ bne(op1, AT, L); ++ else ++ __ bne(op1, AT, (int)0); ++ break; ++ case 0x03: //above ++ __ sltu(AT, AT, op1); ++ if(&L) ++ __ bne(R0, AT, L); ++ else ++ __ bne(R0, AT, (int)0); ++ break; ++ case 0x04: //above_equal ++ __ sltu(AT, op1, AT); ++ if(&L) ++ __ beq(AT, R0, L); ++ else ++ __ beq(AT, R0, (int)0); ++ break; ++ case 0x05: //below ++ __ sltu(AT, op1, AT); ++ if(&L) ++ __ bne(R0, AT, L); ++ else ++ __ bne(R0, AT, (int)0); ++ break; ++ case 0x06: //below_equal ++ __ sltu(AT, AT, op1); ++ if(&L) ++ __ beq(AT, R0, L); ++ else ++ __ beq(AT, R0, (int)0); ++ break; ++ default: ++ Unimplemented(); ++ } ++ __ delayed()->nop(); ++ %} ++ ++ ins_pc_relative(1); ++ ins_pipe( pipe_alu_branch ); ++ ins_short_branch(1); ++%} ++ ++instruct branchConI_reg_reg_short(cmpOp cmp, mRegI src1, mRegI src2, label labl) %{ ++ match( If cmp (CmpI src1 src2) ); ++ effect(USE labl); ++ format %{ "BR$cmp $src1, $src2, $labl #@branchConI_reg_reg_short" %} ++ ++ ins_encode %{ ++ Register op1 = $src1$$Register; ++ Register op2 = $src2$$Register; ++ Label &L = *($labl$$label); ++ int flag = $cmp$$cmpcode; ++ ++ switch(flag) { ++ case 0x01: //equal ++ if (&L) ++ __ beq(op1, op2, L); ++ else ++ __ beq(op1, op2, (int)0); ++ break; ++ case 0x02: //not_equal ++ if (&L) ++ __ bne(op1, op2, L); ++ else ++ __ bne(op1, op2, (int)0); ++ break; ++ case 0x03: //above ++ __ slt(AT, op2, op1); ++ if(&L) ++ __ bne(R0, AT, L); ++ else ++ __ bne(R0, AT, (int)0); ++ break; ++ case 0x04: //above_equal ++ __ slt(AT, op1, op2); ++ if(&L) ++ __ beq(AT, R0, L); ++ else ++ __ beq(AT, R0, (int)0); ++ break; ++ case 0x05: //below ++ __ slt(AT, op1, op2); ++ if(&L) ++ __ bne(R0, AT, L); ++ else ++ __ bne(R0, AT, (int)0); ++ break; ++ case 0x06: //below_equal ++ __ slt(AT, op2, op1); ++ if(&L) ++ __ beq(AT, R0, L); ++ else ++ __ beq(AT, R0, (int)0); ++ break; ++ default: ++ Unimplemented(); ++ } ++ __ delayed()->nop(); ++ %} ++ ++ ins_pc_relative(1); ++ ins_pipe( pipe_alu_branch ); ++ ins_short_branch(1); ++%} ++ ++instruct branchConI_reg_immI_0_short(cmpOp cmp, mRegI src1, immI_0 src2, label labl) %{ ++ match( If cmp (CmpI src1 src2) ); ++ effect(USE labl); ++ ins_cost(170); ++ format %{ "BR$cmp $src1, $src2, $labl #@branchConI_reg_immI_0_short" %} ++ ++ ins_encode %{ ++ Register op1 = $src1$$Register; ++ Label &L = *($labl$$label); ++ int flag = $cmp$$cmpcode; ++ ++ switch(flag) { ++ case 0x01: //equal ++ if (&L) ++ __ beq(op1, R0, L); ++ else ++ __ beq(op1, R0, (int)0); ++ break; ++ case 0x02: //not_equal ++ if (&L) ++ __ bne(op1, R0, L); ++ else ++ __ bne(op1, R0, (int)0); ++ break; ++ case 0x03: //greater ++ if(&L) ++ __ bgtz(op1, L); ++ else ++ __ bgtz(op1, (int)0); ++ break; ++ case 0x04: //greater_equal ++ if(&L) ++ __ 
bgez(op1, L); ++ else ++ __ bgez(op1, (int)0); ++ break; ++ case 0x05: //less ++ if(&L) ++ __ bltz(op1, L); ++ else ++ __ bltz(op1, (int)0); ++ break; ++ case 0x06: //less_equal ++ if(&L) ++ __ blez(op1, L); ++ else ++ __ blez(op1, (int)0); ++ break; ++ default: ++ Unimplemented(); ++ } ++ __ delayed()->nop(); ++ %} ++ ++ ins_pc_relative(1); ++ ins_pipe( pipe_alu_branch ); ++ ins_short_branch(1); ++%} ++ ++ ++instruct branchConI_reg_imm_short(cmpOp cmp, mRegI src1, immI src2, label labl) %{ ++ match( If cmp (CmpI src1 src2) ); ++ effect(USE labl); ++ ins_cost(200); ++ format %{ "BR$cmp $src1, $src2, $labl #@branchConI_reg_imm_short" %} ++ ++ ins_encode %{ ++ Register op1 = $src1$$Register; ++ int val = $src2$$constant; ++ Label &L = *($labl$$label); ++ int flag = $cmp$$cmpcode; ++ ++ __ move(AT, val); ++ switch(flag) { ++ case 0x01: //equal ++ if (&L) ++ __ beq(op1, AT, L); ++ else ++ __ beq(op1, AT, (int)0); ++ break; ++ case 0x02: //not_equal ++ if (&L) ++ __ bne(op1, AT, L); ++ else ++ __ bne(op1, AT, (int)0); ++ break; ++ case 0x03: //greater ++ __ slt(AT, AT, op1); ++ if(&L) ++ __ bne(R0, AT, L); ++ else ++ __ bne(R0, AT, (int)0); ++ break; ++ case 0x04: //greater_equal ++ __ slt(AT, op1, AT); ++ if(&L) ++ __ beq(AT, R0, L); ++ else ++ __ beq(AT, R0, (int)0); ++ break; ++ case 0x05: //less ++ __ slt(AT, op1, AT); ++ if(&L) ++ __ bne(R0, AT, L); ++ else ++ __ bne(R0, AT, (int)0); ++ break; ++ case 0x06: //less_equal ++ __ slt(AT, AT, op1); ++ if(&L) ++ __ beq(AT, R0, L); ++ else ++ __ beq(AT, R0, (int)0); ++ break; ++ default: ++ Unimplemented(); ++ } ++ __ delayed()->nop(); ++ %} ++ ++ ins_pc_relative(1); ++ ins_pipe( pipe_alu_branch ); ++ ins_short_branch(1); ++%} ++ ++instruct branchConIU_reg_immI_0_short(cmpOpU cmp, mRegI src1, immI_0 zero, label labl) %{ ++ match( If cmp (CmpU src1 zero) ); ++ effect(USE labl); ++ format %{ "BR$cmp $src1, zero, $labl #@branchConIU_reg_immI_0_short" %} ++ ++ ins_encode %{ ++ Register op1 = $src1$$Register; ++ Label &L = *($labl$$label); ++ int flag = $cmp$$cmpcode; ++ ++ switch(flag) { ++ case 0x01: //equal ++ if (&L) ++ __ beq(op1, R0, L); ++ else ++ __ beq(op1, R0, (int)0); ++ break; ++ case 0x02: //not_equal ++ if (&L) ++ __ bne(op1, R0, L); ++ else ++ __ bne(op1, R0, (int)0); ++ break; ++ case 0x03: //above ++ if(&L) ++ __ bne(R0, op1, L); ++ else ++ __ bne(R0, op1, (int)0); ++ break; ++ case 0x04: //above_equal ++ if(&L) ++ __ beq(R0, R0, L); ++ else ++ __ beq(R0, R0, (int)0); ++ break; ++ case 0x05: //below ++ return; ++ break; ++ case 0x06: //below_equal ++ if(&L) ++ __ beq(op1, R0, L); ++ else ++ __ beq(op1, R0, (int)0); ++ break; ++ default: ++ Unimplemented(); ++ } ++ __ delayed()->nop(); ++ %} ++ ++ ins_pc_relative(1); ++ ins_pipe( pipe_alu_branch ); ++ ins_short_branch(1); ++%} ++ ++ ++instruct branchConIU_reg_immI16_short(cmpOpU cmp, mRegI src1, immI16 src2, label labl) %{ ++ match( If cmp (CmpU src1 src2) ); ++ effect(USE labl); ++ ins_cost(180); ++ format %{ "BR$cmp $src1, $src2, $labl #@branchConIU_reg_immI16_short" %} ++ ++ ins_encode %{ ++ Register op1 = $src1$$Register; ++ int val = $src2$$constant; ++ Label &L = *($labl$$label); ++ int flag = $cmp$$cmpcode; ++ ++ switch(flag) { ++ case 0x01: //equal ++ __ move(AT, val); ++ if (&L) ++ __ beq(op1, AT, L); ++ else ++ __ beq(op1, AT, (int)0); ++ break; ++ case 0x02: //not_equal ++ __ move(AT, val); ++ if (&L) ++ __ bne(op1, AT, L); ++ else ++ __ bne(op1, AT, (int)0); ++ break; ++ case 0x03: //above ++ __ move(AT, val); ++ __ sltu(AT, AT, op1); ++ if(&L) ++ __ bne(R0, AT, L); ++ else 
++ __ bne(R0, AT, (int)0); ++ break; ++ case 0x04: //above_equal ++ __ sltiu(AT, op1, val); ++ if(&L) ++ __ beq(AT, R0, L); ++ else ++ __ beq(AT, R0, (int)0); ++ break; ++ case 0x05: //below ++ __ sltiu(AT, op1, val); ++ if(&L) ++ __ bne(R0, AT, L); ++ else ++ __ bne(R0, AT, (int)0); ++ break; ++ case 0x06: //below_equal ++ __ move(AT, val); ++ __ sltu(AT, AT, op1); ++ if(&L) ++ __ beq(AT, R0, L); ++ else ++ __ beq(AT, R0, (int)0); ++ break; ++ default: ++ Unimplemented(); ++ } ++ __ delayed()->nop(); ++ %} ++ ++ ins_pc_relative(1); ++ ins_pipe( pipe_alu_branch ); ++ ins_short_branch(1); ++%} ++ ++ ++instruct branchConL_regL_regL_short(cmpOp cmp, mRegL src1, mRegL src2, label labl) %{ ++ match( If cmp (CmpL src1 src2) ); ++ effect(USE labl); ++ format %{ "BR$cmp $src1, $src2, $labl #@branchConL_regL_regL_short" %} ++ ins_cost(250); ++ ++ ins_encode %{ ++ Register opr1_reg = as_Register($src1$$reg); ++ Register opr2_reg = as_Register($src2$$reg); ++ ++ Label &target = *($labl$$label); ++ int flag = $cmp$$cmpcode; ++ ++ switch(flag) { ++ case 0x01: //equal ++ if (&target) ++ __ beq(opr1_reg, opr2_reg, target); ++ else ++ __ beq(opr1_reg, opr2_reg, (int)0); ++ __ delayed()->nop(); ++ break; ++ ++ case 0x02: //not_equal ++ if(&target) ++ __ bne(opr1_reg, opr2_reg, target); ++ else ++ __ bne(opr1_reg, opr2_reg, (int)0); ++ __ delayed()->nop(); ++ break; ++ ++ case 0x03: //greater ++ __ slt(AT, opr2_reg, opr1_reg); ++ if(&target) ++ __ bne(AT, R0, target); ++ else ++ __ bne(AT, R0, (int)0); ++ __ delayed()->nop(); ++ break; ++ ++ case 0x04: //greater_equal ++ __ slt(AT, opr1_reg, opr2_reg); ++ if(&target) ++ __ beq(AT, R0, target); ++ else ++ __ beq(AT, R0, (int)0); ++ __ delayed()->nop(); ++ ++ break; ++ ++ case 0x05: //less ++ __ slt(AT, opr1_reg, opr2_reg); ++ if(&target) ++ __ bne(AT, R0, target); ++ else ++ __ bne(AT, R0, (int)0); ++ __ delayed()->nop(); ++ ++ break; ++ ++ case 0x06: //less_equal ++ __ slt(AT, opr2_reg, opr1_reg); ++ ++ if(&target) ++ __ beq(AT, R0, target); ++ else ++ __ beq(AT, R0, (int)0); ++ __ delayed()->nop(); ++ ++ break; ++ ++ default: ++ Unimplemented(); ++ } ++ %} ++ ++ ++ ins_pc_relative(1); ++ ins_pipe( pipe_alu_branch ); ++ ins_short_branch(1); ++%} ++ ++instruct branchConUL_regL_regL_short(cmpOp cmp, mRegL src1, mRegL src2, label labl) %{ ++ match( If cmp (CmpUL src1 src2) ); ++ effect(USE labl); ++ format %{ "BR$cmp $src1, $src2, $labl #@branchConUL_regL_regL_short" %} ++ ins_cost(250); ++ ++ ins_encode %{ ++ Register opr1_reg = as_Register($src1$$reg); ++ Register opr2_reg = as_Register($src2$$reg); ++ Label &target = *($labl$$label); ++ int flag = $cmp$$cmpcode; ++ ++ switch(flag) { ++ case 0x01: // equal ++ if (&target) ++ __ beq(opr1_reg, opr2_reg, target); ++ else ++ __ beq(opr1_reg, opr2_reg, (int)0); ++ __ delayed()->nop(); ++ break; ++ ++ case 0x02: // not_equal ++ if(&target) ++ __ bne(opr1_reg, opr2_reg, target); ++ else ++ __ bne(opr1_reg, opr2_reg, (int)0); ++ __ delayed()->nop(); ++ break; ++ ++ case 0x03: // greater ++ __ sltu(AT, opr2_reg, opr1_reg); ++ if(&target) ++ __ bne(AT, R0, target); ++ else ++ __ bne(AT, R0, (int)0); ++ __ delayed()->nop(); ++ break; ++ ++ case 0x04: // greater_equal ++ __ sltu(AT, opr1_reg, opr2_reg); ++ if(&target) ++ __ beq(AT, R0, target); ++ else ++ __ beq(AT, R0, (int)0); ++ __ delayed()->nop(); ++ break; ++ ++ case 0x05: // less ++ __ sltu(AT, opr1_reg, opr2_reg); ++ if(&target) ++ __ bne(AT, R0, target); ++ else ++ __ bne(AT, R0, (int)0); ++ __ delayed()->nop(); ++ break; ++ ++ case 0x06: // less_equal ++ __ 
sltu(AT, opr2_reg, opr1_reg); ++ if(&target) ++ __ beq(AT, R0, target); ++ else ++ __ beq(AT, R0, (int)0); ++ __ delayed()->nop(); ++ break; ++ ++ default: ++ Unimplemented(); ++ } ++ %} ++ ++ ins_pc_relative(1); ++ ins_pipe(pipe_alu_branch); ++ ins_short_branch(1); ++%} ++ ++instruct branchConL_regL_immL_0_short(cmpOp cmp, mRegL src1, immL_0 zero, label labl) %{ ++ match( If cmp (CmpL src1 zero) ); ++ effect(USE labl); ++ format %{ "BR$cmp $src1, zero, $labl #@branchConL_regL_immL_0_short" %} ++ ins_cost(150); ++ ++ ins_encode %{ ++ Register opr1_reg = as_Register($src1$$reg); ++ Label &target = *($labl$$label); ++ int flag = $cmp$$cmpcode; ++ ++ switch(flag) { ++ case 0x01: //equal ++ if (&target) ++ __ beq(opr1_reg, R0, target); ++ else ++ __ beq(opr1_reg, R0, int(0)); ++ break; ++ ++ case 0x02: //not_equal ++ if(&target) ++ __ bne(opr1_reg, R0, target); ++ else ++ __ bne(opr1_reg, R0, (int)0); ++ break; ++ ++ case 0x03: //greater ++ if(&target) ++ __ bgtz(opr1_reg, target); ++ else ++ __ bgtz(opr1_reg, (int)0); ++ break; ++ ++ case 0x04: //greater_equal ++ if(&target) ++ __ bgez(opr1_reg, target); ++ else ++ __ bgez(opr1_reg, (int)0); ++ break; ++ ++ case 0x05: //less ++ __ slt(AT, opr1_reg, R0); ++ if(&target) ++ __ bne(AT, R0, target); ++ else ++ __ bne(AT, R0, (int)0); ++ break; ++ ++ case 0x06: //less_equal ++ if (&target) ++ __ blez(opr1_reg, target); ++ else ++ __ blez(opr1_reg, int(0)); ++ break; ++ ++ default: ++ Unimplemented(); ++ } ++ __ delayed()->nop(); ++ %} ++ ++ ++ ins_pc_relative(1); ++ ins_pipe( pipe_alu_branch ); ++ ins_short_branch(1); ++%} ++ ++instruct branchConUL_regL_immL_0_short(cmpOp cmp, mRegL src1, immL_0 zero, label labl) %{ ++ match(If cmp (CmpUL src1 zero)); ++ effect(USE labl); ++ format %{ "BR$cmp $src1, zero, $labl #@branchConUL_regL_immL_0_short" %} ++ ins_cost(150); ++ ++ ins_encode %{ ++ Register opr1_reg = as_Register($src1$$reg); ++ Label &target = *($labl$$label); ++ int flag = $cmp$$cmpcode; ++ ++ switch(flag) { ++ case 0x01: // equal ++ case 0x04: // greater_equal ++ case 0x06: // less_equal ++ if (&target) ++ __ beq(opr1_reg, R0, target); ++ else ++ __ beq(opr1_reg, R0, int(0)); ++ break; ++ ++ case 0x02: // not_equal ++ case 0x03: // greater ++ if(&target) ++ __ bne(opr1_reg, R0, target); ++ else ++ __ bne(opr1_reg, R0, (int)0); ++ break; ++ ++ case 0x05: // less ++ if(&target) ++ __ beq(R0, R0, target); ++ else ++ __ beq(R0, R0, (int)0); ++ break; ++ ++ default: ++ Unimplemented(); ++ } ++ __ delayed()->nop(); ++ %} ++ ++ ins_pc_relative(1); ++ ins_pipe(pipe_alu_branch); ++ ins_short_branch(1); ++%} ++ ++instruct branchConL_regL_immL_short(cmpOp cmp, mRegL src1, immL src2, label labl) %{ ++ match( If cmp (CmpL src1 src2) ); ++ effect(USE labl); ++ format %{ "BR$cmp $src1, $src2, $labl #@branchConL_regL_immL_short" %} ++ ins_cost(180); ++ ++ ins_encode %{ ++ Register opr1_reg = as_Register($src1$$reg); ++ Register opr2_reg = AT; ++ ++ Label &target = *($labl$$label); ++ int flag = $cmp$$cmpcode; ++ ++ __ set64(opr2_reg, $src2$$constant); ++ ++ switch(flag) { ++ case 0x01: //equal ++ if (&target) ++ __ beq(opr1_reg, opr2_reg, target); ++ else ++ __ beq(opr1_reg, opr2_reg, (int)0); ++ break; ++ ++ case 0x02: //not_equal ++ if(&target) ++ __ bne(opr1_reg, opr2_reg, target); ++ else ++ __ bne(opr1_reg, opr2_reg, (int)0); ++ break; ++ ++ case 0x03: //greater ++ __ slt(AT, opr2_reg, opr1_reg); ++ if(&target) ++ __ bne(AT, R0, target); ++ else ++ __ bne(AT, R0, (int)0); ++ break; ++ ++ case 0x04: //greater_equal ++ __ slt(AT, opr1_reg, opr2_reg); ++ 
if(&target) ++ __ beq(AT, R0, target); ++ else ++ __ beq(AT, R0, (int)0); ++ break; ++ ++ case 0x05: //less ++ __ slt(AT, opr1_reg, opr2_reg); ++ if(&target) ++ __ bne(AT, R0, target); ++ else ++ __ bne(AT, R0, (int)0); ++ break; ++ ++ case 0x06: //less_equal ++ __ slt(AT, opr2_reg, opr1_reg); ++ if(&target) ++ __ beq(AT, R0, target); ++ else ++ __ beq(AT, R0, (int)0); ++ break; ++ ++ default: ++ Unimplemented(); ++ } ++ __ delayed()->nop(); ++ %} ++ ++ ++ ins_pc_relative(1); ++ ins_pipe( pipe_alu_branch ); ++ ins_short_branch(1); ++%} ++ ++instruct branchConUL_regL_immL_short(cmpOp cmp, mRegL src1, immL src2, label labl) %{ ++ match(If cmp (CmpUL src1 src2)); ++ effect(USE labl); ++ format %{ "BR$cmp $src1, $src2, $labl #@branchConUL_regL_immL_short" %} ++ ins_cost(180); ++ ++ ins_encode %{ ++ Register opr1_reg = as_Register($src1$$reg); ++ Register opr2_reg = AT; ++ Label &target = *($labl$$label); ++ int flag = $cmp$$cmpcode; ++ ++ __ set64(opr2_reg, $src2$$constant); ++ ++ switch(flag) { ++ case 0x01: // equal ++ if (&target) ++ __ beq(opr1_reg, opr2_reg, target); ++ else ++ __ beq(opr1_reg, opr2_reg, (int)0); ++ break; ++ ++ case 0x02: // not_equal ++ if(&target) ++ __ bne(opr1_reg, opr2_reg, target); ++ else ++ __ bne(opr1_reg, opr2_reg, (int)0); ++ break; ++ ++ case 0x03: // greater ++ __ sltu(AT, opr2_reg, opr1_reg); ++ if(&target) ++ __ bne(AT, R0, target); ++ else ++ __ bne(AT, R0, (int)0); ++ break; ++ ++ case 0x04: // greater_equal ++ __ sltu(AT, opr1_reg, opr2_reg); ++ if(&target) ++ __ beq(AT, R0, target); ++ else ++ __ beq(AT, R0, (int)0); ++ break; ++ ++ case 0x05: // less ++ __ sltu(AT, opr1_reg, opr2_reg); ++ if(&target) ++ __ bne(AT, R0, target); ++ else ++ __ bne(AT, R0, (int)0); ++ break; ++ ++ case 0x06: // less_equal ++ __ sltu(AT, opr2_reg, opr1_reg); ++ if(&target) ++ __ beq(AT, R0, target); ++ else ++ __ beq(AT, R0, (int)0); ++ break; ++ ++ default: ++ Unimplemented(); ++ } ++ __ delayed()->nop(); ++ %} ++ ++ ins_pc_relative(1); ++ ins_pipe(pipe_alu_branch); ++ ins_short_branch(1); ++%} ++ ++//FIXME ++instruct branchConF_reg_reg_short(cmpOp cmp, regF src1, regF src2, label labl) %{ ++ match( If cmp (CmpF src1 src2) ); ++ effect(USE labl); ++ format %{ "BR$cmp $src1, $src2, $labl #@branchConF_reg_reg_short" %} ++ ++ ins_encode %{ ++ FloatRegister reg_op1 = $src1$$FloatRegister; ++ FloatRegister reg_op2 = $src2$$FloatRegister; ++ Label& L = *($labl$$label); ++ int flag = $cmp$$cmpcode; ++ ++ switch(flag) { ++ case 0x01: // equal ++ __ c_eq_s(reg_op1, reg_op2); ++ if (&L) ++ __ bc1t(L); ++ else ++ __ bc1t((int)0); ++ break; ++ case 0x02: // not_equal ++ __ c_eq_s(reg_op1, reg_op2); ++ if (&L) ++ __ bc1f(L); ++ else ++ __ bc1f((int)0); ++ break; ++ case 0x03: // greater ++ __ c_ule_s(reg_op1, reg_op2); ++ if(&L) ++ __ bc1f(L); ++ else ++ __ bc1f((int)0); ++ break; ++ case 0x04: // greater_equal ++ __ c_ult_s(reg_op1, reg_op2); ++ if(&L) ++ __ bc1f(L); ++ else ++ __ bc1f((int)0); ++ break; ++ case 0x05: // less ++ __ c_ult_s(reg_op1, reg_op2); ++ if(&L) ++ __ bc1t(L); ++ else ++ __ bc1t((int)0); ++ break; ++ case 0x06: // less_equal ++ __ c_ule_s(reg_op1, reg_op2); ++ if(&L) ++ __ bc1t(L); ++ else ++ __ bc1t((int)0); ++ break; ++ default: ++ Unimplemented(); ++ } ++ __ delayed()->nop(); ++ %} ++ ++ ins_pc_relative(1); ++ ins_pipe(pipe_fpu_branch); ++ ins_short_branch(1); ++%} ++ ++instruct branchConD_reg_reg_short(cmpOp cmp, regD src1, regD src2, label labl) %{ ++ match( If cmp (CmpD src1 src2) ); ++ effect(USE labl); ++ format %{ "BR$cmp $src1, $src2, $labl 
#@branchConD_reg_reg_short" %} ++ ++ ins_encode %{ ++ FloatRegister reg_op1 = $src1$$FloatRegister; ++ FloatRegister reg_op2 = $src2$$FloatRegister; ++ Label& L = *($labl$$label); ++ int flag = $cmp$$cmpcode; ++ ++ switch(flag) { ++ case 0x01: // equal ++ __ c_eq_d(reg_op1, reg_op2); ++ if (&L) ++ __ bc1t(L); ++ else ++ __ bc1t((int)0); ++ break; ++ case 0x02: // not_equal ++ // c_ueq_d cannot distinguish NaN from equal. Double.isNaN(Double) is implemented by 'f != f', so the use of c_ueq_d causes bugs. ++ __ c_eq_d(reg_op1, reg_op2); ++ if (&L) ++ __ bc1f(L); ++ else ++ __ bc1f((int)0); ++ break; ++ case 0x03: // greater ++ __ c_ule_d(reg_op1, reg_op2); ++ if(&L) ++ __ bc1f(L); ++ else ++ __ bc1f((int)0); ++ break; ++ case 0x04: // greater_equal ++ __ c_ult_d(reg_op1, reg_op2); ++ if(&L) ++ __ bc1f(L); ++ else ++ __ bc1f((int)0); ++ break; ++ case 0x05: // less ++ __ c_ult_d(reg_op1, reg_op2); ++ if(&L) ++ __ bc1t(L); ++ else ++ __ bc1t((int)0); ++ break; ++ case 0x06: // less_equal ++ __ c_ule_d(reg_op1, reg_op2); ++ if(&L) ++ __ bc1t(L); ++ else ++ __ bc1t((int)0); ++ break; ++ default: ++ Unimplemented(); ++ } ++ __ delayed()->nop(); ++ %} ++ ++ ins_pc_relative(1); ++ ins_pipe(pipe_fpu_branch); ++ ins_short_branch(1); ++%} ++ ++// =================== End of branch instructions ========================== ++ ++// Call Runtime Instruction ++instruct CallRuntimeDirect(method meth) %{ ++ match(CallRuntime ); ++ effect(USE meth); ++ ++ ins_cost(300); ++ format %{ "CALL,runtime #@CallRuntimeDirect" %} ++ ins_encode( Java_To_Runtime( meth ) ); ++ ins_pipe( pipe_slow ); ++ ins_alignment(16); ++%} ++ ++ ++ ++//------------------------MemBar Instructions------------------------------- ++//Memory barrier flavors ++ ++instruct membar_acquire() %{ ++ match(MemBarAcquire); ++ ins_cost(400); ++ ++ format %{ "MEMBAR-acquire @ membar_acquire" %} ++ ins_encode %{ ++ __ sync(); ++ %} ++ ins_pipe(empty); ++%} ++ ++instruct load_fence() %{ ++ match(LoadFence); ++ ins_cost(400); ++ ++ format %{ "MEMBAR @ load_fence" %} ++ ins_encode %{ ++ __ sync(); ++ %} ++ ins_pipe(pipe_slow); ++%} ++ ++instruct membar_acquire_lock() ++%{ ++ match(MemBarAcquireLock); ++ ins_cost(0); ++ ++ size(0); ++ format %{ "MEMBAR-acquire (acquire as part of CAS in prior FastLock so empty encoding) @ membar_acquire_lock" %} ++ ins_encode(); ++ ins_pipe(empty); ++%} ++ ++instruct membar_release() %{ ++ match(MemBarRelease); ++ ins_cost(400); ++ ++ format %{ "MEMBAR-release @ membar_release" %} ++ ++ ins_encode %{ ++ // Attention: DO NOT DELETE THIS GUY! 
++ __ sync(); ++ %} ++ ++ ins_pipe(pipe_slow); ++%} ++ ++instruct store_fence() %{ ++ match(StoreFence); ++ ins_cost(400); ++ ++ format %{ "MEMBAR @ store_fence" %} ++ ++ ins_encode %{ ++ __ sync(); ++ %} ++ ++ ins_pipe(pipe_slow); ++%} ++ ++instruct membar_release_lock() ++%{ ++ match(MemBarReleaseLock); ++ ins_cost(0); ++ ++ size(0); ++ format %{ "MEMBAR-release-lock (release in FastUnlock so empty) @ membar_release_lock" %} ++ ins_encode(); ++ ins_pipe(empty); ++%} ++ ++ ++instruct membar_volatile() %{ ++ match(MemBarVolatile); ++ ins_cost(400); ++ ++ format %{ "MEMBAR-volatile" %} ++ ins_encode %{ ++ if( !os::is_MP() ) return; // Not needed on single CPU ++ __ sync(); ++ ++ %} ++ ins_pipe(pipe_slow); ++%} ++ ++instruct unnecessary_membar_volatile() %{ ++ match(MemBarVolatile); ++ predicate(Matcher::post_store_load_barrier(n)); ++ ins_cost(0); ++ ++ size(0); ++ format %{ "MEMBAR-volatile (unnecessary so empty encoding) @ unnecessary_membar_volatile" %} ++ ins_encode( ); ++ ins_pipe(empty); ++%} ++ ++instruct membar_storestore() %{ ++ match(MemBarStoreStore); ++ ++ ins_cost(400); ++ format %{ "MEMBAR-storestore @ membar_storestore" %} ++ ins_encode %{ ++ __ sync(); ++ %} ++ ins_pipe(empty); ++%} ++ ++//----------Move Instructions-------------------------------------------------- ++instruct castX2P(mRegP dst, mRegL src) %{ ++ match(Set dst (CastX2P src)); ++ format %{ "castX2P $dst, $src @ castX2P" %} ++ ins_encode %{ ++ Register src = $src$$Register; ++ Register dst = $dst$$Register; ++ ++ if(src != dst) ++ __ move(dst, src); ++ %} ++ ins_cost(10); ++ ins_pipe( ialu_regI_mov ); ++%} ++ ++instruct castP2X(mRegL dst, mRegP src ) %{ ++ match(Set dst (CastP2X src)); ++ ++ format %{ "mov $dst, $src\t #@castP2X" %} ++ ins_encode %{ ++ Register src = $src$$Register; ++ Register dst = $dst$$Register; ++ ++ if(src != dst) ++ __ move(dst, src); ++ %} ++ ins_pipe( ialu_regI_mov ); ++%} ++ ++instruct MoveF2I_reg_reg(mRegI dst, regF src) %{ ++ match(Set dst (MoveF2I src)); ++ effect(DEF dst, USE src); ++ ins_cost(85); ++ format %{ "MoveF2I $dst, $src @ MoveF2I_reg_reg" %} ++ ins_encode %{ ++ Register dst = as_Register($dst$$reg); ++ FloatRegister src = as_FloatRegister($src$$reg); ++ ++ __ mfc1(dst, src); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct MoveI2F_reg_reg(regF dst, mRegI src) %{ ++ match(Set dst (MoveI2F src)); ++ effect(DEF dst, USE src); ++ ins_cost(85); ++ format %{ "MoveI2F $dst, $src @ MoveI2F_reg_reg" %} ++ ins_encode %{ ++ Register src = as_Register($src$$reg); ++ FloatRegister dst = as_FloatRegister($dst$$reg); ++ ++ __ mtc1(src, dst); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct MoveD2L_reg_reg(mRegL dst, regD src) %{ ++ match(Set dst (MoveD2L src)); ++ effect(DEF dst, USE src); ++ ins_cost(85); ++ format %{ "MoveD2L $dst, $src @ MoveD2L_reg_reg" %} ++ ins_encode %{ ++ Register dst = as_Register($dst$$reg); ++ FloatRegister src = as_FloatRegister($src$$reg); ++ ++ __ dmfc1(dst, src); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct MoveL2D_reg_reg(regD dst, mRegL src) %{ ++ match(Set dst (MoveL2D src)); ++ effect(DEF dst, USE src); ++ ins_cost(85); ++ format %{ "MoveL2D $dst, $src @ MoveL2D_reg_reg" %} ++ ins_encode %{ ++ FloatRegister dst = as_FloatRegister($dst$$reg); ++ Register src = as_Register($src$$reg); ++ ++ __ dmtc1(src, dst); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++//----------Conditional Move--------------------------------------------------- ++// Conditional move ++instruct cmovI_cmpI_reg_reg(mRegI dst, mRegI src, mRegI tmp1, mRegI tmp2, cmpOp cop ) %{ 
++ match(Set dst (CMoveI (Binary cop (CmpI tmp1 tmp2)) (Binary dst src))); ++ ins_cost(80); ++ format %{ ++ "CMP$cop $tmp1, $tmp2\t @cmovI_cmpI_reg_reg\n" ++ "\tCMOV $dst,$src \t @cmovI_cmpI_reg_reg" ++ %} ++ ++ ins_encode %{ ++ Register op1 = $tmp1$$Register; ++ Register op2 = $tmp2$$Register; ++ Register dst = $dst$$Register; ++ Register src = $src$$Register; ++ int flag = $cop$$cmpcode; ++ ++ __ cmp_cmov(op1, op2, dst, src, (MacroAssembler::CMCompare) flag, true /* is_signed */); ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct cmovI_cmpP_reg_reg(mRegI dst, mRegI src, mRegP tmp1, mRegP tmp2, cmpOpU cop ) %{ ++ match(Set dst (CMoveI (Binary cop (CmpP tmp1 tmp2)) (Binary dst src))); ++ ins_cost(80); ++ format %{ ++ "CMPU$cop $tmp1,$tmp2\t @cmovI_cmpP_reg_reg\n\t" ++ "CMOV $dst,$src\t @cmovI_cmpP_reg_reg" ++ %} ++ ins_encode %{ ++ Register op1 = $tmp1$$Register; ++ Register op2 = $tmp2$$Register; ++ Register dst = $dst$$Register; ++ Register src = $src$$Register; ++ int flag = $cop$$cmpcode; ++ ++ __ cmp_cmov(op1, op2, dst, src, (MacroAssembler::CMCompare) flag, false /* is_signed */); ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct cmovI_cmpN_reg_reg(mRegI dst, mRegI src, mRegN tmp1, mRegN tmp2, cmpOpU cop ) %{ ++ match(Set dst (CMoveI (Binary cop (CmpN tmp1 tmp2)) (Binary dst src))); ++ ins_cost(80); ++ format %{ ++ "CMPU$cop $tmp1,$tmp2\t @cmovI_cmpN_reg_reg\n\t" ++ "CMOV $dst,$src\t @cmovI_cmpN_reg_reg" ++ %} ++ ins_encode %{ ++ Register op1 = $tmp1$$Register; ++ Register op2 = $tmp2$$Register; ++ Register dst = $dst$$Register; ++ Register src = $src$$Register; ++ int flag = $cop$$cmpcode; ++ ++ __ cmp_cmov(op1, op2, dst, src, (MacroAssembler::CMCompare) flag, false /* is_signed */); ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct cmovP_cmpU_reg_reg(mRegP dst, mRegP src, mRegI tmp1, mRegI tmp2, cmpOpU cop ) %{ ++ match(Set dst (CMoveP (Binary cop (CmpU tmp1 tmp2)) (Binary dst src))); ++ ins_cost(80); ++ format %{ ++ "CMPU$cop $tmp1,$tmp2\t @cmovP_cmpU_reg_reg\n\t" ++ "CMOV $dst,$src\t @cmovP_cmpU_reg_reg" ++ %} ++ ins_encode %{ ++ Register op1 = $tmp1$$Register; ++ Register op2 = $tmp2$$Register; ++ Register dst = $dst$$Register; ++ Register src = $src$$Register; ++ int flag = $cop$$cmpcode; ++ ++ __ cmp_cmov(op1, op2, dst, src, (MacroAssembler::CMCompare) flag, false /* is_signed */); ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct cmovP_cmpF_reg_reg(mRegP dst, mRegP src, regF tmp1, regF tmp2, cmpOp cop ) %{ ++ match(Set dst (CMoveP (Binary cop (CmpF tmp1 tmp2)) (Binary dst src))); ++ ins_cost(80); ++ format %{ ++ "CMP$cop $tmp1, $tmp2\t @cmovP_cmpF_reg_reg\n" ++ "\tCMOV $dst,$src \t @cmovP_cmpF_reg_reg" ++ %} ++ ++ ins_encode %{ ++ FloatRegister reg_op1 = $tmp1$$FloatRegister; ++ FloatRegister reg_op2 = $tmp2$$FloatRegister; ++ Register dst = $dst$$Register; ++ Register src = $src$$Register; ++ int flag = $cop$$cmpcode; ++ ++ __ cmp_cmov(reg_op1, reg_op2, dst, src, (MacroAssembler::CMCompare) flag, true /* is_float */); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct cmovP_cmpN_reg_reg(mRegP dst, mRegP src, mRegN tmp1, mRegN tmp2, cmpOpU cop ) %{ ++ match(Set dst (CMoveP (Binary cop (CmpN tmp1 tmp2)) (Binary dst src))); ++ ins_cost(80); ++ format %{ ++ "CMPU$cop $tmp1,$tmp2\t @cmovP_cmpN_reg_reg\n\t" ++ "CMOV $dst,$src\t @cmovP_cmpN_reg_reg" ++ %} ++ ins_encode %{ ++ Register op1 = $tmp1$$Register; ++ Register op2 = $tmp2$$Register; ++ Register dst = $dst$$Register; ++ Register src = $src$$Register; ++ int flag = $cop$$cmpcode; ++ ++ __ cmp_cmov(op1, op2, 
dst, src, (MacroAssembler::CMCompare) flag, false /* is_signed */); ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct cmovN_cmpP_reg_reg(mRegN dst, mRegN src, mRegP tmp1, mRegP tmp2, cmpOpU cop ) %{ ++ match(Set dst (CMoveN (Binary cop (CmpP tmp1 tmp2)) (Binary dst src))); ++ ins_cost(80); ++ format %{ ++ "CMPU$cop $tmp1,$tmp2\t @cmovN_cmpP_reg_reg\n\t" ++ "CMOV $dst,$src\t @cmovN_cmpP_reg_reg" ++ %} ++ ins_encode %{ ++ Register op1 = $tmp1$$Register; ++ Register op2 = $tmp2$$Register; ++ Register dst = $dst$$Register; ++ Register src = $src$$Register; ++ int flag = $cop$$cmpcode; ++ ++ __ cmp_cmov(op1, op2, dst, src, (MacroAssembler::CMCompare) flag, false /* is_signed */); ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct cmovP_cmpD_reg_reg(mRegP dst, mRegP src, regD tmp1, regD tmp2, cmpOp cop ) %{ ++ match(Set dst (CMoveP (Binary cop (CmpD tmp1 tmp2)) (Binary dst src))); ++ ins_cost(80); ++ format %{ ++ "CMP$cop $tmp1, $tmp2\t @cmovP_cmpD_reg_reg\n" ++ "\tCMOV $dst,$src \t @cmovP_cmpD_reg_reg" ++ %} ++ ins_encode %{ ++ FloatRegister reg_op1 = as_FloatRegister($tmp1$$reg); ++ FloatRegister reg_op2 = as_FloatRegister($tmp2$$reg); ++ Register dst = as_Register($dst$$reg); ++ Register src = as_Register($src$$reg); ++ int flag = $cop$$cmpcode; ++ ++ __ cmp_cmov(reg_op1, reg_op2, dst, src, (MacroAssembler::CMCompare) flag, false /* is_float */); ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++ ++instruct cmovN_cmpN_reg_reg(mRegN dst, mRegN src, mRegN tmp1, mRegN tmp2, cmpOpU cop ) %{ ++ match(Set dst (CMoveN (Binary cop (CmpN tmp1 tmp2)) (Binary dst src))); ++ ins_cost(80); ++ format %{ ++ "CMPU$cop $tmp1,$tmp2\t @cmovN_cmpN_reg_reg\n\t" ++ "CMOV $dst,$src\t @cmovN_cmpN_reg_reg" ++ %} ++ ins_encode %{ ++ Register op1 = $tmp1$$Register; ++ Register op2 = $tmp2$$Register; ++ Register dst = $dst$$Register; ++ Register src = $src$$Register; ++ int flag = $cop$$cmpcode; ++ ++ __ cmp_cmov(op1, op2, dst, src, (MacroAssembler::CMCompare) flag, false /* is_signed */); ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++ ++instruct cmovI_cmpU_reg_reg(mRegI dst, mRegI src, mRegI tmp1, mRegI tmp2, cmpOpU cop ) %{ ++ match(Set dst (CMoveI (Binary cop (CmpU tmp1 tmp2)) (Binary dst src))); ++ ins_cost(80); ++ format %{ ++ "CMPU$cop $tmp1,$tmp2\t @cmovI_cmpU_reg_reg\n\t" ++ "CMOV $dst,$src\t @cmovI_cmpU_reg_reg" ++ %} ++ ins_encode %{ ++ Register op1 = $tmp1$$Register; ++ Register op2 = $tmp2$$Register; ++ Register dst = $dst$$Register; ++ Register src = $src$$Register; ++ int flag = $cop$$cmpcode; ++ ++ __ cmp_cmov(op1, op2, dst, src, (MacroAssembler::CMCompare) flag, false /* is_signed */); ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct cmovI_cmpL_reg_reg(mRegI dst, mRegI src, mRegL tmp1, mRegL tmp2, cmpOp cop ) %{ ++ match(Set dst (CMoveI (Binary cop (CmpL tmp1 tmp2)) (Binary dst src))); ++ ins_cost(80); ++ format %{ ++ "CMP$cop $tmp1, $tmp2\t @cmovI_cmpL_reg_reg\n" ++ "\tCMOV $dst,$src \t @cmovI_cmpL_reg_reg" ++ %} ++ ins_encode %{ ++ Register opr1 = as_Register($tmp1$$reg); ++ Register opr2 = as_Register($tmp2$$reg); ++ Register dst = $dst$$Register; ++ Register src = $src$$Register; ++ int flag = $cop$$cmpcode; ++ ++ __ cmp_cmov(opr1, opr2, dst, src, (MacroAssembler::CMCompare) flag, true /* is_signed */); ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct cmovI_cmpUL_reg_reg(mRegI dst, mRegI src, mRegL tmp1, mRegL tmp2, cmpOp cop) %{ ++ match(Set dst (CMoveI (Binary cop (CmpUL tmp1 tmp2)) (Binary dst src))); ++ ins_cost(80); ++ format %{ ++ "CMP$cop $tmp1, $tmp2\t @cmovI_cmpUL_reg_reg\n" ++ 
"\tCMOV $dst,$src \t @cmovI_cmpUL_reg_reg" ++ %} ++ ins_encode %{ ++ Register opr1 = as_Register($tmp1$$reg); ++ Register opr2 = as_Register($tmp2$$reg); ++ Register dst = $dst$$Register; ++ Register src = $src$$Register; ++ int flag = $cop$$cmpcode; ++ ++ __ cmp_cmov(opr1, opr2, dst, src, (MacroAssembler::CMCompare) flag, false /* is_signed */); ++ %} ++ ++ ins_pipe(pipe_slow); ++%} ++ ++instruct cmovP_cmpL_reg_reg(mRegP dst, mRegP src, mRegL tmp1, mRegL tmp2, cmpOp cop ) %{ ++ match(Set dst (CMoveP (Binary cop (CmpL tmp1 tmp2)) (Binary dst src))); ++ ins_cost(80); ++ format %{ ++ "CMP$cop $tmp1, $tmp2\t @cmovP_cmpL_reg_reg\n" ++ "\tCMOV $dst,$src \t @cmovP_cmpL_reg_reg" ++ %} ++ ins_encode %{ ++ Register opr1 = as_Register($tmp1$$reg); ++ Register opr2 = as_Register($tmp2$$reg); ++ Register dst = $dst$$Register; ++ Register src = $src$$Register; ++ int flag = $cop$$cmpcode; ++ ++ __ cmp_cmov(opr1, opr2, dst, src, (MacroAssembler::CMCompare) flag, true /* is_signed */); ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct cmovP_cmpUL_reg_reg(mRegP dst, mRegP src, mRegL tmp1, mRegL tmp2, cmpOp cop) %{ ++ match(Set dst (CMoveP (Binary cop (CmpUL tmp1 tmp2)) (Binary dst src))); ++ ins_cost(80); ++ format %{ ++ "CMP$cop $tmp1, $tmp2\t @cmovP_cmpUL_reg_reg\n" ++ "\tCMOV $dst,$src \t @cmovP_cmpUL_reg_reg" ++ %} ++ ins_encode %{ ++ Register opr1 = as_Register($tmp1$$reg); ++ Register opr2 = as_Register($tmp2$$reg); ++ Register dst = $dst$$Register; ++ Register src = $src$$Register; ++ int flag = $cop$$cmpcode; ++ ++ __ cmp_cmov(opr1, opr2, dst, src, (MacroAssembler::CMCompare) flag, false /* is_signed */); ++ %} ++ ++ ins_pipe(pipe_slow); ++%} ++ ++instruct cmovI_cmpD_reg_reg(mRegI dst, mRegI src, regD tmp1, regD tmp2, cmpOp cop ) %{ ++ match(Set dst (CMoveI (Binary cop (CmpD tmp1 tmp2)) (Binary dst src))); ++ ins_cost(80); ++ format %{ ++ "CMP$cop $tmp1, $tmp2\t @cmovI_cmpD_reg_reg\n" ++ "\tCMOV $dst,$src \t @cmovI_cmpD_reg_reg" ++ %} ++ ins_encode %{ ++ FloatRegister reg_op1 = as_FloatRegister($tmp1$$reg); ++ FloatRegister reg_op2 = as_FloatRegister($tmp2$$reg); ++ Register dst = as_Register($dst$$reg); ++ Register src = as_Register($src$$reg); ++ int flag = $cop$$cmpcode; ++ ++ __ cmp_cmov(reg_op1, reg_op2, dst, src, (MacroAssembler::CMCompare) flag, false /* is_float */); ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++ ++instruct cmovP_cmpP_reg_reg(mRegP dst, mRegP src, mRegP tmp1, mRegP tmp2, cmpOpU cop ) %{ ++ match(Set dst (CMoveP (Binary cop (CmpP tmp1 tmp2)) (Binary dst src))); ++ ins_cost(80); ++ format %{ ++ "CMPU$cop $tmp1,$tmp2\t @cmovP_cmpP_reg_reg\n\t" ++ "CMOV $dst,$src\t @cmovP_cmpP_reg_reg" ++ %} ++ ins_encode %{ ++ Register op1 = $tmp1$$Register; ++ Register op2 = $tmp2$$Register; ++ Register dst = $dst$$Register; ++ Register src = $src$$Register; ++ int flag = $cop$$cmpcode; ++ ++ __ cmp_cmov(op1, op2, dst, src, (MacroAssembler::CMCompare) flag, false /* is_signed */); ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct cmovP_cmpI_reg_reg(mRegP dst, mRegP src, mRegI tmp1, mRegI tmp2, cmpOp cop ) %{ ++ match(Set dst (CMoveP (Binary cop (CmpI tmp1 tmp2)) (Binary dst src))); ++ ins_cost(80); ++ format %{ ++ "CMP$cop $tmp1,$tmp2\t @cmovP_cmpI_reg_reg\n\t" ++ "CMOV $dst,$src\t @cmovP_cmpI_reg_reg" ++ %} ++ ins_encode %{ ++ Register op1 = $tmp1$$Register; ++ Register op2 = $tmp2$$Register; ++ Register dst = $dst$$Register; ++ Register src = $src$$Register; ++ int flag = $cop$$cmpcode; ++ ++ __ cmp_cmov(op1, op2, dst, src, (MacroAssembler::CMCompare) flag, true /* is_signed */); ++ 
%} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct cmovL_cmpP_reg_reg(mRegL dst, mRegL src, mRegP tmp1, mRegP tmp2, cmpOpU cop ) %{ ++ match(Set dst (CMoveL (Binary cop (CmpP tmp1 tmp2)) (Binary dst src))); ++ ins_cost(80); ++ format %{ ++ "CMPU$cop $tmp1,$tmp2\t @cmovL_cmpP_reg_reg\n\t" ++ "CMOV $dst,$src\t @cmovL_cmpP_reg_reg" ++ %} ++ ins_encode %{ ++ Register op1 = $tmp1$$Register; ++ Register op2 = $tmp2$$Register; ++ Register dst = $dst$$Register; ++ Register src = $src$$Register; ++ int flag = $cop$$cmpcode; ++ ++ __ cmp_cmov(op1, op2, dst, src, (MacroAssembler::CMCompare) flag, false /* is_signed */); ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct cmovN_cmpU_reg_reg(mRegN dst, mRegN src, mRegI tmp1, mRegI tmp2, cmpOpU cop ) %{ ++ match(Set dst (CMoveN (Binary cop (CmpU tmp1 tmp2)) (Binary dst src))); ++ ins_cost(80); ++ format %{ ++ "CMPU$cop $tmp1,$tmp2\t @cmovN_cmpU_reg_reg\n\t" ++ "CMOV $dst,$src\t @cmovN_cmpU_reg_reg" ++ %} ++ ins_encode %{ ++ Register op1 = $tmp1$$Register; ++ Register op2 = $tmp2$$Register; ++ Register dst = $dst$$Register; ++ Register src = $src$$Register; ++ int flag = $cop$$cmpcode; ++ ++ __ cmp_cmov(op1, op2, dst, src, (MacroAssembler::CMCompare) flag, false /* is_signed */); ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct cmovN_cmpL_reg_reg(mRegN dst, mRegN src, mRegL tmp1, mRegL tmp2, cmpOp cop) %{ ++ match(Set dst (CMoveN (Binary cop (CmpL tmp1 tmp2)) (Binary dst src))); ++ ins_cost(80); ++ format %{ ++ "CMP$cop $tmp1, $tmp2\t @cmovN_cmpL_reg_reg\n" ++ "\tCMOV $dst,$src \t @cmovN_cmpL_reg_reg" ++ %} ++ ins_encode %{ ++ Register opr1 = as_Register($tmp1$$reg); ++ Register opr2 = as_Register($tmp2$$reg); ++ Register dst = $dst$$Register; ++ Register src = $src$$Register; ++ int flag = $cop$$cmpcode; ++ ++ __ cmp_cmov(opr1, opr2, dst, src, (MacroAssembler::CMCompare) flag, true /* is_signed */); ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct cmovN_cmpUL_reg_reg(mRegN dst, mRegN src, mRegL tmp1, mRegL tmp2, cmpOp cop) %{ ++ match(Set dst (CMoveN (Binary cop (CmpUL tmp1 tmp2)) (Binary dst src))); ++ ins_cost(80); ++ format %{ ++ "CMP$cop $tmp1, $tmp2\t @cmovN_cmpUL_reg_reg\n" ++ "\tCMOV $dst,$src \t @cmovN_cmpUL_reg_reg" ++ %} ++ ins_encode %{ ++ Register opr1 = as_Register($tmp1$$reg); ++ Register opr2 = as_Register($tmp2$$reg); ++ Register dst = $dst$$Register; ++ Register src = $src$$Register; ++ int flag = $cop$$cmpcode; ++ ++ __ cmp_cmov(opr1, opr2, dst, src, (MacroAssembler::CMCompare) flag, false /* is_signed */); ++ %} ++ ++ ins_pipe(pipe_slow); ++%} ++ ++instruct cmovN_cmpI_reg_reg(mRegN dst, mRegN src, mRegI tmp1, mRegI tmp2, cmpOp cop ) %{ ++ match(Set dst (CMoveN (Binary cop (CmpI tmp1 tmp2)) (Binary dst src))); ++ ins_cost(80); ++ format %{ ++ "CMP$cop $tmp1,$tmp2\t @cmovN_cmpI_reg_reg\n\t" ++ "CMOV $dst,$src\t @cmovN_cmpI_reg_reg" ++ %} ++ ins_encode %{ ++ Register op1 = $tmp1$$Register; ++ Register op2 = $tmp2$$Register; ++ Register dst = $dst$$Register; ++ Register src = $src$$Register; ++ int flag = $cop$$cmpcode; ++ ++ __ cmp_cmov(op1, op2, dst, src, (MacroAssembler::CMCompare) flag, true /* is_signed */); ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct cmovL_cmpU_reg_reg(mRegL dst, mRegL src, mRegI tmp1, mRegI tmp2, cmpOpU cop ) %{ ++ match(Set dst (CMoveL (Binary cop (CmpU tmp1 tmp2)) (Binary dst src))); ++ ins_cost(80); ++ format %{ ++ "CMPU$cop $tmp1,$tmp2\t @cmovL_cmpU_reg_reg\n\t" ++ "CMOV $dst,$src\t @cmovL_cmpU_reg_reg" ++ %} ++ ins_encode %{ ++ Register op1 = $tmp1$$Register; ++ Register op2 = 
$tmp2$$Register; ++ Register dst = $dst$$Register; ++ Register src = $src$$Register; ++ int flag = $cop$$cmpcode; ++ ++ __ cmp_cmov(op1, op2, dst, src, (MacroAssembler::CMCompare) flag, false /* is_signed */); ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct cmovL_cmpF_reg_reg(mRegL dst, mRegL src, regF tmp1, regF tmp2, cmpOp cop ) %{ ++ match(Set dst (CMoveL (Binary cop (CmpF tmp1 tmp2)) (Binary dst src))); ++ ins_cost(80); ++ format %{ ++ "CMP$cop $tmp1, $tmp2\t @cmovL_cmpF_reg_reg\n" ++ "\tCMOV $dst,$src \t @cmovL_cmpF_reg_reg" ++ %} ++ ++ ins_encode %{ ++ FloatRegister reg_op1 = $tmp1$$FloatRegister; ++ FloatRegister reg_op2 = $tmp2$$FloatRegister; ++ Register dst = $dst$$Register; ++ Register src = $src$$Register; ++ int flag = $cop$$cmpcode; ++ ++ __ cmp_cmov(reg_op1, reg_op2, dst, src, (MacroAssembler::CMCompare) flag, true /* is_float */); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct cmovL_cmpI_reg_reg(mRegL dst, mRegL src, mRegI tmp1, mRegI tmp2, cmpOp cop ) %{ ++ match(Set dst (CMoveL (Binary cop (CmpI tmp1 tmp2)) (Binary dst src))); ++ ins_cost(80); ++ format %{ ++ "CMP$cop $tmp1, $tmp2\t @cmovL_cmpI_reg_reg\n" ++ "\tCMOV $dst,$src \t @cmovL_cmpI_reg_reg" ++ %} ++ ++ ins_encode %{ ++ Register op1 = $tmp1$$Register; ++ Register op2 = $tmp2$$Register; ++ Register dst = as_Register($dst$$reg); ++ Register src = as_Register($src$$reg); ++ int flag = $cop$$cmpcode; ++ ++ __ cmp_cmov(op1, op2, dst, src, (MacroAssembler::CMCompare) flag, true /* is_signed */); ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct cmovL_cmpL_reg_reg(mRegL dst, mRegL src, mRegL tmp1, mRegL tmp2, cmpOp cop ) %{ ++ match(Set dst (CMoveL (Binary cop (CmpL tmp1 tmp2)) (Binary dst src))); ++ ins_cost(80); ++ format %{ ++ "CMP$cop $tmp1, $tmp2\t @cmovL_cmpL_reg_reg\n" ++ "\tCMOV $dst,$src \t @cmovL_cmpL_reg_reg" ++ %} ++ ins_encode %{ ++ Register opr1 = as_Register($tmp1$$reg); ++ Register opr2 = as_Register($tmp2$$reg); ++ Register dst = as_Register($dst$$reg); ++ Register src = as_Register($src$$reg); ++ int flag = $cop$$cmpcode; ++ ++ __ cmp_cmov(opr1, opr2, dst, src, (MacroAssembler::CMCompare) flag, true /* is_signed */); ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct cmovL_cmpUL_reg_reg(mRegL dst, mRegL src, mRegL tmp1, mRegL tmp2, cmpOp cop) %{ ++ match(Set dst (CMoveL (Binary cop (CmpUL tmp1 tmp2)) (Binary dst src))); ++ ins_cost(80); ++ format %{ ++ "CMP$cop $tmp1, $tmp2\t @cmovL_cmpUL_reg_reg\n" ++ "\tCMOV $dst,$src \t @cmovL_cmpUL_reg_reg" ++ %} ++ ins_encode %{ ++ Register opr1 = as_Register($tmp1$$reg); ++ Register opr2 = as_Register($tmp2$$reg); ++ Register dst = as_Register($dst$$reg); ++ Register src = as_Register($src$$reg); ++ int flag = $cop$$cmpcode; ++ ++ __ cmp_cmov(opr1, opr2, dst, src, (MacroAssembler::CMCompare) flag, false /* is_signed */); ++ %} ++ ++ ins_pipe(pipe_slow); ++%} ++ ++instruct cmovL_cmpN_reg_reg(mRegL dst, mRegL src, mRegN tmp1, mRegN tmp2, cmpOpU cop ) %{ ++ match(Set dst (CMoveL (Binary cop (CmpN tmp1 tmp2)) (Binary dst src))); ++ ins_cost(80); ++ format %{ ++ "CMPU$cop $tmp1,$tmp2\t @cmovL_cmpN_reg_reg\n\t" ++ "CMOV $dst,$src\t @cmovL_cmpN_reg_reg" ++ %} ++ ins_encode %{ ++ Register op1 = $tmp1$$Register; ++ Register op2 = $tmp2$$Register; ++ Register dst = $dst$$Register; ++ Register src = $src$$Register; ++ int flag = $cop$$cmpcode; ++ ++ __ cmp_cmov(op1, op2, dst, src, (MacroAssembler::CMCompare) flag, false /* is_signed */); ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++ ++instruct cmovL_cmpD_reg_reg(mRegL dst, mRegL src, regD tmp1, regD tmp2, 
cmpOp cop ) %{ ++ match(Set dst (CMoveL (Binary cop (CmpD tmp1 tmp2)) (Binary dst src))); ++ ins_cost(80); ++ format %{ ++ "CMP$cop $tmp1, $tmp2\t @cmovL_cmpD_reg_reg\n" ++ "\tCMOV $dst,$src \t @cmovL_cmpD_reg_reg" ++ %} ++ ins_encode %{ ++ FloatRegister reg_op1 = as_FloatRegister($tmp1$$reg); ++ FloatRegister reg_op2 = as_FloatRegister($tmp2$$reg); ++ Register dst = as_Register($dst$$reg); ++ Register src = as_Register($src$$reg); ++ int flag = $cop$$cmpcode; ++ ++ __ cmp_cmov(reg_op1, reg_op2, dst, src, (MacroAssembler::CMCompare) flag, false /* is_float */); ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct cmovD_cmpD_reg_reg(regD dst, regD src, regD tmp1, regD tmp2, cmpOp cop ) %{ ++ match(Set dst (CMoveD (Binary cop (CmpD tmp1 tmp2)) (Binary dst src))); ++ ins_cost(200); ++ format %{ ++ "CMP$cop $tmp1, $tmp2\t @cmovD_cmpD_reg_reg\n" ++ "\tCMOV $dst,$src \t @cmovD_cmpD_reg_reg" ++ %} ++ ins_encode %{ ++ FloatRegister reg_op1 = as_FloatRegister($tmp1$$reg); ++ FloatRegister reg_op2 = as_FloatRegister($tmp2$$reg); ++ FloatRegister dst = as_FloatRegister($dst$$reg); ++ FloatRegister src = as_FloatRegister($src$$reg); ++ int flag = $cop$$cmpcode; ++ ++ __ cmp_cmov(reg_op1, reg_op2, dst, src, (MacroAssembler::CMCompare) flag, false /* is_float */); ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct cmovF_cmpI_reg_reg(regF dst, regF src, mRegI tmp1, mRegI tmp2, cmpOp cop ) %{ ++ match(Set dst (CMoveF (Binary cop (CmpI tmp1 tmp2)) (Binary dst src))); ++ ins_cost(200); ++ format %{ ++ "CMP$cop $tmp1, $tmp2\t @cmovF_cmpI_reg_reg\n" ++ "\tCMOV $dst, $src \t @cmovF_cmpI_reg_reg" ++ %} ++ ++ ins_encode %{ ++ Register op1 = $tmp1$$Register; ++ Register op2 = $tmp2$$Register; ++ FloatRegister dst = as_FloatRegister($dst$$reg); ++ FloatRegister src = as_FloatRegister($src$$reg); ++ int flag = $cop$$cmpcode; ++ ++ __ cmp_cmov(op1, op2, dst, src, (MacroAssembler::CMCompare) flag, true /* is_float */); ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct cmovD_cmpI_reg_reg(regD dst, regD src, mRegI tmp1, mRegI tmp2, cmpOp cop ) %{ ++ match(Set dst (CMoveD (Binary cop (CmpI tmp1 tmp2)) (Binary dst src))); ++ ins_cost(200); ++ format %{ ++ "CMP$cop $tmp1, $tmp2\t @cmovD_cmpI_reg_reg\n" ++ "\tCMOV $dst, $src \t @cmovD_cmpI_reg_reg" ++ %} ++ ++ ins_encode %{ ++ Register op1 = $tmp1$$Register; ++ Register op2 = $tmp2$$Register; ++ FloatRegister dst = as_FloatRegister($dst$$reg); ++ FloatRegister src = as_FloatRegister($src$$reg); ++ int flag = $cop$$cmpcode; ++ ++ __ cmp_cmov(op1, op2, dst, src, (MacroAssembler::CMCompare) flag, false /* is_float */); ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct cmovD_cmpP_reg_reg(regD dst, regD src, mRegP tmp1, mRegP tmp2, cmpOp cop ) %{ ++ match(Set dst (CMoveD (Binary cop (CmpP tmp1 tmp2)) (Binary dst src))); ++ ins_cost(200); ++ format %{ ++ "CMP$cop $tmp1, $tmp2\t @cmovD_cmpP_reg_reg\n" ++ "\tCMOV $dst, $src \t @cmovD_cmpP_reg_reg" ++ %} ++ ++ ins_encode %{ ++ Register op1 = $tmp1$$Register; ++ Register op2 = $tmp2$$Register; ++ FloatRegister dst = as_FloatRegister($dst$$reg); ++ FloatRegister src = as_FloatRegister($src$$reg); ++ int flag = $cop$$cmpcode; ++ ++ __ cmp_cmov(op1, op2, dst, src, (MacroAssembler::CMCompare) flag, false /* is_float */); ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++//FIXME ++instruct cmovI_cmpF_reg_reg(mRegI dst, mRegI src, regF tmp1, regF tmp2, cmpOp cop ) %{ ++ match(Set dst (CMoveI (Binary cop (CmpF tmp1 tmp2)) (Binary dst src))); ++ ins_cost(80); ++ format %{ ++ "CMP$cop $tmp1, $tmp2\t @cmovI_cmpF_reg_reg\n" ++ "\tCMOV 
$dst,$src \t @cmovI_cmpF_reg_reg" ++ %} ++ ++ ins_encode %{ ++ FloatRegister reg_op1 = $tmp1$$FloatRegister; ++ FloatRegister reg_op2 = $tmp2$$FloatRegister; ++ Register dst = $dst$$Register; ++ Register src = $src$$Register; ++ int flag = $cop$$cmpcode; ++ ++ __ cmp_cmov(reg_op1, reg_op2, dst, src, (MacroAssembler::CMCompare) flag, true /* is_float */); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct cmovF_cmpF_reg_reg(regF dst, regF src, regF tmp1, regF tmp2, cmpOp cop ) %{ ++ match(Set dst (CMoveF (Binary cop (CmpF tmp1 tmp2)) (Binary dst src))); ++ ins_cost(200); ++ format %{ ++ "CMP$cop $tmp1, $tmp2\t @cmovF_cmpF_reg_reg\n" ++ "\tCMOV $dst,$src \t @cmovF_cmpF_reg_reg" ++ %} ++ ++ ins_encode %{ ++ FloatRegister reg_op1 = $tmp1$$FloatRegister; ++ FloatRegister reg_op2 = $tmp2$$FloatRegister; ++ FloatRegister dst = $dst$$FloatRegister; ++ FloatRegister src = $src$$FloatRegister; ++ int flag = $cop$$cmpcode; ++ ++ __ cmp_cmov(reg_op1, reg_op2, dst, src, (MacroAssembler::CMCompare) flag, true /* is_float */); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++// Manifest a CmpL result in an integer register. Very painful. ++// This is the test to avoid. ++instruct cmpL3_reg_reg(mRegI dst, mRegL src1, mRegL src2) %{ ++ match(Set dst (CmpL3 src1 src2)); ++ ins_cost(1000); ++ format %{ "cmpL3 $dst, $src1, $src2 @ cmpL3_reg_reg" %} ++ ins_encode %{ ++ Register opr1 = as_Register($src1$$reg); ++ Register opr2 = as_Register($src2$$reg); ++ Register dst = as_Register($dst$$reg); ++ ++ Label Done; ++ ++ __ subu(AT, opr1, opr2); ++ __ bltz(AT, Done); ++ __ delayed()->daddiu(dst, R0, -1); ++ ++ __ move(dst, 1); ++ __ movz(dst, R0, AT); ++ ++ __ bind(Done); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++// ++// less_rsult = -1 ++// greater_result = 1 ++// equal_result = 0 ++// nan_result = -1 ++// ++instruct cmpF3_reg_reg(mRegI dst, regF src1, regF src2) %{ ++ match(Set dst (CmpF3 src1 src2)); ++ ins_cost(1000); ++ format %{ "cmpF3 $dst, $src1, $src2 @ cmpF3_reg_reg" %} ++ ins_encode %{ ++ FloatRegister src1 = as_FloatRegister($src1$$reg); ++ FloatRegister src2 = as_FloatRegister($src2$$reg); ++ Register dst = as_Register($dst$$reg); ++ ++ Label Done; ++ ++ __ c_ult_s(src1, src2); ++ __ bc1t(Done); ++ __ delayed()->daddiu(dst, R0, -1); ++ ++ __ c_eq_s(src1, src2); ++ __ move(dst, 1); ++ __ movt(dst, R0); ++ ++ __ bind(Done); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct cmpD3_reg_reg(mRegI dst, regD src1, regD src2) %{ ++ match(Set dst (CmpD3 src1 src2)); ++ ins_cost(1000); ++ format %{ "cmpD3 $dst, $src1, $src2 @ cmpD3_reg_reg" %} ++ ins_encode %{ ++ FloatRegister src1 = as_FloatRegister($src1$$reg); ++ FloatRegister src2 = as_FloatRegister($src2$$reg); ++ Register dst = as_Register($dst$$reg); ++ ++ Label Done; ++ ++ __ c_ult_d(src1, src2); ++ __ bc1t(Done); ++ __ delayed()->daddiu(dst, R0, -1); ++ ++ __ c_eq_d(src1, src2); ++ __ move(dst, 1); ++ __ movt(dst, R0); ++ ++ __ bind(Done); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct clear_array(mRegL cnt, mRegP base, Universe dummy) %{ ++ match(Set dummy (ClearArray cnt base)); ++ format %{ "CLEAR_ARRAY base = $base, cnt = $cnt # Clear doublewords" %} ++ ins_encode %{ ++ //Assume cnt is the number of bytes in an array to be cleared, ++ //and base points to the starting address of the array. 
++ Register base = $base$$Register; ++ Register num = $cnt$$Register; ++ Label Loop, done; ++ ++ __ beq(num, R0, done); ++ __ delayed()->daddu(AT, base, R0); ++ ++ __ move(T9, num); /* T9 = words */ ++ ++ __ bind(Loop); ++ __ sd(R0, AT, 0); ++ __ daddiu(T9, T9, -1); ++ __ bne(T9, R0, Loop); ++ __ delayed()->daddiu(AT, AT, wordSize); ++ ++ __ bind(done); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct string_compare(a4_RegP str1, mA5RegI cnt1, a6_RegP str2, mA7RegI cnt2, no_Ax_mRegI result) %{ ++ match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2))); ++ effect(USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2); ++ ++ format %{ "String Compare $str1[len: $cnt1], $str2[len: $cnt2] -> $result @ string_compare" %} ++ ins_encode %{ ++ // Get the first character position in both strings ++ // [8] char array, [12] offset, [16] count ++ Register str1 = $str1$$Register; ++ Register str2 = $str2$$Register; ++ Register cnt1 = $cnt1$$Register; ++ Register cnt2 = $cnt2$$Register; ++ Register result = $result$$Register; ++ ++ Label L, Loop, haveResult, done; ++ ++ // compute the and difference of lengths (in result) ++ __ subu(result, cnt1, cnt2); // result holds the difference of two lengths ++ ++ // compute the shorter length (in cnt1) ++ __ slt(AT, cnt2, cnt1); ++ __ movn(cnt1, cnt2, AT); ++ ++ // Now the shorter length is in cnt1 and cnt2 can be used as a tmp register ++ __ bind(Loop); // Loop begin ++ __ beq(cnt1, R0, done); ++ __ delayed()->lhu(AT, str1, 0);; ++ ++ // compare current character ++ __ lhu(cnt2, str2, 0); ++ __ bne(AT, cnt2, haveResult); ++ __ delayed()->addiu(str1, str1, 2); ++ __ addiu(str2, str2, 2); ++ __ b(Loop); ++ __ delayed()->addiu(cnt1, cnt1, -1); // Loop end ++ ++ __ bind(haveResult); ++ __ subu(result, AT, cnt2); ++ ++ __ bind(done); ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++// intrinsic optimization ++instruct string_equals(a4_RegP str1, a5_RegP str2, mA6RegI cnt, mA7RegI temp, no_Ax_mRegI result) %{ ++ match(Set result (StrEquals (Binary str1 str2) cnt)); ++ effect(USE_KILL str1, USE_KILL str2, USE_KILL cnt, KILL temp); ++ ++ format %{ "String Equal $str1, $str2, len:$cnt tmp:$temp -> $result @ string_equals" %} ++ ins_encode %{ ++ // Get the first character position in both strings ++ // [8] char array, [12] offset, [16] count ++ Register str1 = $str1$$Register; ++ Register str2 = $str2$$Register; ++ Register cnt = $cnt$$Register; ++ Register tmp = $temp$$Register; ++ Register result = $result$$Register; ++ ++ Label Loop, True, False; ++ ++ __ beq(str1, str2, True); // same char[] ? 
++ __ delayed()->daddiu(result, R0, 1); ++ ++ __ beq(cnt, R0, True); ++ __ delayed()->nop(); // count == 0 ++ ++ __ bind(Loop); ++ ++ // compare current character ++ __ lhu(AT, str1, 0); ++ __ lhu(tmp, str2, 0); ++ __ bne(AT, tmp, False); ++ __ delayed()->addiu(str1, str1, 2); ++ __ addiu(cnt, cnt, -1); ++ __ bne(cnt, R0, Loop); ++ __ delayed()->addiu(str2, str2, 2); ++ ++ __ b(True); ++ __ delayed()->nop(); ++ ++ __ bind(False); ++ __ daddiu(result, R0, 0); ++ ++ __ bind(True); ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++//----------Arithmetic Instructions------------------------------------------- ++//----------Addition Instructions--------------------------------------------- ++instruct addI_Reg_Reg(mRegI dst, mRegI src1, mRegI src2) %{ ++ match(Set dst (AddI src1 src2)); ++ ++ format %{ "addu $dst, $src1, $src2 #@addI_Reg_Reg" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src1 = $src1$$Register; ++ Register src2 = $src2$$Register; ++ __ addu32(dst, src1, src2); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++instruct addI_Reg_imm(mRegI dst, mRegI src1, immI src2) %{ ++ match(Set dst (AddI src1 src2)); ++ ++ format %{ "addu $dst, $src1, $src2 #@addI_Reg_imm" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src1 = $src1$$Register; ++ int imm = $src2$$constant; ++ ++ if(Assembler::is_simm16(imm)) { ++ __ addiu32(dst, src1, imm); ++ } else { ++ __ move(AT, imm); ++ __ addu32(dst, src1, AT); ++ } ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++instruct addP_reg_reg(mRegP dst, mRegP src1, mRegL src2) %{ ++ match(Set dst (AddP src1 src2)); ++ ++ format %{ "daddu $dst, $src1, $src2 #@addP_reg_reg" %} ++ ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src1 = $src1$$Register; ++ Register src2 = $src2$$Register; ++ __ daddu(dst, src1, src2); ++ %} ++ ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++instruct addP_reg_reg_convI2L(mRegP dst, mRegP src1, mRegI src2) %{ ++ match(Set dst (AddP src1 (ConvI2L src2))); ++ ++ format %{ "daddu $dst, $src1, $src2 #@addP_reg_reg_convI2L" %} ++ ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src1 = $src1$$Register; ++ Register src2 = $src2$$Register; ++ __ daddu(dst, src1, src2); ++ %} ++ ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++instruct addP_reg_imm(mRegP dst, mRegP src1, immL src2) %{ ++ match(Set dst (AddP src1 src2)); ++ ++ format %{ "daddiu $dst, $src1, $src2 #@addP_reg_imm" %} ++ ins_encode %{ ++ Register src1 = $src1$$Register; ++ long src2 = $src2$$constant; ++ Register dst = $dst$$Register; ++ ++ if(Assembler::is_simm16(src2)) { ++ __ daddiu(dst, src1, src2); ++ } else { ++ __ set64(AT, src2); ++ __ daddu(dst, src1, AT); ++ } ++ %} ++ ins_pipe( ialu_regI_imm16 ); ++%} ++ ++// Add Long Register with Register ++instruct addL_Reg_Reg(mRegL dst, mRegL src1, mRegL src2) %{ ++ match(Set dst (AddL src1 src2)); ++ ins_cost(200); ++ format %{ "ADD $dst, $src1, $src2 #@addL_Reg_Reg\t" %} ++ ++ ins_encode %{ ++ Register dst_reg = as_Register($dst$$reg); ++ Register src1_reg = as_Register($src1$$reg); ++ Register src2_reg = as_Register($src2$$reg); ++ ++ __ daddu(dst_reg, src1_reg, src2_reg); ++ %} ++ ++ ins_pipe( ialu_regL_regL ); ++%} ++ ++instruct addL_Reg_imm(mRegL dst, mRegL src1, immL16 src2) ++%{ ++ match(Set dst (AddL src1 src2)); ++ ++ format %{ "ADD $dst, $src1, $src2 #@addL_Reg_imm " %} ++ ins_encode %{ ++ Register dst_reg = as_Register($dst$$reg); ++ Register src1_reg = as_Register($src1$$reg); ++ int src2_imm = $src2$$constant; ++ ++ __ daddiu(dst_reg, src1_reg, src2_imm); ++ %} ++ ++ 
ins_pipe( ialu_regL_regL ); ++%} ++ ++instruct addL_RegI2L_imm(mRegL dst, mRegI src1, immL16 src2) ++%{ ++ match(Set dst (AddL (ConvI2L src1) src2)); ++ ++ format %{ "ADD $dst, $src1, $src2 #@addL_RegI2L_imm " %} ++ ins_encode %{ ++ Register dst_reg = as_Register($dst$$reg); ++ Register src1_reg = as_Register($src1$$reg); ++ int src2_imm = $src2$$constant; ++ ++ __ daddiu(dst_reg, src1_reg, src2_imm); ++ %} ++ ++ ins_pipe( ialu_regL_regL ); ++%} ++ ++instruct addL_RegI2L_Reg(mRegL dst, mRegI src1, mRegL src2) %{ ++ match(Set dst (AddL (ConvI2L src1) src2)); ++ ins_cost(200); ++ format %{ "ADD $dst, $src1, $src2 #@addL_RegI2L_Reg\t" %} ++ ++ ins_encode %{ ++ Register dst_reg = as_Register($dst$$reg); ++ Register src1_reg = as_Register($src1$$reg); ++ Register src2_reg = as_Register($src2$$reg); ++ ++ __ daddu(dst_reg, src1_reg, src2_reg); ++ %} ++ ++ ins_pipe( ialu_regL_regL ); ++%} ++ ++instruct addL_RegI2L_RegI2L(mRegL dst, mRegI src1, mRegI src2) %{ ++ match(Set dst (AddL (ConvI2L src1) (ConvI2L src2))); ++ ins_cost(200); ++ format %{ "ADD $dst, $src1, $src2 #@addL_RegI2L_RegI2L\t" %} ++ ++ ins_encode %{ ++ Register dst_reg = as_Register($dst$$reg); ++ Register src1_reg = as_Register($src1$$reg); ++ Register src2_reg = as_Register($src2$$reg); ++ ++ __ daddu(dst_reg, src1_reg, src2_reg); ++ %} ++ ++ ins_pipe( ialu_regL_regL ); ++%} ++ ++instruct addL_Reg_RegI2L(mRegL dst, mRegL src1, mRegI src2) %{ ++ match(Set dst (AddL src1 (ConvI2L src2))); ++ ins_cost(200); ++ format %{ "ADD $dst, $src1, $src2 #@addL_Reg_RegI2L\t" %} ++ ++ ins_encode %{ ++ Register dst_reg = as_Register($dst$$reg); ++ Register src1_reg = as_Register($src1$$reg); ++ Register src2_reg = as_Register($src2$$reg); ++ ++ __ daddu(dst_reg, src1_reg, src2_reg); ++ %} ++ ++ ins_pipe( ialu_regL_regL ); ++%} ++ ++//----------Subtraction Instructions------------------------------------------- ++// Integer Subtraction Instructions ++instruct subI_Reg_Reg(mRegI dst, mRegI src1, mRegI src2) %{ ++ match(Set dst (SubI src1 src2)); ++ ins_cost(100); ++ ++ format %{ "subu $dst, $src1, $src2 #@subI_Reg_Reg" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src1 = $src1$$Register; ++ Register src2 = $src2$$Register; ++ __ subu32(dst, src1, src2); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++instruct subI_Reg_immI_M32767_32768(mRegI dst, mRegI src1, immI_M32767_32768 src2) %{ ++ match(Set dst (SubI src1 src2)); ++ ins_cost(80); ++ ++ format %{ "subu $dst, $src1, $src2 #@subI_Reg_immI_M32767_32768" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src1 = $src1$$Register; ++ __ addiu32(dst, src1, -1 * $src2$$constant); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++instruct negI_Reg(mRegI dst, immI_0 zero, mRegI src) %{ ++ match(Set dst (SubI zero src)); ++ ins_cost(80); ++ ++ format %{ "neg $dst, $src #@negI_Reg" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src = $src$$Register; ++ __ subu32(dst, R0, src); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++instruct negL_Reg(mRegL dst, immL_0 zero, mRegL src) %{ ++ match(Set dst (SubL zero src)); ++ ins_cost(80); ++ ++ format %{ "neg $dst, $src #@negL_Reg" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src = $src$$Register; ++ __ subu(dst, R0, src); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++instruct subL_Reg_immL_M32767_32768(mRegL dst, mRegL src1, immL_M32767_32768 src2) %{ ++ match(Set dst (SubL src1 src2)); ++ ins_cost(80); ++ ++ format %{ "subu $dst, $src1, $src2 #@subL_Reg_immL_M32767_32768" %} ++ ins_encode %{ 
++ Register dst = $dst$$Register; ++ Register src1 = $src1$$Register; ++ __ daddiu(dst, src1, -1 * $src2$$constant); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++// Subtract Long Register with Register. ++instruct subL_Reg_Reg(mRegL dst, mRegL src1, mRegL src2) %{ ++ match(Set dst (SubL src1 src2)); ++ ins_cost(100); ++ format %{ "SubL $dst, $src1, $src2 @ subL_Reg_Reg" %} ++ ins_encode %{ ++ Register dst = as_Register($dst$$reg); ++ Register src1 = as_Register($src1$$reg); ++ Register src2 = as_Register($src2$$reg); ++ ++ __ subu(dst, src1, src2); ++ %} ++ ins_pipe( ialu_regL_regL ); ++%} ++ ++instruct subL_Reg_RegI2L(mRegL dst, mRegL src1, mRegI src2) %{ ++ match(Set dst (SubL src1 (ConvI2L src2))); ++ ins_cost(100); ++ format %{ "SubL $dst, $src1, $src2 @ subL_Reg_RegI2L" %} ++ ins_encode %{ ++ Register dst = as_Register($dst$$reg); ++ Register src1 = as_Register($src1$$reg); ++ Register src2 = as_Register($src2$$reg); ++ ++ __ subu(dst, src1, src2); ++ %} ++ ins_pipe( ialu_regL_regL ); ++%} ++ ++instruct subL_RegI2L_Reg(mRegL dst, mRegI src1, mRegL src2) %{ ++ match(Set dst (SubL (ConvI2L src1) src2)); ++ ins_cost(200); ++ format %{ "SubL $dst, $src1, $src2 @ subL_RegI2L_Reg" %} ++ ins_encode %{ ++ Register dst = as_Register($dst$$reg); ++ Register src1 = as_Register($src1$$reg); ++ Register src2 = as_Register($src2$$reg); ++ ++ __ subu(dst, src1, src2); ++ %} ++ ins_pipe( ialu_regL_regL ); ++%} ++ ++instruct subL_RegI2L_RegI2L(mRegL dst, mRegI src1, mRegI src2) %{ ++ match(Set dst (SubL (ConvI2L src1) (ConvI2L src2))); ++ ins_cost(200); ++ format %{ "SubL $dst, $src1, $src2 @ subL_RegI2L_RegI2L" %} ++ ins_encode %{ ++ Register dst = as_Register($dst$$reg); ++ Register src1 = as_Register($src1$$reg); ++ Register src2 = as_Register($src2$$reg); ++ ++ __ subu(dst, src1, src2); ++ %} ++ ins_pipe( ialu_regL_regL ); ++%} ++ ++// Integer MOD with Register ++instruct modI_Reg_Reg(mRegI dst, mRegI src1, mRegI src2) %{ ++ match(Set dst (ModI src1 src2)); ++ ins_cost(300); ++ format %{ "modi $dst, $src1, $src2 @ modI_Reg_Reg" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src1 = $src1$$Register; ++ Register src2 = $src2$$Register; ++ ++ //if (UseLEXT1) { ++ if (0) { ++ // Experiments show that gsmod is slower that div+mfhi. ++ // So I just disable it here. 
++ __ gsmod(dst, src1, src2); ++ } else { ++ __ div(src1, src2); ++ __ mfhi(dst); ++ } ++ %} ++ ++ //ins_pipe( ialu_mod ); ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++instruct modL_reg_reg(mRegL dst, mRegL src1, mRegL src2) %{ ++ match(Set dst (ModL src1 src2)); ++ format %{ "modL $dst, $src1, $src2 @modL_reg_reg" %} ++ ++ ins_encode %{ ++ Register dst = as_Register($dst$$reg); ++ Register op1 = as_Register($src1$$reg); ++ Register op2 = as_Register($src2$$reg); ++ ++ if (UseLEXT1) { ++ __ gsdmod(dst, op1, op2); ++ } else { ++ __ ddiv(op1, op2); ++ __ mfhi(dst); ++ } ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct mulI_Reg_Reg(mRegI dst, mRegI src1, mRegI src2) %{ ++ match(Set dst (MulI src1 src2)); ++ ++ ins_cost(300); ++ format %{ "mul $dst, $src1, $src2 @ mulI_Reg_Reg" %} ++ ins_encode %{ ++ Register src1 = $src1$$Register; ++ Register src2 = $src2$$Register; ++ Register dst = $dst$$Register; ++ ++ __ mul(dst, src1, src2); ++ %} ++ ins_pipe( ialu_mult ); ++%} ++ ++instruct maddI_Reg_Reg(mRegI dst, mRegI src1, mRegI src2, mRegI src3) %{ ++ match(Set dst (AddI (MulI src1 src2) src3)); ++ ++ ins_cost(999); ++ format %{ "madd $dst, $src1 * $src2 + $src3 #@maddI_Reg_Reg" %} ++ ins_encode %{ ++ Register src1 = $src1$$Register; ++ Register src2 = $src2$$Register; ++ Register src3 = $src3$$Register; ++ Register dst = $dst$$Register; ++ ++ __ mtlo(src3); ++ __ madd(src1, src2); ++ __ mflo(dst); ++ %} ++ ins_pipe( ialu_mult ); ++%} ++ ++instruct divI_Reg_Reg(mRegI dst, mRegI src1, mRegI src2) %{ ++ match(Set dst (DivI src1 src2)); ++ ++ ins_cost(300); ++ format %{ "div $dst, $src1, $src2 @ divI_Reg_Reg" %} ++ ins_encode %{ ++ Register src1 = $src1$$Register; ++ Register src2 = $src2$$Register; ++ Register dst = $dst$$Register; ++ ++ // In MIPS, div does not cause exception. ++ // We must trap an exception manually. ++ __ teq(R0, src2, 0x7); ++ ++ if (UseLEXT1) { ++ __ gsdiv(dst, src1, src2); ++ } else { ++ __ div(src1, src2); ++ ++ __ nop(); ++ __ nop(); ++ __ mflo(dst); ++ } ++ %} ++ ins_pipe( ialu_mod ); ++%} ++ ++instruct divF_Reg_Reg(regF dst, regF src1, regF src2) %{ ++ match(Set dst (DivF src1 src2)); ++ ++ ins_cost(300); ++ format %{ "divF $dst, $src1, $src2 @ divF_Reg_Reg" %} ++ ins_encode %{ ++ FloatRegister src1 = $src1$$FloatRegister; ++ FloatRegister src2 = $src2$$FloatRegister; ++ FloatRegister dst = $dst$$FloatRegister; ++ ++ /* Here do we need to trap an exception manually ? */ ++ __ div_s(dst, src1, src2); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct divD_Reg_Reg(regD dst, regD src1, regD src2) %{ ++ match(Set dst (DivD src1 src2)); ++ ++ ins_cost(300); ++ format %{ "divD $dst, $src1, $src2 @ divD_Reg_Reg" %} ++ ins_encode %{ ++ FloatRegister src1 = $src1$$FloatRegister; ++ FloatRegister src2 = $src2$$FloatRegister; ++ FloatRegister dst = $dst$$FloatRegister; ++ ++ /* Here do we need to trap an exception manually ? 
*/ ++ __ div_d(dst, src1, src2); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct mulL_reg_reg(mRegL dst, mRegL src1, mRegL src2) %{ ++ match(Set dst (MulL src1 src2)); ++ format %{ "mulL $dst, $src1, $src2 @mulL_reg_reg" %} ++ ins_encode %{ ++ Register dst = as_Register($dst$$reg); ++ Register op1 = as_Register($src1$$reg); ++ Register op2 = as_Register($src2$$reg); ++ ++ if (UseLEXT1) { ++ __ gsdmult(dst, op1, op2); ++ } else { ++ __ dmult(op1, op2); ++ __ mflo(dst); ++ } ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct mulL_reg_regI2L(mRegL dst, mRegL src1, mRegI src2) %{ ++ match(Set dst (MulL src1 (ConvI2L src2))); ++ format %{ "mulL $dst, $src1, $src2 @mulL_reg_regI2L" %} ++ ins_encode %{ ++ Register dst = as_Register($dst$$reg); ++ Register op1 = as_Register($src1$$reg); ++ Register op2 = as_Register($src2$$reg); ++ ++ if (UseLEXT1) { ++ __ gsdmult(dst, op1, op2); ++ } else { ++ __ dmult(op1, op2); ++ __ mflo(dst); ++ } ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct divL_reg_reg(mRegL dst, mRegL src1, mRegL src2) %{ ++ match(Set dst (DivL src1 src2)); ++ format %{ "divL $dst, $src1, $src2 @divL_reg_reg" %} ++ ++ ins_encode %{ ++ Register dst = as_Register($dst$$reg); ++ Register op1 = as_Register($src1$$reg); ++ Register op2 = as_Register($src2$$reg); ++ ++ if (UseLEXT1) { ++ __ gsddiv(dst, op1, op2); ++ } else { ++ __ ddiv(op1, op2); ++ __ mflo(dst); ++ } ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct addF_reg_reg(regF dst, regF src1, regF src2) %{ ++ match(Set dst (AddF src1 src2)); ++ format %{ "AddF $dst, $src1, $src2 @addF_reg_reg" %} ++ ins_encode %{ ++ FloatRegister src1 = as_FloatRegister($src1$$reg); ++ FloatRegister src2 = as_FloatRegister($src2$$reg); ++ FloatRegister dst = as_FloatRegister($dst$$reg); ++ ++ __ add_s(dst, src1, src2); ++ %} ++ ins_pipe( fpu_regF_regF ); ++%} ++ ++instruct subF_reg_reg(regF dst, regF src1, regF src2) %{ ++ match(Set dst (SubF src1 src2)); ++ format %{ "SubF $dst, $src1, $src2 @subF_reg_reg" %} ++ ins_encode %{ ++ FloatRegister src1 = as_FloatRegister($src1$$reg); ++ FloatRegister src2 = as_FloatRegister($src2$$reg); ++ FloatRegister dst = as_FloatRegister($dst$$reg); ++ ++ __ sub_s(dst, src1, src2); ++ %} ++ ins_pipe( fpu_regF_regF ); ++%} ++instruct addD_reg_reg(regD dst, regD src1, regD src2) %{ ++ match(Set dst (AddD src1 src2)); ++ format %{ "AddD $dst, $src1, $src2 @addD_reg_reg" %} ++ ins_encode %{ ++ FloatRegister src1 = as_FloatRegister($src1$$reg); ++ FloatRegister src2 = as_FloatRegister($src2$$reg); ++ FloatRegister dst = as_FloatRegister($dst$$reg); ++ ++ __ add_d(dst, src1, src2); ++ %} ++ ins_pipe( fpu_regF_regF ); ++%} ++ ++instruct subD_reg_reg(regD dst, regD src1, regD src2) %{ ++ match(Set dst (SubD src1 src2)); ++ format %{ "SubD $dst, $src1, $src2 @subD_reg_reg" %} ++ ins_encode %{ ++ FloatRegister src1 = as_FloatRegister($src1$$reg); ++ FloatRegister src2 = as_FloatRegister($src2$$reg); ++ FloatRegister dst = as_FloatRegister($dst$$reg); ++ ++ __ sub_d(dst, src1, src2); ++ %} ++ ins_pipe( fpu_regF_regF ); ++%} ++ ++instruct negF_reg(regF dst, regF src) %{ ++ match(Set dst (NegF src)); ++ format %{ "negF $dst, $src @negF_reg" %} ++ ins_encode %{ ++ FloatRegister src = as_FloatRegister($src$$reg); ++ FloatRegister dst = as_FloatRegister($dst$$reg); ++ ++ __ neg_s(dst, src); ++ %} ++ ins_pipe( fpu_regF_regF ); ++%} ++ ++instruct negD_reg(regD dst, regD src) %{ ++ match(Set dst (NegD src)); ++ format %{ "negD $dst, $src @negD_reg" %} ++ ins_encode %{ ++ FloatRegister src = as_FloatRegister($src$$reg); ++ 
FloatRegister dst = as_FloatRegister($dst$$reg); ++ ++ __ neg_d(dst, src); ++ %} ++ ins_pipe( fpu_regF_regF ); ++%} ++ ++ ++instruct mulF_reg_reg(regF dst, regF src1, regF src2) %{ ++ match(Set dst (MulF src1 src2)); ++ format %{ "MULF $dst, $src1, $src2 @mulF_reg_reg" %} ++ ins_encode %{ ++ FloatRegister src1 = $src1$$FloatRegister; ++ FloatRegister src2 = $src2$$FloatRegister; ++ FloatRegister dst = $dst$$FloatRegister; ++ ++ __ mul_s(dst, src1, src2); ++ %} ++ ins_pipe( fpu_regF_regF ); ++%} ++ ++instruct maddF_reg_reg(regF dst, regF src1, regF src2, regF src3) %{ ++ match(Set dst (AddF (MulF src1 src2) src3)); ++ // For compatibility reason (e.g. on the Loongson platform), disable this guy. ++ ins_cost(44444); ++ format %{ "maddF $dst, $src1, $src2, $src3 @maddF_reg_reg" %} ++ ins_encode %{ ++ FloatRegister src1 = $src1$$FloatRegister; ++ FloatRegister src2 = $src2$$FloatRegister; ++ FloatRegister src3 = $src3$$FloatRegister; ++ FloatRegister dst = $dst$$FloatRegister; ++ ++ __ madd_s(dst, src1, src2, src3); ++ %} ++ ins_pipe( fpu_regF_regF ); ++%} ++ ++// Mul two double precision floating piont number ++instruct mulD_reg_reg(regD dst, regD src1, regD src2) %{ ++ match(Set dst (MulD src1 src2)); ++ format %{ "MULD $dst, $src1, $src2 @mulD_reg_reg" %} ++ ins_encode %{ ++ FloatRegister src1 = $src1$$FloatRegister; ++ FloatRegister src2 = $src2$$FloatRegister; ++ FloatRegister dst = $dst$$FloatRegister; ++ ++ __ mul_d(dst, src1, src2); ++ %} ++ ins_pipe( fpu_regF_regF ); ++%} ++ ++instruct maddD_reg_reg(regD dst, regD src1, regD src2, regD src3) %{ ++ match(Set dst (AddD (MulD src1 src2) src3)); ++ // For compatibility reason (e.g. on the Loongson platform), disable this guy. ++ ins_cost(44444); ++ format %{ "maddD $dst, $src1, $src2, $src3 @maddD_reg_reg" %} ++ ins_encode %{ ++ FloatRegister src1 = $src1$$FloatRegister; ++ FloatRegister src2 = $src2$$FloatRegister; ++ FloatRegister src3 = $src3$$FloatRegister; ++ FloatRegister dst = $dst$$FloatRegister; ++ ++ __ madd_d(dst, src1, src2, src3); ++ %} ++ ins_pipe( fpu_regF_regF ); ++%} ++ ++instruct absF_reg(regF dst, regF src) %{ ++ match(Set dst (AbsF src)); ++ ins_cost(100); ++ format %{ "absF $dst, $src @absF_reg" %} ++ ins_encode %{ ++ FloatRegister src = as_FloatRegister($src$$reg); ++ FloatRegister dst = as_FloatRegister($dst$$reg); ++ ++ __ abs_s(dst, src); ++ %} ++ ins_pipe( fpu_regF_regF ); ++%} ++ ++ ++// intrinsics for math_native. 
++// AbsD SqrtD CosD SinD TanD LogD Log10D ++ ++instruct absD_reg(regD dst, regD src) %{ ++ match(Set dst (AbsD src)); ++ ins_cost(100); ++ format %{ "absD $dst, $src @absD_reg" %} ++ ins_encode %{ ++ FloatRegister src = as_FloatRegister($src$$reg); ++ FloatRegister dst = as_FloatRegister($dst$$reg); ++ ++ __ abs_d(dst, src); ++ %} ++ ins_pipe( fpu_regF_regF ); ++%} ++ ++instruct sqrtD_reg(regD dst, regD src) %{ ++ match(Set dst (SqrtD src)); ++ ins_cost(100); ++ format %{ "SqrtD $dst, $src @sqrtD_reg" %} ++ ins_encode %{ ++ FloatRegister src = as_FloatRegister($src$$reg); ++ FloatRegister dst = as_FloatRegister($dst$$reg); ++ ++ __ sqrt_d(dst, src); ++ %} ++ ins_pipe( fpu_regF_regF ); ++%} ++ ++instruct sqrtF_reg(regF dst, regF src) %{ ++ match(Set dst (ConvD2F (SqrtD (ConvF2D src)))); ++ ins_cost(100); ++ format %{ "SqrtF $dst, $src @sqrtF_reg" %} ++ ins_encode %{ ++ FloatRegister src = as_FloatRegister($src$$reg); ++ FloatRegister dst = as_FloatRegister($dst$$reg); ++ ++ __ sqrt_s(dst, src); ++ %} ++ ins_pipe( fpu_regF_regF ); ++%} ++//----------------------------------Logical Instructions---------------------- ++//__________________________________Integer Logical Instructions------------- ++ ++//And Instuctions ++// And Register with Immediate ++instruct andI_Reg_immI(mRegI dst, mRegI src1, immI src2) %{ ++ match(Set dst (AndI src1 src2)); ++ ++ format %{ "and $dst, $src1, $src2 #@andI_Reg_immI" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src = $src1$$Register; ++ int val = $src2$$constant; ++ ++ __ move(AT, val); ++ __ andr(dst, src, AT); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++instruct andI_Reg_imm_0_65535(mRegI dst, mRegI src1, immI_0_65535 src2) %{ ++ match(Set dst (AndI src1 src2)); ++ ins_cost(60); ++ ++ format %{ "and $dst, $src1, $src2 #@andI_Reg_imm_0_65535" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src = $src1$$Register; ++ int val = $src2$$constant; ++ ++ __ andi(dst, src, val); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++instruct andI_Reg_immI_nonneg_mask(mRegI dst, mRegI src1, immI_nonneg_mask mask) %{ ++ match(Set dst (AndI src1 mask)); ++ ins_cost(60); ++ ++ format %{ "and $dst, $src1, $mask #@andI_Reg_immI_nonneg_mask" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src = $src1$$Register; ++ int size = Assembler::is_int_mask($mask$$constant); ++ ++ __ ext(dst, src, 0, size); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++instruct andL_Reg_immL_nonneg_mask(mRegL dst, mRegL src1, immL_nonneg_mask mask) %{ ++ match(Set dst (AndL src1 mask)); ++ ins_cost(60); ++ ++ format %{ "and $dst, $src1, $mask #@andL_Reg_immL_nonneg_mask" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src = $src1$$Register; ++ int size = Assembler::is_jlong_mask($mask$$constant); ++ ++ __ dext(dst, src, 0, size); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++instruct xorI_Reg_imm_0_65535(mRegI dst, mRegI src1, immI_0_65535 src2) %{ ++ match(Set dst (XorI src1 src2)); ++ ins_cost(60); ++ ++ format %{ "xori $dst, $src1, $src2 #@xorI_Reg_imm_0_65535" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src = $src1$$Register; ++ int val = $src2$$constant; ++ ++ __ xori(dst, src, val); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++instruct xorI_Reg_immI_M1(mRegI dst, mRegI src1, immI_M1 M1) %{ ++ match(Set dst (XorI src1 M1)); ++ predicate(UseLEXT3); ++ ins_cost(60); ++ ++ format %{ "xor $dst, $src1, $M1 #@xorI_Reg_immI_M1" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src = 
$src1$$Register; ++ ++ __ gsorn(dst, R0, src); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++instruct xorL2I_Reg_immI_M1(mRegI dst, mRegL src1, immI_M1 M1) %{ ++ match(Set dst (XorI (ConvL2I src1) M1)); ++ predicate(UseLEXT3); ++ ins_cost(60); ++ ++ format %{ "xor $dst, $src1, $M1 #@xorL2I_Reg_immI_M1" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src = $src1$$Register; ++ ++ __ gsorn(dst, R0, src); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++instruct xorL_Reg_imm_0_65535(mRegL dst, mRegL src1, immL_0_65535 src2) %{ ++ match(Set dst (XorL src1 src2)); ++ ins_cost(60); ++ ++ format %{ "xori $dst, $src1, $src2 #@xorL_Reg_imm_0_65535" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src = $src1$$Register; ++ int val = $src2$$constant; ++ ++ __ xori(dst, src, val); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++/* ++instruct xorL_Reg_immL_M1(mRegL dst, mRegL src1, immL_M1 M1) %{ ++ match(Set dst (XorL src1 M1)); ++ predicate(UseLEXT3); ++ ins_cost(60); ++ ++ format %{ "xor $dst, $src1, $M1 #@xorL_Reg_immL_M1" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src = $src1$$Register; ++ ++ __ gsorn(dst, R0, src); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++*/ ++ ++instruct lbu_and_lmask(mRegI dst, memory mem, immI_255 mask) %{ ++ match(Set dst (AndI mask (LoadB mem))); ++ ins_cost(60); ++ ++ format %{ "lhu $dst, $mem #@lbu_and_lmask" %} ++ ins_encode(load_UB_enc(dst, mem)); ++ ins_pipe( ialu_loadI ); ++%} ++ ++instruct lbu_and_rmask(mRegI dst, memory mem, immI_255 mask) %{ ++ match(Set dst (AndI (LoadB mem) mask)); ++ ins_cost(60); ++ ++ format %{ "lhu $dst, $mem #@lbu_and_rmask" %} ++ ins_encode(load_UB_enc(dst, mem)); ++ ins_pipe( ialu_loadI ); ++%} ++ ++instruct andI_Reg_Reg(mRegI dst, mRegI src1, mRegI src2) %{ ++ match(Set dst (AndI src1 src2)); ++ ++ format %{ "and $dst, $src1, $src2 #@andI_Reg_Reg" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src1 = $src1$$Register; ++ Register src2 = $src2$$Register; ++ __ andr(dst, src1, src2); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++instruct andnI_Reg_nReg(mRegI dst, mRegI src1, mRegI src2, immI_M1 M1) %{ ++ match(Set dst (AndI src1 (XorI src2 M1))); ++ predicate(UseLEXT3); ++ ++ format %{ "andn $dst, $src1, $src2 #@andnI_Reg_nReg" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src1 = $src1$$Register; ++ Register src2 = $src2$$Register; ++ ++ __ gsandn(dst, src1, src2); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++instruct ornI_Reg_nReg(mRegI dst, mRegI src1, mRegI src2, immI_M1 M1) %{ ++ match(Set dst (OrI src1 (XorI src2 M1))); ++ predicate(UseLEXT3); ++ ++ format %{ "orn $dst, $src1, $src2 #@ornI_Reg_nReg" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src1 = $src1$$Register; ++ Register src2 = $src2$$Register; ++ ++ __ gsorn(dst, src1, src2); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++instruct andnI_nReg_Reg(mRegI dst, mRegI src1, mRegI src2, immI_M1 M1) %{ ++ match(Set dst (AndI (XorI src1 M1) src2)); ++ predicate(UseLEXT3); ++ ++ format %{ "andn $dst, $src2, $src1 #@andnI_nReg_Reg" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src1 = $src1$$Register; ++ Register src2 = $src2$$Register; ++ ++ __ gsandn(dst, src2, src1); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++instruct ornI_nReg_Reg(mRegI dst, mRegI src1, mRegI src2, immI_M1 M1) %{ ++ match(Set dst (OrI (XorI src1 M1) src2)); ++ predicate(UseLEXT3); ++ ++ format %{ "orn $dst, $src2, $src1 #@ornI_nReg_Reg" %} ++ ins_encode %{ ++ Register dst = 
$dst$$Register; ++ Register src1 = $src1$$Register; ++ Register src2 = $src2$$Register; ++ ++ __ gsorn(dst, src2, src1); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++// And Long Register with Register ++instruct andL_Reg_Reg(mRegL dst, mRegL src1, mRegL src2) %{ ++ match(Set dst (AndL src1 src2)); ++ format %{ "AND $dst, $src1, $src2 @ andL_Reg_Reg\n\t" %} ++ ins_encode %{ ++ Register dst_reg = as_Register($dst$$reg); ++ Register src1_reg = as_Register($src1$$reg); ++ Register src2_reg = as_Register($src2$$reg); ++ ++ __ andr(dst_reg, src1_reg, src2_reg); ++ %} ++ ins_pipe( ialu_regL_regL ); ++%} ++ ++instruct andL_Reg_Reg_convI2L(mRegL dst, mRegL src1, mRegI src2) %{ ++ match(Set dst (AndL src1 (ConvI2L src2))); ++ format %{ "AND $dst, $src1, $src2 @ andL_Reg_Reg_convI2L\n\t" %} ++ ins_encode %{ ++ Register dst_reg = as_Register($dst$$reg); ++ Register src1_reg = as_Register($src1$$reg); ++ Register src2_reg = as_Register($src2$$reg); ++ ++ __ andr(dst_reg, src1_reg, src2_reg); ++ %} ++ ins_pipe( ialu_regL_regL ); ++%} ++ ++instruct andL_Reg_imm_0_65535(mRegL dst, mRegL src1, immL_0_65535 src2) %{ ++ match(Set dst (AndL src1 src2)); ++ ins_cost(60); ++ ++ format %{ "and $dst, $src1, $src2 #@andL_Reg_imm_0_65535" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src = $src1$$Register; ++ long val = $src2$$constant; ++ ++ __ andi(dst, src, val); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++instruct andL2I_Reg_imm_0_65535(mRegI dst, mRegL src1, immL_0_65535 src2) %{ ++ match(Set dst (ConvL2I (AndL src1 src2))); ++ ins_cost(60); ++ ++ format %{ "and $dst, $src1, $src2 #@andL2I_Reg_imm_0_65535" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src = $src1$$Register; ++ long val = $src2$$constant; ++ ++ __ andi(dst, src, val); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++/* ++instruct andnL_Reg_nReg(mRegL dst, mRegL src1, mRegL src2, immL_M1 M1) %{ ++ match(Set dst (AndL src1 (XorL src2 M1))); ++ predicate(UseLEXT3); ++ ++ format %{ "andn $dst, $src1, $src2 #@andnL_Reg_nReg" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src1 = $src1$$Register; ++ Register src2 = $src2$$Register; ++ ++ __ gsandn(dst, src1, src2); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++*/ ++ ++/* ++instruct ornL_Reg_nReg(mRegL dst, mRegL src1, mRegL src2, immL_M1 M1) %{ ++ match(Set dst (OrL src1 (XorL src2 M1))); ++ predicate(UseLEXT3); ++ ++ format %{ "orn $dst, $src1, $src2 #@ornL_Reg_nReg" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src1 = $src1$$Register; ++ Register src2 = $src2$$Register; ++ ++ __ gsorn(dst, src1, src2); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++*/ ++ ++/* ++instruct andnL_nReg_Reg(mRegL dst, mRegL src1, mRegL src2, immL_M1 M1) %{ ++ match(Set dst (AndL (XorL src1 M1) src2)); ++ predicate(UseLEXT3); ++ ++ format %{ "andn $dst, $src2, $src1 #@andnL_nReg_Reg" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src1 = $src1$$Register; ++ Register src2 = $src2$$Register; ++ ++ __ gsandn(dst, src2, src1); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++*/ ++ ++/* ++instruct ornL_nReg_Reg(mRegL dst, mRegL src1, mRegL src2, immL_M1 M1) %{ ++ match(Set dst (OrL (XorL src1 M1) src2)); ++ predicate(UseLEXT3); ++ ++ format %{ "orn $dst, $src2, $src1 #@ornL_nReg_Reg" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src1 = $src1$$Register; ++ Register src2 = $src2$$Register; ++ ++ __ gsorn(dst, src2, src1); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++*/ ++ ++instruct andL_Reg_immL_M8(mRegL dst, immL_M8 M8) 
%{ ++ match(Set dst (AndL dst M8)); ++ ins_cost(60); ++ ++ format %{ "and $dst, $dst, $M8 #@andL_Reg_immL_M8" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ ++ __ dins(dst, R0, 0, 3); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++instruct andL_Reg_immL_M5(mRegL dst, immL_M5 M5) %{ ++ match(Set dst (AndL dst M5)); ++ ins_cost(60); ++ ++ format %{ "and $dst, $dst, $M5 #@andL_Reg_immL_M5" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ ++ __ dins(dst, R0, 2, 1); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++instruct andL_Reg_immL_M7(mRegL dst, immL_M7 M7) %{ ++ match(Set dst (AndL dst M7)); ++ ins_cost(60); ++ ++ format %{ "and $dst, $dst, $M7 #@andL_Reg_immL_M7" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ ++ __ dins(dst, R0, 1, 2); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++instruct andL_Reg_immL_M4(mRegL dst, immL_M4 M4) %{ ++ match(Set dst (AndL dst M4)); ++ ins_cost(60); ++ ++ format %{ "and $dst, $dst, $M4 #@andL_Reg_immL_M4" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ ++ __ dins(dst, R0, 0, 2); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++instruct andL_Reg_immL_M121(mRegL dst, immL_M121 M121) %{ ++ match(Set dst (AndL dst M121)); ++ ins_cost(60); ++ ++ format %{ "and $dst, $dst, $M121 #@andL_Reg_immL_M121" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ ++ __ dins(dst, R0, 3, 4); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++// Or Long Register with Register ++instruct orL_Reg_Reg(mRegL dst, mRegL src1, mRegL src2) %{ ++ match(Set dst (OrL src1 src2)); ++ format %{ "OR $dst, $src1, $src2 @ orL_Reg_Reg\t" %} ++ ins_encode %{ ++ Register dst_reg = $dst$$Register; ++ Register src1_reg = $src1$$Register; ++ Register src2_reg = $src2$$Register; ++ ++ __ orr(dst_reg, src1_reg, src2_reg); ++ %} ++ ins_pipe( ialu_regL_regL ); ++%} ++ ++instruct orL_Reg_P2XReg(mRegL dst, mRegP src1, mRegL src2) %{ ++ match(Set dst (OrL (CastP2X src1) src2)); ++ format %{ "OR $dst, $src1, $src2 @ orL_Reg_P2XReg\t" %} ++ ins_encode %{ ++ Register dst_reg = $dst$$Register; ++ Register src1_reg = $src1$$Register; ++ Register src2_reg = $src2$$Register; ++ ++ __ orr(dst_reg, src1_reg, src2_reg); ++ %} ++ ins_pipe( ialu_regL_regL ); ++%} ++ ++// Xor Long Register with Register ++instruct xorL_Reg_Reg(mRegL dst, mRegL src1, mRegL src2) %{ ++ match(Set dst (XorL src1 src2)); ++ format %{ "XOR $dst, $src1, $src2 @ xorL_Reg_Reg\t" %} ++ ins_encode %{ ++ Register dst_reg = as_Register($dst$$reg); ++ Register src1_reg = as_Register($src1$$reg); ++ Register src2_reg = as_Register($src2$$reg); ++ ++ __ xorr(dst_reg, src1_reg, src2_reg); ++ %} ++ ins_pipe( ialu_regL_regL ); ++%} ++ ++// Shift Left by 8-bit immediate ++instruct salI_Reg_imm(mRegI dst, mRegI src, immI8 shift) %{ ++ match(Set dst (LShiftI src shift)); ++ ++ format %{ "SHL $dst, $src, $shift #@salI_Reg_imm" %} ++ ins_encode %{ ++ Register src = $src$$Register; ++ Register dst = $dst$$Register; ++ int shamt = $shift$$constant; ++ ++ __ sll(dst, src, shamt); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++instruct salL2I_Reg_imm(mRegI dst, mRegL src, immI8 shift) %{ ++ match(Set dst (LShiftI (ConvL2I src) shift)); ++ ++ format %{ "SHL $dst, $src, $shift #@salL2I_Reg_imm" %} ++ ins_encode %{ ++ Register src = $src$$Register; ++ Register dst = $dst$$Register; ++ int shamt = $shift$$constant; ++ ++ __ sll(dst, src, shamt); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++instruct salI_Reg_imm_and_M65536(mRegI dst, mRegI src, immI_16 shift, immI_M65536 mask) %{ ++ match(Set dst (AndI (LShiftI src shift) mask)); 
++ ++ format %{ "SHL $dst, $src, $shift #@salI_Reg_imm_and_M65536" %} ++ ins_encode %{ ++ Register src = $src$$Register; ++ Register dst = $dst$$Register; ++ ++ __ sll(dst, src, 16); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++instruct land7_2_s(mRegI dst, mRegL src, immL_7 seven, immI_16 sixteen) ++%{ ++ match(Set dst (RShiftI (LShiftI (ConvL2I (AndL src seven)) sixteen) sixteen)); ++ ++ format %{ "andi $dst, $src, 7\t# @land7_2_s" %} ++ ins_encode %{ ++ Register src = $src$$Register; ++ Register dst = $dst$$Register; ++ ++ __ andi(dst, src, 7); ++ %} ++ ins_pipe(ialu_regI_regI); ++%} ++ ++// Logical Shift Right by 16, followed by Arithmetic Shift Left by 16. ++// This idiom is used by the compiler the i2s bytecode. ++instruct i2s(mRegI dst, mRegI src, immI_16 sixteen) ++%{ ++ match(Set dst (RShiftI (LShiftI src sixteen) sixteen)); ++ ++ format %{ "i2s $dst, $src\t# @i2s" %} ++ ins_encode %{ ++ Register src = $src$$Register; ++ Register dst = $dst$$Register; ++ ++ __ seh(dst, src); ++ %} ++ ins_pipe(ialu_regI_regI); ++%} ++ ++// Logical Shift Right by 24, followed by Arithmetic Shift Left by 24. ++// This idiom is used by the compiler for the i2b bytecode. ++instruct i2b(mRegI dst, mRegI src, immI_24 twentyfour) ++%{ ++ match(Set dst (RShiftI (LShiftI src twentyfour) twentyfour)); ++ ++ format %{ "i2b $dst, $src\t# @i2b" %} ++ ins_encode %{ ++ Register src = $src$$Register; ++ Register dst = $dst$$Register; ++ ++ __ seb(dst, src); ++ %} ++ ins_pipe(ialu_regI_regI); ++%} ++ ++ ++instruct salI_RegL2I_imm(mRegI dst, mRegL src, immI8 shift) %{ ++ match(Set dst (LShiftI (ConvL2I src) shift)); ++ ++ format %{ "SHL $dst, $src, $shift #@salI_RegL2I_imm" %} ++ ins_encode %{ ++ Register src = $src$$Register; ++ Register dst = $dst$$Register; ++ int shamt = $shift$$constant; ++ ++ __ sll(dst, src, shamt); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++// Shift Left by 8-bit immediate ++instruct salI_Reg_Reg(mRegI dst, mRegI src, mRegI shift) %{ ++ match(Set dst (LShiftI src shift)); ++ ++ format %{ "SHL $dst, $src, $shift #@salI_Reg_Reg" %} ++ ins_encode %{ ++ Register src = $src$$Register; ++ Register dst = $dst$$Register; ++ Register shamt = $shift$$Register; ++ __ sllv(dst, src, shamt); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++ ++// Shift Left Long ++instruct salL_Reg_imm(mRegL dst, mRegL src, immI8 shift) %{ ++ match(Set dst (LShiftL src shift)); ++ ins_cost(100); ++ format %{ "salL $dst, $src, $shift @ salL_Reg_imm" %} ++ ins_encode %{ ++ Register src_reg = as_Register($src$$reg); ++ Register dst_reg = as_Register($dst$$reg); ++ int shamt = $shift$$constant; ++ ++ if (__ is_simm(shamt, 5)) ++ __ dsll(dst_reg, src_reg, shamt); ++ else { ++ int sa = Assembler::low(shamt, 6); ++ if (sa < 32) { ++ __ dsll(dst_reg, src_reg, sa); ++ } else { ++ __ dsll32(dst_reg, src_reg, sa - 32); ++ } ++ } ++ %} ++ ins_pipe( ialu_regL_regL ); ++%} ++ ++instruct salL_RegI2L_imm(mRegL dst, mRegI src, immI8 shift) %{ ++ match(Set dst (LShiftL (ConvI2L src) shift)); ++ ins_cost(100); ++ format %{ "salL $dst, $src, $shift @ salL_RegI2L_imm" %} ++ ins_encode %{ ++ Register src_reg = as_Register($src$$reg); ++ Register dst_reg = as_Register($dst$$reg); ++ int shamt = $shift$$constant; ++ ++ if (__ is_simm(shamt, 5)) ++ __ dsll(dst_reg, src_reg, shamt); ++ else { ++ int sa = Assembler::low(shamt, 6); ++ if (sa < 32) { ++ __ dsll(dst_reg, src_reg, sa); ++ } else { ++ __ dsll32(dst_reg, src_reg, sa - 32); ++ } ++ } ++ %} ++ ins_pipe( ialu_regL_regL ); ++%} ++ ++// Shift Left Long ++instruct salL_Reg_Reg(mRegL dst, 
mRegL src, mRegI shift) %{ ++ match(Set dst (LShiftL src shift)); ++ ins_cost(100); ++ format %{ "salL $dst, $src, $shift @ salL_Reg_Reg" %} ++ ins_encode %{ ++ Register src_reg = as_Register($src$$reg); ++ Register dst_reg = as_Register($dst$$reg); ++ ++ __ dsllv(dst_reg, src_reg, $shift$$Register); ++ %} ++ ins_pipe( ialu_regL_regL ); ++%} ++ ++// Shift Right Long ++instruct sarL_Reg_imm(mRegL dst, mRegL src, immI8 shift) %{ ++ match(Set dst (RShiftL src shift)); ++ ins_cost(100); ++ format %{ "sarL $dst, $src, $shift @ sarL_Reg_imm" %} ++ ins_encode %{ ++ Register src_reg = as_Register($src$$reg); ++ Register dst_reg = as_Register($dst$$reg); ++ int shamt = ($shift$$constant & 0x3f); ++ if (__ is_simm(shamt, 5)) ++ __ dsra(dst_reg, src_reg, shamt); ++ else { ++ int sa = Assembler::low(shamt, 6); ++ if (sa < 32) { ++ __ dsra(dst_reg, src_reg, sa); ++ } else { ++ __ dsra32(dst_reg, src_reg, sa - 32); ++ } ++ } ++ %} ++ ins_pipe( ialu_regL_regL ); ++%} ++ ++instruct sarL2I_Reg_immI_32_63(mRegI dst, mRegL src, immI_32_63 shift) %{ ++ match(Set dst (ConvL2I (RShiftL src shift))); ++ ins_cost(100); ++ format %{ "sarL $dst, $src, $shift @ sarL2I_Reg_immI_32_63" %} ++ ins_encode %{ ++ Register src_reg = as_Register($src$$reg); ++ Register dst_reg = as_Register($dst$$reg); ++ int shamt = $shift$$constant; ++ ++ __ dsra32(dst_reg, src_reg, shamt - 32); ++ %} ++ ins_pipe( ialu_regL_regL ); ++%} ++ ++// Shift Right Long arithmetically ++instruct sarL_Reg_Reg(mRegL dst, mRegL src, mRegI shift) %{ ++ match(Set dst (RShiftL src shift)); ++ ins_cost(100); ++ format %{ "sarL $dst, $src, $shift @ sarL_Reg_Reg" %} ++ ins_encode %{ ++ Register src_reg = as_Register($src$$reg); ++ Register dst_reg = as_Register($dst$$reg); ++ ++ __ dsrav(dst_reg, src_reg, $shift$$Register); ++ %} ++ ins_pipe( ialu_regL_regL ); ++%} ++ ++// Shift Right Long logically ++instruct slrL_Reg_Reg(mRegL dst, mRegL src, mRegI shift) %{ ++ match(Set dst (URShiftL src shift)); ++ ins_cost(100); ++ format %{ "slrL $dst, $src, $shift @ slrL_Reg_Reg" %} ++ ins_encode %{ ++ Register src_reg = as_Register($src$$reg); ++ Register dst_reg = as_Register($dst$$reg); ++ ++ __ dsrlv(dst_reg, src_reg, $shift$$Register); ++ %} ++ ins_pipe( ialu_regL_regL ); ++%} ++ ++instruct slrL_Reg_immI_0_31(mRegL dst, mRegL src, immI_0_31 shift) %{ ++ match(Set dst (URShiftL src shift)); ++ ins_cost(80); ++ format %{ "slrL $dst, $src, $shift @ slrL_Reg_immI_0_31" %} ++ ins_encode %{ ++ Register src_reg = as_Register($src$$reg); ++ Register dst_reg = as_Register($dst$$reg); ++ int shamt = $shift$$constant; ++ ++ __ dsrl(dst_reg, src_reg, shamt); ++ %} ++ ins_pipe( ialu_regL_regL ); ++%} ++ ++instruct slrL_Reg_immI_0_31_and_max_int(mRegI dst, mRegL src, immI_0_31 shift, immI_MaxI max_int) %{ ++ match(Set dst (AndI (ConvL2I (URShiftL src shift)) max_int)); ++ ins_cost(80); ++ format %{ "dext $dst, $src, $shift, 31 @ slrL_Reg_immI_0_31_and_max_int" %} ++ ins_encode %{ ++ Register src_reg = as_Register($src$$reg); ++ Register dst_reg = as_Register($dst$$reg); ++ int shamt = $shift$$constant; ++ ++ __ dext(dst_reg, src_reg, shamt, 31); ++ %} ++ ins_pipe( ialu_regL_regL ); ++%} ++ ++instruct slrL_P2XReg_immI_0_31(mRegL dst, mRegP src, immI_0_31 shift) %{ ++ match(Set dst (URShiftL (CastP2X src) shift)); ++ ins_cost(80); ++ format %{ "slrL $dst, $src, $shift @ slrL_P2XReg_immI_0_31" %} ++ ins_encode %{ ++ Register src_reg = as_Register($src$$reg); ++ Register dst_reg = as_Register($dst$$reg); ++ int shamt = $shift$$constant; ++ ++ __ dsrl(dst_reg, src_reg, shamt); ++ 
%} ++ ins_pipe( ialu_regL_regL ); ++%} ++ ++instruct slrL_Reg_immI_32_63(mRegL dst, mRegL src, immI_32_63 shift) %{ ++ match(Set dst (URShiftL src shift)); ++ ins_cost(80); ++ format %{ "slrL $dst, $src, $shift @ slrL_Reg_immI_32_63" %} ++ ins_encode %{ ++ Register src_reg = as_Register($src$$reg); ++ Register dst_reg = as_Register($dst$$reg); ++ int shamt = $shift$$constant; ++ ++ __ dsrl32(dst_reg, src_reg, shamt - 32); ++ %} ++ ins_pipe( ialu_regL_regL ); ++%} ++ ++instruct slrL_Reg_immI_convL2I(mRegI dst, mRegL src, immI_32_63 shift) %{ ++ match(Set dst (ConvL2I (URShiftL src shift))); ++ predicate(n->in(1)->in(2)->get_int() > 32); ++ ins_cost(80); ++ format %{ "slrL $dst, $src, $shift @ slrL_Reg_immI_convL2I" %} ++ ins_encode %{ ++ Register src_reg = as_Register($src$$reg); ++ Register dst_reg = as_Register($dst$$reg); ++ int shamt = $shift$$constant; ++ ++ __ dsrl32(dst_reg, src_reg, shamt - 32); ++ %} ++ ins_pipe( ialu_regL_regL ); ++%} ++ ++instruct slrL_P2XReg_immI_32_63(mRegL dst, mRegP src, immI_32_63 shift) %{ ++ match(Set dst (URShiftL (CastP2X src) shift)); ++ ins_cost(80); ++ format %{ "slrL $dst, $src, $shift @ slrL_P2XReg_immI_32_63" %} ++ ins_encode %{ ++ Register src_reg = as_Register($src$$reg); ++ Register dst_reg = as_Register($dst$$reg); ++ int shamt = $shift$$constant; ++ ++ __ dsrl32(dst_reg, src_reg, shamt - 32); ++ %} ++ ins_pipe( ialu_regL_regL ); ++%} ++ ++// Xor Instructions ++// Xor Register with Register ++instruct xorI_Reg_Reg(mRegI dst, mRegI src1, mRegI src2) %{ ++ match(Set dst (XorI src1 src2)); ++ ++ format %{ "XOR $dst, $src1, $src2 #@xorI_Reg_Reg" %} ++ ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src1 = $src1$$Register; ++ Register src2 = $src2$$Register; ++ __ xorr(dst, src1, src2); ++ %} ++ ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++// Or Instructions ++instruct orI_Reg_imm(mRegI dst, mRegI src1, immI_0_32767 src2) %{ ++ match(Set dst (OrI src1 src2)); ++ ++ format %{ "OR $dst, $src1, $src2 #@orI_Reg_imm" %} ++ ins_encode %{ ++ __ ori($dst$$Register, $src1$$Register, $src2$$constant); ++ %} ++ ++ ins_pipe( ialu_regI_regI ); ++%} ++// Or Register with Register ++instruct orI_Reg_Reg(mRegI dst, mRegI src1, mRegI src2) %{ ++ match(Set dst (OrI src1 src2)); ++ ++ format %{ "OR $dst, $src1, $src2 #@orI_Reg_Reg" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src1 = $src1$$Register; ++ Register src2 = $src2$$Register; ++ __ orr(dst, src1, src2); ++ %} ++ ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++instruct rotI_shr_logical_Reg(mRegI dst, mRegI src, immI_0_31 rshift, immI_0_31 lshift, immI_1 one) %{ ++ match(Set dst (OrI (URShiftI src rshift) (LShiftI (AndI src one) lshift))); ++ predicate(32 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()))); ++ ++ format %{ "rotr $dst, $src, 1 ...\n\t" ++ "srl $dst, $dst, ($rshift-1) @ rotI_shr_logical_Reg" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src = $src$$Register; ++ int rshift = $rshift$$constant; ++ ++ __ rotr(dst, src, 1); ++ if (rshift - 1) { ++ __ srl(dst, dst, rshift - 1); ++ } ++ %} ++ ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++instruct orI_Reg_castP2X(mRegL dst, mRegL src1, mRegP src2) %{ ++ match(Set dst (OrI src1 (CastP2X src2))); ++ ++ format %{ "OR $dst, $src1, $src2 #@orI_Reg_castP2X" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src1 = $src1$$Register; ++ Register src2 = $src2$$Register; ++ __ orr(dst, src1, src2); ++ %} ++ ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++// Logical Shift Right by 8-bit immediate 
++instruct shr_logical_Reg_imm(mRegI dst, mRegI src, immI8 shift) %{ ++ match(Set dst (URShiftI src shift)); ++ //effect(KILL cr); ++ ++ format %{ "SRL $dst, $src, $shift #@shr_logical_Reg_imm" %} ++ ins_encode %{ ++ Register src = $src$$Register; ++ Register dst = $dst$$Register; ++ int shift = $shift$$constant; ++ ++ __ srl(dst, src, shift); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++instruct shr_logical_Reg_imm_nonneg_mask(mRegI dst, mRegI src, immI_0_31 shift, immI_nonneg_mask mask) %{ ++ match(Set dst (AndI (URShiftI src shift) mask)); ++ ++ format %{ "ext $dst, $src, $shift, one-bits($mask) #@shr_logical_Reg_imm_nonneg_mask" %} ++ ins_encode %{ ++ Register src = $src$$Register; ++ Register dst = $dst$$Register; ++ int pos = $shift$$constant; ++ int size = Assembler::is_int_mask($mask$$constant); ++ ++ __ ext(dst, src, pos, size); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++instruct rolI_Reg_immI_0_31(mRegI dst, immI_0_31 lshift, immI_0_31 rshift) ++%{ ++ predicate(0 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()) & 0x1f)); ++ match(Set dst (OrI (LShiftI dst lshift) (URShiftI dst rshift))); ++ ++ ins_cost(100); ++ format %{ "rotr $dst, $dst, $rshift #@rolI_Reg_immI_0_31" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ int sa = $rshift$$constant; ++ ++ __ rotr(dst, dst, sa); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++instruct rolL_Reg_immI_0_31(mRegL dst, mRegL src, immI_32_63 lshift, immI_0_31 rshift) ++%{ ++ predicate(0 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()) & 0x3f)); ++ match(Set dst (OrL (LShiftL src lshift) (URShiftL src rshift))); ++ ++ ins_cost(100); ++ format %{ "rotr $dst, $src, $rshift #@rolL_Reg_immI_0_31" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src = $src$$Register; ++ int sa = $rshift$$constant; ++ ++ __ drotr(dst, src, sa); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++instruct rolL_Reg_immI_32_63(mRegL dst, mRegL src, immI_0_31 lshift, immI_32_63 rshift) ++%{ ++ predicate(0 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()) & 0x3f)); ++ match(Set dst (OrL (LShiftL src lshift) (URShiftL src rshift))); ++ ++ ins_cost(100); ++ format %{ "rotr $dst, $src, $rshift #@rolL_Reg_immI_32_63" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src = $src$$Register; ++ int sa = $rshift$$constant; ++ ++ __ drotr32(dst, src, sa - 32); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++instruct rorI_Reg_immI_0_31(mRegI dst, mRegI src, immI_0_31 rshift, immI_0_31 lshift) ++%{ ++ predicate(0 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()) & 0x1f)); ++ match(Set dst (OrI (URShiftI src rshift) (LShiftI src lshift))); ++ ++ ins_cost(100); ++ format %{ "rotr $dst, $src, $rshift #@rorI_Reg_immI_0_31" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src = $src$$Register; ++ int sa = $rshift$$constant; ++ ++ __ rotr(dst, src, sa); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++instruct rorL_Reg_immI_0_31(mRegL dst, mRegL src, immI_0_31 rshift, immI_32_63 lshift) ++%{ ++ predicate(0 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()) & 0x3f)); ++ match(Set dst (OrL (URShiftL src rshift) (LShiftL src lshift))); ++ ++ ins_cost(100); ++ format %{ "rotr $dst, $src, $rshift #@rorL_Reg_immI_0_31" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src = $src$$Register; ++ int sa = $rshift$$constant; ++ ++ __ drotr(dst, src, sa); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++instruct rorL_Reg_immI_32_63(mRegL dst, mRegL src, immI_32_63 rshift, 
immI_0_31 lshift) ++%{ ++ predicate(0 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()) & 0x3f)); ++ match(Set dst (OrL (URShiftL src rshift) (LShiftL src lshift))); ++ ++ ins_cost(100); ++ format %{ "rotr $dst, $src, $rshift #@rorL_Reg_immI_32_63" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src = $src$$Register; ++ int sa = $rshift$$constant; ++ ++ __ drotr32(dst, src, sa - 32); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++// Logical Shift Right ++instruct shr_logical_Reg_Reg(mRegI dst, mRegI src, mRegI shift) %{ ++ match(Set dst (URShiftI src shift)); ++ ++ format %{ "SRL $dst, $src, $shift #@shr_logical_Reg_Reg" %} ++ ins_encode %{ ++ Register src = $src$$Register; ++ Register dst = $dst$$Register; ++ Register shift = $shift$$Register; ++ __ srlv(dst, src, shift); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++ ++instruct shr_arith_Reg_imm(mRegI dst, mRegI src, immI8 shift) %{ ++ match(Set dst (RShiftI src shift)); ++ // effect(KILL cr); ++ ++ format %{ "SRA $dst, $src, $shift #@shr_arith_Reg_imm" %} ++ ins_encode %{ ++ Register src = $src$$Register; ++ Register dst = $dst$$Register; ++ int shift = $shift$$constant; ++ __ sra(dst, src, shift); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++instruct shr_arith_Reg_Reg(mRegI dst, mRegI src, mRegI shift) %{ ++ match(Set dst (RShiftI src shift)); ++ // effect(KILL cr); ++ ++ format %{ "SRA $dst, $src, $shift #@shr_arith_Reg_Reg" %} ++ ins_encode %{ ++ Register src = $src$$Register; ++ Register dst = $dst$$Register; ++ Register shift = $shift$$Register; ++ __ srav(dst, src, shift); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++//----------Convert Int to Boolean--------------------------------------------- ++ ++instruct convI2B(mRegI dst, mRegI src) %{ ++ match(Set dst (Conv2B src)); ++ ++ ins_cost(100); ++ format %{ "convI2B $dst, $src @ convI2B" %} ++ ins_encode %{ ++ Register dst = as_Register($dst$$reg); ++ Register src = as_Register($src$$reg); ++ ++ if (dst != src) { ++ __ daddiu(dst, R0, 1); ++ __ movz(dst, R0, src); ++ } else { ++ __ move(AT, src); ++ __ daddiu(dst, R0, 1); ++ __ movz(dst, R0, AT); ++ } ++ %} ++ ++ ins_pipe( ialu_regL_regL ); ++%} ++ ++instruct convI2L_reg( mRegL dst, mRegI src) %{ ++ match(Set dst (ConvI2L src)); ++ ++ ins_cost(100); ++ format %{ "SLL $dst, $src @ convI2L_reg\t" %} ++ ins_encode %{ ++ Register dst = as_Register($dst$$reg); ++ Register src = as_Register($src$$reg); ++ ++ if(dst != src) __ sll(dst, src, 0); ++ %} ++ ins_pipe( ialu_regL_regL ); ++%} ++ ++ ++instruct convL2I_reg( mRegI dst, mRegL src ) %{ ++ match(Set dst (ConvL2I src)); ++ ++ format %{ "MOV $dst, $src @ convL2I_reg" %} ++ ins_encode %{ ++ Register dst = as_Register($dst$$reg); ++ Register src = as_Register($src$$reg); ++ ++ __ sll(dst, src, 0); ++ %} ++ ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++instruct convL2I2L_reg( mRegL dst, mRegL src ) %{ ++ match(Set dst (ConvI2L (ConvL2I src))); ++ ++ format %{ "sll $dst, $src, 0 @ convL2I2L_reg" %} ++ ins_encode %{ ++ Register dst = as_Register($dst$$reg); ++ Register src = as_Register($src$$reg); ++ ++ __ sll(dst, src, 0); ++ %} ++ ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++instruct convL2D_reg( regD dst, mRegL src ) %{ ++ match(Set dst (ConvL2D src)); ++ format %{ "convL2D $dst, $src @ convL2D_reg" %} ++ ins_encode %{ ++ Register src = as_Register($src$$reg); ++ FloatRegister dst = as_FloatRegister($dst$$reg); ++ ++ __ dmtc1(src, dst); ++ __ cvt_d_l(dst, dst); ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++ ++instruct convD2L_reg_fast( mRegL dst, regD src ) %{ ++ 
match(Set dst (ConvD2L src)); ++ ins_cost(150); ++ format %{ "convD2L $dst, $src @ convD2L_reg_fast" %} ++ ins_encode %{ ++ Register dst = as_Register($dst$$reg); ++ FloatRegister src = as_FloatRegister($src$$reg); ++ ++ Label Done; ++ ++ __ trunc_l_d(F30, src); ++ // max_long: 0x7fffffffffffffff ++ // __ set64(AT, 0x7fffffffffffffff); ++ __ daddiu(AT, R0, -1); ++ __ dsrl(AT, AT, 1); ++ __ dmfc1(dst, F30); ++ ++ __ bne(dst, AT, Done); ++ __ delayed()->mtc1(R0, F30); ++ ++ __ cvt_d_w(F30, F30); ++ __ c_ult_d(src, F30); ++ __ bc1f(Done); ++ __ delayed()->daddiu(T9, R0, -1); ++ ++ __ c_un_d(src, src); //NaN? ++ __ subu(dst, T9, AT); ++ __ movt(dst, R0); ++ ++ __ bind(Done); ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++ ++instruct convD2L_reg_slow( mRegL dst, regD src ) %{ ++ match(Set dst (ConvD2L src)); ++ ins_cost(250); ++ format %{ "convD2L $dst, $src @ convD2L_reg_slow" %} ++ ins_encode %{ ++ Register dst = as_Register($dst$$reg); ++ FloatRegister src = as_FloatRegister($src$$reg); ++ ++ Label L; ++ ++ __ c_un_d(src, src); //NaN? ++ __ bc1t(L); ++ __ delayed(); ++ __ move(dst, R0); ++ ++ __ trunc_l_d(F30, src); ++ __ cfc1(AT, 31); ++ __ li(T9, 0x10000); ++ __ andr(AT, AT, T9); ++ __ beq(AT, R0, L); ++ __ delayed()->dmfc1(dst, F30); ++ ++ __ mov_d(F12, src); ++ __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::d2l), 1); ++ __ move(dst, V0); ++ __ bind(L); ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++ ++instruct convF2I_reg_fast( mRegI dst, regF src ) %{ ++ match(Set dst (ConvF2I src)); ++ ins_cost(150); ++ format %{ "convf2i $dst, $src @ convF2I_reg_fast" %} ++ ins_encode %{ ++ Register dreg = $dst$$Register; ++ FloatRegister fval = $src$$FloatRegister; ++ Label L; ++ ++ __ trunc_w_s(F30, fval); ++ __ move(AT, 0x7fffffff); ++ __ mfc1(dreg, F30); ++ __ c_un_s(fval, fval); //NaN? ++ __ movt(dreg, R0); ++ ++ __ bne(AT, dreg, L); ++ __ delayed()->lui(T9, 0x8000); ++ ++ __ mfc1(AT, fval); ++ __ andr(AT, AT, T9); ++ ++ __ movn(dreg, T9, AT); ++ ++ __ bind(L); ++ ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++ ++ ++instruct convF2I_reg_slow( mRegI dst, regF src ) %{ ++ match(Set dst (ConvF2I src)); ++ ins_cost(250); ++ format %{ "convf2i $dst, $src @ convF2I_reg_slow" %} ++ ins_encode %{ ++ Register dreg = $dst$$Register; ++ FloatRegister fval = $src$$FloatRegister; ++ Label L; ++ ++ __ c_un_s(fval, fval); //NaN? ++ __ bc1t(L); ++ __ delayed(); ++ __ move(dreg, R0); ++ ++ __ trunc_w_s(F30, fval); ++ ++ /* Call SharedRuntime:f2i() to do valid convention */ ++ __ cfc1(AT, 31); ++ __ li(T9, 0x10000); ++ __ andr(AT, AT, T9); ++ __ beq(AT, R0, L); ++ __ delayed()->mfc1(dreg, F30); ++ ++ __ mov_s(F12, fval); ++ ++ //This bug was found when running ezDS's control-panel. ++ // J 982 C2 javax.swing.text.BoxView.layoutMajorAxis(II[I[I)V (283 bytes) @ 0x000000555c46aa74 ++ // ++ // An interger array index has been assigned to V0, and then changed from 1 to Integer.MAX_VALUE. ++ // V0 is corrupted during call_VM_leaf(), and should be preserved. 
++ // ++ __ push(fval); ++ if(dreg != V0) { ++ __ push(V0); ++ } ++ __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::f2i), 1); ++ if(dreg != V0) { ++ __ move(dreg, V0); ++ __ pop(V0); ++ } ++ __ pop(fval); ++ __ bind(L); ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++ ++instruct convF2L_reg_fast( mRegL dst, regF src ) %{ ++ match(Set dst (ConvF2L src)); ++ ins_cost(150); ++ format %{ "convf2l $dst, $src @ convF2L_reg_fast" %} ++ ins_encode %{ ++ Register dreg = $dst$$Register; ++ FloatRegister fval = $src$$FloatRegister; ++ Label L; ++ ++ __ trunc_l_s(F30, fval); ++ __ daddiu(AT, R0, -1); ++ __ dsrl(AT, AT, 1); ++ __ dmfc1(dreg, F30); ++ __ c_un_s(fval, fval); //NaN? ++ __ movt(dreg, R0); ++ ++ __ bne(AT, dreg, L); ++ __ delayed()->lui(T9, 0x8000); ++ ++ __ mfc1(AT, fval); ++ __ andr(AT, AT, T9); ++ ++ __ dsll32(T9, T9, 0); ++ __ movn(dreg, T9, AT); ++ ++ __ bind(L); ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++ ++instruct convF2L_reg_slow( mRegL dst, regF src ) %{ ++ match(Set dst (ConvF2L src)); ++ ins_cost(250); ++ format %{ "convf2l $dst, $src @ convF2L_reg_slow" %} ++ ins_encode %{ ++ Register dst = as_Register($dst$$reg); ++ FloatRegister fval = $src$$FloatRegister; ++ Label L; ++ ++ __ c_un_s(fval, fval); //NaN? ++ __ bc1t(L); ++ __ delayed(); ++ __ move(dst, R0); ++ ++ __ trunc_l_s(F30, fval); ++ __ cfc1(AT, 31); ++ __ li(T9, 0x10000); ++ __ andr(AT, AT, T9); ++ __ beq(AT, R0, L); ++ __ delayed()->dmfc1(dst, F30); ++ ++ __ mov_s(F12, fval); ++ __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::f2l), 1); ++ __ move(dst, V0); ++ __ bind(L); ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct convL2F_reg( regF dst, mRegL src ) %{ ++ match(Set dst (ConvL2F src)); ++ format %{ "convl2f $dst, $src @ convL2F_reg" %} ++ ins_encode %{ ++ FloatRegister dst = $dst$$FloatRegister; ++ Register src = as_Register($src$$reg); ++ Label L; ++ ++ __ dmtc1(src, dst); ++ __ cvt_s_l(dst, dst); ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct convI2F_reg( regF dst, mRegI src ) %{ ++ match(Set dst (ConvI2F src)); ++ format %{ "convi2f $dst, $src @ convI2F_reg" %} ++ ins_encode %{ ++ Register src = $src$$Register; ++ FloatRegister dst = $dst$$FloatRegister; ++ ++ __ mtc1(src, dst); ++ __ cvt_s_w(dst, dst); ++ %} ++ ++ ins_pipe( fpu_regF_regF ); ++%} ++ ++instruct cmpLTMask_immI_0( mRegI dst, mRegI p, immI_0 zero ) %{ ++ match(Set dst (CmpLTMask p zero)); ++ ins_cost(100); ++ ++ format %{ "sra $dst, $p, 31 @ cmpLTMask_immI_0" %} ++ ins_encode %{ ++ Register src = $p$$Register; ++ Register dst = $dst$$Register; ++ ++ __ sra(dst, src, 31); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++ ++instruct cmpLTMask( mRegI dst, mRegI p, mRegI q ) %{ ++ match(Set dst (CmpLTMask p q)); ++ ins_cost(400); ++ ++ format %{ "cmpLTMask $dst, $p, $q @ cmpLTMask" %} ++ ins_encode %{ ++ Register p = $p$$Register; ++ Register q = $q$$Register; ++ Register dst = $dst$$Register; ++ ++ __ slt(dst, p, q); ++ __ subu(dst, R0, dst); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct convP2B(mRegI dst, mRegP src) %{ ++ match(Set dst (Conv2B src)); ++ ++ ins_cost(100); ++ format %{ "convP2B $dst, $src @ convP2B" %} ++ ins_encode %{ ++ Register dst = as_Register($dst$$reg); ++ Register src = as_Register($src$$reg); ++ ++ if (dst != src) { ++ __ daddiu(dst, R0, 1); ++ __ movz(dst, R0, src); ++ } else { ++ __ move(AT, src); ++ __ daddiu(dst, R0, 1); ++ __ movz(dst, R0, AT); ++ } ++ %} ++ ++ ins_pipe( ialu_regL_regL ); ++%} ++ ++ ++instruct convI2D_reg_reg(regD dst, mRegI src) %{ ++ match(Set dst (ConvI2D src)); ++ format %{ 
"conI2D $dst, $src @convI2D_reg" %} ++ ins_encode %{ ++ Register src = $src$$Register; ++ FloatRegister dst = $dst$$FloatRegister; ++ __ mtc1(src, dst); ++ __ cvt_d_w(dst, dst); ++ %} ++ ins_pipe( fpu_regF_regF ); ++%} ++ ++instruct convF2D_reg_reg(regD dst, regF src) %{ ++ match(Set dst (ConvF2D src)); ++ format %{ "convF2D $dst, $src\t# @convF2D_reg_reg" %} ++ ins_encode %{ ++ FloatRegister dst = $dst$$FloatRegister; ++ FloatRegister src = $src$$FloatRegister; ++ ++ __ cvt_d_s(dst, src); ++ %} ++ ins_pipe( fpu_regF_regF ); ++%} ++ ++instruct convD2F_reg_reg(regF dst, regD src) %{ ++ match(Set dst (ConvD2F src)); ++ format %{ "convD2F $dst, $src\t# @convD2F_reg_reg" %} ++ ins_encode %{ ++ FloatRegister dst = $dst$$FloatRegister; ++ FloatRegister src = $src$$FloatRegister; ++ ++ __ cvt_s_d(dst, src); ++ %} ++ ins_pipe( fpu_regF_regF ); ++%} ++ ++ ++// Convert a double to an int. If the double is a NAN, stuff a zero in instead. ++instruct convD2I_reg_reg_fast( mRegI dst, regD src ) %{ ++ match(Set dst (ConvD2I src)); ++ ++ ins_cost(150); ++ format %{ "convD2I $dst, $src\t# @ convD2I_reg_reg_fast" %} ++ ++ ins_encode %{ ++ FloatRegister src = $src$$FloatRegister; ++ Register dst = $dst$$Register; ++ ++ Label Done; ++ ++ __ trunc_w_d(F30, src); ++ // max_int: 2147483647 ++ __ move(AT, 0x7fffffff); ++ __ mfc1(dst, F30); ++ ++ __ bne(dst, AT, Done); ++ __ delayed()->mtc1(R0, F30); ++ ++ __ cvt_d_w(F30, F30); ++ __ c_ult_d(src, F30); ++ __ bc1f(Done); ++ __ delayed()->addiu(T9, R0, -1); ++ ++ __ c_un_d(src, src); //NaN? ++ __ subu32(dst, T9, AT); ++ __ movt(dst, R0); ++ ++ __ bind(Done); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++ ++instruct convD2I_reg_reg_slow( mRegI dst, regD src ) %{ ++ match(Set dst (ConvD2I src)); ++ ++ ins_cost(250); ++ format %{ "convD2I $dst, $src\t# @ convD2I_reg_reg_slow" %} ++ ++ ins_encode %{ ++ FloatRegister src = $src$$FloatRegister; ++ Register dst = $dst$$Register; ++ Label L; ++ ++ __ trunc_w_d(F30, src); ++ __ cfc1(AT, 31); ++ __ li(T9, 0x10000); ++ __ andr(AT, AT, T9); ++ __ beq(AT, R0, L); ++ __ delayed()->mfc1(dst, F30); ++ ++ __ mov_d(F12, src); ++ __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::d2i), 1); ++ __ move(dst, V0); ++ __ bind(L); ++ ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++// Convert oop pointer into compressed form ++instruct encodeHeapOop(mRegN dst, mRegP src) %{ ++ predicate(n->bottom_type()->make_ptr()->ptr() != TypePtr::NotNull); ++ match(Set dst (EncodeP src)); ++ format %{ "encode_heap_oop $dst,$src" %} ++ ins_encode %{ ++ Register src = $src$$Register; ++ Register dst = $dst$$Register; ++ ++ __ encode_heap_oop(dst, src); ++ %} ++ ins_pipe( ialu_regL_regL ); ++%} ++ ++instruct encodeHeapOop_not_null(mRegN dst, mRegP src) %{ ++ predicate(n->bottom_type()->make_ptr()->ptr() == TypePtr::NotNull); ++ match(Set dst (EncodeP src)); ++ format %{ "encode_heap_oop_not_null $dst,$src @ encodeHeapOop_not_null" %} ++ ins_encode %{ ++ __ encode_heap_oop_not_null($dst$$Register, $src$$Register); ++ %} ++ ins_pipe( ialu_regL_regL ); ++%} ++ ++instruct decodeHeapOop(mRegP dst, mRegN src) %{ ++ predicate(n->bottom_type()->is_ptr()->ptr() != TypePtr::NotNull && ++ n->bottom_type()->is_ptr()->ptr() != TypePtr::Constant); ++ match(Set dst (DecodeN src)); ++ format %{ "decode_heap_oop $dst,$src @ decodeHeapOop" %} ++ ins_encode %{ ++ Register s = $src$$Register; ++ Register d = $dst$$Register; ++ ++ __ decode_heap_oop(d, s); ++ %} ++ ins_pipe( ialu_regL_regL ); ++%} ++ ++instruct decodeHeapOop_not_null(mRegP dst, mRegN src) %{ ++ 
predicate(n->bottom_type()->is_ptr()->ptr() == TypePtr::NotNull || ++ n->bottom_type()->is_ptr()->ptr() == TypePtr::Constant); ++ match(Set dst (DecodeN src)); ++ format %{ "decode_heap_oop_not_null $dst,$src @ decodeHeapOop_not_null" %} ++ ins_encode %{ ++ Register s = $src$$Register; ++ Register d = $dst$$Register; ++ if (s != d) { ++ __ decode_heap_oop_not_null(d, s); ++ } else { ++ __ decode_heap_oop_not_null(d); ++ } ++ %} ++ ins_pipe( ialu_regL_regL ); ++%} ++ ++instruct encodeKlass_not_null(mRegN dst, mRegP src) %{ ++ match(Set dst (EncodePKlass src)); ++ format %{ "encode_heap_oop_not_null $dst,$src @ encodeKlass_not_null" %} ++ ins_encode %{ ++ __ encode_klass_not_null($dst$$Register, $src$$Register); ++ %} ++ ins_pipe( ialu_regL_regL ); ++%} ++ ++instruct decodeKlass_not_null(mRegP dst, mRegN src) %{ ++ match(Set dst (DecodeNKlass src)); ++ format %{ "decode_heap_klass_not_null $dst,$src" %} ++ ins_encode %{ ++ Register s = $src$$Register; ++ Register d = $dst$$Register; ++ if (s != d) { ++ __ decode_klass_not_null(d, s); ++ } else { ++ __ decode_klass_not_null(d); ++ } ++ %} ++ ins_pipe( ialu_regL_regL ); ++%} ++ ++//FIXME ++instruct tlsLoadP(mRegP dst) %{ ++ match(Set dst (ThreadLocal)); ++ ++ ins_cost(0); ++ format %{ " get_thread in $dst #@tlsLoadP" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++#ifdef OPT_THREAD ++ __ move(dst, TREG); ++#else ++ __ get_thread(dst); ++#endif ++ %} ++ ++ ins_pipe( ialu_loadI ); ++%} ++ ++ ++instruct checkCastPP( mRegP dst ) %{ ++ match(Set dst (CheckCastPP dst)); ++ ++ format %{ "#checkcastPP of $dst (empty encoding) #@chekCastPP" %} ++ ins_encode( /*empty encoding*/ ); ++ ins_pipe( empty ); ++%} ++ ++instruct castPP(mRegP dst) ++%{ ++ match(Set dst (CastPP dst)); ++ ++ size(0); ++ format %{ "# castPP of $dst" %} ++ ins_encode(/* empty encoding */); ++ ins_pipe(empty); ++%} ++ ++instruct castII( mRegI dst ) %{ ++ match(Set dst (CastII dst)); ++ format %{ "#castII of $dst empty encoding" %} ++ ins_encode( /*empty encoding*/ ); ++ ins_cost(0); ++ ins_pipe( empty ); ++%} ++ ++// Return Instruction ++// Remove the return address & jump to it. ++instruct Ret() %{ ++ match(Return); ++ format %{ "RET #@Ret" %} ++ ++ ins_encode %{ ++ __ jr(RA); ++ __ delayed()->nop(); ++ %} ++ ++ ins_pipe( pipe_jump ); ++%} ++ ++/* ++// For Loongson CPUs, jr seems too slow, so this rule shouldn't be imported. ++instruct jumpXtnd(mRegL switch_val) %{ ++ match(Jump switch_val); ++ ++ ins_cost(350); ++ ++ format %{ "load T9 <-- [$constanttablebase, $switch_val, $constantoffset] @ jumpXtnd\n\t" ++ "jr T9\n\t" ++ "nop" %} ++ ins_encode %{ ++ Register table_base = $constanttablebase; ++ int con_offset = $constantoffset; ++ Register switch_reg = $switch_val$$Register; ++ ++ if (UseLEXT1) { ++ if (Assembler::is_simm(con_offset, 8)) { ++ __ gsldx(T9, table_base, switch_reg, con_offset); ++ } else if (Assembler::is_simm16(con_offset)) { ++ __ daddu(T9, table_base, switch_reg); ++ __ ld(T9, T9, con_offset); ++ } else { ++ __ move(T9, con_offset); ++ __ daddu(AT, table_base, switch_reg); ++ __ gsldx(T9, AT, T9, 0); ++ } ++ } else { ++ if (Assembler::is_simm16(con_offset)) { ++ __ daddu(T9, table_base, switch_reg); ++ __ ld(T9, T9, con_offset); ++ } else { ++ __ move(T9, con_offset); ++ __ daddu(AT, table_base, switch_reg); ++ __ daddu(AT, T9, AT); ++ __ ld(T9, AT, 0); ++ } ++ } ++ ++ __ jr(T9); ++ __ delayed()->nop(); ++ ++ %} ++ ins_pipe(pipe_jump); ++%} ++*/ ++ ++ ++// Tail Jump; remove the return address; jump to target. 
++// TailCall above leaves the return address around. ++// TailJump is used in only one place, the rethrow_Java stub (fancy_jump=2). ++// ex_oop (Exception Oop) is needed in %o0 at the jump. As there would be a ++// "restore" before this instruction (in Epilogue), we need to materialize it ++// in %i0. ++//FIXME ++instruct tailjmpInd(mRegP jump_target,mRegP ex_oop) %{ ++ match( TailJump jump_target ex_oop ); ++ ins_cost(200); ++ format %{ "Jmp $jump_target ; ex_oop = $ex_oop #@tailjmpInd" %} ++ ins_encode %{ ++ Register target = $jump_target$$Register; ++ ++ // V0, V1 are indicated in: ++ // [stubGenerator_mips.cpp] generate_forward_exception() ++ // [runtime_mips.cpp] OptoRuntime::generate_exception_blob() ++ // ++ Register oop = $ex_oop$$Register; ++ Register exception_oop = V0; ++ Register exception_pc = V1; ++ ++ __ move(exception_pc, RA); ++ __ move(exception_oop, oop); ++ ++ __ jr(target); ++ __ delayed()->nop(); ++ %} ++ ins_pipe( pipe_jump ); ++%} ++ ++// ============================================================================ ++// Procedure Call/Return Instructions ++// Call Java Static Instruction ++// Note: If this code changes, the corresponding ret_addr_offset() and ++// compute_padding() functions will have to be adjusted. ++instruct CallStaticJavaDirect(method meth) %{ ++ match(CallStaticJava); ++ effect(USE meth); ++ ++ ins_cost(300); ++ format %{ "CALL,static #@CallStaticJavaDirect " %} ++ ins_encode( Java_Static_Call( meth ) ); ++ ins_pipe( pipe_slow ); ++ ins_pc_relative(1); ++ ins_alignment(16); ++%} ++ ++// Call Java Dynamic Instruction ++// Note: If this code changes, the corresponding ret_addr_offset() and ++// compute_padding() functions will have to be adjusted. ++instruct CallDynamicJavaDirect(method meth) %{ ++ match(CallDynamicJava); ++ effect(USE meth); ++ ++ ins_cost(300); ++ format %{"MOV IC_Klass, #Universe::non_oop_word()\n\t" ++ "CallDynamic @ CallDynamicJavaDirect" %} ++ ins_encode( Java_Dynamic_Call( meth ) ); ++ ins_pipe( pipe_slow ); ++ ins_pc_relative(1); ++ ins_alignment(16); ++%} ++ ++instruct CallLeafNoFPDirect(method meth) %{ ++ match(CallLeafNoFP); ++ effect(USE meth); ++ ++ ins_cost(300); ++ format %{ "CALL_LEAF_NOFP,runtime " %} ++ ins_encode(Java_To_Runtime(meth)); ++ ins_pipe( pipe_slow ); ++ ins_pc_relative(1); ++ ins_alignment(16); ++%} ++ ++// Prefetch instructions. 
++ ++instruct prefetchrNTA( memory mem ) %{ ++ match(PrefetchRead mem); ++ ins_cost(125); ++ ++ format %{ "pref $mem\t# Prefetch into non-temporal cache for read @ prefetchrNTA" %} ++ ins_encode %{ ++ int base = $mem$$base; ++ int index = $mem$$index; ++ int scale = $mem$$scale; ++ int disp = $mem$$disp; ++ ++ if( index != 0 ) { ++ if (scale == 0) { ++ __ daddu(AT, as_Register(base), as_Register(index)); ++ } else { ++ __ dsll(AT, as_Register(index), scale); ++ __ daddu(AT, as_Register(base), AT); ++ } ++ } else { ++ __ move(AT, as_Register(base)); ++ } ++ if( Assembler::is_simm16(disp) ) { ++ __ daddiu(AT, AT, disp); ++ } else { ++ __ move(T9, disp); ++ __ daddu(AT, AT, T9); ++ } ++ __ pref(0, AT, 0); //hint: 0:load ++ %} ++ ins_pipe(pipe_slow); ++%} ++ ++instruct prefetchwNTA( memory mem ) %{ ++ match(PrefetchWrite mem); ++ ins_cost(125); ++ format %{ "pref $mem\t# Prefetch to non-temporal cache for write @ prefetchwNTA" %} ++ ins_encode %{ ++ int base = $mem$$base; ++ int index = $mem$$index; ++ int scale = $mem$$scale; ++ int disp = $mem$$disp; ++ ++ if( index != 0 ) { ++ if (scale == 0) { ++ __ daddu(AT, as_Register(base), as_Register(index)); ++ } else { ++ __ dsll(AT, as_Register(index), scale); ++ __ daddu(AT, as_Register(base), AT); ++ } ++ } else { ++ __ move(AT, as_Register(base)); ++ } ++ if( Assembler::is_simm16(disp) ) { ++ __ daddiu(AT, AT, disp); ++ } else { ++ __ move(T9, disp); ++ __ daddu(AT, AT, T9); ++ } ++ __ pref(1, AT, 0); //hint: 1:store ++ %} ++ ins_pipe(pipe_slow); ++%} ++ ++// Prefetch instructions for allocation. ++ ++instruct prefetchAllocNTA( memory mem ) %{ ++ match(PrefetchAllocation mem); ++ ins_cost(125); ++ format %{ "pref $mem\t# Prefetch allocation @ prefetchAllocNTA" %} ++ ins_encode %{ ++ int base = $mem$$base; ++ int index = $mem$$index; ++ int scale = $mem$$scale; ++ int disp = $mem$$disp; ++ ++ Register dst = R0; ++ ++ if ( index != 0 ) { ++ if ( Assembler::is_simm16(disp) ) { ++ if (UseLEXT1) { ++ if (scale == 0) { ++ __ gslbx(dst, as_Register(base), as_Register(index), disp); ++ } else { ++ __ dsll(AT, as_Register(index), scale); ++ __ gslbx(dst, as_Register(base), AT, disp); ++ } ++ } else { ++ if (scale == 0) { ++ __ addu(AT, as_Register(base), as_Register(index)); ++ } else { ++ __ dsll(AT, as_Register(index), scale); ++ __ addu(AT, as_Register(base), AT); ++ } ++ __ lb(dst, AT, disp); ++ } ++ } else { ++ if (scale == 0) { ++ __ addu(AT, as_Register(base), as_Register(index)); ++ } else { ++ __ dsll(AT, as_Register(index), scale); ++ __ addu(AT, as_Register(base), AT); ++ } ++ __ move(T9, disp); ++ if (UseLEXT1) { ++ __ gslbx(dst, AT, T9, 0); ++ } else { ++ __ addu(AT, AT, T9); ++ __ lb(dst, AT, 0); ++ } ++ } ++ } else { ++ if ( Assembler::is_simm16(disp) ) { ++ __ lb(dst, as_Register(base), disp); ++ } else { ++ __ move(T9, disp); ++ if (UseLEXT1) { ++ __ gslbx(dst, as_Register(base), T9, 0); ++ } else { ++ __ addu(AT, as_Register(base), T9); ++ __ lb(dst, AT, 0); ++ } ++ } ++ } ++ %} ++ ins_pipe(pipe_slow); ++%} ++ ++ ++// Call runtime without safepoint ++instruct CallLeafDirect(method meth) %{ ++ match(CallLeaf); ++ effect(USE meth); ++ ++ ins_cost(300); ++ format %{ "CALL_LEAF,runtime #@CallLeafDirect " %} ++ ins_encode(Java_To_Runtime(meth)); ++ ins_pipe( pipe_slow ); ++ ins_pc_relative(1); ++ ins_alignment(16); ++%} ++ ++// Load Char (16bit unsigned) ++instruct loadUS(mRegI dst, memory mem) %{ ++ match(Set dst (LoadUS mem)); ++ ++ ins_cost(125); ++ format %{ "loadUS $dst,$mem @ loadC" %} ++ ins_encode(load_C_enc(dst, mem)); ++ ins_pipe( 
ialu_loadI ); ++%} ++ ++instruct loadUS_convI2L(mRegL dst, memory mem) %{ ++ match(Set dst (ConvI2L (LoadUS mem))); ++ ++ ins_cost(125); ++ format %{ "loadUS $dst,$mem @ loadUS_convI2L" %} ++ ins_encode(load_C_enc(dst, mem)); ++ ins_pipe( ialu_loadI ); ++%} ++ ++// Store Char (16bit unsigned) ++instruct storeC(memory mem, mRegI src) %{ ++ match(Set mem (StoreC mem src)); ++ ++ ins_cost(125); ++ format %{ "storeC $src, $mem @ storeC" %} ++ ins_encode(store_C_reg_enc(mem, src)); ++ ins_pipe( ialu_loadI ); ++%} ++ ++instruct storeC_0(memory mem, immI_0 zero) %{ ++ match(Set mem (StoreC mem zero)); ++ ++ ins_cost(125); ++ format %{ "storeC $zero, $mem @ storeC_0" %} ++ ins_encode(store_C0_enc(mem)); ++ ins_pipe( ialu_loadI ); ++%} ++ ++ ++instruct loadConF_immF_0(regF dst, immF_0 zero) %{ ++ match(Set dst zero); ++ ins_cost(100); ++ ++ format %{ "mov $dst, zero @ loadConF_immF_0\n"%} ++ ins_encode %{ ++ FloatRegister dst = $dst$$FloatRegister; ++ ++ __ mtc1(R0, dst); ++ %} ++ ins_pipe( fpu_loadF ); ++%} ++ ++ ++instruct loadConF(regF dst, immF src) %{ ++ match(Set dst src); ++ ins_cost(125); ++ ++ format %{ "lwc1 $dst, $constantoffset[$constanttablebase] # load FLOAT $src from table @ loadConF" %} ++ ins_encode %{ ++ int con_offset = $constantoffset($src); ++ ++ if (Assembler::is_simm16(con_offset)) { ++ __ lwc1($dst$$FloatRegister, $constanttablebase, con_offset); ++ } else { ++ __ set64(AT, con_offset); ++ if (UseLEXT1) { ++ __ gslwxc1($dst$$FloatRegister, $constanttablebase, AT, 0); ++ } else { ++ __ daddu(AT, $constanttablebase, AT); ++ __ lwc1($dst$$FloatRegister, AT, 0); ++ } ++ } ++ %} ++ ins_pipe( fpu_loadF ); ++%} ++ ++ ++instruct loadConD_immD_0(regD dst, immD_0 zero) %{ ++ match(Set dst zero); ++ ins_cost(100); ++ ++ format %{ "mov $dst, zero @ loadConD_immD_0"%} ++ ins_encode %{ ++ FloatRegister dst = as_FloatRegister($dst$$reg); ++ ++ __ dmtc1(R0, dst); ++ %} ++ ins_pipe( fpu_loadF ); ++%} ++ ++instruct loadConD(regD dst, immD src) %{ ++ match(Set dst src); ++ ins_cost(125); ++ ++ format %{ "ldc1 $dst, $constantoffset[$constanttablebase] # load DOUBLE $src from table @ loadConD" %} ++ ins_encode %{ ++ int con_offset = $constantoffset($src); ++ ++ if (Assembler::is_simm16(con_offset)) { ++ __ ldc1($dst$$FloatRegister, $constanttablebase, con_offset); ++ } else { ++ __ set64(AT, con_offset); ++ if (UseLEXT1) { ++ __ gsldxc1($dst$$FloatRegister, $constanttablebase, AT, 0); ++ } else { ++ __ daddu(AT, $constanttablebase, AT); ++ __ ldc1($dst$$FloatRegister, AT, 0); ++ } ++ } ++ %} ++ ins_pipe( fpu_loadF ); ++%} ++ ++// Store register Float value (it is faster than store from FPU register) ++instruct storeF_reg( memory mem, regF src) %{ ++ match(Set mem (StoreF mem src)); ++ ++ ins_cost(50); ++ format %{ "store $mem, $src\t# store float @ storeF_reg" %} ++ ins_encode(store_F_reg_enc(mem, src)); ++ ins_pipe( fpu_storeF ); ++%} ++ ++instruct storeF_immF_0( memory mem, immF_0 zero) %{ ++ match(Set mem (StoreF mem zero)); ++ ++ ins_cost(40); ++ format %{ "store $mem, zero\t# store float @ storeF_immF_0" %} ++ ins_encode %{ ++ int base = $mem$$base; ++ int index = $mem$$index; ++ int scale = $mem$$scale; ++ int disp = $mem$$disp; ++ ++ if( index != 0 ) { ++ if (UseLEXT1) { ++ if ( Assembler::is_simm(disp, 8) ) { ++ if ( scale == 0 ) { ++ __ gsswx(R0, as_Register(base), as_Register(index), disp); ++ } else { ++ __ dsll(T9, as_Register(index), scale); ++ __ gsswx(R0, as_Register(base), T9, disp); ++ } ++ } else if ( Assembler::is_simm16(disp) ) { ++ if ( scale == 0 ) { ++ __ daddu(AT, 
as_Register(base), as_Register(index)); ++ } else { ++ __ dsll(T9, as_Register(index), scale); ++ __ daddu(AT, as_Register(base), T9); ++ } ++ __ sw(R0, AT, disp); ++ } else { ++ if ( scale == 0 ) { ++ __ move(T9, disp); ++ __ daddu(AT, as_Register(index), T9); ++ __ gsswx(R0, as_Register(base), AT, 0); ++ } else { ++ __ dsll(T9, as_Register(index), scale); ++ __ move(AT, disp); ++ __ daddu(AT, AT, T9); ++ __ gsswx(R0, as_Register(base), AT, 0); ++ } ++ } ++ } else { //not use loongson isa ++ if(scale != 0) { ++ __ dsll(T9, as_Register(index), scale); ++ __ daddu(AT, as_Register(base), T9); ++ } else { ++ __ daddu(AT, as_Register(base), as_Register(index)); ++ } ++ if( Assembler::is_simm16(disp) ) { ++ __ sw(R0, AT, disp); ++ } else { ++ __ move(T9, disp); ++ __ daddu(AT, AT, T9); ++ __ sw(R0, AT, 0); ++ } ++ } ++ } else { //index is 0 ++ if (UseLEXT1) { ++ if ( Assembler::is_simm16(disp) ) { ++ __ sw(R0, as_Register(base), disp); ++ } else { ++ __ move(T9, disp); ++ __ gsswx(R0, as_Register(base), T9, 0); ++ } ++ } else { ++ if( Assembler::is_simm16(disp) ) { ++ __ sw(R0, as_Register(base), disp); ++ } else { ++ __ move(T9, disp); ++ __ daddu(AT, as_Register(base), T9); ++ __ sw(R0, AT, 0); ++ } ++ } ++ } ++ %} ++ ins_pipe( ialu_storeI ); ++%} ++ ++// Load Double ++instruct loadD(regD dst, memory mem) %{ ++ match(Set dst (LoadD mem)); ++ ++ ins_cost(150); ++ format %{ "loadD $dst, $mem #@loadD" %} ++ ins_encode(load_D_enc(dst, mem)); ++ ins_pipe( ialu_loadI ); ++%} ++ ++// Load Double - UNaligned ++instruct loadD_unaligned(regD dst, memory mem ) %{ ++ match(Set dst (LoadD_unaligned mem)); ++ ins_cost(250); ++ // FIXME: Need more effective ldl/ldr ++ format %{ "loadD_unaligned $dst, $mem #@loadD_unaligned" %} ++ ins_encode(load_D_enc(dst, mem)); ++ ins_pipe( ialu_loadI ); ++%} ++ ++instruct storeD_reg( memory mem, regD src) %{ ++ match(Set mem (StoreD mem src)); ++ ++ ins_cost(50); ++ format %{ "store $mem, $src\t# store float @ storeD_reg" %} ++ ins_encode(store_D_reg_enc(mem, src)); ++ ins_pipe( fpu_storeF ); ++%} ++ ++instruct storeD_immD_0( memory mem, immD_0 zero) %{ ++ match(Set mem (StoreD mem zero)); ++ ++ ins_cost(40); ++ format %{ "store $mem, zero\t# store float @ storeD_immD_0" %} ++ ins_encode %{ ++ int base = $mem$$base; ++ int index = $mem$$index; ++ int scale = $mem$$scale; ++ int disp = $mem$$disp; ++ ++ __ mtc1(R0, F30); ++ __ cvt_d_w(F30, F30); ++ ++ if( index != 0 ) { ++ if (UseLEXT1) { ++ if ( Assembler::is_simm(disp, 8) ) { ++ if (scale == 0) { ++ __ gssdxc1(F30, as_Register(base), as_Register(index), disp); ++ } else { ++ __ dsll(T9, as_Register(index), scale); ++ __ gssdxc1(F30, as_Register(base), T9, disp); ++ } ++ } else if ( Assembler::is_simm16(disp) ) { ++ if (scale == 0) { ++ __ daddu(AT, as_Register(base), as_Register(index)); ++ __ sdc1(F30, AT, disp); ++ } else { ++ __ dsll(T9, as_Register(index), scale); ++ __ daddu(AT, as_Register(base), T9); ++ __ sdc1(F30, AT, disp); ++ } ++ } else { ++ if (scale == 0) { ++ __ move(T9, disp); ++ __ daddu(AT, as_Register(index), T9); ++ __ gssdxc1(F30, as_Register(base), AT, 0); ++ } else { ++ __ move(T9, disp); ++ __ dsll(AT, as_Register(index), scale); ++ __ daddu(AT, AT, T9); ++ __ gssdxc1(F30, as_Register(base), AT, 0); ++ } ++ } ++ } else { // not use loongson isa ++ if(scale != 0) { ++ __ dsll(T9, as_Register(index), scale); ++ __ daddu(AT, as_Register(base), T9); ++ } else { ++ __ daddu(AT, as_Register(base), as_Register(index)); ++ } ++ if( Assembler::is_simm16(disp) ) { ++ __ sdc1(F30, AT, disp); ++ } else { ++ 
__ move(T9, disp); ++ __ daddu(AT, AT, T9); ++ __ sdc1(F30, AT, 0); ++ } ++ } ++ } else {// index is 0 ++ if (UseLEXT1) { ++ if ( Assembler::is_simm16(disp) ) { ++ __ sdc1(F30, as_Register(base), disp); ++ } else { ++ __ move(T9, disp); ++ __ gssdxc1(F30, as_Register(base), T9, 0); ++ } ++ } else { ++ if( Assembler::is_simm16(disp) ) { ++ __ sdc1(F30, as_Register(base), disp); ++ } else { ++ __ move(T9, disp); ++ __ daddu(AT, as_Register(base), T9); ++ __ sdc1(F30, AT, 0); ++ } ++ } ++ } ++ %} ++ ins_pipe( ialu_storeI ); ++%} ++ ++instruct loadSSI(mRegI dst, stackSlotI src) ++%{ ++ match(Set dst src); ++ ++ ins_cost(125); ++ format %{ "lw $dst, $src\t# int stk @ loadSSI" %} ++ ins_encode %{ ++ guarantee( Assembler::is_simm16($src$$disp), "disp too long (loadSSI) !"); ++ __ lw($dst$$Register, SP, $src$$disp); ++ %} ++ ins_pipe(ialu_loadI); ++%} ++ ++instruct storeSSI(stackSlotI dst, mRegI src) ++%{ ++ match(Set dst src); ++ ++ ins_cost(100); ++ format %{ "sw $dst, $src\t# int stk @ storeSSI" %} ++ ins_encode %{ ++ guarantee( Assembler::is_simm16($dst$$disp), "disp too long (storeSSI) !"); ++ __ sw($src$$Register, SP, $dst$$disp); ++ %} ++ ins_pipe(ialu_storeI); ++%} ++ ++instruct loadSSL(mRegL dst, stackSlotL src) ++%{ ++ match(Set dst src); ++ ++ ins_cost(125); ++ format %{ "ld $dst, $src\t# long stk @ loadSSL" %} ++ ins_encode %{ ++ guarantee( Assembler::is_simm16($src$$disp), "disp too long (loadSSL) !"); ++ __ ld($dst$$Register, SP, $src$$disp); ++ %} ++ ins_pipe(ialu_loadI); ++%} ++ ++instruct storeSSL(stackSlotL dst, mRegL src) ++%{ ++ match(Set dst src); ++ ++ ins_cost(100); ++ format %{ "sd $dst, $src\t# long stk @ storeSSL" %} ++ ins_encode %{ ++ guarantee( Assembler::is_simm16($dst$$disp), "disp too long (storeSSL) !"); ++ __ sd($src$$Register, SP, $dst$$disp); ++ %} ++ ins_pipe(ialu_storeI); ++%} ++ ++instruct loadSSP(mRegP dst, stackSlotP src) ++%{ ++ match(Set dst src); ++ ++ ins_cost(125); ++ format %{ "ld $dst, $src\t# ptr stk @ loadSSP" %} ++ ins_encode %{ ++ guarantee( Assembler::is_simm16($src$$disp), "disp too long (loadSSP) !"); ++ __ ld($dst$$Register, SP, $src$$disp); ++ %} ++ ins_pipe(ialu_loadI); ++%} ++ ++instruct storeSSP(stackSlotP dst, mRegP src) ++%{ ++ match(Set dst src); ++ ++ ins_cost(100); ++ format %{ "sd $dst, $src\t# ptr stk @ storeSSP" %} ++ ins_encode %{ ++ guarantee( Assembler::is_simm16($dst$$disp), "disp too long (storeSSP) !"); ++ __ sd($src$$Register, SP, $dst$$disp); ++ %} ++ ins_pipe(ialu_storeI); ++%} ++ ++instruct loadSSF(regF dst, stackSlotF src) ++%{ ++ match(Set dst src); ++ ++ ins_cost(125); ++ format %{ "lwc1 $dst, $src\t# float stk @ loadSSF" %} ++ ins_encode %{ ++ guarantee( Assembler::is_simm16($src$$disp), "disp too long (loadSSF) !"); ++ __ lwc1($dst$$FloatRegister, SP, $src$$disp); ++ %} ++ ins_pipe(ialu_loadI); ++%} ++ ++instruct storeSSF(stackSlotF dst, regF src) ++%{ ++ match(Set dst src); ++ ++ ins_cost(100); ++ format %{ "swc1 $dst, $src\t# float stk @ storeSSF" %} ++ ins_encode %{ ++ guarantee( Assembler::is_simm16($dst$$disp), "disp too long (storeSSF) !"); ++ __ swc1($src$$FloatRegister, SP, $dst$$disp); ++ %} ++ ins_pipe(fpu_storeF); ++%} ++ ++// Use the same format since predicate() can not be used here. 
++instruct loadSSD(regD dst, stackSlotD src) ++%{ ++ match(Set dst src); ++ ++ ins_cost(125); ++ format %{ "ldc1 $dst, $src\t# double stk @ loadSSD" %} ++ ins_encode %{ ++ guarantee( Assembler::is_simm16($src$$disp), "disp too long (loadSSD) !"); ++ __ ldc1($dst$$FloatRegister, SP, $src$$disp); ++ %} ++ ins_pipe(ialu_loadI); ++%} ++ ++instruct storeSSD(stackSlotD dst, regD src) ++%{ ++ match(Set dst src); ++ ++ ins_cost(100); ++ format %{ "sdc1 $dst, $src\t# double stk @ storeSSD" %} ++ ins_encode %{ ++ guarantee( Assembler::is_simm16($dst$$disp), "disp too long (storeSSD) !"); ++ __ sdc1($src$$FloatRegister, SP, $dst$$disp); ++ %} ++ ins_pipe(fpu_storeF); ++%} ++ ++instruct cmpFastLock( FlagsReg cr, mRegP object, s0_RegP box, mRegI tmp, mRegP scr) %{ ++ match( Set cr (FastLock object box) ); ++ effect( TEMP tmp, TEMP scr, USE_KILL box ); ++ ins_cost(300); ++ format %{ "FASTLOCK $cr <-- $object, $box, $tmp, $scr #@ cmpFastLock" %} ++ ins_encode %{ ++ __ fast_lock($object$$Register, $box$$Register, $tmp$$Register, $scr$$Register); ++ __ move($cr$$Register, AT); ++ %} ++ ++ ins_pipe( pipe_slow ); ++ ins_pc_relative(1); ++%} ++ ++instruct cmpFastUnlock( FlagsReg cr, mRegP object, s0_RegP box, mRegP tmp ) %{ ++ match( Set cr (FastUnlock object box) ); ++ effect( TEMP tmp, USE_KILL box ); ++ ins_cost(300); ++ format %{ "FASTUNLOCK $cr <-- $object, $box, $tmp #@cmpFastUnlock" %} ++ ins_encode %{ ++ __ fast_unlock($object$$Register, $box$$Register, $tmp$$Register); ++ __ move($cr$$Register, AT); ++ %} ++ ++ ins_pipe( pipe_slow ); ++ ins_pc_relative(1); ++%} ++ ++// Store CMS card-mark Immediate ++instruct storeImmCM(memory mem, immI8 src) %{ ++ match(Set mem (StoreCM mem src)); ++ ++ ins_cost(150); ++ format %{ "MOV8 $mem,$src\t! CMS card-mark imm0" %} ++// opcode(0xC6); ++ ins_encode(store_B_immI_enc_sync(mem, src)); ++ ins_pipe( ialu_storeI ); ++%} ++ ++// Die now ++instruct ShouldNotReachHere( ) ++%{ ++ match(Halt); ++ ins_cost(300); ++ ++ // Use the following format syntax ++ format %{ "ILLTRAP ;#@ShouldNotReachHere" %} ++ ins_encode %{ ++ // Here we should emit illtrap ! 
++ ++ __ stop("in ShoudNotReachHere"); ++ ++ %} ++ ins_pipe( pipe_jump ); ++%} ++ ++instruct leaP8Narrow(mRegP dst, indOffset8Narrow mem) ++%{ ++ predicate(Universe::narrow_oop_shift() == 0); ++ match(Set dst mem); ++ ++ ins_cost(110); ++ format %{ "leaq $dst, $mem\t# ptr off8narrow @ leaP8Narrow" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register base = as_Register($mem$$base); ++ int disp = $mem$$disp; ++ ++ __ daddiu(dst, base, disp); ++ %} ++ ins_pipe( ialu_regI_imm16 ); ++%} ++ ++instruct leaPPosIdxScaleOff8(mRegP dst, basePosIndexScaleOffset8 mem) ++%{ ++ match(Set dst mem); ++ ++ ins_cost(110); ++ format %{ "leaq $dst, $mem\t# @ PosIdxScaleOff8" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register base = as_Register($mem$$base); ++ Register index = as_Register($mem$$index); ++ int scale = $mem$$scale; ++ int disp = $mem$$disp; ++ ++ if (scale == 0) { ++ __ daddu(AT, base, index); ++ __ daddiu(dst, AT, disp); ++ } else { ++ __ dsll(AT, index, scale); ++ __ daddu(AT, base, AT); ++ __ daddiu(dst, AT, disp); ++ } ++ %} ++ ++ ins_pipe( ialu_regI_imm16 ); ++%} ++ ++instruct leaPIdxScale(mRegP dst, indIndexScale mem) ++%{ ++ match(Set dst mem); ++ ++ ins_cost(110); ++ format %{ "leaq $dst, $mem\t# @ leaPIdxScale" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register base = as_Register($mem$$base); ++ Register index = as_Register($mem$$index); ++ int scale = $mem$$scale; ++ ++ if (scale == 0) { ++ __ daddu(dst, base, index); ++ } else { ++ __ dsll(AT, index, scale); ++ __ daddu(dst, base, AT); ++ } ++ %} ++ ++ ins_pipe( ialu_regI_imm16 ); ++%} ++ ++ ++// ============================================================================ ++// The 2nd slow-half of a subtype check. Scan the subklass's 2ndary superklass ++// array for an instance of the superklass. Set a hidden internal cache on a ++// hit (cache is checked with exposed code in gen_subtype_check()). Return ++// NZ for a miss or zero for a hit. The encoding ALSO sets flags. ++instruct partialSubtypeCheck( mRegP result, no_T8_mRegP sub, no_T8_mRegP super, mT8RegI tmp ) %{ ++ match(Set result (PartialSubtypeCheck sub super)); ++ effect(KILL tmp); ++ ins_cost(1100); // slightly larger than the next version ++ format %{ "partialSubtypeCheck result=$result, sub=$sub, super=$super, tmp=$tmp " %} ++ ++ ins_encode( enc_PartialSubtypeCheck(result, sub, super, tmp) ); ++ ins_pipe( pipe_slow ); ++%} ++ ++// Conditional-store of the updated heap-top. ++// Used during allocation of the shared heap. ++ ++instruct storePConditional( memory heap_top_ptr, mRegP oldval, mRegP newval, FlagsReg cr ) %{ ++ match(Set cr (StorePConditional heap_top_ptr (Binary oldval newval))); ++ ++ format %{ "CMPXCHG $heap_top_ptr, $newval\t# (ptr) @storePConditional " ++ "If $oldval == $heap_top_ptr then store $newval into $heap_top_ptr" %} ++ ins_encode%{ ++ Register oldval = $oldval$$Register; ++ Register newval = $newval$$Register; ++ Address addr(as_Register($heap_top_ptr$$base), $heap_top_ptr$$disp); ++ ++ int index = $heap_top_ptr$$index; ++ int scale = $heap_top_ptr$$scale; ++ int disp = $heap_top_ptr$$disp; ++ ++ guarantee(Assembler::is_simm16(disp), ""); ++ ++ if( index != 0 ) { ++ __ stop("in storePConditional: index != 0"); ++ } else { ++ __ cmpxchg(newval, addr, oldval); ++ __ move($cr$$Register, AT); ++ } ++ %} ++ ins_pipe( long_memory_op ); ++%} ++ ++// Conditional-store of an int value. ++// AT flag is set on success, reset otherwise. 
++instruct storeIConditional( memory mem, mRegI oldval, mRegI newval, FlagsReg cr ) %{ ++ match(Set cr (StoreIConditional mem (Binary oldval newval))); ++// effect(KILL oldval); ++ format %{ "CMPXCHG $newval, $mem, $oldval \t# @storeIConditional" %} ++ ++ ins_encode %{ ++ Register oldval = $oldval$$Register; ++ Register newval = $newval$$Register; ++ Address addr(as_Register($mem$$base), $mem$$disp); ++ Label again, failure; ++ ++ int index = $mem$$index; ++ int scale = $mem$$scale; ++ int disp = $mem$$disp; ++ ++ guarantee(Assembler::is_simm16(disp), ""); ++ ++ if( index != 0 ) { ++ __ stop("in storeIConditional: index != 0"); ++ } else { ++ __ bind(again); ++ if (UseSyncLevel >= 10000 || UseSyncLevel == 1000 || UseSyncLevel == 4000) __ sync(); ++ __ ll(AT, addr); ++ __ bne(AT, oldval, failure); ++ __ delayed()->addu(AT, R0, R0); ++ ++ __ addu(AT, newval, R0); ++ __ sc(AT, addr); ++ __ beq(AT, R0, again); ++ __ delayed()->addiu(AT, R0, 0xFF); ++ __ bind(failure); ++ __ sync(); ++ ++ __ move($cr$$Register, AT); ++ } ++%} ++ ++ ins_pipe( long_memory_op ); ++%} ++ ++// Conditional-store of a long value. ++// ZF flag is set on success, reset otherwise. Implemented with a CMPXCHG. ++instruct storeLConditional(memory mem, t2RegL oldval, mRegL newval, FlagsReg cr ) ++%{ ++ match(Set cr (StoreLConditional mem (Binary oldval newval))); ++ effect(KILL oldval); ++ ++ format %{ "cmpxchg $mem, $newval\t# If $oldval == $mem then store $newval into $mem" %} ++ ins_encode%{ ++ Register oldval = $oldval$$Register; ++ Register newval = $newval$$Register; ++ Address addr(as_Register($mem$$base), $mem$$disp); ++ ++ int index = $mem$$index; ++ int scale = $mem$$scale; ++ int disp = $mem$$disp; ++ ++ guarantee(Assembler::is_simm16(disp), ""); ++ ++ if( index != 0 ) { ++ __ stop("in storeIConditional: index != 0"); ++ } else { ++ __ cmpxchg(newval, addr, oldval); ++ __ move($cr$$Register, AT); ++ } ++ %} ++ ins_pipe( long_memory_op ); ++%} ++ ++// Implement LoadPLocked. Must be ordered against changes of the memory location ++// by storePConditional. 
++instruct loadPLocked(mRegP dst, memory mem) %{ ++ match(Set dst (LoadPLocked mem)); ++ ins_cost(MEMORY_REF_COST); ++ ++ format %{ "ld $dst, $mem #@loadPLocked\n\t" ++ "sync" %} ++ size(12); ++ ins_encode (load_P_enc_ac(dst, mem)); ++ ins_pipe( ialu_loadI ); ++%} ++ ++ ++instruct compareAndSwapI( mRegI res, mRegP mem_ptr, mS2RegI oldval, mRegI newval) %{ ++ match(Set res (CompareAndSwapI mem_ptr (Binary oldval newval))); ++ effect(KILL oldval); ++// match(CompareAndSwapI mem_ptr (Binary oldval newval)); ++ format %{ "CMPXCHG $newval, [$mem_ptr], $oldval @ compareAndSwapL\n\t" ++ "MOV $res, 1 @ compareAndSwapI\n\t" ++ "BNE AT, R0 @ compareAndSwapI\n\t" ++ "MOV $res, 0 @ compareAndSwapI\n" ++ "L:" %} ++ ins_encode %{ ++ Register newval = $newval$$Register; ++ Register oldval = $oldval$$Register; ++ Register res = $res$$Register; ++ Address addr($mem_ptr$$Register, 0); ++ Label L; ++ ++ __ cmpxchg32(newval, addr, oldval); ++ __ move(res, AT); ++ %} ++ ins_pipe( long_memory_op ); ++%} ++ ++instruct compareAndSwapL( mRegI res, mRegP mem_ptr, s2RegL oldval, mRegL newval) %{ ++ predicate(VM_Version::supports_cx8()); ++ match(Set res (CompareAndSwapL mem_ptr (Binary oldval newval))); ++ effect(KILL oldval); ++ format %{ "CMPXCHG $newval, [$mem_ptr], $oldval @ compareAndSwapI\n\t" ++ "MOV $res, 1 @ compareAndSwapI\n\t" ++ "BNE AT, R0 @ compareAndSwapI\n\t" ++ "MOV $res, 0 @ compareAndSwapI\n" ++ "L:" %} ++ ins_encode %{ ++ Register newval = $newval$$Register; ++ Register oldval = $oldval$$Register; ++ Register res = $res$$Register; ++ Address addr($mem_ptr$$Register, 0); ++ Label L; ++ ++ __ cmpxchg(newval, addr, oldval); ++ __ move(res, AT); ++ %} ++ ins_pipe( long_memory_op ); ++%} ++ ++//FIXME: ++instruct compareAndSwapP( mRegI res, mRegP mem_ptr, s2_RegP oldval, mRegP newval) %{ ++ match(Set res (CompareAndSwapP mem_ptr (Binary oldval newval))); ++ effect(KILL oldval); ++ format %{ "CMPXCHG $newval, [$mem_ptr], $oldval @ compareAndSwapP\n\t" ++ "MOV $res, AT @ compareAndSwapP\n\t" ++ "L:" %} ++ ins_encode %{ ++ Register newval = $newval$$Register; ++ Register oldval = $oldval$$Register; ++ Register res = $res$$Register; ++ Address addr($mem_ptr$$Register, 0); ++ Label L; ++ ++ __ cmpxchg(newval, addr, oldval); ++ __ move(res, AT); ++ %} ++ ins_pipe( long_memory_op ); ++%} ++ ++instruct compareAndSwapN( mRegI res, mRegP mem_ptr, t2_RegN oldval, mRegN newval) %{ ++ match(Set res (CompareAndSwapN mem_ptr (Binary oldval newval))); ++ effect(KILL oldval); ++ format %{ "CMPXCHG $newval, [$mem_ptr], $oldval @ compareAndSwapN\n\t" ++ "MOV $res, AT @ compareAndSwapN\n\t" ++ "L:" %} ++ ins_encode %{ ++ Register newval = $newval$$Register; ++ Register oldval = $oldval$$Register; ++ Register res = $res$$Register; ++ Address addr($mem_ptr$$Register, 0); ++ Label L; ++ ++ // cmpxchg32 is implemented with ll/sc, which will do sign extension. ++ // Thus, we should extend oldval's sign for correct comparision. ++ // ++ __ sll(oldval, oldval, 0); ++ ++ __ cmpxchg32(newval, addr, oldval); ++ __ move(res, AT); ++ %} ++ ins_pipe( long_memory_op ); ++%} ++ ++//----------Max and Min-------------------------------------------------------- ++// Min Instructions ++//// ++// *** Min and Max using the conditional move are slower than the ++// *** branch version on a Pentium III. ++// // Conditional move for min ++//instruct cmovI_reg_lt( eRegI op2, eRegI op1, eFlagsReg cr ) %{ ++// effect( USE_DEF op2, USE op1, USE cr ); ++// format %{ "CMOVlt $op2,$op1\t! 
min" %} ++// opcode(0x4C,0x0F); ++// ins_encode( OpcS, OpcP, RegReg( op2, op1 ) ); ++// ins_pipe( pipe_cmov_reg ); ++//%} ++// ++//// Min Register with Register (P6 version) ++//instruct minI_eReg_p6( eRegI op1, eRegI op2 ) %{ ++// predicate(VM_Version::supports_cmov() ); ++// match(Set op2 (MinI op1 op2)); ++// ins_cost(200); ++// expand %{ ++// eFlagsReg cr; ++// compI_eReg(cr,op1,op2); ++// cmovI_reg_lt(op2,op1,cr); ++// %} ++//%} ++ ++// Min Register with Register (generic version) ++instruct minI_Reg_Reg(mRegI dst, mRegI src) %{ ++ match(Set dst (MinI dst src)); ++ //effect(KILL flags); ++ ins_cost(80); ++ ++ format %{ "MIN $dst, $src @minI_Reg_Reg" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src = $src$$Register; ++ ++ __ slt(AT, src, dst); ++ __ movn(dst, src, AT); ++ ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++// Max Register with Register ++// *** Min and Max using the conditional move are slower than the ++// *** branch version on a Pentium III. ++// // Conditional move for max ++//instruct cmovI_reg_gt( eRegI op2, eRegI op1, eFlagsReg cr ) %{ ++// effect( USE_DEF op2, USE op1, USE cr ); ++// format %{ "CMOVgt $op2,$op1\t! max" %} ++// opcode(0x4F,0x0F); ++// ins_encode( OpcS, OpcP, RegReg( op2, op1 ) ); ++// ins_pipe( pipe_cmov_reg ); ++//%} ++// ++// // Max Register with Register (P6 version) ++//instruct maxI_eReg_p6( eRegI op1, eRegI op2 ) %{ ++// predicate(VM_Version::supports_cmov() ); ++// match(Set op2 (MaxI op1 op2)); ++// ins_cost(200); ++// expand %{ ++// eFlagsReg cr; ++// compI_eReg(cr,op1,op2); ++// cmovI_reg_gt(op2,op1,cr); ++// %} ++//%} ++ ++// Max Register with Register (generic version) ++instruct maxI_Reg_Reg(mRegI dst, mRegI src) %{ ++ match(Set dst (MaxI dst src)); ++ ins_cost(80); ++ ++ format %{ "MAX $dst, $src @maxI_Reg_Reg" %} ++ ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src = $src$$Register; ++ ++ __ slt(AT, dst, src); ++ __ movn(dst, src, AT); ++ ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct maxI_Reg_zero(mRegI dst, immI_0 zero) %{ ++ match(Set dst (MaxI dst zero)); ++ ins_cost(50); ++ ++ format %{ "MAX $dst, 0 @maxI_Reg_zero" %} ++ ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ ++ __ slt(AT, dst, R0); ++ __ movn(dst, R0, AT); ++ ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct zerox_long_reg_reg(mRegL dst, mRegL src, immL_MaxUI mask) ++%{ ++ match(Set dst (AndL src mask)); ++ ++ format %{ "movl $dst, $src\t# zero-extend long @ zerox_long_reg_reg" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src = $src$$Register; ++ ++ __ dext(dst, src, 0, 32); ++ %} ++ ins_pipe(ialu_regI_regI); ++%} ++ ++instruct combine_i2l(mRegL dst, mRegI src1, immL_MaxUI mask, mRegI src2, immI_32 shift32) ++%{ ++ match(Set dst (OrL (AndL (ConvI2L src1) mask) (LShiftL (ConvI2L src2) shift32))); ++ ++ format %{ "combine_i2l $dst, $src2(H), $src1(L) @ combine_i2l" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src1 = $src1$$Register; ++ Register src2 = $src2$$Register; ++ ++ if (src1 == dst) { ++ __ dinsu(dst, src2, 32, 32); ++ } else if (src2 == dst) { ++ __ dsll32(dst, dst, 0); ++ __ dins(dst, src1, 0, 32); ++ } else { ++ __ dext(dst, src1, 0, 32); ++ __ dinsu(dst, src2, 32, 32); ++ } ++ %} ++ ins_pipe(ialu_regI_regI); ++%} ++ ++// Zero-extend convert int to long ++instruct convI2L_reg_reg_zex(mRegL dst, mRegI src, immL_MaxUI mask) ++%{ ++ match(Set dst (AndL (ConvI2L src) mask)); ++ ++ format %{ "movl $dst, $src\t# i2l zero-extend @ convI2L_reg_reg_zex" %} ++ ins_encode %{ ++ 
Register dst = $dst$$Register; ++ Register src = $src$$Register; ++ ++ __ dext(dst, src, 0, 32); ++ %} ++ ins_pipe(ialu_regI_regI); ++%} ++ ++instruct convL2I2L_reg_reg_zex(mRegL dst, mRegL src, immL_MaxUI mask) ++%{ ++ match(Set dst (AndL (ConvI2L (ConvL2I src)) mask)); ++ ++ format %{ "movl $dst, $src\t# i2l zero-extend @ convL2I2L_reg_reg_zex" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src = $src$$Register; ++ ++ __ dext(dst, src, 0, 32); ++ %} ++ ins_pipe(ialu_regI_regI); ++%} ++ ++// Match loading integer and casting it to unsigned int in long register. ++// LoadI + ConvI2L + AndL 0xffffffff. ++instruct loadUI2L_rmask(mRegL dst, memory mem, immL_MaxUI mask) %{ ++ match(Set dst (AndL (ConvI2L (LoadI mem)) mask)); ++ ++ format %{ "lwu $dst, $mem \t// zero-extend to long @ loadUI2L_rmask" %} ++ ins_encode (load_N_enc(dst, mem)); ++ ins_pipe(ialu_loadI); ++%} ++ ++instruct loadUI2L_lmask(mRegL dst, memory mem, immL_MaxUI mask) %{ ++ match(Set dst (AndL mask (ConvI2L (LoadI mem)))); ++ ++ format %{ "lwu $dst, $mem \t// zero-extend to long @ loadUI2L_lmask" %} ++ ins_encode (load_N_enc(dst, mem)); ++ ins_pipe(ialu_loadI); ++%} ++ ++ ++// ============================================================================ ++// Safepoint Instruction ++instruct safePoint_poll_reg(mRegP poll) %{ ++ match(SafePoint poll); ++ predicate(false); ++ effect(USE poll); ++ ++ ins_cost(125); ++ format %{ "Safepoint @ [$poll] : poll for GC @ safePoint_poll_reg" %} ++ ++ ins_encode %{ ++ Register poll_reg = $poll$$Register; ++ ++ __ block_comment("Safepoint:"); ++ __ relocate(relocInfo::poll_type); ++ __ lw(AT, poll_reg, 0); ++ %} ++ ++ ins_pipe( ialu_storeI ); ++%} ++ ++instruct safePoint_poll() %{ ++ match(SafePoint); ++ ++ ins_cost(105); ++ format %{ "poll for GC @ safePoint_poll" %} ++ ++ ins_encode %{ ++ __ block_comment("Safepoint:"); ++ __ set64(T9, (long)os::get_polling_page()); ++ __ relocate(relocInfo::poll_type); ++ __ lw(AT, T9, 0); ++ %} ++ ++ ins_pipe( ialu_storeI ); ++%} ++ ++//----------Arithmetic Conversion Instructions--------------------------------- ++ ++instruct roundFloat_nop(regF dst) ++%{ ++ match(Set dst (RoundFloat dst)); ++ ++ ins_cost(0); ++ ins_encode(); ++ ins_pipe(empty); ++%} ++ ++instruct roundDouble_nop(regD dst) ++%{ ++ match(Set dst (RoundDouble dst)); ++ ++ ins_cost(0); ++ ins_encode(); ++ ins_pipe(empty); ++%} ++ ++//---------- Zeros Count Instructions ------------------------------------------ ++// CountLeadingZerosINode CountTrailingZerosINode ++instruct countLeadingZerosI(mRegI dst, mRegI src) %{ ++ predicate(UseCountLeadingZerosInstructionMIPS64); ++ match(Set dst (CountLeadingZerosI src)); ++ ++ format %{ "clz $dst, $src\t# count leading zeros (int)" %} ++ ins_encode %{ ++ __ clz($dst$$Register, $src$$Register); ++ %} ++ ins_pipe( ialu_regL_regL ); ++%} ++ ++instruct countLeadingZerosL(mRegI dst, mRegL src) %{ ++ predicate(UseCountLeadingZerosInstructionMIPS64); ++ match(Set dst (CountLeadingZerosL src)); ++ ++ format %{ "dclz $dst, $src\t# count leading zeros (long)" %} ++ ins_encode %{ ++ __ dclz($dst$$Register, $src$$Register); ++ %} ++ ins_pipe( ialu_regL_regL ); ++%} ++ ++instruct countTrailingZerosI(mRegI dst, mRegI src) %{ ++ predicate(UseCountTrailingZerosInstructionMIPS64); ++ match(Set dst (CountTrailingZerosI src)); ++ ++ format %{ "ctz $dst, $src\t# count trailing zeros (int)" %} ++ ins_encode %{ ++ // ctz and dctz is gs instructions. 
++ __ ctz($dst$$Register, $src$$Register); ++ %} ++ ins_pipe( ialu_regL_regL ); ++%} ++ ++instruct countTrailingZerosL(mRegI dst, mRegL src) %{ ++ predicate(UseCountTrailingZerosInstructionMIPS64); ++ match(Set dst (CountTrailingZerosL src)); ++ ++ format %{ "dcto $dst, $src\t# count trailing zeros (long)" %} ++ ins_encode %{ ++ __ dctz($dst$$Register, $src$$Register); ++ %} ++ ins_pipe( ialu_regL_regL ); ++%} ++ ++// ====================VECTOR INSTRUCTIONS===================================== ++ ++// Load vectors (8 bytes long) ++instruct loadV8(vecD dst, memory mem) %{ ++ predicate(n->as_LoadVector()->memory_size() == 8); ++ match(Set dst (LoadVector mem)); ++ ins_cost(125); ++ format %{ "load $dst, $mem\t! load vector (8 bytes)" %} ++ ins_encode(load_D_enc(dst, mem)); ++ ins_pipe( fpu_loadF ); ++%} ++ ++// Store vectors (8 bytes long) ++instruct storeV8(memory mem, vecD src) %{ ++ predicate(n->as_StoreVector()->memory_size() == 8); ++ match(Set mem (StoreVector mem src)); ++ ins_cost(145); ++ format %{ "store $mem, $src\t! store vector (8 bytes)" %} ++ ins_encode(store_D_reg_enc(mem, src)); ++ ins_pipe( fpu_storeF ); ++%} ++ ++instruct Repl8B_DSP(vecD dst, mRegI src) %{ ++ predicate(n->as_Vector()->length() == 8 && UseLEXT3); ++ match(Set dst (ReplicateB src)); ++ ins_cost(100); ++ format %{ "replv_ob AT, $src\n\t" ++ "dmtc1 AT, $dst\t! replicate8B" %} ++ ins_encode %{ ++ __ replv_ob(AT, $src$$Register); ++ __ dmtc1(AT, $dst$$FloatRegister); ++ %} ++ ins_pipe( pipe_mtc1 ); ++%} ++ ++instruct Repl8B(vecD dst, mRegI src) %{ ++ predicate(n->as_Vector()->length() == 8); ++ match(Set dst (ReplicateB src)); ++ ins_cost(140); ++ format %{ "move AT, $src\n\t" ++ "dins AT, AT, 8, 8\n\t" ++ "dins AT, AT, 16, 16\n\t" ++ "dinsu AT, AT, 32, 32\n\t" ++ "dmtc1 AT, $dst\t! replicate8B" %} ++ ins_encode %{ ++ __ move(AT, $src$$Register); ++ __ dins(AT, AT, 8, 8); ++ __ dins(AT, AT, 16, 16); ++ __ dinsu(AT, AT, 32, 32); ++ __ dmtc1(AT, $dst$$FloatRegister); ++ %} ++ ins_pipe( pipe_mtc1 ); ++%} ++ ++instruct Repl8B_imm_DSP(vecD dst, immI con) %{ ++ predicate(n->as_Vector()->length() == 8 && UseLEXT3); ++ match(Set dst (ReplicateB con)); ++ ins_cost(110); ++ format %{ "repl_ob AT, [$con]\n\t" ++ "dmtc1 AT, $dst,0x00\t! replicate8B($con)" %} ++ ins_encode %{ ++ int val = $con$$constant; ++ __ repl_ob(AT, val); ++ __ dmtc1(AT, $dst$$FloatRegister); ++ %} ++ ins_pipe( pipe_mtc1 ); ++%} ++ ++instruct Repl8B_imm(vecD dst, immI con) %{ ++ predicate(n->as_Vector()->length() == 8); ++ match(Set dst (ReplicateB con)); ++ ins_cost(150); ++ format %{ "move AT, [$con]\n\t" ++ "dins AT, AT, 8, 8\n\t" ++ "dins AT, AT, 16, 16\n\t" ++ "dinsu AT, AT, 32, 32\n\t" ++ "dmtc1 AT, $dst,0x00\t! replicate8B($con)" %} ++ ins_encode %{ ++ __ move(AT, $con$$constant); ++ __ dins(AT, AT, 8, 8); ++ __ dins(AT, AT, 16, 16); ++ __ dinsu(AT, AT, 32, 32); ++ __ dmtc1(AT, $dst$$FloatRegister); ++ %} ++ ins_pipe( pipe_mtc1 ); ++%} ++ ++instruct Repl8B_zero(vecD dst, immI_0 zero) %{ ++ predicate(n->as_Vector()->length() == 8); ++ match(Set dst (ReplicateB zero)); ++ ins_cost(90); ++ format %{ "dmtc1 R0, $dst\t! replicate8B zero" %} ++ ins_encode %{ ++ __ dmtc1(R0, $dst$$FloatRegister); ++ %} ++ ins_pipe( pipe_mtc1 ); ++%} ++ ++instruct Repl8B_M1(vecD dst, immI_M1 M1) %{ ++ predicate(n->as_Vector()->length() == 8); ++ match(Set dst (ReplicateB M1)); ++ ins_cost(80); ++ format %{ "dmtc1 -1, $dst\t! 
replicate8B -1" %} ++ ins_encode %{ ++ __ nor(AT, R0, R0); ++ __ dmtc1(AT, $dst$$FloatRegister); ++ %} ++ ins_pipe( pipe_mtc1 ); ++%} ++ ++instruct Repl4S_DSP(vecD dst, mRegI src) %{ ++ predicate(n->as_Vector()->length() == 4 && UseLEXT3); ++ match(Set dst (ReplicateS src)); ++ ins_cost(100); ++ format %{ "replv_qh AT, $src\n\t" ++ "dmtc1 AT, $dst\t! replicate4S" %} ++ ins_encode %{ ++ __ replv_qh(AT, $src$$Register); ++ __ dmtc1(AT, $dst$$FloatRegister); ++ %} ++ ins_pipe( pipe_mtc1 ); ++%} ++ ++instruct Repl4S(vecD dst, mRegI src) %{ ++ predicate(n->as_Vector()->length() == 4); ++ match(Set dst (ReplicateS src)); ++ ins_cost(120); ++ format %{ "move AT, $src \n\t" ++ "dins AT, AT, 16, 16\n\t" ++ "dinsu AT, AT, 32, 32\n\t" ++ "dmtc1 AT, $dst\t! replicate4S" %} ++ ins_encode %{ ++ __ move(AT, $src$$Register); ++ __ dins(AT, AT, 16, 16); ++ __ dinsu(AT, AT, 32, 32); ++ __ dmtc1(AT, $dst$$FloatRegister); ++ %} ++ ins_pipe( pipe_mtc1 ); ++%} ++ ++instruct Repl4S_imm_DSP(vecD dst, immI con) %{ ++ predicate(n->as_Vector()->length() == 4 && UseLEXT3); ++ match(Set dst (ReplicateS con)); ++ ins_cost(100); ++ format %{ "repl_qh AT, [$con]\n\t" ++ "dmtc1 AT, $dst\t! replicate4S($con)" %} ++ ins_encode %{ ++ int val = $con$$constant; ++ if ( Assembler::is_simm(val, 10)) { ++ //repl_qh supports 10 bits immediate ++ __ repl_qh(AT, val); ++ } else { ++ __ li32(AT, val); ++ __ replv_qh(AT, AT); ++ } ++ __ dmtc1(AT, $dst$$FloatRegister); ++ %} ++ ins_pipe( pipe_mtc1 ); ++%} ++ ++instruct Repl4S_imm(vecD dst, immI con) %{ ++ predicate(n->as_Vector()->length() == 4); ++ match(Set dst (ReplicateS con)); ++ ins_cost(110); ++ format %{ "move AT, [$con]\n\t" ++ "dins AT, AT, 16, 16\n\t" ++ "dinsu AT, AT, 32, 32\n\t" ++ "dmtc1 AT, $dst\t! replicate4S($con)" %} ++ ins_encode %{ ++ __ move(AT, $con$$constant); ++ __ dins(AT, AT, 16, 16); ++ __ dinsu(AT, AT, 32, 32); ++ __ dmtc1(AT, $dst$$FloatRegister); ++ %} ++ ins_pipe( pipe_mtc1 ); ++%} ++ ++instruct Repl4S_zero(vecD dst, immI_0 zero) %{ ++ predicate(n->as_Vector()->length() == 4); ++ match(Set dst (ReplicateS zero)); ++ format %{ "dmtc1 R0, $dst\t! replicate4S zero" %} ++ ins_encode %{ ++ __ dmtc1(R0, $dst$$FloatRegister); ++ %} ++ ins_pipe( pipe_mtc1 ); ++%} ++ ++instruct Repl4S_M1(vecD dst, immI_M1 M1) %{ ++ predicate(n->as_Vector()->length() == 4); ++ match(Set dst (ReplicateS M1)); ++ format %{ "dmtc1 -1, $dst\t! replicate4S -1" %} ++ ins_encode %{ ++ __ nor(AT, R0, R0); ++ __ dmtc1(AT, $dst$$FloatRegister); ++ %} ++ ins_pipe( pipe_mtc1 ); ++%} ++ ++// Replicate integer (4 byte) scalar to be vector ++instruct Repl2I(vecD dst, mRegI src) %{ ++ predicate(n->as_Vector()->length() == 2); ++ match(Set dst (ReplicateI src)); ++ format %{ "dins AT, $src, 0, 32\n\t" ++ "dinsu AT, $src, 32, 32\n\t" ++ "dmtc1 AT, $dst\t! replicate2I" %} ++ ins_encode %{ ++ __ dins(AT, $src$$Register, 0, 32); ++ __ dinsu(AT, $src$$Register, 32, 32); ++ __ dmtc1(AT, $dst$$FloatRegister); ++ %} ++ ins_pipe( pipe_mtc1 ); ++%} ++ ++// Replicate integer (4 byte) scalar immediate to be vector by loading from const table. ++instruct Repl2I_imm(vecD dst, immI con, mA7RegI tmp) %{ ++ predicate(n->as_Vector()->length() == 2); ++ match(Set dst (ReplicateI con)); ++ effect(KILL tmp); ++ format %{ "li32 AT, [$con], 32\n\t" ++ "dinsu AT, AT\n\t" ++ "dmtc1 AT, $dst\t! 
replicate2I($con)" %} ++ ins_encode %{ ++ int val = $con$$constant; ++ __ li32(AT, val); ++ __ dinsu(AT, AT, 32, 32); ++ __ dmtc1(AT, $dst$$FloatRegister); ++ %} ++ ins_pipe( pipe_mtc1 ); ++%} ++ ++// Replicate integer (4 byte) scalar zero to be vector ++instruct Repl2I_zero(vecD dst, immI_0 zero) %{ ++ predicate(n->as_Vector()->length() == 2); ++ match(Set dst (ReplicateI zero)); ++ format %{ "dmtc1 R0, $dst\t! replicate2I zero" %} ++ ins_encode %{ ++ __ dmtc1(R0, $dst$$FloatRegister); ++ %} ++ ins_pipe( pipe_mtc1 ); ++%} ++ ++// Replicate integer (4 byte) scalar -1 to be vector ++instruct Repl2I_M1(vecD dst, immI_M1 M1) %{ ++ predicate(n->as_Vector()->length() == 2); ++ match(Set dst (ReplicateI M1)); ++ format %{ "dmtc1 -1, $dst\t! replicate2I -1, use AT" %} ++ ins_encode %{ ++ __ nor(AT, R0, R0); ++ __ dmtc1(AT, $dst$$FloatRegister); ++ %} ++ ins_pipe( pipe_mtc1 ); ++%} ++ ++// Replicate float (4 byte) scalar to be vector ++instruct Repl2F(vecD dst, regF src) %{ ++ predicate(n->as_Vector()->length() == 2); ++ match(Set dst (ReplicateF src)); ++ format %{ "cvt.ps $dst, $src, $src\t! replicate2F" %} ++ ins_encode %{ ++ __ cvt_ps_s($dst$$FloatRegister, $src$$FloatRegister, $src$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++// Replicate float (4 byte) scalar zero to be vector ++instruct Repl2F_zero(vecD dst, immF_0 zero) %{ ++ predicate(n->as_Vector()->length() == 2); ++ match(Set dst (ReplicateF zero)); ++ format %{ "dmtc1 R0, $dst\t! replicate2F zero" %} ++ ins_encode %{ ++ __ dmtc1(R0, $dst$$FloatRegister); ++ %} ++ ins_pipe( pipe_mtc1 ); ++%} ++ ++ ++// ====================VECTOR ARITHMETIC======================================= ++ ++// --------------------------------- ADD -------------------------------------- ++ ++// Floats vector add ++// kernel does not have emulation of PS instructions yet, so PS instructions is disabled. ++instruct vadd2F(vecD dst, vecD src) %{ ++ predicate(n->as_Vector()->length() == 2); ++ match(Set dst (AddVF dst src)); ++ format %{ "add.ps $dst,$src\t! add packed2F" %} ++ ins_encode %{ ++ __ add_ps($dst$$FloatRegister, $dst$$FloatRegister, $src$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct vadd2F3(vecD dst, vecD src1, vecD src2) %{ ++ predicate(n->as_Vector()->length() == 2); ++ match(Set dst (AddVF src1 src2)); ++ format %{ "add.ps $dst,$src1,$src2\t! add packed2F" %} ++ ins_encode %{ ++ __ add_ps($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); ++ %} ++ ins_pipe( fpu_regF_regF ); ++%} ++ ++// --------------------------------- SUB -------------------------------------- ++ ++// Floats vector sub ++instruct vsub2F(vecD dst, vecD src) %{ ++ predicate(n->as_Vector()->length() == 2); ++ match(Set dst (SubVF dst src)); ++ format %{ "sub.ps $dst,$src\t! sub packed2F" %} ++ ins_encode %{ ++ __ sub_ps($dst$$FloatRegister, $dst$$FloatRegister, $src$$FloatRegister); ++ %} ++ ins_pipe( fpu_regF_regF ); ++%} ++ ++// --------------------------------- MUL -------------------------------------- ++ ++// Floats vector mul ++instruct vmul2F(vecD dst, vecD src) %{ ++ predicate(n->as_Vector()->length() == 2); ++ match(Set dst (MulVF dst src)); ++ format %{ "mul.ps $dst, $src\t! mul packed2F" %} ++ ins_encode %{ ++ __ mul_ps($dst$$FloatRegister, $dst$$FloatRegister, $src$$FloatRegister); ++ %} ++ ins_pipe( fpu_regF_regF ); ++%} ++ ++instruct vmul2F3(vecD dst, vecD src1, vecD src2) %{ ++ predicate(n->as_Vector()->length() == 2); ++ match(Set dst (MulVF src1 src2)); ++ format %{ "mul.ps $dst, $src1, $src2\t! 
mul packed2F" %} ++ ins_encode %{ ++ __ mul_ps($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); ++ %} ++ ins_pipe( fpu_regF_regF ); ++%} ++ ++// --------------------------------- DIV -------------------------------------- ++// MIPS do not have div.ps ++ ++// --------------------------------- MADD -------------------------------------- ++// Floats vector madd ++//instruct vmadd2F(vecD dst, vecD src1, vecD src2, vecD src3) %{ ++// predicate(n->as_Vector()->length() == 2); ++// match(Set dst (AddVF (MulVF src1 src2) src3)); ++// ins_cost(50); ++// format %{ "madd.ps $dst, $src3, $src1, $src2\t! madd packed2F" %} ++// ins_encode %{ ++// __ madd_ps($dst$$FloatRegister, $src3$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); ++// %} ++// ins_pipe( fpu_regF_regF ); ++//%} ++ ++ ++//----------PEEPHOLE RULES----------------------------------------------------- ++// These must follow all instruction definitions as they use the names ++// defined in the instructions definitions. ++// ++// peepmatch ( root_instr_name [preceeding_instruction]* ); ++// ++// peepconstraint %{ ++// (instruction_number.operand_name relational_op instruction_number.operand_name ++// [, ...] ); ++// // instruction numbers are zero-based using left to right order in peepmatch ++// ++// peepreplace ( instr_name ( [instruction_number.operand_name]* ) ); ++// // provide an instruction_number.operand_name for each operand that appears ++// // in the replacement instruction's match rule ++// ++// ---------VM FLAGS--------------------------------------------------------- ++// ++// All peephole optimizations can be turned off using -XX:-OptoPeephole ++// ++// Each peephole rule is given an identifying number starting with zero and ++// increasing by one in the order seen by the parser. An individual peephole ++// can be enabled, and all others disabled, by using -XX:OptoPeepholeAt=# ++// on the command-line. ++// ++// ---------CURRENT LIMITATIONS---------------------------------------------- ++// ++// Only match adjacent instructions in same basic block ++// Only equality constraints ++// Only constraints between operands, not (0.dest_reg == EAX_enc) ++// Only one replacement instruction ++// ++// ---------EXAMPLE---------------------------------------------------------- ++// ++// // pertinent parts of existing instructions in architecture description ++// instruct movI(eRegI dst, eRegI src) %{ ++// match(Set dst (CopyI src)); ++// %} ++// ++// instruct incI_eReg(eRegI dst, immI_1 src, eFlagsReg cr) %{ ++// match(Set dst (AddI dst src)); ++// effect(KILL cr); ++// %} ++// ++// // Change (inc mov) to lea ++// peephole %{ ++// // increment preceeded by register-register move ++// peepmatch ( incI_eReg movI ); ++// // require that the destination register of the increment ++// // match the destination register of the move ++// peepconstraint ( 0.dst == 1.dst ); ++// // construct a replacement instruction that sets ++// // the destination to ( move's source register + one ) ++// peepreplace ( leaI_eReg_immI( 0.dst 1.src 0.src ) ); ++// %} ++// ++// Implementation no longer uses movX instructions since ++// machine-independent system no longer uses CopyX nodes. 
++// ++// peephole %{ ++// peepmatch ( incI_eReg movI ); ++// peepconstraint ( 0.dst == 1.dst ); ++// peepreplace ( leaI_eReg_immI( 0.dst 1.src 0.src ) ); ++// %} ++// ++// peephole %{ ++// peepmatch ( decI_eReg movI ); ++// peepconstraint ( 0.dst == 1.dst ); ++// peepreplace ( leaI_eReg_immI( 0.dst 1.src 0.src ) ); ++// %} ++// ++// peephole %{ ++// peepmatch ( addI_eReg_imm movI ); ++// peepconstraint ( 0.dst == 1.dst ); ++// peepreplace ( leaI_eReg_immI( 0.dst 1.src 0.src ) ); ++// %} ++// ++// peephole %{ ++// peepmatch ( addP_eReg_imm movP ); ++// peepconstraint ( 0.dst == 1.dst ); ++// peepreplace ( leaP_eReg_immI( 0.dst 1.src 0.src ) ); ++// %} ++ ++// // Change load of spilled value to only a spill ++// instruct storeI(memory mem, eRegI src) %{ ++// match(Set mem (StoreI mem src)); ++// %} ++// ++// instruct loadI(eRegI dst, memory mem) %{ ++// match(Set dst (LoadI mem)); ++// %} ++// ++//peephole %{ ++// peepmatch ( loadI storeI ); ++// peepconstraint ( 1.src == 0.dst, 1.mem == 0.mem ); ++// peepreplace ( storeI( 1.mem 1.mem 1.src ) ); ++//%} ++ ++//----------SMARTSPILL RULES--------------------------------------------------- ++// These must follow all instruction definitions as they use the names ++// defined in the instructions definitions. ++ +diff --git a/hotspot/src/cpu/mips/vm/nativeInst_mips.cpp b/hotspot/src/cpu/mips/vm/nativeInst_mips.cpp +new file mode 100644 +index 0000000000..e1f7cd944d +--- /dev/null ++++ b/hotspot/src/cpu/mips/vm/nativeInst_mips.cpp +@@ -0,0 +1,1829 @@ ++/* ++ * Copyright (c) 1997, 2014, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ * ++ */ ++ ++#include "precompiled.hpp" ++#include "asm/macroAssembler.hpp" ++#include "code/codeBlob.hpp" ++#include "code/codeCache.hpp" ++#include "compiler/disassembler.hpp" ++#include "memory/resourceArea.hpp" ++#include "nativeInst_mips.hpp" ++#include "oops/oop.inline.hpp" ++#include "runtime/handles.hpp" ++#include "runtime/sharedRuntime.hpp" ++#include "runtime/stubRoutines.hpp" ++#include "utilities/ostream.hpp" ++ ++#include <sys/mman.h> ++ ++#define A0 RA0 ++#define A1 RA1 ++#define A2 RA2 ++#define A3 RA3 ++#define A4 RA4 ++#define A5 RA5 ++#define A6 RA6 ++#define A7 RA7 ++#define T0 RT0 ++#define T1 RT1 ++#define T2 RT2 ++#define T3 RT3 ++#define T8 RT8 ++#define T9 RT9 ++ ++void NativeInstruction::wrote(int offset) { ++ ICache::invalidate_word(addr_at(offset)); ++} ++ ++void NativeInstruction::set_long_at(int offset, long i) { ++ address addr = addr_at(offset); ++ *(long*)addr = i; ++ ICache::invalidate_range(addr, 8); ++} ++ ++static int illegal_instruction_bits = 0; ++ ++int NativeInstruction::illegal_instruction() { ++ if (illegal_instruction_bits == 0) { ++ ResourceMark rm; ++ char buf[40]; ++ CodeBuffer cbuf((address)&buf[0], 20); ++ MacroAssembler* a = new MacroAssembler(&cbuf); ++ address ia = a->pc(); ++ a->brk(11); ++ int bits = *(int*)ia; ++ illegal_instruction_bits = bits; ++ } ++ return illegal_instruction_bits; ++} ++ ++bool NativeInstruction::is_int_branch() { ++ switch(Assembler::opcode(insn_word())) { ++ case Assembler::beq_op: ++ case Assembler::beql_op: ++ case Assembler::bgtz_op: ++ case Assembler::bgtzl_op: ++ case Assembler::blez_op: ++ case Assembler::blezl_op: ++ case Assembler::bne_op: ++ case Assembler::bnel_op: ++ return true; ++ case Assembler::regimm_op: ++ switch(Assembler::rt(insn_word())) { ++ case Assembler::bgez_op: ++ case Assembler::bgezal_op: ++ case Assembler::bgezall_op: ++ case Assembler::bgezl_op: ++ case Assembler::bltz_op: ++ case Assembler::bltzal_op: ++ case Assembler::bltzall_op: ++ case Assembler::bltzl_op: ++ return true; ++ } ++ } ++ ++ return false; ++} ++ ++bool NativeInstruction::is_float_branch() { ++ if (!is_op(Assembler::cop1_op) || ++ !is_rs((Register)Assembler::bc1f_op)) return false; ++ ++ switch(Assembler::rt(insn_word())) { ++ case Assembler::bcf_op: ++ case Assembler::bcfl_op: ++ case Assembler::bct_op: ++ case Assembler::bctl_op: ++ return true; ++ } ++ ++ return false; ++} ++ ++ ++void NativeCall::verify() { ++ // make sure code pattern is actually a call instruction ++ ++ // nop ++ // nop ++ // nop ++ // nop ++ // jal target ++ // nop ++ if ( is_nop() && ++ nativeInstruction_at(addr_at(4))->is_nop() && ++ nativeInstruction_at(addr_at(8))->is_nop() && ++ nativeInstruction_at(addr_at(12))->is_nop() && ++ is_op(int_at(16), Assembler::jal_op) && ++ nativeInstruction_at(addr_at(20))->is_nop() ) { ++ return; ++ } ++ ++ // jal targe ++ // nop ++ if ( is_op(int_at(0), Assembler::jal_op) && ++ nativeInstruction_at(addr_at(4))->is_nop() ) { ++ return; ++ } ++ ++ // li64 ++ if ( is_op(Assembler::lui_op) && ++ is_op(int_at(4), Assembler::ori_op) && ++ is_special_op(int_at(8), Assembler::dsll_op) && ++ is_op(int_at(12), Assembler::ori_op) && ++ is_special_op(int_at(16), Assembler::dsll_op) && ++ is_op(int_at(20), Assembler::ori_op) && ++ is_special_op(int_at(24), Assembler::jalr_op) ) { ++ return; ++ } ++ ++ //lui dst, imm16 ++ //ori dst, dst, imm16 ++ //dsll dst, dst, 16 ++ //ori dst, dst, imm16 ++ if ( is_op(Assembler::lui_op) && ++ is_op (int_at(4), Assembler::ori_op) && ++ is_special_op(int_at(8), Assembler::dsll_op) && ++ is_op 
(int_at(12), Assembler::ori_op) && ++ is_special_op(int_at(16), Assembler::jalr_op) ) { ++ return; ++ } ++ ++ //ori dst, R0, imm16 ++ //dsll dst, dst, 16 ++ //ori dst, dst, imm16 ++ //nop ++ if ( is_op(Assembler::ori_op) && ++ is_special_op(int_at(4), Assembler::dsll_op) && ++ is_op (int_at(8), Assembler::ori_op) && ++ nativeInstruction_at(addr_at(12))->is_nop() && ++ is_special_op(int_at(16), Assembler::jalr_op) ) { ++ return; ++ } ++ ++ //ori dst, R0, imm16 ++ //dsll dst, dst, 16 ++ //nop ++ //nop ++ if ( is_op(Assembler::ori_op) && ++ is_special_op(int_at(4), Assembler::dsll_op) && ++ nativeInstruction_at(addr_at(8))->is_nop() && ++ nativeInstruction_at(addr_at(12))->is_nop() && ++ is_special_op(int_at(16), Assembler::jalr_op) ) { ++ return; ++ } ++ ++ //daddiu dst, R0, imm16 ++ //nop ++ //nop ++ //nop ++ if ( is_op(Assembler::daddiu_op) && ++ nativeInstruction_at(addr_at(4))->is_nop() && ++ nativeInstruction_at(addr_at(8))->is_nop() && ++ nativeInstruction_at(addr_at(12))->is_nop() && ++ is_special_op(int_at(16), Assembler::jalr_op) ) { ++ return; ++ } ++ ++ // FIXME: why add jr_op here? ++ //daddiu dst, R0, imm16 ++ //nop ++ //nop ++ //nop ++ if ( is_op(Assembler::daddiu_op) && ++ nativeInstruction_at(addr_at(4))->is_nop() && ++ nativeInstruction_at(addr_at(8))->is_nop() && ++ nativeInstruction_at(addr_at(12))->is_nop() && ++ is_special_op(int_at(16), Assembler::jr_op) ) { ++ return; ++ } ++ ++ //lui dst, imm16 ++ //ori dst, dst, imm16 ++ //nop ++ //nop ++ if ( is_op(Assembler::lui_op) && ++ is_op (int_at(4), Assembler::ori_op) && ++ nativeInstruction_at(addr_at(8))->is_nop() && ++ nativeInstruction_at(addr_at(12))->is_nop() && ++ is_special_op(int_at(16), Assembler::jalr_op) ) { ++ return; ++ } ++ ++ //lui dst, imm16 ++ //nop ++ //nop ++ //nop ++ if ( is_op(Assembler::lui_op) && ++ nativeInstruction_at(addr_at(4))->is_nop() && ++ nativeInstruction_at(addr_at(8))->is_nop() && ++ nativeInstruction_at(addr_at(12))->is_nop() && ++ is_special_op(int_at(16), Assembler::jalr_op) ) { ++ return; ++ } ++ ++ //daddiu dst, R0, imm16 ++ //nop ++ if ( is_op(Assembler::daddiu_op) && ++ nativeInstruction_at(addr_at(4))->is_nop() && ++ is_special_op(int_at(8), Assembler::jalr_op) ) { ++ return; ++ } ++ ++ //lui dst, imm16 ++ //ori dst, dst, imm16 ++ if ( is_op(Assembler::lui_op) && ++ is_op (int_at(4), Assembler::ori_op) && ++ is_special_op(int_at(8), Assembler::jalr_op) ) { ++ return; ++ } ++ ++ //lui dst, imm16 ++ //nop ++ if ( is_op(Assembler::lui_op) && ++ nativeInstruction_at(addr_at(4))->is_nop() && ++ is_special_op(int_at(8), Assembler::jalr_op) ) { ++ return; ++ } ++ ++ if (nativeInstruction_at(addr_at(0))->is_trampoline_call()) ++ return; ++ ++ fatal("not a call"); ++} ++ ++address NativeCall::target_addr_for_insn() const { ++ // jal target ++ // nop ++ if ( is_op(int_at(0), Assembler::jal_op) && ++ nativeInstruction_at(addr_at(4))->is_nop()) { ++ int instr_index = int_at(0) & 0x3ffffff; ++ intptr_t target_high = ((intptr_t)addr_at(4)) & 0xfffffffff0000000; ++ intptr_t target = target_high | (instr_index << 2); ++ return (address)target; ++ } ++ ++ // nop ++ // nop ++ // nop ++ // nop ++ // jal target ++ // nop ++ if ( nativeInstruction_at(addr_at(0))->is_nop() && ++ nativeInstruction_at(addr_at(4))->is_nop() && ++ nativeInstruction_at(addr_at(8))->is_nop() && ++ nativeInstruction_at(addr_at(12))->is_nop() && ++ is_op(int_at(16), Assembler::jal_op) && ++ nativeInstruction_at(addr_at(20))->is_nop()) { ++ int instr_index = int_at(16) & 0x3ffffff; ++ intptr_t target_high = 
((intptr_t)addr_at(20)) & 0xfffffffff0000000; ++ intptr_t target = target_high | (instr_index << 2); ++ return (address)target; ++ } ++ ++ // li64 ++ if ( is_op(Assembler::lui_op) && ++ is_op(int_at(4), Assembler::ori_op) && ++ is_special_op(int_at(8), Assembler::dsll_op) && ++ is_op(int_at(12), Assembler::ori_op) && ++ is_special_op(int_at(16), Assembler::dsll_op) && ++ is_op(int_at(20), Assembler::ori_op) ) { ++ ++ return (address)Assembler::merge( (intptr_t)(int_at(20) & 0xffff), ++ (intptr_t)(int_at(12) & 0xffff), ++ (intptr_t)(int_at(4) & 0xffff), ++ (intptr_t)(int_at(0) & 0xffff)); ++ } ++ ++ //lui dst, imm16 ++ //ori dst, dst, imm16 ++ //dsll dst, dst, 16 ++ //ori dst, dst, imm16 ++ if ( is_op(Assembler::lui_op) && ++ is_op (int_at(4), Assembler::ori_op) && ++ is_special_op(int_at(8), Assembler::dsll_op) && ++ is_op (int_at(12), Assembler::ori_op) ) { ++ ++ return (address)Assembler::merge( (intptr_t)(int_at(12) & 0xffff), ++ (intptr_t)(int_at(4) & 0xffff), ++ (intptr_t)(int_at(0) & 0xffff), ++ (intptr_t)0); ++ } ++ ++ //lui dst, imm16 ++ //ori dst, dst, imm16 ++ //dsll dst, dst, 16 ++ //ld dst, dst, imm16 ++ if ( is_op(Assembler::lui_op) && ++ is_op (int_at(4), Assembler::ori_op) && ++ is_special_op(int_at(8), Assembler::dsll_op) && ++ is_op (int_at(12), Assembler::ld_op) ) { ++ ++ address dest = (address)Assembler::merge( (intptr_t)0, ++ (intptr_t)(int_at(4) & 0xffff), ++ (intptr_t)(int_at(0) & 0xffff), ++ (intptr_t)0); ++ return dest + Assembler::simm16((intptr_t)int_at(12) & 0xffff); ++ } ++ ++ //ori dst, R0, imm16 ++ //dsll dst, dst, 16 ++ //ori dst, dst, imm16 ++ //nop ++ if ( is_op(Assembler::ori_op) && ++ is_special_op(int_at(4), Assembler::dsll_op) && ++ is_op (int_at(8), Assembler::ori_op) && ++ nativeInstruction_at(addr_at(12))->is_nop()) { ++ ++ return (address)Assembler::merge( (intptr_t)(int_at(8) & 0xffff), ++ (intptr_t)(int_at(0) & 0xffff), ++ (intptr_t)0, ++ (intptr_t)0); ++ } ++ ++ //ori dst, R0, imm16 ++ //dsll dst, dst, 16 ++ //nop ++ //nop ++ if ( is_op(Assembler::ori_op) && ++ is_special_op(int_at(4), Assembler::dsll_op) && ++ nativeInstruction_at(addr_at(8))->is_nop() && ++ nativeInstruction_at(addr_at(12))->is_nop()) { ++ ++ return (address)Assembler::merge( (intptr_t)(0), ++ (intptr_t)(int_at(0) & 0xffff), ++ (intptr_t)0, ++ (intptr_t)0); ++ } ++ ++ //daddiu dst, R0, imm16 ++ //nop ++ //nop <-- optional ++ //nop <-- optional ++ if ( is_op(Assembler::daddiu_op) && ++ nativeInstruction_at(addr_at(4))->is_nop() ) { ++ ++ int sign = int_at(0) & 0x8000; ++ if (sign == 0) { ++ return (address)Assembler::merge( (intptr_t)(int_at(0) & 0xffff), ++ (intptr_t)0, ++ (intptr_t)0, ++ (intptr_t)0); ++ } else { ++ return (address)Assembler::merge( (intptr_t)(int_at(0) & 0xffff), ++ (intptr_t)(0xffff), ++ (intptr_t)(0xffff), ++ (intptr_t)(0xffff)); ++ } ++ } ++ ++ //lui dst, imm16 ++ //ori dst, dst, imm16 ++ //nop <-- optional ++ //nop <-- optional ++ if ( is_op(Assembler::lui_op) && ++ is_op (int_at(4), Assembler::ori_op) ) { ++ ++ int sign = int_at(0) & 0x8000; ++ if (sign == 0) { ++ return (address)Assembler::merge( (intptr_t)(int_at(4) & 0xffff), ++ (intptr_t)(int_at(0) & 0xffff), ++ (intptr_t)0, ++ (intptr_t)0); ++ } else { ++ return (address)Assembler::merge( (intptr_t)(int_at(4) & 0xffff), ++ (intptr_t)(int_at(0) & 0xffff), ++ (intptr_t)(0xffff), ++ (intptr_t)(0xffff)); ++ } ++ } ++ ++ //lui dst, imm16 ++ //nop ++ //nop <-- optional ++ //nop <-- optional ++ if ( is_op(Assembler::lui_op) && ++ nativeInstruction_at(addr_at(4))->is_nop() ) { ++ ++ int sign = int_at(0) & 
0x8000; ++ if (sign == 0) { ++ return (address)Assembler::merge( (intptr_t)0, ++ (intptr_t)(int_at(0) & 0xffff), ++ (intptr_t)0, ++ (intptr_t)0); ++ } else { ++ return (address)Assembler::merge( (intptr_t)0, ++ (intptr_t)(int_at(0) & 0xffff), ++ (intptr_t)(0xffff), ++ (intptr_t)(0xffff)); ++ } ++ } ++ ++ tty->print_cr("not a call: addr = " INTPTR_FORMAT , p2i(addr_at(0))); ++ tty->print_cr("======= Start decoding at addr = " INTPTR_FORMAT " =======", p2i(addr_at(0))); ++ Disassembler::decode(addr_at(0) - 2 * 4, addr_at(0) + 8 * 4, tty); ++ tty->print_cr("======= End of decoding ======="); ++ fatal("not a call"); ++ return NULL; ++} ++ ++// Extract call destination from a NativeCall. The call might use a trampoline stub. ++address NativeCall::destination() const { ++ address addr = (address)this; ++ address destination = target_addr_for_insn(); ++ // Do we use a trampoline stub for this call? ++ // Trampoline stubs are located behind the main code. ++ if (destination > addr) { ++ // Filter out recursive method invocation (call to verified/unverified entry point). ++ CodeBlob* cb = CodeCache::find_blob_unsafe(addr); // Else we get assertion if nmethod is zombie. ++ assert(cb && cb->is_nmethod(), "sanity"); ++ nmethod *nm = (nmethod *)cb; ++ NativeInstruction* ni = nativeInstruction_at(addr); ++ if (nm->stub_contains(destination) && ni->is_trampoline_call()) { ++ // Yes we do, so get the destination from the trampoline stub. ++ const address trampoline_stub_addr = destination; ++ destination = nativeCallTrampolineStub_at(trampoline_stub_addr)->destination(); ++ } ++ } ++ return destination; ++} ++ ++// Similar to replace_mt_safe, but just changes the destination. The ++// important thing is that free-running threads are able to execute this ++// call instruction at all times. ++// ++// Used in the runtime linkage of calls; see class CompiledIC. ++// ++// Add parameter assert_lock to switch off assertion ++// during code generation, where no patching lock is needed. ++void NativeCall::set_destination_mt_safe(address dest, bool assert_lock) { ++ assert(!assert_lock || ++ (Patching_lock->is_locked() || SafepointSynchronize::is_at_safepoint()), ++ "concurrent code patching"); ++ ++ ResourceMark rm; ++ address addr_call = addr_at(0); ++ assert(NativeCall::is_call_at(addr_call), "unexpected code at call site"); ++ // Patch the constant in the call's trampoline stub. ++ if (MacroAssembler::reachable_from_cache()) { ++ set_destination(dest); ++ } else { ++ address trampoline_stub_addr = nativeCall_at(addr_call)->target_addr_for_insn(); ++ assert (get_trampoline() != NULL && trampoline_stub_addr == get_trampoline(), "we need a trampoline"); ++ nativeCallTrampolineStub_at(trampoline_stub_addr)->set_destination(dest); ++ } ++} ++ ++ ++address NativeCall::get_trampoline() { ++ address call_addr = addr_at(0); ++ ++ CodeBlob *code = CodeCache::find_blob(call_addr); ++ assert(code != NULL, "Could not find the containing code blob"); ++ ++ // If the codeBlob is not a nmethod, this is because we get here from the ++ // CodeBlob constructor, which is called within the nmethod constructor. 
++ return trampoline_stub_Relocation::get_trampoline_for(call_addr, (nmethod*)code); ++} ++ ++// manual implementation of GSSQ ++// ++// 00000001200009c0 : ++// 1200009c0: 0085202d daddu a0, a0, a1 ++// 1200009c4: e8860027 gssq a2, a3, 0(a0) ++// 1200009c8: 03e00008 jr ra ++// 1200009cc: 00000000 nop ++// ++typedef void (* atomic_store128_ptr)(long *addr, int offset, long low64, long hi64); ++ ++static int *buf; ++ ++static atomic_store128_ptr get_atomic_store128_func() { ++ assert(UseLEXT1, "UseLEXT1 must be true"); ++ static atomic_store128_ptr p = NULL; ++ if (p != NULL) ++ return p; ++ ++ buf = (int *)mmap(NULL, 1024, PROT_READ | PROT_WRITE | PROT_EXEC, MAP_PRIVATE | MAP_ANONYMOUS, ++ -1, 0); ++ buf[0] = 0x0085202d; ++ buf[1] = (0x3a << 26) | (4 << 21) | (6 << 16) | 0x27; /* gssq $a2, $a3, 0($a0) */ ++ buf[2] = 0x03e00008; ++ buf[3] = 0; ++ ++ asm("sync"); ++ p = (atomic_store128_ptr)buf; ++ return p; ++} ++ ++void NativeCall::patch_on_jal_only(address dst) { ++ long dest = ((long)dst - (((long)addr_at(4)) & 0xfffffffff0000000))>>2; ++ if ((dest >= 0) && (dest < (1<<26))) { ++ jint jal_inst = (Assembler::jal_op << 26) | dest; ++ set_int_at(0, jal_inst); ++ ICache::invalidate_range(addr_at(0), 4); ++ } else { ++ ShouldNotReachHere(); ++ } ++} ++ ++void NativeCall::patch_on_trampoline(address dest) { ++ assert(nativeInstruction_at(addr_at(0))->is_trampoline_call(), "unexpected code at call site"); ++ jlong dst = (jlong) dest; ++ //lui dst, imm16 ++ //ori dst, dst, imm16 ++ //dsll dst, dst, 16 ++ //ld dst, dst, imm16 ++ if ((dst> 0) && Assembler::is_simm16(dst >> 32)) { ++ dst += (dst & 0x8000) << 1; ++ set_int_at(0, (int_at(0) & 0xffff0000) | (Assembler::split_low(dst >> 32) & 0xffff)); ++ set_int_at(4, (int_at(4) & 0xffff0000) | (Assembler::split_low(dst >> 16) & 0xffff)); ++ set_int_at(12, (int_at(12) & 0xffff0000) | (Assembler::split_low(dst) & 0xffff)); ++ ++ ICache::invalidate_range(addr_at(0), 24); ++ } else { ++ ShouldNotReachHere(); ++ } ++} ++ ++void NativeCall::patch_on_jal_gs(address dst) { ++ long dest = ((long)dst - (((long)addr_at(20)) & 0xfffffffff0000000))>>2; ++ if ((dest >= 0) && (dest < (1<<26))) { ++ jint jal_inst = (Assembler::jal_op << 26) | dest; ++ set_int_at(16, jal_inst); ++ ICache::invalidate_range(addr_at(16), 4); ++ } else { ++ ShouldNotReachHere(); ++ } ++} ++ ++void NativeCall::patch_on_jal(address dst) { ++ patch_on_jal_gs(dst); ++} ++ ++void NativeCall::patch_on_jalr_gs(address dst) { ++ patch_set48_gs(dst); ++} ++ ++void NativeCall::patch_on_jalr(address dst) { ++ patch_set48(dst); ++} ++ ++void NativeCall::patch_set48_gs(address dest) { ++ jlong value = (jlong) dest; ++ int rt_reg = (int_at(0) & (0x1f << 16)); ++ ++ if (rt_reg == 0) rt_reg = 25 << 16; // r25 is T9 ++ ++ int rs_reg = rt_reg << 5; ++ int rd_reg = rt_reg >> 5; ++ ++ int hi = (int)(value >> 32); ++ int lo = (int)(value & ~0); ++ int count = 0; ++ int insts[4] = {0, 0, 0, 0}; ++ ++ if (value == lo) { // 32-bit integer ++ if (Assembler::is_simm16(value)) { ++ insts[count] = (Assembler::daddiu_op << 26) | rt_reg | Assembler::split_low(value); ++ count += 1; ++ } else { ++ insts[count] = (Assembler::lui_op << 26) | rt_reg | Assembler::split_low(value >> 16); ++ count += 1; ++ if (Assembler::split_low(value)) { ++ insts[count] = (Assembler::ori_op << 26) | rs_reg | rt_reg | Assembler::split_low(value); ++ count += 1; ++ } ++ } ++ } else if (hi == 0) { // hardware zero-extends to upper 32 ++ insts[count] = (Assembler::ori_op << 26) | rt_reg | Assembler::split_low(julong(value) >> 16); ++ count += 
1; ++ insts[count] = (Assembler::dsll_op) | rt_reg | rd_reg | (16 << 6); ++ count += 1; ++ if (Assembler::split_low(value)) { ++ insts[count] = (Assembler::ori_op << 26) | rs_reg | rt_reg | Assembler::split_low(value); ++ count += 1; ++ } ++ } else if ((value> 0) && Assembler::is_simm16(value >> 32)) { ++ insts[count] = (Assembler::lui_op << 26) | rt_reg | Assembler::split_low(value >> 32); ++ count += 1; ++ insts[count] = (Assembler::ori_op << 26) | rs_reg | rt_reg | Assembler::split_low(value >> 16); ++ count += 1; ++ insts[count] = (Assembler::dsll_op) | rt_reg | rd_reg | (16 << 6); ++ count += 1; ++ insts[count] = (Assembler::ori_op << 26) | rs_reg | rt_reg | Assembler::split_low(value); ++ count += 1; ++ } else { ++ tty->print_cr("dest = 0x%lx", value); ++ guarantee(false, "Not supported yet !"); ++ } ++ ++ while (count < 4) { ++ insts[count] = 0; ++ count++; ++ } ++ ++ guarantee(((long)addr_at(0) % (BytesPerWord * 2)) == 0, "must be aligned"); ++ atomic_store128_ptr func = get_atomic_store128_func(); ++ (*func)((long *)addr_at(0), 0, *(long *)&insts[0], *(long *)&insts[2]); ++ ++ ICache::invalidate_range(addr_at(0), 16); ++} ++ ++void NativeCall::patch_set32_gs(address dest) { ++ jlong value = (jlong) dest; ++ int rt_reg = (int_at(0) & (0x1f << 16)); ++ ++ if (rt_reg == 0) rt_reg = 25 << 16; // r25 is T9 ++ ++ int rs_reg = rt_reg << 5; ++ int rd_reg = rt_reg >> 5; ++ ++ int hi = (int)(value >> 32); ++ int lo = (int)(value & ~0); ++ ++ int count = 0; ++ ++ int insts[2] = {0, 0}; ++ ++ if (value == lo) { // 32-bit integer ++ if (Assembler::is_simm16(value)) { ++ //daddiu(d, R0, value); ++ //set_int_at(count << 2, (Assembler::daddiu_op << 26) | rt_reg | Assembler::split_low(value)); ++ insts[count] = (Assembler::daddiu_op << 26) | rt_reg | Assembler::split_low(value); ++ count += 1; ++ } else { ++ //lui(d, split_low(value >> 16)); ++ //set_int_at(count << 2, (Assembler::lui_op << 26) | rt_reg | Assembler::split_low(value >> 16)); ++ insts[count] = (Assembler::lui_op << 26) | rt_reg | Assembler::split_low(value >> 16); ++ count += 1; ++ if (Assembler::split_low(value)) { ++ //ori(d, d, split_low(value)); ++ //set_int_at(count << 2, (Assembler::ori_op << 26) | rs_reg | rt_reg | Assembler::split_low(value)); ++ insts[count] = (Assembler::ori_op << 26) | rs_reg | rt_reg | Assembler::split_low(value); ++ count += 1; ++ } ++ } ++ } else { ++ tty->print_cr("dest = 0x%lx", value); ++ guarantee(false, "Not supported yet !"); ++ } ++ ++ while (count < 2) { ++ //nop(); ++ //set_int_at(count << 2, 0); ++ insts[count] = 0; ++ count++; ++ } ++ ++ long inst = insts[1]; ++ inst = inst << 32; ++ inst = inst + insts[0]; ++ ++ set_long_at(0, inst); ++} ++ ++void NativeCall::patch_set48(address dest) { ++ jlong value = (jlong) dest; ++ int rt_reg = (int_at(0) & (0x1f << 16)); ++ ++ if (rt_reg == 0) rt_reg = 25 << 16; // r25 is T9 ++ ++ int rs_reg = rt_reg << 5; ++ int rd_reg = rt_reg >> 5; ++ ++ int hi = (int)(value >> 32); ++ int lo = (int)(value & ~0); ++ ++ int count = 0; ++ ++ if (value == lo) { // 32-bit integer ++ if (Assembler::is_simm16(value)) { ++ //daddiu(d, R0, value); ++ set_int_at(count << 2, (Assembler::daddiu_op << 26) | rt_reg | Assembler::split_low(value)); ++ count += 1; ++ } else { ++ //lui(d, split_low(value >> 16)); ++ set_int_at(count << 2, (Assembler::lui_op << 26) | rt_reg | Assembler::split_low(value >> 16)); ++ count += 1; ++ if (Assembler::split_low(value)) { ++ //ori(d, d, split_low(value)); ++ set_int_at(count << 2, (Assembler::ori_op << 26) | rs_reg | rt_reg | 
Assembler::split_low(value)); ++ count += 1; ++ } ++ } ++ } else if (hi == 0) { // hardware zero-extends to upper 32 ++ //ori(d, R0, julong(value) >> 16); ++ set_int_at(count << 2, (Assembler::ori_op << 26) | rt_reg | Assembler::split_low(julong(value) >> 16)); ++ count += 1; ++ //dsll(d, d, 16); ++ set_int_at(count << 2, (Assembler::dsll_op) | rt_reg | rd_reg | (16 << 6)); ++ count += 1; ++ if (Assembler::split_low(value)) { ++ //ori(d, d, split_low(value)); ++ set_int_at(count << 2, (Assembler::ori_op << 26) | rs_reg | rt_reg | Assembler::split_low(value)); ++ count += 1; ++ } ++ } else if ((value> 0) && Assembler::is_simm16(value >> 32)) { ++ //lui(d, value >> 32); ++ set_int_at(count << 2, (Assembler::lui_op << 26) | rt_reg | Assembler::split_low(value >> 32)); ++ count += 1; ++ //ori(d, d, split_low(value >> 16)); ++ set_int_at(count << 2, (Assembler::ori_op << 26) | rs_reg | rt_reg | Assembler::split_low(value >> 16)); ++ count += 1; ++ //dsll(d, d, 16); ++ set_int_at(count << 2, (Assembler::dsll_op) | rt_reg | rd_reg | (16 << 6)); ++ count += 1; ++ //ori(d, d, split_low(value)); ++ set_int_at(count << 2, (Assembler::ori_op << 26) | rs_reg | rt_reg | Assembler::split_low(value)); ++ count += 1; ++ } else { ++ tty->print_cr("dest = 0x%lx", value); ++ guarantee(false, "Not supported yet !"); ++ } ++ ++ while (count < 4) { ++ //nop(); ++ set_int_at(count << 2, 0); ++ count++; ++ } ++ ++ ICache::invalidate_range(addr_at(0), 16); ++} ++ ++void NativeCall::patch_set32(address dest) { ++ patch_set32_gs(dest); ++} ++ ++void NativeCall::set_destination(address dest) { ++ OrderAccess::fence(); ++ ++ // li64 ++ if (is_special_op(int_at(16), Assembler::dsll_op)) { ++ int first_word = int_at(0); ++ set_int_at(0, 0x1000ffff); /* .1: b .1 */ ++ set_int_at(4, (int_at(4) & 0xffff0000) | (Assembler::split_low((intptr_t)dest >> 32) & 0xffff)); ++ set_int_at(12, (int_at(12) & 0xffff0000) | (Assembler::split_low((intptr_t)dest >> 16) & 0xffff)); ++ set_int_at(20, (int_at(20) & 0xffff0000) | (Assembler::split_low((intptr_t)dest) & 0xffff)); ++ set_int_at(0, (first_word & 0xffff0000) | (Assembler::split_low((intptr_t)dest >> 48) & 0xffff)); ++ ICache::invalidate_range(addr_at(0), 24); ++ } else if (is_op(int_at(16), Assembler::jal_op)) { ++ if (UseLEXT1) { ++ patch_on_jal_gs(dest); ++ } else { ++ patch_on_jal(dest); ++ } ++ } else if (is_op(int_at(0), Assembler::jal_op)) { ++ patch_on_jal_only(dest); ++ } else if (is_special_op(int_at(16), Assembler::jalr_op)) { ++ if (UseLEXT1) { ++ patch_on_jalr_gs(dest); ++ } else { ++ patch_on_jalr(dest); ++ } ++ } else if (is_special_op(int_at(8), Assembler::jalr_op)) { ++ guarantee(!os::is_MP() || (((long)addr_at(0) % 8) == 0), "destination must be aligned by 8"); ++ if (UseLEXT1) { ++ patch_set32_gs(dest); ++ } else { ++ patch_set32(dest); ++ } ++ ICache::invalidate_range(addr_at(0), 8); ++ } else { ++ fatal("not a call"); ++ } ++} ++ ++void NativeCall::print() { ++ tty->print_cr(PTR_FORMAT ": call " PTR_FORMAT, ++ p2i(instruction_address()), p2i(destination())); ++} ++ ++// Inserts a native call instruction at a given pc ++void NativeCall::insert(address code_pos, address entry) { ++ NativeCall *call = nativeCall_at(code_pos); ++ CodeBuffer cb(call->addr_at(0), instruction_size); ++ MacroAssembler masm(&cb); ++#define __ masm. ++ __ li48(T9, (long)entry); ++ __ jalr (); ++ __ delayed()->nop(); ++#undef __ ++ ++ ICache::invalidate_range(call->addr_at(0), instruction_size); ++} ++ ++// MT-safe patching of a call instruction. 
++// First patches first word of instruction to two jmp's that jmps to them ++// selfs (spinlock). Then patches the last byte, and then atomicly replaces ++// the jmp's with the first 4 byte of the new instruction. ++void NativeCall::replace_mt_safe(address instr_addr, address code_buffer) { ++ Unimplemented(); ++} ++ ++//------------------------------------------------------------------- ++ ++void NativeMovConstReg::verify() { ++ // li64 ++ if ( is_op(Assembler::lui_op) && ++ is_op(int_at(4), Assembler::ori_op) && ++ is_special_op(int_at(8), Assembler::dsll_op) && ++ is_op(int_at(12), Assembler::ori_op) && ++ is_special_op(int_at(16), Assembler::dsll_op) && ++ is_op(int_at(20), Assembler::ori_op) ) { ++ return; ++ } ++ ++ //lui dst, imm16 ++ //ori dst, dst, imm16 ++ //dsll dst, dst, 16 ++ //ori dst, dst, imm16 ++ if ( is_op(Assembler::lui_op) && ++ is_op (int_at(4), Assembler::ori_op) && ++ is_special_op(int_at(8), Assembler::dsll_op) && ++ is_op (int_at(12), Assembler::ori_op) ) { ++ return; ++ } ++ ++ //ori dst, R0, imm16 ++ //dsll dst, dst, 16 ++ //ori dst, dst, imm16 ++ //nop ++ if ( is_op(Assembler::ori_op) && ++ is_special_op(int_at(4), Assembler::dsll_op) && ++ is_op (int_at(8), Assembler::ori_op) && ++ nativeInstruction_at(addr_at(12))->is_nop()) { ++ return; ++ } ++ ++ //ori dst, R0, imm16 ++ //dsll dst, dst, 16 ++ //nop ++ //nop ++ if ( is_op(Assembler::ori_op) && ++ is_special_op(int_at(4), Assembler::dsll_op) && ++ nativeInstruction_at(addr_at(8))->is_nop() && ++ nativeInstruction_at(addr_at(12))->is_nop()) { ++ return; ++ } ++ ++ //daddiu dst, R0, imm16 ++ //nop ++ //nop ++ //nop ++ if ( is_op(Assembler::daddiu_op) && ++ nativeInstruction_at(addr_at(4))->is_nop() && ++ nativeInstruction_at(addr_at(8))->is_nop() && ++ nativeInstruction_at(addr_at(12))->is_nop() ) { ++ return; ++ } ++ ++ //lui dst, imm16 ++ //ori dst, dst, imm16 ++ //nop ++ //nop ++ if ( is_op(Assembler::lui_op) && ++ is_op (int_at(4), Assembler::ori_op) && ++ nativeInstruction_at(addr_at(8))->is_nop() && ++ nativeInstruction_at(addr_at(12))->is_nop() ) { ++ return; ++ } ++ ++ //lui dst, imm16 ++ //nop ++ //nop ++ //nop ++ if ( is_op(Assembler::lui_op) && ++ nativeInstruction_at(addr_at(4))->is_nop() && ++ nativeInstruction_at(addr_at(8))->is_nop() && ++ nativeInstruction_at(addr_at(12))->is_nop() ) { ++ return; ++ } ++ ++ fatal("not a mov reg, imm64/imm48"); ++} ++ ++void NativeMovConstReg::print() { ++ tty->print_cr(PTR_FORMAT ": mov reg, " INTPTR_FORMAT, ++ p2i(instruction_address()), data()); ++} ++ ++intptr_t NativeMovConstReg::data() const { ++ // li64 ++ if ( is_op(Assembler::lui_op) && ++ is_op(int_at(4), Assembler::ori_op) && ++ is_special_op(int_at(8), Assembler::dsll_op) && ++ is_op(int_at(12), Assembler::ori_op) && ++ is_special_op(int_at(16), Assembler::dsll_op) && ++ is_op(int_at(20), Assembler::ori_op) ) { ++ ++ return Assembler::merge( (intptr_t)(int_at(20) & 0xffff), ++ (intptr_t)(int_at(12) & 0xffff), ++ (intptr_t)(int_at(4) & 0xffff), ++ (intptr_t)(int_at(0) & 0xffff)); ++ } ++ ++ //lui dst, imm16 ++ //ori dst, dst, imm16 ++ //dsll dst, dst, 16 ++ //ori dst, dst, imm16 ++ if ( is_op(Assembler::lui_op) && ++ is_op (int_at(4), Assembler::ori_op) && ++ is_special_op(int_at(8), Assembler::dsll_op) && ++ is_op (int_at(12), Assembler::ori_op) ) { ++ ++ return Assembler::merge( (intptr_t)(int_at(12) & 0xffff), ++ (intptr_t)(int_at(4) & 0xffff), ++ (intptr_t)(int_at(0) & 0xffff), ++ (intptr_t)0); ++ } ++ ++ //ori dst, R0, imm16 ++ //dsll dst, dst, 16 ++ //ori dst, dst, imm16 ++ //nop ++ if ( 
is_op(Assembler::ori_op) && ++ is_special_op(int_at(4), Assembler::dsll_op) && ++ is_op (int_at(8), Assembler::ori_op) && ++ nativeInstruction_at(addr_at(12))->is_nop()) { ++ ++ return Assembler::merge( (intptr_t)(int_at(8) & 0xffff), ++ (intptr_t)(int_at(0) & 0xffff), ++ (intptr_t)0, ++ (intptr_t)0); ++ } ++ ++ //ori dst, R0, imm16 ++ //dsll dst, dst, 16 ++ //nop ++ //nop ++ if ( is_op(Assembler::ori_op) && ++ is_special_op(int_at(4), Assembler::dsll_op) && ++ nativeInstruction_at(addr_at(8))->is_nop() && ++ nativeInstruction_at(addr_at(12))->is_nop()) { ++ ++ return Assembler::merge( (intptr_t)(0), ++ (intptr_t)(int_at(0) & 0xffff), ++ (intptr_t)0, ++ (intptr_t)0); ++ } ++ ++ //daddiu dst, R0, imm16 ++ //nop ++ //nop ++ //nop ++ if ( is_op(Assembler::daddiu_op) && ++ nativeInstruction_at(addr_at(4))->is_nop() && ++ nativeInstruction_at(addr_at(8))->is_nop() && ++ nativeInstruction_at(addr_at(12))->is_nop() ) { ++ ++ int sign = int_at(0) & 0x8000; ++ if (sign == 0) { ++ return Assembler::merge( (intptr_t)(int_at(0) & 0xffff), ++ (intptr_t)0, ++ (intptr_t)0, ++ (intptr_t)0); ++ } else { ++ return Assembler::merge( (intptr_t)(int_at(0) & 0xffff), ++ (intptr_t)(0xffff), ++ (intptr_t)(0xffff), ++ (intptr_t)(0xffff)); ++ } ++ } ++ ++ //lui dst, imm16 ++ //ori dst, dst, imm16 ++ //nop ++ //nop ++ if ( is_op(Assembler::lui_op) && ++ is_op (int_at(4), Assembler::ori_op) && ++ nativeInstruction_at(addr_at(8))->is_nop() && ++ nativeInstruction_at(addr_at(12))->is_nop() ) { ++ ++ int sign = int_at(0) & 0x8000; ++ if (sign == 0) { ++ return Assembler::merge( (intptr_t)(int_at(4) & 0xffff), ++ (intptr_t)(int_at(0) & 0xffff), ++ (intptr_t)0, ++ (intptr_t)0); ++ } else { ++ return Assembler::merge( (intptr_t)(int_at(4) & 0xffff), ++ (intptr_t)(int_at(0) & 0xffff), ++ (intptr_t)(0xffff), ++ (intptr_t)(0xffff)); ++ } ++ } ++ ++ //lui dst, imm16 ++ //nop ++ //nop ++ //nop ++ if ( is_op(Assembler::lui_op) && ++ nativeInstruction_at(addr_at(4))->is_nop() && ++ nativeInstruction_at(addr_at(8))->is_nop() && ++ nativeInstruction_at(addr_at(12))->is_nop() ) { ++ ++ int sign = int_at(0) & 0x8000; ++ if (sign == 0) { ++ return Assembler::merge( (intptr_t)0, ++ (intptr_t)(int_at(0) & 0xffff), ++ (intptr_t)0, ++ (intptr_t)0); ++ } else { ++ return Assembler::merge( (intptr_t)0, ++ (intptr_t)(int_at(0) & 0xffff), ++ (intptr_t)(0xffff), ++ (intptr_t)(0xffff)); ++ } ++ } ++ ++ fatal("not a mov reg, imm64/imm48"); ++ return 0; // unreachable ++} ++ ++void NativeMovConstReg::patch_set48(intptr_t x) { ++ jlong value = (jlong) x; ++ int rt_reg = (int_at(0) & (0x1f << 16)); ++ int rs_reg = rt_reg << 5; ++ int rd_reg = rt_reg >> 5; ++ ++ int hi = (int)(value >> 32); ++ int lo = (int)(value & ~0); ++ ++ int count = 0; ++ ++ if (value == lo) { // 32-bit integer ++ if (Assembler::is_simm16(value)) { ++ //daddiu(d, R0, value); ++ set_int_at(count << 2, (Assembler::daddiu_op << 26) | rt_reg | Assembler::split_low(value)); ++ count += 1; ++ } else { ++ //lui(d, split_low(value >> 16)); ++ set_int_at(count << 2, (Assembler::lui_op << 26) | rt_reg | Assembler::split_low(value >> 16)); ++ count += 1; ++ if (Assembler::split_low(value)) { ++ //ori(d, d, split_low(value)); ++ set_int_at(count << 2, (Assembler::ori_op << 26) | rs_reg | rt_reg | Assembler::split_low(value)); ++ count += 1; ++ } ++ } ++ } else if (hi == 0) { // hardware zero-extends to upper 32 ++ set_int_at(count << 2, (Assembler::ori_op << 26) | rt_reg | Assembler::split_low(julong(value) >> 16)); ++ count += 1; ++ set_int_at(count << 2, (Assembler::dsll_op) | rt_reg | 
rd_reg | (16 << 6)); ++ count += 1; ++ if (Assembler::split_low(value)) { ++ set_int_at(count << 2, (Assembler::ori_op << 26) | rs_reg | rt_reg | Assembler::split_low(value)); ++ count += 1; ++ } ++ } else if ((value> 0) && Assembler::is_simm16(value >> 32)) { ++ set_int_at(count << 2, (Assembler::lui_op << 26) | rt_reg | Assembler::split_low(value >> 32)); ++ count += 1; ++ set_int_at(count << 2, (Assembler::ori_op << 26) | rs_reg | rt_reg | Assembler::split_low(value >> 16)); ++ count += 1; ++ set_int_at(count << 2, (Assembler::dsll_op) | rt_reg | rd_reg | (16 << 6)); ++ count += 1; ++ set_int_at(count << 2, (Assembler::ori_op << 26) | rs_reg | rt_reg | Assembler::split_low(value)); ++ count += 1; ++ } else { ++ tty->print_cr("value = 0x%lx", value); ++ guarantee(false, "Not supported yet !"); ++ } ++ ++ while (count < 4) { ++ set_int_at(count << 2, 0); ++ count++; ++ } ++} ++ ++void NativeMovConstReg::set_data(intptr_t x, intptr_t o) { ++ // li64 or li48 ++ if ((!nativeInstruction_at(addr_at(12))->is_nop()) && is_special_op(int_at(16), Assembler::dsll_op) && is_op(long_at(20), Assembler::ori_op)) { ++ set_int_at(0, (int_at(0) & 0xffff0000) | (Assembler::split_low((intptr_t)x >> 48) & 0xffff)); ++ set_int_at(4, (int_at(4) & 0xffff0000) | (Assembler::split_low((intptr_t)x >> 32) & 0xffff)); ++ set_int_at(12, (int_at(12) & 0xffff0000) | (Assembler::split_low((intptr_t)x >> 16) & 0xffff)); ++ set_int_at(20, (int_at(20) & 0xffff0000) | (Assembler::split_low((intptr_t)x) & 0xffff)); ++ } else { ++ patch_set48(x); ++ } ++ ++ ICache::invalidate_range(addr_at(0), 24); ++ ++ // Find and replace the oop/metadata corresponding to this ++ // instruction in oops section. ++ CodeBlob* blob = CodeCache::find_blob_unsafe(instruction_address()); ++ nmethod* nm = blob->as_nmethod_or_null(); ++ if (nm != NULL) { ++ o = o ? 
o : x; ++ RelocIterator iter(nm, instruction_address(), next_instruction_address()); ++ while (iter.next()) { ++ if (iter.type() == relocInfo::oop_type) { ++ oop* oop_addr = iter.oop_reloc()->oop_addr(); ++ *oop_addr = cast_to_oop(o); ++ break; ++ } else if (iter.type() == relocInfo::metadata_type) { ++ Metadata** metadata_addr = iter.metadata_reloc()->metadata_addr(); ++ *metadata_addr = (Metadata*)o; ++ break; ++ } ++ } ++ } ++} ++ ++//------------------------------------------------------------------- ++ ++int NativeMovRegMem::offset() const{ ++ if (is_immediate()) ++ return (short)(int_at(instruction_offset)&0xffff); ++ else ++ return Assembler::merge(int_at(hiword_offset)&0xffff, long_at(instruction_offset)&0xffff); ++} ++ ++void NativeMovRegMem::set_offset(int x) { ++ if (is_immediate()) { ++ assert(Assembler::is_simm16(x), "just check"); ++ set_int_at(0, (int_at(0)&0xffff0000) | (x&0xffff) ); ++ if (is_64ldst()) { ++ assert(Assembler::is_simm16(x+4), "just check"); ++ set_int_at(4, (int_at(4)&0xffff0000) | ((x+4)&0xffff) ); ++ } ++ } else { ++ set_int_at(0, (int_at(0) & 0xffff0000) | (Assembler::split_high(x) & 0xffff)); ++ set_int_at(4, (int_at(4) & 0xffff0000) | (Assembler::split_low(x) & 0xffff)); ++ } ++ ICache::invalidate_range(addr_at(0), 8); ++} ++ ++void NativeMovRegMem::verify() { ++ int offset = 0; ++ ++ if ( Assembler::opcode(int_at(0)) == Assembler::lui_op ) { ++ ++ if ( Assembler::opcode(int_at(4)) != Assembler::ori_op ) { ++ fatal ("not a mov [reg+offs], reg instruction"); ++ } ++ ++ offset += 12; ++ } ++ ++ switch(Assembler::opcode(int_at(offset))) { ++ case Assembler::lb_op: ++ case Assembler::lbu_op: ++ case Assembler::lh_op: ++ case Assembler::lhu_op: ++ case Assembler::lw_op: ++ case Assembler::lwu_op: ++ case Assembler::ld_op: ++ case Assembler::lwc1_op: ++ case Assembler::ldc1_op: ++ case Assembler::sb_op: ++ case Assembler::sh_op: ++ case Assembler::sw_op: ++ case Assembler::sd_op: ++ case Assembler::swc1_op: ++ case Assembler::sdc1_op: ++ break; ++ default: ++ fatal ("not a mov [reg+offs], reg instruction"); ++ } ++} ++ ++ ++void NativeMovRegMem::print() { ++ tty->print_cr(PTR_FORMAT ": mov reg, [reg + %x]", p2i(instruction_address()), offset()); ++} ++ ++bool NativeInstruction::is_sigill_zombie_not_entrant() { ++ return uint_at(0) == NativeIllegalInstruction::instruction_code; ++} ++ ++void NativeIllegalInstruction::insert(address code_pos) { ++ *(juint*)code_pos = instruction_code; ++ ICache::invalidate_range(code_pos, instruction_size); ++} ++ ++void NativeJump::verify() { ++ assert(((NativeInstruction *)this)->is_jump() || ++ ((NativeInstruction *)this)->is_cond_jump(), "not a general jump instruction"); ++} ++ ++void NativeJump::patch_set48_gs(address dest) { ++ jlong value = (jlong) dest; ++ int rt_reg = (int_at(0) & (0x1f << 16)); ++ ++ if (rt_reg == 0) rt_reg = 25 << 16; // r25 is T9 ++ ++ int rs_reg = rt_reg << 5; ++ int rd_reg = rt_reg >> 5; ++ ++ int hi = (int)(value >> 32); ++ int lo = (int)(value & ~0); ++ ++ int count = 0; ++ ++ int insts[4] = {0, 0, 0, 0}; ++ ++ if (value == lo) { // 32-bit integer ++ if (Assembler::is_simm16(value)) { ++ insts[count] = (Assembler::daddiu_op << 26) | rt_reg | Assembler::split_low(value); ++ count += 1; ++ } else { ++ insts[count] = (Assembler::lui_op << 26) | rt_reg | Assembler::split_low(value >> 16); ++ count += 1; ++ if (Assembler::split_low(value)) { ++ insts[count] = (Assembler::ori_op << 26) | rs_reg | rt_reg | Assembler::split_low(value); ++ count += 1; ++ } ++ } ++ } else if (hi == 0) { // hardware 
zero-extends to upper 32 ++ insts[count] = (Assembler::ori_op << 26) | rt_reg | Assembler::split_low(julong(value) >> 16); ++ count += 1; ++ insts[count] = (Assembler::dsll_op) | rt_reg | rd_reg | (16 << 6); ++ count += 1; ++ if (Assembler::split_low(value)) { ++ insts[count] = (Assembler::ori_op << 26) | rs_reg | rt_reg | Assembler::split_low(value); ++ count += 1; ++ } ++ } else if ((value> 0) && Assembler::is_simm16(value >> 32)) { ++ insts[count] = (Assembler::lui_op << 26) | rt_reg | Assembler::split_low(value >> 32); ++ count += 1; ++ insts[count] = (Assembler::ori_op << 26) | rs_reg | rt_reg | Assembler::split_low(value >> 16); ++ count += 1; ++ insts[count] = (Assembler::dsll_op) | rt_reg | rd_reg | (16 << 6); ++ count += 1; ++ insts[count] = (Assembler::ori_op << 26) | rs_reg | rt_reg | Assembler::split_low(value); ++ count += 1; ++ } else { ++ tty->print_cr("dest = 0x%lx", value); ++ guarantee(false, "Not supported yet !"); ++ } ++ ++ while (count < 4) { ++ insts[count] = 0; ++ count++; ++ } ++ ++ guarantee(((long)addr_at(0) % (BytesPerWord * 2)) == 0, "must be aligned"); ++ atomic_store128_ptr func = get_atomic_store128_func(); ++ (*func)((long *)addr_at(0), 0, *(long *)&insts[0], *(long *)&insts[2]); ++ ++ ICache::invalidate_range(addr_at(0), 16); ++} ++ ++void NativeJump::patch_set48(address dest) { ++ jlong value = (jlong) dest; ++ int rt_reg = (int_at(0) & (0x1f << 16)); ++ int rs_reg = rt_reg << 5; ++ int rd_reg = rt_reg >> 5; ++ ++ int hi = (int)(value >> 32); ++ int lo = (int)(value & ~0); ++ ++ int count = 0; ++ ++ if (value == lo) { // 32-bit integer ++ if (Assembler::is_simm16(value)) { ++ set_int_at(count << 2, (Assembler::daddiu_op << 26) | rt_reg | Assembler::split_low(value)); ++ count += 1; ++ } else { ++ set_int_at(count << 2, (Assembler::lui_op << 26) | rt_reg | Assembler::split_low(value >> 16)); ++ count += 1; ++ if (Assembler::split_low(value)) { ++ set_int_at(count << 2, (Assembler::ori_op << 26) | rs_reg | rt_reg | Assembler::split_low(value)); ++ count += 1; ++ } ++ } ++ } else if (hi == 0) { // hardware zero-extends to upper 32 ++ set_int_at(count << 2, (Assembler::ori_op << 26) | rt_reg | Assembler::split_low(julong(value) >> 16)); ++ count += 1; ++ set_int_at(count << 2, (Assembler::dsll_op) | rt_reg | rd_reg | (16 << 6)); ++ count += 1; ++ if (Assembler::split_low(value)) { ++ set_int_at(count << 2, (Assembler::ori_op << 26) | rs_reg | rt_reg | Assembler::split_low(value)); ++ count += 1; ++ } ++ } else if ((value> 0) && Assembler::is_simm16(value >> 32)) { ++ set_int_at(count << 2, (Assembler::lui_op << 26) | rt_reg | Assembler::split_low(value >> 32)); ++ count += 1; ++ set_int_at(count << 2, (Assembler::ori_op << 26) | rs_reg | rt_reg | Assembler::split_low(value >> 16)); ++ count += 1; ++ set_int_at(count << 2, (Assembler::dsll_op) | rt_reg | rd_reg | (16 << 6)); ++ count += 1; ++ set_int_at(count << 2, (Assembler::ori_op << 26) | rs_reg | rt_reg | Assembler::split_low(value)); ++ count += 1; ++ } else { ++ tty->print_cr("dest = 0x%lx", value); ++ guarantee(false, "Not supported yet !"); ++ } ++ ++ while (count < 4) { ++ set_int_at(count << 2, 0); ++ count++; ++ } ++ ++ ICache::invalidate_range(addr_at(0), 16); ++} ++ ++void NativeJump::patch_on_j_only(address dst) { ++ long dest = ((long)dst - (((long)addr_at(4)) & 0xfffffffff0000000))>>2; ++ if ((dest >= 0) && (dest < (1<<26))) { ++ jint j_inst = (Assembler::j_op << 26) | dest; ++ set_int_at(0, j_inst); ++ ICache::invalidate_range(addr_at(0), 4); ++ } else { ++ ShouldNotReachHere(); ++ } ++} ++ 
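(For reference, a minimal stand-alone C++ sketch of the 48-bit immediate split that NativeJump::patch_set48()/patch_set48_gs() above, like the NativeCall variants earlier, encode as lui / ori / dsll 16 / ori. This is an editorial illustration, not part of the patch: the names Li48, split48 and rebuild48 are hypothetical, and it covers only the (value > 0) && is_simm16(value >> 32) branch, i.e. destinations that fit in 47 bits.)

  #include <cstdint>
  #include <cassert>

  struct Li48 { uint16_t hi, mid, lo; };   // imm(47..32), imm(31..16), imm(15..0)

  // Split a destination address into the three 16-bit immediates.
  static Li48 split48(uint64_t dest) {
    assert(dest < (uint64_t(1) << 47));    // mirrors (value > 0) && is_simm16(value >> 32)
    return { uint16_t(dest >> 32), uint16_t(dest >> 16), uint16_t(dest) };
  }

  // What the patched lui/ori/dsll/ori sequence computes at run time.
  static uint64_t rebuild48(const Li48& f) {
    uint64_t d = uint64_t(f.hi) << 16;     // lui  rd, hi
    d |= f.mid;                            // ori  rd, rd, mid
    d <<= 16;                              // dsll rd, rd, 16
    d |= f.lo;                             // ori  rd, rd, lo
    return d;                              // == dest
  }

patch_set48() rebuilds the four instruction words around exactly these fields and nop-fills any unused slots, which is why both the jump and the call patchers can retarget an already-emitted li48 sequence in place.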
++ ++void NativeJump::patch_on_j_gs(address dst) { ++ long dest = ((long)dst - (((long)addr_at(20)) & 0xfffffffff0000000))>>2; ++ if ((dest >= 0) && (dest < (1<<26))) { ++ jint j_inst = (Assembler::j_op << 26) | dest; ++ set_int_at(16, j_inst); ++ ICache::invalidate_range(addr_at(16), 4); ++ } else { ++ ShouldNotReachHere(); ++ } ++} ++ ++void NativeJump::patch_on_j(address dst) { ++ patch_on_j_gs(dst); ++} ++ ++void NativeJump::patch_on_jr_gs(address dst) { ++ patch_set48_gs(dst); ++ ICache::invalidate_range(addr_at(0), 16); ++} ++ ++void NativeJump::patch_on_jr(address dst) { ++ patch_set48(dst); ++ ICache::invalidate_range(addr_at(0), 16); ++} ++ ++ ++void NativeJump::set_jump_destination(address dest) { ++ OrderAccess::fence(); ++ ++ if (is_short()) { ++ assert(Assembler::is_simm16(dest-addr_at(4)), "change this code"); ++ set_int_at(0, (int_at(0) & 0xffff0000) | (dest - addr_at(4)) & 0xffff ); ++ ICache::invalidate_range(addr_at(0), 4); ++ } else if (is_b_far()) { ++ int offset = dest - addr_at(12); ++ set_int_at(12, (int_at(12) & 0xffff0000) | (offset >> 16)); ++ set_int_at(16, (int_at(16) & 0xffff0000) | (offset & 0xffff)); ++ } else { ++ if (is_op(int_at(16), Assembler::j_op)) { ++ if (UseLEXT1) { ++ patch_on_j_gs(dest); ++ } else { ++ patch_on_j(dest); ++ } ++ } else if (is_op(int_at(0), Assembler::j_op)) { ++ patch_on_j_only(dest); ++ } else if (is_special_op(int_at(16), Assembler::jr_op)) { ++ if (UseLEXT1) { ++ //guarantee(!os::is_MP() || (((long)addr_at(0) % 16) == 0), "destination must be aligned for GSSD"); ++ //patch_on_jr_gs(dest); ++ patch_on_jr(dest); ++ } else { ++ patch_on_jr(dest); ++ } ++ } else { ++ fatal("not a jump"); ++ } ++ } ++} ++ ++void NativeGeneralJump::insert_unconditional(address code_pos, address entry) { ++ CodeBuffer cb(code_pos, instruction_size); ++ MacroAssembler masm(&cb); ++#define __ masm. ++ if (Assembler::is_simm16((entry - code_pos - 4) / 4)) { ++ __ b(entry); ++ __ delayed()->nop(); ++ } else { ++ // Attention: We have to use a relative jump here since PC reloc-operation isn't allowed here. ++ int offset = entry - code_pos; ++ ++ Label L; ++ __ bgezal(R0, L); ++ __ delayed()->lui(T9, (offset - 8) >> 16); ++ __ bind(L); ++ __ ori(T9, T9, (offset - 8) & 0xffff); ++ __ daddu(T9, T9, RA); ++ __ jr(T9); ++ __ delayed()->nop(); ++ } ++ ++#undef __ ++ ++ ICache::invalidate_range(code_pos, instruction_size); ++} ++ ++bool NativeJump::is_b_far() { ++// ++// 0x000000556809f198: daddu at, ra, zero ++// 0x000000556809f19c: [4110001]bgezal zero, 0x000000556809f1a4 ++// ++// 0x000000556809f1a0: nop ++// 0x000000556809f1a4: lui t9, 0xfffffffd ++// 0x000000556809f1a8: ori t9, t9, 0x14dc ++// 0x000000556809f1ac: daddu t9, t9, ra ++// 0x000000556809f1b0: daddu ra, at, zero ++// 0x000000556809f1b4: jr t9 ++// 0x000000556809f1b8: nop ++// ;; ImplicitNullCheckStub slow case ++// 0x000000556809f1bc: lui t9, 0x55 ++// ++ return is_op(int_at(12), Assembler::lui_op); ++} ++ ++address NativeJump::jump_destination() { ++ if ( is_short() ) { ++ return addr_at(4) + Assembler::imm_off(int_at(instruction_offset)) * 4; ++ } ++ // Assembler::merge() is not correct in MIPS_64! 
++ // ++ // Example: ++ // hi16 = 0xfffd, ++ // lo16 = f7a4, ++ // ++ // offset=0xfffdf7a4 (Right) ++ // Assembler::merge = 0xfffcf7a4 (Wrong) ++ // ++ if ( is_b_far() ) { ++ int hi16 = int_at(12)&0xffff; ++ int low16 = int_at(16)&0xffff; ++ address target = addr_at(12) + (hi16 << 16) + low16; ++ return target; ++ } ++ ++ // nop ++ // nop ++ // nop ++ // nop ++ // j target ++ // nop ++ if ( nativeInstruction_at(addr_at(0))->is_nop() && ++ nativeInstruction_at(addr_at(4))->is_nop() && ++ nativeInstruction_at(addr_at(8))->is_nop() && ++ nativeInstruction_at(addr_at(12))->is_nop() && ++ is_op(int_at(16), Assembler::j_op) && ++ nativeInstruction_at(addr_at(20))->is_nop()) { ++ int instr_index = int_at(16) & 0x3ffffff; ++ intptr_t target_high = ((intptr_t)addr_at(20)) & 0xfffffffff0000000; ++ intptr_t target = target_high | (instr_index << 2); ++ return (address)target; ++ } ++ ++ // j target ++ // nop ++ if ( is_op(int_at(0), Assembler::j_op) && ++ nativeInstruction_at(addr_at(4))->is_nop()) { ++ int instr_index = int_at(0) & 0x3ffffff; ++ intptr_t target_high = ((intptr_t)addr_at(4)) & 0xfffffffff0000000; ++ intptr_t target = target_high | (instr_index << 2); ++ return (address)target; ++ } ++ ++ // li64 ++ if ( is_op(Assembler::lui_op) && ++ is_op(int_at(4), Assembler::ori_op) && ++ is_special_op(int_at(8), Assembler::dsll_op) && ++ is_op(int_at(12), Assembler::ori_op) && ++ is_special_op(int_at(16), Assembler::dsll_op) && ++ is_op(int_at(20), Assembler::ori_op) ) { ++ ++ return (address)Assembler::merge( (intptr_t)(int_at(20) & 0xffff), ++ (intptr_t)(int_at(12) & 0xffff), ++ (intptr_t)(int_at(4) & 0xffff), ++ (intptr_t)(int_at(0) & 0xffff)); ++ } ++ ++ //lui dst, imm16 ++ //ori dst, dst, imm16 ++ //dsll dst, dst, 16 ++ //ori dst, dst, imm16 ++ if ( is_op(Assembler::lui_op) && ++ is_op (int_at(4), Assembler::ori_op) && ++ is_special_op(int_at(8), Assembler::dsll_op) && ++ is_op (int_at(12), Assembler::ori_op) ) { ++ ++ return (address)Assembler::merge( (intptr_t)(int_at(12) & 0xffff), ++ (intptr_t)(int_at(4) & 0xffff), ++ (intptr_t)(int_at(0) & 0xffff), ++ (intptr_t)0); ++ } ++ ++ //ori dst, R0, imm16 ++ //dsll dst, dst, 16 ++ //ori dst, dst, imm16 ++ //nop ++ if ( is_op(Assembler::ori_op) && ++ is_special_op(int_at(4), Assembler::dsll_op) && ++ is_op (int_at(8), Assembler::ori_op) && ++ nativeInstruction_at(addr_at(12))->is_nop()) { ++ ++ return (address)Assembler::merge( (intptr_t)(int_at(8) & 0xffff), ++ (intptr_t)(int_at(0) & 0xffff), ++ (intptr_t)0, ++ (intptr_t)0); ++ } ++ ++ //ori dst, R0, imm16 ++ //dsll dst, dst, 16 ++ //nop ++ //nop ++ if ( is_op(Assembler::ori_op) && ++ is_special_op(int_at(4), Assembler::dsll_op) && ++ nativeInstruction_at(addr_at(8))->is_nop() && ++ nativeInstruction_at(addr_at(12))->is_nop()) { ++ ++ return (address)Assembler::merge( (intptr_t)(0), ++ (intptr_t)(int_at(0) & 0xffff), ++ (intptr_t)0, ++ (intptr_t)0); ++ } ++ ++ //daddiu dst, R0, imm16 ++ //nop ++ //nop ++ //nop ++ if ( is_op(Assembler::daddiu_op) && ++ nativeInstruction_at(addr_at(4))->is_nop() && ++ nativeInstruction_at(addr_at(8))->is_nop() && ++ nativeInstruction_at(addr_at(12))->is_nop() ) { ++ ++ int sign = int_at(0) & 0x8000; ++ if (sign == 0) { ++ return (address)Assembler::merge( (intptr_t)(int_at(0) & 0xffff), ++ (intptr_t)0, ++ (intptr_t)0, ++ (intptr_t)0); ++ } else { ++ return (address)Assembler::merge( (intptr_t)(int_at(0) & 0xffff), ++ (intptr_t)(0xffff), ++ (intptr_t)(0xffff), ++ (intptr_t)(0xffff)); ++ } ++ } ++ ++ //lui dst, imm16 ++ //ori dst, dst, imm16 ++ //nop ++ //nop ++ if 
( is_op(Assembler::lui_op) && ++ is_op (int_at(4), Assembler::ori_op) && ++ nativeInstruction_at(addr_at(8))->is_nop() && ++ nativeInstruction_at(addr_at(12))->is_nop() ) { ++ ++ int sign = int_at(0) & 0x8000; ++ if (sign == 0) { ++ return (address)Assembler::merge( (intptr_t)(int_at(4) & 0xffff), ++ (intptr_t)(int_at(0) & 0xffff), ++ (intptr_t)0, ++ (intptr_t)0); ++ } else { ++ return (address)Assembler::merge( (intptr_t)(int_at(4) & 0xffff), ++ (intptr_t)(int_at(0) & 0xffff), ++ (intptr_t)(0xffff), ++ (intptr_t)(0xffff)); ++ } ++ } ++ ++ //lui dst, imm16 ++ //nop ++ //nop ++ //nop ++ if ( is_op(Assembler::lui_op) && ++ nativeInstruction_at(addr_at(4))->is_nop() && ++ nativeInstruction_at(addr_at(8))->is_nop() && ++ nativeInstruction_at(addr_at(12))->is_nop() ) { ++ ++ int sign = int_at(0) & 0x8000; ++ if (sign == 0) { ++ return (address)Assembler::merge( (intptr_t)0, ++ (intptr_t)(int_at(0) & 0xffff), ++ (intptr_t)0, ++ (intptr_t)0); ++ } else { ++ return (address)Assembler::merge( (intptr_t)0, ++ (intptr_t)(int_at(0) & 0xffff), ++ (intptr_t)(0xffff), ++ (intptr_t)(0xffff)); ++ } ++ } ++ ++ fatal("not a jump"); ++ return NULL; // unreachable ++} ++ ++// MT-safe patching of a long jump instruction. ++// First patches first word of instruction to two jmp's that jmps to them ++// selfs (spinlock). Then patches the last byte, and then atomicly replaces ++// the jmp's with the first 4 byte of the new instruction. ++void NativeGeneralJump::replace_mt_safe(address instr_addr, address code_buffer) { ++ NativeGeneralJump* h_jump = nativeGeneralJump_at (instr_addr); ++ assert((int)instruction_size == (int)NativeCall::instruction_size, ++ "note::Runtime1::patch_code uses NativeCall::instruction_size"); ++ ++ // ensure 100% atomicity ++ guarantee(!os::is_MP() || (((long)instr_addr % BytesPerWord) == 0), "destination must be aligned for SD"); ++ ++ int *p = (int *)instr_addr; ++ int jr_word = p[4]; ++ ++ p[4] = 0x1000fffb; /* .1: --; --; --; --; b .1; nop */ ++ memcpy(instr_addr, code_buffer, NativeCall::instruction_size - 8); ++ *(long *)(instr_addr + 16) = *(long *)(code_buffer + 16); ++} ++ ++// Must ensure atomicity ++void NativeJump::patch_verified_entry(address entry, address verified_entry, address dest) { ++ assert(dest == SharedRuntime::get_handle_wrong_method_stub(), "expected fixed destination of patch"); ++ assert(nativeInstruction_at(verified_entry + BytesPerInstWord)->is_nop(), "mips64 cannot replace non-nop with jump"); ++ ++ if (MacroAssembler::reachable_from_cache(dest)) { ++ CodeBuffer cb(verified_entry, 1 * BytesPerInstWord); ++ MacroAssembler masm(&cb); ++ masm.j(dest); ++ } else { ++ // We use an illegal instruction for marking a method as ++ // not_entrant or zombie ++ NativeIllegalInstruction::insert(verified_entry); ++ } ++ ++ ICache::invalidate_range(verified_entry, 1 * BytesPerInstWord); ++} ++ ++bool NativeInstruction::is_jump() ++{ ++ if ((int_at(0) & NativeGeneralJump::b_mask) == NativeGeneralJump::beq_opcode) ++ return true; ++ if (is_op(int_at(4), Assembler::lui_op)) // simplified b_far ++ return true; ++ if (is_op(int_at(12), Assembler::lui_op)) // original b_far ++ return true; ++ ++ // nop ++ // nop ++ // nop ++ // nop ++ // j target ++ // nop ++ if ( is_nop() && ++ nativeInstruction_at(addr_at(4))->is_nop() && ++ nativeInstruction_at(addr_at(8))->is_nop() && ++ nativeInstruction_at(addr_at(12))->is_nop() && ++ nativeInstruction_at(addr_at(16))->is_op(Assembler::j_op) && ++ nativeInstruction_at(addr_at(20))->is_nop() ) { ++ return true; ++ } ++ ++ if ( 
nativeInstruction_at(addr_at(0))->is_op(Assembler::j_op) && ++ nativeInstruction_at(addr_at(4))->is_nop() ) { ++ return true; ++ } ++ ++ // lui rd, imm(63...48); ++ // ori rd, rd, imm(47...32); ++ // dsll rd, rd, 16; ++ // ori rd, rd, imm(31...16); ++ // dsll rd, rd, 16; ++ // ori rd, rd, imm(15...0); ++ // jr rd ++ // nop ++ if (is_op(int_at(0), Assembler::lui_op) && ++ is_op(int_at(4), Assembler::ori_op) && ++ is_special_op(int_at(8), Assembler::dsll_op) && ++ is_op(int_at(12), Assembler::ori_op) && ++ is_special_op(int_at(16), Assembler::dsll_op) && ++ is_op(int_at(20), Assembler::ori_op) && ++ is_special_op(int_at(24), Assembler::jr_op)) { ++ return true; ++ } ++ ++ //lui dst, imm16 ++ //ori dst, dst, imm16 ++ //dsll dst, dst, 16 ++ //ori dst, dst, imm16 ++ if (is_op(int_at(0), Assembler::lui_op) && ++ is_op(int_at(4), Assembler::ori_op) && ++ is_special_op(int_at(8), Assembler::dsll_op) && ++ is_op(int_at(12), Assembler::ori_op) && ++ is_special_op(int_at(16), Assembler::jr_op)) { ++ return true; ++ } ++ ++ //ori dst, R0, imm16 ++ //dsll dst, dst, 16 ++ //ori dst, dst, imm16 ++ //nop ++ if ( is_op(Assembler::ori_op) && ++ is_special_op(int_at(4), Assembler::dsll_op) && ++ is_op (int_at(8), Assembler::ori_op) && ++ nativeInstruction_at(addr_at(12))->is_nop() && ++ is_special_op(int_at(16), Assembler::jr_op)) { ++ return true; ++ } ++ ++ //ori dst, R0, imm16 ++ //dsll dst, dst, 16 ++ //nop ++ //nop ++ if ( is_op(Assembler::ori_op) && ++ is_special_op(int_at(4), Assembler::dsll_op) && ++ nativeInstruction_at(addr_at(8))->is_nop() && ++ nativeInstruction_at(addr_at(12))->is_nop() && ++ is_special_op(int_at(16), Assembler::jr_op)) { ++ return true; ++ } ++ ++ //daddiu dst, R0, imm16 ++ //nop ++ //nop ++ //nop ++ if ( is_op(Assembler::daddiu_op) && ++ nativeInstruction_at(addr_at(4))->is_nop() && ++ nativeInstruction_at(addr_at(8))->is_nop() && ++ nativeInstruction_at(addr_at(12))->is_nop() && ++ is_special_op(int_at(16), Assembler::jr_op)) { ++ return true; ++ } ++ ++ //lui dst, imm16 ++ //ori dst, dst, imm16 ++ //nop ++ //nop ++ if ( is_op(Assembler::lui_op) && ++ is_op (int_at(4), Assembler::ori_op) && ++ nativeInstruction_at(addr_at(8))->is_nop() && ++ nativeInstruction_at(addr_at(12))->is_nop() && ++ is_special_op(int_at(16), Assembler::jr_op)) { ++ return true; ++ } ++ ++ //lui dst, imm16 ++ //nop ++ //nop ++ //nop ++ if ( is_op(Assembler::lui_op) && ++ nativeInstruction_at(addr_at(4))->is_nop() && ++ nativeInstruction_at(addr_at(8))->is_nop() && ++ nativeInstruction_at(addr_at(12))->is_nop() && ++ is_special_op(int_at(16), Assembler::jr_op)) { ++ return true; ++ } ++ ++ return false; ++} ++ ++bool NativeInstruction::is_dtrace_trap() { ++ //return (*(int32_t*)this & 0xff) == 0xcc; ++ Unimplemented(); ++ return false; ++} ++ ++bool NativeInstruction::is_safepoint_poll() { ++ // ++ // 390 li T2, 0x0000000000400000 #@loadConP ++ // 394 sw [SP + #12], V1 # spill 9 ++ // 398 Safepoint @ [T2] : poll for GC @ safePoint_poll # spec.benchmarks.compress.Decompressor::decompress @ bci:224 L[0]=A6 L[1]=_ L[2]=sp + #28 L[3]=_ L[4]=V1 ++ // ++ // 0x000000ffe5815130: lui t2, 0x40 ++ // 0x000000ffe5815134: sw v1, 0xc(sp) ; OopMap{a6=Oop off=920} ++ // ;*goto ++ // ; - spec.benchmarks.compress.Decompressor::decompress@224 (line 584) ++ // ++ // 0x000000ffe5815138: lw at, 0x0(t2) ;*goto <--- PC ++ // ; - spec.benchmarks.compress.Decompressor::decompress@224 (line 584) ++ // ++ ++ // Since there may be some spill instructions between the safePoint_poll and loadConP, ++ // we check the safepoint 
instruction like the this. ++ return is_op(Assembler::lw_op) && is_rt(AT); ++} +diff --git a/hotspot/src/cpu/mips/vm/nativeInst_mips.hpp b/hotspot/src/cpu/mips/vm/nativeInst_mips.hpp +new file mode 100644 +index 0000000000..13a4cb4ef1 +--- /dev/null ++++ b/hotspot/src/cpu/mips/vm/nativeInst_mips.hpp +@@ -0,0 +1,735 @@ ++/* ++ * Copyright (c) 1997, 2011, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#ifndef CPU_MIPS_VM_NATIVEINST_MIPS_HPP ++#define CPU_MIPS_VM_NATIVEINST_MIPS_HPP ++ ++#include "asm/assembler.hpp" ++#include "memory/allocation.hpp" ++#include "runtime/icache.hpp" ++#include "runtime/os.hpp" ++#include "utilities/top.hpp" ++ ++// We have interfaces for the following instructions: ++// - NativeInstruction ++// - - NativeCall ++// - - NativeMovConstReg ++// - - NativeMovConstRegPatching ++// - - NativeMovRegMem ++// - - NativeMovRegMemPatching ++// - - NativeJump ++// - - NativeIllegalOpCode ++// - - NativeGeneralJump ++// - - NativeReturn ++// - - NativeReturnX (return with argument) ++// - - NativePushConst ++// - - NativeTstRegMem ++ ++// The base class for different kinds of native instruction abstractions. ++// Provides the primitive operations to manipulate code relative to this. ++ ++class NativeInstruction VALUE_OBJ_CLASS_SPEC { ++ friend class Relocation; ++ ++ public: ++ enum mips_specific_constants { ++ nop_instruction_code = 0, ++ nop_instruction_size = 4, ++ sync_instruction_code = 0xf ++ }; ++ ++ bool is_nop() { return long_at(0) == nop_instruction_code; } ++ bool is_sync() { return long_at(0) == sync_instruction_code; } ++ bool is_dtrace_trap(); ++ inline bool is_call(); ++ inline bool is_illegal(); ++ inline bool is_return(); ++ bool is_jump(); ++ inline bool is_cond_jump(); ++ bool is_safepoint_poll(); ++ ++ //mips has no instruction to generate a illegal instrucion exception ++ //we define ours: break 11 ++ static int illegal_instruction(); ++ ++ bool is_int_branch(); ++ bool is_float_branch(); ++ ++ inline bool is_trampoline_call(); ++ ++ //We use an illegal instruction for marking a method as not_entrant or zombie. 
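// The lui/ori decode paths earlier in this file (the ones that end in
// fatal("not a jump")) all funnel into Assembler::merge with four 16-bit
// pieces, padding the upper pieces with 0x0000 or 0xffff according to the
// sign bit of the highest piece actually materialised. A minimal standalone
// sketch of that reassembly; merge16x4 is an illustrative stand-in, not the
// HotSpot helper.
#include <cstdint>

static inline int64_t merge16x4(uint64_t p0,   // bits 15..0
                                uint64_t p1,   // bits 31..16
                                uint64_t p2,   // bits 47..32
                                uint64_t p3) { // bits 63..48
  // Compose the four 16-bit fields back into one sign-carrying 64-bit value.
  return (int64_t)((p3 << 48) | (p2 << 32) | (p1 << 16) | p0);
}

// Decoding "lui dst, hi16; ori dst, dst, lo16" then reduces to:
//   sign = hi16 & 0x8000;
//   addr = merge16x4(lo16, hi16, sign ? 0xffff : 0, sign ? 0xffff : 0);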
++ bool is_sigill_zombie_not_entrant(); ++ ++ protected: ++ address addr_at(int offset) const { return address(this) + offset; } ++ address instruction_address() const { return addr_at(0); } ++ address next_instruction_address() const { return addr_at(BytesPerInstWord); } ++ address prev_instruction_address() const { return addr_at(-BytesPerInstWord); } ++ ++ s_char sbyte_at(int offset) const { return *(s_char*) addr_at(offset); } ++ u_char ubyte_at(int offset) const { return *(u_char*) addr_at(offset); } ++ ++ jint int_at(int offset) const { return *(jint*) addr_at(offset); } ++ juint uint_at(int offset) const { return *(juint*) addr_at(offset); } ++ ++ intptr_t ptr_at(int offset) const { return *(intptr_t*) addr_at(offset); } ++ ++ oop oop_at (int offset) const { return *(oop*) addr_at(offset); } ++ int long_at(int offset) const { return *(jint*)addr_at(offset); } ++ ++ ++ void set_char_at(int offset, char c) { *addr_at(offset) = (u_char)c; wrote(offset); } ++ void set_int_at(int offset, jint i) { *(jint*)addr_at(offset) = i; wrote(offset); } ++ void set_ptr_at (int offset, intptr_t ptr) { *(intptr_t*) addr_at(offset) = ptr; wrote(offset); } ++ void set_oop_at (int offset, oop o) { *(oop*) addr_at(offset) = o; wrote(offset); } ++ void set_long_at(int offset, long i); ++ ++ int insn_word() const { return long_at(0); } ++ static bool is_op (int insn, Assembler::ops op) { return Assembler::opcode(insn) == (int)op; } ++ bool is_op (Assembler::ops op) const { return is_op(insn_word(), op); } ++ bool is_rs (int insn, Register rs) const { return Assembler::rs(insn) == (int)rs->encoding(); } ++ bool is_rs (Register rs) const { return is_rs(insn_word(), rs); } ++ bool is_rt (int insn, Register rt) const { return Assembler::rt(insn) == (int)rt->encoding(); } ++ bool is_rt (Register rt) const { return is_rt(insn_word(), rt); } ++ ++ static bool is_special_op (int insn, Assembler::special_ops op) { ++ return is_op(insn, Assembler::special_op) && Assembler::special(insn)==(int)op; ++ } ++ bool is_special_op (Assembler::special_ops op) const { return is_special_op(insn_word(), op); } ++ ++ void wrote(int offset); ++ ++ public: ++ ++ // unit test stuff ++ static void test() {} // override for testing ++ ++ inline friend NativeInstruction* nativeInstruction_at(address address); ++}; ++ ++inline NativeInstruction* nativeInstruction_at(address address) { ++ NativeInstruction* inst = (NativeInstruction*)address; ++#ifdef ASSERT ++ //inst->verify(); ++#endif ++ return inst; ++} ++ ++inline NativeCall* nativeCall_at(address address); ++// The NativeCall is an abstraction for accessing/manipulating native call imm32/imm64 ++// instructions (used to manipulate inline caches, primitive & dll calls, etc.). ++// MIPS has no call instruction with imm32/imm64. 
Usually, a call was done like this: ++// 32 bits: ++// lui rt, imm16 ++// addiu rt, rt, imm16 ++// jalr rt ++// nop ++// ++// 64 bits: ++// lui rd, imm(63...48); ++// ori rd, rd, imm(47...32); ++// dsll rd, rd, 16; ++// ori rd, rd, imm(31...16); ++// dsll rd, rd, 16; ++// ori rd, rd, imm(15...0); ++// jalr rd ++// nop ++// ++ ++// we just consider the above for instruction as one call instruction ++class NativeCall: public NativeInstruction { ++ public: ++ enum mips_specific_constants { ++ instruction_offset = 0, ++ instruction_size = 6 * BytesPerInstWord, ++ return_address_offset_short = 4 * BytesPerInstWord, ++ return_address_offset_long = 6 * BytesPerInstWord, ++ displacement_offset = 0 ++ }; ++ ++ address instruction_address() const { return addr_at(instruction_offset); } ++ ++ address next_instruction_address() const { ++ if (is_special_op(int_at(8), Assembler::jalr_op)) { ++ return addr_at(return_address_offset_short); ++ } else { ++ return addr_at(return_address_offset_long); ++ } ++ } ++ ++ address return_address() const { ++ return next_instruction_address(); ++ } ++ ++ address target_addr_for_insn() const; ++ address destination() const; ++ void set_destination(address dest); ++ ++ void patch_set48_gs(address dest); ++ void patch_set48(address dest); ++ ++ void patch_on_jalr_gs(address dest); ++ void patch_on_jalr(address dest); ++ ++ void patch_on_jal_gs(address dest); ++ void patch_on_jal(address dest); ++ ++ void patch_on_trampoline(address dest); ++ ++ void patch_on_jal_only(address dest); ++ ++ void patch_set32_gs(address dest); ++ void patch_set32(address dest); ++ ++ void verify_alignment() { } ++ void verify(); ++ void print(); ++ ++ // Creation ++ inline friend NativeCall* nativeCall_at(address address); ++ inline friend NativeCall* nativeCall_before(address return_address); ++ ++ static bool is_call_at(address instr) { ++ return nativeInstruction_at(instr)->is_call(); ++ } ++ ++ static bool is_call_before(address return_address) { ++ return is_call_at(return_address - return_address_offset_short) | is_call_at(return_address - return_address_offset_long); ++ } ++ ++ static bool is_call_to(address instr, address target) { ++ return nativeInstruction_at(instr)->is_call() && ++nativeCall_at(instr)->destination() == target; ++ } ++ ++ // MT-safe patching of a call instruction. ++ static void insert(address code_pos, address entry); ++ ++ static void replace_mt_safe(address instr_addr, address code_buffer); ++ ++ // Similar to replace_mt_safe, but just changes the destination. The ++ // important thing is that free-running threads are able to execute ++ // this call instruction at all times. If the call is an immediate jal ++ // instruction we can simply rely on atomicity of 32-bit writes to ++ // make sure other threads will see no intermediate states. ++ ++ // We cannot rely on locks here, since the free-running threads must run at ++ // full speed. ++ // ++ // Used in the runtime linkage of calls; see class CompiledIC. ++ ++ // The parameter assert_lock disables the assertion during code generation. 
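// The comment above relies on the atomicity of naturally aligned 32-bit
// writes: when only one instruction word of the call sequence changes, a
// single atomic store lets free-running threads observe either the old or
// the new word, never a torn mix. A hedged sketch of that idea in plain C++;
// patch_insn is illustrative only, not the HotSpot entry point.
#include <atomic>
#include <cstdint>

static inline void patch_insn(std::atomic<uint32_t>& insn_slot, uint32_t new_insn) {
  // Release ordering: anything written to the stub before the patch becomes
  // visible no later than the new instruction word itself.
  insn_slot.store(new_insn, std::memory_order_release);
}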
++ void set_destination_mt_safe(address dest, bool assert_lock = true); ++ ++ address get_trampoline(); ++}; ++ ++inline NativeCall* nativeCall_at(address address) { ++ NativeCall* call = (NativeCall*)(address - NativeCall::instruction_offset); ++#ifdef ASSERT ++ call->verify(); ++#endif ++ return call; ++} ++ ++inline NativeCall* nativeCall_before(address return_address) { ++ NativeCall* call = NULL; ++ if (NativeCall::is_call_at(return_address - NativeCall::return_address_offset_long)) { ++ call = (NativeCall*)(return_address - NativeCall::return_address_offset_long); ++ } else { ++ call = (NativeCall*)(return_address - NativeCall::return_address_offset_short); ++ } ++#ifdef ASSERT ++ call->verify(); ++#endif ++ return call; ++} ++ ++class NativeMovConstReg: public NativeInstruction { ++ public: ++ enum mips_specific_constants { ++ instruction_offset = 0, ++ instruction_size = 4 * BytesPerInstWord, ++ next_instruction_offset = 4 * BytesPerInstWord, ++ }; ++ ++ int insn_word() const { return long_at(instruction_offset); } ++ address instruction_address() const { return addr_at(0); } ++ address next_instruction_address() const { return addr_at(next_instruction_offset); } ++ intptr_t data() const; ++ void set_data(intptr_t x, intptr_t o = 0); ++ ++ void patch_set48(intptr_t x); ++ ++ void verify(); ++ void print(); ++ ++ // unit test stuff ++ static void test() {} ++ ++ // Creation ++ inline friend NativeMovConstReg* nativeMovConstReg_at(address address); ++ inline friend NativeMovConstReg* nativeMovConstReg_before(address address); ++}; ++ ++inline NativeMovConstReg* nativeMovConstReg_at(address address) { ++ NativeMovConstReg* test = (NativeMovConstReg*)(address - NativeMovConstReg::instruction_offset); ++#ifdef ASSERT ++ test->verify(); ++#endif ++ return test; ++} ++ ++inline NativeMovConstReg* nativeMovConstReg_before(address address) { ++ NativeMovConstReg* test = (NativeMovConstReg*)(address - NativeMovConstReg::instruction_size - NativeMovConstReg::instruction_offset); ++#ifdef ASSERT ++ test->verify(); ++#endif ++ return test; ++} ++ ++class NativeMovConstRegPatching: public NativeMovConstReg { ++ private: ++ friend NativeMovConstRegPatching* nativeMovConstRegPatching_at(address address) { ++ NativeMovConstRegPatching* test = (NativeMovConstRegPatching*)(address - instruction_offset); ++ #ifdef ASSERT ++ test->verify(); ++ #endif ++ return test; ++ } ++}; ++ ++// An interface for accessing/manipulating native moves of the form: ++// lui AT, split_high(offset) ++// addiu AT, split_low(offset) ++// addu reg, reg, AT ++// lb/lbu/sb/lh/lhu/sh/lw/sw/lwc1/swc1 dest, reg, 0 ++// [lw/sw/lwc1/swc1 dest, reg, 4] ++// or ++// lb/lbu/sb/lh/lhu/sh/lw/sw/lwc1/swc1 dest, reg, offset ++// [lw/sw/lwc1/swc1 dest, reg, offset+4] ++// ++// Warning: These routines must be able to handle any instruction sequences ++// that are generated as a result of the load/store byte,word,long ++// macros. ++ ++class NativeMovRegMem: public NativeInstruction { ++ public: ++ enum mips_specific_constants { ++ instruction_offset = 0, ++ hiword_offset = 4, ++ ldst_offset = 12, ++ immediate_size = 4, ++ ldst_size = 16 ++ }; ++ ++ //offset is less than 16 bits. 
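// split_high/split_low in the load/store comment above are the classic MIPS
// %hi/%lo pairing: addiu sign-extends its 16-bit immediate, so the high half
// is pre-biased by 0x8000 so that "lui reg, hi; addiu reg, reg, lo" rebuilds
// the original 32-bit offset. A standalone sketch with illustrative helper
// names, not the HotSpot macros.
#include <cstdint>

static inline uint16_t split_low(int32_t x)  { return (uint16_t)x; }
static inline uint16_t split_high(int32_t x) {
  return (uint16_t)(((uint32_t)x + 0x8000u) >> 16);
}

static inline int32_t rebuild(int32_t x) {
  uint32_t hi = split_high(x);
  int32_t  lo = (int16_t)split_low(x);          // sign-extended, as addiu does
  return (int32_t)((hi << 16) + (uint32_t)lo);  // equals x for every 32-bit x
}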
++ bool is_immediate() const { return !is_op(long_at(instruction_offset), Assembler::lui_op); } ++ bool is_64ldst() const { ++ if (is_immediate()) { ++ return (Assembler::opcode(long_at(hiword_offset)) == Assembler::opcode(long_at(instruction_offset))) && ++ (Assembler::imm_off(long_at(hiword_offset)) == Assembler::imm_off(long_at(instruction_offset)) + wordSize); ++ } else { ++ return (Assembler::opcode(long_at(ldst_offset+hiword_offset)) == Assembler::opcode(long_at(ldst_offset))) && ++ (Assembler::imm_off(long_at(ldst_offset+hiword_offset)) == Assembler::imm_off(long_at(ldst_offset)) + wordSize); ++ } ++ } ++ ++ address instruction_address() const { return addr_at(instruction_offset); } ++ address next_instruction_address() const { ++ return addr_at( (is_immediate()? immediate_size : ldst_size) + (is_64ldst()? 4 : 0)); ++ } ++ ++ int offset() const; ++ ++ void set_offset(int x); ++ ++ void add_offset_in_bytes(int add_offset) { set_offset ( ( offset() + add_offset ) ); } ++ ++ void verify(); ++ void print (); ++ ++ // unit test stuff ++ static void test() {} ++ ++ private: ++ inline friend NativeMovRegMem* nativeMovRegMem_at (address address); ++}; ++ ++inline NativeMovRegMem* nativeMovRegMem_at (address address) { ++ NativeMovRegMem* test = (NativeMovRegMem*)(address - NativeMovRegMem::instruction_offset); ++#ifdef ASSERT ++ test->verify(); ++#endif ++ return test; ++} ++ ++class NativeMovRegMemPatching: public NativeMovRegMem { ++ private: ++ friend NativeMovRegMemPatching* nativeMovRegMemPatching_at (address address) { ++ NativeMovRegMemPatching* test = (NativeMovRegMemPatching*)(address - instruction_offset); ++ #ifdef ASSERT ++ test->verify(); ++ #endif ++ return test; ++ } ++}; ++ ++ ++// Handles all kinds of jump on Loongson. Long/far, conditional/unconditional ++// 32 bits: ++// far jump: ++// lui reg, split_high(addr) ++// addiu reg, split_low(addr) ++// jr reg ++// nop ++// or ++// beq ZERO, ZERO, offset ++// nop ++// ++ ++//64 bits: ++// far jump: ++// lui rd, imm(63...48); ++// ori rd, rd, imm(47...32); ++// dsll rd, rd, 16; ++// ori rd, rd, imm(31...16); ++// dsll rd, rd, 16; ++// ori rd, rd, imm(15...0); ++// jalr rd ++// nop ++// ++class NativeJump: public NativeInstruction { ++ public: ++ enum mips_specific_constants { ++ instruction_offset = 0, ++ beq_opcode = 0x10000000,//000100|00000|00000|offset ++ b_mask = 0xffff0000, ++ short_size = 8, ++ instruction_size = 6 * BytesPerInstWord ++ }; ++ ++ bool is_short() const { return (long_at(instruction_offset) & b_mask) == beq_opcode; } ++ bool is_b_far(); ++ address instruction_address() const { return addr_at(instruction_offset); } ++ address jump_destination(); ++ ++ void patch_set48_gs(address dest); ++ void patch_set48(address dest); ++ ++ void patch_on_jr_gs(address dest); ++ void patch_on_jr(address dest); ++ ++ void patch_on_j_gs(address dest); ++ void patch_on_j(address dest); ++ ++ void patch_on_j_only(address dest); ++ ++ void set_jump_destination(address dest); ++ ++ // Creation ++ inline friend NativeJump* nativeJump_at(address address); ++ ++ // Insertion of native jump instruction ++ static void insert(address code_pos, address entry) { Unimplemented(); } ++ // MT-safe insertion of native jump at verified method entry ++ static void check_verified_entry_alignment(address entry, address verified_entry) {} ++ static void patch_verified_entry(address entry, address verified_entry, address dest); ++ ++ void verify(); ++}; ++ ++inline NativeJump* nativeJump_at(address address) { ++ NativeJump* jump = 
(NativeJump*)(address - NativeJump::instruction_offset); ++ debug_only(jump->verify();) ++ return jump; ++} ++ ++class NativeGeneralJump: public NativeJump { ++ public: ++ // Creation ++ inline friend NativeGeneralJump* nativeGeneralJump_at(address address); ++ ++ // Insertion of native general jump instruction ++ static void insert_unconditional(address code_pos, address entry); ++ static void replace_mt_safe(address instr_addr, address code_buffer); ++}; ++ ++inline NativeGeneralJump* nativeGeneralJump_at(address address) { ++ NativeGeneralJump* jump = (NativeGeneralJump*)(address); ++ debug_only(jump->verify();) ++ return jump; ++} ++ ++class NativeIllegalInstruction: public NativeInstruction { ++public: ++ enum mips_specific_constants { ++ instruction_code = 0x42000029, // mips reserved instruction ++ instruction_size = 4, ++ instruction_offset = 0, ++ next_instruction_offset = 4 ++ }; ++ ++ // Insert illegal opcode as specific address ++ static void insert(address code_pos); ++}; ++ ++// return instruction that does not pop values of the stack ++// jr RA ++// delay slot ++class NativeReturn: public NativeInstruction { ++ public: ++ enum mips_specific_constants { ++ instruction_size = 8, ++ instruction_offset = 0, ++ next_instruction_offset = 8 ++ }; ++}; ++ ++ ++ ++ ++class NativeCondJump; ++inline NativeCondJump* nativeCondJump_at(address address); ++class NativeCondJump: public NativeInstruction { ++ public: ++ enum mips_specific_constants { ++ instruction_size = 16, ++ instruction_offset = 12, ++ next_instruction_offset = 20 ++ }; ++ ++ ++ int insn_word() const { return long_at(instruction_offset); } ++ address instruction_address() const { return addr_at(0); } ++ address next_instruction_address() const { return addr_at(next_instruction_offset); } ++ ++ // Creation ++ inline friend NativeCondJump* nativeCondJump_at(address address); ++ ++ address jump_destination() const { ++ return ::nativeCondJump_at(addr_at(12))->jump_destination(); ++ } ++ ++ void set_jump_destination(address dest) { ++ ::nativeCondJump_at(addr_at(12))->set_jump_destination(dest); ++ } ++ ++}; ++ ++inline NativeCondJump* nativeCondJump_at(address address) { ++ NativeCondJump* jump = (NativeCondJump*)(address); ++ return jump; ++} ++ ++ ++ ++inline bool NativeInstruction::is_illegal() { return insn_word() == illegal_instruction(); } ++ ++inline bool NativeInstruction::is_call() { ++ // jal target ++ // nop ++ if ( nativeInstruction_at(addr_at(0))->is_op(Assembler::jal_op) && ++ nativeInstruction_at(addr_at(4))->is_nop() ) { ++ return true; ++ } ++ ++ // nop ++ // nop ++ // nop ++ // nop ++ // jal target ++ // nop ++ if ( is_nop() && ++ nativeInstruction_at(addr_at(4))->is_nop() && ++ nativeInstruction_at(addr_at(8))->is_nop() && ++ nativeInstruction_at(addr_at(12))->is_nop() && ++ nativeInstruction_at(addr_at(16))->is_op(Assembler::jal_op) && ++ nativeInstruction_at(addr_at(20))->is_nop() ) { ++ return true; ++ } ++ ++ // li64 ++ if ( is_op(Assembler::lui_op) && ++ is_op(int_at(4), Assembler::ori_op) && ++ is_special_op(int_at(8), Assembler::dsll_op) && ++ is_op(int_at(12), Assembler::ori_op) && ++ is_special_op(int_at(16), Assembler::dsll_op) && ++ is_op(int_at(20), Assembler::ori_op) && ++ is_special_op(int_at(24), Assembler::jalr_op) ) { ++ return true; ++ } ++ ++ //lui dst, imm16 ++ //ori dst, dst, imm16 ++ //dsll dst, dst, 16 ++ //ori dst, dst, imm16 ++ if ( is_op(Assembler::lui_op) && ++ is_op (int_at(4), Assembler::ori_op) && ++ is_special_op(int_at(8), Assembler::dsll_op) && ++ is_op (int_at(12), 
Assembler::ori_op) && ++ is_special_op(int_at(16), Assembler::jalr_op) ) { ++ return true; ++ } ++ ++ //ori dst, R0, imm16 ++ //dsll dst, dst, 16 ++ //ori dst, dst, imm16 ++ //nop ++ if ( is_op(Assembler::ori_op) && ++ is_special_op(int_at(4), Assembler::dsll_op) && ++ is_op (int_at(8), Assembler::ori_op) && ++ nativeInstruction_at(addr_at(12))->is_nop() && ++ is_special_op(int_at(16), Assembler::jalr_op) ) { ++ return true; ++ } ++ ++ //ori dst, R0, imm16 ++ //dsll dst, dst, 16 ++ //nop ++ //nop ++ if ( is_op(Assembler::ori_op) && ++ is_special_op(int_at(4), Assembler::dsll_op) && ++ nativeInstruction_at(addr_at(8))->is_nop() && ++ nativeInstruction_at(addr_at(12))->is_nop() && ++ is_special_op(int_at(16), Assembler::jalr_op) ) { ++ return true; ++ } ++ ++ //daddiu dst, R0, imm16 ++ //nop ++ //nop ++ //nop ++ if ( is_op(Assembler::daddiu_op) && ++ nativeInstruction_at(addr_at(4))->is_nop() && ++ nativeInstruction_at(addr_at(8))->is_nop() && ++ nativeInstruction_at(addr_at(12))->is_nop() && ++ is_special_op(int_at(16), Assembler::jalr_op) ) { ++ return true; ++ } ++ ++ //lui dst, imm16 ++ //ori dst, dst, imm16 ++ //nop ++ //nop ++ if ( is_op(Assembler::lui_op) && ++ is_op (int_at(4), Assembler::ori_op) && ++ nativeInstruction_at(addr_at(8))->is_nop() && ++ nativeInstruction_at(addr_at(12))->is_nop() && ++ is_special_op(int_at(16), Assembler::jalr_op) ) { ++ return true; ++ } ++ ++ //lui dst, imm16 ++ //nop ++ //nop ++ //nop ++ if ( is_op(Assembler::lui_op) && ++ nativeInstruction_at(addr_at(4))->is_nop() && ++ nativeInstruction_at(addr_at(8))->is_nop() && ++ nativeInstruction_at(addr_at(12))->is_nop() && ++ is_special_op(int_at(16), Assembler::jalr_op) ) { ++ return true; ++ } ++ ++ ++ //daddiu dst, R0, imm16 ++ //nop ++ if ( is_op(Assembler::daddiu_op) && ++ nativeInstruction_at(addr_at(4))->is_nop() && ++ is_special_op(int_at(8), Assembler::jalr_op) ) { ++ return true; ++ } ++ ++ //lui dst, imm16 ++ //ori dst, dst, imm16 ++ if ( is_op(Assembler::lui_op) && ++ is_op (int_at(4), Assembler::ori_op) && ++ is_special_op(int_at(8), Assembler::jalr_op) ) { ++ return true; ++ } ++ ++ //lui dst, imm16 ++ //nop ++ if ( is_op(Assembler::lui_op) && ++ nativeInstruction_at(addr_at(4))->is_nop() && ++ is_special_op(int_at(8), Assembler::jalr_op) ) { ++ return true; ++ } ++ ++ if(is_trampoline_call()) ++ return true; ++ ++ return false; ++ ++} ++ ++inline bool NativeInstruction::is_return() { return is_special_op(Assembler::jr_op) && is_rs(RA);} ++ ++inline bool NativeInstruction::is_cond_jump() { return is_int_branch() || is_float_branch(); } ++ ++// Call trampoline stubs. 
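// Every is_call()/is_jump() pattern check above reduces to comparing two
// fixed fields of a 32-bit MIPS instruction word: the primary opcode in bits
// 31..26, and, for SPECIAL-class instructions (primary opcode 0), the
// function code in bits 5..0. A minimal sketch of that field extraction
// (standard MIPS encoding; helper names are illustrative, not the Assembler
// accessors).
#include <cstdint>

static inline uint32_t opcode_field(uint32_t insn) { return insn >> 26; }
static inline uint32_t funct_field(uint32_t insn)  { return insn & 0x3f; }

static inline bool is_special(uint32_t insn, uint32_t funct) {
  return opcode_field(insn) == 0 && funct_field(insn) == funct;
}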
++class NativeCallTrampolineStub : public NativeInstruction { ++ public: ++ ++ enum mips_specific_constants { ++ instruction_size = 2 * BytesPerInstWord, ++ instruction_offset = 0, ++ next_instruction_offset = 2 * BytesPerInstWord ++ }; ++ ++ address destination() const { ++ return (address)ptr_at(0); ++ } ++ ++ void set_destination(address new_destination) { ++ set_ptr_at(0, (intptr_t)new_destination); ++ } ++}; ++ ++inline bool NativeInstruction::is_trampoline_call() { ++ // lui dst, imm16 ++ // ori dst, dst, imm16 ++ // dsll dst, dst, 16 ++ // ld target, dst, imm16 ++ // jalr target ++ // nop ++ if ( is_op(Assembler::lui_op) && ++ is_op(int_at(4), Assembler::ori_op) && ++ is_special_op(int_at(8), Assembler::dsll_op) && ++ is_op(int_at(12), Assembler::ld_op) && ++ is_special_op(int_at(16), Assembler::jalr_op) && ++ nativeInstruction_at(addr_at(20))->is_nop() ) { ++ return true; ++ } ++ ++ return false; ++} ++ ++inline NativeCallTrampolineStub* nativeCallTrampolineStub_at(address addr) { ++ return (NativeCallTrampolineStub*)addr; ++} ++ ++#endif // CPU_MIPS_VM_NATIVEINST_MIPS_HPP +diff --git a/hotspot/src/cpu/mips/vm/registerMap_mips.hpp b/hotspot/src/cpu/mips/vm/registerMap_mips.hpp +new file mode 100644 +index 0000000000..7f800eb107 +--- /dev/null ++++ b/hotspot/src/cpu/mips/vm/registerMap_mips.hpp +@@ -0,0 +1,47 @@ ++/* ++ * Copyright (c) 1998, 2010, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2016, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#ifndef CPU_MIPS_VM_REGISTERMAP_MIPS_HPP ++#define CPU_MIPS_VM_REGISTERMAP_MIPS_HPP ++ ++// machine-dependent implemention for register maps ++ friend class frame; ++ ++ private: ++#ifndef CORE ++ // This is the hook for finding a register in an "well-known" location, ++ // such as a register block of a predetermined format. ++ // Since there is none, we just return NULL. ++ // See registerMap_sparc.hpp for an example of grabbing registers ++ // from register save areas of a standard layout. 
++ address pd_location(VMReg reg) const {return NULL;} ++#endif ++ ++ // no PD state to clear or copy: ++ void pd_clear() {} ++ void pd_initialize() {} ++ void pd_initialize_from(const RegisterMap* map) {} ++ ++#endif // CPU_MIPS_VM_REGISTERMAP_MIPS_HPP +diff --git a/hotspot/src/cpu/mips/vm/register_definitions_mips.cpp b/hotspot/src/cpu/mips/vm/register_definitions_mips.cpp +new file mode 100644 +index 0000000000..4af2531834 +--- /dev/null ++++ b/hotspot/src/cpu/mips/vm/register_definitions_mips.cpp +@@ -0,0 +1,103 @@ ++/* ++ * Copyright (c) 2002, 2013, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2016, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#include "precompiled.hpp" ++#include "asm/assembler.hpp" ++#include "asm/register.hpp" ++#include "register_mips.hpp" ++#ifdef TARGET_ARCH_MODEL_mips_32 ++# include "interp_masm_mips_32.hpp" ++#endif ++#ifdef TARGET_ARCH_MODEL_mips_64 ++# include "interp_masm_mips_64.hpp" ++#endif ++ ++REGISTER_DEFINITION(Register, noreg); ++REGISTER_DEFINITION(Register, i0); ++REGISTER_DEFINITION(Register, i1); ++REGISTER_DEFINITION(Register, i2); ++REGISTER_DEFINITION(Register, i3); ++REGISTER_DEFINITION(Register, i4); ++REGISTER_DEFINITION(Register, i5); ++REGISTER_DEFINITION(Register, i6); ++REGISTER_DEFINITION(Register, i7); ++REGISTER_DEFINITION(Register, i8); ++REGISTER_DEFINITION(Register, i9); ++REGISTER_DEFINITION(Register, i10); ++REGISTER_DEFINITION(Register, i11); ++REGISTER_DEFINITION(Register, i12); ++REGISTER_DEFINITION(Register, i13); ++REGISTER_DEFINITION(Register, i14); ++REGISTER_DEFINITION(Register, i15); ++REGISTER_DEFINITION(Register, i16); ++REGISTER_DEFINITION(Register, i17); ++REGISTER_DEFINITION(Register, i18); ++REGISTER_DEFINITION(Register, i19); ++REGISTER_DEFINITION(Register, i20); ++REGISTER_DEFINITION(Register, i21); ++REGISTER_DEFINITION(Register, i22); ++REGISTER_DEFINITION(Register, i23); ++REGISTER_DEFINITION(Register, i24); ++REGISTER_DEFINITION(Register, i25); ++REGISTER_DEFINITION(Register, i26); ++REGISTER_DEFINITION(Register, i27); ++REGISTER_DEFINITION(Register, i28); ++REGISTER_DEFINITION(Register, i29); ++REGISTER_DEFINITION(Register, i30); ++REGISTER_DEFINITION(Register, i31); ++ ++REGISTER_DEFINITION(FloatRegister, fnoreg); ++REGISTER_DEFINITION(FloatRegister, f0); ++REGISTER_DEFINITION(FloatRegister, f1); ++REGISTER_DEFINITION(FloatRegister, f2); ++REGISTER_DEFINITION(FloatRegister, f3); ++REGISTER_DEFINITION(FloatRegister, f4); ++REGISTER_DEFINITION(FloatRegister, f5); 
++REGISTER_DEFINITION(FloatRegister, f6); ++REGISTER_DEFINITION(FloatRegister, f7); ++REGISTER_DEFINITION(FloatRegister, f8); ++REGISTER_DEFINITION(FloatRegister, f9); ++REGISTER_DEFINITION(FloatRegister, f10); ++REGISTER_DEFINITION(FloatRegister, f11); ++REGISTER_DEFINITION(FloatRegister, f12); ++REGISTER_DEFINITION(FloatRegister, f13); ++REGISTER_DEFINITION(FloatRegister, f14); ++REGISTER_DEFINITION(FloatRegister, f15); ++REGISTER_DEFINITION(FloatRegister, f16); ++REGISTER_DEFINITION(FloatRegister, f17); ++REGISTER_DEFINITION(FloatRegister, f18); ++REGISTER_DEFINITION(FloatRegister, f19); ++REGISTER_DEFINITION(FloatRegister, f20); ++REGISTER_DEFINITION(FloatRegister, f21); ++REGISTER_DEFINITION(FloatRegister, f22); ++REGISTER_DEFINITION(FloatRegister, f23); ++REGISTER_DEFINITION(FloatRegister, f24); ++REGISTER_DEFINITION(FloatRegister, f25); ++REGISTER_DEFINITION(FloatRegister, f26); ++REGISTER_DEFINITION(FloatRegister, f27); ++REGISTER_DEFINITION(FloatRegister, f28); ++REGISTER_DEFINITION(FloatRegister, f29); ++REGISTER_DEFINITION(FloatRegister, f30); ++REGISTER_DEFINITION(FloatRegister, f31); +diff --git a/hotspot/src/cpu/mips/vm/register_mips.cpp b/hotspot/src/cpu/mips/vm/register_mips.cpp +new file mode 100644 +index 0000000000..4a9b22bfef +--- /dev/null ++++ b/hotspot/src/cpu/mips/vm/register_mips.cpp +@@ -0,0 +1,52 @@ ++/* ++ * Copyright (c) 2000, 2012, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#include "precompiled.hpp" ++#include "register_mips.hpp" ++ ++const int ConcreteRegisterImpl::max_gpr = RegisterImpl::number_of_registers << 1; ++const int ConcreteRegisterImpl::max_fpr = ConcreteRegisterImpl::max_gpr + ++ 2 * FloatRegisterImpl::number_of_registers; ++ ++const char* RegisterImpl::name() const { ++ const char* names[number_of_registers] = { ++ "zero", "at", "v0", "v1", "a0", "a1", "a2", "a3", ++ "a4", "a5", "a6", "a7", "t0", "t1", "t2", "t3", ++ "s0", "s1", "s2", "s3", "s4", "s5", "s6", "s7", ++ "t8", "t9", "k0", "k1", "gp", "sp", "fp", "ra" ++ }; ++ return is_valid() ? names[encoding()] : "noreg"; ++} ++ ++const char* FloatRegisterImpl::name() const { ++ const char* names[number_of_registers] = { ++ "f0", "f1", "f2", "f3", "f4", "f5", "f6", "f7", ++ "f8", "f9", "f10", "f11", "f12", "f13", "f14", "f15", ++ "f16", "f17", "f18", "f19", "f20", "f21", "f22", "f23", ++ "f24", "f25", "f26", "f27", "f28", "f29", "f30", "f31", ++ }; ++ return is_valid() ? 
names[encoding()] : "fnoreg"; ++} ++ +diff --git a/hotspot/src/cpu/mips/vm/register_mips.hpp b/hotspot/src/cpu/mips/vm/register_mips.hpp +new file mode 100644 +index 0000000000..88bf2d68cc +--- /dev/null ++++ b/hotspot/src/cpu/mips/vm/register_mips.hpp +@@ -0,0 +1,346 @@ ++/* ++ * Copyright (c) 2000, 2012, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#ifndef CPU_MIPS_VM_REGISTER_MIPS_HPP ++#define CPU_MIPS_VM_REGISTER_MIPS_HPP ++ ++#include "asm/register.hpp" ++#include "vm_version_mips.hpp" ++ ++class VMRegImpl; ++typedef VMRegImpl* VMReg; ++ ++// Use Register as shortcut ++class RegisterImpl; ++typedef RegisterImpl* Register; ++ ++ ++// The implementation of integer registers for the mips architecture ++inline Register as_Register(int encoding) { ++ return (Register)(intptr_t) encoding; ++} ++ ++class RegisterImpl: public AbstractRegisterImpl { ++ public: ++ enum { ++ number_of_registers = 32 ++ }; ++ ++ // derived registers, offsets, and addresses ++ Register successor() const { return as_Register(encoding() + 1); } ++ ++ // construction ++ inline friend Register as_Register(int encoding); ++ ++ VMReg as_VMReg(); ++ ++ // accessors ++ int encoding() const { assert(is_valid(),err_msg( "invalid register (%d)", (int)(intptr_t)this)); return (intptr_t)this; } ++ bool is_valid() const { return 0 <= (intptr_t)this && (intptr_t)this < number_of_registers; } ++ const char* name() const; ++}; ++ ++ ++// The integer registers of the MIPS32 architecture ++CONSTANT_REGISTER_DECLARATION(Register, noreg, (-1)); ++ ++ ++CONSTANT_REGISTER_DECLARATION(Register, i0, (0)); ++CONSTANT_REGISTER_DECLARATION(Register, i1, (1)); ++CONSTANT_REGISTER_DECLARATION(Register, i2, (2)); ++CONSTANT_REGISTER_DECLARATION(Register, i3, (3)); ++CONSTANT_REGISTER_DECLARATION(Register, i4, (4)); ++CONSTANT_REGISTER_DECLARATION(Register, i5, (5)); ++CONSTANT_REGISTER_DECLARATION(Register, i6, (6)); ++CONSTANT_REGISTER_DECLARATION(Register, i7, (7)); ++CONSTANT_REGISTER_DECLARATION(Register, i8, (8)); ++CONSTANT_REGISTER_DECLARATION(Register, i9, (9)); ++CONSTANT_REGISTER_DECLARATION(Register, i10, (10)); ++CONSTANT_REGISTER_DECLARATION(Register, i11, (11)); ++CONSTANT_REGISTER_DECLARATION(Register, i12, (12)); ++CONSTANT_REGISTER_DECLARATION(Register, i13, (13)); ++CONSTANT_REGISTER_DECLARATION(Register, i14, (14)); ++CONSTANT_REGISTER_DECLARATION(Register, i15, (15)); ++CONSTANT_REGISTER_DECLARATION(Register, i16, (16)); 
++CONSTANT_REGISTER_DECLARATION(Register, i17, (17)); ++CONSTANT_REGISTER_DECLARATION(Register, i18, (18)); ++CONSTANT_REGISTER_DECLARATION(Register, i19, (19)); ++CONSTANT_REGISTER_DECLARATION(Register, i20, (20)); ++CONSTANT_REGISTER_DECLARATION(Register, i21, (21)); ++CONSTANT_REGISTER_DECLARATION(Register, i22, (22)); ++CONSTANT_REGISTER_DECLARATION(Register, i23, (23)); ++CONSTANT_REGISTER_DECLARATION(Register, i24, (24)); ++CONSTANT_REGISTER_DECLARATION(Register, i25, (25)); ++CONSTANT_REGISTER_DECLARATION(Register, i26, (26)); ++CONSTANT_REGISTER_DECLARATION(Register, i27, (27)); ++CONSTANT_REGISTER_DECLARATION(Register, i28, (28)); ++CONSTANT_REGISTER_DECLARATION(Register, i29, (29)); ++CONSTANT_REGISTER_DECLARATION(Register, i30, (30)); ++CONSTANT_REGISTER_DECLARATION(Register, i31, (31)); ++ ++#ifndef DONT_USE_REGISTER_DEFINES ++#define NOREG ((Register)(noreg_RegisterEnumValue)) ++ ++#define I0 ((Register)(i0_RegisterEnumValue)) ++#define I1 ((Register)(i1_RegisterEnumValue)) ++#define I2 ((Register)(i2_RegisterEnumValue)) ++#define I3 ((Register)(i3_RegisterEnumValue)) ++#define I4 ((Register)(i4_RegisterEnumValue)) ++#define I5 ((Register)(i5_RegisterEnumValue)) ++#define I6 ((Register)(i6_RegisterEnumValue)) ++#define I7 ((Register)(i7_RegisterEnumValue)) ++#define I8 ((Register)(i8_RegisterEnumValue)) ++#define I9 ((Register)(i9_RegisterEnumValue)) ++#define I10 ((Register)(i10_RegisterEnumValue)) ++#define I11 ((Register)(i11_RegisterEnumValue)) ++#define I12 ((Register)(i12_RegisterEnumValue)) ++#define I13 ((Register)(i13_RegisterEnumValue)) ++#define I14 ((Register)(i14_RegisterEnumValue)) ++#define I15 ((Register)(i15_RegisterEnumValue)) ++#define I16 ((Register)(i16_RegisterEnumValue)) ++#define I17 ((Register)(i17_RegisterEnumValue)) ++#define I18 ((Register)(i18_RegisterEnumValue)) ++#define I19 ((Register)(i19_RegisterEnumValue)) ++#define I20 ((Register)(i20_RegisterEnumValue)) ++#define I21 ((Register)(i21_RegisterEnumValue)) ++#define I22 ((Register)(i22_RegisterEnumValue)) ++#define I23 ((Register)(i23_RegisterEnumValue)) ++#define I24 ((Register)(i24_RegisterEnumValue)) ++#define I25 ((Register)(i25_RegisterEnumValue)) ++#define I26 ((Register)(i26_RegisterEnumValue)) ++#define I27 ((Register)(i27_RegisterEnumValue)) ++#define I28 ((Register)(i28_RegisterEnumValue)) ++#define I29 ((Register)(i29_RegisterEnumValue)) ++#define I30 ((Register)(i30_RegisterEnumValue)) ++#define I31 ((Register)(i31_RegisterEnumValue)) ++ ++#define R0 ((Register)(i0_RegisterEnumValue)) ++#define AT ((Register)(i1_RegisterEnumValue)) ++#define V0 ((Register)(i2_RegisterEnumValue)) ++#define V1 ((Register)(i3_RegisterEnumValue)) ++#define RA0 ((Register)(i4_RegisterEnumValue)) ++#define RA1 ((Register)(i5_RegisterEnumValue)) ++#define RA2 ((Register)(i6_RegisterEnumValue)) ++#define RA3 ((Register)(i7_RegisterEnumValue)) ++#define RA4 ((Register)(i8_RegisterEnumValue)) ++#define RA5 ((Register)(i9_RegisterEnumValue)) ++#define RA6 ((Register)(i10_RegisterEnumValue)) ++#define RA7 ((Register)(i11_RegisterEnumValue)) ++#define RT0 ((Register)(i12_RegisterEnumValue)) ++#define RT1 ((Register)(i13_RegisterEnumValue)) ++#define RT2 ((Register)(i14_RegisterEnumValue)) ++#define RT3 ((Register)(i15_RegisterEnumValue)) ++#define S0 ((Register)(i16_RegisterEnumValue)) ++#define S1 ((Register)(i17_RegisterEnumValue)) ++#define S2 ((Register)(i18_RegisterEnumValue)) ++#define S3 ((Register)(i19_RegisterEnumValue)) ++#define S4 ((Register)(i20_RegisterEnumValue)) ++#define S5 
((Register)(i21_RegisterEnumValue)) ++#define S6 ((Register)(i22_RegisterEnumValue)) ++#define S7 ((Register)(i23_RegisterEnumValue)) ++#define RT8 ((Register)(i24_RegisterEnumValue)) ++#define RT9 ((Register)(i25_RegisterEnumValue)) ++#define K0 ((Register)(i26_RegisterEnumValue)) ++#define K1 ((Register)(i27_RegisterEnumValue)) ++#define GP ((Register)(i28_RegisterEnumValue)) ++#define SP ((Register)(i29_RegisterEnumValue)) ++#define FP ((Register)(i30_RegisterEnumValue)) ++#define S8 ((Register)(i30_RegisterEnumValue)) ++#define RA ((Register)(i31_RegisterEnumValue)) ++ ++#define c_rarg0 RT0 ++#define c_rarg1 RT1 ++#define Rmethod S3 ++#define Rsender S4 ++#define Rnext S1 ++ ++/* ++#define RT0 T0 ++#define RT1 T1 ++#define RT2 T2 ++#define RT3 T3 ++#define RT4 T8 ++#define RT5 T9 ++*/ ++ ++ ++//for interpreter frame ++// bytecode pointer register ++#define BCP S0 ++// local variable pointer register ++#define LVP S7 ++// temperary callee saved register, we use this register to save the register maybe blowed cross call_VM ++// be sure to save and restore its value in call_stub ++#define TSR S2 ++ ++//OPT_SAFEPOINT not supported yet ++#define OPT_SAFEPOINT 1 ++ ++#define OPT_THREAD 1 ++ ++#define TREG S6 ++ ++#define S5_heapbase S5 ++ ++#define mh_SP_save SP ++ ++#define FSR V0 ++#define SSR V1 ++#define FSF F0 ++#define SSF F1 ++#define FTF F14 ++#define STF F15 ++ ++#define AFT F30 ++ ++#define RECEIVER T0 ++#define IC_Klass T1 ++ ++#define SHIFT_count T3 ++ ++#endif // DONT_USE_REGISTER_DEFINES ++ ++// Use FloatRegister as shortcut ++class FloatRegisterImpl; ++typedef FloatRegisterImpl* FloatRegister; ++ ++inline FloatRegister as_FloatRegister(int encoding) { ++ return (FloatRegister)(intptr_t) encoding; ++} ++ ++// The implementation of floating point registers for the mips architecture ++class FloatRegisterImpl: public AbstractRegisterImpl { ++ public: ++ enum { ++ float_arg_base = 12, ++ number_of_registers = 32 ++ }; ++ ++ // construction ++ inline friend FloatRegister as_FloatRegister(int encoding); ++ ++ VMReg as_VMReg(); ++ ++ // derived registers, offsets, and addresses ++ FloatRegister successor() const { return as_FloatRegister(encoding() + 1); } ++ ++ // accessors ++ int encoding() const { assert(is_valid(), "invalid register"); return (intptr_t)this; } ++ bool is_valid() const { return 0 <= (intptr_t)this && (intptr_t)this < number_of_registers; } ++ const char* name() const; ++ ++}; ++ ++CONSTANT_REGISTER_DECLARATION(FloatRegister, fnoreg , (-1)); ++ ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f0 , ( 0)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f1 , ( 1)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f2 , ( 2)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f3 , ( 3)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f4 , ( 4)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f5 , ( 5)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f6 , ( 6)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f7 , ( 7)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f8 , ( 8)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f9 , ( 9)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f10 , (10)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f11 , (11)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f12 , (12)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f13 , (13)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f14 , (14)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f15 , (15)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f16 , (16)); 
++CONSTANT_REGISTER_DECLARATION(FloatRegister, f17 , (17)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f18 , (18)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f19 , (19)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f20 , (20)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f21 , (21)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f22 , (22)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f23 , (23)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f24 , (24)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f25 , (25)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f26 , (26)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f27 , (27)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f28 , (28)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f29 , (29)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f30 , (30)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f31 , (31)); ++ ++#ifndef DONT_USE_REGISTER_DEFINES ++#define FNOREG ((FloatRegister)(fnoreg_FloatRegisterEnumValue)) ++#define F0 ((FloatRegister)( f0_FloatRegisterEnumValue)) ++#define F1 ((FloatRegister)( f1_FloatRegisterEnumValue)) ++#define F2 ((FloatRegister)( f2_FloatRegisterEnumValue)) ++#define F3 ((FloatRegister)( f3_FloatRegisterEnumValue)) ++#define F4 ((FloatRegister)( f4_FloatRegisterEnumValue)) ++#define F5 ((FloatRegister)( f5_FloatRegisterEnumValue)) ++#define F6 ((FloatRegister)( f6_FloatRegisterEnumValue)) ++#define F7 ((FloatRegister)( f7_FloatRegisterEnumValue)) ++#define F8 ((FloatRegister)( f8_FloatRegisterEnumValue)) ++#define F9 ((FloatRegister)( f9_FloatRegisterEnumValue)) ++#define F10 ((FloatRegister)( f10_FloatRegisterEnumValue)) ++#define F11 ((FloatRegister)( f11_FloatRegisterEnumValue)) ++#define F12 ((FloatRegister)( f12_FloatRegisterEnumValue)) ++#define F13 ((FloatRegister)( f13_FloatRegisterEnumValue)) ++#define F14 ((FloatRegister)( f14_FloatRegisterEnumValue)) ++#define F15 ((FloatRegister)( f15_FloatRegisterEnumValue)) ++#define F16 ((FloatRegister)( f16_FloatRegisterEnumValue)) ++#define F17 ((FloatRegister)( f17_FloatRegisterEnumValue)) ++#define F18 ((FloatRegister)( f18_FloatRegisterEnumValue)) ++#define F19 ((FloatRegister)( f19_FloatRegisterEnumValue)) ++#define F20 ((FloatRegister)( f20_FloatRegisterEnumValue)) ++#define F21 ((FloatRegister)( f21_FloatRegisterEnumValue)) ++#define F22 ((FloatRegister)( f22_FloatRegisterEnumValue)) ++#define F23 ((FloatRegister)( f23_FloatRegisterEnumValue)) ++#define F24 ((FloatRegister)( f24_FloatRegisterEnumValue)) ++#define F25 ((FloatRegister)( f25_FloatRegisterEnumValue)) ++#define F26 ((FloatRegister)( f26_FloatRegisterEnumValue)) ++#define F27 ((FloatRegister)( f27_FloatRegisterEnumValue)) ++#define F28 ((FloatRegister)( f28_FloatRegisterEnumValue)) ++#define F29 ((FloatRegister)( f29_FloatRegisterEnumValue)) ++#define F30 ((FloatRegister)( f30_FloatRegisterEnumValue)) ++#define F31 ((FloatRegister)( f31_FloatRegisterEnumValue)) ++#endif // DONT_USE_REGISTER_DEFINES ++ ++ ++const int MIPS_ARGS_IN_REGS_NUM = 4; ++ ++// Need to know the total number of registers of all sorts for SharedInfo. ++// Define a class that exports it. ++class ConcreteRegisterImpl : public AbstractRegisterImpl { ++ public: ++ enum { ++ // A big enough number for C2: all the registers plus flags ++ // This number must be large enough to cover REG_COUNT (defined by c2) registers. ++ // There is no requirement that any ordering here matches any ordering c2 gives ++ // it's optoregs. 
++ number_of_registers = (RegisterImpl::number_of_registers + FloatRegisterImpl::number_of_registers) * 2 ++ }; ++ ++ static const int max_gpr; ++ static const int max_fpr; ++}; ++ ++#endif //CPU_MIPS_VM_REGISTER_MIPS_HPP +diff --git a/hotspot/src/cpu/mips/vm/relocInfo_mips.cpp b/hotspot/src/cpu/mips/vm/relocInfo_mips.cpp +new file mode 100644 +index 0000000000..cae43b2d96 +--- /dev/null ++++ b/hotspot/src/cpu/mips/vm/relocInfo_mips.cpp +@@ -0,0 +1,156 @@ ++/* ++ * Copyright (c) 1998, 2013, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#include "precompiled.hpp" ++#include "asm/macroAssembler.hpp" ++#include "code/relocInfo.hpp" ++#include "compiler/disassembler.hpp" ++#include "nativeInst_mips.hpp" ++#include "oops/oop.inline.hpp" ++#include "runtime/safepoint.hpp" ++ ++ ++void Relocation::pd_set_data_value(address x, intptr_t o, bool verify_only) { ++ x += o; ++ typedef Assembler::WhichOperand WhichOperand; ++ WhichOperand which = (WhichOperand) format(); // that is, disp32 or imm, call32, narrow oop ++ assert(which == Assembler::disp32_operand || ++ which == Assembler::narrow_oop_operand || ++ which == Assembler::imm_operand, "format unpacks ok"); ++ if (which == Assembler::imm_operand) { ++ if (verify_only) { ++ assert(nativeMovConstReg_at(addr())->data() == (long)x, "instructions must match"); ++ } else { ++ nativeMovConstReg_at(addr())->set_data((intptr_t)(x)); ++ } ++ } else if (which == Assembler::narrow_oop_operand) { ++ // both compressed oops and compressed classes look the same ++ if (Universe::heap()->is_in_reserved((oop)x)) { ++ if (verify_only) { ++ assert(nativeMovConstReg_at(addr())->data() == (long)oopDesc::encode_heap_oop((oop)x), "instructions must match"); ++ } else { ++ nativeMovConstReg_at(addr())->set_data((intptr_t)(oopDesc::encode_heap_oop((oop)x)), (intptr_t)(x)); ++ } ++ } else { ++ if (verify_only) { ++ assert(nativeMovConstReg_at(addr())->data() == (long)Klass::encode_klass((Klass*)x), "instructions must match"); ++ } else { ++ nativeMovConstReg_at(addr())->set_data((intptr_t)(Klass::encode_klass((Klass*)x)), (intptr_t)(x)); ++ } ++ } ++ } else { ++ // Note: Use runtime_call_type relocations for call32_operand. 
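// pd_set_data_value() above re-encodes the value before patching when the
// operand is a narrow (compressed) oop or class pointer. Conceptually the
// narrow form is a 32-bit, optionally shifted offset from a base address; a
// hedged sketch of that encoding, with the base/shift handling simplified and
// the helper names illustrative rather than oopDesc's actual API.
#include <cstdint>

static inline uint32_t encode_narrow(uintptr_t addr, uintptr_t base, unsigned shift) {
  // Store the pointer as a scaled offset from the (heap or class-space) base.
  return (uint32_t)((addr - base) >> shift);
}

static inline uintptr_t decode_narrow(uint32_t narrow, uintptr_t base, unsigned shift) {
  // Inverse mapping: rebuild the full pointer from the 32-bit narrow value.
  return base + ((uintptr_t)narrow << shift);
}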
++ assert(0, "call32_operand not supported in MIPS64"); ++ } ++} ++ ++ ++//NOTICE HERE, this relocate is not need for MIPS, since MIPS USE abosolutly target, ++//Maybe We should FORGET CALL RELOCATION ++address Relocation::pd_call_destination(address orig_addr) { ++ intptr_t adj = 0; ++ NativeInstruction* ni = nativeInstruction_at(addr()); ++ if (ni->is_call()) { ++ if (!ni->is_trampoline_call()) { ++ return nativeCall_at(addr())->target_addr_for_insn(); ++ } else { ++ address trampoline = nativeCall_at(addr())->get_trampoline(); ++ if (trampoline) { ++ return nativeCallTrampolineStub_at(trampoline)->destination(); ++ } else { ++ return (address) -1; ++ } ++ } ++ } else if (ni->is_jump()) { ++ return nativeGeneralJump_at(addr())->jump_destination() + adj; ++ } else if (ni->is_cond_jump()) { ++ return nativeCondJump_at(addr())->jump_destination() +adj; ++ } else { ++ tty->print_cr("\nError!\ncall destination: 0x%lx", p2i(addr())); ++ Disassembler::decode(addr() - 10 * 4, addr() + 10 * 4, tty); ++ ShouldNotReachHere(); ++ return NULL; ++ } ++} ++ ++ ++void Relocation::pd_set_call_destination(address x) { ++ NativeInstruction* ni = nativeInstruction_at(addr()); ++ if (ni->is_call()) { ++ NativeCall* call = nativeCall_at(addr()); ++ if (!ni->is_trampoline_call()) { ++ call->set_destination(x); ++ } else { ++ address trampoline_stub_addr = call->get_trampoline(); ++ if (trampoline_stub_addr != NULL) { ++ address orig = call->target_addr_for_insn(); ++ if (orig != trampoline_stub_addr) { ++ call->patch_on_trampoline(trampoline_stub_addr); ++ } ++ call->set_destination_mt_safe(x, false); ++ } ++ } ++ } else if (ni->is_jump()) ++ nativeGeneralJump_at(addr())->set_jump_destination(x); ++ else if (ni->is_cond_jump()) ++ nativeCondJump_at(addr())->set_jump_destination(x); ++ else ++ { ShouldNotReachHere(); } ++ ++ // Unresolved jumps are recognized by a destination of -1 ++ // However 64bit can't actually produce such an address ++ // and encodes a jump to self but jump_destination will ++ // return a -1 as the signal. We must not relocate this ++ // jmp or the ic code will not see it as unresolved. ++} ++ ++ ++address* Relocation::pd_address_in_code() { ++ return (address*)addr(); ++} ++ ++ ++address Relocation::pd_get_address_from_code() { ++ NativeMovConstReg* ni = nativeMovConstReg_at(addr()); ++ return (address)ni->data(); ++} ++ ++ ++ ++void poll_Relocation::fix_relocation_after_move(const CodeBuffer* src, CodeBuffer* dest) { ++} ++ ++void poll_return_Relocation::fix_relocation_after_move(const CodeBuffer* src, CodeBuffer* dest) { ++} ++ ++void internal_pc_Relocation::fix_relocation_after_move(const CodeBuffer* src, CodeBuffer* dest) { ++ address target =0; ++ NativeMovConstReg* ni = nativeMovConstReg_at(addr()); ++ target = new_addr_for((address)ni->data(), src, dest); ++ ni->set_data((intptr_t)target); ++} ++ ++void metadata_Relocation::pd_fix_value(address x) { ++} +diff --git a/hotspot/src/cpu/mips/vm/relocInfo_mips.hpp b/hotspot/src/cpu/mips/vm/relocInfo_mips.hpp +new file mode 100644 +index 0000000000..04ad5dac96 +--- /dev/null ++++ b/hotspot/src/cpu/mips/vm/relocInfo_mips.hpp +@@ -0,0 +1,40 @@ ++/* ++ * Copyright (c) 1997, 2010, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#ifndef CPU_MIPS_VM_RELOCINFO_MIPS_HPP ++#define CPU_MIPS_VM_RELOCINFO_MIPS_HPP ++ ++ // machine-dependent parts of class relocInfo ++ private: ++ enum { ++ // Since MIPS instructions are whole words, ++ // the two low-order offset bits can always be discarded. ++ offset_unit = 4, ++ ++ // imm_oop_operand vs. narrow_oop_operand ++ format_width = 2 ++ }; ++ ++#endif // CPU_MIPS_VM_RELOCINFO_MIPS_HPP +diff --git a/hotspot/src/cpu/mips/vm/runtime_mips_64.cpp b/hotspot/src/cpu/mips/vm/runtime_mips_64.cpp +new file mode 100644 +index 0000000000..bb9269b423 +--- /dev/null ++++ b/hotspot/src/cpu/mips/vm/runtime_mips_64.cpp +@@ -0,0 +1,206 @@ ++/* ++ * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2020, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ * ++ */ ++ ++#include "precompiled.hpp" ++#ifdef COMPILER2 ++#include "asm/macroAssembler.hpp" ++#include "asm/macroAssembler.inline.hpp" ++#include "classfile/systemDictionary.hpp" ++#include "code/vmreg.hpp" ++#include "interpreter/interpreter.hpp" ++#include "opto/runtime.hpp" ++#include "runtime/interfaceSupport.hpp" ++#include "runtime/sharedRuntime.hpp" ++#include "runtime/stubRoutines.hpp" ++#include "runtime/vframeArray.hpp" ++#include "utilities/globalDefinitions.hpp" ++#include "vmreg_mips.inline.hpp" ++#endif ++ ++#define __ masm-> ++ ++#define A0 RA0 ++#define A1 RA1 ++#define A2 RA2 ++#define A3 RA3 ++#define A4 RA4 ++#define A5 RA5 ++#define A6 RA6 ++#define A7 RA7 ++#define T0 RT0 ++#define T1 RT1 ++#define T2 RT2 ++#define T3 RT3 ++#define T8 RT8 ++#define T9 RT9 ++ ++//-------------- generate_exception_blob ----------- ++// creates _exception_blob. ++// The exception blob is jumped to from a compiled method. ++// (see emit_exception_handler in sparc.ad file) ++// ++// Given an exception pc at a call we call into the runtime for the ++// handler in this method. This handler might merely restore state ++// (i.e. callee save registers) unwind the frame and jump to the ++// exception handler for the nmethod if there is no Java level handler ++// for the nmethod. ++// ++// This code is entered with a jump, and left with a jump. ++// ++// Arguments: ++// V0: exception oop ++// V1: exception pc ++// ++// Results: ++// A0: exception oop ++// A1: exception pc in caller or ??? ++// jumps to: exception handler of caller ++// ++// Note: the exception pc MUST be at a call (precise debug information) ++// ++// [stubGenerator_mips.cpp] generate_forward_exception() ++// |- V0, V1 are created ++// |- T9 <= SharedRuntime::exception_handler_for_return_address ++// `- jr T9 ++// `- the caller's exception_handler ++// `- jr OptoRuntime::exception_blob ++// `- here ++// ++void OptoRuntime::generate_exception_blob() { ++ // Capture info about frame layout ++ enum layout { ++ fp_off, ++ return_off, // slot for return address ++ framesize ++ }; ++ ++ // allocate space for the code ++ ResourceMark rm; ++ // setup code generation tools ++ CodeBuffer buffer("exception_blob", 5120, 5120); ++ MacroAssembler* masm = new MacroAssembler(&buffer); ++ ++ ++ address start = __ pc(); ++ ++ __ daddiu(SP, SP, -1 * framesize * wordSize); // Prolog! ++ ++ // this frame will be treated as the original caller method. ++ // So, the return pc should be filled with the original exception pc. ++ // ref: X86's implementation ++ __ sd(V1, SP, return_off *wordSize); // return address ++ __ sd(FP, SP, fp_off *wordSize); ++ ++ // Save callee saved registers. None for UseSSE=0, ++ // floats-only for UseSSE=1, and doubles for UseSSE=2. ++ ++ __ daddiu(FP, SP, fp_off * wordSize); ++ ++ // Store exception in Thread object. We cannot pass any arguments to the ++ // handle_exception call, since we do not want to make any assumption ++ // about the size of the frame where the exception happened in. ++ Register thread = TREG; ++ ++#ifndef OPT_THREAD ++ __ get_thread(thread); ++#endif ++ ++ __ sd(V0, Address(thread, JavaThread::exception_oop_offset())); ++ __ sd(V1, Address(thread, JavaThread::exception_pc_offset())); ++ ++ // This call does all the hard work. It checks if an exception handler ++ // exists in the method. ++ // If so, it returns the handler address. ++ // If not, it prepares for stack-unwinding, restoring the callee-save ++ // registers of the frame being removed. 
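// A little further below, "__ move(AT, -(StackAlignmentInBytes)); __ andr(SP,
// SP, AT)" re-aligns SP before calling into the runtime. That is the usual
// align-down idiom: AND-ing with the negated alignment clears the low bits.
// A host-side sketch of the same computation; the helper name is illustrative.
#include <cstdint>

static inline uintptr_t align_down(uintptr_t sp, uintptr_t alignment) {
  // alignment must be a power of two; ~(alignment - 1) equals -alignment in
  // two's complement, so the AND rounds sp down to the alignment boundary.
  return sp & ~(alignment - 1);
}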
++ __ set_last_Java_frame(thread, NOREG, NOREG, NULL); ++ ++ __ move(AT, -(StackAlignmentInBytes)); ++ __ andr(SP, SP, AT); // Fix stack alignment as required by ABI ++ ++ __ relocate(relocInfo::internal_pc_type); ++ ++ { ++ long save_pc = (long)__ pc() + 48; ++ __ patchable_set48(AT, save_pc); ++ } ++ __ sd(AT, thread, in_bytes(JavaThread::last_Java_pc_offset())); ++ ++ __ move(A0, thread); ++ __ patchable_set48(T9, (long)OptoRuntime::handle_exception_C); ++ __ jalr(T9); ++ __ delayed()->nop(); ++ ++ // Set an oopmap for the call site ++ OopMapSet *oop_maps = new OopMapSet(); ++ OopMap* map = new OopMap( framesize, 0 ); ++ ++ oop_maps->add_gc_map( __ offset(), map); ++ ++#ifndef OPT_THREAD ++ __ get_thread(thread); ++#endif ++ __ reset_last_Java_frame(thread, true); ++ ++ // Pop self-frame. ++ __ leave(); // Epilog! ++ ++ // V0: exception handler ++ ++ // We have a handler in V0, (could be deopt blob) ++ __ move(T9, V0); ++ ++#ifndef OPT_THREAD ++ __ get_thread(thread); ++#endif ++ // Get the exception ++ __ ld(A0, Address(thread, JavaThread::exception_oop_offset())); ++ // Get the exception pc in case we are deoptimized ++ __ ld(A1, Address(thread, JavaThread::exception_pc_offset())); ++#ifdef ASSERT ++ __ sd(R0, Address(thread, JavaThread::exception_handler_pc_offset())); ++ __ sd(R0, Address(thread, JavaThread::exception_pc_offset())); ++#endif ++ // Clear the exception oop so GC no longer processes it as a root. ++ __ sd(R0, Address(thread, JavaThread::exception_oop_offset())); ++ ++ // Fix seg fault when running: ++ // Eclipse + Plugin + Debug As ++ // This is the only condition where C2 calls SharedRuntime::generate_deopt_blob() ++ // ++ __ move(V0, A0); ++ __ move(V1, A1); ++ ++ // V0: exception oop ++ // T9: exception handler ++ // A1: exception pc ++ __ jr(T9); ++ __ delayed()->nop(); ++ ++ // make sure all code is generated ++ masm->flush(); ++ ++ _exception_blob = ExceptionBlob::create(&buffer, oop_maps, framesize); ++} +diff --git a/hotspot/src/cpu/mips/vm/sharedRuntime_mips_64.cpp b/hotspot/src/cpu/mips/vm/sharedRuntime_mips_64.cpp +new file mode 100644 +index 0000000000..daf04c4422 +--- /dev/null ++++ b/hotspot/src/cpu/mips/vm/sharedRuntime_mips_64.cpp +@@ -0,0 +1,3816 @@ ++/* ++ * Copyright (c) 2003, 2013, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2023, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ * ++ */ ++ ++#include "precompiled.hpp" ++#include "asm/macroAssembler.hpp" ++#include "asm/macroAssembler.inline.hpp" ++#include "code/debugInfoRec.hpp" ++#include "code/icBuffer.hpp" ++#include "code/vtableStubs.hpp" ++#include "interpreter/interpreter.hpp" ++#include "oops/compiledICHolder.hpp" ++#include "prims/jvmtiRedefineClassesTrace.hpp" ++#include "runtime/sharedRuntime.hpp" ++#include "runtime/vframeArray.hpp" ++#include "vmreg_mips.inline.hpp" ++#ifdef COMPILER2 ++#include "opto/runtime.hpp" ++#endif ++ ++#include ++ ++#define __ masm-> ++ ++#define A0 RA0 ++#define A1 RA1 ++#define A2 RA2 ++#define A3 RA3 ++#define A4 RA4 ++#define A5 RA5 ++#define A6 RA6 ++#define A7 RA7 ++#define T0 RT0 ++#define T1 RT1 ++#define T2 RT2 ++#define T3 RT3 ++#define T8 RT8 ++#define T9 RT9 ++ ++const int StackAlignmentInSlots = StackAlignmentInBytes / VMRegImpl::stack_slot_size; ++ ++class RegisterSaver { ++ enum { FPU_regs_live = 32 }; ++ // Capture info about frame layout ++ enum layout { ++#define DEF_LAYOUT_OFFS(regname) regname ## _off, regname ## H_off, ++ DEF_LAYOUT_OFFS(for_16_bytes_aligned) ++ DEF_LAYOUT_OFFS(fpr0) ++ DEF_LAYOUT_OFFS(fpr1) ++ DEF_LAYOUT_OFFS(fpr2) ++ DEF_LAYOUT_OFFS(fpr3) ++ DEF_LAYOUT_OFFS(fpr4) ++ DEF_LAYOUT_OFFS(fpr5) ++ DEF_LAYOUT_OFFS(fpr6) ++ DEF_LAYOUT_OFFS(fpr7) ++ DEF_LAYOUT_OFFS(fpr8) ++ DEF_LAYOUT_OFFS(fpr9) ++ DEF_LAYOUT_OFFS(fpr10) ++ DEF_LAYOUT_OFFS(fpr11) ++ DEF_LAYOUT_OFFS(fpr12) ++ DEF_LAYOUT_OFFS(fpr13) ++ DEF_LAYOUT_OFFS(fpr14) ++ DEF_LAYOUT_OFFS(fpr15) ++ DEF_LAYOUT_OFFS(fpr16) ++ DEF_LAYOUT_OFFS(fpr17) ++ DEF_LAYOUT_OFFS(fpr18) ++ DEF_LAYOUT_OFFS(fpr19) ++ DEF_LAYOUT_OFFS(fpr20) ++ DEF_LAYOUT_OFFS(fpr21) ++ DEF_LAYOUT_OFFS(fpr22) ++ DEF_LAYOUT_OFFS(fpr23) ++ DEF_LAYOUT_OFFS(fpr24) ++ DEF_LAYOUT_OFFS(fpr25) ++ DEF_LAYOUT_OFFS(fpr26) ++ DEF_LAYOUT_OFFS(fpr27) ++ DEF_LAYOUT_OFFS(fpr28) ++ DEF_LAYOUT_OFFS(fpr29) ++ DEF_LAYOUT_OFFS(fpr30) ++ DEF_LAYOUT_OFFS(fpr31) ++ ++ DEF_LAYOUT_OFFS(v0) ++ DEF_LAYOUT_OFFS(v1) ++ DEF_LAYOUT_OFFS(a0) ++ DEF_LAYOUT_OFFS(a1) ++ DEF_LAYOUT_OFFS(a2) ++ DEF_LAYOUT_OFFS(a3) ++ DEF_LAYOUT_OFFS(a4) ++ DEF_LAYOUT_OFFS(a5) ++ DEF_LAYOUT_OFFS(a6) ++ DEF_LAYOUT_OFFS(a7) ++ DEF_LAYOUT_OFFS(t0) ++ DEF_LAYOUT_OFFS(t1) ++ DEF_LAYOUT_OFFS(t2) ++ DEF_LAYOUT_OFFS(t3) ++ DEF_LAYOUT_OFFS(s0) ++ DEF_LAYOUT_OFFS(s1) ++ DEF_LAYOUT_OFFS(s2) ++ DEF_LAYOUT_OFFS(s3) ++ DEF_LAYOUT_OFFS(s4) ++ DEF_LAYOUT_OFFS(s5) ++ DEF_LAYOUT_OFFS(s6) ++ DEF_LAYOUT_OFFS(s7) ++ DEF_LAYOUT_OFFS(t8) ++ DEF_LAYOUT_OFFS(t9) ++ ++ DEF_LAYOUT_OFFS(gp) ++ DEF_LAYOUT_OFFS(fp) ++ DEF_LAYOUT_OFFS(return) ++ reg_save_size ++ }; ++ ++ public: ++ ++ static OopMap* save_live_registers(MacroAssembler* masm, int additional_frame_words, int* total_frame_words, bool save_vectors =false ); ++ static void restore_live_registers(MacroAssembler* masm, bool restore_vectors = false); ++ static int raOffset(void) { return return_off / 2; } ++ //Rmethod ++ static int methodOffset(void) { return s3_off / 2; } ++ ++ static int v0Offset(void) { return v0_off / 2; } ++ static int v1Offset(void) { return v1_off / 2; } ++ ++ static int fpResultOffset(void) { return fpr0_off / 2; } ++ ++ // During deoptimization only the result register need to be restored ++ // all the other values have already been extracted. 
++ static void restore_result_registers(MacroAssembler* masm); ++}; ++ ++OopMap* RegisterSaver::save_live_registers(MacroAssembler* masm, int additional_frame_words, int* total_frame_words, bool save_vectors ) { ++ ++ // Always make the frame size 16-byte aligned ++ int frame_size_in_bytes = round_to(additional_frame_words*wordSize + ++ reg_save_size*BytesPerInt, 16); ++ // OopMap frame size is in compiler stack slots (jint's) not bytes or words ++ int frame_size_in_slots = frame_size_in_bytes / BytesPerInt; ++ // The caller will allocate additional_frame_words ++ int additional_frame_slots = additional_frame_words*wordSize / BytesPerInt; ++ // CodeBlob frame size is in words. ++ int frame_size_in_words = frame_size_in_bytes / wordSize; ++ *total_frame_words = frame_size_in_words; ++ ++ // save registers ++ ++ __ daddiu(SP, SP, - reg_save_size * jintSize); ++ ++ __ sdc1(F0, SP, fpr0_off * jintSize); __ sdc1(F1, SP, fpr1_off * jintSize); ++ __ sdc1(F2, SP, fpr2_off * jintSize); __ sdc1(F3, SP, fpr3_off * jintSize); ++ __ sdc1(F4, SP, fpr4_off * jintSize); __ sdc1(F5, SP, fpr5_off * jintSize); ++ __ sdc1(F6, SP, fpr6_off * jintSize); __ sdc1(F7, SP, fpr7_off * jintSize); ++ __ sdc1(F8, SP, fpr8_off * jintSize); __ sdc1(F9, SP, fpr9_off * jintSize); ++ __ sdc1(F10, SP, fpr10_off * jintSize); __ sdc1(F11, SP, fpr11_off * jintSize); ++ __ sdc1(F12, SP, fpr12_off * jintSize); __ sdc1(F13, SP, fpr13_off * jintSize); ++ __ sdc1(F14, SP, fpr14_off * jintSize); __ sdc1(F15, SP, fpr15_off * jintSize); ++ __ sdc1(F16, SP, fpr16_off * jintSize); __ sdc1(F17, SP, fpr17_off * jintSize); ++ __ sdc1(F18, SP, fpr18_off * jintSize); __ sdc1(F19, SP, fpr19_off * jintSize); ++ __ sdc1(F20, SP, fpr20_off * jintSize); __ sdc1(F21, SP, fpr21_off * jintSize); ++ __ sdc1(F22, SP, fpr22_off * jintSize); __ sdc1(F23, SP, fpr23_off * jintSize); ++ __ sdc1(F24, SP, fpr24_off * jintSize); __ sdc1(F25, SP, fpr25_off * jintSize); ++ __ sdc1(F26, SP, fpr26_off * jintSize); __ sdc1(F27, SP, fpr27_off * jintSize); ++ __ sdc1(F28, SP, fpr28_off * jintSize); __ sdc1(F29, SP, fpr29_off * jintSize); ++ __ sdc1(F30, SP, fpr30_off * jintSize); __ sdc1(F31, SP, fpr31_off * jintSize); ++ __ sd(V0, SP, v0_off * jintSize); __ sd(V1, SP, v1_off * jintSize); ++ __ sd(A0, SP, a0_off * jintSize); __ sd(A1, SP, a1_off * jintSize); ++ __ sd(A2, SP, a2_off * jintSize); __ sd(A3, SP, a3_off * jintSize); ++ __ sd(A4, SP, a4_off * jintSize); __ sd(A5, SP, a5_off * jintSize); ++ __ sd(A6, SP, a6_off * jintSize); __ sd(A7, SP, a7_off * jintSize); ++ __ sd(T0, SP, t0_off * jintSize); ++ __ sd(T1, SP, t1_off * jintSize); ++ __ sd(T2, SP, t2_off * jintSize); ++ __ sd(T3, SP, t3_off * jintSize); ++ __ sd(S0, SP, s0_off * jintSize); ++ __ sd(S1, SP, s1_off * jintSize); ++ __ sd(S2, SP, s2_off * jintSize); ++ __ sd(S3, SP, s3_off * jintSize); ++ __ sd(S4, SP, s4_off * jintSize); ++ __ sd(S5, SP, s5_off * jintSize); ++ __ sd(S6, SP, s6_off * jintSize); ++ __ sd(S7, SP, s7_off * jintSize); ++ ++ __ sd(T8, SP, t8_off * jintSize); ++ __ sd(T9, SP, t9_off * jintSize); ++ ++ __ sd(GP, SP, gp_off * jintSize); ++ __ sd(FP, SP, fp_off * jintSize); ++ __ sd(RA, SP, return_off * jintSize); ++ __ daddiu(FP, SP, fp_off * jintSize); ++ ++ OopMapSet *oop_maps = new OopMapSet(); ++ //OopMap* map = new OopMap( frame_words, 0 ); ++ OopMap* map = new OopMap( frame_size_in_slots, 0 ); ++ ++ ++//#define STACK_OFFSET(x) VMRegImpl::stack2reg((x) + additional_frame_words) ++#define STACK_OFFSET(x) VMRegImpl::stack2reg((x) + additional_frame_slots) ++ 
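++  // Note (editorial): every *_off value in the layout enum above is counted in
++  // 32-bit (jint) slots, which is why the stores below scale by jintSize and
++  // why STACK_OFFSET only needs to add the caller-provided
++  // additional_frame_slots before converting the index into a VMReg.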
map->set_callee_saved(STACK_OFFSET( v0_off), V0->as_VMReg()); ++ map->set_callee_saved(STACK_OFFSET( v1_off), V1->as_VMReg()); ++ map->set_callee_saved(STACK_OFFSET( a0_off), A0->as_VMReg()); ++ map->set_callee_saved(STACK_OFFSET( a1_off), A1->as_VMReg()); ++ map->set_callee_saved(STACK_OFFSET( a2_off), A2->as_VMReg()); ++ map->set_callee_saved(STACK_OFFSET( a3_off), A3->as_VMReg()); ++ map->set_callee_saved(STACK_OFFSET( a4_off), A4->as_VMReg()); ++ map->set_callee_saved(STACK_OFFSET( a5_off), A5->as_VMReg()); ++ map->set_callee_saved(STACK_OFFSET( a6_off), A6->as_VMReg()); ++ map->set_callee_saved(STACK_OFFSET( a7_off), A7->as_VMReg()); ++ map->set_callee_saved(STACK_OFFSET( t0_off), T0->as_VMReg()); ++ map->set_callee_saved(STACK_OFFSET( t1_off), T1->as_VMReg()); ++ map->set_callee_saved(STACK_OFFSET( t2_off), T2->as_VMReg()); ++ map->set_callee_saved(STACK_OFFSET( t3_off), T3->as_VMReg()); ++ map->set_callee_saved(STACK_OFFSET( s0_off), S0->as_VMReg()); ++ map->set_callee_saved(STACK_OFFSET( s1_off), S1->as_VMReg()); ++ map->set_callee_saved(STACK_OFFSET( s2_off), S2->as_VMReg()); ++ map->set_callee_saved(STACK_OFFSET( s3_off), S3->as_VMReg()); ++ map->set_callee_saved(STACK_OFFSET( s4_off), S4->as_VMReg()); ++ map->set_callee_saved(STACK_OFFSET( s5_off), S5->as_VMReg()); ++ map->set_callee_saved(STACK_OFFSET( s6_off), S6->as_VMReg()); ++ map->set_callee_saved(STACK_OFFSET( s7_off), S7->as_VMReg()); ++ map->set_callee_saved(STACK_OFFSET( t8_off), T8->as_VMReg()); ++ map->set_callee_saved(STACK_OFFSET( t9_off), T9->as_VMReg()); ++ map->set_callee_saved(STACK_OFFSET( gp_off), GP->as_VMReg()); ++ map->set_callee_saved(STACK_OFFSET( fp_off), FP->as_VMReg()); ++ map->set_callee_saved(STACK_OFFSET( return_off), RA->as_VMReg()); ++ ++ map->set_callee_saved(STACK_OFFSET( fpr0_off), F0->as_VMReg()); ++ map->set_callee_saved(STACK_OFFSET( fpr1_off), F1->as_VMReg()); ++ map->set_callee_saved(STACK_OFFSET( fpr2_off), F2->as_VMReg()); ++ map->set_callee_saved(STACK_OFFSET( fpr3_off), F3->as_VMReg()); ++ map->set_callee_saved(STACK_OFFSET( fpr4_off), F4->as_VMReg()); ++ map->set_callee_saved(STACK_OFFSET( fpr5_off), F5->as_VMReg()); ++ map->set_callee_saved(STACK_OFFSET( fpr6_off), F6->as_VMReg()); ++ map->set_callee_saved(STACK_OFFSET( fpr7_off), F7->as_VMReg()); ++ map->set_callee_saved(STACK_OFFSET( fpr8_off), F8->as_VMReg()); ++ map->set_callee_saved(STACK_OFFSET( fpr9_off), F9->as_VMReg()); ++ map->set_callee_saved(STACK_OFFSET( fpr10_off), F10->as_VMReg()); ++ map->set_callee_saved(STACK_OFFSET( fpr11_off), F11->as_VMReg()); ++ map->set_callee_saved(STACK_OFFSET( fpr12_off), F12->as_VMReg()); ++ map->set_callee_saved(STACK_OFFSET( fpr13_off), F13->as_VMReg()); ++ map->set_callee_saved(STACK_OFFSET( fpr14_off), F14->as_VMReg()); ++ map->set_callee_saved(STACK_OFFSET( fpr15_off), F15->as_VMReg()); ++ map->set_callee_saved(STACK_OFFSET( fpr16_off), F16->as_VMReg()); ++ map->set_callee_saved(STACK_OFFSET( fpr17_off), F17->as_VMReg()); ++ map->set_callee_saved(STACK_OFFSET( fpr18_off), F18->as_VMReg()); ++ map->set_callee_saved(STACK_OFFSET( fpr19_off), F19->as_VMReg()); ++ map->set_callee_saved(STACK_OFFSET( fpr20_off), F20->as_VMReg()); ++ map->set_callee_saved(STACK_OFFSET( fpr21_off), F21->as_VMReg()); ++ map->set_callee_saved(STACK_OFFSET( fpr22_off), F22->as_VMReg()); ++ map->set_callee_saved(STACK_OFFSET( fpr23_off), F23->as_VMReg()); ++ map->set_callee_saved(STACK_OFFSET( fpr24_off), F24->as_VMReg()); ++ map->set_callee_saved(STACK_OFFSET( fpr25_off), F25->as_VMReg()); ++ 
map->set_callee_saved(STACK_OFFSET( fpr26_off), F26->as_VMReg()); ++ map->set_callee_saved(STACK_OFFSET( fpr27_off), F27->as_VMReg()); ++ map->set_callee_saved(STACK_OFFSET( fpr28_off), F28->as_VMReg()); ++ map->set_callee_saved(STACK_OFFSET( fpr29_off), F29->as_VMReg()); ++ map->set_callee_saved(STACK_OFFSET( fpr30_off), F30->as_VMReg()); ++ map->set_callee_saved(STACK_OFFSET( fpr31_off), F31->as_VMReg()); ++ ++#undef STACK_OFFSET ++ return map; ++} ++ ++ ++// Pop the current frame and restore all the registers that we ++// saved. ++void RegisterSaver::restore_live_registers(MacroAssembler* masm, bool restore_vectors) { ++ __ ldc1(F0, SP, fpr0_off * jintSize); __ ldc1(F1, SP, fpr1_off * jintSize); ++ __ ldc1(F2, SP, fpr2_off * jintSize); __ ldc1(F3, SP, fpr3_off * jintSize); ++ __ ldc1(F4, SP, fpr4_off * jintSize); __ ldc1(F5, SP, fpr5_off * jintSize); ++ __ ldc1(F6, SP, fpr6_off * jintSize); __ ldc1(F7, SP, fpr7_off * jintSize); ++ __ ldc1(F8, SP, fpr8_off * jintSize); __ ldc1(F9, SP, fpr9_off * jintSize); ++ __ ldc1(F10, SP, fpr10_off * jintSize); __ ldc1(F11, SP, fpr11_off * jintSize); ++ __ ldc1(F12, SP, fpr12_off * jintSize); __ ldc1(F13, SP, fpr13_off * jintSize); ++ __ ldc1(F14, SP, fpr14_off * jintSize); __ ldc1(F15, SP, fpr15_off * jintSize); ++ __ ldc1(F16, SP, fpr16_off * jintSize); __ ldc1(F17, SP, fpr17_off * jintSize); ++ __ ldc1(F18, SP, fpr18_off * jintSize); __ ldc1(F19, SP, fpr19_off * jintSize); ++ __ ldc1(F20, SP, fpr20_off * jintSize); __ ldc1(F21, SP, fpr21_off * jintSize); ++ __ ldc1(F22, SP, fpr22_off * jintSize); __ ldc1(F23, SP, fpr23_off * jintSize); ++ __ ldc1(F24, SP, fpr24_off * jintSize); __ ldc1(F25, SP, fpr25_off * jintSize); ++ __ ldc1(F26, SP, fpr26_off * jintSize); __ ldc1(F27, SP, fpr27_off * jintSize); ++ __ ldc1(F28, SP, fpr28_off * jintSize); __ ldc1(F29, SP, fpr29_off * jintSize); ++ __ ldc1(F30, SP, fpr30_off * jintSize); __ ldc1(F31, SP, fpr31_off * jintSize); ++ ++ __ ld(V0, SP, v0_off * jintSize); __ ld(V1, SP, v1_off * jintSize); ++ __ ld(A0, SP, a0_off * jintSize); __ ld(A1, SP, a1_off * jintSize); ++ __ ld(A2, SP, a2_off * jintSize); __ ld(A3, SP, a3_off * jintSize); ++ __ ld(A4, SP, a4_off * jintSize); __ ld(A5, SP, a5_off * jintSize); ++ __ ld(A6, SP, a6_off * jintSize); __ ld(A7, SP, a7_off * jintSize); ++ __ ld(T0, SP, t0_off * jintSize); ++ __ ld(T1, SP, t1_off * jintSize); ++ __ ld(T2, SP, t2_off * jintSize); ++ __ ld(T3, SP, t3_off * jintSize); ++ __ ld(S0, SP, s0_off * jintSize); ++ __ ld(S1, SP, s1_off * jintSize); ++ __ ld(S2, SP, s2_off * jintSize); ++ __ ld(S3, SP, s3_off * jintSize); ++ __ ld(S4, SP, s4_off * jintSize); ++ __ ld(S5, SP, s5_off * jintSize); ++ __ ld(S6, SP, s6_off * jintSize); ++ __ ld(S7, SP, s7_off * jintSize); ++ ++ __ ld(T8, SP, t8_off * jintSize); ++ __ ld(T9, SP, t9_off * jintSize); ++ ++ __ ld(GP, SP, gp_off * jintSize); ++ __ ld(FP, SP, fp_off * jintSize); ++ __ ld(RA, SP, return_off * jintSize); ++ ++ __ addiu(SP, SP, reg_save_size * jintSize); ++} ++ ++// Pop the current frame and restore the registers that might be holding ++// a result. ++void RegisterSaver::restore_result_registers(MacroAssembler* masm) { ++ ++ // Just restore result register. Only used by deoptimization. By ++ // now any callee save register that needs to be restore to a c2 ++ // caller of the deoptee has been extracted into the vframeArray ++ // and will be stuffed into the c2i adapter we create for later ++ // restoration so only result registers need to be restored here. 
++ ++ __ ld(V0, SP, v0_off * jintSize); ++ __ ld(V1, SP, v1_off * jintSize); ++ __ ldc1(F0, SP, fpr0_off * jintSize); ++ __ ldc1(F1, SP, fpr1_off * jintSize); ++ __ addiu(SP, SP, return_off * jintSize); ++} ++ ++// Is vector's size (in bytes) bigger than a size saved by default? ++// 16 bytes XMM registers are saved by default using fxsave/fxrstor instructions. ++bool SharedRuntime::is_wide_vector(int size) { ++ return size > 16; ++} ++ ++// The java_calling_convention describes stack locations as ideal slots on ++// a frame with no abi restrictions. Since we must observe abi restrictions ++// (like the placement of the register window) the slots must be biased by ++// the following value. ++ ++static int reg2offset_in(VMReg r) { ++ // Account for saved fp and return address ++ // This should really be in_preserve_stack_slots ++ return (r->reg2stack() + 2 * VMRegImpl::slots_per_word) * VMRegImpl::stack_slot_size; // + 2 * VMRegImpl::stack_slot_size); ++} ++ ++static int reg2offset_out(VMReg r) { ++ return (r->reg2stack() + SharedRuntime::out_preserve_stack_slots()) * VMRegImpl::stack_slot_size; ++} ++ ++// --------------------------------------------------------------------------- ++// Read the array of BasicTypes from a signature, and compute where the ++// arguments should go. Values in the VMRegPair regs array refer to 4-byte ++// quantities. Values less than SharedInfo::stack0 are registers, those above ++// refer to 4-byte stack slots. All stack slots are based off of the stack pointer ++// as framesizes are fixed. ++// VMRegImpl::stack0 refers to the first slot 0(sp). ++// and VMRegImpl::stack0+1 refers to the memory word 4-byes higher. Register ++// up to RegisterImpl::number_of_registers) are the 32-bit ++// integer registers. ++ ++// Pass first five oop/int args in registers T0, A0 - A3. ++// Pass float/double/long args in stack. ++// Doubles have precedence, so if you pass a mix of floats and doubles ++// the doubles will grab the registers before the floats will. ++ ++// Note: the INPUTS in sig_bt are in units of Java argument words, which are ++// either 32-bit or 64-bit depending on the build. The OUTPUTS are in 32-bit ++// units regardless of build. ++ ++ ++// --------------------------------------------------------------------------- ++// The compiled Java calling convention. ++// Pass first five oop/int args in registers T0, A0 - A3. ++// Pass float/double/long args in stack. ++// Doubles have precedence, so if you pass a mix of floats and doubles ++// the doubles will grab the registers before the floats will. ++ ++int SharedRuntime::java_calling_convention(const BasicType *sig_bt, ++ VMRegPair *regs, ++ int total_args_passed, ++ int is_outgoing) { ++ ++ // Create the mapping between argument positions and registers. 
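++  // Illustrative example (editorial sketch, not from the original sources):
++  // for a virtual method  int foo(long a, Object b, float c)  the incoming
++  // signature is [T_OBJECT(receiver), T_LONG, T_VOID, T_OBJECT, T_FLOAT]; with
++  // the single shared "args" counter used below, the receiver lands in T0,
++  // "a" in A0, "b" in A1 and "c" in FP_ArgReg[3] == F15. Anything beyond the
++  // eight register slots falls back to stack slots, two 32-bit slots per
++  // argument (stk_args += 2).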
++ static const Register INT_ArgReg[Argument::n_register_parameters] = { ++ T0, A0, A1, A2, A3, A4, A5, A6 ++ }; ++ static const FloatRegister FP_ArgReg[Argument::n_float_register_parameters] = { ++ F12, F13, F14, F15, F16, F17, F18, F19 ++ }; ++ ++ uint args = 0; ++ uint stk_args = 0; // inc by 2 each time ++ ++ for (int i = 0; i < total_args_passed; i++) { ++ switch (sig_bt[i]) { ++ case T_VOID: ++ // halves of T_LONG or T_DOUBLE ++ assert(i != 0 && (sig_bt[i - 1] == T_LONG || sig_bt[i - 1] == T_DOUBLE), "expecting half"); ++ regs[i].set_bad(); ++ break; ++ case T_BOOLEAN: ++ case T_CHAR: ++ case T_BYTE: ++ case T_SHORT: ++ case T_INT: ++ if (args < Argument::n_register_parameters) { ++ regs[i].set1(INT_ArgReg[args++]->as_VMReg()); ++ } else { ++ regs[i].set1(VMRegImpl::stack2reg(stk_args)); ++ stk_args += 2; ++ } ++ break; ++ case T_LONG: ++ assert(sig_bt[i + 1] == T_VOID, "expecting half"); ++ // fall through ++ case T_OBJECT: ++ case T_ARRAY: ++ case T_ADDRESS: ++ if (args < Argument::n_register_parameters) { ++ regs[i].set2(INT_ArgReg[args++]->as_VMReg()); ++ } else { ++ regs[i].set2(VMRegImpl::stack2reg(stk_args)); ++ stk_args += 2; ++ } ++ break; ++ case T_FLOAT: ++ if (args < Argument::n_float_register_parameters) { ++ regs[i].set1(FP_ArgReg[args++]->as_VMReg()); ++ } else { ++ regs[i].set1(VMRegImpl::stack2reg(stk_args)); ++ stk_args += 2; ++ } ++ break; ++ case T_DOUBLE: ++ assert(sig_bt[i + 1] == T_VOID, "expecting half"); ++ if (args < Argument::n_float_register_parameters) { ++ regs[i].set2(FP_ArgReg[args++]->as_VMReg()); ++ } else { ++ regs[i].set2(VMRegImpl::stack2reg(stk_args)); ++ stk_args += 2; ++ } ++ break; ++ default: ++ ShouldNotReachHere(); ++ break; ++ } ++ } ++ ++ return round_to(stk_args, 2); ++} ++ ++// Patch the callers callsite with entry to compiled code if it exists. ++static void patch_callers_callsite(MacroAssembler *masm) { ++ Label L; ++ __ verify_oop(Rmethod); ++ __ ld_ptr(AT, Rmethod, in_bytes(Method::code_offset())); ++ __ beq(AT, R0, L); ++ __ delayed()->nop(); ++ // Schedule the branch target address early. ++ // Call into the VM to patch the caller, then jump to compiled callee ++ // V0 isn't live so capture return address while we easily can ++ __ move(V0, RA); ++ ++ __ pushad(); ++#ifdef COMPILER2 ++ // C2 may leave the stack dirty if not in SSE2+ mode ++ __ empty_FPU_stack(); ++#endif ++ ++ // VM needs caller's callsite ++ // VM needs target method ++ ++ __ move(A0, Rmethod); ++ __ move(A1, V0); ++ // we should preserve the return address ++ __ verify_oop(Rmethod); ++ __ move(S0, SP); ++ __ move(AT, -(StackAlignmentInBytes)); // align the stack ++ __ andr(SP, SP, AT); ++ __ call(CAST_FROM_FN_PTR(address, SharedRuntime::fixup_callers_callsite), ++ relocInfo::runtime_call_type); ++ ++ __ delayed()->nop(); ++ __ move(SP, S0); ++ __ popad(); ++ __ bind(L); ++} ++ ++static void gen_c2i_adapter(MacroAssembler *masm, ++ int total_args_passed, ++ int comp_args_on_stack, ++ const BasicType *sig_bt, ++ const VMRegPair *regs, ++ Label& skip_fixup) { ++ ++ // Before we get into the guts of the C2I adapter, see if we should be here ++ // at all. We've come from compiled code and are attempting to jump to the ++ // interpreter, which means the caller made a static call to get here ++ // (vcalls always get a compiled target if there is one). Check for a ++ // compiled target. If there is one, we need to patch the caller's call. ++ // However we will run interpreted if we come thru here. The next pass ++ // thru the call site will run compiled. 
If we ran compiled here then ++ // we can (theorectically) do endless i2c->c2i->i2c transitions during ++ // deopt/uncommon trap cycles. If we always go interpreted here then ++ // we can have at most one and don't need to play any tricks to keep ++ // from endlessly growing the stack. ++ // ++ // Actually if we detected that we had an i2c->c2i transition here we ++ // ought to be able to reset the world back to the state of the interpreted ++ // call and not bother building another interpreter arg area. We don't ++ // do that at this point. ++ ++ patch_callers_callsite(masm); ++ __ bind(skip_fixup); ++ ++#ifdef COMPILER2 ++ __ empty_FPU_stack(); ++#endif ++ //this is for native ? ++ // Since all args are passed on the stack, total_args_passed * interpreter_ ++ // stack_element_size is the ++ // space we need. ++ int extraspace = total_args_passed * Interpreter::stackElementSize; ++ ++ // stack is aligned, keep it that way ++ extraspace = round_to(extraspace, 2*wordSize); ++ ++ // Get return address ++ __ move(V0, RA); ++ // set senderSP value ++ //refer to interpreter_mips.cpp:generate_asm_entry ++ __ move(Rsender, SP); ++ __ addiu(SP, SP, -extraspace); ++ ++ // Now write the args into the outgoing interpreter space ++ for (int i = 0; i < total_args_passed; i++) { ++ if (sig_bt[i] == T_VOID) { ++ assert(i > 0 && (sig_bt[i-1] == T_LONG || sig_bt[i-1] == T_DOUBLE), "missing half"); ++ continue; ++ } ++ ++ // st_off points to lowest address on stack. ++ int st_off = ((total_args_passed - 1) - i) * Interpreter::stackElementSize; ++ // Say 4 args: ++ // i st_off ++ // 0 12 T_LONG ++ // 1 8 T_VOID ++ // 2 4 T_OBJECT ++ // 3 0 T_BOOL ++ VMReg r_1 = regs[i].first(); ++ VMReg r_2 = regs[i].second(); ++ if (!r_1->is_valid()) { ++ assert(!r_2->is_valid(), ""); ++ continue; ++ } ++ if (r_1->is_stack()) { ++ // memory to memory use fpu stack top ++ int ld_off = r_1->reg2stack() * VMRegImpl::stack_slot_size + extraspace; ++ if (!r_2->is_valid()) { ++ __ ld_ptr(AT, SP, ld_off); ++ __ st_ptr(AT, SP, st_off); ++ ++ } else { ++ ++ ++ int next_off = st_off - Interpreter::stackElementSize; ++ __ ld_ptr(AT, SP, ld_off); ++ __ st_ptr(AT, SP, st_off); ++ ++ // Ref to is_Register condition ++ if(sig_bt[i] == T_LONG || sig_bt[i] == T_DOUBLE) ++ __ st_ptr(AT, SP, st_off - 8); ++ } ++ } else if (r_1->is_Register()) { ++ Register r = r_1->as_Register(); ++ if (!r_2->is_valid()) { ++ __ sd(r, SP, st_off); ++ } else { ++ //FIXME, mips will not enter here ++ // long/double in gpr ++ __ sd(r, SP, st_off); ++ // In [java/util/zip/ZipFile.java] ++ // ++ // private static native long open(String name, int mode, long lastModified); ++ // private static native int getTotal(long jzfile); ++ // ++ // We need to transfer T_LONG paramenters from a compiled method to a native method. 
++ // It's a complex process: ++ // ++ // Caller -> lir_static_call -> gen_resolve_stub ++ // -> -- resolve_static_call_C ++ // `- gen_c2i_adapter() [*] ++ // | ++ // `- AdapterHandlerLibrary::get_create_apapter_index ++ // -> generate_native_entry ++ // -> InterpreterRuntime::SignatureHandlerGenerator::pass_long [**] ++ // ++ // In [**], T_Long parameter is stored in stack as: ++ // ++ // (high) ++ // | | ++ // ----------- ++ // | 8 bytes | ++ // | (void) | ++ // ----------- ++ // | 8 bytes | ++ // | (long) | ++ // ----------- ++ // | | ++ // (low) ++ // ++ // However, the sequence is reversed here: ++ // ++ // (high) ++ // | | ++ // ----------- ++ // | 8 bytes | ++ // | (long) | ++ // ----------- ++ // | 8 bytes | ++ // | (void) | ++ // ----------- ++ // | | ++ // (low) ++ // ++ // So I stored another 8 bytes in the T_VOID slot. It then can be accessed from generate_native_entry(). ++ // ++ if (sig_bt[i] == T_LONG) ++ __ sd(r, SP, st_off - 8); ++ } ++ } else if (r_1->is_FloatRegister()) { ++ assert(sig_bt[i] == T_FLOAT || sig_bt[i] == T_DOUBLE, "Must be a float register"); ++ ++ FloatRegister fr = r_1->as_FloatRegister(); ++ if (sig_bt[i] == T_FLOAT) ++ __ swc1(fr, SP, st_off); ++ else { ++ __ sdc1(fr, SP, st_off); ++ __ sdc1(fr, SP, st_off - 8); // T_DOUBLE needs two slots ++ } ++ } ++ } ++ ++ // Schedule the branch target address early. ++ __ ld_ptr(AT, Rmethod, in_bytes(Method::interpreter_entry_offset()) ); ++ // And repush original return address ++ __ move(RA, V0); ++ __ jr (AT); ++ __ delayed()->nop(); ++} ++ ++static void gen_i2c_adapter(MacroAssembler *masm, ++ int total_args_passed, ++ int comp_args_on_stack, ++ const BasicType *sig_bt, ++ const VMRegPair *regs) { ++ ++ // Generate an I2C adapter: adjust the I-frame to make space for the C-frame ++ // layout. Lesp was saved by the calling I-frame and will be restored on ++ // return. Meanwhile, outgoing arg space is all owned by the callee ++ // C-frame, so we can mangle it at will. After adjusting the frame size, ++ // hoist register arguments and repack other args according to the compiled ++ // code convention. Finally, end in a jump to the compiled code. The entry ++ // point address is the start of the buffer. ++ ++ // We will only enter here from an interpreted frame and never from after ++ // passing thru a c2i. Azul allowed this but we do not. If we lose the ++ // race and use a c2i we will remain interpreted for the race loser(s). ++ // This removes all sorts of headaches on the mips side and also eliminates ++ // the possibility of having c2i -> i2c -> c2i -> ... endless transitions. ++ ++ ++ __ move(T9, SP); ++ ++ // Cut-out for having no stack args. Since up to 2 int/oop args are passed ++ // in registers, we will occasionally have no stack args. ++ int comp_words_on_stack = 0; ++ if (comp_args_on_stack) { ++ // Sig words on the stack are greater-than VMRegImpl::stack0. Those in ++ // registers are below. By subtracting stack0, we either get a negative ++ // number (all values in registers) or the maximum stack slot accessed. ++ // int comp_args_on_stack = VMRegImpl::reg2stack(max_arg); ++ // Convert 4-byte stack slots to words. 
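++    // Worked example (editorial illustration): with comp_args_on_stack == 3 on
++    // a 64-bit build (wordSize == 8), round_to(3*4, 8) == 16 bytes == 2 words,
++    // already a multiple of 2, so 16 bytes are carved out of SP below.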
++ comp_words_on_stack = round_to(comp_args_on_stack*4, wordSize)>>LogBytesPerWord; ++ // Round up to miminum stack alignment, in wordSize ++ comp_words_on_stack = round_to(comp_words_on_stack, 2); ++ __ daddiu(SP, SP, -comp_words_on_stack * wordSize); ++ } ++ ++ // Align the outgoing SP ++ __ move(AT, -(StackAlignmentInBytes)); ++ __ andr(SP, SP, AT); ++ // push the return address on the stack (note that pushing, rather ++ // than storing it, yields the correct frame alignment for the callee) ++ // Put saved SP in another register ++ const Register saved_sp = V0; ++ __ move(saved_sp, T9); ++ ++ ++ // Will jump to the compiled code just as if compiled code was doing it. ++ // Pre-load the register-jump target early, to schedule it better. ++ __ ld(T9, Rmethod, in_bytes(Method::from_compiled_offset())); ++ ++ // Now generate the shuffle code. Pick up all register args and move the ++ // rest through the floating point stack top. ++ for (int i = 0; i < total_args_passed; i++) { ++ if (sig_bt[i] == T_VOID) { ++ // Longs and doubles are passed in native word order, but misaligned ++ // in the 32-bit build. ++ assert(i > 0 && (sig_bt[i-1] == T_LONG || sig_bt[i-1] == T_DOUBLE), "missing half"); ++ continue; ++ } ++ ++ // Pick up 0, 1 or 2 words from SP+offset. ++ ++ //assert(!regs[i].second()->is_valid() || regs[i].first()->next() == regs[i].second(), "scrambled load targets?"); ++ // Load in argument order going down. ++ int ld_off = (total_args_passed -1 - i)*Interpreter::stackElementSize; ++ // Point to interpreter value (vs. tag) ++ int next_off = ld_off - Interpreter::stackElementSize; ++ VMReg r_1 = regs[i].first(); ++ VMReg r_2 = regs[i].second(); ++ if (!r_1->is_valid()) { ++ assert(!r_2->is_valid(), ""); ++ continue; ++ } ++ if (r_1->is_stack()) { ++ // Convert stack slot to an SP offset (+ wordSize to ++ // account for return address ) ++ // NOTICE HERE!!!! I sub a wordSize here ++ int st_off = regs[i].first()->reg2stack()*VMRegImpl::stack_slot_size; ++ //+ wordSize; ++ ++ if (!r_2->is_valid()) { ++ __ ld(AT, saved_sp, ld_off); ++ __ sd(AT, SP, st_off); ++ } else { ++ // Interpreter local[n] == MSW, local[n+1] == LSW however locals ++ // are accessed as negative so LSW is at LOW address ++ ++ // ld_off is MSW so get LSW ++ // st_off is LSW (i.e. reg.first()) ++ ++ // [./org/eclipse/swt/graphics/GC.java] ++ // void drawImageXRender(Image srcImage, int srcX, int srcY, int srcWidth, int srcHeight, ++ // int destX, int destY, int destWidth, int destHeight, ++ // boolean simple, ++ // int imgWidth, int imgHeight, ++ // long maskPixmap, <-- Pass T_LONG in stack ++ // int maskType); ++ // Before this modification, Eclipse displays icons with solid black background. ++ // ++ __ ld(AT, saved_sp, ld_off); ++ if (sig_bt[i] == T_LONG || sig_bt[i] == T_DOUBLE) ++ __ ld(AT, saved_sp, ld_off - 8); ++ __ sd(AT, SP, st_off); ++ } ++ } else if (r_1->is_Register()) { // Register argument ++ Register r = r_1->as_Register(); ++ if (r_2->is_valid()) { ++ // Remember r_1 is low address (and LSB on mips) ++ // So r_2 gets loaded from high address regardless of the platform ++ assert(r_2->as_Register() == r_1->as_Register(), ""); ++ __ ld(r, saved_sp, ld_off); ++ ++ // ++ // For T_LONG type, the real layout is as below: ++ // ++ // (high) ++ // | | ++ // ----------- ++ // | 8 bytes | ++ // | (void) | ++ // ----------- ++ // | 8 bytes | ++ // | (long) | ++ // ----------- ++ // | | ++ // (low) ++ // ++ // We should load the low-8 bytes. 
++ // ++ if (sig_bt[i] == T_LONG) ++ __ ld(r, saved_sp, ld_off - 8); ++ } else { ++ __ lw(r, saved_sp, ld_off); ++ } ++ } else if (r_1->is_FloatRegister()) { // Float Register ++ assert(sig_bt[i] == T_FLOAT || sig_bt[i] == T_DOUBLE, "Must be a float register"); ++ ++ FloatRegister fr = r_1->as_FloatRegister(); ++ if (sig_bt[i] == T_FLOAT) ++ __ lwc1(fr, saved_sp, ld_off); ++ else { ++ __ ldc1(fr, saved_sp, ld_off); ++ __ ldc1(fr, saved_sp, ld_off - 8); ++ } ++ } ++ } ++ ++ // 6243940 We might end up in handle_wrong_method if ++ // the callee is deoptimized as we race thru here. If that ++ // happens we don't want to take a safepoint because the ++ // caller frame will look interpreted and arguments are now ++ // "compiled" so it is much better to make this transition ++ // invisible to the stack walking code. Unfortunately if ++ // we try and find the callee by normal means a safepoint ++ // is possible. So we stash the desired callee in the thread ++ // and the vm will find there should this case occur. ++ __ get_thread(T8); ++ __ sd(Rmethod, T8, in_bytes(JavaThread::callee_target_offset())); ++ ++ // move methodOop to V0 in case we end up in an c2i adapter. ++ // the c2i adapters expect methodOop in V0 (c2) because c2's ++ // resolve stubs return the result (the method) in V0. ++ // I'd love to fix this. ++ __ move(V0, Rmethod); ++ __ jr(T9); ++ __ delayed()->nop(); ++} ++ ++// --------------------------------------------------------------- ++AdapterHandlerEntry* SharedRuntime::generate_i2c2i_adapters(MacroAssembler *masm, ++ int total_args_passed, ++ int comp_args_on_stack, ++ const BasicType *sig_bt, ++ const VMRegPair *regs, ++ AdapterFingerPrint* fingerprint) { ++ address i2c_entry = __ pc(); ++ ++ gen_i2c_adapter(masm, total_args_passed, comp_args_on_stack, sig_bt, regs); ++ ++ // ------------------------------------------------------------------------- ++ // Generate a C2I adapter. On entry we know G5 holds the methodOop. The ++ // args start out packed in the compiled layout. They need to be unpacked ++ // into the interpreter layout. This will almost always require some stack ++ // space. We grow the current (compiled) stack, then repack the args. We ++ // finally end in a jump to the generic interpreter entry point. On exit ++ // from the interpreter, the interpreter will restore our SP (lest the ++ // compiled code, which relys solely on SP and not FP, get sick). ++ ++ address c2i_unverified_entry = __ pc(); ++ Label skip_fixup; ++ { ++ Register holder = T1; ++ Register receiver = T0; ++ Register temp = T8; ++ address ic_miss = SharedRuntime::get_ic_miss_stub(); ++ ++ Label missed; ++ ++ __ verify_oop(holder); ++ //add for compressedoops ++ __ load_klass(temp, receiver); ++ __ verify_oop(temp); ++ ++ __ ld_ptr(AT, holder, CompiledICHolder::holder_klass_offset()); ++ __ ld_ptr(Rmethod, holder, CompiledICHolder::holder_metadata_offset()); ++ __ bne(AT, temp, missed); ++ __ delayed()->nop(); ++ // Method might have been compiled since the call site was patched to ++ // interpreted if that is the case treat it as a miss so we can get ++ // the call site corrected. 
++ __ ld_ptr(AT, Rmethod, in_bytes(Method::code_offset())); ++ __ beq(AT, R0, skip_fixup); ++ __ delayed()->nop(); ++ __ bind(missed); ++ ++ __ jmp(ic_miss, relocInfo::runtime_call_type); ++ __ delayed()->nop(); ++ } ++ ++ address c2i_entry = __ pc(); ++ ++ gen_c2i_adapter(masm, total_args_passed, comp_args_on_stack, sig_bt, regs, skip_fixup); ++ ++ __ flush(); ++ return AdapterHandlerLibrary::new_entry(fingerprint, i2c_entry, c2i_entry, c2i_unverified_entry); ++} ++ ++int SharedRuntime::c_calling_convention(const BasicType *sig_bt, ++ VMRegPair *regs, ++ VMRegPair *regs2, ++ int total_args_passed) { ++ assert(regs2 == NULL, "not needed on MIPS"); ++ // Return the number of VMReg stack_slots needed for the args. ++ // This value does not include an abi space (like register window ++ // save area). ++ ++ // We return the amount of VMReg stack slots we need to reserve for all ++ // the arguments NOT counting out_preserve_stack_slots. Since we always ++ // have space for storing at least 6 registers to memory we start with that. ++ // See int_stk_helper for a further discussion. ++ // We return the amount of VMRegImpl stack slots we need to reserve for all ++ // the arguments NOT counting out_preserve_stack_slots. ++ static const Register INT_ArgReg[Argument::n_register_parameters] = { ++ A0, A1, A2, A3, A4, A5, A6, A7 ++ }; ++ static const FloatRegister FP_ArgReg[Argument::n_float_register_parameters] = { ++ F12, F13, F14, F15, F16, F17, F18, F19 ++ }; ++ uint args = 0; ++ uint stk_args = 0; // inc by 2 each time ++ ++// Example: ++// n java.lang.UNIXProcess::forkAndExec ++// private native int forkAndExec(byte[] prog, ++// byte[] argBlock, int argc, ++// byte[] envBlock, int envc, ++// byte[] dir, ++// boolean redirectErrorStream, ++// FileDescriptor stdin_fd, ++// FileDescriptor stdout_fd, ++// FileDescriptor stderr_fd) ++// JNIEXPORT jint JNICALL ++// Java_java_lang_UNIXProcess_forkAndExec(JNIEnv *env, ++// jobject process, ++// jbyteArray prog, ++// jbyteArray argBlock, jint argc, ++// jbyteArray envBlock, jint envc, ++// jbyteArray dir, ++// jboolean redirectErrorStream, ++// jobject stdin_fd, ++// jobject stdout_fd, ++// jobject stderr_fd) ++// ++// ::c_calling_convention ++// 0: // env <-- a0 ++// 1: L // klass/obj <-- t0 => a1 ++// 2: [ // prog[] <-- a0 => a2 ++// 3: [ // argBlock[] <-- a1 => a3 ++// 4: I // argc <-- a2 => a4 ++// 5: [ // envBlock[] <-- a3 => a5 ++// 6: I // envc <-- a4 => a5 ++// 7: [ // dir[] <-- a5 => a7 ++// 8: Z // redirectErrorStream <-- a6 => sp[0] ++// 9: L // stdin fp[16] => sp[8] ++// 10: L // stdout fp[24] => sp[16] ++// 11: L // stderr fp[32] => sp[24] ++// ++ for (int i = 0; i < total_args_passed; i++) { ++ switch (sig_bt[i]) { ++ case T_VOID: // Halves of longs and doubles ++ assert(i != 0 && (sig_bt[i - 1] == T_LONG || sig_bt[i - 1] == T_DOUBLE), "expecting half"); ++ regs[i].set_bad(); ++ break; ++ case T_BOOLEAN: ++ case T_CHAR: ++ case T_BYTE: ++ case T_SHORT: ++ case T_INT: ++ if (args < Argument::n_register_parameters) { ++ regs[i].set1(INT_ArgReg[args++]->as_VMReg()); ++ } else { ++ regs[i].set1(VMRegImpl::stack2reg(stk_args)); ++ stk_args += 2; ++ } ++ break; ++ case T_LONG: ++ assert(sig_bt[i + 1] == T_VOID, "expecting half"); ++ // fall through ++ case T_OBJECT: ++ case T_ARRAY: ++ case T_ADDRESS: ++ case T_METADATA: ++ if (args < Argument::n_register_parameters) { ++ regs[i].set2(INT_ArgReg[args++]->as_VMReg()); ++ } else { ++ regs[i].set2(VMRegImpl::stack2reg(stk_args)); ++ stk_args += 2; ++ } ++ break; ++ case T_FLOAT: ++ if (args < 
Argument::n_float_register_parameters) { ++ regs[i].set1(FP_ArgReg[args++]->as_VMReg()); ++ } else { ++ regs[i].set1(VMRegImpl::stack2reg(stk_args)); ++ stk_args += 2; ++ } ++ break; ++ case T_DOUBLE: ++ assert(sig_bt[i + 1] == T_VOID, "expecting half"); ++ if (args < Argument::n_float_register_parameters) { ++ regs[i].set2(FP_ArgReg[args++]->as_VMReg()); ++ } else { ++ regs[i].set2(VMRegImpl::stack2reg(stk_args)); ++ stk_args += 2; ++ } ++ break; ++ default: ++ ShouldNotReachHere(); ++ break; ++ } ++ } ++ ++ return round_to(stk_args, 2); ++} ++ ++// --------------------------------------------------------------------------- ++void SharedRuntime::save_native_result(MacroAssembler *masm, BasicType ret_type, int frame_slots) { ++ // We always ignore the frame_slots arg and just use the space just below frame pointer ++ // which by this time is free to use ++ switch (ret_type) { ++ case T_FLOAT: ++ __ swc1(FSF, FP, -wordSize); ++ break; ++ case T_DOUBLE: ++ __ sdc1(FSF, FP, -wordSize ); ++ break; ++ case T_VOID: break; ++ case T_LONG: ++ __ sd(V0, FP, -wordSize); ++ break; ++ case T_OBJECT: ++ case T_ARRAY: ++ __ sd(V0, FP, -wordSize); ++ break; ++ default: { ++ __ sw(V0, FP, -wordSize); ++ } ++ } ++} ++ ++void SharedRuntime::restore_native_result(MacroAssembler *masm, BasicType ret_type, int frame_slots) { ++ // We always ignore the frame_slots arg and just use the space just below frame pointer ++ // which by this time is free to use ++ switch (ret_type) { ++ case T_FLOAT: ++ __ lwc1(FSF, FP, -wordSize); ++ break; ++ case T_DOUBLE: ++ __ ldc1(FSF, FP, -wordSize ); ++ break; ++ case T_LONG: ++ __ ld(V0, FP, -wordSize); ++ break; ++ case T_VOID: break; ++ case T_OBJECT: ++ case T_ARRAY: ++ __ ld(V0, FP, -wordSize); ++ break; ++ default: { ++ __ lw(V0, FP, -wordSize); ++ } ++ } ++} ++ ++static void save_args(MacroAssembler *masm, int arg_count, int first_arg, VMRegPair *args) { ++ for ( int i = first_arg ; i < arg_count ; i++ ) { ++ if (args[i].first()->is_Register()) { ++ __ push(args[i].first()->as_Register()); ++ } else if (args[i].first()->is_FloatRegister()) { ++ __ push(args[i].first()->as_FloatRegister()); ++ } ++ } ++} ++ ++static void restore_args(MacroAssembler *masm, int arg_count, int first_arg, VMRegPair *args) { ++ for ( int i = arg_count - 1 ; i >= first_arg ; i-- ) { ++ if (args[i].first()->is_Register()) { ++ __ pop(args[i].first()->as_Register()); ++ } else if (args[i].first()->is_FloatRegister()) { ++ __ pop(args[i].first()->as_FloatRegister()); ++ } ++ } ++} ++ ++// A simple move of integer like type ++static void simple_move32(MacroAssembler* masm, VMRegPair src, VMRegPair dst) { ++ if (src.first()->is_stack()) { ++ if (dst.first()->is_stack()) { ++ // stack to stack ++ __ lw(AT, FP, reg2offset_in(src.first())); ++ __ sd(AT, SP, reg2offset_out(dst.first())); ++ } else { ++ // stack to reg ++ __ lw(dst.first()->as_Register(), FP, reg2offset_in(src.first())); ++ } ++ } else if (dst.first()->is_stack()) { ++ // reg to stack ++ __ sd(src.first()->as_Register(), SP, reg2offset_out(dst.first())); ++ } else { ++ if (dst.first() != src.first()){ ++ __ move(dst.first()->as_Register(), src.first()->as_Register()); // fujie error:dst.first() ++ } ++ } ++} ++ ++// An oop arg. Must pass a handle not the oop itself ++static void object_move(MacroAssembler* masm, ++ OopMap* map, ++ int oop_handle_offset, ++ int framesize_in_slots, ++ VMRegPair src, ++ VMRegPair dst, ++ bool is_receiver, ++ int* receiver_offset) { ++ ++ // must pass a handle. 
First figure out the location we use as a handle ++ ++ //FIXME, for mips, dst can be register ++ if (src.first()->is_stack()) { ++ // Oop is already on the stack as an argument ++ Register rHandle = V0; ++ Label nil; ++ __ xorr(rHandle, rHandle, rHandle); ++ __ ld(AT, FP, reg2offset_in(src.first())); ++ __ beq(AT, R0, nil); ++ __ delayed()->nop(); ++ __ lea(rHandle, Address(FP, reg2offset_in(src.first()))); ++ __ bind(nil); ++ if(dst.first()->is_stack())__ sd( rHandle, SP, reg2offset_out(dst.first())); ++ else __ move( (dst.first())->as_Register(), rHandle); ++ //if dst is register ++ //FIXME, do mips need out preserve stack slots? ++ int offset_in_older_frame = src.first()->reg2stack() ++ + SharedRuntime::out_preserve_stack_slots(); ++ map->set_oop(VMRegImpl::stack2reg(offset_in_older_frame + framesize_in_slots)); ++ if (is_receiver) { ++ *receiver_offset = (offset_in_older_frame ++ + framesize_in_slots) * VMRegImpl::stack_slot_size; ++ } ++ } else { ++ // Oop is in an a register we must store it to the space we reserve ++ // on the stack for oop_handles ++ const Register rOop = src.first()->as_Register(); ++ assert( (rOop->encoding() >= A0->encoding()) && (rOop->encoding() <= T0->encoding()),"wrong register"); ++ const Register rHandle = V0; ++ //Important: refer to java_calling_convertion ++ int oop_slot = (rOop->encoding() - A0->encoding()) * VMRegImpl::slots_per_word + oop_handle_offset; ++ int offset = oop_slot*VMRegImpl::stack_slot_size; ++ Label skip; ++ __ sd( rOop , SP, offset ); ++ map->set_oop(VMRegImpl::stack2reg(oop_slot)); ++ __ xorr( rHandle, rHandle, rHandle); ++ __ beq(rOop, R0, skip); ++ __ delayed()->nop(); ++ __ lea(rHandle, Address(SP, offset)); ++ __ bind(skip); ++ // Store the handle parameter ++ if(dst.first()->is_stack())__ sd( rHandle, SP, reg2offset_out(dst.first())); ++ else __ move((dst.first())->as_Register(), rHandle); ++ //if dst is register ++ ++ if (is_receiver) { ++ *receiver_offset = offset; ++ } ++ } ++} ++ ++// A float arg may have to do float reg int reg conversion ++static void float_move(MacroAssembler* masm, VMRegPair src, VMRegPair dst) { ++ assert(!src.second()->is_valid() && !dst.second()->is_valid(), "bad float_move"); ++ ++ if (src.first()->is_stack()) { ++ if (dst.first()->is_stack()) { ++ __ lw(AT, FP, reg2offset_in(src.first())); ++ __ sw(AT, SP, reg2offset_out(dst.first())); ++ } ++ else ++ __ lwc1(dst.first()->as_FloatRegister(), FP, reg2offset_in(src.first())); ++ } else { ++ // reg to stack ++ if(dst.first()->is_stack()) ++ __ swc1(src.first()->as_FloatRegister(), SP, reg2offset_out(dst.first())); ++ else ++ __ mov_s(dst.first()->as_FloatRegister(), src.first()->as_FloatRegister()); ++ } ++} ++ ++// A long move ++static void long_move(MacroAssembler* masm, VMRegPair src, VMRegPair dst) { ++ ++ // The only legal possibility for a long_move VMRegPair is: ++ // 1: two stack slots (possibly unaligned) ++ // as neither the java or C calling convention will use registers ++ // for longs. 
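++  // Editorial note: despite the comment above, the code below still covers all
++  // four src/dst combinations (stack->stack, stack->reg, reg->stack, reg->reg),
++  // using AT as the scratch register for the memory-to-memory case.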
++ ++ if (src.first()->is_stack()) { ++ assert(src.second()->is_stack() && dst.second()->is_stack(), "must be all stack"); ++ if( dst.first()->is_stack()){ ++ __ ld(AT, FP, reg2offset_in(src.first())); ++ __ sd(AT, SP, reg2offset_out(dst.first())); ++ } else { ++ __ ld( (dst.first())->as_Register() , FP, reg2offset_in(src.first())); ++ } ++ } else { ++ if( dst.first()->is_stack()){ ++ __ sd( (src.first())->as_Register(), SP, reg2offset_out(dst.first())); ++ } else { ++ __ move( (dst.first())->as_Register() , (src.first())->as_Register()); ++ } ++ } ++} ++ ++// A double move ++static void double_move(MacroAssembler* masm, VMRegPair src, VMRegPair dst) { ++ ++ // The only legal possibilities for a double_move VMRegPair are: ++ // The painful thing here is that like long_move a VMRegPair might be ++ ++ // Because of the calling convention we know that src is either ++ // 1: a single physical register (xmm registers only) ++ // 2: two stack slots (possibly unaligned) ++ // dst can only be a pair of stack slots. ++ ++ ++ if (src.first()->is_stack()) { ++ // source is all stack ++ if( dst.first()->is_stack()){ ++ __ ld(AT, FP, reg2offset_in(src.first())); ++ __ sd(AT, SP, reg2offset_out(dst.first())); ++ } else { ++ __ ldc1( (dst.first())->as_FloatRegister(), FP, reg2offset_in(src.first())); ++ } ++ ++ } else { ++ // reg to stack ++ // No worries about stack alignment ++ if( dst.first()->is_stack()){ ++ __ sdc1(src.first()->as_FloatRegister(), SP, reg2offset_out(dst.first())); ++ } ++ else ++ __ mov_d( dst.first()->as_FloatRegister(), src.first()->as_FloatRegister()); ++ ++ } ++} ++ ++static void verify_oop_args(MacroAssembler* masm, ++ methodHandle method, ++ const BasicType* sig_bt, ++ const VMRegPair* regs) { ++ Register temp_reg = T9; // not part of any compiled calling seq ++ if (VerifyOops) { ++ for (int i = 0; i < method->size_of_parameters(); i++) { ++ if (sig_bt[i] == T_OBJECT || ++ sig_bt[i] == T_ARRAY) { ++ VMReg r = regs[i].first(); ++ assert(r->is_valid(), "bad oop arg"); ++ if (r->is_stack()) { ++ __ ld(temp_reg, Address(SP, r->reg2stack() * VMRegImpl::stack_slot_size + wordSize)); ++ __ verify_oop(temp_reg); ++ } else { ++ __ verify_oop(r->as_Register()); ++ } ++ } ++ } ++ } ++} ++ ++static void gen_special_dispatch(MacroAssembler* masm, ++ methodHandle method, ++ const BasicType* sig_bt, ++ const VMRegPair* regs) { ++ verify_oop_args(masm, method, sig_bt, regs); ++ vmIntrinsics::ID iid = method->intrinsic_id(); ++ ++ // Now write the args into the outgoing interpreter space ++ bool has_receiver = false; ++ Register receiver_reg = noreg; ++ int member_arg_pos = -1; ++ Register member_reg = noreg; ++ int ref_kind = MethodHandles::signature_polymorphic_intrinsic_ref_kind(iid); ++ if (ref_kind != 0) { ++ member_arg_pos = method->size_of_parameters() - 1; // trailing MemberName argument ++ member_reg = S3; // known to be free at this point ++ has_receiver = MethodHandles::ref_kind_has_receiver(ref_kind); ++ } else if (iid == vmIntrinsics::_invokeBasic) { ++ has_receiver = true; ++ } else { ++ fatal(err_msg_res("unexpected intrinsic id %d", iid)); ++ } ++ ++ if (member_reg != noreg) { ++ // Load the member_arg into register, if necessary. 
++ SharedRuntime::check_member_name_argument_is_last_argument(method, sig_bt, regs); ++ VMReg r = regs[member_arg_pos].first(); ++ if (r->is_stack()) { ++ __ ld(member_reg, Address(SP, r->reg2stack() * VMRegImpl::stack_slot_size)); ++ } else { ++ // no data motion is needed ++ member_reg = r->as_Register(); ++ } ++ } ++ ++ if (has_receiver) { ++ // Make sure the receiver is loaded into a register. ++ assert(method->size_of_parameters() > 0, "oob"); ++ assert(sig_bt[0] == T_OBJECT, "receiver argument must be an object"); ++ VMReg r = regs[0].first(); ++ assert(r->is_valid(), "bad receiver arg"); ++ if (r->is_stack()) { ++ // Porting note: This assumes that compiled calling conventions always ++ // pass the receiver oop in a register. If this is not true on some ++ // platform, pick a temp and load the receiver from stack. ++ fatal("receiver always in a register"); ++ receiver_reg = SSR; // known to be free at this point ++ __ ld(receiver_reg, Address(SP, r->reg2stack() * VMRegImpl::stack_slot_size)); ++ } else { ++ // no data motion is needed ++ receiver_reg = r->as_Register(); ++ } ++ } ++ ++ // Figure out which address we are really jumping to: ++ MethodHandles::generate_method_handle_dispatch(masm, iid, ++ receiver_reg, member_reg, /*for_compiler_entry:*/ true); ++} ++ ++// --------------------------------------------------------------------------- ++// Generate a native wrapper for a given method. The method takes arguments ++// in the Java compiled code convention, marshals them to the native ++// convention (handlizes oops, etc), transitions to native, makes the call, ++// returns to java state (possibly blocking), unhandlizes any result and ++// returns. ++nmethod *SharedRuntime::generate_native_wrapper(MacroAssembler* masm, ++ methodHandle method, ++ int compile_id, ++ BasicType* in_sig_bt, ++ VMRegPair* in_regs, ++ BasicType ret_type) { ++ if (method->is_method_handle_intrinsic()) { ++ vmIntrinsics::ID iid = method->intrinsic_id(); ++ intptr_t start = (intptr_t)__ pc(); ++ int vep_offset = ((intptr_t)__ pc()) - start; ++ // Make enough room for patch_verified_entry ++ __ nop(); ++ __ nop(); ++ gen_special_dispatch(masm, ++ method, ++ in_sig_bt, ++ in_regs); ++ int frame_complete = ((intptr_t)__ pc()) - start; // not complete, period ++ __ flush(); ++ int stack_slots = SharedRuntime::out_preserve_stack_slots(); // no out slots at all, actually ++ return nmethod::new_native_nmethod(method, ++ compile_id, ++ masm->code(), ++ vep_offset, ++ frame_complete, ++ stack_slots / VMRegImpl::slots_per_word, ++ in_ByteSize(-1), ++ in_ByteSize(-1), ++ (OopMapSet*)NULL); ++ } ++ bool is_critical_native = true; ++ address native_func = method->critical_native_function(); ++ if (native_func == NULL) { ++ native_func = method->native_function(); ++ is_critical_native = false; ++ } ++ assert(native_func != NULL, "must have function"); ++ ++ // Native nmethod wrappers never take possesion of the oop arguments. ++ // So the caller will gc the arguments. The only thing we need an ++ // oopMap for is if the call is static ++ // ++ // An OopMap for lock (and class if static), and one for the VM call itself ++ OopMapSet *oop_maps = new OopMapSet(); ++ ++ // We have received a description of where all the java arg are located ++ // on entry to the wrapper. We need to convert these args to where ++ // the jni function will expect them. 
To figure out where they go ++ // we convert the java signature to a C signature by inserting ++ // the hidden arguments as arg[0] and possibly arg[1] (static method) ++ ++ const int total_in_args = method->size_of_parameters(); ++ int total_c_args = total_in_args; ++ if (!is_critical_native) { ++ total_c_args += 1; ++ if (method->is_static()) { ++ total_c_args++; ++ } ++ } else { ++ for (int i = 0; i < total_in_args; i++) { ++ if (in_sig_bt[i] == T_ARRAY) { ++ total_c_args++; ++ } ++ } ++ } ++ ++ BasicType* out_sig_bt = NEW_RESOURCE_ARRAY(BasicType, total_c_args); ++ VMRegPair* out_regs = NEW_RESOURCE_ARRAY(VMRegPair, total_c_args); ++ BasicType* in_elem_bt = NULL; ++ ++ int argc = 0; ++ if (!is_critical_native) { ++ out_sig_bt[argc++] = T_ADDRESS; ++ if (method->is_static()) { ++ out_sig_bt[argc++] = T_OBJECT; ++ } ++ ++ for (int i = 0; i < total_in_args ; i++ ) { ++ out_sig_bt[argc++] = in_sig_bt[i]; ++ } ++ } else { ++ Thread* THREAD = Thread::current(); ++ in_elem_bt = NEW_RESOURCE_ARRAY(BasicType, total_in_args); ++ SignatureStream ss(method->signature()); ++ for (int i = 0; i < total_in_args ; i++ ) { ++ if (in_sig_bt[i] == T_ARRAY) { ++ // Arrays are passed as int, elem* pair ++ out_sig_bt[argc++] = T_INT; ++ out_sig_bt[argc++] = T_ADDRESS; ++ Symbol* atype = ss.as_symbol(CHECK_NULL); ++ const char* at = atype->as_C_string(); ++ if (strlen(at) == 2) { ++ assert(at[0] == '[', "must be"); ++ switch (at[1]) { ++ case 'B': in_elem_bt[i] = T_BYTE; break; ++ case 'C': in_elem_bt[i] = T_CHAR; break; ++ case 'D': in_elem_bt[i] = T_DOUBLE; break; ++ case 'F': in_elem_bt[i] = T_FLOAT; break; ++ case 'I': in_elem_bt[i] = T_INT; break; ++ case 'J': in_elem_bt[i] = T_LONG; break; ++ case 'S': in_elem_bt[i] = T_SHORT; break; ++ case 'Z': in_elem_bt[i] = T_BOOLEAN; break; ++ default: ShouldNotReachHere(); ++ } ++ } ++ } else { ++ out_sig_bt[argc++] = in_sig_bt[i]; ++ in_elem_bt[i] = T_VOID; ++ } ++ if (in_sig_bt[i] != T_VOID) { ++ assert(in_sig_bt[i] == ss.type(), "must match"); ++ ss.next(); ++ } ++ } ++ } ++ ++ // Now figure out where the args must be stored and how much stack space ++ // they require (neglecting out_preserve_stack_slots but space for storing ++ // the 1st six register arguments). It's weird see int_stk_helper. ++ // ++ int out_arg_slots; ++ out_arg_slots = c_calling_convention(out_sig_bt, out_regs, NULL, total_c_args); ++ ++ // Compute framesize for the wrapper. We need to handlize all oops in ++ // registers. We must create space for them here that is disjoint from ++ // the windowed save area because we have no control over when we might ++ // flush the window again and overwrite values that gc has since modified. ++ // (The live window race) ++ // ++ // We always just allocate 6 word for storing down these object. This allow ++ // us to simply record the base and use the Ireg number to decide which ++ // slot to use. (Note that the reg number is the inbound number not the ++ // outbound number). ++ // We must shuffle args to match the native convention, and include var-args space. ++ ++ // Calculate the total number of stack slots we will need. ++ ++ // First count the abi requirement plus all of the outgoing args ++ int stack_slots = SharedRuntime::out_preserve_stack_slots() + out_arg_slots; ++ ++ // Now the space for the inbound oop handle area ++ int total_save_slots = 9 * VMRegImpl::slots_per_word; // 9 arguments passed in registers ++ if (is_critical_native) { ++ // Critical natives may have to call out so they need a save area ++ // for register arguments. 
++ int double_slots = 0; ++ int single_slots = 0; ++ for ( int i = 0; i < total_in_args; i++) { ++ if (in_regs[i].first()->is_Register()) { ++ const Register reg = in_regs[i].first()->as_Register(); ++ switch (in_sig_bt[i]) { ++ case T_BOOLEAN: ++ case T_BYTE: ++ case T_SHORT: ++ case T_CHAR: ++ case T_INT: single_slots++; break; ++ case T_ARRAY: ++ case T_LONG: double_slots++; break; ++ default: ShouldNotReachHere(); ++ } ++ } else if (in_regs[i].first()->is_FloatRegister()) { ++ switch (in_sig_bt[i]) { ++ case T_FLOAT: single_slots++; break; ++ case T_DOUBLE: double_slots++; break; ++ default: ShouldNotReachHere(); ++ } ++ } ++ } ++ total_save_slots = double_slots * 2 + single_slots; ++ // align the save area ++ if (double_slots != 0) { ++ stack_slots = round_to(stack_slots, 2); ++ } ++ } ++ ++ int oop_handle_offset = stack_slots; ++ stack_slots += total_save_slots; ++ ++ // Now any space we need for handlizing a klass if static method ++ ++ int klass_slot_offset = 0; ++ int klass_offset = -1; ++ int lock_slot_offset = 0; ++ bool is_static = false; ++ ++ if (method->is_static()) { ++ klass_slot_offset = stack_slots; ++ stack_slots += VMRegImpl::slots_per_word; ++ klass_offset = klass_slot_offset * VMRegImpl::stack_slot_size; ++ is_static = true; ++ } ++ ++ // Plus a lock if needed ++ ++ if (method->is_synchronized()) { ++ lock_slot_offset = stack_slots; ++ stack_slots += VMRegImpl::slots_per_word; ++ } ++ ++ // Now a place to save return value or as a temporary for any gpr -> fpr moves ++ // + 2 for return address (which we own) and saved fp ++ stack_slots += 2 + 9 * VMRegImpl::slots_per_word; // (T0, A0, A1, A2, A3, A4, A5, A6, A7) ++ ++ // Ok The space we have allocated will look like: ++ // ++ // ++ // FP-> | | ++ // |---------------------| ++ // | 2 slots for moves | ++ // |---------------------| ++ // | lock box (if sync) | ++ // |---------------------| <- lock_slot_offset ++ // | klass (if static) | ++ // |---------------------| <- klass_slot_offset ++ // | oopHandle area | ++ // |---------------------| <- oop_handle_offset ++ // | outbound memory | ++ // | based arguments | ++ // | | ++ // |---------------------| ++ // | vararg area | ++ // |---------------------| ++ // | | ++ // SP-> | out_preserved_slots | ++ // ++ // ++ ++ ++ // Now compute actual number of stack words we need rounding to make ++ // stack properly aligned. ++ stack_slots = round_to(stack_slots, StackAlignmentInSlots); ++ ++ int stack_size = stack_slots * VMRegImpl::stack_slot_size; ++ ++ intptr_t start = (intptr_t)__ pc(); ++ ++ ++ ++ // First thing make an ic check to see if we should even be here ++ address ic_miss = SharedRuntime::get_ic_miss_stub(); ++ ++ // We are free to use all registers as temps without saving them and ++ // restoring them except fp. fp is the only callee save register ++ // as far as the interpreter and the compiler(s) are concerned. 
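++  // Editorial summary: the inline-cache check below loads the receiver's klass
++  // into T9 and compares it against the expected klass in ic_reg (T1); on a
++  // mismatch it jumps to the shared ic_miss stub so the call site can be
++  // re-resolved.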
++ ++ //refer to register_mips.hpp:IC_Klass ++ const Register ic_reg = T1; ++ const Register receiver = T0; ++ ++ Label hit; ++ Label exception_pending; ++ ++ __ verify_oop(receiver); ++ //add for compressedoops ++ __ load_klass(T9, receiver); ++ __ beq(T9, ic_reg, hit); ++ __ delayed()->nop(); ++ __ jmp(ic_miss, relocInfo::runtime_call_type); ++ __ delayed()->nop(); ++ __ bind(hit); ++ ++ int vep_offset = ((intptr_t)__ pc()) - start; ++ ++ // Make enough room for patch_verified_entry ++ __ nop(); ++ __ nop(); ++ ++ // Generate stack overflow check ++ if (UseStackBanging) { ++ __ bang_stack_with_offset(StackShadowPages*os::vm_page_size()); ++ } ++ ++ // Generate a new frame for the wrapper. ++ // do mips need this ? ++#ifndef OPT_THREAD ++ __ get_thread(TREG); ++#endif ++ __ st_ptr(SP, TREG, in_bytes(JavaThread::last_Java_sp_offset())); ++ __ move(AT, -(StackAlignmentInBytes)); ++ __ andr(SP, SP, AT); ++ ++ __ enter(); ++ // -2 because return address is already present and so is saved fp ++ __ addiu(SP, SP, -1 * (stack_size - 2*wordSize)); ++ ++ // Frame is now completed as far a size and linkage. ++ ++ int frame_complete = ((intptr_t)__ pc()) - start; ++ ++ // Calculate the difference between sp and fp. We need to know it ++ // after the native call because on windows Java Natives will pop ++ // the arguments and it is painful to do sp relative addressing ++ // in a platform independent way. So after the call we switch to ++ // fp relative addressing. ++ //FIXME actually , the fp_adjustment may not be the right, because andr(sp, sp, at) may change ++ //the SP ++ int fp_adjustment = stack_size - 2*wordSize; ++ ++#ifdef COMPILER2 ++ // C2 may leave the stack dirty if not in SSE2+ mode ++ __ empty_FPU_stack(); ++#endif ++ ++ // Compute the fp offset for any slots used after the jni call ++ ++ int lock_slot_fp_offset = (lock_slot_offset*VMRegImpl::stack_slot_size) - fp_adjustment; ++ // We use TREG as a thread pointer because it is callee save and ++ // if we load it once it is usable thru the entire wrapper ++ const Register thread = TREG; ++ ++ // We use S4 as the oop handle for the receiver/klass ++ // It is callee save so it survives the call to native ++ ++ const Register oop_handle_reg = S4; ++ if (is_critical_native) { ++ Unimplemented(); ++ // check_needs_gc_for_critical_native(masm, stack_slots, total_c_args, total_in_args, ++ // oop_handle_offset, oop_maps, in_regs, in_sig_bt); ++ } ++ ++#ifndef OPT_THREAD ++ __ get_thread(thread); ++#endif ++ ++ // ++ // We immediately shuffle the arguments so that any vm call we have to ++ // make from here on out (sync slow path, jvmpi, etc.) we will have ++ // captured the oops from our caller and have a valid oopMap for ++ // them. ++ ++ // ----------------- ++ // The Grand Shuffle ++ // ++ // Natives require 1 or 2 extra arguments over the normal ones: the JNIEnv* ++ // and, if static, the class mirror instead of a receiver. This pretty much ++ // guarantees that register layout will not match (and mips doesn't use reg ++ // parms though amd does). Since the native abi doesn't use register args ++ // and the java conventions does we don't have to worry about collisions. ++ // All of our moved are reg->stack or stack->stack. ++ // We ignore the extra arguments during the shuffle and handle them at the ++ // last moment. The shuffle is described by the two calling convention ++ // vectors we have in our possession. We simply walk the java vector to ++ // get the source locations and the c vector to get the destinations. 
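++  // c_arg starts past the hidden C arguments: JNIEnv* always, plus the class
++  // mirror slot when the method is static.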
++ ++ int c_arg = method->is_static() ? 2 : 1 ; ++ ++ // Record sp-based slot for receiver on stack for non-static methods ++ int receiver_offset = -1; ++ ++ // This is a trick. We double the stack slots so we can claim ++ // the oops in the caller's frame. Since we are sure to have ++ // more args than the caller doubling is enough to make ++ // sure we can capture all the incoming oop args from the ++ // caller. ++ // ++ OopMap* map = new OopMap(stack_slots * 2, 0 /* arg_slots*/); ++ ++ // Mark location of fp (someday) ++ // map->set_callee_saved(VMRegImpl::stack2reg( stack_slots - 2), stack_slots * 2, 0, vmreg(fp)); ++ ++#ifdef ASSERT ++ bool reg_destroyed[RegisterImpl::number_of_registers]; ++ bool freg_destroyed[FloatRegisterImpl::number_of_registers]; ++ for ( int r = 0 ; r < RegisterImpl::number_of_registers ; r++ ) { ++ reg_destroyed[r] = false; ++ } ++ for ( int f = 0 ; f < FloatRegisterImpl::number_of_registers ; f++ ) { ++ freg_destroyed[f] = false; ++ } ++ ++#endif /* ASSERT */ ++ ++ // This may iterate in two different directions depending on the ++ // kind of native it is. The reason is that for regular JNI natives ++ // the incoming and outgoing registers are offset upwards and for ++ // critical natives they are offset down. ++ GrowableArray arg_order(2 * total_in_args); ++ VMRegPair tmp_vmreg; ++ tmp_vmreg.set2(T8->as_VMReg()); ++ ++ if (!is_critical_native) { ++ for (int i = total_in_args - 1, c_arg = total_c_args - 1; i >= 0; i--, c_arg--) { ++ arg_order.push(i); ++ arg_order.push(c_arg); ++ } ++ } else { ++ // Compute a valid move order, using tmp_vmreg to break any cycles ++ Unimplemented(); ++ // ComputeMoveOrder cmo(total_in_args, in_regs, total_c_args, out_regs, in_sig_bt, arg_order, tmp_vmreg); ++ } ++ ++ int temploc = -1; ++ for (int ai = 0; ai < arg_order.length(); ai += 2) { ++ int i = arg_order.at(ai); ++ int c_arg = arg_order.at(ai + 1); ++ __ block_comment(err_msg("move %d -> %d", i, c_arg)); ++ if (c_arg == -1) { ++ assert(is_critical_native, "should only be required for critical natives"); ++ // This arg needs to be moved to a temporary ++ __ move(tmp_vmreg.first()->as_Register(), in_regs[i].first()->as_Register()); ++ in_regs[i] = tmp_vmreg; ++ temploc = i; ++ continue; ++ } else if (i == -1) { ++ assert(is_critical_native, "should only be required for critical natives"); ++ // Read from the temporary location ++ assert(temploc != -1, "must be valid"); ++ i = temploc; ++ temploc = -1; ++ } ++#ifdef ASSERT ++ if (in_regs[i].first()->is_Register()) { ++ assert(!reg_destroyed[in_regs[i].first()->as_Register()->encoding()], "destroyed reg!"); ++ } else if (in_regs[i].first()->is_FloatRegister()) { ++ assert(!freg_destroyed[in_regs[i].first()->as_FloatRegister()->encoding()], "destroyed reg!"); ++ } ++ if (out_regs[c_arg].first()->is_Register()) { ++ reg_destroyed[out_regs[c_arg].first()->as_Register()->encoding()] = true; ++ } else if (out_regs[c_arg].first()->is_FloatRegister()) { ++ freg_destroyed[out_regs[c_arg].first()->as_FloatRegister()->encoding()] = true; ++ } ++#endif /* ASSERT */ ++ switch (in_sig_bt[i]) { ++ case T_ARRAY: ++ if (is_critical_native) { ++ Unimplemented(); ++ // unpack_array_argument(masm, in_regs[i], in_elem_bt[i], out_regs[c_arg + 1], out_regs[c_arg]); ++ c_arg++; ++#ifdef ASSERT ++ if (out_regs[c_arg].first()->is_Register()) { ++ reg_destroyed[out_regs[c_arg].first()->as_Register()->encoding()] = true; ++ } else if (out_regs[c_arg].first()->is_FloatRegister()) { ++ 
freg_destroyed[out_regs[c_arg].first()->as_FloatRegister()->encoding()] = true; ++ } ++#endif ++ break; ++ } ++ case T_OBJECT: ++ assert(!is_critical_native, "no oop arguments"); ++ object_move(masm, map, oop_handle_offset, stack_slots, in_regs[i], out_regs[c_arg], ++ ((i == 0) && (!is_static)), ++ &receiver_offset); ++ break; ++ case T_VOID: ++ break; ++ ++ case T_FLOAT: ++ float_move(masm, in_regs[i], out_regs[c_arg]); ++ break; ++ ++ case T_DOUBLE: ++ assert( i + 1 < total_in_args && ++ in_sig_bt[i + 1] == T_VOID && ++ out_sig_bt[c_arg+1] == T_VOID, "bad arg list"); ++ double_move(masm, in_regs[i], out_regs[c_arg]); ++ break; ++ ++ case T_LONG : ++ long_move(masm, in_regs[i], out_regs[c_arg]); ++ break; ++ ++ case T_ADDRESS: assert(false, "found T_ADDRESS in java args"); ++ ++ default: ++ simple_move32(masm, in_regs[i], out_regs[c_arg]); ++ } ++ } ++ ++ // point c_arg at the first arg that is already loaded in case we ++ // need to spill before we call out ++ c_arg = total_c_args - total_in_args; ++ // Pre-load a static method's oop. Used both by locking code and ++ // the normal JNI call code. ++ ++ __ move(oop_handle_reg, A1); ++ ++ if (method->is_static() && !is_critical_native) { ++ ++ // load opp into a register ++ int oop_index = __ oop_recorder()->find_index(JNIHandles::make_local( ++ (method->method_holder())->java_mirror())); ++ ++ ++ RelocationHolder rspec = oop_Relocation::spec(oop_index); ++ __ relocate(rspec); ++ __ patchable_set48(oop_handle_reg, (long)JNIHandles::make_local((method->method_holder())->java_mirror())); ++ // Now handlize the static class mirror it's known not-null. ++ __ sd( oop_handle_reg, SP, klass_offset); ++ map->set_oop(VMRegImpl::stack2reg(klass_slot_offset)); ++ ++ // Now get the handle ++ __ lea(oop_handle_reg, Address(SP, klass_offset)); ++ // store the klass handle as second argument ++ __ move(A1, oop_handle_reg); ++ // and protect the arg if we must spill ++ c_arg--; ++ } ++ ++ // Change state to native (we save the return address in the thread, since it might not ++ // be pushed on the stack when we do a a stack traversal). It is enough that the pc() ++ // points into the right code segment. It does not have to be the correct return pc. ++ // We use the same pc/oopMap repeatedly when we call out ++ ++ intptr_t the_pc = (intptr_t) __ pc(); ++ oop_maps->add_gc_map(the_pc - start, map); ++ ++ __ set_last_Java_frame(SP, noreg, NULL); ++ __ relocate(relocInfo::internal_pc_type); ++ { ++ intptr_t save_pc = (intptr_t)the_pc ; ++ __ patchable_set48(AT, save_pc); ++ } ++ __ sd(AT, thread, in_bytes(JavaThread::frame_anchor_offset() + JavaFrameAnchor::last_Java_pc_offset())); ++ ++ ++ // We have all of the arguments setup at this point. We must not touch any register ++ // argument registers at this point (what if we save/restore them there are no oop? 
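++  // DTrace method-entry probe: the argument registers are saved and restored
++  // around the leaf call so the freshly shuffled native arguments survive.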
++ { ++ SkipIfEqual skip_if(masm, &DTraceMethodProbes, 0); ++ save_args(masm, total_c_args, c_arg, out_regs); ++ int metadata_index = __ oop_recorder()->find_index(method()); ++ RelocationHolder rspec = metadata_Relocation::spec(metadata_index); ++ __ relocate(rspec); ++ __ patchable_set48(AT, (long)(method())); ++ ++ __ call_VM_leaf( ++ CAST_FROM_FN_PTR(address, SharedRuntime::dtrace_method_entry), ++ thread, AT); ++ ++ restore_args(masm, total_c_args, c_arg, out_regs); ++ } ++ ++ // These are register definitions we need for locking/unlocking ++ const Register swap_reg = T8; // Must use T8 for cmpxchg instruction ++ const Register obj_reg = T9; // Will contain the oop ++ //const Register lock_reg = T6; // Address of compiler lock object (BasicLock) ++ const Register lock_reg = c_rarg0; // Address of compiler lock object (BasicLock) ++ ++ ++ ++ Label slow_path_lock; ++ Label lock_done; ++ ++ // Lock a synchronized method ++ if (method->is_synchronized()) { ++ assert(!is_critical_native, "unhandled"); ++ ++ const int mark_word_offset = BasicLock::displaced_header_offset_in_bytes(); ++ ++ // Get the handle (the 2nd argument) ++ __ move(oop_handle_reg, A1); ++ ++ // Get address of the box ++ __ lea(lock_reg, Address(FP, lock_slot_fp_offset)); ++ ++ // Load the oop from the handle ++ __ ld(obj_reg, oop_handle_reg, 0); ++ ++ if (UseBiasedLocking) { ++ // Note that oop_handle_reg is trashed during this call ++ __ biased_locking_enter(lock_reg, obj_reg, swap_reg, A1, false, lock_done, &slow_path_lock); ++ } ++ ++ // Load immediate 1 into swap_reg %T8 ++ __ move(swap_reg, 1); ++ ++ __ ld(AT, obj_reg, 0); ++ __ orr(swap_reg, swap_reg, AT); ++ ++ __ sd( swap_reg, lock_reg, mark_word_offset); ++ __ cmpxchg(lock_reg, Address(obj_reg, 0), swap_reg); ++ __ bne(AT, R0, lock_done); ++ __ delayed()->nop(); ++ // Test if the oopMark is an obvious stack pointer, i.e., ++ // 1) (mark & 3) == 0, and ++ // 2) sp <= mark < mark + os::pagesize() ++ // These 3 tests can be done by evaluating the following ++ // expression: ((mark - sp) & (3 - os::vm_page_size())), ++ // assuming both stack pointer and pagesize have their ++ // least significant 2 bits clear. ++ // NOTE: the oopMark is in swap_reg %T8 as the result of cmpxchg ++ ++ __ dsubu(swap_reg, swap_reg, SP); ++ __ move(AT, 3 - os::vm_page_size()); ++ __ andr(swap_reg , swap_reg, AT); ++ // Save the test result, for recursive case, the result is zero ++ __ sd(swap_reg, lock_reg, mark_word_offset); ++ __ bne(swap_reg, R0, slow_path_lock); ++ __ delayed()->nop(); ++ // Slow path will re-enter here ++ __ bind(lock_done); ++ ++ if (UseBiasedLocking) { ++ // Re-fetch oop_handle_reg as we trashed it above ++ __ move(A1, oop_handle_reg); ++ } ++ } ++ ++ ++ // Finally just about ready to make the JNI call ++ ++ ++ // get JNIEnv* which is first argument to native ++ if (!is_critical_native) { ++ __ addiu(A0, thread, in_bytes(JavaThread::jni_environment_offset())); ++ } ++ ++ // Example: Java_java_lang_ref_Finalizer_invokeFinalizeMethod(JNIEnv *env, jclass clazz, jobject ob) ++ // Load the second arguments into A1 ++ //__ ld(A1, SP , wordSize ); // klass ++ ++ // Now set thread in native ++ __ addiu(AT, R0, _thread_in_native); ++ if(os::is_MP()) { ++ __ sync(); // store release ++ } ++ __ sw(AT, thread, in_bytes(JavaThread::thread_state_offset())); ++ // do the call ++ __ call(native_func, relocInfo::runtime_call_type); ++ __ delayed()->nop(); ++ // WARNING - on Windows Java Natives use pascal calling convention and pop the ++ // arguments off of the stack. 
We could just re-adjust the stack pointer here ++ // and continue to do SP relative addressing but we instead switch to FP ++ // relative addressing. ++ ++ // Unpack native results. ++ switch (ret_type) { ++ case T_BOOLEAN: __ c2bool(V0); break; ++ case T_CHAR : __ andi(V0, V0, 0xFFFF); break; ++ case T_BYTE : __ sign_extend_byte (V0); break; ++ case T_SHORT : __ sign_extend_short(V0); break; ++ case T_INT : // nothing to do break; ++ case T_DOUBLE : ++ case T_FLOAT : ++ // Result is in st0 we'll save as needed ++ break; ++ case T_ARRAY: // Really a handle ++ case T_OBJECT: // Really a handle ++ break; // can't de-handlize until after safepoint check ++ case T_VOID: break; ++ case T_LONG: break; ++ default : ShouldNotReachHere(); ++ } ++ // Switch thread to "native transition" state before reading the synchronization state. ++ // This additional state is necessary because reading and testing the synchronization ++ // state is not atomic w.r.t. GC, as this scenario demonstrates: ++ // Java thread A, in _thread_in_native state, loads _not_synchronized and is preempted. ++ // VM thread changes sync state to synchronizing and suspends threads for GC. ++ // Thread A is resumed to finish this native method, but doesn't block here since it ++ // didn't see any synchronization is progress, and escapes. ++ __ addiu(AT, R0, _thread_in_native_trans); ++ if(os::is_MP()) { ++ __ sync(); // store release ++ } ++ __ sw(AT, thread, in_bytes(JavaThread::thread_state_offset())); ++ ++ if(os::is_MP()) { ++ if (UseMembar) { ++ // Force this write out before the read below ++ __ sync(); ++ } else { ++ // Write serialization page so VM thread can do a pseudo remote membar. ++ // We use the current thread pointer to calculate a thread specific ++ // offset to write to within the page. This minimizes bus traffic ++ // due to cache line collision. ++ __ serialize_memory(thread, A0); ++ } ++ } ++ ++ Label after_transition; ++ ++ // check for safepoint operation in progress and/or pending suspend requests ++ { ++ Label Continue; ++ __ li(AT, SafepointSynchronize::address_of_state()); ++ __ lw(A0, AT, 0); ++ __ addiu(AT, A0, -SafepointSynchronize::_not_synchronized); ++ Label L; ++ __ bne(AT, R0, L); ++ __ delayed()->nop(); ++ __ lw(AT, thread, in_bytes(JavaThread::suspend_flags_offset())); ++ __ beq(AT, R0, Continue); ++ __ delayed()->nop(); ++ __ bind(L); ++ ++ // Don't use call_VM as it will see a possible pending exception and forward it ++ // and never return here preventing us from clearing _last_native_pc down below. 
++ // ++ save_native_result(masm, ret_type, stack_slots); ++ __ move(A0, thread); ++ __ addiu(SP, SP, -wordSize); ++ __ push(S2); ++ __ move(AT, -(StackAlignmentInBytes)); ++ __ move(S2, SP); // use S2 as a sender SP holder ++ __ andr(SP, SP, AT); // align stack as required by ABI ++ if (!is_critical_native) { ++ __ call(CAST_FROM_FN_PTR(address, JavaThread::check_special_condition_for_native_trans), relocInfo::runtime_call_type); ++ __ delayed()->nop(); ++ } else { ++ __ call(CAST_FROM_FN_PTR(address, JavaThread::check_special_condition_for_native_trans_and_transition), relocInfo::runtime_call_type); ++ __ delayed()->nop(); ++ } ++ __ move(SP, S2); // use S2 as a sender SP holder ++ __ pop(S2); ++ __ addiu(SP, SP, wordSize); ++ //add for compressedoops ++ __ reinit_heapbase(); ++ // Restore any method result value ++ restore_native_result(masm, ret_type, stack_slots); ++ ++ if (is_critical_native) { ++ // The call above performed the transition to thread_in_Java so ++ // skip the transition logic below. ++ __ beq(R0, R0, after_transition); ++ __ delayed()->nop(); ++ } ++ ++ __ bind(Continue); ++ } ++ ++ // change thread state ++ __ addiu(AT, R0, _thread_in_Java); ++ if(os::is_MP()) { ++ __ sync(); // store release ++ } ++ __ sw(AT, thread, in_bytes(JavaThread::thread_state_offset())); ++ __ bind(after_transition); ++ Label reguard; ++ Label reguard_done; ++ __ lw(AT, thread, in_bytes(JavaThread::stack_guard_state_offset())); ++ __ addiu(AT, AT, -JavaThread::stack_guard_yellow_disabled); ++ __ beq(AT, R0, reguard); ++ __ delayed()->nop(); ++ // slow path reguard re-enters here ++ __ bind(reguard_done); ++ ++ // Handle possible exception (will unlock if necessary) ++ ++ // native result if any is live ++ ++ // Unlock ++ Label slow_path_unlock; ++ Label unlock_done; ++ if (method->is_synchronized()) { ++ ++ Label done; ++ ++ // Get locked oop from the handle we passed to jni ++ __ ld( obj_reg, oop_handle_reg, 0); ++ if (UseBiasedLocking) { ++ __ biased_locking_exit(obj_reg, T8, done); ++ ++ } ++ ++ // Simple recursive lock? 
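++    // A zero displaced header stored in the lock slot marks a recursive
++    // enter, so there is nothing to release here.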
++ ++ __ ld(AT, FP, lock_slot_fp_offset); ++ __ beq(AT, R0, done); ++ __ delayed()->nop(); ++ // Must save FSF if if it is live now because cmpxchg must use it ++ if (ret_type != T_FLOAT && ret_type != T_DOUBLE && ret_type != T_VOID) { ++ save_native_result(masm, ret_type, stack_slots); ++ } ++ ++ // get old displaced header ++ __ ld (T8, FP, lock_slot_fp_offset); ++ // get address of the stack lock ++ __ addiu(c_rarg0, FP, lock_slot_fp_offset); ++ // Atomic swap old header if oop still contains the stack lock ++ __ cmpxchg(T8, Address(obj_reg, 0), c_rarg0); ++ ++ __ beq(AT, R0, slow_path_unlock); ++ __ delayed()->nop(); ++ // slow path re-enters here ++ __ bind(unlock_done); ++ if (ret_type != T_FLOAT && ret_type != T_DOUBLE && ret_type != T_VOID) { ++ restore_native_result(masm, ret_type, stack_slots); ++ } ++ ++ __ bind(done); ++ ++ } ++ { ++ SkipIfEqual skip_if(masm, &DTraceMethodProbes, 0); ++ // Tell dtrace about this method exit ++ save_native_result(masm, ret_type, stack_slots); ++ int metadata_index = __ oop_recorder()->find_index( (method())); ++ RelocationHolder rspec = metadata_Relocation::spec(metadata_index); ++ __ relocate(rspec); ++ __ patchable_set48(AT, (long)(method())); ++ ++ __ call_VM_leaf( ++ CAST_FROM_FN_PTR(address, SharedRuntime::dtrace_method_exit), ++ thread, AT); ++ restore_native_result(masm, ret_type, stack_slots); ++ } ++ ++ // We can finally stop using that last_Java_frame we setup ages ago ++ ++ __ reset_last_Java_frame(false); ++ ++ // Unpack oop result, e.g. JNIHandles::resolve value. ++ if (ret_type == T_OBJECT || ret_type == T_ARRAY) { ++ __ resolve_jobject(V0, thread, T9); ++ } ++ ++ if (!is_critical_native) { ++ // reset handle block ++ __ ld(AT, thread, in_bytes(JavaThread::active_handles_offset())); ++ __ sw(R0, AT, JNIHandleBlock::top_offset_in_bytes()); ++ } ++ ++ if (!is_critical_native) { ++ // Any exception pending? ++ __ ld(AT, thread, in_bytes(Thread::pending_exception_offset())); ++ __ bne(AT, R0, exception_pending); ++ __ delayed()->nop(); ++ } ++ // no exception, we're almost done ++ ++ // check that only result value is on FPU stack ++ __ verify_FPU(ret_type == T_FLOAT || ret_type == T_DOUBLE ? 1 : 0, "native_wrapper normal exit"); ++ ++ // Return ++#ifndef OPT_THREAD ++ __ get_thread(TREG); ++#endif ++ //__ ld_ptr(SP, TREG, in_bytes(JavaThread::last_Java_sp_offset())); ++ __ leave(); ++ ++ __ jr(RA); ++ __ delayed()->nop(); ++ // Unexpected paths are out of line and go here ++ // Slow path locking & unlocking ++ if (method->is_synchronized()) { ++ ++ // BEGIN Slow path lock ++ __ bind(slow_path_lock); ++ ++ // protect the args we've loaded ++ save_args(masm, total_c_args, c_arg, out_regs); ++ ++ // has last_Java_frame setup. 
No exceptions so do vanilla call not call_VM ++ // args are (oop obj, BasicLock* lock, JavaThread* thread) ++ ++ __ move(A0, obj_reg); ++ __ move(A1, lock_reg); ++ __ move(A2, thread); ++ __ addiu(SP, SP, - 3*wordSize); ++ ++ __ move(AT, -(StackAlignmentInBytes)); ++ __ move(S2, SP); // use S2 as a sender SP holder ++ __ andr(SP, SP, AT); // align stack as required by ABI ++ ++ __ call(CAST_FROM_FN_PTR(address, SharedRuntime::complete_monitor_locking_C), relocInfo::runtime_call_type); ++ __ delayed()->nop(); ++ __ move(SP, S2); ++ __ addiu(SP, SP, 3*wordSize); ++ ++ restore_args(masm, total_c_args, c_arg, out_regs); ++ ++#ifdef ASSERT ++ { Label L; ++ __ ld(AT, thread, in_bytes(Thread::pending_exception_offset())); ++ __ beq(AT, R0, L); ++ __ delayed()->nop(); ++ __ stop("no pending exception allowed on exit from monitorenter"); ++ __ bind(L); ++ } ++#endif ++ __ b(lock_done); ++ __ delayed()->nop(); ++ // END Slow path lock ++ ++ // BEGIN Slow path unlock ++ __ bind(slow_path_unlock); ++ ++ // Slow path unlock ++ ++ if (ret_type == T_FLOAT || ret_type == T_DOUBLE ) { ++ save_native_result(masm, ret_type, stack_slots); ++ } ++ // Save pending exception around call to VM (which contains an EXCEPTION_MARK) ++ ++ __ ld(AT, thread, in_bytes(Thread::pending_exception_offset())); ++ __ push(AT); ++ __ sd(R0, thread, in_bytes(Thread::pending_exception_offset())); ++ ++ __ move(AT, -(StackAlignmentInBytes)); ++ __ move(S2, SP); // use S2 as a sender SP holder ++ __ andr(SP, SP, AT); // align stack as required by ABI ++ ++ // should be a peal ++ // +wordSize because of the push above ++ __ addiu(A1, FP, lock_slot_fp_offset); ++ ++ __ move(A0, obj_reg); ++ __ addiu(SP,SP, -2*wordSize); ++ __ call(CAST_FROM_FN_PTR(address, SharedRuntime::complete_monitor_unlocking_C), ++ relocInfo::runtime_call_type); ++ __ delayed()->nop(); ++ __ addiu(SP, SP, 2*wordSize); ++ __ move(SP, S2); ++ //add for compressedoops ++ __ reinit_heapbase(); ++#ifdef ASSERT ++ { ++ Label L; ++ __ ld( AT, thread, in_bytes(Thread::pending_exception_offset())); ++ __ beq(AT, R0, L); ++ __ delayed()->nop(); ++ __ stop("no pending exception allowed on exit complete_monitor_unlocking_C"); ++ __ bind(L); ++ } ++#endif /* ASSERT */ ++ ++ __ pop(AT); ++ __ sd(AT, thread, in_bytes(Thread::pending_exception_offset())); ++ if (ret_type == T_FLOAT || ret_type == T_DOUBLE ) { ++ restore_native_result(masm, ret_type, stack_slots); ++ } ++ __ b(unlock_done); ++ __ delayed()->nop(); ++ // END Slow path unlock ++ ++ } ++ ++ // SLOW PATH Reguard the stack if needed ++ ++ __ bind(reguard); ++ save_native_result(masm, ret_type, stack_slots); ++ __ call(CAST_FROM_FN_PTR(address, SharedRuntime::reguard_yellow_pages), ++ relocInfo::runtime_call_type); ++ __ delayed()->nop(); ++ //add for compressedoops ++ __ reinit_heapbase(); ++ restore_native_result(masm, ret_type, stack_slots); ++ __ b(reguard_done); ++ __ delayed()->nop(); ++ ++ // BEGIN EXCEPTION PROCESSING ++ if (!is_critical_native) { ++ // Forward the exception ++ __ bind(exception_pending); ++ ++ // remove possible return value from FPU register stack ++ __ empty_FPU_stack(); ++ ++ // pop our frame ++ //forward_exception_entry need return address on stack ++ __ move(SP, FP); ++ __ pop(FP); ++ ++ // and forward the exception ++ __ jmp(StubRoutines::forward_exception_entry(), relocInfo::runtime_call_type); ++ __ delayed()->nop(); ++ } ++ __ flush(); ++ ++ nmethod *nm = nmethod::new_native_nmethod(method, ++ compile_id, ++ masm->code(), ++ vep_offset, ++ frame_complete, ++ stack_slots / 
VMRegImpl::slots_per_word, ++ (is_static ? in_ByteSize(klass_offset) : in_ByteSize(receiver_offset)), ++ in_ByteSize(lock_slot_offset*VMRegImpl::stack_slot_size), ++ oop_maps); ++ ++ if (is_critical_native) { ++ nm->set_lazy_critical_native(true); ++ } ++ ++ return nm; ++ ++} ++ ++#ifdef HAVE_DTRACE_H ++// --------------------------------------------------------------------------- ++// Generate a dtrace nmethod for a given signature. The method takes arguments ++// in the Java compiled code convention, marshals them to the native ++// abi and then leaves nops at the position you would expect to call a native ++// function. When the probe is enabled the nops are replaced with a trap ++// instruction that dtrace inserts and the trace will cause a notification ++// to dtrace. ++// ++// The probes are only able to take primitive types and java/lang/String as ++// arguments. No other java types are allowed. Strings are converted to utf8 ++// strings so that from dtrace point of view java strings are converted to C ++// strings. There is an arbitrary fixed limit on the total space that a method ++// can use for converting the strings. (256 chars per string in the signature). ++// So any java string larger then this is truncated. ++ ++static int fp_offset[ConcreteRegisterImpl::number_of_registers] = { 0 }; ++static bool offsets_initialized = false; ++ ++static VMRegPair reg64_to_VMRegPair(Register r) { ++ VMRegPair ret; ++ if (wordSize == 8) { ++ ret.set2(r->as_VMReg()); ++ } else { ++ ret.set_pair(r->successor()->as_VMReg(), r->as_VMReg()); ++ } ++ return ret; ++} ++ ++ ++nmethod *SharedRuntime::generate_dtrace_nmethod(MacroAssembler *masm, ++ methodHandle method) { ++ ++ ++ // generate_dtrace_nmethod is guarded by a mutex so we are sure to ++ // be single threaded in this method. ++ assert(AdapterHandlerLibrary_lock->owned_by_self(), "must be"); ++ ++ // Fill in the signature array, for the calling-convention call. ++ int total_args_passed = method->size_of_parameters(); ++ ++ BasicType* in_sig_bt = NEW_RESOURCE_ARRAY(BasicType, total_args_passed); ++ VMRegPair *in_regs = NEW_RESOURCE_ARRAY(VMRegPair, total_args_passed); ++ ++ // The signature we are going to use for the trap that dtrace will see ++ // java/lang/String is converted. We drop "this" and any other object ++ // is converted to NULL. (A one-slot java/lang/Long object reference ++ // is converted to a two-slot long, which is why we double the allocation). 
++ BasicType* out_sig_bt = NEW_RESOURCE_ARRAY(BasicType, total_args_passed * 2); ++ VMRegPair* out_regs = NEW_RESOURCE_ARRAY(VMRegPair, total_args_passed * 2); ++ ++ int i=0; ++ int total_strings = 0; ++ int first_arg_to_pass = 0; ++ int total_c_args = 0; ++ ++ // Skip the receiver as dtrace doesn't want to see it ++ if( !method->is_static() ) { ++ in_sig_bt[i++] = T_OBJECT; ++ first_arg_to_pass = 1; ++ } ++ ++ SignatureStream ss(method->signature()); ++ for ( ; !ss.at_return_type(); ss.next()) { ++ BasicType bt = ss.type(); ++ in_sig_bt[i++] = bt; // Collect remaining bits of signature ++ out_sig_bt[total_c_args++] = bt; ++ if( bt == T_OBJECT) { ++ symbolOop s = ss.as_symbol_or_null(); ++ if (s == vmSymbols::java_lang_String()) { ++ total_strings++; ++ out_sig_bt[total_c_args-1] = T_ADDRESS; ++ } else if (s == vmSymbols::java_lang_Boolean() || ++ s == vmSymbols::java_lang_Byte()) { ++ out_sig_bt[total_c_args-1] = T_BYTE; ++ } else if (s == vmSymbols::java_lang_Character() || ++ s == vmSymbols::java_lang_Short()) { ++ out_sig_bt[total_c_args-1] = T_SHORT; ++ } else if (s == vmSymbols::java_lang_Integer() || ++ s == vmSymbols::java_lang_Float()) { ++ out_sig_bt[total_c_args-1] = T_INT; ++ } else if (s == vmSymbols::java_lang_Long() || ++ s == vmSymbols::java_lang_Double()) { ++ out_sig_bt[total_c_args-1] = T_LONG; ++ out_sig_bt[total_c_args++] = T_VOID; ++ } ++ } else if ( bt == T_LONG || bt == T_DOUBLE ) { ++ in_sig_bt[i++] = T_VOID; // Longs & doubles take 2 Java slots ++ // We convert double to long ++ out_sig_bt[total_c_args-1] = T_LONG; ++ out_sig_bt[total_c_args++] = T_VOID; ++ } else if ( bt == T_FLOAT) { ++ // We convert float to int ++ out_sig_bt[total_c_args-1] = T_INT; ++ } ++ } ++ ++ assert(i==total_args_passed, "validly parsed signature"); ++ ++ // Now get the compiled-Java layout as input arguments ++ int comp_args_on_stack; ++ comp_args_on_stack = SharedRuntime::java_calling_convention( ++ in_sig_bt, in_regs, total_args_passed, false); ++ ++ // We have received a description of where all the java arg are located ++ // on entry to the wrapper. We need to convert these args to where ++ // the a native (non-jni) function would expect them. To figure out ++ // where they go we convert the java signature to a C signature and remove ++ // T_VOID for any long/double we might have received. ++ ++ ++ // Now figure out where the args must be stored and how much stack space ++ // they require (neglecting out_preserve_stack_slots but space for storing ++ // the 1st six register arguments). It's weird see int_stk_helper. ++ ++ int out_arg_slots; ++ out_arg_slots = c_calling_convention(out_sig_bt, out_regs, NULL, total_c_args); ++ ++ // Calculate the total number of stack slots we will need. ++ ++ // First count the abi requirement plus all of the outgoing args ++ int stack_slots = SharedRuntime::out_preserve_stack_slots() + out_arg_slots; ++ ++ // Plus a temp for possible converion of float/double/long register args ++ ++ int conversion_temp = stack_slots; ++ stack_slots += 2; ++ ++ ++ // Now space for the string(s) we must convert ++ ++ int string_locs = stack_slots; ++ stack_slots += total_strings * ++ (max_dtrace_string_size / VMRegImpl::stack_slot_size); ++ ++ // Ok The space we have allocated will look like: ++ // ++ // ++ // FP-> | | ++ // |---------------------| ++ // | string[n] | ++ // |---------------------| <- string_locs[n] ++ // | string[n-1] | ++ // |---------------------| <- string_locs[n-1] ++ // | ... | ++ // | ... 
| ++ // |---------------------| <- string_locs[1] ++ // | string[0] | ++ // |---------------------| <- string_locs[0] ++ // | temp | ++ // |---------------------| <- conversion_temp ++ // | outbound memory | ++ // | based arguments | ++ // | | ++ // |---------------------| ++ // | | ++ // SP-> | out_preserved_slots | ++ // ++ // ++ ++ // Now compute actual number of stack words we need rounding to make ++ // stack properly aligned. ++ stack_slots = round_to(stack_slots, 4 * VMRegImpl::slots_per_word); ++ ++ int stack_size = stack_slots * VMRegImpl::stack_slot_size; ++ ++ intptr_t start = (intptr_t)__ pc(); ++ ++ // First thing make an ic check to see if we should even be here ++ ++ { ++ Label L; ++ const Register temp_reg = G3_scratch; ++ Address ic_miss(temp_reg, SharedRuntime::get_ic_miss_stub()); ++ __ verify_oop(O0); ++ __ ld_ptr(O0, oopDesc::klass_offset_in_bytes(), temp_reg); ++ __ cmp(temp_reg, G5_inline_cache_reg); ++ __ brx(Assembler::equal, true, Assembler::pt, L); ++ __ delayed()->nop(); ++ ++ __ jump_to(ic_miss, 0); ++ __ delayed()->nop(); ++ __ align(CodeEntryAlignment); ++ __ bind(L); ++ } ++ ++ int vep_offset = ((intptr_t)__ pc()) - start; ++ ++ // Make enough room for patch_verified_entry ++ __ nop(); ++ __ nop(); ++ ++ // Generate stack overflow check before creating frame ++ __ generate_stack_overflow_check(stack_size); ++ ++ // Generate a new frame for the wrapper. ++ __ save(SP, -stack_size, SP); ++ ++ // Frame is now completed as far a size and linkage. ++ ++ int frame_complete = ((intptr_t)__ pc()) - start; ++ ++#ifdef ASSERT ++ bool reg_destroyed[RegisterImpl::number_of_registers]; ++ bool freg_destroyed[FloatRegisterImpl::number_of_registers]; ++ for ( int r = 0 ; r < RegisterImpl::number_of_registers ; r++ ) { ++ reg_destroyed[r] = false; ++ } ++ for ( int f = 0 ; f < FloatRegisterImpl::number_of_registers ; f++ ) { ++ freg_destroyed[f] = false; ++ } ++ ++#endif /* ASSERT */ ++ ++ VMRegPair zero; ++ const Register g0 = G0; // without this we get a compiler warning (why??) 
++ zero.set2(g0->as_VMReg()); ++ ++ int c_arg, j_arg; ++ ++ Register conversion_off = noreg; ++ ++ for (j_arg = first_arg_to_pass, c_arg = 0 ; ++ j_arg < total_args_passed ; j_arg++, c_arg++ ) { ++ ++ VMRegPair src = in_regs[j_arg]; ++ VMRegPair dst = out_regs[c_arg]; ++ ++#ifdef ASSERT ++ if (src.first()->is_Register()) { ++ assert(!reg_destroyed[src.first()->as_Register()->encoding()], "ack!"); ++ } else if (src.first()->is_FloatRegister()) { ++ assert(!freg_destroyed[src.first()->as_FloatRegister()->encoding( ++ FloatRegisterImpl::S)], "ack!"); ++ } ++ if (dst.first()->is_Register()) { ++ reg_destroyed[dst.first()->as_Register()->encoding()] = true; ++ } else if (dst.first()->is_FloatRegister()) { ++ freg_destroyed[dst.first()->as_FloatRegister()->encoding( ++ FloatRegisterImpl::S)] = true; ++ } ++#endif /* ASSERT */ ++ ++ switch (in_sig_bt[j_arg]) { ++ case T_ARRAY: ++ case T_OBJECT: ++ { ++ if (out_sig_bt[c_arg] == T_BYTE || out_sig_bt[c_arg] == T_SHORT || ++ out_sig_bt[c_arg] == T_INT || out_sig_bt[c_arg] == T_LONG) { ++ // need to unbox a one-slot value ++ Register in_reg = L0; ++ Register tmp = L2; ++ if ( src.first()->is_reg() ) { ++ in_reg = src.first()->as_Register(); ++ } else { ++ assert(Assembler::is_simm13(reg2offset(src.first()) + STACK_BIAS), ++ "must be"); ++ __ ld_ptr(FP, reg2offset(src.first()) + STACK_BIAS, in_reg); ++ } ++ // If the final destination is an acceptable register ++ if ( dst.first()->is_reg() ) { ++ if ( dst.is_single_phys_reg() || out_sig_bt[c_arg] != T_LONG ) { ++ tmp = dst.first()->as_Register(); ++ } ++ } ++ ++ Label skipUnbox; ++ if ( wordSize == 4 && out_sig_bt[c_arg] == T_LONG ) { ++ __ mov(G0, tmp->successor()); ++ } ++ __ br_null(in_reg, true, Assembler::pn, skipUnbox); ++ __ delayed()->mov(G0, tmp); ++ ++ BasicType bt = out_sig_bt[c_arg]; ++ int box_offset = java_lang_boxing_object::value_offset_in_bytes(bt); ++ switch (bt) { ++ case T_BYTE: ++ __ ldub(in_reg, box_offset, tmp); break; ++ case T_SHORT: ++ __ lduh(in_reg, box_offset, tmp); break; ++ case T_INT: ++ __ ld(in_reg, box_offset, tmp); break; ++ case T_LONG: ++ __ ld_long(in_reg, box_offset, tmp); break; ++ default: ShouldNotReachHere(); ++ } ++ ++ __ bind(skipUnbox); ++ // If tmp wasn't final destination copy to final destination ++ if (tmp == L2) { ++ VMRegPair tmp_as_VM = reg64_to_VMRegPair(L2); ++ if (out_sig_bt[c_arg] == T_LONG) { ++ long_move(masm, tmp_as_VM, dst); ++ } else { ++ move32_64(masm, tmp_as_VM, out_regs[c_arg]); ++ } ++ } ++ if (out_sig_bt[c_arg] == T_LONG) { ++ assert(out_sig_bt[c_arg+1] == T_VOID, "must be"); ++ ++c_arg; // move over the T_VOID to keep the loop indices in sync ++ } ++ } else if (out_sig_bt[c_arg] == T_ADDRESS) { ++ Register s = ++ src.first()->is_reg() ? src.first()->as_Register() : L2; ++ Register d = ++ dst.first()->is_reg() ? dst.first()->as_Register() : L2; ++ ++ // We store the oop now so that the conversion pass can reach ++ // while in the inner frame. This will be the only store if ++ // the oop is NULL. 
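++          // Copy the string oop to its outgoing slot, using L2 as scratch when
++          // either the source or the destination lives on the stack.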
++ if (s != L2) { ++ // src is register ++ if (d != L2) { ++ // dst is register ++ __ mov(s, d); ++ } else { ++ assert(Assembler::is_simm13(reg2offset(dst.first()) + ++ STACK_BIAS), "must be"); ++ __ st_ptr(s, SP, reg2offset(dst.first()) + STACK_BIAS); ++ } ++ } else { ++ // src not a register ++ assert(Assembler::is_simm13(reg2offset(src.first()) + ++ STACK_BIAS), "must be"); ++ __ ld_ptr(FP, reg2offset(src.first()) + STACK_BIAS, d); ++ if (d == L2) { ++ assert(Assembler::is_simm13(reg2offset(dst.first()) + ++ STACK_BIAS), "must be"); ++ __ st_ptr(d, SP, reg2offset(dst.first()) + STACK_BIAS); ++ } ++ } ++ } else if (out_sig_bt[c_arg] != T_VOID) { ++ // Convert the arg to NULL ++ if (dst.first()->is_reg()) { ++ __ mov(G0, dst.first()->as_Register()); ++ } else { ++ assert(Assembler::is_simm13(reg2offset(dst.first()) + ++ STACK_BIAS), "must be"); ++ __ st_ptr(G0, SP, reg2offset(dst.first()) + STACK_BIAS); ++ } ++ } ++ } ++ break; ++ case T_VOID: ++ break; ++ ++ case T_FLOAT: ++ if (src.first()->is_stack()) { ++ // Stack to stack/reg is simple ++ move32_64(masm, src, dst); ++ } else { ++ if (dst.first()->is_reg()) { ++ // freg -> reg ++ int off = ++ STACK_BIAS + conversion_temp * VMRegImpl::stack_slot_size; ++ Register d = dst.first()->as_Register(); ++ if (Assembler::is_simm13(off)) { ++ __ stf(FloatRegisterImpl::S, src.first()->as_FloatRegister(), ++ SP, off); ++ __ ld(SP, off, d); ++ } else { ++ if (conversion_off == noreg) { ++ __ set(off, L6); ++ conversion_off = L6; ++ } ++ __ stf(FloatRegisterImpl::S, src.first()->as_FloatRegister(), ++ SP, conversion_off); ++ __ ld(SP, conversion_off , d); ++ } ++ } else { ++ // freg -> mem ++ int off = STACK_BIAS + reg2offset(dst.first()); ++ if (Assembler::is_simm13(off)) { ++ __ stf(FloatRegisterImpl::S, src.first()->as_FloatRegister(), ++ SP, off); ++ } else { ++ if (conversion_off == noreg) { ++ __ set(off, L6); ++ conversion_off = L6; ++ } ++ __ stf(FloatRegisterImpl::S, src.first()->as_FloatRegister(), ++ SP, conversion_off); ++ } ++ } ++ } ++ break; ++ ++ case T_DOUBLE: ++ assert( j_arg + 1 < total_args_passed && ++ in_sig_bt[j_arg + 1] == T_VOID && ++ out_sig_bt[c_arg+1] == T_VOID, "bad arg list"); ++ if (src.first()->is_stack()) { ++ // Stack to stack/reg is simple ++ long_move(masm, src, dst); ++ } else { ++ Register d = dst.first()->is_reg() ? dst.first()->as_Register() : L2; ++ ++ // Destination could be an odd reg on 32bit in which case ++ // we can't load direct to the destination. 
++ ++ if (!d->is_even() && wordSize == 4) { ++ d = L2; ++ } ++ int off = STACK_BIAS + conversion_temp * VMRegImpl::stack_slot_size; ++ if (Assembler::is_simm13(off)) { ++ __ stf(FloatRegisterImpl::D, src.first()->as_FloatRegister(), ++ SP, off); ++ __ ld_long(SP, off, d); ++ } else { ++ if (conversion_off == noreg) { ++ __ set(off, L6); ++ conversion_off = L6; ++ } ++ __ stf(FloatRegisterImpl::D, src.first()->as_FloatRegister(), ++ SP, conversion_off); ++ __ ld_long(SP, conversion_off, d); ++ } ++ if (d == L2) { ++ long_move(masm, reg64_to_VMRegPair(L2), dst); ++ } ++ } ++ break; ++ ++ case T_LONG : ++ // 32bit can't do a split move of something like g1 -> O0, O1 ++ // so use a memory temp ++ if (src.is_single_phys_reg() && wordSize == 4) { ++ Register tmp = L2; ++ if (dst.first()->is_reg() && ++ (wordSize == 8 || dst.first()->as_Register()->is_even())) { ++ tmp = dst.first()->as_Register(); ++ } ++ ++ int off = STACK_BIAS + conversion_temp * VMRegImpl::stack_slot_size; ++ if (Assembler::is_simm13(off)) { ++ __ stx(src.first()->as_Register(), SP, off); ++ __ ld_long(SP, off, tmp); ++ } else { ++ if (conversion_off == noreg) { ++ __ set(off, L6); ++ conversion_off = L6; ++ } ++ __ stx(src.first()->as_Register(), SP, conversion_off); ++ __ ld_long(SP, conversion_off, tmp); ++ } ++ ++ if (tmp == L2) { ++ long_move(masm, reg64_to_VMRegPair(L2), dst); ++ } ++ } else { ++ long_move(masm, src, dst); ++ } ++ break; ++ ++ case T_ADDRESS: assert(false, "found T_ADDRESS in java args"); ++ ++ default: ++ move32_64(masm, src, dst); ++ } ++ } ++ ++ ++ // If we have any strings we must store any register based arg to the stack ++ // This includes any still live xmm registers too. ++ ++ if (total_strings > 0 ) { ++ ++ // protect all the arg registers ++ __ save_frame(0); ++ __ mov(G2_thread, L7_thread_cache); ++ const Register L2_string_off = L2; ++ ++ // Get first string offset ++ __ set(string_locs * VMRegImpl::stack_slot_size, L2_string_off); ++ ++ for (c_arg = 0 ; c_arg < total_c_args ; c_arg++ ) { ++ if (out_sig_bt[c_arg] == T_ADDRESS) { ++ ++ VMRegPair dst = out_regs[c_arg]; ++ const Register d = dst.first()->is_reg() ? ++ dst.first()->as_Register()->after_save() : noreg; ++ ++ // It's a string the oop and it was already copied to the out arg ++ // position ++ if (d != noreg) { ++ __ mov(d, O0); ++ } else { ++ assert(Assembler::is_simm13(reg2offset(dst.first()) + STACK_BIAS), ++ "must be"); ++ __ ld_ptr(FP, reg2offset(dst.first()) + STACK_BIAS, O0); ++ } ++ Label skip; ++ ++ __ br_null(O0, false, Assembler::pn, skip); ++ __ delayed()->addu(FP, L2_string_off, O1); ++ ++ if (d != noreg) { ++ __ mov(O1, d); ++ } else { ++ assert(Assembler::is_simm13(reg2offset(dst.first()) + STACK_BIAS), ++ "must be"); ++ __ st_ptr(O1, FP, reg2offset(dst.first()) + STACK_BIAS); ++ } ++ ++ __ call(CAST_FROM_FN_PTR(address, SharedRuntime::get_utf), ++ relocInfo::runtime_call_type); ++ __ delayed()->addu(L2_string_off, max_dtrace_string_size, L2_string_off); ++ ++ __ bind(skip); ++ ++ } ++ ++ } ++ __ mov(L7_thread_cache, G2_thread); ++ __ restore(); ++ ++ } ++ ++ ++ // Ok now we are done. 
Need to place the nop that dtrace wants in order to ++ // patch in the trap ++ ++ int patch_offset = ((intptr_t)__ pc()) - start; ++ ++ __ nop(); ++ ++ ++ // Return ++ ++ __ ret(); ++ __ delayed()->restore(); ++ ++ __ flush(); ++ ++ nmethod *nm = nmethod::new_dtrace_nmethod( ++ method, masm->code(), vep_offset, patch_offset, frame_complete, ++ stack_slots / VMRegImpl::slots_per_word); ++ return nm; ++ ++} ++ ++#endif // HAVE_DTRACE_H ++ ++// this function returns the adjust size (in number of words) to a c2i adapter ++// activation for use during deoptimization ++int Deoptimization::last_frame_adjust(int callee_parameters, int callee_locals) { ++ return (callee_locals - callee_parameters) * Interpreter::stackElementWords; ++} ++ ++// "Top of Stack" slots that may be unused by the calling convention but must ++// otherwise be preserved. ++// On Intel these are not necessary and the value can be zero. ++// On Sparc this describes the words reserved for storing a register window ++// when an interrupt occurs. ++uint SharedRuntime::out_preserve_stack_slots() { ++ return 0; ++} ++ ++//------------------------------generate_deopt_blob---------------------------- ++// Ought to generate an ideal graph & compile, but here's some SPARC ASM ++// instead. ++void SharedRuntime::generate_deopt_blob() { ++ // allocate space for the code ++ ResourceMark rm; ++ // setup code generation tools ++ //CodeBuffer buffer ("deopt_blob", 4000, 2048); ++ CodeBuffer buffer ("deopt_blob", 8000, 2048); ++ MacroAssembler* masm = new MacroAssembler( & buffer); ++ int frame_size_in_words; ++ OopMap* map = NULL; ++ // Account for the extra args we place on the stack ++ // by the time we call fetch_unroll_info ++ const int additional_words = 2; // deopt kind, thread ++ ++ OopMapSet *oop_maps = new OopMapSet(); ++ ++ address start = __ pc(); ++ Label cont; ++ // we use S3 for DeOpt reason register ++ Register reason = S3; ++ // use S6 for thread register ++ Register thread = TREG; ++ // use S7 for fetch_unroll_info returned UnrollBlock ++ Register unroll = S7; ++ // Prolog for non exception case! ++ // Correct the return address we were given. ++ //FIXME, return address is on the tos or Ra? ++ __ addiu(RA, RA, - (NativeCall::return_address_offset_long)); ++ // Save everything in sight. ++ map = RegisterSaver::save_live_registers(masm, additional_words, &frame_size_in_words); ++ // Normal deoptimization ++ __ move(reason, Deoptimization::Unpack_deopt); ++ __ b(cont); ++ __ delayed()->nop(); ++ ++ int reexecute_offset = __ pc() - start; ++ ++ // Reexecute case ++ // return address is the pc describes what bci to do re-execute at ++ ++ // No need to update map as each call to save_live_registers will produce identical oopmap ++ (void) RegisterSaver::save_live_registers(masm, additional_words, &frame_size_in_words); ++ __ move(reason, Deoptimization::Unpack_reexecute); ++ __ b(cont); ++ __ delayed()->nop(); ++ ++ int exception_offset = __ pc() - start; ++ // Prolog for exception case ++ ++ // all registers are dead at this entry point, except for V0 and ++ // V1 which contain the exception oop and exception pc ++ // respectively. Set them in TLS and fall thru to the ++ // unpack_with_exception_in_tls entry point. 
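++  // Stash the exception oop and pc in the JavaThread; the common unpack path
++  // below retrieves them again once all registers have been saved.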
++ ++ __ get_thread(thread); ++ __ st_ptr(V1, thread, in_bytes(JavaThread::exception_pc_offset())); ++ __ st_ptr(V0, thread, in_bytes(JavaThread::exception_oop_offset())); ++ int exception_in_tls_offset = __ pc() - start; ++ // new implementation because exception oop is now passed in JavaThread ++ ++ // Prolog for exception case ++ // All registers must be preserved because they might be used by LinearScan ++ // Exceptiop oop and throwing PC are passed in JavaThread ++ // tos: stack at point of call to method that threw the exception (i.e. only ++ // args are on the stack, no return address) ++ ++ // Return address will be patched later with the throwing pc. The correct value is not ++ // available now because loading it from memory would destroy registers. ++ // Save everything in sight. ++ // No need to update map as each call to save_live_registers will produce identical oopmap ++ __ addiu(RA, RA, - (NativeCall::return_address_offset_long)); ++ (void) RegisterSaver::save_live_registers(masm, additional_words, &frame_size_in_words); ++ ++ // Now it is safe to overwrite any register ++ // store the correct deoptimization type ++ __ move(reason, Deoptimization::Unpack_exception); ++ // load throwing pc from JavaThread and patch it as the return address ++ // of the current frame. Then clear the field in JavaThread ++ __ get_thread(thread); ++ __ ld_ptr(V1, thread, in_bytes(JavaThread::exception_pc_offset())); ++ __ st_ptr(V1, SP, RegisterSaver::raOffset() * wordSize); //save ra ++ __ st_ptr(R0, thread, in_bytes(JavaThread::exception_pc_offset())); ++ ++ ++#ifdef ASSERT ++ // verify that there is really an exception oop in JavaThread ++ __ ld_ptr(AT, thread, in_bytes(JavaThread::exception_oop_offset())); ++ __ verify_oop(AT); ++ // verify that there is no pending exception ++ Label no_pending_exception; ++ __ ld_ptr(AT, thread, in_bytes(Thread::pending_exception_offset())); ++ __ beq(AT, R0, no_pending_exception); ++ __ delayed()->nop(); ++ __ stop("must not have pending exception here"); ++ __ bind(no_pending_exception); ++#endif ++ __ bind(cont); ++ // Compiled code leaves the floating point stack dirty, empty it. ++ __ empty_FPU_stack(); ++ ++ ++ // Call C code. Need thread and this frame, but NOT official VM entry ++ // crud. We cannot block on this call, no GC can happen. ++#ifndef OPT_THREAD ++ __ get_thread(thread); ++#endif ++ ++ __ move(A0, thread); ++ __ addiu(SP, SP, -additional_words * wordSize); ++ ++ __ set_last_Java_frame(NOREG, NOREG, NULL); ++ ++ // Call fetch_unroll_info(). Need thread and this frame, but NOT official VM entry - cannot block on ++ // this call, no GC can happen. Call should capture return values. 
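++  // Record last_Java_pc by hand: a pc inside this blob is stored into the
++  // frame anchor before the call to fetch_unroll_info.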
++ ++ __ relocate(relocInfo::internal_pc_type); ++ { ++ intptr_t save_pc = (intptr_t)__ pc() + NativeMovConstReg::instruction_size + 28; ++ __ patchable_set48(AT, save_pc); ++ } ++ __ sd(AT, thread, in_bytes(JavaThread::frame_anchor_offset() + JavaFrameAnchor::last_Java_pc_offset())); ++ ++ __ call((address)Deoptimization::fetch_unroll_info); ++ //__ call(CAST_FROM_FN_PTR(address, Deoptimization::fetch_unroll_info), relocInfo::runtime_call_type); ++ __ delayed()->nop(); ++ oop_maps->add_gc_map(__ pc() - start, map); ++ __ addiu(SP, SP, additional_words * wordSize); ++ __ get_thread(thread); ++ __ reset_last_Java_frame(false); ++ ++ // Load UnrollBlock into S7 ++ __ move(unroll, V0); ++ ++ ++ // Move the unpack kind to a safe place in the UnrollBlock because ++ // we are very short of registers ++ ++ Address unpack_kind(unroll, Deoptimization::UnrollBlock::unpack_kind_offset_in_bytes()); ++ __ sw(reason, unpack_kind); ++ // save the unpack_kind value ++ // Retrieve the possible live values (return values) ++ // All callee save registers representing jvm state ++ // are now in the vframeArray. ++ ++ Label noException; ++ __ move(AT, Deoptimization::Unpack_exception); ++ __ bne(AT, reason, noException);// Was exception pending? ++ __ delayed()->nop(); ++ __ ld_ptr(V0, thread, in_bytes(JavaThread::exception_oop_offset())); ++ __ ld_ptr(V1, thread, in_bytes(JavaThread::exception_pc_offset())); ++ __ st_ptr(R0, thread, in_bytes(JavaThread::exception_pc_offset())); ++ __ st_ptr(R0, thread, in_bytes(JavaThread::exception_oop_offset())); ++ ++ __ verify_oop(V0); ++ ++ // Overwrite the result registers with the exception results. ++ __ st_ptr(V0, SP, RegisterSaver::v0Offset()*wordSize); ++ __ st_ptr(V1, SP, RegisterSaver::v1Offset()*wordSize); ++ ++ __ bind(noException); ++ ++ ++ // Stack is back to only having register save data on the stack. ++ // Now restore the result registers. Everything else is either dead or captured ++ // in the vframeArray. ++ ++ RegisterSaver::restore_result_registers(masm); ++ // All of the register save area has been popped of the stack. Only the ++ // return address remains. ++ // Pop all the frames we must move/replace. ++ // Frame picture (youngest to oldest) ++ // 1: self-frame (no frame link) ++ // 2: deopting frame (no frame link) ++ // 3: caller of deopting frame (could be compiled/interpreted). ++ // ++ // Note: by leaving the return address of self-frame on the stack ++ // and using the size of frame 2 to adjust the stack ++ // when we are done the return to frame 3 will still be on the stack. 
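++  // The UnrollBlock returned by fetch_unroll_info describes the interpreter
++  // frames to rebuild; pull its arrays and counts into the registers below.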
++ ++ // register for the sender's sp ++ Register sender_sp = Rsender; ++ // register for frame pcs ++ Register pcs = T0; ++ // register for frame sizes ++ Register sizes = T1; ++ // register for frame count ++ Register count = T3; ++ ++ // Pop deoptimized frame ++ __ lw(AT, unroll, Deoptimization::UnrollBlock::size_of_deoptimized_frame_offset_in_bytes()); ++ __ addu(SP, SP, AT); ++ // sp should be pointing at the return address to the caller (3) ++ ++ // Load array of frame pcs into pcs ++ __ ld_ptr(pcs, unroll, Deoptimization::UnrollBlock::frame_pcs_offset_in_bytes()); ++ __ addiu(SP, SP, wordSize); // trash the old pc ++ // Load array of frame sizes into T6 ++ __ ld_ptr(sizes, unroll, Deoptimization::UnrollBlock::frame_sizes_offset_in_bytes()); ++ ++ ++ ++ // Load count of frams into T3 ++ __ lw(count, unroll, Deoptimization::UnrollBlock::number_of_frames_offset_in_bytes()); ++ // Pick up the initial fp we should save ++ __ ld(FP, unroll, Deoptimization::UnrollBlock::initial_info_offset_in_bytes()); ++ // Now adjust the caller's stack to make up for the extra locals ++ // but record the original sp so that we can save it in the skeletal interpreter ++ // frame and the stack walking of interpreter_sender will get the unextended sp ++ // value and not the "real" sp value. ++ __ move(sender_sp, SP); ++ __ lw(AT, unroll, Deoptimization::UnrollBlock::caller_adjustment_offset_in_bytes()); ++ __ subu(SP, SP, AT); ++ ++ // Push interpreter frames in a loop ++ // ++ //Loop: ++ // 0x000000555bd82d18: lw t2, 0x0(t1) ; lw sizes[i] <--- error lw->ld ++ // 0x000000555bd82d1c: ld at, 0x0(t0) ; ld pcs[i] ++ // 0x000000555bd82d20: daddiu t2, t2, 0xfffffff0 ; t2 -= 16 ++ // 0x000000555bd82d24: daddiu sp, sp, 0xfffffff0 ++ // 0x000000555bd82d28: sd fp, 0x0(sp) ; push fp ++ // 0x000000555bd82d2c: sd at, 0x8(sp) ; push at ++ // 0x000000555bd82d30: daddu fp, sp, zero ; fp <- sp ++ // 0x000000555bd82d34: dsubu sp, sp, t2 ; sp -= t2 ++ // 0x000000555bd82d38: sd zero, 0xfffffff0(fp) ; __ sd(R0, FP, frame::interpreter_frame_last_sp_offset * wordSize); ++ // 0x000000555bd82d3c: sd s4, 0xfffffff8(fp) ; __ sd(sender_sp, FP, frame::interpreter_frame_sender_sp_offset * wordSize); ++ // 0x000000555bd82d40: daddu s4, sp, zero ; move(sender_sp, SP); ++ // 0x000000555bd82d44: daddiu t3, t3, 0xffffffff ; count -- ++ // 0x000000555bd82d48: daddiu t1, t1, 0x4 ; sizes += 4 ++ // 0x000000555bd82d4c: bne t3, zero, 0x000000555bd82d18 ++ // 0x000000555bd82d50: daddiu t0, t0, 0x4 ; <--- error t0 += 8 ++ // ++ // pcs[0] = frame_pcs[0] = deopt_sender.raw_pc(); regex.split ++ Label loop; ++ __ bind(loop); ++ __ ld(T2, sizes, 0); // Load frame size ++ __ ld_ptr(AT, pcs, 0); // save return address ++ __ addiu(T2, T2, -2*wordSize); // we'll push pc and fp, by hand ++ __ push2(AT, FP); ++ __ move(FP, SP); ++ __ subu(SP, SP, T2); // Prolog! 
++ // This value is corrected by layout_activation_impl ++ __ sd(R0, FP, frame::interpreter_frame_last_sp_offset * wordSize); ++ __ sd(sender_sp, FP, frame::interpreter_frame_sender_sp_offset * wordSize);// Make it walkable ++ __ move(sender_sp, SP); // pass to next frame ++ __ addiu(count, count, -1); // decrement counter ++ __ addiu(sizes, sizes, wordSize); // Bump array pointer (sizes) ++ __ bne(count, R0, loop); ++ __ delayed()->addiu(pcs, pcs, wordSize); // Bump array pointer (pcs) ++ __ ld(AT, pcs, 0); // frame_pcs[number_of_frames] = Interpreter::deopt_entry(vtos, 0); ++ // Re-push self-frame ++ __ push2(AT, FP); ++ __ move(FP, SP); ++ __ sd(R0, FP, frame::interpreter_frame_last_sp_offset * wordSize); ++ __ sd(sender_sp, FP, frame::interpreter_frame_sender_sp_offset * wordSize); ++ __ addiu(SP, SP, -(frame_size_in_words - 2 - additional_words) * wordSize); ++ ++ // Restore frame locals after moving the frame ++ __ sd(V0, SP, RegisterSaver::v0Offset() * wordSize); ++ __ sd(V1, SP, RegisterSaver::v1Offset() * wordSize); ++ __ sdc1(F0, SP, RegisterSaver::fpResultOffset()* wordSize);// Pop float stack and store in local ++ __ sdc1(F1, SP, (RegisterSaver::fpResultOffset() + 1) * wordSize); ++ ++ ++ // Call unpack_frames(). Need thread and this frame, but NOT official VM entry - cannot block on ++ // this call, no GC can happen. ++ __ move(A1, reason); // exec_mode ++ __ get_thread(thread); ++ __ move(A0, thread); // thread ++ __ addiu(SP, SP, (-additional_words) *wordSize); ++ ++ // set last_Java_sp, last_Java_fp ++ __ set_last_Java_frame(NOREG, FP, NULL); ++ ++ __ move(AT, -(StackAlignmentInBytes)); ++ __ andr(SP, SP, AT); // Fix stack alignment as required by ABI ++ ++ __ relocate(relocInfo::internal_pc_type); ++ { ++ intptr_t save_pc = (intptr_t)__ pc() + NativeMovConstReg::instruction_size + 28; ++ __ patchable_set48(AT, save_pc); ++ } ++ __ sd(AT, thread, in_bytes(JavaThread::frame_anchor_offset() + JavaFrameAnchor::last_Java_pc_offset())); ++ ++ __ call(CAST_FROM_FN_PTR(address, Deoptimization::unpack_frames), relocInfo::runtime_call_type); ++ __ delayed()->nop(); ++ // Revert SP alignment after call since we're going to do some SP relative addressing below ++ __ ld(SP, thread, in_bytes(JavaThread::last_Java_sp_offset())); ++ // Set an oopmap for the call site ++ oop_maps->add_gc_map(__ offset(), new OopMap( frame_size_in_words , 0)); ++ ++ __ push(V0); ++ ++ __ get_thread(thread); ++ __ reset_last_Java_frame(true); ++ ++ // Collect return values ++ __ ld(V0, SP, (RegisterSaver::v0Offset() + additional_words + 1) * wordSize); ++ __ ld(V1, SP, (RegisterSaver::v1Offset() + additional_words + 1) * wordSize); ++ __ ldc1(F0, SP, (RegisterSaver::fpResultOffset() + additional_words + 1) * wordSize);// Pop float stack and store in local ++ __ ldc1(F1, SP, (RegisterSaver::fpResultOffset() + additional_words + 2) * wordSize); ++ //FIXME, ++ // Clear floating point stack before returning to interpreter ++ __ empty_FPU_stack(); ++ //FIXME, we should consider about float and double ++ // Push a float or double return value if necessary. 
++ __ leave(); ++ ++ // Jump to interpreter ++ __ jr(RA); ++ __ delayed()->nop(); ++ ++ masm->flush(); ++ _deopt_blob = DeoptimizationBlob::create(&buffer, oop_maps, 0, exception_offset, reexecute_offset, frame_size_in_words); ++ _deopt_blob->set_unpack_with_exception_in_tls_offset(exception_in_tls_offset); ++} ++ ++#ifdef COMPILER2 ++ ++//------------------------------generate_uncommon_trap_blob-------------------- ++// Ought to generate an ideal graph & compile, but here's some SPARC ASM ++// instead. ++void SharedRuntime::generate_uncommon_trap_blob() { ++ // allocate space for the code ++ ResourceMark rm; ++ // setup code generation tools ++ CodeBuffer buffer ("uncommon_trap_blob", 512*80 , 512*40 ); ++ MacroAssembler* masm = new MacroAssembler(&buffer); ++ ++ enum frame_layout { ++ fp_off, fp_off2, ++ return_off, return_off2, ++ framesize ++ }; ++ assert(framesize % 4 == 0, "sp not 16-byte aligned"); ++ ++ address start = __ pc(); ++ ++ // Push self-frame. ++ __ daddiu(SP, SP, -framesize * BytesPerInt); ++ ++ __ sd(RA, SP, return_off * BytesPerInt); ++ __ sd(FP, SP, fp_off * BytesPerInt); ++ ++ __ daddiu(FP, SP, fp_off * BytesPerInt); ++ ++ // Clear the floating point exception stack ++ __ empty_FPU_stack(); ++ ++ Register thread = TREG; ++ ++#ifndef OPT_THREAD ++ __ get_thread(thread); ++#endif ++ // set last_Java_sp ++ __ set_last_Java_frame(NOREG, FP, NULL); ++ __ relocate(relocInfo::internal_pc_type); ++ { ++ long save_pc = (long)__ pc() + 52; ++ __ patchable_set48(AT, (long)save_pc); ++ __ sd(AT, thread, in_bytes(JavaThread::frame_anchor_offset() + JavaFrameAnchor::last_Java_pc_offset())); ++ } ++ // Call C code. Need thread but NOT official VM entry ++ // crud. We cannot block on this call, no GC can happen. Call should ++ // capture callee-saved registers as well as return values. ++ __ move(A0, thread); ++ // argument already in T0 ++ __ move(A1, T0); ++ __ patchable_call((address)Deoptimization::uncommon_trap); ++ ++ // Set an oopmap for the call site ++ OopMapSet *oop_maps = new OopMapSet(); ++ OopMap* map = new OopMap( framesize, 0 ); ++ ++ //oop_maps->add_gc_map( __ offset(), true, map); ++ oop_maps->add_gc_map( __ offset(), map); ++ ++#ifndef OPT_THREAD ++ __ get_thread(thread); ++#endif ++ __ reset_last_Java_frame(false); ++ ++ // Load UnrollBlock into S7 ++ Register unroll = S7; ++ __ move(unroll, V0); ++ ++ // Pop all the frames we must move/replace. ++ // ++ // Frame picture (youngest to oldest) ++ // 1: self-frame (no frame link) ++ // 2: deopting frame (no frame link) ++ // 3: possible-i2c-adapter-frame ++ // 4: caller of deopting frame (could be compiled/interpreted. 
If interpreted we will create an ++ // and c2i here) ++ ++ __ daddiu(SP, SP, framesize * BytesPerInt); ++ ++ // Pop deoptimized frame ++ __ lw(AT, unroll, Deoptimization::UnrollBlock::size_of_deoptimized_frame_offset_in_bytes()); ++ __ daddu(SP, SP, AT); ++ ++ // register for frame pcs ++ Register pcs = T8; ++ // register for frame sizes ++ Register sizes = T9; ++ // register for frame count ++ Register count = T3; ++ // register for the sender's sp ++ Register sender_sp = T1; ++ ++ // sp should be pointing at the return address to the caller (4) ++ // Load array of frame pcs ++ __ ld(pcs, unroll, Deoptimization::UnrollBlock::frame_pcs_offset_in_bytes()); ++ ++ // Load array of frame sizes ++ __ ld(sizes, unroll, Deoptimization::UnrollBlock::frame_sizes_offset_in_bytes()); ++ __ lwu(count, unroll, Deoptimization::UnrollBlock::number_of_frames_offset_in_bytes()); ++ ++ // Pick up the initial fp we should save ++ __ ld(FP, unroll, Deoptimization::UnrollBlock::initial_info_offset_in_bytes()); ++ // Now adjust the caller's stack to make up for the extra locals ++ // but record the original sp so that we can save it in the skeletal interpreter ++ // frame and the stack walking of interpreter_sender will get the unextended sp ++ // value and not the "real" sp value. ++ ++ __ move(sender_sp, SP); ++ __ lw(AT, unroll, Deoptimization::UnrollBlock::caller_adjustment_offset_in_bytes()); ++ __ dsubu(SP, SP, AT); ++ // Push interpreter frames in a loop ++ Label loop; ++ __ bind(loop); ++ __ ld(T2, sizes, 0); // Load frame size ++ __ ld(AT, pcs, 0); // save return address ++ __ daddiu(T2, T2, -2*wordSize); // we'll push pc and fp, by hand ++ __ push2(AT, FP); ++ __ move(FP, SP); ++ __ dsubu(SP, SP, T2); // Prolog! ++ // This value is corrected by layout_activation_impl ++ __ sd(R0, FP, frame::interpreter_frame_last_sp_offset * wordSize); ++ __ sd(sender_sp, FP, frame::interpreter_frame_sender_sp_offset * wordSize);// Make it walkable ++ __ move(sender_sp, SP); // pass to next frame ++ __ daddiu(count, count, -1); // decrement counter ++ __ daddiu(sizes, sizes, wordSize); // Bump array pointer (sizes) ++ __ addiu(pcs, pcs, wordSize); // Bump array pointer (pcs) ++ __ bne(count, R0, loop); ++ __ delayed()->nop(); // Bump array pointer (pcs) ++ ++ __ ld(RA, pcs, 0); ++ ++ // Re-push self-frame ++ // save old & set new FP ++ // save final return address ++ __ enter(); ++ ++ // Use FP because the frames look interpreted now ++ // Save "the_pc" since it cannot easily be retrieved using the last_java_SP after we aligned SP. ++ // Don't need the precise return PC here, just precise enough to point into this code blob. ++ address the_pc = __ pc(); ++ __ set_last_Java_frame(NOREG, FP, the_pc); ++ ++ __ move(AT, -(StackAlignmentInBytes)); ++ __ andr(SP, SP, AT); // Fix stack alignment as required by ABI ++ ++ // Call C code. Need thread but NOT official VM entry ++ // crud. We cannot block on this call, no GC can happen. Call should ++ // restore return values to their stack-slots with the new SP. ++ __ move(A0, thread); ++ __ move(A1, Deoptimization::Unpack_uncommon_trap); ++ __ patchable_call((address)Deoptimization::unpack_frames); ++ // Set an oopmap for the call site ++ oop_maps->add_gc_map( __ offset(), new OopMap( framesize, 0 ) ); ++ ++ __ reset_last_Java_frame(true); ++ ++ // Pop self-frame. ++ __ leave(); // Epilog! 
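Editorial paraphrase (not part of the patch) of the frame-pushing loop above: for each skeletal interpreter frame described by the UnrollBlock, the code pushes the recorded return pc and the current FP by hand and then drops SP by the remaining frame size. UnrollInfo, wordSize and push_skeletal_frames below are illustrative stand-ins only; the last_sp/sender_sp bookkeeping done by the real loop is omitted.

#include <cstdint>

static const int wordSize = 8;            // 64-bit words on MIPS64

struct UnrollInfo {                       // stand-in for Deoptimization::UnrollBlock
  const intptr_t* frame_sizes;            // per-frame size, in bytes
  const intptr_t* frame_pcs;              // return pc recorded for each frame
  int             number_of_frames;
};

static void push_skeletal_frames(const UnrollInfo& u, intptr_t*& sp, intptr_t*& fp) {
  for (int i = 0; i < u.number_of_frames; i++) {
    intptr_t body = u.frame_sizes[i] - 2 * wordSize;  // pc and fp are pushed by hand
    *--sp = u.frame_pcs[i];               // return address (ld(AT, pcs, 0); push2)
    *--sp = (intptr_t)fp;                 // caller's fp
    fp = sp;                              // move(FP, SP)
    sp = (intptr_t*)((char*)sp - body);   // dsubu(SP, SP, T2): rest of the frame
  }
}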
++ ++ // Jump to interpreter ++ __ jr(RA); ++ __ delayed()->nop(); ++ // ------------- ++ // make sure all code is generated ++ masm->flush(); ++ ++ _uncommon_trap_blob = UncommonTrapBlob::create(&buffer, oop_maps, framesize / 2); ++} ++ ++#endif // COMPILER2 ++ ++//------------------------------generate_handler_blob------------------- ++// ++// Generate a special Compile2Runtime blob that saves all registers, and sets ++// up an OopMap and calls safepoint code to stop the compiled code for ++// a safepoint. ++// ++// This blob is jumped to (via a breakpoint and the signal handler) from a ++// safepoint in compiled code. ++ ++SafepointBlob* SharedRuntime::generate_handler_blob(address call_ptr, int pool_type) { ++ ++ // Account for thread arg in our frame ++ const int additional_words = 0; ++ int frame_size_in_words; ++ ++ assert (StubRoutines::forward_exception_entry() != NULL, "must be generated before"); ++ ++ ResourceMark rm; ++ OopMapSet *oop_maps = new OopMapSet(); ++ OopMap* map; ++ ++ // allocate space for the code ++ // setup code generation tools ++ CodeBuffer buffer ("handler_blob", 2048, 512); ++ MacroAssembler* masm = new MacroAssembler( &buffer); ++ ++ const Register thread = TREG; ++ address start = __ pc(); ++ address call_pc = NULL; ++ bool cause_return = (pool_type == POLL_AT_RETURN); ++ bool save_vectors = (pool_type == POLL_AT_VECTOR_LOOP); ++ ++ // If cause_return is true we are at a poll_return and there is ++ // the return address in RA to the caller on the nmethod ++ // that is safepoint. We can leave this return in RA and ++ // effectively complete the return and safepoint in the caller. ++ // Otherwise we load exception pc to RA. ++ __ push(thread); ++#ifndef OPT_THREAD ++ __ get_thread(thread); ++#endif ++ ++ if(!cause_return) { ++ __ ld_ptr(RA, Address(thread, JavaThread::saved_exception_pc_offset())); ++ } ++ ++ __ pop(thread); ++ map = RegisterSaver::save_live_registers(masm, additional_words, &frame_size_in_words, save_vectors); ++ ++#ifndef OPT_THREAD ++ __ get_thread(thread); ++#endif ++ ++ // The following is basically a call_VM. However, we need the precise ++ // address of the call in order to generate an oopmap. Hence, we do all the ++ // work outselvs. ++ ++ __ move(A0, thread); ++ __ set_last_Java_frame(NOREG, NOREG, NULL); ++ ++ ++ // Do the call ++ __ call(call_ptr); ++ __ delayed()->nop(); ++ ++ // Set an oopmap for the call site. This oopmap will map all ++ // oop-registers and debug-info registers as callee-saved. This ++ // will allow deoptimization at this safepoint to find all possible ++ // debug-info recordings, as well as let GC find all oops. 
++ oop_maps->add_gc_map(__ offset(), map); ++ ++ Label noException; ++ ++ // Clear last_Java_sp again ++ __ reset_last_Java_frame(false); ++ ++ __ ld_ptr(AT, thread, in_bytes(Thread::pending_exception_offset())); ++ __ beq(AT, R0, noException); ++ __ delayed()->nop(); ++ ++ // Exception pending ++ ++ RegisterSaver::restore_live_registers(masm, save_vectors); ++ //forward_exception_entry need return address on the stack ++ __ push(RA); ++ __ patchable_jump((address)StubRoutines::forward_exception_entry()); ++ ++ // No exception case ++ __ bind(noException); ++ // Normal exit, register restoring and exit ++ RegisterSaver::restore_live_registers(masm, save_vectors); ++ __ jr(RA); ++ __ delayed()->nop(); ++ ++ masm->flush(); ++ ++ // Fill-out other meta info ++ return SafepointBlob::create(&buffer, oop_maps, frame_size_in_words); ++} ++ ++// ++// generate_resolve_blob - call resolution (static/virtual/opt-virtual/ic-miss ++// ++// Generate a stub that calls into vm to find out the proper destination ++// of a java call. All the argument registers are live at this point ++// but since this is generic code we don't know what they are and the caller ++// must do any gc of the args. ++// ++RuntimeStub* SharedRuntime::generate_resolve_blob(address destination, const char* name) { ++ assert (StubRoutines::forward_exception_entry() != NULL, "must be generated before"); ++ ++ // allocate space for the code ++ ResourceMark rm; ++ ++ //CodeBuffer buffer(name, 1000, 512); ++ CodeBuffer buffer(name, 2000, 2048); ++ MacroAssembler* masm = new MacroAssembler(&buffer); ++ ++ int frame_size_words; ++ //we put the thread in A0 ++ ++ OopMapSet *oop_maps = new OopMapSet(); ++ OopMap* map = NULL; ++ ++ int start = __ offset(); ++ map = RegisterSaver::save_live_registers(masm, 0, &frame_size_words); ++ ++ ++ int frame_complete = __ offset(); ++ ++ const Register thread = T8; ++ __ get_thread(thread); ++ ++ __ move(A0, thread); ++ __ set_last_Java_frame(noreg, FP, NULL); ++ //align the stack before invoke native ++ __ move(AT, -(StackAlignmentInBytes)); ++ __ andr(SP, SP, AT); ++ __ relocate(relocInfo::internal_pc_type); ++ { ++ intptr_t save_pc = (intptr_t)__ pc() + NativeMovConstReg::instruction_size + 24 + 1 * BytesPerInstWord; ++ __ patchable_set48(AT, save_pc); ++ } ++ __ sd(AT, thread, in_bytes(JavaThread::last_Java_pc_offset())); ++ ++ __ call(destination); ++ __ delayed()->nop(); ++ ++ // Set an oopmap for the call site. ++ // We need this not only for callee-saved registers, but also for volatile ++ // registers that the compiler might be keeping live across a safepoint. ++ oop_maps->add_gc_map( __ offset() - start, map); ++ // V0 contains the address we are going to jump to assuming no exception got installed ++ __ get_thread(thread); ++ __ ld_ptr(SP, thread, in_bytes(JavaThread::last_Java_sp_offset())); ++ // clear last_Java_sp ++ __ reset_last_Java_frame(true); ++ // check for pending exceptions ++ Label pending; ++ __ ld_ptr(AT, thread, in_bytes(Thread::pending_exception_offset())); ++ __ bne(AT, R0, pending); ++ __ delayed()->nop(); ++ // get the returned Method* ++ //FIXME, do mips need this ? ++ __ get_vm_result_2(Rmethod, thread); // Refer to OpenJDK8 ++ __ st_ptr(Rmethod, SP, RegisterSaver::methodOffset() * wordSize); ++ __ st_ptr(V0, SP, RegisterSaver::v0Offset() * wordSize); ++ RegisterSaver::restore_live_registers(masm); ++ ++ // We are back the the original state on entry and ready to go the callee method. 
++ __ jr(V0); ++ __ delayed()->nop(); ++ // Pending exception after the safepoint ++ ++ __ bind(pending); ++ ++ RegisterSaver::restore_live_registers(masm); ++ ++ // exception pending => remove activation and forward to exception handler ++ //forward_exception_entry need return address on the stack ++ __ push(RA); ++ __ get_thread(thread); ++ __ st_ptr(R0, thread, in_bytes(JavaThread::vm_result_offset())); ++ __ ld_ptr(V0, thread, in_bytes(Thread::pending_exception_offset())); ++ __ jmp(StubRoutines::forward_exception_entry(), relocInfo::runtime_call_type); ++ __ delayed()->nop(); ++ // ++ // make sure all code is generated ++ masm->flush(); ++ ++ RuntimeStub* tmp= RuntimeStub::new_runtime_stub(name, &buffer, frame_complete, frame_size_words, oop_maps, true); ++ return tmp; ++} ++ ++extern "C" int SpinPause() {return 0;} ++ ++ ++//------------------------------Montgomery multiplication------------------------ ++// ++ ++// Subtract 0:b from carry:a. Return carry. ++static unsigned long ++sub(unsigned long a[], unsigned long b[], unsigned long carry, long len) { ++ long borrow = 0, t = 0; ++ unsigned long tmp0, tmp1; ++ __asm__ __volatile__ ( ++ "0: \n" ++ "ld %[tmp0], 0(%[a]) \n" ++ "ld %[tmp1], 0(%[b]) \n" ++ "sltu %[t], %[tmp0], %[borrow] \n" ++ "dsubu %[tmp0], %[tmp0], %[borrow] \n" ++ "sltu %[borrow], %[tmp0], %[tmp1] \n" ++ "or %[borrow], %[borrow], %[t] \n" ++ "dsubu %[tmp0], %[tmp0], %[tmp1] \n" ++ "sd %[tmp0], 0(%[a]) \n" ++ "daddiu %[a], %[a], 8 \n" ++ "daddiu %[b], %[b], 8 \n" ++ "daddiu %[len], %[len], -1 \n" ++ "bgtz %[len], 0b \n" ++ "dsubu %[tmp0], %[carry], %[borrow] \n" ++ : [len]"+r"(len), [tmp0]"=&r"(tmp0), [tmp1]"=&r"(tmp1), [borrow]"+r"(borrow), [a]"+r"(a), [b]"+r"(b), [t]"+r"(t) ++ : [carry]"r"(carry) ++ : "memory" ++ ); ++ return tmp0; ++} ++ ++// Multiply (unsigned) Long A by Long B, accumulating the double- ++// length result into the accumulator formed of t0, t1, and t2. ++inline void MACC(unsigned long A, unsigned long B, unsigned long &t0, unsigned long &t1, unsigned long &t2) { ++ unsigned long hi, lo, carry = 0, t = 0; ++ __asm__ __volatile__( ++ "dmultu %[A], %[B] \n" ++ "mfhi %[hi] \n" ++ "mflo %[lo] \n" ++ "daddu %[t0], %[t0], %[lo] \n" ++ "sltu %[carry], %[t0], %[lo] \n" ++ "daddu %[t1], %[t1], %[carry] \n" ++ "sltu %[t], %[t1], %[carry] \n" ++ "daddu %[t1], %[t1], %[hi] \n" ++ "sltu %[carry], %[t1], %[hi] \n" ++ "or %[carry], %[carry], %[t] \n" ++ "daddu %[t2], %[t2], %[carry] \n" ++ : [hi]"=&r"(hi), [lo]"=&r"(lo), [t0]"+r"(t0), [t1]"+r"(t1), [t2]"+r"(t2), [carry]"+r"(carry), [t]"+r"(t) ++ : [A]"r"(A), [B]"r"(B) ++ : ++ ); ++} ++ ++// As above, but add twice the double-length result into the ++// accumulator. 
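Editorial aid (not part of the patch): what one MACC step computes, expressed portably with GCC/Clang's unsigned __int128. The 128-bit product of A and B is added into the triple-precision accumulator {t2, t1, t0}, with carries propagated exactly as the inline assembly above does; MACC2, defined next, feeds the same product in twice.

#include <cstdint>

static inline void macc_reference(uint64_t A, uint64_t B,
                                  uint64_t& t0, uint64_t& t1, uint64_t& t2) {
  unsigned __int128 p   = (unsigned __int128)A * B;          // full 128-bit product
  unsigned __int128 lo  = (unsigned __int128)t0 + (uint64_t)p;
  unsigned __int128 mid = (unsigned __int128)t1 + (uint64_t)(p >> 64)
                        + (uint64_t)(lo >> 64);              // carry out of t0
  t0  = (uint64_t)lo;
  t1  = (uint64_t)mid;
  t2 += (uint64_t)(mid >> 64);                               // carry out of t1
}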
++inline void MACC2(unsigned long A, unsigned long B, unsigned long &t0, unsigned long &t1, unsigned long &t2) { ++ unsigned long hi, lo, carry = 0, t = 0; ++ __asm__ __volatile__( ++ "dmultu %[A], %[B] \n" ++ "mfhi %[hi] \n" ++ "mflo %[lo] \n" ++ "daddu %[t0], %[t0], %[lo] \n" ++ "sltu %[carry], %[t0], %[lo] \n" ++ "daddu %[t1], %[t1], %[carry] \n" ++ "sltu %[t], %[t1], %[carry] \n" ++ "daddu %[t1], %[t1], %[hi] \n" ++ "sltu %[carry], %[t1], %[hi] \n" ++ "or %[carry], %[carry], %[t] \n" ++ "daddu %[t2], %[t2], %[carry] \n" ++ "daddu %[t0], %[t0], %[lo] \n" ++ "sltu %[carry], %[t0], %[lo] \n" ++ "daddu %[t1], %[t1], %[carry] \n" ++ "sltu %[t], %[t1], %[carry] \n" ++ "daddu %[t1], %[t1], %[hi] \n" ++ "sltu %[carry], %[t1], %[hi] \n" ++ "or %[carry], %[carry], %[t] \n" ++ "daddu %[t2], %[t2], %[carry] \n" ++ : [hi]"=&r"(hi), [lo]"=&r"(lo), [t0]"+r"(t0), [t1]"+r"(t1), [t2]"+r"(t2), [carry]"+r"(carry), [t]"+r"(t) ++ : [A]"r"(A), [B]"r"(B) ++ : ++ ); ++} ++ ++// Fast Montgomery multiplication. The derivation of the algorithm is ++// in A Cryptographic Library for the Motorola DSP56000, ++// Dusse and Kaliski, Proc. EUROCRYPT 90, pp. 230-237. ++ ++static void __attribute__((noinline)) ++montgomery_multiply(unsigned long a[], unsigned long b[], unsigned long n[], ++ unsigned long m[], unsigned long inv, int len) { ++ unsigned long t0 = 0, t1 = 0, t2 = 0; // Triple-precision accumulator ++ int i; ++ ++ assert(inv * n[0] == -1UL, "broken inverse in Montgomery multiply"); ++ ++ for (i = 0; i < len; i++) { ++ int j; ++ for (j = 0; j < i; j++) { ++ MACC(a[j], b[i-j], t0, t1, t2); ++ MACC(m[j], n[i-j], t0, t1, t2); ++ } ++ MACC(a[i], b[0], t0, t1, t2); ++ m[i] = t0 * inv; ++ MACC(m[i], n[0], t0, t1, t2); ++ ++ assert(t0 == 0, "broken Montgomery multiply"); ++ ++ t0 = t1; t1 = t2; t2 = 0; ++ } ++ ++ for (i = len; i < 2*len; i++) { ++ int j; ++ for (j = i-len+1; j < len; j++) { ++ MACC(a[j], b[i-j], t0, t1, t2); ++ MACC(m[j], n[i-j], t0, t1, t2); ++ } ++ m[i-len] = t0; ++ t0 = t1; t1 = t2; t2 = 0; ++ } ++ ++ while (t0) ++ t0 = sub(m, n, t0, len); ++} ++ ++// Fast Montgomery squaring. This uses asymptotically 25% fewer ++// multiplies so it should be up to 25% faster than Montgomery ++// multiplication. However, its loop control is more complex and it ++// may actually run slower on some machines. 
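One more editorial note before montgomery_square(): the assert(t0 == 0) in both loops relies on inv being -n[0]^-1 mod 2^64, so after choosing m[i] = t0 * inv, adding m[i] * n[0] cancels the low accumulator word exactly. A self-contained demonstration (not part of the patch; neg_inverse64 is an illustrative helper using the standard Newton iteration for inverses mod 2^64):

#include <cassert>
#include <cstdint>

// -n^{-1} mod 2^64 for odd n; each Newton step doubles the correct low bits.
static uint64_t neg_inverse64(uint64_t n) {
  uint64_t x = n;                          // correct to 3 bits since n is odd
  for (int i = 0; i < 5; i++) x *= 2 - n * x;
  return (uint64_t)0 - x;
}

int main() {
  uint64_t n0  = 0x9E3779B97F4A7C15ULL;    // any odd modulus digit
  uint64_t inv = neg_inverse64(n0);
  assert(inv * n0 == (uint64_t)-1);        // the precondition asserted above
  uint64_t t0  = 0x0123456789ABCDEFULL;    // arbitrary low accumulator word
  uint64_t m_i = t0 * inv;                 // digit chosen as m[i] = t0 * inv
  assert(t0 + m_i * n0 == 0);              // low word cancels (mod 2^64)
  return 0;
}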
++ ++static void __attribute__((noinline)) ++montgomery_square(unsigned long a[], unsigned long n[], ++ unsigned long m[], unsigned long inv, int len) { ++ unsigned long t0 = 0, t1 = 0, t2 = 0; // Triple-precision accumulator ++ int i; ++ ++ assert(inv * n[0] == -1UL, "broken inverse in Montgomery multiply"); ++ ++ for (i = 0; i < len; i++) { ++ int j; ++ int end = (i+1)/2; ++ for (j = 0; j < end; j++) { ++ MACC2(a[j], a[i-j], t0, t1, t2); ++ MACC(m[j], n[i-j], t0, t1, t2); ++ } ++ if ((i & 1) == 0) { ++ MACC(a[j], a[j], t0, t1, t2); ++ } ++ for (; j < i; j++) { ++ MACC(m[j], n[i-j], t0, t1, t2); ++ } ++ m[i] = t0 * inv; ++ MACC(m[i], n[0], t0, t1, t2); ++ ++ assert(t0 == 0, "broken Montgomery square"); ++ ++ t0 = t1; t1 = t2; t2 = 0; ++ } ++ ++ for (i = len; i < 2*len; i++) { ++ int start = i-len+1; ++ int end = start + (len - start)/2; ++ int j; ++ for (j = start; j < end; j++) { ++ MACC2(a[j], a[i-j], t0, t1, t2); ++ MACC(m[j], n[i-j], t0, t1, t2); ++ } ++ if ((i & 1) == 0) { ++ MACC(a[j], a[j], t0, t1, t2); ++ } ++ for (; j < len; j++) { ++ MACC(m[j], n[i-j], t0, t1, t2); ++ } ++ m[i-len] = t0; ++ t0 = t1; t1 = t2; t2 = 0; ++ } ++ ++ while (t0) ++ t0 = sub(m, n, t0, len); ++} ++ ++// Swap words in a longword. ++static unsigned long swap(unsigned long x) { ++ return (x << 32) | (x >> 32); ++} ++ ++// Copy len longwords from s to d, word-swapping as we go. The ++// destination array is reversed. ++static void reverse_words(unsigned long *s, unsigned long *d, int len) { ++ d += len; ++ while(len-- > 0) { ++ d--; ++ *d = swap(*s); ++ s++; ++ } ++} ++ ++// The threshold at which squaring is advantageous was determined ++// experimentally on an i7-3930K (Ivy Bridge) CPU @ 3.5GHz. ++// Doesn't seem to be relevant for MIPS64 so we use the same value. ++#define MONTGOMERY_SQUARING_THRESHOLD 64 ++ ++void SharedRuntime::montgomery_multiply(jint *a_ints, jint *b_ints, jint *n_ints, ++ jint len, jlong inv, ++ jint *m_ints) { ++ assert(len % 2 == 0, "array length in montgomery_multiply must be even"); ++ int longwords = len/2; ++ ++ // Make very sure we don't use so much space that the stack might ++ // overflow. 512 jints corresponds to an 16384-bit integer and ++ // will use here a total of 8k bytes of stack space. ++ int total_allocation = longwords * sizeof (unsigned long) * 4; ++ guarantee(total_allocation <= 8192, "must be"); ++ unsigned long *scratch = (unsigned long *)alloca(total_allocation); ++ ++ // Local scratch arrays ++ unsigned long ++ *a = scratch + 0 * longwords, ++ *b = scratch + 1 * longwords, ++ *n = scratch + 2 * longwords, ++ *m = scratch + 3 * longwords; ++ ++ reverse_words((unsigned long *)a_ints, a, longwords); ++ reverse_words((unsigned long *)b_ints, b, longwords); ++ reverse_words((unsigned long *)n_ints, n, longwords); ++ ++ ::montgomery_multiply(a, b, n, m, (unsigned long)inv, longwords); ++ ++ reverse_words(m, (unsigned long *)m_ints, longwords); ++} ++ ++void SharedRuntime::montgomery_square(jint *a_ints, jint *n_ints, ++ jint len, jlong inv, ++ jint *m_ints) { ++ assert(len % 2 == 0, "array length in montgomery_square must be even"); ++ int longwords = len/2; ++ ++ // Make very sure we don't use so much space that the stack might ++ // overflow. 512 jints corresponds to an 16384-bit integer and ++ // will use here a total of 6k bytes of stack space. 
++ int total_allocation = longwords * sizeof (unsigned long) * 3; ++ guarantee(total_allocation <= 8192, "must be"); ++ unsigned long *scratch = (unsigned long *)alloca(total_allocation); ++ ++ // Local scratch arrays ++ unsigned long ++ *a = scratch + 0 * longwords, ++ *n = scratch + 1 * longwords, ++ *m = scratch + 2 * longwords; ++ ++ reverse_words((unsigned long *)a_ints, a, longwords); ++ reverse_words((unsigned long *)n_ints, n, longwords); ++ ++ if (len >= MONTGOMERY_SQUARING_THRESHOLD) { ++ ::montgomery_square(a, n, m, (unsigned long)inv, longwords); ++ } else { ++ ::montgomery_multiply(a, a, n, m, (unsigned long)inv, longwords); ++ } ++ ++ reverse_words(m, (unsigned long *)m_ints, longwords); ++} +diff --git a/hotspot/src/cpu/mips/vm/stubGenerator_mips_64.cpp b/hotspot/src/cpu/mips/vm/stubGenerator_mips_64.cpp +new file mode 100644 +index 0000000000..aeb797faf9 +--- /dev/null ++++ b/hotspot/src/cpu/mips/vm/stubGenerator_mips_64.cpp +@@ -0,0 +1,2147 @@ ++/* ++ * Copyright (c) 2003, 2013, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#include "precompiled.hpp" ++#include "asm/macroAssembler.hpp" ++#include "asm/macroAssembler.inline.hpp" ++#include "interpreter/interpreter.hpp" ++#include "nativeInst_mips.hpp" ++#include "oops/instanceOop.hpp" ++#include "oops/method.hpp" ++#include "oops/objArrayKlass.hpp" ++#include "oops/oop.inline.hpp" ++#include "prims/methodHandles.hpp" ++#include "runtime/frame.inline.hpp" ++#include "runtime/handles.inline.hpp" ++#include "runtime/sharedRuntime.hpp" ++#include "runtime/stubCodeGenerator.hpp" ++#include "runtime/stubRoutines.hpp" ++#include "runtime/thread.inline.hpp" ++#include "utilities/top.hpp" ++#ifdef COMPILER2 ++#include "opto/runtime.hpp" ++#endif ++ ++// Declaration and definition of StubGenerator (no .hpp file). ++// For a more detailed description of the stub routine structure ++// see the comment in stubRoutines.hpp ++ ++#define __ _masm-> ++ ++#define A0 RA0 ++#define A1 RA1 ++#define A2 RA2 ++#define A3 RA3 ++#define A4 RA4 ++#define A5 RA5 ++#define A6 RA6 ++#define A7 RA7 ++#define T0 RT0 ++#define T1 RT1 ++#define T2 RT2 ++#define T3 RT3 ++#define T8 RT8 ++#define T9 RT9 ++ ++#define TIMES_OOP (UseCompressedOops ? 
Address::times_4 : Address::times_8) ++//#define a__ ((Assembler*)_masm)-> ++ ++//#ifdef PRODUCT ++//#define BLOCK_COMMENT(str) /* nothing */ ++//#else ++//#define BLOCK_COMMENT(str) __ block_comment(str) ++//#endif ++ ++//#define BIND(label) bind(label); BLOCK_COMMENT(#label ":") ++const int MXCSR_MASK = 0xFFC0; // Mask out any pending exceptions ++ ++// Stub Code definitions ++ ++static address handle_unsafe_access() { ++ JavaThread* thread = JavaThread::current(); ++ address pc = thread->saved_exception_pc(); ++ // pc is the instruction which we must emulate ++ // doing a no-op is fine: return garbage from the load ++ // therefore, compute npc ++ address npc = (address)((unsigned long)pc + sizeof(unsigned int)); ++ ++ // request an async exception ++ thread->set_pending_unsafe_access_error(); ++ ++ // return address of next instruction to execute ++ return npc; ++} ++ ++class StubGenerator: public StubCodeGenerator { ++ private: ++ ++ // ABI mips n64 ++ // This fig is not MIPS ABI. It is call Java from C ABI. ++ // Call stubs are used to call Java from C ++ // ++ // [ return_from_Java ] ++ // [ argument word n-1 ] <--- sp ++ // ... ++ // [ argument word 0 ] ++ // ... ++ // -8 [ S6 ] ++ // -7 [ S5 ] ++ // -6 [ S4 ] ++ // -5 [ S3 ] ++ // -4 [ S1 ] ++ // -3 [ TSR(S2) ] ++ // -2 [ LVP(S7) ] ++ // -1 [ BCP(S1) ] ++ // 0 [ saved fp ] <--- fp_after_call ++ // 1 [ return address ] ++ // 2 [ ptr. to call wrapper ] <--- a0 (old sp -->)fp ++ // 3 [ result ] <--- a1 ++ // 4 [ result_type ] <--- a2 ++ // 5 [ method ] <--- a3 ++ // 6 [ entry_point ] <--- a4 ++ // 7 [ parameters ] <--- a5 ++ // 8 [ parameter_size ] <--- a6 ++ // 9 [ thread ] <--- a7 ++ ++ // ++ // n64 does not save paras in sp. ++ // ++ // [ return_from_Java ] ++ // [ argument word n-1 ] <--- sp ++ // ... ++ // [ argument word 0 ] ++ // ... ++ //-13 [ thread ] ++ //-12 [ result_type ] <--- a2 ++ //-11 [ result ] <--- a1 ++ //-10 [ ] ++ // -9 [ ptr. to call wrapper ] <--- a0 ++ // -8 [ S6 ] ++ // -7 [ S5 ] ++ // -6 [ S4 ] ++ // -5 [ S3 ] ++ // -4 [ S1 ] ++ // -3 [ TSR(S2) ] ++ // -2 [ LVP(S7) ] ++ // -1 [ BCP(S1) ] ++ // 0 [ saved fp ] <--- fp_after_call ++ // 1 [ return address ] ++ // 2 [ ] <--- old sp ++ // ++ // Find a right place in the call_stub for GP. ++ // GP will point to the starting point of Interpreter::dispatch_table(itos). ++ // It should be saved/restored before/after Java calls. ++ // ++ enum call_stub_layout { ++ RA_off = 1, ++ FP_off = 0, ++ BCP_off = -1, ++ LVP_off = -2, ++ TSR_off = -3, ++ S1_off = -4, ++ S3_off = -5, ++ S4_off = -6, ++ S5_off = -7, ++ S6_off = -8, ++ call_wrapper_off = -9, ++ result_off = -11, ++ result_type_off = -12, ++ thread_off = -13, ++ total_off = thread_off - 1, ++ GP_off = -14, ++ }; ++ ++ address generate_call_stub(address& return_address) { ++ ++ assert((int)frame::entry_frame_call_wrapper_offset == (int)call_wrapper_off, "adjust this code"); ++ StubCodeMark mark(this, "StubRoutines", "call_stub"); ++ address start = __ pc(); ++ ++ // same as in generate_catch_exception()! 
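Editorial note (not part of the patch) on the layout above: the call_stub_layout values are word offsets relative to FP once enter() has run, so a slot's byte displacement is simply offset * wordSize; generate_catch_exception() below reloads the saved thread exactly this way with ld(thread, FP, thread_off * wordSize). A trivial check of the arithmetic:

#include <cassert>

int main() {
  const int wordSize   = 8;                // 64-bit MIPS
  const int thread_off = -13;              // from the enum above
  assert(thread_off * wordSize == -104);   // byte displacement used by ld()
  return 0;
}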
++ ++ // stub code ++ // save ra and fp ++ __ enter(); ++ // I think 14 is the max gap between argument and callee saved register ++ __ daddiu(SP, SP, total_off * wordSize); ++ __ sd(BCP, FP, BCP_off * wordSize); ++ __ sd(LVP, FP, LVP_off * wordSize); ++ __ sd(TSR, FP, TSR_off * wordSize); ++ __ sd(S1, FP, S1_off * wordSize); ++ __ sd(S3, FP, S3_off * wordSize); ++ __ sd(S4, FP, S4_off * wordSize); ++ __ sd(S5, FP, S5_off * wordSize); ++ __ sd(S6, FP, S6_off * wordSize); ++ __ sd(A0, FP, call_wrapper_off * wordSize); ++ __ sd(A1, FP, result_off * wordSize); ++ __ sd(A2, FP, result_type_off * wordSize); ++ __ sd(A7, FP, thread_off * wordSize); ++ __ sd(GP, FP, GP_off * wordSize); ++ ++ __ set64(GP, (long)Interpreter::dispatch_table(itos)); ++ ++#ifdef OPT_THREAD ++ __ move(TREG, A7); ++#endif ++ //add for compressedoops ++ __ reinit_heapbase(); ++ ++#ifdef ASSERT ++ // make sure we have no pending exceptions ++ { ++ Label L; ++ __ ld(AT, A7, in_bytes(Thread::pending_exception_offset())); ++ __ beq(AT, R0, L); ++ __ delayed()->nop(); ++ /* FIXME: I do not know how to realize stop in mips arch, do it in the future */ ++ __ stop("StubRoutines::call_stub: entered with pending exception"); ++ __ bind(L); ++ } ++#endif ++ ++ // pass parameters if any ++ // A5: parameter ++ // A6: parameter_size ++ // T0: parameter_size_tmp(--) ++ // T2: offset(++) ++ // T3: tmp ++ Label parameters_done; ++ // judge if the parameter_size equals 0 ++ __ beq(A6, R0, parameters_done); ++ __ delayed()->nop(); ++ __ dsll(AT, A6, Interpreter::logStackElementSize); ++ __ dsubu(SP, SP, AT); ++ __ move(AT, -StackAlignmentInBytes); ++ __ andr(SP, SP , AT); ++ // Copy Java parameters in reverse order (receiver last) ++ // Note that the argument order is inverted in the process ++ Label loop; ++ __ move(T0, A6); ++ __ move(T2, R0); ++ __ bind(loop); ++ ++ // get parameter ++ __ dsll(T3, T0, LogBytesPerWord); ++ __ daddu(T3, T3, A5); ++ __ ld(AT, T3, -wordSize); ++ __ dsll(T3, T2, LogBytesPerWord); ++ __ daddu(T3, T3, SP); ++ __ sd(AT, T3, Interpreter::expr_offset_in_bytes(0)); ++ __ daddiu(T2, T2, 1); ++ __ daddiu(T0, T0, -1); ++ __ bne(T0, R0, loop); ++ __ delayed()->nop(); ++ // advance to next parameter ++ ++ // call Java function ++ __ bind(parameters_done); ++ ++ // receiver in V0, methodOop in Rmethod ++ ++ __ move(Rmethod, A3); ++ __ move(Rsender, SP); //set sender sp ++ __ jalr(A4); ++ __ delayed()->nop(); ++ return_address = __ pc(); ++ ++ Label common_return; ++ __ bind(common_return); ++ ++ // store result depending on type ++ // (everything that is not T_LONG, T_FLOAT or T_DOUBLE is treated as T_INT) ++ __ ld(T0, FP, result_off * wordSize); // result --> T0 ++ Label is_long, is_float, is_double, exit; ++ __ ld(T2, FP, result_type_off * wordSize); // result_type --> T2 ++ __ daddiu(T3, T2, (-1) * T_LONG); ++ __ beq(T3, R0, is_long); ++ __ delayed()->daddiu(T3, T2, (-1) * T_FLOAT); ++ __ beq(T3, R0, is_float); ++ __ delayed()->daddiu(T3, T2, (-1) * T_DOUBLE); ++ __ beq(T3, R0, is_double); ++ __ delayed()->nop(); ++ ++ // handle T_INT case ++ __ sd(V0, T0, 0 * wordSize); ++ __ bind(exit); ++ ++ // restore ++ __ ld(BCP, FP, BCP_off * wordSize); ++ __ ld(LVP, FP, LVP_off * wordSize); ++ __ ld(GP, FP, GP_off * wordSize); ++ __ ld(TSR, FP, TSR_off * wordSize); ++ ++ __ ld(S1, FP, S1_off * wordSize); ++ __ ld(S3, FP, S3_off * wordSize); ++ __ ld(S4, FP, S4_off * wordSize); ++ __ ld(S5, FP, S5_off * wordSize); ++ __ ld(S6, FP, S6_off * wordSize); ++ ++ __ leave(); ++ ++ // return ++ __ jr(RA); ++ __ delayed()->nop(); ++ ++ 
// handle return types different from T_INT ++ __ bind(is_long); ++ __ sd(V0, T0, 0 * wordSize); ++ __ b(exit); ++ __ delayed()->nop(); ++ ++ __ bind(is_float); ++ __ swc1(F0, T0, 0 * wordSize); ++ __ b(exit); ++ __ delayed()->nop(); ++ ++ __ bind(is_double); ++ __ sdc1(F0, T0, 0 * wordSize); ++ __ b(exit); ++ __ delayed()->nop(); ++ //FIXME, 1.6 mips version add operation of fpu here ++ StubRoutines::gs2::set_call_stub_compiled_return(__ pc()); ++ __ b(common_return); ++ __ delayed()->nop(); ++ return start; ++ } ++ ++ // Return point for a Java call if there's an exception thrown in ++ // Java code. The exception is caught and transformed into a ++ // pending exception stored in JavaThread that can be tested from ++ // within the VM. ++ // ++ // Note: Usually the parameters are removed by the callee. In case ++ // of an exception crossing an activation frame boundary, that is ++ // not the case if the callee is compiled code => need to setup the ++ // sp. ++ // ++ // V0: exception oop ++ ++ address generate_catch_exception() { ++ StubCodeMark mark(this, "StubRoutines", "catch_exception"); ++ address start = __ pc(); ++ ++ Register thread = TREG; ++ ++ // get thread directly ++#ifndef OPT_THREAD ++ __ ld(thread, FP, thread_off * wordSize); ++#endif ++ ++#ifdef ASSERT ++ // verify that threads correspond ++ { Label L; ++ __ get_thread(T8); ++ __ beq(T8, thread, L); ++ __ delayed()->nop(); ++ __ stop("StubRoutines::catch_exception: threads must correspond"); ++ __ bind(L); ++ } ++#endif ++ // set pending exception ++ __ verify_oop(V0); ++ __ sd(V0, thread, in_bytes(Thread::pending_exception_offset())); ++ __ li(AT, (long)__FILE__); ++ __ sd(AT, thread, in_bytes(Thread::exception_file_offset ())); ++ __ li(AT, (long)__LINE__); ++ __ sd(AT, thread, in_bytes(Thread::exception_line_offset ())); ++ ++ // complete return to VM ++ assert(StubRoutines::_call_stub_return_address != NULL, "_call_stub_return_address must have been generated before"); ++ __ jmp(StubRoutines::_call_stub_return_address, relocInfo::none); ++ __ delayed()->nop(); ++ ++ return start; ++ } ++ ++ // Continuation point for runtime calls returning with a pending ++ // exception. The pending exception check happened in the runtime ++ // or native call stub. The pending exception in Thread is ++ // converted into a Java-level exception. ++ // ++ // Contract with Java-level exception handlers: ++ // V0: exception ++ // V1: throwing pc ++ // ++ // NOTE: At entry of this stub, exception-pc must be on stack !! ++ ++ address generate_forward_exception() { ++ StubCodeMark mark(this, "StubRoutines", "forward exception"); ++ //Register thread = TREG; ++ Register thread = TREG; ++ address start = __ pc(); ++ ++ // Upon entry, the sp points to the return address returning into ++ // Java (interpreted or compiled) code; i.e., the return address ++ // throwing pc. ++ // ++ // Arguments pushed before the runtime call are still on the stack ++ // but the exception handler will reset the stack pointer -> ++ // ignore them. A potential result in registers can be ignored as ++ // well. 
++ ++#ifndef OPT_THREAD ++ __ get_thread(thread); ++#endif ++#ifdef ASSERT ++ // make sure this code is only executed if there is a pending exception ++ { ++ Label L; ++ __ ld(AT, thread, in_bytes(Thread::pending_exception_offset())); ++ __ bne(AT, R0, L); ++ __ delayed()->nop(); ++ __ stop("StubRoutines::forward exception: no pending exception (1)"); ++ __ bind(L); ++ } ++#endif ++ ++ // compute exception handler into T9 ++ __ ld(A1, SP, 0); ++ __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::exception_handler_for_return_address), thread, A1); ++ __ move(T9, V0); ++ __ pop(V1); ++ ++#ifndef OPT_THREAD ++ __ get_thread(thread); ++#endif ++ __ ld(V0, thread, in_bytes(Thread::pending_exception_offset())); ++ __ sd(R0, thread, in_bytes(Thread::pending_exception_offset())); ++ ++#ifdef ASSERT ++ // make sure exception is set ++ { ++ Label L; ++ __ bne(V0, R0, L); ++ __ delayed()->nop(); ++ __ stop("StubRoutines::forward exception: no pending exception (2)"); ++ __ bind(L); ++ } ++#endif ++ ++ // continue at exception handler (return address removed) ++ // V0: exception ++ // T9: exception handler ++ // V1: throwing pc ++ __ verify_oop(V0); ++ __ jr(T9); ++ __ delayed()->nop(); ++ ++ return start; ++ } ++ ++ // The following routine generates a subroutine to throw an ++ // asynchronous UnknownError when an unsafe access gets a fault that ++ // could not be reasonably prevented by the programmer. (Example: ++ // SIGBUS/OBJERR.) ++ address generate_handler_for_unsafe_access() { ++ StubCodeMark mark(this, "StubRoutines", "handler_for_unsafe_access"); ++ address start = __ pc(); ++ __ push(V0); ++ __ pushad_except_v0(); // push registers ++ __ call(CAST_FROM_FN_PTR(address, handle_unsafe_access), relocInfo::runtime_call_type); ++ __ delayed()->nop(); ++ __ popad_except_v0(); ++ __ move(RA, V0); ++ __ pop(V0); ++ __ jr(RA); ++ __ delayed()->nop(); ++ return start; ++ } ++ ++ // Non-destructive plausibility checks for oops ++ // ++ address generate_verify_oop() { ++ StubCodeMark mark(this, "StubRoutines", "verify_oop"); ++ address start = __ pc(); ++ __ reinit_heapbase(); ++ __ verify_oop_subroutine(); ++ address end = __ pc(); ++ return start; ++ } ++ ++ // ++ // Generate overlap test for array copy stubs ++ // ++ // Input: ++ // A0 - array1 ++ // A1 - array2 ++ // A2 - element count ++ // ++ ++ // use T9 as temp ++ void array_overlap_test(address no_overlap_target, int log2_elem_size) { ++ int elem_size = 1 << log2_elem_size; ++ Address::ScaleFactor sf = Address::times_1; ++ ++ switch (log2_elem_size) { ++ case 0: sf = Address::times_1; break; ++ case 1: sf = Address::times_2; break; ++ case 2: sf = Address::times_4; break; ++ case 3: sf = Address::times_8; break; ++ } ++ ++ __ dsll(AT, A2, sf); ++ __ daddu(AT, AT, A0); ++ __ daddiu(T9, AT, -elem_size); ++ __ dsubu(AT, A1, A0); ++ __ blez(AT, no_overlap_target); ++ __ delayed()->nop(); ++ __ dsubu(AT, A1, T9); ++ __ bgtz(AT, no_overlap_target); ++ __ delayed()->nop(); ++ ++ // If A0 = 0xf... 
and A1 = 0x0..., than goto no_overlap_target ++ Label L; ++ __ bgez(A0, L); ++ __ delayed()->nop(); ++ __ bgtz(A1, no_overlap_target); ++ __ delayed()->nop(); ++ __ bind(L); ++ ++ } ++ ++ // ++ // Generate store check for array ++ // ++ // Input: ++ // T0 - starting address ++ // T1 - element count ++ // ++ // The 2 input registers are overwritten ++ // ++ ++ ++ void array_store_check(Register tmp) { ++ assert_different_registers(tmp, AT, T0, T1); ++ BarrierSet* bs = Universe::heap()->barrier_set(); ++ assert(bs->kind() == BarrierSet::CardTableModRef, "Wrong barrier set kind"); ++ CardTableModRefBS* ct = (CardTableModRefBS*)bs; ++ assert(sizeof(*ct->byte_map_base) == sizeof(jbyte), "adjust this code"); ++ Label l_0; ++ ++ if (UseConcMarkSweepGC) __ sync(); ++ ++ __ set64(tmp, (long)ct->byte_map_base); ++ ++ __ dsll(AT, T1, TIMES_OOP); ++ __ daddu(AT, T0, AT); ++ __ daddiu(T1, AT, - BytesPerHeapOop); ++ ++ __ shr(T0, CardTableModRefBS::card_shift); ++ __ shr(T1, CardTableModRefBS::card_shift); ++ ++ __ dsubu(T1, T1, T0); // end --> cards count ++ __ bind(l_0); ++ ++ __ daddu(AT, tmp, T0); ++ if (UseLEXT1) { ++ __ gssbx(R0, AT, T1, 0); ++ } else { ++ __ daddu(AT, AT, T1); ++ __ sb(R0, AT, 0); ++ } ++ ++ __ bgtz(T1, l_0); ++ __ delayed()->daddiu(T1, T1, - 1); ++ } ++ ++ // Generate code for an array write pre barrier ++ // ++ // addr - starting address ++ // count - element count ++ // tmp - scratch register ++ // ++ // Destroy no registers! ++ // ++ void gen_write_ref_array_pre_barrier(Register addr, Register count, bool dest_uninitialized) { ++ BarrierSet* bs = Universe::heap()->barrier_set(); ++ switch (bs->kind()) { ++ case BarrierSet::G1SATBCT: ++ case BarrierSet::G1SATBCTLogging: ++ // With G1, don't generate the call if we statically know that the target in uninitialized ++ if (!dest_uninitialized) { ++ __ pushad(); // push registers ++ if (count == A0) { ++ if (addr == A1) { ++ // exactly backwards!! ++ //__ xchgptr(c_rarg1, c_rarg0); ++ __ move(AT, A0); ++ __ move(A0, A1); ++ __ move(A1, AT); ++ } else { ++ __ move(A1, count); ++ __ move(A0, addr); ++ } ++ } else { ++ __ move(A0, addr); ++ __ move(A1, count); ++ } ++ __ call_VM_leaf(CAST_FROM_FN_PTR(address, BarrierSet::static_write_ref_array_pre), 2); ++ __ popad(); ++ } ++ break; ++ case BarrierSet::CardTableModRef: ++ case BarrierSet::CardTableExtension: ++ case BarrierSet::ModRef: ++ break; ++ default: ++ ShouldNotReachHere(); ++ ++ } ++ } ++ ++ // ++ // Generate code for an array write post barrier ++ // ++ // Input: ++ // start - register containing starting address of destination array ++ // count - elements count ++ // scratch - scratch register ++ // ++ // The input registers are overwritten. ++ // ++ void gen_write_ref_array_post_barrier(Register start, Register count, Register scratch) { ++ assert_different_registers(start, count, scratch, AT); ++ BarrierSet* bs = Universe::heap()->barrier_set(); ++ switch (bs->kind()) { ++ case BarrierSet::G1SATBCT: ++ case BarrierSet::G1SATBCTLogging: ++ { ++ __ pushad(); // push registers (overkill) ++ if (count == A0) { ++ if (start == A1) { ++ // exactly backwards!! 
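Editorial sketch (not part of the patch): array_store_check() above and the CardTableModRef branch of gen_write_ref_array_post_barrier() below both walk the destination range and zero one card byte per card. The sketch assumes HotSpot's default card table (512-byte cards, card_shift == 9) and dirty value 0, which is why the loops store R0; byte_map_base is the biased card array, so indexing by address >> card_shift works directly.

#include <cstdint>

static void dirty_card_range(volatile int8_t* byte_map_base,
                             uintptr_t first_oop, uintptr_t last_oop) {
  const int card_shift = 9;                        // 512-byte cards
  for (uintptr_t c = first_oop >> card_shift; c <= last_oop >> card_shift; c++)
    byte_map_base[c] = 0;                          // 0 == dirty (the stored R0)
}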
++ //__ xchgptr(c_rarg1, c_rarg0); ++ __ move(AT, A0); ++ __ move(A0, A1); ++ __ move(A1, AT); ++ } else { ++ __ move(A1, count); ++ __ move(A0, start); ++ } ++ } else { ++ __ move(A0, start); ++ __ move(A1, count); ++ } ++ __ call_VM_leaf(CAST_FROM_FN_PTR(address, BarrierSet::static_write_ref_array_post), 2); ++ __ popad(); ++ } ++ break; ++ case BarrierSet::CardTableModRef: ++ case BarrierSet::CardTableExtension: ++ { ++ CardTableModRefBS* ct = (CardTableModRefBS*)bs; ++ assert(sizeof(*ct->byte_map_base) == sizeof(jbyte), "adjust this code"); ++ ++ Label L_loop; ++ const Register end = count; ++ ++ if (UseConcMarkSweepGC) __ sync(); ++ ++ int64_t disp = (int64_t) ct->byte_map_base; ++ __ set64(scratch, disp); ++ ++ __ lea(end, Address(start, count, TIMES_OOP, 0)); // end == start+count*oop_size ++ __ daddiu(end, end, -BytesPerHeapOop); // end - 1 to make inclusive ++ __ shr(start, CardTableModRefBS::card_shift); ++ __ shr(end, CardTableModRefBS::card_shift); ++ __ dsubu(end, end, start); // end --> cards count ++ ++ __ daddu(start, start, scratch); ++ ++ __ bind(L_loop); ++ if (UseLEXT1) { ++ __ gssbx(R0, start, count, 0); ++ } else { ++ __ daddu(AT, start, count); ++ __ sb(R0, AT, 0); ++ } ++ __ daddiu(count, count, -1); ++ __ slt(AT, count, R0); ++ __ beq(AT, R0, L_loop); ++ __ delayed()->nop(); ++ } ++ break; ++ default: ++ ShouldNotReachHere(); ++ } ++ } ++ ++ // Arguments: ++ // aligned - true => Input and output aligned on a HeapWord == 8-byte boundary ++ // ignored ++ // name - stub name string ++ // ++ // Inputs: ++ // c_rarg0 - source array address ++ // c_rarg1 - destination array address ++ // c_rarg2 - element count, treated as ssize_t, can be zero ++ // ++ // If 'from' and/or 'to' are aligned on 4-, 2-, or 1-byte boundaries, ++ // we let the hardware handle it. The one to eight bytes within words, ++ // dwords or qwords that span cache line boundaries will still be loaded ++ // and stored atomically. ++ // ++ // Side Effects: ++ // disjoint_byte_copy_entry is set to the no-overlap entry point ++ // used by generate_conjoint_byte_copy(). ++ // ++ address generate_disjoint_byte_copy(bool aligned, const char * name) { ++ StubCodeMark mark(this, "StubRoutines", name); ++ __ align(CodeEntryAlignment); ++ ++ ++ Register tmp1 = T0; ++ Register tmp2 = T1; ++ Register tmp3 = T3; ++ ++ address start = __ pc(); ++ ++ __ push(tmp1); ++ __ push(tmp2); ++ __ push(tmp3); ++ __ move(tmp1, A0); ++ __ move(tmp2, A1); ++ __ move(tmp3, A2); ++ ++ ++ Label l_1, l_2, l_3, l_4, l_5, l_6, l_7, l_8, l_9, l_10, l_11; ++ Label l_debug; ++ ++ __ daddiu(AT, tmp3, -9); //why the number is 9 ? ++ __ blez(AT, l_9); ++ __ delayed()->nop(); ++ ++ if (!aligned) { ++ __ xorr(AT, tmp1, tmp2); ++ __ andi(AT, AT, 1); ++ __ bne(AT, R0, l_9); // if arrays don't have the same alignment mod 2, do 1 element copy ++ __ delayed()->nop(); ++ ++ __ andi(AT, tmp1, 1); ++ __ beq(AT, R0, l_10); //copy 1 enlement if necessary to aligh to 2 bytes ++ __ delayed()->nop(); ++ ++ __ lb(AT, tmp1, 0); ++ __ daddiu(tmp1, tmp1, 1); ++ __ sb(AT, tmp2, 0); ++ __ daddiu(tmp2, tmp2, 1); ++ __ daddiu(tmp3, tmp3, -1); ++ __ bind(l_10); ++ ++ __ xorr(AT, tmp1, tmp2); ++ __ andi(AT, AT, 3); ++ __ bne(AT, R0, l_1); // if arrays don't have the same alignment mod 4, do 2 elements copy ++ __ delayed()->nop(); ++ ++ // At this point it is guaranteed that both, from and to have the same alignment mod 4. ++ ++ // Copy 2 elements if necessary to align to 4 bytes. 
++ __ andi(AT, tmp1, 3); ++ __ beq(AT, R0, l_2); ++ __ delayed()->nop(); ++ ++ __ lhu(AT, tmp1, 0); ++ __ daddiu(tmp1, tmp1, 2); ++ __ sh(AT, tmp2, 0); ++ __ daddiu(tmp2, tmp2, 2); ++ __ daddiu(tmp3, tmp3, -2); ++ __ bind(l_2); ++ ++ // At this point the positions of both, from and to, are at least 4 byte aligned. ++ ++ // Copy 4 elements at a time. ++ // Align to 8 bytes, but only if both, from and to, have same alignment mod 8. ++ __ xorr(AT, tmp1, tmp2); ++ __ andi(AT, AT, 7); ++ __ bne(AT, R0, l_6); // not same alignment mod 8 -> copy 2, either from or to will be unaligned ++ __ delayed()->nop(); ++ ++ // Copy a 4 elements if necessary to align to 8 bytes. ++ __ andi(AT, tmp1, 7); ++ __ beq(AT, R0, l_7); ++ __ delayed()->nop(); ++ ++ __ lw(AT, tmp1, 0); ++ __ daddiu(tmp3, tmp3, -4); ++ __ sw(AT, tmp2, 0); ++ { // FasterArrayCopy ++ __ daddiu(tmp1, tmp1, 4); ++ __ daddiu(tmp2, tmp2, 4); ++ } ++ } ++ ++ __ bind(l_7); ++ ++ // Copy 4 elements at a time; either the loads or the stores can ++ // be unaligned if aligned == false. ++ ++ { // FasterArrayCopy ++ __ daddiu(AT, tmp3, -7); ++ __ blez(AT, l_6); // copy 4 at a time if less than 4 elements remain ++ __ delayed()->nop(); ++ ++ __ bind(l_8); ++ // For Loongson, there is 128-bit memory access. TODO ++ __ ld(AT, tmp1, 0); ++ __ sd(AT, tmp2, 0); ++ __ daddiu(tmp1, tmp1, 8); ++ __ daddiu(tmp2, tmp2, 8); ++ __ daddiu(tmp3, tmp3, -8); ++ __ daddiu(AT, tmp3, -8); ++ __ bgez(AT, l_8); ++ __ delayed()->nop(); ++ } ++ __ bind(l_6); ++ ++ // copy 4 bytes at a time ++ { // FasterArrayCopy ++ __ daddiu(AT, tmp3, -3); ++ __ blez(AT, l_1); ++ __ delayed()->nop(); ++ ++ __ bind(l_3); ++ __ lw(AT, tmp1, 0); ++ __ sw(AT, tmp2, 0); ++ __ daddiu(tmp1, tmp1, 4); ++ __ daddiu(tmp2, tmp2, 4); ++ __ daddiu(tmp3, tmp3, -4); ++ __ daddiu(AT, tmp3, -4); ++ __ bgez(AT, l_3); ++ __ delayed()->nop(); ++ ++ } ++ ++ // do 2 bytes copy ++ __ bind(l_1); ++ { ++ __ daddiu(AT, tmp3, -1); ++ __ blez(AT, l_9); ++ __ delayed()->nop(); ++ ++ __ bind(l_5); ++ __ lhu(AT, tmp1, 0); ++ __ daddiu(tmp3, tmp3, -2); ++ __ sh(AT, tmp2, 0); ++ __ daddiu(tmp1, tmp1, 2); ++ __ daddiu(tmp2, tmp2, 2); ++ __ daddiu(AT, tmp3, -2); ++ __ bgez(AT, l_5); ++ __ delayed()->nop(); ++ } ++ ++ //do 1 element copy--byte ++ __ bind(l_9); ++ __ beq(R0, tmp3, l_4); ++ __ delayed()->nop(); ++ ++ { ++ __ bind(l_11); ++ __ lb(AT, tmp1, 0); ++ __ daddiu(tmp3, tmp3, -1); ++ __ sb(AT, tmp2, 0); ++ __ daddiu(tmp1, tmp1, 1); ++ __ daddiu(tmp2, tmp2, 1); ++ __ daddiu(AT, tmp3, -1); ++ __ bgez(AT, l_11); ++ __ delayed()->nop(); ++ } ++ ++ __ bind(l_4); ++ __ pop(tmp3); ++ __ pop(tmp2); ++ __ pop(tmp1); ++ ++ __ jr(RA); ++ __ delayed()->nop(); ++ ++ return start; ++ } ++ ++ // Arguments: ++ // aligned - true => Input and output aligned on a HeapWord == 8-byte boundary ++ // ignored ++ // name - stub name string ++ // ++ // Inputs: ++ // A0 - source array address ++ // A1 - destination array address ++ // A2 - element count, treated as ssize_t, can be zero ++ // ++ // If 'from' and/or 'to' are aligned on 4-, 2-, or 1-byte boundaries, ++ // we let the hardware handle it. The one to eight bytes within words, ++ // dwords or qwords that span cache line boundaries will still be loaded ++ // and stored atomically. 
++ // ++ address generate_conjoint_byte_copy(bool aligned, const char *name) { ++ __ align(CodeEntryAlignment); ++ StubCodeMark mark(this, "StubRoutines", name); ++ address start = __ pc(); ++ ++ Label l_copy_4_bytes_loop, l_copy_suffix, l_copy_suffix_loop, l_exit; ++ Label l_copy_byte, l_from_unaligned, l_unaligned, l_4_bytes_aligned; ++ ++ address nooverlap_target = aligned ? ++ StubRoutines::arrayof_jbyte_disjoint_arraycopy() : ++ StubRoutines::jbyte_disjoint_arraycopy(); ++ ++ array_overlap_test(nooverlap_target, 0); ++ ++ const Register from = A0; // source array address ++ const Register to = A1; // destination array address ++ const Register count = A2; // elements count ++ const Register end_from = T3; // source array end address ++ const Register end_to = T0; // destination array end address ++ const Register end_count = T1; // destination array end address ++ ++ __ push(end_from); ++ __ push(end_to); ++ __ push(end_count); ++ __ push(T8); ++ ++ // copy from high to low ++ __ move(end_count, count); ++ __ daddu(end_from, from, end_count); ++ __ daddu(end_to, to, end_count); ++ ++ // If end_from and end_to has differante alignment, unaligned copy is performed. ++ __ andi(AT, end_from, 3); ++ __ andi(T8, end_to, 3); ++ __ bne(AT, T8, l_copy_byte); ++ __ delayed()->nop(); ++ ++ // First deal with the unaligned data at the top. ++ __ bind(l_unaligned); ++ __ beq(end_count, R0, l_exit); ++ __ delayed()->nop(); ++ ++ __ andi(AT, end_from, 3); ++ __ bne(AT, R0, l_from_unaligned); ++ __ delayed()->nop(); ++ ++ __ andi(AT, end_to, 3); ++ __ beq(AT, R0, l_4_bytes_aligned); ++ __ delayed()->nop(); ++ ++ __ bind(l_from_unaligned); ++ __ lb(AT, end_from, -1); ++ __ sb(AT, end_to, -1); ++ __ daddiu(end_from, end_from, -1); ++ __ daddiu(end_to, end_to, -1); ++ __ daddiu(end_count, end_count, -1); ++ __ b(l_unaligned); ++ __ delayed()->nop(); ++ ++ // now end_to, end_from point to 4-byte aligned high-ends ++ // end_count contains byte count that is not copied. 
++ // copy 4 bytes at a time ++ __ bind(l_4_bytes_aligned); ++ ++ __ move(T8, end_count); ++ __ daddiu(AT, end_count, -3); ++ __ blez(AT, l_copy_suffix); ++ __ delayed()->nop(); ++ ++ //__ andi(T8, T8, 3); ++ __ lea(end_from, Address(end_from, -4)); ++ __ lea(end_to, Address(end_to, -4)); ++ ++ __ dsrl(end_count, end_count, 2); ++ __ align(16); ++ __ bind(l_copy_4_bytes_loop); //l_copy_4_bytes ++ __ lw(AT, end_from, 0); ++ __ sw(AT, end_to, 0); ++ __ addiu(end_from, end_from, -4); ++ __ addiu(end_to, end_to, -4); ++ __ addiu(end_count, end_count, -1); ++ __ bne(end_count, R0, l_copy_4_bytes_loop); ++ __ delayed()->nop(); ++ ++ __ b(l_copy_suffix); ++ __ delayed()->nop(); ++ // copy dwords aligned or not with repeat move ++ // l_copy_suffix ++ // copy suffix (0-3 bytes) ++ __ bind(l_copy_suffix); ++ __ andi(T8, T8, 3); ++ __ beq(T8, R0, l_exit); ++ __ delayed()->nop(); ++ __ addiu(end_from, end_from, 3); ++ __ addiu(end_to, end_to, 3); ++ __ bind(l_copy_suffix_loop); ++ __ lb(AT, end_from, 0); ++ __ sb(AT, end_to, 0); ++ __ addiu(end_from, end_from, -1); ++ __ addiu(end_to, end_to, -1); ++ __ addiu(T8, T8, -1); ++ __ bne(T8, R0, l_copy_suffix_loop); ++ __ delayed()->nop(); ++ ++ __ bind(l_copy_byte); ++ __ beq(end_count, R0, l_exit); ++ __ delayed()->nop(); ++ __ lb(AT, end_from, -1); ++ __ sb(AT, end_to, -1); ++ __ daddiu(end_from, end_from, -1); ++ __ daddiu(end_to, end_to, -1); ++ __ daddiu(end_count, end_count, -1); ++ __ b(l_copy_byte); ++ __ delayed()->nop(); ++ ++ __ bind(l_exit); ++ __ pop(T8); ++ __ pop(end_count); ++ __ pop(end_to); ++ __ pop(end_from); ++ __ jr(RA); ++ __ delayed()->nop(); ++ return start; ++ } ++ ++ // Generate stub for disjoint short copy. If "aligned" is true, the ++ // "from" and "to" addresses are assumed to be heapword aligned. ++ // ++ // Arguments for generated stub: ++ // from: A0 ++ // to: A1 ++ // elm.count: A2 treated as signed ++ // one element: 2 bytes ++ // ++ // Strategy for aligned==true: ++ // ++ // If length <= 9: ++ // 1. copy 1 elements at a time (l_5) ++ // ++ // If length > 9: ++ // 1. copy 4 elements at a time until less than 4 elements are left (l_7) ++ // 2. copy 2 elements at a time until less than 2 elements are left (l_6) ++ // 3. copy last element if one was left in step 2. (l_1) ++ // ++ // ++ // Strategy for aligned==false: ++ // ++ // If length <= 9: same as aligned==true case ++ // ++ // If length > 9: ++ // 1. continue with step 7. if the alignment of from and to mod 4 ++ // is different. ++ // 2. align from and to to 4 bytes by copying 1 element if necessary ++ // 3. at l_2 from and to are 4 byte aligned; continue with ++ // 6. if they cannot be aligned to 8 bytes because they have ++ // got different alignment mod 8. ++ // 4. at this point we know that both, from and to, have the same ++ // alignment mod 8, now copy one element if necessary to get ++ // 8 byte alignment of from and to. ++ // 5. copy 4 elements at a time until less than 4 elements are ++ // left; depending on step 3. all load/stores are aligned. ++ // 6. copy 2 elements at a time until less than 2 elements are ++ // left. (l_6) ++ // 7. copy 1 element at a time. (l_5) ++ // 8. copy last element if one was left in step 6. 
(l_1) ++ ++ address generate_disjoint_short_copy(bool aligned, const char * name) { ++ StubCodeMark mark(this, "StubRoutines", name); ++ __ align(CodeEntryAlignment); ++ ++ Register tmp1 = T0; ++ Register tmp2 = T1; ++ Register tmp3 = T3; ++ Register tmp4 = T8; ++ Register tmp5 = T9; ++ Register tmp6 = T2; ++ ++ address start = __ pc(); ++ ++ __ push(tmp1); ++ __ push(tmp2); ++ __ push(tmp3); ++ __ move(tmp1, A0); ++ __ move(tmp2, A1); ++ __ move(tmp3, A2); ++ ++ Label l_1, l_2, l_3, l_4, l_5, l_6, l_7, l_8, l_9, l_10, l_11, l_12, l_13, l_14; ++ Label l_debug; ++ // don't try anything fancy if arrays don't have many elements ++ __ daddiu(AT, tmp3, -23); ++ __ blez(AT, l_14); ++ __ delayed()->nop(); ++ // move push here ++ __ push(tmp4); ++ __ push(tmp5); ++ __ push(tmp6); ++ ++ if (!aligned) { ++ __ xorr(AT, A0, A1); ++ __ andi(AT, AT, 1); ++ __ bne(AT, R0, l_debug); // if arrays don't have the same alignment mod 2, can this happen? ++ __ delayed()->nop(); ++ ++ __ xorr(AT, A0, A1); ++ __ andi(AT, AT, 3); ++ __ bne(AT, R0, l_1); // if arrays don't have the same alignment mod 4, do 1 element copy ++ __ delayed()->nop(); ++ ++ // At this point it is guaranteed that both, from and to have the same alignment mod 4. ++ ++ // Copy 1 element if necessary to align to 4 bytes. ++ __ andi(AT, A0, 3); ++ __ beq(AT, R0, l_2); ++ __ delayed()->nop(); ++ ++ __ lhu(AT, tmp1, 0); ++ __ daddiu(tmp1, tmp1, 2); ++ __ sh(AT, tmp2, 0); ++ __ daddiu(tmp2, tmp2, 2); ++ __ daddiu(tmp3, tmp3, -1); ++ __ bind(l_2); ++ ++ // At this point the positions of both, from and to, are at least 4 byte aligned. ++ ++ // Copy 4 elements at a time. ++ // Align to 8 bytes, but only if both, from and to, have same alignment mod 8. ++ __ xorr(AT, tmp1, tmp2); ++ __ andi(AT, AT, 7); ++ __ bne(AT, R0, l_6); // not same alignment mod 8 -> copy 2, either from or to will be unaligned ++ __ delayed()->nop(); ++ ++ // Copy a 2-element word if necessary to align to 8 bytes. ++ __ andi(AT, tmp1, 7); ++ __ beq(AT, R0, l_7); ++ __ delayed()->nop(); ++ ++ __ lw(AT, tmp1, 0); ++ __ daddiu(tmp3, tmp3, -2); ++ __ sw(AT, tmp2, 0); ++ __ daddiu(tmp1, tmp1, 4); ++ __ daddiu(tmp2, tmp2, 4); ++ }// end of if (!aligned) ++ ++ __ bind(l_7); ++ // At this time the position of both, from and to, are at least 8 byte aligned. ++ // Copy 8 elemnets at a time. ++ // Align to 16 bytes, but only if both from and to have same alignment mod 8. 
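The xorr/andi pair that follows (and the earlier mod-2/mod-4/mod-8 checks in these copy stubs) tests whether source and destination share the same alignment; only when they do can both pointers be bumped to the wider access size. A plain-C++ equivalent of the test, as an editorial aid (not part of the patch):

#include <cstdint>

// True when 'from' and 'to' have the same alignment modulo 'mod' (a power of
// two); (from ^ to) keeps only the bits where the addresses differ, and the
// mask checks that none of those fall in the low log2(mod) bits.
static bool same_alignment(uintptr_t from, uintptr_t to, uintptr_t mod) {
  return ((from ^ to) & (mod - 1)) == 0;   // e.g. mod == 16 for the check below
}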
++ __ xorr(AT, tmp1, tmp2); ++ __ andi(AT, AT, 15); ++ __ bne(AT, R0, l_9); ++ __ delayed()->nop(); ++ ++ // Copy 4-element word if necessary to align to 16 bytes, ++ __ andi(AT, tmp1, 15); ++ __ beq(AT, R0, l_10); ++ __ delayed()->nop(); ++ ++ __ ld(AT, tmp1, 0); ++ __ daddiu(tmp3, tmp3, -4); ++ __ sd(AT, tmp2, 0); ++ __ daddiu(tmp1, tmp1, 8); ++ __ daddiu(tmp2, tmp2, 8); ++ ++ __ bind(l_10); ++ ++ // Copy 8 elements at a time; either the loads or the stores can ++ // be unalligned if aligned == false ++ ++ { // FasterArrayCopy ++ __ bind(l_11); ++ // For loongson the 128-bit memory access instruction is gslq/gssq ++ if (UseLEXT1) { ++ __ gslq(AT, tmp4, tmp1, 0); ++ __ gslq(tmp5, tmp6, tmp1, 16); ++ __ daddiu(tmp1, tmp1, 32); ++ __ daddiu(tmp2, tmp2, 32); ++ __ gssq(AT, tmp4, tmp2, -32); ++ __ gssq(tmp5, tmp6, tmp2, -16); ++ } else { ++ __ ld(AT, tmp1, 0); ++ __ ld(tmp4, tmp1, 8); ++ __ ld(tmp5, tmp1, 16); ++ __ ld(tmp6, tmp1, 24); ++ __ daddiu(tmp1, tmp1, 32); ++ __ sd(AT, tmp2, 0); ++ __ sd(tmp4, tmp2, 8); ++ __ sd(tmp5, tmp2, 16); ++ __ sd(tmp6, tmp2, 24); ++ __ daddiu(tmp2, tmp2, 32); ++ } ++ __ daddiu(tmp3, tmp3, -16); ++ __ daddiu(AT, tmp3, -16); ++ __ bgez(AT, l_11); ++ __ delayed()->nop(); ++ } ++ __ bind(l_9); ++ ++ // Copy 4 elements at a time; either the loads or the stores can ++ // be unaligned if aligned == false. ++ { // FasterArrayCopy ++ __ daddiu(AT, tmp3, -15);// loop unrolling 4 times, so if the elements should not be less than 16 ++ __ blez(AT, l_4); // copy 2 at a time if less than 16 elements remain ++ __ delayed()->nop(); ++ ++ __ bind(l_8); ++ __ ld(AT, tmp1, 0); ++ __ ld(tmp4, tmp1, 8); ++ __ ld(tmp5, tmp1, 16); ++ __ ld(tmp6, tmp1, 24); ++ __ sd(AT, tmp2, 0); ++ __ sd(tmp4, tmp2, 8); ++ __ sd(tmp5, tmp2,16); ++ __ daddiu(tmp1, tmp1, 32); ++ __ daddiu(tmp2, tmp2, 32); ++ __ daddiu(tmp3, tmp3, -16); ++ __ daddiu(AT, tmp3, -16); ++ __ bgez(AT, l_8); ++ __ delayed()->sd(tmp6, tmp2, -8); ++ } ++ __ bind(l_6); ++ ++ // copy 2 element at a time ++ { // FasterArrayCopy ++ __ daddiu(AT, tmp3, -7); ++ __ blez(AT, l_4); ++ __ delayed()->nop(); ++ ++ __ bind(l_3); ++ __ lw(AT, tmp1, 0); ++ __ lw(tmp4, tmp1, 4); ++ __ lw(tmp5, tmp1, 8); ++ __ lw(tmp6, tmp1, 12); ++ __ sw(AT, tmp2, 0); ++ __ sw(tmp4, tmp2, 4); ++ __ sw(tmp5, tmp2, 8); ++ __ daddiu(tmp1, tmp1, 16); ++ __ daddiu(tmp2, tmp2, 16); ++ __ daddiu(tmp3, tmp3, -8); ++ __ daddiu(AT, tmp3, -8); ++ __ bgez(AT, l_3); ++ __ delayed()->sw(tmp6, tmp2, -4); ++ } ++ ++ __ bind(l_1); ++ // do single element copy (8 bit), can this happen? 
++ { // FasterArrayCopy ++ __ daddiu(AT, tmp3, -3); ++ __ blez(AT, l_4); ++ __ delayed()->nop(); ++ ++ __ bind(l_5); ++ __ lhu(AT, tmp1, 0); ++ __ lhu(tmp4, tmp1, 2); ++ __ lhu(tmp5, tmp1, 4); ++ __ lhu(tmp6, tmp1, 6); ++ __ sh(AT, tmp2, 0); ++ __ sh(tmp4, tmp2, 2); ++ __ sh(tmp5, tmp2, 4); ++ __ daddiu(tmp1, tmp1, 8); ++ __ daddiu(tmp2, tmp2, 8); ++ __ daddiu(tmp3, tmp3, -4); ++ __ daddiu(AT, tmp3, -4); ++ __ bgez(AT, l_5); ++ __ delayed()->sh(tmp6, tmp2, -2); ++ } ++ // single element ++ __ bind(l_4); ++ ++ __ pop(tmp6); ++ __ pop(tmp5); ++ __ pop(tmp4); ++ ++ __ bind(l_14); ++ { // FasterArrayCopy ++ __ beq(R0, tmp3, l_13); ++ __ delayed()->nop(); ++ ++ __ bind(l_12); ++ __ lhu(AT, tmp1, 0); ++ __ sh(AT, tmp2, 0); ++ __ daddiu(tmp1, tmp1, 2); ++ __ daddiu(tmp2, tmp2, 2); ++ __ daddiu(tmp3, tmp3, -1); ++ __ daddiu(AT, tmp3, -1); ++ __ bgez(AT, l_12); ++ __ delayed()->nop(); ++ } ++ ++ __ bind(l_13); ++ __ pop(tmp3); ++ __ pop(tmp2); ++ __ pop(tmp1); ++ ++ __ jr(RA); ++ __ delayed()->nop(); ++ ++ __ bind(l_debug); ++ __ stop("generate_disjoint_short_copy should not reach here"); ++ return start; ++ } ++ ++ // Arguments: ++ // aligned - true => Input and output aligned on a HeapWord == 8-byte boundary ++ // ignored ++ // name - stub name string ++ // ++ // Inputs: ++ // c_rarg0 - source array address ++ // c_rarg1 - destination array address ++ // c_rarg2 - element count, treated as ssize_t, can be zero ++ // ++ // If 'from' and/or 'to' are aligned on 4- or 2-byte boundaries, we ++ // let the hardware handle it. The two or four words within dwords ++ // or qwords that span cache line boundaries will still be loaded ++ // and stored atomically. ++ // ++ address generate_conjoint_short_copy(bool aligned, const char *name) { ++ StubCodeMark mark(this, "StubRoutines", name); ++ __ align(CodeEntryAlignment); ++ address start = __ pc(); ++ ++ Label l_exit, l_copy_short, l_from_unaligned, l_unaligned, l_4_bytes_aligned; ++ ++ address nooverlap_target = aligned ? ++ StubRoutines::arrayof_jshort_disjoint_arraycopy() : ++ StubRoutines::jshort_disjoint_arraycopy(); ++ ++ array_overlap_test(nooverlap_target, 1); ++ ++ const Register from = A0; // source array address ++ const Register to = A1; // destination array address ++ const Register count = A2; // elements count ++ const Register end_from = T3; // source array end address ++ const Register end_to = T0; // destination array end address ++ const Register end_count = T1; // destination array end address ++ ++ __ push(end_from); ++ __ push(end_to); ++ __ push(end_count); ++ __ push(T8); ++ ++ // copy from high to low ++ __ move(end_count, count); ++ __ sll(AT, end_count, Address::times_2); ++ __ daddu(end_from, from, AT); ++ __ daddu(end_to, to, AT); ++ ++ // If end_from and end_to has differante alignment, unaligned copy is performed. ++ __ andi(AT, end_from, 3); ++ __ andi(T8, end_to, 3); ++ __ bne(AT, T8, l_copy_short); ++ __ delayed()->nop(); ++ ++ // First deal with the unaligned data at the top. ++ __ bind(l_unaligned); ++ __ beq(end_count, R0, l_exit); ++ __ delayed()->nop(); ++ ++ __ andi(AT, end_from, 3); ++ __ bne(AT, R0, l_from_unaligned); ++ __ delayed()->nop(); ++ ++ __ andi(AT, end_to, 3); ++ __ beq(AT, R0, l_4_bytes_aligned); ++ __ delayed()->nop(); ++ ++ // Copy 1 element if necessary to align to 4 bytes. 
++ __ bind(l_from_unaligned); ++ __ lhu(AT, end_from, -2); ++ __ sh(AT, end_to, -2); ++ __ daddiu(end_from, end_from, -2); ++ __ daddiu(end_to, end_to, -2); ++ __ daddiu(end_count, end_count, -1); ++ __ b(l_unaligned); ++ __ delayed()->nop(); ++ ++ // now end_to, end_from point to 4-byte aligned high-ends ++ // end_count contains byte count that is not copied. ++ // copy 4 bytes at a time ++ __ bind(l_4_bytes_aligned); ++ ++ __ daddiu(AT, end_count, -1); ++ __ blez(AT, l_copy_short); ++ __ delayed()->nop(); ++ ++ __ lw(AT, end_from, -4); ++ __ sw(AT, end_to, -4); ++ __ addiu(end_from, end_from, -4); ++ __ addiu(end_to, end_to, -4); ++ __ addiu(end_count, end_count, -2); ++ __ b(l_4_bytes_aligned); ++ __ delayed()->nop(); ++ ++ // copy 1 element at a time ++ __ bind(l_copy_short); ++ __ beq(end_count, R0, l_exit); ++ __ delayed()->nop(); ++ __ lhu(AT, end_from, -2); ++ __ sh(AT, end_to, -2); ++ __ daddiu(end_from, end_from, -2); ++ __ daddiu(end_to, end_to, -2); ++ __ daddiu(end_count, end_count, -1); ++ __ b(l_copy_short); ++ __ delayed()->nop(); ++ ++ __ bind(l_exit); ++ __ pop(T8); ++ __ pop(end_count); ++ __ pop(end_to); ++ __ pop(end_from); ++ __ jr(RA); ++ __ delayed()->nop(); ++ return start; ++ } ++ ++ // Arguments: ++ // aligned - true => Input and output aligned on a HeapWord == 8-byte boundary ++ // ignored ++ // is_oop - true => oop array, so generate store check code ++ // name - stub name string ++ // ++ // Inputs: ++ // c_rarg0 - source array address ++ // c_rarg1 - destination array address ++ // c_rarg2 - element count, treated as ssize_t, can be zero ++ // ++ // If 'from' and/or 'to' are aligned on 4-byte boundaries, we let ++ // the hardware handle it. The two dwords within qwords that span ++ // cache line boundaries will still be loaded and stored atomicly. ++ // ++ // Side Effects: ++ // disjoint_int_copy_entry is set to the no-overlap entry point ++ // used by generate_conjoint_int_oop_copy(). 
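A note on naming before the next generators: a "disjoint" stub is entered only when the caller has already proven that the arrays cannot overlap, so it is free to copy in ascending order, while a "conjoint" stub first runs array_overlap_test and branches to the matching disjoint stub when there is no overlap, otherwise it copies from the high addresses downward (as the short-copy routine above does). A minimal C++ sketch of that decision, with an invented name, purely for illustration:

#include <cstddef>
#include <cstdint>

// Illustration only, not part of the patch: the choice the conjoint stubs make in assembly.
static void conjoint_copy_sketch(int32_t* dst, const int32_t* src, size_t count) {
    if (dst <= src || dst >= src + count) {
        for (size_t i = 0; i < count; ++i)     // no harmful overlap: ascending copy,
            dst[i] = src[i];                   // what the disjoint stubs assume
    } else {
        for (size_t i = count; i > 0; --i)     // destination overlaps the source tail:
            dst[i - 1] = src[i - 1];           // copy descending, from high to low
    }
}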
++ // ++ address generate_disjoint_int_oop_copy(bool aligned, bool is_oop, const char *name, bool dest_uninitialized = false) { ++ Label l_3, l_4, l_5, l_6, l_7; ++ StubCodeMark mark(this, "StubRoutines", name); ++ ++ __ align(CodeEntryAlignment); ++ address start = __ pc(); ++ __ push(T3); ++ __ push(T0); ++ __ push(T1); ++ __ push(T8); ++ __ push(T9); ++ __ move(T1, A2); ++ __ move(T3, A0); ++ __ move(T0, A1); ++ ++ if (is_oop) { ++ gen_write_ref_array_pre_barrier(A1, A2, dest_uninitialized); ++ } ++ ++ if(!aligned) { ++ __ xorr(AT, T3, T0); ++ __ andi(AT, AT, 7); ++ __ bne(AT, R0, l_5); // not same alignment mod 8 -> copy 1 element each time ++ __ delayed()->nop(); ++ ++ __ andi(AT, T3, 7); ++ __ beq(AT, R0, l_6); //copy 2 elements each time ++ __ delayed()->nop(); ++ ++ __ lw(AT, T3, 0); ++ __ daddiu(T1, T1, -1); ++ __ sw(AT, T0, 0); ++ __ daddiu(T3, T3, 4); ++ __ daddiu(T0, T0, 4); ++ } ++ ++ { ++ __ bind(l_6); ++ __ daddiu(AT, T1, -1); ++ __ blez(AT, l_5); ++ __ delayed()->nop(); ++ ++ __ bind(l_7); ++ __ ld(AT, T3, 0); ++ __ sd(AT, T0, 0); ++ __ daddiu(T3, T3, 8); ++ __ daddiu(T0, T0, 8); ++ __ daddiu(T1, T1, -2); ++ __ daddiu(AT, T1, -2); ++ __ bgez(AT, l_7); ++ __ delayed()->nop(); ++ } ++ ++ __ bind(l_5); ++ __ beq(T1, R0, l_4); ++ __ delayed()->nop(); ++ ++ __ align(16); ++ __ bind(l_3); ++ __ lw(AT, T3, 0); ++ __ sw(AT, T0, 0); ++ __ addiu(T3, T3, 4); ++ __ addiu(T0, T0, 4); ++ __ addiu(T1, T1, -1); ++ __ bne(T1, R0, l_3); ++ __ delayed()->nop(); ++ ++ // exit ++ __ bind(l_4); ++ if (is_oop) { ++ gen_write_ref_array_post_barrier(A1, A2, T1); ++ } ++ __ pop(T9); ++ __ pop(T8); ++ __ pop(T1); ++ __ pop(T0); ++ __ pop(T3); ++ __ jr(RA); ++ __ delayed()->nop(); ++ ++ return start; ++ } ++ ++ // Arguments: ++ // aligned - true => Input and output aligned on a HeapWord == 8-byte boundary ++ // ignored ++ // is_oop - true => oop array, so generate store check code ++ // name - stub name string ++ // ++ // Inputs: ++ // c_rarg0 - source array address ++ // c_rarg1 - destination array address ++ // c_rarg2 - element count, treated as ssize_t, can be zero ++ // ++ // If 'from' and/or 'to' are aligned on 4-byte boundaries, we let ++ // the hardware handle it. The two dwords within qwords that span ++ // cache line boundaries will still be loaded and stored atomicly. ++ // ++ address generate_conjoint_int_oop_copy(bool aligned, bool is_oop, const char *name, bool dest_uninitialized = false) { ++ Label l_2, l_4; ++ StubCodeMark mark(this, "StubRoutines", name); ++ __ align(CodeEntryAlignment); ++ address start = __ pc(); ++ address nooverlap_target; ++ ++ if (is_oop) { ++ nooverlap_target = aligned ? ++ StubRoutines::arrayof_oop_disjoint_arraycopy() : ++ StubRoutines::oop_disjoint_arraycopy(); ++ } else { ++ nooverlap_target = aligned ? 
++ StubRoutines::arrayof_jint_disjoint_arraycopy() : ++ StubRoutines::jint_disjoint_arraycopy(); ++ } ++ ++ array_overlap_test(nooverlap_target, 2); ++ ++ if (is_oop) { ++ gen_write_ref_array_pre_barrier(A1, A2, dest_uninitialized); ++ } ++ ++ __ push(T3); ++ __ push(T0); ++ __ push(T1); ++ __ push(T8); ++ __ push(T9); ++ ++ __ move(T1, A2); ++ __ move(T3, A0); ++ __ move(T0, A1); ++ ++ // T3: source array address ++ // T0: destination array address ++ // T1: element count ++ ++ __ sll(AT, T1, Address::times_4); ++ __ addu(AT, T3, AT); ++ __ daddiu(T3, AT, -4); ++ __ sll(AT, T1, Address::times_4); ++ __ addu(AT, T0, AT); ++ __ daddiu(T0, AT, -4); ++ ++ __ beq(T1, R0, l_4); ++ __ delayed()->nop(); ++ ++ __ align(16); ++ __ bind(l_2); ++ __ lw(AT, T3, 0); ++ __ sw(AT, T0, 0); ++ __ addiu(T3, T3, -4); ++ __ addiu(T0, T0, -4); ++ __ addiu(T1, T1, -1); ++ __ bne(T1, R0, l_2); ++ __ delayed()->nop(); ++ ++ __ bind(l_4); ++ if (is_oop) { ++ gen_write_ref_array_post_barrier(A1, A2, T1); ++ } ++ __ pop(T9); ++ __ pop(T8); ++ __ pop(T1); ++ __ pop(T0); ++ __ pop(T3); ++ __ jr(RA); ++ __ delayed()->nop(); ++ ++ return start; ++ } ++ ++ // Arguments: ++ // aligned - true => Input and output aligned on a HeapWord == 8-byte boundary ++ // ignored ++ // is_oop - true => oop array, so generate store check code ++ // name - stub name string ++ // ++ // Inputs: ++ // c_rarg0 - source array address ++ // c_rarg1 - destination array address ++ // c_rarg2 - element count, treated as ssize_t, can be zero ++ // ++ // If 'from' and/or 'to' are aligned on 4-byte boundaries, we let ++ // the hardware handle it. The two dwords within qwords that span ++ // cache line boundaries will still be loaded and stored atomicly. ++ // ++ // Side Effects: ++ // disjoint_int_copy_entry is set to the no-overlap entry point ++ // used by generate_conjoint_int_oop_copy(). ++ // ++ address generate_disjoint_long_oop_copy(bool aligned, bool is_oop, const char *name, bool dest_uninitialized = false) { ++ Label l_3, l_4; ++ StubCodeMark mark(this, "StubRoutines", name); ++ __ align(CodeEntryAlignment); ++ address start = __ pc(); ++ ++ if (is_oop) { ++ gen_write_ref_array_pre_barrier(A1, A2, dest_uninitialized); ++ } ++ ++ __ push(T3); ++ __ push(T0); ++ __ push(T1); ++ __ push(T8); ++ __ push(T9); ++ ++ __ move(T1, A2); ++ __ move(T3, A0); ++ __ move(T0, A1); ++ ++ // T3: source array address ++ // T0: destination array address ++ // T1: element count ++ ++ __ beq(T1, R0, l_4); ++ __ delayed()->nop(); ++ ++ __ align(16); ++ __ bind(l_3); ++ __ ld(AT, T3, 0); ++ __ sd(AT, T0, 0); ++ __ addiu(T3, T3, 8); ++ __ addiu(T0, T0, 8); ++ __ addiu(T1, T1, -1); ++ __ bne(T1, R0, l_3); ++ __ delayed()->nop(); ++ ++ // exit ++ __ bind(l_4); ++ if (is_oop) { ++ gen_write_ref_array_post_barrier(A1, A2, T1); ++ } ++ __ pop(T9); ++ __ pop(T8); ++ __ pop(T1); ++ __ pop(T0); ++ __ pop(T3); ++ __ jr(RA); ++ __ delayed()->nop(); ++ return start; ++ } ++ ++ // Arguments: ++ // aligned - true => Input and output aligned on a HeapWord == 8-byte boundary ++ // ignored ++ // is_oop - true => oop array, so generate store check code ++ // name - stub name string ++ // ++ // Inputs: ++ // c_rarg0 - source array address ++ // c_rarg1 - destination array address ++ // c_rarg2 - element count, treated as ssize_t, can be zero ++ // ++ // If 'from' and/or 'to' are aligned on 4-byte boundaries, we let ++ // the hardware handle it. The two dwords within qwords that span ++ // cache line boundaries will still be loaded and stored atomicly. 
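The is_oop variants in this group additionally bracket the copy with gen_write_ref_array_pre_barrier and gen_write_ref_array_post_barrier so the collector observes both the references being overwritten and the freshly written destination region. Conceptually it amounts to the following sketch, with hypothetical no-op hooks rather than the real HotSpot barrier interface:

#include <cstddef>

// Hypothetical stand-ins for the GC write barriers (no-ops here); real collectors
// use them for SATB logging of old values and card marking of the updated region.
static void pre_write_barrier(void** /*dst*/, size_t /*count*/)  {}
static void post_write_barrier(void** /*dst*/, size_t /*count*/) {}

static void oop_arraycopy_sketch(void** dst, void** src, size_t count) {
    pre_write_barrier(dst, count);        // before any reference slot is overwritten
    for (size_t i = 0; i < count; ++i)    // the stubs perform this loop in assembly
        dst[i] = src[i];
    post_write_barrier(dst, count);       // after the destination region is updated
}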
++ // ++ address generate_conjoint_long_oop_copy(bool aligned, bool is_oop, const char *name, bool dest_uninitialized = false) { ++ Label l_2, l_4; ++ StubCodeMark mark(this, "StubRoutines", name); ++ __ align(CodeEntryAlignment); ++ address start = __ pc(); ++ address nooverlap_target; ++ ++ if (is_oop) { ++ nooverlap_target = aligned ? ++ StubRoutines::arrayof_oop_disjoint_arraycopy() : ++ StubRoutines::oop_disjoint_arraycopy(); ++ } else { ++ nooverlap_target = aligned ? ++ StubRoutines::arrayof_jlong_disjoint_arraycopy() : ++ StubRoutines::jlong_disjoint_arraycopy(); ++ } ++ ++ array_overlap_test(nooverlap_target, 3); ++ ++ if (is_oop) { ++ gen_write_ref_array_pre_barrier(A1, A2, dest_uninitialized); ++ } ++ ++ __ push(T3); ++ __ push(T0); ++ __ push(T1); ++ __ push(T8); ++ __ push(T9); ++ ++ __ move(T1, A2); ++ __ move(T3, A0); ++ __ move(T0, A1); ++ ++ __ sll(AT, T1, Address::times_8); ++ __ addu(AT, T3, AT); ++ __ daddiu(T3, AT, -8); ++ __ sll(AT, T1, Address::times_8); ++ __ addu(AT, T0, AT); ++ __ daddiu(T0, AT, -8); ++ ++ __ beq(T1, R0, l_4); ++ __ delayed()->nop(); ++ ++ __ align(16); ++ __ bind(l_2); ++ __ ld(AT, T3, 0); ++ __ sd(AT, T0, 0); ++ __ addiu(T3, T3, -8); ++ __ addiu(T0, T0, -8); ++ __ addiu(T1, T1, -1); ++ __ bne(T1, R0, l_2); ++ __ delayed()->nop(); ++ ++ // exit ++ __ bind(l_4); ++ if (is_oop) { ++ gen_write_ref_array_post_barrier(A1, A2, T1); ++ } ++ __ pop(T9); ++ __ pop(T8); ++ __ pop(T1); ++ __ pop(T0); ++ __ pop(T3); ++ __ jr(RA); ++ __ delayed()->nop(); ++ return start; ++ } ++ ++ //FIXME ++ address generate_disjoint_long_copy(bool aligned, const char *name) { ++ Label l_1, l_2; ++ StubCodeMark mark(this, "StubRoutines", name); ++ __ align(CodeEntryAlignment); ++ address start = __ pc(); ++ ++ __ move(T1, A2); ++ __ move(T3, A0); ++ __ move(T0, A1); ++ __ push(T3); ++ __ push(T0); ++ __ push(T1); ++ __ b(l_2); ++ __ delayed()->nop(); ++ __ align(16); ++ __ bind(l_1); ++ __ ld(AT, T3, 0); ++ __ sd (AT, T0, 0); ++ __ addiu(T3, T3, 8); ++ __ addiu(T0, T0, 8); ++ __ bind(l_2); ++ __ addiu(T1, T1, -1); ++ __ bgez(T1, l_1); ++ __ delayed()->nop(); ++ __ pop(T1); ++ __ pop(T0); ++ __ pop(T3); ++ __ jr(RA); ++ __ delayed()->nop(); ++ return start; ++ } ++ ++ ++ address generate_conjoint_long_copy(bool aligned, const char *name) { ++ Label l_1, l_2; ++ StubCodeMark mark(this, "StubRoutines", name); ++ __ align(CodeEntryAlignment); ++ address start = __ pc(); ++ address nooverlap_target = aligned ? 
++ StubRoutines::arrayof_jlong_disjoint_arraycopy() : ++ StubRoutines::jlong_disjoint_arraycopy(); ++ array_overlap_test(nooverlap_target, 3); ++ ++ __ push(T3); ++ __ push(T0); ++ __ push(T1); ++ ++ __ move(T1, A2); ++ __ move(T3, A0); ++ __ move(T0, A1); ++ __ sll(AT, T1, Address::times_8); ++ __ addu(AT, T3, AT); ++ __ daddiu(T3, AT, -8); ++ __ sll(AT, T1, Address::times_8); ++ __ addu(AT, T0, AT); ++ __ daddiu(T0, AT, -8); ++ ++ __ b(l_2); ++ __ delayed()->nop(); ++ __ align(16); ++ __ bind(l_1); ++ __ ld(AT, T3, 0); ++ __ sd (AT, T0, 0); ++ __ addiu(T3, T3, -8); ++ __ addiu(T0, T0,-8); ++ __ bind(l_2); ++ __ addiu(T1, T1, -1); ++ __ bgez(T1, l_1); ++ __ delayed()->nop(); ++ __ pop(T1); ++ __ pop(T0); ++ __ pop(T3); ++ __ jr(RA); ++ __ delayed()->nop(); ++ return start; ++ } ++ ++ void generate_arraycopy_stubs() { ++ if (UseCompressedOops) { ++ StubRoutines::_oop_disjoint_arraycopy = generate_disjoint_int_oop_copy(false, true, ++ "oop_disjoint_arraycopy"); ++ StubRoutines::_oop_arraycopy = generate_conjoint_int_oop_copy(false, true, ++ "oop_arraycopy"); ++ StubRoutines::_oop_disjoint_arraycopy_uninit = generate_disjoint_int_oop_copy(false, true, ++ "oop_disjoint_arraycopy_uninit", true); ++ StubRoutines::_oop_arraycopy_uninit = generate_conjoint_int_oop_copy(false, true, ++ "oop_arraycopy_uninit", true); ++ } else { ++ StubRoutines::_oop_disjoint_arraycopy = generate_disjoint_long_oop_copy(false, true, ++ "oop_disjoint_arraycopy"); ++ StubRoutines::_oop_arraycopy = generate_conjoint_long_oop_copy(false, true, ++ "oop_arraycopy"); ++ StubRoutines::_oop_disjoint_arraycopy_uninit = generate_disjoint_long_oop_copy(false, true, ++ "oop_disjoint_arraycopy_uninit", true); ++ StubRoutines::_oop_arraycopy_uninit = generate_conjoint_long_oop_copy(false, true, ++ "oop_arraycopy_uninit", true); ++ } ++ ++ StubRoutines::_jbyte_disjoint_arraycopy = generate_disjoint_byte_copy(false, "jbyte_disjoint_arraycopy"); ++ StubRoutines::_jshort_disjoint_arraycopy = generate_disjoint_short_copy(false, "jshort_disjoint_arraycopy"); ++ StubRoutines::_jint_disjoint_arraycopy = generate_disjoint_int_oop_copy(false, false, "jint_disjoint_arraycopy"); ++ StubRoutines::_jlong_disjoint_arraycopy = generate_disjoint_long_copy(false, "jlong_disjoint_arraycopy"); ++ ++ StubRoutines::_jbyte_arraycopy = generate_conjoint_byte_copy(false, "jbyte_arraycopy"); ++ StubRoutines::_jshort_arraycopy = generate_conjoint_short_copy(false, "jshort_arraycopy"); ++ StubRoutines::_jint_arraycopy = generate_conjoint_int_oop_copy(false, false, "jint_arraycopy"); ++ StubRoutines::_jlong_arraycopy = generate_conjoint_long_copy(false, "jlong_arraycopy"); ++ ++ // We don't generate specialized code for HeapWord-aligned source ++ // arrays, so just use the code we've already generated ++ StubRoutines::_arrayof_jbyte_disjoint_arraycopy = StubRoutines::_jbyte_disjoint_arraycopy; ++ StubRoutines::_arrayof_jbyte_arraycopy = StubRoutines::_jbyte_arraycopy; ++ ++ StubRoutines::_arrayof_jshort_disjoint_arraycopy = StubRoutines::_jshort_disjoint_arraycopy; ++ StubRoutines::_arrayof_jshort_arraycopy = StubRoutines::_jshort_arraycopy; ++ ++ StubRoutines::_arrayof_jint_disjoint_arraycopy = StubRoutines::_jint_disjoint_arraycopy; ++ StubRoutines::_arrayof_jint_arraycopy = StubRoutines::_jint_arraycopy; ++ ++ StubRoutines::_arrayof_jlong_disjoint_arraycopy = StubRoutines::_jlong_disjoint_arraycopy; ++ StubRoutines::_arrayof_jlong_arraycopy = StubRoutines::_jlong_arraycopy; ++ ++ StubRoutines::_arrayof_oop_disjoint_arraycopy = 
StubRoutines::_oop_disjoint_arraycopy; ++ StubRoutines::_arrayof_oop_arraycopy = StubRoutines::_oop_arraycopy; ++ ++ StubRoutines::_arrayof_oop_disjoint_arraycopy_uninit = StubRoutines::_oop_disjoint_arraycopy_uninit; ++ StubRoutines::_arrayof_oop_arraycopy_uninit = StubRoutines::_oop_arraycopy_uninit; ++ } ++ ++ // add a function to implement SafeFetch32 and SafeFetchN ++ void generate_safefetch(const char* name, int size, address* entry, ++ address* fault_pc, address* continuation_pc) { ++ // safefetch signatures: ++ // int SafeFetch32(int* adr, int errValue); ++ // intptr_t SafeFetchN (intptr_t* adr, intptr_t errValue); ++ // ++ // arguments: ++ // A0 = adr ++ // A1 = errValue ++ // ++ // result: ++ // PPC_RET = *adr or errValue ++ ++ StubCodeMark mark(this, "StubRoutines", name); ++ ++ // Entry point, pc or function descriptor. ++ *entry = __ pc(); ++ ++ // Load *adr into A1, may fault. ++ *fault_pc = __ pc(); ++ switch (size) { ++ case 4: ++ // int32_t ++ __ lw(A1, A0, 0); ++ break; ++ case 8: ++ // int64_t ++ __ ld(A1, A0, 0); ++ break; ++ default: ++ ShouldNotReachHere(); ++ } ++ ++ // return errValue or *adr ++ *continuation_pc = __ pc(); ++ __ addu(V0,A1,R0); ++ __ jr(RA); ++ __ delayed()->nop(); ++ } ++ ++ ++#undef __ ++#define __ masm-> ++ ++ // Continuation point for throwing of implicit exceptions that are ++ // not handled in the current activation. Fabricates an exception ++ // oop and initiates normal exception dispatching in this ++ // frame. Since we need to preserve callee-saved values (currently ++ // only for C2, but done for C1 as well) we need a callee-saved oop ++ // map and therefore have to make these stubs into RuntimeStubs ++ // rather than BufferBlobs. If the compiler needs all registers to ++ // be preserved between the fault point and the exception handler ++ // then it must assume responsibility for that in ++ // AbstractCompiler::continuation_for_implicit_null_exception or ++ // continuation_for_implicit_division_by_zero_exception. All other ++ // implicit exceptions (e.g., NullPointerException or ++ // AbstractMethodError on entry) are either at call sites or ++ // otherwise assume that stack unwinding will be initiated, so ++ // caller saved registers were assumed volatile in the compiler. ++ address generate_throw_exception(const char* name, ++ address runtime_entry, ++ bool restore_saved_exception_pc) { ++ // Information about frame layout at time of blocking runtime call. ++ // Note that we only have to preserve callee-saved registers since ++ // the compilers are responsible for supplying a continuation point ++ // if they expect all registers to be preserved. 
++ enum layout { ++ thread_off, // last_java_sp ++ S7_off, // callee saved register sp + 1 ++ S6_off, // callee saved register sp + 2 ++ S5_off, // callee saved register sp + 3 ++ S4_off, // callee saved register sp + 4 ++ S3_off, // callee saved register sp + 5 ++ S2_off, // callee saved register sp + 6 ++ S1_off, // callee saved register sp + 7 ++ S0_off, // callee saved register sp + 8 ++ FP_off, ++ ret_address, ++ framesize ++ }; ++ ++ int insts_size = 2048; ++ int locs_size = 32; ++ ++ // CodeBuffer* code = new CodeBuffer(insts_size, locs_size, 0, 0, 0, false, ++ // NULL, NULL, NULL, false, NULL, name, false); ++ CodeBuffer code (name , insts_size, locs_size); ++ OopMapSet* oop_maps = new OopMapSet(); ++ MacroAssembler* masm = new MacroAssembler(&code); ++ ++ address start = __ pc(); ++ ++ // This is an inlined and slightly modified version of call_VM ++ // which has the ability to fetch the return PC out of ++ // thread-local storage and also sets up last_Java_sp slightly ++ // differently than the real call_VM ++#ifndef OPT_THREAD ++ Register java_thread = TREG; ++ __ get_thread(java_thread); ++#else ++ Register java_thread = TREG; ++#endif ++ if (restore_saved_exception_pc) { ++ __ ld(RA, java_thread, in_bytes(JavaThread::saved_exception_pc_offset())); ++ } ++ ++ __ enter(); // required for proper stackwalking of RuntimeStub frame ++ ++ __ addiu(SP, SP, (-1) * (framesize-2) * wordSize); // prolog ++ __ sd(S0, SP, S0_off * wordSize); ++ __ sd(S1, SP, S1_off * wordSize); ++ __ sd(S2, SP, S2_off * wordSize); ++ __ sd(S3, SP, S3_off * wordSize); ++ __ sd(S4, SP, S4_off * wordSize); ++ __ sd(S5, SP, S5_off * wordSize); ++ __ sd(S6, SP, S6_off * wordSize); ++ __ sd(S7, SP, S7_off * wordSize); ++ ++ int frame_complete = __ pc() - start; ++ // push java thread (becomes first argument of C function) ++ __ sd(java_thread, SP, thread_off * wordSize); ++ if (java_thread != A0) ++ __ move(A0, java_thread); ++ ++ // Set up last_Java_sp and last_Java_fp ++ __ set_last_Java_frame(java_thread, SP, FP, NULL); ++ // Align stack ++ __ set64(AT, -(StackAlignmentInBytes)); ++ __ andr(SP, SP, AT); ++ ++ __ relocate(relocInfo::internal_pc_type); ++ { ++ intptr_t save_pc = (intptr_t)__ pc() + NativeMovConstReg::instruction_size + 28; ++ __ patchable_set48(AT, save_pc); ++ } ++ __ sd(AT, java_thread, in_bytes(JavaThread::last_Java_pc_offset())); ++ ++ // Call runtime ++ __ call(runtime_entry); ++ __ delayed()->nop(); ++ // Generate oop map ++ OopMap* map = new OopMap(framesize, 0); ++ oop_maps->add_gc_map(__ offset(), map); ++ ++ // restore the thread (cannot use the pushed argument since arguments ++ // may be overwritten by C code generated by an optimizing compiler); ++ // however can use the register value directly if it is callee saved. ++#ifndef OPT_THREAD ++ __ get_thread(java_thread); ++#endif ++ ++ __ ld(SP, java_thread, in_bytes(JavaThread::last_Java_sp_offset())); ++ __ reset_last_Java_frame(java_thread, true); ++ ++ // Restore callee save registers. 
This must be done after resetting the Java frame ++ __ ld(S0, SP, S0_off * wordSize); ++ __ ld(S1, SP, S1_off * wordSize); ++ __ ld(S2, SP, S2_off * wordSize); ++ __ ld(S3, SP, S3_off * wordSize); ++ __ ld(S4, SP, S4_off * wordSize); ++ __ ld(S5, SP, S5_off * wordSize); ++ __ ld(S6, SP, S6_off * wordSize); ++ __ ld(S7, SP, S7_off * wordSize); ++ ++ // discard arguments ++ __ move(SP, FP); // epilog ++ __ pop(FP); ++ // check for pending exceptions ++#ifdef ASSERT ++ Label L; ++ __ ld(AT, java_thread, in_bytes(Thread::pending_exception_offset())); ++ __ bne(AT, R0, L); ++ __ delayed()->nop(); ++ __ should_not_reach_here(); ++ __ bind(L); ++#endif //ASSERT ++ __ jmp(StubRoutines::forward_exception_entry(), relocInfo::runtime_call_type); ++ __ delayed()->nop(); ++ RuntimeStub* stub = RuntimeStub::new_runtime_stub(name, ++ &code, ++ frame_complete, ++ framesize, ++ oop_maps, false); ++ return stub->entry_point(); ++ } ++ ++ // Initialization ++ void generate_initial() { ++ // Generates all stubs and initializes the entry points ++ ++ //------------------------------------------------------------- ++ //----------------------------------------------------------- ++ // entry points that exist in all platforms ++ // Note: This is code that could be shared among different platforms - however the benefit seems to be smaller ++ // than the disadvantage of having a much more complicated generator structure. ++ // See also comment in stubRoutines.hpp. ++ StubRoutines::_forward_exception_entry = generate_forward_exception(); ++ StubRoutines::_call_stub_entry = generate_call_stub(StubRoutines::_call_stub_return_address); ++ // is referenced by megamorphic call ++ StubRoutines::_catch_exception_entry = generate_catch_exception(); ++ ++ StubRoutines::_handler_for_unsafe_access_entry = generate_handler_for_unsafe_access(); ++ ++ StubRoutines::_throw_StackOverflowError_entry = generate_throw_exception("StackOverflowError throw_exception", ++ CAST_FROM_FN_PTR(address, SharedRuntime::throw_StackOverflowError), false); ++ } ++ ++ void generate_all() { ++ // Generates all stubs and initializes the entry points ++ ++ // These entry points require SharedInfo::stack0 to be set up in ++ // non-core builds and need to be relocatable, so they each ++ // fabricate a RuntimeStub internally. ++ StubRoutines::_throw_AbstractMethodError_entry = generate_throw_exception("AbstractMethodError throw_exception", ++ CAST_FROM_FN_PTR(address, SharedRuntime::throw_AbstractMethodError), false); ++ ++ StubRoutines::_throw_IncompatibleClassChangeError_entry = generate_throw_exception("IncompatibleClassChangeError throw_exception", ++ CAST_FROM_FN_PTR(address, SharedRuntime:: throw_IncompatibleClassChangeError), false); ++ ++ StubRoutines::_throw_NullPointerException_at_call_entry = generate_throw_exception("NullPointerException at call throw_exception", ++ CAST_FROM_FN_PTR(address, SharedRuntime::throw_NullPointerException_at_call), false); ++ ++ // entry points that are platform specific ++ ++ // support for verify_oop (must happen after universe_init) ++ StubRoutines::_verify_oop_subroutine_entry = generate_verify_oop(); ++#ifndef CORE ++ // arraycopy stubs used by compilers ++ generate_arraycopy_stubs(); ++#endif ++ ++ // Safefetch stubs. 
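generate_safefetch above emits exactly one load whose address is recorded as fault_pc; if that load traps, the VM's fault handler resumes execution at continuation_pc, so the stub returns errValue instead of crashing. The caller-visible contract amounts to the portable sketch below; it only models the semantics, since the real stub relies on the fault handler rather than a null check:

// Sketch of the SafeFetch32 contract; SafeFetchN is the same with intptr_t.
static int SafeFetch32_sketch(int* adr, int errValue) {
    if (adr == nullptr)      // stand-in for "the load faulted"
        return errValue;
    return *adr;             // common case: the address was readable
}
// Typical use: int v = SafeFetch32_sketch(possibly_bad_ptr, -1);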
++ generate_safefetch("SafeFetch32", sizeof(int), &StubRoutines::_safefetch32_entry, ++ &StubRoutines::_safefetch32_fault_pc, ++ &StubRoutines::_safefetch32_continuation_pc); ++ generate_safefetch("SafeFetchN", sizeof(intptr_t), &StubRoutines::_safefetchN_entry, ++ &StubRoutines::_safefetchN_fault_pc, ++ &StubRoutines::_safefetchN_continuation_pc); ++ ++ if (UseMontgomeryMultiplyIntrinsic) { ++ StubRoutines::_montgomeryMultiply ++ = CAST_FROM_FN_PTR(address, SharedRuntime::montgomery_multiply); ++ } ++ if (UseMontgomerySquareIntrinsic) { ++ StubRoutines::_montgomerySquare ++ = CAST_FROM_FN_PTR(address, SharedRuntime::montgomery_square); ++ } ++ } ++ ++ public: ++ StubGenerator(CodeBuffer* code, bool all) : StubCodeGenerator(code) { ++ if (all) { ++ generate_all(); ++ } else { ++ generate_initial(); ++ } ++ } ++}; // end class declaration ++ ++void StubGenerator_generate(CodeBuffer* code, bool all) { ++ StubGenerator g(code, all); ++} +diff --git a/hotspot/src/cpu/mips/vm/stubRoutines_mips_64.cpp b/hotspot/src/cpu/mips/vm/stubRoutines_mips_64.cpp +new file mode 100644 +index 0000000000..733a48b889 +--- /dev/null ++++ b/hotspot/src/cpu/mips/vm/stubRoutines_mips_64.cpp +@@ -0,0 +1,35 @@ ++/* ++ * Copyright (c) 2003, 2013, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2016, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#include "precompiled.hpp" ++#include "runtime/deoptimization.hpp" ++#include "runtime/frame.inline.hpp" ++#include "runtime/stubRoutines.hpp" ++#include "runtime/thread.inline.hpp" ++ ++// a description of how to extend it, see the stubRoutines.hpp file. ++ ++//find the last fp value ++address StubRoutines::gs2::_call_stub_compiled_return = NULL; +diff --git a/hotspot/src/cpu/mips/vm/stubRoutines_mips_64.hpp b/hotspot/src/cpu/mips/vm/stubRoutines_mips_64.hpp +new file mode 100644 +index 0000000000..920c08844e +--- /dev/null ++++ b/hotspot/src/cpu/mips/vm/stubRoutines_mips_64.hpp +@@ -0,0 +1,59 @@ ++/* ++ * Copyright (c) 2003, 2013, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. 
++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#ifndef CPU_MIPS_VM_STUBROUTINES_MIPS_64_HPP ++#define CPU_MIPS_VM_STUBROUTINES_MIPS_64_HPP ++ ++// This file holds the platform specific parts of the StubRoutines ++// definition. See stubRoutines.hpp for a description on how to ++// extend it. ++ ++static bool returns_to_call_stub(address return_pc){ ++ return return_pc == _call_stub_return_address||return_pc == gs2::get_call_stub_compiled_return(); ++} ++ ++enum platform_dependent_constants { ++ code_size1 = 20000, // simply increase if too small (assembler will crash if too small) ++ code_size2 = 40000 // simply increase if too small (assembler will crash if too small) ++}; ++ ++class gs2 { ++ friend class StubGenerator; ++ friend class VMStructs; ++ private: ++ // If we call compiled code directly from the call stub we will ++ // need to adjust the return back to the call stub to a specialized ++ // piece of code that can handle compiled results and cleaning the fpu ++ // stack. The variable holds that location. ++ static address _call_stub_compiled_return; ++ ++public: ++ // Call back points for traps in compiled code ++ static address get_call_stub_compiled_return() { return _call_stub_compiled_return; } ++ static void set_call_stub_compiled_return(address ret){ _call_stub_compiled_return = ret; } ++ ++}; ++ ++#endif // CPU_MIPS_VM_STUBROUTINES_MIPS_64_HPP +diff --git a/hotspot/src/cpu/mips/vm/templateInterpreterGenerator_mips.hpp b/hotspot/src/cpu/mips/vm/templateInterpreterGenerator_mips.hpp +new file mode 100644 +index 0000000000..a83c3728f8 +--- /dev/null ++++ b/hotspot/src/cpu/mips/vm/templateInterpreterGenerator_mips.hpp +@@ -0,0 +1,35 @@ ++/* ++ * Copyright (c) 1997, 2010, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2016, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ * ++ */ ++ ++#ifndef CPU_MIPS_VM_TEMPLATEINTERPRETERGENERATOR_MIPS_HPP ++#define CPU_MIPS_VM_TEMPLATEINTERPRETERGENERATOR_MIPS_HPP ++ ++ protected: ++ ++ void generate_fixed_frame(bool native_call); ++ ++ // address generate_asm_interpreter_entry(bool synchronized); ++ ++#endif // CPU_MIPS_VM_TEMPLATEINTERPRETERGENERATOR_MIPS_HPP +diff --git a/hotspot/src/cpu/mips/vm/templateInterpreter_mips.hpp b/hotspot/src/cpu/mips/vm/templateInterpreter_mips.hpp +new file mode 100644 +index 0000000000..204f1b2f21 +--- /dev/null ++++ b/hotspot/src/cpu/mips/vm/templateInterpreter_mips.hpp +@@ -0,0 +1,41 @@ ++/* ++ * Copyright (c) 1997, 2013, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#ifndef CPU_MIPS_VM_TEMPLATEINTERPRETER_MIPS_HPP ++#define CPU_MIPS_VM_TEMPLATEINTERPRETER_MIPS_HPP ++ ++ ++ protected: ++ ++ // Size of interpreter code. Increase if too small. Interpreter will ++ // fail with a guarantee ("not enough space for interpreter generation"); ++ // if too small. ++ // Run with +PrintInterpreter to get the VM to print out the size. ++ // Max size with JVMTI ++ // The sethi() instruction generates lots more instructions when shell ++ // stack limit is unlimited, so that's why this is much bigger. ++ const static int InterpreterCodeSize = 500 * K; ++ ++#endif // CPU_MIPS_VM_TEMPLATEINTERPRETER_MIPS_HPP +diff --git a/hotspot/src/cpu/mips/vm/templateInterpreter_mips_64.cpp b/hotspot/src/cpu/mips/vm/templateInterpreter_mips_64.cpp +new file mode 100644 +index 0000000000..0cc5d33070 +--- /dev/null ++++ b/hotspot/src/cpu/mips/vm/templateInterpreter_mips_64.cpp +@@ -0,0 +1,2306 @@ ++/* ++ * Copyright (c) 2003, 2013, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). 
++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#include "precompiled.hpp" ++#include "asm/macroAssembler.hpp" ++#include "interpreter/bytecodeHistogram.hpp" ++#include "interpreter/interpreter.hpp" ++#include "interpreter/interpreterGenerator.hpp" ++#include "interpreter/interpreterRuntime.hpp" ++#include "interpreter/templateTable.hpp" ++#include "oops/arrayOop.hpp" ++#include "oops/methodData.hpp" ++#include "oops/method.hpp" ++#include "oops/oop.inline.hpp" ++#include "prims/jvmtiExport.hpp" ++#include "prims/jvmtiThreadState.hpp" ++#include "runtime/arguments.hpp" ++#include "runtime/deoptimization.hpp" ++#include "runtime/frame.inline.hpp" ++#include "runtime/sharedRuntime.hpp" ++#include "runtime/stubRoutines.hpp" ++#include "runtime/synchronizer.hpp" ++#include "runtime/timer.hpp" ++#include "runtime/vframeArray.hpp" ++#include "utilities/debug.hpp" ++ ++#define __ _masm-> ++ ++#define A0 RA0 ++#define A1 RA1 ++#define A2 RA2 ++#define A3 RA3 ++#define A4 RA4 ++#define A5 RA5 ++#define A6 RA6 ++#define A7 RA7 ++#define T0 RT0 ++#define T1 RT1 ++#define T2 RT2 ++#define T3 RT3 ++#define T8 RT8 ++#define T9 RT9 ++ ++#ifndef CC_INTERP ++ ++// asm based interpreter deoptimization helpers ++int AbstractInterpreter::size_activation(int max_stack, ++ int temps, ++ int extra_args, ++ int monitors, ++ int callee_params, ++ int callee_locals, ++ bool is_top_frame) { ++ // Note: This calculation must exactly parallel the frame setup ++ // in AbstractInterpreterGenerator::generate_method_entry. ++ ++ // fixed size of an interpreter frame: ++ int overhead = frame::sender_sp_offset - ++ frame::interpreter_frame_initial_sp_offset; ++ // Our locals were accounted for by the caller (or last_frame_adjust ++ // on the transistion) Since the callee parameters already account ++ // for the callee's params we only need to account for the extra ++ // locals. 
++ int size = overhead + ++ (callee_locals - callee_params)*Interpreter::stackElementWords + ++ monitors * frame::interpreter_frame_monitor_size() + ++ temps* Interpreter::stackElementWords + extra_args; ++ ++ return size; ++} ++ ++ ++const int Interpreter::return_sentinel = 0xfeedbeed; ++const int method_offset = frame::interpreter_frame_method_offset * wordSize; ++const int bci_offset = frame::interpreter_frame_bcx_offset * wordSize; ++const int locals_offset = frame::interpreter_frame_locals_offset * wordSize; ++ ++//----------------------------------------------------------------------------- ++ ++address TemplateInterpreterGenerator::generate_StackOverflowError_handler() { ++ address entry = __ pc(); ++ ++#ifdef ASSERT ++ { ++ Label L; ++ __ addiu(T1, FP, frame::interpreter_frame_monitor_block_top_offset * wordSize); ++ __ subu(T1, T1, SP); // T1 = maximal sp for current fp ++ __ bgez(T1, L); // check if frame is complete ++ __ delayed()->nop(); ++ __ stop("interpreter frame not set up"); ++ __ bind(L); ++ } ++#endif // ASSERT ++ // Restore bcp under the assumption that the current frame is still ++ // interpreted ++ // FIXME: please change the func restore_bcp ++ // S0 is the conventional register for bcp ++ __ restore_bcp(); ++ ++ // expression stack must be empty before entering the VM if an ++ // exception happened ++ __ empty_expression_stack(); ++ // throw exception ++ // FIXME: why do not pass parameter thread ? ++ __ call_VM(NOREG, CAST_FROM_FN_PTR(address, InterpreterRuntime::throw_StackOverflowError)); ++ return entry; ++} ++ ++address TemplateInterpreterGenerator::generate_ArrayIndexOutOfBounds_handler( ++ const char* name) { ++ address entry = __ pc(); ++ // expression stack must be empty before entering the VM if an ++ // exception happened ++ __ empty_expression_stack(); ++ __ li(A1, (long)name); ++ __ call_VM(noreg, CAST_FROM_FN_PTR(address, ++ InterpreterRuntime::throw_ArrayIndexOutOfBoundsException), A1, A2); ++ return entry; ++} ++ ++address TemplateInterpreterGenerator::generate_ClassCastException_handler() { ++ address entry = __ pc(); ++ ++ // expression stack must be empty before entering the VM if an ++ // exception happened ++ __ empty_expression_stack(); ++ __ empty_FPU_stack(); ++ __ call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::throw_ClassCastException), FSR); ++ return entry; ++} ++ ++address TemplateInterpreterGenerator::generate_exception_handler_common( ++ const char* name, const char* message, bool pass_oop) { ++ assert(!pass_oop || message == NULL, "either oop or message but not both"); ++ address entry = __ pc(); ++ ++ // expression stack must be empty before entering the VM if an exception happened ++ __ empty_expression_stack(); ++ // setup parameters ++ __ li(A1, (long)name); ++ if (pass_oop) { ++ __ call_VM(V0, ++ CAST_FROM_FN_PTR(address, InterpreterRuntime::create_klass_exception), A1, FSR); ++ } else { ++ __ li(A2, (long)message); ++ __ call_VM(V0, ++ CAST_FROM_FN_PTR(address, InterpreterRuntime::create_exception), A1, A2); ++ } ++ // throw exception ++ __ jmp(Interpreter::throw_exception_entry(), relocInfo::none); ++ __ delayed()->nop(); ++ return entry; ++} ++ ++ ++address TemplateInterpreterGenerator::generate_continuation_for(TosState state) { ++ address entry = __ pc(); ++ // NULL last_sp until next java call ++ __ sd(R0,Address(FP, frame::interpreter_frame_last_sp_offset * wordSize)); ++ __ dispatch_next(state); ++ return entry; ++} ++ ++ ++address TemplateInterpreterGenerator::generate_return_entry_for(TosState state, int 
step, size_t index_size) { ++ ++ address entry = __ pc(); ++ ++ // Restore stack bottom in case i2c adjusted stack ++ __ ld(SP, Address(FP, frame::interpreter_frame_last_sp_offset * wordSize)); ++ // and NULL it as marker that sp is now tos until next java call ++ __ sd(R0, FP, frame::interpreter_frame_last_sp_offset * wordSize); ++ ++ __ restore_bcp(); ++ __ restore_locals(); ++ ++ // mdp: T8 ++ // ret: FSR ++ // tmp: T9 ++ if (state == atos) { ++ Register mdp = T8; ++ Register tmp = T9; ++ __ profile_return_type(mdp, FSR, tmp); ++ } ++ ++ ++ const Register cache = T9; ++ const Register index = T3; ++ __ get_cache_and_index_at_bcp(cache, index, 1, index_size); ++ ++ const Register flags = cache; ++ __ dsll(AT, index, Address::times_ptr); ++ __ daddu(AT, cache, AT); ++ __ lw(flags, AT, in_bytes(ConstantPoolCache::base_offset() + ConstantPoolCacheEntry::flags_offset())); ++ __ andi(flags, flags, ConstantPoolCacheEntry::parameter_size_mask); ++ __ dsll(AT, flags, Interpreter::stackElementScale()); ++ __ daddu(SP, SP, AT); ++ ++ __ dispatch_next(state, step); ++ ++ return entry; ++} ++ ++ ++address TemplateInterpreterGenerator::generate_deopt_entry_for(TosState state, ++ int step) { ++ address entry = __ pc(); ++ // NULL last_sp until next java call ++ __ sd(R0, FP, frame::interpreter_frame_last_sp_offset * wordSize); ++ __ restore_bcp(); ++ __ restore_locals(); ++ // handle exceptions ++ { ++ Label L; ++ const Register thread = TREG; ++#ifndef OPT_THREAD ++ __ get_thread(thread); ++#endif ++ __ ld(AT, thread, in_bytes(Thread::pending_exception_offset())); ++ __ beq(AT, R0, L); ++ __ delayed()->nop(); ++ __ call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::throw_pending_exception)); ++ __ should_not_reach_here(); ++ __ bind(L); ++ } ++ __ dispatch_next(state, step); ++ return entry; ++} ++ ++int AbstractInterpreter::BasicType_as_index(BasicType type) { ++ int i = 0; ++ switch (type) { ++ case T_BOOLEAN: i = 0; break; ++ case T_CHAR : i = 1; break; ++ case T_BYTE : i = 2; break; ++ case T_SHORT : i = 3; break; ++ case T_INT : // fall through ++ case T_LONG : // fall through ++ case T_VOID : i = 4; break; ++ case T_FLOAT : i = 5; break; ++ case T_DOUBLE : i = 6; break; ++ case T_OBJECT : // fall through ++ case T_ARRAY : i = 7; break; ++ default : ShouldNotReachHere(); ++ } ++ assert(0 <= i && i < AbstractInterpreter::number_of_result_handlers, ++ "index out of bounds"); ++ return i; ++} ++ ++ ++address TemplateInterpreterGenerator::generate_result_handler_for( ++ BasicType type) { ++ address entry = __ pc(); ++ switch (type) { ++ case T_BOOLEAN: __ c2bool(V0); break; ++ case T_CHAR : __ andi(V0, V0, 0xFFFF); break; ++ case T_BYTE : __ sign_extend_byte (V0); break; ++ case T_SHORT : __ sign_extend_short(V0); break; ++ case T_INT : /* nothing to do */ break; ++ case T_FLOAT : /* nothing to do */ break; ++ case T_DOUBLE : /* nothing to do */ break; ++ case T_OBJECT : ++ { ++ __ ld(V0, FP, frame::interpreter_frame_oop_temp_offset * wordSize); ++ __ verify_oop(V0); // and verify it ++ } ++ break; ++ default : ShouldNotReachHere(); ++ } ++ __ jr(RA); // return from result handler ++ __ delayed()->nop(); ++ return entry; ++} ++ ++address TemplateInterpreterGenerator::generate_safept_entry_for( ++ TosState state, ++ address runtime_entry) { ++ address entry = __ pc(); ++ __ push(state); ++ __ call_VM(noreg, runtime_entry); ++ __ dispatch_via(vtos, Interpreter::_normal_table.table_for(vtos)); ++ return entry; ++} ++ ++ ++ ++// Helpers for commoning out cases in the various type of method 
entries. ++// ++ ++ ++// increment invocation count & check for overflow ++// ++// Note: checking for negative value instead of overflow ++// so we have a 'sticky' overflow test ++// ++// Rmethod: method ++// T3 : invocation counter ++// ++void InterpreterGenerator::generate_counter_incr( ++ Label* overflow, ++ Label* profile_method, ++ Label* profile_method_continue) { ++ Label done; ++ if (TieredCompilation) { ++ int increment = InvocationCounter::count_increment; ++ int mask = ((1 << Tier0InvokeNotifyFreqLog) - 1) << InvocationCounter::count_shift; ++ Label no_mdo; ++ if (ProfileInterpreter) { ++ // Are we profiling? ++ __ ld(FSR, Address(Rmethod, Method::method_data_offset())); ++ __ beq(FSR, R0, no_mdo); ++ __ delayed()->nop(); ++ // Increment counter in the MDO ++ const Address mdo_invocation_counter(FSR, in_bytes(MethodData::invocation_counter_offset()) + ++ in_bytes(InvocationCounter::counter_offset())); ++ __ increment_mask_and_jump(mdo_invocation_counter, increment, mask, T3, false, Assembler::zero, overflow); ++ __ beq(R0, R0, done); ++ __ delayed()->nop(); ++ } ++ __ bind(no_mdo); ++ // Increment counter in MethodCounters ++ const Address invocation_counter(FSR, ++ MethodCounters::invocation_counter_offset() + ++ InvocationCounter::counter_offset()); ++ __ get_method_counters(Rmethod, FSR, done); ++ __ increment_mask_and_jump(invocation_counter, increment, mask, T3, false, Assembler::zero, overflow); ++ __ bind(done); ++ } else { ++ const Address invocation_counter(FSR, in_bytes(MethodCounters::invocation_counter_offset()) ++ + in_bytes(InvocationCounter::counter_offset())); ++ const Address backedge_counter (FSR, in_bytes(MethodCounters::backedge_counter_offset()) ++ + in_bytes(InvocationCounter::counter_offset())); ++ ++ __ get_method_counters(Rmethod, FSR, done); ++ ++ if (ProfileInterpreter) { // %%% Merge this into methodDataOop ++ __ lw(T9, FSR, in_bytes(MethodCounters::interpreter_invocation_counter_offset())); ++ __ incrementl(T9, 1); ++ __ sw(T9, FSR, in_bytes(MethodCounters::interpreter_invocation_counter_offset())); ++ } ++ // Update standard invocation counters ++ __ lw(T3, invocation_counter); ++ __ increment(T3, InvocationCounter::count_increment); ++ __ sw(T3, invocation_counter); // save invocation count ++ ++ __ lw(FSR, backedge_counter); // load backedge counter ++ __ li(AT, InvocationCounter::count_mask_value); // mask out the status bits ++ __ andr(FSR, FSR, AT); ++ ++ __ daddu(T3, T3, FSR); // add both counters ++ ++ if (ProfileInterpreter && profile_method != NULL) { ++ // Test to see if we should create a method data oop ++ if (Assembler::is_simm16(InvocationCounter::InterpreterProfileLimit)) { ++ __ slti(AT, T3, InvocationCounter::InterpreterProfileLimit); ++ } else { ++ __ li(AT, (long)&InvocationCounter::InterpreterProfileLimit); ++ __ lw(AT, AT, 0); ++ __ slt(AT, T3, AT); ++ } ++ ++ __ bne_far(AT, R0, *profile_method_continue); ++ __ delayed()->nop(); ++ ++ // if no method data exists, go to profile_method ++ __ test_method_data_pointer(FSR, *profile_method); ++ } ++ ++ if (Assembler::is_simm16(CompileThreshold)) { ++ __ srl(AT, T3, InvocationCounter::count_shift); ++ __ slti(AT, AT, CompileThreshold); ++ } else { ++ __ li(AT, (long)&InvocationCounter::InterpreterInvocationLimit); ++ __ lw(AT, AT, 0); ++ __ slt(AT, T3, AT); ++ } ++ ++ __ beq_far(AT, R0, *overflow); ++ __ delayed()->nop(); ++ __ bind(done); ++ } ++} ++ ++void InterpreterGenerator::generate_counter_overflow(Label* do_continue) { ++ ++ // Asm interpreter on entry ++ // S7 - locals ++ // 
S0 - bcp ++ // Rmethod - method ++ // FP - interpreter frame ++ ++ // On return (i.e. jump to entry_point) ++ // Rmethod - method ++ // RA - return address of interpreter caller ++ // tos - the last parameter to Java method ++ // SP - sender_sp ++ ++ ++ // the bcp is valid if and only if it's not null ++ __ call_VM(NOREG, CAST_FROM_FN_PTR(address, ++ InterpreterRuntime::frequency_counter_overflow), R0); ++ __ ld(Rmethod, FP, method_offset); ++ // Preserve invariant that S0/S7 contain bcp/locals of sender frame ++ __ b_far(*do_continue); ++ __ delayed()->nop(); ++} ++ ++// See if we've got enough room on the stack for locals plus overhead. ++// The expression stack grows down incrementally, so the normal guard ++// page mechanism will work for that. ++// ++// NOTE: Since the additional locals are also always pushed (wasn't ++// obvious in generate_method_entry) so the guard should work for them ++// too. ++// ++// Args: ++// T2: number of additional locals this frame needs (what we must check) ++// T0: Method* ++// ++void InterpreterGenerator::generate_stack_overflow_check(void) { ++ // see if we've got enough room on the stack for locals plus overhead. ++ // the expression stack grows down incrementally, so the normal guard ++ // page mechanism will work for that. ++ // ++ // Registers live on entry: ++ // ++ // T0: Method* ++ // T2: number of additional locals this frame needs (what we must check) ++ ++ // NOTE: since the additional locals are also always pushed (wasn't obvious in ++ // generate_method_entry) so the guard should work for them too. ++ // ++ ++ const int entry_size = frame::interpreter_frame_monitor_size() * wordSize; ++ ++ // total overhead size: entry_size + (saved fp thru expr stack bottom). ++ // be sure to change this if you add/subtract anything to/from the overhead area ++ const int overhead_size = -(frame::interpreter_frame_initial_sp_offset*wordSize) ++ + entry_size; ++ ++ const int page_size = os::vm_page_size(); ++ ++ Label after_frame_check; ++ ++ // see if the frame is greater than one page in size. If so, ++ // then we need to verify there is enough stack space remaining ++ // for the additional locals. 
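When the new frame is larger than one page, the code below forms the sum of the locals in bytes, the fixed overhead, stack_base - stack_size, and the red plus yellow guard pages, and requires that sum to stay below the current SP; otherwise it jumps to the shared StackOverflowError stub. The same inequality in C, with stand-in parameters for the values the assembly loads from the JavaThread and the method:

#include <cstddef>
#include <cstdint>

// Sketch of the stack-bang decision performed below; all inputs are stand-ins.
static bool frame_fits(uintptr_t sp, uintptr_t stack_base, size_t stack_size,
                       size_t extra_locals_bytes, size_t overhead_bytes,
                       size_t guard_bytes) {
    uintptr_t lowest_usable = stack_base - stack_size + guard_bytes;
    return lowest_usable + extra_locals_bytes + overhead_bytes < sp;
    // false => throw StackOverflowError via the shared runtime stub
}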
++ __ move(AT, (page_size - overhead_size) / Interpreter::stackElementSize); ++ __ slt(AT, AT, T2); ++ __ beq(AT, R0, after_frame_check); ++ __ delayed()->nop(); ++ ++ // compute sp as if this were going to be the last frame on ++ // the stack before the red zone ++#ifndef OPT_THREAD ++ Register thread = T1; ++ __ get_thread(thread); ++#else ++ Register thread = TREG; ++#endif ++ ++ // locals + overhead, in bytes ++ __ dsll(T3, T2, Interpreter::stackElementScale()); ++ __ daddiu(T3, T3, overhead_size); // locals * 4 + overhead_size --> T3 ++ ++#ifdef ASSERT ++ Label stack_base_okay, stack_size_okay; ++ // verify that thread stack base is non-zero ++ __ ld(AT, thread, in_bytes(Thread::stack_base_offset())); ++ __ bne(AT, R0, stack_base_okay); ++ __ delayed()->nop(); ++ __ stop("stack base is zero"); ++ __ bind(stack_base_okay); ++ // verify that thread stack size is non-zero ++ __ ld(AT, thread, in_bytes(Thread::stack_size_offset())); ++ __ bne(AT, R0, stack_size_okay); ++ __ delayed()->nop(); ++ __ stop("stack size is zero"); ++ __ bind(stack_size_okay); ++#endif ++ ++ // Add stack base to locals and subtract stack size ++ __ ld(AT, thread, in_bytes(Thread::stack_base_offset())); // stack_base --> AT ++ __ daddu(T3, T3, AT); // locals * 4 + overhead_size + stack_base--> T3 ++ __ ld(AT, thread, in_bytes(Thread::stack_size_offset())); // stack_size --> AT ++ __ dsubu(T3, T3, AT); // locals * 4 + overhead_size + stack_base - stack_size --> T3 ++ ++ ++ // add in the redzone and yellow size ++ __ move(AT, (StackRedPages+StackYellowPages) * page_size); ++ __ addu(T3, T3, AT); ++ ++ // check against the current stack bottom ++ __ slt(AT, T3, SP); ++ __ bne(AT, R0, after_frame_check); ++ __ delayed()->nop(); ++ ++ // Note: the restored frame is not necessarily interpreted. ++ // Use the shared runtime version of the StackOverflowError. 
++ __ move(SP, Rsender); ++ assert(StubRoutines::throw_StackOverflowError_entry() != NULL, "stub not yet generated"); ++ __ jmp(StubRoutines::throw_StackOverflowError_entry(), relocInfo::runtime_call_type); ++ __ delayed()->nop(); ++ ++ // all done with frame size check ++ __ bind(after_frame_check); ++} ++ ++// Allocate monitor and lock method (asm interpreter) ++// Rmethod - Method* ++void InterpreterGenerator::lock_method(void) { ++ // synchronize method ++ const int entry_size = frame::interpreter_frame_monitor_size() * wordSize; ++ ++#ifdef ASSERT ++ { Label L; ++ __ lw(T0, Rmethod, in_bytes(Method::access_flags_offset())); ++ __ andi(T0, T0, JVM_ACC_SYNCHRONIZED); ++ __ bne(T0, R0, L); ++ __ delayed()->nop(); ++ __ stop("method doesn't need synchronization"); ++ __ bind(L); ++ } ++#endif // ASSERT ++ // get synchronization object ++ { ++ Label done; ++ const int mirror_offset = in_bytes(Klass::java_mirror_offset()); ++ __ lw(T0, Rmethod, in_bytes(Method::access_flags_offset())); ++ __ andi(T2, T0, JVM_ACC_STATIC); ++ __ ld(T0, LVP, Interpreter::local_offset_in_bytes(0)); ++ __ beq(T2, R0, done); ++ __ delayed()->nop(); ++ __ ld(T0, Rmethod, in_bytes(Method::const_offset())); ++ __ ld(T0, T0, in_bytes(ConstMethod::constants_offset())); ++ __ ld(T0, T0, ConstantPool::pool_holder_offset_in_bytes()); ++ __ ld(T0, T0, mirror_offset); ++ __ bind(done); ++ } ++ // add space for monitor & lock ++ __ daddiu(SP, SP, (-1) * entry_size); // add space for a monitor entry ++ __ sd(SP, FP, frame::interpreter_frame_monitor_block_top_offset * wordSize); ++ // set new monitor block top ++ __ sd(T0, SP, BasicObjectLock::obj_offset_in_bytes()); // store object ++ // FIXME: I do not know what lock_object will do and what it will need ++ __ move(c_rarg0, SP); // object address ++ __ lock_object(c_rarg0); ++} ++ ++// Generate a fixed interpreter frame. This is identical setup for ++// interpreted methods and for native methods hence the shared code. ++void TemplateInterpreterGenerator::generate_fixed_frame(bool native_call) { ++ ++ // [ local var m-1 ] <--- sp ++ // ... ++ // [ local var 0 ] ++ // [ argumnet word n-1 ] <--- T0(sender's sp) ++ // ... 
++ // [ argument word 0 ] <--- S7 ++ ++ // initialize fixed part of activation frame ++ // sender's sp in Rsender ++ int i = 0; ++ int frame_size = 9; ++#ifndef CORE ++ ++frame_size; ++#endif ++ __ daddiu(SP, SP, (-frame_size) * wordSize); ++ __ sd(RA, SP, (frame_size - 1) * wordSize); // save return address ++ __ sd(FP, SP, (frame_size - 2) * wordSize); // save sender's fp ++ __ daddiu(FP, SP, (frame_size - 2) * wordSize); ++ __ sd(Rsender, FP, (-++i) * wordSize); // save sender's sp ++ __ sd(R0, FP,(-++i) * wordSize); //save last_sp as null ++ __ sd(LVP, FP, (-++i) * wordSize); // save locals offset ++ __ ld(BCP, Rmethod, in_bytes(Method::const_offset())); // get constMethodOop ++ __ daddiu(BCP, BCP, in_bytes(ConstMethod::codes_offset())); // get codebase ++ __ sd(Rmethod, FP, (-++i) * wordSize); // save Method* ++#ifndef CORE ++ if (ProfileInterpreter) { ++ Label method_data_continue; ++ __ ld(AT, Rmethod, in_bytes(Method::method_data_offset())); ++ __ beq(AT, R0, method_data_continue); ++ __ delayed()->nop(); ++ __ daddiu(AT, AT, in_bytes(MethodData::data_offset())); ++ __ bind(method_data_continue); ++ __ sd(AT, FP, (-++i) * wordSize); ++ } else { ++ __ sd(R0, FP, (-++i) * wordSize); ++ } ++#endif // !CORE ++ ++ __ ld(T2, Rmethod, in_bytes(Method::const_offset())); ++ __ ld(T2, T2, in_bytes(ConstMethod::constants_offset())); ++ __ ld(T2, T2, ConstantPool::cache_offset_in_bytes()); ++ __ sd(T2, FP, (-++i) * wordSize); // set constant pool cache ++ if (native_call) { ++ __ sd(R0, FP, (-++i) * wordSize); // no bcp ++ } else { ++ __ sd(BCP, FP, (-++i) * wordSize); // set bcp ++ } ++ __ sd(SP, FP, (-++i) * wordSize); // reserve word for pointer to expression stack bottom ++ assert(i + 2 == frame_size, "i + 2 should be equal to frame_size"); ++} ++ ++// End of helpers ++ ++// Various method entries ++//------------------------------------------------------------------------------------------------------------------------ ++// ++// ++ ++// Call an accessor method (assuming it is resolved, otherwise drop ++// into vanilla (slow path) entry ++address InterpreterGenerator::generate_accessor_entry(void) { ++ ++ // Rmethod: Method* ++ // V0: receiver (preserve for slow entry into asm interpreter) ++ // Rsender: senderSP must preserved for slow path, set SP to it on fast path ++ ++ address entry_point = __ pc(); ++ Label xreturn_path; ++ // do fastpath for resolved accessor methods ++ if (UseFastAccessorMethods) { ++ Label slow_path; ++ __ li(T2, SafepointSynchronize::address_of_state()); ++ __ lw(AT, T2, 0); ++ __ daddiu(AT, AT, -(SafepointSynchronize::_not_synchronized)); ++ __ bne(AT, R0, slow_path); ++ __ delayed()->nop(); ++ // Code: _aload_0, _(i|a)getfield, _(i|a)return or any rewrites thereof; ++ // parameter size = 1 ++ // Note: We can only use this code if the getfield has been resolved ++ // and if we don't have a null-pointer exception => check for ++ // these conditions first and use slow path if necessary. ++ // Rmethod: method ++ // V0: receiver ++ ++ // [ receiver ] <-- sp ++ __ ld(T0, SP, 0); ++ ++ // check if local 0 != NULL and read field ++ __ beq(T0, R0, slow_path); ++ __ delayed()->nop(); ++ __ ld(T2, Rmethod, in_bytes(Method::const_offset())); ++ __ ld(T2, T2, in_bytes(ConstMethod::constants_offset())); ++ // read first instruction word and extract bytecode @ 1 and index @ 2 ++ __ ld(T3, Rmethod, in_bytes(Method::const_offset())); ++ __ lw(T3, T3, in_bytes(ConstMethod::codes_offset())); ++ // Shift codes right to get the index on the right. 
++ // The bytecode fetched looks like <0xb4><0x2a> ++ __ dsrl(T3, T3, 2 * BitsPerByte); ++ // FIXME: maybe it's wrong ++ __ dsll(T3, T3, exact_log2(in_words(ConstantPoolCacheEntry::size()))); ++ __ ld(T2, T2, ConstantPool::cache_offset_in_bytes()); ++ ++ // T0: local 0 ++ // Rmethod: method ++ // V0: receiver - do not destroy since it is needed for slow path! ++ // T1: scratch use which register instead ? ++ // T3: constant pool cache index ++ // T2: constant pool cache ++ // Rsender: send's sp ++ // check if getfield has been resolved and read constant pool cache entry ++ // check the validity of the cache entry by testing whether _indices field ++ // contains Bytecode::_getfield in b1 byte. ++ assert(in_words(ConstantPoolCacheEntry::size()) == 4, "adjust shift below"); ++ ++ __ dsll(T8, T3, Address::times_8); ++ __ move(T1, in_bytes(ConstantPoolCache::base_offset() ++ + ConstantPoolCacheEntry::indices_offset())); ++ __ daddu(T1, T8, T1); ++ __ daddu(T1, T1, T2); ++ __ lw(T1, T1, 0); ++ __ dsrl(T1, T1, 2 * BitsPerByte); ++ __ andi(T1, T1, 0xFF); ++ __ daddiu(T1, T1, (-1) * Bytecodes::_getfield); ++ __ bne(T1, R0, slow_path); ++ __ delayed()->nop(); ++ ++ // Note: constant pool entry is not valid before bytecode is resolved ++ ++ __ move(T1, in_bytes(ConstantPoolCache::base_offset() + ConstantPoolCacheEntry::f2_offset())); ++ __ daddu(T1, T1, T8); ++ __ daddu(T1, T1, T2); ++ __ lw(AT, T1, 0); ++ ++ __ move(T1, in_bytes(ConstantPoolCache::base_offset() + ConstantPoolCacheEntry::flags_offset())); ++ __ daddu(T1, T1, T8); ++ __ daddu(T1, T1, T2); ++ __ lw(T3, T1, 0); ++ ++ Label notByte, notBool, notShort, notChar, notObj; ++ ++ // Need to differentiate between igetfield, agetfield, bgetfield etc. ++ // because they are different sizes. ++ // Use the type from the constant pool cache ++ __ srl(T3, T3, ConstantPoolCacheEntry::tos_state_shift); ++ // Make sure we don't need to mask T3 for tosBits after the above shift ++ ConstantPoolCacheEntry::verify_tos_state_shift(); ++ // btos = 0 ++ __ bne(T3, R0, notByte); ++ __ delayed()->daddu(T0, T0, AT); ++ ++ __ lb(V0, T0, 0); ++ __ b(xreturn_path); ++ __ delayed()->nop(); ++ ++ //ztos ++ __ bind(notByte); ++ __ daddiu(T1, T3, (-1) * ztos); ++ __ bne(T1, R0, notBool); ++ __ delayed()->nop(); ++ __ lb(V0, T0, 0); ++ __ b(xreturn_path); ++ __ delayed()->nop(); ++ ++ //stos ++ __ bind(notBool); ++ __ daddiu(T1, T3, (-1) * stos); ++ __ bne(T1, R0, notShort); ++ __ delayed()->nop(); ++ __ lh(V0, T0, 0); ++ __ b(xreturn_path); ++ __ delayed()->nop(); ++ ++ //ctos ++ __ bind(notShort); ++ __ daddiu(T1, T3, (-1) * ctos); ++ __ bne(T1, R0, notChar); ++ __ delayed()->nop(); ++ __ lhu(V0, T0, 0); ++ __ b(xreturn_path); ++ __ delayed()->nop(); ++ ++ //atos ++ __ bind(notChar); ++ __ daddiu(T1, T3, (-1) * atos); ++ __ bne(T1, R0, notObj); ++ __ delayed()->nop(); ++ //add for compressedoops ++ __ load_heap_oop(V0, Address(T0, 0)); ++ __ b(xreturn_path); ++ __ delayed()->nop(); ++ ++ //itos ++ __ bind(notObj); ++#ifdef ASSERT ++ Label okay; ++ __ daddiu(T1, T3, (-1) * itos); ++ __ beq(T1, R0, okay); ++ __ delayed()->nop(); ++ __ stop("what type is this?"); ++ __ bind(okay); ++#endif // ASSERT ++ __ lw(V0, T0, 0); ++ ++ __ bind(xreturn_path); ++ ++ // _ireturn/_areturn ++ //FIXME ++ __ move(SP, Rsender);//FIXME, set sender's fp to SP ++ __ jr(RA); ++ __ delayed()->nop(); ++ ++ // generate a vanilla interpreter entry as the slow path ++ __ bind(slow_path); ++ (void) generate_normal_entry(false); ++ } else { ++ (void) generate_normal_entry(false); ++ } ++ ++ return 
entry_point; ++} ++ ++// Method entry for java.lang.ref.Reference.get. ++address InterpreterGenerator::generate_Reference_get_entry(void) { ++#if INCLUDE_ALL_GCS ++ // Code: _aload_0, _getfield, _areturn ++ // parameter size = 1 ++ // ++ // The code that gets generated by this routine is split into 2 parts: ++ // 1. The "intrinsified" code for G1 (or any SATB based GC), ++ // 2. The slow path - which is an expansion of the regular method entry. ++ // ++ // Notes:- ++ // * In the G1 code we do not check whether we need to block for ++ // a safepoint. If G1 is enabled then we must execute the specialized ++ // code for Reference.get (except when the Reference object is null) ++ // so that we can log the value in the referent field with an SATB ++ // update buffer. ++ // If the code for the getfield template is modified so that the ++ // G1 pre-barrier code is executed when the current method is ++ // Reference.get() then going through the normal method entry ++ // will be fine. ++ // * The G1 code can, however, check the receiver object (the instance ++ // of java.lang.Reference) and jump to the slow path if null. If the ++ // Reference object is null then we obviously cannot fetch the referent ++ // and so we don't need to call the G1 pre-barrier. Thus we can use the ++ // regular method entry code to generate the NPE. ++ // ++ // This code is based on generate_accessor_enty. ++ // ++ // Rmethod: Method* ++ ++ // Rsender: senderSP must preserve for slow path, set SP to it on fast path (Rsender) ++ ++ address entry = __ pc(); ++ ++ const int referent_offset = java_lang_ref_Reference::referent_offset; ++ guarantee(referent_offset > 0, "referent offset not initialized"); ++ ++ if (UseG1GC) { ++ Label slow_path; ++ ++ // Check if local 0 != NULL ++ // If the receiver is null then it is OK to jump to the slow path. ++ __ ld(V0, SP, 0); ++ ++ __ beq(V0, R0, slow_path); ++ __ delayed()->nop(); ++ ++ // Generate the G1 pre-barrier code to log the value of ++ // the referent field in an SATB buffer. ++ ++ // Load the value of the referent field. ++ const Address field_address(V0, referent_offset); ++ __ load_heap_oop(V0, field_address); ++ ++ __ push(RA); ++ // Generate the G1 pre-barrier code to log the value of ++ // the referent field in an SATB buffer. ++ __ g1_write_barrier_pre(noreg /* obj */, ++ V0 /* pre_val */, ++ TREG /* thread */, ++ Rmethod /* tmp */, ++ true /* tosca_live */, ++ true /* expand_call */); ++ __ pop(RA); ++ ++ __ jr(RA); ++ __ delayed()->daddu(SP, Rsender, R0); // set sp to sender sp ++ ++ // generate a vanilla interpreter entry as the slow path ++ __ bind(slow_path); ++ (void) generate_normal_entry(false); ++ ++ return entry; ++ } ++#endif // INCLUDE_ALL_GCS ++ ++ // If G1 is not enabled then attempt to go through the accessor entry point ++ // Reference.get is an accessor ++ return generate_accessor_entry(); ++} ++ ++// Interpreter stub for calling a native method. (asm interpreter) ++// This sets up a somewhat different looking stack for calling the ++// native method than the typical interpreter frame setup. 
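++// Editor's note -- illustrative sketch, not part of the original Loongson patch. The entry
++// below first derives the locals pointer (LVP) from the parameter count: the caller pushed the
++// arguments so that word n-1 sits at SP and word 0 sits highest. In plain C++ terms (helper
++// name and concrete values are hypothetical):
++//
++//   uintptr_t locals_base(uintptr_t sp, int nparams) {
++//     return sp + (nparams - 1) * wordSize;   // parameter 0 lives highest, word n-1 at sp
++//   }
++//   // e.g. nparams == 3, sp == 0x1000, wordSize == 8  ->  LVP == 0x1010,
++//   // and parameter i is then addressed as LVP - i * wordSize.
++//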
++address InterpreterGenerator::generate_native_entry(bool synchronized) { ++ // determine code generation flags ++ bool inc_counter = UseCompiler || CountCompiledCalls; ++ // Rsender: sender's sp ++ // Rmethod: Method* ++ address entry_point = __ pc(); ++ ++#ifndef CORE ++ const Address invocation_counter(Rmethod,in_bytes(MethodCounters::invocation_counter_offset() + ++ InvocationCounter::counter_offset())); ++#endif ++ ++ // get parameter size (always needed) ++ // the size in the java stack ++ __ ld(V0, Rmethod, in_bytes(Method::const_offset())); ++ __ lhu(V0, V0, in_bytes(ConstMethod::size_of_parameters_offset())); ++ ++ // native calls don't need the stack size check since they have no expression stack ++ // and the arguments are already on the stack and we only add a handful of words ++ // to the stack ++ ++ // Rmethod: Method* ++ // V0: size of parameters ++ // Layout of frame at this point ++ // ++ // [ argument word n-1 ] <--- sp ++ // ... ++ // [ argument word 0 ] ++ ++ // for natives the size of locals is zero ++ ++ // compute beginning of parameters (S7) ++ __ dsll(LVP, V0, Address::times_8); ++ __ daddiu(LVP, LVP, (-1) * wordSize); ++ __ daddu(LVP, LVP, SP); ++ ++ ++ // add 2 zero-initialized slots for native calls ++ // 1 slot for native oop temp offset (setup via runtime) ++ // 1 slot for static native result handler3 (setup via runtime) ++ __ push2(R0, R0); ++ ++ // Layout of frame at this point ++ // [ method holder mirror ] <--- sp ++ // [ result type info ] ++ // [ argument word n-1 ] <--- T0 ++ // ... ++ // [ argument word 0 ] <--- LVP ++ ++ ++#ifndef CORE ++ if (inc_counter) __ lw(T3, invocation_counter); // (pre-)fetch invocation count ++#endif ++ ++ // initialize fixed part of activation frame ++ generate_fixed_frame(true); ++ // after this function, the layout of frame is as following ++ // ++ // [ monitor block top ] <--- sp ( the top monitor entry ) ++ // [ byte code pointer (0) ] (if native, bcp = 0) ++ // [ constant pool cache ] ++ // [ Method* ] ++ // [ locals offset ] ++ // [ sender's sp ] ++ // [ sender's fp ] ++ // [ return address ] <--- fp ++ // [ method holder mirror ] ++ // [ result type info ] ++ // [ argumnet word n-1 ] <--- sender's sp ++ // ... ++ // [ argument word 0 ] <--- S7 ++ ++ ++ // make sure method is native & not abstract ++#ifdef ASSERT ++ __ lw(T0, Rmethod, in_bytes(Method::access_flags_offset())); ++ { ++ Label L; ++ __ andi(AT, T0, JVM_ACC_NATIVE); ++ __ bne(AT, R0, L); ++ __ delayed()->nop(); ++ __ stop("tried to execute native method as non-native"); ++ __ bind(L); ++ } ++ { ++ Label L; ++ __ andi(AT, T0, JVM_ACC_ABSTRACT); ++ __ beq(AT, R0, L); ++ __ delayed()->nop(); ++ __ stop("tried to execute abstract method in interpreter"); ++ __ bind(L); ++ } ++#endif ++ ++ // Since at this point in the method invocation the exception handler ++ // would try to exit the monitor of synchronized methods which hasn't ++ // been entered yet, we set the thread local variable ++ // _do_not_unlock_if_synchronized to true. The remove_activation will ++ // check this flag. 
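++  // Editor's note (illustrative, not part of the original patch): the byte store emitted just
++  // below is the hand-written equivalent of roughly
++  //   thread->set_do_not_unlock_if_synchronized(true);
++  // i.e. a one-byte store of 1 at thread + do_not_unlock_if_synchronized_offset(); the flag is
++  // cleared again once the counter and stack-shadow checks further down have passed.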
++ Register thread = TREG; ++#ifndef OPT_THREAD ++ __ get_thread(thread); ++#endif ++ __ move(AT, (int)true); ++ __ sb(AT, thread, in_bytes(JavaThread::do_not_unlock_if_synchronized_offset())); ++ ++#ifndef CORE ++ // increment invocation count & check for overflow ++ Label invocation_counter_overflow; ++ if (inc_counter) { ++ generate_counter_incr(&invocation_counter_overflow, NULL, NULL); ++ } ++ ++ Label continue_after_compile; ++ __ bind(continue_after_compile); ++#endif // CORE ++ ++ bang_stack_shadow_pages(true); ++ ++ // reset the _do_not_unlock_if_synchronized flag ++#ifndef OPT_THREAD ++ __ get_thread(thread); ++#endif ++ __ sb(R0, thread, in_bytes(JavaThread::do_not_unlock_if_synchronized_offset())); ++ ++ // check for synchronized methods ++ // Must happen AFTER invocation_counter check and stack overflow check, ++ // so method is not locked if overflows. ++ if (synchronized) { ++ lock_method(); ++ } else { ++ // no synchronization necessary ++#ifdef ASSERT ++ { ++ Label L; ++ __ lw(T0, Rmethod, in_bytes(Method::access_flags_offset())); ++ __ andi(AT, T0, JVM_ACC_SYNCHRONIZED); ++ __ beq(AT, R0, L); ++ __ delayed()->nop(); ++ __ stop("method needs synchronization"); ++ __ bind(L); ++ } ++#endif ++ } ++ ++ // after method_lock, the layout of frame is as following ++ // ++ // [ monitor entry ] <--- sp ++ // ... ++ // [ monitor entry ] ++ // [ monitor block top ] ( the top monitor entry ) ++ // [ byte code pointer (0) ] (if native, bcp = 0) ++ // [ constant pool cache ] ++ // [ Method* ] ++ // [ locals offset ] ++ // [ sender's sp ] ++ // [ sender's fp ] ++ // [ return address ] <--- fp ++ // [ method holder mirror ] ++ // [ result type info ] ++ // [ argumnet word n-1 ] <--- ( sender's sp ) ++ // ... ++ // [ argument word 0 ] <--- S7 ++ ++ // start execution ++#ifdef ASSERT ++ { ++ Label L; ++ __ ld(AT, FP, frame::interpreter_frame_monitor_block_top_offset * wordSize); ++ __ beq(AT, SP, L); ++ __ delayed()->nop(); ++ __ stop("broken stack frame setup in interpreter in asm"); ++ __ bind(L); ++ } ++#endif ++ ++ // jvmti/jvmpi support ++ __ notify_method_entry(); ++ ++ // work registers ++ const Register method = Rmethod; ++ //const Register thread = T2; ++ const Register t = T8; ++ ++ __ get_method(method); ++ __ verify_oop(method); ++ { ++ Label L, Lstatic; ++ __ ld(t,method,in_bytes(Method::const_offset())); ++ __ lhu(t, t, in_bytes(ConstMethod::size_of_parameters_offset())); ++ // MIPS n64 ABI: caller does not reserve space for the register auguments. ++ // A0 and A1(if needed) ++ __ lw(AT, Rmethod, in_bytes(Method::access_flags_offset())); ++ __ andi(AT, AT, JVM_ACC_STATIC); ++ __ beq(AT, R0, Lstatic); ++ __ delayed()->nop(); ++ __ daddiu(t, t, 1); ++ __ bind(Lstatic); ++ __ daddiu(t, t, -7); ++ __ blez(t, L); ++ __ delayed()->nop(); ++ __ dsll(t, t, Address::times_8); ++ __ dsubu(SP, SP, t); ++ __ bind(L); ++ } ++ __ move(AT, -(StackAlignmentInBytes)); ++ __ andr(SP, SP, AT); ++ __ move(AT, SP); ++ // [ ] <--- sp ++ // ... (size of parameters - 8 ) ++ // [ monitor entry ] ++ // ... ++ // [ monitor entry ] ++ // [ monitor block top ] ( the top monitor entry ) ++ // [ byte code pointer (0) ] (if native, bcp = 0) ++ // [ constant pool cache ] ++ // [ Method* ] ++ // [ locals offset ] ++ // [ sender's sp ] ++ // [ sender's fp ] ++ // [ return address ] <--- fp ++ // [ method holder mirror ] ++ // [ result type info ] ++ // [ argumnet word n-1 ] <--- ( sender's sp ) ++ // ... 
++ // [ argument word 0 ] <--- LVP ++ ++ // get signature handler ++ { ++ Label L; ++ __ ld(T9, method, in_bytes(Method::signature_handler_offset())); ++ __ bne(T9, R0, L); ++ __ delayed()->nop(); ++ __ call_VM(NOREG, CAST_FROM_FN_PTR(address, ++ InterpreterRuntime::prepare_native_call), method); ++ __ get_method(method); ++ __ ld(T9, method, in_bytes(Method::signature_handler_offset())); ++ __ bind(L); ++ } ++ ++ // call signature handler ++ // FIXME: when change codes in InterpreterRuntime, note this point ++ // from: begin of parameters ++ assert(InterpreterRuntime::SignatureHandlerGenerator::from() == LVP, "adjust this code"); ++ // to: current sp ++ assert(InterpreterRuntime::SignatureHandlerGenerator::to () == SP, "adjust this code"); ++ // temp: T3 ++ assert(InterpreterRuntime::SignatureHandlerGenerator::temp() == t , "adjust this code"); ++ ++ __ jalr(T9); ++ __ delayed()->nop(); ++ __ get_method(method); ++ ++ // ++ // if native function is static, and its second parameter has type length of double word, ++ // and first parameter has type length of word, we have to reserve one word ++ // for the first parameter, according to mips o32 abi. ++ // if native function is not static, and its third parameter has type length of double word, ++ // and second parameter has type length of word, we have to reserve one word for the second ++ // parameter. ++ // ++ ++ ++ // result handler is in V0 ++ // set result handler ++ __ sd(V0, FP, (frame::interpreter_frame_result_handler_offset)*wordSize); ++ ++#define FIRSTPARA_SHIFT_COUNT 5 ++#define SECONDPARA_SHIFT_COUNT 9 ++#define THIRDPARA_SHIFT_COUNT 13 ++#define PARA_MASK 0xf ++ ++ // pass mirror handle if static call ++ { ++ Label L; ++ const int mirror_offset = in_bytes(Klass::java_mirror_offset()); ++ __ lw(t, method, in_bytes(Method::access_flags_offset())); ++ __ andi(AT, t, JVM_ACC_STATIC); ++ __ beq(AT, R0, L); ++ __ delayed()->nop(); ++ ++ // get mirror ++ __ ld(t, method, in_bytes(Method:: const_offset())); ++ __ ld(t, t, in_bytes(ConstMethod::constants_offset())); //?? ++ __ ld(t, t, ConstantPool::pool_holder_offset_in_bytes()); ++ __ ld(t, t, mirror_offset); ++ // copy mirror into activation frame ++ //__ sw(t, FP, frame::interpreter_frame_oop_temp_offset * wordSize); ++ // pass handle to mirror ++ __ sd(t, FP, frame::interpreter_frame_oop_temp_offset * wordSize); ++ __ daddiu(t, FP, frame::interpreter_frame_oop_temp_offset * wordSize); ++ __ move(A1, t); ++ __ bind(L); ++ } ++ ++ // [ mthd holder mirror ptr ] <--- sp --------------------| (only for static method) ++ // [ ] | ++ // ... size of parameters(or +1) | ++ // [ monitor entry ] | ++ // ... | ++ // [ monitor entry ] | ++ // [ monitor block top ] ( the top monitor entry ) | ++ // [ byte code pointer (0) ] (if native, bcp = 0) | ++ // [ constant pool cache ] | ++ // [ Method* ] | ++ // [ locals offset ] | ++ // [ sender's sp ] | ++ // [ sender's fp ] | ++ // [ return address ] <--- fp | ++ // [ method holder mirror ] <----------------------------| ++ // [ result type info ] ++ // [ argumnet word n-1 ] <--- ( sender's sp ) ++ // ... 
++ // [ argument word 0 ] <--- S7 ++ ++ // get native function entry point ++ { Label L; ++ __ ld(T9, method, in_bytes(Method::native_function_offset())); ++ __ li(V1, SharedRuntime::native_method_throw_unsatisfied_link_error_entry()); ++ __ bne(V1, T9, L); ++ __ delayed()->nop(); ++ __ call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::prepare_native_call), method); ++ __ get_method(method); ++ __ verify_oop(method); ++ __ ld(T9, method, in_bytes(Method::native_function_offset())); ++ __ bind(L); ++ } ++ ++ // pass JNIEnv ++ // native function in T9 ++#ifndef OPT_THREAD ++ __ get_thread(thread); ++#endif ++ __ daddiu(t, thread, in_bytes(JavaThread::jni_environment_offset())); ++ __ move(A0, t); ++ // [ jni environment ] <--- sp ++ // [ mthd holder mirror ptr ] ---------------------------->| (only for static method) ++ // [ ] | ++ // ... size of parameters | ++ // [ monitor entry ] | ++ // ... | ++ // [ monitor entry ] | ++ // [ monitor block top ] ( the top monitor entry ) | ++ // [ byte code pointer (0) ] (if native, bcp = 0) | ++ // [ constant pool cache ] | ++ // [ Method* ] | ++ // [ locals offset ] | ++ // [ sender's sp ] | ++ // [ sender's fp ] | ++ // [ return address ] <--- fp | ++ // [ method holder mirror ] <----------------------------| ++ // [ result type info ] ++ // [ argumnet word n-1 ] <--- ( sender's sp ) ++ // ... ++ // [ argument word 0 ] <--- S7 ++ ++ // set_last_Java_frame_before_call ++ __ sd(FP, thread, in_bytes(JavaThread::last_Java_fp_offset())); ++ // Change state to native (we save the return address in the thread, since it might not ++ // be pushed on the stack when we do a a stack traversal). It is enough that the pc() ++ // points into the right code segment. It does not have to be the correct return pc. ++ __ li(t, __ pc()); ++ __ sd(t, thread, in_bytes(JavaThread::last_Java_pc_offset())); ++ __ sd(SP, thread, in_bytes(JavaThread::last_Java_sp_offset())); ++ ++ // change thread state ++#ifdef ASSERT ++ { ++ Label L; ++ __ lw(t, thread, in_bytes(JavaThread::thread_state_offset())); ++ __ daddiu(t, t, (-1) * _thread_in_Java); ++ __ beq(t, R0, L); ++ __ delayed()->nop(); ++ __ stop("Wrong thread state in native stub"); ++ __ bind(L); ++ } ++#endif ++ ++ __ move(t, _thread_in_native); ++ if(os::is_MP()) { ++ __ sync(); // store release ++ } ++ __ sw(t, thread, in_bytes(JavaThread::thread_state_offset())); ++ ++ // call native method ++ __ jalr(T9); ++ __ delayed()->nop(); ++ // result potentially in V0 or F0 ++ ++ ++ // via _last_native_pc and not via _last_jave_sp ++ // NOTE: the order of theses push(es) is known to frame::interpreter_frame_result. ++ // If the order changes or anything else is added to the stack the code in ++ // interpreter_frame_result will have to be changed. ++ //FIXME, should modify here ++ // save return value to keep the value from being destroyed by other calls ++ __ push(dtos); ++ __ push(ltos); ++ ++ // change thread state ++ __ get_thread(thread); ++ __ move(t, _thread_in_native_trans); ++ if(os::is_MP()) { ++ __ sync(); // store release ++ } ++ __ sw(t, thread, in_bytes(JavaThread::thread_state_offset())); ++ ++ if(os::is_MP()) { ++ if (UseMembar) { ++ // Force this write out before the read below ++ __ sync(); ++ } else { ++ // Write serialization page so VM thread can do a pseudo remote membar. ++ // We use the current thread pointer to calculate a thread specific ++ // offset to write to within the page. This minimizes bus traffic ++ // due to cache line collision. 
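++      // Editor's sketch (simplified and hypothetical -- not the actual serialize_memory
++      // implementation): the per-thread slot is derived from the thread pointer and kept
++      // inside one shared page, something like
++      //   size_t off = (uintptr_t(thread) >> 3) & (page_size - sizeof(int));
++      //   *(volatile int*)(serialize_page_base + off) = 1;   // harmless store, distinct per thread
++      // The VM thread can later write-protect that page, forcing every Java thread through a
++      // fault that serves as the missing memory barrier.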
++ __ serialize_memory(thread, A0); ++ } ++ } ++ ++ // check for safepoint operation in progress and/or pending suspend requests ++ { Label Continue; ++ ++ // Don't use call_VM as it will see a possible pending exception and forward it ++ // and never return here preventing us from clearing _last_native_pc down below. ++ // Also can't use call_VM_leaf either as it will check to see if BCP & LVP are ++ // preserved and correspond to the bcp/locals pointers. So we do a runtime call ++ // by hand. ++ // ++ Label L; ++ __ li(AT, SafepointSynchronize::address_of_state()); ++ __ lw(AT, AT, 0); ++ __ bne(AT, R0, L); ++ __ delayed()->nop(); ++ __ lw(AT, thread, in_bytes(JavaThread::suspend_flags_offset())); ++ __ beq(AT, R0, Continue); ++ __ delayed()->nop(); ++ __ bind(L); ++ __ move(A0, thread); ++ __ call(CAST_FROM_FN_PTR(address, JavaThread::check_special_condition_for_native_trans), ++ relocInfo::runtime_call_type); ++ __ delayed()->nop(); ++ ++#ifndef OPT_THREAD ++ __ get_thread(thread); ++#endif ++ //add for compressedoops ++ __ reinit_heapbase(); ++ __ bind(Continue); ++ } ++ ++ // change thread state ++ __ move(t, _thread_in_Java); ++ if(os::is_MP()) { ++ __ sync(); // store release ++ } ++ __ sw(t, thread, in_bytes(JavaThread::thread_state_offset())); ++ __ reset_last_Java_frame(thread, true); ++ ++ // reset handle block ++ __ ld(t, thread, in_bytes(JavaThread::active_handles_offset())); ++ __ sw(R0, t, JNIHandleBlock::top_offset_in_bytes()); ++ ++ // If result was an oop then unbox and save it in the frame ++ { ++ Label no_oop; ++ //FIXME, addiu only support 16-bit imeditate ++ __ ld(AT, FP, frame::interpreter_frame_result_handler_offset*wordSize); ++ __ li(T0, AbstractInterpreter::result_handler(T_OBJECT)); ++ __ bne(AT, T0, no_oop); ++ __ delayed()->nop(); ++ __ pop(ltos); ++ // Unbox oop result, e.g. JNIHandles::resolve value. ++ __ resolve_jobject(V0, thread, T9); ++ __ sd(V0, FP, (frame::interpreter_frame_oop_temp_offset)*wordSize); ++ // keep stack depth as expected by pushing oop which will eventually be discarded ++ __ push(ltos); ++ __ bind(no_oop); ++ } ++ { ++ Label no_reguard; ++ __ lw(t, thread, in_bytes(JavaThread::stack_guard_state_offset())); ++ __ move(AT,(int) JavaThread::stack_guard_yellow_disabled); ++ __ bne(t, AT, no_reguard); ++ __ delayed()->nop(); ++ __ pushad(); ++ __ move(S5_heapbase, SP); ++ __ move(AT, -StackAlignmentInBytes); ++ __ andr(SP, SP, AT); ++ __ call(CAST_FROM_FN_PTR(address, SharedRuntime::reguard_yellow_pages), relocInfo::runtime_call_type); ++ __ delayed()->nop(); ++ __ move(SP, S5_heapbase); ++ __ popad(); ++ //add for compressedoops ++ __ reinit_heapbase(); ++ __ bind(no_reguard); ++ } ++ // restore BCP to have legal interpreter frame, ++ // i.e., bci == 0 <=> BCP == code_base() ++ // Can't call_VM until bcp is within reasonable. ++ __ get_method(method); // method is junk from thread_in_native to now. ++ __ verify_oop(method); ++ __ ld(BCP, method, in_bytes(Method::const_offset())); ++ __ lea(BCP, Address(BCP, in_bytes(ConstMethod::codes_offset()))); ++ // handle exceptions (exception handling will handle unlocking!) ++ { ++ Label L; ++ __ ld(t, thread, in_bytes(Thread::pending_exception_offset())); ++ __ beq(t, R0, L); ++ __ delayed()->nop(); ++ // Note: At some point we may want to unify this with the code used in ++ // call_VM_base(); ++ // i.e., we should use the StubRoutines::forward_exception code. For now this ++ // doesn't work here because the sp is not correctly set at this point. 
++ __ MacroAssembler::call_VM(noreg, ++ CAST_FROM_FN_PTR(address, ++ InterpreterRuntime::throw_pending_exception)); ++ __ should_not_reach_here(); ++ __ bind(L); ++ } ++ ++ // do unlocking if necessary ++ { ++ Label L; ++ __ lw(t, method, in_bytes(Method::access_flags_offset())); ++ __ andi(t, t, JVM_ACC_SYNCHRONIZED); ++ __ beq(t, R0, L); ++ // the code below should be shared with interpreter macro assembler implementation ++ { ++ Label unlock; ++ // BasicObjectLock will be first in list, ++ // since this is a synchronized method. However, need ++ // to check that the object has not been unlocked by ++ // an explicit monitorexit bytecode. ++ __ delayed()->daddiu(c_rarg0, FP, frame::interpreter_frame_initial_sp_offset * wordSize - (int)sizeof(BasicObjectLock)); ++ // address of first monitor ++ ++ __ ld(t, c_rarg0, BasicObjectLock::obj_offset_in_bytes()); ++ __ bne(t, R0, unlock); ++ __ delayed()->nop(); ++ ++ // Entry already unlocked, need to throw exception ++ __ MacroAssembler::call_VM(NOREG, CAST_FROM_FN_PTR(address, ++ InterpreterRuntime::throw_illegal_monitor_state_exception)); ++ __ should_not_reach_here(); ++ ++ __ bind(unlock); ++ __ unlock_object(c_rarg0); ++ } ++ __ bind(L); ++ } ++ ++ // jvmti/jvmpi support ++ // Note: This must happen _after_ handling/throwing any exceptions since ++ // the exception handler code notifies the runtime of method exits ++ // too. If this happens before, method entry/exit notifications are ++ // not properly paired (was bug - gri 11/22/99). ++ __ notify_method_exit(vtos, InterpreterMacroAssembler::NotifyJVMTI); ++ ++ // restore potential result in V0, ++ // call result handler to restore potential result in ST0 & handle result ++ ++ __ pop(ltos); ++ __ pop(dtos); ++ ++ __ ld(t, FP, (frame::interpreter_frame_result_handler_offset) * wordSize); ++ __ jalr(t); ++ __ delayed()->nop(); ++ ++ ++ // remove activation ++ __ ld(SP, FP, frame::interpreter_frame_sender_sp_offset * wordSize); // get sender sp ++ __ ld(RA, FP, frame::interpreter_frame_return_addr_offset * wordSize); // get return address ++ __ ld(FP, FP, frame::interpreter_frame_sender_fp_offset * wordSize); // restore sender's fp ++ __ jr(RA); ++ __ delayed()->nop(); ++ ++#ifndef CORE ++ if (inc_counter) { ++ // Handle overflow of counter and compile method ++ __ bind(invocation_counter_overflow); ++ generate_counter_overflow(&continue_after_compile); ++ // entry_point is the beginning of this ++ // function and checks again for compiled code ++ } ++#endif ++ return entry_point; ++} ++ ++// ++// Generic interpreted method entry to (asm) interpreter ++// ++// Layout of frame just at the entry ++// ++// [ argument word n-1 ] <--- sp ++// ... ++// [ argument word 0 ] ++// assume Method* in Rmethod before call this method. 
++// prerequisites to the generated stub : the callee Method* in Rmethod ++// note you must save the caller bcp before call the generated stub ++// ++address InterpreterGenerator::generate_normal_entry(bool synchronized) { ++ // determine code generation flags ++ bool inc_counter = UseCompiler || CountCompiledCalls; ++ ++ // Rmethod: Method* ++ // Rsender: sender 's sp ++ address entry_point = __ pc(); ++ ++ const Address invocation_counter(Rmethod, ++ in_bytes(MethodCounters::invocation_counter_offset() + InvocationCounter::counter_offset())); ++ ++ // get parameter size (always needed) ++ __ ld(T3, Rmethod, in_bytes(Method::const_offset())); //T3 --> Rmethod._constMethod ++ __ lhu(V0, T3, in_bytes(ConstMethod::size_of_parameters_offset())); ++ ++ // Rmethod: Method* ++ // V0: size of parameters ++ // Rsender: sender 's sp ,could be different frome sp+ wordSize if we call via c2i ++ // get size of locals in words to T2 ++ __ lhu(T2, T3, in_bytes(ConstMethod::size_of_locals_offset())); ++ // T2 = no. of additional locals, locals include parameters ++ __ dsubu(T2, T2, V0); ++ ++ // see if we've got enough room on the stack for locals plus overhead. ++ // Layout of frame at this point ++ // ++ // [ argument word n-1 ] <--- sp ++ // ... ++ // [ argument word 0 ] ++ generate_stack_overflow_check(); ++ // after this function, the layout of frame does not change ++ ++ // compute beginning of parameters (LVP) ++ __ dsll(LVP, V0, LogBytesPerWord); ++ __ daddiu(LVP, LVP, (-1) * wordSize); ++ __ daddu(LVP, LVP, SP); ++ ++ // T2 - # of additional locals ++ // allocate space for locals ++ // explicitly initialize locals ++ { ++ Label exit, loop; ++ __ beq(T2, R0, exit); ++ __ delayed()->nop(); ++ ++ __ bind(loop); ++ __ daddiu(SP, SP, (-1) * wordSize); ++ __ daddiu(T2, T2, -1); // until everything initialized ++ __ bne(T2, R0, loop); ++ __ delayed()->sd(R0, SP, 0); // initialize local variables ++ ++ __ bind(exit); ++ } ++ ++ // ++ // [ local var m-1 ] <--- sp ++ // ... ++ // [ local var 0 ] ++ // [ argument word n-1 ] <--- T0? ++ // ... ++ // [ argument word 0 ] <--- LVP ++ ++ // initialize fixed part of activation frame ++ ++ generate_fixed_frame(false); ++ ++ ++ // after this function, the layout of frame is as following ++ // ++ // [ monitor block top ] <--- sp ( the top monitor entry ) ++ // [ byte code pointer ] (if native, bcp = 0) ++ // [ constant pool cache ] ++ // [ Method* ] ++ // [ locals offset ] ++ // [ sender's sp ] ++ // [ sender's fp ] <--- fp ++ // [ return address ] ++ // [ local var m-1 ] ++ // ... ++ // [ local var 0 ] ++ // [ argumnet word n-1 ] <--- ( sender's sp ) ++ // ... ++ // [ argument word 0 ] <--- LVP ++ ++ ++ // make sure method is not native & not abstract ++#ifdef ASSERT ++ __ ld(AT, Rmethod, in_bytes(Method::access_flags_offset())); ++ { ++ Label L; ++ __ andi(T2, AT, JVM_ACC_NATIVE); ++ __ beq(T2, R0, L); ++ __ delayed()->nop(); ++ __ stop("tried to execute native method as non-native"); ++ __ bind(L); ++ } ++ { ++ Label L; ++ __ andi(T2, AT, JVM_ACC_ABSTRACT); ++ __ beq(T2, R0, L); ++ __ delayed()->nop(); ++ __ stop("tried to execute abstract method in interpreter"); ++ __ bind(L); ++ } ++#endif ++ ++ // Since at this point in the method invocation the exception handler ++ // would try to exit the monitor of synchronized methods which hasn't ++ // been entered yet, we set the thread local variable ++ // _do_not_unlock_if_synchronized to true. The remove_activation will ++ // check this flag. 
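++  // Editor's note (illustrative arithmetic, not part of the original patch): the locals loop
++  // above allocates and zeroes only the non-parameter slots,
++  //   extra = size_of_locals - size_of_parameters;   // both counted in stack-element slots
++  //   for (int k = 0; k < extra; k++) push(0);       // each extra local starts out zero/NULL
++  // so a method with 5 local slots of which 2 are parameters pushes 3 zero words here, while
++  // the parameters stay where the caller pushed them and are reached through LVP.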
++ ++#ifndef OPT_THREAD ++ Register thread = T8; ++ __ get_thread(thread); ++#else ++ Register thread = TREG; ++#endif ++ __ move(AT, (int)true); ++ __ sb(AT, thread, in_bytes(JavaThread::do_not_unlock_if_synchronized_offset())); ++ ++#ifndef CORE ++ ++ // mdp : T8 ++ // tmp1: T9 ++ // tmp2: T2 ++ __ profile_parameters_type(T8, T9, T2); ++ ++ // increment invocation count & check for overflow ++ Label invocation_counter_overflow; ++ Label profile_method; ++ Label profile_method_continue; ++ if (inc_counter) { ++ generate_counter_incr(&invocation_counter_overflow, ++ &profile_method, ++ &profile_method_continue); ++ if (ProfileInterpreter) { ++ __ bind(profile_method_continue); ++ } ++ } ++ ++ Label continue_after_compile; ++ __ bind(continue_after_compile); ++ ++#endif // CORE ++ ++ bang_stack_shadow_pages(false); ++ ++ // reset the _do_not_unlock_if_synchronized flag ++#ifndef OPT_THREAD ++ __ get_thread(thread); ++#endif ++ __ sb(R0, thread, in_bytes(JavaThread::do_not_unlock_if_synchronized_offset())); ++ ++ // check for synchronized methods ++ // Must happen AFTER invocation_counter check and stack overflow check, ++ // so method is not locked if overflows. ++ // ++ if (synchronized) { ++ // Allocate monitor and lock method ++ lock_method(); ++ } else { ++ // no synchronization necessary ++#ifdef ASSERT ++ { Label L; ++ __ lw(AT, Rmethod, in_bytes(Method::access_flags_offset())); ++ __ andi(T2, AT, JVM_ACC_SYNCHRONIZED); ++ __ beq(T2, R0, L); ++ __ delayed()->nop(); ++ __ stop("method needs synchronization"); ++ __ bind(L); ++ } ++#endif ++ } ++ ++ // layout of frame after lock_method ++ // [ monitor entry ] <--- sp ++ // ... ++ // [ monitor entry ] ++ // [ monitor block top ] ( the top monitor entry ) ++ // [ byte code pointer ] (if native, bcp = 0) ++ // [ constant pool cache ] ++ // [ Method* ] ++ // [ locals offset ] ++ // [ sender's sp ] ++ // [ sender's fp ] ++ // [ return address ] <--- fp ++ // [ local var m-1 ] ++ // ... ++ // [ local var 0 ] ++ // [ argumnet word n-1 ] <--- ( sender's sp ) ++ // ... ++ // [ argument word 0 ] <--- LVP ++ ++ ++ // start execution ++#ifdef ASSERT ++ { ++ Label L; ++ __ ld(AT, FP, frame::interpreter_frame_monitor_block_top_offset * wordSize); ++ __ beq(AT, SP, L); ++ __ delayed()->nop(); ++ __ stop("broken stack frame setup in interpreter in native"); ++ __ bind(L); ++ } ++#endif ++ ++ // jvmti/jvmpi support ++ __ notify_method_entry(); ++ ++ __ dispatch_next(vtos); ++ ++ // invocation counter overflow ++ if (inc_counter) { ++ if (ProfileInterpreter) { ++ // We have decided to profile this method in the interpreter ++ __ bind(profile_method); ++ __ call_VM(noreg, CAST_FROM_FN_PTR(address, ++ InterpreterRuntime::profile_method)); ++ __ set_method_data_pointer_for_bcp(); ++ __ get_method(Rmethod); ++ __ b(profile_method_continue); ++ __ delayed()->nop(); ++ } ++ // Handle overflow of counter and compile method ++ __ bind(invocation_counter_overflow); ++ generate_counter_overflow(&continue_after_compile); ++ } ++ ++ return entry_point; ++} ++ ++// Entry points ++// ++// Here we generate the various kind of entries into the interpreter. ++// The two main entry type are generic bytecode methods and native ++// call method. These both come in synchronized and non-synchronized ++// versions but the frame layout they create is very similar. The ++// other method entry types are really just special purpose entries ++// that are really entry and interpretation all in one. These are for ++// trivial methods like accessor, empty, or special math methods. 
++// ++// When control flow reaches any of the entry types for the interpreter ++// the following holds -> ++// ++// Arguments: ++// ++// Rmethod: Method* ++// V0: receiver ++// ++// ++// Stack layout immediately at entry ++// ++// [ parameter n-1 ] <--- sp ++// ... ++// [ parameter 0 ] ++// [ expression stack ] (caller's java expression stack) ++ ++// Assuming that we don't go to one of the trivial specialized entries ++// the stack will look like below when we are ready to execute the ++// first bytecode (or call the native routine). The register usage ++// will be as the template based interpreter expects (see ++// interpreter_mips_64.hpp). ++// ++// local variables follow incoming parameters immediately; i.e. ++// the return address is moved to the end of the locals). ++// ++// [ monitor entry ] <--- sp ++// ... ++// [ monitor entry ] ++// [ monitor block top ] ( the top monitor entry ) ++// [ byte code pointer ] (if native, bcp = 0) ++// [ constant pool cache ] ++// [ Method* ] ++// [ locals offset ] ++// [ sender's sp ] ++// [ sender's fp ] ++// [ return address ] <--- fp ++// [ local var m-1 ] ++// ... ++// [ local var 0 ] ++// [ argumnet word n-1 ] <--- ( sender's sp ) ++// ... ++// [ argument word 0 ] <--- S7 ++ ++address AbstractInterpreterGenerator::generate_method_entry( ++ AbstractInterpreter::MethodKind kind) { ++ // determine code generation flags ++ bool synchronized = false; ++ address entry_point = NULL; ++ switch (kind) { ++ case Interpreter::zerolocals : ++ break; ++ case Interpreter::zerolocals_synchronized: ++ synchronized = true; ++ break; ++ case Interpreter::native : ++ entry_point = ((InterpreterGenerator*)this)->generate_native_entry(false); ++ break; ++ case Interpreter::native_synchronized : ++ entry_point = ((InterpreterGenerator*)this)->generate_native_entry(true); ++ break; ++ case Interpreter::empty : ++ entry_point = ((InterpreterGenerator*)this)->generate_empty_entry(); ++ break; ++ case Interpreter::accessor : ++ entry_point = ((InterpreterGenerator*)this)->generate_accessor_entry(); ++ break; ++ case Interpreter::abstract : ++ entry_point = ((InterpreterGenerator*)this)->generate_abstract_entry(); ++ break; ++ ++ case Interpreter::java_lang_math_sin : // fall thru ++ case Interpreter::java_lang_math_cos : // fall thru ++ case Interpreter::java_lang_math_tan : // fall thru ++ case Interpreter::java_lang_math_log : // fall thru ++ case Interpreter::java_lang_math_log10 : // fall thru ++ case Interpreter::java_lang_math_pow : // fall thru ++ case Interpreter::java_lang_math_exp : break; ++ case Interpreter::java_lang_math_abs : // fall thru ++ case Interpreter::java_lang_math_sqrt : ++ entry_point = ((InterpreterGenerator*)this)->generate_math_entry(kind); break; ++ case Interpreter::java_lang_ref_reference_get: ++ entry_point = ((InterpreterGenerator*)this)->generate_Reference_get_entry(); break; ++ default: ++ fatal(err_msg("unexpected method kind: %d", kind)); ++ break; ++ } ++ if (entry_point) return entry_point; ++ ++ return ((InterpreterGenerator*)this)->generate_normal_entry(synchronized); ++} ++ ++// These should never be compiled since the interpreter will prefer ++// the compiled version to the intrinsic version. 
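++// Editor's observation (not part of the original patch): in the generate_method_entry()
++// switch above, only java_lang_math_abs and java_lang_math_sqrt receive a dedicated math
++// entry on this port; the remaining math kinds leave entry_point == NULL and fall through to
++// generate_normal_entry(). can_be_compiled() below nevertheless keeps the full list of math
++// kinds non-compilable, matching the equivalent lists on other ports.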
++bool AbstractInterpreter::can_be_compiled(methodHandle m) { ++ switch (method_kind(m)) { ++ case Interpreter::java_lang_math_sin : // fall thru ++ case Interpreter::java_lang_math_cos : // fall thru ++ case Interpreter::java_lang_math_tan : // fall thru ++ case Interpreter::java_lang_math_abs : // fall thru ++ case Interpreter::java_lang_math_log : // fall thru ++ case Interpreter::java_lang_math_log10 : // fall thru ++ case Interpreter::java_lang_math_sqrt : // fall thru ++ case Interpreter::java_lang_math_pow : // fall thru ++ case Interpreter::java_lang_math_exp : ++ return false; ++ default: ++ return true; ++ } ++} ++ ++// How much stack a method activation needs in words. ++int AbstractInterpreter::size_top_interpreter_activation(Method* method) { ++ ++ const int entry_size = frame::interpreter_frame_monitor_size(); ++ ++ // total overhead size: entry_size + (saved fp thru expr stack bottom). ++ // be sure to change this if you add/subtract anything to/from the overhead area ++ const int overhead_size = -(frame::interpreter_frame_initial_sp_offset) + entry_size; ++ ++ const int stub_code = 6; // see generate_call_stub ++ // return overhead_size + method->max_locals() + method->max_stack() + stub_code; ++ const int method_stack = (method->max_locals() + method->max_stack()) * ++ Interpreter::stackElementWords; ++ return overhead_size + method_stack + stub_code; ++} ++ ++void AbstractInterpreter::layout_activation(Method* method, ++ int tempcount, ++ int popframe_extra_args, ++ int moncount, ++ int caller_actual_parameters, ++ int callee_param_count, ++ int callee_locals, ++ frame* caller, ++ frame* interpreter_frame, ++ bool is_top_frame, ++ bool is_bottom_frame) { ++ // Note: This calculation must exactly parallel the frame setup ++ // in AbstractInterpreterGenerator::generate_method_entry. ++ // If interpreter_frame!=NULL, set up the method, locals, and monitors. ++ // The frame interpreter_frame, if not NULL, is guaranteed to be the ++ // right size, as determined by a previous call to this method. ++ // It is also guaranteed to be walkable even though it is in a skeletal state ++ ++ // fixed size of an interpreter frame: ++ ++ int max_locals = method->max_locals() * Interpreter::stackElementWords; ++ int extra_locals = (method->max_locals() - method->size_of_parameters()) * Interpreter::stackElementWords; ++ ++#ifdef ASSERT ++ if (!EnableInvokeDynamic) { ++ // @@@ FIXME: Should we correct interpreter_frame_sender_sp in the calling sequences? ++ // Probably, since deoptimization doesn't work yet. 
++ assert(caller->unextended_sp() == interpreter_frame->interpreter_frame_sender_sp(), "Frame not properly walkable"); ++ } ++ assert(caller->sp() == interpreter_frame->sender_sp(), "Frame not properly walkable(2)"); ++#endif ++ ++ interpreter_frame->interpreter_frame_set_method(method); ++ // NOTE the difference in using sender_sp and interpreter_frame_sender_sp ++ // interpreter_frame_sender_sp is the original sp of the caller (the unextended_sp) ++ // and sender_sp is fp+8 ++ intptr_t* locals = interpreter_frame->sender_sp() + max_locals - 1; ++ ++#ifdef ASSERT ++ if (caller->is_interpreted_frame()) { ++ assert(locals < caller->fp() + frame::interpreter_frame_initial_sp_offset, "bad placement"); ++ } ++#endif ++ ++ interpreter_frame->interpreter_frame_set_locals(locals); ++ BasicObjectLock* montop = interpreter_frame->interpreter_frame_monitor_begin(); ++ BasicObjectLock* monbot = montop - moncount; ++ interpreter_frame->interpreter_frame_set_monitor_end(montop - moncount); ++ ++ //set last sp; ++ intptr_t* sp = (intptr_t*) monbot - tempcount*Interpreter::stackElementWords - ++ popframe_extra_args; ++ interpreter_frame->interpreter_frame_set_last_sp(sp); ++ // All frames but the initial interpreter frame we fill in have a ++ // value for sender_sp that allows walking the stack but isn't ++ // truly correct. Correct the value here. ++ // ++ if (extra_locals != 0 && ++ interpreter_frame->sender_sp() == interpreter_frame->interpreter_frame_sender_sp() ) { ++ interpreter_frame->set_interpreter_frame_sender_sp(caller->sp() + extra_locals); ++ } ++ *interpreter_frame->interpreter_frame_cache_addr() = method->constants()->cache(); ++} ++ ++//----------------------------------------------------------------------------- ++// Exceptions ++ ++void TemplateInterpreterGenerator::generate_throw_exception() { ++ // Entry point in previous activation (i.e., if the caller was ++ // interpreted) ++ Interpreter::_rethrow_exception_entry = __ pc(); ++ // Restore sp to interpreter_frame_last_sp even though we are going ++ // to empty the expression stack for the exception processing. ++ __ sd(R0,FP, frame::interpreter_frame_last_sp_offset * wordSize); ++ ++ // V0: exception ++ // V1: return address/pc that threw exception ++ __ restore_bcp(); // BCP points to call/send ++ __ restore_locals(); ++ ++ //add for compressedoops ++ __ reinit_heapbase(); ++ // Entry point for exceptions thrown within interpreter code ++ Interpreter::_throw_exception_entry = __ pc(); ++ // expression stack is undefined here ++ // V0: exception ++ // BCP: exception bcp ++ __ verify_oop(V0); ++ ++ // expression stack must be empty before entering the VM in case of an exception ++ __ empty_expression_stack(); ++ // find exception handler address and preserve exception oop ++ __ move(A1, V0); ++ __ call_VM(V1, CAST_FROM_FN_PTR(address, InterpreterRuntime::exception_handler_for_exception), A1); ++ // V0: exception handler entry point ++ // V1: preserved exception oop ++ // S0: bcp for exception handler ++ __ push(V1); // push exception which is now the only value on the stack ++ __ jr(V0); // jump to exception handler (may be _remove_activation_entry!) ++ __ delayed()->nop(); ++ ++ // If the exception is not handled in the current frame the frame is removed and ++ // the exception is rethrown (i.e. exception continuation is _rethrow_exception). ++ // ++ // Note: At this point the bci is still the bxi for the instruction which caused ++ // the exception and the expression stack is empty. 
Thus, for any VM calls ++ // at this point, GC will find a legal oop map (with empty expression stack). ++ ++ // In current activation ++ // V0: exception ++ // BCP: exception bcp ++ ++ // ++ // JVMTI PopFrame support ++ // ++ ++ Interpreter::_remove_activation_preserving_args_entry = __ pc(); ++ __ empty_expression_stack(); ++ // Set the popframe_processing bit in pending_popframe_condition indicating that we are ++ // currently handling popframe, so that call_VMs that may happen later do not trigger new ++ // popframe handling cycles. ++#ifndef OPT_THREAD ++ Register thread = T2; ++ __ get_thread(T2); ++#else ++ Register thread = TREG; ++#endif ++ __ lw(T3, thread, in_bytes(JavaThread::popframe_condition_offset())); ++ __ ori(T3, T3, JavaThread::popframe_processing_bit); ++ __ sw(T3, thread, in_bytes(JavaThread::popframe_condition_offset())); ++ ++#ifndef CORE ++ { ++ // Check to see whether we are returning to a deoptimized frame. ++ // (The PopFrame call ensures that the caller of the popped frame is ++ // either interpreted or compiled and deoptimizes it if compiled.) ++ // In this case, we can't call dispatch_next() after the frame is ++ // popped, but instead must save the incoming arguments and restore ++ // them after deoptimization has occurred. ++ // ++ // Note that we don't compare the return PC against the ++ // deoptimization blob's unpack entry because of the presence of ++ // adapter frames in C2. ++ Label caller_not_deoptimized; ++ __ ld(A0, FP, frame::return_addr_offset * wordSize); ++ __ super_call_VM_leaf(CAST_FROM_FN_PTR(address, InterpreterRuntime::interpreter_contains), A0); ++ __ bne(V0, R0, caller_not_deoptimized); ++ __ delayed()->nop(); ++ ++ // Compute size of arguments for saving when returning to deoptimized caller ++ __ get_method(A1); ++ __ verify_oop(A1); ++ __ ld(A1, A1, in_bytes(Method::const_offset())); ++ __ lhu(A1, A1, in_bytes(ConstMethod::size_of_parameters_offset())); ++ __ shl(A1, Interpreter::logStackElementSize); ++ __ restore_locals(); ++ __ dsubu(A2, LVP, A1); ++ __ daddiu(A2, A2, wordSize); ++ // Save these arguments ++#ifndef OPT_THREAD ++ __ get_thread(A0); ++#else ++ __ move(A0, TREG); ++#endif ++ __ super_call_VM_leaf(CAST_FROM_FN_PTR(address, Deoptimization::popframe_preserve_args), A0, A1, A2); ++ ++ __ remove_activation(vtos, T9, false, false, false); ++ ++ // Inform deoptimization that it is responsible for restoring these arguments ++#ifndef OPT_THREAD ++ __ get_thread(thread); ++#endif ++ __ move(AT, JavaThread::popframe_force_deopt_reexecution_bit); ++ __ sw(AT, thread, in_bytes(JavaThread::popframe_condition_offset())); ++ // Continue in deoptimization handler ++ __ jr(T9); ++ __ delayed()->nop(); ++ ++ __ bind(caller_not_deoptimized); ++ } ++#endif /* !CORE */ ++ ++ __ remove_activation(vtos, T3, ++ /* throw_monitor_exception */ false, ++ /* install_monitor_exception */ false, ++ /* notify_jvmdi */ false); ++ ++ // Clear the popframe condition flag ++ // Finish with popframe handling ++ // A previous I2C followed by a deoptimization might have moved the ++ // outgoing arguments further up the stack. PopFrame expects the ++ // mutations to those outgoing arguments to be preserved and other ++ // constraints basically require this frame to look exactly as ++ // though it had previously invoked an interpreted activation with ++ // no space between the top of the expression stack (current ++ // last_sp) and the top of stack. 
Rather than force deopt to ++ // maintain this kind of invariant all the time we call a small ++ // fixup routine to move the mutated arguments onto the top of our ++ // expression stack if necessary. ++ __ move(T8, SP); ++ __ ld(A2, FP, frame::interpreter_frame_last_sp_offset * wordSize); ++#ifndef OPT_THREAD ++ __ get_thread(thread); ++#endif ++ // PC must point into interpreter here ++ __ set_last_Java_frame(thread, noreg, FP, __ pc()); ++ __ super_call_VM_leaf(CAST_FROM_FN_PTR(address, InterpreterRuntime::popframe_move_outgoing_args), thread, T8, A2); ++ __ reset_last_Java_frame(thread, true); ++ // Restore the last_sp and null it out ++ __ ld(SP, FP, frame::interpreter_frame_last_sp_offset * wordSize); ++ __ sd(R0, FP, frame::interpreter_frame_last_sp_offset * wordSize); ++ ++ ++ ++ __ move(AT, JavaThread::popframe_inactive); ++ __ sw(AT, thread, in_bytes(JavaThread::popframe_condition_offset())); ++ ++ // Finish with popframe handling ++ __ restore_bcp(); ++ __ restore_locals(); ++#ifndef CORE ++ // The method data pointer was incremented already during ++ // call profiling. We have to restore the mdp for the current bcp. ++ if (ProfileInterpreter) { ++ __ set_method_data_pointer_for_bcp(); ++ } ++#endif // !CORE ++ // Clear the popframe condition flag ++#ifndef OPT_THREAD ++ __ get_thread(thread); ++#endif ++ __ move(AT, JavaThread::popframe_inactive); ++ __ sw(AT, thread, in_bytes(JavaThread::popframe_condition_offset())); ++ ++#if INCLUDE_JVMTI ++ { ++ Label L_done; ++ ++ __ lbu(AT, BCP, 0); ++ __ daddiu(AT, AT, -1 * Bytecodes::_invokestatic); ++ __ bne(AT, R0, L_done); ++ __ delayed()->nop(); ++ ++ // The member name argument must be restored if _invokestatic is re-executed after a PopFrame call. ++ // Detect such a case in the InterpreterRuntime function and return the member name argument, or NULL. 
++ ++ __ get_method(T9); ++ __ ld(T8, LVP, 0); ++ __ call_VM(T8, CAST_FROM_FN_PTR(address, InterpreterRuntime::member_name_arg_or_null), T8, T9, BCP); ++ ++ __ beq(T8, R0, L_done); ++ __ delayed()->nop(); ++ ++ __ sd(T8, SP, 0); ++ __ bind(L_done); ++ } ++#endif // INCLUDE_JVMTI ++ ++ __ dispatch_next(vtos); ++ // end of PopFrame support ++ ++ Interpreter::_remove_activation_entry = __ pc(); ++ ++ // preserve exception over this code sequence ++ __ pop(T0); ++#ifndef OPT_THREAD ++ __ get_thread(thread); ++#endif ++ __ sd(T0, thread, in_bytes(JavaThread::vm_result_offset())); ++ // remove the activation (without doing throws on illegalMonitorExceptions) ++ __ remove_activation(vtos, T3, false, true, false); ++ // restore exception ++ __ get_vm_result(T0, thread); ++ __ verify_oop(T0); ++ ++ // In between activations - previous activation type unknown yet ++ // compute continuation point - the continuation point expects ++ // the following registers set up: ++ // ++ // T0: exception ++ // T1: return address/pc that threw exception ++ // SP: expression stack of caller ++ // FP: fp of caller ++ __ push2(T0, T3); // save exception and return address ++ __ move(A1, T3); ++ __ super_call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::exception_handler_for_return_address), thread, A1); ++ __ move(T9, V0); // save exception handler ++ __ pop2(V0, V1); // restore return address and exception ++ ++ // Note that an "issuing PC" is actually the next PC after the call ++ __ jr(T9); // jump to exception handler of caller ++ __ delayed()->nop(); ++} ++ ++ ++// ++// JVMTI ForceEarlyReturn support ++// ++address TemplateInterpreterGenerator::generate_earlyret_entry_for(TosState state) { ++ address entry = __ pc(); ++ __ restore_bcp(); ++ __ restore_locals(); ++ __ empty_expression_stack(); ++ __ empty_FPU_stack(); ++ __ load_earlyret_value(state); ++ ++#ifndef OPT_THREAD ++ __ get_thread(TREG); ++#endif ++ __ ld_ptr(T9, TREG, in_bytes(JavaThread::jvmti_thread_state_offset())); ++ const Address cond_addr(T9, in_bytes(JvmtiThreadState::earlyret_state_offset())); ++ // Clear the earlyret state ++ __ move(AT, JvmtiThreadState::earlyret_inactive); ++ __ sw(AT, cond_addr); ++ __ sync(); ++ ++ ++ __ remove_activation(state, T0, ++ false, /* throw_monitor_exception */ ++ false, /* install_monitor_exception */ ++ true); /* notify_jvmdi */ ++ __ sync(); ++ __ jr(T0); ++ __ delayed()->nop(); ++ return entry; ++} // end of ForceEarlyReturn support ++ ++ ++//----------------------------------------------------------------------------- ++// Helper for vtos entry point generation ++ ++void TemplateInterpreterGenerator::set_vtos_entry_points(Template* t, ++ address& bep, ++ address& cep, ++ address& sep, ++ address& aep, ++ address& iep, ++ address& lep, ++ address& fep, ++ address& dep, ++ address& vep) { ++ assert(t->is_valid() && t->tos_in() == vtos, "illegal template"); ++ Label L; ++ fep = __ pc(); __ push(ftos); __ b(L); __ delayed()->nop(); ++ dep = __ pc(); __ push(dtos); __ b(L); __ delayed()->nop(); ++ lep = __ pc(); __ push(ltos); __ b(L); __ delayed()->nop(); ++ aep =__ pc(); __ push(atos); __ b(L); __ delayed()->nop(); ++ bep = cep = sep = ++ iep = __ pc(); __ push(itos); ++ vep = __ pc(); ++ __ bind(L); ++ generate_and_dispatch(t); ++} ++ ++ ++//----------------------------------------------------------------------------- ++// Generation of individual instructions ++ ++// helpers for generate_and_dispatch ++ ++ ++InterpreterGenerator::InterpreterGenerator(StubQueue* code) ++ : 
TemplateInterpreterGenerator(code) { ++ generate_all(); // down here so it can be "virtual" ++} ++ ++//----------------------------------------------------------------------------- ++ ++// Non-product code ++#ifndef PRODUCT ++address TemplateInterpreterGenerator::generate_trace_code(TosState state) { ++ address entry = __ pc(); ++ ++ // prepare expression stack ++ __ push(state); // save tosca ++ ++ // tos & tos2 ++ // trace_bytecode need actually 4 args, the last two is tos&tos2 ++ // this work fine for x86. but mips o32 call convention will store A2-A3 ++ // to the stack position it think is the tos&tos2 ++ // when the expression stack have no more than 2 data, error occur. ++ __ ld(A2, SP, 0); ++ __ ld(A3, SP, 1 * wordSize); ++ ++ // pass arguments & call tracer ++ __ call_VM(noreg, CAST_FROM_FN_PTR(address, SharedRuntime::trace_bytecode), RA, A2, A3); ++ __ move(RA, V0); // make sure return address is not destroyed by pop(state) ++ ++ // restore expression stack ++ __ pop(state); // restore tosca ++ ++ // return ++ __ jr(RA); ++ __ delayed()->nop(); ++ ++ return entry; ++} ++ ++void TemplateInterpreterGenerator::count_bytecode() { ++ __ li(T8, (long)&BytecodeCounter::_counter_value); ++ __ lw(AT, T8, 0); ++ __ daddiu(AT, AT, 1); ++ __ sw(AT, T8, 0); ++} ++ ++void TemplateInterpreterGenerator::histogram_bytecode(Template* t) { ++ __ li(T8, (long)&BytecodeHistogram::_counters[t->bytecode()]); ++ __ lw(AT, T8, 0); ++ __ daddiu(AT, AT, 1); ++ __ sw(AT, T8, 0); ++} ++ ++void TemplateInterpreterGenerator::histogram_bytecode_pair(Template* t) { ++ __ li(T8, (long)&BytecodePairHistogram::_index); ++ __ lw(T9, T8, 0); ++ __ dsrl(T9, T9, BytecodePairHistogram::log2_number_of_codes); ++ __ li(T8, ((long)t->bytecode()) << BytecodePairHistogram::log2_number_of_codes); ++ __ orr(T9, T9, T8); ++ __ li(T8, (long)&BytecodePairHistogram::_index); ++ __ sw(T9, T8, 0); ++ __ dsll(T9, T9, 2); ++ __ li(T8, (long)BytecodePairHistogram::_counters); ++ __ daddu(T8, T8, T9); ++ __ lw(AT, T8, 0); ++ __ daddiu(AT, AT, 1); ++ __ sw(AT, T8, 0); ++} ++ ++ ++void TemplateInterpreterGenerator::trace_bytecode(Template* t) { ++ // Call a little run-time stub to avoid blow-up for each bytecode. ++ // The run-time runtime saves the right registers, depending on ++ // the tosca in-state for the given template. ++ ++ address entry = Interpreter::trace_code(t->tos_in()); ++ assert(entry != NULL, "entry must have been generated"); ++ __ call(entry, relocInfo::none); ++ __ delayed()->nop(); ++ //add for compressedoops ++ __ reinit_heapbase(); ++} ++ ++ ++void TemplateInterpreterGenerator::stop_interpreter_at() { ++ Label L; ++ __ li(T8, long(&BytecodeCounter::_counter_value)); ++ __ lw(T8, T8, 0); ++ __ move(AT, StopInterpreterAt); ++ __ bne(T8, AT, L); ++ __ delayed()->nop(); ++ __ brk(5); ++ __ delayed()->nop(); ++ __ bind(L); ++} ++#endif // !PRODUCT ++#endif // ! CC_INTERP +diff --git a/hotspot/src/cpu/mips/vm/templateTable_mips.hpp b/hotspot/src/cpu/mips/vm/templateTable_mips.hpp +new file mode 100644 +index 0000000000..d879e6dc92 +--- /dev/null ++++ b/hotspot/src/cpu/mips/vm/templateTable_mips.hpp +@@ -0,0 +1,34 @@ ++/* ++ * Copyright (c) 2003, 2012, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2016, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++ static void prepare_invoke(Register method, Register index, int byte_no, ++ Bytecodes::Code code); ++ static void invokevirtual_helper(Register index, Register recv, ++ Register flags); ++ static void volatile_barrier(); ++ ++ // Helpers ++ static void index_check(Register array, Register index); ++ static void index_check_without_pop(Register array, Register index); +diff --git a/hotspot/src/cpu/mips/vm/templateTable_mips_64.cpp b/hotspot/src/cpu/mips/vm/templateTable_mips_64.cpp +new file mode 100644 +index 0000000000..7415511b99 +--- /dev/null ++++ b/hotspot/src/cpu/mips/vm/templateTable_mips_64.cpp +@@ -0,0 +1,4623 @@ ++/* ++ * Copyright (c) 2003, 2013, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ * ++ */ ++ ++#include "precompiled.hpp" ++#include "asm/macroAssembler.hpp" ++#include "interpreter/interpreter.hpp" ++#include "interpreter/interpreterRuntime.hpp" ++#include "interpreter/templateTable.hpp" ++#include "memory/universe.inline.hpp" ++#include "oops/methodData.hpp" ++#include "oops/objArrayKlass.hpp" ++#include "oops/oop.inline.hpp" ++#include "prims/methodHandles.hpp" ++#include "runtime/sharedRuntime.hpp" ++#include "runtime/stubRoutines.hpp" ++#include "runtime/synchronizer.hpp" ++#include "utilities/macros.hpp" ++ ++ ++#ifndef CC_INTERP ++ ++#define __ _masm-> ++ ++#define A0 RA0 ++#define A1 RA1 ++#define A2 RA2 ++#define A3 RA3 ++#define A4 RA4 ++#define A5 RA5 ++#define A6 RA6 ++#define A7 RA7 ++#define T0 RT0 ++#define T1 RT1 ++#define T2 RT2 ++#define T3 RT3 ++#define T8 RT8 ++#define T9 RT9 ++ ++// Platform-dependent initialization ++ ++void TemplateTable::pd_initialize() { ++ // No mips specific initialization ++} ++ ++// Address computation: local variables ++ ++static inline Address iaddress(int n) { ++ return Address(LVP, Interpreter::local_offset_in_bytes(n)); ++} ++ ++static inline Address laddress(int n) { ++ return iaddress(n + 1); ++} ++ ++static inline Address faddress(int n) { ++ return iaddress(n); ++} ++ ++static inline Address daddress(int n) { ++ return laddress(n); ++} ++ ++static inline Address aaddress(int n) { ++ return iaddress(n); ++} ++static inline Address haddress(int n) { return iaddress(n + 0); } ++ ++ ++static inline Address at_sp() { return Address(SP, 0); } ++static inline Address at_sp_p1() { return Address(SP, 1 * wordSize); } ++static inline Address at_sp_p2() { return Address(SP, 2 * wordSize); } ++ ++// At top of Java expression stack which may be different than sp(). It ++// isn't for category 1 objects. ++static inline Address at_tos () { ++ Address tos = Address(SP, Interpreter::expr_offset_in_bytes(0)); ++ return tos; ++} ++ ++static inline Address at_tos_p1() { ++ return Address(SP, Interpreter::expr_offset_in_bytes(1)); ++} ++ ++static inline Address at_tos_p2() { ++ return Address(SP, Interpreter::expr_offset_in_bytes(2)); ++} ++ ++static inline Address at_tos_p3() { ++ return Address(SP, Interpreter::expr_offset_in_bytes(3)); ++} ++ ++// we use S0 as bcp, be sure you have bcp in S0 before you call any of the Template generator ++Address TemplateTable::at_bcp(int offset) { ++ assert(_desc->uses_bcp(), "inconsistent uses_bcp information"); ++ return Address(BCP, offset); ++} ++ ++// Miscelaneous helper routines ++// Store an oop (or NULL) at the address described by obj. ++// If val == noreg this means store a NULL ++ ++static void do_oop_store(InterpreterMacroAssembler* _masm, ++ Address obj, ++ Register val, ++ BarrierSet::Name barrier, ++ bool precise) { ++ assert(val == noreg || val == V0, "parameter is just for looks"); ++ switch (barrier) { ++#if INCLUDE_ALL_GCS ++ case BarrierSet::G1SATBCT: ++ case BarrierSet::G1SATBCTLogging: ++ { ++ // flatten object address if needed ++ if (obj.index() == noreg && obj.disp() == 0) { ++ if (obj.base() != T3) { ++ __ move(T3, obj.base()); ++ } ++ } else { ++ __ lea(T3, obj); ++ } ++ __ g1_write_barrier_pre(T3 /* obj */, ++ T1 /* pre_val */, ++ TREG /* thread */, ++ T9 /* tmp */, ++ val != noreg /* tosca_live */, ++ false /* expand_call */); ++ if (val == noreg) { ++ __ store_heap_oop_null(Address(T3, 0)); ++ } else { ++ // G1 barrier needs uncompressed oop for region cross check. 
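++        // Editor's sketch (simplified, not the actual barrier code): the G1 post-barrier
++        // compares the heap region of the store address with that of the stored value, roughly
++        //   if ((((uintptr_t)store_addr ^ (uintptr_t)new_val) >> HeapRegion::LogOfHRGrainBytes) != 0
++        //       && new_val != NULL)
++        //     dirty_and_enqueue_card(store_addr);   // hypothetical helper name
++        // which is why an uncompressed copy of the new value is kept in new_val below when
++        // compressed oops are in use.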
++ Register new_val = val; ++ if (UseCompressedOops) { ++ new_val = T1; ++ __ move(new_val, val); ++ } ++ __ store_heap_oop(Address(T3, 0), val); ++ __ g1_write_barrier_post(T3 /* store_adr */, ++ new_val /* new_val */, ++ TREG /* thread */, ++ T9 /* tmp */, ++ T1 /* tmp2 */); ++ } ++ } ++ break; ++#endif // INCLUDE_ALL_GCS ++ case BarrierSet::CardTableModRef: ++ case BarrierSet::CardTableExtension: ++ { ++ if (val == noreg) { ++ __ store_heap_oop_null(obj); ++ } else { ++ __ store_heap_oop(obj, val); ++ // flatten object address if needed ++ if (!precise || (obj.index() == noreg && obj.disp() == 0)) { ++ __ store_check(obj.base()); ++ } else { ++ __ lea(T9, obj); ++ __ store_check(T9); ++ } ++ } ++ } ++ break; ++ case BarrierSet::ModRef: ++ case BarrierSet::Other: ++ if (val == noreg) { ++ __ store_heap_oop_null(obj); ++ } else { ++ __ store_heap_oop(obj, val); ++ } ++ break; ++ default : ++ ShouldNotReachHere(); ++ ++ } ++} ++ ++// bytecode folding ++void TemplateTable::patch_bytecode(Bytecodes::Code bc, Register bc_reg, ++ Register tmp_reg, bool load_bc_into_bc_reg/*=true*/, ++ int byte_no) { ++ if (!RewriteBytecodes) return; ++ Label L_patch_done; ++ ++ switch (bc) { ++ case Bytecodes::_fast_aputfield: ++ case Bytecodes::_fast_bputfield: ++ case Bytecodes::_fast_zputfield: ++ case Bytecodes::_fast_cputfield: ++ case Bytecodes::_fast_dputfield: ++ case Bytecodes::_fast_fputfield: ++ case Bytecodes::_fast_iputfield: ++ case Bytecodes::_fast_lputfield: ++ case Bytecodes::_fast_sputfield: ++ { ++ // We skip bytecode quickening for putfield instructions when ++ // the put_code written to the constant pool cache is zero. ++ // This is required so that every execution of this instruction ++ // calls out to InterpreterRuntime::resolve_get_put to do ++ // additional, required work. ++ assert(byte_no == f1_byte || byte_no == f2_byte, "byte_no out of range"); ++ assert(load_bc_into_bc_reg, "we use bc_reg as temp"); ++ __ get_cache_and_index_and_bytecode_at_bcp(tmp_reg, bc_reg, tmp_reg, byte_no, 1); ++ __ daddiu(bc_reg, R0, bc); ++ __ beq(tmp_reg, R0, L_patch_done); ++ __ delayed()->nop(); ++ } ++ break; ++ default: ++ assert(byte_no == -1, "sanity"); ++ // the pair bytecodes have already done the load. 
++ if (load_bc_into_bc_reg) { ++ __ move(bc_reg, bc); ++ } ++ } ++ ++ if (JvmtiExport::can_post_breakpoint()) { ++ Label L_fast_patch; ++ // if a breakpoint is present we can't rewrite the stream directly ++ __ lbu(tmp_reg, at_bcp(0)); ++ __ move(AT, Bytecodes::_breakpoint); ++ __ bne(tmp_reg, AT, L_fast_patch); ++ __ delayed()->nop(); ++ ++ __ get_method(tmp_reg); ++ // Let breakpoint table handling rewrite to quicker bytecode ++ __ call_VM(NOREG, CAST_FROM_FN_PTR(address, ++ InterpreterRuntime::set_original_bytecode_at), tmp_reg, BCP, bc_reg); ++ ++ __ b(L_patch_done); ++ __ delayed()->nop(); ++ __ bind(L_fast_patch); ++ } ++ ++#ifdef ASSERT ++ Label L_okay; ++ __ lbu(tmp_reg, at_bcp(0)); ++ __ move(AT, (int)Bytecodes::java_code(bc)); ++ __ beq(tmp_reg, AT, L_okay); ++ __ delayed()->nop(); ++ __ beq(tmp_reg, bc_reg, L_patch_done); ++ __ delayed()->nop(); ++ __ stop("patching the wrong bytecode"); ++ __ bind(L_okay); ++#endif ++ ++ // patch bytecode ++ __ sb(bc_reg, at_bcp(0)); ++ __ bind(L_patch_done); ++} ++ ++ ++// Individual instructions ++ ++void TemplateTable::nop() { ++ transition(vtos, vtos); ++ // nothing to do ++} ++ ++void TemplateTable::shouldnotreachhere() { ++ transition(vtos, vtos); ++ __ stop("shouldnotreachhere bytecode"); ++} ++ ++void TemplateTable::aconst_null() { ++ transition(vtos, atos); ++ __ move(FSR, R0); ++} ++ ++void TemplateTable::iconst(int value) { ++ transition(vtos, itos); ++ if (value == 0) { ++ __ move(FSR, R0); ++ } else { ++ __ move(FSR, value); ++ } ++} ++ ++void TemplateTable::lconst(int value) { ++ transition(vtos, ltos); ++ if (value == 0) { ++ __ move(FSR, R0); ++ } else { ++ __ move(FSR, value); ++ } ++} ++ ++void TemplateTable::fconst(int value) { ++ transition(vtos, ftos); ++ switch( value ) { ++ case 0: __ mtc1(R0, FSF); return; ++ case 1: __ addiu(AT, R0, 1); break; ++ case 2: __ addiu(AT, R0, 2); break; ++ default: ShouldNotReachHere(); ++ } ++ __ mtc1(AT, FSF); ++ __ cvt_s_w(FSF, FSF); ++} ++ ++void TemplateTable::dconst(int value) { ++ transition(vtos, dtos); ++ switch( value ) { ++ case 0: __ dmtc1(R0, FSF); ++ return; ++ case 1: __ daddiu(AT, R0, 1); ++ __ dmtc1(AT, FSF); ++ __ cvt_d_w(FSF, FSF); ++ break; ++ default: ShouldNotReachHere(); ++ } ++} ++ ++void TemplateTable::bipush() { ++ transition(vtos, itos); ++ __ lb(FSR, at_bcp(1)); ++} ++ ++void TemplateTable::sipush() { ++ transition(vtos, itos); ++ __ lb(FSR, BCP, 1); ++ __ lbu(AT, BCP, 2); ++ __ dsll(FSR, FSR, 8); ++ __ orr(FSR, FSR, AT); ++} ++ ++// T1 : tags ++// T2 : index ++// T3 : cpool ++// T8 : tag ++void TemplateTable::ldc(bool wide) { ++ transition(vtos, vtos); ++ Label call_ldc, notFloat, notClass, Done; ++ // get index in cpool ++ if (wide) { ++ __ get_unsigned_2_byte_index_at_bcp(T2, 1); ++ } else { ++ __ lbu(T2, at_bcp(1)); ++ } ++ ++ __ get_cpool_and_tags(T3, T1); ++ ++ const int base_offset = ConstantPool::header_size() * wordSize; ++ const int tags_offset = Array::base_offset_in_bytes(); ++ ++ // get type ++ if (UseLEXT1 && Assembler::is_simm(sizeof(tags_offset), 8)) { ++ __ gslbx(T1, T1, T2, tags_offset); ++ } else { ++ __ daddu(AT, T1, T2); ++ __ lb(T1, AT, tags_offset); ++ } ++ if(os::is_MP()) { ++ __ sync(); // load acquire ++ } ++ //now T1 is the tag ++ ++ // unresolved class - get the resolved class ++ __ daddiu(AT, T1, - JVM_CONSTANT_UnresolvedClass); ++ __ beq(AT, R0, call_ldc); ++ __ delayed()->nop(); ++ ++ // unresolved class in error (resolution failed) - call into runtime ++ // so that the same error from first resolution attempt is thrown. 
++ __ daddiu(AT, T1, -JVM_CONSTANT_UnresolvedClassInError); ++ __ beq(AT, R0, call_ldc); ++ __ delayed()->nop(); ++ ++ // resolved class - need to call vm to get java mirror of the class ++ __ daddiu(AT, T1, - JVM_CONSTANT_Class); ++ __ bne(AT, R0, notClass); ++ __ delayed()->dsll(T2, T2, Address::times_8); ++ ++ __ bind(call_ldc); ++ __ move(A1, wide); ++ call_VM(FSR, CAST_FROM_FN_PTR(address, InterpreterRuntime::ldc), A1); ++ //__ push(atos); ++ __ daddiu(SP, SP, - Interpreter::stackElementSize); ++ __ b(Done); ++ __ delayed()->sd(FSR, SP, 0); // added for performance issue ++ ++ __ bind(notClass); ++ __ daddiu(AT, T1, -JVM_CONSTANT_Float); ++ __ bne(AT, R0, notFloat); ++ __ delayed()->nop(); ++ // ftos ++ if (UseLEXT1 && Assembler::is_simm(sizeof(base_offset), 8)) { ++ __ gslwxc1(FSF, T3, T2, base_offset); ++ } else { ++ __ daddu(AT, T3, T2); ++ __ lwc1(FSF, AT, base_offset); ++ } ++ //__ push_f(); ++ __ daddiu(SP, SP, - Interpreter::stackElementSize); ++ __ b(Done); ++ __ delayed()->swc1(FSF, SP, 0); ++ ++ __ bind(notFloat); ++#ifdef ASSERT ++ { ++ Label L; ++ __ daddiu(AT, T1, -JVM_CONSTANT_Integer); ++ __ beq(AT, R0, L); ++ __ delayed()->nop(); ++ __ stop("unexpected tag type in ldc"); ++ __ bind(L); ++ } ++#endif ++ // itos JVM_CONSTANT_Integer only ++ if (UseLEXT1 && Assembler::is_simm(sizeof(base_offset), 8)) { ++ __ gslwx(FSR, T3, T2, base_offset); ++ } else { ++ __ daddu(T0, T3, T2); ++ __ lw(FSR, T0, base_offset); ++ } ++ __ push(itos); ++ __ bind(Done); ++} ++ ++// Fast path for caching oop constants. ++void TemplateTable::fast_aldc(bool wide) { ++ transition(vtos, atos); ++ ++ Register result = FSR; ++ Register tmp = SSR; ++ int index_size = wide ? sizeof(u2) : sizeof(u1); ++ ++ Label resolved; ++ ++ // We are resolved if the resolved reference cache entry contains a ++ // non-null object (String, MethodType, etc.) 
++ assert_different_registers(result, tmp); ++ __ get_cache_index_at_bcp(tmp, 1, index_size); ++ __ load_resolved_reference_at_index(result, tmp); ++ __ bne(result, R0, resolved); ++ __ delayed()->nop(); ++ ++ address entry = CAST_FROM_FN_PTR(address, InterpreterRuntime::resolve_ldc); ++ // first time invocation - must resolve first ++ int i = (int)bytecode(); ++ __ move(tmp, i); ++ __ call_VM(result, entry, tmp); ++ ++ __ bind(resolved); ++ ++ if (VerifyOops) { ++ __ verify_oop(result); ++ } ++} ++ ++ ++// used register: T2, T3, T1 ++// T2 : index ++// T3 : cpool ++// T1 : tag ++void TemplateTable::ldc2_w() { ++ transition(vtos, vtos); ++ Label Long, Done; ++ ++ // get index in cpool ++ __ get_unsigned_2_byte_index_at_bcp(T2, 1); ++ ++ __ get_cpool_and_tags(T3, T1); ++ ++ const int base_offset = ConstantPool::header_size() * wordSize; ++ const int tags_offset = Array::base_offset_in_bytes(); ++ ++ // get type in T1 ++ if (UseLEXT1 && Assembler::is_simm(tags_offset, 8)) { ++ __ gslbx(T1, T1, T2, tags_offset); ++ } else { ++ __ daddu(AT, T1, T2); ++ __ lb(T1, AT, tags_offset); ++ } ++ ++ __ daddiu(AT, T1, - JVM_CONSTANT_Double); ++ __ bne(AT, R0, Long); ++ __ delayed()->dsll(T2, T2, Address::times_8); ++ ++ // dtos ++ if (UseLEXT1 && Assembler::is_simm(base_offset, 8)) { ++ __ gsldxc1(FSF, T3, T2, base_offset); ++ } else { ++ __ daddu(AT, T3, T2); ++ __ ldc1(FSF, AT, base_offset); ++ } ++ __ push(dtos); ++ __ b(Done); ++ __ delayed()->nop(); ++ ++ // ltos ++ __ bind(Long); ++ if (UseLEXT1 && Assembler::is_simm(base_offset, 8)) { ++ __ gsldx(FSR, T3, T2, base_offset); ++ } else { ++ __ daddu(AT, T3, T2); ++ __ ld(FSR, AT, base_offset); ++ } ++ __ push(ltos); ++ ++ __ bind(Done); ++} ++ ++// we compute the actual local variable address here ++// the x86 dont do so for it has scaled index memory access model, we dont have, so do here ++void TemplateTable::locals_index(Register reg, int offset) { ++ __ lbu(reg, at_bcp(offset)); ++ __ dsll(reg, reg, Address::times_8); ++ __ dsubu(reg, LVP, reg); ++} ++ ++// this method will do bytecode folding of the two form: ++// iload iload iload caload ++// used register : T2, T3 ++// T2 : bytecode ++// T3 : folded code ++void TemplateTable::iload() { ++ transition(vtos, itos); ++ if (RewriteFrequentPairs) { ++ Label rewrite, done; ++ // get the next bytecode in T2 ++ __ lbu(T2, at_bcp(Bytecodes::length_for(Bytecodes::_iload))); ++ // if _iload, wait to rewrite to iload2. We only want to rewrite the ++ // last two iloads in a pair. Comparing against fast_iload means that ++ // the next bytecode is neither an iload or a caload, and therefore ++ // an iload pair. ++ __ move(AT, Bytecodes::_iload); ++ __ beq(AT, T2, done); ++ __ delayed()->nop(); ++ ++ __ move(T3, Bytecodes::_fast_iload2); ++ __ move(AT, Bytecodes::_fast_iload); ++ __ beq(AT, T2, rewrite); ++ __ delayed()->nop(); ++ ++ // if _caload, rewrite to fast_icaload ++ __ move(T3, Bytecodes::_fast_icaload); ++ __ move(AT, Bytecodes::_caload); ++ __ beq(AT, T2, rewrite); ++ __ delayed()->nop(); ++ ++ // rewrite so iload doesn't check again. 
++ __ move(T3, Bytecodes::_fast_iload); ++ ++ // rewrite ++ // T3 : fast bytecode ++ __ bind(rewrite); ++ patch_bytecode(Bytecodes::_iload, T3, T2, false); ++ __ bind(done); ++ } ++ ++ // Get the local value into tos ++ locals_index(T2); ++ __ lw(FSR, T2, 0); ++} ++ ++// used register T2 ++// T2 : index ++void TemplateTable::fast_iload2() { ++ transition(vtos, itos); ++ locals_index(T2); ++ __ lw(FSR, T2, 0); ++ __ push(itos); ++ locals_index(T2, 3); ++ __ lw(FSR, T2, 0); ++} ++ ++// used register T2 ++// T2 : index ++void TemplateTable::fast_iload() { ++ transition(vtos, itos); ++ locals_index(T2); ++ __ lw(FSR, T2, 0); ++} ++ ++// used register T2 ++// T2 : index ++void TemplateTable::lload() { ++ transition(vtos, ltos); ++ locals_index(T2); ++ __ ld(FSR, T2, -wordSize); ++} ++ ++// used register T2 ++// T2 : index ++void TemplateTable::fload() { ++ transition(vtos, ftos); ++ locals_index(T2); ++ __ lwc1(FSF, T2, 0); ++} ++ ++// used register T2 ++// T2 : index ++void TemplateTable::dload() { ++ transition(vtos, dtos); ++ locals_index(T2); ++ __ ldc1(FSF, T2, -wordSize); ++} ++ ++// used register T2 ++// T2 : index ++void TemplateTable::aload() { ++ transition(vtos, atos); ++ locals_index(T2); ++ __ ld(FSR, T2, 0); ++} ++ ++void TemplateTable::locals_index_wide(Register reg) { ++ __ get_unsigned_2_byte_index_at_bcp(reg, 2); ++ __ dsll(reg, reg, Address::times_8); ++ __ dsubu(reg, LVP, reg); ++} ++ ++// used register T2 ++// T2 : index ++void TemplateTable::wide_iload() { ++ transition(vtos, itos); ++ locals_index_wide(T2); ++ __ ld(FSR, T2, 0); ++} ++ ++// used register T2 ++// T2 : index ++void TemplateTable::wide_lload() { ++ transition(vtos, ltos); ++ locals_index_wide(T2); ++ __ ld(FSR, T2, -wordSize); ++} ++ ++// used register T2 ++// T2 : index ++void TemplateTable::wide_fload() { ++ transition(vtos, ftos); ++ locals_index_wide(T2); ++ __ lwc1(FSF, T2, 0); ++} ++ ++// used register T2 ++// T2 : index ++void TemplateTable::wide_dload() { ++ transition(vtos, dtos); ++ locals_index_wide(T2); ++ __ ldc1(FSF, T2, -wordSize); ++} ++ ++// used register T2 ++// T2 : index ++void TemplateTable::wide_aload() { ++ transition(vtos, atos); ++ locals_index_wide(T2); ++ __ ld(FSR, T2, 0); ++} ++ ++// we use A2 as the regiser for index, BE CAREFUL! 
++// we dont use our tge 29 now, for later optimization ++void TemplateTable::index_check(Register array, Register index) { ++ // Pop ptr into array ++ __ pop_ptr(array); ++ index_check_without_pop(array, index); ++} ++ ++void TemplateTable::index_check_without_pop(Register array, Register index) { ++ // destroys A2 ++ // check array ++ __ null_check(array, arrayOopDesc::length_offset_in_bytes()); ++ ++ // sign extend since tos (index) might contain garbage in upper bits ++ __ sll(index, index, 0); ++ ++ // check index ++ Label ok; ++ __ lw(AT, array, arrayOopDesc::length_offset_in_bytes()); ++#ifndef OPT_RANGECHECK ++ __ sltu(AT, index, AT); ++ __ bne(AT, R0, ok); ++ __ delayed()->nop(); ++ ++ //throw_ArrayIndexOutOfBoundsException assume abberrant index in A2 ++ if (A2 != index) __ move(A2, index); ++ __ jmp(Interpreter::_throw_ArrayIndexOutOfBoundsException_entry); ++ __ delayed()->nop(); ++ __ bind(ok); ++#else ++ __ lw(AT, array, arrayOopDesc::length_offset_in_bytes()); ++ __ move(A2, index); ++ __ tgeu(A2, AT, 29); ++#endif ++} ++ ++void TemplateTable::iaload() { ++ transition(itos, itos); ++ if(UseBoundCheckInstruction) { ++ __ pop(SSR); //SSR:array FSR: index ++ __ dsll(FSR, FSR, 2); ++ __ daddu(FSR, SSR, FSR); ++ __ addiu(FSR, FSR, arrayOopDesc::base_offset_in_bytes(T_INT)); ++ ++ __ lw(AT, SSR, arrayOopDesc::length_offset_in_bytes()); //bound ++ __ dsll(AT, AT, 2); ++ __ daddu(AT, SSR, AT); ++ __ addiu(AT, AT, arrayOopDesc::base_offset_in_bytes(T_INT)); ++ ++ __ gslwle(FSR, FSR, AT); ++ } else { ++ index_check(SSR, FSR); ++ __ dsll(FSR, FSR, 2); ++ if (UseLEXT1 && Assembler::is_simm(arrayOopDesc::base_offset_in_bytes(T_INT), 8)) { ++ __ gslwx(FSR, FSR, SSR, arrayOopDesc::base_offset_in_bytes(T_INT)); ++ } else { ++ __ daddu(FSR, SSR, FSR); ++ __ lw(FSR, FSR, arrayOopDesc::base_offset_in_bytes(T_INT)); ++ } ++ } ++} ++ ++void TemplateTable::laload() { ++ transition(itos, ltos); ++ if(UseBoundCheckInstruction) { ++ __ pop(SSR); //SSR:array FSR: index ++ __ dsll(FSR, FSR, Address::times_8); ++ __ daddu(FSR, SSR, FSR); ++ __ addiu(FSR, FSR, arrayOopDesc::base_offset_in_bytes(T_LONG) + 0 * wordSize); ++ ++ __ lw(AT, SSR, arrayOopDesc::length_offset_in_bytes()); //bound ++ __ dsll(AT, AT, Address::times_8); ++ __ daddu(AT, SSR, AT); ++ __ addiu(AT, AT, arrayOopDesc::base_offset_in_bytes(T_LONG) + 0 * wordSize); ++ ++ __ gsldle(FSR, FSR, AT); ++ } else { ++ index_check(SSR, FSR); ++ __ dsll(AT, FSR, Address::times_8); ++ if (UseLEXT1 && Assembler::is_simm(arrayOopDesc::base_offset_in_bytes(T_LONG), 8)) { ++ __ gsldx(FSR, SSR, AT, arrayOopDesc::base_offset_in_bytes(T_LONG)); ++ } else { ++ __ daddu(AT, SSR, AT); ++ __ ld(FSR, AT, arrayOopDesc::base_offset_in_bytes(T_LONG)); ++ } ++ } ++} ++ ++void TemplateTable::faload() { ++ transition(itos, ftos); ++ if(UseBoundCheckInstruction) { ++ __ pop(SSR); //SSR:array FSR: index ++ __ shl(FSR, 2); ++ __ daddu(FSR, SSR, FSR); ++ __ addiu(FSR, FSR, arrayOopDesc::base_offset_in_bytes(T_FLOAT)); ++ ++ __ lw(AT, SSR, arrayOopDesc::length_offset_in_bytes()); //bound ++ __ shl(AT, 2); ++ __ daddu(AT, SSR, AT); ++ __ addiu(AT, AT, arrayOopDesc::base_offset_in_bytes(T_FLOAT)); ++ ++ __ gslwlec1(FSF, FSR, AT); ++ } else { ++ index_check(SSR, FSR); ++ __ shl(FSR, 2); ++ if (UseLEXT1 && Assembler::is_simm(arrayOopDesc::base_offset_in_bytes(T_FLOAT), 8)) { ++ __ gslwxc1(FSF, SSR, FSR, arrayOopDesc::base_offset_in_bytes(T_FLOAT)); ++ } else { ++ __ daddu(FSR, SSR, FSR); ++ __ lwc1(FSF, FSR, arrayOopDesc::base_offset_in_bytes(T_FLOAT)); ++ } ++ } ++} ++ ++void 
TemplateTable::daload() { ++ transition(itos, dtos); ++ if(UseBoundCheckInstruction) { ++ __ pop(SSR); //SSR:array FSR: index ++ __ dsll(FSR, FSR, 3); ++ __ daddu(FSR, SSR, FSR); ++ __ addiu(FSR, FSR, arrayOopDesc::base_offset_in_bytes(T_DOUBLE) + 0 * wordSize); ++ ++ __ lw(AT, SSR, arrayOopDesc::length_offset_in_bytes()); //bound ++ __ dsll(AT, AT, 3); ++ __ daddu(AT, SSR, AT); ++ __ addiu(AT, AT, arrayOopDesc::base_offset_in_bytes(T_DOUBLE) + 0 * wordSize); ++ ++ __ gsldlec1(FSF, FSR, AT); ++ } else { ++ index_check(SSR, FSR); ++ __ dsll(AT, FSR, 3); ++ if (UseLEXT1 && Assembler::is_simm(arrayOopDesc::base_offset_in_bytes(T_DOUBLE), 8)) { ++ __ gsldxc1(FSF, SSR, AT, arrayOopDesc::base_offset_in_bytes(T_DOUBLE)); ++ } else { ++ __ daddu(AT, SSR, AT); ++ __ ldc1(FSF, AT, arrayOopDesc::base_offset_in_bytes(T_DOUBLE)); ++ } ++ } ++} ++ ++void TemplateTable::aaload() { ++ transition(itos, atos); ++ index_check(SSR, FSR); ++ __ dsll(FSR, FSR, UseCompressedOops ? Address::times_4 : Address::times_8); ++ __ daddu(FSR, SSR, FSR); ++ //add for compressedoops ++ __ load_heap_oop(FSR, Address(FSR, arrayOopDesc::base_offset_in_bytes(T_OBJECT))); ++} ++ ++void TemplateTable::baload() { ++ transition(itos, itos); ++ if(UseBoundCheckInstruction) { ++ __ pop(SSR); //SSR:array FSR:index ++ __ daddu(FSR, SSR, FSR); ++ __ addiu(FSR, FSR, arrayOopDesc::base_offset_in_bytes(T_BYTE)); //base ++ ++ __ lw(AT, SSR, arrayOopDesc::length_offset_in_bytes()); ++ __ daddu(AT, SSR, AT); ++ __ addiu(AT, AT, arrayOopDesc::base_offset_in_bytes(T_BYTE)); //bound ++ ++ __ gslble(FSR, FSR, AT); ++ } else { ++ index_check(SSR, FSR); ++ if (UseLEXT1 && Assembler::is_simm(arrayOopDesc::base_offset_in_bytes(T_BYTE), 8)) { ++ __ gslbx(FSR, SSR, FSR, arrayOopDesc::base_offset_in_bytes(T_BYTE)); ++ } else { ++ __ daddu(FSR, SSR, FSR); ++ __ lb(FSR, FSR, arrayOopDesc::base_offset_in_bytes(T_BYTE)); ++ } ++ } ++} ++ ++void TemplateTable::caload() { ++ transition(itos, itos); ++ index_check(SSR, FSR); ++ __ dsll(FSR, FSR, Address::times_2); ++ __ daddu(FSR, SSR, FSR); ++ __ lhu(FSR, FSR, arrayOopDesc::base_offset_in_bytes(T_CHAR)); ++} ++ ++// iload followed by caload frequent pair ++// used register : T2 ++// T2 : index ++void TemplateTable::fast_icaload() { ++ transition(vtos, itos); ++ // load index out of locals ++ locals_index(T2); ++ __ lw(FSR, T2, 0); ++ index_check(SSR, FSR); ++ __ dsll(FSR, FSR, 1); ++ __ daddu(FSR, SSR, FSR); ++ __ lhu(FSR, FSR, arrayOopDesc::base_offset_in_bytes(T_CHAR)); ++} ++ ++void TemplateTable::saload() { ++ transition(itos, itos); ++ if(UseBoundCheckInstruction) { ++ __ pop(SSR); //SSR:array FSR: index ++ __ dsll(FSR, FSR, Address::times_2); ++ __ daddu(FSR, SSR, FSR); ++ __ addiu(FSR, FSR, arrayOopDesc::base_offset_in_bytes(T_SHORT)); ++ ++ __ lw(AT, SSR, arrayOopDesc::length_offset_in_bytes()); //bound ++ __ dsll(AT, AT, Address::times_2); ++ __ daddu(AT, SSR, AT); ++ __ addiu(AT, AT, arrayOopDesc::base_offset_in_bytes(T_SHORT)); ++ ++ __ gslhle(FSR, FSR, AT); ++ } else { ++ index_check(SSR, FSR); ++ __ dsll(FSR, FSR, Address::times_2); ++ if (UseLEXT1 && Assembler::is_simm(arrayOopDesc::base_offset_in_bytes(T_SHORT), 8)) { ++ __ gslhx(FSR, SSR, FSR, arrayOopDesc::base_offset_in_bytes(T_SHORT)); ++ } else { ++ __ daddu(FSR, SSR, FSR); ++ __ lh(FSR, FSR, arrayOopDesc::base_offset_in_bytes(T_SHORT)); ++ } ++ } ++} ++ ++void TemplateTable::iload(int n) { ++ transition(vtos, itos); ++ __ lw(FSR, iaddress(n)); ++} ++ ++void TemplateTable::lload(int n) { ++ transition(vtos, ltos); ++ __ ld(FSR, 
laddress(n)); ++} ++ ++void TemplateTable::fload(int n) { ++ transition(vtos, ftos); ++ __ lwc1(FSF, faddress(n)); ++} ++ ++void TemplateTable::dload(int n) { ++ transition(vtos, dtos); ++ __ ldc1(FSF, laddress(n)); ++} ++ ++void TemplateTable::aload(int n) { ++ transition(vtos, atos); ++ __ ld(FSR, aaddress(n)); ++} ++ ++// used register : T2, T3 ++// T2 : bytecode ++// T3 : folded code ++void TemplateTable::aload_0() { ++ transition(vtos, atos); ++ // According to bytecode histograms, the pairs: ++ // ++ // _aload_0, _fast_igetfield ++ // _aload_0, _fast_agetfield ++ // _aload_0, _fast_fgetfield ++ // ++ // occur frequently. If RewriteFrequentPairs is set, the (slow) ++ // _aload_0 bytecode checks if the next bytecode is either ++ // _fast_igetfield, _fast_agetfield or _fast_fgetfield and then ++ // rewrites the current bytecode into a pair bytecode; otherwise it ++ // rewrites the current bytecode into _fast_aload_0 that doesn't do ++ // the pair check anymore. ++ // ++ // Note: If the next bytecode is _getfield, the rewrite must be ++ // delayed, otherwise we may miss an opportunity for a pair. ++ // ++ // Also rewrite frequent pairs ++ // aload_0, aload_1 ++ // aload_0, iload_1 ++ // These bytecodes with a small amount of code are most profitable ++ // to rewrite ++ if (RewriteFrequentPairs) { ++ Label rewrite, done; ++ // get the next bytecode in T2 ++ __ lbu(T2, at_bcp(Bytecodes::length_for(Bytecodes::_aload_0))); ++ ++ // do actual aload_0 ++ aload(0); ++ ++ // if _getfield then wait with rewrite ++ __ move(AT, Bytecodes::_getfield); ++ __ beq(AT, T2, done); ++ __ delayed()->nop(); ++ ++ // if _igetfield then reqrite to _fast_iaccess_0 ++ assert(Bytecodes::java_code(Bytecodes::_fast_iaccess_0) == ++ Bytecodes::_aload_0, ++ "fix bytecode definition"); ++ __ move(T3, Bytecodes::_fast_iaccess_0); ++ __ move(AT, Bytecodes::_fast_igetfield); ++ __ beq(AT, T2, rewrite); ++ __ delayed()->nop(); ++ ++ // if _agetfield then reqrite to _fast_aaccess_0 ++ assert(Bytecodes::java_code(Bytecodes::_fast_aaccess_0) == ++ Bytecodes::_aload_0, ++ "fix bytecode definition"); ++ __ move(T3, Bytecodes::_fast_aaccess_0); ++ __ move(AT, Bytecodes::_fast_agetfield); ++ __ beq(AT, T2, rewrite); ++ __ delayed()->nop(); ++ ++ // if _fgetfield then reqrite to _fast_faccess_0 ++ assert(Bytecodes::java_code(Bytecodes::_fast_faccess_0) == ++ Bytecodes::_aload_0, ++ "fix bytecode definition"); ++ __ move(T3, Bytecodes::_fast_faccess_0); ++ __ move(AT, Bytecodes::_fast_fgetfield); ++ __ beq(AT, T2, rewrite); ++ __ delayed()->nop(); ++ ++ // else rewrite to _fast_aload0 ++ assert(Bytecodes::java_code(Bytecodes::_fast_aload_0) == ++ Bytecodes::_aload_0, ++ "fix bytecode definition"); ++ __ move(T3, Bytecodes::_fast_aload_0); ++ ++ // rewrite ++ __ bind(rewrite); ++ patch_bytecode(Bytecodes::_aload_0, T3, T2, false); ++ ++ __ bind(done); ++ } else { ++ aload(0); ++ } ++} ++ ++void TemplateTable::istore() { ++ transition(itos, vtos); ++ locals_index(T2); ++ __ sw(FSR, T2, 0); ++} ++ ++void TemplateTable::lstore() { ++ transition(ltos, vtos); ++ locals_index(T2); ++ __ sd(FSR, T2, -wordSize); ++} ++ ++void TemplateTable::fstore() { ++ transition(ftos, vtos); ++ locals_index(T2); ++ __ swc1(FSF, T2, 0); ++} ++ ++void TemplateTable::dstore() { ++ transition(dtos, vtos); ++ locals_index(T2); ++ __ sdc1(FSF, T2, -wordSize); ++} ++ ++void TemplateTable::astore() { ++ transition(vtos, vtos); ++ __ pop_ptr(FSR); ++ locals_index(T2); ++ __ sd(FSR, T2, 0); ++} ++ ++void TemplateTable::wide_istore() { ++ transition(vtos, vtos); 
++ __ pop_i(FSR); ++ locals_index_wide(T2); ++ __ sd(FSR, T2, 0); ++} ++ ++void TemplateTable::wide_lstore() { ++ transition(vtos, vtos); ++ __ pop_l(FSR); ++ locals_index_wide(T2); ++ __ sd(FSR, T2, -wordSize); ++} ++ ++void TemplateTable::wide_fstore() { ++ wide_istore(); ++} ++ ++void TemplateTable::wide_dstore() { ++ wide_lstore(); ++} ++ ++void TemplateTable::wide_astore() { ++ transition(vtos, vtos); ++ __ pop_ptr(FSR); ++ locals_index_wide(T2); ++ __ sd(FSR, T2, 0); ++} ++ ++// used register : T2 ++void TemplateTable::iastore() { ++ transition(itos, vtos); ++ __ pop_i(SSR); // T2: array SSR: index ++ if(UseBoundCheckInstruction) { ++ __ pop_ptr(T2); ++ __ dsll(SSR, SSR, Address::times_4); ++ __ daddu(SSR, T2, SSR); ++ __ addiu(SSR, SSR, arrayOopDesc::base_offset_in_bytes(T_INT)); // base ++ ++ __ lw(AT, T2, arrayOopDesc::length_offset_in_bytes()); ++ __ dsll(AT, AT, Address::times_4); ++ __ daddu(AT, T2, AT); ++ __ addiu(AT, AT, arrayOopDesc::base_offset_in_bytes(T_INT)); //bound ++ ++ __ gsswle(FSR, SSR, AT); ++ } else { ++ index_check(T2, SSR); // prefer index in SSR ++ __ dsll(SSR, SSR, Address::times_4); ++ if (UseLEXT1 && Assembler::is_simm(arrayOopDesc::base_offset_in_bytes(T_INT), 8)) { ++ __ gsswx(FSR, T2, SSR, arrayOopDesc::base_offset_in_bytes(T_INT)); ++ } else { ++ __ daddu(T2, T2, SSR); ++ __ sw(FSR, T2, arrayOopDesc::base_offset_in_bytes(T_INT)); ++ } ++ } ++} ++ ++ ++ ++// used register T2, T3 ++void TemplateTable::lastore() { ++ transition(ltos, vtos); ++ __ pop_i (T2); ++ if(UseBoundCheckInstruction) { ++ __ pop_ptr(T3); ++ __ dsll(T2, T2, Address::times_8); ++ __ daddu(T2, T3, T2); ++ __ addiu(T2, T2, arrayOopDesc::base_offset_in_bytes(T_LONG) + 0 * wordSize); // base ++ ++ __ lw(AT, T3, arrayOopDesc::length_offset_in_bytes()); ++ __ dsll(AT, AT, Address::times_8); ++ __ daddu(AT, T3, AT); ++ __ addiu(AT, AT, arrayOopDesc::base_offset_in_bytes(T_LONG) + 0 * wordSize); //bound ++ ++ __ gssdle(FSR, T2, AT); ++ } else { ++ index_check(T3, T2); ++ __ dsll(T2, T2, Address::times_8); ++ if (UseLEXT1 && Assembler::is_simm(arrayOopDesc::base_offset_in_bytes(T_LONG), 8)) { ++ __ gssdx(FSR, T3, T2, arrayOopDesc::base_offset_in_bytes(T_LONG)); ++ } else { ++ __ daddu(T3, T3, T2); ++ __ sd(FSR, T3, arrayOopDesc::base_offset_in_bytes(T_LONG)); ++ } ++ } ++} ++ ++// used register T2 ++void TemplateTable::fastore() { ++ transition(ftos, vtos); ++ __ pop_i(SSR); ++ if(UseBoundCheckInstruction) { ++ __ pop_ptr(T2); ++ __ dsll(SSR, SSR, Address::times_4); ++ __ daddu(SSR, T2, SSR); ++ __ addiu(SSR, SSR, arrayOopDesc::base_offset_in_bytes(T_FLOAT)); // base ++ ++ __ lw(AT, T2, arrayOopDesc::length_offset_in_bytes()); ++ __ dsll(AT, AT, Address::times_4); ++ __ daddu(AT, T2, AT); ++ __ addiu(AT, AT, arrayOopDesc::base_offset_in_bytes(T_FLOAT)); //bound ++ ++ __ gsswlec1(FSF, SSR, AT); ++ } else { ++ index_check(T2, SSR); ++ __ dsll(SSR, SSR, Address::times_4); ++ if (UseLEXT1 && Assembler::is_simm(arrayOopDesc::base_offset_in_bytes(T_FLOAT), 8)) { ++ __ gsswxc1(FSF, T2, SSR, arrayOopDesc::base_offset_in_bytes(T_FLOAT)); ++ } else { ++ __ daddu(T2, T2, SSR); ++ __ swc1(FSF, T2, arrayOopDesc::base_offset_in_bytes(T_FLOAT)); ++ } ++ } ++} ++ ++// used register T2, T3 ++void TemplateTable::dastore() { ++ transition(dtos, vtos); ++ __ pop_i (T2); ++ if(UseBoundCheckInstruction) { ++ __ pop_ptr(T3); ++ __ dsll(T2, T2, Address::times_8); ++ __ daddu(T2, T3, T2); ++ __ addiu(T2, T2, arrayOopDesc::base_offset_in_bytes(T_DOUBLE) + 0 * wordSize); // base ++ ++ __ lw(AT, T3, 
arrayOopDesc::length_offset_in_bytes()); ++ __ dsll(AT, AT, Address::times_8); ++ __ daddu(AT, T3, AT); ++ __ addiu(AT, AT, arrayOopDesc::base_offset_in_bytes(T_DOUBLE) + 0 * wordSize); //bound ++ ++ __ gssdlec1(FSF, T2, AT); ++ } else { ++ index_check(T3, T2); ++ __ dsll(T2, T2, Address::times_8); ++ if (UseLEXT1 && Assembler::is_simm(arrayOopDesc::base_offset_in_bytes(T_DOUBLE), 8)) { ++ __ gssdxc1(FSF, T3, T2, arrayOopDesc::base_offset_in_bytes(T_DOUBLE)); ++ } else { ++ __ daddu(T3, T3, T2); ++ __ sdc1(FSF, T3, arrayOopDesc::base_offset_in_bytes(T_DOUBLE)); ++ } ++ } ++} ++ ++// used register : T2, T3, T8 ++// T2 : array ++// T3 : subklass ++// T8 : supklass ++void TemplateTable::aastore() { ++ Label is_null, ok_is_subtype, done; ++ transition(vtos, vtos); ++ // stack: ..., array, index, value ++ __ ld(FSR, at_tos()); // Value ++ __ lw(SSR, at_tos_p1()); // Index ++ __ ld(T2, at_tos_p2()); // Array ++ ++ // index_check(T2, SSR); ++ index_check_without_pop(T2, SSR); ++ // do array store check - check for NULL value first ++ __ beq(FSR, R0, is_null); ++ __ delayed()->nop(); ++ ++ // Move subklass into T3 ++ //add for compressedoops ++ __ load_klass(T3, FSR); ++ // Move superklass into T8 ++ //add for compressedoops ++ __ load_klass(T8, T2); ++ __ ld(T8, Address(T8, ObjArrayKlass::element_klass_offset())); ++ // Compress array+index*4+12 into a single register. T2 ++ __ dsll(AT, SSR, UseCompressedOops? Address::times_4 : Address::times_8); ++ __ daddu(T2, T2, AT); ++ __ daddiu(T2, T2, arrayOopDesc::base_offset_in_bytes(T_OBJECT)); ++ ++ // Generate subtype check. ++ // Superklass in T8. Subklass in T3. ++ __ gen_subtype_check(T8, T3, ok_is_subtype); ++ // Come here on failure ++ // object is at FSR ++ __ jmp(Interpreter::_throw_ArrayStoreException_entry); ++ __ delayed()->nop(); ++ // Come here on success ++ __ bind(ok_is_subtype); ++ do_oop_store(_masm, Address(T2, 0), FSR, _bs->kind(), true); ++ __ b(done); ++ __ delayed()->nop(); ++ ++ // Have a NULL in FSR, T2=array, SSR=index. Store NULL at ary[idx] ++ __ bind(is_null); ++ __ profile_null_seen(T9); ++ __ dsll(AT, SSR, UseCompressedOops? Address::times_4 : Address::times_8); ++ __ daddu(T2, T2, AT); ++ do_oop_store(_masm, Address(T2, arrayOopDesc::base_offset_in_bytes(T_OBJECT)), noreg, _bs->kind(), true); ++ ++ __ bind(done); ++ __ daddiu(SP, SP, 3 * Interpreter::stackElementSize); ++} ++ ++void TemplateTable::bastore() { ++ transition(itos, vtos); ++ __ pop_i(SSR); ++ if(UseBoundCheckInstruction) { ++ guarantee(false, "unimplemented yet!"); ++ __ pop_ptr(T2); ++ __ daddu(SSR, T2, SSR); ++ __ addiu(SSR, SSR, arrayOopDesc::base_offset_in_bytes(T_BYTE)); // base ++ ++ __ lw(AT, T2, arrayOopDesc::length_offset_in_bytes()); ++ __ daddu(AT, T2, AT); ++ __ addiu(AT, AT, arrayOopDesc::base_offset_in_bytes(T_BYTE)); //bound ++ ++ __ gssble(FSR, SSR, AT); ++ } else { ++ index_check(T2, SSR); ++ ++ // Need to check whether array is boolean or byte ++ // since both types share the bastore bytecode. 
++ __ load_klass(T9, T2); ++ __ lw(T9, T9, in_bytes(Klass::layout_helper_offset())); ++ ++ int diffbit = Klass::layout_helper_boolean_diffbit(); ++ __ move(AT, diffbit); ++ ++ Label L_skip; ++ __ andr(AT, T9, AT); ++ __ beq(AT, R0, L_skip); ++ __ delayed()->nop(); ++ __ andi(FSR, FSR, 0x1); ++ __ bind(L_skip); ++ ++ if (UseLEXT1 && Assembler::is_simm(arrayOopDesc::base_offset_in_bytes(T_BYTE), 8)) { ++ __ gssbx(FSR, T2, SSR, arrayOopDesc::base_offset_in_bytes(T_BYTE)); ++ } else { ++ __ daddu(SSR, T2, SSR); ++ __ sb(FSR, SSR, arrayOopDesc::base_offset_in_bytes(T_BYTE)); ++ } ++ } ++} ++ ++void TemplateTable::castore() { ++ transition(itos, vtos); ++ __ pop_i(SSR); ++ if(UseBoundCheckInstruction) { ++ __ pop_ptr(T2); ++ __ dsll(SSR, SSR, Address::times_2); ++ __ daddu(SSR, T2, SSR); ++ __ addiu(SSR, SSR, arrayOopDesc::base_offset_in_bytes(T_CHAR)); // base ++ ++ __ lw(AT, T2, arrayOopDesc::length_offset_in_bytes()); ++ __ dsll(AT, AT, Address::times_2); ++ __ daddu(AT, T2, AT); ++ __ addiu(AT, AT, arrayOopDesc::base_offset_in_bytes(T_CHAR)); //bound ++ ++ __ gsshle(FSR, SSR, AT); ++ } else { ++ index_check(T2, SSR); ++ __ dsll(SSR, SSR, Address::times_2); ++ if (UseLEXT1 && Assembler::is_simm(arrayOopDesc::base_offset_in_bytes(T_CHAR), 8)) { ++ __ gsshx(FSR, T2, SSR, arrayOopDesc::base_offset_in_bytes(T_CHAR)); ++ } else { ++ __ daddu(SSR, T2, SSR); ++ __ sh(FSR, SSR, arrayOopDesc::base_offset_in_bytes(T_CHAR)); ++ } ++ } ++} ++ ++void TemplateTable::sastore() { ++ castore(); ++} ++ ++void TemplateTable::istore(int n) { ++ transition(itos, vtos); ++ __ sw(FSR, iaddress(n)); ++} ++ ++void TemplateTable::lstore(int n) { ++ transition(ltos, vtos); ++ __ sd(FSR, laddress(n)); ++} ++ ++void TemplateTable::fstore(int n) { ++ transition(ftos, vtos); ++ __ swc1(FSF, faddress(n)); ++} ++ ++void TemplateTable::dstore(int n) { ++ transition(dtos, vtos); ++ __ sdc1(FSF, laddress(n)); ++} ++ ++void TemplateTable::astore(int n) { ++ transition(vtos, vtos); ++ __ pop_ptr(FSR); ++ __ sd(FSR, aaddress(n)); ++} ++ ++void TemplateTable::pop() { ++ transition(vtos, vtos); ++ __ daddiu(SP, SP, Interpreter::stackElementSize); ++} ++ ++void TemplateTable::pop2() { ++ transition(vtos, vtos); ++ __ daddiu(SP, SP, 2 * Interpreter::stackElementSize); ++} ++ ++void TemplateTable::dup() { ++ transition(vtos, vtos); ++ // stack: ..., a ++ __ load_ptr(0, FSR); ++ __ push_ptr(FSR); ++ // stack: ..., a, a ++} ++ ++// blows FSR ++void TemplateTable::dup_x1() { ++ transition(vtos, vtos); ++ // stack: ..., a, b ++ __ load_ptr(0, FSR); // load b ++ __ load_ptr(1, A5); // load a ++ __ store_ptr(1, FSR); // store b ++ __ store_ptr(0, A5); // store a ++ __ push_ptr(FSR); // push b ++ // stack: ..., b, a, b ++} ++ ++// blows FSR ++void TemplateTable::dup_x2() { ++ transition(vtos, vtos); ++ // stack: ..., a, b, c ++ __ load_ptr(0, FSR); // load c ++ __ load_ptr(2, A5); // load a ++ __ store_ptr(2, FSR); // store c in a ++ __ push_ptr(FSR); // push c ++ // stack: ..., c, b, c, c ++ __ load_ptr(2, FSR); // load b ++ __ store_ptr(2, A5); // store a in b ++ // stack: ..., c, a, c, c ++ __ store_ptr(1, FSR); // store b in c ++ // stack: ..., c, a, b, c ++} ++ ++// blows FSR ++void TemplateTable::dup2() { ++ transition(vtos, vtos); ++ // stack: ..., a, b ++ __ load_ptr(1, FSR); // load a ++ __ push_ptr(FSR); // push a ++ __ load_ptr(1, FSR); // load b ++ __ push_ptr(FSR); // push b ++ // stack: ..., a, b, a, b ++} ++ ++// blows FSR ++void TemplateTable::dup2_x1() { ++ transition(vtos, vtos); ++ // stack: ..., a, b, c ++ __ load_ptr(0, 
T2); // load c ++ __ load_ptr(1, FSR); // load b ++ __ push_ptr(FSR); // push b ++ __ push_ptr(T2); // push c ++ // stack: ..., a, b, c, b, c ++ __ store_ptr(3, T2); // store c in b ++ // stack: ..., a, c, c, b, c ++ __ load_ptr(4, T2); // load a ++ __ store_ptr(2, T2); // store a in 2nd c ++ // stack: ..., a, c, a, b, c ++ __ store_ptr(4, FSR); // store b in a ++ // stack: ..., b, c, a, b, c ++ ++ // stack: ..., b, c, a, b, c ++} ++ ++// blows FSR, SSR ++void TemplateTable::dup2_x2() { ++ transition(vtos, vtos); ++ // stack: ..., a, b, c, d ++ // stack: ..., a, b, c, d ++ __ load_ptr(0, T2); // load d ++ __ load_ptr(1, FSR); // load c ++ __ push_ptr(FSR); // push c ++ __ push_ptr(T2); // push d ++ // stack: ..., a, b, c, d, c, d ++ __ load_ptr(4, FSR); // load b ++ __ store_ptr(2, FSR); // store b in d ++ __ store_ptr(4, T2); // store d in b ++ // stack: ..., a, d, c, b, c, d ++ __ load_ptr(5, T2); // load a ++ __ load_ptr(3, FSR); // load c ++ __ store_ptr(3, T2); // store a in c ++ __ store_ptr(5, FSR); // store c in a ++ // stack: ..., c, d, a, b, c, d ++ ++ // stack: ..., c, d, a, b, c, d ++} ++ ++// blows FSR ++void TemplateTable::swap() { ++ transition(vtos, vtos); ++ // stack: ..., a, b ++ ++ __ load_ptr(1, A5); // load a ++ __ load_ptr(0, FSR); // load b ++ __ store_ptr(0, A5); // store a in b ++ __ store_ptr(1, FSR); // store b in a ++ ++ // stack: ..., b, a ++} ++ ++void TemplateTable::iop2(Operation op) { ++ transition(itos, itos); ++ ++ __ pop_i(SSR); ++ switch (op) { ++ case add : __ addu32(FSR, SSR, FSR); break; ++ case sub : __ subu32(FSR, SSR, FSR); break; ++ case mul : __ mul(FSR, SSR, FSR); break; ++ case _and : __ andr(FSR, SSR, FSR); break; ++ case _or : __ orr(FSR, SSR, FSR); break; ++ case _xor : __ xorr(FSR, SSR, FSR); break; ++ case shl : __ sllv(FSR, SSR, FSR); break; ++ case shr : __ srav(FSR, SSR, FSR); break; ++ case ushr : __ srlv(FSR, SSR, FSR); break; ++ default : ShouldNotReachHere(); ++ } ++} ++ ++// the result stored in FSR, SSR, ++// used registers : T2, T3 ++void TemplateTable::lop2(Operation op) { ++ transition(ltos, ltos); ++ __ pop_l(T2); ++ ++ switch (op) { ++ case add : __ daddu(FSR, T2, FSR); break; ++ case sub : __ dsubu(FSR, T2, FSR); break; ++ case _and: __ andr(FSR, T2, FSR); break; ++ case _or : __ orr(FSR, T2, FSR); break; ++ case _xor: __ xorr(FSR, T2, FSR); break; ++ default : ShouldNotReachHere(); ++ } ++} ++ ++// java require this bytecode could handle 0x80000000/-1, dont cause a overflow exception, ++// the result is 0x80000000 ++// the godson2 cpu do the same, so we need not handle this specially like x86 ++void TemplateTable::idiv() { ++ transition(itos, itos); ++ Label not_zero; ++ ++ __ bne(FSR, R0, not_zero); ++ __ delayed()->nop(); ++ __ jmp(Interpreter::_throw_ArithmeticException_entry); ++ __ delayed()->nop(); ++ __ bind(not_zero); ++ ++ __ pop_i(SSR); ++ if (UseLEXT1) { ++ __ gsdiv(FSR, SSR, FSR); ++ } else { ++ __ div(SSR, FSR); ++ __ mflo(FSR); ++ } ++} ++ ++void TemplateTable::irem() { ++ transition(itos, itos); ++ Label not_zero; ++ __ pop_i(SSR); ++ __ div(SSR, FSR); ++ ++ __ bne(FSR, R0, not_zero); ++ __ delayed()->nop(); ++ //__ brk(7); ++ __ jmp(Interpreter::_throw_ArithmeticException_entry); ++ __ delayed()->nop(); ++ ++ __ bind(not_zero); ++ __ mfhi(FSR); ++} ++ ++void TemplateTable::lmul() { ++ transition(ltos, ltos); ++ __ pop_l(T2); ++ if (UseLEXT1) { ++ __ gsdmult(FSR, T2, FSR); ++ } else { ++ __ dmult(T2, FSR); ++ __ mflo(FSR); ++ } ++} ++ ++// NOTE: i DONT use the Interpreter::_throw_ArithmeticException_entry 
++void TemplateTable::ldiv() { ++ transition(ltos, ltos); ++ Label normal; ++ ++ __ bne(FSR, R0, normal); ++ __ delayed()->nop(); ++ ++ //__ brk(7); //generate FPE ++ __ jmp(Interpreter::_throw_ArithmeticException_entry); ++ __ delayed()->nop(); ++ ++ __ bind(normal); ++ __ pop_l(A2); ++ if (UseLEXT1) { ++ __ gsddiv(FSR, A2, FSR); ++ } else { ++ __ ddiv(A2, FSR); ++ __ mflo(FSR); ++ } ++} ++ ++// NOTE: i DONT use the Interpreter::_throw_ArithmeticException_entry ++void TemplateTable::lrem() { ++ transition(ltos, ltos); ++ Label normal; ++ ++ __ bne(FSR, R0, normal); ++ __ delayed()->nop(); ++ ++ __ jmp(Interpreter::_throw_ArithmeticException_entry); ++ __ delayed()->nop(); ++ ++ __ bind(normal); ++ __ pop_l (A2); ++ ++ if (UseLEXT1) { ++ __ gsdmod(FSR, A2, FSR); ++ } else { ++ __ ddiv(A2, FSR); ++ __ mfhi(FSR); ++ } ++} ++ ++// result in FSR ++// used registers : T0 ++void TemplateTable::lshl() { ++ transition(itos, ltos); ++ __ pop_l(T0); ++ __ dsllv(FSR, T0, FSR); ++} ++ ++// used registers : T0 ++void TemplateTable::lshr() { ++ transition(itos, ltos); ++ __ pop_l(T0); ++ __ dsrav(FSR, T0, FSR); ++} ++ ++// used registers : T0 ++void TemplateTable::lushr() { ++ transition(itos, ltos); ++ __ pop_l(T0); ++ __ dsrlv(FSR, T0, FSR); ++} ++ ++// result in FSF ++void TemplateTable::fop2(Operation op) { ++ transition(ftos, ftos); ++ switch (op) { ++ case add: ++ __ lwc1(FTF, at_sp()); ++ __ add_s(FSF, FTF, FSF); ++ break; ++ case sub: ++ __ lwc1(FTF, at_sp()); ++ __ sub_s(FSF, FTF, FSF); ++ break; ++ case mul: ++ __ lwc1(FTF, at_sp()); ++ __ mul_s(FSF, FTF, FSF); ++ break; ++ case div: ++ __ lwc1(FTF, at_sp()); ++ __ div_s(FSF, FTF, FSF); ++ break; ++ case rem: ++ __ mov_s(F13, FSF); ++ __ lwc1(F12, at_sp()); ++ __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::frem), 2); ++ break; ++ default : ShouldNotReachHere(); ++ } ++ ++ __ daddiu(SP, SP, 1 * wordSize); ++} ++ ++// result in SSF||FSF ++// i dont handle the strict flags ++void TemplateTable::dop2(Operation op) { ++ transition(dtos, dtos); ++ switch (op) { ++ case add: ++ __ ldc1(FTF, at_sp()); ++ __ add_d(FSF, FTF, FSF); ++ break; ++ case sub: ++ __ ldc1(FTF, at_sp()); ++ __ sub_d(FSF, FTF, FSF); ++ break; ++ case mul: ++ __ ldc1(FTF, at_sp()); ++ __ mul_d(FSF, FTF, FSF); ++ break; ++ case div: ++ __ ldc1(FTF, at_sp()); ++ __ div_d(FSF, FTF, FSF); ++ break; ++ case rem: ++ __ mov_d(F13, FSF); ++ __ ldc1(F12, at_sp()); ++ __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::drem), 2); ++ break; ++ default : ShouldNotReachHere(); ++ } ++ ++ __ daddiu(SP, SP, 2 * wordSize); ++} ++ ++void TemplateTable::ineg() { ++ transition(itos, itos); ++ __ subu32(FSR, R0, FSR); ++} ++ ++void TemplateTable::lneg() { ++ transition(ltos, ltos); ++ __ dsubu(FSR, R0, FSR); ++} ++ ++void TemplateTable::fneg() { ++ transition(ftos, ftos); ++ __ neg_s(FSF, FSF); ++} ++ ++void TemplateTable::dneg() { ++ transition(dtos, dtos); ++ __ neg_d(FSF, FSF); ++} ++ ++// used registers : T2 ++void TemplateTable::iinc() { ++ transition(vtos, vtos); ++ locals_index(T2); ++ __ lw(FSR, T2, 0); ++ __ lb(AT, at_bcp(2)); // get constant ++ __ daddu(FSR, FSR, AT); ++ __ sw(FSR, T2, 0); ++} ++ ++// used register : T2 ++void TemplateTable::wide_iinc() { ++ transition(vtos, vtos); ++ locals_index_wide(T2); ++ __ get_2_byte_integer_at_bcp(FSR, AT, 4); ++ __ hswap(FSR); ++ __ lw(AT, T2, 0); ++ __ daddu(FSR, AT, FSR); ++ __ sw(FSR, T2, 0); ++} ++ ++void TemplateTable::convert() { ++ // Checking ++#ifdef ASSERT ++ { ++ TosState tos_in = ilgl; ++ TosState tos_out = ilgl; ++ 
switch (bytecode()) { ++ case Bytecodes::_i2l: // fall through ++ case Bytecodes::_i2f: // fall through ++ case Bytecodes::_i2d: // fall through ++ case Bytecodes::_i2b: // fall through ++ case Bytecodes::_i2c: // fall through ++ case Bytecodes::_i2s: tos_in = itos; break; ++ case Bytecodes::_l2i: // fall through ++ case Bytecodes::_l2f: // fall through ++ case Bytecodes::_l2d: tos_in = ltos; break; ++ case Bytecodes::_f2i: // fall through ++ case Bytecodes::_f2l: // fall through ++ case Bytecodes::_f2d: tos_in = ftos; break; ++ case Bytecodes::_d2i: // fall through ++ case Bytecodes::_d2l: // fall through ++ case Bytecodes::_d2f: tos_in = dtos; break; ++ default : ShouldNotReachHere(); ++ } ++ switch (bytecode()) { ++ case Bytecodes::_l2i: // fall through ++ case Bytecodes::_f2i: // fall through ++ case Bytecodes::_d2i: // fall through ++ case Bytecodes::_i2b: // fall through ++ case Bytecodes::_i2c: // fall through ++ case Bytecodes::_i2s: tos_out = itos; break; ++ case Bytecodes::_i2l: // fall through ++ case Bytecodes::_f2l: // fall through ++ case Bytecodes::_d2l: tos_out = ltos; break; ++ case Bytecodes::_i2f: // fall through ++ case Bytecodes::_l2f: // fall through ++ case Bytecodes::_d2f: tos_out = ftos; break; ++ case Bytecodes::_i2d: // fall through ++ case Bytecodes::_l2d: // fall through ++ case Bytecodes::_f2d: tos_out = dtos; break; ++ default : ShouldNotReachHere(); ++ } ++ transition(tos_in, tos_out); ++ } ++#endif // ASSERT ++ ++ // Conversion ++ switch (bytecode()) { ++ case Bytecodes::_i2l: ++ __ sll(FSR, FSR, 0); ++ break; ++ case Bytecodes::_i2f: ++ __ mtc1(FSR, FSF); ++ __ cvt_s_w(FSF, FSF); ++ break; ++ case Bytecodes::_i2d: ++ __ mtc1(FSR, FSF); ++ __ cvt_d_w(FSF, FSF); ++ break; ++ case Bytecodes::_i2b: ++ __ seb(FSR, FSR); ++ break; ++ case Bytecodes::_i2c: ++ __ andi(FSR, FSR, 0xFFFF); // truncate upper 56 bits ++ break; ++ case Bytecodes::_i2s: ++ __ seh(FSR, FSR); ++ break; ++ case Bytecodes::_l2i: ++ __ sll(FSR, FSR, 0); ++ break; ++ case Bytecodes::_l2f: ++ __ dmtc1(FSR, FSF); ++ __ cvt_s_l(FSF, FSF); ++ break; ++ case Bytecodes::_l2d: ++ __ dmtc1(FSR, FSF); ++ __ cvt_d_l(FSF, FSF); ++ break; ++ case Bytecodes::_f2i: ++ { ++ Label L; ++ ++ __ trunc_w_s(F12, FSF); ++ __ move(AT, 0x7fffffff); ++ __ mfc1(FSR, F12); ++ __ c_un_s(FSF, FSF); //NaN? ++ __ movt(FSR, R0); ++ ++ __ bne(AT, FSR, L); ++ __ delayed()->lui(T9, 0x8000); ++ ++ __ mfc1(AT, FSF); ++ __ andr(AT, AT, T9); ++ ++ __ movn(FSR, T9, AT); ++ ++ __ bind(L); ++ } ++ break; ++ case Bytecodes::_f2l: ++ { ++ Label L; ++ ++ __ trunc_l_s(F12, FSF); ++ __ daddiu(AT, R0, -1); ++ __ dsrl(AT, AT, 1); ++ __ dmfc1(FSR, F12); ++ __ c_un_s(FSF, FSF); //NaN? ++ __ movt(FSR, R0); ++ ++ __ bne(AT, FSR, L); ++ __ delayed()->lui(T9, 0x8000); ++ ++ __ mfc1(AT, FSF); ++ __ andr(AT, AT, T9); ++ ++ __ dsll32(T9, T9, 0); ++ __ movn(FSR, T9, AT); ++ ++ __ bind(L); ++ } ++ break; ++ case Bytecodes::_f2d: ++ __ cvt_d_s(FSF, FSF); ++ break; ++ case Bytecodes::_d2i: ++ { ++ Label L; ++ ++ __ trunc_w_d(F12, FSF); ++ __ move(AT, 0x7fffffff); ++ __ mfc1(FSR, F12); ++ ++ __ bne(FSR, AT, L); ++ __ delayed()->mtc1(R0, F12); ++ ++ __ cvt_d_w(F12, F12); ++ __ c_ult_d(FSF, F12); ++ __ bc1f(L); ++ __ delayed()->addiu(T9, R0, -1); ++ ++ __ c_un_d(FSF, FSF); //NaN? 
++ __ subu32(FSR, T9, AT); ++ __ movt(FSR, R0); ++ ++ __ bind(L); ++ } ++ break; ++ case Bytecodes::_d2l: ++ { ++ Label L; ++ ++ __ trunc_l_d(F12, FSF); ++ __ daddiu(AT, R0, -1); ++ __ dsrl(AT, AT, 1); ++ __ dmfc1(FSR, F12); ++ ++ __ bne(FSR, AT, L); ++ __ delayed()->mtc1(R0, F12); ++ ++ __ cvt_d_w(F12, F12); ++ __ c_ult_d(FSF, F12); ++ __ bc1f(L); ++ __ delayed()->daddiu(T9, R0, -1); ++ ++ __ c_un_d(FSF, FSF); //NaN? ++ __ subu(FSR, T9, AT); ++ __ movt(FSR, R0); ++ ++ __ bind(L); ++ } ++ break; ++ case Bytecodes::_d2f: ++ __ cvt_s_d(FSF, FSF); ++ break; ++ default : ++ ShouldNotReachHere(); ++ } ++} ++ ++void TemplateTable::lcmp() { ++ transition(ltos, itos); ++ ++ Label low, high, done; ++ __ pop(T0); ++ __ pop(R0); ++ __ slt(AT, T0, FSR); ++ __ bne(AT, R0, low); ++ __ delayed()->nop(); ++ ++ __ bne(T0, FSR, high); ++ __ delayed()->nop(); ++ ++ __ li(FSR, (long)0); ++ __ b(done); ++ __ delayed()->nop(); ++ ++ __ bind(low); ++ __ li(FSR, (long)-1); ++ __ b(done); ++ __ delayed()->nop(); ++ ++ __ bind(high); ++ __ li(FSR, (long)1); ++ __ b(done); ++ __ delayed()->nop(); ++ ++ __ bind(done); ++} ++ ++void TemplateTable::float_cmp(bool is_float, int unordered_result) { ++ Label less, done; ++ ++ __ move(FSR, R0); ++ ++ if (is_float) { ++ __ lwc1(FTF, at_sp()); ++ __ c_eq_s(FTF, FSF); ++ __ bc1t(done); ++ __ delayed()->daddiu(SP, SP, 1 * wordSize); ++ ++ if (unordered_result<0) ++ __ c_ult_s(FTF, FSF); ++ else ++ __ c_olt_s(FTF, FSF); ++ } else { ++ __ ldc1(FTF, at_sp()); ++ __ c_eq_d(FTF, FSF); ++ __ bc1t(done); ++ __ delayed()->daddiu(SP, SP, 2 * wordSize); ++ ++ if (unordered_result<0) ++ __ c_ult_d(FTF, FSF); ++ else ++ __ c_olt_d(FTF, FSF); ++ } ++ __ bc1t(less); ++ __ delayed()->nop(); ++ __ move(FSR, 1); ++ __ b(done); ++ __ delayed()->nop(); ++ __ bind(less); ++ __ move(FSR, -1); ++ __ bind(done); ++} ++ ++ ++// used registers : T3, A7, Rnext ++// FSR : return bci, this is defined by the vm specification ++// T2 : MDO taken count ++// T3 : method ++// A7 : offset ++// Rnext : next bytecode, this is required by dispatch_base ++void TemplateTable::branch(bool is_jsr, bool is_wide) { ++ __ get_method(T3); ++ __ profile_taken_branch(A7, T2); // only C2 meaningful ++ ++ const ByteSize be_offset = MethodCounters::backedge_counter_offset() + ++ InvocationCounter::counter_offset(); ++ const ByteSize inv_offset = MethodCounters::invocation_counter_offset() + ++ InvocationCounter::counter_offset(); ++ ++ // Load up T4 with the branch displacement ++ if (!is_wide) { ++ __ lb(A7, BCP, 1); ++ __ lbu(AT, BCP, 2); ++ __ dsll(A7, A7, 8); ++ __ orr(A7, A7, AT); ++ } else { ++ __ get_4_byte_integer_at_bcp(A7, AT, 1); ++ __ swap(A7); ++ } ++ ++ // Handle all the JSR stuff here, then exit. ++ // It's much shorter and cleaner than intermingling with the non-JSR ++ // normal-branch stuff occuring below. 
++ if (is_jsr) { ++ // Pre-load the next target bytecode into Rnext ++ __ daddu(AT, BCP, A7); ++ __ lbu(Rnext, AT, 0); ++ ++ // compute return address as bci in FSR ++ __ daddiu(FSR, BCP, (is_wide?5:3) - in_bytes(ConstMethod::codes_offset())); ++ __ ld(AT, T3, in_bytes(Method::const_offset())); ++ __ dsubu(FSR, FSR, AT); ++ // Adjust the bcp in BCP by the displacement in A7 ++ __ daddu(BCP, BCP, A7); ++ // jsr returns atos that is not an oop ++ // Push return address ++ __ push_i(FSR); ++ // jsr returns vtos ++ __ dispatch_only_noverify(vtos); ++ ++ return; ++ } ++ ++ // Normal (non-jsr) branch handling ++ ++ // Adjust the bcp in S0 by the displacement in T4 ++ __ daddu(BCP, BCP, A7); ++ ++ assert(UseLoopCounter || !UseOnStackReplacement, "on-stack-replacement requires loop counters"); ++ Label backedge_counter_overflow; ++ Label profile_method; ++ Label dispatch; ++ if (UseLoopCounter) { ++ // increment backedge counter for backward branches ++ // T3: method ++ // T4: target offset ++ // BCP: target bcp ++ // LVP: locals pointer ++ __ bgtz(A7, dispatch); // check if forward or backward branch ++ __ delayed()->nop(); ++ ++ // check if MethodCounters exists ++ Label has_counters; ++ __ ld(AT, T3, in_bytes(Method::method_counters_offset())); // use AT as MDO, TEMP ++ __ bne(AT, R0, has_counters); ++ __ delayed()->nop(); ++ __ push(T3); ++ __ call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::build_method_counters), ++ T3); ++ __ pop(T3); ++ __ ld(AT, T3, in_bytes(Method::method_counters_offset())); // use AT as MDO, TEMP ++ __ beq(AT, R0, dispatch); ++ __ delayed()->nop(); ++ __ bind(has_counters); ++ ++ if (TieredCompilation) { ++ Label no_mdo; ++ int increment = InvocationCounter::count_increment; ++ int mask = ((1 << Tier0BackedgeNotifyFreqLog) - 1) << InvocationCounter::count_shift; ++ if (ProfileInterpreter) { ++ // Are we profiling? 
++ __ ld(T0, Address(T3, in_bytes(Method::method_data_offset()))); ++ __ beq(T0, R0, no_mdo); ++ __ delayed()->nop(); ++ // Increment the MDO backedge counter ++ const Address mdo_backedge_counter(T0, in_bytes(MethodData::backedge_counter_offset()) + ++ in_bytes(InvocationCounter::counter_offset())); ++ __ increment_mask_and_jump(mdo_backedge_counter, increment, mask, ++ T1, false, Assembler::zero, &backedge_counter_overflow); ++ __ beq(R0, R0, dispatch); ++ __ delayed()->nop(); ++ } ++ __ bind(no_mdo); ++ // Increment backedge counter in MethodCounters* ++ __ ld(T0, Address(T3, Method::method_counters_offset())); ++ __ increment_mask_and_jump(Address(T0, be_offset), increment, mask, ++ T1, false, Assembler::zero, &backedge_counter_overflow); ++ if (!UseOnStackReplacement) { ++ __ bind(backedge_counter_overflow); ++ } ++ } else { ++ // increment back edge counter ++ __ ld(T1, T3, in_bytes(Method::method_counters_offset())); ++ __ lw(T0, T1, in_bytes(be_offset)); ++ __ increment(T0, InvocationCounter::count_increment); ++ __ sw(T0, T1, in_bytes(be_offset)); ++ ++ // load invocation counter ++ __ lw(T1, T1, in_bytes(inv_offset)); ++ // buffer bit added, mask no needed ++ ++ // daddu backedge counter & invocation counter ++ __ daddu(T1, T1, T0); ++ ++ if (ProfileInterpreter) { ++ // Test to see if we should create a method data oop ++ // T1 : backedge counter & invocation counter ++ if (Assembler::is_simm16(InvocationCounter::InterpreterProfileLimit)) { ++ __ slti(AT, T1, InvocationCounter::InterpreterProfileLimit); ++ } else { ++ __ li(AT, (long)&InvocationCounter::InterpreterProfileLimit); ++ __ lw(AT, AT, 0); ++ __ slt(AT, T1, AT); ++ } ++ ++ __ bne(AT, R0, dispatch); ++ __ delayed()->nop(); ++ ++ // if no method data exists, go to profile method ++ __ test_method_data_pointer(T1, profile_method); ++ ++ if (UseOnStackReplacement) { ++ if (Assembler::is_simm16(InvocationCounter::InterpreterBackwardBranchLimit)) { ++ __ slti(AT, T2, InvocationCounter::InterpreterBackwardBranchLimit); ++ } else { ++ __ li(AT, (long)&InvocationCounter::InterpreterBackwardBranchLimit); ++ __ lw(AT, AT, 0); ++ __ slt(AT, T2, AT); ++ } ++ ++ __ bne(AT, R0, dispatch); ++ __ delayed()->nop(); ++ ++ // When ProfileInterpreter is on, the backedge_count comes ++ // from the methodDataOop, which value does not get reset on ++ // the call to frequency_counter_overflow(). ++ // To avoid excessive calls to the overflow routine while ++ // the method is being compiled, daddu a second test to make ++ // sure the overflow function is called only once every ++ // overflow_frequency. ++ const int overflow_frequency = 1024; ++ __ andi(AT, T2, overflow_frequency-1); ++ __ beq(AT, R0, backedge_counter_overflow); ++ __ delayed()->nop(); ++ } ++ } else { ++ if (UseOnStackReplacement) { ++ // check for overflow against AT, which is the sum of the counters ++ __ li(AT, (long)&InvocationCounter::InterpreterBackwardBranchLimit); ++ __ lw(AT, AT, 0); ++ __ slt(AT, T1, AT); ++ __ beq(AT, R0, backedge_counter_overflow); ++ __ delayed()->nop(); ++ } ++ } ++ } ++ __ bind(dispatch); ++ } ++ ++ // Pre-load the next target bytecode into Rnext ++ __ lbu(Rnext, BCP, 0); ++ ++ // continue with the bytecode @ target ++ // FSR: return bci for jsr's, unused otherwise ++ // Rnext: target bytecode ++ // BCP: target bcp ++ __ dispatch_only(vtos); ++ ++ if (UseLoopCounter) { ++ if (ProfileInterpreter) { ++ // Out-of-line code to allocate method data oop. 
++ __ bind(profile_method); ++ __ call_VM(NOREG, CAST_FROM_FN_PTR(address, InterpreterRuntime::profile_method)); ++ __ lbu(Rnext, BCP, 0); ++ __ set_method_data_pointer_for_bcp(); ++ __ b(dispatch); ++ __ delayed()->nop(); ++ } ++ ++ if (UseOnStackReplacement) { ++ // invocation counter overflow ++ __ bind(backedge_counter_overflow); ++ __ subu(A7, BCP, A7); // branch bcp ++ call_VM(NOREG, CAST_FROM_FN_PTR(address, ++ InterpreterRuntime::frequency_counter_overflow), A7); ++ __ lbu(Rnext, BCP, 0); ++ ++ // V0: osr nmethod (osr ok) or NULL (osr not possible) ++ // V1: osr adapter frame return address ++ // Rnext: target bytecode ++ // LVP: locals pointer ++ // BCP: bcp ++ __ beq(V0, R0, dispatch); ++ __ delayed()->nop(); ++ // nmethod may have been invalidated (VM may block upon call_VM return) ++ __ lw(T3, V0, nmethod::entry_bci_offset()); ++ __ move(AT, InvalidOSREntryBci); ++ __ beq(AT, T3, dispatch); ++ __ delayed()->nop(); ++ // We need to prepare to execute the OSR method. First we must ++ // migrate the locals and monitors off of the stack. ++ //V0: osr nmethod (osr ok) or NULL (osr not possible) ++ //V1: osr adapter frame return address ++ //Rnext: target bytecode ++ //LVP: locals pointer ++ //BCP: bcp ++ __ move(BCP, V0); ++ const Register thread = TREG; ++#ifndef OPT_THREAD ++ __ get_thread(thread); ++#endif ++ call_VM(noreg, CAST_FROM_FN_PTR(address, SharedRuntime::OSR_migration_begin)); ++ ++ // V0 is OSR buffer, move it to expected parameter location ++ // refer to osrBufferPointer in c1_LIRAssembler_mips.cpp ++ __ move(T0, V0); ++ ++ // pop the interpreter frame ++ __ ld(A7, Address(FP, frame::interpreter_frame_sender_sp_offset * wordSize)); ++ //FIXME, shall we keep the return address on the stack? ++ __ leave(); // remove frame anchor ++ __ move(LVP, RA); ++ __ move(SP, A7); ++ ++ __ move(AT, -(StackAlignmentInBytes)); ++ __ andr(SP , SP , AT); ++ ++ // push the (possibly adjusted) return address ++ //refer to osr_entry in c1_LIRAssembler_mips.cpp ++ __ ld(AT, BCP, nmethod::osr_entry_point_offset()); ++ __ jr(AT); ++ __ delayed()->nop(); ++ } ++ } ++} ++ ++ ++void TemplateTable::if_0cmp(Condition cc) { ++ transition(itos, vtos); ++ // assume branch is more often taken than not (loops use backward branches) ++ Label not_taken; ++ switch(cc) { ++ case not_equal: ++ __ beq(FSR, R0, not_taken); ++ break; ++ case equal: ++ __ bne(FSR, R0, not_taken); ++ break; ++ case less: ++ __ bgez(FSR, not_taken); ++ break; ++ case less_equal: ++ __ bgtz(FSR, not_taken); ++ break; ++ case greater: ++ __ blez(FSR, not_taken); ++ break; ++ case greater_equal: ++ __ bltz(FSR, not_taken); ++ break; ++ } ++ __ delayed()->nop(); ++ ++ branch(false, false); ++ ++ __ bind(not_taken); ++ __ profile_not_taken_branch(FSR); ++} ++ ++void TemplateTable::if_icmp(Condition cc) { ++ transition(itos, vtos); ++ // assume branch is more often taken than not (loops use backward branches) ++ Label not_taken; ++ ++ __ pop_i(SSR); ++ switch(cc) { ++ case not_equal: ++ __ beq(SSR, FSR, not_taken); ++ break; ++ case equal: ++ __ bne(SSR, FSR, not_taken); ++ break; ++ case less: ++ __ slt(AT, SSR, FSR); ++ __ beq(AT, R0, not_taken); ++ break; ++ case less_equal: ++ __ slt(AT, FSR, SSR); ++ __ bne(AT, R0, not_taken); ++ break; ++ case greater: ++ __ slt(AT, FSR, SSR); ++ __ beq(AT, R0, not_taken); ++ break; ++ case greater_equal: ++ __ slt(AT, SSR, FSR); ++ __ bne(AT, R0, not_taken); ++ break; ++ } ++ __ delayed()->nop(); ++ ++ branch(false, false); ++ __ bind(not_taken); ++ __ profile_not_taken_branch(FSR); ++} ++ 
++void TemplateTable::if_nullcmp(Condition cc) { ++ transition(atos, vtos); ++ // assume branch is more often taken than not (loops use backward branches) ++ Label not_taken; ++ switch(cc) { ++ case not_equal: ++ __ beq(FSR, R0, not_taken); ++ break; ++ case equal: ++ __ bne(FSR, R0, not_taken); ++ break; ++ default: ++ ShouldNotReachHere(); ++ } ++ __ delayed()->nop(); ++ ++ branch(false, false); ++ __ bind(not_taken); ++ __ profile_not_taken_branch(FSR); ++} ++ ++ ++void TemplateTable::if_acmp(Condition cc) { ++ transition(atos, vtos); ++ // assume branch is more often taken than not (loops use backward branches) ++ Label not_taken; ++ // __ lw(SSR, SP, 0); ++ __ pop_ptr(SSR); ++ switch(cc) { ++ case not_equal: ++ __ beq(SSR, FSR, not_taken); ++ break; ++ case equal: ++ __ bne(SSR, FSR, not_taken); ++ break; ++ default: ++ ShouldNotReachHere(); ++ } ++ __ delayed()->nop(); ++ ++ branch(false, false); ++ ++ __ bind(not_taken); ++ __ profile_not_taken_branch(FSR); ++} ++ ++// used registers : T1, T2, T3 ++// T1 : method ++// T2 : returb bci ++void TemplateTable::ret() { ++ transition(vtos, vtos); ++ ++ locals_index(T2); ++ __ ld(T2, T2, 0); ++ __ profile_ret(T2, T3); ++ ++ __ get_method(T1); ++ __ ld(BCP, T1, in_bytes(Method::const_offset())); ++ __ daddu(BCP, BCP, T2); ++ __ daddiu(BCP, BCP, in_bytes(ConstMethod::codes_offset())); ++ ++ __ dispatch_next(vtos); ++} ++ ++// used registers : T1, T2, T3 ++// T1 : method ++// T2 : returb bci ++void TemplateTable::wide_ret() { ++ transition(vtos, vtos); ++ ++ locals_index_wide(T2); ++ __ ld(T2, T2, 0); // get return bci, compute return bcp ++ __ profile_ret(T2, T3); ++ ++ __ get_method(T1); ++ __ ld(BCP, T1, in_bytes(Method::const_offset())); ++ __ daddu(BCP, BCP, T2); ++ __ daddiu(BCP, BCP, in_bytes(ConstMethod::codes_offset())); ++ ++ __ dispatch_next(vtos); ++} ++ ++// used register T2, T3, A7, Rnext ++// T2 : bytecode pointer ++// T3 : low ++// A7 : high ++// Rnext : dest bytecode, required by dispatch_base ++void TemplateTable::tableswitch() { ++ Label default_case, continue_execution; ++ transition(itos, vtos); ++ ++ // align BCP ++ __ daddiu(T2, BCP, BytesPerInt); ++ __ li(AT, -BytesPerInt); ++ __ andr(T2, T2, AT); ++ ++ // load lo & hi ++ __ lw(T3, T2, 1 * BytesPerInt); ++ __ swap(T3); ++ __ lw(A7, T2, 2 * BytesPerInt); ++ __ swap(A7); ++ ++ // check against lo & hi ++ __ slt(AT, FSR, T3); ++ __ bne(AT, R0, default_case); ++ __ delayed()->nop(); ++ ++ __ slt(AT, A7, FSR); ++ __ bne(AT, R0, default_case); ++ __ delayed()->nop(); ++ ++ // lookup dispatch offset, in A7 big endian ++ __ dsubu(FSR, FSR, T3); ++ __ dsll(AT, FSR, Address::times_4); ++ __ daddu(AT, T2, AT); ++ __ lw(A7, AT, 3 * BytesPerInt); ++ __ profile_switch_case(FSR, T9, T3); ++ ++ __ bind(continue_execution); ++ __ swap(A7); ++ __ daddu(BCP, BCP, A7); ++ __ lbu(Rnext, BCP, 0); ++ __ dispatch_only(vtos); ++ ++ // handle default ++ __ bind(default_case); ++ __ profile_switch_default(FSR); ++ __ lw(A7, T2, 0); ++ __ b(continue_execution); ++ __ delayed()->nop(); ++} ++ ++void TemplateTable::lookupswitch() { ++ transition(itos, itos); ++ __ stop("lookupswitch bytecode should have been rewritten"); ++} ++ ++// used registers : T2, T3, A7, Rnext ++// T2 : bytecode pointer ++// T3 : pair index ++// A7 : offset ++// Rnext : dest bytecode ++// the data after the opcode is the same as lookupswitch ++// see Rewriter::rewrite_method for more information ++void TemplateTable::fast_linearswitch() { ++ transition(itos, vtos); ++ Label loop_entry, loop, found, continue_execution; ++ ++ 
// swap FSR so we can avoid swapping the table entries ++ __ swap(FSR); ++ ++ // align BCP ++ __ daddiu(T2, BCP, BytesPerInt); ++ __ li(AT, -BytesPerInt); ++ __ andr(T2, T2, AT); ++ ++ // set counter ++ __ lw(T3, T2, BytesPerInt); ++ __ swap(T3); ++ __ b(loop_entry); ++ __ delayed()->nop(); ++ ++ // table search ++ __ bind(loop); ++ // get the entry value ++ __ dsll(AT, T3, Address::times_8); ++ __ daddu(AT, T2, AT); ++ __ lw(AT, AT, 2 * BytesPerInt); ++ ++ // found? ++ __ beq(FSR, AT, found); ++ __ delayed()->nop(); ++ ++ __ bind(loop_entry); ++ __ bgtz(T3, loop); ++ __ delayed()->daddiu(T3, T3, -1); ++ ++ // default case ++ __ profile_switch_default(FSR); ++ __ lw(A7, T2, 0); ++ __ b(continue_execution); ++ __ delayed()->nop(); ++ ++ // entry found -> get offset ++ __ bind(found); ++ __ dsll(AT, T3, Address::times_8); ++ __ daddu(AT, T2, AT); ++ __ lw(A7, AT, 3 * BytesPerInt); ++ __ profile_switch_case(T3, FSR, T2); ++ ++ // continue execution ++ __ bind(continue_execution); ++ __ swap(A7); ++ __ daddu(BCP, BCP, A7); ++ __ lbu(Rnext, BCP, 0); ++ __ dispatch_only(vtos); ++} ++ ++// used registers : T0, T1, T2, T3, A7, Rnext ++// T2 : pairs address(array) ++// Rnext : dest bytecode ++// the data after the opcode is the same as lookupswitch ++// see Rewriter::rewrite_method for more information ++void TemplateTable::fast_binaryswitch() { ++ transition(itos, vtos); ++ // Implementation using the following core algorithm: ++ // ++ // int binary_search(int key, LookupswitchPair* array, int n) { ++ // // Binary search according to "Methodik des Programmierens" by ++ // // Edsger W. Dijkstra and W.H.J. Feijen, Addison Wesley Germany 1985. ++ // int i = 0; ++ // int j = n; ++ // while (i+1 < j) { ++ // // invariant P: 0 <= i < j <= n and (a[i] <= key < a[j] or Q) ++ // // with Q: for all i: 0 <= i < n: key < a[i] ++ // // where a stands for the array and assuming that the (inexisting) ++ // // element a[n] is infinitely big. 
++ // int h = (i + j) >> 1; ++ // // i < h < j ++ // if (key < array[h].fast_match()) { ++ // j = h; ++ // } else { ++ // i = h; ++ // } ++ // } ++ // // R: a[i] <= key < a[i+1] or Q ++ // // (i.e., if key is within array, i is the correct index) ++ // return i; ++ // } ++ ++ // register allocation ++ const Register array = T2; ++ const Register i = T3, j = A7; ++ const Register h = T1; ++ const Register temp = T0; ++ const Register key = FSR; ++ ++ // setup array ++ __ daddiu(array, BCP, 3*BytesPerInt); ++ __ li(AT, -BytesPerInt); ++ __ andr(array, array, AT); ++ ++ // initialize i & j ++ __ move(i, R0); ++ __ lw(j, array, - 1 * BytesPerInt); ++ // Convert j into native byteordering ++ __ swap(j); ++ ++ // and start ++ Label entry; ++ __ b(entry); ++ __ delayed()->nop(); ++ ++ // binary search loop ++ { ++ Label loop; ++ __ bind(loop); ++ // int h = (i + j) >> 1; ++ __ daddu(h, i, j); ++ __ dsrl(h, h, 1); ++ // if (key < array[h].fast_match()) { ++ // j = h; ++ // } else { ++ // i = h; ++ // } ++ // Convert array[h].match to native byte-ordering before compare ++ __ dsll(AT, h, Address::times_8); ++ __ daddu(AT, array, AT); ++ __ lw(temp, AT, 0 * BytesPerInt); ++ __ swap(temp); ++ ++ { ++ Label set_i, end_of_if; ++ __ slt(AT, key, temp); ++ __ beq(AT, R0, set_i); ++ __ delayed()->nop(); ++ ++ __ b(end_of_if); ++ __ delayed(); __ move(j, h); ++ ++ __ bind(set_i); ++ __ move(i, h); ++ ++ __ bind(end_of_if); ++ } ++ // while (i+1 < j) ++ __ bind(entry); ++ __ daddiu(h, i, 1); ++ __ slt(AT, h, j); ++ __ bne(AT, R0, loop); ++ __ delayed()->nop(); ++ } ++ ++ // end of binary search, result index is i (must check again!) ++ Label default_case; ++ // Convert array[i].match to native byte-ordering before compare ++ __ dsll(AT, i, Address::times_8); ++ __ daddu(AT, array, AT); ++ __ lw(temp, AT, 0 * BytesPerInt); ++ __ swap(temp); ++ __ bne(key, temp, default_case); ++ __ delayed()->nop(); ++ ++ // entry found -> j = offset ++ __ dsll(AT, i, Address::times_8); ++ __ daddu(AT, array, AT); ++ __ lw(j, AT, 1 * BytesPerInt); ++ __ profile_switch_case(i, key, array); ++ __ swap(j); ++ ++ __ daddu(BCP, BCP, j); ++ __ lbu(Rnext, BCP, 0); ++ __ dispatch_only(vtos); ++ ++ // default case -> j = default offset ++ __ bind(default_case); ++ __ profile_switch_default(i); ++ __ lw(j, array, - 2 * BytesPerInt); ++ __ swap(j); ++ __ daddu(BCP, BCP, j); ++ __ lbu(Rnext, BCP, 0); ++ __ dispatch_only(vtos); ++} ++ ++void TemplateTable::_return(TosState state) { ++ transition(state, state); ++ assert(_desc->calls_vm(), ++ "inconsistent calls_vm information"); // call in remove_activation ++ ++ if (_desc->bytecode() == Bytecodes::_return_register_finalizer) { ++ assert(state == vtos, "only valid state"); ++ __ ld(T1, aaddress(0)); ++ __ load_klass(LVP, T1); ++ __ lw(LVP, LVP, in_bytes(Klass::access_flags_offset())); ++ __ move(AT, JVM_ACC_HAS_FINALIZER); ++ __ andr(AT, AT, LVP); ++ Label skip_register_finalizer; ++ __ beq(AT, R0, skip_register_finalizer); ++ __ delayed()->nop(); ++ __ call_VM(noreg, CAST_FROM_FN_PTR(address, ++ InterpreterRuntime::register_finalizer), T1); ++ __ bind(skip_register_finalizer); ++ } ++ ++ // Narrow result if state is itos but result type is smaller. ++ // Need to narrow in the return bytecode rather than in generate_return_entry ++ // since compiled code callers expect the result to already be narrowed. 
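Editorial note: the commented pseudo-code in fast_binaryswitch above (the Dijkstra/Feijen search) is exactly what the register-level loop implements — i and j live in T3/A7, h in T1, and the byte-swapped array[h].match is compared against the key in FSR. A compilable C++ rendition of that same comment is sketched below; LookupswitchPair here is a stand-in struct, not the HotSpot type:

    #include <cassert>

    struct LookupswitchPair {
        int match;
        int offset;
        int fast_match() const { return match; }
    };

    // Invariant (quoted from the comment above): 0 <= i < j <= n and
    // (a[i] <= key < a[j], or key is smaller than every entry).
    int binary_search(int key, const LookupswitchPair* array, int n) {
        int i = 0;
        int j = n;
        while (i + 1 < j) {
            int h = (i + j) >> 1;            // i < h < j
            if (key < array[h].fast_match()) {
                j = h;
            } else {
                i = h;
            }
        }
        return i;                            // a[i] <= key < a[i+1]
    }

    int main() {
        LookupswitchPair pairs[] = {{1, 10}, {5, 20}, {9, 30}};
        assert(binary_search(5, pairs, 3) == 1);   // exact match at index 1
        assert(binary_search(7, pairs, 3) == 1);   // no exact match: caller re-checks and takes the default
        return 0;
    }

As in the template, an exact hit still has to be re-checked after the loop terminates — the search only guarantees a[i] <= key < a[i+1], so a miss falls through to the default offset.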
++ if (state == itos) { ++ __ narrow(FSR); ++ } ++ ++ __ remove_activation(state, T9); ++ __ sync(); ++ ++ __ jr(T9); ++ __ delayed()->nop(); ++} ++ ++// ---------------------------------------------------------------------------- ++// Volatile variables demand their effects be made known to all CPU's ++// in order. Store buffers on most chips allow reads & writes to ++// reorder; the JMM's ReadAfterWrite.java test fails in -Xint mode ++// without some kind of memory barrier (i.e., it's not sufficient that ++// the interpreter does not reorder volatile references, the hardware ++// also must not reorder them). ++// ++// According to the new Java Memory Model (JMM): ++// (1) All volatiles are serialized wrt to each other. ALSO reads & ++// writes act as aquire & release, so: ++// (2) A read cannot let unrelated NON-volatile memory refs that ++// happen after the read float up to before the read. It's OK for ++// non-volatile memory refs that happen before the volatile read to ++// float down below it. ++// (3) Similar a volatile write cannot let unrelated NON-volatile ++// memory refs that happen BEFORE the write float down to after the ++// write. It's OK for non-volatile memory refs that happen after the ++// volatile write to float up before it. ++// ++// We only put in barriers around volatile refs (they are expensive), ++// not _between_ memory refs (that would require us to track the ++// flavor of the previous memory refs). Requirements (2) and (3) ++// require some barriers before volatile stores and after volatile ++// loads. These nearly cover requirement (1) but miss the ++// volatile-store-volatile-load case. This final case is placed after ++// volatile-stores although it could just as well go before ++// volatile-loads. ++void TemplateTable::volatile_barrier() { ++ if(os::is_MP()) __ sync(); ++} ++ ++// we dont shift left 2 bits in get_cache_and_index_at_bcp ++// for we always need shift the index we use it. the ConstantPoolCacheEntry ++// is 16-byte long, index is the index in ++// ConstantPoolCache, so cache + base_offset() + index * 16 is ++// the corresponding ConstantPoolCacheEntry ++// used registers : T2 ++// NOTE : the returned index need also shift left 4 to get the address! ++void TemplateTable::resolve_cache_and_index(int byte_no, ++ Register Rcache, ++ Register index, ++ size_t index_size) { ++ assert(byte_no == f1_byte || byte_no == f2_byte, "byte_no out of range"); ++ const Register temp = A1; ++ assert_different_registers(Rcache, index); ++ ++ Label resolved; ++ __ get_cache_and_index_and_bytecode_at_bcp(Rcache, index, temp, byte_no, 1, index_size); ++ // is resolved? 
++ int i = (int)bytecode(); ++ __ addiu(temp, temp, -i); ++ __ beq(temp, R0, resolved); ++ __ delayed()->nop(); ++ // resolve first time through ++ address entry; ++ switch (bytecode()) { ++ case Bytecodes::_getstatic : // fall through ++ case Bytecodes::_putstatic : // fall through ++ case Bytecodes::_getfield : // fall through ++ case Bytecodes::_putfield : ++ entry = CAST_FROM_FN_PTR(address, InterpreterRuntime::resolve_get_put); ++ break; ++ case Bytecodes::_invokevirtual : // fall through ++ case Bytecodes::_invokespecial : // fall through ++ case Bytecodes::_invokestatic : // fall through ++ case Bytecodes::_invokeinterface: ++ entry = CAST_FROM_FN_PTR(address, InterpreterRuntime::resolve_invoke); ++ break; ++ case Bytecodes::_invokehandle: ++ entry = CAST_FROM_FN_PTR(address, InterpreterRuntime::resolve_invokehandle); ++ break; ++ case Bytecodes::_invokedynamic: ++ entry = CAST_FROM_FN_PTR(address, InterpreterRuntime::resolve_invokedynamic); ++ break; ++ default : ++ fatal(err_msg("unexpected bytecode: %s", Bytecodes::name(bytecode()))); ++ break; ++ } ++ ++ __ move(temp, i); ++ __ call_VM(NOREG, entry, temp); ++ ++ // Update registers with resolved info ++ __ get_cache_and_index_at_bcp(Rcache, index, 1, index_size); ++ __ bind(resolved); ++} ++ ++// The Rcache and index registers must be set before call ++void TemplateTable::load_field_cp_cache_entry(Register obj, ++ Register cache, ++ Register index, ++ Register off, ++ Register flags, ++ bool is_static = false) { ++ assert_different_registers(cache, index, flags, off); ++ ++ ByteSize cp_base_offset = ConstantPoolCache::base_offset(); ++ // Field offset ++ __ dsll(AT, index, Address::times_ptr); ++ __ daddu(AT, cache, AT); ++ __ ld(off, AT, in_bytes(cp_base_offset + ConstantPoolCacheEntry::f2_offset())); ++ // Flags ++ __ ld(flags, AT, in_bytes(cp_base_offset + ConstantPoolCacheEntry::flags_offset())); ++ ++ // klass overwrite register ++ if (is_static) { ++ __ ld(obj, AT, in_bytes(cp_base_offset + ConstantPoolCacheEntry::f1_offset())); ++ const int mirror_offset = in_bytes(Klass::java_mirror_offset()); ++ __ ld(obj, Address(obj, mirror_offset)); ++ ++ __ verify_oop(obj); ++ } ++} ++ ++// get the method, itable_index and flags of the current invoke ++void TemplateTable::load_invoke_cp_cache_entry(int byte_no, ++ Register method, ++ Register itable_index, ++ Register flags, ++ bool is_invokevirtual, ++ bool is_invokevfinal, /*unused*/ ++ bool is_invokedynamic) { ++ // setup registers ++ const Register cache = T3; ++ const Register index = T1; ++ assert_different_registers(method, flags); ++ assert_different_registers(method, cache, index); ++ assert_different_registers(itable_index, flags); ++ assert_different_registers(itable_index, cache, index); ++ assert(is_invokevirtual == (byte_no == f2_byte), "is invokevirtual flag redundant"); ++ // determine constant pool cache field offsets ++ const int method_offset = in_bytes( ++ ConstantPoolCache::base_offset() + ++ ((byte_no == f2_byte) ++ ? ConstantPoolCacheEntry::f2_offset() ++ : ConstantPoolCacheEntry::f1_offset())); ++ const int flags_offset = in_bytes(ConstantPoolCache::base_offset() + ++ ConstantPoolCacheEntry::flags_offset()); ++ // access constant pool cache fields ++ const int index_offset = in_bytes(ConstantPoolCache::base_offset() + ++ ConstantPoolCacheEntry::f2_offset()); ++ ++ size_t index_size = (is_invokedynamic ? 
sizeof(u4): sizeof(u2)); ++ resolve_cache_and_index(byte_no, cache, index, index_size); ++ ++ //assert(wordSize == 8, "adjust code below"); ++ // note we shift 4 not 2, for we get is the true inde ++ // of ConstantPoolCacheEntry, not the shifted 2-bit index as x86 version ++ __ dsll(AT, index, Address::times_ptr); ++ __ daddu(AT, cache, AT); ++ __ ld(method, AT, method_offset); ++ ++ if (itable_index != NOREG) { ++ __ ld(itable_index, AT, index_offset); ++ } ++ __ ld(flags, AT, flags_offset); ++} ++ ++// The registers cache and index expected to be set before call. ++// Correct values of the cache and index registers are preserved. ++void TemplateTable::jvmti_post_field_access(Register cache, Register index, ++ bool is_static, bool has_tos) { ++ // do the JVMTI work here to avoid disturbing the register state below ++ // We use c_rarg registers here because we want to use the register used in ++ // the call to the VM ++ if (JvmtiExport::can_post_field_access()) { ++ // Check to see if a field access watch has been set before we ++ // take the time to call into the VM. ++ Label L1; ++ // kill FSR ++ Register tmp1 = T2; ++ Register tmp2 = T1; ++ Register tmp3 = T3; ++ assert_different_registers(cache, index, AT); ++ __ li(AT, (intptr_t)JvmtiExport::get_field_access_count_addr()); ++ __ lw(AT, AT, 0); ++ __ beq(AT, R0, L1); ++ __ delayed()->nop(); ++ ++ __ get_cache_and_index_at_bcp(tmp2, tmp3, 1); ++ ++ // cache entry pointer ++ __ daddiu(tmp2, tmp2, in_bytes(ConstantPoolCache::base_offset())); ++ __ shl(tmp3, LogBytesPerWord); ++ __ daddu(tmp2, tmp2, tmp3); ++ if (is_static) { ++ __ move(tmp1, R0); ++ } else { ++ __ ld(tmp1, SP, 0); ++ __ verify_oop(tmp1); ++ } ++ // tmp1: object pointer or NULL ++ // tmp2: cache entry pointer ++ // tmp3: jvalue object on the stack ++ __ call_VM(NOREG, CAST_FROM_FN_PTR(address, ++ InterpreterRuntime::post_field_access), ++ tmp1, tmp2, tmp3); ++ __ get_cache_and_index_at_bcp(cache, index, 1); ++ __ bind(L1); ++ } ++} ++ ++void TemplateTable::pop_and_check_object(Register r) { ++ __ pop_ptr(r); ++ __ null_check(r); // for field access must check obj. 
++ __ verify_oop(r); ++} ++ ++// used registers : T1, T2, T3, T1 ++// T1 : flags ++// T2 : off ++// T3 : obj ++// T1 : field address ++// The flags 31, 30, 29, 28 together build a 4 bit number 0 to 8 with the ++// following mapping to the TosState states: ++// btos: 0 ++// ctos: 1 ++// stos: 2 ++// itos: 3 ++// ltos: 4 ++// ftos: 5 ++// dtos: 6 ++// atos: 7 ++// vtos: 8 ++// see ConstantPoolCacheEntry::set_field for more info ++void TemplateTable::getfield_or_static(int byte_no, bool is_static) { ++ transition(vtos, vtos); ++ ++ const Register cache = T3; ++ const Register index = T0; ++ ++ const Register obj = T3; ++ const Register off = T2; ++ const Register flags = T1; ++ ++ const Register scratch = T8; ++ ++ resolve_cache_and_index(byte_no, cache, index, sizeof(u2)); ++ jvmti_post_field_access(cache, index, is_static, false); ++ load_field_cp_cache_entry(obj, cache, index, off, flags, is_static); ++ ++ { ++ __ move(scratch, 1 << ConstantPoolCacheEntry::is_volatile_shift); ++ __ andr(scratch, scratch, flags); ++ ++ Label notVolatile; ++ __ beq(scratch, R0, notVolatile); ++ __ delayed()->nop(); ++ volatile_barrier(); ++ __ bind(notVolatile); ++ } ++ ++ if (!is_static) pop_and_check_object(obj); ++ __ daddu(index, obj, off); ++ ++ ++ Label Done, notByte, notBool, notInt, notShort, notChar, ++ notLong, notFloat, notObj, notDouble; ++ ++ assert(btos == 0, "change code, btos != 0"); ++ __ dsrl(flags, flags, ConstantPoolCacheEntry::tos_state_shift); ++ __ andi(flags, flags, ConstantPoolCacheEntry::tos_state_mask); ++ __ bne(flags, R0, notByte); ++ __ delayed()->nop(); ++ ++ // btos ++ __ lb(FSR, index, 0); ++ __ push(btos); ++ ++ // Rewrite bytecode to be faster ++ if (!is_static) { ++ patch_bytecode(Bytecodes::_fast_bgetfield, T3, T2); ++ } ++ __ b(Done); ++ __ delayed()->nop(); ++ ++ ++ __ bind(notByte); ++ __ move(AT, ztos); ++ __ bne(flags, AT, notBool); ++ __ delayed()->nop(); ++ ++ // ztos ++ __ lb(FSR, index, 0); ++ __ push(ztos); ++ ++ // Rewrite bytecode to be faster ++ if (!is_static) { ++ patch_bytecode(Bytecodes::_fast_bgetfield, T3, T2); ++ } ++ __ b(Done); ++ __ delayed()->nop(); ++ ++ ++ __ bind(notBool); ++ __ move(AT, itos); ++ __ bne(flags, AT, notInt); ++ __ delayed()->nop(); ++ ++ // itos ++ __ lw(FSR, index, 0); ++ __ push(itos); ++ ++ // Rewrite bytecode to be faster ++ if (!is_static) { ++ patch_bytecode(Bytecodes::_fast_igetfield, T3, T2); ++ } ++ __ b(Done); ++ __ delayed()->nop(); ++ ++ __ bind(notInt); ++ __ move(AT, atos); ++ __ bne(flags, AT, notObj); ++ __ delayed()->nop(); ++ ++ // atos ++ //add for compressedoops ++ __ load_heap_oop(FSR, Address(index, 0)); ++ __ push(atos); ++ ++ if (!is_static) { ++ patch_bytecode(Bytecodes::_fast_agetfield, T3, T2); ++ } ++ __ b(Done); ++ __ delayed()->nop(); ++ ++ __ bind(notObj); ++ __ move(AT, ctos); ++ __ bne(flags, AT, notChar); ++ __ delayed()->nop(); ++ ++ // ctos ++ __ lhu(FSR, index, 0); ++ __ push(ctos); ++ ++ if (!is_static) { ++ patch_bytecode(Bytecodes::_fast_cgetfield, T3, T2); ++ } ++ __ b(Done); ++ __ delayed()->nop(); ++ ++ __ bind(notChar); ++ __ move(AT, stos); ++ __ bne(flags, AT, notShort); ++ __ delayed()->nop(); ++ ++ // stos ++ __ lh(FSR, index, 0); ++ __ push(stos); ++ ++ if (!is_static) { ++ patch_bytecode(Bytecodes::_fast_sgetfield, T3, T2); ++ } ++ __ b(Done); ++ __ delayed()->nop(); ++ ++ __ bind(notShort); ++ __ move(AT, ltos); ++ __ bne(flags, AT, notLong); ++ __ delayed()->nop(); ++ ++ // FIXME : the load/store should be atomic, we have no simple method to do this in mips32 ++ // ltos ++ __ 
ld(FSR, index, 0 * wordSize); ++ __ push(ltos); ++ ++ // Don't rewrite to _fast_lgetfield for potential volatile case. ++ __ b(Done); ++ __ delayed()->nop(); ++ ++ __ bind(notLong); ++ __ move(AT, ftos); ++ __ bne(flags, AT, notFloat); ++ __ delayed()->nop(); ++ ++ // ftos ++ __ lwc1(FSF, index, 0); ++ __ push(ftos); ++ ++ if (!is_static) { ++ patch_bytecode(Bytecodes::_fast_fgetfield, T3, T2); ++ } ++ __ b(Done); ++ __ delayed()->nop(); ++ ++ __ bind(notFloat); ++ __ move(AT, dtos); ++#ifdef ASSERT ++ __ bne(flags, AT, notDouble); ++ __ delayed()->nop(); ++#endif ++ ++ // dtos ++ __ ldc1(FSF, index, 0 * wordSize); ++ __ push(dtos); ++ ++ if (!is_static) { ++ patch_bytecode(Bytecodes::_fast_dgetfield, T3, T2); ++ } ++ ++ ++#ifdef ASSERT ++ __ b(Done); ++ __ delayed()->nop(); ++ __ bind(notDouble); ++ __ stop("Bad state"); ++#endif ++ ++ __ bind(Done); ++ ++ { ++ Label notVolatile; ++ __ beq(scratch, R0, notVolatile); ++ __ delayed()->nop(); ++ volatile_barrier(); ++ __ bind(notVolatile); ++ } ++} ++ ++ ++void TemplateTable::getfield(int byte_no) { ++ getfield_or_static(byte_no, false); ++} ++ ++void TemplateTable::getstatic(int byte_no) { ++ getfield_or_static(byte_no, true); ++} ++ ++// The registers cache and index expected to be set before call. ++// The function may destroy various registers, just not the cache and index registers. ++void TemplateTable::jvmti_post_field_mod(Register cache, Register index, bool is_static) { ++ transition(vtos, vtos); ++ ++ ByteSize cp_base_offset = ConstantPoolCache::base_offset(); ++ ++ if (JvmtiExport::can_post_field_modification()) { ++ // Check to see if a field modification watch has been set before ++ // we take the time to call into the VM. ++ Label L1; ++ //kill AT, T1, T2, T3, T9 ++ Register tmp1 = T2; ++ Register tmp2 = T1; ++ Register tmp3 = T3; ++ Register tmp4 = T9; ++ assert_different_registers(cache, index, tmp4); ++ ++ __ li(AT, JvmtiExport::get_field_modification_count_addr()); ++ __ lw(AT, AT, 0); ++ __ beq(AT, R0, L1); ++ __ delayed()->nop(); ++ ++ __ get_cache_and_index_at_bcp(tmp2, tmp4, 1); ++ ++ if (is_static) { ++ __ move(tmp1, R0); ++ } else { ++ // Life is harder. The stack holds the value on top, followed by ++ // the object. We don't know the size of the value, though; it ++ // could be one or two words depending on its type. As a result, ++ // we must find the type to determine where the object is. 
++ Label two_word, valsize_known; ++ __ dsll(AT, tmp4, Address::times_8); ++ __ daddu(AT, tmp2, AT); ++ __ ld(tmp3, AT, in_bytes(cp_base_offset + ++ ConstantPoolCacheEntry::flags_offset())); ++ __ shr(tmp3, ConstantPoolCacheEntry::tos_state_shift); ++ ++ ConstantPoolCacheEntry::verify_tos_state_shift(); ++ __ move(tmp1, SP); ++ __ move(AT, ltos); ++ __ beq(tmp3, AT, two_word); ++ __ delayed()->nop(); ++ __ move(AT, dtos); ++ __ beq(tmp3, AT, two_word); ++ __ delayed()->nop(); ++ __ b(valsize_known); ++ __ delayed()->daddiu(tmp1, tmp1, Interpreter::expr_offset_in_bytes(1) ); ++ ++ __ bind(two_word); ++ __ daddiu(tmp1, tmp1, Interpreter::expr_offset_in_bytes(2)); ++ ++ __ bind(valsize_known); ++ // setup object pointer ++ __ ld(tmp1, tmp1, 0*wordSize); ++ } ++ // cache entry pointer ++ __ daddiu(tmp2, tmp2, in_bytes(cp_base_offset)); ++ __ shl(tmp4, LogBytesPerWord); ++ __ daddu(tmp2, tmp2, tmp4); ++ // object (tos) ++ __ move(tmp3, SP); ++ // tmp1: object pointer set up above (NULL if static) ++ // tmp2: cache entry pointer ++ // tmp3: jvalue object on the stack ++ __ call_VM(NOREG, ++ CAST_FROM_FN_PTR(address, ++ InterpreterRuntime::post_field_modification), ++ tmp1, tmp2, tmp3); ++ __ get_cache_and_index_at_bcp(cache, index, 1); ++ __ bind(L1); ++ } ++} ++ ++// used registers : T0, T1, T2, T3, T8 ++// T1 : flags ++// T2 : off ++// T3 : obj ++// T8 : volatile bit ++// see ConstantPoolCacheEntry::set_field for more info ++void TemplateTable::putfield_or_static(int byte_no, bool is_static) { ++ transition(vtos, vtos); ++ ++ const Register cache = T3; ++ const Register index = T0; ++ const Register obj = T3; ++ const Register off = T2; ++ const Register flags = T1; ++ const Register bc = T3; ++ ++ const Register scratch = T8; ++ ++ resolve_cache_and_index(byte_no, cache, index, sizeof(u2)); ++ jvmti_post_field_mod(cache, index, is_static); ++ load_field_cp_cache_entry(obj, cache, index, off, flags, is_static); ++ ++ Label Done; ++ { ++ __ move(scratch, 1 << ConstantPoolCacheEntry::is_volatile_shift); ++ __ andr(scratch, scratch, flags); ++ ++ Label notVolatile; ++ __ beq(scratch, R0, notVolatile); ++ __ delayed()->nop(); ++ volatile_barrier(); ++ __ bind(notVolatile); ++ } ++ ++ ++ Label notByte, notBool, notInt, notShort, notChar, notLong, notFloat, notObj, notDouble; ++ ++ assert(btos == 0, "change code, btos != 0"); ++ ++ // btos ++ __ dsrl(flags, flags, ConstantPoolCacheEntry::tos_state_shift); ++ __ andi(flags, flags, ConstantPoolCacheEntry::tos_state_mask); ++ __ bne(flags, R0, notByte); ++ __ delayed()->nop(); ++ ++ __ pop(btos); ++ if (!is_static) { ++ pop_and_check_object(obj); ++ } ++ __ daddu(AT, obj, off); ++ __ sb(FSR, AT, 0); ++ ++ if (!is_static) { ++ patch_bytecode(Bytecodes::_fast_bputfield, bc, off, true, byte_no); ++ } ++ __ b(Done); ++ __ delayed()->nop(); ++ ++ // ztos ++ __ bind(notByte); ++ __ move(AT, ztos); ++ __ bne(flags, AT, notBool); ++ __ delayed()->nop(); ++ ++ __ pop(ztos); ++ if (!is_static) { ++ pop_and_check_object(obj); ++ } ++ __ daddu(AT, obj, off); ++ __ andi(FSR, FSR, 0x1); ++ __ sb(FSR, AT, 0); ++ ++ if (!is_static) { ++ patch_bytecode(Bytecodes::_fast_zputfield, bc, off, true, byte_no); ++ } ++ __ b(Done); ++ __ delayed()->nop(); ++ ++ // itos ++ __ bind(notBool); ++ __ move(AT, itos); ++ __ bne(flags, AT, notInt); ++ __ delayed()->nop(); ++ ++ __ pop(itos); ++ if (!is_static) { ++ pop_and_check_object(obj); ++ } ++ __ daddu(AT, obj, off); ++ __ sw(FSR, AT, 0); ++ ++ if (!is_static) { ++ patch_bytecode(Bytecodes::_fast_iputfield, bc, off, true, 
byte_no); ++ } ++ __ b(Done); ++ __ delayed()->nop(); ++ ++ // atos ++ __ bind(notInt); ++ __ move(AT, atos); ++ __ bne(flags, AT, notObj); ++ __ delayed()->nop(); ++ ++ __ pop(atos); ++ if (!is_static) { ++ pop_and_check_object(obj); ++ } ++ ++ do_oop_store(_masm, Address(obj, off, Address::times_1, 0), FSR, _bs->kind(), false); ++ ++ if (!is_static) { ++ patch_bytecode(Bytecodes::_fast_aputfield, bc, off, true, byte_no); ++ } ++ __ b(Done); ++ __ delayed()->nop(); ++ ++ // ctos ++ __ bind(notObj); ++ __ move(AT, ctos); ++ __ bne(flags, AT, notChar); ++ __ delayed()->nop(); ++ ++ __ pop(ctos); ++ if (!is_static) { ++ pop_and_check_object(obj); ++ } ++ __ daddu(AT, obj, off); ++ __ sh(FSR, AT, 0); ++ if (!is_static) { ++ patch_bytecode(Bytecodes::_fast_cputfield, bc, off, true, byte_no); ++ } ++ __ b(Done); ++ __ delayed()->nop(); ++ ++ // stos ++ __ bind(notChar); ++ __ move(AT, stos); ++ __ bne(flags, AT, notShort); ++ __ delayed()->nop(); ++ ++ __ pop(stos); ++ if (!is_static) { ++ pop_and_check_object(obj); ++ } ++ __ daddu(AT, obj, off); ++ __ sh(FSR, AT, 0); ++ if (!is_static) { ++ patch_bytecode(Bytecodes::_fast_sputfield, bc, off, true, byte_no); ++ } ++ __ b(Done); ++ __ delayed()->nop(); ++ ++ // ltos ++ __ bind(notShort); ++ __ move(AT, ltos); ++ __ bne(flags, AT, notLong); ++ __ delayed()->nop(); ++ ++ __ pop(ltos); ++ if (!is_static) { ++ pop_and_check_object(obj); ++ } ++ __ daddu(AT, obj, off); ++ __ sd(FSR, AT, 0); ++ if (!is_static) { ++ patch_bytecode(Bytecodes::_fast_lputfield, bc, off, true, byte_no); ++ } ++ __ b(Done); ++ __ delayed()->nop(); ++ ++ // ftos ++ __ bind(notLong); ++ __ move(AT, ftos); ++ __ bne(flags, AT, notFloat); ++ __ delayed()->nop(); ++ ++ __ pop(ftos); ++ if (!is_static) { ++ pop_and_check_object(obj); ++ } ++ __ daddu(AT, obj, off); ++ __ swc1(FSF, AT, 0); ++ if (!is_static) { ++ patch_bytecode(Bytecodes::_fast_fputfield, bc, off, true, byte_no); ++ } ++ __ b(Done); ++ __ delayed()->nop(); ++ ++ ++ // dtos ++ __ bind(notFloat); ++ __ move(AT, dtos); ++#ifdef ASSERT ++ __ bne(flags, AT, notDouble); ++ __ delayed()->nop(); ++#endif ++ ++ __ pop(dtos); ++ if (!is_static) { ++ pop_and_check_object(obj); ++ } ++ __ daddu(AT, obj, off); ++ __ sdc1(FSF, AT, 0); ++ if (!is_static) { ++ patch_bytecode(Bytecodes::_fast_dputfield, bc, off, true, byte_no); ++ } ++ ++#ifdef ASSERT ++ __ b(Done); ++ __ delayed()->nop(); ++ ++ __ bind(notDouble); ++ __ stop("Bad state"); ++#endif ++ ++ __ bind(Done); ++ ++ { ++ Label notVolatile; ++ __ beq(scratch, R0, notVolatile); ++ __ delayed()->nop(); ++ volatile_barrier(); ++ __ bind(notVolatile); ++ } ++} ++ ++void TemplateTable::putfield(int byte_no) { ++ putfield_or_static(byte_no, false); ++} ++ ++void TemplateTable::putstatic(int byte_no) { ++ putfield_or_static(byte_no, true); ++} ++ ++// used registers : T1, T2, T3 ++// T1 : cp_entry ++// T2 : obj ++// T3 : value pointer ++void TemplateTable::jvmti_post_fast_field_mod() { ++ if (JvmtiExport::can_post_field_modification()) { ++ // Check to see if a field modification watch has been set before ++ // we take the time to call into the VM. 
++ Label L2; ++ //kill AT, T1, T2, T3, T9 ++ Register tmp1 = T2; ++ Register tmp2 = T1; ++ Register tmp3 = T3; ++ Register tmp4 = T9; ++ __ li(AT, JvmtiExport::get_field_modification_count_addr()); ++ __ lw(tmp3, AT, 0); ++ __ beq(tmp3, R0, L2); ++ __ delayed()->nop(); ++ __ pop_ptr(tmp1); ++ __ verify_oop(tmp1); ++ __ push_ptr(tmp1); ++ switch (bytecode()) { // load values into the jvalue object ++ case Bytecodes::_fast_aputfield: __ push_ptr(FSR); break; ++ case Bytecodes::_fast_bputfield: // fall through ++ case Bytecodes::_fast_zputfield: // fall through ++ case Bytecodes::_fast_sputfield: // fall through ++ case Bytecodes::_fast_cputfield: // fall through ++ case Bytecodes::_fast_iputfield: __ push_i(FSR); break; ++ case Bytecodes::_fast_dputfield: __ push_d(FSF); break; ++ case Bytecodes::_fast_fputfield: __ push_f(); break; ++ case Bytecodes::_fast_lputfield: __ push_l(FSR); break; ++ default: ShouldNotReachHere(); ++ } ++ __ move(tmp3, SP); ++ // access constant pool cache entry ++ __ get_cache_entry_pointer_at_bcp(tmp2, FSR, 1); ++ __ verify_oop(tmp1); ++ // tmp1: object pointer copied above ++ // tmp2: cache entry pointer ++ // tmp3: jvalue object on the stack ++ __ call_VM(NOREG, ++ CAST_FROM_FN_PTR(address, ++ InterpreterRuntime::post_field_modification), ++ tmp1, tmp2, tmp3); ++ ++ switch (bytecode()) { // restore tos values ++ case Bytecodes::_fast_aputfield: __ pop_ptr(FSR); break; ++ case Bytecodes::_fast_bputfield: // fall through ++ case Bytecodes::_fast_zputfield: // fall through ++ case Bytecodes::_fast_sputfield: // fall through ++ case Bytecodes::_fast_cputfield: // fall through ++ case Bytecodes::_fast_iputfield: __ pop_i(FSR); break; ++ case Bytecodes::_fast_dputfield: __ pop_d(); break; ++ case Bytecodes::_fast_fputfield: __ pop_f(); break; ++ case Bytecodes::_fast_lputfield: __ pop_l(FSR); break; ++ } ++ __ bind(L2); ++ } ++} ++ ++// used registers : T2, T3, T1 ++// T2 : index & off & field address ++// T3 : cache & obj ++// T1 : flags ++void TemplateTable::fast_storefield(TosState state) { ++ transition(state, vtos); ++ ++ const Register scratch = T8; ++ ++ ByteSize base = ConstantPoolCache::base_offset(); ++ ++ jvmti_post_fast_field_mod(); ++ ++ // access constant pool cache ++ __ get_cache_and_index_at_bcp(T3, T2, 1); ++ ++ // Must prevent reordering of the following cp cache loads with bytecode load ++ __ sync(); ++ ++ // test for volatile with T1 ++ __ dsll(AT, T2, Address::times_8); ++ __ daddu(AT, T3, AT); ++ __ ld(T1, AT, in_bytes(base + ConstantPoolCacheEntry::flags_offset())); ++ ++ // replace index with field offset from cache entry ++ __ ld(T2, AT, in_bytes(base + ConstantPoolCacheEntry::f2_offset())); ++ ++ Label Done; ++ { ++ __ move(scratch, 1 << ConstantPoolCacheEntry::is_volatile_shift); ++ __ andr(scratch, scratch, T1); ++ ++ Label notVolatile; ++ __ beq(scratch, R0, notVolatile); ++ __ delayed()->nop(); ++ volatile_barrier(); ++ __ bind(notVolatile); ++ } ++ ++ // Get object from stack ++ pop_and_check_object(T3); ++ ++ if (bytecode() != Bytecodes::_fast_aputfield) { ++ // field address ++ __ daddu(T2, T3, T2); ++ } ++ ++ // access field ++ switch (bytecode()) { ++ case Bytecodes::_fast_zputfield: ++ __ andi(FSR, FSR, 0x1); // boolean is true if LSB is 1 ++ // fall through to bputfield ++ case Bytecodes::_fast_bputfield: ++ __ sb(FSR, T2, 0); ++ break; ++ case Bytecodes::_fast_sputfield: // fall through ++ case Bytecodes::_fast_cputfield: ++ __ sh(FSR, T2, 0); ++ break; ++ case Bytecodes::_fast_iputfield: ++ __ sw(FSR, T2, 0); ++ break; ++ case 
Bytecodes::_fast_lputfield: ++ __ sd(FSR, T2, 0 * wordSize); ++ break; ++ case Bytecodes::_fast_fputfield: ++ __ swc1(FSF, T2, 0); ++ break; ++ case Bytecodes::_fast_dputfield: ++ __ sdc1(FSF, T2, 0 * wordSize); ++ break; ++ case Bytecodes::_fast_aputfield: ++ do_oop_store(_masm, Address(T3, T2, Address::times_1, 0), FSR, _bs->kind(), false); ++ break; ++ default: ++ ShouldNotReachHere(); ++ } ++ ++ { ++ Label notVolatile; ++ __ beq(scratch, R0, notVolatile); ++ __ delayed()->nop(); ++ volatile_barrier(); ++ __ bind(notVolatile); ++ } ++} ++ ++// used registers : T2, T3, T1 ++// T3 : cp_entry & cache ++// T2 : index & offset ++void TemplateTable::fast_accessfield(TosState state) { ++ transition(atos, state); ++ ++ const Register scratch = T8; ++ ++ // do the JVMTI work here to avoid disturbing the register state below ++ if (JvmtiExport::can_post_field_access()) { ++ // Check to see if a field access watch has been set before we take ++ // the time to call into the VM. ++ Label L1; ++ __ li(AT, (intptr_t)JvmtiExport::get_field_access_count_addr()); ++ __ lw(T3, AT, 0); ++ __ beq(T3, R0, L1); ++ __ delayed()->nop(); ++ // access constant pool cache entry ++ __ get_cache_entry_pointer_at_bcp(T3, T1, 1); ++ __ move(TSR, FSR); ++ __ verify_oop(FSR); ++ // FSR: object pointer copied above ++ // T3: cache entry pointer ++ __ call_VM(NOREG, ++ CAST_FROM_FN_PTR(address, InterpreterRuntime::post_field_access), ++ FSR, T3); ++ __ move(FSR, TSR); ++ __ bind(L1); ++ } ++ ++ // access constant pool cache ++ __ get_cache_and_index_at_bcp(T3, T2, 1); ++ ++ // Must prevent reordering of the following cp cache loads with bytecode load ++ __ sync(); ++ ++ // replace index with field offset from cache entry ++ __ dsll(AT, T2, Address::times_8); ++ __ daddu(AT, T3, AT); ++ __ ld(T2, AT, in_bytes(ConstantPoolCache::base_offset() + ConstantPoolCacheEntry::f2_offset())); ++ ++ { ++ __ ld(AT, AT, in_bytes(ConstantPoolCache::base_offset() + ConstantPoolCacheEntry::flags_offset())); ++ __ move(scratch, 1 << ConstantPoolCacheEntry::is_volatile_shift); ++ __ andr(scratch, scratch, AT); ++ ++ Label notVolatile; ++ __ beq(scratch, R0, notVolatile); ++ __ delayed()->nop(); ++ volatile_barrier(); ++ __ bind(notVolatile); ++ } ++ ++ // FSR: object ++ __ verify_oop(FSR); ++ __ null_check(FSR); ++ // field addresses ++ __ daddu(FSR, FSR, T2); ++ ++ // access field ++ switch (bytecode()) { ++ case Bytecodes::_fast_bgetfield: ++ __ lb(FSR, FSR, 0); ++ break; ++ case Bytecodes::_fast_sgetfield: ++ __ lh(FSR, FSR, 0); ++ break; ++ case Bytecodes::_fast_cgetfield: ++ __ lhu(FSR, FSR, 0); ++ break; ++ case Bytecodes::_fast_igetfield: ++ __ lw(FSR, FSR, 0); ++ break; ++ case Bytecodes::_fast_lgetfield: ++ __ stop("should not be rewritten"); ++ break; ++ case Bytecodes::_fast_fgetfield: ++ __ lwc1(FSF, FSR, 0); ++ break; ++ case Bytecodes::_fast_dgetfield: ++ __ ldc1(FSF, FSR, 0); ++ break; ++ case Bytecodes::_fast_agetfield: ++ //add for compressedoops ++ __ load_heap_oop(FSR, Address(FSR, 0)); ++ __ verify_oop(FSR); ++ break; ++ default: ++ ShouldNotReachHere(); ++ } ++ ++ { ++ Label notVolatile; ++ __ beq(scratch, R0, notVolatile); ++ __ delayed()->nop(); ++ volatile_barrier(); ++ __ bind(notVolatile); ++ } ++} ++ ++// generator for _fast_iaccess_0, _fast_aaccess_0, _fast_faccess_0 ++// used registers : T1, T2, T3, T1 ++// T1 : obj & field address ++// T2 : off ++// T3 : cache ++// T1 : index ++void TemplateTable::fast_xaccess(TosState state) { ++ transition(vtos, state); ++ ++ const Register scratch = T8; ++ ++ // get receiver 
++ __ ld(T1, aaddress(0)); ++ // access constant pool cache ++ __ get_cache_and_index_at_bcp(T3, T2, 2); ++ __ dsll(AT, T2, Address::times_8); ++ __ daddu(AT, T3, AT); ++ __ ld(T2, AT, in_bytes(ConstantPoolCache::base_offset() + ConstantPoolCacheEntry::f2_offset())); ++ ++ { ++ __ ld(AT, AT, in_bytes(ConstantPoolCache::base_offset() + ConstantPoolCacheEntry::flags_offset())); ++ __ move(scratch, 1 << ConstantPoolCacheEntry::is_volatile_shift); ++ __ andr(scratch, scratch, AT); ++ ++ Label notVolatile; ++ __ beq(scratch, R0, notVolatile); ++ __ delayed()->nop(); ++ volatile_barrier(); ++ __ bind(notVolatile); ++ } ++ ++ // make sure exception is reported in correct bcp range (getfield is ++ // next instruction) ++ __ daddiu(BCP, BCP, 1); ++ __ null_check(T1); ++ __ daddu(T1, T1, T2); ++ ++ if (state == itos) { ++ __ lw(FSR, T1, 0); ++ } else if (state == atos) { ++ __ load_heap_oop(FSR, Address(T1, 0)); ++ __ verify_oop(FSR); ++ } else if (state == ftos) { ++ __ lwc1(FSF, T1, 0); ++ } else { ++ ShouldNotReachHere(); ++ } ++ __ daddiu(BCP, BCP, -1); ++ ++ { ++ Label notVolatile; ++ __ beq(scratch, R0, notVolatile); ++ __ delayed()->nop(); ++ volatile_barrier(); ++ __ bind(notVolatile); ++ } ++} ++ ++ ++ ++//----------------------------------------------------------------------------- ++// Calls ++ ++void TemplateTable::count_calls(Register method, Register temp) { ++ // implemented elsewhere ++ ShouldNotReachHere(); ++} ++ ++// method, index, recv, flags: T1, T2, T3, T1 ++// byte_no = 2 for _invokevirtual, 1 else ++// T0 : return address ++// get the method & index of the invoke, and push the return address of ++// the invoke(first word in the frame) ++// this address is where the return code jmp to. ++// NOTE : this method will set T3&T1 as recv&flags ++void TemplateTable::prepare_invoke(int byte_no, ++ Register method, // linked method (or i-klass) ++ Register index, // itable index, MethodType, etc. ++ Register recv, // if caller wants to see it ++ Register flags // if caller wants to test it ++ ) { ++ // determine flags ++ const Bytecodes::Code code = bytecode(); ++ const bool is_invokeinterface = code == Bytecodes::_invokeinterface; ++ const bool is_invokedynamic = code == Bytecodes::_invokedynamic; ++ const bool is_invokehandle = code == Bytecodes::_invokehandle; ++ const bool is_invokevirtual = code == Bytecodes::_invokevirtual; ++ const bool is_invokespecial = code == Bytecodes::_invokespecial; ++ const bool load_receiver = (recv != noreg); ++ const bool save_flags = (flags != noreg); ++ assert(load_receiver == (code != Bytecodes::_invokestatic && code != Bytecodes::_invokedynamic),""); ++ assert(save_flags == (is_invokeinterface || is_invokevirtual), "need flags for vfinal"); ++ assert(flags == noreg || flags == T1, "error flags reg."); ++ assert(recv == noreg || recv == T3, "error recv reg."); ++ ++ // setup registers & access constant pool cache ++ if(recv == noreg) recv = T3; ++ if(flags == noreg) flags = T1; ++ assert_different_registers(method, index, recv, flags); ++ ++ // save 'interpreter return address' ++ __ save_bcp(); ++ ++ load_invoke_cp_cache_entry(byte_no, method, index, flags, is_invokevirtual, false, is_invokedynamic); ++ ++ if (is_invokedynamic || is_invokehandle) { ++ Label L_no_push; ++ __ move(AT, (1 << ConstantPoolCacheEntry::has_appendix_shift)); ++ __ andr(AT, AT, flags); ++ __ beq(AT, R0, L_no_push); ++ __ delayed()->nop(); ++ // Push the appendix as a trailing parameter. 
++ // This must be done before we get the receiver, ++ // since the parameter_size includes it. ++ Register tmp = SSR; ++ __ push(tmp); ++ __ move(tmp, index); ++ assert(ConstantPoolCacheEntry::_indy_resolved_references_appendix_offset == 0, "appendix expected at index+0"); ++ __ load_resolved_reference_at_index(index, tmp); ++ __ pop(tmp); ++ __ push(index); // push appendix (MethodType, CallSite, etc.) ++ __ bind(L_no_push); ++ } ++ ++ // load receiver if needed (after appendix is pushed so parameter size is correct) ++ // Note: no return address pushed yet ++ if (load_receiver) { ++ __ move(AT, ConstantPoolCacheEntry::parameter_size_mask); ++ __ andr(recv, flags, AT); ++ // Since we won't push RA on stack, no_return_pc_pushed_yet should be 0. ++ const int no_return_pc_pushed_yet = 0; // argument slot correction before we push return address ++ const int receiver_is_at_end = -1; // back off one slot to get receiver ++ Address recv_addr = __ argument_address(recv, no_return_pc_pushed_yet + receiver_is_at_end); ++ __ ld(recv, recv_addr); ++ __ verify_oop(recv); ++ } ++ if(save_flags) { ++ __ move(BCP, flags); ++ } ++ ++ // compute return type ++ __ dsrl(flags, flags, ConstantPoolCacheEntry::tos_state_shift); ++ __ andi(flags, flags, 0xf); ++ ++ // Make sure we don't need to mask flags for tos_state_shift after the above shift ++ ConstantPoolCacheEntry::verify_tos_state_shift(); ++ // load return address ++ { ++ const address table = (address) Interpreter::invoke_return_entry_table_for(code); ++ __ li(AT, (long)table); ++ __ dsll(flags, flags, LogBytesPerWord); ++ __ daddu(AT, AT, flags); ++ __ ld(RA, AT, 0); ++ } ++ ++ if (save_flags) { ++ __ move(flags, BCP); ++ __ restore_bcp(); ++ } ++} ++ ++// used registers : T0, T3, T1, T2 ++// T3 : recv, this two register using convention is by prepare_invoke ++// T1 : flags, klass ++// Rmethod : method, index must be Rmethod ++void TemplateTable::invokevirtual_helper(Register index, ++ Register recv, ++ Register flags) { ++ ++ assert_different_registers(index, recv, flags, T2); ++ ++ // Test for an invoke of a final method ++ Label notFinal; ++ __ move(AT, (1 << ConstantPoolCacheEntry::is_vfinal_shift)); ++ __ andr(AT, flags, AT); ++ __ beq(AT, R0, notFinal); ++ __ delayed()->nop(); ++ ++ Register method = index; // method must be Rmethod ++ assert(method == Rmethod, "methodOop must be Rmethod for interpreter calling convention"); ++ ++ // do the call - the index is actually the method to call ++ // the index is indeed methodOop, for this is vfinal, ++ // see ConstantPoolCacheEntry::set_method for more info ++ ++ __ verify_oop(method); ++ ++ // It's final, need a null check here! 
++ __ null_check(recv); ++ ++ // profile this call ++ __ profile_final_call(T2); ++ ++ // T2: tmp, used for mdp ++ // method: callee ++ // T9: tmp ++ // is_virtual: true ++ __ profile_arguments_type(T2, method, T9, true); ++ ++ __ jump_from_interpreted(method, T2); ++ ++ __ bind(notFinal); ++ ++ // get receiver klass ++ __ null_check(recv, oopDesc::klass_offset_in_bytes()); ++ __ load_klass(T2, recv); ++ __ verify_oop(T2); ++ ++ // profile this call ++ __ profile_virtual_call(T2, T0, T1); ++ ++ // get target methodOop & entry point ++ const int base = InstanceKlass::vtable_start_offset() * wordSize; ++ assert(vtableEntry::size() * wordSize == wordSize, "adjust the scaling in the code below"); ++ __ dsll(AT, index, Address::times_ptr); ++ // T2: receiver ++ __ daddu(AT, T2, AT); ++ //this is a ualign read ++ __ ld(method, AT, base + vtableEntry::method_offset_in_bytes()); ++ __ profile_arguments_type(T2, method, T9, true); ++ __ jump_from_interpreted(method, T2); ++ ++} ++ ++void TemplateTable::invokevirtual(int byte_no) { ++ transition(vtos, vtos); ++ assert(byte_no == f2_byte, "use this argument"); ++ prepare_invoke(byte_no, Rmethod, NOREG, T3, T1); ++ // now recv & flags in T3, T1 ++ invokevirtual_helper(Rmethod, T3, T1); ++} ++ ++// T9 : entry ++// Rmethod : method ++void TemplateTable::invokespecial(int byte_no) { ++ transition(vtos, vtos); ++ assert(byte_no == f1_byte, "use this argument"); ++ prepare_invoke(byte_no, Rmethod, NOREG, T3); ++ // now recv & flags in T3, T1 ++ __ verify_oop(T3); ++ __ null_check(T3); ++ __ profile_call(T9); ++ ++ // T8: tmp, used for mdp ++ // Rmethod: callee ++ // T9: tmp ++ // is_virtual: false ++ __ profile_arguments_type(T8, Rmethod, T9, false); ++ ++ __ jump_from_interpreted(Rmethod, T9); ++ __ move(T0, T3); ++} ++ ++void TemplateTable::invokestatic(int byte_no) { ++ transition(vtos, vtos); ++ assert(byte_no == f1_byte, "use this argument"); ++ prepare_invoke(byte_no, Rmethod, NOREG); ++ __ verify_oop(Rmethod); ++ ++ __ profile_call(T9); ++ ++ // T8: tmp, used for mdp ++ // Rmethod: callee ++ // T9: tmp ++ // is_virtual: false ++ __ profile_arguments_type(T8, Rmethod, T9, false); ++ ++ __ jump_from_interpreted(Rmethod, T9); ++} ++ ++// i have no idea what to do here, now. for future change. FIXME. ++void TemplateTable::fast_invokevfinal(int byte_no) { ++ transition(vtos, vtos); ++ assert(byte_no == f2_byte, "use this argument"); ++ __ stop("fast_invokevfinal not used on mips64"); ++} ++ ++// used registers : T0, T1, T2, T3, T1, A7 ++// T0 : itable, vtable, entry ++// T1 : interface ++// T3 : receiver ++// T1 : flags, klass ++// Rmethod : index, method, this is required by interpreter_entry ++void TemplateTable::invokeinterface(int byte_no) { ++ transition(vtos, vtos); ++ //this method will use T1-T4 and T0 ++ assert(byte_no == f1_byte, "use this argument"); ++ prepare_invoke(byte_no, T2, Rmethod, T3, T1); ++ // T2: reference klass ++ // Rmethod: method ++ // T3: receiver ++ // T1: flags ++ ++ // Special case of invokeinterface called for virtual method of ++ // java.lang.Object. See cpCacheOop.cpp for details. ++ // This code isn't produced by javac, but could be produced by ++ // another compliant java compiler. 
++ Label notMethod; ++ __ move(AT, (1 << ConstantPoolCacheEntry::is_forced_virtual_shift)); ++ __ andr(AT, T1, AT); ++ __ beq(AT, R0, notMethod); ++ __ delayed()->nop(); ++ ++ invokevirtual_helper(Rmethod, T3, T1); ++ __ bind(notMethod); ++ // Get receiver klass into T1 - also a null check ++ //add for compressedoops ++ __ load_klass(T1, T3); ++ __ verify_oop(T1); ++ ++ Label no_such_interface, no_such_method; ++ ++ // Receiver subtype check against REFC. ++ // Superklass in T2. Subklass in T1. ++ __ lookup_interface_method(// inputs: rec. class, interface, itable index ++ T1, T2, noreg, ++ // outputs: scan temp. reg, scan temp. reg ++ T0, FSR, ++ no_such_interface, ++ /*return_method=*/false); ++ ++ ++ // profile this call ++ __ profile_virtual_call(T1, T0, FSR); ++ ++ // Get declaring interface class from method, and itable index ++ __ ld_ptr(T2, Rmethod, in_bytes(Method::const_offset())); ++ __ ld_ptr(T2, T2, in_bytes(ConstMethod::constants_offset())); ++ __ ld_ptr(T2, T2, ConstantPool::pool_holder_offset_in_bytes()); ++ __ lw(Rmethod, Rmethod, in_bytes(Method::itable_index_offset())); ++ __ addiu(Rmethod, Rmethod, (-1) * Method::itable_index_max); ++ __ subu32(Rmethod, R0, Rmethod); ++ ++ __ lookup_interface_method(// inputs: rec. class, interface, itable index ++ T1, T2, Rmethod, ++ // outputs: method, scan temp. reg ++ Rmethod, T0, ++ no_such_interface); ++ ++ // Rmethod: Method* to call ++ // T3: receiver ++ // Check for abstract method error ++ // Note: This should be done more efficiently via a throw_abstract_method_error ++ // interpreter entry point and a conditional jump to it in case of a null ++ // method. ++ __ beq(Rmethod, R0, no_such_method); ++ __ delayed()->nop(); ++ ++ __ profile_arguments_type(T1, Rmethod, T0, true); ++ ++ // do the call ++ // T3: receiver ++ // Rmethod: Method* ++ __ jump_from_interpreted(Rmethod, T1); ++ __ should_not_reach_here(); ++ ++ // exception handling code follows... ++ // note: must restore interpreter registers to canonical ++ // state for exception handling to work correctly! ++ ++ __ bind(no_such_method); ++ // throw exception ++ __ call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::throw_AbstractMethodError)); ++ // the call_VM checks for exception, so we should never return here. ++ __ should_not_reach_here(); ++ ++ __ bind(no_such_interface); ++ // throw exception ++ __ call_VM(noreg, CAST_FROM_FN_PTR(address, ++ InterpreterRuntime::throw_IncompatibleClassChangeError)); ++ // the call_VM checks for exception, so we should never return here. 
++ __ should_not_reach_here(); ++ ++} ++ ++ ++void TemplateTable::invokehandle(int byte_no) { ++ transition(vtos, vtos); ++ assert(byte_no == f1_byte, "use this argument"); ++ const Register T2_method = Rmethod; ++ const Register FSR_mtype = FSR; ++ const Register T3_recv = T3; ++ ++ if (!EnableInvokeDynamic) { ++ // rewriter does not generate this bytecode ++ __ should_not_reach_here(); ++ return; ++ } ++ ++ prepare_invoke(byte_no, T2_method, FSR_mtype, T3_recv); ++ //??__ verify_method_ptr(T2_method); ++ __ verify_oop(T3_recv); ++ __ null_check(T3_recv); ++ ++ // T9: MethodType object (from cpool->resolved_references[f1], if necessary) ++ // T2_method: MH.invokeExact_MT method (from f2) ++ ++ // Note: T9 is already pushed (if necessary) by prepare_invoke ++ ++ // FIXME: profile the LambdaForm also ++ __ profile_final_call(T9); ++ ++ // T8: tmp, used for mdp ++ // T2_method: callee ++ // T9: tmp ++ // is_virtual: true ++ __ profile_arguments_type(T8, T2_method, T9, true); ++ ++ __ jump_from_interpreted(T2_method, T9); ++} ++ ++ void TemplateTable::invokedynamic(int byte_no) { ++ transition(vtos, vtos); ++ assert(byte_no == f1_byte, "use this argument"); ++ ++ if (!EnableInvokeDynamic) { ++ // We should not encounter this bytecode if !EnableInvokeDynamic. ++ // The verifier will stop it. However, if we get past the verifier, ++ // this will stop the thread in a reasonable way, without crashing the JVM. ++ __ call_VM(noreg, CAST_FROM_FN_PTR(address, ++ InterpreterRuntime::throw_IncompatibleClassChangeError)); ++ // the call_VM checks for exception, so we should never return here. ++ __ should_not_reach_here(); ++ return; ++ } ++ ++ //const Register Rmethod = T2; ++ const Register T2_callsite = T2; ++ ++ prepare_invoke(byte_no, Rmethod, T2_callsite); ++ ++ // T2: CallSite object (from cpool->resolved_references[f1]) ++ // Rmethod: MH.linkToCallSite method (from f2) ++ ++ // Note: T2_callsite is already pushed by prepare_invoke ++ // %%% should make a type profile for any invokedynamic that takes a ref argument ++ // profile this call ++ __ profile_call(T9); ++ ++ // T8: tmp, used for mdp ++ // Rmethod: callee ++ // T9: tmp ++ // is_virtual: false ++ __ profile_arguments_type(T8, Rmethod, T9, false); ++ ++ __ verify_oop(T2_callsite); ++ ++ __ jump_from_interpreted(Rmethod, T9); ++ } ++ ++//----------------------------------------------------------------------------- ++// Allocation ++// T1 : tags & buffer end & thread ++// T2 : object end ++// T3 : klass ++// T1 : object size ++// A1 : cpool ++// A2 : cp index ++// return object in FSR ++void TemplateTable::_new() { ++ transition(vtos, atos); ++ __ get_unsigned_2_byte_index_at_bcp(A2, 1); ++ ++ Label slow_case; ++ Label done; ++ Label initialize_header; ++ Label initialize_object; // including clearing the fields ++ Label allocate_shared; ++ ++ // get InstanceKlass in T3 ++ __ get_cpool_and_tags(A1, T1); ++ ++ __ dsll(AT, A2, Address::times_8); ++ if (UseLEXT1 && Assembler::is_simm(sizeof(ConstantPool), 8)) { ++ __ gsldx(T3, A1, AT, sizeof(ConstantPool)); ++ } else { ++ __ daddu(AT, A1, AT); ++ __ ld(T3, AT, sizeof(ConstantPool)); ++ } ++ ++ // make sure the class we're about to instantiate has been resolved. 
++ // Note: slow_case does a pop of stack, which is why we loaded class/pushed above ++ const int tags_offset = Array::base_offset_in_bytes(); ++ if (UseLEXT1 && Assembler::is_simm(tags_offset, 8)) { ++ __ gslbx(AT, T1, A2, tags_offset); ++ } else { ++ __ daddu(T1, T1, A2); ++ __ lb(AT, T1, tags_offset); ++ } ++ if(os::is_MP()) { ++ __ sync(); // load acquire ++ } ++ __ daddiu(AT, AT, - (int)JVM_CONSTANT_Class); ++ __ bne(AT, R0, slow_case); ++ __ delayed()->nop(); ++ ++ ++ // make sure klass is initialized & doesn't have finalizer ++ // make sure klass is fully initialized ++ __ lhu(T1, T3, in_bytes(InstanceKlass::init_state_offset())); ++ __ daddiu(AT, T1, - (int)InstanceKlass::fully_initialized); ++ __ bne(AT, R0, slow_case); ++ __ delayed()->nop(); ++ ++ // has_finalizer ++ __ lw(T0, T3, in_bytes(Klass::layout_helper_offset()) ); ++ __ andi(AT, T0, Klass::_lh_instance_slow_path_bit); ++ __ bne(AT, R0, slow_case); ++ __ delayed()->nop(); ++ ++ // Allocate the instance ++ // 1) Try to allocate in the TLAB ++ // 2) if fail and the object is large allocate in the shared Eden ++ // 3) if the above fails (or is not applicable), go to a slow case ++ // (creates a new TLAB, etc.) ++ ++ const bool allow_shared_alloc = ++ Universe::heap()->supports_inline_contig_alloc() && !CMSIncrementalMode; ++ ++#ifndef OPT_THREAD ++ const Register thread = T8; ++ if (UseTLAB || allow_shared_alloc) { ++ __ get_thread(thread); ++ } ++#else ++ const Register thread = TREG; ++#endif ++ ++ if (UseTLAB) { ++ // get tlab_top ++ __ ld(FSR, thread, in_bytes(JavaThread::tlab_top_offset())); ++ // get tlab_end ++ __ ld(AT, thread, in_bytes(JavaThread::tlab_end_offset())); ++ __ daddu(T2, FSR, T0); ++ __ slt(AT, AT, T2); ++ __ bne(AT, R0, allow_shared_alloc ? allocate_shared : slow_case); ++ __ delayed()->nop(); ++ __ sd(T2, thread, in_bytes(JavaThread::tlab_top_offset())); ++ ++ if (ZeroTLAB) { ++ // the fields have been already cleared ++ __ beq(R0, R0, initialize_header); ++ } else { ++ // initialize both the header and fields ++ __ beq(R0, R0, initialize_object); ++ } ++ __ delayed()->nop(); ++ } ++ ++ // Allocation in the shared Eden , if allowed ++ // T0 : instance size in words ++ if(allow_shared_alloc){ ++ __ bind(allocate_shared); ++ ++ Label retry; ++ Address heap_top(T1); ++ __ set64(T1, (long)Universe::heap()->top_addr()); ++ __ ld(FSR, heap_top); ++ ++ __ bind(retry); ++ __ set64(AT, (long)Universe::heap()->end_addr()); ++ __ ld(AT, AT, 0); ++ __ daddu(T2, FSR, T0); ++ __ slt(AT, AT, T2); ++ __ bne(AT, R0, slow_case); ++ __ delayed()->nop(); ++ ++ // Compare FSR with the top addr, and if still equal, store the new ++ // top addr in T2 at the address of the top addr pointer. Sets AT if was ++ // equal, and clears it otherwise. Use lock prefix for atomicity on MPs. ++ // ++ // FSR: object begin ++ // T2: object end ++ // T0: instance size in words ++ ++ // if someone beat us on the allocation, try again, otherwise continue ++ __ cmpxchg(T2, heap_top, FSR); ++ __ beq(AT, R0, retry); ++ __ delayed()->nop(); ++ ++ __ incr_allocated_bytes(thread, T0, 0); ++ } ++ ++ if (UseTLAB || Universe::heap()->supports_inline_contig_alloc()) { ++ // The object is initialized before the header. If the object size is ++ // zero, go directly to the header initialization. 
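Editorial note: the allocation fast path in _new above follows the three-step strategy its comments spell out — bump the TLAB top, otherwise CAS the shared Eden top in a retry loop (the cmpxchg(T2, heap_top, FSR) sequence), otherwise call into the runtime. The stand-alone sketch below models that control flow under assumed names (Tlab, eden_top, slow_path_allocate are illustrative, not HotSpot API):

    #include <atomic>
    #include <cassert>
    #include <cstddef>

    struct Tlab { char* top; char* end; };                 // per-thread allocation buffer
    static std::atomic<char*> eden_top{nullptr};           // shared heap top pointer
    static char* eden_end = nullptr;                       // shared heap end

    static void* slow_path_allocate(std::size_t) { return nullptr; }  // placeholder for the VM slow path

    void* allocate(Tlab& tlab, std::size_t size) {
        // 1) TLAB: thread-local bump pointer, no atomics needed.
        if (tlab.top + size <= tlab.end) {
            void* obj = tlab.top;
            tlab.top += size;
            return obj;
        }
        // 2) Shared Eden: CAS retry loop, as in the cmpxchg/"retry" sequence above.
        char* old_top = eden_top.load();
        while (old_top != nullptr && old_top + size <= eden_end) {
            if (eden_top.compare_exchange_weak(old_top, old_top + size))
                return old_top;                            // won the race
            // lost the race: old_top now holds the new top, re-check bounds and retry
        }
        // 3) Slow path: new TLAB, GC, or OutOfMemoryError.
        return slow_path_allocate(size);
    }

    int main() {
        char buf[64];
        Tlab tlab{buf, buf + sizeof(buf)};
        void* a = allocate(tlab, 16);
        void* b = allocate(tlab, 16);
        assert(a == buf && b == buf + 16);                 // simple bump behaviour
        return 0;
    }

The template's ZeroTLAB/initialize_object split that follows handles what this sketch omits: freshly bumped memory still has to be cleared and given an object header before it is published.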
++ __ bind(initialize_object); ++ __ set64(AT, - sizeof(oopDesc)); ++ __ daddu(T0, T0, AT); ++ __ beq(T0, R0, initialize_header); ++ __ delayed()->nop(); ++ ++ // initialize remaining object fields: T0 is a multiple of 2 ++ { ++ Label loop; ++ __ daddu(T1, FSR, T0); ++ __ daddiu(T1, T1, -oopSize); ++ ++ __ bind(loop); ++ __ sd(R0, T1, sizeof(oopDesc) + 0 * oopSize); ++ __ bne(T1, FSR, loop); //dont clear header ++ __ delayed()->daddiu(T1, T1, -oopSize); ++ } ++ ++ //klass in T3, ++ // initialize object header only. ++ __ bind(initialize_header); ++ if (UseBiasedLocking) { ++ __ ld(AT, T3, in_bytes(Klass::prototype_header_offset())); ++ __ sd(AT, FSR, oopDesc::mark_offset_in_bytes ()); ++ } else { ++ __ set64(AT, (long)markOopDesc::prototype()); ++ __ sd(AT, FSR, oopDesc::mark_offset_in_bytes()); ++ } ++ ++ __ store_klass_gap(FSR, R0); ++ __ store_klass(FSR, T3); ++ ++ { ++ SkipIfEqual skip_if(_masm, &DTraceAllocProbes, 0); ++ // Trigger dtrace event for fastpath ++ __ push(atos); ++ __ call_VM_leaf( ++ CAST_FROM_FN_PTR(address, SharedRuntime::dtrace_object_alloc), FSR); ++ __ pop(atos); ++ ++ } ++ __ b(done); ++ __ delayed()->nop(); ++ } ++ ++ // slow case ++ __ bind(slow_case); ++ call_VM(FSR, CAST_FROM_FN_PTR(address, InterpreterRuntime::_new), A1, A2); ++ ++ // continue ++ __ bind(done); ++ __ sync(); ++} ++ ++void TemplateTable::newarray() { ++ transition(itos, atos); ++ __ lbu(A1, at_bcp(1)); ++ //type, count ++ call_VM(FSR, CAST_FROM_FN_PTR(address, InterpreterRuntime::newarray), A1, FSR); ++ __ sync(); ++} ++ ++void TemplateTable::anewarray() { ++ transition(itos, atos); ++ __ get_2_byte_integer_at_bcp(A2, AT, 1); ++ __ huswap(A2); ++ __ get_constant_pool(A1); ++ // cp, index, count ++ call_VM(FSR, CAST_FROM_FN_PTR(address, InterpreterRuntime::anewarray), A1, A2, FSR); ++ __ sync(); ++} ++ ++void TemplateTable::arraylength() { ++ transition(atos, itos); ++ __ null_check(FSR, arrayOopDesc::length_offset_in_bytes()); ++ __ lw(FSR, FSR, arrayOopDesc::length_offset_in_bytes()); ++} ++ ++// when invoke gen_subtype_check, super in T3, sub in T2, object in FSR(it's always) ++// T2 : sub klass ++// T3 : cpool ++// T3 : super klass ++void TemplateTable::checkcast() { ++ transition(atos, atos); ++ Label done, is_null, ok_is_subtype, quicked, resolved; ++ __ beq(FSR, R0, is_null); ++ __ delayed()->nop(); ++ ++ // Get cpool & tags index ++ __ get_cpool_and_tags(T3, T1); ++ __ get_2_byte_integer_at_bcp(T2, AT, 1); ++ __ huswap(T2); ++ ++ // See if bytecode has already been quicked ++ __ daddu(AT, T1, T2); ++ __ lb(AT, AT, Array::base_offset_in_bytes()); ++ if(os::is_MP()) { ++ __ sync(); // load acquire ++ } ++ __ daddiu(AT, AT, - (int)JVM_CONSTANT_Class); ++ __ beq(AT, R0, quicked); ++ __ delayed()->nop(); ++ ++ // In InterpreterRuntime::quicken_io_cc, lots of new classes may be loaded. ++ // Then, GC will move the object in V0 to another places in heap. ++ // Therefore, We should never save such an object in register. ++ // Instead, we should save it in the stack. It can be modified automatically by the GC thread. ++ // After GC, the object address in FSR is changed to a new place. 
++ // ++ __ push(atos); ++ const Register thread = TREG; ++#ifndef OPT_THREAD ++ __ get_thread(thread); ++#endif ++ call_VM(NOREG, CAST_FROM_FN_PTR(address, InterpreterRuntime::quicken_io_cc)); ++ __ get_vm_result_2(T3, thread); ++ __ pop_ptr(FSR); ++ __ b(resolved); ++ __ delayed()->nop(); ++ ++ // klass already in cp, get superklass in T3 ++ __ bind(quicked); ++ __ dsll(AT, T2, Address::times_8); ++ __ daddu(AT, T3, AT); ++ __ ld(T3, AT, sizeof(ConstantPool)); ++ ++ __ bind(resolved); ++ ++ // get subklass in T2 ++ //add for compressedoops ++ __ load_klass(T2, FSR); ++ // Superklass in T3. Subklass in T2. ++ __ gen_subtype_check(T3, T2, ok_is_subtype); ++ ++ // Come here on failure ++ // object is at FSR ++ __ jmp(Interpreter::_throw_ClassCastException_entry); ++ __ delayed()->nop(); ++ ++ // Come here on success ++ __ bind(ok_is_subtype); ++ ++ // Collect counts on whether this check-cast sees NULLs a lot or not. ++ if (ProfileInterpreter) { ++ __ b(done); ++ __ delayed()->nop(); ++ __ bind(is_null); ++ __ profile_null_seen(T3); ++ } else { ++ __ bind(is_null); ++ } ++ __ bind(done); ++} ++ ++// i use T3 as cpool, T1 as tags, T2 as index ++// object always in FSR, superklass in T3, subklass in T2 ++void TemplateTable::instanceof() { ++ transition(atos, itos); ++ Label done, is_null, ok_is_subtype, quicked, resolved; ++ ++ __ beq(FSR, R0, is_null); ++ __ delayed()->nop(); ++ ++ // Get cpool & tags index ++ __ get_cpool_and_tags(T3, T1); ++ // get index ++ __ get_2_byte_integer_at_bcp(T2, AT, 1); ++ __ huswap(T2); ++ ++ // See if bytecode has already been quicked ++ // quicked ++ __ daddu(AT, T1, T2); ++ __ lb(AT, AT, Array::base_offset_in_bytes()); ++ if(os::is_MP()) { ++ __ sync(); // load acquire ++ } ++ __ daddiu(AT, AT, - (int)JVM_CONSTANT_Class); ++ __ beq(AT, R0, quicked); ++ __ delayed()->nop(); ++ ++ __ push(atos); ++ const Register thread = TREG; ++#ifndef OPT_THREAD ++ __ get_thread(thread); ++#endif ++ call_VM(NOREG, CAST_FROM_FN_PTR(address, InterpreterRuntime::quicken_io_cc)); ++ __ get_vm_result_2(T3, thread); ++ __ pop_ptr(FSR); ++ __ b(resolved); ++ __ delayed()->nop(); ++ ++ // get superklass in T3, subklass in T2 ++ __ bind(quicked); ++ __ dsll(AT, T2, Address::times_8); ++ __ daddu(AT, T3, AT); ++ __ ld(T3, AT, sizeof(ConstantPool)); ++ ++ __ bind(resolved); ++ // get subklass in T2 ++ //add for compressedoops ++ __ load_klass(T2, FSR); ++ ++ // Superklass in T3. Subklass in T2. ++ __ gen_subtype_check(T3, T2, ok_is_subtype); ++ // Come here on failure ++ __ b(done); ++ __ delayed(); __ move(FSR, R0); ++ ++ // Come here on success ++ __ bind(ok_is_subtype); ++ __ move(FSR, 1); ++ ++ // Collect counts on whether this test sees NULLs a lot or not. ++ if (ProfileInterpreter) { ++ __ beq(R0, R0, done); ++ __ delayed()->nop(); ++ __ bind(is_null); ++ __ profile_null_seen(T3); ++ } else { ++ __ bind(is_null); // same as 'done' ++ } ++ __ bind(done); ++ // FSR = 0: obj == NULL or obj is not an instanceof the specified klass ++ // FSR = 1: obj != NULL and obj is an instanceof the specified klass ++} ++ ++//-------------------------------------------------------- ++//-------------------------------------------- ++// Breakpoints ++void TemplateTable::_breakpoint() { ++ // Note: We get here even if we are single stepping.. ++ // jbug inists on setting breakpoints at every bytecode ++ // even if we are in single step mode. 
++ ++ transition(vtos, vtos); ++ ++ // get the unpatched byte code ++ __ get_method(A1); ++ __ call_VM(NOREG, ++ CAST_FROM_FN_PTR(address, ++ InterpreterRuntime::get_original_bytecode_at), ++ A1, BCP); ++ __ move(Rnext, V0); // Rnext will be used in dispatch_only_normal ++ ++ // post the breakpoint event ++ __ get_method(A1); ++ __ call_VM(NOREG, CAST_FROM_FN_PTR(address, InterpreterRuntime::_breakpoint), A1, BCP); ++ ++ // complete the execution of original bytecode ++ __ dispatch_only_normal(vtos); ++} ++ ++//----------------------------------------------------------------------------- ++// Exceptions ++ ++void TemplateTable::athrow() { ++ transition(atos, vtos); ++ __ null_check(FSR); ++ __ jmp(Interpreter::throw_exception_entry()); ++ __ delayed()->nop(); ++} ++ ++//----------------------------------------------------------------------------- ++// Synchronization ++// ++// Note: monitorenter & exit are symmetric routines; which is reflected ++// in the assembly code structure as well ++// ++// Stack layout: ++// ++// [expressions ] <--- SP = expression stack top ++// .. ++// [expressions ] ++// [monitor entry] <--- monitor block top = expression stack bot ++// .. ++// [monitor entry] ++// [frame data ] <--- monitor block bot ++// ... ++// [return addr ] <--- FP ++ ++// we use T2 as monitor entry pointer, T3 as monitor top pointer, c_rarg0 as free slot pointer ++// object always in FSR ++void TemplateTable::monitorenter() { ++ transition(atos, vtos); ++ ++ // check for NULL object ++ __ null_check(FSR); ++ ++ const Address monitor_block_top(FP, frame::interpreter_frame_monitor_block_top_offset ++ * wordSize); ++ const int entry_size = (frame::interpreter_frame_monitor_size()* wordSize); ++ Label allocated; ++ ++ // initialize entry pointer ++ __ move(c_rarg0, R0); ++ ++ // find a free slot in the monitor block (result in c_rarg0) ++ { ++ Label entry, loop, exit, next; ++ __ ld(T2, monitor_block_top); ++ __ b(entry); ++ __ delayed()->daddiu(T3, FP, frame::interpreter_frame_initial_sp_offset * wordSize); ++ ++ // free slot? ++ __ bind(loop); ++ __ ld(AT, T2, BasicObjectLock::obj_offset_in_bytes()); ++ __ bne(AT, R0, next); ++ __ delayed()->nop(); ++ __ move(c_rarg0, T2); ++ ++ __ bind(next); ++ __ beq(FSR, AT, exit); ++ __ delayed()->nop(); ++ __ daddiu(T2, T2, entry_size); ++ ++ __ bind(entry); ++ __ bne(T3, T2, loop); ++ __ delayed()->nop(); ++ __ bind(exit); ++ } ++ ++ __ bne(c_rarg0, R0, allocated); ++ __ delayed()->nop(); ++ ++ // allocate one if there's no free slot ++ { ++ Label entry, loop; ++ // 1. compute new pointers // SP: old expression stack top ++ __ ld(c_rarg0, monitor_block_top); ++ __ daddiu(SP, SP, - entry_size); ++ __ daddiu(c_rarg0, c_rarg0, - entry_size); ++ __ sd(c_rarg0, monitor_block_top); ++ __ b(entry); ++ __ delayed(); __ move(T3, SP); ++ ++ // 2. move expression stack contents ++ __ bind(loop); ++ __ ld(AT, T3, entry_size); ++ __ sd(AT, T3, 0); ++ __ daddiu(T3, T3, wordSize); ++ __ bind(entry); ++ __ bne(T3, c_rarg0, loop); ++ __ delayed()->nop(); ++ } ++ ++ __ bind(allocated); ++ // Increment bcp to point to the next bytecode, ++ // so exception handling for async. exceptions work correctly. ++ // The object has already been poped from the stack, so the ++ // expression stack looks correct. 
++ __ daddiu(BCP, BCP, 1); ++ __ sd(FSR, c_rarg0, BasicObjectLock::obj_offset_in_bytes()); ++ __ lock_object(c_rarg0); ++ // check to make sure this monitor doesn't cause stack overflow after locking ++ __ save_bcp(); // in case of exception ++ __ generate_stack_overflow_check(0); ++ // The bcp has already been incremented. Just need to dispatch to next instruction. ++ ++ __ dispatch_next(vtos); ++} ++ ++// T2 : top ++// c_rarg0 : entry ++void TemplateTable::monitorexit() { ++ transition(atos, vtos); ++ ++ __ null_check(FSR); ++ ++ const int entry_size =(frame::interpreter_frame_monitor_size()* wordSize); ++ Label found; ++ ++ // find matching slot ++ { ++ Label entry, loop; ++ __ ld(c_rarg0, FP, frame::interpreter_frame_monitor_block_top_offset * wordSize); ++ __ b(entry); ++ __ delayed()->daddiu(T2, FP, frame::interpreter_frame_initial_sp_offset * wordSize); ++ ++ __ bind(loop); ++ __ ld(AT, c_rarg0, BasicObjectLock::obj_offset_in_bytes()); ++ __ beq(FSR, AT, found); ++ __ delayed()->nop(); ++ __ daddiu(c_rarg0, c_rarg0, entry_size); ++ __ bind(entry); ++ __ bne(T2, c_rarg0, loop); ++ __ delayed()->nop(); ++ } ++ ++ // error handling. Unlocking was not block-structured ++ Label end; ++ __ call_VM(NOREG, CAST_FROM_FN_PTR(address, ++ InterpreterRuntime::throw_illegal_monitor_state_exception)); ++ __ should_not_reach_here(); ++ ++ // call run-time routine ++ // c_rarg0: points to monitor entry ++ __ bind(found); ++ __ move(TSR, FSR); ++ __ unlock_object(c_rarg0); ++ __ move(FSR, TSR); ++ __ bind(end); ++} ++ ++ ++// Wide instructions ++void TemplateTable::wide() { ++ transition(vtos, vtos); ++ __ lbu(Rnext, at_bcp(1)); ++ __ dsll(T9, Rnext, Address::times_8); ++ __ li(AT, (long)Interpreter::_wentry_point); ++ __ daddu(AT, T9, AT); ++ __ ld(T9, AT, 0); ++ __ jr(T9); ++ __ delayed()->nop(); ++} ++ ++ ++void TemplateTable::multianewarray() { ++ transition(vtos, atos); ++ // last dim is on top of stack; we want address of first one: ++ // first_addr = last_addr + (ndims - 1) * wordSize ++ __ lbu(A1, at_bcp(3)); // dimension ++ __ daddiu(A1, A1, -1); ++ __ dsll(A1, A1, Address::times_8); ++ __ daddu(A1, SP, A1); // now A1 pointer to the count array on the stack ++ call_VM(FSR, CAST_FROM_FN_PTR(address, InterpreterRuntime::multianewarray), A1); ++ __ lbu(AT, at_bcp(3)); ++ __ dsll(AT, AT, Address::times_8); ++ __ daddu(SP, SP, AT); ++ __ sync(); ++} ++#endif // !CC_INTERP +diff --git a/hotspot/src/cpu/mips/vm/templateTable_mips_64.hpp b/hotspot/src/cpu/mips/vm/templateTable_mips_64.hpp +new file mode 100644 +index 0000000000..b63274a206 +--- /dev/null ++++ b/hotspot/src/cpu/mips/vm/templateTable_mips_64.hpp +@@ -0,0 +1,44 @@ ++/* ++ * Copyright (c) 2003, 2012, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2016, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). 
++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#ifndef CPU_MIPS_VM_TEMPLATETABLE_MIPS_64_HPP ++#define CPU_MIPS_VM_TEMPLATETABLE_MIPS_64_HPP ++ ++ static void prepare_invoke(int byte_no, ++ Register method, ++ Register index = noreg, ++ Register recv = noreg, ++ Register flags = noreg ++ ); ++ static void invokevirtual_helper(Register index, Register recv, ++ Register flags); ++ //static void volatile_barrier(Assembler::Membar_mask_bits order_constraint); ++ static void volatile_barrier(); ++ ++ // Helpers ++ static void index_check(Register array, Register index); ++ static void index_check_without_pop(Register array, Register index); ++ ++#endif // CPU_MIPS_VM_TEMPLATETABLE_MIPS_64_HPP +diff --git a/hotspot/src/cpu/mips/vm/vmStructs_mips.hpp b/hotspot/src/cpu/mips/vm/vmStructs_mips.hpp +new file mode 100644 +index 0000000000..6939914356 +--- /dev/null ++++ b/hotspot/src/cpu/mips/vm/vmStructs_mips.hpp +@@ -0,0 +1,68 @@ ++/* ++ * Copyright (c) 2001, 2013, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2016, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#ifndef CPU_MIPS_VM_VMSTRUCTS_MIPS_HPP ++#define CPU_MIPS_VM_VMSTRUCTS_MIPS_HPP ++ ++// These are the CPU-specific fields, types and integer ++// constants required by the Serviceability Agent. This file is ++// referenced by vmStructs.cpp. 
++ ++#define VM_STRUCTS_CPU(nonstatic_field, static_field, unchecked_nonstatic_field, volatile_nonstatic_field, nonproduct_nonstatic_field, c2_nonstatic_field, unchecked_c1_static_field, unchecked_c2_static_field) \ ++ \ ++ /******************************/ \ ++ /* JavaCallWrapper */ \ ++ /******************************/ \ ++ /******************************/ \ ++ /* JavaFrameAnchor */ \ ++ /******************************/ \ ++ volatile_nonstatic_field(JavaFrameAnchor, _last_Java_fp, intptr_t*) \ ++ \ ++ ++ /* NOTE that we do not use the last_entry() macro here; it is used */ ++ /* in vmStructs__.hpp's VM_STRUCTS_OS_CPU macro (and must */ ++ /* be present there) */ ++ ++ ++#define VM_TYPES_CPU(declare_type, declare_toplevel_type, declare_oop_type, declare_integer_type, declare_unsigned_integer_type, declare_c1_toplevel_type, declare_c2_type, declare_c2_toplevel_type) \ ++ ++ /* NOTE that we do not use the last_entry() macro here; it is used */ ++ /* in vmStructs__.hpp's VM_TYPES_OS_CPU macro (and must */ ++ /* be present there) */ ++ ++ ++#define VM_INT_CONSTANTS_CPU(declare_constant, declare_preprocessor_constant, declare_c1_constant, declare_c2_constant, declare_c2_preprocessor_constant) \ ++ ++ /* NOTE that we do not use the last_entry() macro here; it is used */ ++ /* in vmStructs__.hpp's VM_INT_CONSTANTS_OS_CPU macro (and must */ ++ /* be present there) */ ++ ++#define VM_LONG_CONSTANTS_CPU(declare_constant, declare_preprocessor_constant, declare_c1_constant, declare_c2_constant, declare_c2_preprocessor_constant) \ ++ ++ /* NOTE that we do not use the last_entry() macro here; it is used */ ++ /* in vmStructs__.hpp's VM_LONG_CONSTANTS_OS_CPU macro (and must */ ++ /* be present there) */ ++ ++#endif // CPU_MIPS_VM_VMSTRUCTS_MIPS_HPP +diff --git a/hotspot/src/cpu/mips/vm/vm_version_ext_mips.cpp b/hotspot/src/cpu/mips/vm/vm_version_ext_mips.cpp +new file mode 100644 +index 0000000000..a98f70d9ff +--- /dev/null ++++ b/hotspot/src/cpu/mips/vm/vm_version_ext_mips.cpp +@@ -0,0 +1,89 @@ ++/* ++ * Copyright (c) 2013, 2018, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2018, 2020, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ * ++ */ ++ ++#include "memory/allocation.inline.hpp" ++#include "vm_version_ext_mips.hpp" ++ ++// VM_Version_Ext statics ++int VM_Version_Ext::_no_of_threads = 0; ++int VM_Version_Ext::_no_of_cores = 0; ++int VM_Version_Ext::_no_of_sockets = 0; ++bool VM_Version_Ext::_initialized = false; ++char VM_Version_Ext::_cpu_name[CPU_TYPE_DESC_BUF_SIZE] = {0}; ++char VM_Version_Ext::_cpu_desc[CPU_DETAILED_DESC_BUF_SIZE] = {0}; ++ ++void VM_Version_Ext::initialize_cpu_information(void) { ++ // do nothing if cpu info has been initialized ++ if (_initialized) { ++ return; ++ } ++ ++ _no_of_cores = os::processor_count(); ++ _no_of_threads = _no_of_cores; ++ _no_of_sockets = _no_of_cores; ++ if (is_loongson()) { ++ snprintf(_cpu_name, CPU_TYPE_DESC_BUF_SIZE - 1, "Loongson MIPS"); ++ snprintf(_cpu_desc, CPU_DETAILED_DESC_BUF_SIZE, "Loongson MIPS %s", cpu_features()); ++ } else { ++ snprintf(_cpu_name, CPU_TYPE_DESC_BUF_SIZE - 1, "MIPS"); ++ snprintf(_cpu_desc, CPU_DETAILED_DESC_BUF_SIZE, "MIPS %s", cpu_features()); ++ } ++ _initialized = true; ++} ++ ++int VM_Version_Ext::number_of_threads(void) { ++ initialize_cpu_information(); ++ return _no_of_threads; ++} ++ ++int VM_Version_Ext::number_of_cores(void) { ++ initialize_cpu_information(); ++ return _no_of_cores; ++} ++ ++int VM_Version_Ext::number_of_sockets(void) { ++ initialize_cpu_information(); ++ return _no_of_sockets; ++} ++ ++const char* VM_Version_Ext::cpu_name(void) { ++ initialize_cpu_information(); ++ char* tmp = NEW_C_HEAP_ARRAY_RETURN_NULL(char, CPU_TYPE_DESC_BUF_SIZE, mtTracing); ++ if (NULL == tmp) { ++ return NULL; ++ } ++ strncpy(tmp, _cpu_name, CPU_TYPE_DESC_BUF_SIZE); ++ return tmp; ++} ++ ++const char* VM_Version_Ext::cpu_description(void) { ++ initialize_cpu_information(); ++ char* tmp = NEW_C_HEAP_ARRAY_RETURN_NULL(char, CPU_DETAILED_DESC_BUF_SIZE, mtTracing); ++ if (NULL == tmp) { ++ return NULL; ++ } ++ strncpy(tmp, _cpu_desc, CPU_DETAILED_DESC_BUF_SIZE); ++ return tmp; ++} +diff --git a/hotspot/src/cpu/mips/vm/vm_version_ext_mips.hpp b/hotspot/src/cpu/mips/vm/vm_version_ext_mips.hpp +new file mode 100644 +index 0000000000..a240fcc2e9 +--- /dev/null ++++ b/hotspot/src/cpu/mips/vm/vm_version_ext_mips.hpp +@@ -0,0 +1,54 @@ ++/* ++ * Copyright (c) 2016, 2018, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2018, 2020, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ * ++ */ ++ ++#ifndef CPU_MIPS_VM_VM_VERSION_EXT_MIPS_HPP ++#define CPU_MIPS_VM_VM_VERSION_EXT_MIPS_HPP ++ ++#include "runtime/vm_version.hpp" ++#include "utilities/macros.hpp" ++ ++class VM_Version_Ext : public VM_Version { ++ private: ++ static const size_t CPU_TYPE_DESC_BUF_SIZE = 256; ++ static const size_t CPU_DETAILED_DESC_BUF_SIZE = 4096; ++ ++ static int _no_of_threads; ++ static int _no_of_cores; ++ static int _no_of_sockets; ++ static bool _initialized; ++ static char _cpu_name[CPU_TYPE_DESC_BUF_SIZE]; ++ static char _cpu_desc[CPU_DETAILED_DESC_BUF_SIZE]; ++ ++ public: ++ static int number_of_threads(void); ++ static int number_of_cores(void); ++ static int number_of_sockets(void); ++ ++ static const char* cpu_name(void); ++ static const char* cpu_description(void); ++ static void initialize_cpu_information(void); ++}; ++ ++#endif // CPU_MIPS_VM_VM_VERSION_EXT_MIPS_HPP +diff --git a/hotspot/src/cpu/mips/vm/vm_version_mips.cpp b/hotspot/src/cpu/mips/vm/vm_version_mips.cpp +new file mode 100644 +index 0000000000..aef8f0746a +--- /dev/null ++++ b/hotspot/src/cpu/mips/vm/vm_version_mips.cpp +@@ -0,0 +1,510 @@ ++/* ++ * Copyright (c) 1997, 2014, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2023, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ * ++ */ ++ ++#include "precompiled.hpp" ++#include "asm/macroAssembler.hpp" ++#include "asm/macroAssembler.inline.hpp" ++#include "memory/resourceArea.hpp" ++#include "runtime/java.hpp" ++#include "runtime/stubCodeGenerator.hpp" ++#include "vm_version_mips.hpp" ++#ifdef TARGET_OS_FAMILY_linux ++# include "os_linux.inline.hpp" ++#endif ++ ++#define A0 RA0 ++ ++int VM_Version::_cpuFeatures; ++const char* VM_Version::_features_str = ""; ++VM_Version::CpuidInfo VM_Version::_cpuid_info = { 0, }; ++volatile bool VM_Version::_is_determine_cpucfg_supported_running = false; ++bool VM_Version::_is_cpucfg_instruction_supported = true; ++bool VM_Version::_cpu_info_is_initialized = false; ++ ++static BufferBlob* stub_blob; ++static const int stub_size = 600; ++ ++extern "C" { ++ typedef void (*get_cpu_info_stub_t)(void*); ++} ++static get_cpu_info_stub_t get_cpu_info_stub = NULL; ++ ++ ++class VM_Version_StubGenerator: public StubCodeGenerator { ++ public: ++ ++ VM_Version_StubGenerator(CodeBuffer *c) : StubCodeGenerator(c) {} ++ ++ address generate_get_cpu_info() { ++ assert(!VM_Version::cpu_info_is_initialized(), "VM_Version should not be initialized"); ++ StubCodeMark mark(this, "VM_Version", "get_cpu_info_stub"); ++# define __ _masm-> ++ ++ address start = __ pc(); ++ ++ __ enter(); ++ __ push(AT); ++ __ push(V0); ++ ++ __ li(AT, (long)0); ++ __ cpucfg(V0, AT); ++ __ lw(AT, A0, in_bytes(VM_Version::Loongson_Cpucfg_id0_offset())); ++ __ sw(V0, A0, in_bytes(VM_Version::Loongson_Cpucfg_id0_offset())); ++ ++ __ li(AT, 1); ++ __ cpucfg(V0, AT); ++ __ lw(AT, A0, in_bytes(VM_Version::Loongson_Cpucfg_id1_offset())); ++ __ sw(V0, A0, in_bytes(VM_Version::Loongson_Cpucfg_id1_offset())); ++ ++ __ li(AT, 2); ++ __ cpucfg(V0, AT); ++ __ lw(AT, A0, in_bytes(VM_Version::Loongson_Cpucfg_id2_offset())); ++ __ sw(V0, A0, in_bytes(VM_Version::Loongson_Cpucfg_id2_offset())); ++ ++ __ pop(V0); ++ __ pop(AT); ++ __ leave(); ++ __ jr(RA); ++ __ delayed()->nop(); ++# undef __ ++ ++ return start; ++ }; ++}; ++ ++uint32_t VM_Version::get_feature_flags_by_cpucfg() { ++ uint32_t result = 0; ++ if (_cpuid_info.cpucfg_info_id1.bits.MMI != 0) ++ result |= CPU_MMI; ++ if (_cpuid_info.cpucfg_info_id1.bits.MSA1 != 0) ++ result |= CPU_MSA1_0; ++ if (_cpuid_info.cpucfg_info_id1.bits.MSA2 != 0) ++ result |= CPU_MSA2_0; ++ if (_cpuid_info.cpucfg_info_id1.bits.CGP != 0) ++ result |= CPU_CGP; ++ if (_cpuid_info.cpucfg_info_id1.bits.LSX1 != 0) ++ result |= CPU_LSX1; ++ if (_cpuid_info.cpucfg_info_id1.bits.LSX2 != 0) ++ result |= CPU_LSX2; ++ if (_cpuid_info.cpucfg_info_id1.bits.LASX != 0) ++ result |= CPU_LASX; ++ if (_cpuid_info.cpucfg_info_id1.bits.LLSYNC != 0) ++ result |= CPU_LLSYNC; ++ if (_cpuid_info.cpucfg_info_id1.bits.TGTSYNC != 0) ++ result |= CPU_TGTSYNC; ++ if (_cpuid_info.cpucfg_info_id1.bits.MUALP != 0) ++ result |= CPU_MUALP; ++ if (_cpuid_info.cpucfg_info_id2.bits.LEXT1 != 0) ++ result |= CPU_LEXT1; ++ if (_cpuid_info.cpucfg_info_id2.bits.LEXT2 != 0) ++ result |= CPU_LEXT2; ++ if (_cpuid_info.cpucfg_info_id2.bits.LEXT3 != 0) ++ result |= CPU_LEXT3; ++ if (_cpuid_info.cpucfg_info_id2.bits.LAMO != 0) ++ result |= CPU_LAMO; ++ if (_cpuid_info.cpucfg_info_id2.bits.LPIXU != 0) ++ result |= CPU_LPIXU; ++ ++ result |= CPU_ULSYNC; ++ ++ return result; ++} ++ ++void read_cpu_info(const char *path, char *result) { ++ FILE *ptr; ++ char buf[1024]; ++ int i = 0; ++ if((ptr=fopen(path, "r")) != NULL) { ++ while(fgets(buf, 1024, ptr)!=NULL) { ++ strcat(result,buf); ++ i++; ++ if (i == 10) break; ++ } ++ fclose(ptr); ++ } else { ++ 
warning("Can't detect CPU info - cannot open %s", path); ++ } ++} ++ ++void strlwr(char *str) { ++ for (; *str!='\0'; str++) ++ *str = tolower(*str); ++} ++ ++int VM_Version::get_feature_flags_by_cpuinfo(int features) { ++ assert(!cpu_info_is_initialized(), "VM_Version should not be initialized"); ++ ++ char res[10240]; ++ int i; ++ memset(res, '\0', 10240 * sizeof(char)); ++ read_cpu_info("/proc/cpuinfo", res); ++ // res is converted to lower case ++ strlwr(res); ++ ++ if (strstr(res, "loongson")) { ++ // Loongson CPU ++ features |= CPU_LOONGSON; ++ ++ const struct Loongson_Cpuinfo loongson_cpuinfo[] = { ++ {L_3A1000, "3a1000"}, ++ {L_3B1500, "3b1500"}, ++ {L_3A2000, "3a2000"}, ++ {L_3B2000, "3b2000"}, ++ {L_3A3000, "3a3000"}, ++ {L_3B3000, "3b3000"}, ++ {L_2K1000, "2k1000"}, ++ {L_UNKNOWN, "unknown"} ++ }; ++ ++ // Loongson Family ++ int detected = 0; ++ for (i = 0; i <= L_UNKNOWN; i++) { ++ switch (i) { ++ // 3A1000 and 3B1500 may use an old kernel and further comparsion is needed ++ // test PRID REV in /proc/cpuinfo ++ // 3A1000: V0.5, model name: ICT Loongson-3A V0.5 FPU V0.1 ++ // 3B1500: V0.7, model name: ICT Loongson-3B V0.7 FPU V0.1 ++ case L_3A1000: ++ if (strstr(res, loongson_cpuinfo[i].match_str) || strstr(res, "loongson-3a v0.5")) { ++ features |= CPU_LOONGSON_GS464; ++ detected++; ++ //tty->print_cr("3A1000 platform"); ++ } ++ break; ++ case L_3B1500: ++ if (strstr(res, loongson_cpuinfo[i].match_str) || strstr(res, "loongson-3b v0.7")) { ++ features |= CPU_LOONGSON_GS464; ++ detected++; ++ //tty->print_cr("3B1500 platform"); ++ } ++ break; ++ case L_3A2000: ++ case L_3B2000: ++ case L_3A3000: ++ case L_3B3000: ++ if (strstr(res, loongson_cpuinfo[i].match_str)) { ++ features |= CPU_LOONGSON_GS464E; ++ detected++; ++ //tty->print_cr("3A2000/3A3000/3B2000/3B3000 platform"); ++ } ++ break; ++ case L_2K1000: ++ if (strstr(res, loongson_cpuinfo[i].match_str)) { ++ features |= CPU_LOONGSON_GS264; ++ detected++; ++ //tty->print_cr("2K1000 platform"); ++ } ++ break; ++ case L_UNKNOWN: ++ if (detected == 0) { ++ detected++; ++ //tty->print_cr("unknown Loongson platform"); ++ } ++ break; ++ default: ++ ShouldNotReachHere(); ++ } ++ } ++ assert (detected == 1, "one and only one of LOONGSON_CPU_FAMILY should be detected"); ++ } else { // not Loongson ++ // Not Loongson CPU ++ //tty->print_cr("MIPS platform"); ++ } ++ ++ if (features & CPU_LOONGSON_GS264) { ++ features |= CPU_LEXT1; ++ features |= CPU_LEXT2; ++ features |= CPU_TGTSYNC; ++ features |= CPU_ULSYNC; ++ features |= CPU_MSA1_0; ++ features |= CPU_LSX1; ++ } else if (features & CPU_LOONGSON_GS464) { ++ features |= CPU_LEXT1; ++ features |= CPU_LLSYNC; ++ features |= CPU_TGTSYNC; ++ } else if (features & CPU_LOONGSON_GS464E) { ++ features |= CPU_LEXT1; ++ features |= CPU_LEXT2; ++ features |= CPU_LEXT3; ++ features |= CPU_TGTSYNC; ++ features |= CPU_ULSYNC; ++ } else if (features & CPU_LOONGSON) { ++ // unknow loongson ++ features |= CPU_LLSYNC; ++ features |= CPU_TGTSYNC; ++ features |= CPU_ULSYNC; ++ } ++ VM_Version::_cpu_info_is_initialized = true; ++ ++ return features; ++} ++ ++void VM_Version::get_processor_features() { ++ ++ clean_cpuFeatures(); ++ ++ // test if cpucfg instruction is supported ++ VM_Version::_is_determine_cpucfg_supported_running = true; ++ __asm__ __volatile__( ++ ".insn \n\t" ++ ".word (0xc8080118)\n\t" // cpucfg zero, zero ++ : ++ : ++ : ++ ); ++ VM_Version::_is_determine_cpucfg_supported_running = false; ++ ++ if (supports_cpucfg()) { ++ get_cpu_info_stub(&_cpuid_info); ++ _cpuFeatures = 
get_feature_flags_by_cpucfg(); ++ // Only Loongson CPUs support cpucfg ++ _cpuFeatures |= CPU_LOONGSON; ++ } else { ++ _cpuFeatures = get_feature_flags_by_cpuinfo(0); ++ } ++ ++ _supports_cx8 = true; ++ ++ if (UseG1GC && FLAG_IS_DEFAULT(MaxGCPauseMillis)) { ++ FLAG_SET_CMDLINE(uintx, MaxGCPauseMillis, 650); ++ } ++ ++#ifdef COMPILER2 ++ if (MaxVectorSize > 0) { ++ if (!is_power_of_2(MaxVectorSize)) { ++ warning("MaxVectorSize must be a power of 2"); ++ MaxVectorSize = 8; ++ } ++ if (MaxVectorSize > 0 && supports_ps()) { ++ MaxVectorSize = 8; ++ } else { ++ MaxVectorSize = 0; ++ } ++ } ++ // ++ // Vector optimization of MIPS works in most cases, but cannot pass hotspot/test/compiler/6340864/TestFloatVect.java. ++ // Vector optimization was closed by default. ++ // The reasons: ++ // 1. The kernel does not have emulation of PS instructions yet, so the emulation of PS instructions must be done in JVM, see JVM_handle_linux_signal. ++ // 2. It seems the gcc4.4.7 had some bug related to ucontext_t, which is used in signal handler to emulate PS instructions. ++ // ++ if (FLAG_IS_DEFAULT(MaxVectorSize)) { ++ MaxVectorSize = 0; ++ } ++ ++#endif ++ ++ if (needs_llsync() && needs_tgtsync() && !needs_ulsync()) { ++ if (FLAG_IS_DEFAULT(UseSyncLevel)) { ++ FLAG_SET_DEFAULT(UseSyncLevel, 1000); ++ } ++ } else if (!needs_llsync() && needs_tgtsync() && needs_ulsync()) { ++ if (FLAG_IS_DEFAULT(UseSyncLevel)) { ++ FLAG_SET_DEFAULT(UseSyncLevel, 2000); ++ } ++ } else if (!needs_llsync() && !needs_tgtsync() && needs_ulsync()) { ++ if (FLAG_IS_DEFAULT(UseSyncLevel)) { ++ FLAG_SET_DEFAULT(UseSyncLevel, 3000); ++ } ++ } else if (needs_llsync() && !needs_tgtsync() && needs_ulsync()) { ++ if (FLAG_IS_DEFAULT(UseSyncLevel)) { ++ FLAG_SET_DEFAULT(UseSyncLevel, 4000); ++ } ++ } else if (needs_llsync() && needs_tgtsync() && needs_ulsync()) { ++ if (FLAG_IS_DEFAULT(UseSyncLevel)) { ++ FLAG_SET_DEFAULT(UseSyncLevel, 10000); ++ } ++ } else { ++ assert(false, "Should Not Reach Here, what is the cpu type?"); ++ if (FLAG_IS_DEFAULT(UseSyncLevel)) { ++ FLAG_SET_DEFAULT(UseSyncLevel, 10000); ++ } ++ } ++ ++ if (supports_lext1()) { ++ if (FLAG_IS_DEFAULT(UseLEXT1)) { ++ FLAG_SET_DEFAULT(UseLEXT1, true); ++ } ++ } else if (UseLEXT1) { ++ warning("LEXT1 instructions are not available on this CPU"); ++ FLAG_SET_DEFAULT(UseLEXT1, false); ++ } ++ ++ if (supports_lext2()) { ++ if (FLAG_IS_DEFAULT(UseLEXT2)) { ++ FLAG_SET_DEFAULT(UseLEXT2, true); ++ } ++ } else if (UseLEXT2) { ++ warning("LEXT2 instructions are not available on this CPU"); ++ FLAG_SET_DEFAULT(UseLEXT2, false); ++ } ++ ++ if (supports_lext3()) { ++ if (FLAG_IS_DEFAULT(UseLEXT3)) { ++ FLAG_SET_DEFAULT(UseLEXT3, true); ++ } ++ } else if (UseLEXT3) { ++ warning("LEXT3 instructions are not available on this CPU"); ++ FLAG_SET_DEFAULT(UseLEXT3, false); ++ } ++ ++ if (UseLEXT2) { ++ if (FLAG_IS_DEFAULT(UseCountTrailingZerosInstructionMIPS64)) { ++ FLAG_SET_DEFAULT(UseCountTrailingZerosInstructionMIPS64, 1); ++ } ++ } else if (UseCountTrailingZerosInstructionMIPS64) { ++ if (!FLAG_IS_DEFAULT(UseCountTrailingZerosInstructionMIPS64)) ++ warning("ctz/dctz instructions are not available on this CPU"); ++ FLAG_SET_DEFAULT(UseCountTrailingZerosInstructionMIPS64, 0); ++ } ++ ++ if (TieredCompilation) { ++ if (!FLAG_IS_DEFAULT(TieredCompilation)) ++ warning("TieredCompilation not supported"); ++ FLAG_SET_DEFAULT(TieredCompilation, false); ++ } ++ ++ char buf[256]; ++ bool is_unknown_loongson_cpu = is_loongson() && !is_gs464() && !is_gs464e() && !is_gs264() && !supports_cpucfg(); ++ 
++ // A note on the _features_string format: ++ // There are jtreg tests checking the _features_string for various properties. ++ // For some strange reason, these tests require the string to contain ++ // only _lowercase_ characters. Keep that in mind when being surprised ++ // about the unusual notation of features - and when adding new ones. ++ // Features may have one comma at the end. ++ // Furthermore, use one, and only one, separator space between features. ++ // Multiple spaces are considered separate tokens, messing up everything. ++ jio_snprintf(buf, sizeof(buf), "%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s, usesynclevel:%d", ++ (is_loongson() ? "mips-compatible loongson cpu" : "mips cpu"), ++ (is_gs464() ? ", gs464 (3a1000/3b1500)" : ""), ++ (is_gs464e() ? ", gs464e (3a2000/3a3000/3b2000/3b3000)" : ""), ++ (is_gs264() ? ", gs264 (2k1000)" : ""), ++ (is_unknown_loongson_cpu ? ", unknown loongson cpu" : ""), ++ (supports_dsp() ? ", dsp" : ""), ++ (supports_ps() ? ", ps" : ""), ++ (supports_3d() ? ", 3d" : ""), ++ (supports_mmi() ? ", mmi" : ""), ++ (supports_msa1_0() ? ", msa1_0" : ""), ++ (supports_msa2_0() ? ", msa2_0" : ""), ++ (supports_lsx1() ? ", lsx1" : ""), ++ (supports_lsx2() ? ", lsx2" : ""), ++ (supports_lasx() ? ", lasx" : ""), ++ (supports_lext1() ? ", lext1" : ""), ++ (supports_lext2() ? ", lext2" : ""), ++ (supports_lext3() ? ", lext3" : ""), ++ (supports_cgp() ? ", aes, crc, sha1, sha256, sha512" : ""), ++ (supports_lamo() ? ", lamo" : ""), ++ (supports_lpixu() ? ", lpixu" : ""), ++ (needs_llsync() ? ", llsync" : ""), ++ (needs_tgtsync() ? ", tgtsync": ""), ++ (needs_ulsync() ? ", ulsync": ""), ++ (supports_mualp() ? ", mualp" : ""), ++ UseSyncLevel); ++ _features_str = strdup(buf); ++ ++ if (FLAG_IS_DEFAULT(AllocatePrefetchStyle)) { ++ FLAG_SET_DEFAULT(AllocatePrefetchStyle, 1); ++ } ++ ++ if (FLAG_IS_DEFAULT(AllocatePrefetchLines)) { ++ FLAG_SET_DEFAULT(AllocatePrefetchLines, 1); ++ } ++ ++ if (FLAG_IS_DEFAULT(AllocatePrefetchStepSize)) { ++ FLAG_SET_DEFAULT(AllocatePrefetchStepSize, 64); ++ } ++ ++ if (FLAG_IS_DEFAULT(AllocatePrefetchDistance)) { ++ FLAG_SET_DEFAULT(AllocatePrefetchDistance, 64); ++ } ++ ++ if (FLAG_IS_DEFAULT(AllocateInstancePrefetchLines)) { ++ FLAG_SET_DEFAULT(AllocateInstancePrefetchLines, 1); ++ } ++ ++ if (UseSHA) { ++ warning("SHA instructions are not available on this CPU"); ++ FLAG_SET_DEFAULT(UseSHA, false); ++ } ++ ++ if (UseSHA1Intrinsics || UseSHA256Intrinsics || UseSHA512Intrinsics) { ++ warning("SHA intrinsics are not available on this CPU"); ++ FLAG_SET_DEFAULT(UseSHA1Intrinsics, false); ++ FLAG_SET_DEFAULT(UseSHA256Intrinsics, false); ++ FLAG_SET_DEFAULT(UseSHA512Intrinsics, false); ++ } ++ ++ if (UseAES) { ++ if (!FLAG_IS_DEFAULT(UseAES)) { ++ warning("AES instructions are not available on this CPU"); ++ FLAG_SET_DEFAULT(UseAES, false); ++ } ++ } ++ ++ if (UseCRC32Intrinsics) { ++ if (!FLAG_IS_DEFAULT(UseCRC32Intrinsics)) { ++ warning("CRC32Intrinsics instructions are not available on this CPU"); ++ FLAG_SET_DEFAULT(UseCRC32Intrinsics, false); ++ } ++ } ++ ++ if (UseAESIntrinsics) { ++ if (!FLAG_IS_DEFAULT(UseAESIntrinsics)) { ++ warning("AES intrinsics are not available on this CPU"); ++ FLAG_SET_DEFAULT(UseAESIntrinsics, false); ++ } ++ } ++ ++ if (FLAG_IS_DEFAULT(UseMontgomeryMultiplyIntrinsic)) { ++ UseMontgomeryMultiplyIntrinsic = true; ++ } ++ if (FLAG_IS_DEFAULT(UseMontgomerySquareIntrinsic)) { ++ UseMontgomerySquareIntrinsic = true; ++ } ++ ++ if (CriticalJNINatives) { ++ if (FLAG_IS_CMDLINE(CriticalJNINatives)) { 
++ warning("CriticalJNINatives specified, but not supported in this VM"); ++ } ++ FLAG_SET_DEFAULT(CriticalJNINatives, false); ++ } ++} ++ ++void VM_Version::initialize() { ++ ResourceMark rm; ++ // Making this stub must be FIRST use of assembler ++ ++ stub_blob = BufferBlob::create("get_cpu_info_stub", stub_size); ++ if (stub_blob == NULL) { ++ vm_exit_during_initialization("Unable to allocate get_cpu_info_stub"); ++ } ++ CodeBuffer c(stub_blob); ++ VM_Version_StubGenerator g(&c); ++ get_cpu_info_stub = CAST_TO_FN_PTR(get_cpu_info_stub_t, ++ g.generate_get_cpu_info()); ++ ++ get_processor_features(); ++} +diff --git a/hotspot/src/cpu/mips/vm/vm_version_mips.hpp b/hotspot/src/cpu/mips/vm/vm_version_mips.hpp +new file mode 100644 +index 0000000000..0de01e5f64 +--- /dev/null ++++ b/hotspot/src/cpu/mips/vm/vm_version_mips.hpp +@@ -0,0 +1,221 @@ ++/* ++ * Copyright (c) 1997, 2013, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2019, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ * ++ */ ++ ++#ifndef CPU_MIPS_VM_VM_VERSION_MIPS_HPP ++#define CPU_MIPS_VM_VM_VERSION_MIPS_HPP ++ ++#include "runtime/globals_extension.hpp" ++#include "runtime/vm_version.hpp" ++ ++ ++class VM_Version: public Abstract_VM_Version { ++public: ++ ++ union Loongson_Cpucfg_Id1 { ++ uint32_t value; ++ struct { ++ uint32_t FP : 1, ++ FPREV : 3, ++ MMI : 1, ++ MSA1 : 1, ++ MSA2 : 1, ++ CGP : 1, ++ WRP : 1, ++ LSX1 : 1, ++ LSX2 : 1, ++ LASX : 1, ++ R6FXP : 1, ++ R6CRCP : 1, ++ R6FPP : 1, ++ CNT64 : 1, ++ LSLDR0 : 1, ++ LSPREF : 1, ++ LSPREFX : 1, ++ LSSYNCI : 1, ++ LSUCA : 1, ++ LLSYNC : 1, ++ TGTSYNC : 1, ++ LLEXC : 1, ++ SCRAND : 1, ++ MUALP : 1, ++ KMUALEn : 1, ++ ITLBT : 1, ++ LSUPERF : 1, ++ SFBP : 1, ++ CDMAP : 1, ++ : 1; ++ } bits; ++ }; ++ ++ union Loongson_Cpucfg_Id2 { ++ uint32_t value; ++ struct { ++ uint32_t LEXT1 : 1, ++ LEXT2 : 1, ++ LEXT3 : 1, ++ LSPW : 1, ++ LBT1 : 1, ++ LBT2 : 1, ++ LBT3 : 1, ++ LBTMMU : 1, ++ LPMP : 1, ++ LPMRev : 3, ++ LAMO : 1, ++ LPIXU : 1, ++ LPIXNU : 1, ++ LVZP : 1, ++ LVZRev : 3, ++ LGFTP : 1, ++ LGFTRev : 3, ++ LLFTP : 1, ++ LLFTRev : 3, ++ LCSRP : 1, ++ DISBLKLY : 1, ++ : 3; ++ } bits; ++ }; ++ ++protected: ++ ++ enum { ++ CPU_LOONGSON = (1 << 1), ++ CPU_LOONGSON_GS464 = (1 << 2), ++ CPU_LOONGSON_GS464E = (1 << 3), ++ CPU_LOONGSON_GS264 = (1 << 4), ++ CPU_MMI = (1 << 11), ++ CPU_MSA1_0 = (1 << 12), ++ CPU_MSA2_0 = (1 << 13), ++ CPU_CGP = (1 << 14), ++ CPU_LSX1 = (1 << 15), ++ CPU_LSX2 = (1 << 16), ++ CPU_LASX = (1 << 17), ++ CPU_LEXT1 = (1 << 18), ++ CPU_LEXT2 = (1 << 19), ++ CPU_LEXT3 = (1 << 20), ++ CPU_LAMO = (1 << 21), ++ CPU_LPIXU = (1 << 22), ++ CPU_LLSYNC = (1 << 23), ++ CPU_TGTSYNC = (1 << 24), ++ CPU_ULSYNC = (1 << 25), ++ CPU_MUALP = (1 << 26), ++ ++ //////////////////////add some other feature here////////////////// ++ } cpuFeatureFlags; ++ ++ enum Loongson_Family { ++ L_3A1000 = 0, ++ L_3B1500 = 1, ++ L_3A2000 = 2, ++ L_3B2000 = 3, ++ L_3A3000 = 4, ++ L_3B3000 = 5, ++ L_2K1000 = 6, ++ L_UNKNOWN = 7 ++ }; ++ ++ struct Loongson_Cpuinfo { ++ Loongson_Family id; ++ const char* const match_str; ++ }; ++ ++ static int _cpuFeatures; ++ static const char* _features_str; ++ static volatile bool _is_determine_cpucfg_supported_running; ++ static bool _is_cpucfg_instruction_supported; ++ static bool _cpu_info_is_initialized; ++ ++ struct CpuidInfo { ++ uint32_t cpucfg_info_id0; ++ Loongson_Cpucfg_Id1 cpucfg_info_id1; ++ Loongson_Cpucfg_Id2 cpucfg_info_id2; ++ uint32_t cpucfg_info_id3; ++ uint32_t cpucfg_info_id4; ++ uint32_t cpucfg_info_id5; ++ uint32_t cpucfg_info_id6; ++ uint32_t cpucfg_info_id8; ++ }; ++ ++ // The actual cpuid info block ++ static CpuidInfo _cpuid_info; ++ ++ static uint32_t get_feature_flags_by_cpucfg(); ++ static int get_feature_flags_by_cpuinfo(int features); ++ static void get_processor_features(); ++ ++public: ++ // Offsets for cpuid asm stub ++ static ByteSize Loongson_Cpucfg_id0_offset() { return byte_offset_of(CpuidInfo, cpucfg_info_id0); } ++ static ByteSize Loongson_Cpucfg_id1_offset() { return byte_offset_of(CpuidInfo, cpucfg_info_id1); } ++ static ByteSize Loongson_Cpucfg_id2_offset() { return byte_offset_of(CpuidInfo, cpucfg_info_id2); } ++ static ByteSize Loongson_Cpucfg_id3_offset() { return byte_offset_of(CpuidInfo, cpucfg_info_id3); } ++ static ByteSize Loongson_Cpucfg_id4_offset() { return byte_offset_of(CpuidInfo, cpucfg_info_id4); } ++ static ByteSize Loongson_Cpucfg_id5_offset() { return byte_offset_of(CpuidInfo, cpucfg_info_id5); } ++ static ByteSize Loongson_Cpucfg_id6_offset() { return 
byte_offset_of(CpuidInfo, cpucfg_info_id6); } ++ static ByteSize Loongson_Cpucfg_id8_offset() { return byte_offset_of(CpuidInfo, cpucfg_info_id8); } ++ ++ static bool is_determine_features_test_running() { return _is_determine_cpucfg_supported_running; } ++ ++ static void clean_cpuFeatures() { _cpuFeatures = 0; } ++ ++ // Initialization ++ static void initialize(); ++ ++ static bool cpu_info_is_initialized() { return _cpu_info_is_initialized; } ++ ++ static bool supports_cpucfg() { return _is_cpucfg_instruction_supported; } ++ static bool set_supports_cpucfg(bool value) { return _is_cpucfg_instruction_supported = value; } ++ ++ static bool is_loongson() { return _cpuFeatures & CPU_LOONGSON; } ++ static bool is_gs264() { return _cpuFeatures & CPU_LOONGSON_GS264; } ++ static bool is_gs464() { return _cpuFeatures & CPU_LOONGSON_GS464; } ++ static bool is_gs464e() { return _cpuFeatures & CPU_LOONGSON_GS464E; } ++ static bool supports_dsp() { return 0; /*not supported yet*/} ++ static bool supports_ps() { return 0; /*not supported yet*/} ++ static bool supports_3d() { return 0; /*not supported yet*/} ++ static bool supports_msa1_0() { return _cpuFeatures & CPU_MSA1_0; } ++ static bool supports_msa2_0() { return _cpuFeatures & CPU_MSA2_0; } ++ static bool supports_cgp() { return _cpuFeatures & CPU_CGP; } ++ static bool supports_mmi() { return _cpuFeatures & CPU_MMI; } ++ static bool supports_lsx1() { return _cpuFeatures & CPU_LSX1; } ++ static bool supports_lsx2() { return _cpuFeatures & CPU_LSX2; } ++ static bool supports_lasx() { return _cpuFeatures & CPU_LASX; } ++ static bool supports_lext1() { return _cpuFeatures & CPU_LEXT1; } ++ static bool supports_lext2() { return _cpuFeatures & CPU_LEXT2; } ++ static bool supports_lext3() { return _cpuFeatures & CPU_LEXT3; } ++ static bool supports_lamo() { return _cpuFeatures & CPU_LAMO; } ++ static bool supports_lpixu() { return _cpuFeatures & CPU_LPIXU; } ++ static bool needs_llsync() { return _cpuFeatures & CPU_LLSYNC; } ++ static bool needs_tgtsync() { return _cpuFeatures & CPU_TGTSYNC; } ++ static bool needs_ulsync() { return _cpuFeatures & CPU_ULSYNC; } ++ static bool supports_mualp() { return _cpuFeatures & CPU_MUALP; } ++ ++ //mips has no such instructions, use ll/sc instead ++ static bool supports_compare_and_exchange() { return false; } ++ ++ static const char* cpu_features() { return _features_str; } ++ ++}; ++ ++#endif // CPU_MIPS_VM_VM_VERSION_MIPS_HPP +diff --git a/hotspot/src/cpu/mips/vm/vmreg_mips.cpp b/hotspot/src/cpu/mips/vm/vmreg_mips.cpp +new file mode 100644 +index 0000000000..86bd74d430 +--- /dev/null ++++ b/hotspot/src/cpu/mips/vm/vmreg_mips.cpp +@@ -0,0 +1,51 @@ ++/* ++ * Copyright (c) 2006, 2012, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). 
++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#include "precompiled.hpp" ++#include "asm/assembler.hpp" ++#include "code/vmreg.hpp" ++ ++ ++ ++void VMRegImpl::set_regName() { ++ Register reg = ::as_Register(0); ++ int i; ++ for (i = 0; i < ConcreteRegisterImpl::max_gpr ; ) { ++ regName[i++] = reg->name(); ++ regName[i++] = reg->name(); ++ reg = reg->successor(); ++ } ++ ++ FloatRegister freg = ::as_FloatRegister(0); ++ for ( ; i < ConcreteRegisterImpl::max_fpr ; ) { ++ regName[i++] = freg->name(); ++ regName[i++] = freg->name(); ++ freg = freg->successor(); ++ } ++ ++ for ( ; i < ConcreteRegisterImpl::number_of_registers ; i ++ ) { ++ regName[i] = "NON-GPR-FPR"; ++ } ++} +diff --git a/hotspot/src/cpu/mips/vm/vmreg_mips.hpp b/hotspot/src/cpu/mips/vm/vmreg_mips.hpp +new file mode 100644 +index 0000000000..6a970ea91a +--- /dev/null ++++ b/hotspot/src/cpu/mips/vm/vmreg_mips.hpp +@@ -0,0 +1,35 @@ ++/* ++ * Copyright (c) 2006, 2010, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2019, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#ifndef CPU_MIPS_VM_VMREG_MIPS_HPP ++#define CPU_MIPS_VM_VMREG_MIPS_HPP ++ ++bool is_Register(); ++Register as_Register(); ++ ++bool is_FloatRegister(); ++FloatRegister as_FloatRegister(); ++ ++#endif // CPU_MIPS_VM_VMREG_MIPS_HPP +diff --git a/hotspot/src/cpu/mips/vm/vmreg_mips.inline.hpp b/hotspot/src/cpu/mips/vm/vmreg_mips.inline.hpp +new file mode 100644 +index 0000000000..77e18ce57d +--- /dev/null ++++ b/hotspot/src/cpu/mips/vm/vmreg_mips.inline.hpp +@@ -0,0 +1,68 @@ ++/* ++ * Copyright (c) 2006, 2012, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. 
++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#ifndef CPU_MIPS_VM_VMREG_MIPS_INLINE_HPP ++#define CPU_MIPS_VM_VMREG_MIPS_INLINE_HPP ++ ++inline VMReg RegisterImpl::as_VMReg() { ++ if( this==noreg ) return VMRegImpl::Bad(); ++ return VMRegImpl::as_VMReg(encoding() << 1 ); ++} ++ ++inline VMReg FloatRegisterImpl::as_VMReg() { ++ return VMRegImpl::as_VMReg((encoding() << 1) + ConcreteRegisterImpl::max_gpr); ++} ++ ++inline bool VMRegImpl::is_Register() { ++ return (unsigned int) value() < (unsigned int) ConcreteRegisterImpl::max_gpr; ++} ++ ++inline bool VMRegImpl::is_FloatRegister() { ++ return value() >= ConcreteRegisterImpl::max_gpr && value() < ConcreteRegisterImpl::max_fpr; ++} ++ ++inline Register VMRegImpl::as_Register() { ++ ++ assert( is_Register(), "must be"); ++ // Yuk ++ return ::as_Register(value() >> 1); ++} ++ ++inline FloatRegister VMRegImpl::as_FloatRegister() { ++ assert( is_FloatRegister(), "must be" ); ++ // Yuk ++ assert( is_even(value()), "must be" ); ++ return ::as_FloatRegister((value() - ConcreteRegisterImpl::max_gpr) >> 1); ++} ++ ++inline bool VMRegImpl::is_concrete() { ++ assert(is_reg(), "must be"); ++ if(is_Register()) return true; ++ if(is_FloatRegister()) return true; ++ assert(false, "what register?"); ++ return false; ++} ++ ++#endif // CPU_MIPS_VM_VMREG_MIPS_INLINE_HPP +diff --git a/hotspot/src/cpu/mips/vm/vtableStubs_mips_64.cpp b/hotspot/src/cpu/mips/vm/vtableStubs_mips_64.cpp +new file mode 100644 +index 0000000000..7779c58e0a +--- /dev/null ++++ b/hotspot/src/cpu/mips/vm/vtableStubs_mips_64.cpp +@@ -0,0 +1,301 @@ ++/* ++ * Copyright (c) 2003, 2014, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2021, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ * ++ */ ++ ++#include "precompiled.hpp" ++#include "asm/macroAssembler.hpp" ++#include "code/vtableStubs.hpp" ++#include "interp_masm_mips_64.hpp" ++#include "memory/resourceArea.hpp" ++#include "oops/compiledICHolder.hpp" ++#include "oops/klassVtable.hpp" ++#include "runtime/sharedRuntime.hpp" ++#include "vmreg_mips.inline.hpp" ++#ifdef COMPILER2 ++#include "opto/runtime.hpp" ++#endif ++ ++ ++// machine-dependent part of VtableStubs: create VtableStub of correct size and ++// initialize its code ++ ++#define __ masm-> ++ ++#define A0 RA0 ++#define A1 RA1 ++#define A2 RA2 ++#define A3 RA3 ++#define A4 RA4 ++#define A5 RA5 ++#define A6 RA6 ++#define A7 RA7 ++#define T0 RT0 ++#define T1 RT1 ++#define T2 RT2 ++#define T3 RT3 ++#define T8 RT8 ++#define T9 RT9 ++ ++#ifndef PRODUCT ++extern "C" void bad_compiled_vtable_index(JavaThread* thread, ++ oop receiver, ++ int index); ++#endif ++ ++// used by compiler only; reciever in T0. ++// used registers : ++// Rmethod : receiver klass & method ++// NOTE: If this code is used by the C1, the receiver_location is always 0. ++// when reach here, receiver in T0, klass in T8 ++VtableStub* VtableStubs::create_vtable_stub(int vtable_index) { ++ const int gs2_code_length = VtableStub::pd_code_size_limit(true); ++ VtableStub* s = new(gs2_code_length) VtableStub(true, vtable_index); ++ ResourceMark rm; ++ CodeBuffer cb(s->entry_point(), gs2_code_length); ++ MacroAssembler* masm = new MacroAssembler(&cb); ++ Register t1 = T8, t2 = Rmethod; ++#ifndef PRODUCT ++ if (CountCompiledCalls) { ++ __ li(AT, SharedRuntime::nof_megamorphic_calls_addr()); ++ __ lw(t1, AT , 0); ++ __ addiu(t1, t1, 1); ++ __ sw(t1, AT,0); ++ } ++#endif ++ ++ // get receiver (need to skip return address on top of stack) ++ //assert(receiver_location == T0->as_VMReg(), "receiver expected in T0"); ++ ++ // get receiver klass ++ address npe_addr = __ pc(); ++ //add for compressedoops ++ __ load_klass(t1, T0); ++ // compute entry offset (in words) ++ int entry_offset = InstanceKlass::vtable_start_offset() + vtable_index*vtableEntry::size(); ++#ifndef PRODUCT ++ if (DebugVtables) { ++ Label L; ++ // check offset vs vtable length ++ __ lw(t2, t1, InstanceKlass::vtable_length_offset()*wordSize); ++ assert(Assembler::is_simm16(vtable_index*vtableEntry::size()), "change this code"); ++ __ move(AT, vtable_index*vtableEntry::size()); ++ __ slt(AT, AT, t2); ++ __ bne(AT, R0, L); ++ __ delayed()->nop(); ++ __ move(A2, vtable_index); ++ __ move(A1, A0); ++ __ call_VM(noreg, CAST_FROM_FN_PTR(address, bad_compiled_vtable_index), A1, A2); ++ __ bind(L); ++ } ++#endif // PRODUCT ++ // load methodOop and target address ++ const Register method = Rmethod; ++ int offset = entry_offset*wordSize + vtableEntry::method_offset_in_bytes(); ++ guarantee(Assembler::is_simm16(offset), "not a signed 16-bit int"); ++ __ ld_ptr(method, t1, offset); ++ if (DebugVtables) { ++ Label L; ++ __ beq(method, R0, L); ++ __ delayed()->nop(); ++ __ ld(AT, method,in_bytes(Method::from_compiled_offset())); ++ __ bne(AT, R0, L); ++ __ delayed()->nop(); ++ __ stop("Vtable entry is NULL"); ++ __ bind(L); ++ } ++ // T8: receiver klass ++ // T0: receiver ++ // Rmethod: methodOop ++ // T9: entry ++ address ame_addr = __ pc(); ++ __ ld_ptr(T9, method,in_bytes(Method::from_compiled_offset())); ++ __ jr(T9); ++ __ delayed()->nop(); ++ masm->flush(); ++ s->set_exception_points(npe_addr, ame_addr); ++ return s; ++} ++ ++ ++// used registers : ++// T1 T2 ++// when reach here, the receiver in T0, klass in T1 ++VtableStub* 
VtableStubs::create_itable_stub(int itable_index) { ++ // Note well: pd_code_size_limit is the absolute minimum we can get ++ // away with. If you add code here, bump the code stub size ++ // returned by pd_code_size_limit! ++ const int gs2_code_length = VtableStub::pd_code_size_limit(false); ++ VtableStub* s = new(gs2_code_length) VtableStub(false, itable_index); ++ ResourceMark rm; ++ CodeBuffer cb(s->entry_point(), gs2_code_length); ++ MacroAssembler* masm = new MacroAssembler(&cb); ++ // we T8,T9 as temparary register, they are free from register allocator ++ Register t1 = T8, t2 = T2; ++ // Entry arguments: ++ // T1: Interface ++ // T0: Receiver ++ ++#ifndef PRODUCT ++ if (CountCompiledCalls) { ++ __ li(AT, SharedRuntime::nof_megamorphic_calls_addr()); ++ __ lw(T8, AT, 0); ++ __ addiu(T8, T8,1); ++ __ sw(T8, AT, 0); ++ } ++#endif /* PRODUCT */ ++ const Register holder_klass_reg = T1; // declaring interface klass (DECC) ++ const Register resolved_klass_reg = Rmethod; // resolved interface klass (REFC) ++ const Register icholder_reg = T1; ++ __ ld_ptr(resolved_klass_reg, icholder_reg, CompiledICHolder::holder_klass_offset()); ++ __ ld_ptr(holder_klass_reg, icholder_reg, CompiledICHolder::holder_metadata_offset()); ++ ++ // get receiver klass (also an implicit null-check) ++ address npe_addr = __ pc(); ++ __ load_klass(t1, T0); ++ { ++ // x86 use lookup_interface_method, but lookup_interface_method does not work on MIPS. ++ const int base = InstanceKlass::vtable_start_offset() * wordSize; ++ assert(vtableEntry::size() * wordSize == 8, "adjust the scaling in the code below"); ++ assert(Assembler::is_simm16(base), "change this code"); ++ __ daddiu(t2, t1, base); ++ assert(Assembler::is_simm16(InstanceKlass::vtable_length_offset() * wordSize), "change this code"); ++ __ lw(AT, t1, InstanceKlass::vtable_length_offset() * wordSize); ++ __ dsll(AT, AT, Address::times_8); ++ __ daddu(t2, t2, AT); ++ if (HeapWordsPerLong > 1) { ++ __ round_to(t2, BytesPerLong); ++ } ++ ++ Label hit, entry; ++ assert(Assembler::is_simm16(itableOffsetEntry::size() * wordSize), "change this code"); ++ __ bind(entry); ++ ++#ifdef ASSERT ++ // Check that the entry is non-null ++ if (DebugVtables) { ++ Label L; ++ assert(Assembler::is_simm16(itableOffsetEntry::interface_offset_in_bytes()), "change this code"); ++ __ lw(AT, t1, itableOffsetEntry::interface_offset_in_bytes()); ++ __ bne(AT, R0, L); ++ __ delayed()->nop(); ++ __ stop("null entry point found in itable's offset table"); ++ __ bind(L); ++ } ++#endif ++ assert(Assembler::is_simm16(itableOffsetEntry::interface_offset_in_bytes()), "change this code"); ++ __ ld_ptr(AT, t2, itableOffsetEntry::interface_offset_in_bytes()); ++ __ bne(AT, resolved_klass_reg, entry); ++ __ delayed()->addiu(t2, t2, itableOffsetEntry::size() * wordSize); ++ ++ } ++ ++ // add for compressedoops ++ __ load_klass(t1, T0); ++ // compute itable entry offset (in words) ++ const int base = InstanceKlass::vtable_start_offset() * wordSize; ++ assert(vtableEntry::size() * wordSize == 8, "adjust the scaling in the code below"); ++ assert(Assembler::is_simm16(base), "change this code"); ++ __ daddiu(t2, t1, base); ++ assert(Assembler::is_simm16(InstanceKlass::vtable_length_offset() * wordSize), "change this code"); ++ __ lw(AT, t1, InstanceKlass::vtable_length_offset() * wordSize); ++ __ dsll(AT, AT, Address::times_8); ++ __ daddu(t2, t2, AT); ++ if (HeapWordsPerLong > 1) { ++ __ round_to(t2, BytesPerLong); ++ } ++ ++ Label hit, entry; ++ assert(Assembler::is_simm16(itableOffsetEntry::size() * 
wordSize), "change this code"); ++ __ bind(entry); ++ ++#ifdef ASSERT ++ // Check that the entry is non-null ++ if (DebugVtables) { ++ Label L; ++ assert(Assembler::is_simm16(itableOffsetEntry::interface_offset_in_bytes()), "change this code"); ++ __ lw(AT, t1, itableOffsetEntry::interface_offset_in_bytes()); ++ __ bne(AT, R0, L); ++ __ delayed()->nop(); ++ __ stop("null entry point found in itable's offset table"); ++ __ bind(L); ++ } ++#endif ++ assert(Assembler::is_simm16(itableOffsetEntry::interface_offset_in_bytes()), "change this code"); ++ __ ld_ptr(AT, t2, itableOffsetEntry::interface_offset_in_bytes()); ++ __ bne(AT, holder_klass_reg, entry); ++ __ delayed()->addiu(t2, t2, itableOffsetEntry::size() * wordSize); ++ ++ // We found a hit, move offset into T9 ++ __ ld_ptr(t2, t2, itableOffsetEntry::offset_offset_in_bytes() - itableOffsetEntry::size() * wordSize); ++ ++ // Compute itableMethodEntry. ++ const int method_offset = (itableMethodEntry::size() * wordSize * itable_index) + ++ itableMethodEntry::method_offset_in_bytes(); ++ ++ // Get methodOop and entrypoint for compiler ++ const Register method = Rmethod; ++ __ dsll(AT, t2, Address::times_1); ++ __ addu(AT, AT, t1); ++ guarantee(Assembler::is_simm16(method_offset), "not a signed 16-bit int"); ++ __ ld_ptr(method, AT, method_offset); ++ ++#ifdef ASSERT ++ if (DebugVtables) { ++ Label L1; ++ __ beq(method, R0, L1); ++ __ delayed()->nop(); ++ __ ld(AT, method,in_bytes(Method::from_compiled_offset())); ++ __ bne(AT, R0, L1); ++ __ delayed()->nop(); ++ __ stop("methodOop is null"); ++ __ bind(L1); ++ } ++#endif // ASSERT ++ ++ // Rmethod: methodOop ++ // T0: receiver ++ // T9: entry point ++ address ame_addr = __ pc(); ++ __ ld_ptr(T9, method,in_bytes(Method::from_compiled_offset())); ++ __ jr(T9); ++ __ delayed()->nop(); ++ masm->flush(); ++ s->set_exception_points(npe_addr, ame_addr); ++ return s; ++} ++ ++// NOTE : whenever you change the code above, dont forget to change the const here ++int VtableStub::pd_code_size_limit(bool is_vtable_stub) { ++ if (is_vtable_stub) { ++ return ( DebugVtables ? 600 : 28) + (CountCompiledCalls ? 24 : 0)+ ++ (UseCompressedOops ? 16 : 0); ++ } else { ++ return ( DebugVtables ? 636 : 152) + (CountCompiledCalls ? 24 : 0)+ ++ (UseCompressedOops ? 
32 : 0); ++ } ++} ++ ++int VtableStub::pd_code_alignment() { ++ return wordSize; ++} +diff --git a/hotspot/src/cpu/x86/vm/c1_LIRAssembler_x86.cpp b/hotspot/src/cpu/x86/vm/c1_LIRAssembler_x86.cpp +index c1c053e66c..5c90df1079 100644 +--- a/hotspot/src/cpu/x86/vm/c1_LIRAssembler_x86.cpp ++++ b/hotspot/src/cpu/x86/vm/c1_LIRAssembler_x86.cpp +@@ -1513,6 +1513,10 @@ void LIR_Assembler::emit_opBranch(LIR_OpBranch* op) { + } + } + ++void LIR_Assembler::emit_opCmpBranch(LIR_OpCmpBranch* op) { ++ ShouldNotReachHere(); ++} ++ + void LIR_Assembler::emit_opConvert(LIR_OpConvert* op) { + LIR_Opr src = op->in_opr(); + LIR_Opr dest = op->result_opr(); +@@ -2102,6 +2106,12 @@ void LIR_Assembler::cmove(LIR_Condition condition, LIR_Opr opr1, LIR_Opr opr2, L + } + + ++void LIR_Assembler::cmp_cmove(LIR_Condition condition, LIR_Opr left, LIR_Opr right, ++ LIR_Opr src1, LIR_Opr src2, LIR_Opr result, BasicType type) { ++ ShouldNotReachHere(); ++} ++ ++ + void LIR_Assembler::arith_op(LIR_Code code, LIR_Opr left, LIR_Opr right, LIR_Opr dest, CodeEmitInfo* info, bool pop_fpu_stack) { + assert(info == NULL, "should never be used, idiv/irem and ldiv/lrem not handled by this method"); + +diff --git a/hotspot/src/cpu/x86/vm/c1_LIRGenerator_x86.cpp b/hotspot/src/cpu/x86/vm/c1_LIRGenerator_x86.cpp +index 92b73e1c71..45da327efb 100644 +--- a/hotspot/src/cpu/x86/vm/c1_LIRGenerator_x86.cpp ++++ b/hotspot/src/cpu/x86/vm/c1_LIRGenerator_x86.cpp +@@ -242,20 +242,27 @@ void LIRGenerator::increment_counter(LIR_Address* addr, int step) { + __ add((LIR_Opr)addr, LIR_OprFact::intConst(step), (LIR_Opr)addr); + } + +-void LIRGenerator::cmp_mem_int(LIR_Condition condition, LIR_Opr base, int disp, int c, CodeEmitInfo* info) { ++template ++void LIRGenerator::cmp_mem_int_branch(LIR_Condition condition, LIR_Opr base, int disp, int c, T tgt, CodeEmitInfo* info) { + __ cmp_mem_int(condition, base, disp, c, info); ++ __ branch(condition, T_INT, tgt); + } + ++// Explicit instantiation for all supported types. ++template void LIRGenerator::cmp_mem_int_branch(LIR_Condition, LIR_Opr, int, int, Label*, CodeEmitInfo*); ++template void LIRGenerator::cmp_mem_int_branch(LIR_Condition, LIR_Opr, int, int, BlockBegin*, CodeEmitInfo*); ++template void LIRGenerator::cmp_mem_int_branch(LIR_Condition, LIR_Opr, int, int, CodeStub*, CodeEmitInfo*); + +-void LIRGenerator::cmp_reg_mem(LIR_Condition condition, LIR_Opr reg, LIR_Opr base, int disp, BasicType type, CodeEmitInfo* info) { +- __ cmp_reg_mem(condition, reg, new LIR_Address(base, disp, type), info); +-} +- +- +-void LIRGenerator::cmp_reg_mem(LIR_Condition condition, LIR_Opr reg, LIR_Opr base, LIR_Opr disp, BasicType type, CodeEmitInfo* info) { ++template ++void LIRGenerator::cmp_reg_mem_branch(LIR_Condition condition, LIR_Opr reg, LIR_Opr base, int disp, BasicType type, T tgt, CodeEmitInfo* info) { + __ cmp_reg_mem(condition, reg, new LIR_Address(base, disp, type), info); ++ __ branch(condition, type, tgt); + } + ++// Explicit instantiation for all supported types. 
++template void LIRGenerator::cmp_reg_mem_branch(LIR_Condition, LIR_Opr, LIR_Opr, int, BasicType, Label*, CodeEmitInfo*); ++template void LIRGenerator::cmp_reg_mem_branch(LIR_Condition, LIR_Opr, LIR_Opr, int, BasicType, BlockBegin*, CodeEmitInfo*); ++template void LIRGenerator::cmp_reg_mem_branch(LIR_Condition, LIR_Opr, LIR_Opr, int, BasicType, CodeStub*, CodeEmitInfo*); + + bool LIRGenerator::strength_reduce_multiply(LIR_Opr left, jint c, LIR_Opr result, LIR_Opr tmp) { + if (tmp->is_valid() && c > 0 && c < max_jint) { +diff --git a/hotspot/src/os/linux/vm/os_linux.cpp b/hotspot/src/os/linux/vm/os_linux.cpp +index ba1bce4239..42a73ea5aa 100644 +--- a/hotspot/src/os/linux/vm/os_linux.cpp ++++ b/hotspot/src/os/linux/vm/os_linux.cpp +@@ -22,6 +22,12 @@ + * + */ + ++/* ++ * This file has been modified by Loongson Technology in 2022. These ++ * modifications are Copyright (c) 2021, 2022, Loongson Technology, and are made ++ * available on the same license terms set forth above. ++ */ ++ + // no precompiled headers + #include "classfile/classLoader.hpp" + #include "classfile/systemDictionary.hpp" +@@ -1969,7 +1975,11 @@ void * os::dll_load(const char *filename, char *ebuf, int ebuflen) + {EM_ALPHA, EM_ALPHA, ELFCLASS64, ELFDATA2LSB, (char*)"Alpha"}, + {EM_MIPS_RS3_LE, EM_MIPS_RS3_LE, ELFCLASS32, ELFDATA2LSB, (char*)"MIPSel"}, + {EM_MIPS, EM_MIPS, ELFCLASS32, ELFDATA2MSB, (char*)"MIPS"}, ++ {EM_MIPS, EM_MIPS, ELFCLASS64, ELFDATA2LSB, (char*)"MIPS64 LE"}, + {EM_PARISC, EM_PARISC, ELFCLASS32, ELFDATA2MSB, (char*)"PARISC"}, ++#if defined (LOONGARCH64) ++ {EM_LOONGARCH, EM_LOONGARCH, ELFCLASS64, ELFDATA2LSB, (char*)"LOONGARCH64"}, ++#endif + {EM_68K, EM_68K, ELFCLASS32, ELFDATA2MSB, (char*)"M68k"}, + {EM_AARCH64, EM_AARCH64, ELFCLASS64, ELFDATA2LSB, (char*)"AARCH64"}, + }; +@@ -1984,6 +1994,8 @@ void * os::dll_load(const char *filename, char *ebuf, int ebuflen) + static Elf32_Half running_arch_code=EM_SPARCV9; + #elif (defined __sparc) && (!defined _LP64) + static Elf32_Half running_arch_code=EM_SPARC; ++ #elif (defined MIPS64) ++ static Elf32_Half running_arch_code=EM_MIPS; + #elif (defined __powerpc64__) + static Elf32_Half running_arch_code=EM_PPC64; + #elif (defined __powerpc__) +@@ -2004,9 +2016,11 @@ void * os::dll_load(const char *filename, char *ebuf, int ebuflen) + static Elf32_Half running_arch_code=EM_68K; + #elif (defined AARCH64) + static Elf32_Half running_arch_code=EM_AARCH64; ++ #elif (defined LOONGARCH64) ++ static Elf32_Half running_arch_code=EM_LOONGARCH; + #else + #error Method os::dll_load requires that one of following is defined:\ +- IA32, AMD64, IA64, __sparc, __powerpc__, ARM, S390, ALPHA, MIPS, MIPSEL, PARISC, M68K, AARCH64 ++ IA32, AMD64, IA64, __sparc, __powerpc__, ARM, S390, ALPHA, MIPS, MIPSEL, __mips64, PARISC, M68K, AARCH64 + #endif + + // Identify compatability class for VM's architecture and library's architecture +@@ -3513,7 +3527,7 @@ size_t os::Linux::find_large_page_size() { + + #ifndef ZERO + large_page_size = IA32_ONLY(4 * M) AMD64_ONLY(2 * M) IA64_ONLY(256 * M) SPARC_ONLY(4 * M) +- ARM_ONLY(2 * M) PPC_ONLY(4 * M) AARCH64_ONLY(2 * M); ++ ARM_ONLY(2 * M) PPC_ONLY(4 * M) AARCH64_ONLY(2 * M) MIPS64_ONLY(4 * M) LOONGARCH64_ONLY(4 * M); //In MIPS _large_page_size is seted 4*M. 
// TODO: LA + #endif // ZERO + + FILE *fp = fopen("/proc/meminfo", "r"); +@@ -5120,7 +5134,12 @@ jint os::init_2(void) + Linux::fast_thread_clock_init(); + + // Allocate a single page and mark it as readable for safepoint polling ++#ifdef OPT_SAFEPOINT ++ void * p = (void *)(0x10000); ++ address polling_page = (address) ::mmap(p, Linux::page_size(), PROT_READ, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0); ++#else + address polling_page = (address) ::mmap(NULL, Linux::page_size(), PROT_READ, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0); ++#endif + guarantee( polling_page != MAP_FAILED, "os::init_2: failed to allocate polling page" ); + + os::set_polling_page( polling_page ); +@@ -5155,13 +5174,20 @@ jint os::init_2(void) + // size. Add a page for compiler2 recursion in main thread. + // Add in 2*BytesPerWord times page size to account for VM stack during + // class initialization depending on 32 or 64 bit VM. ++ ++ /* ++ * 2014/1/2: JDK8 requires larger -Xss option. ++ * Some application cannot run with -Xss192K. ++ * We are not sure whether this causes errors, so simply print a warning. ++ */ ++ size_t min_stack_allowed_jdk6 = os::Linux::min_stack_allowed; + os::Linux::min_stack_allowed = MAX2(os::Linux::min_stack_allowed, + (size_t)(StackYellowPages+StackRedPages+StackShadowPages) * Linux::page_size() + + (2*BytesPerWord COMPILER2_PRESENT(+1)) * Linux::vm_default_page_size()); + + size_t threadStackSizeInBytes = ThreadStackSize * K; + if (threadStackSizeInBytes != 0 && +- threadStackSizeInBytes < os::Linux::min_stack_allowed) { ++ threadStackSizeInBytes < min_stack_allowed_jdk6) { + tty->print_cr("\nThe stack size specified is too small, " + "Specify at least %dk", + os::Linux::min_stack_allowed/ K); +diff --git a/hotspot/src/os/linux/vm/os_perf_linux.cpp b/hotspot/src/os/linux/vm/os_perf_linux.cpp +index 0d1f75810a..cbc6c0757c 100644 +--- a/hotspot/src/os/linux/vm/os_perf_linux.cpp ++++ b/hotspot/src/os/linux/vm/os_perf_linux.cpp +@@ -50,6 +50,12 @@ + #ifdef TARGET_ARCH_ppc + # include "vm_version_ext_ppc.hpp" + #endif ++#ifdef TARGET_ARCH_mips ++# include "vm_version_ext_mips.hpp" ++#endif ++#ifdef TARGET_ARCH_loongarch ++# include "vm_version_ext_loongarch.hpp" ++#endif + + #include + #include +diff --git a/hotspot/src/os_cpu/linux_loongarch/vm/assembler_linux_loongarch.cpp b/hotspot/src/os_cpu/linux_loongarch/vm/assembler_linux_loongarch.cpp +new file mode 100644 +index 0000000000..5ee0965f42 +--- /dev/null ++++ b/hotspot/src/os_cpu/linux_loongarch/vm/assembler_linux_loongarch.cpp +@@ -0,0 +1,92 @@ ++/* ++ * Copyright (c) 1999, 2010, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 
++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#include "precompiled.hpp" ++#include "asm/macroAssembler.hpp" ++#include "asm/macroAssembler.inline.hpp" ++#include "runtime/os.hpp" ++#include "runtime/threadLocalStorage.hpp" ++ ++#define A0 RA0 ++#define A1 RA1 ++#define A2 RA2 ++#define A3 RA3 ++#define A4 RA4 ++#define A5 RA5 ++#define A6 RA6 ++#define A7 RA7 ++#define T0 RT0 ++#define T1 RT1 ++#define T2 RT2 ++#define T3 RT3 ++#define T4 RT4 ++#define T5 RT5 ++#define T6 RT6 ++#define T7 RT7 ++#define T8 RT8 ++ ++void MacroAssembler::get_thread(Register thread) { ++#ifdef MINIMIZE_RAM_USAGE ++ Register tmp; ++ ++ if (thread == AT) ++ tmp = T9; ++ else ++ tmp = AT; ++ ++ move(thread, SP); ++ shr(thread, PAGE_SHIFT); ++ ++ push(tmp); ++ li(tmp, ((1UL << (SP_BITLENGTH - PAGE_SHIFT)) - 1)); ++ andr(thread, thread, tmp); ++ shl(thread, Address::times_ptr); // sizeof(Thread *) ++ li48(tmp, (long)ThreadLocalStorage::sp_map_addr()); ++ add_d(tmp, tmp, thread); ++ ld_ptr(thread, tmp, 0); ++ pop(tmp); ++#else ++ if (thread != V0) { ++ push(V0); ++ } ++ pushad_except_v0(); ++ ++ li(A0, ThreadLocalStorage::thread_index()); ++ push(S5); ++ move(S5, SP); ++ li(AT, -StackAlignmentInBytes); ++ andr(SP, SP, AT); ++ // TODO: confirm reloc ++ call(CAST_FROM_FN_PTR(address, pthread_getspecific), relocInfo::runtime_call_type); ++ move(SP, S5); ++ pop(S5); ++ ++ popad_except_v0(); ++ if (thread != V0) { ++ move(thread, V0); ++ pop(V0); ++ } ++#endif // MINIMIZE_RAM_USAGE ++} +diff --git a/hotspot/src/os_cpu/linux_loongarch/vm/atomic_linux_loongarch.inline.hpp b/hotspot/src/os_cpu/linux_loongarch/vm/atomic_linux_loongarch.inline.hpp +new file mode 100644 +index 0000000000..69590ba582 +--- /dev/null ++++ b/hotspot/src/os_cpu/linux_loongarch/vm/atomic_linux_loongarch.inline.hpp +@@ -0,0 +1,206 @@ ++/* ++ * Copyright (c) 1999, 2013, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2023, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ * ++ */ ++ ++#ifndef OS_CPU_LINUX_LOONGARCH_VM_ATOMIC_LINUX_LOONGARCH_INLINE_HPP ++#define OS_CPU_LINUX_LOONGARCH_VM_ATOMIC_LINUX_LOONGARCH_INLINE_HPP ++ ++#include "orderAccess_linux_loongarch.inline.hpp" ++#include "runtime/atomic.hpp" ++#include "runtime/os.hpp" ++#include "vm_version_loongarch.hpp" ++ ++// Implementation of class atomic ++ ++inline void Atomic::store (jbyte store_value, jbyte* dest) { *dest = store_value; } ++inline void Atomic::store (jshort store_value, jshort* dest) { *dest = store_value; } ++inline void Atomic::store (jint store_value, jint* dest) { *dest = store_value; } ++inline void Atomic::store (jlong store_value, jlong* dest) { *dest = store_value; } ++inline void Atomic::store_ptr(intptr_t store_value, intptr_t* dest) { *dest = store_value; } ++inline void Atomic::store_ptr(void* store_value, void* dest) { *(void**)dest = store_value; } ++ ++inline void Atomic::store (jbyte store_value, volatile jbyte* dest) { *dest = store_value; } ++inline void Atomic::store (jshort store_value, volatile jshort* dest) { *dest = store_value; } ++inline void Atomic::store (jint store_value, volatile jint* dest) { *dest = store_value; } ++inline void Atomic::store (jlong store_value, volatile jlong* dest) { *dest = store_value; } ++inline void Atomic::store_ptr(intptr_t store_value, volatile intptr_t* dest) { *dest = store_value; } ++inline void Atomic::store_ptr(void* store_value, volatile void* dest) { *(void**)dest = store_value; } ++ ++inline jlong Atomic::load (volatile jlong* src) { return *src; } ++ ++///////////implementation of Atomic::add*///////////////// ++inline jint Atomic::add (jint add_value, volatile jint* dest) { ++ //TODO LA opt amadd ++ jint __ret, __tmp; ++ __asm__ __volatile__ ( ++ "1: ll.w %[__ret], %[__dest] \n\t" ++ " add.w %[__tmp], %[__val], %[__ret] \n\t" ++ " sc.w %[__tmp], %[__dest] \n\t" ++ " beqz %[__tmp], 1b \n\t" ++ ++ : [__ret] "=&r" (__ret), [__tmp] "=&r" (__tmp) ++ : [__dest] "ZC" (*(volatile jint*)dest), [__val] "r" (add_value) ++ : "memory" ++ ); ++ ++ return add_value + __ret; ++} ++ ++inline intptr_t Atomic::add_ptr (intptr_t add_value, volatile intptr_t* dest) { ++ //TODO LA opt amadd ++ jint __ret, __tmp; ++ __asm__ __volatile__ ( ++ "1: ll.d %[__ret], %[__dest] \n\t" ++ " add.d %[__tmp], %[__val], %[__ret] \n\t" ++ " sc.d %[__tmp], %[__dest] \n\t" ++ " beqz %[__tmp], 1b \n\t" ++ ++ : [__ret] "=&r" (__ret), [__tmp] "=&r" (__tmp) ++ : [__dest] "ZC" (*(volatile jint*)dest), [__val] "r" (add_value) ++ : "memory" ++ ); ++ ++ return add_value + __ret; ++} ++ ++inline void* Atomic::add_ptr (intptr_t add_value, volatile void* dest) { ++ return (void*)add_ptr((intptr_t)add_value, (volatile intptr_t*)dest); ++} ++ ++///////////implementation of Atomic::inc*///////////////// ++inline void Atomic::inc (volatile jint* dest) { (void)add(1, dest); } ++inline void Atomic::inc_ptr (volatile intptr_t* dest) { (void)add_ptr(1, dest); } ++inline void Atomic::inc_ptr (volatile void* dest) { (void)inc_ptr((volatile intptr_t*)dest); } ++ ++///////////implementation of Atomic::dec*///////////////// ++inline void Atomic::dec (volatile jint* dest) { (void)add(-1, dest); } ++inline void Atomic::dec_ptr (volatile intptr_t* dest) { (void)add_ptr(-1, dest); } ++inline void Atomic::dec_ptr (volatile void* dest) { (void)dec_ptr((volatile intptr_t*)dest); } ++ ++ ++///////////implementation of Atomic::xchg*///////////////// ++inline jint Atomic::xchg (jint exchange_value, volatile jint* dest) { ++ jint __ret, __tmp; ++ ++ __asm__ __volatile__ ( ++ "1: ll.w 
%[__ret], %[__dest] \n\t" ++ " move %[__tmp], %[__val] \n\t" ++ " sc.w %[__tmp], %[__dest] \n\t" ++ " beqz %[__tmp], 1b \n\t" ++ ++ : [__ret] "=&r" (__ret), [__tmp] "=&r" (__tmp) ++ : [__dest] "ZC" (*(volatile jint*)dest), [__val] "r" (exchange_value) ++ : "memory" ++ ); ++ ++ return __ret; ++} ++ ++inline intptr_t Atomic::xchg_ptr(intptr_t exchange_value, volatile intptr_t* dest) { ++ intptr_t __ret, __tmp; ++ __asm__ __volatile__ ( ++ "1: ll.d %[__ret], %[__dest] \n\t" ++ " move %[__tmp], %[__val] \n\t" ++ " sc.d %[__tmp], %[__dest] \n\t" ++ " beqz %[__tmp], 1b \n\t" ++ ++ : [__ret] "=&r" (__ret), [__tmp] "=&r" (__tmp) ++ : [__dest] "ZC" (*(volatile intptr_t*)dest), [__val] "r" (exchange_value) ++ : "memory" ++ ); ++ return __ret; ++ ++} ++ ++inline void* Atomic::xchg_ptr(void* exchange_value, volatile void* dest) { ++ return (void*)xchg_ptr((intptr_t)exchange_value, (volatile intptr_t*)dest); ++} ++ ++///////////implementation of Atomic::cmpxchg*///////////////// ++inline jint Atomic::cmpxchg (jint exchange_value, volatile jint* dest, jint compare_value) { ++ jint __prev, __cmp; ++ ++ __asm__ __volatile__ ( ++ "1: ll.w %[__prev], %[__dest] \n\t" ++ " bne %[__prev], %[__old], 2f \n\t" ++ " move %[__cmp], $r0 \n\t" ++ " move %[__cmp], %[__new] \n\t" ++ " sc.w %[__cmp], %[__dest] \n\t" ++ " beqz %[__cmp], 1b \n\t" ++ "2: \n\t" ++ " dbar 0x700 \n\t" ++ ++ : [__prev] "=&r" (__prev), [__cmp] "=&r" (__cmp) ++ : [__dest] "ZC" (*(volatile jint*)dest), [__old] "r" (compare_value), [__new] "r" (exchange_value) ++ : "memory" ++ ); ++ ++ return __prev; ++} ++ ++inline jlong Atomic::cmpxchg (jlong exchange_value, volatile jlong* dest, jlong compare_value) { ++ jlong __prev, __cmp; ++ ++ __asm__ __volatile__ ( ++ "1: ll.d %[__prev], %[__dest] \n\t" ++ " bne %[__prev], %[__old], 2f \n\t" ++ " move %[__cmp], $r0 \n\t" ++ " move %[__cmp], %[__new] \n\t" ++ " sc.d %[__cmp], %[__dest] \n\t" ++ " beqz %[__cmp], 1b \n\t" ++ "2: \n\t" ++ " dbar 0x700 \n\t" ++ ++ : [__prev] "=&r" (__prev), [__cmp] "=&r" (__cmp) ++ : [__dest] "ZC" (*(volatile jlong*)dest), [__old] "r" (compare_value), [__new] "r" (exchange_value) ++ : "memory" ++ ); ++ return __prev; ++} ++ ++inline intptr_t Atomic::cmpxchg_ptr(intptr_t exchange_value, volatile intptr_t* dest, intptr_t compare_value) { ++ intptr_t __prev, __cmp; ++ __asm__ __volatile__ ( ++ "1: ll.d %[__prev], %[__dest] \n\t" ++ " bne %[__prev], %[__old], 2f \n\t" ++ " move %[__cmp], $r0 \n\t" ++ " move %[__cmp], %[__new] \n\t" ++ " sc.d %[__cmp], %[__dest] \n\t" ++ " beqz %[__cmp], 1b \n\t" ++ "2: \n\t" ++ " dbar 0x700 \n\t" ++ ++ : [__prev] "=&r" (__prev), [__cmp] "=&r" (__cmp) ++ : [__dest] "ZC" (*(volatile intptr_t*)dest), [__old] "r" (compare_value), [__new] "r" (exchange_value) ++ : "memory" ++ ); ++ ++ return __prev; ++} ++ ++inline void* Atomic::cmpxchg_ptr(void* exchange_value, volatile void* dest, void* compare_value) { ++ return (void*)cmpxchg_ptr((intptr_t)exchange_value, (volatile intptr_t*)dest, (intptr_t)compare_value); ++} ++ ++#endif // OS_CPU_LINUX_LOONGARCH_VM_ATOMIC_LINUX_LOONGARCH_INLINE_HPP +diff --git a/hotspot/src/os_cpu/linux_loongarch/vm/bytes_linux_loongarch.inline.hpp b/hotspot/src/os_cpu/linux_loongarch/vm/bytes_linux_loongarch.inline.hpp +new file mode 100644 +index 0000000000..4e205c468e +--- /dev/null ++++ b/hotspot/src/os_cpu/linux_loongarch/vm/bytes_linux_loongarch.inline.hpp +@@ -0,0 +1,37 @@ ++/* ++ * Copyright (c) 1999, 2010, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2020, Loongson Technology. 
All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#ifndef OS_CPU_LINUX_LOONGARCH_VM_BYTES_LINUX_LOONGARCH_INLINE_HPP ++#define OS_CPU_LINUX_LOONGARCH_VM_BYTES_LINUX_LOONGARCH_INLINE_HPP ++ ++#include <byteswap.h> ++ ++// Efficient swapping of data bytes from Java byte ++// ordering to native byte ordering and vice versa. ++inline u2 Bytes::swap_u2(u2 x) { return bswap_16(x); } ++inline u4 Bytes::swap_u4(u4 x) { return bswap_32(x); } ++inline u8 Bytes::swap_u8(u8 x) { return bswap_64(x); } ++ ++#endif // OS_CPU_LINUX_LOONGARCH_VM_BYTES_LINUX_LOONGARCH_INLINE_HPP +diff --git a/hotspot/src/os_cpu/linux_loongarch/vm/copy_linux_loongarch.inline.hpp b/hotspot/src/os_cpu/linux_loongarch/vm/copy_linux_loongarch.inline.hpp +new file mode 100644 +index 0000000000..7d6e11a935 +--- /dev/null ++++ b/hotspot/src/os_cpu/linux_loongarch/vm/copy_linux_loongarch.inline.hpp +@@ -0,0 +1,125 @@ ++/* ++ * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2020, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions.
++ * ++ */ ++ ++#ifndef OS_CPU_LINUX_LOONGARCH_VM_COPY_LINUX_LOONGARCH_INLINE_HPP ++#define OS_CPU_LINUX_LOONGARCH_VM_COPY_LINUX_LOONGARCH_INLINE_HPP ++ ++static void pd_conjoint_words(HeapWord* from, HeapWord* to, size_t count) { ++ (void)memmove(to, from, count * HeapWordSize); ++} ++ ++static void pd_disjoint_words(HeapWord* from, HeapWord* to, size_t count) { ++ switch (count) { ++ case 8: to[7] = from[7]; ++ case 7: to[6] = from[6]; ++ case 6: to[5] = from[5]; ++ case 5: to[4] = from[4]; ++ case 4: to[3] = from[3]; ++ case 3: to[2] = from[2]; ++ case 2: to[1] = from[1]; ++ case 1: to[0] = from[0]; ++ case 0: break; ++ default: ++ (void)memcpy(to, from, count * HeapWordSize); ++ break; ++ } ++} ++ ++static void pd_disjoint_words_atomic(HeapWord* from, HeapWord* to, size_t count) { ++ switch (count) { ++ case 8: to[7] = from[7]; ++ case 7: to[6] = from[6]; ++ case 6: to[5] = from[5]; ++ case 5: to[4] = from[4]; ++ case 4: to[3] = from[3]; ++ case 3: to[2] = from[2]; ++ case 2: to[1] = from[1]; ++ case 1: to[0] = from[0]; ++ case 0: break; ++ default: ++ while (count-- > 0) { ++ *to++ = *from++; ++ } ++ break; ++ } ++} ++ ++static void pd_aligned_conjoint_words(HeapWord* from, HeapWord* to, size_t count) { ++ pd_conjoint_words(from, to, count); ++} ++ ++static void pd_aligned_disjoint_words(HeapWord* from, HeapWord* to, size_t count) { ++ pd_disjoint_words(from, to, count); ++} ++ ++static void pd_conjoint_bytes(void* from, void* to, size_t count) { ++ (void)memmove(to, from, count); ++} ++ ++static void pd_conjoint_bytes_atomic(void* from, void* to, size_t count) { ++ pd_conjoint_bytes(from, to, count); ++} ++ ++static void pd_conjoint_jshorts_atomic(jshort* from, jshort* to, size_t count) { ++ copy_conjoint_atomic(from, to, count); ++} ++ ++static void pd_conjoint_jints_atomic(jint* from, jint* to, size_t count) { ++ copy_conjoint_atomic(from, to, count); ++} ++ ++static void pd_conjoint_jlongs_atomic(jlong* from, jlong* to, size_t count) { ++ copy_conjoint_atomic(from, to, count); ++} ++ ++static void pd_conjoint_oops_atomic(oop* from, oop* to, size_t count) { ++ //assert(!UseCompressedOops, "foo!"); ++ assert(HeapWordSize == BytesPerOop, "heapwords and oops must be the same size"); ++ copy_conjoint_atomic(from, to, count); ++} ++ ++static void pd_arrayof_conjoint_bytes(HeapWord* from, HeapWord* to, size_t count) { ++ pd_conjoint_bytes_atomic(from, to, count); ++} ++ ++static void pd_arrayof_conjoint_jshorts(HeapWord* from, HeapWord* to, size_t count) { ++ pd_conjoint_jshorts_atomic((jshort*)from, (jshort*)to, count); ++} ++ ++static void pd_arrayof_conjoint_jints(HeapWord* from, HeapWord* to, size_t count) { ++ pd_conjoint_jints_atomic((jint*)from, (jint*)to, count); ++} ++ ++static void pd_arrayof_conjoint_jlongs(HeapWord* from, HeapWord* to, size_t count) { ++ pd_conjoint_jlongs_atomic((jlong*)from, (jlong*)to, count); ++} ++ ++static void pd_arrayof_conjoint_oops(HeapWord* from, HeapWord* to, size_t count) { ++ //assert(!UseCompressedOops, "foo!"); ++ assert(BytesPerLong == BytesPerOop, "jlongs and oops must be the same size"); ++ pd_conjoint_oops_atomic((oop*)from, (oop*)to, count); ++} ++ ++#endif // OS_CPU_LINUX_LOONGARCH_VM_COPY_LINUX_LOONGARCH_INLINE_HPP +diff --git a/hotspot/src/os_cpu/linux_loongarch/vm/globals_linux_loongarch.hpp b/hotspot/src/os_cpu/linux_loongarch/vm/globals_linux_loongarch.hpp +new file mode 100644 +index 0000000000..8ec3fa8239 +--- /dev/null ++++ b/hotspot/src/os_cpu/linux_loongarch/vm/globals_linux_loongarch.hpp +@@ -0,0 +1,43 @@ ++/* ++ * Copyright 
(c) 2000, 2013, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#ifndef OS_CPU_LINUX_LOONGARCH_VM_GLOBALS_LINUX_LOONGARCH_HPP ++#define OS_CPU_LINUX_LOONGARCH_VM_GLOBALS_LINUX_LOONGARCH_HPP ++ ++// Sets the default values for platform dependent flags used by the runtime system. ++// (see globals.hpp) ++ ++define_pd_global(bool, DontYieldALot, false); ++define_pd_global(intx, ThreadStackSize, 2048); ++define_pd_global(intx, VMThreadStackSize, 2048); ++ ++define_pd_global(intx, CompilerThreadStackSize, 0); // 0 => use system default ++ ++define_pd_global(uintx,JVMInvokeMethodSlack, 8192); ++ ++// Used on 64 bit platforms for UseCompressedOops base address ++define_pd_global(uintx,HeapBaseMinAddress, 2*G); ++ ++#endif // OS_CPU_LINUX_LOONGARCH_VM_GLOBALS_LINUX_LOONGARCH_HPP +diff --git a/hotspot/src/os_cpu/linux_loongarch/vm/orderAccess_linux_loongarch.inline.hpp b/hotspot/src/os_cpu/linux_loongarch/vm/orderAccess_linux_loongarch.inline.hpp +new file mode 100644 +index 0000000000..3e050c8d09 +--- /dev/null ++++ b/hotspot/src/os_cpu/linux_loongarch/vm/orderAccess_linux_loongarch.inline.hpp +@@ -0,0 +1,115 @@ ++/* ++ * Copyright (c) 2003, 2013, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ * ++ */ ++ ++#ifndef OS_CPU_LINUX_LOONGARCH_VM_ORDERACCESS_LINUX_LOONGARCH_INLINE_HPP ++#define OS_CPU_LINUX_LOONGARCH_VM_ORDERACCESS_LINUX_LOONGARCH_INLINE_HPP ++ ++#include "runtime/atomic.hpp" ++#include "runtime/orderAccess.hpp" ++#include "runtime/os.hpp" ++#include "vm_version_loongarch.hpp" ++ ++#define inlasm_sync(v) if (os::is_ActiveCoresMP()) \ ++ __asm__ __volatile__ ("nop" : : : "memory"); \ ++ else \ ++ __asm__ __volatile__ ("dbar %0" : :"K"(v) : "memory"); ++ ++inline void OrderAccess::loadload() { inlasm_sync(0x15); } ++inline void OrderAccess::storestore() { inlasm_sync(0x1a); } ++inline void OrderAccess::loadstore() { inlasm_sync(0x16); } ++inline void OrderAccess::storeload() { inlasm_sync(0x19); } ++ ++inline void OrderAccess::acquire() { inlasm_sync(0x14); } ++inline void OrderAccess::release() { inlasm_sync(0x12); } ++inline void OrderAccess::fence() { inlasm_sync(0x10); } ++ ++//implementation of load_acquire ++inline jbyte OrderAccess::load_acquire(volatile jbyte* p) { jbyte data = *p; acquire(); return data; } ++inline jshort OrderAccess::load_acquire(volatile jshort* p) { jshort data = *p; acquire(); return data; } ++inline jint OrderAccess::load_acquire(volatile jint* p) { jint data = *p; acquire(); return data; } ++inline jlong OrderAccess::load_acquire(volatile jlong* p) { jlong tmp = *p; acquire(); return tmp; } ++inline jubyte OrderAccess::load_acquire(volatile jubyte* p) { jubyte data = *p; acquire(); return data; } ++inline jushort OrderAccess::load_acquire(volatile jushort* p) { jushort data = *p; acquire(); return data; } ++inline juint OrderAccess::load_acquire(volatile juint* p) { juint data = *p; acquire(); return data; } ++inline julong OrderAccess::load_acquire(volatile julong* p) { julong tmp = *p; acquire(); return tmp; } ++inline jfloat OrderAccess::load_acquire(volatile jfloat* p) { jfloat data = *p; acquire(); return data; } ++inline jdouble OrderAccess::load_acquire(volatile jdouble* p) { jdouble tmp = *p; acquire(); return tmp; } ++ ++//implementation of load_ptr_acquire ++inline intptr_t OrderAccess::load_ptr_acquire(volatile intptr_t* p) { intptr_t data = *p; acquire(); return data; } ++inline void* OrderAccess::load_ptr_acquire(volatile void* p) { void *data = *(void* volatile *)p; acquire(); return data; } ++inline void* OrderAccess::load_ptr_acquire(const volatile void* p) { void *data = *(void* volatile *)p; acquire(); return data; } ++ ++//implementation of release_store ++inline void OrderAccess::release_store(volatile jbyte* p, jbyte v) { release(); *p = v; } ++inline void OrderAccess::release_store(volatile jshort* p, jshort v) { release(); *p = v; } ++inline void OrderAccess::release_store(volatile jint* p, jint v) { release(); *p = v; } ++inline void OrderAccess::release_store(volatile jlong* p, jlong v) { release(); *p = v; } ++inline void OrderAccess::release_store(volatile jubyte* p, jubyte v) { release(); *p = v; } ++inline void OrderAccess::release_store(volatile jushort* p, jushort v) { release(); *p = v; } ++inline void OrderAccess::release_store(volatile juint* p, juint v) { release(); *p = v; } ++inline void OrderAccess::release_store(volatile julong* p, julong v) { release(); *p = v; } ++inline void OrderAccess::release_store(volatile jfloat* p, jfloat v) { release(); *p = v; } ++inline void OrderAccess::release_store(volatile jdouble* p, jdouble v) { release(); *p = v; } ++ ++//implementation of release_store_ptr ++inline void OrderAccess::release_store_ptr(volatile intptr_t* p, intptr_t v) { release(); *p = v; } 
++inline void OrderAccess::release_store_ptr(volatile void* p, void* v) { release(); *(void* volatile *)p = v; } ++ ++//implementation of store_fence ++inline void OrderAccess::store_fence(jbyte* p, jbyte v) { *p = v; fence(); } ++inline void OrderAccess::store_fence(jshort* p, jshort v) { *p = v; fence(); } ++inline void OrderAccess::store_fence(jint* p, jint v) { *p = v; fence(); } ++inline void OrderAccess::store_fence(jlong* p, jlong v) { *p = v; fence(); } ++inline void OrderAccess::store_fence(jubyte* p, jubyte v) { *p = v; fence(); } ++inline void OrderAccess::store_fence(jushort* p, jushort v) { *p = v; fence(); } ++inline void OrderAccess::store_fence(juint* p, juint v) { *p = v; fence(); } ++inline void OrderAccess::store_fence(julong* p, julong v) { *p = v; fence(); } ++inline void OrderAccess::store_fence(jfloat* p, jfloat v) { *p = v; fence(); } ++inline void OrderAccess::store_fence(jdouble* p, jdouble v) { *p = v; fence(); } ++ ++//implementation of store_ptr_fence ++inline void OrderAccess::store_ptr_fence(intptr_t* p, intptr_t v) { *p = v; fence(); } ++inline void OrderAccess::store_ptr_fence(void** p, void* v) { *p = v; fence(); } ++ ++//implementation of release_store_fence ++inline void OrderAccess::release_store_fence(volatile jbyte* p, jbyte v) { release_store(p, v); fence(); } ++inline void OrderAccess::release_store_fence(volatile jshort* p, jshort v) { release_store(p, v); fence(); } ++inline void OrderAccess::release_store_fence(volatile jint* p, jint v) { release_store(p, v); fence(); } ++inline void OrderAccess::release_store_fence(volatile jlong* p, jlong v) { release_store(p, v); fence(); } ++inline void OrderAccess::release_store_fence(volatile jubyte* p, jubyte v) { release_store(p, v); fence(); } ++inline void OrderAccess::release_store_fence(volatile jushort* p, jushort v) { release_store(p, v); fence(); } ++inline void OrderAccess::release_store_fence(volatile juint* p, juint v) { release_store(p, v); fence(); } ++inline void OrderAccess::release_store_fence(volatile julong* p, julong v) { release_store(p, v); fence(); } ++inline void OrderAccess::release_store_fence(volatile jfloat* p, jfloat v) { release_store(p, v); fence(); } ++inline void OrderAccess::release_store_fence(volatile jdouble* p, jdouble v) { release_store(p, v); fence(); } ++ ++//implementaion of release_store_ptr_fence ++inline void OrderAccess::release_store_ptr_fence(volatile intptr_t* p, intptr_t v) { release_store_ptr(p, v); fence(); } ++inline void OrderAccess::release_store_ptr_fence(volatile void* p, void* v) { release_store_ptr(p, v); fence(); } ++ ++#undef inlasm_sync ++ ++#endif // OS_CPU_LINUX_LOONGARCH_VM_ORDERACCESS_LINUX_LOONGARCH_INLINE_HPP +diff --git a/hotspot/src/os_cpu/linux_loongarch/vm/os_linux_loongarch.cpp b/hotspot/src/os_cpu/linux_loongarch/vm/os_linux_loongarch.cpp +new file mode 100644 +index 0000000000..f2c3df84a1 +--- /dev/null ++++ b/hotspot/src/os_cpu/linux_loongarch/vm/os_linux_loongarch.cpp +@@ -0,0 +1,750 @@ ++/* ++ * Copyright (c) 1999, 2014, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2021, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. 
++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++// no precompiled headers ++#include "asm/macroAssembler.hpp" ++#include "classfile/classLoader.hpp" ++#include "classfile/systemDictionary.hpp" ++#include "classfile/vmSymbols.hpp" ++#include "code/icBuffer.hpp" ++#include "code/vtableStubs.hpp" ++#include "interpreter/interpreter.hpp" ++#include "jvm_linux.h" ++#include "memory/allocation.inline.hpp" ++#include "mutex_linux.inline.hpp" ++#include "os_share_linux.hpp" ++#include "prims/jniFastGetField.hpp" ++#include "prims/jvm.h" ++#include "prims/jvm_misc.hpp" ++#include "runtime/arguments.hpp" ++#include "runtime/extendedPC.hpp" ++#include "runtime/frame.inline.hpp" ++#include "runtime/interfaceSupport.hpp" ++#include "runtime/java.hpp" ++#include "runtime/javaCalls.hpp" ++#include "runtime/mutexLocker.hpp" ++#include "runtime/osThread.hpp" ++#include "runtime/sharedRuntime.hpp" ++#include "runtime/stubRoutines.hpp" ++#include "runtime/thread.inline.hpp" ++#include "runtime/timer.hpp" ++#include "utilities/events.hpp" ++#include "utilities/vmError.hpp" ++#include "utilities/debug.hpp" ++#include "compiler/disassembler.hpp" ++// put OS-includes here ++# include ++# include ++# include ++# include ++# include ++# include ++# include ++# include ++# include ++# include ++# include ++# include ++# include ++# include ++# include ++# include ++# include ++# include ++# include ++# include ++ ++#define REG_SP 3 ++#define REG_FP 22 ++ ++address os::current_stack_pointer() { ++ register void *sp __asm__ ("$r3"); ++ return (address) sp; ++} ++ ++char* os::non_memory_address_word() { ++ // Must never look like an address returned by reserve_memory, ++ // even in its subfields (as defined by the CPU immediate fields, ++ // if the CPU splits constants across multiple instructions). ++ ++ return (char*) -1; ++} ++ ++void os::initialize_thread(Thread* thr) { ++// Nothing to do. ++} ++ ++address os::Linux::ucontext_get_pc(ucontext_t * uc) { ++ return (address)uc->uc_mcontext.__pc; ++} ++ ++intptr_t* os::Linux::ucontext_get_sp(ucontext_t * uc) { ++ return (intptr_t*)uc->uc_mcontext.__gregs[REG_SP]; ++} ++ ++intptr_t* os::Linux::ucontext_get_fp(ucontext_t * uc) { ++ return (intptr_t*)uc->uc_mcontext.__gregs[REG_FP]; ++} ++ ++// For Forte Analyzer AsyncGetCallTrace profiling support - thread ++// is currently interrupted by SIGPROF. ++// os::Solaris::fetch_frame_from_ucontext() tries to skip nested signal ++// frames. Currently we don't do that on Linux, so it's the same as ++// os::fetch_frame_from_context(). 
++ExtendedPC os::Linux::fetch_frame_from_ucontext(Thread* thread, ++ ucontext_t* uc, intptr_t** ret_sp, intptr_t** ret_fp) { ++ ++ assert(thread != NULL, "just checking"); ++ assert(ret_sp != NULL, "just checking"); ++ assert(ret_fp != NULL, "just checking"); ++ ++ return os::fetch_frame_from_context(uc, ret_sp, ret_fp); ++} ++ ++ExtendedPC os::fetch_frame_from_context(void* ucVoid, ++ intptr_t** ret_sp, intptr_t** ret_fp) { ++ ++ ExtendedPC epc; ++ ucontext_t* uc = (ucontext_t*)ucVoid; ++ ++ if (uc != NULL) { ++ epc = ExtendedPC(os::Linux::ucontext_get_pc(uc)); ++ if (ret_sp) *ret_sp = os::Linux::ucontext_get_sp(uc); ++ if (ret_fp) *ret_fp = os::Linux::ucontext_get_fp(uc); ++ } else { ++ // construct empty ExtendedPC for return value checking ++ epc = ExtendedPC(NULL); ++ if (ret_sp) *ret_sp = (intptr_t *)NULL; ++ if (ret_fp) *ret_fp = (intptr_t *)NULL; ++ } ++ ++ return epc; ++} ++ ++frame os::fetch_frame_from_context(void* ucVoid) { ++ intptr_t* sp; ++ intptr_t* fp; ++ ExtendedPC epc = fetch_frame_from_context(ucVoid, &sp, &fp); ++ return frame(sp, fp, epc.pc()); ++} ++ ++// By default, gcc always save frame pointer on stack. It may get ++// turned off by -fomit-frame-pointer, ++frame os::get_sender_for_C_frame(frame* fr) { ++ return frame(fr->sender_sp(), fr->link(), fr->sender_pc()); ++} ++ ++//intptr_t* _get_previous_fp() { ++intptr_t* __attribute__((noinline)) os::get_previous_fp() { ++ return (intptr_t*)__builtin_frame_address(0); ++} ++ ++frame os::current_frame() { ++ intptr_t* fp = (intptr_t*)get_previous_fp(); ++ frame myframe((intptr_t*)os::current_stack_pointer(), ++ (intptr_t*)fp, ++ CAST_FROM_FN_PTR(address, os::current_frame)); ++ if (os::is_first_C_frame(&myframe)) { ++ // stack is not walkable ++ return frame(); ++ } else { ++ return os::get_sender_for_C_frame(&myframe); ++ } ++} ++ ++extern "C" JNIEXPORT int ++JVM_handle_linux_signal(int sig, ++ siginfo_t* info, ++ void* ucVoid, ++ int abort_if_unrecognized) { ++#ifdef PRINT_SIGNAL_HANDLE ++ tty->print_cr("Signal: signo=%d, sicode=%d, sierrno=%d, siaddr=%lx", ++ info->si_signo, ++ info->si_code, ++ info->si_errno, ++ info->si_addr); ++#endif ++ ++ ucontext_t* uc = (ucontext_t*) ucVoid; ++ ++ Thread* t = ThreadLocalStorage::get_thread_slow(); ++ ++ SignalHandlerMark shm(t); ++ ++ // Note: it's not uncommon that JNI code uses signal/sigset to install ++ // then restore certain signal handler (e.g. to temporarily block SIGPIPE, ++ // or have a SIGILL handler when detecting CPU type). When that happens, ++ // JVM_handle_linux_signal() might be invoked with junk info/ucVoid. To ++ // avoid unnecessary crash when libjsig is not preloaded, try handle signals ++ // that do not require siginfo/ucontext first. 
++ ++ if (sig == SIGPIPE/* || sig == SIGXFSZ*/) { ++ // allow chained handler to go first ++ if (os::Linux::chained_handler(sig, info, ucVoid)) { ++ return true; ++ } else { ++ if (PrintMiscellaneous && (WizardMode || Verbose)) { ++ warning("Ignoring SIGPIPE - see bug 4229104"); ++ } ++ return true; ++ } ++ } ++ ++ JavaThread* thread = NULL; ++ VMThread* vmthread = NULL; ++ if (os::Linux::signal_handlers_are_installed) { ++ if (t != NULL ){ ++ if(t->is_Java_thread()) { ++#ifdef PRINT_SIGNAL_HANDLE ++ tty->print_cr("this thread is a java thread"); ++#endif ++ thread = (JavaThread*)t; ++ } ++ else if(t->is_VM_thread()){ ++#ifdef PRINT_SIGNAL_HANDLE ++ tty->print_cr("this thread is a VM thread\n"); ++#endif ++ vmthread = (VMThread *)t; ++ } ++ } ++ } ++ ++ // decide if this trap can be handled by a stub ++ address stub = NULL; ++ address pc = NULL; ++ ++ pc = (address) os::Linux::ucontext_get_pc(uc); ++#ifdef PRINT_SIGNAL_HANDLE ++ tty->print_cr("pc=%lx", pc); ++ os::print_context(tty, uc); ++#endif ++ //%note os_trap_1 ++ if (info != NULL && uc != NULL && thread != NULL) { ++ pc = (address) os::Linux::ucontext_get_pc(uc); ++ // Handle ALL stack overflow variations here ++ if (sig == SIGSEGV) { ++ address addr = (address) info->si_addr; ++#ifdef PRINT_SIGNAL_HANDLE ++ tty->print("handle all stack overflow variations: "); ++ /*tty->print("addr = %lx, stack base = %lx, stack top = %lx\n", ++ addr, ++ thread->stack_base(), ++ thread->stack_base() - thread->stack_size()); ++ */ ++#endif ++ ++ // check if fault address is within thread stack ++ if (addr < thread->stack_base() && ++ addr >= thread->stack_base() - thread->stack_size()) { ++ // stack overflow ++#ifdef PRINT_SIGNAL_HANDLE ++ tty->print("stack exception check \n"); ++#endif ++ if (thread->in_stack_yellow_zone(addr)) { ++#ifdef PRINT_SIGNAL_HANDLE ++ tty->print("exception addr is in yellow zone\n"); ++#endif ++ thread->disable_stack_yellow_zone(); ++ if (thread->thread_state() == _thread_in_Java) { ++ // Throw a stack overflow exception. Guard pages will be reenabled ++ // while unwinding the stack. ++#ifdef PRINT_SIGNAL_HANDLE ++ tty->print("this thread is in java\n"); ++#endif ++ stub = SharedRuntime::continuation_for_implicit_exception(thread, pc, SharedRuntime::STACK_OVERFLOW); ++ } else { ++ // Thread was in the vm or native code. Return and try to finish. ++#ifdef PRINT_SIGNAL_HANDLE ++ tty->print("this thread is in vm or native codes and return\n"); ++#endif ++ return 1; ++ } ++ } else if (thread->in_stack_red_zone(addr)) { ++ // Fatal red zone violation. Disable the guard pages and fall through ++ // to handle_unexpected_exception way down below. ++#ifdef PRINT_SIGNAL_HANDLE ++ tty->print("exception addr is in red zone\n"); ++#endif ++ thread->disable_stack_red_zone(); ++ tty->print_raw_cr("An irrecoverable stack overflow has occurred."); ++ ++ // This is a likely cause, but hard to verify. Let's just print ++ // it as a hint. ++ tty->print_raw_cr("Please check if any of your loaded .so files has " ++ "enabled executable stack (see man page execstack(8))"); ++ } else { ++ // Accessing stack address below sp may cause SEGV if current ++ // thread has MAP_GROWSDOWN stack. This should only happen when ++ // current thread was created by user code with MAP_GROWSDOWN flag ++ // and then attached to VM. See notes in os_linux.cpp. 
++#ifdef PRINT_SIGNAL_HANDLE ++ tty->print("exception addr is neither in yellow zone nor in the red one\n"); ++#endif ++ if (thread->osthread()->expanding_stack() == 0) { ++ thread->osthread()->set_expanding_stack(); ++ if (os::Linux::manually_expand_stack(thread, addr)) { ++ thread->osthread()->clear_expanding_stack(); ++ return 1; ++ } ++ thread->osthread()->clear_expanding_stack(); ++ } else { ++ fatal("recursive segv. expanding stack."); ++ } ++ } ++ } ++ } // sig == SIGSEGV ++ ++ if (thread->thread_state() == _thread_in_Java) { ++ // Java thread running in Java code => find exception handler if any ++ // a fault inside compiled code, the interpreter, or a stub ++#ifdef PRINT_SIGNAL_HANDLE ++ tty->print("java thread running in java code\n"); ++#endif ++ ++ // Handle signal from NativeJump::patch_verified_entry(). ++ if (sig == SIGILL && nativeInstruction_at(pc)->is_sigill_zombie_not_entrant()) { ++#ifdef PRINT_SIGNAL_HANDLE ++ tty->print_cr("verified entry = %lx, sig=%d", nativeInstruction_at(pc), sig); ++#endif ++ stub = SharedRuntime::get_handle_wrong_method_stub(); ++ } else if (sig == SIGSEGV && os::is_poll_address((address)info->si_addr)) { ++#ifdef PRINT_SIGNAL_HANDLE ++ tty->print_cr("polling address = %lx, sig=%d", os::get_polling_page(), sig); ++#endif ++ stub = SharedRuntime::get_poll_stub(pc); ++ } else if (sig == SIGBUS /* && info->si_code == BUS_OBJERR */) { ++ // BugId 4454115: A read from a MappedByteBuffer can fault ++ // here if the underlying file has been truncated. ++ // Do not crash the VM in such a case. ++ CodeBlob* cb = CodeCache::find_blob_unsafe(pc); ++ nmethod* nm = cb->is_nmethod() ? (nmethod*)cb : NULL; ++#ifdef PRINT_SIGNAL_HANDLE ++ tty->print("cb = %lx, nm = %lx\n", cb, nm); ++#endif ++ if (nm != NULL && nm->has_unsafe_access()) { ++ stub = StubRoutines::handler_for_unsafe_access(); ++ } ++ } else if (sig == SIGFPE /* && info->si_code == FPE_INTDIV */) { ++ // HACK: si_code does not work on linux 2.2.12-20!!! ++ int op = pc[0] & 0x3f; ++ int op1 = pc[3] & 0x3f; ++ //FIXME, Must port to LA code!! ++ switch (op) { ++ case 0x1e: //ddiv ++ case 0x1f: //ddivu ++ case 0x1a: //div ++ case 0x1b: //divu ++ case 0x34: //trap ++ // In LA, div_by_zero exception can only be triggered by explicit 'trap'. ++ stub = SharedRuntime::continuation_for_implicit_exception(thread, ++ pc, ++ SharedRuntime::IMPLICIT_DIVIDE_BY_ZERO); ++ break; ++ default: ++ // TODO: handle more cases if we are using other x86 instructions ++ // that can generate SIGFPE signal on linux. ++ tty->print_cr("unknown opcode 0x%X -0x%X with SIGFPE.", op, op1); ++ //fatal("please update this code."); ++ } ++ } else if (sig == SIGSEGV && ++ !MacroAssembler::needs_explicit_null_check((intptr_t)info->si_addr)) { ++#ifdef PRINT_SIGNAL_HANDLE ++ tty->print("continuation for implicit exception\n"); ++#endif ++ // Determination of interpreter/vtable stub/compiled code null exception ++ stub = SharedRuntime::continuation_for_implicit_exception(thread, pc, SharedRuntime::IMPLICIT_NULL); ++#ifdef PRINT_SIGNAL_HANDLE ++ tty->print_cr("continuation_for_implicit_exception stub: %lx", stub); ++#endif ++ } ++ } else if (thread->thread_state() == _thread_in_vm && ++ sig == SIGBUS && /* info->si_code == BUS_OBJERR && */ ++ thread->doing_unsafe_access()) { ++#ifdef PRINT_SIGNAL_HANDLE ++ tty->print_cr("SIGBUS in vm thread \n"); ++#endif ++ stub = StubRoutines::handler_for_unsafe_access(); ++ } ++ ++ // jni_fast_GetField can trap at certain pc's if a GC kicks in ++ // and the heap gets shrunk before the field access. 
++ if ((sig == SIGSEGV) || (sig == SIGBUS)) { ++#ifdef PRINT_SIGNAL_HANDLE ++ tty->print("jni fast get trap: "); ++#endif ++ address addr = JNI_FastGetField::find_slowcase_pc(pc); ++ if (addr != (address)-1) { ++ stub = addr; ++ } ++#ifdef PRINT_SIGNAL_HANDLE ++ tty->print_cr("addr = %d, stub = %lx", addr, stub); ++#endif ++ } ++ ++ // Check to see if we caught the safepoint code in the ++ // process of write protecting the memory serialization page. ++ // It write enables the page immediately after protecting it ++ // so we can just return to retry the write. ++ if ((sig == SIGSEGV) && ++ os::is_memory_serialize_page(thread, (address) info->si_addr)) { ++#ifdef PRINT_SIGNAL_HANDLE ++ tty->print("write protecting the memory serialiazation page\n"); ++#endif ++ // Block current thread until the memory serialize page permission restored. ++ os::block_on_serialize_page_trap(); ++ return true; ++ } ++ } ++ ++ if (stub != NULL) { ++#ifdef PRINT_SIGNAL_HANDLE ++ tty->print_cr("resolved stub=%lx\n",stub); ++#endif ++ // save all thread context in case we need to restore it ++ if (thread != NULL) thread->set_saved_exception_pc(pc); ++ ++ uc->uc_mcontext.__pc = (greg_t)stub; ++ return true; ++ } ++ ++ // signal-chaining ++ if (os::Linux::chained_handler(sig, info, ucVoid)) { ++#ifdef PRINT_SIGNAL_HANDLE ++ tty->print_cr("signal chaining\n"); ++#endif ++ return true; ++ } ++ ++ if (!abort_if_unrecognized) { ++#ifdef PRINT_SIGNAL_HANDLE ++ tty->print_cr("abort becauce of unrecognized\n"); ++#endif ++ // caller wants another chance, so give it to him ++ return false; ++ } ++ ++ if (pc == NULL && uc != NULL) { ++ pc = os::Linux::ucontext_get_pc(uc); ++ } ++ ++ // unmask current signal ++ sigset_t newset; ++ sigemptyset(&newset); ++ sigaddset(&newset, sig); ++ sigprocmask(SIG_UNBLOCK, &newset, NULL); ++#ifdef PRINT_SIGNAL_HANDLE ++ tty->print_cr("VMError in signal handler\n"); ++#endif ++ VMError err(t, sig, pc, info, ucVoid); ++ err.report_and_die(); ++ ++ ShouldNotReachHere(); ++ return true; // Mute compiler ++} ++ ++void os::Linux::init_thread_fpu_state(void) { ++} ++ ++int os::Linux::get_fpu_control_word(void) { ++ return 0; // mute compiler ++} ++ ++void os::Linux::set_fpu_control_word(int fpu_control) { ++} ++ ++bool os::is_allocatable(size_t bytes) { ++ ++ if (bytes < 2 * G) { ++ return true; ++ } ++ ++ char* addr = reserve_memory(bytes, NULL); ++ ++ if (addr != NULL) { ++ release_memory(addr, bytes); ++ } ++ ++ return addr != NULL; ++} ++ ++//////////////////////////////////////////////////////////////////////////////// ++// thread stack ++ ++size_t os::Linux::min_stack_allowed = 96 * K; ++ ++// Test if pthread library can support variable thread stack size. LinuxThreads ++// in fixed stack mode allocates 2M fixed slot for each thread. LinuxThreads ++// in floating stack mode and NPTL support variable stack size. ++bool os::Linux::supports_variable_stack_size() { ++ if (os::Linux::is_NPTL()) { ++ // NPTL, yes ++ return true; ++ ++ } else { ++ // Note: We can't control default stack size when creating a thread. ++ // If we use non-default stack size (pthread_attr_setstacksize), both ++ // floating stack and non-floating stack LinuxThreads will return the ++ // same value. This makes it impossible to implement this function by ++ // detecting thread stack size directly. ++ // ++ // An alternative approach is to check %gs. Fixed-stack LinuxThreads ++ // do not use %gs, so its value is 0. 
Floating-stack LinuxThreads use ++ // %gs (either as LDT selector or GDT selector, depending on kernel) ++ // to access thread specific data. ++ // ++ // Note that %gs is a reserved glibc register since early 2001, so ++ // applications are not allowed to change its value (Ulrich Drepper from ++ // Redhat confirmed that all known offenders have been modified to use ++ // either %fs or TSD). In the worst case scenario, when VM is embedded in ++ // a native application that plays with %gs, we might see non-zero %gs ++ // even LinuxThreads is running in fixed stack mode. As the result, we'll ++ // return true and skip _thread_safety_check(), so we may not be able to ++ // detect stack-heap collisions. But otherwise it's harmless. ++ // ++ return false; ++ } ++} ++ ++// return default stack size for thr_type ++size_t os::Linux::default_stack_size(os::ThreadType thr_type) { ++ // default stack size (compiler thread needs larger stack) ++ size_t s = (thr_type == os::compiler_thread ? 2 * M : 512 * K); ++ return s; ++} ++ ++size_t os::Linux::default_guard_size(os::ThreadType thr_type) { ++ // Creating guard page is very expensive. Java thread has HotSpot ++ // guard page, only enable glibc guard page for non-Java threads. ++ return (thr_type == java_thread ? 0 : page_size()); ++} ++ ++// Java thread: ++// ++// Low memory addresses ++// +------------------------+ ++// | |\ JavaThread created by VM does not have glibc ++// | glibc guard page | - guard, attached Java thread usually has ++// | |/ 1 page glibc guard. ++// P1 +------------------------+ Thread::stack_base() - Thread::stack_size() ++// | |\ ++// | HotSpot Guard Pages | - red and yellow pages ++// | |/ ++// +------------------------+ JavaThread::stack_yellow_zone_base() ++// | |\ ++// | Normal Stack | - ++// | |/ ++// P2 +------------------------+ Thread::stack_base() ++// ++// Non-Java thread: ++// ++// Low memory addresses ++// +------------------------+ ++// | |\ ++// | glibc guard page | - usually 1 page ++// | |/ ++// P1 +------------------------+ Thread::stack_base() - Thread::stack_size() ++// | |\ ++// | Normal Stack | - ++// | |/ ++// P2 +------------------------+ Thread::stack_base() ++// ++// ** P1 (aka bottom) and size ( P2 = P1 - size) are the address and stack size returned from ++// pthread_attr_getstack() ++ ++static void current_stack_region(address * bottom, size_t * size) { ++ if (os::is_primordial_thread()) { ++ // primordial thread needs special handling because pthread_getattr_np() ++ // may return bogus value. 
++ *bottom = os::Linux::initial_thread_stack_bottom(); ++ *size = os::Linux::initial_thread_stack_size(); ++ } else { ++ pthread_attr_t attr; ++ ++ int rslt = pthread_getattr_np(pthread_self(), &attr); ++ ++ // JVM needs to know exact stack location, abort if it fails ++ if (rslt != 0) { ++ if (rslt == ENOMEM) { ++ vm_exit_out_of_memory(0, OOM_MMAP_ERROR, "pthread_getattr_np"); ++ } else { ++ fatal(err_msg("pthread_getattr_np failed with errno = %d", rslt)); ++ } ++ } ++ ++ if (pthread_attr_getstack(&attr, (void **)bottom, size) != 0) { ++ fatal("Can not locate current stack attributes!"); ++ } ++ ++ pthread_attr_destroy(&attr); ++ ++ } ++ assert(os::current_stack_pointer() >= *bottom && ++ os::current_stack_pointer() < *bottom + *size, "just checking"); ++} ++ ++address os::current_stack_base() { ++ address bottom; ++ size_t size; ++ current_stack_region(&bottom, &size); ++ return (bottom + size); ++} ++ ++size_t os::current_stack_size() { ++ // stack size includes normal stack and HotSpot guard pages ++ address bottom; ++ size_t size; ++ current_stack_region(&bottom, &size); ++ return size; ++} ++ ++///////////////////////////////////////////////////////////////////////////// ++// helper functions for fatal error handler ++void os::print_register_info(outputStream *st, void *context) { ++ ++ ucontext_t *uc = (ucontext_t*)context; ++ ++ st->print_cr("Register to memory mapping:"); ++ st->cr(); ++ // this is horrendously verbose but the layout of the registers in the ++ // // context does not match how we defined our abstract Register set, so ++ // // we can't just iterate through the gregs area ++ // ++ // // this is only for the "general purpose" registers ++ st->print("ZERO=" ); print_location(st, uc->uc_mcontext.__gregs[0]); ++ st->print("RA=" ); print_location(st, uc->uc_mcontext.__gregs[1]); ++ st->print("TP=" ); print_location(st, uc->uc_mcontext.__gregs[2]); ++ st->print("SP=" ); print_location(st, uc->uc_mcontext.__gregs[3]); ++ st->cr(); ++ st->print("A0=" ); print_location(st, uc->uc_mcontext.__gregs[4]); ++ st->print("A1=" ); print_location(st, uc->uc_mcontext.__gregs[5]); ++ st->print("A2=" ); print_location(st, uc->uc_mcontext.__gregs[6]); ++ st->print("A3=" ); print_location(st, uc->uc_mcontext.__gregs[7]); ++ st->cr(); ++ st->print("A4=" ); print_location(st, uc->uc_mcontext.__gregs[8]); ++ st->print("A5=" ); print_location(st, uc->uc_mcontext.__gregs[9]); ++ st->print("A6=" ); print_location(st, uc->uc_mcontext.__gregs[10]); ++ st->print("A7=" ); print_location(st, uc->uc_mcontext.__gregs[11]); ++ st->cr(); ++ st->print("T0=" ); print_location(st, uc->uc_mcontext.__gregs[12]); ++ st->print("T1=" ); print_location(st, uc->uc_mcontext.__gregs[13]); ++ st->print("T2=" ); print_location(st, uc->uc_mcontext.__gregs[14]); ++ st->print("T3=" ); print_location(st, uc->uc_mcontext.__gregs[15]); ++ st->cr(); ++ st->print("T4=" ); print_location(st, uc->uc_mcontext.__gregs[16]); ++ st->print("T5=" ); print_location(st, uc->uc_mcontext.__gregs[17]); ++ st->print("T6=" ); print_location(st, uc->uc_mcontext.__gregs[18]); ++ st->print("T7=" ); print_location(st, uc->uc_mcontext.__gregs[19]); ++ st->cr(); ++ st->print("T8=" ); print_location(st, uc->uc_mcontext.__gregs[20]); ++ st->print("RX=" ); print_location(st, uc->uc_mcontext.__gregs[21]); ++ st->print("FP=" ); print_location(st, uc->uc_mcontext.__gregs[22]); ++ st->print("S0=" ); print_location(st, uc->uc_mcontext.__gregs[23]); ++ st->cr(); ++ st->print("S1=" ); print_location(st, uc->uc_mcontext.__gregs[24]); ++ st->print("S2=" ); 
print_location(st, uc->uc_mcontext.__gregs[25]); ++ st->print("S3=" ); print_location(st, uc->uc_mcontext.__gregs[26]); ++ st->print("S4=" ); print_location(st, uc->uc_mcontext.__gregs[27]); ++ st->cr(); ++ st->print("S5=" ); print_location(st, uc->uc_mcontext.__gregs[28]); ++ st->print("S6=" ); print_location(st, uc->uc_mcontext.__gregs[29]); ++ st->print("S7=" ); print_location(st, uc->uc_mcontext.__gregs[30]); ++ st->print("S8=" ); print_location(st, uc->uc_mcontext.__gregs[31]); ++ st->cr(); ++ ++} ++void os::print_context(outputStream *st, void *context) { ++ ++ ucontext_t *uc = (ucontext_t*)context; ++ st->print_cr("Registers:"); ++ st->print( "ZERO=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.__gregs[0]); ++ st->print(", RA=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.__gregs[1]); ++ st->print(", TP=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.__gregs[2]); ++ st->print(", SP=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.__gregs[3]); ++ st->cr(); ++ st->print( "A0=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.__gregs[4]); ++ st->print(", A1=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.__gregs[5]); ++ st->print(", A2=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.__gregs[6]); ++ st->print(", A3=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.__gregs[7]); ++ st->cr(); ++ st->print( "A4=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.__gregs[8]); ++ st->print(", A5=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.__gregs[9]); ++ st->print(", A6=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.__gregs[10]); ++ st->print(", A7=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.__gregs[11]); ++ st->cr(); ++ st->print( "T0=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.__gregs[12]); ++ st->print(", T1=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.__gregs[13]); ++ st->print(", T2=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.__gregs[14]); ++ st->print(", T3=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.__gregs[15]); ++ st->cr(); ++ st->print( "T4=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.__gregs[16]); ++ st->print(", T5=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.__gregs[17]); ++ st->print(", T6=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.__gregs[18]); ++ st->print(", T7=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.__gregs[19]); ++ st->cr(); ++ st->print( "T8=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.__gregs[20]); ++ st->print(", RX=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.__gregs[21]); ++ st->print(", FP=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.__gregs[22]); ++ st->print(", S0=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.__gregs[23]); ++ st->cr(); ++ st->print( "S1=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.__gregs[24]); ++ st->print(", S2=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.__gregs[25]); ++ st->print(", S3=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.__gregs[26]); ++ st->print(", S4=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.__gregs[27]); ++ st->cr(); ++ st->print( "S5=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.__gregs[28]); ++ st->print(", S6=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.__gregs[29]); ++ st->print(", S7=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.__gregs[30]); ++ st->print(", S8=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.__gregs[31]); ++ st->cr(); ++ st->cr(); ++ ++ intptr_t *sp = (intptr_t *)os::Linux::ucontext_get_sp(uc); ++ st->print_cr("Top of Stack: (sp=" PTR_FORMAT ")", p2i(sp)); ++ //print_hex_dump(st, (address)sp, (address)(sp + 8*sizeof(intptr_t)), sizeof(intptr_t)); ++ print_hex_dump(st, (address)sp-32, (address)(sp + 32), sizeof(intptr_t)); ++ st->cr(); ++ ++ // Note: it may be unsafe to inspect memory near pc. 
For example, pc may ++ // point to garbage if entry point in an nmethod is corrupted. Leave ++ // this at the end, and hope for the best. ++ address pc = os::Linux::ucontext_get_pc(uc); ++ st->print_cr("Instructions: (pc=" PTR_FORMAT ")", p2i(pc)); ++ print_hex_dump(st, pc - 64, pc + 64, sizeof(char)); ++ Disassembler::decode(pc - 80, pc + 80, st); ++} ++ ++void os::setup_fpu() { ++ // no use for LA ++} ++ ++#ifndef PRODUCT ++void os::verify_stack_alignment() { ++ assert(((intptr_t)os::current_stack_pointer() & (StackAlignmentInBytes-1)) == 0, "incorrect stack alignment"); ++} ++#endif ++ ++bool os::is_ActiveCoresMP() { ++ return UseActiveCoresMP && _initial_active_processor_count == 1; ++} +diff --git a/hotspot/src/os_cpu/linux_loongarch/vm/os_linux_loongarch.hpp b/hotspot/src/os_cpu/linux_loongarch/vm/os_linux_loongarch.hpp +new file mode 100644 +index 0000000000..a7321ae025 +--- /dev/null ++++ b/hotspot/src/os_cpu/linux_loongarch/vm/os_linux_loongarch.hpp +@@ -0,0 +1,39 @@ ++/* ++ * Copyright (c) 1999, 2013, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#ifndef OS_CPU_LINUX_LOONGARCH_VM_OS_LINUX_LOONGARCH_HPP ++#define OS_CPU_LINUX_LOONGARCH_VM_OS_LINUX_LOONGARCH_HPP ++ ++ static void setup_fpu(); ++ static bool is_allocatable(size_t bytes); ++ static intptr_t *get_previous_fp(); ++ ++ // Used to register dynamic code cache area with the OS ++ // Note: Currently only used in 64 bit Windows implementations ++ static bool register_code_area(char *low, char *high) { return true; } ++ ++ static bool is_ActiveCoresMP(); ++ ++#endif // OS_CPU_LINUX_LOONGARCH_VM_OS_LINUX_LOONGARCH_HPP +diff --git a/hotspot/src/os_cpu/linux_loongarch/vm/prefetch_linux_loongarch.inline.hpp b/hotspot/src/os_cpu/linux_loongarch/vm/prefetch_linux_loongarch.inline.hpp +new file mode 100644 +index 0000000000..a1cedcd8cf +--- /dev/null ++++ b/hotspot/src/os_cpu/linux_loongarch/vm/prefetch_linux_loongarch.inline.hpp +@@ -0,0 +1,56 @@ ++/* ++ * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. 
++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#ifndef OS_CPU_LINUX_LOONGARCH_VM_PREFETCH_LINUX_LOONGARCH_INLINE_HPP ++#define OS_CPU_LINUX_LOONGARCH_VM_PREFETCH_LINUX_LOONGARCH_INLINE_HPP ++ ++ ++inline void Prefetch::read (void *loc, intx interval) { ++// According to previous and present SPECjbb2015 score, ++// comment prefetch is better than if (interval >= 0) prefetch branch. ++// So choose comment prefetch as the base line. ++#if 0 ++ __asm__ __volatile__ ( ++ " preld 0, %[__loc] \n" ++ : ++ : [__loc] "m"( *((address)loc + interval) ) ++ : "memory" ++ ); ++#endif ++} ++ ++inline void Prefetch::write(void *loc, intx interval) { ++// Ditto ++#if 0 ++ __asm__ __volatile__ ( ++ " preld 8, %[__loc] \n" ++ : ++ : [__loc] "m"( *((address)loc + interval) ) ++ : "memory" ++ ); ++#endif ++} ++ ++#endif // OS_CPU_LINUX_LOONGARCH_VM_PREFETCH_LINUX_LOONGARCH_INLINE_HPP +diff --git a/hotspot/src/os_cpu/linux_loongarch/vm/threadLS_linux_loongarch.cpp b/hotspot/src/os_cpu/linux_loongarch/vm/threadLS_linux_loongarch.cpp +new file mode 100644 +index 0000000000..be28a562a1 +--- /dev/null ++++ b/hotspot/src/os_cpu/linux_loongarch/vm/threadLS_linux_loongarch.cpp +@@ -0,0 +1,84 @@ ++/* ++ * Copyright (c) 1999, 2010, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#include "precompiled.hpp" ++#include "runtime/thread.inline.hpp" ++#include "runtime/threadLocalStorage.hpp" ++ ++// Map stack pointer (%esp) to thread pointer for faster TLS access ++// ++// Here we use a flat table for better performance. Getting current thread ++// is down to one memory access (read _sp_map[%esp>>12]) in generated code ++// and two in runtime code (-fPIC code needs an extra load for _sp_map). 
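++// On LoongArch the lookup is written as _sp_map[(sp >> PAGE_SHIFT) & mask], with ++// PAGE_SHIFT = 14 and SP_BITLENGTH = 34 when MINIMIZE_RAM_USAGE is defined; see ++// ThreadLocalStorage::thread() in threadLS_linux_loongarch.hpp.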
++// ++// This code assumes stack page is not shared by different threads. It works ++// in 32-bit VM when page size is 4K (or a multiple of 4K, if that matters). ++// ++// Notice that _sp_map is allocated in the bss segment, which is ZFOD ++// (zero-fill-on-demand). While it reserves 4M address space upfront, ++// actual memory pages are committed on demand. ++// ++// If an application creates and destroys a lot of threads, usually the ++// stack space freed by a thread will soon get reused by new thread ++// (this is especially true in NPTL or LinuxThreads in fixed-stack mode). ++// No memory page in _sp_map is wasted. ++// ++// However, it's still possible that we might end up populating & ++// committing a large fraction of the 4M table over time, but the actual ++// amount of live data in the table could be quite small. The max wastage ++// is less than 4M bytes. If it becomes an issue, we could use madvise() ++// with MADV_DONTNEED to reclaim unused (i.e. all-zero) pages in _sp_map. ++// MADV_DONTNEED on Linux keeps the virtual memory mapping, but zaps the ++// physical memory page (i.e. similar to MADV_FREE on Solaris). ++ ++#ifdef MINIMIZE_RAM_USAGE ++Thread* ThreadLocalStorage::_sp_map[1UL << (SP_BITLENGTH - PAGE_SHIFT)]; ++#endif // MINIMIZE_RAM_USAGE ++ ++void ThreadLocalStorage::generate_code_for_get_thread() { ++ // nothing we can do here for user-level thread ++} ++ ++void ThreadLocalStorage::pd_init() { ++#ifdef MINIMIZE_RAM_USAGE ++ assert(align_size_down(os::vm_page_size(), PAGE_SIZE) == os::vm_page_size(), ++ "page size must be multiple of PAGE_SIZE"); ++#endif // MINIMIZE_RAM_USAGE ++} ++ ++void ThreadLocalStorage::pd_set_thread(Thread* thread) { ++ os::thread_local_storage_at_put(ThreadLocalStorage::thread_index(), thread); ++#ifdef MINIMIZE_RAM_USAGE ++ address stack_top = os::current_stack_base(); ++ size_t stack_size = os::current_stack_size(); ++ ++ for (address p = stack_top - stack_size; p < stack_top; p += PAGE_SIZE) { ++ int index = ((uintptr_t)p >> PAGE_SHIFT) & ((1UL << (SP_BITLENGTH - PAGE_SHIFT)) - 1); ++ assert(thread == NULL || _sp_map[index] == NULL || thread == _sp_map[index], ++ "thread exited without detaching from VM??"); ++ _sp_map[index] = thread; ++ } ++#endif // MINIMIZE_RAM_USAGE ++} +diff --git a/hotspot/src/os_cpu/linux_loongarch/vm/threadLS_linux_loongarch.hpp b/hotspot/src/os_cpu/linux_loongarch/vm/threadLS_linux_loongarch.hpp +new file mode 100644 +index 0000000000..4fab788a75 +--- /dev/null ++++ b/hotspot/src/os_cpu/linux_loongarch/vm/threadLS_linux_loongarch.hpp +@@ -0,0 +1,61 @@ ++/* ++ * Copyright (c) 1999, 2010, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). 
++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#ifndef OS_CPU_LINUX_LOONGARCH_VM_THREADLS_LINUX_LOONGARCH_HPP ++#define OS_CPU_LINUX_LOONGARCH_VM_THREADLS_LINUX_LOONGARCH_HPP ++ ++#ifdef MINIMIZE_RAM_USAGE ++ // Processor dependent parts of ThreadLocalStorage ++ //only the low 2G space for user program in Linux ++ ++ #define SP_BITLENGTH 34 ++ #define PAGE_SHIFT 14 ++ #define PAGE_SIZE (1UL << PAGE_SHIFT) ++ ++ static Thread* _sp_map[1UL << (SP_BITLENGTH - PAGE_SHIFT)]; ++ static int _sp_map_low; ++ static int _sp_map_high; ++#endif // MINIMIZE_RAM_USAGE ++ ++public: ++#ifdef MINIMIZE_RAM_USAGE ++ static Thread** sp_map_addr() { return _sp_map; } ++#endif // MINIMIZE_RAM_USAGE ++ ++ static Thread* thread() { ++#ifdef MINIMIZE_RAM_USAGE ++ /* Thread::thread() can also be optimized in the same way as __get_thread() */ ++ //return (Thread*) os::thread_local_storage_at(thread_index()); ++ uintptr_t sp; ++ uintptr_t mask = (1UL << (SP_BITLENGTH - PAGE_SHIFT)) - 1; ++ ++ __asm__ __volatile__ ("addi.d %0, $r29, 0 " : "=r" (sp)); ++ ++ return _sp_map[(sp >> PAGE_SHIFT) & mask]; ++#else ++ return (Thread*) os::thread_local_storage_at(thread_index()); ++#endif // MINIMIZE_RAM_USAGE ++ } ++#endif // OS_CPU_LINUX_LOONGARCH_VM_THREADLS_LINUX_LOONGARCH_HPP +diff --git a/hotspot/src/os_cpu/linux_loongarch/vm/thread_linux_loongarch.cpp b/hotspot/src/os_cpu/linux_loongarch/vm/thread_linux_loongarch.cpp +new file mode 100644 +index 0000000000..44f666d61f +--- /dev/null ++++ b/hotspot/src/os_cpu/linux_loongarch/vm/thread_linux_loongarch.cpp +@@ -0,0 +1,99 @@ ++/* ++ * Copyright (c) 2003, 2013, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2021, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ * ++ */ ++ ++#include "precompiled.hpp" ++#include "runtime/frame.inline.hpp" ++#include "runtime/thread.inline.hpp" ++#include "runtime/sharedRuntime.hpp" ++ ++void JavaThread::pd_initialize() ++{ ++ _anchor.clear(); ++} ++ ++// For Forte Analyzer AsyncGetCallTrace profiling support - thread is ++// currently interrupted by SIGPROF ++bool JavaThread::pd_get_top_frame_for_signal_handler(frame* fr_addr, ++ void* ucontext, bool isInJava) { ++ ++ assert(Thread::current() == this, "caller must be current thread"); ++ return pd_get_top_frame(fr_addr, ucontext, isInJava); ++} ++ ++bool JavaThread::pd_get_top_frame_for_profiling(frame* fr_addr, void* ucontext, bool isInJava) { ++ return pd_get_top_frame(fr_addr, ucontext, isInJava); ++} ++ ++bool JavaThread::pd_get_top_frame(frame* fr_addr, void* ucontext, bool isInJava) { ++ assert(this->is_Java_thread(), "must be JavaThread"); ++ JavaThread* jt = (JavaThread *)this; ++ ++ // If we have a last_Java_frame, then we should use it even if ++ // isInJava == true. It should be more reliable than ucontext info. ++ if (jt->has_last_Java_frame() && jt->frame_anchor()->walkable()) { ++ *fr_addr = jt->pd_last_frame(); ++ return true; ++ } ++ ++ // At this point, we don't have a last_Java_frame, so ++ // we try to glean some information out of the ucontext ++ // if we were running Java code when SIGPROF came in. ++ if (isInJava) { ++ ucontext_t* uc = (ucontext_t*) ucontext; ++ ++ intptr_t* ret_fp; ++ intptr_t* ret_sp; ++ ExtendedPC addr = os::Linux::fetch_frame_from_ucontext(this, uc, ++ &ret_sp, &ret_fp); ++ if (addr.pc() == NULL || ret_sp == NULL ) { ++ // ucontext wasn't useful ++ return false; ++ } ++ ++ frame ret_frame(ret_sp, ret_fp, addr.pc()); ++ if (!ret_frame.safe_for_sender(jt)) { ++#ifdef COMPILER2 ++ // C2 uses ebp as a general register see if NULL fp helps ++ frame ret_frame2(ret_sp, NULL, addr.pc()); ++ if (!ret_frame2.safe_for_sender(jt)) { ++ // nothing else to try if the frame isn't good ++ return false; ++ } ++ ret_frame = ret_frame2; ++#else ++ // nothing else to try if the frame isn't good ++ return false; ++#endif /* COMPILER2 */ ++ } ++ *fr_addr = ret_frame; ++ return true; ++ } ++ ++ // nothing else to try ++ return false; ++} ++ ++void JavaThread::cache_global_variables() { } ++ +diff --git a/hotspot/src/os_cpu/linux_loongarch/vm/thread_linux_loongarch.hpp b/hotspot/src/os_cpu/linux_loongarch/vm/thread_linux_loongarch.hpp +new file mode 100644 +index 0000000000..d6dd2521f4 +--- /dev/null ++++ b/hotspot/src/os_cpu/linux_loongarch/vm/thread_linux_loongarch.hpp +@@ -0,0 +1,75 @@ ++/* ++ * Copyright (c) 2000, 2013, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2021, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). 
++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#ifndef OS_CPU_LINUX_LOONGARCH_VM_THREAD_LINUX_LOONGARCH_HPP ++#define OS_CPU_LINUX_LOONGARCH_VM_THREAD_LINUX_LOONGARCH_HPP ++ ++ private: ++ void pd_initialize(); ++ ++ frame pd_last_frame() { ++ assert(has_last_Java_frame(), "must have last_Java_sp() when suspended"); ++ if (_anchor.last_Java_pc() != NULL) { ++ return frame(_anchor.last_Java_sp(), _anchor.last_Java_fp(), _anchor.last_Java_pc()); ++ } else { ++ // This will pick up pc from sp ++ return frame(_anchor.last_Java_sp(), _anchor.last_Java_fp()); ++ } ++ } ++ ++ ++ public: ++ // Mutators are highly dangerous.... ++ intptr_t* last_Java_fp() { return _anchor.last_Java_fp(); } ++ void set_last_Java_fp(intptr_t* fp) { _anchor.set_last_Java_fp(fp); } ++ ++ void set_base_of_stack_pointer(intptr_t* base_sp) { ++ } ++ ++ static ByteSize last_Java_fp_offset() { ++ return byte_offset_of(JavaThread, _anchor) + JavaFrameAnchor::last_Java_fp_offset(); ++ } ++ ++ intptr_t* base_of_stack_pointer() { ++ return NULL; ++ } ++ void record_base_of_stack_pointer() { ++ } ++ ++ bool pd_get_top_frame_for_signal_handler(frame* fr_addr, void* ucontext, ++ bool isInJava); ++ ++ bool pd_get_top_frame_for_profiling(frame* fr_addr, void* ucontext, bool isInJava); ++private: ++ bool pd_get_top_frame(frame* fr_addr, void* ucontext, bool isInJava); ++public: ++ ++ // These routines are only used on cpu architectures that ++ // have separate register stacks (Itanium). ++ static bool register_stack_overflow() { return false; } ++ static void enable_register_stack_guard() {} ++ static void disable_register_stack_guard() {} ++ ++#endif // OS_CPU_LINUX_LOONGARCH_VM_THREAD_LINUX_LOONGARCH_HPP +diff --git a/hotspot/src/os_cpu/linux_loongarch/vm/vmStructs_linux_loongarch.hpp b/hotspot/src/os_cpu/linux_loongarch/vm/vmStructs_linux_loongarch.hpp +new file mode 100644 +index 0000000000..0097cadcb7 +--- /dev/null ++++ b/hotspot/src/os_cpu/linux_loongarch/vm/vmStructs_linux_loongarch.hpp +@@ -0,0 +1,55 @@ ++/* ++ * Copyright (c) 2000, 2013, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2020, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ * ++ */ ++ ++#ifndef OS_CPU_LINUX_LOONGARCH_VM_VMSTRUCTS_LINUX_LOONGARCH_HPP ++#define OS_CPU_LINUX_LOONGARCH_VM_VMSTRUCTS_LINUX_LOONGARCH_HPP ++ ++// These are the OS and CPU-specific fields, types and integer ++// constants required by the Serviceability Agent. This file is ++// referenced by vmStructs.cpp. ++ ++#define VM_STRUCTS_OS_CPU(nonstatic_field, static_field, unchecked_nonstatic_field, volatile_nonstatic_field, nonproduct_nonstatic_field, c2_nonstatic_field, unchecked_c1_static_field, unchecked_c2_static_field) \ ++ \ ++ /******************************/ \ ++ /* Threads (NOTE: incomplete) */ \ ++ /******************************/ \ ++ nonstatic_field(OSThread, _thread_id, pid_t) \ ++ nonstatic_field(OSThread, _pthread_id, pthread_t) ++ ++ ++#define VM_TYPES_OS_CPU(declare_type, declare_toplevel_type, declare_oop_type, declare_integer_type, declare_unsigned_integer_type, declare_c1_toplevel_type, declare_c2_type, declare_c2_toplevel_type) \ ++ \ ++ /**********************/ \ ++ /* Posix Thread IDs */ \ ++ /**********************/ \ ++ \ ++ declare_integer_type(pid_t) \ ++ declare_unsigned_integer_type(pthread_t) ++ ++#define VM_INT_CONSTANTS_OS_CPU(declare_constant, declare_preprocessor_constant, declare_c1_constant, declare_c2_constant, declare_c2_preprocessor_constant) ++ ++#define VM_LONG_CONSTANTS_OS_CPU(declare_constant, declare_preprocessor_constant, declare_c1_constant, declare_c2_constant, declare_c2_preprocessor_constant) ++ ++#endif // OS_CPU_LINUX_LOONGARCH_VM_VMSTRUCTS_LINUX_LOONGARCH_HPP +diff --git a/hotspot/src/os_cpu/linux_loongarch/vm/vm_version_linux_loongarch.cpp b/hotspot/src/os_cpu/linux_loongarch/vm/vm_version_linux_loongarch.cpp +new file mode 100644 +index 0000000000..80a1538de9 +--- /dev/null ++++ b/hotspot/src/os_cpu/linux_loongarch/vm/vm_version_linux_loongarch.cpp +@@ -0,0 +1,29 @@ ++/* ++ * Copyright (c) 2006, 2010, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2023, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#include "precompiled.hpp" ++#include "runtime/os.hpp" ++#include "vm_version_loongarch.hpp" ++ +diff --git a/hotspot/src/os_cpu/linux_mips/vm/assembler_linux_mips.cpp b/hotspot/src/os_cpu/linux_mips/vm/assembler_linux_mips.cpp +new file mode 100644 +index 0000000000..4ba53d9341 +--- /dev/null ++++ b/hotspot/src/os_cpu/linux_mips/vm/assembler_linux_mips.cpp +@@ -0,0 +1,111 @@ ++/* ++ * Copyright (c) 1999, 2010, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2022, Loongson Technology. 
All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#include "precompiled.hpp" ++#include "asm/macroAssembler.hpp" ++#include "asm/macroAssembler.inline.hpp" ++#include "runtime/os.hpp" ++#include "runtime/threadLocalStorage.hpp" ++ ++#define A0 RA0 ++#define A1 RA1 ++#define A2 RA2 ++#define A3 RA3 ++#define A4 RA4 ++#define A5 RA5 ++#define A6 RA6 ++#define A7 RA7 ++#define T0 RT0 ++#define T1 RT1 ++#define T2 RT2 ++#define T3 RT3 ++#define T8 RT8 ++#define T9 RT9 ++ ++void MacroAssembler::get_thread(Register thread) { ++#ifdef MINIMIZE_RAM_USAGE ++// ++// In MIPS64, we don't use full 64-bit address space. ++// Only a small range is actually used. ++// ++// Example: ++// $ cat /proc/13352/maps ++// 120000000-120010000 r-xp 00000000 08:01 41077 /mnt/openjdk6-mips-full/build/linux-mips64/j2sdk-image/bin/java ++// 12001c000-120020000 rw-p 0000c000 08:01 41077 /mnt/openjdk6-mips-full/build/linux-mips64/j2sdk-image/bin/java ++// 120020000-1208dc000 rwxp 00000000 00:00 0 [heap] ++// 555d574000-555d598000 r-xp 00000000 08:01 2073768 /lib/ld-2.12.so ++// 555d598000-555d59c000 rw-p 00000000 00:00 0 ++// ...... ++// 558b1f8000-558b23c000 rwxp 00000000 00:00 0 ++// 558b23c000-558b248000 ---p 00000000 00:00 0 ++// 558b248000-558b28c000 rwxp 00000000 00:00 0 ++// ffff914000-ffff94c000 rwxp 00000000 00:00 0 [stack] ++// ffffffc000-10000000000 r-xp 00000000 00:00 0 [vdso] ++// ++// All stacks are positioned at 0x55________. ++// Therefore, we can utilize the same algorithm used in 32-bit. 
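++ // The sequence below performs that lookup in generated code: shift SP right by ++ // PAGE_SHIFT, mask the result to the size of _sp_map, scale it by sizeof(Thread*), ++ // add the address of _sp_map, and load the cached Thread* from the table.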
++ // int index = ((uintptr_t)p >> PAGE_SHIFT) & ((1UL << (SP_BITLENGTH - PAGE_SHIFT)) - 1); ++ // Thread* thread = _sp_map[index]; ++ Register tmp; ++ ++ if (thread == AT) ++ tmp = T9; ++ else ++ tmp = AT; ++ ++ move(thread, SP); ++ shr(thread, PAGE_SHIFT); ++ ++ push(tmp); ++ li(tmp, ((1UL << (SP_BITLENGTH - PAGE_SHIFT)) - 1)); ++ andr(thread, thread, tmp); ++ shl(thread, Address::times_ptr); // sizeof(Thread *) ++ li48(tmp, (long)ThreadLocalStorage::sp_map_addr()); ++ addu(tmp, tmp, thread); ++ ld_ptr(thread, tmp, 0); ++ pop(tmp); ++#else ++ if (thread != V0) { ++ push(V0); ++ } ++ pushad_except_v0(); ++ ++ move(A0, ThreadLocalStorage::thread_index()); ++ push(S5); ++ move(S5, SP); ++ move(AT, -StackAlignmentInBytes); ++ andr(SP, SP, AT); ++ call(CAST_FROM_FN_PTR(address, pthread_getspecific)); ++ delayed()->nop(); ++ move(SP, S5); ++ pop(S5); ++ ++ popad_except_v0(); ++ if (thread != V0) { ++ move(thread, V0); ++ pop(V0); ++ } ++#endif // MINIMIZE_RAM_USAGE ++} +diff --git a/hotspot/src/os_cpu/linux_mips/vm/atomic_linux_mips.inline.hpp b/hotspot/src/os_cpu/linux_mips/vm/atomic_linux_mips.inline.hpp +new file mode 100644 +index 0000000000..1c7ad605e9 +--- /dev/null ++++ b/hotspot/src/os_cpu/linux_mips/vm/atomic_linux_mips.inline.hpp +@@ -0,0 +1,258 @@ ++/* ++ * Copyright (c) 1999, 2013, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ * ++ */ ++ ++#ifndef OS_CPU_LINUX_MIPS_VM_ATOMIC_LINUX_MIPS_INLINE_HPP ++#define OS_CPU_LINUX_MIPS_VM_ATOMIC_LINUX_MIPS_INLINE_HPP ++ ++#include "orderAccess_linux_mips.inline.hpp" ++#include "runtime/atomic.hpp" ++#include "runtime/os.hpp" ++#include "vm_version_mips.hpp" ++ ++// Implementation of class atomic ++ ++inline void Atomic::store (jbyte store_value, jbyte* dest) { *dest = store_value; } ++inline void Atomic::store (jshort store_value, jshort* dest) { *dest = store_value; } ++inline void Atomic::store (jint store_value, jint* dest) { *dest = store_value; } ++inline void Atomic::store (jlong store_value, jlong* dest) { *dest = store_value; } ++inline void Atomic::store_ptr(intptr_t store_value, intptr_t* dest) { *dest = store_value; } ++inline void Atomic::store_ptr(void* store_value, void* dest) { *(void**)dest = store_value; } ++ ++inline void Atomic::store (jbyte store_value, volatile jbyte* dest) { *dest = store_value; } ++inline void Atomic::store (jshort store_value, volatile jshort* dest) { *dest = store_value; } ++inline void Atomic::store (jint store_value, volatile jint* dest) { *dest = store_value; } ++inline void Atomic::store (jlong store_value, volatile jlong* dest) { *dest = store_value; } ++inline void Atomic::store_ptr(intptr_t store_value, volatile intptr_t* dest) { *dest = store_value; } ++inline void Atomic::store_ptr(void* store_value, volatile void* dest) { *(void**)dest = store_value; } ++ ++inline jlong Atomic::load (volatile jlong* src) { return *src; } ++ ++///////////implementation of Atomic::add*///////////////// ++inline jint Atomic::add (jint add_value, volatile jint* dest) { ++ jint __ret, __tmp; ++ __asm__ __volatile__ ( ++ " .set push\n\t" ++ " .set mips64\n\t" ++ " .set noreorder\n\t" ++ ++ "1: sync \n\t" ++ " ll %[__ret], %[__dest] \n\t" ++ " addu %[__tmp], %[__val], %[__ret] \n\t" ++ " sc %[__tmp], %[__dest] \n\t" ++ " beqz %[__tmp], 1b \n\t" ++ " nop \n\t" ++ ++ " .set pop\n\t" ++ ++ : [__ret] "=&r" (__ret), [__tmp] "=&r" (__tmp) ++ : [__dest] "m" (*(volatile jint*)dest), [__val] "r" (add_value) ++ : "memory" ++ ); ++ ++ return add_value + __ret; ++} ++ ++inline intptr_t Atomic::add_ptr (intptr_t add_value, volatile intptr_t* dest) { ++ jint __ret, __tmp; ++ __asm__ __volatile__ ( ++ " .set push\n\t" ++ " .set mips64\n\t" ++ " .set noreorder\n\t" ++ ++ "1: sync \n\t" ++ " lld %[__ret], %[__dest] \n\t" ++ " daddu %[__tmp], %[__val], %[__ret] \n\t" ++ " scd %[__tmp], %[__dest] \n\t" ++ " beqz %[__tmp], 1b \n\t" ++ " nop \n\t" ++ ++ " .set pop\n\t" ++ ++ : [__ret] "=&r" (__ret), [__tmp] "=&r" (__tmp) ++ : [__dest] "m" (*(volatile jint*)dest), [__val] "r" (add_value) ++ : "memory" ++ ); ++ ++ return add_value + __ret; ++} ++ ++inline void* Atomic::add_ptr (intptr_t add_value, volatile void* dest) { ++ return (void*)add_ptr((intptr_t)add_value, (volatile intptr_t*)dest); ++} ++ ++///////////implementation of Atomic::inc*///////////////// ++inline void Atomic::inc (volatile jint* dest) { (void)add(1, dest); } ++inline void Atomic::inc_ptr (volatile intptr_t* dest) { (void)add_ptr(1, dest); } ++inline void Atomic::inc_ptr (volatile void* dest) { (void)inc_ptr((volatile intptr_t*)dest); } ++ ++///////////implementation of Atomic::dec*///////////////// ++inline void Atomic::dec (volatile jint* dest) { (void)add(-1, dest); } ++inline void Atomic::dec_ptr (volatile intptr_t* dest) { (void)add_ptr(-1, dest); } ++inline void Atomic::dec_ptr (volatile void* dest) { (void)dec_ptr((volatile intptr_t*)dest); } ++ ++ ++///////////implementation of 
Atomic::xchg*///////////////// ++inline jint Atomic::xchg (jint exchange_value, volatile jint* dest) { ++ jint __ret, __tmp; ++ ++ __asm__ __volatile__ ( ++ " .set push\n\t" ++ " .set mips64\n\t" ++ " .set noreorder\n\t" ++ ++ "1: sync\n\t" ++ " ll %[__ret], %[__dest] \n\t" ++ " move %[__tmp], %[__val] \n\t" ++ " sc %[__tmp], %[__dest] \n\t" ++ " beqz %[__tmp], 1b \n\t" ++ " nop \n\t" ++ ++ " .set pop\n\t" ++ ++ : [__ret] "=&r" (__ret), [__tmp] "=&r" (__tmp) ++ : [__dest] "m" (*(volatile jint*)dest), [__val] "r" (exchange_value) ++ : "memory" ++ ); ++ ++ return __ret; ++} ++ ++inline intptr_t Atomic::xchg_ptr(intptr_t exchange_value, volatile intptr_t* dest) { ++ intptr_t __ret, __tmp; ++ __asm__ __volatile__ ( ++ " .set push\n\t" ++ " .set mips64\n\t" ++ " .set noreorder\n\t" ++ ++ "1: sync\n\t" ++ " lld %[__ret], %[__dest] \n\t" ++ " move %[__tmp], %[__val] \n\t" ++ " scd %[__tmp], %[__dest] \n\t" ++ " beqz %[__tmp], 1b \n\t" ++ " nop \n\t" ++ ++ " .set pop\n\t" ++ ++ : [__ret] "=&r" (__ret), [__tmp] "=&r" (__tmp) ++ : [__dest] "m" (*(volatile intptr_t*)dest), [__val] "r" (exchange_value) ++ : "memory" ++ ); ++ return __ret; ++} ++ ++inline void* Atomic::xchg_ptr(void* exchange_value, volatile void* dest) { ++ return (void*)xchg_ptr((intptr_t)exchange_value, (volatile intptr_t*)dest); ++} ++ ++///////////implementation of Atomic::cmpxchg*///////////////// ++inline jint Atomic::cmpxchg (jint exchange_value, volatile jint* dest, jint compare_value) { ++ jint __prev, __cmp; ++ ++ __asm__ __volatile__ ( ++ " .set push\n\t" ++ " .set mips64\n\t" ++ " .set noreorder\n\t" ++ ++ "1:sync \n\t" ++ " ll %[__prev], %[__dest] \n\t" ++ " bne %[__prev], %[__old], 2f \n\t" ++ " move %[__cmp], $0 \n\t" ++ " move %[__cmp], %[__new] \n\t" ++ " sc %[__cmp], %[__dest] \n\t" ++ " beqz %[__cmp], 1b \n\t" ++ " nop \n\t" ++ "2: \n\t" ++ " sync \n\t" ++ ++ " .set pop\n\t" ++ ++ : [__prev] "=&r" (__prev), [__cmp] "=&r" (__cmp) ++ : [__dest] "m" (*(volatile jint*)dest), [__old] "r" (compare_value), [__new] "r" (exchange_value) ++ : "memory" ++ ); ++ ++ return __prev; ++} ++ ++inline jlong Atomic::cmpxchg (jlong exchange_value, volatile jlong* dest, jlong compare_value) { ++ jlong __prev, __cmp; ++ ++ __asm__ __volatile__ ( ++ " .set push\n\t" ++ " .set mips64\n\t" ++ " .set noreorder\n\t" ++ ++ "1:sync \n\t" ++ " lld %[__prev], %[__dest] \n\t" ++ " bne %[__prev], %[__old], 2f \n\t" ++ " move %[__cmp], $0 \n\t" ++ " move %[__cmp], %[__new] \n\t" ++ " scd %[__cmp], %[__dest] \n\t" ++ " beqz %[__cmp], 1b \n\t" ++ " nop \n\t" ++ "2: \n\t" ++ " sync \n\t" ++ ++ " .set pop\n\t" ++ ++ : [__prev] "=&r" (__prev), [__cmp] "=&r" (__cmp) ++ : [__dest] "m" (*(volatile jlong*)dest), [__old] "r" (compare_value), [__new] "r" (exchange_value) ++ : "memory" ++ ); ++ return __prev; ++} ++ ++inline intptr_t Atomic::cmpxchg_ptr(intptr_t exchange_value, volatile intptr_t* dest, intptr_t compare_value) { ++ intptr_t __prev, __cmp; ++ __asm__ __volatile__ ( ++ " .set push \n\t" ++ " .set mips64\n\t\t" ++ " .set noreorder\n\t" ++ ++ "1:sync \n\t" ++ " lld %[__prev], %[__dest] \n\t" ++ " bne %[__prev], %[__old], 2f \n\t" ++ " move %[__cmp], $0 \n\t" ++ " move %[__cmp], %[__new] \n\t" ++ " scd %[__cmp], %[__dest] \n\t" ++ " beqz %[__cmp], 1b \n\t" ++ " nop \n\t" ++ "2: \n\t" ++ " sync \n\t" ++ " .set pop \n\t" ++ ++ : [__prev] "=&r" (__prev), [__cmp] "=&r" (__cmp) ++ : [__dest] "m" (*(volatile intptr_t*)dest), [__old] "r" (compare_value), [__new] "r" (exchange_value) ++ : "memory" ++ ); ++ ++ return __prev; ++} ++ ++inline void* 
Atomic::cmpxchg_ptr(void* exchange_value, volatile void* dest, void* compare_value) { ++ return (void*)cmpxchg_ptr((intptr_t)exchange_value, (volatile intptr_t*)dest, (intptr_t)compare_value); ++} ++ ++#endif // OS_CPU_LINUX_MIPS_VM_ATOMIC_LINUX_MIPS_INLINE_HPP +diff --git a/hotspot/src/os_cpu/linux_mips/vm/bytes_linux_mips.inline.hpp b/hotspot/src/os_cpu/linux_mips/vm/bytes_linux_mips.inline.hpp +new file mode 100644 +index 0000000000..5b5cd10aa5 +--- /dev/null ++++ b/hotspot/src/os_cpu/linux_mips/vm/bytes_linux_mips.inline.hpp +@@ -0,0 +1,37 @@ ++/* ++ * Copyright (c) 1999, 2010, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2016, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#ifndef OS_CPU_LINUX_MIPS_VM_BYTES_LINUX_MIPS_INLINE_HPP ++#define OS_CPU_LINUX_MIPS_VM_BYTES_LINUX_MIPS_INLINE_HPP ++ ++#include <byteswap.h> ++ ++// Efficient swapping of data bytes from Java byte ++// ordering to native byte ordering and vice versa. ++inline u2 Bytes::swap_u2(u2 x) { return bswap_16(x); } ++inline u4 Bytes::swap_u4(u4 x) { return bswap_32(x); } ++inline u8 Bytes::swap_u8(u8 x) { return bswap_64(x); } ++ ++#endif // OS_CPU_LINUX_MIPS_VM_BYTES_LINUX_MIPS_INLINE_HPP +diff --git a/hotspot/src/os_cpu/linux_mips/vm/copy_linux_mips.inline.hpp b/hotspot/src/os_cpu/linux_mips/vm/copy_linux_mips.inline.hpp +new file mode 100644 +index 0000000000..73ac34501b +--- /dev/null ++++ b/hotspot/src/os_cpu/linux_mips/vm/copy_linux_mips.inline.hpp +@@ -0,0 +1,125 @@ ++/* ++ * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2016, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 
++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#ifndef OS_CPU_LINUX_MIPS_VM_COPY_LINUX_MIPS_INLINE_HPP ++#define OS_CPU_LINUX_MIPS_VM_COPY_LINUX_MIPS_INLINE_HPP ++ ++static void pd_conjoint_words(HeapWord* from, HeapWord* to, size_t count) { ++ (void)memmove(to, from, count * HeapWordSize); ++} ++ ++static void pd_disjoint_words(HeapWord* from, HeapWord* to, size_t count) { ++ switch (count) { ++ case 8: to[7] = from[7]; ++ case 7: to[6] = from[6]; ++ case 6: to[5] = from[5]; ++ case 5: to[4] = from[4]; ++ case 4: to[3] = from[3]; ++ case 3: to[2] = from[2]; ++ case 2: to[1] = from[1]; ++ case 1: to[0] = from[0]; ++ case 0: break; ++ default: ++ (void)memcpy(to, from, count * HeapWordSize); ++ break; ++ } ++} ++ ++static void pd_disjoint_words_atomic(HeapWord* from, HeapWord* to, size_t count) { ++ switch (count) { ++ case 8: to[7] = from[7]; ++ case 7: to[6] = from[6]; ++ case 6: to[5] = from[5]; ++ case 5: to[4] = from[4]; ++ case 4: to[3] = from[3]; ++ case 3: to[2] = from[2]; ++ case 2: to[1] = from[1]; ++ case 1: to[0] = from[0]; ++ case 0: break; ++ default: ++ while (count-- > 0) { ++ *to++ = *from++; ++ } ++ break; ++ } ++} ++ ++static void pd_aligned_conjoint_words(HeapWord* from, HeapWord* to, size_t count) { ++ pd_conjoint_words(from, to, count); ++} ++ ++static void pd_aligned_disjoint_words(HeapWord* from, HeapWord* to, size_t count) { ++ pd_disjoint_words(from, to, count); ++} ++ ++static void pd_conjoint_bytes(void* from, void* to, size_t count) { ++ (void)memmove(to, from, count); ++} ++ ++static void pd_conjoint_bytes_atomic(void* from, void* to, size_t count) { ++ pd_conjoint_bytes(from, to, count); ++} ++ ++static void pd_conjoint_jshorts_atomic(jshort* from, jshort* to, size_t count) { ++ copy_conjoint_atomic(from, to, count); ++} ++ ++static void pd_conjoint_jints_atomic(jint* from, jint* to, size_t count) { ++ copy_conjoint_atomic(from, to, count); ++} ++ ++static void pd_conjoint_jlongs_atomic(jlong* from, jlong* to, size_t count) { ++ copy_conjoint_atomic(from, to, count); ++} ++ ++static void pd_conjoint_oops_atomic(oop* from, oop* to, size_t count) { ++ //assert(!UseCompressedOops, "foo!"); ++ assert(HeapWordSize == BytesPerOop, "heapwords and oops must be the same size"); ++ copy_conjoint_atomic(from, to, count); ++} ++ ++static void pd_arrayof_conjoint_bytes(HeapWord* from, HeapWord* to, size_t count) { ++ pd_conjoint_bytes_atomic(from, to, count); ++} ++ ++static void pd_arrayof_conjoint_jshorts(HeapWord* from, HeapWord* to, size_t count) { ++ pd_conjoint_jshorts_atomic((jshort*)from, (jshort*)to, count); ++} ++ ++static void pd_arrayof_conjoint_jints(HeapWord* from, HeapWord* to, size_t count) { ++ pd_conjoint_jints_atomic((jint*)from, (jint*)to, count); ++} ++ ++static void pd_arrayof_conjoint_jlongs(HeapWord* from, HeapWord* to, size_t count) { ++ pd_conjoint_jlongs_atomic((jlong*)from, (jlong*)to, count); ++} ++ ++static void pd_arrayof_conjoint_oops(HeapWord* from, HeapWord* to, size_t count) { ++ //assert(!UseCompressedOops, "foo!"); ++ assert(BytesPerLong == BytesPerOop, "jlongs and oops must be the same size"); ++ pd_conjoint_oops_atomic((oop*)from, (oop*)to, count); ++} ++ ++#endif // OS_CPU_LINUX_MIPS_VM_COPY_LINUX_MIPS_INLINE_HPP +diff --git a/hotspot/src/os_cpu/linux_mips/vm/globals_linux_mips.hpp b/hotspot/src/os_cpu/linux_mips/vm/globals_linux_mips.hpp +new file mode 100644 +index 0000000000..f1599ac5f1 
+--- /dev/null ++++ b/hotspot/src/os_cpu/linux_mips/vm/globals_linux_mips.hpp +@@ -0,0 +1,51 @@ ++/* ++ * Copyright (c) 2000, 2013, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2018, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#ifndef OS_CPU_LINUX_MIPS_VM_GLOBALS_LINUX_MIPS_HPP ++#define OS_CPU_LINUX_MIPS_VM_GLOBALS_LINUX_MIPS_HPP ++ ++// Sets the default values for platform dependent flags used by the runtime system. ++// (see globals.hpp) ++ ++define_pd_global(bool, DontYieldALot, false); ++#ifdef MIPS64 ++define_pd_global(intx, ThreadStackSize, 1024); // 0 => use system default ++define_pd_global(intx, VMThreadStackSize, 1024); ++#else ++// ThreadStackSize 320 allows a couple of test cases to run while ++// keeping the number of threads that can be created high. System ++// default ThreadStackSize appears to be 512 which is too big. ++define_pd_global(intx, ThreadStackSize, 320); ++define_pd_global(intx, VMThreadStackSize, 512); ++#endif // MIPS64 ++ ++define_pd_global(intx, CompilerThreadStackSize, 0); ++ ++define_pd_global(uintx,JVMInvokeMethodSlack, 8192); ++ ++// Used on 64 bit platforms for UseCompressedOops base address ++define_pd_global(uintx,HeapBaseMinAddress, 2*G); ++ ++#endif // OS_CPU_LINUX_MIPS_VM_GLOBALS_LINUX_MIPS_HPP +diff --git a/hotspot/src/os_cpu/linux_mips/vm/linux_mips.ad b/hotspot/src/os_cpu/linux_mips/vm/linux_mips.ad +new file mode 100644 +index 0000000000..5e38996ffa +--- /dev/null ++++ b/hotspot/src/os_cpu/linux_mips/vm/linux_mips.ad +@@ -0,0 +1,153 @@ ++// ++// Copyright (c) 2003, 2013, Oracle and/or its affiliates. All rights reserved. ++// Copyright (c) 2015, 2016, Loongson Technology. All rights reserved. ++// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++// ++// This code is free software; you can redistribute it and/or modify it ++// under the terms of the GNU General Public License version 2 only, as ++// published by the Free Software Foundation. ++// ++// This code is distributed in the hope that it will be useful, but WITHOUT ++// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++// version 2 for more details (a copy is included in the LICENSE file that ++// accompanied this code). ++// ++// You should have received a copy of the GNU General Public License version ++// 2 along with this work; if not, write to the Free Software Foundation, ++// Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 
++// ++// Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++// or visit www.oracle.com if you need additional information or have any ++// questions. ++// ++// ++ ++// mips32/godson2 Linux Architecture Description File ++ ++//----------OS-DEPENDENT ENCODING BLOCK---------------------------------------- ++// This block specifies the encoding classes used by the compiler to ++// output byte streams. Encoding classes generate functions which are ++// called by Machine Instruction Nodes in order to generate the bit ++// encoding of the instruction. Operands specify their base encoding ++// interface with the interface keyword. There are currently ++// supported four interfaces, REG_INTER, CONST_INTER, MEMORY_INTER, & ++// COND_INTER. REG_INTER causes an operand to generate a function ++// which returns its register number when queried. CONST_INTER causes ++// an operand to generate a function which returns the value of the ++// constant when queried. MEMORY_INTER causes an operand to generate ++// four functions which return the Base Register, the Index Register, ++// the Scale Value, and the Offset Value of the operand when queried. ++// COND_INTER causes an operand to generate six functions which return ++// the encoding code (ie - encoding bits for the instruction) ++// associated with each basic boolean condition for a conditional ++// instruction. Instructions specify two basic values for encoding. ++// They use the ins_encode keyword to specify their encoding class ++// (which must be one of the class names specified in the encoding ++// block), and they use the opcode keyword to specify, in order, their ++// primary, secondary, and tertiary opcode. Only the opcode sections ++// which a particular instruction needs for encoding need to be ++// specified. ++encode %{ ++ // Build emit functions for each basic byte or larger field in the intel ++ // encoding scheme (opcode, rm, sib, immediate), and call them from C++ ++ // code in the enc_class source block. Emit functions will live in the ++ // main source block for now. 
In future, we can generalize this by ++ // adding a syntax that specifies the sizes of fields in an order, ++ // so that the adlc can build the emit functions automagically ++ ++ enc_class linux_breakpoint ++ %{ ++ MacroAssembler* masm = new MacroAssembler(&cbuf); ++ masm->call(CAST_FROM_FN_PTR(address, os::breakpoint), relocInfo::runtime_call_type); ++ %} ++ ++ enc_class call_epilog ++ %{ ++ if (VerifyStackAtCalls) { ++ // Check that stack depth is unchanged: find majik cookie on stack ++ int framesize = ra_->reg2offset_unchecked(OptoReg::add(ra_->_matcher._old_SP,-2)); ++ if(framesize >= 128) { ++ emit_opcode(cbuf, 0x81); // cmp [esp+0],0xbadb1ood ++ emit_d8(cbuf,0xBC); ++ emit_d8(cbuf,0x24); ++ emit_d32(cbuf,framesize); // Find majik cookie from ESP ++ emit_d32(cbuf, 0xbadb100d); ++ } ++ else { ++ emit_opcode(cbuf, 0x81); // cmp [esp+0],0xbadb1ood ++ emit_d8(cbuf,0x7C); ++ emit_d8(cbuf,0x24); ++ emit_d8(cbuf,framesize); // Find majik cookie from ESP ++ emit_d32(cbuf, 0xbadb100d); ++ } ++ // jmp EQ around INT3 ++ // QQQ TODO ++ const int jump_around = 5; // size of call to breakpoint, 1 for CC ++ emit_opcode(cbuf, 0x74); ++ emit_d8(cbuf, jump_around); ++ // QQQ temporary ++ emit_break(cbuf); ++ // Die if stack mismatch ++ // emit_opcode(cbuf,0xCC); ++ } ++ %} ++ ++%} ++ ++// INSTRUCTIONS -- Platform dependent ++ ++//----------OS and Locking Instructions---------------------------------------- ++ ++// This name is KNOWN by the ADLC and cannot be changed. ++// The ADLC forces a 'TypeRawPtr::BOTTOM' output type ++// for this guy. ++instruct tlsLoadP(eAXRegP dst, eFlagsReg cr) %{ ++%{ ++ match(Set dst (ThreadLocal)); ++ effect(DEF dst, KILL cr); ++ ++ format %{ "MOV EAX, Thread::current()" %} ++ ins_encode( linux_tlsencode(dst) ); ++ ins_pipe( ialu_reg_fat ); ++%} ++ ++// Die now ++instruct ShouldNotReachHere() ++%{ ++ match(Halt); ++ ++ // Use the following format syntax ++ format %{ "int3\t# ShouldNotReachHere" %} ++ // QQQ TODO for now call breakpoint ++ // opcode(0xCC); ++ // ins_encode(Opc); ++ ins_encode(linux_breakpoint); ++ ins_pipe(pipe_slow); ++%} ++ ++ ++// Platform dependent source ++ ++source ++%{ ++// emit an interrupt that is caught by the debugger ++void emit_break(CodeBuffer& cbuf) { ++ // Debugger doesn't really catch this but best we can do so far QQQ ++#define __ masm. ++ __ lui(T9, Assembler::split_high((int)os::breakpoint)); ++ __ addiu(T9, T9, Assembler::split_low((int)os::breakpoint)); ++ __ jalr(T9); ++ __ delayed()->nop(); ++} ++ ++void MachBreakpointNode::emit(CodeBuffer& cbuf, PhaseRegAlloc* ra_) const { ++ emit_break(cbuf); ++} ++ ++uint MachBreakpointNode::size(PhaseRegAlloc* ra_) const { ++ //return 5; ++ return 16; ++} ++ ++%} +diff --git a/hotspot/src/os_cpu/linux_mips/vm/linux_mips.s b/hotspot/src/os_cpu/linux_mips/vm/linux_mips.s +new file mode 100644 +index 0000000000..f87fbf265d +--- /dev/null ++++ b/hotspot/src/os_cpu/linux_mips/vm/linux_mips.s +@@ -0,0 +1,25 @@ ++# ++# Copyright (c) 2004, 2013, Oracle and/or its affiliates. All rights reserved. ++# Copyright (c) 2015, 2017, Loongson Technology. All rights reserved. ++# DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++# ++# This code is free software; you can redistribute it and/or modify it ++# under the terms of the GNU General Public License version 2 only, as ++# published by the Free Software Foundation. 
++# ++# This code is distributed in the hope that it will be useful, but WITHOUT ++# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++# FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++# version 2 for more details (a copy is included in the LICENSE file that ++# accompanied this code). ++# ++# You should have received a copy of the GNU General Public License version ++# 2 along with this work; if not, write to the Free Software Foundation, ++# Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++# ++# Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++# or visit www.oracle.com if you need additional information or have any ++# questions. ++# ++ ++ +diff --git a/hotspot/src/os_cpu/linux_mips/vm/linux_mips_64.ad b/hotspot/src/os_cpu/linux_mips/vm/linux_mips_64.ad +new file mode 100644 +index 0000000000..ca4d094738 +--- /dev/null ++++ b/hotspot/src/os_cpu/linux_mips/vm/linux_mips_64.ad +@@ -0,0 +1,50 @@ ++// ++// Copyright (c) 2003, 2013, Oracle and/or its affiliates. All rights reserved. ++// Copyright (c) 2015, 2016, Loongson Technology. All rights reserved. ++// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++// ++// This code is free software; you can redistribute it and/or modify it ++// under the terms of the GNU General Public License version 2 only, as ++// published by the Free Software Foundation. ++// ++// This code is distributed in the hope that it will be useful, but WITHOUT ++// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++// version 2 for more details (a copy is included in the LICENSE file that ++// accompanied this code). ++// ++// You should have received a copy of the GNU General Public License version ++// 2 along with this work; if not, write to the Free Software Foundation, ++// Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++// ++// Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++// or visit www.oracle.com if you need additional information or have any ++// questions. ++// ++// ++ ++// AMD64 Linux Architecture Description File ++ ++//----------OS-DEPENDENT ENCODING BLOCK---------------------------------------- ++// This block specifies the encoding classes used by the compiler to ++// output byte streams. Encoding classes generate functions which are ++// called by Machine Instruction Nodes in order to generate the bit ++// encoding of the instruction. Operands specify their base encoding ++// interface with the interface keyword. There are currently ++// supported four interfaces, REG_INTER, CONST_INTER, MEMORY_INTER, & ++// COND_INTER. REG_INTER causes an operand to generate a function ++// which returns its register number when queried. CONST_INTER causes ++// an operand to generate a function which returns the value of the ++// constant when queried. MEMORY_INTER causes an operand to generate ++// four functions which return the Base Register, the Index Register, ++// the Scale Value, and the Offset Value of the operand when queried. ++// COND_INTER causes an operand to generate six functions which return ++// the encoding code (ie - encoding bits for the instruction) ++// associated with each basic boolean condition for a conditional ++// instruction. Instructions specify two basic values for encoding. 
++// They use the ins_encode keyword to specify their encoding class ++// (which must be one of the class names specified in the encoding ++// block), and they use the opcode keyword to specify, in order, their ++// primary, secondary, and tertiary opcode. Only the opcode sections ++// which a particular instruction needs for encoding need to be ++// specified. +diff --git a/hotspot/src/os_cpu/linux_mips/vm/orderAccess_linux_mips.inline.hpp b/hotspot/src/os_cpu/linux_mips/vm/orderAccess_linux_mips.inline.hpp +new file mode 100644 +index 0000000000..c9bc169aa5 +--- /dev/null ++++ b/hotspot/src/os_cpu/linux_mips/vm/orderAccess_linux_mips.inline.hpp +@@ -0,0 +1,115 @@ ++/* ++ * Copyright (c) 2003, 2013, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ * ++ */ ++ ++#ifndef OS_CPU_LINUX_MIPS_VM_ORDERACCESS_LINUX_MIPS_INLINE_HPP ++#define OS_CPU_LINUX_MIPS_VM_ORDERACCESS_LINUX_MIPS_INLINE_HPP ++ ++#include "runtime/atomic.hpp" ++#include "runtime/orderAccess.hpp" ++#include "runtime/os.hpp" ++#include "vm_version_mips.hpp" ++ ++#define inlasm_sync() if (os::is_ActiveCoresMP()) \ ++ __asm__ __volatile__ ("nop" : : : "memory"); \ ++ else \ ++ __asm__ __volatile__ ("sync" : : : "memory"); ++ ++inline void OrderAccess::loadload() { inlasm_sync(); } ++inline void OrderAccess::storestore() { inlasm_sync(); } ++inline void OrderAccess::loadstore() { inlasm_sync(); } ++inline void OrderAccess::storeload() { inlasm_sync(); } ++ ++inline void OrderAccess::acquire() { inlasm_sync(); } ++inline void OrderAccess::release() { inlasm_sync(); } ++inline void OrderAccess::fence() { inlasm_sync(); } ++ ++//implementation of load_acquire ++inline jbyte OrderAccess::load_acquire(volatile jbyte* p) { jbyte data = *p; acquire(); return data; } ++inline jshort OrderAccess::load_acquire(volatile jshort* p) { jshort data = *p; acquire(); return data; } ++inline jint OrderAccess::load_acquire(volatile jint* p) { jint data = *p; acquire(); return data; } ++inline jlong OrderAccess::load_acquire(volatile jlong* p) { jlong tmp = *p; acquire(); return tmp; } ++inline jubyte OrderAccess::load_acquire(volatile jubyte* p) { jubyte data = *p; acquire(); return data; } ++inline jushort OrderAccess::load_acquire(volatile jushort* p) { jushort data = *p; acquire(); return data; } ++inline juint OrderAccess::load_acquire(volatile juint* p) { juint data = *p; acquire(); return data; } ++inline julong OrderAccess::load_acquire(volatile julong* p) { julong tmp = *p; acquire(); return tmp; } ++inline jfloat OrderAccess::load_acquire(volatile jfloat* p) { jfloat data = *p; acquire(); return data; } ++inline jdouble OrderAccess::load_acquire(volatile jdouble* p) { jdouble tmp = *p; acquire(); return tmp; } ++ ++//implementation of load_ptr_acquire ++inline intptr_t OrderAccess::load_ptr_acquire(volatile intptr_t* p) { intptr_t data = *p; acquire(); return data; } ++inline void* OrderAccess::load_ptr_acquire(volatile void* p) { void *data = *(void* volatile *)p; acquire(); return data; } ++inline void* OrderAccess::load_ptr_acquire(const volatile void* p) { void *data = *(void* volatile *)p; acquire(); return data; } ++ ++//implementation of release_store ++inline void OrderAccess::release_store(volatile jbyte* p, jbyte v) { release(); *p = v; } ++inline void OrderAccess::release_store(volatile jshort* p, jshort v) { release(); *p = v; } ++inline void OrderAccess::release_store(volatile jint* p, jint v) { release(); *p = v; } ++inline void OrderAccess::release_store(volatile jlong* p, jlong v) { release(); *p = v; } ++inline void OrderAccess::release_store(volatile jubyte* p, jubyte v) { release(); *p = v; } ++inline void OrderAccess::release_store(volatile jushort* p, jushort v) { release(); *p = v; } ++inline void OrderAccess::release_store(volatile juint* p, juint v) { release(); *p = v; } ++inline void OrderAccess::release_store(volatile julong* p, julong v) { release(); *p = v; } ++inline void OrderAccess::release_store(volatile jfloat* p, jfloat v) { release(); *p = v; } ++inline void OrderAccess::release_store(volatile jdouble* p, jdouble v) { release(); *p = v; } ++ ++//implementation of release_store_ptr ++inline void OrderAccess::release_store_ptr(volatile intptr_t* p, intptr_t v) { release(); *p = v; } ++inline void OrderAccess::release_store_ptr(volatile void* p, void* 
v) { release(); *(void* volatile *)p = v; } ++ ++//implementation of store_fence ++inline void OrderAccess::store_fence(jbyte* p, jbyte v) { *p = v; fence(); } ++inline void OrderAccess::store_fence(jshort* p, jshort v) { *p = v; fence(); } ++inline void OrderAccess::store_fence(jint* p, jint v) { *p = v; fence(); } ++inline void OrderAccess::store_fence(jlong* p, jlong v) { *p = v; fence(); } ++inline void OrderAccess::store_fence(jubyte* p, jubyte v) { *p = v; fence(); } ++inline void OrderAccess::store_fence(jushort* p, jushort v) { *p = v; fence(); } ++inline void OrderAccess::store_fence(juint* p, juint v) { *p = v; fence(); } ++inline void OrderAccess::store_fence(julong* p, julong v) { *p = v; fence(); } ++inline void OrderAccess::store_fence(jfloat* p, jfloat v) { *p = v; fence(); } ++inline void OrderAccess::store_fence(jdouble* p, jdouble v) { *p = v; fence(); } ++ ++//implementation of store_ptr_fence ++inline void OrderAccess::store_ptr_fence(intptr_t* p, intptr_t v) { *p = v; fence(); } ++inline void OrderAccess::store_ptr_fence(void** p, void* v) { *p = v; fence(); } ++ ++//implementation of release_store_fence ++inline void OrderAccess::release_store_fence(volatile jbyte* p, jbyte v) { release_store(p, v); fence(); } ++inline void OrderAccess::release_store_fence(volatile jshort* p, jshort v) { release_store(p, v); fence(); } ++inline void OrderAccess::release_store_fence(volatile jint* p, jint v) { release_store(p, v); fence(); } ++inline void OrderAccess::release_store_fence(volatile jlong* p, jlong v) { release_store(p, v); fence(); } ++inline void OrderAccess::release_store_fence(volatile jubyte* p, jubyte v) { release_store(p, v); fence(); } ++inline void OrderAccess::release_store_fence(volatile jushort* p, jushort v) { release_store(p, v); fence(); } ++inline void OrderAccess::release_store_fence(volatile juint* p, juint v) { release_store(p, v); fence(); } ++inline void OrderAccess::release_store_fence(volatile julong* p, julong v) { release_store(p, v); fence(); } ++inline void OrderAccess::release_store_fence(volatile jfloat* p, jfloat v) { release_store(p, v); fence(); } ++inline void OrderAccess::release_store_fence(volatile jdouble* p, jdouble v) { release_store(p, v); fence(); } ++ ++//implementaion of release_store_ptr_fence ++inline void OrderAccess::release_store_ptr_fence(volatile intptr_t* p, intptr_t v) { release_store_ptr(p, v); fence(); } ++inline void OrderAccess::release_store_ptr_fence(volatile void* p, void* v) { release_store_ptr(p, v); fence(); } ++ ++#undef inlasm_sync ++ ++#endif // OS_CPU_LINUX_MIPS_VM_ORDERACCESS_LINUX_MIPS_INLINE_HPP +diff --git a/hotspot/src/os_cpu/linux_mips/vm/os_linux_mips.cpp b/hotspot/src/os_cpu/linux_mips/vm/os_linux_mips.cpp +new file mode 100644 +index 0000000000..43487dab98 +--- /dev/null ++++ b/hotspot/src/os_cpu/linux_mips/vm/os_linux_mips.cpp +@@ -0,0 +1,1015 @@ ++/* ++ * Copyright (c) 1999, 2014, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2023, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++// no precompiled headers ++#include "asm/macroAssembler.hpp" ++#include "classfile/classLoader.hpp" ++#include "classfile/systemDictionary.hpp" ++#include "classfile/vmSymbols.hpp" ++#include "code/icBuffer.hpp" ++#include "code/vtableStubs.hpp" ++#include "interpreter/interpreter.hpp" ++#include "jvm_linux.h" ++#include "memory/allocation.inline.hpp" ++#include "mutex_linux.inline.hpp" ++#include "os_share_linux.hpp" ++#include "prims/jniFastGetField.hpp" ++#include "prims/jvm.h" ++#include "prims/jvm_misc.hpp" ++#include "runtime/arguments.hpp" ++#include "runtime/extendedPC.hpp" ++#include "runtime/frame.inline.hpp" ++#include "runtime/interfaceSupport.hpp" ++#include "runtime/java.hpp" ++#include "runtime/javaCalls.hpp" ++#include "runtime/mutexLocker.hpp" ++#include "runtime/osThread.hpp" ++#include "runtime/sharedRuntime.hpp" ++#include "runtime/stubRoutines.hpp" ++#include "runtime/thread.inline.hpp" ++#include "runtime/timer.hpp" ++#include "utilities/events.hpp" ++#include "utilities/vmError.hpp" ++#include "utilities/debug.hpp" ++#include "compiler/disassembler.hpp" ++// put OS-includes here ++# include ++# include ++# include ++# include ++# include ++# include ++# include ++# include ++# include ++# include ++# include ++# include ++# include ++# include ++# include ++# include ++# include ++# include ++# include ++# include ++ ++#define REG_SP 29 ++#define REG_FP 30 ++ ++address os::current_stack_pointer() { ++ register void *sp __asm__ ("$29"); ++ return (address) sp; ++} ++ ++char* os::non_memory_address_word() { ++ // Must never look like an address returned by reserve_memory, ++ // even in its subfields (as defined by the CPU immediate fields, ++ // if the CPU splits constants across multiple instructions). ++ ++ return (char*) -1; ++} ++ ++void os::initialize_thread(Thread* thr) { ++// Nothing to do. ++} ++ ++address os::Linux::ucontext_get_pc(ucontext_t * uc) { ++ //return (address)uc->uc_mcontext.gregs[REG_PC]; ++ return (address)uc->uc_mcontext.pc; ++} ++ ++intptr_t* os::Linux::ucontext_get_sp(ucontext_t * uc) { ++ return (intptr_t*)uc->uc_mcontext.gregs[REG_SP]; ++} ++ ++intptr_t* os::Linux::ucontext_get_fp(ucontext_t * uc) { ++ return (intptr_t*)uc->uc_mcontext.gregs[REG_FP]; ++} ++ ++// For Forte Analyzer AsyncGetCallTrace profiling support - thread ++// is currently interrupted by SIGPROF. ++// os::Solaris::fetch_frame_from_ucontext() tries to skip nested signal ++// frames. Currently we don't do that on Linux, so it's the same as ++// os::fetch_frame_from_context(). 
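The ucontext_get_pc/ucontext_get_sp/ucontext_get_fp helpers above are the only place the port touches the raw signal-context layout. As a rough stand-alone sketch of the same idea — assuming the Linux/MIPS mcontext_t used by this patch, where the program counter is uc_mcontext.pc and $sp is gregs[29]; other architectures lay this out differently, and the fprintf is illustration only (it is not async-signal-safe):

    #include <signal.h>
    #include <ucontext.h>
    #include <cstdint>
    #include <cstdio>
    #include <cstdlib>

    // Minimal SA_SIGINFO handler: recover pc and sp from the signal context,
    // mirroring what os::Linux::ucontext_get_pc()/ucontext_get_sp() do above.
    static void segv_handler(int sig, siginfo_t* info, void* ucVoid) {
        ucontext_t* uc = (ucontext_t*)ucVoid;
        void* pc = (void*)(uintptr_t)uc->uc_mcontext.pc;        // MIPS-specific field
        void* sp = (void*)(uintptr_t)uc->uc_mcontext.gregs[29]; // $sp is register 29
        fprintf(stderr, "sig %d at %p, pc=%p sp=%p\n", sig, info->si_addr, pc, sp);
        _Exit(1);   // do not return into the faulting instruction
    }

    int main() {
        struct sigaction sa{};
        sa.sa_sigaction = segv_handler;
        sa.sa_flags = SA_SIGINFO;
        sigaction(SIGSEGV, &sa, nullptr);
        return 0;
    }
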
++ExtendedPC os::Linux::fetch_frame_from_ucontext(Thread* thread, ++ ucontext_t* uc, intptr_t** ret_sp, intptr_t** ret_fp) { ++ ++ assert(thread != NULL, "just checking"); ++ assert(ret_sp != NULL, "just checking"); ++ assert(ret_fp != NULL, "just checking"); ++ ++ return os::fetch_frame_from_context(uc, ret_sp, ret_fp); ++} ++ ++ExtendedPC os::fetch_frame_from_context(void* ucVoid, ++ intptr_t** ret_sp, intptr_t** ret_fp) { ++ ++ ExtendedPC epc; ++ ucontext_t* uc = (ucontext_t*)ucVoid; ++ ++ if (uc != NULL) { ++ epc = ExtendedPC(os::Linux::ucontext_get_pc(uc)); ++ if (ret_sp) *ret_sp = os::Linux::ucontext_get_sp(uc); ++ if (ret_fp) *ret_fp = os::Linux::ucontext_get_fp(uc); ++ } else { ++ // construct empty ExtendedPC for return value checking ++ epc = ExtendedPC(NULL); ++ if (ret_sp) *ret_sp = (intptr_t *)NULL; ++ if (ret_fp) *ret_fp = (intptr_t *)NULL; ++ } ++ ++ return epc; ++} ++ ++frame os::fetch_frame_from_context(void* ucVoid) { ++ intptr_t* sp; ++ intptr_t* fp; ++ ExtendedPC epc = fetch_frame_from_context(ucVoid, &sp, &fp); ++ return frame(sp, fp, epc.pc()); ++} ++ ++// By default, gcc always save frame pointer (%ebp/%rbp) on stack. It may get ++// turned off by -fomit-frame-pointer, ++frame os::get_sender_for_C_frame(frame* fr) { ++ return frame(fr->sender_sp(), fr->link(), fr->sender_pc()); ++} ++ ++//intptr_t* _get_previous_fp() { ++intptr_t* __attribute__((noinline)) os::get_previous_fp() { ++ int *pc; ++ intptr_t sp; ++ int *pc_limit = (int*)(void*)&os::get_previous_fp; ++ int insn; ++ ++ { ++ l_pc:; ++ pc = (int*)&&l_pc; ++ __asm__ __volatile__ ("move %0, $sp" : "=r" (sp)); ++ } ++ ++ do { ++ insn = *pc; ++ switch(bitfield(insn, 16, 16)) { ++ case 0x27bd: /* addiu $sp,$sp,-i */ ++ case 0x67bd: /* daddiu $sp,$sp,-i */ ++ assert ((short)bitfield(insn, 0, 16)<0, "bad frame"); ++ sp -= (short)bitfield(insn, 0, 16); ++ return (intptr_t*)sp; ++ } ++ --pc; ++ } while (pc>=pc_limit); // The initial value of pc may be equal to pc_limit, because of GCC optimization. ++ ++ ShouldNotReachHere(); ++ return NULL; // mute compiler ++} ++ ++ ++frame os::current_frame() { ++ intptr_t* fp = (intptr_t*)get_previous_fp(); ++ frame myframe((intptr_t*)os::current_stack_pointer(), ++ (intptr_t*)fp, ++ CAST_FROM_FN_PTR(address, os::current_frame)); ++ if (os::is_first_C_frame(&myframe)) { ++ // stack is not walkable ++ return frame(); ++ } else { ++ return os::get_sender_for_C_frame(&myframe); ++ } ++} ++ ++//x86 add 2 new assemble function here! ++extern "C" JNIEXPORT int ++JVM_handle_linux_signal(int sig, ++ siginfo_t* info, ++ void* ucVoid, ++ int abort_if_unrecognized) { ++#ifdef PRINT_SIGNAL_HANDLE ++ tty->print_cr("Signal: signo=%d, sicode=%d, sierrno=%d, siaddr=%lx", ++ info->si_signo, ++ info->si_code, ++ info->si_errno, ++ info->si_addr); ++#endif ++ ++ ucontext_t* uc = (ucontext_t*) ucVoid; ++ ++ Thread* t = ThreadLocalStorage::get_thread_slow(); ++ ++ SignalHandlerMark shm(t); ++ ++ // Note: it's not uncommon that JNI code uses signal/sigset to install ++ // then restore certain signal handler (e.g. to temporarily block SIGPIPE, ++ // or have a SIGILL handler when detecting CPU type). When that happens, ++ // JVM_handle_linux_signal() might be invoked with junk info/ucVoid. To ++ // avoid unnecessary crash when libjsig is not preloaded, try handle signals ++ // that do not require siginfo/ucontext first. 
++ ++ if (sig == SIGPIPE/* || sig == SIGXFSZ*/) { ++ // allow chained handler to go first ++ if (os::Linux::chained_handler(sig, info, ucVoid)) { ++ return true; ++ } else { ++ if (PrintMiscellaneous && (WizardMode || Verbose)) { ++ warning("Ignoring SIGPIPE - see bug 4229104"); ++ } ++ return true; ++ } ++ } ++ ++ JavaThread* thread = NULL; ++ VMThread* vmthread = NULL; ++ if (os::Linux::signal_handlers_are_installed) { ++ if (t != NULL ){ ++ if(t->is_Java_thread()) { ++#ifdef PRINT_SIGNAL_HANDLE ++ tty->print_cr("this thread is a java thread"); ++#endif ++ thread = (JavaThread*)t; ++ } ++ else if(t->is_VM_thread()){ ++#ifdef PRINT_SIGNAL_HANDLE ++ tty->print_cr("this thread is a VM thread\n"); ++#endif ++ vmthread = (VMThread *)t; ++ } ++ } ++ } ++ ++ // decide if this trap can be handled by a stub ++ address stub = NULL; ++ address pc = NULL; ++ ++ pc = (address) os::Linux::ucontext_get_pc(uc); ++#ifdef PRINT_SIGNAL_HANDLE ++ tty->print_cr("pc=%lx", pc); ++ os::print_context(tty, uc); ++#endif ++ //%note os_trap_1 ++ if (info != NULL && uc != NULL && thread != NULL) { ++ pc = (address) os::Linux::ucontext_get_pc(uc); ++ // Handle ALL stack overflow variations here ++ if (sig == SIGSEGV) { ++ address addr = (address) info->si_addr; ++#ifdef PRINT_SIGNAL_HANDLE ++ tty->print("handle all stack overflow variations: "); ++ /*tty->print("addr = %lx, stack base = %lx, stack top = %lx\n", ++ addr, ++ thread->stack_base(), ++ thread->stack_base() - thread->stack_size()); ++ */ ++#endif ++ ++ // check if fault address is within thread stack ++ if (addr < thread->stack_base() && ++ addr >= thread->stack_base() - thread->stack_size()) { ++ // stack overflow ++#ifdef PRINT_SIGNAL_HANDLE ++ tty->print("stack exception check \n"); ++#endif ++ if (thread->in_stack_yellow_zone(addr)) { ++#ifdef PRINT_SIGNAL_HANDLE ++ tty->print("exception addr is in yellow zone\n"); ++#endif ++ thread->disable_stack_yellow_zone(); ++ if (thread->thread_state() == _thread_in_Java) { ++ // Throw a stack overflow exception. Guard pages will be reenabled ++ // while unwinding the stack. ++#ifdef PRINT_SIGNAL_HANDLE ++ tty->print("this thread is in java\n"); ++#endif ++ stub = SharedRuntime::continuation_for_implicit_exception(thread, pc, SharedRuntime::STACK_OVERFLOW); ++ } else { ++ // Thread was in the vm or native code. Return and try to finish. ++#ifdef PRINT_SIGNAL_HANDLE ++ tty->print("this thread is in vm or native codes and return\n"); ++#endif ++ return 1; ++ } ++ } else if (thread->in_stack_red_zone(addr)) { ++ // Fatal red zone violation. Disable the guard pages and fall through ++ // to handle_unexpected_exception way down below. ++#ifdef PRINT_SIGNAL_HANDLE ++ tty->print("exception addr is in red zone\n"); ++#endif ++ thread->disable_stack_red_zone(); ++ tty->print_raw_cr("An irrecoverable stack overflow has occurred."); ++ ++ // This is a likely cause, but hard to verify. Let's just print ++ // it as a hint. ++ tty->print_raw_cr("Please check if any of your loaded .so files has " ++ "enabled executable stack (see man page execstack(8))"); ++ } else { ++ // Accessing stack address below sp may cause SEGV if current ++ // thread has MAP_GROWSDOWN stack. This should only happen when ++ // current thread was created by user code with MAP_GROWSDOWN flag ++ // and then attached to VM. See notes in os_linux.cpp. 
++#ifdef PRINT_SIGNAL_HANDLE ++ tty->print("exception addr is neither in yellow zone nor in the red one\n"); ++#endif ++ if (thread->osthread()->expanding_stack() == 0) { ++ thread->osthread()->set_expanding_stack(); ++ if (os::Linux::manually_expand_stack(thread, addr)) { ++ thread->osthread()->clear_expanding_stack(); ++ return 1; ++ } ++ thread->osthread()->clear_expanding_stack(); ++ } else { ++ fatal("recursive segv. expanding stack."); ++ } ++ } ++ } //addr < ++ } //sig == SIGSEGV ++ ++ if (thread->thread_state() == _thread_in_Java) { ++ // Java thread running in Java code => find exception handler if any ++ // a fault inside compiled code, the interpreter, or a stub ++#ifdef PRINT_SIGNAL_HANDLE ++ tty->print("java thread running in java code\n"); ++#endif ++ ++ // Handle signal from NativeJump::patch_verified_entry(). ++ if (sig == SIGILL && nativeInstruction_at(pc)->is_sigill_zombie_not_entrant()) { ++#ifdef PRINT_SIGNAL_HANDLE ++ tty->print_cr("verified entry = %lx, sig=%d", nativeInstruction_at(pc), sig); ++#endif ++ stub = SharedRuntime::get_handle_wrong_method_stub(); ++ } else if (sig == SIGSEGV && os::is_poll_address((address)info->si_addr)) { ++#ifdef PRINT_SIGNAL_HANDLE ++ tty->print_cr("polling address = %lx, sig=%d", os::get_polling_page(), sig); ++#endif ++ stub = SharedRuntime::get_poll_stub(pc); ++ } else if (sig == SIGBUS /* && info->si_code == BUS_OBJERR */) { ++ // BugId 4454115: A read from a MappedByteBuffer can fault ++ // here if the underlying file has been truncated. ++ // Do not crash the VM in such a case. ++ CodeBlob* cb = CodeCache::find_blob_unsafe(pc); ++ nmethod* nm = cb->is_nmethod() ? (nmethod*)cb : NULL; ++#ifdef PRINT_SIGNAL_HANDLE ++ tty->print("cb = %lx, nm = %lx\n", cb, nm); ++#endif ++ if (nm != NULL && nm->has_unsafe_access()) { ++ stub = StubRoutines::handler_for_unsafe_access(); ++ } ++ } else if (sig == SIGFPE /* && info->si_code == FPE_INTDIV */) { ++ // HACK: si_code does not work on linux 2.2.12-20!!! ++ int op = pc[0] & 0x3f; ++ int op1 = pc[3] & 0x3f; ++ //FIXME, Must port to mips code!! ++ switch (op) { ++ case 0x1e: //ddiv ++ case 0x1f: //ddivu ++ case 0x1a: //div ++ case 0x1b: //divu ++ case 0x34: //trap ++ /* In MIPS, div_by_zero exception can only be triggered by explicit 'trap'. ++ * Ref: [c1_LIRAssembler_mips.cpp] arithmetic_idiv() ++ */ ++ stub = SharedRuntime::continuation_for_implicit_exception(thread, ++ pc, ++ SharedRuntime::IMPLICIT_DIVIDE_BY_ZERO); ++ break; ++ default: ++ // TODO: handle more cases if we are using other x86 instructions ++ // that can generate SIGFPE signal on linux. ++ tty->print_cr("unknown opcode 0x%X -0x%X with SIGFPE.", op, op1); ++ //fatal("please update this code."); ++ } ++ } else if (sig == SIGSEGV && ++ !MacroAssembler::needs_explicit_null_check((intptr_t)info->si_addr)) { ++#ifdef PRINT_SIGNAL_HANDLE ++ tty->print("continuation for implicit exception\n"); ++#endif ++ // Determination of interpreter/vtable stub/compiled code null exception ++ stub = SharedRuntime::continuation_for_implicit_exception(thread, pc, SharedRuntime::IMPLICIT_NULL); ++#ifdef PRINT_SIGNAL_HANDLE ++ tty->print_cr("continuation_for_implicit_exception stub: %lx", stub); ++#endif ++ } else if (/*thread->thread_state() == _thread_in_Java && */sig == SIGILL) { ++ //Since kernel does not have emulation of PS instructions yet, the emulation must be handled here. ++ //The method is to trigger kernel emulation of float emulation. 
++ int inst = *(int*)pc; ++ int ops = (inst >> 26) & 0x3f; ++ int ops_fmt = (inst >> 21) & 0x1f; ++ int op = inst & 0x3f; ++ if (ops == Assembler::cop1_op && ops_fmt == Assembler::ps_fmt) { ++ int ft, fs, fd; ++ ft = (inst >> 16) & 0x1f; ++ fs = (inst >> 11) & 0x1f; ++ fd = (inst >> 6) & 0x1f; ++ float ft_upper, ft_lower, fs_upper, fs_lower, fd_upper, fd_lower; ++ double ft_value, fs_value, fd_value; ++ ft_value = uc->uc_mcontext.fpregs.fp_r.fp_dregs[ft]; ++ fs_value = uc->uc_mcontext.fpregs.fp_r.fp_dregs[fs]; ++ __asm__ __volatile__ ( ++ "cvt.s.pl %0, %4\n\t" ++ "cvt.s.pu %1, %4\n\t" ++ "cvt.s.pl %2, %5\n\t" ++ "cvt.s.pu %3, %5\n\t" ++ : "=f" (fs_lower), "=f" (fs_upper), "=f" (ft_lower), "=f" (ft_upper) ++ : "f" (fs_value), "f" (ft_value) ++ ); ++ ++ switch (op) { ++ case Assembler::fadd_op: ++ __asm__ __volatile__ ( ++ "add.s %1, %3, %5\n\t" ++ "add.s %2, %4, %6\n\t" ++ "pll.ps %0, %1, %2\n\t" ++ : "=f" (fd_value), "=f" (fd_upper), "=f" (fd_lower) ++ : "f" (fs_upper), "f" (fs_lower), "f" (ft_upper), "f" (ft_lower) ++ ); ++ uc->uc_mcontext.fpregs.fp_r.fp_dregs[fd] = fd_value; ++ stub = pc + 4; ++ break; ++ case Assembler::fsub_op: ++ //fd = fs - ft ++ __asm__ __volatile__ ( ++ "sub.s %1, %3, %5\n\t" ++ "sub.s %2, %4, %6\n\t" ++ "pll.ps %0, %1, %2\n\t" ++ : "=f" (fd_value), "=f" (fd_upper), "=f" (fd_lower) ++ : "f" (fs_upper), "f" (fs_lower), "f" (ft_upper), "f" (ft_lower) ++ ); ++ uc->uc_mcontext.fpregs.fp_r.fp_dregs[fd] = fd_value; ++ stub = pc + 4; ++ break; ++ case Assembler::fmul_op: ++ __asm__ __volatile__ ( ++ "mul.s %1, %3, %5\n\t" ++ "mul.s %2, %4, %6\n\t" ++ "pll.ps %0, %1, %2\n\t" ++ : "=f" (fd_value), "=f" (fd_upper), "=f" (fd_lower) ++ : "f" (fs_upper), "f" (fs_lower), "f" (ft_upper), "f" (ft_lower) ++ ); ++ uc->uc_mcontext.fpregs.fp_r.fp_dregs[fd] = fd_value; ++ stub = pc + 4; ++ break; ++ default: ++ tty->print_cr("unknown cop1 opcode 0x%x with SIGILL.", op); ++ } ++ } else if (ops == Assembler::cop1x_op /*&& op == Assembler::nmadd_ps_op*/) { ++ // madd.ps is not used, the code below were not tested ++ int fr, ft, fs, fd; ++ float fr_upper, fr_lower, fs_upper, fs_lower, ft_upper, ft_lower, fd_upper, fd_lower; ++ double fr_value, ft_value, fs_value, fd_value; ++ switch (op) { ++ case Assembler::madd_ps_op: ++ // fd = (fs * ft) + fr ++ fr = (inst >> 21) & 0x1f; ++ ft = (inst >> 16) & 0x1f; ++ fs = (inst >> 11) & 0x1f; ++ fd = (inst >> 6) & 0x1f; ++ fr_value = uc->uc_mcontext.fpregs.fp_r.fp_dregs[fr]; ++ ft_value = uc->uc_mcontext.fpregs.fp_r.fp_dregs[ft]; ++ fs_value = uc->uc_mcontext.fpregs.fp_r.fp_dregs[fs]; ++ __asm__ __volatile__ ( ++ "cvt.s.pu %3, %9\n\t" ++ "cvt.s.pl %4, %9\n\t" ++ "cvt.s.pu %5, %10\n\t" ++ "cvt.s.pl %6, %10\n\t" ++ "cvt.s.pu %7, %11\n\t" ++ "cvt.s.pl %8, %11\n\t" ++ "madd.s %1, %3, %5, %7\n\t" ++ "madd.s %2, %4, %6, %8\n\t" ++ "pll.ps %0, %1, %2\n\t" ++ : "=f" (fd_value), "=f" (fd_upper), "=f" (fd_lower), "=f" (fr_upper), "=f" (fr_lower), "=f" (fs_upper), "=f" (fs_lower), "=f" (ft_upper), "=f" (ft_lower) ++ : "f" (fr_value)/*9*/, "f" (fs_value)/*10*/, "f" (ft_value)/*11*/ ++ ); ++ uc->uc_mcontext.fpregs.fp_r.fp_dregs[fd] = fd_value; ++ stub = pc + 4; ++ break; ++ default: ++ tty->print_cr("unknown cop1x opcode 0x%x with SIGILL.", op); ++ } ++ } ++ } //SIGILL ++ } else if (sig == SIGILL && VM_Version::is_determine_features_test_running()) { ++ // thread->thread_state() != _thread_in_Java ++ // SIGILL must be caused by VM_Version::determine_features(). ++ VM_Version::set_supports_cpucfg(false); ++ stub = pc + 4; // continue with next instruction. 
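The SIGILL emulation above works by picking the 32-bit instruction word apart by hand. The field extraction can be sketched on its own; the shift and mask values below are taken directly from the handler (major opcode in bits 31..26, fmt in 25..21, ft/fs/fd in 20..16/15..11/10..6, function in 5..0):

    #include <cstdint>
    #include <cstdio>

    // Decode the fields of a MIPS COP1 (FPU) R-type instruction word,
    // using the same shifts and masks as the SIGILL handler above.
    struct Cop1Fields {
        unsigned op;   // bits 31..26: major opcode (0x11 for COP1)
        unsigned fmt;  // bits 25..21: format (0x16 for paired-single)
        unsigned ft;   // bits 20..16
        unsigned fs;   // bits 15..11
        unsigned fd;   // bits 10..6
        unsigned func; // bits  5..0: add/sub/mul/...
    };

    static Cop1Fields decode_cop1(uint32_t inst) {
        Cop1Fields f;
        f.op   = (inst >> 26) & 0x3f;
        f.fmt  = (inst >> 21) & 0x1f;
        f.ft   = (inst >> 16) & 0x1f;
        f.fs   = (inst >> 11) & 0x1f;
        f.fd   = (inst >>  6) & 0x1f;
        f.func =  inst        & 0x3f;
        return f;
    }

    int main() {
        // Sample word for add.ps $f0, $f2, $f4: should decode to
        // op=0x11, fmt=0x16, ft=4, fs=2, fd=0, func=0.
        Cop1Fields f = decode_cop1(0x46c41000);
        printf("op=%#x fmt=%#x ft=%u fs=%u fd=%u func=%u\n",
               f.op, f.fmt, f.ft, f.fs, f.fd, f.func);
        return 0;
    }
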
++ } else if (thread->thread_state() == _thread_in_vm && ++ sig == SIGBUS && /* info->si_code == BUS_OBJERR && */ ++ thread->doing_unsafe_access()) { ++#ifdef PRINT_SIGNAL_HANDLE ++ tty->print_cr("SIGBUS in vm thread \n"); ++#endif ++ stub = StubRoutines::handler_for_unsafe_access(); ++ } ++ ++ // jni_fast_GetField can trap at certain pc's if a GC kicks in ++ // and the heap gets shrunk before the field access. ++ if ((sig == SIGSEGV) || (sig == SIGBUS)) { ++#ifdef PRINT_SIGNAL_HANDLE ++ tty->print("jni fast get trap: "); ++#endif ++ address addr = JNI_FastGetField::find_slowcase_pc(pc); ++ if (addr != (address)-1) { ++ stub = addr; ++ } ++#ifdef PRINT_SIGNAL_HANDLE ++ tty->print_cr("addr = %d, stub = %lx", addr, stub); ++#endif ++ } ++ ++ // Check to see if we caught the safepoint code in the ++ // process of write protecting the memory serialization page. ++ // It write enables the page immediately after protecting it ++ // so we can just return to retry the write. ++ if ((sig == SIGSEGV) && ++ os::is_memory_serialize_page(thread, (address) info->si_addr)) { ++#ifdef PRINT_SIGNAL_HANDLE ++ tty->print("write protecting the memory serialiazation page\n"); ++#endif ++ // Block current thread until the memory serialize page permission restored. ++ os::block_on_serialize_page_trap(); ++ return true; ++ } ++ } ++ ++ // Execution protection violation ++ // ++ // This should be kept as the last step in the triage. We don't ++ // have a dedicated trap number for a no-execute fault, so be ++ // conservative and allow other handlers the first shot. ++ // ++ // Note: We don't test that info->si_code == SEGV_ACCERR here. ++ // this si_code is so generic that it is almost meaningless; and ++ // the si_code for this condition may change in the future. ++ // Furthermore, a false-positive should be harmless. ++ if (UnguardOnExecutionViolation > 0 && ++ //(sig == SIGSEGV || sig == SIGBUS) && ++ //uc->uc_mcontext.gregs[REG_TRAPNO] == trap_page_fault) { ++ (sig == SIGSEGV || sig == SIGBUS ++#ifdef OPT_RANGECHECK ++ || sig == SIGSYS ++#endif ++ ) && ++ //(uc->uc_mcontext.cause == 2 || uc->uc_mcontext.cause == 3)) { ++ (uc->uc_mcontext.hi1 == 2 || uc->uc_mcontext.hi1 == 3)) { ++#ifdef PRINT_SIGNAL_HANDLE ++ tty->print_cr("execution protection violation\n"); ++#endif ++ ++ int page_size = os::vm_page_size(); ++ address addr = (address) info->si_addr; ++ address pc = os::Linux::ucontext_get_pc(uc); ++ // Make sure the pc and the faulting address are sane. ++ // ++ // If an instruction spans a page boundary, and the page containing ++ // the beginning of the instruction is executable but the following ++ // page is not, the pc and the faulting address might be slightly ++ // different - we still want to unguard the 2nd page in this case. ++ // ++ // 15 bytes seems to be a (very) safe value for max instruction size. 
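The unguard path that follows rounds the faulting address down to its page and flips that page to RWX. The core of that dance — page alignment plus a protection change — can be sketched independently; os::protect_memory(..., MEM_PROT_RWX) in the code below wraps essentially this mprotect call:

    #include <sys/mman.h>
    #include <unistd.h>
    #include <cstdint>

    // Make the page containing 'addr' readable, writable and executable,
    // the same effect as the os::protect_memory() call used below.
    static bool unguard_page(void* addr) {
        long page_size = sysconf(_SC_PAGESIZE);
        uintptr_t page_start = (uintptr_t)addr & ~((uintptr_t)page_size - 1); // align down
        return mprotect((void*)page_start, (size_t)page_size,
                        PROT_READ | PROT_WRITE | PROT_EXEC) == 0;
    }
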
++ bool pc_is_near_addr = ++ (pointer_delta((void*) addr, (void*) pc, sizeof(char)) < 15); ++ bool instr_spans_page_boundary = ++ (align_size_down((intptr_t) pc ^ (intptr_t) addr, ++ (intptr_t) page_size) > 0); ++ ++ if (pc == addr || (pc_is_near_addr && instr_spans_page_boundary)) { ++ static volatile address last_addr = ++ (address) os::non_memory_address_word(); ++ ++ // In conservative mode, don't unguard unless the address is in the VM ++ if (addr != last_addr && ++ (UnguardOnExecutionViolation > 1 || os::address_is_in_vm(addr))) { ++ ++ // Set memory to RWX and retry ++ address page_start = ++ (address) align_size_down((intptr_t) addr, (intptr_t) page_size); ++ bool res = os::protect_memory((char*) page_start, page_size, ++ os::MEM_PROT_RWX); ++ ++ if (PrintMiscellaneous && Verbose) { ++ char buf[256]; ++ jio_snprintf(buf, sizeof(buf), "Execution protection violation " ++ "at " INTPTR_FORMAT ++ ", unguarding " INTPTR_FORMAT ": %s, errno=%d", addr, ++ page_start, (res ? "success" : "failed"), errno); ++ tty->print_raw_cr(buf); ++ } ++ stub = pc; ++ ++ // Set last_addr so if we fault again at the same address, we don't end ++ // up in an endless loop. ++ // ++ // There are two potential complications here. Two threads trapping at ++ // the same address at the same time could cause one of the threads to ++ // think it already unguarded, and abort the VM. Likely very rare. ++ // ++ // The other race involves two threads alternately trapping at ++ // different addresses and failing to unguard the page, resulting in ++ // an endless loop. This condition is probably even more unlikely than ++ // the first. ++ // ++ // Although both cases could be avoided by using locks or thread local ++ // last_addr, these solutions are unnecessary complication: this ++ // handler is a best-effort safety net, not a complete solution. It is ++ // disabled by default and should only be used as a workaround in case ++ // we missed any no-execute-unsafe VM code. ++ ++ last_addr = addr; ++ } ++ } ++ } ++ ++ if (stub != NULL) { ++#ifdef PRINT_SIGNAL_HANDLE ++ tty->print_cr("resolved stub=%lx\n",stub); ++#endif ++ // save all thread context in case we need to restore it ++ if (thread != NULL) thread->set_saved_exception_pc(pc); ++ ++ uc->uc_mcontext.pc = (greg_t)stub; ++ return true; ++ } ++ ++ // signal-chaining ++ if (os::Linux::chained_handler(sig, info, ucVoid)) { ++#ifdef PRINT_SIGNAL_HANDLE ++ tty->print_cr("signal chaining\n"); ++#endif ++ return true; ++ } ++ ++ if (!abort_if_unrecognized) { ++#ifdef PRINT_SIGNAL_HANDLE ++ tty->print_cr("abort becauce of unrecognized\n"); ++#endif ++ // caller wants another chance, so give it to him ++ return false; ++ } ++ ++ if (pc == NULL && uc != NULL) { ++ pc = os::Linux::ucontext_get_pc(uc); ++ } ++ ++ // unmask current signal ++ sigset_t newset; ++ sigemptyset(&newset); ++ sigaddset(&newset, sig); ++ sigprocmask(SIG_UNBLOCK, &newset, NULL); ++#ifdef PRINT_SIGNAL_HANDLE ++ tty->print_cr("VMError in signal handler\n"); ++#endif ++ VMError err(t, sig, pc, info, ucVoid); ++ err.report_and_die(); ++ ++ ShouldNotReachHere(); ++ return true; // Mute compiler ++} ++ ++// FCSR:...|24| 23 |22|21|... ++// ...|FS|FCC0|FO|FN|... ++void os::Linux::init_thread_fpu_state(void) { ++ if (SetFSFOFN == 999) ++ return; ++ int fs = (SetFSFOFN / 100)? 1:0; ++ int fo = ((SetFSFOFN % 100) / 10)? 1:0; ++ int fn = (SetFSFOFN % 10)? 
1:0; ++ int mask = fs << 24 | fo << 22 | fn << 21; ++ ++ int fcsr = get_fpu_control_word(); ++ fcsr = fcsr | mask; ++ set_fpu_control_word(fcsr); ++ /* ++ if (fcsr != get_fpu_control_word()) ++ tty->print_cr(" fail to set to %lx, get_fpu_control_word:%lx", fcsr, get_fpu_control_word()); ++ */ ++} ++ ++int os::Linux::get_fpu_control_word(void) { ++ int fcsr; ++ __asm__ __volatile__ ( ++ ".set noat;" ++ "daddiu %0, $0, 0;" ++ "cfc1 %0, $31;" ++ : "=r" (fcsr) ++ ); ++ return fcsr; ++} ++ ++void os::Linux::set_fpu_control_word(int fpu_control) { ++ __asm__ __volatile__ ( ++ ".set noat;" ++ "ctc1 %0, $31;" ++ : ++ : "r" (fpu_control) ++ ); ++} ++ ++bool os::is_allocatable(size_t bytes) { ++ ++ if (bytes < 2 * G) { ++ return true; ++ } ++ ++ char* addr = reserve_memory(bytes, NULL); ++ ++ if (addr != NULL) { ++ release_memory(addr, bytes); ++ } ++ ++ return addr != NULL; ++} ++ ++//////////////////////////////////////////////////////////////////////////////// ++// thread stack ++ ++size_t os::Linux::min_stack_allowed = 96 * K; ++ ++ ++// Test if pthread library can support variable thread stack size. LinuxThreads ++// in fixed stack mode allocates 2M fixed slot for each thread. LinuxThreads ++// in floating stack mode and NPTL support variable stack size. ++bool os::Linux::supports_variable_stack_size() { ++ if (os::Linux::is_NPTL()) { ++ // NPTL, yes ++ return true; ++ ++ } else { ++ // Note: We can't control default stack size when creating a thread. ++ // If we use non-default stack size (pthread_attr_setstacksize), both ++ // floating stack and non-floating stack LinuxThreads will return the ++ // same value. This makes it impossible to implement this function by ++ // detecting thread stack size directly. ++ // ++ // An alternative approach is to check %gs. Fixed-stack LinuxThreads ++ // do not use %gs, so its value is 0. Floating-stack LinuxThreads use ++ // %gs (either as LDT selector or GDT selector, depending on kernel) ++ // to access thread specific data. ++ // ++ // Note that %gs is a reserved glibc register since early 2001, so ++ // applications are not allowed to change its value (Ulrich Drepper from ++ // Redhat confirmed that all known offenders have been modified to use ++ // either %fs or TSD). In the worst case scenario, when VM is embedded in ++ // a native application that plays with %gs, we might see non-zero %gs ++ // even LinuxThreads is running in fixed stack mode. As the result, we'll ++ // return true and skip _thread_safety_check(), so we may not be able to ++ // detect stack-heap collisions. But otherwise it's harmless. ++ // ++ return false; ++ } ++} ++ ++// return default stack size for thr_type ++size_t os::Linux::default_stack_size(os::ThreadType thr_type) { ++ // default stack size (compiler thread needs larger stack) ++ size_t s = (thr_type == os::compiler_thread ? 2 * M : 512 * K); ++ return s; ++} ++ ++size_t os::Linux::default_guard_size(os::ThreadType thr_type) { ++ // Creating guard page is very expensive. Java thread has HotSpot ++ // guard page, only enable glibc guard page for non-Java threads. ++ return (thr_type == java_thread ? 0 : page_size()); ++} ++ ++// Java thread: ++// ++// Low memory addresses ++// +------------------------+ ++// | |\ JavaThread created by VM does not have glibc ++// | glibc guard page | - guard, attached Java thread usually has ++// | |/ 1 page glibc guard. 
++// P1 +------------------------+ Thread::stack_base() - Thread::stack_size() ++// | |\ ++// | HotSpot Guard Pages | - red and yellow pages ++// | |/ ++// +------------------------+ JavaThread::stack_yellow_zone_base() ++// | |\ ++// | Normal Stack | - ++// | |/ ++// P2 +------------------------+ Thread::stack_base() ++// ++// Non-Java thread: ++// ++// Low memory addresses ++// +------------------------+ ++// | |\ ++// | glibc guard page | - usually 1 page ++// | |/ ++// P1 +------------------------+ Thread::stack_base() - Thread::stack_size() ++// | |\ ++// | Normal Stack | - ++// | |/ ++// P2 +------------------------+ Thread::stack_base() ++// ++// ** P1 (aka bottom) and size ( P2 = P1 - size) are the address and stack size returned from ++// pthread_attr_getstack() ++ ++static void current_stack_region(address * bottom, size_t * size) { ++ if (os::is_primordial_thread()) { ++ // primordial thread needs special handling because pthread_getattr_np() ++ // may return bogus value. ++ *bottom = os::Linux::initial_thread_stack_bottom(); ++ *size = os::Linux::initial_thread_stack_size(); ++ } else { ++ pthread_attr_t attr; ++ ++ int rslt = pthread_getattr_np(pthread_self(), &attr); ++ ++ // JVM needs to know exact stack location, abort if it fails ++ if (rslt != 0) { ++ if (rslt == ENOMEM) { ++ vm_exit_out_of_memory(0, OOM_MMAP_ERROR, "pthread_getattr_np"); ++ } else { ++ fatal(err_msg("pthread_getattr_np failed with errno = %d", rslt)); ++ } ++ } ++ ++ if (pthread_attr_getstack(&attr, (void **)bottom, size) != 0) { ++ fatal("Can not locate current stack attributes!"); ++ } ++ ++ pthread_attr_destroy(&attr); ++ ++ } ++ assert(os::current_stack_pointer() >= *bottom && ++ os::current_stack_pointer() < *bottom + *size, "just checking"); ++} ++ ++address os::current_stack_base() { ++ address bottom; ++ size_t size; ++ current_stack_region(&bottom, &size); ++ return (bottom + size); ++} ++ ++size_t os::current_stack_size() { ++ // stack size includes normal stack and HotSpot guard pages ++ address bottom; ++ size_t size; ++ current_stack_region(&bottom, &size); ++ return size; ++} ++ ++///////////////////////////////////////////////////////////////////////////// ++// helper functions for fatal error handler ++void os::print_register_info(outputStream *st, void *context) { ++ if (context == NULL) return; ++ ++ ucontext_t *uc = (ucontext_t*)context; ++ ++ st->print_cr("Register to memory mapping:"); ++ st->cr(); ++ // this is horrendously verbose but the layout of the registers in the ++ // // context does not match how we defined our abstract Register set, so ++ // // we can't just iterate through the gregs area ++ // ++ // // this is only for the "general purpose" registers ++ st->print("R0=" ); print_location(st, uc->uc_mcontext.gregs[0]); ++ st->print("AT=" ); print_location(st, uc->uc_mcontext.gregs[1]); ++ st->print("V0=" ); print_location(st, uc->uc_mcontext.gregs[2]); ++ st->print("V1=" ); print_location(st, uc->uc_mcontext.gregs[3]); ++ st->cr(); ++ st->print("A0=" ); print_location(st, uc->uc_mcontext.gregs[4]); ++ st->print("A1=" ); print_location(st, uc->uc_mcontext.gregs[5]); ++ st->print("A2=" ); print_location(st, uc->uc_mcontext.gregs[6]); ++ st->print("A3=" ); print_location(st, uc->uc_mcontext.gregs[7]); ++ st->cr(); ++ st->print("A4=" ); print_location(st, uc->uc_mcontext.gregs[8]); ++ st->print("A5=" ); print_location(st, uc->uc_mcontext.gregs[9]); ++ st->print("A6=" ); print_location(st, uc->uc_mcontext.gregs[10]); ++ st->print("A7=" ); print_location(st, 
uc->uc_mcontext.gregs[11]); ++ st->cr(); ++ st->print("T0=" ); print_location(st, uc->uc_mcontext.gregs[12]); ++ st->print("T1=" ); print_location(st, uc->uc_mcontext.gregs[13]); ++ st->print("T2=" ); print_location(st, uc->uc_mcontext.gregs[14]); ++ st->print("T3=" ); print_location(st, uc->uc_mcontext.gregs[15]); ++ st->cr(); ++ st->print("S0=" ); print_location(st, uc->uc_mcontext.gregs[16]); ++ st->print("S1=" ); print_location(st, uc->uc_mcontext.gregs[17]); ++ st->print("S2=" ); print_location(st, uc->uc_mcontext.gregs[18]); ++ st->print("S3=" ); print_location(st, uc->uc_mcontext.gregs[19]); ++ st->cr(); ++ st->print("S4=" ); print_location(st, uc->uc_mcontext.gregs[20]); ++ st->print("S5=" ); print_location(st, uc->uc_mcontext.gregs[21]); ++ st->print("S6=" ); print_location(st, uc->uc_mcontext.gregs[22]); ++ st->print("S7=" ); print_location(st, uc->uc_mcontext.gregs[23]); ++ st->cr(); ++ st->print("T8=" ); print_location(st, uc->uc_mcontext.gregs[24]); ++ st->print("T9=" ); print_location(st, uc->uc_mcontext.gregs[25]); ++ st->print("K0=" ); print_location(st, uc->uc_mcontext.gregs[26]); ++ st->print("K1=" ); print_location(st, uc->uc_mcontext.gregs[27]); ++ st->cr(); ++ st->print("GP=" ); print_location(st, uc->uc_mcontext.gregs[28]); ++ st->print("SP=" ); print_location(st, uc->uc_mcontext.gregs[29]); ++ st->print("FP=" ); print_location(st, uc->uc_mcontext.gregs[30]); ++ st->print("RA=" ); print_location(st, uc->uc_mcontext.gregs[31]); ++ st->cr(); ++ ++} ++void os::print_context(outputStream *st, void *context) { ++ if (context == NULL) return; ++ ++ ucontext_t *uc = (ucontext_t*)context; ++ st->print_cr("Registers:"); ++ st->print( "R0=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.gregs[0]); ++ st->print(", AT=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.gregs[1]); ++ st->print(", V0=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.gregs[2]); ++ st->print(", V1=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.gregs[3]); ++ st->cr(); ++ st->print( "A0=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.gregs[4]); ++ st->print(", A1=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.gregs[5]); ++ st->print(", A2=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.gregs[6]); ++ st->print(", A3=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.gregs[7]); ++ st->cr(); ++ st->print( "A4=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.gregs[8]); ++ st->print(", A5=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.gregs[9]); ++ st->print(", A6=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.gregs[10]); ++ st->print(", A7=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.gregs[11]); ++ st->cr(); ++ st->print( "T0=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.gregs[12]); ++ st->print(", T1=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.gregs[13]); ++ st->print(", T2=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.gregs[14]); ++ st->print(", T3=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.gregs[15]); ++ st->cr(); ++ st->print( "S0=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.gregs[16]); ++ st->print(", S1=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.gregs[17]); ++ st->print(", S2=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.gregs[18]); ++ st->print(", S3=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.gregs[19]); ++ st->cr(); ++ st->print( "S4=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.gregs[20]); ++ st->print(", S5=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.gregs[21]); ++ st->print(", S6=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.gregs[22]); ++ st->print(", S7=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.gregs[23]); ++ st->cr(); ++ st->print( "T8=" INTPTR_FORMAT, 
(intptr_t)uc->uc_mcontext.gregs[24]); ++ st->print(", T9=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.gregs[25]); ++ st->print(", K0=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.gregs[26]); ++ st->print(", K1=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.gregs[27]); ++ st->cr(); ++ st->print( "GP=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.gregs[28]); ++ st->print(", SP=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.gregs[29]); ++ st->print(", FP=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.gregs[30]); ++ st->print(", RA=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.gregs[31]); ++ st->cr(); ++ st->cr(); ++ ++ intptr_t *sp = (intptr_t *)os::Linux::ucontext_get_sp(uc); ++ st->print_cr("Top of Stack: (sp=" PTR_FORMAT ")", p2i(sp)); ++ //print_hex_dump(st, (address)sp, (address)(sp + 8*sizeof(intptr_t)), sizeof(intptr_t)); ++ print_hex_dump(st, (address)sp-32, (address)(sp + 32), sizeof(intptr_t)); ++ st->cr(); ++ ++ // Note: it may be unsafe to inspect memory near pc. For example, pc may ++ // point to garbage if entry point in an nmethod is corrupted. Leave ++ // this at the end, and hope for the best. ++ address pc = os::Linux::ucontext_get_pc(uc); ++ st->print_cr("Instructions: (pc=" PTR_FORMAT ")", p2i(pc)); ++ print_hex_dump(st, pc - 64, pc + 64, sizeof(char)); ++ Disassembler::decode(pc - 80, pc + 80, st); ++} ++ ++void os::setup_fpu() { ++ /* ++ //no use for MIPS ++ int fcsr; ++ address fpu_cntrl = StubRoutines::addr_fpu_cntrl_wrd_std(); ++ __asm__ __volatile__ ( ++ ".set noat;" ++ "cfc1 %0, $31;" ++ "sw %0, 0(%1);" ++ : "=r" (fcsr) ++ : "r" (fpu_cntrl) ++ : "memory" ++ ); ++ printf("fpu_cntrl: %lx\n", fpu_cntrl); ++ */ ++} ++ ++#ifndef PRODUCT ++void os::verify_stack_alignment() { ++ assert(((intptr_t)os::current_stack_pointer() & (StackAlignmentInBytes-1)) == 0, "incorrect stack alignment"); ++} ++#endif ++ ++bool os::is_ActiveCoresMP() { ++ return UseActiveCoresMP && _initial_active_processor_count == 1; ++} +diff --git a/hotspot/src/os_cpu/linux_mips/vm/os_linux_mips.hpp b/hotspot/src/os_cpu/linux_mips/vm/os_linux_mips.hpp +new file mode 100644 +index 0000000000..c07d08156f +--- /dev/null ++++ b/hotspot/src/os_cpu/linux_mips/vm/os_linux_mips.hpp +@@ -0,0 +1,39 @@ ++/* ++ * Copyright (c) 1999, 2013, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ * ++ */ ++ ++#ifndef OS_CPU_LINUX_MIPS_VM_OS_LINUX_MIPS_HPP ++#define OS_CPU_LINUX_MIPS_VM_OS_LINUX_MIPS_HPP ++ ++ static void setup_fpu(); ++ static bool is_allocatable(size_t bytes); ++ static intptr_t *get_previous_fp(); ++ ++ // Used to register dynamic code cache area with the OS ++ // Note: Currently only used in 64 bit Windows implementations ++ static bool register_code_area(char *low, char *high) { return true; } ++ ++ static bool is_ActiveCoresMP(); ++ ++#endif // OS_CPU_LINUX_MIPS_VM_OS_LINUX_MIPS_HPP +diff --git a/hotspot/src/os_cpu/linux_mips/vm/prefetch_linux_mips.inline.hpp b/hotspot/src/os_cpu/linux_mips/vm/prefetch_linux_mips.inline.hpp +new file mode 100644 +index 0000000000..93490345f0 +--- /dev/null ++++ b/hotspot/src/os_cpu/linux_mips/vm/prefetch_linux_mips.inline.hpp +@@ -0,0 +1,58 @@ ++/* ++ * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2021, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#ifndef OS_CPU_LINUX_MIPS_VM_PREFETCH_LINUX_MIPS_INLINE_HPP ++#define OS_CPU_LINUX_MIPS_VM_PREFETCH_LINUX_MIPS_INLINE_HPP ++ ++ ++inline void Prefetch::read (void *loc, intx interval) { ++ // 'pref' is implemented as NOP in Loongson 3A ++ __asm__ __volatile__ ( ++ " .set push\n" ++ " .set mips32\n" ++ " .set noreorder\n" ++ " pref 0, 0(%[__loc]) \n" ++ " .set pop\n" ++ : [__loc] "=&r"(loc) ++ : ++ : "memory" ++ ); ++} ++ ++inline void Prefetch::write(void *loc, intx interval) { ++ __asm__ __volatile__ ( ++ " .set push\n" ++ " .set mips32\n" ++ " .set noreorder\n" ++ " pref 1, 0(%[__loc]) \n" ++ " .set pop\n" ++ : [__loc] "=&r"(loc) ++ : ++ : "memory" ++ ); ++ ++} ++ ++#endif // OS_CPU_LINUX_MIPS_VM_PREFETCH_LINUX_MIPS_INLINE_HPP +diff --git a/hotspot/src/os_cpu/linux_mips/vm/threadLS_linux_mips.cpp b/hotspot/src/os_cpu/linux_mips/vm/threadLS_linux_mips.cpp +new file mode 100644 +index 0000000000..be28a562a1 +--- /dev/null ++++ b/hotspot/src/os_cpu/linux_mips/vm/threadLS_linux_mips.cpp +@@ -0,0 +1,84 @@ ++/* ++ * Copyright (c) 1999, 2010, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. 
++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#include "precompiled.hpp" ++#include "runtime/thread.inline.hpp" ++#include "runtime/threadLocalStorage.hpp" ++ ++// Map stack pointer (%esp) to thread pointer for faster TLS access ++// ++// Here we use a flat table for better performance. Getting current thread ++// is down to one memory access (read _sp_map[%esp>>12]) in generated code ++// and two in runtime code (-fPIC code needs an extra load for _sp_map). ++// ++// This code assumes stack page is not shared by different threads. It works ++// in 32-bit VM when page size is 4K (or a multiple of 4K, if that matters). ++// ++// Notice that _sp_map is allocated in the bss segment, which is ZFOD ++// (zero-fill-on-demand). While it reserves 4M address space upfront, ++// actual memory pages are committed on demand. ++// ++// If an application creates and destroys a lot of threads, usually the ++// stack space freed by a thread will soon get reused by new thread ++// (this is especially true in NPTL or LinuxThreads in fixed-stack mode). ++// No memory page in _sp_map is wasted. ++// ++// However, it's still possible that we might end up populating & ++// committing a large fraction of the 4M table over time, but the actual ++// amount of live data in the table could be quite small. The max wastage ++// is less than 4M bytes. If it becomes an issue, we could use madvise() ++// with MADV_DONTNEED to reclaim unused (i.e. all-zero) pages in _sp_map. ++// MADV_DONTNEED on Linux keeps the virtual memory mapping, but zaps the ++// physical memory page (i.e. similar to MADV_FREE on Solaris). 
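The comment above describes the whole trick: the current stack pointer, shifted right by the page shift and masked, indexes a flat table of Thread pointers. A stand-alone sketch of that mapping, assuming the SP_BITLENGTH/PAGE_SHIFT values (34 and 14) defined in threadLS_linux_mips.hpp further down:

    #include <cstddef>
    #include <cstdint>

    // Mirror of the _sp_map lookup used by ThreadLocalStorage::thread() below:
    // every 16K page of stack maps to one slot, so registering a thread fills
    // every slot its stack covers, and lookup is a shift, a mask and one load.
    static const int SP_BITLENGTH = 34;
    static const int PAGE_SHIFT   = 14;              // 16K pages
    static const uintptr_t SP_MASK = (1UL << (SP_BITLENGTH - PAGE_SHIFT)) - 1;

    struct Thread;                                    // opaque in this sketch
    static Thread* sp_map[1UL << (SP_BITLENGTH - PAGE_SHIFT)];

    static inline size_t sp_to_index(uintptr_t sp) {
        return (sp >> PAGE_SHIFT) & SP_MASK;
    }

    static inline Thread* thread_for_sp(uintptr_t sp) {
        return sp_map[sp_to_index(sp)];               // one load in the fast path
    }

    static void register_thread(Thread* t, uintptr_t stack_top, size_t stack_size) {
        for (uintptr_t p = stack_top - stack_size; p < stack_top; p += (1UL << PAGE_SHIFT))
            sp_map[sp_to_index(p)] = t;
    }
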
++ ++#ifdef MINIMIZE_RAM_USAGE ++Thread* ThreadLocalStorage::_sp_map[1UL << (SP_BITLENGTH - PAGE_SHIFT)]; ++#endif // MINIMIZE_RAM_USAGE ++ ++void ThreadLocalStorage::generate_code_for_get_thread() { ++ // nothing we can do here for user-level thread ++} ++ ++void ThreadLocalStorage::pd_init() { ++#ifdef MINIMIZE_RAM_USAGE ++ assert(align_size_down(os::vm_page_size(), PAGE_SIZE) == os::vm_page_size(), ++ "page size must be multiple of PAGE_SIZE"); ++#endif // MINIMIZE_RAM_USAGE ++} ++ ++void ThreadLocalStorage::pd_set_thread(Thread* thread) { ++ os::thread_local_storage_at_put(ThreadLocalStorage::thread_index(), thread); ++#ifdef MINIMIZE_RAM_USAGE ++ address stack_top = os::current_stack_base(); ++ size_t stack_size = os::current_stack_size(); ++ ++ for (address p = stack_top - stack_size; p < stack_top; p += PAGE_SIZE) { ++ int index = ((uintptr_t)p >> PAGE_SHIFT) & ((1UL << (SP_BITLENGTH - PAGE_SHIFT)) - 1); ++ assert(thread == NULL || _sp_map[index] == NULL || thread == _sp_map[index], ++ "thread exited without detaching from VM??"); ++ _sp_map[index] = thread; ++ } ++#endif // MINIMIZE_RAM_USAGE ++} +diff --git a/hotspot/src/os_cpu/linux_mips/vm/threadLS_linux_mips.hpp b/hotspot/src/os_cpu/linux_mips/vm/threadLS_linux_mips.hpp +new file mode 100644 +index 0000000000..e595195e21 +--- /dev/null ++++ b/hotspot/src/os_cpu/linux_mips/vm/threadLS_linux_mips.hpp +@@ -0,0 +1,61 @@ ++/* ++ * Copyright (c) 1999, 2010, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ * ++ */ ++ ++#ifndef OS_CPU_LINUX_MIPS_VM_THREADLS_LINUX_MIPS_HPP ++#define OS_CPU_LINUX_MIPS_VM_THREADLS_LINUX_MIPS_HPP ++ ++#ifdef MINIMIZE_RAM_USAGE ++ // Processor dependent parts of ThreadLocalStorage ++ //only the low 2G space for user program in Linux ++ ++ #define SP_BITLENGTH 34 ++ #define PAGE_SHIFT 14 ++ #define PAGE_SIZE (1UL << PAGE_SHIFT) ++ ++ static Thread* _sp_map[1UL << (SP_BITLENGTH - PAGE_SHIFT)]; ++ static int _sp_map_low; ++ static int _sp_map_high; ++#endif // MINIMIZE_RAM_USAGE ++ ++public: ++#ifdef MINIMIZE_RAM_USAGE ++ static Thread** sp_map_addr() { return _sp_map; } ++#endif // MINIMIZE_RAM_USAGE ++ ++ static Thread* thread() { ++#ifdef MINIMIZE_RAM_USAGE ++ /* Thread::thread() can also be optimized in the same way as __get_thread() */ ++ //return (Thread*) os::thread_local_storage_at(thread_index()); ++ uintptr_t sp; ++ uintptr_t mask = (1UL << (SP_BITLENGTH - PAGE_SHIFT)) - 1; ++ ++ __asm__ __volatile__ ("daddiu %0, $29, 0 " : "=r" (sp)); ++ ++ return _sp_map[(sp >> PAGE_SHIFT) & mask]; ++#else ++ return (Thread*) os::thread_local_storage_at(thread_index()); ++#endif // MINIMIZE_RAM_USAGE ++ } ++#endif // OS_CPU_LINUX_MIPS_VM_THREADLS_LINUX_MIPS_HPP +diff --git a/hotspot/src/os_cpu/linux_mips/vm/thread_linux_mips.cpp b/hotspot/src/os_cpu/linux_mips/vm/thread_linux_mips.cpp +new file mode 100644 +index 0000000000..44f666d61f +--- /dev/null ++++ b/hotspot/src/os_cpu/linux_mips/vm/thread_linux_mips.cpp +@@ -0,0 +1,99 @@ ++/* ++ * Copyright (c) 2003, 2013, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2021, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ * ++ */ ++ ++#include "precompiled.hpp" ++#include "runtime/frame.inline.hpp" ++#include "runtime/thread.inline.hpp" ++#include "runtime/sharedRuntime.hpp" ++ ++void JavaThread::pd_initialize() ++{ ++ _anchor.clear(); ++} ++ ++// For Forte Analyzer AsyncGetCallTrace profiling support - thread is ++// currently interrupted by SIGPROF ++bool JavaThread::pd_get_top_frame_for_signal_handler(frame* fr_addr, ++ void* ucontext, bool isInJava) { ++ ++ assert(Thread::current() == this, "caller must be current thread"); ++ return pd_get_top_frame(fr_addr, ucontext, isInJava); ++} ++ ++bool JavaThread::pd_get_top_frame_for_profiling(frame* fr_addr, void* ucontext, bool isInJava) { ++ return pd_get_top_frame(fr_addr, ucontext, isInJava); ++} ++ ++bool JavaThread::pd_get_top_frame(frame* fr_addr, void* ucontext, bool isInJava) { ++ assert(this->is_Java_thread(), "must be JavaThread"); ++ JavaThread* jt = (JavaThread *)this; ++ ++ // If we have a last_Java_frame, then we should use it even if ++ // isInJava == true. It should be more reliable than ucontext info. ++ if (jt->has_last_Java_frame() && jt->frame_anchor()->walkable()) { ++ *fr_addr = jt->pd_last_frame(); ++ return true; ++ } ++ ++ // At this point, we don't have a last_Java_frame, so ++ // we try to glean some information out of the ucontext ++ // if we were running Java code when SIGPROF came in. ++ if (isInJava) { ++ ucontext_t* uc = (ucontext_t*) ucontext; ++ ++ intptr_t* ret_fp; ++ intptr_t* ret_sp; ++ ExtendedPC addr = os::Linux::fetch_frame_from_ucontext(this, uc, ++ &ret_sp, &ret_fp); ++ if (addr.pc() == NULL || ret_sp == NULL ) { ++ // ucontext wasn't useful ++ return false; ++ } ++ ++ frame ret_frame(ret_sp, ret_fp, addr.pc()); ++ if (!ret_frame.safe_for_sender(jt)) { ++#ifdef COMPILER2 ++ // C2 uses ebp as a general register see if NULL fp helps ++ frame ret_frame2(ret_sp, NULL, addr.pc()); ++ if (!ret_frame2.safe_for_sender(jt)) { ++ // nothing else to try if the frame isn't good ++ return false; ++ } ++ ret_frame = ret_frame2; ++#else ++ // nothing else to try if the frame isn't good ++ return false; ++#endif /* COMPILER2 */ ++ } ++ *fr_addr = ret_frame; ++ return true; ++ } ++ ++ // nothing else to try ++ return false; ++} ++ ++void JavaThread::cache_global_variables() { } ++ +diff --git a/hotspot/src/os_cpu/linux_mips/vm/thread_linux_mips.hpp b/hotspot/src/os_cpu/linux_mips/vm/thread_linux_mips.hpp +new file mode 100644 +index 0000000000..cb11c36ae5 +--- /dev/null ++++ b/hotspot/src/os_cpu/linux_mips/vm/thread_linux_mips.hpp +@@ -0,0 +1,75 @@ ++/* ++ * Copyright (c) 2000, 2013, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2021, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 
++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#ifndef OS_CPU_LINUX_MIPS_VM_THREAD_LINUX_MIPS_HPP ++#define OS_CPU_LINUX_MIPS_VM_THREAD_LINUX_MIPS_HPP ++ ++ private: ++ void pd_initialize(); ++ ++ frame pd_last_frame() { ++ assert(has_last_Java_frame(), "must have last_Java_sp() when suspended"); ++ if (_anchor.last_Java_pc() != NULL) { ++ return frame(_anchor.last_Java_sp(), _anchor.last_Java_fp(), _anchor.last_Java_pc()); ++ } else { ++ // This will pick up pc from sp ++ return frame(_anchor.last_Java_sp(), _anchor.last_Java_fp()); ++ } ++ } ++ ++ ++ public: ++ // Mutators are highly dangerous.... ++ intptr_t* last_Java_fp() { return _anchor.last_Java_fp(); } ++ void set_last_Java_fp(intptr_t* fp) { _anchor.set_last_Java_fp(fp); } ++ ++ void set_base_of_stack_pointer(intptr_t* base_sp) { ++ } ++ ++ static ByteSize last_Java_fp_offset() { ++ return byte_offset_of(JavaThread, _anchor) + JavaFrameAnchor::last_Java_fp_offset(); ++ } ++ ++ intptr_t* base_of_stack_pointer() { ++ return NULL; ++ } ++ void record_base_of_stack_pointer() { ++ } ++ ++ bool pd_get_top_frame_for_signal_handler(frame* fr_addr, void* ucontext, ++ bool isInJava); ++ ++ bool pd_get_top_frame_for_profiling(frame* fr_addr, void* ucontext, bool isInJava); ++private: ++ bool pd_get_top_frame(frame* fr_addr, void* ucontext, bool isInJava); ++public: ++ ++ // These routines are only used on cpu architectures that ++ // have separate register stacks (Itanium). ++ static bool register_stack_overflow() { return false; } ++ static void enable_register_stack_guard() {} ++ static void disable_register_stack_guard() {} ++ ++#endif // OS_CPU_LINUX_MIPS_VM_THREAD_LINUX_MIPS_HPP +diff --git a/hotspot/src/os_cpu/linux_mips/vm/vmStructs_linux_mips.hpp b/hotspot/src/os_cpu/linux_mips/vm/vmStructs_linux_mips.hpp +new file mode 100644 +index 0000000000..b7454bf045 +--- /dev/null ++++ b/hotspot/src/os_cpu/linux_mips/vm/vmStructs_linux_mips.hpp +@@ -0,0 +1,55 @@ ++/* ++ * Copyright (c) 2000, 2013, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2016, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#ifndef OS_CPU_LINUX_MIPS_VM_VMSTRUCTS_LINUX_MIPS_HPP ++#define OS_CPU_LINUX_MIPS_VM_VMSTRUCTS_LINUX_MIPS_HPP ++ ++// These are the OS and CPU-specific fields, types and integer ++// constants required by the Serviceability Agent. This file is ++// referenced by vmStructs.cpp. 
++ ++#define VM_STRUCTS_OS_CPU(nonstatic_field, static_field, unchecked_nonstatic_field, volatile_nonstatic_field, nonproduct_nonstatic_field, c2_nonstatic_field, unchecked_c1_static_field, unchecked_c2_static_field) \ ++ \ ++ /******************************/ \ ++ /* Threads (NOTE: incomplete) */ \ ++ /******************************/ \ ++ nonstatic_field(OSThread, _thread_id, pid_t) \ ++ nonstatic_field(OSThread, _pthread_id, pthread_t) ++ ++ ++#define VM_TYPES_OS_CPU(declare_type, declare_toplevel_type, declare_oop_type, declare_integer_type, declare_unsigned_integer_type, declare_c1_toplevel_type, declare_c2_type, declare_c2_toplevel_type) \ ++ \ ++ /**********************/ \ ++ /* Posix Thread IDs */ \ ++ /**********************/ \ ++ \ ++ declare_integer_type(pid_t) \ ++ declare_unsigned_integer_type(pthread_t) ++ ++#define VM_INT_CONSTANTS_OS_CPU(declare_constant, declare_preprocessor_constant, declare_c1_constant, declare_c2_constant, declare_c2_preprocessor_constant) ++ ++#define VM_LONG_CONSTANTS_OS_CPU(declare_constant, declare_preprocessor_constant, declare_c1_constant, declare_c2_constant, declare_c2_preprocessor_constant) ++ ++#endif // OS_CPU_LINUX_MIPS_VM_VMSTRUCTS_LINUX_MIPS_HPP +diff --git a/hotspot/src/os_cpu/linux_mips/vm/vm_version_linux_mips.cpp b/hotspot/src/os_cpu/linux_mips/vm/vm_version_linux_mips.cpp +new file mode 100644 +index 0000000000..ce697823b9 +--- /dev/null ++++ b/hotspot/src/os_cpu/linux_mips/vm/vm_version_linux_mips.cpp +@@ -0,0 +1,28 @@ ++/* ++ * Copyright (c) 2006, 2010, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2019, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ * ++ */ ++ ++#include "precompiled.hpp" ++#include "runtime/os.hpp" ++#include "vm_version_mips.hpp" +diff --git a/hotspot/src/share/tools/hsdis/Makefile b/hotspot/src/share/tools/hsdis/Makefile +index 0d1b608944..a9754ce2ac 100644 +--- a/hotspot/src/share/tools/hsdis/Makefile ++++ b/hotspot/src/share/tools/hsdis/Makefile +@@ -105,12 +105,25 @@ CFLAGS/sparc += -m32 + endif + CFLAGS += $(CFLAGS/$(ARCH)) + CFLAGS += -fPIC ++ifeq ($(ARCH), mips64) ++CPUINFO = $(shell cat /proc/cpuinfo) ++ifneq ($(findstring Loongson,$(CPUINFO)),) ++CFLAGS += -DLOONGSON ++endif ++endif + OS = linux + LIB_EXT = .so + CC = gcc + endif + CFLAGS += -O + DLDFLAGS += -shared ++ifeq ($(ARCH), mips64) ++DLDFLAGS += -Wl,-z,noexecstack ++endif ++ifeq ($(ARCH), loongarch64) ++DLDFLAGS += -Wl,-z,noexecstack ++CONFIGURE_ARGS += --disable-werror ++endif + LDFLAGS += -ldl + OUTFLAGS += -o $@ + else +diff --git a/hotspot/src/share/tools/hsdis/hsdis.c b/hotspot/src/share/tools/hsdis/hsdis.c +index 4fb4964870..f6ef5bea15 100644 +--- a/hotspot/src/share/tools/hsdis/hsdis.c ++++ b/hotspot/src/share/tools/hsdis/hsdis.c +@@ -493,6 +493,16 @@ static const char* native_arch_name() { + #if defined(LIBARCH_ppc64) || defined(LIBARCH_ppc64le) + res = "powerpc:common64"; + #endif ++#ifdef LIBARCH_mips64 ++#ifdef LOONGSON ++ res = "mips:loongson_3a"; ++#else ++ res = "mips:isa64"; ++#endif ++#endif ++#ifdef LIBARCH_loongarch64 ++ res = "loongarch"; ++#endif + #ifdef LIBARCH_aarch64 + res = "aarch64"; + #endif +diff --git a/hotspot/src/share/vm/adlc/main.cpp b/hotspot/src/share/vm/adlc/main.cpp +index 52044f12d4..50c585872e 100644 +--- a/hotspot/src/share/vm/adlc/main.cpp ++++ b/hotspot/src/share/vm/adlc/main.cpp +@@ -22,6 +22,12 @@ + * + */ + ++/* ++ * This file has been modified by Loongson Technology in 2020. These ++ * modifications are Copyright (c) 2015, 2020, Loongson Technology, and are made ++ * available on the same license terms set forth above. ++ */ ++ + // MAIN.CPP - Entry point for the Architecture Description Language Compiler + #include "adlc.hpp" + +@@ -234,6 +240,14 @@ int main(int argc, char *argv[]) + AD.addInclude(AD._CPP_file, "nativeInst_x86.hpp"); + AD.addInclude(AD._CPP_file, "vmreg_x86.inline.hpp"); + #endif ++#ifdef TARGET_ARCH_mips ++ AD.addInclude(AD._CPP_file, "nativeInst_mips.hpp"); ++ AD.addInclude(AD._CPP_file, "vmreg_mips.inline.hpp"); ++#endif ++#ifdef TARGET_ARCH_loongarch ++ AD.addInclude(AD._CPP_file, "nativeInst_loongarch.hpp"); ++ AD.addInclude(AD._CPP_file, "vmreg_loongarch.inline.hpp"); ++#endif + #ifdef TARGET_ARCH_aarch64 + AD.addInclude(AD._CPP_file, "assembler_aarch64.inline.hpp"); + AD.addInclude(AD._CPP_file, "nativeInst_aarch64.hpp"); +diff --git a/hotspot/src/share/vm/asm/assembler.hpp b/hotspot/src/share/vm/asm/assembler.hpp +index f7f1ae1d36..572aa997ca 100644 +--- a/hotspot/src/share/vm/asm/assembler.hpp ++++ b/hotspot/src/share/vm/asm/assembler.hpp +@@ -22,6 +22,12 @@ + * + */ + ++/* ++ * This file has been modified by Loongson Technology in 2020. These ++ * modifications are Copyright (c) 2015, 2020, Loongson Technology, and are made ++ * available on the same license terms set forth above. 
++ */ ++ + #ifndef SHARE_VM_ASM_ASSEMBLER_HPP + #define SHARE_VM_ASM_ASSEMBLER_HPP + +@@ -53,6 +59,14 @@ + # include "register_ppc.hpp" + # include "vm_version_ppc.hpp" + #endif ++#ifdef TARGET_ARCH_mips ++# include "register_mips.hpp" ++# include "vm_version_mips.hpp" ++#endif ++#ifdef TARGET_ARCH_loongarch ++# include "register_loongarch.hpp" ++# include "vm_version_loongarch.hpp" ++#endif + #ifdef TARGET_ARCH_aarch64 + # include "register_aarch64.hpp" + # include "vm_version_aarch64.hpp" +@@ -468,6 +482,12 @@ class AbstractAssembler : public ResourceObj { + #ifdef TARGET_ARCH_ppc + # include "assembler_ppc.hpp" + #endif ++#ifdef TARGET_ARCH_mips ++# include "assembler_mips.hpp" ++#endif ++#ifdef TARGET_ARCH_loongarch ++# include "assembler_loongarch.hpp" ++#endif + + + #endif // SHARE_VM_ASM_ASSEMBLER_HPP +diff --git a/hotspot/src/share/vm/asm/assembler.inline.hpp b/hotspot/src/share/vm/asm/assembler.inline.hpp +index 1a48cb3171..8ac90e1474 100644 +--- a/hotspot/src/share/vm/asm/assembler.inline.hpp ++++ b/hotspot/src/share/vm/asm/assembler.inline.hpp +@@ -22,6 +22,12 @@ + * + */ + ++/* ++ * This file has been modified by Loongson Technology in 2020. These ++ * modifications are Copyright (c) 2015, 2020, Loongson Technology, and are made ++ * available on the same license terms set forth above. ++ */ ++ + #ifndef SHARE_VM_ASM_ASSEMBLER_INLINE_HPP + #define SHARE_VM_ASM_ASSEMBLER_INLINE_HPP + +@@ -42,6 +48,12 @@ + #ifdef TARGET_ARCH_ppc + # include "assembler_ppc.inline.hpp" + #endif ++#ifdef TARGET_ARCH_mips ++# include "assembler_mips.inline.hpp" ++#endif ++#ifdef TARGET_ARCH_loongarch ++# include "assembler_loongarch.inline.hpp" ++#endif + #ifdef TARGET_ARCH_aarch64 + # include "assembler_aarch64.inline.hpp" + #endif +diff --git a/hotspot/src/share/vm/asm/codeBuffer.cpp b/hotspot/src/share/vm/asm/codeBuffer.cpp +index d94ac40655..f6b578111f 100644 +--- a/hotspot/src/share/vm/asm/codeBuffer.cpp ++++ b/hotspot/src/share/vm/asm/codeBuffer.cpp +@@ -22,6 +22,12 @@ + * + */ + ++/* ++ * This file has been modified by Loongson Technology in 2023. These ++ * modifications are Copyright (c) 2015, 2023, Loongson Technology, and are made ++ * available on the same license terms set forth above. ++ */ ++ + #include "precompiled.hpp" + #include "asm/codeBuffer.hpp" + #include "compiler/disassembler.hpp" +@@ -323,6 +329,7 @@ void CodeSection::relocate(address at, RelocationHolder const& spec, int format) + assert(rtype == relocInfo::none || + rtype == relocInfo::runtime_call_type || + rtype == relocInfo::internal_word_type|| ++ NOT_ZERO(MIPS64_ONLY(rtype == relocInfo::internal_pc_type ||)) + rtype == relocInfo::section_word_type || + rtype == relocInfo::external_word_type, + "code needs relocation information"); +diff --git a/hotspot/src/share/vm/asm/codeBuffer.hpp b/hotspot/src/share/vm/asm/codeBuffer.hpp +index 02b619ad77..c04560a0bc 100644 +--- a/hotspot/src/share/vm/asm/codeBuffer.hpp ++++ b/hotspot/src/share/vm/asm/codeBuffer.hpp +@@ -22,6 +22,12 @@ + * + */ + ++/* ++ * This file has been modified by Loongson Technology in 2020. These ++ * modifications are Copyright (c) 2015, 2020, Loongson Technology, and are made ++ * available on the same license terms set forth above. 
++ */ ++ + #ifndef SHARE_VM_ASM_CODEBUFFER_HPP + #define SHARE_VM_ASM_CODEBUFFER_HPP + +@@ -635,6 +641,12 @@ class CodeBuffer: public StackObj { + #ifdef TARGET_ARCH_ppc + # include "codeBuffer_ppc.hpp" + #endif ++#ifdef TARGET_ARCH_mips ++# include "codeBuffer_mips.hpp" ++#endif ++#ifdef TARGET_ARCH_loongarch ++# include "codeBuffer_loongarch.hpp" ++#endif + + }; + +diff --git a/hotspot/src/share/vm/asm/macroAssembler.hpp b/hotspot/src/share/vm/asm/macroAssembler.hpp +index 1482eb630b..0be415b6c5 100644 +--- a/hotspot/src/share/vm/asm/macroAssembler.hpp ++++ b/hotspot/src/share/vm/asm/macroAssembler.hpp +@@ -22,6 +22,12 @@ + * + */ + ++/* ++ * This file has been modified by Loongson Technology in 2020. These ++ * modifications are Copyright (c) 2015, 2020, Loongson Technology, and are made ++ * available on the same license terms set forth above. ++ */ ++ + #ifndef SHARE_VM_ASM_MACROASSEMBLER_HPP + #define SHARE_VM_ASM_MACROASSEMBLER_HPP + +@@ -45,5 +51,10 @@ + #ifdef TARGET_ARCH_aarch64 + # include "macroAssembler_aarch64.hpp" + #endif +- ++#ifdef TARGET_ARCH_mips ++# include "macroAssembler_mips.hpp" ++#endif ++#ifdef TARGET_ARCH_loongarch ++# include "macroAssembler_loongarch.hpp" ++#endif + #endif // SHARE_VM_ASM_MACROASSEMBLER_HPP +diff --git a/hotspot/src/share/vm/asm/macroAssembler.inline.hpp b/hotspot/src/share/vm/asm/macroAssembler.inline.hpp +index db3daa52e9..6f4e523c59 100644 +--- a/hotspot/src/share/vm/asm/macroAssembler.inline.hpp ++++ b/hotspot/src/share/vm/asm/macroAssembler.inline.hpp +@@ -22,6 +22,12 @@ + * + */ + ++/* ++ * This file has been modified by Loongson Technology in 2020. These ++ * modifications are Copyright (c) 2015, 2020, Loongson Technology, and are made ++ * available on the same license terms set forth above. ++ */ ++ + #ifndef SHARE_VM_ASM_MACROASSEMBLER_INLINE_HPP + #define SHARE_VM_ASM_MACROASSEMBLER_INLINE_HPP + +@@ -42,6 +48,12 @@ + #ifdef TARGET_ARCH_ppc + # include "macroAssembler_ppc.inline.hpp" + #endif ++#ifdef TARGET_ARCH_mips ++# include "macroAssembler_mips.inline.hpp" ++#endif ++#ifdef TARGET_ARCH_loongarch ++# include "macroAssembler_loongarch.inline.hpp" ++#endif + #ifdef TARGET_ARCH_aarch64 + # include "macroAssembler_aarch64.inline.hpp" + #endif +diff --git a/hotspot/src/share/vm/asm/register.hpp b/hotspot/src/share/vm/asm/register.hpp +index c500890181..6a20929e59 100644 +--- a/hotspot/src/share/vm/asm/register.hpp ++++ b/hotspot/src/share/vm/asm/register.hpp +@@ -22,6 +22,12 @@ + * + */ + ++/* ++ * This file has been modified by Loongson Technology in 2020. These ++ * modifications are Copyright (c) 2015, 2020, Loongson Technology, and are made ++ * available on the same license terms set forth above. ++ */ ++ + #ifndef SHARE_VM_ASM_REGISTER_HPP + #define SHARE_VM_ASM_REGISTER_HPP + +@@ -108,6 +114,12 @@ const type name = ((type)name##_##type##EnumValue) + #ifdef TARGET_ARCH_ppc + # include "register_ppc.hpp" + #endif ++#ifdef TARGET_ARCH_mips ++# include "register_mips.hpp" ++#endif ++#ifdef TARGET_ARCH_loongarch ++# include "register_loongarch.hpp" ++#endif + #ifdef TARGET_ARCH_aarch64 + # include "register_aarch64.hpp" + #endif +diff --git a/hotspot/src/share/vm/c1/c1_Defs.hpp b/hotspot/src/share/vm/c1/c1_Defs.hpp +index b0cd763739..b42b9de1b5 100644 +--- a/hotspot/src/share/vm/c1/c1_Defs.hpp ++++ b/hotspot/src/share/vm/c1/c1_Defs.hpp +@@ -22,6 +22,12 @@ + * + */ + ++/* ++ * This file has been modified by Loongson Technology in 2022. 
These ++ * modifications are Copyright (c) 2015, 2022, Loongson Technology, and are made ++ * available on the same license terms set forth above. ++ */ ++ + #ifndef SHARE_VM_C1_C1_DEFS_HPP + #define SHARE_VM_C1_C1_DEFS_HPP + +@@ -29,6 +35,9 @@ + #ifdef TARGET_ARCH_x86 + # include "register_x86.hpp" + #endif ++#ifdef TARGET_ARCH_loongarch ++# include "register_loongarch.hpp" ++#endif + #ifdef TARGET_ARCH_aarch64 + # include "register_aarch64.hpp" + #endif +@@ -56,6 +65,9 @@ enum { + #ifdef TARGET_ARCH_x86 + # include "c1_Defs_x86.hpp" + #endif ++#ifdef TARGET_ARCH_loongarch ++# include "c1_Defs_loongarch.hpp" ++#endif + #ifdef TARGET_ARCH_aarch64 + # include "c1_Defs_aarch64.hpp" + #endif +diff --git a/hotspot/src/share/vm/c1/c1_FpuStackSim.hpp b/hotspot/src/share/vm/c1/c1_FpuStackSim.hpp +index f07e97a4d3..6bc367a897 100644 +--- a/hotspot/src/share/vm/c1/c1_FpuStackSim.hpp ++++ b/hotspot/src/share/vm/c1/c1_FpuStackSim.hpp +@@ -22,6 +22,12 @@ + * + */ + ++/* ++ * This file has been modified by Loongson Technology in 2022. These ++ * modifications are Copyright (c) 2015, 2022, Loongson Technology, and are made ++ * available on the same license terms set forth above. ++ */ ++ + #ifndef SHARE_VM_C1_C1_FPUSTACKSIM_HPP + #define SHARE_VM_C1_C1_FPUSTACKSIM_HPP + +@@ -35,6 +41,9 @@ class FpuStackSim; + #ifdef TARGET_ARCH_x86 + # include "c1_FpuStackSim_x86.hpp" + #endif ++#ifdef TARGET_ARCH_loongarch ++# include "c1_FpuStackSim_loongarch.hpp" ++#endif + #ifdef TARGET_ARCH_aarch64 + # include "c1_FpuStackSim_aarch64.hpp" + #endif +diff --git a/hotspot/src/share/vm/c1/c1_FrameMap.cpp b/hotspot/src/share/vm/c1/c1_FrameMap.cpp +index 1dac94d58c..b1e37ec41c 100644 +--- a/hotspot/src/share/vm/c1/c1_FrameMap.cpp ++++ b/hotspot/src/share/vm/c1/c1_FrameMap.cpp +@@ -22,6 +22,12 @@ + * + */ + ++/* ++ * This file has been modified by Loongson Technology in 2022. These ++ * modifications are Copyright (c) 2015, 2022, Loongson Technology, and are made ++ * available on the same license terms set forth above. ++ */ ++ + #include "precompiled.hpp" + #include "c1/c1_FrameMap.hpp" + #include "c1/c1_LIR.hpp" +@@ -29,6 +35,9 @@ + #ifdef TARGET_ARCH_x86 + # include "vmreg_x86.inline.hpp" + #endif ++#ifdef TARGET_ARCH_loongarch ++# include "vmreg_loongarch.inline.hpp" ++#endif + #ifdef TARGET_ARCH_aarch64 + # include "vmreg_aarch64.inline.hpp" + #endif +diff --git a/hotspot/src/share/vm/c1/c1_FrameMap.hpp b/hotspot/src/share/vm/c1/c1_FrameMap.hpp +index 41571e3d16..c0e7b28ea4 100644 +--- a/hotspot/src/share/vm/c1/c1_FrameMap.hpp ++++ b/hotspot/src/share/vm/c1/c1_FrameMap.hpp +@@ -22,6 +22,12 @@ + * + */ + ++/* ++ * This file has been modified by Loongson Technology in 2022. These ++ * modifications are Copyright (c) 2015, 2022, Loongson Technology, and are made ++ * available on the same license terms set forth above. ++ */ ++ + #ifndef SHARE_VM_C1_C1_FRAMEMAP_HPP + #define SHARE_VM_C1_C1_FRAMEMAP_HPP + +@@ -85,6 +91,9 @@ class FrameMap : public CompilationResourceObj { + #ifdef TARGET_ARCH_x86 + # include "c1_FrameMap_x86.hpp" + #endif ++#ifdef TARGET_ARCH_loongarch ++# include "c1_FrameMap_loongarch.hpp" ++#endif + #ifdef TARGET_ARCH_aarch64 + # include "c1_FrameMap_aarch64.hpp" + #endif +diff --git a/hotspot/src/share/vm/c1/c1_LIR.cpp b/hotspot/src/share/vm/c1/c1_LIR.cpp +index fa37e7a046..5d33d3f7a0 100644 +--- a/hotspot/src/share/vm/c1/c1_LIR.cpp ++++ b/hotspot/src/share/vm/c1/c1_LIR.cpp +@@ -22,6 +22,12 @@ + * + */ + ++/* ++ * This file has been modified by Loongson Technology in 2022. 
These ++ * modifications are Copyright (c) 2015, 2022, Loongson Technology, and are made ++ * available on the same license terms set forth above. ++ */ ++ + #include "precompiled.hpp" + #include "c1/c1_InstructionPrinter.hpp" + #include "c1/c1_LIR.hpp" +@@ -79,6 +85,17 @@ FloatRegister LIR_OprDesc::as_double_reg() const { + + #endif + ++#if defined(LOONGARCH64) ++ ++FloatRegister LIR_OprDesc::as_float_reg() const { ++ return as_FloatRegister(fpu_regnr()); ++} ++ ++FloatRegister LIR_OprDesc::as_double_reg() const { ++ return as_FloatRegister(fpu_regnrLo()); ++} ++ ++#endif + + LIR_Opr LIR_OprFact::illegalOpr = LIR_OprFact::illegal(); + +@@ -149,13 +166,19 @@ void LIR_Address::verify0() const { + #endif + #ifdef _LP64 + assert(base()->is_cpu_register(), "wrong base operand"); +-#ifndef AARCH64 ++#if !defined(AARCH64) && !defined(LOONGARCH64) + assert(index()->is_illegal() || index()->is_double_cpu(), "wrong index operand"); + #else + assert(index()->is_illegal() || index()->is_double_cpu() || index()->is_single_cpu(), "wrong index operand"); + #endif ++#ifdef LOONGARCH64 ++ assert(base()->type() == T_ADDRESS || base()->type() == T_OBJECT || ++ base()->type() == T_LONG || base()->type() == T_METADATA, ++ "wrong type for addresses"); ++#else + assert(base()->type() == T_OBJECT || base()->type() == T_LONG || base()->type() == T_METADATA, + "wrong type for addresses"); ++#endif + #else + assert(base()->is_single_cpu(), "wrong base operand"); + assert(index()->is_illegal() || index()->is_single_cpu(), "wrong index operand"); +@@ -258,8 +281,6 @@ bool LIR_OprDesc::is_oop() const { + } + } + +- +- + void LIR_Op2::verify() const { + #ifdef ASSERT + switch (code()) { +@@ -301,6 +322,18 @@ void LIR_Op2::verify() const { + #endif + } + ++void LIR_Op4::verify() const { ++#ifdef ASSERT ++ switch (code()) { ++ case lir_cmp_cmove: ++ break; ++ ++ default: ++ assert(!result_opr()->is_register() || !result_opr()->is_oop_register(), ++ "can't produce oops from arith"); ++ } ++#endif ++} + + LIR_OpBranch::LIR_OpBranch(LIR_Condition cond, BasicType type, BlockBegin* block) + : LIR_Op(lir_branch, LIR_OprFact::illegalOpr, (CodeEmitInfo*)NULL) +@@ -358,6 +391,55 @@ void LIR_OpBranch::negate_cond() { + } + } + ++LIR_OpCmpBranch::LIR_OpCmpBranch(LIR_Condition cond, LIR_Opr left, LIR_Opr right, CodeStub* stub, CodeEmitInfo* info) ++ : LIR_Op2(lir_cmp_branch, cond, left, right, info) ++ , _label(stub->entry()) ++ , _block(NULL) ++ , _ublock(NULL) ++ , _stub(stub) { ++} ++ ++LIR_OpCmpBranch::LIR_OpCmpBranch(LIR_Condition cond, LIR_Opr left, LIR_Opr right, BlockBegin* block, CodeEmitInfo* info) ++ : LIR_Op2(lir_cmp_branch, cond, left, right, info) ++ , _label(block->label()) ++ , _block(block) ++ , _ublock(NULL) ++ , _stub(NULL) { ++} ++ ++LIR_OpCmpBranch::LIR_OpCmpBranch(LIR_Condition cond, LIR_Opr left, LIR_Opr right, BlockBegin* block, BlockBegin* ublock, CodeEmitInfo* info) ++ : LIR_Op2(lir_cmp_float_branch, cond, left, right, info) ++ , _label(block->label()) ++ , _block(block) ++ , _ublock(ublock) ++ , _stub(NULL) { ++} ++ ++void LIR_OpCmpBranch::change_block(BlockBegin* b) { ++ assert(_block != NULL, "must have old block"); ++ assert(_block->label() == label(), "must be equal"); ++ ++ _block = b; ++ _label = b->label(); ++} ++ ++void LIR_OpCmpBranch::change_ublock(BlockBegin* b) { ++ assert(_ublock != NULL, "must have old block"); ++ ++ _ublock = b; ++} ++ ++void LIR_OpCmpBranch::negate_cond() { ++ switch (condition()) { ++ case lir_cond_equal: set_condition(lir_cond_notEqual); break; ++ case lir_cond_notEqual: 
set_condition(lir_cond_equal); break; ++ case lir_cond_less: set_condition(lir_cond_greaterEqual); break; ++ case lir_cond_lessEqual: set_condition(lir_cond_greater); break; ++ case lir_cond_greaterEqual: set_condition(lir_cond_less); break; ++ case lir_cond_greater: set_condition(lir_cond_lessEqual); break; ++ default: ShouldNotReachHere(); ++ } ++} + + LIR_OpTypeCheck::LIR_OpTypeCheck(LIR_Code code, LIR_Opr result, LIR_Opr object, ciKlass* klass, + LIR_Opr tmp1, LIR_Opr tmp2, LIR_Opr tmp3, +@@ -560,10 +642,7 @@ void LIR_OpVisitState::visit(LIR_Op* op) { + assert(opConvert->_info == NULL, "must be"); + if (opConvert->_opr->is_valid()) do_input(opConvert->_opr); + if (opConvert->_result->is_valid()) do_output(opConvert->_result); +-#if defined(PPC) || defined(AARCH64) +- if (opConvert->_tmp1->is_valid()) do_temp(opConvert->_tmp1); +- if (opConvert->_tmp2->is_valid()) do_temp(opConvert->_tmp2); +-#endif ++ if (opConvert->_tmp->is_valid()) do_temp(opConvert->_tmp); + do_stub(opConvert->_stub); + + break; +@@ -661,6 +740,25 @@ void LIR_OpVisitState::visit(LIR_Op* op) { + break; + } + ++// LIR_OpCmpBranch; ++ case lir_cmp_branch: // may have info, input and result register always invalid ++ case lir_cmp_float_branch: // may have info, input and result register always invalid ++ { ++ assert(op->as_OpCmpBranch() != NULL, "must be"); ++ LIR_OpCmpBranch* opCmpBranch = (LIR_OpCmpBranch*)op; ++ assert(opCmpBranch->_tmp2->is_illegal() && opCmpBranch->_tmp3->is_illegal() && ++ opCmpBranch->_tmp4->is_illegal() && opCmpBranch->_tmp5->is_illegal(), "not used"); ++ ++ if (opCmpBranch->_info) do_info(opCmpBranch->_info); ++ if (opCmpBranch->_opr1->is_valid()) do_input(opCmpBranch->_opr1); ++ if (opCmpBranch->_opr2->is_valid()) do_input(opCmpBranch->_opr2); ++ if (opCmpBranch->_tmp1->is_valid()) do_temp(opCmpBranch->_tmp1); ++ if (opCmpBranch->_stub != NULL) opCmpBranch->stub()->visit(this); ++ assert(opCmpBranch->_result->is_illegal(), "not used"); ++ ++ break; ++ } ++ + // special handling for cmove: right input operand must not be equal + // to the result operand, otherwise the backend fails + case lir_cmove: +@@ -806,6 +904,29 @@ void LIR_OpVisitState::visit(LIR_Op* op) { + break; + } + ++// LIR_Op4 ++ // special handling for cmp cmove: src2(opr4) operand must not be equal ++ // to the result operand, otherwise the backend fails ++ case lir_cmp_cmove: ++ { ++ assert(op->as_Op4() != NULL, "must be"); ++ LIR_Op4* op4 = (LIR_Op4*)op; ++ ++ assert(op4->_info == NULL, "not used"); ++ assert(op4->_opr1->is_valid() && op4->_opr2->is_valid() && ++ op4->_opr3->is_valid() && op4->_opr4->is_valid() && ++ op4->_result->is_valid(), "used"); ++ ++ do_input(op4->_opr1); ++ do_input(op4->_opr2); ++ do_input(op4->_opr3); ++ do_input(op4->_opr4); ++ do_temp(op4->_opr4); ++ do_output(op4->_result); ++ ++ break; ++ } ++ + + // LIR_OpJavaCall + case lir_static_call: +@@ -1121,6 +1242,13 @@ void LIR_Op2::emit_code(LIR_Assembler* masm) { + masm->emit_op2(this); + } + ++void LIR_OpCmpBranch::emit_code(LIR_Assembler* masm) { ++ masm->emit_opCmpBranch(this); ++ if (stub()) { ++ masm->append_code_stub(stub()); ++ } ++} ++ + void LIR_OpAllocArray::emit_code(LIR_Assembler* masm) { + masm->emit_alloc_array(this); + masm->append_code_stub(stub()); +@@ -1141,6 +1269,10 @@ void LIR_Op3::emit_code(LIR_Assembler* masm) { + masm->emit_op3(this); + } + ++void LIR_Op4::emit_code(LIR_Assembler* masm) { ++ masm->emit_op4(this); ++} ++ + void LIR_OpLock::emit_code(LIR_Assembler* masm) { + masm->emit_lock(this); + if (stub()) { +@@ -1381,7 
+1513,6 @@ void LIR_List::cmp_mem_int(LIR_Condition condition, LIR_Opr base, int disp, int + info)); + } + +- + void LIR_List::cmp_reg_mem(LIR_Condition condition, LIR_Opr reg, LIR_Address* addr, CodeEmitInfo* info) { + append(new LIR_Op2( + lir_cmp, +@@ -1391,6 +1522,17 @@ void LIR_List::cmp_reg_mem(LIR_Condition condition, LIR_Opr reg, LIR_Address* ad + info)); + } + ++void LIR_List::null_check(LIR_Opr opr, CodeEmitInfo* info, bool deoptimize_on_null) { ++ if (deoptimize_on_null) { ++ // Emit an explicit null check and deoptimize if opr is null ++ CodeStub* deopt = new DeoptimizeStub(info); ++ cmp_branch(lir_cond_equal, opr, LIR_OprFact::oopConst(NULL), T_OBJECT, deopt); ++ } else { ++ // Emit an implicit null check ++ append(new LIR_Op1(lir_null_check, opr, info)); ++ } ++} ++ + void LIR_List::allocate_object(LIR_Opr dst, LIR_Opr t1, LIR_Opr t2, LIR_Opr t3, LIR_Opr t4, + int header_size, int object_size, LIR_Opr klass, bool init_check, CodeStub* stub) { + append(new LIR_OpAllocObj( +@@ -1520,18 +1662,6 @@ void LIR_List::store_check(LIR_Opr object, LIR_Opr array, LIR_Opr tmp1, LIR_Opr + append(c); + } + +-void LIR_List::null_check(LIR_Opr opr, CodeEmitInfo* info, bool deoptimize_on_null) { +- if (deoptimize_on_null) { +- // Emit an explicit null check and deoptimize if opr is null +- CodeStub* deopt = new DeoptimizeStub(info); +- cmp(lir_cond_equal, opr, LIR_OprFact::oopConst(NULL)); +- branch(lir_cond_equal, T_OBJECT, deopt); +- } else { +- // Emit an implicit null check +- append(new LIR_Op1(lir_null_check, opr, info)); +- } +-} +- + void LIR_List::cas_long(LIR_Opr addr, LIR_Opr cmp_value, LIR_Opr new_value, + LIR_Opr t1, LIR_Opr t2, LIR_Opr result) { + append(new LIR_OpCompareAndSwap(lir_cas_long, addr, cmp_value, new_value, t1, t2, result)); +@@ -1780,6 +1910,8 @@ const char * LIR_Op::name() const { + case lir_cmp_l2i: s = "cmp_l2i"; break; + case lir_ucmp_fd2i: s = "ucomp_fd2i"; break; + case lir_cmp_fd2i: s = "comp_fd2i"; break; ++ case lir_cmp_branch: s = "cmp_branch"; break; ++ case lir_cmp_float_branch: s = "cmp_fbranch"; break; + case lir_cmove: s = "cmove"; break; + case lir_add: s = "add"; break; + case lir_sub: s = "sub"; break; +@@ -1809,6 +1941,8 @@ const char * LIR_Op::name() const { + // LIR_Op3 + case lir_idiv: s = "idiv"; break; + case lir_irem: s = "irem"; break; ++ // LIR_Op4 ++ case lir_cmp_cmove: s = "cmp_cmove"; break; + // LIR_OpJavaCall + case lir_static_call: s = "static"; break; + case lir_optvirtual_call: s = "optvirtual"; break; +@@ -1960,6 +2094,26 @@ void LIR_OpBranch::print_instr(outputStream* out) const { + } + } + ++// LIR_OpCmpBranch ++void LIR_OpCmpBranch::print_instr(outputStream* out) const { ++ print_condition(out, condition()); out->print(" "); ++ in_opr1()->print(out); out->print(" "); ++ in_opr2()->print(out); out->print(" "); ++ if (block() != NULL) { ++ out->print("[B%d] ", block()->block_id()); ++ } else if (stub() != NULL) { ++ out->print("["); ++ stub()->print_name(out); ++ out->print(": " INTPTR_FORMAT "]", p2i(stub())); ++ if (stub()->info() != NULL) out->print(" [bci:%d]", stub()->info()->stack()->bci()); ++ } else { ++ out->print("[label:" INTPTR_FORMAT "] ", p2i(label())); ++ } ++ if (ublock() != NULL) { ++ out->print("unordered: [B%d] ", ublock()->block_id()); ++ } ++} ++ + void LIR_Op::print_condition(outputStream* out, LIR_Condition cond) { + switch(cond) { + case lir_cond_equal: out->print("[EQ]"); break; +@@ -1980,12 +2134,9 @@ void LIR_OpConvert::print_instr(outputStream* out) const { + print_bytecode(out, bytecode()); + 
in_opr()->print(out); out->print(" "); + result_opr()->print(out); out->print(" "); +-#if defined(PPC) || defined(AARCH64) +- if(tmp1()->is_valid()) { +- tmp1()->print(out); out->print(" "); +- tmp2()->print(out); out->print(" "); ++ if(tmp()->is_valid()) { ++ tmp()->print(out); out->print(" "); + } +-#endif + } + + void LIR_OpConvert::print_bytecode(outputStream* out, Bytecodes::Code code) { +@@ -2031,9 +2182,6 @@ void LIR_OpRoundFP::print_instr(outputStream* out) const { + + // LIR_Op2 + void LIR_Op2::print_instr(outputStream* out) const { +- if (code() == lir_cmove) { +- print_condition(out, condition()); out->print(" "); +- } + in_opr1()->print(out); out->print(" "); + in_opr2()->print(out); out->print(" "); + if (tmp1_opr()->is_valid()) { tmp1_opr()->print(out); out->print(" "); } +@@ -2082,6 +2230,18 @@ void LIR_Op3::print_instr(outputStream* out) const { + result_opr()->print(out); + } + ++// LIR_Op4 ++void LIR_Op4::print_instr(outputStream* out) const { ++ if (code() == lir_cmp_cmove) { ++ print_condition(out, condition()); out->print(" "); ++ } ++ in_opr1()->print(out); out->print(" "); ++ in_opr2()->print(out); out->print(" "); ++ in_opr3()->print(out); out->print(" "); ++ in_opr4()->print(out); out->print(" "); ++ result_opr()->print(out); ++} ++ + + void LIR_OpLock::print_instr(outputStream* out) const { + hdr_opr()->print(out); out->print(" "); +@@ -2095,10 +2255,14 @@ void LIR_OpLock::print_instr(outputStream* out) const { + + #ifdef ASSERT + void LIR_OpAssert::print_instr(outputStream* out) const { ++ tty->print_cr("function LIR_OpAssert::print_instr unimplemented yet! "); ++ Unimplemented(); ++ /* + print_condition(out, condition()); out->print(" "); + in_opr1()->print(out); out->print(" "); + in_opr2()->print(out); out->print(", \""); + out->print("%s", msg()); out->print("\""); ++ */ + } + #endif + +diff --git a/hotspot/src/share/vm/c1/c1_LIR.hpp b/hotspot/src/share/vm/c1/c1_LIR.hpp +index 24b8620211..aec77afe1f 100644 +--- a/hotspot/src/share/vm/c1/c1_LIR.hpp ++++ b/hotspot/src/share/vm/c1/c1_LIR.hpp +@@ -22,6 +22,11 @@ + * + */ + ++/* ++ * This file has been modified by Loongson Technology in 2022. These ++ * modifications are Copyright (c) 2018, 2022, Loongson Technology, and are made ++ * available on the same license terms set forth above. 
++ */ + #ifndef SHARE_VM_C1_C1_LIR_HPP + #define SHARE_VM_C1_C1_LIR_HPP + +@@ -452,7 +457,7 @@ class LIR_OprDesc: public CompilationResourceObj { + // for compatibility with RInfo + int fpu () const { return lo_reg_half(); } + #endif +-#if defined(SPARC) || defined(ARM) || defined(PPC) || defined(AARCH64) ++#if defined(SPARC) || defined(ARM) || defined(PPC) || defined(AARCH64) || defined(LOONGARCH) + FloatRegister as_float_reg () const; + FloatRegister as_double_reg () const; + #endif +@@ -542,7 +547,7 @@ class LIR_Address: public LIR_OprPtr { + , _type(type) + , _disp(0) { verify(); } + +-#if defined(X86) || defined(ARM) || defined(AARCH64) ++#if defined(X86) || defined(ARM) || defined(AARCH64) || defined(LOONGARCH) + LIR_Address(LIR_Opr base, LIR_Opr index, Scale scale, intx disp, BasicType type): + _base(base) + , _index(index) +@@ -658,7 +663,13 @@ class LIR_OprFact: public AllStatic { + LIR_OprDesc::double_type | + LIR_OprDesc::cpu_register | + LIR_OprDesc::double_size); } +-#endif // PPC ++#elif defined(LOONGARCH) ++ static LIR_Opr double_fpu(int reg) { return (LIR_Opr)(intptr_t)((reg << LIR_OprDesc::reg1_shift) | ++ (reg << LIR_OprDesc::reg2_shift) | ++ LIR_OprDesc::double_type | ++ LIR_OprDesc::fpu_register | ++ LIR_OprDesc::double_size); } ++#endif // LOONGARCH + + static LIR_Opr virtual_register(int index, BasicType type) { + LIR_Opr res; +@@ -872,9 +883,11 @@ class LIR_OpConvert; + class LIR_OpAllocObj; + class LIR_OpRoundFP; + class LIR_Op2; ++class LIR_OpCmpBranch; + class LIR_OpDelay; + class LIR_Op3; + class LIR_OpAllocArray; ++class LIR_Op4; + class LIR_OpCall; + class LIR_OpJavaCall; + class LIR_OpRTCall; +@@ -943,6 +956,8 @@ enum LIR_Code { + , lir_cmp_l2i + , lir_ucmp_fd2i + , lir_cmp_fd2i ++ , lir_cmp_branch ++ , lir_cmp_float_branch + , lir_cmove + , lir_add + , lir_sub +@@ -976,6 +991,9 @@ enum LIR_Code { + , lir_idiv + , lir_irem + , end_op3 ++ , begin_op4 ++ , lir_cmp_cmove ++ , end_op4 + , begin_opJavaCall + , lir_static_call + , lir_optvirtual_call +@@ -1139,12 +1157,14 @@ class LIR_Op: public CompilationResourceObj { + virtual LIR_OpAllocObj* as_OpAllocObj() { return NULL; } + virtual LIR_OpRoundFP* as_OpRoundFP() { return NULL; } + virtual LIR_OpBranch* as_OpBranch() { return NULL; } ++ virtual LIR_OpCmpBranch* as_OpCmpBranch() { return NULL; } + virtual LIR_OpRTCall* as_OpRTCall() { return NULL; } + virtual LIR_OpConvert* as_OpConvert() { return NULL; } + virtual LIR_Op0* as_Op0() { return NULL; } + virtual LIR_Op1* as_Op1() { return NULL; } + virtual LIR_Op2* as_Op2() { return NULL; } + virtual LIR_Op3* as_Op3() { return NULL; } ++ virtual LIR_Op4* as_Op4() { return NULL; } + virtual LIR_OpArrayCopy* as_OpArrayCopy() { return NULL; } + virtual LIR_OpUpdateCRC32* as_OpUpdateCRC32() { return NULL; } + virtual LIR_OpTypeCheck* as_OpTypeCheck() { return NULL; } +@@ -1474,37 +1494,18 @@ class LIR_OpConvert: public LIR_Op1 { + private: + Bytecodes::Code _bytecode; + ConversionStub* _stub; +-#if defined(PPC) || defined(AARCH64) +- LIR_Opr _tmp1; +- LIR_Opr _tmp2; +-#endif ++ LIR_Opr _tmp; + + public: +- LIR_OpConvert(Bytecodes::Code code, LIR_Opr opr, LIR_Opr result, ConversionStub* stub) ++ LIR_OpConvert(Bytecodes::Code code, LIR_Opr opr, LIR_Opr result, ConversionStub* stub, LIR_Opr tmp) + : LIR_Op1(lir_convert, opr, result) + , _stub(stub) +-#ifdef PPC +- , _tmp1(LIR_OprDesc::illegalOpr()) +- , _tmp2(LIR_OprDesc::illegalOpr()) +-#endif ++ , _tmp(tmp) + , _bytecode(code) {} + +-#if defined(PPC) || defined(AARCH64) +- LIR_OpConvert(Bytecodes::Code code, LIR_Opr opr, 
LIR_Opr result, ConversionStub* stub +- ,LIR_Opr tmp1, LIR_Opr tmp2) +- : LIR_Op1(lir_convert, opr, result) +- , _stub(stub) +- , _tmp1(tmp1) +- , _tmp2(tmp2) +- , _bytecode(code) {} +-#endif +- + Bytecodes::Code bytecode() const { return _bytecode; } + ConversionStub* stub() const { return _stub; } +-#if defined(PPC) || defined(AARCH64) +- LIR_Opr tmp1() const { return _tmp1; } +- LIR_Opr tmp2() const { return _tmp2; } +-#endif ++ LIR_Opr tmp() const { return _tmp; } + + virtual void emit_code(LIR_Assembler* masm); + virtual LIR_OpConvert* as_OpConvert() { return this; } +@@ -1659,7 +1660,7 @@ class LIR_Op2: public LIR_Op { + , _tmp3(LIR_OprFact::illegalOpr) + , _tmp4(LIR_OprFact::illegalOpr) + , _tmp5(LIR_OprFact::illegalOpr) { +- assert(code == lir_cmp || code == lir_assert, "code check"); ++ assert(code == lir_cmp || code == lir_cmp_branch || code == lir_cmp_float_branch || code == lir_assert, "code check"); + } + + LIR_Op2(LIR_Code code, LIR_Condition condition, LIR_Opr opr1, LIR_Opr opr2, LIR_Opr result, BasicType type) +@@ -1691,7 +1692,7 @@ class LIR_Op2: public LIR_Op { + , _tmp3(LIR_OprFact::illegalOpr) + , _tmp4(LIR_OprFact::illegalOpr) + , _tmp5(LIR_OprFact::illegalOpr) { +- assert(code != lir_cmp && is_in_range(code, begin_op2, end_op2), "code check"); ++ assert((code != lir_cmp && code != lir_cmp_branch && code != lir_cmp_float_branch) && is_in_range(code, begin_op2, end_op2), "code check"); + } + + LIR_Op2(LIR_Code code, LIR_Opr opr1, LIR_Opr opr2, LIR_Opr result, LIR_Opr tmp1, LIR_Opr tmp2 = LIR_OprFact::illegalOpr, +@@ -1707,7 +1708,7 @@ class LIR_Op2: public LIR_Op { + , _tmp3(tmp3) + , _tmp4(tmp4) + , _tmp5(tmp5) { +- assert(code != lir_cmp && is_in_range(code, begin_op2, end_op2), "code check"); ++ assert((code != lir_cmp && code != lir_cmp_branch && code != lir_cmp_float_branch) && is_in_range(code, begin_op2, end_op2), "code check"); + } + + LIR_Opr in_opr1() const { return _opr1; } +@@ -1719,10 +1720,12 @@ class LIR_Op2: public LIR_Op { + LIR_Opr tmp4_opr() const { return _tmp4; } + LIR_Opr tmp5_opr() const { return _tmp5; } + LIR_Condition condition() const { +- assert(code() == lir_cmp || code() == lir_cmove || code() == lir_assert, "only valid for cmp and cmove and assert"); return _condition; ++ assert(code() == lir_cmp || code() == lir_cmp_branch || code() == lir_cmp_float_branch || code() == lir_cmove || code() == lir_assert, "only valid for cmp and cmove and assert"); ++ return _condition; + } + void set_condition(LIR_Condition condition) { +- assert(code() == lir_cmp || code() == lir_cmove, "only valid for cmp and cmove"); _condition = condition; ++ assert(code() == lir_cmp || code() == lir_cmp_branch || code() == lir_cmp_float_branch || code() == lir_cmove, "only valid for cmp and cmove"); ++ _condition = condition; + } + + void set_fpu_stack_size(int size) { _fpu_stack_size = size; } +@@ -1736,6 +1739,43 @@ class LIR_Op2: public LIR_Op { + virtual void print_instr(outputStream* out) const PRODUCT_RETURN; + }; + ++class LIR_OpCmpBranch: public LIR_Op2 { ++ friend class LIR_OpVisitState; ++ ++ private: ++ Label* _label; ++ BlockBegin* _block; // if this is a branch to a block, this is the block ++ BlockBegin* _ublock; // if this is a float-branch, this is the unorderd block ++ CodeStub* _stub; // if this is a branch to a stub, this is the stub ++ ++ public: ++ LIR_OpCmpBranch(LIR_Condition cond, LIR_Opr left, LIR_Opr right, Label* lbl, CodeEmitInfo* info = NULL) ++ : LIR_Op2(lir_cmp_branch, cond, left, right, info) ++ , _label(lbl) ++ , _block(NULL) ++ , 
_ublock(NULL) ++ , _stub(NULL) { } ++ ++ LIR_OpCmpBranch(LIR_Condition cond, LIR_Opr left, LIR_Opr right, CodeStub* stub, CodeEmitInfo* info = NULL); ++ LIR_OpCmpBranch(LIR_Condition cond, LIR_Opr left, LIR_Opr right, BlockBegin* block, CodeEmitInfo* info = NULL); ++ ++ // for unordered comparisons ++ LIR_OpCmpBranch(LIR_Condition cond, LIR_Opr left, LIR_Opr right, BlockBegin* block, BlockBegin* ublock, CodeEmitInfo* info = NULL); ++ ++ Label* label() const { return _label; } ++ BlockBegin* block() const { return _block; } ++ BlockBegin* ublock() const { return _ublock; } ++ CodeStub* stub() const { return _stub; } ++ ++ void change_block(BlockBegin* b); ++ void change_ublock(BlockBegin* b); ++ void negate_cond(); ++ ++ virtual void emit_code(LIR_Assembler* masm); ++ virtual LIR_OpCmpBranch* as_OpCmpBranch() { return this; } ++ virtual void print_instr(outputStream* out) const PRODUCT_RETURN; ++}; ++ + class LIR_OpAllocArray : public LIR_Op { + friend class LIR_OpVisitState; + +@@ -1776,7 +1816,6 @@ class LIR_OpAllocArray : public LIR_Op { + virtual void print_instr(outputStream* out) const PRODUCT_RETURN; + }; + +- + class LIR_Op3: public LIR_Op { + friend class LIR_OpVisitState; + +@@ -1800,6 +1839,48 @@ class LIR_Op3: public LIR_Op { + }; + + ++class LIR_Op4: public LIR_Op { ++ friend class LIR_OpVisitState; ++ ++ private: ++ LIR_Opr _opr1; ++ LIR_Opr _opr2; ++ LIR_Opr _opr3; ++ LIR_Opr _opr4; ++ BasicType _type; ++ LIR_Condition _condition; ++ ++ void verify() const; ++ ++ public: ++ LIR_Op4(LIR_Code code, LIR_Condition condition, LIR_Opr opr1, LIR_Opr opr2, LIR_Opr opr3, LIR_Opr opr4, LIR_Opr result, BasicType type) ++ : LIR_Op(code, result, NULL) ++ , _opr1(opr1) ++ , _opr2(opr2) ++ , _opr3(opr3) ++ , _opr4(opr4) ++ , _type(type) ++ , _condition(condition) { ++ assert(is_in_range(code, begin_op4, end_op4), "code check"); ++ assert(type != T_ILLEGAL, "cmove should have type"); ++ } ++ LIR_Opr in_opr1() const { return _opr1; } ++ LIR_Opr in_opr2() const { return _opr2; } ++ LIR_Opr in_opr3() const { return _opr3; } ++ LIR_Opr in_opr4() const { return _opr4; } ++ BasicType type() const { return _type; } ++ LIR_Condition condition() const { ++ assert(code() == lir_cmp_cmove, "only valid for cmp cmove"); return _condition; ++ } ++ void set_condition(LIR_Condition condition) { ++ assert(code() == lir_cmp_cmove, "only valid for cmp cmove"); _condition = condition; ++ } ++ ++ virtual void emit_code(LIR_Assembler* masm); ++ virtual LIR_Op4* as_Op4() { return this; } ++ virtual void print_instr(outputStream* out) const PRODUCT_RETURN; ++}; ++ + //-------------------------------- + class LabelObj: public CompilationResourceObj { + private: +@@ -2141,17 +2222,9 @@ class LIR_List: public CompilationResourceObj { + + void safepoint(LIR_Opr tmp, CodeEmitInfo* info) { append(new LIR_Op1(lir_safepoint, tmp, info)); } + +-#ifdef PPC +- void convert(Bytecodes::Code code, LIR_Opr left, LIR_Opr dst, LIR_Opr tmp1, LIR_Opr tmp2) { append(new LIR_OpConvert(code, left, dst, NULL, tmp1, tmp2)); } +-#endif +-#if defined(AARCH64) +- void convert(Bytecodes::Code code, LIR_Opr left, LIR_Opr dst, +- ConversionStub* stub = NULL, LIR_Opr tmp1 = LIR_OprDesc::illegalOpr()) { +- append(new LIR_OpConvert(code, left, dst, stub, tmp1, LIR_OprDesc::illegalOpr())); ++ void convert(Bytecodes::Code code, LIR_Opr left, LIR_Opr dst, ConversionStub* stub = NULL, LIR_Opr tmp = LIR_OprFact::illegalOpr) { ++ append(new LIR_OpConvert(code, left, dst, stub, tmp)); + } +-#else +- void convert(Bytecodes::Code code, LIR_Opr left, 
LIR_Opr dst, ConversionStub* stub = NULL/*, bool is_32bit = false*/) { append(new LIR_OpConvert(code, left, dst, stub)); } +-#endif + + void logical_and (LIR_Opr left, LIR_Opr right, LIR_Opr dst) { append(new LIR_Op2(lir_logic_and, left, right, dst)); } + void logical_or (LIR_Opr left, LIR_Opr right, LIR_Opr dst) { append(new LIR_Op2(lir_logic_or, left, right, dst)); } +@@ -2256,6 +2329,48 @@ class LIR_List: public CompilationResourceObj { + append(new LIR_OpBranch(cond, type, block, unordered)); + } + ++#if defined(X86) || defined(AARCH64) ++ ++ template ++ void cmp_branch(LIR_Condition condition, LIR_Opr left, LIR_Opr right, BasicType type, T tgt, CodeEmitInfo* info = NULL) { ++ cmp(condition, left, right, info); ++ branch(condition, type, tgt); ++ } ++ ++ void cmp_branch(LIR_Condition condition, LIR_Opr left, LIR_Opr right, BasicType type, BlockBegin* block, BlockBegin* unordered) { ++ cmp(condition, left, right); ++ branch(condition, type, block, unordered); ++ } ++ ++ void cmp_cmove(LIR_Condition condition, LIR_Opr left, LIR_Opr right, LIR_Opr src1, LIR_Opr src2, LIR_Opr dst, BasicType type) { ++ cmp(condition, left, right); ++ cmove(condition, src1, src2, dst, type); ++ } ++ ++#endif ++ ++#ifdef LOONGARCH ++ ++ template ++ void cmp_branch(LIR_Condition condition, LIR_Opr left, LIR_Opr right, BasicType type, T tgt, CodeEmitInfo* info = NULL) { ++ append(new LIR_OpCmpBranch(condition, left, right, tgt, info)); ++ } ++ ++ void cmp_branch(LIR_Condition condition, LIR_Opr left, LIR_Opr right, BasicType type, BlockBegin* block, BlockBegin* unordered) { ++ append(new LIR_OpCmpBranch(condition, left, right, block, unordered)); ++ } ++ ++ void cmp_cmove(LIR_Condition condition, LIR_Opr left, LIR_Opr right, LIR_Opr src1, LIR_Opr src2, LIR_Opr dst, BasicType type) { ++ append(new LIR_Op4(lir_cmp_cmove, condition, left, right, src1, src2, dst, type)); ++ } ++ ++#endif ++ ++ template ++ void cmp_branch(LIR_Condition condition, LIR_Opr left, int right, BasicType type, T tgt, CodeEmitInfo* info = NULL) { ++ cmp_branch(condition, left, LIR_OprFact::intConst(right), type, tgt, info); ++ } ++ + void shift_left(LIR_Opr value, LIR_Opr count, LIR_Opr dst, LIR_Opr tmp); + void shift_right(LIR_Opr value, LIR_Opr count, LIR_Opr dst, LIR_Opr tmp); + void unsigned_shift_right(LIR_Opr value, LIR_Opr count, LIR_Opr dst, LIR_Opr tmp); +diff --git a/hotspot/src/share/vm/c1/c1_LIRAssembler.cpp b/hotspot/src/share/vm/c1/c1_LIRAssembler.cpp +index e5cd19f17a..a18c53008b 100644 +--- a/hotspot/src/share/vm/c1/c1_LIRAssembler.cpp ++++ b/hotspot/src/share/vm/c1/c1_LIRAssembler.cpp +@@ -22,6 +22,12 @@ + * + */ + ++/* ++ * This file has been modified by Loongson Technology in 2022. These ++ * modifications are Copyright (c) 2015, 2022, Loongson Technology, and are made ++ * available on the same license terms set forth above. 
++ */ ++ + #include "precompiled.hpp" + #include "c1/c1_Compilation.hpp" + #include "c1/c1_Instruction.hpp" +@@ -34,6 +40,10 @@ + # include "nativeInst_x86.hpp" + # include "vmreg_x86.inline.hpp" + #endif ++#ifdef TARGET_ARCH_loongarch ++# include "nativeInst_loongarch.hpp" ++# include "vmreg_loongarch.inline.hpp" ++#endif + #ifdef TARGET_ARCH_aarch64 + # include "nativeInst_aarch64.hpp" + # include "vmreg_aarch64.inline.hpp" +@@ -811,6 +821,18 @@ void LIR_Assembler::emit_op2(LIR_Op2* op) { + } + + ++void LIR_Assembler::emit_op4(LIR_Op4* op) { ++ switch (op->code()) { ++ case lir_cmp_cmove: ++ cmp_cmove(op->condition(), op->in_opr1(), op->in_opr2(), op->in_opr3(), op->in_opr4(), op->result_opr(), op->type()); ++ break; ++ ++ default: ++ Unimplemented(); ++ break; ++ } ++} ++ + void LIR_Assembler::build_frame() { + _masm->build_frame(initial_frame_size_in_bytes(), bang_size_in_bytes()); + } +diff --git a/hotspot/src/share/vm/c1/c1_LIRAssembler.hpp b/hotspot/src/share/vm/c1/c1_LIRAssembler.hpp +index 1a68d458d2..ac0f4e7a46 100644 +--- a/hotspot/src/share/vm/c1/c1_LIRAssembler.hpp ++++ b/hotspot/src/share/vm/c1/c1_LIRAssembler.hpp +@@ -22,6 +22,12 @@ + * + */ + ++/* ++ * This file has been modified by Loongson Technology in 2022. These ++ * modifications are Copyright (c) 2015, 2022, Loongson Technology, and are made ++ * available on the same license terms set forth above. ++ */ ++ + #ifndef SHARE_VM_C1_C1_LIRASSEMBLER_HPP + #define SHARE_VM_C1_C1_LIRASSEMBLER_HPP + +@@ -195,7 +201,9 @@ class LIR_Assembler: public CompilationResourceObj { + void emit_op1(LIR_Op1* op); + void emit_op2(LIR_Op2* op); + void emit_op3(LIR_Op3* op); ++ void emit_op4(LIR_Op4* op); + void emit_opBranch(LIR_OpBranch* op); ++ void emit_opCmpBranch(LIR_OpCmpBranch* op); + void emit_opLabel(LIR_OpLabel* op); + void emit_arraycopy(LIR_OpArrayCopy* op); + void emit_updatecrc32(LIR_OpUpdateCRC32* op); +@@ -227,6 +235,7 @@ class LIR_Assembler: public CompilationResourceObj { + void volatile_move_op(LIR_Opr src, LIR_Opr result, BasicType type, CodeEmitInfo* info); + void comp_mem_op(LIR_Opr src, LIR_Opr result, BasicType type, CodeEmitInfo* info); // info set for null exceptions + void comp_fl2i(LIR_Code code, LIR_Opr left, LIR_Opr right, LIR_Opr result, LIR_Op2* op); ++ void cmp_cmove(LIR_Condition code, LIR_Opr left, LIR_Opr right, LIR_Opr src1, LIR_Opr src2, LIR_Opr result, BasicType type); + void cmove(LIR_Condition code, LIR_Opr left, LIR_Opr right, LIR_Opr result, BasicType type); + + void call( LIR_OpJavaCall* op, relocInfo::relocType rtype); +@@ -265,6 +274,9 @@ class LIR_Assembler: public CompilationResourceObj { + #ifdef TARGET_ARCH_x86 + # include "c1_LIRAssembler_x86.hpp" + #endif ++#ifdef TARGET_ARCH_loongarch ++# include "c1_LIRAssembler_loongarch.hpp" ++#endif + #ifdef TARGET_ARCH_aarch64 + # include "c1_LIRAssembler_aarch64.hpp" + #endif +diff --git a/hotspot/src/share/vm/c1/c1_LIRGenerator.cpp b/hotspot/src/share/vm/c1/c1_LIRGenerator.cpp +index 837553ddb6..c66f3102b9 100644 +--- a/hotspot/src/share/vm/c1/c1_LIRGenerator.cpp ++++ b/hotspot/src/share/vm/c1/c1_LIRGenerator.cpp +@@ -22,6 +22,12 @@ + * + */ + ++/* ++ * This file has been modified by Loongson Technology in 2022. These ++ * modifications are Copyright (c) 2015, 2022, Loongson Technology, and are made ++ * available on the same license terms set forth above. 
++ */ ++ + #include "precompiled.hpp" + #include "c1/c1_Defs.hpp" + #include "c1/c1_Compilation.hpp" +@@ -482,13 +488,11 @@ void LIRGenerator::array_range_check(LIR_Opr array, LIR_Opr index, + CodeEmitInfo* null_check_info, CodeEmitInfo* range_check_info) { + CodeStub* stub = new RangeCheckStub(range_check_info, index); + if (index->is_constant()) { +- cmp_mem_int(lir_cond_belowEqual, array, arrayOopDesc::length_offset_in_bytes(), +- index->as_jint(), null_check_info); +- __ branch(lir_cond_belowEqual, T_INT, stub); // forward branch ++ cmp_mem_int_branch(lir_cond_belowEqual, array, arrayOopDesc::length_offset_in_bytes(), ++ index->as_jint(), stub, null_check_info); // forward branch + } else { +- cmp_reg_mem(lir_cond_aboveEqual, index, array, +- arrayOopDesc::length_offset_in_bytes(), T_INT, null_check_info); +- __ branch(lir_cond_aboveEqual, T_INT, stub); // forward branch ++ cmp_reg_mem_branch(lir_cond_aboveEqual, index, array, arrayOopDesc::length_offset_in_bytes(), ++ T_INT, stub, null_check_info); // forward branch + } + } + +@@ -496,12 +500,10 @@ void LIRGenerator::array_range_check(LIR_Opr array, LIR_Opr index, + void LIRGenerator::nio_range_check(LIR_Opr buffer, LIR_Opr index, LIR_Opr result, CodeEmitInfo* info) { + CodeStub* stub = new RangeCheckStub(info, index, true); + if (index->is_constant()) { +- cmp_mem_int(lir_cond_belowEqual, buffer, java_nio_Buffer::limit_offset(), index->as_jint(), info); +- __ branch(lir_cond_belowEqual, T_INT, stub); // forward branch ++ cmp_mem_int_branch(lir_cond_belowEqual, buffer, java_nio_Buffer::limit_offset(), index->as_jint(), stub, info); // forward branch + } else { +- cmp_reg_mem(lir_cond_aboveEqual, index, buffer, +- java_nio_Buffer::limit_offset(), T_INT, info); +- __ branch(lir_cond_aboveEqual, T_INT, stub); // forward branch ++ cmp_reg_mem_branch(lir_cond_aboveEqual, index, buffer, ++ java_nio_Buffer::limit_offset(), T_INT, stub, info); // forward branch + } + __ move(index, result); + } +@@ -934,7 +936,7 @@ LIR_Opr LIRGenerator::force_to_spill(LIR_Opr value, BasicType t) { + return tmp; + } + +-void LIRGenerator::profile_branch(If* if_instr, If::Condition cond) { ++void LIRGenerator::profile_branch(If* if_instr, If::Condition cond, LIR_Opr left, LIR_Opr right) { + if (if_instr->should_profile()) { + ciMethod* method = if_instr->profiled_method(); + assert(method != NULL, "method should be set if branch is profiled"); +@@ -955,10 +957,17 @@ void LIRGenerator::profile_branch(If* if_instr, If::Condition cond) { + __ metadata2reg(md->constant_encoding(), md_reg); + + LIR_Opr data_offset_reg = new_pointer_register(); +- __ cmove(lir_cond(cond), +- LIR_OprFact::intptrConst(taken_count_offset), +- LIR_OprFact::intptrConst(not_taken_count_offset), +- data_offset_reg, as_BasicType(if_instr->x()->type())); ++ if (left == LIR_OprFact::illegalOpr && right == LIR_OprFact::illegalOpr) { ++ __ cmove(lir_cond(cond), ++ LIR_OprFact::intptrConst(taken_count_offset), ++ LIR_OprFact::intptrConst(not_taken_count_offset), ++ data_offset_reg, as_BasicType(if_instr->x()->type())); ++ } else { ++ __ cmp_cmove(lir_cond(cond), left, right, ++ LIR_OprFact::intptrConst(taken_count_offset), ++ LIR_OprFact::intptrConst(not_taken_count_offset), ++ data_offset_reg, as_BasicType(if_instr->x()->type())); ++ } + + // MDO cells are intptr_t, so the data_reg width is arch-dependent. 
+ LIR_Opr data_reg = new_pointer_register(); +@@ -1305,8 +1314,8 @@ void LIRGenerator::do_isPrimitive(Intrinsic* x) { + } + + __ move(new LIR_Address(rcvr.result(), java_lang_Class::klass_offset_in_bytes(), T_ADDRESS), temp, info); +- __ cmp(lir_cond_notEqual, temp, LIR_OprFact::metadataConst(0)); +- __ cmove(lir_cond_notEqual, LIR_OprFact::intConst(0), LIR_OprFact::intConst(1), result, T_BOOLEAN); ++ __ cmp_cmove(lir_cond_notEqual, temp, LIR_OprFact::metadataConst(0), ++ LIR_OprFact::intConst(0), LIR_OprFact::intConst(1), result, T_BOOLEAN); + } + + // Example: Thread.currentThread() +@@ -1499,7 +1508,6 @@ void LIRGenerator::G1SATBCardTableModRef_pre_barrier(LIR_Opr addr_opr, LIR_Opr p + // Read the marking-in-progress flag. + LIR_Opr flag_val = new_register(T_INT); + __ load(mark_active_flag_addr, flag_val); +- __ cmp(lir_cond_notEqual, flag_val, LIR_OprFact::intConst(0)); + + LIR_PatchCode pre_val_patch_code = lir_patch_none; + +@@ -1528,7 +1536,7 @@ void LIRGenerator::G1SATBCardTableModRef_pre_barrier(LIR_Opr addr_opr, LIR_Opr p + slow = new G1PreBarrierStub(pre_val); + } + +- __ branch(lir_cond_notEqual, T_INT, slow); ++ __ cmp_branch(lir_cond_notEqual, flag_val, LIR_OprFact::intConst(0), T_INT, slow); + __ branch_destination(slow->continuation()); + } + +@@ -1586,10 +1594,8 @@ void LIRGenerator::G1SATBCardTableModRef_post_barrier(LIR_OprDesc* addr, LIR_Opr + } + assert(new_val->is_register(), "must be a register at this point"); + +- __ cmp(lir_cond_notEqual, xor_shift_res, LIR_OprFact::intptrConst(NULL_WORD)); +- + CodeStub* slow = new G1PostBarrierStub(addr, new_val); +- __ branch(lir_cond_notEqual, LP64_ONLY(T_LONG) NOT_LP64(T_INT), slow); ++ __ cmp_branch(lir_cond_notEqual, xor_shift_res, LIR_OprFact::intptrConst(NULL_WORD), T_INT, slow); + __ branch_destination(slow->continuation()); + } + +@@ -1859,12 +1865,10 @@ void LIRGenerator::do_NIOCheckIndex(Intrinsic* x) { + CodeEmitInfo* info = state_for(x); + CodeStub* stub = new RangeCheckStub(info, index.result(), true); + if (index.result()->is_constant()) { +- cmp_mem_int(lir_cond_belowEqual, buf.result(), java_nio_Buffer::limit_offset(), index.result()->as_jint(), info); +- __ branch(lir_cond_belowEqual, T_INT, stub); ++ cmp_mem_int_branch(lir_cond_belowEqual, buf.result(), java_nio_Buffer::limit_offset(), index.result()->as_jint(), stub, info); + } else { +- cmp_reg_mem(lir_cond_aboveEqual, index.result(), buf.result(), +- java_nio_Buffer::limit_offset(), T_INT, info); +- __ branch(lir_cond_aboveEqual, T_INT, stub); ++ cmp_reg_mem_branch(lir_cond_aboveEqual, index.result(), buf.result(), ++ java_nio_Buffer::limit_offset(), T_INT, stub, info); + } + __ move(index.result(), result); + } else { +@@ -1945,8 +1949,8 @@ void LIRGenerator::do_LoadIndexed(LoadIndexed* x) { + } else if (use_length) { + // TODO: use a (modified) version of array_range_check that does not require a + // constant length to be loaded to a register +- __ cmp(lir_cond_belowEqual, length.result(), index.result()); +- __ branch(lir_cond_belowEqual, T_INT, new RangeCheckStub(range_check_info, index.result())); ++ CodeStub* stub = new RangeCheckStub(range_check_info, index.result()); ++ __ cmp_branch(lir_cond_belowEqual, length.result(), index.result(), T_INT, stub); + } else { + array_range_check(array.result(), index.result(), null_check_info, range_check_info); + // The range check performs the null check, so clear it out for the load +@@ -2128,7 +2132,7 @@ void LIRGenerator::do_UnsafeGetRaw(UnsafeGetRaw* x) { + assert(index_op->type() == T_INT, "only int 
constants supported"); + addr = new LIR_Address(base_op, index_op->as_jint(), dst_type); + } else { +-#if defined(X86) || defined(AARCH64) ++#if defined(X86) || defined(AARCH64) || defined(LOONGARCH) + addr = new LIR_Address(base_op, index_op, LIR_Address::Scale(log2_scale), 0, dst_type); + #elif defined(GENERATE_ADDRESS_IS_PREFERRED) + addr = generate_address(base_op, index_op, log2_scale, 0, dst_type); +@@ -2343,19 +2347,18 @@ void LIRGenerator::do_UnsafeGetObject(UnsafeGetObject* x) { + + if (off.type()->is_int()) { + referent_off = LIR_OprFact::intConst(java_lang_ref_Reference::referent_offset); ++ __ cmp_branch(lir_cond_notEqual, off.result(), referent_off, T_INT, Lcont->label()); + } else { + assert(off.type()->is_long(), "what else?"); + referent_off = new_register(T_LONG); + __ move(LIR_OprFact::longConst(java_lang_ref_Reference::referent_offset), referent_off); ++ __ cmp_branch(lir_cond_notEqual, off.result(), referent_off, T_LONG, Lcont->label()); + } +- __ cmp(lir_cond_notEqual, off.result(), referent_off); +- __ branch(lir_cond_notEqual, as_BasicType(off.type()), Lcont->label()); + } + if (gen_source_check) { + // offset is a const and equals referent offset + // if (source == null) -> continue +- __ cmp(lir_cond_equal, src_reg, LIR_OprFact::oopConst(NULL)); +- __ branch(lir_cond_equal, T_OBJECT, Lcont->label()); ++ __ cmp_branch(lir_cond_equal, src_reg, LIR_OprFact::oopConst(NULL), T_OBJECT, Lcont->label()); + } + LIR_Opr src_klass = new_register(T_METADATA); + if (gen_type_check) { +@@ -2365,8 +2368,7 @@ void LIRGenerator::do_UnsafeGetObject(UnsafeGetObject* x) { + LIR_Address* reference_type_addr = new LIR_Address(src_klass, in_bytes(InstanceKlass::reference_type_offset()), T_BYTE); + LIR_Opr reference_type = new_register(T_INT); + __ move(reference_type_addr, reference_type); +- __ cmp(lir_cond_equal, reference_type, LIR_OprFact::intConst(REF_NONE)); +- __ branch(lir_cond_equal, T_INT, Lcont->label()); ++ __ cmp_branch(lir_cond_equal, reference_type, LIR_OprFact::intConst(REF_NONE), T_INT, Lcont->label()); + } + { + // We have determined that src->_klass->_reference_type != REF_NONE +@@ -2446,19 +2448,14 @@ void LIRGenerator::do_SwitchRanges(SwitchRangeArray* x, LIR_Opr value, BlockBegi + int high_key = one_range->high_key(); + BlockBegin* dest = one_range->sux(); + if (low_key == high_key) { +- __ cmp(lir_cond_equal, value, low_key); +- __ branch(lir_cond_equal, T_INT, dest); ++ __ cmp_branch(lir_cond_equal, value, low_key, T_INT, dest); + } else if (high_key - low_key == 1) { +- __ cmp(lir_cond_equal, value, low_key); +- __ branch(lir_cond_equal, T_INT, dest); +- __ cmp(lir_cond_equal, value, high_key); +- __ branch(lir_cond_equal, T_INT, dest); ++ __ cmp_branch(lir_cond_equal, value, low_key, T_INT, dest); ++ __ cmp_branch(lir_cond_equal, value, high_key, T_INT, dest); + } else { + LabelObj* L = new LabelObj(); +- __ cmp(lir_cond_less, value, low_key); +- __ branch(lir_cond_less, T_INT, L->label()); +- __ cmp(lir_cond_lessEqual, value, high_key); +- __ branch(lir_cond_lessEqual, T_INT, dest); ++ __ cmp_branch(lir_cond_less, value, low_key, T_INT, L->label()); ++ __ cmp_branch(lir_cond_lessEqual, value, high_key, T_INT, dest); + __ branch_destination(L->label()); + } + } +@@ -2545,8 +2542,7 @@ void LIRGenerator::do_TableSwitch(TableSwitch* x) { + do_SwitchRanges(create_lookup_ranges(x), value, x->default_sux()); + } else { + for (int i = 0; i < len; i++) { +- __ cmp(lir_cond_equal, value, i + lo_key); +- __ branch(lir_cond_equal, T_INT, x->sux_at(i)); ++ __ 
cmp_branch(lir_cond_equal, value, i + lo_key, T_INT, x->sux_at(i)); + } + __ jump(x->default_sux()); + } +@@ -2571,8 +2567,7 @@ void LIRGenerator::do_LookupSwitch(LookupSwitch* x) { + } else { + int len = x->length(); + for (int i = 0; i < len; i++) { +- __ cmp(lir_cond_equal, value, x->key_at(i)); +- __ branch(lir_cond_equal, T_INT, x->sux_at(i)); ++ __ cmp_branch(lir_cond_equal, value, x->key_at(i), T_INT, x->sux_at(i)); + } + __ jump(x->default_sux()); + } +@@ -2624,7 +2619,6 @@ void LIRGenerator::do_Goto(Goto* x) { + } + LIR_Opr md_reg = new_register(T_METADATA); + __ metadata2reg(md->constant_encoding(), md_reg); +- + increment_counter(new LIR_Address(md_reg, offset, + NOT_LP64(T_INT) LP64_ONLY(T_LONG)), DataLayout::counter_increment); + } +@@ -3078,8 +3072,8 @@ void LIRGenerator::do_IfOp(IfOp* x) { + f_val.dont_load_item(); + LIR_Opr reg = rlock_result(x); + +- __ cmp(lir_cond(x->cond()), left.result(), right.result()); +- __ cmove(lir_cond(x->cond()), t_val.result(), f_val.result(), reg, as_BasicType(x->x()->type())); ++ __ cmp_cmove(lir_cond(x->cond()), left.result(), right.result(), ++ t_val.result(), f_val.result(), reg, as_BasicType(x->x()->type())); + } + + #ifdef JFR_HAVE_INTRINSICS +@@ -3119,8 +3113,7 @@ void LIRGenerator::do_getEventWriter(Intrinsic* x) { + T_OBJECT); + LIR_Opr result = rlock_result(x); + __ move_wide(jobj_addr, result); +- __ cmp(lir_cond_equal, result, LIR_OprFact::oopConst(NULL)); +- __ branch(lir_cond_equal, T_OBJECT, L_end->label()); ++ __ cmp_branch(lir_cond_equal, result, LIR_OprFact::oopConst(0), T_OBJECT, L_end->label()); + __ move_wide(new LIR_Address(result, T_OBJECT), result); + + __ branch_destination(L_end->label()); +@@ -3484,10 +3477,9 @@ void LIRGenerator::increment_event_counter_impl(CodeEmitInfo* info, + LIR_Opr meth = new_register(T_METADATA); + __ metadata2reg(method->constant_encoding(), meth); + __ logical_and(result, mask, result); +- __ cmp(lir_cond_equal, result, LIR_OprFact::intConst(0)); + // The bci for info can point to cmp for if's we want the if bci + CodeStub* overflow = new CounterOverflowStub(info, bci, meth); +- __ branch(lir_cond_equal, T_INT, overflow); ++ __ cmp_branch(lir_cond_equal, result, LIR_OprFact::intConst(0), T_INT, overflow); + __ branch_destination(overflow->continuation()); + } + } +@@ -3599,8 +3591,7 @@ void LIRGenerator::do_RangeCheckPredicate(RangeCheckPredicate *x) { + CodeEmitInfo *info = state_for(x, x->state()); + CodeStub* stub = new PredicateFailedStub(info); + +- __ cmp(lir_cond(cond), left, right); +- __ branch(lir_cond(cond), right->type(), stub); ++ __ cmp_branch(lir_cond(cond), left, right, right->type(), stub); + } + } + +@@ -3748,8 +3739,7 @@ LIR_Opr LIRGenerator::maybe_mask_boolean(StoreIndexed* x, LIR_Opr array, LIR_Opr + __ move(new LIR_Address(klass, in_bytes(Klass::layout_helper_offset()), T_INT), layout); + int diffbit = Klass::layout_helper_boolean_diffbit(); + __ logical_and(layout, LIR_OprFact::intConst(diffbit), layout); +- __ cmp(lir_cond_notEqual, layout, LIR_OprFact::intConst(0)); +- __ cmove(lir_cond_notEqual, value_fixed, value, value_fixed, T_BYTE); ++ __ cmp_cmove(lir_cond_notEqual, layout, LIR_OprFact::intConst(0), value_fixed, value, value_fixed, T_BYTE); + value = value_fixed; + } + return value; +diff --git a/hotspot/src/share/vm/c1/c1_LIRGenerator.hpp b/hotspot/src/share/vm/c1/c1_LIRGenerator.hpp +index 27be79fee1..57c253db69 100644 +--- a/hotspot/src/share/vm/c1/c1_LIRGenerator.hpp ++++ b/hotspot/src/share/vm/c1/c1_LIRGenerator.hpp +@@ -22,6 +22,12 @@ + * + */ + ++/* ++ 
* This file has been modified by Loongson Technology in 2015. These ++ * modifications are Copyright (c) 2015 Loongson Technology, and are made ++ * available on the same license terms set forth above. ++ */ ++ + #ifndef SHARE_VM_C1_C1_LIRGENERATOR_HPP + #define SHARE_VM_C1_C1_LIRGENERATOR_HPP + +@@ -246,6 +252,9 @@ class LIRGenerator: public InstructionVisitor, public BlockClosure { + void do_getClass(Intrinsic* x); + void do_currentThread(Intrinsic* x); + void do_MathIntrinsic(Intrinsic* x); ++#if defined(LOONGARCH64) ++ void do_LibmIntrinsic(Intrinsic* x); ++#endif + void do_ArrayCopy(Intrinsic* x); + void do_CompareAndSwap(Intrinsic* x, ValueType* type); + void do_NIOCheckIndex(Intrinsic* x); +@@ -335,8 +344,10 @@ class LIRGenerator: public InstructionVisitor, public BlockClosure { + void new_instance (LIR_Opr dst, ciInstanceKlass* klass, bool is_unresolved, LIR_Opr scratch1, LIR_Opr scratch2, LIR_Opr scratch3, LIR_Opr scratch4, LIR_Opr klass_reg, CodeEmitInfo* info); + + // machine dependent +- void cmp_mem_int(LIR_Condition condition, LIR_Opr base, int disp, int c, CodeEmitInfo* info); +- void cmp_reg_mem(LIR_Condition condition, LIR_Opr reg, LIR_Opr base, int disp, BasicType type, CodeEmitInfo* info); ++ template ++ void cmp_mem_int_branch(LIR_Condition condition, LIR_Opr base, int disp, int c, T tgt, CodeEmitInfo* info); ++ template ++ void cmp_reg_mem_branch(LIR_Condition condition, LIR_Opr reg, LIR_Opr base, int disp, BasicType type, T tgt, CodeEmitInfo* info); + void cmp_reg_mem(LIR_Condition condition, LIR_Opr reg, LIR_Opr base, LIR_Opr disp, BasicType type, CodeEmitInfo* info); + + void arraycopy_helper(Intrinsic* x, int* flags, ciArrayKlass** expected_type); +@@ -364,7 +375,7 @@ class LIRGenerator: public InstructionVisitor, public BlockClosure { + + LIR_Opr safepoint_poll_register(); + +- void profile_branch(If* if_instr, If::Condition cond); ++ void profile_branch(If* if_instr, If::Condition cond, LIR_Opr left = LIR_OprFact::illegalOpr, LIR_Opr right = LIR_OprFact::illegalOpr); + void increment_event_counter_impl(CodeEmitInfo* info, + ciMethod *method, int frequency, + int bci, bool backedge, bool notify); +diff --git a/hotspot/src/share/vm/c1/c1_LinearScan.cpp b/hotspot/src/share/vm/c1/c1_LinearScan.cpp +index 1f6281bf25..4549ff0928 100644 +--- a/hotspot/src/share/vm/c1/c1_LinearScan.cpp ++++ b/hotspot/src/share/vm/c1/c1_LinearScan.cpp +@@ -22,6 +22,12 @@ + * + */ + ++/* ++ * This file has been modified by Loongson Technology in 2022. These ++ * modifications are Copyright (c) 2015, 2022, Loongson Technology, and are made ++ * available on the same license terms set forth above. 
++ */ ++ + #include "precompiled.hpp" + #include "c1/c1_CFGPrinter.hpp" + #include "c1/c1_CodeStubs.hpp" +@@ -35,6 +41,9 @@ + #ifdef TARGET_ARCH_x86 + # include "vmreg_x86.inline.hpp" + #endif ++#ifdef TARGET_ARCH_loongarch ++# include "vmreg_loongarch.inline.hpp" ++#endif + #ifdef TARGET_ARCH_aarch64 + # include "vmreg_aarch64.inline.hpp" + #endif +@@ -1256,6 +1265,23 @@ void LinearScan::add_register_hints(LIR_Op* op) { + LIR_Opr move_from = cmove->in_opr1(); + LIR_Opr move_to = cmove->result_opr(); + ++ if (move_to->is_register() && move_from->is_register()) { ++ Interval* from = interval_at(reg_num(move_from)); ++ Interval* to = interval_at(reg_num(move_to)); ++ if (from != NULL && to != NULL) { ++ to->set_register_hint(from); ++ TRACE_LINEAR_SCAN(4, tty->print_cr("operation at op_id %d: added hint from interval %d to %d", cmove->id(), from->reg_num(), to->reg_num())); ++ } ++ } ++ break; ++ } ++ case lir_cmp_cmove: { ++ assert(op->as_Op4() != NULL, "lir_cmp_cmove must be LIR_Op4"); ++ LIR_Op4* cmove = (LIR_Op4*)op; ++ ++ LIR_Opr move_from = cmove->in_opr3(); ++ LIR_Opr move_to = cmove->result_opr(); ++ + if (move_to->is_register() && move_from->is_register()) { + Interval* from = interval_at(reg_num(move_from)); + Interval* to = interval_at(reg_num(move_to)); +@@ -2104,7 +2130,7 @@ LIR_Opr LinearScan::calc_operand_for_interval(const Interval* interval) { + #ifdef _LP64 + return LIR_OprFact::double_cpu(assigned_reg, assigned_reg); + #else +-#if defined(SPARC) || defined(PPC) ++#if defined(SPARC) || defined(PPC) || defined(LOONGARCH) + return LIR_OprFact::double_cpu(assigned_regHi, assigned_reg); + #else + return LIR_OprFact::double_cpu(assigned_reg, assigned_regHi); +@@ -3285,7 +3311,9 @@ void LinearScan::verify_no_oops_in_fixed_intervals() { + check_live = (move->patch_code() == lir_patch_none); + } + LIR_OpBranch* branch = op->as_OpBranch(); +- if (branch != NULL && branch->stub() != NULL && branch->stub()->is_exception_throw_stub()) { ++ LIR_OpCmpBranch* cmp_branch = op->as_OpCmpBranch(); ++ if ((branch != NULL && branch->stub() != NULL && branch->stub()->is_exception_throw_stub()) || ++ (cmp_branch != NULL && cmp_branch->stub() != NULL && cmp_branch->stub()->is_exception_throw_stub())) { + // Don't bother checking the stub in this case since the + // exception stub will never return to normal control flow. 
+ check_live = false; +@@ -6142,6 +6170,16 @@ void ControlFlowOptimizer::substitute_branch_target(BlockBegin* block, BlockBegi + assert(op->as_OpBranch() != NULL, "branch must be of type LIR_OpBranch"); + LIR_OpBranch* branch = (LIR_OpBranch*)op; + ++ if (branch->block() == target_from) { ++ branch->change_block(target_to); ++ } ++ if (branch->ublock() == target_from) { ++ branch->change_ublock(target_to); ++ } ++ } else if (op->code() == lir_cmp_branch || op->code() == lir_cmp_float_branch) { ++ assert(op->as_OpCmpBranch() != NULL, "branch must be of type LIR_OpCmpBranch"); ++ LIR_OpCmpBranch* branch = (LIR_OpCmpBranch*)op; ++ + if (branch->block() == target_from) { + branch->change_block(target_to); + } +@@ -6252,6 +6290,20 @@ void ControlFlowOptimizer::delete_unnecessary_jumps(BlockList* code) { + instructions->truncate(instructions->length() - 1); + } + } ++ } else if (prev_op->code() == lir_cmp_branch || prev_op->code() == lir_cmp_float_branch) { ++ assert(prev_op->as_OpCmpBranch() != NULL, "branch must be of type LIR_OpCmpBranch"); ++ LIR_OpCmpBranch* prev_branch = (LIR_OpCmpBranch*)prev_op; ++ ++ if (prev_branch->stub() == NULL) { ++ if (prev_branch->block() == code->at(i + 1) && prev_branch->info() == NULL) { ++ TRACE_LINEAR_SCAN(3, tty->print_cr("Negating conditional branch and deleting unconditional branch at end of block B%d", block->block_id())); ++ ++ // eliminate a conditional branch to the immediate successor ++ prev_branch->change_block(last_branch->block()); ++ prev_branch->negate_cond(); ++ instructions->trunc_to(instructions->length() - 1); ++ } ++ } + } + } + } +@@ -6328,6 +6380,13 @@ void ControlFlowOptimizer::verify(BlockList* code) { + assert(op_branch->block() == NULL || code->index_of(op_branch->block()) != -1, "branch target not valid"); + assert(op_branch->ublock() == NULL || code->index_of(op_branch->ublock()) != -1, "branch target not valid"); + } ++ ++ LIR_OpCmpBranch* op_cmp_branch = instructions->at(j)->as_OpCmpBranch(); ++ ++ if (op_cmp_branch != NULL) { ++ assert(op_cmp_branch->block() == NULL || code->find(op_cmp_branch->block()) != -1, "branch target not valid"); ++ assert(op_cmp_branch->ublock() == NULL || code->find(op_cmp_branch->ublock()) != -1, "branch target not valid"); ++ } + } + + for (j = 0; j < block->number_of_sux() - 1; j++) { +@@ -6571,6 +6630,24 @@ void LinearScanStatistic::collect(LinearScan* allocator) { + break; + } + ++ case lir_cmp_branch: ++ case lir_cmp_float_branch: { ++ LIR_OpCmpBranch* branch = op->as_OpCmpBranch(); ++ if (branch->block() == NULL) { ++ inc_counter(counter_stub_branch); ++ } else { ++ inc_counter(counter_cond_branch); ++ } ++ inc_counter(counter_cmp); ++ break; ++ } ++ ++ case lir_cmp_cmove: { ++ inc_counter(counter_misc_inst); ++ inc_counter(counter_cmp); ++ break; ++ } ++ + case lir_neg: + case lir_add: + case lir_sub: +diff --git a/hotspot/src/share/vm/c1/c1_LinearScan.hpp b/hotspot/src/share/vm/c1/c1_LinearScan.hpp +index 96e6b3babf..576a07d73d 100644 +--- a/hotspot/src/share/vm/c1/c1_LinearScan.hpp ++++ b/hotspot/src/share/vm/c1/c1_LinearScan.hpp +@@ -22,6 +22,12 @@ + * + */ + ++/* ++ * This file has been modified by Loongson Technology in 2022. These ++ * modifications are Copyright (c) 2015, 2022, Loongson Technology, and are made ++ * available on the same license terms set forth above. 
++ */ ++ + #ifndef SHARE_VM_C1_C1_LINEARSCAN_HPP + #define SHARE_VM_C1_C1_LINEARSCAN_HPP + +@@ -976,6 +982,9 @@ class LinearScanTimers : public StackObj { + #ifdef TARGET_ARCH_x86 + # include "c1_LinearScan_x86.hpp" + #endif ++#ifdef TARGET_ARCH_loongarch ++# include "c1_LinearScan_loongarch.hpp" ++#endif + #ifdef TARGET_ARCH_aarch64 + # include "c1_LinearScan_aarch64.hpp" + #endif +diff --git a/hotspot/src/share/vm/c1/c1_MacroAssembler.hpp b/hotspot/src/share/vm/c1/c1_MacroAssembler.hpp +index 7e22bbaa27..12aca7bf50 100644 +--- a/hotspot/src/share/vm/c1/c1_MacroAssembler.hpp ++++ b/hotspot/src/share/vm/c1/c1_MacroAssembler.hpp +@@ -22,6 +22,12 @@ + * + */ + ++/* ++ * This file has been modified by Loongson Technology in 2022. These ++ * modifications are Copyright (c) 2015, 2022, Loongson Technology, and are made ++ * available on the same license terms set forth above. ++ */ ++ + #ifndef SHARE_VM_C1_C1_MACROASSEMBLER_HPP + #define SHARE_VM_C1_C1_MACROASSEMBLER_HPP + +@@ -50,6 +56,9 @@ class C1_MacroAssembler: public MacroAssembler { + #ifdef TARGET_ARCH_x86 + # include "c1_MacroAssembler_x86.hpp" + #endif ++#ifdef TARGET_ARCH_loongarch ++# include "c1_MacroAssembler_loongarch.hpp" ++#endif + #ifdef TARGET_ARCH_aarch64 + # include "c1_MacroAssembler_aarch64.hpp" + #endif +diff --git a/hotspot/src/share/vm/c1/c1_Runtime1.cpp b/hotspot/src/share/vm/c1/c1_Runtime1.cpp +index aebc377527..f1253506f6 100644 +--- a/hotspot/src/share/vm/c1/c1_Runtime1.cpp ++++ b/hotspot/src/share/vm/c1/c1_Runtime1.cpp +@@ -22,6 +22,12 @@ + * + */ + ++/* ++ * This file has been modified by Loongson Technology in 2022. These ++ * modifications are Copyright (c) 2015, 2022, Loongson Technology, and are made ++ * available on the same license terms set forth above. ++ */ ++ + #include "precompiled.hpp" + #include "asm/codeBuffer.hpp" + #include "c1/c1_CodeStubs.hpp" +@@ -710,6 +716,7 @@ JRT_ENTRY(void, Runtime1::deoptimize(JavaThread* thread)) + // Return to the now deoptimized frame. + JRT_END + ++#ifndef LOONGARCH + + static Klass* resolve_field_return_klass(methodHandle caller, int bci, TRAPS) { + Bytecode_field field_access(caller, bci); +@@ -1186,6 +1193,47 @@ JRT_ENTRY(void, Runtime1::patch_code(JavaThread* thread, Runtime1::StubID stub_i + } + JRT_END + ++#else ++ ++JRT_ENTRY(void, Runtime1::patch_code(JavaThread* thread, Runtime1::StubID stub_id )) ++{ ++ RegisterMap reg_map(thread, false); ++ ++ NOT_PRODUCT(_patch_code_slowcase_cnt++;) ++ // According to the LoongArch, "Concurrent modification and ++ // execution of instructions can lead to the resulting instruction ++ // performing any behavior that can be achieved by executing any ++ // sequence of instructions that can be executed from the same ++ // Exception level, except where the instruction before ++ // modification and the instruction after modification is a B, BL, ++ // NOP, BRK instruction." ++ // ++ // This effectively makes the games we play when patching ++ // impossible, so when we come across an access that needs ++ // patching we must deoptimize. ++ ++ if (TracePatching) { ++ tty->print_cr("Deoptimizing because patch is needed"); ++ } ++ ++ frame runtime_frame = thread->last_frame(); ++ frame caller_frame = runtime_frame.sender(®_map); ++ ++ // It's possible the nmethod was invalidated in the last ++ // safepoint, but if it's still alive then make it not_entrant. 
++ nmethod* nm = CodeCache::find_nmethod(caller_frame.pc()); ++ if (nm != NULL) { ++ nm->make_not_entrant(); ++ } ++ ++ Deoptimization::deoptimize_frame(thread, caller_frame.id()); ++ ++ // Return to the now deoptimized frame. ++} ++JRT_END ++ ++#endif ++ + // + // Entry point for compiled code. We want to patch a nmethod. + // We don't do a normal VM transition here because we want to +diff --git a/hotspot/src/share/vm/c1/c1_globals.hpp b/hotspot/src/share/vm/c1/c1_globals.hpp +index 8f7f9f61c9..0e2d926bdf 100644 +--- a/hotspot/src/share/vm/c1/c1_globals.hpp ++++ b/hotspot/src/share/vm/c1/c1_globals.hpp +@@ -22,6 +22,12 @@ + * + */ + ++/* ++ * This file has been modified by Loongson Technology in 2022. These ++ * modifications are Copyright (c) 2015, 2022, Loongson Technology, and are made ++ * available on the same license terms set forth above. ++ */ ++ + #ifndef SHARE_VM_C1_C1_GLOBALS_HPP + #define SHARE_VM_C1_C1_GLOBALS_HPP + +@@ -29,6 +35,9 @@ + #ifdef TARGET_ARCH_x86 + # include "c1_globals_x86.hpp" + #endif ++#ifdef TARGET_ARCH_loongarch ++# include "c1_globals_loongarch.hpp" ++#endif + #ifdef TARGET_ARCH_aarch64 + # include "c1_globals_aarch64.hpp" + #endif +diff --git a/hotspot/src/share/vm/classfile/bytecodeAssembler.cpp b/hotspot/src/share/vm/classfile/bytecodeAssembler.cpp +index f067419ffc..5aa19dc84f 100644 +--- a/hotspot/src/share/vm/classfile/bytecodeAssembler.cpp ++++ b/hotspot/src/share/vm/classfile/bytecodeAssembler.cpp +@@ -22,6 +22,12 @@ + * + */ + ++/* ++ * This file has been modified by Loongson Technology in 2020. These ++ * modifications are Copyright (c) 2015, 2020, Loongson Technology, and are made ++ * available on the same license terms set forth above. ++ */ ++ + #include "precompiled.hpp" + + #include "classfile/bytecodeAssembler.hpp" +@@ -32,6 +38,12 @@ + #ifdef TARGET_ARCH_x86 + # include "bytes_x86.hpp" + #endif ++#ifdef TARGET_ARCH_mips ++# include "bytes_mips.hpp" ++#endif ++#ifdef TARGET_ARCH_loongarch ++# include "bytes_loongarch.hpp" ++#endif + #ifdef TARGET_ARCH_sparc + # include "bytes_sparc.hpp" + #endif +diff --git a/hotspot/src/share/vm/classfile/classFileStream.hpp b/hotspot/src/share/vm/classfile/classFileStream.hpp +index 9632c8c8c2..fad25c44fc 100644 +--- a/hotspot/src/share/vm/classfile/classFileStream.hpp ++++ b/hotspot/src/share/vm/classfile/classFileStream.hpp +@@ -22,6 +22,12 @@ + * + */ + ++/* ++ * This file has been modified by Loongson Technology in 2020. These ++ * modifications are Copyright (c) 2015, 2020, Loongson Technology, and are made ++ * available on the same license terms set forth above. ++ */ ++ + #ifndef SHARE_VM_CLASSFILE_CLASSFILESTREAM_HPP + #define SHARE_VM_CLASSFILE_CLASSFILESTREAM_HPP + +@@ -29,6 +35,12 @@ + #ifdef TARGET_ARCH_x86 + # include "bytes_x86.hpp" + #endif ++#ifdef TARGET_ARCH_mips ++# include "bytes_mips.hpp" ++#endif ++#ifdef TARGET_ARCH_loongarch ++# include "bytes_loongarch.hpp" ++#endif + #ifdef TARGET_ARCH_aarch64 + # include "bytes_aarch64.hpp" + #endif +diff --git a/hotspot/src/share/vm/classfile/stackMapTable.hpp b/hotspot/src/share/vm/classfile/stackMapTable.hpp +index a36a7ba3cf..d7c1f08644 100644 +--- a/hotspot/src/share/vm/classfile/stackMapTable.hpp ++++ b/hotspot/src/share/vm/classfile/stackMapTable.hpp +@@ -22,6 +22,12 @@ + * + */ + ++/* ++ * This file has been modified by Loongson Technology in 2020. These ++ * modifications are Copyright (c) 2015, 2020, Loongson Technology, and are made ++ * available on the same license terms set forth above. 
++ */ ++ + #ifndef SHARE_VM_CLASSFILE_STACKMAPTABLE_HPP + #define SHARE_VM_CLASSFILE_STACKMAPTABLE_HPP + +@@ -34,6 +40,12 @@ + #ifdef TARGET_ARCH_x86 + # include "bytes_x86.hpp" + #endif ++#ifdef TARGET_ARCH_mips ++# include "bytes_mips.hpp" ++#endif ++#ifdef TARGET_ARCH_loongarch ++# include "bytes_loongarch.hpp" ++#endif + #ifdef TARGET_ARCH_aarch64 + # include "bytes_aarch64.hpp" + #endif +diff --git a/hotspot/src/share/vm/classfile/verifier.cpp b/hotspot/src/share/vm/classfile/verifier.cpp +index c653e2b5a9..1a6b7e8b1a 100644 +--- a/hotspot/src/share/vm/classfile/verifier.cpp ++++ b/hotspot/src/share/vm/classfile/verifier.cpp +@@ -22,6 +22,12 @@ + * + */ + ++/* ++ * This file has been modified by Loongson Technology in 2020. These ++ * modifications are Copyright (c) 2015, 2020, Loongson Technology, and are made ++ * available on the same license terms set forth above. ++ */ ++ + #include "precompiled.hpp" + #include "classfile/classFileStream.hpp" + #include "classfile/javaClasses.hpp" +@@ -48,6 +54,12 @@ + #ifdef TARGET_ARCH_x86 + # include "bytes_x86.hpp" + #endif ++#ifdef TARGET_ARCH_mips ++# include "bytes_mips.hpp" ++#endif ++#ifdef TARGET_ARCH_loongarch ++# include "bytes_loongarch.hpp" ++#endif + #ifdef TARGET_ARCH_aarch64 + # include "bytes_aarch64.hpp" + #endif +diff --git a/hotspot/src/share/vm/code/codeBlob.cpp b/hotspot/src/share/vm/code/codeBlob.cpp +index aff2aaf0ca..9ba76007cd 100644 +--- a/hotspot/src/share/vm/code/codeBlob.cpp ++++ b/hotspot/src/share/vm/code/codeBlob.cpp +@@ -22,6 +22,12 @@ + * + */ + ++/* ++ * This file has been modified by Loongson Technology in 2020. These ++ * modifications are Copyright (c) 2015, 2020, Loongson Technology, and are made ++ * available on the same license terms set forth above. ++ */ ++ + #include "precompiled.hpp" + #include "code/codeBlob.hpp" + #include "code/codeCache.hpp" +@@ -57,6 +63,12 @@ + #ifdef TARGET_ARCH_ppc + # include "nativeInst_ppc.hpp" + #endif ++#ifdef TARGET_ARCH_mips ++# include "nativeInst_mips.hpp" ++#endif ++#ifdef TARGET_ARCH_loongarch ++# include "nativeInst_loongarch.hpp" ++#endif + #ifdef COMPILER1 + #include "c1/c1_Runtime1.hpp" + #endif +diff --git a/hotspot/src/share/vm/code/compiledIC.hpp b/hotspot/src/share/vm/code/compiledIC.hpp +index f910f11886..e282a3f3af 100644 +--- a/hotspot/src/share/vm/code/compiledIC.hpp ++++ b/hotspot/src/share/vm/code/compiledIC.hpp +@@ -22,6 +22,12 @@ + * + */ + ++/* ++ * This file has been modified by Loongson Technology in 2020. These ++ * modifications are Copyright (c) 2015, 2020, Loongson Technology, and are made ++ * available on the same license terms set forth above. ++ */ ++ + #ifndef SHARE_VM_CODE_COMPILEDIC_HPP + #define SHARE_VM_CODE_COMPILEDIC_HPP + +@@ -45,6 +51,12 @@ + #ifdef TARGET_ARCH_ppc + # include "nativeInst_ppc.hpp" + #endif ++#ifdef TARGET_ARCH_mips ++# include "nativeInst_mips.hpp" ++#endif ++#ifdef TARGET_ARCH_loongarch ++# include "nativeInst_loongarch.hpp" ++#endif + + //----------------------------------------------------------------------------- + // The CompiledIC represents a compiled inline cache. +diff --git a/hotspot/src/share/vm/code/relocInfo.hpp b/hotspot/src/share/vm/code/relocInfo.hpp +index ad55a2fd93..813504821d 100644 +--- a/hotspot/src/share/vm/code/relocInfo.hpp ++++ b/hotspot/src/share/vm/code/relocInfo.hpp +@@ -22,6 +22,12 @@ + * + */ + ++/* ++ * This file has been modified by Loongson Technology in 2021. 
These ++ * modifications are Copyright (c) 2015, 2021, Loongson Technology, and are made ++ * available on the same license terms set forth above. ++ */ ++ + #ifndef SHARE_VM_CODE_RELOCINFO_HPP + #define SHARE_VM_CODE_RELOCINFO_HPP + +@@ -261,7 +267,11 @@ class relocInfo VALUE_OBJ_CLASS_SPEC { + poll_return_type = 11, // polling instruction for safepoints at return + metadata_type = 12, // metadata that used to be oops + trampoline_stub_type = 13, // stub-entry for trampoline ++#if !defined MIPS64 + yet_unused_type_1 = 14, // Still unused ++#else ++ internal_pc_type = 14, // tag for internal data,?? ++#endif + data_prefix_tag = 15, // tag for a prefix (carries data arguments) + type_mask = 15 // A mask which selects only the above values + }; +@@ -288,6 +298,7 @@ class relocInfo VALUE_OBJ_CLASS_SPEC { + ; + #endif + ++#if defined MIPS64 && !defined ZERO + #define APPLY_TO_RELOCATIONS(visitor) \ + visitor(oop) \ + visitor(metadata) \ +@@ -300,9 +311,26 @@ class relocInfo VALUE_OBJ_CLASS_SPEC { + visitor(internal_word) \ + visitor(poll) \ + visitor(poll_return) \ +- visitor(section_word) \ + visitor(trampoline_stub) \ ++ visitor(internal_pc) \ + ++#else ++ #define APPLY_TO_RELOCATIONS(visitor) \ ++ visitor(oop) \ ++ visitor(metadata) \ ++ visitor(virtual_call) \ ++ visitor(opt_virtual_call) \ ++ visitor(static_call) \ ++ visitor(static_stub) \ ++ visitor(runtime_call) \ ++ visitor(external_word) \ ++ visitor(internal_word) \ ++ visitor(poll) \ ++ visitor(poll_return) \ ++ visitor(trampoline_stub) \ ++ visitor(section_word) \ ++ ++#endif + + public: + enum { +@@ -432,6 +460,12 @@ class relocInfo VALUE_OBJ_CLASS_SPEC { + #endif + #ifdef TARGET_ARCH_ppc + # include "relocInfo_ppc.hpp" ++#endif ++#ifdef TARGET_ARCH_mips ++# include "relocInfo_mips.hpp" ++#endif ++#ifdef TARGET_ARCH_loongarch ++# include "relocInfo_loongarch.hpp" + #endif + + +@@ -1024,6 +1058,15 @@ class metadata_Relocation : public DataRelocation { + // Note: metadata_value transparently converts Universe::non_metadata_word to NULL. + }; + ++#if defined MIPS64 ++// to handle the set_last_java_frame pc ++class internal_pc_Relocation : public Relocation { ++ relocInfo::relocType type() { return relocInfo::internal_pc_type; } ++ public: ++ address pc() { return pd_get_address_from_code(); } ++ void fix_relocation_after_move(const CodeBuffer* src, CodeBuffer* dest); ++}; ++#endif + + class virtual_call_Relocation : public CallRelocation { + relocInfo::relocType type() { return relocInfo::virtual_call_type; } +diff --git a/hotspot/src/share/vm/code/vmreg.hpp b/hotspot/src/share/vm/code/vmreg.hpp +index 07b595b60a..5bc7131a8a 100644 +--- a/hotspot/src/share/vm/code/vmreg.hpp ++++ b/hotspot/src/share/vm/code/vmreg.hpp +@@ -22,6 +22,12 @@ + * + */ + ++/* ++ * This file has been modified by Loongson Technology in 2020. These ++ * modifications are Copyright (c) 2015, 2020, Loongson Technology, and are made ++ * available on the same license terms set forth above. 
++ */ ++ + #ifndef SHARE_VM_CODE_VMREG_HPP + #define SHARE_VM_CODE_VMREG_HPP + +@@ -47,6 +53,12 @@ + #elif defined TARGET_ARCH_MODEL_ppc_64 + # include "adfiles/adGlobals_ppc_64.hpp" + #endif ++#ifdef TARGET_ARCH_MODEL_mips_64 ++# include "adfiles/adGlobals_mips_64.hpp" ++#endif ++#ifdef TARGET_ARCH_MODEL_loongarch_64 ++# include "adfiles/adGlobals_loongarch_64.hpp" ++#endif + #endif + + //------------------------------VMReg------------------------------------------ +@@ -158,6 +170,12 @@ public: + #ifdef TARGET_ARCH_x86 + # include "vmreg_x86.hpp" + #endif ++#ifdef TARGET_ARCH_mips ++# include "vmreg_mips.hpp" ++#endif ++#ifdef TARGET_ARCH_loongarch ++# include "vmreg_loongarch.hpp" ++#endif + #ifdef TARGET_ARCH_aarch64 + # include "vmreg_aarch64.hpp" + #endif +diff --git a/hotspot/src/share/vm/compiler/disassembler.cpp b/hotspot/src/share/vm/compiler/disassembler.cpp +index dfdd5f77e7..2dd0ff69ac 100644 +--- a/hotspot/src/share/vm/compiler/disassembler.cpp ++++ b/hotspot/src/share/vm/compiler/disassembler.cpp +@@ -22,6 +22,12 @@ + * + */ + ++/* ++ * This file has been modified by Loongson Technology in 2020. These ++ * modifications are Copyright (c) 2015, 2020, Loongson Technology, and are made ++ * available on the same license terms set forth above. ++ */ ++ + #include "precompiled.hpp" + #include "classfile/javaClasses.hpp" + #include "code/codeCache.hpp" +@@ -50,6 +56,12 @@ + #ifdef TARGET_ARCH_ppc + # include "depChecker_ppc.hpp" + #endif ++#ifdef TARGET_ARCH_mips ++# include "depChecker_mips.hpp" ++#endif ++#ifdef TARGET_ARCH_loongarch ++# include "depChecker_loongarch.hpp" ++#endif + #ifdef SHARK + #include "shark/sharkEntry.hpp" + #endif +diff --git a/hotspot/src/share/vm/compiler/disassembler.hpp b/hotspot/src/share/vm/compiler/disassembler.hpp +index 168851cc26..8b632748f2 100644 +--- a/hotspot/src/share/vm/compiler/disassembler.hpp ++++ b/hotspot/src/share/vm/compiler/disassembler.hpp +@@ -22,6 +22,12 @@ + * + */ + ++/* ++ * This file has been modified by Loongson Technology in 2020. These ++ * modifications are Copyright (c) 2015, 2020, Loongson Technology, and are made ++ * available on the same license terms set forth above. 
++ */ ++ + #ifndef SHARE_VM_COMPILER_DISASSEMBLER_HPP + #define SHARE_VM_COMPILER_DISASSEMBLER_HPP + +@@ -95,6 +101,12 @@ class Disassembler { + #endif + #ifdef TARGET_ARCH_ppc + # include "disassembler_ppc.hpp" ++#endif ++#ifdef TARGET_ARCH_mips ++# include "disassembler_mips.hpp" ++#endif ++#ifdef TARGET_ARCH_loongarch ++# include "disassembler_loongarch.hpp" + #endif + + +diff --git a/hotspot/src/share/vm/gc_implementation/parallelScavenge/cardTableExtension.hpp b/hotspot/src/share/vm/gc_implementation/parallelScavenge/cardTableExtension.hpp +index 733b5c91ad..678a1ee836 100644 +--- a/hotspot/src/share/vm/gc_implementation/parallelScavenge/cardTableExtension.hpp ++++ b/hotspot/src/share/vm/gc_implementation/parallelScavenge/cardTableExtension.hpp +@@ -86,6 +86,9 @@ class CardTableExtension : public CardTableModRefBS { + void inline_write_ref_field_gc(void* field, oop new_val) { + jbyte* byte = byte_for(field); + *byte = youngergen_card; ++#if (defined MIPS || defined LOONGARCH) && !defined ZERO ++ if (UseSyncLevel >= 2000) OrderAccess::fence(); ++#endif + } + + // Adaptive size policy support +diff --git a/hotspot/src/share/vm/gc_implementation/parallelScavenge/parMarkBitMap.cpp b/hotspot/src/share/vm/gc_implementation/parallelScavenge/parMarkBitMap.cpp +index 1dde10746d..8b800b31c5 100644 +--- a/hotspot/src/share/vm/gc_implementation/parallelScavenge/parMarkBitMap.cpp ++++ b/hotspot/src/share/vm/gc_implementation/parallelScavenge/parMarkBitMap.cpp +@@ -105,6 +105,9 @@ ParMarkBitMap::mark_obj(HeapWord* addr, size_t size) + assert(end_bit_ok, "concurrency problem"); + DEBUG_ONLY(Atomic::inc_ptr(&mark_bitmap_count)); + DEBUG_ONLY(Atomic::add_ptr(size, &mark_bitmap_size)); ++#if (defined MIPS || defined LOONGARCH) && !defined ZERO ++ if (UseSyncLevel >= 2000) OrderAccess::fence(); ++#endif + return true; + } + return false; +diff --git a/hotspot/src/share/vm/gc_implementation/parallelScavenge/psCompactionManager.inline.hpp b/hotspot/src/share/vm/gc_implementation/parallelScavenge/psCompactionManager.inline.hpp +index 6cf76353d9..4d34bc209b 100644 +--- a/hotspot/src/share/vm/gc_implementation/parallelScavenge/psCompactionManager.inline.hpp ++++ b/hotspot/src/share/vm/gc_implementation/parallelScavenge/psCompactionManager.inline.hpp +@@ -33,6 +33,9 @@ void ParCompactionManager::push_objarray(oop obj, size_t index) + ObjArrayTask task(obj, index); + assert(task.is_valid(), "bad ObjArrayTask"); + _objarray_stack.push(task); ++#if (defined MIPS || defined LOONGARCH) && !defined ZERO ++ if (UseSyncLevel >= 2000) OrderAccess::fence(); ++#endif + } + + void ParCompactionManager::push_region(size_t index) +@@ -44,6 +47,9 @@ void ParCompactionManager::push_region(size_t index) + assert(region_ptr->_pushed++ == 0, "should only be pushed once"); + #endif + region_stack()->push(index); ++#if (defined MIPS || defined LOONGARCH) && !defined ZERO ++ if (UseSyncLevel >= 2000) OrderAccess::fence(); ++#endif + } + + #endif // SHARE_VM_GC_IMPLEMENTATION_PARALLELSCAVENGE_PSCOMPACTIONMANAGER_INLINE_HPP +diff --git a/hotspot/src/share/vm/gc_implementation/parallelScavenge/psParallelCompact.cpp b/hotspot/src/share/vm/gc_implementation/parallelScavenge/psParallelCompact.cpp +index 0fa980ef83..2f66493e0a 100644 +--- a/hotspot/src/share/vm/gc_implementation/parallelScavenge/psParallelCompact.cpp ++++ b/hotspot/src/share/vm/gc_implementation/parallelScavenge/psParallelCompact.cpp +@@ -499,6 +499,9 @@ void ParallelCompactData::add_obj(HeapWord* addr, size_t len) + if (beg_region == end_region) { + // All in one 
region. + _region_data[beg_region].add_live_obj(len); ++#if (defined MIPS || defined LOONGARCH) && !defined ZERO ++ if (UseSyncLevel >= 2000) OrderAccess::fence(); ++#endif + return; + } + +@@ -517,6 +520,9 @@ void ParallelCompactData::add_obj(HeapWord* addr, size_t len) + const size_t end_ofs = region_offset(addr + len - 1); + _region_data[end_region].set_partial_obj_size(end_ofs + 1); + _region_data[end_region].set_partial_obj_addr(addr); ++#if (defined MIPS || defined LOONGARCH) && !defined ZERO ++ if (UseSyncLevel >= 2000) OrderAccess::fence(); ++#endif + } + + void +@@ -3229,6 +3235,9 @@ void PSParallelCompact::fill_blocks(size_t region_idx) + if (new_block != cur_block) { + cur_block = new_block; + sd.block(cur_block)->set_offset(bitmap->bits_to_words(live_bits)); ++#if (defined MIPS || defined LOONGARCH) && !defined ZERO ++ if (UseSyncLevel >= 2000) OrderAccess::fence(); ++#endif + } + + const size_t end_bit = bitmap->find_obj_end(beg_bit, range_end); +diff --git a/hotspot/src/share/vm/gc_implementation/parallelScavenge/psParallelCompact.hpp b/hotspot/src/share/vm/gc_implementation/parallelScavenge/psParallelCompact.hpp +index 881f380cea..461b83930f 100644 +--- a/hotspot/src/share/vm/gc_implementation/parallelScavenge/psParallelCompact.hpp ++++ b/hotspot/src/share/vm/gc_implementation/parallelScavenge/psParallelCompact.hpp +@@ -1329,6 +1329,9 @@ inline bool PSParallelCompact::mark_obj(oop obj) { + const int obj_size = obj->size(); + if (mark_bitmap()->mark_obj(obj, obj_size)) { + _summary_data.add_obj(obj, obj_size); ++#if (defined MIPS || defined LOONGARCH) && !defined ZERO ++ if (UseSyncLevel >= 2000) OrderAccess::fence(); ++#endif + return true; + } else { + return false; +@@ -1363,6 +1366,9 @@ inline void PSParallelCompact::mark_and_push(ParCompactionManager* cm, T* p) { + oop obj = oopDesc::decode_heap_oop_not_null(heap_oop); + if (mark_bitmap()->is_unmarked(obj) && mark_obj(obj)) { + cm->push(obj); ++#if (defined MIPS || defined LOONGARCH) && !defined ZERO ++ if (UseSyncLevel >= 2000) OrderAccess::fence(); ++#endif + } + } + } +diff --git a/hotspot/src/share/vm/gc_implementation/parallelScavenge/psPromotionManager.inline.hpp b/hotspot/src/share/vm/gc_implementation/parallelScavenge/psPromotionManager.inline.hpp +index a33132009c..291019660a 100644 +--- a/hotspot/src/share/vm/gc_implementation/parallelScavenge/psPromotionManager.inline.hpp ++++ b/hotspot/src/share/vm/gc_implementation/parallelScavenge/psPromotionManager.inline.hpp +@@ -41,8 +41,9 @@ template + inline void PSPromotionManager::claim_or_forward_internal_depth(T* p) { + if (p != NULL) { // XXX: error if p != NULL here + oop o = oopDesc::load_decode_heap_oop_not_null(p); +- if (o->is_forwarded()) { +- o = o->forwardee(); ++ markOop m = o->mark(); ++ if (m->is_marked()) { ++ o = (oop) m->decode_pointer(); + // Card mark + if (PSScavenge::is_obj_in_young(o)) { + PSScavenge::card_table()->inline_write_ref_field_gc(p, o); +@@ -102,11 +103,19 @@ oop PSPromotionManager::copy_to_survivor_space(oop o) { + + oop new_obj = NULL; + ++#if (defined MIPS || defined LOONGARCH) && !defined ZERO ++ if (UseSyncLevel >= 2000) OrderAccess::fence(); ++#endif ++ + // NOTE! We must be very careful with any methods that access the mark + // in o. There may be multiple threads racing on it, and it may be forwarded + // at any time. Do not use oop methods for accessing the mark! 
+ markOop test_mark = o->mark(); + ++#if (defined MIPS || defined LOONGARCH) && !defined ZERO ++ if (UseSyncLevel >= 2000) OrderAccess::fence(); ++#endif ++ + // The same test as "o->is_forwarded()" + if (!test_mark->is_marked()) { + bool new_obj_is_tenured = false; +@@ -141,6 +150,10 @@ oop PSPromotionManager::copy_to_survivor_space(oop o) { + } + } + } ++ ++#if (defined MIPS || defined LOONGARCH) && !defined ZERO ++ if (UseSyncLevel >= 2000) OrderAccess::fence(); ++#endif + } + } + +@@ -200,6 +213,9 @@ oop PSPromotionManager::copy_to_survivor_space(oop o) { + + // Copy obj + Copy::aligned_disjoint_words((HeapWord*)o, (HeapWord*)new_obj, new_obj_size); ++#if (defined MIPS || defined LOONGARCH) && !defined ZERO ++ if (UseSyncLevel >= 2000) OrderAccess::fence(); ++#endif + + // Now we have to CAS in the header. + if (o->cas_forward_to(new_obj, test_mark)) { +@@ -247,6 +263,10 @@ oop PSPromotionManager::copy_to_survivor_space(oop o) { + // don't update this before the unallocation! + new_obj = o->forwardee(); + } ++ ++#if (defined MIPS || defined LOONGARCH) && !defined ZERO ++ if (UseSyncLevel >= 2000) OrderAccess::fence(); ++#endif + } else { + assert(o->is_forwarded(), "Sanity"); + new_obj = o->forwardee(); +diff --git a/hotspot/src/share/vm/gc_implementation/parallelScavenge/psScavenge.inline.hpp b/hotspot/src/share/vm/gc_implementation/parallelScavenge/psScavenge.inline.hpp +index 1a722a7ca7..4980be3946 100644 +--- a/hotspot/src/share/vm/gc_implementation/parallelScavenge/psScavenge.inline.hpp ++++ b/hotspot/src/share/vm/gc_implementation/parallelScavenge/psScavenge.inline.hpp +@@ -71,14 +71,22 @@ inline void PSScavenge::copy_and_push_safe_barrier(PSPromotionManager* pm, + assert(should_scavenge(p, true), "revisiting object?"); + + oop o = oopDesc::load_decode_heap_oop_not_null(p); +- oop new_obj = o->is_forwarded() +- ? o->forwardee() +- : pm->copy_to_survivor_space(o); ++#if defined MIPS || defined LOONGARCH ++ if (oopDesc::is_null(o)) return; ++#endif ++ ++ oop new_obj; ++ markOop m = o->mark(); ++ if (m->is_marked()) { ++ new_obj = (oop) m->decode_pointer(); ++ } else { ++ new_obj = pm->copy_to_survivor_space(o); ++ } + + #ifndef PRODUCT + // This code must come after the CAS test, or it will print incorrect + // information. +- if (TraceScavenge && o->is_forwarded()) { ++ if (TraceScavenge && m->is_marked()) { + gclog_or_tty->print_cr("{%s %s " PTR_FORMAT " -> " PTR_FORMAT " (%d)}", + "forwarding", + new_obj->klass()->internal_name(), p2i((void *)o), p2i((void *)new_obj), new_obj->size()); +@@ -138,8 +146,9 @@ class PSScavengeFromKlassClosure: public OopClosure { + + oop o = *p; + oop new_obj; +- if (o->is_forwarded()) { +- new_obj = o->forwardee(); ++ markOop m = o->mark(); ++ if (m->is_marked()) { ++ new_obj = (oop) m->decode_pointer(); + } else { + new_obj = _pm->copy_to_survivor_space(o); + } +diff --git a/hotspot/src/share/vm/interpreter/abstractInterpreter.hpp b/hotspot/src/share/vm/interpreter/abstractInterpreter.hpp +index e14c50bf01..8b3860070c 100644 +--- a/hotspot/src/share/vm/interpreter/abstractInterpreter.hpp ++++ b/hotspot/src/share/vm/interpreter/abstractInterpreter.hpp +@@ -22,6 +22,12 @@ + * + */ + ++/* ++ * This file has been modified by Loongson Technology in 2020. These ++ * modifications are Copyright (c) 2015, 2020, Loongson Technology, and are made ++ * available on the same license terms set forth above. 
++ */ ++ + #ifndef SHARE_VM_INTERPRETER_ABSTRACTINTERPRETER_HPP + #define SHARE_VM_INTERPRETER_ABSTRACTINTERPRETER_HPP + +@@ -42,6 +48,10 @@ + # include "interp_masm_zero.hpp" + #elif defined TARGET_ARCH_MODEL_ppc_64 + # include "interp_masm_ppc_64.hpp" ++#elif defined TARGET_ARCH_MODEL_mips_64 ++# include "interp_masm_mips_64.hpp" ++#elif defined TARGET_ARCH_MODEL_loongarch_64 ++# include "interp_masm_loongarch_64.hpp" + #endif + + // This file contains the platform-independent parts +diff --git a/hotspot/src/share/vm/interpreter/bytecode.hpp b/hotspot/src/share/vm/interpreter/bytecode.hpp +index 7e55fd009a..a06dcd58bc 100644 +--- a/hotspot/src/share/vm/interpreter/bytecode.hpp ++++ b/hotspot/src/share/vm/interpreter/bytecode.hpp +@@ -22,6 +22,12 @@ + * + */ + ++/* ++ * This file has been modified by Loongson Technology in 2020. These ++ * modifications are Copyright (c) 2015, 2020, Loongson Technology, and are made ++ * available on the same license terms set forth above. ++ */ ++ + #ifndef SHARE_VM_INTERPRETER_BYTECODE_HPP + #define SHARE_VM_INTERPRETER_BYTECODE_HPP + +@@ -31,6 +37,12 @@ + #ifdef TARGET_ARCH_x86 + # include "bytes_x86.hpp" + #endif ++#ifdef TARGET_ARCH_mips ++# include "bytes_mips.hpp" ++#endif ++#ifdef TARGET_ARCH_loongarch ++# include "bytes_loongarch.hpp" ++#endif + #ifdef TARGET_ARCH_aarch64 + # include "bytes_aarch64.hpp" + #endif +diff --git a/hotspot/src/share/vm/interpreter/bytecodeInterpreter.hpp b/hotspot/src/share/vm/interpreter/bytecodeInterpreter.hpp +index 28843715c7..c17fe8d7e0 100644 +--- a/hotspot/src/share/vm/interpreter/bytecodeInterpreter.hpp ++++ b/hotspot/src/share/vm/interpreter/bytecodeInterpreter.hpp +@@ -22,6 +22,12 @@ + * + */ + ++/* ++ * This file has been modified by Loongson Technology in 2020. These ++ * modifications are Copyright (c) 2015, 2020, Loongson Technology, and are made ++ * available on the same license terms set forth above. ++ */ ++ + #ifndef SHARE_VM_INTERPRETER_BYTECODEINTERPRETER_HPP + #define SHARE_VM_INTERPRETER_BYTECODEINTERPRETER_HPP + +@@ -35,6 +41,9 @@ + #ifdef TARGET_ARCH_x86 + # include "bytes_x86.hpp" + #endif ++#ifdef TARGET_ARCH_mips ++# include "bytes_mips.hpp" ++#endif + #ifdef TARGET_ARCH_aarch64 + # include "bytes_aarch64.hpp" + #endif +@@ -592,6 +601,12 @@ void print(); + #ifdef TARGET_ARCH_x86 + # include "bytecodeInterpreter_x86.hpp" + #endif ++#ifdef TARGET_ARCH_mips ++# include "bytecodeInterpreter_mips.hpp" ++#endif ++#ifdef TARGET_ARCH_loongarch ++# include "bytecodeInterpreter_loongarch.hpp" ++#endif + #ifdef TARGET_ARCH_aarch64 + # include "bytecodeInterpreter_aarch64.hpp" + #endif +diff --git a/hotspot/src/share/vm/interpreter/bytecodeInterpreter.inline.hpp b/hotspot/src/share/vm/interpreter/bytecodeInterpreter.inline.hpp +index f5db0b4d9d..8adbf95acb 100644 +--- a/hotspot/src/share/vm/interpreter/bytecodeInterpreter.inline.hpp ++++ b/hotspot/src/share/vm/interpreter/bytecodeInterpreter.inline.hpp +@@ -22,6 +22,12 @@ + * + */ + ++/* ++ * This file has been modified by Loongson Technology in 2020. These ++ * modifications are Copyright (c) 2015, 2020, Loongson Technology, and are made ++ * available on the same license terms set forth above. 
++ */ ++ + #ifndef SHARE_VM_INTERPRETER_BYTECODEINTERPRETER_INLINE_HPP + #define SHARE_VM_INTERPRETER_BYTECODEINTERPRETER_INLINE_HPP + +@@ -46,6 +52,12 @@ + #ifdef TARGET_ARCH_x86 + # include "bytecodeInterpreter_x86.inline.hpp" + #endif ++#ifdef TARGET_ARCH_mips ++# include "bytecodeInterpreter_mips.inline.hpp" ++#endif ++#ifdef TARGET_ARCH_loongarch ++# include "bytecodeInterpreter_loongarch.inline.hpp" ++#endif + #ifdef TARGET_ARCH_aarch64 + # include "bytecodeInterpreter_aarch64.inline.hpp" + #endif +diff --git a/hotspot/src/share/vm/interpreter/bytecodeStream.hpp b/hotspot/src/share/vm/interpreter/bytecodeStream.hpp +index b814b88d5d..e1f2421600 100644 +--- a/hotspot/src/share/vm/interpreter/bytecodeStream.hpp ++++ b/hotspot/src/share/vm/interpreter/bytecodeStream.hpp +@@ -22,6 +22,12 @@ + * + */ + ++/* ++ * This file has been modified by Loongson Technology in 2020. These ++ * modifications are Copyright (c) 2015, 2020, Loongson Technology, and are made ++ * available on the same license terms set forth above. ++ */ ++ + #ifndef SHARE_VM_INTERPRETER_BYTECODESTREAM_HPP + #define SHARE_VM_INTERPRETER_BYTECODESTREAM_HPP + +@@ -32,6 +38,12 @@ + #ifdef TARGET_ARCH_x86 + # include "bytes_x86.hpp" + #endif ++#ifdef TARGET_ARCH_mips ++# include "bytes_mips.hpp" ++#endif ++#ifdef TARGET_ARCH_loongarch ++# include "bytes_loongarch.hpp" ++#endif + #ifdef TARGET_ARCH_aarch64 + # include "bytes_aarch64.hpp" + #endif +diff --git a/hotspot/src/share/vm/interpreter/bytecodes.cpp b/hotspot/src/share/vm/interpreter/bytecodes.cpp +index fdb880a3b3..4f5111074f 100644 +--- a/hotspot/src/share/vm/interpreter/bytecodes.cpp ++++ b/hotspot/src/share/vm/interpreter/bytecodes.cpp +@@ -22,6 +22,12 @@ + * + */ + ++/* ++ * This file has been modified by Loongson Technology in 2020. These ++ * modifications are Copyright (c) 2015, 2020, Loongson Technology, and are made ++ * available on the same license terms set forth above. ++ */ ++ + #include "precompiled.hpp" + #include "interpreter/bytecodes.hpp" + #include "memory/resourceArea.hpp" +@@ -29,6 +35,12 @@ + #ifdef TARGET_ARCH_x86 + # include "bytes_x86.hpp" + #endif ++#ifdef TARGET_ARCH_mips ++# include "bytes_mips.hpp" ++#endif ++#ifdef TARGET_ARCH_loongarch ++# include "bytes_loongarch.hpp" ++#endif + #ifdef TARGET_ARCH_aarch64 + # include "bytes_aarch64.hpp" + #endif +diff --git a/hotspot/src/share/vm/interpreter/bytecodes.hpp b/hotspot/src/share/vm/interpreter/bytecodes.hpp +index c3463cd76d..bdf4c487f0 100644 +--- a/hotspot/src/share/vm/interpreter/bytecodes.hpp ++++ b/hotspot/src/share/vm/interpreter/bytecodes.hpp +@@ -22,6 +22,12 @@ + * + */ + ++/* ++ * This file has been modified by Loongson Technology in 2020. These ++ * modifications are Copyright (c) 2015, 2020, Loongson Technology, and are made ++ * available on the same license terms set forth above. 
++ */ ++ + #ifndef SHARE_VM_INTERPRETER_BYTECODES_HPP + #define SHARE_VM_INTERPRETER_BYTECODES_HPP + +@@ -292,6 +298,12 @@ class Bytecodes: AllStatic { + #ifdef TARGET_ARCH_x86 + # include "bytecodes_x86.hpp" + #endif ++#ifdef TARGET_ARCH_mips ++# include "bytecodes_mips.hpp" ++#endif ++#ifdef TARGET_ARCH_loongarch ++# include "bytecodes_loongarch.hpp" ++#endif + #ifdef TARGET_ARCH_aarch64 + # include "bytecodes_aarch64.hpp" + #endif +diff --git a/hotspot/src/share/vm/interpreter/cppInterpreter.hpp b/hotspot/src/share/vm/interpreter/cppInterpreter.hpp +index 6a6447503c..f9c540fb4a 100644 +--- a/hotspot/src/share/vm/interpreter/cppInterpreter.hpp ++++ b/hotspot/src/share/vm/interpreter/cppInterpreter.hpp +@@ -22,6 +22,12 @@ + * + */ + ++/* ++ * This file has been modified by Loongson Technology in 2020. These ++ * modifications are Copyright (c) 2015, 2020, Loongson Technology, and are made ++ * available on the same license terms set forth above. ++ */ ++ + #ifndef SHARE_VM_INTERPRETER_CPPINTERPRETER_HPP + #define SHARE_VM_INTERPRETER_CPPINTERPRETER_HPP + +@@ -84,6 +90,12 @@ class CppInterpreter: public AbstractInterpreter { + #ifdef TARGET_ARCH_x86 + # include "cppInterpreter_x86.hpp" + #endif ++#ifdef TARGET_ARCH_mips ++# include "cppInterpreter_mips.hpp" ++#endif ++#ifdef TARGET_ARCH_loongarch ++# include "cppInterpreter_loongarch.hpp" ++#endif + #ifdef TARGET_ARCH_aarch64 + # include "cppInterpreter_aarch64.hpp" + #endif +diff --git a/hotspot/src/share/vm/interpreter/cppInterpreterGenerator.hpp b/hotspot/src/share/vm/interpreter/cppInterpreterGenerator.hpp +index 6a08a3f43f..1fd19994d7 100644 +--- a/hotspot/src/share/vm/interpreter/cppInterpreterGenerator.hpp ++++ b/hotspot/src/share/vm/interpreter/cppInterpreterGenerator.hpp +@@ -22,6 +22,12 @@ + * + */ + ++/* ++ * This file has been modified by Loongson Technology in 2020. These ++ * modifications are Copyright (c) 2015, 2020, Loongson Technology, and are made ++ * available on the same license terms set forth above. ++ */ ++ + #ifndef SHARE_VM_INTERPRETER_CPPINTERPRETERGENERATOR_HPP + #define SHARE_VM_INTERPRETER_CPPINTERPRETERGENERATOR_HPP + +@@ -50,6 +56,12 @@ class CppInterpreterGenerator: public AbstractInterpreterGenerator { + #ifdef TARGET_ARCH_x86 + # include "cppInterpreterGenerator_x86.hpp" + #endif ++#ifdef TARGET_ARCH_mips ++# include "cppInterpreterGenerator_mips.hpp" ++#endif ++#ifdef TARGET_ARCH_loongarch ++# include "cppInterpreterGenerator_loongarch.hpp" ++#endif + #ifdef TARGET_ARCH_aarch64 + # include "cppInterpreterGenerator_aarch64.hpp" + #endif +diff --git a/hotspot/src/share/vm/interpreter/interpreter.hpp b/hotspot/src/share/vm/interpreter/interpreter.hpp +index ebfb68d36b..610949f3f7 100644 +--- a/hotspot/src/share/vm/interpreter/interpreter.hpp ++++ b/hotspot/src/share/vm/interpreter/interpreter.hpp +@@ -22,6 +22,12 @@ + * + */ + ++/* ++ * This file has been modified by Loongson Technology in 2020. These ++ * modifications are Copyright (c) 2015, 2020, Loongson Technology, and are made ++ * available on the same license terms set forth above. 
++ */ ++ + #ifndef SHARE_VM_INTERPRETER_INTERPRETER_HPP + #define SHARE_VM_INTERPRETER_INTERPRETER_HPP + +@@ -148,6 +154,12 @@ class Interpreter: public CC_INTERP_ONLY(CppInterpreter) NOT_CC_INTERP(TemplateI + #ifdef TARGET_ARCH_x86 + # include "interpreter_x86.hpp" + #endif ++#ifdef TARGET_ARCH_mips ++# include "interpreter_mips.hpp" ++#endif ++#ifdef TARGET_ARCH_loongarch ++# include "interpreter_loongarch.hpp" ++#endif + #ifdef TARGET_ARCH_aarch64 + # include "interpreter_aarch64.hpp" + #endif +diff --git a/hotspot/src/share/vm/interpreter/interpreterGenerator.hpp b/hotspot/src/share/vm/interpreter/interpreterGenerator.hpp +index 1dc7cb2983..92bbe6b440 100644 +--- a/hotspot/src/share/vm/interpreter/interpreterGenerator.hpp ++++ b/hotspot/src/share/vm/interpreter/interpreterGenerator.hpp +@@ -22,6 +22,12 @@ + * + */ + ++/* ++ * This file has been modified by Loongson Technology in 2020. These ++ * modifications are Copyright (c) 2015, 2020, Loongson Technology, and are made ++ * available on the same license terms set forth above. ++ */ ++ + #ifndef SHARE_VM_INTERPRETER_INTERPRETERGENERATOR_HPP + #define SHARE_VM_INTERPRETER_INTERPRETERGENERATOR_HPP + +@@ -44,6 +50,12 @@ InterpreterGenerator(StubQueue* _code); + #ifdef TARGET_ARCH_x86 + # include "interpreterGenerator_x86.hpp" + #endif ++#ifdef TARGET_ARCH_mips ++# include "interpreterGenerator_mips.hpp" ++#endif ++#ifdef TARGET_ARCH_loongarch ++# include "interpreterGenerator_loongarch.hpp" ++#endif + #ifdef TARGET_ARCH_aarch64 + # include "interpreterGenerator_aarch64.hpp" + #endif +diff --git a/hotspot/src/share/vm/interpreter/interpreterRuntime.cpp b/hotspot/src/share/vm/interpreter/interpreterRuntime.cpp +index 5d2845383c..f48622f67e 100644 +--- a/hotspot/src/share/vm/interpreter/interpreterRuntime.cpp ++++ b/hotspot/src/share/vm/interpreter/interpreterRuntime.cpp +@@ -22,6 +22,12 @@ + * + */ + ++/* ++ * This file has been modified by Loongson Technology in 2020. These ++ * modifications are Copyright (c) 2015, 2020, Loongson Technology, and are made ++ * available on the same license terms set forth above. ++ */ ++ + #include "precompiled.hpp" + #include "classfile/systemDictionary.hpp" + #include "classfile/vmSymbols.hpp" +@@ -59,6 +65,12 @@ + #ifdef TARGET_ARCH_x86 + # include "vm_version_x86.hpp" + #endif ++#ifdef TARGET_ARCH_mips ++# include "vm_version_mips.hpp" ++#endif ++#ifdef TARGET_ARCH_loongarch ++# include "vm_version_loongarch.hpp" ++#endif + #ifdef TARGET_ARCH_aarch64 + # include "vm_version_aarch64.hpp" + #endif +@@ -1290,7 +1302,7 @@ IRT_ENTRY(void, InterpreterRuntime::prepare_native_call(JavaThread* thread, Meth + // preparing the same method will be sure to see non-null entry & mirror. + IRT_END + +-#if defined(IA32) || defined(AMD64) || defined(ARM) || defined(AARCH64) ++#if defined(IA32) || defined(AMD64) || defined(ARM) || defined(AARCH64) || defined(MIPS) || defined(LOONGARCH) + IRT_LEAF(void, InterpreterRuntime::popframe_move_outgoing_args(JavaThread* thread, void* src_address, void* dest_address)) + if (src_address == dest_address) { + return; +diff --git a/hotspot/src/share/vm/interpreter/interpreterRuntime.hpp b/hotspot/src/share/vm/interpreter/interpreterRuntime.hpp +index 472bf4d94c..9a98d5559c 100644 +--- a/hotspot/src/share/vm/interpreter/interpreterRuntime.hpp ++++ b/hotspot/src/share/vm/interpreter/interpreterRuntime.hpp +@@ -22,6 +22,12 @@ + * + */ + ++/* ++ * This file has been modified by Loongson Technology in 2020. 
These ++ * modifications are Copyright (c) 2015, 2020, Loongson Technology, and are made ++ * available on the same license terms set forth above. ++ */ ++ + #ifndef SHARE_VM_INTERPRETER_INTERPRETERRUNTIME_HPP + #define SHARE_VM_INTERPRETER_INTERPRETERRUNTIME_HPP + +@@ -156,7 +162,7 @@ class InterpreterRuntime: AllStatic { + Method* method, + intptr_t* from, intptr_t* to); + +-#if defined(IA32) || defined(AMD64) || defined(ARM) || defined(AARCH64) ++#if defined(IA32) || defined(AMD64) || defined(ARM) || defined(AARCH64) || defined(MIPS) || defined(LOONGARCH) + // Popframe support (only needed on x86, AMD64 and ARM) + static void popframe_move_outgoing_args(JavaThread* thread, void* src_address, void* dest_address); + #endif +@@ -165,6 +171,12 @@ class InterpreterRuntime: AllStatic { + #ifdef TARGET_ARCH_x86 + # include "interpreterRT_x86.hpp" + #endif ++#ifdef TARGET_ARCH_mips ++# include "interpreterRT_mips.hpp" ++#endif ++#ifdef TARGET_ARCH_loongarch ++# include "interpreterRT_loongarch.hpp" ++#endif + #ifdef TARGET_ARCH_aarch64 + # include "interpreterRT_aarch64.hpp" + #endif +diff --git a/hotspot/src/share/vm/interpreter/templateInterpreter.hpp b/hotspot/src/share/vm/interpreter/templateInterpreter.hpp +index 5f76dca8a6..757860f43c 100644 +--- a/hotspot/src/share/vm/interpreter/templateInterpreter.hpp ++++ b/hotspot/src/share/vm/interpreter/templateInterpreter.hpp +@@ -22,6 +22,12 @@ + * + */ + ++/* ++ * This file has been modified by Loongson Technology in 2020. These ++ * modifications are Copyright (c) 2015, 2020, Loongson Technology, and are made ++ * available on the same license terms set forth above. ++ */ ++ + #ifndef SHARE_VM_INTERPRETER_TEMPLATEINTERPRETER_HPP + #define SHARE_VM_INTERPRETER_TEMPLATEINTERPRETER_HPP + +@@ -190,6 +196,12 @@ class TemplateInterpreter: public AbstractInterpreter { + #ifdef TARGET_ARCH_x86 + # include "templateInterpreter_x86.hpp" + #endif ++#ifdef TARGET_ARCH_mips ++# include "templateInterpreter_mips.hpp" ++#endif ++#ifdef TARGET_ARCH_loongarch ++# include "templateInterpreter_loongarch.hpp" ++#endif + #ifdef TARGET_ARCH_aarch64 + # include "templateInterpreter_aarch64.hpp" + #endif +diff --git a/hotspot/src/share/vm/interpreter/templateInterpreterGenerator.hpp b/hotspot/src/share/vm/interpreter/templateInterpreterGenerator.hpp +index bd94bd02bc..28ca437eb2 100644 +--- a/hotspot/src/share/vm/interpreter/templateInterpreterGenerator.hpp ++++ b/hotspot/src/share/vm/interpreter/templateInterpreterGenerator.hpp +@@ -22,6 +22,12 @@ + * + */ + ++/* ++ * This file has been modified by Loongson Technology in 2020. These ++ * modifications are Copyright (c) 2015, 2020, Loongson Technology, and are made ++ * available on the same license terms set forth above. 
++ */ ++ + #ifndef SHARE_VM_INTERPRETER_TEMPLATEINTERPRETERGENERATOR_HPP + #define SHARE_VM_INTERPRETER_TEMPLATEINTERPRETERGENERATOR_HPP + +@@ -89,6 +95,12 @@ class TemplateInterpreterGenerator: public AbstractInterpreterGenerator { + #ifdef TARGET_ARCH_x86 + # include "templateInterpreterGenerator_x86.hpp" + #endif ++#ifdef TARGET_ARCH_mips ++# include "templateInterpreterGenerator_mips.hpp" ++#endif ++#ifdef TARGET_ARCH_loongarch ++# include "templateInterpreterGenerator_loongarch.hpp" ++#endif + #ifdef TARGET_ARCH_aarch64 + # include "templateInterpreterGenerator_aarch64.hpp" + #endif +diff --git a/hotspot/src/share/vm/interpreter/templateTable.hpp b/hotspot/src/share/vm/interpreter/templateTable.hpp +index 60d243c16a..1b73822abd 100644 +--- a/hotspot/src/share/vm/interpreter/templateTable.hpp ++++ b/hotspot/src/share/vm/interpreter/templateTable.hpp +@@ -22,6 +22,12 @@ + * + */ + ++/* ++ * This file has been modified by Loongson Technology in 2020. These ++ * modifications are Copyright (c) 2015, 2020, Loongson Technology, and are made ++ * available on the same license terms set forth above. ++ */ ++ + #ifndef SHARE_VM_INTERPRETER_TEMPLATETABLE_HPP + #define SHARE_VM_INTERPRETER_TEMPLATETABLE_HPP + +@@ -40,6 +46,10 @@ + # include "interp_masm_zero.hpp" + #elif defined TARGET_ARCH_MODEL_ppc_64 + # include "interp_masm_ppc_64.hpp" ++#elif defined TARGET_ARCH_MODEL_mips_64 ++# include "interp_masm_mips_64.hpp" ++#elif defined TARGET_ARCH_MODEL_loongarch_64 ++# include "interp_masm_loongarch_64.hpp" + #endif + + #ifndef CC_INTERP +@@ -367,6 +377,10 @@ class TemplateTable: AllStatic { + # include "templateTable_zero.hpp" + #elif defined TARGET_ARCH_MODEL_ppc_64 + # include "templateTable_ppc_64.hpp" ++#elif defined TARGET_ARCH_MODEL_mips_64 ++# include "templateTable_mips_64.hpp" ++#elif defined TARGET_ARCH_MODEL_loongarch_64 ++# include "templateTable_loongarch_64.hpp" + #endif + + }; +diff --git a/hotspot/src/share/vm/jfr/utilities/jfrBigEndian.hpp b/hotspot/src/share/vm/jfr/utilities/jfrBigEndian.hpp +index 6d9ab39fdd..f4e9a4ca69 100644 +--- a/hotspot/src/share/vm/jfr/utilities/jfrBigEndian.hpp ++++ b/hotspot/src/share/vm/jfr/utilities/jfrBigEndian.hpp +@@ -116,7 +116,7 @@ inline T JfrBigEndian::read_unaligned(const address location) { + inline bool JfrBigEndian::platform_supports_unaligned_reads(void) { + #if defined(IA32) || defined(AMD64) || defined(PPC) || defined(S390) + return true; +-#elif defined(SPARC) || defined(ARM) || defined(AARCH64) ++#elif defined(SPARC) || defined(ARM) || defined(AARCH64) || defined(MIPS) || defined(LOONGARCH) + return false; + #else + #warning "Unconfigured platform" +diff --git a/hotspot/src/share/vm/jfr/writers/jfrEncoders.hpp b/hotspot/src/share/vm/jfr/writers/jfrEncoders.hpp +index 42a8b719cd..f08f6ee13a 100644 +--- a/hotspot/src/share/vm/jfr/writers/jfrEncoders.hpp ++++ b/hotspot/src/share/vm/jfr/writers/jfrEncoders.hpp +@@ -22,6 +22,12 @@ + * + */ + ++/* ++ * This file has been modified by Loongson Technology in 2022. These ++ * modifications are Copyright (c) 2022, Loongson Technology, and are made ++ * available on the same license terms set forth above. 
++ */ ++ + #ifndef SHARE_VM_JFR_WRITERS_JFRENCODERS_HPP + #define SHARE_VM_JFR_WRITERS_JFRENCODERS_HPP + +@@ -46,6 +52,12 @@ + #ifdef TARGET_ARCH_aarch64 + # include "bytes_aarch64.hpp" + #endif ++#ifdef TARGET_ARCH_loongarch ++# include "bytes_loongarch.hpp" ++#endif ++#ifdef TARGET_ARCH_mips ++# include "bytes_mips.hpp" ++#endif + + // + // The Encoding policy prescribes a template +diff --git a/hotspot/src/share/vm/memory/barrierSet.hpp b/hotspot/src/share/vm/memory/barrierSet.hpp +index 13ff9b2738..081b70744d 100644 +--- a/hotspot/src/share/vm/memory/barrierSet.hpp ++++ b/hotspot/src/share/vm/memory/barrierSet.hpp +@@ -27,6 +27,7 @@ + + #include "memory/memRegion.hpp" + #include "oops/oopsHierarchy.hpp" ++#include "runtime/orderAccess.hpp" + + // This class provides the interface between a barrier implementation and + // the rest of the system. +@@ -95,8 +96,16 @@ private: + // Keep this private so as to catch violations at build time. + virtual void write_ref_field_pre_work( void* field, oop new_val) { guarantee(false, "Not needed"); }; + protected: +- virtual void write_ref_field_pre_work( oop* field, oop new_val) {}; +- virtual void write_ref_field_pre_work(narrowOop* field, oop new_val) {}; ++ virtual void write_ref_field_pre_work( oop* field, oop new_val) { ++#if (defined MIPS || defined LOONGARCH) && !defined ZERO ++ if (UseSyncLevel >= 2000) OrderAccess::fence(); ++#endif ++ }; ++ virtual void write_ref_field_pre_work(narrowOop* field, oop new_val) { ++#if (defined MIPS || defined LOONGARCH) && !defined ZERO ++ if (UseSyncLevel >= 2000) OrderAccess::fence(); ++#endif ++ }; + public: + + // ...then the post-write version. +@@ -132,9 +141,17 @@ public: + + // Below length is the # array elements being written + virtual void write_ref_array_pre(oop* dst, int length, +- bool dest_uninitialized = false) {} ++ bool dest_uninitialized = false) { ++#if (defined MIPS || defined LOONGARCH) && !defined ZERO ++ if (UseSyncLevel >= 2000) OrderAccess::fence(); ++#endif ++ } + virtual void write_ref_array_pre(narrowOop* dst, int length, +- bool dest_uninitialized = false) {} ++ bool dest_uninitialized = false) { ++#if (defined MIPS || defined LOONGARCH) && !defined ZERO ++ if (UseSyncLevel >= 2000) OrderAccess::fence(); ++#endif ++} + // Below count is the # array elements being written, starting + // at the address "start", which may not necessarily be HeapWord-aligned + inline void write_ref_array(HeapWord* start, size_t count); +diff --git a/hotspot/src/share/vm/memory/cardTableModRefBS.hpp b/hotspot/src/share/vm/memory/cardTableModRefBS.hpp +index 01e4688836..80bd151873 100644 +--- a/hotspot/src/share/vm/memory/cardTableModRefBS.hpp ++++ b/hotspot/src/share/vm/memory/cardTableModRefBS.hpp +@@ -316,6 +316,9 @@ public: + + inline void inline_write_ref_array(MemRegion mr) { + dirty_MemRegion(mr); ++#if (defined MIPS || defined LOONGARCH) && !defined ZERO ++ if (UseSyncLevel >= 2000) OrderAccess::fence(); ++#endif + } + protected: + void write_ref_array_work(MemRegion mr) { +@@ -329,7 +332,11 @@ public: + + // *** Card-table-barrier-specific things. 
+ +- template inline void inline_write_ref_field_pre(T* field, oop newVal) {} ++ template inline void inline_write_ref_field_pre(T* field, oop newVal) { ++#if (defined MIPS || defined LOONGARCH) && !defined ZERO ++ if (UseSyncLevel >= 2000) OrderAccess::fence(); ++#endif ++ } + + template inline void inline_write_ref_field(T* field, oop newVal, bool release) { + jbyte* byte = byte_for((void*)field); +@@ -339,6 +346,9 @@ public: + } else { + *byte = dirty_card; + } ++#if (defined MIPS || defined LOONGARCH) && !defined ZERO ++ if (UseSyncLevel >= 2000) OrderAccess::fence(); ++#endif + } + + // These are used by G1, when it uses the card table as a temporary data +diff --git a/hotspot/src/share/vm/memory/cardTableRS.cpp b/hotspot/src/share/vm/memory/cardTableRS.cpp +index fb33a708ae..da22acba47 100644 +--- a/hotspot/src/share/vm/memory/cardTableRS.cpp ++++ b/hotspot/src/share/vm/memory/cardTableRS.cpp +@@ -252,6 +252,9 @@ void ClearNoncleanCardWrapper::do_MemRegion(MemRegion mr) { + // cur_youngergen_and_prev_nonclean_card ==> no change. + void CardTableRS::write_ref_field_gc_par(void* field, oop new_val) { + jbyte* entry = ct_bs()->byte_for(field); ++#if (defined MIPS || defined LOONGARCH) && !defined ZERO ++ if (UseSyncLevel >= 2000) OrderAccess::fence(); ++#endif + do { + jbyte entry_val = *entry; + // We put this first because it's probably the most common case. +@@ -266,7 +269,12 @@ void CardTableRS::write_ref_field_gc_par(void* field, oop new_val) { + jbyte new_val = cur_youngergen_and_prev_nonclean_card; + jbyte res = Atomic::cmpxchg(new_val, entry, entry_val); + // Did the CAS succeed? +- if (res == entry_val) return; ++ if (res == entry_val) { ++#if (defined MIPS || defined LOONGARCH) && !defined ZERO ++ if (UseSyncLevel >= 2000) OrderAccess::fence(); ++#endif ++ return; ++ } + // Otherwise, retry, to see the new value. + continue; + } else { +diff --git a/hotspot/src/share/vm/memory/cardTableRS.hpp b/hotspot/src/share/vm/memory/cardTableRS.hpp +index 25884feac8..5d4e77f269 100644 +--- a/hotspot/src/share/vm/memory/cardTableRS.hpp ++++ b/hotspot/src/share/vm/memory/cardTableRS.hpp +@@ -121,7 +121,14 @@ public: + + void inline_write_ref_field_gc(void* field, oop new_val) { + jbyte* byte = _ct_bs->byte_for(field); +- *byte = youngergen_card; ++#if (defined MIPS || defined LOONGARCH) && !defined ZERO ++ if (UseSyncLevel >= 2000) OrderAccess::fence(); ++#endif ++ *byte = youngergen_card; ++#if (defined MIPS || defined LOONGARCH) && !defined ZERO ++ if (UseSyncLevel >= 2000) OrderAccess::fence(); ++#endif ++ + } + void write_ref_field_gc_work(void* field, oop new_val) { + inline_write_ref_field_gc(field, new_val); +diff --git a/hotspot/src/share/vm/memory/metaspace.cpp b/hotspot/src/share/vm/memory/metaspace.cpp +index fb0564ac27..9cec7d4375 100644 +--- a/hotspot/src/share/vm/memory/metaspace.cpp ++++ b/hotspot/src/share/vm/memory/metaspace.cpp +@@ -21,6 +21,13 @@ + * questions. + * + */ ++ ++/* ++ * This file has been modified by Loongson Technology in 2021. These ++ * modifications are Copyright (c) 2021 Loongson Technology, and are made ++ * available on the same license terms set forth above. ++ */ ++ + #include "precompiled.hpp" + #include "gc_interface/collectedHeap.hpp" + #include "memory/allocation.hpp" +@@ -3065,12 +3072,12 @@ void Metaspace::allocate_metaspace_compressed_klass_ptrs(char* requested_addr, a + // Don't use large pages for the class space. 
+ bool large_pages = false; + +-#ifndef AARCH64 ++#if !defined(AARCH64) && !defined(MIPS64) && !defined(LOONGARCH) + ReservedSpace metaspace_rs = ReservedSpace(compressed_class_space_size(), + _reserve_alignment, + large_pages, + requested_addr, 0); +-#else // AARCH64 ++#else // defined(AARCH64) || defined(MIPS64) || defined(LOONGARCH) + ReservedSpace metaspace_rs; + + // Our compressed klass pointers may fit nicely into the lower 32 +@@ -3107,7 +3114,7 @@ void Metaspace::allocate_metaspace_compressed_klass_ptrs(char* requested_addr, a + } + } + +-#endif // AARCH64 ++#endif // defined(AARCH64) || defined(MIPS64) || defined(LOONGARCH) + + if (!metaspace_rs.is_reserved()) { + #if INCLUDE_CDS +diff --git a/hotspot/src/share/vm/oops/constantPool.hpp b/hotspot/src/share/vm/oops/constantPool.hpp +index ec111df04e..6c0607105c 100644 +--- a/hotspot/src/share/vm/oops/constantPool.hpp ++++ b/hotspot/src/share/vm/oops/constantPool.hpp +@@ -22,6 +22,12 @@ + * + */ + ++/* ++ * This file has been modified by Loongson Technology in 2020. These ++ * modifications are Copyright (c) 2015, 2020, Loongson Technology, and are made ++ * available on the same license terms set forth above. ++ */ ++ + #ifndef SHARE_VM_OOPS_CONSTANTPOOLOOP_HPP + #define SHARE_VM_OOPS_CONSTANTPOOLOOP_HPP + +@@ -50,6 +56,13 @@ + #ifdef TARGET_ARCH_ppc + # include "bytes_ppc.hpp" + #endif ++#ifdef TARGET_ARCH_mips ++# include "bytes_mips.hpp" ++#endif ++#ifdef TARGET_ARCH_loongarch ++# include "bytes_loongarch.hpp" ++#endif ++ + + // A constantPool is an array containing class constants as described in the + // class file. +diff --git a/hotspot/src/share/vm/oops/klass.hpp b/hotspot/src/share/vm/oops/klass.hpp +index acef334849..23fc0b9988 100644 +--- a/hotspot/src/share/vm/oops/klass.hpp ++++ b/hotspot/src/share/vm/oops/klass.hpp +@@ -32,6 +32,9 @@ + #include "oops/klassPS.hpp" + #include "oops/metadata.hpp" + #include "oops/oop.hpp" ++#if defined MIPS || defined LOONGARCH ++#include "runtime/orderAccess.hpp" ++#endif + #include "utilities/accessFlags.hpp" + #include "utilities/macros.hpp" + #if INCLUDE_ALL_GCS +@@ -289,8 +292,18 @@ protected: + // The Klasses are not placed in the Heap, so the Card Table or + // the Mod Union Table can't be used to mark when klasses have modified oops. + // The CT and MUT bits saves this information for the individual Klasses. 
+- void record_modified_oops() { _modified_oops = 1; } +- void clear_modified_oops() { _modified_oops = 0; } ++ void record_modified_oops() { ++ _modified_oops = 1; ++#if (defined MIPS || defined LOONGARCH) && !defined ZERO ++ if (UseSyncLevel >= 2000) OrderAccess::fence(); ++#endif ++ } ++ void clear_modified_oops() { ++ _modified_oops = 0; ++#if (defined MIPS || defined LOONGARCH) && !defined ZERO ++ if (UseSyncLevel >= 2000) OrderAccess::fence(); ++#endif ++ } + bool has_modified_oops() { return _modified_oops == 1; } + + void accumulate_modified_oops() { if (has_modified_oops()) _accumulated_modified_oops = 1; } +diff --git a/hotspot/src/share/vm/oops/oop.hpp b/hotspot/src/share/vm/oops/oop.hpp +index 0678c6b3fb..1cb20e351f 100644 +--- a/hotspot/src/share/vm/oops/oop.hpp ++++ b/hotspot/src/share/vm/oops/oop.hpp +@@ -72,7 +72,13 @@ class oopDesc { + markOop mark() const { return _mark; } + markOop* mark_addr() const { return (markOop*) &_mark; } + +- void set_mark(volatile markOop m) { _mark = m; } ++ void set_mark(volatile markOop m) { ++#if (defined MIPS || defined LOONGARCH) && !defined ZERO ++ if (UseSyncLevel >= 2000) release_set_mark(m); ++ else ++#endif ++ _mark = m; ++ } + + void release_set_mark(markOop m); + markOop cas_set_mark(markOop new_mark, markOop old_mark); +diff --git a/hotspot/src/share/vm/oops/oop.inline.hpp b/hotspot/src/share/vm/oops/oop.inline.hpp +index beec739d38..8660c1e331 100644 +--- a/hotspot/src/share/vm/oops/oop.inline.hpp ++++ b/hotspot/src/share/vm/oops/oop.inline.hpp +@@ -22,6 +22,12 @@ + * + */ + ++/* ++ * This file has been modified by Loongson Technology in 2020. These ++ * modifications are Copyright (c) 2015, 2020, Loongson Technology, and are made ++ * available on the same license terms set forth above. ++ */ ++ + #ifndef SHARE_VM_OOPS_OOP_INLINE_HPP + #define SHARE_VM_OOPS_OOP_INLINE_HPP + +@@ -60,6 +66,12 @@ + #ifdef TARGET_ARCH_ppc + # include "bytes_ppc.hpp" + #endif ++#ifdef TARGET_ARCH_mips ++# include "bytes_mips.hpp" ++#endif ++#ifdef TARGET_ARCH_loongarch ++# include "bytes_loongarch.hpp" ++#endif + + // Implementation of all inlined member functions defined in oop.hpp + // We need a separate file to avoid circular references +diff --git a/hotspot/src/share/vm/oops/oop.pcgc.inline.hpp b/hotspot/src/share/vm/oops/oop.pcgc.inline.hpp +index 8a4603944e..b28bb99189 100644 +--- a/hotspot/src/share/vm/oops/oop.pcgc.inline.hpp ++++ b/hotspot/src/share/vm/oops/oop.pcgc.inline.hpp +@@ -22,6 +22,12 @@ + * + */ + ++/* ++ * This file has been modified by Loongson Technology in 2022. These ++ * modifications are Copyright (c) 2022, Loongson Technology, and are made ++ * available on the same license terms set forth above. ++ */ ++ + #ifndef SHARE_VM_OOPS_OOP_PCGC_INLINE_HPP + #define SHARE_VM_OOPS_OOP_PCGC_INLINE_HPP + +@@ -75,7 +81,7 @@ inline oop oopDesc::forward_to_atomic(oop p) { + // forwarding pointer. + oldMark = curMark; + } +- return forwardee(); ++ return (oop) oldMark->decode_pointer(); + } + + #endif // SHARE_VM_OOPS_OOP_PCGC_INLINE_HPP +diff --git a/hotspot/src/share/vm/opto/buildOopMap.cpp b/hotspot/src/share/vm/opto/buildOopMap.cpp +index 91642f1d7d..5df185df04 100644 +--- a/hotspot/src/share/vm/opto/buildOopMap.cpp ++++ b/hotspot/src/share/vm/opto/buildOopMap.cpp +@@ -22,6 +22,12 @@ + * + */ + ++/* ++ * This file has been modified by Loongson Technology in 2020. These ++ * modifications are Copyright (c) 2015, 2020, Loongson Technology, and are made ++ * available on the same license terms set forth above. 
++ */ ++ + #include "precompiled.hpp" + #include "compiler/oopMap.hpp" + #include "opto/addnode.hpp" +@@ -50,6 +56,12 @@ + #ifdef TARGET_ARCH_ppc + # include "vmreg_ppc.inline.hpp" + #endif ++#ifdef TARGET_ARCH_mips ++# include "vmreg_mips.inline.hpp" ++#endif ++#ifdef TARGET_ARCH_loongarch ++# include "vmreg_loongarch.inline.hpp" ++#endif + + // The functions in this file builds OopMaps after all scheduling is done. + // +diff --git a/hotspot/src/share/vm/opto/bytecodeInfo.cpp b/hotspot/src/share/vm/opto/bytecodeInfo.cpp +index 7fd615d35f..ad472e8722 100644 +--- a/hotspot/src/share/vm/opto/bytecodeInfo.cpp ++++ b/hotspot/src/share/vm/opto/bytecodeInfo.cpp +@@ -361,9 +361,20 @@ bool InlineTree::try_to_inline(ciMethod* callee_method, ciMethod* caller_method, + } else if (forced_inline()) { + // Inlining was forced by CompilerOracle, ciReplay or annotation + } else if (profile.count() == 0) { ++#ifndef MIPS + // don't inline unreached call sites + set_msg("call site not reached"); + return false; ++#else ++ ciMethodBlocks* blocks = caller_method->get_method_blocks(); ++ // Check if the call site belongs to a start block: ++ // call sites in a start block must be reached before. ++ if (blocks->block_containing(0) != blocks->block_containing(jvms->bci())) { ++ // don't inline unreached call sites ++ set_msg("call site not reached"); ++ return false; ++ } ++#endif + } + } + +diff --git a/hotspot/src/share/vm/opto/c2_globals.hpp b/hotspot/src/share/vm/opto/c2_globals.hpp +index 82d2efef92..d373b20456 100644 +--- a/hotspot/src/share/vm/opto/c2_globals.hpp ++++ b/hotspot/src/share/vm/opto/c2_globals.hpp +@@ -22,6 +22,12 @@ + * + */ + ++/* ++ * This file has been modified by Loongson Technology in 2020. These ++ * modifications are Copyright (c) 2015, 2020, Loongson Technology, and are made ++ * available on the same license terms set forth above. ++ */ ++ + #ifndef SHARE_VM_OPTO_C2_GLOBALS_HPP + #define SHARE_VM_OPTO_C2_GLOBALS_HPP + +@@ -35,6 +41,12 @@ + #ifdef TARGET_ARCH_sparc + # include "c2_globals_sparc.hpp" + #endif ++#ifdef TARGET_ARCH_mips ++# include "c2_globals_mips.hpp" ++#endif ++#ifdef TARGET_ARCH_loongarch ++# include "c2_globals_loongarch.hpp" ++#endif + #ifdef TARGET_ARCH_arm + # include "c2_globals_arm.hpp" + #endif +diff --git a/hotspot/src/share/vm/opto/c2compiler.cpp b/hotspot/src/share/vm/opto/c2compiler.cpp +index 137f49600d..f689d64a38 100644 +--- a/hotspot/src/share/vm/opto/c2compiler.cpp ++++ b/hotspot/src/share/vm/opto/c2compiler.cpp +@@ -22,6 +22,12 @@ + * + */ + ++/* ++ * This file has been modified by Loongson Technology in 2020. These ++ * modifications are Copyright (c) 2015, 2020, Loongson Technology, and are made ++ * available on the same license terms set forth above. 
++ */ ++ + #include "precompiled.hpp" + #include "opto/c2compiler.hpp" + #include "opto/runtime.hpp" +@@ -39,6 +45,10 @@ + # include "adfiles/ad_zero.hpp" + #elif defined TARGET_ARCH_MODEL_ppc_64 + # include "adfiles/ad_ppc_64.hpp" ++#elif defined TARGET_ARCH_MODEL_mips_64 ++# include "adfiles/ad_mips_64.hpp" ++#elif defined TARGET_ARCH_MODEL_loongarch_64 ++# include "adfiles/ad_loongarch_64.hpp" + #endif + + // register information defined by ADLC +diff --git a/hotspot/src/share/vm/opto/chaitin.hpp b/hotspot/src/share/vm/opto/chaitin.hpp +index de6d443cd3..0b27dc9335 100644 +--- a/hotspot/src/share/vm/opto/chaitin.hpp ++++ b/hotspot/src/share/vm/opto/chaitin.hpp +@@ -22,6 +22,12 @@ + * + */ + ++/* ++ * This file has been modified by Loongson Technology in 2022, These ++ * modifications are Copyright (c) 2022, Loongson Technology, and are made ++ * available on the same license terms set forth above. ++ */ ++ + #ifndef SHARE_VM_OPTO_CHAITIN_HPP + #define SHARE_VM_OPTO_CHAITIN_HPP + +@@ -136,8 +142,12 @@ public: + + // Number of registers this live range uses when it colors + private: ++#ifdef LOONGARCH64 ++ uint16_t _num_regs; ++#else + uint8 _num_regs; // 2 for Longs and Doubles, 1 for all else + // except _num_regs is kill count for fat_proj ++#endif + public: + int num_regs() const { return _num_regs; } + void set_num_regs( int reg ) { assert( _num_regs == reg || !_num_regs, "" ); _num_regs = reg; } +@@ -145,7 +155,11 @@ public: + private: + // Number of physical registers this live range uses when it colors + // Architecture and register-set dependent ++#ifdef LOONGARCH64 ++ uint16_t _reg_pressure; ++#else + uint8 _reg_pressure; ++#endif + public: + void set_reg_pressure(int i) { _reg_pressure = i; } + int reg_pressure() const { return _reg_pressure; } +diff --git a/hotspot/src/share/vm/opto/compile.cpp b/hotspot/src/share/vm/opto/compile.cpp +index ae22ba84d9..9004dc0d72 100644 +--- a/hotspot/src/share/vm/opto/compile.cpp ++++ b/hotspot/src/share/vm/opto/compile.cpp +@@ -22,6 +22,12 @@ + * + */ + ++/* ++ * This file has been modified by Loongson Technology in 2020. These ++ * modifications are Copyright (c) 2015, 2020, Loongson Technology, and are made ++ * available on the same license terms set forth above. 
++ */ ++ + #include "precompiled.hpp" + #include "asm/macroAssembler.hpp" + #include "asm/macroAssembler.inline.hpp" +@@ -81,6 +87,10 @@ + # include "adfiles/ad_zero.hpp" + #elif defined TARGET_ARCH_MODEL_ppc_64 + # include "adfiles/ad_ppc_64.hpp" ++#elif defined TARGET_ARCH_MODEL_mips_64 ++# include "adfiles/ad_mips_64.hpp" ++#elif defined TARGET_ARCH_MODEL_loongarch_64 ++# include "adfiles/ad_loongarch_64.hpp" + #endif + + // -------------------- Compile::mach_constant_base_node ----------------------- +diff --git a/hotspot/src/share/vm/opto/compile.hpp b/hotspot/src/share/vm/opto/compile.hpp +index b4f4cfefed..d263ee2fc4 100644 +--- a/hotspot/src/share/vm/opto/compile.hpp ++++ b/hotspot/src/share/vm/opto/compile.hpp +@@ -1025,7 +1025,7 @@ class Compile : public Phase { + bool in_scratch_emit_size() const { return _in_scratch_emit_size; } + + enum ScratchBufferBlob { +- MAX_inst_size = 1024, ++ MAX_inst_size = 1024 MIPS64_ONLY(* 2) LOONGARCH64_ONLY(*2), + MAX_locs_size = 128, // number of relocInfo elements + MAX_const_size = 128, + MAX_stubs_size = 128 +diff --git a/hotspot/src/share/vm/opto/gcm.cpp b/hotspot/src/share/vm/opto/gcm.cpp +index f51484efb0..12457b7c34 100644 +--- a/hotspot/src/share/vm/opto/gcm.cpp ++++ b/hotspot/src/share/vm/opto/gcm.cpp +@@ -22,6 +22,12 @@ + * + */ + ++/* ++ * This file has been modified by Loongson Technology in 2020. These ++ * modifications are Copyright (c) 2015, 2020, Loongson Technology, and are made ++ * available on the same license terms set forth above. ++ */ ++ + #include "precompiled.hpp" + #include "libadt/vectset.hpp" + #include "memory/allocation.inline.hpp" +@@ -49,6 +55,10 @@ + # include "adfiles/ad_zero.hpp" + #elif defined TARGET_ARCH_MODEL_ppc_64 + # include "adfiles/ad_ppc_64.hpp" ++#elif defined TARGET_ARCH_MODEL_mips_64 ++# include "adfiles/ad_mips_64.hpp" ++#elif defined TARGET_ARCH_MODEL_loongarch_64 ++# include "adfiles/ad_loongarch_64.hpp" + #endif + + +diff --git a/hotspot/src/share/vm/opto/lcm.cpp b/hotspot/src/share/vm/opto/lcm.cpp +index c6178a715b..2d492568d9 100644 +--- a/hotspot/src/share/vm/opto/lcm.cpp ++++ b/hotspot/src/share/vm/opto/lcm.cpp +@@ -22,6 +22,12 @@ + * + */ + ++/* ++ * This file has been modified by Loongson Technology in 2020. These ++ * modifications are Copyright (c) 2015, 2020, Loongson Technology, and are made ++ * available on the same license terms set forth above. ++ */ ++ + #include "precompiled.hpp" + #include "memory/allocation.inline.hpp" + #include "opto/block.hpp" +@@ -44,6 +50,10 @@ + # include "adfiles/ad_zero.hpp" + #elif defined TARGET_ARCH_MODEL_ppc_64 + # include "adfiles/ad_ppc_64.hpp" ++#elif defined TARGET_ARCH_MODEL_mips_64 ++# include "adfiles/ad_mips_64.hpp" ++#elif defined TARGET_ARCH_MODEL_loongarch_64 ++# include "adfiles/ad_loongarch_64.hpp" + #endif + + // Optimization - Graph Style +diff --git a/hotspot/src/share/vm/opto/locknode.hpp b/hotspot/src/share/vm/opto/locknode.hpp +index b320f6bfb2..4bfb0ff072 100644 +--- a/hotspot/src/share/vm/opto/locknode.hpp ++++ b/hotspot/src/share/vm/opto/locknode.hpp +@@ -22,6 +22,12 @@ + * + */ + ++/* ++ * This file has been modified by Loongson Technology in 2020. These ++ * modifications are Copyright (c) 2015, 2020, Loongson Technology, and are made ++ * available on the same license terms set forth above. 
++ */ ++ + #ifndef SHARE_VM_OPTO_LOCKNODE_HPP + #define SHARE_VM_OPTO_LOCKNODE_HPP + +@@ -42,6 +48,10 @@ + # include "adfiles/ad_zero.hpp" + #elif defined TARGET_ARCH_MODEL_ppc_64 + # include "adfiles/ad_ppc_64.hpp" ++#elif defined TARGET_ARCH_MODEL_mips_64 ++# include "adfiles/ad_mips_64.hpp" ++#elif defined TARGET_ARCH_MODEL_loongarch_64 ++# include "adfiles/ad_loongarch_64.hpp" + #endif + + //------------------------------BoxLockNode------------------------------------ +diff --git a/hotspot/src/share/vm/opto/matcher.cpp b/hotspot/src/share/vm/opto/matcher.cpp +index 75f1fbee58..994de0736a 100644 +--- a/hotspot/src/share/vm/opto/matcher.cpp ++++ b/hotspot/src/share/vm/opto/matcher.cpp +@@ -22,6 +22,12 @@ + * + */ + ++/* ++ * This file has been modified by Loongson Technology in 2020. These ++ * modifications are Copyright (c) 2015, 2020, Loongson Technology, and are made ++ * available on the same license terms set forth above. ++ */ ++ + #include "precompiled.hpp" + #include "memory/allocation.inline.hpp" + #include "opto/addnode.hpp" +@@ -52,6 +58,10 @@ + # include "adfiles/ad_zero.hpp" + #elif defined TARGET_ARCH_MODEL_ppc_64 + # include "adfiles/ad_ppc_64.hpp" ++#elif defined TARGET_ARCH_MODEL_mips_64 ++# include "adfiles/ad_mips_64.hpp" ++#elif defined TARGET_ARCH_MODEL_loongarch_64 ++# include "adfiles/ad_loongarch_64.hpp" + #endif + + OptoReg::Name OptoReg::c_frame_pointer; +diff --git a/hotspot/src/share/vm/opto/output.cpp b/hotspot/src/share/vm/opto/output.cpp +index 6032b72a9b..7fb4dea28e 100644 +--- a/hotspot/src/share/vm/opto/output.cpp ++++ b/hotspot/src/share/vm/opto/output.cpp +@@ -22,6 +22,12 @@ + * + */ + ++/* ++ * This file has been modified by Loongson Technology in 2021. These ++ * modifications are Copyright (c) 2018, 2021, Loongson Technology, and are made ++ * available on the same license terms set forth above. ++ */ ++ + #include "precompiled.hpp" + #include "asm/assembler.inline.hpp" + #include "code/compiledIC.hpp" +@@ -844,6 +850,27 @@ void Compile::Process_OopMap_Node(MachNode *mach, int current_offset) { + // Add the safepoint in the DebugInfoRecorder + if( !mach->is_MachCall() ) { + mcall = NULL; ++#if defined(MIPS) || defined(LOONGARCH) ++ // safepoint_pc_offset should point to tha last instruction in safePoint. ++ // In X86 and sparc, their safePoints only contain one instruction. ++ // However, we should add current_offset with the size of safePoint in MIPS. 
++ // 0x2d6ff22c: lw s2, 0x14(s2) ++ // last_pd->pc_offset()=308, pc_offset=304, bci=64 ++ // last_pd->pc_offset()=312, pc_offset=312, bci=64 ++ // src/hotspot/share/code/debugInfoRec.cpp:295, assert(last_pd->pc_offset() == pc_offset, "must be last pc") ++ // ++ // ;; Safepoint: ++ // ---> pc_offset=304 ++ // 0x2d6ff230: lui at, 0x2b7a ; OopMap{s2=Oop s5=Oop t4=Oop off=308} ++ // ;*goto ++ // ; - java.util.Hashtable::get@64 (line 353) ++ // ---> last_pd(308) ++ // 0x2d6ff234: lw at, 0xffffc100(at) ;*goto ++ // ; - java.util.Hashtable::get@64 (line 353) ++ // ; {poll} ++ // 0x2d6ff238: addiu s0, zero, 0x0 ++ safepoint_pc_offset += sfn->size(_regalloc) - 4; ++#endif + debug_info()->add_safepoint(safepoint_pc_offset, sfn->_oop_map); + } else { + mcall = mach->as_MachCall(); +@@ -1502,6 +1529,22 @@ void Compile::fill_buffer(CodeBuffer* cb, uint* blk_starts) { + DEBUG_ONLY( uint instr_offset = cb->insts_size(); ) + n->emit(*cb, _regalloc); + current_offset = cb->insts_size(); ++#if defined(MIPS) || defined(LOONGARCH) ++ if (!n->is_Proj() && (cb->insts()->end() != badAddress)) { ++ // For MIPS, the first instruction of the previous node (usually a instruction sequence) sometime ++ // is not the instruction which access memory. adjust is needed. previous_offset points to the ++ // instruction which access memory. Instruction size is 4. cb->insts_size() and ++ // cb->insts()->end() are the location of current instruction. ++ int adjust = 4; ++ NativeInstruction* inst = (NativeInstruction*) (cb->insts()->end() - 4); ++ if (inst->is_sync()) { ++ // a sync may be the last instruction, see store_B_immI_enc_sync ++ adjust += 4; ++ inst = (NativeInstruction*) (cb->insts()->end() - 8); ++ } ++ previous_offset = current_offset - adjust; ++ } ++#endif + + // Above we only verified that there is enough space in the instruction section. + // However, the instruction may emit stubs that cause code buffer expansion. +diff --git a/hotspot/src/share/vm/opto/output.hpp b/hotspot/src/share/vm/opto/output.hpp +index ba72841363..37f954de9b 100644 +--- a/hotspot/src/share/vm/opto/output.hpp ++++ b/hotspot/src/share/vm/opto/output.hpp +@@ -22,6 +22,12 @@ + * + */ + ++/* ++ * This file has been modified by Loongson Technology in 2020. These ++ * modifications are Copyright (c) 2015, 2020, Loongson Technology, and are made ++ * available on the same license terms set forth above. ++ */ ++ + #ifndef SHARE_VM_OPTO_OUTPUT_HPP + #define SHARE_VM_OPTO_OUTPUT_HPP + +@@ -41,6 +47,10 @@ + # include "adfiles/ad_zero.hpp" + #elif defined TARGET_ARCH_MODEL_ppc_64 + # include "adfiles/ad_ppc_64.hpp" ++#elif defined TARGET_ARCH_MODEL_mips_64 ++# include "adfiles/ad_mips_64.hpp" ++#elif defined TARGET_ARCH_MODEL_loongarch_64 ++# include "adfiles/ad_loongarch_64.hpp" + #endif + + class Arena; +diff --git a/hotspot/src/share/vm/opto/regmask.cpp b/hotspot/src/share/vm/opto/regmask.cpp +index 352ccfb9d9..9a656d03ee 100644 +--- a/hotspot/src/share/vm/opto/regmask.cpp ++++ b/hotspot/src/share/vm/opto/regmask.cpp +@@ -22,6 +22,12 @@ + * + */ + ++/* ++ * This file has been modified by Loongson Technology in 2020. These ++ * modifications are Copyright (c) 2015, 2020, Loongson Technology, and are made ++ * available on the same license terms set forth above. 
++ */ ++ + #include "precompiled.hpp" + #include "opto/compile.hpp" + #include "opto/regmask.hpp" +@@ -39,6 +45,10 @@ + # include "adfiles/ad_zero.hpp" + #elif defined TARGET_ARCH_MODEL_ppc_64 + # include "adfiles/ad_ppc_64.hpp" ++#elif defined TARGET_ARCH_MODEL_mips_64 ++# include "adfiles/ad_mips_64.hpp" ++#elif defined TARGET_ARCH_MODEL_loongarch_64 ++# include "adfiles/ad_loongarch_64.hpp" + #endif + + #define RM_SIZE _RM_SIZE /* a constant private to the class RegMask */ +diff --git a/hotspot/src/share/vm/opto/regmask.hpp b/hotspot/src/share/vm/opto/regmask.hpp +index 5ceebb3fb8..6d08b68731 100644 +--- a/hotspot/src/share/vm/opto/regmask.hpp ++++ b/hotspot/src/share/vm/opto/regmask.hpp +@@ -22,6 +22,12 @@ + * + */ + ++/* ++ * This file has been modified by Loongson Technology in 2020. These ++ * modifications are Copyright (c) 2015, 2020, Loongson Technology, and are made ++ * available on the same license terms set forth above. ++ */ ++ + #ifndef SHARE_VM_OPTO_REGMASK_HPP + #define SHARE_VM_OPTO_REGMASK_HPP + +@@ -42,6 +48,10 @@ + # include "adfiles/adGlobals_zero.hpp" + #elif defined TARGET_ARCH_MODEL_ppc_64 + # include "adfiles/adGlobals_ppc_64.hpp" ++#elif defined TARGET_ARCH_MODEL_mips_64 ++# include "adfiles/adGlobals_mips_64.hpp" ++#elif defined TARGET_ARCH_MODEL_loongarch_64 ++# include "adfiles/adGlobals_loongarch_64.hpp" + #endif + + // Some fun naming (textual) substitutions: +diff --git a/hotspot/src/share/vm/opto/runtime.cpp b/hotspot/src/share/vm/opto/runtime.cpp +index a43b37f2c5..f2bcafa2c5 100644 +--- a/hotspot/src/share/vm/opto/runtime.cpp ++++ b/hotspot/src/share/vm/opto/runtime.cpp +@@ -22,6 +22,12 @@ + * + */ + ++/* ++ * This file has been modified by Loongson Technology in 2020. These ++ * modifications are Copyright (c) 2015, 2020, Loongson Technology, and are made ++ * available on the same license terms set forth above. ++ */ ++ + #include "precompiled.hpp" + #include "classfile/systemDictionary.hpp" + #include "classfile/vmSymbols.hpp" +@@ -82,6 +88,10 @@ + # include "adfiles/ad_zero.hpp" + #elif defined TARGET_ARCH_MODEL_ppc_64 + # include "adfiles/ad_ppc_64.hpp" ++#elif defined TARGET_ARCH_MODEL_mips_64 ++# include "adfiles/ad_mips_64.hpp" ++#elif defined TARGET_ARCH_MODEL_loongarch_64 ++# include "adfiles/ad_loongarch_64.hpp" + #endif + + +diff --git a/hotspot/src/share/vm/opto/type.cpp b/hotspot/src/share/vm/opto/type.cpp +index 58572f137d..299d48b12a 100644 +--- a/hotspot/src/share/vm/opto/type.cpp ++++ b/hotspot/src/share/vm/opto/type.cpp +@@ -22,6 +22,12 @@ + * + */ + ++/* ++ * This file has been modified by Loongson Technology in 2022, These ++ * modifications are Copyright (c) 2022, Loongson Technology, and are made ++ * available on the same license terms set forth above. 
++ */ ++ + #include "precompiled.hpp" + #include "ci/ciMethodData.hpp" + #include "ci/ciTypeFlow.hpp" +@@ -68,6 +74,16 @@ const Type::TypeInfo Type::_type_info[Type::lastype] = { + { Bad, T_ILLEGAL, "vectord:", false, Op_RegD, relocInfo::none }, // VectorD + { Bad, T_ILLEGAL, "vectorx:", false, 0, relocInfo::none }, // VectorX + { Bad, T_ILLEGAL, "vectory:", false, 0, relocInfo::none }, // VectorY ++#elif defined(MIPS64) ++ { Bad, T_ILLEGAL, "vectors:", false, 0, relocInfo::none }, // VectorS ++ { Bad, T_ILLEGAL, "vectord:", false, Op_VecD, relocInfo::none }, // VectorD ++ { Bad, T_ILLEGAL, "vectorx:", false, 0, relocInfo::none }, // VectorX ++ { Bad, T_ILLEGAL, "vectory:", false, 0, relocInfo::none }, // VectorY ++#elif defined(LOONGARCH64) ++ { Bad, T_ILLEGAL, "vectors:", false, 0, relocInfo::none }, // VectorS ++ { Bad, T_ILLEGAL, "vectord:", false, 0, relocInfo::none }, // VectorD ++ { Bad, T_ILLEGAL, "vectorx:", false, Op_VecX, relocInfo::none }, // VectorX ++ { Bad, T_ILLEGAL, "vectory:", false, Op_VecY, relocInfo::none }, // VectorY + #elif defined(PPC64) + { Bad, T_ILLEGAL, "vectors:", false, 0, relocInfo::none }, // VectorS + { Bad, T_ILLEGAL, "vectord:", false, Op_RegL, relocInfo::none }, // VectorD +diff --git a/hotspot/src/share/vm/prims/jniCheck.cpp b/hotspot/src/share/vm/prims/jniCheck.cpp +index 593ca8a1e3..82813b71fe 100644 +--- a/hotspot/src/share/vm/prims/jniCheck.cpp ++++ b/hotspot/src/share/vm/prims/jniCheck.cpp +@@ -22,6 +22,12 @@ + * + */ + ++/* ++ * This file has been modified by Loongson Technology in 2020. These ++ * modifications are Copyright (c) 2015, 2020, Loongson Technology, and are made ++ * available on the same license terms set forth above. ++ */ ++ + #include "precompiled.hpp" + #include "classfile/systemDictionary.hpp" + #include "classfile/vmSymbols.hpp" +@@ -55,6 +61,12 @@ + #ifdef TARGET_ARCH_ppc + # include "jniTypes_ppc.hpp" + #endif ++#ifdef TARGET_ARCH_mips ++# include "jniTypes_mips.hpp" ++#endif ++#ifdef TARGET_ARCH_loongarch ++# include "jniTypes_loongarch.hpp" ++#endif + + // Complain every extra number of unplanned local refs + #define CHECK_JNI_LOCAL_REF_CAP_WARN_THRESHOLD 32 +diff --git a/hotspot/src/share/vm/prims/jni_md.h b/hotspot/src/share/vm/prims/jni_md.h +index 6209a66449..271715d4a2 100644 +--- a/hotspot/src/share/vm/prims/jni_md.h ++++ b/hotspot/src/share/vm/prims/jni_md.h +@@ -22,6 +22,12 @@ + * or visit www.oracle.com if you need additional information or have any + * questions. + */ ++/* ++ * This file has been modified by Loongson Technology in 2020. These ++ * modifications are Copyright (c) 2015, 2020, Loongson Technology, and are made ++ * available on the same license terms set forth above. ++ */ ++ + + /* Switch to the correct jni_md.h file without reliance on -I options. */ + #ifdef TARGET_ARCH_x86 +@@ -42,6 +48,12 @@ + #ifdef TARGET_ARCH_ppc + # include "jni_ppc.h" + #endif ++#ifdef TARGET_ARCH_mips ++# include "jni_mips.h" ++#endif ++#ifdef TARGET_ARCH_loongarch ++# include "jni_loongarch.h" ++#endif + + + /* +diff --git a/hotspot/src/share/vm/prims/jvmtiClassFileReconstituter.cpp b/hotspot/src/share/vm/prims/jvmtiClassFileReconstituter.cpp +index ab31d0d91e..0d8570b764 100644 +--- a/hotspot/src/share/vm/prims/jvmtiClassFileReconstituter.cpp ++++ b/hotspot/src/share/vm/prims/jvmtiClassFileReconstituter.cpp +@@ -22,6 +22,12 @@ + * + */ + ++/* ++ * This file has been modified by Loongson Technology in 2020. 
These ++ * modifications are Copyright (c) 2015, 2020, Loongson Technology, and are made ++ * available on the same license terms set forth above. ++ */ ++ + #include "precompiled.hpp" + #include "classfile/symbolTable.hpp" + #include "interpreter/bytecodeStream.hpp" +@@ -46,6 +52,12 @@ + #ifdef TARGET_ARCH_ppc + # include "bytes_ppc.hpp" + #endif ++#ifdef TARGET_ARCH_mips ++# include "bytes_mips.hpp" ++#endif ++#ifdef TARGET_ARCH_loongarch ++# include "bytes_loongarch.hpp" ++#endif + // FIXME: add Deprecated attribute + // FIXME: fix Synthetic attribute + // FIXME: per Serguei, add error return handling for ConstantPool::copy_cpool_bytes() +diff --git a/hotspot/src/share/vm/prims/methodHandles.hpp b/hotspot/src/share/vm/prims/methodHandles.hpp +index db6e06180d..841082859a 100644 +--- a/hotspot/src/share/vm/prims/methodHandles.hpp ++++ b/hotspot/src/share/vm/prims/methodHandles.hpp +@@ -22,6 +22,12 @@ + * + */ + ++/* ++ * This file has been modified by Loongson Technology in 2020. These ++ * modifications are Copyright (c) 2015, 2020, Loongson Technology, and are made ++ * available on the same license terms set forth above. ++ */ ++ + #ifndef SHARE_VM_PRIMS_METHODHANDLES_HPP + #define SHARE_VM_PRIMS_METHODHANDLES_HPP + +@@ -198,6 +204,13 @@ public: + #ifdef TARGET_ARCH_ppc + # include "methodHandles_ppc.hpp" + #endif ++#ifdef TARGET_ARCH_mips ++# include "methodHandles_mips.hpp" ++#endif ++#ifdef TARGET_ARCH_loongarch ++# include "methodHandles_loongarch.hpp" ++#endif ++ + + // Tracing + static void trace_method_handle(MacroAssembler* _masm, const char* adaptername) PRODUCT_RETURN; +diff --git a/hotspot/src/share/vm/runtime/atomic.inline.hpp b/hotspot/src/share/vm/runtime/atomic.inline.hpp +index 222f29cbf4..7c7c6edb27 100644 +--- a/hotspot/src/share/vm/runtime/atomic.inline.hpp ++++ b/hotspot/src/share/vm/runtime/atomic.inline.hpp +@@ -22,6 +22,12 @@ + * + */ + ++/* ++ * This file has been modified by Loongson Technology in 2020. These ++ * modifications are Copyright (c) 2015, 2020, Loongson Technology, and are made ++ * available on the same license terms set forth above. ++ */ ++ + #ifndef SHARE_VM_RUNTIME_ATOMIC_INLINE_HPP + #define SHARE_VM_RUNTIME_ATOMIC_INLINE_HPP + +@@ -31,6 +37,12 @@ + #ifdef TARGET_OS_ARCH_linux_x86 + # include "atomic_linux_x86.inline.hpp" + #endif ++#ifdef TARGET_OS_ARCH_linux_mips ++# include "atomic_linux_mips.inline.hpp" ++#endif ++#ifdef TARGET_OS_ARCH_linux_loongarch ++# include "atomic_linux_loongarch.inline.hpp" ++#endif + #ifdef TARGET_OS_ARCH_linux_sparc + # include "atomic_linux_sparc.inline.hpp" + #endif +diff --git a/hotspot/src/share/vm/runtime/deoptimization.cpp b/hotspot/src/share/vm/runtime/deoptimization.cpp +index f91afdc416..36a924fd4f 100644 +--- a/hotspot/src/share/vm/runtime/deoptimization.cpp ++++ b/hotspot/src/share/vm/runtime/deoptimization.cpp +@@ -22,6 +22,12 @@ + * + */ + ++/* ++ * This file has been modified by Loongson Technology in 2020. These ++ * modifications are Copyright (c) 2015, 2020, Loongson Technology, and are made ++ * available on the same license terms set forth above. 
++ */ ++ + #include "precompiled.hpp" + #include "classfile/systemDictionary.hpp" + #include "code/debugInfoRec.hpp" +@@ -68,6 +74,12 @@ + #ifdef TARGET_ARCH_ppc + # include "vmreg_ppc.inline.hpp" + #endif ++#ifdef TARGET_ARCH_mips ++# include "vmreg_mips.inline.hpp" ++#endif ++#ifdef TARGET_ARCH_loongarch ++# include "vmreg_loongarch.inline.hpp" ++#endif + #ifdef COMPILER2 + #if defined AD_MD_HPP + # include AD_MD_HPP +@@ -84,6 +96,12 @@ + #elif defined TARGET_ARCH_MODEL_ppc_64 + # include "adfiles/ad_ppc_64.hpp" + #endif ++#ifdef TARGET_ARCH_MODEL_mips_64 ++# include "adfiles/ad_mips_64.hpp" ++#endif ++#ifdef TARGET_ARCH_MODEL_loongarch_64 ++# include "adfiles/ad_loongarch_64.hpp" ++#endif + #endif // COMPILER2 + + PRAGMA_FORMAT_MUTE_WARNINGS_FOR_GCC +diff --git a/hotspot/src/share/vm/runtime/dtraceJSDT.hpp b/hotspot/src/share/vm/runtime/dtraceJSDT.hpp +index db568def34..490c5f5a4e 100644 +--- a/hotspot/src/share/vm/runtime/dtraceJSDT.hpp ++++ b/hotspot/src/share/vm/runtime/dtraceJSDT.hpp +@@ -22,6 +22,12 @@ + * + */ + ++/* ++ * This file has been modified by Loongson Technology in 2020. These ++ * modifications are Copyright (c) 2015, 2020, Loongson Technology, and are made ++ * available on the same license terms set forth above. ++ */ ++ + #ifndef SHARE_VM_RUNTIME_DTRACEJSDT_HPP + #define SHARE_VM_RUNTIME_DTRACEJSDT_HPP + +@@ -44,6 +50,12 @@ + #ifdef TARGET_ARCH_ppc + # include "nativeInst_ppc.hpp" + #endif ++#ifdef TARGET_ARCH_mips ++# include "nativeInst_mips.hpp" ++#endif ++#ifdef TARGET_ARCH_loongarch ++# include "nativeInst_loongarch.hpp" ++#endif + + class RegisteredProbes; + typedef jlong OpaqueProbes; +diff --git a/hotspot/src/share/vm/runtime/frame.cpp b/hotspot/src/share/vm/runtime/frame.cpp +index 338b7ad3a7..5a161133ba 100644 +--- a/hotspot/src/share/vm/runtime/frame.cpp ++++ b/hotspot/src/share/vm/runtime/frame.cpp +@@ -22,6 +22,12 @@ + * + */ + ++/* ++ * This file has been modified by Loongson Technology in 2020. These ++ * modifications are Copyright (c) 2015, 2020, Loongson Technology, and are made ++ * available on the same license terms set forth above. ++ */ ++ + #include "precompiled.hpp" + #include "compiler/abstractCompiler.hpp" + #include "compiler/disassembler.hpp" +@@ -64,6 +70,13 @@ + #ifdef TARGET_ARCH_ppc + # include "nativeInst_ppc.hpp" + #endif ++#ifdef TARGET_ARCH_mips ++# include "nativeInst_mips.hpp" ++#endif ++#ifdef TARGET_ARCH_loongarch ++# include "nativeInst_loongarch.hpp" ++#endif ++ + + PRAGMA_FORMAT_MUTE_WARNINGS_FOR_GCC + +diff --git a/hotspot/src/share/vm/runtime/frame.hpp b/hotspot/src/share/vm/runtime/frame.hpp +index 2d80ecc208..4a9e6edb54 100644 +--- a/hotspot/src/share/vm/runtime/frame.hpp ++++ b/hotspot/src/share/vm/runtime/frame.hpp +@@ -22,6 +22,12 @@ + * + */ + ++/* ++ * This file has been modified by Loongson Technology in 2020. These ++ * modifications are Copyright (c) 2015, 2020, Loongson Technology, and are made ++ * available on the same license terms set forth above. 
++ */ ++ + #ifndef SHARE_VM_RUNTIME_FRAME_HPP + #define SHARE_VM_RUNTIME_FRAME_HPP + +@@ -45,6 +51,10 @@ + # include "adfiles/adGlobals_zero.hpp" + #elif defined TARGET_ARCH_MODEL_ppc_64 + # include "adfiles/adGlobals_ppc_64.hpp" ++#elif defined TARGET_ARCH_MODEL_mips_64 ++# include "adfiles/adGlobals_mips_64.hpp" ++#elif defined TARGET_ARCH_MODEL_loongarch_64 ++# include "adfiles/adGlobals_loongarch_64.hpp" + #endif + #endif // COMPILER2 + #ifdef TARGET_ARCH_zero +@@ -489,6 +499,12 @@ class frame VALUE_OBJ_CLASS_SPEC { + #ifdef TARGET_ARCH_x86 + # include "frame_x86.hpp" + #endif ++#ifdef TARGET_ARCH_mips ++# include "frame_mips.hpp" ++#endif ++#ifdef TARGET_ARCH_loongarch ++# include "frame_loongarch.hpp" ++#endif + #ifdef TARGET_ARCH_aarch64 + # include "frame_aarch64.hpp" + #endif +diff --git a/hotspot/src/share/vm/runtime/frame.inline.hpp b/hotspot/src/share/vm/runtime/frame.inline.hpp +index 710b82306a..704cc8df8f 100644 +--- a/hotspot/src/share/vm/runtime/frame.inline.hpp ++++ b/hotspot/src/share/vm/runtime/frame.inline.hpp +@@ -22,6 +22,12 @@ + * + */ + ++/* ++ * This file has been modified by Loongson Technology in 2020. These ++ * modifications are Copyright (c) 2015, 2020, Loongson Technology, and are made ++ * available on the same license terms set forth above. ++ */ ++ + #ifndef SHARE_VM_RUNTIME_FRAME_INLINE_HPP + #define SHARE_VM_RUNTIME_FRAME_INLINE_HPP + +@@ -49,6 +55,12 @@ + #ifdef TARGET_ARCH_ppc + # include "jniTypes_ppc.hpp" + #endif ++#ifdef TARGET_ARCH_mips ++# include "jniTypes_mips.hpp" ++#endif ++#ifdef TARGET_ARCH_loongarch ++# include "jniTypes_loongarch.hpp" ++#endif + #ifdef TARGET_ARCH_zero + # include "entryFrame_zero.hpp" + # include "fakeStubFrame_zero.hpp" +@@ -115,6 +127,12 @@ inline oop* frame::interpreter_frame_temp_oop_addr() const { + #ifdef TARGET_ARCH_ppc + # include "frame_ppc.inline.hpp" + #endif ++#ifdef TARGET_ARCH_mips ++# include "frame_mips.inline.hpp" ++#endif ++#ifdef TARGET_ARCH_loongarch ++# include "frame_loongarch.inline.hpp" ++#endif + + + #endif // SHARE_VM_RUNTIME_FRAME_INLINE_HPP +diff --git a/hotspot/src/share/vm/runtime/globals.hpp b/hotspot/src/share/vm/runtime/globals.hpp +index 23ce8af569..f36137aabf 100644 +--- a/hotspot/src/share/vm/runtime/globals.hpp ++++ b/hotspot/src/share/vm/runtime/globals.hpp +@@ -55,6 +55,12 @@ + #ifdef TARGET_ARCH_ppc + # include "globals_ppc.hpp" + #endif ++#ifdef TARGET_ARCH_mips ++# include "globals_mips.hpp" ++#endif ++#ifdef TARGET_ARCH_loongarch ++# include "globals_loongarch.hpp" ++#endif + #ifdef TARGET_OS_FAMILY_linux + # include "globals_linux.hpp" + #endif +@@ -79,6 +85,12 @@ + #ifdef TARGET_OS_ARCH_linux_sparc + # include "globals_linux_sparc.hpp" + #endif ++#ifdef TARGET_OS_ARCH_linux_mips ++# include "globals_linux_mips.hpp" ++#endif ++#ifdef TARGET_OS_ARCH_linux_loongarch ++# include "globals_linux_loongarch.hpp" ++#endif + #ifdef TARGET_OS_ARCH_linux_zero + # include "globals_linux_zero.hpp" + #endif +@@ -116,6 +128,12 @@ + #ifdef TARGET_ARCH_sparc + # include "c1_globals_sparc.hpp" + #endif ++#ifdef TARGET_ARCH_mips ++# include "c1_globals_mips.hpp" ++#endif ++#ifdef TARGET_ARCH_loongarch ++# include "c1_globals_loongarch.hpp" ++#endif + #ifdef TARGET_ARCH_arm + # include "c1_globals_arm.hpp" + #endif +@@ -148,6 +166,12 @@ + #ifdef TARGET_ARCH_sparc + # include "c2_globals_sparc.hpp" + #endif ++#ifdef TARGET_ARCH_mips ++# include "c2_globals_mips.hpp" ++#endif ++#ifdef TARGET_ARCH_loongarch ++# include "c2_globals_loongarch.hpp" ++#endif + #ifdef TARGET_ARCH_arm + # include 
"c2_globals_arm.hpp" + #endif +@@ -3209,7 +3233,7 @@ class CommandLineFlags { + product(uintx, InitialHeapSize, 0, \ + "Initial heap size (in bytes); zero means use ergonomics") \ + \ +- product(uintx, MaxHeapSize, ScaleForWordSize(96*M), \ ++ product(uintx, MaxHeapSize, ScaleForWordSize(MIPS64_ONLY(1500) NOT_MIPS64(96) *M), \ + "Maximum heap size (in bytes)") \ + \ + product(uintx, OldSize, ScaleForWordSize(4*M), \ +diff --git a/hotspot/src/share/vm/runtime/icache.hpp b/hotspot/src/share/vm/runtime/icache.hpp +index ba81a06ff5..9c0cfdb7d7 100644 +--- a/hotspot/src/share/vm/runtime/icache.hpp ++++ b/hotspot/src/share/vm/runtime/icache.hpp +@@ -22,6 +22,12 @@ + * + */ + ++/* ++ * This file has been modified by Loongson Technology in 2020. These ++ * modifications are Copyright (c) 2015, 2020, Loongson Technology, and are made ++ * available on the same license terms set forth above. ++ */ ++ + #ifndef SHARE_VM_RUNTIME_ICACHE_HPP + #define SHARE_VM_RUNTIME_ICACHE_HPP + +@@ -86,7 +92,12 @@ class AbstractICache : AllStatic { + #ifdef TARGET_ARCH_ppc + # include "icache_ppc.hpp" + #endif +- ++#ifdef TARGET_ARCH_mips ++# include "icache_mips.hpp" ++#endif ++#ifdef TARGET_ARCH_loongarch ++# include "icache_loongarch.hpp" ++#endif + + + class ICacheStubGenerator : public StubCodeGenerator { +diff --git a/hotspot/src/share/vm/runtime/java.cpp b/hotspot/src/share/vm/runtime/java.cpp +index 0a263b017c..9ba0decaae 100644 +--- a/hotspot/src/share/vm/runtime/java.cpp ++++ b/hotspot/src/share/vm/runtime/java.cpp +@@ -22,6 +22,12 @@ + * + */ + ++/* ++ * This file has been modified by Loongson Technology in 2020. These ++ * modifications are Copyright (c) 2015, 2020, Loongson Technology, and are made ++ * available on the same license terms set forth above. ++ */ ++ + #include "precompiled.hpp" + #include "classfile/classLoader.hpp" + #include "classfile/symbolTable.hpp" +@@ -84,6 +90,12 @@ + #ifdef TARGET_ARCH_ppc + # include "vm_version_ppc.hpp" + #endif ++#ifdef TARGET_ARCH_mips ++# include "vm_version_mips.hpp" ++#endif ++#ifdef TARGET_ARCH_loongarch ++# include "vm_version_loongarch.hpp" ++#endif + #if INCLUDE_ALL_GCS + #include "gc_implementation/concurrentMarkSweep/concurrentMarkSweepThread.hpp" + #include "gc_implementation/parallelScavenge/psScavenge.hpp" +diff --git a/hotspot/src/share/vm/runtime/javaCalls.hpp b/hotspot/src/share/vm/runtime/javaCalls.hpp +index 6126bbe75e..1747e2b2ee 100644 +--- a/hotspot/src/share/vm/runtime/javaCalls.hpp ++++ b/hotspot/src/share/vm/runtime/javaCalls.hpp +@@ -22,6 +22,12 @@ + * + */ + ++/* ++ * This file has been modified by Loongson Technology in 2020. These ++ * modifications are Copyright (c) 2015, 2020, Loongson Technology, and are made ++ * available on the same license terms set forth above. ++ */ ++ + #ifndef SHARE_VM_RUNTIME_JAVACALLS_HPP + #define SHARE_VM_RUNTIME_JAVACALLS_HPP + +@@ -49,6 +55,12 @@ + #ifdef TARGET_ARCH_ppc + # include "jniTypes_ppc.hpp" + #endif ++#ifdef TARGET_ARCH_mips ++# include "jniTypes_mips.hpp" ++#endif ++#ifdef TARGET_ARCH_loongarch ++# include "jniTypes_loongarch.hpp" ++#endif + + // A JavaCallWrapper is constructed before each JavaCall and destructed after the call. 
+ // Its purpose is to allocate/deallocate a new handle block and to save/restore the last +diff --git a/hotspot/src/share/vm/runtime/javaFrameAnchor.hpp b/hotspot/src/share/vm/runtime/javaFrameAnchor.hpp +index 129a01e293..c2b1b2e6c3 100644 +--- a/hotspot/src/share/vm/runtime/javaFrameAnchor.hpp ++++ b/hotspot/src/share/vm/runtime/javaFrameAnchor.hpp +@@ -22,6 +22,12 @@ + * + */ + ++/* ++ * This file has been modified by Loongson Technology in 2020. These ++ * modifications are Copyright (c) 2015, 2020, Loongson Technology, and are made ++ * available on the same license terms set forth above. ++ */ ++ + #ifndef SHARE_VM_RUNTIME_JAVAFRAMEANCHOR_HPP + #define SHARE_VM_RUNTIME_JAVAFRAMEANCHOR_HPP + +@@ -80,6 +86,12 @@ friend class JavaCallWrapper; + #ifdef TARGET_ARCH_x86 + # include "javaFrameAnchor_x86.hpp" + #endif ++#ifdef TARGET_ARCH_mips ++# include "javaFrameAnchor_mips.hpp" ++#endif ++#ifdef TARGET_ARCH_loongarch ++# include "javaFrameAnchor_loongarch.hpp" ++#endif + #ifdef TARGET_ARCH_aarch64 + # include "javaFrameAnchor_aarch64.hpp" + #endif +diff --git a/hotspot/src/share/vm/runtime/os.cpp b/hotspot/src/share/vm/runtime/os.cpp +index 96eed03670..28c78409e7 100644 +--- a/hotspot/src/share/vm/runtime/os.cpp ++++ b/hotspot/src/share/vm/runtime/os.cpp +@@ -1122,7 +1122,8 @@ bool os::is_first_C_frame(frame* fr) { + + uintptr_t old_fp = (uintptr_t)fr->link(); + if ((old_fp & fp_align_mask) != 0) return true; +- if (old_fp == 0 || old_fp == (uintptr_t)-1 || old_fp == ufp) return true; ++ // The check for old_fp and ufp is harmful on MIPS due to its special ABI. ++ if (old_fp == 0 || old_fp == (uintptr_t)-1 NOT_MIPS64(|| old_fp == ufp)) return true; + + // stack grows downwards; if old_fp is below current fp or if the stack + // frame is too large, either the stack is corrupted or fp is not saved +diff --git a/hotspot/src/share/vm/runtime/os.hpp b/hotspot/src/share/vm/runtime/os.hpp +index 836c231b03..0ca6e64598 100644 +--- a/hotspot/src/share/vm/runtime/os.hpp ++++ b/hotspot/src/share/vm/runtime/os.hpp +@@ -22,6 +22,12 @@ + * + */ + ++/* ++ * This file has been modified by Loongson Technology in 2020. These ++ * modifications are Copyright (c) 2015, 2020, Loongson Technology, and are made ++ * available on the same license terms set forth above. 
++ */ ++ + #ifndef SHARE_VM_RUNTIME_OS_HPP + #define SHARE_VM_RUNTIME_OS_HPP + +@@ -857,6 +863,12 @@ class os: AllStatic { + #ifdef TARGET_OS_ARCH_linux_x86 + # include "os_linux_x86.hpp" + #endif ++#ifdef TARGET_OS_ARCH_linux_mips ++# include "os_linux_mips.hpp" ++#endif ++#ifdef TARGET_OS_ARCH_linux_loongarch ++# include "os_linux_loongarch.hpp" ++#endif + #ifdef TARGET_OS_ARCH_linux_aarch64 + # include "os_linux_aarch64.hpp" + #endif +diff --git a/hotspot/src/share/vm/runtime/prefetch.inline.hpp b/hotspot/src/share/vm/runtime/prefetch.inline.hpp +index f4e30de34d..fec16f842c 100644 +--- a/hotspot/src/share/vm/runtime/prefetch.inline.hpp ++++ b/hotspot/src/share/vm/runtime/prefetch.inline.hpp +@@ -46,6 +46,12 @@ + #ifdef TARGET_OS_ARCH_linux_ppc + # include "prefetch_linux_ppc.inline.hpp" + #endif ++#ifdef TARGET_OS_ARCH_linux_mips ++# include "prefetch_linux_mips.inline.hpp" ++#endif ++#ifdef TARGET_OS_ARCH_linux_loongarch ++# include "prefetch_linux_loongarch.inline.hpp" ++#endif + + // Solaris + #ifdef TARGET_OS_ARCH_solaris_x86 +diff --git a/hotspot/src/share/vm/runtime/registerMap.hpp b/hotspot/src/share/vm/runtime/registerMap.hpp +index 67ef212d65..1e26dfcba4 100644 +--- a/hotspot/src/share/vm/runtime/registerMap.hpp ++++ b/hotspot/src/share/vm/runtime/registerMap.hpp +@@ -22,6 +22,12 @@ + * + */ + ++/* ++ * This file has been modified by Loongson Technology in 2020. These ++ * modifications are Copyright (c) 2015, 2020, Loongson Technology, and are made ++ * available on the same license terms set forth above. ++ */ ++ + #ifndef SHARE_VM_RUNTIME_REGISTERMAP_HPP + #define SHARE_VM_RUNTIME_REGISTERMAP_HPP + +@@ -45,6 +51,12 @@ + #ifdef TARGET_ARCH_ppc + # include "register_ppc.hpp" + #endif ++#ifdef TARGET_ARCH_mips ++# include "register_mips.hpp" ++#endif ++#ifdef TARGET_ARCH_loongarch ++# include "register_loongarch.hpp" ++#endif + + class JavaThread; + +@@ -156,6 +168,12 @@ class RegisterMap : public StackObj { + #ifdef TARGET_ARCH_ppc + # include "registerMap_ppc.hpp" + #endif ++#ifdef TARGET_ARCH_mips ++# include "registerMap_mips.hpp" ++#endif ++#ifdef TARGET_ARCH_loongarch ++# include "registerMap_loongarch.hpp" ++#endif + + }; + +diff --git a/hotspot/src/share/vm/runtime/relocator.hpp b/hotspot/src/share/vm/runtime/relocator.hpp +index bb19c75fe6..53f3c9f6bd 100644 +--- a/hotspot/src/share/vm/runtime/relocator.hpp ++++ b/hotspot/src/share/vm/runtime/relocator.hpp +@@ -22,6 +22,12 @@ + * + */ + ++/* ++ * This file has been modified by Loongson Technology in 2020. These ++ * modifications are Copyright (c) 2015, 2020, Loongson Technology, and are made ++ * available on the same license terms set forth above. ++ */ ++ + #ifndef SHARE_VM_RUNTIME_RELOCATOR_HPP + #define SHARE_VM_RUNTIME_RELOCATOR_HPP + +@@ -45,6 +51,12 @@ + #ifdef TARGET_ARCH_ppc + # include "bytes_ppc.hpp" + #endif ++#ifdef TARGET_ARCH_mips ++# include "bytes_mips.hpp" ++#endif ++#ifdef TARGET_ARCH_loongarch ++# include "bytes_loongarch.hpp" ++#endif + + // This code has been converted from the 1.1E java virtual machine + // Thanks to the JavaTopics group for using the code +diff --git a/hotspot/src/share/vm/runtime/safepoint.cpp b/hotspot/src/share/vm/runtime/safepoint.cpp +index 440617c802..be0e4dd13c 100644 +--- a/hotspot/src/share/vm/runtime/safepoint.cpp ++++ b/hotspot/src/share/vm/runtime/safepoint.cpp +@@ -22,6 +22,12 @@ + * + */ + ++/* ++ * This file has been modified by Loongson Technology in 2020. 
These ++ * modifications are Copyright (c) 2015, 2020, Loongson Technology, and are made ++ * available on the same license terms set forth above. ++ */ ++ + #include "precompiled.hpp" + #include "classfile/symbolTable.hpp" + #include "classfile/systemDictionary.hpp" +@@ -78,6 +84,14 @@ + # include "nativeInst_ppc.hpp" + # include "vmreg_ppc.inline.hpp" + #endif ++#ifdef TARGET_ARCH_mips ++# include "nativeInst_mips.hpp" ++# include "vmreg_mips.inline.hpp" ++#endif ++#ifdef TARGET_ARCH_loongarch ++# include "nativeInst_loongarch.hpp" ++# include "vmreg_loongarch.inline.hpp" ++#endif + #if INCLUDE_ALL_GCS + #include "gc_implementation/concurrentMarkSweep/concurrentMarkSweepThread.hpp" + #include "gc_implementation/shared/suspendibleThreadSet.hpp" +diff --git a/hotspot/src/share/vm/runtime/sharedRuntime.cpp b/hotspot/src/share/vm/runtime/sharedRuntime.cpp +index 5f540247f9..abcd6066b9 100644 +--- a/hotspot/src/share/vm/runtime/sharedRuntime.cpp ++++ b/hotspot/src/share/vm/runtime/sharedRuntime.cpp +@@ -22,6 +22,12 @@ + * + */ + ++/* ++ * This file has been modified by Loongson Technology in 2020. These ++ * modifications are Copyright (c) 2015, 2020, Loongson Technology, and are made ++ * available on the same license terms set forth above. ++ */ ++ + #include "precompiled.hpp" + #include "classfile/systemDictionary.hpp" + #include "classfile/vmSymbols.hpp" +@@ -82,6 +88,15 @@ + # include "nativeInst_ppc.hpp" + # include "vmreg_ppc.inline.hpp" + #endif ++#ifdef TARGET_ARCH_mips ++# include "nativeInst_mips.hpp" ++# include "vmreg_mips.inline.hpp" ++#endif ++#ifdef TARGET_ARCH_loongarch ++# include "nativeInst_loongarch.hpp" ++# include "vmreg_loongarch.inline.hpp" ++#endif ++ + #ifdef COMPILER1 + #include "c1/c1_Runtime1.hpp" + #endif +@@ -220,7 +235,6 @@ void SharedRuntime::print_ic_miss_histogram() { + } + } + #endif // PRODUCT +- + #if INCLUDE_ALL_GCS + + // G1 write-barrier pre: executed before a pointer store. +diff --git a/hotspot/src/share/vm/runtime/sharedRuntimeTrig.cpp b/hotspot/src/share/vm/runtime/sharedRuntimeTrig.cpp +index 37880d8a5c..3987880b16 100644 +--- a/hotspot/src/share/vm/runtime/sharedRuntimeTrig.cpp ++++ b/hotspot/src/share/vm/runtime/sharedRuntimeTrig.cpp +@@ -22,6 +22,12 @@ + * + */ + ++/* ++ * This file has been modified by Loongson Technology in 2020, These ++ * modifications are Copyright (c) 2015, 2020, Loongson Technology, and are made ++ * available on the same license terms set forth above. ++ */ ++ + #include "precompiled.hpp" + #include "prims/jni.h" + #include "runtime/interfaceSupport.hpp" +@@ -534,6 +540,15 @@ static SAFEBUF int __ieee754_rem_pio2(double x, double *y) { + * then 3 2 + * sin(x) = x + (S1*x + (x *(r-y/2)+y)) + */ ++#if defined(MIPS) || defined(LOONGARCH) ++// TODO: LA ++#undef S1 ++#undef S2 ++#undef S3 ++#undef S4 ++#undef S5 ++#undef S6 ++#endif + + static const double + S1 = -1.66666666666666324348e-01, /* 0xBFC55555, 0x55555549 */ +diff --git a/hotspot/src/share/vm/runtime/stackValueCollection.cpp b/hotspot/src/share/vm/runtime/stackValueCollection.cpp +index 8774768311..fe81c1bfd8 100644 +--- a/hotspot/src/share/vm/runtime/stackValueCollection.cpp ++++ b/hotspot/src/share/vm/runtime/stackValueCollection.cpp +@@ -22,6 +22,12 @@ + * + */ + ++/* ++ * This file has been modified by Loongson Technology in 2020. These ++ * modifications are Copyright (c) 2015, 2020, Loongson Technology, and are made ++ * available on the same license terms set forth above. 
++ */ ++ + #include "precompiled.hpp" + #include "runtime/stackValueCollection.hpp" + #ifdef TARGET_ARCH_x86 +@@ -42,6 +48,12 @@ + #ifdef TARGET_ARCH_ppc + # include "jniTypes_ppc.hpp" + #endif ++#ifdef TARGET_ARCH_mips ++# include "jniTypes_mips.hpp" ++#endif ++#ifdef TARGET_ARCH_loongarch ++# include "jniTypes_loongarch.hpp" ++#endif + + PRAGMA_FORMAT_MUTE_WARNINGS_FOR_GCC + +diff --git a/hotspot/src/share/vm/runtime/statSampler.cpp b/hotspot/src/share/vm/runtime/statSampler.cpp +index 41f469622f..3b43089062 100644 +--- a/hotspot/src/share/vm/runtime/statSampler.cpp ++++ b/hotspot/src/share/vm/runtime/statSampler.cpp +@@ -22,6 +22,12 @@ + * + */ + ++/* ++ * This file has been modified by Loongson Technology in 2020. These ++ * modifications are Copyright (c) 2015, 2020 Loongson Technology, and are made ++ * available on the same license terms set forth above. ++ */ ++ + #include "precompiled.hpp" + #include "classfile/systemDictionary.hpp" + #include "classfile/vmSymbols.hpp" +@@ -51,6 +57,12 @@ + #ifdef TARGET_ARCH_ppc + # include "vm_version_ppc.hpp" + #endif ++#ifdef TARGET_ARCH_mips ++# include "vm_version_mips.hpp" ++#endif ++#ifdef TARGET_ARCH_loongarch ++# include "vm_version_loongarch.hpp" ++#endif + + // -------------------------------------------------------- + // StatSamplerTask +diff --git a/hotspot/src/share/vm/runtime/stubRoutines.hpp b/hotspot/src/share/vm/runtime/stubRoutines.hpp +index e18b9127df..9bf933762a 100644 +--- a/hotspot/src/share/vm/runtime/stubRoutines.hpp ++++ b/hotspot/src/share/vm/runtime/stubRoutines.hpp +@@ -22,6 +22,12 @@ + * + */ + ++/* ++ * This file has been modified by Loongson Technology in 2020. These ++ * modifications are Copyright (c) 2015, 2020, Loongson Technology, and are made ++ * available on the same license terms set forth above. ++ */ ++ + #ifndef SHARE_VM_RUNTIME_STUBROUTINES_HPP + #define SHARE_VM_RUNTIME_STUBROUTINES_HPP + +@@ -49,6 +55,12 @@ + #ifdef TARGET_ARCH_ppc + # include "nativeInst_ppc.hpp" + #endif ++#ifdef TARGET_ARCH_mips ++# include "nativeInst_mips.hpp" ++#endif ++#ifdef TARGET_ARCH_loongarch ++# include "nativeInst_loongarch.hpp" ++#endif + + // StubRoutines provides entry points to assembly routines used by + // compiled code and the run-time system. Platform-specific entry +@@ -116,6 +128,10 @@ class StubRoutines: AllStatic { + # include "stubRoutines_zero.hpp" + #elif defined TARGET_ARCH_MODEL_ppc_64 + # include "stubRoutines_ppc_64.hpp" ++#elif defined TARGET_ARCH_MODEL_mips_64 ++# include "stubRoutines_mips_64.hpp" ++#elif defined TARGET_ARCH_MODEL_loongarch_64 ++# include "stubRoutines_loongarch_64.hpp" + #endif + + static jint _verify_oop_count; +diff --git a/hotspot/src/share/vm/runtime/thread.cpp b/hotspot/src/share/vm/runtime/thread.cpp +index e6586c40cb..3db678ff48 100644 +--- a/hotspot/src/share/vm/runtime/thread.cpp ++++ b/hotspot/src/share/vm/runtime/thread.cpp +@@ -22,6 +22,12 @@ + * + */ + ++/* ++ * This file has been modified by Loongson Technology in 2020. These ++ * modifications are Copyright (c) 2015, 2020, Loongson Technology, and are made ++ * available on the same license terms set forth above. 
++ */ ++ + #include "precompiled.hpp" + #include "classfile/classLoader.hpp" + #include "classfile/javaClasses.hpp" +diff --git a/hotspot/src/share/vm/runtime/thread.hpp b/hotspot/src/share/vm/runtime/thread.hpp +index 1c19ab7290..aa69217eef 100644 +--- a/hotspot/src/share/vm/runtime/thread.hpp ++++ b/hotspot/src/share/vm/runtime/thread.hpp +@@ -22,6 +22,12 @@ + * + */ + ++/* ++ * This file has been modified by Loongson Technology in 2020. These ++ * modifications are Copyright (c) 2015, 2020, Loongson Technology, and are made ++ * available on the same license terms set forth above. ++ */ ++ + #ifndef SHARE_VM_RUNTIME_THREAD_HPP + #define SHARE_VM_RUNTIME_THREAD_HPP + +@@ -1711,6 +1717,12 @@ public: + #ifdef TARGET_OS_ARCH_linux_x86 + # include "thread_linux_x86.hpp" + #endif ++#ifdef TARGET_OS_ARCH_linux_mips ++# include "thread_linux_mips.hpp" ++#endif ++#ifdef TARGET_OS_ARCH_linux_loongarch ++# include "thread_linux_loongarch.hpp" ++#endif + #ifdef TARGET_OS_ARCH_linux_aarch64 + # include "thread_linux_aarch64.hpp" + #endif +diff --git a/hotspot/src/share/vm/runtime/threadLocalStorage.hpp b/hotspot/src/share/vm/runtime/threadLocalStorage.hpp +index 58c1afc810..0938b2edda 100644 +--- a/hotspot/src/share/vm/runtime/threadLocalStorage.hpp ++++ b/hotspot/src/share/vm/runtime/threadLocalStorage.hpp +@@ -22,6 +22,12 @@ + * + */ + ++/* ++ * This file has been modified by Loongson Technology in 2020. These ++ * modifications are Copyright (c) 2015, 2020, Loongson Technology, and are made ++ * available on the same license terms set forth above. ++ */ ++ + #ifndef SHARE_VM_RUNTIME_THREADLOCALSTORAGE_HPP + #define SHARE_VM_RUNTIME_THREADLOCALSTORAGE_HPP + +@@ -51,6 +57,12 @@ class ThreadLocalStorage : AllStatic { + #ifdef TARGET_OS_ARCH_linux_x86 + # include "threadLS_linux_x86.hpp" + #endif ++#ifdef TARGET_OS_ARCH_linux_mips ++# include "threadLS_linux_mips.hpp" ++#endif ++#ifdef TARGET_OS_ARCH_linux_loongarch ++# include "threadLS_linux_loongarch.hpp" ++#endif + #ifdef TARGET_OS_ARCH_linux_aarch64 + # include "threadLS_linux_aarch64.hpp" + #endif +diff --git a/hotspot/src/share/vm/runtime/virtualspace.cpp b/hotspot/src/share/vm/runtime/virtualspace.cpp +index 66392b75f1..5ced38d838 100644 +--- a/hotspot/src/share/vm/runtime/virtualspace.cpp ++++ b/hotspot/src/share/vm/runtime/virtualspace.cpp +@@ -1,5 +1,6 @@ + /* + * Copyright (c) 1997, 2014, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2021, Loongson Technology. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it +@@ -147,6 +148,15 @@ void ReservedSpace::initialize(size_t size, size_t alignment, bool large, + bool special = large && !os::can_commit_large_page_memory(); + char* base = NULL; + ++#if defined MIPS && !defined ZERO ++ size_t opt_reg_addr = 5 * os::Linux::page_size(); ++ static int code_cache_init_flag = 1; ++ if (UseCodeCacheAllocOpt && code_cache_init_flag && executable) { ++ code_cache_init_flag = 0; ++ requested_address = (char*) opt_reg_addr; ++ } ++#endif ++ + if (requested_address != 0) { + requested_address -= noaccess_prefix; // adjust requested address + assert(requested_address != NULL, "huge noaccess prefix?"); +@@ -193,6 +203,12 @@ void ReservedSpace::initialize(size_t size, size_t alignment, bool large, + if (failed_to_reserve_as_requested(base, requested_address, size, false)) { + // OS ignored requested address. Try different address. 
+ base = NULL; ++#if defined MIPS && !defined ZERO ++ if (UseCodeCacheAllocOpt && requested_address == (char*) opt_reg_addr) { ++ requested_address = NULL; ++ base = os::reserve_memory(size, NULL, alignment); ++ } ++#endif + } + } else { + base = os::reserve_memory(size, NULL, alignment); +diff --git a/hotspot/src/share/vm/runtime/vmStructs.cpp b/hotspot/src/share/vm/runtime/vmStructs.cpp +index 32e3921b2b..c6cc4c4329 100644 +--- a/hotspot/src/share/vm/runtime/vmStructs.cpp ++++ b/hotspot/src/share/vm/runtime/vmStructs.cpp +@@ -22,6 +22,12 @@ + * + */ + ++/* ++ * This file has been modified by Loongson Technology in 2020. These ++ * modifications are Copyright (c) 2015, 2020, Loongson Technology, and are made ++ * available on the same license terms set forth above. ++ */ ++ + #include "precompiled.hpp" + #include "classfile/dictionary.hpp" + #include "classfile/javaClasses.hpp" +@@ -122,6 +128,12 @@ + #ifdef TARGET_ARCH_ppc + # include "vmStructs_ppc.hpp" + #endif ++#ifdef TARGET_ARCH_mips ++# include "vmStructs_mips.hpp" ++#endif ++#ifdef TARGET_ARCH_loongarch ++# include "vmStructs_loongarch.hpp" ++#endif + #ifdef TARGET_OS_ARCH_linux_x86 + # include "vmStructs_linux_x86.hpp" + #endif +@@ -149,6 +161,12 @@ + #ifdef TARGET_OS_ARCH_linux_ppc + # include "vmStructs_linux_ppc.hpp" + #endif ++#ifdef TARGET_OS_ARCH_linux_mips ++# include "vmStructs_linux_mips.hpp" ++#endif ++#ifdef TARGET_OS_ARCH_linux_loongarch ++# include "vmStructs_linux_loongarch.hpp" ++#endif + #ifdef TARGET_OS_ARCH_aix_ppc + # include "vmStructs_aix_ppc.hpp" + #endif +@@ -208,6 +226,10 @@ + # include "adfiles/adGlobals_zero.hpp" + #elif defined TARGET_ARCH_MODEL_ppc_64 + # include "adfiles/adGlobals_ppc_64.hpp" ++#elif defined TARGET_ARCH_MODEL_mips_64 ++# include "adfiles/adGlobals_mips_64.hpp" ++#elif defined TARGET_ARCH_MODEL_loongarch_64 ++# include "adfiles/adGlobals_loongarch_64.hpp" + #endif + #endif // COMPILER2 + +diff --git a/hotspot/src/share/vm/runtime/vm_version.cpp b/hotspot/src/share/vm/runtime/vm_version.cpp +index 91f9c70f5a..d8dcfcfcca 100644 +--- a/hotspot/src/share/vm/runtime/vm_version.cpp ++++ b/hotspot/src/share/vm/runtime/vm_version.cpp +@@ -22,6 +22,12 @@ + * + */ + ++/* ++ * This file has been modified by Loongson Technology in 2020. These ++ * modifications are Copyright (c) 2015, 2020, Loongson Technology, and are made ++ * available on the same license terms set forth above. 
++ */ ++ + #include "precompiled.hpp" + #include "memory/universe.hpp" + #include "oops/oop.inline.hpp" +@@ -44,6 +50,12 @@ + #ifdef TARGET_ARCH_ppc + # include "vm_version_ppc.hpp" + #endif ++#ifdef TARGET_ARCH_mips ++# include "vm_version_mips.hpp" ++#endif ++#ifdef TARGET_ARCH_loongarch ++# include "vm_version_loongarch.hpp" ++#endif + + const char* Abstract_VM_Version::_s_vm_release = Abstract_VM_Version::vm_release(); + const char* Abstract_VM_Version::_s_internal_vm_info_string = Abstract_VM_Version::internal_vm_info_string(); +@@ -193,6 +205,14 @@ const char* Abstract_VM_Version::jre_release_version() { + #else + #define CPU "ppc64" + #endif ++#elif defined(MIPS64) ++#if defined(VM_LITTLE_ENDIAN) ++#define CPU "mips64el" ++#else ++#define CPU "mips64" ++#endif ++#elif defined(LOONGARCH64) ++#define CPU "loongarch64" + #else + #define CPU IA32_ONLY("x86") \ + IA64_ONLY("ia64") \ +diff --git a/hotspot/src/share/vm/utilities/copy.hpp b/hotspot/src/share/vm/utilities/copy.hpp +index c1d82c7083..1279319a17 100644 +--- a/hotspot/src/share/vm/utilities/copy.hpp ++++ b/hotspot/src/share/vm/utilities/copy.hpp +@@ -22,6 +22,12 @@ + * + */ + ++/* ++ * This file has been modified by Loongson Technology in 2020. These ++ * modifications are Copyright (c) 2015, 2020, Loongson Technology, and are made ++ * available on the same license terms set forth above. ++ */ ++ + #ifndef SHARE_VM_UTILITIES_COPY_HPP + #define SHARE_VM_UTILITIES_COPY_HPP + +@@ -331,6 +337,27 @@ class Copy : AllStatic { + #endif + } + ++ ++ // SAPJVM AS 2011-09-20. Template for atomic copy. ++ template static void copy_conjoint_atomic(T* from, T* to, size_t count) ++ { ++ if (from > to) { ++ while (count-- > 0) { ++ // Copy forwards ++ *to++ = *from++; ++ } ++ } else { ++ from += count - 1; ++ to += count - 1; ++ while (count-- > 0) { ++ // Copy backwards ++ *to-- = *from--; ++ } ++ } ++ } ++ ++ ++ + // Platform dependent implementations of the above methods. + #ifdef TARGET_ARCH_x86 + # include "copy_x86.hpp" +@@ -350,6 +377,13 @@ class Copy : AllStatic { + #ifdef TARGET_ARCH_ppc + # include "copy_ppc.hpp" + #endif ++#ifdef TARGET_ARCH_mips ++# include "copy_mips.hpp" ++#endif ++#ifdef TARGET_ARCH_loongarch ++# include "copy_loongarch.hpp" ++#endif ++ + + }; + +diff --git a/hotspot/src/share/vm/utilities/debug.cpp b/hotspot/src/share/vm/utilities/debug.cpp +index 58a32a2b83..1026585f84 100644 +--- a/hotspot/src/share/vm/utilities/debug.cpp ++++ b/hotspot/src/share/vm/utilities/debug.cpp +@@ -690,6 +690,7 @@ void help() { + tty->print_cr(" pns($sp, $ebp, $pc) on Linux/x86 or"); + tty->print_cr(" pns($sp, $fp, $pc) on Linux/AArch64 or"); + tty->print_cr(" pns($sp, 0, $pc) on Linux/ppc64 or"); ++ tty->print_cr(" pns($sp, $s8, $pc) on Linux/mips or"); + tty->print_cr(" pns($sp + 0x7ff, 0, $pc) on Solaris/SPARC"); + tty->print_cr(" - in gdb do 'set overload-resolution off' before calling pns()"); + tty->print_cr(" - in dbx do 'frame 1' before calling pns()"); +diff --git a/hotspot/src/share/vm/utilities/globalDefinitions.hpp b/hotspot/src/share/vm/utilities/globalDefinitions.hpp +index 81866b8409..61fc0c48a2 100644 +--- a/hotspot/src/share/vm/utilities/globalDefinitions.hpp ++++ b/hotspot/src/share/vm/utilities/globalDefinitions.hpp +@@ -22,6 +22,12 @@ + * + */ + ++/* ++ * This file has been modified by Loongson Technology in 2020. These ++ * modifications are Copyright (c) 2015, 2020, Loongson Technology, and are made ++ * available on the same license terms set forth above. 
++ */ ++ + #ifndef SHARE_VM_UTILITIES_GLOBALDEFINITIONS_HPP + #define SHARE_VM_UTILITIES_GLOBALDEFINITIONS_HPP + +@@ -455,6 +461,12 @@ enum RTMState { + #ifdef TARGET_ARCH_ppc + # include "globalDefinitions_ppc.hpp" + #endif ++#ifdef TARGET_ARCH_mips ++# include "globalDefinitions_mips.hpp" ++#endif ++#ifdef TARGET_ARCH_loongarch ++# include "globalDefinitions_loongarch.hpp" ++#endif + + /* + * If a platform does not support native stack walking +diff --git a/hotspot/src/share/vm/utilities/macros.hpp b/hotspot/src/share/vm/utilities/macros.hpp +index 599e1074de..41ef06e27f 100644 +--- a/hotspot/src/share/vm/utilities/macros.hpp ++++ b/hotspot/src/share/vm/utilities/macros.hpp +@@ -22,6 +22,12 @@ + * + */ + ++/* ++ * This file has been modified by Loongson Technology in 2020. These ++ * modifications are Copyright (c) 2015, 2020, Loongson Technology, and are made ++ * available on the same license terms set forth above. ++ */ ++ + #ifndef SHARE_VM_UTILITIES_MACROS_HPP + #define SHARE_VM_UTILITIES_MACROS_HPP + +@@ -373,6 +379,30 @@ + #define NOT_SPARC(code) code + #endif + ++#ifdef MIPS64 ++#ifndef MIPS ++#define MIPS ++#endif ++#define MIPS64_ONLY(code) code ++#define NOT_MIPS64(code) ++#else ++#undef MIPS ++#define MIPS64_ONLY(code) ++#define NOT_MIPS64(code) code ++#endif ++ ++#ifdef LOONGARCH64 ++#ifndef LOONGARCH ++#define LOONGARCH ++#endif ++#define LOONGARCH64_ONLY(code) code ++#define NOT_LOONGARCH64(code) ++#else ++#undef LOONGARCH ++#define LOONGARCH64_ONLY(code) ++#define NOT_LOONGARCH64(code) code ++#endif ++ + #if defined(PPC32) || defined(PPC64) + #ifndef PPC + #define PPC +diff --git a/hotspot/src/share/vm/utilities/taskqueue.hpp b/hotspot/src/share/vm/utilities/taskqueue.hpp +index bc06caccb4..46be35a325 100644 +--- a/hotspot/src/share/vm/utilities/taskqueue.hpp ++++ b/hotspot/src/share/vm/utilities/taskqueue.hpp +@@ -121,11 +121,22 @@ protected: + Age(const Age& age) { _data = age._data; } + Age(idx_t top, idx_t tag) { _fields._top = top; _fields._tag = tag; } + ++#if !defined MIPS && !defined LOONGARCH + Age get() const volatile { return _data; } + void set(Age age) volatile { _data = age._data; } + + idx_t top() const volatile { return _fields._top; } + idx_t tag() const volatile { return _fields._tag; } ++#else ++ Age get() const volatile { ++ size_t res = OrderAccess::load_ptr_acquire((volatile intptr_t*) &_data); ++ return *(Age*)(&res); ++ } ++ void set(Age age) volatile { OrderAccess::release_store_ptr((volatile intptr_t*) &_data, *(size_t*)(&age._data)); } ++ ++ idx_t top() const volatile { return OrderAccess::load_acquire((volatile idx_t*) &(_fields._top)); } ++ idx_t tag() const volatile { return OrderAccess::load_acquire((volatile idx_t*) &(_fields._tag)); } ++#endif + + // Increment top; if it wraps, increment tag also. + void increment() { +@@ -195,23 +206,50 @@ protected: + public: + TaskQueueSuper() : _bottom(0), _age() {} + ++#if defined MIPS || defined LOONGARCH ++ inline uint get_bottom() const { ++ return OrderAccess::load_acquire((volatile juint*)&_bottom); ++ } ++ ++ inline void set_bottom(uint new_bottom) { ++ OrderAccess::release_store(&_bottom, new_bottom); ++ } ++#endif + // Return true if the TaskQueue contains/does not contain any tasks. 
+- bool peek() const { return _bottom != _age.top(); } ++ bool peek() const { ++#if defined MIPS || defined LOONGARCH ++ return get_bottom() != _age.top(); ++#else ++ return _bottom != _age.top(); ++#endif ++ } + bool is_empty() const { return size() == 0; } + + // Return an estimate of the number of elements in the queue. + // The "careful" version admits the possibility of pop_local/pop_global + // races. + uint size() const { ++#if defined MIPS || defined LOONGARCH ++ return size(get_bottom(), _age.top()); ++#else + return size(_bottom, _age.top()); ++#endif + } + + uint dirty_size() const { ++#if defined MIPS || defined LOONGARCH ++ return dirty_size(get_bottom(), _age.top()); ++#else + return dirty_size(_bottom, _age.top()); ++#endif + } + + void set_empty() { ++#if defined MIPS || defined LOONGARCH ++ set_bottom(0); ++#else + _bottom = 0; ++#endif + _age.set(0); + } + +@@ -263,7 +301,9 @@ protected: + typedef typename TaskQueueSuper::Age Age; + typedef typename TaskQueueSuper::idx_t idx_t; + ++#if !defined MIPS && !defined LOONGARCH + using TaskQueueSuper::_bottom; ++#endif + using TaskQueueSuper::_age; + using TaskQueueSuper::increment_index; + using TaskQueueSuper::decrement_index; +@@ -327,7 +367,11 @@ template + void GenericTaskQueue::oops_do(OopClosure* f) { + // tty->print_cr("START OopTaskQueue::oops_do"); + uint iters = size(); ++#if defined MIPS || defined LOONGARCH ++ uint index = this->get_bottom(); ++#else + uint index = _bottom; ++#endif + for (uint i = 0; i < iters; ++i) { + index = decrement_index(index); + // tty->print_cr(" doing entry %d," INTPTR_T " -> " INTPTR_T, +@@ -345,14 +389,22 @@ template + bool GenericTaskQueue::push_slow(E t, uint dirty_n_elems) { + if (dirty_n_elems == N - 1) { + // Actually means 0, so do the push. ++#if defined MIPS || defined LOONGARCH ++ uint localBot = this->get_bottom(); ++#else + uint localBot = _bottom; ++#endif + // g++ complains if the volatile result of the assignment is + // unused, so we cast the volatile away. We cannot cast directly + // to void, because gcc treats that as not using the result of the + // assignment. However, casting to E& means that we trigger an + // unused-value warning. So, we cast the E& to void. + (void)const_cast(_elems[localBot] = t); ++#if defined MIPS || defined LOONGARCH ++ this->set_bottom(increment_index(localBot)); ++#else + OrderAccess::release_store(&_bottom, increment_index(localBot)); ++#endif + TASKQUEUE_STATS_ONLY(stats.record_push()); + return true; + } +@@ -407,7 +459,11 @@ bool GenericTaskQueue::pop_global(volatile E& t) { + #if !(defined SPARC || defined IA32 || defined AMD64) + OrderAccess::fence(); + #endif ++#if defined MIPS || defined LOONGARCH ++ uint localBot = this->get_bottom(); ++#else + uint localBot = OrderAccess::load_acquire((volatile juint*)&_bottom); ++#endif + uint n_elems = size(localBot, oldAge.top()); + if (n_elems == 0) { + return false; +@@ -662,7 +718,11 @@ public: + + template inline bool + GenericTaskQueue::push(E t) { ++#if defined MIPS || defined LOONGARCH ++ uint localBot = this->get_bottom(); ++#else + uint localBot = _bottom; ++#endif + assert(localBot < N, "_bottom out of range."); + idx_t top = _age.top(); + uint dirty_n_elems = dirty_size(localBot, top); +@@ -674,7 +734,11 @@ GenericTaskQueue::push(E t) { + // assignment. However, casting to E& means that we trigger an + // unused-value warning. So, we cast the E& to void. 
+ (void) const_cast(_elems[localBot] = t); ++#if defined MIPS || defined LOONGARCH ++ this->set_bottom(increment_index(localBot)); ++#else + OrderAccess::release_store(&_bottom, increment_index(localBot)); ++#endif + TASKQUEUE_STATS_ONLY(stats.record_push()); + return true; + } else { +@@ -684,7 +748,11 @@ GenericTaskQueue::push(E t) { + + template inline bool + GenericTaskQueue::pop_local(volatile E& t) { ++#if defined MIPS || defined LOONGARCH ++ uint localBot = this->get_bottom(); ++#else + uint localBot = _bottom; ++#endif + // This value cannot be N-1. That can only occur as a result of + // the assignment to bottom in this method. If it does, this method + // resets the size to 0 before the next call (which is sequential, +@@ -693,7 +761,11 @@ GenericTaskQueue::pop_local(volatile E& t) { + assert(dirty_n_elems != N - 1, "Shouldn't be possible..."); + if (dirty_n_elems == 0) return false; + localBot = decrement_index(localBot); ++#if defined MIPS || defined LOONGARCH ++ this->set_bottom(localBot); ++#else + _bottom = localBot; ++#endif + // This is necessary to prevent any read below from being reordered + // before the store just above. + OrderAccess::fence(); +diff --git a/hotspot/src/share/vm/utilities/vmError.cpp b/hotspot/src/share/vm/utilities/vmError.cpp +index fa7a32508e..7098a98a9f 100644 +--- a/hotspot/src/share/vm/utilities/vmError.cpp ++++ b/hotspot/src/share/vm/utilities/vmError.cpp +@@ -22,6 +22,13 @@ + * + */ + ++/* ++ * This file has been modified by Loongson Technology in 2018. These ++ * modifications are Copyright (c) 2018 Loongson Technology, and are made ++ * available on the same license terms set forth above. ++ * ++*/ ++ + #include + #include "precompiled.hpp" + #include "compiler/compileBroker.hpp" +@@ -488,7 +495,12 @@ void VMError::report(outputStream* st) { + JDK_Version::runtime_name() : ""; + const char* runtime_version = JDK_Version::runtime_version() != NULL ? + JDK_Version::runtime_version() : ""; +- st->print_cr("# JRE version: %s (%s) (build %s)", runtime_name, buf, runtime_version); ++#ifdef LOONGSON_RUNTIME_NAME ++ const char* loongson_runtime_name_and_version = LOONGSON_RUNTIME_NAME; ++#else ++ const char* loongson_runtime_name_and_version = ""; ++#endif ++ st->print_cr("# JRE version: %s (%s) (build %s) (%s)", runtime_name, buf, runtime_version, loongson_runtime_name_and_version); + st->print_cr("# Java VM: %s (%s %s %s %s)", + Abstract_VM_Version::vm_name(), + Abstract_VM_Version::vm_release(), +diff --git a/hotspot/test/compiler/criticalnatives/argumentcorruption/Test8167409.sh b/hotspot/test/compiler/criticalnatives/argumentcorruption/Test8167409.sh +index fcf1d04b6a..5b8e7dcce5 100644 +--- a/hotspot/test/compiler/criticalnatives/argumentcorruption/Test8167409.sh ++++ b/hotspot/test/compiler/criticalnatives/argumentcorruption/Test8167409.sh +@@ -24,6 +24,12 @@ + # questions. + # + ++# ++# This file has been modified by Loongson Technology in 2023. These ++# modifications are Copyright (c) 2023, Loongson Technology, and are made ++# available on the same license terms set forth above. 
++# ++ + ## @test Test8167409.sh + ## @bug 8167409 + ## @summary Invalid value passed to critical JNI function +@@ -68,6 +74,18 @@ if [ $VM_CPU = "aarch64" ]; then + exit 0; + fi + ++# CriticalJNINatives is not supported for loongarch64 ++if [ $VM_CPU = "loongarch64" ]; then ++ echo "Test Passed" ++ exit 0; ++fi ++ ++# CriticalJNINatives is not supported for mips64 ++if [ $VM_CPU = "mips64" -o $VM_CPU = "mips64el" ]; then ++ echo "Test Passed" ++ exit 0; ++fi ++ + THIS_DIR=. + + cp ${TESTSRC}${FS}*.java ${THIS_DIR} +diff --git a/hotspot/test/compiler/intrinsics/sha/cli/testcases/GenericTestCaseForOtherCPU.java b/hotspot/test/compiler/intrinsics/sha/cli/testcases/GenericTestCaseForOtherCPU.java +index fa9a6f208b..885957cf1c 100644 +--- a/hotspot/test/compiler/intrinsics/sha/cli/testcases/GenericTestCaseForOtherCPU.java ++++ b/hotspot/test/compiler/intrinsics/sha/cli/testcases/GenericTestCaseForOtherCPU.java +@@ -34,11 +34,12 @@ import com.oracle.java.testlibrary.cli.predicate.OrPredicate; + public class GenericTestCaseForOtherCPU extends + SHAOptionsBase.TestCase { + public GenericTestCaseForOtherCPU(String optionName) { +- // Execute the test case on any CPU except SPARC and X86 ++ // Execute the test case on any CPU except SPARC, LoongArch64 and X86 + super(optionName, new NotPredicate(new OrPredicate(Platform::isSparc, + new OrPredicate(Platform::isAArch64, + new OrPredicate(Platform::isPPC, +- new OrPredicate(Platform::isX64, Platform::isX86)))))); ++ new OrPredicate(Platform::isLoongArch64, ++ new OrPredicate(Platform::isX64, Platform::isX86))))))); + } + + @Override +diff --git a/hotspot/test/compiler/testlibrary/sha/predicate/IntrinsicPredicates.java b/hotspot/test/compiler/testlibrary/sha/predicate/IntrinsicPredicates.java +index dc8c398408..2427b2bf7b 100644 +--- a/hotspot/test/compiler/testlibrary/sha/predicate/IntrinsicPredicates.java ++++ b/hotspot/test/compiler/testlibrary/sha/predicate/IntrinsicPredicates.java +@@ -62,18 +62,24 @@ public class IntrinsicPredicates { + = new OrPredicate( + new CPUSpecificPredicate("sparc.*", new String[] { "sha1" }, + null), ++ // Basic instructions are used to implement SHA1 Intrinsics on LA, so "sha1" feature is not needed. ++ new OrPredicate(new CPUSpecificPredicate("loongarch64.*", null, ++ null), + new CPUSpecificPredicate("aarch64", new String[] { "sha1" }, +- null)); ++ null))); + + public static final BooleanSupplier SHA256_INSTRUCTION_AVAILABLE + = new OrPredicate(new CPUSpecificPredicate("aarch64", new String[] { "sha256" }, + null), + new OrPredicate(new CPUSpecificPredicate("sparc.*", new String[] { "sha256" }, + null), ++ // Basic instructions are used to implement SHA256 Intrinsics on LA, so "sha256" feature is not needed. 
++ new OrPredicate(new CPUSpecificPredicate("loongarch64.*", null, ++ null), + new OrPredicate(new CPUSpecificPredicate("ppc64.*", new String[] { "sha" }, + null), + new CPUSpecificPredicate("ppc64le.*", new String[] { "sha" }, +- null)))); ++ null))))); + + public static final BooleanSupplier SHA512_INSTRUCTION_AVAILABLE + = new OrPredicate( +diff --git a/hotspot/test/runtime/6929067/Test6929067.sh b/hotspot/test/runtime/6929067/Test6929067.sh +index 2bbb3401ce..1a5482e645 100644 +--- a/hotspot/test/runtime/6929067/Test6929067.sh ++++ b/hotspot/test/runtime/6929067/Test6929067.sh +@@ -97,6 +97,10 @@ case "$ARCH" in + i686) + ARCH=i386 + ;; ++ loongarch64) ++ COMP_FLAG="" ++ ARCH=loongarch64 ++ ;; + # Assuming other ARCH values need no translation + esac + +diff --git a/hotspot/test/runtime/Unsafe/RangeCheck.java b/hotspot/test/runtime/Unsafe/RangeCheck.java +index 9ded944cb2..4d4ea2e048 100644 +--- a/hotspot/test/runtime/Unsafe/RangeCheck.java ++++ b/hotspot/test/runtime/Unsafe/RangeCheck.java +@@ -43,6 +43,7 @@ public class RangeCheck { + true, + "-Xmx32m", + "-XX:-TransmitErrorReport", ++ "-XX:-InlineUnsafeOps", // The compiler intrinsics doesn't have the assert + DummyClassWithMainRangeCheck.class.getName()); + + OutputAnalyzer output = new OutputAnalyzer(pb.start()); +diff --git a/hotspot/test/test_env.sh b/hotspot/test/test_env.sh +index 5ba4f28c45..d9d8bb6b6b 100644 +--- a/hotspot/test/test_env.sh ++++ b/hotspot/test/test_env.sh +@@ -211,6 +211,29 @@ if [ $? = 0 ] + then + VM_CPU="aarch64" + fi ++grep "mips" vm_version.out > ${NULL} ++if [ $? = 0 ] ++then ++ VM_CPU="mips" ++ if [ $VM_BITS = "64" ] ++ then ++ VM_CPU="mips64" ++ grep "mips64el" vm_version.out > ${NULL} ++ if [ $? = 0 ] ++ then ++ VM_CPU="mips64el" ++ fi ++ fi ++fi ++grep "loongarch" vm_version.out > ${NULL} ++if [ $? = 0 ] ++then ++ VM_CPU="loongarch" ++ if [ $VM_BITS = "64" ] ++ then ++ VM_CPU="loongarch64" ++ fi ++fi + export VM_TYPE VM_BITS VM_OS VM_CPU + echo "VM_TYPE=${VM_TYPE}" + echo "VM_BITS=${VM_BITS}" +diff --git a/hotspot/test/testlibrary/com/oracle/java/testlibrary/Platform.java b/hotspot/test/testlibrary/com/oracle/java/testlibrary/Platform.java +index 6a14079347..56a6375b5f 100644 +--- a/hotspot/test/testlibrary/com/oracle/java/testlibrary/Platform.java ++++ b/hotspot/test/testlibrary/com/oracle/java/testlibrary/Platform.java +@@ -126,6 +126,10 @@ public class Platform { + return isArch("aarch64"); + } + ++ public static boolean isLoongArch64() { ++ return isArch("loongarch64"); ++ } ++ + private static boolean isArch(String archnameRE) { + return Pattern.compile(archnameRE, Pattern.CASE_INSENSITIVE) + .matcher(osArch) +@@ -136,6 +140,10 @@ public class Platform { + return osArch; + } + ++ public static boolean isMIPS() { ++ return isArch("mips.*"); ++ } ++ + /** + * Return a boolean for whether we expect to be able to attach + * the SA to our own processes on this system. 
+diff --git a/hotspot/test/testlibrary_tests/TestMutuallyExclusivePlatformPredicates.java b/hotspot/test/testlibrary_tests/TestMutuallyExclusivePlatformPredicates.java +index 7d56a4a3bc..41825e18b3 100644 +--- a/hotspot/test/testlibrary_tests/TestMutuallyExclusivePlatformPredicates.java ++++ b/hotspot/test/testlibrary_tests/TestMutuallyExclusivePlatformPredicates.java +@@ -43,7 +43,7 @@ import java.util.Set; + */ + public class TestMutuallyExclusivePlatformPredicates { + private static enum MethodGroup { +- ARCH("isARM", "isPPC", "isSparc", "isX86", "isX64", "isAArch64"), ++ ARCH("isARM", "isPPC", "isSparc", "isX86", "isX64", "isAArch64", "isMIPS", "isLoongArch64"), + BITNESS("is32bit", "is64bit"), + OS("isAix", "isLinux", "isSolaris", "isWindows", "isOSX"), + VM_TYPE("isClient", "isServer", "isGraal", "isMinimal"), +diff --git a/jdk/make/Images.gmk b/jdk/make/Images.gmk +index 991c0af7b4..9171685655 100644 +--- a/jdk/make/Images.gmk ++++ b/jdk/make/Images.gmk +@@ -23,6 +23,12 @@ + # questions. + # + ++# ++# This file has been modified by Loongson Technology in 2022. These ++# modifications are Copyright (c) 2018, 2022, Loongson Technology, and are made ++# available on the same license terms set forth above. ++# ++ + include $(SPEC) + include MakeBase.gmk + include JavaCompilation.gmk +@@ -650,6 +656,11 @@ $(JDK_IMAGE_DIR)/src.zip: $(IMAGES_OUTPUTDIR)/src.zip + $(ECHO) $(LOG_INFO) Copying $(patsubst $(OUTPUT_ROOT)/%,%,$@) + $(install-file) + ++# create link "mips64el -> mips64" for deploy ++$(JDK_IMAGE_DIR)/jre/lib/mips64: $(JDK_IMAGE_DIR)/jre/lib/mips64el ++ $(ECHO) $(LOG_INFO) Create link from mips64 to mips64 ++ $(CD) $(JDK_IMAGE_DIR)/jre/lib && $(RM) mips64 && $(LN) -s mips64el mips64 ++ + ################################################################################ + # Post processing (strip etc) + +@@ -728,6 +739,14 @@ ifneq ($(POST_STRIP_CMD), ) + + endif + ++################################################################################ ++# Loongson added list, architecture dependent files ++ifeq ($(OPENJDK_TARGET_CPU), mips64) ++ ifeq ($(OPENJDK_TARGET_CPU_ENDIAN), little) ++ JDK_IMAGE_LOONGSON_LIST := $(JDK_IMAGE_DIR)/jre/lib/mips64el ++ endif ++endif ++ + ################################################################################ + + # Include the custom makefile right here, after all variables have been defined +@@ -753,6 +772,7 @@ jdk-image: $(JDK_BIN_TARGETS) $(JDKJRE_BIN_TARGETS) \ + $(JDKJRE_DOC_TARGETS) $(JDK_DOC_TARGETS) \ + $(JDK_INFO_FILE) $(JDKJRE_STRIP_LIST) $(JDK_BIN_STRIP_LIST) \ + $(JDK_IMAGE_DIR)/src.zip \ ++ $(JDK_IMAGE_LOONGSON_LIST) \ + $(JDK_BIN_ISADIR_LINK_TARGETS) $(JDKJRE_BIN_ISADIR_LINK_TARGETS) + + jre-overlay-image: $(JRE_OVERLAY_BIN_TARGETS) $(JRE_OVERLAY_LIB_TARGETS) \ +diff --git a/jdk/make/gensrc/GensrcMisc.gmk b/jdk/make/gensrc/GensrcMisc.gmk +index 0e3dee5ca3..66f19f4d25 100644 +--- a/jdk/make/gensrc/GensrcMisc.gmk ++++ b/jdk/make/gensrc/GensrcMisc.gmk +@@ -23,6 +23,12 @@ + # questions. + # + ++# ++# This file has been modified by Loongson Technology in 2018. These ++# modifications are Copyright (c) 2018, Loongson Technology, and are made ++# available on the same license terms set forth above. 
++# ++ + include ProfileNames.gmk + + ################################################################################ +@@ -39,6 +45,7 @@ $(PROFILE_VERSION_JAVA_TARGETS): \ + $(SED) -e 's/@@launcher_name@@/$(LAUNCHER_NAME)/g' \ + -e 's/@@java_version@@/$(RELEASE)/g' \ + -e 's/@@java_runtime_version@@/$(FULL_VERSION)/g' \ ++ -e 's/@@loongson_runtime_name@@/$(LOONGSON_RUNTIME_NAME)/g' \ + -e 's/@@java_runtime_name@@/$(RUNTIME_NAME)/g' \ + -e 's/@@java_profile_name@@/$(call profile_version_name, $@)/g' \ + $< > $@.tmp +diff --git a/jdk/make/lib/SoundLibraries.gmk b/jdk/make/lib/SoundLibraries.gmk +index b59a9462ec..8ce97dc854 100644 +--- a/jdk/make/lib/SoundLibraries.gmk ++++ b/jdk/make/lib/SoundLibraries.gmk +@@ -23,6 +23,12 @@ + # questions. + # + ++# ++# This file has been modified by Loongson Technology in 2021. These ++# modifications are Copyright (c) 2015, 2021, Loongson Technology, and are made ++# available on the same license terms set forth above. ++# ++ + LIBJSOUND_SRC_DIRS := \ + $(JDK_TOPDIR)/src/share/native/com/sun/media/sound \ + $(JDK_TOPDIR)/src/$(OPENJDK_TARGET_OS_API_DIR)/native/com/sun/media/sound +@@ -136,6 +142,14 @@ else + LIBJSOUND_CFLAGS += -DX_ARCH=X_PPC + endif + ++ ifeq ($(OPENJDK_TARGET_CPU), mips64) ++ LIBJSOUND_CFLAGS += -DX_ARCH=X_MIPS64 ++ endif ++ ++ ifeq ($(OPENJDK_TARGET_CPU), loongarch64) ++ LIBJSOUND_CFLAGS += -DX_ARCH=X_LOONGARCH64 ++ endif ++ + ifeq ($(OPENJDK_TARGET_CPU), ppc64) + LIBJSOUND_CFLAGS += -DX_ARCH=X_PPC64 + endif +diff --git a/jdk/src/share/classes/sun/misc/Version.java.template b/jdk/src/share/classes/sun/misc/Version.java.template +index 32e2586e79..e38541a9f7 100644 +--- a/jdk/src/share/classes/sun/misc/Version.java.template ++++ b/jdk/src/share/classes/sun/misc/Version.java.template +@@ -23,6 +23,13 @@ + * questions. + */ + ++/* ++ * This file has been modified by Loongson Technology in 2018. These ++ * modifications are Copyright (c) 2018 Loongson Technology, and are made ++ * available on the same license terms set forth above. ++ */ ++ ++ + package sun.misc; + import java.io.PrintStream; + +@@ -44,6 +51,9 @@ public class Version { + private static final String java_runtime_version = + "@@java_runtime_version@@"; + ++ private static final String loongson_runtime_name = ++ "@@loongson_runtime_name@@"; ++ + static { + init(); + } +@@ -103,7 +113,11 @@ public class Version { + + /* Second line: runtime version (ie, libraries). */ + +- ps.print(java_runtime_name + " (build " + java_runtime_version); ++ ps.print(java_runtime_name); ++ if (loongson_runtime_name.length() > 0) { ++ ps.print(" ("+ loongson_runtime_name +")"); ++ } ++ ps.print(" (build " + java_runtime_version); + + if (java_profile_name.length() > 0) { + // profile name +diff --git a/jdk/src/solaris/bin/loongarch64/jvm.cfg b/jdk/src/solaris/bin/loongarch64/jvm.cfg +new file mode 100644 +index 0000000000..42a06755da +--- /dev/null ++++ b/jdk/src/solaris/bin/loongarch64/jvm.cfg +@@ -0,0 +1,36 @@ ++# Copyright (c) 2003, Oracle and/or its affiliates. All rights reserved. ++# Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. ++# DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++# ++# This code is free software; you can redistribute it and/or modify it ++# under the terms of the GNU General Public License version 2 only, as ++# published by the Free Software Foundation. Oracle designates this ++# particular file as subject to the "Classpath" exception as provided ++# by Oracle in the LICENSE file that accompanied this code. 
++# ++# This code is distributed in the hope that it will be useful, but WITHOUT ++# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++# FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++# version 2 for more details (a copy is included in the LICENSE file that ++# accompanied this code). ++# ++# You should have received a copy of the GNU General Public License version ++# 2 along with this work; if not, write to the Free Software Foundation, ++# Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++# ++# Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++# or visit www.oracle.com if you need additional information or have any ++# questions. ++# ++# ++# List of JVMs that can be used as an option to java, javac, etc. ++# Order is important -- first in this list is the default JVM. ++# NOTE that this both this file and its format are UNSUPPORTED and ++# WILL GO AWAY in a future release. ++# ++# You may also select a JVM in an arbitrary location with the ++# "-XXaltjvm=" option, but that too is unsupported ++# and may not be available in a future release. ++# ++-server KNOWN ++-client IGNORE +diff --git a/jdk/src/solaris/bin/mips64/jvm.cfg b/jdk/src/solaris/bin/mips64/jvm.cfg +new file mode 100644 +index 0000000000..42a06755da +--- /dev/null ++++ b/jdk/src/solaris/bin/mips64/jvm.cfg +@@ -0,0 +1,36 @@ ++# Copyright (c) 2003, Oracle and/or its affiliates. All rights reserved. ++# Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. ++# DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++# ++# This code is free software; you can redistribute it and/or modify it ++# under the terms of the GNU General Public License version 2 only, as ++# published by the Free Software Foundation. Oracle designates this ++# particular file as subject to the "Classpath" exception as provided ++# by Oracle in the LICENSE file that accompanied this code. ++# ++# This code is distributed in the hope that it will be useful, but WITHOUT ++# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++# FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++# version 2 for more details (a copy is included in the LICENSE file that ++# accompanied this code). ++# ++# You should have received a copy of the GNU General Public License version ++# 2 along with this work; if not, write to the Free Software Foundation, ++# Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++# ++# Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++# or visit www.oracle.com if you need additional information or have any ++# questions. ++# ++# ++# List of JVMs that can be used as an option to java, javac, etc. ++# Order is important -- first in this list is the default JVM. ++# NOTE that this both this file and its format are UNSUPPORTED and ++# WILL GO AWAY in a future release. ++# ++# You may also select a JVM in an arbitrary location with the ++# "-XXaltjvm=" option, but that too is unsupported ++# and may not be available in a future release. 
++# ++-server KNOWN ++-client IGNORE +diff --git a/jdk/test/jdk/jfr/event/os/TestCPUInformation.java b/jdk/test/jdk/jfr/event/os/TestCPUInformation.java +index d6a026b2cc..b65486023f 100644 +--- a/jdk/test/jdk/jfr/event/os/TestCPUInformation.java ++++ b/jdk/test/jdk/jfr/event/os/TestCPUInformation.java +@@ -54,8 +54,8 @@ public class TestCPUInformation { + Events.assertField(event, "hwThreads").atLeast(1); + Events.assertField(event, "cores").atLeast(1); + Events.assertField(event, "sockets").atLeast(1); +- Events.assertField(event, "cpu").containsAny("Intel", "AMD", "Unknown x86", "sparc", "ARM", "PPC", "PowerPC", "AArch64", "s390"); +- Events.assertField(event, "description").containsAny("Intel", "AMD", "Unknown x86", "SPARC", "ARM", "PPC", "PowerPC", "AArch64", "s390"); ++ Events.assertField(event, "cpu").containsAny("Intel", "AMD", "Unknown x86", "sparc", "ARM", "PPC", "PowerPC", "AArch64", "s390", "MIPS", "LoongArch"); ++ Events.assertField(event, "description").containsAny("Intel", "AMD", "Unknown x86", "SPARC", "ARM", "PPC", "PowerPC", "AArch64", "s390", "MIPS", "LoongArch"); + } + } + } +diff --git a/jdk/test/sun/management/jmxremote/bootstrap/linux-loongarch64/launcher b/jdk/test/sun/management/jmxremote/bootstrap/linux-loongarch64/launcher +new file mode 100755 +index 0000000000..66291c7522 +Binary files /dev/null and b/jdk/test/sun/management/jmxremote/bootstrap/linux-loongarch64/launcher differ +diff --git a/jdk/test/sun/management/jmxremote/bootstrap/linux-mips64el/launcher b/jdk/test/sun/management/jmxremote/bootstrap/linux-mips64el/launcher +new file mode 100644 +index 0000000000..5c8385ca12 +Binary files /dev/null and b/jdk/test/sun/management/jmxremote/bootstrap/linux-mips64el/launcher differ +diff --git a/jdk/test/sun/security/pkcs11/PKCS11Test.java b/jdk/test/sun/security/pkcs11/PKCS11Test.java +index 5fc9c605de..9db6a17d66 100644 +--- a/jdk/test/sun/security/pkcs11/PKCS11Test.java ++++ b/jdk/test/sun/security/pkcs11/PKCS11Test.java +@@ -21,6 +21,11 @@ + * questions. + */ + ++ /* ++ * This file has been modified by Loongson Technology in 2022, These ++ * modifications are Copyright (c) 2022, Loongson Technology, and are made ++ * available on the same license terms set forth above. 
++ */ + + // common infrastructure for SunPKCS11 tests + +@@ -589,6 +594,9 @@ public abstract class PKCS11Test { + "/usr/lib64/"}); + osMap.put("Linux-ppc64-64", new String[]{"/usr/lib64/"}); + osMap.put("Linux-ppc64le-64", new String[]{"/usr/lib64/"}); ++ osMap.put("Linux-mips64el-64", new String[]{"/usr/lib64/"}); ++ osMap.put("Linux-loongarch64-64", new String[]{"/usr/lib/loongarch64-linux-gnu/", ++ "/usr/lib64/" }); + osMap.put("Windows-x86-32", new String[]{ + PKCS11_BASE + "/nss/lib/windows-i586/".replace('/', SEP)}); + osMap.put("Windows-amd64-64", new String[]{ diff --git a/jemalloc/PKGBUILD b/jemalloc/PKGBUILD index 2244c662c2..810d59169c 100644 --- a/jemalloc/PKGBUILD +++ b/jemalloc/PKGBUILD @@ -15,8 +15,15 @@ makedepends=('clang') options=('!lto') provides=('libjemalloc.so') optdepends=('perl: for jeprof') -source=("https://github.com/jemalloc/jemalloc/releases/download/${pkgver}/${pkgname}-${pkgver}.tar.bz2") -sha256sums=('2db82d1e7119df3e71b7640219b6dfe84789bc0537983c3b7ac4f7189aecfeaa') +source=("https://github.com/jemalloc/jemalloc/releases/download/${pkgver}/${pkgname}-${pkgver}.tar.bz2" + add-loongarch64.patch) +sha256sums=('2db82d1e7119df3e71b7640219b6dfe84789bc0537983c3b7ac4f7189aecfeaa' + '5e8f375b5d5aec487d65a7681b3574aa0e19927b1d2d9140fc9dba2cf74a961d') + +prepare() { + cd "$pkgname-$pkgver" + patch -p1 -i "$srcdir/add-loongarch64.patch" +} build() { cd $pkgname-$pkgver @@ -24,6 +31,10 @@ build() { # FS#71745: GCC-built jemalloc causes telegram-desktop to crash a lot. The reason is still not clear. export CC=clang export CXX=clang++ + CFLAGS=${CFLAGS/-mlsx /} + CXXFLAGS=${CXXFLAGS/-mlsx /} + CFLAGS=${CFLAGS/-fstack-clash-protection/} + CXXFLAGS=${CXXFLAGS/-fstack-clash-protection/} ./configure \ --enable-prof \ diff --git a/jemalloc/add-loongarch64.patch b/jemalloc/add-loongarch64.patch new file mode 100644 index 0000000000..22af43b82a --- /dev/null +++ b/jemalloc/add-loongarch64.patch @@ -0,0 +1,14 @@ +Index: jemalloc-5.2.1/include/jemalloc/internal/quantum.h +=================================================================== +--- jemalloc-5.2.1.orig/include/jemalloc/internal/quantum.h ++++ jemalloc-5.2.1/include/jemalloc/internal/quantum.h +@@ -30,6 +30,9 @@ + # ifdef __hppa__ + # define LG_QUANTUM 4 + # endif ++# ifdef __loongarch64 ++# define LG_QUANTUM 4 ++# endif + # ifdef __m68k__ + # define LG_QUANTUM 3 + # endif diff --git a/jless/PKGBUILD b/jless/PKGBUILD index 353e2b1317..a9a35b7ffc 100644 --- a/jless/PKGBUILD +++ b/jless/PKGBUILD @@ -25,7 +25,7 @@ pkgver() { prepare() { cd "$pkgname" - cargo fetch --locked --target "$CARCH-unknown-linux-gnu" + cargo fetch --locked --target "`uname -m`-unknown-linux-gnu" } build() { diff --git a/js102/PKGBUILD b/js102/PKGBUILD index 58188c24b1..93d1a698d9 100644 --- a/js102/PKGBUILD +++ b/js102/PKGBUILD @@ -31,9 +31,11 @@ options=(!lto) _relver=${pkgver}esr source=( https://archive.mozilla.org/pub/firefox/releases/$_relver/source/firefox-$_relver.source.tar.xz{,.asc} + js102-loong64-jit.patch ) b2sums=('b70727fa91d0d270673374bebb4745b87f6194191c1c9415547d772811a4a85f79a97e8985877eb5c9beef43fe15bef574172da35935e7024a947919ec11d883' - 'SKIP') + 'SKIP' + '2740b5d321f585cd60a038c1b1350c9e08c6559cc02a331ce7bc3d228bc785b066f795c8cbd2e43d46fe67687df420e20f4bb4bce40a2a2f962bc812aa170d01') validpgpkeys=( 14F26682D0916CDD81E37B6D61B7B526D98F0353 # Mozilla Software Releases ) @@ -44,6 +46,7 @@ COMPRESSZST+=(--long) prepare() { mkdir mozbuild cd firefox-$pkgver + patch -p1 -i $srcdir/js102-loong64-jit.patch cat >../mozconfig 
<.mozconfig ../mozconfig - <.mozconfig ../mozconfig - <.mozconfig ../mozconfig - < +# Date 1665408155 0 +# Node ID d9e0d2d8b3a89bdee2a55a0bdab9adcd108eb253 +# Parent f0bbec2617db346c3032e870fc571970728ae220 +Bug 1792981 - [loong64] Enable JIT compiler of loong64 port by default. r=jandem + +Fix a build with JIT issue on native loongarch64 machine, and enable JIT by default. + +Differential Revision: https://phabricator.services.mozilla.com/D158397 + +diff --git a/js/moz.configure b/js/moz.configure +--- a/js/moz.configure ++++ b/js/moz.configure +@@ -265,16 +265,18 @@ def jit_codegen(jit_enabled, simulator, + + if simulator: + return simulator + + if target.cpu == "aarch64": + return namespace(arm64=True) + elif target.cpu == "x86_64": + return namespace(x64=True) ++ elif target.cpu == "loongarch64": ++ return namespace(loong64=True) + + return namespace(**{str(target.cpu): True}) + + + set_config("JS_CODEGEN_NONE", jit_codegen.none) + set_config("JS_CODEGEN_ARM", jit_codegen.arm) + set_config("JS_CODEGEN_ARM64", jit_codegen.arm64) + set_config("JS_CODEGEN_MIPS32", jit_codegen.mips32) +diff --git a/js/src/wasm/WasmSignalHandlers.cpp b/js/src/wasm/WasmSignalHandlers.cpp +--- a/js/src/wasm/WasmSignalHandlers.cpp ++++ b/js/src/wasm/WasmSignalHandlers.cpp +@@ -158,10 +158,10 @@ + # define R32_sig(p) ((p)->uc_mcontext.gp_regs[32]) + # endif + # if defined(__linux__) && defined(__loongarch__) +-# define EPC_sig(p) ((p)->uc_mcontext.pc) +-# define RRA_sig(p) ((p)->uc_mcontext.gregs[1]) +-# define RSP_sig(p) ((p)->uc_mcontext.gregs[3]) +-# define RFP_sig(p) ((p)->uc_mcontext.gregs[22]) ++# define EPC_sig(p) ((p)->uc_mcontext.__pc) ++# define RRA_sig(p) ((p)->uc_mcontext.__gregs[1]) ++# define R03_sig(p) ((p)->uc_mcontext.__gregs[3]) ++# define RFP_sig(p) ((p)->uc_mcontext.__gregs[22]) + # endif + # elif defined(__NetBSD__) + # define EIP_sig(p) ((p)->uc_mcontext.__gregs[_REG_EIP]) +@@ -403,17 +403,17 @@ struct macos_aarch64_context { + # elif defined(__ppc64__) || defined(__PPC64__) || defined(__ppc64le__) || \ + defined(__PPC64LE__) + # define PC_sig(p) R32_sig(p) + # define SP_sig(p) R01_sig(p) + # define FP_sig(p) R01_sig(p) + # elif defined(__loongarch__) + # define PC_sig(p) EPC_sig(p) + # define FP_sig(p) RFP_sig(p) +-# define SP_sig(p) RSP_sig(p) ++# define SP_sig(p) R03_sig(p) + # define LR_sig(p) RRA_sig(p) + # endif + + static void SetContextPC(CONTEXT* context, uint8_t* pc) { + # ifdef PC_sig + *reinterpret_cast(&PC_sig(context)) = pc; + # else + MOZ_CRASH(); + diff --git a/js115/PKGBUILD b/js115/PKGBUILD index 08b7ddc73d..903e6579df 100644 --- a/js115/PKGBUILD +++ b/js115/PKGBUILD @@ -16,7 +16,7 @@ depends=( makedepends=( autoconf2.13 clang - lld +# lld llvm python rust @@ -57,11 +57,12 @@ ac_add_options --enable-release ac_add_options --enable-hardening ac_add_options --enable-optimize ac_add_options --enable-rust-simd -ac_add_options --enable-linker=lld +ac_add_options --enable-linker=bfd ac_add_options --disable-bootstrap ac_add_options --disable-debug ac_add_options --disable-jemalloc ac_add_options --disable-strip +ac_add_options --disable-unified-build # System libraries ac_add_options --with-system-zlib @@ -82,11 +83,18 @@ build() { export MOZBUILD_STATE_PATH="$srcdir/mozbuild" export MOZ_BUILD_DATE="$(date -u${SOURCE_DATE_EPOCH:+d @$SOURCE_DATE_EPOCH} +%Y%m%d%H%M%S)" export MOZ_NOSPAM=1 +# clang didn't support -mlsx + CFLAGS=${CFLAGS/-mlsx /} + CXXFLAGS=${CXXFLAGS/-mlsx /} + CFLAGS=${CFLAGS/-fstack-clash-protection/} + CXXFLAGS=${CXXFLAGS/-fstack-clash-protection/} # 
malloc_usable_size is used in various parts of the codebase CFLAGS="${CFLAGS/_FORTIFY_SOURCE=3/_FORTIFY_SOURCE=2}" CXXFLAGS="${CXXFLAGS/_FORTIFY_SOURCE=3/_FORTIFY_SOURCE=2}" + # LTO needs more open files + ulimit -n 4096 # Greatly reduce size of relocation tables # https://gitlab.archlinux.org/archlinux/rfcs/-/blob/master/rfcs/0023-pack-relative-relocs.rst LDFLAGS+=" -Wl,-z,pack-relative-relocs" @@ -94,40 +102,40 @@ build() { # Do 3-tier PGO echo "Building instrumented JS..." cat >.mozconfig ../mozconfig - <.mozconfig ../mozconfig - <.mozconfig ../mozconfig - < + inline void cmp32Set(Condition cond, T1 lhs, T2 rhs, Register dest) +- DEFINED_ON(x86_shared, arm, arm64, mips32, mips64); ++ DEFINED_ON(x86_shared, arm, arm64, mips32, mips64, loongarch64); + + template + inline void cmpPtrSet(Condition cond, T1 lhs, T2 rhs, Register dest) PER_ARCH; +@@ -1694,7 +1700,7 @@ + inline void branchTestInt32(Condition cond, Register tag, + Label* label) PER_SHARED_ARCH; + inline void branchTestDouble(Condition cond, Register tag, Label* label) +- DEFINED_ON(arm, arm64, mips32, mips64, x86_shared); ++ DEFINED_ON(arm, arm64, mips32, mips64, loongarch64, x86_shared); + inline void branchTestNumber(Condition cond, Register tag, + Label* label) PER_SHARED_ARCH; + inline void branchTestBoolean(Condition cond, Register tag, +@@ -1726,7 +1732,7 @@ + Label* label) PER_SHARED_ARCH; + inline void branchTestUndefined(Condition cond, const ValueOperand& value, + Label* label) +- DEFINED_ON(arm, arm64, mips32, mips64, x86_shared); ++ DEFINED_ON(arm, arm64, mips32, mips64, loongarch64, x86_shared); + + inline void branchTestInt32(Condition cond, const Address& address, + Label* label) PER_SHARED_ARCH; +@@ -1734,7 +1740,7 @@ + Label* label) PER_SHARED_ARCH; + inline void branchTestInt32(Condition cond, const ValueOperand& value, + Label* label) +- DEFINED_ON(arm, arm64, mips32, mips64, x86_shared); ++ DEFINED_ON(arm, arm64, mips32, mips64, loongarch64, x86_shared); + + inline void branchTestDouble(Condition cond, const Address& address, + Label* label) PER_SHARED_ARCH; +@@ -1742,11 +1748,11 @@ + Label* label) PER_SHARED_ARCH; + inline void branchTestDouble(Condition cond, const ValueOperand& value, + Label* label) +- DEFINED_ON(arm, arm64, mips32, mips64, x86_shared); ++ DEFINED_ON(arm, arm64, mips32, mips64, loongarch64, x86_shared); + + inline void branchTestNumber(Condition cond, const ValueOperand& value, + Label* label) +- DEFINED_ON(arm, arm64, mips32, mips64, x86_shared); ++ DEFINED_ON(arm, arm64, mips32, mips64, loongarch64, x86_shared); + + inline void branchTestBoolean(Condition cond, const Address& address, + Label* label) PER_SHARED_ARCH; +@@ -1754,7 +1760,7 @@ + Label* label) PER_SHARED_ARCH; + inline void branchTestBoolean(Condition cond, const ValueOperand& value, + Label* label) +- DEFINED_ON(arm, arm64, mips32, mips64, x86_shared); ++ DEFINED_ON(arm, arm64, mips32, mips64, loongarch64, x86_shared); + + inline void branchTestString(Condition cond, const Address& address, + Label* label) PER_SHARED_ARCH; +@@ -1762,7 +1768,7 @@ + Label* label) PER_SHARED_ARCH; + inline void branchTestString(Condition cond, const ValueOperand& value, + Label* label) +- DEFINED_ON(arm, arm64, mips32, mips64, x86_shared); ++ DEFINED_ON(arm, arm64, mips32, mips64, loongarch64, x86_shared); + + inline void branchTestSymbol(Condition cond, const Address& address, + Label* label) PER_SHARED_ARCH; +@@ -1770,7 +1776,7 @@ + Label* label) PER_SHARED_ARCH; + inline void branchTestSymbol(Condition cond, const ValueOperand& value, + 
Label* label) +- DEFINED_ON(arm, arm64, mips32, mips64, x86_shared); ++ DEFINED_ON(arm, arm64, mips32, mips64, loongarch64, x86_shared); + + inline void branchTestBigInt(Condition cond, const Address& address, + Label* label) PER_SHARED_ARCH; +@@ -1786,7 +1792,7 @@ + Label* label) PER_SHARED_ARCH; + inline void branchTestNull(Condition cond, const ValueOperand& value, + Label* label) +- DEFINED_ON(arm, arm64, mips32, mips64, x86_shared); ++ DEFINED_ON(arm, arm64, mips32, mips64, loongarch64, x86_shared); + + // Clobbers the ScratchReg on x64. + inline void branchTestObject(Condition cond, const Address& address, +@@ -1795,7 +1801,7 @@ + Label* label) PER_SHARED_ARCH; + inline void branchTestObject(Condition cond, const ValueOperand& value, + Label* label) +- DEFINED_ON(arm, arm64, mips32, mips64, x86_shared); ++ DEFINED_ON(arm, arm64, mips32, mips64, loongarch64, x86_shared); + + inline void branchTestGCThing(Condition cond, const Address& address, + Label* label) PER_SHARED_ARCH; +@@ -1806,7 +1812,7 @@ + + inline void branchTestPrimitive(Condition cond, const ValueOperand& value, + Label* label) +- DEFINED_ON(arm, arm64, mips32, mips64, x86_shared); ++ DEFINED_ON(arm, arm64, mips32, mips64, loongarch64, x86_shared); + + inline void branchTestMagic(Condition cond, const Address& address, + Label* label) PER_SHARED_ARCH; +@@ -1815,7 +1821,7 @@ + template + inline void branchTestMagic(Condition cond, const ValueOperand& value, + L label) +- DEFINED_ON(arm, arm64, mips32, mips64, x86_shared); ++ DEFINED_ON(arm, arm64, mips32, mips64, loongarch64, x86_shared); + + inline void branchTestMagic(Condition cond, const Address& valaddr, + JSWhyMagic why, Label* label) PER_ARCH; +@@ -1833,17 +1839,17 @@ + // The type of the value should match the type of the method. + inline void branchTestInt32Truthy(bool truthy, const ValueOperand& value, + Label* label) +- DEFINED_ON(arm, arm64, mips32, mips64, x86_shared); ++ DEFINED_ON(arm, arm64, mips32, mips64, loongarch64, x86_shared); + inline void branchTestDoubleTruthy(bool truthy, FloatRegister reg, + Label* label) PER_SHARED_ARCH; + inline void branchTestBooleanTruthy(bool truthy, const ValueOperand& value, + Label* label) PER_ARCH; + inline void branchTestStringTruthy(bool truthy, const ValueOperand& value, + Label* label) +- DEFINED_ON(arm, arm64, mips32, mips64, x86_shared); ++ DEFINED_ON(arm, arm64, mips32, mips64, loongarch64, x86_shared); + inline void branchTestBigIntTruthy(bool truthy, const ValueOperand& value, + Label* label) +- DEFINED_ON(arm, arm64, mips32, mips64, x86_shared); ++ DEFINED_ON(arm, arm64, mips32, mips64, loongarch64, x86_shared); + + // Create an unconditional branch to the address given as argument. + inline void branchToComputedAddress(const BaseIndex& address) PER_ARCH; +@@ -2007,10 +2013,10 @@ + // ======================================================================== + // Memory access primitives. 
+ inline void storeUncanonicalizedDouble(FloatRegister src, const Address& dest) +- DEFINED_ON(x86_shared, arm, arm64, mips32, mips64); ++ DEFINED_ON(x86_shared, arm, arm64, mips32, mips64, loongarch64); + inline void storeUncanonicalizedDouble(FloatRegister src, + const BaseIndex& dest) +- DEFINED_ON(x86_shared, arm, arm64, mips32, mips64); ++ DEFINED_ON(x86_shared, arm, arm64, mips32, mips64, loongarch64); + inline void storeUncanonicalizedDouble(FloatRegister src, const Operand& dest) + DEFINED_ON(x86_shared); + +@@ -2024,10 +2030,10 @@ + + inline void storeUncanonicalizedFloat32(FloatRegister src, + const Address& dest) +- DEFINED_ON(x86_shared, arm, arm64, mips32, mips64); ++ DEFINED_ON(x86_shared, arm, arm64, mips32, mips64, loongarch64); + inline void storeUncanonicalizedFloat32(FloatRegister src, + const BaseIndex& dest) +- DEFINED_ON(x86_shared, arm, arm64, mips32, mips64); ++ DEFINED_ON(x86_shared, arm, arm64, mips32, mips64, loongarch64); + inline void storeUncanonicalizedFloat32(FloatRegister src, + const Operand& dest) + DEFINED_ON(x86_shared); +@@ -3475,10 +3481,10 @@ + + // temp required on x86 and x64; must be undefined on mips64. + void convertUInt64ToFloat32(Register64 src, FloatRegister dest, Register temp) +- DEFINED_ON(arm64, mips64, x64, x86); ++ DEFINED_ON(arm64, mips64, loongarch64, x64, x86); + + void convertInt64ToFloat32(Register64 src, FloatRegister dest) +- DEFINED_ON(arm64, mips64, x64, x86); ++ DEFINED_ON(arm64, mips64, loongarch64, x64, x86); + + bool convertUInt64ToDoubleNeedsTemp() PER_ARCH; + +@@ -3519,19 +3525,19 @@ + + void wasmBoundsCheck32(Condition cond, Register index, + Register boundsCheckLimit, Label* label) +- DEFINED_ON(arm, arm64, mips32, mips64, x86_shared); ++ DEFINED_ON(arm, arm64, mips32, mips64, loongarch64, x86_shared); + + void wasmBoundsCheck32(Condition cond, Register index, + Address boundsCheckLimit, Label* label) +- DEFINED_ON(arm, arm64, mips32, mips64, x86_shared); ++ DEFINED_ON(arm, arm64, mips32, mips64, loongarch64, x86_shared); + + void wasmBoundsCheck64(Condition cond, Register64 index, + Register64 boundsCheckLimit, Label* label) +- DEFINED_ON(arm64, mips64, x64); ++ DEFINED_ON(arm64, mips64, loongarch64, x64); + + void wasmBoundsCheck64(Condition cond, Register64 index, + Address boundsCheckLimit, Label* label) +- DEFINED_ON(arm64, mips64, x64); ++ DEFINED_ON(arm64, mips64, loongarch64, x64); + + // Each wasm load/store instruction appends its own wasm::Trap::OutOfBounds. + void wasmLoad(const wasm::MemoryAccessDesc& access, Operand srcAddr, +@@ -3554,13 +3560,13 @@ + DEFINED_ON(arm, mips_shared); + void wasmLoadI64(const wasm::MemoryAccessDesc& access, Register memoryBase, + Register ptr, Register ptrScratch, Register64 output) +- DEFINED_ON(arm, mips32, mips64); ++ DEFINED_ON(arm, mips32, mips64, loongarch64); + void wasmStore(const wasm::MemoryAccessDesc& access, AnyRegister value, + Register memoryBase, Register ptr, Register ptrScratch) + DEFINED_ON(arm, mips_shared); + void wasmStoreI64(const wasm::MemoryAccessDesc& access, Register64 value, + Register memoryBase, Register ptr, Register ptrScratch) +- DEFINED_ON(arm, mips32, mips64); ++ DEFINED_ON(arm, mips32, mips64, loongarch64); + + // These accept general memoryBase + ptr + offset (in `access`); the offset is + // always smaller than the guard region. 
They will insert an additional add +@@ -3580,14 +3586,14 @@ + void wasmUnalignedLoad(const wasm::MemoryAccessDesc& access, + Register memoryBase, Register ptr, Register ptrScratch, + Register output, Register tmp) +- DEFINED_ON(mips32, mips64); ++ DEFINED_ON(mips32, mips64, loongarch64); + + // MIPS: `ptr` will always be updated. + void wasmUnalignedLoadFP(const wasm::MemoryAccessDesc& access, + Register memoryBase, Register ptr, + Register ptrScratch, FloatRegister output, + Register tmp1, Register tmp2, Register tmp3) +- DEFINED_ON(mips32, mips64); ++ DEFINED_ON(mips32, mips64, loongarch64); + + // `ptr` will always be updated. + void wasmUnalignedLoadI64(const wasm::MemoryAccessDesc& access, +@@ -3599,19 +3605,19 @@ + void wasmUnalignedStore(const wasm::MemoryAccessDesc& access, Register value, + Register memoryBase, Register ptr, + Register ptrScratch, Register tmp) +- DEFINED_ON(mips32, mips64); ++ DEFINED_ON(mips32, mips64, loongarch64); + + // `ptr` will always be updated. + void wasmUnalignedStoreFP(const wasm::MemoryAccessDesc& access, + FloatRegister floatValue, Register memoryBase, + Register ptr, Register ptrScratch, Register tmp) +- DEFINED_ON(mips32, mips64); ++ DEFINED_ON(mips32, mips64, loongarch64); + + // `ptr` will always be updated. + void wasmUnalignedStoreI64(const wasm::MemoryAccessDesc& access, + Register64 value, Register memoryBase, + Register ptr, Register ptrScratch, Register tmp) +- DEFINED_ON(mips32, mips64); ++ DEFINED_ON(mips32, mips64, loongarch64); + + // wasm specific methods, used in both the wasm baseline compiler and ion. + +@@ -3642,11 +3648,11 @@ + void wasmTruncateDoubleToInt64(FloatRegister input, Register64 output, + bool isSaturating, Label* oolEntry, + Label* oolRejoin, FloatRegister tempDouble) +- DEFINED_ON(arm64, x86, x64, mips64); ++ DEFINED_ON(arm64, x86, x64, mips64, loongarch64); + void wasmTruncateDoubleToUInt64(FloatRegister input, Register64 output, + bool isSaturating, Label* oolEntry, + Label* oolRejoin, FloatRegister tempDouble) +- DEFINED_ON(arm64, x86, x64, mips64); ++ DEFINED_ON(arm64, x86, x64, mips64, loongarch64); + void oolWasmTruncateCheckF64ToI64(FloatRegister input, Register64 output, + TruncFlags flags, wasm::BytecodeOffset off, + Label* rejoin) +@@ -3655,11 +3661,11 @@ + void wasmTruncateFloat32ToInt64(FloatRegister input, Register64 output, + bool isSaturating, Label* oolEntry, + Label* oolRejoin, FloatRegister tempDouble) +- DEFINED_ON(arm64, x86, x64, mips64); ++ DEFINED_ON(arm64, x86, x64, mips64, loongarch64); + void wasmTruncateFloat32ToUInt64(FloatRegister input, Register64 output, + bool isSaturating, Label* oolEntry, + Label* oolRejoin, FloatRegister tempDouble) +- DEFINED_ON(arm64, x86, x64, mips64); ++ DEFINED_ON(arm64, x86, x64, mips64, loongarch64); + void oolWasmTruncateCheckF32ToI64(FloatRegister input, Register64 output, + TruncFlags flags, wasm::BytecodeOffset off, + Label* rejoin) +@@ -4808,7 +4814,7 @@ + template + inline void addStackPtrTo(T t); + +- void subFromStackPtr(Imm32 imm32) DEFINED_ON(mips32, mips64, arm, x86, x64); ++ void subFromStackPtr(Imm32 imm32) DEFINED_ON(mips32, mips64, loongarch64, arm, x86, x64); + void subFromStackPtr(Register reg); + + template +diff -ur a/mfbt/double-conversion/double-conversion/utils.h b/mfbt/double-conversion/double-conversion/utils.h +--- a/mfbt/double-conversion/double-conversion/utils.h 2022-07-02 17:03:33.801508000 +0800 ++++ b/mfbt/double-conversion/double-conversion/utils.h 2022-07-02 17:18:24.988508000 +0800 +@@ -107,7 +107,7 @@ + #if defined(_M_X64) 
|| defined(__x86_64__) || \ + defined(__ARMEL__) || defined(__avr32__) || defined(_M_ARM) || defined(_M_ARM64) || \ + defined(__hppa__) || defined(__ia64__) || \ +- defined(__mips__) || \ ++ defined(__mips__) || defined(__loongarch__) || defined(__loongarch64) || \ + defined(__nios2__) || defined(__ghs) || \ + defined(__powerpc__) || defined(__ppc__) || defined(__ppc64__) || \ + defined(_POWER) || defined(_ARCH_PPC) || defined(_ARCH_PPC64) || \ +diff -ur a/mfbt/tests/TestPoisonArea.cpp b/mfbt/tests/TestPoisonArea.cpp +--- a/mfbt/tests/TestPoisonArea.cpp 2022-07-02 17:03:33.809508000 +0800 ++++ b/mfbt/tests/TestPoisonArea.cpp 2022-07-02 17:04:48.113508000 +0800 +@@ -175,6 +175,9 @@ + # define RETURN_INSTR _return_instr + # define RETURN_INSTR_TYPE ia64_instr + ++#elif defined __loongarch64 ++#define RETURN_INSTR 0x03e00008 /* jr ra */ ++ + #else + # error "Need return instruction for this architecture" + #endif +diff -ur a/nsprpub/pr/include/md/_freebsd.cfg b/nsprpub/pr/include/md/_freebsd.cfg +--- a/nsprpub/pr/include/md/_freebsd.cfg 2022-07-02 17:03:36.055508000 +0800 ++++ b/nsprpub/pr/include/md/_freebsd.cfg 2022-07-02 17:04:48.113508000 +0800 +@@ -490,6 +490,53 @@ + #define PR_BYTES_PER_WORD_LOG2 3 + #define PR_BYTES_PER_DWORD_LOG2 3 + ++#elif defined(__loongarch__) ++ ++#undef IS_BIG_ENDIAN ++#define IS_LITTLE_ENDIAN 1 ++ ++#define IS_64 ++ ++#define PR_BYTES_PER_BYTE 1 ++#define PR_BYTES_PER_SHORT 2 ++#define PR_BYTES_PER_INT 4 ++#define PR_BYTES_PER_INT64 8 ++#define PR_BYTES_PER_LONG 8 ++#define PR_BYTES_PER_FLOAT 4 ++#define PR_BYTES_PER_DOUBLE 8 ++#define PR_BYTES_PER_WORD 8 ++#define PR_BYTES_PER_DWORD 8 ++ ++#define PR_BITS_PER_BYTE 8 ++#define PR_BITS_PER_SHORT 16 ++#define PR_BITS_PER_INT 32 ++#define PR_BITS_PER_INT64 64 ++#define PR_BITS_PER_LONG 64 ++#define PR_BITS_PER_FLOAT 32 ++#define PR_BITS_PER_DOUBLE 64 ++#define PR_BITS_PER_WORD 64 ++ ++#define PR_BITS_PER_BYTE_LOG2 3 ++#define PR_BITS_PER_SHORT_LOG2 4 ++#define PR_BITS_PER_INT_LOG2 5 ++#define PR_BITS_PER_INT64_LOG2 6 ++#define PR_BITS_PER_LONG_LOG2 6 ++#define PR_BITS_PER_FLOAT_LOG2 5 ++#define PR_BITS_PER_DOUBLE_LOG2 6 ++#define PR_BITS_PER_WORD_LOG2 6 ++ ++#define PR_ALIGN_OF_SHORT 2 ++#define PR_ALIGN_OF_INT 4 ++#define PR_ALIGN_OF_LONG 8 ++#define PR_ALIGN_OF_INT64 8 ++#define PR_ALIGN_OF_FLOAT 4 ++#define PR_ALIGN_OF_DOUBLE 8 ++#define PR_ALIGN_OF_POINTER 8 ++#define PR_ALIGN_OF_WORD 8 ++ ++#define PR_BYTES_PER_WORD_LOG2 3 ++#define PR_BYTES_PER_DWORD_LOG2 3 ++ + #elif defined(__mips__) + + #if defined(__MIPSEB__) || defined(_MIPSEB) +diff -ur a/nsprpub/pr/include/md/_linux.cfg b/nsprpub/pr/include/md/_linux.cfg +--- a/nsprpub/pr/include/md/_linux.cfg 2022-07-02 17:03:36.055508000 +0800 ++++ b/nsprpub/pr/include/md/_linux.cfg 2022-07-02 17:04:48.114508000 +0800 +@@ -496,6 +496,56 @@ + #define PR_BYTES_PER_WORD_LOG2 2 + #define PR_BYTES_PER_DWORD_LOG2 3 + ++#elif defined(__loongarch__) ++ ++/* For _ABI64 */ ++#include ++ ++#define IS_LITTLE_ENDIAN 1 ++#undef IS_BIG_ENDIAN ++ ++#define IS_64 ++ ++#define PR_BYTES_PER_BYTE 1 ++#define PR_BYTES_PER_SHORT 2 ++#define PR_BYTES_PER_INT 4 ++#define PR_BYTES_PER_INT64 8 ++#define PR_BYTES_PER_LONG 4 ++#define PR_BYTES_PER_FLOAT 4 ++#define PR_BYTES_PER_DOUBLE 8 ++#define PR_BYTES_PER_WORD 4 ++#define PR_BYTES_PER_DWORD 8 ++ ++#define PR_BITS_PER_BYTE 8 ++#define PR_BITS_PER_SHORT 16 ++#define PR_BITS_PER_INT 32 ++#define PR_BITS_PER_INT64 64 ++#define PR_BITS_PER_LONG 32 ++#define PR_BITS_PER_FLOAT 32 ++#define PR_BITS_PER_DOUBLE 64 ++#define PR_BITS_PER_WORD 32 ++ 
++#define PR_BITS_PER_BYTE_LOG2 3 ++#define PR_BITS_PER_SHORT_LOG2 4 ++#define PR_BITS_PER_INT_LOG2 5 ++#define PR_BITS_PER_INT64_LOG2 6 ++#define PR_BITS_PER_LONG_LOG2 5 ++#define PR_BITS_PER_FLOAT_LOG2 5 ++#define PR_BITS_PER_DOUBLE_LOG2 6 ++#define PR_BITS_PER_WORD_LOG2 5 ++ ++#define PR_ALIGN_OF_SHORT 2 ++#define PR_ALIGN_OF_INT 4 ++#define PR_ALIGN_OF_LONG 4 ++#define PR_ALIGN_OF_INT64 8 ++#define PR_ALIGN_OF_FLOAT 4 ++#define PR_ALIGN_OF_DOUBLE 8 ++#define PR_ALIGN_OF_POINTER 4 ++#define PR_ALIGN_OF_WORD 4 ++ ++#define PR_BYTES_PER_WORD_LOG2 2 ++#define PR_BYTES_PER_DWORD_LOG2 3 ++ + #elif defined(__mips__) + + /* For _ABI64 */ +diff -ur a/nsprpub/pr/include/md/_linux.h b/nsprpub/pr/include/md/_linux.h +--- a/nsprpub/pr/include/md/_linux.h 2022-07-02 17:03:36.056508000 +0800 ++++ b/nsprpub/pr/include/md/_linux.h 2022-07-02 17:04:48.114508000 +0800 +@@ -37,6 +37,8 @@ + #define _PR_SI_ARCHITECTURE "sparc" + #elif defined(__i386__) + #define _PR_SI_ARCHITECTURE "x86" ++#elif defined(__loongarch__) ++#define _PR_SI_ARCHITECTURE "loongarch" + #elif defined(__mips__) + #define _PR_SI_ARCHITECTURE "mips" + #elif defined(__arm__) +@@ -83,7 +85,7 @@ + #define _MD_DEFAULT_STACK_SIZE 65536L + #define _MD_MMAP_FLAGS MAP_PRIVATE + +-#if defined(__aarch64__) || defined(__mips__) ++#if defined(__aarch64__) || defined(__mips__) || defined(__loongarch__) + #define _MD_MINIMUM_STACK_SIZE 0x20000 + #endif + +@@ -178,6 +180,16 @@ + #endif + #endif + ++#if defined(__loongarch__) && defined(__GCC_HAVE_SYNC_COMPARE_AND_SWAP_4) ++/* Use GCC built-in functions */ ++#define _PR_HAVE_ATOMIC_OPS ++#define _MD_INIT_ATOMIC() ++#define _MD_ATOMIC_INCREMENT(ptr) __sync_add_and_fetch(ptr, 1) ++#define _MD_ATOMIC_DECREMENT(ptr) __sync_sub_and_fetch(ptr, 1) ++#define _MD_ATOMIC_ADD(ptr, i) __sync_add_and_fetch(ptr, i) ++#define _MD_ATOMIC_SET(ptr, nv) __sync_lock_test_and_set(ptr, nv) ++#endif ++ + #if defined(__mips__) && defined(__GCC_HAVE_SYNC_COMPARE_AND_SWAP_4) + /* Use GCC built-in functions */ + #define _PR_HAVE_ATOMIC_OPS +@@ -454,6 +466,18 @@ + #endif /* defined(__GLIBC__) && __GLIBC__ >= 2 */ + #define PR_NUM_GCREGS 6 + ++#elif defined(__loongarch__) ++/* Linux/MIPS */ ++#if defined(__GLIBC__) && __GLIBC__ >= 2 ++#define _MD_GET_SP(_t) (_t)->md.context[0].__jmpbuf[0].__sp ++#define _MD_SET_FP(_t, val) ((_t)->md.context[0].__jmpbuf[0].__fp = (val)) ++#define _MD_GET_SP_PTR(_t) &(_MD_GET_SP(_t)) ++#define _MD_GET_FP_PTR(_t) (&(_t)->md.context[0].__jmpbuf[0].__fp) ++#define _MD_SP_TYPE __ptr_t ++#else ++#error "Linux/Loongarch pre-glibc2 not supported yet" ++#endif /* defined(__GLIBC__) && __GLIBC__ >= 2 */ ++ + #elif defined(__mips__) + /* Linux/MIPS */ + #if defined(__GLIBC__) && __GLIBC__ >= 2 +@@ -533,6 +557,19 @@ + _thread->md.sp = _MD_GET_SP_PTR(_thread); \ + _thread->md.fp = _MD_GET_FP_PTR(_thread); \ + _MD_SET_FP(_thread, 0); \ ++} ++ ++#elif defined(__loongarch__) ++ ++#define _MD_INIT_CONTEXT(_thread, _sp, _main, status) \ ++{ \ ++ *status = PR_TRUE; \ ++ (void) sigsetjmp(CONTEXT(_thread), 1); \ ++ _thread->md.context[0].__jmpbuf[0].__pc = (__ptr_t) _main; \ ++ _MD_GET_SP(_thread) = (_MD_SP_TYPE) ((_sp) - 64); \ ++ _thread->md.sp = _MD_GET_SP_PTR(_thread); \ ++ _thread->md.fp = _MD_GET_FP_PTR(_thread); \ ++ _MD_SET_FP(_thread, 0); \ + } + + #elif defined(__mips__) +diff -ur a/nsprpub/pr/include/pratom.h b/nsprpub/pr/include/pratom.h +--- a/nsprpub/pr/include/pratom.h 2022-07-02 17:03:36.058508000 +0800 ++++ b/nsprpub/pr/include/pratom.h 2022-07-02 17:04:48.114508000 +0800 +@@ -108,6 +108,8 @@ + 
(defined(__arm__) && \ + defined(__GCC_HAVE_SYNC_COMPARE_AND_SWAP_4)) || \ + defined(__aarch64__) || defined(__alpha) || \ ++ (defined(__loongarch__) && \ ++ defined(__GCC_HAVE_SYNC_COMPARE_AND_SWAP_4)) || \ + (defined(__mips__) && \ + defined(__GCC_HAVE_SYNC_COMPARE_AND_SWAP_4))))) + +diff -ur a/python/mozbuild/mozbuild/configure/constants.py b/python/mozbuild/mozbuild/configure/constants.py +--- a/python/mozbuild/mozbuild/configure/constants.py 2022-07-02 17:03:43.773508000 +0800 ++++ b/python/mozbuild/mozbuild/configure/constants.py 2022-07-02 17:26:30.853508000 +0800 +@@ -47,6 +47,7 @@ + "arm": 32, + "hppa": 32, + "ia64": 64, ++ "loongarch64": 64, + "m68k": 32, + "mips32": 32, + "mips64": 64, +@@ -83,6 +84,7 @@ + ("arm", "__arm__ || _M_ARM"), + ("aarch64", "__aarch64__ || _M_ARM64"), + ("ia64", "__ia64__"), ++ ("loongarch64", "__loongarch64"), + ("s390x", "__s390x__"), + ("s390", "__s390__"), + ("ppc64", "__powerpc64__"), diff --git a/just/PKGBUILD b/just/PKGBUILD index 4f29e500ce..881fa91652 100644 --- a/just/PKGBUILD +++ b/just/PKGBUILD @@ -21,7 +21,13 @@ prepare() { build() { cd "${pkgname}-${pkgver}" - cargo build --frozen --release + find -name Cargo.lock -exec rm -f {} \; + mkdir -p .cargo + cat > .cargo/config.toml < +Date: Thu, 29 Jun 2023 19:49:29 +0800 +Subject: [PATCH] Add basic support for LoongArch architecture + +--- + src/engines/ptrace.cc | 6 ++++++ + src/engines/ptrace_linux.cc | 18 +++++++++++------- + src/solib-parser/lib.c | 2 ++ + 3 files changed, 19 insertions(+), 7 deletions(-) + +diff --git a/src/engines/ptrace.cc b/src/engines/ptrace.cc +index 7676751..18acaa6 100644 +--- a/src/engines/ptrace.cc ++++ b/src/engines/ptrace.cc +@@ -52,6 +52,12 @@ static unsigned long arch_setupBreakpoint(unsigned long addr, unsigned long old_ + val = (old_data & ~(0xffffffffUL << shift)) | (0xd4200000UL << shift); + #elif defined(__riscv) + val = 0x00100073; /* ebreak */ // No width problem, prefer ebreak than c.ebreak for ISA w/o C extension. 
++#elif defined(__loongarch__) ++ unsigned long aligned_addr = getAligned(addr); ++ unsigned long offs = addr - aligned_addr; ++ unsigned long shift = 8 * offs; ++ ++ val = (old_data & ~(0xffffffffUL << shift)) | (0x002a0004UL << shift); /* break 0x4 */ + #else + # error Unsupported architecture + #endif +diff --git a/src/engines/ptrace_linux.cc b/src/engines/ptrace_linux.cc +index 4062a06..a73be00 100644 +--- a/src/engines/ptrace_linux.cc ++++ b/src/engines/ptrace_linux.cc +@@ -3,7 +3,7 @@ + #include + #include + +-#if defined(__aarch64__) ++#if defined(__aarch64__) || defined(__loongarch__) + # include + # include + #endif +@@ -20,7 +20,7 @@ + enum + { + i386_EIP = 12, x86_64_RIP = 16, ppc_NIP = 32, arm_PC = 15, aarch64_PC = 32, // See Linux arch/arm64/include/asm/ptrace.h +- riscv_EPC = 0 ++ riscv_EPC = 0, loongarch_ERA = 33 + }; + + static void arch_adjustPcAfterBreakpoint(unsigned long *regs); +@@ -41,7 +41,7 @@ static void arch_adjustPcAfterBreakpoint(unsigned long *regs) + regs[i386_EIP]--; + #elif defined(__x86_64__) + regs[x86_64_RIP]--; +-#elif defined(__powerpc__) || defined(__arm__) || defined(__aarch64__) || defined(__riscv) ++#elif defined(__powerpc__) || defined(__arm__) || defined(__aarch64__) || defined(__riscv) || defined(__loongarch__) + // Do nothing + #else + # error Unsupported architecture +@@ -64,6 +64,8 @@ static unsigned long arch_getPcFromRegs(unsigned long *regs) + out = regs[ppc_NIP]; + #elif defined(__riscv) + out = regs[riscv_EPC]; ++#elif defined(__loongarch__) ++ out = regs[loongarch_ERA]; + #else + # error Unsupported architecture + #endif +@@ -321,7 +323,7 @@ static unsigned long getPcFromRegs(unsigned long *regs) + + static long getRegs(pid_t pid, void *addr, void *regs, size_t len) + { +-#if defined(__aarch64__) ++#if defined(__aarch64__) || defined(__loongarch__) + struct iovec iov = + { regs, len}; + return ptrace(PTRACE_GETREGSET, pid, (void *)NT_PRSTATUS, &iov); +@@ -369,7 +371,7 @@ void ptrace_sys::pokeWord(pid_t pid, unsigned long aligned_addr, unsigned long v + + static long setRegs(pid_t pid, void *addr, void *regs, size_t len) + { +-#if defined(__aarch64__) ++#if defined(__aarch64__) || defined(__loongarch__) + struct iovec iov = + { regs, len}; + return ptrace(PTRACE_SETREGSET, pid, (void *)NT_PRSTATUS, &iov); +@@ -392,7 +394,7 @@ void ptrace_sys::singleStep(pid_t pid) + void ptrace_sys::skipInstruction(pid_t pid) + { + // Nop on x86, op on PowerPC/ARM +-#if defined(__powerpc__) || defined(__arm__) || defined(__aarch64__) ++#if defined(__powerpc__) || defined(__arm__) || defined(__aarch64__) || defined(__loongarch__) + unsigned long regs[1024]; + + getRegs(pid, NULL, regs, sizeof regs); +@@ -401,8 +403,10 @@ void ptrace_sys::skipInstruction(pid_t pid) + regs[ppc_NIP] += 4; + # elif defined(__aarch64__) + regs[aarch64_PC] += 4; +-# else ++# elif defined(__arm__) + regs[arm_PC] += 4; ++# else ++ regs[loongarch_ERA] += 4; + # endif + setRegs(pid, NULL, regs, sizeof regs); + #endif +diff --git a/src/solib-parser/lib.c b/src/solib-parser/lib.c +index 87a2344..82a7712 100644 +--- a/src/solib-parser/lib.c ++++ b/src/solib-parser/lib.c +@@ -97,6 +97,8 @@ static void force_breakpoint(void) + ".long 0xd4200000\n" /* From https://github.com/scottt/debugbreak */ + #elif defined(__riscv) + "ebreak\n" ++#elif defined(__loongarch__) ++ "break 0x4\n" + #else + # error Unsupported architecture + #endif +-- +2.41.0 + diff --git a/kcov/PKGBUILD b/kcov/PKGBUILD index c244cabe49..dab17f9fe7 100644 --- a/kcov/PKGBUILD +++ b/kcov/PKGBUILD @@ -9,8 +9,15 @@ 
url="https://simonkagstrom.github.io/kcov" license=('GPL2') depends=('elfutils' 'curl' 'zstd' 'zlib' 'binutils' 'libsframe.so') makedepends=('cmake' 'python' 'ninja') -source=("https://github.com/SimonKagstrom/kcov/archive/v${pkgver}.tar.gz") -sha512sums=('4ba0eafe54e4e156a18c965c43bc9634db2e1f385ea4cf52e9123818b3addd31357b6857cd17490894eeb12554bb5a77e89b0657e5fdefa05696d2ac20058ae8') +source=("https://github.com/SimonKagstrom/kcov/archive/v${pkgver}.tar.gz" + 0001-Add-basic-support-for-LoongArch-architecture.patch) +sha512sums=('4ba0eafe54e4e156a18c965c43bc9634db2e1f385ea4cf52e9123818b3addd31357b6857cd17490894eeb12554bb5a77e89b0657e5fdefa05696d2ac20058ae8' + 'e6f34cfbe6ed16c3c4288eacf7b68ed7428fe9690b4974329ce65d9efd413c73662409c7b76663e81b8bb56260499d9bf252ab7afffd3602f19981157eadd0cc') + +prepare() { + cd $pkgname-$pkgver + patch -p1 -i $srcdir/0001-Add-basic-support-for-LoongArch-architecture.patch +} build() { cd $pkgname-$pkgver diff --git a/kdeplasma-addons/PKGBUILD b/kdeplasma-addons/PKGBUILD index e31b7508ca..9995028f99 100644 --- a/kdeplasma-addons/PKGBUILD +++ b/kdeplasma-addons/PKGBUILD @@ -11,7 +11,7 @@ arch=(loong64 x86_64) url='https://kde.org/plasma-desktop/' license=(LGPL) depends=(plasma-workspace kunitconversion5) -makedepends=(extra-cmake-modules qt5-webengine networkmanager-qt5) +makedepends=(extra-cmake-modules networkmanager-qt5) optdepends=('purpose5: Quickshare applet' 'quota-tools: disk quota applet' 'qt5-webengine: dictionary and webbrowser applets' diff --git a/kernel-headers-musl/PKGBUILD b/kernel-headers-musl/PKGBUILD index 57f0bb971b..d110331dea 100644 --- a/kernel-headers-musl/PKGBUILD +++ b/kernel-headers-musl/PKGBUILD @@ -3,25 +3,28 @@ # Contributor: Jens Staal pkgname=kernel-headers-musl -pkgver=4.19.88 -pkgrel=2 +pkgver=6.0 +_rdate=20221017 +pkgrel=1 pkgdesc="Linux kernel headers sanitized for use with musl libc" arch=('loong64' 'x86_64') url="https://github.com/sabotage-linux/kernel-headers" license=('LGPL') +makedepends=(rsync) depends=('musl') -source=("$pkgname-$pkgver.tar.gz::https://github.com/sabotage-linux/kernel-headers/archive/v${pkgver/_/-}.tar.gz") -sha512sums=('db0239c40399c89cc250b9f1f53b7ec4eb119fde6b25c503aef7e88b80694df3a5e89196a22e66376731764bac83d9120794ee6c601a95b824f1ab770cb45a61') +source=(https://github.com/yetist/linux/releases/download/v${_rdate}/linux-${pkgver}-${_rdate}.tar.xz) +sha256sums=('ac4822f7dad35e42b1d0b02190eb876d80f3beefe9576ae8b45aeb5c5bc79eb1') _CARCH=$CARCH [[ $CARCH = i?86 ]] && _CARCH=x86 +[[ $CARCH = loong64 ]] && _CARCH=loongarch build() { - cd "$srcdir"/kernel-headers-${pkgver/_/-} - make ARCH=${_CARCH} prefix=/usr/lib/musl + cd "$srcdir"/linux-${pkgver/_/-} + make ARCH=${_CARCH} mrproper } package() { - cd "$srcdir"/kernel-headers-${pkgver/_/-} - make ARCH=${_CARCH} prefix=/usr/lib/musl DESTDIR="$pkgdir" install + cd "$srcdir"/linux-${pkgver/_/-} + make ARCH=${_CARCH} INSTALL_HDR_PATH="$pkgdir/usr/lib/musl" headers_install } diff --git a/kmon/PKGBUILD b/kmon/PKGBUILD index 1ee3ae43a6..bb1926cc9e 100644 --- a/kmon/PKGBUILD +++ b/kmon/PKGBUILD @@ -16,7 +16,7 @@ validpgpkeys=('C4B2D24CF87CD188C79D00BB485B7C52E9EC0DC6') # kmon releases # Contributor: Tobias Kieslich -pkgname=(libgda libgda-{firebird,jdbc,mysql,postgres}) +pkgname=(libgda libgda-{firebird,mysql,postgres}) pkgver=5.2.10 pkgrel=5 pkgdesc="Database access library" @@ -14,7 +14,9 @@ makedepends=(glade mariadb-libs postgresql-libs libfbclient jdk11-openjdk intlto _commit=85a2532df64698306fd3be324bb2052fb1b80fd3 # tags/LIBGDA_5_2_10^0 
source=("git+https://gitlab.gnome.org/GNOME/libgda.git#commit=$_commit" 0001-Use-single-header-with-GtkSource.patch - 0002-Use-goocanvas-3.0.patch) + 0002-Use-goocanvas-3.0.patch + libgda-la64.patch) + sha256sums=('SKIP' '104594a6034b041d87c1129b0b5544c8899393d66a6a0fb7a8b55f7fcb77e560' '81ab26fd12f7174e880815f6aa624c24821b1f9c7f386ae566293df7f041b25b') @@ -39,6 +41,8 @@ prepare() { # Port to goocanvas-3.0 git apply -3 ../0002-Use-goocanvas-3.0.patch + patch -p1 -i $srcdir/libgda-la64.patch + rm getsp.class NOCONFIGURE=1 ./autogen.sh } @@ -56,7 +60,6 @@ build() { package_libgda() { optdepends=('libgda-firebird: provider for Firebird' - 'libgda-jdbc: provider for JDBC' 'libgda-mysql: provider for MySQL' 'libgda-postgres: provider for PostgreSQL') options+=(emptydirs) @@ -66,16 +69,16 @@ package_libgda() { mkdir -p providers local provider - for provider in firebird jdbc mysql postgres; do + for provider in firebird mysql postgres; do mv "$pkgdir"/usr/lib/libgda-$_apiver/providers/libgda-$provider[-.]*so \ "$pkgdir"/usr/lib/pkgconfig/libgda-$provider-$_apiver.pc \ "$pkgdir"/usr/share/libgda-$_apiver/${provider}_specs_*.xml \ "$srcdir/providers" done - mv "$pkgdir"/usr/lib/libgda-$_apiver/providers/gdaprovider-${_apiver}.jar \ - "$pkgdir"/usr/bin/gda-list-jdbc-providers-${_apiver} \ - "$srcdir/providers" +# mv "$pkgdir"/usr/lib/libgda-$_apiver/providers/gdaprovider-${_apiver}.jar \ +# "$pkgdir"/usr/bin/gda-list-jdbc-providers-${_apiver} \ +# "$srcdir/providers" } _packageprovider() { @@ -112,3 +115,10 @@ package_libgda-postgres() { depends=(libgda postgresql-libs) _packageprovider postgres } +sha256sums=('SKIP' + '104594a6034b041d87c1129b0b5544c8899393d66a6a0fb7a8b55f7fcb77e560' + '81ab26fd12f7174e880815f6aa624c24821b1f9c7f386ae566293df7f041b25b') +sha256sums=('SKIP' + '104594a6034b041d87c1129b0b5544c8899393d66a6a0fb7a8b55f7fcb77e560' + '81ab26fd12f7174e880815f6aa624c24821b1f9c7f386ae566293df7f041b25b' + '8f6f9129df0032d895a8549f3e66e5eef530791326d14097cbb3e5fc8a2b84b1') diff --git a/libgda/libgda-la64.patch b/libgda/libgda-la64.patch new file mode 100644 index 0000000000..4e9ad28d23 --- /dev/null +++ b/libgda/libgda-la64.patch @@ -0,0 +1,22 @@ +Index: libgda/getsp.java +=================================================================== +--- libgda.orig/getsp.java ++++ libgda/getsp.java +@@ -25,7 +25,7 @@ public class getsp { + while (i<=j) { + if (i==j || lp.charAt(i)==ps) { + String lib=lp.substring(k,i); +- String suffix="/lib/amd64/server"; ++ String suffix="/lib/loongarch64/server"; + k=i+1; + if (lib.compareTo(".")!=0) + r=(r==null)?(prefix+lib+suffix):(r+" "+prefix+lib+suffix); +@@ -50,7 +50,7 @@ public class getsp { + + if (r!=null) System.out.println(r); + } else if (args[0].compareTo("-ldpath")==0) { +- String lp1=System.getProperty("java.home")+"/lib/amd64/server"; ++ String lp1=System.getProperty("java.home")+"/lib/loongarch64/server"; + String lp2=System.getProperty("java.library.path"); + System.out.println(lp1+":"+lp2); + } diff --git a/libgexiv2/PKGBUILD b/libgexiv2/PKGBUILD index f11fbc8d62..60faaba6cc 100644 --- a/libgexiv2/PKGBUILD +++ b/libgexiv2/PKGBUILD @@ -3,7 +3,7 @@ pkgname=libgexiv2 pkgver=0.14.2 -pkgrel=1 +pkgrel=2 pkgdesc='GObject-based wrapper around the Exiv2 library' url='https://wiki.gnome.org/Projects/gexiv2' arch=(loong64 x86_64) diff --git a/libglvnd/PKGBUILD b/libglvnd/PKGBUILD index 500c12f9b4..53e84e3298 100644 --- a/libglvnd/PKGBUILD +++ b/libglvnd/PKGBUILD @@ -17,6 +17,7 @@ 
sha512sums=('7b6eb8e075b48f1d915b892044adc3260547d74ed61d1e2fa6c5f0f8c3527754abe build() { arch-meson $pkgname-v$pkgver build \ + -Dasm=disabled \ -D gles1=false meson compile -C build diff --git a/libgme/PKGBUILD b/libgme/PKGBUILD index cf80bb5142..e6f394754b 100644 --- a/libgme/PKGBUILD +++ b/libgme/PKGBUILD @@ -16,7 +16,7 @@ sha256sums=('aba34e53ef0ec6a34b58b84e28bf8cfbccee6585cebca25333604c35db3e051d') validpgpkeys=(5406ECE83665DA9D201D35720BAF0C9C7B6AE9F2) # Michael Pyne build() { - cmake -S game-music-emu-$pkgver -B build -G Ninja -DCMAKE_INSTALL_PREFIX='/usr' -DCMAKE_BUILD_TYPE=Release + cmake -S game-music-emu-$pkgver -B build -G Ninja -DCMAKE_INSTALL_PREFIX='/usr' -DCMAKE_BUILD_TYPE=Release -DENABLE_UBSAN=off cmake --build build } diff --git a/libgoom2/PKGBUILD b/libgoom2/PKGBUILD index 96296aa5e2..32642faa58 100644 --- a/libgoom2/PKGBUILD +++ b/libgoom2/PKGBUILD @@ -15,6 +15,8 @@ sha512sums=('790e3ab8dee122320ad8b3ae15f6a1cd2780222d5ae97979f614f16ba73b4b85396 build() { cd "${srcdir}/goom2k4-0" + CFLAGS=${CFLAGS/-Wformat -Werror=format-security/} + CXXFLAGS=${CXXFLAGS/-Wformat -Werror=format-security/} ./configure --prefix=/usr \ --without-xmms \ diff --git a/libgpod/PKGBUILD b/libgpod/PKGBUILD index 866dc191bd..a2423d3fa6 100644 --- a/libgpod/PKGBUILD +++ b/libgpod/PKGBUILD @@ -3,7 +3,7 @@ pkgname=libgpod pkgver=0.8.3 -pkgrel=15 +pkgrel=16 pkgdesc="A shared library to access the contents of an iPod" url="http://www.gtkpod.org/libgpod/" arch=(loong64 x86_64) diff --git a/libimagequant/PKGBUILD b/libimagequant/PKGBUILD index 339dbde847..a5966338be 100644 --- a/libimagequant/PKGBUILD +++ b/libimagequant/PKGBUILD @@ -14,6 +14,12 @@ sha256sums=('ff1a34d3df9a1a5e5c1fa3895c036a885dc7b9740d7fccdf57e9ed678b8fb3a3') build() { cd "$srcdir/$pkgname-$pkgver/imagequant-sys" + find -name Cargo.lock -exec rm -f {} \; + mkdir -p .cargo + cat > .cargo/config.toml <11' + 'jdk11-openjdk' ) optdepends=('java-runtime>11: for TurboJPEG Java wrapper') provides=( @@ -45,6 +45,7 @@ build() { -D ENABLE_STATIC=OFF -D WITH_JAVA=ON -D WITH_JPEG8=ON + -D WITH_SIMD=OFF -G Ninja -S $pkgname-$pkgver -W no-dev diff --git a/libjxl/PKGBUILD b/libjxl/PKGBUILD index 81f977eb81..c491dde5c1 100644 --- a/libjxl/PKGBUILD +++ b/libjxl/PKGBUILD @@ -82,7 +82,7 @@ package_libjxl() { DESTDIR="$pkgdir" cmake --install build install -D -m644 libjxl/{LICENSE,PATENTS} -t "${pkgdir}/usr/share/licenses/${pkgname}" - mv "${pkgdir}/usr/share/java"/{org.jpeg.jpegxl,jpegxl}.jar +# mv "${pkgdir}/usr/share/java"/{org.jpeg.jpegxl,jpegxl}.jar } package_libjxl-doc() { diff --git a/libksysguard/PKGBUILD b/libksysguard/PKGBUILD index 9d0cdd7473..ffddf24952 100644 --- a/libksysguard/PKGBUILD +++ b/libksysguard/PKGBUILD @@ -10,7 +10,7 @@ pkgdesc='Library to retrieve information on the current status of computer hardw arch=(loong64 x86_64) url='https://kde.org/plasma-desktop/' license=(LGPL) -depends=(libxres qt5-webengine knewstuff5) +depends=(libxres knewstuff5) makedepends=(extra-cmake-modules kdoctools5 qt5-tools) conflicts=('ksysguard<5.21.90') groups=(plasma) diff --git a/libopenraw/PKGBUILD b/libopenraw/PKGBUILD index e39717b17a..6fc81a9d5c 100644 --- a/libopenraw/PKGBUILD +++ b/libopenraw/PKGBUILD @@ -14,14 +14,17 @@ license=('LGPL') depends=('gdk-pixbuf2') makedepends=('boost' 'libxml2' 'cargo') provides=('libopenraw.so' 'libopenrawgnome.so') -source=("https://libopenraw.freedesktop.org/download/${pkgname}-${pkgver}.tar.bz2"{.asc,}) +source=("https://libopenraw.freedesktop.org/download/${pkgname}-${pkgver}.tar.bz2"{.asc,} + 
libopenraw-fix-build.patch) b2sums=('SKIP' '4f6ea0db32843c6685ddd5d72332e7eedfa28527d4ef76a8c1aeab56966681cc7a005cc903411a6d33c8638e31757b883fff25d6a5500b9b9395e5cdcf3a56f9') validpgpkeys=('6C44DB3E0BF3EAF5B433239A5FEE05E6A56E15A3') # Hubert Figuiere prepare() { + cd ${pkgname}-${pkgver} # Fix libopenraw dependency in -gnome pc file - sed -e 's|libopenraw-0.1|libopenraw-0.3|' -i $pkgname-$pkgver/gnome/libopenraw-gnome-0.3.pc.in + sed -e 's|libopenraw-0.1|libopenraw-0.3|' -i gnome/libopenraw-gnome-0.3.pc.in + patch -p1 -i $srcdir/libopenraw-fix-build.patch } build() { diff --git a/libopenraw/libopenraw-fix-build.patch b/libopenraw/libopenraw-fix-build.patch new file mode 100644 index 0000000000..5d6a1f38b8 --- /dev/null +++ b/libopenraw/libopenraw-fix-build.patch @@ -0,0 +1,12 @@ +Index: libopenraw-0.3.0/lib/cr2file.cpp +=================================================================== +--- libopenraw-0.3.0.orig/lib/cr2file.cpp ++++ libopenraw-0.3.0/lib/cr2file.cpp +@@ -24,6 +24,7 @@ + #include + #include + #include ++#include + + #include + #include diff --git a/liborcus/PKGBUILD b/liborcus/PKGBUILD index acc6db3df2..ee32a3cc72 100644 --- a/liborcus/PKGBUILD +++ b/liborcus/PKGBUILD @@ -11,9 +11,16 @@ license=('MPL') depends=('libixion' 'glibc' 'boost-libs' 'gcc-libs' 'zlib' 'python') makedepends=('boost' 'mdds') optdepends=('python-requests: in tool bugzilla') -source=(https://kohei.us/files/orcus/src/${pkgname}-${pkgver}.tar.xz) +source=(https://kohei.us/files/orcus/src/${pkgname}-${pkgver}.tar.xz +liborcus-cstdint.patch) # https://gitlab.com/orcus/orcus/-/releases -sha256sums=('69ed26a00d4aaa7688e62a6e003cbc81928521a45e96605e53365aa499719e39') +sha256sums=('69ed26a00d4aaa7688e62a6e003cbc81928521a45e96605e53365aa499719e39' + '8e64a22ffaec4dbd5e7055c58b9cce209038130de01369829f40be55b4a9e29d') + +prepare() { + cd "${pkgname}"-${pkgver} + patch -p1 -i $srcdir/liborcus-cstdint.patch +} build() { cd "${pkgname}"-${pkgver} diff --git a/liborcus/liborcus-cstdint.patch b/liborcus/liborcus-cstdint.patch new file mode 100644 index 0000000000..cd2e132af0 --- /dev/null +++ b/liborcus/liborcus-cstdint.patch @@ -0,0 +1,20 @@ +--- liborcus-0.17.2/include/orcus/types.hpp 2023-03-10 19:40:51.890866647 +0800 ++++ liborcus-0.17.2/include/orcus/types.hpp 2023-03-10 19:41:05.074252753 +0800 +@@ -12,6 +12,7 @@ + #include + #include + #include ++#include + #include "env.hpp" + + namespace orcus { +--- liborcus-0.17.2/include/orcus/base64.hpp 2021-10-14 10:59:58.000000000 +0800 ++++ liborcus-0.17.2/include/orcus/base64.hpp 2023-03-10 19:47:14.812579067 +0800 +@@ -11,6 +11,7 @@ + #include "env.hpp" + #include + #include ++#include + + namespace orcus { + diff --git a/libotr/PKGBUILD b/libotr/PKGBUILD index 6fed46df2f..0e6e97c7fd 100644 --- a/libotr/PKGBUILD +++ b/libotr/PKGBUILD @@ -13,14 +13,17 @@ arch=('loong64' 'x86_64') depends=('libgcrypt') validpgpkeys=('22DF3305DF56667CE15784FCF24DE08F42C2ABAD') # OTR Dev Team source=(https://otr.cypherpunks.ca/${pkgname}-${pkgver}.tar.gz{,.asc} - "$pkgname-4.1.1-include-socket.h.patch") + "$pkgname-4.1.1-include-socket.h.patch" + libotr-fix-build.patch) sha256sums=('8b3b182424251067a952fb4e6c7b95a21e644fbb27fbd5f8af2b2ed87ca419f5' 'SKIP' - 'cfda75f8c5bba2e735d2b4f1bb90f60b45fa1d554a97fff75cac467f7873ebde') + 'cfda75f8c5bba2e735d2b4f1bb90f60b45fa1d554a97fff75cac467f7873ebde' + '8564fd454e46d7d90919c9b665ea6634868d64f96cfa5e25163d772f9c88d2f4') prepare() { # FS#75450 patch -d "$pkgname-$pkgver" -N -p 1 -i "${srcdir}/$pkgname-4.1.1-include-socket.h.patch" + patch -d 
"$pkgname-$pkgver" -N -p 1 -i "$srcdir/libotr-fix-build.patch" } build() { diff --git a/libotr/libotr-fix-build.patch b/libotr/libotr-fix-build.patch new file mode 100644 index 0000000000..ecb87e7d76 --- /dev/null +++ b/libotr/libotr-fix-build.patch @@ -0,0 +1,12 @@ +Index: libotr-4.1.1/tests/regression/client/client.c +=================================================================== +--- libotr-4.1.1.orig/tests/regression/client/client.c ++++ libotr-4.1.1/tests/regression/client/client.c +@@ -26,6 +26,7 @@ + #include + #include + #include ++#include + #include + #include + #include diff --git a/libraw/PKGBUILD b/libraw/PKGBUILD index 74a9258c3c..3ed4e0b269 100644 --- a/libraw/PKGBUILD +++ b/libraw/PKGBUILD @@ -23,6 +23,7 @@ sha256sums=('fe7288013206854baf6e4417d0fb63ba4ed7227bf36fff021992671c2dd34b03') build() { cd LibRaw-$pkgver + autoreconf ./configure --prefix=/usr make } diff --git a/libredefender/PKGBUILD b/libredefender/PKGBUILD index ba7be8da57..f8890cbaa0 100644 --- a/libredefender/PKGBUILD +++ b/libredefender/PKGBUILD @@ -18,7 +18,7 @@ validpgpkeys=("64B13F7117D6E07D661BBCE0FE763A64F5E54FD6") prepare() { cd "${pkgname}-${pkgver}" - cargo fetch --locked --target "$CARCH-unknown-linux-gnu" + cargo fetch --locked --target "`uname -m`-unknown-linux-gnu" } build() { diff --git a/libreoffice-fresh/PKGBUILD b/libreoffice-fresh/PKGBUILD index a02366f5c3..ea641d0b5f 100644 --- a/libreoffice-fresh/PKGBUILD +++ b/libreoffice-fresh/PKGBUILD @@ -164,6 +164,8 @@ build() { # Build only minimal debug info to reduce size (~1.2GB -> ~225MB) CFLAGS=${CFLAGS/-g /-g1 } CXXFLAGS=${CXXFLAGS/-g /-g1 } + CFLAGS=${CFLAGS/-mlsx /} + CXXFLAGS=${CFLAGS/-mlsx /} # this uses malloc_usable_size, which is incompatible with fortification level 3 # /usr/lib/libreoffice/program/libskialo.so uses malloc_usable_size @@ -223,7 +225,7 @@ build() { --with-system-libmwaw \ --with-system-libetonyek \ --with-system-libfreehand \ - --without-system-firebird \ + --disable-firebird-sdbc \ --with-system-zxing \ --with-system-libtommath \ --with-system-libatomic-ops \ diff --git a/libretro-genesis-plus-gx/PKGBUILD b/libretro-genesis-plus-gx/PKGBUILD index edfe8c8f20..d2ee1123bb 100644 --- a/libretro-genesis-plus-gx/PKGBUILD +++ b/libretro-genesis-plus-gx/PKGBUILD @@ -19,6 +19,7 @@ depends=( makedepends=( clang git + gettext ) _commit=ed1e5f514d2e314ceacde841da485aa69cccba4a source=(libretro-genesis-plus-gx::git+https://github.com/libretro/Genesis-Plus-GX.git#commit=${_commit}) @@ -31,6 +32,10 @@ pkgver() { build() { export CC=clang # FS#71188 - [libretro-genesis-plus-gx] segfaults when compiled with -O2 + CFLAGS=${CFLAGS/-mlsx /} + CXXFLAGS=${CXXFLAGS/-mlsx /} + CFLAGS=${CFLAGS/-fstack-clash-protection/} + CXXFLAGS=${CXXFLAGS/-fstack-clash-protection/} make \ SHARED_LIBVORBIS=1 \ SYSTEM_ZLIB=1 \ diff --git a/librustls/PKGBUILD b/librustls/PKGBUILD index c04ccf032c..50c7f9af36 100644 --- a/librustls/PKGBUILD +++ b/librustls/PKGBUILD @@ -20,7 +20,7 @@ sha256sums=('7eaffd02528155f561742bd712f5454e68fb771b3eb55d63bf0520429ab717f1' prepare() { cd rustls-ffi-${pkgver} - cargo fetch --locked --target "$CARCH-unknown-linux-gnu" + cargo fetch --locked --target "$(rustc -vV | sed -n 's/host: //p')" patch -Np1 -i ../shared-linking.patch } diff --git a/libserialport/PKGBUILD b/libserialport/PKGBUILD index eea7c30c52..5ad1d20401 100644 --- a/libserialport/PKGBUILD +++ b/libserialport/PKGBUILD @@ -13,7 +13,7 @@ depends=('glibc') source=("https://sigrok.org/download/source/libserialport/libserialport-$pkgver.tar.gz" 
"diable_termiox.patch::https://sigrok.org/gitweb/?p=libserialport.git;a=patch;h=6f9b03e597ea7200eb616a4e410add3dd1690cb1") sha512sums=('7d379d1099173841e6d4df04c8c12dc6a4ebdfa0323ef35da6f3dea20db55d4f6ad81c1f6679f5aac7fe83270176428a817daa8627c336505335a07e06350a85' - 'b4834dedc393ba23c80c6487b41c69c273f11ce201c72d54668003774226883a9185c295a3ac2cc33d6075dbf38921c67f4d39a160656884c67152f75951822f') + '8e64304df07aa163370d157a052b5c101350011ab8474258fe9f1beb0565c5efec9f1ad4918749d39f34423061f7de338280d36fb5f72d99df4d4f5ce8162e8c') prepare() { cd $pkgname-$pkgver diff --git a/libsmbios/0001-add-support-for-loongarch64.patch b/libsmbios/0001-add-support-for-loongarch64.patch new file mode 100644 index 0000000000..b4166621ba --- /dev/null +++ b/libsmbios/0001-add-support-for-loongarch64.patch @@ -0,0 +1,672 @@ +From 2e8c11b36537b1319af1c8095909b8882d203a0a Mon Sep 17 00:00:00 2001 +From: Xiaotian Wu +Date: Mon, 9 May 2022 18:08:30 +0800 +Subject: [PATCH] add support for loongarch64 + +--- + src/libsmbios_c/cmos/cmos_linux.c | 6 + + src/libsmbios_c/cmos/io.h | 625 ++++++++++++++++++++++++++++++ + 2 files changed, 631 insertions(+) + create mode 100644 src/libsmbios_c/cmos/io.h + +diff --git a/src/libsmbios_c/cmos/cmos_linux.c b/src/libsmbios_c/cmos/cmos_linux.c +index 11c0871..ffae53c 100644 +--- a/src/libsmbios_c/cmos/cmos_linux.c ++++ b/src/libsmbios_c/cmos/cmos_linux.c +@@ -22,7 +22,11 @@ + #include "smbios_c/compat.h" + + // system ++#if defined(__i386__) || defined(__x86_64__) + #include ++#else ++#include "io.h" ++#endif + #include + #include + +@@ -57,8 +61,10 @@ int __hidden init_cmos_struct(struct cmos_access_obj *m) + int retval = 0; + + fnprintf("\n"); ++#if defined(__i386__) || defined(__x86_64__) + if(iopl(3) < 0) + goto out_noprivs; ++#endif + + m->read_fn = linux_read_fn; + m->write_fn = linux_write_fn; +diff --git a/src/libsmbios_c/cmos/io.h b/src/libsmbios_c/cmos/io.h +new file mode 100644 +index 0000000..fb0a726 +--- /dev/null ++++ b/src/libsmbios_c/cmos/io.h +@@ -0,0 +1,625 @@ ++/* vi: set sw=4 ts=4 sts=4 expandtab wrap ai: */ ++/* Generic I/O port emulation, based on MN10300 code ++ * ++ * Copyright (C) 2007 Red Hat, Inc. All Rights Reserved. ++ * Copyright (C) 2021 Xiaotian Wu ++ * Written by David Howells (dhowells@redhat.com) ++ * ++ * This program is free software; you can redistribute it and/or ++ * modify it under the terms of the GNU General Public Licence ++ * as published by the Free Software Foundation; either version ++ * 2 of the Licence, or (at your option) any later version. ++ */ ++ ++#ifndef __IO_H__ ++#define __IO_H__ 1 ++ ++#include ++#include ++ ++/* ++ * __raw_{read,write}{b,w,l,q}() access memory in native endianness. ++ * ++ * On some architectures memory mapped IO needs to be accessed differently. ++ * On the simple architectures, we just read/write the memory location ++ * directly. 
++ */ ++#ifndef __raw_readb ++#define __raw_readb __raw_readb ++static inline __u8 __raw_readb(const volatile void *addr) ++{ ++ return *(const volatile __u8 *)addr; ++} ++#endif ++#ifndef __raw_readw ++#define __raw_readw __raw_readw ++static inline __u16 __raw_readw(const volatile void *addr) ++{ ++ return *(const volatile __u16 *)addr; ++} ++#endif ++#ifndef __raw_readl ++#define __raw_readl __raw_readl ++static inline __u32 __raw_readl(const volatile void *addr) ++{ ++ return *(const volatile __u32 *)addr; ++} ++#endif ++ ++#ifdef __LP64__ ++#ifndef __raw_readq ++#define __raw_readq __raw_readq ++static inline __u64 __raw_readq(const volatile void *addr) ++{ ++ return *(const volatile __u64 *)addr; ++} ++#endif ++#endif /* __LP64__ */ ++ ++#ifndef __raw_writeb ++#define __raw_writeb __raw_writeb ++static inline void __raw_writeb(__u8 value, volatile void *addr) ++{ ++ *(volatile __u8 *)addr = value; ++} ++#endif ++#ifndef __raw_writew ++#define __raw_writew __raw_writew ++static inline void __raw_writew(__u16 value, volatile void *addr) ++{ ++ *(volatile __u16 *)addr = value; ++} ++#endif ++#ifndef __raw_writel ++#define __raw_writel __raw_writel ++static inline void __raw_writel(__u32 value, volatile void *addr) ++{ ++ *(volatile __u32 *)addr = value; ++} ++#endif ++ ++#ifdef __LP64__ ++#ifndef __raw_writeq ++#define __raw_writeq __raw_writeq ++static inline void __raw_writeq(__u64 value, volatile void *addr) ++{ ++ *(volatile __u64 *)addr = value; ++} ++#endif ++#endif /* __LP64__ */ ++/* ++ * {read,write}{b,w,l,q}() access little endian memory and return result in ++ * native endianness. ++ */ ++#ifndef readb ++#define readb readb ++static inline __u8 readb(const volatile void *addr) ++{ ++ return __raw_readb(addr); ++} ++#endif ++#ifndef readw ++#define readw readw ++static inline __u16 readw(const volatile void *addr) ++{ ++ return __le16_to_cpu(__raw_readw(addr)); ++} ++#endif ++#ifndef readl ++#define readl readl ++static inline __u32 readl(const volatile void *addr) ++{ ++ return __le32_to_cpu(__raw_readl(addr)); ++} ++#endif ++#ifdef __LP64__ ++#ifndef readq ++#define readq readq ++static inline __u64 readq(const volatile void *addr) ++{ ++ return __le64_to_cpu(__raw_readq(addr)); ++} ++#endif ++#endif /* __LP64__ */ ++#ifndef writeb ++#define writeb writeb ++static inline void writeb(__u8 value, volatile void *addr) ++{ ++ __raw_writeb(value, addr); ++} ++#endif ++#ifndef writew ++#define writew writew ++static inline void writew(__u16 value, volatile void *addr) ++{ ++ __raw_writew(__cpu_to_le16(value), addr); ++} ++#endif ++#ifndef writel ++#define writel writel ++static inline void writel(__u32 value, volatile void *addr) ++{ ++ __raw_writel(__cpu_to_le32(value), addr); ++} ++#endif ++#ifdef __LP64__ ++#ifndef writeq ++#define writeq writeq ++static inline void writeq(__u64 value, volatile void *addr) ++{ ++ __raw_writeq(__cpu_to_le64(value), addr); ++} ++#endif ++#endif /* __LP64__ */ ++/* ++ * {read,write}{b,w,l,q}_relaxed() are like the regular version, but ++ * are not guaranteed to provide ordering against spinlocks or memory ++ * accesses. 
++ */ ++#ifndef readb_relaxed ++#define readb_relaxed readb ++#endif ++#ifndef readw_relaxed ++#define readw_relaxed readw ++#endif ++#ifndef readl_relaxed ++#define readl_relaxed readl ++#endif ++#if defined(readq) && !defined(readq_relaxed) ++#define readq_relaxed readq ++#endif ++#ifndef writeb_relaxed ++#define writeb_relaxed writeb ++#endif ++#ifndef writew_relaxed ++#define writew_relaxed writew ++#endif ++#ifndef writel_relaxed ++#define writel_relaxed writel ++#endif ++#if defined(writeq) && !defined(writeq_relaxed) ++#define writeq_relaxed writeq ++#endif ++/* ++ * {read,write}s{b,w,l,q}() repeatedly access the same memory address in ++ * native endianness in 8-, 16-, 32- or 64-bit chunks (@count times). ++ */ ++#ifndef readsb ++#define readsb readsb ++static inline void readsb(const volatile void *addr, void *buffer, ++ unsigned int count) ++{ ++ if (count) { ++ __u8 *buf = buffer; ++ do { ++ __u8 x = __raw_readb(addr); ++ *buf++ = x; ++ } while (--count); ++ } ++} ++#endif ++#ifndef readsw ++#define readsw readsw ++static inline void readsw(const volatile void *addr, void *buffer, ++ unsigned int count) ++{ ++ if (count) { ++ __u16 *buf = buffer; ++ do { ++ __u16 x = __raw_readw(addr); ++ *buf++ = x; ++ } while (--count); ++ } ++} ++#endif ++#ifndef readsl ++#define readsl readsl ++static inline void readsl(const volatile void *addr, void *buffer, ++ unsigned int count) ++{ ++ if (count) { ++ __u32 *buf = buffer; ++ do { ++ __u32 x = __raw_readl(addr); ++ *buf++ = x; ++ } while (--count); ++ } ++} ++#endif ++#ifdef __LP64__ ++#ifndef readsq ++#define readsq readsq ++static inline void readsq(const volatile void *addr, void *buffer, ++ unsigned int count) ++{ ++ if (count) { ++ __u64 *buf = buffer; ++ do { ++ __u64 x = __raw_readq(addr); ++ *buf++ = x; ++ } while (--count); ++ } ++} ++#endif ++#endif /* __LP64__ */ ++#ifndef writesb ++#define writesb writesb ++static inline void writesb(volatile void *addr, const void *buffer, ++ unsigned int count) ++{ ++ if (count) { ++ const __u8 *buf = buffer; ++ do { ++ __raw_writeb(*buf++, addr); ++ } while (--count); ++ } ++} ++#endif ++#ifndef writesw ++#define writesw writesw ++static inline void writesw(volatile void *addr, const void *buffer, ++ unsigned int count) ++{ ++ if (count) { ++ const __u16 *buf = buffer; ++ do { ++ __raw_writew(*buf++, addr); ++ } while (--count); ++ } ++} ++#endif ++#ifndef writesl ++#define writesl writesl ++static inline void writesl(volatile void *addr, const void *buffer, ++ unsigned int count) ++{ ++ if (count) { ++ const __u32 *buf = buffer; ++ do { ++ __raw_writel(*buf++, addr); ++ } while (--count); ++ } ++} ++#endif ++#ifdef __LP64__ ++#ifndef writesq ++#define writesq writesq ++static inline void writesq(volatile void *addr, const void *buffer, ++ unsigned int count) ++{ ++ if (count) { ++ const __u64 *buf = buffer; ++ do { ++ __raw_writeq(*buf++, addr); ++ } while (--count); ++ } ++} ++#endif ++#endif /* __LP64__ */ ++ ++#ifndef PCI_IOBASE ++#define PCI_IOBASE ((void *)0) ++#endif ++ ++#ifndef IO_SPACE_LIMIT ++#define IO_SPACE_LIMIT 0xffff ++#endif ++/* ++ * {in,out}{b,w,l}() access little endian I/O. {in,out}{b,w,l}_p() can be ++ * implemented on hardware that needs an additional delay for I/O accesses to ++ * take effect. 
++ */ ++#ifndef inb ++#define inb inb ++static inline __u8 inb(unsigned long addr) ++{ ++ return readb(PCI_IOBASE + addr); ++} ++#endif ++#ifndef inw ++#define inw inw ++static inline __u16 inw(unsigned long addr) ++{ ++ return readw(PCI_IOBASE + addr); ++} ++#endif ++#ifndef inl ++#define inl inl ++static inline __u32 inl(unsigned long addr) ++{ ++ return readl(PCI_IOBASE + addr); ++} ++#endif ++#ifndef outb ++#define outb outb ++static inline void outb(__u8 value, unsigned long addr) ++{ ++ writeb(value, PCI_IOBASE + addr); ++} ++#endif ++#ifndef outw ++#define outw outw ++static inline void outw(__u16 value, unsigned long addr) ++{ ++ writew(value, PCI_IOBASE + addr); ++} ++#endif ++#ifndef outl ++#define outl outl ++static inline void outl(__u32 value, unsigned long addr) ++{ ++ writel(value, PCI_IOBASE + addr); ++} ++#endif ++#ifndef inb_p ++#define inb_p inb_p ++static inline __u8 inb_p(unsigned long addr) ++{ ++ return inb(addr); ++} ++#endif ++#ifndef inw_p ++#define inw_p inw_p ++static inline __u16 inw_p(unsigned long addr) ++{ ++ return inw(addr); ++} ++#endif ++#ifndef inl_p ++#define inl_p inl_p ++static inline __u32 inl_p(unsigned long addr) ++{ ++ return inl(addr); ++} ++#endif ++#ifndef outb_p ++#define outb_p outb_p ++static inline void outb_p(__u8 value, unsigned long addr) ++{ ++ outb(value, addr); ++} ++#endif ++#ifndef outw_p ++#define outw_p outw_p ++static inline void outw_p(__u16 value, unsigned long addr) ++{ ++ outw(value, addr); ++} ++#endif ++#ifndef outl_p ++#define outl_p outl_p ++static inline void outl_p(__u32 value, unsigned long addr) ++{ ++ outl(value, addr); ++} ++#endif ++/* ++ * {in,out}s{b,w,l}{,_p}() are variants of the above that repeatedly access a ++ * single I/O port multiple times. ++ */ ++#ifndef insb ++#define insb insb ++static inline void insb(unsigned long addr, void *buffer, unsigned int count) ++{ ++ readsb(PCI_IOBASE + addr, buffer, count); ++} ++#endif ++#ifndef insw ++#define insw insw ++static inline void insw(unsigned long addr, void *buffer, unsigned int count) ++{ ++ readsw(PCI_IOBASE + addr, buffer, count); ++} ++#endif ++#ifndef insl ++#define insl insl ++static inline void insl(unsigned long addr, void *buffer, unsigned int count) ++{ ++ readsl(PCI_IOBASE + addr, buffer, count); ++} ++#endif ++#ifndef outsb ++#define outsb outsb ++static inline void outsb(unsigned long addr, const void *buffer, ++ unsigned int count) ++{ ++ writesb(PCI_IOBASE + addr, buffer, count); ++} ++#endif ++#ifndef outsw ++#define outsw outsw ++static inline void outsw(unsigned long addr, const void *buffer, ++ unsigned int count) ++{ ++ writesw(PCI_IOBASE + addr, buffer, count); ++} ++#endif ++#ifndef outsl ++#define outsl outsl ++static inline void outsl(unsigned long addr, const void *buffer, ++ unsigned int count) ++{ ++ writesl(PCI_IOBASE + addr, buffer, count); ++} ++#endif ++#ifndef insb_p ++#define insb_p insb_p ++static inline void insb_p(unsigned long addr, void *buffer, unsigned int count) ++{ ++ insb(addr, buffer, count); ++} ++#endif ++#ifndef insw_p ++#define insw_p insw_p ++static inline void insw_p(unsigned long addr, void *buffer, unsigned int count) ++{ ++ insw(addr, buffer, count); ++} ++#endif ++#ifndef insl_p ++#define insl_p insl_p ++static inline void insl_p(unsigned long addr, void *buffer, unsigned int count) ++{ ++ insl(addr, buffer, count); ++} ++#endif ++#ifndef outsb_p ++#define outsb_p outsb_p ++static inline void outsb_p(unsigned long addr, const void *buffer, ++ unsigned int count) ++{ ++ outsb(addr, buffer, count); ++} 
++#endif ++#ifndef outsw_p ++#define outsw_p outsw_p ++static inline void outsw_p(unsigned long addr, const void *buffer, ++ unsigned int count) ++{ ++ outsw(addr, buffer, count); ++} ++#endif ++#ifndef outsl_p ++#define outsl_p outsl_p ++static inline void outsl_p(unsigned long addr, const void *buffer, ++ unsigned int count) ++{ ++ outsl(addr, buffer, count); ++} ++#endif ++ ++#ifndef ioread8 ++#define ioread8 ioread8 ++static inline __u8 ioread8(const volatile void *addr) ++{ ++ return readb(addr); ++} ++#endif ++#ifndef ioread16 ++#define ioread16 ioread16 ++static inline __u16 ioread16(const volatile void *addr) ++{ ++ return readw(addr); ++} ++#endif ++#ifndef ioread32 ++#define ioread32 ioread32 ++static inline __u32 ioread32(const volatile void *addr) ++{ ++ return readl(addr); ++} ++#endif ++#ifdef __LP64__ ++#ifndef ioread64 ++#define ioread64 ioread64 ++static inline __u64 ioread64(const volatile void *addr) ++{ ++ return readq(addr); ++} ++#endif ++#endif /* __LP64__ */ ++#ifndef iowrite8 ++#define iowrite8 iowrite8 ++static inline void iowrite8(__u8 value, volatile void *addr) ++{ ++ writeb(value, addr); ++} ++#endif ++#ifndef iowrite16 ++#define iowrite16 iowrite16 ++static inline void iowrite16(__u16 value, volatile void *addr) ++{ ++ writew(value, addr); ++} ++#endif ++#ifndef iowrite32 ++#define iowrite32 iowrite32 ++static inline void iowrite32(__u32 value, volatile void *addr) ++{ ++ writel(value, addr); ++} ++#endif ++#ifdef __LP64__ ++#ifndef iowrite64 ++#define iowrite64 iowrite64 ++static inline void iowrite64(__u64 value, volatile void *addr) ++{ ++ writeq(value, addr); ++} ++#endif ++#endif /* __LP64__ */ ++ ++#ifndef ioread8_rep ++#define ioread8_rep ioread8_rep ++static inline void ioread8_rep(const volatile void *addr, void *buffer, ++ unsigned int count) ++{ ++ readsb(addr, buffer, count); ++} ++#endif ++#ifndef ioread16_rep ++#define ioread16_rep ioread16_rep ++static inline void ioread16_rep(const volatile void *addr, ++ void *buffer, unsigned int count) ++{ ++ readsw(addr, buffer, count); ++} ++#endif ++#ifndef ioread32_rep ++#define ioread32_rep ioread32_rep ++static inline void ioread32_rep(const volatile void *addr, ++ void *buffer, unsigned int count) ++{ ++ readsl(addr, buffer, count); ++} ++#endif ++#ifdef __LP64__ ++#ifndef ioread64_rep ++#define ioread64_rep ioread64_rep ++static inline void ioread64_rep(const volatile void *addr, ++ void *buffer, unsigned int count) ++{ ++ readsq(addr, buffer, count); ++} ++#endif ++#endif /* __LP64__ */ ++#ifndef iowrite8_rep ++#define iowrite8_rep iowrite8_rep ++static inline void iowrite8_rep(volatile void *addr, ++ const void *buffer, ++ unsigned int count) ++{ ++ writesb(addr, buffer, count); ++} ++#endif ++#ifndef iowrite16_rep ++#define iowrite16_rep iowrite16_rep ++static inline void iowrite16_rep(volatile void *addr, ++ const void *buffer, ++ unsigned int count) ++{ ++ writesw(addr, buffer, count); ++} ++#endif ++#ifndef iowrite32_rep ++#define iowrite32_rep iowrite32_rep ++static inline void iowrite32_rep(volatile void *addr, ++ const void *buffer, ++ unsigned int count) ++{ ++ writesl(addr, buffer, count); ++} ++#endif ++#ifdef __LP64__ ++#ifndef iowrite64_rep ++#define iowrite64_rep iowrite64_rep ++static inline void iowrite64_rep(volatile void *addr, ++ const void *buffer, ++ unsigned int count) ++{ ++ writesq(addr, buffer, count); ++} ++#endif ++#endif /* __LP64__ */ ++ ++#endif /* __IO_H__ */ +-- +2.35.1 + diff --git a/libsmbios/PKGBUILD b/libsmbios/PKGBUILD index 91cf49c290..a3c794ac64 100644 --- 
a/libsmbios/PKGBUILD +++ b/libsmbios/PKGBUILD @@ -13,7 +13,8 @@ depends=('gcc-libs') makedepends=('libxml2' 'python' 'chrpath' 'doxygen' git cppunit help2man) optdepends=('python: tools') _commit=5b72244ca0d09c7f228d571ec2d5d20183486c11 # tags/v2.4.3 -source=("git+https://github.com/dell/libsmbios.git#commit=$_commit") +source=("git+https://github.com/dell/libsmbios.git#commit=$_commit" +0001-add-support-for-loongarch64.patch) sha256sums=('SKIP') pkgver() { @@ -23,6 +24,7 @@ pkgver() { prepare() { cd $pkgname + patch -p1 -i $srcdir/0001-add-support-for-loongarch64.patch ./autogen.sh --no-configure } @@ -48,3 +50,5 @@ package() { install -m755 -d "${pkgdir}/usr/share/licenses/${pkgname}" install -m644 ../COPYING-OSL "${pkgdir}/usr/share/licenses/${pkgname}/" } +sha256sums=('SKIP' + '6bf9e8d30891867bd89698dbdb559a8c9d8c9878fd141a3ac1771993dfd6d420') diff --git a/liburcu/PKGBUILD b/liburcu/PKGBUILD index d658336d4f..2deb37bff1 100644 --- a/liburcu/PKGBUILD +++ b/liburcu/PKGBUILD @@ -10,11 +10,18 @@ arch=('loong64' 'x86_64') url="https://lttng.org/urcu" license=('LGPL2.1') depends=('glibc') -source=(https://lttng.org/files/urcu/userspace-rcu-${pkgver}.tar.bz2{,.asc}) +source=(https://lttng.org/files/urcu/userspace-rcu-${pkgver}.tar.bz2{,.asc} + userspace-rcu-loongarch64.patch) sha256sums=('ca43bf261d4d392cff20dfae440836603bf009fce24fdc9b2697d837a2239d4f' 'SKIP') validpgpkeys=('2A0B4ED915F2D3FA45F5B16217280A9781186ACF') +prepare() { + cd "$srcdir"/userspace-rcu-${pkgver} + patch -p1 -i "$srcdir/userspace-rcu-loongarch64.patch" + autoreconf -vfi +} + build() { cd "$srcdir"/userspace-rcu-${pkgver} ./configure --prefix=/usr diff --git a/liburcu/userspace-rcu-loongarch64.patch b/liburcu/userspace-rcu-loongarch64.patch new file mode 100644 index 0000000000..f57f3106a7 --- /dev/null +++ b/liburcu/userspace-rcu-loongarch64.patch @@ -0,0 +1,186 @@ +diff --git a/LICENSE b/LICENSE +index a06fdcc..acf13d7 100644 +--- a/LICENSE ++++ b/LICENSE +@@ -44,6 +44,7 @@ MIT/X11 (BSD like) license apply to: + compiler.h + arch/s390.h + uatomic/alpha.h ++uatomic/loongarch.h + uatomic/mips.h + uatomic/nios2.h + uatomic/s390.h +diff --git a/README.md b/README.md +index 02b903a..29b3a4a 100644 +--- a/README.md ++++ b/README.md +@@ -51,6 +51,7 @@ Currently, the following architectures are supported: + - hppa/PA-RISC + - m68k + - RISC-V ++ - LoongArch + + Tested on: + +diff --git a/include/Makefile.am b/include/Makefile.am +index 3f92cc3..1a562fa 100644 +--- a/include/Makefile.am ++++ b/include/Makefile.am +@@ -7,6 +7,7 @@ nobase_include_HEADERS = \ + urcu/arch.h \ + urcu/arch/hppa.h \ + urcu/arch/ia64.h \ ++ urcu/arch/loongarch.h \ + urcu/arch/m68k.h \ + urcu/arch/mips.h \ + urcu/arch/nios2.h \ +@@ -67,6 +68,7 @@ nobase_include_HEADERS = \ + urcu/uatomic.h \ + urcu/uatomic/hppa.h \ + urcu/uatomic/ia64.h \ ++ urcu/uatomic/loongarch.h \ + urcu/uatomic/m68k.h \ + urcu/uatomic/mips.h \ + urcu/uatomic/nios2.h \ +diff --git a/include/urcu/arch.h b/include/urcu/arch.h +index 2bffdbe..928577e 100644 +--- a/include/urcu/arch.h ++++ b/include/urcu/arch.h +@@ -49,6 +49,7 @@ + * URCU_ARCH_HPPA : All HP PA-RISC variants + * URCU_ARCH_M68K : All Motorola 68000 variants + * URCU_ARCH_RISCV : All RISC-V variants ++ * URCU_ARCH_LOONGARCH : All LoongArch variants + */ + + #if (defined(__INTEL_OFFLOAD) || defined(__TARGET_ARCH_MIC) || defined(__MIC__)) +@@ -167,6 +168,11 @@ + #define URCU_ARCH_RISCV 1 + #include + ++#elif defined(__loongarch__) ++ ++#define URCU_ARCH_LOONGARCH 1 ++#include ++ + #else + #error "Cannot build: unrecognized 
architecture, see ." + #endif +diff --git a/include/urcu/arch/loongarch.h b/include/urcu/arch/loongarch.h +new file mode 100644 +index 0000000..a6d9fee +--- /dev/null ++++ b/include/urcu/arch/loongarch.h +@@ -0,0 +1,49 @@ ++#ifndef _URCU_ARCH_LOONGARCH_H ++#define _URCU_ARCH_LOONGARCH_H ++ ++/* ++ * arch/loongarch.h: trivial definitions for the LoongArch architecture. ++ * ++ * Copyright (c) 2021 Wang Jing ++ * ++ * This library is free software; you can redistribute it and/or ++ * modify it under the terms of the GNU Lesser General Public ++ * License as published by the Free Software Foundation; either ++ * version 2.1 of the License, or (at your option) any later version. ++ * ++ * This library is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ * Lesser General Public License for more details. ++ * ++ * You should have received a copy of the GNU Lesser General Public ++ * License along with this library; if not, write to the Free Software ++ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA ++ */ ++ ++#include ++#include ++#include ++ ++#ifdef __cplusplus ++extern "C" { ++#endif ++ ++#include ++#include ++ ++/* ++ * On Linux, define the membarrier system call number if not yet available in ++ * the system headers. ++ */ ++#if (defined(__linux__) && !defined(__NR_membarrier)) ++#define __NR_membarrier 283 ++#endif ++ ++#ifdef __cplusplus ++} ++#endif ++ ++#include ++ ++#endif /* _URCU_ARCH_LOONGARCH_H */ +diff --git a/include/urcu/uatomic.h b/include/urcu/uatomic.h +index 2fb5fd4..e1ff44d 100644 +--- a/include/urcu/uatomic.h ++++ b/include/urcu/uatomic.h +@@ -51,6 +51,8 @@ + #include + #elif defined(URCU_ARCH_RISCV) + #include ++#elif defined(URCU_ARCH_LOONGARCH) ++#include + #else + #error "Cannot build: unrecognized architecture, see ." + #endif +diff --git a/include/urcu/uatomic/loongarch.h b/include/urcu/uatomic/loongarch.h +new file mode 100644 +index 0000000..f41302a +--- /dev/null ++++ b/include/urcu/uatomic/loongarch.h +@@ -0,0 +1,44 @@ ++#ifndef _URCU_UATOMIC_ARCH_LOONGARCH_H ++#define _URCU_UATOMIC_ARCH_LOONGARCH_H ++ ++/* ++ * Atomic exchange operations for the LoongArch architecture. Let GCC do it. ++ * ++ * Copyright (c) 2021 Wang Jing ++ * ++ * Permission is hereby granted, free of charge, to any person obtaining a copy ++ * of this software and associated documentation files (the "Software"), to ++ * deal in the Software without restriction, including without limitation the ++ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or ++ * sell copies of the Software, and to permit persons to whom the Software is ++ * furnished to do so, subject to the following conditions: ++ * ++ * The above copyright notice and this permission notice shall be included in ++ * all copies or substantial portions of the Software. ++ * ++ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR ++ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, ++ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE ++ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER ++ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING ++ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS ++ * IN THE SOFTWARE. 
++ */ ++ ++#include ++#include ++ ++#ifdef __cplusplus ++extern "C" { ++#endif ++ ++#define UATOMIC_HAS_ATOMIC_BYTE ++#define UATOMIC_HAS_ATOMIC_SHORT ++ ++#ifdef __cplusplus ++} ++#endif ++ ++#include ++ ++#endif /* _URCU_UATOMIC_ARCH_LOONGARCH_H */ diff --git a/libusbsio/PKGBUILD b/libusbsio/PKGBUILD index 06b1a7ba3a..0f3f63d124 100644 --- a/libusbsio/PKGBUILD +++ b/libusbsio/PKGBUILD @@ -50,7 +50,7 @@ package_libusbsio() { ) cd $pkgbase-$pkgver-src - install -vDm 755 bin/linux_$CARCH/$pkgbase.so -t "$pkgdir/usr/lib/" + install -vDm 755 bin/linux_`uname -m`/$pkgbase.so -t "$pkgdir/usr/lib/" install -vDm 644 license/*.txt -t "$pkgdir/usr/share/licenses/$pkgname/" } @@ -67,8 +67,8 @@ package_python-libusbsio() { ( cd python/dist/$pkgbase-$pkgver python -m installer --destdir="$pkgdir" dist/*.whl - install -vdm 755 "$pkgdir/$_site_packages/$pkgbase/bin/linux_$CARCH" - ln -fsv /usr/lib/$pkgbase.so "$pkgdir/$_site_packages/$pkgbase/bin/linux_$CARCH/$pkgbase.so" + install -vdm 755 "$pkgdir/$_site_packages/$pkgbase/bin/linux_`uname -m`" + ln -fsv /usr/lib/$pkgbase.so "$pkgdir/$_site_packages/$pkgbase/bin/linux_`uname -m`/$pkgbase.so" ) install -vDm 644 license/BSD-3-clause.txt -t "$pkgdir/usr/share/licenses/$pkgname/" install -vDm 644 python/README.md -t "$pkgdir/usr/share/doc/$pkgname/" diff --git a/libvirt/PKGBUILD b/libvirt/PKGBUILD index e16c04e57f..27232c604f 100644 --- a/libvirt/PKGBUILD +++ b/libvirt/PKGBUILD @@ -83,13 +83,15 @@ backup=( ) source=( "https://libvirt.org/sources/$pkgname-$pkgver.tar.xz"{,.asc} + libvirt-loongarch.patch ) sha256sums=('8ba2e72ec8bdd2418554a1474c42c35704c30174b7611eaf9a16544b71bcf00a' - 'SKIP') + 'f0562941282b157e2ebba9d203c33f4f9c0f3f93562129448f7de6e5df0575fc') validpgpkeys=('453B65310595562855471199CA68BE8010084C9C') # Jiří Denemark prepare() { cd "$pkgname-$pkgver" + patch -Np1 -i ../libvirt-loongarch.patch sed -i 's|/sysconfig/|/conf.d/|g' \ src/remote/libvirtd.service.in \ @@ -185,3 +187,12 @@ package_libvirt-storage-iscsi-direct() { install -Dv -t "$pkgdir/usr/lib/libvirt/storage-backend" "$pkgdir/../libvirt_storage_backend_iscsi-direct.so" } + +package_libvirt-storage-rbd() { + pkgdesc="Libvirt RBD storage backend" + depends=("libvirt=$pkgver") + optdepends=() + backup=() + + install -Dv -t "$pkgdir/usr/lib/libvirt/storage-backend" "$pkgdir/../libvirt_storage_backend_rbd.so" +} diff --git a/libvirt/libvirt-loongarch.patch b/libvirt/libvirt-loongarch.patch new file mode 100644 index 0000000000..169949dbfa --- /dev/null +++ b/libvirt/libvirt-loongarch.patch @@ -0,0 +1,407 @@ +diff --git a/src/conf/schemas/basictypes.rng b/src/conf/schemas/basictypes.rng +index 26eb538077..04f032b3ab 100644 +--- a/src/conf/schemas/basictypes.rng ++++ b/src/conf/schemas/basictypes.rng +@@ -470,6 +470,7 @@ + x86_64 + xtensa + xtensaeb ++ loongarch64 + + + +diff --git a/src/cpu/cpu.c b/src/cpu/cpu.c +index bc43aa4e93..dd677ba269 100644 +--- a/src/cpu/cpu.c ++++ b/src/cpu/cpu.c +@@ -28,6 +28,7 @@ + #include "cpu_s390.h" + #include "cpu_arm.h" + #include "cpu_riscv64.h" ++#include "cpu_loongarch64.h" + #include "capabilities.h" + + +@@ -41,6 +42,7 @@ static struct cpuArchDriver *drivers[] = { + &cpuDriverS390, + &cpuDriverArm, + &cpuDriverRiscv64, ++ &cpuDriverLoongarch64, + }; + + +diff --git a/src/cpu/cpu_loongarch64.c b/src/cpu/cpu_loongarch64.c +new file mode 100644 +index 0000000000..cf026258f9 +--- /dev/null ++++ b/src/cpu/cpu_loongarch64.c +@@ -0,0 +1,74 @@ ++/* ++ * cpu_loongarch64.c: CPU driver for loongarch64 CPUs ++ * ++ * Copyright (c) 2023, XinmuTouhouKyou ++ * 
++ * This library is free software; you can redistribute it and/or ++ * modify it under the terms of the GNU Lesser General Public ++ * License as published by the Free Software Foundation; either ++ * version 2.1 of the License, or (at your option) any later version. ++ * ++ * This library is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ * Lesser General Public License for more details. ++ * ++ * You should have received a copy of the GNU Lesser General Public ++ * License along with this library. If not, see ++ * . ++ */ ++ ++#include ++#include "cpu.h" ++#include "cpu_loongarch64.h" ++ ++ ++static const virArch archs[] = { VIR_ARCH_LOONGARCH64 }; ++static virCPUCompareResult ++virCPULoongarch64Compare(virCPUDef *host G_GNUC_UNUSED, ++ virCPUDef *cpu G_GNUC_UNUSED, ++ bool failMessages G_GNUC_UNUSED) ++{ ++ /* loongarch64 relies on QEMU to perform all runability checking. Return ++ * VIR_CPU_COMPARE_IDENTICAL to bypass Libvirt checking. ++ */ ++ return VIR_CPU_COMPARE_IDENTICAL; ++} ++ ++static int ++virCPULoongarch64ValidateFeatures(virCPUDef *cpu G_GNUC_UNUSED) ++{ ++ return 0; ++} ++ ++static int ++virCPULoongarch64Update(virCPUDef *guest, ++ const virCPUDef *host G_GNUC_UNUSED, ++ bool relative G_GNUC_UNUSED) ++{ ++ g_autoptr(virCPUDef) updated = virCPUDefCopyWithoutModel(guest); ++ ++ if (!relative || guest->mode != VIR_CPU_MODE_HOST_MODEL) ++ return 0; ++ ++ updated->mode = VIR_CPU_MODE_CUSTOM; ++ virCPUDefCopyModel(updated, host, true); ++ ++ virCPUDefStealModel(guest, updated, false); ++ guest->mode = VIR_CPU_MODE_CUSTOM; ++ guest->match = VIR_CPU_MATCH_EXACT; ++ ++ return 0; ++} ++ ++struct cpuArchDriver cpuDriverLoongarch64 = { ++ .name = "loongarch64", ++ .arch = archs, ++ .narch = G_N_ELEMENTS(archs), ++ .compare = virCPULoongarch64Compare, ++ .decode = NULL, ++ .encode = NULL, ++ .baseline = NULL, ++ .update = virCPULoongarch64Update, ++ .validateFeatures = virCPULoongarch64ValidateFeatures, ++}; +diff --git a/src/cpu/cpu_loongarch64.h b/src/cpu/cpu_loongarch64.h +new file mode 100644 +index 0000000000..a5f84b5637 +--- /dev/null ++++ b/src/cpu/cpu_loongarch64.h +@@ -0,0 +1,25 @@ ++/* ++ * cpu_loongarch64.h: CPU driver for loongarch64 CPUs ++ * ++ * Copyright (c) 2023, XinmuTouhouKyou ++ * ++ * This library is free software; you can redistribute it and/or ++ * modify it under the terms of the GNU Lesser General Public ++ * License as published by the Free Software Foundation; either ++ * version 2.1 of the License, or (at your option) any later version. ++ * ++ * This library is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ * Lesser General Public License for more details. ++ * ++ * You should have received a copy of the GNU Lesser General Public ++ * License along with this library. If not, see ++ * . 
++ */ ++ ++#pragma once ++ ++#include "cpu.h" ++ ++extern struct cpuArchDriver cpuDriverLoongarch64; +diff --git a/src/cpu/meson.build b/src/cpu/meson.build +index 55396903b9..09c02ef9d9 100644 +--- a/src/cpu/meson.build ++++ b/src/cpu/meson.build +@@ -6,6 +6,7 @@ cpu_sources = [ + 'cpu_riscv64.c', + 'cpu_s390.c', + 'cpu_x86.c', ++ 'cpu_loongarch64.c', + ] + + cpu_lib = static_library( +diff --git a/src/cpu_map/index.xml b/src/cpu_map/index.xml +index d2c5af5797..92948cd213 100644 +--- a/src/cpu_map/index.xml ++++ b/src/cpu_map/index.xml +@@ -119,4 +119,8 @@ + + + ++ ++ ++ ++ + +diff --git a/src/cpu_map/loongarch64_la464.xml b/src/cpu_map/loongarch64_la464.xml +new file mode 100644 +index 0000000000..3d4f34ae7a +--- /dev/null ++++ b/src/cpu_map/loongarch64_la464.xml +@@ -0,0 +1,5 @@ ++ ++ ++ ++ ++ +diff --git a/src/cpu_map/loongarch64_vendors.xml b/src/cpu_map/loongarch64_vendors.xml +new file mode 100644 +index 0000000000..64d49a9662 +--- /dev/null ++++ b/src/cpu_map/loongarch64_vendors.xml +@@ -0,0 +1,3 @@ ++ ++ ++ +\ No newline at end of file +diff --git a/src/cpu_map/meson.build b/src/cpu_map/meson.build +index ae5293e85f..6bce65f0fb 100644 +--- a/src/cpu_map/meson.build ++++ b/src/cpu_map/meson.build +@@ -84,6 +84,8 @@ cpumap_data = [ + 'x86_vendors.xml', + 'x86_Westmere-IBRS.xml', + 'x86_Westmere.xml', ++ 'loongarch64_vendors.xml', ++ 'loongarch64_la464.xml', + ] + + install_data(cpumap_data, install_dir: pkgdatadir / 'cpu_map') +diff --git a/src/qemu/qemu_capabilities.c b/src/qemu/qemu_capabilities.c +index 83119e871a..6c6eae0b66 100644 +--- a/src/qemu/qemu_capabilities.c ++++ b/src/qemu/qemu_capabilities.c +@@ -1138,7 +1138,7 @@ virQEMUCapsInitGuestFromBinary(virCaps *caps, + NULL, NULL, 0, NULL); + } + +- if ((ARCH_IS_X86(guestarch) || guestarch == VIR_ARCH_AARCH64)) ++ if ((ARCH_IS_X86(guestarch) || guestarch == VIR_ARCH_AARCH64 || guestarch == VIR_ARCH_LOONGARCH64)) + virCapabilitiesAddGuestFeatureWithToggle(guest, VIR_CAPS_GUEST_FEATURE_TYPE_ACPI, + true, true); + +@@ -2697,6 +2697,10 @@ static const char *preferredMachines[] = + + "sim", /* VIR_ARCH_XTENSA */ + "sim", /* VIR_ARCH_XTENSAEB */ ++ ++ ++// "virt", /* VIR_ARCH_LOONGARCH32 */ ++ "virt", /* VIR_ARCH_LOONGARCH64 */ + }; + G_STATIC_ASSERT(G_N_ELEMENTS(preferredMachines) == VIR_ARCH_LAST); + +diff --git a/src/qemu/qemu_domain.c b/src/qemu/qemu_domain.c +index 413f67577e..29a78127ea 100644 +--- a/src/qemu/qemu_domain.c ++++ b/src/qemu/qemu_domain.c +@@ -4221,7 +4221,12 @@ qemuDomainDefAddDefaultDevices(virQEMUDriver *driver, + if (qemuDomainIsMipsMalta(def)) + addPCIRoot = true; + break; +- ++ case VIR_ARCH_LOONGARCH64: ++ if (qemuDomainIsLoongarch64Virt(def)) { ++ addPCIRoot = true; ++ addDefaultUSB = true; ++ } ++ break; + case VIR_ARCH_ARMV7B: + case VIR_ARCH_CRIS: + case VIR_ARCH_ITANIUM: +@@ -8901,6 +8906,20 @@ qemuDomainMachineIsRISCVVirt(const char *machine, + return false; + } + ++static bool ++qemuDomainMachineIsLoongarch64Virt(const char *machine, ++ const virArch arch) ++{ ++ if (arch!=VIR_ARCH_LOONGARCH64) ++ return false; ++ ++ if (STREQ(machine, "virt") || ++ STRPREFIX(machine, "virt-")) { ++ return true; ++ } ++ ++ return false; ++} + + /* You should normally avoid this function and use + * qemuDomainIsPSeries() instead. 
*/ +@@ -8998,6 +9017,12 @@ qemuDomainIsRISCVVirt(const virDomainDef *def) + return qemuDomainMachineIsRISCVVirt(def->os.machine, def->os.arch); + } + ++bool ++qemuDomainIsLoongarch64Virt(const virDomainDef *def) ++{ ++ return qemuDomainMachineIsLoongarch64Virt(def->os.machine, def->os.arch); ++} ++ + + bool + qemuDomainIsPSeries(const virDomainDef *def) +diff --git a/src/qemu/qemu_domain.h b/src/qemu/qemu_domain.h +index 1e56e50672..cb444ff06a 100644 +--- a/src/qemu/qemu_domain.h ++++ b/src/qemu/qemu_domain.h +@@ -827,6 +827,7 @@ bool qemuDomainIsS390CCW(const virDomainDef *def); + bool qemuDomainIsARMVirt(const virDomainDef *def); + bool qemuDomainIsRISCVVirt(const virDomainDef *def); + bool qemuDomainIsPSeries(const virDomainDef *def); ++bool qemuDomainIsLoongarch64Virt(const virDomainDef *def); + bool qemuDomainIsMipsMalta(const virDomainDef *def); + bool qemuDomainHasPCIRoot(const virDomainDef *def); + bool qemuDomainHasPCIeRoot(const virDomainDef *def); +diff --git a/src/util/virarch.c b/src/util/virarch.c +index 01e520de73..9b981b6ced 100644 +--- a/src/util/virarch.c ++++ b/src/util/virarch.c +@@ -83,6 +83,10 @@ static const struct virArchData { + + { "xtensa", 32, VIR_ARCH_LITTLE_ENDIAN }, + { "xtensaeb", 32, VIR_ARCH_BIG_ENDIAN }, ++ ++// { "loong32", 32, VIR_ARCH_LITTLE_ENDIAN }, ++ { "loongarch64", 64, VIR_ARCH_LITTLE_ENDIAN }, ++ + }; + + G_STATIC_ASSERT(G_N_ELEMENTS(virArchData) == VIR_ARCH_LAST); +diff --git a/src/util/virarch.h b/src/util/virarch.h +index 747f77c48e..3e48102f98 100644 +--- a/src/util/virarch.h ++++ b/src/util/virarch.h +@@ -69,6 +69,9 @@ typedef enum { + VIR_ARCH_XTENSA, /* XTensa 32 LE https://en.wikipedia.org/wiki/Xtensa#Processor_Cores */ + VIR_ARCH_XTENSAEB, /* XTensa 32 BE https://en.wikipedia.org/wiki/Xtensa#Processor_Cores */ + ++// VIR_ARCH_LOONGARCH32, /* LoongArch 32 LE https://en.wikipedia.org/wiki/LoongArch */ ++ VIR_ARCH_LOONGARCH64, /* LoongArch 64 LE https://en.wikipedia.org/wiki/LoongArch */ ++ + VIR_ARCH_LAST, + } virArch; + +diff --git a/src/util/virsysinfo.c b/src/util/virsysinfo.c +index 36a861c53f..8a34c0479d 100644 +--- a/src/util/virsysinfo.c ++++ b/src/util/virsysinfo.c +@@ -1228,6 +1228,74 @@ virSysinfoReadDMI(void) + return g_steal_pointer(&ret); + } + ++static int ++virSysinfoParseLoongarch64Processor(const char *base, virSysinfoDef *ret) ++{ ++ const char *cur; ++ char *eol, *tmp_base; ++ virSysinfoProcessorDef *processor; ++ char *processor_type = NULL; ++ ++ if (!(tmp_base = strstr(base, "Model Name")) && ++ !(tmp_base = strstr(base, "processor"))) ++ return 0; ++ ++ eol = strchr(tmp_base, '\n'); ++ cur = strchr(tmp_base, ':') + 1; ++ virSkipSpaces(&cur); ++ if (eol) ++ processor_type = g_strndup(cur, eol - cur); ++ ++ while ((tmp_base = strstr(base, "processor")) != NULL) { ++ base = tmp_base; ++ eol = strchr(base, '\n'); ++ cur = strchr(base, ':') + 1; ++ ++ VIR_EXPAND_N(ret->processor, ret->nprocessor, 1); ++ processor = &ret->processor[ret->nprocessor - 1]; ++ ++ virSkipSpaces(&cur); ++ if (eol) ++ processor->processor_socket_destination = g_strndup(cur, ++ eol - cur); ++ ++ processor->processor_type = g_strdup(processor_type); ++ ++ base = cur; ++ } ++ ++ VIR_FREE(processor_type); ++ return 0; ++} ++virSysinfoDef * ++virSysinfoReadLoongArch64(void); ++virSysinfoDef * ++virSysinfoReadLoongArch64(void){ ++ g_autoptr(virSysinfoDef) ret = NULL; ++ g_autofree char *outbuf = NULL; ++ ++ if ((ret = virSysinfoReadDMI())) { ++ if (!virSysinfoDefIsEmpty(ret)) ++ return g_steal_pointer(&ret); ++ virSysinfoDefFree(ret); ++ } ++ ++ 
virResetLastError(); ++ ret = g_new0(virSysinfoDef, 1); ++ ++ if (virFileReadAll(CPUINFO, CPUINFO_FILE_LEN, &outbuf) < 0) { ++ virReportError(VIR_ERR_INTERNAL_ERROR, ++ _("Failed to open %1$s"), CPUINFO); ++ return NULL; ++ } ++ ++ ret->nprocessor = 0; ++ ret->processor = NULL; ++ if (virSysinfoParseLoongarch64Processor(outbuf, ret) < 0) ++ return NULL; ++ ++ return g_steal_pointer(&ret); ++} + + /** + * virSysinfoRead: +@@ -1250,6 +1318,8 @@ virSysinfoRead(void) + defined(__i386__) || \ + defined(__amd64__)) + return virSysinfoReadDMI(); ++#elif defined(__loongarch64) ++ return virSysinfoReadLoongArch64(); + #else /* WIN32 || not supported arch */ + /* + * this can probably be extracted from Windows using API or registry diff --git a/libvisual/PKGBUILD b/libvisual/PKGBUILD index 8568f7e8ad..9a6f1e5fec 100644 --- a/libvisual/PKGBUILD +++ b/libvisual/PKGBUILD @@ -13,6 +13,8 @@ sha256sums=('63085fd9835c42c9399ea6bb13a7ebd4b1547ace75c4595ce8e9759512bd998a') build() { cd ${pkgname}-${pkgver} + CFLAGS=${CFLAGS/-Werror=format-security/} + CXXFLAGS=${CXXFLAGS/-Werror=format-security/} ./configure --prefix=/usr make } diff --git a/libvpx/PKGBUILD b/libvpx/PKGBUILD index 197be672f5..42bef022d0 100644 --- a/libvpx/PKGBUILD +++ b/libvpx/PKGBUILD @@ -41,6 +41,8 @@ build() { --enable-shared \ --enable-vp8 \ --enable-vp9 \ + --disable-lsx \ + --disable-lasx \ --enable-vp9-highbitdepth \ --enable-vp9-temporal-denoising make diff --git a/libyuv/0001-fix-build-error.patch b/libyuv/0001-fix-build-error.patch new file mode 100644 index 0000000000..7266303555 --- /dev/null +++ b/libyuv/0001-fix-build-error.patch @@ -0,0 +1,113 @@ +From 5f2390c7b4d114d591b7880e6020a79d09957f3e Mon Sep 17 00:00:00 2001 +From: Xiaotian Wu +Date: Mon, 1 Jan 2024 16:46:39 +0800 +Subject: [PATCH] fix build error +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +/build/libyuv/src/libyuv/source/row_lsx.cc: In function ‘void libyuv::ARGB1555ToUVRow_LSX(const uint8_t*, int, uint8_t*, uint8_t*, int)’: +/build/libyuv/src/libyuv/source/row_lsx.cc:410:25: error: narrowing conversion of ‘9259542123273814144’ from ‘long unsigned int’ to ‘long long int’ [-Wnarrowing] + 410 | __m128i const_8080 = {0x8080808080808080, 0x8080808080808080}; + | ^~~~~~~~~~~~~~~~~~ +--- + source/row_lsx.cc | 20 ++++++++++---------- + 1 file changed, 10 insertions(+), 10 deletions(-) + +diff --git a/source/row_lsx.cc b/source/row_lsx.cc +index 9c1e16f2..c7b30051 100644 +--- a/source/row_lsx.cc ++++ b/source/row_lsx.cc +@@ -407,7 +407,7 @@ void ARGB1555ToUVRow_LSX(const uint8_t* src_argb1555, + __m128i const_38 = __lsx_vldi(0x413); + __m128i const_94 = __lsx_vldi(0x42F); + __m128i const_18 = __lsx_vldi(0x409); +- __m128i const_8080 = {0x8080808080808080, 0x8080808080808080}; ++ __m128i const_8080 = {static_cast(0x8080808080808080), static_cast(0x8080808080808080)}; + + for (x = 0; x < len; x++) { + DUP4_ARG2(__lsx_vld, src_argb1555, 0, src_argb1555, 16, next_argb1555, 0, +@@ -516,7 +516,7 @@ void RGB565ToUVRow_LSX(const uint8_t* src_rgb565, + __m128i const_38 = __lsx_vldi(0x413); + __m128i const_94 = __lsx_vldi(0x42F); + __m128i const_18 = __lsx_vldi(0x409); +- __m128i const_8080 = {0x8080808080808080, 0x8080808080808080}; ++ __m128i const_8080 = {static_cast(0x8080808080808080), static_cast(0x8080808080808080)}; + + for (x = 0; x < len; x++) { + DUP4_ARG2(__lsx_vld, src_rgb565, 0, src_rgb565, 16, next_rgb565, 0, +@@ -577,7 +577,7 @@ void RGB24ToUVRow_LSX(const uint8_t* src_rgb24, + __m128i const_38 = __lsx_vldi(0x413); + 
__m128i const_94 = __lsx_vldi(0x42F); + __m128i const_18 = __lsx_vldi(0x409); +- __m128i const_8080 = {0x8080808080808080, 0x8080808080808080}; ++ __m128i const_8080 = {static_cast(0x8080808080808080), static_cast(0x8080808080808080)}; + __m128i shuff0_b = {0x15120F0C09060300, 0x00000000001E1B18}; + __m128i shuff1_b = {0x0706050403020100, 0x1D1A1714110A0908}; + __m128i shuff0_g = {0x1613100D0A070401, 0x00000000001F1C19}; +@@ -630,7 +630,7 @@ void RAWToUVRow_LSX(const uint8_t* src_raw, + __m128i const_38 = __lsx_vldi(0x413); + __m128i const_94 = __lsx_vldi(0x42F); + __m128i const_18 = __lsx_vldi(0x409); +- __m128i const_8080 = {0x8080808080808080, 0x8080808080808080}; ++ __m128i const_8080 = {static_cast(0x8080808080808080), static_cast(0x8080808080808080)}; + __m128i shuff0_r = {0x15120F0C09060300, 0x00000000001E1B18}; + __m128i shuff1_r = {0x0706050403020100, 0x1D1A1714110A0908}; + __m128i shuff0_g = {0x1613100D0A070401, 0x00000000001F1C19}; +@@ -865,7 +865,7 @@ void BGRAToUVRow_LSX(const uint8_t* src_bgra, + __m128i const_38 = __lsx_vldi(0x413); + __m128i const_94 = __lsx_vldi(0x42F); + __m128i const_18 = __lsx_vldi(0x409); +- __m128i const_8080 = {0x8080808080808080, 0x8080808080808080}; ++ __m128i const_8080 = {static_cast(0x8080808080808080), static_cast(0x8080808080808080)}; + + for (x = 0; x < len; x++) { + DUP4_ARG2(__lsx_vld, src_bgra, 0, src_bgra, 16, src_bgra, 32, src_bgra, 48, +@@ -913,7 +913,7 @@ void ABGRToUVRow_LSX(const uint8_t* src_abgr, + __m128i const_38 = __lsx_vldi(0x413); + __m128i const_94 = __lsx_vldi(0x42F); + __m128i const_18 = __lsx_vldi(0x409); +- __m128i const_8080 = {0x8080808080808080, 0x8080808080808080}; ++ __m128i const_8080 = {static_cast(0x8080808080808080), static_cast(0x8080808080808080)}; + + for (x = 0; x < len; x++) { + DUP4_ARG2(__lsx_vld, src_abgr, 0, src_abgr, 16, src_abgr, 32, src_abgr, 48, +@@ -961,7 +961,7 @@ void RGBAToUVRow_LSX(const uint8_t* src_rgba, + __m128i const_38 = __lsx_vldi(0x413); + __m128i const_94 = __lsx_vldi(0x42F); + __m128i const_18 = __lsx_vldi(0x409); +- __m128i const_8080 = {0x8080808080808080, 0x8080808080808080}; ++ __m128i const_8080 = {static_cast(0x8080808080808080), static_cast(0x8080808080808080)}; + + for (x = 0; x < len; x++) { + DUP4_ARG2(__lsx_vld, src_rgba, 0, src_rgba, 16, src_rgba, 32, src_rgba, 48, +@@ -1010,7 +1010,7 @@ void ARGBToUVJRow_LSX(const uint8_t* src_argb, + __m128i const_21 = __lsx_vldi(0x415); + __m128i const_53 = __lsx_vldi(0x435); + __m128i const_10 = __lsx_vldi(0x40A); +- __m128i const_8080 = {0x8080808080808080, 0x8080808080808080}; ++ __m128i const_8080 = {static_cast(0x8080808080808080), static_cast(0x8080808080808080)}; + + for (x = 0; x < len; x++) { + DUP4_ARG2(__lsx_vld, src_argb, 0, src_argb, 16, src_argb, 32, src_argb, 48, +@@ -1388,7 +1388,7 @@ void ARGBBlendRow_LSX(const uint8_t* src_argb, + __m128i const_256 = __lsx_vldi(0x500); + __m128i zero = __lsx_vldi(0); + __m128i alpha = __lsx_vldi(0xFF); +- __m128i control = {0xFF000000FF000000, 0xFF000000FF000000}; ++ __m128i control = {static_cast(0xFF000000FF000000), static_cast(0xFF000000FF000000)}; + + for (x = 0; x < len; x++) { + DUP4_ARG2(__lsx_vld, src_argb, 0, src_argb, 16, src_argb1, 0, src_argb1, 16, +@@ -1434,7 +1434,7 @@ void ARGBQuantizeRow_LSX(uint8_t* dst_argb, + __m128i vec_offset = __lsx_vreplgr2vr_b(interval_offset); + __m128i vec_scale = __lsx_vreplgr2vr_w(scale); + __m128i zero = __lsx_vldi(0); +- __m128i control = {0xFF000000FF000000, 0xFF000000FF000000}; ++ __m128i control = {static_cast(0xFF000000FF000000), 
static_cast(0xFF000000FF000000)}; + + for (x = 0; x < len; x++) { + DUP4_ARG2(__lsx_vld, dst_argb, 0, dst_argb, 16, dst_argb, 32, dst_argb, 48, +-- +2.42.0 + diff --git a/libyuv/PKGBUILD b/libyuv/PKGBUILD index 9d25df1832..826d7d3ec5 100644 --- a/libyuv/PKGBUILD +++ b/libyuv/PKGBUILD @@ -11,8 +11,10 @@ depends=(gcc-libs libjpeg) makedepends=(cmake git) # Version required by libavif _commit=464c51a0353c71f08fe45f683d6a97a638d47833 -source=(git+${url}#commit=${_commit}) -sha512sums=(SKIP) +source=(git+${url}#commit=${_commit} + 0001-fix-build-error.patch) +sha512sums=('SKIP' + '74e4503371ea7fbb054b18990dccc708d97612c03821d6ca3f19bf26ca8bbabfb7978f61b6a738b17c820bb5fd278a746dba5f0e9ae8e0d2a8833f6d5b7feaea') pkgver() { cd ${pkgname} @@ -21,6 +23,7 @@ pkgver() { prepare() { sed -i 's|yuvconvert ${JPEG_LIBRARY}|${ly_lib_shared} ${JPEG_LIBRARY}|' ${pkgname}/CMakeLists.txt + patch -d ${pkgname} -p1 -i $srcdir/0001-fix-build-error.patch } build() { diff --git a/link-grammar/PKGBUILD b/link-grammar/PKGBUILD index 5ae5123146..4731f80765 100644 --- a/link-grammar/PKGBUILD +++ b/link-grammar/PKGBUILD @@ -11,11 +11,10 @@ license=('LGPL') depends=('hunspell' 'sqlite' 'libedit') makedepends=('python' 'swig' 'apache-ant' 'java-environment=11') options=('!makeflags') -source=(https://www.abisource.com/downloads/${pkgname}/${pkgver}/${pkgname}-${pkgver}.tar.gz{,.asc}) +source=(https://github.com/opencog/link-grammar/archive/refs/tags/${pkgname}-${pkgver}.tar.gz) validpgpkeys=('6407453C98BECC19ADB03D82EB6AA534E0C0651C' '8305252160000B5E89843F5464A99A8201045933') # Linas Vepstas -sha256sums=('ef89a322f943607546a667ecb4fdf31d56cab5ed11c856873ac72fa3d352f7a3' - 'SKIP') +sha256sums=('e0cd1b94cc9af20e5bd9a04604a714e11efe21ae5e453b639cdac050b6ac4150') build() { cd ${pkgname}-${pkgver} diff --git a/linux-hardened/PKGBUILD b/linux-hardened/PKGBUILD index 469b3c90a2..1c16ec51c9 100644 --- a/linux-hardened/PKGBUILD +++ b/linux-hardened/PKGBUILD @@ -4,8 +4,10 @@ # Contributor: Thomas Baechler pkgbase=linux-hardened -pkgver=6.7.2.hardened1 -pkgrel=2 +_ver=6.7.0 +_rdate=20231226 +pkgver=${_ver}.hardened1 +pkgrel=1 pkgdesc='Security-Hardened Linux' url='https://github.com/anthraxx/linux-hardened' arch=(loong64 x86_64) @@ -31,9 +33,9 @@ options=('!strip') _srcname=linux-${pkgver%.*} _srctag=${pkgver%.*}-${pkgver##*.} source=( - https://cdn.kernel.org/pub/linux/kernel/v${pkgver%%.*}.x/${_srcname}.tar.{xz,sign} - ${url}/releases/download/${_srctag}/${pkgbase}-${_srctag}.patch{,.sig} - config # the main kernel config file + https://github.com/loongarchlinux/linux/releases/download/v${_rdate}/linux-${_ver}-${_rdate}.tar.xz + config # the main kernel config file + config.la64 ) validpgpkeys=( ABAF11C65A2970B130ABE3C479BE3E4300411886 # Linus Torvalds @@ -41,16 +43,12 @@ validpgpkeys=( E240B57E2C4630BA768E2F26FC1B547C8D8172C8 # Levente Polyak ) # https://www.kernel.org/pub/linux/kernel/v6.x/sha256sums.asc -sha256sums=('c34de41baa29c475c0834e88a3171e255ff86cd32d83c6bffc2b797e60bfa671' - 'SKIP' - 'f1a21eab19a3685f6cabaef93c520fa0061202b978ba64d539af0de690c375d6' - 'SKIP' - '79aa07a1108582118c5e4721b9b5440053791d7a98ceb9538d42a511e39097eb') -b2sums=('4c1f480de0c1458aa67379cd02d35708f63850adb84a85061088de1f82b5d084bc7cf7da459a3f1e415544351d1f36a9a832277240774ae461cdde11687cbadd' - 'SKIP' - 'c97573edb0e765b35c9ebc1a091889aa924be997d57a8acf8a9221edd36ab8b4823c6521dc31838c4c4ec9e23bf39a5c462bc7ab99baf81dddf8c92fd8cfe10f' - 'SKIP' - 
'ca79cea706454ee6aab3bfbc01d3067cef4fdfd49413c8bca52480596ec394d932a4e252b4bbcc3a605bd3a56b4b73493c47a7cdd3e984a5b42767fce0f1c025') +sha256sums=('d0e6ce60f0ccd162aabe130c00509590de790e33642a12ed4249aa08ac14f674' + '46a1e0e43247d09c5ae29cfa7a79e272767a49b90c5761c2e4a5656a4ced6cf2' + 'c9b26d463e27257d6ad13e59d489db5bd9b103e506dc80d7917bf48471480c85') +b2sums=('15b6b33c6fdac5329d56424afc09a722053f045ca1a1553d583d80296a20a3e545d6ac1fd7950e575816df3e2d1c1033aeba779aa374ee25406f05d284d56815' + '914edb986d34ddaa20738ec6d4f0d68b2500ee4662be3f58c1f62ecfa87f3ab88205acf91ec7d03d2f925880d538d0b1716183add857d2bff533e5a0d0596ba5' + 'd285dfd1304c9bc42f9fdaa18f8a393fc599be5d6144abe43959c8f63de7b8973821c50135ce81764428659ddd0eb634d7cce6da697cab9752311ad05dee2df6') export KBUILD_BUILD_HOST=archlinux export KBUILD_BUILD_USER=$pkgbase @@ -74,9 +72,9 @@ prepare() { done echo "Setting config..." - cp ../config .config + cp ../config.la64 .config make olddefconfig - diff -u ../config .config || : + diff -u ../config.la64 .config || : make -s kernelrelease > version echo "Prepared $pkgbase version $( +Date: Wed Sep 6 22:53:09 2023 +0800 + + LoongArch: Remove shm_align_mask and use SHMLBA instead + + Both shm_align_mask and SHMLBA want to avoid cache alias. But they are + inconsistent: shm_align_mask is (PAGE_SIZE - 1) while SHMLBA is SZ_64K, + but PAGE_SIZE is not always equal to SZ_64K. + + This may cause problems when shmat() twice. Fix this problem by removing + shm_align_mask and using SHMLBA (strictly SHMLBA - 1) instead. + + Reported-by: Jiantao Shan + Signed-off-by: Huacai Chen + +diff --git a/arch/loongarch/mm/cache.c b/arch/loongarch/mm/cache.c +index 72685a48eaf0..6be04d36ca07 100644 +--- a/arch/loongarch/mm/cache.c ++++ b/arch/loongarch/mm/cache.c +@@ -156,7 +156,6 @@ void cpu_cache_init(void) + + current_cpu_data.cache_leaves_present = leaf; + current_cpu_data.options |= LOONGARCH_CPU_PREFETCH; +- shm_align_mask = PAGE_SIZE - 1; + } + + static const pgprot_t protection_map[16] = { +diff --git a/arch/loongarch/mm/mmap.c b/arch/loongarch/mm/mmap.c +index fbe1a4856fc4..a9630a81b38a 100644 +--- a/arch/loongarch/mm/mmap.c ++++ b/arch/loongarch/mm/mmap.c +@@ -8,12 +8,11 @@ + #include + #include + +-unsigned long shm_align_mask = PAGE_SIZE - 1; /* Sane caches */ +-EXPORT_SYMBOL(shm_align_mask); ++#define SHM_ALIGN_MASK (SHMLBA - 1) + +-#define COLOUR_ALIGN(addr, pgoff) \ +- ((((addr) + shm_align_mask) & ~shm_align_mask) + \ +- (((pgoff) << PAGE_SHIFT) & shm_align_mask)) ++#define COLOUR_ALIGN(addr, pgoff) \ ++ ((((addr) + SHM_ALIGN_MASK) & ~SHM_ALIGN_MASK) \ ++ + (((pgoff) << PAGE_SHIFT) & SHM_ALIGN_MASK)) + + enum mmap_allocation_direction {UP, DOWN}; + +@@ -40,7 +39,7 @@ static unsigned long arch_get_unmapped_area_common(struct file *filp, + * cache aliasing constraints. + */ + if ((flags & MAP_SHARED) && +- ((addr - (pgoff << PAGE_SHIFT)) & shm_align_mask)) ++ ((addr - (pgoff << PAGE_SHIFT)) & SHM_ALIGN_MASK)) + return -EINVAL; + return addr; + } +@@ -63,7 +62,7 @@ static unsigned long arch_get_unmapped_area_common(struct file *filp, + } + + info.length = len; +- info.align_mask = do_color_align ? (PAGE_MASK & shm_align_mask) : 0; ++ info.align_mask = do_color_align ? 
(PAGE_MASK & SHM_ALIGN_MASK) : 0; + info.align_offset = pgoff << PAGE_SHIFT; + + if (dir == DOWN) { diff --git a/linux-tools/PKGBUILD b/linux-tools/PKGBUILD index aeeb369c7d..47421ceb24 100644 --- a/linux-tools/PKGBUILD +++ b/linux-tools/PKGBUILD @@ -10,9 +10,9 @@ pkgname=( 'linux-tools-meta' 'perf' 'tmon' - 'turbostat' +# 'turbostat' 'usbip' - 'x86_energy_perf_policy' +# 'x86_energy_perf_policy' ) pkgver=6.7 pkgrel=1 @@ -64,6 +64,7 @@ sha256sums=('SKIP' '2d5e2f8d40b6f19bf2e1dead57ca105d72098fb0b418c09ff2e0cb91089710af') prepare() { + ln -sf linux-$pkgver linux cd linux # apply patch from the source array (should be a pacman feature) @@ -103,10 +104,10 @@ build() { make VERSION=$pkgver-$pkgrel popd - echo ':: x86_energy_perf_policy' - pushd linux/tools/power/x86/x86_energy_perf_policy - make - popd +# echo ':: x86_energy_perf_policy' +# pushd linux/tools/power/x86/x86_energy_perf_policy +# make +# popd echo ':: usbip' pushd linux/tools/usb/usbip @@ -127,10 +128,10 @@ build() { make popd - echo ':: turbostat' - pushd linux/tools/power/x86/turbostat - make - popd +# echo ':: turbostat' +# pushd linux/tools/power/x86/turbostat +# make +# popd echo ':: hv' pushd linux/tools/hv @@ -163,9 +164,9 @@ package_linux-tools-meta() { 'hyperv' 'perf' 'tmon' - 'turbostat' +# 'turbostat' 'usbip' - 'x86_energy_perf_policy' +# 'x86_energy_perf_policy' ) conflicts=( 'acpidump' diff --git a/liteide/PKGBUILD b/liteide/PKGBUILD index b7319aebe0..e342078f8e 100644 --- a/liteide/PKGBUILD +++ b/liteide/PKGBUILD @@ -15,13 +15,16 @@ depends=(go-tools qt5-base) optdepends=('go: go compiler' 'gcc-go: go compiler') options=(!strip !emptydirs) -source=("$pkgname-x$pkgver::git+$url#commit=35a0dcd957d8bdcc189089a9c027bc54b8aa8cde") # tag: x38.3 -b2sums=('SKIP') +source=("$pkgname-x$pkgver::git+$url#commit=35a0dcd957d8bdcc189089a9c027bc54b8aa8cde" + liteide-fix-build.patch) +b2sums=('SKIP' + '03d9ff6614a800e9d4fb5a6e05a1b8a92e6e4fb27342b237a6c297dadcd90c39b5299da99bee0ee639630947de67c521fe72c10419cc06c3f94eda0a546f84a7') prepare() { cd $pkgname-x$pkgver chmod +x build/*_*.sh + patch -p1 -i $srcdir/liteide-fix-build.patch # Fix for FS#4662 (until fixed by upstream) sed -i 's|^GOROOT|#GOROOT|g' liteidex/os_deploy/linux/liteenv/linux{32,64}.env diff --git a/liteide/liteide-fix-build.patch b/liteide/liteide-fix-build.patch new file mode 100644 index 0000000000..c2ab73ddbe --- /dev/null +++ b/liteide/liteide-fix-build.patch @@ -0,0 +1,11 @@ +--- a/build/update_pkg.sh 2024-01-10 14:06:25.000000000 +0800 ++++ b/build/update_pkg.sh 2024-01-10 14:31:06.474082272 +0800 +@@ -25,7 +25,7 @@ + echo install gocode ... + go install -v github.com/visualfc/gocode@latest + echo install gotools ... +-go install -v github.com/visualfc/gotools@latest ++go install -v github.com/visualfc/gotools@master + echo install gomodifytags ... 
+ go install -v github.com/fatih/gomodifytags@latest + diff --git a/lld/PKGBUILD b/lld/PKGBUILD index da321dab0e..783b88185c 100644 --- a/lld/PKGBUILD +++ b/lld/PKGBUILD @@ -15,7 +15,8 @@ _source_base=https://github.com/llvm/llvm-project/releases/download/llvmorg-$pkg source=($_source_base/lld-$pkgver.src.tar.xz{,.sig} $_source_base/llvm-$pkgver.src.tar.xz{,.sig} $_source_base/libunwind-$pkgver.src.tar.xz{,.sig} - $_source_base/cmake-$pkgver.src.tar.xz{,.sig}) + $_source_base/cmake-$pkgver.src.tar.xz{,.sig} + lld-la64.patch) sha256sums=('a127e334dd267f2e20d5a0c6b15aa9651f3fbbdfe3dc7d2573c617fad1155fcb' 'SKIP' 'e91db44d1b3bb1c33fcea9a7d1f2423b883eaa9163d3d56ca2aa6d2f0711bc29' @@ -23,16 +24,18 @@ sha256sums=('a127e334dd267f2e20d5a0c6b15aa9651f3fbbdfe3dc7d2573c617fad1155fcb' '7e04070aee07e43ecb5f2b321a7cc64671202af3bcf15324bb1e134cdb7b2b72' 'SKIP' '39d342a4161095d2f28fb1253e4585978ac50521117da666e2b1f6f28b62f514' - 'SKIP') + 'SKIP' + '2c99101e69601f493ddf4e52f6e67d98984dae7998ee40dd7cd4adba972065ce') validpgpkeys=('474E22316ABF4785A88C6E8EA2C794A986419D8A' # Tom Stellard 'D574BD5D1D0E98895E3BF90044F2485E45D59042') # Tobias Hieta prepare() { # https://bugs.llvm.org/show_bug.cgi?id=49228 mv libunwind{-$pkgver.src,} - mv cmake{-$pkgver.src,} + cd lld-$pkgver.src + patch -p2 -i $srcdir/lld-la64.patch mkdir build } diff --git a/lld/lld-la64.patch b/lld/lld-la64.patch new file mode 100644 index 0000000000..de69479023 --- /dev/null +++ b/lld/lld-la64.patch @@ -0,0 +1,2569 @@ +diff --git a/lld/ELF/Arch/LoongArch.cpp b/lld/ELF/Arch/LoongArch.cpp +new file mode 100644 +index 000000000000..9dc99e573d41 +--- /dev/null ++++ b/lld/ELF/Arch/LoongArch.cpp +@@ -0,0 +1,687 @@ ++//===- LoongArch.cpp ------------------------------------------------------===// ++// ++// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. ++// See https://llvm.org/LICENSE.txt for license information. 
++// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception ++// ++//===----------------------------------------------------------------------===// ++ ++#include "InputFiles.h" ++#include "OutputSections.h" ++#include "Symbols.h" ++#include "SyntheticSections.h" ++#include "Target.h" ++ ++using namespace llvm; ++using namespace llvm::object; ++using namespace llvm::support::endian; ++using namespace llvm::ELF; ++using namespace lld; ++using namespace lld::elf; ++ ++namespace { ++class LoongArch final : public TargetInfo { ++public: ++ LoongArch(); ++ uint32_t calcEFlags() const override; ++ int64_t getImplicitAddend(const uint8_t *buf, RelType type) const override; ++ void writeGotPlt(uint8_t *buf, const Symbol &s) const override; ++ void writeIgotPlt(uint8_t *buf, const Symbol &s) const override; ++ void writePltHeader(uint8_t *buf) const override; ++ void writePlt(uint8_t *buf, const Symbol &sym, ++ uint64_t pltEntryAddr) const override; ++ RelType getDynRel(RelType type) const override; ++ RelExpr getRelExpr(RelType type, const Symbol &s, ++ const uint8_t *loc) const override; ++ bool usesOnlyLowPageBits(RelType type) const override; ++ void relocate(uint8_t *loc, const Relocation &rel, ++ uint64_t val) const override; ++}; ++} // end anonymous namespace ++ ++enum Op { ++ SUB_W = 0x00110000, ++ SUB_D = 0x00118000, ++ BREAK = 0x002a0000, ++ SRLI_W = 0x00448000, ++ SRLI_D = 0x00450000, ++ ADDI_W = 0x02800000, ++ ADDI_D = 0x02c00000, ++ ANDI = 0x03400000, ++ PCADDU12I = 0x1c000000, ++ LD_W = 0x28800000, ++ LD_D = 0x28c00000, ++ JIRL = 0x4c000000, ++}; ++ ++enum Reg { ++ R_ZERO = 0, ++ R_RA = 1, ++ R_TP = 2, ++ R_T0 = 12, ++ R_T1 = 13, ++ R_T2 = 14, ++ R_T3 = 15, ++}; ++ ++// Mask out the input's lowest 12 bits for use with `pcalau12i`, in sequences ++// like `pcalau12i + addi.[wd]` or `pcalau12i + {ld,st}.*` where the `pcalau12i` ++// produces a PC-relative intermediate value with the lowest 12 bits zeroed (the ++// "page") for the next instruction to add in the "page offset". (`pcalau12i` ++// stands for something like "PC ALigned Add Upper that starts from the 12th ++// bit, Immediate".) ++// ++// Here a "page" is in fact just another way to refer to the 12-bit range ++// allowed by the immediate field of the addi/ld/st instructions, and not ++// related to the system or the kernel's actual page size. The sematics happens ++// to match the AArch64 `adrp`, so the concept of "page" is borrowed here. ++static uint64_t getLoongArchPage(uint64_t p) { ++ return p & ~static_cast(0xfff); ++} ++ ++static uint32_t lo12(uint32_t val) { return val & 0xfff; } ++ ++// Calculate the adjusted page delta between dest and PC. ++uint64_t elf::getLoongArchPageDelta(uint64_t dest, uint64_t pc) { ++ // Consider the large code model access pattern, of which the smaller code ++ // models' access patterns are a subset: ++ // ++ // pcalau12i U, %foo_hi20(sym) ; b in [-0x80000, 0x7ffff] ++ // addi.d T, zero, %foo_lo12(sym) ; a in [-0x800, 0x7ff] ++ // lu32i.d T, %foo64_lo20(sym) ; c in [-0x80000, 0x7ffff] ++ // lu52i.d T, T, %foo64_hi12(sym) ; d in [-0x800, 0x7ff] ++ // {ldx,stx,add}.* dest, U, T ++ // ++ // Let page(pc) = 0xRRR'QQQQQ'PPPPP'000 and dest = 0xZZZ'YYYYY'XXXXX'AAA, ++ // with RQ, P, ZY, X and A representing the respective bitfields as unsigned ++ // integers. 
We have: ++ // ++ // page(dest) = 0xZZZ'YYYYY'XXXXX'000 ++ // - page(pc) = 0xRRR'QQQQQ'PPPPP'000 ++ // ---------------------------------- ++ // 0xddd'ccccc'bbbbb'000 ++ // ++ // Now consider the above pattern's actual effects: ++ // ++ // page(pc) 0xRRR'QQQQQ'PPPPP'000 ++ // pcalau12i + 0xiii'iiiii'bbbbb'000 ++ // addi + 0xjjj'jjjjj'kkkkk'AAA ++ // lu32i.d & lu52i.d + 0xddd'ccccc'00000'000 ++ // -------------------------------------------------- ++ // dest = U + T ++ // = ((RQ<<32) + (P<<12) + i + (b<<12)) + (j + k + A + (cd<<32)) ++ // = (((RQ+cd)<<32) + i + j) + (((P+b)<<12) + k) + A ++ // = (ZY<<32) + (X<<12) + A ++ // ++ // ZY<<32 = (RQ<<32)+(cd<<32)+i+j, X<<12 = (P<<12)+(b<<12)+k ++ // cd<<32 = (ZY<<32)-(RQ<<32)-i-j, b<<12 = (X<<12)-(P<<12)-k ++ // ++ // where i and k are terms representing the effect of b's and A's sign ++ // extension respectively. ++ // ++ // i = signed b < 0 ? -0x10000'0000 : 0 ++ // k = signed A < 0 ? -0x1000 : 0 ++ // ++ // The j term is a bit complex: it represents the higher half of ++ // sign-extended bits from A that are effectively lost if i == 0 but k != 0, ++ // due to overwriting by lu32i.d & lu52i.d. ++ // ++ // j = signed A < 0 && signed b >= 0 ? 0x10000'0000 : 0 ++ // ++ // The actual effect of the instruction sequence before the final addition, ++ // i.e. our desired result value, is thus: ++ // ++ // result = (cd<<32) + (b<<12) ++ // = (ZY<<32)-(RQ<<32)-i-j + (X<<12)-(P<<12)-k ++ // = ((ZY<<32)+(X<<12)) - ((RQ<<32)+(P<<12)) - i - j - k ++ // = page(dest) - page(pc) - i - j - k ++ // ++ // when signed A >= 0 && signed b >= 0: ++ // ++ // i = j = k = 0 ++ // result = page(dest) - page(pc) ++ // ++ // when signed A >= 0 && signed b < 0: ++ // ++ // i = -0x10000'0000, j = k = 0 ++ // result = page(dest) - page(pc) + 0x10000'0000 ++ // ++ // when signed A < 0 && signed b >= 0: ++ // ++ // i = 0, j = 0x10000'0000, k = -0x1000 ++ // result = page(dest) - page(pc) - 0x10000'0000 + 0x1000 ++ // ++ // when signed A < 0 && signed b < 0: ++ // ++ // i = -0x10000'0000, j = 0, k = -0x1000 ++ // result = page(dest) - page(pc) + 0x1000 ++ uint64_t result = getLoongArchPage(dest) - getLoongArchPage(pc); ++ bool negativeA = lo12(dest) > 0x7ff; ++ bool negativeB = (result & 0x8000'0000) != 0; ++ ++ if (negativeA) ++ result += 0x1000; ++ if (negativeA && !negativeB) ++ result -= 0x10000'0000; ++ else if (!negativeA && negativeB) ++ result += 0x10000'0000; ++ ++ return result; ++} ++ ++static uint32_t hi20(uint32_t val) { return (val + 0x800) >> 12; } ++ ++static uint32_t insn(uint32_t op, uint32_t d, uint32_t j, uint32_t k) { ++ return op | d | (j << 5) | (k << 10); ++} ++ ++// Extract bits v[begin:end], where range is inclusive. ++static uint32_t extractBits(uint64_t v, uint32_t begin, uint32_t end) { ++ return begin == 63 ? 
v >> end : (v & ((1ULL << (begin + 1)) - 1)) >> end; ++} ++ ++static uint32_t setD5k16(uint32_t insn, uint32_t imm) { ++ uint32_t immLo = extractBits(imm, 15, 0); ++ uint32_t immHi = extractBits(imm, 20, 16); ++ return (insn & 0xfc0003e0) | (immLo << 10) | immHi; ++} ++ ++static uint32_t setD10k16(uint32_t insn, uint32_t imm) { ++ uint32_t immLo = extractBits(imm, 15, 0); ++ uint32_t immHi = extractBits(imm, 25, 16); ++ return (insn & 0xfc000000) | (immLo << 10) | immHi; ++} ++ ++static uint32_t setJ20(uint32_t insn, uint32_t imm) { ++ return (insn & 0xfe00001f) | (extractBits(imm, 19, 0) << 5); ++} ++ ++static uint32_t setK12(uint32_t insn, uint32_t imm) { ++ return (insn & 0xffc003ff) | (extractBits(imm, 11, 0) << 10); ++} ++ ++static uint32_t setK16(uint32_t insn, uint32_t imm) { ++ return (insn & 0xfc0003ff) | (extractBits(imm, 15, 0) << 10); ++} ++ ++static bool isJirl(uint32_t insn) { ++ return (insn & 0xfc000000) == JIRL; ++} ++ ++LoongArch::LoongArch() { ++ // The LoongArch ISA itself does not have a limit on page sizes. According to ++ // the ISA manual, the PS (page size) field in MTLB entries and CSR.STLBPS is ++ // 6 bits wide, meaning the maximum page size is 2^63 which is equivalent to ++ // "unlimited". ++ // However, practically the maximum usable page size is constrained by the ++ // kernel implementation, and 64KiB is the biggest non-huge page size ++ // supported by Linux as of v6.4. The most widespread page size in use, ++ // though, is 16KiB. ++ defaultCommonPageSize = 16384; ++ defaultMaxPageSize = 65536; ++ write32le(trapInstr.data(), BREAK); // break 0 ++ ++ copyRel = R_LARCH_COPY; ++ pltRel = R_LARCH_JUMP_SLOT; ++ relativeRel = R_LARCH_RELATIVE; ++ iRelativeRel = R_LARCH_IRELATIVE; ++ ++ if (config->is64) { ++ symbolicRel = R_LARCH_64; ++ tlsModuleIndexRel = R_LARCH_TLS_DTPMOD64; ++ tlsOffsetRel = R_LARCH_TLS_DTPREL64; ++ tlsGotRel = R_LARCH_TLS_TPREL64; ++ } else { ++ symbolicRel = R_LARCH_32; ++ tlsModuleIndexRel = R_LARCH_TLS_DTPMOD32; ++ tlsOffsetRel = R_LARCH_TLS_DTPREL32; ++ tlsGotRel = R_LARCH_TLS_TPREL32; ++ } ++ ++ gotRel = symbolicRel; ++ ++ // .got.plt[0] = _dl_runtime_resolve, .got.plt[1] = link_map ++ gotPltHeaderEntriesNum = 2; ++ ++ pltHeaderSize = 32; ++ pltEntrySize = 16; ++ ipltEntrySize = 16; ++} ++ ++static uint32_t getEFlags(const InputFile *f) { ++ if (config->is64) ++ return cast>(f)->getObj().getHeader().e_flags; ++ return cast>(f)->getObj().getHeader().e_flags; ++} ++ ++static bool inputFileHasCode(const InputFile *f) { ++ for (const auto *sec : f->getSections()) ++ if (sec && sec->flags & SHF_EXECINSTR) ++ return true; ++ ++ return false; ++} ++ ++uint32_t LoongArch::calcEFlags() const { ++ // If there are only binary input files (from -b binary), use a ++ // value of 0 for the ELF header flags. ++ if (ctx.objectFiles.empty()) ++ return 0; ++ ++ uint32_t target = 0; ++ const InputFile *targetFile; ++ for (const InputFile *f : ctx.objectFiles) { ++ // Do not enforce ABI compatibility if the input file does not contain code. ++ // This is useful for allowing linkage with data-only object files produced ++ // with tools like objcopy, that have zero e_flags. ++ if (!inputFileHasCode(f)) ++ continue; ++ ++ // Take the first non-zero e_flags as the reference. 
++ uint32_t flags = getEFlags(f); ++ if (target == 0 && flags != 0) { ++ target = flags; ++ targetFile = f; ++ } ++ ++ if ((flags & EF_LOONGARCH_ABI_MODIFIER_MASK) != ++ (target & EF_LOONGARCH_ABI_MODIFIER_MASK)) ++ error(toString(f) + ++ ": cannot link object files with different ABI from " + ++ toString(targetFile)); ++ ++ // We cannot process psABI v1.x / object ABI v0 files (containing stack ++ // relocations), unlike ld.bfd. ++ // ++ // Instead of blindly accepting every v0 object and only failing at ++ // relocation processing time, just disallow interlink altogether. We ++ // don't expect significant usage of object ABI v0 in the wild (the old ++ // world may continue using object ABI v0 for a while, but as it's not ++ // binary-compatible with the upstream i.e. new-world ecosystem, it's not ++ // being considered here). ++ // ++ // There are briefly some new-world systems with object ABI v0 binaries too. ++ // It is because these systems were built before the new ABI was finalized. ++ // These are not supported either due to the extremely small number of them, ++ // and the few impacted users are advised to simply rebuild world or ++ // reinstall a recent system. ++ if ((flags & EF_LOONGARCH_OBJABI_MASK) != EF_LOONGARCH_OBJABI_V1) ++ error(toString(f) + ": unsupported object file ABI version"); ++ } ++ ++ return target; ++} ++ ++int64_t LoongArch::getImplicitAddend(const uint8_t *buf, RelType type) const { ++ switch (type) { ++ default: ++ internalLinkerError(getErrorLocation(buf), ++ "cannot read addend for relocation " + toString(type)); ++ return 0; ++ case R_LARCH_32: ++ case R_LARCH_TLS_DTPMOD32: ++ case R_LARCH_TLS_DTPREL32: ++ case R_LARCH_TLS_TPREL32: ++ return SignExtend64<32>(read32le(buf)); ++ case R_LARCH_64: ++ case R_LARCH_TLS_DTPMOD64: ++ case R_LARCH_TLS_DTPREL64: ++ case R_LARCH_TLS_TPREL64: ++ return read64le(buf); ++ case R_LARCH_RELATIVE: ++ case R_LARCH_IRELATIVE: ++ return config->is64 ? read64le(buf) : read32le(buf); ++ case R_LARCH_NONE: ++ case R_LARCH_JUMP_SLOT: ++ // These relocations are defined as not having an implicit addend. ++ return 0; ++ } ++} ++ ++void LoongArch::writeGotPlt(uint8_t *buf, const Symbol &s) const { ++ if (config->is64) ++ write64le(buf, in.plt->getVA()); ++ else ++ write32le(buf, in.plt->getVA()); ++} ++ ++void LoongArch::writeIgotPlt(uint8_t *buf, const Symbol &s) const { ++ if (config->writeAddends) { ++ if (config->is64) ++ write64le(buf, s.getVA()); ++ else ++ write32le(buf, s.getVA()); ++ } ++} ++ ++void LoongArch::writePltHeader(uint8_t *buf) const { ++ // The LoongArch PLT is currently structured just like that of RISCV. ++ // Annoyingly, this means the PLT is still using `pcaddu12i` to perform ++ // PC-relative addressing (because `pcaddu12i` is the same as RISCV `auipc`), ++ // in contrast to the AArch64-like page-offset scheme with `pcalau12i` that ++ // is used everywhere else involving PC-relative operations in the LoongArch ++ // ELF psABI v2.00. ++ // ++ // The `pcrel_{hi20,lo12}` operators are illustrative only and not really ++ // supported by LoongArch assemblers. 
++ // ++ // pcaddu12i $t2, %pcrel_hi20(.got.plt) ++ // sub.[wd] $t1, $t1, $t3 ++ // ld.[wd] $t3, $t2, %pcrel_lo12(.got.plt) ; t3 = _dl_runtime_resolve ++ // addi.[wd] $t1, $t1, -pltHeaderSize-12 ; t1 = &.plt[i] - &.plt[0] ++ // addi.[wd] $t0, $t2, %pcrel_lo12(.got.plt) ++ // srli.[wd] $t1, $t1, (is64?1:2) ; t1 = &.got.plt[i] - &.got.plt[0] ++ // ld.[wd] $t0, $t0, Wordsize ; t0 = link_map ++ // jr $t3 ++ uint32_t offset = in.gotPlt->getVA() - in.plt->getVA(); ++ uint32_t sub = config->is64 ? SUB_D : SUB_W; ++ uint32_t ld = config->is64 ? LD_D : LD_W; ++ uint32_t addi = config->is64 ? ADDI_D : ADDI_W; ++ uint32_t srli = config->is64 ? SRLI_D : SRLI_W; ++ write32le(buf + 0, insn(PCADDU12I, R_T2, hi20(offset), 0)); ++ write32le(buf + 4, insn(sub, R_T1, R_T1, R_T3)); ++ write32le(buf + 8, insn(ld, R_T3, R_T2, lo12(offset))); ++ write32le(buf + 12, insn(addi, R_T1, R_T1, lo12(-target->pltHeaderSize - 12))); ++ write32le(buf + 16, insn(addi, R_T0, R_T2, lo12(offset))); ++ write32le(buf + 20, insn(srli, R_T1, R_T1, config->is64 ? 1 : 2)); ++ write32le(buf + 24, insn(ld, R_T0, R_T0, config->wordsize)); ++ write32le(buf + 28, insn(JIRL, R_ZERO, R_T3, 0)); ++} ++ ++void LoongArch::writePlt(uint8_t *buf, const Symbol &sym, ++ uint64_t pltEntryAddr) const { ++ // See the comment in writePltHeader for reason why pcaddu12i is used instead ++ // of the pcalau12i that's more commonly seen in the ELF psABI v2.0 days. ++ // ++ // pcaddu12i $t3, %pcrel_hi20(f@.got.plt) ++ // ld.[wd] $t3, $t3, %pcrel_lo12(f@.got.plt) ++ // jirl $t1, $t3, 0 ++ // nop ++ uint32_t offset = sym.getGotPltVA() - pltEntryAddr; ++ write32le(buf + 0, insn(PCADDU12I, R_T3, hi20(offset), 0)); ++ write32le(buf + 4, ++ insn(config->is64 ? LD_D : LD_W, R_T3, R_T3, lo12(offset))); ++ write32le(buf + 8, insn(JIRL, R_T1, R_T3, 0)); ++ write32le(buf + 12, insn(ANDI, R_ZERO, R_ZERO, 0)); ++} ++ ++RelType LoongArch::getDynRel(RelType type) const { ++ return type == target->symbolicRel ? type ++ : static_cast(R_LARCH_NONE); ++} ++ ++RelExpr LoongArch::getRelExpr(const RelType type, const Symbol &s, ++ const uint8_t *loc) const { ++ switch (type) { ++ case R_LARCH_NONE: ++ case R_LARCH_MARK_LA: ++ case R_LARCH_MARK_PCREL: ++ return R_NONE; ++ case R_LARCH_32: ++ case R_LARCH_64: ++ case R_LARCH_ABS_HI20: ++ case R_LARCH_ABS_LO12: ++ case R_LARCH_ABS64_LO20: ++ case R_LARCH_ABS64_HI12: ++ return R_ABS; ++ case R_LARCH_PCALA_LO12: ++ // We could just R_ABS, but the JIRL instruction reuses the relocation type ++ // for a different purpose. The questionable usage is part of glibc 2.37 ++ // libc_nonshared.a [1], which is linked into user programs, so we have to ++ // work around it for a while, even if a new relocation type may be ++ // introduced in the future [2]. ++ // ++ // [1]: https://sourceware.org/git/?p=glibc.git;a=commitdiff;h=9f482b73f41a9a1bbfb173aad0733d1c824c788a ++ // [2]: https://github.com/loongson/la-abi-specs/pull/3 ++ return isJirl(read32le(loc)) ? R_PLT : R_ABS; ++ case R_LARCH_TLS_DTPREL32: ++ case R_LARCH_TLS_DTPREL64: ++ return R_DTPREL; ++ case R_LARCH_TLS_TPREL32: ++ case R_LARCH_TLS_TPREL64: ++ case R_LARCH_TLS_LE_HI20: ++ case R_LARCH_TLS_LE_LO12: ++ case R_LARCH_TLS_LE64_LO20: ++ case R_LARCH_TLS_LE64_HI12: ++ return R_TPREL; ++ case R_LARCH_ADD8: ++ case R_LARCH_ADD16: ++ case R_LARCH_ADD32: ++ case R_LARCH_ADD64: ++ case R_LARCH_SUB8: ++ case R_LARCH_SUB16: ++ case R_LARCH_SUB32: ++ case R_LARCH_SUB64: ++ // The LoongArch add/sub relocs behave like the RISCV counterparts; reuse ++ // the RelExpr to avoid code duplication. 
++ return R_RISCV_ADD; ++ case R_LARCH_32_PCREL: ++ case R_LARCH_64_PCREL: ++ return R_PC; ++ case R_LARCH_B16: ++ case R_LARCH_B21: ++ case R_LARCH_B26: ++ return R_PLT_PC; ++ case R_LARCH_GOT_PC_HI20: ++ case R_LARCH_GOT64_PC_LO20: ++ case R_LARCH_GOT64_PC_HI12: ++ case R_LARCH_TLS_IE_PC_HI20: ++ case R_LARCH_TLS_IE64_PC_LO20: ++ case R_LARCH_TLS_IE64_PC_HI12: ++ return R_LOONGARCH_GOT_PAGE_PC; ++ case R_LARCH_GOT_PC_LO12: ++ case R_LARCH_TLS_IE_PC_LO12: ++ return R_LOONGARCH_GOT; ++ case R_LARCH_TLS_LD_PC_HI20: ++ case R_LARCH_TLS_GD_PC_HI20: ++ return R_LOONGARCH_TLSGD_PAGE_PC; ++ case R_LARCH_PCALA_HI20: ++ // Why not R_LOONGARCH_PAGE_PC, majority of references don't go through PLT ++ // anyway so why waste time checking only to get everything relaxed back to ++ // it? ++ // ++ // This is again due to the R_LARCH_PCALA_LO12 on JIRL case, where we want ++ // both the HI20 and LO12 to potentially refer to the PLT. But in reality ++ // the HI20 reloc appears earlier, and the relocs don't contain enough ++ // information to let us properly resolve semantics per symbol. ++ // Unlike RISCV, our LO12 relocs *do not* point to their corresponding HI20 ++ // relocs, hence it is nearly impossible to 100% accurately determine each ++ // HI20's "flavor" without taking big performance hits, in the presence of ++ // edge cases (e.g. HI20 without pairing LO12; paired LO12 placed so far ++ // apart that relationship is not certain anymore), and programmer mistakes ++ // (e.g. as outlined in https://github.com/loongson/la-abi-specs/pull/3). ++ // ++ // Ideally we would scan in an extra pass for all LO12s on JIRL, then mark ++ // every HI20 reloc referring to the same symbol differently; this is not ++ // feasible with the current function signature of getRelExpr that doesn't ++ // allow for such inter-pass state. ++ // ++ // So, unfortunately we have to again workaround this quirk the same way as ++ // BFD: assuming every R_LARCH_PCALA_HI20 is potentially PLT-needing, only ++ // relaxing back to R_LOONGARCH_PAGE_PC if it's known not so at a later ++ // stage. ++ return R_LOONGARCH_PLT_PAGE_PC; ++ case R_LARCH_PCALA64_LO20: ++ case R_LARCH_PCALA64_HI12: ++ return R_LOONGARCH_PAGE_PC; ++ case R_LARCH_GOT_HI20: ++ case R_LARCH_GOT_LO12: ++ case R_LARCH_GOT64_LO20: ++ case R_LARCH_GOT64_HI12: ++ case R_LARCH_TLS_IE_HI20: ++ case R_LARCH_TLS_IE_LO12: ++ case R_LARCH_TLS_IE64_LO20: ++ case R_LARCH_TLS_IE64_HI12: ++ return R_GOT; ++ case R_LARCH_TLS_LD_HI20: ++ return R_TLSLD_GOT; ++ case R_LARCH_TLS_GD_HI20: ++ return R_TLSGD_GOT; ++ case R_LARCH_RELAX: ++ // LoongArch linker relaxation is not implemented yet. ++ return R_NONE; ++ ++ // Other known relocs that are explicitly unimplemented: ++ // ++ // - psABI v1 relocs that need a stateful stack machine to work, and not ++ // required when implementing psABI v2; ++ // - relocs that are not used anywhere (R_LARCH_{ADD,SUB}_24 [1], and the ++ // two GNU vtable-related relocs). 
++ // ++ // [1]: https://web.archive.org/web/20230709064026/https://github.com/loongson/LoongArch-Documentation/issues/51 ++ default: ++ error(getErrorLocation(loc) + "unknown relocation (" + Twine(type) + ++ ") against symbol " + toString(s)); ++ return R_NONE; ++ } ++} ++ ++bool LoongArch::usesOnlyLowPageBits(RelType type) const { ++ switch (type) { ++ default: ++ return false; ++ case R_LARCH_PCALA_LO12: ++ case R_LARCH_GOT_LO12: ++ case R_LARCH_GOT_PC_LO12: ++ case R_LARCH_TLS_IE_PC_LO12: ++ return true; ++ } ++} ++ ++void LoongArch::relocate(uint8_t *loc, const Relocation &rel, ++ uint64_t val) const { ++ switch (rel.type) { ++ case R_LARCH_32_PCREL: ++ checkInt(loc, val, 32, rel); ++ [[fallthrough]]; ++ case R_LARCH_32: ++ case R_LARCH_TLS_DTPREL32: ++ write32le(loc, val); ++ return; ++ case R_LARCH_64: ++ case R_LARCH_TLS_DTPREL64: ++ case R_LARCH_64_PCREL: ++ write64le(loc, val); ++ return; ++ ++ case R_LARCH_B16: ++ checkInt(loc, val, 18, rel); ++ checkAlignment(loc, val, 4, rel); ++ write32le(loc, setK16(read32le(loc), val >> 2)); ++ return; ++ ++ case R_LARCH_B21: ++ checkInt(loc, val, 23, rel); ++ checkAlignment(loc, val, 4, rel); ++ write32le(loc, setD5k16(read32le(loc), val >> 2)); ++ return; ++ ++ case R_LARCH_B26: ++ checkInt(loc, val, 28, rel); ++ checkAlignment(loc, val, 4, rel); ++ write32le(loc, setD10k16(read32le(loc), val >> 2)); ++ return; ++ ++ // Relocs intended for `addi`, `ld` or `st`. ++ case R_LARCH_PCALA_LO12: ++ // We have to again inspect the insn word to handle the R_LARCH_PCALA_LO12 ++ // on JIRL case: firstly JIRL wants its immediate's 2 lowest zeroes ++ // removed by us (in contrast to regular R_LARCH_PCALA_LO12), secondly ++ // its immediate slot width is different too (16, not 12). ++ // In this case, process like an R_LARCH_B16, but without overflow checking ++ // and only taking the value's lowest 12 bits. ++ if (isJirl(read32le(loc))) { ++ checkAlignment(loc, val, 4, rel); ++ val = SignExtend64<12>(val); ++ write32le(loc, setK16(read32le(loc), val >> 2)); ++ return; ++ } ++ [[fallthrough]]; ++ case R_LARCH_ABS_LO12: ++ case R_LARCH_GOT_PC_LO12: ++ case R_LARCH_GOT_LO12: ++ case R_LARCH_TLS_LE_LO12: ++ case R_LARCH_TLS_IE_PC_LO12: ++ case R_LARCH_TLS_IE_LO12: ++ write32le(loc, setK12(read32le(loc), extractBits(val, 11, 0))); ++ return; ++ ++ // Relocs intended for `lu12i.w` or `pcalau12i`. ++ case R_LARCH_ABS_HI20: ++ case R_LARCH_PCALA_HI20: ++ case R_LARCH_GOT_PC_HI20: ++ case R_LARCH_GOT_HI20: ++ case R_LARCH_TLS_LE_HI20: ++ case R_LARCH_TLS_IE_PC_HI20: ++ case R_LARCH_TLS_IE_HI20: ++ case R_LARCH_TLS_LD_PC_HI20: ++ case R_LARCH_TLS_LD_HI20: ++ case R_LARCH_TLS_GD_PC_HI20: ++ case R_LARCH_TLS_GD_HI20: ++ write32le(loc, setJ20(read32le(loc), extractBits(val, 31, 12))); ++ return; ++ ++ // Relocs intended for `lu32i.d`. ++ case R_LARCH_ABS64_LO20: ++ case R_LARCH_PCALA64_LO20: ++ case R_LARCH_GOT64_PC_LO20: ++ case R_LARCH_GOT64_LO20: ++ case R_LARCH_TLS_LE64_LO20: ++ case R_LARCH_TLS_IE64_PC_LO20: ++ case R_LARCH_TLS_IE64_LO20: ++ write32le(loc, setJ20(read32le(loc), extractBits(val, 51, 32))); ++ return; ++ ++ // Relocs intended for `lu52i.d`. 
++ case R_LARCH_ABS64_HI12: ++ case R_LARCH_PCALA64_HI12: ++ case R_LARCH_GOT64_PC_HI12: ++ case R_LARCH_GOT64_HI12: ++ case R_LARCH_TLS_LE64_HI12: ++ case R_LARCH_TLS_IE64_PC_HI12: ++ case R_LARCH_TLS_IE64_HI12: ++ write32le(loc, setK12(read32le(loc), extractBits(val, 63, 52))); ++ return; ++ ++ case R_LARCH_ADD8: ++ *loc += val; ++ return; ++ case R_LARCH_ADD16: ++ write16le(loc, read16le(loc) + val); ++ return; ++ case R_LARCH_ADD32: ++ write32le(loc, read32le(loc) + val); ++ return; ++ case R_LARCH_ADD64: ++ write64le(loc, read64le(loc) + val); ++ return; ++ case R_LARCH_SUB8: ++ *loc -= val; ++ return; ++ case R_LARCH_SUB16: ++ write16le(loc, read16le(loc) - val); ++ return; ++ case R_LARCH_SUB32: ++ write32le(loc, read32le(loc) - val); ++ return; ++ case R_LARCH_SUB64: ++ write64le(loc, read64le(loc) - val); ++ return; ++ ++ case R_LARCH_MARK_LA: ++ case R_LARCH_MARK_PCREL: ++ // no-op ++ return; ++ ++ case R_LARCH_RELAX: ++ return; // Ignored (for now) ++ ++ default: ++ llvm_unreachable("unknown relocation"); ++ } ++} ++ ++TargetInfo *elf::getLoongArchTargetInfo() { ++ static LoongArch target; ++ return ⌖ ++} +diff --git a/lld/ELF/CMakeLists.txt b/lld/ELF/CMakeLists.txt +index 8e6a746d219e..89955db67733 100644 +--- a/lld/ELF/CMakeLists.txt ++++ b/lld/ELF/CMakeLists.txt +@@ -25,6 +25,7 @@ add_lld_library(lldELF + Arch/ARM.cpp + Arch/AVR.cpp + Arch/Hexagon.cpp ++ Arch/LoongArch.cpp + Arch/Mips.cpp + Arch/MipsArchTree.cpp + Arch/MSP430.cpp +diff --git a/lld/ELF/Driver.cpp b/lld/ELF/Driver.cpp +index 7e2a72acf8f6..3c1803be6fb9 100644 +--- a/lld/ELF/Driver.cpp ++++ b/lld/ELF/Driver.cpp +@@ -167,6 +167,7 @@ static std::tuple parseEmulation(StringRef emul) { + .Case("elf32lriscv", {ELF32LEKind, EM_RISCV}) + .Cases("elf32ppc", "elf32ppclinux", {ELF32BEKind, EM_PPC}) + .Cases("elf32lppc", "elf32lppclinux", {ELF32LEKind, EM_PPC}) ++ .Case("elf32loongarch", {ELF32LEKind, EM_LOONGARCH}) + .Case("elf64btsmip", {ELF64BEKind, EM_MIPS}) + .Case("elf64ltsmip", {ELF64LEKind, EM_MIPS}) + .Case("elf64lriscv", {ELF64LEKind, EM_RISCV}) +@@ -178,6 +179,7 @@ static std::tuple parseEmulation(StringRef emul) { + .Case("elf64_sparc", {ELF64BEKind, EM_SPARCV9}) + .Case("msp430elf", {ELF32LEKind, EM_MSP430}) + .Case("elf64_amdgpu", {ELF64LEKind, EM_AMDGPU}) ++ .Case("elf64loongarch", {ELF64LEKind, EM_LOONGARCH}) + .Default({ELFNoneKind, EM_NONE}); + + if (ret.first == ELFNoneKind) +@@ -1032,8 +1034,9 @@ static bool getIsRela(opt::InputArgList &args) { + + // Otherwise use the psABI defined relocation entry format. + uint16_t m = config->emachine; +- return m == EM_AARCH64 || m == EM_AMDGPU || m == EM_HEXAGON || m == EM_PPC || +- m == EM_PPC64 || m == EM_RISCV || m == EM_X86_64; ++ return m == EM_AARCH64 || m == EM_AMDGPU || m == EM_HEXAGON || ++ m == EM_LOONGARCH || m == EM_PPC || m == EM_PPC64 || m == EM_RISCV || ++ m == EM_X86_64; + } + + static void parseClangOption(StringRef opt, const Twine &msg) { +@@ -1570,8 +1573,9 @@ static void setConfigs(opt::InputArgList &args) { + // have support for reading Elf_Rel addends, so we only enable for a subset. 
+ #ifndef NDEBUG + bool checkDynamicRelocsDefault = m == EM_AARCH64 || m == EM_ARM || +- m == EM_386 || m == EM_MIPS || +- m == EM_X86_64 || m == EM_RISCV; ++ m == EM_386 || m == EM_LOONGARCH || ++ m == EM_MIPS || m == EM_RISCV || ++ m == EM_X86_64; + #else + bool checkDynamicRelocsDefault = false; + #endif +diff --git a/lld/ELF/InputFiles.cpp b/lld/ELF/InputFiles.cpp +index 7dacdeb9f042..c43b4afd6cb6 100644 +--- a/lld/ELF/InputFiles.cpp ++++ b/lld/ELF/InputFiles.cpp +@@ -1534,6 +1534,9 @@ static uint16_t getBitcodeMachineKind(StringRef path, const Triple &t) { + return EM_AVR; + case Triple::hexagon: + return EM_HEXAGON; ++ case Triple::loongarch32: ++ case Triple::loongarch64: ++ return EM_LOONGARCH; + case Triple::mips: + case Triple::mipsel: + case Triple::mips64: +diff --git a/lld/ELF/InputSection.cpp b/lld/ELF/InputSection.cpp +index df24f998bff6..d56291cbd84a 100644 +--- a/lld/ELF/InputSection.cpp ++++ b/lld/ELF/InputSection.cpp +@@ -609,6 +609,7 @@ static int64_t getTlsTpOffset(const Symbol &s) { + // to allow a signed 16-bit offset to reach 0x1000 of TCB/thread-library + // data and 0xf000 of the program's TLS segment. + return s.getVA(0) + (tls->p_vaddr & (tls->p_align - 1)) - 0x7000; ++ case EM_LOONGARCH: + case EM_RISCV: + return s.getVA(0) + (tls->p_vaddr & (tls->p_align - 1)); + +@@ -643,6 +644,14 @@ uint64_t InputSectionBase::getRelocTargetVA(const InputFile *file, RelType type, + case R_GOT: + case R_RELAX_TLS_GD_TO_IE_ABS: + return sym.getGotVA() + a; ++ case R_LOONGARCH_GOT: ++ // The LoongArch TLS GD relocs reuse the R_LARCH_GOT_PC_LO12 reloc type ++ // for their page offsets. The arithmetics are different in the TLS case ++ // so we have to duplicate some logic here. ++ if (sym.hasFlag(NEEDS_TLSGD) && type != R_LARCH_TLS_IE_PC_LO12) ++ // Like R_LOONGARCH_TLSGD_PAGE_PC but taking the absolute value. 
++ return in.got->getGlobalDynAddr(sym) + a; ++ return getRelocTargetVA(file, type, a, p, sym, R_GOT); + case R_GOTONLY_PC: + return in.got->getVA() + a - p; + case R_GOTPLTONLY_PC: +@@ -667,6 +676,10 @@ uint64_t InputSectionBase::getRelocTargetVA(const InputFile *file, RelType type, + case R_GOT_PC: + case R_RELAX_TLS_GD_TO_IE: + return sym.getGotVA() + a - p; ++ case R_LOONGARCH_GOT_PAGE_PC: ++ if (sym.hasFlag(NEEDS_TLSGD)) ++ return getLoongArchPageDelta(in.got->getGlobalDynAddr(sym) + a, p); ++ return getLoongArchPageDelta(sym.getGotVA() + a, p); + case R_MIPS_GOTREL: + return sym.getVA(a) - in.mipsGot->getGp(file); + case R_MIPS_GOT_GP: +@@ -715,6 +728,8 @@ uint64_t InputSectionBase::getRelocTargetVA(const InputFile *file, RelType type, + *hiRel->sym, hiRel->expr); + return 0; + } ++ case R_LOONGARCH_PAGE_PC: ++ return getLoongArchPageDelta(sym.getVA(a), p); + case R_PC: + case R_ARM_PCA: { + uint64_t dest; +@@ -748,6 +763,8 @@ uint64_t InputSectionBase::getRelocTargetVA(const InputFile *file, RelType type, + case R_PLT_PC: + case R_PPC64_CALL_PLT: + return sym.getPltVA() + a - p; ++ case R_LOONGARCH_PLT_PAGE_PC: ++ return getLoongArchPageDelta(sym.getPltVA() + a, p); + case R_PLT_GOTPLT: + return sym.getPltVA() + a - in.gotPlt->getVA(); + case R_PPC32_PLTREL: +@@ -808,6 +825,8 @@ uint64_t InputSectionBase::getRelocTargetVA(const InputFile *file, RelType type, + return in.got->getGlobalDynAddr(sym) + a - in.gotPlt->getVA(); + case R_TLSGD_PC: + return in.got->getGlobalDynAddr(sym) + a - p; ++ case R_LOONGARCH_TLSGD_PAGE_PC: ++ return getLoongArchPageDelta(in.got->getGlobalDynAddr(sym) + a, p); + case R_TLSLD_GOTPLT: + return in.got->getVA() + in.got->getTlsIndexOff() + a - in.gotPlt->getVA(); + case R_TLSLD_GOT: +diff --git a/lld/ELF/Relocations.cpp b/lld/ELF/Relocations.cpp +index aeba918292a7..b43c052552ab 100644 +--- a/lld/ELF/Relocations.cpp ++++ b/lld/ELF/Relocations.cpp +@@ -190,8 +190,8 @@ static bool isAbsoluteValue(const Symbol &sym) { + + // Returns true if Expr refers a PLT entry. + static bool needsPlt(RelExpr expr) { +- return oneof( +- expr); ++ return oneof(expr); + } + + // Returns true if Expr refers a GOT entry. 
Note that this function +@@ -200,7 +200,8 @@ static bool needsPlt(RelExpr expr) { + static bool needsGot(RelExpr expr) { + return oneof(expr); ++ R_AARCH64_GOT_PAGE, R_LOONGARCH_GOT, R_LOONGARCH_GOT_PAGE_PC>( ++ expr); + } + + // True if this expression is of the form Sym - X, where X is a position in the +@@ -208,12 +209,14 @@ static bool needsGot(RelExpr expr) { + static bool isRelExpr(RelExpr expr) { + return oneof(expr); ++ R_RISCV_PC_INDIRECT, R_PPC64_RELAX_GOT_PC, R_LOONGARCH_PAGE_PC>( ++ expr); + } + +- + static RelExpr toPlt(RelExpr expr) { + switch (expr) { ++ case R_LOONGARCH_PAGE_PC: ++ return R_LOONGARCH_PLT_PAGE_PC; + case R_PPC64_CALL: + return R_PPC64_CALL_PLT; + case R_PC: +@@ -232,6 +235,8 @@ static RelExpr fromPlt(RelExpr expr) { + case R_PLT_PC: + case R_PPC32_PLTREL: + return R_PC; ++ case R_LOONGARCH_PLT_PAGE_PC: ++ return R_LOONGARCH_PAGE_PC; + case R_PPC64_CALL_PLT: + return R_PPC64_CALL; + case R_PLT: +@@ -946,7 +951,9 @@ bool RelocationScanner::isStaticLinkTimeConstant(RelExpr e, RelType type, + R_MIPS_GOTREL, R_MIPS_GOT_OFF, R_MIPS_GOT_OFF32, R_MIPS_GOT_GP_PC, + R_AARCH64_GOT_PAGE_PC, R_GOT_PC, R_GOTONLY_PC, R_GOTPLTONLY_PC, + R_PLT_PC, R_PLT_GOTPLT, R_PPC32_PLTREL, R_PPC64_CALL_PLT, +- R_PPC64_RELAX_TOC, R_RISCV_ADD, R_AARCH64_GOT_PAGE>(e)) ++ R_PPC64_RELAX_TOC, R_RISCV_ADD, R_AARCH64_GOT_PAGE, ++ R_LOONGARCH_PLT_PAGE_PC, R_LOONGARCH_GOT, R_LOONGARCH_GOT_PAGE_PC>( ++ e)) + return true; + + // These never do, except if the entire file is position dependent or if +@@ -1050,7 +1057,9 @@ void RelocationScanner::processAux(RelExpr expr, RelType type, uint64_t offset, + // for detailed description: + // ftp://www.linux-mips.org/pub/linux/mips/doc/ABI/mipsabi.pdf + in.mipsGot->addEntry(*sec->file, sym, addend, expr); +- } else { ++ } else if (!sym.isTls() || config->emachine != EM_LOONGARCH) { ++ // Many LoongArch TLS relocs reuse the R_LOONGARCH_GOT type, in which ++ // case the NEEDS_GOT flag shouldn't get set. + sym.setFlags(NEEDS_GOT); + } + } else if (needsPlt(expr)) { +@@ -1090,7 +1099,8 @@ void RelocationScanner::processAux(RelExpr expr, RelType type, uint64_t offset, + (isa(sec) && config->emachine != EM_MIPS)); + if (canWrite) { + RelType rel = target->getDynRel(type); +- if (expr == R_GOT || (rel == target->symbolicRel && !sym.isPreemptible)) { ++ if (oneof(expr) || ++ (rel == target->symbolicRel && !sym.isPreemptible)) { + addRelativeReloc(*sec, offset, sym, addend, expr, type); + return; + } else if (rel != 0) { +@@ -1242,11 +1252,13 @@ static unsigned handleTlsRelocation(RelType type, Symbol &sym, + return 1; + } + +- // ARM, Hexagon and RISC-V do not support GD/LD to IE/LE relaxation. For +- // PPC64, if the file has missing R_PPC64_TLSGD/R_PPC64_TLSLD, disable ++ // ARM, Hexagon, LoongArch and RISC-V do not support GD/LD to IE/LE ++ // relaxation. ++ // For PPC64, if the file has missing R_PPC64_TLSGD/R_PPC64_TLSLD, disable + // relaxation as well. + bool toExecRelax = !config->shared && config->emachine != EM_ARM && + config->emachine != EM_HEXAGON && ++ config->emachine != EM_LOONGARCH && + config->emachine != EM_RISCV && + !c.file->ppc64DisableTLSRelax; + +@@ -1263,8 +1275,7 @@ static unsigned handleTlsRelocation(RelType type, Symbol &sym, + // being suitable for being dynamically loaded via dlopen. GOT[e0] is the + // module index, with a special value of 0 for the current module. GOT[e1] is + // unused. There only needs to be one module index entry. 
+- if (oneof( +- expr)) { ++ if (oneof(expr)) { + // Local-Dynamic relocs can be relaxed to Local-Exec. + if (toExecRelax) { + c.addReloc({target->adjustTlsExpr(type, R_RELAX_TLS_LD_TO_LE), type, +@@ -1295,7 +1306,8 @@ static unsigned handleTlsRelocation(RelType type, Symbol &sym, + } + + if (oneof(expr)) { ++ R_TLSDESC_GOTPLT, R_TLSGD_GOT, R_TLSGD_GOTPLT, R_TLSGD_PC, ++ R_LOONGARCH_TLSGD_PAGE_PC>(expr)) { + if (!toExecRelax) { + sym.setFlags(NEEDS_TLSGD); + c.addReloc({expr, type, offset, addend, &sym}); +@@ -1315,8 +1327,8 @@ static unsigned handleTlsRelocation(RelType type, Symbol &sym, + return target->getTlsGdRelaxSkip(type); + } + +- if (oneof(expr)) { ++ if (oneof(expr)) { + ctx.hasTlsIe.store(true, std::memory_order_relaxed); + // Initial-Exec relocs can be relaxed to Local-Exec if the symbol is locally + // defined. +diff --git a/lld/ELF/Relocations.h b/lld/ELF/Relocations.h +index 29e3edeca6be..e36215bd0d93 100644 +--- a/lld/ELF/Relocations.h ++++ b/lld/ELF/Relocations.h +@@ -102,6 +102,15 @@ enum RelExpr { + R_PPC64_RELAX_GOT_PC, + R_RISCV_ADD, + R_RISCV_PC_INDIRECT, ++ // Same as R_PC but with page-aligned semantics. ++ R_LOONGARCH_PAGE_PC, ++ // Same as R_PLT_PC but with page-aligned semantics. ++ R_LOONGARCH_PLT_PAGE_PC, ++ // In addition to having page-aligned semantics, LoongArch GOT relocs are ++ // also reused for TLS, making the semantics differ from other architectures. ++ R_LOONGARCH_GOT, ++ R_LOONGARCH_GOT_PAGE_PC, ++ R_LOONGARCH_TLSGD_PAGE_PC, + }; + + // Architecture-neutral representation of relocation. +diff --git a/lld/ELF/ScriptParser.cpp b/lld/ELF/ScriptParser.cpp +index bb09bde5d22e..c7b107515d31 100644 +--- a/lld/ELF/ScriptParser.cpp ++++ b/lld/ELF/ScriptParser.cpp +@@ -438,6 +438,8 @@ static std::pair parseBfdName(StringRef s) { + .Case("elf64-littleriscv", {ELF64LEKind, EM_RISCV}) + .Case("elf64-sparc", {ELF64BEKind, EM_SPARCV9}) + .Case("elf32-msp430", {ELF32LEKind, EM_MSP430}) ++ .Case("elf32-loongarch", {ELF32LEKind, EM_LOONGARCH}) ++ .Case("elf64-loongarch", {ELF64LEKind, EM_LOONGARCH}) + .Default({ELFNoneKind, EM_NONE}); + } + +diff --git a/lld/ELF/Target.cpp b/lld/ELF/Target.cpp +index 3873c7a25e44..32bb2164a208 100644 +--- a/lld/ELF/Target.cpp ++++ b/lld/ELF/Target.cpp +@@ -62,6 +62,8 @@ TargetInfo *elf::getTarget() { + return getAVRTargetInfo(); + case EM_HEXAGON: + return getHexagonTargetInfo(); ++ case EM_LOONGARCH: ++ return getLoongArchTargetInfo(); + case EM_MIPS: + switch (config->ekind) { + case ELF32LEKind: +diff --git a/lld/ELF/Target.h b/lld/ELF/Target.h +index e6a78169058a..d1a4fb195b89 100644 +--- a/lld/ELF/Target.h ++++ b/lld/ELF/Target.h +@@ -172,6 +172,7 @@ TargetInfo *getAMDGPUTargetInfo(); + TargetInfo *getARMTargetInfo(); + TargetInfo *getAVRTargetInfo(); + TargetInfo *getHexagonTargetInfo(); ++TargetInfo *getLoongArchTargetInfo(); + TargetInfo *getMSP430TargetInfo(); + TargetInfo *getPPC64TargetInfo(); + TargetInfo *getPPCTargetInfo(); +@@ -215,6 +216,7 @@ void writePrefixedInstruction(uint8_t *loc, uint64_t insn); + void addPPC64SaveRestore(); + uint64_t getPPC64TocBase(); + uint64_t getAArch64Page(uint64_t expr); ++uint64_t getLoongArchPageDelta(uint64_t dest, uint64_t pc); + void riscvFinalizeRelax(int passes); + void mergeRISCVAttributesSections(); + +diff --git a/lld/docs/ReleaseNotes.rst b/lld/docs/ReleaseNotes.rst +index a450923cded9..290244af1eb9 100644 +--- a/lld/docs/ReleaseNotes.rst ++++ b/lld/docs/ReleaseNotes.rst +@@ -59,6 +59,65 @@ ELF Improvements + * Armv4(T) thunks are now supported. 
+ (`D139888 `_) + (`D141272 `_) ++* When ``--threads=`` is not specified, the number of concurrency is now capped to 16. ++ A large ``--thread=`` can harm performance, especially with some system ++ malloc implementations like glibc's. ++ (`D147493 `_) ++* ``--remap-inputs=`` and ``--remap-inputs-file=`` are added to remap input files. ++ (`D148859 `_) ++* ``--lto=`` is now available to support ``clang -funified-lto`` ++ (`D123805 `_) ++* ``--lto-CGO[0-3]`` is now available to control ``CodeGenOpt::Level`` independent of the LTO optimization level. ++ (`D141970 `_) ++* ``--check-dynamic-relocations=`` is now correct 32-bit targets when the addend is larger than 0x80000000. ++ (`D149347 `_) ++* ``--print-memory-usage`` has been implemented for memory regions. ++ (`D150644 `_) ++* ``SHF_MERGE``, ``--icf=``, and ``--build-id=fast`` have switched to 64-bit xxh3. ++ (`D154813 `_) ++* Quoted output section names can now be used in linker scripts. ++ (`#60496 `_) ++* ``MEMORY`` can now be used without a ``SECTIONS`` command. ++ (`D145132 `_) ++* ``REVERSE`` can now be used in input section descriptions to reverse the order of input sections. ++ (`D145381 `_) ++* Program header assignment can now be used within ``OVERLAY``. This functionality was accidentally lost in 2020. ++ (`D150445 `_) ++* Operators ``^`` and ``^=`` can now be used in linker scripts. ++* LoongArch is now supported. ++* ``DT_AARCH64_MEMTAG_*`` dynamic tags are now supported. ++ (`D143769 `_) ++* AArch32 port now supports BE-8 and BE-32 modes for big-endian. ++ (`D140201 `_) ++ (`D140202 `_) ++ (`D150870 `_) ++* ``R_ARM_THM_ALU_ABS_G*`` relocations are now supported. ++ (`D153407 `_) ++* ``.ARM.exidx`` sections may start at non-zero output section offset. ++ (`D148033 `_) ++* Arm Cortex-M Security Extensions is now implemented. ++ (`D139092 `_) ++* BTI landing pads are now added to PLT entries accessed by range extension thunks or relative vtables. ++ (`D148704 `_) ++ (`D153264 `_) ++* AArch64 short range thunk has been implemented to mitigate the performance loss of a long range thunk. ++ (`D148701 `_) ++* ``R_AVR_8_LO8/R_AVR_8_HI8/R_AVR_8_HLO8/R_AVR_LO8_LDI_GS/R_AVR_HI8_LDI_GS`` have been implemented. ++ (`D147100 `_) ++ (`D147364 `_) ++* ``--no-power10-stubs`` now works for PowerPC64. ++* ``DT_PPC64_OPT`` is now supported; ++ (`D150631 `_) ++* ``PT_RISCV_ATTRIBUTES`` is added to include the SHT_RISCV_ATTRIBUTES section. ++ (`D152065 `_) ++* ``R_RISCV_PLT32`` is added to support C++ relative vtables. ++ (`D143115 `_) ++* RISC-V global pointer relaxation has been implemented. Specify ``--relax-gp`` to enable the linker relaxation. ++ (`D143673 `_) ++* The symbol value of ``foo`` is correctly handled when ``--wrap=foo`` and RISC-V linker relaxation are used. ++ (`D151768 `_) ++* x86-64 large data sections are now placed away from code sections to alleviate relocation overflow pressure. ++ (`D150510 `_) + + Breaking changes + ---------------- +diff --git a/lld/docs/index.rst b/lld/docs/index.rst +index ce6320333243..a3407d3b9db0 100644 +--- a/lld/docs/index.rst ++++ b/lld/docs/index.rst +@@ -22,10 +22,11 @@ Features + machine, you can expect that LLD runs more than twice as fast as the GNU + gold linker. Your mileage may vary, though. + +-- It supports various CPUs/ABIs including AArch64, AMDGPU, ARM, Hexagon, MIPS +- 32/64 big/little-endian, PowerPC, PowerPC64, RISC-V, SPARC V9, x86-32 and +- x86-64. Among these, AArch64, ARM (>= v6), PowerPC, PowerPC64, x86-32 and +- x86-64 have production quality. MIPS seems decent too. 
++- It supports various CPUs/ABIs including AArch64, AMDGPU, ARM, Hexagon, ++ LoongArch, MIPS 32/64 big/little-endian, PowerPC, PowerPC64, RISC-V, ++ SPARC V9, x86-32 and x86-64. Among these, AArch64, ARM (>= v4), LoongArch, ++ PowerPC, PowerPC64, RISC-V, x86-32 and x86-64 have production quality. ++ MIPS seems decent too. + + - It is always a cross-linker, meaning that it always supports all the + above targets however it was built. In fact, we don't provide a +diff --git a/lld/docs/ld.lld.1 b/lld/docs/ld.lld.1 +index edeb7c4bfe37..4889d04b924f 100644 +--- a/lld/docs/ld.lld.1 ++++ b/lld/docs/ld.lld.1 +@@ -4,7 +4,7 @@ + .\" + .\" This man page documents only lld's ELF linking support, obtained originally + .\" from FreeBSD. +-.Dd May 12, 2019 ++.Dd Jul 25, 2023 + .Dt LD.LLD 1 + .Os + .Sh NAME +@@ -27,8 +27,8 @@ It accepts most of the same command line arguments and linker scripts + as GNU linkers. + .Pp + .Nm +-currently supports i386, x86-64, ARM, AArch64, PowerPC32, PowerPC64, +-MIPS32, MIPS64, RISC-V, AMDGPU, Hexagon and SPARC V9 targets. ++currently supports i386, x86-64, ARM, AArch64, LoongArch, PowerPC32, ++PowerPC64, MIPS32, MIPS64, RISC-V, AMDGPU, Hexagon and SPARC V9 targets. + .Nm + acts as a Microsoft link.exe-compatible linker if invoked as + .Nm lld-link +diff --git a/lld/test/ELF/emulation-loongarch.s b/lld/test/ELF/emulation-loongarch.s +new file mode 100644 +index 000000000000..343e836274a3 +--- /dev/null ++++ b/lld/test/ELF/emulation-loongarch.s +@@ -0,0 +1,78 @@ ++# REQUIRES: loongarch ++ ++# RUN: llvm-mc -filetype=obj -triple=loongarch32 %s -o %t.o ++# RUN: ld.lld %t.o -o %t ++# RUN: llvm-readobj --file-headers %t | FileCheck --check-prefix=LA32 %s ++# RUN: ld.lld -m elf32loongarch %t.o -o %t ++# RUN: llvm-readobj --file-headers %t | FileCheck --check-prefix=LA32 %s ++# RUN: echo 'OUTPUT_FORMAT(elf32-loongarch)' > %t.script ++# RUN: ld.lld %t.script %t.o -o %t ++# RUN: llvm-readobj --file-headers %t | FileCheck --check-prefix=LA32 %s ++ ++# LA32: ElfHeader { ++# LA32-NEXT: Ident { ++# LA32-NEXT: Magic: (7F 45 4C 46) ++# LA32-NEXT: Class: 32-bit (0x1) ++# LA32-NEXT: DataEncoding: LittleEndian (0x1) ++# LA32-NEXT: FileVersion: 1 ++# LA32-NEXT: OS/ABI: SystemV (0x0) ++# LA32-NEXT: ABIVersion: 0 ++# LA32-NEXT: Unused: (00 00 00 00 00 00 00) ++# LA32-NEXT: } ++# LA32-NEXT: Type: Executable (0x2) ++# LA32-NEXT: Machine: EM_LOONGARCH (0x102) ++# LA32-NEXT: Version: 1 ++# LA32-NEXT: Entry: ++# LA32-NEXT: ProgramHeaderOffset: 0x34 ++# LA32-NEXT: SectionHeaderOffset: ++# LA32-NEXT: Flags [ (0x43) ++# LA32-NEXT: EF_LOONGARCH_ABI_DOUBLE_FLOAT (0x3) ++# LA32-NEXT: EF_LOONGARCH_OBJABI_V1 (0x40) ++# LA32-NEXT: ] ++# LA32-NEXT: HeaderSize: 52 ++# LA32-NEXT: ProgramHeaderEntrySize: 32 ++# LA32-NEXT: ProgramHeaderCount: ++# LA32-NEXT: SectionHeaderEntrySize: 40 ++# LA32-NEXT: SectionHeaderCount: ++# LA32-NEXT: StringTableSectionIndex: ++# LA32-NEXT: } ++ ++# RUN: llvm-mc -filetype=obj -triple=loongarch64 %s -o %t.o ++# RUN: ld.lld %t.o -o %t ++# RUN: llvm-readobj --file-headers %t | FileCheck --check-prefix=LA64 %s ++# RUN: ld.lld -m elf64loongarch %t.o -o %t ++# RUN: llvm-readobj --file-headers %t | FileCheck --check-prefix=LA64 %s ++# RUN: echo 'OUTPUT_FORMAT(elf64-loongarch)' > %t.script ++# RUN: ld.lld %t.script %t.o -o %t ++# RUN: llvm-readobj --file-headers %t | FileCheck --check-prefix=LA64 %s ++ ++# LA64: ElfHeader { ++# LA64-NEXT: Ident { ++# LA64-NEXT: Magic: (7F 45 4C 46) ++# LA64-NEXT: Class: 64-bit (0x2) ++# LA64-NEXT: DataEncoding: LittleEndian (0x1) ++# LA64-NEXT: 
FileVersion: 1 ++# LA64-NEXT: OS/ABI: SystemV (0x0) ++# LA64-NEXT: ABIVersion: 0 ++# LA64-NEXT: Unused: (00 00 00 00 00 00 00) ++# LA64-NEXT: } ++# LA64-NEXT: Type: Executable (0x2) ++# LA64-NEXT: Machine: EM_LOONGARCH (0x102) ++# LA64-NEXT: Version: 1 ++# LA64-NEXT: Entry: ++# LA64-NEXT: ProgramHeaderOffset: 0x40 ++# LA64-NEXT: SectionHeaderOffset: ++# LA64-NEXT: Flags [ (0x43) ++# LA64-NEXT: EF_LOONGARCH_ABI_DOUBLE_FLOAT (0x3) ++# LA64-NEXT: EF_LOONGARCH_OBJABI_V1 (0x40) ++# LA64-NEXT: ] ++# LA64-NEXT: HeaderSize: 64 ++# LA64-NEXT: ProgramHeaderEntrySize: 56 ++# LA64-NEXT: ProgramHeaderCount: ++# LA64-NEXT: SectionHeaderEntrySize: 64 ++# LA64-NEXT: SectionHeaderCount: ++# LA64-NEXT: StringTableSectionIndex: ++# LA64-NEXT: } ++ ++.globl _start ++_start: +diff --git a/lld/test/ELF/loongarch-abs64.s b/lld/test/ELF/loongarch-abs64.s +new file mode 100644 +index 000000000000..4bfe7df9135a +--- /dev/null ++++ b/lld/test/ELF/loongarch-abs64.s +@@ -0,0 +1,64 @@ ++# REQUIRES: loongarch ++ ++# RUN: llvm-mc --filetype=obj --triple=loongarch64-unknown-elf %s -o %t.la64.o ++ ++# RUN: ld.lld %t.la64.o --defsym foo=0 --defsym bar=42 -o %t.la64.1 ++# RUN: llvm-objdump --no-show-raw-insn -d %t.la64.1 | FileCheck --check-prefix=CASE1 %s ++# CASE1: lu12i.w $a0, 0 ++# CASE1-NEXT: ori $a0, $a0, 0 ++# CASE1-NEXT: lu32i.d $a0, 0 ++# CASE1-NEXT: lu52i.d $a0, $a0, 0 ++# CASE1-NEXT: lu12i.w $a1, 0 ++# CASE1-NEXT: ori $a1, $a1, 42 ++# CASE1-NEXT: lu32i.d $a1, 0 ++# CASE1-NEXT: lu52i.d $a1, $a1, 0 ++ ++# RUN: ld.lld %t.la64.o --defsym foo=0x12345678 --defsym bar=0x87654321 -o %t.la64.2 ++# RUN: llvm-objdump --no-show-raw-insn -d %t.la64.2 | FileCheck --check-prefix=CASE2 %s ++# CASE2: lu12i.w $a0, 74565 ++# CASE2-NEXT: ori $a0, $a0, 1656 ++# CASE2-NEXT: lu32i.d $a0, 0 ++# CASE2-NEXT: lu52i.d $a0, $a0, 0 ++# CASE2-NEXT: lu12i.w $a1, -493996 ++# CASE2-NEXT: ori $a1, $a1, 801 ++# CASE2-NEXT: lu32i.d $a1, 0 ++# CASE2-NEXT: lu52i.d $a1, $a1, 0 ++ ++# RUN: ld.lld %t.la64.o --defsym foo=0x12345fedcb678 --defsym bar=0xfedcb12345000 -o %t.la64.3 ++# RUN: llvm-objdump --no-show-raw-insn -d %t.la64.3 | FileCheck --check-prefix=CASE3 %s ++# CASE3: lu12i.w $a0, -4661 ++# CASE3-NEXT: ori $a0, $a0, 1656 ++# CASE3-NEXT: lu32i.d $a0, 74565 ++# CASE3-NEXT: lu52i.d $a0, $a0, 0 ++# CASE3-NEXT: lu12i.w $a1, 74565 ++# CASE3-NEXT: ori $a1, $a1, 0 ++# CASE3-NEXT: lu32i.d $a1, -4661 ++# CASE3-NEXT: lu52i.d $a1, $a1, 0 ++ ++# RUN: ld.lld %t.la64.o --defsym foo=0xfffffeeeeeddd --defsym bar=0xfff00000f1111222 -o %t.la64.4 ++# RUN: llvm-objdump --no-show-raw-insn -d %t.la64.4 | FileCheck --check-prefix=CASE4 %s ++# CASE4: lu12i.w $a0, -69906 ++# CASE4-NEXT: ori $a0, $a0, 3549 ++# CASE4-NEXT: lu32i.d $a0, -1 ++# CASE4-NEXT: lu52i.d $a0, $a0, 0 ++# CASE4-NEXT: lu12i.w $a1, -61167 ++# CASE4-NEXT: ori $a1, $a1, 546 ++# CASE4-NEXT: lu32i.d $a1, 0 ++# CASE4-NEXT: lu52i.d $a1, $a1, -1 ++ ++.global _start ++ ++_start: ++1: ++ lu12i.w $a0, %abs_hi20(foo) ++.reloc 1b, R_LARCH_MARK_LA, foo ++ ori $a0, $a0, %abs_lo12(foo) ++ lu32i.d $a0, %abs64_lo20(foo) ++ lu52i.d $a0, $a0, %abs64_hi12(foo) ++ ++2: ++ lu12i.w $a1, %abs_hi20(bar) ++.reloc 1b, R_LARCH_MARK_LA, bar ++ ori $a1, $a1, %abs_lo12(bar) ++ lu32i.d $a1, %abs64_lo20(bar) ++ lu52i.d $a1, $a1, %abs64_hi12(bar) +diff --git a/lld/test/ELF/loongarch-add-sub.s b/lld/test/ELF/loongarch-add-sub.s +new file mode 100644 +index 000000000000..963e4cbbe0fc +--- /dev/null ++++ b/lld/test/ELF/loongarch-add-sub.s +@@ -0,0 +1,36 @@ ++# REQUIRES: loongarch ++ ++# RUN: llvm-mc --filetype=obj 
--triple=loongarch64-unknown-elf %s -o %t.la64.o ++ ++# RUN: ld.lld --section-start=.rodata=0x1234567890 --section-start=.text=0x9876543210 %t.la64.o -o %t.la64 ++# RUN: llvm-readelf -x .rodata %t.la64 | FileCheck --check-prefix=CHECK %s ++# CHECK: section '.rodata': ++# CHECK-NEXT: 0x1234567890 10325476 98badcfe 80b9fd41 86000000 ++# CHECK-NEXT: 0x12345678a0 80b9fd41 80b980 ++ ++.global _start ++ ++_start: ++1: ++ break 0 ++ ++.rodata ++2: ++ .dword 0xfedcba9876543210 ++ ++foo: ++ .dword 0 ++ .reloc foo, R_LARCH_ADD64, 1b ++ .reloc foo, R_LARCH_SUB64, 2b ++bar: ++ .word 0 ++ .reloc bar, R_LARCH_ADD32, 1b ++ .reloc bar, R_LARCH_SUB32, 2b ++baz: ++ .short 0 ++ .reloc baz, R_LARCH_ADD16, 1b ++ .reloc baz, R_LARCH_SUB16, 2b ++quux: ++ .byte 0 ++ .reloc quux, R_LARCH_ADD8, 1b ++ .reloc quux, R_LARCH_SUB8, 2b +diff --git a/lld/test/ELF/loongarch-branch.s b/lld/test/ELF/loongarch-branch.s +new file mode 100644 +index 000000000000..b223ff95bd89 +--- /dev/null ++++ b/lld/test/ELF/loongarch-branch.s +@@ -0,0 +1,68 @@ ++# REQUIRES: loongarch ++ ++# RUN: llvm-mc --filetype=obj --triple=loongarch32-unknown-elf %s -o %t.la32.o ++# RUN: llvm-mc --filetype=obj --triple=loongarch64-unknown-elf %s -o %t.la64.o ++ ++# RUN: ld.lld %t.la32.o --defsym foo16=b16+4 --defsym bar16=b16 --defsym foo21=b21+4 --defsym bar21=b21 --defsym foo26=b26+4 --defsym bar26=b26 -o %t.la32 ++# RUN: ld.lld %t.la64.o --defsym foo16=b16+4 --defsym bar16=b16 --defsym foo21=b21+4 --defsym bar21=b21 --defsym foo26=b26+4 --defsym bar26=b26 -o %t.la64 ++# RUN: llvm-objdump --no-show-raw-insn -d %t.la32 | FileCheck %s --check-prefix=CHECK ++# RUN: llvm-objdump --no-show-raw-insn -d %t.la64 | FileCheck %s --check-prefix=CHECK ++# CHECK: beq $zero, $zero, 4 ++# CHECK: bne $zero, $zero, -4 ++# CHECK: beqz $s8, 4 ++# CHECK: bnez $s8, -4 ++# CHECK: b 4 ++# CHECK: bl -4 ++ ++# RUN: ld.lld %t.la32.o --defsym foo16=b16+0x1fffc --defsym bar16=b16+4-0x20000 --defsym foo21=b21+0x3ffffc --defsym bar21=b21+4-0x400000 --defsym foo26=b26+0x7fffffc --defsym bar26=b26+4-0x8000000 -o %t.la32.limits ++# RUN: ld.lld %t.la64.o --defsym foo16=b16+0x1fffc --defsym bar16=b16+4-0x20000 --defsym foo21=b21+0x3ffffc --defsym bar21=b21+4-0x400000 --defsym foo26=b26+0x7fffffc --defsym bar26=b26+4-0x8000000 -o %t.la64.limits ++# RUN: llvm-objdump --no-show-raw-insn -d %t.la32.limits | FileCheck --check-prefix=LIMITS %s ++# RUN: llvm-objdump --no-show-raw-insn -d %t.la64.limits | FileCheck --check-prefix=LIMITS %s ++# LIMITS: beq $zero, $zero, 131068 ++# LIMITS-NEXT: bne $zero, $zero, -131072 ++# LIMITS: beqz $s8, 4194300 ++# LIMITS-NEXT: bnez $s8, -4194304 ++# LIMITS: b 134217724 ++# LIMITS-NEXT: bl -134217728 ++ ++# RUN: not ld.lld %t.la32.o --defsym foo16=b16+0x20000 --defsym bar16=b16+4-0x20004 --defsym foo21=b21+0x400000 --defsym bar21=b21+4-0x400004 --defsym foo26=b26+0x8000000 --defsym bar26=b26+4-0x8000004 -o /dev/null 2>&1 | FileCheck -DFILE=%t.la32.o --check-prefix=ERROR-RANGE %s ++# RUN: not ld.lld %t.la64.o --defsym foo16=b16+0x20000 --defsym bar16=b16+4-0x20004 --defsym foo21=b21+0x400000 --defsym bar21=b21+4-0x400004 --defsym foo26=b26+0x8000000 --defsym bar26=b26+4-0x8000004 -o /dev/null 2>&1 | FileCheck -DFILE=%t.la64.o --check-prefix=ERROR-RANGE %s ++# ERROR-RANGE: error: [[FILE]]:(.text+0x0): relocation R_LARCH_B16 out of range: 131072 is not in [-131072, 131071]; references 'foo16' ++# ERROR-RANGE: error: [[FILE]]:(.text+0x4): relocation R_LARCH_B16 out of range: -131076 is not in [-131072, 131071]; references 'bar16' ++# ERROR-RANGE: error: 
[[FILE]]:(.text+0x8): relocation R_LARCH_B21 out of range: 4194304 is not in [-4194304, 4194303]; references 'foo21' ++# ERROR-RANGE: error: [[FILE]]:(.text+0xc): relocation R_LARCH_B21 out of range: -4194308 is not in [-4194304, 4194303]; references 'bar21' ++# ERROR-RANGE: error: [[FILE]]:(.text+0x10): relocation R_LARCH_B26 out of range: 134217728 is not in [-134217728, 134217727]; references 'foo26' ++# ERROR-RANGE: error: [[FILE]]:(.text+0x14): relocation R_LARCH_B26 out of range: -134217732 is not in [-134217728, 134217727]; references 'bar26' ++ ++# RUN: not ld.lld %t.la32.o --defsym foo16=b16+1 --defsym bar16=b16-1 --defsym foo21=b21+1 --defsym bar21=b21-1 --defsym foo26=b26+1 --defsym bar26=b26-1 -o /dev/null 2>&1 | FileCheck -DFILE=%t.la32.o --check-prefix=ERROR-ALIGN-1 %s ++# RUN: not ld.lld %t.la64.o --defsym foo16=b16+1 --defsym bar16=b16-1 --defsym foo21=b21+1 --defsym bar21=b21-1 --defsym foo26=b26+1 --defsym bar26=b26-1 -o /dev/null 2>&1 | FileCheck -DFILE=%t.la64.o --check-prefix=ERROR-ALIGN-1 %s ++# ERROR-ALIGN-1: error: [[FILE]]:(.text+0x0): improper alignment for relocation R_LARCH_B16: 0x1 is not aligned to 4 bytes ++# ERROR-ALIGN-1-NEXT: error: [[FILE]]:(.text+0x4): improper alignment for relocation R_LARCH_B16: 0xFFFFFFFFFFFFFFFB is not aligned to 4 bytes ++# ERROR-ALIGN-1-NEXT: error: [[FILE]]:(.text+0x8): improper alignment for relocation R_LARCH_B21: 0x1 is not aligned to 4 bytes ++# ERROR-ALIGN-1-NEXT: error: [[FILE]]:(.text+0xc): improper alignment for relocation R_LARCH_B21: 0xFFFFFFFFFFFFFFFB is not aligned to 4 bytes ++# ERROR-ALIGN-1-NEXT: error: [[FILE]]:(.text+0x10): improper alignment for relocation R_LARCH_B26: 0x1 is not aligned to 4 bytes ++# ERROR-ALIGN-1-NEXT: error: [[FILE]]:(.text+0x14): improper alignment for relocation R_LARCH_B26: 0xFFFFFFFFFFFFFFFB is not aligned to 4 bytes ++ ++# RUN: not ld.lld %t.la32.o --defsym foo16=b16+2 --defsym bar16=b16-2 --defsym foo21=b21+2 --defsym bar21=b21-2 --defsym foo26=b26+2 --defsym bar26=b26-2 -o /dev/null 2>&1 | FileCheck -DFILE=%t.la32.o --check-prefix=ERROR-ALIGN-2 %s ++# RUN: not ld.lld %t.la64.o --defsym foo16=b16+2 --defsym bar16=b16-2 --defsym foo21=b21+2 --defsym bar21=b21-2 --defsym foo26=b26+2 --defsym bar26=b26-2 -o /dev/null 2>&1 | FileCheck -DFILE=%t.la64.o --check-prefix=ERROR-ALIGN-2 %s ++# ERROR-ALIGN-2: error: [[FILE]]:(.text+0x0): improper alignment for relocation R_LARCH_B16: 0x2 is not aligned to 4 bytes ++# ERROR-ALIGN-2-NEXT: error: [[FILE]]:(.text+0x4): improper alignment for relocation R_LARCH_B16: 0xFFFFFFFFFFFFFFFA is not aligned to 4 bytes ++# ERROR-ALIGN-2-NEXT: error: [[FILE]]:(.text+0x8): improper alignment for relocation R_LARCH_B21: 0x2 is not aligned to 4 bytes ++# ERROR-ALIGN-2-NEXT: error: [[FILE]]:(.text+0xc): improper alignment for relocation R_LARCH_B21: 0xFFFFFFFFFFFFFFFA is not aligned to 4 bytes ++# ERROR-ALIGN-2-NEXT: error: [[FILE]]:(.text+0x10): improper alignment for relocation R_LARCH_B26: 0x2 is not aligned to 4 bytes ++# ERROR-ALIGN-2-NEXT: error: [[FILE]]:(.text+0x14): improper alignment for relocation R_LARCH_B26: 0xFFFFFFFFFFFFFFFA is not aligned to 4 bytes ++ ++.global _start ++.global b16 ++.global b21 ++.global b26 ++_start: ++b16: ++ beq $zero, $zero, foo16 ++ bne $zero, $zero, bar16 ++b21: ++ beqz $s8, foo21 ++ bnez $s8, bar21 ++b26: ++ b foo26 ++ bl bar26 +diff --git a/lld/test/ELF/loongarch-interlink.test b/lld/test/ELF/loongarch-interlink.test +new file mode 100644 +index 000000000000..44e5d03409a4 +--- /dev/null ++++ 
b/lld/test/ELF/loongarch-interlink.test +@@ -0,0 +1,84 @@ ++# REQUIRES: loongarch ++# RUN: rm -rf %t && split-file %s %t ++ ++# RUN: yaml2obj %t/blob.yaml -o %t/blob.o ++# RUN: yaml2obj %t/v0-lp64d.yaml -o %t/v0-lp64d.o ++# RUN: llvm-mc --filetype=obj --triple=loongarch64-unknown-gnu %t/start.s -o %t/v1-lp64d.o ++# RUN: llvm-mc --filetype=obj --triple=loongarch64-unknown-gnusf %t/start.s -o %t/v1-lp64s.o ++# RUN: llvm-mc --filetype=obj --triple=loongarch64-unknown-gnu %t/bar.s -o %t/v1-b-lp64d.o ++ ++## Check that binary input results in e_flags=0 output. ++# RUN: ld.lld -m elf64loongarch -b binary %t/blob.bin -o %t/blob.out ++# RUN: llvm-readobj -h %t/blob.out | FileCheck --check-prefix=EMPTY %s ++# EMPTY: Flags [ ++# EMPTY-NEXT: ] ++ ++## Check that interlink between e_flags=0 and normal input (that contain code) ++## is allowed. ++## Also check that the e_flags logic work as intended regardless of input file ++## order. ++# RUN: ld.lld %t/blob.o %t/v1-lp64d.o -o %t/v1-lp64d.out ++# RUN: ld.lld %t/v1-lp64s.o %t/blob.o -o %t/v1-lp64s.out ++# RUN: llvm-readobj -h %t/v1-lp64d.out | FileCheck --check-prefix=V1-LP64D %s ++# RUN: llvm-readobj -h %t/v1-lp64s.out | FileCheck --check-prefix=V1-LP64S %s ++# V1-LP64D: Flags [ (0x43) ++# V1-LP64S: Flags [ (0x41) ++ ++## Check that interlink between different ABIs is disallowed. ++# RUN: not ld.lld %t/v1-lp64s.o %t/v1-b-lp64d.o -o /dev/null 2>&1 | FileCheck -DFILE1=%t/v1-b-lp64d.o -DFILE2=%t/v1-lp64s.o --check-prefix=INTERLINK-ERR %s ++# INTERLINK-ERR: error: [[FILE1]]: cannot link object files with different ABI from [[FILE2]] ++ ++## Check that interlink between different object ABI versions is disallowed. ++# RUN: not ld.lld %t/v0-lp64d.o %t/v1-b-lp64d.o %t/blob.o -o /dev/null 2>&1 | FileCheck -DFILE=%t/v0-lp64d.o --check-prefix=VERSION-ERR %s ++# VERSION-ERR: error: [[FILE]]: unsupported object file ABI version ++ ++#--- blob.bin ++BLOB ++ ++#--- blob.yaml ++--- !ELF ++FileHeader: ++ Class: ELFCLASS64 ++ Data: ELFDATA2LSB ++ Type: ET_REL ++ Machine: EM_LOONGARCH ++ SectionHeaderStringTable: .strtab ++Sections: ++ - Name: .data ++ Type: SHT_PROGBITS ++ Flags: [ SHF_WRITE, SHF_ALLOC ] ++ AddressAlign: 0x1 ++ Content: 424C4F42 ++Symbols: ++ - Name: blob ++ Section: .data ++ Binding: STB_GLOBAL ++ ++#--- v0-lp64d.yaml ++--- !ELF ++FileHeader: ++ Class: ELFCLASS64 ++ Data: ELFDATA2LSB ++ Type: ET_REL ++ Machine: EM_LOONGARCH ++ Flags: [ EF_LOONGARCH_ABI_DOUBLE_FLOAT ] ++ SectionHeaderStringTable: .strtab ++Sections: ++ - Name: .text ++ Type: SHT_PROGBITS ++ Flags: [ SHF_ALLOC, SHF_EXECINSTR ] ++ AddressAlign: 0x4 ++ Content: 0000a002 ++ ++#--- start.s ++.global _start ++_start: ++ la $a0, blob ++ ld.b $a0, $a0, 0 ++ li.w $a7, 94 ++ syscall 0 ++ ++#--- bar.s ++bar: ++ move $a0, $zero ++ ret +diff --git a/lld/test/ELF/loongarch-pc-aligned.s b/lld/test/ELF/loongarch-pc-aligned.s +new file mode 100644 +index 000000000000..9df3492d1877 +--- /dev/null ++++ b/lld/test/ELF/loongarch-pc-aligned.s +@@ -0,0 +1,283 @@ ++# REQUIRES: loongarch ++# RUN: rm -rf %t && split-file %s %t ++ ++# RUN: llvm-mc --filetype=obj --triple=loongarch32 %t/a.s -o %t/a.la32.o ++# RUN: llvm-mc --filetype=obj --triple=loongarch64 %t/a.s -o %t/a.la64.o ++# RUN: llvm-mc --filetype=obj --triple=loongarch64 %t/extreme.s -o %t/extreme.o ++ ++# RUN: ld.lld %t/a.la32.o --section-start=.rodata=0x11000 --section-start=.text=0x11ffc -o %t/case1.la32 ++# RUN: ld.lld %t/a.la64.o --section-start=.rodata=0x11000 --section-start=.text=0x11ffc -o %t/case1.la64 ++# RUN: llvm-objdump -d 
--no-show-raw-insn %t/case1.la32 | FileCheck %s --check-prefix=CASE1 ++# RUN: llvm-objdump -d --no-show-raw-insn %t/case1.la64 | FileCheck %s --check-prefix=CASE1 ++# CASE1: pcalau12i $a0, 0 ++# CASE1-NEXT: ld.w $a0, $a0, 0 ++ ++# RUN: ld.lld %t/a.la32.o --section-start=.rodata=0x11000 --section-start=.text=0x12000 -o %t/case2.la32 ++# RUN: ld.lld %t/a.la64.o --section-start=.rodata=0x11000 --section-start=.text=0x12000 -o %t/case2.la64 ++# RUN: llvm-objdump -d --no-show-raw-insn %t/case2.la32 | FileCheck %s --check-prefix=CASE2 ++# RUN: llvm-objdump -d --no-show-raw-insn %t/case2.la64 | FileCheck %s --check-prefix=CASE2 ++# CASE2: pcalau12i $a0, -1 ++# CASE2-NEXT: ld.w $a0, $a0, 0 ++ ++# RUN: ld.lld %t/a.la32.o --section-start=.rodata=0x117ff --section-start=.text=0x12000 -o %t/case3.la32 ++# RUN: ld.lld %t/a.la64.o --section-start=.rodata=0x117ff --section-start=.text=0x12000 -o %t/case3.la64 ++# RUN: llvm-objdump -d --no-show-raw-insn %t/case3.la32 | FileCheck %s --check-prefix=CASE3 ++# RUN: llvm-objdump -d --no-show-raw-insn %t/case3.la64 | FileCheck %s --check-prefix=CASE3 ++# CASE3: pcalau12i $a0, -1 ++# CASE3-NEXT: ld.w $a0, $a0, 2047 ++ ++# RUN: ld.lld %t/a.la32.o --section-start=.rodata=0x11800 --section-start=.text=0x12000 -o %t/case4.la32 ++# RUN: ld.lld %t/a.la64.o --section-start=.rodata=0x11800 --section-start=.text=0x12000 -o %t/case4.la64 ++# RUN: llvm-objdump -d --no-show-raw-insn %t/case4.la32 | FileCheck %s --check-prefix=CASE4 ++# RUN: llvm-objdump -d --no-show-raw-insn %t/case4.la64 | FileCheck %s --check-prefix=CASE4 ++# CASE4: pcalau12i $a0, 0 ++# CASE4-NEXT: ld.w $a0, $a0, -2048 ++ ++# RUN: ld.lld %t/a.la32.o --section-start=.rodata=0x12004 --section-start=.text=0x11ffc -o %t/case5.la32 ++# RUN: ld.lld %t/a.la64.o --section-start=.rodata=0x12004 --section-start=.text=0x11ffc -o %t/case5.la64 ++# RUN: llvm-objdump -d --no-show-raw-insn %t/case5.la32 | FileCheck %s --check-prefix=CASE5 ++# RUN: llvm-objdump -d --no-show-raw-insn %t/case5.la64 | FileCheck %s --check-prefix=CASE5 ++# CASE5: pcalau12i $a0, 1 ++# CASE5-NEXT: ld.w $a0, $a0, 4 ++ ++# RUN: ld.lld %t/a.la32.o --section-start=.rodata=0x12800 --section-start=.text=0x11ffc -o %t/case6.la32 ++# RUN: ld.lld %t/a.la64.o --section-start=.rodata=0x12800 --section-start=.text=0x11ffc -o %t/case6.la64 ++# RUN: llvm-objdump -d --no-show-raw-insn %t/case6.la32 | FileCheck %s --check-prefix=CASE6 ++# RUN: llvm-objdump -d --no-show-raw-insn %t/case6.la64 | FileCheck %s --check-prefix=CASE6 ++# CASE6: pcalau12i $a0, 2 ++# CASE6-NEXT: ld.w $a0, $a0, -2048 ++ ++# RUN: ld.lld %t/a.la32.o --section-start=.rodata=0x7ffff123 --section-start=.text=0x0 -o %t/case7.la32 ++# RUN: ld.lld %t/a.la64.o --section-start=.rodata=0x7ffff123 --section-start=.text=0x0 -o %t/case7.la64 ++# RUN: llvm-objdump -d --no-show-raw-insn %t/case7.la32 | FileCheck %s --check-prefix=CASE7 ++# RUN: llvm-objdump -d --no-show-raw-insn %t/case7.la64 | FileCheck %s --check-prefix=CASE7 ++# CASE7: pcalau12i $a0, 524287 ++# CASE7-NEXT: ld.w $a0, $a0, 291 ++ ++# RUN: ld.lld %t/a.la32.o --section-start=.rodata=0x7ffffabc --section-start=.text=0x0 -o %t/case8.la32 ++# RUN: ld.lld %t/a.la64.o --section-start=.rodata=0x7ffffabc --section-start=.text=0x0 -o %t/case8.la64 ++# RUN: llvm-objdump -d --no-show-raw-insn %t/case8.la32 | FileCheck %s --check-prefix=CASE8 ++# RUN: llvm-objdump -d --no-show-raw-insn %t/case8.la64 | FileCheck %s --check-prefix=CASE8 ++# CASE8: pcalau12i $a0, -524288 ++# CASE8-NEXT: ld.w $a0, $a0, -1348 ++ ++# RUN: ld.lld %t/a.la32.o 
--section-start=.rodata=0x10123 --section-start=.text=0x80010000 -o %t/case9.la32 ++# RUN: ld.lld %t/a.la64.o --section-start=.rodata=0x10123 --section-start=.text=0x80010000 -o %t/case9.la64 ++# RUN: llvm-objdump -d --no-show-raw-insn %t/case9.la32 | FileCheck %s --check-prefix=CASE9 ++# RUN: llvm-objdump -d --no-show-raw-insn %t/case9.la64 | FileCheck %s --check-prefix=CASE9 ++# CASE9: pcalau12i $a0, -524288 ++# CASE9-NEXT: ld.w $a0, $a0, 291 ++ ++## page delta = 0x4443333322222000, page offset = 0x111 ++## %pc_lo12 = 0x111 = 273 ++## %pc_hi20 = 0x22222 = 139810 ++## %pc64_lo20 = 0x33333 = 209715 ++## %pc64_hi12 = 0x444 = 1092 ++# RUN: ld.lld %t/extreme.o --section-start=.rodata=0x4443333334567111 --section-start=.text=0x0000000012345678 -o %t/extreme0 ++# RUN: llvm-objdump -d --no-show-raw-insn %t/extreme0 | FileCheck %s --check-prefix=EXTREME0 ++# EXTREME0: addi.d $t0, $zero, 273 ++# EXTREME0-NEXT: pcalau12i $t1, 139810 ++# EXTREME0-NEXT: lu32i.d $t0, 209715 ++# EXTREME0-NEXT: lu52i.d $t0, $t0, 1092 ++ ++## page delta = 0x4443333222223000, page offset = 0x888 ++## %pc_lo12 = 0x888 = -1912 ++## %pc_hi20 = 0x22223 = 139811 ++## %pc64_lo20 = 0x33332 = 209714 ++## %pc64_hi12 = 0x444 = 1092 ++# RUN: ld.lld %t/extreme.o --section-start=.rodata=0x4443333334567888 --section-start=.text=0x0000000012345678 -o %t/extreme1 ++# RUN: llvm-objdump -d --no-show-raw-insn %t/extreme1 | FileCheck %s --check-prefix=EXTREME1 ++# EXTREME1: addi.d $t0, $zero, -1912 ++# EXTREME1-NEXT: pcalau12i $t1, 139811 ++# EXTREME1-NEXT: lu32i.d $t0, 209714 ++# EXTREME1-NEXT: lu52i.d $t0, $t0, 1092 ++ ++## page delta = 0x4443333499999000, page offset = 0x111 ++## %pc_lo12 = 0x111 = 273 ++## %pc_hi20 = 0x99999 = -419431 ++## %pc64_lo20 = 0x33334 = 209716 ++## %pc64_hi12 = 0x444 = 1092 ++# RUN: ld.lld %t/extreme.o --section-start=.rodata=0x44433333abcde111 --section-start=.text=0x0000000012345678 -o %t/extreme2 ++# RUN: llvm-objdump -d --no-show-raw-insn %t/extreme2 | FileCheck %s --check-prefix=EXTREME2 ++# EXTREME2: addi.d $t0, $zero, 273 ++# EXTREME2-NEXT: pcalau12i $t1, -419431 ++# EXTREME2-NEXT: lu32i.d $t0, 209716 ++# EXTREME2-NEXT: lu52i.d $t0, $t0, 1092 ++ ++## page delta = 0x444333339999a000, page offset = 0x888 ++## %pc_lo12 = 0x888 = -1912 ++## %pc_hi20 = 0x9999a = -419430 ++## %pc64_lo20 = 0x33333 = 209715 ++## %pc64_hi12 = 0x444 = 1092 ++# RUN: ld.lld %t/extreme.o --section-start=.rodata=0x44433333abcde888 --section-start=.text=0x0000000012345678 -o %t/extreme3 ++# RUN: llvm-objdump -d --no-show-raw-insn %t/extreme3 | FileCheck %s --check-prefix=EXTREME3 ++# EXTREME3: addi.d $t0, $zero, -1912 ++# EXTREME3-NEXT: pcalau12i $t1, -419430 ++# EXTREME3-NEXT: lu32i.d $t0, 209715 ++# EXTREME3-NEXT: lu52i.d $t0, $t0, 1092 ++ ++## page delta = 0x444aaaaa22222000, page offset = 0x111 ++## %pc_lo12 = 0x111 = 273 ++## %pc_hi20 = 0x22222 = 139810 ++## %pc64_lo20 = 0xaaaaa = -349526 ++## %pc64_hi12 = 0x444 = 1092 ++# RUN: ld.lld %t/extreme.o --section-start=.rodata=0x444aaaaa34567111 --section-start=.text=0x0000000012345678 -o %t/extreme4 ++# RUN: llvm-objdump -d --no-show-raw-insn %t/extreme4 | FileCheck %s --check-prefix=EXTREME4 ++# EXTREME4: addi.d $t0, $zero, 273 ++# EXTREME4-NEXT: pcalau12i $t1, 139810 ++# EXTREME4-NEXT: lu32i.d $t0, -349526 ++# EXTREME4-NEXT: lu52i.d $t0, $t0, 1092 ++ ++## page delta = 0x444aaaa922223000, page offset = 0x888 ++## %pc_lo12 = 0x888 = -1912 ++## %pc_hi20 = 0x22223 = 139811 ++## %pc64_lo20 = 0xaaaa9 = -349527 ++## %pc64_hi12 = 0x444 = 1092 ++# RUN: ld.lld %t/extreme.o 
--section-start=.rodata=0x444aaaaa34567888 --section-start=.text=0x0000000012345678 -o %t/extreme5 ++# RUN: llvm-objdump -d --no-show-raw-insn %t/extreme5 | FileCheck %s --check-prefix=EXTREME5 ++# EXTREME5: addi.d $t0, $zero, -1912 ++# EXTREME5-NEXT: pcalau12i $t1, 139811 ++# EXTREME5-NEXT: lu32i.d $t0, -349527 ++# EXTREME5-NEXT: lu52i.d $t0, $t0, 1092 ++ ++## page delta = 0x444aaaab99999000, page offset = 0x111 ++## %pc_lo12 = 0x111 = 273 ++## %pc_hi20 = 0x99999 = -419431 ++## %pc64_lo20 = 0xaaaab = -349525 ++## %pc64_hi12 = 0x444 = 1092 ++# RUN: ld.lld %t/extreme.o --section-start=.rodata=0x444aaaaaabcde111 --section-start=.text=0x0000000012345678 -o %t/extreme6 ++# RUN: llvm-objdump -d --no-show-raw-insn %t/extreme6 | FileCheck %s --check-prefix=EXTREME6 ++# EXTREME6: addi.d $t0, $zero, 273 ++# EXTREME6-NEXT: pcalau12i $t1, -419431 ++# EXTREME6-NEXT: lu32i.d $t0, -349525 ++# EXTREME6-NEXT: lu52i.d $t0, $t0, 1092 ++ ++## page delta = 0x444aaaaa9999a000, page offset = 0x888 ++## %pc_lo12 = 0x888 = -1912 ++## %pc_hi20 = 0x9999a = -419430 ++## %pc64_lo20 = 0xaaaaa = -349526 ++## %pc64_hi12 = 0x444 = 1092 ++# RUN: ld.lld %t/extreme.o --section-start=.rodata=0x444aaaaaabcde888 --section-start=.text=0x0000000012345678 -o %t/extreme7 ++# RUN: llvm-objdump -d --no-show-raw-insn %t/extreme7 | FileCheck %s --check-prefix=EXTREME7 ++# EXTREME7: addi.d $t0, $zero, -1912 ++# EXTREME7-NEXT: pcalau12i $t1, -419430 ++# EXTREME7-NEXT: lu32i.d $t0, -349526 ++# EXTREME7-NEXT: lu52i.d $t0, $t0, 1092 ++ ++## page delta = 0xbbb3333322222000, page offset = 0x111 ++## %pc_lo12 = 0x111 = 273 ++## %pc_hi20 = 0x22222 = 139810 ++## %pc64_lo20 = 0x33333 = 209715 ++## %pc64_hi12 = 0xbbb = -1093 ++# RUN: ld.lld %t/extreme.o --section-start=.rodata=0xbbb3333334567111 --section-start=.text=0x0000000012345678 -o %t/extreme8 ++# RUN: llvm-objdump -d --no-show-raw-insn %t/extreme8 | FileCheck %s --check-prefix=EXTREME8 ++# EXTREME8: addi.d $t0, $zero, 273 ++# EXTREME8-NEXT: pcalau12i $t1, 139810 ++# EXTREME8-NEXT: lu32i.d $t0, 209715 ++# EXTREME8-NEXT: lu52i.d $t0, $t0, -1093 ++ ++## page delta = 0xbbb3333222223000, page offset = 0x888 ++## %pc_lo12 = 0x888 = -1912 ++## %pc_hi20 = 0x22223 = 139811 ++## %pc64_lo20 = 0x33332 = 209714 ++## %pc64_hi12 = 0xbbb = -1093 ++# RUN: ld.lld %t/extreme.o --section-start=.rodata=0xbbb3333334567888 --section-start=.text=0x0000000012345678 -o %t/extreme9 ++# RUN: llvm-objdump -d --no-show-raw-insn %t/extreme9 | FileCheck %s --check-prefix=EXTREME9 ++# EXTREME9: addi.d $t0, $zero, -1912 ++# EXTREME9-NEXT: pcalau12i $t1, 139811 ++# EXTREME9-NEXT: lu32i.d $t0, 209714 ++# EXTREME9-NEXT: lu52i.d $t0, $t0, -1093 ++ ++## page delta = 0xbbb3333499999000, page offset = 0x111 ++## %pc_lo12 = 0x111 = 273 ++## %pc_hi20 = 0x99999 = -419431 ++## %pc64_lo20 = 0x33334 = 209716 ++## %pc64_hi12 = 0xbbb = -1093 ++# RUN: ld.lld %t/extreme.o --section-start=.rodata=0xbbb33333abcde111 --section-start=.text=0x0000000012345678 -o %t/extreme10 ++# RUN: llvm-objdump -d --no-show-raw-insn %t/extreme10 | FileCheck %s --check-prefix=EXTREME10 ++# EXTREME10: addi.d $t0, $zero, 273 ++# EXTREME10-NEXT: pcalau12i $t1, -419431 ++# EXTREME10-NEXT: lu32i.d $t0, 209716 ++# EXTREME10-NEXT: lu52i.d $t0, $t0, -1093 ++ ++## page delta = 0xbbb333339999a000, page offset = 0x888 ++## %pc_lo12 = 0x888 = -1912 ++## %pc_hi20 = 0x9999a = -419430 ++## %pc64_lo20 = 0x33333 = 209715 ++## %pc64_hi12 = 0xbbb = -1093 ++# RUN: ld.lld %t/extreme.o --section-start=.rodata=0xbbb33333abcde888 --section-start=.text=0x0000000012345678 -o 
%t/extreme11 ++# RUN: llvm-objdump -d --no-show-raw-insn %t/extreme11 | FileCheck %s --check-prefix=EXTREME11 ++# EXTREME11: addi.d $t0, $zero, -1912 ++# EXTREME11-NEXT: pcalau12i $t1, -419430 ++# EXTREME11-NEXT: lu32i.d $t0, 209715 ++# EXTREME11-NEXT: lu52i.d $t0, $t0, -1093 ++ ++## page delta = 0xbbbaaaaa22222000, page offset = 0x111 ++## %pc_lo12 = 0x111 = 273 ++## %pc_hi20 = 0x22222 = 139810 ++## %pc64_lo20 = 0xaaaaa = -349526 ++## %pc64_hi12 = 0xbbb = -1093 ++# RUN: ld.lld %t/extreme.o --section-start=.rodata=0xbbbaaaaa34567111 --section-start=.text=0x0000000012345678 -o %t/extreme12 ++# RUN: llvm-objdump -d --no-show-raw-insn %t/extreme12 | FileCheck %s --check-prefix=EXTREME12 ++# EXTREME12: addi.d $t0, $zero, 273 ++# EXTREME12-NEXT: pcalau12i $t1, 139810 ++# EXTREME12-NEXT: lu32i.d $t0, -349526 ++# EXTREME12-NEXT: lu52i.d $t0, $t0, -1093 ++ ++## page delta = 0xbbbaaaa922223000, page offset = 0x888 ++## %pc_lo12 = 0x888 = -1912 ++## %pc_hi20 = 0x22223 = 139811 ++## %pc64_lo20 = 0xaaaa9 = -349527 ++## %pc64_hi12 = 0xbbb = -1093 ++# RUN: ld.lld %t/extreme.o --section-start=.rodata=0xbbbaaaaa34567888 --section-start=.text=0x0000000012345678 -o %t/extreme13 ++# RUN: llvm-objdump -d --no-show-raw-insn %t/extreme13 | FileCheck %s --check-prefix=EXTREME13 ++# EXTREME13: addi.d $t0, $zero, -1912 ++# EXTREME13-NEXT: pcalau12i $t1, 139811 ++# EXTREME13-NEXT: lu32i.d $t0, -349527 ++# EXTREME13-NEXT: lu52i.d $t0, $t0, -1093 ++ ++## page delta = 0xbbbaaaab99999000, page offset = 0x111 ++## %pc_lo12 = 0x111 = 273 ++## %pc_hi20 = 0x99999 = -419431 ++## %pc64_lo20 = 0xaaaab = -349525 ++## %pc64_hi12 = 0xbbb = -1093 ++# RUN: ld.lld %t/extreme.o --section-start=.rodata=0xbbbaaaaaabcde111 --section-start=.text=0x0000000012345678 -o %t/extreme14 ++# RUN: llvm-objdump -d --no-show-raw-insn %t/extreme14 | FileCheck %s --check-prefix=EXTREME14 ++# EXTREME14: addi.d $t0, $zero, 273 ++# EXTREME14-NEXT: pcalau12i $t1, -419431 ++# EXTREME14-NEXT: lu32i.d $t0, -349525 ++# EXTREME14-NEXT: lu52i.d $t0, $t0, -1093 ++ ++## page delta = 0xbbbaaaaa9999a000, page offset = 0x888 ++## %pc_lo12 = 0x888 = -1912 ++## %pc_hi20 = 0x9999a = -419430 ++## %pc64_lo20 = 0xaaaaa = -349526 ++## %pc64_hi12 = 0xbbb = -1093 ++# RUN: ld.lld %t/extreme.o --section-start=.rodata=0xbbbaaaaaabcde888 --section-start=.text=0x0000000012345678 -o %t/extreme15 ++# RUN: llvm-objdump -d --no-show-raw-insn %t/extreme15 | FileCheck %s --check-prefix=EXTREME15 ++# EXTREME15: addi.d $t0, $zero, -1912 ++# EXTREME15-NEXT: pcalau12i $t1, -419430 ++# EXTREME15-NEXT: lu32i.d $t0, -349526 ++# EXTREME15-NEXT: lu52i.d $t0, $t0, -1093 ++ ++#--- a.s ++.rodata ++x: ++.word 10 ++.text ++.global _start ++_start: ++ pcalau12i $a0, %pc_hi20(x) ++ ld.w $a0, $a0, %pc_lo12(x) ++ ++#--- extreme.s ++.rodata ++x: ++.word 10 ++.text ++.global _start ++_start: ++ addi.d $t0, $zero, %pc_lo12(x) ++ pcalau12i $t1, %pc_hi20(x) ++ lu32i.d $t0, %pc64_lo20(x) ++ lu52i.d $t0, $t0, %pc64_hi12(x) +diff --git a/lld/test/ELF/loongarch-pcala-lo12-jirl-shared.s b/lld/test/ELF/loongarch-pcala-lo12-jirl-shared.s +new file mode 100644 +index 000000000000..991f8fbe974f +--- /dev/null ++++ b/lld/test/ELF/loongarch-pcala-lo12-jirl-shared.s +@@ -0,0 +1,60 @@ ++# REQUIRES: loongarch ++# RUN: rm -rf %t && split-file %s %t ++ ++# RUN: llvm-mc --filetype=obj --triple=loongarch32-unknown-elf %t/a.s -o %t/a.la32.o ++# RUN: llvm-mc --filetype=obj --triple=loongarch64-unknown-elf %t/a.s -o %t/a.la64.o ++ ++# RUN: ld.lld %t/a.la32.o -shared -T %t/a.t -o %t/a.la32.so ++# RUN: ld.lld %t/a.la64.o -shared 
-T %t/a.t -o %t/a.la64.so ++ ++# RUN: llvm-objdump -d --no-show-raw-insn %t/a.la32.so | FileCheck --check-prefixes=DIS,DIS32 %s ++# RUN: llvm-objdump -d --no-show-raw-insn %t/a.la64.so | FileCheck --check-prefixes=DIS,DIS64 %s ++ ++## PLT should be present in this case. ++# DIS: Disassembly of section .plt: ++# DIS: <.plt>: ++# DIS: 234020: pcaddu12i $t3, 510 ++# DIS32-NEXT: ld.w $t3, $t3, 84 ++# DIS64-NEXT: ld.d $t3, $t3, 184 ++# DIS-NEXT: jirl $t1, $t3, 0 ++# DIS-NEXT: nop ++ ++# DIS: Disassembly of section .text: ++# DIS: : ++# DIS-NEXT: nop ++# DIS-NEXT: nop ++# DIS-NEXT: nop ++# DIS-NEXT: pcalau12i $t0, -510 ++# DIS-NEXT: jirl $zero, $t0, 32 ++ ++# RUN: llvm-mc --filetype=obj --triple=loongarch32-unknown-elf %t/error.s -o %t/error.la32.o ++# RUN: llvm-mc --filetype=obj --triple=loongarch64-unknown-elf %t/error.s -o %t/error.la64.o ++# RUN: not ld.lld %t/error.la32.o -shared -o %t/error.la32.so 2>&1 | FileCheck --check-prefix=ERR %s ++# RUN: not ld.lld %t/error.la64.o -shared -o %t/error.la64.so 2>&1 | FileCheck --check-prefix=ERR %s ++# ERR: error: relocation R_LARCH_PCALA_LO12 cannot be used against symbol 'bar'; recompile with -fPIC ++ ++#--- a.t ++SECTIONS { ++ .plt 0x234000: { *(.plt) } ++ .text 0x432000: { *(.text) } ++} ++ ++#--- a.s ++.p2align 12 ++.global foo ++foo: ++## The nops are for pushing the relocs off page boundary, to better see the ++## page-aligned semantics in action. ++ nop ++ nop ++ nop ++ ## The offsets should be -510 (0x234 - 0x432) and 32 (PLT header size + 0) ++ ## respectively. ++ pcalau12i $t0, %pc_hi20(bar) ++ jirl $zero, $t0, %pc_lo12(bar) ++ ++#--- error.s ++.global foo ++foo: ++ pcalau12i $t0, %pc_hi20(bar) ++ ld.w $t0, $t0, %pc_lo12(bar) +diff --git a/lld/test/ELF/loongarch-pcala-lo12-jirl.s b/lld/test/ELF/loongarch-pcala-lo12-jirl.s +new file mode 100644 +index 000000000000..1a03152aaa2a +--- /dev/null ++++ b/lld/test/ELF/loongarch-pcala-lo12-jirl.s +@@ -0,0 +1,42 @@ ++# REQUIRES: loongarch ++ ++# RUN: llvm-mc --filetype=obj --triple=loongarch32-unknown-elf %s -o %t.la32.o ++# RUN: llvm-mc --filetype=obj --triple=loongarch64-unknown-elf %s -o %t.la64.o ++ ++# RUN: ld.lld %t.la32.o -o %t.la32 ++# RUN: ld.lld %t.la64.o -o %t.la64 ++# RUN: llvm-objdump -d --no-show-raw-insn %t.la32 | FileCheck %s ++# RUN: llvm-objdump -d --no-show-raw-insn %t.la64 | FileCheck %s ++# CHECK: pcalau12i $t0, -1 ++# CHECK-NEXT: jirl $ra, $t0, 564 ++# CHECK-NEXT: pcalau12i $t0, 0 ++# CHECK-NEXT: jirl $zero, $t0, -1348 ++ ++## PLT shouldn't get generated in this case. ++# CHECK-NOT: Disassembly of section .plt: ++ ++.p2align 12 ++.org 0x234 ++.global foo ++foo: ++ li.w $a0, 42 ++ ret ++ ++.org 0xabc ++.global bar ++bar: ++ li.w $a7, 94 ++ syscall 0 ++ ++.org 0x1000 ++.global _start ++_start: ++## The nops are for pushing the relocs off page boundary, to better see the ++## page-aligned semantics in action. 
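The CHECK operands of this test can be reproduced by hand: the input's .p2align 12 makes .text start on a page boundary, so only the section-relative .org offsets matter. A small worked sketch (simplified: it ignores the 32-bit sign-extension fixup shown earlier, which does not matter at these small offsets; all offsets are taken from the test source):

    #include <cstdint>
    #include <cstdio>

    // foo is at 0x234, bar at 0xabc; the two pcalau12i sit at 0x100c and
    // 0x1014 (three nops after _start at 0x1000).
    static void operands(uint64_t dest, uint64_t pc) {
      long long delta = (long long)((dest & ~0xfffULL) - (pc & ~0xfffULL));
      if (dest & 0x800)      // negative %pc_lo12: compensate with one more page
        delta += 0x1000;
      long long lo = (long long)(dest & 0xfff) - ((dest & 0x800) ? 0x1000 : 0);
      printf("pcalau12i %lld, jirl %lld\n", delta >> 12, lo);
    }

    int main() {
      operands(0x234, 0x100c);   // foo: expect -1, 564
      operands(0xabc, 0x1014);   // bar: expect 0, -1348
    }

The bar case shows the lo12 sign fixup in action: its in-page offset 0xabc has bit 11 set, so the immediate becomes -1348 and the page operand is bumped from -1 to 0.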
++ nop ++ nop ++ nop ++ pcalau12i $t0, %pc_hi20(foo) ++ jirl $ra, $t0, %pc_lo12(foo) ++ pcalau12i $t0, %pc_hi20(bar) ++ jirl $zero, $t0, %pc_lo12(bar) +diff --git a/lld/test/ELF/loongarch-plt.s b/lld/test/ELF/loongarch-plt.s +new file mode 100644 +index 000000000000..82af53d39e73 +--- /dev/null ++++ b/lld/test/ELF/loongarch-plt.s +@@ -0,0 +1,108 @@ ++# REQUIRES: loongarch ++# RUN: echo '.globl bar, weak; .type bar,@function; .type weak,@function; bar: weak:' > %t1.s ++ ++# RUN: llvm-mc --filetype=obj --triple=loongarch32 %t1.s -o %t1.32.o ++# RUN: ld.lld -shared %t1.32.o -soname=t1.32.so -o %t1.32.so ++# RUN: llvm-mc --filetype=obj --triple=loongarch32 %s -o %t.32.o ++# RUN: ld.lld %t.32.o %t1.32.so -z separate-code -o %t.32 ++# RUN: llvm-readelf -S -s %t.32 | FileCheck --check-prefixes=SEC,NM %s ++# RUN: llvm-readobj -r %t.32 | FileCheck --check-prefix=RELOC32 %s ++# RUN: llvm-readelf -x .got.plt %t.32 | FileCheck --check-prefix=GOTPLT32 %s ++# RUN: llvm-objdump -d --no-show-raw-insn %t.32 | FileCheck --check-prefixes=DIS,DIS32 %s ++ ++# RUN: llvm-mc --filetype=obj --triple=loongarch64 %t1.s -o %t1.64.o ++# RUN: ld.lld -shared %t1.64.o -soname=t1.64.so -o %t1.64.so ++# RUN: llvm-mc --filetype=obj --triple=loongarch64 %s -o %t.64.o ++# RUN: ld.lld %t.64.o %t1.64.so -z separate-code -o %t.64 ++# RUN: llvm-readelf -S -s %t.64 | FileCheck --check-prefixes=SEC,NM %s ++# RUN: llvm-readobj -r %t.64 | FileCheck --check-prefix=RELOC64 %s ++# RUN: llvm-readelf -x .got.plt %t.64 | FileCheck --check-prefix=GOTPLT64 %s ++# RUN: llvm-objdump -d --no-show-raw-insn %t.64 | FileCheck --check-prefixes=DIS,DIS64 %s ++ ++# SEC: .plt PROGBITS {{0*}}00020020 ++ ++## A canonical PLT has a non-zero st_value. bar and weak are called but their ++## addresses are not taken, so a canonical PLT is not necessary. ++# NM: {{0*}}00000000 0 FUNC GLOBAL DEFAULT UND bar ++# NM: {{0*}}00000000 0 FUNC WEAK DEFAULT UND weak ++ ++## The .got.plt slots relocated by .rela.plt point to .plt ++## This is required by glibc. ++# RELOC32: .rela.plt { ++# RELOC32-NEXT: 0x40070 R_LARCH_JUMP_SLOT bar 0x0 ++# RELOC32-NEXT: 0x40074 R_LARCH_JUMP_SLOT weak 0x0 ++# RELOC32-NEXT: } ++# GOTPLT32: section '.got.plt' ++# GOTPLT32-NEXT: 0x00040068 00000000 00000000 20000200 20000200 ++ ++# RELOC64: .rela.plt { ++# RELOC64-NEXT: 0x400E0 R_LARCH_JUMP_SLOT bar 0x0 ++# RELOC64-NEXT: 0x400E8 R_LARCH_JUMP_SLOT weak 0x0 ++# RELOC64-NEXT: } ++# GOTPLT64: section '.got.plt' ++# GOTPLT64-NEXT: 0x000400d0 00000000 00000000 00000000 00000000 ++# GOTPLT64-NEXT: 0x000400e0 20000200 00000000 20000200 00000000 ++ ++# DIS: <_start>: ++## Direct call ++## foo - . = 0x20010-0x20000 = 16 ++# DIS-NEXT: 20000: bl 16 ++## bar@plt - . = 0x20040-0x20004 = 60 ++# DIS-NEXT: 20004: bl 60 ++## bar@plt - . = 0x20040-0x20008 = 56 ++# DIS-NEXT: 20008: bl 56 ++## weak@plt - . 
= 0x20050-0x2000c = 68 ++# DIS-NEXT: 2000c: bl 68 ++# DIS: : ++# DIS-NEXT: 20010: ++ ++# DIS: Disassembly of section .plt: ++# DIS: <.plt>: ++## 32-bit: .got.plt - .plt = 0x40068 - 0x20020 = 4096*32+72 ++# DIS32-NEXT: pcaddu12i $t2, 32 ++# DIS32-NEXT: sub.w $t1, $t1, $t3 ++# DIS32-NEXT: ld.w $t3, $t2, 72 ++# DIS32-NEXT: addi.w $t1, $t1, -44 ++# DIS32-NEXT: addi.w $t0, $t2, 72 ++# DIS32-NEXT: srli.w $t1, $t1, 2 ++# DIS32-NEXT: ld.w $t0, $t0, 4 ++# DIS32-NEXT: jr $t3 ++ ++## 64-bit: .got.plt - .plt = 0x400d0 - 0x20020 = 4096*32+176 ++# DIS64-NEXT: pcaddu12i $t2, 32 ++# DIS64-NEXT: sub.d $t1, $t1, $t3 ++# DIS64-NEXT: ld.d $t3, $t2, 176 ++# DIS64-NEXT: addi.d $t1, $t1, -44 ++# DIS64-NEXT: addi.d $t0, $t2, 176 ++# DIS64-NEXT: srli.d $t1, $t1, 1 ++# DIS64-NEXT: ld.d $t0, $t0, 8 ++# DIS64-NEXT: jr $t3 ++ ++## 32-bit: &.got.plt[bar]-. = 0x40070-0x20040 = 4096*32+48 ++## 64-bit: &.got.plt[bar]-. = 0x400e0-0x20040 = 4096*32+160 ++# DIS: 20040: pcaddu12i $t3, 32 ++# DIS32-NEXT: ld.w $t3, $t3, 48 ++# DIS64-NEXT: ld.d $t3, $t3, 160 ++# DIS-NEXT: jirl $t1, $t3, 0 ++# DIS-NEXT: nop ++ ++## 32-bit: &.got.plt[weak]-. = 0x40074-0x20050 = 4096*32+36 ++## 64-bit: &.got.plt[weak]-. = 0x400e8-0x20050 = 4096*32+152 ++# DIS: 20050: pcaddu12i $t3, 32 ++# DIS32-NEXT: ld.w $t3, $t3, 36 ++# DIS64-NEXT: ld.d $t3, $t3, 152 ++# DIS-NEXT: jirl $t1, $t3, 0 ++# DIS-NEXT: nop ++ ++.global _start, foo, bar ++.weak weak ++ ++_start: ++ bl foo ++ bl bar ++ bl %plt(bar) ++ bl weak ++ ++## foo is local and non-preemptible, no PLT is generated. ++foo: ++ ret +diff --git a/lld/test/ELF/loongarch-reloc-pic.s b/lld/test/ELF/loongarch-reloc-pic.s +new file mode 100644 +index 000000000000..b23ad55a2523 +--- /dev/null ++++ b/lld/test/ELF/loongarch-reloc-pic.s +@@ -0,0 +1,44 @@ ++# REQUIRES: loongarch ++# RUN: rm -rf %t && split-file %s %t ++ ++# RUN: llvm-mc --filetype=obj --triple=loongarch32 %t/32.s -o %t/32.o ++# RUN: llvm-mc --filetype=obj --triple=loongarch64 %t/64.s -o %t/64.o ++# RUN: ld.lld -shared %t/32.o -o %t/32.so ++# RUN: llvm-nm %t/32.so | FileCheck --check-prefix=NM32 %s ++# RUN: llvm-readobj -r %t/32.so | FileCheck --check-prefix=RELOC32 %s ++# RUN: ld.lld -shared %t/64.o -o %t/64.so ++# RUN: llvm-nm %t/64.so | FileCheck --check-prefix=NM64 %s ++# RUN: llvm-readobj -r %t/64.so | FileCheck --check-prefix=RELOC64 %s ++ ++## R_LARCH_32 and R_LARCH_64 are absolute relocation types. ++## In PIC mode, they create relative relocations if the symbol is non-preemptable. ++ ++# NM32: 000301fc d b ++# NM64: 00030350 d b ++ ++# RELOC32: .rela.dyn { ++# RELOC32-NEXT: 0x301FC R_LARCH_RELATIVE - 0x301FC ++# RELOC32-NEXT: 0x301F8 R_LARCH_32 a 0 ++# RELOC32-NEXT: } ++# RELOC64: .rela.dyn { ++# RELOC64-NEXT: 0x30350 R_LARCH_RELATIVE - 0x30350 ++# RELOC64-NEXT: 0x30348 R_LARCH_64 a 0 ++# RELOC64-NEXT: } ++ ++#--- 32.s ++.globl a, b ++.hidden b ++ ++.data ++.long a ++b: ++.long b ++ ++#--- 64.s ++.globl a, b ++.hidden b ++ ++.data ++.quad a ++b: ++.quad b +diff --git a/lld/test/ELF/loongarch-tls-gd-edge-case.s b/lld/test/ELF/loongarch-tls-gd-edge-case.s +new file mode 100644 +index 000000000000..9f25f10c73b4 +--- /dev/null ++++ b/lld/test/ELF/loongarch-tls-gd-edge-case.s +@@ -0,0 +1,46 @@ ++# REQUIRES: loongarch ++ ++## Edge case: when a TLS symbol is being accessed in both GD and IE manners, ++## correct reloc behavior should be preserved for both kinds of accesses. 
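Concretely, the single symbol y ends up with three GOT words: a {DTPMOD, DTPREL} pair for the GD access and a separate TPREL word for the IE access, and the two la.tls.* expansions simply point at different slots. A sketch of the operand split, with the LA64 addresses asserted below hard-coded as assumptions (GOT pair at 0x203a0, TPREL word at 0x203b0, code starting at 0x102e0):

    #include <cstdint>
    #include <cstdio>

    // Page/offset split for a GOT slot access, close enough for these small
    // addresses (no sign-extension fixups are needed here).
    static void split(const char *what, uint64_t slot, uint64_t pc) {
      long long hi = (long long)((slot & ~0xfffULL) - (pc & ~0xfffULL)) >> 12;
      printf("%s: pcalau12i %lld, lo12 %lld\n", what, hi,
             (long long)(slot & 0xfff));
    }

    int main() {
      split("ie (tprel slot)",  0x203b0, 0x102e0);  // expect 16, 944
      split("gd (dtpmod pair)", 0x203a0, 0x102e8);  // expect 16, 928
    }

The IE access loads the tp-offset from its own slot, while the GD access passes the address of the pair to __tls_get_addr, which is why both kinds of dynamic relocations must coexist for y.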
++ ++# RUN: llvm-mc --filetype=obj --triple=loongarch32 %s -o %t.la32.o ++# RUN: ld.lld %t.la32.o -shared -o %t.la32 ++# RUN: llvm-mc --filetype=obj --triple=loongarch64 %s -o %t.la64.o ++# RUN: ld.lld %t.la64.o -shared -o %t.la64 ++ ++# RUN: llvm-readelf -Wr %t.la32 | FileCheck --check-prefix=LA32-REL %s ++# RUN: llvm-objdump -d --no-show-raw-insn %t.la32 | FileCheck --check-prefix=LA32 %s ++ ++# RUN: llvm-readelf -Wr %t.la64 | FileCheck --check-prefix=LA64-REL %s ++# RUN: llvm-objdump -d --no-show-raw-insn %t.la64 | FileCheck --check-prefix=LA64 %s ++ ++# LA32-REL-NOT: R_LARCH_32 ++# LA32-REL: 0002023c 00000206 R_LARCH_TLS_DTPMOD32 00000000 y + 0 ++# LA32-REL-NEXT: 00020240 00000208 R_LARCH_TLS_DTPREL32 00000000 y + 0 ++# LA32-REL-NEXT: 00020244 0000020a R_LARCH_TLS_TPREL32 00000000 y + 0 ++ ++# LA64-REL-NOT: R_LARCH_64 ++# LA64-REL: 00000000000203a0 0000000200000007 R_LARCH_TLS_DTPMOD64 0000000000000000 y + 0 ++# LA64-REL-NEXT: 00000000000203a8 0000000200000009 R_LARCH_TLS_DTPREL64 0000000000000000 y + 0 ++# LA64-REL-NEXT: 00000000000203b0 000000020000000b R_LARCH_TLS_TPREL64 0000000000000000 y + 0 ++ ++# LA32: 101d4: pcalau12i $a0, 16 ++# LA32-NEXT: ld.w $a0, $a0, 580 ++# LA32-NEXT: pcalau12i $a1, 16 ++# LA32-NEXT: addi.w $a1, $a1, 572 ++ ++# LA64: 102e0: pcalau12i $a0, 16 ++# LA64-NEXT: ld.d $a0, $a0, 944 ++# LA64-NEXT: pcalau12i $a1, 16 ++# LA64-NEXT: addi.d $a1, $a1, 928 ++ ++.global _start ++_start: ++la.tls.ie $a0, y # should refer to the GOT entry relocated by the R_LARCH_TLS_TPRELnn record ++la.tls.gd $a1, y # should refer to the GOT entry relocated by the R_LARCH_TLS_DTPMODnn record ++ ++.section .tbss,"awT",@nobits ++.global y ++y: ++.word 0 ++.size y, 4 +diff --git a/lld/test/ELF/loongarch-tls-gd.s b/lld/test/ELF/loongarch-tls-gd.s +new file mode 100644 +index 000000000000..2aecb44c17a3 +--- /dev/null ++++ b/lld/test/ELF/loongarch-tls-gd.s +@@ -0,0 +1,136 @@ ++# REQUIRES: loongarch ++# RUN: rm -rf %t && split-file %s %t ++ ++## LoongArch psABI doesn't specify TLS relaxation. Though the code sequences are not ++## relaxed, dynamic relocations can be omitted for GD->LE relaxation. 
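In the GD -> LE outputs checked further down, the GOT pairs survive but turn into plain constants: DTPMOD becomes 1 (the executable itself) and DTPREL becomes the symbol's st_value, which is why no dynamic relocations remain. A toy model of that contract (not the real libc __tls_get_addr, just an illustration of what the pre-filled pairs mean; the 0x8/0xc offsets are the st_values from this test):

    #include <cstdint>
    #include <cstdio>

    struct TlsIndex { uint64_t dtpmod, dtprel; };  // one GOT pair per GD symbol

    static char module1_tls[16];                   // stand-in for the exe's TLS block
    static char *dtv[2] = {nullptr, module1_tls};  // dtv[module id] -> TLS block

    // Toy stand-in for __tls_get_addr: index the DTV, add the in-block offset.
    static void *tls_get_addr_model(const TlsIndex *ti) {
      return dtv[ti->dtpmod] + ti->dtprel;
    }

    int main() {
      // GD -> LE: the linker fills the pairs with constants, matching the
      // LE32-GOT words below (1, 0x8, 1, 0xc, shown little-endian there).
      TlsIndex a = {1, 0x8}, b = {1, 0xc};
      printf("a at %p, b at %p\n", tls_get_addr_model(&a), tls_get_addr_model(&b));
    }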
++ ++# RUN: llvm-mc --filetype=obj --triple=loongarch32 %t/a.s -o %t/a.32.o ++# RUN: llvm-mc --filetype=obj --triple=loongarch32 %t/bc.s -o %t/bc.32.o ++# RUN: ld.lld -shared -soname=bc.so %t/bc.32.o -o %t/bc.32.so ++# RUN: llvm-mc --filetype=obj --triple=loongarch32 %t/tga.s -o %t/tga.32.o ++# RUN: llvm-mc --filetype=obj --triple=loongarch64 %t/a.s -o %t/a.64.o ++# RUN: llvm-mc --filetype=obj --triple=loongarch64 %t/bc.s -o %t/bc.64.o ++# RUN: ld.lld -shared -soname=bc.so %t/bc.64.o -o %t/bc.64.so ++# RUN: llvm-mc --filetype=obj --triple=loongarch64 %t/tga.s -o %t/tga.64.o ++ ++## LA32 GD ++# RUN: ld.lld -shared %t/a.32.o %t/bc.32.o -o %t/gd.32.so ++# RUN: llvm-readobj -r %t/gd.32.so | FileCheck --check-prefix=GD32-REL %s ++# RUN: llvm-objdump -d --no-show-raw-insn %t/gd.32.so | FileCheck --check-prefix=GD32 %s ++ ++## LA32 GD -> LE ++# RUN: ld.lld %t/a.32.o %t/bc.32.o %t/tga.32.o -o %t/le.32 ++# RUN: llvm-readelf -r %t/le.32 | FileCheck --check-prefix=NOREL %s ++# RUN: llvm-readelf -x .got %t/le.32 | FileCheck --check-prefix=LE32-GOT %s ++# RUN: ld.lld -pie %t/a.32.o %t/bc.32.o %t/tga.32.o -o %t/le-pie.32 ++# RUN: llvm-readelf -r %t/le-pie.32 | FileCheck --check-prefix=NOREL %s ++# RUN: llvm-readelf -x .got %t/le-pie.32 | FileCheck --check-prefix=LE32-GOT %s ++ ++## LA32 GD -> IE ++# RUN: ld.lld %t/a.32.o %t/bc.32.so %t/tga.32.o -o %t/ie.32 ++# RUN: llvm-readobj -r %t/ie.32 | FileCheck --check-prefix=IE32-REL %s ++# RUN: llvm-readelf -x .got %t/ie.32 | FileCheck --check-prefix=IE32-GOT %s ++ ++## LA64 GD ++# RUN: ld.lld -shared %t/a.64.o %t/bc.64.o -o %t/gd.64.so ++# RUN: llvm-readobj -r %t/gd.64.so | FileCheck --check-prefix=GD64-REL %s ++# RUN: llvm-objdump -d --no-show-raw-insn %t/gd.64.so | FileCheck --check-prefix=GD64 %s ++ ++## LA64 GD -> LE ++# RUN: ld.lld %t/a.64.o %t/bc.64.o %t/tga.64.o -o %t/le.64 ++# RUN: llvm-readelf -r %t/le.64 | FileCheck --check-prefix=NOREL %s ++# RUN: llvm-readelf -x .got %t/le.64 | FileCheck --check-prefix=LE64-GOT %s ++# RUN: ld.lld -pie %t/a.64.o %t/bc.64.o %t/tga.64.o -o %t/le-pie.64 ++# RUN: llvm-readelf -r %t/le-pie.64 | FileCheck --check-prefix=NOREL %s ++# RUN: llvm-readelf -x .got %t/le-pie.64 | FileCheck --check-prefix=LE64-GOT %s ++ ++## LA64 GD -> IE ++# RUN: ld.lld %t/a.64.o %t/bc.64.so %t/tga.64.o -o %t/ie.64 ++# RUN: llvm-readobj -r %t/ie.64 | FileCheck --check-prefix=IE64-REL %s ++# RUN: llvm-readelf -x .got %t/ie.64 | FileCheck --check-prefix=IE64-GOT %s ++ ++# GD32-REL: .rela.dyn { ++# GD32-REL-NEXT: 0x20310 R_LARCH_TLS_DTPMOD32 a 0x0 ++# GD32-REL-NEXT: 0x20314 R_LARCH_TLS_DTPREL32 a 0x0 ++# GD32-REL-NEXT: 0x20318 R_LARCH_TLS_DTPMOD32 b 0x0 ++# GD32-REL-NEXT: 0x2031C R_LARCH_TLS_DTPREL32 b 0x0 ++# GD32-REL-NEXT: } ++ ++## &DTPMOD(a) - . = 0x20310 - 0x10250: 0x10 pages, page offset 0x310 ++# GD32: 10250: pcalau12i $a0, 16 ++# GD32-NEXT: addi.w $a0, $a0, 784 ++# GD32-NEXT: bl 56 ++ ++## &DTPMOD(b) - . = 0x20318 - 0x1025c: 0x10 pages, page offset 0x318 ++# GD32: 1025c: pcalau12i $a0, 16 ++# GD32-NEXT: addi.w $a0, $a0, 792 ++# GD32-NEXT: bl 44 ++ ++# GD64-REL: .rela.dyn { ++# GD64-REL-NEXT: 0x204C0 R_LARCH_TLS_DTPMOD64 a 0x0 ++# GD64-REL-NEXT: 0x204C8 R_LARCH_TLS_DTPREL64 a 0x0 ++# GD64-REL-NEXT: 0x204D0 R_LARCH_TLS_DTPMOD64 b 0x0 ++# GD64-REL-NEXT: 0x204D8 R_LARCH_TLS_DTPREL64 b 0x0 ++# GD64-REL-NEXT: } ++ ++## &DTPMOD(a) - . = 0x204c0 - 0x10398: 0x10 pages, page offset 0x4c0 ++# GD64: 10398: pcalau12i $a0, 16 ++# GD64-NEXT: addi.d $a0, $a0, 1216 ++# GD64-NEXT: bl 48 ++ ++## &DTPMOD(b) - . 
= 0x204d0 - 0x103a4: 0x10 pages, page offset 0x4d0 ++# GD64: 103a4: pcalau12i $a0, 16 ++# GD64-NEXT: addi.d $a0, $a0, 1232 ++# GD64-NEXT: bl 36 ++ ++# NOREL: no relocations ++ ++## .got contains pre-populated values: [a@dtpmod, a@dtprel, b@dtpmod, b@dtprel] ++## a@dtprel = st_value(a) = 0x8 ++## b@dtprel = st_value(b) = 0xc ++# LE32-GOT: section '.got': ++# LE32-GOT-NEXT: 0x[[#%x,A:]] 01000000 08000000 01000000 0c000000 ++# LE64-GOT: section '.got': ++# LE64-GOT-NEXT: 0x[[#%x,A:]] 01000000 00000000 08000000 00000000 ++# LE64-GOT-NEXT: 0x[[#%x,A:]] 01000000 00000000 0c000000 00000000 ++ ++## a is local - relaxed to LE - its DTPMOD/DTPREL slots are link-time constants. ++## b is external - DTPMOD/DTPREL dynamic relocations are required. ++# IE32-REL: .rela.dyn { ++# IE32-REL-NEXT: 0x30228 R_LARCH_TLS_DTPMOD32 b 0x0 ++# IE32-REL-NEXT: 0x3022C R_LARCH_TLS_DTPREL32 b 0x0 ++# IE32-REL-NEXT: } ++# IE32-GOT: section '.got': ++# IE32-GOT-NEXT: 0x00030220 01000000 08000000 00000000 00000000 ++ ++# IE64-REL: .rela.dyn { ++# IE64-REL-NEXT: 0x30388 R_LARCH_TLS_DTPMOD64 b 0x0 ++# IE64-REL-NEXT: 0x30390 R_LARCH_TLS_DTPREL64 b 0x0 ++# IE64-REL-NEXT: } ++# IE64-GOT: section '.got': ++# IE64-GOT-NEXT: 0x00030378 01000000 00000000 08000000 00000000 ++# IE64-GOT-NEXT: 0x00030388 00000000 00000000 00000000 00000000 ++ ++#--- a.s ++la.tls.gd $a0, a ++bl %plt(__tls_get_addr) ++ ++la.tls.gd $a0, b ++bl %plt(__tls_get_addr) ++ ++.section .tbss,"awT",@nobits ++.globl a ++.zero 8 ++a: ++.zero 4 ++ ++#--- bc.s ++.section .tbss,"awT",@nobits ++.globl b, c ++b: ++.zero 4 ++c: ++ ++#--- tga.s ++.globl __tls_get_addr ++__tls_get_addr: +diff --git a/lld/test/ELF/loongarch-tls-ie.s b/lld/test/ELF/loongarch-tls-ie.s +new file mode 100644 +index 000000000000..78c207991b4e +--- /dev/null ++++ b/lld/test/ELF/loongarch-tls-ie.s +@@ -0,0 +1,114 @@ ++# REQUIRES: loongarch ++# RUN: rm -rf %t && split-file %s %t ++ ++# RUN: llvm-mc --filetype=obj --triple=loongarch32 %t/32.s -o %t/32.o ++# RUN: llvm-mc --filetype=obj --triple=loongarch64 %t/64.s -o %t/64.o ++ ++## LA32 IE ++# RUN: ld.lld -shared %t/32.o -o %t/32.so ++# RUN: llvm-readobj -r -d %t/32.so | FileCheck --check-prefix=IE32-REL %s ++# RUN: llvm-objdump -d --no-show-raw-insn %t/32.so | FileCheck --check-prefixes=IE32 %s ++ ++## LA32 IE -> LE ++# RUN: ld.lld %t/32.o -o %t/32 ++# RUN: llvm-readelf -r %t/32 | FileCheck --check-prefix=NOREL %s ++# RUN: llvm-readelf -x .got %t/32 | FileCheck --check-prefix=LE32-GOT %s ++# RUN: llvm-objdump -d --no-show-raw-insn %t/32 | FileCheck --check-prefixes=LE32 %s ++ ++## LA64 IE ++# RUN: ld.lld -shared %t/64.o -o %t/64.so ++# RUN: llvm-readobj -r -d %t/64.so | FileCheck --check-prefix=IE64-REL %s ++# RUN: llvm-objdump -d --no-show-raw-insn %t/64.so | FileCheck --check-prefixes=IE64 %s ++ ++## LA64 IE -> LE ++# RUN: ld.lld %t/64.o -o %t/64 ++# RUN: llvm-readelf -r %t/64 | FileCheck --check-prefix=NOREL %s ++# RUN: llvm-readelf -x .got %t/64 | FileCheck --check-prefix=LE64-GOT %s ++# RUN: llvm-objdump -d --no-show-raw-insn %t/64 | FileCheck --check-prefixes=LE64 %s ++ ++# IE32-REL: FLAGS STATIC_TLS ++# IE32-REL: .rela.dyn { ++# IE32-REL-NEXT: 0x20218 R_LARCH_TLS_TPREL32 - 0xC ++# IE32-REL-NEXT: 0x20214 R_LARCH_TLS_TPREL32 a 0x0 ++# IE32-REL-NEXT: } ++ ++# IE64-REL: FLAGS STATIC_TLS ++# IE64-REL: .rela.dyn { ++# IE64-REL-NEXT: 0x20370 R_LARCH_TLS_TPREL64 - 0xC ++# IE64-REL-NEXT: 0x20368 R_LARCH_TLS_TPREL64 a 0x0 ++# IE64-REL-NEXT: } ++ ++## LA32: ++## &.got[0] - . = 0x20214 - 0x101a4: 0x10 pages, page offset 0x214 ++## &.got[1] - . 
= 0x20218 - 0x101b0: 0x10 pages, page offset 0x218 ++# IE32: 101a4: pcalau12i $a4, 16 ++# IE32-NEXT: ld.w $a4, $a4, 532 ++# IE32-NEXT: add.w $a4, $a4, $tp ++# IE32-NEXT: 101b0: pcalau12i $a5, 16 ++# IE32-NEXT: ld.w $a5, $a5, 536 ++# IE32-NEXT: add.w $a5, $a5, $tp ++ ++## LA64: ++## &.got[0] - . = 0x20368 - 0x102a0: 0x10 pages, page offset 0x368 ++## &.got[1] - . = 0x20370 - 0x102ac: 0x10 pages, page offset 0x370 ++# IE64: 102a0: pcalau12i $a4, 16 ++# IE64-NEXT: ld.d $a4, $a4, 872 ++# IE64-NEXT: add.d $a4, $a4, $tp ++# IE64-NEXT: 102ac: pcalau12i $a5, 16 ++# IE64-NEXT: ld.d $a5, $a5, 880 ++# IE64-NEXT: add.d $a5, $a5, $tp ++ ++# NOREL: no relocations ++ ++# a@tprel = st_value(a) = 0x8 ++# b@tprel = st_value(a) = 0xc ++# LE32-GOT: section '.got': ++# LE32-GOT-NEXT: 0x0003012c 08000000 0c000000 ++# LE64-GOT: section '.got': ++# LE64-GOT-NEXT: 0x000301e0 08000000 00000000 0c000000 00000000 ++ ++## LA32: ++## &.got[0] - . = 0x3012c - 0x20114: 0x10 pages, page offset 0x12c ++## &.got[1] - . = 0x30130 - 0x20120: 0x10 pages, page offset 0x130 ++# LE32: 20114: pcalau12i $a4, 16 ++# LE32-NEXT: ld.w $a4, $a4, 300 ++# LE32-NEXT: add.w $a4, $a4, $tp ++# LE32-NEXT: 20120: pcalau12i $a5, 16 ++# LE32-NEXT: ld.w $a5, $a5, 304 ++# LE32-NEXT: add.w $a5, $a5, $tp ++ ++## LA64: ++## &.got[0] - . = 0x301e0 - 0x201c8: 0x10 pages, page offset 0x1e0 ++## &.got[1] - . = 0x301e8 - 0x201d4: 0x10 pages, page offset 0x1e8 ++# LE64: 201c8: pcalau12i $a4, 16 ++# LE64-NEXT: ld.d $a4, $a4, 480 ++# LE64-NEXT: add.d $a4, $a4, $tp ++# LE64-NEXT: 201d4: pcalau12i $a5, 16 ++# LE64-NEXT: ld.d $a5, $a5, 488 ++# LE64-NEXT: add.d $a5, $a5, $tp ++ ++#--- 32.s ++la.tls.ie $a4, a ++add.w $a4, $a4, $tp ++la.tls.ie $a5, b ++add.w $a5, $a5, $tp ++ ++.section .tbss,"awT",@nobits ++.globl a ++.zero 8 ++a: ++.zero 4 ++b: ++ ++#--- 64.s ++la.tls.ie $a4, a ++add.d $a4, $a4, $tp ++la.tls.ie $a5, b ++add.d $a5, $a5, $tp ++ ++.section .tbss,"awT",@nobits ++.globl a ++.zero 8 ++a: ++.zero 4 ++b: +diff --git a/lld/test/ELF/loongarch-tls-ld.s b/lld/test/ELF/loongarch-tls-ld.s +new file mode 100644 +index 000000000000..a5be3ad905b7 +--- /dev/null ++++ b/lld/test/ELF/loongarch-tls-ld.s +@@ -0,0 +1,89 @@ ++# REQUIRES: loongarch ++# RUN: rm -rf %t && split-file %s %t ++ ++## LoongArch psABI doesn't specify TLS relaxation. Though the code sequences are not ++## relaxed, dynamic relocations can be omitted for LD->LE relaxation. 
++ ++# RUN: llvm-mc --filetype=obj --triple=loongarch32 --position-independent %t/a.s -o %t/a.32.o ++# RUN: llvm-mc --filetype=obj --triple=loongarch32 %t/tga.s -o %t/tga.32.o ++# RUN: llvm-mc --filetype=obj --triple=loongarch64 --position-independent %t/a.s -o %t/a.64.o ++# RUN: llvm-mc --filetype=obj --triple=loongarch64 %t/tga.s -o %t/tga.64.o ++ ++## LA32 LD ++# RUN: ld.lld -shared %t/a.32.o -o %t/ld.32.so ++# RUN: llvm-readobj -r %t/ld.32.so | FileCheck --check-prefix=LD32-REL %s ++# RUN: llvm-readelf -x .got %t/ld.32.so | FileCheck --check-prefix=LD32-GOT %s ++# RUN: llvm-objdump -d --no-show-raw-insn %t/ld.32.so | FileCheck --check-prefixes=LD32 %s ++ ++## LA32 LD -> LE ++# RUN: ld.lld %t/a.32.o %t/tga.32.o -o %t/le.32 ++# RUN: llvm-readelf -r %t/le.32 | FileCheck --check-prefix=NOREL %s ++# RUN: llvm-readelf -x .got %t/le.32 | FileCheck --check-prefix=LE32-GOT %s ++# RUN: llvm-objdump -d --no-show-raw-insn %t/le.32 | FileCheck --check-prefixes=LE32 %s ++ ++## LA64 LD ++# RUN: ld.lld -shared %t/a.64.o -o %t/ld.64.so ++# RUN: llvm-readobj -r %t/ld.64.so | FileCheck --check-prefix=LD64-REL %s ++# RUN: llvm-readelf -x .got %t/ld.64.so | FileCheck --check-prefix=LD64-GOT %s ++# RUN: llvm-objdump -d --no-show-raw-insn %t/ld.64.so | FileCheck --check-prefixes=LD64 %s ++ ++## LA64 LD -> LE ++# RUN: ld.lld %t/a.64.o %t/tga.64.o -o %t/le.64 ++# RUN: llvm-readelf -r %t/le.64 | FileCheck --check-prefix=NOREL %s ++# RUN: llvm-readelf -x .got %t/le.64 | FileCheck --check-prefix=LE64-GOT %s ++# RUN: llvm-objdump -d --no-show-raw-insn %t/le.64 | FileCheck --check-prefixes=LE64 %s ++ ++## a@dtprel = st_value(a) = 0 is a link-time constant. ++# LD32-REL: .rela.dyn { ++# LD32-REL-NEXT: 0x20280 R_LARCH_TLS_DTPMOD32 - 0x0 ++# LD32-REL-NEXT: } ++# LD32-GOT: section '.got': ++# LD32-GOT-NEXT: 0x00020280 00000000 00000000 ++ ++# LD64-REL: .rela.dyn { ++# LD64-REL-NEXT: 0x20400 R_LARCH_TLS_DTPMOD64 - 0x0 ++# LD64-REL-NEXT: } ++# LD64-GOT: section '.got': ++# LD64-GOT-NEXT: 0x00020400 00000000 00000000 00000000 00000000 ++ ++## LA32: &DTPMOD(a) - . = 0x20280 - 0x101cc: 0x10 pages, page offset 0x280 ++# LD32: 101cc: pcalau12i $a0, 16 ++# LD32-NEXT: addi.w $a0, $a0, 640 ++# LD32-NEXT: bl 44 ++ ++## LA64: &DTPMOD(a) - . = 0x20400 - 0x102e0: 0x10 pages, page offset 0x400 ++# LD64: 102e0: pcalau12i $a0, 16 ++# LD64-NEXT: addi.d $a0, $a0, 1024 ++# LD64-NEXT: bl 40 ++ ++# NOREL: no relocations ++ ++## a is local - its DTPMOD/DTPREL slots are link-time constants. ++## a@dtpmod = 1 (main module) ++# LE32-GOT: section '.got': ++# LE32-GOT-NEXT: 0x00030120 01000000 00000000 ++ ++# LE64-GOT: section '.got': ++# LE64-GOT-NEXT: 0x000301d8 01000000 00000000 00000000 00000000 ++ ++## LA32: DTPMOD(.LANCHOR0) - . = 0x30120 - 0x20114: 0x10 pages, page offset 0x120 ++# LE32: 20114: pcalau12i $a0, 16 ++# LE32-NEXT: addi.w $a0, $a0, 288 ++# LE32-NEXT: bl 4 ++ ++## LA64: DTPMOD(.LANCHOR0) - . = 0x301d8 - 0x201c8: 0x10 pages, page offset 0x1d8 ++# LE64: 201c8: pcalau12i $a0, 16 ++# LE64-NEXT: addi.d $a0, $a0, 472 ++# LE64-NEXT: bl 4 ++ ++#--- a.s ++la.tls.ld $a0, .LANCHOR0 ++bl %plt(__tls_get_addr) ++ ++.section .tbss,"awT",@nobits ++.set .LANCHOR0, . 
+ 0 ++.zero 8 ++ ++#--- tga.s ++.globl __tls_get_addr ++__tls_get_addr: +diff --git a/lld/test/ELF/loongarch-tls-le.s b/lld/test/ELF/loongarch-tls-le.s +new file mode 100644 +index 000000000000..a20d7d83bae3 +--- /dev/null ++++ b/lld/test/ELF/loongarch-tls-le.s +@@ -0,0 +1,42 @@ ++# REQUIRES: loongarch ++ ++# RUN: llvm-mc --filetype=obj --triple=loongarch32 %s -o %t.32.o ++# RUN: llvm-mc --filetype=obj --triple=loongarch64 %s -o %t.64.o ++ ++# RUN: ld.lld %t.32.o -o %t.32 ++# RUN: llvm-nm -p %t.32 | FileCheck --check-prefixes=NM %s ++# RUN: llvm-objdump -d --no-show-raw-insn %t.32 | FileCheck --check-prefixes=LE %s ++ ++# RUN: ld.lld %t.64.o -o %t.64 ++# RUN: llvm-objdump -d --no-show-raw-insn %t.64 | FileCheck --check-prefixes=LE %s ++ ++# RUN: not ld.lld -shared %t.32.o -o /dev/null 2>&1 | FileCheck %s --check-prefix=ERR --implicit-check-not=error: ++ ++# ERR: error: relocation R_LARCH_TLS_LE_HI20 against .LANCHOR0 cannot be used with -shared ++# ERR: error: relocation R_LARCH_TLS_LE_LO12 against .LANCHOR0 cannot be used with -shared ++# ERR: error: relocation R_LARCH_TLS_LE_HI20 against a cannot be used with -shared ++# ERR: error: relocation R_LARCH_TLS_LE_LO12 against a cannot be used with -shared ++ ++# NM: {{0*}}00000008 b .LANCHOR0 ++# NM: {{0*}}00000800 B a ++ ++## .LANCHOR0@tprel = 8 ++## a@tprel = 0x800 ++# LE: lu12i.w $a0, 0 ++# LE-NEXT: ori $a0, $a0, 8 ++# LE-NEXT: lu12i.w $a1, 0 ++# LE-NEXT: ori $a1, $a1, 2048 ++# LE-EMPTY: ++ ++.text ++_start: ++la.tls.le $a0, .LANCHOR0 ++la.tls.le $a1, a ++ ++.section .tbss,"awT",@nobits ++.space 8 ++.LANCHOR0: ++.space 0x800-8 ++.globl a ++a: ++.zero 4 +diff --git a/lld/test/lit.cfg.py b/lld/test/lit.cfg.py +index 96a1d652573f..4b84cfd93dc5 100644 +--- a/lld/test/lit.cfg.py ++++ b/lld/test/lit.cfg.py +@@ -63,20 +63,28 @@ if platform.system() not in ['Windows']: + config.available_features.add('demangler') + + llvm_config.feature_config( +- [('--targets-built', {'AArch64': 'aarch64', +- 'AMDGPU': 'amdgpu', +- 'ARM': 'arm', +- 'AVR': 'avr', +- 'Hexagon': 'hexagon', +- 'Mips': 'mips', +- 'MSP430': 'msp430', +- 'PowerPC': 'ppc', +- 'RISCV': 'riscv', +- 'Sparc': 'sparc', +- 'WebAssembly': 'wasm', +- 'X86': 'x86'}), +- ('--assertion-mode', {'ON': 'asserts'}), +- ]) ++ [ ++ ( ++ "--targets-built", ++ { ++ "AArch64": "aarch64", ++ "AMDGPU": "amdgpu", ++ "ARM": "arm", ++ "AVR": "avr", ++ "Hexagon": "hexagon", ++ "LoongArch": "loongarch", ++ "Mips": "mips", ++ "MSP430": "msp430", ++ "PowerPC": "ppc", ++ "RISCV": "riscv", ++ "Sparc": "sparc", ++ "WebAssembly": "wasm", ++ "X86": "x86", ++ }, ++ ), ++ ("--assertion-mode", {"ON": "asserts"}), ++ ] ++) + + # Set a fake constant version so that we get consistent output. 
+ config.environment['LLD_VERSION'] = 'LLD 1.0' diff --git a/llvm/PKGBUILD b/llvm/PKGBUILD index e53aec9141..d4be161983 100644 --- a/llvm/PKGBUILD +++ b/llvm/PKGBUILD @@ -14,13 +14,17 @@ options=('staticlibs' '!lto') # https://github.com/llvm/llvm-project/issues/5774 _source_base=https://github.com/llvm/llvm-project/releases/download/llvmorg-$pkgver source=($_source_base/llvm-$pkgver.src.tar.xz{,.sig} $_source_base/cmake-$pkgver.src.tar.xz{,.sig} - $_source_base/third-party-$pkgver.src.tar.xz{,.sig}) + $_source_base/third-party-$pkgver.src.tar.xz{,.sig} + llvm-newreloc-la64.patch + RuntimeDyld-MCJIT-Add-LoongArch-support.patch) sha256sums=('e91db44d1b3bb1c33fcea9a7d1f2423b883eaa9163d3d56ca2aa6d2f0711bc29' 'SKIP' '39d342a4161095d2f28fb1253e4585978ac50521117da666e2b1f6f28b62f514' 'SKIP' '15f5b9aeeba938530af977d5f9205612737a091a7f0f6c8075df8723b7713f70' - 'SKIP') + 'SKIP' + 'f26ad05b93f5b7918fcf4209e892d135029e58ca0f0141a50cb67bfec7e80061' + '7c7de135adc059b5877270c38e050e02c516c6cb04cd4b10f1111d956e0c82e0') validpgpkeys=('474E22316ABF4785A88C6E8EA2C794A986419D8A' # Tom Stellard 'D574BD5D1D0E98895E3BF90044F2485E45D59042') # Tobias Hieta @@ -55,6 +59,8 @@ _get_distribution_components() { prepare() { rename -v -- "-$pkgver.src" '' {cmake,third-party}-$pkgver.src cd llvm-$pkgver.src + patch -Np1 -i ../RuntimeDyld-MCJIT-Add-LoongArch-support.patch + patch -Np2 -i ../llvm-newreloc-la64.patch mkdir build } diff --git a/llvm/RuntimeDyld-MCJIT-Add-LoongArch-support.patch b/llvm/RuntimeDyld-MCJIT-Add-LoongArch-support.patch new file mode 100644 index 0000000000..6b03eebd59 --- /dev/null +++ b/llvm/RuntimeDyld-MCJIT-Add-LoongArch-support.patch @@ -0,0 +1,328 @@ +From cb5f5e13300712b5dde6cfcafab9f7f465096c18 Mon Sep 17 00:00:00 2001 +From: wanglei +Date: Tue, 30 May 2023 19:16:18 +0800 +Subject: [PATCH] [RuntimeDyld][MCJIT] Add LoongArch support + +--- + lib/ExecutionEngine/Orc/LLJIT.cpp | 3 +- + .../RuntimeDyld/RuntimeDyld.cpp | 12 ++ + .../RuntimeDyld/RuntimeDyldELF.cpp | 198 ++++++++++++++++++ + .../RuntimeDyld/RuntimeDyldELF.h | 14 ++ + 4 files changed, 226 insertions(+), 1 deletion(-) + +diff --git a/lib/ExecutionEngine/Orc/LLJIT.cpp b/lib/ExecutionEngine/Orc/LLJIT.cpp +index bc84988e..e06dea9d 100644 +--- a/lib/ExecutionEngine/Orc/LLJIT.cpp ++++ b/lib/ExecutionEngine/Orc/LLJIT.cpp +@@ -802,7 +802,8 @@ LLJIT::createObjectLinkingLayer(LLJITBuilderState &S, ExecutionSession &ES) { + + if (S.JTMB->getTargetTriple().isOSBinFormatELF() && + (S.JTMB->getTargetTriple().getArch() == Triple::ArchType::ppc64 || +- S.JTMB->getTargetTriple().getArch() == Triple::ArchType::ppc64le)) ++ S.JTMB->getTargetTriple().getArch() == Triple::ArchType::ppc64le || ++ S.JTMB->getTargetTriple().getArch() == Triple::ArchType::loongarch64)) + Layer->setAutoClaimResponsibilityForObjectSymbols(true); + + // FIXME: Explicit conversion to std::unique_ptr added to silence +diff --git a/lib/ExecutionEngine/RuntimeDyld/RuntimeDyld.cpp b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyld.cpp +index a9aaff42..b154ea28 100644 +--- a/lib/ExecutionEngine/RuntimeDyld/RuntimeDyld.cpp ++++ b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyld.cpp +@@ -987,6 +987,18 @@ uint8_t *RuntimeDyldImpl::createStubFunction(uint8_t *Addr, + // and stubs for branches Thumb - ARM and ARM - Thumb. 
+ writeBytesUnaligned(0xe51ff004, Addr, 4); // ldr pc, [pc, #-4] + return Addr + 4; ++ } else if (Arch == Triple::loongarch64) { ++ // lu12i.w $t0, %abs_hi20(addr) ++ // ori $t0, $t0, %abs_lo12(addr) ++ // lu32i.d $t0, %abs64_lo20(addr) ++ // lu52i.d $t0, $t0, %abs64_lo12(addr) ++ // jr $t0 ++ writeBytesUnaligned(0x1400000c, Addr, 4); ++ writeBytesUnaligned(0x0380018c, Addr + 4, 4); ++ writeBytesUnaligned(0x1600000c, Addr + 8, 4); ++ writeBytesUnaligned(0x0300018c, Addr + 12, 4); ++ writeBytesUnaligned(0x4c000180, Addr + 16, 4); ++ return Addr; + } else if (IsMipsO32ABI || IsMipsN32ABI) { + // 0: 3c190000 lui t9,%hi(addr). + // 4: 27390000 addiu t9,t9,%lo(addr). +diff --git a/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.cpp b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.cpp +index 2fe49fef..f85452be 100644 +--- a/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.cpp ++++ b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.cpp +@@ -641,6 +641,102 @@ void RuntimeDyldELF::resolveARMRelocation(const SectionEntry &Section, + } + } + ++// Returns extract bits Val[Hi:Lo]. ++static inline uint32_t extractBits(uint32_t Val, unsigned Hi, unsigned Lo) { ++ return (Val & (((1UL << (Hi + 1)) - 1))) >> Lo; ++} ++ ++void RuntimeDyldELF::resolveLoongArch64Relocation(const SectionEntry &Section, ++ uint64_t Offset, ++ uint64_t Value, uint32_t Type, ++ int64_t Addend) { ++ uint32_t *TargetPtr = ++ reinterpret_cast(Section.getAddressWithOffset(Offset)); ++ uint64_t FinalAddress = Section.getLoadAddressWithOffset(Offset); ++ ++ LLVM_DEBUG(dbgs() << "resolveLoongArch64Relocation, LocalAddress: 0x" ++ << format("%llx", Section.getAddressWithOffset(Offset)) ++ << " FinalAddress: 0x" << format("%llx", FinalAddress) ++ << " Value: 0x" << format("%llx", Value) << " Type: 0x" ++ << format("%x", Type) << " Addend: 0x" ++ << format("%llx", Addend) << "\n"); ++ ++ switch (Type) { ++ default: ++ report_fatal_error("Relocation type not implemented yet!"); ++ break; ++ case ELF::R_LARCH_32: ++ *(support::little32_t *)TargetPtr = static_cast(Value + Addend); ++ break; ++ case ELF::R_LARCH_64: ++ *(support::little64_t *)TargetPtr = Value + Addend; ++ break; ++ case ELF::R_LARCH_32_PCREL: ++ *(support::little32_t *)TargetPtr = ++ static_cast(Value - FinalAddress + Addend); ++ break; ++ case ELF::R_LARCH_B26: { ++ uint64_t BranchImm = Value - FinalAddress + Addend; ++ assert(isInt<28>(BranchImm)); ++ uint32_t RawInstr = *(support::little32_t *)TargetPtr; ++ uint32_t Imm = static_cast(BranchImm >> 2); ++ uint32_t Imm15_0 = extractBits(Imm, /*Hi=*/15, /*Lo=*/0) << 10; ++ uint32_t Imm25_16 = extractBits(Imm, /*Hi=*/25, /*Lo=*/16); ++ *(support::little32_t *)TargetPtr = RawInstr | Imm15_0 | Imm25_16; ++ break; ++ } ++ case ELF::R_LARCH_GOT_PC_HI20: ++ case ELF::R_LARCH_PCALA_HI20: { ++ uint64_t Target = Value + Addend; ++ uint64_t TargetPage = ++ (Target + (Target & 0x800)) & ~static_cast(0xfff); ++ uint64_t PCPage = FinalAddress & ~static_cast(0xfff); ++ int64_t PageDelta = TargetPage - PCPage; ++ uint32_t RawInstr = *(support::little32_t *)TargetPtr; ++ uint32_t Imm31_12 = extractBits(PageDelta, /*Hi=*/31, /*Lo=*/12) << 5; ++ *(support::little32_t *)TargetPtr = RawInstr | Imm31_12; ++ break; ++ } ++ case ELF::R_LARCH_GOT_PC_LO12: ++ case ELF::R_LARCH_PCALA_LO12: { ++ // TODO: code-model=medium ++ uint64_t TargetOffset = (Value + Addend) & 0xfff; ++ uint32_t RawInstr = *(support::little32_t *)TargetPtr; ++ uint32_t Imm11_0 = TargetOffset << 10; ++ *(support::little32_t *)TargetPtr = RawInstr | Imm11_0; ++ break; ++ } ++ case 
ELF::R_LARCH_ABS_HI20: { ++ uint64_t Target = Value + Addend; ++ uint32_t RawInstr = *(support::little32_t *)TargetPtr; ++ uint32_t Imm31_12 = extractBits(Target, /*Hi=*/31, /*Lo=*/12) << 5; ++ *(support::little32_t *)TargetPtr = RawInstr | Imm31_12; ++ break; ++ } ++ case ELF::R_LARCH_ABS_LO12: { ++ uint64_t Target = Value + Addend; ++ uint32_t RawInstr = *(support::little32_t *)TargetPtr; ++ uint32_t Imm11_0 = extractBits(Target, /*Hi=*/11, /*Lo=*/0) << 10; ++ *(support::little32_t *)TargetPtr = RawInstr | Imm11_0; ++ break; ++ } ++ case ELF::R_LARCH_ABS64_LO20: { ++ uint64_t Target = Value + Addend; ++ uint32_t RawInstr = *(support::little32_t *)TargetPtr; ++ uint32_t Imm51_32 = extractBits(Target >> 32, /*Hi=*/19, /*Lo=*/0) << 5; ++ *(support::little32_t *)TargetPtr = RawInstr | Imm51_32; ++ break; ++ } ++ case ELF::R_LARCH_ABS64_HI12: { ++ uint64_t Target = Value + Addend; ++ uint32_t RawInstr = *(support::little32_t *)TargetPtr; ++ uint32_t Imm63_52 = extractBits(Target >> 32, /*Hi=*/31, /*Lo=*/20) << 5; ++ *(support::little32_t *)TargetPtr = RawInstr | Imm63_52; ++ break; ++ } ++ } ++} ++ + void RuntimeDyldELF::setMipsABI(const ObjectFile &Obj) { + if (Arch == Triple::UnknownArch || + !StringRef(Triple::getArchTypePrefix(Arch)).equals("mips")) { +@@ -1057,6 +1153,9 @@ void RuntimeDyldELF::resolveRelocation(const SectionEntry &Section, + resolveARMRelocation(Section, Offset, (uint32_t)(Value & 0xffffffffL), Type, + (uint32_t)(Addend & 0xffffffffL)); + break; ++ case Triple::loongarch64: ++ resolveLoongArch64Relocation(Section, Offset, Value, Type, Addend); ++ break; + case Triple::ppc: // Fall through. + case Triple::ppcle: + resolvePPC32Relocation(Section, Offset, Value, Type, Addend); +@@ -1209,6 +1308,81 @@ void RuntimeDyldELF::resolveAArch64Branch(unsigned SectionID, + } + } + ++bool RuntimeDyldELF::resolveLoongArch64ShortBranch( ++ unsigned SectionID, relocation_iterator RelI, ++ const RelocationValueRef &Value) { ++ uint64_t Address; ++ if (Value.SymbolName) { ++ auto Loc = GlobalSymbolTable.find(Value.SymbolName); ++ // Don't create direct branch for external symbols. ++ if (Loc == GlobalSymbolTable.end()) ++ return false; ++ const auto &SymInfo = Loc->second; ++ Address = ++ uint64_t(Sections[SymInfo.getSectionID()].getLoadAddressWithOffset( ++ SymInfo.getOffset())); ++ } else { ++ Address = uint64_t(Sections[Value.SectionID].getLoadAddress()); ++ } ++ uint64_t Offset = RelI->getOffset(); ++ uint64_t SourceAddress = Sections[SectionID].getLoadAddressWithOffset(Offset); ++ if (!isInt<28>(Address + Value.Addend - SourceAddress)) ++ return false; ++ resolveRelocation(Sections[SectionID], Offset, Address, RelI->getType(), ++ Value.Addend); ++ return true; ++} ++ ++void RuntimeDyldELF::resolveLoongArch64Branch(unsigned SectionID, ++ const RelocationValueRef &Value, ++ relocation_iterator RelI, ++ StubMap &Stubs) { ++ LLVM_DEBUG(dbgs() << "\t\tThis is an LoongArch64 branch relocation.\n"); ++ SectionEntry &Section = Sections[SectionID]; ++ uint64_t Offset = RelI->getOffset(); ++ unsigned RelType = RelI->getType(); ++ // Look for an existing stub. ++ StubMap::const_iterator i = Stubs.find(Value); ++ if (i != Stubs.end()) { ++ resolveRelocation(Section, Offset, ++ (uint64_t)Section.getAddressWithOffset(i->second), ++ RelType, 0); ++ LLVM_DEBUG(dbgs() << " Stub function found\n"); ++ } else if (!resolveLoongArch64ShortBranch(SectionID, RelI, Value)) { ++ // Create a new stub function. 
++ LLVM_DEBUG(dbgs() << " Create a new stub function\n"); ++ Stubs[Value] = Section.getStubOffset(); ++ uint8_t *StubTargetAddr = createStubFunction( ++ Section.getAddressWithOffset(Section.getStubOffset())); ++ RelocationEntry LU12I_W(SectionID, StubTargetAddr - Section.getAddress(), ++ ELF::R_LARCH_ABS_HI20, Value.Addend); ++ RelocationEntry ORI(SectionID, StubTargetAddr - Section.getAddress() + 4, ++ ELF::R_LARCH_ABS_LO12, Value.Addend); ++ RelocationEntry LU32I_D(SectionID, ++ StubTargetAddr - Section.getAddress() + 8, ++ ELF::R_LARCH_ABS64_LO20, Value.Addend); ++ RelocationEntry LU52I_D(SectionID, ++ StubTargetAddr - Section.getAddress() + 12, ++ ELF::R_LARCH_ABS64_HI12, Value.Addend); ++ if (Value.SymbolName) { ++ addRelocationForSymbol(LU12I_W, Value.SymbolName); ++ addRelocationForSymbol(ORI, Value.SymbolName); ++ addRelocationForSymbol(LU32I_D, Value.SymbolName); ++ addRelocationForSymbol(LU52I_D, Value.SymbolName); ++ } else { ++ addRelocationForSection(LU12I_W, Value.SectionID); ++ addRelocationForSection(ORI, Value.SectionID); ++ addRelocationForSection(LU32I_D, Value.SectionID); ++ addRelocationForSection(LU52I_D, Value.SectionID); ++ } ++ resolveRelocation(Section, Offset, ++ reinterpret_cast(Section.getAddressWithOffset( ++ Section.getStubOffset())), ++ RelType, 0); ++ Section.advanceStubOffset(getMaxStubSize()); ++ } ++} ++ + Expected + RuntimeDyldELF::processRelocationRef( + unsigned SectionID, relocation_iterator RelI, const ObjectFile &O, +@@ -1369,6 +1543,25 @@ RuntimeDyldELF::processRelocationRef( + } + processSimpleRelocation(SectionID, Offset, RelType, Value); + } ++ } else if (Arch == Triple::loongarch64) { ++ if (RelType == ELF::R_LARCH_B26 && MemMgr.allowStubAllocation()) { ++ resolveLoongArch64Branch(SectionID, Value, RelI, Stubs); ++ } else if (RelType == ELF::R_LARCH_GOT_PC_HI20 || ++ RelType == ELF::R_LARCH_GOT_PC_LO12) { ++ // FIXME: This will create redundant got entry. ++ uint64_t GOTOffset = allocateGOTEntries(1); ++ // Create relocation for newly created GOT entry. 
++ RelocationEntry RE = ++ computeGOTOffsetRE(GOTOffset, Value.Offset, ELF::R_LARCH_64); ++ if (Value.SymbolName) ++ addRelocationForSymbol(RE, Value.SymbolName); ++ else ++ addRelocationForSection(RE, Value.SectionID); ++ resolveGOTOffsetRelocation(SectionID, Offset, GOTOffset + Addend, ++ RelType); ++ } else { ++ processSimpleRelocation(SectionID, Offset, RelType, Value); ++ } + } else if (IsMipsO32ABI) { + uint8_t *Placeholder = reinterpret_cast( + computePlaceholderAddress(SectionID, Offset)); +@@ -2214,6 +2407,7 @@ size_t RuntimeDyldELF::getGOTEntrySize() { + case Triple::x86_64: + case Triple::aarch64: + case Triple::aarch64_be: ++ case Triple::loongarch64: + case Triple::ppc64: + case Triple::ppc64le: + case Triple::systemz: +@@ -2525,6 +2719,10 @@ bool RuntimeDyldELF::relocationNeedsGot(const RelocationRef &R) const { + return RelTy == ELF::R_AARCH64_ADR_GOT_PAGE || + RelTy == ELF::R_AARCH64_LD64_GOT_LO12_NC; + ++ if (Arch == Triple::loongarch64) ++ return RelTy == ELF::R_LARCH_GOT_PC_HI20 || ++ RelTy == ELF::R_LARCH_GOT_PC_LO12; ++ + if (Arch == Triple::x86_64) + return RelTy == ELF::R_X86_64_GOTPCREL || + RelTy == ELF::R_X86_64_GOTPCRELX || +diff --git a/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.h b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.h +index dfdd98cb..2c930219 100644 +--- a/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.h ++++ b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.h +@@ -48,6 +48,18 @@ class RuntimeDyldELF : public RuntimeDyldImpl { + void resolveARMRelocation(const SectionEntry &Section, uint64_t Offset, + uint32_t Value, uint32_t Type, int32_t Addend); + ++ void resolveLoongArch64Relocation(const SectionEntry &Section, ++ uint64_t Offset, uint64_t Value, ++ uint32_t Type, int64_t Addend); ++ ++ bool resolveLoongArch64ShortBranch(unsigned SectionID, ++ relocation_iterator RelI, ++ const RelocationValueRef &Value); ++ ++ void resolveLoongArch64Branch(unsigned SectionID, ++ const RelocationValueRef &Value, ++ relocation_iterator RelI, StubMap &Stubs); ++ + void resolvePPC32Relocation(const SectionEntry &Section, uint64_t Offset, + uint64_t Value, uint32_t Type, int64_t Addend); + +@@ -69,6 +81,8 @@ class RuntimeDyldELF : public RuntimeDyldImpl { + return 16; + else if (IsMipsN64ABI) + return 32; ++ if (Arch == Triple::loongarch64) ++ return 20; // lu12i.w; ori; lu32i.d; lu52i.d; jr + else if (Arch == Triple::ppc64 || Arch == Triple::ppc64le) + return 44; + else if (Arch == Triple::x86_64) +-- +2.40.0 + diff --git a/llvm/llvm-newreloc-la64.patch b/llvm/llvm-newreloc-la64.patch new file mode 100644 index 0000000000..60a5e8f07e --- /dev/null +++ b/llvm/llvm-newreloc-la64.patch @@ -0,0 +1,132 @@ +diff --git a/llvm/include/llvm/BinaryFormat/ELFRelocs/LoongArch.def b/llvm/include/llvm/BinaryFormat/ELFRelocs/LoongArch.def +index 67dbd020140b..02bce3c71712 100644 +--- a/llvm/include/llvm/BinaryFormat/ELFRelocs/LoongArch.def ++++ b/llvm/include/llvm/BinaryFormat/ELFRelocs/LoongArch.def +@@ -103,3 +103,18 @@ ELF_RELOC(R_LARCH_TLS_GD_PC_HI20, 97) + ELF_RELOC(R_LARCH_TLS_GD_HI20, 98) + ELF_RELOC(R_LARCH_32_PCREL, 99) + ELF_RELOC(R_LARCH_RELAX, 100) ++ ++// Relocs added in ELF for the LoongArch™ Architecture v20230519, part of the ++// v2.10 LoongArch ABI specs. 
++// ++// Spec addition: https://github.com/loongson/la-abi-specs/pull/1 ++// Binutils commit 57a930e3bfe4b2c7fd6463ed39311e1938513138 ++ELF_RELOC(R_LARCH_DELETE, 101) ++ELF_RELOC(R_LARCH_ALIGN, 102) ++ELF_RELOC(R_LARCH_PCREL20_S2, 103) ++ELF_RELOC(R_LARCH_CFA, 104) ++ELF_RELOC(R_LARCH_ADD6, 105) ++ELF_RELOC(R_LARCH_SUB6, 106) ++ELF_RELOC(R_LARCH_ADD_ULEB128, 107) ++ELF_RELOC(R_LARCH_SUB_ULEB128, 108) ++ELF_RELOC(R_LARCH_64_PCREL, 109) +diff --git a/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchELFObjectWriter.cpp b/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchELFObjectWriter.cpp +index 57330dd31f71..a6b9c0652639 100644 +--- a/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchELFObjectWriter.cpp ++++ b/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchELFObjectWriter.cpp +@@ -59,7 +59,7 @@ unsigned LoongArchELFObjectWriter::getRelocType(MCContext &Ctx, + case FK_Data_4: + return IsPCRel ? ELF::R_LARCH_32_PCREL : ELF::R_LARCH_32; + case FK_Data_8: +- return ELF::R_LARCH_64; ++ return IsPCRel ? ELF::R_LARCH_64_PCREL : ELF::R_LARCH_64; + case LoongArch::fixup_loongarch_b16: + return ELF::R_LARCH_B16; + case LoongArch::fixup_loongarch_b21: +diff --git a/llvm/test/MC/LoongArch/Relocations/sub-expr.s b/llvm/test/MC/LoongArch/Relocations/sub-expr.s +new file mode 100644 +index 000000000000..0179e1027af8 +--- /dev/null ++++ b/llvm/test/MC/LoongArch/Relocations/sub-expr.s +@@ -0,0 +1,28 @@ ++# RUN: llvm-mc --filetype=obj --triple=loongarch64 %s -o %t ++# RUN: llvm-readobj -r %t | FileCheck %s ++ ++## Check that subtraction expressions emit R_LARCH_32_PCREL and R_LARCH_64_PCREL relocations. ++ ++## TODO: 1- or 2-byte data relocations are not supported for now. ++ ++# CHECK: Relocations [ ++# CHECK-NEXT: Section ({{.*}}) .rela.data { ++# CHECK-NEXT: 0x0 R_LARCH_64_PCREL sx 0x0 ++# CHECK-NEXT: 0x8 R_LARCH_64_PCREL sy 0x0 ++# CHECK-NEXT: 0x10 R_LARCH_32_PCREL sx 0x0 ++# CHECK-NEXT: 0x14 R_LARCH_32_PCREL sy 0x0 ++# CHECK-NEXT: } ++ ++.section sx,"a" ++x: ++nop ++ ++.data ++.8byte x-. ++.8byte y-. ++.4byte x-. ++.4byte y-. 
++ ++.section sy,"a" ++y: ++nop +diff --git a/llvm/test/tools/llvm-readobj/ELF/reloc-types-loongarch64.test b/llvm/test/tools/llvm-readobj/ELF/reloc-types-loongarch64.test +index c26fae7e8323..e32dc893fa79 100644 +--- a/llvm/test/tools/llvm-readobj/ELF/reloc-types-loongarch64.test ++++ b/llvm/test/tools/llvm-readobj/ELF/reloc-types-loongarch64.test +@@ -93,6 +93,15 @@ + # CHECK: Type: R_LARCH_TLS_GD_HI20 (98) + # CHECK: Type: R_LARCH_32_PCREL (99) + # CHECK: Type: R_LARCH_RELAX (100) ++# CHECK: Type: R_LARCH_DELETE (101) ++# CHECK: Type: R_LARCH_ALIGN (102) ++# CHECK: Type: R_LARCH_PCREL20_S2 (103) ++# CHECK: Type: R_LARCH_CFA (104) ++# CHECK: Type: R_LARCH_ADD6 (105) ++# CHECK: Type: R_LARCH_SUB6 (106) ++# CHECK: Type: R_LARCH_ADD_ULEB128 (107) ++# CHECK: Type: R_LARCH_SUB_ULEB128 (108) ++# CHECK: Type: R_LARCH_64_PCREL (109) + + --- !ELF + FileHeader: +@@ -193,3 +202,12 @@ Sections: + - Type: R_LARCH_TLS_GD_HI20 + - Type: R_LARCH_32_PCREL + - Type: R_LARCH_RELAX ++ - Type: R_LARCH_DELETE ++ - Type: R_LARCH_ALIGN ++ - Type: R_LARCH_PCREL20_S2 ++ - Type: R_LARCH_CFA ++ - Type: R_LARCH_ADD6 ++ - Type: R_LARCH_SUB6 ++ - Type: R_LARCH_ADD_ULEB128 ++ - Type: R_LARCH_SUB_ULEB128 ++ - Type: R_LARCH_64_PCREL +diff --git a/llvm/unittests/Object/ELFTest.cpp b/llvm/unittests/Object/ELFTest.cpp +index 9cf8feb0e2c5..35fc2ec698fb 100644 +--- a/llvm/unittests/Object/ELFTest.cpp ++++ b/llvm/unittests/Object/ELFTest.cpp +@@ -233,6 +233,24 @@ TEST(ELFTest, getELFRelocationTypeNameForLoongArch) { + getELFRelocationTypeName(EM_LOONGARCH, R_LARCH_32_PCREL)); + EXPECT_EQ("R_LARCH_RELAX", + getELFRelocationTypeName(EM_LOONGARCH, R_LARCH_RELAX)); ++ EXPECT_EQ("R_LARCH_DELETE", ++ getELFRelocationTypeName(EM_LOONGARCH, R_LARCH_DELETE)); ++ EXPECT_EQ("R_LARCH_ALIGN", ++ getELFRelocationTypeName(EM_LOONGARCH, R_LARCH_ALIGN)); ++ EXPECT_EQ("R_LARCH_PCREL20_S2", ++ getELFRelocationTypeName(EM_LOONGARCH, R_LARCH_PCREL20_S2)); ++ EXPECT_EQ("R_LARCH_CFA", ++ getELFRelocationTypeName(EM_LOONGARCH, R_LARCH_CFA)); ++ EXPECT_EQ("R_LARCH_ADD6", ++ getELFRelocationTypeName(EM_LOONGARCH, R_LARCH_ADD6)); ++ EXPECT_EQ("R_LARCH_SUB6", ++ getELFRelocationTypeName(EM_LOONGARCH, R_LARCH_SUB6)); ++ EXPECT_EQ("R_LARCH_ADD_ULEB128", ++ getELFRelocationTypeName(EM_LOONGARCH, R_LARCH_ADD_ULEB128)); ++ EXPECT_EQ("R_LARCH_SUB_ULEB128", ++ getELFRelocationTypeName(EM_LOONGARCH, R_LARCH_SUB_ULEB128)); ++ EXPECT_EQ("R_LARCH_64_PCREL", ++ getELFRelocationTypeName(EM_LOONGARCH, R_LARCH_64_PCREL)); + } + + TEST(ELFTest, getELFRelativeRelocationType) { diff --git a/llvm14/PKGBUILD b/llvm14/PKGBUILD index 4c0eb411e6..9bc641c9e5 100644 --- a/llvm14/PKGBUILD +++ b/llvm14/PKGBUILD @@ -8,15 +8,17 @@ arch=('loong64' 'x86_64') url="https://llvm.org/" license=('custom:Apache 2.0 with LLVM Exception') makedepends=('cmake' 'ninja' 'libffi' 'libedit' 'ncurses' 'libxml2' - 'python') + 'python' 'python-psutil') checkdepends=('python-psutil') options=('staticlibs' '!lto') # Getting thousands of test failures with LTO _source_base=https://github.com/llvm/llvm-project/releases/download/llvmorg-$pkgver source=($_source_base/llvm-$pkgver.src.tar.xz{,.sig} - llvm-coroutines-ubsan.patch) + llvm-coroutines-ubsan.patch + llvm-loong64.patch) sha256sums=('050922ecaaca5781fdf6631ea92bc715183f202f9d2f15147226f023414f619a' 'SKIP' - 'ee9baf6df05474083857044d92f26f59d3ee709cdf82ba3bdb2792e6645f71d9') + 'ee9baf6df05474083857044d92f26f59d3ee709cdf82ba3bdb2792e6645f71d9' + '43c66f16ac510842ca7e6ae12869d671838799925c8009ddacae8a1af8f0d7e5') 
validpgpkeys=('474E22316ABF4785A88C6E8EA2C794A986419D8A') # Tom Stellard # Utilizing LLVM_DISTRIBUTION_COMPONENTS to avoid @@ -49,6 +51,7 @@ _get_distribution_components() { prepare() { cd llvm-$pkgver.src + patch -p1 -i $srcdir/llvm-loong64.patch mkdir build # https://github.com/llvm/llvm-project/issues/49689 diff --git a/llvm14/llvm-loong64.patch b/llvm14/llvm-loong64.patch new file mode 100644 index 0000000000..433a8f4062 --- /dev/null +++ b/llvm14/llvm-loong64.patch @@ -0,0 +1,47164 @@ +diff --git a/CMakeLists.txt b/CMakeLists.txt +index 2da05ef8..04292d5b 100644 +--- a/CMakeLists.txt ++++ b/CMakeLists.txt +@@ -342,6 +342,7 @@ set(LLVM_ALL_TARGETS + BPF + Hexagon + Lanai ++ LoongArch + Mips + MSP430 + NVPTX +diff --git a/cmake/config-ix.cmake b/cmake/config-ix.cmake +index 18d78879..e16e1c73 100644 +--- a/cmake/config-ix.cmake ++++ b/cmake/config-ix.cmake +@@ -464,6 +464,8 @@ elseif (LLVM_NATIVE_ARCH MATCHES "riscv64") + set(LLVM_NATIVE_ARCH RISCV) + elseif (LLVM_NATIVE_ARCH STREQUAL "m68k") + set(LLVM_NATIVE_ARCH M68k) ++elseif (LLVM_NATIVE_ARCH MATCHES "loongarch") ++ set(LLVM_NATIVE_ARCH LoongArch) + else () + message(FATAL_ERROR "Unknown architecture ${LLVM_NATIVE_ARCH}") + endif () +diff --git a/include/llvm/ADT/Triple.h b/include/llvm/ADT/Triple.h +index 42277c01..0482bf9c 100644 +--- a/include/llvm/ADT/Triple.h ++++ b/include/llvm/ADT/Triple.h +@@ -57,6 +57,8 @@ public: + bpfeb, // eBPF or extended BPF or 64-bit BPF (big endian) + csky, // CSKY: csky + hexagon, // Hexagon: hexagon ++ loongarch32, // LoongArch (32-bit): loongarch32 ++ loongarch64, // LoongArch (64-bit): loongarch64 + m68k, // M68k: Motorola 680x0 family + mips, // MIPS: mips, mipsallegrex, mipsr6 + mipsel, // MIPSEL: mipsel, mipsallegrexe, mipsr6el +@@ -218,6 +220,7 @@ public: + GNUX32, + GNUILP32, + CODE16, ++ GNUABILPX32, + EABI, + EABIHF, + Android, +@@ -789,6 +792,21 @@ public: + return isMIPS32() || isMIPS64(); + } + ++ /// Tests whether the target is LoongArch 32-bit ++ bool isLoongArch32() const { ++ return getArch() == Triple::loongarch32; ++ } ++ ++ /// Tests whether the target is LoongArch 64-bit. ++ bool isLoongArch64() const { ++ return getArch() == Triple::loongarch64; ++ } ++ ++ /// Tests whether the target is LoongArch (32- or 64-bit). ++ bool isLoongArch() const { ++ return isLoongArch32() || isLoongArch64(); ++ } ++ + /// Tests whether the target is PowerPC (32- or 64-bit LE or BE). + bool isPPC() const { + return getArch() == Triple::ppc || getArch() == Triple::ppc64 || +diff --git a/include/llvm/BinaryFormat/ELF.h b/include/llvm/BinaryFormat/ELF.h +index 5d3b1270..af00ca0b 100644 +--- a/include/llvm/BinaryFormat/ELF.h ++++ b/include/llvm/BinaryFormat/ELF.h +@@ -319,6 +319,7 @@ enum { + EM_BPF = 247, // Linux kernel bpf virtual machine + EM_VE = 251, // NEC SX-Aurora VE + EM_CSKY = 252, // C-SKY 32-bit processor ++ EM_LOONGARCH = 258, // LoongArch processor + }; + + // Object file classes. +@@ -671,6 +672,25 @@ enum { + STO_RISCV_VARIANT_CC = 0x80 + }; + ++// LoongArch Specific e_flags ++enum : unsigned { ++ // FIXME: Change these when all ABIs definition were finalized. 
++ // See current definitions: ++ // https://loongson.github.io/LoongArch-Documentation/LoongArch-ELF-ABI-EN.html#_e_flags_identifies_abi_type_and_version ++ EF_LARCH_BASE_ABI = 0x3, ++ EF_LARCH_BASE_ABI_ILP32S = 0x5, ++ EF_LARCH_BASE_ABI_ILP32F = 0x6, ++ EF_LARCH_BASE_ABI_ILP32D = 0x7, ++ EF_LARCH_BASE_ABI_LP64S = 0x1, ++ EF_LARCH_BASE_ABI_LP64F = 0x2, ++ EF_LARCH_BASE_ABI_LP64D = 0x3 ++}; ++ ++// ELF Relocation types for LoongArch ++enum { ++#include "ELFRelocs/LoongArch.def" ++}; ++ + // ELF Relocation types for S390/zSeries + enum { + #include "ELFRelocs/SystemZ.def" +diff --git a/include/llvm/BinaryFormat/ELFRelocs/LoongArch.def b/include/llvm/BinaryFormat/ELFRelocs/LoongArch.def +new file mode 100644 +index 00000000..6699e732 +--- /dev/null ++++ b/include/llvm/BinaryFormat/ELFRelocs/LoongArch.def +@@ -0,0 +1,102 @@ ++ ++#ifndef ELF_RELOC ++#error "ELF_RELOC must be defined" ++#endif ++ ++ELF_RELOC(R_LARCH_NONE, 0) ++ELF_RELOC(R_LARCH_32, 1) ++ELF_RELOC(R_LARCH_64, 2) ++ELF_RELOC(R_LARCH_RELATIVE, 3) ++ELF_RELOC(R_LARCH_COPY, 4) ++ELF_RELOC(R_LARCH_JUMP_SLOT, 5) ++ELF_RELOC(R_LARCH_TLS_DTPMOD32, 6) ++ELF_RELOC(R_LARCH_TLS_DTPMOD64, 7) ++ELF_RELOC(R_LARCH_TLS_DTPREL32, 8) ++ELF_RELOC(R_LARCH_TLS_DTPREL64, 9) ++ELF_RELOC(R_LARCH_TLS_TPREL32, 10) ++ELF_RELOC(R_LARCH_TLS_TPREL64, 11) ++ELF_RELOC(R_LARCH_IRELATIVE, 12) ++ ++ELF_RELOC(R_LARCH_MARK_LA, 20) ++ELF_RELOC(R_LARCH_MARK_PCREL, 21) ++ ++ELF_RELOC(R_LARCH_SOP_PUSH_PCREL, 22) ++ ++ELF_RELOC(R_LARCH_SOP_PUSH_ABSOLUTE, 23) ++ ++ELF_RELOC(R_LARCH_SOP_PUSH_DUP, 24) ++ELF_RELOC(R_LARCH_SOP_PUSH_GPREL, 25) ++ELF_RELOC(R_LARCH_SOP_PUSH_TLS_TPREL, 26) ++ELF_RELOC(R_LARCH_SOP_PUSH_TLS_GOT, 27) ++ELF_RELOC(R_LARCH_SOP_PUSH_TLS_GD, 28) ++ELF_RELOC(R_LARCH_SOP_PUSH_PLT_PCREL, 29) ++ ++ELF_RELOC(R_LARCH_SOP_ASSERT, 30) ++ELF_RELOC(R_LARCH_SOP_NOT, 31) ++ELF_RELOC(R_LARCH_SOP_SUB, 32) ++ELF_RELOC(R_LARCH_SOP_SL, 33) ++ELF_RELOC(R_LARCH_SOP_SR, 34) ++ELF_RELOC(R_LARCH_SOP_ADD, 35) ++ELF_RELOC(R_LARCH_SOP_AND, 36) ++ELF_RELOC(R_LARCH_SOP_IF_ELSE, 37) ++ELF_RELOC(R_LARCH_SOP_POP_32_S_10_5, 38) ++ELF_RELOC(R_LARCH_SOP_POP_32_U_10_12, 39) ++ELF_RELOC(R_LARCH_SOP_POP_32_S_10_12, 40) ++ELF_RELOC(R_LARCH_SOP_POP_32_S_10_16, 41) ++ELF_RELOC(R_LARCH_SOP_POP_32_S_10_16_S2, 42) ++ELF_RELOC(R_LARCH_SOP_POP_32_S_5_20, 43) ++ELF_RELOC(R_LARCH_SOP_POP_32_S_0_5_10_16_S2, 44) ++ELF_RELOC(R_LARCH_SOP_POP_32_S_0_10_10_16_S2, 45) ++ELF_RELOC(R_LARCH_SOP_POP_32_U, 46) ++ ++ELF_RELOC(R_LARCH_ADD8, 47) ++ELF_RELOC(R_LARCH_ADD16, 48) ++ELF_RELOC(R_LARCH_ADD24, 49) ++ELF_RELOC(R_LARCH_ADD32, 50) ++ELF_RELOC(R_LARCH_ADD64, 51) ++ELF_RELOC(R_LARCH_SUB8, 52) ++ELF_RELOC(R_LARCH_SUB16, 53) ++ELF_RELOC(R_LARCH_SUB24, 54) ++ELF_RELOC(R_LARCH_SUB32, 55) ++ELF_RELOC(R_LARCH_SUB64, 56) ++ ++ELF_RELOC(R_LARCH_GNU_VTINHERIT, 57) ++ELF_RELOC(R_LARCH_GNU_VTENTRY, 58) ++ ++ELF_RELOC(R_LARCH_B16, 64) ++ELF_RELOC(R_LARCH_B21, 65) ++ELF_RELOC(R_LARCH_B26, 66) ++ELF_RELOC(R_LARCH_ABS_HI20, 67) ++ELF_RELOC(R_LARCH_ABS_LO12, 68) ++ELF_RELOC(R_LARCH_ABS64_LO20, 69) ++ELF_RELOC(R_LARCH_ABS64_HI12, 70) ++ELF_RELOC(R_LARCH_PCALA_HI20, 71) ++ELF_RELOC(R_LARCH_PCALA_LO12, 72) ++ELF_RELOC(R_LARCH_PCALA64_LO20, 73) ++ELF_RELOC(R_LARCH_PCALA64_HI12, 74) ++ELF_RELOC(R_LARCH_GOT_PC_HI20, 75) ++ELF_RELOC(R_LARCH_GOT_PC_LO12, 76) ++ELF_RELOC(R_LARCH_GOT64_PC_LO20, 77) ++ELF_RELOC(R_LARCH_GOT64_PC_HI12, 78) ++ELF_RELOC(R_LARCH_GOT_HI20, 79) ++ELF_RELOC(R_LARCH_GOT_LO12, 80) ++ELF_RELOC(R_LARCH_GOT64_LO20, 81) ++ELF_RELOC(R_LARCH_GOT64_HI12, 82) ++ELF_RELOC(R_LARCH_TLS_LE_HI20, 83) 
++ELF_RELOC(R_LARCH_TLS_LE_LO12, 84) ++ELF_RELOC(R_LARCH_TLS_LE64_LO20, 85) ++ELF_RELOC(R_LARCH_TLS_LE64_HI12, 86) ++ELF_RELOC(R_LARCH_TLS_IE_PC_HI20, 87) ++ELF_RELOC(R_LARCH_TLS_IE_PC_LO12, 88) ++ELF_RELOC(R_LARCH_TLS_IE64_PC_LO20, 89) ++ELF_RELOC(R_LARCH_TLS_IE64_PC_HI12, 90) ++ELF_RELOC(R_LARCH_TLS_IE_HI20, 91) ++ELF_RELOC(R_LARCH_TLS_IE_LO12, 92) ++ELF_RELOC(R_LARCH_TLS_IE64_LO20, 93) ++ELF_RELOC(R_LARCH_TLS_IE64_HI12, 94) ++ELF_RELOC(R_LARCH_TLS_LD_PC_HI20, 95) ++ELF_RELOC(R_LARCH_TLS_LD_HI20, 96) ++ELF_RELOC(R_LARCH_TLS_GD_PC_HI20, 97) ++ELF_RELOC(R_LARCH_TLS_GD_HI20, 98) ++ELF_RELOC(R_LARCH_32_PCREL, 99) ++ELF_RELOC(R_LARCH_RELAX, 100) +diff --git a/include/llvm/ExecutionEngine/Orc/OrcABISupport.h b/include/llvm/ExecutionEngine/Orc/OrcABISupport.h +index 82dfdc27..4646ffdd 100644 +--- a/include/llvm/ExecutionEngine/Orc/OrcABISupport.h ++++ b/include/llvm/ExecutionEngine/Orc/OrcABISupport.h +@@ -330,6 +330,43 @@ public: + JITTargetAddress PointersBlockTargetAddress, unsigned NumStubs); + }; + ++// @brief LoongArch64 support. ++class OrcLoongArch64 { ++public: ++ static constexpr unsigned PointerSize = 8; ++ static constexpr unsigned TrampolineSize = 40; ++ static constexpr unsigned StubSize = 32; ++ static constexpr unsigned StubToPointerMaxDisplacement = 1 << 31; ++ static constexpr unsigned ResolverCodeSize = 0x120; ++ ++ /// Write the resolver code into the given memory. The user is ++ /// responsible for allocating the memory and setting permissions. ++ /// ++ /// ReentryFnAddr should be the address of a function whose signature matches ++ /// void* (*)(void *TrampolineAddr, void *ReentryCtxAddr). The ReentryCtxAddr ++ /// argument of writeResolverCode will be passed as the second argument to ++ /// the function at ReentryFnAddr. ++ static void writeResolverCode(char *ResolverWorkingMem, ++ JITTargetAddress ResolverTargetAddress, ++ JITTargetAddress ReentryFnAddr, ++ JITTargetAddress ReentryCtxAddr); ++ ++ /// Write the requested number of trampolines into the given memory, ++ /// which must be big enough to hold 1 pointer, plus NumTrampolines ++ /// trampolines. ++ static void writeTrampolines(char *TrampolineBlockWorkingMem, ++ JITTargetAddress TrampolineBlockTargetAddress, ++ JITTargetAddress ResolverFnAddr, ++ unsigned NumTrampolines); ++ /// Write NumStubs indirect stubs to working memory at StubsBlockWorkingMem. ++ /// Stubs will be written as if linked at StubsBlockTargetAddress, with the ++ /// Nth stub using the Nth pointer in memory starting at ++ /// PointersBlockTargetAddress. 
++ static void writeIndirectStubsBlock( ++ char *StubsBlockWorkingMem, JITTargetAddress StubsBlockTargetAddress, ++ JITTargetAddress PointersBlockTargetAddress, unsigned NumStubs); ++}; ++ + } // end namespace orc + } // end namespace llvm + +diff --git a/include/llvm/IR/CMakeLists.txt b/include/llvm/IR/CMakeLists.txt +index 0498fc26..b675a45d 100644 +--- a/include/llvm/IR/CMakeLists.txt ++++ b/include/llvm/IR/CMakeLists.txt +@@ -9,6 +9,7 @@ tablegen(LLVM IntrinsicsAMDGPU.h -gen-intrinsic-enums -intrinsic-prefix=amdgcn) + tablegen(LLVM IntrinsicsARM.h -gen-intrinsic-enums -intrinsic-prefix=arm) + tablegen(LLVM IntrinsicsBPF.h -gen-intrinsic-enums -intrinsic-prefix=bpf) + tablegen(LLVM IntrinsicsHexagon.h -gen-intrinsic-enums -intrinsic-prefix=hexagon) ++tablegen(LLVM IntrinsicsLoongArch.h -gen-intrinsic-enums -intrinsic-prefix=loongarch) + tablegen(LLVM IntrinsicsMips.h -gen-intrinsic-enums -intrinsic-prefix=mips) + tablegen(LLVM IntrinsicsNVPTX.h -gen-intrinsic-enums -intrinsic-prefix=nvvm) + tablegen(LLVM IntrinsicsPowerPC.h -gen-intrinsic-enums -intrinsic-prefix=ppc) +diff --git a/include/llvm/IR/InlineAsm.h b/include/llvm/IR/InlineAsm.h +index cf6b7af9..983fe97e 100644 +--- a/include/llvm/IR/InlineAsm.h ++++ b/include/llvm/IR/InlineAsm.h +@@ -266,6 +266,7 @@ public: + Constraint_Uy, + Constraint_X, + Constraint_Z, ++ Constraint_ZB, + Constraint_ZC, + Constraint_Zy, + Constraints_Max = Constraint_Zy, +diff --git a/include/llvm/IR/Intrinsics.td b/include/llvm/IR/Intrinsics.td +index f5248e82..d4a8df4a 100644 +--- a/include/llvm/IR/Intrinsics.td ++++ b/include/llvm/IR/Intrinsics.td +@@ -1937,3 +1937,4 @@ include "llvm/IR/IntrinsicsSystemZ.td" + include "llvm/IR/IntrinsicsWebAssembly.td" + include "llvm/IR/IntrinsicsRISCV.td" + include "llvm/IR/IntrinsicsVE.td" ++include "llvm/IR/IntrinsicsLoongArch.td" +diff --git a/include/llvm/IR/IntrinsicsLoongArch.td b/include/llvm/IR/IntrinsicsLoongArch.td +new file mode 100644 +index 00000000..6e70173f +--- /dev/null ++++ b/include/llvm/IR/IntrinsicsLoongArch.td +@@ -0,0 +1,3619 @@ ++//===- IntrinsicsLoongArch.td - Defines LoongArch intrinsics ---------*- tablegen -*-===// ++// ++// The LLVM Compiler Infrastructure ++// ++// This file is distributed under the University of Illinois Open Source ++// License. See LICENSE.TXT for details. ++// ++//===----------------------------------------------------------------------===// ++// ++// This file defines all of the LoongArch-specific intrinsics. ++// ++//===----------------------------------------------------------------------===// ++ ++let TargetPrefix = "loongarch" in { // All intrinsics start with "llvm.loongarch.". 
++ ++//===----------------------------------------------------------------------===// ++// LoongArch LSX ++ ++def int_loongarch_lsx_vclo_b : GCCBuiltin<"__builtin_lsx_vclo_b">, ++ Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vclo_h : GCCBuiltin<"__builtin_lsx_vclo_h">, ++ Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vclo_w : GCCBuiltin<"__builtin_lsx_vclo_w">, ++ Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vclo_d : GCCBuiltin<"__builtin_lsx_vclo_d">, ++ Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lsx_vflogb_s : GCCBuiltin<"__builtin_lsx_vflogb_s">, ++ Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vflogb_d : GCCBuiltin<"__builtin_lsx_vflogb_d">, ++ Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lsx_vpickve2gr_b : GCCBuiltin<"__builtin_lsx_vpickve2gr_b">, ++ Intrinsic<[llvm_i32_ty], [llvm_v16i8_ty, llvm_i32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vpickve2gr_h : GCCBuiltin<"__builtin_lsx_vpickve2gr_h">, ++ Intrinsic<[llvm_i32_ty], [llvm_v8i16_ty, llvm_i32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vpickve2gr_w : GCCBuiltin<"__builtin_lsx_vpickve2gr_w">, ++ Intrinsic<[llvm_i32_ty], [llvm_v4i32_ty, llvm_i32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vpickve2gr_d : GCCBuiltin<"__builtin_lsx_vpickve2gr_d">, ++ Intrinsic<[llvm_i64_ty], [llvm_v2i64_ty, llvm_i32_ty], [IntrNoMem]>; ++ ++def int_loongarch_lsx_vpickve2gr_bu : GCCBuiltin<"__builtin_lsx_vpickve2gr_bu">, ++ Intrinsic<[llvm_i32_ty], [llvm_v16i8_ty, llvm_i32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vpickve2gr_hu : GCCBuiltin<"__builtin_lsx_vpickve2gr_hu">, ++ Intrinsic<[llvm_i32_ty], [llvm_v8i16_ty, llvm_i32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vpickve2gr_wu : GCCBuiltin<"__builtin_lsx_vpickve2gr_wu">, ++ Intrinsic<[llvm_i32_ty], [llvm_v4i32_ty, llvm_i32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vpickve2gr_du : GCCBuiltin<"__builtin_lsx_vpickve2gr_du">, ++ Intrinsic<[llvm_i64_ty], [llvm_v2i64_ty, llvm_i32_ty], [IntrNoMem]>; ++ ++def int_loongarch_lsx_vreplvei_b : GCCBuiltin<"__builtin_lsx_vreplvei_b">, ++ Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_i32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vreplvei_h : GCCBuiltin<"__builtin_lsx_vreplvei_h">, ++ Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_i32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vreplvei_w : GCCBuiltin<"__builtin_lsx_vreplvei_w">, ++ Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_i32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vreplvei_d : GCCBuiltin<"__builtin_lsx_vreplvei_d">, ++ Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_i32_ty], [IntrNoMem]>; ++ ++def int_loongarch_lsx_vmskltz_b : GCCBuiltin<"__builtin_lsx_vmskltz_b">, ++ Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vmskltz_h : GCCBuiltin<"__builtin_lsx_vmskltz_h">, ++ Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vmskltz_w : GCCBuiltin<"__builtin_lsx_vmskltz_w">, ++ Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vmskltz_d : GCCBuiltin<"__builtin_lsx_vmskltz_d">, ++ Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lsx_vfmadd_s : GCCBuiltin<"__builtin_lsx_vfmadd_s">, ++ Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_v4f32_ty, llvm_v4f32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vfmadd_d : GCCBuiltin<"__builtin_lsx_vfmadd_d">, ++ 
Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty, llvm_v2f64_ty, llvm_v2f64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lsx_vfmsub_s : GCCBuiltin<"__builtin_lsx_vfmsub_s">, ++ Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_v4f32_ty, llvm_v4f32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vfmsub_d : GCCBuiltin<"__builtin_lsx_vfmsub_d">, ++ Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty, llvm_v2f64_ty, llvm_v2f64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lsx_vfnmadd_s : GCCBuiltin<"__builtin_lsx_vfnmadd_s">, ++ Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_v4f32_ty, llvm_v4f32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vfnmadd_d : GCCBuiltin<"__builtin_lsx_vfnmadd_d">, ++ Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty, llvm_v2f64_ty, llvm_v2f64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lsx_vfnmsub_s : GCCBuiltin<"__builtin_lsx_vfnmsub_s">, ++ Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_v4f32_ty, llvm_v4f32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vfnmsub_d : GCCBuiltin<"__builtin_lsx_vfnmsub_d">, ++ Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty, llvm_v2f64_ty, llvm_v2f64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lsx_vfcmp_caf_s : GCCBuiltin<"__builtin_lsx_vfcmp_caf_s">, ++ Intrinsic<[llvm_v4i32_ty], [llvm_v4f32_ty, llvm_v4f32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vfcmp_caf_d : GCCBuiltin<"__builtin_lsx_vfcmp_caf_d">, ++ Intrinsic<[llvm_v2i64_ty], [llvm_v2f64_ty, llvm_v2f64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lsx_vfcmp_cor_s : GCCBuiltin<"__builtin_lsx_vfcmp_cor_s">, ++ Intrinsic<[llvm_v4i32_ty], [llvm_v4f32_ty, llvm_v4f32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vfcmp_cor_d : GCCBuiltin<"__builtin_lsx_vfcmp_cor_d">, ++ Intrinsic<[llvm_v2i64_ty], [llvm_v2f64_ty, llvm_v2f64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lsx_vfcmp_cun_s : GCCBuiltin<"__builtin_lsx_vfcmp_cun_s">, ++ Intrinsic<[llvm_v4i32_ty], [llvm_v4f32_ty, llvm_v4f32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vfcmp_cun_d : GCCBuiltin<"__builtin_lsx_vfcmp_cun_d">, ++ Intrinsic<[llvm_v2i64_ty], [llvm_v2f64_ty, llvm_v2f64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lsx_vfcmp_cune_s : GCCBuiltin<"__builtin_lsx_vfcmp_cune_s">, ++ Intrinsic<[llvm_v4i32_ty], [llvm_v4f32_ty, llvm_v4f32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vfcmp_cune_d : GCCBuiltin<"__builtin_lsx_vfcmp_cune_d">, ++ Intrinsic<[llvm_v2i64_ty], [llvm_v2f64_ty, llvm_v2f64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lsx_vfcmp_cueq_s : GCCBuiltin<"__builtin_lsx_vfcmp_cueq_s">, ++ Intrinsic<[llvm_v4i32_ty], [llvm_v4f32_ty, llvm_v4f32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vfcmp_cueq_d : GCCBuiltin<"__builtin_lsx_vfcmp_cueq_d">, ++ Intrinsic<[llvm_v2i64_ty], [llvm_v2f64_ty, llvm_v2f64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lsx_vfcmp_ceq_s : GCCBuiltin<"__builtin_lsx_vfcmp_ceq_s">, ++ Intrinsic<[llvm_v4i32_ty], [llvm_v4f32_ty, llvm_v4f32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vfcmp_ceq_d : GCCBuiltin<"__builtin_lsx_vfcmp_ceq_d">, ++ Intrinsic<[llvm_v2i64_ty], [llvm_v2f64_ty, llvm_v2f64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lsx_vfcmp_cne_s : GCCBuiltin<"__builtin_lsx_vfcmp_cne_s">, ++ Intrinsic<[llvm_v4i32_ty], [llvm_v4f32_ty, llvm_v4f32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vfcmp_cne_d : GCCBuiltin<"__builtin_lsx_vfcmp_cne_d">, ++ Intrinsic<[llvm_v2i64_ty], [llvm_v2f64_ty, llvm_v2f64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lsx_vfcmp_clt_s : GCCBuiltin<"__builtin_lsx_vfcmp_clt_s">, ++ Intrinsic<[llvm_v4i32_ty], [llvm_v4f32_ty, llvm_v4f32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vfcmp_clt_d : GCCBuiltin<"__builtin_lsx_vfcmp_clt_d">, ++ Intrinsic<[llvm_v2i64_ty], 
[llvm_v2f64_ty, llvm_v2f64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lsx_vfcmp_cult_s : GCCBuiltin<"__builtin_lsx_vfcmp_cult_s">, ++ Intrinsic<[llvm_v4i32_ty], [llvm_v4f32_ty, llvm_v4f32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vfcmp_cult_d : GCCBuiltin<"__builtin_lsx_vfcmp_cult_d">, ++ Intrinsic<[llvm_v2i64_ty], [llvm_v2f64_ty, llvm_v2f64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lsx_vfcmp_cle_s : GCCBuiltin<"__builtin_lsx_vfcmp_cle_s">, ++ Intrinsic<[llvm_v4i32_ty], [llvm_v4f32_ty, llvm_v4f32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vfcmp_cle_d : GCCBuiltin<"__builtin_lsx_vfcmp_cle_d">, ++ Intrinsic<[llvm_v2i64_ty], [llvm_v2f64_ty, llvm_v2f64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lsx_vfcmp_cule_s : GCCBuiltin<"__builtin_lsx_vfcmp_cule_s">, ++ Intrinsic<[llvm_v4i32_ty], [llvm_v4f32_ty, llvm_v4f32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vfcmp_cule_d : GCCBuiltin<"__builtin_lsx_vfcmp_cule_d">, ++ Intrinsic<[llvm_v2i64_ty], [llvm_v2f64_ty, llvm_v2f64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lsx_vfcmp_saf_s : GCCBuiltin<"__builtin_lsx_vfcmp_saf_s">, ++ Intrinsic<[llvm_v4i32_ty], [llvm_v4f32_ty, llvm_v4f32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vfcmp_saf_d : GCCBuiltin<"__builtin_lsx_vfcmp_saf_d">, ++ Intrinsic<[llvm_v2i64_ty], [llvm_v2f64_ty, llvm_v2f64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lsx_vfcmp_sor_s : GCCBuiltin<"__builtin_lsx_vfcmp_sor_s">, ++ Intrinsic<[llvm_v4i32_ty], [llvm_v4f32_ty, llvm_v4f32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vfcmp_sor_d : GCCBuiltin<"__builtin_lsx_vfcmp_sor_d">, ++ Intrinsic<[llvm_v2i64_ty], [llvm_v2f64_ty, llvm_v2f64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lsx_vfcmp_sun_s : GCCBuiltin<"__builtin_lsx_vfcmp_sun_s">, ++ Intrinsic<[llvm_v4i32_ty], [llvm_v4f32_ty, llvm_v4f32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vfcmp_sun_d : GCCBuiltin<"__builtin_lsx_vfcmp_sun_d">, ++ Intrinsic<[llvm_v2i64_ty], [llvm_v2f64_ty, llvm_v2f64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lsx_vfcmp_sune_s : GCCBuiltin<"__builtin_lsx_vfcmp_sune_s">, ++ Intrinsic<[llvm_v4i32_ty], [llvm_v4f32_ty, llvm_v4f32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vfcmp_sune_d : GCCBuiltin<"__builtin_lsx_vfcmp_sune_d">, ++ Intrinsic<[llvm_v2i64_ty], [llvm_v2f64_ty, llvm_v2f64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lsx_vfcmp_sueq_s : GCCBuiltin<"__builtin_lsx_vfcmp_sueq_s">, ++ Intrinsic<[llvm_v4i32_ty], [llvm_v4f32_ty, llvm_v4f32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vfcmp_sueq_d : GCCBuiltin<"__builtin_lsx_vfcmp_sueq_d">, ++ Intrinsic<[llvm_v2i64_ty], [llvm_v2f64_ty, llvm_v2f64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lsx_vfcmp_seq_s : GCCBuiltin<"__builtin_lsx_vfcmp_seq_s">, ++ Intrinsic<[llvm_v4i32_ty], [llvm_v4f32_ty, llvm_v4f32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vfcmp_seq_d : GCCBuiltin<"__builtin_lsx_vfcmp_seq_d">, ++ Intrinsic<[llvm_v2i64_ty], [llvm_v2f64_ty, llvm_v2f64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lsx_vfcmp_sne_s : GCCBuiltin<"__builtin_lsx_vfcmp_sne_s">, ++ Intrinsic<[llvm_v4i32_ty], [llvm_v4f32_ty, llvm_v4f32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vfcmp_sne_d : GCCBuiltin<"__builtin_lsx_vfcmp_sne_d">, ++ Intrinsic<[llvm_v2i64_ty], [llvm_v2f64_ty, llvm_v2f64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lsx_vfcmp_slt_s : GCCBuiltin<"__builtin_lsx_vfcmp_slt_s">, ++ Intrinsic<[llvm_v4i32_ty], [llvm_v4f32_ty, llvm_v4f32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vfcmp_slt_d : GCCBuiltin<"__builtin_lsx_vfcmp_slt_d">, ++ Intrinsic<[llvm_v2i64_ty], [llvm_v2f64_ty, llvm_v2f64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lsx_vfcmp_sult_s : 
GCCBuiltin<"__builtin_lsx_vfcmp_sult_s">, ++ Intrinsic<[llvm_v4i32_ty], [llvm_v4f32_ty, llvm_v4f32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vfcmp_sult_d : GCCBuiltin<"__builtin_lsx_vfcmp_sult_d">, ++ Intrinsic<[llvm_v2i64_ty], [llvm_v2f64_ty, llvm_v2f64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lsx_vfcmp_sle_s : GCCBuiltin<"__builtin_lsx_vfcmp_sle_s">, ++ Intrinsic<[llvm_v4i32_ty], [llvm_v4f32_ty, llvm_v4f32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vfcmp_sle_d : GCCBuiltin<"__builtin_lsx_vfcmp_sle_d">, ++ Intrinsic<[llvm_v2i64_ty], [llvm_v2f64_ty, llvm_v2f64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lsx_vfcmp_sule_s : GCCBuiltin<"__builtin_lsx_vfcmp_sule_s">, ++ Intrinsic<[llvm_v4i32_ty], [llvm_v4f32_ty, llvm_v4f32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vfcmp_sule_d : GCCBuiltin<"__builtin_lsx_vfcmp_sule_d">, ++ Intrinsic<[llvm_v2i64_ty], [llvm_v2f64_ty, llvm_v2f64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lsx_vbitsel_v : GCCBuiltin<"__builtin_lsx_vbitsel_v">, ++ Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty, llvm_v16i8_ty], [IntrNoMem]>; ++ ++def int_loongarch_lsx_vshuf_b : GCCBuiltin<"__builtin_lsx_vshuf_b">, ++ Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty, llvm_v16i8_ty], [IntrNoMem]>; ++ ++def int_loongarch_lsx_vldrepl_b : GCCBuiltin<"__builtin_lsx_vldrepl_b">, ++ Intrinsic<[llvm_v16i8_ty], [llvm_ptr_ty, llvm_i32_ty], [IntrReadMem, IntrArgMemOnly]>; ++def int_loongarch_lsx_vldrepl_h : GCCBuiltin<"__builtin_lsx_vldrepl_h">, ++ Intrinsic<[llvm_v8i16_ty], [llvm_ptr_ty, llvm_i32_ty], [IntrReadMem, IntrArgMemOnly]>; ++def int_loongarch_lsx_vldrepl_w : GCCBuiltin<"__builtin_lsx_vldrepl_w">, ++ Intrinsic<[llvm_v4i32_ty], [llvm_ptr_ty, llvm_i32_ty], [IntrReadMem, IntrArgMemOnly]>; ++def int_loongarch_lsx_vldrepl_d : GCCBuiltin<"__builtin_lsx_vldrepl_d">, ++ Intrinsic<[llvm_v2i64_ty], [llvm_ptr_ty, llvm_i32_ty], [IntrReadMem, IntrArgMemOnly]>; ++ ++def int_loongarch_lsx_vstelm_b : GCCBuiltin<"__builtin_lsx_vstelm_b">, ++ Intrinsic<[], [llvm_v16i8_ty, llvm_ptr_ty, llvm_i32_ty, llvm_i32_ty], [IntrArgMemOnly]>; ++def int_loongarch_lsx_vstelm_h : GCCBuiltin<"__builtin_lsx_vstelm_h">, ++ Intrinsic<[], [llvm_v8i16_ty, llvm_ptr_ty, llvm_i32_ty, llvm_i32_ty], [IntrArgMemOnly]>; ++def int_loongarch_lsx_vstelm_w : GCCBuiltin<"__builtin_lsx_vstelm_w">, ++ Intrinsic<[], [llvm_v4i32_ty, llvm_ptr_ty, llvm_i32_ty, llvm_i32_ty], [IntrArgMemOnly]>; ++def int_loongarch_lsx_vstelm_d : GCCBuiltin<"__builtin_lsx_vstelm_d">, ++ Intrinsic<[], [llvm_v2i64_ty, llvm_ptr_ty, llvm_i32_ty, llvm_i32_ty], [IntrArgMemOnly]>; ++ ++def int_loongarch_lsx_vldx : GCCBuiltin<"__builtin_lsx_vldx">, ++ Intrinsic<[llvm_v16i8_ty], [llvm_ptr_ty, llvm_i64_ty], ++ [IntrReadMem, IntrArgMemOnly]>; ++ ++def int_loongarch_lsx_vstx : GCCBuiltin<"__builtin_lsx_vstx">, ++ Intrinsic<[], [llvm_v16i8_ty, llvm_ptr_ty, llvm_i64_ty], ++ [IntrArgMemOnly]>; ++ ++def int_loongarch_lsx_vaddwev_d_w : GCCBuiltin<"__builtin_lsx_vaddwev_d_w">, ++ Intrinsic<[llvm_v2i64_ty], [llvm_v4i32_ty, llvm_v4i32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vaddwev_w_h : GCCBuiltin<"__builtin_lsx_vaddwev_w_h">, ++ Intrinsic<[llvm_v4i32_ty], [llvm_v8i16_ty, llvm_v8i16_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vaddwev_h_b : GCCBuiltin<"__builtin_lsx_vaddwev_h_b">, ++ Intrinsic<[llvm_v8i16_ty], [llvm_v16i8_ty, llvm_v16i8_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vaddwev_q_d : GCCBuiltin<"__builtin_lsx_vaddwev_q_d">, ++ Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lsx_vsubwev_d_w : 
GCCBuiltin<"__builtin_lsx_vsubwev_d_w">, ++ Intrinsic<[llvm_v2i64_ty], [llvm_v4i32_ty, llvm_v4i32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vsubwev_w_h : GCCBuiltin<"__builtin_lsx_vsubwev_w_h">, ++ Intrinsic<[llvm_v4i32_ty], [llvm_v8i16_ty, llvm_v8i16_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vsubwev_h_b : GCCBuiltin<"__builtin_lsx_vsubwev_h_b">, ++ Intrinsic<[llvm_v8i16_ty], [llvm_v16i8_ty, llvm_v16i8_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vsubwev_q_d : GCCBuiltin<"__builtin_lsx_vsubwev_q_d">, ++ Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty], [IntrNoMem]>; ++ ++ ++def int_loongarch_lsx_vaddwod_d_w : GCCBuiltin<"__builtin_lsx_vaddwod_d_w">, ++ Intrinsic<[llvm_v2i64_ty], [llvm_v4i32_ty, llvm_v4i32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vaddwod_w_h : GCCBuiltin<"__builtin_lsx_vaddwod_w_h">, ++ Intrinsic<[llvm_v4i32_ty], [llvm_v8i16_ty, llvm_v8i16_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vaddwod_h_b : GCCBuiltin<"__builtin_lsx_vaddwod_h_b">, ++ Intrinsic<[llvm_v8i16_ty], [llvm_v16i8_ty, llvm_v16i8_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vaddwod_q_d : GCCBuiltin<"__builtin_lsx_vaddwod_q_d">, ++ Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lsx_vsubwod_d_w : GCCBuiltin<"__builtin_lsx_vsubwod_d_w">, ++ Intrinsic<[llvm_v2i64_ty], [llvm_v4i32_ty, llvm_v4i32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vsubwod_w_h : GCCBuiltin<"__builtin_lsx_vsubwod_w_h">, ++ Intrinsic<[llvm_v4i32_ty], [llvm_v8i16_ty, llvm_v8i16_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vsubwod_h_b : GCCBuiltin<"__builtin_lsx_vsubwod_h_b">, ++ Intrinsic<[llvm_v8i16_ty], [llvm_v16i8_ty, llvm_v16i8_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vsubwod_q_d : GCCBuiltin<"__builtin_lsx_vsubwod_q_d">, ++ Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lsx_vaddwev_d_wu : GCCBuiltin<"__builtin_lsx_vaddwev_d_wu">, ++ Intrinsic<[llvm_v2i64_ty], [llvm_v4i32_ty, llvm_v4i32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vaddwev_w_hu : GCCBuiltin<"__builtin_lsx_vaddwev_w_hu">, ++ Intrinsic<[llvm_v4i32_ty], [llvm_v8i16_ty, llvm_v8i16_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vaddwev_h_bu : GCCBuiltin<"__builtin_lsx_vaddwev_h_bu">, ++ Intrinsic<[llvm_v8i16_ty], [llvm_v16i8_ty, llvm_v16i8_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vaddwev_q_du : GCCBuiltin<"__builtin_lsx_vaddwev_q_du">, ++ Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lsx_vsubwev_d_wu : GCCBuiltin<"__builtin_lsx_vsubwev_d_wu">, ++ Intrinsic<[llvm_v2i64_ty], [llvm_v4i32_ty, llvm_v4i32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vsubwev_w_hu : GCCBuiltin<"__builtin_lsx_vsubwev_w_hu">, ++ Intrinsic<[llvm_v4i32_ty], [llvm_v8i16_ty, llvm_v8i16_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vsubwev_h_bu : GCCBuiltin<"__builtin_lsx_vsubwev_h_bu">, ++ Intrinsic<[llvm_v8i16_ty], [llvm_v16i8_ty, llvm_v16i8_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vsubwev_q_du : GCCBuiltin<"__builtin_lsx_vsubwev_q_du">, ++ Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lsx_vaddwod_d_wu : GCCBuiltin<"__builtin_lsx_vaddwod_d_wu">, ++ Intrinsic<[llvm_v2i64_ty], [llvm_v4i32_ty, llvm_v4i32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vaddwod_w_hu : GCCBuiltin<"__builtin_lsx_vaddwod_w_hu">, ++ Intrinsic<[llvm_v4i32_ty], [llvm_v8i16_ty, llvm_v8i16_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vaddwod_h_bu : GCCBuiltin<"__builtin_lsx_vaddwod_h_bu">, ++ Intrinsic<[llvm_v8i16_ty], [llvm_v16i8_ty, llvm_v16i8_ty], 
[IntrNoMem]>; ++def int_loongarch_lsx_vaddwod_q_du : GCCBuiltin<"__builtin_lsx_vaddwod_q_du">, ++ Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lsx_vsubwod_d_wu : GCCBuiltin<"__builtin_lsx_vsubwod_d_wu">, ++ Intrinsic<[llvm_v2i64_ty], [llvm_v4i32_ty, llvm_v4i32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vsubwod_w_hu : GCCBuiltin<"__builtin_lsx_vsubwod_w_hu">, ++ Intrinsic<[llvm_v4i32_ty], [llvm_v8i16_ty, llvm_v8i16_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vsubwod_h_bu : GCCBuiltin<"__builtin_lsx_vsubwod_h_bu">, ++ Intrinsic<[llvm_v8i16_ty], [llvm_v16i8_ty, llvm_v16i8_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vsubwod_q_du : GCCBuiltin<"__builtin_lsx_vsubwod_q_du">, ++ Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lsx_vaddwev_d_wu_w : GCCBuiltin<"__builtin_lsx_vaddwev_d_wu_w">, ++ Intrinsic<[llvm_v2i64_ty], [llvm_v4i32_ty, llvm_v4i32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vaddwev_w_hu_h : GCCBuiltin<"__builtin_lsx_vaddwev_w_hu_h">, ++ Intrinsic<[llvm_v4i32_ty], [llvm_v8i16_ty, llvm_v8i16_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vaddwev_h_bu_b : GCCBuiltin<"__builtin_lsx_vaddwev_h_bu_b">, ++ Intrinsic<[llvm_v8i16_ty], [llvm_v16i8_ty, llvm_v16i8_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vaddwev_q_du_d : GCCBuiltin<"__builtin_lsx_vaddwev_q_du_d">, ++ Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lsx_vaddwod_d_wu_w : GCCBuiltin<"__builtin_lsx_vaddwod_d_wu_w">, ++ Intrinsic<[llvm_v2i64_ty], [llvm_v4i32_ty, llvm_v4i32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vaddwod_w_hu_h : GCCBuiltin<"__builtin_lsx_vaddwod_w_hu_h">, ++ Intrinsic<[llvm_v4i32_ty], [llvm_v8i16_ty, llvm_v8i16_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vaddwod_h_bu_b : GCCBuiltin<"__builtin_lsx_vaddwod_h_bu_b">, ++ Intrinsic<[llvm_v8i16_ty], [llvm_v16i8_ty, llvm_v16i8_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vaddwod_q_du_d : GCCBuiltin<"__builtin_lsx_vaddwod_q_du_d">, ++ Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lsx_vhaddw_qu_du : GCCBuiltin<"__builtin_lsx_vhaddw_qu_du">, ++ Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vhsubw_qu_du : GCCBuiltin<"__builtin_lsx_vhsubw_qu_du">, ++ Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lsx_vhaddw_q_d : GCCBuiltin<"__builtin_lsx_vhaddw_q_d">, ++ Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vhsubw_q_d : GCCBuiltin<"__builtin_lsx_vhsubw_q_d">, ++ Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lsx_vmuh_b : GCCBuiltin<"__builtin_lsx_vmuh_b">, ++ Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vmuh_h : GCCBuiltin<"__builtin_lsx_vmuh_h">, ++ Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vmuh_w : GCCBuiltin<"__builtin_lsx_vmuh_w">, ++ Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vmuh_d : GCCBuiltin<"__builtin_lsx_vmuh_d">, ++ Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lsx_vmuh_bu : GCCBuiltin<"__builtin_lsx_vmuh_bu">, ++ Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vmuh_hu : GCCBuiltin<"__builtin_lsx_vmuh_hu">, ++ 
Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vmuh_wu : GCCBuiltin<"__builtin_lsx_vmuh_wu">, ++ Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vmuh_du : GCCBuiltin<"__builtin_lsx_vmuh_du">, ++ Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lsx_vmulwev_d_w : GCCBuiltin<"__builtin_lsx_vmulwev_d_w">, ++ Intrinsic<[llvm_v2i64_ty], [llvm_v4i32_ty, llvm_v4i32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vmulwev_w_h : GCCBuiltin<"__builtin_lsx_vmulwev_w_h">, ++ Intrinsic<[llvm_v4i32_ty], [llvm_v8i16_ty, llvm_v8i16_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vmulwev_h_b : GCCBuiltin<"__builtin_lsx_vmulwev_h_b">, ++ Intrinsic<[llvm_v8i16_ty], [llvm_v16i8_ty, llvm_v16i8_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vmulwev_q_d : GCCBuiltin<"__builtin_lsx_vmulwev_q_d">, ++ Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lsx_vmulwod_d_w : GCCBuiltin<"__builtin_lsx_vmulwod_d_w">, ++ Intrinsic<[llvm_v2i64_ty], [llvm_v4i32_ty, llvm_v4i32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vmulwod_w_h : GCCBuiltin<"__builtin_lsx_vmulwod_w_h">, ++ Intrinsic<[llvm_v4i32_ty], [llvm_v8i16_ty, llvm_v8i16_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vmulwod_h_b : GCCBuiltin<"__builtin_lsx_vmulwod_h_b">, ++ Intrinsic<[llvm_v8i16_ty], [llvm_v16i8_ty, llvm_v16i8_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vmulwod_q_d : GCCBuiltin<"__builtin_lsx_vmulwod_q_d">, ++ Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lsx_vmulwev_d_wu : GCCBuiltin<"__builtin_lsx_vmulwev_d_wu">, ++ Intrinsic<[llvm_v2i64_ty], [llvm_v4i32_ty, llvm_v4i32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vmulwev_w_hu : GCCBuiltin<"__builtin_lsx_vmulwev_w_hu">, ++ Intrinsic<[llvm_v4i32_ty], [llvm_v8i16_ty, llvm_v8i16_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vmulwev_h_bu : GCCBuiltin<"__builtin_lsx_vmulwev_h_bu">, ++ Intrinsic<[llvm_v8i16_ty], [llvm_v16i8_ty, llvm_v16i8_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vmulwev_q_du : GCCBuiltin<"__builtin_lsx_vmulwev_q_du">, ++ Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lsx_vmulwod_d_wu : GCCBuiltin<"__builtin_lsx_vmulwod_d_wu">, ++ Intrinsic<[llvm_v2i64_ty], [llvm_v4i32_ty, llvm_v4i32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vmulwod_w_hu : GCCBuiltin<"__builtin_lsx_vmulwod_w_hu">, ++ Intrinsic<[llvm_v4i32_ty], [llvm_v8i16_ty, llvm_v8i16_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vmulwod_h_bu : GCCBuiltin<"__builtin_lsx_vmulwod_h_bu">, ++ Intrinsic<[llvm_v8i16_ty], [llvm_v16i8_ty, llvm_v16i8_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vmulwod_q_du : GCCBuiltin<"__builtin_lsx_vmulwod_q_du">, ++ Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lsx_vmulwev_d_wu_w : GCCBuiltin<"__builtin_lsx_vmulwev_d_wu_w">, ++ Intrinsic<[llvm_v2i64_ty], [llvm_v4i32_ty, llvm_v4i32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vmulwev_w_hu_h : GCCBuiltin<"__builtin_lsx_vmulwev_w_hu_h">, ++ Intrinsic<[llvm_v4i32_ty], [llvm_v8i16_ty, llvm_v8i16_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vmulwev_h_bu_b : GCCBuiltin<"__builtin_lsx_vmulwev_h_bu_b">, ++ Intrinsic<[llvm_v8i16_ty], [llvm_v16i8_ty, llvm_v16i8_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vmulwev_q_du_d : GCCBuiltin<"__builtin_lsx_vmulwev_q_du_d">, ++ Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty], [IntrNoMem]>; ++ ++def 
int_loongarch_lsx_vmulwod_d_wu_w : GCCBuiltin<"__builtin_lsx_vmulwod_d_wu_w">, ++ Intrinsic<[llvm_v2i64_ty], [llvm_v4i32_ty, llvm_v4i32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vmulwod_w_hu_h : GCCBuiltin<"__builtin_lsx_vmulwod_w_hu_h">, ++ Intrinsic<[llvm_v4i32_ty], [llvm_v8i16_ty, llvm_v8i16_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vmulwod_h_bu_b : GCCBuiltin<"__builtin_lsx_vmulwod_h_bu_b">, ++ Intrinsic<[llvm_v8i16_ty], [llvm_v16i8_ty, llvm_v16i8_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vmulwod_q_du_d : GCCBuiltin<"__builtin_lsx_vmulwod_q_du_d">, ++ Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lsx_vmaddwev_d_w : GCCBuiltin<"__builtin_lsx_vmaddwev_d_w">, ++ Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v4i32_ty, llvm_v4i32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vmaddwev_w_h : GCCBuiltin<"__builtin_lsx_vmaddwev_w_h">, ++ Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v8i16_ty, llvm_v8i16_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vmaddwev_h_b : GCCBuiltin<"__builtin_lsx_vmaddwev_h_b">, ++ Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v16i8_ty, llvm_v16i8_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vmaddwev_q_d : GCCBuiltin<"__builtin_lsx_vmaddwev_q_d">, ++ Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty, llvm_v2i64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lsx_vmaddwod_d_w : GCCBuiltin<"__builtin_lsx_vmaddwod_d_w">, ++ Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v4i32_ty, llvm_v4i32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vmaddwod_w_h : GCCBuiltin<"__builtin_lsx_vmaddwod_w_h">, ++ Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v8i16_ty, llvm_v8i16_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vmaddwod_h_b : GCCBuiltin<"__builtin_lsx_vmaddwod_h_b">, ++ Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v16i8_ty, llvm_v16i8_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vmaddwod_q_d : GCCBuiltin<"__builtin_lsx_vmaddwod_q_d">, ++ Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty, llvm_v2i64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lsx_vmaddwev_d_wu : GCCBuiltin<"__builtin_lsx_vmaddwev_d_wu">, ++ Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v4i32_ty, llvm_v4i32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vmaddwev_w_hu : GCCBuiltin<"__builtin_lsx_vmaddwev_w_hu">, ++ Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v8i16_ty, llvm_v8i16_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vmaddwev_h_bu : GCCBuiltin<"__builtin_lsx_vmaddwev_h_bu">, ++ Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v16i8_ty, llvm_v16i8_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vmaddwev_q_du : GCCBuiltin<"__builtin_lsx_vmaddwev_q_du">, ++ Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty, llvm_v2i64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lsx_vmaddwod_d_wu : GCCBuiltin<"__builtin_lsx_vmaddwod_d_wu">, ++ Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v4i32_ty, llvm_v4i32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vmaddwod_w_hu : GCCBuiltin<"__builtin_lsx_vmaddwod_w_hu">, ++ Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v8i16_ty, llvm_v8i16_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vmaddwod_h_bu : GCCBuiltin<"__builtin_lsx_vmaddwod_h_bu">, ++ Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v16i8_ty, llvm_v16i8_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vmaddwod_q_du : GCCBuiltin<"__builtin_lsx_vmaddwod_q_du">, ++ Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty, llvm_v2i64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lsx_vmaddwev_d_wu_w : GCCBuiltin<"__builtin_lsx_vmaddwev_d_wu_w">, ++ Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, 
llvm_v4i32_ty, llvm_v4i32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vmaddwev_w_hu_h : GCCBuiltin<"__builtin_lsx_vmaddwev_w_hu_h">, ++ Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v8i16_ty, llvm_v8i16_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vmaddwev_h_bu_b : GCCBuiltin<"__builtin_lsx_vmaddwev_h_bu_b">, ++ Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v16i8_ty, llvm_v16i8_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vmaddwev_q_du_d : GCCBuiltin<"__builtin_lsx_vmaddwev_q_du_d">, ++ Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty, llvm_v2i64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lsx_vmaddwod_d_wu_w : GCCBuiltin<"__builtin_lsx_vmaddwod_d_wu_w">, ++ Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v4i32_ty, llvm_v4i32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vmaddwod_w_hu_h : GCCBuiltin<"__builtin_lsx_vmaddwod_w_hu_h">, ++ Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v8i16_ty, llvm_v8i16_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vmaddwod_h_bu_b : GCCBuiltin<"__builtin_lsx_vmaddwod_h_bu_b">, ++ Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v16i8_ty, llvm_v16i8_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vmaddwod_q_du_d : GCCBuiltin<"__builtin_lsx_vmaddwod_q_du_d">, ++ Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty, llvm_v2i64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lsx_vsrln_b_h : GCCBuiltin<"__builtin_lsx_vsrln_b_h">, ++ Intrinsic<[llvm_v16i8_ty], [llvm_v8i16_ty, llvm_v8i16_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vsrln_h_w : GCCBuiltin<"__builtin_lsx_vsrln_h_w">, ++ Intrinsic<[llvm_v8i16_ty], [llvm_v4i32_ty, llvm_v4i32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vsrln_w_d : GCCBuiltin<"__builtin_lsx_vsrln_w_d">, ++ Intrinsic<[llvm_v4i32_ty], [llvm_v2i64_ty, llvm_v2i64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lsx_vsran_b_h : GCCBuiltin<"__builtin_lsx_vsran_b_h">, ++ Intrinsic<[llvm_v16i8_ty], [llvm_v8i16_ty, llvm_v8i16_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vsran_h_w : GCCBuiltin<"__builtin_lsx_vsran_h_w">, ++ Intrinsic<[llvm_v8i16_ty], [llvm_v4i32_ty, llvm_v4i32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vsran_w_d : GCCBuiltin<"__builtin_lsx_vsran_w_d">, ++ Intrinsic<[llvm_v4i32_ty], [llvm_v2i64_ty, llvm_v2i64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lsx_vsrlrn_b_h : GCCBuiltin<"__builtin_lsx_vsrlrn_b_h">, ++ Intrinsic<[llvm_v16i8_ty], [llvm_v8i16_ty, llvm_v8i16_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vsrlrn_h_w : GCCBuiltin<"__builtin_lsx_vsrlrn_h_w">, ++ Intrinsic<[llvm_v8i16_ty], [llvm_v4i32_ty, llvm_v4i32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vsrlrn_w_d : GCCBuiltin<"__builtin_lsx_vsrlrn_w_d">, ++ Intrinsic<[llvm_v4i32_ty], [llvm_v2i64_ty, llvm_v2i64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lsx_vsrarn_b_h : GCCBuiltin<"__builtin_lsx_vsrarn_b_h">, ++ Intrinsic<[llvm_v16i8_ty], [llvm_v8i16_ty, llvm_v8i16_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vsrarn_h_w : GCCBuiltin<"__builtin_lsx_vsrarn_h_w">, ++ Intrinsic<[llvm_v8i16_ty], [llvm_v4i32_ty, llvm_v4i32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vsrarn_w_d : GCCBuiltin<"__builtin_lsx_vsrarn_w_d">, ++ Intrinsic<[llvm_v4i32_ty], [llvm_v2i64_ty, llvm_v2i64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lsx_vssrln_b_h : GCCBuiltin<"__builtin_lsx_vssrln_b_h">, ++ Intrinsic<[llvm_v16i8_ty], [llvm_v8i16_ty, llvm_v8i16_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vssrln_h_w : GCCBuiltin<"__builtin_lsx_vssrln_h_w">, ++ Intrinsic<[llvm_v8i16_ty], [llvm_v4i32_ty, llvm_v4i32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vssrln_w_d : GCCBuiltin<"__builtin_lsx_vssrln_w_d">, ++ Intrinsic<[llvm_v4i32_ty], 
[llvm_v2i64_ty, llvm_v2i64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lsx_vssran_b_h : GCCBuiltin<"__builtin_lsx_vssran_b_h">, ++ Intrinsic<[llvm_v16i8_ty], [llvm_v8i16_ty, llvm_v8i16_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vssran_h_w : GCCBuiltin<"__builtin_lsx_vssran_h_w">, ++ Intrinsic<[llvm_v8i16_ty], [llvm_v4i32_ty, llvm_v4i32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vssran_w_d : GCCBuiltin<"__builtin_lsx_vssran_w_d">, ++ Intrinsic<[llvm_v4i32_ty], [llvm_v2i64_ty, llvm_v2i64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lsx_vssrlrn_b_h : GCCBuiltin<"__builtin_lsx_vssrlrn_b_h">, ++ Intrinsic<[llvm_v16i8_ty], [llvm_v8i16_ty, llvm_v8i16_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vssrlrn_h_w : GCCBuiltin<"__builtin_lsx_vssrlrn_h_w">, ++ Intrinsic<[llvm_v8i16_ty], [llvm_v4i32_ty, llvm_v4i32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vssrlrn_w_d : GCCBuiltin<"__builtin_lsx_vssrlrn_w_d">, ++ Intrinsic<[llvm_v4i32_ty], [llvm_v2i64_ty, llvm_v2i64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lsx_vssrarn_b_h : GCCBuiltin<"__builtin_lsx_vssrarn_b_h">, ++ Intrinsic<[llvm_v16i8_ty], [llvm_v8i16_ty, llvm_v8i16_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vssrarn_h_w : GCCBuiltin<"__builtin_lsx_vssrarn_h_w">, ++ Intrinsic<[llvm_v8i16_ty], [llvm_v4i32_ty, llvm_v4i32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vssrarn_w_d : GCCBuiltin<"__builtin_lsx_vssrarn_w_d">, ++ Intrinsic<[llvm_v4i32_ty], [llvm_v2i64_ty, llvm_v2i64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lsx_vssrln_bu_h : GCCBuiltin<"__builtin_lsx_vssrln_bu_h">, ++ Intrinsic<[llvm_v16i8_ty], [llvm_v8i16_ty, llvm_v8i16_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vssrln_hu_w : GCCBuiltin<"__builtin_lsx_vssrln_hu_w">, ++ Intrinsic<[llvm_v8i16_ty], [llvm_v4i32_ty, llvm_v4i32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vssrln_wu_d : GCCBuiltin<"__builtin_lsx_vssrln_wu_d">, ++ Intrinsic<[llvm_v4i32_ty], [llvm_v2i64_ty, llvm_v2i64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lsx_vssran_bu_h : GCCBuiltin<"__builtin_lsx_vssran_bu_h">, ++ Intrinsic<[llvm_v16i8_ty], [llvm_v8i16_ty, llvm_v8i16_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vssran_hu_w : GCCBuiltin<"__builtin_lsx_vssran_hu_w">, ++ Intrinsic<[llvm_v8i16_ty], [llvm_v4i32_ty, llvm_v4i32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vssran_wu_d : GCCBuiltin<"__builtin_lsx_vssran_wu_d">, ++ Intrinsic<[llvm_v4i32_ty], [llvm_v2i64_ty, llvm_v2i64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lsx_vssrlrn_bu_h : GCCBuiltin<"__builtin_lsx_vssrlrn_bu_h">, ++ Intrinsic<[llvm_v16i8_ty], [llvm_v8i16_ty, llvm_v8i16_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vssrlrn_hu_w : GCCBuiltin<"__builtin_lsx_vssrlrn_hu_w">, ++ Intrinsic<[llvm_v8i16_ty], [llvm_v4i32_ty, llvm_v4i32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vssrlrn_wu_d : GCCBuiltin<"__builtin_lsx_vssrlrn_wu_d">, ++ Intrinsic<[llvm_v4i32_ty], [llvm_v2i64_ty, llvm_v2i64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lsx_vssrarn_bu_h : GCCBuiltin<"__builtin_lsx_vssrarn_bu_h">, ++ Intrinsic<[llvm_v16i8_ty], [llvm_v8i16_ty, llvm_v8i16_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vssrarn_hu_w : GCCBuiltin<"__builtin_lsx_vssrarn_hu_w">, ++ Intrinsic<[llvm_v8i16_ty], [llvm_v4i32_ty, llvm_v4i32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vssrarn_wu_d : GCCBuiltin<"__builtin_lsx_vssrarn_wu_d">, ++ Intrinsic<[llvm_v4i32_ty], [llvm_v2i64_ty, llvm_v2i64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lsx_vandn_v : GCCBuiltin<"__builtin_lsx_vandn_v">, ++ Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty], [IntrNoMem]>; ++ ++def int_loongarch_lsx_vorn_v : GCCBuiltin<"__builtin_lsx_vorn_v">, ++ 
Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty], [IntrNoMem]>; ++ ++def int_loongarch_lsx_vfrstp_b : GCCBuiltin<"__builtin_lsx_vfrstp_b">, ++ Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty, llvm_v16i8_ty], ++ [IntrNoMem]>; ++def int_loongarch_lsx_vfrstp_h : GCCBuiltin<"__builtin_lsx_vfrstp_h">, ++ Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty, llvm_v8i16_ty], ++ [IntrNoMem]>; ++ ++def int_loongarch_lsx_vadd_q : GCCBuiltin<"__builtin_lsx_vadd_q">, ++ Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vsub_q : GCCBuiltin<"__builtin_lsx_vsub_q">, ++ Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lsx_vsigncov_b : GCCBuiltin<"__builtin_lsx_vsigncov_b">, ++ Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty], ++ [IntrNoMem]>; ++def int_loongarch_lsx_vsigncov_h : GCCBuiltin<"__builtin_lsx_vsigncov_h">, ++ Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty], ++ [IntrNoMem]>; ++def int_loongarch_lsx_vsigncov_w : GCCBuiltin<"__builtin_lsx_vsigncov_w">, ++ Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty], ++ [IntrNoMem]>; ++def int_loongarch_lsx_vsigncov_d : GCCBuiltin<"__builtin_lsx_vsigncov_d">, ++ Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty], ++ [IntrNoMem]>; ++ ++def int_loongarch_lsx_vfcvt_h_s : GCCBuiltin<"__builtin_lsx_vfcvt_h_s">, ++ Intrinsic<[llvm_v8i16_ty], [llvm_v4f32_ty, llvm_v4f32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vfcvt_s_d : GCCBuiltin<"__builtin_lsx_vfcvt_s_d">, ++ Intrinsic<[llvm_v4f32_ty], [llvm_v2f64_ty, llvm_v2f64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lsx_vffint_s_l : GCCBuiltin<"__builtin_lsx_vffint_s_l">, ++ Intrinsic<[llvm_v4f32_ty], [llvm_v2i64_ty, llvm_v2i64_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vftint_w_d : GCCBuiltin<"__builtin_lsx_vftint_w_d">, ++ Intrinsic<[llvm_v4i32_ty], [llvm_v2f64_ty, llvm_v2f64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lsx_vftintrz_w_d : GCCBuiltin<"__builtin_lsx_vftintrz_w_d">, ++ Intrinsic<[llvm_v4i32_ty], [llvm_v2f64_ty, llvm_v2f64_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vftintrp_w_d : GCCBuiltin<"__builtin_lsx_vftintrp_w_d">, ++ Intrinsic<[llvm_v4i32_ty], [llvm_v2f64_ty, llvm_v2f64_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vftintrm_w_d : GCCBuiltin<"__builtin_lsx_vftintrm_w_d">, ++ Intrinsic<[llvm_v4i32_ty], [llvm_v2f64_ty, llvm_v2f64_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vftintrne_w_d : GCCBuiltin<"__builtin_lsx_vftintrne_w_d">, ++ Intrinsic<[llvm_v4i32_ty], [llvm_v2f64_ty, llvm_v2f64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lsx_vbsrl_v : GCCBuiltin<"__builtin_lsx_vbsrl_v">, ++ Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_i32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vbsll_v : GCCBuiltin<"__builtin_lsx_vbsll_v">, ++ Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_i32_ty], [IntrNoMem]>; ++ ++def int_loongarch_lsx_vfrstpi_b : GCCBuiltin<"__builtin_lsx_vfrstpi_b">, ++ Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty, llvm_i32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vfrstpi_h : GCCBuiltin<"__builtin_lsx_vfrstpi_h">, ++ Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty, llvm_i32_ty], [IntrNoMem]>; ++ ++def int_loongarch_lsx_vneg_b : GCCBuiltin<"__builtin_lsx_vneg_b">, ++ Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vneg_h : GCCBuiltin<"__builtin_lsx_vneg_h">, ++ Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vneg_w : GCCBuiltin<"__builtin_lsx_vneg_w">, ++ 
Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vneg_d : GCCBuiltin<"__builtin_lsx_vneg_d">, ++ Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lsx_vmskgez_b : GCCBuiltin<"__builtin_lsx_vmskgez_b">, ++ Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vmsknz_b : GCCBuiltin<"__builtin_lsx_vmsknz_b">, ++ Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty], [IntrNoMem]>; ++ ++def int_loongarch_lsx_vfrintrm_s : GCCBuiltin<"__builtin_lsx_vfrintrm_s">, ++ Intrinsic<[llvm_v4i32_ty], [llvm_v4f32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vfrintrm_d : GCCBuiltin<"__builtin_lsx_vfrintrm_d">, ++ Intrinsic<[llvm_v2i64_ty], [llvm_v2f64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lsx_vfrintrp_s : GCCBuiltin<"__builtin_lsx_vfrintrp_s">, ++ Intrinsic<[llvm_v4i32_ty], [llvm_v4f32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vfrintrp_d : GCCBuiltin<"__builtin_lsx_vfrintrp_d">, ++ Intrinsic<[llvm_v2i64_ty], [llvm_v2f64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lsx_vfrintrz_s : GCCBuiltin<"__builtin_lsx_vfrintrz_s">, ++ Intrinsic<[llvm_v4i32_ty], [llvm_v4f32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vfrintrz_d : GCCBuiltin<"__builtin_lsx_vfrintrz_d">, ++ Intrinsic<[llvm_v2i64_ty], [llvm_v2f64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lsx_vfrintrne_s : GCCBuiltin<"__builtin_lsx_vfrintrne_s">, ++ Intrinsic<[llvm_v4i32_ty], [llvm_v4f32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vfrintrne_d : GCCBuiltin<"__builtin_lsx_vfrintrne_d">, ++ Intrinsic<[llvm_v2i64_ty], [llvm_v2f64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lsx_vffinth_d_w : GCCBuiltin<"__builtin_lsx_vffinth_d_w">, ++ Intrinsic<[llvm_v2f64_ty], [llvm_v4i32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vffintl_d_w : GCCBuiltin<"__builtin_lsx_vffintl_d_w">, ++ Intrinsic<[llvm_v2f64_ty], [llvm_v4i32_ty], [IntrNoMem]>; ++ ++def int_loongarch_lsx_vftintrm_w_s : GCCBuiltin<"__builtin_lsx_vftintrm_w_s">, ++ Intrinsic<[llvm_v4i32_ty], [llvm_v4f32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vftintrm_l_d : GCCBuiltin<"__builtin_lsx_vftintrm_l_d">, ++ Intrinsic<[llvm_v2i64_ty], [llvm_v2f64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lsx_vftintrp_w_s : GCCBuiltin<"__builtin_lsx_vftintrp_w_s">, ++ Intrinsic<[llvm_v4i32_ty], [llvm_v4f32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vftintrp_l_d : GCCBuiltin<"__builtin_lsx_vftintrp_l_d">, ++ Intrinsic<[llvm_v2i64_ty], [llvm_v2f64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lsx_vftintrz_w_s : GCCBuiltin<"__builtin_lsx_vftintrz_w_s">, ++ Intrinsic<[llvm_v4i32_ty], [llvm_v4f32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vftintrz_l_d : GCCBuiltin<"__builtin_lsx_vftintrz_l_d">, ++ Intrinsic<[llvm_v2i64_ty], [llvm_v2f64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lsx_vftintrne_w_s : GCCBuiltin<"__builtin_lsx_vftintrne_w_s">, ++ Intrinsic<[llvm_v4i32_ty], [llvm_v4f32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vftintrne_l_d : GCCBuiltin<"__builtin_lsx_vftintrne_l_d">, ++ Intrinsic<[llvm_v2i64_ty], [llvm_v2f64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lsx_vftinth_l_s : GCCBuiltin<"__builtin_lsx_vftinth_l_s">, ++ Intrinsic<[llvm_v2i64_ty], [llvm_v4f32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vftintl_l_s : GCCBuiltin<"__builtin_lsx_vftintl_l_s">, ++ Intrinsic<[llvm_v2i64_ty], [llvm_v4f32_ty], [IntrNoMem]>; ++ ++def int_loongarch_lsx_vftintrmh_l_s : GCCBuiltin<"__builtin_lsx_vftintrmh_l_s">, ++ Intrinsic<[llvm_v2i64_ty], [llvm_v4f32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vftintrml_l_s : GCCBuiltin<"__builtin_lsx_vftintrml_l_s">, ++ 
Intrinsic<[llvm_v2i64_ty], [llvm_v4f32_ty], [IntrNoMem]>; ++ ++def int_loongarch_lsx_vftintrph_l_s : GCCBuiltin<"__builtin_lsx_vftintrph_l_s">, ++ Intrinsic<[llvm_v2i64_ty], [llvm_v4f32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vftintrpl_l_s : GCCBuiltin<"__builtin_lsx_vftintrpl_l_s">, ++ Intrinsic<[llvm_v2i64_ty], [llvm_v4f32_ty], [IntrNoMem]>; ++ ++def int_loongarch_lsx_vftintrzh_l_s : GCCBuiltin<"__builtin_lsx_vftintrzh_l_s">, ++ Intrinsic<[llvm_v2i64_ty], [llvm_v4f32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vftintrzl_l_s : GCCBuiltin<"__builtin_lsx_vftintrzl_l_s">, ++ Intrinsic<[llvm_v2i64_ty], [llvm_v4f32_ty], [IntrNoMem]>; ++ ++def int_loongarch_lsx_vftintrneh_l_s : GCCBuiltin<"__builtin_lsx_vftintrneh_l_s">, ++ Intrinsic<[llvm_v2i64_ty], [llvm_v4f32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vftintrnel_l_s : GCCBuiltin<"__builtin_lsx_vftintrnel_l_s">, ++ Intrinsic<[llvm_v2i64_ty], [llvm_v4f32_ty], [IntrNoMem]>; ++ ++def int_loongarch_lsx_vexth_d_w : GCCBuiltin<"__builtin_lsx_vexth_d_w">, ++ Intrinsic<[llvm_v2i64_ty], [llvm_v4i32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vexth_w_h : GCCBuiltin<"__builtin_lsx_vexth_w_h">, ++ Intrinsic<[llvm_v4i32_ty], [llvm_v8i16_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vexth_h_b : GCCBuiltin<"__builtin_lsx_vexth_h_b">, ++ Intrinsic<[llvm_v8i16_ty], [llvm_v16i8_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vexth_q_d : GCCBuiltin<"__builtin_lsx_vexth_q_d">, ++ Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lsx_vexth_du_wu : GCCBuiltin<"__builtin_lsx_vexth_du_wu">, ++ Intrinsic<[llvm_v2i64_ty], [llvm_v4i32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vexth_wu_hu : GCCBuiltin<"__builtin_lsx_vexth_wu_hu">, ++ Intrinsic<[llvm_v4i32_ty], [llvm_v8i16_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vexth_hu_bu : GCCBuiltin<"__builtin_lsx_vexth_hu_bu">, ++ Intrinsic<[llvm_v8i16_ty], [llvm_v16i8_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vexth_qu_du : GCCBuiltin<"__builtin_lsx_vexth_qu_du">, ++ Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lasx_xvexth_du_wu : GCCBuiltin<"__builtin_lasx_xvexth_du_wu">, ++ Intrinsic<[llvm_v4i64_ty], [llvm_v8i32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvexth_wu_hu : GCCBuiltin<"__builtin_lasx_xvexth_wu_hu">, ++ Intrinsic<[llvm_v8i32_ty], [llvm_v16i16_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvexth_hu_bu : GCCBuiltin<"__builtin_lasx_xvexth_hu_bu">, ++ Intrinsic<[llvm_v16i16_ty], [llvm_v32i8_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvexth_qu_du : GCCBuiltin<"__builtin_lasx_xvexth_qu_du">, ++ Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lsx_vsllwil_d_w : GCCBuiltin<"__builtin_lsx_vsllwil_d_w">, ++ Intrinsic<[llvm_v2i64_ty], [llvm_v4i32_ty, llvm_i32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vsllwil_w_h : GCCBuiltin<"__builtin_lsx_vsllwil_w_h">, ++ Intrinsic<[llvm_v4i32_ty], [llvm_v8i16_ty, llvm_i32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vsllwil_h_b : GCCBuiltin<"__builtin_lsx_vsllwil_h_b">, ++ Intrinsic<[llvm_v8i16_ty], [llvm_v16i8_ty, llvm_i32_ty], [IntrNoMem]>; ++ ++def int_loongarch_lsx_vextl_q_d : GCCBuiltin<"__builtin_lsx_vextl_q_d">, ++ Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lsx_vsllwil_du_wu : GCCBuiltin<"__builtin_lsx_vsllwil_du_wu">, ++ Intrinsic<[llvm_v2i64_ty], [llvm_v4i32_ty, llvm_i32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vsllwil_wu_hu : GCCBuiltin<"__builtin_lsx_vsllwil_wu_hu">, ++ Intrinsic<[llvm_v4i32_ty], [llvm_v8i16_ty, llvm_i32_ty], [IntrNoMem]>; ++def 
int_loongarch_lsx_vsllwil_hu_bu : GCCBuiltin<"__builtin_lsx_vsllwil_hu_bu">, ++ Intrinsic<[llvm_v8i16_ty], [llvm_v16i8_ty, llvm_i32_ty], [IntrNoMem]>; ++ ++def int_loongarch_lsx_vextl_qu_du : GCCBuiltin<"__builtin_lsx_vextl_qu_du">, ++ Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lsx_vbitclri_b : GCCBuiltin<"__builtin_lsx_vbitclri_b">, ++ Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_i32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vbitclri_h : GCCBuiltin<"__builtin_lsx_vbitclri_h">, ++ Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_i32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vbitclri_w : GCCBuiltin<"__builtin_lsx_vbitclri_w">, ++ Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_i32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vbitclri_d : GCCBuiltin<"__builtin_lsx_vbitclri_d">, ++ Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_i32_ty], [IntrNoMem]>; ++ ++def int_loongarch_lsx_vbitseti_b : GCCBuiltin<"__builtin_lsx_vbitseti_b">, ++ Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_i32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vbitseti_h : GCCBuiltin<"__builtin_lsx_vbitseti_h">, ++ Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_i32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vbitseti_w : GCCBuiltin<"__builtin_lsx_vbitseti_w">, ++ Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_i32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vbitseti_d : GCCBuiltin<"__builtin_lsx_vbitseti_d">, ++ Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_i32_ty], [IntrNoMem]>; ++ ++def int_loongarch_lsx_vbitrevi_b : GCCBuiltin<"__builtin_lsx_vbitrevi_b">, ++ Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_i32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vbitrevi_h : GCCBuiltin<"__builtin_lsx_vbitrevi_h">, ++ Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_i32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vbitrevi_w : GCCBuiltin<"__builtin_lsx_vbitrevi_w">, ++ Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_i32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vbitrevi_d : GCCBuiltin<"__builtin_lsx_vbitrevi_d">, ++ Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_i32_ty], [IntrNoMem]>; ++ ++def int_loongarch_lsx_vssrlrni_b_h : GCCBuiltin<"__builtin_lsx_vssrlrni_b_h">, ++ Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty, llvm_i32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vssrlrni_h_w : GCCBuiltin<"__builtin_lsx_vssrlrni_h_w">, ++ Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty, llvm_i32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vssrlrni_w_d : GCCBuiltin<"__builtin_lsx_vssrlrni_w_d">, ++ Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty, llvm_i32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vssrlrni_d_q : GCCBuiltin<"__builtin_lsx_vssrlrni_d_q">, ++ Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty, llvm_i32_ty], [IntrNoMem]>; ++ ++def int_loongarch_lsx_vsrani_b_h : GCCBuiltin<"__builtin_lsx_vsrani_b_h">, ++ Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty, llvm_i32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vsrani_h_w : GCCBuiltin<"__builtin_lsx_vsrani_h_w">, ++ Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty, llvm_i32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vsrani_w_d : GCCBuiltin<"__builtin_lsx_vsrani_w_d">, ++ Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty, llvm_i32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vsrani_d_q : GCCBuiltin<"__builtin_lsx_vsrani_d_q">, ++ Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty, llvm_i32_ty], [IntrNoMem]>; ++ ++def int_loongarch_lsx_vextrins_b : GCCBuiltin<"__builtin_lsx_vextrins_b">, ++ 
Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty, llvm_i32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vextrins_h : GCCBuiltin<"__builtin_lsx_vextrins_h">, ++ Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty, llvm_i32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vextrins_w : GCCBuiltin<"__builtin_lsx_vextrins_w">, ++ Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty, llvm_i32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vextrins_d : GCCBuiltin<"__builtin_lsx_vextrins_d">, ++ Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty, llvm_i32_ty], [IntrNoMem]>; ++ ++def int_loongarch_lsx_vbitseli_b : GCCBuiltin<"__builtin_lsx_vbitseli_b">, ++ Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty, llvm_i32_ty], [IntrNoMem]>; ++ ++def int_loongarch_lsx_vandi_b : GCCBuiltin<"__builtin_lsx_vandi_b">, ++ Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_i32_ty], [IntrNoMem]>; ++ ++def int_loongarch_lsx_vori_b : GCCBuiltin<"__builtin_lsx_vori_b">, ++ Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_i32_ty], [IntrNoMem]>; ++ ++def int_loongarch_lsx_vxori_b : GCCBuiltin<"__builtin_lsx_vxori_b">, ++ Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_i32_ty], [IntrNoMem]>; ++ ++def int_loongarch_lsx_vnori_b : GCCBuiltin<"__builtin_lsx_vnori_b">, ++ Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_i32_ty], [IntrNoMem]>; ++ ++def int_loongarch_lsx_vldi : GCCBuiltin<"__builtin_lsx_vldi">, ++ Intrinsic<[llvm_v2i64_ty], [llvm_i32_ty], [IntrNoMem]>; ++ ++def int_loongarch_lsx_vpermi_w : GCCBuiltin<"__builtin_lsx_vpermi_w">, ++ Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty, llvm_i32_ty], [IntrNoMem]>; ++ ++def int_loongarch_lsx_vsadd_b : GCCBuiltin<"__builtin_lsx_vsadd_b">, ++ Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty], ++ [Commutative, IntrNoMem]>; ++def int_loongarch_lsx_vsadd_h : GCCBuiltin<"__builtin_lsx_vsadd_h">, ++ Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty], ++ [Commutative, IntrNoMem]>; ++def int_loongarch_lsx_vsadd_w : GCCBuiltin<"__builtin_lsx_vsadd_w">, ++ Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty], ++ [Commutative, IntrNoMem]>; ++def int_loongarch_lsx_vsadd_d : GCCBuiltin<"__builtin_lsx_vsadd_d">, ++ Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty], ++ [Commutative, IntrNoMem]>; ++ ++def int_loongarch_lsx_vssub_b : GCCBuiltin<"__builtin_lsx_vssub_b">, ++ Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vssub_h : GCCBuiltin<"__builtin_lsx_vssub_h">, ++ Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vssub_w : GCCBuiltin<"__builtin_lsx_vssub_w">, ++ Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vssub_d : GCCBuiltin<"__builtin_lsx_vssub_d">, ++ Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lsx_vsadd_bu : GCCBuiltin<"__builtin_lsx_vsadd_bu">, ++ Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty], ++ [Commutative, IntrNoMem]>; ++def int_loongarch_lsx_vsadd_hu : GCCBuiltin<"__builtin_lsx_vsadd_hu">, ++ Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty], ++ [Commutative, IntrNoMem]>; ++def int_loongarch_lsx_vsadd_wu : GCCBuiltin<"__builtin_lsx_vsadd_wu">, ++ Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty], ++ [Commutative, IntrNoMem]>; ++def int_loongarch_lsx_vsadd_du : GCCBuiltin<"__builtin_lsx_vsadd_du">, ++ Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty], ++ [Commutative, IntrNoMem]>; ++ ++def 
int_loongarch_lsx_vssub_bu : GCCBuiltin<"__builtin_lsx_vssub_bu">, ++ Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vssub_hu : GCCBuiltin<"__builtin_lsx_vssub_hu">, ++ Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vssub_wu : GCCBuiltin<"__builtin_lsx_vssub_wu">, ++ Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vssub_du : GCCBuiltin<"__builtin_lsx_vssub_du">, ++ Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lsx_vhaddw_h_b : GCCBuiltin<"__builtin_lsx_vhaddw_h_b">, ++ Intrinsic<[llvm_v8i16_ty], [llvm_v16i8_ty, llvm_v16i8_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vhaddw_w_h : GCCBuiltin<"__builtin_lsx_vhaddw_w_h">, ++ Intrinsic<[llvm_v4i32_ty], [llvm_v8i16_ty, llvm_v8i16_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vhaddw_d_w : GCCBuiltin<"__builtin_lsx_vhaddw_d_w">, ++ Intrinsic<[llvm_v2i64_ty], [llvm_v4i32_ty, llvm_v4i32_ty], [IntrNoMem]>; ++ ++def int_loongarch_lsx_vhsubw_h_b : GCCBuiltin<"__builtin_lsx_vhsubw_h_b">, ++ Intrinsic<[llvm_v8i16_ty], [llvm_v16i8_ty, llvm_v16i8_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vhsubw_w_h : GCCBuiltin<"__builtin_lsx_vhsubw_w_h">, ++ Intrinsic<[llvm_v4i32_ty], [llvm_v8i16_ty, llvm_v8i16_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vhsubw_d_w : GCCBuiltin<"__builtin_lsx_vhsubw_d_w">, ++ Intrinsic<[llvm_v2i64_ty], [llvm_v4i32_ty, llvm_v4i32_ty], [IntrNoMem]>; ++ ++def int_loongarch_lsx_vhaddw_hu_bu : GCCBuiltin<"__builtin_lsx_vhaddw_hu_bu">, ++ Intrinsic<[llvm_v8i16_ty], [llvm_v16i8_ty, llvm_v16i8_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vhaddw_wu_hu : GCCBuiltin<"__builtin_lsx_vhaddw_wu_hu">, ++ Intrinsic<[llvm_v4i32_ty], [llvm_v8i16_ty, llvm_v8i16_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vhaddw_du_wu : GCCBuiltin<"__builtin_lsx_vhaddw_du_wu">, ++ Intrinsic<[llvm_v2i64_ty], [llvm_v4i32_ty, llvm_v4i32_ty], [IntrNoMem]>; ++ ++def int_loongarch_lsx_vhsubw_hu_bu : GCCBuiltin<"__builtin_lsx_vhsubw_hu_bu">, ++ Intrinsic<[llvm_v8i16_ty], [llvm_v16i8_ty, llvm_v16i8_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vhsubw_wu_hu : GCCBuiltin<"__builtin_lsx_vhsubw_wu_hu">, ++ Intrinsic<[llvm_v4i32_ty], [llvm_v8i16_ty, llvm_v8i16_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vhsubw_du_wu : GCCBuiltin<"__builtin_lsx_vhsubw_du_wu">, ++ Intrinsic<[llvm_v2i64_ty], [llvm_v4i32_ty, llvm_v4i32_ty], [IntrNoMem]>; ++ ++def int_loongarch_lsx_vadda_b : GCCBuiltin<"__builtin_lsx_vadda_b">, ++ Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty], ++ [Commutative, IntrNoMem]>; ++def int_loongarch_lsx_vadda_h : GCCBuiltin<"__builtin_lsx_vadda_h">, ++ Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty], ++ [Commutative, IntrNoMem]>; ++def int_loongarch_lsx_vadda_w : GCCBuiltin<"__builtin_lsx_vadda_w">, ++ Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty], ++ [Commutative, IntrNoMem]>; ++def int_loongarch_lsx_vadda_d : GCCBuiltin<"__builtin_lsx_vadda_d">, ++ Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty], ++ [Commutative, IntrNoMem]>; ++ ++def int_loongarch_lsx_vabsd_b : GCCBuiltin<"__builtin_lsx_vabsd_b">, ++ Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vabsd_h : GCCBuiltin<"__builtin_lsx_vabsd_h">, ++ Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vabsd_w : GCCBuiltin<"__builtin_lsx_vabsd_w">, ++ Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty], 
[IntrNoMem]>; ++def int_loongarch_lsx_vabsd_d : GCCBuiltin<"__builtin_lsx_vabsd_d">, ++ Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lsx_vabsd_bu : GCCBuiltin<"__builtin_lsx_vabsd_bu">, ++ Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vabsd_hu : GCCBuiltin<"__builtin_lsx_vabsd_hu">, ++ Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vabsd_wu : GCCBuiltin<"__builtin_lsx_vabsd_wu">, ++ Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vabsd_du : GCCBuiltin<"__builtin_lsx_vabsd_du">, ++ Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lsx_vavg_b : GCCBuiltin<"__builtin_lsx_vavg_b">, ++ Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty], ++ [Commutative, IntrNoMem]>; ++def int_loongarch_lsx_vavg_h : GCCBuiltin<"__builtin_lsx_vavg_h">, ++ Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty], ++ [Commutative, IntrNoMem]>; ++def int_loongarch_lsx_vavg_w : GCCBuiltin<"__builtin_lsx_vavg_w">, ++ Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty], ++ [Commutative, IntrNoMem]>; ++def int_loongarch_lsx_vavg_d : GCCBuiltin<"__builtin_lsx_vavg_d">, ++ Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty], ++ [Commutative, IntrNoMem]>; ++ ++def int_loongarch_lsx_vavg_bu : GCCBuiltin<"__builtin_lsx_vavg_bu">, ++ Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty], ++ [Commutative, IntrNoMem]>; ++def int_loongarch_lsx_vavg_hu : GCCBuiltin<"__builtin_lsx_vavg_hu">, ++ Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty], ++ [Commutative, IntrNoMem]>; ++def int_loongarch_lsx_vavg_wu : GCCBuiltin<"__builtin_lsx_vavg_wu">, ++ Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty], ++ [Commutative, IntrNoMem]>; ++def int_loongarch_lsx_vavg_du : GCCBuiltin<"__builtin_lsx_vavg_du">, ++ Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty], ++ [Commutative, IntrNoMem]>; ++ ++def int_loongarch_lsx_vavgr_b : GCCBuiltin<"__builtin_lsx_vavgr_b">, ++ Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty], ++ [Commutative, IntrNoMem]>; ++def int_loongarch_lsx_vavgr_h : GCCBuiltin<"__builtin_lsx_vavgr_h">, ++ Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty], ++ [Commutative, IntrNoMem]>; ++def int_loongarch_lsx_vavgr_w : GCCBuiltin<"__builtin_lsx_vavgr_w">, ++ Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty], ++ [Commutative, IntrNoMem]>; ++def int_loongarch_lsx_vavgr_d : GCCBuiltin<"__builtin_lsx_vavgr_d">, ++ Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty], ++ [Commutative, IntrNoMem]>; ++ ++def int_loongarch_lsx_vavgr_bu : GCCBuiltin<"__builtin_lsx_vavgr_bu">, ++ Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty], ++ [Commutative, IntrNoMem]>; ++def int_loongarch_lsx_vavgr_hu : GCCBuiltin<"__builtin_lsx_vavgr_hu">, ++ Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty], ++ [Commutative, IntrNoMem]>; ++def int_loongarch_lsx_vavgr_wu : GCCBuiltin<"__builtin_lsx_vavgr_wu">, ++ Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty], ++ [Commutative, IntrNoMem]>; ++def int_loongarch_lsx_vavgr_du : GCCBuiltin<"__builtin_lsx_vavgr_du">, ++ Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty], ++ [Commutative, IntrNoMem]>; ++ ++def int_loongarch_lsx_vsrlr_b : GCCBuiltin<"__builtin_lsx_vsrlr_b">, ++ Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty], [IntrNoMem]>; ++def 
int_loongarch_lsx_vsrlr_h : GCCBuiltin<"__builtin_lsx_vsrlr_h">, ++ Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vsrlr_w : GCCBuiltin<"__builtin_lsx_vsrlr_w">, ++ Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vsrlr_d : GCCBuiltin<"__builtin_lsx_vsrlr_d">, ++ Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lsx_vsrar_b : GCCBuiltin<"__builtin_lsx_vsrar_b">, ++ Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vsrar_h : GCCBuiltin<"__builtin_lsx_vsrar_h">, ++ Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vsrar_w : GCCBuiltin<"__builtin_lsx_vsrar_w">, ++ Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vsrar_d : GCCBuiltin<"__builtin_lsx_vsrar_d">, ++ Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lsx_vfmax_s : GCCBuiltin<"__builtin_lsx_vfmax_s">, ++ Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_v4f32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vfmax_d : GCCBuiltin<"__builtin_lsx_vfmax_d">, ++ Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty, llvm_v2f64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lsx_vfmin_s : GCCBuiltin<"__builtin_lsx_vfmin_s">, ++ Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_v4f32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vfmin_d : GCCBuiltin<"__builtin_lsx_vfmin_d">, ++ Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty, llvm_v2f64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lsx_vfmaxa_s : GCCBuiltin<"__builtin_lsx_vfmaxa_s">, ++ Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_v4f32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vfmaxa_d : GCCBuiltin<"__builtin_lsx_vfmaxa_d">, ++ Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty, llvm_v2f64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lsx_vfmina_s : GCCBuiltin<"__builtin_lsx_vfmina_s">, ++ Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_v4f32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vfmina_d : GCCBuiltin<"__builtin_lsx_vfmina_d">, ++ Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty, llvm_v2f64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lsx_vfclass_s : GCCBuiltin<"__builtin_lsx_vfclass_s">, ++ Intrinsic<[llvm_v4i32_ty], [llvm_v4f32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vfclass_d : GCCBuiltin<"__builtin_lsx_vfclass_d">, ++ Intrinsic<[llvm_v2i64_ty], [llvm_v2f64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lsx_vfrecip_s : GCCBuiltin<"__builtin_lsx_vfrecip_s">, ++ Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vfrecip_d : GCCBuiltin<"__builtin_lsx_vfrecip_d">, ++ Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lsx_vfrsqrt_s : GCCBuiltin<"__builtin_lsx_vfrsqrt_s">, ++ Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vfrsqrt_d : GCCBuiltin<"__builtin_lsx_vfrsqrt_d">, ++ Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lsx_vfcvtl_s_h : GCCBuiltin<"__builtin_lsx_vfcvtl_s_h">, ++ Intrinsic<[llvm_v4f32_ty], [llvm_v8i16_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vfcvtl_d_s : GCCBuiltin<"__builtin_lsx_vfcvtl_d_s">, ++ Intrinsic<[llvm_v2f64_ty], [llvm_v4f32_ty], [IntrNoMem]>; ++ ++def int_loongarch_lsx_vfcvth_s_h : GCCBuiltin<"__builtin_lsx_vfcvth_s_h">, ++ Intrinsic<[llvm_v4f32_ty], [llvm_v8i16_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vfcvth_d_s : GCCBuiltin<"__builtin_lsx_vfcvth_d_s">, ++ 
Intrinsic<[llvm_v2f64_ty], [llvm_v4f32_ty], [IntrNoMem]>; ++ ++def int_loongarch_lsx_vftint_w_s : GCCBuiltin<"__builtin_lsx_vftint_w_s">, ++ Intrinsic<[llvm_v4i32_ty], [llvm_v4f32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vftint_l_d : GCCBuiltin<"__builtin_lsx_vftint_l_d">, ++ Intrinsic<[llvm_v2i64_ty], [llvm_v2f64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lsx_vftint_wu_s : GCCBuiltin<"__builtin_lsx_vftint_wu_s">, ++ Intrinsic<[llvm_v4i32_ty], [llvm_v4f32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vftint_lu_d : GCCBuiltin<"__builtin_lsx_vftint_lu_d">, ++ Intrinsic<[llvm_v2i64_ty], [llvm_v2f64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lsx_vsrlri_b : GCCBuiltin<"__builtin_lsx_vsrlri_b">, ++ Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_i32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vsrlri_h : GCCBuiltin<"__builtin_lsx_vsrlri_h">, ++ Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_i32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vsrlri_w : GCCBuiltin<"__builtin_lsx_vsrlri_w">, ++ Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_i32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vsrlri_d : GCCBuiltin<"__builtin_lsx_vsrlri_d">, ++ Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_i32_ty], [IntrNoMem]>; ++ ++def int_loongarch_lsx_vsrari_b : GCCBuiltin<"__builtin_lsx_vsrari_b">, ++ Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_i32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vsrari_h : GCCBuiltin<"__builtin_lsx_vsrari_h">, ++ Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_i32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vsrari_w : GCCBuiltin<"__builtin_lsx_vsrari_w">, ++ Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_i32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vsrari_d : GCCBuiltin<"__builtin_lsx_vsrari_d">, ++ Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_i32_ty], [IntrNoMem]>; ++ ++def int_loongarch_lsx_vsat_b : GCCBuiltin<"__builtin_lsx_vsat_b">, ++ Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_i32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vsat_h : GCCBuiltin<"__builtin_lsx_vsat_h">, ++ Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_i32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vsat_w : GCCBuiltin<"__builtin_lsx_vsat_w">, ++ Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_i32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vsat_d : GCCBuiltin<"__builtin_lsx_vsat_d">, ++ Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_i32_ty], [IntrNoMem]>; ++ ++def int_loongarch_lsx_vsat_bu : GCCBuiltin<"__builtin_lsx_vsat_bu">, ++ Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_i32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vsat_hu : GCCBuiltin<"__builtin_lsx_vsat_hu">, ++ Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_i32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vsat_wu : GCCBuiltin<"__builtin_lsx_vsat_wu">, ++ Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_i32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vsat_du : GCCBuiltin<"__builtin_lsx_vsat_du">, ++ Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_i32_ty], [IntrNoMem]>; ++ ++def int_loongarch_lsx_vsrlni_b_h : GCCBuiltin<"__builtin_lsx_vsrlni_b_h">, ++ Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty, llvm_i32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vsrlni_h_w : GCCBuiltin<"__builtin_lsx_vsrlni_h_w">, ++ Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty, llvm_i32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vsrlni_w_d : GCCBuiltin<"__builtin_lsx_vsrlni_w_d">, ++ Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty, llvm_i32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vsrlni_d_q : GCCBuiltin<"__builtin_lsx_vsrlni_d_q">, ++ 
Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty, llvm_i32_ty], [IntrNoMem]>; ++ ++def int_loongarch_lsx_vssrlni_b_h : GCCBuiltin<"__builtin_lsx_vssrlni_b_h">, ++ Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty, llvm_i32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vssrlni_h_w : GCCBuiltin<"__builtin_lsx_vssrlni_h_w">, ++ Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty, llvm_i32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vssrlni_w_d : GCCBuiltin<"__builtin_lsx_vssrlni_w_d">, ++ Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty, llvm_i32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vssrlni_d_q : GCCBuiltin<"__builtin_lsx_vssrlni_d_q">, ++ Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty, llvm_i32_ty], [IntrNoMem]>; ++ ++def int_loongarch_lsx_vssrlrni_bu_h : GCCBuiltin<"__builtin_lsx_vssrlrni_bu_h">, ++ Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty, llvm_i32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vssrlrni_hu_w : GCCBuiltin<"__builtin_lsx_vssrlrni_hu_w">, ++ Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty, llvm_i32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vssrlrni_wu_d : GCCBuiltin<"__builtin_lsx_vssrlrni_wu_d">, ++ Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty, llvm_i32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vssrlrni_du_q : GCCBuiltin<"__builtin_lsx_vssrlrni_du_q">, ++ Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty, llvm_i32_ty], [IntrNoMem]>; ++ ++def int_loongarch_lsx_vsrarni_b_h : GCCBuiltin<"__builtin_lsx_vsrarni_b_h">, ++ Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty, llvm_i32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vsrarni_h_w : GCCBuiltin<"__builtin_lsx_vsrarni_h_w">, ++ Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty, llvm_i32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vsrarni_w_d : GCCBuiltin<"__builtin_lsx_vsrarni_w_d">, ++ Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty, llvm_i32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vsrarni_d_q : GCCBuiltin<"__builtin_lsx_vsrarni_d_q">, ++ Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty, llvm_i32_ty], [IntrNoMem]>; ++ ++def int_loongarch_lsx_vssrani_b_h : GCCBuiltin<"__builtin_lsx_vssrani_b_h">, ++ Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty, llvm_i32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vssrani_h_w : GCCBuiltin<"__builtin_lsx_vssrani_h_w">, ++ Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty, llvm_i32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vssrani_w_d : GCCBuiltin<"__builtin_lsx_vssrani_w_d">, ++ Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty, llvm_i32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vssrani_d_q : GCCBuiltin<"__builtin_lsx_vssrani_d_q">, ++ Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty, llvm_i32_ty], [IntrNoMem]>; ++ ++def int_loongarch_lsx_vssrani_bu_h : GCCBuiltin<"__builtin_lsx_vssrani_bu_h">, ++ Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty, llvm_i32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vssrani_hu_w : GCCBuiltin<"__builtin_lsx_vssrani_hu_w">, ++ Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty, llvm_i32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vssrani_wu_d : GCCBuiltin<"__builtin_lsx_vssrani_wu_d">, ++ Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty, llvm_i32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vssrani_du_q : GCCBuiltin<"__builtin_lsx_vssrani_du_q">, ++ Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty, llvm_i32_ty], [IntrNoMem]>; ++ ++def int_loongarch_lsx_vssrarni_b_h : 
GCCBuiltin<"__builtin_lsx_vssrarni_b_h">, ++ Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty, llvm_i32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vssrarni_h_w : GCCBuiltin<"__builtin_lsx_vssrarni_h_w">, ++ Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty, llvm_i32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vssrarni_w_d : GCCBuiltin<"__builtin_lsx_vssrarni_w_d">, ++ Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty, llvm_i32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vssrarni_d_q : GCCBuiltin<"__builtin_lsx_vssrarni_d_q">, ++ Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty, llvm_i32_ty], [IntrNoMem]>; ++ ++def int_loongarch_lsx_vssrarni_bu_h : GCCBuiltin<"__builtin_lsx_vssrarni_bu_h">, ++ Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty, llvm_i32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vssrarni_hu_w : GCCBuiltin<"__builtin_lsx_vssrarni_hu_w">, ++ Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty, llvm_i32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vssrarni_wu_d : GCCBuiltin<"__builtin_lsx_vssrarni_wu_d">, ++ Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty, llvm_i32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vssrarni_du_q : GCCBuiltin<"__builtin_lsx_vssrarni_du_q">, ++ Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty, llvm_i32_ty], [IntrNoMem]>; ++ ++def int_loongarch_lsx_vssrlni_bu_h : GCCBuiltin<"__builtin_lsx_vssrlni_bu_h">, ++ Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty, llvm_i32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vssrlni_hu_w : GCCBuiltin<"__builtin_lsx_vssrlni_hu_w">, ++ Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty, llvm_i32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vssrlni_wu_d : GCCBuiltin<"__builtin_lsx_vssrlni_wu_d">, ++ Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty, llvm_i32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vssrlni_du_q : GCCBuiltin<"__builtin_lsx_vssrlni_du_q">, ++ Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty, llvm_i32_ty], [IntrNoMem]>; ++ ++def int_loongarch_lsx_vseq_b : GCCBuiltin<"__builtin_lsx_vseq_b">, ++ Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vseq_h : GCCBuiltin<"__builtin_lsx_vseq_h">, ++ Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vseq_w : GCCBuiltin<"__builtin_lsx_vseq_w">, ++ Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vseq_d : GCCBuiltin<"__builtin_lsx_vseq_d">, ++ Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lsx_vsle_b : GCCBuiltin<"__builtin_lsx_vsle_b">, ++ Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vsle_h : GCCBuiltin<"__builtin_lsx_vsle_h">, ++ Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vsle_w : GCCBuiltin<"__builtin_lsx_vsle_w">, ++ Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vsle_d : GCCBuiltin<"__builtin_lsx_vsle_d">, ++ Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lsx_vsle_bu : GCCBuiltin<"__builtin_lsx_vsle_bu">, ++ Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vsle_hu : GCCBuiltin<"__builtin_lsx_vsle_hu">, ++ Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vsle_wu : GCCBuiltin<"__builtin_lsx_vsle_wu">, ++ 
Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vsle_du : GCCBuiltin<"__builtin_lsx_vsle_du">, ++ Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lsx_vslt_b : GCCBuiltin<"__builtin_lsx_vslt_b">, ++ Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vslt_h : GCCBuiltin<"__builtin_lsx_vslt_h">, ++ Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vslt_w : GCCBuiltin<"__builtin_lsx_vslt_w">, ++ Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vslt_d : GCCBuiltin<"__builtin_lsx_vslt_d">, ++ Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lsx_vslt_bu : GCCBuiltin<"__builtin_lsx_vslt_bu">, ++ Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vslt_hu : GCCBuiltin<"__builtin_lsx_vslt_hu">, ++ Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vslt_wu : GCCBuiltin<"__builtin_lsx_vslt_wu">, ++ Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vslt_du : GCCBuiltin<"__builtin_lsx_vslt_du">, ++ Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lsx_vadd_b : GCCBuiltin<"__builtin_lsx_vadd_b">, ++ Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty], ++ [Commutative, IntrNoMem]>; ++def int_loongarch_lsx_vadd_h : GCCBuiltin<"__builtin_lsx_vadd_h">, ++ Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty], ++ [Commutative, IntrNoMem]>; ++def int_loongarch_lsx_vadd_w : GCCBuiltin<"__builtin_lsx_vadd_w">, ++ Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty], ++ [Commutative, IntrNoMem]>; ++def int_loongarch_lsx_vadd_d : GCCBuiltin<"__builtin_lsx_vadd_d">, ++ Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty], ++ [Commutative, IntrNoMem]>; ++ ++def int_loongarch_lsx_vsub_b : GCCBuiltin<"__builtin_lsx_vsub_b">, ++ Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vsub_h : GCCBuiltin<"__builtin_lsx_vsub_h">, ++ Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vsub_w : GCCBuiltin<"__builtin_lsx_vsub_w">, ++ Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vsub_d : GCCBuiltin<"__builtin_lsx_vsub_d">, ++ Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lsx_vmax_b : GCCBuiltin<"__builtin_lsx_vmax_b">, ++ Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vmax_h : GCCBuiltin<"__builtin_lsx_vmax_h">, ++ Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vmax_w : GCCBuiltin<"__builtin_lsx_vmax_w">, ++ Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vmax_d : GCCBuiltin<"__builtin_lsx_vmax_d">, ++ Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lsx_vmin_b : GCCBuiltin<"__builtin_lsx_vmin_b">, ++ Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vmin_h : GCCBuiltin<"__builtin_lsx_vmin_h">, ++ Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vmin_w : 
GCCBuiltin<"__builtin_lsx_vmin_w">, ++ Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vmin_d : GCCBuiltin<"__builtin_lsx_vmin_d">, ++ Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lsx_vmax_bu : GCCBuiltin<"__builtin_lsx_vmax_bu">, ++ Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vmax_hu : GCCBuiltin<"__builtin_lsx_vmax_hu">, ++ Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vmax_wu : GCCBuiltin<"__builtin_lsx_vmax_wu">, ++ Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vmax_du : GCCBuiltin<"__builtin_lsx_vmax_du">, ++ Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lsx_vmin_bu : GCCBuiltin<"__builtin_lsx_vmin_bu">, ++ Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vmin_hu : GCCBuiltin<"__builtin_lsx_vmin_hu">, ++ Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vmin_wu : GCCBuiltin<"__builtin_lsx_vmin_wu">, ++ Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vmin_du : GCCBuiltin<"__builtin_lsx_vmin_du">, ++ Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lsx_vmul_b : GCCBuiltin<"__builtin_lsx_vmul_b">, ++ Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vmul_h : GCCBuiltin<"__builtin_lsx_vmul_h">, ++ Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vmul_w : GCCBuiltin<"__builtin_lsx_vmul_w">, ++ Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vmul_d : GCCBuiltin<"__builtin_lsx_vmul_d">, ++ Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lsx_vmadd_b : GCCBuiltin<"__builtin_lsx_vmadd_b">, ++ Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty, llvm_v16i8_ty], ++ [IntrNoMem]>; ++def int_loongarch_lsx_vmadd_h : GCCBuiltin<"__builtin_lsx_vmadd_h">, ++ Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty, llvm_v8i16_ty], ++ [IntrNoMem]>; ++def int_loongarch_lsx_vmadd_w : GCCBuiltin<"__builtin_lsx_vmadd_w">, ++ Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty, llvm_v4i32_ty], ++ [IntrNoMem]>; ++def int_loongarch_lsx_vmadd_d : GCCBuiltin<"__builtin_lsx_vmadd_d">, ++ Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty, llvm_v2i64_ty], ++ [IntrNoMem]>; ++ ++def int_loongarch_lsx_vmsub_b : GCCBuiltin<"__builtin_lsx_vmsub_b">, ++ Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty, llvm_v16i8_ty], ++ [IntrNoMem]>; ++def int_loongarch_lsx_vmsub_h : GCCBuiltin<"__builtin_lsx_vmsub_h">, ++ Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty, llvm_v8i16_ty], ++ [IntrNoMem]>; ++def int_loongarch_lsx_vmsub_w : GCCBuiltin<"__builtin_lsx_vmsub_w">, ++ Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty, llvm_v4i32_ty], ++ [IntrNoMem]>; ++def int_loongarch_lsx_vmsub_d : GCCBuiltin<"__builtin_lsx_vmsub_d">, ++ Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty, llvm_v2i64_ty], ++ [IntrNoMem]>; ++ ++def int_loongarch_lsx_vdiv_b : GCCBuiltin<"__builtin_lsx_vdiv_b">, ++ Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vdiv_h : 
GCCBuiltin<"__builtin_lsx_vdiv_h">, ++ Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vdiv_w : GCCBuiltin<"__builtin_lsx_vdiv_w">, ++ Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vdiv_d : GCCBuiltin<"__builtin_lsx_vdiv_d">, ++ Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lsx_vmod_b : GCCBuiltin<"__builtin_lsx_vmod_b">, ++ Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vmod_h : GCCBuiltin<"__builtin_lsx_vmod_h">, ++ Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vmod_w : GCCBuiltin<"__builtin_lsx_vmod_w">, ++ Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vmod_d : GCCBuiltin<"__builtin_lsx_vmod_d">, ++ Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lsx_vdiv_bu : GCCBuiltin<"__builtin_lsx_vdiv_bu">, ++ Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vdiv_hu : GCCBuiltin<"__builtin_lsx_vdiv_hu">, ++ Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vdiv_wu : GCCBuiltin<"__builtin_lsx_vdiv_wu">, ++ Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vdiv_du : GCCBuiltin<"__builtin_lsx_vdiv_du">, ++ Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lsx_vsll_b : GCCBuiltin<"__builtin_lsx_vsll_b">, ++ Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vsll_h : GCCBuiltin<"__builtin_lsx_vsll_h">, ++ Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vsll_w : GCCBuiltin<"__builtin_lsx_vsll_w">, ++ Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vsll_d : GCCBuiltin<"__builtin_lsx_vsll_d">, ++ Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lsx_vsrl_b : GCCBuiltin<"__builtin_lsx_vsrl_b">, ++ Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vsrl_h : GCCBuiltin<"__builtin_lsx_vsrl_h">, ++ Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vsrl_w : GCCBuiltin<"__builtin_lsx_vsrl_w">, ++ Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vsrl_d : GCCBuiltin<"__builtin_lsx_vsrl_d">, ++ Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lsx_vbitclr_b : GCCBuiltin<"__builtin_lsx_vbitclr_b">, ++ Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vbitclr_h : GCCBuiltin<"__builtin_lsx_vbitclr_h">, ++ Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vbitclr_w : GCCBuiltin<"__builtin_lsx_vbitclr_w">, ++ Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vbitclr_d : GCCBuiltin<"__builtin_lsx_vbitclr_d">, ++ Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lsx_vbitset_b : GCCBuiltin<"__builtin_lsx_vbitset_b">, ++ Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vbitset_h : 
GCCBuiltin<"__builtin_lsx_vbitset_h">, ++ Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vbitset_w : GCCBuiltin<"__builtin_lsx_vbitset_w">, ++ Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vbitset_d : GCCBuiltin<"__builtin_lsx_vbitset_d">, ++ Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lsx_vpackev_b : GCCBuiltin<"__builtin_lsx_vpackev_b">, ++ Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vpackev_h : GCCBuiltin<"__builtin_lsx_vpackev_h">, ++ Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vpackev_w : GCCBuiltin<"__builtin_lsx_vpackev_w">, ++ Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vpackev_d : GCCBuiltin<"__builtin_lsx_vpackev_d">, ++ Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lsx_vpackod_b : GCCBuiltin<"__builtin_lsx_vpackod_b">, ++ Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vpackod_h : GCCBuiltin<"__builtin_lsx_vpackod_h">, ++ Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vpackod_w : GCCBuiltin<"__builtin_lsx_vpackod_w">, ++ Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vpackod_d : GCCBuiltin<"__builtin_lsx_vpackod_d">, ++ Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lsx_vilvl_b : GCCBuiltin<"__builtin_lsx_vilvl_b">, ++ Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vilvl_h : GCCBuiltin<"__builtin_lsx_vilvl_h">, ++ Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vilvl_w : GCCBuiltin<"__builtin_lsx_vilvl_w">, ++ Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vilvl_d : GCCBuiltin<"__builtin_lsx_vilvl_d">, ++ Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lsx_vilvh_b : GCCBuiltin<"__builtin_lsx_vilvh_b">, ++ Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vilvh_h : GCCBuiltin<"__builtin_lsx_vilvh_h">, ++ Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vilvh_w : GCCBuiltin<"__builtin_lsx_vilvh_w">, ++ Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vilvh_d : GCCBuiltin<"__builtin_lsx_vilvh_d">, ++ Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lsx_vpickev_b : GCCBuiltin<"__builtin_lsx_vpickev_b">, ++ Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vpickev_h : GCCBuiltin<"__builtin_lsx_vpickev_h">, ++ Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vpickev_w : GCCBuiltin<"__builtin_lsx_vpickev_w">, ++ Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vpickev_d : GCCBuiltin<"__builtin_lsx_vpickev_d">, ++ Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lsx_vand_v : GCCBuiltin<"__builtin_lsx_vand_v">, ++ Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty], 
[IntrNoMem]>; ++ ++def int_loongarch_lsx_vor_v : GCCBuiltin<"__builtin_lsx_vor_v">, ++ Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty], [IntrNoMem]>; ++ ++def int_loongarch_lsx_vbitrev_b : GCCBuiltin<"__builtin_lsx_vbitrev_b">, ++ Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vbitrev_h : GCCBuiltin<"__builtin_lsx_vbitrev_h">, ++ Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vbitrev_w : GCCBuiltin<"__builtin_lsx_vbitrev_w">, ++ Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vbitrev_d : GCCBuiltin<"__builtin_lsx_vbitrev_d">, ++ Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lsx_vmod_bu : GCCBuiltin<"__builtin_lsx_vmod_bu">, ++ Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vmod_hu : GCCBuiltin<"__builtin_lsx_vmod_hu">, ++ Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vmod_wu : GCCBuiltin<"__builtin_lsx_vmod_wu">, ++ Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vmod_du : GCCBuiltin<"__builtin_lsx_vmod_du">, ++ Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lsx_vpickod_b : GCCBuiltin<"__builtin_lsx_vpickod_b">, ++ Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vpickod_h : GCCBuiltin<"__builtin_lsx_vpickod_h">, ++ Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vpickod_w : GCCBuiltin<"__builtin_lsx_vpickod_w">, ++ Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vpickod_d : GCCBuiltin<"__builtin_lsx_vpickod_d">, ++ Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lsx_vreplve_b : GCCBuiltin<"__builtin_lsx_vreplve_b">, ++ Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_i32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vreplve_h : GCCBuiltin<"__builtin_lsx_vreplve_h">, ++ Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_i32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vreplve_w : GCCBuiltin<"__builtin_lsx_vreplve_w">, ++ Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_i32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vreplve_d : GCCBuiltin<"__builtin_lsx_vreplve_d">, ++ Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_i32_ty], [IntrNoMem]>; ++ ++def int_loongarch_lsx_vsra_b : GCCBuiltin<"__builtin_lsx_vsra_b">, ++ Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vsra_h : GCCBuiltin<"__builtin_lsx_vsra_h">, ++ Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vsra_w : GCCBuiltin<"__builtin_lsx_vsra_w">, ++ Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vsra_d : GCCBuiltin<"__builtin_lsx_vsra_d">, ++ Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lsx_vxor_v : GCCBuiltin<"__builtin_lsx_vxor_v">, ++ Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty], [IntrNoMem]>; ++ ++def int_loongarch_lsx_vnor_v : GCCBuiltin<"__builtin_lsx_vnor_v">, ++ Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty], [IntrNoMem]>; ++ ++def int_loongarch_lsx_vfadd_s : GCCBuiltin<"__builtin_lsx_vfadd_s">, ++ Intrinsic<[llvm_v4f32_ty], 
[llvm_v4f32_ty, llvm_v4f32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vfadd_d : GCCBuiltin<"__builtin_lsx_vfadd_d">, ++ Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty, llvm_v2f64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lsx_vfsub_s : GCCBuiltin<"__builtin_lsx_vfsub_s">, ++ Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_v4f32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vfsub_d : GCCBuiltin<"__builtin_lsx_vfsub_d">, ++ Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty, llvm_v2f64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lsx_vfmul_s : GCCBuiltin<"__builtin_lsx_vfmul_s">, ++ Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_v4f32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vfmul_d : GCCBuiltin<"__builtin_lsx_vfmul_d">, ++ Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty, llvm_v2f64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lsx_vshuf_h : GCCBuiltin<"__builtin_lsx_vshuf_h">, ++ Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty, llvm_v8i16_ty], ++ [IntrNoMem]>; ++def int_loongarch_lsx_vshuf_w : GCCBuiltin<"__builtin_lsx_vshuf_w">, ++ Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty, llvm_v4i32_ty], ++ [IntrNoMem]>; ++def int_loongarch_lsx_vshuf_d : GCCBuiltin<"__builtin_lsx_vshuf_d">, ++ Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty, llvm_v2i64_ty], ++ [IntrNoMem]>; ++ ++def int_loongarch_lsx_vseqi_b : GCCBuiltin<"__builtin_lsx_vseqi_b">, ++ Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_i32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vseqi_h : GCCBuiltin<"__builtin_lsx_vseqi_h">, ++ Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_i32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vseqi_w : GCCBuiltin<"__builtin_lsx_vseqi_w">, ++ Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_i32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vseqi_d : GCCBuiltin<"__builtin_lsx_vseqi_d">, ++ Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_i32_ty], [IntrNoMem]>; ++ ++def int_loongarch_lsx_vslei_b : GCCBuiltin<"__builtin_lsx_vslei_b">, ++ Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_i32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vslei_h : GCCBuiltin<"__builtin_lsx_vslei_h">, ++ Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_i32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vslei_w : GCCBuiltin<"__builtin_lsx_vslei_w">, ++ Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_i32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vslei_d : GCCBuiltin<"__builtin_lsx_vslei_d">, ++ Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_i32_ty], [IntrNoMem]>; ++ ++def int_loongarch_lsx_vslei_bu : GCCBuiltin<"__builtin_lsx_vslei_bu">, ++ Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_i32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vslei_hu : GCCBuiltin<"__builtin_lsx_vslei_hu">, ++ Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_i32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vslei_wu : GCCBuiltin<"__builtin_lsx_vslei_wu">, ++ Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_i32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vslei_du : GCCBuiltin<"__builtin_lsx_vslei_du">, ++ Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_i32_ty], [IntrNoMem]>; ++ ++def int_loongarch_lsx_vslti_b : GCCBuiltin<"__builtin_lsx_vslti_b">, ++ Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_i32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vslti_h : GCCBuiltin<"__builtin_lsx_vslti_h">, ++ Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_i32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vslti_w : GCCBuiltin<"__builtin_lsx_vslti_w">, ++ Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_i32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vslti_d : GCCBuiltin<"__builtin_lsx_vslti_d">, ++ 
Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_i32_ty], [IntrNoMem]>; ++ ++def int_loongarch_lsx_vslti_bu : GCCBuiltin<"__builtin_lsx_vslti_bu">, ++ Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_i32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vslti_hu : GCCBuiltin<"__builtin_lsx_vslti_hu">, ++ Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_i32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vslti_wu : GCCBuiltin<"__builtin_lsx_vslti_wu">, ++ Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_i32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vslti_du : GCCBuiltin<"__builtin_lsx_vslti_du">, ++ Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_i32_ty], [IntrNoMem]>; ++ ++def int_loongarch_lsx_vaddi_bu : GCCBuiltin<"__builtin_lsx_vaddi_bu">, ++ Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_i32_ty], ++ [Commutative, IntrNoMem]>; ++def int_loongarch_lsx_vaddi_hu : GCCBuiltin<"__builtin_lsx_vaddi_hu">, ++ Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_i32_ty], ++ [Commutative, IntrNoMem]>; ++def int_loongarch_lsx_vaddi_wu : GCCBuiltin<"__builtin_lsx_vaddi_wu">, ++ Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_i32_ty], ++ [Commutative, IntrNoMem]>; ++def int_loongarch_lsx_vaddi_du : GCCBuiltin<"__builtin_lsx_vaddi_du">, ++ Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_i32_ty], ++ [Commutative, IntrNoMem]>; ++ ++def int_loongarch_lsx_vsubi_bu : GCCBuiltin<"__builtin_lsx_vsubi_bu">, ++ Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_i32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vsubi_hu : GCCBuiltin<"__builtin_lsx_vsubi_hu">, ++ Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_i32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vsubi_wu : GCCBuiltin<"__builtin_lsx_vsubi_wu">, ++ Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_i32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vsubi_du : GCCBuiltin<"__builtin_lsx_vsubi_du">, ++ Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_i32_ty], [IntrNoMem]>; ++ ++def int_loongarch_lsx_vmaxi_b : GCCBuiltin<"__builtin_lsx_vmaxi_b">, ++ Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_i32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vmaxi_h : GCCBuiltin<"__builtin_lsx_vmaxi_h">, ++ Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_i32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vmaxi_w : GCCBuiltin<"__builtin_lsx_vmaxi_w">, ++ Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_i32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vmaxi_d : GCCBuiltin<"__builtin_lsx_vmaxi_d">, ++ Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_i32_ty], [IntrNoMem]>; ++ ++def int_loongarch_lsx_vmini_b : GCCBuiltin<"__builtin_lsx_vmini_b">, ++ Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_i32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vmini_h : GCCBuiltin<"__builtin_lsx_vmini_h">, ++ Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_i32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vmini_w : GCCBuiltin<"__builtin_lsx_vmini_w">, ++ Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_i32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vmini_d : GCCBuiltin<"__builtin_lsx_vmini_d">, ++ Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_i32_ty], [IntrNoMem]>; ++ ++def int_loongarch_lsx_vmaxi_bu : GCCBuiltin<"__builtin_lsx_vmaxi_bu">, ++ Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_i32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vmaxi_hu : GCCBuiltin<"__builtin_lsx_vmaxi_hu">, ++ Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_i32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vmaxi_wu : GCCBuiltin<"__builtin_lsx_vmaxi_wu">, ++ Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_i32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vmaxi_du : 
GCCBuiltin<"__builtin_lsx_vmaxi_du">, ++ Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_i32_ty], [IntrNoMem]>; ++ ++def int_loongarch_lsx_vmini_bu : GCCBuiltin<"__builtin_lsx_vmini_bu">, ++ Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_i32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vmini_hu : GCCBuiltin<"__builtin_lsx_vmini_hu">, ++ Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_i32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vmini_wu : GCCBuiltin<"__builtin_lsx_vmini_wu">, ++ Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_i32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vmini_du : GCCBuiltin<"__builtin_lsx_vmini_du">, ++ Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_i32_ty], [IntrNoMem]>; ++ ++def int_loongarch_lsx_vclz_b : GCCBuiltin<"__builtin_lsx_vclz_b">, ++ Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vclz_h : GCCBuiltin<"__builtin_lsx_vclz_h">, ++ Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vclz_w : GCCBuiltin<"__builtin_lsx_vclz_w">, ++ Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vclz_d : GCCBuiltin<"__builtin_lsx_vclz_d">, ++ Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lsx_vpcnt_b : GCCBuiltin<"__builtin_lsx_vpcnt_b">, ++ Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vpcnt_h : GCCBuiltin<"__builtin_lsx_vpcnt_h">, ++ Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vpcnt_w : GCCBuiltin<"__builtin_lsx_vpcnt_w">, ++ Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vpcnt_d : GCCBuiltin<"__builtin_lsx_vpcnt_d">, ++ Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lsx_vfsqrt_s : GCCBuiltin<"__builtin_lsx_vfsqrt_s">, ++ Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vfsqrt_d : GCCBuiltin<"__builtin_lsx_vfsqrt_d">, ++ Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lsx_vfrint_s : GCCBuiltin<"__builtin_lsx_vfrint_s">, ++ Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vfrint_d : GCCBuiltin<"__builtin_lsx_vfrint_d">, ++ Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lsx_vffint_s_w : GCCBuiltin<"__builtin_lsx_vffint_s_w">, ++ Intrinsic<[llvm_v4f32_ty], [llvm_v4i32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vffint_d_l : GCCBuiltin<"__builtin_lsx_vffint_d_l">, ++ Intrinsic<[llvm_v2f64_ty], [llvm_v2i64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lsx_vffint_s_wu : GCCBuiltin<"__builtin_lsx_vffint_s_wu">, ++ Intrinsic<[llvm_v4f32_ty], [llvm_v4i32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vffint_d_lu : GCCBuiltin<"__builtin_lsx_vffint_d_lu">, ++ Intrinsic<[llvm_v2f64_ty], [llvm_v2i64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lsx_vftintrz_wu_s : GCCBuiltin<"__builtin_lsx_vftintrz_wu_s">, ++ Intrinsic<[llvm_v4i32_ty], [llvm_v4f32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vftintrz_lu_d : GCCBuiltin<"__builtin_lsx_vftintrz_lu_d">, ++ Intrinsic<[llvm_v2i64_ty], [llvm_v2f64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lsx_vreplgr2vr_b : GCCBuiltin<"__builtin_lsx_vreplgr2vr_b">, ++ Intrinsic<[llvm_v16i8_ty], [llvm_i32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vreplgr2vr_h : GCCBuiltin<"__builtin_lsx_vreplgr2vr_h">, ++ Intrinsic<[llvm_v8i16_ty], [llvm_i32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vreplgr2vr_w : GCCBuiltin<"__builtin_lsx_vreplgr2vr_w">, ++ Intrinsic<[llvm_v4i32_ty], 
[llvm_i32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vreplgr2vr_d : GCCBuiltin<"__builtin_lsx_vreplgr2vr_d">, ++ Intrinsic<[llvm_v2i64_ty], [llvm_i64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lsx_vinsgr2vr_b : GCCBuiltin<"__builtin_lsx_vinsgr2vr_b">, ++ Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_i32_ty, llvm_i32_ty], ++ [IntrNoMem]>; ++def int_loongarch_lsx_vinsgr2vr_h : GCCBuiltin<"__builtin_lsx_vinsgr2vr_h">, ++ Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_i32_ty, llvm_i32_ty], ++ [IntrNoMem]>; ++def int_loongarch_lsx_vinsgr2vr_w : GCCBuiltin<"__builtin_lsx_vinsgr2vr_w">, ++ Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_i32_ty, llvm_i32_ty], ++ [IntrNoMem]>; ++def int_loongarch_lsx_vinsgr2vr_d : GCCBuiltin<"__builtin_lsx_vinsgr2vr_d">, ++ Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_i64_ty, llvm_i32_ty], ++ [IntrNoMem]>; ++ ++def int_loongarch_lsx_vfdiv_s : GCCBuiltin<"__builtin_lsx_vfdiv_s">, ++ Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_v4f32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vfdiv_d : GCCBuiltin<"__builtin_lsx_vfdiv_d">, ++ Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty, llvm_v2f64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lsx_vslli_b : GCCBuiltin<"__builtin_lsx_vslli_b">, ++ Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_i32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vslli_h : GCCBuiltin<"__builtin_lsx_vslli_h">, ++ Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_i32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vslli_w : GCCBuiltin<"__builtin_lsx_vslli_w">, ++ Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_i32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vslli_d : GCCBuiltin<"__builtin_lsx_vslli_d">, ++ Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_i32_ty], [IntrNoMem]>; ++ ++def int_loongarch_lsx_vsrli_b : GCCBuiltin<"__builtin_lsx_vsrli_b">, ++ Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_i32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vsrli_h : GCCBuiltin<"__builtin_lsx_vsrli_h">, ++ Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_i32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vsrli_w : GCCBuiltin<"__builtin_lsx_vsrli_w">, ++ Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_i32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vsrli_d : GCCBuiltin<"__builtin_lsx_vsrli_d">, ++ Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_i32_ty], [IntrNoMem]>; ++ ++def int_loongarch_lsx_vsrai_b : GCCBuiltin<"__builtin_lsx_vsrai_b">, ++ Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_i32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vsrai_h : GCCBuiltin<"__builtin_lsx_vsrai_h">, ++ Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_i32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vsrai_w : GCCBuiltin<"__builtin_lsx_vsrai_w">, ++ Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_i32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vsrai_d : GCCBuiltin<"__builtin_lsx_vsrai_d">, ++ Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_i32_ty], [IntrNoMem]>; ++ ++def int_loongarch_lsx_vshuf4i_b : GCCBuiltin<"__builtin_lsx_vshuf4i_b">, ++ Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_i32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vshuf4i_h : GCCBuiltin<"__builtin_lsx_vshuf4i_h">, ++ Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_i32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vshuf4i_w : GCCBuiltin<"__builtin_lsx_vshuf4i_w">, ++ Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_i32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vshuf4i_d : GCCBuiltin<"__builtin_lsx_vshuf4i_d">, ++ Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty, llvm_i32_ty], [IntrNoMem]>; ++ ++def int_loongarch_lsx_vrotr_b : 
GCCBuiltin<"__builtin_lsx_vrotr_b">, ++ Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vrotr_h : GCCBuiltin<"__builtin_lsx_vrotr_h">, ++ Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vrotr_w : GCCBuiltin<"__builtin_lsx_vrotr_w">, ++ Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vrotr_d : GCCBuiltin<"__builtin_lsx_vrotr_d">, ++ Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lsx_vrotri_b : GCCBuiltin<"__builtin_lsx_vrotri_b">, ++ Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_i32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vrotri_h : GCCBuiltin<"__builtin_lsx_vrotri_h">, ++ Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_i32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vrotri_w : GCCBuiltin<"__builtin_lsx_vrotri_w">, ++ Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_i32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_vrotri_d : GCCBuiltin<"__builtin_lsx_vrotri_d">, ++ Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_i32_ty], [IntrNoMem]>; ++ ++def int_loongarch_lsx_vld : GCCBuiltin<"__builtin_lsx_vld">, ++ Intrinsic<[llvm_v16i8_ty], [llvm_ptr_ty, llvm_i32_ty], ++ [IntrReadMem, IntrArgMemOnly]>; ++ ++def int_loongarch_lsx_vst : GCCBuiltin<"__builtin_lsx_vst">, ++ Intrinsic<[], [llvm_v16i8_ty, llvm_ptr_ty, llvm_i32_ty], ++ [IntrArgMemOnly]>; ++ ++def int_loongarch_lsx_bz_v : GCCBuiltin<"__builtin_lsx_bz_v">, ++ Intrinsic<[llvm_i32_ty], [llvm_v16i8_ty], [IntrNoMem]>; ++ ++def int_loongarch_lsx_bnz_v : GCCBuiltin<"__builtin_lsx_bnz_v">, ++ Intrinsic<[llvm_i32_ty], [llvm_v16i8_ty], [IntrNoMem]>; ++ ++def int_loongarch_lsx_bz_b : GCCBuiltin<"__builtin_lsx_bz_b">, ++ Intrinsic<[llvm_i32_ty], [llvm_v16i8_ty], [IntrNoMem]>; ++def int_loongarch_lsx_bz_h : GCCBuiltin<"__builtin_lsx_bz_h">, ++ Intrinsic<[llvm_i32_ty], [llvm_v8i16_ty], [IntrNoMem]>; ++def int_loongarch_lsx_bz_w : GCCBuiltin<"__builtin_lsx_bz_w">, ++ Intrinsic<[llvm_i32_ty], [llvm_v4i32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_bz_d : GCCBuiltin<"__builtin_lsx_bz_d">, ++ Intrinsic<[llvm_i32_ty], [llvm_v2i64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lsx_bnz_b : GCCBuiltin<"__builtin_lsx_bnz_b">, ++ Intrinsic<[llvm_i32_ty], [llvm_v16i8_ty], [IntrNoMem]>; ++def int_loongarch_lsx_bnz_h : GCCBuiltin<"__builtin_lsx_bnz_h">, ++ Intrinsic<[llvm_i32_ty], [llvm_v8i16_ty], [IntrNoMem]>; ++def int_loongarch_lsx_bnz_w : GCCBuiltin<"__builtin_lsx_bnz_w">, ++ Intrinsic<[llvm_i32_ty], [llvm_v4i32_ty], [IntrNoMem]>; ++def int_loongarch_lsx_bnz_d : GCCBuiltin<"__builtin_lsx_bnz_d">, ++ Intrinsic<[llvm_i32_ty], [llvm_v2i64_ty], [IntrNoMem]>; ++ ++//===----------------------------------------------------------------------===// ++//LoongArch LASX ++ ++def int_loongarch_lasx_xvfmadd_s : GCCBuiltin<"__builtin_lasx_xvfmadd_s">, ++ Intrinsic<[llvm_v8f32_ty], [llvm_v8f32_ty, llvm_v8f32_ty, llvm_v8f32_ty], ++ [IntrNoMem]>; ++def int_loongarch_lasx_xvfmadd_d : GCCBuiltin<"__builtin_lasx_xvfmadd_d">, ++ Intrinsic<[llvm_v4f64_ty], [llvm_v4f64_ty, llvm_v4f64_ty, llvm_v4f64_ty], ++ [IntrNoMem]>; ++ ++def int_loongarch_lasx_xvfmsub_s : GCCBuiltin<"__builtin_lasx_xvfmsub_s">, ++ Intrinsic<[llvm_v8f32_ty], [llvm_v8f32_ty, llvm_v8f32_ty, llvm_v8f32_ty], ++ [IntrNoMem]>; ++def int_loongarch_lasx_xvfmsub_d : GCCBuiltin<"__builtin_lasx_xvfmsub_d">, ++ Intrinsic<[llvm_v4f64_ty], [llvm_v4f64_ty, llvm_v4f64_ty, llvm_v4f64_ty], ++ [IntrNoMem]>; ++ ++ ++def int_loongarch_lasx_xvfnmadd_s : 
GCCBuiltin<"__builtin_lasx_xvfnmadd_s">, ++ Intrinsic<[llvm_v8f32_ty], [llvm_v8f32_ty, llvm_v8f32_ty, llvm_v8f32_ty], ++ [IntrNoMem]>; ++def int_loongarch_lasx_xvfnmadd_d : GCCBuiltin<"__builtin_lasx_xvfnmadd_d">, ++ Intrinsic<[llvm_v4f64_ty], [llvm_v4f64_ty, llvm_v4f64_ty, llvm_v4f64_ty], ++ [IntrNoMem]>; ++ ++def int_loongarch_lasx_xvfnmsub_s : GCCBuiltin<"__builtin_lasx_xvfnmsub_s">, ++ Intrinsic<[llvm_v8f32_ty], [llvm_v8f32_ty, llvm_v8f32_ty, llvm_v8f32_ty], ++ [IntrNoMem]>; ++def int_loongarch_lasx_xvfnmsub_d : GCCBuiltin<"__builtin_lasx_xvfnmsub_d">, ++ Intrinsic<[llvm_v4f64_ty], [llvm_v4f64_ty, llvm_v4f64_ty, llvm_v4f64_ty], ++ [IntrNoMem]>; ++ ++def int_loongarch_lasx_xvclo_b : GCCBuiltin<"__builtin_lasx_xvclo_b">, ++ Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvclo_h : GCCBuiltin<"__builtin_lasx_xvclo_h">, ++ Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvclo_w : GCCBuiltin<"__builtin_lasx_xvclo_w">, ++ Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvclo_d : GCCBuiltin<"__builtin_lasx_xvclo_d">, ++ Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lasx_xvflogb_s : GCCBuiltin<"__builtin_lasx_xvflogb_s">, ++ Intrinsic<[llvm_v8f32_ty], [llvm_v8f32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvflogb_d : GCCBuiltin<"__builtin_lasx_xvflogb_d">, ++ Intrinsic<[llvm_v4f64_ty], [llvm_v4f64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lasx_xvpickve2gr_w : GCCBuiltin<"__builtin_lasx_xvpickve2gr_w">, ++ Intrinsic<[llvm_i32_ty], [llvm_v8i32_ty, llvm_i32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvpickve2gr_d : GCCBuiltin<"__builtin_lasx_xvpickve2gr_d">, ++ Intrinsic<[llvm_i64_ty], [llvm_v4i64_ty, llvm_i32_ty], [IntrNoMem]>; ++ ++def int_loongarch_lasx_xvpickve2gr_wu : GCCBuiltin<"__builtin_lasx_xvpickve2gr_wu">, ++ Intrinsic<[llvm_i32_ty], [llvm_v8i32_ty, llvm_i32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvpickve2gr_du : GCCBuiltin<"__builtin_lasx_xvpickve2gr_du">, ++ Intrinsic<[llvm_i64_ty], [llvm_v4i64_ty, llvm_i32_ty], [IntrNoMem]>; ++ ++def int_loongarch_lasx_xvmskltz_b : GCCBuiltin<"__builtin_lasx_xvmskltz_b">, ++ Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvmskltz_h : GCCBuiltin<"__builtin_lasx_xvmskltz_h">, ++ Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvmskltz_w : GCCBuiltin<"__builtin_lasx_xvmskltz_w">, ++ Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvmskltz_d : GCCBuiltin<"__builtin_lasx_xvmskltz_d">, ++ Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lasx_xvfcmp_caf_s : GCCBuiltin<"__builtin_lasx_xvfcmp_caf_s">, ++ Intrinsic<[llvm_v8i32_ty], [llvm_v8f32_ty, llvm_v8f32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvfcmp_caf_d : GCCBuiltin<"__builtin_lasx_xvfcmp_caf_d">, ++ Intrinsic<[llvm_v4i64_ty], [llvm_v4f64_ty, llvm_v4f64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lasx_xvfcmp_cor_s : GCCBuiltin<"__builtin_lasx_xvfcmp_cor_s">, ++ Intrinsic<[llvm_v8i32_ty], [llvm_v8f32_ty, llvm_v8f32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvfcmp_cor_d : GCCBuiltin<"__builtin_lasx_xvfcmp_cor_d">, ++ Intrinsic<[llvm_v4i64_ty], [llvm_v4f64_ty, llvm_v4f64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lasx_xvfcmp_cun_s : GCCBuiltin<"__builtin_lasx_xvfcmp_cun_s">, ++ Intrinsic<[llvm_v8i32_ty], [llvm_v8f32_ty, llvm_v8f32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvfcmp_cun_d : 
GCCBuiltin<"__builtin_lasx_xvfcmp_cun_d">, ++ Intrinsic<[llvm_v4i64_ty], [llvm_v4f64_ty, llvm_v4f64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lasx_xvfcmp_cune_s : GCCBuiltin<"__builtin_lasx_xvfcmp_cune_s">, ++ Intrinsic<[llvm_v8i32_ty], [llvm_v8f32_ty, llvm_v8f32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvfcmp_cune_d : GCCBuiltin<"__builtin_lasx_xvfcmp_cune_d">, ++ Intrinsic<[llvm_v4i64_ty], [llvm_v4f64_ty, llvm_v4f64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lasx_xvfcmp_cueq_s : GCCBuiltin<"__builtin_lasx_xvfcmp_cueq_s">, ++ Intrinsic<[llvm_v8i32_ty], [llvm_v8f32_ty, llvm_v8f32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvfcmp_cueq_d : GCCBuiltin<"__builtin_lasx_xvfcmp_cueq_d">, ++ Intrinsic<[llvm_v4i64_ty], [llvm_v4f64_ty, llvm_v4f64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lasx_xvfcmp_ceq_s : GCCBuiltin<"__builtin_lasx_xvfcmp_ceq_s">, ++ Intrinsic<[llvm_v8i32_ty], [llvm_v8f32_ty, llvm_v8f32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvfcmp_ceq_d : GCCBuiltin<"__builtin_lasx_xvfcmp_ceq_d">, ++ Intrinsic<[llvm_v4i64_ty], [llvm_v4f64_ty, llvm_v4f64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lasx_xvfcmp_cne_s : GCCBuiltin<"__builtin_lasx_xvfcmp_cne_s">, ++ Intrinsic<[llvm_v8i32_ty], [llvm_v8f32_ty, llvm_v8f32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvfcmp_cne_d : GCCBuiltin<"__builtin_lasx_xvfcmp_cne_d">, ++ Intrinsic<[llvm_v4i64_ty], [llvm_v4f64_ty, llvm_v4f64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lasx_xvfcmp_clt_s : GCCBuiltin<"__builtin_lasx_xvfcmp_clt_s">, ++ Intrinsic<[llvm_v8i32_ty], [llvm_v8f32_ty, llvm_v8f32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvfcmp_clt_d : GCCBuiltin<"__builtin_lasx_xvfcmp_clt_d">, ++ Intrinsic<[llvm_v4i64_ty], [llvm_v4f64_ty, llvm_v4f64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lasx_xvfcmp_cult_s : GCCBuiltin<"__builtin_lasx_xvfcmp_cult_s">, ++ Intrinsic<[llvm_v8i32_ty], [llvm_v8f32_ty, llvm_v8f32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvfcmp_cult_d : GCCBuiltin<"__builtin_lasx_xvfcmp_cult_d">, ++ Intrinsic<[llvm_v4i64_ty], [llvm_v4f64_ty, llvm_v4f64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lasx_xvfcmp_cle_s : GCCBuiltin<"__builtin_lasx_xvfcmp_cle_s">, ++ Intrinsic<[llvm_v8i32_ty], [llvm_v8f32_ty, llvm_v8f32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvfcmp_cle_d : GCCBuiltin<"__builtin_lasx_xvfcmp_cle_d">, ++ Intrinsic<[llvm_v4i64_ty], [llvm_v4f64_ty, llvm_v4f64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lasx_xvfcmp_cule_s : GCCBuiltin<"__builtin_lasx_xvfcmp_cule_s">, ++ Intrinsic<[llvm_v8i32_ty], [llvm_v8f32_ty, llvm_v8f32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvfcmp_cule_d : GCCBuiltin<"__builtin_lasx_xvfcmp_cule_d">, ++ Intrinsic<[llvm_v4i64_ty], [llvm_v4f64_ty, llvm_v4f64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lasx_xvfcmp_saf_s : GCCBuiltin<"__builtin_lasx_xvfcmp_saf_s">, ++ Intrinsic<[llvm_v8i32_ty], [llvm_v8f32_ty, llvm_v8f32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvfcmp_saf_d : GCCBuiltin<"__builtin_lasx_xvfcmp_saf_d">, ++ Intrinsic<[llvm_v4i64_ty], [llvm_v4f64_ty, llvm_v4f64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lasx_xvfcmp_sor_s : GCCBuiltin<"__builtin_lasx_xvfcmp_sor_s">, ++ Intrinsic<[llvm_v8i32_ty], [llvm_v8f32_ty, llvm_v8f32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvfcmp_sor_d : GCCBuiltin<"__builtin_lasx_xvfcmp_sor_d">, ++ Intrinsic<[llvm_v4i64_ty], [llvm_v4f64_ty, llvm_v4f64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lasx_xvfcmp_sun_s : GCCBuiltin<"__builtin_lasx_xvfcmp_sun_s">, ++ Intrinsic<[llvm_v8i32_ty], [llvm_v8f32_ty, llvm_v8f32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvfcmp_sun_d : 
GCCBuiltin<"__builtin_lasx_xvfcmp_sun_d">, ++ Intrinsic<[llvm_v4i64_ty], [llvm_v4f64_ty, llvm_v4f64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lasx_xvfcmp_sune_s : GCCBuiltin<"__builtin_lasx_xvfcmp_sune_s">, ++ Intrinsic<[llvm_v8i32_ty], [llvm_v8f32_ty, llvm_v8f32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvfcmp_sune_d : GCCBuiltin<"__builtin_lasx_xvfcmp_sune_d">, ++ Intrinsic<[llvm_v4i64_ty], [llvm_v4f64_ty, llvm_v4f64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lasx_xvfcmp_sueq_s : GCCBuiltin<"__builtin_lasx_xvfcmp_sueq_s">, ++ Intrinsic<[llvm_v8i32_ty], [llvm_v8f32_ty, llvm_v8f32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvfcmp_sueq_d : GCCBuiltin<"__builtin_lasx_xvfcmp_sueq_d">, ++ Intrinsic<[llvm_v4i64_ty], [llvm_v4f64_ty, llvm_v4f64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lasx_xvfcmp_seq_s : GCCBuiltin<"__builtin_lasx_xvfcmp_seq_s">, ++ Intrinsic<[llvm_v8i32_ty], [llvm_v8f32_ty, llvm_v8f32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvfcmp_seq_d : GCCBuiltin<"__builtin_lasx_xvfcmp_seq_d">, ++ Intrinsic<[llvm_v4i64_ty], [llvm_v4f64_ty, llvm_v4f64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lasx_xvfcmp_sne_s : GCCBuiltin<"__builtin_lasx_xvfcmp_sne_s">, ++ Intrinsic<[llvm_v8i32_ty], [llvm_v8f32_ty, llvm_v8f32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvfcmp_sne_d : GCCBuiltin<"__builtin_lasx_xvfcmp_sne_d">, ++ Intrinsic<[llvm_v4i64_ty], [llvm_v4f64_ty, llvm_v4f64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lasx_xvfcmp_slt_s : GCCBuiltin<"__builtin_lasx_xvfcmp_slt_s">, ++ Intrinsic<[llvm_v8i32_ty], [llvm_v8f32_ty, llvm_v8f32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvfcmp_slt_d : GCCBuiltin<"__builtin_lasx_xvfcmp_slt_d">, ++ Intrinsic<[llvm_v4i64_ty], [llvm_v4f64_ty, llvm_v4f64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lasx_xvfcmp_sult_s : GCCBuiltin<"__builtin_lasx_xvfcmp_sult_s">, ++ Intrinsic<[llvm_v8i32_ty], [llvm_v8f32_ty, llvm_v8f32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvfcmp_sult_d : GCCBuiltin<"__builtin_lasx_xvfcmp_sult_d">, ++ Intrinsic<[llvm_v4i64_ty], [llvm_v4f64_ty, llvm_v4f64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lasx_xvfcmp_sle_s : GCCBuiltin<"__builtin_lasx_xvfcmp_sle_s">, ++ Intrinsic<[llvm_v8i32_ty], [llvm_v8f32_ty, llvm_v8f32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvfcmp_sle_d : GCCBuiltin<"__builtin_lasx_xvfcmp_sle_d">, ++ Intrinsic<[llvm_v4i64_ty], [llvm_v4f64_ty, llvm_v4f64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lasx_xvfcmp_sule_s : GCCBuiltin<"__builtin_lasx_xvfcmp_sule_s">, ++ Intrinsic<[llvm_v8i32_ty], [llvm_v8f32_ty, llvm_v8f32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvfcmp_sule_d : GCCBuiltin<"__builtin_lasx_xvfcmp_sule_d">, ++ Intrinsic<[llvm_v4i64_ty], [llvm_v4f64_ty, llvm_v4f64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lasx_xvbitsel_v : GCCBuiltin<"__builtin_lasx_xvbitsel_v">, ++ Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty, llvm_v32i8_ty, llvm_v32i8_ty], [IntrNoMem]>; ++ ++def int_loongarch_lasx_xvshuf_b : GCCBuiltin<"__builtin_lasx_xvshuf_b">, ++ Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty, llvm_v32i8_ty, llvm_v32i8_ty], [IntrNoMem]>; ++ ++def int_loongarch_lasx_xvldrepl_b : GCCBuiltin<"__builtin_lasx_xvldrepl_b">, ++ Intrinsic<[llvm_v32i8_ty], [llvm_ptr_ty, llvm_i32_ty], [IntrReadMem, IntrArgMemOnly]>; ++def int_loongarch_lasx_xvldrepl_h : GCCBuiltin<"__builtin_lasx_xvldrepl_h">, ++ Intrinsic<[llvm_v16i16_ty], [llvm_ptr_ty, llvm_i32_ty], [IntrReadMem, IntrArgMemOnly]>; ++def int_loongarch_lasx_xvldrepl_w : GCCBuiltin<"__builtin_lasx_xvldrepl_w">, ++ Intrinsic<[llvm_v8i32_ty], [llvm_ptr_ty, llvm_i32_ty], [IntrReadMem, 
IntrArgMemOnly]>; ++def int_loongarch_lasx_xvldrepl_d : GCCBuiltin<"__builtin_lasx_xvldrepl_d">, ++ Intrinsic<[llvm_v4i64_ty], [llvm_ptr_ty, llvm_i32_ty], [IntrReadMem, IntrArgMemOnly]>; ++ ++def int_loongarch_lasx_xvstelm_b : GCCBuiltin<"__builtin_lasx_xvstelm_b">, ++ Intrinsic<[], [llvm_v32i8_ty, llvm_ptr_ty, llvm_i32_ty, llvm_i32_ty], [IntrArgMemOnly]>; ++def int_loongarch_lasx_xvstelm_h : GCCBuiltin<"__builtin_lasx_xvstelm_h">, ++ Intrinsic<[], [llvm_v16i16_ty, llvm_ptr_ty, llvm_i32_ty, llvm_i32_ty], [IntrArgMemOnly]>; ++def int_loongarch_lasx_xvstelm_w : GCCBuiltin<"__builtin_lasx_xvstelm_w">, ++ Intrinsic<[], [llvm_v8i32_ty, llvm_ptr_ty, llvm_i32_ty, llvm_i32_ty], [IntrArgMemOnly]>; ++def int_loongarch_lasx_xvstelm_d : GCCBuiltin<"__builtin_lasx_xvstelm_d">, ++ Intrinsic<[], [llvm_v4i64_ty, llvm_ptr_ty, llvm_i32_ty, llvm_i32_ty], [IntrArgMemOnly]>; ++ ++def int_loongarch_lasx_xvldx : GCCBuiltin<"__builtin_lasx_xvldx">, ++ Intrinsic<[llvm_v32i8_ty], [llvm_ptr_ty, llvm_i64_ty], ++ [IntrReadMem, IntrArgMemOnly]>; ++ ++def int_loongarch_lasx_xvstx : GCCBuiltin<"__builtin_lasx_xvstx">, ++ Intrinsic<[], [llvm_v32i8_ty, llvm_ptr_ty, llvm_i64_ty], ++ [IntrArgMemOnly]>; ++ ++def int_loongarch_lasx_xvaddwev_d_w : GCCBuiltin<"__builtin_lasx_xvaddwev_d_w">, ++ Intrinsic<[llvm_v4i64_ty], [llvm_v8i32_ty, llvm_v8i32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvaddwev_w_h : GCCBuiltin<"__builtin_lasx_xvaddwev_w_h">, ++ Intrinsic<[llvm_v8i32_ty], [llvm_v16i16_ty, llvm_v16i16_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvaddwev_h_b : GCCBuiltin<"__builtin_lasx_xvaddwev_h_b">, ++ Intrinsic<[llvm_v16i16_ty], [llvm_v32i8_ty, llvm_v32i8_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvaddwev_q_d : GCCBuiltin<"__builtin_lasx_xvaddwev_q_d">, ++ Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_v4i64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lasx_xvsubwev_d_w : GCCBuiltin<"__builtin_lasx_xvsubwev_d_w">, ++ Intrinsic<[llvm_v4i64_ty], [llvm_v8i32_ty, llvm_v8i32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvsubwev_w_h : GCCBuiltin<"__builtin_lasx_xvsubwev_w_h">, ++ Intrinsic<[llvm_v8i32_ty], [llvm_v16i16_ty, llvm_v16i16_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvsubwev_h_b : GCCBuiltin<"__builtin_lasx_xvsubwev_h_b">, ++ Intrinsic<[llvm_v16i16_ty], [llvm_v32i8_ty, llvm_v32i8_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvsubwev_q_d : GCCBuiltin<"__builtin_lasx_xvsubwev_q_d">, ++ Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_v4i64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lasx_xvaddwod_d_w : GCCBuiltin<"__builtin_lasx_xvaddwod_d_w">, ++ Intrinsic<[llvm_v4i64_ty], [llvm_v8i32_ty, llvm_v8i32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvaddwod_w_h : GCCBuiltin<"__builtin_lasx_xvaddwod_w_h">, ++ Intrinsic<[llvm_v8i32_ty], [llvm_v16i16_ty, llvm_v16i16_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvaddwod_h_b : GCCBuiltin<"__builtin_lasx_xvaddwod_h_b">, ++ Intrinsic<[llvm_v16i16_ty], [llvm_v32i8_ty, llvm_v32i8_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvaddwod_q_d : GCCBuiltin<"__builtin_lasx_xvaddwod_q_d">, ++ Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_v4i64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lasx_xvsubwod_d_w : GCCBuiltin<"__builtin_lasx_xvsubwod_d_w">, ++ Intrinsic<[llvm_v4i64_ty], [llvm_v8i32_ty, llvm_v8i32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvsubwod_w_h : GCCBuiltin<"__builtin_lasx_xvsubwod_w_h">, ++ Intrinsic<[llvm_v8i32_ty], [llvm_v16i16_ty, llvm_v16i16_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvsubwod_h_b : GCCBuiltin<"__builtin_lasx_xvsubwod_h_b">, ++ Intrinsic<[llvm_v16i16_ty], 
[llvm_v32i8_ty, llvm_v32i8_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvsubwod_q_d : GCCBuiltin<"__builtin_lasx_xvsubwod_q_d">, ++ Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_v4i64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lasx_xvaddwev_d_wu : GCCBuiltin<"__builtin_lasx_xvaddwev_d_wu">, ++ Intrinsic<[llvm_v4i64_ty], [llvm_v8i32_ty, llvm_v8i32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvaddwev_w_hu : GCCBuiltin<"__builtin_lasx_xvaddwev_w_hu">, ++ Intrinsic<[llvm_v8i32_ty], [llvm_v16i16_ty, llvm_v16i16_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvaddwev_h_bu : GCCBuiltin<"__builtin_lasx_xvaddwev_h_bu">, ++ Intrinsic<[llvm_v16i16_ty], [llvm_v32i8_ty, llvm_v32i8_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvaddwev_q_du : GCCBuiltin<"__builtin_lasx_xvaddwev_q_du">, ++ Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_v4i64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lasx_xvsubwev_d_wu : GCCBuiltin<"__builtin_lasx_xvsubwev_d_wu">, ++ Intrinsic<[llvm_v4i64_ty], [llvm_v8i32_ty, llvm_v8i32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvsubwev_w_hu : GCCBuiltin<"__builtin_lasx_xvsubwev_w_hu">, ++ Intrinsic<[llvm_v8i32_ty], [llvm_v16i16_ty, llvm_v16i16_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvsubwev_h_bu : GCCBuiltin<"__builtin_lasx_xvsubwev_h_bu">, ++ Intrinsic<[llvm_v16i16_ty], [llvm_v32i8_ty, llvm_v32i8_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvsubwev_q_du : GCCBuiltin<"__builtin_lasx_xvsubwev_q_du">, ++ Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_v4i64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lasx_xvaddwod_d_wu : GCCBuiltin<"__builtin_lasx_xvaddwod_d_wu">, ++ Intrinsic<[llvm_v4i64_ty], [llvm_v8i32_ty, llvm_v8i32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvaddwod_w_hu : GCCBuiltin<"__builtin_lasx_xvaddwod_w_hu">, ++ Intrinsic<[llvm_v8i32_ty], [llvm_v16i16_ty, llvm_v16i16_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvaddwod_h_bu : GCCBuiltin<"__builtin_lasx_xvaddwod_h_bu">, ++ Intrinsic<[llvm_v16i16_ty], [llvm_v32i8_ty, llvm_v32i8_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvaddwod_q_du : GCCBuiltin<"__builtin_lasx_xvaddwod_q_du">, ++ Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_v4i64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lasx_xvsubwod_d_wu : GCCBuiltin<"__builtin_lasx_xvsubwod_d_wu">, ++ Intrinsic<[llvm_v4i64_ty], [llvm_v8i32_ty, llvm_v8i32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvsubwod_w_hu : GCCBuiltin<"__builtin_lasx_xvsubwod_w_hu">, ++ Intrinsic<[llvm_v8i32_ty], [llvm_v16i16_ty, llvm_v16i16_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvsubwod_h_bu : GCCBuiltin<"__builtin_lasx_xvsubwod_h_bu">, ++ Intrinsic<[llvm_v16i16_ty], [llvm_v32i8_ty, llvm_v32i8_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvsubwod_q_du : GCCBuiltin<"__builtin_lasx_xvsubwod_q_du">, ++ Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_v4i64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lasx_xvaddwev_d_wu_w : GCCBuiltin<"__builtin_lasx_xvaddwev_d_wu_w">, ++ Intrinsic<[llvm_v4i64_ty], [llvm_v8i32_ty, llvm_v8i32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvaddwev_w_hu_h : GCCBuiltin<"__builtin_lasx_xvaddwev_w_hu_h">, ++ Intrinsic<[llvm_v8i32_ty], [llvm_v16i16_ty, llvm_v16i16_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvaddwev_h_bu_b : GCCBuiltin<"__builtin_lasx_xvaddwev_h_bu_b">, ++ Intrinsic<[llvm_v16i16_ty], [llvm_v32i8_ty, llvm_v32i8_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvaddwev_q_du_d : GCCBuiltin<"__builtin_lasx_xvaddwev_q_du_d">, ++ Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_v4i64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lasx_xvaddwod_d_wu_w : 
GCCBuiltin<"__builtin_lasx_xvaddwod_d_wu_w">, ++ Intrinsic<[llvm_v4i64_ty], [llvm_v8i32_ty, llvm_v8i32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvaddwod_w_hu_h : GCCBuiltin<"__builtin_lasx_xvaddwod_w_hu_h">, ++ Intrinsic<[llvm_v8i32_ty], [llvm_v16i16_ty, llvm_v16i16_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvaddwod_h_bu_b : GCCBuiltin<"__builtin_lasx_xvaddwod_h_bu_b">, ++ Intrinsic<[llvm_v16i16_ty], [llvm_v32i8_ty, llvm_v32i8_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvaddwod_q_du_d : GCCBuiltin<"__builtin_lasx_xvaddwod_q_du_d">, ++ Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_v4i64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lasx_xvhaddw_qu_du : GCCBuiltin<"__builtin_lasx_xvhaddw_qu_du">, ++ Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_v4i64_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvhsubw_qu_du : GCCBuiltin<"__builtin_lasx_xvhsubw_qu_du">, ++ Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_v4i64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lasx_xvhaddw_q_d : GCCBuiltin<"__builtin_lasx_xvhaddw_q_d">, ++ Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_v4i64_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvhsubw_q_d : GCCBuiltin<"__builtin_lasx_xvhsubw_q_d">, ++ Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_v4i64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lasx_xvmuh_b : GCCBuiltin<"__builtin_lasx_xvmuh_b">, ++ Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty, llvm_v32i8_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvmuh_h : GCCBuiltin<"__builtin_lasx_xvmuh_h">, ++ Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty, llvm_v16i16_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvmuh_w : GCCBuiltin<"__builtin_lasx_xvmuh_w">, ++ Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_v8i32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvmuh_d : GCCBuiltin<"__builtin_lasx_xvmuh_d">, ++ Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_v4i64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lasx_xvmuh_bu : GCCBuiltin<"__builtin_lasx_xvmuh_bu">, ++ Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty, llvm_v32i8_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvmuh_hu : GCCBuiltin<"__builtin_lasx_xvmuh_hu">, ++ Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty, llvm_v16i16_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvmuh_wu : GCCBuiltin<"__builtin_lasx_xvmuh_wu">, ++ Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_v8i32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvmuh_du : GCCBuiltin<"__builtin_lasx_xvmuh_du">, ++ Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_v4i64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lasx_xvmulwev_d_w : GCCBuiltin<"__builtin_lasx_xvmulwev_d_w">, ++ Intrinsic<[llvm_v4i64_ty], [llvm_v8i32_ty, llvm_v8i32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvmulwev_w_h : GCCBuiltin<"__builtin_lasx_xvmulwev_w_h">, ++ Intrinsic<[llvm_v8i32_ty], [llvm_v16i16_ty, llvm_v16i16_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvmulwev_h_b : GCCBuiltin<"__builtin_lasx_xvmulwev_h_b">, ++ Intrinsic<[llvm_v16i16_ty], [llvm_v32i8_ty, llvm_v32i8_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvmulwev_q_d : GCCBuiltin<"__builtin_lasx_xvmulwev_q_d">, ++ Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_v4i64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lasx_xvmulwod_d_w : GCCBuiltin<"__builtin_lasx_xvmulwod_d_w">, ++ Intrinsic<[llvm_v4i64_ty], [llvm_v8i32_ty, llvm_v8i32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvmulwod_w_h : GCCBuiltin<"__builtin_lasx_xvmulwod_w_h">, ++ Intrinsic<[llvm_v8i32_ty], [llvm_v16i16_ty, llvm_v16i16_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvmulwod_h_b : GCCBuiltin<"__builtin_lasx_xvmulwod_h_b">, ++ Intrinsic<[llvm_v16i16_ty], 
[llvm_v32i8_ty, llvm_v32i8_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvmulwod_q_d : GCCBuiltin<"__builtin_lasx_xvmulwod_q_d">, ++ Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_v4i64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lasx_xvmulwev_d_wu : GCCBuiltin<"__builtin_lasx_xvmulwev_d_wu">, ++ Intrinsic<[llvm_v4i64_ty], [llvm_v8i32_ty, llvm_v8i32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvmulwev_w_hu : GCCBuiltin<"__builtin_lasx_xvmulwev_w_hu">, ++ Intrinsic<[llvm_v8i32_ty], [llvm_v16i16_ty, llvm_v16i16_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvmulwev_h_bu : GCCBuiltin<"__builtin_lasx_xvmulwev_h_bu">, ++ Intrinsic<[llvm_v16i16_ty], [llvm_v32i8_ty, llvm_v32i8_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvmulwev_q_du : GCCBuiltin<"__builtin_lasx_xvmulwev_q_du">, ++ Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_v4i64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lasx_xvmulwod_d_wu : GCCBuiltin<"__builtin_lasx_xvmulwod_d_wu">, ++ Intrinsic<[llvm_v4i64_ty], [llvm_v8i32_ty, llvm_v8i32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvmulwod_w_hu : GCCBuiltin<"__builtin_lasx_xvmulwod_w_hu">, ++ Intrinsic<[llvm_v8i32_ty], [llvm_v16i16_ty, llvm_v16i16_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvmulwod_h_bu : GCCBuiltin<"__builtin_lasx_xvmulwod_h_bu">, ++ Intrinsic<[llvm_v16i16_ty], [llvm_v32i8_ty, llvm_v32i8_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvmulwod_q_du : GCCBuiltin<"__builtin_lasx_xvmulwod_q_du">, ++ Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_v4i64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lasx_xvmulwev_d_wu_w : GCCBuiltin<"__builtin_lasx_xvmulwev_d_wu_w">, ++ Intrinsic<[llvm_v4i64_ty], [llvm_v8i32_ty, llvm_v8i32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvmulwev_w_hu_h : GCCBuiltin<"__builtin_lasx_xvmulwev_w_hu_h">, ++ Intrinsic<[llvm_v8i32_ty], [llvm_v16i16_ty, llvm_v16i16_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvmulwev_h_bu_b : GCCBuiltin<"__builtin_lasx_xvmulwev_h_bu_b">, ++ Intrinsic<[llvm_v16i16_ty], [llvm_v32i8_ty, llvm_v32i8_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvmulwev_q_du_d : GCCBuiltin<"__builtin_lasx_xvmulwev_q_du_d">, ++ Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_v4i64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lasx_xvmulwod_d_wu_w : GCCBuiltin<"__builtin_lasx_xvmulwod_d_wu_w">, ++ Intrinsic<[llvm_v4i64_ty], [llvm_v8i32_ty, llvm_v8i32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvmulwod_w_hu_h : GCCBuiltin<"__builtin_lasx_xvmulwod_w_hu_h">, ++ Intrinsic<[llvm_v8i32_ty], [llvm_v16i16_ty, llvm_v16i16_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvmulwod_h_bu_b : GCCBuiltin<"__builtin_lasx_xvmulwod_h_bu_b">, ++ Intrinsic<[llvm_v16i16_ty], [llvm_v32i8_ty, llvm_v32i8_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvmulwod_q_du_d : GCCBuiltin<"__builtin_lasx_xvmulwod_q_du_d">, ++ Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_v4i64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lasx_xvmaddwev_d_w : GCCBuiltin<"__builtin_lasx_xvmaddwev_d_w">, ++ Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_v8i32_ty, llvm_v8i32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvmaddwev_w_h : GCCBuiltin<"__builtin_lasx_xvmaddwev_w_h">, ++ Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_v16i16_ty, llvm_v16i16_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvmaddwev_h_b : GCCBuiltin<"__builtin_lasx_xvmaddwev_h_b">, ++ Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty, llvm_v32i8_ty, llvm_v32i8_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvmaddwev_q_d : GCCBuiltin<"__builtin_lasx_xvmaddwev_q_d">, ++ Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_v4i64_ty, llvm_v4i64_ty], [IntrNoMem]>; ++ 
++def int_loongarch_lasx_xvmaddwod_d_w : GCCBuiltin<"__builtin_lasx_xvmaddwod_d_w">, ++ Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_v8i32_ty, llvm_v8i32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvmaddwod_w_h : GCCBuiltin<"__builtin_lasx_xvmaddwod_w_h">, ++ Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_v16i16_ty, llvm_v16i16_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvmaddwod_h_b : GCCBuiltin<"__builtin_lasx_xvmaddwod_h_b">, ++ Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty, llvm_v32i8_ty, llvm_v32i8_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvmaddwod_q_d : GCCBuiltin<"__builtin_lasx_xvmaddwod_q_d">, ++ Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_v4i64_ty, llvm_v4i64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lasx_xvmaddwev_d_wu : GCCBuiltin<"__builtin_lasx_xvmaddwev_d_wu">, ++ Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_v8i32_ty, llvm_v8i32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvmaddwev_w_hu : GCCBuiltin<"__builtin_lasx_xvmaddwev_w_hu">, ++ Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_v16i16_ty, llvm_v16i16_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvmaddwev_h_bu : GCCBuiltin<"__builtin_lasx_xvmaddwev_h_bu">, ++ Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty, llvm_v32i8_ty, llvm_v32i8_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvmaddwev_q_du : GCCBuiltin<"__builtin_lasx_xvmaddwev_q_du">, ++ Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_v4i64_ty, llvm_v4i64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lasx_xvmaddwod_d_wu : GCCBuiltin<"__builtin_lasx_xvmaddwod_d_wu">, ++ Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_v8i32_ty, llvm_v8i32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvmaddwod_w_hu : GCCBuiltin<"__builtin_lasx_xvmaddwod_w_hu">, ++ Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_v16i16_ty, llvm_v16i16_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvmaddwod_h_bu : GCCBuiltin<"__builtin_lasx_xvmaddwod_h_bu">, ++ Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty, llvm_v32i8_ty, llvm_v32i8_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvmaddwod_q_du : GCCBuiltin<"__builtin_lasx_xvmaddwod_q_du">, ++ Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_v4i64_ty, llvm_v4i64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lasx_xvmaddwev_d_wu_w : GCCBuiltin<"__builtin_lasx_xvmaddwev_d_wu_w">, ++ Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_v8i32_ty, llvm_v8i32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvmaddwev_w_hu_h : GCCBuiltin<"__builtin_lasx_xvmaddwev_w_hu_h">, ++ Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_v16i16_ty, llvm_v16i16_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvmaddwev_h_bu_b : GCCBuiltin<"__builtin_lasx_xvmaddwev_h_bu_b">, ++ Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty, llvm_v32i8_ty, llvm_v32i8_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvmaddwev_q_du_d : GCCBuiltin<"__builtin_lasx_xvmaddwev_q_du_d">, ++ Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_v4i64_ty, llvm_v4i64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lasx_xvmaddwod_d_wu_w : GCCBuiltin<"__builtin_lasx_xvmaddwod_d_wu_w">, ++ Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_v8i32_ty, llvm_v8i32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvmaddwod_w_hu_h : GCCBuiltin<"__builtin_lasx_xvmaddwod_w_hu_h">, ++ Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_v16i16_ty, llvm_v16i16_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvmaddwod_h_bu_b : GCCBuiltin<"__builtin_lasx_xvmaddwod_h_bu_b">, ++ Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty, llvm_v32i8_ty, llvm_v32i8_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvmaddwod_q_du_d : GCCBuiltin<"__builtin_lasx_xvmaddwod_q_du_d">, ++ Intrinsic<[llvm_v4i64_ty], 
[llvm_v4i64_ty, llvm_v4i64_ty, llvm_v4i64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lasx_xvsrln_b_h : GCCBuiltin<"__builtin_lasx_xvsrln_b_h">, ++ Intrinsic<[llvm_v32i8_ty], [llvm_v16i16_ty, llvm_v16i16_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvsrln_h_w : GCCBuiltin<"__builtin_lasx_xvsrln_h_w">, ++ Intrinsic<[llvm_v16i16_ty], [llvm_v8i32_ty, llvm_v8i32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvsrln_w_d : GCCBuiltin<"__builtin_lasx_xvsrln_w_d">, ++ Intrinsic<[llvm_v8i32_ty], [llvm_v4i64_ty, llvm_v4i64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lasx_xvsran_b_h : GCCBuiltin<"__builtin_lasx_xvsran_b_h">, ++ Intrinsic<[llvm_v32i8_ty], [llvm_v16i16_ty, llvm_v16i16_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvsran_h_w : GCCBuiltin<"__builtin_lasx_xvsran_h_w">, ++ Intrinsic<[llvm_v16i16_ty], [llvm_v8i32_ty, llvm_v8i32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvsran_w_d : GCCBuiltin<"__builtin_lasx_xvsran_w_d">, ++ Intrinsic<[llvm_v8i32_ty], [llvm_v4i64_ty, llvm_v4i64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lasx_xvsrlrn_b_h : GCCBuiltin<"__builtin_lasx_xvsrlrn_b_h">, ++ Intrinsic<[llvm_v32i8_ty], [llvm_v16i16_ty, llvm_v16i16_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvsrlrn_h_w : GCCBuiltin<"__builtin_lasx_xvsrlrn_h_w">, ++ Intrinsic<[llvm_v16i16_ty], [llvm_v8i32_ty, llvm_v8i32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvsrlrn_w_d : GCCBuiltin<"__builtin_lasx_xvsrlrn_w_d">, ++ Intrinsic<[llvm_v8i32_ty], [llvm_v4i64_ty, llvm_v4i64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lasx_xvsrarn_b_h : GCCBuiltin<"__builtin_lasx_xvsrarn_b_h">, ++ Intrinsic<[llvm_v32i8_ty], [llvm_v16i16_ty, llvm_v16i16_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvsrarn_h_w : GCCBuiltin<"__builtin_lasx_xvsrarn_h_w">, ++ Intrinsic<[llvm_v16i16_ty], [llvm_v8i32_ty, llvm_v8i32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvsrarn_w_d : GCCBuiltin<"__builtin_lasx_xvsrarn_w_d">, ++ Intrinsic<[llvm_v8i32_ty], [llvm_v4i64_ty, llvm_v4i64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lasx_xvssrln_b_h : GCCBuiltin<"__builtin_lasx_xvssrln_b_h">, ++ Intrinsic<[llvm_v32i8_ty], [llvm_v16i16_ty, llvm_v16i16_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvssrln_h_w : GCCBuiltin<"__builtin_lasx_xvssrln_h_w">, ++ Intrinsic<[llvm_v16i16_ty], [llvm_v8i32_ty, llvm_v8i32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvssrln_w_d : GCCBuiltin<"__builtin_lasx_xvssrln_w_d">, ++ Intrinsic<[llvm_v8i32_ty], [llvm_v4i64_ty, llvm_v4i64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lasx_xvssran_b_h : GCCBuiltin<"__builtin_lasx_xvssran_b_h">, ++ Intrinsic<[llvm_v32i8_ty], [llvm_v16i16_ty, llvm_v16i16_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvssran_h_w : GCCBuiltin<"__builtin_lasx_xvssran_h_w">, ++ Intrinsic<[llvm_v16i16_ty], [llvm_v8i32_ty, llvm_v8i32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvssran_w_d : GCCBuiltin<"__builtin_lasx_xvssran_w_d">, ++ Intrinsic<[llvm_v8i32_ty], [llvm_v4i64_ty, llvm_v4i64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lasx_xvssrlrn_b_h : GCCBuiltin<"__builtin_lasx_xvssrlrn_b_h">, ++ Intrinsic<[llvm_v32i8_ty], [llvm_v16i16_ty, llvm_v16i16_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvssrlrn_h_w : GCCBuiltin<"__builtin_lasx_xvssrlrn_h_w">, ++ Intrinsic<[llvm_v16i16_ty], [llvm_v8i32_ty, llvm_v8i32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvssrlrn_w_d : GCCBuiltin<"__builtin_lasx_xvssrlrn_w_d">, ++ Intrinsic<[llvm_v8i32_ty], [llvm_v4i64_ty, llvm_v4i64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lasx_xvssrarn_b_h : GCCBuiltin<"__builtin_lasx_xvssrarn_b_h">, ++ Intrinsic<[llvm_v32i8_ty], [llvm_v16i16_ty, llvm_v16i16_ty], 
[IntrNoMem]>; ++def int_loongarch_lasx_xvssrarn_h_w : GCCBuiltin<"__builtin_lasx_xvssrarn_h_w">, ++ Intrinsic<[llvm_v16i16_ty], [llvm_v8i32_ty, llvm_v8i32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvssrarn_w_d : GCCBuiltin<"__builtin_lasx_xvssrarn_w_d">, ++ Intrinsic<[llvm_v8i32_ty], [llvm_v4i64_ty, llvm_v4i64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lasx_xvssrln_bu_h : GCCBuiltin<"__builtin_lasx_xvssrln_bu_h">, ++ Intrinsic<[llvm_v32i8_ty], [llvm_v16i16_ty, llvm_v16i16_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvssrln_hu_w : GCCBuiltin<"__builtin_lasx_xvssrln_hu_w">, ++ Intrinsic<[llvm_v16i16_ty], [llvm_v8i32_ty, llvm_v8i32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvssrln_wu_d : GCCBuiltin<"__builtin_lasx_xvssrln_wu_d">, ++ Intrinsic<[llvm_v8i32_ty], [llvm_v4i64_ty, llvm_v4i64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lasx_xvssran_bu_h : GCCBuiltin<"__builtin_lasx_xvssran_bu_h">, ++ Intrinsic<[llvm_v32i8_ty], [llvm_v16i16_ty, llvm_v16i16_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvssran_hu_w : GCCBuiltin<"__builtin_lasx_xvssran_hu_w">, ++ Intrinsic<[llvm_v16i16_ty], [llvm_v8i32_ty, llvm_v8i32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvssran_wu_d : GCCBuiltin<"__builtin_lasx_xvssran_wu_d">, ++ Intrinsic<[llvm_v8i32_ty], [llvm_v4i64_ty, llvm_v4i64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lasx_xvssrlrn_bu_h : GCCBuiltin<"__builtin_lasx_xvssrlrn_bu_h">, ++ Intrinsic<[llvm_v32i8_ty], [llvm_v16i16_ty, llvm_v16i16_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvssrlrn_hu_w : GCCBuiltin<"__builtin_lasx_xvssrlrn_hu_w">, ++ Intrinsic<[llvm_v16i16_ty], [llvm_v8i32_ty, llvm_v8i32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvssrlrn_wu_d : GCCBuiltin<"__builtin_lasx_xvssrlrn_wu_d">, ++ Intrinsic<[llvm_v8i32_ty], [llvm_v4i64_ty, llvm_v4i64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lasx_xvssrarn_bu_h : GCCBuiltin<"__builtin_lasx_xvssrarn_bu_h">, ++ Intrinsic<[llvm_v32i8_ty], [llvm_v16i16_ty, llvm_v16i16_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvssrarn_hu_w : GCCBuiltin<"__builtin_lasx_xvssrarn_hu_w">, ++ Intrinsic<[llvm_v16i16_ty], [llvm_v8i32_ty, llvm_v8i32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvssrarn_wu_d : GCCBuiltin<"__builtin_lasx_xvssrarn_wu_d">, ++ Intrinsic<[llvm_v8i32_ty], [llvm_v4i64_ty, llvm_v4i64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lasx_xvandn_v : GCCBuiltin<"__builtin_lasx_xvandn_v">, ++ Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty, llvm_v32i8_ty], [IntrNoMem]>; ++ ++def int_loongarch_lasx_xvorn_v : GCCBuiltin<"__builtin_lasx_xvorn_v">, ++ Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty, llvm_v32i8_ty], [IntrNoMem]>; ++ ++def int_loongarch_lasx_xvfrstp_b : GCCBuiltin<"__builtin_lasx_xvfrstp_b">, ++ Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty, llvm_v32i8_ty, llvm_v32i8_ty], ++ [IntrNoMem]>; ++def int_loongarch_lasx_xvfrstp_h : GCCBuiltin<"__builtin_lasx_xvfrstp_h">, ++ Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty, llvm_v16i16_ty, llvm_v16i16_ty], ++ [IntrNoMem]>; ++ ++def int_loongarch_lasx_xvadd_q : GCCBuiltin<"__builtin_lasx_xvadd_q">, ++ Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_v4i64_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvsub_q : GCCBuiltin<"__builtin_lasx_xvsub_q">, ++ Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_v4i64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lasx_xvsigncov_b : GCCBuiltin<"__builtin_lasx_xvsigncov_b">, ++ Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty, llvm_v32i8_ty], ++ [IntrNoMem]>; ++def int_loongarch_lasx_xvsigncov_h : GCCBuiltin<"__builtin_lasx_xvsigncov_h">, ++ Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty, llvm_v16i16_ty], ++ 
[IntrNoMem]>; ++def int_loongarch_lasx_xvsigncov_w : GCCBuiltin<"__builtin_lasx_xvsigncov_w">, ++ Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_v8i32_ty], ++ [IntrNoMem]>; ++def int_loongarch_lasx_xvsigncov_d : GCCBuiltin<"__builtin_lasx_xvsigncov_d">, ++ Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_v4i64_ty], ++ [IntrNoMem]>; ++ ++def int_loongarch_lasx_xvfcvt_h_s : GCCBuiltin<"__builtin_lasx_xvfcvt_h_s">, ++ Intrinsic<[llvm_v16i16_ty], [llvm_v8f32_ty, llvm_v8f32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvfcvt_s_d : GCCBuiltin<"__builtin_lasx_xvfcvt_s_d">, ++ Intrinsic<[llvm_v8f32_ty], [llvm_v4f64_ty, llvm_v4f64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lasx_xvffint_s_l : GCCBuiltin<"__builtin_lasx_xvffint_s_l">, ++ Intrinsic<[llvm_v8f32_ty], [llvm_v4i64_ty, llvm_v4i64_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvftint_w_d : GCCBuiltin<"__builtin_lasx_xvftint_w_d">, ++ Intrinsic<[llvm_v8i32_ty], [llvm_v4f64_ty, llvm_v4f64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lasx_xvftintrz_w_d : GCCBuiltin<"__builtin_lasx_xvftintrz_w_d">, ++ Intrinsic<[llvm_v8i32_ty], [llvm_v4f64_ty, llvm_v4f64_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvftintrp_w_d : GCCBuiltin<"__builtin_lasx_xvftintrp_w_d">, ++ Intrinsic<[llvm_v8i32_ty], [llvm_v4f64_ty, llvm_v4f64_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvftintrm_w_d : GCCBuiltin<"__builtin_lasx_xvftintrm_w_d">, ++ Intrinsic<[llvm_v8i32_ty], [llvm_v4f64_ty, llvm_v4f64_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvftintrne_w_d : GCCBuiltin<"__builtin_lasx_xvftintrne_w_d">, ++ Intrinsic<[llvm_v8i32_ty], [llvm_v4f64_ty, llvm_v4f64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lasx_xvbsrl_v : GCCBuiltin<"__builtin_lasx_xvbsrl_v">, ++ Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty, llvm_i32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvbsll_v : GCCBuiltin<"__builtin_lasx_xvbsll_v">, ++ Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty, llvm_i32_ty], [IntrNoMem]>; ++ ++def int_loongarch_lasx_xvfrstpi_b : GCCBuiltin<"__builtin_lasx_xvfrstpi_b">, ++ Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty, llvm_v32i8_ty, llvm_i32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvfrstpi_h : GCCBuiltin<"__builtin_lasx_xvfrstpi_h">, ++ Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty, llvm_v16i16_ty, llvm_i32_ty], [IntrNoMem]>; ++ ++def int_loongarch_lasx_xvneg_b : GCCBuiltin<"__builtin_lasx_xvneg_b">, ++ Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvneg_h : GCCBuiltin<"__builtin_lasx_xvneg_h">, ++ Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvneg_w : GCCBuiltin<"__builtin_lasx_xvneg_w">, ++ Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvneg_d : GCCBuiltin<"__builtin_lasx_xvneg_d">, ++ Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lasx_xvmskgez_b : GCCBuiltin<"__builtin_lasx_xvmskgez_b">, ++ Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvmsknz_b : GCCBuiltin<"__builtin_lasx_xvmsknz_b">, ++ Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty], [IntrNoMem]>; ++ ++def int_loongarch_lasx_xvfrintrm_s : GCCBuiltin<"__builtin_lasx_xvfrintrm_s">, ++ Intrinsic<[llvm_v8i32_ty], [llvm_v8f32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvfrintrm_d : GCCBuiltin<"__builtin_lasx_xvfrintrm_d">, ++ Intrinsic<[llvm_v4i64_ty], [llvm_v4f64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lasx_xvfrintrp_s : GCCBuiltin<"__builtin_lasx_xvfrintrp_s">, ++ Intrinsic<[llvm_v8i32_ty], [llvm_v8f32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvfrintrp_d : 
GCCBuiltin<"__builtin_lasx_xvfrintrp_d">, ++ Intrinsic<[llvm_v4i64_ty], [llvm_v4f64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lasx_xvfrintrz_s : GCCBuiltin<"__builtin_lasx_xvfrintrz_s">, ++ Intrinsic<[llvm_v8i32_ty], [llvm_v8f32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvfrintrz_d : GCCBuiltin<"__builtin_lasx_xvfrintrz_d">, ++ Intrinsic<[llvm_v4i64_ty], [llvm_v4f64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lasx_xvfrintrne_s : GCCBuiltin<"__builtin_lasx_xvfrintrne_s">, ++ Intrinsic<[llvm_v8i32_ty], [llvm_v8f32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvfrintrne_d : GCCBuiltin<"__builtin_lasx_xvfrintrne_d">, ++ Intrinsic<[llvm_v4i64_ty], [llvm_v4f64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lasx_xvffinth_d_w : GCCBuiltin<"__builtin_lasx_xvffinth_d_w">, ++ Intrinsic<[llvm_v4f64_ty], [llvm_v8i32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvffintl_d_w : GCCBuiltin<"__builtin_lasx_xvffintl_d_w">, ++ Intrinsic<[llvm_v4f64_ty], [llvm_v8i32_ty], [IntrNoMem]>; ++ ++def int_loongarch_lasx_xvftintrm_w_s : GCCBuiltin<"__builtin_lasx_xvftintrm_w_s">, ++ Intrinsic<[llvm_v8i32_ty], [llvm_v8f32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvftintrm_l_d : GCCBuiltin<"__builtin_lasx_xvftintrm_l_d">, ++ Intrinsic<[llvm_v4i64_ty], [llvm_v4f64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lasx_xvftintrp_w_s : GCCBuiltin<"__builtin_lasx_xvftintrp_w_s">, ++ Intrinsic<[llvm_v8i32_ty], [llvm_v8f32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvftintrp_l_d : GCCBuiltin<"__builtin_lasx_xvftintrp_l_d">, ++ Intrinsic<[llvm_v4i64_ty], [llvm_v4f64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lasx_xvftintrz_w_s : GCCBuiltin<"__builtin_lasx_xvftintrz_w_s">, ++ Intrinsic<[llvm_v8i32_ty], [llvm_v8f32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvftintrz_l_d : GCCBuiltin<"__builtin_lasx_xvftintrz_l_d">, ++ Intrinsic<[llvm_v4i64_ty], [llvm_v4f64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lasx_xvftintrne_w_s : GCCBuiltin<"__builtin_lasx_xvftintrne_w_s">, ++ Intrinsic<[llvm_v8i32_ty], [llvm_v8f32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvftintrne_l_d : GCCBuiltin<"__builtin_lasx_xvftintrne_l_d">, ++ Intrinsic<[llvm_v4i64_ty], [llvm_v4f64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lasx_xvftinth_l_s : GCCBuiltin<"__builtin_lasx_xvftinth_l_s">, ++ Intrinsic<[llvm_v4i64_ty], [llvm_v8f32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvftintl_l_s : GCCBuiltin<"__builtin_lasx_xvftintl_l_s">, ++ Intrinsic<[llvm_v4i64_ty], [llvm_v8f32_ty], [IntrNoMem]>; ++ ++def int_loongarch_lasx_xvftintrmh_l_s : GCCBuiltin<"__builtin_lasx_xvftintrmh_l_s">, ++ Intrinsic<[llvm_v4i64_ty], [llvm_v8f32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvftintrml_l_s : GCCBuiltin<"__builtin_lasx_xvftintrml_l_s">, ++ Intrinsic<[llvm_v4i64_ty], [llvm_v8f32_ty], [IntrNoMem]>; ++ ++def int_loongarch_lasx_xvftintrph_l_s : GCCBuiltin<"__builtin_lasx_xvftintrph_l_s">, ++ Intrinsic<[llvm_v4i64_ty], [llvm_v8f32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvftintrpl_l_s : GCCBuiltin<"__builtin_lasx_xvftintrpl_l_s">, ++ Intrinsic<[llvm_v4i64_ty], [llvm_v8f32_ty], [IntrNoMem]>; ++ ++def int_loongarch_lasx_xvftintrzh_l_s : GCCBuiltin<"__builtin_lasx_xvftintrzh_l_s">, ++ Intrinsic<[llvm_v4i64_ty], [llvm_v8f32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvftintrzl_l_s : GCCBuiltin<"__builtin_lasx_xvftintrzl_l_s">, ++ Intrinsic<[llvm_v4i64_ty], [llvm_v8f32_ty], [IntrNoMem]>; ++ ++def int_loongarch_lasx_xvftintrneh_l_s : GCCBuiltin<"__builtin_lasx_xvftintrneh_l_s">, ++ Intrinsic<[llvm_v4i64_ty], [llvm_v8f32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvftintrnel_l_s : 
GCCBuiltin<"__builtin_lasx_xvftintrnel_l_s">, ++ Intrinsic<[llvm_v4i64_ty], [llvm_v8f32_ty], [IntrNoMem]>; ++ ++def int_loongarch_lasx_xvexth_d_w : GCCBuiltin<"__builtin_lasx_xvexth_d_w">, ++ Intrinsic<[llvm_v4i64_ty], [llvm_v8i32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvexth_w_h : GCCBuiltin<"__builtin_lasx_xvexth_w_h">, ++ Intrinsic<[llvm_v8i32_ty], [llvm_v16i16_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvexth_h_b : GCCBuiltin<"__builtin_lasx_xvexth_h_b">, ++ Intrinsic<[llvm_v16i16_ty], [llvm_v32i8_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvexth_q_d : GCCBuiltin<"__builtin_lasx_xvexth_q_d">, ++ Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lasx_xvsllwil_d_w : GCCBuiltin<"__builtin_lasx_xvsllwil_d_w">, ++ Intrinsic<[llvm_v4i64_ty], [llvm_v8i32_ty, llvm_i32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvsllwil_w_h : GCCBuiltin<"__builtin_lasx_xvsllwil_w_h">, ++ Intrinsic<[llvm_v8i32_ty], [llvm_v16i16_ty, llvm_i32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvsllwil_h_b : GCCBuiltin<"__builtin_lasx_xvsllwil_h_b">, ++ Intrinsic<[llvm_v16i16_ty], [llvm_v32i8_ty, llvm_i32_ty], [IntrNoMem]>; ++ ++def int_loongarch_lasx_xvsllwil_du_wu : GCCBuiltin<"__builtin_lasx_xvsllwil_du_wu">, ++ Intrinsic<[llvm_v4i64_ty], [llvm_v8i32_ty, llvm_i32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvsllwil_wu_hu : GCCBuiltin<"__builtin_lasx_xvsllwil_wu_hu">, ++ Intrinsic<[llvm_v8i32_ty], [llvm_v16i16_ty, llvm_i32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvsllwil_hu_bu : GCCBuiltin<"__builtin_lasx_xvsllwil_hu_bu">, ++ Intrinsic<[llvm_v16i16_ty], [llvm_v32i8_ty, llvm_i32_ty], [IntrNoMem]>; ++ ++def int_loongarch_lasx_xvbitclri_b : GCCBuiltin<"__builtin_lasx_xvbitclri_b">, ++ Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty, llvm_i32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvbitclri_h : GCCBuiltin<"__builtin_lasx_xvbitclri_h">, ++ Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty, llvm_i32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvbitclri_w : GCCBuiltin<"__builtin_lasx_xvbitclri_w">, ++ Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_i32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvbitclri_d : GCCBuiltin<"__builtin_lasx_xvbitclri_d">, ++ Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_i32_ty], [IntrNoMem]>; ++ ++def int_loongarch_lasx_xvbitseti_b : GCCBuiltin<"__builtin_lasx_xvbitseti_b">, ++ Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty, llvm_i32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvbitseti_h : GCCBuiltin<"__builtin_lasx_xvbitseti_h">, ++ Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty, llvm_i32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvbitseti_w : GCCBuiltin<"__builtin_lasx_xvbitseti_w">, ++ Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_i32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvbitseti_d : GCCBuiltin<"__builtin_lasx_xvbitseti_d">, ++ Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_i32_ty], [IntrNoMem]>; ++ ++def int_loongarch_lasx_xvbitrevi_b : GCCBuiltin<"__builtin_lasx_xvbitrevi_b">, ++ Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty, llvm_i32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvbitrevi_h : GCCBuiltin<"__builtin_lasx_xvbitrevi_h">, ++ Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty, llvm_i32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvbitrevi_w : GCCBuiltin<"__builtin_lasx_xvbitrevi_w">, ++ Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_i32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvbitrevi_d : GCCBuiltin<"__builtin_lasx_xvbitrevi_d">, ++ Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_i32_ty], [IntrNoMem]>; ++ ++def int_loongarch_lasx_xvssrlrni_b_h : 
GCCBuiltin<"__builtin_lasx_xvssrlrni_b_h">, ++ Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty, llvm_v32i8_ty, llvm_i32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvssrlrni_h_w : GCCBuiltin<"__builtin_lasx_xvssrlrni_h_w">, ++ Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty, llvm_v16i16_ty, llvm_i32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvssrlrni_w_d : GCCBuiltin<"__builtin_lasx_xvssrlrni_w_d">, ++ Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_v8i32_ty, llvm_i32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvssrlrni_d_q : GCCBuiltin<"__builtin_lasx_xvssrlrni_d_q">, ++ Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_v4i64_ty, llvm_i32_ty], [IntrNoMem]>; ++ ++def int_loongarch_lasx_xvsrani_b_h : GCCBuiltin<"__builtin_lasx_xvsrani_b_h">, ++ Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty, llvm_v32i8_ty, llvm_i32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvsrani_h_w : GCCBuiltin<"__builtin_lasx_xvsrani_h_w">, ++ Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty, llvm_v16i16_ty, llvm_i32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvsrani_w_d : GCCBuiltin<"__builtin_lasx_xvsrani_w_d">, ++ Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_v8i32_ty, llvm_i32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvsrani_d_q : GCCBuiltin<"__builtin_lasx_xvsrani_d_q">, ++ Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_v4i64_ty, llvm_i32_ty], [IntrNoMem]>; ++ ++def int_loongarch_lasx_xvextrins_b : GCCBuiltin<"__builtin_lasx_xvextrins_b">, ++ Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty, llvm_v32i8_ty, llvm_i32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvextrins_h : GCCBuiltin<"__builtin_lasx_xvextrins_h">, ++ Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty, llvm_v16i16_ty, llvm_i32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvextrins_w : GCCBuiltin<"__builtin_lasx_xvextrins_w">, ++ Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_v8i32_ty, llvm_i32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvextrins_d : GCCBuiltin<"__builtin_lasx_xvextrins_d">, ++ Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_v4i64_ty, llvm_i32_ty], [IntrNoMem]>; ++ ++def int_loongarch_lasx_xvbitseli_b : GCCBuiltin<"__builtin_lasx_xvbitseli_b">, ++ Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty, llvm_v32i8_ty, llvm_i32_ty], [IntrNoMem]>; ++ ++def int_loongarch_lasx_xvandi_b : GCCBuiltin<"__builtin_lasx_xvandi_b">, ++ Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty, llvm_i32_ty], [IntrNoMem]>; ++ ++def int_loongarch_lasx_xvori_b : GCCBuiltin<"__builtin_lasx_xvori_b">, ++ Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty, llvm_i32_ty], [IntrNoMem]>; ++ ++def int_loongarch_lasx_xvxori_b : GCCBuiltin<"__builtin_lasx_xvxori_b">, ++ Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty, llvm_i32_ty], [IntrNoMem]>; ++ ++def int_loongarch_lasx_xvnori_b : GCCBuiltin<"__builtin_lasx_xvnori_b">, ++ Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty, llvm_i32_ty], [IntrNoMem]>; ++ ++def int_loongarch_lasx_xvldi : GCCBuiltin<"__builtin_lasx_xvldi">, ++ Intrinsic<[llvm_v4i64_ty], [llvm_i32_ty], [IntrNoMem]>; ++ ++def int_loongarch_lasx_xvpermi_w : GCCBuiltin<"__builtin_lasx_xvpermi_w">, ++ Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_v8i32_ty, llvm_i32_ty], [IntrNoMem]>; ++ ++def int_loongarch_lasx_xvsadd_b : GCCBuiltin<"__builtin_lasx_xvsadd_b">, ++ Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty, llvm_v32i8_ty], ++ [Commutative, IntrNoMem]>; ++def int_loongarch_lasx_xvsadd_h : GCCBuiltin<"__builtin_lasx_xvsadd_h">, ++ Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty, llvm_v16i16_ty], ++ [Commutative, IntrNoMem]>; ++def int_loongarch_lasx_xvsadd_w : GCCBuiltin<"__builtin_lasx_xvsadd_w">, ++ Intrinsic<[llvm_v8i32_ty], 
[llvm_v8i32_ty, llvm_v8i32_ty], ++ [Commutative, IntrNoMem]>; ++def int_loongarch_lasx_xvsadd_d : GCCBuiltin<"__builtin_lasx_xvsadd_d">, ++ Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_v4i64_ty], ++ [Commutative, IntrNoMem]>; ++ ++def int_loongarch_lasx_xvssub_b : GCCBuiltin<"__builtin_lasx_xvssub_b">, ++ Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty, llvm_v32i8_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvssub_h : GCCBuiltin<"__builtin_lasx_xvssub_h">, ++ Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty, llvm_v16i16_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvssub_w : GCCBuiltin<"__builtin_lasx_xvssub_w">, ++ Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_v8i32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvssub_d : GCCBuiltin<"__builtin_lasx_xvssub_d">, ++ Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_v4i64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lasx_xvsadd_bu : GCCBuiltin<"__builtin_lasx_xvsadd_bu">, ++ Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty, llvm_v32i8_ty], ++ [Commutative, IntrNoMem]>; ++def int_loongarch_lasx_xvsadd_hu : GCCBuiltin<"__builtin_lasx_xvsadd_hu">, ++ Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty, llvm_v16i16_ty], ++ [Commutative, IntrNoMem]>; ++def int_loongarch_lasx_xvsadd_wu : GCCBuiltin<"__builtin_lasx_xvsadd_wu">, ++ Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_v8i32_ty], ++ [Commutative, IntrNoMem]>; ++def int_loongarch_lasx_xvsadd_du : GCCBuiltin<"__builtin_lasx_xvsadd_du">, ++ Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_v4i64_ty], ++ [Commutative, IntrNoMem]>; ++ ++def int_loongarch_lasx_xvssub_bu : GCCBuiltin<"__builtin_lasx_xvssub_bu">, ++ Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty, llvm_v32i8_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvssub_hu : GCCBuiltin<"__builtin_lasx_xvssub_hu">, ++ Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty, llvm_v16i16_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvssub_wu : GCCBuiltin<"__builtin_lasx_xvssub_wu">, ++ Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_v8i32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvssub_du : GCCBuiltin<"__builtin_lasx_xvssub_du">, ++ Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_v4i64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lasx_xvhaddw_h_b : GCCBuiltin<"__builtin_lasx_xvhaddw_h_b">, ++ Intrinsic<[llvm_v16i16_ty], [llvm_v32i8_ty, llvm_v32i8_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvhaddw_w_h : GCCBuiltin<"__builtin_lasx_xvhaddw_w_h">, ++ Intrinsic<[llvm_v8i32_ty], [llvm_v16i16_ty, llvm_v16i16_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvhaddw_d_w : GCCBuiltin<"__builtin_lasx_xvhaddw_d_w">, ++ Intrinsic<[llvm_v4i64_ty], [llvm_v8i32_ty, llvm_v8i32_ty], [IntrNoMem]>; ++ ++def int_loongarch_lasx_xvhsubw_h_b : GCCBuiltin<"__builtin_lasx_xvhsubw_h_b">, ++ Intrinsic<[llvm_v16i16_ty], [llvm_v32i8_ty, llvm_v32i8_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvhsubw_w_h : GCCBuiltin<"__builtin_lasx_xvhsubw_w_h">, ++ Intrinsic<[llvm_v8i32_ty], [llvm_v16i16_ty, llvm_v16i16_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvhsubw_d_w : GCCBuiltin<"__builtin_lasx_xvhsubw_d_w">, ++ Intrinsic<[llvm_v4i64_ty], [llvm_v8i32_ty, llvm_v8i32_ty], [IntrNoMem]>; ++ ++def int_loongarch_lasx_xvhaddw_hu_bu : GCCBuiltin<"__builtin_lasx_xvhaddw_hu_bu">, ++ Intrinsic<[llvm_v16i16_ty], [llvm_v32i8_ty, llvm_v32i8_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvhaddw_wu_hu : GCCBuiltin<"__builtin_lasx_xvhaddw_wu_hu">, ++ Intrinsic<[llvm_v8i32_ty], [llvm_v16i16_ty, llvm_v16i16_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvhaddw_du_wu : GCCBuiltin<"__builtin_lasx_xvhaddw_du_wu">, ++ Intrinsic<[llvm_v4i64_ty], [llvm_v8i32_ty, 
llvm_v8i32_ty], [IntrNoMem]>; ++ ++def int_loongarch_lasx_xvhsubw_hu_bu : GCCBuiltin<"__builtin_lasx_xvhsubw_hu_bu">, ++ Intrinsic<[llvm_v16i16_ty], [llvm_v32i8_ty, llvm_v32i8_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvhsubw_wu_hu : GCCBuiltin<"__builtin_lasx_xvhsubw_wu_hu">, ++ Intrinsic<[llvm_v8i32_ty], [llvm_v16i16_ty, llvm_v16i16_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvhsubw_du_wu : GCCBuiltin<"__builtin_lasx_xvhsubw_du_wu">, ++ Intrinsic<[llvm_v4i64_ty], [llvm_v8i32_ty, llvm_v8i32_ty], [IntrNoMem]>; ++ ++def int_loongarch_lasx_xvadda_b : GCCBuiltin<"__builtin_lasx_xvadda_b">, ++ Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty, llvm_v32i8_ty], ++ [Commutative, IntrNoMem]>; ++def int_loongarch_lasx_xvadda_h : GCCBuiltin<"__builtin_lasx_xvadda_h">, ++ Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty, llvm_v16i16_ty], ++ [Commutative, IntrNoMem]>; ++def int_loongarch_lasx_xvadda_w : GCCBuiltin<"__builtin_lasx_xvadda_w">, ++ Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_v8i32_ty], ++ [Commutative, IntrNoMem]>; ++def int_loongarch_lasx_xvadda_d : GCCBuiltin<"__builtin_lasx_xvadda_d">, ++ Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_v4i64_ty], ++ [Commutative, IntrNoMem]>; ++ ++def int_loongarch_lasx_xvabsd_b : GCCBuiltin<"__builtin_lasx_xvabsd_b">, ++ Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty, llvm_v32i8_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvabsd_h : GCCBuiltin<"__builtin_lasx_xvabsd_h">, ++ Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty, llvm_v16i16_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvabsd_w : GCCBuiltin<"__builtin_lasx_xvabsd_w">, ++ Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_v8i32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvabsd_d : GCCBuiltin<"__builtin_lasx_xvabsd_d">, ++ Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_v4i64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lasx_xvabsd_bu : GCCBuiltin<"__builtin_lasx_xvabsd_bu">, ++ Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty, llvm_v32i8_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvabsd_hu : GCCBuiltin<"__builtin_lasx_xvabsd_hu">, ++ Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty, llvm_v16i16_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvabsd_wu : GCCBuiltin<"__builtin_lasx_xvabsd_wu">, ++ Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_v8i32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvabsd_du : GCCBuiltin<"__builtin_lasx_xvabsd_du">, ++ Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_v4i64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lasx_xvavg_b : GCCBuiltin<"__builtin_lasx_xvavg_b">, ++ Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty, llvm_v32i8_ty], ++ [Commutative, IntrNoMem]>; ++def int_loongarch_lasx_xvavg_h : GCCBuiltin<"__builtin_lasx_xvavg_h">, ++ Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty, llvm_v16i16_ty], ++ [Commutative, IntrNoMem]>; ++def int_loongarch_lasx_xvavg_w : GCCBuiltin<"__builtin_lasx_xvavg_w">, ++ Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_v8i32_ty], ++ [Commutative, IntrNoMem]>; ++def int_loongarch_lasx_xvavg_d : GCCBuiltin<"__builtin_lasx_xvavg_d">, ++ Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_v4i64_ty], ++ [Commutative, IntrNoMem]>; ++ ++def int_loongarch_lasx_xvavg_bu : GCCBuiltin<"__builtin_lasx_xvavg_bu">, ++ Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty, llvm_v32i8_ty], ++ [Commutative, IntrNoMem]>; ++def int_loongarch_lasx_xvavg_hu : GCCBuiltin<"__builtin_lasx_xvavg_hu">, ++ Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty, llvm_v16i16_ty], ++ [Commutative, IntrNoMem]>; ++def int_loongarch_lasx_xvavg_wu : GCCBuiltin<"__builtin_lasx_xvavg_wu">, ++ Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, 
llvm_v8i32_ty], ++ [Commutative, IntrNoMem]>; ++def int_loongarch_lasx_xvavg_du : GCCBuiltin<"__builtin_lasx_xvavg_du">, ++ Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_v4i64_ty], ++ [Commutative, IntrNoMem]>; ++ ++def int_loongarch_lasx_xvavgr_b : GCCBuiltin<"__builtin_lasx_xvavgr_b">, ++ Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty, llvm_v32i8_ty], ++ [Commutative, IntrNoMem]>; ++def int_loongarch_lasx_xvavgr_h : GCCBuiltin<"__builtin_lasx_xvavgr_h">, ++ Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty, llvm_v16i16_ty], ++ [Commutative, IntrNoMem]>; ++def int_loongarch_lasx_xvavgr_w : GCCBuiltin<"__builtin_lasx_xvavgr_w">, ++ Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_v8i32_ty], ++ [Commutative, IntrNoMem]>; ++def int_loongarch_lasx_xvavgr_d : GCCBuiltin<"__builtin_lasx_xvavgr_d">, ++ Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_v4i64_ty], ++ [Commutative, IntrNoMem]>; ++ ++def int_loongarch_lasx_xvavgr_bu : GCCBuiltin<"__builtin_lasx_xvavgr_bu">, ++ Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty, llvm_v32i8_ty], ++ [Commutative, IntrNoMem]>; ++def int_loongarch_lasx_xvavgr_hu : GCCBuiltin<"__builtin_lasx_xvavgr_hu">, ++ Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty, llvm_v16i16_ty], ++ [Commutative, IntrNoMem]>; ++def int_loongarch_lasx_xvavgr_wu : GCCBuiltin<"__builtin_lasx_xvavgr_wu">, ++ Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_v8i32_ty], ++ [Commutative, IntrNoMem]>; ++def int_loongarch_lasx_xvavgr_du : GCCBuiltin<"__builtin_lasx_xvavgr_du">, ++ Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_v4i64_ty], ++ [Commutative, IntrNoMem]>; ++ ++def int_loongarch_lasx_xvsrlr_b : GCCBuiltin<"__builtin_lasx_xvsrlr_b">, ++ Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty, llvm_v32i8_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvsrlr_h : GCCBuiltin<"__builtin_lasx_xvsrlr_h">, ++ Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty, llvm_v16i16_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvsrlr_w : GCCBuiltin<"__builtin_lasx_xvsrlr_w">, ++ Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_v8i32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvsrlr_d : GCCBuiltin<"__builtin_lasx_xvsrlr_d">, ++ Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_v4i64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lasx_xvsrar_b : GCCBuiltin<"__builtin_lasx_xvsrar_b">, ++ Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty, llvm_v32i8_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvsrar_h : GCCBuiltin<"__builtin_lasx_xvsrar_h">, ++ Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty, llvm_v16i16_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvsrar_w : GCCBuiltin<"__builtin_lasx_xvsrar_w">, ++ Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_v8i32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvsrar_d : GCCBuiltin<"__builtin_lasx_xvsrar_d">, ++ Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_v4i64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lasx_xvfmax_s : GCCBuiltin<"__builtin_lasx_xvfmax_s">, ++ Intrinsic<[llvm_v8f32_ty], [llvm_v8f32_ty, llvm_v8f32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvfmax_d : GCCBuiltin<"__builtin_lasx_xvfmax_d">, ++ Intrinsic<[llvm_v4f64_ty], [llvm_v4f64_ty, llvm_v4f64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lasx_xvfmin_s : GCCBuiltin<"__builtin_lasx_xvfmin_s">, ++ Intrinsic<[llvm_v8f32_ty], [llvm_v8f32_ty, llvm_v8f32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvfmin_d : GCCBuiltin<"__builtin_lasx_xvfmin_d">, ++ Intrinsic<[llvm_v4f64_ty], [llvm_v4f64_ty, llvm_v4f64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lasx_xvfmaxa_s : GCCBuiltin<"__builtin_lasx_xvfmaxa_s">, ++ Intrinsic<[llvm_v8f32_ty], [llvm_v8f32_ty, llvm_v8f32_ty], [IntrNoMem]>; 
++def int_loongarch_lasx_xvfmaxa_d : GCCBuiltin<"__builtin_lasx_xvfmaxa_d">, ++ Intrinsic<[llvm_v4f64_ty], [llvm_v4f64_ty, llvm_v4f64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lasx_xvfmina_s : GCCBuiltin<"__builtin_lasx_xvfmina_s">, ++ Intrinsic<[llvm_v8f32_ty], [llvm_v8f32_ty, llvm_v8f32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvfmina_d : GCCBuiltin<"__builtin_lasx_xvfmina_d">, ++ Intrinsic<[llvm_v4f64_ty], [llvm_v4f64_ty, llvm_v4f64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lasx_xvfclass_s : GCCBuiltin<"__builtin_lasx_xvfclass_s">, ++ Intrinsic<[llvm_v8i32_ty], [llvm_v8f32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvfclass_d : GCCBuiltin<"__builtin_lasx_xvfclass_d">, ++ Intrinsic<[llvm_v4i64_ty], [llvm_v4f64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lasx_xvfrecip_s : GCCBuiltin<"__builtin_lasx_xvfrecip_s">, ++ Intrinsic<[llvm_v8f32_ty], [llvm_v8f32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvfrecip_d : GCCBuiltin<"__builtin_lasx_xvfrecip_d">, ++ Intrinsic<[llvm_v4f64_ty], [llvm_v4f64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lasx_xvfrsqrt_s : GCCBuiltin<"__builtin_lasx_xvfrsqrt_s">, ++ Intrinsic<[llvm_v8f32_ty], [llvm_v8f32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvfrsqrt_d : GCCBuiltin<"__builtin_lasx_xvfrsqrt_d">, ++ Intrinsic<[llvm_v4f64_ty], [llvm_v4f64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lasx_xvfcvtl_s_h : GCCBuiltin<"__builtin_lasx_xvfcvtl_s_h">, ++ Intrinsic<[llvm_v8f32_ty], [llvm_v16i16_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvfcvtl_d_s : GCCBuiltin<"__builtin_lasx_xvfcvtl_d_s">, ++ Intrinsic<[llvm_v4f64_ty], [llvm_v8f32_ty], [IntrNoMem]>; ++ ++def int_loongarch_lasx_xvfcvth_s_h : GCCBuiltin<"__builtin_lasx_xvfcvth_s_h">, ++ Intrinsic<[llvm_v8f32_ty], [llvm_v16i16_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvfcvth_d_s : GCCBuiltin<"__builtin_lasx_xvfcvth_d_s">, ++ Intrinsic<[llvm_v4f64_ty], [llvm_v8f32_ty], [IntrNoMem]>; ++ ++def int_loongarch_lasx_xvftint_w_s : GCCBuiltin<"__builtin_lasx_xvftint_w_s">, ++ Intrinsic<[llvm_v8i32_ty], [llvm_v8f32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvftint_l_d : GCCBuiltin<"__builtin_lasx_xvftint_l_d">, ++ Intrinsic<[llvm_v4i64_ty], [llvm_v4f64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lasx_xvftint_wu_s : GCCBuiltin<"__builtin_lasx_xvftint_wu_s">, ++ Intrinsic<[llvm_v8i32_ty], [llvm_v8f32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvftint_lu_d : GCCBuiltin<"__builtin_lasx_xvftint_lu_d">, ++ Intrinsic<[llvm_v4i64_ty], [llvm_v4f64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lasx_xvsrlri_b : GCCBuiltin<"__builtin_lasx_xvsrlri_b">, ++ Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty, llvm_i32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvsrlri_h : GCCBuiltin<"__builtin_lasx_xvsrlri_h">, ++ Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty, llvm_i32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvsrlri_w : GCCBuiltin<"__builtin_lasx_xvsrlri_w">, ++ Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_i32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvsrlri_d : GCCBuiltin<"__builtin_lasx_xvsrlri_d">, ++ Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_i32_ty], [IntrNoMem]>; ++ ++def int_loongarch_lasx_xvsrari_b : GCCBuiltin<"__builtin_lasx_xvsrari_b">, ++ Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty, llvm_i32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvsrari_h : GCCBuiltin<"__builtin_lasx_xvsrari_h">, ++ Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty, llvm_i32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvsrari_w : GCCBuiltin<"__builtin_lasx_xvsrari_w">, ++ Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_i32_ty], [IntrNoMem]>; ++def 
int_loongarch_lasx_xvsrari_d : GCCBuiltin<"__builtin_lasx_xvsrari_d">, ++ Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_i32_ty], [IntrNoMem]>; ++ ++def int_loongarch_lasx_xvsat_b : GCCBuiltin<"__builtin_lasx_xvsat_b">, ++ Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty, llvm_i32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvsat_h : GCCBuiltin<"__builtin_lasx_xvsat_h">, ++ Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty, llvm_i32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvsat_w : GCCBuiltin<"__builtin_lasx_xvsat_w">, ++ Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_i32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvsat_d : GCCBuiltin<"__builtin_lasx_xvsat_d">, ++ Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_i32_ty], [IntrNoMem]>; ++ ++def int_loongarch_lasx_xvsat_bu : GCCBuiltin<"__builtin_lasx_xvsat_bu">, ++ Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty, llvm_i32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvsat_hu : GCCBuiltin<"__builtin_lasx_xvsat_hu">, ++ Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty, llvm_i32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvsat_wu : GCCBuiltin<"__builtin_lasx_xvsat_wu">, ++ Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_i32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvsat_du : GCCBuiltin<"__builtin_lasx_xvsat_du">, ++ Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_i32_ty], [IntrNoMem]>; ++ ++def int_loongarch_lasx_xvsrlni_b_h : GCCBuiltin<"__builtin_lasx_xvsrlni_b_h">, ++ Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty, llvm_v32i8_ty, llvm_i32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvsrlni_h_w : GCCBuiltin<"__builtin_lasx_xvsrlni_h_w">, ++ Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty, llvm_v16i16_ty, llvm_i32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvsrlni_w_d : GCCBuiltin<"__builtin_lasx_xvsrlni_w_d">, ++ Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_v8i32_ty, llvm_i32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvsrlni_d_q : GCCBuiltin<"__builtin_lasx_xvsrlni_d_q">, ++ Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_v4i64_ty, llvm_i32_ty], [IntrNoMem]>; ++ ++def int_loongarch_lasx_xvssrlni_b_h : GCCBuiltin<"__builtin_lasx_xvssrlni_b_h">, ++ Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty, llvm_v32i8_ty, llvm_i32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvssrlni_h_w : GCCBuiltin<"__builtin_lasx_xvssrlni_h_w">, ++ Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty, llvm_v16i16_ty, llvm_i32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvssrlni_w_d : GCCBuiltin<"__builtin_lasx_xvssrlni_w_d">, ++ Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_v8i32_ty, llvm_i32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvssrlni_d_q : GCCBuiltin<"__builtin_lasx_xvssrlni_d_q">, ++ Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_v4i64_ty, llvm_i32_ty], [IntrNoMem]>; ++ ++def int_loongarch_lasx_xvssrlrni_bu_h : GCCBuiltin<"__builtin_lasx_xvssrlrni_bu_h">, ++ Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty, llvm_v32i8_ty, llvm_i32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvssrlrni_hu_w : GCCBuiltin<"__builtin_lasx_xvssrlrni_hu_w">, ++ Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty, llvm_v16i16_ty, llvm_i32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvssrlrni_wu_d : GCCBuiltin<"__builtin_lasx_xvssrlrni_wu_d">, ++ Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_v8i32_ty, llvm_i32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvssrlrni_du_q : GCCBuiltin<"__builtin_lasx_xvssrlrni_du_q">, ++ Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_v4i64_ty, llvm_i32_ty], [IntrNoMem]>; ++ ++def int_loongarch_lasx_xvsrarni_b_h : GCCBuiltin<"__builtin_lasx_xvsrarni_b_h">, ++ Intrinsic<[llvm_v32i8_ty], 
[llvm_v32i8_ty, llvm_v32i8_ty, llvm_i32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvsrarni_h_w : GCCBuiltin<"__builtin_lasx_xvsrarni_h_w">, ++ Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty, llvm_v16i16_ty, llvm_i32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvsrarni_w_d : GCCBuiltin<"__builtin_lasx_xvsrarni_w_d">, ++ Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_v8i32_ty, llvm_i32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvsrarni_d_q : GCCBuiltin<"__builtin_lasx_xvsrarni_d_q">, ++ Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_v4i64_ty, llvm_i32_ty], [IntrNoMem]>; ++ ++def int_loongarch_lasx_xvssrani_b_h : GCCBuiltin<"__builtin_lasx_xvssrani_b_h">, ++ Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty, llvm_v32i8_ty, llvm_i32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvssrani_h_w : GCCBuiltin<"__builtin_lasx_xvssrani_h_w">, ++ Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty, llvm_v16i16_ty, llvm_i32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvssrani_w_d : GCCBuiltin<"__builtin_lasx_xvssrani_w_d">, ++ Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_v8i32_ty, llvm_i32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvssrani_d_q : GCCBuiltin<"__builtin_lasx_xvssrani_d_q">, ++ Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_v4i64_ty, llvm_i32_ty], [IntrNoMem]>; ++ ++def int_loongarch_lasx_xvssrani_bu_h : GCCBuiltin<"__builtin_lasx_xvssrani_bu_h">, ++ Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty, llvm_v32i8_ty, llvm_i32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvssrani_hu_w : GCCBuiltin<"__builtin_lasx_xvssrani_hu_w">, ++ Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty, llvm_v16i16_ty, llvm_i32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvssrani_wu_d : GCCBuiltin<"__builtin_lasx_xvssrani_wu_d">, ++ Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_v8i32_ty, llvm_i32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvssrani_du_q : GCCBuiltin<"__builtin_lasx_xvssrani_du_q">, ++ Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_v4i64_ty, llvm_i32_ty], [IntrNoMem]>; ++ ++def int_loongarch_lasx_xvssrarni_b_h : GCCBuiltin<"__builtin_lasx_xvssrarni_b_h">, ++ Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty, llvm_v32i8_ty, llvm_i32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvssrarni_h_w : GCCBuiltin<"__builtin_lasx_xvssrarni_h_w">, ++ Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty, llvm_v16i16_ty, llvm_i32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvssrarni_w_d : GCCBuiltin<"__builtin_lasx_xvssrarni_w_d">, ++ Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_v8i32_ty, llvm_i32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvssrarni_d_q : GCCBuiltin<"__builtin_lasx_xvssrarni_d_q">, ++ Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_v4i64_ty, llvm_i32_ty], [IntrNoMem]>; ++ ++def int_loongarch_lasx_xvssrarni_bu_h : GCCBuiltin<"__builtin_lasx_xvssrarni_bu_h">, ++ Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty, llvm_v32i8_ty, llvm_i32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvssrarni_hu_w : GCCBuiltin<"__builtin_lasx_xvssrarni_hu_w">, ++ Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty, llvm_v16i16_ty, llvm_i32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvssrarni_wu_d : GCCBuiltin<"__builtin_lasx_xvssrarni_wu_d">, ++ Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_v8i32_ty, llvm_i32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvssrarni_du_q : GCCBuiltin<"__builtin_lasx_xvssrarni_du_q">, ++ Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_v4i64_ty, llvm_i32_ty], [IntrNoMem]>; ++ ++def int_loongarch_lasx_xvssrlni_bu_h : GCCBuiltin<"__builtin_lasx_xvssrlni_bu_h">, ++ Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty, llvm_v32i8_ty, llvm_i32_ty], 
[IntrNoMem]>; ++def int_loongarch_lasx_xvssrlni_hu_w : GCCBuiltin<"__builtin_lasx_xvssrlni_hu_w">, ++ Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty, llvm_v16i16_ty, llvm_i32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvssrlni_wu_d : GCCBuiltin<"__builtin_lasx_xvssrlni_wu_d">, ++ Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_v8i32_ty, llvm_i32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvssrlni_du_q : GCCBuiltin<"__builtin_lasx_xvssrlni_du_q">, ++ Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_v4i64_ty, llvm_i32_ty], [IntrNoMem]>; ++ ++def int_loongarch_lasx_xvseq_b : GCCBuiltin<"__builtin_lasx_xvseq_b">, ++ Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty, llvm_v32i8_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvseq_h : GCCBuiltin<"__builtin_lasx_xvseq_h">, ++ Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty, llvm_v16i16_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvseq_w : GCCBuiltin<"__builtin_lasx_xvseq_w">, ++ Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_v8i32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvseq_d : GCCBuiltin<"__builtin_lasx_xvseq_d">, ++ Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_v4i64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lasx_xvsle_b : GCCBuiltin<"__builtin_lasx_xvsle_b">, ++ Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty, llvm_v32i8_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvsle_h : GCCBuiltin<"__builtin_lasx_xvsle_h">, ++ Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty, llvm_v16i16_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvsle_w : GCCBuiltin<"__builtin_lasx_xvsle_w">, ++ Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_v8i32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvsle_d : GCCBuiltin<"__builtin_lasx_xvsle_d">, ++ Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_v4i64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lasx_xvsle_bu : GCCBuiltin<"__builtin_lasx_xvsle_bu">, ++ Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty, llvm_v32i8_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvsle_hu : GCCBuiltin<"__builtin_lasx_xvsle_hu">, ++ Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty, llvm_v16i16_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvsle_wu : GCCBuiltin<"__builtin_lasx_xvsle_wu">, ++ Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_v8i32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvsle_du : GCCBuiltin<"__builtin_lasx_xvsle_du">, ++ Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_v4i64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lasx_xvslt_b : GCCBuiltin<"__builtin_lasx_xvslt_b">, ++ Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty, llvm_v32i8_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvslt_h : GCCBuiltin<"__builtin_lasx_xvslt_h">, ++ Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty, llvm_v16i16_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvslt_w : GCCBuiltin<"__builtin_lasx_xvslt_w">, ++ Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_v8i32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvslt_d : GCCBuiltin<"__builtin_lasx_xvslt_d">, ++ Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_v4i64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lasx_xvslt_bu : GCCBuiltin<"__builtin_lasx_xvslt_bu">, ++ Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty, llvm_v32i8_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvslt_hu : GCCBuiltin<"__builtin_lasx_xvslt_hu">, ++ Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty, llvm_v16i16_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvslt_wu : GCCBuiltin<"__builtin_lasx_xvslt_wu">, ++ Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_v8i32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvslt_du : GCCBuiltin<"__builtin_lasx_xvslt_du">, ++ Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_v4i64_ty], 
[IntrNoMem]>; ++ ++def int_loongarch_lasx_xvadd_b : GCCBuiltin<"__builtin_lasx_xvadd_b">, ++ Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty, llvm_v32i8_ty], ++ [Commutative, IntrNoMem]>; ++def int_loongarch_lasx_xvadd_h : GCCBuiltin<"__builtin_lasx_xvadd_h">, ++ Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty, llvm_v16i16_ty], ++ [Commutative, IntrNoMem]>; ++def int_loongarch_lasx_xvadd_w : GCCBuiltin<"__builtin_lasx_xvadd_w">, ++ Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_v8i32_ty], ++ [Commutative, IntrNoMem]>; ++def int_loongarch_lasx_xvadd_d : GCCBuiltin<"__builtin_lasx_xvadd_d">, ++ Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_v4i64_ty], ++ [Commutative, IntrNoMem]>; ++ ++def int_loongarch_lasx_xvsub_b : GCCBuiltin<"__builtin_lasx_xvsub_b">, ++ Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty, llvm_v32i8_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvsub_h : GCCBuiltin<"__builtin_lasx_xvsub_h">, ++ Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty, llvm_v16i16_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvsub_w : GCCBuiltin<"__builtin_lasx_xvsub_w">, ++ Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_v8i32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvsub_d : GCCBuiltin<"__builtin_lasx_xvsub_d">, ++ Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_v4i64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lasx_xvmax_b : GCCBuiltin<"__builtin_lasx_xvmax_b">, ++ Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty, llvm_v32i8_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvmax_h : GCCBuiltin<"__builtin_lasx_xvmax_h">, ++ Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty, llvm_v16i16_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvmax_w : GCCBuiltin<"__builtin_lasx_xvmax_w">, ++ Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_v8i32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvmax_d : GCCBuiltin<"__builtin_lasx_xvmax_d">, ++ Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_v4i64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lasx_xvmin_b : GCCBuiltin<"__builtin_lasx_xvmin_b">, ++ Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty, llvm_v32i8_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvmin_h : GCCBuiltin<"__builtin_lasx_xvmin_h">, ++ Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty, llvm_v16i16_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvmin_w : GCCBuiltin<"__builtin_lasx_xvmin_w">, ++ Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_v8i32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvmin_d : GCCBuiltin<"__builtin_lasx_xvmin_d">, ++ Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_v4i64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lasx_xvmax_bu : GCCBuiltin<"__builtin_lasx_xvmax_bu">, ++ Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty, llvm_v32i8_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvmax_hu : GCCBuiltin<"__builtin_lasx_xvmax_hu">, ++ Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty, llvm_v16i16_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvmax_wu : GCCBuiltin<"__builtin_lasx_xvmax_wu">, ++ Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_v8i32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvmax_du : GCCBuiltin<"__builtin_lasx_xvmax_du">, ++ Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_v4i64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lasx_xvmin_bu : GCCBuiltin<"__builtin_lasx_xvmin_bu">, ++ Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty, llvm_v32i8_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvmin_hu : GCCBuiltin<"__builtin_lasx_xvmin_hu">, ++ Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty, llvm_v16i16_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvmin_wu : GCCBuiltin<"__builtin_lasx_xvmin_wu">, ++ Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_v8i32_ty], [IntrNoMem]>; 
++def int_loongarch_lasx_xvmin_du : GCCBuiltin<"__builtin_lasx_xvmin_du">, ++ Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_v4i64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lasx_xvmul_b : GCCBuiltin<"__builtin_lasx_xvmul_b">, ++ Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty, llvm_v32i8_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvmul_h : GCCBuiltin<"__builtin_lasx_xvmul_h">, ++ Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty, llvm_v16i16_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvmul_w : GCCBuiltin<"__builtin_lasx_xvmul_w">, ++ Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_v8i32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvmul_d : GCCBuiltin<"__builtin_lasx_xvmul_d">, ++ Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_v4i64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lasx_xvmadd_b : GCCBuiltin<"__builtin_lasx_xvmadd_b">, ++ Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty, llvm_v32i8_ty, llvm_v32i8_ty], ++ [IntrNoMem]>; ++def int_loongarch_lasx_xvmadd_h : GCCBuiltin<"__builtin_lasx_xvmadd_h">, ++ Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty, llvm_v16i16_ty, llvm_v16i16_ty], ++ [IntrNoMem]>; ++def int_loongarch_lasx_xvmadd_w : GCCBuiltin<"__builtin_lasx_xvmadd_w">, ++ Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_v8i32_ty, llvm_v8i32_ty], ++ [IntrNoMem]>; ++def int_loongarch_lasx_xvmadd_d : GCCBuiltin<"__builtin_lasx_xvmadd_d">, ++ Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_v4i64_ty, llvm_v4i64_ty], ++ [IntrNoMem]>; ++ ++def int_loongarch_lasx_xvmsub_b : GCCBuiltin<"__builtin_lasx_xvmsub_b">, ++ Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty, llvm_v32i8_ty, llvm_v32i8_ty], ++ [IntrNoMem]>; ++def int_loongarch_lasx_xvmsub_h : GCCBuiltin<"__builtin_lasx_xvmsub_h">, ++ Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty, llvm_v16i16_ty, llvm_v16i16_ty], ++ [IntrNoMem]>; ++def int_loongarch_lasx_xvmsub_w : GCCBuiltin<"__builtin_lasx_xvmsub_w">, ++ Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_v8i32_ty, llvm_v8i32_ty], ++ [IntrNoMem]>; ++def int_loongarch_lasx_xvmsub_d : GCCBuiltin<"__builtin_lasx_xvmsub_d">, ++ Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_v4i64_ty, llvm_v4i64_ty], ++ [IntrNoMem]>; ++ ++def int_loongarch_lasx_xvdiv_b : GCCBuiltin<"__builtin_lasx_xvdiv_b">, ++ Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty, llvm_v32i8_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvdiv_h : GCCBuiltin<"__builtin_lasx_xvdiv_h">, ++ Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty, llvm_v16i16_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvdiv_w : GCCBuiltin<"__builtin_lasx_xvdiv_w">, ++ Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_v8i32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvdiv_d : GCCBuiltin<"__builtin_lasx_xvdiv_d">, ++ Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_v4i64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lasx_xvmod_b : GCCBuiltin<"__builtin_lasx_xvmod_b">, ++ Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty, llvm_v32i8_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvmod_h : GCCBuiltin<"__builtin_lasx_xvmod_h">, ++ Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty, llvm_v16i16_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvmod_w : GCCBuiltin<"__builtin_lasx_xvmod_w">, ++ Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_v8i32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvmod_d : GCCBuiltin<"__builtin_lasx_xvmod_d">, ++ Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_v4i64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lasx_xvdiv_bu : GCCBuiltin<"__builtin_lasx_xvdiv_bu">, ++ Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty, llvm_v32i8_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvdiv_hu : GCCBuiltin<"__builtin_lasx_xvdiv_hu">, ++ 
Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty, llvm_v16i16_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvdiv_wu : GCCBuiltin<"__builtin_lasx_xvdiv_wu">, ++ Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_v8i32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvdiv_du : GCCBuiltin<"__builtin_lasx_xvdiv_du">, ++ Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_v4i64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lasx_xvsll_b : GCCBuiltin<"__builtin_lasx_xvsll_b">, ++ Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty, llvm_v32i8_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvsll_h : GCCBuiltin<"__builtin_lasx_xvsll_h">, ++ Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty, llvm_v16i16_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvsll_w : GCCBuiltin<"__builtin_lasx_xvsll_w">, ++ Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_v8i32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvsll_d : GCCBuiltin<"__builtin_lasx_xvsll_d">, ++ Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_v4i64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lasx_xvsrl_b : GCCBuiltin<"__builtin_lasx_xvsrl_b">, ++ Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty, llvm_v32i8_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvsrl_h : GCCBuiltin<"__builtin_lasx_xvsrl_h">, ++ Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty, llvm_v16i16_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvsrl_w : GCCBuiltin<"__builtin_lasx_xvsrl_w">, ++ Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_v8i32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvsrl_d : GCCBuiltin<"__builtin_lasx_xvsrl_d">, ++ Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_v4i64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lasx_xvbitclr_b : GCCBuiltin<"__builtin_lasx_xvbitclr_b">, ++ Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty, llvm_v32i8_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvbitclr_h : GCCBuiltin<"__builtin_lasx_xvbitclr_h">, ++ Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty, llvm_v16i16_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvbitclr_w : GCCBuiltin<"__builtin_lasx_xvbitclr_w">, ++ Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_v8i32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvbitclr_d : GCCBuiltin<"__builtin_lasx_xvbitclr_d">, ++ Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_v4i64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lasx_xvbitset_b : GCCBuiltin<"__builtin_lasx_xvbitset_b">, ++ Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty, llvm_v32i8_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvbitset_h : GCCBuiltin<"__builtin_lasx_xvbitset_h">, ++ Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty, llvm_v16i16_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvbitset_w : GCCBuiltin<"__builtin_lasx_xvbitset_w">, ++ Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_v8i32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvbitset_d : GCCBuiltin<"__builtin_lasx_xvbitset_d">, ++ Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_v4i64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lasx_xvpackev_b : GCCBuiltin<"__builtin_lasx_xvpackev_b">, ++ Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty, llvm_v32i8_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvpackev_h : GCCBuiltin<"__builtin_lasx_xvpackev_h">, ++ Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty, llvm_v16i16_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvpackev_w : GCCBuiltin<"__builtin_lasx_xvpackev_w">, ++ Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_v8i32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvpackev_d : GCCBuiltin<"__builtin_lasx_xvpackev_d">, ++ Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_v4i64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lasx_xvpackod_b : GCCBuiltin<"__builtin_lasx_xvpackod_b">, ++ 
Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty, llvm_v32i8_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvpackod_h : GCCBuiltin<"__builtin_lasx_xvpackod_h">, ++ Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty, llvm_v16i16_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvpackod_w : GCCBuiltin<"__builtin_lasx_xvpackod_w">, ++ Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_v8i32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvpackod_d : GCCBuiltin<"__builtin_lasx_xvpackod_d">, ++ Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_v4i64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lasx_xvilvl_b : GCCBuiltin<"__builtin_lasx_xvilvl_b">, ++ Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty, llvm_v32i8_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvilvl_h : GCCBuiltin<"__builtin_lasx_xvilvl_h">, ++ Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty, llvm_v16i16_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvilvl_w : GCCBuiltin<"__builtin_lasx_xvilvl_w">, ++ Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_v8i32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvilvl_d : GCCBuiltin<"__builtin_lasx_xvilvl_d">, ++ Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_v4i64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lasx_xvilvh_b : GCCBuiltin<"__builtin_lasx_xvilvh_b">, ++ Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty, llvm_v32i8_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvilvh_h : GCCBuiltin<"__builtin_lasx_xvilvh_h">, ++ Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty, llvm_v16i16_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvilvh_w : GCCBuiltin<"__builtin_lasx_xvilvh_w">, ++ Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_v8i32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvilvh_d : GCCBuiltin<"__builtin_lasx_xvilvh_d">, ++ Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_v4i64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lasx_xvpickev_b : GCCBuiltin<"__builtin_lasx_xvpickev_b">, ++ Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty, llvm_v32i8_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvpickev_h : GCCBuiltin<"__builtin_lasx_xvpickev_h">, ++ Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty, llvm_v16i16_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvpickev_w : GCCBuiltin<"__builtin_lasx_xvpickev_w">, ++ Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_v8i32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvpickev_d : GCCBuiltin<"__builtin_lasx_xvpickev_d">, ++ Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_v4i64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lasx_xvand_v : GCCBuiltin<"__builtin_lasx_xvand_v">, ++ Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty, llvm_v32i8_ty], [IntrNoMem]>; ++ ++def int_loongarch_lasx_xvor_v : GCCBuiltin<"__builtin_lasx_xvor_v">, ++ Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty, llvm_v32i8_ty], [IntrNoMem]>; ++ ++def int_loongarch_lasx_xvbitrev_b : GCCBuiltin<"__builtin_lasx_xvbitrev_b">, ++ Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty, llvm_v32i8_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvbitrev_h : GCCBuiltin<"__builtin_lasx_xvbitrev_h">, ++ Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty, llvm_v16i16_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvbitrev_w : GCCBuiltin<"__builtin_lasx_xvbitrev_w">, ++ Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_v8i32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvbitrev_d : GCCBuiltin<"__builtin_lasx_xvbitrev_d">, ++ Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_v4i64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lasx_xvmod_bu : GCCBuiltin<"__builtin_lasx_xvmod_bu">, ++ Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty, llvm_v32i8_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvmod_hu : GCCBuiltin<"__builtin_lasx_xvmod_hu">, ++ 
Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty, llvm_v16i16_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvmod_wu : GCCBuiltin<"__builtin_lasx_xvmod_wu">, ++ Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_v8i32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvmod_du : GCCBuiltin<"__builtin_lasx_xvmod_du">, ++ Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_v4i64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lasx_xvpickod_b : GCCBuiltin<"__builtin_lasx_xvpickod_b">, ++ Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty, llvm_v32i8_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvpickod_h : GCCBuiltin<"__builtin_lasx_xvpickod_h">, ++ Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty, llvm_v16i16_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvpickod_w : GCCBuiltin<"__builtin_lasx_xvpickod_w">, ++ Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_v8i32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvpickod_d : GCCBuiltin<"__builtin_lasx_xvpickod_d">, ++ Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_v4i64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lasx_xvreplve_b : GCCBuiltin<"__builtin_lasx_xvreplve_b">, ++ Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty, llvm_i32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvreplve_h : GCCBuiltin<"__builtin_lasx_xvreplve_h">, ++ Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty, llvm_i32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvreplve_w : GCCBuiltin<"__builtin_lasx_xvreplve_w">, ++ Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_i32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvreplve_d : GCCBuiltin<"__builtin_lasx_xvreplve_d">, ++ Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_i32_ty], [IntrNoMem]>; ++ ++def int_loongarch_lasx_xvsra_b : GCCBuiltin<"__builtin_lasx_xvsra_b">, ++ Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty, llvm_v32i8_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvsra_h : GCCBuiltin<"__builtin_lasx_xvsra_h">, ++ Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty, llvm_v16i16_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvsra_w : GCCBuiltin<"__builtin_lasx_xvsra_w">, ++ Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_v8i32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvsra_d : GCCBuiltin<"__builtin_lasx_xvsra_d">, ++ Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_v4i64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lasx_xvxor_v : GCCBuiltin<"__builtin_lasx_xvxor_v">, ++ Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty, llvm_v32i8_ty], [IntrNoMem]>; ++ ++def int_loongarch_lasx_xvnor_v : GCCBuiltin<"__builtin_lasx_xvnor_v">, ++ Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty, llvm_v32i8_ty], [IntrNoMem]>; ++ ++def int_loongarch_lasx_xvfadd_s : GCCBuiltin<"__builtin_lasx_xvfadd_s">, ++ Intrinsic<[llvm_v8f32_ty], [llvm_v8f32_ty, llvm_v8f32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvfadd_d : GCCBuiltin<"__builtin_lasx_xvfadd_d">, ++ Intrinsic<[llvm_v4f64_ty], [llvm_v4f64_ty, llvm_v4f64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lasx_xvfsub_s : GCCBuiltin<"__builtin_lasx_xvfsub_s">, ++ Intrinsic<[llvm_v8f32_ty], [llvm_v8f32_ty, llvm_v8f32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvfsub_d : GCCBuiltin<"__builtin_lasx_xvfsub_d">, ++ Intrinsic<[llvm_v4f64_ty], [llvm_v4f64_ty, llvm_v4f64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lasx_xvfmul_s : GCCBuiltin<"__builtin_lasx_xvfmul_s">, ++ Intrinsic<[llvm_v8f32_ty], [llvm_v8f32_ty, llvm_v8f32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvfmul_d : GCCBuiltin<"__builtin_lasx_xvfmul_d">, ++ Intrinsic<[llvm_v4f64_ty], [llvm_v4f64_ty, llvm_v4f64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lasx_xvshuf_h : GCCBuiltin<"__builtin_lasx_xvshuf_h">, ++ Intrinsic<[llvm_v16i16_ty], 
[llvm_v16i16_ty, llvm_v16i16_ty, llvm_v16i16_ty], ++ [IntrNoMem]>; ++def int_loongarch_lasx_xvshuf_w : GCCBuiltin<"__builtin_lasx_xvshuf_w">, ++ Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_v8i32_ty, llvm_v8i32_ty], ++ [IntrNoMem]>; ++def int_loongarch_lasx_xvshuf_d : GCCBuiltin<"__builtin_lasx_xvshuf_d">, ++ Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_v4i64_ty, llvm_v4i64_ty], ++ [IntrNoMem]>; ++ ++def int_loongarch_lasx_xvseqi_b : GCCBuiltin<"__builtin_lasx_xvseqi_b">, ++ Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty, llvm_i32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvseqi_h : GCCBuiltin<"__builtin_lasx_xvseqi_h">, ++ Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty, llvm_i32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvseqi_w : GCCBuiltin<"__builtin_lasx_xvseqi_w">, ++ Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_i32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvseqi_d : GCCBuiltin<"__builtin_lasx_xvseqi_d">, ++ Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_i32_ty], [IntrNoMem]>; ++ ++def int_loongarch_lasx_xvslei_b : GCCBuiltin<"__builtin_lasx_xvslei_b">, ++ Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty, llvm_i32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvslei_h : GCCBuiltin<"__builtin_lasx_xvslei_h">, ++ Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty, llvm_i32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvslei_w : GCCBuiltin<"__builtin_lasx_xvslei_w">, ++ Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_i32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvslei_d : GCCBuiltin<"__builtin_lasx_xvslei_d">, ++ Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_i32_ty], [IntrNoMem]>; ++ ++def int_loongarch_lasx_xvslei_bu : GCCBuiltin<"__builtin_lasx_xvslei_bu">, ++ Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty, llvm_i32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvslei_hu : GCCBuiltin<"__builtin_lasx_xvslei_hu">, ++ Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty, llvm_i32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvslei_wu : GCCBuiltin<"__builtin_lasx_xvslei_wu">, ++ Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_i32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvslei_du : GCCBuiltin<"__builtin_lasx_xvslei_du">, ++ Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_i32_ty], [IntrNoMem]>; ++ ++def int_loongarch_lasx_xvslti_b : GCCBuiltin<"__builtin_lasx_xvslti_b">, ++ Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty, llvm_i32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvslti_h : GCCBuiltin<"__builtin_lasx_xvslti_h">, ++ Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty, llvm_i32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvslti_w : GCCBuiltin<"__builtin_lasx_xvslti_w">, ++ Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_i32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvslti_d : GCCBuiltin<"__builtin_lasx_xvslti_d">, ++ Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_i32_ty], [IntrNoMem]>; ++ ++def int_loongarch_lasx_xvslti_bu : GCCBuiltin<"__builtin_lasx_xvslti_bu">, ++ Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty, llvm_i32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvslti_hu : GCCBuiltin<"__builtin_lasx_xvslti_hu">, ++ Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty, llvm_i32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvslti_wu : GCCBuiltin<"__builtin_lasx_xvslti_wu">, ++ Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_i32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvslti_du : GCCBuiltin<"__builtin_lasx_xvslti_du">, ++ Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_i32_ty], [IntrNoMem]>; ++ ++def int_loongarch_lasx_xvaddi_bu : GCCBuiltin<"__builtin_lasx_xvaddi_bu">, ++ Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty, 
llvm_i32_ty], ++ [Commutative, IntrNoMem]>; ++def int_loongarch_lasx_xvaddi_hu : GCCBuiltin<"__builtin_lasx_xvaddi_hu">, ++ Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty, llvm_i32_ty], ++ [Commutative, IntrNoMem]>; ++def int_loongarch_lasx_xvaddi_wu : GCCBuiltin<"__builtin_lasx_xvaddi_wu">, ++ Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_i32_ty], ++ [Commutative, IntrNoMem]>; ++def int_loongarch_lasx_xvaddi_du : GCCBuiltin<"__builtin_lasx_xvaddi_du">, ++ Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_i32_ty], ++ [Commutative, IntrNoMem]>; ++ ++def int_loongarch_lasx_xvsubi_bu : GCCBuiltin<"__builtin_lasx_xvsubi_bu">, ++ Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty, llvm_i32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvsubi_hu : GCCBuiltin<"__builtin_lasx_xvsubi_hu">, ++ Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty, llvm_i32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvsubi_wu : GCCBuiltin<"__builtin_lasx_xvsubi_wu">, ++ Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_i32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvsubi_du : GCCBuiltin<"__builtin_lasx_xvsubi_du">, ++ Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_i32_ty], [IntrNoMem]>; ++ ++def int_loongarch_lasx_xvmaxi_b : GCCBuiltin<"__builtin_lasx_xvmaxi_b">, ++ Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty, llvm_i32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvmaxi_h : GCCBuiltin<"__builtin_lasx_xvmaxi_h">, ++ Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty, llvm_i32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvmaxi_w : GCCBuiltin<"__builtin_lasx_xvmaxi_w">, ++ Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_i32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvmaxi_d : GCCBuiltin<"__builtin_lasx_xvmaxi_d">, ++ Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_i32_ty], [IntrNoMem]>; ++ ++def int_loongarch_lasx_xvmini_b : GCCBuiltin<"__builtin_lasx_xvmini_b">, ++ Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty, llvm_i32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvmini_h : GCCBuiltin<"__builtin_lasx_xvmini_h">, ++ Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty, llvm_i32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvmini_w : GCCBuiltin<"__builtin_lasx_xvmini_w">, ++ Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_i32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvmini_d : GCCBuiltin<"__builtin_lasx_xvmini_d">, ++ Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_i32_ty], [IntrNoMem]>; ++ ++def int_loongarch_lasx_xvmaxi_bu : GCCBuiltin<"__builtin_lasx_xvmaxi_bu">, ++ Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty, llvm_i32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvmaxi_hu : GCCBuiltin<"__builtin_lasx_xvmaxi_hu">, ++ Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty, llvm_i32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvmaxi_wu : GCCBuiltin<"__builtin_lasx_xvmaxi_wu">, ++ Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_i32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvmaxi_du : GCCBuiltin<"__builtin_lasx_xvmaxi_du">, ++ Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_i32_ty], [IntrNoMem]>; ++ ++def int_loongarch_lasx_xvmini_bu : GCCBuiltin<"__builtin_lasx_xvmini_bu">, ++ Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty, llvm_i32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvmini_hu : GCCBuiltin<"__builtin_lasx_xvmini_hu">, ++ Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty, llvm_i32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvmini_wu : GCCBuiltin<"__builtin_lasx_xvmini_wu">, ++ Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_i32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvmini_du : GCCBuiltin<"__builtin_lasx_xvmini_du">, ++ Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, 
llvm_i32_ty], [IntrNoMem]>; ++ ++def int_loongarch_lasx_xvclz_b : GCCBuiltin<"__builtin_lasx_xvclz_b">, ++ Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvclz_h : GCCBuiltin<"__builtin_lasx_xvclz_h">, ++ Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvclz_w : GCCBuiltin<"__builtin_lasx_xvclz_w">, ++ Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvclz_d : GCCBuiltin<"__builtin_lasx_xvclz_d">, ++ Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lasx_xvpcnt_b : GCCBuiltin<"__builtin_lasx_xvpcnt_b">, ++ Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvpcnt_h : GCCBuiltin<"__builtin_lasx_xvpcnt_h">, ++ Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvpcnt_w : GCCBuiltin<"__builtin_lasx_xvpcnt_w">, ++ Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvpcnt_d : GCCBuiltin<"__builtin_lasx_xvpcnt_d">, ++ Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lasx_xvfsqrt_s : GCCBuiltin<"__builtin_lasx_xvfsqrt_s">, ++ Intrinsic<[llvm_v8f32_ty], [llvm_v8f32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvfsqrt_d : GCCBuiltin<"__builtin_lasx_xvfsqrt_d">, ++ Intrinsic<[llvm_v4f64_ty], [llvm_v4f64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lasx_xvfrint_s : GCCBuiltin<"__builtin_lasx_xvfrint_s">, ++ Intrinsic<[llvm_v8f32_ty], [llvm_v8f32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvfrint_d : GCCBuiltin<"__builtin_lasx_xvfrint_d">, ++ Intrinsic<[llvm_v4f64_ty], [llvm_v4f64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lasx_xvffint_s_w : GCCBuiltin<"__builtin_lasx_xvffint_s_w">, ++ Intrinsic<[llvm_v8f32_ty], [llvm_v8i32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvffint_d_l : GCCBuiltin<"__builtin_lasx_xvffint_d_l">, ++ Intrinsic<[llvm_v4f64_ty], [llvm_v4i64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lasx_xvffint_s_wu : GCCBuiltin<"__builtin_lasx_xvffint_s_wu">, ++ Intrinsic<[llvm_v8f32_ty], [llvm_v8i32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvffint_d_lu : GCCBuiltin<"__builtin_lasx_xvffint_d_lu">, ++ Intrinsic<[llvm_v4f64_ty], [llvm_v4i64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lasx_xvftintrz_wu_s : GCCBuiltin<"__builtin_lasx_xvftintrz_wu_s">, ++ Intrinsic<[llvm_v8i32_ty], [llvm_v8f32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvftintrz_lu_d : GCCBuiltin<"__builtin_lasx_xvftintrz_lu_d">, ++ Intrinsic<[llvm_v4i64_ty], [llvm_v4f64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lasx_xvreplgr2vr_b : GCCBuiltin<"__builtin_lasx_xvreplgr2vr_b">, ++ Intrinsic<[llvm_v32i8_ty], [llvm_i32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvreplgr2vr_h : GCCBuiltin<"__builtin_lasx_xvreplgr2vr_h">, ++ Intrinsic<[llvm_v16i16_ty], [llvm_i32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvreplgr2vr_w : GCCBuiltin<"__builtin_lasx_xvreplgr2vr_w">, ++ Intrinsic<[llvm_v8i32_ty], [llvm_i32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvreplgr2vr_d : GCCBuiltin<"__builtin_lasx_xvreplgr2vr_d">, ++ Intrinsic<[llvm_v4i64_ty], [llvm_i64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lasx_xvinsgr2vr_w : GCCBuiltin<"__builtin_lasx_xvinsgr2vr_w">, ++ Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_i32_ty, llvm_i32_ty], ++ [IntrNoMem]>; ++def int_loongarch_lasx_xvinsgr2vr_d : GCCBuiltin<"__builtin_lasx_xvinsgr2vr_d">, ++ Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_i64_ty, llvm_i32_ty], ++ [IntrNoMem]>; ++ ++def int_loongarch_lasx_xvfdiv_s : 
GCCBuiltin<"__builtin_lasx_xvfdiv_s">, ++ Intrinsic<[llvm_v8f32_ty], [llvm_v8f32_ty, llvm_v8f32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvfdiv_d : GCCBuiltin<"__builtin_lasx_xvfdiv_d">, ++ Intrinsic<[llvm_v4f64_ty], [llvm_v4f64_ty, llvm_v4f64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lasx_xvslli_b : GCCBuiltin<"__builtin_lasx_xvslli_b">, ++ Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty, llvm_i32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvslli_h : GCCBuiltin<"__builtin_lasx_xvslli_h">, ++ Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty, llvm_i32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvslli_w : GCCBuiltin<"__builtin_lasx_xvslli_w">, ++ Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_i32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvslli_d : GCCBuiltin<"__builtin_lasx_xvslli_d">, ++ Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_i32_ty], [IntrNoMem]>; ++ ++def int_loongarch_lasx_xvsrli_b : GCCBuiltin<"__builtin_lasx_xvsrli_b">, ++ Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty, llvm_i32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvsrli_h : GCCBuiltin<"__builtin_lasx_xvsrli_h">, ++ Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty, llvm_i32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvsrli_w : GCCBuiltin<"__builtin_lasx_xvsrli_w">, ++ Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_i32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvsrli_d : GCCBuiltin<"__builtin_lasx_xvsrli_d">, ++ Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_i32_ty], [IntrNoMem]>; ++ ++def int_loongarch_lasx_xvsrai_b : GCCBuiltin<"__builtin_lasx_xvsrai_b">, ++ Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty, llvm_i32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvsrai_h : GCCBuiltin<"__builtin_lasx_xvsrai_h">, ++ Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty, llvm_i32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvsrai_w : GCCBuiltin<"__builtin_lasx_xvsrai_w">, ++ Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_i32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvsrai_d : GCCBuiltin<"__builtin_lasx_xvsrai_d">, ++ Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_i32_ty], [IntrNoMem]>; ++ ++def int_loongarch_lasx_xvshuf4i_b : GCCBuiltin<"__builtin_lasx_xvshuf4i_b">, ++ Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty, llvm_i32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvshuf4i_h : GCCBuiltin<"__builtin_lasx_xvshuf4i_h">, ++ Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty, llvm_i32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvshuf4i_w : GCCBuiltin<"__builtin_lasx_xvshuf4i_w">, ++ Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_i32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvshuf4i_d : GCCBuiltin<"__builtin_lasx_xvshuf4i_d">, ++ Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_v4i64_ty, llvm_i32_ty], [IntrNoMem]>; ++ ++def int_loongarch_lasx_xvrotr_b : GCCBuiltin<"__builtin_lasx_xvrotr_b">, ++ Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty, llvm_v32i8_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvrotr_h : GCCBuiltin<"__builtin_lasx_xvrotr_h">, ++ Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty, llvm_v16i16_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvrotr_w : GCCBuiltin<"__builtin_lasx_xvrotr_w">, ++ Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_v8i32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvrotr_d : GCCBuiltin<"__builtin_lasx_xvrotr_d">, ++ Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_v4i64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lasx_xvrotri_b : GCCBuiltin<"__builtin_lasx_xvrotri_b">, ++ Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty, llvm_i32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvrotri_h : GCCBuiltin<"__builtin_lasx_xvrotri_h">, ++ 
Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty, llvm_i32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvrotri_w : GCCBuiltin<"__builtin_lasx_xvrotri_w">, ++ Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_i32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvrotri_d : GCCBuiltin<"__builtin_lasx_xvrotri_d">, ++ Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_i32_ty], [IntrNoMem]>; ++ ++def int_loongarch_lasx_xvld : GCCBuiltin<"__builtin_lasx_xvld">, ++ Intrinsic<[llvm_v32i8_ty], [llvm_ptr_ty, llvm_i32_ty], ++ [IntrReadMem, IntrArgMemOnly]>; ++ ++def int_loongarch_lasx_xvst : GCCBuiltin<"__builtin_lasx_xvst">, ++ Intrinsic<[], [llvm_v32i8_ty, llvm_ptr_ty, llvm_i32_ty], ++ [IntrArgMemOnly]>; ++ ++def int_loongarch_lasx_xvrepl128vei_b : GCCBuiltin<"__builtin_lasx_xvrepl128vei_b">, ++ Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty, llvm_i32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvrepl128vei_h : GCCBuiltin<"__builtin_lasx_xvrepl128vei_h">, ++ Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty, llvm_i32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvrepl128vei_w : GCCBuiltin<"__builtin_lasx_xvrepl128vei_w">, ++ Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_i32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvrepl128vei_d : GCCBuiltin<"__builtin_lasx_xvrepl128vei_d">, ++ Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_i32_ty], [IntrNoMem]>; ++ ++def int_loongarch_lasx_xvinsve0_w : GCCBuiltin<"__builtin_lasx_xvinsve0_w">, ++ Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_v8i32_ty, llvm_i32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvinsve0_d : GCCBuiltin<"__builtin_lasx_xvinsve0_d">, ++ Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_v4i64_ty, llvm_i32_ty], [IntrNoMem]>; ++ ++def int_loongarch_lasx_xvpickve_w : GCCBuiltin<"__builtin_lasx_xvpickve_w">, ++ Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_i32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvpickve_d : GCCBuiltin<"__builtin_lasx_xvpickve_d">, ++ Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_i32_ty], [IntrNoMem]>; ++ ++def int_loongarch_lasx_xvreplve0_b : GCCBuiltin<"__builtin_lasx_xvreplve0_b">, ++ Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvreplve0_h : GCCBuiltin<"__builtin_lasx_xvreplve0_h">, ++ Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvreplve0_w : GCCBuiltin<"__builtin_lasx_xvreplve0_w">, ++ Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvreplve0_d : GCCBuiltin<"__builtin_lasx_xvreplve0_d">, ++ Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvreplve0_q : GCCBuiltin<"__builtin_lasx_xvreplve0_q">, ++ Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty], [IntrNoMem]>; ++ ++def int_loongarch_lasx_vext2xv_d_w : GCCBuiltin<"__builtin_lasx_vext2xv_d_w">, ++ Intrinsic<[llvm_v4i64_ty], [llvm_v8i32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_vext2xv_w_h : GCCBuiltin<"__builtin_lasx_vext2xv_w_h">, ++ Intrinsic<[llvm_v8i32_ty], [llvm_v16i16_ty], [IntrNoMem]>; ++def int_loongarch_lasx_vext2xv_h_b : GCCBuiltin<"__builtin_lasx_vext2xv_h_b">, ++ Intrinsic<[llvm_v16i16_ty], [llvm_v32i8_ty], [IntrNoMem]>; ++ ++def int_loongarch_lasx_vext2xv_d_h : GCCBuiltin<"__builtin_lasx_vext2xv_d_h">, ++ Intrinsic<[llvm_v4i64_ty], [llvm_v16i16_ty], [IntrNoMem]>; ++def int_loongarch_lasx_vext2xv_w_b : GCCBuiltin<"__builtin_lasx_vext2xv_w_b">, ++ Intrinsic<[llvm_v8i32_ty], [llvm_v32i8_ty], [IntrNoMem]>; ++def int_loongarch_lasx_vext2xv_d_b : GCCBuiltin<"__builtin_lasx_vext2xv_d_b">, ++ Intrinsic<[llvm_v4i64_ty], [llvm_v32i8_ty], 
[IntrNoMem]>; ++ ++def int_loongarch_lasx_vext2xv_du_wu : GCCBuiltin<"__builtin_lasx_vext2xv_du_wu">, ++ Intrinsic<[llvm_v4i64_ty], [llvm_v8i32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_vext2xv_wu_hu : GCCBuiltin<"__builtin_lasx_vext2xv_wu_hu">, ++ Intrinsic<[llvm_v8i32_ty], [llvm_v16i16_ty], [IntrNoMem]>; ++def int_loongarch_lasx_vext2xv_hu_bu : GCCBuiltin<"__builtin_lasx_vext2xv_hu_bu">, ++ Intrinsic<[llvm_v16i16_ty], [llvm_v32i8_ty], [IntrNoMem]>; ++ ++def int_loongarch_lasx_vext2xv_du_hu : GCCBuiltin<"__builtin_lasx_vext2xv_du_hu">, ++ Intrinsic<[llvm_v4i64_ty], [llvm_v16i16_ty], [IntrNoMem]>; ++def int_loongarch_lasx_vext2xv_wu_bu : GCCBuiltin<"__builtin_lasx_vext2xv_wu_bu">, ++ Intrinsic<[llvm_v8i32_ty], [llvm_v32i8_ty], [IntrNoMem]>; ++def int_loongarch_lasx_vext2xv_du_bu : GCCBuiltin<"__builtin_lasx_vext2xv_du_bu">, ++ Intrinsic<[llvm_v4i64_ty], [llvm_v32i8_ty], [IntrNoMem]>; ++ ++def int_loongarch_lasx_xvpermi_q : GCCBuiltin<"__builtin_lasx_xvpermi_q">, ++ Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty, llvm_v32i8_ty, llvm_i32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvpermi_d : GCCBuiltin<"__builtin_lasx_xvpermi_d">, ++ Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_i32_ty], [IntrNoMem]>; ++ ++def int_loongarch_lasx_xvperm_w : GCCBuiltin<"__builtin_lasx_xvperm_w">, ++ Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_v8i32_ty], [IntrNoMem]>; ++ ++def int_loongarch_lasx_xvsrlrni_b_h : GCCBuiltin<"__builtin_lasx_xvsrlrni_b_h">, ++ Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty, llvm_v32i8_ty, llvm_i32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvsrlrni_h_w : GCCBuiltin<"__builtin_lasx_xvsrlrni_h_w">, ++ Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty, llvm_v16i16_ty, llvm_i32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvsrlrni_w_d : GCCBuiltin<"__builtin_lasx_xvsrlrni_w_d">, ++ Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_v8i32_ty, llvm_i32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvsrlrni_d_q : GCCBuiltin<"__builtin_lasx_xvsrlrni_d_q">, ++ Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_v4i64_ty, llvm_i32_ty], [IntrNoMem]>; ++ ++def int_loongarch_lasx_xbz_v : GCCBuiltin<"__builtin_lasx_xbz_v">, ++ Intrinsic<[llvm_i32_ty], [llvm_v32i8_ty], [IntrNoMem]>; ++ ++def int_loongarch_lasx_xbnz_v : GCCBuiltin<"__builtin_lasx_xbnz_v">, ++ Intrinsic<[llvm_i32_ty], [llvm_v32i8_ty], [IntrNoMem]>; ++ ++def int_loongarch_lasx_xbz_b : GCCBuiltin<"__builtin_lasx_xbz_b">, ++ Intrinsic<[llvm_i32_ty], [llvm_v32i8_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xbz_h : GCCBuiltin<"__builtin_lasx_xbz_h">, ++ Intrinsic<[llvm_i32_ty], [llvm_v16i16_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xbz_w : GCCBuiltin<"__builtin_lasx_xbz_w">, ++ Intrinsic<[llvm_i32_ty], [llvm_v8i32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xbz_d : GCCBuiltin<"__builtin_lasx_xbz_d">, ++ Intrinsic<[llvm_i32_ty], [llvm_v4i64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lasx_xbnz_b : GCCBuiltin<"__builtin_lasx_xbnz_b">, ++ Intrinsic<[llvm_i32_ty], [llvm_v32i8_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xbnz_h : GCCBuiltin<"__builtin_lasx_xbnz_h">, ++ Intrinsic<[llvm_i32_ty], [llvm_v16i16_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xbnz_w : GCCBuiltin<"__builtin_lasx_xbnz_w">, ++ Intrinsic<[llvm_i32_ty], [llvm_v8i32_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xbnz_d : GCCBuiltin<"__builtin_lasx_xbnz_d">, ++ Intrinsic<[llvm_i32_ty], [llvm_v4i64_ty], [IntrNoMem]>; ++ ++def int_loongarch_lasx_xvextl_q_d : GCCBuiltin<"__builtin_lasx_xvextl_q_d">, ++ Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty], [IntrNoMem]>; ++def int_loongarch_lasx_xvextl_qu_du : 
GCCBuiltin<"__builtin_lasx_xvextl_qu_du">, ++ Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty], [IntrNoMem]>; ++ ++//===----------------------------------------------------------------------===// ++// LoongArch BASE ++ ++def int_loongarch_cpucfg : GCCBuiltin<"__builtin_loongarch_cpucfg">, ++ Intrinsic<[llvm_i32_ty], [llvm_i32_ty], []>; ++ ++def int_loongarch_csrrd : GCCBuiltin<"__builtin_loongarch_csrrd">, ++ Intrinsic<[llvm_i32_ty], [llvm_i32_ty], []>; ++ ++def int_loongarch_dcsrrd : GCCBuiltin<"__builtin_loongarch_dcsrrd">, ++ Intrinsic<[llvm_i64_ty], [llvm_i64_ty], []>; ++ ++def int_loongarch_csrwr : GCCBuiltin<"__builtin_loongarch_csrwr">, ++ Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], []>; ++ ++def int_loongarch_dcsrwr : GCCBuiltin<"__builtin_loongarch_dcsrwr">, ++ Intrinsic<[llvm_i64_ty], [llvm_i64_ty, llvm_i64_ty], []>; ++ ++def int_loongarch_csrxchg : GCCBuiltin<"__builtin_loongarch_csrxchg">, ++ Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], []>; ++ ++def int_loongarch_dcsrxchg : GCCBuiltin<"__builtin_loongarch_dcsrxchg">, ++ Intrinsic<[llvm_i64_ty], [llvm_i64_ty, llvm_i64_ty, llvm_i64_ty], []>; ++ ++def int_loongarch_iocsrrd_b : GCCBuiltin<"__builtin_loongarch_iocsrrd_b">, ++ Intrinsic<[llvm_i32_ty], [llvm_i32_ty], []>; ++ ++def int_loongarch_iocsrrd_h : GCCBuiltin<"__builtin_loongarch_iocsrrd_h">, ++ Intrinsic<[llvm_i32_ty], [llvm_i32_ty], []>; ++ ++def int_loongarch_iocsrrd_w : GCCBuiltin<"__builtin_loongarch_iocsrrd_w">, ++ Intrinsic<[llvm_i32_ty], [llvm_i32_ty], []>; ++ ++def int_loongarch_iocsrrd_d : GCCBuiltin<"__builtin_loongarch_iocsrrd_d">, ++ Intrinsic<[llvm_i64_ty], [llvm_i32_ty], []>; ++ ++def int_loongarch_iocsrwr_b : GCCBuiltin<"__builtin_loongarch_iocsrwr_b">, ++ Intrinsic<[], [llvm_i32_ty, llvm_i32_ty], []>; ++ ++def int_loongarch_iocsrwr_h : GCCBuiltin<"__builtin_loongarch_iocsrwr_h">, ++ Intrinsic<[], [llvm_i32_ty, llvm_i32_ty], []>; ++ ++def int_loongarch_iocsrwr_w : GCCBuiltin<"__builtin_loongarch_iocsrwr_w">, ++ Intrinsic<[], [llvm_i32_ty, llvm_i32_ty], []>; ++ ++def int_loongarch_iocsrwr_d : GCCBuiltin<"__builtin_loongarch_iocsrwr_d">, ++ Intrinsic<[], [llvm_i64_ty, llvm_i32_ty], []>; ++ ++def int_loongarch_cacop : GCCBuiltin<"__builtin_loongarch_cacop">, ++ Intrinsic<[], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], []>; ++ ++def int_loongarch_dcacop : GCCBuiltin<"__builtin_loongarch_dcacop">, ++ Intrinsic<[], [llvm_i32_ty, llvm_i64_ty, llvm_i64_ty], []>; ++ ++def int_loongarch_crc_w_b_w : GCCBuiltin<"__builtin_loongarch_crc_w_b_w">, ++ Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], []>; ++ ++def int_loongarch_crc_w_h_w : GCCBuiltin<"__builtin_loongarch_crc_w_h_w">, ++ Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], []>; ++ ++def int_loongarch_crc_w_w_w : GCCBuiltin<"__builtin_loongarch_crc_w_w_w">, ++ Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], []>; ++ ++def int_loongarch_crc_w_d_w : GCCBuiltin<"__builtin_loongarch_crc_w_d_w">, ++ Intrinsic<[llvm_i32_ty], [llvm_i64_ty, llvm_i32_ty], []>; ++ ++def int_loongarch_crcc_w_b_w : GCCBuiltin<"__builtin_loongarch_crcc_w_b_w">, ++ Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], []>; ++ ++def int_loongarch_crcc_w_h_w : GCCBuiltin<"__builtin_loongarch_crcc_w_h_w">, ++ Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], []>; ++ ++def int_loongarch_crcc_w_w_w : GCCBuiltin<"__builtin_loongarch_crcc_w_w_w">, ++ Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], []>; ++ ++def int_loongarch_crcc_w_d_w : GCCBuiltin<"__builtin_loongarch_crcc_w_d_w">, ++ Intrinsic<[llvm_i32_ty], 
[llvm_i64_ty, llvm_i32_ty], []>; ++ ++def int_loongarch_tlbclr : GCCBuiltin<"__builtin_loongarch_tlbclr">, ++ Intrinsic<[], [], []>; ++ ++def int_loongarch_tlbflush : GCCBuiltin<"__builtin_loongarch_tlbflush">, ++ Intrinsic<[], [], []>; ++ ++def int_loongarch_tlbfill : GCCBuiltin<"__builtin_loongarch_tlbfill">, ++ Intrinsic<[], [], []>; ++ ++def int_loongarch_tlbrd : GCCBuiltin<"__builtin_loongarch_tlbrd">, ++ Intrinsic<[], [], []>; ++ ++def int_loongarch_tlbwr : GCCBuiltin<"__builtin_loongarch_tlbwr">, ++ Intrinsic<[], [], []>; ++ ++def int_loongarch_tlbsrch : GCCBuiltin<"__builtin_loongarch_tlbsrch">, ++ Intrinsic<[], [], []>; ++ ++def int_loongarch_syscall : GCCBuiltin<"__builtin_loongarch_syscall">, ++ Intrinsic<[], [llvm_i64_ty], []>; ++ ++def int_loongarch_break : GCCBuiltin<"__builtin_loongarch_break">, ++ Intrinsic<[], [llvm_i64_ty], []>; ++ ++def int_loongarch_asrtle_d : GCCBuiltin<"__builtin_loongarch_asrtle_d">, ++ Intrinsic<[], [llvm_i64_ty, llvm_i64_ty], []>; ++ ++def int_loongarch_asrtgt_d : GCCBuiltin<"__builtin_loongarch_asrtgt_d">, ++ Intrinsic<[], [llvm_i64_ty, llvm_i64_ty], []>; ++ ++def int_loongarch_dbar : GCCBuiltin<"__builtin_loongarch_dbar">, ++ Intrinsic<[], [llvm_i64_ty], []>; ++ ++def int_loongarch_ibar : GCCBuiltin<"__builtin_loongarch_ibar">, ++ Intrinsic<[], [llvm_i64_ty], []>; ++ ++} +diff --git a/include/llvm/Object/ELFObjectFile.h b/include/llvm/Object/ELFObjectFile.h +index e2d2784d..72099865 100644 +--- a/include/llvm/Object/ELFObjectFile.h ++++ b/include/llvm/Object/ELFObjectFile.h +@@ -1197,6 +1197,8 @@ StringRef ELFObjectFile::getFileFormatName() const { + return "elf32-littleriscv"; + case ELF::EM_CSKY: + return "elf32-csky"; ++ case ELF::EM_LOONGARCH: ++ return "elf32-loongarch"; + case ELF::EM_SPARC: + case ELF::EM_SPARC32PLUS: + return "elf32-sparc"; +@@ -1221,6 +1223,8 @@ StringRef ELFObjectFile::getFileFormatName() const { + return "elf64-s390"; + case ELF::EM_SPARCV9: + return "elf64-sparc"; ++ case ELF::EM_LOONGARCH: ++ return "elf64-loongarch"; + case ELF::EM_MIPS: + return "elf64-mips"; + case ELF::EM_AMDGPU: +@@ -1282,6 +1286,15 @@ template Triple::ArchType ELFObjectFile::getArch() const { + default: + report_fatal_error("Invalid ELFCLASS!"); + } ++ case ELF::EM_LOONGARCH: ++ switch (EF.getHeader().e_ident[ELF::EI_CLASS]) { ++ case ELF::ELFCLASS32: ++ return Triple::loongarch32; ++ case ELF::ELFCLASS64: ++ return Triple::loongarch64; ++ default: ++ report_fatal_error("Invalid ELFCLASS!"); ++ } + case ELF::EM_S390: + return Triple::systemz; + +diff --git a/include/llvm/module.modulemap b/include/llvm/module.modulemap +index d0693ccf..3fd97d07 100644 +--- a/include/llvm/module.modulemap ++++ b/include/llvm/module.modulemap +@@ -71,6 +71,7 @@ module LLVM_BinaryFormat { + textual header "BinaryFormat/ELFRelocs/Hexagon.def" + textual header "BinaryFormat/ELFRelocs/i386.def" + textual header "BinaryFormat/ELFRelocs/Lanai.def" ++ textual header "BinaryFormat/ELFRelocs/LoongArch.def" + textual header "BinaryFormat/ELFRelocs/M68k.def" + textual header "BinaryFormat/ELFRelocs/Mips.def" + textual header "BinaryFormat/ELFRelocs/MSP430.def" +diff --git a/lib/CodeGen/TargetLoweringObjectFileImpl.cpp b/lib/CodeGen/TargetLoweringObjectFileImpl.cpp +index ce350034..50171505 100644 +--- a/lib/CodeGen/TargetLoweringObjectFileImpl.cpp ++++ b/lib/CodeGen/TargetLoweringObjectFileImpl.cpp +@@ -207,6 +207,14 @@ void TargetLoweringObjectFileELF::Initialize(MCContext &Ctx, + PersonalityEncoding = dwarf::DW_EH_PE_absptr; + TTypeEncoding = dwarf::DW_EH_PE_absptr; + 
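// --- Illustrative aside, not part of the patch ------------------------------
// The LASX and BASE intrinsics declared above are reached from C/C++ through
// the __builtin_lasx_* / __builtin_loongarch_* names given in their
// GCCBuiltin<> annotations. A minimal usage sketch, assuming the builtin
// prototypes mirror the intrinsic operand types declared above (the helper
// names and the vector typedef are ours, purely for illustration):

#include <cstdint>

// 256-bit vector type matching the llvm_v8f32_ty operands used above.
typedef float v8f32 __attribute__((vector_size(32)));

// Element-wise a*b + c via the xvfmul.s / xvfadd.s intrinsics declared above.
static v8f32 lasx_fmadd(v8f32 a, v8f32 b, v8f32 c) {
  return __builtin_lasx_xvfadd_s(__builtin_lasx_xvfmul_s(a, b), c);
}

// Read CPUCFG word 0 and issue a full data barrier via the BASE intrinsics.
static uint32_t read_cpucfg0(void) {
  uint32_t w = (uint32_t)__builtin_loongarch_cpucfg(0);
  __builtin_loongarch_dbar(0); // hint 0: full barrier
  return w;
}
// -----------------------------------------------------------------------------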
break; ++ case Triple::loongarch32: ++ case Triple::loongarch64: ++ LSDAEncoding = dwarf::DW_EH_PE_pcrel | dwarf::DW_EH_PE_sdata4; ++ PersonalityEncoding = dwarf::DW_EH_PE_indirect | dwarf::DW_EH_PE_pcrel | ++ dwarf::DW_EH_PE_sdata4; ++ TTypeEncoding = dwarf::DW_EH_PE_indirect | dwarf::DW_EH_PE_pcrel | ++ dwarf::DW_EH_PE_sdata4; ++ break; + case Triple::mips: + case Triple::mipsel: + case Triple::mips64: +diff --git a/lib/CodeGen/XRayInstrumentation.cpp b/lib/CodeGen/XRayInstrumentation.cpp +index b66429d8..0562a823 100644 +--- a/lib/CodeGen/XRayInstrumentation.cpp ++++ b/lib/CodeGen/XRayInstrumentation.cpp +@@ -227,6 +227,8 @@ bool XRayInstrumentation::runOnMachineFunction(MachineFunction &MF) { + case Triple::ArchType::thumb: + case Triple::ArchType::aarch64: + case Triple::ArchType::hexagon: ++ case Triple::ArchType::loongarch32: ++ case Triple::ArchType::loongarch64: + case Triple::ArchType::mips: + case Triple::ArchType::mipsel: + case Triple::ArchType::mips64: +diff --git a/lib/ExecutionEngine/Orc/EPCIndirectionUtils.cpp b/lib/ExecutionEngine/Orc/EPCIndirectionUtils.cpp +index 249f02f3..305e0b07 100644 +--- a/lib/ExecutionEngine/Orc/EPCIndirectionUtils.cpp ++++ b/lib/ExecutionEngine/Orc/EPCIndirectionUtils.cpp +@@ -261,6 +261,9 @@ EPCIndirectionUtils::Create(ExecutorProcessControl &EPC) { + case Triple::mips64el: + return CreateWithABI(EPC); + ++ case Triple::loongarch64: ++ return CreateWithABI(EPC); ++ + case Triple::x86_64: + if (TT.getOS() == Triple::OSType::Win32) + return CreateWithABI(EPC); +diff --git a/lib/ExecutionEngine/Orc/IndirectionUtils.cpp b/lib/ExecutionEngine/Orc/IndirectionUtils.cpp +index 7a71d2f7..81644255 100644 +--- a/lib/ExecutionEngine/Orc/IndirectionUtils.cpp ++++ b/lib/ExecutionEngine/Orc/IndirectionUtils.cpp +@@ -152,6 +152,11 @@ createLocalCompileCallbackManager(const Triple &T, ExecutionSession &ES, + return CCMgrT::Create(ES, ErrorHandlerAddress); + } + ++ case Triple::loongarch64: { ++ typedef orc::LocalJITCompileCallbackManager CCMgrT; ++ return CCMgrT::Create(ES, ErrorHandlerAddress); ++ } ++ + case Triple::x86_64: { + if (T.getOS() == Triple::OSType::Win32) { + typedef orc::LocalJITCompileCallbackManager CCMgrT; +diff --git a/lib/ExecutionEngine/Orc/LazyReexports.cpp b/lib/ExecutionEngine/Orc/LazyReexports.cpp +index 66453e6a..eac277af 100644 +--- a/lib/ExecutionEngine/Orc/LazyReexports.cpp ++++ b/lib/ExecutionEngine/Orc/LazyReexports.cpp +@@ -131,6 +131,10 @@ createLocalLazyCallThroughManager(const Triple &T, ExecutionSession &ES, + case Triple::mips64el: + return LocalLazyCallThroughManager::Create(ES, ErrorHandlerAddr); + ++ case Triple::loongarch64: ++ return LocalLazyCallThroughManager::Create( ++ ES, ErrorHandlerAddr); ++ + case Triple::x86_64: + if (T.getOS() == Triple::OSType::Win32) + return LocalLazyCallThroughManager::Create( +diff --git a/lib/ExecutionEngine/Orc/OrcABISupport.cpp b/lib/ExecutionEngine/Orc/OrcABISupport.cpp +index 18b3c5e1..440831d7 100644 +--- a/lib/ExecutionEngine/Orc/OrcABISupport.cpp ++++ b/lib/ExecutionEngine/Orc/OrcABISupport.cpp +@@ -906,5 +906,206 @@ void OrcMips64::writeIndirectStubsBlock( + Stub[8 * I + 7] = 0x00000000; // nop + } + } ++ ++void OrcLoongArch64::writeResolverCode(char *ResolverWorkingMem, ++ JITTargetAddress ResolverTargetAddress, ++ JITTargetAddress ReentryFnAddr, ++ JITTargetAddress ReentryCtxAddr) { ++ ++ const uint32_t ResolverCode[] = { ++ // resolver_entry: ++ 0x02fc8063, // 0x0: addi.d $r3,$r3,-224(0xf20) ++ 0x29c00064, // 0x4: st.d $r4,$r3,0 ++ 0x29c02065, // 0x8: st.d $r5,$r3,8(0x8) ++ 
0x29c04066, // 0xc: st.d $r6,$r3,16(0x10) ++ 0x29c06067, // 0x10: st.d $r7,$r3,24(0x18) ++ 0x29c08068, // 0x14: st.d $r8,$r3,32(0x20) ++ 0x29c0a069, // 0x18: st.d $r9,$r3,40(0x28) ++ 0x29c0c06a, // 0x1c: st.d $r10,$r3,48(0x30) ++ 0x29c0e06b, // 0x20: st.d $r11,$r3,56(0x38) ++ 0x29c1006c, // 0x24: st.d $r12,$r3,64(0x40) ++ 0x29c1206d, // 0x28: st.d $r13,$r3,72(0x48) ++ 0x29c1406e, // 0x2c: st.d $r14,$r3,80(0x50) ++ 0x29c1606f, // 0x30: st.d $r15,$r3,88(0x58) ++ 0x29c18070, // 0x34: st.d $r16,$r3,96(0x60) ++ 0x29c1a071, // 0x38: st.d $r17,$r3,104(0x68) ++ 0x29c1c072, // 0x3c: st.d $r18,$r3,112(0x70) ++ 0x29c1e073, // 0x40: st.d $r19,$r3,120(0x78) ++ 0x29c20074, // 0x44: st.d $r20,$r3,128(0x80) ++ 0x29c22076, // 0x48: st.d $r22,$r3,136(0x88) ++ 0x29c24077, // 0x4c: st.d $r23,$r3,144(0x90) ++ 0x29c26078, // 0x50: st.d $r24,$r3,152(0x98) ++ 0x29c28079, // 0x54: st.d $r25,$r3,160(0xa0) ++ 0x29c2a07a, // 0x58: st.d $r26,$r3,168(0xa8) ++ 0x29c2c07b, // 0x5c: st.d $r27,$r3,176(0xb0) ++ 0x29c2e07c, // 0x60: st.d $r28,$r3,184(0xb8) ++ 0x29c3007d, // 0x64: st.d $r29,$r3,192(0xc0) ++ 0x29c3207e, // 0x68: st.d $r30,$r3,200(0xc8) ++ 0x29c3407f, // 0x6c: st.d $r31,$r3,208(0xd0) ++ 0x29c36061, // 0x70: st.d $r1,$r3,216(0xd8) ++ // JIT re-entry ctx addr. ++ 0x00000000, // 0x74: lu12i.w $a0,hi(ctx) ++ 0x00000000, // 0x78: ori $a0,$a0,lo(ctx) ++ 0x00000000, // 0x7c: lu32i.d $a0,higher(ctx) ++ 0x00000000, // 0x80: lu52i.d $a0,$a0,highest(ctx) ++ ++ 0x00150025, // 0x84: move $r5,$r1 ++ 0x02ffa0a5, // 0x88: addi.d $r5,$r5,-24(0xfe8) ++ ++ // JIT re-entry fn addr: ++ 0x00000000, // 0x8c: lu12i.w $t0,hi(reentry) ++ 0x00000000, // 0x90: ori $t0,$t0,lo(reentry) ++ 0x00000000, // 0x94: lu32i.d $t0,higher(reentry) ++ 0x00000000, // 0x98: lu52i.d $t0,$t0,highest(reentry) ++ 0x4c0002a1, // 0x9c: jirl $r1,$r21,0 ++ 0x00150095, // 0xa0: move $r21,$r4 ++ 0x28c36061, // 0xa4: ld.d $r1,$r3,216(0xd8) ++ 0x28c3407f, // 0xa8: ld.d $r31,$r3,208(0xd0) ++ 0x28c3207e, // 0xac: ld.d $r30,$r3,200(0xc8) ++ 0x28c3007d, // 0xb0: ld.d $r29,$r3,192(0xc0) ++ 0x28c2e07c, // 0xb4: ld.d $r28,$r3,184(0xb8) ++ 0x28c2c07b, // 0xb8: ld.d $r27,$r3,176(0xb0) ++ 0x28c2a07a, // 0xbc: ld.d $r26,$r3,168(0xa8) ++ 0x28c28079, // 0xc0: ld.d $r25,$r3,160(0xa0) ++ 0x28c26078, // 0xc4: ld.d $r24,$r3,152(0x98) ++ 0x28c24077, // 0xc8: ld.d $r23,$r3,144(0x90) ++ 0x28c22076, // 0xcc: ld.d $r22,$r3,136(0x88) ++ 0x28c20074, // 0xd0: ld.d $r20,$r3,128(0x80) ++ 0x28c1e073, // 0xd4: ld.d $r19,$r3,120(0x78) ++ 0x28c1c072, // 0xd8: ld.d $r18,$r3,112(0x70) ++ 0x28c1a071, // 0xdc: ld.d $r17,$r3,104(0x68) ++ 0x28c18070, // 0xe0: ld.d $r16,$r3,96(0x60) ++ 0x28c1606f, // 0xe4: ld.d $r15,$r3,88(0x58) ++ 0x28c1406e, // 0xe8: ld.d $r14,$r3,80(0x50) ++ 0x28c1206d, // 0xec: ld.d $r13,$r3,72(0x48) ++ 0x28c1006c, // 0xf0: ld.d $r12,$r3,64(0x40) ++ 0x28c0e06b, // 0xf4: ld.d $r11,$r3,56(0x38) ++ 0x28c0c06a, // 0xf8: ld.d $r10,$r3,48(0x30) ++ 0x28c0a069, // 0xfc: ld.d $r9,$r3,40(0x28) ++ 0x28c08068, // 0x100: ld.d $r8,$r3,32(0x20) ++ 0x28c06067, // 0x104: ld.d $r7,$r3,24(0x18) ++ 0x28c04066, // 0x108: ld.d $r6,$r3,16(0x10) ++ 0x28c02065, // 0x10c: ld.d $r5,$r3,8(0x8) ++ 0x28c00064, // 0x110: ld.d $r4,$r3,0 ++ 0x02c38063, // 0x114: addi.d $r3,$r3,224(0xe0) ++ 0x00150281, // 0x118: move $r1,$r20 ++ 0x4c0002a0, // 0x11c: jirl $r0,$r21,0 ++ }; ++ ++ const unsigned ReentryFnAddrOffset = 0x8c; // JIT re-entry fn addr lu12i.w ++ const unsigned ReentryCtxAddrOffset = 0x74; // JIT re-entry ctx addr lu12i.w ++ ++ memcpy(ResolverWorkingMem, ResolverCode, sizeof(ResolverCode)); ++ ++ uint32_t 
ReentryCtxLU12i = 0x14000004 | ((ReentryCtxAddr << 32 >> 44) << 5); ++ uint32_t ReentryCtxORi = 0x03800084 | ((ReentryCtxAddr & 0xFFF) << 10); ++ uint32_t ReentryCtxLU32i = 0x16000004 | ((ReentryCtxAddr << 12 >> 44) << 5); ++ uint32_t ReentryCtxLU52i = 0x03000084 | ((ReentryCtxAddr >> 52) << 10); ++ ++ memcpy(ResolverWorkingMem + ReentryCtxAddrOffset, &ReentryCtxLU12i, ++ sizeof(ReentryCtxLU12i)); ++ memcpy(ResolverWorkingMem + (ReentryCtxAddrOffset + 4), &ReentryCtxORi, ++ sizeof(ReentryCtxORi)); ++ memcpy(ResolverWorkingMem + (ReentryCtxAddrOffset + 8), &ReentryCtxLU32i, ++ sizeof(ReentryCtxLU32i)); ++ memcpy(ResolverWorkingMem + (ReentryCtxAddrOffset + 12), &ReentryCtxLU52i, ++ sizeof(ReentryCtxLU52i)); ++ ++ uint32_t ReentryLU12i = 0x14000015 | ((ReentryFnAddr << 32 >> 44) << 5); ++ uint32_t ReentryORi = 0x038002b5 | ((ReentryFnAddr & 0xFFF) << 10); ++ uint32_t ReentryLU32i = 0x16000015 | ((ReentryFnAddr << 12 >> 44) << 5); ++ uint32_t ReentryLU52i = 0x030002b5 | ((ReentryFnAddr >> 52) << 10); ++ ++ memcpy(ResolverWorkingMem + ReentryFnAddrOffset, &ReentryLU12i, ++ sizeof(ReentryLU12i)); ++ memcpy(ResolverWorkingMem + (ReentryFnAddrOffset + 4), &ReentryORi, ++ sizeof(ReentryORi)); ++ memcpy(ResolverWorkingMem + (ReentryFnAddrOffset + 8), &ReentryLU32i, ++ sizeof(ReentryLU32i)); ++ memcpy(ResolverWorkingMem + (ReentryFnAddrOffset + 12), &ReentryLU52i, ++ sizeof(ReentryLU52i)); ++} ++ ++void OrcLoongArch64::writeTrampolines( ++ char *TrampolineBlockWorkingMem, ++ JITTargetAddress TrampolineBlockTargetAddress, ++ JITTargetAddress ResolverFnAddr, unsigned NumTrampolines) { ++ ++ uint32_t *Trampolines = ++ reinterpret_cast(TrampolineBlockWorkingMem); ++ ++ uint64_t HiBits = ((ResolverFnAddr << 32 >> 44) << 5); ++ uint64_t LoBits = ((ResolverFnAddr & 0xFFF) << 10); ++ uint64_t HigherBits = ((ResolverFnAddr << 12 >> 44) << 5); ++ uint64_t HighestBits = ((ResolverFnAddr >> 52) << 10); ++ ++ for (unsigned I = 0; I < NumTrampolines; ++I) { ++ Trampolines[10 * I + 0] = 0x00150034; // move $t8,$ra ++ Trampolines[10 * I + 1] = ++ 0x14000015 | HiBits; // lu12i.w $r21,hi(ResolveAddr) ++ Trampolines[10 * I + 2] = ++ 0x038002b5 | LoBits; // ori $r21,$r21,lo(ResolveAddr) ++ Trampolines[10 * I + 3] = ++ 0x16000015 | HigherBits; // lu32i $r21,higher(ResolveAddr) ++ Trampolines[10 * I + 4] = ++ 0x030002b5 | HighestBits; // lu52i $r21,$r21,highest(ResolveAddr) ++ Trampolines[10 * I + 5] = 0x4c0002a1; // jirl $ra, $r21, 0 ++ } ++} ++ ++void OrcLoongArch64::writeIndirectStubsBlock( ++ char *StubsBlockWorkingMem, JITTargetAddress StubsBlockTargetAddress, ++ JITTargetAddress PointersBlockTargetAddress, unsigned NumStubs) { ++ // Stub format is: ++ // ++ // .section __orc_stubs ++ // stub1: ++ // lu12i.w $r21, %abs(ptr1)<<32>>44 ++ // ori $r21, $r21, %abs(ptr1)&0xfff ++ // lu32i.d $r21, %abs(ptr1)<<12>>44 ++ // lu52i.d $r21, $r21, %abs(ptr1)>>52 ++ // ld.d $r21, $r21, 0 ++ // jirl $r0, $r21, 0 ++ // stub2: ++ // lu12i.w $r21, %abs(ptr2)<<32>>44 ++ // ori $r21, $r21, %abs(ptr2)&0xfff ++ // lu32i.d $r21, %abs(ptr2)<<12>>44 ++ // lu52i.d $r21, $r21, %abs(ptr2)>>52 ++ // ld.d $r21, $r21, 0 ++ // jirl $r0, $r21, 0 ++ // ++ // ... ++ // ++ // .section __orc_ptrs ++ // ptr1: ++ // .dword 0x0 ++ // ptr2: ++ // .dword 0x0 ++ // ++ // ... ++ ++ assert(stubAndPointerRangesOk( ++ StubsBlockTargetAddress, PointersBlockTargetAddress, NumStubs) && ++ "PointersBlock is out of range"); ++ ++ // Populate the stubs page stubs and mark it executable. 
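// --- Illustrative aside, not part of the patch ------------------------------
// The resolver, trampolines and stubs above all materialize a 64-bit address
// into $r21 with the same four-instruction pattern: lu12i.w (bits 31..12),
// ori (bits 11..0), lu32i.d (bits 51..32), lu52i.d (bits 63..52). A
// standalone sketch of that immediate patching, using the same $r21 opcodes
// and shift expressions as writeTrampolines / writeIndirectStubsBlock (the
// helper name is ours, not LLVM's):

#include <array>
#include <cstdint>

static std::array<uint32_t, 4> encodeLoadAddrIntoR21(uint64_t Addr) {
  std::array<uint32_t, 4> Words = {{
      static_cast<uint32_t>(0x14000015 | ((Addr << 32 >> 44) << 5)), // lu12i.w $r21, bits 31..12
      static_cast<uint32_t>(0x038002b5 | ((Addr & 0xFFF) << 10)),    // ori     $r21, $r21, bits 11..0
      static_cast<uint32_t>(0x16000015 | ((Addr << 12 >> 44) << 5)), // lu32i.d $r21, bits 51..32
      static_cast<uint32_t>(0x030002b5 | ((Addr >> 52) << 10)),      // lu52i.d $r21, $r21, bits 63..52
  }};
  return Words;
}
// -----------------------------------------------------------------------------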
++ uint32_t *Stub = reinterpret_cast(StubsBlockWorkingMem); ++ uint64_t PtrAddr = PointersBlockTargetAddress; ++ ++ for (unsigned I = 0; I < NumStubs; ++I, PtrAddr += 8) { ++ uint64_t HiBits = ((PtrAddr << 32 >> 44) << 5); ++ uint64_t LoBits = ((PtrAddr & 0xFFF) << 10); ++ uint64_t HigherBits = ((PtrAddr << 12 >> 44) << 5); ++ uint64_t HighestBits = ((PtrAddr >> 52) << 10); ++ Stub[8 * I + 0] = 0x14000015 | HiBits; // lu12i.w $r21, hi(PtrAddr) ++ Stub[8 * I + 1] = 0x038002b5 | LoBits; // ori $r21, $r21, lo(PtrAddr) ++ Stub[8 * I + 2] = 0x16000015 | HigherBits; // lu32i.d $r21, higher(PtrAddr) ++ Stub[8 * I + 3] = ++ 0x030002b5 | HighestBits; // lu52i.d $r21, $r21, highest(PtrAddr) ++ Stub[8 * I + 4] = 0x28c002b5; // ld.d $r21, $r21, 0 ++ Stub[8 * I + 5] = 0x4c0002a0; // jirl $r0, $r21, 0 ++ } ++} ++ + } // End namespace orc. + } // End namespace llvm. +diff --git a/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.cpp b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.cpp +index f92618af..b41b2233 100644 +--- a/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.cpp ++++ b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.cpp +@@ -634,6 +634,191 @@ void RuntimeDyldELF::resolveARMRelocation(const SectionEntry &Section, + } + } + ++void RuntimeDyldELF::resolveLoongArch64Relocation(const SectionEntry &Section, ++ uint64_t Offset, ++ uint64_t Value, uint32_t Type, ++ int64_t Addend) { ++ uint32_t *TargetPtr = ++ reinterpret_cast(Section.getAddressWithOffset(Offset)); ++ uint64_t FinalAddress = Section.getLoadAddressWithOffset(Offset); ++ uint64_t tmp1, tmp2, tmp3; ++ ++ LLVM_DEBUG(dbgs() << "[XXX] resolveLoongArch64Relocation, LocalAddress: 0x" ++ << format("%llx", Section.getAddressWithOffset(Offset)) ++ << " FinalAddress: 0x" << format("%llx", FinalAddress) ++ << " Value: 0x" << format("%llx", Value) << " Type: 0x" ++ << format("%x", Type) << " Addend: 0x" ++ << format("%llx", Addend) << "\n"); ++ ++ switch (Type) { ++ case ELF::R_LARCH_SOP_PUSH_GPREL: ++ case ELF::R_LARCH_SOP_PUSH_TLS_TPREL: ++ case ELF::R_LARCH_SOP_PUSH_TLS_GOT: ++ case ELF::R_LARCH_SOP_PUSH_TLS_GD: ++ default: ++ llvm_unreachable("Relocation type not implemented yet!"); ++ break; ++ case ELF::R_LARCH_MARK_LA: ++ // mark la ++ MarkLA = true; ++ break; ++ case ELF::R_LARCH_SOP_PUSH_ABSOLUTE: ++ if (MarkLA && !Addend) ++ // push(value) ++ ValuesStack.push_back(Value); ++ else ++ // push(addend) ++ ValuesStack.push_back(Addend); ++ break; ++ case ELF::R_LARCH_SOP_PUSH_PLT_PCREL: ++ case ELF::R_LARCH_SOP_PUSH_PCREL: ++ MarkLA = false; ++ // push(value -pc + addend) ++ ValuesStack.push_back(Value - FinalAddress + Addend); ++ break; ++ case ELF::R_LARCH_SOP_NOT: ++ // pop(tmp1) ++ // push(!tmp1) ++ tmp1 = ValuesStack.pop_back_val(); ++ ValuesStack.push_back(!tmp1); ++ break; ++ case ELF::R_LARCH_SOP_AND: ++ // pop(tmp2) ++ // pop(tmp1) ++ // push(tmp1 & tmp2) ++ tmp2 = ValuesStack.pop_back_val(); ++ tmp1 = ValuesStack.pop_back_val(); ++ ValuesStack.push_back(tmp1 & tmp2); ++ break; ++ case ELF::R_LARCH_SOP_IF_ELSE: ++ // pop(tmp3) ++ // pop(tmp2) ++ // pop(tmp1) ++ // push(tmp1 ? tmp2 : tmp3) ++ tmp3 = ValuesStack.pop_back_val(); ++ tmp2 = ValuesStack.pop_back_val(); ++ tmp1 = ValuesStack.pop_back_val(); ++ ValuesStack.push_back(tmp1 ? 
tmp2 : tmp3); ++ break; ++ case ELF::R_LARCH_SOP_ADD: ++ // pop(tmp2) ++ // pop(tmp1) ++ // push(tmp1 + tmp2) ++ tmp2 = ValuesStack.pop_back_val(); ++ tmp1 = ValuesStack.pop_back_val(); ++ ValuesStack.push_back(tmp1 + tmp2); ++ break; ++ case ELF::R_LARCH_SOP_SUB: ++ // pop(tmp2) ++ // pop(tmp1) ++ // push(tmp1 - tmp2) ++ tmp2 = ValuesStack.pop_back_val(); ++ tmp1 = ValuesStack.pop_back_val(); ++ ValuesStack.push_back(tmp1 - tmp2); ++ break; ++ case ELF::R_LARCH_SOP_SR: ++ // pop(tmp2) ++ // pop(tmp1) ++ // push(tmp1 >> tmp2) ++ tmp2 = ValuesStack.pop_back_val(); ++ tmp1 = ValuesStack.pop_back_val(); ++ ValuesStack.push_back(tmp1 >> tmp2); ++ break; ++ case ELF::R_LARCH_SOP_SL: ++ // pop(tmp2) ++ // pop(tmp1) ++ // push(tmp1 << tmp2) ++ tmp2 = ValuesStack.pop_back_val(); ++ tmp1 = ValuesStack.pop_back_val(); ++ ValuesStack.push_back(tmp1 << tmp2); ++ break; ++ case ELF::R_LARCH_32: ++ support::ulittle32_t::ref{TargetPtr} = ++ static_cast(Value + Addend); ++ break; ++ case ELF::R_LARCH_64: ++ support::ulittle64_t::ref{TargetPtr} = Value + Addend; ++ break; ++ case ELF::R_LARCH_SOP_POP_32_U_10_12: ++ case ELF::R_LARCH_SOP_POP_32_S_10_12: ++ // pop(tmp1) ++ // get(inst) ++ // inst=(inst & 0xffc003ff)|((tmp1 & 0xfff) << 10) ++ // write(inst) ++ tmp1 = ValuesStack.pop_back_val(); ++ support::ulittle32_t::ref{TargetPtr} = ++ (support::ulittle32_t::ref{TargetPtr} & 0xffc003ff) | ++ static_cast((tmp1 & 0xfff) << 10); ++ break; ++ case ELF::R_LARCH_SOP_POP_32_S_5_20: ++ // pop(tmp1) ++ // get(inst) ++ // inst=(inst & 0xfe00001f)|((tmp1 & 0xfffff) << 5) ++ // write(inst) ++ tmp1 = ValuesStack.pop_back_val(); ++ support::ulittle32_t::ref{TargetPtr} = ++ (support::ulittle32_t::ref{TargetPtr} & 0xfe00001f) | ++ static_cast((tmp1 & 0xfffff) << 5); ++ break; ++ case ELF::R_LARCH_SOP_POP_32_S_10_16_S2: ++ // pop(tmp1) ++ // tmp1 >>=2 ++ // get(inst) ++ // inst=(inst & 0xfc0003ff)|((tmp1 & 0xffff) << 10) ++ // write(inst) ++ tmp1 = ValuesStack.pop_back_val(); ++ tmp1 >>= 2; ++ support::ulittle32_t::ref{TargetPtr} = ++ (support::ulittle32_t::ref{TargetPtr} & 0xfc0003ff) | ++ static_cast((tmp1 & 0xffff) << 10); ++ break; ++ case ELF::R_LARCH_SOP_POP_32_S_0_5_10_16_S2: ++ // pop(tmp1) ++ // tmp1 >>= 2 ++ // get(inst) ++ // inst=(inst & 0xfc0003e0)|((tmp1 & 0xffff) << 10)|((tmp1 & 0x1f0000) >> ++ // 16) write(inst) ++ tmp1 = ValuesStack.pop_back_val(); ++ tmp1 >>= 2; ++ support::ulittle32_t::ref{TargetPtr} = ++ (support::ulittle32_t::ref{TargetPtr} & 0xfc0003e0) | ++ static_cast((tmp1 & 0xffff) << 10) | ++ static_cast((tmp1 & 0x1f0000) >> 16); ++ break; ++ case ELF::R_LARCH_SOP_POP_32_S_0_10_10_16_S2: ++ // pop(tmp1) ++ // tmp1 >>= 2 ++ // get(inst) ++ // inst=(inst & 0xfc000000)|((tmp1 & 0xffff) << 10)|((tmp1 & 0x3ff0000) >> ++ // 16) write(inst) ++ tmp1 = ValuesStack.pop_back_val(); ++ tmp1 >>= 2; ++ support::ulittle32_t::ref{TargetPtr} = ++ (support::ulittle32_t::ref{TargetPtr} & 0xfc000000) | ++ static_cast((tmp1 & 0xffff) << 10) | ++ static_cast((tmp1 & 0x3ff0000) >> 16); ++ break; ++ case ELF::R_LARCH_ADD32: ++ support::ulittle32_t::ref{TargetPtr} = ++ (support::ulittle32_t::ref{TargetPtr} + ++ static_cast(Value + Addend)); ++ break; ++ case ELF::R_LARCH_SUB32: ++ support::ulittle32_t::ref{TargetPtr} = ++ (support::ulittle32_t::ref{TargetPtr} - ++ static_cast(Value + Addend)); ++ break; ++ case ELF::R_LARCH_ADD64: ++ support::ulittle64_t::ref{TargetPtr} = ++ (support::ulittle64_t::ref{TargetPtr} + Value + Addend); ++ break; ++ case ELF::R_LARCH_SUB64: ++ support::ulittle64_t::ref{TargetPtr} = ++ 
(support::ulittle64_t::ref{TargetPtr} - Value - Addend); ++ break; ++ } ++} ++ + void RuntimeDyldELF::setMipsABI(const ObjectFile &Obj) { + if (Arch == Triple::UnknownArch || + !StringRef(Triple::getArchTypePrefix(Arch)).equals("mips")) { +@@ -1050,6 +1235,9 @@ void RuntimeDyldELF::resolveRelocation(const SectionEntry &Section, + resolveARMRelocation(Section, Offset, (uint32_t)(Value & 0xffffffffL), Type, + (uint32_t)(Addend & 0xffffffffL)); + break; ++ case Triple::loongarch64: ++ resolveLoongArch64Relocation(Section, Offset, Value, Type, Addend); ++ break; + case Triple::ppc: // Fall through. + case Triple::ppcle: + resolvePPC32Relocation(Section, Offset, Value, Type, Addend); +@@ -1362,6 +1550,25 @@ RuntimeDyldELF::processRelocationRef( + } + processSimpleRelocation(SectionID, Offset, RelType, Value); + } ++ } else if (Arch == Triple::loongarch64) { ++ RTDyldSymbolTable::const_iterator Loc = GlobalSymbolTable.find(TargetName); ++ if (!TargetName.empty()) { ++ if (Loc == GlobalSymbolTable.end()) { ++ IsSaved = true; ++ SavedSymbol = TargetName; ++ } else { ++ IsSaved = false; ++ } ++ } ++ if (IsSaved == true) { ++ Value.SymbolName = SavedSymbol.data(); ++ processSimpleRelocation(SectionID, Offset, RelType, Value); ++ } else { ++ uint8_t *TargetAddr = getSymbolLocalAddress(TargetName); ++ resolveRelocation(Sections[SectionID], Offset, ++ reinterpret_cast(TargetAddr), RelType, ++ Addend); ++ } + } else if (IsMipsO32ABI) { + uint8_t *Placeholder = reinterpret_cast( + computePlaceholderAddress(SectionID, Offset)); +@@ -2211,6 +2418,7 @@ size_t RuntimeDyldELF::getGOTEntrySize() { + case Triple::x86_64: + case Triple::aarch64: + case Triple::aarch64_be: ++ case Triple::loongarch64: + case Triple::ppc64: + case Triple::ppc64le: + case Triple::systemz: +diff --git a/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.h b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.h +index 1251036f..ba898f65 100644 +--- a/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.h ++++ b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.h +@@ -48,6 +48,10 @@ class RuntimeDyldELF : public RuntimeDyldImpl { + void resolveARMRelocation(const SectionEntry &Section, uint64_t Offset, + uint32_t Value, uint32_t Type, int32_t Addend); + ++ void resolveLoongArch64Relocation(const SectionEntry &Section, ++ uint64_t Offset, uint64_t Value, ++ uint32_t Type, int64_t Addend); ++ + void resolvePPC32Relocation(const SectionEntry &Section, uint64_t Offset, + uint64_t Value, uint32_t Type, int64_t Addend); + +@@ -155,6 +159,12 @@ private: + // EH frame sections with the memory manager. 
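// --- Illustrative aside, not part of the patch ------------------------------
// resolveLoongArch64Relocation() above implements the older stack-based
// LoongArch relocation model: PUSH_* operators push values, ADD/SUB/SL/SR/
// AND/IF_ELSE combine them, and the POP_32_* operators write the popped
// result into an immediate field of the instruction at the relocation offset.
// A minimal sketch of the two field-patching steps, using the same masks as
// the code above (function names are ours):

#include <cstdint>

// R_LARCH_SOP_POP_32_S_5_20: write a 20-bit value into bits 24..5
// (the si20 field used by lu12i.w and friends).
static uint32_t patchSi20(uint32_t Inst, uint64_t Val) {
  return (Inst & 0xfe00001f) | static_cast<uint32_t>((Val & 0xfffff) << 5);
}

// R_LARCH_SOP_POP_32_[US]_10_12: write a 12-bit value into bits 21..10
// (the immediate field of addi/ori/ld/st-class instructions).
static uint32_t patchImm12(uint32_t Inst, uint64_t Val) {
  return (Inst & 0xffc003ff) | static_cast<uint32_t>((Val & 0xfff) << 10);
}
// -----------------------------------------------------------------------------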
+ SmallVector UnregisteredEHFrameSections; + ++ // For loongarch evaluteRelocation ++ SmallVector ValuesStack; ++ bool IsSaved; ++ bool MarkLA; ++ StringRef SavedSymbol; ++ + // Map between GOT relocation value and corresponding GOT offset + std::map GOTOffsetMap; + +diff --git a/lib/IR/Function.cpp b/lib/IR/Function.cpp +index 726ba80d..26819818 100644 +--- a/lib/IR/Function.cpp ++++ b/lib/IR/Function.cpp +@@ -37,6 +37,7 @@ + #include "llvm/IR/IntrinsicsARM.h" + #include "llvm/IR/IntrinsicsBPF.h" + #include "llvm/IR/IntrinsicsHexagon.h" ++#include "llvm/IR/IntrinsicsLoongArch.h" + #include "llvm/IR/IntrinsicsMips.h" + #include "llvm/IR/IntrinsicsNVPTX.h" + #include "llvm/IR/IntrinsicsPowerPC.h" +diff --git a/lib/Object/ELF.cpp b/lib/Object/ELF.cpp +index 56a42621..50a7c68e 100644 +--- a/lib/Object/ELF.cpp ++++ b/lib/Object/ELF.cpp +@@ -94,6 +94,13 @@ StringRef llvm::object::getELFRelocationTypeName(uint32_t Machine, + break; + } + break; ++ case ELF::EM_LOONGARCH: ++ switch (Type) { ++#include "llvm/BinaryFormat/ELFRelocs/LoongArch.def" ++ default: ++ break; ++ } ++ break; + case ELF::EM_PPC: + switch (Type) { + #include "llvm/BinaryFormat/ELFRelocs/PowerPC.def" +diff --git a/lib/Object/RelocationResolver.cpp b/lib/Object/RelocationResolver.cpp +index 00a45e2c..23fd0f57 100644 +--- a/lib/Object/RelocationResolver.cpp ++++ b/lib/Object/RelocationResolver.cpp +@@ -468,6 +468,28 @@ static uint64_t resolveRISCV(uint64_t Type, uint64_t Offset, uint64_t S, + } + } + ++static bool supportsLoongArch(uint64_t Type) { ++ switch (Type) { ++ case ELF::R_LARCH_32: ++ case ELF::R_LARCH_64: ++ return true; ++ default: ++ return false; ++ } ++} ++ ++static uint64_t resolveLoongArch(uint64_t Type, uint64_t Offset, uint64_t S, ++ uint64_t LocData, int64_t Addend) { ++ switch (Type) { ++ case ELF::R_LARCH_32: ++ return (S + Addend) & 0xFFFFFFFF; ++ case ELF::R_LARCH_64: ++ return S + Addend; ++ default: ++ llvm_unreachable("Invalid relocation type"); ++ } ++} ++ + static bool supportsCOFFX86(uint64_t Type) { + switch (Type) { + case COFF::IMAGE_REL_I386_SECREL: +@@ -682,6 +704,8 @@ getRelocationResolver(const ObjectFile &Obj) { + return {supportsAmdgpu, resolveAmdgpu}; + case Triple::riscv64: + return {supportsRISCV, resolveRISCV}; ++ case Triple::loongarch64: ++ return {supportsLoongArch, resolveLoongArch}; + default: + return {nullptr, nullptr}; + } +@@ -715,6 +739,8 @@ getRelocationResolver(const ObjectFile &Obj) { + return {supportsHexagon, resolveHexagon}; + case Triple::riscv32: + return {supportsRISCV, resolveRISCV}; ++ case Triple::loongarch32: ++ return {supportsLoongArch, resolveLoongArch}; + default: + return {nullptr, nullptr}; + } +diff --git a/lib/ObjectYAML/ELFYAML.cpp b/lib/ObjectYAML/ELFYAML.cpp +index 3d4cd408..abe8c250 100644 +--- a/lib/ObjectYAML/ELFYAML.cpp ++++ b/lib/ObjectYAML/ELFYAML.cpp +@@ -344,6 +344,7 @@ void ScalarEnumerationTraits::enumeration( + ECase(EM_BPF); + ECase(EM_VE); + ECase(EM_CSKY); ++ ECase(EM_LOONGARCH); + #undef ECase + IO.enumFallback(Value); + } +@@ -599,6 +600,14 @@ void ScalarBitSetTraits::bitset(IO &IO, + break; + } + break; ++ case ELF::EM_LOONGARCH: ++ BCaseMask(EF_LARCH_BASE_ABI_ILP32S, EF_LARCH_BASE_ABI); ++ BCaseMask(EF_LARCH_BASE_ABI_ILP32F, EF_LARCH_BASE_ABI); ++ BCaseMask(EF_LARCH_BASE_ABI_ILP32D, EF_LARCH_BASE_ABI); ++ BCaseMask(EF_LARCH_BASE_ABI_LP64S, EF_LARCH_BASE_ABI); ++ BCaseMask(EF_LARCH_BASE_ABI_LP64F, EF_LARCH_BASE_ABI); ++ BCaseMask(EF_LARCH_BASE_ABI_LP64D, EF_LARCH_BASE_ABI); ++ break; + default: + break; + } +@@ -847,6 +856,8 @@ void 
ScalarEnumerationTraits::enumeration( + break; + case ELF::EM_68K: + #include "llvm/BinaryFormat/ELFRelocs/M68k.def" ++ case ELF::EM_LOONGARCH: ++#include "llvm/BinaryFormat/ELFRelocs/LoongArch.def" + break; + default: + // Nothing to do. +diff --git a/lib/Support/Host.cpp b/lib/Support/Host.cpp +index a82a4d45..35e74b80 100644 +--- a/lib/Support/Host.cpp ++++ b/lib/Support/Host.cpp +@@ -1282,6 +1282,45 @@ StringRef sys::getHostCPUName() { + StringRef Content = P ? P->getBuffer() : ""; + return detail::getHostCPUNameForS390x(Content); + } ++#elif defined(__linux__) && defined(__loongarch__) ++// loongarch prid register ++// +----------------+----------------+----------------+----------------+ ++// | Company Options| Company ID | Processor ID | Revision | ++// +----------------+----------------+----------------+----------------+ ++// 31 24 23 16 15 8 7 0 ++ ++#define PRID_OPT_MASK 0xff000000 ++#define PRID_COMP_MASK 0xff0000 ++#define PRID_COMP_LOONGSON 0x140000 ++#define PRID_IMP_MASK 0xff00 ++ ++#define PRID_IMP_LOONGSON_32 0x4200 /* Loongson 32bit */ ++#define PRID_IMP_LOONGSON_64R 0x6100 /* Reduced Loongson 64bit */ ++#define PRID_IMP_LOONGSON_64C 0x6300 /* Classic Loongson 64bit */ ++#define PRID_IMP_LOONGSON_64G 0xc000 /* Generic Loongson 64bit */ ++ ++StringRef sys::getHostCPUName() { ++ // use prid to detect cpu name ++ unsigned CPUCFG_NUM = 0; // prid ++ unsigned prid; ++ ++ __asm__("cpucfg %[prid], %[CPUCFG_NUM]\n\t" ++ :[prid]"=r"(prid) ++ :[CPUCFG_NUM]"r"(CPUCFG_NUM)); ++ ++ if ((prid & PRID_COMP_MASK) == PRID_COMP_LOONGSON) {// for Loongson ++ switch (prid & PRID_IMP_MASK) { ++ case PRID_IMP_LOONGSON_32: // not support ++ return "generic-la32"; ++ case PRID_IMP_LOONGSON_64R: ++ case PRID_IMP_LOONGSON_64C: ++ case PRID_IMP_LOONGSON_64G: ++ return "la464"; ++ } ++ } ++ ++ return "generic"; ++} + #elif defined(__MVS__) + StringRef sys::getHostCPUName() { + // Get pointer to Communications Vector Table (CVT). +@@ -1759,6 +1798,36 @@ bool sys::getHostCPUFeatures(StringMap &Features) { + + return true; + } ++#elif defined(__linux__) && defined(__loongarch__) ++bool sys::getHostCPUFeatures(StringMap &Features) { ++ std::unique_ptr P = getProcCpuinfoContent(); ++ if (!P) ++ return false; ++ ++ SmallVector Lines; ++ P->getBuffer().split(Lines, "\n"); ++ ++ SmallVector CPUFeatures; ++ ++ // Look for the CPU features. 
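// --- Illustrative aside, not part of the patch ------------------------------
// getHostCPUName() above reads the PRID word with the cpucfg instruction and
// decodes the company/processor fields shown in the bit-layout comment. The
// same decode, factored into a pure function that can be exercised off-target
// (the helper name is ours; the masks and cases match the code above):

#include <cstdint>

static const char *cpuNameFromPrid(uint32_t Prid) {
  if ((Prid & 0xff0000) != 0x140000) // company ID != Loongson
    return "generic";
  switch (Prid & 0xff00) {           // processor ID field
  case 0x4200:                       // Loongson 32-bit
    return "generic-la32";
  case 0x6100:                       // reduced Loongson 64-bit
  case 0x6300:                       // classic Loongson 64-bit
  case 0xc000:                       // generic Loongson 64-bit
    return "la464";
  default:
    return "generic";
  }
}
// -----------------------------------------------------------------------------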
++ for (unsigned I = 0, E = Lines.size(); I != E; ++I) ++ if (Lines[I].startswith("features")) { ++ Lines[I].split(CPUFeatures, ' '); ++ break; ++ } ++ ++ for (unsigned I = 0, E = CPUFeatures.size(); I != E; ++I) { ++ StringRef LLVMFeatureStr = StringSwitch(CPUFeatures[I]) ++ .Case("lsx", "lsx") ++ .Case("lasx", "lasx") ++ .Default(""); ++ ++ if (LLVMFeatureStr != "") ++ Features[LLVMFeatureStr] = true; ++ } ++ ++ return true; ++} + #else + bool sys::getHostCPUFeatures(StringMap &Features) { return false; } + #endif +diff --git a/lib/Support/Triple.cpp b/lib/Support/Triple.cpp +index a9afcc9d..d27e8821 100644 +--- a/lib/Support/Triple.cpp ++++ b/lib/Support/Triple.cpp +@@ -44,6 +44,8 @@ StringRef Triple::getArchTypeName(ArchType Kind) { + case lanai: return "lanai"; + case le32: return "le32"; + case le64: return "le64"; ++ case loongarch32: return "loongarch32"; ++ case loongarch64: return "loongarch64"; + case m68k: return "m68k"; + case mips64: return "mips64"; + case mips64el: return "mips64el"; +@@ -164,6 +166,9 @@ StringRef Triple::getArchTypePrefix(ArchType Kind) { + + case ve: return "ve"; + case csky: return "csky"; ++ ++ case loongarch32: ++ case loongarch64: return "loongarch"; + } + } + +@@ -250,6 +255,7 @@ StringRef Triple::getEnvironmentTypeName(EnvironmentType Kind) { + case GNUEABIHF: return "gnueabihf"; + case GNUX32: return "gnux32"; + case GNUILP32: return "gnu_ilp32"; ++ case GNUABILPX32: return "gnuabilpx32"; + case Itanium: return "itanium"; + case MSVC: return "msvc"; + case MacABI: return "macabi"; +@@ -340,6 +346,8 @@ Triple::ArchType Triple::getArchTypeForLLVMName(StringRef Name) { + .Case("renderscript64", renderscript64) + .Case("ve", ve) + .Case("csky", csky) ++ .Case("loongarch32", loongarch32) ++ .Case("loongarch64", loongarch64) + .Default(UnknownArch); + } + +@@ -475,6 +483,8 @@ static Triple::ArchType parseArch(StringRef ArchName) { + .Case("wasm32", Triple::wasm32) + .Case("wasm64", Triple::wasm64) + .Case("csky", Triple::csky) ++ .Case("loongarch32", Triple::loongarch32) ++ .Case("loongarch64", Triple::loongarch64) + .Default(Triple::UnknownArch); + + // Some architectures require special parsing logic just to compute the +@@ -731,6 +741,8 @@ static Triple::ObjectFormatType getDefaultFormat(const Triple &T) { + case Triple::lanai: + case Triple::le32: + case Triple::le64: ++ case Triple::loongarch32: ++ case Triple::loongarch64: + case Triple::m68k: + case Triple::mips64: + case Triple::mips64el: +@@ -813,6 +825,7 @@ Triple::Triple(const Twine &Str) + .StartsWith("mipsisa64", Triple::GNUABI64) + .StartsWith("mipsisa32", Triple::GNU) + .Cases("mips", "mipsel", "mipsr6", "mipsr6el", Triple::GNU) ++ .Cases("loongarch32", "loongarch64", Triple::GNU) + .Default(UnknownEnvironment); + } + } +@@ -1290,6 +1303,7 @@ static unsigned getArchPointerBitWidth(llvm::Triple::ArchType Arch) { + case llvm::Triple::kalimba: + case llvm::Triple::lanai: + case llvm::Triple::le32: ++ case llvm::Triple::loongarch32: + case llvm::Triple::m68k: + case llvm::Triple::mips: + case llvm::Triple::mipsel: +@@ -1321,6 +1335,7 @@ static unsigned getArchPointerBitWidth(llvm::Triple::ArchType Arch) { + case llvm::Triple::bpfel: + case llvm::Triple::hsail64: + case llvm::Triple::le64: ++ case llvm::Triple::loongarch64: + case llvm::Triple::mips64: + case llvm::Triple::mips64el: + case llvm::Triple::nvptx64: +@@ -1377,6 +1392,7 @@ Triple Triple::get32BitArchVariant() const { + case Triple::kalimba: + case Triple::lanai: + case Triple::le32: ++ case Triple::loongarch32: + case Triple::m68k: 
+ case Triple::mips: + case Triple::mipsel: +@@ -1412,6 +1428,7 @@ Triple Triple::get32BitArchVariant() const { + case Triple::mips64el: + T.setArch(Triple::mipsel, getSubArch()); + break; ++ case Triple::loongarch64: T.setArch(Triple::loongarch32); break; + case Triple::nvptx64: T.setArch(Triple::nvptx); break; + case Triple::ppc64: T.setArch(Triple::ppc); break; + case Triple::ppc64le: T.setArch(Triple::ppcle); break; +@@ -1455,6 +1472,7 @@ Triple Triple::get64BitArchVariant() const { + case Triple::bpfel: + case Triple::hsail64: + case Triple::le64: ++ case Triple::loongarch64: + case Triple::mips64: + case Triple::mips64el: + case Triple::nvptx64: +@@ -1484,6 +1502,7 @@ Triple Triple::get64BitArchVariant() const { + case Triple::mipsel: + T.setArch(Triple::mips64el, getSubArch()); + break; ++ case Triple::loongarch32: T.setArch(Triple::loongarch64); break; + case Triple::nvptx: T.setArch(Triple::nvptx64); break; + case Triple::ppc: T.setArch(Triple::ppc64); break; + case Triple::ppcle: T.setArch(Triple::ppc64le); break; +@@ -1517,6 +1536,8 @@ Triple Triple::getBigEndianArchVariant() const { + case Triple::kalimba: + case Triple::le32: + case Triple::le64: ++ case Triple::loongarch32: ++ case Triple::loongarch64: + case Triple::msp430: + case Triple::nvptx64: + case Triple::nvptx: +@@ -1617,6 +1638,8 @@ bool Triple::isLittleEndian() const { + case Triple::kalimba: + case Triple::le32: + case Triple::le64: ++ case Triple::loongarch32: ++ case Triple::loongarch64: + case Triple::mips64el: + case Triple::mipsel: + case Triple::msp430: +diff --git a/lib/Target/LoongArch/AsmParser/CMakeLists.txt b/lib/Target/LoongArch/AsmParser/CMakeLists.txt +new file mode 100644 +index 00000000..cb8b768d +--- /dev/null ++++ b/lib/Target/LoongArch/AsmParser/CMakeLists.txt +@@ -0,0 +1,13 @@ ++add_llvm_component_library(LLVMLoongArchAsmParser ++ LoongArchAsmParser.cpp ++ ++ LINK_COMPONENTS ++ MC ++ MCParser ++ LoongArchDesc ++ LoongArchInfo ++ Support ++ ++ ADD_TO_COMPONENT ++ LoongArch ++ ) +diff --git a/lib/Target/LoongArch/AsmParser/LoongArchAsmParser.cpp b/lib/Target/LoongArch/AsmParser/LoongArchAsmParser.cpp +new file mode 100644 +index 00000000..16854bab +--- /dev/null ++++ b/lib/Target/LoongArch/AsmParser/LoongArchAsmParser.cpp +@@ -0,0 +1,2207 @@ ++//===-- LoongArchAsmParser.cpp - Parse LoongArch assembly to MCInst instructions ----===// ++// ++// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. ++// See https://llvm.org/LICENSE.txt for license information. 
++// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception ++// ++//===----------------------------------------------------------------------===// ++ ++#include "LoongArchTargetStreamer.h" ++#include "MCTargetDesc/LoongArchABIInfo.h" ++#include "MCTargetDesc/LoongArchAnalyzeImmediate.h" ++#include "MCTargetDesc/LoongArchBaseInfo.h" ++#include "MCTargetDesc/LoongArchMCExpr.h" ++#include "MCTargetDesc/LoongArchMCTargetDesc.h" ++#include "TargetInfo/LoongArchTargetInfo.h" ++#include "llvm/ADT/APFloat.h" ++#include "llvm/ADT/STLExtras.h" ++#include "llvm/ADT/SmallVector.h" ++#include "llvm/ADT/StringRef.h" ++#include "llvm/ADT/StringSwitch.h" ++#include "llvm/ADT/Triple.h" ++#include "llvm/ADT/Twine.h" ++#include "llvm/BinaryFormat/ELF.h" ++#include "llvm/MC/MCContext.h" ++#include "llvm/MC/MCExpr.h" ++#include "llvm/MC/MCInst.h" ++#include "llvm/MC/MCInstrDesc.h" ++#include "llvm/MC/MCObjectFileInfo.h" ++#include "llvm/MC/MCParser/MCAsmLexer.h" ++#include "llvm/MC/MCParser/MCAsmParser.h" ++#include "llvm/MC/MCParser/MCAsmParserExtension.h" ++#include "llvm/MC/MCParser/MCParsedAsmOperand.h" ++#include "llvm/MC/MCParser/MCTargetAsmParser.h" ++#include "llvm/MC/MCSectionELF.h" ++#include "llvm/MC/MCStreamer.h" ++#include "llvm/MC/MCSubtargetInfo.h" ++#include "llvm/MC/MCSymbol.h" ++#include "llvm/MC/MCSymbolELF.h" ++#include "llvm/MC/MCValue.h" ++#include "llvm/MC/SubtargetFeature.h" ++#include "llvm/MC/TargetRegistry.h" ++#include "llvm/Support/Casting.h" ++#include "llvm/Support/CommandLine.h" ++#include "llvm/Support/Compiler.h" ++#include "llvm/Support/Debug.h" ++#include "llvm/Support/ErrorHandling.h" ++#include "llvm/Support/MathExtras.h" ++#include "llvm/Support/SMLoc.h" ++#include "llvm/Support/SourceMgr.h" ++#include "llvm/Support/raw_ostream.h" ++#include ++#include ++#include ++#include ++#include ++#include ++ ++using namespace llvm; ++ ++#define DEBUG_TYPE "loongarch-asm-parser" ++ ++namespace llvm { ++ ++class MCInstrInfo; ++ ++} // end namespace llvm ++ ++namespace { ++ ++class LoongArchAssemblerOptions { ++public: ++ LoongArchAssemblerOptions(const FeatureBitset &Features_) : Features(Features_) {} ++ ++ LoongArchAssemblerOptions(const LoongArchAssemblerOptions *Opts) { ++ Features = Opts->getFeatures(); ++ } ++ ++ const FeatureBitset &getFeatures() const { return Features; } ++ void setFeatures(const FeatureBitset &Features_) { Features = Features_; } ++ ++private: ++ FeatureBitset Features; ++}; ++ ++} // end anonymous namespace ++ ++namespace { ++ ++class LoongArchAsmParser : public MCTargetAsmParser { ++ LoongArchTargetStreamer &getTargetStreamer() { ++ MCTargetStreamer &TS = *getParser().getStreamer().getTargetStreamer(); ++ return static_cast(TS); ++ } ++ ++ LoongArchABIInfo ABI; ++ SmallVector, 2> AssemblerOptions; ++ MCSymbol *CurrentFn; // Pointer to the function being parsed. It may be a ++ // nullptr, which indicates that no function is currently ++ // selected. This usually happens after an '.end' ++ // directive. ++ bool IsPicEnabled; ++ ++ // Map of register aliases created via the .set directive. 
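The AssemblerOptions vector declared above keeps snapshots of the subtarget feature bits: the first entry records the module-level defaults that directives must not clobber, and the last entry is the working copy that the feature-toggling helpers further down operate on. A minimal, self-contained sketch of that snapshot idea, using std::bitset in place of LLVM's FeatureBitset (all names here are illustrative, not from the patch):

    #include <bitset>
    #include <cstdio>
    #include <vector>

    // Stand-in for LLVM's FeatureBitset: one bit per subtarget feature.
    using FeatureSet = std::bitset<64>;

    struct AssemblerOptions {
      // Options.front() holds the immutable module defaults;
      // Options.back() is the view that feature-toggling directives modify.
      std::vector<FeatureSet> Options;

      explicit AssemblerOptions(FeatureSet Defaults) : Options{Defaults, Defaults} {}

      void enable(unsigned Bit)  { Options.back().set(Bit); }
      void disable(unsigned Bit) { Options.back().reset(Bit); }
      bool has(unsigned Bit) const { return Options.back().test(Bit); }
    };

    int main() {
      enum { FeatLSX = 0, FeatLASX = 1 };
      AssemblerOptions Opts(FeatureSet().set(FeatLSX)); // LSX on by default
      Opts.enable(FeatLASX);                            // a directive turns LASX on
      std::printf("lsx=%d lasx=%d\n", Opts.has(FeatLSX), Opts.has(FeatLASX));
    }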
++ StringMap RegisterSets; ++ ++#define GET_ASSEMBLER_HEADER ++#include "LoongArchGenAsmMatcher.inc" ++ ++ unsigned checkTargetMatchPredicate(MCInst &Inst) override; ++ ++ bool MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode, ++ OperandVector &Operands, MCStreamer &Out, ++ uint64_t &ErrorInfo, ++ bool MatchingInlineAsm) override; ++ ++ /// Parse a register as used in CFI directives ++ bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc) override; ++ OperandMatchResultTy tryParseRegister(unsigned &RegNo, SMLoc &StartLoc, ++ SMLoc &EndLoc) override; ++ ++ bool mnemonicIsValid(StringRef Mnemonic); ++ ++ bool ParseInstruction(ParseInstructionInfo &Info, StringRef Name, ++ SMLoc NameLoc, OperandVector &Operands) override; ++ ++ bool ParseDirective(AsmToken DirectiveID) override; ++ ++ OperandMatchResultTy parseMemOperand(OperandVector &Operands); ++ OperandMatchResultTy ++ matchAnyRegisterNameWithoutDollar(OperandVector &Operands, ++ StringRef Identifier, SMLoc S); ++ OperandMatchResultTy matchAnyRegisterWithoutDollar(OperandVector &Operands, ++ const AsmToken &Token, ++ SMLoc S); ++ OperandMatchResultTy matchAnyRegisterWithoutDollar(OperandVector &Operands, ++ SMLoc S); ++ OperandMatchResultTy parseAnyRegister(OperandVector &Operands); ++ OperandMatchResultTy parseJumpTarget(OperandVector &Operands); ++ ++ bool searchSymbolAlias(OperandVector &Operands); ++ ++ bool parseOperand(OperandVector &, StringRef Mnemonic); ++ ++ enum MacroExpanderResultTy { ++ MER_NotAMacro, ++ MER_Success, ++ MER_Fail, ++ }; ++ ++ // Expands assembly pseudo instructions. ++ MacroExpanderResultTy tryExpandInstruction(MCInst &Inst, SMLoc IDLoc, ++ MCStreamer &Out, ++ const MCSubtargetInfo *STI); ++ ++ bool expandLoadImm(MCInst &Inst, SMLoc IDLoc, MCStreamer &Out, ++ const MCSubtargetInfo *STI); ++ ++ bool expandLoadAddress(MCInst &Inst, SMLoc IDLoc, MCStreamer &Out, ++ const MCSubtargetInfo *STI); ++ ++ bool reportParseError(Twine ErrorMsg); ++ ++ bool parseMemOffset(const MCExpr *&Res); ++ ++ bool isEvaluated(const MCExpr *Expr); ++ bool parseDirectiveSet(); ++ ++ bool parseSetAssignment(); ++ ++ bool parseInternalDirectiveReallowModule(); ++ ++ int matchCPURegisterName(StringRef Symbol); ++ ++ int matchFPURegisterName(StringRef Name); ++ ++ int matchFCFRRegisterName(StringRef Name); ++ int matchFCSRRegisterName(StringRef Name); ++ ++ int matchLSX128RegisterName(StringRef Name); ++ ++ int matchLASX256RegisterName(StringRef Name); ++ ++ bool processInstruction(MCInst &Inst, SMLoc IDLoc, MCStreamer &Out, ++ const MCSubtargetInfo *STI); ++ ++ // Helper function that checks if the value of a vector index is within the ++ // boundaries of accepted values for each RegisterKind ++ // Example: VINSGR2VR.B $v0[n], $1 => 16 > n >= 0 ++ bool validateLSXIndex(int Val, int RegKind); ++ ++ void setFeatureBits(uint64_t Feature, StringRef FeatureString) { ++ if (!(getSTI().getFeatureBits()[Feature])) { ++ MCSubtargetInfo &STI = copySTI(); ++ setAvailableFeatures( ++ ComputeAvailableFeatures(STI.ToggleFeature(FeatureString))); ++ AssemblerOptions.back()->setFeatures(STI.getFeatureBits()); ++ } ++ } ++ ++ void clearFeatureBits(uint64_t Feature, StringRef FeatureString) { ++ if (getSTI().getFeatureBits()[Feature]) { ++ MCSubtargetInfo &STI = copySTI(); ++ setAvailableFeatures( ++ ComputeAvailableFeatures(STI.ToggleFeature(FeatureString))); ++ AssemblerOptions.back()->setFeatures(STI.getFeatureBits()); ++ } ++ } ++ ++ void setModuleFeatureBits(uint64_t Feature, StringRef FeatureString) { ++ setFeatureBits(Feature, 
FeatureString); ++ AssemblerOptions.front()->setFeatures(getSTI().getFeatureBits()); ++ } ++ ++ void clearModuleFeatureBits(uint64_t Feature, StringRef FeatureString) { ++ clearFeatureBits(Feature, FeatureString); ++ AssemblerOptions.front()->setFeatures(getSTI().getFeatureBits()); ++ } ++ ++public: ++ enum LoongArchMatchResultTy { ++ Match_RequiresNoZeroRegister = FIRST_TARGET_MATCH_RESULT_TY, ++ Match_RequiresNoRaRegister, ++ Match_RequiresRange0_31, ++ Match_RequiresRange0_63, ++ Match_MsbHigherThanLsb, ++ Match_RequiresPosSizeUImm6, ++#define GET_OPERAND_DIAGNOSTIC_TYPES ++#include "LoongArchGenAsmMatcher.inc" ++#undef GET_OPERAND_DIAGNOSTIC_TYPES ++ }; ++ ++ LoongArchAsmParser(const MCSubtargetInfo &sti, MCAsmParser &parser, ++ const MCInstrInfo &MII, const MCTargetOptions &Options) ++ : MCTargetAsmParser(Options, sti, MII), ++ ABI(LoongArchABIInfo::computeTargetABI(Triple(sti.getTargetTriple()), ++ sti.getCPU(), Options)) { ++ MCAsmParserExtension::Initialize(parser); ++ ++ parser.addAliasForDirective(".asciiz", ".asciz"); ++ parser.addAliasForDirective(".hword", ".2byte"); ++ parser.addAliasForDirective(".word", ".4byte"); ++ parser.addAliasForDirective(".dword", ".8byte"); ++ ++ // Initialize the set of available features. ++ setAvailableFeatures(ComputeAvailableFeatures(getSTI().getFeatureBits())); ++ ++ // Remember the initial assembler options. The user can not modify these. ++ AssemblerOptions.push_back( ++ std::make_unique(getSTI().getFeatureBits())); ++ ++ // Create an assembler options environment for the user to modify. ++ AssemblerOptions.push_back( ++ std::make_unique(getSTI().getFeatureBits())); ++ ++ getTargetStreamer().updateABIInfo(*this); ++ ++ CurrentFn = nullptr; ++ ++ IsPicEnabled = getContext().getObjectFileInfo()->isPositionIndependent(); ++ } ++ ++ bool is64Bit() const { ++ return getSTI().getFeatureBits()[LoongArch::Feature64Bit]; ++ } ++ ++ const LoongArchABIInfo &getABI() const { return ABI; } ++ bool isABI_LP64D() const { return ABI.IsLP64D(); } ++ bool isABI_LP64S() const { return ABI.IsLP64S(); } ++ bool isABI_LP64F() const { return ABI.IsLP64F(); } ++ bool isABI_ILP32D() const { return ABI.IsILP32D(); } ++ bool isABI_ILP32F() const { return ABI.IsILP32F(); } ++ bool isABI_ILP32S() const { return ABI.IsILP32S(); } ++ ++ bool hasLSX() const { ++ return getSTI().getFeatureBits()[LoongArch::FeatureLSX]; ++ } ++ ++ bool hasLASX() const { ++ return getSTI().getFeatureBits()[LoongArch::FeatureLASX]; ++ } ++ ++ bool inPicMode() { ++ return IsPicEnabled; ++ } ++ ++ const MCExpr *createTargetUnaryExpr(const MCExpr *E, ++ AsmToken::TokenKind OperatorToken, ++ MCContext &Ctx) override { ++ switch(OperatorToken) { ++ default: ++ llvm_unreachable("Unknown token"); ++ return nullptr; ++#if 0 ++ case AsmToken::PercentPlt: ++ return LoongArchMCExpr::create(LoongArchMCExpr::MEK_PLT, E, Ctx); ++#endif ++ } ++ } ++}; ++ ++/// LoongArchOperand - Instances of this class represent a parsed LoongArch machine ++/// instruction. ++class LoongArchOperand : public MCParsedAsmOperand { ++public: ++ /// Broad categories of register classes ++ /// The exact class is finalized by the render method. 
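The RegKind enum defined next tags each parsed register with a bitmask of every class it could still belong to; a purely numeric register such as $1 starts out with all bits set and is only narrowed once an instruction pattern matches. A short standalone sketch of that idea (the kind names mirror the power-of-two layout below but the code is not part of the patch):

    #include <cstdio>

    // Power-of-two kind bits, mirroring the layout of the enum that follows.
    enum RegKind : unsigned {
      RK_GPR = 1, RK_FGR = 2, RK_FCFR = 4, RK_FCSR = 8,
      RK_LSX128 = 16, RK_LASX256 = 32,
      RK_Numeric = RK_GPR | RK_FGR | RK_FCFR | RK_FCSR | RK_LSX128 | RK_LASX256
    };

    // Predicates only ask whether a given kind bit is still possible.
    static bool couldBeGPR(unsigned Kind) { return (Kind & RK_GPR) != 0; }

    int main() {
      unsigned Numeric = RK_Numeric; // "$1": still ambiguous between all classes
      unsigned Float   = RK_FGR;     // "$f3": known to be a floating-point register
      std::printf("%d %d\n", couldBeGPR(Numeric), couldBeGPR(Float)); // prints "1 0"
    }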
++ enum RegKind { ++ RegKind_GPR = 1, /// GPR32 and GPR64 (depending on is64Bit()) ++ RegKind_FGR = 2, /// FGR32, FGR64 (depending on hasBasicD()) ++ RegKind_FCFR = 4, /// FCFR ++ RegKind_FCSR = 8, /// FCSR ++ RegKind_LSX128 = 16, /// LSX128[BHWD] (makes no difference which) ++ RegKind_LASX256 = 32, /// LASX256[BHWD] (makes no difference which) ++ RegKind_Numeric = RegKind_GPR | RegKind_FGR | RegKind_FCFR | RegKind_FCSR | ++ RegKind_LSX128 | RegKind_LASX256 ++ }; ++ ++private: ++ enum KindTy { ++ k_Immediate, /// An immediate (possibly involving symbol references) ++ k_Memory, /// Base + Offset Memory Address ++ k_RegisterIndex, /// A register index in one or more RegKind. ++ k_Token, /// A simple token ++ k_RegList, /// A physical register list ++ } Kind; ++ ++public: ++ LoongArchOperand(KindTy K, LoongArchAsmParser &Parser) ++ : MCParsedAsmOperand(), Kind(K), AsmParser(Parser) {} ++ ++ ~LoongArchOperand() override { ++ switch (Kind) { ++ case k_Memory: ++ delete Mem.Base; ++ break; ++ case k_RegList: ++ delete RegList.List; ++ break; ++ case k_Immediate: ++ case k_RegisterIndex: ++ case k_Token: ++ break; ++ } ++ } ++ ++private: ++ /// For diagnostics, and checking the assembler temporary ++ LoongArchAsmParser &AsmParser; ++ ++ struct Token { ++ const char *Data; ++ unsigned Length; ++ }; ++ ++ struct RegIdxOp { ++ unsigned Index; /// Index into the register class ++ RegKind Kind; /// Bitfield of the kinds it could possibly be ++ struct Token Tok; /// The input token this operand originated from. ++ const MCRegisterInfo *RegInfo; ++ }; ++ ++ struct ImmOp { ++ const MCExpr *Val; ++ }; ++ ++ struct MemOp { ++ LoongArchOperand *Base; ++ const MCExpr *Off; ++ }; ++ ++ struct RegListOp { ++ SmallVector *List; ++ }; ++ ++ union { ++ struct Token Tok; ++ struct RegIdxOp RegIdx; ++ struct ImmOp Imm; ++ struct MemOp Mem; ++ struct RegListOp RegList; ++ }; ++ ++ SMLoc StartLoc, EndLoc; ++ ++ /// Internal constructor for register kinds ++ static std::unique_ptr CreateReg(unsigned Index, StringRef Str, ++ RegKind RegKind, ++ const MCRegisterInfo *RegInfo, ++ SMLoc S, SMLoc E, ++ LoongArchAsmParser &Parser) { ++ auto Op = std::make_unique(k_RegisterIndex, Parser); ++ Op->RegIdx.Index = Index; ++ Op->RegIdx.RegInfo = RegInfo; ++ Op->RegIdx.Kind = RegKind; ++ Op->RegIdx.Tok.Data = Str.data(); ++ Op->RegIdx.Tok.Length = Str.size(); ++ Op->StartLoc = S; ++ Op->EndLoc = E; ++ return Op; ++ } ++ ++public: ++ /// Coerce the register to GPR32 and return the real register for the current ++ /// target. ++ unsigned getGPR32Reg() const { ++ assert(isRegIdx() && (RegIdx.Kind & RegKind_GPR) && "Invalid access!"); ++ unsigned ClassID = LoongArch::GPR32RegClassID; ++ return RegIdx.RegInfo->getRegClass(ClassID).getRegister(RegIdx.Index); ++ } ++ ++ /// Coerce the register to GPR32 and return the real register for the current ++ /// target. ++ unsigned getGPRMM16Reg() const { ++ assert(isRegIdx() && (RegIdx.Kind & RegKind_GPR) && "Invalid access!"); ++ unsigned ClassID = LoongArch::GPR32RegClassID; ++ return RegIdx.RegInfo->getRegClass(ClassID).getRegister(RegIdx.Index); ++ } ++ ++ /// Coerce the register to GPR64 and return the real register for the current ++ /// target. ++ unsigned getGPR64Reg() const { ++ assert(isRegIdx() && (RegIdx.Kind & RegKind_GPR) && "Invalid access!"); ++ unsigned ClassID = LoongArch::GPR64RegClassID; ++ return RegIdx.RegInfo->getRegClass(ClassID).getRegister(RegIdx.Index); ++ } ++ ++private: ++ /// Coerce the register to FGR64 and return the real register for the current ++ /// target. 
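The operand payloads above live in a single union, so every accessor, like getGPR32Reg before this point and getFGR64Reg just below, asserts on the Kind tag before touching the storage. The same discriminated-union pattern in isolation, with simplified placeholder payloads (not the patch's actual types):

    #include <cassert>
    #include <cstdint>

    class Operand {
      enum KindTy { k_Immediate, k_RegisterIndex } Kind;
      union {
        int64_t Imm;       // valid only when Kind == k_Immediate
        unsigned RegIndex; // valid only when Kind == k_RegisterIndex
      };

    public:
      static Operand createImm(int64_t V) {
        Operand O; O.Kind = k_Immediate; O.Imm = V; return O;
      }
      static Operand createReg(unsigned Index) {
        Operand O; O.Kind = k_RegisterIndex; O.RegIndex = Index; return O;
      }
      bool isImm() const { return Kind == k_Immediate; }
      int64_t getImm() const { assert(isImm() && "Invalid access!"); return Imm; }
      unsigned getReg() const {
        assert(Kind == k_RegisterIndex && "Invalid access!");
        return RegIndex;
      }
    };

    int main() {
      Operand A = Operand::createImm(42);
      return (A.isImm() && A.getImm() == 42) ? 0 : 1;
    }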
++ unsigned getFGR64Reg() const { ++ assert(isRegIdx() && (RegIdx.Kind & RegKind_FGR) && "Invalid access!"); ++ return RegIdx.RegInfo->getRegClass(LoongArch::FGR64RegClassID) ++ .getRegister(RegIdx.Index); ++ } ++ ++ /// Coerce the register to FGR32 and return the real register for the current ++ /// target. ++ unsigned getFGR32Reg() const { ++ assert(isRegIdx() && (RegIdx.Kind & RegKind_FGR) && "Invalid access!"); ++ return RegIdx.RegInfo->getRegClass(LoongArch::FGR32RegClassID) ++ .getRegister(RegIdx.Index); ++ } ++ ++ /// Coerce the register to FCFR and return the real register for the current ++ /// target. ++ unsigned getFCFRReg() const { ++ assert(isRegIdx() && (RegIdx.Kind & RegKind_FCFR) && "Invalid access!"); ++ return RegIdx.RegInfo->getRegClass(LoongArch::FCFRRegClassID) ++ .getRegister(RegIdx.Index); ++ } ++ ++ /// Coerce the register to LSX128 and return the real register for the current ++ /// target. ++ unsigned getLSX128Reg() const { ++ assert(isRegIdx() && (RegIdx.Kind & RegKind_LSX128) && "Invalid access!"); ++ // It doesn't matter which of the LSX128[BHWD] classes we use. They are all ++ // identical ++ unsigned ClassID = LoongArch::LSX128BRegClassID; ++ return RegIdx.RegInfo->getRegClass(ClassID).getRegister(RegIdx.Index); ++ } ++ ++ unsigned getLASX256Reg() const { ++ assert(isRegIdx() && (RegIdx.Kind & RegKind_LASX256) && "Invalid access!"); ++ unsigned ClassID = LoongArch::LASX256BRegClassID; ++ return RegIdx.RegInfo->getRegClass(ClassID).getRegister(RegIdx.Index); ++ } ++ ++ /// Coerce the register to CCR and return the real register for the ++ /// current target. ++ unsigned getFCSRReg() const { ++ assert(isRegIdx() && (RegIdx.Kind & RegKind_FCSR) && "Invalid access!"); ++ unsigned ClassID = LoongArch::FCSRRegClassID; ++ return RegIdx.RegInfo->getRegClass(ClassID).getRegister(RegIdx.Index); ++ } ++ ++public: ++ void addExpr(MCInst &Inst, const MCExpr *Expr) const { ++ // Add as immediate when possible. Null MCExpr = 0. 
++ if (!Expr) ++ Inst.addOperand(MCOperand::createImm(0)); ++ else if (const MCConstantExpr *CE = dyn_cast(Expr)) ++ Inst.addOperand(MCOperand::createImm(CE->getValue())); ++ else ++ Inst.addOperand(MCOperand::createExpr(Expr)); ++ } ++ ++ void addRegOperands(MCInst &Inst, unsigned N) const { ++ llvm_unreachable("Use a custom parser instead"); ++ } ++ ++ /// Render the operand to an MCInst as a GPR32 ++ /// Asserts if the wrong number of operands are requested, or the operand ++ /// is not a k_RegisterIndex compatible with RegKind_GPR ++ void addGPR32ZeroAsmRegOperands(MCInst &Inst, unsigned N) const { ++ assert(N == 1 && "Invalid number of operands!"); ++ Inst.addOperand(MCOperand::createReg(getGPR32Reg())); ++ } ++ ++ void addGPR32NonZeroAsmRegOperands(MCInst &Inst, unsigned N) const { ++ assert(N == 1 && "Invalid number of operands!"); ++ Inst.addOperand(MCOperand::createReg(getGPR32Reg())); ++ } ++ ++ void addGPR32AsmRegOperands(MCInst &Inst, unsigned N) const { ++ assert(N == 1 && "Invalid number of operands!"); ++ Inst.addOperand(MCOperand::createReg(getGPR32Reg())); ++ } ++ ++ void addGPRMM16AsmRegOperands(MCInst &Inst, unsigned N) const { ++ assert(N == 1 && "Invalid number of operands!"); ++ Inst.addOperand(MCOperand::createReg(getGPRMM16Reg())); ++ } ++ ++ void addGPRMM16AsmRegZeroOperands(MCInst &Inst, unsigned N) const { ++ assert(N == 1 && "Invalid number of operands!"); ++ Inst.addOperand(MCOperand::createReg(getGPRMM16Reg())); ++ } ++ ++ void addGPRMM16AsmRegMovePOperands(MCInst &Inst, unsigned N) const { ++ assert(N == 1 && "Invalid number of operands!"); ++ Inst.addOperand(MCOperand::createReg(getGPRMM16Reg())); ++ } ++ ++ void addGPRMM16AsmRegMovePPairFirstOperands(MCInst &Inst, unsigned N) const { ++ assert(N == 1 && "Invalid number of operands!"); ++ Inst.addOperand(MCOperand::createReg(getGPRMM16Reg())); ++ } ++ ++ void addGPRMM16AsmRegMovePPairSecondOperands(MCInst &Inst, ++ unsigned N) const { ++ assert(N == 1 && "Invalid number of operands!"); ++ Inst.addOperand(MCOperand::createReg(getGPRMM16Reg())); ++ } ++ ++ /// Render the operand to an MCInst as a GPR64 ++ /// Asserts if the wrong number of operands are requested, or the operand ++ /// is not a k_RegisterIndex compatible with RegKind_GPR ++ void addGPR64AsmRegOperands(MCInst &Inst, unsigned N) const { ++ assert(N == 1 && "Invalid number of operands!"); ++ Inst.addOperand(MCOperand::createReg(getGPR64Reg())); ++ } ++ ++ void addStrictlyFGR64AsmRegOperands(MCInst &Inst, unsigned N) const { ++ assert(N == 1 && "Invalid number of operands!"); ++ Inst.addOperand(MCOperand::createReg(getFGR64Reg())); ++ } ++ ++ void addFGR64AsmRegOperands(MCInst &Inst, unsigned N) const { ++ assert(N == 1 && "Invalid number of operands!"); ++ Inst.addOperand(MCOperand::createReg(getFGR64Reg())); ++ } ++ ++ void addFGR32AsmRegOperands(MCInst &Inst, unsigned N) const { ++ assert(N == 1 && "Invalid number of operands!"); ++ Inst.addOperand(MCOperand::createReg(getFGR32Reg())); ++ } ++ ++ void addStrictlyFGR32AsmRegOperands(MCInst &Inst, unsigned N) const { ++ assert(N == 1 && "Invalid number of operands!"); ++ Inst.addOperand(MCOperand::createReg(getFGR32Reg())); ++ } ++ ++ void addFCFRAsmRegOperands(MCInst &Inst, unsigned N) const { ++ assert(N == 1 && "Invalid number of operands!"); ++ Inst.addOperand(MCOperand::createReg(getFCFRReg())); ++ } ++ ++ void addLSX128AsmRegOperands(MCInst &Inst, unsigned N) const { ++ assert(N == 1 && "Invalid number of operands!"); ++ Inst.addOperand(MCOperand::createReg(getLSX128Reg())); ++ } ++ ++ void 
addLASX256AsmRegOperands(MCInst &Inst, unsigned N) const { ++ assert(N == 1 && "Invalid number of operands!"); ++ Inst.addOperand(MCOperand::createReg(getLASX256Reg())); ++ } ++ ++ void addFCSRAsmRegOperands(MCInst &Inst, unsigned N) const { ++ assert(N == 1 && "Invalid number of operands!"); ++ Inst.addOperand(MCOperand::createReg(getFCSRReg())); ++ } ++ ++ template ++ void addConstantUImmOperands(MCInst &Inst, unsigned N) const { ++ assert(N == 1 && "Invalid number of operands!"); ++ uint64_t Imm = getConstantImm() - Offset; ++ Imm &= (1ULL << Bits) - 1; ++ Imm += Offset; ++ Imm += AdjustOffset; ++ Inst.addOperand(MCOperand::createImm(Imm)); ++ } ++ ++ template ++ void addSImmOperands(MCInst &Inst, unsigned N) const { ++ if (isImm() && !isConstantImm()) { ++ addExpr(Inst, getImm()); ++ return; ++ } ++ addConstantSImmOperands(Inst, N); ++ } ++ ++ template ++ void addUImmOperands(MCInst &Inst, unsigned N) const { ++ if (isImm() && !isConstantImm()) { ++ addExpr(Inst, getImm()); ++ return; ++ } ++ addConstantUImmOperands(Inst, N); ++ } ++ ++ template ++ void addConstantSImmOperands(MCInst &Inst, unsigned N) const { ++ assert(N == 1 && "Invalid number of operands!"); ++ int64_t Imm = getConstantImm() - Offset; ++ Imm = SignExtend64(Imm); ++ Imm += Offset; ++ Imm += AdjustOffset; ++ Inst.addOperand(MCOperand::createImm(Imm)); ++ } ++ ++ void addImmOperands(MCInst &Inst, unsigned N) const { ++ assert(N == 1 && "Invalid number of operands!"); ++ const MCExpr *Expr = getImm(); ++ addExpr(Inst, Expr); ++ } ++ ++ void addMemOperands(MCInst &Inst, unsigned N) const { ++ assert(N == 2 && "Invalid number of operands!"); ++ ++ Inst.addOperand(MCOperand::createReg(AsmParser.getABI().ArePtrs64bit() ++ ? getMemBase()->getGPR64Reg() ++ : getMemBase()->getGPR32Reg())); ++ ++ const MCExpr *Expr = getMemOff(); ++ addExpr(Inst, Expr); ++ } ++ ++ void addRegListOperands(MCInst &Inst, unsigned N) const { ++ assert(N == 1 && "Invalid number of operands!"); ++ ++ for (auto RegNo : getRegList()) ++ Inst.addOperand(MCOperand::createReg(RegNo)); ++ } ++ ++ bool isReg() const override { ++ // As a special case until we sort out the definition of div/divu, accept ++ // $0/$zero here so that MCK_ZERO works correctly. ++ return isGPRAsmReg() && RegIdx.Index == 0; ++ } ++ ++ bool isRegIdx() const { return Kind == k_RegisterIndex; } ++ bool isImm() const override { return Kind == k_Immediate; } ++ ++ bool isConstantImm() const { ++ int64_t Res; ++ return isImm() && getImm()->evaluateAsAbsolute(Res); ++ } ++ ++ bool isConstantImmz() const { ++ return isConstantImm() && getConstantImm() == 0; ++ } ++ ++ template bool isConstantUImm() const { ++ return isConstantImm() && isUInt(getConstantImm() - Offset); ++ } ++ ++ template bool isSImm() const { ++ return isConstantImm() ? isInt(getConstantImm()) : isImm(); ++ } ++ ++ template bool isUImm() const { ++ return isConstantImm() ? isUInt(getConstantImm()) : isImm(); ++ } ++ ++ template bool isAnyImm() const { ++ return isConstantImm() ? (isInt(getConstantImm()) || ++ isUInt(getConstantImm())) ++ : isImm(); ++ } ++ ++ template bool isConstantSImm() const { ++ return isConstantImm() && isInt(getConstantImm() - Offset); ++ } ++ ++ template bool isConstantUImmRange() const { ++ return isConstantImm() && getConstantImm() >= Bottom && ++ getConstantImm() <= Top; ++ } ++ ++ bool isToken() const override { ++ // Note: It's not possible to pretend that other operand kinds are tokens. ++ // The matcher emitter checks tokens first. 
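The isSImm and isUImm templates above reduce to ordinary two's-complement range checks on the evaluated constant; anything that does not fold to a constant is kept as an expression for the fixup machinery. A standalone version of those checks, written without the llvm::isInt/isUInt helpers and purely for illustration:

    #include <cstdint>
    #include <cstdio>

    // Does V fit in an N-bit signed immediate field?
    static bool fitsSigned(int64_t V, unsigned N) {
      int64_t Min = -(INT64_C(1) << (N - 1));
      int64_t Max =  (INT64_C(1) << (N - 1)) - 1;
      return V >= Min && V <= Max;
    }

    // Does V fit in an N-bit unsigned immediate field?
    static bool fitsUnsigned(int64_t V, unsigned N) {
      return V >= 0 && V <= (INT64_C(1) << N) - 1;
    }

    int main() {
      std::printf("%d\n", fitsSigned(-2048, 12));  // 1: addi.d-style simm12
      std::printf("%d\n", fitsSigned(2048, 12));   // 0: one past the top
      std::printf("%d\n", fitsUnsigned(4095, 12)); // 1: ori-style uimm12
    }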
++ return Kind == k_Token; ++ } ++ ++ bool isMem() const override { return Kind == k_Memory; } ++ ++ bool isConstantMemOff() const { ++ return isMem() && isa(getMemOff()); ++ } ++ ++ // Allow relocation operators. ++ // FIXME: This predicate and others need to look through binary expressions ++ // and determine whether a Value is a constant or not. ++ template ++ bool isMemWithSimmOffset() const { ++ if (!isMem()) ++ return false; ++ if (!getMemBase()->isGPRAsmReg()) ++ return false; ++ if (isa(getMemOff()) || ++ (isConstantMemOff() && ++ isShiftedInt(getConstantMemOff()))) ++ return true; ++ MCValue Res; ++ bool IsReloc = getMemOff()->evaluateAsRelocatable(Res, nullptr, nullptr); ++ return IsReloc && isShiftedInt(Res.getConstant()); ++ } ++ ++ bool isMemWithPtrSizeOffset() const { ++ if (!isMem()) ++ return false; ++ if (!getMemBase()->isGPRAsmReg()) ++ return false; ++ const unsigned PtrBits = AsmParser.getABI().ArePtrs64bit() ? 64 : 32; ++ if (isa(getMemOff()) || ++ (isConstantMemOff() && isIntN(PtrBits, getConstantMemOff()))) ++ return true; ++ MCValue Res; ++ bool IsReloc = getMemOff()->evaluateAsRelocatable(Res, nullptr, nullptr); ++ return IsReloc && isIntN(PtrBits, Res.getConstant()); ++ } ++ ++ bool isMemWithGRPMM16Base() const { ++ return isMem() && getMemBase()->isMM16AsmReg(); ++ } ++ ++ template bool isMemWithUimmOffsetSP() const { ++ return isMem() && isConstantMemOff() && isUInt(getConstantMemOff()) ++ && getMemBase()->isRegIdx() && (getMemBase()->getGPR32Reg() == LoongArch::SP); ++ } ++ ++ template bool isMemWithUimmWordAlignedOffsetSP() const { ++ return isMem() && isConstantMemOff() && isUInt(getConstantMemOff()) ++ && (getConstantMemOff() % 4 == 0) && getMemBase()->isRegIdx() ++ && (getMemBase()->getGPR32Reg() == LoongArch::SP); ++ } ++ ++ template ++ bool isScaledUImm() const { ++ return isConstantImm() && ++ isShiftedUInt(getConstantImm()); ++ } ++ ++ template ++ bool isScaledSImm() const { ++ if (isConstantImm() && ++ isShiftedInt(getConstantImm())) ++ return true; ++ // Operand can also be a symbol or symbol plus ++ // offset in case of relocations. ++ if (Kind != k_Immediate) ++ return false; ++ MCValue Res; ++ bool Success = getImm()->evaluateAsRelocatable(Res, nullptr, nullptr); ++ return Success && isShiftedInt(Res.getConstant()); ++ } ++ ++ bool isRegList16() const { ++ if (!isRegList()) ++ return false; ++ ++ int Size = RegList.List->size(); ++ if (Size < 2 || Size > 5) ++ return false; ++ ++ unsigned R0 = RegList.List->front(); ++ unsigned R1 = RegList.List->back(); ++ if (!((R0 == LoongArch::S0 && R1 == LoongArch::RA) || ++ (R0 == LoongArch::S0_64 && R1 == LoongArch::RA_64))) ++ return false; ++ ++ int PrevReg = *RegList.List->begin(); ++ for (int i = 1; i < Size - 1; i++) { ++ int Reg = (*(RegList.List))[i]; ++ if ( Reg != PrevReg + 1) ++ return false; ++ PrevReg = Reg; ++ } ++ ++ return true; ++ } ++ ++ bool isInvNum() const { return Kind == k_Immediate; } ++ ++ bool isLSAImm() const { ++ if (!isConstantImm()) ++ return false; ++ int64_t Val = getConstantImm(); ++ return 1 <= Val && Val <= 4; ++ } ++ ++ bool isRegList() const { return Kind == k_RegList; } ++ ++ StringRef getToken() const { ++ assert(Kind == k_Token && "Invalid access!"); ++ return StringRef(Tok.Data, Tok.Length); ++ } ++ ++ unsigned getReg() const override { ++ // As a special case until we sort out the definition of div/divu, accept ++ // $0/$zero here so that MCK_ZERO works correctly. 
++ if (Kind == k_RegisterIndex && RegIdx.Index == 0 && ++ RegIdx.Kind & RegKind_GPR) ++ return getGPR32Reg(); // FIXME: GPR64 too ++ ++ llvm_unreachable("Invalid access!"); ++ return 0; ++ } ++ ++ const MCExpr *getImm() const { ++ assert((Kind == k_Immediate) && "Invalid access!"); ++ return Imm.Val; ++ } ++ ++ int64_t getConstantImm() const { ++ const MCExpr *Val = getImm(); ++ int64_t Value = 0; ++ (void)Val->evaluateAsAbsolute(Value); ++ return Value; ++ } ++ ++ LoongArchOperand *getMemBase() const { ++ assert((Kind == k_Memory) && "Invalid access!"); ++ return Mem.Base; ++ } ++ ++ const MCExpr *getMemOff() const { ++ assert((Kind == k_Memory) && "Invalid access!"); ++ return Mem.Off; ++ } ++ ++ int64_t getConstantMemOff() const { ++ return static_cast(getMemOff())->getValue(); ++ } ++ ++ const SmallVectorImpl &getRegList() const { ++ assert((Kind == k_RegList) && "Invalid access!"); ++ return *(RegList.List); ++ } ++ ++ static std::unique_ptr CreateToken(StringRef Str, SMLoc S, ++ LoongArchAsmParser &Parser) { ++ auto Op = std::make_unique(k_Token, Parser); ++ Op->Tok.Data = Str.data(); ++ Op->Tok.Length = Str.size(); ++ Op->StartLoc = S; ++ Op->EndLoc = S; ++ return Op; ++ } ++ ++ /// Create a numeric register (e.g. $1). The exact register remains ++ /// unresolved until an instruction successfully matches ++ static std::unique_ptr ++ createNumericReg(unsigned Index, StringRef Str, const MCRegisterInfo *RegInfo, ++ SMLoc S, SMLoc E, LoongArchAsmParser &Parser) { ++ LLVM_DEBUG(dbgs() << "createNumericReg(" << Index << ", ...)\n"); ++ return CreateReg(Index, Str, RegKind_Numeric, RegInfo, S, E, Parser); ++ } ++ ++ /// Create a register that is definitely a GPR. ++ /// This is typically only used for named registers such as $gp. ++ static std::unique_ptr ++ createGPRReg(unsigned Index, StringRef Str, const MCRegisterInfo *RegInfo, ++ SMLoc S, SMLoc E, LoongArchAsmParser &Parser) { ++ return CreateReg(Index, Str, RegKind_GPR, RegInfo, S, E, Parser); ++ } ++ ++ /// Create a register that is definitely a FGR. ++ /// This is typically only used for named registers such as $f0. ++ static std::unique_ptr ++ createFGRReg(unsigned Index, StringRef Str, const MCRegisterInfo *RegInfo, ++ SMLoc S, SMLoc E, LoongArchAsmParser &Parser) { ++ return CreateReg(Index, Str, RegKind_FGR, RegInfo, S, E, Parser); ++ } ++ ++ /// Create a register that is definitely an FCFR. ++ /// This is typically only used for named registers such as $fcc0. ++ static std::unique_ptr ++ createFCFRReg(unsigned Index, StringRef Str, const MCRegisterInfo *RegInfo, ++ SMLoc S, SMLoc E, LoongArchAsmParser &Parser) { ++ return CreateReg(Index, Str, RegKind_FCFR, RegInfo, S, E, Parser); ++ } ++ ++ /// Create a register that is definitely an FCSR. ++ /// This is typically only used for named registers such as $fcsr0. ++ static std::unique_ptr ++ createFCSRReg(unsigned Index, StringRef Str, const MCRegisterInfo *RegInfo, ++ SMLoc S, SMLoc E, LoongArchAsmParser &Parser) { ++ return CreateReg(Index, Str, RegKind_FCSR, RegInfo, S, E, Parser); ++ } ++ ++ /// Create a register that is definitely an LSX128. ++ /// This is typically only used for named registers such as $v0. 
++ static std::unique_ptr ++ createLSX128Reg(unsigned Index, StringRef Str, const MCRegisterInfo *RegInfo, ++ SMLoc S, SMLoc E, LoongArchAsmParser &Parser) { ++ return CreateReg(Index, Str, RegKind_LSX128, RegInfo, S, E, Parser); ++ } ++ ++ static std::unique_ptr ++ createLASX256Reg(unsigned Index, StringRef Str, const MCRegisterInfo *RegInfo, ++ SMLoc S, SMLoc E, LoongArchAsmParser &Parser) { ++ return CreateReg(Index, Str, RegKind_LASX256, RegInfo, S, E, Parser); ++ } ++ ++ static std::unique_ptr ++ CreateImm(const MCExpr *Val, SMLoc S, SMLoc E, LoongArchAsmParser &Parser) { ++ auto Op = std::make_unique(k_Immediate, Parser); ++ Op->Imm.Val = Val; ++ Op->StartLoc = S; ++ Op->EndLoc = E; ++ return Op; ++ } ++ ++ static std::unique_ptr ++ CreateMem(std::unique_ptr Base, const MCExpr *Off, SMLoc S, ++ SMLoc E, LoongArchAsmParser &Parser) { ++ auto Op = std::make_unique(k_Memory, Parser); ++ Op->Mem.Base = Base.release(); ++ Op->Mem.Off = Off; ++ Op->StartLoc = S; ++ Op->EndLoc = E; ++ return Op; ++ } ++ ++ static std::unique_ptr ++ CreateRegList(SmallVectorImpl &Regs, SMLoc StartLoc, SMLoc EndLoc, ++ LoongArchAsmParser &Parser) { ++ assert(Regs.size() > 0 && "Empty list not allowed"); ++ ++ auto Op = std::make_unique(k_RegList, Parser); ++ Op->RegList.List = new SmallVector(Regs.begin(), Regs.end()); ++ Op->StartLoc = StartLoc; ++ Op->EndLoc = EndLoc; ++ return Op; ++ } ++ ++ bool isGPRZeroAsmReg() const { ++ return isRegIdx() && RegIdx.Kind & RegKind_GPR && RegIdx.Index == 0; ++ } ++ ++ bool isGPRNonZeroAsmReg() const { ++ return isRegIdx() && RegIdx.Kind & RegKind_GPR && RegIdx.Index > 0 && ++ RegIdx.Index <= 31; ++ } ++ ++ bool isGPRAsmReg() const { ++ return isRegIdx() && RegIdx.Kind & RegKind_GPR && RegIdx.Index <= 31; ++ } ++ ++ bool isMM16AsmReg() const { ++ if (!(isRegIdx() && RegIdx.Kind)) ++ return false; ++ return ((RegIdx.Index >= 2 && RegIdx.Index <= 7) ++ || RegIdx.Index == 16 || RegIdx.Index == 17); ++ ++ } ++ bool isMM16AsmRegZero() const { ++ if (!(isRegIdx() && RegIdx.Kind)) ++ return false; ++ return (RegIdx.Index == 0 || ++ (RegIdx.Index >= 2 && RegIdx.Index <= 7) || ++ RegIdx.Index == 17); ++ } ++ ++ bool isMM16AsmRegMoveP() const { ++ if (!(isRegIdx() && RegIdx.Kind)) ++ return false; ++ return (RegIdx.Index == 0 || (RegIdx.Index >= 2 && RegIdx.Index <= 3) || ++ (RegIdx.Index >= 16 && RegIdx.Index <= 20)); ++ } ++ ++ bool isMM16AsmRegMovePPairFirst() const { ++ if (!(isRegIdx() && RegIdx.Kind)) ++ return false; ++ return RegIdx.Index >= 4 && RegIdx.Index <= 6; ++ } ++ ++ bool isMM16AsmRegMovePPairSecond() const { ++ if (!(isRegIdx() && RegIdx.Kind)) ++ return false; ++ return (RegIdx.Index == 21 || RegIdx.Index == 22 || ++ (RegIdx.Index >= 5 && RegIdx.Index <= 7)); ++ } ++ ++ bool isFGRAsmReg() const { ++ return isRegIdx() && RegIdx.Kind & RegKind_FGR && RegIdx.Index <= 31; ++ } ++ ++ bool isStrictlyFGRAsmReg() const { ++ return isRegIdx() && RegIdx.Kind == RegKind_FGR && RegIdx.Index <= 31; ++ } ++ ++ bool isFCSRAsmReg() const { ++ return isRegIdx() && RegIdx.Kind & RegKind_FCSR && RegIdx.Index <= 3; ++ } ++ ++ bool isFCFRAsmReg() const { ++ if (!(isRegIdx() && RegIdx.Kind & RegKind_FCFR)) ++ return false; ++ return RegIdx.Index <= 7; ++ } ++ ++ bool isLSX128AsmReg() const { ++ return isRegIdx() && RegIdx.Kind & RegKind_LSX128 && RegIdx.Index <= 31; ++ } ++ ++ bool isLASX256AsmReg() const { ++ return isRegIdx() && RegIdx.Kind & RegKind_LASX256 && RegIdx.Index <= 31; ++ } ++ ++ /// getStartLoc - Get the location of the first token of this operand. 
++ SMLoc getStartLoc() const override { return StartLoc; } ++ /// getEndLoc - Get the location of the last token of this operand. ++ SMLoc getEndLoc() const override { return EndLoc; } ++ ++ void print(raw_ostream &OS) const override { ++ switch (Kind) { ++ case k_Immediate: ++ OS << "Imm<"; ++ OS << *Imm.Val; ++ OS << ">"; ++ break; ++ case k_Memory: ++ OS << "Mem<"; ++ Mem.Base->print(OS); ++ OS << ", "; ++ OS << *Mem.Off; ++ OS << ">"; ++ break; ++ case k_RegisterIndex: ++ OS << "RegIdx<" << RegIdx.Index << ":" << RegIdx.Kind << ", " ++ << StringRef(RegIdx.Tok.Data, RegIdx.Tok.Length) << ">"; ++ break; ++ case k_Token: ++ OS << getToken(); ++ break; ++ case k_RegList: ++ OS << "RegList< "; ++ for (auto Reg : (*RegList.List)) ++ OS << Reg << " "; ++ OS << ">"; ++ break; ++ } ++ } ++ ++ bool isValidForTie(const LoongArchOperand &Other) const { ++ if (Kind != Other.Kind) ++ return false; ++ ++ switch (Kind) { ++ default: ++ llvm_unreachable("Unexpected kind"); ++ return false; ++ case k_RegisterIndex: { ++ StringRef Token(RegIdx.Tok.Data, RegIdx.Tok.Length); ++ StringRef OtherToken(Other.RegIdx.Tok.Data, Other.RegIdx.Tok.Length); ++ return Token == OtherToken; ++ } ++ } ++ } ++}; // class LoongArchOperand ++ ++} // end anonymous namespace ++ ++namespace llvm { ++ ++extern const MCInstrDesc LoongArchInsts[]; ++ ++} // end namespace llvm ++ ++static const MCInstrDesc &getInstDesc(unsigned Opcode) { ++ return LoongArchInsts[Opcode]; ++} ++ ++static const MCSymbol *getSingleMCSymbol(const MCExpr *Expr) { ++ if (const MCSymbolRefExpr *SRExpr = dyn_cast(Expr)) { ++ return &SRExpr->getSymbol(); ++ } ++ ++ if (const MCBinaryExpr *BExpr = dyn_cast(Expr)) { ++ const MCSymbol *LHSSym = getSingleMCSymbol(BExpr->getLHS()); ++ const MCSymbol *RHSSym = getSingleMCSymbol(BExpr->getRHS()); ++ ++ if (LHSSym) ++ return LHSSym; ++ ++ if (RHSSym) ++ return RHSSym; ++ ++ return nullptr; ++ } ++ ++ if (const MCUnaryExpr *UExpr = dyn_cast(Expr)) ++ return getSingleMCSymbol(UExpr->getSubExpr()); ++ ++ return nullptr; ++} ++ ++static unsigned countMCSymbolRefExpr(const MCExpr *Expr) { ++ if (isa(Expr)) ++ return 1; ++ ++ if (const MCBinaryExpr *BExpr = dyn_cast(Expr)) ++ return countMCSymbolRefExpr(BExpr->getLHS()) + ++ countMCSymbolRefExpr(BExpr->getRHS()); ++ ++ if (const MCUnaryExpr *UExpr = dyn_cast(Expr)) ++ return countMCSymbolRefExpr(UExpr->getSubExpr()); ++ ++ return 0; ++} ++ ++bool LoongArchAsmParser::processInstruction(MCInst &Inst, SMLoc IDLoc, ++ MCStreamer &Out, ++ const MCSubtargetInfo *STI) { ++ const MCInstrDesc &MCID = getInstDesc(Inst.getOpcode()); ++ ++ Inst.setLoc(IDLoc); ++ ++ // Check branch instructions. 
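The branch checks that follow reject call and branch targets whose constant offset does not fit the encoding: the 16-, 21- and 26-bit offset fields count 4-byte instructions, so the byte offset gets two extra bits of range (hence the isIntN(Width + 2, ...) test) and must be word aligned. A small self-contained sketch of that check for a known constant offset, not taken from the patch:

    #include <cstdint>
    #include <cstdio>

    // OffsetBits is the width of the instruction's offset field. The encoded
    // value is the byte offset divided by 4, so the byte offset must fit in
    // OffsetBits + 2 signed bits and be 4-byte aligned.
    static bool branchOffsetOK(int64_t ByteOffset, unsigned OffsetBits) {
      if (ByteOffset % 4 != 0)
        return false;                           // "branch to misaligned address"
      unsigned N = OffsetBits + 2;              // byte range, as in isIntN(Width + 2, ...)
      int64_t Min = -(INT64_C(1) << (N - 1));
      int64_t Max =  (INT64_C(1) << (N - 1)) - 1;
      return ByteOffset >= Min && ByteOffset <= Max;
    }

    int main() {
      // beq-style 16-bit field: roughly +/-128 KiB of byte offset.
      std::printf("%d\n", branchOffsetOK(131068, 16)); // 1
      std::printf("%d\n", branchOffsetOK(131072, 16)); // 0: out of range
      std::printf("%d\n", branchOffsetOK(6, 16));      // 0: not 4-byte aligned
    }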
++ if (MCID.isBranch() || MCID.isCall()) { ++ const unsigned Opcode = Inst.getOpcode(); ++ MCOperand Offset; ++ bool check = true; ++ unsigned OffsetOpndIdx, OffsetOpndWidth; ++ switch (Opcode) { ++ default: ++ check = false; ++ break; ++ case LoongArch::BEQ: ++ case LoongArch::BNE: ++ case LoongArch::BLT: ++ case LoongArch::BGE: ++ case LoongArch::BLTU: ++ case LoongArch::BGEU: ++ OffsetOpndIdx = 2; ++ OffsetOpndWidth = 16; ++ break; ++ case LoongArch::BEQZ: ++ case LoongArch::BNEZ: ++ case LoongArch::BCEQZ: ++ case LoongArch::BCNEZ: ++ OffsetOpndIdx = 1; ++ OffsetOpndWidth = 21; ++ break; ++ case LoongArch::B: ++ case LoongArch::BL: ++ OffsetOpndIdx = 0; ++ OffsetOpndWidth = 26; ++ break; ++ } ++ if (check) { ++ assert(MCID.getNumOperands() == OffsetOpndIdx + 1 && ++ "unexpected number of operands"); ++ Offset = Inst.getOperand(OffsetOpndIdx); ++ // Non-Imm situation will be dealed with later on when applying fixups. ++ if (Offset.isImm()) { ++ if (!isIntN(OffsetOpndWidth + 2, Offset.getImm())) ++ return Error(IDLoc, "branch target out of range"); ++ if (offsetToAlignment(Offset.getImm(), Align(1LL << 2))) ++ return Error(IDLoc, "branch to misaligned address"); ++ } ++ } ++ } ++ ++ bool IsPCRelativeLoad = (MCID.TSFlags & LoongArchII::IsPCRelativeLoad) != 0; ++ if ((MCID.mayLoad() || MCID.mayStore()) && !IsPCRelativeLoad) { ++ // Check the offset of memory operand, if it is a symbol ++ // reference or immediate we may have to expand instructions. ++ for (unsigned i = 0; i < MCID.getNumOperands(); i++) { ++ const MCOperandInfo &OpInfo = MCID.OpInfo[i]; ++ if ((OpInfo.OperandType == MCOI::OPERAND_MEMORY) || ++ (OpInfo.OperandType == MCOI::OPERAND_UNKNOWN)) { ++ MCOperand &Op = Inst.getOperand(i); ++ if (Op.isImm()) { ++ int64_t MemOffset = Op.getImm(); ++ if (MemOffset < -32768 || MemOffset > 32767) { ++ return getParser().hasPendingError(); ++ } ++ } else if (Op.isExpr()) { ++ const MCExpr *Expr = Op.getExpr(); ++ if (Expr->getKind() == MCExpr::SymbolRef) { ++ const MCSymbolRefExpr *SR = ++ static_cast(Expr); ++ if (SR->getKind() == MCSymbolRefExpr::VK_None) { ++ return getParser().hasPendingError(); ++ } ++ } else if (!isEvaluated(Expr)) { ++ return getParser().hasPendingError(); ++ } ++ } ++ } ++ } // for ++ } // if load/store ++ ++ MacroExpanderResultTy ExpandResult = ++ tryExpandInstruction(Inst, IDLoc, Out, STI); ++ switch (ExpandResult) { ++ case MER_NotAMacro: ++ Out.emitInstruction(Inst, *STI); ++ break; ++ case MER_Success: ++ break; ++ case MER_Fail: ++ return true; ++ } ++ ++ return false; ++} ++ ++LoongArchAsmParser::MacroExpanderResultTy ++LoongArchAsmParser::tryExpandInstruction(MCInst &Inst, SMLoc IDLoc, MCStreamer &Out, ++ const MCSubtargetInfo *STI) { ++ switch (Inst.getOpcode()) { ++ default: ++ return MER_NotAMacro; ++ case LoongArch::LoadImm32: // li.w $rd, $imm32 ++ case LoongArch::LoadImm64: // li.d $rd, $imm64 ++ return expandLoadImm(Inst, IDLoc, Out, STI) ? MER_Fail : MER_Success; ++ case LoongArch::LoadAddrLocal: // la.local $rd, symbol ++ case LoongArch::LoadAddrGlobal: // la.global $rd, symbol ++ case LoongArch::LoadAddrGlobal_Alias: // la $rd, symbol ++ case LoongArch::LoadAddrTLS_LE: // la.tls.le $rd, symbol ++ case LoongArch::LoadAddrTLS_IE: // la.tls.ie $rd, symbol ++ case LoongArch::LoadAddrTLS_LD: // la.tls.ld $rd, symbol ++ case LoongArch::LoadAddrTLS_GD: // la.tls.gd $rd, symbol ++ return expandLoadAddress(Inst, IDLoc, Out, STI) ? MER_Fail : MER_Success; ++ } ++} ++ ++/// Can the value be represented by a unsigned N-bit value and a shift left? 
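The expandLoadImm pseudo-expansion just below delegates instruction selection to LoongArchAnalyzeImmediate, but the general shape for an arbitrary 64-bit constant is the four-part split that the la.tls.le expansion further down also emits: bits 31..12 via lu12i.w, bits 11..0 via ori, bits 51..32 via lu32i.d and bits 63..52 via lu52i.d. A small sketch of slicing a constant into those fields (field extraction only, not the shortest-sequence heuristics; names are illustrative):

    #include <cstdint>
    #include <cstdio>

    // Immediate fields consumed by the canonical
    // lu12i.w / ori / lu32i.d / lu52i.d materialization sequence.
    struct ImmParts {
      uint32_t Lo12;      // ori      rd, rd, Lo12       (bits 11..0)
      uint32_t Hi20;      // lu12i.w  rd, Hi20           (bits 31..12)
      uint32_t Higher20;  // lu32i.d  rd, Higher20       (bits 51..32)
      uint32_t Highest12; // lu52i.d  rd, rd, Highest12  (bits 63..52)
    };

    static ImmParts split(uint64_t Imm) {
      return { static_cast<uint32_t>(Imm & 0xfff),
               static_cast<uint32_t>((Imm >> 12) & 0xfffff),
               static_cast<uint32_t>((Imm >> 32) & 0xfffff),
               static_cast<uint32_t>((Imm >> 52) & 0xfff) };
    }

    int main() {
      ImmParts P = split(0x123456789abcdef0ULL);
      // Prints: 0xef0 0x9abcd 0x45678 0x123
      std::printf("%#x %#x %#x %#x\n", P.Lo12, P.Hi20, P.Higher20, P.Highest12);
    }

The real expander additionally drops instructions whose field is already implied by sign extension of the previous step, which is why short constants come out as one or two instructions.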
++template static bool isShiftedUIntAtAnyPosition(uint64_t x) { ++ unsigned BitNum = findFirstSet(x); ++ ++ return (x == x >> BitNum << BitNum) && isUInt(x >> BitNum); ++} ++ ++bool LoongArchAsmParser::expandLoadImm(MCInst &Inst, SMLoc IDLoc, ++ MCStreamer &Out, ++ const MCSubtargetInfo *STI) { ++ const int64_t Imm = Inst.getOperand(1).getImm(); ++ const unsigned DstReg = Inst.getOperand(0).getReg(); ++ LoongArchTargetStreamer &TOut = getTargetStreamer(); ++ bool Is64Bit = Inst.getOpcode() == LoongArch::LoadImm64; ++ unsigned SrcReg = Is64Bit ? LoongArch::ZERO_64 : LoongArch::ZERO; ++ LoongArchAnalyzeImmediate::InstSeq Seq = ++ LoongArchAnalyzeImmediate::generateInstSeq( ++ Is64Bit ? Imm : SignExtend64<32>(Imm), Is64Bit); ++ ++ for (auto &Inst : Seq) { ++ if (Inst.Opc == LoongArch::LU12I_W || Inst.Opc == LoongArch::LU12I_W32) ++ TOut.emitRI(Inst.Opc, DstReg, Inst.Imm, IDLoc, STI); ++ else ++ TOut.emitRRI(Inst.Opc, DstReg, SrcReg, Inst.Imm, IDLoc, STI); ++ SrcReg = DstReg; ++ } ++ ++ return false; ++} ++ ++bool LoongArchAsmParser::expandLoadAddress(MCInst &Inst, SMLoc IDLoc, ++ MCStreamer &Out, ++ const MCSubtargetInfo *STI) { ++ LoongArchTargetStreamer &TOut = getTargetStreamer(); ++ const MCExpr *SymExpr = Inst.getOperand(1).getExpr(); ++ const LoongArchMCExpr *HiExpr = nullptr; ++ const LoongArchMCExpr *LoExpr = nullptr; ++ const LoongArchMCExpr *HigherExpr = nullptr; ++ const LoongArchMCExpr *HighestExpr = nullptr; ++ unsigned DstReg = Inst.getOperand(0).getReg(); ++ ++ MCValue Res; ++ if (!SymExpr->evaluateAsRelocatable(Res, nullptr, nullptr)) { ++ Error(IDLoc, "expected relocatable expression"); ++ return true; ++ } ++ if (Res.getSymB() != nullptr) { ++ Error(IDLoc, "expected relocatable expression with only one symbol"); ++ return true; ++ } ++ ++ switch (Inst.getOpcode()) { ++ case LoongArch::LoadAddrLocal: ++ HiExpr = LoongArchMCExpr::create(LoongArchMCExpr::MEK_PCREL_HI, SymExpr, ++ getContext()); ++ LoExpr = LoongArchMCExpr::create(LoongArchMCExpr::MEK_PCREL_LO, SymExpr, ++ getContext()); ++ ++ TOut.emitRX(LoongArch::PCALAU12I_ri, DstReg, MCOperand::createExpr(HiExpr), ++ IDLoc, STI); ++ TOut.emitRRX(LoongArch::ADDI_D_rri, DstReg, DstReg, ++ MCOperand::createExpr(LoExpr), IDLoc, STI); ++ return false; ++ case LoongArch::LoadAddrGlobal: ++ case LoongArch::LoadAddrGlobal_Alias: ++ HiExpr = LoongArchMCExpr::create(LoongArchMCExpr::MEK_GOT_HI, SymExpr, ++ getContext()); ++ LoExpr = LoongArchMCExpr::create(LoongArchMCExpr::MEK_GOT_LO, SymExpr, ++ getContext()); ++ TOut.emitRX(LoongArch::PCALAU12I_ri, DstReg, MCOperand::createExpr(HiExpr), ++ IDLoc, STI); ++ TOut.emitRRX(LoongArch::LD_D_rri, DstReg, DstReg, ++ MCOperand::createExpr(LoExpr), IDLoc, STI); ++ return false; ++ case LoongArch::LoadAddrTLS_LE: ++ HiExpr = LoongArchMCExpr::create(LoongArchMCExpr::MEK_TLSLE_HI, SymExpr, ++ getContext()); ++ LoExpr = LoongArchMCExpr::create(LoongArchMCExpr::MEK_TLSLE_LO, SymExpr, ++ getContext()); ++ HigherExpr = LoongArchMCExpr::create(LoongArchMCExpr::MEK_TLSLE_HIGHER, ++ SymExpr, getContext()); ++ HighestExpr = LoongArchMCExpr::create(LoongArchMCExpr::MEK_TLSLE_HIGHEST, ++ SymExpr, getContext()); ++ TOut.emitRX(LoongArch::LU12I_W_ri, DstReg, MCOperand::createExpr(HiExpr), ++ IDLoc, STI); ++ TOut.emitRRX(LoongArch::ORI_rri, DstReg, DstReg, ++ MCOperand::createExpr(LoExpr), IDLoc, STI); ++ TOut.emitRX(LoongArch::LU32I_D_ri, DstReg, ++ MCOperand::createExpr(HigherExpr), IDLoc, STI); ++ TOut.emitRRX(LoongArch::LU52I_D_rri, DstReg, DstReg, ++ MCOperand::createExpr(HighestExpr), IDLoc, STI); ++ 
return false; ++ case LoongArch::LoadAddrTLS_IE: ++ HiExpr = LoongArchMCExpr::create(LoongArchMCExpr::MEK_TLSIE_HI, SymExpr, ++ getContext()); ++ LoExpr = LoongArchMCExpr::create(LoongArchMCExpr::MEK_TLSIE_LO, SymExpr, ++ getContext()); ++ TOut.emitRX(LoongArch::PCALAU12I_ri, DstReg, MCOperand::createExpr(HiExpr), ++ IDLoc, STI); ++ TOut.emitRRX(LoongArch::LD_D_rri, DstReg, DstReg, ++ MCOperand::createExpr(LoExpr), IDLoc, STI); ++ return false; ++ case LoongArch::LoadAddrTLS_LD: ++ case LoongArch::LoadAddrTLS_GD: ++ HiExpr = LoongArchMCExpr::create(LoongArchMCExpr::MEK_TLSGD_HI, SymExpr, ++ getContext()); ++ LoExpr = LoongArchMCExpr::create(LoongArchMCExpr::MEK_TLSGD_LO, SymExpr, ++ getContext()); ++ TOut.emitRX(LoongArch::PCALAU12I_ri, DstReg, MCOperand::createExpr(HiExpr), ++ IDLoc, STI); ++ TOut.emitRRX(LoongArch::ADDI_D_rri, DstReg, DstReg, ++ MCOperand::createExpr(LoExpr), IDLoc, STI); ++ return false; ++ default: ++ llvm_unreachable(""); ++ } ++} ++ ++unsigned LoongArchAsmParser::checkTargetMatchPredicate(MCInst &Inst) { ++ switch (Inst.getOpcode()) { ++ case LoongArch::BSTRINS_W: ++ case LoongArch::BSTRPICK_W: { ++ assert(Inst.getOperand(2).isImm() && Inst.getOperand(3).isImm() && ++ "Operands must be immediates for bstrins.w/bstrpick.w!"); ++ const signed Msbw = Inst.getOperand(2).getImm(); ++ const signed Lsbw = Inst.getOperand(3).getImm(); ++ if (Msbw < Lsbw) ++ return Match_MsbHigherThanLsb; ++ if ((Lsbw < 0) || (Msbw > 31)) ++ return Match_RequiresRange0_31; ++ return Match_Success; ++ } ++ case LoongArch::BSTRINS_D: ++ case LoongArch::BSTRPICK_D: { ++ assert(Inst.getOperand(2).isImm() && Inst.getOperand(3).isImm() && ++ "Operands must be immediates for bstrins.d/bstrpick.d!"); ++ const signed Msbd = Inst.getOperand(2).getImm(); ++ const signed Lsbd = Inst.getOperand(3).getImm(); ++ if (Msbd < Lsbd) ++ return Match_MsbHigherThanLsb; ++ if ((Lsbd < 0) || (Msbd > 63)) ++ return Match_RequiresRange0_63; ++ return Match_Success; ++ } ++ case LoongArch::CSRXCHG32: ++ case LoongArch::CSRXCHG: ++ if (Inst.getOperand(2).getReg() == LoongArch::ZERO || ++ Inst.getOperand(2).getReg() == LoongArch::ZERO_64) ++ return Match_RequiresNoZeroRegister; ++ if (Inst.getOperand(2).getReg() == LoongArch::RA || ++ Inst.getOperand(2).getReg() == LoongArch::RA_64) ++ return Match_RequiresNoRaRegister; ++ return Match_Success; ++ } ++ ++ return Match_Success; ++} ++ ++static SMLoc RefineErrorLoc(const SMLoc Loc, const OperandVector &Operands, ++ uint64_t ErrorInfo) { ++ if (ErrorInfo != ~0ULL && ErrorInfo < Operands.size()) { ++ SMLoc ErrorLoc = Operands[ErrorInfo]->getStartLoc(); ++ if (ErrorLoc == SMLoc()) ++ return Loc; ++ return ErrorLoc; ++ } ++ return Loc; ++} ++ ++bool LoongArchAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode, ++ OperandVector &Operands, ++ MCStreamer &Out, ++ uint64_t &ErrorInfo, ++ bool MatchingInlineAsm) { ++ MCInst Inst; ++ unsigned MatchResult = ++ MatchInstructionImpl(Operands, Inst, ErrorInfo, MatchingInlineAsm); ++ switch (MatchResult) { ++ case Match_Success: ++ if (processInstruction(Inst, IDLoc, Out, STI)) ++ return true; ++ return false; ++ case Match_MissingFeature: ++ Error(IDLoc, "instruction requires a CPU feature not currently enabled"); ++ return true; ++ case Match_InvalidOperand: { ++ SMLoc ErrorLoc = IDLoc; ++ if (ErrorInfo != ~0ULL) { ++ if (ErrorInfo >= Operands.size()) ++ return Error(IDLoc, "too few operands for instruction"); ++ ++ ErrorLoc = Operands[ErrorInfo]->getStartLoc(); ++ if (ErrorLoc == SMLoc()) ++ ErrorLoc = IDLoc; ++ } ++ ++ 
return Error(ErrorLoc, "invalid operand for instruction"); ++ } ++ case Match_MnemonicFail: ++ return Error(IDLoc, "invalid instruction"); ++ case Match_RequiresNoZeroRegister: ++ return Error(IDLoc, "invalid operand ($zero) for instruction"); ++ case Match_RequiresNoRaRegister: ++ return Error(IDLoc, "invalid operand ($r1) for instruction"); ++ case Match_InvalidImm0_3: ++ return Error(RefineErrorLoc(IDLoc, Operands, ErrorInfo), ++ "immediate must be an integer in range [0, 3]."); ++ case Match_InvalidImm0_7: ++ return Error(RefineErrorLoc(IDLoc, Operands, ErrorInfo), ++ "immediate must be an integer in range [0, 7]."); ++ case Match_InvalidImm0_31: ++ return Error(RefineErrorLoc(IDLoc, Operands, ErrorInfo), ++ "immediate must be an integer in range [0, 31]."); ++ case Match_InvalidImm0_63: ++ return Error(RefineErrorLoc(IDLoc, Operands, ErrorInfo), ++ "immediate must be an integer in range [0, 63]."); ++ case Match_InvalidImm0_4095: ++ case Match_UImm12_Relaxed: ++ return Error(RefineErrorLoc(IDLoc, Operands, ErrorInfo), ++ "immediate must be an integer in range [0, 4095]."); ++ case Match_InvalidImm0_32767: ++ return Error(RefineErrorLoc(IDLoc, Operands, ErrorInfo), ++ "immediate must be an integer in range [0, 32767]."); ++ case Match_UImm16_Relaxed: ++ return Error(RefineErrorLoc(IDLoc, Operands, ErrorInfo), ++ "expected 16-bit unsigned immediate"); ++ case Match_UImm20_0: ++ return Error(RefineErrorLoc(IDLoc, Operands, ErrorInfo), ++ "expected 20-bit unsigned immediate"); ++ case Match_UImm26_0: ++ return Error(RefineErrorLoc(IDLoc, Operands, ErrorInfo), ++ "expected 26-bit unsigned immediate"); ++ case Match_UImm32_Coerced: ++ return Error(RefineErrorLoc(IDLoc, Operands, ErrorInfo), ++ "expected 32-bit immediate"); ++ case Match_InvalidSImm2: ++ return Error(RefineErrorLoc(IDLoc, Operands, ErrorInfo), ++ "expected 2-bit signed immediate"); ++ case Match_InvalidSImm3: ++ return Error(RefineErrorLoc(IDLoc, Operands, ErrorInfo), ++ "expected 3-bit signed immediate"); ++ case Match_InvalidSImm5: ++ return Error(RefineErrorLoc(IDLoc, Operands, ErrorInfo), ++ "expected 5-bit signed immediate"); ++ case Match_InvalidSImm8: ++ return Error(RefineErrorLoc(IDLoc, Operands, ErrorInfo), ++ "expected 8-bit signed immediate"); ++ case Match_InvalidSImm12: ++ case Match_SImm12_Relaxed: ++ return Error(RefineErrorLoc(IDLoc, Operands, ErrorInfo), ++ "expected 12-bit signed immediate"); ++ case Match_InvalidSImm14: ++ return Error(RefineErrorLoc(IDLoc, Operands, ErrorInfo), ++ "expected 14-bit signed immediate"); ++ case Match_InvalidSImm15: ++ return Error(RefineErrorLoc(IDLoc, Operands, ErrorInfo), ++ "expected 15-bit signed immediate"); ++ case Match_InvalidSImm16: ++ return Error(RefineErrorLoc(IDLoc, Operands, ErrorInfo), ++ "expected 16-bit signed immediate"); ++ case Match_InvalidSImm20: ++ return Error(RefineErrorLoc(IDLoc, Operands, ErrorInfo), ++ "expected 20-bit signed immediate"); ++ case Match_InvalidSImm21: ++ return Error(RefineErrorLoc(IDLoc, Operands, ErrorInfo), ++ "expected 21-bit signed immediate"); ++ case Match_InvalidSImm26: ++ return Error(RefineErrorLoc(IDLoc, Operands, ErrorInfo), ++ "expected 26-bit signed immediate"); ++ case Match_SImm32: ++ case Match_SImm32_Relaxed: ++ return Error(RefineErrorLoc(IDLoc, Operands, ErrorInfo), ++ "expected 32-bit signed immediate"); ++ case Match_MemSImm14: ++ return Error(RefineErrorLoc(IDLoc, Operands, ErrorInfo), ++ "expected memory with 14-bit signed offset"); ++ case Match_MemSImmPtr: ++ return Error(RefineErrorLoc(IDLoc, Operands, 
ErrorInfo), ++ "expected memory with 32-bit signed offset"); ++ case Match_UImm2_1: ++ return Error(RefineErrorLoc(IDLoc, Operands, ErrorInfo), ++ "expected immediate in range 1 .. 4"); ++ case Match_MemSImm14Lsl2: ++ return Error(RefineErrorLoc(IDLoc, Operands, ErrorInfo), ++ "expected memory with 16-bit signed offset and multiple of 4"); ++ case Match_RequiresRange0_31: { ++ SMLoc ErrorStart = Operands[3]->getStartLoc(); ++ SMLoc ErrorEnd = Operands[4]->getEndLoc(); ++ return Error(ErrorStart, "from lsbw to msbw are not in the range 0 .. 31", ++ SMRange(ErrorStart, ErrorEnd)); ++ } ++ case Match_RequiresPosSizeUImm6: { ++ SMLoc ErrorStart = Operands[3]->getStartLoc(); ++ SMLoc ErrorEnd = Operands[4]->getEndLoc(); ++ return Error(ErrorStart, "size plus position are not in the range 1 .. 63", ++ SMRange(ErrorStart, ErrorEnd)); ++ } ++ case Match_RequiresRange0_63: { ++ SMLoc ErrorStart = Operands[3]->getStartLoc(); ++ SMLoc ErrorEnd = Operands[4]->getEndLoc(); ++ return Error(ErrorStart, "from lsbd to msbd are not in the range 0 .. 63", ++ SMRange(ErrorStart, ErrorEnd)); ++ } ++ case Match_MsbHigherThanLsb: { ++ SMLoc ErrorStart = Operands[3]->getStartLoc(); ++ SMLoc ErrorEnd = Operands[4]->getEndLoc(); ++ return Error(ErrorStart, "msb are not higher than lsb", SMRange(ErrorStart, ErrorEnd)); ++ } ++ } ++ ++ llvm_unreachable("Implement any new match types added!"); ++} ++ ++/* ++ * Note: The implementation of this function must be sync with the definition ++ * of GPR32/GPR64 RegisterClass in LoongArchRegisterInfo.td ++ */ ++int LoongArchAsmParser::matchCPURegisterName(StringRef Name) { ++ int CC; ++ ++ CC = StringSwitch(Name) ++ .Cases("zero", "r0", 0) ++ .Cases("a0", "v0", "r4", 1) ++ .Cases("a1", "v1", "r5", 2) ++ .Cases("a2", "r6", 3) ++ .Cases("a3", "r7", 4) ++ .Cases("a4", "r8", 5) ++ .Cases("a5", "r9", 6) ++ .Cases("a6", "r10", 7) ++ .Cases("a7", "r11", 8) ++ .Cases("t0", "r12", 9) ++ .Cases("t1", "r13", 10) ++ .Cases("t2", "r14", 11) ++ .Cases("t3", "r15", 12) ++ .Cases("t4", "r16", 13) ++ .Cases("t5", "r17", 14) ++ .Cases("t6", "r18", 15) ++ .Cases("t7", "r19", 16) ++ .Cases("t8", "r20", 17) ++ .Cases("s0", "r23", 18) ++ .Cases("s1", "r24", 19) ++ .Cases("s2", "r25", 20) ++ .Cases("s3", "r26", 21) ++ .Cases("s4", "r27", 22) ++ .Cases("s5", "r28", 23) ++ .Cases("s6", "r29", 24) ++ .Cases("s7", "r30", 25) ++ .Cases("s8", "r31", 26) ++ .Cases("ra", "r1", 27) ++ .Cases("tp", "r2", 28) ++ .Cases("sp", "r3", 29) ++ .Case("r21", 30) ++ .Cases("fp", "r22", 31) ++ .Default(-1); ++ ++ return CC; ++} ++ ++int LoongArchAsmParser::matchFPURegisterName(StringRef Name) { ++ if (Name[0] == 'f') { ++ int CC; ++ ++ CC = StringSwitch(Name) ++ .Cases("f0", "fa0", "fv0", 0) ++ .Cases("f1", "fa1", "fv1", 1) ++ .Cases("f2", "fa2", 2) ++ .Cases("f3", "fa3", 3) ++ .Cases("f4", "fa4", 4) ++ .Cases("f5", "fa5", 5) ++ .Cases("f6", "fa6", 6) ++ .Cases("f7", "fa7", 7) ++ .Cases("f8", "ft0", 8) ++ .Cases("f9", "ft1", 9) ++ .Cases("f10", "ft2", 10) ++ .Cases("f11", "ft3", 11) ++ .Cases("f12", "ft4", 12) ++ .Cases("f13", "ft5", 13) ++ .Cases("f14", "ft6", 14) ++ .Cases("f15", "ft7", 15) ++ .Cases("f16", "ft8", 16) ++ .Cases("f17", "ft9", 17) ++ .Cases("f18", "ft10", 18) ++ .Cases("f19", "ft11", 19) ++ .Cases("f20", "ft12", 20) ++ .Cases("f21", "ft13", 21) ++ .Cases("f22", "ft14", 22) ++ .Cases("f23", "ft15", 23) ++ .Cases("f24", "fs0", 24) ++ .Cases("f25", "fs1", 25) ++ .Cases("f26", "fs2", 26) ++ .Cases("f27", "fs3", 27) ++ .Cases("f28", "fs4", 28) ++ .Cases("f29", "fs5", 29) ++ .Cases("f30", "fs6", 30) ++ 
.Cases("f31", "fs7", 31) ++ .Default(-1); ++ ++ return CC; ++ } ++ return -1; ++} ++ ++int LoongArchAsmParser::matchFCFRRegisterName(StringRef Name) { ++ if (Name.startswith("fcc")) { ++ StringRef NumString = Name.substr(3); ++ unsigned IntVal; ++ if (NumString.getAsInteger(10, IntVal)) ++ return -1; // This is not an integer. ++ if (IntVal > 7) // There are only 8 fcc registers. ++ return -1; ++ return IntVal; ++ } ++ return -1; ++} ++ ++int LoongArchAsmParser::matchFCSRRegisterName(StringRef Name) { ++ if (Name.startswith("fcsr")) { ++ StringRef NumString = Name.substr(4); ++ unsigned IntVal; ++ if (NumString.getAsInteger(10, IntVal)) ++ return -1; // This is not an integer. ++ if (IntVal > 3) // There are only 4 fcsr registers. ++ return -1; ++ return IntVal; ++ } ++ return -1; ++} ++ ++int LoongArchAsmParser::matchLSX128RegisterName(StringRef Name) { ++ unsigned IntVal; ++ ++ if (Name.front() != 'v' || Name.drop_front(2).getAsInteger(10, IntVal)) ++ return -1; ++ ++ if (IntVal > 31) ++ return -1; ++ ++ return IntVal; ++} ++ ++int LoongArchAsmParser::matchLASX256RegisterName(StringRef Name) { ++ unsigned IntVal; ++ ++ if (Name.front() != 'x' || Name.drop_front(2).getAsInteger(10, IntVal)) ++ return -1; ++ ++ if (IntVal > 31) ++ return -1; ++ ++ return IntVal; ++} ++ ++bool LoongArchAsmParser::parseOperand(OperandVector &Operands, StringRef Mnemonic) { ++ MCAsmParser &Parser = getParser(); ++ LLVM_DEBUG(dbgs() << "parseOperand\n"); ++ ++ // Check if the current operand has a custom associated parser, if so, try to ++ // custom parse the operand, or fallback to the general approach. ++ OperandMatchResultTy ResTy = MatchOperandParserImpl(Operands, Mnemonic); ++ if (ResTy == MatchOperand_Success) ++ return false; ++ // If there wasn't a custom match, try the generic matcher below. Otherwise, ++ // there was a match, but an error occurred, in which case, just return that ++ // the operand parsing failed. ++ if (ResTy == MatchOperand_ParseFail) ++ return true; ++ ++ LLVM_DEBUG(dbgs() << ".. Generic Parser\n"); ++ ++ switch (getLexer().getKind()) { ++ case AsmToken::Dollar: { ++ // Parse the register. ++ SMLoc S = Parser.getTok().getLoc(); ++ ++ // Almost all registers have been parsed by custom parsers. There is only ++ // one exception to this. $zero (and it's alias $0) will reach this point ++ // for div, divu, and similar instructions because it is not an operand ++ // to the instruction definition but an explicit register. Special case ++ // this situation for now. ++ if (parseAnyRegister(Operands) != MatchOperand_NoMatch) ++ return false; ++ ++ // Maybe it is a symbol reference. ++ StringRef Identifier; ++ if (Parser.parseIdentifier(Identifier)) ++ return true; ++ ++ SMLoc E = SMLoc::getFromPointer(Parser.getTok().getLoc().getPointer() - 1); ++ MCSymbol *Sym = getContext().getOrCreateSymbol("$" + Identifier); ++ // Otherwise create a symbol reference. ++ const MCExpr *Res = ++ MCSymbolRefExpr::create(Sym, MCSymbolRefExpr::VK_None, getContext()); ++ ++ Operands.push_back(LoongArchOperand::CreateImm(Res, S, E, *this)); ++ return false; ++ } ++ default: { ++ LLVM_DEBUG(dbgs() << ".. generic integer expression\n"); ++ ++ const MCExpr *Expr; ++ SMLoc S = Parser.getTok().getLoc(); // Start location of the operand. 
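The register-name matchers above accept both ABI names and raw $rN spellings and, notably, return an index into the GPR register class as laid out in LoongArchRegisterInfo.td rather than the architectural register number, so "a0" and "r4" both map to index 1. A tiny sketch of the same two-alias lookup over a hypothetical subset of that table, following the matchers' -1 "no match" convention:

    #include <cstring>

    struct RegAlias { const char *AbiName; const char *RawName; int ClassIndex; };

    // Illustrative subset only; the real table covers r0..r31.
    static const RegAlias Table[] = {
        {"zero", "r0", 0}, {"a0", "r4", 1}, {"a1", "r5", 2},
        {"t0", "r12", 9},  {"ra", "r1", 27}, {"sp", "r3", 29},
    };

    // Returns the register-class index, or -1 if the name is unknown.
    static int matchGPRName(const char *Name) {
      for (const RegAlias &A : Table)
        if (!std::strcmp(Name, A.AbiName) || !std::strcmp(Name, A.RawName))
          return A.ClassIndex;
      return -1;
    }

    int main() { return matchGPRName("a0") == matchGPRName("r4") ? 0 : 1; }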
++ if (getParser().parseExpression(Expr)) ++ return true; ++ ++ SMLoc E = SMLoc::getFromPointer(Parser.getTok().getLoc().getPointer() - 1); ++ ++ Operands.push_back(LoongArchOperand::CreateImm(Expr, S, E, *this)); ++ return false; ++ } ++ } // switch(getLexer().getKind()) ++ return true; ++} ++ ++bool LoongArchAsmParser::isEvaluated(const MCExpr *Expr) { ++ switch (Expr->getKind()) { ++ case MCExpr::Constant: ++ return true; ++ case MCExpr::SymbolRef: ++ return (cast(Expr)->getKind() != MCSymbolRefExpr::VK_None); ++ case MCExpr::Binary: { ++ const MCBinaryExpr *BE = cast(Expr); ++ if (!isEvaluated(BE->getLHS())) ++ return false; ++ return isEvaluated(BE->getRHS()); ++ } ++ case MCExpr::Unary: ++ return isEvaluated(cast(Expr)->getSubExpr()); ++ case MCExpr::Target: ++ return true; ++ } ++ return false; ++} ++ ++bool LoongArchAsmParser::ParseRegister(unsigned &RegNo, SMLoc &StartLoc, ++ SMLoc &EndLoc) { ++ return tryParseRegister(RegNo, StartLoc, EndLoc) != MatchOperand_Success; ++} ++ ++OperandMatchResultTy LoongArchAsmParser::tryParseRegister(unsigned &RegNo, ++ SMLoc &StartLoc, ++ SMLoc &EndLoc) { ++ SmallVector, 1> Operands; ++ OperandMatchResultTy ResTy = parseAnyRegister(Operands); ++ if (ResTy == MatchOperand_Success) { ++ assert(Operands.size() == 1); ++ LoongArchOperand &Operand = static_cast(*Operands.front()); ++ StartLoc = Operand.getStartLoc(); ++ EndLoc = Operand.getEndLoc(); ++ ++ // AFAIK, we only support numeric registers and named GPR's in CFI ++ // directives. ++ // Don't worry about eating tokens before failing. Using an unrecognised ++ // register is a parse error. ++ if (Operand.isGPRAsmReg()) { ++ // Resolve to GPR32 or GPR64 appropriately. ++ RegNo = is64Bit() ? Operand.getGPR64Reg() : Operand.getGPR32Reg(); ++ } ++ ++ return (RegNo == (unsigned)-1) ? MatchOperand_NoMatch ++ : MatchOperand_Success; ++ } ++ ++ assert(Operands.size() == 0); ++ return (RegNo == (unsigned)-1) ? MatchOperand_NoMatch : MatchOperand_Success; ++} ++ ++bool LoongArchAsmParser::parseMemOffset(const MCExpr *&Res) { ++ return getParser().parseExpression(Res); ++} ++ ++OperandMatchResultTy ++LoongArchAsmParser::parseMemOperand(OperandVector &Operands) { ++ MCAsmParser &Parser = getParser(); ++ LLVM_DEBUG(dbgs() << "parseMemOperand\n"); ++ const MCExpr *IdVal = nullptr; ++ SMLoc S; ++ OperandMatchResultTy Res = MatchOperand_NoMatch; ++ // First operand is the base. ++ S = Parser.getTok().getLoc(); ++ ++ Res = parseAnyRegister(Operands); ++ if (Res != MatchOperand_Success) ++ return Res; ++ ++ if (Parser.getTok().isNot(AsmToken::Comma)) { ++ Error(Parser.getTok().getLoc(), "',' expected"); ++ return MatchOperand_ParseFail; ++ } ++ ++ Parser.Lex(); // Eat the ',' token. ++ ++ if (parseMemOffset(IdVal)) ++ return MatchOperand_ParseFail; ++ ++ SMLoc E = SMLoc::getFromPointer(Parser.getTok().getLoc().getPointer() - 1); ++ ++ // Replace the register operand with the memory operand. ++ std::unique_ptr op( ++ static_cast(Operands.back().release())); ++ // Remove the register from the operands. ++ // "op" will be managed by k_Memory. ++ Operands.pop_back(); ++ ++ // when symbol not defined, error report. ++ if (dyn_cast(IdVal)) { ++ return MatchOperand_ParseFail; ++ } ++ ++ // Add the memory operand. 
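
// Illustrative aside (not part of the patch): parseMemOperand here accepts a
// memory operand written as "<base register>, <offset expression>", and the
// matcher diagnostics near the top of this hunk reject offsets that do not fit
// the encoding (ld.w/st.w take a signed 12-bit offset; ll.w/sc.w take a 14-bit
// field scaled by 4, hence "16-bit signed offset and multiple of 4"). A minimal
// sketch of those range checks; fitsSImm and fitsScaledSImm are hypothetical
// helpers, not LLVM API.
#include <cassert>
#include <cstdint>

// True if V fits in a signed immediate of Bits bits.
static bool fitsSImm(int64_t V, unsigned Bits) {
  int64_t Lo = -(int64_t(1) << (Bits - 1));
  int64_t Hi = (int64_t(1) << (Bits - 1)) - 1;
  return V >= Lo && V <= Hi;
}

// True if V is a multiple of (1 << Shift) and V / (1 << Shift) fits in Bits
// bits, e.g. the ll.w/sc.w offset: a 14-bit field scaled by 4.
static bool fitsScaledSImm(int64_t V, unsigned Bits, unsigned Shift) {
  int64_t Scale = int64_t(1) << Shift;
  return V % Scale == 0 && fitsSImm(V / Scale, Bits);
}

int main() {
  assert(fitsSImm(2047, 12) && !fitsSImm(2048, 12));   // ld.w/st.w offsets
  assert(fitsSImm(-2048, 12) && !fitsSImm(-2049, 12));
  assert(fitsScaledSImm(32764, 14, 2));                // ll.w: in range, multiple of 4
  assert(!fitsScaledSImm(6, 14, 2));                   // rejected: not a multiple of 4
  return 0;
}
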
++ if (dyn_cast(IdVal)) { ++ int64_t Imm; ++ if (IdVal->evaluateAsAbsolute(Imm)) ++ IdVal = MCConstantExpr::create(Imm, getContext()); ++ else ++ return MatchOperand_ParseFail; ++ } ++ ++ Operands.push_back(LoongArchOperand::CreateMem(std::move(op), IdVal, S, E, *this)); ++ return MatchOperand_Success; ++} ++ ++bool LoongArchAsmParser::searchSymbolAlias(OperandVector &Operands) { ++ MCAsmParser &Parser = getParser(); ++ MCSymbol *Sym = getContext().lookupSymbol(Parser.getTok().getIdentifier()); ++ if (!Sym) ++ return false; ++ ++ SMLoc S = Parser.getTok().getLoc(); ++ if (Sym->isVariable()) { ++ const MCExpr *Expr = Sym->getVariableValue(); ++ if (Expr->getKind() == MCExpr::SymbolRef) { ++ const MCSymbolRefExpr *Ref = static_cast(Expr); ++ StringRef DefSymbol = Ref->getSymbol().getName(); ++ if (DefSymbol.startswith("$")) { ++ OperandMatchResultTy ResTy = ++ matchAnyRegisterNameWithoutDollar(Operands, DefSymbol.substr(1), S); ++ if (ResTy == MatchOperand_Success) { ++ Parser.Lex(); ++ return true; ++ } ++ if (ResTy == MatchOperand_ParseFail) ++ llvm_unreachable("Should never ParseFail"); ++ } ++ } ++ } else if (Sym->isUnset()) { ++ // If symbol is unset, it might be created in the `parseSetAssignment` ++ // routine as an alias for a numeric register name. ++ // Lookup in the aliases list. ++ auto Entry = RegisterSets.find(Sym->getName()); ++ if (Entry != RegisterSets.end()) { ++ OperandMatchResultTy ResTy = ++ matchAnyRegisterWithoutDollar(Operands, Entry->getValue(), S); ++ if (ResTy == MatchOperand_Success) { ++ Parser.Lex(); ++ return true; ++ } ++ } ++ } ++ ++ return false; ++} ++ ++OperandMatchResultTy ++LoongArchAsmParser::matchAnyRegisterNameWithoutDollar(OperandVector &Operands, ++ StringRef Identifier, ++ SMLoc S) { ++ int Index = matchCPURegisterName(Identifier); ++ if (Index != -1) { ++ Operands.push_back(LoongArchOperand::createGPRReg( ++ Index, Identifier, getContext().getRegisterInfo(), S, ++ getLexer().getLoc(), *this)); ++ return MatchOperand_Success; ++ } ++ ++ Index = matchFPURegisterName(Identifier); ++ if (Index != -1) { ++ Operands.push_back(LoongArchOperand::createFGRReg( ++ Index, Identifier, getContext().getRegisterInfo(), S, ++ getLexer().getLoc(), *this)); ++ return MatchOperand_Success; ++ } ++ ++ Index = matchFCFRRegisterName(Identifier); ++ if (Index != -1) { ++ Operands.push_back(LoongArchOperand::createFCFRReg( ++ Index, Identifier, getContext().getRegisterInfo(), S, ++ getLexer().getLoc(), *this)); ++ return MatchOperand_Success; ++ } ++ ++ Index = matchFCSRRegisterName(Identifier); ++ if (Index != -1) { ++ Operands.push_back(LoongArchOperand::createFCSRReg( ++ Index, Identifier, getContext().getRegisterInfo(), S, ++ getLexer().getLoc(), *this)); ++ return MatchOperand_Success; ++ } ++ ++ Index = matchLSX128RegisterName(Identifier); ++ if (Index != -1) { ++ Operands.push_back(LoongArchOperand::createLSX128Reg( ++ Index, Identifier, getContext().getRegisterInfo(), S, ++ getLexer().getLoc(), *this)); ++ return MatchOperand_Success; ++ } ++ ++ Index = matchLASX256RegisterName(Identifier); ++ if (Index != -1) { ++ Operands.push_back(LoongArchOperand::createLASX256Reg( ++ Index, Identifier, getContext().getRegisterInfo(), S, ++ getLexer().getLoc(), *this)); ++ return MatchOperand_Success; ++ } ++ ++ return MatchOperand_NoMatch; ++} ++ ++OperandMatchResultTy ++LoongArchAsmParser::matchAnyRegisterWithoutDollar(OperandVector &Operands, ++ const AsmToken &Token, SMLoc S) { ++ if (Token.is(AsmToken::Identifier)) { ++ LLVM_DEBUG(dbgs() << ".. 
identifier\n"); ++ StringRef Identifier = Token.getIdentifier(); ++ OperandMatchResultTy ResTy = ++ matchAnyRegisterNameWithoutDollar(Operands, Identifier, S); ++ return ResTy; ++ } else if (Token.is(AsmToken::Integer)) { ++ LLVM_DEBUG(dbgs() << ".. integer\n"); ++ int64_t RegNum = Token.getIntVal(); ++ if (RegNum < 0 || RegNum > 31) { ++ // Show the error, but treat invalid register ++ // number as a normal one to continue parsing ++ // and catch other possible errors. ++ Error(getLexer().getLoc(), "invalid register number"); ++ } ++ Operands.push_back(LoongArchOperand::createNumericReg( ++ RegNum, Token.getString(), getContext().getRegisterInfo(), S, ++ Token.getLoc(), *this)); ++ return MatchOperand_Success; ++ } ++ ++ LLVM_DEBUG(dbgs() << Token.getKind() << "\n"); ++ ++ return MatchOperand_NoMatch; ++} ++ ++OperandMatchResultTy ++LoongArchAsmParser::matchAnyRegisterWithoutDollar(OperandVector &Operands, SMLoc S) { ++ auto Token = getLexer().peekTok(false); ++ return matchAnyRegisterWithoutDollar(Operands, Token, S); ++} ++ ++OperandMatchResultTy ++LoongArchAsmParser::parseAnyRegister(OperandVector &Operands) { ++ MCAsmParser &Parser = getParser(); ++ LLVM_DEBUG(dbgs() << "parseAnyRegister\n"); ++ ++ auto Token = Parser.getTok(); ++ ++ SMLoc S = Token.getLoc(); ++ ++ if (Token.isNot(AsmToken::Dollar)) { ++ LLVM_DEBUG(dbgs() << ".. !$ -> try sym aliasing\n"); ++ if (Token.is(AsmToken::Identifier)) { ++ if (searchSymbolAlias(Operands)) ++ return MatchOperand_Success; ++ } ++ LLVM_DEBUG(dbgs() << ".. !symalias -> NoMatch\n"); ++ return MatchOperand_NoMatch; ++ } ++ LLVM_DEBUG(dbgs() << ".. $\n"); ++ ++ OperandMatchResultTy ResTy = matchAnyRegisterWithoutDollar(Operands, S); ++ if (ResTy == MatchOperand_Success) { ++ Parser.Lex(); // $ ++ Parser.Lex(); // identifier ++ } ++ return ResTy; ++} ++ ++OperandMatchResultTy ++LoongArchAsmParser::parseJumpTarget(OperandVector &Operands) { ++ MCAsmParser &Parser = getParser(); ++ LLVM_DEBUG(dbgs() << "parseJumpTarget\n"); ++ ++ SMLoc S = getLexer().getLoc(); ++ ++ // Registers are a valid target and have priority over symbols. ++ OperandMatchResultTy ResTy = parseAnyRegister(Operands); ++ if (ResTy != MatchOperand_NoMatch) ++ return ResTy; ++ ++ // Integers and expressions are acceptable ++ const MCExpr *Expr = nullptr; ++ if (Parser.parseExpression(Expr)) { ++ // We have no way of knowing if a symbol was consumed so we must ParseFail ++ return MatchOperand_ParseFail; ++ } ++ Operands.push_back( ++ LoongArchOperand::CreateImm(Expr, S, getLexer().getLoc(), *this)); ++ return MatchOperand_Success; ++} ++ ++static std::string LoongArchMnemonicSpellCheck(StringRef S, ++ const FeatureBitset &FBS, ++ unsigned VariantID = 0); ++ ++bool LoongArchAsmParser::ParseInstruction(ParseInstructionInfo &Info, ++ StringRef Name, SMLoc NameLoc, ++ OperandVector &Operands) { ++ MCAsmParser &Parser = getParser(); ++ LLVM_DEBUG(dbgs() << "ParseInstruction\n"); ++ ++ // We have reached first instruction, module directive are now forbidden. ++ getTargetStreamer().forbidModuleDirective(); ++ ++ // Check if we have valid mnemonic ++ if (!mnemonicIsValid(Name)) { ++ FeatureBitset FBS = ComputeAvailableFeatures(getSTI().getFeatureBits()); ++ std::string Suggestion = LoongArchMnemonicSpellCheck(Name, FBS); ++ return Error(NameLoc, "unknown instruction" + Suggestion); ++ } ++ ++ // First operand in MCInst is instruction mnemonic. ++ Operands.push_back(LoongArchOperand::CreateToken(Name, NameLoc, *this)); ++ ++ // Read the remaining operands. 
++ if (getLexer().isNot(AsmToken::EndOfStatement)) { ++ // Read the first operand. ++ if (parseOperand(Operands, Name)) { ++ SMLoc Loc = getLexer().getLoc(); ++ return Error(Loc, "unexpected token in argument list"); ++ } ++ ++ while (getLexer().is(AsmToken::Comma)) { ++ Parser.Lex(); // Eat the comma. ++ // Parse and remember the operand. ++ if (parseOperand(Operands, Name)) { ++ SMLoc Loc = getLexer().getLoc(); ++ return Error(Loc, "unexpected token in argument list"); ++ } ++ } ++ } ++ if (getLexer().isNot(AsmToken::EndOfStatement)) { ++ SMLoc Loc = getLexer().getLoc(); ++ return Error(Loc, "unexpected token in argument list"); ++ } ++ Parser.Lex(); // Consume the EndOfStatement. ++ return false; ++} ++ ++// FIXME: Given that these have the same name, these should both be ++// consistent on affecting the Parser. ++bool LoongArchAsmParser::reportParseError(Twine ErrorMsg) { ++ SMLoc Loc = getLexer().getLoc(); ++ return Error(Loc, ErrorMsg); ++} ++ ++bool LoongArchAsmParser::parseSetAssignment() { ++ StringRef Name; ++ const MCExpr *Value; ++ MCAsmParser &Parser = getParser(); ++ ++ if (Parser.parseIdentifier(Name)) ++ return reportParseError("expected identifier after .set"); ++ ++ if (getLexer().isNot(AsmToken::Comma)) ++ return reportParseError("unexpected token, expected comma"); ++ Lex(); // Eat comma ++ ++ if (!Parser.parseExpression(Value)) { ++ // Parse assignment of an expression including ++ // symbolic registers: ++ // .set $tmp, $BB0-$BB1 ++ // .set r2, $f2 ++ MCSymbol *Sym = getContext().getOrCreateSymbol(Name); ++ Sym->setVariableValue(Value); ++ } else { ++ return reportParseError("expected valid expression after comma"); ++ } ++ ++ return false; ++} ++ ++bool LoongArchAsmParser::parseDirectiveSet() { ++ const AsmToken &Tok = getParser().getTok(); ++ StringRef IdVal = Tok.getString(); ++ SMLoc Loc = Tok.getLoc(); ++ ++ if (IdVal == "bopt") { ++ Warning(Loc, "'bopt' feature is unsupported"); ++ getParser().Lex(); ++ return false; ++ } ++ if (IdVal == "nobopt") { ++ // We're already running in nobopt mode, so nothing to do. ++ getParser().Lex(); ++ return false; ++ } ++ ++ // It is just an identifier, look for an assignment. ++ return parseSetAssignment(); ++} ++ ++bool LoongArchAsmParser::ParseDirective(AsmToken DirectiveID) { ++ // This returns false if this function recognizes the directive ++ // regardless of whether it is successfully handles or reports an ++ // error. Otherwise it returns true to give the generic parser a ++ // chance at recognizing it. ++ ++ MCAsmParser &Parser = getParser(); ++ StringRef IDVal = DirectiveID.getString(); ++ ++ if (IDVal == ".end") { ++ while (getLexer().isNot(AsmToken::Eof)) ++ Parser.Lex(); ++ return false; ++ } ++ ++ if (IDVal == ".set") { ++ parseDirectiveSet(); ++ return false; ++ } ++ ++ if (IDVal == ".llvm_internal_loongarch_reallow_module_directive") { ++ parseInternalDirectiveReallowModule(); ++ return false; ++ } ++ ++ return true; ++} ++ ++bool LoongArchAsmParser::parseInternalDirectiveReallowModule() { ++ // If this is not the end of the statement, report an error. ++ if (getLexer().isNot(AsmToken::EndOfStatement)) { ++ reportParseError("unexpected token, expected end of statement"); ++ return false; ++ } ++ ++ getTargetStreamer().reallowModuleDirective(); ++ ++ getParser().Lex(); // Eat EndOfStatement token. 
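
// Illustrative aside (not part of the patch): ParseInstruction above first
// validates the mnemonic, and mnemonicIsValid (defined a little further down)
// does so with a binary search (std::equal_range) over the sorted,
// TableGen-generated match table. A minimal standalone sketch of that lookup,
// assuming a plain sorted array in place of MatchTable0 and the default
// comparator in place of LessOpcode:
#include <algorithm>
#include <cassert>
#include <iterator>
#include <string>

// A few LoongArch mnemonics, kept sorted so equal_range can binary-search them.
static const std::string SortedMnemonics[] = {
    "add.w", "addi.w", "bstrins.w", "ld.w", "ll.w", "sc.w", "st.w", "sub.w"};

static bool mnemonicIsKnown(const std::string &Mnemonic) {
  auto Range = std::equal_range(std::begin(SortedMnemonics),
                                std::end(SortedMnemonics), Mnemonic);
  return Range.first != Range.second; // non-empty range => mnemonic present
}

int main() {
  assert(mnemonicIsKnown("ld.w"));
  assert(!mnemonicIsKnown("ld.q")); // unknown -> "unknown instruction" + suggestion
  return 0;
}
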
++ return false; ++} ++ ++extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeLoongArchAsmParser() { ++ RegisterMCAsmParser X(getTheLoongArch32Target()); ++ RegisterMCAsmParser A(getTheLoongArch64Target()); ++} ++ ++#define GET_REGISTER_MATCHER ++#define GET_MATCHER_IMPLEMENTATION ++#define GET_MNEMONIC_SPELL_CHECKER ++#include "LoongArchGenAsmMatcher.inc" ++ ++bool LoongArchAsmParser::mnemonicIsValid(StringRef Mnemonic) { ++ // Find the appropriate table for this asm variant. ++ const MatchEntry *Start, *End; ++ Start = std::begin(MatchTable0); ++ End = std::end(MatchTable0); ++ ++ // Search the table. ++ auto MnemonicRange = std::equal_range(Start, End, Mnemonic, LessOpcode()); ++ return MnemonicRange.first != MnemonicRange.second; ++} +diff --git a/lib/Target/LoongArch/CMakeLists.txt b/lib/Target/LoongArch/CMakeLists.txt +new file mode 100644 +index 00000000..8540b97f +--- /dev/null ++++ b/lib/Target/LoongArch/CMakeLists.txt +@@ -0,0 +1,55 @@ ++add_llvm_component_group(LoongArch HAS_JIT) ++ ++set(LLVM_TARGET_DEFINITIONS LoongArch.td) ++ ++tablegen(LLVM LoongArchGenAsmMatcher.inc -gen-asm-matcher) ++tablegen(LLVM LoongArchGenAsmWriter.inc -gen-asm-writer) ++tablegen(LLVM LoongArchGenCallingConv.inc -gen-callingconv) ++tablegen(LLVM LoongArchGenDAGISel.inc -gen-dag-isel) ++tablegen(LLVM LoongArchGenDisassemblerTables.inc -gen-disassembler) ++tablegen(LLVM LoongArchGenInstrInfo.inc -gen-instr-info) ++tablegen(LLVM LoongArchGenMCCodeEmitter.inc -gen-emitter) ++tablegen(LLVM LoongArchGenMCPseudoLowering.inc -gen-pseudo-lowering) ++tablegen(LLVM LoongArchGenRegisterInfo.inc -gen-register-info) ++tablegen(LLVM LoongArchGenSubtargetInfo.inc -gen-subtarget) ++ ++add_public_tablegen_target(LoongArchCommonTableGen) ++ ++add_llvm_target(LoongArchCodeGen ++ LoongArchAsmPrinter.cpp ++ LoongArchCCState.cpp ++ LoongArchExpandPseudo.cpp ++ LoongArchInstrInfo.cpp ++ LoongArchISelDAGToDAG.cpp ++ LoongArchISelLowering.cpp ++ LoongArchFrameLowering.cpp ++ LoongArchMCInstLower.cpp ++ LoongArchMachineFunction.cpp ++ LoongArchModuleISelDAGToDAG.cpp ++ LoongArchRegisterInfo.cpp ++ LoongArchSubtarget.cpp ++ LoongArchTargetMachine.cpp ++ LoongArchTargetObjectFile.cpp ++ LoongArchTargetTransformInfo.cpp ++ ++ LINK_COMPONENTS ++ Analysis ++ AsmPrinter ++ CodeGen ++ Core ++ MC ++ LoongArchDesc ++ LoongArchInfo ++ SelectionDAG ++ Support ++ Target ++ GlobalISel ++ ++ ADD_TO_COMPONENT ++ LoongArch ++ ) ++ ++add_subdirectory(AsmParser) ++add_subdirectory(Disassembler) ++add_subdirectory(MCTargetDesc) ++add_subdirectory(TargetInfo) +diff --git a/lib/Target/LoongArch/Disassembler/CMakeLists.txt b/lib/Target/LoongArch/Disassembler/CMakeLists.txt +new file mode 100644 +index 00000000..864be631 +--- /dev/null ++++ b/lib/Target/LoongArch/Disassembler/CMakeLists.txt +@@ -0,0 +1,11 @@ ++add_llvm_component_library(LLVMLoongArchDisassembler ++ LoongArchDisassembler.cpp ++ ++ LINK_COMPONENTS ++ MCDisassembler ++ LoongArchInfo ++ Support ++ ++ ADD_TO_COMPONENT ++ LoongArch ++ ) +diff --git a/lib/Target/LoongArch/Disassembler/LoongArchDisassembler.cpp b/lib/Target/LoongArch/Disassembler/LoongArchDisassembler.cpp +new file mode 100644 +index 00000000..2c92cc71 +--- /dev/null ++++ b/lib/Target/LoongArch/Disassembler/LoongArchDisassembler.cpp +@@ -0,0 +1,917 @@ ++//===- LoongArchDisassembler.cpp - Disassembler for LoongArch -----------------------===// ++// ++// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. ++// See https://llvm.org/LICENSE.txt for license information. 
++// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception ++// ++//===----------------------------------------------------------------------===// ++// ++// This file is part of the LoongArch Disassembler. ++// ++//===----------------------------------------------------------------------===// ++ ++#include "MCTargetDesc/LoongArchMCTargetDesc.h" ++#include "LoongArch.h" ++#include "llvm/ADT/ArrayRef.h" ++#include "llvm/MC/MCContext.h" ++#include "llvm/MC/MCDisassembler/MCDisassembler.h" ++#include "llvm/MC/MCFixedLenDisassembler.h" ++#include "llvm/MC/MCInst.h" ++#include "llvm/MC/MCRegisterInfo.h" ++#include "llvm/MC/MCSubtargetInfo.h" ++#include "llvm/Support/Compiler.h" ++#include "llvm/Support/Debug.h" ++#include "llvm/Support/ErrorHandling.h" ++#include "llvm/Support/MathExtras.h" ++#include "llvm/MC/TargetRegistry.h" ++#include "llvm/Support/raw_ostream.h" ++#include ++#include ++ ++using namespace llvm; ++ ++#define DEBUG_TYPE "loongarch-disassembler" ++ ++using DecodeStatus = MCDisassembler::DecodeStatus; ++ ++namespace { ++ ++class LoongArchDisassembler : public MCDisassembler { ++ ++public: ++ LoongArchDisassembler(const MCSubtargetInfo &STI, MCContext &Ctx) ++ : MCDisassembler(STI, Ctx) {} ++ ++ bool is64Bit() const { return STI.getFeatureBits()[LoongArch::Feature64Bit]; } ++ ++ DecodeStatus getInstruction(MCInst &Instr, uint64_t &Size, ++ ArrayRef Bytes, uint64_t Address, ++ raw_ostream &CStream) const override; ++}; ++ ++} // end anonymous namespace ++ ++// Forward declare these because the autogenerated code will reference them. ++// Definitions are further down. ++static DecodeStatus DecodeGPR64RegisterClass(MCInst &Inst, ++ unsigned RegNo, ++ uint64_t Address, ++ const void *Decoder); ++ ++static DecodeStatus DecodeGPR32RegisterClass(MCInst &Inst, ++ unsigned RegNo, ++ uint64_t Address, ++ const void *Decoder); ++ ++static DecodeStatus DecodePtrRegisterClass(MCInst &Inst, ++ unsigned Insn, ++ uint64_t Address, ++ const void *Decoder); ++ ++static DecodeStatus DecodeFGR64RegisterClass(MCInst &Inst, ++ unsigned RegNo, ++ uint64_t Address, ++ const void *Decoder); ++ ++static DecodeStatus DecodeFGR32RegisterClass(MCInst &Inst, ++ unsigned RegNo, ++ uint64_t Address, ++ const void *Decoder); ++ ++static DecodeStatus DecodeFCSRRegisterClass(MCInst &Inst, ++ unsigned RegNo, ++ uint64_t Address, ++ const void *Decoder); ++ ++static DecodeStatus DecodeFCFRRegisterClass(MCInst &Inst, ++ unsigned RegNo, ++ uint64_t Address, ++ const void *Decoder); ++ ++static DecodeStatus DecodeLSX128BRegisterClass(MCInst &Inst, unsigned RegNo, ++ uint64_t Address, ++ const void *Decoder); ++ ++static DecodeStatus DecodeLSX128HRegisterClass(MCInst &Inst, unsigned RegNo, ++ uint64_t Address, ++ const void *Decoder); ++ ++static DecodeStatus DecodeLSX128WRegisterClass(MCInst &Inst, unsigned RegNo, ++ uint64_t Address, ++ const void *Decoder); ++ ++static DecodeStatus DecodeLSX128DRegisterClass(MCInst &Inst, unsigned RegNo, ++ uint64_t Address, ++ const void *Decoder); ++ ++static DecodeStatus DecodeLASX256BRegisterClass(MCInst &Inst, unsigned RegNo, ++ uint64_t Address, ++ const void *Decoder); ++ ++static DecodeStatus DecodeLASX256HRegisterClass(MCInst &Inst, unsigned RegNo, ++ uint64_t Address, ++ const void *Decoder); ++ ++static DecodeStatus DecodeLASX256WRegisterClass(MCInst &Inst, unsigned RegNo, ++ uint64_t Address, ++ const void *Decoder); ++ ++static DecodeStatus DecodeLASX256DRegisterClass(MCInst &Inst, unsigned RegNo, ++ uint64_t Address, ++ const void *Decoder); ++ ++static DecodeStatus 
DecodeBranchTarget(MCInst &Inst, ++ unsigned Offset, ++ uint64_t Address, ++ const void *Decoder); ++ ++static DecodeStatus DecodeJumpTarget(MCInst &Inst, ++ unsigned Insn, ++ uint64_t Address, ++ const void *Decoder); ++ ++static DecodeStatus DecodeMem(MCInst &Inst, ++ unsigned Insn, ++ uint64_t Address, ++ const void *Decoder); ++ ++static DecodeStatus DecodeMemSimm14(MCInst &Inst, ++ unsigned Insn, ++ uint64_t Address, ++ const void *Decoder); ++ ++static DecodeStatus DecodeLSX128Mem(MCInst &Inst, unsigned Insn, ++ uint64_t Address, const void *Decoder); ++ ++static DecodeStatus DecodeLSX128Mem13(MCInst &Inst, unsigned Insn, ++ uint64_t Address, const void *Decoder); ++ ++static DecodeStatus DecodeLSX128Mem10(MCInst &Inst, unsigned Insn, ++ uint64_t Address, const void *Decoder); ++ ++static DecodeStatus DecodeLASX256Mem13(MCInst &Inst, unsigned Insn, ++ uint64_t Address, const void *Decoder); ++ ++static DecodeStatus DecodeLASX256Mem10(MCInst &Inst, unsigned Insn, ++ uint64_t Address, const void *Decoder); ++ ++static DecodeStatus DecodeLSX128memlsl(MCInst &Inst, unsigned Insn, ++ uint64_t Address, const void *Decoder); ++ ++static DecodeStatus DecodeLSX128memstl(MCInst &Inst, unsigned Insn, ++ uint64_t Address, const void *Decoder); ++ ++static DecodeStatus DecodeLASX256memlsl(MCInst &Inst, unsigned Insn, ++ uint64_t Address, const void *Decoder); ++ ++static DecodeStatus DecodeLASX256memstl(MCInst &Inst, unsigned Insn, ++ uint64_t Address, const void *Decoder); ++ ++static DecodeStatus DecodeLASX256Mem(MCInst &Inst, unsigned Insn, ++ uint64_t Address, const void *Decoder); ++ ++static DecodeStatus DecodeFMem(MCInst &Inst, unsigned Insn, ++ uint64_t Address, ++ const void *Decoder); ++ ++template ++static DecodeStatus DecodeUImmWithOffsetAndScale(MCInst &Inst, unsigned Value, ++ uint64_t Address, ++ const void *Decoder); ++ ++template ++static DecodeStatus DecodeUImmWithOffset(MCInst &Inst, unsigned Value, ++ uint64_t Address, ++ const void *Decoder) { ++ return DecodeUImmWithOffsetAndScale(Inst, Value, Address, ++ Decoder); ++} ++ ++template ++static DecodeStatus DecodeSImmWithOffsetAndScale(MCInst &Inst, unsigned Value, ++ uint64_t Address, ++ const void *Decoder); ++ ++/// INSVE_[BHWD] have an implicit operand that the generated decoder doesn't ++/// handle. ++template ++static DecodeStatus DecodeINSVE_DF(MCInst &MI, InsnType insn, uint64_t Address, ++ const void *Decoder); ++ ++namespace llvm { ++ ++Target &getTheLoongArch32Target(); ++Target &getTheLoongArch64Target(); ++ ++} // end namespace llvm ++ ++static MCDisassembler *createLoongArchDisassembler( ++ const Target &T, ++ const MCSubtargetInfo &STI, ++ MCContext &Ctx) { ++ return new LoongArchDisassembler(STI, Ctx); ++} ++ ++extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeLoongArchDisassembler() { ++ // Register the disassembler. 
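
// Illustrative aside (not part of the patch): the decoders that follow
// (readInstruction32, DecodeMem, DecodeJumpTarget, ...) all assemble a 32-bit
// word from four little-endian bytes, slice fixed bit-fields out of it, and
// sign-extend/scale the immediates. A minimal standalone sketch of those steps;
// the helper names and the sample encodings are illustrative only.
#include <cassert>
#include <cstdint>

// Assemble an instruction word from four little-endian bytes (readInstruction32).
static uint32_t readWordLE(const uint8_t B[4]) {
  return uint32_t(B[0]) | uint32_t(B[1]) << 8 | uint32_t(B[2]) << 16 |
         uint32_t(B[3]) << 24;
}

// Extract Width bits starting at bit Lo (like fieldFromInstruction).
static uint32_t field(uint32_t Insn, unsigned Lo, unsigned Width) {
  return (Insn >> Lo) & ((1u << Width) - 1u);
}

// Sign-extend the low Bits bits of V (like SignExtend32<Bits>).
static int32_t signExtend(uint32_t V, unsigned Bits) {
  uint32_t SignBit = 1u << (Bits - 1);
  return int32_t((V ^ SignBit) - SignBit);
}

int main() {
  const uint8_t Bytes[4] = {0x78, 0x56, 0x34, 0x12};
  assert(readWordLE(Bytes) == 0x12345678u);

  // DecodeMem-style layout: rd = bits[4:0], base = bits[9:5], simm12 = bits[21:10].
  uint32_t MemWord = (0xFF8u << 10) | (5u << 5) | 4u; // offset -8, base $r5, rd $r4
  assert(field(MemWord, 0, 5) == 4);
  assert(field(MemWord, 5, 5) == 5);
  assert(signExtend(field(MemWord, 10, 12), 12) == -8);

  // DecodeJumpTarget-style layout: the 26-bit offset is split into a low 16-bit
  // part at bits[25:10] and a high 10-bit part at bits[9:0]; the byte offset is
  // the reassembled value shifted left by 2 and sign-extended from 28 bits.
  uint32_t Hi10 = 0x3FFu, Lo16 = 0xFFFFu; // a 26-bit offset of all ones
  int32_t Target = signExtend((Hi10 << 16 | Lo16) << 2, 28);
  assert(Target == -4); // i.e. one instruction backwards
  return 0;
}
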
++ TargetRegistry::RegisterMCDisassembler(getTheLoongArch32Target(), ++ createLoongArchDisassembler); ++ TargetRegistry::RegisterMCDisassembler(getTheLoongArch64Target(), ++ createLoongArchDisassembler); ++} ++ ++#include "LoongArchGenDisassemblerTables.inc" ++ ++static unsigned getReg(const void *D, unsigned RC, unsigned RegNo) { ++ const LoongArchDisassembler *Dis = static_cast(D); ++ const MCRegisterInfo *RegInfo = Dis->getContext().getRegisterInfo(); ++ if (RC == LoongArch::GPR64RegClassID || RC == LoongArch::GPR32RegClassID) { ++ // sync with the GPR32/GPR64 RegisterClass in LoongArchRegisterInfo.td ++ // that just like LoongArchAsmParser.cpp and LoongArchISelLowering.cpp ++ unsigned char indexes[] = { 0, 27, 28, 29, 1, 2, 3, 4, ++ 5, 6, 7, 8, 9, 10, 11, 12, ++ 13, 14, 15, 16, 17, 30, 31, 18, ++ 19, 20, 21, 22, 23, 24, 25, 26 ++ }; ++ assert(RegNo < sizeof(indexes)); ++ return *(RegInfo->getRegClass(RC).begin() + indexes[RegNo]); ++ } ++ return *(RegInfo->getRegClass(RC).begin() + RegNo); ++} ++ ++template ++static DecodeStatus DecodeINSVE_DF(MCInst &MI, InsnType insn, uint64_t Address, ++ const void *Decoder) { ++ using DecodeFN = DecodeStatus (*)(MCInst &, unsigned, uint64_t, const void *); ++ ++ // The size of the n field depends on the element size ++ // The register class also depends on this. ++ InsnType tmp = fieldFromInstruction(insn, 17, 5); ++ unsigned NSize = 0; ++ DecodeFN RegDecoder = nullptr; ++ if ((tmp & 0x18) == 0x00) { ++ NSize = 4; ++ RegDecoder = DecodeLSX128BRegisterClass; ++ } else if ((tmp & 0x1c) == 0x10) { ++ NSize = 3; ++ RegDecoder = DecodeLSX128HRegisterClass; ++ } else if ((tmp & 0x1e) == 0x18) { ++ NSize = 2; ++ RegDecoder = DecodeLSX128WRegisterClass; ++ } else if ((tmp & 0x1f) == 0x1c) { ++ NSize = 1; ++ RegDecoder = DecodeLSX128DRegisterClass; ++ } else ++ llvm_unreachable("Invalid encoding"); ++ ++ assert(NSize != 0 && RegDecoder != nullptr); ++ ++ // $vd ++ tmp = fieldFromInstruction(insn, 6, 5); ++ if (RegDecoder(MI, tmp, Address, Decoder) == MCDisassembler::Fail) ++ return MCDisassembler::Fail; ++ // $vd_in ++ if (RegDecoder(MI, tmp, Address, Decoder) == MCDisassembler::Fail) ++ return MCDisassembler::Fail; ++ // $n ++ tmp = fieldFromInstruction(insn, 16, NSize); ++ MI.addOperand(MCOperand::createImm(tmp)); ++ // $vs ++ tmp = fieldFromInstruction(insn, 11, 5); ++ if (RegDecoder(MI, tmp, Address, Decoder) == MCDisassembler::Fail) ++ return MCDisassembler::Fail; ++ // $n2 ++ MI.addOperand(MCOperand::createImm(0)); ++ ++ return MCDisassembler::Success; ++} ++ ++/// Read four bytes from the ArrayRef and return 32 bit word. ++static DecodeStatus readInstruction32(ArrayRef Bytes, uint64_t Address, ++ uint64_t &Size, uint32_t &Insn) { ++ // We want to read exactly 4 Bytes of data. ++ if (Bytes.size() < 4) { ++ Size = 0; ++ return MCDisassembler::Fail; ++ } ++ ++ Insn = (Bytes[0] << 0) | (Bytes[1] << 8) | (Bytes[2] << 16) | ++ (Bytes[3] << 24); ++ ++ return MCDisassembler::Success; ++} ++ ++DecodeStatus LoongArchDisassembler::getInstruction(MCInst &Instr, uint64_t &Size, ++ ArrayRef Bytes, ++ uint64_t Address, ++ raw_ostream &CStream) const { ++ uint32_t Insn; ++ DecodeStatus Result; ++ Size = 0; ++ ++ // Attempt to read the instruction so that we can attempt to decode it. If ++ // the buffer is not 4 bytes long, let the higher level logic figure out ++ // what to do with a size of zero and MCDisassembler::Fail. 
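
// Illustrative aside (not part of the patch): getReg above remaps the raw 5-bit
// register number through the indexes[] table because, per its comment, the
// GPR32/GPR64 register classes are kept in the same ABI order that
// matchCPURegisterName uses in the assembler earlier in this patch (zero,
// a0..a7, t0..t8, s0..s8, ra, tp, sp, r21, fp). A standalone check, under that
// assumption, that the two tables agree; classPosForHwReg is a hypothetical
// helper.
#include <cassert>

// Class position of hardware register $rN, following the assembler's ordering.
static int classPosForHwReg(int N) {
  if (N == 0)  return 0;            // $zero
  if (N == 1)  return 27;           // $ra
  if (N == 2)  return 28;           // $tp
  if (N == 3)  return 29;           // $sp
  if (N <= 11) return 1 + (N - 4);  // $a0..$a7
  if (N <= 20) return 9 + (N - 12); // $t0..$t8
  if (N == 21) return 30;           // $r21
  if (N == 22) return 31;           // $fp
  return 18 + (N - 23);             // $s0..$s8
}

int main() {
  // The remap table used by the disassembler's getReg, copied from above.
  const unsigned char indexes[32] = {
      0,  27, 28, 29, 1,  2,  3,  4,  5,  6,  7,  8,  9,  10, 11, 12,
      13, 14, 15, 16, 17, 30, 31, 18, 19, 20, 21, 22, 23, 24, 25, 26};
  for (int N = 0; N < 32; ++N)
    assert(int(indexes[N]) == classPosForHwReg(N));
  return 0;
}
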
++ Result = readInstruction32(Bytes, Address, Size, Insn); ++ if (Result == MCDisassembler::Fail) ++ return MCDisassembler::Fail; ++ ++ // The only instruction size for standard encoded LoongArch. ++ Size = 4; ++ ++ if (is64Bit()) { ++ LLVM_DEBUG(dbgs() << "Trying LoongArch (GPR64) table (32-bit opcodes):\n"); ++ Result = decodeInstruction(DecoderTableLoongArch32, Instr, Insn, ++ Address, this, STI); ++ if (Result != MCDisassembler::Fail) ++ return Result; ++ } ++ ++ LLVM_DEBUG(dbgs() << "Trying LoongArch32 (GPR32) table (32-bit opcodes):\n"); ++ Result = decodeInstruction(DecoderTableLoongArch3232, Instr, Insn, ++ Address, this, STI); ++ if (Result != MCDisassembler::Fail) ++ return Result; ++ ++ return MCDisassembler::Fail; ++} ++ ++static DecodeStatus DecodeGPR64RegisterClass(MCInst &Inst, ++ unsigned RegNo, ++ uint64_t Address, ++ const void *Decoder) { ++ if (RegNo > 31) ++ return MCDisassembler::Fail; ++ ++ unsigned Reg = getReg(Decoder, LoongArch::GPR64RegClassID, RegNo); ++ Inst.addOperand(MCOperand::createReg(Reg)); ++ return MCDisassembler::Success; ++} ++ ++static DecodeStatus DecodeGPR32RegisterClass(MCInst &Inst, ++ unsigned RegNo, ++ uint64_t Address, ++ const void *Decoder) { ++ if (RegNo > 31) ++ return MCDisassembler::Fail; ++ unsigned Reg = getReg(Decoder, LoongArch::GPR32RegClassID, RegNo); ++ Inst.addOperand(MCOperand::createReg(Reg)); ++ return MCDisassembler::Success; ++} ++ ++static DecodeStatus DecodePtrRegisterClass(MCInst &Inst, ++ unsigned RegNo, ++ uint64_t Address, ++ const void *Decoder) { ++ if (static_cast(Decoder)->is64Bit()) ++ return DecodeGPR64RegisterClass(Inst, RegNo, Address, Decoder); ++ ++ return DecodeGPR32RegisterClass(Inst, RegNo, Address, Decoder); ++} ++ ++static DecodeStatus DecodeFGR64RegisterClass(MCInst &Inst, ++ unsigned RegNo, ++ uint64_t Address, ++ const void *Decoder) { ++ if (RegNo > 31) ++ return MCDisassembler::Fail; ++ unsigned Reg = getReg(Decoder, LoongArch::FGR64RegClassID, RegNo); ++ Inst.addOperand(MCOperand::createReg(Reg)); ++ return MCDisassembler::Success; ++} ++ ++static DecodeStatus DecodeFGR32RegisterClass(MCInst &Inst, ++ unsigned RegNo, ++ uint64_t Address, ++ const void *Decoder) { ++ if (RegNo > 31) ++ return MCDisassembler::Fail; ++ ++ unsigned Reg = getReg(Decoder, LoongArch::FGR32RegClassID, RegNo); ++ Inst.addOperand(MCOperand::createReg(Reg)); ++ return MCDisassembler::Success; ++} ++ ++static DecodeStatus DecodeFCSRRegisterClass(MCInst &Inst, ++ unsigned RegNo, ++ uint64_t Address, ++ const void *Decoder) { ++ if (RegNo > 31) ++ return MCDisassembler::Fail; ++ ++ unsigned Reg = getReg(Decoder, LoongArch::FCSRRegClassID, RegNo); ++ Inst.addOperand(MCOperand::createReg(Reg)); ++ return MCDisassembler::Success; ++} ++ ++static DecodeStatus DecodeFCFRRegisterClass(MCInst &Inst, ++ unsigned RegNo, ++ uint64_t Address, ++ const void *Decoder) { ++ if (RegNo > 7) ++ return MCDisassembler::Fail; ++ ++ unsigned Reg = getReg(Decoder, LoongArch::FCFRRegClassID, RegNo); ++ Inst.addOperand(MCOperand::createReg(Reg)); ++ ++ return MCDisassembler::Success; ++} ++ ++static DecodeStatus DecodeMem(MCInst &Inst, ++ unsigned Insn, ++ uint64_t Address, ++ const void *Decoder) { ++ int Offset = SignExtend32<12>((Insn >> 10) & 0xfff); ++ unsigned Reg = fieldFromInstruction(Insn, 0, 5); ++ unsigned Base = fieldFromInstruction(Insn, 5, 5); ++ ++ Reg = getReg(Decoder, LoongArch::GPR32RegClassID, Reg); ++ Base = getReg(Decoder, LoongArch::GPR32RegClassID, Base); ++ ++ if (Inst.getOpcode() == LoongArch::SC_W || ++ Inst.getOpcode() == 
LoongArch::SC_D) ++ Inst.addOperand(MCOperand::createReg(Reg)); ++ ++ Inst.addOperand(MCOperand::createReg(Reg)); ++ Inst.addOperand(MCOperand::createReg(Base)); ++ Inst.addOperand(MCOperand::createImm(Offset)); ++ ++ return MCDisassembler::Success; ++} ++ ++static DecodeStatus DecodeMemSimm14(MCInst &Inst, ++ unsigned Insn, ++ uint64_t Address, ++ const void *Decoder) { ++ int Offset = SignExtend32<12>((Insn >> 10) & 0x3fff); ++ unsigned Reg = fieldFromInstruction(Insn, 0, 5); ++ unsigned Base = fieldFromInstruction(Insn, 5, 5); ++ ++ Reg = getReg(Decoder, LoongArch::GPR32RegClassID, Reg); ++ Base = getReg(Decoder, LoongArch::GPR32RegClassID, Base); ++ ++ if (Inst.getOpcode() == LoongArch::SC_W || ++ Inst.getOpcode() == LoongArch::SC_D) ++ Inst.addOperand(MCOperand::createReg(Reg)); ++ ++ Inst.addOperand(MCOperand::createReg(Reg)); ++ Inst.addOperand(MCOperand::createReg(Base)); ++ Inst.addOperand(MCOperand::createImm(Offset)); ++ ++ return MCDisassembler::Success; ++} ++ ++static DecodeStatus DecodeLSX128Mem(MCInst &Inst, unsigned Insn, ++ uint64_t Address, const void *Decoder) { ++ int Offset = SignExtend32<12>(fieldFromInstruction(Insn, 10, 12)); ++ unsigned Reg = fieldFromInstruction(Insn, 0, 5); ++ unsigned Base = fieldFromInstruction(Insn, 5, 5); ++ Reg = getReg(Decoder, LoongArch::LSX128BRegClassID, Reg); ++ Base = getReg(Decoder, LoongArch::GPR32RegClassID, Base); ++ ++ Inst.addOperand(MCOperand::createReg(Reg)); ++ Inst.addOperand(MCOperand::createReg(Base)); ++ ++ Inst.addOperand(MCOperand::createImm(Offset)); ++ ++ return MCDisassembler::Success; ++} ++ ++static DecodeStatus DecodeLSX128Mem13(MCInst &Inst, unsigned Insn, ++ uint64_t Address, const void *Decoder) { ++ int Offset = SignExtend32<13>(fieldFromInstruction(Insn, 5, 13)); ++ unsigned Reg = fieldFromInstruction(Insn, 0, 5); ++ Reg = getReg(Decoder, LoongArch::LSX128BRegClassID, Reg); ++ ++ Inst.addOperand(MCOperand::createReg(Reg)); ++ ++ Inst.addOperand(MCOperand::createImm(Offset)); ++ ++ return MCDisassembler::Success; ++} ++ ++static DecodeStatus DecodeLSX128Mem10(MCInst &Inst, unsigned Insn, ++ uint64_t Address, const void *Decoder) { ++ int Offset = SignExtend32<10>(fieldFromInstruction(Insn, 5, 10)); ++ unsigned Reg = fieldFromInstruction(Insn, 0, 5); ++ Reg = getReg(Decoder, LoongArch::LSX128BRegClassID, Reg); ++ ++ Inst.addOperand(MCOperand::createReg(Reg)); ++ ++ Inst.addOperand(MCOperand::createImm(Offset)); ++ ++ return MCDisassembler::Success; ++} ++ ++static DecodeStatus DecodeLASX256Mem13(MCInst &Inst, unsigned Insn, ++ uint64_t Address, const void *Decoder) { ++ int Offset = SignExtend32<13>(fieldFromInstruction(Insn, 5, 13)); ++ unsigned Reg = fieldFromInstruction(Insn, 0, 5); ++ Reg = getReg(Decoder, LoongArch::LASX256BRegClassID, Reg); ++ ++ Inst.addOperand(MCOperand::createReg(Reg)); ++ ++ Inst.addOperand(MCOperand::createImm(Offset)); ++ ++ return MCDisassembler::Success; ++} ++ ++static DecodeStatus DecodeLASX256Mem10(MCInst &Inst, unsigned Insn, ++ uint64_t Address, const void *Decoder) { ++ int Offset = SignExtend32<10>(fieldFromInstruction(Insn, 5, 10)); ++ unsigned Reg = fieldFromInstruction(Insn, 0, 5); ++ Reg = getReg(Decoder, LoongArch::LASX256BRegClassID, Reg); ++ ++ Inst.addOperand(MCOperand::createReg(Reg)); ++ ++ Inst.addOperand(MCOperand::createImm(Offset)); ++ ++ return MCDisassembler::Success; ++} ++ ++static DecodeStatus DecodeLSX128memstl(MCInst &Inst, unsigned Insn, ++ uint64_t Address, const void *Decoder) { ++ int Offset = SignExtend32<8>(fieldFromInstruction(Insn, 10, 8)); ++ 
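
// Illustrative aside (not part of the patch): in the vstelm.*/xvstelm.* decoding
// just below, the 8-bit signed offset field is scaled by the element size, and
// the element-index field shrinks as elements get wider (4/3/2/1 bits for the
// 128-bit b/h/w/d forms, 5/4/3/2 bits for the 256-bit forms), so the index can
// address every lane exactly once. A small sketch of that relationship;
// laneCount and indexFieldBits are hypothetical helpers.
#include <cassert>

static unsigned laneCount(unsigned VecBytes, unsigned ElemBytes) {
  return VecBytes / ElemBytes;
}

// Width in bits of the element-index field for a vector of VecBytes bytes.
static unsigned indexFieldBits(unsigned VecBytes, unsigned ElemBytes) {
  unsigned Bits = 0;
  for (unsigned N = laneCount(VecBytes, ElemBytes); N > 1; N >>= 1)
    ++Bits;
  return Bits;
}

int main() {
  // LSX: 16-byte vectors.
  assert(indexFieldBits(16, 1) == 4); // vstelm.b: index field 0..15
  assert(indexFieldBits(16, 2) == 3); // vstelm.h
  assert(indexFieldBits(16, 4) == 2); // vstelm.w
  assert(indexFieldBits(16, 8) == 1); // vstelm.d
  // LASX: 32-byte vectors.
  assert(indexFieldBits(32, 1) == 5); // xvstelm.b
  assert(indexFieldBits(32, 8) == 2); // xvstelm.d
  // The byte offset is the 8-bit signed field times the element size,
  // e.g. a field value of -3 for vstelm.w means a byte offset of -12.
  assert(-3 * 4 == -12);
  return 0;
}
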
unsigned Reg = fieldFromInstruction(Insn, 0, 5); ++ unsigned Base = fieldFromInstruction(Insn, 5, 5); ++ Reg = getReg(Decoder, LoongArch::LSX128BRegClassID, Reg); ++ Base = getReg(Decoder, LoongArch::GPR32RegClassID, Base); ++ unsigned idx; ++ ++ Inst.addOperand(MCOperand::createReg(Reg)); ++ Inst.addOperand(MCOperand::createReg(Base)); ++ ++ switch (Inst.getOpcode()) { ++ default: ++ assert(false && "Unexpected instruction"); ++ return MCDisassembler::Fail; ++ break; ++ case LoongArch::VSTELM_B: ++ Inst.addOperand(MCOperand::createImm(Offset)); ++ idx = fieldFromInstruction(Insn, 18, 4); ++ Inst.addOperand(MCOperand::createImm(idx)); ++ break; ++ case LoongArch::VSTELM_H: ++ Inst.addOperand(MCOperand::createImm(Offset * 2)); ++ idx = fieldFromInstruction(Insn, 18, 3); ++ Inst.addOperand(MCOperand::createImm(idx)); ++ break; ++ case LoongArch::VSTELM_W: ++ Inst.addOperand(MCOperand::createImm(Offset * 4)); ++ idx = fieldFromInstruction(Insn, 18, 2); ++ Inst.addOperand(MCOperand::createImm(idx)); ++ break; ++ case LoongArch::VSTELM_D: ++ Inst.addOperand(MCOperand::createImm(Offset * 8)); ++ idx = fieldFromInstruction(Insn, 18, 1); ++ Inst.addOperand(MCOperand::createImm(idx)); ++ break; ++ } ++ ++ return MCDisassembler::Success; ++} ++ ++static DecodeStatus DecodeLSX128memlsl(MCInst &Inst, unsigned Insn, ++ uint64_t Address, const void *Decoder) { ++ ++ int Offset; ++ unsigned Reg, Base; ++ switch (Inst.getOpcode()) { ++ default: ++ assert(false && "Unexpected instruction"); ++ return MCDisassembler::Fail; ++ break; ++ case LoongArch::VLDREPL_B: ++ ++ Offset = SignExtend32<12>(fieldFromInstruction(Insn, 10, 12)); ++ Reg = fieldFromInstruction(Insn, 0, 5); ++ Base = fieldFromInstruction(Insn, 5, 5); ++ Reg = getReg(Decoder, LoongArch::LSX128BRegClassID, Reg); ++ Base = getReg(Decoder, LoongArch::GPR32RegClassID, Base); ++ ++ Inst.addOperand(MCOperand::createReg(Reg)); ++ Inst.addOperand(MCOperand::createReg(Base)); ++ ++ Inst.addOperand(MCOperand::createImm(Offset)); ++ break; ++ case LoongArch::VLDREPL_H: ++ ++ Offset = SignExtend32<11>(fieldFromInstruction(Insn, 10, 11)); ++ Reg = fieldFromInstruction(Insn, 0, 5); ++ Base = fieldFromInstruction(Insn, 5, 5); ++ Reg = getReg(Decoder, LoongArch::LSX128HRegClassID, Reg); ++ Base = getReg(Decoder, LoongArch::GPR32RegClassID, Base); ++ ++ Inst.addOperand(MCOperand::createReg(Reg)); ++ Inst.addOperand(MCOperand::createReg(Base)); ++ Inst.addOperand(MCOperand::createImm(Offset * 2)); ++ break; ++ case LoongArch::VLDREPL_W: ++ ++ Offset = SignExtend32<10>(fieldFromInstruction(Insn, 10, 10)); ++ Reg = fieldFromInstruction(Insn, 0, 5); ++ Base = fieldFromInstruction(Insn, 5, 5); ++ Reg = getReg(Decoder, LoongArch::LSX128WRegClassID, Reg); ++ Base = getReg(Decoder, LoongArch::GPR32RegClassID, Base); ++ ++ Inst.addOperand(MCOperand::createReg(Reg)); ++ Inst.addOperand(MCOperand::createReg(Base)); ++ Inst.addOperand(MCOperand::createImm(Offset * 4)); ++ break; ++ case LoongArch::VLDREPL_D: ++ ++ Offset = SignExtend32<9>(fieldFromInstruction(Insn, 10, 9)); ++ Reg = fieldFromInstruction(Insn, 0, 5); ++ Base = fieldFromInstruction(Insn, 5, 5); ++ Reg = getReg(Decoder, LoongArch::LSX128WRegClassID, Reg); ++ Base = getReg(Decoder, LoongArch::GPR32RegClassID, Base); ++ ++ Inst.addOperand(MCOperand::createReg(Reg)); ++ Inst.addOperand(MCOperand::createReg(Base)); ++ Inst.addOperand(MCOperand::createImm(Offset * 8)); ++ break; ++ } ++ ++ return MCDisassembler::Success; ++} ++static DecodeStatus DecodeLASX256Mem(MCInst &Inst, unsigned Insn, ++ uint64_t Address, 
const void *Decoder) { ++ int Offset = SignExtend32<12>(fieldFromInstruction(Insn, 10, 12)); ++ unsigned Reg = fieldFromInstruction(Insn, 0, 5); ++ unsigned Base = fieldFromInstruction(Insn, 5, 5); ++ Reg = getReg(Decoder, LoongArch::LASX256BRegClassID, Reg); ++ Base = getReg(Decoder, LoongArch::GPR32RegClassID, Base); ++ ++ Inst.addOperand(MCOperand::createReg(Reg)); ++ Inst.addOperand(MCOperand::createReg(Base)); ++ ++ Inst.addOperand(MCOperand::createImm(Offset)); ++ ++ return MCDisassembler::Success; ++} ++ ++static DecodeStatus DecodeLASX256memstl(MCInst &Inst, unsigned Insn, ++ uint64_t Address, const void *Decoder) { ++ int Offset = SignExtend32<8>(fieldFromInstruction(Insn, 10, 8)); ++ unsigned Reg = fieldFromInstruction(Insn, 0, 5); ++ unsigned Base = fieldFromInstruction(Insn, 5, 5); ++ Reg = getReg(Decoder, LoongArch::LASX256BRegClassID, Reg); ++ Base = getReg(Decoder, LoongArch::GPR32RegClassID, Base); ++ unsigned idx; ++ ++ Inst.addOperand(MCOperand::createReg(Reg)); ++ Inst.addOperand(MCOperand::createReg(Base)); ++ ++ switch (Inst.getOpcode()) { ++ default: ++ assert(false && "Unexpected instruction"); ++ return MCDisassembler::Fail; ++ break; ++ case LoongArch::XVSTELM_B: ++ Inst.addOperand(MCOperand::createImm(Offset)); ++ idx = fieldFromInstruction(Insn, 18, 5); ++ Inst.addOperand(MCOperand::createImm(idx)); ++ break; ++ case LoongArch::XVSTELM_H: ++ Inst.addOperand(MCOperand::createImm(Offset * 2)); ++ idx = fieldFromInstruction(Insn, 18, 4); ++ Inst.addOperand(MCOperand::createImm(idx)); ++ break; ++ case LoongArch::XVSTELM_W: ++ Inst.addOperand(MCOperand::createImm(Offset * 4)); ++ idx = fieldFromInstruction(Insn, 18, 3); ++ Inst.addOperand(MCOperand::createImm(idx)); ++ break; ++ case LoongArch::XVSTELM_D: ++ Inst.addOperand(MCOperand::createImm(Offset * 8)); ++ idx = fieldFromInstruction(Insn, 18, 2); ++ Inst.addOperand(MCOperand::createImm(idx)); ++ break; ++ } ++ ++ return MCDisassembler::Success; ++} ++ ++static DecodeStatus DecodeLASX256memlsl(MCInst &Inst, unsigned Insn, ++ uint64_t Address, const void *Decoder) { ++ ++ int Offset; ++ unsigned Reg, Base; ++ switch (Inst.getOpcode()) { ++ default: ++ assert(false && "Unexpected instruction"); ++ return MCDisassembler::Fail; ++ break; ++ case LoongArch::XVLDREPL_B: ++ ++ Offset = SignExtend32<12>(fieldFromInstruction(Insn, 10, 12)); ++ Reg = fieldFromInstruction(Insn, 0, 5); ++ Base = fieldFromInstruction(Insn, 5, 5); ++ Reg = getReg(Decoder, LoongArch::LASX256BRegClassID, Reg); ++ Base = getReg(Decoder, LoongArch::GPR32RegClassID, Base); ++ ++ Inst.addOperand(MCOperand::createReg(Reg)); ++ Inst.addOperand(MCOperand::createReg(Base)); ++ ++ Inst.addOperand(MCOperand::createImm(Offset)); ++ break; ++ case LoongArch::XVLDREPL_H: ++ ++ Offset = SignExtend32<11>(fieldFromInstruction(Insn, 10, 11)); ++ Reg = fieldFromInstruction(Insn, 0, 5); ++ Base = fieldFromInstruction(Insn, 5, 5); ++ Reg = getReg(Decoder, LoongArch::LASX256HRegClassID, Reg); ++ Base = getReg(Decoder, LoongArch::GPR32RegClassID, Base); ++ ++ Inst.addOperand(MCOperand::createReg(Reg)); ++ Inst.addOperand(MCOperand::createReg(Base)); ++ Inst.addOperand(MCOperand::createImm(Offset * 2)); ++ break; ++ case LoongArch::XVLDREPL_W: ++ ++ Offset = SignExtend32<10>(fieldFromInstruction(Insn, 10, 10)); ++ Reg = fieldFromInstruction(Insn, 0, 5); ++ Base = fieldFromInstruction(Insn, 5, 5); ++ Reg = getReg(Decoder, LoongArch::LASX256WRegClassID, Reg); ++ Base = getReg(Decoder, LoongArch::GPR32RegClassID, Base); ++ ++ Inst.addOperand(MCOperand::createReg(Reg)); 
++ Inst.addOperand(MCOperand::createReg(Base)); ++ Inst.addOperand(MCOperand::createImm(Offset * 4)); ++ break; ++ case LoongArch::XVLDREPL_D: ++ ++ Offset = SignExtend32<9>(fieldFromInstruction(Insn, 10, 9)); ++ Reg = fieldFromInstruction(Insn, 0, 5); ++ Base = fieldFromInstruction(Insn, 5, 5); ++ Reg = getReg(Decoder, LoongArch::LASX256WRegClassID, Reg); ++ Base = getReg(Decoder, LoongArch::GPR32RegClassID, Base); ++ ++ Inst.addOperand(MCOperand::createReg(Reg)); ++ Inst.addOperand(MCOperand::createReg(Base)); ++ Inst.addOperand(MCOperand::createImm(Offset * 8)); ++ break; ++ } ++ ++ return MCDisassembler::Success; ++} ++ ++static DecodeStatus DecodeFMem(MCInst &Inst, ++ unsigned Insn, ++ uint64_t Address, ++ const void *Decoder) { ++ int Offset = SignExtend32<12>((Insn >> 10) & 0xffff); ++ unsigned Reg = fieldFromInstruction(Insn, 0, 5); ++ unsigned Base = fieldFromInstruction(Insn, 5, 5); ++ Reg = getReg(Decoder, LoongArch::FGR64RegClassID, Reg); ++ Base = getReg(Decoder, LoongArch::GPR32RegClassID, Base); ++ ++ Inst.addOperand(MCOperand::createReg(Reg)); ++ Inst.addOperand(MCOperand::createReg(Base)); ++ Inst.addOperand(MCOperand::createImm(Offset)); ++ ++ return MCDisassembler::Success; ++} ++ ++static DecodeStatus DecodeLSX128BRegisterClass(MCInst &Inst, unsigned RegNo, ++ uint64_t Address, ++ const void *Decoder) { ++ if (RegNo > 31) ++ return MCDisassembler::Fail; ++ unsigned Reg = getReg(Decoder, LoongArch::LSX128BRegClassID, RegNo); ++ Inst.addOperand(MCOperand::createReg(Reg)); ++ return MCDisassembler::Success; ++} ++ ++static DecodeStatus DecodeLSX128HRegisterClass(MCInst &Inst, unsigned RegNo, ++ uint64_t Address, ++ const void *Decoder) { ++ if (RegNo > 31) ++ return MCDisassembler::Fail; ++ unsigned Reg = getReg(Decoder, LoongArch::LSX128HRegClassID, RegNo); ++ Inst.addOperand(MCOperand::createReg(Reg)); ++ return MCDisassembler::Success; ++} ++ ++static DecodeStatus DecodeLSX128WRegisterClass(MCInst &Inst, unsigned RegNo, ++ uint64_t Address, ++ const void *Decoder) { ++ if (RegNo > 31) ++ return MCDisassembler::Fail; ++ unsigned Reg = getReg(Decoder, LoongArch::LSX128WRegClassID, RegNo); ++ Inst.addOperand(MCOperand::createReg(Reg)); ++ return MCDisassembler::Success; ++} ++ ++static DecodeStatus DecodeLSX128DRegisterClass(MCInst &Inst, unsigned RegNo, ++ uint64_t Address, ++ const void *Decoder) { ++ if (RegNo > 31) ++ return MCDisassembler::Fail; ++ unsigned Reg = getReg(Decoder, LoongArch::LSX128DRegClassID, RegNo); ++ Inst.addOperand(MCOperand::createReg(Reg)); ++ return MCDisassembler::Success; ++} ++ ++static DecodeStatus DecodeLASX256BRegisterClass(MCInst &Inst, unsigned RegNo, ++ uint64_t Address, ++ const void *Decoder) { ++ if (RegNo > 31) ++ return MCDisassembler::Fail; ++ unsigned Reg = getReg(Decoder, LoongArch::LASX256BRegClassID, RegNo); ++ Inst.addOperand(MCOperand::createReg(Reg)); ++ return MCDisassembler::Success; ++} ++ ++static DecodeStatus DecodeLASX256HRegisterClass(MCInst &Inst, unsigned RegNo, ++ uint64_t Address, ++ const void *Decoder) { ++ if (RegNo > 31) ++ return MCDisassembler::Fail; ++ unsigned Reg = getReg(Decoder, LoongArch::LASX256HRegClassID, RegNo); ++ Inst.addOperand(MCOperand::createReg(Reg)); ++ return MCDisassembler::Success; ++} ++ ++static DecodeStatus DecodeLASX256WRegisterClass(MCInst &Inst, unsigned RegNo, ++ uint64_t Address, ++ const void *Decoder) { ++ if (RegNo > 31) ++ return MCDisassembler::Fail; ++ unsigned Reg = getReg(Decoder, LoongArch::LASX256WRegClassID, RegNo); ++ Inst.addOperand(MCOperand::createReg(Reg)); ++ 
return MCDisassembler::Success; ++} ++ ++static DecodeStatus DecodeLASX256DRegisterClass(MCInst &Inst, unsigned RegNo, ++ uint64_t Address, ++ const void *Decoder) { ++ if (RegNo > 31) ++ return MCDisassembler::Fail; ++ unsigned Reg = getReg(Decoder, LoongArch::LASX256DRegClassID, RegNo); ++ Inst.addOperand(MCOperand::createReg(Reg)); ++ return MCDisassembler::Success; ++} ++ ++static DecodeStatus DecodeBranchTarget(MCInst &Inst, ++ unsigned Offset, ++ uint64_t Address, ++ const void *Decoder) { ++ int32_t BranchOffset; ++ // Similar to LoongArchAsmParser::processInstruction, decode the branch target ++ // for different instructions. ++ switch (Inst.getOpcode()) { ++ default: ++ llvm_unreachable(""); ++ case LoongArch::BEQ: ++ case LoongArch::BNE: ++ case LoongArch::BLT: ++ case LoongArch::BGE: ++ case LoongArch::BLTU: ++ case LoongArch::BGEU: ++ BranchOffset = (SignExtend32<16>(Offset) * 4); ++ break; ++ case LoongArch::BEQZ: ++ case LoongArch::BNEZ: ++ case LoongArch::BCEQZ: ++ case LoongArch::BCNEZ: ++ BranchOffset = (SignExtend32<21>(Offset) * 4); ++ break; ++ case LoongArch::B: ++ case LoongArch::BL: ++ BranchOffset = (SignExtend32<26>(Offset) * 4); ++ break; ++ } ++ Inst.addOperand(MCOperand::createImm(BranchOffset)); ++ return MCDisassembler::Success; ++} ++ ++static DecodeStatus DecodeJumpTarget(MCInst &Inst, ++ unsigned Insn, ++ uint64_t Address, ++ const void *Decoder) { ++ unsigned hi10 = fieldFromInstruction(Insn, 0, 10); ++ unsigned lo16 = fieldFromInstruction(Insn, 10, 16); ++ int32_t JumpOffset = SignExtend32<28>((hi10 << 16 | lo16) << 2); ++ Inst.addOperand(MCOperand::createImm(JumpOffset)); ++ return MCDisassembler::Success; ++} ++ ++template ++static DecodeStatus DecodeUImmWithOffsetAndScale(MCInst &Inst, unsigned Value, ++ uint64_t Address, ++ const void *Decoder) { ++ Value &= ((1 << Bits) - 1); ++ Value *= Scale; ++ Inst.addOperand(MCOperand::createImm(Value + Offset)); ++ return MCDisassembler::Success; ++} ++ ++template ++static DecodeStatus DecodeSImmWithOffsetAndScale(MCInst &Inst, unsigned Value, ++ uint64_t Address, ++ const void *Decoder) { ++ int32_t Imm = SignExtend32(Value) * ScaleBy; ++ Inst.addOperand(MCOperand::createImm(Imm + Offset)); ++ return MCDisassembler::Success; ++} +diff --git a/lib/Target/LoongArch/LoongArch.h b/lib/Target/LoongArch/LoongArch.h +new file mode 100644 +index 00000000..73fd4a62 +--- /dev/null ++++ b/lib/Target/LoongArch/LoongArch.h +@@ -0,0 +1,37 @@ ++//===-- LoongArch.h - Top-level interface for LoongArch representation ----*- C++ -*-===// ++// ++// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. ++// See https://llvm.org/LICENSE.txt for license information. ++// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception ++// ++//===----------------------------------------------------------------------===// ++// ++// This file contains the entry points for global functions defined in ++// the LLVM LoongArch back-end. 
++// ++//===----------------------------------------------------------------------===// ++ ++#ifndef LLVM_LIB_TARGET_LOONGARCH_LOONGARCH_H ++#define LLVM_LIB_TARGET_LOONGARCH_LOONGARCH_H ++ ++#include "MCTargetDesc/LoongArchMCTargetDesc.h" ++#include "llvm/Target/TargetMachine.h" ++ ++namespace llvm { ++ class LoongArchTargetMachine; ++ class ModulePass; ++ class FunctionPass; ++ class LoongArchSubtarget; ++ class LoongArchTargetMachine; ++ class InstructionSelector; ++ class PassRegistry; ++ ++ FunctionPass *createLoongArchModuleISelDagPass(); ++ FunctionPass *createLoongArchOptimizePICCallPass(); ++ FunctionPass *createLoongArchBranchExpansion(); ++ FunctionPass *createLoongArchExpandPseudoPass(); ++ ++ void initializeLoongArchBranchExpansionPass(PassRegistry &); ++} // end namespace llvm; ++ ++#endif +diff --git a/lib/Target/LoongArch/LoongArch.td b/lib/Target/LoongArch/LoongArch.td +new file mode 100644 +index 00000000..8fab224b +--- /dev/null ++++ b/lib/Target/LoongArch/LoongArch.td +@@ -0,0 +1,104 @@ ++//===-- LoongArch.td - Describe the LoongArch Target Machine ---------*- tablegen -*-===// ++// ++// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. ++// See https://llvm.org/LICENSE.txt for license information. ++// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception ++// ++//===----------------------------------------------------------------------===// ++// This is the top level entry point for the LoongArch target. ++//===----------------------------------------------------------------------===// ++ ++//===----------------------------------------------------------------------===// ++// Target-independent interfaces ++//===----------------------------------------------------------------------===// ++ ++include "llvm/Target/Target.td" ++ ++// The overall idea of the PredicateControl class is to chop the Predicates list ++// into subsets that are usually overridden independently. This allows ++// subclasses to partially override the predicates of their superclasses without ++// having to re-add all the existing predicates. 
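
// Illustrative aside (not part of the patch, and in C++ rather than TableGen):
// the PredicateControl comment above describes Predicates as the concatenation
// (!listconcat) of several independently overridable sub-lists. A rough C++
// analogy of that behaviour, using strings for predicates, only meant to show
// why overriding one sub-list leaves the others intact.
#include <cassert>
#include <string>
#include <vector>

struct PredicateControl {
  std::vector<std::string> GPRPredicates;      // e.g. overridden with {"Is64Bit"}
  std::vector<std::string> InsnPredicates;     // e.g. overridden with {"HasLSX"}
  std::vector<std::string> AdditionalPredicates;

  // The equivalent of the !listconcat that builds the final Predicates list.
  std::vector<std::string> predicates() const {
    std::vector<std::string> All;
    for (const auto *Part :
         {&GPRPredicates, &InsnPredicates, &AdditionalPredicates})
      All.insert(All.end(), Part->begin(), Part->end());
    return All;
  }
};

int main() {
  PredicateControl P;
  P.GPRPredicates = {"Is64Bit"}; // a "subclass" overrides only this subset
  P.InsnPredicates = {"HasLSX"};
  assert((P.predicates() == std::vector<std::string>{"Is64Bit", "HasLSX"}));
  return 0;
}
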
++class PredicateControl { ++ // Predicates for the encoding scheme in use such as HasStdEnc ++ list EncodingPredicates = []; ++ // Predicates for the GPR size such as is64Bit ++ list GPRPredicates = []; ++ // Predicates for the FGR size and layout such as HasBasicD ++ list FGRPredicates = []; ++ // Predicates for the instruction group membership such as ISA's ++ list InsnPredicates = []; ++ // Predicate for the ISA extension that an instruction belongs to ++ list ExtPredicate = []; ++ // Predicate for marking the instruction as usable in hard-float mode only ++ list HardFloatPredicate = []; ++ // Predicates for anything else ++ list AdditionalPredicates = []; ++ list Predicates = !listconcat(EncodingPredicates, ++ GPRPredicates, ++ FGRPredicates, ++ InsnPredicates, ++ HardFloatPredicate, ++ ExtPredicate, ++ AdditionalPredicates); ++} ++ ++// Like Requires<> but for the AdditionalPredicates list ++class AdditionalRequires preds> { ++ list AdditionalPredicates = preds; ++} ++ ++//===----------------------------------------------------------------------===// ++// LoongArch Subtarget features // ++//===----------------------------------------------------------------------===// ++ ++def FeatureLSX : SubtargetFeature<"lsx", "HasLSX", "true", "Support LSX">; ++def Feature64Bit ++ : SubtargetFeature<"64bit", "HasLA64", "true", ++ "LA64 Basic Integer and Privilege Instruction Set">; ++def FeatureBasicF : SubtargetFeature<"f", "HasBasicF", "true", ++ "'F' (Single-Precision Floating-Point)">; ++def FeatureBasicD : SubtargetFeature<"d", "HasBasicD", "true", ++ "'D' (Double-Precision Floating-Point)", ++ [FeatureBasicF]>; ++ ++def FeatureLASX : SubtargetFeature<"lasx", "HasLASX", "true", "Support LASX", [FeatureLSX]>; ++ ++def FeatureUnalignedAccess ++ : SubtargetFeature<"unaligned-access", "UnalignedAccess", "true", ++ "Allow all unaligned memory access">; ++//===----------------------------------------------------------------------===// ++// Register File, Calling Conv, Instruction Descriptions ++//===----------------------------------------------------------------------===// ++ ++include "LoongArchRegisterInfo.td" ++include "LoongArchInstrInfo.td" ++include "LoongArchCallingConv.td" ++ ++def LoongArchInstrInfo : InstrInfo; ++ ++//===----------------------------------------------------------------------===// ++// LoongArch processors supported. ++//===----------------------------------------------------------------------===// ++ ++def : ProcessorModel<"generic-la32", NoSchedModel, []>; ++def : ProcessorModel<"generic-la64", NoSchedModel, [Feature64Bit]>; ++def : ProcessorModel<"la464", NoSchedModel, ++ [Feature64Bit, FeatureUnalignedAccess]>; ++ ++def LoongArchAsmParser : AsmParser { ++ let ShouldEmitMatchRegisterName = 0; ++} ++ ++def LoongArchAsmParserVariant : AsmParserVariant { ++ int Variant = 0; ++ ++ // Recognize hard coded registers. 
++ string RegisterPrefix = "$"; ++} ++ ++def LoongArch : Target { ++ let InstructionSet = LoongArchInstrInfo; ++ let AssemblyParsers = [LoongArchAsmParser]; ++ let AssemblyParserVariants = [LoongArchAsmParserVariant]; ++ let AllowRegisterRenaming = 1; ++} +diff --git a/lib/Target/LoongArch/LoongArch32InstrInfo.td b/lib/Target/LoongArch/LoongArch32InstrInfo.td +new file mode 100644 +index 00000000..908307bb +--- /dev/null ++++ b/lib/Target/LoongArch/LoongArch32InstrInfo.td +@@ -0,0 +1,717 @@ ++//===- LoongArch32InstrInfo.td - Target Description for LoongArch Target -*- tablegen -*-=// ++// ++// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. ++// See https://llvm.org/LICENSE.txt for license information. ++// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception ++// ++//===----------------------------------------------------------------------===// ++// ++// This file describes LoongArch32 instructions. ++// ++//===----------------------------------------------------------------------===// ++ ++//===---------------------------------------------------------------------===/ ++// Instruction Definitions. ++//===---------------------------------------------------------------------===/ ++ ++let DecoderNamespace = "LoongArch32" in { ++ /// ++ /// R2 ++ /// ++ def CLO_W : Count1<"clo.w", GPR32Opnd, ctlz>, R2I<0b00100>; ++ def CLZ_W : Int_Reg2<"clz.w", GPR32Opnd, ctlz>, R2I<0b00101>; ++ def CTO_W : Count1<"cto.w", GPR32Opnd, cttz>, R2I<0b00110>; ++ def CTZ_W : Int_Reg2<"ctz.w", GPR32Opnd, cttz>, R2I<0b00111>; ++ ++ def REVB_2H : Int_Reg2<"revb.2h", GPR32Opnd>, R2I<0b01100>;//see below bswap pattern ++ ++ def BITREV_4B : Int_Reg2<"bitrev.4b", GPR32Opnd>, R2I<0b10010>; ++ def BITREV_W : Int_Reg2<"bitrev.w", GPR32Opnd, bitreverse>, R2I<0b10100>; ++ ++ let isCodeGenOnly = 1 in { ++ def EXT_W_H32 : SignExtInReg<"ext.w.h", GPR32Opnd, i16>, R2I<0b10110>; ++ def EXT_W_B32 : SignExtInReg<"ext.w.b", GPR32Opnd, i8>, R2I<0b10111>; ++ ++ } ++ ++ def CPUCFG : Int_Reg2<"cpucfg", GPR32Opnd, int_loongarch_cpucfg>, R2I<0b11011>; ++ def RDTIMEL_W32 : Int_Reg2_Rdtime<"rdtimel.w", GPR32Opnd>, R2I<0b11000>; ++ def RDTIMEH_W32 : Int_Reg2_Rdtime<"rdtimeh.w", GPR32Opnd>, R2I<0b11001>; ++ ++ /// ++ /// R3 ++ /// ++ def ADD_W : Int_Reg3<"add.w", GPR32Opnd, add>, R3I<0b0100000>; ++ def SUB_W : Int_Reg3<"sub.w", GPR32Opnd, sub>, R3I<0b0100010>; ++ ++ let isCodeGenOnly = 1 in { ++ def SLT32 : SetCC_R<"slt", GPR32Opnd, setlt>, R3I<0b0100100>; ++ def SLTU32 : SetCC_R<"sltu", GPR32Opnd, setult>, R3I<0b0100101>; ++ def MASKEQZ32 : Int_Reg3<"maskeqz", GPR32Opnd>, R3I<0b0100110>;//see below patterns ++ def MASKNEZ32 : Int_Reg3<"masknez", GPR32Opnd>, R3I<0b0100111>;//see below patterns ++ ++ def NOR32 : Nor<"nor", GPR32Opnd>, R3I<0b0101000>; ++ def AND32 : Int_Reg3<"and", GPR32Opnd, and>, R3I<0b0101001>; ++ def OR32 : Int_Reg3<"or", GPR32Opnd, or>, R3I<0b0101010>; ++ def XOR32 : Int_Reg3<"xor", GPR32Opnd, xor>, R3I<0b0101011>; ++ def ANDN32 : Int_Reg3<"andn", GPR32Opnd>, R3I<0b0101101>; ++ def ORN32 : Int_Reg3<"orn", GPR32Opnd>, R3I<0b0101100>; ++ } ++ ++ def SLL_W : Shift_Var<"sll.w", GPR32Opnd, shl>, R3I<0b0101110>; ++ def SRL_W : Shift_Var<"srl.w", GPR32Opnd, srl>, R3I<0b0101111>; ++ def SRA_W : Shift_Var<"sra.w", GPR32Opnd, sra>, R3I<0b0110000>; ++ def ROTR_W: Shift_Var<"rotr.w", GPR32Opnd, rotr>, R3I<0b0110110>; ++ ++ def MUL_W : Int_Reg3<"mul.w", GPR32Opnd, mul>, R3I<0b0111000>; ++ def MULH_W : Int_Reg3<"mulh.w", GPR32Opnd, mulhs>, R3I<0b0111001>; ++ def MULH_WU : Int_Reg3<"mulh.wu", GPR32Opnd, 
mulhu>, R3I<0b0111010>; ++ ++let usesCustomInserter = 1 in { ++ def DIV_W : Int_Reg3<"div.w", GPR32Opnd, sdiv>, R3I<0b1000000>; ++ def MOD_W : Int_Reg3<"mod.w", GPR32Opnd, srem>, R3I<0b1000001>; ++ def DIV_WU : Int_Reg3<"div.wu", GPR32Opnd, udiv>, R3I<0b1000010>; ++ def MOD_WU : Int_Reg3<"mod.wu", GPR32Opnd, urem>, R3I<0b1000011>; ++} ++ ++ def CRC_W_B_W : Int_Reg3<"crc.w.b.w", GPR32Opnd, int_loongarch_crc_w_b_w>, R3I<0b1001000>; ++ def CRC_W_H_W : Int_Reg3<"crc.w.h.w", GPR32Opnd, int_loongarch_crc_w_h_w>, R3I<0b1001001>; ++ def CRC_W_W_W : Int_Reg3<"crc.w.w.w", GPR32Opnd, int_loongarch_crc_w_w_w>, R3I<0b1001010>; ++ def CRCC_W_B_W : Int_Reg3<"crcc.w.b.w", GPR32Opnd, int_loongarch_crcc_w_b_w>, R3I<0b1001100>; ++ def CRCC_W_H_W : Int_Reg3<"crcc.w.h.w", GPR32Opnd, int_loongarch_crcc_w_h_w>, R3I<0b1001101>; ++ def CRCC_W_W_W : Int_Reg3<"crcc.w.w.w", GPR32Opnd, int_loongarch_crcc_w_w_w>, R3I<0b1001110>; ++ /// ++ /// SLLI ++ /// ++ def SLLI_W : Shift_Imm32<"slli.w", GPR32Opnd, shl>, R2_IMM5<0b00>; ++ def SRLI_W : Shift_Imm32<"srli.w", GPR32Opnd, srl>, R2_IMM5<0b01>; ++ def SRAI_W : Shift_Imm32<"srai.w", GPR32Opnd, sra>, R2_IMM5<0b10>; ++ def ROTRI_W : Shift_Imm32<"rotri.w", GPR32Opnd, rotr>, R2_IMM5<0b11>; ++ /// ++ /// Misc ++ /// ++ def ALSL_W : Reg3_Sa<"alsl.w", GPR32Opnd, uimm2_plus1>, R3_SA2<0b00010> { ++ let Pattern = [(set GPR32Opnd:$rd, ++ (add GPR32Opnd:$rk, (shl GPR32Opnd:$rj, immZExt2Alsl:$sa)))]; ++ } ++ def BYTEPICK_W : Reg3_Sa<"bytepick.w", GPR32Opnd, uimm2>, R3_SA2<0b00100>;//pattern:[] ++ ++ def BREAK : Code15<"break", int_loongarch_break>, CODE15<0b1010100>; ++ def SYSCALL : Code15<"syscall", int_loongarch_syscall>, CODE15<0b1010110>; ++ def TRAP : TrapBase; ++ ++ def BSTRINS_W : InsBase_32<"bstrins.w", GPR32Opnd, uimm5, LoongArchBstrins>, ++ INSERT_BIT32<0>; ++ def BSTRPICK_W : PickBase_32<"bstrpick.w", GPR32Opnd, uimm5, LoongArchBstrpick>, ++ INSERT_BIT32<1>; ++ ++ /// ++ /// R2_IMM12 ++ /// ++ let isCodeGenOnly = 1 in { ++ def SLTI32 : SetCC_I<"slti", GPR32Opnd, simm12_32>, R2_IMM12<0b000>; //PatFrag ++ def SLTUI32 : SetCC_I<"sltui", GPR32Opnd, simm12_32>, R2_IMM12<0b001>; //PatFrag ++ } ++ def ADDI_W : Int_Reg2_Imm12<"addi.w", GPR32Opnd, simm12_32, add>, R2_IMM12<0b010>; ++ ++ let isCodeGenOnly = 1 in { ++ def ANDI32 : Int_Reg2_Imm12<"andi", GPR32Opnd, uimm12_32, and>, R2_IMM12<0b101>; ++ def ORI32 : Int_Reg2_Imm12<"ori", GPR32Opnd, uimm12_32, or>, R2_IMM12<0b110>; ++ def XORI32 : Int_Reg2_Imm12<"xori", GPR32Opnd, uimm12_32, xor>, R2_IMM12<0b111>; ++ } ++ ++ /// ++ /// Privilege Instructions ++ /// ++ def CSRRD32 : CSR<"csrrd", GPR32Opnd, uimm14_32, int_loongarch_csrrd>, R1_CSR<0b0000000000100>; ++ def CSRWR32 : CSRW<"csrwr", GPR32Opnd, uimm14_32, int_loongarch_csrwr>, R1_CSR<0b0000100000100>; ++ def CSRXCHG32 : CSRX<"csrxchg", GPR32Opnd, uimm14_32, int_loongarch_csrxchg>, R2_CSR<0b00000100>; ++ def IOCSRRD_B32 : Int_Reg2<"iocsrrd.b", GPR32Opnd, int_loongarch_iocsrrd_b>, R2P<0b000>; ++ def IOCSRRD_H32 : Int_Reg2<"iocsrrd.h", GPR32Opnd, int_loongarch_iocsrrd_h>, R2P<0b001>; ++ def IOCSRRD_W32 : Int_Reg2<"iocsrrd.w", GPR32Opnd, int_loongarch_iocsrrd_w>, R2P<0b010>; ++ def IOCSRWR_B32 : Int_Reg2_Iocsrwr<"iocsrwr.b", GPR32Opnd, GPR32Opnd, int_loongarch_iocsrwr_b>, R2P<0b100>; ++ def IOCSRWR_H32 : Int_Reg2_Iocsrwr<"iocsrwr.h", GPR32Opnd, GPR32Opnd, int_loongarch_iocsrwr_h>, R2P<0b101>; ++ def IOCSRWR_W32 : Int_Reg2_Iocsrwr<"iocsrwr.w", GPR32Opnd, GPR32Opnd, int_loongarch_iocsrwr_w>, R2P<0b110>; ++ def CACOP32 : CAC<"cacop", GPR32Opnd, simm12_32, int_loongarch_cacop>, R1_CACHE; 
++ def LDDIR32 : LEVEL<"lddir", GPR32Opnd>, R2_LEVEL<0b00000110010000>; ++ def LDPTE32 : SEQ<"ldpte", GPR32Opnd>, R1_SEQ<0b00000110010001>; ++ ++ //def WAIT : Wait<"wait">; ++ // ++ //def IOCSRRD_D : R2P<0b011>, Int_Reg2<"iocsrrd.d", GPR32Opnd>; ++ //def IOCSRWR_D : R2P<0b111>, Int_Reg2<"iocsrwr.d", GPR32Opnd>; ++ // ++ //def TLBINV : IMM32<0b001000>, OP32<"tlbinv">; ++ //def TLBFLUSH : IMM32<0b001001>, OP32<"tlbflush">; ++ //def TLBP : IMM32<0b001010>, OP32<"tlbp">; ++ //def TLBR : IMM32<0b001011>, OP32<"tlbr">; ++ //def TLBWI : IMM32<0b001100>, OP32<"tlbwi">; ++ //def TLBWR : IMM32<0b001101>, OP32<"tlbwr">; ++ ++ /// ++ /// R1_IMM20 ++ /// ++ let isCodeGenOnly = 1 in { ++ def LU12I_W32 : SI20<"lu12i.w", GPR32Opnd, simm20_32>, R1_SI20<0b0001010>; ++ def PCADDI32 : SI20<"pcaddi", GPR32Opnd, simm20_32>, R1_SI20<0b0001100>; ++ def PCALAU12I32 : SI20<"pcalau12i", GPR32Opnd, simm20_32>, R1_SI20<0b0001101>; ++ def PCADDU12I32 : SI20<"pcaddu12i", GPR32Opnd, simm20_32>, R1_SI20<0b0001110>; ++ } ++ ++ let isCodeGenOnly = 1 in { ++ def BEQZ32 : Beqz<"beqz", brtarget, seteq, GPR32Opnd>, R1_IMM21BEQZ<0b010000>; ++ def BNEZ32 : Beqz<"bnez", brtarget, setne, GPR32Opnd>, R1_IMM21BEQZ<0b010001>; ++ ++ def JIRL32 : FJirl<"jirl", calltarget, GPR32Opnd>, R2_IMM16JIRL; ++ ++ def B32 : JumpFB, IMM26B<0b010100>; ++ ++ def BEQ32 : Beq<"beq", brtarget, seteq, GPR32Opnd>, R2_IMM16BEQ<0b010110>; ++ def BNE32 : Beq<"bne", brtarget, setne, GPR32Opnd>, R2_IMM16BEQ<0b010111>; ++ def BLT32 : Beq<"blt", brtarget, setlt, GPR32Opnd>, R2_IMM16BEQ<0b011000>; ++ def BGE32 : Beq<"bge", brtarget, setge, GPR32Opnd>, R2_IMM16BEQ<0b011001>; ++ def BLTU32 : Beq<"bltu", brtarget, setult, GPR32Opnd>, R2_IMM16BEQ<0b011010>; ++ def BGEU32 : Beq<"bgeu", brtarget, setuge, GPR32Opnd>, R2_IMM16BEQ<0b011011>; ++ } ++ ++ /// ++ /// Mem access ++ /// ++ def LL_W : LLBase<"ll.w", GPR32Opnd, mem_simm14_lsl2>, LL_SC<0b000>; ++ def SC_W : SCBase<"sc.w", GPR32Opnd, mem_simm14_lsl2>, LL_SC<0b001>; ++ ++ def PRELD_Raw32 : Preld_Raw<"preld", GPR32Opnd>, PRELD_FM; ++ ++ let isCodeGenOnly = 1 in { ++ def LD_B32 : Ld<"ld.b", GPR32Opnd, mem_simmptr, sextloadi8>, LOAD_STORE<0b0000>; ++ def LD_H32 : Ld<"ld.h", GPR32Opnd, mem_simmptr, sextloadi16, addrDefault>, LOAD_STORE<0b0001>; ++ def LD_W32 : Ld<"ld.w", GPR32Opnd, mem, load, addrDefault>, LOAD_STORE<0b0010>; ++ def ST_B32 : St<"st.b", GPR32Opnd, mem, truncstorei8>, LOAD_STORE<0b0100>; ++ def ST_H32 : St<"st.h", GPR32Opnd, mem, truncstorei16>, LOAD_STORE<0b0101>; ++ def ST_W32 : St<"st.w", GPR32Opnd, mem, store>, LOAD_STORE<0b0110>; ++ def LD_BU32 : Ld<"ld.bu", GPR32Opnd, mem_simmptr, zextloadi8, addrDefault>, LOAD_STORE<0b1000>; ++ def LD_HU32 : Ld<"ld.hu", GPR32Opnd, mem_simmptr, zextloadi16>, LOAD_STORE<0b1001>; ++ ++ def PRELD32 : Preld<"preld", mem, GPR32Opnd>, PRELD_FM; ++ ++ def LDPTR_W32 : LdPtr<"ldptr.w", GPR32Opnd>, LL_SC<0b100>; ++ def STPTR_W32 : StPtr<"stptr.w", GPR32Opnd>, LL_SC<0b101>; ++ } ++ ++ def IBAR : Bar<"ibar", int_loongarch_ibar>, BAR_FM<1>; ++ def DBAR : Bar<"dbar", int_loongarch_dbar>, BAR_FM<0>; ++ ++ def LONG_BRANCH_ADDIW : LoongArchPseudo<(outs GPR32Opnd:$dst), ++ (ins GPR32Opnd:$src, brtarget:$tgt, brtarget:$baltgt), []>; ++ ++ def LONG_BRANCH_ADDIW2Op : LoongArchPseudo<(outs GPR32Opnd:$dst), ++ (ins GPR32Opnd:$src, brtarget:$tgt), []>; ++ ++ def PseudoReturn : PseudoReturnBase; ++ ++ let isCodeGenOnly = 1 in { ++ def LDX_W32 : LDX_FT_LA<"ldx.w", GPR32Opnd, load>, ++ R3MI<0b00010000>; ++ def LDX_HU32 : LDX_FT_LA<"ldx.hu", GPR32Opnd, extloadi16>, ++ R3MI<0b01001000>; ++ def 
LDX_BU32 : LDX_FT_LA<"ldx.bu", GPR32Opnd, extloadi8>, ++ R3MI<0b01000000>; ++ def STX_W32 : STX_FT_LA<"stx.w", GPR32Opnd, store>, ++ R3MI<0b00110000>; ++ def LDX_H32 : LDX_FT_LA<"ldx.h", GPR32Opnd, sextloadi16>, ++ R3MI<0b00001000>; ++ def LDX_B32 : LDX_FT_LA<"ldx.b", GPR32Opnd, sextloadi8>, ++ R3MI<0b00000000>; ++ def STX_B32 : STX_FT_LA<"stx.b", GPR32Opnd, truncstorei8>, ++ R3MI<0b00100000>; ++ def STX_H32 : STX_FT_LA<"stx.h", GPR32Opnd, truncstorei16>, ++ R3MI<0b00101000>; ++ } ++} ++ ++def LEA_ADDI_W: EffectiveAddress<"addi.w", GPR32Opnd>, LEA_ADDI_FM<0b010>; ++ ++def : LoongArchPat<(LoongArchAddress (i32 tglobaladdr:$in)), ++ (ADDI_W (PCADDU12I32 tglobaladdr:$in) ,0)>,GPR_32; ++def : LoongArchPat<(LoongArchAddress (i32 tblockaddress:$in)), ++ (ADDI_W (PCADDU12I32 tblockaddress:$in),0)>, GPR_32; ++def : LoongArchPat<(LoongArchAddress (i32 tjumptable:$in)), ++ (ADDI_W (PCADDU12I32 tjumptable:$in),0)>, GPR_32; ++def : LoongArchPat<(LoongArchAddress (i32 texternalsym:$in)), ++ (ADDI_W (PCADDU12I32 texternalsym:$in),0)>, GPR_32; ++ ++//===----------------------------------------------------------------------===// ++// Arbitrary patterns that map to one or more instructions ++//===----------------------------------------------------------------------===// ++ ++let isCodeGenOnly = 1 in { ++ def REVB_2W_32 : Int_Reg2<"revb.2w", GPR32Opnd>, R2I<0b01110>; ++ def REVH_2W_32 : Int_Reg2<"revh.2w", GPR32Opnd>, R2I<0b10000>; ++} ++ ++// bswap pattern ++def : LoongArchPat<(bswap GPR32:$rj), (ROTRI_W (REVB_2H GPR32:$rj), 16)>; ++//def : LoongArchPat<(bswap GPR32:$rj), (REVB_2W_32 GPR32:$rj)>; ++//def : LoongArchPat<(bswap GPR32:$rj), (REVH_2W_32 (REVB_2H GPR32:$rj))>; ++ ++// i32 selects ++multiclass SelectInt_Pats { ++ ++// reg, immz ++def : LoongArchPat<(select (Opg (seteq RC:$cond, immz)), RC:$t, RC:$f), ++ (OROp (MASKNEZOp RC:$t, RC:$cond), (MASKEQZOp RC:$f, RC:$cond))>; ++def : LoongArchPat<(select (Opg (setne RC:$cond, immz)), RC:$t, RC:$f), ++ (OROp (MASKEQZOp RC:$t, RC:$cond), (MASKNEZOp RC:$f, RC:$cond))>; ++ ++//def : LoongArchPat<(select (Opg (seteq RC:$cond, imm_type:$imm)), RC:$t, RC:$f), ++// (OROp (MASKNEZOp RC:$t, (XORiOp RC:$cond, imm_type:$imm)), ++// (MASKEQZOp RC:$f, (XORiOp RC:$cond, imm_type:$imm)))>; ++//def : LoongArchPat<(select (Opg (setne RC:$cond, imm_type:$imm)), RC:$t, RC:$f), ++// (OROp (MASKEQZOp RC:$t, (XORiOp RC:$cond, imm_type:$imm)), ++// (MASKNEZOp RC:$f, (XORiOp RC:$cond, imm_type:$imm)))>; ++ ++// reg, immSExt12Plus1 ++//def : LoongArchPat<(select (Opg (setgt RC:$cond, immSExt12Plus1:$imm)), RC:$t, RC:$f), ++// (OROp (MASKNEZOp RC:$t, (SLTiOp RC:$cond, (Plus1 imm:$imm))), ++// (MASKEQZOp RC:$f, (SLTiOp RC:$cond, (Plus1 imm:$imm))))>; ++//def : LoongArchPat<(select (Opg (setugt RC:$cond, immSExt16Plus1:$imm)), RC:$t, RC:$f), ++// (OROp (MASKNEZOp RC:$t, (SLTiuOp RC:$cond, (Plus1 imm:$imm))), ++// (MASKEQZOp RC:$f, (SLTiuOp RC:$cond, (Plus1 imm:$imm))))>; ++ ++def : LoongArchPat<(select (Opg (seteq RC:$cond, immz)), RC:$t, immz), ++ (MASKNEZOp RC:$t, RC:$cond)>; ++def : LoongArchPat<(select (Opg (setne RC:$cond, immz)), RC:$t, immz), ++ (MASKEQZOp RC:$t, RC:$cond)>; ++def : LoongArchPat<(select (Opg (seteq RC:$cond, immz)), immz, RC:$f), ++ (MASKEQZOp RC:$f, RC:$cond)>; ++def : LoongArchPat<(select (Opg (setne RC:$cond, immz)), immz, RC:$f), ++ (MASKNEZOp RC:$f, RC:$cond)>; ++} ++ ++defm : SelectInt_Pats; ++ ++def : LoongArchPat<(select i32:$cond, i32:$t, i32:$f), ++ (OR32 (MASKEQZ32 i32:$t, i32:$cond), ++ (MASKNEZ32 i32:$f, i32:$cond))>; ++def : 
LoongArchPat<(select i32:$cond, i32:$t, immz), ++ (MASKEQZ32 i32:$t, i32:$cond)>; ++def : LoongArchPat<(select i32:$cond, immz, i32:$f), ++ (MASKNEZ32 i32:$f, i32:$cond)>; ++ ++// truncate ++def : LoongArchPat<(i32 (trunc (assertzext_lt_i32 GPR64:$src))), ++ (EXTRACT_SUBREG GPR64:$src, sub_32)>, GPR_64; ++def : LoongArchPat<(i32 (trunc GPR64:$src)), ++ (SLLI_W (EXTRACT_SUBREG GPR64:$src, sub_32), 0)>, GPR_64; ++ ++// Patterns used for matching away redundant sign extensions. ++// LA32 arithmetic instructions sign extend their result implicitly. ++def : LoongArchPat<(i64 (sext (i32 (add GPR32:$src, GPR32:$src2)))), ++ (INSERT_SUBREG (i64 (IMPLICIT_DEF)), ++ (ADD_W GPR32:$src, GPR32:$src2), sub_32)>; ++def : LoongArchPat<(i64 (sext (i32 (sub GPR32:$src, GPR32:$src2)))), ++ (INSERT_SUBREG (i64 (IMPLICIT_DEF)), ++ (SUB_W GPR32:$src, GPR32:$src2), sub_32)>; ++def : LoongArchPat<(i64 (sext (i32 (mul GPR32:$src, GPR32:$src2)))), ++ (INSERT_SUBREG (i64 (IMPLICIT_DEF)), ++ (MUL_W GPR32:$src, GPR32:$src2), sub_32)>; ++ ++def : LoongArchPat<(store (i32 0), addr:$dst), (ST_W32 ZERO, addr:$dst)>; ++ ++def : InstAlias<"break", (BREAK 0), 1>; ++def : InstAlias<"break $imm", (BREAK uimm15:$imm), 1>; ++def : LoongArchInstAlias<"move $dst, $src", ++ (OR32 GPR32Opnd:$dst, GPR32Opnd:$src, ZERO), 1>, GPR_32; ++ ++def immSExt12Plus1 : PatLeaf<(imm), [{ ++ return isInt<13>(N->getSExtValue()) && isInt<12>(N->getSExtValue() + 1); ++}]>; ++ ++def Plus1 : SDNodeXFormgetSExtValue() + 1); }]>; ++ ++multiclass BrcondPats { ++ ++def : LoongArchPat<(brcond (i32 (setne RC:$lhs, 0)), bb:$dst), ++ (BNEOp RC:$lhs, ZEROReg, bb:$dst)>; ++def : LoongArchPat<(brcond (i32 (seteq RC:$lhs, 0)), bb:$dst), ++ (BEQOp RC:$lhs, ZEROReg, bb:$dst)>; ++def : LoongArchPat<(brcond (i32 (setge RC:$lhs, RC:$rhs)), bb:$dst), ++ (BEQOp1 (SLTOp RC:$lhs, RC:$rhs), ZEROReg, bb:$dst)>; ++def : LoongArchPat<(brcond (i32 (setuge RC:$lhs, RC:$rhs)), bb:$dst), ++ (BEQOp1 (SLTUOp RC:$lhs, RC:$rhs), ZEROReg, bb:$dst)>; ++def : LoongArchPat<(brcond (i32 (setge RC:$lhs, immSExt12:$rhs)), bb:$dst), ++ (BEQOp1 (SLTIOp RC:$lhs, immSExt12:$rhs), ZEROReg, bb:$dst)>; ++def : LoongArchPat<(brcond (i32 (setuge RC:$lhs, immSExt12:$rhs)), bb:$dst), ++ (BEQOp1 (SLTUIOp RC:$lhs, immSExt12:$rhs), ZEROReg, bb:$dst)>; ++def : LoongArchPat<(brcond (i32 (setgt RC:$lhs, immSExt12Plus1:$rhs)), bb:$dst), ++ (BEQOp1 (SLTIOp RC:$lhs, (Plus1 imm:$rhs)), ZEROReg, bb:$dst)>; ++def : LoongArchPat<(brcond (i32 (setugt RC:$lhs, immSExt12Plus1:$rhs)), bb:$dst), ++ (BEQOp1 (SLTUIOp RC:$lhs, (Plus1 imm:$rhs)), ZEROReg, bb:$dst)>; ++def : LoongArchPat<(brcond (i32 (setle RC:$lhs, RC:$rhs)), bb:$dst), ++ (BEQOp1 (SLTOp RC:$rhs, RC:$lhs), ZEROReg, bb:$dst)>; ++def : LoongArchPat<(brcond (i32 (setule RC:$lhs, RC:$rhs)), bb:$dst), ++ (BEQOp1 (SLTUOp RC:$rhs, RC:$lhs), ZEROReg, bb:$dst)>; ++def : LoongArchPat<(brcond RC:$cond, bb:$dst), ++ (BNEOp RC:$cond, ZEROReg, bb:$dst)>; ++} ++ ++defm : BrcondPats, GPR_64; ++ ++let usesCustomInserter = 1 in { ++ def ATOMIC_LOAD_ADD_I8 : Atomic2Ops; ++ def ATOMIC_LOAD_ADD_I16 : Atomic2Ops; ++ def ATOMIC_LOAD_ADD_I32 : Atomic2Ops; ++ def ATOMIC_LOAD_SUB_I8 : Atomic2Ops; ++ def ATOMIC_LOAD_SUB_I16 : Atomic2Ops; ++ def ATOMIC_LOAD_SUB_I32 : Atomic2Ops; ++ def ATOMIC_LOAD_AND_I8 : Atomic2Ops; ++ def ATOMIC_LOAD_AND_I16 : Atomic2Ops; ++ def ATOMIC_LOAD_AND_I32 : Atomic2Ops; ++ def ATOMIC_LOAD_OR_I8 : Atomic2Ops; ++ def ATOMIC_LOAD_OR_I16 : Atomic2Ops; ++ def ATOMIC_LOAD_OR_I32 : Atomic2Ops; ++ def ATOMIC_LOAD_XOR_I8 : Atomic2Ops; ++ def ATOMIC_LOAD_XOR_I16 : 
Atomic2Ops; ++ def ATOMIC_LOAD_XOR_I32 : Atomic2Ops; ++ def ATOMIC_LOAD_NAND_I8 : Atomic2Ops; ++ def ATOMIC_LOAD_NAND_I16 : Atomic2Ops; ++ def ATOMIC_LOAD_NAND_I32 : Atomic2Ops; ++ ++ def ATOMIC_SWAP_I8 : Atomic2Ops; ++ def ATOMIC_SWAP_I16 : Atomic2Ops; ++ def ATOMIC_SWAP_I32 : Atomic2Ops; ++ ++ def ATOMIC_CMP_SWAP_I8 : AtomicCmpSwap; ++ def ATOMIC_CMP_SWAP_I16 : AtomicCmpSwap; ++ def ATOMIC_CMP_SWAP_I32 : AtomicCmpSwap; ++ ++ def ATOMIC_LOAD_MAX_I8 : Atomic2Ops; ++ def ATOMIC_LOAD_MAX_I16 : Atomic2Ops; ++ def ATOMIC_LOAD_MAX_I32 : Atomic2Ops; ++ ++ def ATOMIC_LOAD_MIN_I8 : Atomic2Ops; ++ def ATOMIC_LOAD_MIN_I16 : Atomic2Ops; ++ def ATOMIC_LOAD_MIN_I32 : Atomic2Ops; ++ ++ def ATOMIC_LOAD_UMAX_I8 : Atomic2Ops; ++ def ATOMIC_LOAD_UMAX_I16 : Atomic2Ops; ++ def ATOMIC_LOAD_UMAX_I32 : Atomic2Ops; ++ ++ def ATOMIC_LOAD_UMIN_I8 : Atomic2Ops; ++ def ATOMIC_LOAD_UMIN_I16 : Atomic2Ops; ++ def ATOMIC_LOAD_UMIN_I32 : Atomic2Ops; ++} ++ ++def ATOMIC_LOAD_ADD_I8_POSTRA : Atomic2OpsSubwordPostRA; ++def ATOMIC_LOAD_ADD_I16_POSTRA : Atomic2OpsSubwordPostRA; ++def ATOMIC_LOAD_ADD_I32_POSTRA : Atomic2OpsPostRA; ++def ATOMIC_LOAD_SUB_I8_POSTRA : Atomic2OpsSubwordPostRA; ++def ATOMIC_LOAD_SUB_I16_POSTRA : Atomic2OpsSubwordPostRA; ++def ATOMIC_LOAD_SUB_I32_POSTRA : Atomic2OpsPostRA; ++def ATOMIC_LOAD_AND_I8_POSTRA : Atomic2OpsSubwordPostRA; ++def ATOMIC_LOAD_AND_I16_POSTRA : Atomic2OpsSubwordPostRA; ++def ATOMIC_LOAD_AND_I32_POSTRA : Atomic2OpsPostRA; ++def ATOMIC_LOAD_OR_I8_POSTRA : Atomic2OpsSubwordPostRA; ++def ATOMIC_LOAD_OR_I16_POSTRA : Atomic2OpsSubwordPostRA; ++def ATOMIC_LOAD_OR_I32_POSTRA : Atomic2OpsPostRA; ++def ATOMIC_LOAD_XOR_I8_POSTRA : Atomic2OpsSubwordPostRA; ++def ATOMIC_LOAD_XOR_I16_POSTRA : Atomic2OpsSubwordPostRA; ++def ATOMIC_LOAD_XOR_I32_POSTRA : Atomic2OpsPostRA; ++def ATOMIC_LOAD_NAND_I8_POSTRA : Atomic2OpsSubwordPostRA; ++def ATOMIC_LOAD_NAND_I16_POSTRA : Atomic2OpsSubwordPostRA; ++def ATOMIC_LOAD_NAND_I32_POSTRA : Atomic2OpsPostRA; ++ ++def ATOMIC_SWAP_I8_POSTRA : Atomic2OpsSubwordPostRA; ++def ATOMIC_SWAP_I16_POSTRA : Atomic2OpsSubwordPostRA; ++def ATOMIC_SWAP_I32_POSTRA : Atomic2OpsPostRA; ++ ++def ATOMIC_CMP_SWAP_I8_POSTRA : AtomicCmpSwapSubwordPostRA; ++def ATOMIC_CMP_SWAP_I16_POSTRA : AtomicCmpSwapSubwordPostRA; ++def ATOMIC_CMP_SWAP_I32_POSTRA : AtomicCmpSwapPostRA; ++ ++def ATOMIC_LOAD_MAX_I8_POSTRA : Atomic2OpsSubwordPostRA; ++def ATOMIC_LOAD_MAX_I16_POSTRA : Atomic2OpsSubwordPostRA; ++def ATOMIC_LOAD_MAX_I32_POSTRA : Atomic2OpsPostRA; ++ ++def ATOMIC_LOAD_MIN_I8_POSTRA : Atomic2OpsSubwordPostRA; ++def ATOMIC_LOAD_MIN_I16_POSTRA : Atomic2OpsSubwordPostRA; ++def ATOMIC_LOAD_MIN_I32_POSTRA : Atomic2OpsPostRA; ++ ++def ATOMIC_LOAD_UMAX_I8_POSTRA : Atomic2OpsSubwordPostRA; ++def ATOMIC_LOAD_UMAX_I16_POSTRA : Atomic2OpsSubwordPostRA; ++def ATOMIC_LOAD_UMAX_I32_POSTRA : Atomic2OpsPostRA; ++ ++def ATOMIC_LOAD_UMIN_I8_POSTRA : Atomic2OpsSubwordPostRA; ++def ATOMIC_LOAD_UMIN_I16_POSTRA : Atomic2OpsSubwordPostRA; ++def ATOMIC_LOAD_UMIN_I32_POSTRA : Atomic2OpsPostRA; ++ ++def : LoongArchPat<(atomic_load_8 addr:$a), (LD_B32 addr:$a)>; ++def : LoongArchPat<(atomic_load_16 addr:$a), (LD_H32 addr:$a)>; ++def : LoongArchPat<(atomic_load_32 addrimm14lsl2:$a), (LDPTR_W32 addrimm14lsl2:$a)>; ++def : LoongArchPat<(atomic_load_32 addr:$a), (LD_W32 addr:$a)>; ++ ++def : LoongArchPat<(atomic_store_8 addr:$a, GPR32:$v), ++ (ST_B32 GPR32:$v, addr:$a)>; ++def : LoongArchPat<(atomic_store_16 addr:$a, GPR32:$v), ++ (ST_H32 GPR32:$v, addr:$a)>; ++def : LoongArchPat<(atomic_store_32 addrimm14lsl2:$a, 
GPR32:$v), ++ (STPTR_W32 GPR32:$v, addrimm14lsl2:$a)>; ++def : LoongArchPat<(atomic_store_32 addr:$a, GPR32:$v), ++ (ST_W32 GPR32:$v, addr:$a)>; ++ ++def : LoongArchPat<(LoongArchDBAR (i32 immz)), ++ (DBAR 0)>; ++ ++def : LoongArchPat<(i32 (extloadi1 addr:$src)), (LD_BU32 addr:$src)>; ++def : LoongArchPat<(i32 (extloadi8 addr:$src)), (LD_BU32 addr:$src)>; ++def : LoongArchPat<(i32 (extloadi16 addr:$src)), (LD_HU32 addr:$src)>; ++ ++def : LoongArchPat<(store (i32 0), addr:$dst), (ST_W32 ZERO, addr:$dst)>; ++ ++// Patterns for loads/stores with a reg+imm operand. ++let AddedComplexity = 40 in { ++ def : LoadRegImmPat; ++ def : LoadRegImmPat; ++ def : LoadRegImmPat; ++ def : LoadRegImmPat; ++ def : LoadRegImmPat; ++ def : StoreRegImmPat; ++ def : StoreRegImmPat; ++ def : StoreRegImmPat; ++ ++ def : LoadRegImm14Lsl2Pat; ++ def : StoreRegImm14Lsl2Pat; ++} ++ ++let isCall=1, isCTI=1, Defs = [RA] in { ++ ++ class JumpLinkRegPseudo: ++ LoongArchPseudo<(outs), (ins RO:$rj), [(LoongArchJmpLink RO:$rj)]>, ++ PseudoInstExpansion<(JIRLRInst RetReg, ResRO:$rj)> { ++ let hasPostISelHook = 1; ++ } ++ ++ class JumpLinkReg: ++ InstForm<(outs RO:$rd), (ins RO:$rj), !strconcat(opstr, "\t$rd, $rj, 0"), ++ [], FrmR, opstr> { ++ let hasPostISelHook = 1; ++ } ++ ++} ++ ++def JIRLR : JumpLinkReg<"jirl", GPR32Opnd>, R2_IMM16JIRL { ++ let offs16 = 0; ++} ++def JIRLRPseudo : JumpLinkRegPseudo; ++ ++class BrindRegPseudo: ++ LoongArchPseudo<(outs), (ins RO:$rj), [(brind RO:$rj)]>, ++ PseudoInstExpansion<(JIRLRInst RetReg, ResRO:$rj)> { ++ let isTerminator=1; ++ let isBarrier=1; ++ let isBranch = 1; ++ let isIndirectBranch = 1; ++ bit isCTI = 1; ++} ++ ++def JIRLRBRIND : BrindRegPseudo; ++ ++def : LoongArchPat<(addc GPR32:$src, immSExt12:$imm), ++ (ADDI_W GPR32:$src, imm:$imm)>; ++ ++defm : SeteqPats; ++defm : SetlePats; ++defm : SetgtPats; ++defm : SetgePats; ++defm : SetgeImmPats; ++ ++def : LoongArchPat<(i64 (sext (i32 (xor (i32 (trunc (i64 (assertsext GPR64:$rj)))), (immZExt12:$imm12))))), ++ (INSERT_SUBREG (i64 (IMPLICIT_DEF)), ++ (XORI32 (EXTRACT_SUBREG GPR64:$rj, sub_32), (immZExt12:$imm12)), sub_32)>, GPR_64; ++ ++def : LoongArchPat<(i64 (sext (i32 (add (i32 (trunc (i64 (assertsext GPR64:$rj)))), (i32 (trunc (i64 (assertsext GPR64:$rk)))))))), ++ (INSERT_SUBREG (i64 (IMPLICIT_DEF)), ++ (ADD_W (EXTRACT_SUBREG GPR64:$rj, sub_32), (EXTRACT_SUBREG GPR64:$rk, sub_32)), sub_32)>, GPR_64; ++ ++def : LoongArchPat<(i64 (sext (i32 (add (i32 (trunc (i64 (assertsext GPR64:$rj)))), (immSExt12:$imm12))))), ++ (INSERT_SUBREG (i64 (IMPLICIT_DEF)), ++ (ADDI_W (EXTRACT_SUBREG GPR64:$rj, sub_32), (immSExt12:$imm12)), sub_32)>, GPR_64; ++ ++def : LoongArchPat<(i64 (sext (i32 (sra (i32 (trunc (i64 (assertsext GPR64:$rj)))), (i32 (trunc (i64 (assertsext GPR64:$rk)))))))), ++ (INSERT_SUBREG (i64 (IMPLICIT_DEF)), ++ (SRA_W (EXTRACT_SUBREG GPR64:$rj, sub_32), (EXTRACT_SUBREG GPR64:$rk, sub_32)), sub_32)>, GPR_64; ++ ++def : LoongArchPat<(i64 (sext (i32 (srl (i32 (trunc (i64 (assertsext GPR64:$rj)))), (i32 (trunc (i64 (assertsext GPR64:$rk)))))))), ++ (INSERT_SUBREG (i64 (IMPLICIT_DEF)), ++ (SRL_W (EXTRACT_SUBREG GPR64:$rj, sub_32), (EXTRACT_SUBREG GPR64:$rk, sub_32)), sub_32)>, GPR_64; ++ ++def : LoongArchPat<(i64 (sext (i32 (mul (i32 (trunc (i64 (assertsext GPR64:$rj)))), (i32 (trunc (i64 (assertsext GPR64:$rk)))))))), ++ (INSERT_SUBREG (i64 (IMPLICIT_DEF)), ++ (MUL_W (EXTRACT_SUBREG GPR64:$rj, sub_32), (EXTRACT_SUBREG GPR64:$rk, sub_32)), sub_32)>, GPR_64; ++ ++def : LoongArchPat<(i64 (sext (i32 (xor (i32 (trunc (i64 (assertsext 
GPR64:$rj)))), (i32 (trunc (i64 (assertsext GPR64:$rk)))))))), ++ (INSERT_SUBREG (i64 (IMPLICIT_DEF)), ++ (XOR32 (EXTRACT_SUBREG GPR64:$rj, sub_32), (EXTRACT_SUBREG GPR64:$rk, sub_32)), sub_32)>, GPR_64; ++ ++def : LoongArchPat<(i64 (sext (i32 (xor (i32 (trunc (i64 (assertsext GPR64:$rj)))), (i32 GPR32:$rk))))), ++ (INSERT_SUBREG (i64 (IMPLICIT_DEF)), ++ (XOR32 (EXTRACT_SUBREG GPR64:$rj, sub_32), GPR32:$rk), sub_32)>, GPR_64; ++ ++def : LoongArchPat<(i64 (sext (i32 (or (i32 (trunc (i64 (assertsext GPR64:$rj)))), (uimm12_32:$imm12))))), ++ (INSERT_SUBREG (i64 (IMPLICIT_DEF)), ++ (ORI32 (EXTRACT_SUBREG GPR64:$rj, sub_32), (uimm12_32:$imm12)), sub_32)>, GPR_64; ++ ++def : LoongArchPat<(i64 (sext (i32 (or (i32 (trunc (i64 (assertsext GPR64:$rj)))), (i32 GPR32:$rk))))), ++ (INSERT_SUBREG (i64 (IMPLICIT_DEF)), ++ (OR32 (EXTRACT_SUBREG GPR64:$rj, sub_32), GPR32:$rk), sub_32)>, GPR_64; ++ ++def : LoongArchPat<(i64 (sext (select i32:$cond, (i32 (trunc (i64 (assertsext GPR64:$t)))), (i32 (trunc (i64 (assertsext GPR64:$f))))))), ++ (INSERT_SUBREG (i64 (IMPLICIT_DEF)), ++ (OR32 (MASKEQZ32 (EXTRACT_SUBREG GPR64:$t, sub_32), i32:$cond), ++ (MASKNEZ32 (EXTRACT_SUBREG GPR64:$f, sub_32), i32:$cond)), sub_32)>; ++ ++def : LoongArchPat<(i64 (sext (i32 (shl (i32 (trunc (i64 (assertsext GPR64:$rj)))), (i32 (trunc (i64 (assertsext GPR64:$rk)))))))), ++ (INSERT_SUBREG (i64 (IMPLICIT_DEF)), ++ (SLL_W (EXTRACT_SUBREG GPR64:$rj, sub_32), (EXTRACT_SUBREG GPR64:$rk, sub_32)), sub_32)>, GPR_64; ++ ++def : LoongArchPat<(i64 (sext (i32 (srem (i32 (trunc (i64 (assertsext GPR64:$rj)))), (i32 (trunc (i64 (assertsext GPR64:$rk)))))))), ++ (INSERT_SUBREG (i64 (IMPLICIT_DEF)), ++ (MOD_W (EXTRACT_SUBREG GPR64:$rj, sub_32), (EXTRACT_SUBREG GPR64:$rk, sub_32)), sub_32)>, GPR_64; ++ ++def : LoongArchPat<(atomic_store_32 addr:$a, (i32 (trunc (i64 (assertsext GPR64:$rj))))), ++ (ST_W32 (EXTRACT_SUBREG GPR64:$rj, sub_32), addr:$a)>, GPR_64; ++ ++def : LoongArchPat<(i64 (sext (i32 (sub (i32 (trunc (i64 (assertsext GPR64:$rj)))), (i32 (trunc (i64 (assertsext GPR64:$rk)))))))), ++ (INSERT_SUBREG (i64 (IMPLICIT_DEF)), ++ (SUB_W (EXTRACT_SUBREG GPR64:$rj, sub_32), (EXTRACT_SUBREG GPR64:$rk, sub_32)), sub_32)>, GPR_64; ++ ++def : LoongArchPat<(i64 (sext (i32 (udiv (i32 (trunc (i64 (assertsext GPR64:$rj)))), (i32 (trunc (i64 (assertsext GPR64:$rk)))))))), ++ (INSERT_SUBREG (i64 (IMPLICIT_DEF)), ++ (DIV_WU (EXTRACT_SUBREG GPR64:$rj, sub_32), (EXTRACT_SUBREG GPR64:$rk, sub_32)), sub_32)>, GPR_64; ++ ++def : LoongArchPat<(i64 (sext (i32 (urem (i32 (trunc (i64 (assertsext GPR64:$rj)))), (i32 (trunc (i64 (assertsext GPR64:$rk)))))))), ++ (INSERT_SUBREG (i64 (IMPLICIT_DEF)), ++ (MOD_WU (EXTRACT_SUBREG GPR64:$rj, sub_32), (EXTRACT_SUBREG GPR64:$rk, sub_32)), sub_32)>, GPR_64; ++ ++def : LoongArchPat<(brcond (i32 (seteq (i32 (trunc (i64 (assertsext GPR64:$rj)))), 0)), bb:$offs21), ++ (BEQZ32 (EXTRACT_SUBREG GPR64:$rj, sub_32), brtarget:$offs21)>; ++ ++def : LoongArchPat<(setne (i32 (trunc (i64 (assertsext GPR64:$rj)))), 0), ++ (SLTU32 ZERO, (EXTRACT_SUBREG GPR64:$rj, sub_32))>; ++ ++def : LoongArchPat<(select i32:$cond, (i32 (trunc (i64 (assertsext GPR64:$t)))), (i32 (trunc (i64 (assertsext GPR64:$f))))), ++ (OR32 (MASKEQZ32 (EXTRACT_SUBREG GPR64:$t, sub_32), i32:$cond), ++ (MASKNEZ32 (EXTRACT_SUBREG GPR64:$f, sub_32), i32:$cond))>; ++ ++def : LoongArchPat<(select (i32 (setne (i32 (trunc (i64 (assertsext GPR64:$cond)))), immz)), immz, i32:$f), ++ (MASKNEZ32 i32:$f, (EXTRACT_SUBREG GPR64:$cond, sub_32))>; ++ ++def : LoongArchPat<(select (i32 (seteq 
(i32 (trunc (i64 (assertsext GPR64:$cond)))), immz)), immz, i32:$f), ++ (MASKEQZ32 i32:$f, (EXTRACT_SUBREG GPR64:$cond, sub_32))>; ++ ++ def : LoongArchPat<(store (i32 (trunc (i64 (assertsext GPR64:$v)))), addr:$a), ++ (ST_W32 (EXTRACT_SUBREG GPR64:$v, sub_32), addr:$a)>; ++ ++ ++def : LoongArchPat<(i32 (xor GPR32:$rj, (i32 -1))), ++ (NOR32 ZERO, GPR32:$rj)>; ++ ++def : LoongArchPat<(and GPR32:$rj, (i32 (xor GPR32:$rk, (i32 -1)))), ++ (ANDN32 GPR32:$rj, GPR32:$rk)>; ++ ++def : LoongArchPat< ++ (i64 ++ (sext ++ (i32 (and (i32 (trunc (i64 (assertsext GPR64:$rj)))), ++ (i32 (xor (i32 (trunc (i64 (assertsext GPR64:$rk)))), ++ (i32 -1)))) ++ ) ++ ) ++ ), ++ (INSERT_SUBREG ++ (i64 (IMPLICIT_DEF)), ++ (ANDN32 (EXTRACT_SUBREG GPR64:$rj, sub_32), ++ (EXTRACT_SUBREG GPR64:$rk, sub_32)), ++ sub_32 ++ )>; ++ ++def : LoongArchPat< ++ (i64 ++ (sext ++ (i32 (or (i32 (trunc (i64 (assertsext GPR64:$rj)))), ++ (i32 (xor (i32 (trunc (i64 (assertsext GPR64:$rk)))), ++ (i32 -1)))) ++ ) ++ ) ++ ), ++ (INSERT_SUBREG ++ (i64 (IMPLICIT_DEF)), ++ (ORN32 (EXTRACT_SUBREG GPR64:$rj, sub_32), ++ (EXTRACT_SUBREG GPR64:$rk, sub_32)), ++ sub_32 ++ )>; ++ ++def : LoongArchPat<(i64 ++ (sext ++ (i32 (xor (i32 (or (i32 (trunc (i64 (assertsext GPR64:$rj)))), ++ (i32 (trunc (i64 (assertsext GPR64:$rk)))))), ++ (i32 -1)) ++ ) ++ ) ++ ), ++ (INSERT_SUBREG ++ (i64 (IMPLICIT_DEF)), ++ (NOR32 (EXTRACT_SUBREG GPR64:$rj, sub_32), ++ (EXTRACT_SUBREG GPR64:$rk, sub_32)), ++ sub_32 ++ )>; ++ ++def : LoongArchPat<(i64 ++ (sext ++ (i32 (xor (i32 (trunc (i64 (or (i64 (assertsext GPR64:$rj)), ++ (i64 (assertsext GPR64:$rk)))))), ++ (i32 -1)) ++ ) ++ ) ++ ), ++ (INSERT_SUBREG ++ (i64 (IMPLICIT_DEF)), ++ (NOR32 (EXTRACT_SUBREG GPR64:$rk, sub_32), ++ (EXTRACT_SUBREG GPR64:$rj, sub_32)), ++ sub_32 ++ )>; ++ ++def : LoongArchPat<(i64 ++ (sext ++ (i32 (xor (i32 (trunc (i64 (assertsext GPR64:$rj)))), ++ (i32 -1)) ++ ) ++ ) ++ ), ++ (INSERT_SUBREG ++ (i64 (IMPLICIT_DEF)), ++ (NOR32 ZERO, (EXTRACT_SUBREG GPR64:$rj, sub_32)), ++ sub_32 ++ )>; ++ ++def : LoongArchPat<(i64 ++ (zext ++ (i32 (seteq (i32 (trunc (i64 (assertsext GPR64:$rj)))), ++ (i32 0)) ++ ) ++ ) ++ ), ++ (INSERT_SUBREG ++ (i64 (IMPLICIT_DEF)), ++ (SLTUI32 (EXTRACT_SUBREG GPR64:$rj, sub_32), (i32 1)), ++ sub_32 ++ )>; +diff --git a/lib/Target/LoongArch/LoongArchAsmPrinter.cpp b/lib/Target/LoongArch/LoongArchAsmPrinter.cpp +new file mode 100644 +index 00000000..f84b5bda +--- /dev/null ++++ b/lib/Target/LoongArch/LoongArchAsmPrinter.cpp +@@ -0,0 +1,601 @@ ++//===- LoongArchAsmPrinter.cpp - LoongArch LLVM Assembly Printer --------------------===// ++// ++// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. ++// See https://llvm.org/LICENSE.txt for license information. ++// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception ++// ++//===----------------------------------------------------------------------===// ++// ++// This file contains a printer that converts from our internal representation ++// of machine-dependent LLVM code to GAS-format LoongArch assembly language. 
++// ++//===----------------------------------------------------------------------===// ++ ++#include "LoongArchAsmPrinter.h" ++#include "MCTargetDesc/LoongArchInstPrinter.h" ++#include "MCTargetDesc/LoongArchABIInfo.h" ++#include "MCTargetDesc/LoongArchBaseInfo.h" ++#include "MCTargetDesc/LoongArchMCTargetDesc.h" ++#include "LoongArch.h" ++#include "LoongArchMCInstLower.h" ++#include "LoongArchMachineFunction.h" ++#include "LoongArchSubtarget.h" ++#include "LoongArchTargetMachine.h" ++#include "LoongArchTargetStreamer.h" ++#include "llvm/ADT/SmallString.h" ++#include "llvm/ADT/StringRef.h" ++#include "llvm/ADT/Triple.h" ++#include "llvm/ADT/Twine.h" ++#include "llvm/BinaryFormat/ELF.h" ++#include "llvm/CodeGen/MachineBasicBlock.h" ++#include "llvm/CodeGen/MachineConstantPool.h" ++#include "llvm/CodeGen/MachineFrameInfo.h" ++#include "llvm/CodeGen/MachineFunction.h" ++#include "llvm/CodeGen/MachineInstr.h" ++#include "llvm/CodeGen/MachineJumpTableInfo.h" ++#include "llvm/CodeGen/MachineOperand.h" ++#include "llvm/CodeGen/TargetRegisterInfo.h" ++#include "llvm/CodeGen/TargetSubtargetInfo.h" ++#include "llvm/IR/Attributes.h" ++#include "llvm/IR/BasicBlock.h" ++#include "llvm/IR/DataLayout.h" ++#include "llvm/IR/Function.h" ++#include "llvm/IR/InlineAsm.h" ++#include "llvm/IR/Instructions.h" ++#include "llvm/MC/MCContext.h" ++#include "llvm/MC/MCExpr.h" ++#include "llvm/MC/MCInst.h" ++#include "llvm/MC/MCInstBuilder.h" ++#include "llvm/MC/MCObjectFileInfo.h" ++#include "llvm/MC/MCSectionELF.h" ++#include "llvm/MC/MCSymbol.h" ++#include "llvm/MC/MCSymbolELF.h" ++#include "llvm/MC/TargetRegistry.h" ++#include "llvm/Support/Casting.h" ++#include "llvm/Support/ErrorHandling.h" ++#include "llvm/Support/raw_ostream.h" ++#include "llvm/Target/TargetMachine.h" ++#include ++#include ++#include ++#include ++#include ++#include ++ ++using namespace llvm; ++ ++#define DEBUG_TYPE "loongarch-asm-printer" ++ ++LoongArchTargetStreamer &LoongArchAsmPrinter::getTargetStreamer() const { ++ return static_cast(*OutStreamer->getTargetStreamer()); ++} ++ ++bool LoongArchAsmPrinter::runOnMachineFunction(MachineFunction &MF) { ++ Subtarget = &MF.getSubtarget(); ++ ++ LoongArchFI = MF.getInfo(); ++ MCP = MF.getConstantPool(); ++ ++ AsmPrinter::runOnMachineFunction(MF); ++ ++ emitXRayTable(); ++ ++ return true; ++} ++ ++bool LoongArchAsmPrinter::lowerOperand(const MachineOperand &MO, MCOperand &MCOp) { ++ MCOp = MCInstLowering.LowerOperand(MO); ++ return MCOp.isValid(); ++} ++ ++#include "LoongArchGenMCPseudoLowering.inc" ++ ++// Lower PseudoReturn/PseudoIndirectBranch/PseudoIndirectBranch64 to ++// JIRL as appropriate for the target. ++void LoongArchAsmPrinter::emitPseudoIndirectBranch(MCStreamer &OutStreamer, ++ const MachineInstr *MI) { ++ bool HasLinkReg = false; ++ MCInst TmpInst0; ++ TmpInst0.setOpcode(LoongArch::JIRL); ++ HasLinkReg = true; ++ ++ MCOperand MCOp; ++ ++ if (HasLinkReg) { ++ unsigned ZeroReg = Subtarget->is64Bit() ? 
LoongArch::ZERO_64 : LoongArch::ZERO; ++ TmpInst0.addOperand(MCOperand::createReg(ZeroReg)); ++ } ++ ++ lowerOperand(MI->getOperand(0), MCOp); ++ TmpInst0.addOperand(MCOp); ++ ++ TmpInst0.addOperand(MCOperand::createImm(0)); ++ ++ EmitToStreamer(OutStreamer, TmpInst0); ++} ++ ++void LoongArchAsmPrinter::emitPseudoTailBranch(MCStreamer &OutStreamer, ++ const MachineInstr *MI) { ++ MCInst TmpInst; ++ TmpInst.setOpcode(LoongArch::B); ++ ++ MCOperand MCOp; ++ ++ lowerOperand(MI->getOperand(0), MCOp); ++ TmpInst.addOperand(MCOp); ++ ++ EmitToStreamer(OutStreamer, TmpInst); ++} ++ ++void LoongArchAsmPrinter::emitInstruction(const MachineInstr *MI) { ++ LoongArchTargetStreamer &TS = getTargetStreamer(); ++ unsigned Opc = MI->getOpcode(); ++ TS.forbidModuleDirective(); ++ ++ if (MI->isDebugValue()) { ++ SmallString<128> Str; ++ raw_svector_ostream OS(Str); ++ ++ PrintDebugValueComment(MI, OS); ++ return; ++ } ++ if (MI->isDebugLabel()) ++ return; ++ // If we just ended a constant pool, mark it as such. ++ OutStreamer->emitDataRegion(MCDR_DataRegionEnd); ++ InConstantPool = false; ++ ++ switch (Opc) { ++ case LoongArch::PATCHABLE_FUNCTION_ENTER: ++ LowerPATCHABLE_FUNCTION_ENTER(*MI); ++ return; ++ case LoongArch::PATCHABLE_FUNCTION_EXIT: ++ LowerPATCHABLE_FUNCTION_EXIT(*MI); ++ return; ++ case LoongArch::PATCHABLE_TAIL_CALL: ++ LowerPATCHABLE_TAIL_CALL(*MI); ++ return; ++ } ++ MachineBasicBlock::const_instr_iterator I = MI->getIterator(); ++ MachineBasicBlock::const_instr_iterator E = MI->getParent()->instr_end(); ++ ++ do { ++ // Do any auto-generated pseudo lowerings. ++ if (emitPseudoExpansionLowering(*OutStreamer, &*I)) ++ continue; ++ if (I->getOpcode() == LoongArch::PseudoReturn || ++ I->getOpcode() == LoongArch::PseudoReturn64){ ++ emitPseudoIndirectBranch(*OutStreamer, &*I); ++ continue; ++ } ++ if (I->getOpcode() == LoongArch::PseudoTailReturn){ ++ emitPseudoTailBranch(*OutStreamer, &*I); ++ continue; ++ } ++ ++ // Some instructions are marked as pseudo right now which ++ // would make the test fail for the wrong reason but ++ // that will be fixed soon. We need this here because we are ++ // removing another test for this situation downstream in the ++ // callchain. ++ // ++ if (I->isPseudo() ++ && !isLongBranchPseudo(I->getOpcode())) ++ llvm_unreachable("Pseudo opcode found in EmitInstruction()"); ++ ++ MCInst TmpInst0; ++ MCInstLowering.Lower(&*I, TmpInst0); ++ EmitToStreamer(*OutStreamer, TmpInst0); ++ } while ((++I != E) && I->isInsideBundle()); ++} ++ ++//===----------------------------------------------------------------------===// ++// ++// LoongArch Asm Directives ++// ++// ++//===----------------------------------------------------------------------===// ++ ++//===----------------------------------------------------------------------===// ++// Set directives ++//===----------------------------------------------------------------------===// ++ ++/// Emit Set directives. 
++const char *LoongArchAsmPrinter::getCurrentABIString() const {
++  switch (static_cast<const LoongArchTargetMachine &>(TM).getABI().GetEnumValue()) {
++  case LoongArchABIInfo::ABI::ILP32D:
++    return "abiilp32d";
++  case LoongArchABIInfo::ABI::ILP32F:
++    return "abiilp32f";
++  case LoongArchABIInfo::ABI::ILP32S:
++    return "abiilp32s";
++  case LoongArchABIInfo::ABI::LP64D:
++    return "abilp64d";
++  case LoongArchABIInfo::ABI::LP64S:
++    return "abilp64s";
++  case LoongArchABIInfo::ABI::LP64F:
++    return "abilp64f";
++  default: llvm_unreachable("Unknown LoongArch ABI");
++  }
++}
++
++void LoongArchAsmPrinter::emitFunctionEntryLabel() {
++
++  OutStreamer->emitLabel(CurrentFnSym);
++
++}
++
++/// EmitFunctionBodyStart - Targets can override this to emit stuff before
++/// the first basic block in the function.
++void LoongArchAsmPrinter::emitFunctionBodyStart() {
++
++  MCInstLowering.Initialize(&MF->getContext());
++}
++
++/// EmitFunctionBodyEnd - Targets can override this to emit stuff after
++/// the last basic block in the function.
++void LoongArchAsmPrinter::emitFunctionBodyEnd() {
++
++  // Make sure to terminate any constant pools that were at the end
++  // of the function.
++  if (!InConstantPool)
++    return;
++  InConstantPool = false;
++  OutStreamer->emitDataRegion(MCDR_DataRegionEnd);
++}
++
++void LoongArchAsmPrinter::emitBasicBlockEnd(const MachineBasicBlock &MBB) {
++  AsmPrinter::emitBasicBlockEnd(MBB);
++}
++
++/// isBlockOnlyReachableByFallthrough - Return true if the basic block has
++/// exactly one predecessor and the control transfer mechanism between
++/// the predecessor and this block is a fall-through.
++bool LoongArchAsmPrinter::isBlockOnlyReachableByFallthrough(const MachineBasicBlock*
++                                                            MBB) const {
++  // The predecessor has to be immediately before this block.
++  const MachineBasicBlock *Pred = *MBB->pred_begin();
++
++  // If the predecessor is a switch statement, assume a jump table
++  // implementation, so it is not a fall through.
++  if (const BasicBlock *bb = Pred->getBasicBlock())
++    if (isa<SwitchInst>(bb->getTerminator()))
++      return false;
++
++  // Check default implementation
++  return AsmPrinter::isBlockOnlyReachableByFallthrough(MBB);
++}
++
++// Print out an operand for an inline asm expression.
++bool LoongArchAsmPrinter::PrintAsmOperand(const MachineInstr *MI,
++                                          unsigned OpNum, const char *ExtraCode, raw_ostream &O) {
++  // Does this asm operand have a single letter operand modifier?
++  if (ExtraCode && ExtraCode[0]) {
++    if (ExtraCode[1] != 0) return true; // Unknown modifier.
++ ++ const MachineOperand &MO = MI->getOperand(OpNum); ++ switch (ExtraCode[0]) { ++ default: ++ // See if this is a generic print operand ++ return AsmPrinter::PrintAsmOperand(MI,OpNum,ExtraCode,O); ++ case 'X': // hex const int ++ if ((MO.getType()) != MachineOperand::MO_Immediate) ++ return true; ++ O << "0x" << Twine::utohexstr(MO.getImm()); ++ return false; ++ case 'x': // hex const int (low 16 bits) ++ if ((MO.getType()) != MachineOperand::MO_Immediate) ++ return true; ++ O << "0x" << Twine::utohexstr(MO.getImm() & 0xffff); ++ return false; ++ case 'd': // decimal const int ++ if ((MO.getType()) != MachineOperand::MO_Immediate) ++ return true; ++ O << MO.getImm(); ++ return false; ++ case 'm': // decimal const int minus 1 ++ if ((MO.getType()) != MachineOperand::MO_Immediate) ++ return true; ++ O << MO.getImm() - 1; ++ return false; ++ case 'y': // exact log2 ++ if ((MO.getType()) != MachineOperand::MO_Immediate) ++ return true; ++ if (!isPowerOf2_64(MO.getImm())) ++ return true; ++ O << Log2_64(MO.getImm()); ++ return false; ++ case 'z': ++ // $r0 if zero, regular printing otherwise ++ if (MO.getType() == MachineOperand::MO_Immediate && MO.getImm() == 0) { ++ O << "$r0"; ++ return false; ++ } ++ // If not, call printOperand as normal. ++ break; ++ case 'D': // Second part of a double word register operand ++ case 'L': // Low order register of a double word register operand ++ case 'M': // High order register of a double word register operand ++ { ++ if (OpNum == 0) ++ return true; ++ const MachineOperand &FlagsOP = MI->getOperand(OpNum - 1); ++ if (!FlagsOP.isImm()) ++ return true; ++ unsigned Flags = FlagsOP.getImm(); ++ unsigned NumVals = InlineAsm::getNumOperandRegisters(Flags); ++ // Number of registers represented by this operand. We are looking ++ // for 2 for 32 bit mode and 1 for 64 bit mode. ++ if (NumVals != 2) { ++ if (Subtarget->is64Bit() && NumVals == 1 && MO.isReg()) { ++ unsigned Reg = MO.getReg(); ++ O << '$' << LoongArchInstPrinter::getRegisterName(Reg); ++ return false; ++ } ++ return true; ++ } ++ ++ unsigned RegOp = OpNum; ++ if (!Subtarget->is64Bit()){ ++ // Endianness reverses which register holds the high or low value ++ // between M and L. ++ switch(ExtraCode[0]) { ++ case 'M': ++ RegOp = OpNum + 1; ++ break; ++ case 'L': ++ RegOp = OpNum; ++ break; ++ case 'D': // Always the second part ++ RegOp = OpNum + 1; ++ } ++ if (RegOp >= MI->getNumOperands()) ++ return true; ++ const MachineOperand &MO = MI->getOperand(RegOp); ++ if (!MO.isReg()) ++ return true; ++ unsigned Reg = MO.getReg(); ++ O << '$' << LoongArchInstPrinter::getRegisterName(Reg); ++ return false; ++ } ++ break; ++ } ++ case 'w': ++ // Print LSX registers for the 'f' constraint ++ // In LLVM, the 'w' modifier doesn't need to do anything. ++ // We can just call printOperand as normal. ++ break; ++ case 'u': ++ // Print LASX registers for the 'f' constraint ++ // In LLVM, the 'u' modifier doesn't need to do anything. ++ // We can just call printOperand as normal. 
++ break; ++ } ++ } ++ ++ printOperand(MI, OpNum, O); ++ return false; ++} ++ ++bool LoongArchAsmPrinter::PrintAsmMemoryOperand(const MachineInstr *MI, ++ unsigned OpNum, ++ const char *ExtraCode, ++ raw_ostream &O) { ++ assert(OpNum + 1 < MI->getNumOperands() && "Insufficient operands"); ++ const MachineOperand &BaseMO = MI->getOperand(OpNum); ++ const MachineOperand &OffsetMO = MI->getOperand(OpNum + 1); ++ assert(BaseMO.isReg() && "Unexpected base pointer for inline asm memory operand."); ++ assert(OffsetMO.isImm() && "Unexpected offset for inline asm memory operand."); ++ int Offset = OffsetMO.getImm(); ++ ++ // Currently we are expecting either no ExtraCode or 'D','M','L'. ++ if (ExtraCode) { ++ switch (ExtraCode[0]) { ++ case 'D': ++ case 'M': ++ Offset += 4; ++ break; ++ case 'L': ++ break; ++ default: ++ return true; // Unknown modifier. ++ } ++ } ++ ++ O << "$" << LoongArchInstPrinter::getRegisterName(BaseMO.getReg()) << ", " << Offset; ++ ++ return false; ++} ++ ++void LoongArchAsmPrinter::printOperand(const MachineInstr *MI, int opNum, ++ raw_ostream &O) { ++ const MachineOperand &MO = MI->getOperand(opNum); ++ ++ switch (MO.getType()) { ++ case MachineOperand::MO_Register: ++ O << '$' ++ << StringRef(LoongArchInstPrinter::getRegisterName(MO.getReg())).lower(); ++ break; ++ ++ case MachineOperand::MO_Immediate: ++ O << MO.getImm(); ++ break; ++ ++ case MachineOperand::MO_MachineBasicBlock: ++ MO.getMBB()->getSymbol()->print(O, MAI); ++ return; ++ ++ case MachineOperand::MO_GlobalAddress: ++ getSymbol(MO.getGlobal())->print(O, MAI); ++ break; ++ ++ case MachineOperand::MO_BlockAddress: { ++ MCSymbol *BA = GetBlockAddressSymbol(MO.getBlockAddress()); ++ O << BA->getName(); ++ break; ++ } ++ ++ case MachineOperand::MO_ConstantPoolIndex: ++ O << getDataLayout().getPrivateGlobalPrefix() << "CPI" ++ << getFunctionNumber() << "_" << MO.getIndex(); ++ if (MO.getOffset()) ++ O << "+" << MO.getOffset(); ++ break; ++ ++ default: ++ llvm_unreachable(""); ++ } ++} ++ ++void LoongArchAsmPrinter:: ++printMemOperand(const MachineInstr *MI, int opNum, raw_ostream &O) { ++ // Load/Store memory operands -- imm($reg) ++ // If PIC target the target is loaded as the ++ // pattern lw $25,%call16($28) ++ ++ printOperand(MI, opNum+1, O); ++ O << "("; ++ printOperand(MI, opNum, O); ++ O << ")"; ++} ++ ++void LoongArchAsmPrinter:: ++printMemOperandEA(const MachineInstr *MI, int opNum, raw_ostream &O) { ++ // when using stack locations for not load/store instructions ++ // print the same way as all normal 3 operand instructions. ++ printOperand(MI, opNum, O); ++ O << ", "; ++ printOperand(MI, opNum+1, O); ++} ++ ++void LoongArchAsmPrinter:: ++printRegisterList(const MachineInstr *MI, int opNum, raw_ostream &O) { ++ for (int i = opNum, e = MI->getNumOperands(); i != e; ++i) { ++ if (i != opNum) O << ", "; ++ printOperand(MI, i, O); ++ } ++} ++ ++void LoongArchAsmPrinter::emitStartOfAsmFile(Module &M) { ++ LoongArchTargetStreamer &TS = getTargetStreamer(); ++ ++ // LoongArchTargetStreamer has an initialization order problem when emitting an ++ // object file directly (see LoongArchTargetELFStreamer for full details). Work ++ // around it by re-initializing the PIC state here. ++ TS.setPic(OutContext.getObjectFileInfo()->isPositionIndependent()); ++ ++ // Compute LoongArch architecture attributes based on the default subtarget ++ // that we'd have constructed. Module level directives aren't LTO ++ // clean anyhow. 
++ // FIXME: For ifunc related functions we could iterate over and look ++ // for a feature string that doesn't match the default one. ++ const Triple &TT = TM.getTargetTriple(); ++ StringRef CPU = LoongArch_MC::selectLoongArchCPU(TT, TM.getTargetCPU()); ++ StringRef FS = TM.getTargetFeatureString(); ++ const LoongArchTargetMachine &MTM = static_cast(TM); ++ const LoongArchSubtarget STI(TT, CPU, FS, MTM, None); ++ ++ TS.updateABIInfo(STI); ++} ++ ++void LoongArchAsmPrinter::emitInlineAsmStart() const { ++ ++ OutStreamer->AddBlankLine(); ++} ++ ++void LoongArchAsmPrinter::emitInlineAsmEnd(const MCSubtargetInfo &StartInfo, ++ const MCSubtargetInfo *EndInfo) const { ++ OutStreamer->AddBlankLine(); ++} ++ ++void LoongArchAsmPrinter::EmitInstrReg(const MCSubtargetInfo &STI, unsigned Opcode, ++ unsigned Reg) { ++ MCInst I; ++ I.setOpcode(Opcode); ++ I.addOperand(MCOperand::createReg(Reg)); ++ OutStreamer->emitInstruction(I, STI); ++} ++ ++void LoongArchAsmPrinter::EmitInstrRegReg(const MCSubtargetInfo &STI, ++ unsigned Opcode, unsigned Reg1, ++ unsigned Reg2) { ++ MCInst I; ++ // ++ // Because of the current td files for LoongArch32, the operands for MTC1 ++ // appear backwards from their normal assembly order. It's not a trivial ++ // change to fix this in the td file so we adjust for it here. ++ // ++ if (Opcode == LoongArch::MOVGR2FR_W) { ++ unsigned Temp = Reg1; ++ Reg1 = Reg2; ++ Reg2 = Temp; ++ } ++ I.setOpcode(Opcode); ++ I.addOperand(MCOperand::createReg(Reg1)); ++ I.addOperand(MCOperand::createReg(Reg2)); ++ OutStreamer->emitInstruction(I, STI); ++} ++ ++void LoongArchAsmPrinter::EmitInstrRegRegReg(const MCSubtargetInfo &STI, ++ unsigned Opcode, unsigned Reg1, ++ unsigned Reg2, unsigned Reg3) { ++ MCInst I; ++ I.setOpcode(Opcode); ++ I.addOperand(MCOperand::createReg(Reg1)); ++ I.addOperand(MCOperand::createReg(Reg2)); ++ I.addOperand(MCOperand::createReg(Reg3)); ++ OutStreamer->emitInstruction(I, STI); ++} ++ ++void LoongArchAsmPrinter::EmitMovFPIntPair(const MCSubtargetInfo &STI, ++ unsigned MovOpc, unsigned Reg1, ++ unsigned Reg2, unsigned FPReg1, ++ unsigned FPReg2, bool LE) { ++ if (!LE) { ++ unsigned temp = Reg1; ++ Reg1 = Reg2; ++ Reg2 = temp; ++ } ++ EmitInstrRegReg(STI, MovOpc, Reg1, FPReg1); ++ EmitInstrRegReg(STI, MovOpc, Reg2, FPReg2); ++} ++ ++void LoongArchAsmPrinter::emitEndOfAsmFile(Module &M) { ++ // return to the text section ++ OutStreamer->SwitchSection(OutContext.getObjectFileInfo()->getTextSection()); ++} ++ ++void LoongArchAsmPrinter::EmitSled(const MachineInstr &MI, SledKind Kind) { ++// Now this is unimplemented. ++} ++ ++void LoongArchAsmPrinter::LowerPATCHABLE_FUNCTION_ENTER(const MachineInstr &MI) { ++ EmitSled(MI, SledKind::FUNCTION_ENTER); ++} ++ ++void LoongArchAsmPrinter::LowerPATCHABLE_FUNCTION_EXIT(const MachineInstr &MI) { ++ EmitSled(MI, SledKind::FUNCTION_EXIT); ++} ++ ++void LoongArchAsmPrinter::LowerPATCHABLE_TAIL_CALL(const MachineInstr &MI) { ++ EmitSled(MI, SledKind::TAIL_CALL); ++} ++ ++void LoongArchAsmPrinter::PrintDebugValueComment(const MachineInstr *MI, ++ raw_ostream &OS) { ++ // TODO: implement ++} ++ ++bool LoongArchAsmPrinter::isLongBranchPseudo(int Opcode) const { ++ return (Opcode == LoongArch::LONG_BRANCH_ADDIW ++ || Opcode == LoongArch::LONG_BRANCH_ADDIW2Op ++ || Opcode == LoongArch::LONG_BRANCH_ADDID ++ || Opcode == LoongArch::LONG_BRANCH_ADDID2Op ++ || Opcode == LoongArch::LONG_BRANCH_PCADDU12I); ++} ++ ++// Force static initialization. 
++extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeLoongArchAsmPrinter() { ++ RegisterAsmPrinter X(getTheLoongArch32Target()); ++ RegisterAsmPrinter A(getTheLoongArch64Target()); ++} +diff --git a/lib/Target/LoongArch/LoongArchAsmPrinter.h b/lib/Target/LoongArch/LoongArchAsmPrinter.h +new file mode 100644 +index 00000000..0facaa29 +--- /dev/null ++++ b/lib/Target/LoongArch/LoongArchAsmPrinter.h +@@ -0,0 +1,138 @@ ++//===- LoongArchAsmPrinter.h - LoongArch LLVM Assembly Printer -----------*- C++ -*--===// ++// ++// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. ++// See https://llvm.org/LICENSE.txt for license information. ++// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception ++// ++//===----------------------------------------------------------------------===// ++// ++// LoongArch Assembly printer class. ++// ++//===----------------------------------------------------------------------===// ++ ++#ifndef LLVM_LIB_TARGET_LOONGARCH_LOONGARCHASMPRINTER_H ++#define LLVM_LIB_TARGET_LOONGARCH_LOONGARCHASMPRINTER_H ++ ++#include "LoongArchMCInstLower.h" ++#include "LoongArchSubtarget.h" ++#include "llvm/CodeGen/AsmPrinter.h" ++#include "llvm/MC/MCStreamer.h" ++#include "llvm/Support/Compiler.h" ++#include ++#include ++#include ++ ++namespace llvm { ++ ++class MCOperand; ++class MCSubtargetInfo; ++class MCSymbol; ++class MachineBasicBlock; ++class MachineConstantPool; ++class MachineFunction; ++class MachineInstr; ++class MachineOperand; ++class LoongArchFunctionInfo; ++class LoongArchTargetStreamer; ++class Module; ++class raw_ostream; ++class TargetMachine; ++ ++class LLVM_LIBRARY_VISIBILITY LoongArchAsmPrinter : public AsmPrinter { ++ LoongArchTargetStreamer &getTargetStreamer() const; ++ ++ void EmitInstrWithMacroNoAT(const MachineInstr *MI); ++ ++ //===------------------------------------------------------------------===// ++ // XRay implementation ++ //===------------------------------------------------------------------===// ++ ++public: ++ // XRay-specific lowering for LoongArch. ++ void LowerPATCHABLE_FUNCTION_ENTER(const MachineInstr &MI); ++ void LowerPATCHABLE_FUNCTION_EXIT(const MachineInstr &MI); ++ void LowerPATCHABLE_TAIL_CALL(const MachineInstr &MI); ++ ++private: ++ /// MCP - Keep a pointer to constantpool entries of the current ++ /// MachineFunction. ++ const MachineConstantPool *MCP = nullptr; ++ ++ /// InConstantPool - Maintain state when emitting a sequence of constant ++ /// pool entries so we can properly mark them as data regions. ++ bool InConstantPool = false; ++ ++ void EmitSled(const MachineInstr &MI, SledKind Kind); ++ ++ // tblgen'erated function. ++ bool emitPseudoExpansionLowering(MCStreamer &OutStreamer, ++ const MachineInstr *MI); ++ ++ // Emit PseudoReturn, PseudoReturn64, PseudoIndirectBranch, ++ // and PseudoIndirectBranch64 as a JIRL as appropriate ++ // for the target. ++ void emitPseudoIndirectBranch(MCStreamer &OutStreamer, ++ const MachineInstr *MI); ++ ++ void emitPseudoTailBranch(MCStreamer &OutStreamer, ++ const MachineInstr *MI); ++ ++ // lowerOperand - Convert a MachineOperand into the equivalent MCOperand. 
++ bool lowerOperand(const MachineOperand &MO, MCOperand &MCOp); ++ ++ void emitInlineAsmStart() const override; ++ ++ void emitInlineAsmEnd(const MCSubtargetInfo &StartInfo, ++ const MCSubtargetInfo *EndInfo) const override; ++ ++ void EmitInstrReg(const MCSubtargetInfo &STI, unsigned Opcode, unsigned Reg); ++ ++ void EmitInstrRegReg(const MCSubtargetInfo &STI, unsigned Opcode, ++ unsigned Reg1, unsigned Reg2); ++ ++ void EmitInstrRegRegReg(const MCSubtargetInfo &STI, unsigned Opcode, ++ unsigned Reg1, unsigned Reg2, unsigned Reg3); ++ ++ void EmitMovFPIntPair(const MCSubtargetInfo &STI, unsigned MovOpc, ++ unsigned Reg1, unsigned Reg2, unsigned FPReg1, ++ unsigned FPReg2, bool LE); ++ ++ bool isLongBranchPseudo(int Opcode) const; ++ ++public: ++ const LoongArchSubtarget *Subtarget; ++ const LoongArchFunctionInfo *LoongArchFI; ++ LoongArchMCInstLower MCInstLowering; ++ ++ explicit LoongArchAsmPrinter(TargetMachine &TM, ++ std::unique_ptr Streamer) ++ : AsmPrinter(TM, std::move(Streamer)), MCInstLowering(*this) {} ++ ++ StringRef getPassName() const override { return "LoongArch Assembly Printer"; } ++ ++ bool runOnMachineFunction(MachineFunction &MF) override; ++ ++ void emitInstruction(const MachineInstr *MI) override; ++ const char *getCurrentABIString() const; ++ void emitFunctionEntryLabel() override; ++ void emitFunctionBodyStart() override; ++ void emitFunctionBodyEnd() override; ++ void emitBasicBlockEnd(const MachineBasicBlock &MBB) override; ++ bool isBlockOnlyReachableByFallthrough( ++ const MachineBasicBlock* MBB) const override; ++ bool PrintAsmOperand(const MachineInstr *MI, unsigned OpNo, ++ const char *ExtraCode, raw_ostream &O) override; ++ bool PrintAsmMemoryOperand(const MachineInstr *MI, unsigned OpNum, ++ const char *ExtraCode, raw_ostream &O) override; ++ void printOperand(const MachineInstr *MI, int opNum, raw_ostream &O); ++ void printMemOperand(const MachineInstr *MI, int opNum, raw_ostream &O); ++ void printMemOperandEA(const MachineInstr *MI, int opNum, raw_ostream &O); ++ void printRegisterList(const MachineInstr *MI, int opNum, raw_ostream &O); ++ void emitStartOfAsmFile(Module &M) override; ++ void emitEndOfAsmFile(Module &M) override; ++ void PrintDebugValueComment(const MachineInstr *MI, raw_ostream &OS); ++}; ++ ++} // end namespace llvm ++ ++#endif // LLVM_LIB_TARGET_LOONGARCH_LOONGARCHASMPRINTER_H +diff --git a/lib/Target/LoongArch/LoongArchCCState.cpp b/lib/Target/LoongArch/LoongArchCCState.cpp +new file mode 100644 +index 00000000..18996f1e +--- /dev/null ++++ b/lib/Target/LoongArch/LoongArchCCState.cpp +@@ -0,0 +1,165 @@ ++//===---- LoongArchCCState.cpp - CCState with LoongArch specific extensions ---------===// ++// ++// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. ++// See https://llvm.org/LICENSE.txt for license information. ++// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception ++// ++//===----------------------------------------------------------------------===// ++ ++#include "LoongArchCCState.h" ++#include "LoongArchSubtarget.h" ++#include "llvm/IR/Module.h" ++ ++using namespace llvm; ++ ++/// This function returns true if CallSym is a long double emulation routine. 
++static bool isF128SoftLibCall(const char *CallSym) {
++  const char *const LibCalls[] = {
++      "__addtf3", "__divtf3", "__eqtf2", "__extenddftf2",
++      "__extendsftf2", "__fixtfdi", "__fixtfsi", "__fixtfti",
++      "__fixunstfdi", "__fixunstfsi", "__fixunstfti", "__floatditf",
++      "__floatsitf", "__floattitf", "__floatunditf", "__floatunsitf",
++      "__floatuntitf", "__getf2", "__gttf2", "__letf2",
++      "__lttf2", "__multf3", "__netf2", "__powitf2",
++      "__subtf3", "__trunctfdf2", "__trunctfsf2", "__unordtf2",
++      "ceill", "copysignl", "cosl", "exp2l",
++      "expl", "floorl", "fmal", "fmaxl",
++      "fmodl", "log10l", "log2l", "logl",
++      "nearbyintl", "powl", "rintl", "roundl",
++      "sinl", "sqrtl", "truncl"};
++
++  // Check that LibCalls is sorted alphabetically.
++  auto Comp = [](const char *S1, const char *S2) { return strcmp(S1, S2) < 0; };
++  assert(std::is_sorted(std::begin(LibCalls), std::end(LibCalls), Comp));
++  return std::binary_search(std::begin(LibCalls), std::end(LibCalls),
++                            CallSym, Comp);
++}
++
++/// This function returns true if Ty is fp128, {f128} or i128 which was
++/// originally an fp128.
++static bool originalTypeIsF128(const Type *Ty, const char *Func) {
++  if (Ty->isFP128Ty())
++    return true;
++
++  if (Ty->isStructTy() && Ty->getStructNumElements() == 1 &&
++      Ty->getStructElementType(0)->isFP128Ty())
++    return true;
++
++  // If the Ty is i128 and the function being called is a long double emulation
++  // routine, then the original type is f128.
++  return (Func && Ty->isIntegerTy(128) && isF128SoftLibCall(Func));
++}
++
++/// Return true if the original type was vXfXX.
++static bool originalEVTTypeIsVectorFloat(EVT Ty) {
++  if (Ty.isVector() && Ty.getVectorElementType().isFloatingPoint())
++    return true;
++
++  return false;
++}
++
++/// Return true if the original type was vXfXX / vXfXX.
++static bool originalTypeIsVectorFloat(const Type *Ty) {
++  if (Ty->isVectorTy() && Ty->isFPOrFPVectorTy())
++    return true;
++
++  return false;
++}
++
++LoongArchCCState::SpecialCallingConvType
++LoongArchCCState::getSpecialCallingConvForCallee(const SDNode *Callee,
++                                                 const LoongArchSubtarget &Subtarget) {
++  LoongArchCCState::SpecialCallingConvType SpecialCallingConv = NoSpecialCallingConv;
++  return SpecialCallingConv;
++}
++
++void LoongArchCCState::PreAnalyzeCallResultForF128(
++    const SmallVectorImpl<ISD::InputArg> &Ins,
++    const Type *RetTy, const char *Call) {
++  for (unsigned i = 0; i < Ins.size(); ++i) {
++    OriginalArgWasF128.push_back(
++        originalTypeIsF128(RetTy, Call));
++    OriginalArgWasFloat.push_back(RetTy->isFloatingPointTy());
++  }
++}
++
++/// Identify lowered values that originated from f128 or float arguments and
++/// record this for use by RetCC_LoongArchLP64.
++void LoongArchCCState::PreAnalyzeReturnForF128(
++    const SmallVectorImpl<ISD::OutputArg> &Outs) {
++  const MachineFunction &MF = getMachineFunction();
++  for (unsigned i = 0; i < Outs.size(); ++i) {
++    OriginalArgWasF128.push_back(
++        originalTypeIsF128(MF.getFunction().getReturnType(), nullptr));
++    OriginalArgWasFloat.push_back(
++        MF.getFunction().getReturnType()->isFloatingPointTy());
++  }
++}
++
++/// Identify lowered values that originated from vXfXX and record
++/// this.
++void LoongArchCCState::PreAnalyzeCallResultForVectorFloat(
++    const SmallVectorImpl<ISD::InputArg> &Ins, const Type *RetTy) {
++  for (unsigned i = 0; i < Ins.size(); ++i) {
++    OriginalRetWasFloatVector.push_back(originalTypeIsVectorFloat(RetTy));
++  }
++}
++
++/// Identify lowered values that originated from vXfXX arguments and record
++/// this.
++void LoongArchCCState::PreAnalyzeReturnForVectorFloat( ++ const SmallVectorImpl &Outs) { ++ for (unsigned i = 0; i < Outs.size(); ++i) { ++ ISD::OutputArg Out = Outs[i]; ++ OriginalRetWasFloatVector.push_back( ++ originalEVTTypeIsVectorFloat(Out.ArgVT)); ++ } ++} ++ ++/// Identify lowered values that originated from f128, float and sret to vXfXX ++/// arguments and record this. ++void LoongArchCCState::PreAnalyzeCallOperands( ++ const SmallVectorImpl &Outs, ++ std::vector &FuncArgs, ++ const char *Func) { ++ for (unsigned i = 0; i < Outs.size(); ++i) { ++ TargetLowering::ArgListEntry FuncArg = FuncArgs[Outs[i].OrigArgIndex]; ++ ++ OriginalArgWasF128.push_back(originalTypeIsF128(FuncArg.Ty, Func)); ++ OriginalArgWasFloat.push_back(FuncArg.Ty->isFloatingPointTy()); ++ OriginalArgWasFloatVector.push_back(FuncArg.Ty->isVectorTy()); ++ CallOperandIsFixed.push_back(Outs[i].IsFixed); ++ } ++} ++ ++/// Identify lowered values that originated from f128, float and vXfXX arguments ++/// and record this. ++void LoongArchCCState::PreAnalyzeFormalArgumentsForF128( ++ const SmallVectorImpl &Ins) { ++ const MachineFunction &MF = getMachineFunction(); ++ for (unsigned i = 0; i < Ins.size(); ++i) { ++ Function::const_arg_iterator FuncArg = MF.getFunction().arg_begin(); ++ ++ // SRet arguments cannot originate from f128 or {f128} returns so we just ++ // push false. We have to handle this specially since SRet arguments ++ // aren't mapped to an original argument. ++ if (Ins[i].Flags.isSRet()) { ++ OriginalArgWasF128.push_back(false); ++ OriginalArgWasFloat.push_back(false); ++ OriginalArgWasFloatVector.push_back(false); ++ continue; ++ } ++ ++ assert(Ins[i].getOrigArgIndex() < MF.getFunction().arg_size()); ++ std::advance(FuncArg, Ins[i].getOrigArgIndex()); ++ ++ OriginalArgWasF128.push_back( ++ originalTypeIsF128(FuncArg->getType(), nullptr)); ++ OriginalArgWasFloat.push_back(FuncArg->getType()->isFloatingPointTy()); ++ ++ // The LoongArch vector ABI exhibits a corner case of sorts or quirk; if the ++ // first argument is actually an SRet pointer to a vector, then the next ++ // argument slot is $a2. ++ OriginalArgWasFloatVector.push_back(FuncArg->getType()->isVectorTy()); ++ } ++} +diff --git a/lib/Target/LoongArch/LoongArchCCState.h b/lib/Target/LoongArch/LoongArchCCState.h +new file mode 100644 +index 00000000..56d5b89b +--- /dev/null ++++ b/lib/Target/LoongArch/LoongArchCCState.h +@@ -0,0 +1,165 @@ ++//===---- LoongArchCCState.h - CCState with LoongArch specific extensions -----------===// ++// ++// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. ++// See https://llvm.org/LICENSE.txt for license information. ++// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception ++// ++//===----------------------------------------------------------------------===// ++ ++#ifndef LoongArchCCSTATE_H ++#define LoongArchCCSTATE_H ++ ++#include "LoongArchISelLowering.h" ++#include "llvm/ADT/SmallVector.h" ++#include "llvm/CodeGen/CallingConvLower.h" ++ ++namespace llvm { ++class SDNode; ++class LoongArchSubtarget; ++ ++class LoongArchCCState : public CCState { ++public: ++ enum SpecialCallingConvType { NoSpecialCallingConv }; ++ ++ /// Determine the SpecialCallingConvType for the given callee ++ static SpecialCallingConvType ++ getSpecialCallingConvForCallee(const SDNode *Callee, ++ const LoongArchSubtarget &Subtarget); ++ ++private: ++ /// Identify lowered values that originated from f128 arguments and record ++ /// this for use by RetCC_LoongArchLP64. 
++ void PreAnalyzeCallResultForF128(const SmallVectorImpl &Ins, ++ const Type *RetTy, const char * Func); ++ ++ /// Identify lowered values that originated from f128 arguments and record ++ /// this for use by RetCC_LoongArchLP64. ++ void PreAnalyzeReturnForF128(const SmallVectorImpl &Outs); ++ ++ /// Identify lowered values that originated from f128 arguments and record ++ /// this. ++ void ++ PreAnalyzeCallOperands(const SmallVectorImpl &Outs, ++ std::vector &FuncArgs, ++ const char *Func); ++ ++ /// Identify lowered values that originated from f128 arguments and record ++ /// this for use by RetCC_LoongArchLP64. ++ void ++ PreAnalyzeFormalArgumentsForF128(const SmallVectorImpl &Ins); ++ ++ void ++ PreAnalyzeCallResultForVectorFloat(const SmallVectorImpl &Ins, ++ const Type *RetTy); ++ ++ void PreAnalyzeFormalArgumentsForVectorFloat( ++ const SmallVectorImpl &Ins); ++ ++ void ++ PreAnalyzeReturnForVectorFloat(const SmallVectorImpl &Outs); ++ ++ /// Records whether the value has been lowered from an f128. ++ SmallVector OriginalArgWasF128; ++ ++ /// Records whether the value has been lowered from float. ++ SmallVector OriginalArgWasFloat; ++ ++ /// Records whether the value has been lowered from a floating point vector. ++ SmallVector OriginalArgWasFloatVector; ++ ++ /// Records whether the return value has been lowered from a floating point ++ /// vector. ++ SmallVector OriginalRetWasFloatVector; ++ ++ /// Records whether the value was a fixed argument. ++ /// See ISD::OutputArg::IsFixed, ++ SmallVector CallOperandIsFixed; ++ ++ // FIXME: This should probably be a fully fledged calling convention. ++ SpecialCallingConvType SpecialCallingConv; ++ ++public: ++ LoongArchCCState(CallingConv::ID CC, bool isVarArg, MachineFunction &MF, ++ SmallVectorImpl &locs, LLVMContext &C, ++ SpecialCallingConvType SpecialCC = NoSpecialCallingConv) ++ : CCState(CC, isVarArg, MF, locs, C), SpecialCallingConv(SpecialCC) {} ++ ++ void ++ AnalyzeCallOperands(const SmallVectorImpl &Outs, ++ CCAssignFn Fn, ++ std::vector &FuncArgs, ++ const char *Func) { ++ PreAnalyzeCallOperands(Outs, FuncArgs, Func); ++ CCState::AnalyzeCallOperands(Outs, Fn); ++ OriginalArgWasF128.clear(); ++ OriginalArgWasFloat.clear(); ++ OriginalArgWasFloatVector.clear(); ++ CallOperandIsFixed.clear(); ++ } ++ ++ // The AnalyzeCallOperands in the base class is not usable since we must ++ // provide a means of accessing ArgListEntry::IsFixed. Delete them from this ++ // class. This doesn't stop them being used via the base class though. 
++ void AnalyzeCallOperands(const SmallVectorImpl &Outs, ++ CCAssignFn Fn) = delete; ++ void AnalyzeCallOperands(const SmallVectorImpl &Outs, ++ SmallVectorImpl &Flags, ++ CCAssignFn Fn) = delete; ++ ++ void AnalyzeFormalArguments(const SmallVectorImpl &Ins, ++ CCAssignFn Fn) { ++ PreAnalyzeFormalArgumentsForF128(Ins); ++ CCState::AnalyzeFormalArguments(Ins, Fn); ++ OriginalArgWasFloat.clear(); ++ OriginalArgWasF128.clear(); ++ OriginalArgWasFloatVector.clear(); ++ } ++ ++ void AnalyzeCallResult(const SmallVectorImpl &Ins, ++ CCAssignFn Fn, const Type *RetTy, ++ const char *Func) { ++ PreAnalyzeCallResultForF128(Ins, RetTy, Func); ++ PreAnalyzeCallResultForVectorFloat(Ins, RetTy); ++ CCState::AnalyzeCallResult(Ins, Fn); ++ OriginalArgWasFloat.clear(); ++ OriginalArgWasF128.clear(); ++ OriginalArgWasFloatVector.clear(); ++ } ++ ++ void AnalyzeReturn(const SmallVectorImpl &Outs, ++ CCAssignFn Fn) { ++ PreAnalyzeReturnForF128(Outs); ++ PreAnalyzeReturnForVectorFloat(Outs); ++ CCState::AnalyzeReturn(Outs, Fn); ++ OriginalArgWasFloat.clear(); ++ OriginalArgWasF128.clear(); ++ OriginalArgWasFloatVector.clear(); ++ } ++ ++ bool CheckReturn(const SmallVectorImpl &ArgsFlags, ++ CCAssignFn Fn) { ++ PreAnalyzeReturnForF128(ArgsFlags); ++ PreAnalyzeReturnForVectorFloat(ArgsFlags); ++ bool Return = CCState::CheckReturn(ArgsFlags, Fn); ++ OriginalArgWasFloat.clear(); ++ OriginalArgWasF128.clear(); ++ OriginalArgWasFloatVector.clear(); ++ return Return; ++ } ++ ++ bool WasOriginalArgF128(unsigned ValNo) { return OriginalArgWasF128[ValNo]; } ++ bool WasOriginalArgFloat(unsigned ValNo) { ++ return OriginalArgWasFloat[ValNo]; ++ } ++ bool WasOriginalArgVectorFloat(unsigned ValNo) const { ++ return OriginalArgWasFloatVector[ValNo]; ++ } ++ bool WasOriginalRetVectorFloat(unsigned ValNo) const { ++ return OriginalRetWasFloatVector[ValNo]; ++ } ++ bool IsCallOperandFixed(unsigned ValNo) { return CallOperandIsFixed[ValNo]; } ++ SpecialCallingConvType getSpecialCallingConv() { return SpecialCallingConv; } ++}; ++} ++ ++#endif +diff --git a/lib/Target/LoongArch/LoongArchCallingConv.td b/lib/Target/LoongArch/LoongArchCallingConv.td +new file mode 100644 +index 00000000..02bdb323 +--- /dev/null ++++ b/lib/Target/LoongArch/LoongArchCallingConv.td +@@ -0,0 +1,292 @@ ++//===-- LoongArchCallingConv.td - Calling Conventions for LoongArch --*- tablegen -*-===// ++// ++// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. ++// See https://llvm.org/LICENSE.txt for license information. ++// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception ++// ++//===----------------------------------------------------------------------===// ++// This describes the calling conventions for LoongArch architecture. ++//===----------------------------------------------------------------------===// ++ ++/// CCIfSubtarget - Match if the current subtarget has a feature F. ++class CCIfSubtarget ++ : CCIf" ++ "(State.getMachineFunction().getSubtarget()).", ++ F), ++ A>; ++ ++// The inverse of CCIfSubtarget ++class CCIfSubtargetNot : CCIfSubtarget; ++ ++/// Match if the original argument (before lowering) was a float. ++/// For example, this is true for i32's that were lowered from soft-float. ++class CCIfOrigArgWasNotFloat ++ : CCIf<"!static_cast(&State)->WasOriginalArgFloat(ValNo)", ++ A>; ++ ++/// Match if the original argument (before lowering) was a 128-bit float (i.e. ++/// long double). 
++class CCIfOrigArgWasF128
++    : CCIf<"static_cast(&State)->WasOriginalArgF128(ValNo)", A>;
++
++/// Match if this specific argument is a vararg.
++/// This is slightly different from CCIfVarArg, which matches if any argument
++/// is a vararg.
++class CCIfArgIsVarArg
++    : CCIf<"!static_cast(&State)->IsCallOperandFixed(ValNo)", A>;
++
++/// Match if the return was a floating point vector.
++class CCIfOrigArgWasNotVectorFloat
++    : CCIf<"!static_cast(&State)"
++           "->WasOriginalRetVectorFloat(ValNo)", A>;
++
++/// Match if the special calling conv is the specified value.
++class CCIfSpecialCallingConv
++    : CCIf<"static_cast(&State)->getSpecialCallingConv() == "
++           "LoongArchCCState::" # CC, A>;
++
++// For soft-float, an f128 value is returned in the GPR pair A0_64, A1_64.
++def RetCC_F128SoftFloat : CallingConv<[
++  CCAssignToReg<[A0_64, A1_64]>
++]>;
++
++//
++// For hard-float, f128 values are returned as a pair of f64's rather than a
++// pair of i64's.
++def RetCC_F128HardFloat : CallingConv<[
++  //CCBitConvertToType,
++
++  // Contrary to the ABI documentation, a struct containing a long double is
++  // returned in $f0 and $f1 instead of the usual $f0 and $f2. This is to
++  // match the de facto ABI as implemented by GCC.
++  CCIfInReg>,
++
++  CCAssignToReg<[A0_64, A1_64]>
++]>;
++
++// Handle F128 specially since we can't identify the original type in the
++// tablegen-erated code.
++def RetCC_F128 : CallingConv<[
++  CCIfSubtarget<"useSoftFloat()",
++      CCIfType<[i64], CCDelegateTo>>,
++  CCIfSubtargetNot<"useSoftFloat()",
++      CCIfType<[i64], CCDelegateTo>>
++]>;
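++
++// Illustrative example (a sketch of the intended flow, not an extra rule):
++// for an LP64 function returning `long double`, the f128 result is lowered
++// to a pair of i64 values. PreAnalyzeReturnForF128() records that both
++// pieces originated from an f128, so CCIfOrigArgWasF128 matches in
++// RetCC_LoongArchLP64 below and the pieces are routed through RetCC_F128,
++// which picks the soft-float or hard-float variant above; under
++// useSoftFloat() the two i64 pieces land in A0_64 and A1_64.
++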
++//===----------------------------------------------------------------------===//
++// LoongArch ILP32 Calling Convention
++//===----------------------------------------------------------------------===//
++
++def CC_LoongArchILP32 : CallingConv<[
++  // Promote i8/i16 arguments to i32.
++  CCIfType<[i1, i8, i16], CCPromoteToType>,
++
++  // Integer values get stored in stack slots that are 4 bytes in
++  // size and 4-byte aligned.
++  CCIfType<[i32, f32], CCAssignToStack<4, 4>>,
++
++  // f64 values get stored in stack slots that are 8 bytes in
++  // size and 8-byte aligned.
++  CCIfType<[f64], CCAssignToStack<8, 8>>
++]>;
++
++// Only the return rules are defined here for the 32-bit ABI. The rules for
++// argument passing are defined in LoongArchISelLowering.cpp.
++def RetCC_LoongArchILP32 : CallingConv<[
++  // Promote i1/i8/i16 return values to i32.
++  CCIfType<[i1, i8, i16], CCPromoteToType>,
++
++  // i32 values are returned in registers A0, A1, unless the original return
++  // type was a vector of floats.
++  CCIfOrigArgWasNotVectorFloat>>,
++
++  // f32 values are returned in registers F0, F1.
++  CCIfType<[f32], CCAssignToReg<[F0, F1]>>,
++
++  // f64 values are returned in F0_64 and F1_64 when hasBasicD() is true.
++  CCIfType<[f64], CCIfSubtarget<"hasBasicD()", CCAssignToReg<[F0_64, F1_64]>>>
++]>;
++
++def CC_LoongArchILP32_FP32 : CustomCallingConv;
++def CC_LoongArchILP32_FP64 : CustomCallingConv;
++def CC_LoongArch_F128 : CustomCallingConv;
++
++def CC_LoongArchILP32_FP : CallingConv<[
++  CCIfSubtargetNot<"hasBasicD()", CCDelegateTo>,
++  CCIfSubtarget<"hasBasicD()", CCDelegateTo>
++]>;
++
++//===----------------------------------------------------------------------===//
++// LoongArch LP64 Calling Convention
++//===----------------------------------------------------------------------===//
++
++def CC_LoongArchLP64_SoftFloat : CallingConv<[
++  CCAssignToReg<[A0, A1, A2, A3,
++                 A4, A5, A6, A7]>,
++  CCAssignToStack<4, 8>
++]>;
++
++def CC_LoongArchLP64 : CallingConv<[
++
++  // All integers (except soft-float integers) are promoted to 64-bit.
++  CCIfType<[i8, i16, i32], CCIfOrigArgWasNotFloat>>,
++
++  // The only i32's we have left are soft-float arguments.
++  CCIfSubtarget<"useSoftFloat()", CCIfType<[i32], CCDelegateTo>>,
++
++  // Integer arguments are passed in integer registers.
++  //CCIfType<[i64], CCAssignToRegWithShadow<[A0_64, A1_64, A2_64, A3_64,
++  //                                         A4_64, A5_64, A6_64, A7_64],
++  //                                        [F0_64, F1_64, F2_64, F3_64,
++  //                                         F4_64, F5_64, F6_64, F7_64]>>,
++  CCIfType<[i64], CCAssignToReg<[A0_64, A1_64, A2_64, A3_64,
++                                 A4_64, A5_64, A6_64, A7_64]>>,
++
++  // f32 arguments are passed in single precision FP registers.
++  CCIfType<[f32], CCAssignToReg<[F0, F1, F2, F3,
++                                 F4, F5, F6, F7]>>,
++
++  // f64 arguments are passed in double precision FP registers.
++  CCIfType<[f64], CCAssignToReg<[F0_64, F1_64, F2_64, F3_64,
++                                 F4_64, F5_64, F6_64, F7_64]>>,
++
++  // Any remaining f32 arguments are passed in general-purpose registers.
++  CCIfType<[f32], CCAssignToReg<[A0, A1, A2, A3, A4, A5, A6, A7]>>,
++
++  // Any remaining f64 arguments are passed in 64-bit general-purpose registers.
++  CCIfType<[f64], CCAssignToReg<[A0_64, A1_64, A2_64, A3_64,
++                                 A4_64, A5_64, A6_64, A7_64]>>,
++
++  CCIfSubtarget<"hasLSX()",
++      CCIfType<[v16i8, v8i16, v4i32, v2i64, v4f32, v2f64],
++          CCAssignToRegWithShadow<[VR0, VR1, VR2, VR3, VR4, VR5, VR6, VR7],
++                                  [A0_64, A1_64, A2_64, A3_64,
++                                   A4_64, A5_64, A6_64, A7_64]>>>,
++  CCIfSubtarget<"hasLASX()",
++      CCIfType<[v32i8, v16i16, v8i32, v4i64, v8f32, v4f64],
++          CCAssignToRegWithShadow<[XR0, XR1, XR2, XR3, XR4, XR5, XR6, XR7],
++                                  [A0_64, A1_64, A2_64, A3_64,
++                                   A4_64, A5_64, A6_64, A7_64]>>>,
++
++  // All stack parameter slots become 64-bit doublewords and are 8-byte aligned.
++  CCIfType<[f32], CCAssignToStack<4, 8>>,
++  CCIfType<[i64, f64], CCAssignToStack<8, 8>>,
++  CCIfType<[v16i8, v8i16, v4i32, v2i64, v4f32, v2f64],
++           CCAssignToStack<16, 16>>,
++  CCIfType<[v32i8, v16i16, v8i32, v4i64, v8f32, v4f64],
++           CCAssignToStack<32, 32>>
++]>;
++
++// LP64 variable arguments.
++// All arguments are passed in integer registers.
++def CC_LoongArchLP64_VarArg : CallingConv<[
++  // All integers are promoted to 64-bit.
++  CCIfType<[i8, i16, i32], CCPromoteToType>,
++
++  CCIfType<[f32], CCAssignToReg<[A0, A1, A2, A3, A4, A5, A6, A7]>>,
++
++  CCIfType<[i64], CCIfOrigArgWasF128>>,
++
++  CCIfType<[i64, f64], CCAssignToReg<[A0_64, A1_64, A2_64, A3_64,
++                                      A4_64, A5_64, A6_64, A7_64]>>,
++
++  // All stack parameter slots become 64-bit doublewords and are 8-byte aligned.
++  CCIfType<[f32], CCAssignToStack<4, 8>>,
++  CCIfType<[i64, f64], CCAssignToStack<8, 8>>
++]>;
++
++def RetCC_LoongArchLP64 : CallingConv<[
++  // f128 needs to be handled similarly to f32 and f64. However, f128 is not
++  // legal and is lowered to i128, which is further lowered to a pair of i64's.
++  // This presents us with a problem for the calling convention since
++  // hard-float still needs to return the value in FPU registers, while
++  // soft-float returns the pair in A0_64 and A1_64. We therefore resort to a
++  // pre-analyze step (see PreAnalyzeReturnForF128()) to pass information on
++  // whether the result was originally an f128 into the tablegen-erated code.
++  //
++  // f128 should only occur for the 64-bit ABI where long double is 128-bit.
++  CCIfType<[i64], CCIfOrigArgWasF128>>,
++
++  CCIfType<[i8, i16, i32, i64], CCIfInReg>>,
++
++  // i64 values are returned in registers A0_64, A1_64.
++  CCIfType<[i64], CCAssignToReg<[A0_64, A1_64]>>,
++
++  CCIfSubtarget<"hasLSX()",
++      CCIfType<[v16i8, v8i16, v4i32, v2i64, v4f32, v2f64], CCAssignToReg<[VR0]>>>,
++
++  CCIfSubtarget<"hasLASX()",
++      CCIfType<[v32i8, v16i16, v8i32, v4i64, v8f32, v4f64], CCAssignToReg<[XR0]>>>,
++
++  CCIfSubtarget<"hasLASX()",
++      CCIfType<[i64], CCAssignToReg<[A0_64, A1_64]>>>,
++
++  // f32 values are returned in registers F0, F1.
++  CCIfType<[f32], CCAssignToReg<[F0, F1]>>,
++
++  // f64 values are returned in registers F0_64, F1_64.
++  CCIfType<[f64], CCAssignToReg<[F0_64, F1_64]>>
++]>;
++
++//===----------------------------------------------------------------------===//
++// LoongArch Calling Convention Dispatch
++//===----------------------------------------------------------------------===//
++
++def RetCC_LoongArch : CallingConv<[
++  CCIfSubtarget<"isABI_LP64()", CCDelegateTo>,
++  CCDelegateTo
++]>;
++
++def CC_LoongArch_ByVal : CallingConv<[
++  CCIfSubtarget<"isABI_ILP32()", CCIfByVal>>,
++  CCIfByVal>
++]>;
++
++def CC_LoongArch_FixedArg : CallingConv<[
++  CCIfByVal>,
++  //CCIfByVal>>,
++
++  // f128 needs to be handled similarly to f32 and f64 on hard-float. However,
++  // f128 is not legal and is lowered to i128, which is further lowered to a
++  // pair of i64's.
++  // This presents us with a problem for the calling convention since
++  // hard-float still needs to pass them in FPU registers. We therefore resort
++  // to a pre-analyze step (see PreAnalyzeFormalArgumentsForF128()) to pass
++  // information on whether the argument was originally an f128 into the
++  // tablegen-erated code.
++  //
++  // f128 should only occur for the 64-bit ABI where long double is 128-bit.
++  CCIfType<[i64],
++      CCIfSubtargetNot<"useSoftFloat()",
++          CCIfOrigArgWasF128>>>,
++
++  CCIfSubtarget<"isABI_ILP32()", CCDelegateTo>,
++  CCDelegateTo
++]>;
++
++def CC_LoongArch_VarArg : CallingConv<[
++  CCIfByVal>,
++
++  CCIfSubtarget<"isABI_ILP32()", CCDelegateTo>,
++  CCDelegateTo
++]>;
++
++def CC_LoongArch : CallingConv<[
++  CCIfVarArg>>,
++  CCDelegateTo
++]>;
++
++//===----------------------------------------------------------------------===//
++// Callee-saved register lists.
++//===----------------------------------------------------------------------===// ++ ++def CSR_SingleFloatOnly : CalleeSavedRegs<(add (sequence "F%u", 31, 24), RA, FP, ++ (sequence "S%u", 8, 0))>; ++ ++def CSR_ILP32 : CalleeSavedRegs<(add (sequence "F%u_64", 31, 24), RA, FP, ++ (sequence "S%u", 8, 0))>; ++ ++def CSR_LP64 : CalleeSavedRegs<(add (sequence "F%u_64", 31, 24), RA_64, FP_64, ++ (sequence "S%u_64", 8, 0))>; +diff --git a/lib/Target/LoongArch/LoongArchExpandPseudo.cpp b/lib/Target/LoongArch/LoongArchExpandPseudo.cpp +new file mode 100644 +index 00000000..c192f7fc +--- /dev/null ++++ b/lib/Target/LoongArch/LoongArchExpandPseudo.cpp +@@ -0,0 +1,2438 @@ ++//===-- LoongArchExpandPseudoInsts.cpp - Expand pseudo instructions ------------===// ++// ++// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. ++// See https://llvm.org/LICENSE.txt for license information. ++// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception ++// ++//===----------------------------------------------------------------------===// ++// ++// This file contains a pass that expands pseudo instructions into target ++// instructions to allow proper scheduling, if-conversion, and other late ++// optimizations. This pass should be run after register allocation but before ++// the post-regalloc scheduling pass. ++// ++// This is currently only used for expanding atomic pseudos after register ++// allocation. We do this to avoid the fast register allocator introducing ++// spills between ll and sc. These stores cause some LoongArch implementations to ++// abort the atomic RMW sequence. ++// ++//===----------------------------------------------------------------------===// ++ ++#include "LoongArch.h" ++#include "LoongArchInstrInfo.h" ++#include "LoongArchSubtarget.h" ++#include "MCTargetDesc/LoongArchMCTargetDesc.h" ++#include "llvm/CodeGen/LivePhysRegs.h" ++#include "llvm/CodeGen/MachineFunctionPass.h" ++#include "llvm/CodeGen/MachineInstrBuilder.h" ++ ++using namespace llvm; ++ ++#define DEBUG_TYPE "loongarch-pseudo" ++ ++namespace { ++ class LoongArchExpandPseudo : public MachineFunctionPass { ++ public: ++ static char ID; ++ LoongArchExpandPseudo() : MachineFunctionPass(ID) {} ++ ++ const LoongArchInstrInfo *TII; ++ const LoongArchSubtarget *STI; ++ ++ bool runOnMachineFunction(MachineFunction &Fn) override; ++ ++ MachineFunctionProperties getRequiredProperties() const override { ++ return MachineFunctionProperties().set( ++ MachineFunctionProperties::Property::NoVRegs); ++ } ++ ++ StringRef getPassName() const override { ++ return "LoongArch pseudo instruction expansion pass"; ++ } ++ ++ private: ++ bool expandAtomicCmpSwap(MachineBasicBlock &MBB, ++ MachineBasicBlock::iterator MBBI, ++ MachineBasicBlock::iterator &NextMBBI); ++ bool expandAtomicCmpSwapSubword(MachineBasicBlock &MBB, ++ MachineBasicBlock::iterator MBBI, ++ MachineBasicBlock::iterator &NextMBBI); ++ ++ bool expandAtomicBinOp(MachineBasicBlock &BB, ++ MachineBasicBlock::iterator I, ++ MachineBasicBlock::iterator &NMBBI, unsigned Size); ++ bool expandXINSERT_BOp(MachineBasicBlock &BB, MachineBasicBlock::iterator I, ++ MachineBasicBlock::iterator &NMBBI); ++ bool expandINSERT_HOp(MachineBasicBlock &BB, MachineBasicBlock::iterator I, ++ MachineBasicBlock::iterator &NMBBI); ++ bool expandXINSERT_FWOp(MachineBasicBlock &BB, ++ MachineBasicBlock::iterator I, ++ MachineBasicBlock::iterator &NMBBI); ++ bool expandAtomicBinOpSubword(MachineBasicBlock &BB, ++ MachineBasicBlock::iterator I, ++ MachineBasicBlock::iterator &NMBBI); 
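++
++  // Note: the subword (i8/i16) atomic pseudos are expanded into an
++  // LL.W/SC.W retry loop on the aligned containing word, using the
++  // mask/shift operands carried by the *_POSTRA pseudo, and a DBAR is
++  // inserted at the merge block when one is not already present (see
++  // expandAtomicCmpSwapSubword below for the compare-and-swap case).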
++ ++ bool expandPseudoCall(MachineBasicBlock &BB, ++ MachineBasicBlock::iterator I, ++ MachineBasicBlock::iterator &NMBBI); ++ bool expandPseudoTailCall(MachineBasicBlock &BB, ++ MachineBasicBlock::iterator I); ++ ++ bool expandPseudoTEQ(MachineBasicBlock &BB, ++ MachineBasicBlock::iterator I, ++ MachineBasicBlock::iterator &NMBBI); ++ ++ bool expandLoadAddr(MachineBasicBlock &BB, ++ MachineBasicBlock::iterator I, ++ MachineBasicBlock::iterator &NMBBI); ++ ++ bool expandMI(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, ++ MachineBasicBlock::iterator &NMBB); ++ bool expandMBB(MachineBasicBlock &MBB); ++ }; ++ char LoongArchExpandPseudo::ID = 0; ++} ++ ++static bool hasDbar(MachineBasicBlock *MBB) { ++ ++ for (MachineBasicBlock::iterator MBBb = MBB->begin(), MBBe = MBB->end(); ++ MBBb != MBBe; ++MBBb) { ++ if (MBBb->getOpcode() == LoongArch::DBAR) ++ return true; ++ if (MBBb->mayLoad() || MBBb->mayStore()) ++ break; ++ } ++ return false; ++} ++ ++bool LoongArchExpandPseudo::expandAtomicCmpSwapSubword( ++ MachineBasicBlock &BB, MachineBasicBlock::iterator I, ++ MachineBasicBlock::iterator &NMBBI) { ++ ++ MachineFunction *MF = BB.getParent(); ++ ++ DebugLoc DL = I->getDebugLoc(); ++ unsigned LL, SC; ++ unsigned ZERO = LoongArch::ZERO; ++ unsigned BNE = LoongArch::BNE32; ++ unsigned BEQ = LoongArch::BEQ32; ++ unsigned SEOp = ++ I->getOpcode() == LoongArch::ATOMIC_CMP_SWAP_I8_POSTRA ? LoongArch::EXT_W_B32 : LoongArch::EXT_W_H32; ++ ++ LL = LoongArch::LL_W; ++ SC = LoongArch::SC_W; ++ ++ unsigned Dest = I->getOperand(0).getReg(); ++ unsigned Ptr = I->getOperand(1).getReg(); ++ unsigned Mask = I->getOperand(2).getReg(); ++ unsigned ShiftCmpVal = I->getOperand(3).getReg(); ++ unsigned Mask2 = I->getOperand(4).getReg(); ++ unsigned ShiftNewVal = I->getOperand(5).getReg(); ++ unsigned ShiftAmnt = I->getOperand(6).getReg(); ++ unsigned Scratch = I->getOperand(7).getReg(); ++ unsigned Scratch2 = I->getOperand(8).getReg(); ++ ++ // insert new blocks after the current block ++ const BasicBlock *LLVM_BB = BB.getBasicBlock(); ++ MachineBasicBlock *loop1MBB = MF->CreateMachineBasicBlock(LLVM_BB); ++ MachineBasicBlock *loop2MBB = MF->CreateMachineBasicBlock(LLVM_BB); ++ MachineBasicBlock *sinkMBB = MF->CreateMachineBasicBlock(LLVM_BB); ++ MachineBasicBlock *exitMBB = MF->CreateMachineBasicBlock(LLVM_BB); ++ MachineFunction::iterator It = ++BB.getIterator(); ++ MF->insert(It, loop1MBB); ++ MF->insert(It, loop2MBB); ++ MF->insert(It, sinkMBB); ++ MF->insert(It, exitMBB); ++ ++ // Transfer the remainder of BB and its successor edges to exitMBB. ++ exitMBB->splice(exitMBB->begin(), &BB, ++ std::next(MachineBasicBlock::iterator(I)), BB.end()); ++ exitMBB->transferSuccessorsAndUpdatePHIs(&BB); ++ ++ // thisMBB: ++ // ... 
++ // fallthrough --> loop1MBB ++ BB.addSuccessor(loop1MBB, BranchProbability::getOne()); ++ loop1MBB->addSuccessor(sinkMBB); ++ loop1MBB->addSuccessor(loop2MBB); ++ loop1MBB->normalizeSuccProbs(); ++ loop2MBB->addSuccessor(loop1MBB); ++ loop2MBB->addSuccessor(sinkMBB); ++ loop2MBB->normalizeSuccProbs(); ++ sinkMBB->addSuccessor(exitMBB, BranchProbability::getOne()); ++ ++ // loop1MBB: ++ // ll dest, 0(ptr) ++ // and Mask', dest, Mask ++ // bne Mask', ShiftCmpVal, exitMBB ++ BuildMI(loop1MBB, DL, TII->get(LL), Scratch).addReg(Ptr).addImm(0); ++ BuildMI(loop1MBB, DL, TII->get(LoongArch::AND32), Scratch2) ++ .addReg(Scratch) ++ .addReg(Mask); ++ BuildMI(loop1MBB, DL, TII->get(BNE)) ++ .addReg(Scratch2).addReg(ShiftCmpVal).addMBB(sinkMBB); ++ ++ // loop2MBB: ++ // and dest, dest, mask2 ++ // or dest, dest, ShiftNewVal ++ // sc dest, dest, 0(ptr) ++ // beq dest, $0, loop1MBB ++ BuildMI(loop2MBB, DL, TII->get(LoongArch::AND32), Scratch) ++ .addReg(Scratch, RegState::Kill) ++ .addReg(Mask2); ++ BuildMI(loop2MBB, DL, TII->get(LoongArch::OR32), Scratch) ++ .addReg(Scratch, RegState::Kill) ++ .addReg(ShiftNewVal); ++ BuildMI(loop2MBB, DL, TII->get(SC), Scratch) ++ .addReg(Scratch, RegState::Kill) ++ .addReg(Ptr) ++ .addImm(0); ++ BuildMI(loop2MBB, DL, TII->get(BEQ)) ++ .addReg(Scratch, RegState::Kill) ++ .addReg(ZERO) ++ .addMBB(loop1MBB); ++ ++ // sinkMBB: ++ // srl srlres, Mask', shiftamt ++ // sign_extend dest,srlres ++ BuildMI(sinkMBB, DL, TII->get(LoongArch::SRL_W), Dest) ++ .addReg(Scratch2) ++ .addReg(ShiftAmnt); ++ ++ BuildMI(sinkMBB, DL, TII->get(SEOp), Dest).addReg(Dest); ++ ++ if (!hasDbar(sinkMBB)) { ++ MachineBasicBlock::iterator Pos = sinkMBB->begin(); ++ BuildMI(*sinkMBB, Pos, DL, TII->get(LoongArch::DBAR)).addImm(DBAR_HINT); ++ } ++ ++ LivePhysRegs LiveRegs; ++ computeAndAddLiveIns(LiveRegs, *loop1MBB); ++ computeAndAddLiveIns(LiveRegs, *loop2MBB); ++ computeAndAddLiveIns(LiveRegs, *sinkMBB); ++ computeAndAddLiveIns(LiveRegs, *exitMBB); ++ ++ NMBBI = BB.end(); ++ I->eraseFromParent(); ++ return true; ++} ++ ++bool LoongArchExpandPseudo::expandAtomicCmpSwap(MachineBasicBlock &BB, ++ MachineBasicBlock::iterator I, ++ MachineBasicBlock::iterator &NMBBI) { ++ ++ const unsigned Size = ++ I->getOpcode() == LoongArch::ATOMIC_CMP_SWAP_I32_POSTRA ? 4 : 8; ++ MachineFunction *MF = BB.getParent(); ++ ++ DebugLoc DL = I->getDebugLoc(); ++ ++ unsigned LL, SC, ZERO, BNE, BEQ, MOVE; ++ ++ if (Size == 4) { ++ LL = LoongArch::LL_W; ++ SC = LoongArch::SC_W; ++ BNE = LoongArch::BNE32; ++ BEQ = LoongArch::BEQ32; ++ ++ ZERO = LoongArch::ZERO; ++ MOVE = LoongArch::OR32; ++ } else { ++ LL = LoongArch::LL_D; ++ SC = LoongArch::SC_D; ++ ZERO = LoongArch::ZERO_64; ++ BNE = LoongArch::BNE; ++ BEQ = LoongArch::BEQ; ++ MOVE = LoongArch::OR; ++ } ++ ++ unsigned Dest = I->getOperand(0).getReg(); ++ unsigned Ptr = I->getOperand(1).getReg(); ++ unsigned OldVal = I->getOperand(2).getReg(); ++ unsigned NewVal = I->getOperand(3).getReg(); ++ unsigned Scratch = I->getOperand(4).getReg(); ++ ++ // insert new blocks after the current block ++ const BasicBlock *LLVM_BB = BB.getBasicBlock(); ++ MachineBasicBlock *loop1MBB = MF->CreateMachineBasicBlock(LLVM_BB); ++ MachineBasicBlock *loop2MBB = MF->CreateMachineBasicBlock(LLVM_BB); ++ MachineBasicBlock *exitMBB = MF->CreateMachineBasicBlock(LLVM_BB); ++ MachineFunction::iterator It = ++BB.getIterator(); ++ MF->insert(It, loop1MBB); ++ MF->insert(It, loop2MBB); ++ MF->insert(It, exitMBB); ++ ++ // Transfer the remainder of BB and its successor edges to exitMBB. 
++ exitMBB->splice(exitMBB->begin(), &BB, ++ std::next(MachineBasicBlock::iterator(I)), BB.end()); ++ exitMBB->transferSuccessorsAndUpdatePHIs(&BB); ++ ++ // thisMBB: ++ // ... ++ // fallthrough --> loop1MBB ++ BB.addSuccessor(loop1MBB, BranchProbability::getOne()); ++ loop1MBB->addSuccessor(exitMBB); ++ loop1MBB->addSuccessor(loop2MBB); ++ loop1MBB->normalizeSuccProbs(); ++ loop2MBB->addSuccessor(loop1MBB); ++ loop2MBB->addSuccessor(exitMBB); ++ loop2MBB->normalizeSuccProbs(); ++ ++ // loop1MBB: ++ // ll dest, 0(ptr) ++ // bne dest, oldval, exitMBB ++ BuildMI(loop1MBB, DL, TII->get(LL), Dest).addReg(Ptr).addImm(0); ++ BuildMI(loop1MBB, DL, TII->get(BNE)) ++ .addReg(Dest, RegState::Kill).addReg(OldVal).addMBB(exitMBB); ++ ++ // loop2MBB: ++ // move scratch, NewVal ++ // sc Scratch, Scratch, 0(ptr) ++ // beq Scratch, $0, loop1MBB ++ BuildMI(loop2MBB, DL, TII->get(MOVE), Scratch).addReg(NewVal).addReg(ZERO); ++ BuildMI(loop2MBB, DL, TII->get(SC), Scratch) ++ .addReg(Scratch).addReg(Ptr).addImm(0); ++ BuildMI(loop2MBB, DL, TII->get(BEQ)) ++ .addReg(Scratch, RegState::Kill).addReg(ZERO).addMBB(loop1MBB); ++ ++ if (!hasDbar(exitMBB)) { ++ MachineBasicBlock::iterator Pos = exitMBB->begin(); ++ BuildMI(*exitMBB, Pos, DL, TII->get(LoongArch::DBAR)).addImm(DBAR_HINT); ++ } ++ ++ LivePhysRegs LiveRegs; ++ computeAndAddLiveIns(LiveRegs, *loop1MBB); ++ computeAndAddLiveIns(LiveRegs, *loop2MBB); ++ computeAndAddLiveIns(LiveRegs, *exitMBB); ++ ++ NMBBI = BB.end(); ++ I->eraseFromParent(); ++ return true; ++} ++ ++bool LoongArchExpandPseudo::expandXINSERT_FWOp( ++ MachineBasicBlock &BB, MachineBasicBlock::iterator I, ++ MachineBasicBlock::iterator &NMBBI) { ++ ++ MachineFunction *MF = BB.getParent(); ++ ++ DebugLoc DL = I->getDebugLoc(); ++ ++ unsigned isGP64 = 0; ++ switch (I->getOpcode()) { ++ case LoongArch::XINSERT_FW_VIDX64_PSEUDO_POSTRA: ++ isGP64 = 1; ++ break; ++ case LoongArch::XINSERT_FW_VIDX_PSEUDO_POSTRA: ++ break; ++ default: ++ llvm_unreachable("Unknown subword vector pseudo for expansion!"); ++ } ++ ++ unsigned Dest = I->getOperand(0).getReg(); ++ unsigned SrcVecReg = I->getOperand(1).getReg(); ++ unsigned LaneReg = I->getOperand(2).getReg(); ++ unsigned SrcValReg = I->getOperand(3).getReg(); ++ ++ unsigned Dsttmp = I->getOperand(4).getReg(); ++ unsigned RI = I->getOperand(5).getReg(); ++ unsigned RJ = I->getOperand(6).getReg(); ++ Dsttmp = SrcVecReg; ++ ++ const BasicBlock *LLVM_BB = BB.getBasicBlock(); ++ MachineBasicBlock *blocks[11]; ++ MachineFunction::iterator It = ++BB.getIterator(); ++ for (int i = 0; i < 11; i++) { ++ blocks[i] = MF->CreateMachineBasicBlock(LLVM_BB); ++ MF->insert(It, blocks[i]); ++ } ++ ++ MachineBasicBlock *mainMBB = blocks[0]; ++ MachineBasicBlock *FirstMBB = blocks[1]; ++ MachineBasicBlock *sinkMBB = blocks[9]; ++ MachineBasicBlock *exitMBB = blocks[10]; ++ ++ exitMBB->splice(exitMBB->begin(), &BB, std::next(I), BB.end()); ++ exitMBB->transferSuccessorsAndUpdatePHIs(&BB); ++ ++ BB.addSuccessor(mainMBB, BranchProbability::getOne()); ++ for (int i = 1; i < 9; i++) { ++ mainMBB->addSuccessor(blocks[i]); ++ blocks[i]->addSuccessor(sinkMBB); ++ } ++ ++ unsigned ADDI, BLT, ZERO; ++ ADDI = isGP64 ? LoongArch::ADDI_D : LoongArch::ADDI_W; ++ BLT = isGP64 ? LoongArch::BLT : LoongArch::BLT32; ++ ZERO = isGP64 ? 
LoongArch::ZERO_64 : LoongArch::ZERO; ++ ++ for (int i = 1; i < 8; i++) { ++ BuildMI(mainMBB, DL, TII->get(ADDI), RI).addReg(ZERO).addImm(i); ++ BuildMI(mainMBB, DL, TII->get(BLT)) ++ .addReg(LaneReg) ++ .addReg(RI) ++ .addMBB(blocks[i + 1]); ++ } ++ ++ BuildMI(mainMBB, DL, TII->get(LoongArch::B32)).addMBB(FirstMBB); ++ ++ BuildMI(FirstMBB, DL, TII->get(LoongArch::XVINSGR2VR_W), Dsttmp) ++ .addReg(SrcVecReg) ++ .addReg(RJ) ++ .addImm(7); ++ BuildMI(FirstMBB, DL, TII->get(LoongArch::B32)).addMBB(sinkMBB); ++ ++ for (int i = 0; i < 7; i++) { ++ BuildMI(blocks[i + 2], DL, TII->get(LoongArch::XVINSGR2VR_W), Dsttmp) ++ .addReg(SrcVecReg) ++ .addReg(RJ) ++ .addImm(i); ++ BuildMI(blocks[i + 2], DL, TII->get(LoongArch::B32)).addMBB(sinkMBB); ++ } ++ ++ sinkMBB->addSuccessor(exitMBB, BranchProbability::getOne()); ++ BuildMI(sinkMBB, DL, TII->get(LoongArch::XVORI_B), Dest) ++ .addReg(Dsttmp) ++ .addImm(0); ++ ++ LivePhysRegs LiveRegs; ++ for (int i = 0; i < 11; i++) { ++ computeAndAddLiveIns(LiveRegs, *blocks[i]); ++ } ++ ++ NMBBI = BB.end(); ++ I->eraseFromParent(); ++ ++ return true; ++} ++ ++bool LoongArchExpandPseudo::expandINSERT_HOp( ++ MachineBasicBlock &BB, MachineBasicBlock::iterator I, ++ MachineBasicBlock::iterator &NMBBI) { ++ ++ MachineFunction *MF = BB.getParent(); ++ ++ DebugLoc DL = I->getDebugLoc(); ++ ++ unsigned isGP64 = 0; ++ switch (I->getOpcode()) { ++ case LoongArch::INSERT_H_VIDX64_PSEUDO_POSTRA: ++ isGP64 = 1; ++ break; ++ default: ++ llvm_unreachable("Unknown subword vector pseudo for expansion!"); ++ } ++ ++ unsigned Dest = I->getOperand(0).getReg(); ++ unsigned SrcVecReg = I->getOperand(1).getReg(); ++ unsigned LaneReg = I->getOperand(2).getReg(); ++ unsigned SrcValReg = I->getOperand(3).getReg(); ++ ++ unsigned Dsttmp = I->getOperand(4).getReg(); ++ unsigned RI = I->getOperand(5).getReg(); ++ Dsttmp = SrcVecReg; ++ ++ const BasicBlock *LLVM_BB = BB.getBasicBlock(); ++ MachineBasicBlock *blocks[11]; ++ MachineFunction::iterator It = ++BB.getIterator(); ++ for (int i = 0; i < 11; i++) { ++ blocks[i] = MF->CreateMachineBasicBlock(LLVM_BB); ++ MF->insert(It, blocks[i]); ++ } ++ ++ MachineBasicBlock *mainMBB = blocks[0]; ++ MachineBasicBlock *FirstMBB = blocks[1]; ++ MachineBasicBlock *sinkMBB = blocks[9]; ++ MachineBasicBlock *exitMBB = blocks[10]; ++ ++ exitMBB->splice(exitMBB->begin(), &BB, std::next(I), BB.end()); ++ exitMBB->transferSuccessorsAndUpdatePHIs(&BB); ++ ++ BB.addSuccessor(mainMBB, BranchProbability::getOne()); ++ for (int i = 1; i < 9; i++) { ++ mainMBB->addSuccessor(blocks[i]); ++ blocks[i]->addSuccessor(sinkMBB); ++ } ++ ++ unsigned ADDI, BLT, ZERO; ++ ADDI = isGP64 ? LoongArch::ADDI_D : LoongArch::ADDI_W; ++ BLT = isGP64 ? LoongArch::BLT : LoongArch::BLT32; ++ ZERO = isGP64 ? 
LoongArch::ZERO_64 : LoongArch::ZERO; ++ ++ for (int i = 1; i < 8; i++) { ++ BuildMI(mainMBB, DL, TII->get(ADDI), RI).addReg(ZERO).addImm(i); ++ BuildMI(mainMBB, DL, TII->get(BLT)) ++ .addReg(LaneReg) ++ .addReg(RI) ++ .addMBB(blocks[i + 1]); ++ } ++ ++ BuildMI(mainMBB, DL, TII->get(LoongArch::B32)).addMBB(FirstMBB); ++ ++ BuildMI(FirstMBB, DL, TII->get(LoongArch::VINSGR2VR_H), Dsttmp) ++ .addReg(SrcVecReg) ++ .addReg(SrcValReg) ++ .addImm(7); ++ BuildMI(FirstMBB, DL, TII->get(LoongArch::B32)).addMBB(sinkMBB); ++ ++ for (int i = 0; i < 7; i++) { ++ BuildMI(blocks[i + 2], DL, TII->get(LoongArch::VINSGR2VR_H), Dsttmp) ++ .addReg(SrcVecReg) ++ .addReg(SrcValReg) ++ .addImm(i); ++ BuildMI(blocks[i + 2], DL, TII->get(LoongArch::B32)).addMBB(sinkMBB); ++ } ++ ++ sinkMBB->addSuccessor(exitMBB, BranchProbability::getOne()); ++ BuildMI(sinkMBB, DL, TII->get(LoongArch::VORI_B), Dest) ++ .addReg(Dsttmp) ++ .addImm(0); ++ ++ LivePhysRegs LiveRegs; ++ for (int i = 0; i < 11; i++) { ++ computeAndAddLiveIns(LiveRegs, *blocks[i]); ++ } ++ ++ NMBBI = BB.end(); ++ I->eraseFromParent(); ++ ++ return true; ++} ++ ++bool LoongArchExpandPseudo::expandXINSERT_BOp( ++ MachineBasicBlock &BB, MachineBasicBlock::iterator I, ++ MachineBasicBlock::iterator &NMBBI) { ++ ++ MachineFunction *MF = BB.getParent(); ++ ++ DebugLoc DL = I->getDebugLoc(); ++ ++ unsigned isGP64 = 0; ++ switch (I->getOpcode()) { ++ case LoongArch::XINSERT_B_VIDX64_PSEUDO_POSTRA: ++ isGP64 = 1; ++ break; ++ case LoongArch::XINSERT_B_VIDX_PSEUDO_POSTRA: ++ break; ++ default: ++ llvm_unreachable("Unknown subword vector pseudo for expansion!"); ++ } ++ ++ unsigned Dest = I->getOperand(0).getReg(); ++ unsigned SrcVecReg = I->getOperand(1).getReg(); ++ unsigned LaneReg = I->getOperand(2).getReg(); ++ unsigned SrcValReg = I->getOperand(3).getReg(); ++ ++ unsigned R4r = I->getOperand(5).getReg(); ++ unsigned Rib = I->getOperand(6).getReg(); ++ unsigned Ris = I->getOperand(7).getReg(); ++ unsigned R7b1 = I->getOperand(8).getReg(); ++ unsigned R7b2 = I->getOperand(9).getReg(); ++ unsigned R7b3 = I->getOperand(10).getReg(); ++ unsigned R7r80_3 = I->getOperand(11).getReg(); ++ unsigned R7r80l_3 = I->getOperand(12).getReg(); ++ unsigned R7r81_3 = I->getOperand(13).getReg(); ++ unsigned R7r81l_3 = I->getOperand(14).getReg(); ++ unsigned R7r82_3 = I->getOperand(15).getReg(); ++ unsigned R7r82l_3 = I->getOperand(16).getReg(); ++ unsigned RI = I->getOperand(17).getReg(); ++ unsigned tmp_Dst73 = I->getOperand(18).getReg(); ++ unsigned Rimm = I->getOperand(19).getReg(); ++ unsigned R70 = I->getOperand(20).getReg(); ++ tmp_Dst73 = SrcVecReg; ++ ++ const BasicBlock *LLVM_BB = BB.getBasicBlock(); ++ MachineBasicBlock *mainMBB = MF->CreateMachineBasicBlock(LLVM_BB); ++ MachineBasicBlock *SevenMBB = MF->CreateMachineBasicBlock(LLVM_BB); ++ MachineBasicBlock *SevenMBB0 = MF->CreateMachineBasicBlock(LLVM_BB); ++ MachineBasicBlock *SevenMBB1 = MF->CreateMachineBasicBlock(LLVM_BB); ++ MachineBasicBlock *SevenMBB2 = MF->CreateMachineBasicBlock(LLVM_BB); ++ MachineBasicBlock *SevenMBB3 = MF->CreateMachineBasicBlock(LLVM_BB); ++ MachineBasicBlock *ZeroMBB = MF->CreateMachineBasicBlock(LLVM_BB); ++ MachineBasicBlock *ZeroMBB0 = MF->CreateMachineBasicBlock(LLVM_BB); ++ MachineBasicBlock *ZeroMBB1 = MF->CreateMachineBasicBlock(LLVM_BB); ++ MachineBasicBlock *ZeroMBB2 = MF->CreateMachineBasicBlock(LLVM_BB); ++ MachineBasicBlock *ZeroMBB3 = MF->CreateMachineBasicBlock(LLVM_BB); ++ MachineBasicBlock *OneMBB = MF->CreateMachineBasicBlock(LLVM_BB); ++ MachineBasicBlock *OneMBB0 = 
MF->CreateMachineBasicBlock(LLVM_BB); ++ MachineBasicBlock *OneMBB1 = MF->CreateMachineBasicBlock(LLVM_BB); ++ MachineBasicBlock *OneMBB2 = MF->CreateMachineBasicBlock(LLVM_BB); ++ MachineBasicBlock *OneMBB3 = MF->CreateMachineBasicBlock(LLVM_BB); ++ MachineBasicBlock *TwoMBB = MF->CreateMachineBasicBlock(LLVM_BB); ++ MachineBasicBlock *TwoMBB0 = MF->CreateMachineBasicBlock(LLVM_BB); ++ MachineBasicBlock *TwoMBB1 = MF->CreateMachineBasicBlock(LLVM_BB); ++ MachineBasicBlock *TwoMBB2 = MF->CreateMachineBasicBlock(LLVM_BB); ++ MachineBasicBlock *TwoMBB3 = MF->CreateMachineBasicBlock(LLVM_BB); ++ MachineBasicBlock *ThreeMBB = MF->CreateMachineBasicBlock(LLVM_BB); ++ MachineBasicBlock *ThreeMBB0 = MF->CreateMachineBasicBlock(LLVM_BB); ++ MachineBasicBlock *ThreeMBB1 = MF->CreateMachineBasicBlock(LLVM_BB); ++ MachineBasicBlock *ThreeMBB2 = MF->CreateMachineBasicBlock(LLVM_BB); ++ MachineBasicBlock *ThreeMBB3 = MF->CreateMachineBasicBlock(LLVM_BB); ++ MachineBasicBlock *FourMBB = MF->CreateMachineBasicBlock(LLVM_BB); ++ MachineBasicBlock *FourMBB0 = MF->CreateMachineBasicBlock(LLVM_BB); ++ MachineBasicBlock *FourMBB1 = MF->CreateMachineBasicBlock(LLVM_BB); ++ MachineBasicBlock *FourMBB2 = MF->CreateMachineBasicBlock(LLVM_BB); ++ MachineBasicBlock *FourMBB3 = MF->CreateMachineBasicBlock(LLVM_BB); ++ MachineBasicBlock *FiveMBB = MF->CreateMachineBasicBlock(LLVM_BB); ++ MachineBasicBlock *FiveMBB0 = MF->CreateMachineBasicBlock(LLVM_BB); ++ MachineBasicBlock *FiveMBB1 = MF->CreateMachineBasicBlock(LLVM_BB); ++ MachineBasicBlock *FiveMBB2 = MF->CreateMachineBasicBlock(LLVM_BB); ++ MachineBasicBlock *FiveMBB3 = MF->CreateMachineBasicBlock(LLVM_BB); ++ MachineBasicBlock *SixMBB = MF->CreateMachineBasicBlock(LLVM_BB); ++ MachineBasicBlock *SixMBB0 = MF->CreateMachineBasicBlock(LLVM_BB); ++ MachineBasicBlock *SixMBB1 = MF->CreateMachineBasicBlock(LLVM_BB); ++ MachineBasicBlock *SixMBB2 = MF->CreateMachineBasicBlock(LLVM_BB); ++ MachineBasicBlock *SixMBB3 = MF->CreateMachineBasicBlock(LLVM_BB); ++ MachineBasicBlock *sinkMBB = MF->CreateMachineBasicBlock(LLVM_BB); ++ MachineBasicBlock *exitMBB = MF->CreateMachineBasicBlock(LLVM_BB); ++ MachineFunction::iterator It = ++BB.getIterator(); ++ MF->insert(It, mainMBB); ++ MF->insert(It, SevenMBB); ++ MF->insert(It, SevenMBB3); ++ MF->insert(It, SevenMBB0); ++ MF->insert(It, SevenMBB1); ++ MF->insert(It, SevenMBB2); ++ MF->insert(It, ZeroMBB); ++ MF->insert(It, ZeroMBB3); ++ MF->insert(It, ZeroMBB0); ++ MF->insert(It, ZeroMBB1); ++ MF->insert(It, ZeroMBB2); ++ MF->insert(It, OneMBB); ++ MF->insert(It, OneMBB3); ++ MF->insert(It, OneMBB0); ++ MF->insert(It, OneMBB1); ++ MF->insert(It, OneMBB2); ++ MF->insert(It, TwoMBB); ++ MF->insert(It, TwoMBB3); ++ MF->insert(It, TwoMBB0); ++ MF->insert(It, TwoMBB1); ++ MF->insert(It, TwoMBB2); ++ MF->insert(It, ThreeMBB); ++ MF->insert(It, ThreeMBB3); ++ MF->insert(It, ThreeMBB0); ++ MF->insert(It, ThreeMBB1); ++ MF->insert(It, ThreeMBB2); ++ MF->insert(It, FourMBB); ++ MF->insert(It, FourMBB3); ++ MF->insert(It, FourMBB0); ++ MF->insert(It, FourMBB1); ++ MF->insert(It, FourMBB2); ++ MF->insert(It, FiveMBB); ++ MF->insert(It, FiveMBB3); ++ MF->insert(It, FiveMBB0); ++ MF->insert(It, FiveMBB1); ++ MF->insert(It, FiveMBB2); ++ MF->insert(It, SixMBB); ++ MF->insert(It, SixMBB3); ++ MF->insert(It, SixMBB0); ++ MF->insert(It, SixMBB1); ++ MF->insert(It, SixMBB2); ++ MF->insert(It, sinkMBB); ++ MF->insert(It, exitMBB); ++ ++ exitMBB->splice(exitMBB->begin(), &BB, std::next(I), BB.end()); ++ 
exitMBB->transferSuccessorsAndUpdatePHIs(&BB); ++ ++ BB.addSuccessor(mainMBB, BranchProbability::getOne()); ++ mainMBB->addSuccessor(SevenMBB); ++ mainMBB->addSuccessor(ZeroMBB); ++ mainMBB->addSuccessor(OneMBB); ++ mainMBB->addSuccessor(TwoMBB); ++ mainMBB->addSuccessor(ThreeMBB); ++ mainMBB->addSuccessor(FourMBB); ++ mainMBB->addSuccessor(FiveMBB); ++ mainMBB->addSuccessor(SixMBB); ++ SevenMBB->addSuccessor(SevenMBB0); ++ SevenMBB->addSuccessor(SevenMBB1); ++ SevenMBB->addSuccessor(SevenMBB2); ++ SevenMBB->addSuccessor(SevenMBB3); ++ SevenMBB0->addSuccessor(sinkMBB); ++ SevenMBB1->addSuccessor(sinkMBB); ++ SevenMBB2->addSuccessor(sinkMBB); ++ SevenMBB3->addSuccessor(sinkMBB); ++ ZeroMBB->addSuccessor(ZeroMBB0); ++ ZeroMBB->addSuccessor(ZeroMBB1); ++ ZeroMBB->addSuccessor(ZeroMBB2); ++ ZeroMBB->addSuccessor(ZeroMBB3); ++ ZeroMBB0->addSuccessor(sinkMBB); ++ ZeroMBB1->addSuccessor(sinkMBB); ++ ZeroMBB2->addSuccessor(sinkMBB); ++ ZeroMBB3->addSuccessor(sinkMBB); ++ OneMBB->addSuccessor(OneMBB0); ++ OneMBB->addSuccessor(OneMBB1); ++ OneMBB->addSuccessor(OneMBB2); ++ OneMBB->addSuccessor(OneMBB3); ++ OneMBB0->addSuccessor(sinkMBB); ++ OneMBB1->addSuccessor(sinkMBB); ++ OneMBB2->addSuccessor(sinkMBB); ++ OneMBB3->addSuccessor(sinkMBB); ++ TwoMBB->addSuccessor(TwoMBB0); ++ TwoMBB->addSuccessor(TwoMBB1); ++ TwoMBB->addSuccessor(TwoMBB2); ++ TwoMBB->addSuccessor(TwoMBB3); ++ TwoMBB0->addSuccessor(sinkMBB); ++ TwoMBB1->addSuccessor(sinkMBB); ++ TwoMBB2->addSuccessor(sinkMBB); ++ TwoMBB3->addSuccessor(sinkMBB); ++ ThreeMBB->addSuccessor(ThreeMBB0); ++ ThreeMBB->addSuccessor(ThreeMBB1); ++ ThreeMBB->addSuccessor(ThreeMBB2); ++ ThreeMBB->addSuccessor(ThreeMBB3); ++ ThreeMBB0->addSuccessor(sinkMBB); ++ ThreeMBB1->addSuccessor(sinkMBB); ++ ThreeMBB2->addSuccessor(sinkMBB); ++ ThreeMBB3->addSuccessor(sinkMBB); ++ FourMBB->addSuccessor(FourMBB0); ++ FourMBB->addSuccessor(FourMBB1); ++ FourMBB->addSuccessor(FourMBB2); ++ FourMBB->addSuccessor(FourMBB3); ++ FourMBB0->addSuccessor(sinkMBB); ++ FourMBB1->addSuccessor(sinkMBB); ++ FourMBB2->addSuccessor(sinkMBB); ++ FourMBB3->addSuccessor(sinkMBB); ++ FiveMBB->addSuccessor(FiveMBB0); ++ FiveMBB->addSuccessor(FiveMBB1); ++ FiveMBB->addSuccessor(FiveMBB2); ++ FiveMBB->addSuccessor(FiveMBB3); ++ FiveMBB0->addSuccessor(sinkMBB); ++ FiveMBB1->addSuccessor(sinkMBB); ++ FiveMBB2->addSuccessor(sinkMBB); ++ FiveMBB3->addSuccessor(sinkMBB); ++ SixMBB->addSuccessor(SixMBB0); ++ SixMBB->addSuccessor(SixMBB1); ++ SixMBB->addSuccessor(SixMBB2); ++ SixMBB->addSuccessor(SixMBB3); ++ SixMBB0->addSuccessor(sinkMBB); ++ SixMBB1->addSuccessor(sinkMBB); ++ SixMBB2->addSuccessor(sinkMBB); ++ SixMBB3->addSuccessor(sinkMBB); ++ ++ unsigned SRLI, ADDI, OR, MOD, BLT, ZERO; ++ SRLI = isGP64 ? LoongArch::SRLI_D : LoongArch::SRLI_W; ++ ADDI = isGP64 ? LoongArch::ADDI_D : LoongArch::ADDI_W; ++ OR = isGP64 ? LoongArch::OR : LoongArch::OR32; ++ MOD = isGP64 ? LoongArch::MOD_DU : LoongArch::MOD_WU; ++ BLT = isGP64 ? LoongArch::BLT : LoongArch::BLT32; ++ ZERO = isGP64 ? 
LoongArch::ZERO_64 : LoongArch::ZERO; ++ ++ BuildMI(mainMBB, DL, TII->get(SRLI), Rimm).addReg(LaneReg).addImm(2); ++ BuildMI(mainMBB, DL, TII->get(ADDI), R4r).addReg(ZERO).addImm(4); ++ BuildMI(mainMBB, DL, TII->get(OR), Rib).addReg(Rimm).addReg(ZERO); ++ BuildMI(mainMBB, DL, TII->get(MOD), Ris).addReg(Rib).addReg(R4r); ++ BuildMI(mainMBB, DL, TII->get(ADDI), RI).addReg(ZERO).addImm(1); ++ BuildMI(mainMBB, DL, TII->get(BLT)).addReg(Rib).addReg(RI).addMBB(ZeroMBB); ++ BuildMI(mainMBB, DL, TII->get(ADDI), RI).addReg(ZERO).addImm(2); ++ BuildMI(mainMBB, DL, TII->get(BLT)).addReg(Rib).addReg(RI).addMBB(OneMBB); ++ BuildMI(mainMBB, DL, TII->get(ADDI), RI).addReg(ZERO).addImm(3); ++ BuildMI(mainMBB, DL, TII->get(BLT)).addReg(Rib).addReg(RI).addMBB(TwoMBB); ++ BuildMI(mainMBB, DL, TII->get(ADDI), RI).addReg(ZERO).addImm(4); ++ BuildMI(mainMBB, DL, TII->get(BLT)).addReg(Rib).addReg(RI).addMBB(ThreeMBB); ++ BuildMI(mainMBB, DL, TII->get(ADDI), RI).addReg(ZERO).addImm(5); ++ BuildMI(mainMBB, DL, TII->get(BLT)).addReg(Rib).addReg(RI).addMBB(FourMBB); ++ BuildMI(mainMBB, DL, TII->get(ADDI), RI).addReg(ZERO).addImm(6); ++ BuildMI(mainMBB, DL, TII->get(BLT)).addReg(Rib).addReg(RI).addMBB(FiveMBB); ++ BuildMI(mainMBB, DL, TII->get(ADDI), RI).addReg(ZERO).addImm(7); ++ BuildMI(mainMBB, DL, TII->get(BLT)).addReg(Rib).addReg(RI).addMBB(SixMBB); ++ BuildMI(mainMBB, DL, TII->get(LoongArch::B32)).addMBB(SevenMBB); ++ ++ BuildMI(SevenMBB, DL, TII->get(LoongArch::XVPICKVE2GR_W), R70) ++ .addReg(SrcVecReg) ++ .addImm(7); ++ BuildMI(SevenMBB, DL, TII->get(ADDI), R7b1).addReg(ZERO).addImm(1); ++ BuildMI(SevenMBB, DL, TII->get(BLT)) ++ .addReg(Ris) ++ .addReg(R7b1) ++ .addMBB(SevenMBB0); ++ BuildMI(SevenMBB, DL, TII->get(ADDI), R7b2).addReg(ZERO).addImm(2); ++ BuildMI(SevenMBB, DL, TII->get(BLT)) ++ .addReg(Ris) ++ .addReg(R7b2) ++ .addMBB(SevenMBB1); ++ BuildMI(SevenMBB, DL, TII->get(ADDI), R7b3).addReg(ZERO).addImm(3); ++ BuildMI(SevenMBB, DL, TII->get(BLT)) ++ .addReg(Ris) ++ .addReg(R7b3) ++ .addMBB(SevenMBB2); ++ BuildMI(SevenMBB, DL, TII->get(LoongArch::B32)).addMBB(SevenMBB3); ++ ++ BuildMI(SevenMBB3, DL, TII->get(LoongArch::SLLI_W), R7r80_3) ++ .addReg(SrcValReg) ++ .addImm(24); ++ BuildMI(SevenMBB3, DL, TII->get(LoongArch::LU12I_W), R7r81l_3) ++ .addImm(0x00fff); ++ BuildMI(SevenMBB3, DL, TII->get(LoongArch::ORI32), R7r81_3) ++ .addReg(R7r81l_3) ++ .addImm(0xfff); ++ BuildMI(SevenMBB3, DL, TII->get(LoongArch::AND32), R7r82l_3) ++ .addReg(R70) ++ .addReg(R7r81_3); ++ BuildMI(SevenMBB3, DL, TII->get(LoongArch::OR32), R7r82_3) ++ .addReg(R7r82l_3) ++ .addReg(R7r80_3); ++ BuildMI(SevenMBB3, DL, TII->get(LoongArch::XVINSGR2VR_W), tmp_Dst73) ++ .addReg(SrcVecReg) ++ .addReg(R7r82_3) ++ .addImm(7); ++ BuildMI(SevenMBB3, DL, TII->get(LoongArch::B32)).addMBB(sinkMBB); ++ ++ BuildMI(SevenMBB0, DL, TII->get(LoongArch::SLLI_W), R7r80_3) ++ .addReg(SrcValReg) ++ .addImm(24); ++ BuildMI(SevenMBB0, DL, TII->get(LoongArch::SRLI_W), R7r80l_3) ++ .addReg(R7r80_3) ++ .addImm(8); ++ BuildMI(SevenMBB0, DL, TII->get(LoongArch::LU12I_W), R7r81l_3) ++ .addImm(0xff00f); ++ BuildMI(SevenMBB0, DL, TII->get(LoongArch::ORI32), R7r81_3) ++ .addReg(R7r81l_3) ++ .addImm(0xfff); ++ BuildMI(SevenMBB0, DL, TII->get(LoongArch::AND32), R7r82l_3) ++ .addReg(R70) ++ .addReg(R7r81_3); ++ BuildMI(SevenMBB0, DL, TII->get(LoongArch::OR32), R7r82_3) ++ .addReg(R7r82l_3) ++ .addReg(R7r80l_3); ++ BuildMI(SevenMBB0, DL, TII->get(LoongArch::XVINSGR2VR_W), tmp_Dst73) ++ .addReg(SrcVecReg) ++ .addReg(R7r82_3) ++ .addImm(7); ++ BuildMI(SevenMBB0, DL, 
TII->get(LoongArch::B32)).addMBB(sinkMBB); ++ ++ BuildMI(SevenMBB1, DL, TII->get(LoongArch::SLLI_W), R7r80_3) ++ .addReg(SrcValReg) ++ .addImm(24); ++ BuildMI(SevenMBB1, DL, TII->get(LoongArch::SRLI_W), R7r80l_3) ++ .addReg(R7r80_3) ++ .addImm(16); ++ BuildMI(SevenMBB1, DL, TII->get(LoongArch::LU12I_W), R7r81l_3) ++ .addImm(0xffff0); ++ BuildMI(SevenMBB1, DL, TII->get(LoongArch::ORI32), R7r81_3) ++ .addReg(R7r81l_3) ++ .addImm(0x0ff); ++ BuildMI(SevenMBB1, DL, TII->get(LoongArch::AND32), R7r82l_3) ++ .addReg(R70) ++ .addReg(R7r81_3); ++ BuildMI(SevenMBB1, DL, TII->get(LoongArch::OR32), R7r82_3) ++ .addReg(R7r82l_3) ++ .addReg(R7r80l_3); ++ BuildMI(SevenMBB1, DL, TII->get(LoongArch::XVINSGR2VR_W), tmp_Dst73) ++ .addReg(SrcVecReg) ++ .addReg(R7r82_3) ++ .addImm(7); ++ BuildMI(SevenMBB1, DL, TII->get(LoongArch::B32)).addMBB(sinkMBB); ++ ++ BuildMI(SevenMBB2, DL, TII->get(LoongArch::SLLI_W), R7r80_3) ++ .addReg(SrcValReg) ++ .addImm(24); ++ BuildMI(SevenMBB2, DL, TII->get(LoongArch::SRLI_W), R7r80l_3) ++ .addReg(R7r80_3) ++ .addImm(24); ++ BuildMI(SevenMBB2, DL, TII->get(LoongArch::LU12I_W), R7r81l_3) ++ .addImm(0xfffff); ++ BuildMI(SevenMBB2, DL, TII->get(LoongArch::ORI32), R7r81_3) ++ .addReg(R7r81l_3) ++ .addImm(0xf00); ++ BuildMI(SevenMBB2, DL, TII->get(LoongArch::AND32), R7r82l_3) ++ .addReg(R70) ++ .addReg(R7r81_3); ++ BuildMI(SevenMBB2, DL, TII->get(LoongArch::OR32), R7r82_3) ++ .addReg(R7r82l_3) ++ .addReg(R7r80l_3); ++ BuildMI(SevenMBB2, DL, TII->get(LoongArch::XVINSGR2VR_W), tmp_Dst73) ++ .addReg(SrcVecReg) ++ .addReg(R7r82_3) ++ .addImm(7); ++ BuildMI(SevenMBB2, DL, TII->get(LoongArch::B32)).addMBB(sinkMBB); ++ ++ BuildMI(ZeroMBB, DL, TII->get(LoongArch::XVPICKVE2GR_W), R70) ++ .addReg(SrcVecReg) ++ .addImm(0); ++ BuildMI(ZeroMBB, DL, TII->get(ADDI), R7b1).addReg(ZERO).addImm(1); ++ BuildMI(ZeroMBB, DL, TII->get(BLT)).addReg(Ris).addReg(R7b1).addMBB(ZeroMBB0); ++ BuildMI(ZeroMBB, DL, TII->get(ADDI), R7b2).addReg(ZERO).addImm(2); ++ BuildMI(ZeroMBB, DL, TII->get(BLT)).addReg(Ris).addReg(R7b2).addMBB(ZeroMBB1); ++ BuildMI(ZeroMBB, DL, TII->get(ADDI), R7b3).addReg(ZERO).addImm(3); ++ BuildMI(ZeroMBB, DL, TII->get(BLT)).addReg(Ris).addReg(R7b3).addMBB(ZeroMBB2); ++ BuildMI(ZeroMBB, DL, TII->get(LoongArch::B32)).addMBB(ZeroMBB3); ++ ++ BuildMI(ZeroMBB3, DL, TII->get(LoongArch::SLLI_W), R7r80_3) ++ .addReg(SrcValReg) ++ .addImm(24); ++ BuildMI(ZeroMBB3, DL, TII->get(LoongArch::LU12I_W), R7r81l_3).addImm(0x00fff); ++ BuildMI(ZeroMBB3, DL, TII->get(LoongArch::ORI32), R7r81_3) ++ .addReg(R7r81l_3) ++ .addImm(0xfff); ++ BuildMI(ZeroMBB3, DL, TII->get(LoongArch::AND32), R7r82l_3) ++ .addReg(R70) ++ .addReg(R7r81_3); ++ BuildMI(ZeroMBB3, DL, TII->get(LoongArch::OR32), R7r82_3) ++ .addReg(R7r82l_3) ++ .addReg(R7r80_3); ++ BuildMI(ZeroMBB3, DL, TII->get(LoongArch::XVINSGR2VR_W), tmp_Dst73) ++ .addReg(SrcVecReg) ++ .addReg(R7r82_3) ++ .addImm(0); ++ BuildMI(ZeroMBB3, DL, TII->get(LoongArch::B32)).addMBB(sinkMBB); ++ ++ BuildMI(ZeroMBB0, DL, TII->get(LoongArch::SLLI_W), R7r80_3) ++ .addReg(SrcValReg) ++ .addImm(24); ++ BuildMI(ZeroMBB0, DL, TII->get(LoongArch::SRLI_W), R7r80l_3) ++ .addReg(R7r80_3) ++ .addImm(8); ++ BuildMI(ZeroMBB0, DL, TII->get(LoongArch::LU12I_W), R7r81l_3).addImm(0xff00f); ++ BuildMI(ZeroMBB0, DL, TII->get(LoongArch::ORI32), R7r81_3) ++ .addReg(R7r81l_3) ++ .addImm(0xfff); ++ BuildMI(ZeroMBB0, DL, TII->get(LoongArch::AND32), R7r82l_3) ++ .addReg(R70) ++ .addReg(R7r81_3); ++ BuildMI(ZeroMBB0, DL, TII->get(LoongArch::OR32), R7r82_3) ++ .addReg(R7r82l_3) ++ .addReg(R7r80l_3); ++ 
BuildMI(ZeroMBB0, DL, TII->get(LoongArch::XVINSGR2VR_W), tmp_Dst73) ++ .addReg(SrcVecReg) ++ .addReg(R7r82_3) ++ .addImm(0); ++ BuildMI(ZeroMBB0, DL, TII->get(LoongArch::B32)).addMBB(sinkMBB); ++ ++ BuildMI(ZeroMBB1, DL, TII->get(LoongArch::SLLI_W), R7r80_3) ++ .addReg(SrcValReg) ++ .addImm(24); ++ BuildMI(ZeroMBB1, DL, TII->get(LoongArch::SRLI_W), R7r80l_3) ++ .addReg(R7r80_3) ++ .addImm(16); ++ BuildMI(ZeroMBB1, DL, TII->get(LoongArch::LU12I_W), R7r81l_3).addImm(0xffff0); ++ BuildMI(ZeroMBB1, DL, TII->get(LoongArch::ORI32), R7r81_3) ++ .addReg(R7r81l_3) ++ .addImm(0x0ff); ++ BuildMI(ZeroMBB1, DL, TII->get(LoongArch::AND32), R7r82l_3) ++ .addReg(R70) ++ .addReg(R7r81_3); ++ BuildMI(ZeroMBB1, DL, TII->get(LoongArch::OR32), R7r82_3) ++ .addReg(R7r82l_3) ++ .addReg(R7r80l_3); ++ BuildMI(ZeroMBB1, DL, TII->get(LoongArch::XVINSGR2VR_W), tmp_Dst73) ++ .addReg(SrcVecReg) ++ .addReg(R7r82_3) ++ .addImm(0); ++ BuildMI(ZeroMBB1, DL, TII->get(LoongArch::B32)).addMBB(sinkMBB); ++ ++ BuildMI(ZeroMBB2, DL, TII->get(LoongArch::SLLI_W), R7r80_3) ++ .addReg(SrcValReg) ++ .addImm(24); ++ BuildMI(ZeroMBB2, DL, TII->get(LoongArch::SRLI_W), R7r80l_3) ++ .addReg(R7r80_3) ++ .addImm(24); ++ BuildMI(ZeroMBB2, DL, TII->get(LoongArch::LU12I_W), R7r81l_3).addImm(0xfffff); ++ BuildMI(ZeroMBB2, DL, TII->get(LoongArch::ORI32), R7r81_3) ++ .addReg(R7r81l_3) ++ .addImm(0xf00); ++ BuildMI(ZeroMBB2, DL, TII->get(LoongArch::AND32), R7r82l_3) ++ .addReg(R70) ++ .addReg(R7r81_3); ++ BuildMI(ZeroMBB2, DL, TII->get(LoongArch::OR32), R7r82_3) ++ .addReg(R7r82l_3) ++ .addReg(R7r80l_3); ++ BuildMI(ZeroMBB2, DL, TII->get(LoongArch::XVINSGR2VR_W), tmp_Dst73) ++ .addReg(SrcVecReg) ++ .addReg(R7r82_3) ++ .addImm(0); ++ BuildMI(ZeroMBB2, DL, TII->get(LoongArch::B32)).addMBB(sinkMBB); ++ ++ BuildMI(OneMBB, DL, TII->get(LoongArch::XVPICKVE2GR_W), R70) ++ .addReg(SrcVecReg) ++ .addImm(1); ++ BuildMI(OneMBB, DL, TII->get(ADDI), R7b1).addReg(ZERO).addImm(1); ++ BuildMI(OneMBB, DL, TII->get(BLT)).addReg(Ris).addReg(R7b1).addMBB(OneMBB0); ++ BuildMI(OneMBB, DL, TII->get(ADDI), R7b2).addReg(ZERO).addImm(2); ++ BuildMI(OneMBB, DL, TII->get(BLT)).addReg(Ris).addReg(R7b2).addMBB(OneMBB1); ++ BuildMI(OneMBB, DL, TII->get(ADDI), R7b3).addReg(ZERO).addImm(3); ++ BuildMI(OneMBB, DL, TII->get(BLT)).addReg(Ris).addReg(R7b3).addMBB(OneMBB2); ++ BuildMI(OneMBB, DL, TII->get(LoongArch::B32)).addMBB(OneMBB3); ++ ++ BuildMI(OneMBB3, DL, TII->get(LoongArch::SLLI_W), R7r80_3) ++ .addReg(SrcValReg) ++ .addImm(24); ++ BuildMI(OneMBB3, DL, TII->get(LoongArch::LU12I_W), R7r81l_3).addImm(0x00fff); ++ BuildMI(OneMBB3, DL, TII->get(LoongArch::ORI32), R7r81_3) ++ .addReg(R7r81l_3) ++ .addImm(0xfff); ++ BuildMI(OneMBB3, DL, TII->get(LoongArch::AND32), R7r82l_3) ++ .addReg(R70) ++ .addReg(R7r81_3); ++ BuildMI(OneMBB3, DL, TII->get(LoongArch::OR32), R7r82_3) ++ .addReg(R7r82l_3) ++ .addReg(R7r80_3); ++ BuildMI(OneMBB3, DL, TII->get(LoongArch::XVINSGR2VR_W), tmp_Dst73) ++ .addReg(SrcVecReg) ++ .addReg(R7r82_3) ++ .addImm(1); ++ BuildMI(OneMBB3, DL, TII->get(LoongArch::B32)).addMBB(sinkMBB); ++ ++ BuildMI(OneMBB0, DL, TII->get(LoongArch::SLLI_W), R7r80_3) ++ .addReg(SrcValReg) ++ .addImm(24); ++ BuildMI(OneMBB0, DL, TII->get(LoongArch::SRLI_W), R7r80l_3) ++ .addReg(R7r80_3) ++ .addImm(8); ++ BuildMI(OneMBB0, DL, TII->get(LoongArch::LU12I_W), R7r81l_3).addImm(0xff00f); ++ BuildMI(OneMBB0, DL, TII->get(LoongArch::ORI32), R7r81_3) ++ .addReg(R7r81l_3) ++ .addImm(0xfff); ++ BuildMI(OneMBB0, DL, TII->get(LoongArch::AND32), R7r82l_3) ++ .addReg(R70) ++ .addReg(R7r81_3); ++ 
BuildMI(OneMBB0, DL, TII->get(LoongArch::OR32), R7r82_3) ++ .addReg(R7r82l_3) ++ .addReg(R7r80l_3); ++ BuildMI(OneMBB0, DL, TII->get(LoongArch::XVINSGR2VR_W), tmp_Dst73) ++ .addReg(SrcVecReg) ++ .addReg(R7r82_3) ++ .addImm(1); ++ BuildMI(OneMBB0, DL, TII->get(LoongArch::B32)).addMBB(sinkMBB); ++ ++ BuildMI(OneMBB1, DL, TII->get(LoongArch::SLLI_W), R7r80_3) ++ .addReg(SrcValReg) ++ .addImm(24); ++ BuildMI(OneMBB1, DL, TII->get(LoongArch::SRLI_W), R7r80l_3) ++ .addReg(R7r80_3) ++ .addImm(16); ++ BuildMI(OneMBB1, DL, TII->get(LoongArch::LU12I_W), R7r81l_3).addImm(0xffff0); ++ BuildMI(OneMBB1, DL, TII->get(LoongArch::ORI32), R7r81_3) ++ .addReg(R7r81l_3) ++ .addImm(0x0ff); ++ BuildMI(OneMBB1, DL, TII->get(LoongArch::AND32), R7r82l_3) ++ .addReg(R70) ++ .addReg(R7r81_3); ++ BuildMI(OneMBB1, DL, TII->get(LoongArch::OR32), R7r82_3) ++ .addReg(R7r82l_3) ++ .addReg(R7r80l_3); ++ BuildMI(OneMBB1, DL, TII->get(LoongArch::XVINSGR2VR_W), tmp_Dst73) ++ .addReg(SrcVecReg) ++ .addReg(R7r82_3) ++ .addImm(1); ++ BuildMI(OneMBB1, DL, TII->get(LoongArch::B32)).addMBB(sinkMBB); ++ ++ BuildMI(OneMBB2, DL, TII->get(LoongArch::SLLI_W), R7r80_3) ++ .addReg(SrcValReg) ++ .addImm(24); ++ BuildMI(OneMBB2, DL, TII->get(LoongArch::SRLI_W), R7r80l_3) ++ .addReg(R7r80_3) ++ .addImm(24); ++ BuildMI(OneMBB2, DL, TII->get(LoongArch::LU12I_W), R7r81l_3).addImm(0xfffff); ++ BuildMI(OneMBB2, DL, TII->get(LoongArch::ORI32), R7r81_3) ++ .addReg(R7r81l_3) ++ .addImm(0xf00); ++ BuildMI(OneMBB2, DL, TII->get(LoongArch::AND32), R7r82l_3) ++ .addReg(R70) ++ .addReg(R7r81_3); ++ BuildMI(OneMBB2, DL, TII->get(LoongArch::OR32), R7r82_3) ++ .addReg(R7r82l_3) ++ .addReg(R7r80l_3); ++ BuildMI(OneMBB2, DL, TII->get(LoongArch::XVINSGR2VR_W), tmp_Dst73) ++ .addReg(SrcVecReg) ++ .addReg(R7r82_3) ++ .addImm(1); ++ BuildMI(OneMBB2, DL, TII->get(LoongArch::B32)).addMBB(sinkMBB); ++ ++ BuildMI(TwoMBB, DL, TII->get(LoongArch::XVPICKVE2GR_W), R70) ++ .addReg(SrcVecReg) ++ .addImm(2); ++ BuildMI(TwoMBB, DL, TII->get(ADDI), R7b1).addReg(ZERO).addImm(1); ++ BuildMI(TwoMBB, DL, TII->get(BLT)).addReg(Ris).addReg(R7b1).addMBB(TwoMBB0); ++ BuildMI(TwoMBB, DL, TII->get(ADDI), R7b2).addReg(ZERO).addImm(2); ++ BuildMI(TwoMBB, DL, TII->get(BLT)).addReg(Ris).addReg(R7b2).addMBB(TwoMBB1); ++ BuildMI(TwoMBB, DL, TII->get(ADDI), R7b3).addReg(ZERO).addImm(3); ++ BuildMI(TwoMBB, DL, TII->get(BLT)).addReg(Ris).addReg(R7b3).addMBB(TwoMBB2); ++ BuildMI(TwoMBB, DL, TII->get(LoongArch::B32)).addMBB(TwoMBB3); ++ ++ BuildMI(TwoMBB3, DL, TII->get(LoongArch::SLLI_W), R7r80_3) ++ .addReg(SrcValReg) ++ .addImm(24); ++ BuildMI(TwoMBB3, DL, TII->get(LoongArch::LU12I_W), R7r81l_3).addImm(0x00fff); ++ BuildMI(TwoMBB3, DL, TII->get(LoongArch::ORI32), R7r81_3) ++ .addReg(R7r81l_3) ++ .addImm(0xfff); ++ BuildMI(TwoMBB3, DL, TII->get(LoongArch::AND32), R7r82l_3) ++ .addReg(R70) ++ .addReg(R7r81_3); ++ BuildMI(TwoMBB3, DL, TII->get(LoongArch::OR32), R7r82_3) ++ .addReg(R7r82l_3) ++ .addReg(R7r80_3); ++ BuildMI(TwoMBB3, DL, TII->get(LoongArch::XVINSGR2VR_W), tmp_Dst73) ++ .addReg(SrcVecReg) ++ .addReg(R7r82_3) ++ .addImm(2); ++ BuildMI(TwoMBB3, DL, TII->get(LoongArch::B32)).addMBB(sinkMBB); ++ ++ BuildMI(TwoMBB0, DL, TII->get(LoongArch::SLLI_W), R7r80_3) ++ .addReg(SrcValReg) ++ .addImm(24); ++ BuildMI(TwoMBB0, DL, TII->get(LoongArch::SRLI_W), R7r80l_3) ++ .addReg(R7r80_3) ++ .addImm(8); ++ BuildMI(TwoMBB0, DL, TII->get(LoongArch::LU12I_W), R7r81l_3).addImm(0xff00f); ++ BuildMI(TwoMBB0, DL, TII->get(LoongArch::ORI32), R7r81_3) ++ .addReg(R7r81l_3) ++ .addImm(0xfff); ++ BuildMI(TwoMBB0, 
DL, TII->get(LoongArch::AND32), R7r82l_3) ++ .addReg(R70) ++ .addReg(R7r81_3); ++ BuildMI(TwoMBB0, DL, TII->get(LoongArch::OR32), R7r82_3) ++ .addReg(R7r82l_3) ++ .addReg(R7r80l_3); ++ BuildMI(TwoMBB0, DL, TII->get(LoongArch::XVINSGR2VR_W), tmp_Dst73) ++ .addReg(SrcVecReg) ++ .addReg(R7r82_3) ++ .addImm(2); ++ BuildMI(TwoMBB0, DL, TII->get(LoongArch::B32)).addMBB(sinkMBB); ++ ++ BuildMI(TwoMBB1, DL, TII->get(LoongArch::SLLI_W), R7r80_3) ++ .addReg(SrcValReg) ++ .addImm(24); ++ BuildMI(TwoMBB1, DL, TII->get(LoongArch::SRLI_W), R7r80l_3) ++ .addReg(R7r80_3) ++ .addImm(16); ++ BuildMI(TwoMBB1, DL, TII->get(LoongArch::LU12I_W), R7r81l_3).addImm(0xffff0); ++ BuildMI(TwoMBB1, DL, TII->get(LoongArch::ORI32), R7r81_3) ++ .addReg(R7r81l_3) ++ .addImm(0x0ff); ++ BuildMI(TwoMBB1, DL, TII->get(LoongArch::AND32), R7r82l_3) ++ .addReg(R70) ++ .addReg(R7r81_3); ++ BuildMI(TwoMBB1, DL, TII->get(LoongArch::OR32), R7r82_3) ++ .addReg(R7r82l_3) ++ .addReg(R7r80l_3); ++ BuildMI(TwoMBB1, DL, TII->get(LoongArch::XVINSGR2VR_W), tmp_Dst73) ++ .addReg(SrcVecReg) ++ .addReg(R7r82_3) ++ .addImm(2); ++ BuildMI(TwoMBB1, DL, TII->get(LoongArch::B32)).addMBB(sinkMBB); ++ ++ BuildMI(TwoMBB2, DL, TII->get(LoongArch::SLLI_W), R7r80_3) ++ .addReg(SrcValReg) ++ .addImm(24); ++ BuildMI(TwoMBB2, DL, TII->get(LoongArch::SRLI_W), R7r80l_3) ++ .addReg(R7r80_3) ++ .addImm(24); ++ BuildMI(TwoMBB2, DL, TII->get(LoongArch::LU12I_W), R7r81l_3).addImm(0xfffff); ++ BuildMI(TwoMBB2, DL, TII->get(LoongArch::ORI32), R7r81_3) ++ .addReg(R7r81l_3) ++ .addImm(0xf00); ++ BuildMI(TwoMBB2, DL, TII->get(LoongArch::AND32), R7r82l_3) ++ .addReg(R70) ++ .addReg(R7r81_3); ++ BuildMI(TwoMBB2, DL, TII->get(LoongArch::OR32), R7r82_3) ++ .addReg(R7r82l_3) ++ .addReg(R7r80l_3); ++ BuildMI(TwoMBB2, DL, TII->get(LoongArch::XVINSGR2VR_W), tmp_Dst73) ++ .addReg(SrcVecReg) ++ .addReg(R7r82_3) ++ .addImm(2); ++ BuildMI(TwoMBB2, DL, TII->get(LoongArch::B32)).addMBB(sinkMBB); ++ ++ BuildMI(ThreeMBB, DL, TII->get(LoongArch::XVPICKVE2GR_W), R70) ++ .addReg(SrcVecReg) ++ .addImm(3); ++ BuildMI(ThreeMBB, DL, TII->get(ADDI), R7b1).addReg(ZERO).addImm(1); ++ BuildMI(ThreeMBB, DL, TII->get(BLT)) ++ .addReg(Ris) ++ .addReg(R7b1) ++ .addMBB(ThreeMBB0); ++ BuildMI(ThreeMBB, DL, TII->get(ADDI), R7b2).addReg(ZERO).addImm(2); ++ BuildMI(ThreeMBB, DL, TII->get(BLT)) ++ .addReg(Ris) ++ .addReg(R7b2) ++ .addMBB(ThreeMBB1); ++ BuildMI(ThreeMBB, DL, TII->get(ADDI), R7b3).addReg(ZERO).addImm(3); ++ BuildMI(ThreeMBB, DL, TII->get(BLT)) ++ .addReg(Ris) ++ .addReg(R7b3) ++ .addMBB(ThreeMBB2); ++ BuildMI(ThreeMBB, DL, TII->get(LoongArch::B32)).addMBB(ThreeMBB3); ++ ++ BuildMI(ThreeMBB3, DL, TII->get(LoongArch::SLLI_W), R7r80_3) ++ .addReg(SrcValReg) ++ .addImm(24); ++ BuildMI(ThreeMBB3, DL, TII->get(LoongArch::LU12I_W), R7r81l_3) ++ .addImm(0x00fff); ++ BuildMI(ThreeMBB3, DL, TII->get(LoongArch::ORI32), R7r81_3) ++ .addReg(R7r81l_3) ++ .addImm(0xfff); ++ BuildMI(ThreeMBB3, DL, TII->get(LoongArch::AND32), R7r82l_3) ++ .addReg(R70) ++ .addReg(R7r81_3); ++ BuildMI(ThreeMBB3, DL, TII->get(LoongArch::OR32), R7r82_3) ++ .addReg(R7r82l_3) ++ .addReg(R7r80_3); ++ BuildMI(ThreeMBB3, DL, TII->get(LoongArch::XVINSGR2VR_W), tmp_Dst73) ++ .addReg(SrcVecReg) ++ .addReg(R7r82_3) ++ .addImm(3); ++ BuildMI(ThreeMBB3, DL, TII->get(LoongArch::B32)).addMBB(sinkMBB); ++ ++ BuildMI(ThreeMBB0, DL, TII->get(LoongArch::SLLI_W), R7r80_3) ++ .addReg(SrcValReg) ++ .addImm(24); ++ BuildMI(ThreeMBB0, DL, TII->get(LoongArch::SRLI_W), R7r80l_3) ++ .addReg(R7r80_3) ++ .addImm(8); ++ BuildMI(ThreeMBB0, DL, 
TII->get(LoongArch::LU12I_W), R7r81l_3) ++ .addImm(0xff00f); ++ BuildMI(ThreeMBB0, DL, TII->get(LoongArch::ORI32), R7r81_3) ++ .addReg(R7r81l_3) ++ .addImm(0xfff); ++ BuildMI(ThreeMBB0, DL, TII->get(LoongArch::AND32), R7r82l_3) ++ .addReg(R70) ++ .addReg(R7r81_3); ++ BuildMI(ThreeMBB0, DL, TII->get(LoongArch::OR32), R7r82_3) ++ .addReg(R7r82l_3) ++ .addReg(R7r80l_3); ++ BuildMI(ThreeMBB0, DL, TII->get(LoongArch::XVINSGR2VR_W), tmp_Dst73) ++ .addReg(SrcVecReg) ++ .addReg(R7r82_3) ++ .addImm(3); ++ BuildMI(ThreeMBB0, DL, TII->get(LoongArch::B32)).addMBB(sinkMBB); ++ ++ BuildMI(ThreeMBB1, DL, TII->get(LoongArch::SLLI_W), R7r80_3) ++ .addReg(SrcValReg) ++ .addImm(24); ++ BuildMI(ThreeMBB1, DL, TII->get(LoongArch::SRLI_W), R7r80l_3) ++ .addReg(R7r80_3) ++ .addImm(16); ++ BuildMI(ThreeMBB1, DL, TII->get(LoongArch::LU12I_W), R7r81l_3) ++ .addImm(0xffff0); ++ BuildMI(ThreeMBB1, DL, TII->get(LoongArch::ORI32), R7r81_3) ++ .addReg(R7r81l_3) ++ .addImm(0x0ff); ++ BuildMI(ThreeMBB1, DL, TII->get(LoongArch::AND32), R7r82l_3) ++ .addReg(R70) ++ .addReg(R7r81_3); ++ BuildMI(ThreeMBB1, DL, TII->get(LoongArch::OR32), R7r82_3) ++ .addReg(R7r82l_3) ++ .addReg(R7r80l_3); ++ BuildMI(ThreeMBB1, DL, TII->get(LoongArch::XVINSGR2VR_W), tmp_Dst73) ++ .addReg(SrcVecReg) ++ .addReg(R7r82_3) ++ .addImm(3); ++ BuildMI(ThreeMBB1, DL, TII->get(LoongArch::B32)).addMBB(sinkMBB); ++ ++ BuildMI(ThreeMBB2, DL, TII->get(LoongArch::SLLI_W), R7r80_3) ++ .addReg(SrcValReg) ++ .addImm(24); ++ BuildMI(ThreeMBB2, DL, TII->get(LoongArch::SRLI_W), R7r80l_3) ++ .addReg(R7r80_3) ++ .addImm(24); ++ BuildMI(ThreeMBB2, DL, TII->get(LoongArch::LU12I_W), R7r81l_3) ++ .addImm(0xfffff); ++ BuildMI(ThreeMBB2, DL, TII->get(LoongArch::ORI32), R7r81_3) ++ .addReg(R7r81l_3) ++ .addImm(0xf00); ++ BuildMI(ThreeMBB2, DL, TII->get(LoongArch::AND32), R7r82l_3) ++ .addReg(R70) ++ .addReg(R7r81_3); ++ BuildMI(ThreeMBB2, DL, TII->get(LoongArch::OR32), R7r82_3) ++ .addReg(R7r82l_3) ++ .addReg(R7r80l_3); ++ BuildMI(ThreeMBB2, DL, TII->get(LoongArch::XVINSGR2VR_W), tmp_Dst73) ++ .addReg(SrcVecReg) ++ .addReg(R7r82_3) ++ .addImm(3); ++ BuildMI(ThreeMBB2, DL, TII->get(LoongArch::B32)).addMBB(sinkMBB); ++ ++ BuildMI(FourMBB, DL, TII->get(LoongArch::XVPICKVE2GR_W), R70) ++ .addReg(SrcVecReg) ++ .addImm(4); ++ BuildMI(FourMBB, DL, TII->get(ADDI), R7b1).addReg(ZERO).addImm(1); ++ BuildMI(FourMBB, DL, TII->get(BLT)).addReg(Ris).addReg(R7b1).addMBB(FourMBB0); ++ BuildMI(FourMBB, DL, TII->get(ADDI), R7b2).addReg(ZERO).addImm(2); ++ BuildMI(FourMBB, DL, TII->get(BLT)).addReg(Ris).addReg(R7b2).addMBB(FourMBB1); ++ BuildMI(FourMBB, DL, TII->get(ADDI), R7b3).addReg(ZERO).addImm(3); ++ BuildMI(FourMBB, DL, TII->get(BLT)).addReg(Ris).addReg(R7b3).addMBB(FourMBB2); ++ BuildMI(FourMBB, DL, TII->get(LoongArch::B32)).addMBB(FourMBB3); ++ ++ BuildMI(FourMBB3, DL, TII->get(LoongArch::SLLI_W), R7r80_3) ++ .addReg(SrcValReg) ++ .addImm(24); ++ BuildMI(FourMBB3, DL, TII->get(LoongArch::LU12I_W), R7r81l_3).addImm(0x00fff); ++ BuildMI(FourMBB3, DL, TII->get(LoongArch::ORI32), R7r81_3) ++ .addReg(R7r81l_3) ++ .addImm(0xfff); ++ BuildMI(FourMBB3, DL, TII->get(LoongArch::AND32), R7r82l_3) ++ .addReg(R70) ++ .addReg(R7r81_3); ++ BuildMI(FourMBB3, DL, TII->get(LoongArch::OR32), R7r82_3) ++ .addReg(R7r82l_3) ++ .addReg(R7r80_3); ++ BuildMI(FourMBB3, DL, TII->get(LoongArch::XVINSGR2VR_W), tmp_Dst73) ++ .addReg(SrcVecReg) ++ .addReg(R7r82_3) ++ .addImm(4); ++ BuildMI(FourMBB3, DL, TII->get(LoongArch::B32)).addMBB(sinkMBB); ++ ++ BuildMI(FourMBB0, DL, TII->get(LoongArch::SLLI_W), R7r80_3) ++ 
.addReg(SrcValReg) ++ .addImm(24); ++ BuildMI(FourMBB0, DL, TII->get(LoongArch::SRLI_W), R7r80l_3) ++ .addReg(R7r80_3) ++ .addImm(8); ++ BuildMI(FourMBB0, DL, TII->get(LoongArch::LU12I_W), R7r81l_3).addImm(0xff00f); ++ BuildMI(FourMBB0, DL, TII->get(LoongArch::ORI32), R7r81_3) ++ .addReg(R7r81l_3) ++ .addImm(0xfff); ++ BuildMI(FourMBB0, DL, TII->get(LoongArch::AND32), R7r82l_3) ++ .addReg(R70) ++ .addReg(R7r81_3); ++ BuildMI(FourMBB0, DL, TII->get(LoongArch::OR32), R7r82_3) ++ .addReg(R7r82l_3) ++ .addReg(R7r80l_3); ++ BuildMI(FourMBB0, DL, TII->get(LoongArch::XVINSGR2VR_W), tmp_Dst73) ++ .addReg(SrcVecReg) ++ .addReg(R7r82_3) ++ .addImm(4); ++ BuildMI(FourMBB0, DL, TII->get(LoongArch::B32)).addMBB(sinkMBB); ++ ++ BuildMI(FourMBB1, DL, TII->get(LoongArch::SLLI_W), R7r80_3) ++ .addReg(SrcValReg) ++ .addImm(24); ++ BuildMI(FourMBB1, DL, TII->get(LoongArch::SRLI_W), R7r80l_3) ++ .addReg(R7r80_3) ++ .addImm(16); ++ BuildMI(FourMBB1, DL, TII->get(LoongArch::LU12I_W), R7r81l_3).addImm(0xffff0); ++ BuildMI(FourMBB1, DL, TII->get(LoongArch::ORI32), R7r81_3) ++ .addReg(R7r81l_3) ++ .addImm(0x0ff); ++ BuildMI(FourMBB1, DL, TII->get(LoongArch::AND32), R7r82l_3) ++ .addReg(R70) ++ .addReg(R7r81_3); ++ BuildMI(FourMBB1, DL, TII->get(LoongArch::OR32), R7r82_3) ++ .addReg(R7r82l_3) ++ .addReg(R7r80l_3); ++ BuildMI(FourMBB1, DL, TII->get(LoongArch::XVINSGR2VR_W), tmp_Dst73) ++ .addReg(SrcVecReg) ++ .addReg(R7r82_3) ++ .addImm(4); ++ BuildMI(FourMBB1, DL, TII->get(LoongArch::B32)).addMBB(sinkMBB); ++ ++ BuildMI(FourMBB2, DL, TII->get(LoongArch::SLLI_W), R7r80_3) ++ .addReg(SrcValReg) ++ .addImm(24); ++ BuildMI(FourMBB2, DL, TII->get(LoongArch::SRLI_W), R7r80l_3) ++ .addReg(R7r80_3) ++ .addImm(24); ++ BuildMI(FourMBB2, DL, TII->get(LoongArch::LU12I_W), R7r81l_3).addImm(0xfffff); ++ BuildMI(FourMBB2, DL, TII->get(LoongArch::ORI32), R7r81_3) ++ .addReg(R7r81l_3) ++ .addImm(0xf00); ++ BuildMI(FourMBB2, DL, TII->get(LoongArch::AND32), R7r82l_3) ++ .addReg(R70) ++ .addReg(R7r81_3); ++ BuildMI(FourMBB2, DL, TII->get(LoongArch::OR32), R7r82_3) ++ .addReg(R7r82l_3) ++ .addReg(R7r80l_3); ++ BuildMI(FourMBB2, DL, TII->get(LoongArch::XVINSGR2VR_W), tmp_Dst73) ++ .addReg(SrcVecReg) ++ .addReg(R7r82_3) ++ .addImm(4); ++ BuildMI(FourMBB2, DL, TII->get(LoongArch::B32)).addMBB(sinkMBB); ++ ++ BuildMI(FiveMBB, DL, TII->get(LoongArch::XVPICKVE2GR_W), R70) ++ .addReg(SrcVecReg) ++ .addImm(5); ++ BuildMI(FiveMBB, DL, TII->get(ADDI), R7b1).addReg(ZERO).addImm(1); ++ BuildMI(FiveMBB, DL, TII->get(BLT)).addReg(Ris).addReg(R7b1).addMBB(FiveMBB0); ++ BuildMI(FiveMBB, DL, TII->get(ADDI), R7b2).addReg(ZERO).addImm(2); ++ BuildMI(FiveMBB, DL, TII->get(BLT)).addReg(Ris).addReg(R7b2).addMBB(FiveMBB1); ++ BuildMI(FiveMBB, DL, TII->get(ADDI), R7b3).addReg(ZERO).addImm(3); ++ BuildMI(FiveMBB, DL, TII->get(BLT)).addReg(Ris).addReg(R7b3).addMBB(FiveMBB2); ++ BuildMI(FiveMBB, DL, TII->get(LoongArch::B32)).addMBB(FiveMBB3); ++ ++ BuildMI(FiveMBB3, DL, TII->get(LoongArch::SLLI_W), R7r80_3) ++ .addReg(SrcValReg) ++ .addImm(24); ++ BuildMI(FiveMBB3, DL, TII->get(LoongArch::LU12I_W), R7r81l_3).addImm(0x00fff); ++ BuildMI(FiveMBB3, DL, TII->get(LoongArch::ORI32), R7r81_3) ++ .addReg(R7r81l_3) ++ .addImm(0xfff); ++ BuildMI(FiveMBB3, DL, TII->get(LoongArch::AND32), R7r82l_3) ++ .addReg(R70) ++ .addReg(R7r81_3); ++ BuildMI(FiveMBB3, DL, TII->get(LoongArch::OR32), R7r82_3) ++ .addReg(R7r82l_3) ++ .addReg(R7r80_3); ++ BuildMI(FiveMBB3, DL, TII->get(LoongArch::XVINSGR2VR_W), tmp_Dst73) ++ .addReg(SrcVecReg) ++ .addReg(R7r82_3) ++ .addImm(5); ++ 
BuildMI(FiveMBB3, DL, TII->get(LoongArch::B32)).addMBB(sinkMBB); ++ ++ BuildMI(FiveMBB0, DL, TII->get(LoongArch::SLLI_W), R7r80_3) ++ .addReg(SrcValReg) ++ .addImm(24); ++ BuildMI(FiveMBB0, DL, TII->get(LoongArch::SRLI_W), R7r80l_3) ++ .addReg(R7r80_3) ++ .addImm(8); ++ BuildMI(FiveMBB0, DL, TII->get(LoongArch::LU12I_W), R7r81l_3).addImm(0xff00f); ++ BuildMI(FiveMBB0, DL, TII->get(LoongArch::ORI32), R7r81_3) ++ .addReg(R7r81l_3) ++ .addImm(0xfff); ++ BuildMI(FiveMBB0, DL, TII->get(LoongArch::AND32), R7r82l_3) ++ .addReg(R70) ++ .addReg(R7r81_3); ++ BuildMI(FiveMBB0, DL, TII->get(LoongArch::OR32), R7r82_3) ++ .addReg(R7r82l_3) ++ .addReg(R7r80l_3); ++ BuildMI(FiveMBB0, DL, TII->get(LoongArch::XVINSGR2VR_W), tmp_Dst73) ++ .addReg(SrcVecReg) ++ .addReg(R7r82_3) ++ .addImm(5); ++ BuildMI(FiveMBB0, DL, TII->get(LoongArch::B32)).addMBB(sinkMBB); ++ ++ BuildMI(FiveMBB1, DL, TII->get(LoongArch::SLLI_W), R7r80_3) ++ .addReg(SrcValReg) ++ .addImm(24); ++ BuildMI(FiveMBB1, DL, TII->get(LoongArch::SRLI_W), R7r80l_3) ++ .addReg(R7r80_3) ++ .addImm(16); ++ BuildMI(FiveMBB1, DL, TII->get(LoongArch::LU12I_W), R7r81l_3).addImm(0xffff0); ++ BuildMI(FiveMBB1, DL, TII->get(LoongArch::ORI32), R7r81_3) ++ .addReg(R7r81l_3) ++ .addImm(0x0ff); ++ BuildMI(FiveMBB1, DL, TII->get(LoongArch::AND32), R7r82l_3) ++ .addReg(R70) ++ .addReg(R7r81_3); ++ BuildMI(FiveMBB1, DL, TII->get(LoongArch::OR32), R7r82_3) ++ .addReg(R7r82l_3) ++ .addReg(R7r80l_3); ++ BuildMI(FiveMBB1, DL, TII->get(LoongArch::XVINSGR2VR_W), tmp_Dst73) ++ .addReg(SrcVecReg) ++ .addReg(R7r82_3) ++ .addImm(5); ++ BuildMI(FiveMBB1, DL, TII->get(LoongArch::B32)).addMBB(sinkMBB); ++ ++ BuildMI(FiveMBB2, DL, TII->get(LoongArch::SLLI_W), R7r80_3) ++ .addReg(SrcValReg) ++ .addImm(24); ++ BuildMI(FiveMBB2, DL, TII->get(LoongArch::SRLI_W), R7r80l_3) ++ .addReg(R7r80_3) ++ .addImm(24); ++ BuildMI(FiveMBB2, DL, TII->get(LoongArch::LU12I_W), R7r81l_3).addImm(0xfffff); ++ BuildMI(FiveMBB2, DL, TII->get(LoongArch::ORI32), R7r81_3) ++ .addReg(R7r81l_3) ++ .addImm(0xf00); ++ BuildMI(FiveMBB2, DL, TII->get(LoongArch::AND32), R7r82l_3) ++ .addReg(R70) ++ .addReg(R7r81_3); ++ BuildMI(FiveMBB2, DL, TII->get(LoongArch::OR32), R7r82_3) ++ .addReg(R7r82l_3) ++ .addReg(R7r80l_3); ++ BuildMI(FiveMBB2, DL, TII->get(LoongArch::XVINSGR2VR_W), tmp_Dst73) ++ .addReg(SrcVecReg) ++ .addReg(R7r82_3) ++ .addImm(5); ++ BuildMI(FiveMBB2, DL, TII->get(LoongArch::B32)).addMBB(sinkMBB); ++ ++ BuildMI(SixMBB, DL, TII->get(LoongArch::XVPICKVE2GR_W), R70) ++ .addReg(SrcVecReg) ++ .addImm(6); ++ BuildMI(SixMBB, DL, TII->get(ADDI), R7b1).addReg(ZERO).addImm(1); ++ BuildMI(SixMBB, DL, TII->get(BLT)).addReg(Ris).addReg(R7b1).addMBB(SixMBB0); ++ BuildMI(SixMBB, DL, TII->get(ADDI), R7b2).addReg(ZERO).addImm(2); ++ BuildMI(SixMBB, DL, TII->get(BLT)).addReg(Ris).addReg(R7b2).addMBB(SixMBB1); ++ BuildMI(SixMBB, DL, TII->get(ADDI), R7b3).addReg(ZERO).addImm(3); ++ BuildMI(SixMBB, DL, TII->get(BLT)).addReg(Ris).addReg(R7b3).addMBB(SixMBB2); ++ BuildMI(SixMBB, DL, TII->get(LoongArch::B32)).addMBB(SixMBB3); ++ ++ BuildMI(SixMBB3, DL, TII->get(LoongArch::SLLI_W), R7r80_3) ++ .addReg(SrcValReg) ++ .addImm(24); ++ BuildMI(SixMBB3, DL, TII->get(LoongArch::LU12I_W), R7r81l_3).addImm(0x00fff); ++ BuildMI(SixMBB3, DL, TII->get(LoongArch::ORI32), R7r81_3) ++ .addReg(R7r81l_3) ++ .addImm(0xfff); ++ BuildMI(SixMBB3, DL, TII->get(LoongArch::AND32), R7r82l_3) ++ .addReg(R70) ++ .addReg(R7r81_3); ++ BuildMI(SixMBB3, DL, TII->get(LoongArch::OR32), R7r82_3) ++ .addReg(R7r82l_3) ++ .addReg(R7r80_3); ++ BuildMI(SixMBB3, DL, 
TII->get(LoongArch::XVINSGR2VR_W), tmp_Dst73) ++ .addReg(SrcVecReg) ++ .addReg(R7r82_3) ++ .addImm(6); ++ BuildMI(SixMBB3, DL, TII->get(LoongArch::B32)).addMBB(sinkMBB); ++ ++ BuildMI(SixMBB0, DL, TII->get(LoongArch::SLLI_W), R7r80_3) ++ .addReg(SrcValReg) ++ .addImm(24); ++ BuildMI(SixMBB0, DL, TII->get(LoongArch::SRLI_W), R7r80l_3) ++ .addReg(R7r80_3) ++ .addImm(8); ++ BuildMI(SixMBB0, DL, TII->get(LoongArch::LU12I_W), R7r81l_3).addImm(0xff00f); ++ BuildMI(SixMBB0, DL, TII->get(LoongArch::ORI32), R7r81_3) ++ .addReg(R7r81l_3) ++ .addImm(0xfff); ++ BuildMI(SixMBB0, DL, TII->get(LoongArch::AND32), R7r82l_3) ++ .addReg(R70) ++ .addReg(R7r81_3); ++ BuildMI(SixMBB0, DL, TII->get(LoongArch::OR32), R7r82_3) ++ .addReg(R7r82l_3) ++ .addReg(R7r80l_3); ++ BuildMI(SixMBB0, DL, TII->get(LoongArch::XVINSGR2VR_W), tmp_Dst73) ++ .addReg(SrcVecReg) ++ .addReg(R7r82_3) ++ .addImm(6); ++ BuildMI(SixMBB0, DL, TII->get(LoongArch::B32)).addMBB(sinkMBB); ++ ++ BuildMI(SixMBB1, DL, TII->get(LoongArch::SLLI_W), R7r80_3) ++ .addReg(SrcValReg) ++ .addImm(24); ++ BuildMI(SixMBB1, DL, TII->get(LoongArch::SRLI_W), R7r80l_3) ++ .addReg(R7r80_3) ++ .addImm(16); ++ BuildMI(SixMBB1, DL, TII->get(LoongArch::LU12I_W), R7r81l_3).addImm(0xffff0); ++ BuildMI(SixMBB1, DL, TII->get(LoongArch::ORI32), R7r81_3) ++ .addReg(R7r81l_3) ++ .addImm(0x0ff); ++ BuildMI(SixMBB1, DL, TII->get(LoongArch::AND32), R7r82l_3) ++ .addReg(R70) ++ .addReg(R7r81_3); ++ BuildMI(SixMBB1, DL, TII->get(LoongArch::OR32), R7r82_3) ++ .addReg(R7r82l_3) ++ .addReg(R7r80l_3); ++ BuildMI(SixMBB1, DL, TII->get(LoongArch::XVINSGR2VR_W), tmp_Dst73) ++ .addReg(SrcVecReg) ++ .addReg(R7r82_3) ++ .addImm(6); ++ BuildMI(SixMBB1, DL, TII->get(LoongArch::B32)).addMBB(sinkMBB); ++ ++ BuildMI(SixMBB2, DL, TII->get(LoongArch::SLLI_W), R7r80_3) ++ .addReg(SrcValReg) ++ .addImm(24); ++ BuildMI(SixMBB2, DL, TII->get(LoongArch::SRLI_W), R7r80l_3) ++ .addReg(R7r80_3) ++ .addImm(24); ++ BuildMI(SixMBB2, DL, TII->get(LoongArch::LU12I_W), R7r81l_3).addImm(0xfffff); ++ BuildMI(SixMBB2, DL, TII->get(LoongArch::ORI32), R7r81_3) ++ .addReg(R7r81l_3) ++ .addImm(0xf00); ++ BuildMI(SixMBB2, DL, TII->get(LoongArch::AND32), R7r82l_3) ++ .addReg(R70) ++ .addReg(R7r81_3); ++ BuildMI(SixMBB2, DL, TII->get(LoongArch::OR32), R7r82_3) ++ .addReg(R7r82l_3) ++ .addReg(R7r80l_3); ++ BuildMI(SixMBB2, DL, TII->get(LoongArch::XVINSGR2VR_W), tmp_Dst73) ++ .addReg(SrcVecReg) ++ .addReg(R7r82_3) ++ .addImm(6); ++ BuildMI(SixMBB2, DL, TII->get(LoongArch::B32)).addMBB(sinkMBB); ++ ++ sinkMBB->addSuccessor(exitMBB, BranchProbability::getOne()); ++ ++ BuildMI(sinkMBB, DL, TII->get(LoongArch::XVORI_B), Dest) ++ .addReg(tmp_Dst73) ++ .addImm(0); ++ ++ LivePhysRegs LiveRegs; ++ computeAndAddLiveIns(LiveRegs, *mainMBB); ++ computeAndAddLiveIns(LiveRegs, *SevenMBB); ++ computeAndAddLiveIns(LiveRegs, *SevenMBB0); ++ computeAndAddLiveIns(LiveRegs, *SevenMBB1); ++ computeAndAddLiveIns(LiveRegs, *SevenMBB2); ++ computeAndAddLiveIns(LiveRegs, *SevenMBB3); ++ computeAndAddLiveIns(LiveRegs, *ZeroMBB); ++ computeAndAddLiveIns(LiveRegs, *ZeroMBB0); ++ computeAndAddLiveIns(LiveRegs, *ZeroMBB1); ++ computeAndAddLiveIns(LiveRegs, *ZeroMBB2); ++ computeAndAddLiveIns(LiveRegs, *ZeroMBB3); ++ computeAndAddLiveIns(LiveRegs, *OneMBB); ++ computeAndAddLiveIns(LiveRegs, *OneMBB0); ++ computeAndAddLiveIns(LiveRegs, *OneMBB1); ++ computeAndAddLiveIns(LiveRegs, *OneMBB2); ++ computeAndAddLiveIns(LiveRegs, *OneMBB3); ++ computeAndAddLiveIns(LiveRegs, *TwoMBB); ++ computeAndAddLiveIns(LiveRegs, *TwoMBB0); ++ 
computeAndAddLiveIns(LiveRegs, *TwoMBB1); ++ computeAndAddLiveIns(LiveRegs, *TwoMBB2); ++ computeAndAddLiveIns(LiveRegs, *TwoMBB3); ++ computeAndAddLiveIns(LiveRegs, *ThreeMBB); ++ computeAndAddLiveIns(LiveRegs, *ThreeMBB0); ++ computeAndAddLiveIns(LiveRegs, *ThreeMBB1); ++ computeAndAddLiveIns(LiveRegs, *ThreeMBB2); ++ computeAndAddLiveIns(LiveRegs, *ThreeMBB3); ++ computeAndAddLiveIns(LiveRegs, *FourMBB); ++ computeAndAddLiveIns(LiveRegs, *FourMBB0); ++ computeAndAddLiveIns(LiveRegs, *FourMBB1); ++ computeAndAddLiveIns(LiveRegs, *FourMBB2); ++ computeAndAddLiveIns(LiveRegs, *FourMBB3); ++ computeAndAddLiveIns(LiveRegs, *FiveMBB); ++ computeAndAddLiveIns(LiveRegs, *FiveMBB0); ++ computeAndAddLiveIns(LiveRegs, *FiveMBB1); ++ computeAndAddLiveIns(LiveRegs, *FiveMBB2); ++ computeAndAddLiveIns(LiveRegs, *FiveMBB3); ++ computeAndAddLiveIns(LiveRegs, *SixMBB); ++ computeAndAddLiveIns(LiveRegs, *SixMBB0); ++ computeAndAddLiveIns(LiveRegs, *SixMBB1); ++ computeAndAddLiveIns(LiveRegs, *SixMBB2); ++ computeAndAddLiveIns(LiveRegs, *SixMBB3); ++ computeAndAddLiveIns(LiveRegs, *sinkMBB); ++ computeAndAddLiveIns(LiveRegs, *exitMBB); ++ ++ NMBBI = BB.end(); ++ I->eraseFromParent(); ++ ++ return true; ++} ++ ++bool LoongArchExpandPseudo::expandAtomicBinOpSubword( ++ MachineBasicBlock &BB, MachineBasicBlock::iterator I, ++ MachineBasicBlock::iterator &NMBBI) { ++ ++ MachineFunction *MF = BB.getParent(); ++ ++ DebugLoc DL = I->getDebugLoc(); ++ unsigned LL, SC; ++ unsigned BEQ = LoongArch::BEQ32; ++ unsigned SEOp = LoongArch::EXT_W_H32; ++ ++ LL = LoongArch::LL_W; ++ SC = LoongArch::SC_W; ++ ++ bool IsSwap = false; ++ bool IsNand = false; ++ bool IsMAX = false; ++ bool IsMIN = false; ++ bool IsUnsigned = false; ++ ++ unsigned Opcode = 0; ++ switch (I->getOpcode()) { ++ case LoongArch::ATOMIC_LOAD_NAND_I8_POSTRA: ++ SEOp = LoongArch::EXT_W_B32; ++ LLVM_FALLTHROUGH; ++ case LoongArch::ATOMIC_LOAD_NAND_I16_POSTRA: ++ IsNand = true; ++ break; ++ case LoongArch::ATOMIC_SWAP_I8_POSTRA: ++ SEOp = LoongArch::EXT_W_B32; ++ LLVM_FALLTHROUGH; ++ case LoongArch::ATOMIC_SWAP_I16_POSTRA: ++ IsSwap = true; ++ break; ++ case LoongArch::ATOMIC_LOAD_ADD_I8_POSTRA: ++ SEOp = LoongArch::EXT_W_B32; ++ LLVM_FALLTHROUGH; ++ case LoongArch::ATOMIC_LOAD_ADD_I16_POSTRA: ++ Opcode = LoongArch::ADD_W; ++ break; ++ case LoongArch::ATOMIC_LOAD_MAX_I8_POSTRA: ++ SEOp = LoongArch::EXT_W_B32; ++ LLVM_FALLTHROUGH; ++ case LoongArch::ATOMIC_LOAD_MAX_I16_POSTRA: ++ Opcode = LoongArch::AMMAX_DB_W; ++ IsMAX = true; ++ break; ++ case LoongArch::ATOMIC_LOAD_MIN_I8_POSTRA: ++ SEOp = LoongArch::EXT_W_B32; ++ LLVM_FALLTHROUGH; ++ case LoongArch::ATOMIC_LOAD_MIN_I16_POSTRA: ++ Opcode = LoongArch::AMMIN_DB_W; ++ IsMIN = true; ++ break; ++ case LoongArch::ATOMIC_LOAD_UMAX_I8_POSTRA: ++ SEOp = LoongArch::EXT_W_B32; ++ LLVM_FALLTHROUGH; ++ case LoongArch::ATOMIC_LOAD_UMAX_I16_POSTRA: ++ Opcode = LoongArch::AMMAX_DB_WU; ++ IsMAX = true; ++ IsUnsigned = true; ++ break; ++ case LoongArch::ATOMIC_LOAD_UMIN_I8_POSTRA: ++ SEOp = LoongArch::EXT_W_B32; ++ LLVM_FALLTHROUGH; ++ case LoongArch::ATOMIC_LOAD_UMIN_I16_POSTRA: ++ Opcode = LoongArch::AMMIN_DB_WU; ++ IsMIN = true; ++ IsUnsigned = true; ++ break; ++ case LoongArch::ATOMIC_LOAD_SUB_I8_POSTRA: ++ SEOp = LoongArch::EXT_W_B32; ++ LLVM_FALLTHROUGH; ++ case LoongArch::ATOMIC_LOAD_SUB_I16_POSTRA: ++ Opcode = LoongArch::SUB_W; ++ break; ++ case LoongArch::ATOMIC_LOAD_AND_I8_POSTRA: ++ SEOp = LoongArch::EXT_W_B32; ++ LLVM_FALLTHROUGH; ++ case LoongArch::ATOMIC_LOAD_AND_I16_POSTRA: ++ Opcode = LoongArch::AND32; ++ 
break; ++ case LoongArch::ATOMIC_LOAD_OR_I8_POSTRA: ++ SEOp = LoongArch::EXT_W_B32; ++ LLVM_FALLTHROUGH; ++ case LoongArch::ATOMIC_LOAD_OR_I16_POSTRA: ++ Opcode = LoongArch::OR32; ++ break; ++ case LoongArch::ATOMIC_LOAD_XOR_I8_POSTRA: ++ SEOp = LoongArch::EXT_W_B32; ++ LLVM_FALLTHROUGH; ++ case LoongArch::ATOMIC_LOAD_XOR_I16_POSTRA: ++ Opcode = LoongArch::XOR32; ++ break; ++ default: ++ llvm_unreachable("Unknown subword atomic pseudo for expansion!"); ++ } ++ ++ unsigned Dest = I->getOperand(0).getReg(); ++ unsigned Ptr = I->getOperand(1).getReg(); ++ unsigned Incr = I->getOperand(2).getReg(); ++ unsigned Mask = I->getOperand(3).getReg(); ++ unsigned Mask2 = I->getOperand(4).getReg(); ++ unsigned ShiftAmnt = I->getOperand(5).getReg(); ++ unsigned OldVal = I->getOperand(6).getReg(); ++ unsigned BinOpRes = I->getOperand(7).getReg(); ++ unsigned StoreVal = I->getOperand(8).getReg(); ++ ++ const BasicBlock *LLVM_BB = BB.getBasicBlock(); ++ MachineBasicBlock *loopMBB = MF->CreateMachineBasicBlock(LLVM_BB); ++ MachineBasicBlock *sinkMBB = MF->CreateMachineBasicBlock(LLVM_BB); ++ MachineBasicBlock *exitMBB = MF->CreateMachineBasicBlock(LLVM_BB); ++ MachineFunction::iterator It = ++BB.getIterator(); ++ MF->insert(It, loopMBB); ++ MF->insert(It, sinkMBB); ++ MF->insert(It, exitMBB); ++ ++ exitMBB->splice(exitMBB->begin(), &BB, std::next(I), BB.end()); ++ exitMBB->transferSuccessorsAndUpdatePHIs(&BB); ++ ++ BB.addSuccessor(loopMBB, BranchProbability::getOne()); ++ loopMBB->addSuccessor(sinkMBB); ++ loopMBB->addSuccessor(loopMBB); ++ loopMBB->normalizeSuccProbs(); ++ ++ BuildMI(loopMBB, DL, TII->get(LL), OldVal).addReg(Ptr).addImm(0); ++ if (IsNand) { ++ // and andres, oldval, incr2 ++ // nor binopres, $0, andres ++ // and newval, binopres, mask ++ BuildMI(loopMBB, DL, TII->get(LoongArch::AND32), BinOpRes) ++ .addReg(OldVal) ++ .addReg(Incr); ++ BuildMI(loopMBB, DL, TII->get(LoongArch::NOR32), BinOpRes) ++ .addReg(LoongArch::ZERO) ++ .addReg(BinOpRes); ++ BuildMI(loopMBB, DL, TII->get(LoongArch::AND32), BinOpRes) ++ .addReg(BinOpRes) ++ .addReg(Mask); ++ } else if (IsMAX || IsMIN) { ++ ++ unsigned SLTScratch4 = IsUnsigned ? LoongArch::SLTU32 : LoongArch::SLT32; ++ unsigned CMPIncr = IsMAX ? LoongArch::MASKEQZ32 : LoongArch::MASKNEZ32; ++ unsigned CMPOldVal = IsMAX ? 
LoongArch::MASKNEZ32 : LoongArch::MASKEQZ32; ++ ++ unsigned Scratch4 = I->getOperand(9).getReg(); ++ unsigned Scratch5 = I->getOperand(10).getReg(); ++ ++ BuildMI(loopMBB, DL, TII->get(LoongArch::AND32), Scratch5) ++ .addReg(OldVal) ++ .addReg(Mask); ++ BuildMI(loopMBB, DL, TII->get(LoongArch::AND32), Incr) ++ .addReg(Incr) ++ .addReg(Mask); ++ BuildMI(loopMBB, DL, TII->get(SLTScratch4), Scratch4) ++ .addReg(Scratch5) ++ .addReg(Incr); ++ BuildMI(loopMBB, DL, TII->get(CMPOldVal), BinOpRes) ++ .addReg(Scratch5) ++ .addReg(Scratch4); ++ BuildMI(loopMBB, DL, TII->get(CMPIncr), Scratch4) ++ .addReg(Incr) ++ .addReg(Scratch4); ++ BuildMI(loopMBB, DL, TII->get(LoongArch::OR32), BinOpRes) ++ .addReg(BinOpRes) ++ .addReg(Scratch4); ++ ++ } else if (!IsSwap) { ++ // binopres, oldval, incr2 ++ // and newval, binopres, mask ++ BuildMI(loopMBB, DL, TII->get(Opcode), BinOpRes) ++ .addReg(OldVal) ++ .addReg(Incr); ++ BuildMI(loopMBB, DL, TII->get(LoongArch::AND32), BinOpRes) ++ .addReg(BinOpRes) ++ .addReg(Mask); ++ } else { // atomic.swap ++ // and newval, incr2, mask ++ BuildMI(loopMBB, DL, TII->get(LoongArch::AND32), BinOpRes) ++ .addReg(Incr) ++ .addReg(Mask); ++ } ++ ++ // and StoreVal, OlddVal, Mask2 ++ // or StoreVal, StoreVal, BinOpRes ++ // StoreVal = sc StoreVal, 0(Ptr) ++ // beq StoreVal, zero, loopMBB ++ BuildMI(loopMBB, DL, TII->get(LoongArch::AND32), StoreVal) ++ .addReg(OldVal) ++ .addReg(Mask2); ++ BuildMI(loopMBB, DL, TII->get(LoongArch::OR32), StoreVal) ++ .addReg(StoreVal) ++ .addReg(BinOpRes); ++ BuildMI(loopMBB, DL, TII->get(SC), StoreVal) ++ .addReg(StoreVal) ++ .addReg(Ptr) ++ .addImm(0); ++ BuildMI(loopMBB, DL, TII->get(BEQ)) ++ .addReg(StoreVal) ++ .addReg(LoongArch::ZERO) ++ .addMBB(loopMBB); ++ ++ // sinkMBB: ++ // and maskedoldval1,oldval,mask ++ // srl srlres,maskedoldval1,shiftamt ++ // sign_extend dest,srlres ++ ++ sinkMBB->addSuccessor(exitMBB, BranchProbability::getOne()); ++ ++ BuildMI(sinkMBB, DL, TII->get(LoongArch::AND32), Dest) ++ .addReg(OldVal) ++ .addReg(Mask); ++ BuildMI(sinkMBB, DL, TII->get(LoongArch::SRL_W), Dest) ++ .addReg(Dest) ++ .addReg(ShiftAmnt); ++ ++ BuildMI(sinkMBB, DL, TII->get(SEOp), Dest).addReg(Dest); ++ ++ LivePhysRegs LiveRegs; ++ computeAndAddLiveIns(LiveRegs, *loopMBB); ++ computeAndAddLiveIns(LiveRegs, *sinkMBB); ++ computeAndAddLiveIns(LiveRegs, *exitMBB); ++ ++ NMBBI = BB.end(); ++ I->eraseFromParent(); ++ ++ return true; ++} ++ ++bool LoongArchExpandPseudo::expandAtomicBinOp(MachineBasicBlock &BB, ++ MachineBasicBlock::iterator I, ++ MachineBasicBlock::iterator &NMBBI, ++ unsigned Size) { ++ MachineFunction *MF = BB.getParent(); ++ ++ DebugLoc DL = I->getDebugLoc(); ++ ++ unsigned LL, SC, ZERO, BEQ, SUB; ++ if (Size == 4) { ++ LL = LoongArch::LL_W; ++ SC = LoongArch::SC_W; ++ BEQ = LoongArch::BEQ32; ++ ZERO = LoongArch::ZERO; ++ SUB = LoongArch::SUB_W; ++ } else { ++ LL = LoongArch::LL_D; ++ SC = LoongArch::SC_D; ++ ZERO = LoongArch::ZERO_64; ++ BEQ = LoongArch::BEQ; ++ SUB = LoongArch::SUB_D; ++ } ++ ++ unsigned OldVal = I->getOperand(0).getReg(); ++ unsigned Ptr = I->getOperand(1).getReg(); ++ unsigned Incr = I->getOperand(2).getReg(); ++ unsigned Scratch = I->getOperand(3).getReg(); ++ ++ unsigned Opcode = 0; ++ unsigned OR = 0; ++ unsigned AND = 0; ++ unsigned NOR = 0; ++ bool IsNand = false; ++ bool IsSub = false; ++ switch (I->getOpcode()) { ++ case LoongArch::ATOMIC_LOAD_ADD_I32_POSTRA: ++ Opcode = LoongArch::AMADD_DB_W; ++ break; ++ case LoongArch::ATOMIC_LOAD_SUB_I32_POSTRA: ++ IsSub = true; ++ Opcode = LoongArch::AMADD_DB_W; ++ 
break; ++ case LoongArch::ATOMIC_LOAD_AND_I32_POSTRA: ++ Opcode = LoongArch::AMAND_DB_W; ++ break; ++ case LoongArch::ATOMIC_LOAD_OR_I32_POSTRA: ++ Opcode = LoongArch::AMOR_DB_W; ++ break; ++ case LoongArch::ATOMIC_LOAD_XOR_I32_POSTRA: ++ Opcode = LoongArch::AMXOR_DB_W; ++ break; ++ case LoongArch::ATOMIC_LOAD_NAND_I32_POSTRA: ++ IsNand = true; ++ AND = LoongArch::AND32; ++ NOR = LoongArch::NOR32; ++ break; ++ case LoongArch::ATOMIC_SWAP_I32_POSTRA: ++ OR = LoongArch::AMSWAP_DB_W; ++ break; ++ case LoongArch::ATOMIC_LOAD_MAX_I32_POSTRA: ++ Opcode = LoongArch::AMMAX_DB_W; ++ break; ++ case LoongArch::ATOMIC_LOAD_MIN_I32_POSTRA: ++ Opcode = LoongArch::AMMIN_DB_W; ++ break; ++ case LoongArch::ATOMIC_LOAD_UMAX_I32_POSTRA: ++ Opcode = LoongArch::AMMAX_DB_WU; ++ break; ++ case LoongArch::ATOMIC_LOAD_UMIN_I32_POSTRA: ++ Opcode = LoongArch::AMMIN_DB_WU; ++ break; ++ case LoongArch::ATOMIC_LOAD_ADD_I64_POSTRA: ++ Opcode = LoongArch::AMADD_DB_D; ++ break; ++ case LoongArch::ATOMIC_LOAD_SUB_I64_POSTRA: ++ IsSub = true; ++ Opcode = LoongArch::AMADD_DB_D; ++ break; ++ case LoongArch::ATOMIC_LOAD_AND_I64_POSTRA: ++ Opcode = LoongArch::AMAND_DB_D; ++ break; ++ case LoongArch::ATOMIC_LOAD_OR_I64_POSTRA: ++ Opcode = LoongArch::AMOR_DB_D; ++ break; ++ case LoongArch::ATOMIC_LOAD_XOR_I64_POSTRA: ++ Opcode = LoongArch::AMXOR_DB_D; ++ break; ++ case LoongArch::ATOMIC_LOAD_NAND_I64_POSTRA: ++ IsNand = true; ++ AND = LoongArch::AND; ++ NOR = LoongArch::NOR; ++ break; ++ case LoongArch::ATOMIC_SWAP_I64_POSTRA: ++ OR = LoongArch::AMSWAP_DB_D; ++ break; ++ case LoongArch::ATOMIC_LOAD_MAX_I64_POSTRA: ++ Opcode = LoongArch::AMMAX_DB_D; ++ break; ++ case LoongArch::ATOMIC_LOAD_MIN_I64_POSTRA: ++ Opcode = LoongArch::AMMIN_DB_D; ++ break; ++ case LoongArch::ATOMIC_LOAD_UMAX_I64_POSTRA: ++ Opcode = LoongArch::AMMAX_DB_DU; ++ break; ++ case LoongArch::ATOMIC_LOAD_UMIN_I64_POSTRA: ++ Opcode = LoongArch::AMMIN_DB_DU; ++ break; ++ default: ++ llvm_unreachable("Unknown pseudo atomic!"); ++ } ++ ++ const BasicBlock *LLVM_BB = BB.getBasicBlock(); ++ MachineBasicBlock *loopMBB = MF->CreateMachineBasicBlock(LLVM_BB); ++ MachineBasicBlock *exitMBB = MF->CreateMachineBasicBlock(LLVM_BB); ++ MachineFunction::iterator It = ++BB.getIterator(); ++ MF->insert(It, loopMBB); ++ MF->insert(It, exitMBB); ++ ++ exitMBB->splice(exitMBB->begin(), &BB, std::next(I), BB.end()); ++ exitMBB->transferSuccessorsAndUpdatePHIs(&BB); ++ ++ BB.addSuccessor(loopMBB, BranchProbability::getOne()); ++ loopMBB->addSuccessor(exitMBB); ++ loopMBB->addSuccessor(loopMBB); ++ loopMBB->normalizeSuccProbs(); ++ ++ assert((OldVal != Ptr) && "Clobbered the wrong ptr reg!"); ++ assert((OldVal != Incr) && "Clobbered the wrong reg!"); ++ if (Opcode) { ++ if(IsSub){ ++ BuildMI(loopMBB, DL, TII->get(SUB), Scratch).addReg(ZERO).addReg(Incr); ++ BuildMI(loopMBB, DL, TII->get(Opcode), OldVal).addReg(Scratch).addReg(Ptr).addImm(0); ++ } ++ else{ ++ BuildMI(loopMBB, DL, TII->get(Opcode), OldVal).addReg(Incr).addReg(Ptr).addImm(0); ++ } ++ } else if (IsNand) { ++ assert(AND && NOR && ++ "Unknown nand instruction for atomic pseudo expansion"); ++ BuildMI(loopMBB, DL, TII->get(LL), OldVal).addReg(Ptr).addImm(0); ++ BuildMI(loopMBB, DL, TII->get(AND), Scratch).addReg(OldVal).addReg(Incr); ++ BuildMI(loopMBB, DL, TII->get(NOR), Scratch).addReg(ZERO).addReg(Scratch); ++ BuildMI(loopMBB, DL, TII->get(SC), Scratch).addReg(Scratch).addReg(Ptr).addImm(0); ++ BuildMI(loopMBB, DL, TII->get(BEQ)).addReg(Scratch).addReg(ZERO).addMBB(loopMBB); ++ } else { ++ assert(OR && "Unknown instruction 
for atomic pseudo expansion!"); ++ BuildMI(loopMBB, DL, TII->get(OR), OldVal).addReg(Incr).addReg(Ptr).addImm(0); ++ } ++ ++ ++ NMBBI = BB.end(); ++ I->eraseFromParent(); ++ ++ LivePhysRegs LiveRegs; ++ computeAndAddLiveIns(LiveRegs, *loopMBB); ++ computeAndAddLiveIns(LiveRegs, *exitMBB); ++ ++ return true; ++} ++ ++bool LoongArchExpandPseudo::expandLoadAddr(MachineBasicBlock &BB, ++ MachineBasicBlock::iterator I, ++ MachineBasicBlock::iterator &NMBBI) { ++ MachineFunction *MF = BB.getParent(); ++ MachineInstr &MI = *I; ++ DebugLoc DL = MI.getDebugLoc(); ++ ++ unsigned Op = MI.getOpcode(); ++ unsigned DestReg = MI.getOperand(0).getReg(); ++ unsigned TmpReg; ++ const MachineOperand &MO = MI.getOperand(1); ++ Reloc::Model RM = MF->getTarget().getRelocationModel(); ++ ++ MachineInstrBuilder MIB1, MIB2, MIB3, MIB4, MIB5; ++ unsigned HiFlag, LoFlag, HigherFlag, HighestFlag; ++ unsigned HiOp, LoOp, HigherOp, HighestOp, LastOp; ++ ++ HiOp = LoongArch::PCALAU12I_ri; ++ LoOp = LoongArch::ORI_rri; ++ HigherOp = LoongArch::LU32I_D_ri; ++ HighestOp = LoongArch::LU52I_D_rri; ++ ++ switch (Op) { ++ case LoongArch::LoadAddrLocal: ++ if (RM == Reloc::Static) { // for jit ++ HiFlag = LoongArchII::MO_ABS_HI; ++ LoFlag = LoongArchII::MO_ABS_LO; ++ HigherFlag = LoongArchII::MO_ABS_HIGHER; ++ HighestFlag = LoongArchII::MO_ABS_HIGHEST; ++ // lu12i.w + ori + lu32i.d + lu52i.d ++ HiOp = LoongArch::LU12I_W; ++ LoOp = LoongArch::ORI; ++ HigherOp = LoongArch::LU32I_D; ++ HighestOp = LoongArch::LU52I_D; ++ } else { ++ // pcalau12i + addi.d ++ LoFlag = LoongArchII::MO_PCREL_LO; ++ HiFlag = LoongArchII::MO_PCREL_HI; ++ LoOp = LoongArch::ADDI_D_rri; ++ } ++ break; ++ case LoongArch::LoadAddrLocalRR: ++ // pcalau12i + ori + lu32i.d + lu52i.d + add.d ++ LoFlag = LoongArchII::MO_PCREL_RRLO; ++ HiFlag = LoongArchII::MO_PCREL_RRHI; ++ HigherFlag = LoongArchII::MO_PCREL_RRHIGHER; ++ HighestFlag = LoongArchII::MO_PCREL_RRHIGHEST; ++ LastOp = LoongArch::ADD_D_rrr; ++ break; ++ case LoongArch::LoadAddrGlobal: ++ case LoongArch::LoadAddrGlobal_Alias: ++ // pcalau12i + ld.d ++ LoFlag = LoongArchII::MO_GOT_LO; ++ HiFlag = LoongArchII::MO_GOT_HI; ++ HiOp = LoongArch::PCALAU12I_ri; ++ LoOp = LoongArch::LD_D_rri; ++ break; ++ case LoongArch::LoadAddrGlobalRR: ++ // pcalau12i + ori + lu32i.d + lu52i.d +ldx.d ++ LoFlag = LoongArchII::MO_GOT_RRLO; ++ HiFlag = LoongArchII::MO_GOT_RRHI; ++ HigherFlag = LoongArchII::MO_GOT_RRHIGHER; ++ HighestFlag = LoongArchII::MO_GOT_RRHIGHEST; ++ HiOp = LoongArch::PCALAU12I_ri; ++ LoOp = LoongArch::ORI_rri; ++ HigherOp = LoongArch::LU32I_D_ri; ++ HighestOp = LoongArch::LU52I_D_rri; ++ LastOp = LoongArch::LDX_D_rrr; ++ break; ++ case LoongArch::LoadAddrTLS_LE: ++ // lu12i.w + ori + lu32i.d + lu52i.d ++ LoFlag = LoongArchII::MO_TLSLE_LO; ++ HiFlag = LoongArchII::MO_TLSLE_HI; ++ HigherFlag = LoongArchII::MO_TLSLE_HIGHER; ++ HighestFlag = LoongArchII::MO_TLSLE_HIGHEST; ++ HiOp = LoongArch::LU12I_W_ri; ++ break; ++ case LoongArch::LoadAddrTLS_IE: ++ // pcalau12i + ld.d ++ LoFlag = LoongArchII::MO_TLSIE_LO; ++ HiFlag = LoongArchII::MO_TLSIE_HI; ++ HiOp = LoongArch::PCALAU12I_ri; ++ LoOp = LoongArch::LD_D_rri; ++ break; ++ case LoongArch::LoadAddrTLS_IE_RR: ++ // pcalau12i + ori + lu32i.d + lu52i.d +ldx.d ++ LoFlag = LoongArchII::MO_TLSIE_RRLO; ++ HiFlag = LoongArchII::MO_TLSIE_RRHI; ++ HigherFlag = LoongArchII::MO_TLSIE_RRHIGHER; ++ HighestFlag = LoongArchII::MO_TLSIE_RRHIGHEST; ++ HiOp = LoongArch::PCALAU12I_ri; ++ LoOp = LoongArch::ORI_rri; ++ HigherOp = LoongArch::LU32I_D_ri; ++ HighestOp = 
LoongArch::LU52I_D_rri; ++ LastOp = LoongArch::LDX_D_rrr; ++ break; ++ case LoongArch::LoadAddrTLS_LD: ++ case LoongArch::LoadAddrTLS_GD: ++ // pcalau12i + addi.d ++ LoFlag = LoongArchII::MO_TLSGD_LO; ++ HiFlag = LoongArchII::MO_TLSGD_HI; ++ HiOp = LoongArch::PCALAU12I_ri; ++ LoOp = LoongArch::ADDI_D_rri; ++ break; ++ case LoongArch::LoadAddrTLS_LD_RR: ++ case LoongArch::LoadAddrTLS_GD_RR: ++ // pcalau12i + ori + lu32i.d + lu52i.d + add.d ++ LoFlag = LoongArchII::MO_TLSGD_RRLO; ++ HiFlag = LoongArchII::MO_TLSGD_RRHI; ++ HigherFlag = LoongArchII::MO_TLSGD_RRHIGHER; ++ HighestFlag = LoongArchII::MO_TLSGD_RRHIGHEST; ++ HiOp = LoongArch::PCALAU12I_ri; ++ LoOp = LoongArch::ORI_rri; ++ HigherOp = LoongArch::LU32I_D_ri; ++ HighestOp = LoongArch::LU52I_D_rri; ++ LastOp = LoongArch::ADD_D_rrr; ++ break; ++ default: ++ break; ++ } ++ ++ MIB1 = BuildMI(BB, I, DL, TII->get(HiOp), DestReg); ++ ++ switch (Op) { ++ case LoongArch::LoadAddrLocal: ++ if (RM == Reloc::Static) { // for jit ++ // la.abs rd, symbol ++ MIB2 = BuildMI(BB, I, DL, TII->get(LoOp), DestReg).addReg(DestReg); ++ MIB3 = BuildMI(BB, I, DL, TII->get(HigherOp), DestReg); ++ MIB4 = BuildMI(BB, I, DL, TII->get(HighestOp), DestReg).addReg(DestReg); ++ if (MO.isJTI()) { ++ MIB1.addJumpTableIndex(MO.getIndex(), HiFlag); ++ MIB2.addJumpTableIndex(MO.getIndex(), LoFlag); ++ MIB3.addJumpTableIndex(MO.getIndex(), HigherFlag); ++ MIB4.addJumpTableIndex(MO.getIndex(), HighestFlag); ++ } else if (MO.isBlockAddress()) { ++ MIB1.addBlockAddress(MO.getBlockAddress(), 0, HiFlag); ++ MIB2.addBlockAddress(MO.getBlockAddress(), 0, LoFlag); ++ MIB3.addBlockAddress(MO.getBlockAddress(), 0, HigherFlag); ++ MIB4.addBlockAddress(MO.getBlockAddress(), 0, HighestFlag); ++ } else { ++ MIB1.addDisp(MO, 0, HiFlag); ++ MIB2.addDisp(MO, 0, LoFlag); ++ MIB3.addDisp(MO, 0, HigherFlag); ++ MIB4.addDisp(MO, 0, HighestFlag); ++ } ++ break; ++ } ++ LLVM_FALLTHROUGH; ++ case LoongArch::LoadAddrGlobal: // la.global rd, symbol ++ case LoongArch::LoadAddrGlobal_Alias: // la rd, symbol ++ case LoongArch::LoadAddrTLS_IE: // la.tls.ie rd, symbol ++ case LoongArch::LoadAddrTLS_LD: // la.tls.ld rd, symbol ++ case LoongArch::LoadAddrTLS_GD: // la.tls.gd rd, symbol ++ MIB2 = BuildMI(BB, I, DL, TII->get(LoOp), DestReg) ++ .addReg(DestReg); ++ if (MO.isJTI()) { ++ MIB1.addJumpTableIndex(MO.getIndex(), HiFlag); ++ MIB2.addJumpTableIndex(MO.getIndex(), LoFlag); ++ } else if (MO.isBlockAddress()) { ++ MIB1.addBlockAddress(MO.getBlockAddress(), 0, HiFlag); ++ MIB2.addBlockAddress(MO.getBlockAddress(), 0, LoFlag); ++ } else { ++ MIB1.addDisp(MO, 0, HiFlag); ++ MIB2.addDisp(MO, 0, LoFlag); ++ } ++ break; ++ ++ case LoongArch::LoadAddrLocalRR: //la.local rd, rs, symbol ++ case LoongArch::LoadAddrGlobalRR: // la.global rd, rs, symbol ++ case LoongArch::LoadAddrTLS_IE_RR: // la.tls.ie rd, rs, symbol ++ case LoongArch::LoadAddrTLS_LD_RR: // la.tls.ld rd, rs, symbol ++ case LoongArch::LoadAddrTLS_GD_RR: // la.tls.gd rd, rs, symbol ++ TmpReg = MI.getOperand(MI.getNumOperands()-1).getReg(); ++ MIB2 = BuildMI(BB, I, DL, TII->get(LoOp), TmpReg) ++ .addReg(TmpReg); ++ MIB3 = BuildMI(BB, I, DL, TII->get(HigherOp), TmpReg); ++ MIB4 = BuildMI(BB, I, DL, TII->get(HighestOp), TmpReg) ++ .addReg(TmpReg); ++ MIB5 = BuildMI(BB, I, DL, TII->get(LastOp), DestReg) ++ .addReg(DestReg) ++ .addReg(TmpReg); ++ if (MO.isJTI()) { ++ MIB1.addJumpTableIndex(MO.getIndex(), HiFlag); ++ MIB2.addJumpTableIndex(MO.getIndex(), LoFlag); ++ MIB3.addJumpTableIndex(MO.getIndex(), HigherFlag); ++ 
MIB4.addJumpTableIndex(MO.getIndex(), HighestFlag); ++ } else if (MO.isBlockAddress()) { ++ MIB1.addBlockAddress(MO.getBlockAddress(), 0, HiFlag); ++ MIB2.addBlockAddress(MO.getBlockAddress(), 0, LoFlag); ++ MIB3.addBlockAddress(MO.getBlockAddress(), 0, HigherFlag); ++ MIB4.addBlockAddress(MO.getBlockAddress(), 0, HighestFlag); ++ } else { ++ MIB1.addDisp(MO, 0, HiFlag); ++ MIB2.addDisp(MO, 0, LoFlag); ++ MIB3.addDisp(MO, 0, HigherFlag); ++ MIB4.addDisp(MO, 0, HighestFlag); ++ } ++ break; ++ case LoongArch::LoadAddrTLS_LE: // la.tls.le rd, symbol ++ MIB2 = BuildMI(BB, I, DL, TII->get(LoOp), DestReg) ++ .addReg(DestReg); ++ MIB3 = BuildMI(BB, I, DL, TII->get(HigherOp), DestReg); ++ MIB4 = BuildMI(BB, I, DL, TII->get(HighestOp), DestReg) ++ .addReg(DestReg); ++ if (MO.isJTI()) { ++ MIB1.addJumpTableIndex(MO.getIndex(), HiFlag); ++ MIB2.addJumpTableIndex(MO.getIndex(), LoFlag); ++ MIB3.addJumpTableIndex(MO.getIndex(), HigherFlag); ++ MIB4.addJumpTableIndex(MO.getIndex(), HighestFlag); ++ } else if (MO.isBlockAddress()) { ++ MIB1.addBlockAddress(MO.getBlockAddress(), 0, HiFlag); ++ MIB2.addBlockAddress(MO.getBlockAddress(), 0, LoFlag); ++ MIB3.addBlockAddress(MO.getBlockAddress(), 0, HigherFlag); ++ MIB4.addBlockAddress(MO.getBlockAddress(), 0, HighestFlag); ++ } else { ++ MIB1.addDisp(MO, 0, HiFlag); ++ MIB2.addDisp(MO, 0, LoFlag); ++ MIB3.addDisp(MO, 0, HigherFlag); ++ MIB4.addDisp(MO, 0, HighestFlag); ++ } ++ break; ++ default: ++ break; ++ } ++ ++ MI.eraseFromParent(); ++ ++ return true; ++} ++ ++bool LoongArchExpandPseudo::expandPseudoTailCall( ++ MachineBasicBlock &BB, MachineBasicBlock::iterator I) { ++ ++ MachineInstr &MI = *I; ++ DebugLoc DL = MI.getDebugLoc(); ++ ++ const MachineOperand &MO = MI.getOperand(0); ++ ++ unsigned NoFlag = LoongArchII::MO_NO_FLAG; ++ ++ MachineInstrBuilder MIB = ++ BuildMI(BB, I, DL, TII->get(LoongArch::PseudoTailReturn)); ++ ++ if (MO.isSymbol()) { ++ MIB.addExternalSymbol(MO.getSymbolName(), NoFlag); ++ } else { ++ MIB.addDisp(MO, 0, NoFlag); ++ } ++ ++ MI.eraseFromParent(); ++ ++ return true; ++} ++ ++bool LoongArchExpandPseudo::expandPseudoCall(MachineBasicBlock &BB, ++ MachineBasicBlock::iterator I, ++ MachineBasicBlock::iterator &NMBBI) { ++ MachineFunction *MF = BB.getParent(); ++ MachineInstr &MI = *I; ++ DebugLoc DL = MI.getDebugLoc(); ++ CodeModel::Model M = MF->getTarget().getCodeModel(); ++ Reloc::Model RM = MF->getTarget().getRelocationModel(); ++ ++ unsigned Ra = LoongArch::RA_64; ++ const MachineOperand &MO = MI.getOperand(0); ++ unsigned HiFlag, LoFlag, HigherFlag, HighestFlag, NoFlag; ++ ++ NoFlag = LoongArchII::MO_NO_FLAG; ++ ++ if (RM == Reloc::Static) { // for jit ++ MachineInstrBuilder MIB1, MIB2, MIB3, MIB4, MIB5; ++ ++ HiFlag = LoongArchII::MO_ABS_HI; ++ LoFlag = LoongArchII::MO_ABS_LO; ++ HigherFlag = LoongArchII::MO_ABS_HIGHER; ++ HighestFlag = LoongArchII::MO_ABS_HIGHEST; ++ // lu12i.w + ori + lu32i.d + lu52i.d + jirl ++ ++ MIB1 = BuildMI(BB, I, DL, TII->get(LoongArch::LU12I_W), Ra); ++ MIB2 = BuildMI(BB, I, DL, TII->get(LoongArch::ORI), Ra) ++ .addReg(Ra); ++ MIB3 = BuildMI(BB, I, DL, TII->get(LoongArch::LU32I_D), Ra); ++ MIB4 = BuildMI(BB, I, DL, TII->get(LoongArch::LU52I_D), Ra) ++ .addReg(Ra); ++ MIB5 = ++ BuildMI(BB, I, DL, TII->get(LoongArch::JIRL), Ra).addReg(Ra).addImm(0); ++ if (MO.isSymbol()) { ++ MIB1.addExternalSymbol(MO.getSymbolName(), HiFlag); ++ MIB2.addExternalSymbol(MO.getSymbolName(), LoFlag); ++ MIB3.addExternalSymbol(MO.getSymbolName(), HigherFlag); ++ MIB4.addExternalSymbol(MO.getSymbolName(), HighestFlag); 
++ } else { ++ MIB1.addDisp(MO, 0, HiFlag); ++ MIB2.addDisp(MO, 0, LoFlag); ++ MIB3.addDisp(MO, 0, HigherFlag); ++ MIB4.addDisp(MO, 0, HighestFlag); ++ } ++ } else { ++ // bl ++ MachineInstrBuilder MIB1; ++ MIB1 = BuildMI(BB, I, DL, TII->get(LoongArch::BL)); ++ if (MO.isSymbol()) { ++ MIB1.addExternalSymbol(MO.getSymbolName(), NoFlag); ++ } else { ++ MIB1.addDisp(MO, 0, NoFlag); ++ } ++ } ++ ++ MI.eraseFromParent(); ++ ++ return true; ++} ++ ++bool LoongArchExpandPseudo::expandPseudoTEQ(MachineBasicBlock &BB, ++ MachineBasicBlock::iterator I, ++ MachineBasicBlock::iterator &NMBBI) { ++ MachineInstr &MI = *I; ++ DebugLoc DL = MI.getDebugLoc(); ++ ++ unsigned Divisor = MI.getOperand(0).getReg(); ++ unsigned BneOp = LoongArch::BNE; ++ unsigned Zero = LoongArch::ZERO_64; ++ ++ // beq $Divisor, $zero, 8 ++ BuildMI(BB, I, DL, TII->get(BneOp), Divisor) ++ .addReg(Zero) ++ .addImm(8); ++ // break 7 ++ BuildMI(BB, I, DL, TII->get(LoongArch::BREAK)) ++ .addImm(7);; ++ ++ MI.eraseFromParent(); ++ ++ return true; ++} ++bool LoongArchExpandPseudo::expandMI(MachineBasicBlock &MBB, ++ MachineBasicBlock::iterator MBBI, ++ MachineBasicBlock::iterator &NMBB) { ++ ++ bool Modified = false; ++ ++ switch (MBBI->getOpcode()) { ++ case LoongArch::PseudoTEQ: ++ return expandPseudoTEQ(MBB, MBBI, NMBB); ++ case LoongArch::PseudoCall: ++ return expandPseudoCall(MBB, MBBI, NMBB); ++ case LoongArch::PseudoTailCall: ++ return expandPseudoTailCall(MBB, MBBI); ++ case LoongArch::LoadAddrLocal: ++ case LoongArch::LoadAddrLocalRR: ++ case LoongArch::LoadAddrGlobal: ++ case LoongArch::LoadAddrGlobalRR: ++ case LoongArch::LoadAddrGlobal_Alias: ++ case LoongArch::LoadAddrTLS_LD: ++ case LoongArch::LoadAddrTLS_LD_RR: ++ case LoongArch::LoadAddrTLS_GD: ++ case LoongArch::LoadAddrTLS_GD_RR: ++ case LoongArch::LoadAddrTLS_IE: ++ case LoongArch::LoadAddrTLS_IE_RR: ++ case LoongArch::LoadAddrTLS_LE: ++ return expandLoadAddr(MBB, MBBI, NMBB); ++ case LoongArch::ATOMIC_CMP_SWAP_I32_POSTRA: ++ case LoongArch::ATOMIC_CMP_SWAP_I64_POSTRA: ++ return expandAtomicCmpSwap(MBB, MBBI, NMBB); ++ case LoongArch::ATOMIC_CMP_SWAP_I8_POSTRA: ++ case LoongArch::ATOMIC_CMP_SWAP_I16_POSTRA: ++ return expandAtomicCmpSwapSubword(MBB, MBBI, NMBB); ++ case LoongArch::ATOMIC_SWAP_I8_POSTRA: ++ case LoongArch::ATOMIC_SWAP_I16_POSTRA: ++ case LoongArch::ATOMIC_LOAD_NAND_I8_POSTRA: ++ case LoongArch::ATOMIC_LOAD_NAND_I16_POSTRA: ++ case LoongArch::ATOMIC_LOAD_ADD_I8_POSTRA: ++ case LoongArch::ATOMIC_LOAD_ADD_I16_POSTRA: ++ case LoongArch::ATOMIC_LOAD_SUB_I8_POSTRA: ++ case LoongArch::ATOMIC_LOAD_SUB_I16_POSTRA: ++ case LoongArch::ATOMIC_LOAD_AND_I8_POSTRA: ++ case LoongArch::ATOMIC_LOAD_AND_I16_POSTRA: ++ case LoongArch::ATOMIC_LOAD_OR_I8_POSTRA: ++ case LoongArch::ATOMIC_LOAD_OR_I16_POSTRA: ++ case LoongArch::ATOMIC_LOAD_XOR_I8_POSTRA: ++ case LoongArch::ATOMIC_LOAD_XOR_I16_POSTRA: ++ case LoongArch::ATOMIC_LOAD_MAX_I8_POSTRA: ++ case LoongArch::ATOMIC_LOAD_MAX_I16_POSTRA: ++ case LoongArch::ATOMIC_LOAD_MIN_I8_POSTRA: ++ case LoongArch::ATOMIC_LOAD_MIN_I16_POSTRA: ++ case LoongArch::ATOMIC_LOAD_UMAX_I8_POSTRA: ++ case LoongArch::ATOMIC_LOAD_UMAX_I16_POSTRA: ++ case LoongArch::ATOMIC_LOAD_UMIN_I8_POSTRA: ++ case LoongArch::ATOMIC_LOAD_UMIN_I16_POSTRA: ++ return expandAtomicBinOpSubword(MBB, MBBI, NMBB); ++ case LoongArch::XINSERT_B_VIDX_PSEUDO_POSTRA: ++ case LoongArch::XINSERT_B_VIDX64_PSEUDO_POSTRA: ++ return expandXINSERT_BOp(MBB, MBBI, NMBB); ++ case LoongArch::INSERT_H_VIDX64_PSEUDO_POSTRA: ++ return expandINSERT_HOp(MBB, MBBI, NMBB); ++ case 
LoongArch::XINSERT_FW_VIDX_PSEUDO_POSTRA: ++ case LoongArch::XINSERT_FW_VIDX64_PSEUDO_POSTRA: ++ return expandXINSERT_FWOp(MBB, MBBI, NMBB); ++ case LoongArch::ATOMIC_LOAD_ADD_I32_POSTRA: ++ case LoongArch::ATOMIC_LOAD_SUB_I32_POSTRA: ++ case LoongArch::ATOMIC_LOAD_AND_I32_POSTRA: ++ case LoongArch::ATOMIC_LOAD_OR_I32_POSTRA: ++ case LoongArch::ATOMIC_LOAD_XOR_I32_POSTRA: ++ case LoongArch::ATOMIC_LOAD_NAND_I32_POSTRA: ++ case LoongArch::ATOMIC_SWAP_I32_POSTRA: ++ case LoongArch::ATOMIC_LOAD_MAX_I32_POSTRA: ++ case LoongArch::ATOMIC_LOAD_MIN_I32_POSTRA: ++ case LoongArch::ATOMIC_LOAD_UMAX_I32_POSTRA: ++ case LoongArch::ATOMIC_LOAD_UMIN_I32_POSTRA: ++ return expandAtomicBinOp(MBB, MBBI, NMBB, 4); ++ case LoongArch::ATOMIC_LOAD_ADD_I64_POSTRA: ++ case LoongArch::ATOMIC_LOAD_SUB_I64_POSTRA: ++ case LoongArch::ATOMIC_LOAD_AND_I64_POSTRA: ++ case LoongArch::ATOMIC_LOAD_OR_I64_POSTRA: ++ case LoongArch::ATOMIC_LOAD_XOR_I64_POSTRA: ++ case LoongArch::ATOMIC_LOAD_NAND_I64_POSTRA: ++ case LoongArch::ATOMIC_SWAP_I64_POSTRA: ++ case LoongArch::ATOMIC_LOAD_MAX_I64_POSTRA: ++ case LoongArch::ATOMIC_LOAD_MIN_I64_POSTRA: ++ case LoongArch::ATOMIC_LOAD_UMAX_I64_POSTRA: ++ case LoongArch::ATOMIC_LOAD_UMIN_I64_POSTRA: ++ return expandAtomicBinOp(MBB, MBBI, NMBB, 8); ++ default: ++ return Modified; ++ } ++} ++ ++bool LoongArchExpandPseudo::expandMBB(MachineBasicBlock &MBB) { ++ bool Modified = false; ++ ++ MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end(); ++ while (MBBI != E) { ++ MachineBasicBlock::iterator NMBBI = std::next(MBBI); ++ Modified |= expandMI(MBB, MBBI, NMBBI); ++ MBBI = NMBBI; ++ } ++ ++ return Modified; ++} ++ ++bool LoongArchExpandPseudo::runOnMachineFunction(MachineFunction &MF) { ++ STI = &static_cast(MF.getSubtarget()); ++ TII = STI->getInstrInfo(); ++ ++ bool Modified = false; ++ for (MachineFunction::iterator MFI = MF.begin(), E = MF.end(); MFI != E; ++ ++MFI) ++ Modified |= expandMBB(*MFI); ++ ++ if (Modified) ++ MF.RenumberBlocks(); ++ ++ return Modified; ++} ++ ++/// createLoongArchExpandPseudoPass - returns an instance of the pseudo instruction ++/// expansion pass. ++FunctionPass *llvm::createLoongArchExpandPseudoPass() { ++ return new LoongArchExpandPseudo(); ++} +diff --git a/lib/Target/LoongArch/LoongArchFrameLowering.cpp b/lib/Target/LoongArch/LoongArchFrameLowering.cpp +new file mode 100644 +index 00000000..c08962a6 +--- /dev/null ++++ b/lib/Target/LoongArch/LoongArchFrameLowering.cpp +@@ -0,0 +1,546 @@ ++//===-- LoongArchFrameLowering.cpp - LoongArch Frame Information --------------------===// ++// ++// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. ++// See https://llvm.org/LICENSE.txt for license information. ++// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception ++// ++//===----------------------------------------------------------------------===// ++// ++// This file contains the LoongArch implementation of TargetFrameLowering class. 
++// ++//===----------------------------------------------------------------------===// ++ ++#include "LoongArchFrameLowering.h" ++#include "MCTargetDesc/LoongArchBaseInfo.h" ++#include "MCTargetDesc/LoongArchABIInfo.h" ++#include "LoongArchInstrInfo.h" ++#include "LoongArchMachineFunction.h" ++#include "LoongArchTargetMachine.h" ++#include "LoongArchRegisterInfo.h" ++#include "LoongArchSubtarget.h" ++#include "llvm/ADT/BitVector.h" ++#include "llvm/ADT/StringRef.h" ++#include "llvm/ADT/StringSwitch.h" ++#include "llvm/CodeGen/MachineBasicBlock.h" ++#include "llvm/CodeGen/MachineFrameInfo.h" ++#include "llvm/CodeGen/MachineFunction.h" ++#include "llvm/CodeGen/MachineInstr.h" ++#include "llvm/CodeGen/MachineInstrBuilder.h" ++#include "llvm/CodeGen/MachineModuleInfo.h" ++#include "llvm/CodeGen/MachineOperand.h" ++#include "llvm/CodeGen/MachineRegisterInfo.h" ++#include "llvm/CodeGen/RegisterScavenging.h" ++#include "llvm/CodeGen/TargetInstrInfo.h" ++#include "llvm/CodeGen/TargetRegisterInfo.h" ++#include "llvm/CodeGen/TargetSubtargetInfo.h" ++#include "llvm/IR/DataLayout.h" ++#include "llvm/IR/DebugLoc.h" ++#include "llvm/IR/Function.h" ++#include "llvm/MC/MCDwarf.h" ++#include "llvm/MC/MCRegisterInfo.h" ++#include "llvm/MC/MachineLocation.h" ++#include "llvm/Support/CodeGen.h" ++#include "llvm/Support/ErrorHandling.h" ++#include "llvm/Support/MathExtras.h" ++#include "llvm/Target/TargetOptions.h" ++#include ++#include ++#include ++#include ++ ++using namespace llvm; ++ ++// We would like to split the SP adjustment to reduce prologue/epilogue ++// instructions. In this way, the offset of the callee saved ++// register could fit in a single store. ++uint64_t LoongArchFrameLowering::getFirstSPAdjustAmount( ++ const MachineFunction &MF) const { ++ const MachineFrameInfo &MFI = MF.getFrameInfo(); ++ const std::vector<CalleeSavedInfo> &CSI = MFI.getCalleeSavedInfo(); ++ uint64_t StackSize = MFI.getStackSize(); ++ ++ // Return the FirstSPAdjustAmount if the StackSize cannot fit in a signed ++ // 12-bit immediate and there exists a callee saved register that needs to be pushed. ++ if (!isInt<12>(StackSize) && (CSI.size() > 0)) { ++ // FirstSPAdjustAmount is chosen as (2048 - StackAlign) ++ // because 2048 would cause sp = sp + 2048 in the epilogue to be split into ++ // multiple instructions. An offset smaller than 2048 can fit in a single ++ // load/store instruction, and we have to stick with the stack alignment. ++ return 2048 - STI.getStackAlignment().value(); ++ } ++ return 0; ++} ++ ++//===----------------------------------------------------------------------===// ++// ++// Stack Frame Processing methods ++// +----------------------------+ ++// ++// The stack is allocated decrementing the stack pointer on ++// the first instruction of a function prologue. Once decremented, ++// all stack references are done through a positive offset ++// from the stack/frame pointer, so the stack is considered ++// to grow up! Otherwise terrible hacks would have to be made ++// to get this stack ABI compliant :) ++// ++// The stack frame required by the ABI (after call): ++// Offset ++// ++// 0 ---------- ++// 4 Args to pass ++// . Alloca allocations ++// . Local Area ++// . CPU "Callee Saved" Registers ++// . saved FP ++// . saved RA
FPU "Callee Saved" Registers ++// StackSize ----------- ++// ++// Offset - offset from sp after stack allocation on function prologue ++// ++// The sp is the stack pointer subtracted/added from the stack size ++// at the Prologue/Epilogue ++// ++// References to the previous stack (to obtain arguments) are done ++// with offsets that exceeds the stack size: (stacksize+(4*(num_arg-1)) ++// ++// Examples: ++// - reference to the actual stack frame ++// for any local area var there is smt like : FI >= 0, StackOffset: 4 ++// st.w REGX, SP, 4 ++// ++// - reference to previous stack frame ++// suppose there's a load to the 5th arguments : FI < 0, StackOffset: 16. ++// The emitted instruction will be something like: ++// ld.w REGX, SP, 16+StackSize ++// ++// Since the total stack size is unknown on LowerFormalArguments, all ++// stack references (ObjectOffset) created to reference the function ++// arguments, are negative numbers. This way, on eliminateFrameIndex it's ++// possible to detect those references and the offsets are adjusted to ++// their real location. ++// ++//===----------------------------------------------------------------------===// ++// ++LoongArchFrameLowering::LoongArchFrameLowering(const LoongArchSubtarget &STI) ++ : TargetFrameLowering(StackGrowsDown, STI.getStackAlignment(), 0, ++ STI.getStackAlignment()), STI(STI) {} ++ ++void LoongArchFrameLowering::emitPrologue(MachineFunction &MF, ++ MachineBasicBlock &MBB) const { ++ MachineFrameInfo &MFI = MF.getFrameInfo(); ++ LoongArchFunctionInfo *LoongArchFI = MF.getInfo(); ++ ++ const LoongArchInstrInfo &TII = ++ *static_cast(STI.getInstrInfo()); ++ const LoongArchRegisterInfo &RegInfo = ++ *static_cast(STI.getRegisterInfo()); ++ MachineBasicBlock::iterator MBBI = MBB.begin(); ++ DebugLoc dl; ++ LoongArchABIInfo ABI = STI.getABI(); ++ unsigned SP = ABI.GetStackPtr(); ++ unsigned FP = ABI.GetFramePtr(); ++ unsigned ZERO = ABI.GetNullPtr(); ++ unsigned MOVE = ABI.GetGPRMoveOp(); ++ unsigned ADDI = ABI.GetPtrAddiOp(); ++ unsigned AND = ABI.IsLP64() ? LoongArch::AND : LoongArch::AND32; ++ unsigned SLLI = ABI.IsLP64() ? LoongArch::SLLI_D : LoongArch::SLLI_W; ++ ++ const TargetRegisterClass *RC = ABI.ArePtrs64bit() ? ++ &LoongArch::GPR64RegClass : &LoongArch::GPR32RegClass; ++ ++ // First, compute final stack size. ++ uint64_t StackSize = MFI.getStackSize(); ++ ++ // No need to allocate space on the stack. ++ if (StackSize == 0 && !MFI.adjustsStack()) ++ return; ++ ++ uint64_t FirstSPAdjustAmount = getFirstSPAdjustAmount(MF); ++ // Split the SP adjustment to reduce the offsets of callee saved spill. ++ if (FirstSPAdjustAmount) ++ StackSize = FirstSPAdjustAmount; ++ ++ // Adjust stack. ++ TII.adjustReg(SP, SP, -StackSize, MBB, MBBI, MachineInstr::FrameSetup); ++ // Emit ".cfi_def_cfa_offset StackSize". ++ unsigned CFIIndex = MF.addFrameInst( ++ MCCFIInstruction::cfiDefCfaOffset(nullptr, StackSize)); ++ BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION)) ++ .addCFIIndex(CFIIndex); ++ ++ MachineModuleInfo &MMI = MF.getMMI(); ++ const MCRegisterInfo *MRI = MMI.getContext().getRegisterInfo(); ++ ++ const std::vector &CSI = MFI.getCalleeSavedInfo(); ++ ++ if (!CSI.empty()) { ++ // Find the instruction past the last instruction that saves a callee-saved ++ // register to the stack. ++ for (unsigned i = 0; i < CSI.size(); ++i) ++ ++MBBI; ++ ++ // Iterate over list of callee-saved registers and emit .cfi_offset ++ // directives. 
++ for (std::vector::const_iterator I = CSI.begin(), ++ E = CSI.end(); I != E; ++I) { ++ int64_t Offset = MFI.getObjectOffset(I->getFrameIdx()); ++ unsigned Reg = I->getReg(); ++ unsigned CFIIndex = MF.addFrameInst(MCCFIInstruction::createOffset( ++ nullptr, MRI->getDwarfRegNum(Reg, true), Offset)); ++ BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION)) ++ .addCFIIndex(CFIIndex); ++ } ++ } ++ ++ if (LoongArchFI->callsEhReturn()) { ++ // Insert instructions that spill eh data registers. ++ for (int I = 0; I < 4; ++I) { ++ if (!MBB.isLiveIn(ABI.GetEhDataReg(I))) ++ MBB.addLiveIn(ABI.GetEhDataReg(I)); ++ TII.storeRegToStackSlot(MBB, MBBI, ABI.GetEhDataReg(I), false, ++ LoongArchFI->getEhDataRegFI(I), RC, &RegInfo); ++ } ++ ++ // Emit .cfi_offset directives for eh data registers. ++ for (int I = 0; I < 4; ++I) { ++ int64_t Offset = MFI.getObjectOffset(LoongArchFI->getEhDataRegFI(I)); ++ unsigned Reg = MRI->getDwarfRegNum(ABI.GetEhDataReg(I), true); ++ unsigned CFIIndex = MF.addFrameInst( ++ MCCFIInstruction::createOffset(nullptr, Reg, Offset)); ++ BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION)) ++ .addCFIIndex(CFIIndex); ++ } ++ } ++ ++ // If framepointer enabled, set it to point to the stack pointer on entry. ++ if (hasFP(MF)) { ++ // Insert instruction "addi.w/d $fp, $sp, StackSize" at this location. ++ TII.adjustReg(FP, SP, StackSize - LoongArchFI->getVarArgsSaveSize(), MBB, ++ MBBI, MachineInstr::FrameSetup); ++ // Emit ".cfi_def_cfa $fp, $varargs_size". ++ unsigned CFIIndex = MF.addFrameInst( ++ MCCFIInstruction::cfiDefCfa(nullptr, MRI->getDwarfRegNum(FP, true), ++ LoongArchFI->getVarArgsSaveSize())); ++ BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION)) ++ .addCFIIndex(CFIIndex) ++ .setMIFlag(MachineInstr::FrameSetup); ++ } ++ ++ // Emit the second SP adjustment after saving callee saved registers. ++ if (FirstSPAdjustAmount) { ++ uint64_t SecondSPAdjustAmount = MFI.getStackSize() - FirstSPAdjustAmount; ++ assert(SecondSPAdjustAmount > 0 && ++ "SecondSPAdjustAmount should be greater than zero"); ++ TII.adjustReg(SP, SP, -SecondSPAdjustAmount, MBB, MBBI, ++ MachineInstr::FrameSetup); ++ ++ // If we are using a frame-pointer, and thus emitted ".cfi_def_cfa fp, 0", ++ // don't emit an sp-based .cfi_def_cfa_offset. ++ if (!hasFP(MF)) { ++ // Emit ".cfi_def_cfa_offset StackSize" ++ unsigned CFIIndex = MF.addFrameInst( ++ MCCFIInstruction::cfiDefCfaOffset(nullptr, MFI.getStackSize())); ++ BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION)) ++ .addCFIIndex(CFIIndex) ++ .setMIFlag(MachineInstr::FrameSetup); ++ } ++ } ++ ++ // Realign stack. 
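++ // The realignment below rounds SP down with an AND against -MaxAlign: the
++ // mask is loaded with a single addi from $zero when the alignment fits the
++ // signed 12-bit immediate (<= 2048), and built as (-1 << log2(MaxAlign)) via
++ // addi + slli otherwise. When a base pointer is required, the realigned SP is
++ // then copied into $s7.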
++ if (hasFP(MF)) { ++ if (RegInfo.hasStackRealignment(MF)) { ++ // addiu $Reg, $zero, -MaxAlignment ++ // andi $sp, $sp, $Reg ++ unsigned VR = MF.getRegInfo().createVirtualRegister(RC); ++ assert((Log2(MFI.getMaxAlign()) < 16) && ++ "Function's alignment size requirement is not supported."); ++ int MaxAlign = -(int)MFI.getMaxAlign().value(); ++ int Alignment = (int)MFI.getMaxAlign().value(); ++ ++ if (Alignment <= 2048) { ++ BuildMI(MBB, MBBI, dl, TII.get(ADDI), VR).addReg(ZERO).addImm(MaxAlign); ++ BuildMI(MBB, MBBI, dl, TII.get(AND), SP).addReg(SP).addReg(VR); ++ } else { ++ const unsigned NrBitsToZero = countTrailingZeros((unsigned)Alignment); ++ BuildMI(MBB, MBBI, dl, TII.get(ADDI), VR).addReg(ZERO).addImm(-1); ++ BuildMI(MBB, MBBI, dl, TII.get(SLLI), VR) ++ .addReg(VR) ++ .addImm(NrBitsToZero); ++ BuildMI(MBB, MBBI, dl, TII.get(AND), SP).addReg(SP).addReg(VR); ++ } ++ ++ if (hasBP(MF)) { ++ // move $s7, $sp ++ unsigned BP = STI.isABI_LP64() ? LoongArch::S7_64 : LoongArch::S7; ++ BuildMI(MBB, MBBI, dl, TII.get(MOVE), BP).addReg(SP).addReg(ZERO); ++ } ++ } ++ } ++} ++ ++void LoongArchFrameLowering::emitEpilogue(MachineFunction &MF, ++ MachineBasicBlock &MBB) const { ++ MachineBasicBlock::iterator MBBI = MBB.getFirstTerminator(); ++ MachineFrameInfo &MFI = MF.getFrameInfo(); ++ LoongArchFunctionInfo *LoongArchFI = MF.getInfo(); ++ ++ const LoongArchInstrInfo &TII = ++ *static_cast(STI.getInstrInfo()); ++ const LoongArchRegisterInfo &RegInfo = ++ *static_cast(STI.getRegisterInfo()); ++ ++ DebugLoc DL = MBBI != MBB.end() ? MBBI->getDebugLoc() : DebugLoc(); ++ LoongArchABIInfo ABI = STI.getABI(); ++ unsigned SP = ABI.GetStackPtr(); ++ unsigned FP = ABI.GetFramePtr(); ++ ++ // Get the number of bytes from FrameInfo. ++ uint64_t StackSize = MFI.getStackSize(); ++ ++ // Restore the stack pointer. ++ if (hasFP(MF) && ++ (RegInfo.hasStackRealignment(MF) || MFI.hasVarSizedObjects())) { ++ // Find the first instruction that restores a callee-saved register. ++ MachineBasicBlock::iterator I = MBBI; ++ for (unsigned i = 0; i < MFI.getCalleeSavedInfo().size(); ++i) ++ --I; ++ TII.adjustReg(SP, FP, -(StackSize - LoongArchFI->getVarArgsSaveSize()), MBB, ++ I); ++ } ++ ++ uint64_t FirstSPAdjustAmount = getFirstSPAdjustAmount(MF); ++ if (FirstSPAdjustAmount) { ++ uint64_t SecondSPAdjustAmount = MFI.getStackSize() - FirstSPAdjustAmount; ++ assert(SecondSPAdjustAmount > 0 && ++ "SecondSPAdjustAmount should be greater than zero"); ++ // Find the first instruction that restores a callee-saved register. ++ MachineBasicBlock::iterator I = MBBI; ++ for (unsigned i = 0; i < MFI.getCalleeSavedInfo().size(); ++i) ++ --I; ++ ++ TII.adjustReg(SP, SP, SecondSPAdjustAmount, MBB, I); ++ } ++ ++ if (LoongArchFI->callsEhReturn()) { ++ const TargetRegisterClass *RC = ++ ABI.ArePtrs64bit() ? &LoongArch::GPR64RegClass : &LoongArch::GPR32RegClass; ++ ++ // Find first instruction that restores a callee-saved register. ++ MachineBasicBlock::iterator I = MBBI; ++ for (unsigned i = 0; i < MFI.getCalleeSavedInfo().size(); ++i) ++ --I; ++ ++ // Insert instructions that restore eh data registers. ++ for (int J = 0; J < 4; ++J) ++ TII.loadRegFromStackSlot(MBB, I, ABI.GetEhDataReg(J), ++ LoongArchFI->getEhDataRegFI(J), RC, &RegInfo); ++ } ++ ++ if (FirstSPAdjustAmount) ++ StackSize = FirstSPAdjustAmount; ++ ++ if (!StackSize) ++ return; ++ ++ // Final adjust stack. 
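++ // When the SP adjustment was split, StackSize has been reset to
++ // FirstSPAdjustAmount above, so this final add releases only the first-stage
++ // allocation; the second-stage amount was already added back before the
++ // callee-saved restores.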
++ TII.adjustReg(SP, SP, StackSize, MBB, MBBI); ++} ++ ++StackOffset ++LoongArchFrameLowering::getFrameIndexReference(const MachineFunction &MF, ++ int FI, ++ Register &FrameReg) const { ++ const MachineFrameInfo &MFI = MF.getFrameInfo(); ++ const TargetRegisterInfo *RI = MF.getSubtarget().getRegisterInfo(); ++ LoongArchABIInfo ABI = STI.getABI(); ++ const auto *LoongArchFI = MF.getInfo(); ++ ++ // Callee-saved registers should be referenced relative to the stack ++ // pointer (positive offset), otherwise use the frame pointer (negative ++ // offset). ++ const auto &CSI = MFI.getCalleeSavedInfo(); ++ int MinCSFI = 0; ++ int MaxCSFI = -1; ++ StackOffset Offset = ++ StackOffset::getFixed(MFI.getObjectOffset(FI) - getOffsetOfLocalArea() + ++ MFI.getOffsetAdjustment()); ++ uint64_t FirstSPAdjustAmount = getFirstSPAdjustAmount(MF); ++ ++ if (CSI.size()) { ++ MinCSFI = CSI[0].getFrameIdx(); ++ MaxCSFI = CSI[CSI.size() - 1].getFrameIdx(); ++ } ++ ++ bool EhDataRegFI = LoongArchFI->isEhDataRegFI(FI); ++ if ((FI >= MinCSFI && FI <= MaxCSFI) || EhDataRegFI) { ++ FrameReg = ABI.GetStackPtr(); ++ ++ if (FirstSPAdjustAmount) ++ Offset += StackOffset::getFixed(FirstSPAdjustAmount); ++ else ++ Offset += StackOffset::getFixed(MFI.getStackSize()); ++ } else if (RI->hasStackRealignment(MF) && !MFI.isFixedObjectIndex(FI)) { ++ // If the stack was realigned, the frame pointer is set in order to allow ++ // SP to be restored, so we need another base register to record the stack ++ // after realignment. ++ FrameReg = hasBP(MF) ? ABI.GetBasePtr() : ABI.GetStackPtr(); ++ Offset += StackOffset::getFixed(MFI.getStackSize()); ++ } else { ++ FrameReg = RI->getFrameRegister(MF); ++ if (hasFP(MF)) ++ Offset += StackOffset::getFixed(LoongArchFI->getVarArgsSaveSize()); ++ else ++ Offset += StackOffset::getFixed(MFI.getStackSize()); ++ } ++ return Offset; ++} ++ ++bool LoongArchFrameLowering::spillCalleeSavedRegisters( ++ MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, ++ ArrayRef CSI, const TargetRegisterInfo *TRI) const { ++ MachineFunction *MF = MBB.getParent(); ++ const TargetInstrInfo &TII = *STI.getInstrInfo(); ++ ++ for (unsigned i = 0, e = CSI.size(); i != e; ++i) { ++ // Add the callee-saved register as live-in. Do not add if the register is ++ // RA and return address is taken, because it has already been added in ++ // method LoongArchTargetLowering::lowerRETURNADDR. ++ // It's killed at the spill, unless the register is RA and return address ++ // is taken. ++ unsigned Reg = CSI[i].getReg(); ++ bool IsRAAndRetAddrIsTaken = (Reg == LoongArch::RA || Reg == LoongArch::RA_64) ++ && MF->getFrameInfo().isReturnAddressTaken(); ++ if (!IsRAAndRetAddrIsTaken) ++ MBB.addLiveIn(Reg); ++ ++ // Insert the spill to the stack frame. ++ bool IsKill = !IsRAAndRetAddrIsTaken; ++ const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg); ++ TII.storeRegToStackSlot(MBB, MI, Reg, IsKill, ++ CSI[i].getFrameIdx(), RC, TRI); ++ } ++ ++ return true; ++} ++ ++bool ++LoongArchFrameLowering::hasReservedCallFrame(const MachineFunction &MF) const { ++ const MachineFrameInfo &MFI = MF.getFrameInfo(); ++ // Reserve call frame if the size of the maximum call frame fits into 12-bit ++ // immediate field and there are no variable sized objects on the stack. ++ // Make sure the second register scavenger spill slot can be accessed with one ++ // instruction. ++ return isInt<12>(MFI.getMaxCallFrameSize() + getStackAlignment()) && ++ !MFI.hasVarSizedObjects(); ++} ++ ++/// Mark \p Reg and all registers aliasing it in the bitset. 
++static void setAliasRegs(MachineFunction &MF, BitVector &SavedRegs, ++ unsigned Reg) { ++ const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo(); ++ for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI) ++ SavedRegs.set(*AI); ++} ++ ++void LoongArchFrameLowering::determineCalleeSaves(MachineFunction &MF, ++ BitVector &SavedRegs, ++ RegScavenger *RS) const { ++ TargetFrameLowering::determineCalleeSaves(MF, SavedRegs, RS); ++ const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo(); ++ LoongArchFunctionInfo *LoongArchFI = MF.getInfo(); ++ LoongArchABIInfo ABI = STI.getABI(); ++ unsigned FP = ABI.GetFramePtr(); ++ unsigned BP = ABI.IsLP64() ? LoongArch::S7_64 : LoongArch::S7; ++ ++ // Mark $fp as used if function has dedicated frame pointer. ++ if (hasFP(MF)) ++ setAliasRegs(MF, SavedRegs, FP); ++ // Mark $s7 as used if function has dedicated base pointer. ++ if (hasBP(MF)) ++ setAliasRegs(MF, SavedRegs, BP); ++ ++ // Create spill slots for eh data registers if function calls eh_return. ++ if (LoongArchFI->callsEhReturn()) ++ LoongArchFI->createEhDataRegsFI(); ++ ++ // Set scavenging frame index if necessary. ++ uint64_t MaxSPOffset = estimateStackSize(MF); ++ ++ // If there is a variable ++ // sized object on the stack, the estimation cannot account for it. ++ if (isIntN(12, MaxSPOffset) && ++ !MF.getFrameInfo().hasVarSizedObjects()) ++ return; ++ ++ const TargetRegisterClass &RC = ++ ABI.ArePtrs64bit() ? LoongArch::GPR64RegClass : LoongArch::GPR32RegClass; ++ int FI = MF.getFrameInfo().CreateStackObject(TRI->getSpillSize(RC), ++ TRI->getSpillAlign(RC), false); ++ RS->addScavengingFrameIndex(FI); ++} ++ ++// hasFP - Return true if the specified function should have a dedicated frame ++// pointer register. This is true if the function has variable sized allocas, ++// if it needs dynamic stack realignment, if frame pointer elimination is ++// disabled, or if the frame address is taken. ++bool LoongArchFrameLowering::hasFP(const MachineFunction &MF) const { ++ const MachineFrameInfo &MFI = MF.getFrameInfo(); ++ const TargetRegisterInfo *TRI = STI.getRegisterInfo(); ++ ++ return MF.getTarget().Options.DisableFramePointerElim(MF) || ++ MFI.hasVarSizedObjects() || MFI.isFrameAddressTaken() || ++ TRI->hasStackRealignment(MF); ++} ++ ++bool LoongArchFrameLowering::hasBP(const MachineFunction &MF) const { ++ const MachineFrameInfo &MFI = MF.getFrameInfo(); ++ const TargetRegisterInfo *TRI = STI.getRegisterInfo(); ++ ++ return MFI.hasVarSizedObjects() && TRI->hasStackRealignment(MF); ++} ++ ++// Estimate the size of the stack, including the incoming arguments. We need to ++// account for register spills, local objects, reserved call frame and incoming ++// arguments. This is required to determine the largest possible positive offset ++// from $sp so that it can be determined if an emergency spill slot for stack ++// addresses is required. ++uint64_t LoongArchFrameLowering:: ++estimateStackSize(const MachineFunction &MF) const { ++ const MachineFrameInfo &MFI = MF.getFrameInfo(); ++ const TargetRegisterInfo &TRI = *STI.getRegisterInfo(); ++ ++ int64_t Size = 0; ++ ++ // Iterate over fixed sized objects which are incoming arguments. ++ for (int I = MFI.getObjectIndexBegin(); I != 0; ++I) ++ if (MFI.getObjectOffset(I) > 0) ++ Size += MFI.getObjectSize(I); ++ ++ // Conservatively assume all callee-saved registers will be saved. 
++ for (const MCPhysReg *R = TRI.getCalleeSavedRegs(&MF); *R; ++R) { ++ unsigned RegSize = TRI.getSpillSize(*TRI.getMinimalPhysRegClass(*R)); ++ Size = alignTo(Size + RegSize, RegSize); ++ } ++ ++ // Get the size of the rest of the frame objects and any possible reserved ++ // call frame, accounting for alignment. ++ return Size + MFI.estimateStackSize(MF); ++} ++ ++// Eliminate ADJCALLSTACKDOWN, ADJCALLSTACKUP pseudo instructions ++MachineBasicBlock::iterator LoongArchFrameLowering:: ++eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB, ++ MachineBasicBlock::iterator I) const { ++ unsigned SP = STI.getABI().IsLP64() ? LoongArch::SP_64 : LoongArch::SP; ++ ++ if (!hasReservedCallFrame(MF)) { ++ int64_t Amount = I->getOperand(0).getImm(); ++ if (I->getOpcode() == LoongArch::ADJCALLSTACKDOWN) ++ Amount = -Amount; ++ ++ STI.getInstrInfo()->adjustReg(SP, SP, Amount, MBB, I); ++ } ++ ++ return MBB.erase(I); ++} +diff --git a/lib/Target/LoongArch/LoongArchFrameLowering.h b/lib/Target/LoongArch/LoongArchFrameLowering.h +new file mode 100644 +index 00000000..ca6cd736 +--- /dev/null ++++ b/lib/Target/LoongArch/LoongArchFrameLowering.h +@@ -0,0 +1,70 @@ ++//===-- LoongArchFrameLowering.h - Define frame lowering for LoongArch ----*- C++ -*-===// ++// ++// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. ++// See https://llvm.org/LICENSE.txt for license information. ++// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception ++// ++//===----------------------------------------------------------------------===// ++// ++// ++// ++//===----------------------------------------------------------------------===// ++ ++#ifndef LLVM_LIB_TARGET_LOONGARCH_LOONGARCHFRAMELOWERING_H ++#define LLVM_LIB_TARGET_LOONGARCH_LOONGARCHFRAMELOWERING_H ++ ++#include "LoongArch.h" ++#include "llvm/CodeGen/TargetFrameLowering.h" ++ ++namespace llvm { ++ class LoongArchSubtarget; ++ ++class LoongArchFrameLowering : public TargetFrameLowering { ++ const LoongArchSubtarget &STI; ++ ++public: ++ explicit LoongArchFrameLowering(const LoongArchSubtarget &STI); ++ ++ /// emitProlog/emitEpilog - These methods insert prolog and epilog code into ++ /// the function. ++ void emitPrologue(MachineFunction &MF, MachineBasicBlock &MBB) const override; ++ void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const override; ++ ++ StackOffset getFrameIndexReference(const MachineFunction &MF, int FI, ++ Register &FrameReg) const override; ++ ++ bool spillCalleeSavedRegisters(MachineBasicBlock &MBB, ++ MachineBasicBlock::iterator MI, ++ ArrayRef CSI, ++ const TargetRegisterInfo *TRI) const override; ++ ++ bool hasReservedCallFrame(const MachineFunction &MF) const override; ++ ++ void determineCalleeSaves(MachineFunction &MF, BitVector &SavedRegs, ++ RegScavenger *RS) const override; ++ ++ bool hasFP(const MachineFunction &MF) const override; ++ ++ bool hasBP(const MachineFunction &MF) const; ++ ++ bool enableShrinkWrapping(const MachineFunction &MF) const override { ++ return true; ++ } ++ ++ MachineBasicBlock::iterator ++ eliminateCallFramePseudoInstr(MachineFunction &MF, ++ MachineBasicBlock &MBB, ++ MachineBasicBlock::iterator I) const override; ++ ++ // Get the first stack adjustment amount for split the SP adjustment. ++ // Return 0 if we don't want to to split the SP adjustment in prologue and ++ // epilogue. 
++ uint64_t getFirstSPAdjustAmount(const MachineFunction &MF) const; ++ ++protected: ++ uint64_t estimateStackSize(const MachineFunction &MF) const; ++}; ++ ++} // End llvm namespace ++ ++#endif +diff --git a/lib/Target/LoongArch/LoongArchISelDAGToDAG.cpp b/lib/Target/LoongArch/LoongArchISelDAGToDAG.cpp +new file mode 100644 +index 00000000..43e46315 +--- /dev/null ++++ b/lib/Target/LoongArch/LoongArchISelDAGToDAG.cpp +@@ -0,0 +1,756 @@ ++//===-- LoongArchISelDAGToDAG.cpp - A Dag to Dag Inst Selector for LoongArch --------===// ++// ++// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. ++// See https://llvm.org/LICENSE.txt for license information. ++// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception ++// ++//===----------------------------------------------------------------------===// ++// ++// This file defines an instruction selector for the LoongArch target. ++// ++//===----------------------------------------------------------------------===// ++ ++#include "LoongArchISelDAGToDAG.h" ++#include "LoongArch.h" ++#include "LoongArchMachineFunction.h" ++#include "LoongArchRegisterInfo.h" ++#include "MCTargetDesc/LoongArchAnalyzeImmediate.h" ++#include "MCTargetDesc/LoongArchBaseInfo.h" ++#include "MCTargetDesc/LoongArchMCTargetDesc.h" ++#include "llvm/CodeGen/MachineConstantPool.h" ++#include "llvm/CodeGen/MachineFrameInfo.h" ++#include "llvm/CodeGen/MachineFunction.h" ++#include "llvm/CodeGen/MachineInstrBuilder.h" ++#include "llvm/CodeGen/MachineRegisterInfo.h" ++#include "llvm/CodeGen/SelectionDAGNodes.h" ++#include "llvm/IR/CFG.h" ++#include "llvm/IR/Dominators.h" ++#include "llvm/IR/GlobalValue.h" ++#include "llvm/IR/Instructions.h" ++#include "llvm/IR/Intrinsics.h" ++#include "llvm/IR/IntrinsicsLoongArch.h" ++#include "llvm/IR/Type.h" ++#include "llvm/Support/Debug.h" ++#include "llvm/Support/ErrorHandling.h" ++#include "llvm/Support/raw_ostream.h" ++#include "llvm/Target/TargetMachine.h" ++using namespace llvm; ++ ++#define DEBUG_TYPE "loongarch-isel" ++ ++//===----------------------------------------------------------------------===// ++// Instruction Selector Implementation ++//===----------------------------------------------------------------------===// ++ ++//===----------------------------------------------------------------------===// ++// LoongArchDAGToDAGISel - LoongArch specific code to select LoongArch machine ++// instructions for SelectionDAG operations. 
++//===----------------------------------------------------------------------===// ++ ++void LoongArchDAGToDAGISel::getAnalysisUsage(AnalysisUsage &AU) const { ++ AU.addRequired(); ++ SelectionDAGISel::getAnalysisUsage(AU); ++} ++ ++bool LoongArchDAGToDAGISel::runOnMachineFunction(MachineFunction &MF) { ++ Subtarget = &static_cast(MF.getSubtarget()); ++ bool Ret = SelectionDAGISel::runOnMachineFunction(MF); ++ ++ return Ret; ++} ++ ++/// Match frameindex ++bool LoongArchDAGToDAGISel::selectAddrFrameIndex(SDValue Addr, SDValue &Base, ++ SDValue &Offset) const { ++ if (FrameIndexSDNode *FIN = dyn_cast(Addr)) { ++ EVT ValTy = Addr.getValueType(); ++ ++ Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), ValTy); ++ Offset = CurDAG->getTargetConstant(0, SDLoc(Addr), ValTy); ++ return true; ++ } ++ return false; ++} ++ ++/// Match frameindex+offset and frameindex|offset ++bool LoongArchDAGToDAGISel::selectAddrFrameIndexOffset( ++ SDValue Addr, SDValue &Base, SDValue &Offset, unsigned OffsetBits, ++ unsigned ShiftAmount = 0) const { ++ if (CurDAG->isBaseWithConstantOffset(Addr)) { ++ ConstantSDNode *CN = dyn_cast(Addr.getOperand(1)); ++ if (isIntN(OffsetBits + ShiftAmount, CN->getSExtValue())) { ++ EVT ValTy = Addr.getValueType(); ++ ++ // If the first operand is a FI, get the TargetFI Node ++ if (FrameIndexSDNode *FIN = ++ dyn_cast(Addr.getOperand(0))) ++ Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), ValTy); ++ else { ++ Base = Addr.getOperand(0); ++ // If base is a FI, additional offset calculation is done in ++ // eliminateFrameIndex, otherwise we need to check the alignment ++ const Align Alignment(1ULL << ShiftAmount); ++ if (!isAligned(Alignment, CN->getZExtValue())) ++ return false; ++ } ++ ++ Offset = CurDAG->getTargetConstant(CN->getZExtValue(), SDLoc(Addr), ++ ValTy); ++ return true; ++ } ++ } ++ return false; ++} ++ ++/// ComplexPattern used on LoongArchInstrInfo ++/// Used on LoongArch Load/Store instructions ++bool LoongArchDAGToDAGISel::selectAddrRegImm(SDValue Addr, SDValue &Base, ++ SDValue &Offset) const { ++ // if Address is FI, get the TargetFrameIndex. 
++ if (selectAddrFrameIndex(Addr, Base, Offset)) ++ return true; ++ ++ if (!TM.isPositionIndependent()) { ++ if ((Addr.getOpcode() == ISD::TargetExternalSymbol || ++ Addr.getOpcode() == ISD::TargetGlobalAddress)) ++ return false; ++ } ++ ++ // Addresses of the form FI+const or FI|const ++ if (selectAddrFrameIndexOffset(Addr, Base, Offset, 12)) ++ return true; ++ ++ return false; ++} ++ ++/// ComplexPattern used on LoongArchInstrInfo ++/// Used on LoongArch Load/Store instructions ++bool LoongArchDAGToDAGISel::selectAddrDefault(SDValue Addr, SDValue &Base, ++ SDValue &Offset) const { ++ Base = Addr; ++ Offset = CurDAG->getTargetConstant(0, SDLoc(Addr), Addr.getValueType()); ++ return true; ++} ++ ++bool LoongArchDAGToDAGISel::selectIntAddr(SDValue Addr, SDValue &Base, ++ SDValue &Offset) const { ++ return selectAddrRegImm(Addr, Base, Offset) || ++ selectAddrDefault(Addr, Base, Offset); ++} ++ ++bool LoongArchDAGToDAGISel::selectAddrRegImm12(SDValue Addr, SDValue &Base, ++ SDValue &Offset) const { ++ if (selectAddrFrameIndex(Addr, Base, Offset)) ++ return true; ++ ++ if (selectAddrFrameIndexOffset(Addr, Base, Offset, 12)) ++ return true; ++ ++ return false; ++} ++ ++bool LoongArchDAGToDAGISel::selectIntAddrSImm12(SDValue Addr, SDValue &Base, ++ SDValue &Offset) const { ++ if (selectAddrFrameIndex(Addr, Base, Offset)) ++ return true; ++ ++ if (selectAddrFrameIndexOffset(Addr, Base, Offset, 12)) ++ return true; ++ ++ return selectAddrDefault(Addr, Base, Offset); ++} ++ ++bool LoongArchDAGToDAGISel::selectIntAddrSImm10Lsl1(SDValue Addr, SDValue &Base, ++ SDValue &Offset) const { ++ if (selectAddrFrameIndex(Addr, Base, Offset)) ++ return true; ++ ++ if (selectAddrFrameIndexOffset(Addr, Base, Offset, 10, 1)) ++ return true; ++ ++ return selectAddrDefault(Addr, Base, Offset); ++} ++ ++bool LoongArchDAGToDAGISel::selectIntAddrSImm10(SDValue Addr, SDValue &Base, ++ SDValue &Offset) const { ++ if (selectAddrFrameIndex(Addr, Base, Offset)) ++ return true; ++ ++ if (selectAddrFrameIndexOffset(Addr, Base, Offset, 10)) ++ return true; ++ ++ return selectAddrDefault(Addr, Base, Offset); ++} ++ ++bool LoongArchDAGToDAGISel::selectIntAddrSImm10Lsl2(SDValue Addr, SDValue &Base, ++ SDValue &Offset) const { ++ if (selectAddrFrameIndex(Addr, Base, Offset)) ++ return true; ++ ++ if (selectAddrFrameIndexOffset(Addr, Base, Offset, 10, 2)) ++ return true; ++ ++ return selectAddrDefault(Addr, Base, Offset); ++} ++ ++bool LoongArchDAGToDAGISel::selectIntAddrSImm11Lsl1(SDValue Addr, SDValue &Base, ++ SDValue &Offset) const { ++ if (selectAddrFrameIndex(Addr, Base, Offset)) ++ return true; ++ ++ if (selectAddrFrameIndexOffset(Addr, Base, Offset, 11, 1)) ++ return true; ++ ++ return selectAddrDefault(Addr, Base, Offset); ++} ++ ++bool LoongArchDAGToDAGISel::selectIntAddrSImm9Lsl3(SDValue Addr, SDValue &Base, ++ SDValue &Offset) const { ++ if (selectAddrFrameIndex(Addr, Base, Offset)) ++ return true; ++ ++ if (selectAddrFrameIndexOffset(Addr, Base, Offset, 9, 3)) ++ return true; ++ ++ return selectAddrDefault(Addr, Base, Offset); ++} ++ ++bool LoongArchDAGToDAGISel::selectIntAddrSImm14Lsl2(SDValue Addr, SDValue &Base, ++ SDValue &Offset) const { ++ if (selectAddrFrameIndex(Addr, Base, Offset)) ++ return true; ++ ++ if (selectAddrFrameIndexOffset(Addr, Base, Offset, 14, 2)) ++ return true; ++ ++ return false; ++} ++ ++bool LoongArchDAGToDAGISel::selectIntAddrSImm10Lsl3(SDValue Addr, SDValue &Base, ++ SDValue &Offset) const { ++ if (selectAddrFrameIndex(Addr, Base, Offset)) ++ return true; ++ ++ if 
(selectAddrFrameIndexOffset(Addr, Base, Offset, 10, 3)) ++ return true; ++ ++ return selectAddrDefault(Addr, Base, Offset); ++} ++ ++// Select constant vector splats. ++// ++// Returns true and sets Imm if: ++// * LSX is enabled ++// * N is a ISD::BUILD_VECTOR representing a constant splat ++bool LoongArchDAGToDAGISel::selectVSplat(SDNode *N, APInt &Imm, ++ unsigned MinSizeInBits) const { ++ if (!(Subtarget->hasLSX() || Subtarget->hasLASX())) ++ return false; ++ ++ BuildVectorSDNode *Node = dyn_cast(N); ++ ++ if (!Node) ++ return false; ++ ++ APInt SplatValue, SplatUndef; ++ unsigned SplatBitSize; ++ bool HasAnyUndefs; ++ ++ if (!Node->isConstantSplat(SplatValue, SplatUndef, SplatBitSize, HasAnyUndefs, ++ MinSizeInBits)) ++ return false; ++ ++ Imm = SplatValue; ++ ++ return true; ++} ++ ++// Select constant vector splats. ++// ++// In addition to the requirements of selectVSplat(), this function returns ++// true and sets Imm if: ++// * The splat value is the same width as the elements of the vector ++// * The splat value fits in an integer with the specified signed-ness and ++// width. ++// ++// This function looks through ISD::BITCAST nodes. ++// TODO: This might not be appropriate for big-endian LSX since BITCAST is ++// sometimes a shuffle in big-endian mode. ++// ++// It's worth noting that this function is not used as part of the selection ++// of [v/xv]ldi.[bhwd] since it does not permit using the wrong-typed ++// [v/xv]ldi.[bhwd] instruction to achieve the desired bit pattern. ++// [v/xv]ldi.[bhwd] is selected in LoongArchDAGToDAGISel::selectNode. ++bool LoongArchDAGToDAGISel::selectVSplatCommon(SDValue N, SDValue &Imm, ++ bool Signed, ++ unsigned ImmBitSize) const { ++ APInt ImmValue; ++ EVT EltTy = N->getValueType(0).getVectorElementType(); ++ ++ if (N->getOpcode() == ISD::BITCAST) ++ N = N->getOperand(0); ++ ++ if (selectVSplat(N.getNode(), ImmValue, EltTy.getSizeInBits()) && ++ ImmValue.getBitWidth() == EltTy.getSizeInBits()) { ++ ++ if ((Signed && ImmValue.isSignedIntN(ImmBitSize)) || ++ (!Signed && ImmValue.isIntN(ImmBitSize))) { ++ Imm = CurDAG->getTargetConstant(ImmValue, SDLoc(N), EltTy); ++ return true; ++ } ++ } ++ ++ return false; ++} ++ ++// Select constant vector splats. ++bool LoongArchDAGToDAGISel::selectVSplatUimm1(SDValue N, SDValue &Imm) const { ++ return selectVSplatCommon(N, Imm, false, 1); ++} ++ ++bool LoongArchDAGToDAGISel::selectVSplatUimm2(SDValue N, SDValue &Imm) const { ++ return selectVSplatCommon(N, Imm, false, 2); ++} ++ ++bool LoongArchDAGToDAGISel::selectVSplatUimm3(SDValue N, SDValue &Imm) const { ++ return selectVSplatCommon(N, Imm, false, 3); ++} ++ ++bool LoongArchDAGToDAGISel::selectVSplatUimm4(SDValue N, SDValue &Imm) const { ++ return selectVSplatCommon(N, Imm, false, 4); ++} ++ ++bool LoongArchDAGToDAGISel::selectVSplatUimm5(SDValue N, SDValue &Imm) const { ++ return selectVSplatCommon(N, Imm, false, 5); ++} ++ ++bool LoongArchDAGToDAGISel::selectVSplatUimm6(SDValue N, SDValue &Imm) const { ++ return selectVSplatCommon(N, Imm, false, 6); ++} ++ ++bool LoongArchDAGToDAGISel::selectVSplatUimm8(SDValue N, SDValue &Imm) const { ++ return selectVSplatCommon(N, Imm, false, 8); ++} ++ ++bool LoongArchDAGToDAGISel::selectVSplatSimm5(SDValue N, SDValue &Imm) const { ++ return selectVSplatCommon(N, Imm, true, 5); ++} ++ ++// Select constant vector splats whose value is a power of 2. 
++// ++// In addition to the requirements of selectVSplat(), this function returns ++// true and sets Imm if: ++// * The splat value is the same width as the elements of the vector ++// * The splat value is a power of two. ++// ++// This function looks through ISD::BITCAST nodes. ++// TODO: This might not be appropriate for big-endian LSX since BITCAST is ++// sometimes a shuffle in big-endian mode. ++bool LoongArchDAGToDAGISel::selectVSplatUimmPow2(SDValue N, ++ SDValue &Imm) const { ++ APInt ImmValue; ++ EVT EltTy = N->getValueType(0).getVectorElementType(); ++ ++ if (N->getOpcode() == ISD::BITCAST) ++ N = N->getOperand(0); ++ ++ if (selectVSplat(N.getNode(), ImmValue, EltTy.getSizeInBits()) && ++ ImmValue.getBitWidth() == EltTy.getSizeInBits()) { ++ int32_t Log2 = ImmValue.exactLogBase2(); ++ ++ if (Log2 != -1) { ++ Imm = CurDAG->getTargetConstant(Log2, SDLoc(N), EltTy); ++ return true; ++ } ++ } ++ ++ return false; ++} ++ ++bool LoongArchDAGToDAGISel::selectVSplatUimmInvPow2(SDValue N, ++ SDValue &Imm) const { ++ APInt ImmValue; ++ EVT EltTy = N->getValueType(0).getVectorElementType(); ++ ++ if (N->getOpcode() == ISD::BITCAST) ++ N = N->getOperand(0); ++ ++ if (selectVSplat(N.getNode(), ImmValue, EltTy.getSizeInBits()) && ++ ImmValue.getBitWidth() == EltTy.getSizeInBits()) { ++ int32_t Log2 = (~ImmValue).exactLogBase2(); ++ ++ if (Log2 != -1) { ++ Imm = CurDAG->getTargetConstant(Log2, SDLoc(N), EltTy); ++ return true; ++ } ++ } ++ ++ return false; ++} ++ ++// Select constant vector splats whose value only has a consecutive sequence ++// of left-most bits set (e.g. 0b11...1100...00). ++// ++// In addition to the requirements of selectVSplat(), this function returns ++// true and sets Imm if: ++// * The splat value is the same width as the elements of the vector ++// * The splat value is a consecutive sequence of left-most bits. ++// ++// This function looks through ISD::BITCAST nodes. ++// TODO: This might not be appropriate for big-endian LSX since BITCAST is ++// sometimes a shuffle in big-endian mode. ++bool LoongArchDAGToDAGISel::selectVSplatMaskL(SDValue N, SDValue &Imm) const { ++ APInt ImmValue; ++ EVT EltTy = N->getValueType(0).getVectorElementType(); ++ ++ if (N->getOpcode() == ISD::BITCAST) ++ N = N->getOperand(0); ++ ++ if (selectVSplat(N.getNode(), ImmValue, EltTy.getSizeInBits()) && ++ ImmValue.getBitWidth() == EltTy.getSizeInBits()) { ++ // Extract the run of set bits starting with bit zero from the bitwise ++ // inverse of ImmValue, and test that the inverse of this is the same ++ // as the original value. ++ if (ImmValue == ~(~ImmValue & ~(~ImmValue + 1))) { ++ ++ Imm = CurDAG->getTargetConstant(ImmValue.countPopulation() - 1, SDLoc(N), ++ EltTy); ++ return true; ++ } ++ } ++ ++ return false; ++} ++ ++// Select constant vector splats whose value only has a consecutive sequence ++// of right-most bits set (e.g. 0b00...0011...11). ++// ++// In addition to the requirements of selectVSplat(), this function returns ++// true and sets Imm if: ++// * The splat value is the same width as the elements of the vector ++// * The splat value is a consecutive sequence of right-most bits. ++// ++// This function looks through ISD::BITCAST nodes. ++// TODO: This might not be appropriate for big-endian LSX since BITCAST is ++// sometimes a shuffle in big-endian mode. 
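An illustrative aside (not part of the upstream patch): the mask predicates selectVSplatMaskL (above) and selectVSplatMaskR (below) hinge on two easy-to-misread bit tests. The minimal standalone sketch below uses plain uint32_t in place of APInt; the helper names are invented here purely for illustration.

  #include <cstdint>

  // The selectVSplatMaskL test: accept values whose set bits form a single
  // run anchored at the most significant bit, e.g. 0b1111...0000.
  constexpr bool isLeftAlignedMask(uint32_t V) {
    return V == uint32_t(~(~V & ~(~V + 1u)));
  }

  // The selectVSplatMaskR test: accept values whose set bits form a single
  // run anchored at bit zero, e.g. 0b0000...1111.
  constexpr bool isRightAlignedMask(uint32_t V) {
    return V == (V & ~(V + 1u));
  }

  static_assert(isLeftAlignedMask(0xFFFF0000u), "high run of ones");
  static_assert(!isLeftAlignedMask(0x0FF00000u), "run not anchored at the MSB");
  static_assert(isRightAlignedMask(0x0000FFFFu), "low run of ones");
  static_assert(!isRightAlignedMask(0x00FF0000u), "run not anchored at bit zero");

In both functions, an accepted value's population count minus one is then encoded as the target immediate, as the patch code does.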
++bool LoongArchDAGToDAGISel::selectVSplatMaskR(SDValue N, SDValue &Imm) const { ++ APInt ImmValue; ++ EVT EltTy = N->getValueType(0).getVectorElementType(); ++ ++ if (N->getOpcode() == ISD::BITCAST) ++ N = N->getOperand(0); ++ ++ if (selectVSplat(N.getNode(), ImmValue, EltTy.getSizeInBits()) && ++ ImmValue.getBitWidth() == EltTy.getSizeInBits()) { ++ // Extract the run of set bits starting with bit zero, and test that the ++ // result is the same as the original value ++ if (ImmValue == (ImmValue & ~(ImmValue + 1))) { ++ Imm = CurDAG->getTargetConstant(ImmValue.countPopulation() - 1, SDLoc(N), ++ EltTy); ++ return true; ++ } ++ } ++ ++ return false; ++} ++ ++bool LoongArchDAGToDAGISel::trySelect(SDNode *Node) { ++ unsigned Opcode = Node->getOpcode(); ++ SDLoc DL(Node); ++ ++ /// ++ // Instruction Selection not handled by the auto-generated ++ // tablegen selection should be handled here. ++ /// ++ switch(Opcode) { ++ default: break; ++ case ISD::ConstantFP: { ++ ConstantFPSDNode *CN = dyn_cast(Node); ++ if (Node->getValueType(0) == MVT::f64 && CN->isExactlyValue(+0.0)) { ++ if (Subtarget->is64Bit()) { ++ SDValue Zero = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), DL, ++ LoongArch::ZERO_64, MVT::i64); ++ ReplaceNode(Node, ++ CurDAG->getMachineNode(LoongArch::MOVGR2FR_D, DL, MVT::f64, Zero)); ++ } ++ return true; ++ } ++ break; ++ } ++ ++ case ISD::Constant: { ++ const ConstantSDNode *CN = dyn_cast(Node); ++ MVT VT = CN->getSimpleValueType(0); ++ int64_t Imm = CN->getSExtValue(); ++ LoongArchAnalyzeImmediate::InstSeq Seq = ++ LoongArchAnalyzeImmediate::generateInstSeq(Imm, VT == MVT::i64); ++ SDLoc DL(CN); ++ SDNode *Result = nullptr; ++ SDValue SrcReg = CurDAG->getRegister( ++ VT == MVT::i64 ? LoongArch::ZERO_64 : LoongArch::ZERO, VT); ++ ++ // The instructions in the sequence are handled here. ++ for (LoongArchAnalyzeImmediate::Inst &Inst : Seq) { ++ SDValue SDImm = CurDAG->getTargetConstant(Inst.Imm, DL, VT); ++ if (Inst.Opc == LoongArch::LU12I_W || Inst.Opc == LoongArch::LU12I_W32) ++ Result = CurDAG->getMachineNode(Inst.Opc, DL, VT, SDImm); ++ else ++ Result = CurDAG->getMachineNode(Inst.Opc, DL, VT, SrcReg, SDImm); ++ SrcReg = SDValue(Result, 0); ++ } ++ ReplaceNode(Node, Result); ++ return true; ++ } ++ ++ case ISD::BUILD_VECTOR: { ++ // Select appropriate vldi.[bhwd] instructions for constant splats of ++ // 128-bit when LSX is enabled. Select appropriate xvldi.[bhwd] instructions ++ // for constant splats of 256-bit when LASX is enabled. Fixup any register ++ // class mismatches that occur as a result. ++ // ++ // This allows the compiler to use a wider range of immediates than would ++ // otherwise be allowed. If, for example, v4i32 could only use [v/xv]ldi.h ++ // then it would not be possible to load { 0x01010101, 0x01010101, ++ // 0x01010101, 0x01010101 } without using a constant pool. This would be ++ // sub-optimal when // '[v/xv]ldi.b vd, 1' is capable of producing that ++ // bit-pattern in the same set/ of registers. Similarly, [v/xv]ldi.h isn't ++ // capable of producing { 0x00000000, 0x00000001, 0x00000000, 0x00000001 } ++ // but '[v/xv]ldi.d vd, 1' can. 
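An illustrative aside (not part of the upstream patch): the BUILD_VECTOR selection code below chooses a progressively more expensive materialization strategy based on how many signed bits the splat value needs. The sketch is a simplified paraphrase of that if/else chain, with the element-width and ABI conditions omitted; the enum and helper names are invented here for illustration only.

  #include <cstdint>

  enum class SplatTier {
    DirectVldi,             // [v/xv]ldi with the raw immediate
    AddiThenReplicate,      // addi.{w,d} $r, $zero, imm, then [x]vreplgr2vr
    Lu12iOriThenReplicate,  // lu12i.w (+ ori), then [x]vreplgr2vr
    FullImmSequence         // generic immediate sequence, then [x]vreplgr2vr.d
  };

  constexpr bool fitsSigned(int64_t V, unsigned Bits) {
    return V >= -(int64_t(1) << (Bits - 1)) && V < (int64_t(1) << (Bits - 1));
  }

  // Simplified model of the tier choice made in the ISD::BUILD_VECTOR case.
  constexpr SplatTier classifySplat(int64_t SplatValue) {
    if (fitsSigned(SplatValue, 10))
      return SplatTier::DirectVldi;
    if (fitsSigned(SplatValue, 12))
      return SplatTier::AddiThenReplicate;
    if (fitsSigned(SplatValue, 32))
      return SplatTier::Lu12iOriThenReplicate;
    return SplatTier::FullImmSequence;
  }

  static_assert(classifySplat(5) == SplatTier::DirectVldi, "");
  static_assert(classifySplat(1000) == SplatTier::AddiThenReplicate, "");
  static_assert(classifySplat(0x12345) == SplatTier::Lu12iOriThenReplicate, "");

The wider the element size at which the splat is matched, the larger the range of constants that avoid a constant-pool load, which is the point the comment above makes.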
++ ++ const LoongArchABIInfo &ABI = ++ static_cast(TM).getABI(); ++ ++ BuildVectorSDNode *BVN = cast(Node); ++ APInt SplatValue, SplatUndef; ++ unsigned SplatBitSize; ++ bool HasAnyUndefs; ++ unsigned LdiOp; ++ EVT ResVecTy = BVN->getValueType(0); ++ EVT ViaVecTy; ++ ++ if ((!Subtarget->hasLSX() || !BVN->getValueType(0).is128BitVector()) && ++ (!Subtarget->hasLASX() || !BVN->getValueType(0).is256BitVector())) ++ return false; ++ ++ if (!BVN->isConstantSplat(SplatValue, SplatUndef, SplatBitSize, ++ HasAnyUndefs, 8)) ++ return false; ++ ++ bool IsLASX256 = BVN->getValueType(0).is256BitVector(); ++ ++ switch (SplatBitSize) { ++ default: ++ return false; ++ case 8: ++ LdiOp = IsLASX256 ? LoongArch::XVLDI_B : LoongArch::VLDI_B; ++ ViaVecTy = IsLASX256 ? MVT::v32i8 : MVT::v16i8; ++ break; ++ case 16: ++ LdiOp = IsLASX256 ? LoongArch::XVLDI_H : LoongArch::VLDI_H; ++ ViaVecTy = IsLASX256 ? MVT::v16i16 : MVT::v8i16; ++ break; ++ case 32: ++ LdiOp = IsLASX256 ? LoongArch::XVLDI_W : LoongArch::VLDI_W; ++ ViaVecTy = IsLASX256 ? MVT::v8i32 : MVT::v4i32; ++ break; ++ case 64: ++ LdiOp = IsLASX256 ? LoongArch::XVLDI_D : LoongArch::VLDI_D; ++ ViaVecTy = IsLASX256 ? MVT::v4i64 : MVT::v2i64; ++ break; ++ } ++ ++ SDNode *Res; ++ ++ // If we have a signed 13 bit integer, we can splat it directly. ++ // ++ // If we have something bigger we can synthesize the value into a GPR and ++ // splat from there. ++ if (SplatValue.isSignedIntN(10)) { ++ SDValue Imm = CurDAG->getTargetConstant(SplatValue, DL, ++ ViaVecTy.getVectorElementType()); ++ ++ Res = CurDAG->getMachineNode(LdiOp, DL, ViaVecTy, Imm); ++ } else if (SplatValue.isSignedIntN(12)) { ++ bool Is32BitSplat = SplatBitSize < 64 ? true : false; ++ const unsigned ADDIOp = ++ Is32BitSplat ? LoongArch::ADDI_W : LoongArch::ADDI_D; ++ const MVT SplatMVT = Is32BitSplat ? MVT::i32 : MVT::i64; ++ SDValue ZeroVal = CurDAG->getRegister( ++ Is32BitSplat ? LoongArch::ZERO : LoongArch::ZERO_64, SplatMVT); ++ ++ const unsigned FILLOp = ++ (SplatBitSize == 16) ++ ? (IsLASX256 ? LoongArch::XVREPLGR2VR_H : LoongArch::VREPLGR2VR_H) ++ : (SplatBitSize == 32 ++ ? (IsLASX256 ? LoongArch::XVREPLGR2VR_W ++ : LoongArch::VREPLGR2VR_W) ++ : (SplatBitSize == 64 ++ ? (IsLASX256 ? LoongArch::XVREPLGR2VR_D ++ : LoongArch::VREPLGR2VR_D) ++ : 0)); ++ ++ assert(FILLOp != 0 && "Unknown FILL Op for splat synthesis!"); ++ ++ short Lo = SplatValue.getLoBits(12).getSExtValue(); ++ SDValue LoVal = CurDAG->getTargetConstant(Lo, DL, SplatMVT); ++ ++ Res = CurDAG->getMachineNode(ADDIOp, DL, SplatMVT, ZeroVal, LoVal); ++ Res = CurDAG->getMachineNode(FILLOp, DL, ViaVecTy, SDValue(Res, 0)); ++ } else if (SplatValue.isSignedIntN(16) && SplatBitSize == 16) { ++ const unsigned Lo = SplatValue.getLoBits(12).getZExtValue(); ++ const unsigned Hi = SplatValue.lshr(12).getLoBits(4).getZExtValue(); ++ SDValue ZeroVal = CurDAG->getRegister(LoongArch::ZERO, MVT::i32); ++ ++ SDValue LoVal = CurDAG->getTargetConstant(Lo, DL, MVT::i32); ++ SDValue HiVal = CurDAG->getTargetConstant(Hi, DL, MVT::i32); ++ if (Hi) ++ Res = CurDAG->getMachineNode(LoongArch::LU12I_W32, DL, MVT::i32, HiVal); ++ ++ if (Lo) ++ Res = CurDAG->getMachineNode(LoongArch::ORI32, DL, MVT::i32, ++ Hi ? SDValue(Res, 0) : ZeroVal, LoVal); ++ ++ assert((Hi || Lo) && "Zero case reached 32 bit case splat synthesis!"); ++ const unsigned FILLOp = ++ IsLASX256 ? LoongArch::XVREPLGR2VR_H : LoongArch::VREPLGR2VR_H; ++ EVT FILLTy = IsLASX256 ? 
MVT::v16i16 : MVT::v8i16; ++ Res = CurDAG->getMachineNode(FILLOp, DL, FILLTy, SDValue(Res, 0)); ++ } else if (SplatValue.isSignedIntN(32) && SplatBitSize == 32) { ++ // Only handle the cases where the splat size agrees with the size ++ // of the SplatValue here. ++ const unsigned Lo = SplatValue.getLoBits(12).getZExtValue(); ++ const unsigned Hi = SplatValue.lshr(12).getLoBits(20).getZExtValue(); ++ SDValue ZeroVal = CurDAG->getRegister(LoongArch::ZERO, MVT::i32); ++ ++ SDValue LoVal = CurDAG->getTargetConstant(Lo, DL, MVT::i32); ++ SDValue HiVal = CurDAG->getTargetConstant(Hi, DL, MVT::i32); ++ if (Hi) ++ Res = CurDAG->getMachineNode(LoongArch::LU12I_W32, DL, MVT::i32, HiVal); ++ ++ if (Lo) ++ Res = CurDAG->getMachineNode(LoongArch::ORI32, DL, MVT::i32, ++ Hi ? SDValue(Res, 0) : ZeroVal, LoVal); ++ ++ assert((Hi || Lo) && "Zero case reached 32 bit case splat synthesis!"); ++ const unsigned FILLOp = ++ IsLASX256 ? LoongArch::XVREPLGR2VR_W : LoongArch::VREPLGR2VR_W; ++ EVT FILLTy = IsLASX256 ? MVT::v8i32 : MVT::v4i32; ++ Res = CurDAG->getMachineNode(FILLOp, DL, FILLTy, SDValue(Res, 0)); ++ ++ } else if ((SplatValue.isSignedIntN(32) && SplatBitSize == 64 && ++ ABI.IsLP64D()) || ++ (SplatValue.isSignedIntN(64))) { ++ ++ int64_t Imm = SplatValue.getSExtValue(); ++ LoongArchAnalyzeImmediate::InstSeq Seq = ++ LoongArchAnalyzeImmediate::generateInstSeq(Imm, true); ++ SDValue SrcReg = CurDAG->getRegister(LoongArch::ZERO_64, MVT::i64); ++ ++ for (LoongArchAnalyzeImmediate::Inst &Inst : Seq) { ++ SDValue SDImm = CurDAG->getTargetConstant(Inst.Imm, DL, MVT::i64); ++ if (Inst.Opc == LoongArch::LU12I_W || Inst.Opc == LoongArch::LU12I_W32) ++ Res = CurDAG->getMachineNode(Inst.Opc, DL, MVT::i64, SDImm); ++ else ++ Res = CurDAG->getMachineNode(Inst.Opc, DL, MVT::i64, SrcReg, SDImm); ++ SrcReg = SDValue(Res, 0); ++ } ++ ++ const unsigned FILLOp = ++ IsLASX256 ? LoongArch::XVREPLGR2VR_D : LoongArch::VREPLGR2VR_D; ++ EVT FILLTy = IsLASX256 ? MVT::v4i64 : MVT::v2i64; ++ Res = CurDAG->getMachineNode(FILLOp, DL, FILLTy, SDValue(Res, 0)); ++ ++ } else ++ return false; ++ ++ if (ResVecTy != ViaVecTy) { ++ // If LdiOp is writing to a different register class to ResVecTy, then ++ // fix it up here. This COPY_TO_REGCLASS should never cause a move.v ++ // since the source and destination register sets contain the same ++ // registers. ++ const TargetLowering *TLI = getTargetLowering(); ++ MVT ResVecTySimple = ResVecTy.getSimpleVT(); ++ const TargetRegisterClass *RC = TLI->getRegClassFor(ResVecTySimple); ++ Res = CurDAG->getMachineNode( ++ LoongArch::COPY_TO_REGCLASS, DL, ResVecTy, SDValue(Res, 0), ++ CurDAG->getTargetConstant(RC->getID(), DL, MVT::i32)); ++ } ++ ++ ReplaceNode(Node, Res); ++ return true; ++ } ++ } ++ ++ return false; ++} ++ ++/// Select instructions not customized! Used for ++/// expanded, promoted and normal instructions ++void LoongArchDAGToDAGISel::Select(SDNode *Node) { ++ // If we have a custom node, we already have selected! ++ if (Node->isMachineOpcode()) { ++ LLVM_DEBUG(errs() << "== "; Node->dump(CurDAG); errs() << "\n"); ++ Node->setNodeId(-1); ++ return; ++ } ++ ++ // See if subclasses can handle this node. 
++ if (trySelect(Node)) ++ return; ++ ++ // Select the default instruction ++ SelectCode(Node); ++} ++ ++bool LoongArchDAGToDAGISel:: ++SelectInlineAsmMemoryOperand(const SDValue &Op, unsigned ConstraintID, ++ std::vector &OutOps) { ++ SDValue Base, Offset; ++ ++ switch(ConstraintID) { ++ default: ++ llvm_unreachable("Unexpected asm memory constraint"); ++ // All memory constraints can at least accept raw pointers. ++ case InlineAsm::Constraint_i: ++ OutOps.push_back(Op); ++ OutOps.push_back(CurDAG->getTargetConstant(0, SDLoc(Op), MVT::i32)); ++ return false; ++ case InlineAsm::Constraint_m: ++ if (selectAddrRegImm12(Op, Base, Offset)) { ++ OutOps.push_back(Base); ++ OutOps.push_back(Offset); ++ return false; ++ } ++ OutOps.push_back(Op); ++ OutOps.push_back(CurDAG->getTargetConstant(0, SDLoc(Op), MVT::i32)); ++ return false; ++ case InlineAsm::Constraint_R: ++ if (selectAddrRegImm12(Op, Base, Offset)) { ++ OutOps.push_back(Base); ++ OutOps.push_back(Offset); ++ return false; ++ } ++ OutOps.push_back(Op); ++ OutOps.push_back(CurDAG->getTargetConstant(0, SDLoc(Op), MVT::i32)); ++ return false; ++ case InlineAsm::Constraint_ZC: ++ if (selectIntAddrSImm14Lsl2(Op, Base, Offset)) { ++ OutOps.push_back(Base); ++ OutOps.push_back(Offset); ++ return false; ++ } ++ OutOps.push_back(Op); ++ OutOps.push_back(CurDAG->getTargetConstant(0, SDLoc(Op), MVT::i32)); ++ return false; ++ case InlineAsm::Constraint_ZB: ++ OutOps.push_back(Op); ++ OutOps.push_back(CurDAG->getTargetConstant(0, SDLoc(Op), MVT::i32)); ++ return false; ++ } ++ return true; ++} ++ ++FunctionPass *llvm::createLoongArchISelDag(LoongArchTargetMachine &TM, ++ CodeGenOpt::Level OptLevel) { ++ return new LoongArchDAGToDAGISel(TM, OptLevel); ++} +diff --git a/lib/Target/LoongArch/LoongArchISelDAGToDAG.h b/lib/Target/LoongArch/LoongArchISelDAGToDAG.h +new file mode 100644 +index 00000000..9309c256 +--- /dev/null ++++ b/lib/Target/LoongArch/LoongArchISelDAGToDAG.h +@@ -0,0 +1,147 @@ ++//===---- LoongArchISelDAGToDAG.h - A Dag to Dag Inst Selector for LoongArch --------===// ++// ++// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. ++// See https://llvm.org/LICENSE.txt for license information. ++// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception ++// ++//===----------------------------------------------------------------------===// ++// ++// This file defines an instruction selector for the LoongArch target. ++// ++//===----------------------------------------------------------------------===// ++ ++#ifndef LLVM_LIB_TARGET_LOONGARCH_LOONGARCHISELDAGTODAG_H ++#define LLVM_LIB_TARGET_LOONGARCH_LOONGARCHISELDAGTODAG_H ++ ++#include "LoongArch.h" ++#include "LoongArchSubtarget.h" ++#include "LoongArchTargetMachine.h" ++#include "llvm/CodeGen/SelectionDAGISel.h" ++ ++//===----------------------------------------------------------------------===// ++// Instruction Selector Implementation ++//===----------------------------------------------------------------------===// ++ ++//===----------------------------------------------------------------------===// ++// LoongArchDAGToDAGISel - LoongArch specific code to select LoongArch machine ++// instructions for SelectionDAG operations. 
++//===----------------------------------------------------------------------===// ++namespace llvm { ++ ++class LoongArchDAGToDAGISel : public SelectionDAGISel { ++public: ++ explicit LoongArchDAGToDAGISel(LoongArchTargetMachine &TM, CodeGenOpt::Level OL) ++ : SelectionDAGISel(TM, OL), Subtarget(nullptr) {} ++ ++ // Pass Name ++ StringRef getPassName() const override { ++ return "LoongArch DAG->DAG Pattern Instruction Selection"; ++ } ++ ++ bool runOnMachineFunction(MachineFunction &MF) override; ++ ++ void getAnalysisUsage(AnalysisUsage &AU) const override; ++ ++private: ++ /// Keep a pointer to the LoongArchSubtarget around so that we can make the right ++ /// decision when generating code for different targets. ++ const LoongArchSubtarget *Subtarget; ++ // Include the pieces autogenerated from the target description. ++ #include "LoongArchGenDAGISel.inc" ++ ++ bool selectAddrFrameIndex(SDValue Addr, SDValue &Base, SDValue &Offset) const; ++ ++ bool selectAddrFrameIndexOffset(SDValue Addr, SDValue &Base, SDValue &Offset, ++ unsigned OffsetBits, ++ unsigned ShiftAmount) const; ++ ++ // Complex Pattern. ++ /// (reg + imm). ++ bool selectAddrRegImm(SDValue Addr, SDValue &Base, SDValue &Offset) const; ++ ++ /// Fall back on this function if all else fails. ++ bool selectAddrDefault(SDValue Addr, SDValue &Base, SDValue &Offset) const; ++ ++ /// Match integer address pattern. ++ bool selectIntAddr(SDValue Addr, SDValue &Base, SDValue &Offset) const; ++ ++ bool selectAddrRegImm12(SDValue Addr, SDValue &Base, ++ SDValue &Offset) const; ++ ++ /// Match addr+simm12 and addr ++ bool selectIntAddrSImm12(SDValue Addr, SDValue &Base, ++ SDValue &Offset) const; ++ ++ bool selectIntAddrSImm10(SDValue Addr, SDValue &Base, ++ SDValue &Offset) const; ++ ++ bool selectIntAddrSImm10Lsl1(SDValue Addr, SDValue &Base, ++ SDValue &Offset) const; ++ ++ bool selectIntAddrSImm10Lsl2(SDValue Addr, SDValue &Base, ++ SDValue &Offset) const; ++ ++ bool selectIntAddrSImm9Lsl3(SDValue Addr, SDValue &Base, ++ SDValue &Offset) const; ++ ++ bool selectIntAddrSImm11Lsl1(SDValue Addr, SDValue &Base, ++ SDValue &Offset) const; ++ ++ bool selectIntAddrSImm14Lsl2(SDValue Addr, SDValue &Base, ++ SDValue &Offset) const; ++ ++ bool selectIntAddrSImm10Lsl3(SDValue Addr, SDValue &Base, ++ SDValue &Offset) const; ++ ++ /// Select constant vector splats. ++ bool selectVSplat(SDNode *N, APInt &Imm, unsigned MinSizeInBits) const; ++ /// Select constant vector splats whose value fits in a given integer. ++ bool selectVSplatCommon(SDValue N, SDValue &Imm, bool Signed, ++ unsigned ImmBitSize) const; ++ /// Select constant vector splats whose value fits in a uimm1. ++ bool selectVSplatUimm1(SDValue N, SDValue &Imm) const; ++ /// Select constant vector splats whose value fits in a uimm2. ++ bool selectVSplatUimm2(SDValue N, SDValue &Imm) const; ++ /// Select constant vector splats whose value fits in a uimm3. ++ bool selectVSplatUimm3(SDValue N, SDValue &Imm) const; ++ /// Select constant vector splats whose value fits in a uimm4. ++ bool selectVSplatUimm4(SDValue N, SDValue &Imm) const; ++ /// Select constant vector splats whose value fits in a uimm5. ++ bool selectVSplatUimm5(SDValue N, SDValue &Imm) const; ++ /// Select constant vector splats whose value fits in a uimm6. ++ bool selectVSplatUimm6(SDValue N, SDValue &Imm) const; ++ /// Select constant vector splats whose value fits in a uimm8. ++ bool selectVSplatUimm8(SDValue N, SDValue &Imm) const; ++ /// Select constant vector splats whose value fits in a simm5. 
++ bool selectVSplatSimm5(SDValue N, SDValue &Imm) const; ++ /// Select constant vector splats whose value is a power of 2. ++ bool selectVSplatUimmPow2(SDValue N, SDValue &Imm) const; ++ /// Select constant vector splats whose value is the inverse of a ++ /// power of 2. ++ bool selectVSplatUimmInvPow2(SDValue N, SDValue &Imm) const; ++ /// Select constant vector splats whose value is a run of set bits ++ /// ending at the most significant bit ++ bool selectVSplatMaskL(SDValue N, SDValue &Imm) const; ++ /// Select constant vector splats whose value is a run of set bits ++ /// starting at bit zero. ++ bool selectVSplatMaskR(SDValue N, SDValue &Imm) const; ++ ++ void Select(SDNode *N) override; ++ ++ bool trySelect(SDNode *Node); ++ ++ // getImm - Return a target constant with the specified value. ++ inline SDValue getImm(const SDNode *Node, uint64_t Imm) { ++ return CurDAG->getTargetConstant(Imm, SDLoc(Node), Node->getValueType(0)); ++ } ++ ++ bool SelectInlineAsmMemoryOperand(const SDValue &Op, ++ unsigned ConstraintID, ++ std::vector &OutOps) override; ++}; ++ ++FunctionPass *createLoongArchISelDag(LoongArchTargetMachine &TM, ++ CodeGenOpt::Level OptLevel); ++} ++ ++#endif +diff --git a/lib/Target/LoongArch/LoongArchISelLowering.cpp b/lib/Target/LoongArch/LoongArchISelLowering.cpp +new file mode 100644 +index 00000000..4e60236c +--- /dev/null ++++ b/lib/Target/LoongArch/LoongArchISelLowering.cpp +@@ -0,0 +1,8204 @@ ++//===- LoongArchISelLowering.cpp - LoongArch DAG Lowering Implementation ------------===// ++// ++// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. ++// See https://llvm.org/LICENSE.txt for license information. ++// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception ++// ++//===----------------------------------------------------------------------===// ++// ++// This file defines the interfaces that LoongArch uses to lower LLVM code into a ++// selection DAG. 
++// ++//===----------------------------------------------------------------------===// ++ ++#include "LoongArchISelLowering.h" ++#include "MCTargetDesc/LoongArchBaseInfo.h" ++#include "MCTargetDesc/LoongArchInstPrinter.h" ++#include "MCTargetDesc/LoongArchMCTargetDesc.h" ++#include "LoongArchCCState.h" ++#include "LoongArchInstrInfo.h" ++#include "LoongArchMachineFunction.h" ++#include "LoongArchRegisterInfo.h" ++#include "LoongArchSubtarget.h" ++#include "LoongArchTargetMachine.h" ++#include "LoongArchTargetObjectFile.h" ++#include "llvm/ADT/APFloat.h" ++#include "llvm/ADT/APInt.h" ++#include "llvm/ADT/ArrayRef.h" ++#include "llvm/ADT/STLExtras.h" ++#include "llvm/ADT/SmallVector.h" ++#include "llvm/ADT/Statistic.h" ++#include "llvm/ADT/StringRef.h" ++#include "llvm/ADT/StringSwitch.h" ++#include "llvm/ADT/Triple.h" ++#include "llvm/CodeGen/CallingConvLower.h" ++#include "llvm/CodeGen/FunctionLoweringInfo.h" ++#include "llvm/CodeGen/ISDOpcodes.h" ++#include "llvm/CodeGen/MachineBasicBlock.h" ++#include "llvm/CodeGen/MachineFrameInfo.h" ++#include "llvm/CodeGen/MachineFunction.h" ++#include "llvm/CodeGen/MachineInstr.h" ++#include "llvm/CodeGen/MachineInstrBuilder.h" ++#include "llvm/CodeGen/MachineJumpTableInfo.h" ++#include "llvm/CodeGen/MachineMemOperand.h" ++#include "llvm/CodeGen/MachineOperand.h" ++#include "llvm/CodeGen/MachineRegisterInfo.h" ++#include "llvm/CodeGen/RuntimeLibcalls.h" ++#include "llvm/CodeGen/SelectionDAG.h" ++#include "llvm/CodeGen/SelectionDAGNodes.h" ++#include "llvm/CodeGen/TargetFrameLowering.h" ++#include "llvm/CodeGen/TargetInstrInfo.h" ++#include "llvm/CodeGen/TargetRegisterInfo.h" ++#include "llvm/CodeGen/TargetSubtargetInfo.h" ++#include "llvm/CodeGen/ValueTypes.h" ++#include "llvm/IR/CallingConv.h" ++#include "llvm/IR/Constants.h" ++#include "llvm/IR/DataLayout.h" ++#include "llvm/IR/DebugLoc.h" ++#include "llvm/IR/DerivedTypes.h" ++#include "llvm/IR/Function.h" ++#include "llvm/IR/GlobalValue.h" ++#include "llvm/IR/Intrinsics.h" ++#include "llvm/IR/IntrinsicsLoongArch.h" ++#include "llvm/IR/Type.h" ++#include "llvm/IR/Value.h" ++#include "llvm/MC/MCContext.h" ++#include "llvm/MC/MCRegisterInfo.h" ++#include "llvm/Support/Casting.h" ++#include "llvm/Support/CodeGen.h" ++#include "llvm/Support/CommandLine.h" ++#include "llvm/Support/Compiler.h" ++#include "llvm/Support/Debug.h" ++#include "llvm/Support/ErrorHandling.h" ++#include "llvm/Support/MachineValueType.h" ++#include "llvm/Support/MathExtras.h" ++#include "llvm/Support/raw_ostream.h" ++#include "llvm/Target/TargetMachine.h" ++#include "llvm/Target/TargetOptions.h" ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++ ++using namespace llvm; ++ ++#define DEBUG_TYPE "loongarch-lower" ++ ++STATISTIC(NumTailCalls, "Number of tail calls"); ++ ++static cl::opt ++NoZeroDivCheck("mnocheck-zero-division", cl::Hidden, ++ cl::desc("LoongArch: Don't trap on integer division by zero."), ++ cl::init(false)); ++ ++static const MCPhysReg LoongArch64DPRegs[8] = { ++ LoongArch::F0_64, LoongArch::F1_64, LoongArch::F2_64, LoongArch::F3_64, ++ LoongArch::F4_64, LoongArch::F5_64, LoongArch::F6_64, LoongArch::F7_64 ++}; ++ ++// If I is a shifted mask, set the size (SMSize) and the first bit of the ++// mask (SMLsb), and return true. ++// For example, if I is 0x003ff800, (SMLsb, SMSize) = (11, 11). 
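An illustrative aside (not part of the upstream patch): the comment above gives 0x003ff800 as a shifted-mask example with (SMLsb, SMSize) = (11, 11). A tiny standalone check confirms those figures; the helpers are spelled out here rather than taken from LLVM's MathExtras and exist only for this illustration.

  #include <cstdint>

  constexpr unsigned countTrailingZeros32(uint32_t V) {
    unsigned N = 0;
    while (V && !(V & 1u)) { V >>= 1; ++N; }
    return N;
  }

  constexpr unsigned popCount32(uint32_t V) {
    unsigned N = 0;
    while (V) { N += V & 1u; V >>= 1; }
    return N;
  }

  // A shifted mask is a single contiguous run of ones, so its low bit index is
  // the trailing-zero count and its length is the population count.
  static_assert(countTrailingZeros32(0x003ff800u) == 11, "SMLsb");
  static_assert(popCount32(0x003ff800u) == 11, "SMSize");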
++static bool isShiftedMask(uint64_t I, uint64_t &SMLsb, uint64_t &SMSize) { ++ if (!isShiftedMask_64(I)) ++ return false; ++ ++ SMSize = countPopulation(I); ++ SMLsb = countTrailingZeros(I); ++ return true; ++} ++ ++SDValue LoongArchTargetLowering::getTargetNode(GlobalAddressSDNode *N, EVT Ty, ++ SelectionDAG &DAG, ++ unsigned Flag) const { ++ return DAG.getTargetGlobalAddress(N->getGlobal(), SDLoc(N), Ty, 0, Flag); ++} ++ ++SDValue LoongArchTargetLowering::getTargetNode(ExternalSymbolSDNode *N, EVT Ty, ++ SelectionDAG &DAG, ++ unsigned Flag) const { ++ return DAG.getTargetExternalSymbol(N->getSymbol(), Ty, Flag); ++} ++ ++SDValue LoongArchTargetLowering::getTargetNode(BlockAddressSDNode *N, EVT Ty, ++ SelectionDAG &DAG, ++ unsigned Flag) const { ++ return DAG.getTargetBlockAddress(N->getBlockAddress(), Ty, N->getOffset(), Flag); ++} ++ ++SDValue LoongArchTargetLowering::getTargetNode(JumpTableSDNode *N, EVT Ty, ++ SelectionDAG &DAG, ++ unsigned Flag) const { ++ return DAG.getTargetJumpTable(N->getIndex(), Ty, Flag); ++} ++ ++SDValue LoongArchTargetLowering::getTargetNode(ConstantPoolSDNode *N, EVT Ty, ++ SelectionDAG &DAG, ++ unsigned Flag) const { ++ return DAG.getTargetConstantPool(N->getConstVal(), Ty, N->getAlign(), ++ N->getOffset(), Flag); ++} ++ ++const char *LoongArchTargetLowering::getTargetNodeName(unsigned Opcode) const { ++ switch ((LoongArchISD::NodeType)Opcode) { ++ case LoongArchISD::FIRST_NUMBER: break; ++ case LoongArchISD::JmpLink: return "LoongArchISD::JmpLink"; ++ case LoongArchISD::TailCall: return "LoongArchISD::TailCall"; ++ case LoongArchISD::GlobalAddress: return "LoongArchISD::GlobalAddress"; ++ case LoongArchISD::Ret: return "LoongArchISD::Ret"; ++ case LoongArchISD::ERet: return "LoongArchISD::ERet"; ++ case LoongArchISD::EH_RETURN: return "LoongArchISD::EH_RETURN"; ++ case LoongArchISD::FPBrcond: return "LoongArchISD::FPBrcond"; ++ case LoongArchISD::FPCmp: return "LoongArchISD::FPCmp"; ++ case LoongArchISD::CMovFP_T: return "LoongArchISD::CMovFP_T"; ++ case LoongArchISD::CMovFP_F: return "LoongArchISD::CMovFP_F"; ++ case LoongArchISD::TruncIntFP: return "LoongArchISD::TruncIntFP"; ++ case LoongArchISD::DBAR: return "LoongArchISD::DBAR"; ++ case LoongArchISD::BSTRPICK: return "LoongArchISD::BSTRPICK"; ++ case LoongArchISD::BSTRINS: return "LoongArchISD::BSTRINS"; ++ case LoongArchISD::VALL_ZERO: ++ return "LoongArchISD::VALL_ZERO"; ++ case LoongArchISD::VANY_ZERO: ++ return "LoongArchISD::VANY_ZERO"; ++ case LoongArchISD::VALL_NONZERO: ++ return "LoongArchISD::VALL_NONZERO"; ++ case LoongArchISD::VANY_NONZERO: ++ return "LoongArchISD::VANY_NONZERO"; ++ case LoongArchISD::VEXTRACT_SEXT_ELT: ++ return "LoongArchISD::VEXTRACT_SEXT_ELT"; ++ case LoongArchISD::VEXTRACT_ZEXT_ELT: ++ return "LoongArchISD::VEXTRACT_ZEXT_ELT"; ++ case LoongArchISD::VNOR: ++ return "LoongArchISD::VNOR"; ++ case LoongArchISD::VSHF: ++ return "LoongArchISD::VSHF"; ++ case LoongArchISD::SHF: ++ return "LoongArchISD::SHF"; ++ case LoongArchISD::VPACKEV: ++ return "LoongArchISD::VPACKEV"; ++ case LoongArchISD::VPACKOD: ++ return "LoongArchISD::VPACKOD"; ++ case LoongArchISD::VILVH: ++ return "LoongArchISD::VILVH"; ++ case LoongArchISD::VILVL: ++ return "LoongArchISD::VILVL"; ++ case LoongArchISD::VPICKEV: ++ return "LoongArchISD::VPICKEV"; ++ case LoongArchISD::VPICKOD: ++ return "LoongArchISD::VPICKOD"; ++ case LoongArchISD::INSVE: ++ return "LoongArchISD::INSVE"; ++ case LoongArchISD::VROR: ++ return "LoongArchISD::VROR"; ++ case LoongArchISD::VRORI: ++ return "LoongArchISD::VRORI"; ++ 
case LoongArchISD::XVBROADCAST: ++ return "LoongArchISD::XVBROADCAST"; ++ case LoongArchISD::VBROADCAST: ++ return "LoongArchISD::VBROADCAST"; ++ case LoongArchISD::VABSD: ++ return "LoongArchISD::VABSD"; ++ case LoongArchISD::UVABSD: ++ return "LoongArchISD::UVABSD"; ++ case LoongArchISD::XVPICKVE: ++ return "LoongArchISD::XVPICKVE"; ++ case LoongArchISD::XVPERMI: ++ return "LoongArchISD::XVPERMI"; ++ case LoongArchISD::XVSHUF4I: ++ return "LoongArchISD::XVSHUF4I"; ++ case LoongArchISD::REVBD: ++ return "LoongArchISD::REVBD"; ++ case LoongArchISD::FSEL: ++ return "LoongArchISD::FSEL"; ++ } ++ return nullptr; ++} ++ ++LoongArchTargetLowering::LoongArchTargetLowering(const LoongArchTargetMachine &TM, ++ const LoongArchSubtarget &STI) ++ : TargetLowering(TM), Subtarget(STI), ABI(TM.getABI()) { ++ // Set up the register classes ++ addRegisterClass(MVT::i32, &LoongArch::GPR32RegClass); ++ ++ if (Subtarget.is64Bit()) ++ addRegisterClass(MVT::i64, &LoongArch::GPR64RegClass); ++ ++ // LoongArch does not have i1 type, so use i32 for ++ // setcc operations results (slt, sgt, ...). ++ setBooleanContents(ZeroOrOneBooleanContent); ++ setBooleanVectorContents(ZeroOrNegativeOneBooleanContent); ++ ++ // Load extented operations for i1 types must be promoted ++ for (MVT VT : MVT::integer_valuetypes()) { ++ setLoadExtAction(ISD::EXTLOAD, VT, MVT::i1, Promote); ++ setLoadExtAction(ISD::ZEXTLOAD, VT, MVT::i1, Promote); ++ setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i1, Promote); ++ } ++ ++ // LoongArch doesn't have extending float->double load/store. Set LoadExtAction ++ // for f32, f16 ++ for (MVT VT : MVT::fp_valuetypes()) { ++ setLoadExtAction(ISD::EXTLOAD, VT, MVT::f32, Expand); ++ setLoadExtAction(ISD::EXTLOAD, VT, MVT::f16, Expand); ++ } ++ ++ // Set LoadExtAction for f16 vectors to Expand ++ for (MVT VT : MVT::fp_fixedlen_vector_valuetypes()) { ++ MVT F16VT = MVT::getVectorVT(MVT::f16, VT.getVectorNumElements()); ++ if (F16VT.isValid()) ++ setLoadExtAction(ISD::EXTLOAD, VT, F16VT, Expand); ++ } ++ ++ setTruncStoreAction(MVT::f32, MVT::f16, Expand); ++ setTruncStoreAction(MVT::f64, MVT::f16, Expand); ++ ++ setTruncStoreAction(MVT::f64, MVT::f32, Expand); ++ ++ // Used by legalize types to correctly generate the setcc result. ++ // Without this, every float setcc comes with a AND/OR with the result, ++ // we don't want this, since the fpcmp result goes to a flag register, ++ // which is used implicitly by brcond and select operations. 
++ AddPromotedToType(ISD::SETCC, MVT::i1, MVT::i32); ++ ++ // LoongArch Custom Operations ++ setOperationAction(ISD::BR_JT, MVT::Other, Expand); ++ setOperationAction(ISD::GlobalAddress, MVT::i32, Custom); ++ setOperationAction(ISD::BlockAddress, MVT::i32, Custom); ++ setOperationAction(ISD::GlobalTLSAddress, MVT::i32, Custom); ++ setOperationAction(ISD::JumpTable, MVT::i32, Custom); ++ setOperationAction(ISD::ConstantPool, MVT::i32, Custom); ++ setOperationAction(ISD::SELECT, MVT::f32, Custom); ++ setOperationAction(ISD::SELECT, MVT::f64, Custom); ++ setOperationAction(ISD::SELECT, MVT::i32, Custom); ++ setOperationAction(ISD::SETCC, MVT::f32, Custom); ++ setOperationAction(ISD::SETCC, MVT::f64, Custom); ++ setOperationAction(ISD::BRCOND, MVT::Other, Custom); ++ setOperationAction(ISD::FP_TO_SINT, MVT::i32, Custom); ++ ++ if (Subtarget.is64Bit()) { ++ setOperationAction(ISD::GlobalAddress, MVT::i64, Custom); ++ setOperationAction(ISD::BlockAddress, MVT::i64, Custom); ++ setOperationAction(ISD::GlobalTLSAddress, MVT::i64, Custom); ++ setOperationAction(ISD::JumpTable, MVT::i64, Custom); ++ setOperationAction(ISD::ConstantPool, MVT::i64, Custom); ++ setOperationAction(ISD::SELECT, MVT::i64, Custom); ++ setOperationAction(ISD::LOAD, MVT::i64, Legal); ++ setOperationAction(ISD::STORE, MVT::i64, Legal); ++ setOperationAction(ISD::FP_TO_SINT, MVT::i64, Custom); ++ setOperationAction(ISD::SHL_PARTS, MVT::i64, Custom); ++ setOperationAction(ISD::SRA_PARTS, MVT::i64, Custom); ++ setOperationAction(ISD::SRL_PARTS, MVT::i64, Custom); ++ } ++ ++ if (!Subtarget.is64Bit()) { ++ setOperationAction(ISD::SHL_PARTS, MVT::i32, Custom); ++ setOperationAction(ISD::SRA_PARTS, MVT::i32, Custom); ++ setOperationAction(ISD::SRL_PARTS, MVT::i32, Custom); ++ } ++ ++ setOperationAction(ISD::EH_DWARF_CFA, MVT::i32, Custom); ++ if (Subtarget.is64Bit()) ++ setOperationAction(ISD::EH_DWARF_CFA, MVT::i64, Custom); ++ ++ setOperationAction(ISD::SDIV, MVT::i32, Expand); ++ setOperationAction(ISD::SREM, MVT::i32, Expand); ++ setOperationAction(ISD::UDIV, MVT::i32, Expand); ++ setOperationAction(ISD::UREM, MVT::i32, Expand); ++ setOperationAction(ISD::SDIV, MVT::i64, Expand); ++ setOperationAction(ISD::SREM, MVT::i64, Expand); ++ setOperationAction(ISD::UDIV, MVT::i64, Expand); ++ setOperationAction(ISD::UREM, MVT::i64, Expand); ++ ++ // Operations not directly supported by LoongArch. 
++ setOperationAction(ISD::BR_CC, MVT::f32, Expand); ++ setOperationAction(ISD::BR_CC, MVT::f64, Expand); ++ setOperationAction(ISD::BR_CC, MVT::i32, Expand); ++ setOperationAction(ISD::BR_CC, MVT::i64, Expand); ++ setOperationAction(ISD::SELECT_CC, MVT::i32, Expand); ++ setOperationAction(ISD::SELECT_CC, MVT::i64, Expand); ++ setOperationAction(ISD::SELECT_CC, MVT::f32, Expand); ++ setOperationAction(ISD::SELECT_CC, MVT::f64, Expand); ++ setOperationAction(ISD::UINT_TO_FP, MVT::i32, Expand); ++ setOperationAction(ISD::UINT_TO_FP, MVT::i64, Expand); ++ setOperationAction(ISD::FP_TO_UINT, MVT::i32, Expand); ++ setOperationAction(ISD::FP_TO_UINT, MVT::i64, Expand); ++ setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand); ++ setOperationAction(ISD::CTPOP, MVT::i32, Expand); ++ setOperationAction(ISD::CTPOP, MVT::i64, Expand); ++ setOperationAction(ISD::ROTL, MVT::i32, Expand); ++ setOperationAction(ISD::ROTL, MVT::i64, Expand); ++ setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32, Expand); ++ setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i64, Expand); ++ ++ setOperationAction(ISD::FSIN, MVT::f32, Expand); ++ setOperationAction(ISD::FSIN, MVT::f64, Expand); ++ setOperationAction(ISD::FCOS, MVT::f32, Expand); ++ setOperationAction(ISD::FCOS, MVT::f64, Expand); ++ setOperationAction(ISD::FSINCOS, MVT::f32, Expand); ++ setOperationAction(ISD::FSINCOS, MVT::f64, Expand); ++ setOperationAction(ISD::FPOW, MVT::f32, Expand); ++ setOperationAction(ISD::FPOW, MVT::f64, Expand); ++ setOperationAction(ISD::FLOG, MVT::f32, Expand); ++ setOperationAction(ISD::FRINT, MVT::f32, Legal); ++ setOperationAction(ISD::FRINT, MVT::f64, Legal); ++ ++ setOperationAction(ISD::FLOG10, MVT::f32, Expand); ++ setOperationAction(ISD::FEXP, MVT::f32, Expand); ++ setOperationAction(ISD::FMA, MVT::f32, Legal); ++ setOperationAction(ISD::FMA, MVT::f64, Legal); ++ setOperationAction(ISD::FREM, MVT::f32, Expand); ++ setOperationAction(ISD::FREM, MVT::f64, Expand); ++ ++ setOperationAction(ISD::FMINNUM_IEEE, MVT::f32, Legal); ++ setOperationAction(ISD::FMINNUM_IEEE, MVT::f64, Legal); ++ setOperationAction(ISD::FMAXNUM_IEEE, MVT::f32, Legal); ++ setOperationAction(ISD::FMAXNUM_IEEE, MVT::f64, Legal); ++ ++ // Lower f16 conversion operations into library calls ++ setOperationAction(ISD::FP16_TO_FP, MVT::f32, Expand); ++ setOperationAction(ISD::FP_TO_FP16, MVT::f32, Expand); ++ setOperationAction(ISD::FP16_TO_FP, MVT::f64, Expand); ++ setOperationAction(ISD::FP_TO_FP16, MVT::f64, Expand); ++ ++ setOperationAction(ISD::EH_RETURN, MVT::Other, Custom); ++ ++ setOperationAction(ISD::VASTART, MVT::Other, Custom); ++ setOperationAction(ISD::VAARG, MVT::Other, Custom); ++ setOperationAction(ISD::VACOPY, MVT::Other, Expand); ++ setOperationAction(ISD::VAEND, MVT::Other, Expand); ++ ++ // Use the default for now ++ setOperationAction(ISD::STACKSAVE, MVT::Other, Expand); ++ setOperationAction(ISD::STACKRESTORE, MVT::Other, Expand); ++ ++ if (!Subtarget.is64Bit()) { ++ setOperationAction(ISD::ATOMIC_LOAD, MVT::i64, Expand); ++ setOperationAction(ISD::ATOMIC_STORE, MVT::i64, Expand); ++ } ++ ++ if (Subtarget.is64Bit()) { ++ setLoadExtAction(ISD::EXTLOAD, MVT::i64, MVT::i32, Custom); ++ setTruncStoreAction(MVT::i64, MVT::i32, Custom); ++ } ++ ++ setOperationAction(ISD::TRAP, MVT::Other, Legal); ++ setOperationAction(ISD::BITREVERSE, MVT::i32, Legal); ++ setOperationAction(ISD::BITREVERSE, MVT::i64, Legal); ++ ++ setTargetDAGCombine(ISD::SELECT); ++ setTargetDAGCombine(ISD::AND); ++ setTargetDAGCombine(ISD::OR); ++ 
setTargetDAGCombine(ISD::AssertZext); ++ setTargetDAGCombine(ISD::SHL); ++ setTargetDAGCombine(ISD::SIGN_EXTEND); ++ setTargetDAGCombine(ISD::ZERO_EXTEND); ++ setTargetDAGCombine(ISD::ADD); ++ setTargetDAGCombine(ISD::SUB); ++ setTargetDAGCombine(ISD::MUL); ++ setTargetDAGCombine(ISD::SRL); ++ setTargetDAGCombine(ISD::SRA); ++ ++ if (ABI.IsILP32D() || ABI.IsILP32F() || ABI.IsILP32S()) { ++ // TODO ++ llvm_unreachable("Unimplemented ABI"); ++ } ++ ++ if (Subtarget.hasLSX() || Subtarget.hasLASX()) { ++ // Expand all truncating stores and extending loads. ++ for (MVT VT0 : MVT::vector_valuetypes()) { ++ for (MVT VT1 : MVT::vector_valuetypes()) { ++ setTruncStoreAction(VT0, VT1, Expand); ++ setLoadExtAction(ISD::SEXTLOAD, VT0, VT1, Expand); ++ setLoadExtAction(ISD::ZEXTLOAD, VT0, VT1, Expand); ++ setLoadExtAction(ISD::EXTLOAD, VT0, VT1, Expand); ++ } ++ } ++ } ++ ++ if (Subtarget.hasLSX()) { ++ addLSXIntType(MVT::v16i8, &LoongArch::LSX128BRegClass); ++ addLSXIntType(MVT::v8i16, &LoongArch::LSX128HRegClass); ++ addLSXIntType(MVT::v4i32, &LoongArch::LSX128WRegClass); ++ addLSXIntType(MVT::v2i64, &LoongArch::LSX128DRegClass); ++ addLSXFloatType(MVT::v4f32, &LoongArch::LSX128WRegClass); ++ addLSXFloatType(MVT::v2f64, &LoongArch::LSX128DRegClass); ++ ++ // f16 is a storage-only type, always promote it to f32. ++ setOperationAction(ISD::SETCC, MVT::f16, Promote); ++ setOperationAction(ISD::BR_CC, MVT::f16, Promote); ++ setOperationAction(ISD::SELECT_CC, MVT::f16, Promote); ++ setOperationAction(ISD::SELECT, MVT::f16, Promote); ++ setOperationAction(ISD::FADD, MVT::f16, Promote); ++ setOperationAction(ISD::FSUB, MVT::f16, Promote); ++ setOperationAction(ISD::FMUL, MVT::f16, Promote); ++ setOperationAction(ISD::FDIV, MVT::f16, Promote); ++ setOperationAction(ISD::FREM, MVT::f16, Promote); ++ setOperationAction(ISD::FMA, MVT::f16, Promote); ++ setOperationAction(ISD::FNEG, MVT::f16, Promote); ++ setOperationAction(ISD::FABS, MVT::f16, Promote); ++ setOperationAction(ISD::FCEIL, MVT::f16, Promote); ++ setOperationAction(ISD::FCOPYSIGN, MVT::f16, Promote); ++ setOperationAction(ISD::FCOS, MVT::f16, Promote); ++ setOperationAction(ISD::FP_EXTEND, MVT::f16, Promote); ++ setOperationAction(ISD::FFLOOR, MVT::f16, Promote); ++ setOperationAction(ISD::FNEARBYINT, MVT::f16, Promote); ++ setOperationAction(ISD::FPOW, MVT::f16, Promote); ++ setOperationAction(ISD::FPOWI, MVT::f16, Promote); ++ setOperationAction(ISD::FRINT, MVT::f16, Promote); ++ setOperationAction(ISD::FSIN, MVT::f16, Promote); ++ setOperationAction(ISD::FSINCOS, MVT::f16, Promote); ++ setOperationAction(ISD::FSQRT, MVT::f16, Promote); ++ setOperationAction(ISD::FEXP, MVT::f16, Promote); ++ setOperationAction(ISD::FEXP2, MVT::f16, Promote); ++ setOperationAction(ISD::FLOG, MVT::f16, Promote); ++ setOperationAction(ISD::FLOG2, MVT::f16, Promote); ++ setOperationAction(ISD::FLOG10, MVT::f16, Promote); ++ setOperationAction(ISD::FROUND, MVT::f16, Promote); ++ setOperationAction(ISD::FTRUNC, MVT::f16, Promote); ++ setOperationAction(ISD::FMINNUM, MVT::f16, Promote); ++ setOperationAction(ISD::FMAXNUM, MVT::f16, Promote); ++ setOperationAction(ISD::FMINIMUM, MVT::f16, Promote); ++ setOperationAction(ISD::FMAXIMUM, MVT::f16, Promote); ++ ++ setTargetDAGCombine(ISD::AND); ++ setTargetDAGCombine(ISD::OR); ++ setTargetDAGCombine(ISD::SRA); ++ setTargetDAGCombine(ISD::VSELECT); ++ setTargetDAGCombine(ISD::XOR); ++ } ++ ++ if (Subtarget.hasLASX()) { ++ addLASXIntType(MVT::v32i8, &LoongArch::LASX256BRegClass); ++ addLASXIntType(MVT::v16i16, 
&LoongArch::LASX256HRegClass); ++ addLASXIntType(MVT::v8i32, &LoongArch::LASX256WRegClass); ++ addLASXIntType(MVT::v4i64, &LoongArch::LASX256DRegClass); ++ addLASXFloatType(MVT::v8f32, &LoongArch::LASX256WRegClass); ++ addLASXFloatType(MVT::v4f64, &LoongArch::LASX256DRegClass); ++ ++ // f16 is a storage-only type, always promote it to f32. ++ setOperationAction(ISD::SETCC, MVT::f16, Promote); ++ setOperationAction(ISD::BR_CC, MVT::f16, Promote); ++ setOperationAction(ISD::SELECT_CC, MVT::f16, Promote); ++ setOperationAction(ISD::SELECT, MVT::f16, Promote); ++ setOperationAction(ISD::FADD, MVT::f16, Promote); ++ setOperationAction(ISD::FSUB, MVT::f16, Promote); ++ setOperationAction(ISD::FMUL, MVT::f16, Promote); ++ setOperationAction(ISD::FDIV, MVT::f16, Promote); ++ setOperationAction(ISD::FREM, MVT::f16, Promote); ++ setOperationAction(ISD::FMA, MVT::f16, Promote); ++ setOperationAction(ISD::FNEG, MVT::f16, Promote); ++ setOperationAction(ISD::FABS, MVT::f16, Promote); ++ setOperationAction(ISD::FCEIL, MVT::f16, Promote); ++ setOperationAction(ISD::FCOPYSIGN, MVT::f16, Promote); ++ setOperationAction(ISD::FCOS, MVT::f16, Promote); ++ setOperationAction(ISD::FP_EXTEND, MVT::f16, Promote); ++ setOperationAction(ISD::FFLOOR, MVT::f16, Promote); ++ setOperationAction(ISD::FNEARBYINT, MVT::f16, Promote); ++ setOperationAction(ISD::FPOW, MVT::f16, Promote); ++ setOperationAction(ISD::FPOWI, MVT::f16, Promote); ++ setOperationAction(ISD::FRINT, MVT::f16, Promote); ++ setOperationAction(ISD::FSIN, MVT::f16, Promote); ++ setOperationAction(ISD::FSINCOS, MVT::f16, Promote); ++ setOperationAction(ISD::FSQRT, MVT::f16, Promote); ++ setOperationAction(ISD::FEXP, MVT::f16, Promote); ++ setOperationAction(ISD::FEXP2, MVT::f16, Promote); ++ setOperationAction(ISD::FLOG, MVT::f16, Promote); ++ setOperationAction(ISD::FLOG2, MVT::f16, Promote); ++ setOperationAction(ISD::FLOG10, MVT::f16, Promote); ++ setOperationAction(ISD::FROUND, MVT::f16, Promote); ++ setOperationAction(ISD::FTRUNC, MVT::f16, Promote); ++ setOperationAction(ISD::FMINNUM, MVT::f16, Promote); ++ setOperationAction(ISD::FMAXNUM, MVT::f16, Promote); ++ setOperationAction(ISD::FMINIMUM, MVT::f16, Promote); ++ setOperationAction(ISD::FMAXIMUM, MVT::f16, Promote); ++ ++ setTargetDAGCombine(ISD::AND); ++ setTargetDAGCombine(ISD::OR); ++ setTargetDAGCombine(ISD::SRA); ++ setTargetDAGCombine(ISD::VSELECT); ++ setTargetDAGCombine(ISD::XOR); ++ } ++ ++ if (Subtarget.hasBasicF()) ++ addRegisterClass(MVT::f32, &LoongArch::FGR32RegClass); ++ ++ if (Subtarget.hasBasicD()) ++ addRegisterClass(MVT::f64, &LoongArch::FGR64RegClass); ++ ++ setOperationAction(ISD::SMUL_LOHI, MVT::i32, Custom); ++ setOperationAction(ISD::UMUL_LOHI, MVT::i32, Custom); ++ ++ if (Subtarget.is64Bit()) ++ setOperationAction(ISD::MUL, MVT::i64, Custom); ++ ++ if (Subtarget.is64Bit()) { ++ setOperationAction(ISD::SMUL_LOHI, MVT::i64, Custom); ++ setOperationAction(ISD::UMUL_LOHI, MVT::i64, Custom); ++ setOperationAction(ISD::SDIVREM, MVT::i64, Custom); ++ setOperationAction(ISD::UDIVREM, MVT::i64, Custom); ++ } ++ ++ setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::i64, Custom); ++ setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::i64, Custom); ++ ++ setOperationAction(ISD::SDIVREM, MVT::i32, Custom); ++ setOperationAction(ISD::UDIVREM, MVT::i32, Custom); ++ setOperationAction(ISD::ATOMIC_FENCE, MVT::Other, Custom); ++ setOperationAction(ISD::LOAD, MVT::i32, Legal); ++ setOperationAction(ISD::STORE, MVT::i32, Legal); ++ ++ setTargetDAGCombine(ISD::MUL); ++ ++ 
setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom); ++ setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::Other, Custom); ++ setOperationAction(ISD::INTRINSIC_VOID, MVT::Other, Custom); ++ ++ // Replace the accumulator-based multiplies with a ++ // three register instruction. ++ setOperationAction(ISD::SMUL_LOHI, MVT::i32, Expand); ++ setOperationAction(ISD::UMUL_LOHI, MVT::i32, Expand); ++ setOperationAction(ISD::MUL, MVT::i32, Legal); ++ setOperationAction(ISD::MULHS, MVT::i32, Legal); ++ setOperationAction(ISD::MULHU, MVT::i32, Legal); ++ ++ // Replace the accumulator-based division/remainder with separate ++ // three register division and remainder instructions. ++ setOperationAction(ISD::SDIVREM, MVT::i32, Expand); ++ setOperationAction(ISD::UDIVREM, MVT::i32, Expand); ++ setOperationAction(ISD::SDIV, MVT::i32, Legal); ++ setOperationAction(ISD::UDIV, MVT::i32, Legal); ++ setOperationAction(ISD::SREM, MVT::i32, Legal); ++ setOperationAction(ISD::UREM, MVT::i32, Legal); ++ ++ // Replace the accumulator-based multiplies with a ++ // three register instruction. ++ setOperationAction(ISD::SMUL_LOHI, MVT::i64, Expand); ++ setOperationAction(ISD::UMUL_LOHI, MVT::i64, Expand); ++ setOperationAction(ISD::MUL, MVT::i64, Legal); ++ setOperationAction(ISD::MULHS, MVT::i64, Legal); ++ setOperationAction(ISD::MULHU, MVT::i64, Legal); ++ ++ // Replace the accumulator-based division/remainder with separate ++ // three register division and remainder instructions. ++ setOperationAction(ISD::SDIVREM, MVT::i64, Expand); ++ setOperationAction(ISD::UDIVREM, MVT::i64, Expand); ++ setOperationAction(ISD::SDIV, MVT::i64, Legal); ++ setOperationAction(ISD::UDIV, MVT::i64, Legal); ++ setOperationAction(ISD::SREM, MVT::i64, Legal); ++ setOperationAction(ISD::UREM, MVT::i64, Legal); ++ ++ MaxGluedStoresPerMemcpy = 4; ++ ++ setMinFunctionAlignment(Subtarget.is64Bit() ? Align(8) : Align(4)); ++ ++ // The arguments on the stack are defined in terms of 4-byte slots on 32bit ++ // target and 8-byte slots on 64bit target. ++ setMinStackArgumentAlignment(Subtarget.is64Bit() ? Align(8) : Align(4)); ++ ++ setStackPointerRegisterToSaveRestore(Subtarget.is64Bit() ? LoongArch::SP_64 ++ : LoongArch::SP); ++ ++ if (Subtarget.hasLASX()) { ++ // = 16*32/2; the smallest memcpy; ++ MaxStoresPerMemcpy = 16; ++ } else if (Subtarget.hasLSX()) { ++ MaxStoresPerMemcpy = 65535; ++ } else { ++ MaxStoresPerMemcpy = 16; ++ } ++ ++ computeRegisterProperties(Subtarget.getRegisterInfo()); ++} ++ ++// Enable LSX support for the given integer type and Register class. ++void LoongArchTargetLowering::addLSXIntType(MVT::SimpleValueType Ty, ++ const TargetRegisterClass *RC) { ++ addRegisterClass(Ty, RC); ++ ++ // Expand all builtin opcodes. 
++ for (unsigned Opc = 0; Opc < ISD::BUILTIN_OP_END; ++Opc) ++ setOperationAction(Opc, Ty, Expand); ++ ++ setOperationAction(ISD::BITCAST, Ty, Legal); ++ setOperationAction(ISD::LOAD, Ty, Legal); ++ setOperationAction(ISD::STORE, Ty, Legal); ++ setOperationAction(ISD::EXTRACT_VECTOR_ELT, Ty, Custom); ++ setOperationAction(ISD::INSERT_VECTOR_ELT, Ty, Legal); ++ setOperationAction(ISD::BUILD_VECTOR, Ty, Custom); ++ setOperationAction(ISD::ABS, Ty, Legal); ++ setOperationAction(ISD::UNDEF, Ty, Legal); ++ setOperationAction(ISD::EXTRACT_SUBVECTOR, Ty, Legal); ++ setOperationAction(ISD::CONCAT_VECTORS, Ty, Legal); ++ ++ if (Ty == MVT::v4i32 || Ty == MVT::v2i64) { ++ setOperationAction(ISD::FP_TO_SINT, Ty, Custom); ++ setOperationAction(ISD::FP_TO_UINT, Ty, Custom); ++ } ++ ++ setOperationAction(ISD::ADD, Ty, Legal); ++ setOperationAction(ISD::AND, Ty, Legal); ++ setOperationAction(ISD::CTLZ, Ty, Legal); ++ setOperationAction(ISD::CTPOP, Ty, Legal); ++ setOperationAction(ISD::MUL, Ty, Legal); ++ setOperationAction(ISD::OR, Ty, Legal); ++ setOperationAction(ISD::SDIV, Ty, Legal); ++ setOperationAction(ISD::SREM, Ty, Legal); ++ setOperationAction(ISD::SHL, Ty, Legal); ++ setOperationAction(ISD::SRA, Ty, Legal); ++ setOperationAction(ISD::SRL, Ty, Legal); ++ setOperationAction(ISD::SUB, Ty, Legal); ++ setOperationAction(ISD::SMAX, Ty, Legal); ++ setOperationAction(ISD::SMIN, Ty, Legal); ++ setOperationAction(ISD::UDIV, Ty, Legal); ++ setOperationAction(ISD::UREM, Ty, Legal); ++ setOperationAction(ISD::UMAX, Ty, Legal); ++ setOperationAction(ISD::UMIN, Ty, Legal); ++ setOperationAction(ISD::VECTOR_SHUFFLE, Ty, Custom); ++ setOperationAction(ISD::VSELECT, Ty, Legal); ++ setOperationAction(ISD::XOR, Ty, Legal); ++ setOperationAction(ISD::MULHS, Ty, Legal); ++ setOperationAction(ISD::MULHU, Ty, Legal); ++ ++ if (Ty == MVT::v4i32 || Ty == MVT::v2i64) { ++ setOperationAction(ISD::SINT_TO_FP, Ty, Custom); ++ setOperationAction(ISD::UINT_TO_FP, Ty, Custom); ++ } ++ ++ setOperationAction(ISD::SETCC, Ty, Legal); ++ setCondCodeAction(ISD::SETNE, Ty, Expand); ++ setCondCodeAction(ISD::SETGE, Ty, Expand); ++ setCondCodeAction(ISD::SETGT, Ty, Expand); ++ setCondCodeAction(ISD::SETUGE, Ty, Expand); ++ setCondCodeAction(ISD::SETUGT, Ty, Expand); ++} ++ ++// Enable LASX support for the given integer type and Register class. ++void LoongArchTargetLowering::addLASXIntType(MVT::SimpleValueType Ty, ++ const TargetRegisterClass *RC) { ++ addRegisterClass(Ty, RC); ++ ++ // Expand all builtin opcodes. 
++ for (unsigned Opc = 0; Opc < ISD::BUILTIN_OP_END; ++Opc) ++ setOperationAction(Opc, Ty, Expand); ++ ++ // FIXME ++ setOperationAction(ISD::BITCAST, Ty, Legal); ++ setOperationAction(ISD::LOAD, Ty, Legal); ++ setOperationAction(ISD::STORE, Ty, Legal); ++ setOperationAction(ISD::EXTRACT_VECTOR_ELT, Ty, Custom); ++ setOperationAction(ISD::INSERT_VECTOR_ELT, Ty, Custom); ++ setOperationAction(ISD::BUILD_VECTOR, Ty, Custom); ++ setOperationAction(ISD::CONCAT_VECTORS, Ty, Legal); ++ setOperationAction(ISD::UNDEF, Ty, Legal); ++ setOperationAction(ISD::UADDSAT, Ty, Legal); ++ setOperationAction(ISD::SADDSAT, Ty, Legal); ++ setOperationAction(ISD::USUBSAT, Ty, Legal); ++ setOperationAction(ISD::SSUBSAT, Ty, Legal); ++ setOperationAction(ISD::ABS, Ty, Legal); ++ ++ setOperationAction(ISD::ADD, Ty, Legal); ++ setOperationAction(ISD::AND, Ty, Legal); ++ setOperationAction(ISD::CTLZ, Ty, Legal); ++ setOperationAction(ISD::CTPOP, Ty, Legal); ++ setOperationAction(ISD::MUL, Ty, Legal); ++ setOperationAction(ISD::OR, Ty, Legal); ++ setOperationAction(ISD::SDIV, Ty, Legal); ++ setOperationAction(ISD::SREM, Ty, Legal); ++ setOperationAction(ISD::SHL, Ty, Legal); ++ setOperationAction(ISD::SRA, Ty, Legal); ++ setOperationAction(ISD::SRL, Ty, Legal); ++ setOperationAction(ISD::SUB, Ty, Legal); ++ setOperationAction(ISD::SMAX, Ty, Legal); ++ setOperationAction(ISD::SMIN, Ty, Legal); ++ setOperationAction(ISD::UDIV, Ty, Legal); ++ setOperationAction(ISD::UREM, Ty, Legal); ++ setOperationAction(ISD::UMAX, Ty, Legal); ++ setOperationAction(ISD::UMIN, Ty, Legal); ++ setOperationAction(ISD::VECTOR_SHUFFLE, Ty, Custom); ++ setOperationAction(ISD::VSELECT, Ty, Legal); ++ setOperationAction(ISD::XOR, Ty, Legal); ++ setOperationAction(ISD::INSERT_SUBVECTOR, Ty, Legal); ++ setOperationAction(ISD::MULHS, Ty, Legal); ++ setOperationAction(ISD::MULHU, Ty, Legal); ++ ++ setOperationAction(ISD::SIGN_EXTEND_VECTOR_INREG, Ty, Legal); ++ setOperationAction(ISD::ZERO_EXTEND_VECTOR_INREG, Ty, Legal); ++ ++ setOperationAction(ISD::SIGN_EXTEND, Ty, Legal); ++ setOperationAction(ISD::ZERO_EXTEND, Ty, Legal); ++ ++ if (Ty == MVT::v8i32 || Ty == MVT::v4i64) { ++ setOperationAction(ISD::SINT_TO_FP, Ty, Custom); ++ setOperationAction(ISD::UINT_TO_FP, Ty, Custom); ++ } ++ ++ setTargetDAGCombine(ISD::CONCAT_VECTORS); ++ ++ setOperationAction(ISD::SETCC, Ty, Legal); ++ setCondCodeAction(ISD::SETNE, Ty, Expand); ++ setCondCodeAction(ISD::SETGE, Ty, Expand); ++ setCondCodeAction(ISD::SETGT, Ty, Expand); ++ setCondCodeAction(ISD::SETUGE, Ty, Expand); ++ setCondCodeAction(ISD::SETUGT, Ty, Expand); ++} ++ ++// Enable LSX support for the given floating-point type and Register class. ++void LoongArchTargetLowering::addLSXFloatType(MVT::SimpleValueType Ty, ++ const TargetRegisterClass *RC) { ++ addRegisterClass(Ty, RC); ++ ++ // Expand all builtin opcodes. 
++ for (unsigned Opc = 0; Opc < ISD::BUILTIN_OP_END; ++Opc) ++ setOperationAction(Opc, Ty, Expand); ++ ++ setOperationAction(ISD::LOAD, Ty, Legal); ++ setOperationAction(ISD::STORE, Ty, Legal); ++ setOperationAction(ISD::BITCAST, Ty, Legal); ++ setOperationAction(ISD::EXTRACT_VECTOR_ELT, Ty, Legal); ++ setOperationAction(ISD::INSERT_VECTOR_ELT, Ty, Legal); ++ setOperationAction(ISD::UNDEF, Ty, Legal); ++ setOperationAction(ISD::BUILD_VECTOR, Ty, Custom); ++ setOperationAction(ISD::CONCAT_VECTORS, Ty, Legal); ++ ++ if (Ty == MVT::v4f32 || Ty == MVT::v2f64) { ++ setOperationAction(ISD::FP_TO_SINT, Ty, Custom); ++ setOperationAction(ISD::FP_TO_UINT, Ty, Custom); ++ } ++ ++ setOperationAction(ISD::FADD, Ty, Legal); ++ setOperationAction(ISD::FDIV, Ty, Legal); ++ setOperationAction(ISD::FMA, Ty, Legal); ++ setOperationAction(ISD::FMUL, Ty, Legal); ++ setOperationAction(ISD::FSQRT, Ty, Legal); ++ setOperationAction(ISD::FSUB, Ty, Legal); ++ setOperationAction(ISD::VSELECT, Ty, Legal); ++ setOperationAction(ISD::FNEG, Ty, Legal); ++ setOperationAction(ISD::FRINT, Ty, Legal); ++ ++ setOperationAction(ISD::SETCC, Ty, Legal); ++ setCondCodeAction(ISD::SETOGE, Ty, Expand); ++ setCondCodeAction(ISD::SETOGT, Ty, Expand); ++ setCondCodeAction(ISD::SETUGE, Ty, Expand); ++ setCondCodeAction(ISD::SETUGT, Ty, Expand); ++ setCondCodeAction(ISD::SETGE, Ty, Expand); ++ setCondCodeAction(ISD::SETGT, Ty, Expand); ++} ++ ++// Enable LASX support for the given floating-point type and Register class. ++void LoongArchTargetLowering::addLASXFloatType(MVT::SimpleValueType Ty, ++ const TargetRegisterClass *RC) { ++ addRegisterClass(Ty, RC); ++ ++ // Expand all builtin opcodes. ++ for (unsigned Opc = 0; Opc < ISD::BUILTIN_OP_END; ++Opc) ++ setOperationAction(Opc, Ty, Expand); ++ ++ setOperationAction(ISD::LOAD, Ty, Legal); ++ setOperationAction(ISD::STORE, Ty, Legal); ++ setOperationAction(ISD::BITCAST, Ty, Legal); ++ setOperationAction(ISD::EXTRACT_VECTOR_ELT, Ty, Legal); ++ setOperationAction(ISD::INSERT_VECTOR_ELT, Ty, Legal); ++ setOperationAction(ISD::BUILD_VECTOR, Ty, Custom); ++ setOperationAction(ISD::UNDEF, Ty, Legal); ++ setOperationAction(ISD::CONCAT_VECTORS, Ty, Legal); ++ ++ setOperationAction(ISD::FADD, Ty, Legal); ++ setOperationAction(ISD::FDIV, Ty, Legal); ++ setOperationAction(ISD::FMA, Ty, Legal); ++ setOperationAction(ISD::FMUL, Ty, Legal); ++ setOperationAction(ISD::FSQRT, Ty, Legal); ++ setOperationAction(ISD::FSUB, Ty, Legal); ++ setOperationAction(ISD::VSELECT, Ty, Legal); ++ setOperationAction(ISD::FNEG, Ty, Legal); ++ setOperationAction(ISD::FRINT, Ty, Legal); ++ ++ if (Ty == MVT::v8f32 || Ty == MVT::v4f64) { ++ setOperationAction(ISD::FP_TO_SINT, Ty, Custom); ++ setOperationAction(ISD::FP_TO_UINT, Ty, Custom); ++ } ++ ++ setOperationAction(ISD::SETCC, Ty, Legal); ++ setCondCodeAction(ISD::SETOGE, Ty, Expand); ++ setCondCodeAction(ISD::SETOGT, Ty, Expand); ++ setCondCodeAction(ISD::SETUGE, Ty, Expand); ++ setCondCodeAction(ISD::SETUGT, Ty, Expand); ++ setCondCodeAction(ISD::SETGE, Ty, Expand); ++ setCondCodeAction(ISD::SETGT, Ty, Expand); ++} ++ ++bool LoongArchTargetLowering::allowsMisalignedMemoryAccesses( ++ EVT VT, unsigned AddrSpace, Align Alignment, MachineMemOperand::Flags Flags, ++ bool *Fast) const { ++ if (!Subtarget.allowUnalignedAccess()) ++ return false; ++ if (Fast) ++ *Fast = true; ++ return true; ++} ++ ++EVT LoongArchTargetLowering::getSetCCResultType(const DataLayout &, LLVMContext &, ++ EVT VT) const { ++ if (!VT.isVector()) ++ return MVT::i32; ++ return 
VT.changeVectorElementTypeToInteger(); ++} ++ ++static LoongArch::CondCode condCodeToFCC(ISD::CondCode CC) { ++ switch (CC) { ++ default: llvm_unreachable("Unknown fp condition code!"); ++ case ISD::SETEQ: ++ case ISD::SETOEQ: return LoongArch::FCOND_OEQ; ++ case ISD::SETUNE: return LoongArch::FCOND_UNE; ++ case ISD::SETLT: ++ case ISD::SETOLT: return LoongArch::FCOND_OLT; ++ case ISD::SETGT: ++ case ISD::SETOGT: return LoongArch::FCOND_OGT; ++ case ISD::SETLE: ++ case ISD::SETOLE: return LoongArch::FCOND_OLE; ++ case ISD::SETGE: ++ case ISD::SETOGE: return LoongArch::FCOND_OGE; ++ case ISD::SETULT: return LoongArch::FCOND_ULT; ++ case ISD::SETULE: return LoongArch::FCOND_ULE; ++ case ISD::SETUGT: return LoongArch::FCOND_UGT; ++ case ISD::SETUGE: return LoongArch::FCOND_UGE; ++ case ISD::SETUO: return LoongArch::FCOND_UN; ++ case ISD::SETO: return LoongArch::FCOND_OR; ++ case ISD::SETNE: ++ case ISD::SETONE: return LoongArch::FCOND_ONE; ++ case ISD::SETUEQ: return LoongArch::FCOND_UEQ; ++ } ++} ++ ++/// This function returns true if the floating point conditional branches and ++/// conditional moves which use condition code CC should be inverted. ++static bool invertFPCondCodeUser(LoongArch::CondCode CC) { ++ if (CC >= LoongArch::FCOND_F && CC <= LoongArch::FCOND_SUNE) ++ return false; ++ ++ assert((CC >= LoongArch::FCOND_T && CC <= LoongArch::FCOND_GT) && ++ "Illegal Condition Code"); ++ ++ return true; ++} ++ ++// Creates and returns an FPCmp node from a setcc node. ++// Returns Op if setcc is not a floating point comparison. ++static SDValue createFPCmp(SelectionDAG &DAG, const SDValue &Op) { ++ // must be a SETCC node ++ if (Op.getOpcode() != ISD::SETCC) ++ return Op; ++ ++ SDValue LHS = Op.getOperand(0); ++ ++ if (!LHS.getValueType().isFloatingPoint()) ++ return Op; ++ ++ SDValue RHS = Op.getOperand(1); ++ SDLoc DL(Op); ++ ++ // Assume the 3rd operand is a CondCodeSDNode. Add code to check the type of ++ // node if necessary. ++ ISD::CondCode CC = cast(Op.getOperand(2))->get(); ++ ++ return DAG.getNode(LoongArchISD::FPCmp, DL, MVT::Glue, LHS, RHS, ++ DAG.getConstant(condCodeToFCC(CC), DL, MVT::i32)); ++} ++ ++// Creates and returns a CMovFPT/F node. ++static SDValue createCMovFP(SelectionDAG &DAG, SDValue Cond, SDValue True, ++ SDValue False, const SDLoc &DL) { ++ ConstantSDNode *CC = cast(Cond.getOperand(2)); ++ bool invert = invertFPCondCodeUser((LoongArch::CondCode)CC->getSExtValue()); ++ SDValue FCC0 = DAG.getRegister(LoongArch::FCC0, MVT::i32); ++ ++ return DAG.getNode((invert ? LoongArchISD::CMovFP_F : LoongArchISD::CMovFP_T), DL, ++ True.getValueType(), True, FCC0, False, Cond); ++ ++} ++ ++static SDValue performSELECTCombine(SDNode *N, SelectionDAG &DAG, ++ TargetLowering::DAGCombinerInfo &DCI, ++ const LoongArchSubtarget &Subtarget) { ++ if (DCI.isBeforeLegalizeOps()) ++ return SDValue(); ++ ++ SDValue SetCC = N->getOperand(0); ++ ++ if ((SetCC.getOpcode() != ISD::SETCC) || ++ !SetCC.getOperand(0).getValueType().isInteger()) ++ return SDValue(); ++ ++ SDValue False = N->getOperand(2); ++ EVT FalseTy = False.getValueType(); ++ ++ if (!FalseTy.isInteger()) ++ return SDValue(); ++ ++ ConstantSDNode *FalseC = dyn_cast(False); ++ ++ // If the RHS (False) is 0, we swap the order of the operands ++ // of ISD::SELECT (obviously also inverting the condition) so that we can ++ // take advantage of conditional moves using the $0 register. ++ // Example: ++ // return (a != 0) ? 
x : 0; ++ // load $reg, x ++ // movz $reg, $0, a ++ if (!FalseC) ++ return SDValue(); ++ ++ const SDLoc DL(N); ++ ++ if (!FalseC->getZExtValue()) { ++ ISD::CondCode CC = cast(SetCC.getOperand(2))->get(); ++ SDValue True = N->getOperand(1); ++ ++ SetCC = DAG.getSetCC(DL, SetCC.getValueType(), SetCC.getOperand(0), ++ SetCC.getOperand(1), ++ ISD::getSetCCInverse(CC, SetCC.getValueType())); ++ ++ return DAG.getNode(ISD::SELECT, DL, FalseTy, SetCC, False, True); ++ } ++ ++ // If both operands are integer constants there's a possibility that we ++ // can do some interesting optimizations. ++ SDValue True = N->getOperand(1); ++ ConstantSDNode *TrueC = dyn_cast(True); ++ ++ if (!TrueC || !True.getValueType().isInteger()) ++ return SDValue(); ++ ++ // We'll also ignore MVT::i64 operands as this optimizations proves ++ // to be ineffective because of the required sign extensions as the result ++ // of a SETCC operator is always MVT::i32 for non-vector types. ++ if (True.getValueType() == MVT::i64) ++ return SDValue(); ++ ++ int64_t Diff = TrueC->getSExtValue() - FalseC->getSExtValue(); ++ ++ // 1) (a < x) ? y : y-1 ++ // slti $reg1, a, x ++ // addiu $reg2, $reg1, y-1 ++ if (Diff == 1) ++ return DAG.getNode(ISD::ADD, DL, SetCC.getValueType(), SetCC, False); ++ ++ // 2) (a < x) ? y-1 : y ++ // slti $reg1, a, x ++ // xor $reg1, $reg1, 1 ++ // addiu $reg2, $reg1, y-1 ++ if (Diff == -1) { ++ ISD::CondCode CC = cast(SetCC.getOperand(2))->get(); ++ SetCC = DAG.getSetCC(DL, SetCC.getValueType(), SetCC.getOperand(0), ++ SetCC.getOperand(1), ++ ISD::getSetCCInverse(CC, SetCC.getValueType())); ++ return DAG.getNode(ISD::ADD, DL, SetCC.getValueType(), SetCC, True); ++ } ++ ++ // Could not optimize. ++ return SDValue(); ++} ++ ++static SDValue performANDCombine(SDNode *N, SelectionDAG &DAG, ++ TargetLowering::DAGCombinerInfo &DCI, ++ const LoongArchSubtarget &Subtarget) { ++ ++ if (Subtarget.hasLSX()) { ++ ++ // Fold zero extensions into LoongArchISD::VEXTRACT_[SZ]EXT_ELT ++ // ++ // Performs the following transformations: ++ // - Changes LoongArchISD::VEXTRACT_[SZ]EXT_ELT to zero extension if its ++ // sign/zero-extension is completely overwritten by the new one performed ++ // by the ISD::AND. ++ // - Removes redundant zero extensions performed by an ISD::AND. 
++ SDValue Op0 = N->getOperand(0); ++ SDValue Op1 = N->getOperand(1); ++ unsigned Op0Opcode = Op0->getOpcode(); ++ ++ // (and (LoongArchVExtract[SZ]Ext $a, $b, $c), imm:$d) ++ // where $d + 1 == 2^n and n == 32 ++ // or $d + 1 == 2^n and n <= 32 and ZExt ++ // -> (LoongArchVExtractZExt $a, $b, $c) ++ if (Op0Opcode == LoongArchISD::VEXTRACT_SEXT_ELT || ++ Op0Opcode == LoongArchISD::VEXTRACT_ZEXT_ELT) { ++ ConstantSDNode *Mask = dyn_cast(Op1); ++ ++ if (Mask) { ++ ++ int32_t Log2IfPositive = (Mask->getAPIntValue() + 1).exactLogBase2(); ++ ++ if (Log2IfPositive > 0) { ++ SDValue Op0Op2 = Op0->getOperand(2); ++ EVT ExtendTy = cast(Op0Op2)->getVT(); ++ unsigned ExtendTySize = ExtendTy.getSizeInBits(); ++ unsigned Log2 = Log2IfPositive; ++ ++ if ((Op0Opcode == LoongArchISD::VEXTRACT_ZEXT_ELT && ++ Log2 >= ExtendTySize) || ++ Log2 == ExtendTySize) { ++ SDValue Ops[] = {Op0->getOperand(0), Op0->getOperand(1), Op0Op2}; ++ return DAG.getNode(LoongArchISD::VEXTRACT_ZEXT_ELT, SDLoc(Op0), ++ Op0->getVTList(), ++ makeArrayRef(Ops, Op0->getNumOperands())); ++ } ++ } ++ } ++ } ++ } ++ ++ if (DCI.isBeforeLegalizeOps()) ++ return SDValue(); ++ ++ SDValue FirstOperand = N->getOperand(0); ++ unsigned FirstOperandOpc = FirstOperand.getOpcode(); ++ SDValue Mask = N->getOperand(1); ++ EVT ValTy = N->getValueType(0); ++ SDLoc DL(N); ++ ++ uint64_t Lsb = 0, SMLsb, SMSize; ++ ConstantSDNode *CN; ++ SDValue NewOperand; ++ unsigned Opc; ++ ++ // Op's second operand must be a shifted mask. ++ if (!(CN = dyn_cast(Mask)) || ++ !isShiftedMask(CN->getZExtValue(), SMLsb, SMSize)) ++ return SDValue(); ++ ++ if (FirstOperandOpc == ISD::SRA || FirstOperandOpc == ISD::SRL) { ++ // Pattern match BSTRPICK. ++ // $dst = and ((sra or srl) $src , lsb), (2**size - 1) ++ // => bstrpick $dst, $src, lsb+size-1, lsb ++ ++ // The second operand of the shift must be an immediate. ++ if (!(CN = dyn_cast(FirstOperand.getOperand(1)))) ++ return SDValue(); ++ ++ Lsb = CN->getZExtValue(); ++ ++ // Return if the shifted mask does not start at bit 0 or the sum of its size ++ // and Lsb exceeds the word's size. ++ if (SMLsb != 0 || Lsb + SMSize > ValTy.getSizeInBits()) ++ return SDValue(); ++ ++ Opc = LoongArchISD::BSTRPICK; ++ NewOperand = FirstOperand.getOperand(0); ++ } else { ++ // Pattern match BSTRPICK. ++ // $dst = and $src, (2**size - 1) , if size > 12 ++ // => bstrpick $dst, $src, lsb+size-1, lsb , lsb = 0 ++ ++ // If the mask is <= 0xfff, andi can be used instead. ++ if (CN->getZExtValue() <= 0xfff) ++ return SDValue(); ++ // Return if the mask doesn't start at position 0. ++ if (SMLsb) ++ return SDValue(); ++ ++ Opc = LoongArchISD::BSTRPICK; ++ NewOperand = FirstOperand; ++ } ++ return DAG.getNode(Opc, DL, ValTy, NewOperand, ++ DAG.getConstant((Lsb + SMSize - 1), DL, MVT::i32), ++ DAG.getConstant(Lsb, DL, MVT::i32)); ++} ++ ++// Determine if the specified node is a constant vector splat. ++// ++// Returns true and sets Imm if: ++// * N is a ISD::BUILD_VECTOR representing a constant splat ++static bool isVSplat(SDValue N, APInt &Imm) { ++ BuildVectorSDNode *Node = dyn_cast(N.getNode()); ++ ++ if (!Node) ++ return false; ++ ++ APInt SplatValue, SplatUndef; ++ unsigned SplatBitSize; ++ bool HasAnyUndefs; ++ ++ if (!Node->isConstantSplat(SplatValue, SplatUndef, SplatBitSize, HasAnyUndefs, ++ 8)) ++ return false; ++ ++ Imm = SplatValue; ++ ++ return true; ++} ++ ++// Test whether the given node is an all-ones build_vector. ++static bool isVectorAllOnes(SDValue N) { ++ // Look through bitcasts. 
Endianness doesn't matter because we are looking ++ // for an all-ones value. ++ if (N->getOpcode() == ISD::BITCAST) ++ N = N->getOperand(0); ++ ++ BuildVectorSDNode *BVN = dyn_cast(N); ++ ++ if (!BVN) ++ return false; ++ ++ APInt SplatValue, SplatUndef; ++ unsigned SplatBitSize; ++ bool HasAnyUndefs; ++ ++ // Endianness doesn't matter in this context because we are looking for ++ // an all-ones value. ++ if (BVN->isConstantSplat(SplatValue, SplatUndef, SplatBitSize, HasAnyUndefs)) ++ return SplatValue.isAllOnesValue(); ++ ++ return false; ++} ++ ++// Test whether N is the bitwise inverse of OfNode. ++static bool isBitwiseInverse(SDValue N, SDValue OfNode) { ++ if (N->getOpcode() != ISD::XOR) ++ return false; ++ ++ if (isVectorAllOnes(N->getOperand(0))) ++ return N->getOperand(1) == OfNode; ++ ++ if (isVectorAllOnes(N->getOperand(1))) ++ return N->getOperand(0) == OfNode; ++ ++ return false; ++} ++ ++static SDValue performSet(SDNode *N, SelectionDAG &DAG, ++ TargetLowering::DAGCombinerInfo &DCI, ++ const LoongArchSubtarget &Subtarget) { ++ ++ SDValue Op0 = N->getOperand(0); ++ SDValue Op1 = N->getOperand(1); ++ SDValue N1, N2; ++ if (Op0->getOpcode() == ISD::BUILD_VECTOR && ++ (Op1->getValueType(0).is128BitVector() || ++ Op1->getValueType(0).is256BitVector())) { ++ N1 = Op0; ++ N2 = Op1; ++ } else if (Op1->getOpcode() == ISD::BUILD_VECTOR && ++ (Op0->getValueType(0).is128BitVector() || ++ Op0->getValueType(0).is256BitVector())) { ++ N1 = Op1; ++ N2 = Op0; ++ } else ++ return SDValue(); ++ ++ APInt Mask1, Mask2; ++ if (!isVSplat(N1, Mask1)) ++ return SDValue(); ++ ++ if (!N1->getValueType(0).isSimple()) ++ return SDValue(); ++ ++ ConstantSDNode *C1; ++ uint64_t Imm; ++ unsigned ImmL; ++ if (!(C1 = dyn_cast(N1.getOperand(0))) || ++ !isPowerOf2_64(C1->getZExtValue())) ++ return SDValue(); ++ ++ Imm = C1->getZExtValue(); ++ ImmL = Log2_64(Imm); ++ MVT VT = N1->getSimpleValueType(0).SimpleTy; ++ ++ SDNode *Res; ++ ++ if (Subtarget.hasLASX() && N->getValueType(0).is256BitVector()) { ++ if (VT == MVT::v32i8 && ImmL < 8) ++ Res = DAG.getMachineNode(LoongArch::XVBITSETI_B, SDLoc(N), VT, N2, ++ DAG.getTargetConstant(ImmL, SDLoc(N), MVT::i32)); ++ else if (VT == MVT::v16i16 && ImmL < 16) ++ Res = DAG.getMachineNode(LoongArch::XVBITSETI_H, SDLoc(N), VT, N2, ++ DAG.getTargetConstant(ImmL, SDLoc(N), MVT::i32)); ++ else if (VT == MVT::v8i32 && ImmL < 32) ++ Res = DAG.getMachineNode(LoongArch::XVBITSETI_W, SDLoc(N), VT, N2, ++ DAG.getTargetConstant(ImmL, SDLoc(N), MVT::i32)); ++ else if (VT == MVT::v4i64 && ImmL < 64) ++ Res = DAG.getMachineNode(LoongArch::XVBITSETI_D, SDLoc(N), VT, N2, ++ DAG.getTargetConstant(ImmL, SDLoc(N), MVT::i32)); ++ else ++ return SDValue(); ++ } else if (N->getValueType(0).is128BitVector()) { ++ if (VT == MVT::v16i8 && ImmL < 8) ++ Res = DAG.getMachineNode(LoongArch::VBITSETI_B, SDLoc(N), VT, N2, ++ DAG.getTargetConstant(ImmL, SDLoc(N), MVT::i32)); ++ else if (VT == MVT::v8i16 && ImmL < 16) ++ Res = DAG.getMachineNode(LoongArch::VBITSETI_H, SDLoc(N), VT, N2, ++ DAG.getTargetConstant(ImmL, SDLoc(N), MVT::i32)); ++ else if (VT == MVT::v4i32 && ImmL < 32) ++ Res = DAG.getMachineNode(LoongArch::VBITSETI_W, SDLoc(N), VT, N2, ++ DAG.getTargetConstant(ImmL, SDLoc(N), MVT::i32)); ++ else if (VT == MVT::v2i64 && ImmL < 64) ++ Res = DAG.getMachineNode(LoongArch::VBITSETI_D, SDLoc(N), VT, N2, ++ DAG.getTargetConstant(ImmL, SDLoc(N), MVT::i32)); ++ else ++ return SDValue(); ++ ++ } else ++ return SDValue(); ++ ++ return SDValue(Res, 0); ++} ++ ++static SDValue performORCombine(SDNode *N, 
SelectionDAG &DAG, ++ TargetLowering::DAGCombinerInfo &DCI, ++ const LoongArchSubtarget &Subtarget) { ++ ++ SDValue Res; ++ if (Subtarget.hasLSX() && (N->getValueType(0).is128BitVector() || ++ N->getValueType(0).is256BitVector())) { ++ SDValue Op0 = N->getOperand(0); ++ SDValue Op1 = N->getOperand(1); ++ ++ if (Op0->getOpcode() == ISD::AND && Op1->getOpcode() == ISD::AND) { ++ SDValue Op0Op0 = Op0->getOperand(0); ++ SDValue Op0Op1 = Op0->getOperand(1); ++ SDValue Op1Op0 = Op1->getOperand(0); ++ SDValue Op1Op1 = Op1->getOperand(1); ++ ++ SDValue IfSet, IfClr, Cond; ++ bool IsConstantMask = false; ++ APInt Mask, InvMask; ++ ++ // If Op0Op0 is an appropriate mask, try to find it's inverse in either ++ // Op1Op0, or Op1Op1. Keep track of the Cond, IfSet, and IfClr nodes, ++ // while looking. IfClr will be set if we find a valid match. ++ if (isVSplat(Op0Op0, Mask)) { ++ Cond = Op0Op0; ++ IfSet = Op0Op1; ++ ++ if (isVSplat(Op1Op0, InvMask) && ++ Mask.getBitWidth() == InvMask.getBitWidth() && Mask == ~InvMask) ++ IfClr = Op1Op1; ++ else if (isVSplat(Op1Op1, InvMask) && ++ Mask.getBitWidth() == InvMask.getBitWidth() && ++ Mask == ~InvMask) ++ IfClr = Op1Op0; ++ ++ IsConstantMask = true; ++ } ++ ++ // If IfClr is not yet set, and Op0Op1 is an appropriate mask, try the ++ // same thing again using this mask. IfClr will be set if we find a valid ++ // match. ++ if (!IfClr.getNode() && isVSplat(Op0Op1, Mask)) { ++ Cond = Op0Op1; ++ IfSet = Op0Op0; ++ ++ if (isVSplat(Op1Op0, InvMask) && ++ Mask.getBitWidth() == InvMask.getBitWidth() && Mask == ~InvMask) ++ IfClr = Op1Op1; ++ else if (isVSplat(Op1Op1, InvMask) && ++ Mask.getBitWidth() == InvMask.getBitWidth() && ++ Mask == ~InvMask) ++ IfClr = Op1Op0; ++ ++ IsConstantMask = true; ++ } ++ ++ // If IfClr is not yet set, try looking for a non-constant match. ++ // IfClr will be set if we find a valid match amongst the eight ++ // possibilities. ++ if (!IfClr.getNode()) { ++ if (isBitwiseInverse(Op0Op0, Op1Op0)) { ++ Cond = Op1Op0; ++ IfSet = Op1Op1; ++ IfClr = Op0Op1; ++ } else if (isBitwiseInverse(Op0Op1, Op1Op0)) { ++ Cond = Op1Op0; ++ IfSet = Op1Op1; ++ IfClr = Op0Op0; ++ } else if (isBitwiseInverse(Op0Op0, Op1Op1)) { ++ Cond = Op1Op1; ++ IfSet = Op1Op0; ++ IfClr = Op0Op1; ++ } else if (isBitwiseInverse(Op0Op1, Op1Op1)) { ++ Cond = Op1Op1; ++ IfSet = Op1Op0; ++ IfClr = Op0Op0; ++ } else if (isBitwiseInverse(Op1Op0, Op0Op0)) { ++ Cond = Op0Op0; ++ IfSet = Op0Op1; ++ IfClr = Op1Op1; ++ } else if (isBitwiseInverse(Op1Op1, Op0Op0)) { ++ Cond = Op0Op0; ++ IfSet = Op0Op1; ++ IfClr = Op1Op0; ++ } else if (isBitwiseInverse(Op1Op0, Op0Op1)) { ++ Cond = Op0Op1; ++ IfSet = Op0Op0; ++ IfClr = Op1Op1; ++ } else if (isBitwiseInverse(Op1Op1, Op0Op1)) { ++ Cond = Op0Op1; ++ IfSet = Op0Op0; ++ IfClr = Op1Op0; ++ } ++ } ++ ++ // At this point, IfClr will be set if we have a valid match. ++ if (IfClr.getNode()) { ++ assert(Cond.getNode() && IfSet.getNode()); ++ ++ // Fold degenerate cases. ++ if (IsConstantMask) { ++ if (Mask.isAllOnesValue()) ++ return IfSet; ++ else if (Mask == 0) ++ return IfClr; ++ } ++ ++ // Transform the DAG into an equivalent VSELECT. ++ return DAG.getNode(ISD::VSELECT, SDLoc(N), N->getValueType(0), Cond, ++ IfSet, IfClr); ++ } ++ } ++ ++ if (Res = performSet(N, DAG, DCI, Subtarget)) ++ return Res; ++ } ++ ++ // Pattern match BSTRINS. 
++ // $dst = or (and $src1 , mask0), (and (shl $src, lsb), mask1), ++ // where mask1 = (2**size - 1) << lsb, mask0 = ~mask1 ++ // => bstrins $dst, $src, lsb+size-1, lsb, $src1 ++ if (DCI.isBeforeLegalizeOps()) ++ return SDValue(); ++ ++ SDValue And0 = N->getOperand(0), And1 = N->getOperand(1); ++ uint64_t SMLsb0, SMSize0, SMLsb1, SMSize1; ++ ConstantSDNode *CN, *CN1; ++ ++ // See if Op's first operand matches (and $src1 , mask0). ++ if (And0.getOpcode() != ISD::AND) ++ return SDValue(); ++ ++ if (!(CN = dyn_cast(And0.getOperand(1))) || ++ !isShiftedMask(~CN->getSExtValue(), SMLsb0, SMSize0)) ++ return SDValue(); ++ ++ // See if Op's second operand matches (and (shl $src, lsb), mask1). ++ if (And1.getOpcode() == ISD::AND && ++ And1.getOperand(0).getOpcode() == ISD::SHL) { ++ ++ if (!(CN = dyn_cast(And1.getOperand(1))) || ++ !isShiftedMask(CN->getZExtValue(), SMLsb1, SMSize1)) ++ return SDValue(); ++ ++ // The shift masks must have the same least significant bit and size. ++ if (SMLsb0 != SMLsb1 || SMSize0 != SMSize1) ++ return SDValue(); ++ ++ SDValue Shl = And1.getOperand(0); ++ ++ if (!(CN = dyn_cast(Shl.getOperand(1)))) ++ return SDValue(); ++ ++ unsigned Shamt = CN->getZExtValue(); ++ ++ // Return if the shift amount and the first bit position of mask are not the ++ // same. ++ EVT ValTy = N->getValueType(0); ++ if ((Shamt != SMLsb0) || (SMLsb0 + SMSize0 > ValTy.getSizeInBits())) ++ return SDValue(); ++ ++ SDLoc DL(N); ++ return DAG.getNode(LoongArchISD::BSTRINS, DL, ValTy, Shl.getOperand(0), ++ DAG.getConstant((SMLsb0 + SMSize0 - 1), DL, MVT::i32), ++ DAG.getConstant(SMLsb0, DL, MVT::i32), ++ And0.getOperand(0)); ++ } else { ++ // Pattern match BSTRINS. ++ // $dst = or (and $src, mask0), mask1 ++ // where mask0 = ((1 << SMSize0) -1) << SMLsb0 ++ // => bstrins $dst, $src, SMLsb0+SMSize0-1, SMLsb0 ++ if (~CN->getSExtValue() == ((((int64_t)1 << SMSize0) - 1) << SMLsb0) && ++ (SMSize0 + SMLsb0 <= 64)) { ++ // Check if AND instruction has constant as argument ++ bool isConstCase = And1.getOpcode() != ISD::AND; ++ if (And1.getOpcode() == ISD::AND) { ++ if (!(CN1 = dyn_cast(And1->getOperand(1)))) ++ return SDValue(); ++ } else { ++ if (!(CN1 = dyn_cast(N->getOperand(1)))) ++ return SDValue(); ++ } ++ // Don't generate BSTRINS if constant OR operand doesn't fit into bits ++ // cleared by constant AND operand. ++ if (CN->getSExtValue() & CN1->getSExtValue()) ++ return SDValue(); ++ ++ SDLoc DL(N); ++ EVT ValTy = N->getOperand(0)->getValueType(0); ++ SDValue Const1; ++ SDValue SrlX; ++ if (!isConstCase) { ++ Const1 = DAG.getConstant(SMLsb0, DL, MVT::i32); ++ SrlX = DAG.getNode(ISD::SRL, DL, And1->getValueType(0), And1, Const1); ++ } ++ return DAG.getNode( ++ LoongArchISD::BSTRINS, DL, N->getValueType(0), ++ isConstCase ++ ? DAG.getConstant(CN1->getSExtValue() >> SMLsb0, DL, ValTy) ++ : SrlX, ++ DAG.getConstant(ValTy.getSizeInBits() / 8 < 8 ? (SMLsb0 + (SMSize0 & 31) - 1) ++ : (SMLsb0 + SMSize0 - 1), ++ DL, MVT::i32), ++ DAG.getConstant(SMLsb0, DL, MVT::i32), ++ And0->getOperand(0)); ++ ++ } ++ return SDValue(); ++ } ++} ++ ++static bool ++shouldTransformMulToShiftsAddsSubs(APInt C, EVT VT, ++ SelectionDAG &DAG, ++ const LoongArchSubtarget &Subtarget) { ++ // Estimate the number of operations the below transform will turn a ++ // constant multiply into. The number is approximately equal to the minimal ++ // number of powers of two that constant can be broken down to by adding ++ // or subtracting them. 
++ // ++ // If we have taken more than 10[1] / 8[2] steps to attempt the ++ // optimization for a native sized value, it is more than likely that this ++ // optimization will make things worse. ++ // ++ // [1] LA64 requires 4 instructions at most to materialize any constant, ++ // multiplication requires at least 4 cycles, but another cycle (or two) ++ // to retrieve the result from corresponding registers. ++ // ++ // [2] LA32 requires 2 instructions at most to materialize any constant, ++ // multiplication requires at least 4 cycles, but another cycle (or two) ++ // to retrieve the result from corresponding registers. ++ // ++ // TODO: ++ // - MaxSteps needs to consider the `VT` of the constant for the current ++ // target. ++ // - Consider to perform this optimization after type legalization. ++ // That allows to remove a workaround for types not supported natively. ++ // - Take in account `-Os, -Oz` flags because this optimization ++ // increases code size. ++ unsigned MaxSteps = Subtarget.is64Bit() ? 10 : 8; ++ ++ SmallVector WorkStack(1, C); ++ unsigned Steps = 0; ++ unsigned BitWidth = C.getBitWidth(); ++ ++ while (!WorkStack.empty()) { ++ APInt Val = WorkStack.pop_back_val(); ++ ++ if (Val == 0 || Val == 1) ++ continue; ++ ++ if (Steps >= MaxSteps) ++ return false; ++ ++ if (Val.isPowerOf2()) { ++ ++Steps; ++ continue; ++ } ++ ++ APInt Floor = APInt(BitWidth, 1) << Val.logBase2(); ++ APInt Ceil = Val.isNegative() ? APInt(BitWidth, 0) ++ : APInt(BitWidth, 1) << C.ceilLogBase2(); ++ ++ if ((Val - Floor).ule(Ceil - Val)) { ++ WorkStack.push_back(Floor); ++ WorkStack.push_back(Val - Floor); ++ } else { ++ WorkStack.push_back(Ceil); ++ WorkStack.push_back(Ceil - Val); ++ } ++ ++ ++Steps; ++ } ++ ++ // If the value being multiplied is not supported natively, we have to pay ++ // an additional legalization cost, conservatively assume an increase in the ++ // cost of 3 instructions per step. This values for this heuristic were ++ // determined experimentally. ++ unsigned RegisterSize = DAG.getTargetLoweringInfo() ++ .getRegisterType(*DAG.getContext(), VT) ++ .getSizeInBits(); ++ Steps *= (VT.getSizeInBits() != RegisterSize) * 3; ++ if (Steps > 27) ++ return false; ++ ++ return true; ++} ++ ++static SDValue genConstMult(SDValue X, APInt C, const SDLoc &DL, EVT VT, ++ EVT ShiftTy, SelectionDAG &DAG) { ++ // Return 0. ++ if (C == 0) ++ return DAG.getConstant(0, DL, VT); ++ ++ // Return x. ++ if (C == 1) ++ return X; ++ ++ // If c is power of 2, return (shl x, log2(c)). ++ if (C.isPowerOf2()) ++ return DAG.getNode(ISD::SHL, DL, VT, X, ++ DAG.getConstant(C.logBase2(), DL, ShiftTy)); ++ ++ unsigned BitWidth = C.getBitWidth(); ++ APInt Floor = APInt(BitWidth, 1) << C.logBase2(); ++ APInt Ceil = C.isNegative() ? APInt(BitWidth, 0) : ++ APInt(BitWidth, 1) << C.ceilLogBase2(); ++ ++ // If |c - floor_c| <= |c - ceil_c|, ++ // where floor_c = pow(2, floor(log2(c))) and ceil_c = pow(2, ceil(log2(c))), ++ // return (add constMult(x, floor_c), constMult(x, c - floor_c)). ++ if ((C - Floor).ule(Ceil - C)) { ++ SDValue Op0 = genConstMult(X, Floor, DL, VT, ShiftTy, DAG); ++ SDValue Op1 = genConstMult(X, C - Floor, DL, VT, ShiftTy, DAG); ++ return DAG.getNode(ISD::ADD, DL, VT, Op0, Op1); ++ } ++ ++ // If |c - floor_c| > |c - ceil_c|, ++ // return (sub constMult(x, ceil_c), constMult(x, ceil_c - c)). 
++ SDValue Op0 = genConstMult(X, Ceil, DL, VT, ShiftTy, DAG); ++ SDValue Op1 = genConstMult(X, Ceil - C, DL, VT, ShiftTy, DAG); ++ return DAG.getNode(ISD::SUB, DL, VT, Op0, Op1); ++} ++ ++static SDValue performLogicCombine(SDNode *N, SelectionDAG &DAG, ++ const LoongArchSubtarget &Subtarget) { ++ ++ SDLoc DL(N); ++ SDValue N0 = N->getOperand(0); ++ SDValue N1 = N->getOperand(1); ++ ++ if (!(N0->getOpcode() == ISD::TRUNCATE && N1->getOpcode() == ISD::TRUNCATE)) ++ return SDValue(); ++ ++ if (!(N->getValueType(0).isSimple() && N0->getValueType(0).isSimple() && ++ N1->getValueType(0).isSimple() && ++ N0->getOperand(0)->getValueType(0).isSimple() && ++ N1->getOperand(0)->getValueType(0).isSimple())) ++ return SDValue(); ++ ++ if (!(N->getValueType(0).isSimple() && N0->getValueType(0).isSimple() && ++ N1->getValueType(0).isSimple() && ++ N0->getOperand(0)->getValueType(0).isSimple() && ++ N1->getOperand(0)->getValueType(0).isSimple())) ++ return SDValue(); ++ ++ if (!(N->getSimpleValueType(0).SimpleTy == MVT::i32 && ++ N0->getSimpleValueType(0).SimpleTy == MVT::i32 && ++ N1->getSimpleValueType(0).SimpleTy == MVT::i32)) ++ return SDValue(); ++ ++ if (!(N0->getOperand(0)->getSimpleValueType(0).SimpleTy == MVT::i64 && ++ N1->getOperand(0)->getSimpleValueType(0).SimpleTy == MVT::i64)) ++ return SDValue(); ++ ++ SDValue SubReg = DAG.getTargetConstant(LoongArch::sub_32, DL, MVT::i32); ++ SDValue Val0 = SDValue(DAG.getMachineNode(TargetOpcode::EXTRACT_SUBREG, DL, ++ N0->getValueType(0), ++ N0->getOperand(0), SubReg), ++ 0); ++ SDValue Val1 = SDValue(DAG.getMachineNode(TargetOpcode::EXTRACT_SUBREG, DL, ++ N1->getValueType(0), ++ N1->getOperand(0), SubReg), ++ 0); ++ ++ return DAG.getNode(N->getOpcode(), DL, N0->getValueType(0), Val0, Val1); ++} ++ ++static SDValue performMULCombine(SDNode *N, SelectionDAG &DAG, ++ const TargetLowering::DAGCombinerInfo &DCI, ++ const LoongArchTargetLowering *TL, ++ const LoongArchSubtarget &Subtarget) { ++ EVT VT = N->getValueType(0); ++ ++ SDValue Res; ++ if ((Res = performLogicCombine(N, DAG, Subtarget))) ++ return Res; ++ ++ if (ConstantSDNode *C = dyn_cast(N->getOperand(1))) ++ if (!VT.isVector() && shouldTransformMulToShiftsAddsSubs( ++ C->getAPIntValue(), VT, DAG, Subtarget)) ++ return genConstMult(N->getOperand(0), C->getAPIntValue(), SDLoc(N), VT, ++ TL->getScalarShiftAmountTy(DAG.getDataLayout(), VT), ++ DAG); ++ ++ return SDValue(N, 0); ++} ++ ++// Fold sign-extensions into LoongArchISD::VEXTRACT_[SZ]EXT_ELT for LSX. ++// ++// Performs the following transformations: ++// - Changes LoongArchISD::VEXTRACT_[SZ]EXT_ELT to sign extension if its ++// sign/zero-extension is completely overwritten by the new one performed by ++// the ISD::SRA and ISD::SHL nodes. ++// - Removes redundant sign extensions performed by an ISD::SRA and ISD::SHL ++// sequence. 
++static SDValue performSRACombine(SDNode *N, SelectionDAG &DAG, ++ TargetLowering::DAGCombinerInfo &DCI, ++ const LoongArchSubtarget &Subtarget) { ++ ++ SDValue Res; ++ if ((Res = performLogicCombine(N, DAG, Subtarget))) ++ return Res; ++ ++ if (Subtarget.hasLSX() || Subtarget.hasLASX()) { ++ SDValue Op0 = N->getOperand(0); ++ SDValue Op1 = N->getOperand(1); ++ ++ // (sra (shl (LoongArchVExtract[SZ]Ext $a, $b, $c), imm:$d), imm:$d) ++ // where $d + sizeof($c) == 32 ++ // or $d + sizeof($c) <= 32 and SExt ++ // -> (LoongArchVExtractSExt $a, $b, $c) ++ if (Op0->getOpcode() == ISD::SHL && Op1 == Op0->getOperand(1)) { ++ SDValue Op0Op0 = Op0->getOperand(0); ++ ConstantSDNode *ShAmount = dyn_cast(Op1); ++ ++ if (!ShAmount) ++ return SDValue(); ++ ++ if (Op0Op0->getOpcode() != LoongArchISD::VEXTRACT_SEXT_ELT && ++ Op0Op0->getOpcode() != LoongArchISD::VEXTRACT_ZEXT_ELT) ++ return SDValue(); ++ ++ EVT ExtendTy = cast(Op0Op0->getOperand(2))->getVT(); ++ unsigned TotalBits = ShAmount->getZExtValue() + ExtendTy.getSizeInBits(); ++ ++ if (TotalBits == 32 || ++ (Op0Op0->getOpcode() == LoongArchISD::VEXTRACT_SEXT_ELT && ++ TotalBits <= 32)) { ++ SDValue Ops[] = {Op0Op0->getOperand(0), Op0Op0->getOperand(1), ++ Op0Op0->getOperand(2)}; ++ return DAG.getNode(LoongArchISD::VEXTRACT_SEXT_ELT, SDLoc(Op0Op0), ++ Op0Op0->getVTList(), ++ makeArrayRef(Ops, Op0Op0->getNumOperands())); ++ } ++ } ++ } ++ ++ return SDValue(); ++} ++ ++// combine vsub/vslt/vbitsel.v to vabsd ++static SDValue performVSELECTCombine(SDNode *N, SelectionDAG &DAG) { ++ assert((N->getOpcode() == ISD::VSELECT) && "Need ISD::VSELECT"); ++ ++ SDLoc dl(N); ++ SDValue Cond = N->getOperand(0); ++ SDValue TrueOpnd = N->getOperand(1); ++ SDValue FalseOpnd = N->getOperand(2); ++ ++ if (Cond.getOpcode() != ISD::SETCC || TrueOpnd.getOpcode() != ISD::SUB || ++ FalseOpnd.getOpcode() != ISD::SUB) ++ return SDValue(); ++ ++ if (!(Cond.hasOneUse() || TrueOpnd.hasOneUse() || FalseOpnd.hasOneUse())) ++ return SDValue(); ++ ++ ISD::CondCode CC = cast(Cond.getOperand(2))->get(); ++ ++ switch (CC) { ++ default: ++ return SDValue(); ++ case ISD::SETUGT: ++ case ISD::SETUGE: ++ case ISD::SETGT: ++ case ISD::SETGE: ++ break; ++ case ISD::SETULT: ++ case ISD::SETULE: ++ case ISD::SETLT: ++ case ISD::SETLE: ++ std::swap(TrueOpnd, FalseOpnd); ++ break; ++ } ++ ++ SDValue Op1 = Cond.getOperand(0); ++ SDValue Op2 = Cond.getOperand(1); ++ ++ if (TrueOpnd.getOperand(0) == Op1 && TrueOpnd.getOperand(1) == Op2 && ++ FalseOpnd.getOperand(0) == Op2 && FalseOpnd.getOperand(1) == Op1) { ++ if (ISD::isSignedIntSetCC(CC)) { ++ return DAG.getNode(LoongArchISD::VABSD, dl, ++ N->getOperand(1).getValueType(), Op1, Op2, ++ DAG.getTargetConstant(0, dl, MVT::i32)); ++ } else { ++ return DAG.getNode(LoongArchISD::UVABSD, dl, ++ N->getOperand(1).getValueType(), Op1, Op2, ++ DAG.getTargetConstant(0, dl, MVT::i32)); ++ } ++ } ++ return SDValue(); ++} ++ ++static SDValue performXORCombine(SDNode *N, SelectionDAG &DAG, ++ const LoongArchSubtarget &Subtarget) { ++ ++ EVT Ty = N->getValueType(0); ++ ++ if ((Subtarget.hasLSX() && Ty.is128BitVector() && Ty.isInteger()) || ++ (Subtarget.hasLASX() && Ty.is256BitVector() && Ty.isInteger())) { ++ // Try the following combines: ++ // (xor (or $a, $b), (build_vector allones)) ++ // (xor (or $a, $b), (bitcast (build_vector allones))) ++ SDValue Op0 = N->getOperand(0); ++ SDValue Op1 = N->getOperand(1); ++ SDValue NotOp; ++ ++ if (ISD::isBuildVectorAllOnes(Op0.getNode())) ++ NotOp = Op1; ++ else if (ISD::isBuildVectorAllOnes(Op1.getNode())) ++ NotOp = 
Op0; ++ else ++ return SDValue(); ++ ++ if (NotOp->getOpcode() == ISD::OR) ++ return DAG.getNode(LoongArchISD::VNOR, SDLoc(N), Ty, NotOp->getOperand(0), ++ NotOp->getOperand(1)); ++ } ++ ++ return SDValue(); ++} ++ ++// When using a 256-bit vector is less expensive than using a 128-bit vector, ++// use this function to convert a 128-bit vector to a 256-bit vector. ++static SDValue ++performCONCAT_VECTORSCombine(SDNode *N, SelectionDAG &DAG, ++ TargetLowering::DAGCombinerInfo &DCI, ++ const LoongArchSubtarget &Subtarget) { ++ ++ assert((N->getOpcode() == ISD::CONCAT_VECTORS) && "Need ISD::CONCAT_VECTORS"); ++ if (DCI.isAfterLegalizeDAG()) ++ return SDValue(); ++ ++ SDLoc DL(N); ++ SDValue Top0 = N->getOperand(0); ++ SDValue Top1 = N->getOperand(1); ++ ++ // Check for cheaper optimizations. ++ if (!((Top0->getOpcode() == ISD::SIGN_EXTEND) && ++ (Top1->getOpcode() == ISD::SIGN_EXTEND))) ++ return SDValue(); ++ if (!((Top0->getOperand(0)->getOpcode() == ISD::ADD) && ++ (Top1->getOperand(0)->getOpcode() == ISD::ADD))) ++ return SDValue(); ++ ++ SDValue Op_a0 = Top0->getOperand(0); ++ SDValue Op_a1 = Top1->getOperand(0); ++ for (int i = 0; i < 2; i++) { ++ if (!((Op_a0->getOperand(i)->getOpcode() == ISD::BUILD_VECTOR) && ++ (Op_a1->getOperand(i)->getOpcode() == ISD::BUILD_VECTOR))) ++ return SDValue(); ++ } ++ ++ SDValue Ops_b[] = {Op_a0->getOperand(0), Op_a0->getOperand(1), ++ Op_a1->getOperand(0), Op_a1->getOperand(1)}; ++ for (int i = 0; i < 4; i++) { ++ if (Ops_b[i]->getNumOperands() != 2) ++ return SDValue(); ++ } ++ ++ // Currently only a single case is handled, and more optimization scenarios ++ // will be added in the future. ++ SDValue Ops_e[] = {Ops_b[0]->getOperand(0), Ops_b[0]->getOperand(1), ++ Ops_b[2]->getOperand(0), Ops_b[2]->getOperand(1), ++ Ops_b[1]->getOperand(0), Ops_b[1]->getOperand(1), ++ Ops_b[3]->getOperand(0), Ops_b[3]->getOperand(1)}; ++ for (int i = 0; i < 8; i++) { ++ if (dyn_cast(Ops_e[i])) ++ return SDValue(); ++ if (i < 4) { ++ if (cast(Ops_e[i]->getOperand(1))->getSExtValue() != ++ (2 * i)) ++ return SDValue(); ++ } else { ++ if (cast(Ops_e[i]->getOperand(1))->getSExtValue() != ++ (2 * i - 7)) ++ return SDValue(); ++ } ++ } ++ ++ for (int i = 0; i < 5; i = i + 4) { ++ if (!((Ops_e[i]->getOperand(0) == Ops_e[i + 1]->getOperand(0)) && ++ (Ops_e[i + 1]->getOperand(0) == Ops_e[i + 2]->getOperand(0)) && ++ (Ops_e[i + 2]->getOperand(0) == Ops_e[i + 3]->getOperand(0)))) ++ return SDValue(); ++ } ++ return SDValue(DAG.getMachineNode(LoongArch::XVHADDW_D_W, DL, MVT::v4i64, ++ Ops_e[6]->getOperand(0), ++ Ops_e[0]->getOperand(0)), ++ 0); ++} ++ ++static SDValue performParity(SDNode *N, SelectionDAG &DAG, ++ TargetLowering::DAGCombinerInfo &DCI, ++ const LoongArchSubtarget &Subtarget) { ++ ++ SDLoc DL(N); ++ SDValue T = N->getOperand(0); ++ if (!(N->getValueType(0).isSimple() && T->getValueType(0).isSimple())) ++ return SDValue(); ++ ++ if (DCI.isAfterLegalizeDAG()) ++ return SDValue(); ++ ++ SDValue Ops[4]; ++ bool pos_e = false; ++ bool pos_o = false; ++ ++ for (int i = 0; i < 4; i++) { ++ Ops[i] = T->getOperand(i); ++ if (!Ops[i]->getValueType(0).isSimple()) ++ return SDValue(); ++ if (Ops[i]->getOpcode() != ISD::EXTRACT_VECTOR_ELT) ++ return SDValue(); ++ ++ if (!dyn_cast(Ops[i]->getOperand(1))) ++ return SDValue(); ++ ++ if (cast(Ops[i]->getOperand(1))->getSExtValue() == ++ (2 * i)) { ++ pos_e = true; ++ } else if (cast(Ops[i]->getOperand(1))->getSExtValue() == ++ (2 * i + 1)) { ++ pos_o = true; ++ } else ++ return SDValue(); ++ } ++ ++ if 
(!(N->getSimpleValueType(0).SimpleTy == MVT::v4i64 && ++ T->getSimpleValueType(0).SimpleTy == MVT::v4i32)) ++ return SDValue(); ++ ++ for (int j = 0; j < 3; j++) { ++ if (Ops[j]->getOperand(0) != Ops[j + 1]->getOperand(0)) ++ return SDValue(); ++ } ++ if (pos_e) { ++ if (N->getOpcode() == ISD::SIGN_EXTEND) { ++ if (Ops[0]->getOperand(0)->getOpcode() == ISD::ADD) ++ return SDValue(DAG.getMachineNode(LoongArch::XVADDWEV_D_W, DL, ++ MVT::v4i64, ++ Ops[0]->getOperand(0)->getOperand(1), ++ Ops[0]->getOperand(0)->getOperand(0)), ++ 0); ++ else if (Ops[0]->getOperand(0)->getOpcode() == ISD::SUB) ++ return SDValue(DAG.getMachineNode(LoongArch::XVSUBWEV_D_W, DL, ++ MVT::v4i64, ++ Ops[0]->getOperand(0)->getOperand(0), ++ Ops[0]->getOperand(0)->getOperand(1)), ++ 0); ++ } else if (N->getOpcode() == ISD::ZERO_EXTEND) { ++ if (Ops[0]->getOperand(0)->getOpcode() == ISD::ADD) ++ return SDValue(DAG.getMachineNode(LoongArch::XVADDWEV_D_WU, DL, ++ MVT::v4i64, ++ Ops[0]->getOperand(0)->getOperand(1), ++ Ops[0]->getOperand(0)->getOperand(0)), ++ 0); ++ else if (Ops[0]->getOperand(0)->getOpcode() == ISD::SUB) ++ return SDValue(DAG.getMachineNode(LoongArch::XVSUBWEV_D_WU, DL, ++ MVT::v4i64, ++ Ops[0]->getOperand(0)->getOperand(0), ++ Ops[0]->getOperand(0)->getOperand(1)), ++ 0); ++ } ++ } else if (pos_o) { ++ if (N->getOpcode() == ISD::SIGN_EXTEND) { ++ if (Ops[0]->getOperand(0)->getOpcode() == ISD::ADD) ++ return SDValue(DAG.getMachineNode(LoongArch::XVADDWOD_D_W, DL, ++ MVT::v4i64, ++ Ops[0]->getOperand(0)->getOperand(1), ++ Ops[0]->getOperand(0)->getOperand(0)), ++ 0); ++ else if (Ops[0]->getOperand(0)->getOpcode() == ISD::SUB) ++ return SDValue(DAG.getMachineNode(LoongArch::XVSUBWOD_D_W, DL, ++ MVT::v4i64, ++ Ops[0]->getOperand(0)->getOperand(0), ++ Ops[0]->getOperand(0)->getOperand(1)), ++ 0); ++ } else if (N->getOpcode() == ISD::ZERO_EXTEND) { ++ if (Ops[0]->getOperand(0)->getOpcode() == ISD::ADD) ++ return SDValue(DAG.getMachineNode(LoongArch::XVADDWOD_D_WU, DL, ++ MVT::v4i64, ++ Ops[0]->getOperand(0)->getOperand(1), ++ Ops[0]->getOperand(0)->getOperand(0)), ++ 0); ++ else if (Ops[0]->getOperand(0)->getOpcode() == ISD::SUB) ++ return SDValue(DAG.getMachineNode(LoongArch::XVSUBWOD_D_WU, DL, ++ MVT::v4i64, ++ Ops[0]->getOperand(0)->getOperand(0), ++ Ops[0]->getOperand(0)->getOperand(1)), ++ 0); ++ } ++ } else ++ return SDValue(); ++ ++ return SDValue(); ++} ++ ++// Optimize zero extension and sign extension of data ++static SDValue performExtend(SDNode *N, SelectionDAG &DAG, ++ TargetLowering::DAGCombinerInfo &DCI, ++ const LoongArchSubtarget &Subtarget) { ++ ++ if (!Subtarget.hasLASX()) ++ return SDValue(); ++ ++ SDLoc DL(N); ++ SDValue T = N->getOperand(0); ++ ++ if (T->getOpcode() == ISD::BUILD_VECTOR) ++ return performParity(N, DAG, DCI, Subtarget); ++ ++ if (T->getOpcode() != ISD::ADD && T->getOpcode() != ISD::SUB) ++ return SDValue(); ++ ++ SDValue T0 = T->getOperand(0); ++ SDValue T1 = T->getOperand(1); ++ ++ if (!(T0->getOpcode() == ISD::BUILD_VECTOR && ++ T1->getOpcode() == ISD::BUILD_VECTOR)) ++ return SDValue(); ++ ++ if (DCI.isAfterLegalizeDAG()) ++ return SDValue(); ++ ++ if (!(T->getValueType(0).isSimple() && T0->getValueType(0).isSimple() && ++ T1->getValueType(0).isSimple() && N->getValueType(0).isSimple())) ++ return SDValue(); ++ ++ if (!(N->getSimpleValueType(0).SimpleTy == MVT::v4i64 && ++ T->getSimpleValueType(0).SimpleTy == MVT::v4i32 && ++ T0->getSimpleValueType(0).SimpleTy == MVT::v4i32 && ++ T1->getSimpleValueType(0).SimpleTy == MVT::v4i32)) ++ return SDValue(); ++ ++ SDValue 
Opse0[4]; ++ SDValue Opse1[4]; ++ ++ for (int i = 0; i < 4; i++) { ++ if (T->getOpcode() == ISD::ADD) { ++ Opse0[i] = T1->getOperand(i); ++ Opse1[i] = T0->getOperand(i); ++ } else if (T->getOpcode() == ISD::SUB) { ++ Opse0[i] = T0->getOperand(i); ++ Opse1[i] = T1->getOperand(i); ++ } ++ ++ if (Opse0[i]->getOpcode() != ISD::EXTRACT_VECTOR_ELT || ++ Opse1[i]->getOpcode() != ISD::EXTRACT_VECTOR_ELT) ++ return SDValue(); ++ ++ if (!(dyn_cast(Opse0[i]->getOperand(1)) && ++ dyn_cast(Opse1[i]->getOperand(1)))) ++ return SDValue(); ++ ++ if (cast(Opse0[i]->getOperand(1))->getSExtValue() != ++ (2 * i + 1) || ++ cast(Opse1[i]->getOperand(1))->getSExtValue() != ++ (2 * i)) ++ return SDValue(); ++ ++ if (i > 0 && (Opse0[i]->getOperand(0) != Opse0[i - 1]->getOperand(0) || ++ Opse1[i]->getOperand(0) != Opse1[i - 1]->getOperand(0))) ++ return SDValue(); ++ } ++ ++ if (N->getOpcode() == ISD::SIGN_EXTEND) { ++ if (T->getOpcode() == ISD::ADD) ++ return SDValue(DAG.getMachineNode(LoongArch::XVHADDW_D_W, DL, MVT::v4i64, ++ Opse0[0]->getOperand(0), ++ Opse1[0]->getOperand(0)), ++ 0); ++ else if (T->getOpcode() == ISD::SUB) ++ return SDValue(DAG.getMachineNode(LoongArch::XVHSUBW_D_W, DL, MVT::v4i64, ++ Opse0[0]->getOperand(0), ++ Opse1[0]->getOperand(0)), ++ 0); ++ } else if (N->getOpcode() == ISD::ZERO_EXTEND) { ++ if (T->getOpcode() == ISD::ADD) ++ return SDValue(DAG.getMachineNode(LoongArch::XVHADDW_DU_WU, DL, ++ MVT::v4i64, Opse0[0]->getOperand(0), ++ Opse1[0]->getOperand(0)), ++ 0); ++ else if (T->getOpcode() == ISD::SUB) ++ return SDValue(DAG.getMachineNode(LoongArch::XVHSUBW_DU_WU, DL, ++ MVT::v4i64, Opse0[0]->getOperand(0), ++ Opse1[0]->getOperand(0)), ++ 0); ++ } ++ ++ return SDValue(); ++} ++ ++static SDValue performSIGN_EXTENDCombine(SDNode *N, SelectionDAG &DAG, ++ TargetLowering::DAGCombinerInfo &DCI, ++ const LoongArchSubtarget &Subtarget) { ++ ++ assert((N->getOpcode() == ISD::SIGN_EXTEND) && "Need ISD::SIGN_EXTEND"); ++ ++ SDLoc DL(N); ++ SDValue Top = N->getOperand(0); ++ ++ SDValue Res; ++ if (Res = performExtend(N, DAG, DCI, Subtarget)) ++ return Res; ++ ++ if (!(Top->getOpcode() == ISD::CopyFromReg)) ++ return SDValue(); ++ ++ if ((Top->getOperand(0)->getOpcode() == ISD::EntryToken) && ++ (N->getValueType(0) == MVT::i64)) { ++ ++ SDValue SubReg = DAG.getTargetConstant(LoongArch::sub_32, DL, MVT::i32); ++ SDNode *Res = DAG.getMachineNode(TargetOpcode::IMPLICIT_DEF, DL, MVT::i64); ++ ++ Res = DAG.getMachineNode(TargetOpcode::INSERT_SUBREG, DL, MVT::i64, ++ SDValue(Res, 0), Top, SubReg); ++ ++ return SDValue(Res, 0); ++ } ++ ++ return SDValue(); ++} ++ ++static SDValue performZERO_EXTENDCombine(SDNode *N, SelectionDAG &DAG, ++ TargetLowering::DAGCombinerInfo &DCI, ++ const LoongArchSubtarget &Subtarget) { ++ ++ assert((N->getOpcode() == ISD::ZERO_EXTEND) && "Need ISD::ZERO_EXTEND"); ++ ++ SDLoc DL(N); ++ ++ SDValue Res; ++ if (Res = performExtend(N, DAG, DCI, Subtarget)) ++ return Res; ++ ++ return SDValue(); ++} ++ ++SDValue LoongArchTargetLowering:: ++PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const { ++ SelectionDAG &DAG = DCI.DAG; ++ SDValue Val; ++ ++ switch (N->getOpcode()) { ++ default: break; ++ case ISD::AND: ++ return performANDCombine(N, DAG, DCI, Subtarget); ++ case ISD::OR: ++ return performORCombine(N, DAG, DCI, Subtarget); ++ case ISD::XOR: ++ return performXORCombine(N, DAG, Subtarget); ++ case ISD::MUL: ++ return performMULCombine(N, DAG, DCI, this, Subtarget); ++ case ISD::SRA: ++ return performSRACombine(N, DAG, DCI, Subtarget); ++ case ISD::SELECT: ++ return 
performSELECTCombine(N, DAG, DCI, Subtarget); ++ case ISD::VSELECT: ++ return performVSELECTCombine(N, DAG); ++ case ISD::CONCAT_VECTORS: ++ return performCONCAT_VECTORSCombine(N, DAG, DCI, Subtarget); ++ case ISD::SIGN_EXTEND: ++ return performSIGN_EXTENDCombine(N, DAG, DCI, Subtarget); ++ case ISD::ZERO_EXTEND: ++ return performZERO_EXTENDCombine(N, DAG, DCI, Subtarget); ++ case ISD::ADD: ++ case ISD::SUB: ++ case ISD::SHL: ++ case ISD::SRL: ++ return performLogicCombine(N, DAG, Subtarget); ++ } ++ return SDValue(); ++} ++ ++static SDValue lowerLSXSplatZExt(SDValue Op, unsigned OpNr, SelectionDAG &DAG) { ++ EVT ResVecTy = Op->getValueType(0); ++ EVT ViaVecTy = ResVecTy; ++ SDLoc DL(Op); ++ ++ // When ResVecTy == MVT::v2i64, LaneA is the upper 32 bits of the lane and ++ // LaneB is the lower 32-bits. Otherwise LaneA and LaneB are alternating ++ // lanes. ++ SDValue LaneA = Op->getOperand(OpNr); ++ SDValue LaneB; ++ ++ if (ResVecTy == MVT::v2i64) { ++ // In case of the index being passed as an immediate value, set the upper ++ // lane to 0 so that the splati.d instruction can be matched. ++ if (isa(LaneA)) ++ LaneB = DAG.getConstant(0, DL, MVT::i32); ++ // Having the index passed in a register, set the upper lane to the same ++ // value as the lower - this results in the BUILD_VECTOR node not being ++ // expanded through stack. This way we are able to pattern match the set of ++ // nodes created here to splat.d. ++ else ++ LaneB = LaneA; ++ ViaVecTy = MVT::v4i32; ++ } else ++ LaneB = LaneA; ++ ++ SDValue Ops[16] = {LaneA, LaneB, LaneA, LaneB, LaneA, LaneB, LaneA, LaneB, ++ LaneA, LaneB, LaneA, LaneB, LaneA, LaneB, LaneA, LaneB}; ++ ++ SDValue Result = DAG.getBuildVector( ++ ViaVecTy, DL, makeArrayRef(Ops, ViaVecTy.getVectorNumElements())); ++ ++ if (ViaVecTy != ResVecTy) { ++ SDValue One = DAG.getConstant(1, DL, ViaVecTy); ++ Result = DAG.getNode(ISD::BITCAST, DL, ResVecTy, ++ DAG.getNode(ISD::AND, DL, ViaVecTy, Result, One)); ++ } ++ ++ return Result; ++} ++ ++static SDValue lowerLSXSplatImm(SDValue Op, unsigned ImmOp, SelectionDAG &DAG, ++ bool IsSigned = false) { ++ return DAG.getConstant( ++ APInt(Op->getValueType(0).getScalarType().getSizeInBits(), ++ Op->getConstantOperandVal(ImmOp), IsSigned), ++ SDLoc(Op), Op->getValueType(0)); ++} ++ ++static SDValue getBuildVectorSplat(EVT VecTy, SDValue SplatValue, ++ SelectionDAG &DAG) { ++ EVT ViaVecTy = VecTy; ++ SDValue SplatValueA = SplatValue; ++ SDValue SplatValueB = SplatValue; ++ SDLoc DL(SplatValue); ++ ++ if (VecTy == MVT::v2i64) { ++ // v2i64 BUILD_VECTOR must be performed via v4i32 so split into i32's. 
++ ViaVecTy = MVT::v4i32; ++ ++ SplatValueA = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, SplatValue); ++ SplatValueB = DAG.getNode(ISD::SRL, DL, MVT::i64, SplatValue, ++ DAG.getConstant(32, DL, MVT::i32)); ++ SplatValueB = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, SplatValueB); ++ } ++ ++ SDValue Ops[32] = {SplatValueA, SplatValueB, SplatValueA, SplatValueB, ++ SplatValueA, SplatValueB, SplatValueA, SplatValueB, ++ SplatValueA, SplatValueB, SplatValueA, SplatValueB, ++ SplatValueA, SplatValueB, SplatValueA, SplatValueB, ++ SplatValueA, SplatValueB, SplatValueA, SplatValueB, ++ SplatValueA, SplatValueB, SplatValueA, SplatValueB, ++ SplatValueA, SplatValueB, SplatValueA, SplatValueB, ++ SplatValueA, SplatValueB, SplatValueA, SplatValueB}; ++ ++ SDValue Result = DAG.getBuildVector( ++ ViaVecTy, DL, makeArrayRef(Ops, ViaVecTy.getVectorNumElements())); ++ ++ if (VecTy != ViaVecTy) ++ Result = DAG.getNode(ISD::BITCAST, DL, VecTy, Result); ++ ++ return Result; ++} ++ ++static SDValue truncateVecElts(SDValue Op, SelectionDAG &DAG) { ++ SDLoc DL(Op); ++ EVT ResTy = Op->getValueType(0); ++ SDValue Vec = Op->getOperand(2); ++ MVT ResEltTy = ++ (ResTy == MVT::v2i64 || ResTy == MVT::v4i64) ? MVT::i64 : MVT::i32; ++ SDValue ConstValue = ++ DAG.getConstant(Vec.getScalarValueSizeInBits() - 1, DL, ResEltTy); ++ SDValue SplatVec = getBuildVectorSplat(ResTy, ConstValue, DAG); ++ ++ return DAG.getNode(ISD::AND, DL, ResTy, Vec, SplatVec); ++} ++ ++static SDValue lowerLSXBitClear(SDValue Op, SelectionDAG &DAG) { ++ EVT ResTy = Op->getValueType(0); ++ SDLoc DL(Op); ++ SDValue One = DAG.getConstant(1, DL, ResTy); ++ SDValue Bit = DAG.getNode(ISD::SHL, DL, ResTy, One, truncateVecElts(Op, DAG)); ++ ++ return DAG.getNode(ISD::AND, DL, ResTy, Op->getOperand(1), ++ DAG.getNOT(DL, Bit, ResTy)); ++} ++ ++static SDValue lowerLSXLoadIntr(SDValue Op, SelectionDAG &DAG, unsigned Intr, ++ const LoongArchSubtarget &Subtarget) { ++ SDLoc DL(Op); ++ SDValue ChainIn = Op->getOperand(0); ++ SDValue Address = Op->getOperand(2); ++ SDValue Offset = Op->getOperand(3); ++ EVT ResTy = Op->getValueType(0); ++ EVT PtrTy = Address->getValueType(0); ++ ++ // For LP64 addresses have the underlying type MVT::i64. This intrinsic ++ // however takes an i32 signed constant offset. The actual type of the ++ // intrinsic is a scaled signed i12. ++ if (Subtarget.isABI_LP64D()) ++ Offset = DAG.getNode(ISD::SIGN_EXTEND, DL, PtrTy, Offset); ++ ++ Address = DAG.getNode(ISD::ADD, DL, PtrTy, Address, Offset); ++ return DAG.getLoad(ResTy, DL, ChainIn, Address, MachinePointerInfo(), ++ /* Alignment = */ 16); ++} ++ ++static SDValue lowerLASXLoadIntr(SDValue Op, SelectionDAG &DAG, unsigned Intr, ++ const LoongArchSubtarget &Subtarget) { ++ SDLoc DL(Op); ++ SDValue ChainIn = Op->getOperand(0); ++ SDValue Address = Op->getOperand(2); ++ SDValue Offset = Op->getOperand(3); ++ EVT ResTy = Op->getValueType(0); ++ EVT PtrTy = Address->getValueType(0); ++ ++ // For LP64 addresses have the underlying type MVT::i64. This intrinsic ++ // however takes an i32 signed constant offset. The actual type of the ++ // intrinsic is a scaled signed i12. 
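// Illustration, not part of the patch: a scalar model of the vector bit-clear
// lowering above (lowerLSXBitClear). truncateVecElts masks each lane's shift
// amount to the element width, and the result then has exactly that bit
// cleared in every lane. Hypothetical per-lane equivalent:
static inline unsigned bitClearLane(unsigned Src, unsigned Amt,
                                    unsigned ElemBits) {
  return Src & ~(1u << (Amt & (ElemBits - 1)));
}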
++ if (Subtarget.isABI_LP64D()) ++ Offset = DAG.getNode(ISD::SIGN_EXTEND, DL, PtrTy, Offset); ++ ++ Address = DAG.getNode(ISD::ADD, DL, PtrTy, Address, Offset); ++ return DAG.getLoad(ResTy, DL, ChainIn, Address, MachinePointerInfo(), ++ /* Alignment = */ 32); ++} ++ ++static SDValue lowerLASXVLDRIntr(SDValue Op, SelectionDAG &DAG, unsigned Intr, ++ const LoongArchSubtarget &Subtarget) { ++ SDLoc DL(Op); ++ SDValue ChainIn = Op->getOperand(0); ++ SDValue Address = Op->getOperand(2); ++ SDValue Offset = Op->getOperand(3); ++ EVT ResTy = Op->getValueType(0); ++ EVT PtrTy = Address->getValueType(0); ++ ++ // For LP64 addresses have the underlying type MVT::i64. This intrinsic ++ // however takes an i32 signed constant offset. The actual type of the ++ // intrinsic is a scaled signed i12. ++ if (Subtarget.isABI_LP64D()) ++ Offset = DAG.getNode(ISD::SIGN_EXTEND, DL, PtrTy, Offset); ++ ++ Address = DAG.getNode(ISD::ADD, DL, PtrTy, Address, Offset); ++ SDValue Load = DAG.getLoad(ResTy, DL, ChainIn, Address, MachinePointerInfo(), ++ /* Alignment = */ 32); ++ return DAG.getNode(LoongArchISD::XVBROADCAST, DL, ++ DAG.getVTList(ResTy, MVT::Other), Load); ++} ++ ++static SDValue lowerLSXVLDRIntr(SDValue Op, SelectionDAG &DAG, unsigned Intr, ++ const LoongArchSubtarget &Subtarget) { ++ SDLoc DL(Op); ++ SDValue ChainIn = Op->getOperand(0); ++ SDValue Address = Op->getOperand(2); ++ SDValue Offset = Op->getOperand(3); ++ EVT ResTy = Op->getValueType(0); ++ EVT PtrTy = Address->getValueType(0); ++ ++ // For LP64 addresses have the underlying type MVT::i64. This intrinsic ++ // however takes an i32 signed constant offset. The actual type of the ++ // intrinsic is a scaled signed i12. ++ if (Subtarget.isABI_LP64D()) ++ Offset = DAG.getNode(ISD::SIGN_EXTEND, DL, PtrTy, Offset); ++ ++ Address = DAG.getNode(ISD::ADD, DL, PtrTy, Address, Offset); ++ SDValue Load = DAG.getLoad(ResTy, DL, ChainIn, Address, MachinePointerInfo(), ++ /* Alignment = */ 16); ++ return DAG.getNode(LoongArchISD::VBROADCAST, DL, ++ DAG.getVTList(ResTy, MVT::Other), Load); ++} ++ ++static SDValue lowerLSXStoreIntr(SDValue Op, SelectionDAG &DAG, unsigned Intr, ++ const LoongArchSubtarget &Subtarget) { ++ SDLoc DL(Op); ++ SDValue ChainIn = Op->getOperand(0); ++ SDValue Value = Op->getOperand(2); ++ SDValue Address = Op->getOperand(3); ++ SDValue Offset = Op->getOperand(4); ++ EVT PtrTy = Address->getValueType(0); ++ ++ // For LP64 addresses have the underlying type MVT::i64. This intrinsic ++ // however takes an i32 signed constant offset. The actual type of the ++ // intrinsic is a scaled signed i12. ++ if (Subtarget.isABI_LP64D()) ++ Offset = DAG.getNode(ISD::SIGN_EXTEND, DL, PtrTy, Offset); ++ ++ Address = DAG.getNode(ISD::ADD, DL, PtrTy, Address, Offset); ++ ++ return DAG.getStore(ChainIn, DL, Value, Address, MachinePointerInfo(), ++ /* Alignment = */ 16); ++} ++ ++static SDValue lowerLASXStoreIntr(SDValue Op, SelectionDAG &DAG, unsigned Intr, ++ const LoongArchSubtarget &Subtarget) { ++ SDLoc DL(Op); ++ SDValue ChainIn = Op->getOperand(0); ++ SDValue Value = Op->getOperand(2); ++ SDValue Address = Op->getOperand(3); ++ SDValue Offset = Op->getOperand(4); ++ EVT PtrTy = Address->getValueType(0); ++ ++ // For LP64 addresses have the underlying type MVT::i64. This intrinsic ++ // however takes an i32 signed constant offset. The actual type of the ++ // intrinsic is a scaled signed i12. 
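// Illustration, not part of the patch: the effective-address computation that
// these load/store intrinsic lowerings share. Under LP64 the base address is
// 64-bit while the intrinsic offset is a signed 32-bit value, so the offset is
// sign-extended before the add. Hypothetical scalar equivalent:
static inline long long intrinsicAddress(long long Base, int Offset) {
  return Base + (long long)Offset; // ISD::SIGN_EXTEND of Offset, then ISD::ADD
}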
++ if (Subtarget.isABI_LP64D()) ++ Offset = DAG.getNode(ISD::SIGN_EXTEND, DL, PtrTy, Offset); ++ ++ Address = DAG.getNode(ISD::ADD, DL, PtrTy, Address, Offset); ++ ++ return DAG.getStore(ChainIn, DL, Value, Address, MachinePointerInfo(), ++ /* Alignment = */ 32); ++} ++ ++static SDValue LowerSUINT_TO_FP(unsigned ExtOpcode, SDValue Op, SelectionDAG &DAG) { ++ ++ EVT ResTy = Op->getValueType(0); ++ SDValue Op0 = Op->getOperand(0); ++ EVT ViaTy = Op0->getValueType(0); ++ SDLoc DL(Op); ++ ++ if (!ResTy.isVector()) { ++ if(ResTy.getScalarSizeInBits() == ViaTy.getScalarSizeInBits()) ++ return DAG.getNode(ISD::BITCAST, DL, ResTy, Op0); ++ else if(ResTy.getScalarSizeInBits() > ViaTy.getScalarSizeInBits()) { ++ Op0 = DAG.getNode(ISD::BITCAST, DL, MVT::f32, Op0); ++ return DAG.getNode(ISD::FP_EXTEND, DL, MVT::f64, Op0); ++ } else { ++ Op0 = DAG.getNode(ISD::BITCAST, DL, MVT::f64, Op0); ++ return DAG.getNode(ISD::TRUNCATE, DL, MVT::f32, Op0); ++ } ++ ++ } ++ ++ if (ResTy.getScalarSizeInBits() == ViaTy.getScalarSizeInBits()) { ++ // v4i32 => v4f32 v8i32 => v8f32 ++ // v2i64 => v2f64 v4i64 => v4f64 ++ // do nothing ++ } else if (ResTy.getScalarSizeInBits() > ViaTy.getScalarSizeInBits()) { ++ // v4i32 => v4i64 => v4f64 ++ Op0 = DAG.getNode(ISD::CONCAT_VECTORS, DL, MVT::v8i32, {Op0, Op0}); ++ Op0 = DAG.getNode(ExtOpcode, DL, MVT::v4i64, Op0); ++ } else { ++ // v4i64 => v4f32 ++ SDValue Ops[4]; ++ for (unsigned i = 0; i < 4; i++) { ++ SDValue I64 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::i64, Op0, ++ DAG.getConstant(i, DL, MVT::i32)); ++ Ops[i] = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, I64); ++ } ++ Op0 = DAG.getBuildVector(MVT::v4i32, DL, makeArrayRef(Ops, 4)); ++ } ++ ++ return Op0; ++} ++ ++static SDValue LowerFP_TO_SUINT(unsigned FPToSUI, unsigned ExtOpcode, ++ SDValue Op, SelectionDAG &DAG) { ++ ++ EVT ResTy = Op->getValueType(0); ++ SDValue Op0 = Op->getOperand(0); ++ EVT ViaTy = Op0->getValueType(0); ++ SDLoc DL(Op); ++ ++ if (ResTy.getScalarSizeInBits() == ViaTy.getScalarSizeInBits()) { ++ // v4f32 => v4i32 v8f32 => v8i32 ++ // v2f64 => v2i64 v4f64 => v4i64 ++ // do nothing ++ Op0 = DAG.getNode(FPToSUI, DL, ResTy, Op0); ++ } else if (ResTy.getScalarSizeInBits() > ViaTy.getScalarSizeInBits()) { ++ // v4f32 => v4i32 => v4i64 ++ Op0 = DAG.getNode(FPToSUI, DL, MVT::v4i32, Op0); ++ Op0 = DAG.getNode(ISD::CONCAT_VECTORS, DL, MVT::v8i32, {Op0, Op0}); ++ Op0 = DAG.getNode(ExtOpcode, DL, MVT::v4i64, Op0); ++ } else { ++ SDValue Ops[4]; ++ Ops[0] = DAG.getNode(FPToSUI, DL, MVT::i32, ++ DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f64, Op0, ++ DAG.getConstant(0, DL, MVT::i64))); ++ Ops[1] = DAG.getNode(FPToSUI, DL, MVT::i32, ++ DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f64, Op0, ++ DAG.getConstant(1, DL, MVT::i64))); ++ Ops[2] = DAG.getNode(FPToSUI, DL, MVT::i32, ++ DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f64, Op0, ++ DAG.getConstant(2, DL, MVT::i64))); ++ Ops[3] = DAG.getNode(FPToSUI, DL, MVT::i32, ++ DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f64, Op0, ++ DAG.getConstant(3, DL, MVT::i64))); ++ ++ Op0 = DAG.getBuildVector(MVT::v4i32, DL, makeArrayRef(Ops, 4)); ++ } ++ ++ return Op0; ++} ++ ++// Lower VECTOR_SHUFFLE into SHF (if possible). ++// ++// SHF splits the vector into blocks of four elements, then shuffles these ++// elements according to a <4 x i2> constant (encoded as an integer immediate). 
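// Illustration, not part of the patch: how the <4 x i2> immediate is packed.
// Element i of the 4-element block occupies bits [2*i+1 : 2*i], which is why
// the worked example further down encodes the mask <3, 2, 1, 0, ...> as
// 3 + (2 << 2) + (1 << 4) + (0 << 6) == 27. Hypothetical encoder mirroring the
// loop used in the lowering (undef indices, -1, become 0):
static inline unsigned encodeSHFImm(const int Idx[4]) {
  unsigned Imm = 0;
  for (int I = 3; I >= 0; --I) {
    Imm <<= 2;
    Imm |= (unsigned)(Idx[I] < 0 ? 0 : Idx[I]) & 0x3;
  }
  return Imm; // e.g. {3, 2, 1, 0} -> 27
}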
++// ++// It is therefore possible to lower into SHF when the mask takes the form: ++// ++// When undef's appear they are treated as if they were whatever value is ++// necessary in order to fit the above forms. ++// ++// For example: ++// %2 = shufflevector <8 x i16> %0, <8 x i16> undef, ++// <8 x i32> ++// is lowered to: ++// (VSHUF4I_H $v0, $v1, 27) ++// where the 27 comes from: ++// 3 + (2 << 2) + (1 << 4) + (0 << 6) ++static SDValue lowerVECTOR_SHUFFLE_SHF(SDValue Op, EVT ResTy, ++ SmallVector Indices, ++ SelectionDAG &DAG) { ++ int SHFIndices[4] = {-1, -1, -1, -1}; ++ ++ if (Indices.size() < 4) ++ return SDValue(); ++ ++ for (unsigned i = 0; i < 4; ++i) { ++ for (unsigned j = i; j < Indices.size(); j += 4) { ++ int Idx = Indices[j]; ++ ++ // Convert from vector index to 4-element subvector index ++ // If an index refers to an element outside of the subvector then give up ++ if (Idx != -1) { ++ Idx -= 4 * (j / 4); ++ if (Idx < 0 || Idx >= 4) ++ return SDValue(); ++ } ++ ++ // If the mask has an undef, replace it with the current index. ++ // Note that it might still be undef if the current index is also undef ++ if (SHFIndices[i] == -1) ++ SHFIndices[i] = Idx; ++ ++ // Check that non-undef values are the same as in the mask. If they ++ // aren't then give up ++ if (!(Idx == -1 || Idx == SHFIndices[i])) ++ return SDValue(); ++ } ++ } ++ ++ // Calculate the immediate. Replace any remaining undefs with zero ++ APInt Imm(32, 0); ++ for (int i = 3; i >= 0; --i) { ++ int Idx = SHFIndices[i]; ++ ++ if (Idx == -1) ++ Idx = 0; ++ ++ Imm <<= 2; ++ Imm |= Idx & 0x3; ++ } ++ ++ SDLoc DL(Op); ++ return DAG.getNode(LoongArchISD::SHF, DL, ResTy, ++ DAG.getConstant(Imm, DL, MVT::i32), Op->getOperand(0)); ++} ++ ++/// Determine whether a range fits a regular pattern of values. ++/// This function accounts for the possibility of jumping over the End iterator. ++template ++static bool ++fitsRegularPattern(typename SmallVectorImpl::const_iterator Begin, ++ unsigned CheckStride, ++ typename SmallVectorImpl::const_iterator End, ++ ValType ExpectedIndex, unsigned ExpectedIndexStride) { ++ auto &I = Begin; ++ ++ while (I != End) { ++ if (*I != -1 && *I != ExpectedIndex) ++ return false; ++ ExpectedIndex += ExpectedIndexStride; ++ ++ // Incrementing past End is undefined behaviour so we must increment one ++ // step at a time and check for End at each step. ++ for (unsigned n = 0; n < CheckStride && I != End; ++n, ++I) ++ ; // Empty loop body. ++ } ++ return true; ++} ++ ++// Determine whether VECTOR_SHUFFLE is a VREPLVEI. ++// ++// It is a VREPLVEI when the mask is: ++// ++// where x is any valid index. ++// ++// When undef's appear in the mask they are treated as if they were whatever ++// value is necessary in order to fit the above form. ++static bool isVECTOR_SHUFFLE_VREPLVEI(SDValue Op, EVT ResTy, ++ SmallVector Indices, ++ SelectionDAG &DAG) { ++ assert((Indices.size() % 2) == 0); ++ ++ int SplatIndex = -1; ++ for (const auto &V : Indices) { ++ if (V != -1) { ++ SplatIndex = V; ++ break; ++ } ++ } ++ ++ return fitsRegularPattern(Indices.begin(), 1, Indices.end(), SplatIndex, ++ 0); ++} ++ ++// Lower VECTOR_SHUFFLE into VPACKEV (if possible). ++// ++// VPACKEV interleaves the even elements from each vector. ++// ++// It is possible to lower into VPACKEV when the mask consists of two of the ++// following forms interleaved: ++// <0, 2, 4, ...> ++// ++// where n is the number of elements in the vector. 
++// For example: ++// <0, 0, 2, 2, 4, 4, ...> ++// <0, n, 2, n+2, 4, n+4, ...> ++// ++// When undef's appear in the mask they are treated as if they were whatever ++// value is necessary in order to fit the above forms. ++static SDValue lowerVECTOR_SHUFFLE_VPACKEV(SDValue Op, EVT ResTy, ++ SmallVector Indices, ++ SelectionDAG &DAG) { ++ assert((Indices.size() % 2) == 0); ++ ++ SDValue Vj; ++ SDValue Vk; ++ const auto &Begin = Indices.begin(); ++ const auto &End = Indices.end(); ++ ++ // Check even elements are taken from the even elements of one half or the ++ // other and pick an operand accordingly. ++ if (fitsRegularPattern(Begin, 2, End, 0, 2)) ++ Vj = Op->getOperand(0); ++ else if (fitsRegularPattern(Begin, 2, End, Indices.size(), 2)) ++ Vj = Op->getOperand(1); ++ else ++ return SDValue(); ++ ++ // Check odd elements are taken from the even elements of one half or the ++ // other and pick an operand accordingly. ++ if (fitsRegularPattern(Begin + 1, 2, End, 0, 2)) ++ Vk = Op->getOperand(0); ++ else if (fitsRegularPattern(Begin + 1, 2, End, Indices.size(), 2)) ++ Vk = Op->getOperand(1); ++ else ++ return SDValue(); ++ ++ return DAG.getNode(LoongArchISD::VPACKEV, SDLoc(Op), ResTy, Vk, Vj); ++} ++ ++// Lower VECTOR_SHUFFLE into VPACKOD (if possible). ++// ++// VPACKOD interleaves the odd elements from each vector. ++// ++// It is possible to lower into VPACKOD when the mask consists of two of the ++// following forms interleaved: ++// <1, 3, 5, ...> ++// ++// where n is the number of elements in the vector. ++// For example: ++// <1, 1, 3, 3, 5, 5, ...> ++// <1, n+1, 3, n+3, 5, n+5, ...> ++// ++// When undef's appear in the mask they are treated as if they were whatever ++// value is necessary in order to fit the above forms. ++static SDValue lowerVECTOR_SHUFFLE_VPACKOD(SDValue Op, EVT ResTy, ++ SmallVector Indices, ++ SelectionDAG &DAG) { ++ assert((Indices.size() % 2) == 0); ++ ++ SDValue Vj; ++ SDValue Vk; ++ const auto &Begin = Indices.begin(); ++ const auto &End = Indices.end(); ++ ++ // Check even elements are taken from the odd elements of one half or the ++ // other and pick an operand accordingly. ++ if (fitsRegularPattern(Begin, 2, End, 1, 2)) ++ Vj = Op->getOperand(0); ++ else if (fitsRegularPattern(Begin, 2, End, Indices.size() + 1, 2)) ++ Vj = Op->getOperand(1); ++ else ++ return SDValue(); ++ ++ // Check odd elements are taken from the odd elements of one half or the ++ // other and pick an operand accordingly. ++ if (fitsRegularPattern(Begin + 1, 2, End, 1, 2)) ++ Vk = Op->getOperand(0); ++ else if (fitsRegularPattern(Begin + 1, 2, End, Indices.size() + 1, 2)) ++ Vk = Op->getOperand(1); ++ else ++ return SDValue(); ++ ++ return DAG.getNode(LoongArchISD::VPACKOD, SDLoc(Op), ResTy, Vk, Vj); ++} ++ ++// Lower VECTOR_SHUFFLE into VILVL (if possible). ++// ++// VILVL interleaves consecutive elements from the right (lowest-indexed) half ++// of each vector. ++// ++// It is possible to lower into VILVL when the mask consists of two of the ++// following forms interleaved: ++// <0, 1, 2, ...> ++// ++// where n is the number of elements in the vector. ++// For example: ++// <0, 0, 1, 1, 2, 2, ...> ++// <0, n, 1, n+1, 2, n+2, ...> ++// ++// When undef's appear in the mask they are treated as if they were whatever ++// value is necessary in order to fit the above forms. 
++static SDValue lowerVECTOR_SHUFFLE_VILVL(SDValue Op, EVT ResTy, ++ SmallVector Indices, ++ SelectionDAG &DAG) { ++ assert((Indices.size() % 2) == 0); ++ ++ SDValue Vj; ++ SDValue Vk; ++ const auto &Begin = Indices.begin(); ++ const auto &End = Indices.end(); ++ ++ // Check even elements are taken from the right (lowest-indexed) elements of ++ // one half or the other and pick an operand accordingly. ++ if (fitsRegularPattern(Begin, 2, End, 0, 1)) ++ Vj = Op->getOperand(0); ++ else if (fitsRegularPattern(Begin, 2, End, Indices.size(), 1)) ++ Vj = Op->getOperand(1); ++ else ++ return SDValue(); ++ ++ // Check odd elements are taken from the right (lowest-indexed) elements of ++ // one half or the other and pick an operand accordingly. ++ if (fitsRegularPattern(Begin + 1, 2, End, 0, 1)) ++ Vk = Op->getOperand(0); ++ else if (fitsRegularPattern(Begin + 1, 2, End, Indices.size(), 1)) ++ Vk = Op->getOperand(1); ++ else ++ return SDValue(); ++ ++ return DAG.getNode(LoongArchISD::VILVL, SDLoc(Op), ResTy, Vk, Vj); ++} ++ ++// Lower VECTOR_SHUFFLE into VILVH (if possible). ++// ++// VILVH interleaves consecutive elements from the left (highest-indexed) half ++// of each vector. ++// ++// It is possible to lower into VILVH when the mask consists of two of the ++// following forms interleaved: ++// ++// ++// where n is the number of elements in the vector and x is half n. ++// For example: ++// ++// ++// ++// When undef's appear in the mask they are treated as if they were whatever ++// value is necessary in order to fit the above forms. ++static SDValue lowerVECTOR_SHUFFLE_VILVH(SDValue Op, EVT ResTy, ++ SmallVector Indices, ++ SelectionDAG &DAG) { ++ assert((Indices.size() % 2) == 0); ++ ++ unsigned HalfSize = Indices.size() / 2; ++ SDValue Vj; ++ SDValue Vk; ++ const auto &Begin = Indices.begin(); ++ const auto &End = Indices.end(); ++ ++ // Check even elements are taken from the left (highest-indexed) elements of ++ // one half or the other and pick an operand accordingly. ++ if (fitsRegularPattern(Begin, 2, End, HalfSize, 1)) ++ Vj = Op->getOperand(0); ++ else if (fitsRegularPattern(Begin, 2, End, Indices.size() + HalfSize, 1)) ++ Vj = Op->getOperand(1); ++ else ++ return SDValue(); ++ ++ // Check odd elements are taken from the left (highest-indexed) elements of ++ // one half or the other and pick an operand accordingly. ++ if (fitsRegularPattern(Begin + 1, 2, End, HalfSize, 1)) ++ Vk = Op->getOperand(0); ++ else if (fitsRegularPattern(Begin + 1, 2, End, Indices.size() + HalfSize, ++ 1)) ++ Vk = Op->getOperand(1); ++ else ++ return SDValue(); ++ ++ return DAG.getNode(LoongArchISD::VILVH, SDLoc(Op), ResTy, Vk, Vj); ++} ++ ++// Lower VECTOR_SHUFFLE into VPICKEV (if possible). ++// ++// VPICKEV copies the even elements of each vector into the result vector. ++// ++// It is possible to lower into VPICKEV when the mask consists of two of the ++// following forms concatenated: ++// <0, 2, 4, ...> ++// ++// where n is the number of elements in the vector. ++// For example: ++// <0, 2, 4, ..., 0, 2, 4, ...> ++// <0, 2, 4, ..., n, n+2, n+4, ...> ++// ++// When undef's appear in the mask they are treated as if they were whatever ++// value is necessary in order to fit the above forms. 
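// Illustration, not part of the patch: concrete masks for an 8-element shuffle
// (n == 8, e.g. v8i16) that the pattern checks in this file accept, spelled
// out from the documented forms above and below:
//   <0, 8, 2, 10, 4, 12, 6, 14>   -> VPACKEV (even lanes, interleaved)
//   <1, 9, 3, 11, 5, 13, 7, 15>   -> VPACKOD (odd lanes, interleaved)
//   <0, 8, 1, 9, 2, 10, 3, 11>    -> VILVL   (low halves, interleaved)
//   <4, 12, 5, 13, 6, 14, 7, 15>  -> VILVH   (high halves, interleaved)
//   <0, 2, 4, 6, 8, 10, 12, 14>   -> VPICKEV (even lanes, concatenated)
//   <1, 3, 5, 7, 9, 11, 13, 15>   -> VPICKOD (odd lanes, concatenated)
// Undef (-1) entries may replace any position and the checks still match.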
++static SDValue lowerVECTOR_SHUFFLE_VPICKEV(SDValue Op, EVT ResTy, ++ SmallVector Indices, ++ SelectionDAG &DAG) { ++ assert((Indices.size() % 2) == 0); ++ ++ SDValue Vj; ++ SDValue Vk; ++ const auto &Begin = Indices.begin(); ++ const auto &Mid = Indices.begin() + Indices.size() / 2; ++ const auto &End = Indices.end(); ++ ++ if (fitsRegularPattern(Begin, 1, Mid, 0, 2)) ++ Vj = Op->getOperand(0); ++ else if (fitsRegularPattern(Begin, 1, Mid, Indices.size(), 2)) ++ Vj = Op->getOperand(1); ++ else ++ return SDValue(); ++ ++ if (fitsRegularPattern(Mid, 1, End, 0, 2)) ++ Vk = Op->getOperand(0); ++ else if (fitsRegularPattern(Mid, 1, End, Indices.size(), 2)) ++ Vk = Op->getOperand(1); ++ else ++ return SDValue(); ++ ++ return DAG.getNode(LoongArchISD::VPICKEV, SDLoc(Op), ResTy, Vk, Vj); ++} ++ ++// Lower VECTOR_SHUFFLE into VPICKOD (if possible). ++// ++// VPICKOD copies the odd elements of each vector into the result vector. ++// ++// It is possible to lower into VPICKOD when the mask consists of two of the ++// following forms concatenated: ++// <1, 3, 5, ...> ++// ++// where n is the number of elements in the vector. ++// For example: ++// <1, 3, 5, ..., 1, 3, 5, ...> ++// <1, 3, 5, ..., n+1, n+3, n+5, ...> ++// ++// When undef's appear in the mask they are treated as if they were whatever ++// value is necessary in order to fit the above forms. ++static SDValue lowerVECTOR_SHUFFLE_VPICKOD(SDValue Op, EVT ResTy, ++ SmallVector Indices, ++ SelectionDAG &DAG) { ++ assert((Indices.size() % 2) == 0); ++ ++ SDValue Vj; ++ SDValue Vk; ++ const auto &Begin = Indices.begin(); ++ const auto &Mid = Indices.begin() + Indices.size() / 2; ++ const auto &End = Indices.end(); ++ ++ if (fitsRegularPattern(Begin, 1, Mid, 1, 2)) ++ Vj = Op->getOperand(0); ++ else if (fitsRegularPattern(Begin, 1, Mid, Indices.size() + 1, 2)) ++ Vj = Op->getOperand(1); ++ else ++ return SDValue(); ++ ++ if (fitsRegularPattern(Mid, 1, End, 1, 2)) ++ Vk = Op->getOperand(0); ++ else if (fitsRegularPattern(Mid, 1, End, Indices.size() + 1, 2)) ++ Vk = Op->getOperand(1); ++ else ++ return SDValue(); ++ ++ return DAG.getNode(LoongArchISD::VPICKOD, SDLoc(Op), ResTy, Vk, Vj); ++} ++ ++// Lower VECTOR_SHUFFLE into VSHF. ++// ++// This mostly consists of converting the shuffle indices in Indices into a ++// BUILD_VECTOR and adding it as an operand to the resulting VSHF. There is ++// also code to eliminate unused operands of the VECTOR_SHUFFLE. For example, ++// if the type is v8i16 and all the indices are less than 8 then the second ++// operand is unused and can be replaced with anything. We choose to replace it ++// with the used operand since this reduces the number of instructions overall. 
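// Illustration, not part of the patch: the operand-elimination rule described
// above, as a hypothetical standalone check over a shuffle mask with NumElts
// lanes per source (index -1 means undef). When only one source is
// referenced, both VSHF operands can be set to that source:
static inline void classifyShuffleSources(const int *Mask, int NumElts,
                                          bool &UsesOp0, bool &UsesOp1) {
  UsesOp0 = UsesOp1 = false;
  for (int I = 0; I < NumElts; ++I) {
    if (Mask[I] >= 0 && Mask[I] < NumElts)
      UsesOp0 = true; // index into the first source vector
    else if (Mask[I] >= NumElts && Mask[I] < 2 * NumElts)
      UsesOp1 = true; // index into the second source vector
  }
}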
++static SDValue lowerVECTOR_SHUFFLE_VSHF(SDValue Op, EVT ResTy, ++ SmallVector Indices, ++ SelectionDAG &DAG) { ++ SmallVector Ops; ++ SDValue Op0; ++ SDValue Op1; ++ EVT MaskVecTy = ResTy.changeVectorElementTypeToInteger(); ++ EVT MaskEltTy = MaskVecTy.getVectorElementType(); ++ bool Using1stVec = false; ++ bool Using2ndVec = false; ++ SDLoc DL(Op); ++ int ResTyNumElts = ResTy.getVectorNumElements(); ++ ++ for (int i = 0; i < ResTyNumElts; ++i) { ++ // Idx == -1 means UNDEF ++ int Idx = Indices[i]; ++ ++ if (0 <= Idx && Idx < ResTyNumElts) ++ Using1stVec = true; ++ if (ResTyNumElts <= Idx && Idx < ResTyNumElts * 2) ++ Using2ndVec = true; ++ } ++ ++ for (SmallVector::iterator I = Indices.begin(); I != Indices.end(); ++ ++I) ++ Ops.push_back(DAG.getTargetConstant(*I, DL, MaskEltTy)); ++ ++ SDValue MaskVec = DAG.getBuildVector(MaskVecTy, DL, Ops); ++ ++ if (Using1stVec && Using2ndVec) { ++ Op0 = Op->getOperand(0); ++ Op1 = Op->getOperand(1); ++ } else if (Using1stVec) ++ Op0 = Op1 = Op->getOperand(0); ++ else if (Using2ndVec) ++ Op0 = Op1 = Op->getOperand(1); ++ else ++ llvm_unreachable("shuffle vector mask references neither vector operand?"); ++ ++ // VECTOR_SHUFFLE concatenates the vectors in an vectorwise fashion. ++ // <0b00, 0b01> + <0b10, 0b11> -> <0b00, 0b01, 0b10, 0b11> ++ // VSHF concatenates the vectors in a bitwise fashion: ++ // <0b00, 0b01> + <0b10, 0b11> -> ++ // 0b0100 + 0b1110 -> 0b01001110 ++ // <0b10, 0b11, 0b00, 0b01> ++ // We must therefore swap the operands to get the correct result. ++ return DAG.getNode(LoongArchISD::VSHF, DL, ResTy, MaskVec, Op1, Op0); ++} ++ ++static SDValue lowerVECTOR_SHUFFLE_XVILVL(SDValue Op, EVT ResTy, ++ SmallVector Indices, ++ SelectionDAG &DAG) { ++ assert((Indices.size() % 2) == 0); ++ ++ SDValue Xj; ++ SDValue Xk; ++ const auto &Begin = Indices.begin(); ++ const auto &End = Indices.end(); ++ unsigned HalfSize = Indices.size() / 2; ++ ++ if (fitsRegularPattern(Begin, 2, End, 0, 1) && ++ fitsRegularPattern(Begin + HalfSize, 2, End, HalfSize, 1)) ++ Xj = Op->getOperand(0); ++ else if (fitsRegularPattern(Begin, 2, End, Indices.size(), 1) && ++ fitsRegularPattern(Begin + HalfSize, 2, End, ++ Indices.size() + HalfSize, 1)) ++ Xj = Op->getOperand(1); ++ else ++ return SDValue(); ++ ++ if (fitsRegularPattern(Begin + 1, 2, End, 0, 1) && ++ fitsRegularPattern(Begin + 1 + HalfSize, 2, End, HalfSize, 1)) ++ Xk = Op->getOperand(0); ++ else if (fitsRegularPattern(Begin + 1, 2, End, Indices.size(), 1) && ++ fitsRegularPattern(Begin + 1 + HalfSize, 2, End, ++ Indices.size() + HalfSize, 1)) ++ Xk = Op->getOperand(1); ++ else ++ return SDValue(); ++ ++ return DAG.getNode(LoongArchISD::VILVL, SDLoc(Op), ResTy, Xk, Xj); ++} ++ ++static SDValue lowerVECTOR_SHUFFLE_XVILVH(SDValue Op, EVT ResTy, ++ SmallVector Indices, ++ SelectionDAG &DAG) { ++ assert((Indices.size() % 2) == 0); ++ ++ unsigned HalfSize = Indices.size() / 2; ++ unsigned LeftSize = HalfSize / 2; ++ SDValue Xj; ++ SDValue Xk; ++ const auto &Begin = Indices.begin(); ++ const auto &End = Indices.end(); ++ ++ if (fitsRegularPattern(Begin, 2, End, HalfSize - LeftSize, 1) && ++ fitsRegularPattern(Begin + HalfSize + LeftSize, 2, End, ++ HalfSize + LeftSize, 1)) ++ Xj = Op->getOperand(0); ++ else if (fitsRegularPattern(Begin, 2, End, ++ Indices.size() + HalfSize - LeftSize, 1) && ++ fitsRegularPattern(Begin + HalfSize + LeftSize, 2, End, ++ Indices.size() + HalfSize + LeftSize, 1)) ++ Xj = Op->getOperand(1); ++ else ++ return SDValue(); ++ ++ if (fitsRegularPattern(Begin + 1, 2, End, HalfSize, 1) && ++ 
fitsRegularPattern(Begin + 1 + HalfSize + LeftSize, 2, End, ++ HalfSize + LeftSize, 1)) ++ Xk = Op->getOperand(0); ++ else if (fitsRegularPattern(Begin + 1, 2, End, Indices.size() + HalfSize, ++ 1) && ++ fitsRegularPattern(Begin + 1 + HalfSize + LeftSize, 2, End, ++ Indices.size() + HalfSize + LeftSize, 1)) ++ Xk = Op->getOperand(1); ++ else ++ return SDValue(); ++ ++ return DAG.getNode(LoongArchISD::VILVH, SDLoc(Op), ResTy, Xk, Xj); ++} ++ ++static SDValue lowerVECTOR_SHUFFLE_XVPACKEV(SDValue Op, EVT ResTy, ++ SmallVector Indices, ++ SelectionDAG &DAG) { ++ assert((Indices.size() % 2) == 0); ++ ++ SDValue Xj; ++ SDValue Xk; ++ const auto &Begin = Indices.begin(); ++ const auto &End = Indices.end(); ++ unsigned HalfSize = Indices.size() / 2; ++ ++ if (fitsRegularPattern(Begin, 2, End, 0, 2) && ++ fitsRegularPattern(Begin + HalfSize, 2, End, HalfSize, 2)) ++ Xj = Op->getOperand(0); ++ else if (fitsRegularPattern(Begin, 2, End, Indices.size(), 2) && ++ fitsRegularPattern(Begin + HalfSize, 2, End, ++ Indices.size() + HalfSize, 2)) ++ Xj = Op->getOperand(1); ++ else ++ return SDValue(); ++ ++ if (fitsRegularPattern(Begin + 1, 2, End, 0, 2) && ++ fitsRegularPattern(Begin + 1 + HalfSize, 2, End, HalfSize, 2)) ++ Xk = Op->getOperand(0); ++ else if (fitsRegularPattern(Begin + 1, 2, End, Indices.size(), 2) && ++ fitsRegularPattern(Begin + 1 + HalfSize, 2, End, ++ Indices.size() + HalfSize, 2)) ++ Xk = Op->getOperand(1); ++ else ++ return SDValue(); ++ ++ return DAG.getNode(LoongArchISD::VPACKEV, SDLoc(Op), ResTy, Xk, Xj); ++} ++ ++static SDValue lowerVECTOR_SHUFFLE_XVPACKOD(SDValue Op, EVT ResTy, ++ SmallVector Indices, ++ SelectionDAG &DAG) { ++ assert((Indices.size() % 2) == 0); ++ ++ SDValue Xj; ++ SDValue Xk; ++ const auto &Begin = Indices.begin(); ++ const auto &End = Indices.end(); ++ unsigned HalfSize = Indices.size() / 2; ++ ++ if (fitsRegularPattern(Begin, 2, End, 1, 2) && ++ fitsRegularPattern(Begin + HalfSize, 2, End, HalfSize + 1, 2)) ++ Xj = Op->getOperand(0); ++ else if (fitsRegularPattern(Begin, 2, End, Indices.size() + 1, 2) && ++ fitsRegularPattern(Begin + HalfSize, 2, End, ++ Indices.size() + HalfSize + 1, 2)) ++ Xj = Op->getOperand(1); ++ else ++ return SDValue(); ++ ++ if (fitsRegularPattern(Begin + 1, 2, End, 1, 2) && ++ fitsRegularPattern(Begin + 1 + HalfSize, 2, End, HalfSize + 1, 2)) ++ Xk = Op->getOperand(0); ++ else if (fitsRegularPattern(Begin + 1, 2, End, Indices.size() + 1, 2) && ++ fitsRegularPattern(Begin + 1 + HalfSize, 2, End, ++ Indices.size() + HalfSize + 1, 2)) ++ Xk = Op->getOperand(1); ++ else ++ return SDValue(); ++ ++ return DAG.getNode(LoongArchISD::VPACKOD, SDLoc(Op), ResTy, Xk, Xj); ++} ++ ++static bool isVECTOR_SHUFFLE_XVREPLVEI(SDValue Op, EVT ResTy, ++ SmallVector Indices, ++ SelectionDAG &DAG) { ++ assert((Indices.size() % 2) == 0); ++ unsigned HalfSize = Indices.size() / 2; ++ ++ for (unsigned i = 0; i < HalfSize; i++) { ++ if (Indices[i] == -1 || Indices[HalfSize + i] == -1) ++ return false; ++ if (Indices[0] != Indices[i] || Indices[HalfSize] != Indices[HalfSize + i]) ++ return false; ++ } ++ return true; ++} ++ ++static SDValue lowerVECTOR_SHUFFLE_XVPICKEV(SDValue Op, EVT ResTy, ++ SmallVector Indices, ++ SelectionDAG &DAG) { ++ assert((Indices.size() % 2) == 0); ++ ++ SDValue Xj; ++ SDValue Xk; ++ const auto &Begin = Indices.begin(); ++ const auto &LeftMid = Indices.begin() + Indices.size() / 4; ++ const auto &End = Indices.end(); ++ const auto &RightMid = Indices.end() - Indices.size() / 4; ++ const auto &Mid = Indices.begin() + Indices.size() / 
2; ++ unsigned HalfSize = Indices.size() / 2; ++ ++ if (fitsRegularPattern(Begin, 1, LeftMid, 0, 2) && ++ fitsRegularPattern(Mid, 1, RightMid, HalfSize, 2)) ++ Xj = Op->getOperand(0); ++ else if (fitsRegularPattern(Begin, 1, LeftMid, Indices.size(), 2) && ++ fitsRegularPattern(Mid, 1, RightMid, Indices.size() + HalfSize, ++ 2)) ++ Xj = Op->getOperand(1); ++ else ++ return SDValue(); ++ ++ if (fitsRegularPattern(LeftMid, 1, Mid, 0, 2) && ++ fitsRegularPattern(RightMid, 1, End, HalfSize, 2)) ++ Xk = Op->getOperand(0); ++ else if (fitsRegularPattern(LeftMid, 1, Mid, Indices.size(), 2) && ++ fitsRegularPattern(RightMid, 1, End, Indices.size() + HalfSize, ++ 2)) ++ Xk = Op->getOperand(1); ++ else ++ return SDValue(); ++ ++ return DAG.getNode(LoongArchISD::VPICKEV, SDLoc(Op), ResTy, Xk, Xj); ++} ++ ++static SDValue lowerVECTOR_SHUFFLE_XVPICKOD(SDValue Op, EVT ResTy, ++ SmallVector Indices, ++ SelectionDAG &DAG) { ++ assert((Indices.size() % 2) == 0); ++ ++ SDValue Xj; ++ SDValue Xk; ++ const auto &Begin = Indices.begin(); ++ const auto &LeftMid = Indices.begin() + Indices.size() / 4; ++ const auto &Mid = Indices.begin() + Indices.size() / 2; ++ const auto &RightMid = Indices.end() - Indices.size() / 4; ++ const auto &End = Indices.end(); ++ unsigned HalfSize = Indices.size() / 2; ++ ++ if (fitsRegularPattern(Begin, 1, LeftMid, 1, 2) && ++ fitsRegularPattern(Mid, 1, RightMid, HalfSize + 1, 2)) ++ Xj = Op->getOperand(0); ++ else if (fitsRegularPattern(Begin, 1, LeftMid, Indices.size() + 1, 2) && ++ fitsRegularPattern(Mid, 1, RightMid, ++ Indices.size() + HalfSize + 1, 2)) ++ Xj = Op->getOperand(1); ++ else ++ return SDValue(); ++ ++ if (fitsRegularPattern(LeftMid, 1, Mid, 1, 2) && ++ fitsRegularPattern(RightMid, 1, End, HalfSize + 1, 2)) ++ Xk = Op->getOperand(0); ++ else if (fitsRegularPattern(LeftMid, 1, Mid, Indices.size() + 1, 2) && ++ fitsRegularPattern(RightMid, 1, End, ++ Indices.size() + HalfSize + 1, 2)) ++ Xk = Op->getOperand(1); ++ else ++ return SDValue(); ++ ++ return DAG.getNode(LoongArchISD::VPICKOD, SDLoc(Op), ResTy, Xk, Xj); ++} ++ ++static SDValue lowerVECTOR_SHUFFLE_XSHF(SDValue Op, EVT ResTy, ++ SmallVector Indices, ++ SelectionDAG &DAG) { ++ int SHFIndices[4] = {-1, -1, -1, -1}; ++ ++ if (Indices.size() < 4) ++ return SDValue(); ++ ++ int HalfSize = Indices.size() / 2; ++ for (int i = 0; i < 4; ++i) { ++ for (int j = i; j < HalfSize; j += 4) { ++ int Idx = Indices[j]; ++ // check mxshf ++ if (Idx + HalfSize != Indices[j + HalfSize]) ++ return SDValue(); ++ ++ // Convert from vector index to 4-element subvector index ++ // If an index refers to an element outside of the subvector then give up ++ if (Idx != -1) { ++ Idx -= 4 * (j / 4); ++ if (Idx < 0 || Idx >= 4) ++ return SDValue(); ++ } ++ ++ // If the mask has an undef, replace it with the current index. ++ // Note that it might still be undef if the current index is also undef ++ if (SHFIndices[i] == -1) ++ SHFIndices[i] = Idx; ++ ++ // Check that non-undef values are the same as in the mask. If they ++ // aren't then give up ++ if (!(Idx == -1 || Idx == SHFIndices[i])) ++ return SDValue(); ++ } ++ } ++ ++ // Calculate the immediate. 
Replace any remaining undefs with zero ++ APInt Imm(32, 0); ++ for (int i = 3; i >= 0; --i) { ++ int Idx = SHFIndices[i]; ++ ++ if (Idx == -1) ++ Idx = 0; ++ ++ Imm <<= 2; ++ Imm |= Idx & 0x3; ++ } ++ SDLoc DL(Op); ++ return DAG.getNode(LoongArchISD::SHF, DL, ResTy, ++ DAG.getConstant(Imm, DL, MVT::i32), Op->getOperand(0)); ++} ++ ++static bool isConstantOrUndef(const SDValue Op) { ++ if (Op->isUndef()) ++ return true; ++ if (isa(Op)) ++ return true; ++ if (isa(Op)) ++ return true; ++ return false; ++} ++ ++static bool isConstantOrUndefBUILD_VECTOR(const BuildVectorSDNode *Op) { ++ for (unsigned i = 0; i < Op->getNumOperands(); ++i) ++ if (isConstantOrUndef(Op->getOperand(i))) ++ return true; ++ return false; ++} ++ ++static bool isLASXBySplatBitSize(unsigned SplatBitSize, EVT &ViaVecTy) { ++ switch (SplatBitSize) { ++ default: ++ return false; ++ case 8: ++ ViaVecTy = MVT::v32i8; ++ break; ++ case 16: ++ ViaVecTy = MVT::v16i16; ++ break; ++ case 32: ++ ViaVecTy = MVT::v8i32; ++ break; ++ case 64: ++ ViaVecTy = MVT::v4i64; ++ break; ++ case 128: ++ // There's no fill.q to fall back on for 64-bit values ++ return false; ++ } ++ ++ return true; ++} ++ ++static bool isLSXBySplatBitSize(unsigned SplatBitSize, EVT &ViaVecTy) { ++ switch (SplatBitSize) { ++ default: ++ return false; ++ case 8: ++ ViaVecTy = MVT::v16i8; ++ break; ++ case 16: ++ ViaVecTy = MVT::v8i16; ++ break; ++ case 32: ++ ViaVecTy = MVT::v4i32; ++ break; ++ case 64: ++ // There's no fill.d to fall back on for 64-bit values ++ return false; ++ } ++ ++ return true; ++} ++ ++bool LoongArchTargetLowering::isCheapToSpeculateCttz() const { return true; } ++ ++bool LoongArchTargetLowering::isCheapToSpeculateCtlz() const { return true; } ++ ++void LoongArchTargetLowering::LowerOperationWrapper( ++ SDNode *N, SmallVectorImpl &Results, SelectionDAG &DAG) const { ++ SDValue Res = LowerOperation(SDValue(N, 0), DAG); ++ ++ for (unsigned I = 0, E = Res->getNumValues(); I != E; ++I) ++ Results.push_back(Res.getValue(I)); ++} ++ ++void LoongArchTargetLowering::ReplaceNodeResults( ++ SDNode *N, SmallVectorImpl &Results, SelectionDAG &DAG) const { ++ return LowerOperationWrapper(N, Results, DAG); ++} ++ ++SDValue LoongArchTargetLowering::LowerOperation(SDValue Op, ++ SelectionDAG &DAG) const { ++ switch (Op.getOpcode()) { ++ case ISD::STORE: ++ return lowerSTORE(Op, DAG); ++ case ISD::INTRINSIC_WO_CHAIN: ++ return lowerINTRINSIC_WO_CHAIN(Op, DAG); ++ case ISD::INTRINSIC_W_CHAIN: ++ return lowerINTRINSIC_W_CHAIN(Op, DAG); ++ case ISD::INTRINSIC_VOID: ++ return lowerINTRINSIC_VOID(Op, DAG); ++ case ISD::EXTRACT_VECTOR_ELT: ++ return lowerEXTRACT_VECTOR_ELT(Op, DAG); ++ case ISD::INSERT_VECTOR_ELT: ++ return lowerINSERT_VECTOR_ELT(Op, DAG); ++ case ISD::BUILD_VECTOR: ++ return lowerBUILD_VECTOR(Op, DAG); ++ case ISD::VECTOR_SHUFFLE: ++ return lowerVECTOR_SHUFFLE(Op, DAG); ++ case ISD::UINT_TO_FP: ++ return lowerUINT_TO_FP(Op, DAG); ++ case ISD::SINT_TO_FP: ++ return lowerSINT_TO_FP(Op, DAG); ++ case ISD::FP_TO_UINT: ++ return lowerFP_TO_UINT(Op, DAG); ++ case ISD::FP_TO_SINT: ++ return lowerFP_TO_SINT(Op, DAG); ++ case ISD::BRCOND: ++ return lowerBRCOND(Op, DAG); ++ case ISD::ConstantPool: ++ return lowerConstantPool(Op, DAG); ++ case ISD::GlobalAddress: ++ return lowerGlobalAddress(Op, DAG); ++ case ISD::BlockAddress: ++ return lowerBlockAddress(Op, DAG); ++ case ISD::GlobalTLSAddress: ++ return lowerGlobalTLSAddress(Op, DAG); ++ case ISD::JumpTable: ++ return lowerJumpTable(Op, DAG); ++ case ISD::SELECT: ++ return lowerSELECT(Op, DAG); ++ case 
ISD::SETCC: ++ return lowerSETCC(Op, DAG); ++ case ISD::VASTART: ++ return lowerVASTART(Op, DAG); ++ case ISD::VAARG: ++ return lowerVAARG(Op, DAG); ++ case ISD::FRAMEADDR: ++ return lowerFRAMEADDR(Op, DAG); ++ case ISD::RETURNADDR: ++ return lowerRETURNADDR(Op, DAG); ++ case ISD::EH_RETURN: ++ return lowerEH_RETURN(Op, DAG); ++ case ISD::ATOMIC_FENCE: ++ return lowerATOMIC_FENCE(Op, DAG); ++ case ISD::SHL_PARTS: ++ return lowerShiftLeftParts(Op, DAG); ++ case ISD::SRA_PARTS: ++ return lowerShiftRightParts(Op, DAG, true); ++ case ISD::SRL_PARTS: ++ return lowerShiftRightParts(Op, DAG, false); ++ case ISD::EH_DWARF_CFA: ++ return lowerEH_DWARF_CFA(Op, DAG); ++ } ++ return SDValue(); ++} ++ ++//===----------------------------------------------------------------------===// ++// Lower helper functions ++//===----------------------------------------------------------------------===// ++ ++template ++SDValue LoongArchTargetLowering::getAddr(NodeTy *N, SelectionDAG &DAG, ++ bool IsLocal) const { ++ SDLoc DL(N); ++ EVT Ty = getPointerTy(DAG.getDataLayout()); ++ ++ if (isPositionIndependent()) { ++ SDValue Addr = getTargetNode(N, Ty, DAG, 0U); ++ if (IsLocal) ++ // Use PC-relative addressing to access the symbol. ++ return SDValue(DAG.getMachineNode(LoongArch::LoadAddrLocal, DL, Ty, Addr), ++ 0); ++ ++ // Use PC-relative addressing to access the GOT for this symbol, then load ++ // the address from the GOT. ++ return SDValue(DAG.getMachineNode(LoongArch::LoadAddrGlobal, DL, Ty, Addr), ++ 0); ++ } ++ ++ SDValue Addr = getTargetNode(N, Ty, DAG, 0U); ++ return SDValue(DAG.getMachineNode(LoongArch::LoadAddrLocal, DL, Ty, Addr), 0); ++} ++ ++// addLiveIn - This helper function adds the specified physical register to the ++// MachineFunction as a live in value. It also creates a corresponding ++// virtual register for it. ++static unsigned addLiveIn(MachineFunction &MF, unsigned PReg, ++ const TargetRegisterClass *RC) { ++ unsigned VReg = MF.getRegInfo().createVirtualRegister(RC); ++ MF.getRegInfo().addLiveIn(PReg, VReg); ++ return VReg; ++} ++ ++static MachineBasicBlock *insertDivByZeroTrap(MachineInstr &MI, ++ MachineBasicBlock &MBB, ++ const TargetInstrInfo &TII, ++ bool Is64Bit) { ++ if (NoZeroDivCheck) ++ return &MBB; ++ ++ // Insert pseudo instruction(PseudoTEQ), will expand: ++ // beq $divisor_reg, $zero, 8 ++ // break 7 ++ MachineBasicBlock::iterator I(MI); ++ MachineInstrBuilder MIB; ++ MachineOperand &Divisor = MI.getOperand(2); ++ unsigned TeqOp = LoongArch::PseudoTEQ; ++ ++ MIB = BuildMI(MBB, std::next(I), MI.getDebugLoc(), TII.get(TeqOp)) ++ .addReg(Divisor.getReg(), getKillRegState(Divisor.isKill())); ++ ++ // Use the 32-bit sub-register if this is a 64-bit division. ++ //if (Is64Bit) ++ // MIB->getOperand(0).setSubReg(LoongArch::sub_32); ++ ++ // Clear Divisor's kill flag. ++ Divisor.setIsKill(false); ++ ++ // We would normally delete the original instruction here but in this case ++ // we only needed to inject an additional instruction rather than replace it. 
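// Illustration, not part of the patch: integer division on LoongArch does not
// fault on a zero divisor, so the injected check is what raises the exception.
// At the source level the guarded division behaves roughly like:
//   if (divisor == 0)
//     trap();                       // the "break 7" in the expansion above
//   quotient = dividend / divisor;  // the original DIV/MOD stays in place
// which is why the original instruction is kept rather than replaced, and why
// the NoZeroDivCheck option above skips inserting the sequence entirely.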
++ ++ return &MBB; ++} ++ ++MachineBasicBlock * ++LoongArchTargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI, ++ MachineBasicBlock *BB) const { ++ switch (MI.getOpcode()) { ++ default: ++ llvm_unreachable("Unexpected instr type to insert"); ++ case LoongArch::FILL_FW_PSEUDO: ++ return emitFILL_FW(MI, BB); ++ case LoongArch::FILL_FD_PSEUDO: ++ return emitFILL_FD(MI, BB); ++ case LoongArch::SNZ_B_PSEUDO: ++ return emitLSXCBranchPseudo(MI, BB, LoongArch::VSETALLNEZ_B); ++ case LoongArch::SNZ_H_PSEUDO: ++ return emitLSXCBranchPseudo(MI, BB, LoongArch::VSETALLNEZ_H); ++ case LoongArch::SNZ_W_PSEUDO: ++ return emitLSXCBranchPseudo(MI, BB, LoongArch::VSETALLNEZ_W); ++ case LoongArch::SNZ_D_PSEUDO: ++ return emitLSXCBranchPseudo(MI, BB, LoongArch::VSETALLNEZ_D); ++ case LoongArch::SNZ_V_PSEUDO: ++ return emitLSXCBranchPseudo(MI, BB, LoongArch::VSETNEZ_V); ++ case LoongArch::SZ_B_PSEUDO: ++ return emitLSXCBranchPseudo(MI, BB, LoongArch::VSETANYEQZ_B); ++ case LoongArch::SZ_H_PSEUDO: ++ return emitLSXCBranchPseudo(MI, BB, LoongArch::VSETANYEQZ_H); ++ case LoongArch::SZ_W_PSEUDO: ++ return emitLSXCBranchPseudo(MI, BB, LoongArch::VSETANYEQZ_W); ++ case LoongArch::SZ_D_PSEUDO: ++ return emitLSXCBranchPseudo(MI, BB, LoongArch::VSETANYEQZ_D); ++ case LoongArch::SZ_V_PSEUDO: ++ return emitLSXCBranchPseudo(MI, BB, LoongArch::VSETEQZ_V); ++ case LoongArch::XSNZ_B_PSEUDO: ++ return emitLSXCBranchPseudo(MI, BB, LoongArch::XVSETALLNEZ_B); ++ case LoongArch::XSNZ_H_PSEUDO: ++ return emitLSXCBranchPseudo(MI, BB, LoongArch::XVSETALLNEZ_H); ++ case LoongArch::XSNZ_W_PSEUDO: ++ return emitLSXCBranchPseudo(MI, BB, LoongArch::XVSETALLNEZ_W); ++ case LoongArch::XSNZ_D_PSEUDO: ++ return emitLSXCBranchPseudo(MI, BB, LoongArch::XVSETALLNEZ_D); ++ case LoongArch::XSNZ_V_PSEUDO: ++ return emitLSXCBranchPseudo(MI, BB, LoongArch::XVSETNEZ_V); ++ case LoongArch::XSZ_B_PSEUDO: ++ return emitLSXCBranchPseudo(MI, BB, LoongArch::XVSETANYEQZ_B); ++ case LoongArch::XSZ_H_PSEUDO: ++ return emitLSXCBranchPseudo(MI, BB, LoongArch::XVSETANYEQZ_H); ++ case LoongArch::XSZ_W_PSEUDO: ++ return emitLSXCBranchPseudo(MI, BB, LoongArch::XVSETANYEQZ_W); ++ case LoongArch::XSZ_D_PSEUDO: ++ return emitLSXCBranchPseudo(MI, BB, LoongArch::XVSETANYEQZ_D); ++ case LoongArch::XSZ_V_PSEUDO: ++ return emitLSXCBranchPseudo(MI, BB, LoongArch::XVSETEQZ_V); ++ case LoongArch::INSERT_FW_PSEUDO: ++ return emitINSERT_FW(MI, BB); ++ case LoongArch::INSERT_FD_PSEUDO: ++ return emitINSERT_FD(MI, BB); ++ case LoongArch::XINSERT_H_PSEUDO: ++ return emitXINSERT_BH(MI, BB, 2); ++ case LoongArch::XCOPY_FW_PSEUDO: ++ return emitXCOPY_FW(MI, BB); ++ case LoongArch::XCOPY_FD_PSEUDO: ++ return emitXCOPY_FD(MI, BB); ++ case LoongArch::XINSERT_FW_PSEUDO: ++ return emitXINSERT_FW(MI, BB); ++ case LoongArch::COPY_FW_PSEUDO: ++ return emitCOPY_FW(MI, BB); ++ case LoongArch::XFILL_FW_PSEUDO: ++ return emitXFILL_FW(MI, BB); ++ case LoongArch::XFILL_FD_PSEUDO: ++ return emitXFILL_FD(MI, BB); ++ case LoongArch::COPY_FD_PSEUDO: ++ return emitCOPY_FD(MI, BB); ++ case LoongArch::XINSERT_FD_PSEUDO: ++ return emitXINSERT_FD(MI, BB); ++ case LoongArch::XINSERT_B_PSEUDO: ++ return emitXINSERT_BH(MI, BB, 1); ++ case LoongArch::CONCAT_VECTORS_B_PSEUDO: ++ return emitCONCAT_VECTORS(MI, BB, 1); ++ case LoongArch::CONCAT_VECTORS_H_PSEUDO: ++ return emitCONCAT_VECTORS(MI, BB, 2); ++ case LoongArch::CONCAT_VECTORS_W_PSEUDO: ++ case LoongArch::CONCAT_VECTORS_FW_PSEUDO: ++ return emitCONCAT_VECTORS(MI, BB, 4); ++ case LoongArch::CONCAT_VECTORS_D_PSEUDO: ++ case 
LoongArch::CONCAT_VECTORS_FD_PSEUDO: ++ return emitCONCAT_VECTORS(MI, BB, 8); ++ case LoongArch::XCOPY_FW_GPR_PSEUDO: ++ return emitXCOPY_FW_GPR(MI, BB); ++ ++ case LoongArch::ATOMIC_LOAD_ADD_I8: ++ return emitAtomicBinaryPartword(MI, BB, 1); ++ case LoongArch::ATOMIC_LOAD_ADD_I16: ++ return emitAtomicBinaryPartword(MI, BB, 2); ++ case LoongArch::ATOMIC_LOAD_ADD_I32: ++ return emitAtomicBinary(MI, BB); ++ case LoongArch::ATOMIC_LOAD_ADD_I64: ++ return emitAtomicBinary(MI, BB); ++ ++ case LoongArch::ATOMIC_LOAD_AND_I8: ++ return emitAtomicBinaryPartword(MI, BB, 1); ++ case LoongArch::ATOMIC_LOAD_AND_I16: ++ return emitAtomicBinaryPartword(MI, BB, 2); ++ case LoongArch::ATOMIC_LOAD_AND_I32: ++ return emitAtomicBinary(MI, BB); ++ case LoongArch::ATOMIC_LOAD_AND_I64: ++ return emitAtomicBinary(MI, BB); ++ ++ case LoongArch::ATOMIC_LOAD_OR_I8: ++ return emitAtomicBinaryPartword(MI, BB, 1); ++ case LoongArch::ATOMIC_LOAD_OR_I16: ++ return emitAtomicBinaryPartword(MI, BB, 2); ++ case LoongArch::ATOMIC_LOAD_OR_I32: ++ return emitAtomicBinary(MI, BB); ++ case LoongArch::ATOMIC_LOAD_OR_I64: ++ return emitAtomicBinary(MI, BB); ++ ++ case LoongArch::ATOMIC_LOAD_XOR_I8: ++ return emitAtomicBinaryPartword(MI, BB, 1); ++ case LoongArch::ATOMIC_LOAD_XOR_I16: ++ return emitAtomicBinaryPartword(MI, BB, 2); ++ case LoongArch::ATOMIC_LOAD_XOR_I32: ++ return emitAtomicBinary(MI, BB); ++ case LoongArch::ATOMIC_LOAD_XOR_I64: ++ return emitAtomicBinary(MI, BB); ++ ++ case LoongArch::ATOMIC_LOAD_NAND_I8: ++ return emitAtomicBinaryPartword(MI, BB, 1); ++ case LoongArch::ATOMIC_LOAD_NAND_I16: ++ return emitAtomicBinaryPartword(MI, BB, 2); ++ case LoongArch::ATOMIC_LOAD_NAND_I32: ++ return emitAtomicBinary(MI, BB); ++ case LoongArch::ATOMIC_LOAD_NAND_I64: ++ return emitAtomicBinary(MI, BB); ++ ++ case LoongArch::ATOMIC_LOAD_SUB_I8: ++ return emitAtomicBinaryPartword(MI, BB, 1); ++ case LoongArch::ATOMIC_LOAD_SUB_I16: ++ return emitAtomicBinaryPartword(MI, BB, 2); ++ case LoongArch::ATOMIC_LOAD_SUB_I32: ++ return emitAtomicBinary(MI, BB); ++ case LoongArch::ATOMIC_LOAD_SUB_I64: ++ return emitAtomicBinary(MI, BB); ++ ++ case LoongArch::ATOMIC_SWAP_I8: ++ return emitAtomicBinaryPartword(MI, BB, 1); ++ case LoongArch::ATOMIC_SWAP_I16: ++ return emitAtomicBinaryPartword(MI, BB, 2); ++ case LoongArch::ATOMIC_SWAP_I32: ++ return emitAtomicBinary(MI, BB); ++ case LoongArch::ATOMIC_SWAP_I64: ++ return emitAtomicBinary(MI, BB); ++ ++ case LoongArch::XINSERT_B_VIDX_PSEUDO: ++ case LoongArch::XINSERT_B_VIDX64_PSEUDO: ++ return emitXINSERT_B(MI, BB); ++ case LoongArch::INSERT_H_VIDX64_PSEUDO: ++ return emitINSERT_H_VIDX(MI, BB); ++ case LoongArch::XINSERT_FW_VIDX_PSEUDO: ++ return emitXINSERT_DF_VIDX(MI, BB, false); ++ case LoongArch::XINSERT_FW_VIDX64_PSEUDO: ++ return emitXINSERT_DF_VIDX(MI, BB, true); ++ ++ case LoongArch::ATOMIC_LOAD_MAX_I8: ++ return emitAtomicBinaryPartword(MI, BB, 1); ++ case LoongArch::ATOMIC_LOAD_MAX_I16: ++ return emitAtomicBinaryPartword(MI, BB, 2); ++ case LoongArch::ATOMIC_LOAD_MAX_I32: ++ return emitAtomicBinary(MI, BB); ++ case LoongArch::ATOMIC_LOAD_MAX_I64: ++ return emitAtomicBinary(MI, BB); ++ ++ case LoongArch::ATOMIC_LOAD_MIN_I8: ++ return emitAtomicBinaryPartword(MI, BB, 1); ++ case LoongArch::ATOMIC_LOAD_MIN_I16: ++ return emitAtomicBinaryPartword(MI, BB, 2); ++ case LoongArch::ATOMIC_LOAD_MIN_I32: ++ return emitAtomicBinary(MI, BB); ++ case LoongArch::ATOMIC_LOAD_MIN_I64: ++ return emitAtomicBinary(MI, BB); ++ ++ case LoongArch::ATOMIC_LOAD_UMAX_I8: ++ return emitAtomicBinaryPartword(MI, 
BB, 1); ++ case LoongArch::ATOMIC_LOAD_UMAX_I16: ++ return emitAtomicBinaryPartword(MI, BB, 2); ++ case LoongArch::ATOMIC_LOAD_UMAX_I32: ++ return emitAtomicBinary(MI, BB); ++ case LoongArch::ATOMIC_LOAD_UMAX_I64: ++ return emitAtomicBinary(MI, BB); ++ ++ case LoongArch::ATOMIC_LOAD_UMIN_I8: ++ return emitAtomicBinaryPartword(MI, BB, 1); ++ case LoongArch::ATOMIC_LOAD_UMIN_I16: ++ return emitAtomicBinaryPartword(MI, BB, 2); ++ case LoongArch::ATOMIC_LOAD_UMIN_I32: ++ return emitAtomicBinary(MI, BB); ++ case LoongArch::ATOMIC_LOAD_UMIN_I64: ++ return emitAtomicBinary(MI, BB); ++ ++ case LoongArch::ATOMIC_CMP_SWAP_I8: ++ return emitAtomicCmpSwapPartword(MI, BB, 1); ++ case LoongArch::ATOMIC_CMP_SWAP_I16: ++ return emitAtomicCmpSwapPartword(MI, BB, 2); ++ case LoongArch::ATOMIC_CMP_SWAP_I32: ++ return emitAtomicCmpSwap(MI, BB); ++ case LoongArch::ATOMIC_CMP_SWAP_I64: ++ return emitAtomicCmpSwap(MI, BB); ++ ++ case LoongArch::PseudoSELECT_I: ++ case LoongArch::PseudoSELECT_I64: ++ case LoongArch::PseudoSELECT_S: ++ case LoongArch::PseudoSELECT_D64: ++ return emitPseudoSELECT(MI, BB, false, LoongArch::BNE32); ++ ++ case LoongArch::PseudoSELECTFP_T_I: ++ case LoongArch::PseudoSELECTFP_T_I64: ++ return emitPseudoSELECT(MI, BB, true, LoongArch::BCNEZ); ++ ++ case LoongArch::PseudoSELECTFP_F_I: ++ case LoongArch::PseudoSELECTFP_F_I64: ++ return emitPseudoSELECT(MI, BB, true, LoongArch::BCEQZ); ++ case LoongArch::DIV_W: ++ case LoongArch::DIV_WU: ++ case LoongArch::MOD_W: ++ case LoongArch::MOD_WU: ++ return insertDivByZeroTrap(MI, *BB, *Subtarget.getInstrInfo(), false); ++ case LoongArch::DIV_D: ++ case LoongArch::DIV_DU: ++ case LoongArch::MOD_D: ++ case LoongArch::MOD_DU: ++ return insertDivByZeroTrap(MI, *BB, *Subtarget.getInstrInfo(), true); ++ } ++} ++ ++MachineBasicBlock *LoongArchTargetLowering::emitXINSERT_DF_VIDX( ++ MachineInstr &MI, MachineBasicBlock *BB, bool IsGPR64) const { ++ ++ MachineFunction *MF = BB->getParent(); ++ MachineRegisterInfo &RegInfo = MF->getRegInfo(); ++ const TargetInstrInfo *TII = Subtarget.getInstrInfo(); ++ DebugLoc DL = MI.getDebugLoc(); ++ ++ unsigned insertOp; ++ insertOp = IsGPR64 ? LoongArch::XINSERT_FW_VIDX64_PSEUDO_POSTRA ++ : LoongArch::XINSERT_FW_VIDX_PSEUDO_POSTRA; ++ ++ unsigned DstReg = MI.getOperand(0).getReg(); ++ unsigned SrcVecReg = MI.getOperand(1).getReg(); ++ unsigned LaneReg = MI.getOperand(2).getReg(); ++ unsigned SrcValReg = MI.getOperand(3).getReg(); ++ unsigned Dest = RegInfo.createVirtualRegister(RegInfo.getRegClass(DstReg)); ++ ++ MachineBasicBlock::iterator II(MI); ++ ++ unsigned VecCopy = ++ RegInfo.createVirtualRegister(RegInfo.getRegClass(SrcVecReg)); ++ unsigned LaneCopy = ++ RegInfo.createVirtualRegister(RegInfo.getRegClass(LaneReg)); ++ unsigned ValCopy = ++ RegInfo.createVirtualRegister(RegInfo.getRegClass(SrcValReg)); ++ ++ const TargetRegisterClass *RC = ++ IsGPR64 ? 
&LoongArch::GPR64RegClass : &LoongArch::GPR32RegClass; ++ unsigned RI = RegInfo.createVirtualRegister(RC); ++ ++ unsigned Rj = RegInfo.createVirtualRegister(&LoongArch::GPR32RegClass); ++ unsigned Xj = RegInfo.createVirtualRegister(&LoongArch::LASX256WRegClass); ++ BuildMI(*BB, II, DL, TII->get(LoongArch::SUBREG_TO_REG), Xj) ++ .addImm(0) ++ .addReg(SrcValReg) ++ .addImm(LoongArch::sub_lo); ++ BuildMI(*BB, II, DL, TII->get(LoongArch::XVPICKVE2GR_W), Rj) ++ .addReg(Xj) ++ .addImm(0); ++ ++ BuildMI(*BB, II, DL, TII->get(LoongArch::COPY), VecCopy).addReg(SrcVecReg); ++ BuildMI(*BB, II, DL, TII->get(LoongArch::COPY), LaneCopy).addReg(LaneReg); ++ BuildMI(*BB, II, DL, TII->get(LoongArch::COPY), ValCopy).addReg(SrcValReg); ++ ++ BuildMI(*BB, II, DL, TII->get(insertOp)) ++ .addReg(DstReg, RegState::Define | RegState::EarlyClobber) ++ .addReg(VecCopy) ++ .addReg(LaneCopy) ++ .addReg(ValCopy) ++ .addReg(Dest, RegState::Define | RegState::EarlyClobber | ++ RegState::Implicit | RegState::Dead) ++ .addReg(RI, RegState::Define | RegState::EarlyClobber | ++ RegState::Implicit | RegState::Dead) ++ .addReg(Rj, RegState::Define | RegState::EarlyClobber | ++ RegState::Implicit | RegState::Dead); ++ ++ MI.eraseFromParent(); ++ ++ return BB; ++} ++ ++MachineBasicBlock * ++LoongArchTargetLowering::emitINSERT_H_VIDX(MachineInstr &MI, ++ MachineBasicBlock *BB) const { ++ ++ MachineFunction *MF = BB->getParent(); ++ MachineRegisterInfo &RegInfo = MF->getRegInfo(); ++ const TargetInstrInfo *TII = Subtarget.getInstrInfo(); ++ DebugLoc DL = MI.getDebugLoc(); ++ ++ unsigned insertOp; ++ unsigned isGP64 = 0; ++ switch (MI.getOpcode()) { ++ case LoongArch::INSERT_H_VIDX64_PSEUDO: ++ isGP64 = 1; ++ insertOp = LoongArch::INSERT_H_VIDX64_PSEUDO_POSTRA; ++ break; ++ default: ++ llvm_unreachable("Unknown pseudo vector for replacement!"); ++ } ++ ++ unsigned DstReg = MI.getOperand(0).getReg(); ++ unsigned SrcVecReg = MI.getOperand(1).getReg(); ++ unsigned LaneReg = MI.getOperand(2).getReg(); ++ unsigned SrcValReg = MI.getOperand(3).getReg(); ++ unsigned Dest = RegInfo.createVirtualRegister(RegInfo.getRegClass(DstReg)); ++ ++ MachineBasicBlock::iterator II(MI); ++ ++ unsigned VecCopy = ++ RegInfo.createVirtualRegister(RegInfo.getRegClass(SrcVecReg)); ++ unsigned LaneCopy = ++ RegInfo.createVirtualRegister(RegInfo.getRegClass(LaneReg)); ++ unsigned ValCopy = ++ RegInfo.createVirtualRegister(RegInfo.getRegClass(SrcValReg)); ++ ++ const TargetRegisterClass *RC = ++ isGP64 ? 
&LoongArch::GPR64RegClass : &LoongArch::GPR32RegClass; ++ unsigned RI = RegInfo.createVirtualRegister(RC); ++ ++ BuildMI(*BB, II, DL, TII->get(LoongArch::COPY), VecCopy).addReg(SrcVecReg); ++ BuildMI(*BB, II, DL, TII->get(LoongArch::COPY), LaneCopy).addReg(LaneReg); ++ BuildMI(*BB, II, DL, TII->get(LoongArch::COPY), ValCopy).addReg(SrcValReg); ++ ++ BuildMI(*BB, II, DL, TII->get(insertOp)) ++ .addReg(DstReg, RegState::Define | RegState::EarlyClobber) ++ .addReg(VecCopy) ++ .addReg(LaneCopy) ++ .addReg(ValCopy) ++ .addReg(Dest, RegState::Define | RegState::EarlyClobber | ++ RegState::Implicit | RegState::Dead) ++ .addReg(RI, RegState::Define | RegState::EarlyClobber | ++ RegState::Implicit | RegState::Dead); ++ ++ MI.eraseFromParent(); ++ ++ return BB; ++} ++ ++MachineBasicBlock * ++LoongArchTargetLowering::emitXINSERT_B(MachineInstr &MI, ++ MachineBasicBlock *BB) const { ++ ++ MachineFunction *MF = BB->getParent(); ++ MachineRegisterInfo &RegInfo = MF->getRegInfo(); ++ const TargetInstrInfo *TII = Subtarget.getInstrInfo(); ++ DebugLoc DL = MI.getDebugLoc(); ++ ++ unsigned insertOp; ++ unsigned isGP64 = 0; ++ switch (MI.getOpcode()) { ++ case LoongArch::XINSERT_B_VIDX64_PSEUDO: ++ isGP64 = 1; ++ insertOp = LoongArch::XINSERT_B_VIDX64_PSEUDO_POSTRA; ++ break; ++ case LoongArch::XINSERT_B_VIDX_PSEUDO: ++ insertOp = LoongArch::XINSERT_B_VIDX_PSEUDO_POSTRA; ++ break; ++ default: ++ llvm_unreachable("Unknown pseudo vector for replacement!"); ++ } ++ ++ unsigned DstReg = MI.getOperand(0).getReg(); ++ unsigned SrcVecReg = MI.getOperand(1).getReg(); ++ unsigned LaneReg = MI.getOperand(2).getReg(); ++ unsigned SrcValReg = MI.getOperand(3).getReg(); ++ unsigned Dest = RegInfo.createVirtualRegister(RegInfo.getRegClass(DstReg)); ++ ++ MachineBasicBlock::iterator II(MI); ++ ++ unsigned VecCopy = ++ RegInfo.createVirtualRegister(RegInfo.getRegClass(SrcVecReg)); ++ unsigned LaneCopy = ++ RegInfo.createVirtualRegister(RegInfo.getRegClass(LaneReg)); ++ unsigned ValCopy = ++ RegInfo.createVirtualRegister(RegInfo.getRegClass(SrcValReg)); ++ const TargetRegisterClass *RC = ++ isGP64 ? 
&LoongArch::GPR64RegClass : &LoongArch::GPR32RegClass; ++ unsigned Rimm = RegInfo.createVirtualRegister(RC); ++ unsigned R4r = RegInfo.createVirtualRegister(RC); ++ unsigned Rib = RegInfo.createVirtualRegister(RC); ++ unsigned Ris = RegInfo.createVirtualRegister(RC); ++ unsigned R7b1 = RegInfo.createVirtualRegister(RC); ++ unsigned R7b2 = RegInfo.createVirtualRegister(RC); ++ unsigned R7b3 = RegInfo.createVirtualRegister(RC); ++ unsigned RI = RegInfo.createVirtualRegister(RC); ++ ++ unsigned R7r80_3 = RegInfo.createVirtualRegister(&LoongArch::GPR32RegClass); ++ unsigned R7r80l_3 = RegInfo.createVirtualRegister(&LoongArch::GPR32RegClass); ++ unsigned R7r81_3 = RegInfo.createVirtualRegister(&LoongArch::GPR32RegClass); ++ unsigned R7r81l_3 = RegInfo.createVirtualRegister(&LoongArch::GPR32RegClass); ++ unsigned R7r82_3 = RegInfo.createVirtualRegister(&LoongArch::GPR32RegClass); ++ unsigned R7r82l_3 = RegInfo.createVirtualRegister(&LoongArch::GPR32RegClass); ++ unsigned R70 = RegInfo.createVirtualRegister(&LoongArch::GPR32RegClass); ++ unsigned tmp_Dst73 = ++ RegInfo.createVirtualRegister(&LoongArch::LASX256BRegClass); ++ ++ BuildMI(*BB, II, DL, TII->get(LoongArch::COPY), VecCopy).addReg(SrcVecReg); ++ BuildMI(*BB, II, DL, TII->get(LoongArch::COPY), LaneCopy).addReg(LaneReg); ++ BuildMI(*BB, II, DL, TII->get(LoongArch::COPY), ValCopy).addReg(SrcValReg); ++ ++ BuildMI(*BB, II, DL, TII->get(insertOp)) ++ .addReg(DstReg, RegState::Define | RegState::EarlyClobber) ++ .addReg(VecCopy) ++ .addReg(LaneCopy) ++ .addReg(ValCopy) ++ .addReg(Dest, RegState::Define | RegState::EarlyClobber | ++ RegState::Implicit | RegState::Dead) ++ .addReg(R4r, RegState::Define | RegState::EarlyClobber | ++ RegState::Implicit | RegState::Dead) ++ .addReg(Rib, RegState::Define | RegState::EarlyClobber | ++ RegState::Implicit | RegState::Dead) ++ .addReg(Ris, RegState::Define | RegState::EarlyClobber | ++ RegState::Implicit | RegState::Dead) ++ .addReg(R7b1, RegState::Define | RegState::EarlyClobber | ++ RegState::Implicit | RegState::Dead) ++ .addReg(R7b2, RegState::Define | RegState::EarlyClobber | ++ RegState::Implicit | RegState::Dead) ++ .addReg(R7b3, RegState::Define | RegState::EarlyClobber | ++ RegState::Implicit | RegState::Dead) ++ .addReg(R7r80_3, RegState::Define | RegState::EarlyClobber | ++ RegState::Implicit | RegState::Dead) ++ .addReg(R7r80l_3, RegState::Define | RegState::EarlyClobber | ++ RegState::Implicit | RegState::Dead) ++ .addReg(R7r81_3, RegState::Define | RegState::EarlyClobber | ++ RegState::Implicit | RegState::Dead) ++ .addReg(R7r81l_3, RegState::Define | RegState::EarlyClobber | ++ RegState::Implicit | RegState::Dead) ++ .addReg(R7r82_3, RegState::Define | RegState::EarlyClobber | ++ RegState::Implicit | RegState::Dead) ++ .addReg(R7r82l_3, RegState::Define | RegState::EarlyClobber | ++ RegState::Implicit | RegState::Dead) ++ .addReg(RI, RegState::Define | RegState::EarlyClobber | ++ RegState::Implicit | RegState::Dead) ++ .addReg(tmp_Dst73, RegState::Define | RegState::EarlyClobber | ++ RegState::Implicit | RegState::Dead) ++ .addReg(Rimm, RegState::Define | RegState::EarlyClobber | ++ RegState::Implicit | RegState::Dead) ++ .addReg(R70, RegState::Define | RegState::EarlyClobber | ++ RegState::Implicit | RegState::Dead); ++ ++ MI.eraseFromParent(); ++ ++ return BB; ++} ++ ++const TargetRegisterClass * ++LoongArchTargetLowering::getRepRegClassFor(MVT VT) const { ++ return TargetLowering::getRepRegClassFor(VT); ++} ++ ++// This function also handles LoongArch::ATOMIC_SWAP_I32 (when BinOpcode 
== 0), and ++// LoongArch::ATOMIC_LOAD_NAND_I32 (when Nand == true) ++MachineBasicBlock * ++LoongArchTargetLowering::emitAtomicBinary(MachineInstr &MI, ++ MachineBasicBlock *BB) const { ++ ++ MachineFunction *MF = BB->getParent(); ++ MachineRegisterInfo &RegInfo = MF->getRegInfo(); ++ const TargetInstrInfo *TII = Subtarget.getInstrInfo(); ++ DebugLoc DL = MI.getDebugLoc(); ++ ++ unsigned AtomicOp; ++ switch (MI.getOpcode()) { ++ case LoongArch::ATOMIC_LOAD_ADD_I32: ++ AtomicOp = LoongArch::ATOMIC_LOAD_ADD_I32_POSTRA; ++ break; ++ case LoongArch::ATOMIC_LOAD_SUB_I32: ++ AtomicOp = LoongArch::ATOMIC_LOAD_SUB_I32_POSTRA; ++ break; ++ case LoongArch::ATOMIC_LOAD_AND_I32: ++ AtomicOp = LoongArch::ATOMIC_LOAD_AND_I32_POSTRA; ++ break; ++ case LoongArch::ATOMIC_LOAD_OR_I32: ++ AtomicOp = LoongArch::ATOMIC_LOAD_OR_I32_POSTRA; ++ break; ++ case LoongArch::ATOMIC_LOAD_XOR_I32: ++ AtomicOp = LoongArch::ATOMIC_LOAD_XOR_I32_POSTRA; ++ break; ++ case LoongArch::ATOMIC_LOAD_NAND_I32: ++ AtomicOp = LoongArch::ATOMIC_LOAD_NAND_I32_POSTRA; ++ break; ++ case LoongArch::ATOMIC_SWAP_I32: ++ AtomicOp = LoongArch::ATOMIC_SWAP_I32_POSTRA; ++ break; ++ case LoongArch::ATOMIC_LOAD_MAX_I32: ++ AtomicOp = LoongArch::ATOMIC_LOAD_MAX_I32_POSTRA; ++ break; ++ case LoongArch::ATOMIC_LOAD_MIN_I32: ++ AtomicOp = LoongArch::ATOMIC_LOAD_MIN_I32_POSTRA; ++ break; ++ case LoongArch::ATOMIC_LOAD_UMAX_I32: ++ AtomicOp = LoongArch::ATOMIC_LOAD_UMAX_I32_POSTRA; ++ break; ++ case LoongArch::ATOMIC_LOAD_UMIN_I32: ++ AtomicOp = LoongArch::ATOMIC_LOAD_UMIN_I32_POSTRA; ++ break; ++ case LoongArch::ATOMIC_LOAD_ADD_I64: ++ AtomicOp = LoongArch::ATOMIC_LOAD_ADD_I64_POSTRA; ++ break; ++ case LoongArch::ATOMIC_LOAD_SUB_I64: ++ AtomicOp = LoongArch::ATOMIC_LOAD_SUB_I64_POSTRA; ++ break; ++ case LoongArch::ATOMIC_LOAD_AND_I64: ++ AtomicOp = LoongArch::ATOMIC_LOAD_AND_I64_POSTRA; ++ break; ++ case LoongArch::ATOMIC_LOAD_OR_I64: ++ AtomicOp = LoongArch::ATOMIC_LOAD_OR_I64_POSTRA; ++ break; ++ case LoongArch::ATOMIC_LOAD_XOR_I64: ++ AtomicOp = LoongArch::ATOMIC_LOAD_XOR_I64_POSTRA; ++ break; ++ case LoongArch::ATOMIC_LOAD_NAND_I64: ++ AtomicOp = LoongArch::ATOMIC_LOAD_NAND_I64_POSTRA; ++ break; ++ case LoongArch::ATOMIC_SWAP_I64: ++ AtomicOp = LoongArch::ATOMIC_SWAP_I64_POSTRA; ++ break; ++ case LoongArch::ATOMIC_LOAD_MAX_I64: ++ AtomicOp = LoongArch::ATOMIC_LOAD_MAX_I64_POSTRA; ++ break; ++ case LoongArch::ATOMIC_LOAD_MIN_I64: ++ AtomicOp = LoongArch::ATOMIC_LOAD_MIN_I64_POSTRA; ++ break; ++ case LoongArch::ATOMIC_LOAD_UMAX_I64: ++ AtomicOp = LoongArch::ATOMIC_LOAD_UMAX_I64_POSTRA; ++ break; ++ case LoongArch::ATOMIC_LOAD_UMIN_I64: ++ AtomicOp = LoongArch::ATOMIC_LOAD_UMIN_I64_POSTRA; ++ break; ++ default: ++ llvm_unreachable("Unknown pseudo atomic for replacement!"); ++ } ++ ++ unsigned OldVal = MI.getOperand(0).getReg(); ++ unsigned Ptr = MI.getOperand(1).getReg(); ++ unsigned Incr = MI.getOperand(2).getReg(); ++ unsigned Scratch = RegInfo.createVirtualRegister(RegInfo.getRegClass(OldVal)); ++ ++ MachineBasicBlock::iterator II(MI); ++ ++ // The scratch registers here with the EarlyClobber | Define | Implicit ++ // flags is used to persuade the register allocator and the machine ++ // verifier to accept the usage of this register. This has to be a real ++ // register which has an UNDEF value but is dead after the instruction which ++ // is unique among the registers chosen for the instruction. ++ ++ // The EarlyClobber flag has the semantic properties that the operand it is ++ // attached to is clobbered before the rest of the inputs are read. 
Hence it ++ // must be unique among the operands to the instruction. ++ // The Define flag is needed to coerce the machine verifier that an Undef ++ // value isn't a problem. ++ // The Dead flag is needed as the value in scratch isn't used by any other ++ // instruction. Kill isn't used as Dead is more precise. ++ // The implicit flag is here due to the interaction between the other flags ++ // and the machine verifier. ++ ++ // For correctness purpose, a new pseudo is introduced here. We need this ++ // new pseudo, so that FastRegisterAllocator does not see an ll/sc sequence ++ // that is spread over >1 basic blocks. A register allocator which ++ // introduces (or any codegen infact) a store, can violate the expectations ++ // of the hardware. ++ // ++ // An atomic read-modify-write sequence starts with a linked load ++ // instruction and ends with a store conditional instruction. The atomic ++ // read-modify-write sequence fails if any of the following conditions ++ // occur between the execution of ll and sc: ++ // * A coherent store is completed by another process or coherent I/O ++ // module into the block of synchronizable physical memory containing ++ // the word. The size and alignment of the block is ++ // implementation-dependent. ++ // * A coherent store is executed between an LL and SC sequence on the ++ // same processor to the block of synchornizable physical memory ++ // containing the word. ++ // ++ ++ unsigned PtrCopy = RegInfo.createVirtualRegister(RegInfo.getRegClass(Ptr)); ++ unsigned IncrCopy = RegInfo.createVirtualRegister(RegInfo.getRegClass(Incr)); ++ ++ BuildMI(*BB, II, DL, TII->get(LoongArch::COPY), IncrCopy).addReg(Incr); ++ BuildMI(*BB, II, DL, TII->get(LoongArch::COPY), PtrCopy).addReg(Ptr); ++ ++ BuildMI(*BB, II, DL, TII->get(AtomicOp)) ++ .addReg(OldVal, RegState::Define | RegState::EarlyClobber) ++ .addReg(PtrCopy) ++ .addReg(IncrCopy) ++ .addReg(Scratch, RegState::Define | RegState::EarlyClobber | ++ RegState::Implicit | RegState::Dead); ++ ++ if(MI.getOpcode() == LoongArch::ATOMIC_LOAD_NAND_I32 ++ || MI.getOpcode() == LoongArch::ATOMIC_LOAD_NAND_I64){ ++ BuildMI(*BB, II, DL, TII->get(LoongArch::DBAR)).addImm(DBAR_HINT); ++ } ++ ++ MI.eraseFromParent(); ++ ++ return BB; ++} ++ ++MachineBasicBlock *LoongArchTargetLowering::emitSignExtendToI32InReg( ++ MachineInstr &MI, MachineBasicBlock *BB, unsigned Size, unsigned DstReg, ++ unsigned SrcReg) const { ++ const TargetInstrInfo *TII = Subtarget.getInstrInfo(); ++ const DebugLoc &DL = MI.getDebugLoc(); ++ if (Size == 1) { ++ BuildMI(BB, DL, TII->get(LoongArch::EXT_W_B32), DstReg).addReg(SrcReg); ++ return BB; ++ } ++ ++ if (Size == 2) { ++ BuildMI(BB, DL, TII->get(LoongArch::EXT_W_H32), DstReg).addReg(SrcReg); ++ return BB; ++ } ++ ++ MachineFunction *MF = BB->getParent(); ++ MachineRegisterInfo &RegInfo = MF->getRegInfo(); ++ const TargetRegisterClass *RC = getRegClassFor(MVT::i32); ++ unsigned ScrReg = RegInfo.createVirtualRegister(RC); ++ ++ assert(Size < 32); ++ int64_t ShiftImm = 32 - (Size * 8); ++ ++ BuildMI(BB, DL, TII->get(LoongArch::SLLI_W), ScrReg).addReg(SrcReg).addImm(ShiftImm); ++ BuildMI(BB, DL, TII->get(LoongArch::SRAI_W), DstReg).addReg(ScrReg).addImm(ShiftImm); ++ ++ return BB; ++} ++ ++MachineBasicBlock *LoongArchTargetLowering::emitAtomicBinaryPartword( ++ MachineInstr &MI, MachineBasicBlock *BB, unsigned Size) const { ++ assert((Size == 1 || Size == 2) && ++ "Unsupported size for EmitAtomicBinaryPartial."); ++ ++ MachineFunction *MF = BB->getParent(); ++ MachineRegisterInfo &RegInfo = 
MF->getRegInfo(); ++ const TargetRegisterClass *RC = getRegClassFor(MVT::i32); ++ const bool ArePtrs64bit = ABI.ArePtrs64bit(); ++ const TargetRegisterClass *RCp = ++ getRegClassFor(ArePtrs64bit ? MVT::i64 : MVT::i32); ++ const TargetInstrInfo *TII = Subtarget.getInstrInfo(); ++ DebugLoc DL = MI.getDebugLoc(); ++ ++ unsigned Dest = MI.getOperand(0).getReg(); ++ unsigned Ptr = MI.getOperand(1).getReg(); ++ unsigned Incr = MI.getOperand(2).getReg(); ++ ++ unsigned AlignedAddr = RegInfo.createVirtualRegister(RCp); ++ unsigned ShiftAmt = RegInfo.createVirtualRegister(RC); ++ unsigned Mask = RegInfo.createVirtualRegister(RC); ++ unsigned Mask2 = RegInfo.createVirtualRegister(RC); ++ unsigned Incr2 = RegInfo.createVirtualRegister(RC); ++ unsigned MaskLSB2 = RegInfo.createVirtualRegister(RCp); ++ unsigned PtrLSB2 = RegInfo.createVirtualRegister(RC); ++ unsigned MaskUpper = RegInfo.createVirtualRegister(RC); ++ unsigned MaskUppest = RegInfo.createVirtualRegister(RC); ++ unsigned Scratch = RegInfo.createVirtualRegister(RC); ++ unsigned Scratch2 = RegInfo.createVirtualRegister(RC); ++ unsigned Scratch3 = RegInfo.createVirtualRegister(RC); ++ unsigned Scratch4 = RegInfo.createVirtualRegister(RC); ++ unsigned Scratch5 = RegInfo.createVirtualRegister(RC); ++ ++ unsigned AtomicOp = 0; ++ switch (MI.getOpcode()) { ++ case LoongArch::ATOMIC_LOAD_NAND_I8: ++ AtomicOp = LoongArch::ATOMIC_LOAD_NAND_I8_POSTRA; ++ break; ++ case LoongArch::ATOMIC_LOAD_NAND_I16: ++ AtomicOp = LoongArch::ATOMIC_LOAD_NAND_I16_POSTRA; ++ break; ++ case LoongArch::ATOMIC_SWAP_I8: ++ AtomicOp = LoongArch::ATOMIC_SWAP_I8_POSTRA; ++ break; ++ case LoongArch::ATOMIC_SWAP_I16: ++ AtomicOp = LoongArch::ATOMIC_SWAP_I16_POSTRA; ++ break; ++ case LoongArch::ATOMIC_LOAD_MAX_I8: ++ AtomicOp = LoongArch::ATOMIC_LOAD_MAX_I8_POSTRA; ++ break; ++ case LoongArch::ATOMIC_LOAD_MAX_I16: ++ AtomicOp = LoongArch::ATOMIC_LOAD_MAX_I16_POSTRA; ++ break; ++ case LoongArch::ATOMIC_LOAD_MIN_I8: ++ AtomicOp = LoongArch::ATOMIC_LOAD_MIN_I8_POSTRA; ++ break; ++ case LoongArch::ATOMIC_LOAD_MIN_I16: ++ AtomicOp = LoongArch::ATOMIC_LOAD_MIN_I16_POSTRA; ++ break; ++ case LoongArch::ATOMIC_LOAD_UMAX_I8: ++ AtomicOp = LoongArch::ATOMIC_LOAD_UMAX_I8_POSTRA; ++ break; ++ case LoongArch::ATOMIC_LOAD_UMAX_I16: ++ AtomicOp = LoongArch::ATOMIC_LOAD_UMAX_I16_POSTRA; ++ break; ++ case LoongArch::ATOMIC_LOAD_UMIN_I8: ++ AtomicOp = LoongArch::ATOMIC_LOAD_UMIN_I8_POSTRA; ++ break; ++ case LoongArch::ATOMIC_LOAD_UMIN_I16: ++ AtomicOp = LoongArch::ATOMIC_LOAD_UMIN_I16_POSTRA; ++ break; ++ case LoongArch::ATOMIC_LOAD_ADD_I8: ++ AtomicOp = LoongArch::ATOMIC_LOAD_ADD_I8_POSTRA; ++ break; ++ case LoongArch::ATOMIC_LOAD_ADD_I16: ++ AtomicOp = LoongArch::ATOMIC_LOAD_ADD_I16_POSTRA; ++ break; ++ case LoongArch::ATOMIC_LOAD_SUB_I8: ++ AtomicOp = LoongArch::ATOMIC_LOAD_SUB_I8_POSTRA; ++ break; ++ case LoongArch::ATOMIC_LOAD_SUB_I16: ++ AtomicOp = LoongArch::ATOMIC_LOAD_SUB_I16_POSTRA; ++ break; ++ case LoongArch::ATOMIC_LOAD_AND_I8: ++ AtomicOp = LoongArch::ATOMIC_LOAD_AND_I8_POSTRA; ++ break; ++ case LoongArch::ATOMIC_LOAD_AND_I16: ++ AtomicOp = LoongArch::ATOMIC_LOAD_AND_I16_POSTRA; ++ break; ++ case LoongArch::ATOMIC_LOAD_OR_I8: ++ AtomicOp = LoongArch::ATOMIC_LOAD_OR_I8_POSTRA; ++ break; ++ case LoongArch::ATOMIC_LOAD_OR_I16: ++ AtomicOp = LoongArch::ATOMIC_LOAD_OR_I16_POSTRA; ++ break; ++ case LoongArch::ATOMIC_LOAD_XOR_I8: ++ AtomicOp = LoongArch::ATOMIC_LOAD_XOR_I8_POSTRA; ++ break; ++ case LoongArch::ATOMIC_LOAD_XOR_I16: ++ AtomicOp = LoongArch::ATOMIC_LOAD_XOR_I16_POSTRA; ++ 
break; ++ default: ++ llvm_unreachable("Unknown subword atomic pseudo for expansion!"); ++ } ++ ++ // insert new blocks after the current block ++ const BasicBlock *LLVM_BB = BB->getBasicBlock(); ++ MachineBasicBlock *exitMBB = MF->CreateMachineBasicBlock(LLVM_BB); ++ MachineFunction::iterator It = ++BB->getIterator(); ++ MF->insert(It, exitMBB); ++ ++ // Transfer the remainder of BB and its successor edges to exitMBB. ++ exitMBB->splice(exitMBB->begin(), BB, ++ std::next(MachineBasicBlock::iterator(MI)), BB->end()); ++ exitMBB->transferSuccessorsAndUpdatePHIs(BB); ++ ++ BB->addSuccessor(exitMBB, BranchProbability::getOne()); ++ ++ // thisMBB: ++ // addiu masklsb2,$0,-4 # 0xfffffffc ++ // and alignedaddr,ptr,masklsb2 ++ // andi ptrlsb2,ptr,3 ++ // sll shiftamt,ptrlsb2,3 ++ // ori maskupper,$0,255 # 0xff ++ // sll mask,maskupper,shiftamt ++ // nor mask2,$0,mask ++ // sll incr2,incr,shiftamt ++ ++ int64_t MaskImm = (Size == 1) ? 255 : 4095; ++ BuildMI(BB, DL, TII->get(ABI.GetPtrAddiOp()), MaskLSB2) ++ .addReg(ABI.GetNullPtr()).addImm(-4); ++ BuildMI(BB, DL, TII->get(ABI.GetPtrAndOp()), AlignedAddr) ++ .addReg(Ptr).addReg(MaskLSB2); ++ BuildMI(BB, DL, TII->get(LoongArch::ANDI32), PtrLSB2) ++ .addReg(Ptr, 0, ArePtrs64bit ? LoongArch::sub_32 : 0).addImm(3); ++ BuildMI(BB, DL, TII->get(LoongArch::SLLI_W), ShiftAmt).addReg(PtrLSB2).addImm(3); ++ ++ if(MaskImm==4095){ ++ BuildMI(BB, DL, TII->get(LoongArch::LU12I_W32), MaskUppest).addImm(0xf); ++ BuildMI(BB, DL, TII->get(LoongArch::ORI32), MaskUpper) ++ .addReg(MaskUppest).addImm(MaskImm); ++ } ++ else{ ++ BuildMI(BB, DL, TII->get(LoongArch::ORI32), MaskUpper) ++ .addReg(LoongArch::ZERO).addImm(MaskImm); ++ } ++ ++ BuildMI(BB, DL, TII->get(LoongArch::SLL_W), Mask) ++ .addReg(MaskUpper).addReg(ShiftAmt); ++ BuildMI(BB, DL, TII->get(LoongArch::NOR32), Mask2).addReg(LoongArch::ZERO).addReg(Mask); ++ BuildMI(BB, DL, TII->get(LoongArch::SLL_W), Incr2).addReg(Incr).addReg(ShiftAmt); ++ ++ ++ // The purposes of the flags on the scratch registers is explained in ++ // emitAtomicBinary. In summary, we need a scratch register which is going to ++ // be undef, that is unique among registers chosen for the instruction. ++ ++ BuildMI(BB, DL, TII->get(LoongArch::DBAR)).addImm(0); ++ BuildMI(BB, DL, TII->get(AtomicOp)) ++ .addReg(Dest, RegState::Define | RegState::EarlyClobber) ++ .addReg(AlignedAddr) ++ .addReg(Incr2) ++ .addReg(Mask) ++ .addReg(Mask2) ++ .addReg(ShiftAmt) ++ .addReg(Scratch, RegState::EarlyClobber | RegState::Define | ++ RegState::Dead | RegState::Implicit) ++ .addReg(Scratch2, RegState::EarlyClobber | RegState::Define | ++ RegState::Dead | RegState::Implicit) ++ .addReg(Scratch3, RegState::EarlyClobber | RegState::Define | ++ RegState::Dead | RegState::Implicit) ++ .addReg(Scratch4, RegState::EarlyClobber | RegState::Define | ++ RegState::Dead | RegState::Implicit) ++ .addReg(Scratch5, RegState::EarlyClobber | RegState::Define | ++ RegState::Dead | RegState::Implicit); ++ ++ MI.eraseFromParent(); // The instruction is gone now. ++ ++ return exitMBB; ++} ++ ++// Lower atomic compare and swap to a pseudo instruction, taking care to ++// define a scratch register for the pseudo instruction's expansion. The ++// instruction is expanded after the register allocator as to prevent ++// the insertion of stores between the linked load and the store conditional. 
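++// After register allocation the POSTRA pseudo is rewritten into an ll/sc
++// retry loop; keeping the whole sequence inside a single pseudo is what
++// guarantees that no spill or reload code can be placed between the linked
++// load and the store conditional.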
++
++MachineBasicBlock *
++LoongArchTargetLowering::emitAtomicCmpSwap(MachineInstr &MI,
++                                           MachineBasicBlock *BB) const {
++  assert((MI.getOpcode() == LoongArch::ATOMIC_CMP_SWAP_I32 ||
++          MI.getOpcode() == LoongArch::ATOMIC_CMP_SWAP_I64) &&
++         "Unsupported atomic pseudo for EmitAtomicCmpSwap.");
++
++  const unsigned Size = MI.getOpcode() == LoongArch::ATOMIC_CMP_SWAP_I32 ? 4 : 8;
++
++  MachineFunction *MF = BB->getParent();
++  MachineRegisterInfo &MRI = MF->getRegInfo();
++  const TargetRegisterClass *RC = getRegClassFor(MVT::getIntegerVT(Size * 8));
++  const TargetInstrInfo *TII = Subtarget.getInstrInfo();
++  DebugLoc DL = MI.getDebugLoc();
++
++  unsigned AtomicOp = MI.getOpcode() == LoongArch::ATOMIC_CMP_SWAP_I32
++                          ? LoongArch::ATOMIC_CMP_SWAP_I32_POSTRA
++                          : LoongArch::ATOMIC_CMP_SWAP_I64_POSTRA;
++  unsigned Dest = MI.getOperand(0).getReg();
++  unsigned Ptr = MI.getOperand(1).getReg();
++  unsigned OldVal = MI.getOperand(2).getReg();
++  unsigned NewVal = MI.getOperand(3).getReg();
++
++  unsigned Scratch = MRI.createVirtualRegister(RC);
++  MachineBasicBlock::iterator II(MI);
++
++  // We need to create copies of the various registers and kill them at the
++  // atomic pseudo. If the copies are not made, when the atomic is expanded
++  // after fast register allocation, the spills will end up outside of the
++  // blocks that their values are defined in, causing livein errors.
++
++  unsigned PtrCopy = MRI.createVirtualRegister(MRI.getRegClass(Ptr));
++  unsigned OldValCopy = MRI.createVirtualRegister(MRI.getRegClass(OldVal));
++  unsigned NewValCopy = MRI.createVirtualRegister(MRI.getRegClass(NewVal));
++
++  BuildMI(*BB, II, DL, TII->get(LoongArch::COPY), PtrCopy).addReg(Ptr);
++  BuildMI(*BB, II, DL, TII->get(LoongArch::COPY), OldValCopy).addReg(OldVal);
++  BuildMI(*BB, II, DL, TII->get(LoongArch::COPY), NewValCopy).addReg(NewVal);
++
++  // The purposes of the flags on the scratch registers are explained in
++  // emitAtomicBinary. In summary, we need a scratch register which is going to
++  // be undef, that is unique among registers chosen for the instruction.
++
++  BuildMI(*BB, II, DL, TII->get(LoongArch::DBAR)).addImm(0);
++  BuildMI(*BB, II, DL, TII->get(AtomicOp))
++      .addReg(Dest, RegState::Define | RegState::EarlyClobber)
++      .addReg(PtrCopy, RegState::Kill)
++      .addReg(OldValCopy, RegState::Kill)
++      .addReg(NewValCopy, RegState::Kill)
++      .addReg(Scratch, RegState::EarlyClobber | RegState::Define |
++                           RegState::Dead | RegState::Implicit);
++
++  BuildMI(*BB, II, DL, TII->get(LoongArch::DBAR)).addImm(DBAR_HINT);
++
++  MI.eraseFromParent(); // The instruction is gone now.
++
++  return BB;
++}
++
++MachineBasicBlock *LoongArchTargetLowering::emitAtomicCmpSwapPartword(
++    MachineInstr &MI, MachineBasicBlock *BB, unsigned Size) const {
++  assert((Size == 1 || Size == 2) &&
++         "Unsupported size for EmitAtomicCmpSwapPartial.");
++
++  MachineFunction *MF = BB->getParent();
++  MachineRegisterInfo &RegInfo = MF->getRegInfo();
++  const TargetRegisterClass *RC = getRegClassFor(MVT::i32);
++  const bool ArePtrs64bit = ABI.ArePtrs64bit();
++  const TargetRegisterClass *RCp =
++      getRegClassFor(ArePtrs64bit ?
MVT::i64 : MVT::i32); ++ const TargetInstrInfo *TII = Subtarget.getInstrInfo(); ++ DebugLoc DL = MI.getDebugLoc(); ++ ++ unsigned Dest = MI.getOperand(0).getReg(); ++ unsigned Ptr = MI.getOperand(1).getReg(); ++ unsigned CmpVal = MI.getOperand(2).getReg(); ++ unsigned NewVal = MI.getOperand(3).getReg(); ++ ++ unsigned AlignedAddr = RegInfo.createVirtualRegister(RCp); ++ unsigned ShiftAmt = RegInfo.createVirtualRegister(RC); ++ unsigned Mask = RegInfo.createVirtualRegister(RC); ++ unsigned Mask2 = RegInfo.createVirtualRegister(RC); ++ unsigned ShiftedCmpVal = RegInfo.createVirtualRegister(RC); ++ unsigned ShiftedNewVal = RegInfo.createVirtualRegister(RC); ++ unsigned MaskLSB2 = RegInfo.createVirtualRegister(RCp); ++ unsigned PtrLSB2 = RegInfo.createVirtualRegister(RC); ++ unsigned MaskUpper = RegInfo.createVirtualRegister(RC); ++ unsigned MaskUppest = RegInfo.createVirtualRegister(RC); ++ unsigned Mask3 = RegInfo.createVirtualRegister(RC); ++ unsigned MaskedCmpVal = RegInfo.createVirtualRegister(RC); ++ unsigned MaskedNewVal = RegInfo.createVirtualRegister(RC); ++ unsigned AtomicOp = MI.getOpcode() == LoongArch::ATOMIC_CMP_SWAP_I8 ++ ? LoongArch::ATOMIC_CMP_SWAP_I8_POSTRA ++ : LoongArch::ATOMIC_CMP_SWAP_I16_POSTRA; ++ ++ // The scratch registers here with the EarlyClobber | Define | Dead | Implicit ++ // flags are used to coerce the register allocator and the machine verifier to ++ // accept the usage of these registers. ++ // The EarlyClobber flag has the semantic properties that the operand it is ++ // attached to is clobbered before the rest of the inputs are read. Hence it ++ // must be unique among the operands to the instruction. ++ // The Define flag is needed to coerce the machine verifier that an Undef ++ // value isn't a problem. ++ // The Dead flag is needed as the value in scratch isn't used by any other ++ // instruction. Kill isn't used as Dead is more precise. ++ unsigned Scratch = RegInfo.createVirtualRegister(RC); ++ unsigned Scratch2 = RegInfo.createVirtualRegister(RC); ++ ++ // insert new blocks after the current block ++ const BasicBlock *LLVM_BB = BB->getBasicBlock(); ++ MachineBasicBlock *exitMBB = MF->CreateMachineBasicBlock(LLVM_BB); ++ MachineFunction::iterator It = ++BB->getIterator(); ++ MF->insert(It, exitMBB); ++ ++ // Transfer the remainder of BB and its successor edges to exitMBB. ++ exitMBB->splice(exitMBB->begin(), BB, ++ std::next(MachineBasicBlock::iterator(MI)), BB->end()); ++ exitMBB->transferSuccessorsAndUpdatePHIs(BB); ++ ++ BB->addSuccessor(exitMBB, BranchProbability::getOne()); ++ ++ // thisMBB: ++ // addiu masklsb2,$0,-4 # 0xfffffffc ++ // and alignedaddr,ptr,masklsb2 ++ // andi ptrlsb2,ptr,3 ++ // xori ptrlsb2,ptrlsb2,3 # Only for BE ++ // sll shiftamt,ptrlsb2,3 ++ // ori maskupper,$0,255 # 0xff ++ // sll mask,maskupper,shiftamt ++ // nor mask2,$0,mask ++ // andi maskedcmpval,cmpval,255 ++ // sll shiftedcmpval,maskedcmpval,shiftamt ++ // andi maskednewval,newval,255 ++ // sll shiftednewval,maskednewval,shiftamt ++ ++ int64_t MaskImm = (Size == 1) ? 255 : 4095; ++ BuildMI(BB, DL, TII->get(ArePtrs64bit ? LoongArch::ADDI_D : LoongArch::ADDI_W), MaskLSB2) ++ .addReg(ABI.GetNullPtr()).addImm(-4); ++ BuildMI(BB, DL, TII->get(ArePtrs64bit ? LoongArch::AND : LoongArch::AND32), AlignedAddr) ++ .addReg(Ptr).addReg(MaskLSB2); ++ BuildMI(BB, DL, TII->get(LoongArch::ANDI32), PtrLSB2) ++ .addReg(Ptr, 0, ArePtrs64bit ? 
LoongArch::sub_32 : 0).addImm(3); ++ BuildMI(BB, DL, TII->get(LoongArch::SLLI_W), ShiftAmt).addReg(PtrLSB2).addImm(3); ++ ++ if(MaskImm==4095){ ++ BuildMI(BB, DL, TII->get(LoongArch::LU12I_W32), MaskUppest).addImm(0xf); ++ BuildMI(BB, DL, TII->get(LoongArch::ORI32), MaskUpper) ++ .addReg(MaskUppest).addImm(MaskImm); ++ } ++ else{ ++ BuildMI(BB, DL, TII->get(LoongArch::ORI32), MaskUpper) ++ .addReg(LoongArch::ZERO).addImm(MaskImm); ++ } ++ ++ BuildMI(BB, DL, TII->get(LoongArch::SLL_W), Mask) ++ .addReg(MaskUpper).addReg(ShiftAmt); ++ BuildMI(BB, DL, TII->get(LoongArch::NOR32), Mask2).addReg(LoongArch::ZERO).addReg(Mask); ++ if(MaskImm==4095){ ++ BuildMI(BB, DL, TII->get(LoongArch::ORI32), Mask3) ++ .addReg(MaskUppest).addImm(MaskImm); ++ BuildMI(BB, DL, TII->get(LoongArch::AND32), MaskedCmpVal) ++ .addReg(CmpVal).addReg(Mask3); ++ BuildMI(BB, DL, TII->get(LoongArch::SLL_W), ShiftedCmpVal) ++ .addReg(MaskedCmpVal).addReg(ShiftAmt); ++ BuildMI(BB, DL, TII->get(LoongArch::AND32), MaskedNewVal) ++ .addReg(NewVal).addReg(Mask3); ++ } ++ else{ ++ BuildMI(BB, DL, TII->get(LoongArch::ANDI32), MaskedCmpVal) ++ .addReg(CmpVal).addImm(MaskImm); ++ BuildMI(BB, DL, TII->get(LoongArch::SLL_W), ShiftedCmpVal) ++ .addReg(MaskedCmpVal).addReg(ShiftAmt); ++ BuildMI(BB, DL, TII->get(LoongArch::ANDI32), MaskedNewVal) ++ .addReg(NewVal).addImm(MaskImm); ++ } ++ BuildMI(BB, DL, TII->get(LoongArch::SLL_W), ShiftedNewVal) ++ .addReg(MaskedNewVal).addReg(ShiftAmt); ++ ++ // The purposes of the flags on the scratch registers are explained in ++ // emitAtomicBinary. In summary, we need a scratch register which is going to ++ // be undef, that is unique among the register chosen for the instruction. ++ ++ BuildMI(BB, DL, TII->get(LoongArch::DBAR)).addImm(0); ++ BuildMI(BB, DL, TII->get(AtomicOp)) ++ .addReg(Dest, RegState::Define | RegState::EarlyClobber) ++ .addReg(AlignedAddr) ++ .addReg(Mask) ++ .addReg(ShiftedCmpVal) ++ .addReg(Mask2) ++ .addReg(ShiftedNewVal) ++ .addReg(ShiftAmt) ++ .addReg(Scratch, RegState::EarlyClobber | RegState::Define | ++ RegState::Dead | RegState::Implicit) ++ .addReg(Scratch2, RegState::EarlyClobber | RegState::Define | ++ RegState::Dead | RegState::Implicit); ++ ++ MI.eraseFromParent(); // The instruction is gone now. ++ ++ return exitMBB; ++} ++ ++SDValue LoongArchTargetLowering::lowerBRCOND(SDValue Op, SelectionDAG &DAG) const { ++ // The first operand is the chain, the second is the condition, the third is ++ // the block to branch to if the condition is true. ++ SDValue Chain = Op.getOperand(0); ++ SDValue Dest = Op.getOperand(2); ++ SDLoc DL(Op); ++ ++ SDValue CondRes = createFPCmp(DAG, Op.getOperand(1)); ++ ++ // Return if flag is not set by a floating point comparison. ++ if (CondRes.getOpcode() != LoongArchISD::FPCmp) ++ return Op; ++ ++ SDValue CCNode = CondRes.getOperand(2); ++ LoongArch::CondCode CC = ++ (LoongArch::CondCode)cast(CCNode)->getZExtValue(); ++ unsigned Opc = invertFPCondCodeUser(CC) ? LoongArch::BRANCH_F : LoongArch::BRANCH_T; ++ SDValue BrCode = DAG.getConstant(Opc, DL, MVT::i32); ++ SDValue FCC0 = DAG.getRegister(LoongArch::FCC0, MVT::i32); ++ return DAG.getNode(LoongArchISD::FPBrcond, DL, Op.getValueType(), Chain, BrCode, ++ FCC0, Dest, CondRes); ++} ++ ++SDValue LoongArchTargetLowering::lowerSELECT(SDValue Op, ++ SelectionDAG &DAG) const { ++ SDValue Cond = createFPCmp(DAG, Op.getOperand(0)); ++ ++ // Return if flag is not set by a floating point comparison. 
++ if (Cond.getOpcode() != LoongArchISD::FPCmp) ++ return Op; ++ ++ SDValue N1 = Op.getOperand(1); ++ SDValue N2 = Op.getOperand(2); ++ SDLoc DL(Op); ++ ++ ConstantSDNode *CC = cast(Cond.getOperand(2)); ++ bool invert = invertFPCondCodeUser((LoongArch::CondCode)CC->getSExtValue()); ++ SDValue FCC = DAG.getRegister(LoongArch::FCC0, MVT::i32); ++ ++ if (Op->getSimpleValueType(0).SimpleTy == MVT::f64 || ++ Op->getSimpleValueType(0).SimpleTy == MVT::f32) { ++ if (invert) ++ return DAG.getNode(LoongArchISD::FSEL, DL, N1.getValueType(), N1, FCC, N2, ++ Cond); ++ else ++ return DAG.getNode(LoongArchISD::FSEL, DL, N1.getValueType(), N2, FCC, N1, ++ Cond); ++ ++ } else ++ return Op; ++} ++ ++SDValue LoongArchTargetLowering::lowerSETCC(SDValue Op, SelectionDAG &DAG) const { ++ SDValue Cond = createFPCmp(DAG, Op); ++ ++ assert(Cond.getOpcode() == LoongArchISD::FPCmp && ++ "Floating point operand expected."); ++ ++ SDLoc DL(Op); ++ SDValue True = DAG.getConstant(1, DL, MVT::i32); ++ SDValue False = DAG.getConstant(0, DL, MVT::i32); ++ ++ return createCMovFP(DAG, Cond, True, False, DL); ++} ++ ++SDValue LoongArchTargetLowering::lowerGlobalAddress(SDValue Op, ++ SelectionDAG &DAG) const { ++ GlobalAddressSDNode *N = cast(Op); ++ ++ const GlobalValue *GV = N->getGlobal(); ++ bool IsLocal = getTargetMachine().shouldAssumeDSOLocal(*GV->getParent(), GV); ++ SDValue Addr = getAddr(N, DAG, IsLocal); ++ ++ return Addr; ++} ++ ++SDValue LoongArchTargetLowering::lowerBlockAddress(SDValue Op, ++ SelectionDAG &DAG) const { ++ BlockAddressSDNode *N = cast(Op); ++ ++ return getAddr(N, DAG); ++} ++ ++SDValue LoongArchTargetLowering:: ++lowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const ++{ ++ GlobalAddressSDNode *GA = cast(Op); ++ if (DAG.getTarget().useEmulatedTLS()) ++ return LowerToTLSEmulatedModel(GA, DAG); ++ ++ SDLoc DL(GA); ++ const GlobalValue *GV = GA->getGlobal(); ++ EVT PtrVT = getPointerTy(DAG.getDataLayout()); ++ ++ TLSModel::Model model = getTargetMachine().getTLSModel(GV); ++ ++ if (model == TLSModel::GeneralDynamic || model == TLSModel::LocalDynamic) { ++ // General Dynamic TLS Model && Local Dynamic TLS Model ++ unsigned PtrSize = PtrVT.getSizeInBits(); ++ IntegerType *PtrTy = Type::getIntNTy(*DAG.getContext(), PtrSize); ++ // SDValue Addr = DAG.getTargetGlobalAddress(GV, DL, PtrTy, 0, 0); ++ SDValue Addr = DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0, 0U); ++ SDValue Load = SDValue(DAG.getMachineNode(LoongArch::LoadAddrTLS_GD , ++ DL, PtrVT, Addr), 0); ++ SDValue TlsGetAddr = DAG.getExternalSymbol("__tls_get_addr", PtrVT); ++ ++ ArgListTy Args; ++ ArgListEntry Entry; ++ Entry.Node = Load; ++ Entry.Ty = PtrTy; ++ Args.push_back(Entry); ++ ++ TargetLowering::CallLoweringInfo CLI(DAG); ++ CLI.setDebugLoc(DL) ++ .setChain(DAG.getEntryNode()) ++ .setLibCallee(CallingConv::C, PtrTy, TlsGetAddr, std::move(Args)); ++ std::pair CallResult = LowerCallTo(CLI); ++ ++ SDValue Ret = CallResult.first; ++ ++ return Ret; ++ } ++ ++ SDValue Addr = DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0, 0U); ++ SDValue Offset; ++ if (model == TLSModel::InitialExec) { ++ // Initial Exec TLS Model ++ Offset = SDValue(DAG.getMachineNode(LoongArch::LoadAddrTLS_IE, DL, ++ PtrVT, Addr), 0); ++ } else { ++ // Local Exec TLS Model ++ assert(model == TLSModel::LocalExec); ++ Offset = SDValue(DAG.getMachineNode(LoongArch::LoadAddrTLS_LE, DL, ++ PtrVT, Addr), 0); ++ } ++ ++ SDValue ThreadPointer = DAG.getRegister((PtrVT == MVT::i32) ++ ? 
LoongArch::TP ++ : LoongArch::TP_64, PtrVT); ++ return DAG.getNode(ISD::ADD, DL, PtrVT, ThreadPointer, Offset); ++} ++ ++SDValue LoongArchTargetLowering:: ++lowerJumpTable(SDValue Op, SelectionDAG &DAG) const ++{ ++ JumpTableSDNode *N = cast(Op); ++ ++ return getAddr(N, DAG); ++} ++ ++SDValue LoongArchTargetLowering:: ++lowerConstantPool(SDValue Op, SelectionDAG &DAG) const ++{ ++ ConstantPoolSDNode *N = cast(Op); ++ ++ return getAddr(N, DAG); ++} ++ ++SDValue LoongArchTargetLowering::lowerVASTART(SDValue Op, SelectionDAG &DAG) const { ++ MachineFunction &MF = DAG.getMachineFunction(); ++ LoongArchFunctionInfo *FuncInfo = MF.getInfo(); ++ ++ SDLoc DL(Op); ++ SDValue FI = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), ++ getPointerTy(MF.getDataLayout())); ++ ++ // vastart just stores the address of the VarArgsFrameIndex slot into the ++ // memory location argument. ++ const Value *SV = cast(Op.getOperand(2))->getValue(); ++ return DAG.getStore(Op.getOperand(0), DL, FI, Op.getOperand(1), ++ MachinePointerInfo(SV)); ++} ++ ++SDValue LoongArchTargetLowering::lowerVAARG(SDValue Op, SelectionDAG &DAG) const { ++ SDNode *Node = Op.getNode(); ++ EVT VT = Node->getValueType(0); ++ SDValue Chain = Node->getOperand(0); ++ SDValue VAListPtr = Node->getOperand(1); ++ const Align Align = ++ llvm::MaybeAlign(Node->getConstantOperandVal(3)).valueOrOne(); ++ const Value *SV = cast(Node->getOperand(2))->getValue(); ++ SDLoc DL(Node); ++ unsigned ArgSlotSizeInBytes = Subtarget.is64Bit() ? 8 : 4; ++ ++ SDValue VAListLoad = DAG.getLoad(getPointerTy(DAG.getDataLayout()), DL, Chain, ++ VAListPtr, MachinePointerInfo(SV)); ++ SDValue VAList = VAListLoad; ++ ++ // Re-align the pointer if necessary. ++ // It should only ever be necessary for 64-bit types on ILP32D/ILP32F/ILP32S ++ // since the minimum argument alignment is the same as the maximum type ++ // alignment for LP64D/LP64S/LP64F. ++ // ++ // FIXME: We currently align too often. The code generator doesn't notice ++ // when the pointer is still aligned from the last va_arg (or pair of ++ // va_args for the i64 on ILP32D/ILP32F/ILP32S case). ++ if (Align > getMinStackArgumentAlignment()) { ++ VAList = DAG.getNode( ++ ISD::ADD, DL, VAList.getValueType(), VAList, ++ DAG.getConstant(Align.value() - 1, DL, VAList.getValueType())); ++ ++ VAList = DAG.getNode( ++ ISD::AND, DL, VAList.getValueType(), VAList, ++ DAG.getConstant(-(int64_t)Align.value(), DL, VAList.getValueType())); ++ } ++ ++ // Increment the pointer, VAList, to the next vaarg. 
++ auto &TD = DAG.getDataLayout(); ++ unsigned ArgSizeInBytes = ++ TD.getTypeAllocSize(VT.getTypeForEVT(*DAG.getContext())); ++ SDValue Tmp3 = ++ DAG.getNode(ISD::ADD, DL, VAList.getValueType(), VAList, ++ DAG.getConstant(alignTo(ArgSizeInBytes, ArgSlotSizeInBytes), ++ DL, VAList.getValueType())); ++ // Store the incremented VAList to the legalized pointer ++ Chain = DAG.getStore(VAListLoad.getValue(1), DL, Tmp3, VAListPtr, ++ MachinePointerInfo(SV)); ++ ++ // Load the actual argument out of the pointer VAList ++ return DAG.getLoad(VT, DL, Chain, VAList, MachinePointerInfo()); ++} ++ ++SDValue LoongArchTargetLowering:: ++lowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const { ++ // check the depth ++ assert((cast(Op.getOperand(0))->getZExtValue() == 0) && ++ "Frame address can only be determined for current frame."); ++ ++ MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo(); ++ MFI.setFrameAddressIsTaken(true); ++ EVT VT = Op.getValueType(); ++ SDLoc DL(Op); ++ SDValue FrameAddr = DAG.getCopyFromReg( ++ DAG.getEntryNode(), DL, ++ Subtarget.is64Bit() ? LoongArch::FP_64 : LoongArch::FP, VT); ++ return FrameAddr; ++} ++ ++SDValue LoongArchTargetLowering::lowerRETURNADDR(SDValue Op, ++ SelectionDAG &DAG) const { ++ if (verifyReturnAddressArgumentIsConstant(Op, DAG)) ++ return SDValue(); ++ ++ // check the depth ++ assert((cast(Op.getOperand(0))->getZExtValue() == 0) && ++ "Return address can be determined only for current frame."); ++ ++ MachineFunction &MF = DAG.getMachineFunction(); ++ MachineFrameInfo &MFI = MF.getFrameInfo(); ++ MVT VT = Op.getSimpleValueType(); ++ unsigned RA = Subtarget.is64Bit() ? LoongArch::RA_64 : LoongArch::RA; ++ MFI.setReturnAddressIsTaken(true); ++ ++ // Return RA, which contains the return address. Mark it an implicit live-in. ++ unsigned Reg = MF.addLiveIn(RA, getRegClassFor(VT)); ++ return DAG.getCopyFromReg(DAG.getEntryNode(), SDLoc(Op), Reg, VT); ++} ++ ++// An EH_RETURN is the result of lowering llvm.eh.return which in turn is ++// generated from __builtin_eh_return (offset, handler) ++// The effect of this is to adjust the stack pointer by "offset" ++// and then branch to "handler". ++SDValue LoongArchTargetLowering::lowerEH_RETURN(SDValue Op, SelectionDAG &DAG) ++ const { ++ MachineFunction &MF = DAG.getMachineFunction(); ++ LoongArchFunctionInfo *LoongArchFI = MF.getInfo(); ++ ++ LoongArchFI->setCallsEhReturn(); ++ SDValue Chain = Op.getOperand(0); ++ SDValue Offset = Op.getOperand(1); ++ SDValue Handler = Op.getOperand(2); ++ SDLoc DL(Op); ++ EVT Ty = Subtarget.is64Bit() ? MVT::i64 : MVT::i32; ++ ++ // Store stack offset in A1, store jump target in A0. Glue CopyToReg and ++ // EH_RETURN nodes, so that instructions are emitted back-to-back. ++ unsigned OffsetReg = Subtarget.is64Bit() ? LoongArch::A1_64 : LoongArch::A1; ++ unsigned AddrReg = Subtarget.is64Bit() ? LoongArch::A0_64 : LoongArch::A0; ++ Chain = DAG.getCopyToReg(Chain, DL, OffsetReg, Offset, SDValue()); ++ Chain = DAG.getCopyToReg(Chain, DL, AddrReg, Handler, Chain.getValue(1)); ++ return DAG.getNode(LoongArchISD::EH_RETURN, DL, MVT::Other, Chain, ++ DAG.getRegister(OffsetReg, Ty), ++ DAG.getRegister(AddrReg, getPointerTy(MF.getDataLayout())), ++ Chain.getValue(1)); ++} ++ ++SDValue LoongArchTargetLowering::lowerATOMIC_FENCE(SDValue Op, ++ SelectionDAG &DAG) const { ++ // FIXME: Need pseudo-fence for 'singlethread' fences ++ // FIXME: Set SType for weaker fences where supported/appropriate. 
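++  // A zero hint makes DBAR act as a full completion barrier, the strongest
++  // ordering available; weaker hints are left to the FIXMEs above.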
++ unsigned SType = 0; ++ SDLoc DL(Op); ++ return DAG.getNode(LoongArchISD::DBAR, DL, MVT::Other, Op.getOperand(0), ++ DAG.getConstant(SType, DL, MVT::i32)); ++} ++ ++SDValue LoongArchTargetLowering::lowerShiftLeftParts(SDValue Op, ++ SelectionDAG &DAG) const { ++ SDLoc DL(Op); ++ MVT VT = Subtarget.is64Bit() ? MVT::i64 : MVT::i32; ++ ++ SDValue Lo = Op.getOperand(0), Hi = Op.getOperand(1); ++ SDValue Shamt = Op.getOperand(2); ++ // if shamt < (VT.bits): ++ // lo = (shl lo, shamt) ++ // hi = (or (shl hi, shamt) (srl (srl lo, 1), ~shamt)) ++ // else: ++ // lo = 0 ++ // hi = (shl lo, shamt[4:0]) ++ SDValue Not = DAG.getNode(ISD::XOR, DL, MVT::i32, Shamt, ++ DAG.getConstant(-1, DL, MVT::i32)); ++ SDValue ShiftRight1Lo = DAG.getNode(ISD::SRL, DL, VT, Lo, ++ DAG.getConstant(1, DL, VT)); ++ SDValue ShiftRightLo = DAG.getNode(ISD::SRL, DL, VT, ShiftRight1Lo, Not); ++ SDValue ShiftLeftHi = DAG.getNode(ISD::SHL, DL, VT, Hi, Shamt); ++ SDValue Or = DAG.getNode(ISD::OR, DL, VT, ShiftLeftHi, ShiftRightLo); ++ SDValue ShiftLeftLo = DAG.getNode(ISD::SHL, DL, VT, Lo, Shamt); ++ SDValue Cond = DAG.getNode(ISD::AND, DL, MVT::i32, Shamt, ++ DAG.getConstant(VT.getSizeInBits(), DL, MVT::i32)); ++ Lo = DAG.getNode(ISD::SELECT, DL, VT, Cond, ++ DAG.getConstant(0, DL, VT), ShiftLeftLo); ++ Hi = DAG.getNode(ISD::SELECT, DL, VT, Cond, ShiftLeftLo, Or); ++ ++ SDValue Ops[2] = {Lo, Hi}; ++ return DAG.getMergeValues(Ops, DL); ++} ++ ++SDValue LoongArchTargetLowering::lowerShiftRightParts(SDValue Op, SelectionDAG &DAG, ++ bool IsSRA) const { ++ SDLoc DL(Op); ++ SDValue Lo = Op.getOperand(0), Hi = Op.getOperand(1); ++ SDValue Shamt = Op.getOperand(2); ++ MVT VT = Subtarget.is64Bit() ? MVT::i64 : MVT::i32; ++ ++ // if shamt < (VT.bits): ++ // lo = (or (shl (shl hi, 1), ~shamt) (srl lo, shamt)) ++ // if isSRA: ++ // hi = (sra hi, shamt) ++ // else: ++ // hi = (srl hi, shamt) ++ // else: ++ // if isSRA: ++ // lo = (sra hi, shamt[4:0]) ++ // hi = (sra hi, 31) ++ // else: ++ // lo = (srl hi, shamt[4:0]) ++ // hi = 0 ++ SDValue Not = DAG.getNode(ISD::XOR, DL, MVT::i32, Shamt, ++ DAG.getConstant(-1, DL, MVT::i32)); ++ SDValue ShiftLeft1Hi = DAG.getNode(ISD::SHL, DL, VT, Hi, ++ DAG.getConstant(1, DL, VT)); ++ SDValue ShiftLeftHi = DAG.getNode(ISD::SHL, DL, VT, ShiftLeft1Hi, Not); ++ SDValue ShiftRightLo = DAG.getNode(ISD::SRL, DL, VT, Lo, Shamt); ++ SDValue Or = DAG.getNode(ISD::OR, DL, VT, ShiftLeftHi, ShiftRightLo); ++ SDValue ShiftRightHi = DAG.getNode(IsSRA ? ISD::SRA : ISD::SRL, ++ DL, VT, Hi, Shamt); ++ SDValue Cond = DAG.getNode(ISD::AND, DL, MVT::i32, Shamt, ++ DAG.getConstant(VT.getSizeInBits(), DL, MVT::i32)); ++ SDValue Ext = DAG.getNode(ISD::SRA, DL, VT, Hi, ++ DAG.getConstant(VT.getSizeInBits() - 1, DL, VT)); ++ Lo = DAG.getNode(ISD::SELECT, DL, VT, Cond, ShiftRightHi, Or); ++ Hi = DAG.getNode(ISD::SELECT, DL, VT, Cond, ++ IsSRA ? Ext : DAG.getConstant(0, DL, VT), ShiftRightHi); ++ ++ SDValue Ops[2] = {Lo, Hi}; ++ return DAG.getMergeValues(Ops, DL); ++} ++ ++// Lower (store (fp_to_sint $fp) $ptr) to (store (TruncIntFP $fp), $ptr). 
++static SDValue lowerFP_TO_SINT_STORE(StoreSDNode *SD, SelectionDAG &DAG, ++ bool SingleFloat) { ++ SDValue Val = SD->getValue(); ++ ++ if (Val.getOpcode() != ISD::FP_TO_SINT || ++ (Val.getValueSizeInBits() > 32 && SingleFloat)) ++ return SDValue(); ++ ++ EVT FPTy = EVT::getFloatingPointVT(Val.getValueSizeInBits()); ++ SDValue Tr = DAG.getNode(LoongArchISD::TruncIntFP, SDLoc(Val), FPTy, ++ Val.getOperand(0)); ++ return DAG.getStore(SD->getChain(), SDLoc(SD), Tr, SD->getBasePtr(), ++ SD->getPointerInfo(), SD->getAlignment(), ++ SD->getMemOperand()->getFlags()); ++} ++ ++SDValue LoongArchTargetLowering::lowerSTORE(SDValue Op, SelectionDAG &DAG) const { ++ StoreSDNode *SD = cast(Op); ++ return lowerFP_TO_SINT_STORE( ++ SD, DAG, (Subtarget.hasBasicF() && !Subtarget.hasBasicD())); ++} ++ ++SDValue LoongArchTargetLowering::lowerINTRINSIC_WO_CHAIN(SDValue Op, ++ SelectionDAG &DAG) const { ++ SDLoc DL(Op); ++ unsigned Intrinsic = cast(Op->getOperand(0))->getZExtValue(); ++ switch (Intrinsic) { ++ default: ++ return SDValue(); ++ case Intrinsic::loongarch_lsx_vaddi_bu: ++ case Intrinsic::loongarch_lsx_vaddi_hu: ++ case Intrinsic::loongarch_lsx_vaddi_wu: ++ case Intrinsic::loongarch_lsx_vaddi_du: ++ return DAG.getNode(ISD::ADD, DL, Op->getValueType(0), Op->getOperand(1), ++ lowerLSXSplatImm(Op, 2, DAG)); ++ case Intrinsic::loongarch_lsx_vand_v: ++ case Intrinsic::loongarch_lasx_xvand_v: ++ return DAG.getNode(ISD::AND, DL, Op->getValueType(0), Op->getOperand(1), ++ Op->getOperand(2)); ++ case Intrinsic::loongarch_lsx_vbitclr_b: ++ case Intrinsic::loongarch_lsx_vbitclr_h: ++ case Intrinsic::loongarch_lsx_vbitclr_w: ++ case Intrinsic::loongarch_lsx_vbitclr_d: ++ return lowerLSXBitClear(Op, DAG); ++ case Intrinsic::loongarch_lsx_vdiv_b: ++ case Intrinsic::loongarch_lsx_vdiv_h: ++ case Intrinsic::loongarch_lsx_vdiv_w: ++ case Intrinsic::loongarch_lsx_vdiv_d: ++ return DAG.getNode(ISD::SDIV, DL, Op->getValueType(0), Op->getOperand(1), ++ Op->getOperand(2)); ++ case Intrinsic::loongarch_lsx_vdiv_bu: ++ case Intrinsic::loongarch_lsx_vdiv_hu: ++ case Intrinsic::loongarch_lsx_vdiv_wu: ++ case Intrinsic::loongarch_lsx_vdiv_du: ++ return DAG.getNode(ISD::UDIV, DL, Op->getValueType(0), Op->getOperand(1), ++ Op->getOperand(2)); ++ case Intrinsic::loongarch_lsx_vfdiv_s: ++ case Intrinsic::loongarch_lsx_vfdiv_d: ++ return DAG.getNode(ISD::FDIV, DL, Op->getValueType(0), Op->getOperand(1), ++ Op->getOperand(2)); ++ case Intrinsic::loongarch_lsx_vffint_s_wu: ++ case Intrinsic::loongarch_lsx_vffint_d_lu: ++ return DAG.getNode(ISD::UINT_TO_FP, DL, Op->getValueType(0), ++ Op->getOperand(1)); ++ case Intrinsic::loongarch_lsx_vffint_s_w: ++ case Intrinsic::loongarch_lsx_vffint_d_l: ++ return DAG.getNode(ISD::SINT_TO_FP, DL, Op->getValueType(0), ++ Op->getOperand(1)); ++ case Intrinsic::loongarch_lsx_vfmul_s: ++ case Intrinsic::loongarch_lsx_vfmul_d: ++ return DAG.getNode(ISD::FMUL, DL, Op->getValueType(0), Op->getOperand(1), ++ Op->getOperand(2)); ++ case Intrinsic::loongarch_lsx_vfrint_s: ++ case Intrinsic::loongarch_lsx_vfrint_d: ++ return DAG.getNode(ISD::FRINT, DL, Op->getValueType(0), Op->getOperand(1)); ++ case Intrinsic::loongarch_lsx_vfsqrt_s: ++ case Intrinsic::loongarch_lsx_vfsqrt_d: ++ return DAG.getNode(ISD::FSQRT, DL, Op->getValueType(0), Op->getOperand(1)); ++ case Intrinsic::loongarch_lsx_vftintrz_wu_s: ++ case Intrinsic::loongarch_lsx_vftintrz_lu_d: ++ return DAG.getNode(ISD::FP_TO_UINT, DL, Op->getValueType(0), ++ Op->getOperand(1)); ++ case Intrinsic::loongarch_lsx_vpackev_b: ++ case 
Intrinsic::loongarch_lsx_vpackev_h: ++ case Intrinsic::loongarch_lsx_vpackev_w: ++ case Intrinsic::loongarch_lsx_vpackev_d: ++ return DAG.getNode(LoongArchISD::VPACKEV, DL, Op->getValueType(0), ++ Op->getOperand(1), Op->getOperand(2)); ++ case Intrinsic::loongarch_lsx_vilvh_b: ++ case Intrinsic::loongarch_lsx_vilvh_h: ++ case Intrinsic::loongarch_lsx_vilvh_w: ++ case Intrinsic::loongarch_lsx_vilvh_d: ++ return DAG.getNode(LoongArchISD::VILVH, DL, Op->getValueType(0), ++ Op->getOperand(1), Op->getOperand(2)); ++ case Intrinsic::loongarch_lsx_vpackod_b: ++ case Intrinsic::loongarch_lsx_vpackod_h: ++ case Intrinsic::loongarch_lsx_vpackod_w: ++ case Intrinsic::loongarch_lsx_vpackod_d: ++ return DAG.getNode(LoongArchISD::VPACKOD, DL, Op->getValueType(0), ++ Op->getOperand(1), Op->getOperand(2)); ++ case Intrinsic::loongarch_lsx_vilvl_b: ++ case Intrinsic::loongarch_lsx_vilvl_h: ++ case Intrinsic::loongarch_lsx_vilvl_w: ++ case Intrinsic::loongarch_lsx_vilvl_d: ++ return DAG.getNode(LoongArchISD::VILVL, DL, Op->getValueType(0), ++ Op->getOperand(1), Op->getOperand(2)); ++ case Intrinsic::loongarch_lsx_vmadd_b: ++ case Intrinsic::loongarch_lsx_vmadd_h: ++ case Intrinsic::loongarch_lsx_vmadd_w: ++ case Intrinsic::loongarch_lsx_vmadd_d: { ++ EVT ResTy = Op->getValueType(0); ++ return DAG.getNode(ISD::ADD, SDLoc(Op), ResTy, Op->getOperand(1), ++ DAG.getNode(ISD::MUL, SDLoc(Op), ResTy, ++ Op->getOperand(2), Op->getOperand(3))); ++ } ++ case Intrinsic::loongarch_lsx_vmax_b: ++ case Intrinsic::loongarch_lsx_vmax_h: ++ case Intrinsic::loongarch_lsx_vmax_w: ++ case Intrinsic::loongarch_lsx_vmax_d: ++ return DAG.getNode(ISD::SMAX, DL, Op->getValueType(0), Op->getOperand(1), ++ Op->getOperand(2)); ++ case Intrinsic::loongarch_lsx_vmax_bu: ++ case Intrinsic::loongarch_lsx_vmax_hu: ++ case Intrinsic::loongarch_lsx_vmax_wu: ++ case Intrinsic::loongarch_lsx_vmax_du: ++ return DAG.getNode(ISD::UMAX, DL, Op->getValueType(0), Op->getOperand(1), ++ Op->getOperand(2)); ++ case Intrinsic::loongarch_lsx_vmin_b: ++ case Intrinsic::loongarch_lsx_vmin_h: ++ case Intrinsic::loongarch_lsx_vmin_w: ++ case Intrinsic::loongarch_lsx_vmin_d: ++ return DAG.getNode(ISD::SMIN, DL, Op->getValueType(0), Op->getOperand(1), ++ Op->getOperand(2)); ++ case Intrinsic::loongarch_lsx_vmin_bu: ++ case Intrinsic::loongarch_lsx_vmin_hu: ++ case Intrinsic::loongarch_lsx_vmin_wu: ++ case Intrinsic::loongarch_lsx_vmin_du: ++ return DAG.getNode(ISD::UMIN, DL, Op->getValueType(0), Op->getOperand(1), ++ Op->getOperand(2)); ++ case Intrinsic::loongarch_lsx_vmini_bu: ++ case Intrinsic::loongarch_lsx_vmini_hu: ++ case Intrinsic::loongarch_lsx_vmini_wu: ++ case Intrinsic::loongarch_lsx_vmini_du: ++ return DAG.getNode(ISD::UMIN, DL, Op->getValueType(0), Op->getOperand(1), ++ lowerLSXSplatImm(Op, 2, DAG)); ++ case Intrinsic::loongarch_lsx_vmod_b: ++ case Intrinsic::loongarch_lsx_vmod_h: ++ case Intrinsic::loongarch_lsx_vmod_w: ++ case Intrinsic::loongarch_lsx_vmod_d: ++ return DAG.getNode(ISD::SREM, DL, Op->getValueType(0), Op->getOperand(1), ++ Op->getOperand(2)); ++ case Intrinsic::loongarch_lsx_vmod_bu: ++ case Intrinsic::loongarch_lsx_vmod_hu: ++ case Intrinsic::loongarch_lsx_vmod_wu: ++ case Intrinsic::loongarch_lsx_vmod_du: ++ return DAG.getNode(ISD::UREM, DL, Op->getValueType(0), Op->getOperand(1), ++ Op->getOperand(2)); ++ case Intrinsic::loongarch_lsx_vmul_b: ++ case Intrinsic::loongarch_lsx_vmul_h: ++ case Intrinsic::loongarch_lsx_vmul_w: ++ case Intrinsic::loongarch_lsx_vmul_d: ++ return DAG.getNode(ISD::MUL, DL, Op->getValueType(0), 
Op->getOperand(1), ++ Op->getOperand(2)); ++ case Intrinsic::loongarch_lsx_vmsub_b: ++ case Intrinsic::loongarch_lsx_vmsub_h: ++ case Intrinsic::loongarch_lsx_vmsub_w: ++ case Intrinsic::loongarch_lsx_vmsub_d: { ++ EVT ResTy = Op->getValueType(0); ++ return DAG.getNode(ISD::SUB, SDLoc(Op), ResTy, Op->getOperand(1), ++ DAG.getNode(ISD::MUL, SDLoc(Op), ResTy, ++ Op->getOperand(2), Op->getOperand(3))); ++ } ++ case Intrinsic::loongarch_lsx_vclz_b: ++ case Intrinsic::loongarch_lsx_vclz_h: ++ case Intrinsic::loongarch_lsx_vclz_w: ++ case Intrinsic::loongarch_lsx_vclz_d: ++ return DAG.getNode(ISD::CTLZ, DL, Op->getValueType(0), Op->getOperand(1)); ++ case Intrinsic::loongarch_lsx_vnor_v: ++ case Intrinsic::loongarch_lasx_xvnor_v: { ++ SDValue Res = DAG.getNode(ISD::OR, DL, Op->getValueType(0), ++ Op->getOperand(1), Op->getOperand(2)); ++ return DAG.getNOT(DL, Res, Res->getValueType(0)); ++ } ++ case Intrinsic::loongarch_lsx_vor_v: ++ case Intrinsic::loongarch_lasx_xvor_v: ++ return DAG.getNode(ISD::OR, DL, Op->getValueType(0), Op->getOperand(1), ++ Op->getOperand(2)); ++ case Intrinsic::loongarch_lsx_vpickev_b: ++ case Intrinsic::loongarch_lsx_vpickev_h: ++ case Intrinsic::loongarch_lsx_vpickev_w: ++ case Intrinsic::loongarch_lsx_vpickev_d: ++ return DAG.getNode(LoongArchISD::VPICKEV, DL, Op->getValueType(0), ++ Op->getOperand(1), Op->getOperand(2)); ++ case Intrinsic::loongarch_lsx_vpickod_b: ++ case Intrinsic::loongarch_lsx_vpickod_h: ++ case Intrinsic::loongarch_lsx_vpickod_w: ++ case Intrinsic::loongarch_lsx_vpickod_d: ++ return DAG.getNode(LoongArchISD::VPICKOD, DL, Op->getValueType(0), ++ Op->getOperand(1), Op->getOperand(2)); ++ case Intrinsic::loongarch_lsx_vpcnt_b: ++ case Intrinsic::loongarch_lsx_vpcnt_h: ++ case Intrinsic::loongarch_lsx_vpcnt_w: ++ case Intrinsic::loongarch_lsx_vpcnt_d: ++ return DAG.getNode(ISD::CTPOP, DL, Op->getValueType(0), Op->getOperand(1)); ++ case Intrinsic::loongarch_lsx_vsat_b: ++ case Intrinsic::loongarch_lsx_vsat_h: ++ case Intrinsic::loongarch_lsx_vsat_w: ++ case Intrinsic::loongarch_lsx_vsat_d: ++ case Intrinsic::loongarch_lsx_vsat_bu: ++ case Intrinsic::loongarch_lsx_vsat_hu: ++ case Intrinsic::loongarch_lsx_vsat_wu: ++ case Intrinsic::loongarch_lsx_vsat_du: { ++ // Report an error for out of range values. 
++ int64_t Max; ++ switch (Intrinsic) { ++ case Intrinsic::loongarch_lsx_vsat_b: ++ case Intrinsic::loongarch_lsx_vsat_bu: ++ Max = 7; ++ break; ++ case Intrinsic::loongarch_lsx_vsat_h: ++ case Intrinsic::loongarch_lsx_vsat_hu: ++ Max = 15; ++ break; ++ case Intrinsic::loongarch_lsx_vsat_w: ++ case Intrinsic::loongarch_lsx_vsat_wu: ++ Max = 31; ++ break; ++ case Intrinsic::loongarch_lsx_vsat_d: ++ case Intrinsic::loongarch_lsx_vsat_du: ++ Max = 63; ++ break; ++ default: ++ llvm_unreachable("Unmatched intrinsic"); ++ } ++ int64_t Value = cast(Op->getOperand(2))->getSExtValue(); ++ if (Value < 0 || Value > Max) ++ report_fatal_error("Immediate out of range"); ++ return SDValue(); ++ } ++ case Intrinsic::loongarch_lsx_vshuf4i_b: ++ case Intrinsic::loongarch_lsx_vshuf4i_h: ++ case Intrinsic::loongarch_lsx_vshuf4i_w: ++ // case Intrinsic::loongarch_lsx_vshuf4i_d: ++ { ++ int64_t Value = cast(Op->getOperand(2))->getSExtValue(); ++ if (Value < 0 || Value > 255) ++ report_fatal_error("Immediate out of range"); ++ return DAG.getNode(LoongArchISD::SHF, DL, Op->getValueType(0), ++ Op->getOperand(2), Op->getOperand(1)); ++ } ++ case Intrinsic::loongarch_lsx_vsll_b: ++ case Intrinsic::loongarch_lsx_vsll_h: ++ case Intrinsic::loongarch_lsx_vsll_w: ++ case Intrinsic::loongarch_lsx_vsll_d: ++ return DAG.getNode(ISD::SHL, DL, Op->getValueType(0), Op->getOperand(1), ++ truncateVecElts(Op, DAG)); ++ case Intrinsic::loongarch_lsx_vslli_b: ++ case Intrinsic::loongarch_lsx_vslli_h: ++ case Intrinsic::loongarch_lsx_vslli_w: ++ case Intrinsic::loongarch_lsx_vslli_d: ++ return DAG.getNode(ISD::SHL, DL, Op->getValueType(0), Op->getOperand(1), ++ lowerLSXSplatImm(Op, 2, DAG)); ++ case Intrinsic::loongarch_lsx_vreplve_b: ++ case Intrinsic::loongarch_lsx_vreplve_h: ++ case Intrinsic::loongarch_lsx_vreplve_w: ++ case Intrinsic::loongarch_lsx_vreplve_d: ++ // We can't lower via VECTOR_SHUFFLE because it requires constant shuffle ++ // masks, nor can we lower via BUILD_VECTOR & EXTRACT_VECTOR_ELT because ++ // EXTRACT_VECTOR_ELT can't extract i64's on LoongArch32. ++ // Instead we lower to LoongArchISD::VSHF and match from there. ++ return DAG.getNode(LoongArchISD::VSHF, DL, Op->getValueType(0), ++ lowerLSXSplatZExt(Op, 2, DAG), Op->getOperand(1), ++ Op->getOperand(1)); ++ case Intrinsic::loongarch_lsx_vreplvei_b: ++ case Intrinsic::loongarch_lsx_vreplvei_h: ++ case Intrinsic::loongarch_lsx_vreplvei_w: ++ case Intrinsic::loongarch_lsx_vreplvei_d: ++ return DAG.getNode(LoongArchISD::VSHF, DL, Op->getValueType(0), ++ lowerLSXSplatImm(Op, 2, DAG), Op->getOperand(1), ++ Op->getOperand(1)); ++ case Intrinsic::loongarch_lsx_vsra_b: ++ case Intrinsic::loongarch_lsx_vsra_h: ++ case Intrinsic::loongarch_lsx_vsra_w: ++ case Intrinsic::loongarch_lsx_vsra_d: ++ return DAG.getNode(ISD::SRA, DL, Op->getValueType(0), Op->getOperand(1), ++ truncateVecElts(Op, DAG)); ++ case Intrinsic::loongarch_lsx_vsrari_b: ++ case Intrinsic::loongarch_lsx_vsrari_h: ++ case Intrinsic::loongarch_lsx_vsrari_w: ++ case Intrinsic::loongarch_lsx_vsrari_d: { ++ // Report an error for out of range values. 
++ int64_t Max; ++ switch (Intrinsic) { ++ case Intrinsic::loongarch_lsx_vsrari_b: ++ Max = 7; ++ break; ++ case Intrinsic::loongarch_lsx_vsrari_h: ++ Max = 15; ++ break; ++ case Intrinsic::loongarch_lsx_vsrari_w: ++ Max = 31; ++ break; ++ case Intrinsic::loongarch_lsx_vsrari_d: ++ Max = 63; ++ break; ++ default: ++ llvm_unreachable("Unmatched intrinsic"); ++ } ++ int64_t Value = cast(Op->getOperand(2))->getSExtValue(); ++ if (Value < 0 || Value > Max) ++ report_fatal_error("Immediate out of range"); ++ return SDValue(); ++ } ++ case Intrinsic::loongarch_lsx_vsrl_b: ++ case Intrinsic::loongarch_lsx_vsrl_h: ++ case Intrinsic::loongarch_lsx_vsrl_w: ++ case Intrinsic::loongarch_lsx_vsrl_d: ++ return DAG.getNode(ISD::SRL, DL, Op->getValueType(0), Op->getOperand(1), ++ truncateVecElts(Op, DAG)); ++ case Intrinsic::loongarch_lsx_vsrli_b: ++ case Intrinsic::loongarch_lsx_vsrli_h: ++ case Intrinsic::loongarch_lsx_vsrli_w: ++ case Intrinsic::loongarch_lsx_vsrli_d: ++ return DAG.getNode(ISD::SRL, DL, Op->getValueType(0), Op->getOperand(1), ++ lowerLSXSplatImm(Op, 2, DAG)); ++ case Intrinsic::loongarch_lsx_vsrlri_b: ++ case Intrinsic::loongarch_lsx_vsrlri_h: ++ case Intrinsic::loongarch_lsx_vsrlri_w: ++ case Intrinsic::loongarch_lsx_vsrlri_d: { ++ // Report an error for out of range values. ++ int64_t Max; ++ switch (Intrinsic) { ++ case Intrinsic::loongarch_lsx_vsrlri_b: ++ Max = 7; ++ break; ++ case Intrinsic::loongarch_lsx_vsrlri_h: ++ Max = 15; ++ break; ++ case Intrinsic::loongarch_lsx_vsrlri_w: ++ Max = 31; ++ break; ++ case Intrinsic::loongarch_lsx_vsrlri_d: ++ Max = 63; ++ break; ++ default: ++ llvm_unreachable("Unmatched intrinsic"); ++ } ++ int64_t Value = cast(Op->getOperand(2))->getSExtValue(); ++ if (Value < 0 || Value > Max) ++ report_fatal_error("Immediate out of range"); ++ return SDValue(); ++ } ++ case Intrinsic::loongarch_lsx_vsubi_bu: ++ case Intrinsic::loongarch_lsx_vsubi_hu: ++ case Intrinsic::loongarch_lsx_vsubi_wu: ++ case Intrinsic::loongarch_lsx_vsubi_du: ++ return DAG.getNode(ISD::SUB, DL, Op->getValueType(0), Op->getOperand(1), ++ lowerLSXSplatImm(Op, 2, DAG)); ++ case Intrinsic::loongarch_lsx_vshuf_h: ++ case Intrinsic::loongarch_lsx_vshuf_w: ++ case Intrinsic::loongarch_lsx_vshuf_d: ++ case Intrinsic::loongarch_lasx_xvshuf_h: ++ case Intrinsic::loongarch_lasx_xvshuf_w: ++ case Intrinsic::loongarch_lasx_xvshuf_d: ++ return DAG.getNode(LoongArchISD::VSHF, DL, Op->getValueType(0), ++ Op->getOperand(1), Op->getOperand(2), Op->getOperand(3)); ++ case Intrinsic::loongarch_lsx_vxor_v: ++ case Intrinsic::loongarch_lasx_xvxor_v: ++ return DAG.getNode(ISD::XOR, DL, Op->getValueType(0), Op->getOperand(1), ++ Op->getOperand(2)); ++ case Intrinsic::loongarch_lsx_vrotr_b: ++ case Intrinsic::loongarch_lsx_vrotr_h: ++ case Intrinsic::loongarch_lsx_vrotr_w: ++ case Intrinsic::loongarch_lsx_vrotr_d: ++ return DAG.getNode(LoongArchISD::VROR, DL, Op->getValueType(0), ++ Op->getOperand(1), Op->getOperand(2)); ++ case Intrinsic::loongarch_lsx_vrotri_b: ++ case Intrinsic::loongarch_lsx_vrotri_h: ++ case Intrinsic::loongarch_lsx_vrotri_w: ++ case Intrinsic::loongarch_lsx_vrotri_d: ++ return DAG.getNode(LoongArchISD::VRORI, DL, Op->getValueType(0), ++ Op->getOperand(1), Op->getOperand(2)); ++ case Intrinsic::thread_pointer: { ++ EVT PtrVT = getPointerTy(DAG.getDataLayout()); ++ if (PtrVT == MVT::i64) ++ return DAG.getRegister(LoongArch::TP_64, MVT::i64); ++ return DAG.getRegister(LoongArch::TP, MVT::i32); ++ } ++ } ++} ++ ++SDValue ++LoongArchTargetLowering::lowerINTRINSIC_W_CHAIN(SDValue Op, ++ 
SelectionDAG &DAG) const { ++ unsigned Intr = cast(Op->getOperand(1))->getZExtValue(); ++ switch (Intr) { ++ default: ++ return SDValue(); ++ case Intrinsic::loongarch_lsx_vld: ++ return lowerLSXLoadIntr(Op, DAG, Intr, Subtarget); ++ case Intrinsic::loongarch_lasx_xvld: ++ return lowerLASXLoadIntr(Op, DAG, Intr, Subtarget); ++ case Intrinsic::loongarch_lasx_xvldrepl_b: ++ case Intrinsic::loongarch_lasx_xvldrepl_h: ++ case Intrinsic::loongarch_lasx_xvldrepl_w: ++ case Intrinsic::loongarch_lasx_xvldrepl_d: ++ return lowerLASXVLDRIntr(Op, DAG, Intr, Subtarget); ++ case Intrinsic::loongarch_lsx_vldrepl_b: ++ case Intrinsic::loongarch_lsx_vldrepl_h: ++ case Intrinsic::loongarch_lsx_vldrepl_w: ++ case Intrinsic::loongarch_lsx_vldrepl_d: ++ return lowerLSXVLDRIntr(Op, DAG, Intr, Subtarget); ++ } ++} ++ ++SDValue LoongArchTargetLowering::lowerINTRINSIC_VOID(SDValue Op, ++ SelectionDAG &DAG) const { ++ unsigned Intr = cast(Op->getOperand(1))->getZExtValue(); ++ switch (Intr) { ++ default: ++ return SDValue(); ++ case Intrinsic::loongarch_lsx_vst: ++ return lowerLSXStoreIntr(Op, DAG, Intr, Subtarget); ++ case Intrinsic::loongarch_lasx_xvst: ++ return lowerLASXStoreIntr(Op, DAG, Intr, Subtarget); ++ } ++} ++ ++// Lower ISD::EXTRACT_VECTOR_ELT into LoongArchISD::VEXTRACT_SEXT_ELT. ++// ++// The non-value bits resulting from ISD::EXTRACT_VECTOR_ELT are undefined. We ++// choose to sign-extend but we could have equally chosen zero-extend. The ++// DAGCombiner will fold any sign/zero extension of the ISD::EXTRACT_VECTOR_ELT ++// result into this node later (possibly changing it to a zero-extend in the ++// process). ++SDValue ++LoongArchTargetLowering::lowerEXTRACT_VECTOR_ELT(SDValue Op, ++ SelectionDAG &DAG) const { ++ SDLoc DL(Op); ++ EVT ResTy = Op->getValueType(0); ++ SDValue Op0 = Op->getOperand(0); ++ EVT VecTy = Op0->getValueType(0); ++ ++ if (!VecTy.is128BitVector() && !VecTy.is256BitVector()) ++ return SDValue(); ++ ++ if (ResTy.isInteger()) { ++ SDValue Op1 = Op->getOperand(1); ++ EVT EltTy = VecTy.getVectorElementType(); ++ if (VecTy.is128BitVector()) ++ return DAG.getNode(LoongArchISD::VEXTRACT_SEXT_ELT, DL, ResTy, Op0, Op1, ++ DAG.getValueType(EltTy)); ++ ++ ConstantSDNode *cn = dyn_cast(Op1); ++ if (!cn) ++ return SDValue(); ++ ++ if (EltTy == MVT::i32 || EltTy == MVT::i64) ++ return DAG.getNode(LoongArchISD::VEXTRACT_SEXT_ELT, DL, ResTy, Op0, Op1, ++ DAG.getValueType(EltTy)); ++ } ++ ++ return SDValue(); ++} ++ ++SDValue ++LoongArchTargetLowering::lowerINSERT_VECTOR_ELT(SDValue Op, ++ SelectionDAG &DAG) const { ++ ++ MVT VT = Op.getSimpleValueType(); ++ MVT EltVT = VT.getVectorElementType(); ++ ++ SDLoc DL(Op); ++ SDValue Op0 = Op.getOperand(0); ++ SDValue Op1 = Op.getOperand(1); ++ SDValue Op2 = Op.getOperand(2); ++ ++ if (!EltVT.isInteger()) ++ return Op; ++ ++ if (!isa(Op2)) { ++ if (EltVT == MVT::i8 || EltVT == MVT::i16) { ++ return Op; // ==> pseudo ++ // use stack ++ return SDValue(); ++ } else { ++ return Op; ++ } ++ } ++ ++ if (VT.is128BitVector()) ++ return DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, VT, Op0, Op1, Op2); ++ ++ if (VT.is256BitVector()) { ++ ++ if (EltVT == MVT::i32 || EltVT == MVT::i64) ++ return DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, VT, Op0, Op1, Op2); ++ ++ return Op; ++ } ++ ++ return SDValue(); ++} ++ ++// Lowers ISD::BUILD_VECTOR into appropriate SelectionDAG nodes for the ++// backend. 
++// ++// Lowers according to the following rules: ++// - Constant splats are legal as-is as long as the SplatBitSize is a power of ++// 2 less than or equal to 64 and the value fits into a signed 10-bit ++// immediate ++// - Constant splats are lowered to bitconverted BUILD_VECTORs if SplatBitSize ++// is a power of 2 less than or equal to 64 and the value does not fit into a ++// signed 10-bit immediate ++// - Non-constant splats are legal as-is. ++// - Non-constant non-splats are lowered to sequences of INSERT_VECTOR_ELT. ++// - All others are illegal and must be expanded. ++SDValue LoongArchTargetLowering::lowerBUILD_VECTOR(SDValue Op, ++ SelectionDAG &DAG) const { ++ BuildVectorSDNode *Node = cast(Op); ++ EVT ResTy = Op->getValueType(0); ++ SDLoc DL(Op); ++ APInt SplatValue, SplatUndef; ++ unsigned SplatBitSize; ++ bool HasAnyUndefs; ++ ++ if ((!Subtarget.hasLSX() || !ResTy.is128BitVector()) && ++ (!Subtarget.hasLASX() || !ResTy.is256BitVector())) ++ return SDValue(); ++ ++ if (Node->isConstantSplat(SplatValue, SplatUndef, SplatBitSize, HasAnyUndefs, ++ 8) && ++ SplatBitSize <= 64) { ++ // We can only cope with 8, 16, 32, or 64-bit elements ++ if ((ResTy.is128BitVector() && SplatBitSize != 8 && SplatBitSize != 16 && ++ SplatBitSize != 32 && SplatBitSize != 64) || ++ (ResTy.is256BitVector() && SplatBitSize != 8 && SplatBitSize != 16 && ++ SplatBitSize != 32 && SplatBitSize != 64)) ++ return SDValue(); ++ ++ // If the value isn't an integer type we will have to bitcast ++ // from an integer type first. Also, if there are any undefs, we must ++ // lower them to defined values first. ++ if (ResTy.isInteger() && !HasAnyUndefs) ++ return Op; ++ ++ EVT ViaVecTy; ++ ++ if ((ResTy.is128BitVector() && ++ !isLSXBySplatBitSize(SplatBitSize, ViaVecTy)) || ++ (ResTy.is256BitVector() && ++ !isLASXBySplatBitSize(SplatBitSize, ViaVecTy))) ++ return SDValue(); ++ ++ // SelectionDAG::getConstant will promote SplatValue appropriately. ++ SDValue Result = DAG.getConstant(SplatValue, DL, ViaVecTy); ++ ++ // Bitcast to the type we originally wanted ++ if (ViaVecTy != ResTy) ++ Result = DAG.getNode(ISD::BITCAST, SDLoc(Node), ResTy, Result); ++ ++ return Result; ++ } else if (DAG.isSplatValue(Op, /* AllowUndefs */ false)) ++ return Op; ++ else if (!isConstantOrUndefBUILD_VECTOR(Node)) { ++ // Use INSERT_VECTOR_ELT operations rather than expand to stores. 
++ // The resulting code is the same length as the expansion, but it doesn't ++ // use memory operations ++ EVT ResTy = Node->getValueType(0); ++ ++ assert(ResTy.isVector()); ++ ++ unsigned NumElts = ResTy.getVectorNumElements(); ++ SDValue Vector = DAG.getUNDEF(ResTy); ++ for (unsigned i = 0; i < NumElts; ++i) { ++ Vector = ++ DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, ResTy, Vector, ++ Node->getOperand(i), DAG.getConstant(i, DL, MVT::i32)); ++ } ++ return Vector; ++ } ++ ++ return SDValue(); ++} ++ ++SDValue LoongArchTargetLowering::lowerUINT_TO_FP(SDValue Op, ++ SelectionDAG &DAG) const { ++ SDLoc DL(Op); ++ EVT ResTy = Op->getValueType(0); ++ Op = LowerSUINT_TO_FP(ISD::ZERO_EXTEND_VECTOR_INREG, Op, DAG); ++ if (!ResTy.isVector()) ++ return Op; ++ return DAG.getNode(ISD::UINT_TO_FP, DL, ResTy, Op); ++} ++ ++SDValue LoongArchTargetLowering::lowerSINT_TO_FP(SDValue Op, ++ SelectionDAG &DAG) const { ++ SDLoc DL(Op); ++ EVT ResTy = Op->getValueType(0); ++ Op = LowerSUINT_TO_FP(ISD::SIGN_EXTEND_VECTOR_INREG, Op, DAG); ++ if (!ResTy.isVector()) ++ return Op; ++ return DAG.getNode(ISD::SINT_TO_FP, DL, ResTy, Op); ++} ++ ++SDValue LoongArchTargetLowering::lowerFP_TO_UINT(SDValue Op, ++ SelectionDAG &DAG) const { ++ if (!Op->getValueType(0).isVector()) ++ return SDValue(); ++ return LowerFP_TO_SUINT(ISD::FP_TO_UINT, ISD::ZERO_EXTEND_VECTOR_INREG, Op, ++ DAG); ++} ++ ++SDValue LoongArchTargetLowering::lowerFP_TO_SINT(SDValue Op, ++ SelectionDAG &DAG) const { ++ if (Op->getValueType(0).isVector()) ++ return LowerFP_TO_SUINT(ISD::FP_TO_SINT, ISD::SIGN_EXTEND_VECTOR_INREG, Op, ++ DAG); ++ ++ if (Op.getValueSizeInBits() > 32 && ++ (Subtarget.hasBasicF() && !Subtarget.hasBasicD())) ++ return SDValue(); ++ ++ EVT FPTy = EVT::getFloatingPointVT(Op.getValueSizeInBits()); ++ SDValue Trunc = ++ DAG.getNode(LoongArchISD::TruncIntFP, SDLoc(Op), FPTy, Op.getOperand(0)); ++ return DAG.getNode(ISD::BITCAST, SDLoc(Op), Op.getValueType(), Trunc); ++} ++ ++static bool checkUndef(ArrayRef Mask, int Lo, int Hi) { ++ ++ for (int i = Lo, end = Hi; i != end; i++, Hi++) ++ if (!((Mask[i] == -1) || (Mask[i] == Hi))) ++ return false; ++ return true; ++} ++ ++static bool CheckRev(ArrayRef Mask) { ++ ++ int Num = Mask.size() - 1; ++ for (long unsigned int i = 0; i < Mask.size(); i++, Num--) ++ if (Mask[i] != Num) ++ return false; ++ return true; ++} ++ ++static bool checkHalf(ArrayRef Mask, int Lo, int Hi, int base) { ++ ++ for (int i = Lo; i < Hi; i++) ++ if (Mask[i] != (base + i)) ++ return false; ++ return true; ++} ++ ++static SDValue lowerHalfHalf(const SDLoc &DL, MVT VT, SDValue Op1, SDValue Op2, ++ ArrayRef Mask, SelectionDAG &DAG) { ++ ++ int Num = VT.getVectorNumElements(); ++ int HalfNum = Num / 2; ++ ++ if (Op1->isUndef() || Op2->isUndef() || Mask.size() > (long unsigned int)Num) ++ return SDValue(); ++ ++ if (checkHalf(Mask, HalfNum, Num, Num) && checkHalf(Mask, 0, HalfNum, 0)) { ++ return SDValue(DAG.getMachineNode(LoongArch::XVPERMI_Q, DL, VT, Op2, Op1, ++ DAG.getTargetConstant(48, DL, MVT::i32)), ++ 0); ++ } ++ ++ return SDValue(); ++} ++ ++static bool checkHalfUndef(ArrayRef Mask, int Lo, int Hi) { ++ ++ for (int i = Lo; i < Hi; i++) ++ if (Mask[i] != -1) ++ return false; ++ return true; ++} ++ ++// Lowering vectors with half undef data, ++// use EXTRACT_SUBVECTOR and INSERT_SUBVECTOR instead of VECTOR_SHUFFLE ++static SDValue lowerHalfUndef(const SDLoc &DL, MVT VT, SDValue Op1, SDValue Op2, ++ ArrayRef Mask, SelectionDAG &DAG) { ++ ++ int Num = VT.getVectorNumElements(); ++ int HalfNum = Num / 2; ++ MVT HalfVT 
= MVT::getVectorVT(VT.getVectorElementType(), HalfNum); ++ MVT VT1 = Op1.getSimpleValueType(); ++ SDValue Op; ++ ++ bool check1 = Op1->isUndef() && (!Op2->isUndef()); ++ bool check2 = Op2->isUndef() && (!Op1->isUndef()); ++ ++ if ((check1 || check2) && (VT1 == VT)) { ++ if (check1) { ++ Op = DAG.getNode(ISD::BITCAST, DL, MVT::v4i64, Op2); ++ } else if (check2) { ++ Op = DAG.getNode(ISD::BITCAST, DL, MVT::v4i64, Op1); ++ } ++ ++ if (VT == MVT::v32i8 && CheckRev(Mask)) { ++ SDValue Vector; ++ SDValue Rev[4]; ++ SDValue Ext[4]; ++ for (int i = 0; i < 4; i++) { ++ Ext[i] = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::i64, Op, ++ DAG.getConstant(i, DL, MVT::i32)); ++ Rev[i] = DAG.getNode(LoongArchISD::REVBD, DL, MVT::i64, Ext[i]); ++ } ++ ++ Vector = ++ DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, MVT::v4i64, DAG.getUNDEF(VT), ++ Rev[3], DAG.getConstant(3, DL, MVT::i32)); ++ Vector = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, MVT::v4i64, Vector, ++ Rev[2], DAG.getConstant(2, DL, MVT::i32)); ++ Vector = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, MVT::v4i64, Vector, ++ Rev[1], DAG.getConstant(1, DL, MVT::i32)); ++ Vector = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, MVT::v4i64, Vector, ++ Rev[0], DAG.getConstant(0, DL, MVT::i32)); ++ ++ Vector = DAG.getNode(ISD::BITCAST, DL, MVT::v32i8, Vector); ++ ++ return Vector; ++ } ++ } ++ ++ if (checkHalfUndef(Mask, HalfNum, Num) && checkUndef(Mask, 0, HalfNum)) { ++ SDValue High = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, HalfVT, Op1, ++ DAG.getConstant(HalfNum, DL, MVT::i64)); ++ return DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT, DAG.getUNDEF(VT), High, ++ DAG.getConstant(0, DL, MVT::i64)); ++ } ++ ++ if (checkHalfUndef(Mask, HalfNum, Num) && (VT == MVT::v8i32) && ++ (Mask[0] == 0) && (Mask[1] == 1) && (Mask[2] == (Num + 2)) && ++ (Mask[3] == (Num + 3))) { ++ ++ SDValue Val1 = ++ SDValue(DAG.getMachineNode(LoongArch::XVPERMI_Q, DL, VT, Op2, Op1, ++ DAG.getTargetConstant(32, DL, MVT::i32)), ++ 0); ++ ++ SDValue Val2 = ++ SDValue(DAG.getMachineNode(LoongArch::XVPERMI_D, DL, VT, Val1, ++ DAG.getTargetConstant(12, DL, MVT::i32)), ++ 0); ++ ++ SDValue Val3 = SDValue( ++ DAG.getMachineNode(LoongArch::XVPERMI_Q, DL, VT, Val2, DAG.getUNDEF(VT), ++ DAG.getTargetConstant(2, DL, MVT::i32)), ++ 0); ++ return Val3; ++ } ++ ++ if (checkHalfUndef(Mask, 0, HalfNum) && checkUndef(Mask, HalfNum, Num)) { ++ SDValue Low = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, HalfVT, Op1, ++ DAG.getConstant(0, DL, MVT::i32)); ++ return DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT, DAG.getUNDEF(VT), Low, ++ DAG.getConstant(HalfNum, DL, MVT::i32)); ++ } ++ ++ if (checkHalfUndef(Mask, 0, HalfNum) && (VT == MVT::v8i32) && ++ (Mask[HalfNum] == HalfNum) && (Mask[HalfNum + 1] == (HalfNum + 1)) && ++ (Mask[HalfNum + 2] == (2 * Num - 2)) && ++ (Mask[HalfNum + 3] == (2 * Num - 1))) { ++ ++ SDValue Val1 = ++ SDValue(DAG.getMachineNode(LoongArch::XVPERMI_Q, DL, VT, Op2, Op1, ++ DAG.getTargetConstant(49, DL, MVT::i32)), ++ 0); ++ ++ SDValue Val2 = ++ SDValue(DAG.getMachineNode(LoongArch::XVPERMI_D, DL, VT, Val1, ++ DAG.getTargetConstant(12, DL, MVT::i32)), ++ 0); ++ ++ SDValue Val3 = SDValue( ++ DAG.getMachineNode(LoongArch::XVPERMI_Q, DL, VT, Val2, DAG.getUNDEF(VT), ++ DAG.getTargetConstant(32, DL, MVT::i32)), ++ 0); ++ return Val3; ++ } ++ ++ if ((VT == MVT::v8i32) || (VT == MVT::v4i64)) { ++ int def = 0; ++ int j = 0; ++ int ext[3]; ++ int ins[3]; ++ bool useOp1[3] = {true, true, true}; ++ bool checkdef = true; ++ ++ for (int i = 0; i < Num; i++) { ++ if (def > 2) { ++ checkdef = false; ++ break; ++ } ++ if (Mask[i] 
!= -1) { ++ def++; ++ ins[j] = i; ++ if (Mask[i] >= Num) { ++ ext[j] = Mask[i] - Num; ++ useOp1[j] = false; ++ } else { ++ ext[j] = Mask[i]; ++ } ++ j++; ++ } ++ } ++ ++ if (checkdef) { ++ SDValue Vector = DAG.getUNDEF(VT); ++ EVT EltTy = VT.getVectorElementType(); ++ SDValue Ext[2]; ++ ++ if (check1 || check2) { ++ for (int i = 0; i < def; i++) { ++ if (check1) { ++ Ext[i] = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltTy, Op2, ++ DAG.getConstant(ext[i], DL, MVT::i32)); ++ Vector = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, VT, Vector, Ext[i], ++ DAG.getConstant(ins[i], DL, MVT::i32)); ++ } else if (check2) { ++ Ext[i] = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltTy, Op1, ++ DAG.getConstant(ext[i], DL, MVT::i32)); ++ Vector = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, VT, Vector, Ext[i], ++ DAG.getConstant(ins[i], DL, MVT::i32)); ++ } ++ } ++ return Vector; ++ } else { ++ for (int i = 0; i < def; i++) { ++ if (!useOp1[i]) { ++ Ext[i] = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltTy, Op2, ++ DAG.getConstant(ext[i], DL, MVT::i32)); ++ Vector = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, VT, Vector, Ext[i], ++ DAG.getConstant(ins[i], DL, MVT::i32)); ++ } else { ++ Ext[i] = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltTy, Op1, ++ DAG.getConstant(ext[i], DL, MVT::i32)); ++ Vector = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, VT, Vector, Ext[i], ++ DAG.getConstant(ins[i], DL, MVT::i32)); ++ } ++ } ++ return Vector; ++ } ++ } ++ } ++ ++ return SDValue(); ++} ++ ++static SDValue lowerHalfUndef_LSX(const SDLoc &DL, EVT ResTy, MVT VT, ++ SDValue Op1, SDValue Op2, ArrayRef Mask, ++ SelectionDAG &DAG) { ++ ++ MVT VT1 = Op1.getSimpleValueType(); ++ ++ bool check1 = Op1->isUndef() && (!Op2->isUndef()); ++ bool check2 = Op2->isUndef() && (!Op1->isUndef()); ++ ++ if ((check1 || check2) && (VT1 == VT)) { ++ SDValue Op; ++ ++ if (VT == MVT::v16i8 && CheckRev(Mask)) { ++ ++ if (check1) { ++ Op = DAG.getNode(ISD::BITCAST, DL, MVT::v2i64, Op2); ++ } else if (check2) { ++ Op = DAG.getNode(ISD::BITCAST, DL, MVT::v2i64, Op1); ++ } ++ ++ SDValue Vector; ++ SDValue Rev[2]; ++ SDValue Ext[2]; ++ for (int i = 0; i < 2; i++) { ++ Ext[i] = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::i64, Op, ++ DAG.getConstant(i, DL, MVT::i32)); ++ Rev[i] = DAG.getNode(LoongArchISD::REVBD, DL, MVT::i64, Ext[i]); ++ } ++ ++ Vector = ++ DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, MVT::v2i64, DAG.getUNDEF(VT), ++ Rev[1], DAG.getConstant(1, DL, MVT::i32)); ++ Vector = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, MVT::v2i64, Vector, ++ Rev[0], DAG.getConstant(0, DL, MVT::i32)); ++ ++ Vector = DAG.getNode(ISD::BITCAST, DL, MVT::v16i8, Vector); ++ ++ return Vector; ++ } ++ } ++ ++ return SDValue(); ++} ++ ++// Use SDNode of LoongArchINSVE instead of ++// a series of EXTRACT_VECTOR_ELT and INSERT_VECTOR_ELT ++static SDValue lowerVECTOR_SHUFFLE_INSVE(const SDLoc &DL, MVT VT, EVT ResTy, ++ SDValue Op1, SDValue Op2, ++ ArrayRef Mask, ++ SelectionDAG &DAG) { ++ ++ int Num = VT.getVectorNumElements(); ++ if (ResTy == MVT::v16i16 || ResTy == MVT::v32i8) ++ return SDValue(); ++ ++ int CheckOne = 0; ++ int CheckOther = 0; ++ int Idx; ++ ++ for (int i = 0; i < Num; i++) { ++ if ((Mask[i] == i) || (Mask[i] == -1)) { ++ CheckOther++; ++ } else if (Mask[i] == Num) { ++ CheckOne++; ++ Idx = i; ++ } else ++ return SDValue(); ++ } ++ ++ if ((CheckOne != 1) || (CheckOther != (Num - 1))) ++ return SDValue(); ++ else { ++ return DAG.getNode(LoongArchISD::INSVE, DL, ResTy, Op1, Op2, ++ DAG.getConstant(Idx, DL, MVT::i32)); ++ } ++ ++ return SDValue(); ++} ++ ++static SDValue 
lowerVECTOR_SHUFFLE_XVPICKVE(const SDLoc &DL, MVT VT, EVT ResTy, ++ SDValue Op1, SDValue Op2, ++ ArrayRef Mask, ++ SelectionDAG &DAG) { ++ ++ int Num = VT.getVectorNumElements(); ++ if (ResTy == MVT::v16i16 || ResTy == MVT::v32i8 || ++ (!ISD::isBuildVectorAllZeros(Op1.getNode()))) ++ return SDValue(); ++ ++ bool CheckV = true; ++ ++ if ((Mask[0] < Num) || (Mask[0] > (2 * Num - 1))) ++ CheckV = false; ++ ++ for (int i = 1; i < Num; i++) { ++ if (Mask[i] != 0) { ++ CheckV = false; ++ break; ++ } ++ } ++ ++ if (!CheckV) ++ return SDValue(); ++ else { ++ return DAG.getNode(LoongArchISD::XVPICKVE, DL, ResTy, Op1, Op2, ++ DAG.getConstant(Mask[0] - Num, DL, MVT::i32)); ++ } ++ ++ return SDValue(); ++} ++ ++static SDValue lowerVECTOR_SHUFFLE_XVSHUF(const SDLoc &DL, MVT VT, EVT ResTy, ++ SDValue Op1, SDValue Op2, ++ ArrayRef Mask, ++ SelectionDAG &DAG) { ++ ++ if (VT == MVT::v4i64) { ++ int Num = VT.getVectorNumElements(); ++ ++ bool CheckV = true; ++ for (int i = 0; i < Num; i++) { ++ if (Mask[i] != (i * 2)) { ++ CheckV = false; ++ break; ++ } ++ } ++ ++ if (!CheckV) ++ return SDValue(); ++ else { ++ SDValue Res = DAG.getNode(LoongArchISD::XVSHUF4I, DL, ResTy, Op1, Op2, ++ DAG.getConstant(8, DL, MVT::i32)); ++ return DAG.getNode(LoongArchISD::XVPERMI, DL, ResTy, Res, ++ DAG.getConstant(0xD8, DL, MVT::i32)); ++ } ++ } else ++ return SDValue(); ++} ++ ++// Lower VECTOR_SHUFFLE into one of a number of instructions depending on the ++// indices in the shuffle. ++SDValue LoongArchTargetLowering::lowerVECTOR_SHUFFLE(SDValue Op, ++ SelectionDAG &DAG) const { ++ ShuffleVectorSDNode *Node = cast(Op); ++ EVT ResTy = Op->getValueType(0); ++ ArrayRef Mask = Node->getMask(); ++ SDValue Op1 = Op.getOperand(0); ++ SDValue Op2 = Op.getOperand(1); ++ MVT VT = Op.getSimpleValueType(); ++ SDLoc DL(Op); ++ ++ if (ResTy.is128BitVector()) { ++ ++ int ResTyNumElts = ResTy.getVectorNumElements(); ++ SmallVector Indices; ++ ++ for (int i = 0; i < ResTyNumElts; ++i) ++ Indices.push_back(Node->getMaskElt(i)); ++ ++ SDValue Result; ++ if (isVECTOR_SHUFFLE_VREPLVEI(Op, ResTy, Indices, DAG)) ++ return lowerVECTOR_SHUFFLE_VSHF(Op, ResTy, Indices, DAG); ++ if ((Result = lowerVECTOR_SHUFFLE_VPACKEV(Op, ResTy, Indices, DAG))) ++ return Result; ++ if ((Result = lowerVECTOR_SHUFFLE_VPACKOD(Op, ResTy, Indices, DAG))) ++ return Result; ++ if ((Result = lowerVECTOR_SHUFFLE_VILVH(Op, ResTy, Indices, DAG))) ++ return Result; ++ if ((Result = lowerVECTOR_SHUFFLE_VILVL(Op, ResTy, Indices, DAG))) ++ return Result; ++ if ((Result = lowerVECTOR_SHUFFLE_VPICKEV(Op, ResTy, Indices, DAG))) ++ return Result; ++ if ((Result = lowerVECTOR_SHUFFLE_VPICKOD(Op, ResTy, Indices, DAG))) ++ return Result; ++ if ((Result = lowerVECTOR_SHUFFLE_SHF(Op, ResTy, Indices, DAG))) ++ return Result; ++ if ((Result = lowerHalfUndef_LSX(DL, ResTy, VT, Op1, Op2, Mask, DAG))) ++ return Result; ++ return lowerVECTOR_SHUFFLE_VSHF(Op, ResTy, Indices, DAG); ++ ++ } else if (ResTy.is256BitVector()) { ++ int ResTyNumElts = ResTy.getVectorNumElements(); ++ SmallVector Indices; ++ ++ for (int i = 0; i < ResTyNumElts; ++i) ++ Indices.push_back(Node->getMaskElt(i)); ++ ++ SDValue Result; ++ if ((Result = lowerHalfHalf(DL, VT, Op1, Op2, Mask, DAG))) ++ return Result; ++ if ((Result = lowerHalfUndef(DL, VT, Op1, Op2, Mask, DAG))) ++ return Result; ++ if (isVECTOR_SHUFFLE_XVREPLVEI(Op, ResTy, Indices, DAG)) ++ return SDValue(); ++ if ((Result = lowerVECTOR_SHUFFLE_XVPACKEV(Op, ResTy, Indices, DAG))) ++ return Result; ++ if ((Result = lowerVECTOR_SHUFFLE_XVPACKOD(Op, ResTy, Indices, 
DAG))) ++ return Result; ++ if ((Result = lowerVECTOR_SHUFFLE_XVILVH(Op, ResTy, Indices, DAG))) ++ return Result; ++ if ((Result = lowerVECTOR_SHUFFLE_XVILVL(Op, ResTy, Indices, DAG))) ++ return Result; ++ if ((Result = lowerVECTOR_SHUFFLE_XVPICKEV(Op, ResTy, Indices, DAG))) ++ return Result; ++ if ((Result = lowerVECTOR_SHUFFLE_XVPICKOD(Op, ResTy, Indices, DAG))) ++ return Result; ++ if ((Result = lowerVECTOR_SHUFFLE_XSHF(Op, ResTy, Indices, DAG))) ++ return Result; ++ if ((Result = ++ lowerVECTOR_SHUFFLE_INSVE(DL, VT, ResTy, Op1, Op2, Mask, DAG))) ++ return Result; ++ if ((Result = ++ lowerVECTOR_SHUFFLE_XVPICKVE(DL, VT, ResTy, Op1, Op2, Mask, DAG))) ++ return Result; ++ if ((Result = ++ lowerVECTOR_SHUFFLE_XVSHUF(DL, VT, ResTy, Op1, Op2, Mask, DAG))) ++ return Result; ++ } ++ ++ return SDValue(); ++} ++ ++SDValue LoongArchTargetLowering::lowerEH_DWARF_CFA(SDValue Op, ++ SelectionDAG &DAG) const { ++ ++ // Return a fixed StackObject with offset 0 which points to the old stack ++ // pointer. ++ MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo(); ++ EVT ValTy = Op->getValueType(0); ++ int FI = MFI.CreateFixedObject(Op.getValueSizeInBits() / 8, 0, false); ++ return DAG.getFrameIndex(FI, ValTy); ++} ++ ++// Check whether the tail call optimization conditions are met ++bool LoongArchTargetLowering::isEligibleForTailCallOptimization( ++ const CCState &CCInfo, CallLoweringInfo &CLI, MachineFunction &MF, ++ unsigned NextStackOffset, const LoongArchFunctionInfo &FI) const { ++ ++ auto CalleeCC = CLI.CallConv; ++ auto IsVarArg = CLI.IsVarArg; ++ auto &Outs = CLI.Outs; ++ auto &Caller = MF.getFunction(); ++ auto CallerCC = Caller.getCallingConv(); ++ ++ if (Caller.getFnAttribute("disable-tail-calls").getValueAsString() == "true") ++ return false; ++ ++ if (Caller.hasFnAttribute("interrupt")) ++ return false; ++ ++ if (IsVarArg) ++ return false; ++ ++ if (getTargetMachine().getCodeModel() == CodeModel::Large) ++ return false; ++ ++ if (getTargetMachine().getRelocationModel() == Reloc::Static) ++ return false; ++ ++ // Do not tail call optimize if the stack is used to pass parameters. ++ if (CCInfo.getNextStackOffset() != 0) ++ return false; ++ ++ // Do not tail call optimize functions with byval parameters. ++ for (auto &Arg : Outs) ++ if (Arg.Flags.isByVal()) ++ return false; ++ ++ // Do not tail call optimize if either caller or callee uses structret ++ // semantics. ++ auto IsCallerStructRet = Caller.hasStructRetAttr(); ++ auto IsCalleeStructRet = Outs.empty() ? false : Outs[0].Flags.isSRet(); ++ if (IsCallerStructRet || IsCalleeStructRet) ++ return false; ++ ++ // The callee has to preserve all registers the caller needs to preserve. ++ const LoongArchRegisterInfo *TRI = Subtarget.getRegisterInfo(); ++ const uint32_t *CallerPreserved = TRI->getCallPreservedMask(MF, CallerCC); ++ if (CalleeCC != CallerCC) { ++ const uint32_t *CalleePreserved = TRI->getCallPreservedMask(MF, CalleeCC); ++ if (!TRI->regmaskSubsetEqual(CallerPreserved, CalleePreserved)) ++ return false; ++ } ++ ++ // Return false if either the callee or caller has a byval argument. ++ if (CCInfo.getInRegsParamsCount() > 0 || FI.hasByvalArg()) ++ return false; ++ ++ // Return true if the callee's argument area is no larger than the ++ // caller's. 
++ return NextStackOffset <= FI.getIncomingArgSize(); ++} ++ ++//===----------------------------------------------------------------------===// ++// Calling Convention Implementation ++//===----------------------------------------------------------------------===// ++ ++//===----------------------------------------------------------------------===// ++// TODO: Implement a generic logic using tblgen that can support this. ++// LoongArch 32-bit ABI rules: ++// --- ++// i32 - Passed in A0, A1, A2, A3 and stack ++// f32 - Only passed in f32 registers if no int reg has been used yet to hold ++// an argument. Otherwise, passed in A1, A2, A3 and stack. ++// f64 - Only passed in two aliased f32 registers if no int reg has been used ++// yet to hold an argument. Otherwise, use A2, A3 and stack. If A1 is ++// not used, it must be shadowed. If only A3 is available, shadow it and ++// go to stack. ++// vXiX - Received as scalarized i32s, passed in A0 - A3 and the stack. ++// vXf32 - Passed in either a pair of registers {A0, A1}, {A2, A3} or {A0 - A3} ++// with the remainder spilled to the stack. ++// vXf64 - Passed in either {A0, A1, A2, A3} or {A2, A3} and in both cases ++// spilling the remainder to the stack. ++// ++// For vararg functions, all arguments are passed in A0, A1, A2, A3 and stack. ++//===----------------------------------------------------------------------===// ++ ++static bool CC_LoongArchILP32(unsigned ValNo, MVT ValVT, MVT LocVT, ++ CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, ++ CCState &State, ArrayRef F64Regs) { ++ static const MCPhysReg IntRegs[] = { LoongArch::A0, LoongArch::A1, LoongArch::A2, LoongArch::A3 }; ++ ++ const LoongArchCCState * LoongArchState = static_cast(&State); ++ ++ static const MCPhysReg F32Regs[] = { LoongArch::F12, LoongArch::F14 }; ++ ++ static const MCPhysReg FloatVectorIntRegs[] = { LoongArch::A0, LoongArch::A2 }; ++ ++ // Do not process byval args here. ++ if (ArgFlags.isByVal()) ++ return true; ++ ++ ++ // Promote i8 and i16 ++ if (LocVT == MVT::i8 || LocVT == MVT::i16) { ++ LocVT = MVT::i32; ++ if (ArgFlags.isSExt()) ++ LocInfo = CCValAssign::SExt; ++ else if (ArgFlags.isZExt()) ++ LocInfo = CCValAssign::ZExt; ++ else ++ LocInfo = CCValAssign::AExt; ++ } ++ ++ unsigned Reg; ++ ++ // f32 and f64 are allocated in A0, A1, A2, A3 when either of the following ++ // is true: function is vararg, argument is 3rd or higher, there is previous ++ // argument which is not f32 or f64. ++ bool AllocateFloatsInIntReg = State.isVarArg() || ValNo > 1 || ++ State.getFirstUnallocated(F32Regs) != ValNo; ++ Align OrigAlign = ArgFlags.getNonZeroOrigAlign(); ++ bool isI64 = (ValVT == MVT::i32 && OrigAlign == Align(8)); ++ bool isVectorFloat = LoongArchState->WasOriginalArgVectorFloat(ValNo); ++ ++ // The LoongArch vector ABI for floats passes them in a pair of registers ++ if (ValVT == MVT::i32 && isVectorFloat) { ++ // This is the start of an vector that was scalarized into an unknown number ++ // of components. It doesn't matter how many there are. Allocate one of the ++ // notional 8 byte aligned registers which map onto the argument stack, and ++ // shadow the register lost to alignment requirements. ++ if (ArgFlags.isSplit()) { ++ Reg = State.AllocateReg(FloatVectorIntRegs); ++ if (Reg == LoongArch::A2) ++ State.AllocateReg(LoongArch::A1); ++ else if (Reg == 0) ++ State.AllocateReg(LoongArch::A3); ++ } else { ++ // If we're an intermediate component of the split, we can just attempt to ++ // allocate a register directly. 
++ Reg = State.AllocateReg(IntRegs); ++ } ++ } else if (ValVT == MVT::i32 || (ValVT == MVT::f32 && AllocateFloatsInIntReg)) { ++ Reg = State.AllocateReg(IntRegs); ++ // If this is the first part of an i64 arg, ++ // the allocated register must be either A0 or A2. ++ if (isI64 && (Reg == LoongArch::A1 || Reg == LoongArch::A3)) ++ Reg = State.AllocateReg(IntRegs); ++ LocVT = MVT::i32; ++ } else if (ValVT == MVT::f64 && AllocateFloatsInIntReg) { ++ // Allocate int register and shadow next int register. If first ++ // available register is LoongArch::A1 or LoongArch::A3, shadow it too. ++ Reg = State.AllocateReg(IntRegs); ++ if (Reg == LoongArch::A1 || Reg == LoongArch::A3) ++ Reg = State.AllocateReg(IntRegs); ++ State.AllocateReg(IntRegs); ++ LocVT = MVT::i32; ++ } else if (ValVT.isFloatingPoint() && !AllocateFloatsInIntReg) { ++ // we are guaranteed to find an available float register ++ if (ValVT == MVT::f32) { ++ Reg = State.AllocateReg(F32Regs); ++ // Shadow int register ++ State.AllocateReg(IntRegs); ++ } else { ++ Reg = State.AllocateReg(F64Regs); ++ // Shadow int registers ++ unsigned Reg2 = State.AllocateReg(IntRegs); ++ if (Reg2 == LoongArch::A1 || Reg2 == LoongArch::A3) ++ State.AllocateReg(IntRegs); ++ State.AllocateReg(IntRegs); ++ } ++ } else ++ llvm_unreachable("Cannot handle this ValVT."); ++ ++ if (!Reg) { ++ unsigned Offset = State.AllocateStack(ValVT.getStoreSize(), OrigAlign); ++ State.addLoc(CCValAssign::getMem(ValNo, ValVT, Offset, LocVT, LocInfo)); ++ } else ++ State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo)); ++ ++ return false; ++} ++ ++static bool CC_LoongArchILP32_FP32(unsigned ValNo, MVT ValVT, ++ MVT LocVT, CCValAssign::LocInfo LocInfo, ++ ISD::ArgFlagsTy ArgFlags, CCState &State) { ++ static const MCPhysReg F64Regs[] = {LoongArch::F0_64, LoongArch::F1_64, LoongArch::F2_64, \ ++ LoongArch::F3_64, LoongArch::F4_64, LoongArch::F5_64, \ ++ LoongArch::F6_64, LoongArch::F7_64 }; ++ ++ return CC_LoongArchILP32(ValNo, ValVT, LocVT, LocInfo, ArgFlags, State, F64Regs); ++} ++ ++static bool CC_LoongArchILP32_FP64(unsigned ValNo, MVT ValVT, ++ MVT LocVT, CCValAssign::LocInfo LocInfo, ++ ISD::ArgFlagsTy ArgFlags, CCState &State) { ++ static const MCPhysReg F64Regs[] = {LoongArch::F0_64, LoongArch::F1_64, LoongArch::F2_64, \ ++ LoongArch::F3_64, LoongArch::F4_64, LoongArch::F5_64, \ ++ LoongArch::F6_64, LoongArch::F7_64 }; ++ ++ return CC_LoongArchILP32(ValNo, ValVT, LocVT, LocInfo, ArgFlags, State, F64Regs); ++} ++ ++static bool CC_LoongArch_F128(unsigned ValNo, MVT ValVT, ++ MVT LocVT, CCValAssign::LocInfo LocInfo, ++ ISD::ArgFlagsTy ArgFlags, CCState &State) LLVM_ATTRIBUTE_UNUSED; ++ ++static bool CC_LoongArch_F128(unsigned ValNo, MVT ValVT, ++ MVT LocVT, CCValAssign::LocInfo LocInfo, ++ ISD::ArgFlagsTy ArgFlags, CCState &State) { ++ ++ static const MCPhysReg ArgRegs[8] = { ++ LoongArch::A0_64, LoongArch::A1_64, LoongArch::A2_64, LoongArch::A3_64, ++ LoongArch::A4_64, LoongArch::A5_64, LoongArch::A6_64, LoongArch::A7_64}; ++ ++ unsigned Idx = State.getFirstUnallocated(ArgRegs); ++ // Skip 'odd' register if necessary. 
++ if (!ArgFlags.isSplitEnd() && Idx != array_lengthof(ArgRegs) && Idx % 2 == 1) ++ State.AllocateReg(ArgRegs); ++ return true; ++} ++ ++static bool CC_LoongArchILP32(unsigned ValNo, MVT ValVT, MVT LocVT, ++ CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, ++ CCState &State) LLVM_ATTRIBUTE_UNUSED; ++ ++#include "LoongArchGenCallingConv.inc" ++ ++ CCAssignFn *LoongArchTargetLowering::CCAssignFnForCall() const{ ++ return CC_LoongArch; ++ } ++ ++ CCAssignFn *LoongArchTargetLowering::CCAssignFnForReturn() const{ ++ return RetCC_LoongArch; ++ } ++ ++//===----------------------------------------------------------------------===// ++// Call Calling Convention Implementation ++//===----------------------------------------------------------------------===// ++SDValue LoongArchTargetLowering::passArgOnStack(SDValue StackPtr, unsigned Offset, ++ SDValue Chain, SDValue Arg, ++ const SDLoc &DL, bool IsTailCall, ++ SelectionDAG &DAG) const { ++ if (!IsTailCall) { ++ SDValue PtrOff = ++ DAG.getNode(ISD::ADD, DL, getPointerTy(DAG.getDataLayout()), StackPtr, ++ DAG.getIntPtrConstant(Offset, DL)); ++ return DAG.getStore(Chain, DL, Arg, PtrOff, MachinePointerInfo()); ++ } ++ ++ MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo(); ++ int FI = MFI.CreateFixedObject(Arg.getValueSizeInBits() / 8, Offset, false); ++ SDValue FIN = DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout())); ++ return DAG.getStore(Chain, DL, Arg, FIN, MachinePointerInfo(), ++ /* Alignment = */ 0, MachineMemOperand::MOVolatile); ++} ++ ++void LoongArchTargetLowering::getOpndList( ++ SmallVectorImpl &Ops, ++ std::deque> &RegsToPass, bool IsPICCall, ++ bool GlobalOrExternal, bool IsCallReloc, CallLoweringInfo &CLI, ++ SDValue Callee, SDValue Chain, bool IsTailCall) const { ++ // Build a sequence of copy-to-reg nodes chained together with token ++ // chain and flag operands which copy the outgoing args into registers. ++ // The InFlag in necessary since all emitted instructions must be ++ // stuck together. ++ SDValue InFlag; ++ ++ Ops.push_back(Callee); ++ ++ for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) { ++ Chain = CLI.DAG.getCopyToReg(Chain, CLI.DL, RegsToPass[i].first, ++ RegsToPass[i].second, InFlag); ++ InFlag = Chain.getValue(1); ++ } ++ ++ // Add argument registers to the end of the list so that they are ++ // known live into the call. ++ for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) ++ Ops.push_back(CLI.DAG.getRegister(RegsToPass[i].first, ++ RegsToPass[i].second.getValueType())); ++ ++ if (!IsTailCall) { ++ // Add a register mask operand representing the call-preserved registers. ++ const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo(); ++ const uint32_t *Mask = ++ TRI->getCallPreservedMask(CLI.DAG.getMachineFunction(), CLI.CallConv); ++ assert(Mask && "Missing call preserved mask for calling convention"); ++ Ops.push_back(CLI.DAG.getRegisterMask(Mask)); ++ } ++ ++ if (InFlag.getNode()) ++ Ops.push_back(InFlag); ++} ++ ++void LoongArchTargetLowering::AdjustInstrPostInstrSelection(MachineInstr &MI, ++ SDNode *Node) const { ++ switch (MI.getOpcode()) { ++ default: ++ return; ++ } ++} ++ ++/// LowerCall - functions arguments are copied from virtual regs to ++/// (physical regs)/(stack frame), CALLSEQ_START and CALLSEQ_END are emitted. 
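A minimal standalone sketch (not LLVM API code, and not part of the patch itself) of the register-skipping rule that CC_LoongArch_F128 above implements: a split two-register (128-bit) argument must start in an even-numbered argument register, so when the next free register index is odd that register is shadowed first. The ToyCCState type and the register-name strings below are illustrative only.

    #include <array>
    #include <cstdio>
    #include <string>
    #include <utility>

    // Eight integer argument registers, as in the LP64 register list above.
    static const std::array<const char *, 8> ArgRegs = {
        "A0", "A1", "A2", "A3", "A4", "A5", "A6", "A7"};

    struct ToyCCState {
      unsigned NextFree = 0; // index of the first unallocated register

      // Hand out the next free register, or fall back to the stack.
      std::string allocate() {
        return NextFree < ArgRegs.size() ? ArgRegs[NextFree++] : "stack";
      }

      // A split 128-bit value must start in an even register; if the next
      // free index is odd, that register is shadowed (skipped) first.
      std::pair<std::string, std::string> allocate128() {
        if (NextFree % 2 == 1)
          ++NextFree;
        std::string Lo = allocate();
        std::string Hi = allocate();
        return {Lo, Hi};
      }
    };

    int main() {
      ToyCCState State;
      std::printf("i64  -> %s\n", State.allocate().c_str());   // A0
      std::pair<std::string, std::string> P = State.allocate128(); // skips A1
      std::printf("i128 -> %s:%s\n", P.first.c_str(), P.second.c_str()); // A2:A3
      return 0;
    }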
++SDValue ++LoongArchTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, ++ SmallVectorImpl &InVals) const { ++ SelectionDAG &DAG = CLI.DAG; ++ SDLoc DL = CLI.DL; ++ SmallVectorImpl &Outs = CLI.Outs; ++ SmallVectorImpl &OutVals = CLI.OutVals; ++ SmallVectorImpl &Ins = CLI.Ins; ++ SDValue Chain = CLI.Chain; ++ SDValue Callee = CLI.Callee; ++ bool &IsTailCall = CLI.IsTailCall; ++ CallingConv::ID CallConv = CLI.CallConv; ++ bool IsVarArg = CLI.IsVarArg; ++ ++ MachineFunction &MF = DAG.getMachineFunction(); ++ MachineFrameInfo &MFI = MF.getFrameInfo(); ++ const TargetFrameLowering *TFL = Subtarget.getFrameLowering(); ++ bool IsPIC = isPositionIndependent(); ++ ++ // Analyze operands of the call, assigning locations to each operand. ++ SmallVector ArgLocs; ++ LoongArchCCState CCInfo( ++ CallConv, IsVarArg, DAG.getMachineFunction(), ArgLocs, *DAG.getContext(), ++ LoongArchCCState::getSpecialCallingConvForCallee(Callee.getNode(), Subtarget)); ++ ++ const ExternalSymbolSDNode *ES = ++ dyn_cast_or_null(Callee.getNode()); ++ ++ // There is one case where CALLSEQ_START..CALLSEQ_END can be nested, which ++ // is during the lowering of a call with a byval argument which produces ++ // a call to memcpy. For the ILP32D/ILP32F/ILP32S case, this causes the caller ++ // to allocate stack space for the reserved argument area for the callee, then ++ // recursively again for the memcpy call. In the NEWABI case, this doesn't ++ // occur as those ABIs mandate that the callee allocates the reserved argument ++ // area. We do still produce nested CALLSEQ_START..CALLSEQ_END with zero space ++ // though. ++ // ++ // If the callee has a byval argument and memcpy is used, we are mandated ++ // to already have produced a reserved argument area for the callee for ++ // ILP32D/ILP32F/ILP32S. Therefore, the reserved argument area can be reused ++ // for both calls. ++ // ++ // Other cases of calling memcpy cannot have a chain with a CALLSEQ_START ++ // present, as we have yet to hook that node onto the chain. ++ // ++ // Hence, the CALLSEQ_START and CALLSEQ_END nodes can be eliminated in this ++ // case. GCC does a similar trick, in that wherever possible, it calculates ++ // the maximum out going argument area (including the reserved area), and ++ // preallocates the stack space on entrance to the caller. ++ // ++ // FIXME: We should do the same for efficiency and space. ++ ++ bool MemcpyInByVal = ES && ++ StringRef(ES->getSymbol()) == StringRef("memcpy") && ++ Chain.getOpcode() == ISD::CALLSEQ_START; ++ ++ CCInfo.AnalyzeCallOperands(Outs, CC_LoongArch, CLI.getArgs(), ++ ES ? ES->getSymbol() : nullptr); ++ ++ // Get a count of how many bytes are to be pushed on the stack. ++ unsigned NextStackOffset = CCInfo.getNextStackOffset(); ++ ++ // Check if it's really possible to do a tail call. Restrict it to functions ++ // that are part of this compilation unit. ++ if (IsTailCall) { ++ IsTailCall = isEligibleForTailCallOptimization( ++ CCInfo, CLI, MF, NextStackOffset, *MF.getInfo()); ++ if (GlobalAddressSDNode *G = dyn_cast(Callee)) { ++ if (G->getGlobal()->hasExternalWeakLinkage()) ++ IsTailCall = false; ++ } ++ } ++ if (!IsTailCall && CLI.CB && CLI.CB->isMustTailCall()) ++ report_fatal_error("failed to perform tail call elimination on a call " ++ "site marked musttail"); ++ ++ if (IsTailCall) ++ ++NumTailCalls; ++ ++ // Chain is the output chain of the last Load/Store or CopyToReg node. ++ // ByValChain is the output chain of the last Memcpy node created for copying ++ // byval arguments to the stack. 
++ unsigned StackAlignment = TFL->getStackAlignment(); ++ NextStackOffset = alignTo(NextStackOffset, StackAlignment); ++ SDValue NextStackOffsetVal = DAG.getIntPtrConstant(NextStackOffset, DL, true); ++ ++ if (!(IsTailCall || MemcpyInByVal)) ++ Chain = DAG.getCALLSEQ_START(Chain, NextStackOffset, 0, DL); ++ ++ SDValue StackPtr = DAG.getCopyFromReg( ++ Chain, DL, Subtarget.is64Bit() ? LoongArch::SP_64 : LoongArch::SP, ++ getPointerTy(DAG.getDataLayout())); ++ ++ std::deque> RegsToPass; ++ SmallVector MemOpChains; ++ ++ CCInfo.rewindByValRegsInfo(); ++ ++ // Walk the register/memloc assignments, inserting copies/loads. ++ for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) { ++ SDValue Arg = OutVals[i]; ++ CCValAssign &VA = ArgLocs[i]; ++ MVT ValVT = VA.getValVT(), LocVT = VA.getLocVT(); ++ ISD::ArgFlagsTy Flags = Outs[i].Flags; ++ bool UseUpperBits = false; ++ ++ // ByVal Arg. ++ if (Flags.isByVal()) { ++ unsigned FirstByValReg, LastByValReg; ++ unsigned ByValIdx = CCInfo.getInRegsParamsProcessed(); ++ CCInfo.getInRegsParamInfo(ByValIdx, FirstByValReg, LastByValReg); ++ ++ assert(Flags.getByValSize() && ++ "ByVal args of size 0 should have been ignored by front-end."); ++ assert(ByValIdx < CCInfo.getInRegsParamsCount()); ++ assert(!IsTailCall && ++ "Do not tail-call optimize if there is a byval argument."); ++ passByValArg(Chain, DL, RegsToPass, MemOpChains, StackPtr, MFI, DAG, Arg, ++ FirstByValReg, LastByValReg, Flags, ++ VA); ++ CCInfo.nextInRegsParam(); ++ continue; ++ } ++ ++ // Promote the value if needed. ++ switch (VA.getLocInfo()) { ++ default: ++ llvm_unreachable("Unknown loc info!"); ++ case CCValAssign::Full: ++ if (VA.isRegLoc()) { ++ if ((ValVT == MVT::f32 && LocVT == MVT::i32) || ++ (ValVT == MVT::f64 && LocVT == MVT::i64) || ++ (ValVT == MVT::i64 && LocVT == MVT::f64)) ++ Arg = DAG.getNode(ISD::BITCAST, DL, LocVT, Arg); ++ } ++ break; ++ case CCValAssign::BCvt: ++ Arg = DAG.getNode(ISD::BITCAST, DL, LocVT, Arg); ++ break; ++ case CCValAssign::SExtUpper: ++ UseUpperBits = true; ++ LLVM_FALLTHROUGH; ++ case CCValAssign::SExt: ++ Arg = DAG.getNode(ISD::SIGN_EXTEND, DL, LocVT, Arg); ++ break; ++ case CCValAssign::ZExtUpper: ++ UseUpperBits = true; ++ LLVM_FALLTHROUGH; ++ case CCValAssign::ZExt: ++ Arg = DAG.getNode(ISD::ZERO_EXTEND, DL, LocVT, Arg); ++ break; ++ case CCValAssign::AExtUpper: ++ UseUpperBits = true; ++ LLVM_FALLTHROUGH; ++ case CCValAssign::AExt: ++ Arg = DAG.getNode(ISD::ANY_EXTEND, DL, LocVT, Arg); ++ break; ++ } ++ ++ if (UseUpperBits) { ++ unsigned ValSizeInBits = Outs[i].ArgVT.getSizeInBits(); ++ unsigned LocSizeInBits = VA.getLocVT().getSizeInBits(); ++ Arg = DAG.getNode( ++ ISD::SHL, DL, VA.getLocVT(), Arg, ++ DAG.getConstant(LocSizeInBits - ValSizeInBits, DL, VA.getLocVT())); ++ } ++ ++ // Arguments that can be passed on register must be kept at ++ // RegsToPass vector ++ if (VA.isRegLoc()) { ++ RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg)); ++ continue; ++ } ++ ++ // Register can't get to this point... ++ assert(VA.isMemLoc()); ++ ++ // emit ISD::STORE whichs stores the ++ // parameter value to a stack Location ++ MemOpChains.push_back(passArgOnStack(StackPtr, VA.getLocMemOffset(), ++ Chain, Arg, DL, IsTailCall, DAG)); ++ } ++ ++ // Transform all store nodes into one single node because all store ++ // nodes are independent of each other. 
++ if (!MemOpChains.empty()) ++ Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, MemOpChains); ++ ++ // If the callee is a GlobalAddress/ExternalSymbol node (quite common, every ++ // direct call is) turn it into a TargetGlobalAddress/TargetExternalSymbol ++ // node so that legalize doesn't hack it. ++ ++ bool GlobalOrExternal = false, IsCallReloc = false; ++ ++ if (GlobalAddressSDNode *G = dyn_cast(Callee)) { ++ Callee = DAG.getTargetGlobalAddress(G->getGlobal(), DL, ++ getPointerTy(DAG.getDataLayout()), 0, ++ LoongArchII::MO_NO_FLAG); ++ GlobalOrExternal = true; ++ } ++ else if (ExternalSymbolSDNode *S = dyn_cast(Callee)) { ++ const char *Sym = S->getSymbol(); ++ Callee = DAG.getTargetExternalSymbol( ++ Sym, getPointerTy(DAG.getDataLayout()), LoongArchII::MO_NO_FLAG); ++ ++ GlobalOrExternal = true; ++ } ++ ++ SmallVector Ops(1, Chain); ++ SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue); ++ ++ getOpndList(Ops, RegsToPass, IsPIC, GlobalOrExternal, IsCallReloc, CLI, ++ Callee, Chain, IsTailCall); ++ ++ if (IsTailCall) { ++ MF.getFrameInfo().setHasTailCall(); ++ return DAG.getNode(LoongArchISD::TailCall, DL, MVT::Other, Ops); ++ } ++ ++ Chain = DAG.getNode(LoongArchISD::JmpLink, DL, NodeTys, Ops); ++ SDValue InFlag = Chain.getValue(1); ++ ++ // Create the CALLSEQ_END node in the case of where it is not a call to ++ // memcpy. ++ if (!(MemcpyInByVal)) { ++ Chain = DAG.getCALLSEQ_END(Chain, NextStackOffsetVal, ++ DAG.getIntPtrConstant(0, DL, true), InFlag, DL); ++ InFlag = Chain.getValue(1); ++ } ++ ++ // Handle result values, copying them out of physregs into vregs that we ++ // return. ++ return LowerCallResult(Chain, InFlag, CallConv, IsVarArg, Ins, DL, DAG, ++ InVals, CLI); ++} ++ ++/// LowerCallResult - Lower the result values of a call into the ++/// appropriate copies out of appropriate physical registers. ++SDValue LoongArchTargetLowering::LowerCallResult( ++ SDValue Chain, SDValue InFlag, CallingConv::ID CallConv, bool IsVarArg, ++ const SmallVectorImpl &Ins, const SDLoc &DL, ++ SelectionDAG &DAG, SmallVectorImpl &InVals, ++ TargetLowering::CallLoweringInfo &CLI) const { ++ // Assign locations to each value returned by this call. ++ SmallVector RVLocs; ++ LoongArchCCState CCInfo(CallConv, IsVarArg, DAG.getMachineFunction(), RVLocs, ++ *DAG.getContext()); ++ ++ const ExternalSymbolSDNode *ES = ++ dyn_cast_or_null(CLI.Callee.getNode()); ++ CCInfo.AnalyzeCallResult(Ins, RetCC_LoongArch, CLI.RetTy, ++ ES ? ES->getSymbol() : nullptr); ++ ++ // Copy all of the result registers out of their specified physreg. ++ for (unsigned i = 0; i != RVLocs.size(); ++i) { ++ CCValAssign &VA = RVLocs[i]; ++ assert(VA.isRegLoc() && "Can only return in registers!"); ++ ++ SDValue Val = DAG.getCopyFromReg(Chain, DL, RVLocs[i].getLocReg(), ++ RVLocs[i].getLocVT(), InFlag); ++ Chain = Val.getValue(1); ++ InFlag = Val.getValue(2); ++ ++ if (VA.isUpperBitsInLoc()) { ++ unsigned ValSizeInBits = Ins[i].ArgVT.getSizeInBits(); ++ unsigned LocSizeInBits = VA.getLocVT().getSizeInBits(); ++ unsigned Shift = ++ VA.getLocInfo() == CCValAssign::ZExtUpper ? 
ISD::SRL : ISD::SRA; ++ Val = DAG.getNode( ++ Shift, DL, VA.getLocVT(), Val, ++ DAG.getConstant(LocSizeInBits - ValSizeInBits, DL, VA.getLocVT())); ++ } ++ ++ switch (VA.getLocInfo()) { ++ default: ++ llvm_unreachable("Unknown loc info!"); ++ case CCValAssign::Full: ++ break; ++ case CCValAssign::BCvt: ++ Val = DAG.getNode(ISD::BITCAST, DL, VA.getValVT(), Val); ++ break; ++ case CCValAssign::AExt: ++ case CCValAssign::AExtUpper: ++ Val = DAG.getNode(ISD::TRUNCATE, DL, VA.getValVT(), Val); ++ break; ++ case CCValAssign::ZExt: ++ case CCValAssign::ZExtUpper: ++ Val = DAG.getNode(ISD::AssertZext, DL, VA.getLocVT(), Val, ++ DAG.getValueType(VA.getValVT())); ++ Val = DAG.getNode(ISD::TRUNCATE, DL, VA.getValVT(), Val); ++ break; ++ case CCValAssign::SExt: ++ case CCValAssign::SExtUpper: ++ Val = DAG.getNode(ISD::AssertSext, DL, VA.getLocVT(), Val, ++ DAG.getValueType(VA.getValVT())); ++ Val = DAG.getNode(ISD::TRUNCATE, DL, VA.getValVT(), Val); ++ break; ++ } ++ ++ InVals.push_back(Val); ++ } ++ ++ return Chain; ++} ++ ++static SDValue UnpackFromArgumentSlot(SDValue Val, const CCValAssign &VA, ++ EVT ArgVT, const SDLoc &DL, ++ SelectionDAG &DAG) { ++ MVT LocVT = VA.getLocVT(); ++ EVT ValVT = VA.getValVT(); ++ ++ // Shift into the upper bits if necessary. ++ switch (VA.getLocInfo()) { ++ default: ++ break; ++ case CCValAssign::AExtUpper: ++ case CCValAssign::SExtUpper: ++ case CCValAssign::ZExtUpper: { ++ unsigned ValSizeInBits = ArgVT.getSizeInBits(); ++ unsigned LocSizeInBits = VA.getLocVT().getSizeInBits(); ++ unsigned Opcode = ++ VA.getLocInfo() == CCValAssign::ZExtUpper ? ISD::SRL : ISD::SRA; ++ Val = DAG.getNode( ++ Opcode, DL, VA.getLocVT(), Val, ++ DAG.getConstant(LocSizeInBits - ValSizeInBits, DL, VA.getLocVT())); ++ break; ++ } ++ } ++ ++ // If this is an value smaller than the argument slot size (32-bit for ++ // ILP32D/ILP32F/ILP32S, 64-bit for LP64D/LP64S/LP64F), it has been promoted ++ // in some way to the argument slot size. Extract the value and insert any ++ // appropriate assertions regarding sign/zero extension. ++ switch (VA.getLocInfo()) { ++ default: ++ llvm_unreachable("Unknown loc info!"); ++ case CCValAssign::Full: ++ break; ++ case CCValAssign::AExtUpper: ++ case CCValAssign::AExt: ++ Val = DAG.getNode(ISD::TRUNCATE, DL, ValVT, Val); ++ break; ++ case CCValAssign::SExtUpper: ++ case CCValAssign::SExt: { ++ if ((ArgVT == MVT::i1) || (ArgVT == MVT::i8) || (ArgVT == MVT::i16)) { ++ SDValue SubReg = DAG.getTargetConstant(LoongArch::sub_32, DL, MVT::i32); ++ Val = SDValue(DAG.getMachineNode(TargetOpcode::EXTRACT_SUBREG, DL, ValVT, ++ Val, SubReg), ++ 0); ++ } else { ++ Val = ++ DAG.getNode(ISD::AssertSext, DL, LocVT, Val, DAG.getValueType(ValVT)); ++ Val = DAG.getNode(ISD::TRUNCATE, DL, ValVT, Val); ++ } ++ break; ++ } ++ case CCValAssign::ZExtUpper: ++ case CCValAssign::ZExt: ++ Val = DAG.getNode(ISD::AssertZext, DL, LocVT, Val, DAG.getValueType(ValVT)); ++ Val = DAG.getNode(ISD::TRUNCATE, DL, ValVT, Val); ++ break; ++ case CCValAssign::BCvt: ++ Val = DAG.getNode(ISD::BITCAST, DL, ValVT, Val); ++ break; ++ } ++ ++ return Val; ++} ++ ++//===----------------------------------------------------------------------===// ++// Formal Arguments Calling Convention Implementation ++//===----------------------------------------------------------------------===// ++/// LowerFormalArguments - transform physical registers into virtual registers ++/// and generate load operations for arguments places on the stack. 
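A small self-contained sketch (not LLVM code) of the promotion round trip that UnpackFromArgumentSlot and LowerCallResult above handle: a value narrower than the 64-bit argument slot arrives sign- or zero-extended, and the receiving side truncates it back to the value type while remembering the extension, which is what ISD::AssertSext/AssertZext record for later folds. The function names below are made up for illustration.

    #include <cstdint>
    #include <cstdio>

    // Caller side: an i8 travels sign-extended in a 64-bit argument slot.
    uint64_t promoteToSlot(int8_t V) {
      return static_cast<uint64_t>(static_cast<int64_t>(V)); // SExt
    }

    // Callee side: truncate back to the value type; that the upper bits are
    // a sign copy is the fact AssertSext asserts on the DAG node.
    int8_t recoverFromSlot(uint64_t Slot) {
      return static_cast<int8_t>(Slot); // TRUNCATE
    }

    int main() {
      int8_t Orig = -5;
      uint64_t Slot = promoteToSlot(Orig);
      std::printf("slot=0x%016llx recovered=%d\n",
                  static_cast<unsigned long long>(Slot),
                  static_cast<int>(recoverFromSlot(Slot)));
      return 0;
    }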
++SDValue LoongArchTargetLowering::LowerFormalArguments( ++ SDValue Chain, CallingConv::ID CallConv, bool IsVarArg, ++ const SmallVectorImpl &Ins, const SDLoc &DL, ++ SelectionDAG &DAG, SmallVectorImpl &InVals) const { ++ MachineFunction &MF = DAG.getMachineFunction(); ++ MachineFrameInfo &MFI = MF.getFrameInfo(); ++ LoongArchFunctionInfo *LoongArchFI = MF.getInfo(); ++ ++ LoongArchFI->setVarArgsFrameIndex(0); ++ ++ // Used with vargs to acumulate store chains. ++ std::vector OutChains; ++ ++ // Assign locations to all of the incoming arguments. ++ SmallVector ArgLocs; ++ LoongArchCCState CCInfo(CallConv, IsVarArg, DAG.getMachineFunction(), ArgLocs, ++ *DAG.getContext()); ++ const Function &Func = DAG.getMachineFunction().getFunction(); ++ Function::const_arg_iterator FuncArg = Func.arg_begin(); ++ ++ CCInfo.AnalyzeFormalArguments(Ins, CC_LoongArch_FixedArg); ++ LoongArchFI->setFormalArgInfo(CCInfo.getNextStackOffset(), ++ CCInfo.getInRegsParamsCount() > 0); ++ ++ unsigned CurArgIdx = 0; ++ CCInfo.rewindByValRegsInfo(); ++ ++ for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) { ++ CCValAssign &VA = ArgLocs[i]; ++ if (Ins[i].isOrigArg()) { ++ std::advance(FuncArg, Ins[i].getOrigArgIndex() - CurArgIdx); ++ CurArgIdx = Ins[i].getOrigArgIndex(); ++ } ++ EVT ValVT = VA.getValVT(); ++ ISD::ArgFlagsTy Flags = Ins[i].Flags; ++ bool IsRegLoc = VA.isRegLoc(); ++ ++ if (Flags.isByVal()) { ++ assert(Ins[i].isOrigArg() && "Byval arguments cannot be implicit"); ++ unsigned FirstByValReg, LastByValReg; ++ unsigned ByValIdx = CCInfo.getInRegsParamsProcessed(); ++ CCInfo.getInRegsParamInfo(ByValIdx, FirstByValReg, LastByValReg); ++ ++ assert(Flags.getByValSize() && ++ "ByVal args of size 0 should have been ignored by front-end."); ++ assert(ByValIdx < CCInfo.getInRegsParamsCount()); ++ copyByValRegs(Chain, DL, OutChains, DAG, Flags, InVals, &*FuncArg, ++ FirstByValReg, LastByValReg, VA, CCInfo); ++ CCInfo.nextInRegsParam(); ++ continue; ++ } ++ ++ // Arguments stored on registers ++ if (IsRegLoc) { ++ MVT RegVT = VA.getLocVT(); ++ unsigned ArgReg = VA.getLocReg(); ++ const TargetRegisterClass *RC = getRegClassFor(RegVT); ++ ++ // Transform the arguments stored on ++ // physical registers into virtual ones ++ unsigned Reg = addLiveIn(DAG.getMachineFunction(), ArgReg, RC); ++ SDValue ArgValue = DAG.getCopyFromReg(Chain, DL, Reg, RegVT); ++ ++ ArgValue = UnpackFromArgumentSlot(ArgValue, VA, Ins[i].ArgVT, DL, DAG); ++ ++ // Handle floating point arguments passed in integer registers and ++ // long double arguments passed in floating point registers. ++ if ((RegVT == MVT::i32 && ValVT == MVT::f32) || ++ (RegVT == MVT::i64 && ValVT == MVT::f64) || ++ (RegVT == MVT::f64 && ValVT == MVT::i64)) ++ ArgValue = DAG.getNode(ISD::BITCAST, DL, ValVT, ArgValue); ++ else if ((ABI.IsILP32D() || ABI.IsILP32F() || ABI.IsILP32S()) && ++ RegVT == MVT::i32 && ValVT == MVT::f64) { ++ // TODO ++ llvm_unreachable("Unimplemented ABI"); ++ } ++ ++ InVals.push_back(ArgValue); ++ } else { // VA.isRegLoc() ++ MVT LocVT = VA.getLocVT(); ++ ++ if (ABI.IsILP32D() || ABI.IsILP32F() || ABI.IsILP32S()) { ++ // TODO ++ llvm_unreachable("Unimplemented ABI"); ++ } ++ ++ // sanity check ++ assert(VA.isMemLoc()); ++ ++ // The stack pointer offset is relative to the caller stack frame. 
++ int FI = MFI.CreateFixedObject(LocVT.getSizeInBits() / 8, ++ VA.getLocMemOffset(), true); ++ ++ // Create load nodes to retrieve arguments from the stack ++ SDValue FIN = DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout())); ++ SDValue ArgValue = DAG.getLoad( ++ LocVT, DL, Chain, FIN, ++ MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI)); ++ OutChains.push_back(ArgValue.getValue(1)); ++ ++ ArgValue = UnpackFromArgumentSlot(ArgValue, VA, Ins[i].ArgVT, DL, DAG); ++ ++ InVals.push_back(ArgValue); ++ } ++ } ++ ++ for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) { ++ // The loongarch ABIs for returning structs by value requires that we copy ++ // the sret argument into $v0 for the return. Save the argument into ++ // a virtual register so that we can access it from the return points. ++ if (Ins[i].Flags.isSRet()) { ++ unsigned Reg = LoongArchFI->getSRetReturnReg(); ++ if (!Reg) { ++ Reg = MF.getRegInfo().createVirtualRegister( ++ getRegClassFor(Subtarget.is64Bit() ? MVT::i64 : MVT::i32)); ++ LoongArchFI->setSRetReturnReg(Reg); ++ } ++ SDValue Copy = DAG.getCopyToReg(DAG.getEntryNode(), DL, Reg, InVals[i]); ++ Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Copy, Chain); ++ break; ++ } ++ } ++ ++ if (IsVarArg) ++ writeVarArgRegs(OutChains, Chain, DL, DAG, CCInfo); ++ ++ // All stores are grouped in one node to allow the matching between ++ // the size of Ins and InVals. This only happens when on varg functions ++ if (!OutChains.empty()) { ++ OutChains.push_back(Chain); ++ Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, OutChains); ++ } ++ ++ return Chain; ++} ++ ++//===----------------------------------------------------------------------===// ++// Return Value Calling Convention Implementation ++//===----------------------------------------------------------------------===// ++ ++bool ++LoongArchTargetLowering::CanLowerReturn(CallingConv::ID CallConv, ++ MachineFunction &MF, bool IsVarArg, ++ const SmallVectorImpl &Outs, ++ LLVMContext &Context) const { ++ SmallVector RVLocs; ++ LoongArchCCState CCInfo(CallConv, IsVarArg, MF, RVLocs, Context); ++ return CCInfo.CheckReturn(Outs, RetCC_LoongArch); ++} ++ ++bool ++LoongArchTargetLowering::shouldSignExtendTypeInLibCall(EVT Type, bool IsSigned) const { ++ if (Subtarget.is64Bit() && Type == MVT::i32) ++ return true; ++ ++ return IsSigned; ++} ++ ++SDValue ++LoongArchTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv, ++ bool IsVarArg, ++ const SmallVectorImpl &Outs, ++ const SmallVectorImpl &OutVals, ++ const SDLoc &DL, SelectionDAG &DAG) const { ++ // CCValAssign - represent the assignment of ++ // the return value to a location ++ SmallVector RVLocs; ++ MachineFunction &MF = DAG.getMachineFunction(); ++ ++ // CCState - Info about the registers and stack slot. ++ LoongArchCCState CCInfo(CallConv, IsVarArg, MF, RVLocs, *DAG.getContext()); ++ ++ // Analyze return values. ++ CCInfo.AnalyzeReturn(Outs, RetCC_LoongArch); ++ ++ SDValue Flag; ++ SmallVector RetOps(1, Chain); ++ ++ // Copy the result values into the output registers. 
++ for (unsigned i = 0; i != RVLocs.size(); ++i) { ++ SDValue Val = OutVals[i]; ++ CCValAssign &VA = RVLocs[i]; ++ assert(VA.isRegLoc() && "Can only return in registers!"); ++ bool UseUpperBits = false; ++ ++ switch (VA.getLocInfo()) { ++ default: ++ llvm_unreachable("Unknown loc info!"); ++ case CCValAssign::Full: ++ break; ++ case CCValAssign::BCvt: ++ Val = DAG.getNode(ISD::BITCAST, DL, VA.getLocVT(), Val); ++ break; ++ case CCValAssign::AExtUpper: ++ UseUpperBits = true; ++ LLVM_FALLTHROUGH; ++ case CCValAssign::AExt: ++ Val = DAG.getNode(ISD::ANY_EXTEND, DL, VA.getLocVT(), Val); ++ break; ++ case CCValAssign::ZExtUpper: ++ UseUpperBits = true; ++ LLVM_FALLTHROUGH; ++ case CCValAssign::ZExt: ++ Val = DAG.getNode(ISD::ZERO_EXTEND, DL, VA.getLocVT(), Val); ++ break; ++ case CCValAssign::SExtUpper: ++ UseUpperBits = true; ++ LLVM_FALLTHROUGH; ++ case CCValAssign::SExt: ++ Val = DAG.getNode(ISD::SIGN_EXTEND, DL, VA.getLocVT(), Val); ++ break; ++ } ++ ++ if (UseUpperBits) { ++ unsigned ValSizeInBits = Outs[i].ArgVT.getSizeInBits(); ++ unsigned LocSizeInBits = VA.getLocVT().getSizeInBits(); ++ Val = DAG.getNode( ++ ISD::SHL, DL, VA.getLocVT(), Val, ++ DAG.getConstant(LocSizeInBits - ValSizeInBits, DL, VA.getLocVT())); ++ } ++ ++ Chain = DAG.getCopyToReg(Chain, DL, VA.getLocReg(), Val, Flag); ++ ++ // Guarantee that all emitted copies are stuck together with flags. ++ Flag = Chain.getValue(1); ++ RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT())); ++ } ++ ++ // The loongarch ABIs for returning structs by value requires that we copy ++ // the sret argument into $v0 for the return. We saved the argument into ++ // a virtual register in the entry block, so now we copy the value out ++ // and into $v0. ++ if (MF.getFunction().hasStructRetAttr()) { ++ LoongArchFunctionInfo *LoongArchFI = MF.getInfo(); ++ unsigned Reg = LoongArchFI->getSRetReturnReg(); ++ ++ if (!Reg) ++ llvm_unreachable("sret virtual register not created in the entry block"); ++ SDValue Val = ++ DAG.getCopyFromReg(Chain, DL, Reg, getPointerTy(DAG.getDataLayout())); ++ unsigned A0 = Subtarget.is64Bit() ? LoongArch::A0_64 : LoongArch::A0; ++ ++ Chain = DAG.getCopyToReg(Chain, DL, A0, Val, Flag); ++ Flag = Chain.getValue(1); ++ RetOps.push_back(DAG.getRegister(A0, getPointerTy(DAG.getDataLayout()))); ++ } ++ ++ RetOps[0] = Chain; // Update chain. ++ ++ // Add the flag if we have it. ++ if (Flag.getNode()) ++ RetOps.push_back(Flag); ++ ++ // Standard return on LoongArch is a "jr $ra" ++ return DAG.getNode(LoongArchISD::Ret, DL, MVT::Other, RetOps); ++} ++ ++//===----------------------------------------------------------------------===// ++// LoongArch Inline Assembly Support ++//===----------------------------------------------------------------------===// ++ ++/// getConstraintType - Given a constraint letter, return the type of ++/// constraint it is for this target. 
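For reference, a minimal source-level example of the case the sret handling in LowerFormalArguments/LowerReturn above is about: a function returning a struct by value receives a hidden pointer argument, and the lowering copies that pointer back into A0 on return. Ordinary C++ cannot observe the register itself, so this sketch only shows the shape of code that takes this path; the comments describe what the lowering above does with it.

    #include <cstdio>

    // Large enough to be returned through a hidden sret pointer rather than
    // in registers.
    struct Big {
      long Data[8];
    };

    // Per the lowering above, the incoming sret pointer is also returned in
    // A0, so the caller may rely on it after the call.
    Big makeBig() {
      Big B = {};
      B.Data[0] = 42;
      return B; // stores through the hidden pointer, then returns it
    }

    int main() {
      Big B = makeBig();
      std::printf("%ld\n", B.Data[0]);
      return 0;
    }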
++LoongArchTargetLowering::ConstraintType ++LoongArchTargetLowering::getConstraintType(StringRef Constraint) const { ++ // LoongArch specific constraints ++ // GCC config/loongarch/constraints.md ++ // ++ // 'f': Floating Point register ++ // 'G': Floating-point 0 ++ // 'l': Signed 16-bit constant ++ // 'R': Memory address that can be used in a non-macro load or store ++ // "ZC" Memory address with 16-bit and 4 bytes aligned offset ++ // "ZB" Memory address with 0 offset ++ ++ if (Constraint.size() == 1) { ++ switch (Constraint[0]) { ++ default : break; ++ case 'f': ++ return C_RegisterClass; ++ case 'l': ++ case 'G': ++ return C_Other; ++ case 'R': ++ return C_Memory; ++ } ++ } ++ ++ if (Constraint == "ZC" || Constraint == "ZB") ++ return C_Memory; ++ ++ return TargetLowering::getConstraintType(Constraint); ++} ++ ++/// Examine constraint type and operand type and determine a weight value. ++/// This object must already have been set up with the operand type ++/// and the current alternative constraint selected. ++TargetLowering::ConstraintWeight ++LoongArchTargetLowering::getSingleConstraintMatchWeight( ++ AsmOperandInfo &info, const char *constraint) const { ++ ConstraintWeight weight = CW_Invalid; ++ Value *CallOperandVal = info.CallOperandVal; ++ // If we don't have a value, we can't do a match, ++ // but allow it at the lowest weight. ++ if (!CallOperandVal) ++ return CW_Default; ++ Type *type = CallOperandVal->getType(); ++ // Look at the constraint type. ++ switch (*constraint) { ++ default: ++ weight = TargetLowering::getSingleConstraintMatchWeight(info, constraint); ++ break; ++ case 'f': // FPU ++ if (Subtarget.hasLSX() && type->isVectorTy() && ++ type->getPrimitiveSizeInBits() == 128) ++ weight = CW_Register; ++ else if (Subtarget.hasLASX() && type->isVectorTy() && ++ type->getPrimitiveSizeInBits() == 256) ++ weight = CW_Register; ++ else if (type->isFloatTy()) ++ weight = CW_Register; ++ break; ++ case 'l': // signed 16 bit immediate ++ case 'I': // signed 12 bit immediate ++ case 'J': // integer zero ++ case 'G': // floating-point zero ++ case 'K': // unsigned 12 bit immediate ++ if (isa(CallOperandVal)) ++ weight = CW_Constant; ++ break; ++ case 'm': ++ case 'R': ++ weight = CW_Memory; ++ break; ++ } ++ return weight; ++} ++ ++/// This is a helper function to parse a physical register string and split it ++/// into non-numeric and numeric parts (Prefix and Reg). The first boolean flag ++/// that is returned indicates whether parsing was successful. The second flag ++/// is true if the numeric part exists. ++static std::pair parsePhysicalReg(StringRef C, StringRef &Prefix, ++ unsigned long long &Reg) { ++ if (C.empty() || C.front() != '{' || C.back() != '}') ++ return std::make_pair(false, false); ++ ++ // Search for the first numeric character. ++ StringRef::const_iterator I, B = C.begin() + 1, E = C.end() - 1; ++ I = std::find_if(B, E, isdigit); ++ ++ Prefix = StringRef(B, I - B); ++ ++ // The second flag is set to false if no numeric characters were found. ++ if (I == E) ++ return std::make_pair(true, false); ++ ++ // Parse the numeric characters. ++ return std::make_pair(!getAsUnsignedInteger(StringRef(I, E - I), 10, Reg), ++ true); ++} ++ ++EVT LoongArchTargetLowering::getTypeForExtReturn(LLVMContext &Context, EVT VT, ++ ISD::NodeType) const { ++ bool Cond = Subtarget.is64Bit() && VT.getSizeInBits() == 32; ++ EVT MinVT = getRegisterType(Context, Cond ? MVT::i64 : MVT::i32); ++ return VT.bitsLT(MinVT) ? 
MinVT : VT; ++} ++ ++static const TargetRegisterClass *getRegisterClassForVT(MVT VT, bool Is64Bit) { ++ // Newer llvm versions (>= 12) do not require simple VTs for constraints and ++ // they use MVT::Other for constraints with complex VTs. For more details, ++ // please see https://reviews.llvm.org/D91710. ++ if (VT == MVT::Other || VT.getSizeInBits() <= 32) ++ return &LoongArch::GPR32RegClass; ++ if (VT.getSizeInBits() <= 64) ++ return Is64Bit ? &LoongArch::GPR64RegClass : &LoongArch::GPR32RegClass; ++ return nullptr; ++} ++ ++std::pair LoongArchTargetLowering:: ++parseRegForInlineAsmConstraint(StringRef C, MVT VT) const { ++ const TargetRegisterInfo *TRI = ++ Subtarget.getRegisterInfo(); ++ const TargetRegisterClass *RC; ++ StringRef Prefix; ++ unsigned long long Reg; ++ ++ std::pair R = parsePhysicalReg(C, Prefix, Reg); ++ ++ if (!R.first) ++ return std::make_pair(0U, nullptr); ++ ++ if (!R.second) ++ return std::make_pair(0U, nullptr); ++ ++ if (Prefix == "$f") { // Parse $f0-$f31. ++ // If the size of FP registers is 64-bit, select the 64-bit register class. ++ // Otherwise, select the 32-bit register class. ++ if (VT == MVT::Other) ++ VT = Subtarget.hasBasicD() ? MVT::f64 : MVT::f32; ++ ++ RC = getRegClassFor(VT); ++ } ++ else if (Prefix == "$vr") { // Parse $vr0-$vr31. ++ RC = getRegClassFor((VT == MVT::Other) ? MVT::v16i8 : VT); ++ } ++ else if (Prefix == "$xr") { // Parse $xr0-$xr31. ++ RC = getRegClassFor((VT == MVT::Other) ? MVT::v16i8 : VT); ++ } ++ else if (Prefix == "$fcc") // Parse $fcc0-$fcc7. ++ RC = TRI->getRegClass(LoongArch::FCFRRegClassID); ++ else { // Parse $r0-$r31. ++ assert(Prefix == "$r"); ++ if ((RC = getRegisterClassForVT(VT, Subtarget.is64Bit())) == nullptr) { ++ // This will generate an error message. ++ return std::make_pair(0U, nullptr); ++ } ++ } ++ ++ assert(Reg < RC->getNumRegs()); ++ ++ if (RC == &LoongArch::GPR64RegClass || RC == &LoongArch::GPR32RegClass) { ++ // Sync with the GPR32/GPR64 RegisterClass in LoongArchRegisterInfo.td ++ // that just like LoongArchAsmParser.cpp ++ switch (Reg) { ++ case 0: return std::make_pair(*(RC->begin() + 0), RC); // r0 ++ case 1: return std::make_pair(*(RC->begin() + 27), RC); // r1 ++ case 2: return std::make_pair(*(RC->begin() + 28), RC); // r2 ++ case 3: return std::make_pair(*(RC->begin() + 29), RC); // r3 ++ case 4: return std::make_pair(*(RC->begin() + 1), RC); // r4 ++ case 5: return std::make_pair(*(RC->begin() + 2), RC); // r5 ++ case 6: return std::make_pair(*(RC->begin() + 3), RC); // r6 ++ case 7: return std::make_pair(*(RC->begin() + 4), RC); // r7 ++ case 8: return std::make_pair(*(RC->begin() + 5), RC); // r8 ++ case 9: return std::make_pair(*(RC->begin() + 6), RC); // r9 ++ case 10: return std::make_pair(*(RC->begin() + 7), RC); // r10 ++ case 11: return std::make_pair(*(RC->begin() + 8), RC); // r11 ++ case 12: return std::make_pair(*(RC->begin() + 9), RC); // r12 ++ case 13: return std::make_pair(*(RC->begin() + 10), RC); // r13 ++ case 14: return std::make_pair(*(RC->begin() + 11), RC); // r14 ++ case 15: return std::make_pair(*(RC->begin() + 12), RC); // r15 ++ case 16: return std::make_pair(*(RC->begin() + 13), RC); // r16 ++ case 17: return std::make_pair(*(RC->begin() + 14), RC); // r17 ++ case 18: return std::make_pair(*(RC->begin() + 15), RC); // r18 ++ case 19: return std::make_pair(*(RC->begin() + 16), RC); // r19 ++ case 20: return std::make_pair(*(RC->begin() + 17), RC); // r20 ++ case 21: return std::make_pair(*(RC->begin() + 30), RC); // r21 ++ case 22: return 
std::make_pair(*(RC->begin() + 31), RC); // r22 ++ case 23: return std::make_pair(*(RC->begin() + 18), RC); // r23 ++ case 24: return std::make_pair(*(RC->begin() + 19), RC); // r24 ++ case 25: return std::make_pair(*(RC->begin() + 20), RC); // r25 ++ case 26: return std::make_pair(*(RC->begin() + 21), RC); // r26 ++ case 27: return std::make_pair(*(RC->begin() + 22), RC); // r27 ++ case 28: return std::make_pair(*(RC->begin() + 23), RC); // r28 ++ case 29: return std::make_pair(*(RC->begin() + 24), RC); // r29 ++ case 30: return std::make_pair(*(RC->begin() + 25), RC); // r30 ++ case 31: return std::make_pair(*(RC->begin() + 26), RC); // r31 ++ } ++ } ++ return std::make_pair(*(RC->begin() + Reg), RC); ++} ++ ++/// Given a register class constraint, like 'r', if this corresponds directly ++/// to an LLVM register class, return a register of 0 and the register class ++/// pointer. ++std::pair ++LoongArchTargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI, ++ StringRef Constraint, ++ MVT VT) const { ++ if (Constraint.size() == 1) { ++ switch (Constraint[0]) { ++ case 'r': ++ return std::make_pair(0U, getRegisterClassForVT(VT, Subtarget.is64Bit())); ++ case 'f': // FPU or LSX register ++ if (VT == MVT::v16i8) ++ return std::make_pair(0U, &LoongArch::LSX128BRegClass); ++ else if (VT == MVT::v8i16) ++ return std::make_pair(0U, &LoongArch::LSX128HRegClass); ++ else if (VT == MVT::v4i32 || VT == MVT::v4f32) ++ return std::make_pair(0U, &LoongArch::LSX128WRegClass); ++ else if (VT == MVT::v2i64 || VT == MVT::v2f64) ++ return std::make_pair(0U, &LoongArch::LSX128DRegClass); ++ else if (VT == MVT::v32i8) ++ return std::make_pair(0U, &LoongArch::LASX256BRegClass); ++ else if (VT == MVT::v16i16) ++ return std::make_pair(0U, &LoongArch::LASX256HRegClass); ++ else if (VT == MVT::v8i32 || VT == MVT::v8f32) ++ return std::make_pair(0U, &LoongArch::LASX256WRegClass); ++ else if (VT == MVT::v4i64 || VT == MVT::v4f64) ++ return std::make_pair(0U, &LoongArch::LASX256DRegClass); ++ else if (VT == MVT::f32) ++ return std::make_pair(0U, &LoongArch::FGR32RegClass); ++ else if (VT == MVT::f64) ++ return std::make_pair(0U, &LoongArch::FGR64RegClass); ++ break; ++ } ++ } ++ ++ std::pair R; ++ R = parseRegForInlineAsmConstraint(Constraint, VT); ++ ++ if (R.second) ++ return R; ++ ++ return TargetLowering::getRegForInlineAsmConstraint(TRI, Constraint, VT); ++} ++ ++/// LowerAsmOperandForConstraint - Lower the specified operand into the Ops ++/// vector. If it is invalid, don't add anything to Ops. ++void LoongArchTargetLowering::LowerAsmOperandForConstraint(SDValue Op, ++ std::string &Constraint, ++ std::vector&Ops, ++ SelectionDAG &DAG) const { ++ SDLoc DL(Op); ++ SDValue Result; ++ ++ // Only support length 1 constraints for now. 
++ if (Constraint.length() > 1) return; ++ ++ char ConstraintLetter = Constraint[0]; ++ switch (ConstraintLetter) { ++ default: break; // This will fall through to the generic implementation ++ case 'l': // Signed 16 bit constant ++ // If this fails, the parent routine will give an error ++ if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op)) { ++ EVT Type = Op.getValueType(); ++ int64_t Val = C->getSExtValue(); ++ if (isInt<16>(Val)) { ++ Result = DAG.getTargetConstant(Val, DL, Type); ++ break; ++ } ++ } ++ return; ++ case 'I': // Signed 12 bit constant ++ // If this fails, the parent routine will give an error ++ if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op)) { ++ EVT Type = Op.getValueType(); ++ int64_t Val = C->getSExtValue(); ++ if (isInt<12>(Val)) { ++ Result = DAG.getTargetConstant(Val, DL, Type); ++ break; ++ } ++ } ++ return; ++ case 'J': // integer zero ++ if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op)) { ++ EVT Type = Op.getValueType(); ++ int64_t Val = C->getZExtValue(); ++ if (Val == 0) { ++ Result = DAG.getTargetConstant(0, DL, Type); ++ break; ++ } ++ } ++ return; ++ case 'G': // floating-point zero ++ if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(Op)) { ++ if (C->isZero()) { ++ EVT Type = Op.getValueType(); ++ Result = DAG.getTargetConstantFP(0, DL, Type); ++ break; ++ } ++ } ++ return; ++ case 'K': // unsigned 12 bit immediate ++ if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op)) { ++ EVT Type = Op.getValueType(); ++ uint64_t Val = (uint64_t)C->getZExtValue(); ++ if (isUInt<12>(Val)) { ++ Result = DAG.getTargetConstant(Val, DL, Type); ++ break; ++ } ++ } ++ return; ++ } ++ ++ if (Result.getNode()) { ++ Ops.push_back(Result); ++ return; ++ } ++ ++ TargetLowering::LowerAsmOperandForConstraint(Op, Constraint, Ops, DAG); ++} ++ ++bool LoongArchTargetLowering::isLegalAddressingMode(const DataLayout &DL, ++ const AddrMode &AM, Type *Ty, ++ unsigned AS, Instruction *I) const { ++ // No global is ever allowed as a base. ++ if (AM.BaseGV) ++ return false; ++ ++ switch (AM.Scale) { ++ case 0: // "r+i" or just "i", depending on HasBaseReg. ++ break; ++ case 1: ++ if (!AM.HasBaseReg) // allow "r+i". ++ break; ++ return false; // disallow "r+r" or "r+r+i". ++ default: ++ return false; ++ } ++ ++ return true; ++} ++ ++bool ++LoongArchTargetLowering::isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const { ++ // The LoongArch target isn't yet aware of offsets. ++ return false; ++} ++ ++EVT LoongArchTargetLowering::getOptimalMemOpType( ++ const MemOp &Op, const AttributeList &FuncAttributes) const { ++ if (!FuncAttributes.hasFnAttr(Attribute::NoImplicitFloat)) { ++ if (Op.size() >= 16) { ++ if (Op.size() >= 32 && Subtarget.hasLASX()) { ++ return MVT::v32i8; ++ } ++ if (Subtarget.hasLSX()) ++ return MVT::v16i8; ++ } ++ } ++ ++ if (Subtarget.is64Bit()) ++ return MVT::i64; ++ ++ return MVT::i32; ++} ++ ++/// isFPImmLegal - Returns true if the target can instruction select the ++/// specified FP immediate natively. If false, the legalizer will ++/// materialize the FP immediate as a load from a constant pool.
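A hypothetical GCC-style inline-asm use of the constraints lowered above (illustrative only, not part of the patch; the mnemonic and values are assumptions):

    int add_small_imm(int x) {
      int r;
      /* "r" is resolved to a GPR register class by getRegForInlineAsmConstraint;
         the literal 16 is checked against the signed 12-bit range by the 'I'
         case of LowerAsmOperandForConstraint before being emitted. */
      __asm__("addi.w %0, %1, %2" : "=r"(r) : "r"(x), "I"(16));
      return r;
    }

An out-of-range literal is simply not added to Ops and, as the comments above note, the parent routine then reports the error.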
++bool LoongArchTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT, ++ bool ForCodeSize) const { ++ if (VT != MVT::f32 && VT != MVT::f64) ++ return false; ++ if (Imm.isNegZero()) ++ return false; ++ return (Imm.isZero() || Imm.isExactlyValue(+1.0)); ++} ++ ++bool LoongArchTargetLowering::useSoftFloat() const { ++ return Subtarget.useSoftFloat(); ++} ++ ++void LoongArchTargetLowering::copyByValRegs( ++ SDValue Chain, const SDLoc &DL, std::vector &OutChains, ++ SelectionDAG &DAG, const ISD::ArgFlagsTy &Flags, ++ SmallVectorImpl &InVals, const Argument *FuncArg, ++ unsigned FirstReg, unsigned LastReg, const CCValAssign &VA, ++ LoongArchCCState &State) const { ++ MachineFunction &MF = DAG.getMachineFunction(); ++ MachineFrameInfo &MFI = MF.getFrameInfo(); ++ unsigned GPRSizeInBytes = Subtarget.getGPRSizeInBytes(); ++ unsigned NumRegs = LastReg - FirstReg; ++ unsigned RegAreaSize = NumRegs * GPRSizeInBytes; ++ unsigned FrameObjSize = std::max(Flags.getByValSize(), RegAreaSize); ++ int FrameObjOffset; ++ ArrayRef ByValArgRegs = ABI.GetByValArgRegs(); ++ ++ if (RegAreaSize) ++ FrameObjOffset = -(int)((ByValArgRegs.size() - FirstReg) * GPRSizeInBytes); ++ else ++ FrameObjOffset = VA.getLocMemOffset(); ++ ++ // Create frame object. ++ EVT PtrTy = getPointerTy(DAG.getDataLayout()); ++ // Make the fixed object stored to mutable so that the load instructions ++ // referencing it have their memory dependencies added. ++ // Set the frame object as isAliased which clears the underlying objects ++ // vector in ScheduleDAGInstrs::buildSchedGraph() resulting in addition of all ++ // stores as dependencies for loads referencing this fixed object. ++ int FI = MFI.CreateFixedObject(FrameObjSize, FrameObjOffset, false, true); ++ SDValue FIN = DAG.getFrameIndex(FI, PtrTy); ++ InVals.push_back(FIN); ++ ++ if (!NumRegs) ++ return; ++ ++ // Copy arg registers. ++ MVT RegTy = MVT::getIntegerVT(GPRSizeInBytes * 8); ++ const TargetRegisterClass *RC = getRegClassFor(RegTy); ++ ++ for (unsigned I = 0; I < NumRegs; ++I) { ++ unsigned ArgReg = ByValArgRegs[FirstReg + I]; ++ unsigned VReg = addLiveIn(MF, ArgReg, RC); ++ unsigned Offset = I * GPRSizeInBytes; ++ SDValue StorePtr = DAG.getNode(ISD::ADD, DL, PtrTy, FIN, ++ DAG.getConstant(Offset, DL, PtrTy)); ++ SDValue Store = DAG.getStore(Chain, DL, DAG.getRegister(VReg, RegTy), ++ StorePtr, MachinePointerInfo(FuncArg, Offset)); ++ OutChains.push_back(Store); ++ } ++} ++ ++// Copy byVal arg to registers and stack. ++void LoongArchTargetLowering::passByValArg( ++ SDValue Chain, const SDLoc &DL, ++ std::deque> &RegsToPass, ++ SmallVectorImpl &MemOpChains, SDValue StackPtr, ++ MachineFrameInfo &MFI, SelectionDAG &DAG, SDValue Arg, unsigned FirstReg, ++ unsigned LastReg, const ISD::ArgFlagsTy &Flags, ++ const CCValAssign &VA) const { ++ unsigned ByValSizeInBytes = Flags.getByValSize(); ++ unsigned OffsetInBytes = 0; // From beginning of struct ++ unsigned RegSizeInBytes = Subtarget.getGPRSizeInBytes(); ++ Align Alignment = ++ std::min(Flags.getNonZeroByValAlign(), Align(RegSizeInBytes)); ++ EVT PtrTy = getPointerTy(DAG.getDataLayout()), ++ RegTy = MVT::getIntegerVT(RegSizeInBytes * 8); ++ unsigned NumRegs = LastReg - FirstReg; ++ ++ if (NumRegs) { ++ ArrayRef ArgRegs = ABI.GetByValArgRegs(); ++ bool LeftoverBytes = (NumRegs * RegSizeInBytes > ByValSizeInBytes); ++ unsigned I = 0; ++ ++ // Copy words to registers. 
++ for (; I < NumRegs - LeftoverBytes; ++I, OffsetInBytes += RegSizeInBytes) { ++ SDValue LoadPtr = DAG.getNode(ISD::ADD, DL, PtrTy, Arg, ++ DAG.getConstant(OffsetInBytes, DL, PtrTy)); ++ SDValue LoadVal = DAG.getLoad(RegTy, DL, Chain, LoadPtr, ++ MachinePointerInfo(), Alignment); ++ MemOpChains.push_back(LoadVal.getValue(1)); ++ unsigned ArgReg = ArgRegs[FirstReg + I]; ++ RegsToPass.push_back(std::make_pair(ArgReg, LoadVal)); ++ } ++ ++ // Return if the struct has been fully copied. ++ if (ByValSizeInBytes == OffsetInBytes) ++ return; ++ ++ // Copy the remainder of the byval argument with sub-word loads and shifts. ++ if (LeftoverBytes) { ++ SDValue Val; ++ ++ for (unsigned LoadSizeInBytes = RegSizeInBytes / 2, TotalBytesLoaded = 0; ++ OffsetInBytes < ByValSizeInBytes; LoadSizeInBytes /= 2) { ++ unsigned RemainingSizeInBytes = ByValSizeInBytes - OffsetInBytes; ++ ++ if (RemainingSizeInBytes < LoadSizeInBytes) ++ continue; ++ ++ // Load subword. ++ SDValue LoadPtr = DAG.getNode(ISD::ADD, DL, PtrTy, Arg, ++ DAG.getConstant(OffsetInBytes, DL, ++ PtrTy)); ++ SDValue LoadVal = DAG.getExtLoad( ++ ISD::ZEXTLOAD, DL, RegTy, Chain, LoadPtr, MachinePointerInfo(), ++ MVT::getIntegerVT(LoadSizeInBytes * 8), Alignment); ++ MemOpChains.push_back(LoadVal.getValue(1)); ++ ++ // Shift the loaded value. ++ unsigned Shamt; ++ ++ Shamt = TotalBytesLoaded * 8; ++ ++ SDValue Shift = DAG.getNode(ISD::SHL, DL, RegTy, LoadVal, ++ DAG.getConstant(Shamt, DL, MVT::i32)); ++ ++ if (Val.getNode()) ++ Val = DAG.getNode(ISD::OR, DL, RegTy, Val, Shift); ++ else ++ Val = Shift; ++ ++ OffsetInBytes += LoadSizeInBytes; ++ TotalBytesLoaded += LoadSizeInBytes; ++ Alignment = std::min(Alignment, Align(LoadSizeInBytes)); ++ } ++ ++ unsigned ArgReg = ArgRegs[FirstReg + I]; ++ RegsToPass.push_back(std::make_pair(ArgReg, Val)); ++ return; ++ } ++ } ++ ++ // Copy remainder of byval arg to it with memcpy. ++ unsigned MemCpySize = ByValSizeInBytes - OffsetInBytes; ++ SDValue Src = DAG.getNode(ISD::ADD, DL, PtrTy, Arg, ++ DAG.getConstant(OffsetInBytes, DL, PtrTy)); ++ SDValue Dst = DAG.getNode(ISD::ADD, DL, PtrTy, StackPtr, ++ DAG.getIntPtrConstant(VA.getLocMemOffset(), DL)); ++ Chain = DAG.getMemcpy( ++ Chain, DL, Dst, Src, DAG.getConstant(MemCpySize, DL, PtrTy), ++ Align(Alignment), /*isVolatile=*/false, /*AlwaysInline=*/false, ++ /*isTailCall=*/false, MachinePointerInfo(), MachinePointerInfo()); ++ MemOpChains.push_back(Chain); ++} ++ ++void LoongArchTargetLowering::writeVarArgRegs(std::vector &OutChains, ++ SDValue Chain, const SDLoc &DL, ++ SelectionDAG &DAG, ++ CCState &State) const { ++ ArrayRef ArgRegs = ABI.GetVarArgRegs(); ++ unsigned Idx = State.getFirstUnallocated(ArgRegs); ++ unsigned RegSizeInBytes = Subtarget.getGPRSizeInBytes(); ++ MVT RegTy = MVT::getIntegerVT(RegSizeInBytes * 8); ++ const TargetRegisterClass *RC = getRegClassFor(RegTy); ++ MachineFunction &MF = DAG.getMachineFunction(); ++ MachineFrameInfo &MFI = MF.getFrameInfo(); ++ LoongArchFunctionInfo *LoongArchFI = MF.getInfo(); ++ ++ // Offset of the first variable argument from stack pointer. ++ int VaArgOffset, VarArgsSaveSize; ++ ++ if (ArgRegs.size() == Idx) { ++ VaArgOffset = alignTo(State.getNextStackOffset(), RegSizeInBytes); ++ VarArgsSaveSize = 0; ++ } else { ++ VarArgsSaveSize = (int)(RegSizeInBytes * (ArgRegs.size() - Idx)); ++ VaArgOffset = -VarArgsSaveSize; ++ } ++ ++ // Record the frame index of the first variable argument ++ // which is a value necessary to VASTART. 
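For context, the save area being laid out in writeVarArgRegs here is what a variadic callee relies on; a minimal example (illustrative only, not part of the patch):

    #include <stdarg.h>
    /* The integer argument registers not consumed by 'n' are spilled into the
       save area whose first slot's frame index is recorded for VASTART, so
       va_arg can walk them as ordinary stack memory. */
    int sum(int n, ...) {
      va_list ap;
      int s = 0;
      va_start(ap, n);
      for (int i = 0; i < n; i++)
        s += va_arg(ap, int);
      va_end(ap);
      return s;
    }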
++ int FI = MFI.CreateFixedObject(RegSizeInBytes, VaArgOffset, true); ++ LoongArchFI->setVarArgsFrameIndex(FI); ++ ++ // If saving an odd number of registers then create an extra stack slot to ++ // ensure that the frame pointer is 2*GRLEN-aligned, which in turn ensures ++ // offsets to even-numbered registered remain 2*GRLEN-aligned. ++ if (Idx % 2) { ++ MFI.CreateFixedObject(RegSizeInBytes, VaArgOffset - (int)RegSizeInBytes, ++ true); ++ VarArgsSaveSize += RegSizeInBytes; ++ } ++ ++ // Copy the integer registers that have not been used for argument passing ++ // to the argument register save area. For ILP32D/ILP32F/ILP32S, the save area ++ // is allocated in the caller's stack frame, while for LP64D/LP64S/LP64F, it ++ // is allocated in the callee's stack frame. ++ for (unsigned I = Idx; I < ArgRegs.size(); ++ ++I, VaArgOffset += RegSizeInBytes) { ++ unsigned Reg = addLiveIn(MF, ArgRegs[I], RC); ++ SDValue ArgValue = DAG.getCopyFromReg(Chain, DL, Reg, RegTy); ++ FI = MFI.CreateFixedObject(RegSizeInBytes, VaArgOffset, true); ++ SDValue PtrOff = DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout())); ++ SDValue Store = ++ DAG.getStore(Chain, DL, ArgValue, PtrOff, MachinePointerInfo()); ++ cast(Store.getNode())->getMemOperand()->setValue( ++ (Value *)nullptr); ++ OutChains.push_back(Store); ++ } ++ LoongArchFI->setVarArgsSaveSize(VarArgsSaveSize); ++} ++ ++void LoongArchTargetLowering::HandleByVal(CCState *State, unsigned &Size, ++ Align Alignment) const { ++ const TargetFrameLowering *TFL = Subtarget.getFrameLowering(); ++ ++ assert(Size && "Byval argument's size shouldn't be 0."); ++ ++ Alignment = std::min(Alignment, TFL->getStackAlign()); ++ ++ unsigned FirstReg = 0; ++ unsigned NumRegs = 0; ++ unsigned RegSizeInBytes = Subtarget.getGPRSizeInBytes(); ++ ArrayRef IntArgRegs = ABI.GetByValArgRegs(); ++ // FIXME: The ILP32D/ILP32F/ILP32S case actually describes no shadow ++ // registers. ++ const MCPhysReg *ShadowRegs = ++ (ABI.IsILP32D() || ABI.IsILP32F() || ABI.IsILP32S()) ++ ? IntArgRegs.data() ++ : LoongArch64DPRegs; ++ ++ // We used to check the size as well but we can't do that anymore since ++ // CCState::HandleByVal() rounds up the size after calling this function. ++ assert(Alignment >= Align(RegSizeInBytes) && ++ "Byval argument's alignment should be a multiple of RegSizeInBytes."); ++ ++ FirstReg = State->getFirstUnallocated(IntArgRegs); ++ ++ // If Alignment > RegSizeInBytes, the first arg register must be even. ++ // FIXME: This condition happens to do the right thing but it's not the ++ // right way to test it. We want to check that the stack frame offset ++ // of the register is aligned. ++ if ((Alignment > RegSizeInBytes) && (FirstReg % 2)) { ++ State->AllocateReg(IntArgRegs[FirstReg], ShadowRegs[FirstReg]); ++ ++FirstReg; ++ // assert(true && "debug#######################################"); ++ } ++ ++ // Mark the registers allocated. ++ // Size = alignTo(Size, RegSizeInBytes); ++ // for (unsigned I = FirstReg; Size > 0 && (I < IntArgRegs.size()); ++ // Size -= RegSizeInBytes, ++I, ++NumRegs) ++ // State->AllocateReg(IntArgRegs[I], ShadowRegs[I]); ++ ++ State->addInRegsParamInfo(FirstReg, FirstReg + NumRegs); ++} ++ ++MachineBasicBlock *LoongArchTargetLowering::emitPseudoSELECT(MachineInstr &MI, ++ MachineBasicBlock *BB, ++ bool isFPCmp, ++ unsigned Opc) const { ++ const TargetInstrInfo *TII = ++ Subtarget.getInstrInfo(); ++ DebugLoc DL = MI.getDebugLoc(); ++ ++ // To "insert" a SELECT instruction, we actually have to insert the ++ // diamond control-flow pattern. 
The incoming instruction knows the ++ // destination vreg to set, the condition code register to branch on, the ++ // true/false values to select between, and a branch opcode to use. ++ const BasicBlock *LLVM_BB = BB->getBasicBlock(); ++ MachineFunction::iterator It = ++BB->getIterator(); ++ ++ // thisMBB: ++ // ... ++ // TrueVal = ... ++ // setcc r1, r2, r3 ++ // bNE r1, r0, copy1MBB ++ // fallthrough --> copy0MBB ++ MachineBasicBlock *thisMBB = BB; ++ MachineFunction *F = BB->getParent(); ++ MachineBasicBlock *copy0MBB = F->CreateMachineBasicBlock(LLVM_BB); ++ MachineBasicBlock *sinkMBB = F->CreateMachineBasicBlock(LLVM_BB); ++ F->insert(It, copy0MBB); ++ F->insert(It, sinkMBB); ++ ++ // Transfer the remainder of BB and its successor edges to sinkMBB. ++ sinkMBB->splice(sinkMBB->begin(), BB, ++ std::next(MachineBasicBlock::iterator(MI)), BB->end()); ++ sinkMBB->transferSuccessorsAndUpdatePHIs(BB); ++ ++ // Next, add the true and fallthrough blocks as its successors. ++ BB->addSuccessor(copy0MBB); ++ BB->addSuccessor(sinkMBB); ++ ++ if (isFPCmp) { ++ // bc1[tf] cc, sinkMBB ++ BuildMI(BB, DL, TII->get(Opc)) ++ .addReg(MI.getOperand(1).getReg()) ++ .addMBB(sinkMBB); ++ } else { ++ BuildMI(BB, DL, TII->get(Opc)) ++ .addReg(MI.getOperand(1).getReg()) ++ .addReg(LoongArch::ZERO) ++ .addMBB(sinkMBB); ++ } ++ ++ // copy0MBB: ++ // %FalseValue = ... ++ // # fallthrough to sinkMBB ++ BB = copy0MBB; ++ ++ // Update machine-CFG edges ++ BB->addSuccessor(sinkMBB); ++ ++ // sinkMBB: ++ // %Result = phi [ %TrueValue, thisMBB ], [ %FalseValue, copy0MBB ] ++ // ... ++ BB = sinkMBB; ++ ++ BuildMI(*BB, BB->begin(), DL, TII->get(LoongArch::PHI), MI.getOperand(0).getReg()) ++ .addReg(MI.getOperand(2).getReg()) ++ .addMBB(thisMBB) ++ .addReg(MI.getOperand(3).getReg()) ++ .addMBB(copy0MBB); ++ ++ MI.eraseFromParent(); // The pseudo instruction is gone now. ++ ++ return BB; ++} ++ ++MachineBasicBlock *LoongArchTargetLowering::emitLSXCBranchPseudo( ++ MachineInstr &MI, MachineBasicBlock *BB, unsigned BranchOp) const { ++ ++ MachineRegisterInfo &RegInfo = BB->getParent()->getRegInfo(); ++ const TargetInstrInfo *TII = Subtarget.getInstrInfo(); ++ const TargetRegisterClass *RC = &LoongArch::GPR32RegClass; ++ DebugLoc DL = MI.getDebugLoc(); ++ const BasicBlock *LLVM_BB = BB->getBasicBlock(); ++ MachineFunction::iterator It = std::next(MachineFunction::iterator(BB)); ++ MachineFunction *F = BB->getParent(); ++ MachineBasicBlock *FBB = F->CreateMachineBasicBlock(LLVM_BB); ++ MachineBasicBlock *TBB = F->CreateMachineBasicBlock(LLVM_BB); ++ MachineBasicBlock *Sink = F->CreateMachineBasicBlock(LLVM_BB); ++ F->insert(It, FBB); ++ F->insert(It, TBB); ++ F->insert(It, Sink); ++ ++ // Transfer the remainder of BB and its successor edges to Sink. ++ Sink->splice(Sink->begin(), BB, std::next(MachineBasicBlock::iterator(MI)), ++ BB->end()); ++ Sink->transferSuccessorsAndUpdatePHIs(BB); ++ ++ // Add successors. ++ BB->addSuccessor(FBB); ++ BB->addSuccessor(TBB); ++ FBB->addSuccessor(Sink); ++ TBB->addSuccessor(Sink); ++ // Insert the real bnz.b instruction to $BB. ++ BuildMI(BB, DL, TII->get(BranchOp)) ++ .addReg(LoongArch::FCC0) ++ .addReg(MI.getOperand(1).getReg()); ++ ++ BuildMI(BB, DL, TII->get(LoongArch::BCNEZ)) ++ .addReg(LoongArch::FCC0) ++ .addMBB(TBB); ++ ++ // Fill $FBB. 
++ unsigned RD1 = RegInfo.createVirtualRegister(RC); ++ BuildMI(*FBB, FBB->end(), DL, TII->get(LoongArch::ADDI_W), RD1) ++ .addReg(LoongArch::ZERO) ++ .addImm(0); ++ BuildMI(*FBB, FBB->end(), DL, TII->get(LoongArch::B32)).addMBB(Sink); ++ ++ // Fill $TBB. ++ unsigned RD2 = RegInfo.createVirtualRegister(RC); ++ BuildMI(*TBB, TBB->end(), DL, TII->get(LoongArch::ADDI_W), RD2) ++ .addReg(LoongArch::ZERO) ++ .addImm(1); ++ ++ // Insert phi function to $Sink. ++ BuildMI(*Sink, Sink->begin(), DL, TII->get(LoongArch::PHI), ++ MI.getOperand(0).getReg()) ++ .addReg(RD1) ++ .addMBB(FBB) ++ .addReg(RD2) ++ .addMBB(TBB); ++ ++ MI.eraseFromParent(); // The pseudo instruction is gone now. ++ return Sink; ++} ++ ++// Emit the COPY_FW pseudo instruction. ++// ++// copy_fw_pseudo $fd, $vk, n ++// => ++// vreplvei.w $rt, $vk, $n ++// copy $rt, $fd ++// ++// When n is zero, the equivalent operation can be performed with (potentially) ++// zero instructions due to register overlaps. ++MachineBasicBlock * ++LoongArchTargetLowering::emitCOPY_FW(MachineInstr &MI, ++ MachineBasicBlock *BB) const { ++ const TargetInstrInfo *TII = Subtarget.getInstrInfo(); ++ MachineRegisterInfo &RegInfo = BB->getParent()->getRegInfo(); ++ DebugLoc DL = MI.getDebugLoc(); ++ unsigned Fd = MI.getOperand(0).getReg(); ++ unsigned Vk = MI.getOperand(1).getReg(); ++ unsigned Lane = MI.getOperand(2).getImm(); ++ ++ if (Lane == 0) { ++ unsigned Vj = Vk; ++ BuildMI(*BB, MI, DL, TII->get(LoongArch::COPY), Fd) ++ .addReg(Vj, 0, LoongArch::sub_lo); ++ } else { ++ unsigned Vj = RegInfo.createVirtualRegister(&LoongArch::LSX128WRegClass); ++ BuildMI(*BB, MI, DL, TII->get(LoongArch::VREPLVEI_W), Vj) ++ .addReg(Vk) ++ .addImm(Lane); ++ BuildMI(*BB, MI, DL, TII->get(LoongArch::COPY), Fd) ++ .addReg(Vj, 0, LoongArch::sub_lo); ++ } ++ ++ MI.eraseFromParent(); // The pseudo instruction is gone now. ++ return BB; ++} ++ ++// Emit the COPY_FD pseudo instruction. ++// ++// copy_fd_pseudo $fd, $vj, n ++// => ++// vreplvei.d $vd, $vj, $n ++// copy $fd, $vd:sub_64 ++// ++// When n is zero, the equivalent operation can be performed with (potentially) ++// zero instructions due to register overlaps. ++MachineBasicBlock * ++LoongArchTargetLowering::emitCOPY_FD(MachineInstr &MI, ++ MachineBasicBlock *BB) const { ++ assert(Subtarget.hasBasicD()); ++ ++ const TargetInstrInfo *TII = Subtarget.getInstrInfo(); ++ MachineRegisterInfo &RegInfo = BB->getParent()->getRegInfo(); ++ unsigned Fd = MI.getOperand(0).getReg(); ++ unsigned Vk = MI.getOperand(1).getReg(); ++ unsigned Lane = MI.getOperand(2).getImm(); ++ DebugLoc DL = MI.getDebugLoc(); ++ ++ if (Lane == 0) ++ BuildMI(*BB, MI, DL, TII->get(LoongArch::COPY), Fd) ++ .addReg(Vk, 0, LoongArch::sub_64); ++ else { ++ unsigned Vj = RegInfo.createVirtualRegister(&LoongArch::LSX128DRegClass); ++ assert(Lane == 1); ++ ++ BuildMI(*BB, MI, DL, TII->get(LoongArch::VREPLVEI_D), Vj) ++ .addReg(Vk) ++ .addImm(Lane); ++ BuildMI(*BB, MI, DL, TII->get(LoongArch::COPY), Fd) ++ .addReg(Vj, 0, LoongArch::sub_64); ++ } ++ ++ MI.eraseFromParent(); // The pseudo instruction is gone now. 
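As a rough source-level picture of when the COPY_FW/COPY_FD expansions above fire (illustrative only, not part of the patch; assumes LSX vector extensions are enabled):

    typedef float v4f32 __attribute__((vector_size(16)));
    /* Extracting a float lane is the sort of extract_vector_elt the copy_fw
       pseudo covers: lane 0 can be a plain subregister copy because the scalar
       FPR overlaps the low vector lane, while other lanes go through
       vreplvei.w first. */
    float lane2(v4f32 v) { return v[2]; }
    float lane0(v4f32 v) { return v[0]; }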
++ return BB; ++} ++ ++MachineBasicBlock * ++LoongArchTargetLowering::emitXCOPY_FW(MachineInstr &MI, ++ MachineBasicBlock *BB) const { ++ const TargetInstrInfo *TII = Subtarget.getInstrInfo(); ++ MachineRegisterInfo &RegInfo = BB->getParent()->getRegInfo(); ++ DebugLoc DL = MI.getDebugLoc(); ++ unsigned Fd = MI.getOperand(0).getReg(); ++ unsigned Xk = MI.getOperand(1).getReg(); ++ unsigned Lane = MI.getOperand(2).getImm(); ++ unsigned Rj = RegInfo.createVirtualRegister(&LoongArch::GPR32RegClass); ++ unsigned Xj = Xk; ++ ++ if (Lane == 0) { ++ BuildMI(*BB, MI, DL, TII->get(LoongArch::COPY), Fd) ++ .addReg(Xj, 0, LoongArch::sub_lo); ++ } else { ++ BuildMI(*BB, MI, DL, TII->get(LoongArch::XVPICKVE2GR_WU), Rj) ++ .addReg(Xk) ++ .addImm(Lane); ++ BuildMI(*BB, MI, DL, TII->get(LoongArch::COPY), Fd).addReg(Rj); ++ } ++ ++ MI.eraseFromParent(); // The pseudo instruction is gone now. ++ return BB; ++} ++ ++MachineBasicBlock * ++LoongArchTargetLowering::emitXCOPY_FD(MachineInstr &MI, ++ MachineBasicBlock *BB) const { ++ assert(Subtarget.hasBasicD()); ++ ++ const TargetInstrInfo *TII = Subtarget.getInstrInfo(); ++ MachineRegisterInfo &RegInfo = BB->getParent()->getRegInfo(); ++ unsigned Fd = MI.getOperand(0).getReg(); ++ unsigned Xk = MI.getOperand(1).getReg(); ++ unsigned Lane = MI.getOperand(2).getImm(); ++ DebugLoc DL = MI.getDebugLoc(); ++ ++ unsigned Rj = RegInfo.createVirtualRegister(&LoongArch::GPR64RegClass); ++ if (Lane == 0) { ++ BuildMI(*BB, MI, DL, TII->get(LoongArch::COPY), Fd) ++ .addReg(Xk, 0, LoongArch::sub_64); ++ } else { ++ BuildMI(*BB, MI, DL, TII->get(LoongArch::XVPICKVE2GR_DU), Rj) ++ .addReg(Xk) ++ .addImm(Lane); ++ BuildMI(*BB, MI, DL, TII->get(LoongArch::COPY), Fd).addReg(Rj); ++ } ++ ++ MI.eraseFromParent(); // The pseudo instruction is gone now. ++ return BB; ++} ++ ++MachineBasicBlock *LoongArchTargetLowering::emitCONCAT_VECTORS( ++ MachineInstr &MI, MachineBasicBlock *BB, unsigned Bytes) const { ++ ++ MachineRegisterInfo &RegInfo = BB->getParent()->getRegInfo(); ++ const TargetInstrInfo *TII = Subtarget.getInstrInfo(); ++ DebugLoc DL = MI.getDebugLoc(); ++ unsigned Xd = MI.getOperand(0).getReg(); ++ unsigned SubReg1 = MI.getOperand(1).getReg(); ++ unsigned SubReg2 = MI.getOperand(2).getReg(); ++ const TargetRegisterClass *RC = nullptr; ++ ++ switch (Bytes) { ++ default: ++ llvm_unreachable("Unexpected size"); ++ case 1: ++ RC = &LoongArch::LASX256BRegClass; ++ break; ++ case 2: ++ RC = &LoongArch::LASX256HRegClass; ++ break; ++ case 4: ++ RC = &LoongArch::LASX256WRegClass; ++ break; ++ case 8: ++ RC = &LoongArch::LASX256DRegClass; ++ break; ++ } ++ ++ unsigned X0 = RegInfo.createVirtualRegister(RC); ++ BuildMI(*BB, MI, DL, TII->get(LoongArch::SUBREG_TO_REG), X0) ++ .addImm(0) ++ .addReg(SubReg1) ++ .addImm(LoongArch::sub_128); ++ unsigned X1 = RegInfo.createVirtualRegister(RC); ++ BuildMI(*BB, MI, DL, TII->get(LoongArch::SUBREG_TO_REG), X1) ++ .addImm(0) ++ .addReg(SubReg2) ++ .addImm(LoongArch::sub_128); ++ BuildMI(*BB, MI, DL, TII->get(LoongArch::XVPERMI_Q), Xd) ++ .addReg(X0) ++ .addReg(X1) ++ .addImm(2); ++ ++ MI.eraseFromParent(); // The pseudo instruction is gone now. 
++ return BB; ++} ++ ++// xcopy_fw_gpr_pseudo $fd, $xs, $rk ++// => ++// bb: addi.d $rt1, zero, 4 ++// bge $lane, $rt1 hbb ++// lbb:xvreplve.w $xt1, $xs, $lane ++// copy $rf0, $xt1 ++// b sink ++// hbb: addi.d $rt2, $lane, -4 ++// xvpermi.q $xt2 $xs, 1 ++// xvreplve.w $xt3, $xt2, $rt2 ++// copy $rf1, $xt3 ++// sink:phi ++MachineBasicBlock * ++LoongArchTargetLowering::emitXCOPY_FW_GPR(MachineInstr &MI, ++ MachineBasicBlock *BB) const { ++ const TargetInstrInfo *TII = Subtarget.getInstrInfo(); ++ MachineRegisterInfo &RegInfo = BB->getParent()->getRegInfo(); ++ DebugLoc DL = MI.getDebugLoc(); ++ unsigned Xs = MI.getOperand(1).getReg(); ++ unsigned Lane = MI.getOperand(2).getReg(); ++ ++ const TargetRegisterClass *RC = &LoongArch::GPR64RegClass; ++ const BasicBlock *LLVM_BB = BB->getBasicBlock(); ++ MachineFunction::iterator It = std::next(MachineFunction::iterator(BB)); ++ MachineFunction *F = BB->getParent(); ++ MachineBasicBlock *HBB = F->CreateMachineBasicBlock(LLVM_BB); ++ MachineBasicBlock *LBB = F->CreateMachineBasicBlock(LLVM_BB); ++ MachineBasicBlock *Sink = F->CreateMachineBasicBlock(LLVM_BB); ++ F->insert(It, LBB); ++ F->insert(It, HBB); ++ F->insert(It, Sink); ++ ++ Sink->splice(Sink->begin(), BB, std::next(MachineBasicBlock::iterator(MI)), ++ BB->end()); ++ Sink->transferSuccessorsAndUpdatePHIs(BB); ++ ++ BB->addSuccessor(LBB); ++ BB->addSuccessor(HBB); ++ HBB->addSuccessor(Sink); ++ LBB->addSuccessor(Sink); ++ ++ unsigned Rt1 = RegInfo.createVirtualRegister(RC); ++ BuildMI(BB, DL, TII->get(LoongArch::ADDI_D), Rt1) ++ .addReg(LoongArch::ZERO_64) ++ .addImm(4); ++ BuildMI(BB, DL, TII->get(LoongArch::BGE)) ++ .addReg(Lane) ++ .addReg(Rt1) ++ .addMBB(HBB); ++ ++ unsigned Xt1 = RegInfo.createVirtualRegister(&LoongArch::LASX256WRegClass); ++ unsigned Rf0 = RegInfo.createVirtualRegister(&LoongArch::FGR32RegClass); ++ BuildMI(*LBB, LBB->end(), DL, TII->get(LoongArch::XVREPLVE_W_N), Xt1) ++ .addReg(Xs) ++ .addReg(Lane); ++ BuildMI(*LBB, LBB->end(), DL, TII->get(LoongArch::COPY), Rf0) ++ .addReg(Xt1, 0, LoongArch::sub_lo); ++ BuildMI(*LBB, LBB->end(), DL, TII->get(LoongArch::B)).addMBB(Sink); ++ ++ unsigned Xt2 = RegInfo.createVirtualRegister(&LoongArch::LASX256WRegClass); ++ unsigned Xt3 = RegInfo.createVirtualRegister(&LoongArch::LASX256WRegClass); ++ unsigned Rt2 = RegInfo.createVirtualRegister(RC); ++ unsigned Rf1 = RegInfo.createVirtualRegister(&LoongArch::FGR32RegClass); ++ BuildMI(*HBB, HBB->end(), DL, TII->get(LoongArch::ADDI_D), Rt2) ++ .addReg(Lane) ++ .addImm(-4); ++ BuildMI(*HBB, HBB->end(), DL, TII->get(LoongArch::XVPERMI_Q), Xt2) ++ .addReg(Xs) ++ .addReg(Xs) ++ .addImm(1); ++ BuildMI(*HBB, HBB->end(), DL, TII->get(LoongArch::XVREPLVE_W_N), Xt3) ++ .addReg(Xt2) ++ .addReg(Rt2); ++ BuildMI(*HBB, HBB->end(), DL, TII->get(LoongArch::COPY), Rf1) ++ .addReg(Xt3, 0, LoongArch::sub_lo); ++ ++ BuildMI(*Sink, Sink->begin(), DL, TII->get(LoongArch::PHI), ++ MI.getOperand(0).getReg()) ++ .addReg(Rf0) ++ .addMBB(LBB) ++ .addReg(Rf1) ++ .addMBB(HBB); ++ ++ MI.eraseFromParent(); // The pseudo instruction is gone now. 
++ return Sink; ++} ++ ++MachineBasicBlock * ++LoongArchTargetLowering::emitXINSERT_BH(MachineInstr &MI, MachineBasicBlock *BB, ++ unsigned Size) const { ++ const TargetInstrInfo *TII = Subtarget.getInstrInfo(); ++ MachineRegisterInfo &RegInfo = BB->getParent()->getRegInfo(); ++ DebugLoc DL = MI.getDebugLoc(); ++ unsigned Xd = MI.getOperand(0).getReg(); ++ unsigned Xd_in = MI.getOperand(1).getReg(); ++ unsigned Lane = MI.getOperand(2).getImm(); ++ unsigned Fs = MI.getOperand(3).getReg(); ++ const TargetRegisterClass *VecRC = nullptr; ++ const TargetRegisterClass *SubVecRC = nullptr; ++ unsigned HalfSize = 0; ++ unsigned InsertOp = 0; ++ ++ if (Size == 1) { ++ VecRC = &LoongArch::LASX256BRegClass; ++ SubVecRC = &LoongArch::LSX128BRegClass; ++ HalfSize = 16; ++ InsertOp = LoongArch::VINSGR2VR_B; ++ } else if (Size == 2) { ++ VecRC = &LoongArch::LASX256HRegClass; ++ SubVecRC = &LoongArch::LSX128HRegClass; ++ HalfSize = 8; ++ InsertOp = LoongArch::VINSGR2VR_H; ++ } else { ++ llvm_unreachable("Unexpected type"); ++ } ++ ++ unsigned Xk = Xd_in; ++ unsigned Imm = Lane; ++ if (Lane >= HalfSize) { ++ Xk = RegInfo.createVirtualRegister(VecRC); ++ BuildMI(*BB, MI, DL, TII->get(LoongArch::XVPERMI_Q), Xk) ++ .addReg(Xd_in) ++ .addReg(Xd_in) ++ .addImm(1); ++ Imm = Lane - HalfSize; ++ } ++ ++ unsigned Xk128 = RegInfo.createVirtualRegister(SubVecRC); ++ unsigned Xd128 = RegInfo.createVirtualRegister(SubVecRC); ++ BuildMI(*BB, MI, DL, TII->get(LoongArch::COPY), Xk128) ++ .addReg(Xk, 0, LoongArch::sub_128); ++ BuildMI(*BB, MI, DL, TII->get(InsertOp), Xd128) ++ .addReg(Xk128) ++ .addReg(Fs) ++ .addImm(Imm); ++ ++ unsigned Xd256 = Xd; ++ if (Lane >= HalfSize) { ++ Xd256 = RegInfo.createVirtualRegister(VecRC); ++ } ++ ++ BuildMI(*BB, MI, DL, TII->get(LoongArch::SUBREG_TO_REG), Xd256) ++ .addImm(0) ++ .addReg(Xd128) ++ .addImm(LoongArch::sub_128); ++ ++ if (Lane >= HalfSize) { ++ BuildMI(*BB, MI, DL, TII->get(LoongArch::XVPERMI_Q), Xd) ++ .addReg(Xd_in) ++ .addReg(Xd256) ++ .addImm(2); ++ } ++ ++ MI.eraseFromParent(); // The pseudo instruction is gone now. ++ return BB; ++} ++ ++MachineBasicBlock * ++LoongArchTargetLowering::emitXINSERT_FW(MachineInstr &MI, ++ MachineBasicBlock *BB) const { ++ const TargetInstrInfo *TII = Subtarget.getInstrInfo(); ++ MachineRegisterInfo &RegInfo = BB->getParent()->getRegInfo(); ++ DebugLoc DL = MI.getDebugLoc(); ++ unsigned Xd = MI.getOperand(0).getReg(); ++ unsigned Xd_in = MI.getOperand(1).getReg(); ++ unsigned Lane = MI.getOperand(2).getImm(); ++ unsigned Fs = MI.getOperand(3).getReg(); ++ unsigned Xj = RegInfo.createVirtualRegister(&LoongArch::LASX256WRegClass); ++ unsigned Rj = RegInfo.createVirtualRegister(&LoongArch::GPR32RegClass); ++ ++ BuildMI(*BB, MI, DL, TII->get(LoongArch::SUBREG_TO_REG), Xj) ++ .addImm(0) ++ .addReg(Fs) ++ .addImm(LoongArch::sub_lo); ++ BuildMI(*BB, MI, DL, TII->get(LoongArch::XVPICKVE2GR_WU), Rj) ++ .addReg(Xj) ++ .addImm(0); ++ BuildMI(*BB, MI, DL, TII->get(LoongArch::XVINSGR2VR_W), Xd) ++ .addReg(Xd_in) ++ .addReg(Rj) ++ .addImm(Lane); ++ ++ MI.eraseFromParent(); // The pseudo instruction is gone now. ++ return BB; ++} ++ ++// Emit the INSERT_FW pseudo instruction. 
++// ++// insert_fw_pseudo $vd, $vd_in, $n, $fs ++// => ++// subreg_to_reg $vj:sub_lo, $fs ++// vpickve2gr.w rj, vj, 0 ++// vinsgr2vr.w, vd, rj, lane ++MachineBasicBlock * ++LoongArchTargetLowering::emitINSERT_FW(MachineInstr &MI, ++ MachineBasicBlock *BB) const { ++ const TargetInstrInfo *TII = Subtarget.getInstrInfo(); ++ MachineRegisterInfo &RegInfo = BB->getParent()->getRegInfo(); ++ DebugLoc DL = MI.getDebugLoc(); ++ unsigned Vd = MI.getOperand(0).getReg(); ++ unsigned Vd_in = MI.getOperand(1).getReg(); ++ unsigned Lane = MI.getOperand(2).getImm(); ++ unsigned Fs = MI.getOperand(3).getReg(); ++ unsigned Rj = RegInfo.createVirtualRegister(&LoongArch::GPR32RegClass); ++ unsigned Vj = RegInfo.createVirtualRegister(&LoongArch::LSX128WRegClass); ++ ++ BuildMI(*BB, MI, DL, TII->get(LoongArch::SUBREG_TO_REG), Vj) ++ .addImm(0) ++ .addReg(Fs) ++ .addImm(LoongArch::sub_lo); ++ BuildMI(*BB, MI, DL, TII->get(LoongArch::VPICKVE2GR_W), Rj) ++ .addReg(Vj) ++ .addImm(0); ++ BuildMI(*BB, MI, DL, TII->get(LoongArch::VINSGR2VR_W), Vd) ++ .addReg(Vd_in) ++ .addReg(Rj) ++ .addImm(Lane); ++ ++ MI.eraseFromParent(); // The pseudo instruction is gone now. ++ return BB; ++} ++ ++// Emit the INSERT_FD pseudo instruction. ++// insert_fd_pseudo $vd, $fs, n ++// => ++// subreg_to_reg $vk:sub_64, $fs ++// vpickve2gr.d rj, vk, 0 ++// vinsgr2vr.d vd, rj, lane ++MachineBasicBlock * ++LoongArchTargetLowering::emitINSERT_FD(MachineInstr &MI, ++ MachineBasicBlock *BB) const { ++ assert(Subtarget.hasBasicD()); ++ ++ const TargetInstrInfo *TII = Subtarget.getInstrInfo(); ++ MachineRegisterInfo &RegInfo = BB->getParent()->getRegInfo(); ++ DebugLoc DL = MI.getDebugLoc(); ++ unsigned Vd = MI.getOperand(0).getReg(); ++ unsigned Vd_in = MI.getOperand(1).getReg(); ++ unsigned Lane = MI.getOperand(2).getImm(); ++ unsigned Fs = MI.getOperand(3).getReg(); ++ unsigned Vj = RegInfo.createVirtualRegister(&LoongArch::LSX128DRegClass); ++ unsigned Rj = RegInfo.createVirtualRegister(&LoongArch::GPR64RegClass); ++ ++ BuildMI(*BB, MI, DL, TII->get(LoongArch::SUBREG_TO_REG), Vj) ++ .addImm(0) ++ .addReg(Fs) ++ .addImm(LoongArch::sub_64); ++ BuildMI(*BB, MI, DL, TII->get(LoongArch::VPICKVE2GR_D), Rj) ++ .addReg(Vj) ++ .addImm(0); ++ BuildMI(*BB, MI, DL, TII->get(LoongArch::VINSGR2VR_D), Vd) ++ .addReg(Vd_in) ++ .addReg(Rj) ++ .addImm(Lane); ++ ++ MI.eraseFromParent(); // The pseudo instruction is gone now. ++ return BB; ++} ++ ++MachineBasicBlock * ++LoongArchTargetLowering::emitXINSERT_FD(MachineInstr &MI, ++ MachineBasicBlock *BB) const { ++ assert(Subtarget.hasBasicD()); ++ ++ const TargetInstrInfo *TII = Subtarget.getInstrInfo(); ++ MachineRegisterInfo &RegInfo = BB->getParent()->getRegInfo(); ++ DebugLoc DL = MI.getDebugLoc(); ++ unsigned Xd = MI.getOperand(0).getReg(); ++ unsigned Xd_in = MI.getOperand(1).getReg(); ++ unsigned Lane = MI.getOperand(2).getImm(); ++ unsigned Fs = MI.getOperand(3).getReg(); ++ unsigned Xj = RegInfo.createVirtualRegister(&LoongArch::LASX256DRegClass); ++ ++ BuildMI(*BB, MI, DL, TII->get(LoongArch::SUBREG_TO_REG), Xj) ++ .addImm(0) ++ .addReg(Fs) ++ .addImm(LoongArch::sub_64); ++ BuildMI(*BB, MI, DL, TII->get(LoongArch::XVINSVE0_D), Xd) ++ .addReg(Xd_in) ++ .addReg(Xj) ++ .addImm(Lane); ++ ++ MI.eraseFromParent(); // The pseudo instruction is gone now. ++ return BB; ++} ++ ++// Emit the FILL_FW pseudo instruction. 
++// ++// fill_fw_pseudo $vd, $fs ++// => ++// implicit_def $vt1 ++// insert_subreg $vt2:subreg_lo, $vt1, $fs ++// vreplvei.w vd, vt2, 0 ++MachineBasicBlock * ++LoongArchTargetLowering::emitFILL_FW(MachineInstr &MI, ++ MachineBasicBlock *BB) const { ++ const TargetInstrInfo *TII = Subtarget.getInstrInfo(); ++ MachineRegisterInfo &RegInfo = BB->getParent()->getRegInfo(); ++ DebugLoc DL = MI.getDebugLoc(); ++ unsigned Vd = MI.getOperand(0).getReg(); ++ unsigned Fs = MI.getOperand(1).getReg(); ++ unsigned Vj1 = RegInfo.createVirtualRegister(&LoongArch::LSX128WRegClass); ++ unsigned Vj2 = RegInfo.createVirtualRegister(&LoongArch::LSX128WRegClass); ++ ++ BuildMI(*BB, MI, DL, TII->get(LoongArch::IMPLICIT_DEF), Vj1); ++ BuildMI(*BB, MI, DL, TII->get(LoongArch::INSERT_SUBREG), Vj2) ++ .addReg(Vj1) ++ .addReg(Fs) ++ .addImm(LoongArch::sub_lo); ++ BuildMI(*BB, MI, DL, TII->get(LoongArch::VREPLVEI_W), Vd) ++ .addReg(Vj2) ++ .addImm(0); ++ ++ MI.eraseFromParent(); // The pseudo instruction is gone now. ++ return BB; ++} ++ ++// Emit the FILL_FD pseudo instruction. ++// ++// fill_fd_pseudo $vd, $fs ++// => ++// implicit_def $vt1 ++// insert_subreg $vt2:subreg_64, $vt1, $fs ++// vreplvei.d vd, vt2, 0 ++MachineBasicBlock * ++LoongArchTargetLowering::emitFILL_FD(MachineInstr &MI, ++ MachineBasicBlock *BB) const { ++ assert(Subtarget.hasBasicD()); ++ ++ const TargetInstrInfo *TII = Subtarget.getInstrInfo(); ++ MachineRegisterInfo &RegInfo = BB->getParent()->getRegInfo(); ++ DebugLoc DL = MI.getDebugLoc(); ++ unsigned Vd = MI.getOperand(0).getReg(); ++ unsigned Fs = MI.getOperand(1).getReg(); ++ unsigned Vj1 = RegInfo.createVirtualRegister(&LoongArch::LSX128DRegClass); ++ unsigned Vj2 = RegInfo.createVirtualRegister(&LoongArch::LSX128DRegClass); ++ ++ BuildMI(*BB, MI, DL, TII->get(LoongArch::IMPLICIT_DEF), Vj1); ++ BuildMI(*BB, MI, DL, TII->get(LoongArch::INSERT_SUBREG), Vj2) ++ .addReg(Vj1) ++ .addReg(Fs) ++ .addImm(LoongArch::sub_64); ++ BuildMI(*BB, MI, DL, TII->get(LoongArch::VREPLVEI_D), Vd) ++ .addReg(Vj2) ++ .addImm(0); ++ ++ MI.eraseFromParent(); // The pseudo instruction is gone now. ++ return BB; ++} ++ ++// Emit the XFILL_FW pseudo instruction. ++// ++// xfill_fw_pseudo $xd, $fs ++// => ++// implicit_def $xt1 ++// insert_subreg $xt2:subreg_lo, $xt1, $fs ++// xvreplve0.w xd, xt2, 0 ++MachineBasicBlock * ++LoongArchTargetLowering::emitXFILL_FW(MachineInstr &MI, ++ MachineBasicBlock *BB) const { ++ const TargetInstrInfo *TII = Subtarget.getInstrInfo(); ++ MachineRegisterInfo &RegInfo = BB->getParent()->getRegInfo(); ++ DebugLoc DL = MI.getDebugLoc(); ++ unsigned Xd = MI.getOperand(0).getReg(); ++ unsigned Fs = MI.getOperand(1).getReg(); ++ unsigned Xj1 = RegInfo.createVirtualRegister(&LoongArch::LASX256WRegClass); ++ unsigned Xj2 = RegInfo.createVirtualRegister(&LoongArch::LASX256WRegClass); ++ ++ BuildMI(*BB, MI, DL, TII->get(LoongArch::IMPLICIT_DEF), Xj1); ++ BuildMI(*BB, MI, DL, TII->get(LoongArch::INSERT_SUBREG), Xj2) ++ .addReg(Xj1) ++ .addReg(Fs) ++ .addImm(LoongArch::sub_lo); ++ BuildMI(*BB, MI, DL, TII->get(LoongArch::XVREPLVE0_W), Xd).addReg(Xj2); ++ ++ MI.eraseFromParent(); // The pseudo instruction is gone now. ++ return BB; ++} ++ ++// Emit the XFILL_FD pseudo instruction. 
++// ++// xfill_fd_pseudo $xd, $fs ++// => ++// implicit_def $xt1 ++// insert_subreg $xt2:subreg_64, $xt1, $fs ++// xvreplve0.d xd, xt2, 0 ++MachineBasicBlock * ++LoongArchTargetLowering::emitXFILL_FD(MachineInstr &MI, ++ MachineBasicBlock *BB) const { ++ assert(Subtarget.hasBasicD()); ++ ++ const TargetInstrInfo *TII = Subtarget.getInstrInfo(); ++ MachineRegisterInfo &RegInfo = BB->getParent()->getRegInfo(); ++ DebugLoc DL = MI.getDebugLoc(); ++ unsigned Xd = MI.getOperand(0).getReg(); ++ unsigned Fs = MI.getOperand(1).getReg(); ++ unsigned Xj1 = RegInfo.createVirtualRegister(&LoongArch::LASX256DRegClass); ++ unsigned Xj2 = RegInfo.createVirtualRegister(&LoongArch::LASX256DRegClass); ++ ++ BuildMI(*BB, MI, DL, TII->get(LoongArch::IMPLICIT_DEF), Xj1); ++ BuildMI(*BB, MI, DL, TII->get(LoongArch::INSERT_SUBREG), Xj2) ++ .addReg(Xj1) ++ .addReg(Fs) ++ .addImm(LoongArch::sub_64); ++ BuildMI(*BB, MI, DL, TII->get(LoongArch::XVREPLVE0_D), Xd).addReg(Xj2); ++ ++ MI.eraseFromParent(); // The pseudo instruction is gone now. ++ return BB; ++} ++ ++bool LoongArchTargetLowering::isLegalAddImmediate(int64_t Imm) const { ++ bool IsLegal = false; ++ if (Subtarget.hasLSX() || Subtarget.hasLASX()) { ++ return isUInt<5>(Imm); ++ } ++ return IsLegal; ++} ++ ++bool LoongArchTargetLowering::isFMAFasterThanFMulAndFAdd( ++ const MachineFunction &MF, EVT VT) const { ++ ++ VT = VT.getScalarType(); ++ ++ if (!VT.isSimple()) ++ return false; ++ ++ switch (VT.getSimpleVT().SimpleTy) { ++ case MVT::f32: ++ case MVT::f64: ++ return true; ++ default: ++ break; ++ } ++ ++ return false; ++} ++ ++bool LoongArchTargetLowering::isExtractSubvectorCheap(EVT ResVT, EVT SrcVT, ++ unsigned Index) const { ++ if (!isOperationLegalOrCustom(ISD::EXTRACT_SUBVECTOR, ResVT)) ++ return false; ++ ++ return ( ++ (ResVT != MVT::v16i8) && (ResVT != MVT::v8i16) && ++ (Index == 0 || (Index == ResVT.getVectorNumElements() && ++ (ResVT.getSizeInBits() == SrcVT.getSizeInBits() / 2)))); ++} ++ ++Register ++LoongArchTargetLowering::getRegisterByName(const char *RegName, LLT VT, ++ const MachineFunction &MF) const { ++ // Named registers is expected to be fairly rare. For now, just support $r2 ++ // and $r21 since the linux kernel uses them. ++ if (Subtarget.is64Bit()) { ++ Register Reg = StringSwitch(RegName) ++ .Case("$r2", LoongArch::TP_64) ++ .Case("$r21", LoongArch::T9_64) ++ .Default(Register()); ++ if (Reg) ++ return Reg; ++ } else { ++ Register Reg = StringSwitch(RegName) ++ .Case("$r2", LoongArch::TP) ++ .Case("$r21", LoongArch::T9) ++ .Default(Register()); ++ if (Reg) ++ return Reg; ++ } ++ report_fatal_error("Invalid register name global variable"); ++} +diff --git a/lib/Target/LoongArch/LoongArchISelLowering.h b/lib/Target/LoongArch/LoongArchISelLowering.h +new file mode 100644 +index 00000000..e22c13ef +--- /dev/null ++++ b/lib/Target/LoongArch/LoongArchISelLowering.h +@@ -0,0 +1,557 @@ ++//===- LoongArchISelLowering.h - LoongArch DAG Lowering Interface ---------*- C++ -*-===// ++// ++// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. ++// See https://llvm.org/LICENSE.txt for license information. ++// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception ++// ++//===----------------------------------------------------------------------===// ++// ++// This file defines the interfaces that LoongArch uses to lower LLVM code into a ++// selection DAG. 
++// ++//===----------------------------------------------------------------------===// ++ ++#ifndef LLVM_LIB_TARGET_LOONGARCH_LOONGARCHISELLOWERING_H ++#define LLVM_LIB_TARGET_LOONGARCH_LOONGARCHISELLOWERING_H ++ ++#include "MCTargetDesc/LoongArchABIInfo.h" ++#include "MCTargetDesc/LoongArchBaseInfo.h" ++#include "MCTargetDesc/LoongArchMCTargetDesc.h" ++#include "LoongArch.h" ++#include "llvm/CodeGen/CallingConvLower.h" ++#include "llvm/CodeGen/ISDOpcodes.h" ++#include "llvm/CodeGen/MachineMemOperand.h" ++#include "llvm/CodeGen/SelectionDAG.h" ++#include "llvm/CodeGen/SelectionDAGNodes.h" ++#include "llvm/CodeGen/TargetLowering.h" ++#include "llvm/CodeGen/ValueTypes.h" ++#include "llvm/IR/CallingConv.h" ++#include "llvm/IR/InlineAsm.h" ++#include "llvm/IR/Type.h" ++#include "llvm/Support/MachineValueType.h" ++#include "llvm/Target/TargetMachine.h" ++#include ++#include ++#include ++#include ++#include ++#include ++ ++namespace llvm { ++ ++class Argument; ++class CCState; ++class CCValAssign; ++class FastISel; ++class FunctionLoweringInfo; ++class MachineBasicBlock; ++class MachineFrameInfo; ++class MachineInstr; ++class LoongArchCCState; ++class LoongArchFunctionInfo; ++class LoongArchSubtarget; ++class LoongArchTargetMachine; ++class SelectionDAG; ++class TargetLibraryInfo; ++class TargetRegisterClass; ++ ++ namespace LoongArchISD { ++ ++ enum NodeType : unsigned { ++ // Start the numbering from where ISD NodeType finishes. ++ FIRST_NUMBER = ISD::BUILTIN_OP_END, ++ ++ // Jump and link (call) ++ JmpLink, ++ ++ // Tail call ++ TailCall, ++ ++ // global address ++ GlobalAddress, ++ ++ // Floating Point Branch Conditional ++ FPBrcond, ++ ++ // Floating Point Compare ++ FPCmp, ++ ++ // Floating Point Conditional Moves ++ CMovFP_T, ++ CMovFP_F, ++ FSEL, ++ ++ // FP-to-int truncation node. ++ TruncIntFP, ++ ++ // Return ++ Ret, ++ ++ // error trap Return ++ ERet, ++ ++ // Software Exception Return. ++ EH_RETURN, ++ ++ DBAR, ++ ++ BSTRPICK, ++ BSTRINS, ++ ++ // Vector comparisons. ++ // These take a vector and return a boolean. ++ VALL_ZERO, ++ VANY_ZERO, ++ VALL_NONZERO, ++ VANY_NONZERO, ++ ++ // Vector Shuffle with mask as an operand ++ VSHF, // Generic shuffle ++ SHF, // 4-element set shuffle. ++ VPACKEV, // Interleave even elements ++ VPACKOD, // Interleave odd elements ++ VILVH, // Interleave left elements ++ VILVL, // Interleave right elements ++ VPICKEV, // Pack even elements ++ VPICKOD, // Pack odd elements ++ ++ // Vector Lane Copy ++ INSVE, // Copy element from one vector to another ++ ++ // Combined (XOR (OR $a, $b), -1) ++ VNOR, ++ ++ VROR, ++ VRORI, ++ XVPICKVE, ++ XVPERMI, ++ XVSHUF4I, ++ REVBD, ++ ++ // Extended vector element extraction ++ VEXTRACT_SEXT_ELT, ++ VEXTRACT_ZEXT_ELT, ++ ++ XVBROADCAST, ++ VBROADCAST, ++ VABSD, ++ UVABSD, ++ }; ++ ++ } // ene namespace LoongArchISD ++ ++ //===--------------------------------------------------------------------===// ++ // TargetLowering Implementation ++ //===--------------------------------------------------------------------===// ++ ++ class LoongArchTargetLowering : public TargetLowering { ++ public: ++ explicit LoongArchTargetLowering(const LoongArchTargetMachine &TM, ++ const LoongArchSubtarget &STI); ++ ++ bool allowsMisalignedMemoryAccesses( ++ EVT VT, unsigned AS = 0, Align Alignment = Align(1), ++ MachineMemOperand::Flags Flags = MachineMemOperand::MONone, ++ bool *Fast = nullptr) const override; ++ ++ /// Enable LSX support for the given integer type and Register ++ /// class. 
++ void addLSXIntType(MVT::SimpleValueType Ty, const TargetRegisterClass *RC); ++ ++ /// Enable LSX support for the given floating-point type and ++ /// Register class. ++ void addLSXFloatType(MVT::SimpleValueType Ty, ++ const TargetRegisterClass *RC); ++ ++ /// Enable LASX support for the given integer type and Register ++ /// class. ++ void addLASXIntType(MVT::SimpleValueType Ty, const TargetRegisterClass *RC); ++ ++ /// Enable LASX support for the given floating-point type and ++ /// Register class. ++ void addLASXFloatType(MVT::SimpleValueType Ty, ++ const TargetRegisterClass *RC); ++ ++ MVT getScalarShiftAmountTy(const DataLayout &, EVT) const override { ++ return MVT::i32; ++ } ++ ++ EVT getTypeForExtReturn(LLVMContext &Context, EVT VT, ++ ISD::NodeType) const override; ++ ++ bool isCheapToSpeculateCttz() const override; ++ bool isCheapToSpeculateCtlz() const override; ++ ++ bool isLegalAddImmediate(int64_t) const override; ++ ++ /// Return the correct alignment for the current calling convention. ++ Align getABIAlignmentForCallingConv(Type *ArgTy, ++ const DataLayout &DL) const override { ++ const Align ABIAlign = DL.getABITypeAlign(ArgTy); ++ if (ArgTy->isVectorTy()) ++ return std::min(ABIAlign, Align(8)); ++ return ABIAlign; ++ } ++ ++ ISD::NodeType getExtendForAtomicOps() const override { ++ return ISD::SIGN_EXTEND; ++ } ++ ++ bool isExtractSubvectorCheap(EVT ResVT, EVT SrcVT, ++ unsigned Index) const override; ++ ++ void LowerOperationWrapper(SDNode *N, ++ SmallVectorImpl &Results, ++ SelectionDAG &DAG) const override; ++ ++ /// LowerOperation - Provide custom lowering hooks for some operations. ++ SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const override; ++ ++ bool isFMAFasterThanFMulAndFAdd(const MachineFunction &MF, ++ EVT VT) const override; ++ ++ /// ReplaceNodeResults - Replace the results of node with an illegal result ++ /// type with new values built out of custom code. ++ /// ++ void ReplaceNodeResults(SDNode *N, SmallVectorImpl&Results, ++ SelectionDAG &DAG) const override; ++ ++ /// getTargetNodeName - This method returns the name of a target specific ++ // DAG node. ++ const char *getTargetNodeName(unsigned Opcode) const override; ++ ++ /// getSetCCResultType - get the ISD::SETCC result ValueType ++ EVT getSetCCResultType(const DataLayout &DL, LLVMContext &Context, ++ EVT VT) const override; ++ ++ SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const override; ++ ++ MachineBasicBlock * ++ EmitInstrWithCustomInserter(MachineInstr &MI, ++ MachineBasicBlock *MBB) const override; ++ ++ bool isShuffleMaskLegal(ArrayRef Mask, EVT VT) const override { ++ return false; ++ } ++ ++ const TargetRegisterClass *getRepRegClassFor(MVT VT) const override; ++ ++ void AdjustInstrPostInstrSelection(MachineInstr &MI, ++ SDNode *Node) const override; ++ ++ void HandleByVal(CCState *, unsigned &, Align) const override; ++ ++ Register getRegisterByName(const char* RegName, LLT VT, ++ const MachineFunction &MF) const override; ++ ++ /// If a physical register, this returns the register that receives the ++ /// exception address on entry to an EH pad. ++ Register ++ getExceptionPointerRegister(const Constant *PersonalityFn) const override { ++ return ABI.IsLP64() ? LoongArch::A0_64 : LoongArch::A0; ++ } ++ ++ /// If a physical register, this returns the register that receives the ++ /// exception typeid on entry to a landing pad. ++ Register ++ getExceptionSelectorRegister(const Constant *PersonalityFn) const override { ++ return ABI.IsLP64() ? 
LoongArch::A1_64 : LoongArch::A1; ++ } ++ ++ bool isJumpTableRelative() const override { ++ return getTargetMachine().isPositionIndependent(); ++ } ++ ++ CCAssignFn *CCAssignFnForCall() const; ++ ++ CCAssignFn *CCAssignFnForReturn() const; ++ ++ private: ++ template ++ SDValue getAddr(NodeTy *N, SelectionDAG &DAG, bool IsLocal = true) const; ++ ++ /// This function fills Ops, which is the list of operands that will later ++ /// be used when a function call node is created. It also generates ++ /// copyToReg nodes to set up argument registers. ++ void getOpndList(SmallVectorImpl &Ops, ++ std::deque> &RegsToPass, ++ bool IsPICCall, bool GlobalOrExternal, bool IsCallReloc, ++ CallLoweringInfo &CLI, SDValue Callee, SDValue Chain, ++ bool IsTailCall) const; ++ ++ SDValue lowerLOAD(SDValue Op, SelectionDAG &DAG) const; ++ SDValue lowerSTORE(SDValue Op, SelectionDAG &DAG) const; ++ ++ // Subtarget Info ++ const LoongArchSubtarget &Subtarget; ++ // Cache the ABI from the TargetMachine, we use it everywhere. ++ const LoongArchABIInfo &ABI; ++ ++ // Create a TargetGlobalAddress node. ++ SDValue getTargetNode(GlobalAddressSDNode *N, EVT Ty, SelectionDAG &DAG, ++ unsigned Flag) const; ++ ++ // Create a TargetExternalSymbol node. ++ SDValue getTargetNode(ExternalSymbolSDNode *N, EVT Ty, SelectionDAG &DAG, ++ unsigned Flag) const; ++ ++ // Create a TargetBlockAddress node. ++ SDValue getTargetNode(BlockAddressSDNode *N, EVT Ty, SelectionDAG &DAG, ++ unsigned Flag) const; ++ ++ // Create a TargetJumpTable node. ++ SDValue getTargetNode(JumpTableSDNode *N, EVT Ty, SelectionDAG &DAG, ++ unsigned Flag) const; ++ ++ // Create a TargetConstantPool node. ++ SDValue getTargetNode(ConstantPoolSDNode *N, EVT Ty, SelectionDAG &DAG, ++ unsigned Flag) const; ++ ++ // Lower Operand helpers ++ SDValue LowerCallResult(SDValue Chain, SDValue InFlag, ++ CallingConv::ID CallConv, bool isVarArg, ++ const SmallVectorImpl &Ins, ++ const SDLoc &dl, SelectionDAG &DAG, ++ SmallVectorImpl &InVals, ++ TargetLowering::CallLoweringInfo &CLI) const; ++ ++ // Lower Operand specifics ++ SDValue lowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) const; ++ SDValue lowerINTRINSIC_W_CHAIN(SDValue Op, SelectionDAG &DAG) const; ++ SDValue lowerINTRINSIC_VOID(SDValue Op, SelectionDAG &DAG) const; ++ SDValue lowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const; ++ SDValue lowerINSERT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const; ++ SDValue lowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const; ++ SDValue lowerUINT_TO_FP(SDValue Op, SelectionDAG &DAG) const; ++ SDValue lowerSINT_TO_FP(SDValue Op, SelectionDAG &DAG) const; ++ SDValue lowerFP_TO_UINT(SDValue Op, SelectionDAG &DAG) const; ++ SDValue lowerFP_TO_SINT(SDValue Op, SelectionDAG &DAG) const; ++ /// Lower VECTOR_SHUFFLE into one of a number of instructions ++ /// depending on the indices in the shuffle. 
++ SDValue lowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const; ++ SDValue lowerBRCOND(SDValue Op, SelectionDAG &DAG) const; ++ SDValue lowerConstantPool(SDValue Op, SelectionDAG &DAG) const; ++ SDValue lowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const; ++ SDValue lowerBlockAddress(SDValue Op, SelectionDAG &DAG) const; ++ SDValue lowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const; ++ SDValue lowerJumpTable(SDValue Op, SelectionDAG &DAG) const; ++ SDValue lowerSELECT(SDValue Op, SelectionDAG &DAG) const; ++ SDValue lowerSETCC(SDValue Op, SelectionDAG &DAG) const; ++ SDValue lowerVASTART(SDValue Op, SelectionDAG &DAG) const; ++ SDValue lowerVAARG(SDValue Op, SelectionDAG &DAG) const; ++ SDValue lowerFABS(SDValue Op, SelectionDAG &DAG) const; ++ SDValue lowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const; ++ SDValue lowerRETURNADDR(SDValue Op, SelectionDAG &DAG) const; ++ SDValue lowerEH_RETURN(SDValue Op, SelectionDAG &DAG) const; ++ SDValue lowerATOMIC_FENCE(SDValue Op, SelectionDAG& DAG) const; ++ SDValue lowerShiftLeftParts(SDValue Op, SelectionDAG& DAG) const; ++ SDValue lowerShiftRightParts(SDValue Op, SelectionDAG& DAG, ++ bool IsSRA) const; ++ SDValue lowerEH_DWARF_CFA(SDValue Op, SelectionDAG &DAG) const; ++ ++ /// isEligibleForTailCallOptimization - Check whether the call is eligible ++ /// for tail call optimization. ++ bool ++ isEligibleForTailCallOptimization(const CCState &CCInfo, ++ CallLoweringInfo &CLI, MachineFunction &MF, ++ unsigned NextStackOffset, ++ const LoongArchFunctionInfo &FI) const; ++ ++ /// copyByValArg - Copy argument registers which were used to pass a byval ++ /// argument to the stack. Create a stack frame object for the byval ++ /// argument. ++ void copyByValRegs(SDValue Chain, const SDLoc &DL, ++ std::vector &OutChains, SelectionDAG &DAG, ++ const ISD::ArgFlagsTy &Flags, ++ SmallVectorImpl &InVals, ++ const Argument *FuncArg, unsigned FirstReg, ++ unsigned LastReg, const CCValAssign &VA, ++ LoongArchCCState &State) const; ++ ++ /// passByValArg - Pass a byval argument in registers or on stack. ++ void passByValArg(SDValue Chain, const SDLoc &DL, ++ std::deque> &RegsToPass, ++ SmallVectorImpl &MemOpChains, SDValue StackPtr, ++ MachineFrameInfo &MFI, SelectionDAG &DAG, SDValue Arg, ++ unsigned FirstReg, unsigned LastReg, ++ const ISD::ArgFlagsTy &Flags, ++ const CCValAssign &VA) const; ++ ++ /// writeVarArgRegs - Write variable function arguments passed in registers ++ /// to the stack. Also create a stack frame object for the first variable ++ /// argument. 
++ void writeVarArgRegs(std::vector &OutChains, SDValue Chain, ++ const SDLoc &DL, SelectionDAG &DAG, ++ CCState &State) const; ++ ++ SDValue ++ LowerFormalArguments(SDValue Chain, CallingConv::ID CallConv, bool isVarArg, ++ const SmallVectorImpl &Ins, ++ const SDLoc &dl, SelectionDAG &DAG, ++ SmallVectorImpl &InVals) const override; ++ ++ SDValue passArgOnStack(SDValue StackPtr, unsigned Offset, SDValue Chain, ++ SDValue Arg, const SDLoc &DL, bool IsTailCall, ++ SelectionDAG &DAG) const; ++ ++ SDValue LowerCall(TargetLowering::CallLoweringInfo &CLI, ++ SmallVectorImpl &InVals) const override; ++ ++ bool CanLowerReturn(CallingConv::ID CallConv, MachineFunction &MF, ++ bool isVarArg, ++ const SmallVectorImpl &Outs, ++ LLVMContext &Context) const override; ++ ++ SDValue LowerReturn(SDValue Chain, CallingConv::ID CallConv, bool isVarArg, ++ const SmallVectorImpl &Outs, ++ const SmallVectorImpl &OutVals, ++ const SDLoc &dl, SelectionDAG &DAG) const override; ++ ++ bool shouldSignExtendTypeInLibCall(EVT Type, bool IsSigned) const override; ++ ++ // Inline asm support ++ ConstraintType getConstraintType(StringRef Constraint) const override; ++ ++ /// Examine constraint string and operand type and determine a weight value. ++ /// The operand object must already have been set up with the operand type. ++ ConstraintWeight getSingleConstraintMatchWeight( ++ AsmOperandInfo &info, const char *constraint) const override; ++ ++ /// This function parses registers that appear in inline-asm constraints. ++ /// It returns pair (0, 0) on failure. ++ std::pair ++ parseRegForInlineAsmConstraint(StringRef C, MVT VT) const; ++ ++ std::pair ++ getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI, ++ StringRef Constraint, MVT VT) const override; ++ ++ /// LowerAsmOperandForConstraint - Lower the specified operand into the Ops ++ /// vector. If it is invalid, don't add anything to Ops. If hasMemory is ++ /// true it means one of the asm constraint of the inline asm instruction ++ /// being processed is 'm'. ++ void LowerAsmOperandForConstraint(SDValue Op, ++ std::string &Constraint, ++ std::vector &Ops, ++ SelectionDAG &DAG) const override; ++ ++ unsigned ++ getInlineAsmMemConstraint(StringRef ConstraintCode) const override { ++ if (ConstraintCode == "R") ++ return InlineAsm::Constraint_R; ++ else if (ConstraintCode == "ZC") ++ return InlineAsm::Constraint_ZC; ++ else if (ConstraintCode == "ZB") ++ return InlineAsm::Constraint_ZB; ++ return TargetLowering::getInlineAsmMemConstraint(ConstraintCode); ++ } ++ ++ bool isLegalAddressingMode(const DataLayout &DL, const AddrMode &AM, ++ Type *Ty, unsigned AS, ++ Instruction *I = nullptr) const override; ++ ++ bool isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const override; ++ ++ EVT getOptimalMemOpType(const MemOp &Op, ++ const AttributeList &FuncAttributes) const override; ++ ++ /// isFPImmLegal - Returns true if the target can instruction select the ++ /// specified FP immediate natively. If false, the legalizer will ++ /// materialize the FP immediate as a load from a constant pool. ++ bool isFPImmLegal(const APFloat &Imm, EVT VT, ++ bool ForCodeSize) const override; ++ ++ bool useSoftFloat() const override; ++ ++ bool shouldInsertFencesForAtomic(const Instruction *I) const override { ++ return isa(I) || isa(I); ++ } ++ ++ /// Emit a sign-extension using sll/sra, seb, or seh appropriately. 
++ MachineBasicBlock *emitSignExtendToI32InReg(MachineInstr &MI, ++ MachineBasicBlock *BB, ++ unsigned Size, unsigned DstReg, ++ unsigned SrcRec) const; ++ ++ MachineBasicBlock *emitLoadAddress(MachineInstr &MI, ++ MachineBasicBlock *BB) const; ++ MachineBasicBlock *emitAtomicBinary(MachineInstr &MI, ++ MachineBasicBlock *BB) const; ++ MachineBasicBlock *emitAtomicBinaryPartword(MachineInstr &MI, ++ MachineBasicBlock *BB, ++ unsigned Size) const; ++ ++ MachineBasicBlock *emitXINSERT_B(MachineInstr &MI, ++ MachineBasicBlock *BB) const; ++ MachineBasicBlock *emitINSERT_H_VIDX(MachineInstr &MI, ++ MachineBasicBlock *BB) const; ++ ++ MachineBasicBlock *emitAtomicCmpSwap(MachineInstr &MI, ++ MachineBasicBlock *BB) const; ++ MachineBasicBlock *emitAtomicCmpSwapPartword(MachineInstr &MI, ++ MachineBasicBlock *BB, ++ unsigned Size) const; ++ MachineBasicBlock *emitSEL_D(MachineInstr &MI, MachineBasicBlock *BB) const; ++ ++ MachineBasicBlock *emitPseudoSELECT(MachineInstr &MI, MachineBasicBlock *BB, ++ bool isFPCmp, unsigned Opc) const; ++ ++ /// SE ++ MachineBasicBlock *emitLSXCBranchPseudo(MachineInstr &MI, ++ MachineBasicBlock *BB, ++ unsigned BranchOp) const; ++ /// Emit the COPY_FW pseudo instruction ++ MachineBasicBlock *emitCOPY_FW(MachineInstr &MI, ++ MachineBasicBlock *BB) const; ++ /// Emit the COPY_FD pseudo instruction ++ MachineBasicBlock *emitCOPY_FD(MachineInstr &MI, ++ MachineBasicBlock *BB) const; ++ ++ MachineBasicBlock *emitXCOPY_FW(MachineInstr &MI, ++ MachineBasicBlock *BB) const; ++ ++ MachineBasicBlock *emitXCOPY_FD(MachineInstr &MI, ++ MachineBasicBlock *BB) const; ++ ++ MachineBasicBlock *emitCONCAT_VECTORS(MachineInstr &MI, ++ MachineBasicBlock *BB, ++ unsigned Bytes) const; ++ ++ MachineBasicBlock *emitXCOPY_FW_GPR(MachineInstr &MI, ++ MachineBasicBlock *BB) const; ++ ++ MachineBasicBlock *emitXINSERT_BH(MachineInstr &MI, MachineBasicBlock *BB, ++ unsigned EltSizeInBytes) const; ++ ++ MachineBasicBlock *emitXINSERT_FW(MachineInstr &MI, ++ MachineBasicBlock *BB) const; ++ ++ /// Emit the INSERT_FW pseudo instruction ++ MachineBasicBlock *emitINSERT_FW(MachineInstr &MI, ++ MachineBasicBlock *BB) const; ++ /// Emit the INSERT_FD pseudo instruction ++ MachineBasicBlock *emitINSERT_FD(MachineInstr &MI, ++ MachineBasicBlock *BB) const; ++ ++ MachineBasicBlock *emitXINSERT_FD(MachineInstr &MI, ++ MachineBasicBlock *BB) const; ++ ++ MachineBasicBlock *emitXINSERT_DF_VIDX(MachineInstr &MI, ++ MachineBasicBlock *BB, ++ bool IsGPR64) const; ++ /// Emit the FILL_FW pseudo instruction ++ MachineBasicBlock *emitFILL_FW(MachineInstr &MI, ++ MachineBasicBlock *BB) const; ++ /// Emit the FILL_FD pseudo instruction ++ MachineBasicBlock *emitFILL_FD(MachineInstr &MI, ++ MachineBasicBlock *BB) const; ++ ++ MachineBasicBlock *emitXFILL_FW(MachineInstr &MI, ++ MachineBasicBlock *BB) const; ++ MachineBasicBlock *emitXFILL_FD(MachineInstr &MI, ++ MachineBasicBlock *BB) const; ++ }; ++ ++} // end namespace llvm ++ ++#endif // LLVM_LIB_TARGET_LOONGARCH_LOONGARCHISELLOWERING_H +diff --git a/lib/Target/LoongArch/LoongArchInstrFormats.td b/lib/Target/LoongArch/LoongArchInstrFormats.td +new file mode 100644 +index 00000000..d75d5198 +--- /dev/null ++++ b/lib/Target/LoongArch/LoongArchInstrFormats.td +@@ -0,0 +1,790 @@ ++//===-- LoongArchInstrFormats.td - LoongArch Instruction Formats -----*- tablegen -*-===// ++// ++// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. ++// See https://llvm.org/LICENSE.txt for license information. 
++// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception ++// ++//===----------------------------------------------------------------------===// ++ ++//===----------------------------------------------------------------------===// ++// Describe LoongArch instructions format ++// ++// CPU INSTRUCTION FORMATS ++// ++// opcode - operation code. ++// rs - src reg. ++// rt - dst reg (on a 2 regs instr) or src reg (on a 3 reg instr). ++// rd - dst reg, only used on 3 regs instr. ++// shamt - only used on shift instructions, contains the shift amount. ++// funct - combined with opcode field give us an operation code. ++// ++//===----------------------------------------------------------------------===// ++ ++class StdArch { ++ ++ bits<32> Inst; ++} ++ ++// Format specifies the encoding used by the instruction. This is part of the ++// ad-hoc solution used to emit machine instruction encodings by our machine ++// code emitter. ++class Format val> { ++ bits<4> Value = val; ++} ++ ++def Pseudo : Format<0>; ++def FrmR : Format<1>; ++def FrmI : Format<2>; ++def FrmJ : Format<3>; ++def FrmFR : Format<4>; ++def FrmFI : Format<5>; ++def FrmOther : Format<6>; ++ ++// Generic LoongArch Format ++class InstLA pattern, Format f> ++ : Instruction ++{ ++ field bits<32> Inst; ++ Format Form = f; ++ ++ let Namespace = "LoongArch"; ++ ++ let Size = 4; ++ ++ let OutOperandList = outs; ++ let InOperandList = ins; ++ let AsmString = asmstr; ++ let Pattern = pattern; ++ ++ // ++ // Attributes specific to LoongArch instructions... ++ // ++ bits<4> FormBits = Form.Value; ++ bit isCTI = 0; // Any form of Control Transfer Instruction. ++ // Required for LoongArch ++ bit hasForbiddenSlot = 0; // Instruction has a forbidden slot. ++ bit IsPCRelativeLoad = 0; // Load instruction with implicit source register ++ // ($pc) and with explicit offset and destination ++ // register ++ bit hasFCCRegOperand = 0; // Instruction uses $fcc register ++ ++ // TSFlags layout should be kept in sync with MCTargetDesc/LoongArchBaseInfo.h. 
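++  // (FormBits occupies TSFlags{3-0}; isCTI, hasForbiddenSlot, IsPCRelativeLoad
++  // and hasFCCRegOperand each take one bit above it, as assigned just below.)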
++ let TSFlags{3-0} = FormBits; ++ let TSFlags{4} = isCTI; ++ let TSFlags{5} = hasForbiddenSlot; ++ let TSFlags{6} = IsPCRelativeLoad; ++ let TSFlags{7} = hasFCCRegOperand; ++ ++ let DecoderNamespace = "LoongArch"; ++ ++ field bits<32> SoftFail = 0; ++} ++ ++class InstForm pattern, ++ Format f, string opstr = ""> : ++ InstLA { ++ string BaseOpcode = opstr; ++ string Arch; ++} ++ ++class LoongArch_str { ++ string Arch; ++ string BaseOpcode = opstr; ++} ++ ++//===-----------------------------------------------------------===// ++// Format instruction classes in the LoongArch ++//===-----------------------------------------------------------===// ++ ++// R2 classes: 2 registers ++// ++class R2 : StdArch { ++ bits<5> rj; ++ bits<5> rd; ++ ++ let Inst{9-5} = rj; ++ let Inst{4-0} = rd; ++} ++ ++class R2I op> ++ : R2 { ++ let Inst{31-15} = 0x0; ++ let Inst{14-10} = op; ++} ++ ++class R2F op> ++ : R2 { ++ bits<5> fj; ++ bits<5> fd; ++ ++ let Inst{31-20} = 0x11; ++ let Inst{19-10} = op; ++ let Inst{9-5} = fj; ++ let Inst{4-0} = fd; ++} ++ ++class MOVFI op> ++ : R2 { ++ bits<5> rj; ++ bits<5> fd; ++ ++ let Inst{31-20} = 0x11; ++ let Inst{19-10} = op; ++ let Inst{9-5} = rj; ++ let Inst{4-0} = fd; ++} ++ ++class MOVIF op> ++ : R2 { ++ bits<5> fj; ++ bits<5> rd; ++ ++ let Inst{31-20} = 0x11; ++ let Inst{19-10} = op; ++ let Inst{9-5} = fj; ++ let Inst{4-0} = rd; ++} ++ ++class R2P op> ++ : R2 { ++ let Inst{31-13} = 0x3240; ++ let Inst{12-10} = op; ++} ++ ++class R2_CSR op> ++ : StdArch { ++ bits<5> rj; ++ bits<5> rd; ++ bits<14> csr; ++ ++ let Inst{31-24} = op; ++ let Inst{23-10} = csr; ++ let Inst{9-5} = rj; ++ let Inst{4-0} = rd; ++} ++ ++class R2_SI16 op> ++ : StdArch { ++ bits<5> rd; ++ bits<5> rj; ++ bits<16> si16; ++ ++ let Inst{31-26} = op; ++ let Inst{25-10} = si16; ++ let Inst{9-5} = rj; ++ let Inst{4-0} = rd; ++} ++ ++class R2_COND op, bits<5> cond> ++ : StdArch { ++ bits<5> fj; ++ bits<5> fk; ++ bits<3> cd; ++ ++ let Inst{31-22} = 0x30; ++ let Inst{21-20} = op; ++ let Inst{19-15} = cond; ++ let Inst{14-10} = fk; ++ let Inst{9-5} = fj; ++ let Inst{4-3} = 0b00; ++ let Inst{2-0} = cd; ++} ++ ++class R2_LEVEL op> ++ : StdArch { ++ bits<5> rj; ++ bits<5> rd; ++ bits<8> level; ++ ++ let Inst{31-18} = op; ++ let Inst{17-10} = level; ++ let Inst{9-5} = rj; ++ let Inst{4-0} = rd; ++} ++ ++class IMM32 op> ++ : StdArch { ++ let Inst{31-16} = 0x0648; ++ let Inst{15-10} = op; ++ let Inst{9-0} = 0; ++} ++ ++class WAIT_FM : StdArch { ++ bits<15> hint; ++ ++ let Inst{31-15} = 0xc91; ++ let Inst{14-0} = hint; ++} ++ ++class R2_INVTLB : StdArch { ++ bits<5> rj; ++ bits<5> op; ++ bits<5> rk; ++ ++ let Inst{31-15} = 0xc93; ++ let Inst{14-10} = rk; ++ let Inst{9-5} = rj; ++ let Inst{4-0} = op; ++} ++ ++class BAR_FM op> ++ : StdArch { ++ bits<15> hint; ++ ++ let Inst{31-16} = 0x3872; ++ let Inst{15} = op; ++ let Inst{14-0} = hint; ++} ++ ++class PRELD_FM : StdArch { ++ bits<5> rj; ++ bits<5> hint; ++ bits<12> imm12; ++ ++ let Inst{31-22} = 0xab; ++ let Inst{21-10} = imm12; ++ let Inst{9-5} = rj; ++ let Inst{4-0} = hint; ++} ++ ++// R3 classes: 3 registers ++// ++class R3 : StdArch { ++ bits<5> rk; ++ bits<5> rj; ++ bits<5> rd; ++ ++ let Inst{14-10} = rk; ++ let Inst{9-5} = rj; ++ let Inst{4-0} = rd; ++} ++ ++class R3I op> ++ : R3 { ++ let Inst{31-22} = 0x0; ++ let Inst{21-15} = op; ++} ++ ++class R3F op> ++ : R3 { ++ bits<5> fk; ++ bits<5> fj; ++ bits<5> fd; ++ ++ let Inst{31-21} = 0x8; ++ let Inst{20-15} = op; ++ let Inst{14-10} = fk; ++ let Inst{9-5} = fj; ++ let Inst{4-0} = fd; ++} ++ ++class R3MI op> ++ : R3 { 
++ let Inst{31-23} = 0x70; ++ let Inst{22-15} = op; ++} ++ ++class AM op> : StdArch { ++ bits<5> rk; ++ bits<17> addr; // rj + 12 bits offset 0 ++ bits<5> rd; ++ ++ let Inst{31-21} = 0x1c3; ++ let Inst{20-15} = op; ++ let Inst{14-10} = rk; ++ let Inst{9-5} = addr{16-12}; ++ let Inst{4-0} = rd; ++} ++ ++class R3MF op> ++ : R3 { ++ bits<5> fd; ++ ++ let Inst{31-23} = 0x70; ++ let Inst{22-15} = op; ++ let Inst{4-0} = fd; ++} ++ ++class R3_SA2 op> ++ : StdArch { ++ bits<5> rk; ++ bits<5> rj; ++ bits<5> rd; ++ bits<2> sa; ++ ++ let Inst{31-22} = 0x0; ++ let Inst{21-17} = op; ++ let Inst{16-15} = sa; ++ let Inst{14-10} = rk; ++ let Inst{9-5} = rj; ++ let Inst{4-0} = rd; ++} ++ ++class R3_SA3 : StdArch { ++ bits<5> rk; ++ bits<5> rj; ++ bits<5> rd; ++ bits<3> sa; ++ ++ let Inst{31-18} = 3; ++ let Inst{17-15} = sa; ++ let Inst{14-10} = rk; ++ let Inst{9-5} = rj; ++ let Inst{4-0} = rd; ++} ++ ++// R4 classes: 4 registers ++// ++class R4MUL op> ++ : StdArch { ++ bits<5> fa; ++ bits<5> fk; ++ bits<5> fj; ++ bits<5> fd; ++ ++ let Inst{31-24} = 0x8; ++ let Inst{23-20} = op; ++ let Inst{19-15} = fa; ++ let Inst{14-10} = fk; ++ let Inst{9-5} = fj; ++ let Inst{4-0} = fd; ++} ++ ++class R4CMP op> ++ : StdArch { ++ bits<5> cond; ++ bits<5> fk; ++ bits<5> fj; ++ bits<3> cd; ++ ++ let Inst{31-22} = 0x30; ++ let Inst{21-20} = op; ++ let Inst{19-15} = cond; ++ let Inst{14-10} = fk; ++ let Inst{9-5} = fj; ++ let Inst{4-3} = 0; ++ let Inst{2-0} = cd; ++} ++ ++class R4SEL : StdArch { ++ bits<3> ca; ++ bits<5> fk; ++ bits<5> fj; ++ bits<5> fd; ++ ++ let Inst{31-18} = 0x340; ++ let Inst{17-15} = ca; ++ let Inst{14-10} = fk; ++ let Inst{9-5} = fj; ++ let Inst{4-0} = fd; ++} ++ ++// R2_IMM5 classes: 2registers and 1 5bit-immediate ++// ++class R2_IMM5 op> ++ : StdArch { ++ bits<5> rj; ++ bits<5> rd; ++ bits<5> imm5; ++ ++ let Inst{31-20} = 0x4; ++ let Inst{19-18} = op; ++ let Inst{17-15} = 0x1; ++ let Inst{14-10} = imm5; ++ let Inst{9-5} = rj; ++ let Inst{4-0} = rd; ++} ++ ++// R2_IMM6 classes: 2registers and 1 6bit-immediate ++// ++class R2_IMM6 op> ++ : StdArch { ++ bits<5> rj; ++ bits<5> rd; ++ bits<6> imm6; ++ ++ let Inst{31-20} = 0x4; ++ let Inst{19-18} = op; ++ let Inst{17-16} = 0x1; ++ let Inst{15-10} = imm6; ++ let Inst{9-5} = rj; ++ let Inst{4-0} = rd; ++} ++ ++// R2_IMM12 classes: 2 registers and 1 12bit-immediate ++// ++class LOAD_STORE op> ++ : StdArch { ++ bits<5> rd; ++ bits<17> addr; ++ ++ let Inst{31-26} = 0xa; ++ let Inst{25-22} = op; ++ let Inst{21-10} = addr{11-0}; ++ let Inst{9-5} = addr{16-12}; ++ let Inst{4-0} = rd; ++} ++// for reloc ++class LOAD_STORE_RRI op> ++ : StdArch { ++ bits<5> rj; ++ bits<5> rd; ++ bits<12> imm12; ++ ++ let Inst{31-26} = 0xa; ++ let Inst{25-22} = op; ++ let Inst{21-10} = imm12; ++ let Inst{9-5} = rj; ++ let Inst{4-0} = rd; ++} ++ ++ ++class R2_IMM12 op> ++ : StdArch { ++ bits<5> rj; ++ bits<5> rd; ++ bits<12> imm12; ++ ++ let Inst{31-25} = 0x1; ++ let Inst{24-22} = op; ++ let Inst{21-10} = imm12; ++ let Inst{9-5} = rj; ++ let Inst{4-0} = rd; ++} ++ ++class LEA_ADDI_FM op> ++ : StdArch { ++ bits<5> rd; ++ bits<17> addr; ++ ++ let Inst{31-25} = 0x1; ++ let Inst{24-22} = op; ++ let Inst{21-10} = addr{11-0}; ++ let Inst{9-5} = addr{16-12}; ++ let Inst{4-0} = rd; ++} ++ ++// R2_IMM14 classes: 2 registers and 1 14bit-immediate ++// ++class LL_SC op> ++ : StdArch { ++ bits<5> rd; ++ bits<19> addr; ++ ++ let Inst{31-27} = 4; ++ let Inst{26-24} = op; ++ let Inst{23-10} = addr{13-0}; ++ let Inst{9-5} = addr{18-14}; ++ let Inst{4-0} = rd; ++} ++ ++// R2_IMM16 classes: 2 registers 
and 1 16bit-immediate ++// ++class R2_IMM16BEQ op> ++ : StdArch { ++ bits<5> rj; ++ bits<5> rd; ++ bits<16> offs16; ++ ++ let Inst{31-26} = op; ++ let Inst{25-10} = offs16; ++ let Inst{9-5} = rj; ++ let Inst{4-0} = rd; ++} ++ ++class R2_IMM16JIRL : StdArch { ++ bits<5> rj; ++ bits<5> rd; ++ bits<16> offs16; ++ ++ let Inst{31-26} = 0x13; ++ let Inst{25-10} = offs16; ++ let Inst{9-5} = rj; ++ let Inst{4-0} = rd; ++} ++ ++// R1_IMM21 classes: 1 registers and 1 21bit-immediate ++// ++class R1_IMM21BEQZ op> ++ : StdArch { ++ bits<5> rj; ++ bits<21> offs21; ++ ++ let Inst{31-26} = op; ++ let Inst{25-10} = offs21{15-0}; ++ let Inst{9-5} = rj; ++ let Inst{4-0} = offs21{20-16}; ++} ++ ++class R1_CSR op> ++ : StdArch { ++ bits<5> rd; ++ bits<14> csr; ++ ++ let Inst{31-24} = op{7-0}; ++ let Inst{23-10} = csr; ++ let Inst{9-5} = op{12-8}; ++ let Inst{4-0} = rd; ++} ++ ++class R1_SI20 op> ++ : StdArch { ++ bits<5> rd; ++ bits<20> si20; ++ ++ let Inst{31-25} = op; ++ let Inst{24-5} = si20; ++ let Inst{4-0} = rd; ++} ++ ++class R1_CACHE : StdArch { ++ bits<5> rj; ++ bits<5> op; ++ bits<12> si12; ++ ++ let Inst{31-22} = 0x18; ++ let Inst{21-10} = si12; ++ let Inst{9-5} = rj; ++ let Inst{4-0} = op; ++} ++ ++class R1_SEQ op> ++ : StdArch { ++ bits<5> rj; ++ bits<5> offset; ++ bits<8> seq; ++ ++ let Inst{31-18} = op; ++ let Inst{17-10} = seq; ++ let Inst{9-5} = rj; ++ let Inst{4-0} = 0b00000; ++} ++ ++class R1_BCEQZ op> ++ : StdArch { ++ bits<21> offset; ++ bits<3> cj; ++ ++ let Inst{31-26} = 0x12; ++ let Inst{25-10} = offset{15-0}; ++ let Inst{9-8} = op; ++ let Inst{7-5} = cj; ++ let Inst{4-0} = offset{20-16}; ++} ++ ++// IMM26 classes: 1 26bit-immediate ++// ++class IMM26B op> ++ : StdArch { ++ bits<26> offs26; ++ ++ let Inst{31-26} = op; ++ let Inst{25-10} = offs26{15-0}; ++ let Inst{9-0} = offs26{25-16}; ++} ++ ++// LoongArch Pseudo Instructions Format ++class LoongArchPseudo pattern> : ++ InstLA { ++ let isCodeGenOnly = 1; ++ let isPseudo = 1; ++} ++ ++// Pseudo-instructions for alternate assembly syntax (never used by codegen). ++// These are aliases that require C++ handling to convert to the target ++// instruction, while InstAliases can be handled directly by tblgen. 
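++// (For instance, an assembler-only macro such as li.d $rd, imm64 is expanded
++// by the assembly parser into a real lu12i.w/ori/lu32i.d/lu52i.d sequence as
++// needed; which pseudos actually use this class is defined elsewhere, so the
++// example is purely illustrative.)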
++class LoongArchAsmPseudoInst: ++ InstLA { ++ let isPseudo = 1; ++ let Pattern = []; ++} ++ ++// ++// Misc instruction classes ++class ASSERT op> ++ : StdArch { ++ bits<5> rk; ++ bits<5> rj; ++ ++ let Inst{31-17} = 0x0; ++ let Inst{16-15} = op; ++ let Inst{14-10} = rk; ++ let Inst{9-5} = rj; ++ let Inst{4-0} = 0x0; ++} ++ ++class CODE15 op> ++ : StdArch { ++ bits<15> Code; ++ ++ let Inst{31-22} = 0x0; ++ let Inst{21-15} = op; ++ let Inst{14-0} = Code; ++} ++ ++class INSERT_BIT32 op> ++ : StdArch { ++ bits<5> msbw; ++ bits<5> lsbw; ++ bits<5> rj; ++ bits<5> rd; ++ ++ let Inst{31-21} = 0x3; ++ let Inst{20-16} = msbw; ++ let Inst{15} = op; ++ let Inst{14-10} = lsbw; ++ let Inst{9-5} = rj; ++ let Inst{4-0} = rd; ++} ++ ++class INSERT_BIT64 op> ++ : StdArch { ++ bits<6> msbd; ++ bits<6> lsbd; ++ bits<5> rj; ++ bits<5> rd; ++ ++ let Inst{31-23} = 0x1; ++ let Inst{22} = op; ++ let Inst{21-16} = msbd; ++ let Inst{15-10} = lsbd; ++ let Inst{9-5} = rj; ++ let Inst{4-0} = rd; ++} ++ ++class MOVGPR2FCSR: StdArch { ++ bits<5> fcsr; ++ bits<5> rj; ++ ++ let Inst{31-10} = 0x4530; ++ let Inst{9-5} = rj; ++ let Inst{4-0} = fcsr; ++} ++ ++class MOVFCSR2GPR: StdArch { ++ bits<5> fcsr; ++ bits<5> rd; ++ ++ let Inst{31-10} = 0x4532; ++ let Inst{9-5} = fcsr; ++ let Inst{4-0} = rd; ++} ++ ++class MOVFGR2FCFR: StdArch { ++ bits<3> cd; ++ bits<5> fj; ++ ++ let Inst{31-10} = 0x4534; ++ let Inst{9-5} = fj; ++ let Inst{4-3} = 0; ++ let Inst{2-0} = cd; ++} ++ ++class MOVFCFR2FGR: StdArch { ++ bits<3> cj; ++ bits<5> fd; ++ ++ let Inst{31-10} = 0x4535; ++ let Inst{9-8} = 0; ++ let Inst{7-5} = cj; ++ let Inst{4-0} = fd; ++} ++ ++class MOVGPR2FCFR: StdArch { ++ bits<3> cd; ++ bits<5> rj; ++ ++ let Inst{31-10} = 0x4536; ++ let Inst{9-5} = rj; ++ let Inst{4-3} = 0; ++ let Inst{2-0} = cd; ++} ++ ++class MOVFCFR2GPR: StdArch { ++ bits<3> cj; ++ bits<5> rd; ++ ++ let Inst{31-10} = 0x4537; ++ let Inst{9-8} = 0; ++ let Inst{7-5} = cj; ++ let Inst{4-0} = rd; ++} ++ ++class LoongArchInst : InstLA<(outs), (ins), "", [], FrmOther> { ++} ++class JMP_OFFS_2R op> : LoongArchInst { ++ bits<5> rs; ++ bits<5> rd; ++ bits<16> offset; ++ ++ bits<32> Inst; ++ ++ let Inst{31-26} = op; ++ let Inst{25-10} = offset; ++ let Inst{9-5} = rs; ++ let Inst{4-0} = rd; ++} ++ ++class FJ op> : StdArch ++{ ++ bits<26> target; ++ ++ let Inst{31-26} = op; ++ let Inst{25-10} = target{15-0}; ++ let Inst{9-0} = target{25-16}; ++} ++ ++class LUI_FM : StdArch { ++ bits<5> rt; ++ bits<16> imm16; ++ ++ let Inst{31-26} = 0xf; ++ let Inst{25-21} = 0; ++ let Inst{20-16} = rt; ++ let Inst{15-0} = imm16; ++} ++ ++class R2_IMM12M_STD op> : StdArch { ++ bits<5> rj; ++ bits<5> rd; ++ bits<12> imm12; ++ ++ let Inst{31-26} = 0xa; ++ let Inst{25-22} = op; ++ let Inst{21-10} = imm12; ++ let Inst{9-5} = rj; ++ let Inst{4-0} = rd; ++} ++ ++class LLD_2R Code> : LoongArchInst { ++ bits<5> rd; ++ bits<19> addr; ++ bits<5> rj = addr{18-14}; ++ bits<14> offset = addr{13-0}; ++ ++ bits<32> Inst; ++ ++ let Inst{31-27} = 0x4; ++ let Inst{26-24} = Code; ++ let Inst{23-10} = offset; ++ let Inst{9-5} = rj; ++ let Inst{4-0} = rd; ++} ++ ++class CEQS_FM op> { ++ bits<5> fj; ++ bits<5> fk; ++ bits<3> cd; ++ bits<5> cond; ++ ++ bits<32> Inst; ++ ++ let Inst{31-22} = 0x30; ++ let Inst{21-20} = op; ++ let Inst{19-15} = cond; ++ let Inst{14-10} = fk; ++ let Inst{9-5} = fj; ++ let Inst{4-3} = 0b00; ++ let Inst{2-0} = cd; ++} ++ +diff --git a/lib/Target/LoongArch/LoongArchInstrInfo.cpp b/lib/Target/LoongArch/LoongArchInstrInfo.cpp +new file mode 100644 +index 00000000..3c6b3334 +--- /dev/null 
++++ b/lib/Target/LoongArch/LoongArchInstrInfo.cpp +@@ -0,0 +1,1040 @@ ++//===- LoongArchInstrInfo.cpp - LoongArch Instruction Information -------------------===// ++// ++// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. ++// See https://llvm.org/LICENSE.txt for license information. ++// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception ++// ++//===----------------------------------------------------------------------===// ++// ++// This file contains the LoongArch implementation of the TargetInstrInfo class. ++// ++//===----------------------------------------------------------------------===// ++ ++#include "LoongArchInstrInfo.h" ++#include "LoongArchSubtarget.h" ++#include "MCTargetDesc/LoongArchAnalyzeImmediate.h" ++#include "MCTargetDesc/LoongArchBaseInfo.h" ++#include "MCTargetDesc/LoongArchMCTargetDesc.h" ++#include "llvm/ADT/SmallVector.h" ++#include "llvm/CodeGen/MachineBasicBlock.h" ++#include "llvm/CodeGen/MachineFrameInfo.h" ++#include "llvm/CodeGen/MachineFunction.h" ++#include "llvm/CodeGen/MachineInstr.h" ++#include "llvm/CodeGen/MachineInstrBuilder.h" ++#include "llvm/CodeGen/MachineOperand.h" ++#include "llvm/CodeGen/RegisterScavenging.h" ++#include "llvm/CodeGen/TargetOpcodes.h" ++#include "llvm/CodeGen/TargetSubtargetInfo.h" ++#include "llvm/IR/DebugLoc.h" ++#include "llvm/MC/MCInstrDesc.h" ++#include "llvm/Target/TargetMachine.h" ++#include ++ ++using namespace llvm; ++ ++#define GET_INSTRINFO_CTOR_DTOR ++#include "LoongArchGenInstrInfo.inc" ++ ++// Pin the vtable to this file. ++void LoongArchInstrInfo::anchor() {} ++LoongArchInstrInfo::LoongArchInstrInfo(const LoongArchSubtarget &STI) ++ : LoongArchGenInstrInfo(LoongArch::ADJCALLSTACKDOWN, ++ LoongArch::ADJCALLSTACKUP), ++ RI(), Subtarget(STI) {} ++ ++const LoongArchRegisterInfo &LoongArchInstrInfo::getRegisterInfo() const { ++ return RI; ++} ++ ++/// isLoadFromStackSlot - If the specified machine instruction is a direct ++/// load from a stack slot, return the virtual or physical register number of ++/// the destination along with the FrameIndex of the loaded stack slot. If ++/// not, return 0. This predicate must return 0 if the instruction has ++/// any side effects other than loading from the stack slot. ++unsigned LoongArchInstrInfo::isLoadFromStackSlot(const MachineInstr &MI, ++ int &FrameIndex) const { ++ unsigned Opc = MI.getOpcode(); ++ if ((Opc == LoongArch::LD_W) || (Opc == LoongArch::LD_D) || ++ (Opc == LoongArch::FLD_S) || (Opc == LoongArch::FLD_D)) { ++ if ((MI.getOperand(1).isFI()) && // is a stack slot ++ (MI.getOperand(2).isImm()) && // the imm is zero ++ (isZeroImm(MI.getOperand(2)))) { ++ FrameIndex = MI.getOperand(1).getIndex(); ++ return MI.getOperand(0).getReg(); ++ } ++ } ++ return 0; ++} ++ ++/// isStoreToStackSlot - If the specified machine instruction is a direct ++/// store to a stack slot, return the virtual or physical register number of ++/// the source reg along with the FrameIndex of the loaded stack slot. If ++/// not, return 0. This predicate must return 0 if the instruction has ++/// any side effects other than storing to the stack slot. 
++unsigned LoongArchInstrInfo::isStoreToStackSlot(const MachineInstr &MI, ++ int &FrameIndex) const { ++ unsigned Opc = MI.getOpcode(); ++ if ((Opc == LoongArch::ST_D) || (Opc == LoongArch::ST_W) || ++ (Opc == LoongArch::FST_S) ||(Opc == LoongArch::FST_D)) { ++ if ((MI.getOperand(1).isFI()) && // is a stack slot ++ (MI.getOperand(2).isImm()) && // the imm is zero ++ (isZeroImm(MI.getOperand(2)))) { ++ FrameIndex = MI.getOperand(1).getIndex(); ++ return MI.getOperand(0).getReg(); ++ } ++ } ++ return 0; ++} ++ ++void LoongArchInstrInfo::copyPhysReg(MachineBasicBlock &MBB, ++ MachineBasicBlock::iterator I, ++ const DebugLoc &DL, MCRegister DestReg, ++ MCRegister SrcReg, bool KillSrc) const { ++ unsigned Opc = 0, ZeroReg = 0; ++ unsigned ZeroImm = 1; ++ if (LoongArch::GPR32RegClass.contains(DestReg)) { // Copy to CPU Reg. ++ if (LoongArch::GPR32RegClass.contains(SrcReg)) { ++ Opc = LoongArch::OR32, ZeroReg = LoongArch::ZERO; ++ } ++ else if (LoongArch::FGR32RegClass.contains(SrcReg)) ++ Opc = LoongArch::MOVFR2GR_S; ++ else if (LoongArch::FCFRRegClass.contains(SrcReg)) ++ Opc = LoongArch::MOVCF2GR; ++ } ++ else if (LoongArch::GPR32RegClass.contains(SrcReg)) { // Copy from CPU Reg. ++ if (LoongArch::FGR32RegClass.contains(DestReg)) ++ Opc = LoongArch::MOVGR2FR_W; ++ else if (LoongArch::FCFRRegClass.contains(DestReg)) ++ Opc = LoongArch::MOVGR2CF; ++ } ++ else if (LoongArch::FGR32RegClass.contains(DestReg, SrcReg)) ++ Opc = LoongArch::FMOV_S; ++ else if (LoongArch::FGR64RegClass.contains(DestReg, SrcReg)) ++ Opc = LoongArch::FMOV_D; ++ else if (LoongArch::GPR64RegClass.contains(DestReg)) { // Copy to CPU64 Reg. ++ if (LoongArch::GPR64RegClass.contains(SrcReg)) ++ Opc = LoongArch::OR, ZeroReg = LoongArch::ZERO_64; ++ else if (LoongArch::FGR64RegClass.contains(SrcReg)) ++ Opc = LoongArch::MOVFR2GR_D; ++ else if (LoongArch::FCFRRegClass.contains(SrcReg)) ++ Opc = LoongArch::MOVCF2GR; ++ } ++ else if (LoongArch::GPR64RegClass.contains(SrcReg)) { // Copy from CPU64 Reg. 
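++    // i.e. a 64-bit GPR source being moved into a 64-bit FPR or into a
++    // condition-flag register, handled by the two cases below.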
++ if (LoongArch::FGR64RegClass.contains(DestReg)) ++ Opc = LoongArch::MOVGR2FR_D; ++ else if (LoongArch::FCFRRegClass.contains(DestReg)) ++ Opc = LoongArch::MOVGR2CF; ++ } ++ else if (LoongArch::FGR32RegClass.contains(DestReg)) // Copy to FGR32 Reg ++ Opc = LoongArch::MOVCF2FR; ++ else if (LoongArch::FGR32RegClass.contains(SrcReg)) // Copy from FGR32 Reg ++ Opc = LoongArch::MOVFR2CF; ++ else if (LoongArch::FGR64RegClass.contains(DestReg)) // Copy to FGR64 Reg ++ Opc = LoongArch::MOVCF2FR; ++ else if (LoongArch::FGR64RegClass.contains(SrcReg)) // Copy from FGR64 Reg ++ Opc = LoongArch::MOVFR2CF; ++ else if (LoongArch::LSX128BRegClass.contains(DestReg)) { // Copy to LSX reg ++ if (LoongArch::LSX128BRegClass.contains(SrcReg)) ++ Opc = LoongArch::VORI_B, ZeroImm = 0; ++ } else if (LoongArch::LASX256BRegClass.contains( ++ DestReg)) { // Copy to LASX reg ++ if (LoongArch::LASX256BRegClass.contains(SrcReg)) ++ Opc = LoongArch::XVORI_B, ZeroImm = 0; ++ } ++ ++ assert(Opc && "Cannot copy registers"); ++ ++ MachineInstrBuilder MIB = BuildMI(MBB, I, DL, get(Opc)); ++ ++ if (DestReg) ++ MIB.addReg(DestReg, RegState::Define); ++ ++ if (SrcReg) ++ MIB.addReg(SrcReg, getKillRegState(KillSrc)); ++ ++ if (ZeroReg) ++ MIB.addReg(ZeroReg); ++ ++ if (!ZeroImm) ++ MIB.addImm(0); ++} ++ ++static bool isORCopyInst(const MachineInstr &MI) { ++ switch (MI.getOpcode()) { ++ default: ++ break; ++ case LoongArch::OR: ++ if (MI.getOperand(2).getReg() == LoongArch::ZERO_64) ++ return true; ++ break; ++ case LoongArch::OR32: ++ if (MI.getOperand(2).getReg() == LoongArch::ZERO) ++ return true; ++ break; ++ } ++ return false; ++} ++ ++/// We check for the common case of 'or', as it's LoongArch' preferred instruction ++/// for GPRs but we have to check the operands to ensure that is the case. ++/// Other move instructions for LoongArch are directly identifiable. 
++Optional ++LoongArchInstrInfo::isCopyInstrImpl(const MachineInstr &MI) const { ++ if (MI.isMoveReg() || isORCopyInst(MI)) { ++ return DestSourcePair{MI.getOperand(0), MI.getOperand(1)}; ++ } ++ return None; ++} ++ ++void LoongArchInstrInfo:: ++storeRegToStack(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, ++ Register SrcReg, bool isKill, int FI, ++ const TargetRegisterClass *RC, const TargetRegisterInfo *TRI, ++ int64_t Offset) const { ++ DebugLoc DL; ++ MachineMemOperand *MMO = GetMemOperand(MBB, FI, MachineMemOperand::MOStore); ++ ++ unsigned Opc = 0; ++ if (LoongArch::GPR32RegClass.hasSubClassEq(RC)) ++ Opc = LoongArch::ST_W; ++ else if (LoongArch::GPR64RegClass.hasSubClassEq(RC)) ++ Opc = LoongArch::ST_D; ++ else if (LoongArch::FGR64RegClass.hasSubClassEq(RC)) ++ Opc = LoongArch::FST_D; ++ else if (LoongArch::FGR32RegClass.hasSubClassEq(RC)) ++ Opc = LoongArch::FST_S; ++ ++ else if (TRI->isTypeLegalForClass(*RC, MVT::v16i8)) ++ Opc = LoongArch::VST; ++ else if (TRI->isTypeLegalForClass(*RC, MVT::v8i16)) ++ Opc = LoongArch::VST_H; ++ else if (TRI->isTypeLegalForClass(*RC, MVT::v4i32) || ++ TRI->isTypeLegalForClass(*RC, MVT::v4f32)) ++ Opc = LoongArch::VST_W; ++ else if (TRI->isTypeLegalForClass(*RC, MVT::v2i64) || ++ TRI->isTypeLegalForClass(*RC, MVT::v2f64)) ++ Opc = LoongArch::VST_D; ++ else if (TRI->isTypeLegalForClass(*RC, MVT::v32i8)) ++ Opc = LoongArch::XVST; ++ else if (TRI->isTypeLegalForClass(*RC, MVT::v16i16)) ++ Opc = LoongArch::XVST_H; ++ else if (TRI->isTypeLegalForClass(*RC, MVT::v8i32) || ++ TRI->isTypeLegalForClass(*RC, MVT::v8f32)) ++ Opc = LoongArch::XVST_W; ++ else if (TRI->isTypeLegalForClass(*RC, MVT::v4i64) || ++ TRI->isTypeLegalForClass(*RC, MVT::v4f64)) ++ Opc = LoongArch::XVST_D; ++ ++ assert(Opc && "Register class not handled!"); ++ BuildMI(MBB, I, DL, get(Opc)) ++ .addReg(SrcReg, getKillRegState(isKill)) ++ .addFrameIndex(FI) ++ .addImm(Offset) ++ .addMemOperand(MMO); ++} ++ ++void LoongArchInstrInfo:: ++loadRegFromStack(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, ++ Register DestReg, int FI, const TargetRegisterClass *RC, ++ const TargetRegisterInfo *TRI, int64_t Offset) const { ++ DebugLoc DL; ++ if (I != MBB.end()) ++ DL = I->getDebugLoc(); ++ MachineMemOperand *MMO = GetMemOperand(MBB, FI, MachineMemOperand::MOLoad); ++ unsigned Opc = 0; ++ ++ if (LoongArch::GPR32RegClass.hasSubClassEq(RC)) ++ Opc = LoongArch::LD_W; ++ else if (LoongArch::GPR64RegClass.hasSubClassEq(RC)) ++ Opc = LoongArch::LD_D; ++ else if (LoongArch::FGR32RegClass.hasSubClassEq(RC)) ++ Opc = LoongArch::FLD_S; ++ else if (LoongArch::FGR64RegClass.hasSubClassEq(RC)) ++ Opc = LoongArch::FLD_D; ++ else if (TRI->isTypeLegalForClass(*RC, MVT::v16i8)) ++ Opc = LoongArch::VLD; ++ else if (TRI->isTypeLegalForClass(*RC, MVT::v8i16)) ++ Opc = LoongArch::VLD_H; ++ else if (TRI->isTypeLegalForClass(*RC, MVT::v4i32) || ++ TRI->isTypeLegalForClass(*RC, MVT::v4f32)) ++ Opc = LoongArch::VLD_W; ++ else if (TRI->isTypeLegalForClass(*RC, MVT::v2i64) || ++ TRI->isTypeLegalForClass(*RC, MVT::v2f64)) ++ Opc = LoongArch::VLD_D; ++ else if (TRI->isTypeLegalForClass(*RC, MVT::v32i8)) ++ Opc = LoongArch::XVLD; ++ else if (TRI->isTypeLegalForClass(*RC, MVT::v16i16)) ++ Opc = LoongArch::XVLD_H; ++ else if (TRI->isTypeLegalForClass(*RC, MVT::v8i32) || ++ TRI->isTypeLegalForClass(*RC, MVT::v8f32)) ++ Opc = LoongArch::XVLD_W; ++ else if (TRI->isTypeLegalForClass(*RC, MVT::v4i64) || ++ TRI->isTypeLegalForClass(*RC, MVT::v4f64)) ++ Opc = LoongArch::XVLD_D; ++ ++ assert(Opc && "Register class not 
handled!"); ++ ++ BuildMI(MBB, I, DL, get(Opc), DestReg) ++ .addFrameIndex(FI) ++ .addImm(Offset) ++ .addMemOperand(MMO); ++} ++ ++bool LoongArchInstrInfo::expandPostRAPseudo(MachineInstr &MI) const { ++ MachineBasicBlock &MBB = *MI.getParent(); ++ switch (MI.getDesc().getOpcode()) { ++ default: ++ return false; ++ case LoongArch::RetRA: ++ expandRetRA(MBB, MI); ++ break; ++ case LoongArch::ERet: ++ expandERet(MBB, MI); ++ break; ++ case LoongArch::PseudoFFINT_S_W: ++ expandCvtFPInt(MBB, MI, LoongArch::FFINT_S_W, LoongArch::MOVGR2FR_W, false); ++ break; ++ case LoongArch::PseudoFFINT_S_L: ++ expandCvtFPInt(MBB, MI, LoongArch::FFINT_S_L, LoongArch::MOVGR2FR_D, true); ++ break; ++ case LoongArch::PseudoFFINT_D_W: ++ expandCvtFPInt(MBB, MI, LoongArch::FFINT_D_W, LoongArch::MOVGR2FR_W, true); ++ break; ++ case LoongArch::PseudoFFINT_D_L: ++ expandCvtFPInt(MBB, MI, LoongArch::FFINT_D_L, LoongArch::MOVGR2FR_D, true); ++ break; ++ case LoongArch::LoongArcheh_return32: ++ case LoongArch::LoongArcheh_return64: ++ expandEhReturn(MBB, MI); ++ break; ++ } ++ ++ MBB.erase(MI); ++ return true; ++} ++ ++/// getOppositeBranchOpc - Return the inverse of the specified ++/// opcode, e.g. turning BEQ to BNE. ++unsigned LoongArchInstrInfo::getOppositeBranchOpc(unsigned Opc) const { ++ switch (Opc) { ++ default: llvm_unreachable("Illegal opcode!"); ++ case LoongArch::BEQ32: return LoongArch::BNE32; ++ case LoongArch::BEQ: return LoongArch::BNE; ++ case LoongArch::BNE32: return LoongArch::BEQ32; ++ case LoongArch::BNE: return LoongArch::BEQ; ++ case LoongArch::BEQZ32: return LoongArch::BNEZ32; ++ case LoongArch::BEQZ: return LoongArch::BNEZ; ++ case LoongArch::BNEZ32: return LoongArch::BEQZ32; ++ case LoongArch::BNEZ: return LoongArch::BEQZ; ++ case LoongArch::BCEQZ: return LoongArch::BCNEZ; ++ case LoongArch::BCNEZ: return LoongArch::BCEQZ; ++ case LoongArch::BLT32: return LoongArch::BGE32; ++ case LoongArch::BLT: return LoongArch::BGE; ++ case LoongArch::BGE32: return LoongArch::BLT32; ++ case LoongArch::BGE: return LoongArch::BLT; ++ case LoongArch::BLTU32: return LoongArch::BGEU32; ++ case LoongArch::BLTU: return LoongArch::BGEU; ++ case LoongArch::BGEU32: return LoongArch::BLTU32; ++ case LoongArch::BGEU: return LoongArch::BLTU; ++ } ++} ++ ++void LoongArchInstrInfo::adjustReg(unsigned DestReg, unsigned SrcReg, ++ int64_t Amount, MachineBasicBlock &MBB, ++ MachineBasicBlock::iterator I, ++ MachineInstr::MIFlag Flag) const { ++ LoongArchABIInfo ABI = Subtarget.getABI(); ++ DebugLoc DL; ++ unsigned ADDI = ABI.GetPtrAddiOp(); ++ ++ if (Amount == 0) ++ return; ++ ++ if (isInt<12>(Amount)) { ++ // addi $DestReg, $SrcReg, amount ++ BuildMI(MBB, I, DL, get(ADDI), DestReg) ++ .addReg(SrcReg) ++ .addImm(Amount) ++ .setMIFlag(Flag); ++ } else { ++ // For numbers which are not 12bit integers we synthesize Amount inline ++ // then add or subtract it from $SrcReg. ++ unsigned Opc = ABI.GetPtrAddOp(); ++ if (Amount < 0) { ++ Opc = ABI.GetPtrSubOp(); ++ Amount = -Amount; ++ } ++ unsigned Reg = loadImmediate(Amount, MBB, I, DL); ++ BuildMI(MBB, I, DL, get(Opc), DestReg) ++ .addReg(SrcReg) ++ .addReg(Reg, RegState::Kill) ++ .setMIFlag(Flag); ++ } ++} ++ ++/// This function generates the sequence of instructions needed to get the ++/// result of adding register REG and immediate IMM. ++unsigned LoongArchInstrInfo::loadImmediate(int64_t Imm, MachineBasicBlock &MBB, ++ MachineBasicBlock::iterator II, ++ const DebugLoc &DL) const { ++ const TargetRegisterClass *RC = Subtarget.isABI_LP64() ++ ? 
&LoongArch::GPR64RegClass ++ : &LoongArch::GPR32RegClass; ++ LoongArchAnalyzeImmediate::InstSeq Seq = ++ LoongArchAnalyzeImmediate::generateInstSeq(Imm, Subtarget.is64Bit()); ++ unsigned DstReg = MBB.getParent()->getRegInfo().createVirtualRegister(RC); ++ unsigned SrcReg = ++ Subtarget.isABI_LP64() ? LoongArch::ZERO_64 : LoongArch::ZERO; ++ ++ // Build the instructions in Seq. ++ for (auto &Inst : Seq) { ++ if (Inst.Opc == LoongArch::LU12I_W || Inst.Opc == LoongArch::LU12I_W32) ++ BuildMI(MBB, II, DL, get(Inst.Opc), DstReg).addImm(Inst.Imm); ++ else ++ BuildMI(MBB, II, DL, get(Inst.Opc), DstReg) ++ .addReg(SrcReg, RegState::Kill) ++ .addImm(Inst.Imm); ++ SrcReg = DstReg; ++ } ++ return DstReg; ++} ++ ++unsigned LoongArchInstrInfo::getAnalyzableBrOpc(unsigned Opc) const { ++ return (Opc == LoongArch::B || Opc == LoongArch::B32 || ++ Opc == LoongArch::BEQZ || Opc == LoongArch::BEQZ32 || ++ Opc == LoongArch::BNEZ || Opc == LoongArch::BNEZ32 || ++ Opc == LoongArch::BCEQZ || ++ Opc == LoongArch::BCNEZ || ++ Opc == LoongArch::BEQ || Opc == LoongArch::BEQ32 || ++ Opc == LoongArch::BNE || Opc == LoongArch::BNE32 || ++ Opc == LoongArch::BLT || Opc == LoongArch::BLT32 || ++ Opc == LoongArch::BGE || Opc == LoongArch::BGE32 || ++ Opc == LoongArch::BLTU || Opc == LoongArch::BLTU32 || ++ Opc == LoongArch::BGEU || Opc == LoongArch::BGEU32) ? Opc : 0; ++} ++ ++void LoongArchInstrInfo::expandRetRA(MachineBasicBlock &MBB, ++ MachineBasicBlock::iterator I) const { ++ ++ MachineInstrBuilder MIB; ++ ++ if (Subtarget.is64Bit()) ++ MIB = BuildMI(MBB, I, I->getDebugLoc(), get(LoongArch::PseudoReturn64)) ++ .addReg(LoongArch::RA_64, RegState::Undef); ++ else ++ MIB = BuildMI(MBB, I, I->getDebugLoc(), get(LoongArch::PseudoReturn)) ++ .addReg(LoongArch::RA, RegState::Undef); ++ ++ // Retain any imp-use flags. 
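++  // (Typically the implicit uses of the physical return-value registers that
++  // LowerReturn attaches to the return pseudo; the exact operands depend on
++  // the calling convention, so this is only an assumption for illustration.)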
++ for (auto & MO : I->operands()) { ++ if (MO.isImplicit()) ++ MIB.add(MO); ++ } ++} ++ ++void LoongArchInstrInfo::expandERet(MachineBasicBlock &MBB, ++ MachineBasicBlock::iterator I) const { ++ BuildMI(MBB, I, I->getDebugLoc(), get(LoongArch::ERTN)); ++} ++ ++std::pair ++LoongArchInstrInfo::compareOpndSize(unsigned Opc, ++ const MachineFunction &MF) const { ++ const MCInstrDesc &Desc = get(Opc); ++ assert(Desc.NumOperands == 2 && "Unary instruction expected."); ++ const LoongArchRegisterInfo *RI = &getRegisterInfo(); ++ unsigned DstRegSize = RI->getRegSizeInBits(*getRegClass(Desc, 0, RI, MF)); ++ unsigned SrcRegSize = RI->getRegSizeInBits(*getRegClass(Desc, 1, RI, MF)); ++ ++ return std::make_pair(DstRegSize > SrcRegSize, DstRegSize < SrcRegSize); ++} ++ ++void LoongArchInstrInfo::expandCvtFPInt(MachineBasicBlock &MBB, ++ MachineBasicBlock::iterator I, ++ unsigned CvtOpc, unsigned MovOpc, ++ bool IsI64) const { ++ const MCInstrDesc &CvtDesc = get(CvtOpc), &MovDesc = get(MovOpc); ++ const MachineOperand &Dst = I->getOperand(0), &Src = I->getOperand(1); ++ unsigned DstReg = Dst.getReg(), SrcReg = Src.getReg(), TmpReg = DstReg; ++ unsigned KillSrc = getKillRegState(Src.isKill()); ++ DebugLoc DL = I->getDebugLoc(); ++ bool DstIsLarger, SrcIsLarger; ++ ++ std::tie(DstIsLarger, SrcIsLarger) = ++ compareOpndSize(CvtOpc, *MBB.getParent()); ++ ++ if (DstIsLarger) ++ TmpReg = getRegisterInfo().getSubReg(DstReg, LoongArch::sub_lo); ++ ++ if (SrcIsLarger) ++ DstReg = getRegisterInfo().getSubReg(DstReg, LoongArch::sub_lo); ++ ++ BuildMI(MBB, I, DL, MovDesc, TmpReg).addReg(SrcReg, KillSrc); ++ BuildMI(MBB, I, DL, CvtDesc, DstReg).addReg(TmpReg, RegState::Kill); ++} ++ ++void LoongArchInstrInfo::expandEhReturn(MachineBasicBlock &MBB, ++ MachineBasicBlock::iterator I) const { ++ // This pseudo instruction is generated as part of the lowering of ++ // ISD::EH_RETURN. We convert it to a stack increment by OffsetReg, and ++ // indirect jump to TargetReg ++ LoongArchABIInfo ABI = Subtarget.getABI(); ++ unsigned ADD = ABI.GetPtrAddOp(); ++ unsigned SP = Subtarget.is64Bit() ? LoongArch::SP_64 : LoongArch::SP; ++ unsigned RA = Subtarget.is64Bit() ? LoongArch::RA_64 : LoongArch::RA; ++ unsigned T8 = Subtarget.is64Bit() ? LoongArch::T8_64 : LoongArch::T8; ++ unsigned ZERO = Subtarget.is64Bit() ? LoongArch::ZERO_64 : LoongArch::ZERO; ++ unsigned OffsetReg = I->getOperand(0).getReg(); ++ unsigned TargetReg = I->getOperand(1).getReg(); ++ ++ // add $ra, $v0, $zero ++ // add $sp, $sp, $v1 ++ // jr $ra (via RetRA) ++ const TargetMachine &TM = MBB.getParent()->getTarget(); ++ if (TM.isPositionIndependent()) ++ BuildMI(MBB, I, I->getDebugLoc(), get(ADD), T8) ++ .addReg(TargetReg) ++ .addReg(ZERO); ++ BuildMI(MBB, I, I->getDebugLoc(), get(ADD), RA) ++ .addReg(TargetReg) ++ .addReg(ZERO); ++ BuildMI(MBB, I, I->getDebugLoc(), get(ADD), SP).addReg(SP).addReg(OffsetReg); ++ expandRetRA(MBB, I); ++} ++ ++ ++bool LoongArchInstrInfo::isZeroImm(const MachineOperand &op) const { ++ return op.isImm() && op.getImm() == 0; ++} ++ ++/// insertNoop - If data hazard condition is found insert the target nop ++/// instruction. ++// FIXME: This appears to be dead code. 
++void LoongArchInstrInfo:: ++insertNoop(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI) const ++{ ++ DebugLoc DL; ++ BuildMI(MBB, MI, DL, get(LoongArch::NOP)); ++} ++ ++MachineMemOperand * ++LoongArchInstrInfo::GetMemOperand(MachineBasicBlock &MBB, int FI, ++ MachineMemOperand::Flags Flags) const { ++ MachineFunction &MF = *MBB.getParent(); ++ MachineFrameInfo &MFI = MF.getFrameInfo(); ++ ++ return MF.getMachineMemOperand(MachinePointerInfo::getFixedStack(MF, FI), ++ Flags, MFI.getObjectSize(FI), ++ MFI.getObjectAlign(FI)); ++} ++ ++//===----------------------------------------------------------------------===// ++// Branch Analysis ++//===----------------------------------------------------------------------===// ++ ++void LoongArchInstrInfo::AnalyzeCondBr(const MachineInstr *Inst, unsigned Opc, ++ MachineBasicBlock *&BB, ++ SmallVectorImpl &Cond) const { ++ assert(getAnalyzableBrOpc(Opc) && "Not an analyzable branch"); ++ int NumOp = Inst->getNumExplicitOperands(); ++ ++ // for both int and fp branches, the last explicit operand is the ++ // MBB. ++ BB = Inst->getOperand(NumOp-1).getMBB(); ++ Cond.push_back(MachineOperand::CreateImm(Opc)); ++ ++ for (int i = 0; i < NumOp-1; i++) ++ Cond.push_back(Inst->getOperand(i)); ++} ++ ++bool LoongArchInstrInfo::analyzeBranch(MachineBasicBlock &MBB, ++ MachineBasicBlock *&TBB, ++ MachineBasicBlock *&FBB, ++ SmallVectorImpl &Cond, ++ bool AllowModify) const { ++ SmallVector BranchInstrs; ++ BranchType BT = analyzeBranch(MBB, TBB, FBB, Cond, AllowModify, BranchInstrs); ++ ++ return (BT == BT_None) || (BT == BT_Indirect); ++} ++ ++MachineInstr * ++LoongArchInstrInfo::BuildCondBr(MachineBasicBlock &MBB, MachineBasicBlock *TBB, ++ const DebugLoc &DL, ++ ArrayRef Cond) const { ++ unsigned Opc = Cond[0].getImm(); ++ const MCInstrDesc &MCID = get(Opc); ++ MachineInstrBuilder MIB = BuildMI(&MBB, DL, MCID); ++ ++ for (unsigned i = 1; i < Cond.size(); ++i) { ++ assert((Cond[i].isImm() || Cond[i].isReg()) && ++ "Cannot copy operand for conditional branch!"); ++ MIB.add(Cond[i]); ++ } ++ MIB.addMBB(TBB); ++ return MIB.getInstr(); ++} ++ ++unsigned LoongArchInstrInfo::insertBranch(MachineBasicBlock &MBB, ++ MachineBasicBlock *TBB, ++ MachineBasicBlock *FBB, ++ ArrayRef Cond, ++ const DebugLoc &DL, ++ int *BytesAdded) const { ++ unsigned UncondBrOpc = LoongArch::B; ++ // Shouldn't be a fall through. ++ assert(TBB && "insertBranch must not be told to insert a fallthrough"); ++ if (BytesAdded) ++ *BytesAdded = 0; ++ ++ // # of condition operands: ++ // Unconditional branches: 0 ++ // Floating point branches: 1 (opc) ++ // Int BranchZero: 2 (opc, reg) ++ // Int Branch: 3 (opc, reg0, reg1) ++ assert((Cond.size() <= 3) && ++ "# of LoongArch branch conditions must be <= 3!"); ++ ++ // Two-way Conditional branch. ++ if (FBB) { ++ MachineInstr &MI1 = *BuildCondBr(MBB, TBB, DL, Cond); ++ if (BytesAdded) ++ *BytesAdded += getInstSizeInBytes(MI1); ++ MachineInstr &MI2 = *BuildMI(&MBB, DL, get(UncondBrOpc)).addMBB(FBB); ++ if (BytesAdded) ++ *BytesAdded += getInstSizeInBytes(MI2); ++ return 2; ++ } ++ ++ // One way branch. ++ // Unconditional branch. ++ if (Cond.empty()) { ++ MachineInstr &MI = *BuildMI(&MBB, DL, get(UncondBrOpc)).addMBB(TBB); ++ if (BytesAdded) ++ *BytesAdded += getInstSizeInBytes(MI); ++ } ++ else {// Conditional branch. 
++ MachineInstr &MI = *BuildCondBr(MBB, TBB, DL, Cond); ++ if (BytesAdded) ++ *BytesAdded += getInstSizeInBytes(MI); ++ } ++ return 1; ++} ++ ++void LoongArchInstrInfo::insertIndirectBranch(MachineBasicBlock &MBB, ++ MachineBasicBlock &DestBB, ++ MachineBasicBlock &RestoreBB, ++ const DebugLoc &DL, ++ int64_t BrOffset, ++ RegScavenger *RS) const { ++ assert(RS && "RegScavenger required for long branching"); ++ assert(MBB.empty() && ++ "new block should be inserted for expanding unconditional branch"); ++ assert(MBB.pred_size() == 1); ++ ++ MachineFunction *MF = MBB.getParent(); ++ MachineRegisterInfo &MRI = MF->getRegInfo(); ++ const LoongArchSubtarget &Subtarget = MF->getSubtarget(); ++ bool is64 = Subtarget.isABI_LP64(); ++ const TargetRegisterClass *RC = ++ is64 ? &LoongArch::GPR64RegClass : &LoongArch::GPR32RegClass; ++ ++ if (!is64 && !isInt<32>(BrOffset)) ++ report_fatal_error( ++ "Branch offsets outside of the signed 32-bit range not supported"); ++ ++ unsigned ScratchReg = MRI.createVirtualRegister(RC); ++ unsigned ZeroReg = is64 ? LoongArch::ZERO_64 : LoongArch::ZERO; ++ auto II = MBB.end(); ++ ++ MachineInstr &Pcaddu12iMI = ++ *BuildMI(MBB, II, DL, get(LoongArch::LONG_BRANCH_PCADDU12I), ScratchReg) ++ .addMBB(&DestBB, LoongArchII::MO_PCREL_HI); ++ BuildMI(MBB, II, DL, get(LoongArch::LONG_BRANCH_ADDID2Op), ScratchReg) ++ .addReg(ScratchReg) ++ .addMBB(&DestBB, LoongArchII::MO_PCREL_LO); ++ BuildMI(MBB, II, DL, get(LoongArch::JIRL)) ++ .addReg(ZeroReg) ++ .addReg(ScratchReg, RegState::Kill) ++ .addImm(0); ++ RS->enterBasicBlockEnd(MBB); ++ unsigned Scav = RS->scavengeRegisterBackwards( ++ *RC, MachineBasicBlock::iterator(Pcaddu12iMI), false, 0); ++ MRI.replaceRegWith(ScratchReg, Scav); ++ MRI.clearVirtRegs(); ++ RS->setRegUsed(Scav); ++} ++ ++unsigned LoongArchInstrInfo::removeBranch(MachineBasicBlock &MBB, ++ int *BytesRemoved) const { ++ if (BytesRemoved) ++ *BytesRemoved = 0; ++ ++ MachineBasicBlock::reverse_iterator I = MBB.rbegin(), REnd = MBB.rend(); ++ unsigned removed = 0; ++ ++ // Up to 2 branches are removed. ++ // Note that indirect branches are not removed. ++ while (I != REnd && removed < 2) { ++ // Skip past debug instructions. ++ if (I->isDebugInstr()) { ++ ++I; ++ continue; ++ } ++ if (!getAnalyzableBrOpc(I->getOpcode())) ++ break; ++ // Remove the branch. ++ I->eraseFromParent(); ++ if (BytesRemoved) ++ *BytesRemoved += getInstSizeInBytes(*I); ++ I = MBB.rbegin(); ++ ++removed; ++ } ++ ++ return removed; ++} ++ ++/// reverseBranchCondition - Return the inverse opcode of the ++/// specified Branch instruction. ++bool LoongArchInstrInfo::reverseBranchCondition( ++ SmallVectorImpl &Cond) const { ++ assert( (Cond.size() && Cond.size() <= 3) && ++ "Invalid LoongArch branch condition!"); ++ Cond[0].setImm(getOppositeBranchOpc(Cond[0].getImm())); ++ return false; ++} ++ ++LoongArchInstrInfo::BranchType LoongArchInstrInfo::analyzeBranch( ++ MachineBasicBlock &MBB, MachineBasicBlock *&TBB, MachineBasicBlock *&FBB, ++ SmallVectorImpl &Cond, bool AllowModify, ++ SmallVectorImpl &BranchInstrs) const { ++ MachineBasicBlock::reverse_iterator I = MBB.rbegin(), REnd = MBB.rend(); ++ ++ // Skip all the debug instructions. ++ while (I != REnd && I->isDebugInstr()) ++ ++I; ++ ++ if (I == REnd || !isUnpredicatedTerminator(*I)) { ++ // This block ends with no branches (it just falls through to its succ). ++ // Leave TBB/FBB null. 
++ TBB = FBB = nullptr; ++ return BT_NoBranch; ++ } ++ ++ MachineInstr *LastInst = &*I; ++ unsigned LastOpc = LastInst->getOpcode(); ++ BranchInstrs.push_back(LastInst); ++ ++ // Not an analyzable branch (e.g., indirect jump). ++ if (!getAnalyzableBrOpc(LastOpc)) ++ return LastInst->isIndirectBranch() ? BT_Indirect : BT_None; ++ ++ // Get the second to last instruction in the block. ++ unsigned SecondLastOpc = 0; ++ MachineInstr *SecondLastInst = nullptr; ++ ++ // Skip past any debug instruction to see if the second last actual ++ // is a branch. ++ ++I; ++ while (I != REnd && I->isDebugInstr()) ++ ++I; ++ ++ if (I != REnd) { ++ SecondLastInst = &*I; ++ SecondLastOpc = getAnalyzableBrOpc(SecondLastInst->getOpcode()); ++ ++ // Not an analyzable branch (must be an indirect jump). ++ if (isUnpredicatedTerminator(*SecondLastInst) && !SecondLastOpc) ++ return BT_None; ++ } ++ ++ // If there is only one terminator instruction, process it. ++ if (!SecondLastOpc) { ++ // Unconditional branch. ++ if (LastInst->isUnconditionalBranch()) { ++ TBB = LastInst->getOperand(0).getMBB(); ++ return BT_Uncond; ++ } ++ ++ // Conditional branch ++ AnalyzeCondBr(LastInst, LastOpc, TBB, Cond); ++ return BT_Cond; ++ } ++ ++ // If we reached here, there are two branches. ++ // If there are three terminators, we don't know what sort of block this is. ++ if (++I != REnd && isUnpredicatedTerminator(*I)) ++ return BT_None; ++ ++ BranchInstrs.insert(BranchInstrs.begin(), SecondLastInst); ++ ++ // If second to last instruction is an unconditional branch, ++ // analyze it and remove the last instruction. ++ if (SecondLastInst->isUnconditionalBranch()) { ++ // Return if the last instruction cannot be removed. ++ if (!AllowModify) ++ return BT_None; ++ ++ TBB = SecondLastInst->getOperand(0).getMBB(); ++ LastInst->eraseFromParent(); ++ BranchInstrs.pop_back(); ++ return BT_Uncond; ++ } ++ ++ // Conditional branch followed by an unconditional branch. ++ // The last one must be unconditional. ++ if (!LastInst->isUnconditionalBranch()) ++ return BT_None; ++ ++ AnalyzeCondBr(SecondLastInst, SecondLastOpc, TBB, Cond); ++ FBB = LastInst->getOperand(0).getMBB(); ++ ++ return BT_CondUncond; ++} ++ ++MachineBasicBlock * ++LoongArchInstrInfo::getBranchDestBlock(const MachineInstr &MI) const { ++ assert(MI.getDesc().isBranch() && "Unexpected opcode!"); ++ // The branch target is always the last operand. 
++ int NumOp = MI.getNumExplicitOperands(); ++ return MI.getOperand(NumOp - 1).getMBB(); ++} ++ ++bool LoongArchInstrInfo::isBranchOffsetInRange(unsigned BranchOpc, int64_t BrOffset) const { ++/* ++ switch (BranchOpc) { ++ case LoongArch::B: ++ case LoongArch::BAL: ++ case LoongArch::BAL_BR: ++ case LoongArch::BC1F: ++ case LoongArch::BC1FL: ++ case LoongArch::BC1T: ++ case LoongArch::BC1TL: ++ case LoongArch::BEQ: case LoongArch::BEQ64: ++ case LoongArch::BEQL: ++ case LoongArch::BGEZ: case LoongArch::BGEZ64: ++ case LoongArch::BGEZL: ++ case LoongArch::BGEZAL: ++ case LoongArch::BGEZALL: ++ case LoongArch::BGTZ: case LoongArch::BGTZ64: ++ case LoongArch::BGTZL: ++ case LoongArch::BLEZ: case LoongArch::BLEZ64: ++ case LoongArch::BLEZL: ++ case LoongArch::BLTZ: case LoongArch::BLTZ64: ++ case LoongArch::BLTZL: ++ case LoongArch::BLTZAL: ++ case LoongArch::BLTZALL: ++ case LoongArch::BNE: case LoongArch::BNE64: ++ case LoongArch::BNEL: ++ return isInt<18>(BrOffset); ++ ++ case LoongArch::BC1EQZ: ++ case LoongArch::BC1NEZ: ++ case LoongArch::BC2EQZ: ++ case LoongArch::BC2NEZ: ++ case LoongArch::BEQC: case LoongArch::BEQC64: ++ case LoongArch::BNEC: case LoongArch::BNEC64: ++ case LoongArch::BGEC: case LoongArch::BGEC64: ++ case LoongArch::BGEUC: case LoongArch::BGEUC64: ++ case LoongArch::BGEZC: case LoongArch::BGEZC64: ++ case LoongArch::BGTZC: case LoongArch::BGTZC64: ++ case LoongArch::BLEZC: case LoongArch::BLEZC64: ++ case LoongArch::BLTC: case LoongArch::BLTC64: ++ case LoongArch::BLTUC: case LoongArch::BLTUC64: ++ case LoongArch::BLTZC: case LoongArch::BLTZC64: ++ case LoongArch::BNVC: ++ case LoongArch::BOVC: ++ case LoongArch::BGEZALC: ++ case LoongArch::BEQZALC: ++ case LoongArch::BGTZALC: ++ case LoongArch::BLEZALC: ++ case LoongArch::BLTZALC: ++ case LoongArch::BNEZALC: ++ return isInt<18>(BrOffset); ++ ++ case LoongArch::BEQZC: case LoongArch::BEQZC64: ++ case LoongArch::BNEZC: case LoongArch::BNEZC64: ++ return isInt<23>(BrOffset); ++ } ++ */ ++ switch (BranchOpc) { ++ case LoongArch::B: case LoongArch::B32: ++ return isInt<28>(BrOffset); ++ ++ case LoongArch::BEQZ: case LoongArch::BEQZ32: ++ case LoongArch::BNEZ: case LoongArch::BNEZ32: ++ case LoongArch::BCEQZ: ++ case LoongArch::BCNEZ: ++ return isInt<23>(BrOffset); ++ ++ case LoongArch::BEQ: case LoongArch::BEQ32: ++ case LoongArch::BNE: case LoongArch::BNE32: ++ case LoongArch::BLT: case LoongArch::BLT32: ++ case LoongArch::BGE: case LoongArch::BGE32: ++ case LoongArch::BLTU: case LoongArch::BLTU32: ++ case LoongArch::BGEU: case LoongArch::BGEU32: ++ return isInt<18>(BrOffset); ++ } ++ ++ llvm_unreachable("Unknown branch instruction!"); ++} ++ ++ ++/// Predicate for distingushing between control transfer instructions and all ++/// other instructions for handling forbidden slots. Consider inline assembly ++/// as unsafe as well. ++bool LoongArchInstrInfo::SafeInForbiddenSlot(const MachineInstr &MI) const { ++ if (MI.isInlineAsm()) ++ return false; ++ ++ return (MI.getDesc().TSFlags & LoongArchII::IsCTI) == 0; ++} ++ ++/// Predicate for distingushing instructions that have forbidden slots. ++bool LoongArchInstrInfo::HasForbiddenSlot(const MachineInstr &MI) const { ++ return (MI.getDesc().TSFlags & LoongArchII::HasForbiddenSlot) != 0; ++} ++ ++/// Return the number of bytes of code the specified instruction may be. 
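++/// All LoongArch instruction encodings are 4 bytes, so outside of inline
++/// assembly (which is given a conservative upper bound below) this simply
++/// returns the size recorded in the MCInstrDesc.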
++unsigned LoongArchInstrInfo::getInstSizeInBytes(const MachineInstr &MI) const { ++ switch (MI.getOpcode()) { ++ default: ++ return MI.getDesc().getSize(); ++ case TargetOpcode::INLINEASM: { // Inline Asm: Variable size. ++ const MachineFunction *MF = MI.getParent()->getParent(); ++ const char *AsmStr = MI.getOperand(0).getSymbolName(); ++ return getInlineAsmLength(AsmStr, *MF->getTarget().getMCAsmInfo()); ++ } ++ } ++} ++ ++MachineInstrBuilder ++LoongArchInstrInfo::genInstrWithNewOpc(unsigned NewOpc, ++ MachineBasicBlock::iterator I) const { ++ MachineInstrBuilder MIB; ++ ++ int ZeroOperandPosition = -1; ++ bool BranchWithZeroOperand = false; ++ if (I->isBranch() && !I->isPseudo()) { ++ auto TRI = I->getParent()->getParent()->getSubtarget().getRegisterInfo(); ++ ZeroOperandPosition = I->findRegisterUseOperandIdx(LoongArch::ZERO, false, TRI); ++ BranchWithZeroOperand = ZeroOperandPosition != -1; ++ } ++ ++ MIB = BuildMI(*I->getParent(), I, I->getDebugLoc(), get(NewOpc)); ++ ++ if (NewOpc == LoongArch::JIRL) { ++ MIB->RemoveOperand(0); ++ for (unsigned J = 0, E = I->getDesc().getNumOperands(); J < E; ++J) { ++ MIB.add(I->getOperand(J)); ++ } ++ MIB.addImm(0); ++ } else { ++ for (unsigned J = 0, E = I->getDesc().getNumOperands(); J < E; ++J) { ++ if (BranchWithZeroOperand && (unsigned)ZeroOperandPosition == J) ++ continue; ++ ++ MIB.add(I->getOperand(J)); ++ } ++ } ++ ++ MIB.copyImplicitOps(*I); ++ MIB.cloneMemRefs(*I); ++ return MIB; ++} ++ ++bool LoongArchInstrInfo::findCommutedOpIndices(const MachineInstr &MI, ++ unsigned &SrcOpIdx1, ++ unsigned &SrcOpIdx2) const { ++ assert(!MI.isBundle() && ++ "TargetInstrInfo::findCommutedOpIndices() can't handle bundles"); ++ ++ const MCInstrDesc &MCID = MI.getDesc(); ++ if (!MCID.isCommutable()) ++ return false; ++ ++ return TargetInstrInfo::findCommutedOpIndices(MI, SrcOpIdx1, SrcOpIdx2); ++} ++ ++// bstrins, bstrpick have the following constraints: ++// 0 <= lsb <= msb <= High ++static bool verifyBstrInstruction(const MachineInstr &MI, StringRef &ErrInfo, ++ const int64_t High) { ++ MachineOperand MOMsb = MI.getOperand(2); ++ if (!MOMsb.isImm()) { ++ ErrInfo = "Msb operand is not an immediate!"; ++ return false; ++ } ++ MachineOperand MOLsb = MI.getOperand(3); ++ if (!MOLsb.isImm()) { ++ ErrInfo = "Lsb operand is not an immediate!"; ++ return false; ++ } ++ ++ int64_t Lsb = MOLsb.getImm(); ++ if (!((0 <= Lsb) && (Lsb <= High))) { ++ ErrInfo = "Lsb operand is out of range!"; ++ return false; ++ } ++ ++ int64_t Msb = MOMsb.getImm(); ++ if (!((0 <= Msb) && (Msb <= High))) { ++ ErrInfo = "Msb operand is out of range!"; ++ return false; ++ } ++ ++ if (!(Lsb <= Msb)) { ++ ErrInfo = "Lsb operand is not less than or equal to msb operand!"; ++ return false; ++ } ++ ++ return true; ++} ++ ++// Perform target specific instruction verification. ++bool LoongArchInstrInfo::verifyInstruction(const MachineInstr &MI, ++ StringRef &ErrInfo) const { ++ // Verify that bstrins and bstrpick instructions are well formed. 
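++  // For example, BSTRPICK_W $rd, $rj, 7, 0 extracts bit field [7:0] and is
++  // well formed only when 0 <= lsbw <= msbw <= 31 (63 for the _D variants);
++  // anything else is reported through ErrInfo below.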
++  switch (MI.getOpcode()) {
++    case LoongArch::BSTRINS_W:
++    case LoongArch::BSTRPICK_W:
++      return verifyBstrInstruction(MI, ErrInfo, 31);
++    case LoongArch::BSTRINS_D:
++    case LoongArch::BSTRPICK_D:
++      return verifyBstrInstruction(MI, ErrInfo, 63);
++    default:
++      return true;
++  }
++
++  return true;
++}
++
++std::pair<unsigned, unsigned>
++LoongArchInstrInfo::decomposeMachineOperandsTargetFlags(unsigned TF) const {
++  return std::make_pair(TF, 0u);
++}
++
++ArrayRef<std::pair<unsigned, const char *>>
++LoongArchInstrInfo::getSerializableDirectMachineOperandTargetFlags() const {
++  using namespace LoongArchII;
++
++  static const std::pair<unsigned, const char *> Flags[] = {
++    {MO_PCREL_HI, "larch-pcrel-hi"},
++    {MO_PCREL_LO, "larch-pcrel-lo"},
++    {MO_TLSGD_HI, "larch-tlsgd-hi"},
++    {MO_TLSGD_LO, "larch-tlsgd-lo"},
++    {MO_TLSIE_HI, "larch-tlsie-hi"},
++    {MO_TLSIE_LO, "larch-tlsie-lo"},
++    {MO_TLSLE_HI, "larch-tlsle-hi"},
++    {MO_TLSLE_LO, "larch-tlsle-lo"},
++    {MO_ABS_HI, "larch-abs-hi"},
++    {MO_ABS_LO, "larch-abs-lo"},
++    {MO_ABS_HIGHER, "larch-abs-higher"},
++    {MO_ABS_HIGHEST, "larch-abs-highest"},
++    {MO_GOT_HI, "larch-got-hi"},
++    {MO_GOT_LO, "larch-got-lo"},
++    {MO_CALL_HI, "larch-call-hi"},
++    {MO_CALL_LO, "larch-call-lo"}
++  };
++  return makeArrayRef(Flags);
++}
+diff --git a/lib/Target/LoongArch/LoongArchInstrInfo.h b/lib/Target/LoongArch/LoongArchInstrInfo.h
+new file mode 100644
+index 00000000..53191a94
+--- /dev/null
++++ b/lib/Target/LoongArch/LoongArchInstrInfo.h
+@@ -0,0 +1,246 @@
++//===- LoongArchInstrInfo.h - LoongArch Instruction Information -----------*- C++ -*-===//
++//
++// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
++// See https://llvm.org/LICENSE.txt for license information.
++// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
++//
++//===----------------------------------------------------------------------===//
++//
++// This file contains the LoongArch implementation of the TargetInstrInfo class.
++//
++// FIXME: We need to override TargetInstrInfo::getInlineAsmLength method in
++// order for LoongArchLongBranch pass to work correctly when the code has inline
++// assembly. The returned value doesn't have to be the asm instruction's exact
++// size in bytes; LoongArchLongBranch only expects it to be the correct upper bound.
++//===----------------------------------------------------------------------===//
++
++#ifndef LLVM_LIB_TARGET_LOONGARCH_LOONGARCHINSTRINFO_H
++#define LLVM_LIB_TARGET_LOONGARCH_LOONGARCHINSTRINFO_H
++
++#define DBAR_HINT 0x700
++
++#include "MCTargetDesc/LoongArchMCTargetDesc.h"
++#include "LoongArch.h"
++#include "LoongArchRegisterInfo.h"
++#include "llvm/ADT/ArrayRef.h"
++#include "llvm/CodeGen/MachineBasicBlock.h"
++#include "llvm/CodeGen/MachineInstrBuilder.h"
++#include "llvm/CodeGen/MachineMemOperand.h"
++#include "llvm/CodeGen/TargetInstrInfo.h"
++#include <cstdint>
++
++#define GET_INSTRINFO_HEADER
++#include "LoongArchGenInstrInfo.inc"
++
++namespace llvm {
++
++class MachineInstr;
++class MachineOperand;
++class LoongArchSubtarget;
++class TargetRegisterClass;
++class TargetRegisterInfo;
++
++class LoongArchInstrInfo : public LoongArchGenInstrInfo {
++  virtual void anchor();
++  const LoongArchRegisterInfo RI;
++  const LoongArchSubtarget &Subtarget;
++
++public:
++  enum BranchType {
++    BT_None,       // Couldn't analyze branch.
++    BT_NoBranch,   // No branches found.
++    BT_Uncond,     // One unconditional branch.
++    BT_Cond,       // One conditional branch.
++    BT_CondUncond, // A conditional branch followed by an unconditional branch.
++    BT_Indirect    // One indirect branch.
++  };
++
++  explicit LoongArchInstrInfo(const LoongArchSubtarget &STI);
++
++  /// isLoadFromStackSlot - If the specified machine instruction is a direct
++  /// load from a stack slot, return the virtual or physical register number of
++  /// the destination along with the FrameIndex of the loaded stack slot. If
++  /// not, return 0. This predicate must return 0 if the instruction has
++  /// any side effects other than loading from the stack slot.
++  unsigned isLoadFromStackSlot(const MachineInstr &MI,
++                               int &FrameIndex) const override;
++
++  /// isStoreToStackSlot - If the specified machine instruction is a direct
++  /// store to a stack slot, return the virtual or physical register number of
++  /// the source reg along with the FrameIndex of the loaded stack slot. If
++  /// not, return 0. This predicate must return 0 if the instruction has
++  /// any side effects other than storing to the stack slot.
++  unsigned isStoreToStackSlot(const MachineInstr &MI,
++                              int &FrameIndex) const override;
++
++  void copyPhysReg(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
++                   const DebugLoc &DL, MCRegister DestReg, MCRegister SrcReg,
++                   bool KillSrc) const override;
++
++  /// Branch Analysis
++  bool analyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB,
++                     MachineBasicBlock *&FBB,
++                     SmallVectorImpl<MachineOperand> &Cond,
++                     bool AllowModify) const override;
++
++  unsigned removeBranch(MachineBasicBlock &MBB,
++                        int *BytesRemoved = nullptr) const override;
++
++  unsigned insertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB,
++                        MachineBasicBlock *FBB, ArrayRef<MachineOperand> Cond,
++                        const DebugLoc &DL,
++                        int *BytesAdded = nullptr) const override;
++
++  void insertIndirectBranch(MachineBasicBlock &MBB,
++                            MachineBasicBlock &NewDestBB,
++                            MachineBasicBlock &RestoreBB, const DebugLoc &DL,
++                            int64_t BrOffset,
++                            RegScavenger *RS = nullptr) const override;
++  bool
++  reverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const override;
++
++  BranchType analyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB,
++                           MachineBasicBlock *&FBB,
++                           SmallVectorImpl<MachineOperand> &Cond,
++                           bool AllowModify,
++                           SmallVectorImpl<MachineInstr *> &BranchInstrs) const;
++
++  /// Get the block that branch instruction jumps to.
++  MachineBasicBlock *getBranchDestBlock(const MachineInstr &MI) const override;
++
++  /// Determine if the branch target is in range.
++  bool isBranchOffsetInRange(unsigned BranchOpc,
++                             int64_t BrOffset) const override;
++
++  /// Predicate to determine if an instruction can go in a forbidden slot.
++  bool SafeInForbiddenSlot(const MachineInstr &MI) const;
++
++  /// Predicate to determine if an instruction has a forbidden slot.
++  bool HasForbiddenSlot(const MachineInstr &MI) const;
++
++  /// Insert nop instruction when hazard condition is found
++  void insertNoop(MachineBasicBlock &MBB,
++                  MachineBasicBlock::iterator MI) const override;
++
++  /// getRegisterInfo - TargetInstrInfo is a superset of MRegister info. As
++  /// such, whenever a client has an instance of instruction info, it should
++  /// always be able to get register info as well (through this method).
++  const LoongArchRegisterInfo &getRegisterInfo() const;
++
++  bool expandPostRAPseudo(MachineInstr &MI) const override;
++
++  unsigned getOppositeBranchOpc(unsigned Opc) const;
++
++  /// Emit a series of instructions to load an immediate.
++  unsigned loadImmediate(int64_t Imm, MachineBasicBlock &MBB,
++                         MachineBasicBlock::iterator II,
++                         const DebugLoc &DL) const;
++
++  /// Return the number of bytes of code the specified instruction may be.
++ unsigned getInstSizeInBytes(const MachineInstr &MI) const override; ++ ++ void storeRegToStackSlot(MachineBasicBlock &MBB, ++ MachineBasicBlock::iterator MBBI, ++ Register SrcReg, bool isKill, int FrameIndex, ++ const TargetRegisterClass *RC, ++ const TargetRegisterInfo *TRI) const override { ++ storeRegToStack(MBB, MBBI, SrcReg, isKill, FrameIndex, RC, TRI, 0); ++ } ++ ++ void loadRegFromStackSlot(MachineBasicBlock &MBB, ++ MachineBasicBlock::iterator MBBI, ++ Register DestReg, int FrameIndex, ++ const TargetRegisterClass *RC, ++ const TargetRegisterInfo *TRI) const override { ++ loadRegFromStack(MBB, MBBI, DestReg, FrameIndex, RC, TRI, 0); ++ } ++ ++ void storeRegToStack(MachineBasicBlock &MBB, ++ MachineBasicBlock::iterator MI, ++ Register SrcReg, bool isKill, int FrameIndex, ++ const TargetRegisterClass *RC, ++ const TargetRegisterInfo *TRI, ++ int64_t Offset) const; ++ ++ void loadRegFromStack(MachineBasicBlock &MBB, ++ MachineBasicBlock::iterator MI, ++ Register DestReg, int FrameIndex, ++ const TargetRegisterClass *RC, ++ const TargetRegisterInfo *TRI, ++ int64_t Offset) const; ++ ++ /// Adjust register value(DestReg = SrcReg + Amount). ++ void ++ adjustReg(unsigned DestReg, unsigned SrcReg, int64_t Amount, ++ MachineBasicBlock &MBB, MachineBasicBlock::iterator I, ++ MachineInstr::MIFlag Flag = MachineInstr::MIFlag::NoFlags) const; ++ ++ /// Create an instruction which has the same operands and memory operands ++ /// as MI but has a new opcode. ++ MachineInstrBuilder genInstrWithNewOpc(unsigned NewOpc, ++ MachineBasicBlock::iterator I) const; ++ ++ bool findCommutedOpIndices(const MachineInstr &MI, unsigned &SrcOpIdx1, ++ unsigned &SrcOpIdx2) const override; ++ ++ /// Perform target specific instruction verification. ++ bool verifyInstruction(const MachineInstr &MI, ++ StringRef &ErrInfo) const override; ++ ++ std::pair ++ decomposeMachineOperandsTargetFlags(unsigned TF) const override; ++ ++ ArrayRef> ++ getSerializableDirectMachineOperandTargetFlags() const override; ++ ++protected: ++ /// If the specific machine instruction is a instruction that moves/copies ++ /// value from one register to another register return true along with ++ /// @Source machine operand and @Destination machine operand. ++ Optional ++ isCopyInstrImpl(const MachineInstr &MI) const override; ++ ++private: ++ ++ bool isZeroImm(const MachineOperand &op) const; ++ ++ MachineMemOperand *GetMemOperand(MachineBasicBlock &MBB, int FI, ++ MachineMemOperand::Flags Flags) const; ++ ++ unsigned getAnalyzableBrOpc(unsigned Opc) const; ++ ++ void AnalyzeCondBr(const MachineInstr *Inst, unsigned Opc, ++ MachineBasicBlock *&BB, ++ SmallVectorImpl &Cond) const; ++ ++ MachineInstr * ++ BuildCondBr(MachineBasicBlock &MBB, MachineBasicBlock *TBB, ++ const DebugLoc &DL, ArrayRef Cond) const; ++ ++ void expandRetRA(MachineBasicBlock &MBB, MachineBasicBlock::iterator I) const; ++ ++ void expandERet(MachineBasicBlock &MBB, MachineBasicBlock::iterator I) const; ++ ++ std::pair compareOpndSize(unsigned Opc, ++ const MachineFunction &MF) const; ++ ++ /// Expand pseudo Int-to-FP conversion instructions. ++ /// ++ /// For example, the following pseudo instruction ++ /// PseudoCVT_D32_W D2, A5 ++ /// gets expanded into these two instructions: ++ /// MTC1 F4, A5 ++ /// CVT_D32_W D2, F4 ++ /// ++ /// We do this expansion post-RA to avoid inserting a floating point copy ++ /// instruction between MTC1 and CVT_D32_W. 
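++ /// (In the example above, MTC1 moves the integer operand from its GPR into
++ /// an FPR; CVT_D32_W then converts that single word to double precision.)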
++ void expandCvtFPInt(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, ++ unsigned CvtOpc, unsigned MovOpc, bool IsI64) const; ++ ++ void expandEhReturn(MachineBasicBlock &MBB, ++ MachineBasicBlock::iterator I) const; ++}; ++ ++} // end namespace llvm ++ ++#endif // LLVM_LIB_TARGET_LOONGARCH_LOONGARCHINSTRINFO_H +diff --git a/lib/Target/LoongArch/LoongArchInstrInfo.td b/lib/Target/LoongArch/LoongArchInstrInfo.td +new file mode 100644 +index 00000000..5cfb5cd5 +--- /dev/null ++++ b/lib/Target/LoongArch/LoongArchInstrInfo.td +@@ -0,0 +1,1867 @@ ++//===- LoongArchInstrInfo.td - Target Description for LoongArch Target -*- tablegen -*-=// ++// ++// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. ++// See https://llvm.org/LICENSE.txt for license information. ++// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception ++// ++//===----------------------------------------------------------------------===// ++// ++// This file contains the LoongArch implementation of the TargetInstrInfo class. ++// ++//===----------------------------------------------------------------------===// ++include "LoongArchInstrFormats.td" ++ ++def SDT_Bstrpick : SDTypeProfile<1, 3, [SDTCisInt<0>, SDTCisSameAs<0, 1>, ++ SDTCisVT<2, i32>, SDTCisSameAs<2, 3>]>; ++def SDT_Bstrins : SDTypeProfile<1, 4, [SDTCisInt<0>, SDTCisSameAs<0, 1>, ++ SDTCisVT<2, i32>, SDTCisSameAs<2, 3>, ++ SDTCisSameAs<0, 4>]>; ++ ++def LoongArchBstrpick : SDNode<"LoongArchISD::BSTRPICK", SDT_Bstrpick>; ++ ++def LoongArchBstrins : SDNode<"LoongArchISD::BSTRINS", SDT_Bstrins>; ++ ++def SDT_DBAR : SDTypeProfile<0, 1, [SDTCisVT<0, i32>]>; ++def LoongArchDBAR : SDNode<"LoongArchISD::DBAR", SDT_DBAR, [SDNPHasChain,SDNPSideEffect]>; ++ ++def SDT_LoongArchEHRET : SDTypeProfile<0, 2, [SDTCisInt<0>, SDTCisPtrTy<1>]>; ++ ++def LoongArchehret : SDNode<"LoongArchISD::EH_RETURN", SDT_LoongArchEHRET, ++ [SDNPHasChain, SDNPOptInGlue, SDNPVariadic]>; ++ ++//===---------------------------------------------------------------------===/ ++// Operand, Complex Patterns and Transformations Definitions. 
++//===---------------------------------------------------------------------===/ ++ ++def assertzext_lt_i32 : PatFrag<(ops node:$src), (assertzext node:$src), [{ ++ return cast(N->getOperand(1))->getVT().bitsLT(MVT::i32); ++}]>; ++ ++def immz : PatLeaf<(imm), [{ return N->getSExtValue() == 0; }]>; ++def immZExt12 : PatLeaf<(imm), [{ return isUInt<12>(N->getZExtValue()); }]>; ++def immSExt12 : PatLeaf<(imm), [{ return isInt<12>(N->getSExtValue()); }]>; ++def immSExt13 : PatLeaf<(imm), [{ return isInt<13>(N->getSExtValue()); }]>; ++ ++def immZExt2Alsl : ImmLeaf(Imm - 1);}]>; ++//class ImmAsmOperand : AsmOperandClass { ++// let RenderMethod = "addImmOperands"; ++// let PredicateMethod = "isImmediate<" # Low # "," # High # ">"; ++// let DiagnosticString = "operand must be an immediate in the range [" # Low # "," # High # "]"; ++//} ++// ++//def Imm8AsmOperand: ImmAsmOperand<8,8> { let Name = "Imm8"; } ++//def imm8 : Operand, ImmLeaf { ++// let ParserMatchClass = Imm8AsmOperand; ++//} ++ ++def HasLSX : Predicate<"Subtarget->hasLSX()">, ++ AssemblerPredicate<(all_of FeatureLSX)>; ++def HasLASX : Predicate<"Subtarget->hasLASX()">, ++ AssemblerPredicate<(all_of FeatureLASX)>; ++ ++class EXT_LSX { ++ list ExtPredicate = [HasLSX]; ++} ++ ++class EXT_LASX { ++ list ExtPredicate = [HasLASX]; ++} ++ ++class SImmOperand : AsmOperandClass { ++ let Name = "SImm" # width; ++ let DiagnosticType = "InvalidSImm" # width; ++ let RenderMethod = "addImmOperands"; ++ let PredicateMethod = "isSImm<" # width # ">"; ++} ++ ++def SImm2Operand : SImmOperand<2>; ++def simm2 : Operand, ImmLeaf= -2 && Imm < 2; }]> { ++ let ParserMatchClass = SImm2Operand; ++ let DecoderMethod = "DecodeSImmWithOffsetAndScale<2>"; ++} ++def SImm3Operand : SImmOperand<3>; ++def simm3 : Operand, ImmLeaf= -4 && Imm < 4; }]> { ++ let ParserMatchClass = SImm3Operand; ++ let DecoderMethod = "DecodeSImmWithOffsetAndScale<3>"; ++} ++ ++def SImm5Operand : SImmOperand<5>; ++def simm5 : Operand, ImmLeaf= -16 && Imm < 16; }]> { ++ let ParserMatchClass = SImm5Operand; ++ let DecoderMethod = "DecodeSImmWithOffsetAndScale<5>"; ++} ++ ++def simm5_32 : Operand, ImmLeaf= -16 && Imm < 16; }]> { ++ let ParserMatchClass = SImm5Operand; ++ let DecoderMethod = "DecodeSImmWithOffsetAndScale<5>"; ++} ++ ++def SImm8Operand : SImmOperand<8>; ++def simm8 : Operand, ImmLeaf= -128 && Imm < 128; }]> { ++ let ParserMatchClass = SImm8Operand; ++ let DecoderMethod = "DecodeSImmWithOffsetAndScale<8>"; ++} ++def simm8_32 : Operand, ImmLeaf= -128 && Imm < 128; }]> { ++ let ParserMatchClass = SImm8Operand; ++ let DecoderMethod = "DecodeSImmWithOffsetAndScale<8>"; ++} ++ ++def SImm12Operand : SImmOperand<12>; ++def simm12 : Operand, ImmLeaf= -2048 && Imm < 2048; }]> { ++ let ParserMatchClass = SImm12Operand; ++ let DecoderMethod = "DecodeSImmWithOffsetAndScale<12>"; ++} ++def simm12_32 : Operand, ImmLeaf= -2048 && Imm < 2048; }]> { ++ let ParserMatchClass = SImm12Operand; ++ let DecoderMethod = "DecodeSImmWithOffsetAndScale<12>"; ++} ++ ++def SImm14Operand : SImmOperand<14>; ++def simm14 : Operand, ImmLeaf= -8192 && Imm < 8192; }]> { ++ let ParserMatchClass = SImm14Operand; ++ let DecoderMethod = "DecodeSImmWithOffsetAndScale<14>"; ++} ++ ++def SImm15Operand : SImmOperand<15>; ++def simm15 : Operand, ImmLeaf= -16384 && Imm < 16384; }]> { ++ let ParserMatchClass = SImm15Operand; ++ let DecoderMethod = "DecodeSImmWithOffsetAndScale<15>"; ++} ++ ++def SImm16Operand : SImmOperand<16>; ++def simm16 : Operand, ImmLeaf= -32768 && Imm < 32768; }]> { ++ let ParserMatchClass = 
SImm16Operand; ++ let DecoderMethod = "DecodeSImmWithOffsetAndScale<16>"; ++} ++ ++def SImm20Operand : SImmOperand<20>; ++def simm20 : Operand, ImmLeaf= -524288 && Imm < 524288; }]> { ++ let ParserMatchClass = SImm20Operand; ++ let DecoderMethod = "DecodeSImmWithOffsetAndScale<20>"; ++} ++def simm20_32 : Operand, ImmLeaf= -524288 && Imm < 524288; }]> { ++ let ParserMatchClass = SImm20Operand; ++ let DecoderMethod = "DecodeSImmWithOffsetAndScale<20>"; ++} ++ ++def SImm21Operand : SImmOperand<21>; ++def simm21 : Operand, ImmLeaf= -1048576 && Imm < 1048576; }]> { ++ let ParserMatchClass = SImm21Operand; ++ let DecoderMethod = "DecodeSImmWithOffsetAndScale<21>"; ++} ++ ++def SImm26Operand : SImmOperand<26>; ++def simm26 : Operand, ImmLeaf= -33554432 && Imm < 33554432; }]> { ++ let ParserMatchClass = SImm26Operand; ++ let DecoderMethod = "DecodeSImmWithOffsetAndScale<26>"; ++} ++ ++def UImm1Operand : AsmOperandClass { ++ let Name = "UImm1"; ++ let RenderMethod = "addUImmOperands<1>"; ++ let PredicateMethod = "isUImm<1>"; ++ let DiagnosticType = "InvalidImm0_1"; ++} ++ ++def UImm2Operand : AsmOperandClass { ++ let Name = "UImm2"; ++ let RenderMethod = "addUImmOperands<2>"; ++ let PredicateMethod = "isUImm<2>"; ++ let DiagnosticType = "InvalidImm0_3"; ++} ++ ++def UImm3Operand : AsmOperandClass { ++ let Name = "UImm3"; ++ let RenderMethod = "addUImmOperands<3>"; ++ let PredicateMethod = "isUImm<3>"; ++ let DiagnosticType = "InvalidImm0_7"; ++} ++ ++def UImm4Operand : AsmOperandClass { ++ let Name = "UImm4"; ++ let RenderMethod = "addUImmOperands<4>"; ++ let PredicateMethod = "isUImm<4>"; ++ let DiagnosticType = "InvalidImm0_15"; ++} ++ ++def UImm5Operand : AsmOperandClass { ++ let Name = "UImm5"; ++ let RenderMethod = "addUImmOperands<5>"; ++ let PredicateMethod = "isUImm<5>"; ++ let DiagnosticType = "InvalidImm0_31"; ++} ++ ++def uimm1i : Operand, ImmLeaf= 0 && Imm < 2; }]> { ++ let PrintMethod = "printUImm<1>"; ++ let ParserMatchClass = UImm1Operand; ++} ++ ++def uimm2 : Operand, ImmLeaf= 0 && Imm < 4; }]> { ++ let PrintMethod = "printUImm<2>"; ++ let ParserMatchClass = UImm2Operand; ++} ++ ++def uimm3 : Operand, ImmLeaf= 0 && Imm < 8; }]> { ++ let PrintMethod = "printUImm<3>"; ++ let ParserMatchClass = UImm3Operand; ++} ++ ++def uimm4i : Operand, ImmLeaf= 0 && Imm < 16; }]> { ++ let PrintMethod = "printUImm<4>"; ++ let ParserMatchClass = UImm4Operand; ++} ++ ++def uimm5 : Operand, ImmLeaf= 0 && Imm < 32; }]> { ++ let PrintMethod = "printUImm<5>"; ++ let ParserMatchClass = UImm5Operand; ++} ++ ++def UImm6Operand : AsmOperandClass { ++ let Name = "UImm6"; ++ let RenderMethod = "addUImmOperands<16>"; ++ let PredicateMethod = "isUImm<6>"; ++ let DiagnosticType = "InvalidImm0_63"; ++} ++def uimm6 : Operand, ImmLeaf= 0 && Imm < 64; }]> { ++ let PrintMethod = "printUImm<6>"; ++ let ParserMatchClass = UImm6Operand; ++} ++ ++def UImm7Operand : AsmOperandClass { ++ let Name = "UImm7"; ++ let RenderMethod = "addUImmOperands<16>"; ++ let PredicateMethod = "isUImm<7>"; ++ let DiagnosticType = "InvalidImm0_127"; ++} ++ ++def uimm7i : Operand, ImmLeaf= 0 && Imm < 128; }]> { ++ let PrintMethod = "printUImm<7>"; ++ let ParserMatchClass = UImm7Operand; ++} ++ ++def UImm12Operand : AsmOperandClass { ++ let Name = "UImm12"; ++ let RenderMethod = "addUImmOperands<12>"; ++ let PredicateMethod = "isUImm<12>"; ++ let DiagnosticType = "InvalidImm0_4095"; ++} ++def uimm12 : Operand, ImmLeaf= 0 && Imm < 4096; }]> { ++ let PrintMethod = "printUImm<12>"; ++ let ParserMatchClass = UImm12Operand; ++} ++def uimm12_32 : 
Operand, ImmLeaf= 0 && Imm < 4096; }]> { ++ let PrintMethod = "printUImm<12>"; ++ let ParserMatchClass = UImm12Operand; ++} ++ ++def UImm15Operand : AsmOperandClass { ++ let Name = "UImm15"; ++ let RenderMethod = "addUImmOperands<15>"; ++ let PredicateMethod = "isUImm<15>"; ++ let DiagnosticType = "InvalidImm0_32767"; ++} ++def uimm15 : Operand, ImmLeaf= 0 && Imm < 32768; }]> { ++ let PrintMethod = "printUImm<15>"; ++ let ParserMatchClass = UImm15Operand; ++} ++ ++def UImm14Operand : AsmOperandClass { ++ let Name = "UImm14"; ++ let RenderMethod = "addUImmOperands<14>"; ++ let PredicateMethod = "isUImm<14>"; ++ let DiagnosticType = "InvalidImm0_16383"; ++} ++def uimm14 : Operand, ImmLeaf= 0 && Imm < 16384; }]> { ++ let PrintMethod = "printUImm<14>"; ++ let ParserMatchClass = UImm14Operand; ++} ++def uimm14_32 : Operand, ImmLeaf= 0 && Imm < 16384; }]> { ++ let PrintMethod = "printUImm<14>"; ++ let ParserMatchClass = UImm14Operand; ++} ++ ++def UImm8Operand : AsmOperandClass { ++ let Name = "UImm8"; ++ let RenderMethod = "addUImmOperands<8>"; ++ let PredicateMethod = "isUImm<8>"; ++ let DiagnosticType = "InvalidImm0_255"; ++} ++def uimm8_64 : Operand, ImmLeaf= 0 && Imm < 256; }]> { ++ let PrintMethod = "printUImm<8>"; ++ let ParserMatchClass = UImm8Operand; ++} ++ ++def uimm8_32 : Operand, ImmLeaf= 0 && Imm < 256; }]> { ++ let PrintMethod = "printUImm<8>"; ++ let ParserMatchClass = UImm8Operand; ++} ++ ++def addr : ++ComplexPattern; ++ ++def addrDefault : ++ComplexPattern; ++ ++def addrRegImm : ++ComplexPattern; ++ ++def addrimm14lsl2 : ComplexPattern; ++ ++class ConstantUImmAsmOperandClass Supers = [], ++ int Offset = 0> : AsmOperandClass { ++ let Name = "ConstantUImm" # Bits # "_" # Offset; ++ let RenderMethod = "addConstantUImmOperands<" # Bits # ", " # Offset # ">"; ++ let PredicateMethod = "isConstantUImm<" # Bits # ", " # Offset # ">"; ++ let SuperClasses = Supers; ++ let DiagnosticType = "UImm" # Bits # "_" # Offset; ++} ++class SImmAsmOperandClass Supers = []> ++ : AsmOperandClass { ++ let Name = "SImm" # Bits; ++ let RenderMethod = "addSImmOperands<" # Bits # ">"; ++ let PredicateMethod = "isSImm<" # Bits # ">"; ++ let SuperClasses = Supers; ++ let DiagnosticType = "SImm" # Bits; ++} ++class UImmAnyAsmOperandClass Supers = []> ++ : AsmOperandClass { ++ let Name = "ImmAny"; ++ let RenderMethod = "addConstantUImmOperands<32>"; ++ let PredicateMethod = "isSImm<" # Bits # ">"; ++ let SuperClasses = Supers; ++ let DiagnosticType = "ImmAny"; ++} ++ ++def UImm32CoercedAsmOperandClass : UImmAnyAsmOperandClass<33, []> { ++ let Name = "UImm32_Coerced"; ++ let DiagnosticType = "UImm32_Coerced"; ++} ++def SImm32RelaxedAsmOperandClass ++ : SImmAsmOperandClass<32, [UImm32CoercedAsmOperandClass]> { ++ let Name = "SImm32_Relaxed"; ++ let PredicateMethod = "isAnyImm<33>"; ++ let DiagnosticType = "SImm32_Relaxed"; ++} ++def SImm32AsmOperandClass ++ : SImmAsmOperandClass<32, [SImm32RelaxedAsmOperandClass]>; ++def ConstantUImm26AsmOperandClass ++ : ConstantUImmAsmOperandClass<26, [SImm32AsmOperandClass]>; ++ ++def ConstantUImm20AsmOperandClass ++ : ConstantUImmAsmOperandClass<20, [ConstantUImm26AsmOperandClass]>; ++ ++def ConstantUImm2Plus1AsmOperandClass ++ : ConstantUImmAsmOperandClass<2, [ConstantUImm20AsmOperandClass], 1>; ++ ++class UImmAsmOperandClass Supers = []> ++ : AsmOperandClass { ++ let Name = "UImm" # Bits; ++ let RenderMethod = "addUImmOperands<" # Bits # ">"; ++ let PredicateMethod = "isUImm<" # Bits # ">"; ++ let SuperClasses = Supers; ++ let DiagnosticType = "UImm" # Bits; ++} ++ ++def 
UImm16RelaxedAsmOperandClass ++ : UImmAsmOperandClass<16, [ConstantUImm20AsmOperandClass]> { ++ let Name = "UImm16_Relaxed"; ++ let PredicateMethod = "isAnyImm<16>"; ++ let DiagnosticType = "UImm16_Relaxed"; ++} ++ ++def ConstantSImm14Lsl2AsmOperandClass : AsmOperandClass { ++ let Name = "SImm14Lsl2"; ++ let RenderMethod = "addImmOperands"; ++ let PredicateMethod = "isScaledSImm<14, 2>"; ++ let SuperClasses = [UImm16RelaxedAsmOperandClass]; ++ let DiagnosticType = "SImm14_Lsl2"; ++} ++ ++foreach I = {2} in ++ def simm14_lsl # I : Operand { ++ let DecoderMethod = "DecodeSImmWithOffsetAndScale<14, " # I # ">"; ++ let ParserMatchClass = ++ !cast("ConstantSImm14Lsl" # I # "AsmOperandClass"); ++ } ++ ++def uimm16_64_relaxed : Operand { ++ let PrintMethod = "printUImm<16>"; ++ let ParserMatchClass = ++ !cast("UImm16RelaxedAsmOperandClass"); ++} ++ ++def uimm2_plus1 : Operand { ++ let PrintMethod = "printUImm<2, 1>"; ++ let EncoderMethod = "getUImmWithOffsetEncoding<2, 1>"; ++ let DecoderMethod = "DecodeUImmWithOffset<2, 1>"; ++ let ParserMatchClass = ConstantUImm2Plus1AsmOperandClass; ++} ++ ++// like simm32 but coerces simm32 to uimm32. ++def uimm32_coerced : Operand { ++ let ParserMatchClass = !cast("UImm32CoercedAsmOperandClass"); ++} ++ ++def imm64: Operand; ++ ++def LoongArchMemAsmOperand : AsmOperandClass { ++ let Name = "Mem"; ++ let ParserMethod = "parseMemOperand"; ++} ++ ++def LoongArchMemSimm14AsmOperand : AsmOperandClass { ++ let Name = "MemOffsetSimm14"; ++ let SuperClasses = [LoongArchMemAsmOperand]; ++ let RenderMethod = "addMemOperands"; ++ let ParserMethod = "parseMemOperand"; ++ let PredicateMethod = "isMemWithSimmOffset<14>"; ++ let DiagnosticType = "MemSImm14"; ++} ++ ++foreach I = {2} in ++ def LoongArchMemSimm14Lsl # I # AsmOperand : AsmOperandClass { ++ let Name = "MemOffsetSimm14_" # I; ++ let SuperClasses = [LoongArchMemAsmOperand]; ++ let RenderMethod = "addMemOperands"; ++ let ParserMethod = "parseMemOperand"; ++ let PredicateMethod = "isMemWithSimmOffset<14, " # I # ">"; ++ let DiagnosticType = "MemSImm14Lsl" # I; ++ } ++ ++def LoongArchMemSimmPtrAsmOperand : AsmOperandClass { ++ let Name = "MemOffsetSimmPtr"; ++ let SuperClasses = [LoongArchMemAsmOperand]; ++ let RenderMethod = "addMemOperands"; ++ let ParserMethod = "parseMemOperand"; ++ let PredicateMethod = "isMemWithPtrSizeOffset"; ++ let DiagnosticType = "MemSImmPtr"; ++} ++ ++class mem_generic : Operand { ++ let PrintMethod = "printMemOperand"; ++ let MIOperandInfo = (ops ptr_rc, simm12); ++ let EncoderMethod = "getMemEncoding"; ++ let ParserMatchClass = LoongArchMemAsmOperand; ++ let OperandType = "OPERAND_MEMORY"; ++} ++ ++// Address operand ++def mem : mem_generic; ++def mem_simmptr : mem_generic { ++ let ParserMatchClass = LoongArchMemSimmPtrAsmOperand; ++} ++ ++foreach I = {2} in ++ def mem_simm14_lsl # I : mem_generic { ++ let MIOperandInfo = (ops ptr_rc, !cast("simm14_lsl" # I)); ++ let EncoderMethod = "getSimm14MemEncoding<" # I # ">"; ++ let ParserMatchClass = ++ !cast("LoongArchMemSimm14Lsl" # I # "AsmOperand"); ++ } ++ ++def mem_ea : Operand { ++ let PrintMethod = "printMemOperandEA"; ++ let MIOperandInfo = (ops ptr_rc, simm12); ++ let EncoderMethod = "getMemEncoding"; ++ let OperandType = "OPERAND_MEMORY"; ++} ++ ++def LoongArchJumpTargetAsmOperand : AsmOperandClass { ++ let Name = "JumpTarget"; ++ let ParserMethod = "parseJumpTarget"; ++ let PredicateMethod = "isImm"; ++ let RenderMethod = "addImmOperands"; ++} ++ ++def jmptarget : Operand { ++ let EncoderMethod = "getJumpTargetOpValue"; ++ let 
ParserMatchClass = LoongArchJumpTargetAsmOperand; ++} ++ ++def brtarget : Operand { ++ let EncoderMethod = "getBranchTargetOpValue"; ++ let OperandType = "OPERAND_PCREL"; ++ let DecoderMethod = "DecodeBranchTarget"; ++ let ParserMatchClass = LoongArchJumpTargetAsmOperand; ++} ++ ++def calltarget : Operand { ++ let EncoderMethod = "getJumpTargetOpValue"; ++ let ParserMatchClass = LoongArchJumpTargetAsmOperand; ++} ++ ++// ++//SDNode ++// ++def IsGP64bit : Predicate<"Subtarget->is64Bit()">, ++ AssemblerPredicate<(all_of Feature64Bit)>; ++def IsGP32bit : Predicate<"!Subtarget->is64Bit()">, ++ AssemblerPredicate<(all_of (not Feature64Bit))>; ++def SDT_LoongArchCallSeqStart : SDCallSeqStart<[SDTCisVT<0, i32>, SDTCisVT<1, i32>]>; ++def SDT_LoongArchCallSeqEnd : SDCallSeqEnd<[SDTCisVT<0, i32>, SDTCisVT<1, i32>]>; ++ ++def LoongArchRet : SDNode<"LoongArchISD::Ret", SDTNone, ++ [SDNPHasChain, SDNPOptInGlue, SDNPVariadic]>; ++def LoongArchERet : SDNode<"LoongArchISD::ERet", SDTNone, ++ [SDNPHasChain, SDNPOptInGlue, SDNPSideEffect]>; ++ ++def callseq_start : SDNode<"ISD::CALLSEQ_START", SDT_LoongArchCallSeqStart, ++ [SDNPHasChain, SDNPSideEffect, SDNPOutGlue]>; ++def callseq_end : SDNode<"ISD::CALLSEQ_END", SDT_LoongArchCallSeqEnd, ++ [SDNPHasChain, SDNPSideEffect, ++ SDNPOptInGlue, SDNPOutGlue]>; ++def LoongArchAddress : SDNode<"LoongArchISD::GlobalAddress", SDTIntUnaryOp>; ++ ++// Return RA. ++let isReturn=1, isTerminator=1, isBarrier=1, hasCtrlDep=1, isCTI=1 in { ++ def RetRA : LoongArchPseudo<(outs), (ins), [(LoongArchRet)]>; ++ ++ let hasSideEffects=1 in ++ def ERet : LoongArchPseudo<(outs), (ins), [(LoongArchERet)]>; ++} ++ ++let Defs = [SP], Uses = [SP], hasSideEffects = 1 in { ++def ADJCALLSTACKDOWN : LoongArchPseudo<(outs), (ins i32imm:$amt1, i32imm:$amt2), ++ [(callseq_start timm:$amt1, timm:$amt2)]>; ++def ADJCALLSTACKUP : LoongArchPseudo<(outs), (ins i32imm:$amt1, i32imm:$amt2), ++ [(callseq_end timm:$amt1, timm:$amt2)]>; ++} ++ ++class LoongArchPat : Pat, PredicateControl; ++ ++def SDT_LoongArchJmpLink : SDTypeProfile<0, 1, [SDTCisVT<0, iPTR>]>; ++ ++def LoongArchJmpLink : SDNode<"LoongArchISD::JmpLink",SDT_LoongArchJmpLink, ++ [SDNPHasChain, SDNPOutGlue, SDNPOptInGlue, ++ SDNPVariadic]>; ++ ++def LoongArchTailCall : SDNode<"LoongArchISD::TailCall", SDT_LoongArchJmpLink, ++ [SDNPHasChain, SDNPOptInGlue, SDNPVariadic]>; ++ ++class GPR_32 { list GPRPredicates = [IsGP32bit]; } ++class GPR_64 { list GPRPredicates = [IsGP64bit]; } ++ ++//===---------------------------------------------------------------------===/ ++// Instruction Class Templates ++//===---------------------------------------------------------------------===/ ++///R2 ++class Int_Reg2 ++ : InstForm<(outs RO:$rd), (ins RO:$rj), ++ !strconcat(opstr, "\t$rd, $rj"), ++ [(set RO:$rd, (OpNode RO:$rj))], ++ FrmR, opstr>; ++ ++class Int_Reg2_Iocsrrd ++ : InstForm<(outs RD:$rd), (ins RS:$rj), ++ !strconcat(opstr, "\t$rd, $rj"), ++ [(set RD:$rd, (OpNode RS:$rj))], ++ FrmR, opstr>; ++ ++class Int_Reg2_Rdtime ++ : InstForm<(outs RO:$rd, RO:$rj), (ins), ++ !strconcat(opstr, "\t$rd, $rj"), ++ [(set (OpNode RO:$rd, RO:$rj))], ++ FrmR, opstr>; ++ ++class Int_Reg2_Iocsrwr ++ : InstForm<(outs), (ins RD:$rd, RS:$rj), ++ !strconcat(opstr, "\t$rd, $rj"), ++ [(set (OpNode RD:$rd, RS:$rj))], ++ FrmR, opstr>; ++ ++class Float_Reg2 ++ : InstForm<(outs RO:$fd), (ins RO:$fj), ++ !strconcat(opstr, "\t$fd, $fj"), ++ [(set RO:$fd, (OpNode RO:$fj))], ++ FrmFR, opstr>; ++ ++class Count1 ++ : InstForm<(outs RO:$rd), (ins RO:$rj), ++ !strconcat(opstr, "\t$rd, 
$rj"), ++ [(set RO:$rd, (OpNode (not RO:$rj)))], ++ FrmR, opstr>; ++ ++class SignExtInReg ++ : InstForm<(outs RO:$rd), (ins RO:$rj), !strconcat(opstr, "\t$rd, $rj"), ++ [(set RO:$rd, (sext_inreg RO:$rj, vt))], FrmR, opstr>; ++ ++///R3 ++class Int_Reg3 ++ : InstForm<(outs RO:$rd), (ins RO:$rj, RO:$rk), ++ !strconcat(opstr, "\t$rd, $rj, $rk"), ++ [(set RO:$rd, (OpNode RO:$rj, RO:$rk))], ++ FrmR, opstr>; ++ ++class Int_Reg3_Crc ++ : InstForm<(outs RS:$rd), (ins RD:$rj, RS:$rk), ++ !strconcat(opstr, "\t$rd, $rj, $rk"), ++ [(set RS:$rd, (OpNode RD:$rj, RS:$rk))], ++ FrmR, opstr>; ++ ++class SetCC_R ++ : InstForm<(outs GPR32Opnd:$rd), (ins RO:$rj, RO:$rk), ++ !strconcat(opstr, "\t$rd, $rj, $rk"), ++ [(set GPR32Opnd:$rd, (OpNode RO:$rj, RO:$rk))], ++ FrmR, opstr>; ++ ++class SetCC_I ++ : InstForm<(outs GPR32Opnd:$rd), (ins RO:$rj, ImmOpnd:$imm12), ++ !strconcat(opstr, "\t$rd, $rj, $imm12"), ++ [(set GPR32Opnd:$rd, (OpNode RO:$rj, ImmOpnd:$imm12))], ++ FrmR, opstr>; ++ ++class ATOMIC ++ : InstForm<(outs RD:$rd), (ins RD:$rk, MO:$addr), ++ !strconcat(opstr, "\t$rd, $rk, $addr"), ++ [(set RD:$rd, (OpNode RD:$rk, Addr:$addr))], ++ FrmR, opstr> { ++ let DecoderMethod = "DecodeMem"; ++ let canFoldAsLoad = 1; ++ string BaseOpcode = opstr; ++ let mayLoad = 1; ++ let mayStore = 1; ++ let Constraints = "@earlyclobber $rd"; ++} ++ ++class Nor ++ : InstForm<(outs RO:$rd), (ins RO:$rj, RO:$rk), ++ !strconcat(opstr, "\t$rd, $rj, $rk"), ++ [(set RO:$rd, (not (or RO:$rj, RO:$rk)))], ++ FrmR, opstr>; ++ ++class Shift_Var ++ : InstForm<(outs RO:$rd), (ins RO:$rj, GPR32Opnd:$rk), ++ !strconcat(opstr, "\t$rd, $rj, $rk"), ++ [(set RO:$rd, (OpNode RO:$rj, GPR32Opnd:$rk))], ++ FrmR, opstr>; ++ ++class Float_Reg3 ++ : InstForm<(outs RO:$fd), (ins RO:$fj, RO:$fk), ++ !strconcat(opstr, "\t$fd, $fj, $fk"), ++ [(set RO:$fd, (OpNode RO:$fj, RO:$fk))], ++ FrmR, opstr>; ++ ++class Float_Reg3_MA ++ : InstForm<(outs RO:$fd), (ins RO:$fj, RO:$fk), ++ !strconcat(opstr, "\t$fd, $fj, $fk"), ++ [(set RO:$fd, (OpNode (fabs RO:$fj), (fabs RO:$fk)))], ++ FrmR, opstr>; ++ ++class Float_Int_Reg3 ++ : InstForm<(outs RD:$fd), (ins RS:$rj, RS:$rk), ++ !strconcat(opstr, "\t$fd, $rj, $rk"), ++ [(set RS:$fd, (OpNode RS:$rj, RS:$rk))], ++ FrmR, opstr>; ++ ++///R4 ++class Mul_Reg4 ++ : InstForm<(outs RO:$fd), (ins RO:$fj, RO:$fk, RO:$fa), ++ !strconcat(opstr, "\t$fd, $fj, $fk, $fa"), ++ [], ++ FrmFR, opstr>; ++ ++class NMul_Reg4 ++ : InstForm<(outs RO:$fd), (ins RO:$fj, RO:$fk, RO:$fa), ++ !strconcat(opstr, "\t$fd, $fj, $fk, $fa"), ++ [], ++ FrmFR, opstr>; ++ ++///R2_IMM5 ++class Shift_Imm32 ++ : InstForm<(outs RO:$rd), (ins RO:$rj, uimm5:$imm5), ++ !strconcat(opstr, "\t$rd, $rj, $imm5"), ++ [(set RO:$rd, (OpNode RO:$rj, uimm5:$imm5))], ++ FrmR, opstr>; ++ ++///R2_IMM6 ++class Shift_Imm64 ++ : InstForm<(outs RO:$rd), (ins RO:$rj, uimm6:$imm6), ++ !strconcat(opstr, "\t$rd, $rj, $imm6"), ++ [(set RO:$rd, (OpNode RO:$rj, uimm6:$imm6))], ++ FrmR, opstr>; ++ ++///LOAD_STORE ++class FLd ++ : InstForm<(outs RD:$rd), (ins MO:$addr), ++ !strconcat(opstr, "\t$rd, $addr"), ++ [(set RD:$rd, (OpNode addrDefault:$addr))], ++ FrmR, opstr> { ++ let DecoderMethod = "DecodeFMem"; ++ let mayLoad = 1; ++} ++ ++class Ld ++ : InstForm<(outs RD:$rd), (ins MO:$addr), ++ !strconcat(opstr, "\t$rd, $addr"), ++ [(set RD:$rd, (OpNode Addr:$addr))], ++ FrmR, opstr> { ++ let DecoderMethod = "DecodeMem"; ++ let canFoldAsLoad = 1; ++ string BaseOpcode = opstr; ++ let mayLoad = 1; ++} ++ ++class FSt ++ : InstForm<(outs), (ins RD:$rd, MO:$addr), ++ !strconcat(opstr, "\t$rd, 
$addr"), ++ [(OpNode RD:$rd, addrDefault:$addr)], ++ FrmR, opstr> { ++ let DecoderMethod = "DecodeFMem"; ++ let mayStore = 1; ++} ++ ++class St ++ : InstForm<(outs), (ins RS:$rd, MO:$addr), ++ !strconcat(opstr, "\t$rd, $addr"), ++ [(OpNode RS:$rd, addr:$addr)], ++ FrmR, opstr> { ++ let DecoderMethod = "DecodeMem"; ++ string BaseOpcode = opstr; ++ let mayStore = 1; ++} ++ ++/// R2_IMM12 ++class Int_Reg2_Imm12 ++ : InstForm<(outs RO:$rd), (ins RO:$rj, ImmOpnd:$imm12), ++ !strconcat(opstr, "\t$rd, $rj, $imm12"), ++ [(set RO:$rd, (OpNode RO:$rj, ImmOpnd:$imm12))], ++ FrmR, opstr>; ++class RELOC_rrii ++ : InstForm<(outs RO:$rd), (ins RO:$rj, ImmOpnd:$imm12, ImmOpnd:$i12), ++ !strconcat(opstr, "\t$rd, $rj, $imm12"), ++ [(set RO:$rd, (OpNode RO:$rj, ImmOpnd:$imm12, ImmOpnd:$i12))], ++ FrmR, opstr>; ++ ++///R2_IMM14 ++class LdPtr ++ : InstForm<(outs RO:$rd), (ins mem_simm14_lsl2:$addr), ++ !strconcat(opstr, "\t$rd, $addr"), ++ [], FrmI, opstr>{ ++ let DecoderMethod = "DecodeMemSimm14"; ++ let canFoldAsLoad = 1; ++ string BaseOpcode = opstr; ++ let mayLoad = 1; ++} ++ ++class StPtr ++ : InstForm<(outs), (ins RO:$rd, mem_simm14_lsl2:$addr), ++ !strconcat(opstr, "\t$rd, $addr"), ++ [], FrmI, opstr> { ++ let DecoderMethod = "DecodeMemSimm14"; ++ string BaseOpcode = opstr; ++ let mayStore = 1; ++} ++ ++///R2_IMM16 ++class FJirl ++ : InstForm<(outs RO:$rd), (ins RO:$rj, opnd:$offs16), ++ !strconcat(opstr, "\t$rd, $rj, $offs16"), ++ [], FrmJ, opstr>; ++ ++class Beq ++ : InstForm<(outs), (ins RO:$rj, RO:$rd, opnd:$offs16), ++ !strconcat(opstr, "\t$rj, $rd, $offs16"), ++ [(brcond (i32 (cond_op RO:$rj, RO:$rd)), bb:$offs16)], ++ FrmI, opstr> { ++ let isBranch = 1; ++ let isTerminator = 1; ++ bit isCTI = 1; ++} ++ ++///R1_IMM21 ++class Beqz ++ : InstForm<(outs), (ins RO:$rj, opnd:$offs21), ++ !strconcat(opstr, "\t$rj, $offs21"), ++ [(brcond (i32 (cond_op RO:$rj, 0)), bb:$offs21)], ++ FrmI, opstr> { ++ let isBranch = 1; ++ let isTerminator = 1; ++ bit isCTI = 1; ++} ++ ++///IMM26 ++class JumpFB : ++ InstForm<(outs), (ins opnd:$offset26), !strconcat(opstr, "\t$offset26"), ++ [(operator targetoperator:$offset26)], FrmJ, opstr> { ++ let isBranch = 1; ++ let isTerminator=1; ++ let isBarrier=1; ++ let DecoderMethod = "DecodeJumpTarget"; ++ bit isCTI = 1; ++} ++ ++/// R3_SA ++class Reg3_Sa ++ : InstForm<(outs RO:$rd), (ins RO:$rj, RO:$rk, ImmOpnd:$sa), ++ !strconcat(opstr, "\t$rd, $rj, $rk, $sa"), ++ [(set RO:$rd, (OpNode RO:$rj, RO:$rk, ImmOpnd:$sa))], ++ FrmR, opstr>; ++ ++class Reg3_SaU ++ : InstForm<(outs RD:$rd), (ins RS:$rj, RS:$rk, ImmOpnd:$sa), ++ !strconcat(opstr, "\t$rd, $rj, $rk, $sa"), ++ [(set RD:$rd, (OpNode RS:$rj, RS:$rk, ImmOpnd:$sa))], ++ FrmR, opstr>; ++ ++/// Assert ++class Assert ++ : InstForm<(outs), (ins RO:$rj, RO:$rk), ++ !strconcat(opstr, "\t$rj, $rk"), ++ [(set (OpNode RO:$rj, RO:$rk))], ++ FrmR, opstr>; ++ ++class Code15 ++ : InstForm<(outs), (ins uimm15:$Code), ++ !strconcat(opstr, "\t$Code"), ++ [(set (OpNode uimm15:$Code))], ++ FrmOther, opstr>; ++ ++class TrapBase ++ : LoongArchPseudo<(outs), (ins), [(trap)]>, ++ PseudoInstExpansion<(RealInst 0)> { ++ let isBarrier = 1; ++ let isTerminator = 1; ++ let isCodeGenOnly = 1; ++ let isCTI = 1; ++} ++ ++class CSR ++ : InstForm<(outs RO:$rd), (ins ImmOpnd:$csr), ++ !strconcat(opstr, "\t$rd, $csr"), ++ [(set RO:$rd, (OpNode ImmOpnd:$csr))], ++ FrmOther, opstr>; ++ ++class CSRW ++ : InstForm<(outs RO:$dst), (ins RO:$rd, ImmOpnd:$csr), ++ !strconcat(opstr, "\t$rd, $csr"), ++ [(set RO:$dst, (OpNode RO:$rd, ImmOpnd:$csr))], ++ FrmOther, opstr>{ 
++ let Constraints = "$rd = $dst"; ++} ++ ++class CSRX ++ : InstForm<(outs RO:$dst), (ins RO:$rd, RO:$rj, ImmOpnd:$csr), ++ !strconcat(opstr, "\t$rd, $rj, $csr"), ++ [(set RO:$dst, (OpNode RO:$rd, RO:$rj, ImmOpnd:$csr))], ++ FrmOther, opstr>{ ++ let Constraints = "$rd = $dst"; ++} ++ ++class CAC ++ : InstForm<(outs), (ins uimm5:$op, RO:$rj, ImmOpnd:$si12), ++ !strconcat(opstr, "\t$op, $rj, $si12"), ++ [(set (OpNode uimm5:$op, RO:$rj, ImmOpnd:$si12))], ++ FrmOther, opstr>; ++ ++class LEVEL ++ : InstForm<(outs RO:$rd), (ins RO:$rj, uimm8_64:$level), ++ !strconcat(opstr, "\t$rd, $rj, $level"), ++ [(set RO:$rd, (OpNode RO:$rj, uimm8_64:$level))], ++ FrmOther, opstr>; ++ ++class SEQ ++ : InstForm<(outs), (ins RO:$rj, uimm8_64:$seq), ++ !strconcat(opstr, "\t$rj, $seq"), ++ [(set (OpNode RO:$rj, uimm8_64:$seq))], ++ FrmOther, opstr>; ++ ++class Wait ++ : InstForm<(outs), (ins uimm15:$hint), ++ !strconcat(opstr, "\t$hint"), ++ [(set (OpNode uimm15:$hint))], ++ FrmOther, opstr>; ++ ++class Invtlb ++ : InstForm<(outs), (ins uimm5:$op, RO:$rj, RO:$rk), ++ !strconcat(opstr, "\t$op, $rj, $rk"), ++ [(set (OpNode uimm5:$op, RO:$rj, RO:$rk))], ++ FrmOther, opstr>; ++ ++class OP32 ++ : InstForm<(outs), (ins), ++ !strconcat(opstr, ""), ++ [(set (OpNode))], ++ FrmOther, opstr>; ++ ++class Bar ++ : InstForm<(outs), (ins uimm15:$hint), ++ !strconcat(opstr, "\t$hint"), ++ [(set (OpNode uimm15:$hint))], ++ FrmOther, opstr>; ++ ++//class CA op, string opstr> ++// : R3_CA; ++ ++class SI16_R2 ++ : InstForm<(outs RO:$rd), (ins RO:$rj, simm16:$si16), ++ !strconcat(opstr, "\t$rd, $rj, $si16"), ++ [(set RO:$rd, (OpNode RO:$rj, simm16:$si16))], ++ FrmR, opstr>; ++ ++class SI20 ++ : InstForm<(outs RO:$rd), (ins ImmOpnd:$si20), ++ !strconcat(opstr, "\t$rd, $si20"), ++ [(set RO:$rd, (OpNode ImmOpnd:$si20))], ++ FrmR, opstr>; ++let isCodeGenOnly = 1, Constraints = "$dst = $rd" in ++class SI20_R2 ++ : InstForm<(outs RO:$dst), (ins RO:$rd, ImmOpnd:$si20), ++ !strconcat(opstr, "\t$rd, $si20"), ++ [(set RO:$dst, (OpNode RO:$rd, ImmOpnd:$si20))], ++ FrmR, opstr>; ++class RELOC_rii ++ : InstForm<(outs RO:$rd), (ins ImmOpnd:$si20, ImmOpnd:$i20), ++ !strconcat(opstr, "\t$rd, $si20"), ++ [(set RO:$rd, (OpNode ImmOpnd:$si20, ImmOpnd:$i20))], ++ FrmR, opstr>; ++ ++// preld ++class Preld ++ : InstForm<(outs), (ins RO:$rj, MemOpnd:$addr, uimm5:$hint), ++ !strconcat(opstr, "\t$hint, $rj, $addr"), ++ [(set (OpNode RO:$rj, MemOpnd:$addr, uimm5:$hint))], ++ FrmR, opstr>; ++class Preld_Raw ++ : InstForm<(outs), (ins RO:$rj, simm12:$imm12, uimm5:$hint), ++ !strconcat(opstr, "\t$hint, $rj, $imm12"), ++ [], ++ FrmR, opstr>; ++class IsCall { ++ bit isCall = 1; ++ bit isCTI = 1; ++} ++ ++class EffectiveAddress ++ : InstForm<(outs RO:$rd), (ins mem_ea:$addr), ++ !strconcat(opstr, "\t$rd, $addr"), ++ [(set RO:$rd, addr:$addr)], FrmI, ++ !strconcat(opstr, "_lea")> { ++ let isCodeGenOnly = 1; ++ let hasNoSchedulingInfo = 1; ++ let DecoderMethod = "DecodeMem"; ++} ++ ++def PtrRC : Operand { ++ let MIOperandInfo = (ops ptr_rc); ++ let DecoderMethod = "DecodePtrRegisterClass"; ++ let ParserMatchClass = GPR32AsmOperand; ++} ++ ++class Atomic2Ops : ++ LoongArchPseudo<(outs DRC:$dst), (ins PtrRC:$ptr, DRC:$incr), ++ [(set DRC:$dst, (Op iPTR:$ptr, DRC:$incr))]>; ++ ++class Atomic2OpsPostRA : ++ LoongArchPseudo<(outs RC:$dst), (ins PtrRC:$ptr, RC:$incr), []> { ++ let mayLoad = 1; ++ let mayStore = 1; ++} ++ ++class Atomic2OpsSubwordPostRA : ++ LoongArchPseudo<(outs RC:$dst), (ins PtrRC:$ptr, RC:$incr, RC:$mask, RC:$mask2, ++ RC:$shiftamnt), []>; ++class 
AtomicCmpSwap : ++ LoongArchPseudo<(outs DRC:$dst), (ins PtrRC:$ptr, DRC:$cmp, DRC:$swap), ++ [(set DRC:$dst, (Op iPTR:$ptr, DRC:$cmp, DRC:$swap))]>; ++ ++class AtomicCmpSwapPostRA : ++ LoongArchPseudo<(outs RC:$dst), (ins PtrRC:$ptr, RC:$cmp, RC:$swap), []> { ++ let mayLoad = 1; ++ let mayStore = 1; ++} ++ ++class AtomicCmpSwapSubwordPostRA : ++ LoongArchPseudo<(outs RC:$dst), (ins PtrRC:$ptr, RC:$mask, RC:$ShiftCmpVal, ++ RC:$mask2, RC:$ShiftNewVal, RC:$ShiftAmt), []> { ++ let mayLoad = 1; ++ let mayStore = 1; ++} ++ ++class LoongArchInstAlias : ++ InstAlias, PredicateControl; ++ ++//===---------------------------------------------------------------------===/ ++// Instruction Definitions. ++//===---------------------------------------------------------------------===/ ++/// ++/// R2 ++/// ++ ++def CLO_D : Count1<"clo.d", GPR64Opnd, ctlz>, R2I<0b01000>; ++def CLZ_D : Int_Reg2<"clz.d", GPR64Opnd, ctlz>, R2I<0b01001>; ++def CTO_D : Count1<"cto.d", GPR64Opnd, cttz>, R2I<0b01010>; ++def CTZ_D : Int_Reg2<"ctz.d", GPR64Opnd, cttz>, R2I<0b01011>; ++ ++def REVB_4H : Int_Reg2<"revb.4h", GPR64Opnd>, R2I<0b01101>; //[] ++def REVB_2W : Int_Reg2<"revb.2w", GPR64Opnd>, R2I<0b01110>; ++def REVB_D : Int_Reg2<"revb.d", GPR64Opnd>, R2I<0b01111>; ++def REVH_2W : Int_Reg2<"revh.2w", GPR64Opnd>, R2I<0b10000>; ++def REVH_D : Int_Reg2<"revh.d", GPR64Opnd>, R2I<0b10001>; //[] ++ ++def BITREV_8B : Int_Reg2<"bitrev.8b", GPR64Opnd>, R2I<0b10011>; //[] ++def BITREV_D : Int_Reg2<"bitrev.d", GPR64Opnd, bitreverse>, R2I<0b10101>; ++ ++def EXT_W_H : SignExtInReg<"ext.w.h", GPR64Opnd, i16>, R2I<0b10110>; ++def EXT_W_B : SignExtInReg<"ext.w.b", GPR64Opnd, i8>, R2I<0b10111>; ++ ++def RDTIME_D : Int_Reg2_Rdtime<"rdtime.d", GPR64Opnd>, R2I<0b11010>; ++def RDTIMEL_W : Int_Reg2_Rdtime<"rdtimel.w", GPR64Opnd>, R2I<0b11000>; ++def RDTIMEH_W : Int_Reg2_Rdtime<"rdtimeh.w", GPR64Opnd>, R2I<0b11001>; ++/// ++/// R3 ++/// ++def ADD_D : Int_Reg3<"add.d", GPR64Opnd, add>, R3I<0b0100001>; ++def SUB_D : Int_Reg3<"sub.d", GPR64Opnd, sub>, R3I<0b0100011>; ++ ++def SLT : SetCC_R<"slt", GPR64Opnd, setlt>, R3I<0b0100100>; ++def SLTU : SetCC_R<"sltu", GPR64Opnd, setult>, R3I<0b0100101>; ++def MASKEQZ : Int_Reg3<"maskeqz", GPR64Opnd>, R3I<0b0100110>; //[] ++def MASKNEZ : Int_Reg3<"masknez", GPR64Opnd>, R3I<0b0100111>; //[] ++ ++def NOR : Nor<"nor", GPR64Opnd>, R3I<0b0101000>; ++def AND : Int_Reg3<"and", GPR64Opnd, and>, R3I<0b0101001>; ++def OR : Int_Reg3<"or", GPR64Opnd, or>, R3I<0b0101010>; ++def XOR : Int_Reg3<"xor", GPR64Opnd, xor>, R3I<0b0101011>; ++def ORN : Int_Reg3<"orn", GPR64Opnd>, R3I<0b0101100>; ++def ANDN : Int_Reg3<"andn", GPR64Opnd>, R3I<0b0101101>; ++ ++def SLL_D : Shift_Var<"sll.d", GPR64Opnd, shl>, R3I<0b0110001>; ++def SRL_D : Shift_Var<"srl.d", GPR64Opnd, srl>, R3I<0b0110010>; ++def SRA_D : Shift_Var<"sra.d", GPR64Opnd, sra>, R3I<0b0110011>; ++def ROTR_D: Shift_Var<"rotr.d", GPR64Opnd, rotr>, R3I<0b0110111>; ++ ++def MUL_D : Int_Reg3<"mul.d", GPR64Opnd, mul>, R3I<0b0111011>; ++def MULH_D : Int_Reg3<"mulh.d", GPR64Opnd, mulhs>, R3I<0b0111100>; ++def MULH_DU : Int_Reg3<"mulh.du", GPR64Opnd, mulhu>, R3I<0b0111101>; ++def MULW_D_W : Int_Reg3<"mulw.d.w", GPR64Opnd>, R3I<0b0111110>; ++def MULW_D_WU : Int_Reg3<"mulw.d.wu", GPR64Opnd>, R3I<0b0111111>; ++ ++let usesCustomInserter = 1 in { ++def DIV_D : Int_Reg3<"div.d", GPR64Opnd, sdiv>, R3I<0b1000100>; ++def MOD_D : Int_Reg3<"mod.d", GPR64Opnd, srem>, R3I<0b1000101>; ++def DIV_DU : Int_Reg3<"div.du", GPR64Opnd, udiv>, R3I<0b1000110>; ++def MOD_DU : Int_Reg3<"mod.du", GPR64Opnd, 
urem>, R3I<0b1000111>; ++} ++ ++def CRC_W_D_W : Int_Reg3_Crc<"crc.w.d.w", GPR64Opnd, GPR32Opnd, int_loongarch_crc_w_d_w>, R3I<0b1001011>; ++def CRCC_W_D_W : Int_Reg3_Crc<"crcc.w.d.w", GPR64Opnd, GPR32Opnd, int_loongarch_crcc_w_d_w>, R3I<0b1001111>; ++/// ++/// SLLI ++/// ++def SLLI_D : Shift_Imm64<"slli.d", GPR64Opnd, shl>, R2_IMM6<0b00>; ++def SRLI_D : Shift_Imm64<"srli.d", GPR64Opnd, srl>, R2_IMM6<0b01>; ++def SRAI_D : Shift_Imm64<"srai.d", GPR64Opnd, sra>, R2_IMM6<0b10>; ++def ROTRI_D : Shift_Imm64<"rotri.d", GPR64Opnd, rotr>, R2_IMM6<0b11>; ++/// ++/// Misc ++/// ++def ALSL_WU : Reg3_SaU<"alsl.wu", GPR64Opnd, GPR32Opnd, uimm2_plus1>, R3_SA2<0b00011> { ++ let Pattern = [(set GPR64Opnd:$rd, ++ (i64 (zext (add GPR32Opnd:$rk, (shl GPR32Opnd:$rj, immZExt2Alsl:$sa)))))]; ++} ++ ++def ALSL_D : Reg3_Sa<"alsl.d", GPR64Opnd, uimm2_plus1>, R3_SA2<0b10110> { ++ let Pattern = [(set GPR64Opnd:$rd, ++ (add GPR64Opnd:$rk, (shl GPR64Opnd:$rj, immZExt2Alsl:$sa)))]; ++} ++def BYTEPICK_D : Reg3_Sa<"bytepick.d", GPR64Opnd, uimm3>, R3_SA3; //[] ++ ++def ASRTLE_D : Assert<"asrtle.d", GPR64Opnd, int_loongarch_asrtle_d>, ASSERT<0b10>; ++def ASRTGT_D : Assert<"asrtgt.d", GPR64Opnd, int_loongarch_asrtgt_d>, ASSERT<0b11>; ++ ++def DBCL : Code15<"dbcl">, CODE15<0b1010101>; ++def HYPCALL : Code15<"hypcall">, CODE15<0b1010111>; ++ ++/// ++/// R2_IMM12 ++/// ++def SLTI : SetCC_I<"slti", GPR64Opnd, simm12, setlt>, R2_IMM12<0b000>; ++def SLTUI : SetCC_I<"sltui", GPR64Opnd, simm12, setult>, R2_IMM12<0b001>; ++def ADDI_W64 : Int_Reg2_Imm12<"addi.w", GPR64Opnd, simm12>, R2_IMM12<0b010>; ++def ADDI_D : Int_Reg2_Imm12<"addi.d", GPR64Opnd, simm12, add>, R2_IMM12<0b011>; ++def LU52I_D : Int_Reg2_Imm12<"lu52i.d", GPR64Opnd, simm12>, R2_IMM12<0b100>; ++def ANDI : Int_Reg2_Imm12<"andi", GPR64Opnd, uimm12, and>, R2_IMM12<0b101>; ++def ORI : Int_Reg2_Imm12<"ori", GPR64Opnd, uimm12, or>, R2_IMM12<0b110>; ++def XORI : Int_Reg2_Imm12<"xori", GPR64Opnd, uimm12, xor>, R2_IMM12<0b111>; ++ ++/// ++/// Privilege Instructions ++/// ++def CSRRD : CSR<"csrrd", GPR64Opnd, uimm14, int_loongarch_dcsrrd>, R1_CSR<0b0000000000100>; ++def CSRWR : CSRW<"csrwr", GPR64Opnd, uimm14, int_loongarch_dcsrwr>, R1_CSR<0b0000100000100>; ++def CSRXCHG : CSRX<"csrxchg", GPR64Opnd, uimm14, int_loongarch_dcsrxchg>, R2_CSR<0b00000100>; ++def IOCSRRD_D : Int_Reg2_Iocsrrd<"iocsrrd.d", GPR64Opnd, GPR32Opnd, int_loongarch_iocsrrd_d>, R2P<0b011>; ++def IOCSRWR_D : Int_Reg2_Iocsrwr<"iocsrwr.d", GPR64Opnd, GPR32Opnd, int_loongarch_iocsrwr_d>, R2P<0b111>; ++def CACOP : CAC<"cacop", GPR64Opnd, simm12, int_loongarch_dcacop>, R1_CACHE; ++def LDDIR : LEVEL<"lddir", GPR64Opnd>, R2_LEVEL<0b00000110010000>; ++def LDPTE : SEQ<"ldpte", GPR64Opnd>, R1_SEQ<0b00000110010001>; ++ ++def IDLE : Wait<"idle">, WAIT_FM; ++def INVTLB : Invtlb<"invtlb", GPR64Opnd>, R2_INVTLB; ++// ++def IOCSRRD_B : Int_Reg2<"iocsrrd.b", GPR64Opnd>, R2P<0b000>; ++def IOCSRRD_H : Int_Reg2<"iocsrrd.h", GPR64Opnd>, R2P<0b001>; ++def IOCSRRD_W : Int_Reg2<"iocsrrd.w", GPR64Opnd>, R2P<0b010>; ++// ++def TLBCLR : OP32<"tlbclr", int_loongarch_tlbclr>, IMM32<0b001000>; ++def TLBFLUSH : OP32<"tlbflush", int_loongarch_tlbflush>, IMM32<0b001001>; ++def TLBSRCH : OP32<"tlbsrch", int_loongarch_tlbsrch>, IMM32<0b001010>; ++def TLBRD : OP32<"tlbrd", int_loongarch_tlbrd>, IMM32<0b001011>; ++def TLBWR : OP32<"tlbwr", int_loongarch_tlbwr>, IMM32<0b001100>; ++def TLBFILL : OP32<"tlbfill", int_loongarch_tlbfill>, IMM32<0b001101>; ++def ERTN : OP32<"ertn">, IMM32<0b001110>; ++ ++/// ++/// R1_IMM20 ++/// ++def ADDU16I_D : 
SI16_R2<"addu16i.d", GPR64Opnd>, R2_SI16<0b000100>; ++def LU12I_W : SI20<"lu12i.w", GPR64Opnd, simm20>, R1_SI20<0b0001010>; ++def LU32I_D : SI20<"lu32i.d", GPR64Opnd, simm20>, R1_SI20<0b0001011>; ++def LU32I_D_R2 : SI20_R2<"lu32i.d", GPR64Opnd, simm20>, R1_SI20<0b0001011>; ++def PCADDI : SI20<"pcaddi", GPR64Opnd, simm20>, R1_SI20<0b0001100>; ++def PCALAU12I : SI20<"pcalau12i", GPR64Opnd, simm20>, R1_SI20<0b0001101>; ++def PCADDU12I : SI20<"pcaddu12i", GPR64Opnd, simm20>, R1_SI20<0b0001110>; ++def PCADDU18I : SI20<"pcaddu18i", GPR64Opnd, simm20>, R1_SI20<0b0001111>; ++ ++ ++def BEQZ : Beqz<"beqz", brtarget, seteq, GPR64Opnd>, R1_IMM21BEQZ<0b010000>; ++def BNEZ : Beqz<"bnez", brtarget, setne, GPR64Opnd>, R1_IMM21BEQZ<0b010001>; ++ ++def JIRL : FJirl<"jirl", simm16, GPR64Opnd>, R2_IMM16JIRL; ++let isCall = 1, isCTI=1, isCodeGenOnly = 1 in { ++def JIRL_CALL : FJirl<"jirl", simm16, GPR64Opnd>, R2_IMM16JIRL; ++} ++ ++def B : JumpFB, IMM26B<0b010100>; ++ ++def BEQ : Beq<"beq", brtarget, seteq, GPR64Opnd>, R2_IMM16BEQ<0b010110>; ++def BNE : Beq<"bne", brtarget, setne, GPR64Opnd>, R2_IMM16BEQ<0b010111>; ++def BLT : Beq<"blt", brtarget, setlt, GPR64Opnd>, R2_IMM16BEQ<0b011000>; ++def BGE : Beq<"bge", brtarget, setge, GPR64Opnd>, R2_IMM16BEQ<0b011001>; ++def BLTU : Beq<"bltu", brtarget, setult, GPR64Opnd>, R2_IMM16BEQ<0b011010>; ++def BGEU : Beq<"bgeu", brtarget, setuge, GPR64Opnd>, R2_IMM16BEQ<0b011011>; ++ ++/// ++/// Mem access ++/// ++class LLBase : ++ InstForm<(outs RO:$rd), (ins MO:$addr), !strconcat(opstr, "\t$rd, $addr"), ++ [], FrmI, opstr> { ++ let DecoderMethod = "DecodeMemSimm14"; ++ let mayLoad = 1; ++} ++ ++class SCBase : ++ InstForm<(outs RO:$dst), (ins RO:$rd, MO:$addr), ++ !strconcat(opstr, "\t$rd, $addr"), [], FrmI> { ++ let DecoderMethod = "DecodeMemSimm14"; ++ let mayStore = 1; ++ let Constraints = "$rd = $dst"; ++} ++ ++class STGT_LE : ++ InstForm<(outs), (ins RO:$rd, RO:$rj, RO:$rk), ++ !strconcat(opstr, "\t$rd, $rj, $rk"), ++ [], FrmI, opstr>; ++ ++class Float_STGT_LE ++ : InstForm<(outs), (ins RD:$fd, RS:$rj, RS:$rk), ++ !strconcat(opstr, "\t$fd, $rj, $rk"), ++ [], FrmR, opstr>; ++ ++def LL_D : LLBase<"ll.d", GPR64Opnd, mem_simm14_lsl2>, LL_SC<0b010>; ++def SC_D : SCBase<"sc.d", GPR64Opnd, mem_simm14_lsl2>, LL_SC<0b011>; ++ ++def LDPTR_W : LdPtr<"ldptr.w", GPR64Opnd>, LL_SC<0b100>; ++def STPTR_W : StPtr<"stptr.w", GPR64Opnd>, LL_SC<0b101>; ++def LDPTR_D : LdPtr<"ldptr.d", GPR64Opnd>, LL_SC<0b110>; ++def STPTR_D : StPtr<"stptr.d", GPR64Opnd>, LL_SC<0b111>; ++ ++def LD_B : Ld<"ld.b", GPR64Opnd, mem, sextloadi8>, LOAD_STORE<0b0000>; ++def LD_H : Ld<"ld.h", GPR64Opnd, mem, sextloadi16>, LOAD_STORE<0b0001>; ++def LD_W : Ld<"ld.w", GPR64Opnd, mem, sextloadi32>, LOAD_STORE<0b0010>; ++def LD_D : Ld<"ld.d", GPR64Opnd, mem_simmptr, load>, LOAD_STORE<0b0011>; ++def ST_B : St<"st.b", GPR64Opnd, mem, truncstorei8>, LOAD_STORE<0b0100>; ++def ST_H : St<"st.h", GPR64Opnd, mem, truncstorei16>, LOAD_STORE<0b0101>; ++def ST_W : St<"st.w", GPR64Opnd, mem, truncstorei32>, LOAD_STORE<0b0110>; ++def ST_D : St<"st.d", GPR64Opnd, mem_simmptr, store>, LOAD_STORE<0b0111>; ++def LD_BU : Ld<"ld.bu", GPR64Opnd, mem, zextloadi8>, LOAD_STORE<0b1000>; ++def LD_HU : Ld<"ld.hu", GPR64Opnd, mem, zextloadi16>, LOAD_STORE<0b1001>; ++def LD_WU : Ld<"ld.wu", GPR64Opnd, mem, zextloadi32>, LOAD_STORE<0b1010>; ++ ++def AMSWAP_W : ATOMIC<"amswap.w", GPR32Opnd, mem>, AM<0b000000>; ++def AMSWAP_D : ATOMIC<"amswap.d", GPR64Opnd, mem>, AM<0b000001>; ++def AMADD_W : ATOMIC<"amadd.w", GPR32Opnd, mem>, AM<0b000010>; ++def 
AMADD_D : ATOMIC<"amadd.d", GPR64Opnd, mem>, AM<0b000011>; ++def AMAND_W : ATOMIC<"amand.w", GPR32Opnd, mem>, AM<0b000100>; ++def AMAND_D : ATOMIC<"amand.d", GPR64Opnd, mem>, AM<0b000101>; ++def AMOR_W : ATOMIC<"amor.w", GPR32Opnd, mem>, AM<0b000110>; ++def AMOR_D : ATOMIC<"amor.d", GPR64Opnd, mem>, AM<0b000111>; ++def AMXOR_W : ATOMIC<"amxor.w", GPR32Opnd, mem>, AM<0b001000>; ++def AMXOR_D : ATOMIC<"amxor.d", GPR64Opnd, mem>, AM<0b001001>; ++def AMMAX_W : ATOMIC<"ammax.w", GPR32Opnd, mem>, AM<0b001010>; ++def AMMAX_D : ATOMIC<"ammax.d", GPR64Opnd, mem>, AM<0b001011>; ++def AMMIN_W : ATOMIC<"ammin.w", GPR32Opnd, mem>, AM<0b001100>; ++def AMMIN_D : ATOMIC<"ammin.d", GPR64Opnd, mem>, AM<0b001101>; ++def AMMAX_WU : ATOMIC<"ammax.wu", GPR32Opnd, mem>, AM<0b001110>; ++def AMMAX_DU : ATOMIC<"ammax.du", GPR64Opnd, mem>, AM<0b001111>; ++def AMMIN_WU : ATOMIC<"ammin.wu", GPR32Opnd, mem>, AM<0b010000>; ++def AMMIN_DU : ATOMIC<"ammin.du", GPR64Opnd, mem>, AM<0b010001>; ++ ++ ++def AMSWAP_DB_W : ATOMIC<"amswap_db.w", GPR32Opnd, mem>, AM<0b010010>; ++def AMSWAP_DB_D : ATOMIC<"amswap_db.d", GPR64Opnd, mem>, AM<0b010011>; ++def AMADD_DB_W : ATOMIC<"amadd_db.w", GPR32Opnd, mem>, AM<0b010100>; ++def AMADD_DB_D : ATOMIC<"amadd_db.d", GPR64Opnd, mem>, AM<0b010101>; ++def AMAND_DB_W : ATOMIC<"amand_db.w", GPR32Opnd, mem>, AM<0b010110>; ++def AMAND_DB_D : ATOMIC<"amand_db.d", GPR64Opnd, mem>, AM<0b010111>; ++def AMOR_DB_W : ATOMIC<"amor_db.w", GPR32Opnd, mem>, AM<0b011000>; ++def AMOR_DB_D : ATOMIC<"amor_db.d", GPR64Opnd, mem>, AM<0b011001>; ++def AMXOR_DB_W : ATOMIC<"amxor_db.w", GPR32Opnd, mem>, AM<0b011010>; ++def AMXOR_DB_D : ATOMIC<"amxor_db.d", GPR64Opnd, mem>, AM<0b011011>; ++def AMMAX_DB_W : ATOMIC<"ammax_db.w", GPR32Opnd, mem>, AM<0b011100>; ++def AMMAX_DB_D : ATOMIC<"ammax_db.d", GPR64Opnd, mem>, AM<0b011101>; ++def AMMIN_DB_W : ATOMIC<"ammin_db.w", GPR32Opnd, mem>, AM<0b011110>; ++def AMMIN_DB_D : ATOMIC<"ammin_db.d", GPR64Opnd, mem>, AM<0b011111>; ++def AMMAX_DB_WU : ATOMIC<"ammax_db.wu", GPR32Opnd, mem>, AM<0b100000>; ++def AMMAX_DB_DU : ATOMIC<"ammax_db.du", GPR64Opnd, mem>, AM<0b100001>; ++def AMMIN_DB_WU : ATOMIC<"ammin_db.wu", GPR32Opnd, mem>, AM<0b100010>; ++def AMMIN_DB_DU : ATOMIC<"ammin_db.du", GPR64Opnd, mem>, AM<0b100011>; ++ ++def LDGT_B : Int_Reg3<"ldgt.b", GPR64Opnd>, R3MI<0b11110000>; ++def LDGT_H : Int_Reg3<"ldgt.h", GPR64Opnd>, R3MI<0b11110001>; ++def LDGT_W : Int_Reg3<"ldgt.w", GPR64Opnd>, R3MI<0b11110010>; ++def LDGT_D : Int_Reg3<"ldgt.d", GPR64Opnd>, R3MI<0b11110011>; ++def LDLE_B : Int_Reg3<"ldle.b", GPR64Opnd>, R3MI<0b11110100>; ++def LDLE_H : Int_Reg3<"ldle.h", GPR64Opnd>, R3MI<0b11110101>; ++def LDLE_W : Int_Reg3<"ldle.w", GPR64Opnd>, R3MI<0b11110110>; ++def LDLE_D : Int_Reg3<"ldle.d", GPR64Opnd>, R3MI<0b11110111>; ++def STGT_B : STGT_LE<"stgt.b", GPR64Opnd>, R3MI<0b11111000>; ++def STGT_H : STGT_LE<"stgt.h", GPR64Opnd>, R3MI<0b11111001>; ++def STGT_W : STGT_LE<"stgt.w", GPR64Opnd>, R3MI<0b11111010>; ++def STGT_D : STGT_LE<"stgt.d", GPR64Opnd>, R3MI<0b11111011>; ++def STLE_B : STGT_LE<"stle.b", GPR64Opnd>, R3MI<0b11111100>; ++def STLE_H : STGT_LE<"stle.h", GPR64Opnd>, R3MI<0b11111101>; ++def STLE_W : STGT_LE<"stle.w", GPR64Opnd>, R3MI<0b11111110>; ++def STLE_D : STGT_LE<"stle.d", GPR64Opnd>, R3MI<0b11111111>; ++ ++let isCodeGenOnly = 1 in { ++def PRELD : Preld<"preld", mem, GPR64Opnd>, PRELD_FM; ++} ++ ++def PRELD_Raw : Preld_Raw<"preld", GPR64Opnd>, PRELD_FM; ++ ++let isCall=1, isCTI=1, Defs = [RA] in { ++ class JumpLink : ++ InstForm<(outs), (ins opnd:$target), 
!strconcat(opstr, "\t$target"), ++ [(LoongArchJmpLink tglobaladdr:$target)], FrmJ, opstr> { ++ let DecoderMethod = "DecodeJumpTarget"; ++ } ++} ++def LONG_BRANCH_PCADDU12I : LoongArchPseudo<(outs GPR64Opnd:$dst), ++ (ins brtarget:$tgt), []>, GPR_64; ++ ++def LONG_BRANCH_ADDID2Op : LoongArchPseudo<(outs GPR64Opnd:$dst), ++ (ins GPR64Opnd:$src, brtarget:$tgt), []>, GPR_64; ++ ++def LONG_BRANCH_ADDID : LoongArchPseudo<(outs GPR64Opnd:$dst), ++ (ins GPR64Opnd:$src, brtarget:$tgt, brtarget:$baltgt), []>, GPR_64; ++ ++def LEA_ADDI_D: EffectiveAddress<"addi.d", GPR64Opnd>, LEA_ADDI_FM<0b011>, GPR_64; ++ ++class PseudoReturnBase : LoongArchPseudo<(outs), (ins RO:$rs), ++ []> { ++ let isTerminator = 1; ++ let isBarrier = 1; ++ let isReturn = 1; ++ let isCodeGenOnly = 1; ++ let hasCtrlDep = 1; ++ let hasExtraSrcRegAllocReq = 1; ++ bit isCTI = 1; ++} ++ ++def PseudoReturn64 : PseudoReturnBase; ++//def PseudoReturn : PseudoReturnBase; ++ ++ ++let isCall=1, isCTI=1, Defs=[RA], isCodeGenOnly=1 in { ++def PseudoCall : LoongArchPseudo<(outs), (ins calltarget:$target), ++ []>; ++} ++ ++let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1, Uses = [SP] in ++def PseudoTailCall : LoongArchPseudo<(outs), (ins calltarget:$target), ++ []>; ++ ++class PseudoTailBase : LoongArchPseudo<(outs), (ins opnd:$offset26), ++ []> { ++ let isTerminator = 1; ++ let isBarrier = 1; ++ let isReturn = 1; ++ let isCodeGenOnly = 1; ++} ++def PseudoTailReturn : PseudoTailBase; ++ ++ ++def : LoongArchPat<(LoongArchTailCall tglobaladdr:$dst), ++ (PseudoTailCall tglobaladdr:$dst)>; ++ ++def : LoongArchPat<(LoongArchTailCall texternalsym:$dst), ++ (PseudoTailCall texternalsym:$dst)>; ++ ++let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1, isIndirectBranch = 1, Uses = [SP] in ++def PseudoTAILIndirect : LoongArchPseudo<(outs), (ins GPRTC64Opnd:$rj), [(LoongArchTailCall GPRTC64Opnd:$rj)]>, ++ PseudoInstExpansion<(JIRL ZERO_64, GPR64Opnd:$rj, 0)>; ++ ++ ++def : LoongArchPat<(LoongArchJmpLink tglobaladdr:$dst), ++ (PseudoCall tglobaladdr:$dst)>; ++ ++def : LoongArchPat<(LoongArchJmpLink (i32 texternalsym:$dst)), ++ (PseudoCall texternalsym:$dst)>; ++def : LoongArchPat<(LoongArchJmpLink (i64 texternalsym:$dst)), ++ (PseudoCall texternalsym:$dst)>; ++ ++def : LoongArchPat<(LoongArchJmpLink (i64 texternalsym:$dst)), ++ (PseudoCall texternalsym:$dst)>; ++ ++def BL : JumpLink<"bl", calltarget>, FJ<0b010101>; ++ ++class IsAsCheapAsAMove { ++ bit isAsCheapAsAMove = 1; ++} ++class LoadUpper: ++ InstForm<(outs RO:$rt), (ins Imm:$imm16), !strconcat(opstr, "\t$rt, $imm16"), ++ [], FrmI, opstr>, IsAsCheapAsAMove { ++ let hasSideEffects = 0; ++ let isReMaterializable = 1; ++ let mayLoad = 1; ++} ++ ++let isCodeGenOnly = 1 in { ++def LAPCREL : LoadUpper<"la.pcrel", GPR64Opnd, uimm16_64_relaxed>, LUI_FM, GPR_64; ++} ++ ++def NOP : LoongArchPseudo<(outs), (ins), []>, ++ PseudoInstExpansion<(ANDI ZERO_64, ZERO_64, 0)>; ++ ++def : LoongArchInstAlias<"nop", (ANDI ZERO_64, ZERO_64, 0), 1>; ++def : LoongArchInstAlias<"jr $rd", (JIRL ZERO_64, GPR64Opnd:$rd, 0), 1>; ++def : LoongArchInstAlias<"move $dst, $src", ++ (OR GPR64Opnd:$dst, GPR64Opnd:$src, ZERO_64), 1>, GPR_64; ++ ++def UImm12RelaxedAsmOperandClass ++: UImmAsmOperandClass<12, [ConstantUImm20AsmOperandClass]> { ++ let Name = "UImm12_Relaxed"; ++ let PredicateMethod = "isAnyImm<12>"; ++ let DiagnosticType = "UImm12_Relaxed"; ++} ++ ++def SImm12RelaxedAsmOperandClass ++: SImmAsmOperandClass<12, [UImm12RelaxedAsmOperandClass]> { ++ let Name = "SImm12_Relaxed"; ++ let PredicateMethod = 
"isAnyImm<12>"; ++ let DiagnosticType = "SImm12_Relaxed"; ++} ++ ++def simm12_relaxed : Operand { ++ let DecoderMethod = "DecodeSImmWithOffsetAndScale<12>"; ++ let ParserMatchClass = !cast("SImm12RelaxedAsmOperandClass"); ++} ++ ++def : LoongArchPat<(i64 (anyext GPR32:$src)), ++ (INSERT_SUBREG (i64 (IMPLICIT_DEF)), GPR32:$src, sub_32)>,GPR_64; ++ ++let usesCustomInserter = 1 in { ++ def ATOMIC_LOAD_ADD_I64 : Atomic2Ops; ++ def ATOMIC_LOAD_SUB_I64 : Atomic2Ops; ++ def ATOMIC_LOAD_AND_I64 : Atomic2Ops; ++ def ATOMIC_LOAD_OR_I64 : Atomic2Ops; ++ def ATOMIC_LOAD_XOR_I64 : Atomic2Ops; ++ def ATOMIC_LOAD_NAND_I64 : Atomic2Ops; ++ def ATOMIC_SWAP_I64 : Atomic2Ops; ++ def ATOMIC_CMP_SWAP_I64 : AtomicCmpSwap; ++ ++ def ATOMIC_LOAD_MAX_I64 : Atomic2Ops; ++ def ATOMIC_LOAD_MIN_I64 : Atomic2Ops; ++ def ATOMIC_LOAD_UMAX_I64 : Atomic2Ops; ++ def ATOMIC_LOAD_UMIN_I64 : Atomic2Ops; ++} ++ ++def ATOMIC_LOAD_ADD_I64_POSTRA : Atomic2OpsPostRA; ++def ATOMIC_LOAD_SUB_I64_POSTRA : Atomic2OpsPostRA; ++def ATOMIC_LOAD_AND_I64_POSTRA : Atomic2OpsPostRA; ++def ATOMIC_LOAD_OR_I64_POSTRA : Atomic2OpsPostRA; ++def ATOMIC_LOAD_XOR_I64_POSTRA : Atomic2OpsPostRA; ++def ATOMIC_LOAD_NAND_I64_POSTRA : Atomic2OpsPostRA; ++ ++def ATOMIC_SWAP_I64_POSTRA : Atomic2OpsPostRA; ++ ++def ATOMIC_CMP_SWAP_I64_POSTRA : AtomicCmpSwapPostRA; ++ ++def ATOMIC_LOAD_MAX_I64_POSTRA : Atomic2OpsPostRA; ++ ++def ATOMIC_LOAD_MIN_I64_POSTRA : Atomic2OpsPostRA; ++ ++def ATOMIC_LOAD_UMAX_I64_POSTRA : Atomic2OpsPostRA; ++ ++def ATOMIC_LOAD_UMIN_I64_POSTRA : Atomic2OpsPostRA; ++ ++def : LoongArchPat<(atomic_load_8 addr:$a), (LD_B addr:$a)>, GPR_64; ++def : LoongArchPat<(atomic_load_16 addr:$a), (LD_H addr:$a)>, GPR_64; ++def : LoongArchPat<(atomic_load_32 addrimm14lsl2:$a), (LDPTR_W addrimm14lsl2:$a)>, GPR_64; ++def : LoongArchPat<(atomic_load_32 addr:$a), (LD_W addr:$a)>, GPR_64; ++def : LoongArchPat<(atomic_load_64 addrimm14lsl2:$a), (LDPTR_D addrimm14lsl2:$a)>, GPR_64; ++def : LoongArchPat<(atomic_load_64 addr:$a), (LD_D addr:$a)>, GPR_64; ++ ++def : LoongArchPat<(atomic_store_8 addr:$a, GPR64:$v), ++ (ST_B GPR64:$v, addr:$a)>, GPR_64; ++def : LoongArchPat<(atomic_store_16 addr:$a, GPR64:$v), ++ (ST_H GPR64:$v, addr:$a)>, GPR_64; ++def : LoongArchPat<(atomic_store_32 addrimm14lsl2:$a, GPR64:$v), ++ (STPTR_W GPR64:$v, addrimm14lsl2:$a)>, GPR_64; ++def : LoongArchPat<(atomic_store_32 addr:$a, GPR64:$v), ++ (ST_W GPR64:$v, addr:$a)>, GPR_64; ++def : LoongArchPat<(atomic_store_64 addrimm14lsl2:$a, GPR64:$v), ++ (STPTR_D GPR64:$v, addrimm14lsl2:$a)>, GPR_64; ++def : LoongArchPat<(atomic_store_64 addr:$a, GPR64:$v), ++ (ST_D GPR64:$v, addr:$a)>, GPR_64; ++ ++def : LoongArchPat<(bswap GPR64:$rt), (REVH_D (REVB_4H GPR64:$rt))>; ++ ++def immZExt5 : ImmLeaf; ++ ++def immZExtRange2To64 : PatLeaf<(imm), [{ ++ return isUInt<7>(N->getZExtValue()) && (N->getZExtValue() >= 2) && ++ (N->getZExtValue() <= 64); ++}]>; ++ ++// bstrins and bstrpick ++class InsBase ++ : InstForm<(outs RO:$rd), (ins RO:$rj, ImmOpnd:$msbd, ImmOpnd:$lsbd, RO:$src), ++ !strconcat(opstr, "\t$rd, $rj, $msbd, $lsbd"), ++ [(set RO:$rd, (OpNode RO:$rj, ImmOpnd:$msbd, ImmOpnd:$lsbd, RO:$src))], ++ FrmR, opstr> { ++ let Constraints = "$src = $rd"; ++ } ++ ++class InsBase_32 ++ : InstForm<(outs RO:$rd), (ins RO:$rj, ImmOpnd:$msbw, ImmOpnd:$lsbw, RO:$src), ++ !strconcat(opstr, "\t$rd, $rj, $msbw, $lsbw"), ++ [(set RO:$rd, (OpNode RO:$rj, ImmOpnd:$msbw, ImmOpnd:$lsbw, RO:$src))], ++ FrmR, opstr> { ++ let Constraints = "$src = $rd"; ++} ++ ++class PickBase ++ : InstForm<(outs RO:$rd), (ins RO:$rj, 
ImmOpnd:$msbd, ImmOpnd:$lsbd), ++ !strconcat(opstr, "\t$rd, $rj, $msbd, $lsbd"), ++ [(set RO:$rd, (Op RO:$rj, ImmOpnd:$msbd, ImmOpnd:$lsbd))], ++ FrmR, opstr>; ++ ++class PickBase_32 ++ : InstForm<(outs RO:$rd), (ins RO:$rj, ImmOpnd:$msbw, ImmOpnd:$lsbw), ++ !strconcat(opstr, "\t$rd, $rj, $msbw, $lsbw"), ++ [(set RO:$rd, (Op RO:$rj, ImmOpnd:$msbw, ImmOpnd:$lsbw))], ++ FrmR, opstr>; ++ ++ def BSTRINS_D : InsBase<"bstrins.d", GPR64Opnd, uimm6, LoongArchBstrins>, ++ INSERT_BIT64<0>; ++ def BSTRPICK_D : PickBase<"bstrpick.d", GPR64Opnd, uimm6, LoongArchBstrpick>, ++ INSERT_BIT64<1>; ++ ++let isCodeGenOnly = 1 in { ++ def ZEXT64_32 : InstForm<(outs GPR64Opnd:$rd), ++ (ins GPR32Opnd:$rj, uimm6:$msbd, ++ uimm6:$lsbd), ++ "bstrpick.d $rd, $rj, $msbd, $lsbd", [], FrmR, "bstrpick.d">, ++ INSERT_BIT64<1>; ++} ++ ++//32-to-64-bit extension ++def : LoongArchPat<(i64 (zext GPR32:$src)), (ZEXT64_32 GPR32:$src, 31, 0)>; ++def : LoongArchPat<(i64 (extloadi1 addr:$src)), (LD_B addr:$src)>, ++ GPR_64; ++def : LoongArchPat<(i64 (extloadi8 addr:$src)), (LD_B addr:$src)>, ++ GPR_64; ++def : LoongArchPat<(i64 (extloadi16 addr:$src)), (LD_H addr:$src)>, ++ GPR_64; ++def : LoongArchPat<(i64 (extloadi32 addr:$src)), (LD_W addr:$src)>, ++ GPR_64; ++ ++class LDX_FT_LA : ++ InstForm<(outs DRC:$rd), (ins PtrRC:$rj, PtrRC:$rk), ++ !strconcat(opstr, "\t$rd, $rj, $rk"), ++ [(set DRC:$rd, (OpNode (add iPTR:$rj, iPTR:$rk)))], ++ FrmR, opstr> { ++ let AddedComplexity = 20; ++ let canFoldAsLoad = 1; ++ string BaseOpcode = opstr; ++ let mayLoad = 1; ++} ++ ++class STX_FT_LA : ++ InstForm<(outs), (ins DRC:$rd, PtrRC:$rj, PtrRC:$rk), ++ !strconcat(opstr, "\t$rd, $rj, $rk"), ++ [(OpNode DRC:$rd, (add iPTR:$rj, iPTR:$rk))], ++ FrmI, opstr> { ++ string BaseOpcode = opstr; ++ let mayStore = 1; ++ let AddedComplexity = 20; ++} ++ ++ ++def LDX_B : LDX_FT_LA<"ldx.b", GPR64Opnd, sextloadi8>, ++ R3MI<0b00000000>; ++def LDX_H : LDX_FT_LA<"ldx.h", GPR64Opnd, sextloadi16>, ++ R3MI<0b00001000>; ++def LDX_W : LDX_FT_LA<"ldx.w", GPR64Opnd, sextloadi32>, ++ R3MI<0b00010000>; ++def LDX_D : LDX_FT_LA<"ldx.d", GPR64Opnd, load>, ++ R3MI<0b00011000>; ++def STX_B : STX_FT_LA<"stx.b", GPR64Opnd, truncstorei8>, ++ R3MI<0b00100000>; ++def STX_H : STX_FT_LA<"stx.h", GPR64Opnd, truncstorei16>, ++ R3MI<0b00101000>; ++def STX_W : STX_FT_LA<"stx.w", GPR64Opnd, truncstorei32>, ++ R3MI<0b00110000>; ++def STX_D : STX_FT_LA<"stx.d", GPR64Opnd, store>, ++ R3MI<0b00111000>; ++def LDX_BU : LDX_FT_LA<"ldx.bu", GPR64Opnd, extloadi8>, ++ R3MI<0b01000000>; ++def LDX_HU : LDX_FT_LA<"ldx.hu", GPR64Opnd, extloadi16>, ++ R3MI<0b01001000>; ++def LDX_WU : LDX_FT_LA<"ldx.wu", GPR64Opnd, zextloadi32>, ++ R3MI<0b01010000>; ++ ++//def : LoongArchPat<(bswap GPR64:$rj), (REVH_D (REVB_4H GPR64:$rj))>; ++//def : LoongArchPat<(bswap GPR64:$rj), (ROTRI_D (REVB_2W GPR64:$rj), 32)>; ++def : LoongArchPat<(bswap GPR64:$rj), (REVB_D GPR64:$rj)>; ++ ++let isCodeGenOnly = 1 in { ++ def SLLI_D_64_32 : Shift_Imm64<"", GPR64Opnd>, R2_IMM6<0b00>, GPR_64 { ++ let imm6 = 0; ++ let AsmString = "slli.d\t$rd, $rj, 32"; ++ let InOperandList = (ins GPR32:$rj); ++ let OutOperandList = (outs GPR64:$rd); ++ } ++ ++ let isMoveReg = 1, imm5 = 0, ++ AsmString = "slli.w\t$rd, $rj, 0", ++ OutOperandList = (outs GPR64:$rd) in { ++ let InOperandList = (ins GPR32:$rj) in ++ def SLLI_W_64_32 : Shift_Imm32<"", GPR32Opnd>, R2_IMM5<0b00>, GPR_64; ++ let InOperandList = (ins GPR64:$rj) in ++ def SLLI_W_64_64 : Shift_Imm32<"", GPR32Opnd>, R2_IMM5<0b00>, GPR_64; ++ } ++ ++ let AsmString = "sltui\t$rd, $rj, $imm12", ++ 
OutOperandList = (outs GPR64:$rd) in { ++ let InOperandList = (ins GPR64:$rj, simm12:$imm12) in ++ def SLTUI_64 : SetCC_I<"", GPR64Opnd, simm12>, R2_IMM12<0b001>, GPR_64; ++ } ++} ++ ++// 32-to-64-bit extension ++//def : LoongArchPat<(i64 (zext GPR32:$src)), (SRLI_D (SLLI_D_64_32 GPR32:$src), 32)>, GPR_64; ++def : LoongArchPat<(i64 (sext GPR32:$src)), (SLLI_W_64_32 GPR32:$src)>, GPR_64; ++def : LoongArchPat<(i64 (sext_inreg GPR64:$src, i32)), (SLLI_W_64_64 GPR64:$src)>, GPR_64; ++ ++let Uses = [A0, A1], isTerminator = 1, isReturn = 1, isBarrier = 1, isCTI = 1 in { ++ def LoongArcheh_return32 : LoongArchPseudo<(outs), (ins GPR32:$spoff, GPR32:$dst), ++ [(LoongArchehret GPR32:$spoff, GPR32:$dst)]>; ++ def LoongArcheh_return64 : LoongArchPseudo<(outs), (ins GPR64:$spoff,GPR64:$dst), ++ [(LoongArchehret GPR64:$spoff, GPR64:$dst)]>; ++} ++ ++def : LoongArchPat<(select i32:$cond, i64:$t, i64:$f), ++ (OR (MASKEQZ i64:$t, (SLLI_W_64_32 i32:$cond)), ++ (MASKNEZ i64:$f, (SLLI_W_64_32 i32:$cond)))>; ++// setcc patterns ++multiclass SeteqPats { ++ def : LoongArchPat<(seteq RC:$lhs, 0), ++ (SLTiuOp RC:$lhs, 1)>; ++ def : LoongArchPat<(setne RC:$lhs, 0), ++ (SLTuOp ZEROReg, RC:$lhs)>; ++ def : LoongArchPat<(seteq RC:$lhs, RC:$rhs), ++ (SLTiuOp (XOROp RC:$lhs, RC:$rhs), 1)>; ++ def : LoongArchPat<(setne RC:$lhs, RC:$rhs), ++ (SLTuOp ZEROReg, (XOROp RC:$lhs, RC:$rhs))>; ++} ++ ++multiclass SetlePats { ++ def : LoongArchPat<(setle RC:$lhs, RC:$rhs), ++ (XORiOp (SLTOp RC:$rhs, RC:$lhs), 1)>; ++ def : LoongArchPat<(setule RC:$lhs, RC:$rhs), ++ (XORiOp (SLTuOp RC:$rhs, RC:$lhs), 1)>; ++} ++ ++multiclass SetgtPats { ++ def : LoongArchPat<(setgt RC:$lhs, RC:$rhs), ++ (SLTOp RC:$rhs, RC:$lhs)>; ++ def : LoongArchPat<(setugt RC:$lhs, RC:$rhs), ++ (SLTuOp RC:$rhs, RC:$lhs)>; ++} ++ ++multiclass SetgePats { ++ def : LoongArchPat<(setge RC:$lhs, RC:$rhs), ++ (XORiOp (SLTOp RC:$lhs, RC:$rhs), 1)>; ++ def : LoongArchPat<(setuge RC:$lhs, RC:$rhs), ++ (XORiOp (SLTuOp RC:$lhs, RC:$rhs), 1)>; ++} ++ ++multiclass SetgeImmPats { ++ def : LoongArchPat<(setge RC:$lhs, immSExt12:$rhs), ++ (XORiOp (SLTiOp RC:$lhs, immSExt12:$rhs), 1)>; ++ def : LoongArchPat<(setuge RC:$lhs, immSExt12:$rhs), ++ (XORiOp (SLTiuOp RC:$lhs, immSExt12:$rhs), 1)>; ++} ++ ++class LoadRegImmPat : ++ LoongArchPat<(ValTy (Node addrRegImm:$a)), (LoadInst addrRegImm:$a)>; ++ ++class StoreRegImmPat : ++ LoongArchPat<(Node ValTy:$v, addrRegImm:$a), (StoreInst ValTy:$v, addrRegImm:$a)>; ++ ++class LoadRegImm14Lsl2Pat : ++ LoongArchPat<(ValTy (Node addrimm14lsl2:$a)), (LoadInst addrimm14lsl2:$a)>; ++ ++class StoreRegImm14Lsl2Pat : ++ LoongArchPat<(Node ValTy:$v, addrimm14lsl2:$a), (StoreInst ValTy:$v, addrimm14lsl2:$a)>; ++ ++// Patterns for loads/stores with a reg+imm operand. ++// let AddedComplexity = 40 so that these instructions are selected instead of ++// LDX/STX which needs one more register and an ANDI instruction. 
++let AddedComplexity = 40 in { ++ def : LoadRegImmPat; ++ def : LoadRegImmPat; ++ def : LoadRegImmPat; ++ def : LoadRegImmPat; ++ def : LoadRegImmPat; ++ def : LoadRegImmPat; ++ def : LoadRegImmPat; ++ def : StoreRegImmPat; ++ def : StoreRegImmPat; ++ def : StoreRegImmPat; ++ def : StoreRegImmPat; ++ ++ def : LoadRegImm14Lsl2Pat; ++ def : LoadRegImm14Lsl2Pat; ++ def : StoreRegImm14Lsl2Pat; ++ def : StoreRegImm14Lsl2Pat; ++} ++ ++//===----------------------------------------------------------------------===// ++// Base Extension Support ++//===----------------------------------------------------------------------===// ++ ++include "LoongArch32InstrInfo.td" ++include "LoongArchInstrInfoF.td" ++include "LoongArchLSXInstrFormats.td" ++include "LoongArchLSXInstrInfo.td" ++include "LoongArchLASXInstrFormats.td" ++include "LoongArchLASXInstrInfo.td" ++ ++defm : SeteqPats, GPR_64; ++defm : SetlePats, GPR_64; ++defm : SetgtPats, GPR_64; ++defm : SetgePats, GPR_64; ++defm : SetgeImmPats, GPR_64; ++ ++/// ++/// for relocation ++/// ++let isCodeGenOnly = 1 in { ++def PCALAU12I_ri : SI20<"pcalau12i", GPR64Opnd, simm20>, R1_SI20<0b0001101>; ++def ORI_rri : Int_Reg2_Imm12<"ori", GPR64Opnd, uimm12, or>, R2_IMM12<0b110>; ++def LU12I_W_ri : SI20<"lu12i.w", GPR64Opnd, simm20>, R1_SI20<0b0001010>; ++def LU32I_D_ri : SI20<"lu32i.d", GPR64Opnd, simm20>, R1_SI20<0b0001011>; ++def LU52I_D_rri : Int_Reg2_Imm12<"lu52i.d", GPR64Opnd, simm12>, R2_IMM12<0b100>; ++def ADDI_D_rri : Int_Reg2_Imm12<"addi.d", GPR64Opnd, simm12, add>, R2_IMM12<0b011>; ++def LD_D_rri : Ld<"ld.d", GPR64Opnd, mem_simmptr, load>, LOAD_STORE<0b0011>; ++def ADD_D_rrr : Int_Reg3<"add.d", GPR64Opnd, add>, R3I<0b0100001>; ++def LDX_D_rrr : LDX_FT_LA<"ldx.d", GPR64Opnd, load>, ++ R3MI<0b00011000>; ++} ++ ++//===----------------------------------------------------------------------===// ++// Assembler Pseudo Instructions ++//===----------------------------------------------------------------------===// ++def LoadImm32 : LoongArchAsmPseudoInst<(outs GPR32Opnd:$rd), ++ (ins uimm32_coerced:$imm32), ++ "li.w\t$rd, $imm32">; ++def LoadImm64 : LoongArchAsmPseudoInst<(outs GPR64Opnd:$rd), ++ (ins imm64:$imm64), ++ "li.d\t$rd, $imm64">; ++// load address ++def LoadAddrLocal : LoongArchAsmPseudoInst<(outs GPR64Opnd:$rd), ++ (ins imm64:$imm64), ++ "la.local\t$rd, $imm64">; ++def : InstAlias<"la.pcrel $rd, $imm", ++ (LoadAddrLocal GPR64Opnd:$rd, imm64:$imm), 1>; ++def LoadAddrGlobal : LoongArchAsmPseudoInst<(outs GPR64Opnd:$rd), ++ (ins imm64:$imm64), ++ "la.global\t$rd, $imm64">; ++def LoadAddrGlobal_Alias : LoongArchAsmPseudoInst<(outs GPR64Opnd:$rd), ++ (ins imm64:$imm64), ++ "la\t$rd, $imm64">; ++def : InstAlias<"la.got $rd, $imm", ++ (LoadAddrGlobal GPR64Opnd:$rd, imm64:$imm), 1>; ++ ++def LoadAddrTLS_LE : LoongArchAsmPseudoInst<(outs GPR64Opnd:$rd), ++ (ins imm64:$imm64), ++ "la.tls.le\t$rd, $imm64">; ++def LoadAddrTLS_IE : LoongArchAsmPseudoInst<(outs GPR64Opnd:$rd), ++ (ins imm64:$imm64), ++ "la.tls.ie\t$rd, $imm64">; ++def LoadAddrTLS_GD : LoongArchAsmPseudoInst<(outs GPR64Opnd:$rd), ++ (ins imm64:$imm64), ++ "la.tls.gd\t$rd, $imm64">; ++def LoadAddrTLS_LD : LoongArchAsmPseudoInst<(outs GPR64Opnd:$rd), ++ (ins imm64:$imm64), ++ "la.tls.ld\t$rd, $imm64">; ++ ++// load address with a temp reg ++def LoadAddrLocalRR : LoongArchAsmPseudoInst<(outs GPR64Opnd:$rd), ++ (ins GPR64Opnd:$rt, imm64:$imm64), ++ "la.local\t$rd, $rt, $imm64">; ++def LoadAddrGlobalRR : LoongArchAsmPseudoInst<(outs GPR64Opnd:$rd), ++ (ins GPR64Opnd:$rt, imm64:$imm64), ++ 
"la.global\t$rd, $rt, $imm64">; ++def LoadAddrTLS_IE_RR : LoongArchAsmPseudoInst<(outs GPR64Opnd:$rd), ++ (ins GPR64Opnd:$rt, imm64:$imm64), ++ "la.tls.ie\t$rd, $rt, $imm64">; ++def LoadAddrTLS_GD_RR : LoongArchAsmPseudoInst<(outs GPR64Opnd:$rd), ++ (ins GPR64Opnd:$rt, imm64:$imm64), ++ "la.tls.gd\t$rd, $rt, $imm64">; ++def LoadAddrTLS_LD_RR : LoongArchAsmPseudoInst<(outs GPR64Opnd:$rd), ++ (ins GPR64Opnd:$rt, imm64:$imm64), ++ "la.tls.ld\t$rd, $rt, $imm64">; ++ ++// trap when div zero ++def PseudoTEQ : LoongArchPseudo<(outs), (ins GPR64Opnd:$rt), []>; ++ ++ ++def : LoongArchPat<(i64 (sext (i32 (add GPR32:$src, immSExt12:$imm12)))), ++ (INSERT_SUBREG (i64 (IMPLICIT_DEF)), ++ (ADDI_W GPR32:$src, immSExt12:$imm12), sub_32)>; ++ ++def : LoongArchPat<(i64 (sext (i32 (add GPR32:$src, GPR32:$src2)))), ++ (INSERT_SUBREG (i64 (IMPLICIT_DEF)), ++ (ADD_W GPR32:$src, GPR32:$src2), sub_32)>; ++ ++def : LoongArchPat<(i64 (sext (i32 (sub GPR32:$src, GPR32:$src2)))), ++ (INSERT_SUBREG (i64 (IMPLICIT_DEF)), ++ (SUB_W GPR32:$src, GPR32:$src2), sub_32)>; ++ ++def : LoongArchPat<(i64 (sext (i32 (mul GPR32:$src, GPR32:$src2)))), ++ (INSERT_SUBREG (i64 (IMPLICIT_DEF)), ++ (MUL_W GPR32:$src, GPR32:$src2), sub_32)>; ++ ++def : LoongArchPat<(i64 (sext (i32 (shl GPR32:$src, immZExt5:$imm5)))), ++ (INSERT_SUBREG (i64 (IMPLICIT_DEF)), ++ (SLLI_W GPR32:$src, immZExt5:$imm5), sub_32)>; ++ ++def : LoongArchPat<(i64 (sext (i32 (shl GPR32:$src, GPR32:$src2)))), ++ (INSERT_SUBREG (i64 (IMPLICIT_DEF)), ++ (SLL_W GPR32:$src, GPR32:$src2), sub_32)>; ++ ++def : LoongArchPat<(i64 (sext (i32 (srl GPR32:$src, immZExt5:$imm5)))), ++ (INSERT_SUBREG (i64 (IMPLICIT_DEF)), ++ (SRLI_W GPR32:$src, immZExt5:$imm5), sub_32)>; ++ ++def : LoongArchPat<(i64 (sext (i32 (srl GPR32:$src, GPR32:$src2)))), ++ (INSERT_SUBREG (i64 (IMPLICIT_DEF)), ++ (SRL_W GPR32:$src, GPR32:$src2), sub_32)>; ++ ++def : LoongArchPat<(i64 (sext (i32 (sra GPR32:$src, immZExt5:$imm5)))), ++ (INSERT_SUBREG (i64 (IMPLICIT_DEF)), ++ (SRAI_W GPR32:$src, immZExt5:$imm5), sub_32)>; ++ ++def : LoongArchPat<(i64 (sext (i32 (sra GPR32:$src, GPR32:$src2)))), ++ (INSERT_SUBREG (i64 (IMPLICIT_DEF)), ++ (SRA_W GPR32:$src, GPR32:$src2), sub_32)>; ++ ++ ++def : LoongArchPat<(i64 (xor GPR64:$rj, (i64 -1))), ++ (NOR ZERO_64, GPR64:$rj)>; ++ ++def : LoongArchPat<(and GPR64:$rj, (i64 (xor GPR64:$rk, (i64 -1)))), ++ (ANDN GPR64:$rj, GPR64:$rk)>; ++ ++def : LoongArchPat<(i64 (or GPR64:$rj, (xor GPR64:$rk, (i64 -1)))), ++ (ORN GPR64:$rj, GPR64:$rk)>; ++ ++def : LoongArchPat<(i64 (zext (i32 (seteq GPR64:$rj, (i64 0))))), ++ (SLTUI_64 GPR64:$rj, (i64 1))>; ++ ++ ++def : LoongArchPat<(i64 (zext (i32 (srl GPR32:$src, immZExt5:$imm5)))), ++ (BSTRPICK_D (INSERT_SUBREG ++ (i64 (IMPLICIT_DEF)), GPR32:$src, sub_32), ++ (i32 31), immZExt5:$imm5)>; +diff --git a/lib/Target/LoongArch/LoongArchInstrInfoF.td b/lib/Target/LoongArch/LoongArchInstrInfoF.td +new file mode 100644 +index 00000000..73711ff7 +--- /dev/null ++++ b/lib/Target/LoongArch/LoongArchInstrInfoF.td +@@ -0,0 +1,629 @@ ++//===- LoongArchInstrInfoF.td - Target Description for LoongArch Target -*- tablegen -*-=// ++// ++// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. ++// See https://llvm.org/LICENSE.txt for license information. ++// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception ++// ++//===----------------------------------------------------------------------===// ++// ++// This file contains the LoongArch implementation of the TargetInstrInfo class. 
++// ++//===----------------------------------------------------------------------===// ++// FP immediate patterns. ++def fpimm0 : PatLeaf<(fpimm), [{ ++ return N->isExactlyValue(+0.0); ++}]>; ++ ++def fpimm0neg : PatLeaf<(fpimm), [{ ++ return N->isExactlyValue(-0.0); ++}]>; ++ ++def fpimm1 : PatLeaf<(fpimm), [{ ++ return N->isExactlyValue(+1.0); ++}]>; ++ ++def IsNotSoftFloat : Predicate<"!Subtarget->useSoftFloat()">; ++ ++class HARDFLOAT { list HardFloatPredicate = [IsNotSoftFloat]; } ++ ++def SDT_LoongArchTruncIntFP : SDTypeProfile<1, 1, [SDTCisFP<0>, SDTCisFP<1>]>; ++ ++def LoongArchTruncIntFP : SDNode<"LoongArchISD::TruncIntFP", SDT_LoongArchTruncIntFP>; ++ ++def SDT_LoongArchFPBrcond : SDTypeProfile<0, 3, [SDTCisInt<0>, ++ SDTCisVT<1, i32>, ++ SDTCisVT<2, OtherVT>]>; ++ ++def LoongArchFPBrcond : SDNode<"LoongArchISD::FPBrcond", SDT_LoongArchFPBrcond, ++ [SDNPHasChain, SDNPOptInGlue]>; ++ ++def SDT_LoongArchCMovFP : SDTypeProfile<1, 3, [SDTCisSameAs<0, 1>, SDTCisVT<2, i32>, ++ SDTCisSameAs<1, 3>]>; ++ ++def LoongArchCMovFP_T : SDNode<"LoongArchISD::CMovFP_T", SDT_LoongArchCMovFP, [SDNPInGlue]>; ++ ++def LoongArchCMovFP_F : SDNode<"LoongArchISD::CMovFP_F", SDT_LoongArchCMovFP, [SDNPInGlue]>; ++ ++def SDT_LoongArchFPCmp : SDTypeProfile<0, 3, [SDTCisSameAs<0, 1>, SDTCisFP<1>, ++ SDTCisVT<2, i32>]>; ++ ++def LoongArchFPCmp : SDNode<"LoongArchISD::FPCmp", SDT_LoongArchFPCmp, [SDNPOutGlue]>; ++ ++def SDT_LoongArchFSEL : SDTypeProfile<1, 3, [SDTCisSameAs<0, 1>, ++ SDTCisVT<2, i32>, ++ SDTCisSameAs<1, 3>]>; ++ ++def LoongArchFSEL : SDNode<"LoongArchISD::FSEL", SDT_LoongArchFSEL, ++ [SDNPInGlue]>; ++ ++//===---------------------------------------------------------------------===/ ++//Instruction Class Templates ++//===---------------------------------------------------------------------===/ ++ ++class Float_MOVF ++ : InstForm<(outs RO:$rd), (ins RC:$fj), ++ !strconcat(opstr, "\t$rd, $fj"), ++ [(set RO:$rd, (OpNode RC:$fj))], ++ FrmFR, opstr>, HARDFLOAT { ++ let isMoveReg = 1; ++} ++ ++class Float_MOVT ++ : InstForm<(outs RO:$fd), (ins RC:$rj), ++ !strconcat(opstr, "\t$fd, $rj"), ++ [(set RO:$fd, (OpNode RC:$rj))], ++ FrmFR, opstr>, HARDFLOAT { ++ let isMoveReg = 1; ++} ++ ++class Float_CVT ++ : InstForm<(outs RO:$fd), (ins RS:$fj), ++ !strconcat(opstr, "\t$fd, $fj"), ++ [(set RO:$fd, (OpNode RS:$fj))], ++ FrmFR, opstr>, ++ HARDFLOAT { ++ let hasSideEffects = 0; ++} ++ ++/// float mov ++class Gpr_2_Fcsr ++ : InstForm<(outs FCSROpnd:$fcsr), (ins RO:$rj), ++ !strconcat(opstr, "\t$fcsr, $rj"), ++ [(set FCSROpnd:$fcsr, (OpNode RO:$rj))], ++ FrmR, opstr>; ++class Fcsr_2_Gpr ++ : InstForm<(outs RO:$rd), (ins FCSROpnd:$fcsr), ++ !strconcat(opstr, "\t$rd, $fcsr"), ++ [(set RO:$rd, (OpNode FCSROpnd:$fcsr))], ++ FrmR, opstr>; ++class Fgr_2_Fcfr ++ : InstForm<(outs FCFROpnd:$cd), (ins RO:$fj), ++ !strconcat(opstr, "\t$cd, $fj"), ++ [(set FCFROpnd:$cd, (OpNode RO:$fj))], ++ FrmR, opstr>; ++class Fcfr_2_Fgr ++ : InstForm<(outs RO:$fd), (ins FCFROpnd:$cj), ++ !strconcat(opstr, "\t$fd, $cj"), ++ [(set RO:$fd, (OpNode FCFROpnd:$cj))], ++ FrmR, opstr>; ++class Gpr_2_Fcfr ++ : InstForm<(outs FCFROpnd:$cd), (ins RO:$rj), ++ !strconcat(opstr, "\t$cd, $rj"), ++ [(set FCFROpnd:$cd, (OpNode RO:$rj))], ++ FrmR, opstr>; ++class Fcfr_2_Gpr ++ : InstForm<(outs RO:$rd), (ins FCFROpnd:$cj), ++ !strconcat(opstr, "\t$rd, $cj"), ++ [(set RO:$rd, (OpNode FCFROpnd:$cj))], ++ FrmR, opstr>; ++ ++class FLDX : ++ InstForm<(outs DRC:$fd), (ins PtrRC:$rj, PtrRC:$rk), ++ !strconcat(opstr, "\t$fd, $rj, $rk"), ++ [(set DRC:$fd, (OpNode 
(add iPTR:$rj, iPTR:$rk)))], ++ FrmR, opstr> { ++ let AddedComplexity = 20; ++} ++ ++class FSTX : ++ InstForm<(outs), (ins DRC:$fd, PtrRC:$rj, PtrRC:$rk), ++ !strconcat(opstr, "\t$fd, $rj, $rk"), ++ [(OpNode DRC:$fd, (add iPTR:$rj, iPTR:$rk))], ++ FrmR, opstr> { ++ let AddedComplexity = 20; ++} ++ ++/// f{maxa/mina}.{s/d} ++class Float_Reg3_Fmaxa ++ : InstForm<(outs RO:$fd), (ins RO:$fj, RO:$fk), ++ !strconcat(opstr, "\t$fd, $fj, $fk"), ++ [], FrmR, opstr>; ++/// frecip ++class Float_Reg2_Frecip ++ : InstForm<(outs RO:$fd), (ins RO:$fj), ++ !strconcat(opstr, "\t$fd, $fj"), ++ [(set RO:$fd, (OpNode fpimm1, RO:$fj))], ++ FrmR, opstr>; ++/// frsqrt ++class Float_Reg2_Frsqrt ++ : InstForm<(outs RO:$fd), (ins RO:$fj), ++ !strconcat(opstr, "\t$fd, $fj"), ++ [(set RO:$fd, (OpNode fpimm1, (fsqrt RO:$fj)))], ++ FrmR, opstr>; ++ ++class BceqzBr : ++ InstForm<(outs), (ins FCFROpnd:$cj, opnd:$offset), ++ !strconcat(opstr, "\t$cj, $offset"), ++ [(LoongArchFPBrcond Op, FCFROpnd:$cj, bb:$offset)], ++ FrmFI, opstr>, HARDFLOAT { ++ let isBranch = 1; ++ let isTerminator = 1; ++ let hasFCCRegOperand = 1; ++} ++ ++class FCMP_COND ++ : InstForm<(outs FCFROpnd:$cd), (ins RO:$fj, RO:$fk), ++ !strconcat("fcmp.", CondStr, ".", TypeStr, "\t$cd, $fj, $fk"), ++ [(set FCFROpnd:$cd, (OpNode RO:$fj, RO:$fk))], ++ FrmOther, ++ !strconcat("fcmp.", CondStr, ".", TypeStr)> { ++ bit isCTI = 1; // for what? from Mips32r6InstrInfo.td line 219 ++} ++ ++class FIELD_CMP_COND Val> { ++ bits<5> Value = Val; ++} ++def FIELD_CMP_COND_CAF : FIELD_CMP_COND<0x0>; ++def FIELD_CMP_COND_CUN : FIELD_CMP_COND<0x8>; ++def FIELD_CMP_COND_CEQ : FIELD_CMP_COND<0x4>; ++def FIELD_CMP_COND_CUEQ : FIELD_CMP_COND<0xC>; ++def FIELD_CMP_COND_CLT : FIELD_CMP_COND<0x2>; ++def FIELD_CMP_COND_CULT : FIELD_CMP_COND<0xA>; ++def FIELD_CMP_COND_CLE : FIELD_CMP_COND<0x6>; ++def FIELD_CMP_COND_CULE : FIELD_CMP_COND<0xE>; ++def FIELD_CMP_COND_CNE : FIELD_CMP_COND<0x10>; ++def FIELD_CMP_COND_COR : FIELD_CMP_COND<0x14>; ++def FIELD_CMP_COND_CUNE : FIELD_CMP_COND<0x18>; ++def FIELD_CMP_COND_SAF : FIELD_CMP_COND<0x1>; ++def FIELD_CMP_COND_SUN : FIELD_CMP_COND<0x9>; ++def FIELD_CMP_COND_SEQ : FIELD_CMP_COND<0x5>; ++def FIELD_CMP_COND_SUEQ : FIELD_CMP_COND<0xD>; ++def FIELD_CMP_COND_SLT : FIELD_CMP_COND<0x3>; ++def FIELD_CMP_COND_SULT : FIELD_CMP_COND<0xB>; ++def FIELD_CMP_COND_SLE : FIELD_CMP_COND<0x7>; ++def FIELD_CMP_COND_SULE : FIELD_CMP_COND<0xF>; ++def FIELD_CMP_COND_SNE : FIELD_CMP_COND<0x11>; ++def FIELD_CMP_COND_SOR : FIELD_CMP_COND<0x15>; ++def FIELD_CMP_COND_SUNE : FIELD_CMP_COND<0x19>; ++ ++multiclass FCMP_COND_M op, string TypeStr, ++ RegisterOperand RO> { ++ def FCMP_CAF_#NAME : FCMP_COND<"caf", TypeStr, RO>, ++ R2_COND; ++ def FCMP_CUN_#NAME : FCMP_COND<"cun", TypeStr, RO, setuo>, ++ R2_COND; ++ def FCMP_CEQ_#NAME : FCMP_COND<"ceq", TypeStr, RO, setoeq>, ++ R2_COND; ++ def FCMP_CUEQ_#NAME : FCMP_COND<"cueq", TypeStr, RO, setueq>, ++ R2_COND; ++ def FCMP_CLT_#NAME : FCMP_COND<"clt", TypeStr, RO, setolt>, ++ R2_COND; ++ def FCMP_CULT_#NAME : FCMP_COND<"cult", TypeStr, RO, setult>, ++ R2_COND; ++ def FCMP_CLE_#NAME : FCMP_COND<"cle", TypeStr, RO, setole>, ++ R2_COND; ++ def FCMP_CULE_#NAME : FCMP_COND<"cule", TypeStr, RO, setule>, ++ R2_COND; ++ def FCMP_CNE_#NAME : FCMP_COND<"cne", TypeStr, RO, setone>, ++ R2_COND; ++ def FCMP_COR_#NAME : FCMP_COND<"cor", TypeStr, RO, seto>, ++ R2_COND; ++ def FCMP_CUNE_#NAME : FCMP_COND<"cune", TypeStr, RO, setune>, ++ R2_COND; ++ ++ def FCMP_SAF_#NAME : FCMP_COND<"saf", TypeStr, RO>, ++ R2_COND; ++ def FCMP_SUN_#NAME : 
FCMP_COND<"sun", TypeStr, RO>, ++ R2_COND; ++ def FCMP_SEQ_#NAME : FCMP_COND<"seq", TypeStr, RO>, ++ R2_COND; ++ def FCMP_SUEQ_#NAME : FCMP_COND<"sueq", TypeStr, RO>, ++ R2_COND; ++ def FCMP_SLT_#NAME : FCMP_COND<"slt", TypeStr, RO>, ++ R2_COND; ++ def FCMP_SULT_#NAME : FCMP_COND<"sult", TypeStr, RO>, ++ R2_COND; ++ def FCMP_SLE_#NAME : FCMP_COND<"sle", TypeStr, RO>, ++ R2_COND; ++ def FCMP_SULE_#NAME : FCMP_COND<"sule", TypeStr, RO>, ++ R2_COND; ++ def FCMP_SNE_#NAME : FCMP_COND<"sne", TypeStr, RO>, ++ R2_COND; ++ def FCMP_SOR_#NAME : FCMP_COND<"sor", TypeStr, RO>, ++ R2_COND; ++ def FCMP_SUNE_#NAME : FCMP_COND<"sune", TypeStr, RO>, ++ R2_COND; ++} ++ ++//// comparisons supported via another comparison ++//multiclass FCmp_Pats { ++// def : LoongArchPat<(seteq VT:$lhs, VT:$rhs), ++// (!cast("FCMP_CEQ_"#NAME) VT:$lhs, VT:$rhs)>; ++// def : LoongArchPat<(setgt VT:$lhs, VT:$rhs), ++// (!cast("FCMP_CLE_"#NAME) VT:$rhs, VT:$lhs)>; ++// def : LoongArchPat<(setge VT:$lhs, VT:$rhs), ++// (!cast("FCMP_CLT_"#NAME) VT:$rhs, VT:$lhs)>; ++// def : LoongArchPat<(setlt VT:$lhs, VT:$rhs), ++// (!cast("FCMP_CLT_"#NAME) VT:$lhs, VT:$rhs)>; ++// def : LoongArchPat<(setle VT:$lhs, VT:$rhs), ++// (!cast("FCMP_CLE_"#NAME) VT:$lhs, VT:$rhs)>; ++// def : LoongArchPat<(setne VT:$lhs, VT:$rhs), ++// (NOROp ++// (!cast("FCMP_CEQ_"#NAME) VT:$lhs, VT:$rhs), ++// ZEROReg)>; ++//} ++ ++ ++/// ++/// R2 ++/// ++def FABS_S : Float_Reg2<"fabs.s", FGR32Opnd, fabs>, R2F<0b0100000001>; ++def FABS_D : Float_Reg2<"fabs.d", FGR64Opnd, fabs>, R2F<0b0100000010>; ++def FNEG_S : Float_Reg2<"fneg.s", FGR32Opnd, fneg>, R2F<0b0100000101>; ++def FNEG_D : Float_Reg2<"fneg.d", FGR64Opnd, fneg>, R2F<0b0100000110>; ++def FLOGB_S : Float_Reg2<"flogb.s", FGR32Opnd>, R2F<0b0100001001>; ++def FLOGB_D : Float_Reg2<"flogb.d", FGR64Opnd>, R2F<0b0100001010>; ++def FCLASS_S : Float_Reg2<"fclass.s", FGR32Opnd>, R2F<0b0100001101>; ++def FCLASS_D : Float_Reg2<"fclass.d", FGR64Opnd>, R2F<0b0100001110>; ++def FSQRT_S : Float_Reg2<"fsqrt.s", FGR32Opnd, fsqrt>, R2F<0b0100010001>; ++def FSQRT_D : Float_Reg2<"fsqrt.d", FGR64Opnd, fsqrt>, R2F<0b0100010010>; ++def FRECIP_S : Float_Reg2_Frecip<"frecip.s", FGR32Opnd, fdiv>, R2F<0b0100010101>; ++def FRECIP_D : Float_Reg2_Frecip<"frecip.d", FGR64Opnd, fdiv>, R2F<0b0100010110>; ++def FRSQRT_S : Float_Reg2_Frsqrt<"frsqrt.s", FGR32Opnd, fdiv>, R2F<0b0100011001>; ++def FRSQRT_D : Float_Reg2_Frsqrt<"frsqrt.d", FGR64Opnd, fdiv>, R2F<0b0100011010>; ++def FMOV_S : Float_Reg2<"fmov.s", FGR32Opnd>, R2F<0b0100100101>; ++def FMOV_D : Float_Reg2<"fmov.d", FGR64Opnd>, R2F<0b0100100110>; ++ ++def MOVGR2FR_W : Float_MOVT<"movgr2fr.w", FGR32Opnd, GPR32Opnd, bitconvert>, MOVFI<0b0100101001>; ++def MOVGR2FR_D : Float_MOVT<"movgr2fr.d", FGR64Opnd, GPR64Opnd, bitconvert>, MOVFI<0b0100101010>; ++def MOVGR2FRH_W : Float_MOVT<"movgr2frh.w", FGR64Opnd, GPR32Opnd>, MOVFI<0b0100101011>; //not realize ++def MOVFR2GR_S : Float_MOVF<"movfr2gr.s", GPR32Opnd, FGR32Opnd, bitconvert>, MOVIF<0b0100101101>; ++def MOVFR2GR_D : Float_MOVF<"movfr2gr.d", GPR64Opnd, FGR64Opnd, bitconvert>, MOVIF<0b0100101110>; ++def MOVFRH2GR_S : Float_MOVF<"movfrh2gr.s", GPR32Opnd, FGR32Opnd>, MOVIF<0b0100101111>; //not realize ++ ++let isCodeGenOnly = 1 in { ++ def MOVFR2GR_DS : Float_MOVF<"movfr2gr.s", GPR64Opnd, FGR32Opnd>, MOVIF<0b0100101101>; ++} ++ ++def FCVT_S_D : Float_CVT<"fcvt.s.d", FGR32Opnd, FGR64Opnd>, R2F<0b1001000110>; ++def FCVT_D_S : Float_CVT<"fcvt.d.s", FGR64Opnd, FGR32Opnd>, R2F<0b1001001001>; ++ ++def FTINTRM_W_S : Float_Reg2<"ftintrm.w.s", 
FGR32Opnd>, R2F<0b1010000001>; ++def FTINTRM_W_D : Float_Reg2<"ftintrm.w.d", FGR64Opnd>, R2F<0b1010000010>; ++def FTINTRM_L_S : Float_Reg2<"ftintrm.l.s", FGR32Opnd>, R2F<0b1010001001>; ++def FTINTRM_L_D : Float_Reg2<"ftintrm.l.d", FGR64Opnd>, R2F<0b1010001010>; ++def FTINTRP_W_S : Float_Reg2<"ftintrp.w.s", FGR32Opnd>, R2F<0b1010010001>; ++def FTINTRP_W_D : Float_Reg2<"ftintrp.w.d", FGR64Opnd>, R2F<0b1010010010>; ++def FTINTRP_L_S : Float_Reg2<"ftintrp.l.s", FGR32Opnd>, R2F<0b1010011001>; ++def FTINTRP_L_D : Float_Reg2<"ftintrp.l.d", FGR64Opnd>, R2F<0b1010011010>; ++def FTINTRZ_W_S : Float_Reg2<"ftintrz.w.s", FGR32Opnd>, R2F<0b1010100001>; ++def FTINTRZ_L_D : Float_Reg2<"ftintrz.l.d", FGR64Opnd>, R2F<0b1010101010>; ++def FTINTRNE_W_S : Float_Reg2<"ftintrne.w.s", FGR32Opnd>, R2F<0b1010110001>; ++def FTINTRNE_W_D : Float_Reg2<"ftintrne.w.d", FGR64Opnd>, R2F<0b1010110010>; ++def FTINTRNE_L_S : Float_Reg2<"ftintrne.l.s", FGR32Opnd>, R2F<0b1010111001>; ++def FTINTRNE_L_D : Float_Reg2<"ftintrne.l.d", FGR64Opnd>, R2F<0b1010111010>; ++ ++def FTINT_W_S : Float_CVT<"ftint.w.s", FGR32Opnd, FGR32Opnd>, R2F<0b1011000001>; ++def FTINT_W_D : Float_CVT<"ftint.w.d", FGR32Opnd, FGR64Opnd>, R2F<0b1011000010>; ++def FTINT_L_S : Float_CVT<"ftint.l.s", FGR64Opnd, FGR32Opnd>, R2F<0b1011001001>; ++def FTINT_L_D : Float_CVT<"ftint.l.d", FGR64Opnd, FGR64Opnd>, R2F<0b1011001010>; ++def FFINT_S_W : Float_CVT<"ffint.s.w", FGR32Opnd, FGR32Opnd>, R2F<0b1101000100>; ++def FFINT_S_L : Float_CVT<"ffint.s.l", FGR32Opnd, FGR64Opnd>, R2F<0b1101000110>; ++def FFINT_D_W : Float_CVT<"ffint.d.w", FGR64Opnd, FGR32Opnd>, R2F<0b1101001000>; ++def FFINT_D_L : Float_CVT<"ffint.d.l", FGR64Opnd, FGR64Opnd>, R2F<0b1101001010>; ++ ++def FRINT_S : Float_Reg2<"frint.s", FGR32Opnd, frint>, R2F<0b1110010001>; ++def FRINT_D : Float_Reg2<"frint.d", FGR64Opnd, frint>, R2F<0b1110010010>; ++ ++/// ++/// R3 ++/// ++def FADD_S : Float_Reg3<"fadd.s", FGR32Opnd, fadd>, R3F<0b000001>; ++def FADD_D : Float_Reg3<"fadd.d", FGR64Opnd, fadd>, R3F<0b000010>; ++def FSUB_S : Float_Reg3<"fsub.s", FGR32Opnd, fsub>, R3F<0b000101>; ++def FSUB_D : Float_Reg3<"fsub.d", FGR64Opnd, fsub>, R3F<0b000110>; ++def FMUL_S : Float_Reg3<"fmul.s", FGR32Opnd, fmul>, R3F<0b001001>; ++def FMUL_D : Float_Reg3<"fmul.d", FGR64Opnd, fmul>, R3F<0b001010>; ++def FDIV_S : Float_Reg3<"fdiv.s", FGR32Opnd, fdiv>, R3F<0b001101>; ++def FDIV_D : Float_Reg3<"fdiv.d", FGR64Opnd, fdiv>, R3F<0b001110>; ++def FMAX_S : Float_Reg3<"fmax.s", FGR32Opnd, fmaxnum_ieee>, R3F<0b010001>; ++def FMAX_D : Float_Reg3<"fmax.d", FGR64Opnd, fmaxnum_ieee>, R3F<0b010010>; ++def FMIN_S : Float_Reg3<"fmin.s", FGR32Opnd, fminnum_ieee>, R3F<0b010101>; ++def FMIN_D : Float_Reg3<"fmin.d", FGR64Opnd, fminnum_ieee>, R3F<0b010110>; ++def FMAXA_S : Float_Reg3_Fmaxa<"fmaxa.s", FGR32Opnd>, R3F<0b011001>; ++def FMAXA_D : Float_Reg3_Fmaxa<"fmaxa.d", FGR64Opnd>, R3F<0b011010>; ++def FMINA_S : Float_Reg3_Fmaxa<"fmina.s", FGR32Opnd>, R3F<0b011101>; ++def FMINA_D : Float_Reg3_Fmaxa<"fmina.d", FGR64Opnd>, R3F<0b011110>; ++def FSCALEB_S : Float_Reg3<"fscaleb.s", FGR32Opnd>, R3F<0b100001>; ++def FSCALEB_D : Float_Reg3<"fscaleb.d", FGR64Opnd>, R3F<0b100010>; ++def FCOPYSIGN_S : Float_Reg3<"fcopysign.s", FGR32Opnd, fcopysign>, R3F<0b100101>; ++def FCOPYSIGN_D : Float_Reg3<"fcopysign.d", FGR64Opnd, fcopysign>, R3F<0b100110>; ++/// ++/// R4_IMM21 ++/// ++def FMADD_S : Mul_Reg4<"fmadd.s", FGR32Opnd>, R4MUL<0b0001>; ++def FMADD_D : Mul_Reg4<"fmadd.d", FGR64Opnd>, R4MUL<0b0010>; ++def FMSUB_S : Mul_Reg4<"fmsub.s", FGR32Opnd>, R4MUL<0b0101>; 
++def FMSUB_D : Mul_Reg4<"fmsub.d", FGR64Opnd>, R4MUL<0b0110>; ++def FNMADD_S : NMul_Reg4<"fnmadd.s", FGR32Opnd>, R4MUL<0b1001>; ++def FNMADD_D : NMul_Reg4<"fnmadd.d", FGR64Opnd>, R4MUL<0b1010>; ++def FNMSUB_S : NMul_Reg4<"fnmsub.s", FGR32Opnd>, R4MUL<0b1101>; ++def FNMSUB_D : NMul_Reg4<"fnmsub.d", FGR64Opnd>, R4MUL<0b1110>; ++ ++ ++// fmadd: fj * fk + fa ++def : LoongArchPat<(fma FGR64Opnd:$fj, FGR64Opnd:$fk, FGR64Opnd:$fa), ++ (FMADD_D $fj, $fk, $fa)>; ++ ++def : LoongArchPat<(fma FGR32Opnd:$fj, FGR32Opnd:$fk, FGR32Opnd:$fa), ++ (FMADD_S $fj, $fk, $fa)>; ++ ++ ++// fmsub: fj * fk - fa ++def : LoongArchPat<(fma FGR64Opnd:$fj, FGR64Opnd:$fk, (fneg FGR64Opnd:$fa)), ++ (FMSUB_D FGR64Opnd:$fj, FGR64Opnd:$fk, FGR64Opnd:$fa)>; ++ ++def : LoongArchPat<(fma FGR32Opnd:$fj, FGR32Opnd:$fk, (fneg FGR32Opnd:$fa)), ++ (FMSUB_S FGR32Opnd:$fj, FGR32Opnd:$fk, FGR32Opnd:$fa)>; ++ ++ ++// fnmadd: -(fj * fk + fa) ++def : LoongArchPat<(fma (fneg FGR64Opnd:$fj), FGR64Opnd:$fk, (fneg FGR64Opnd:$fa)), ++ (FNMADD_D FGR64Opnd:$fj, FGR64Opnd:$fk, FGR64Opnd:$fa)>; ++ ++def : LoongArchPat<(fma (fneg FGR32Opnd:$fj), FGR32Opnd:$fk, (fneg FGR32Opnd:$fa)), ++ (FNMADD_S FGR32Opnd:$fj, FGR32Opnd:$fk, FGR32Opnd:$fa)>; ++ ++// fnmsub: -(fj * fk - fa) ++def : LoongArchPat<(fma (fneg FGR64Opnd:$fj), FGR64Opnd:$fk, FGR64Opnd:$fa), ++ (FNMSUB_D FGR64Opnd:$fj, FGR64Opnd:$fk, FGR64Opnd:$fa)>; ++ ++def : LoongArchPat<(fma (fneg FGR32Opnd:$fj), FGR32Opnd:$fk, FGR32Opnd:$fa), ++ (FNMSUB_S FGR32Opnd:$fj, FGR32Opnd:$fk, FGR32Opnd:$fa)>; ++ ++let Pattern = [] in { ++defm S : FCMP_COND_M<0b01, "s", FGR32Opnd>; ++defm D : FCMP_COND_M<0b10, "d", FGR64Opnd>; ++} ++// ++//defm S : FCmp_Pats; ++//defm D : FCmp_Pats; ++ ++/// ++/// Float point branching ++/// ++def LoongArch_BRANCH_F : PatLeaf<(i32 0)>; ++def LoongArch_BRANCH_T : PatLeaf<(i32 1)>; ++ ++def BCEQZ : BceqzBr<"bceqz", brtarget, LoongArch_BRANCH_F>, R1_BCEQZ<0>; ++def BCNEZ : BceqzBr<"bcnez", brtarget, LoongArch_BRANCH_T>, R1_BCEQZ<1>; ++ ++/// ++/// FMOV ++/// ++def MOVGR2FCSR : Gpr_2_Fcsr<"movgr2fcsr", GPR64Opnd>, MOVGPR2FCSR; ++def MOVFCSR2GR : Fcsr_2_Gpr<"movfcsr2gr", GPR64Opnd>, MOVFCSR2GPR; ++def MOVFR2CF : Fgr_2_Fcfr<"movfr2cf", FGR64Opnd>, MOVFGR2FCFR; ++def MOVCF2FR : Fcfr_2_Fgr<"movcf2fr", FGR64Opnd>, MOVFCFR2FGR; ++def MOVGR2CF : Gpr_2_Fcfr<"movgr2cf", GPR64Opnd>, MOVGPR2FCFR; ++def MOVCF2GR : Fcfr_2_Gpr<"movcf2gr", GPR64Opnd>, MOVFCFR2GPR; ++ ++let isCodeGenOnly = 1 in { ++ def MOVFR2CF32 : Fgr_2_Fcfr<"movfr2cf", FGR32Opnd>, MOVFGR2FCFR; ++ def MOVCF2FR32 : Fcfr_2_Fgr<"movcf2fr", FGR32Opnd>, MOVFCFR2FGR; ++ def MOVGR2CF32 : Gpr_2_Fcfr<"movgr2cf", GPR32Opnd>, MOVGPR2FCFR; ++ def MOVCF2GR32 : Fcfr_2_Gpr<"movcf2gr", GPR32Opnd>, MOVFCFR2GPR; ++} ++ ++class Sel_Reg4 ++ : InstForm<(outs RO:$fd), (ins FCFROpnd:$ca, RO:$fj, RO:$fk), ++ !strconcat(opstr, "\t$fd, $fj, $fk, $ca"), ++ [(set RO:$fd, (LoongArchFSEL RO:$fj, FCFROpnd:$ca, RO:$fk))], ++ FrmR, opstr>{ ++ let Defs = [FCC0, FCC1, FCC2, FCC3, FCC4, FCC5, FCC6]; ++ let hasFCCRegOperand = 1; ++ } ++ ++def FSEL_T_S : Sel_Reg4<"fsel", FGR32Opnd>, R4SEL; ++let isCodeGenOnly = 1 in { ++ def FSEL_T_D : Sel_Reg4<"fsel", FGR64Opnd>, R4SEL; ++} ++ ++/// ++/// Mem access ++/// ++def FLD_S : FLd<"fld.s", FGR32Opnd, mem, load>, LOAD_STORE<0b1100>; ++def FST_S : FSt<"fst.s", FGR32Opnd, mem, store>, LOAD_STORE<0b1101>; ++def FLD_D : FLd<"fld.d", FGR64Opnd, mem, load>, LOAD_STORE<0b1110>; ++def FST_D : FSt<"fst.d", FGR64Opnd, mem, store>, LOAD_STORE<0b1111>; ++ ++def FLDX_S : FLDX<"fldx.s", FGR32Opnd, load>, R3MF<0b01100000>; ++def FLDX_D 
: FLDX<"fldx.d", FGR64Opnd, load>, R3MF<0b01101000>; ++def FSTX_S : FSTX<"fstx.s", FGR32Opnd, store>, R3MF<0b01110000>; ++def FSTX_D : FSTX<"fstx.d", FGR64Opnd, store>, R3MF<0b01111000>; ++ ++def FLDGT_S : Float_Int_Reg3<"fldgt.s", FGR32Opnd, GPR64Opnd>, R3MF<0b11101000>; ++def FLDGT_D : Float_Int_Reg3<"fldgt.d", FGR64Opnd, GPR64Opnd>, R3MF<0b11101001>; ++def FLDLE_S : Float_Int_Reg3<"fldle.s", FGR32Opnd, GPR64Opnd>, R3MF<0b11101010>; ++def FLDLE_D : Float_Int_Reg3<"fldle.d", FGR64Opnd, GPR64Opnd>, R3MF<0b11101011>; ++def FSTGT_S : Float_STGT_LE<"fstgt.s", FGR32Opnd, GPR64Opnd>, R3MF<0b11101100>; ++def FSTGT_D : Float_STGT_LE<"fstgt.d", FGR64Opnd, GPR64Opnd>, R3MF<0b11101101>; ++def FSTLE_S : Float_STGT_LE<"fstle.s", FGR32Opnd, GPR64Opnd>, R3MF<0b11101110>; ++def FSTLE_D : Float_STGT_LE<"fstle.d", FGR64Opnd, GPR64Opnd>, R3MF<0b11101111>; ++ ++let isPseudo = 1, isCodeGenOnly = 1 in { ++ def PseudoFFINT_S_W : Float_CVT<"", FGR32Opnd, GPR32Opnd>; ++ def PseudoFFINT_D_W : Float_CVT<"", FGR64Opnd, GPR32Opnd>; ++ def PseudoFFINT_S_L : Float_CVT<"", FGR64Opnd, GPR64Opnd>; ++ def PseudoFFINT_D_L : Float_CVT<"", FGR64Opnd, GPR64Opnd>; ++} ++ ++def : LoongArchPat<(f32 (fpround FGR64Opnd:$src)), ++ (FCVT_S_D FGR64Opnd:$src)>; ++def : LoongArchPat<(f64 (fpextend FGR32Opnd:$src)), ++ (FCVT_D_S FGR32Opnd:$src)>; ++ ++def : LoongArchPat<(f32 (sint_to_fp GPR32Opnd:$src)), ++ (PseudoFFINT_S_W GPR32Opnd:$src)>; ++def : LoongArchPat<(f64 (sint_to_fp GPR32Opnd:$src)), ++ (PseudoFFINT_D_W GPR32Opnd:$src)>; ++def : LoongArchPat<(f32 (sint_to_fp GPR64Opnd:$src)), ++ (EXTRACT_SUBREG (PseudoFFINT_S_L GPR64Opnd:$src), sub_lo)>; ++def : LoongArchPat<(f64 (sint_to_fp GPR64Opnd:$src)), ++ (PseudoFFINT_D_L GPR64Opnd:$src)>; ++ ++def : LoongArchPat<(f32 fpimm0), (MOVGR2FR_W ZERO)>; ++def : LoongArchPat<(f32 fpimm0neg), (FNEG_S (MOVGR2FR_W ZERO))>; ++def : LoongArchPat<(f32 fpimm1), (FFINT_S_W (MOVGR2FR_W (ADDI_W ZERO, 1)))>; ++def : LoongArchPat<(f64 fpimm1), (FFINT_D_L (MOVGR2FR_D (ADDI_D ZERO_64, 1)))>; ++ ++// Patterns for loads/stores with a reg+imm operand. 
++let AddedComplexity = 40 in { ++ def : LoadRegImmPat; ++ def : StoreRegImmPat; ++ def : LoadRegImmPat; ++ def : StoreRegImmPat; ++} ++ ++def : LoongArchPat<(LoongArchTruncIntFP FGR32Opnd:$src), ++ (FTINTRZ_W_S FGR32Opnd:$src)>; ++ ++def : LoongArchPat<(LoongArchTruncIntFP FGR64Opnd:$src), ++ (FTINTRZ_L_D FGR64Opnd:$src)>; ++ ++def : LoongArchPat<(LoongArchTruncIntFP FGR32Opnd:$src), ++ (FCVT_D_S (FTINTRZ_W_S FGR32Opnd:$src))>; ++ ++def : LoongArchPat<(f32 (fcopysign FGR32Opnd:$lhs, FGR64Opnd:$rhs)), ++ (FCOPYSIGN_S FGR32Opnd:$lhs, (FCVT_S_D FGR64Opnd:$rhs))>; ++def : LoongArchPat<(f64 (fcopysign FGR64Opnd:$lhs, FGR32Opnd:$rhs)), ++ (FCOPYSIGN_D FGR64Opnd:$lhs, (FCVT_D_S FGR32Opnd:$rhs))>; ++ ++let PrintMethod = "printFCCOperand",EncoderMethod = "getFCMPEncoding" in ++ def condcode : Operand; ++ ++class CEQS_FT : ++ InstForm<(outs), (ins RC:$fj, RC:$fk, condcode:$cond), ++ !strconcat("fcmp.$cond.", typestr, "\t$$fcc0, $fj, $fk"), ++ [(OpNode RC:$fj, RC:$fk, imm:$cond)], FrmFR, ++ !strconcat("fcmp.$cond.", typestr)>, HARDFLOAT { ++ let Defs = [FCC0, FCC1, FCC2, FCC3, FCC4, FCC5, FCC6, FCC7]; ++ let isCodeGenOnly = 1; ++ let hasFCCRegOperand = 1; ++} ++ ++def FCMP_S32 : CEQS_FT<"s", FGR32, LoongArchFPCmp>, CEQS_FM<0b01> { ++ bits<3> cd = 0; ++} ++def FCMP_D64 : CEQS_FT<"d", FGR64, LoongArchFPCmp>, CEQS_FM<0b10>{ ++ bits<3> cd = 0; ++} ++ ++ ++//multiclass FCmp_Pats2 { ++// def : LoongArchPat<(seteq VT:$lhs, VT:$rhs), ++// (!cast("SFCMP_CEQ_"#NAME) VT:$lhs, VT:$rhs)>; ++// def : LoongArchPat<(setgt VT:$lhs, VT:$rhs), ++// (!cast("SFCMP_CLE_"#NAME) VT:$rhs, VT:$lhs)>; ++// def : LoongArchPat<(setge VT:$lhs, VT:$rhs), ++// (!cast("SFCMP_CLT_"#NAME) VT:$rhs, VT:$lhs)>; ++// def : LoongArchPat<(setlt VT:$lhs, VT:$rhs), ++// (!cast("SFCMP_CLT_"#NAME) VT:$lhs, VT:$rhs)>; ++// def : LoongArchPat<(setle VT:$lhs, VT:$rhs), ++// (!cast("SFCMP_CLE_"#NAME) VT:$lhs, VT:$rhs)>; ++// def : LoongArchPat<(setne VT:$lhs, VT:$rhs), ++// (NOROp ++// (!cast("SFCMP_CEQ_"#NAME) VT:$lhs, VT:$rhs), ++// ZEROReg)>; ++// ++// def : LoongArchPat<(seteq VT:$lhs, VT:$rhs), ++// (!cast("DFCMP_CEQ_"#NAME) VT:$lhs, VT:$rhs)>; ++// def : LoongArchPat<(setgt VT:$lhs, VT:$rhs), ++// (!cast("DFCMP_CLE_"#NAME) VT:$rhs, VT:$lhs)>; ++// def : LoongArchPat<(setge VT:$lhs, VT:$rhs), ++// (!cast("DFCMP_CLT_"#NAME) VT:$rhs, VT:$lhs)>; ++// def : LoongArchPat<(setlt VT:$lhs, VT:$rhs), ++// (!cast("DFCMP_CLT_"#NAME) VT:$lhs, VT:$rhs)>; ++// def : LoongArchPat<(setle VT:$lhs, VT:$rhs), ++// (!cast("DFCMP_CLE_"#NAME) VT:$lhs, VT:$rhs)>; ++// def : LoongArchPat<(setne VT:$lhs, VT:$rhs), ++// (NOROp ++// (!cast("DFCMP_CEQ_"#NAME) VT:$lhs, VT:$rhs), ++// ZEROReg)>; ++// } ++// ++//defm S : FCmp_Pats2; ++//defm D : FCmp_Pats2; ++ ++let usesCustomInserter = 1 in { ++ class Select_Pseudo : ++ LoongArchPseudo<(outs RC:$dst), (ins GPR32Opnd:$cond, RC:$T, RC:$F), ++ [(set RC:$dst, (select GPR32Opnd:$cond, RC:$T, RC:$F))]>; ++ ++ class SelectFP_Pseudo_T : ++ LoongArchPseudo<(outs RC:$dst), (ins FCFROpnd:$cond, RC:$T, RC:$F), ++ [(set RC:$dst, (LoongArchCMovFP_T RC:$T, FCFROpnd:$cond, RC:$F))]>; ++ ++ class SelectFP_Pseudo_F : ++ LoongArchPseudo<(outs RC:$dst), (ins FCFROpnd:$cond, RC:$T, RC:$F), ++ [(set RC:$dst, (LoongArchCMovFP_F RC:$T, FCFROpnd:$cond, RC:$F))]>; ++} ++ ++def PseudoSELECT_I : Select_Pseudo; ++def PseudoSELECT_I64 : Select_Pseudo; ++def PseudoSELECT_S : Select_Pseudo; ++def PseudoSELECT_D64 : Select_Pseudo; ++ ++def PseudoSELECTFP_T_I : SelectFP_Pseudo_T; ++def PseudoSELECTFP_T_I64 : SelectFP_Pseudo_T; ++ ++def PseudoSELECTFP_F_I 
: SelectFP_Pseudo_F; ++def PseudoSELECTFP_F_I64 : SelectFP_Pseudo_F; ++ ++class ABSS_FT : ++ InstForm<(outs DstRC:$fd), (ins SrcRC:$fj), !strconcat(opstr, "\t$fd, $fj"), ++ [(set DstRC:$fd, (OpNode SrcRC:$fj))], FrmFR, opstr>; ++ ++def TRUNC_W_D : ABSS_FT<"ftintrz.w.d", FGR32Opnd, FGR64Opnd>, R2F<0b1010100010>; ++ ++def FTINTRZ_L_S : ABSS_FT<"ftintrz.l.s", FGR64Opnd, FGR32Opnd>, R2F<0b1010101001>; ++ ++def : LoongArchPat<(LoongArchTruncIntFP FGR64Opnd:$src), ++ (TRUNC_W_D FGR64Opnd:$src)>; ++ ++def : LoongArchPat<(LoongArchTruncIntFP FGR32Opnd:$src), ++ (FTINTRZ_L_S FGR32Opnd:$src)>; ++ ++def : Pat<(fcanonicalize FGR32Opnd:$src), (FMAX_S $src, $src)>; ++def : Pat<(fcanonicalize FGR64Opnd:$src), (FMAX_D $src, $src)>; ++ ++def : LoongArchPat<(i64 (sext (i32 (bitconvert FGR32Opnd:$src)))), ++ (MOVFR2GR_DS FGR32Opnd:$src)>; +diff --git a/lib/Target/LoongArch/LoongArchLASXInstrFormats.td b/lib/Target/LoongArch/LoongArchLASXInstrFormats.td +new file mode 100644 +index 00000000..8e255f85 +--- /dev/null ++++ b/lib/Target/LoongArch/LoongArchLASXInstrFormats.td +@@ -0,0 +1,448 @@ ++//===- LoongArchLASXInstrFormats.td - LoongArch LASX Instruction Formats ---*- tablegen -*-===// ++// ++// The LLVM Compiler Infrastructure ++// ++// This file is distributed under the University of Illinois Open Source ++// License. See LICENSE.TXT for details. ++// ++//===----------------------------------------------------------------------===// ++ ++class LASXInst : InstLA<(outs), (ins), "", [], FrmOther>, ++ EXT_LASX { ++} ++ ++class LASXCBranch : LASXInst { ++} ++ ++class LASXSpecial : LASXInst { ++} ++ ++class LASXPseudo pattern>: ++ LoongArchPseudo { ++ let Predicates = [HasLASX]; ++} ++ ++class LASX_3R op>: LASXInst { ++ bits<5> xk; ++ bits<5> xj; ++ bits<5> xd; ++ ++ let Inst{31-15} = op; ++ let Inst{14-10} = xk; ++ let Inst{9-5} = xj; ++ let Inst{4-0} = xd; ++} ++ ++class LASX_4R op>: LASXInst { ++ bits<5> xa; ++ bits<5> xk; ++ bits<5> xj; ++ bits<5> xd; ++ ++ let Inst{31-20} = op; ++ let Inst{19-15} = xa; ++ let Inst{14-10} = xk; ++ let Inst{9-5} = xj; ++ let Inst{4-0} = xd; ++} ++ ++class LASX_XVFCMP op>: LASXInst { ++ bits<5> xd; ++ bits<5> xj; ++ bits<5> xk; ++ bits<5> cond; ++ ++ let Inst{31-20} = op; ++ let Inst{19-15} = cond; ++ let Inst{14-10} = xk; ++ let Inst{9-5} = xj; ++ let Inst{4-0} = xd; ++} ++ ++class LASX_I12_S op>: LASXInst { ++ bits<5> xd; ++ bits<17> addr; ++ ++ let Inst{31-22} = op; ++ let Inst{21-10} = addr{11-0}; ++ let Inst{9-5} = addr{16-12}; ++ let Inst{4-0} = xd; ++} ++ ++class LASX_SI12_S op>: LASXInst { ++ bits<5> xd; ++ bits<17> addr; ++ ++ let Inst{31-22} = op; ++ let Inst{21-10} = addr{11-0}; ++ let Inst{9-5} = addr{16-12}; ++ let Inst{4-0} = xd; ++} ++ ++class LASX_SI11_S op>: LASXInst { ++ bits<5> xd; ++ bits<16> addr; ++ ++ let Inst{31-21} = op; ++ let Inst{20-10} = addr{10-0}; ++ let Inst{9-5} = addr{15-11}; ++ let Inst{4-0} = xd; ++} ++ ++class LASX_SI10_S op>: LASXInst { ++ bits<5> xd; ++ bits<15> addr; ++ ++ let Inst{31-20} = op; ++ let Inst{19-10} = addr{9-0}; ++ let Inst{9-5} = addr{14-10}; ++ let Inst{4-0} = xd; ++} ++ ++class LASX_SI9_S op>: LASXInst { ++ bits<5> xd; ++ bits<14> addr; ++ ++ let Inst{31-19} = op; ++ let Inst{18-10} = addr{8-0}; ++ let Inst{9-5} = addr{13-9}; ++ let Inst{4-0} = xd; ++} ++ ++class LASX_SI8_idx5 op>: LASXInst { ++ bits<5> xd; ++ bits<5> rj; ++ bits<8> si8; ++ bits<5> idx; ++ ++ let Inst{31-23} = op; ++ let Inst{22-18} = idx; ++ let Inst{17-10} = si8; ++ let Inst{9-5} = rj; ++ let Inst{4-0} = xd; ++} ++ ++class LASX_SI8_idx2 op>: LASXInst { 
++ bits<5> xd; ++ bits<5> rj; ++ bits<8> si8; ++ bits<2> idx; ++ ++ let Inst{31-20} = op; ++ let Inst{19-18} = idx; ++ let Inst{17-10} = si8; ++ let Inst{9-5} = rj; ++ let Inst{4-0} = xd; ++} ++ ++class LASX_SI8_idx3 op>: LASXInst { ++ bits<5> xd; ++ bits<5> rj; ++ bits<8> si8; ++ bits<3> idx; ++ ++ let Inst{31-21} = op; ++ let Inst{20-18} = idx; ++ let Inst{17-10} = si8; ++ let Inst{9-5} = rj; ++ let Inst{4-0} = xd; ++} ++ ++class LASX_SI8_idx4 op>: LASXInst { ++ bits<5> xd; ++ bits<5> rj; ++ bits<8> si8; ++ bits<4> idx; ++ ++ let Inst{31-22} = op; ++ let Inst{21-18} = idx; ++ let Inst{17-10} = si8; ++ let Inst{9-5} = rj; ++ let Inst{4-0} = xd; ++} ++ ++class LASX_3R_2GP op>: LASXInst { ++ bits<5> rk; ++ bits<5> rj; ++ bits<5> xd; ++ ++ let Inst{31-15} = op; ++ let Inst{14-10} = rk; ++ let Inst{9-5} = rj; ++ let Inst{4-0} = xd; ++} ++ ++class LASX_3R_1GP op>: LASXInst { ++ bits<5> rk; ++ bits<5> xj; ++ bits<5> xd; ++ ++ let Inst{31-15} = op; ++ let Inst{14-10} = rk; ++ let Inst{9-5} = xj; ++ let Inst{4-0} = xd; ++} ++ ++class LASX_I5 op>: LASXInst { ++ bits<5> xd; ++ bits<5> xj; ++ bits<5> si5; ++ ++ let Inst{31-15} = op; ++ let Inst{14-10} = si5; ++ let Inst{9-5} = xj; ++ let Inst{4-0} = xd; ++} ++ ++class LASX_I5_U op>: LASXInst { ++ bits<5> xd; ++ bits<5> xj; ++ bits<5> ui5; ++ ++ let Inst{31-15} = op; ++ let Inst{14-10} = ui5; ++ let Inst{9-5} = xj; ++ let Inst{4-0} = xd; ++} ++ ++class LASX_I5_mode_U op>: LASXInst { ++ bits<5> xd; ++ bits<5> mode; ++ bits<5> ui5; ++ ++ let Inst{31-15} = op; ++ let Inst{14-10} = ui5; ++ let Inst{9-5} = mode; ++ let Inst{4-0} = xd; ++} ++ ++class LASX_2R op>: LASXInst { ++ bits<5> xj; ++ bits<5> xd; ++ ++ let Inst{31-10} = op; ++ let Inst{9-5} = xj; ++ let Inst{4-0} = xd; ++} ++ ++class LASX_SET op>: LASXInst { ++ bits<5> xj; ++ bits<3> cd; ++ ++ let Inst{31-10} = op; ++ let Inst{9-5} = xj; ++ let Inst{4-3} = 0b00; ++ let Inst{2-0} = cd; ++} ++ ++class LASX_2R_1GP op>: LASXInst { ++ bits<5> rj; ++ bits<5> xd; ++ ++ let Inst{31-10} = op; ++ let Inst{9-5} = rj; ++ let Inst{4-0} = xd; ++} ++ ++class LASX_I3_U op>: LASXInst { ++ bits<5> xd; ++ bits<5> xj; ++ bits<3> ui3; ++ ++ let Inst{31-13} = op; ++ let Inst{12-10} = ui3; ++ let Inst{9-5} = xj; ++ let Inst{4-0} = xd; ++} ++ ++class LASX_I4_U op>: LASXInst { ++ bits<5> xd; ++ bits<5> xj; ++ bits<4> ui4; ++ ++ let Inst{31-14} = op; ++ let Inst{13-10} = ui4; ++ let Inst{9-5} = xj; ++ let Inst{4-0} = xd; ++} ++ ++class LASX_I6_U op>: LASXInst { ++ bits<5> xd; ++ bits<5> xj; ++ bits<6> ui6; ++ ++ let Inst{31-16} = op; ++ let Inst{15-10} = ui6; ++ let Inst{9-5} = xj; ++ let Inst{4-0} = xd; ++} ++ ++class LASX_I2_R_U op>: LASXInst { ++ bits<5> xd; ++ bits<5> rj; ++ bits<2> ui2; ++ ++ let Inst{31-12} = op; ++ let Inst{11-10} = ui2; ++ let Inst{9-5} = rj; ++ let Inst{4-0} = xd; ++} ++ ++class LASX_I3_R_U op>: LASXInst { ++ bits<5> xd; ++ bits<5> rj; ++ bits<3> ui3; ++ ++ let Inst{31-13} = op; ++ let Inst{12-10} = ui3; ++ let Inst{9-5} = rj; ++ let Inst{4-0} = xd; ++} ++ ++class LASX_ELM_COPY_U3 op>: LASXInst { ++ bits<5> rd; ++ bits<5> xj; ++ bits<3> ui3; ++ ++ let Inst{31-13} = op; ++ let Inst{12-10} = ui3; ++ let Inst{9-5} = xj; ++ let Inst{4-0} = rd; ++} ++ ++class LASX_ELM_COPY_U2 op>: LASXInst { ++ bits<5> rd; ++ bits<5> xj; ++ bits<2> ui2; ++ ++ let Inst{31-12} = op; ++ let Inst{11-10} = ui2; ++ let Inst{9-5} = xj; ++ let Inst{4-0} = rd; ++} ++ ++class LASX_I1_U op>: LASXInst { ++ bits<5> xd; ++ bits<5> xj; ++ bits<1> ui1; ++ ++ let Inst{31-11} = op; ++ let Inst{10} = ui1; ++ let Inst{9-5} = xj; ++ let 
Inst{4-0} = xd; ++} ++ ++class LASX_I2_U op>: LASXInst { ++ bits<5> xd; ++ bits<5> xj; ++ bits<2> ui2; ++ ++ let Inst{31-12} = op; ++ let Inst{11-10} = ui2; ++ let Inst{9-5} = xj; ++ let Inst{4-0} = xd; ++} ++ ++class LASX_I7_U op>: LASXInst { ++ bits<5> xd; ++ bits<5> xj; ++ bits<7> ui7; ++ ++ let Inst{31-17} = op; ++ let Inst{16-10} = ui7; ++ let Inst{9-5} = xj; ++ let Inst{4-0} = xd; ++} ++ ++class LASX_1R_I13 op>: LASXInst { ++ bits<13> i13; ++ bits<5> xd; ++ ++ let Inst{31-18} = op; ++ let Inst{17-5} = i13; ++ let Inst{4-0} = xd; ++} ++ ++class LASX_I8_U op>: LASXInst { ++ bits<5> xd; ++ bits<5> xj; ++ bits<8> ui8; ++ ++ let Inst{31-18} = op; ++ let Inst{17-10} = ui8; ++ let Inst{9-5} = xj; ++ let Inst{4-0} = xd; ++} ++ ++ ++////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// ++class LASX_I1_R_U op>: LASXInst { ++ bits<5> xd; ++ bits<5> rj; ++ bits<1> ui1; ++ ++ let Inst{31-11} = op; ++ let Inst{10} = ui1; ++ let Inst{9-5} = rj; ++ let Inst{4-0} = xd; ++} ++ ++class LASX_I4_R_U op>: LASXInst { ++ bits<5> xd; ++ bits<5> rj; ++ bits<4> ui4; ++ ++ let Inst{31-14} = op; ++ let Inst{13-10} = ui4; ++ let Inst{9-5} = rj; ++ let Inst{4-0} = xd; ++} ++ ++class LASX_ELM_COPY_B op>: LASXInst { ++ bits<5> rd; ++ bits<5> xj; ++ bits<4> ui4; ++ ++ let Inst{31-14} = op; ++ let Inst{13-10} = ui4; ++ let Inst{9-5} = xj; ++ let Inst{4-0} = rd; ++} ++ ++class LASX_ELM_COPY_D op>: LASXInst { ++ bits<5> rd; ++ bits<5> xj; ++ bits<1> ui1; ++ ++ let Inst{31-11} = op; ++ let Inst{10} = ui1; ++ let Inst{9-5} = xj; ++ let Inst{4-0} = rd; ++} ++ ++class LASX_Addr_SI8_idx1 op>: LASXInst { ++ bits<5> xd; ++ bits<13> addr; ++ bits<1> idx; ++ ++ let Inst{31-19} = op; ++ let Inst{18-11} = addr{7-0}; ++ let Inst{10} = idx; ++ let Inst{9-5} = addr{12-8}; ++ let Inst{4-0} = xd; ++} ++ ++class LASX_1R_I13_I10 op>: LASXInst { ++ bits<10> i10; ++ bits<5> xd; ++ ++ let Inst{31-15} = op; ++ let Inst{14-5} = i10; ++ let Inst{4-0} = xd; ++} ++ ++ ++ ++ ++ ++ +diff --git a/lib/Target/LoongArch/LoongArchLASXInstrInfo.td b/lib/Target/LoongArch/LoongArchLASXInstrInfo.td +new file mode 100644 +index 00000000..01a6f375 +--- /dev/null ++++ b/lib/Target/LoongArch/LoongArchLASXInstrInfo.td +@@ -0,0 +1,5666 @@ ++//===- LoongArchLASXInstrInfo.td - loongson LASX instructions -*- tablegen ------------*-=// ++// ++// The LLVM Compiler Infrastructure ++// ++// This file is distributed under the University of Illinois Open Source ++// License. See LICENSE.TXT for details. ++// ++//===----------------------------------------------------------------------===// ++// ++// This file describes loongson ASX instructions. 
++// ++//===----------------------------------------------------------------------===// ++def SDT_XVPERMI : SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisInt<0>, ++ SDTCisSameAs<0, 1>, ++ SDTCisVT<2, i32>]>; ++def SDT_XVSHFI : SDTypeProfile<1, 3, [SDTCisInt<0>, SDTCisVec<0>, ++ SDTCisSameAs<0, 1>, SDTCisSameAs<1, 2>, ++ SDTCisVT<3, i32>]>; ++def SDT_XVBROADCAST : SDTypeProfile<1, 1, [SDTCisVec<0>]>; ++ ++def SDT_INSVE : SDTypeProfile<1, 3, [SDTCisVec<0>, SDTCisSameAs<0, 1>, ++ SDTCisSameAs<1, 2>, ++ SDTCisVT<3, i32>]>; ++ ++def SDT_XVPICKVE : SDTypeProfile<1, 3, [SDTCisVec<0>, SDTCisSameAs<0, 1>, ++ SDTCisSameAs<1, 2>, ++ SDTCisVT<3, i32>]>; ++ ++def SDT_XVSHUF4I : SDTypeProfile<1, 3, [SDTCisVec<0>, SDTCisInt<0>, ++ SDTCisSameAs<0, 1>, ++ SDTCisSameAs<0, 2>, ++ SDTCisVT<3, i32>]>; ++ ++def LoongArchXVSHUFI : SDNode<"LoongArchISD::XVSHFI", SDT_XVSHFI>; ++ ++def LoongArchXVSELI : SDNode<"LoongArchISD::XVSELI", SDT_XVSHFI>; ++ ++def LoongArchXVPERMI : SDNode<"LoongArchISD::XVPERMI", SDT_XVPERMI>; ++ ++def LoongArchXVBROADCAST : SDNode<"LoongArchISD::XVBROADCAST", SDT_XVBROADCAST>; ++ ++def LoongArchINSVE : SDNode<"LoongArchISD::INSVE", SDT_INSVE>; ++ ++def LoongArchXVSHUF4I : SDNode<"LoongArchISD::XVSHUF4I", SDT_XVSHUF4I>; ++ ++def LoongArchXVPICKVE : SDNode<"LoongArchISD::XVPICKVE", SDT_INSVE>; ++ ++def xvbroadcast_v32i8 : PatFrag<(ops node:$v1), ++ (v32i8 (LoongArchXVBROADCAST node:$v1))>; ++def xvbroadcast_v16i16 : PatFrag<(ops node:$v1), ++ (v16i16 (LoongArchXVBROADCAST node:$v1))>; ++def xvbroadcast_v8i32 : PatFrag<(ops node:$v1), ++ (v8i32 (LoongArchXVBROADCAST node:$v1))>; ++def xvbroadcast_v4i64 : PatFrag<(ops node:$v1), ++ (v4i64 (LoongArchXVBROADCAST node:$v1))>; ++ ++ ++def vfseteq_v8f32 : vfsetcc_type; ++def vfseteq_v4f64 : vfsetcc_type; ++def vfsetge_v8f32 : vfsetcc_type; ++def vfsetge_v4f64 : vfsetcc_type; ++def vfsetgt_v8f32 : vfsetcc_type; ++def vfsetgt_v4f64 : vfsetcc_type; ++def vfsetle_v8f32 : vfsetcc_type; ++def vfsetle_v4f64 : vfsetcc_type; ++def vfsetlt_v8f32 : vfsetcc_type; ++def vfsetlt_v4f64 : vfsetcc_type; ++def vfsetne_v8f32 : vfsetcc_type; ++def vfsetne_v4f64 : vfsetcc_type; ++def vfsetoeq_v8f32 : vfsetcc_type; ++def vfsetoeq_v4f64 : vfsetcc_type; ++def vfsetoge_v8f32 : vfsetcc_type; ++def vfsetoge_v4f64 : vfsetcc_type; ++def vfsetogt_v8f32 : vfsetcc_type; ++def vfsetogt_v4f64 : vfsetcc_type; ++def vfsetole_v8f32 : vfsetcc_type; ++def vfsetole_v4f64 : vfsetcc_type; ++def vfsetolt_v8f32 : vfsetcc_type; ++def vfsetolt_v4f64 : vfsetcc_type; ++def vfsetone_v8f32 : vfsetcc_type; ++def vfsetone_v4f64 : vfsetcc_type; ++def vfsetord_v8f32 : vfsetcc_type; ++def vfsetord_v4f64 : vfsetcc_type; ++def vfsetun_v8f32 : vfsetcc_type; ++def vfsetun_v4f64 : vfsetcc_type; ++def vfsetueq_v8f32 : vfsetcc_type; ++def vfsetueq_v4f64 : vfsetcc_type; ++def vfsetuge_v8f32 : vfsetcc_type; ++def vfsetuge_v4f64 : vfsetcc_type; ++def vfsetugt_v8f32 : vfsetcc_type; ++def vfsetugt_v4f64 : vfsetcc_type; ++def vfsetule_v8f32 : vfsetcc_type; ++def vfsetule_v4f64 : vfsetcc_type; ++def vfsetult_v8f32 : vfsetcc_type; ++def vfsetult_v4f64 : vfsetcc_type; ++def vfsetune_v8f32 : vfsetcc_type; ++def vfsetune_v4f64 : vfsetcc_type; ++ ++def xvsplati8 : PatFrag<(ops node:$e0), ++ (v32i8 (build_vector ++ node:$e0, node:$e0, node:$e0, node:$e0, ++ node:$e0, node:$e0, node:$e0, node:$e0, ++ node:$e0, node:$e0, node:$e0, node:$e0, ++ node:$e0, node:$e0, node:$e0, node:$e0, ++ node:$e0, node:$e0, node:$e0, node:$e0, ++ node:$e0, node:$e0, node:$e0, node:$e0, ++ node:$e0, node:$e0, node:$e0, node:$e0, ++ node:$e0, 
node:$e0, node:$e0, node:$e0))>; ++def xvsplati16 : PatFrag<(ops node:$e0), ++ (v16i16 (build_vector ++ node:$e0, node:$e0, node:$e0, node:$e0, ++ node:$e0, node:$e0, node:$e0, node:$e0, ++ node:$e0, node:$e0, node:$e0, node:$e0, ++ node:$e0, node:$e0, node:$e0, node:$e0))>; ++def xvsplati32 : PatFrag<(ops node:$e0), ++ (v8i32 (build_vector ++ node:$e0, node:$e0, node:$e0, node:$e0, ++ node:$e0, node:$e0, node:$e0, node:$e0))>; ++def xvsplati64 : PatFrag<(ops node:$e0), ++ (v4i64 (build_vector ++ node:$e0, node:$e0, node:$e0, node:$e0))>; ++def xvsplatf32 : PatFrag<(ops node:$e0), ++ (v8f32 (build_vector node:$e0, node:$e0, ++ node:$e0, node:$e0))>; ++def xvsplatf64 : PatFrag<(ops node:$e0), ++ (v4f64 (build_vector node:$e0, node:$e0))>; ++ ++def xvsplati8_uimm3 : SplatComplexPattern; ++def xvsplati16_uimm4 : SplatComplexPattern; ++ ++def xvsplati64_uimm6 : SplatComplexPattern; ++ ++def xvsplati8_simm5 : SplatComplexPattern; ++def xvsplati16_simm5 : SplatComplexPattern; ++def xvsplati32_simm5 : SplatComplexPattern; ++def xvsplati64_simm5 : SplatComplexPattern; ++ ++def xvsplat_imm_eq_1 : PatLeaf<(build_vector), [{ ++ APInt Imm; ++ EVT EltTy = N->getValueType(0).getVectorElementType(); ++ return selectVSplat(N, Imm, EltTy.getSizeInBits()) && ++ Imm.getBitWidth() == EltTy.getSizeInBits() && Imm == 1; ++}]>; ++ ++def xvsplati64_imm_eq_1 : PatLeaf<(bitconvert (v8i32 (build_vector))), [{ ++ APInt Imm; ++ SDNode *BV = N->getOperand(0).getNode(); ++ EVT EltTy = N->getValueType(0).getVectorElementType(); ++ ++ return selectVSplat(BV, Imm, EltTy.getSizeInBits()) && ++ Imm.getBitWidth() == EltTy.getSizeInBits() && Imm == 1; ++}]>; ++ ++def xvbitclr_b : PatFrag<(ops node:$xk, node:$xa), ++ (and node:$xk, (xor (shl vsplat_imm_eq_1, node:$xa), ++ immAllOnesV))>; ++def xvbitclr_h : PatFrag<(ops node:$xk, node:$xa), ++ (and node:$xk, (xor (shl vsplat_imm_eq_1, node:$xa), ++ immAllOnesV))>; ++def xvbitclr_w : PatFrag<(ops node:$xk, node:$xa), ++ (and node:$xk, (xor (shl vsplat_imm_eq_1, node:$xa), ++ immAllOnesV))>; ++def xvbitclr_d : PatFrag<(ops node:$xk, node:$xa), ++ (and node:$xk, (xor (shl (v4i64 vsplati64_imm_eq_1), ++ node:$xa), ++ (bitconvert (v8i32 immAllOnesV))))>; ++ ++ ++ ++def xvsplati8_uimm5 : SplatComplexPattern; ++def xvsplati16_uimm5 : SplatComplexPattern; ++def xvsplati32_uimm5 : SplatComplexPattern; ++def xvsplati64_uimm5 : SplatComplexPattern; ++def xvsplati8_uimm8 : SplatComplexPattern; ++def xvsplati16_uimm8 : SplatComplexPattern; ++def xvsplati32_uimm8 : SplatComplexPattern; ++def xvsplati64_uimm8 : SplatComplexPattern; ++ ++ ++ ++def xvsplati8_uimm4 : SplatComplexPattern; ++def xvsplati16_uimm3 : SplatComplexPattern; ++def xvsplati32_uimm2 : SplatComplexPattern; ++def xvsplati64_uimm1 : SplatComplexPattern; ++ ++ ++// Patterns. 
++class LASXPat pred = [HasLASX]> : ++ Pat, Requires; ++ ++class LASX_4RF { ++ dag OutOperandList = (outs ROXD:$xd); ++ dag InOperandList = (ins ROXJ:$xj, ROXK:$xk, ROXA:$xa); ++ string AsmString = !strconcat(instr_asm, "\t$xd, $xj, $xk, $xa"); ++ list Pattern = [(set ROXD:$xd, (OpNode ROXJ:$xj, ROXK:$xk, ROXA:$xa))]; ++} ++ ++class LASX_3RF { ++ dag OutOperandList = (outs ROXD:$xd); ++ dag InOperandList = (ins ROXJ:$xj, ROXK:$xk); ++ string AsmString = !strconcat(instr_asm, "\t$xd, $xj, $xk"); ++ list Pattern = [(set ROXD:$xd, (OpNode ROXJ:$xj, ROXK:$xk))]; ++} ++ ++class LASX_3R_SETCC_DESC_BASE { ++ dag OutOperandList = (outs ROXD:$xd); ++ dag InOperandList = (ins ROXJ:$xj, ROXK:$xk); ++ string AsmString = !strconcat(instr_asm, "\t$xd, $xj, $xk"); ++ list Pattern = [(set ROXD:$xd, (VT (vsetcc ROXJ:$xj, ROXK:$xk, CC)))]; ++} ++ ++class LASX_LD { ++ dag OutOperandList = (outs ROXD:$xd); ++ dag InOperandList = (ins MemOpnd:$addr); ++ string AsmString = !strconcat(instr_asm, "\t$xd, $addr"); ++ list Pattern = [(set ROXD:$xd, (TyNode (OpNode Addr:$addr)))]; ++ string DecoderMethod = "DecodeLASX256Mem"; ++} ++ ++class LASX_ST { ++ dag OutOperandList = (outs); ++ dag InOperandList = (ins ROXD:$xd, MemOpnd:$addr); ++ string AsmString = !strconcat(instr_asm, "\t$xd, $addr"); ++ list Pattern = [(OpNode (TyNode ROXD:$xd), Addr:$addr)]; ++ string DecoderMethod = "DecodeLASX256Mem"; ++} ++ ++class LASX_I8_U5_DESC_BASE { ++ dag OutOperandList = (outs); ++ dag InOperandList = (ins ROXD:$xd, PtrRC:$rj, ImmOp:$si8, uimm5:$idx); ++ string AsmString = !strconcat(instr_asm, "\t$xd, $rj, $si8, $idx"); ++ list Pattern = [(OpNode ROXD:$xd, iPTR:$rj, Imm:$si8, immZExt5:$idx)]; ++ string DecoderMethod = "DecodeLASX256memstl"; ++} ++ ++class LASX_I8_U2_DESC_BASE { ++ dag OutOperandList = (outs); ++ dag InOperandList = (ins ROXD:$xd, PtrRC:$rj, ImmOp:$si8, uimm2:$idx); ++ string AsmString = !strconcat(instr_asm, "\t$xd, $rj, $si8, $idx"); ++ list Pattern = [(OpNode ROXD:$xd, iPTR:$rj, Imm:$si8, immZExt2:$idx)]; ++ string DecoderMethod = "DecodeLASX256memstl"; ++} ++ ++class LASX_I8_U3_DESC_BASE { ++ dag OutOperandList = (outs); ++ dag InOperandList = (ins ROXD:$xd, PtrRC:$rj, ImmOp:$si8, uimm3:$idx); ++ string AsmString = !strconcat(instr_asm, "\t$xd, $rj, $si8, $idx"); ++ list Pattern = [(OpNode ROXD:$xd, iPTR:$rj, Imm:$si8, immZExt3:$idx)]; ++ string DecoderMethod = "DecodeLASX256memstl"; ++} ++ ++class LASX_I8_U4_DESC_BASE { ++ dag OutOperandList = (outs); ++ dag InOperandList = (ins ROXD:$xd, PtrRC:$rj, ImmOp:$si8, uimm4:$idx); ++ string AsmString = !strconcat(instr_asm, "\t$xd, $rj, $si8, $idx"); ++ list Pattern = [(OpNode ROXD:$xd, iPTR:$rj, Imm:$si8, immZExt4:$idx)]; ++ string DecoderMethod = "DecodeLASX256memstl"; ++} ++ ++class LASX_SDX_LA { ++ dag OutOperandList = (outs); ++ dag InOperandList = (ins ROXD:$xd, PtrRC:$rj, RORK:$rk); ++ string AsmString = !strconcat(instr_asm, "\t$xd, $rj, $rk"); ++ list Pattern = [(OpNode ROXD:$xd, iPTR:$rj, RORK:$rk)]; ++} ++ ++class LASX_3R_DESC_BASE { ++ dag OutOperandList = (outs ROXD:$xd); ++ dag InOperandList = (ins ROXJ:$xj, ROXK:$xk); ++ string AsmString = !strconcat(instr_asm, "\t$xd, $xj, $xk"); ++ list Pattern = [(set ROXD:$xd, (OpNode ROXJ:$xj, ROXK:$xk))]; ++} ++ ++class LASX_LDX_LA { ++ dag OutOperandList = (outs ROXD:$xd); ++ dag InOperandList = (ins PtrRC:$rj, RORK:$rk); ++ string AsmString = !strconcat(instr_asm, "\t$xd, $rj, $rk"); ++ list Pattern = [(set ROXD:$xd, (OpNode iPTR:$rj, RORK:$rk))]; ++} ++ ++class LASX_3R_4R_DESC_BASE { ++ dag OutOperandList 
= (outs ROXD:$xd); ++ dag InOperandList = (ins ROXD:$xd_in, ROXJ:$xj, ROXK:$xk); ++ string AsmString = !strconcat(instr_asm, "\t$xd, $xj, $xk"); ++ list Pattern = [(set ROXD:$xd, (OpNode ROXD:$xd_in, ROXJ:$xj, ++ ROXK:$xk))]; ++ string Constraints = "$xd = $xd_in"; ++} ++ ++ ++class LASX_3R_VREPLVE_DESC_BASE { ++ dag OutOperandList = (outs ROXD:$xd); ++ dag InOperandList = (ins ROXJ:$xj, GPR32Opnd:$rk); ++ string AsmString = !strconcat(instr_asm, "\t$xd, $xj, $rk"); ++ list Pattern = [(set ROXD:$xd, (OpNode ROXJ:$xj, GPR32Opnd:$rk))]; ++} ++ ++ ++class LASX_3R_VREPLVE_DESC_BASE_N { ++ dag OutOperandList = (outs ROXD:$xd); ++ dag InOperandList = (ins ROXJ:$xj, GPR64Opnd:$rk); ++ string AsmString = !strconcat(instr_asm, "\t$xd, $xj, $rk"); ++ list Pattern = []; ++} ++ ++ ++class LASX_VEC_DESC_BASE { ++ dag OutOperandList = (outs ROXD:$xd); ++ dag InOperandList = (ins ROXJ:$xj, ROXK:$xk); ++ string AsmString = !strconcat(instr_asm, "\t$xd, $xj, $xk"); ++ list Pattern = [(set ROXD:$xd, (OpNode ROXJ:$xj, ROXK:$xk))]; ++} ++ ++ ++ ++class LASX_3RF_DESC_BASE : ++ LASX_3R_DESC_BASE; ++ ++ ++class LASX_3R_DESC_BASE1 { ++ dag OutOperandList = (outs ROXD:$xd); ++ dag InOperandList = (ins ROXJ:$xj, ROXK:$xk); ++ string AsmString = !strconcat(instr_asm, "\t$xd, $xj, $xk"); ++ list Pattern = [(set ROXD:$xd, (OpNode ROXJ:$xk, ROXK:$xj))]; ++} ++ ++class LASX_3RF_DESC_BASE1 : ++ LASX_3R_DESC_BASE1; ++ ++ ++ ++class LASX_3R_VSHF_DESC_BASE { ++ dag OutOperandList = (outs ROXD:$xd); ++ dag InOperandList = (ins ROXD:$xd_in, ROXJ:$xj, ROXK:$xk); ++ string AsmString = !strconcat(instr_asm, "\t$xd, $xj, $xk"); ++ list Pattern = [(set ROXD:$xd, (LoongArchVSHF ROXD:$xd_in, ROXJ:$xj, ++ ROXK:$xk))]; ++ string Constraints = "$xd = $xd_in"; ++} ++ ++class LASX_I5_SETCC_DESC_BASE { ++ dag OutOperandList = (outs ROXD:$xd); ++ dag InOperandList = (ins ROXJ:$xj, SplatImm.OpClass:$si5); ++ string AsmString = !strconcat(instr_asm, "\t$xd, $xj, $si5"); ++ list Pattern = [(set ROXD:$xd, (VT (vsetcc ROXJ:$xj, SplatImm:$si5, CC)))]; ++} ++ ++class LASX_I5_SETCC_DESC_BASE_Intrinsic { ++ dag OutOperandList = (outs ROXD:$xd); ++ dag InOperandList = (ins ROXJ:$xj, ImmOp:$si5); ++ string AsmString = !strconcat(instr_asm, "\t$xd, $xj, $si5"); ++ list Pattern = [(set ROXD:$xd, (OpNode ROXJ:$xj, Imm:$si5))]; ++} ++ ++ ++class LASX_I5_U_SETCC_DESC_BASE { ++ dag OutOperandList = (outs ROXD:$xd); ++ dag InOperandList = (ins ROXJ:$xj, SplatImm.OpClass:$ui5); ++ string AsmString = !strconcat(instr_asm, "\t$xd, $xj, $ui5"); ++ list Pattern = [(set ROXD:$xd, (VT (vsetcc ROXJ:$xj, SplatImm:$ui5, CC)))]; ++} ++ ++class LASX_I5_U_SETCC_DESC_BASE_Intrinsic { ++ dag OutOperandList = (outs ROXD:$xd); ++ dag InOperandList = (ins ROXJ:$xj, ImmOp:$ui5); ++ string AsmString = !strconcat(instr_asm, "\t$xd, $xj, $ui5"); ++ list Pattern = [(set ROXD:$xd, (OpNode ROXJ:$xj, Imm:$ui5))]; ++} ++ ++class LASX_VEC_PSEUDO_BASE : ++ LASXPseudo<(outs ROXD:$xd), (ins ROXJ:$xj, ROXK:$xk), ++ [(set ROXD:$xd, (OpNode ROXJ:$xj, ROXK:$xk))]>; ++ ++ ++class LASX_I5_U_DESC_BASE { ++ dag OutOperandList = (outs ROXD:$xd); ++ dag InOperandList = (ins ROXJ:$xj, SplatImm.OpClass:$ui5); ++ string AsmString = !strconcat(instr_asm, "\t$xd, $xj, $ui5"); ++ list Pattern = [(set ROXD:$xd, (OpNode ROXJ:$xj, SplatImm:$ui5))]; ++} ++ ++ ++class LASX_I5_U_DESC_BASE_Intrinsic { ++ dag OutOperandList = (outs ROXD:$xd); ++ dag InOperandList = (ins ROXJ:$xj, ImmOp:$ui5); ++ string AsmString = !strconcat(instr_asm, "\t$xd, $xj, $ui5"); ++ list Pattern = [(set ROXD:$xd, (OpNode ROXJ:$xj, 
Imm:$ui5))]; ++} ++ ++class LASX_U5_DESC_BASE { ++ dag OutOperandList = (outs ROXD:$xd); ++ dag InOperandList = (ins ROXJ:$xj, uimm5:$ui5); ++ string AsmString = !strconcat(instr_asm, "\t$xd, $xj, $ui5"); ++ list Pattern = [(set ROXD:$xd, (OpNode ROXJ:$xj, immZExt5:$ui5))]; ++} ++ ++class LASX_U5N_DESC_BASE : ++ LASX_U5_DESC_BASE; ++ ++class LASX_U5_4R_DESC_BASE { ++ dag OutOperandList = (outs ROXD:$xd); ++ dag InOperandList = (ins ROXD:$xd_in, ROXJ:$xj, uimm5:$ui5); ++ string AsmString = !strconcat(instr_asm, "\t$xd, $xj, $ui5"); ++ list Pattern = [(set ROXD:$xd, (OpNode ROXD:$xd_in, ROXJ:$xj, immZExt5:$ui5))]; ++ string Constraints = "$xd = $xd_in"; ++} ++ ++class LASX_2R_DESC_BASE { ++ dag OutOperandList = (outs ROXD:$xd); ++ dag InOperandList = (ins ROXJ:$xj); ++ string AsmString = !strconcat(instr_asm, "\t$xd, $xj"); ++ list Pattern = [(set ROXD:$xd, (OpNode ROXJ:$xj))]; ++} ++ ++class LASX_SET_DESC_BASE { ++ dag OutOperandList = (outs FCFROpnd:$cd); ++ dag InOperandList = (ins ROXD:$xj); ++ string AsmString = !strconcat(instr_asm, "\t$cd, $xj"); ++ list Pattern = []; ++} ++ ++class LASX_2RF_DESC_BASE { ++ dag OutOperandList = (outs ROXD:$xd); ++ dag InOperandList = (ins ROXJ:$xj); ++ string AsmString = !strconcat(instr_asm, "\t$xd, $xj"); ++ list Pattern = [(set ROXD:$xd, (OpNode ROXJ:$xj))]; ++} ++ ++class LASX_I5_DESC_BASE { ++ dag OutOperandList = (outs ROXD:$xd); ++ dag InOperandList = (ins ROXJ:$xj, SplatImm.OpClass:$si5); ++ string AsmString = !strconcat(instr_asm, "\t$xd, $xj, $si5"); ++ list Pattern = [(set ROXD:$xd, (OpNode ROXJ:$xj, SplatImm:$si5))]; ++} ++ ++class LASX_I5_DESC_BASE_Intrinsic { ++ dag OutOperandList = (outs ROXD:$xd); ++ dag InOperandList = (ins ROXJ:$xj, ImmOp:$si5); ++ string AsmString = !strconcat(instr_asm, "\t$xd, $xj, $si5"); ++ list Pattern = [(set ROXD:$xd, (OpNode ROXJ:$xj, Imm:$si5))]; ++} ++ ++ ++class LASX_2R_REPL_DESC_BASE { ++ dag OutOperandList = (outs ROXD:$xd); ++ dag InOperandList = (ins ROS:$rj); ++ string AsmString = !strconcat(instr_asm, "\t$xd, $rj"); ++ list Pattern = [(set ROXD:$xd, (VT (OpNode ROS:$rj)))]; ++} ++ ++class LASX_XVEXTEND_DESC_BASE { ++ dag OutOperandList = (outs ROXD:$xd); ++ dag InOperandList = (ins ROXJ:$xj); ++ string AsmString = !strconcat(instr_asm, "\t$xd, $xj"); ++ list Pattern = [(set ROXD:$xd, (DTy (OpNode (STy ROXJ:$xj))))]; ++} ++ ++class LASX_RORI_U3_DESC_BASE_Intrinsic { ++ dag OutOperandList = (outs ROXD:$xd); ++ dag InOperandList = (ins ROXJ:$xj, ImmOp:$ui3); ++ string AsmString = !strconcat(instr_asm, "\t$xd, $xj, $ui3"); ++ list Pattern = [(set ROXD:$xd, (OpNode ROXJ:$xj, Imm:$ui3))]; ++} ++ ++class LASX_RORI_U4_DESC_BASE_Intrinsic { ++ dag OutOperandList = (outs ROXD:$xd); ++ dag InOperandList = (ins ROXJ:$xj, ImmOp:$ui4); ++ string AsmString = !strconcat(instr_asm, "\t$xd, $xj, $ui4"); ++ list Pattern = [(set ROXD:$xd, (OpNode ROXJ:$xj, Imm:$ui4))]; ++} ++ ++class LASX_RORI_U5_DESC_BASE_Intrinsic { ++ dag OutOperandList = (outs ROXD:$xd); ++ dag InOperandList = (ins ROXJ:$xj, ImmOp:$ui5); ++ string AsmString = !strconcat(instr_asm, "\t$xd, $xj, $ui5"); ++ list Pattern = [(set ROXD:$xd, (OpNode ROXJ:$xj, Imm:$ui5))]; ++} ++ ++class LASX_RORI_U6_DESC_BASE_Intrinsic { ++ dag OutOperandList = (outs ROXD:$xd); ++ dag InOperandList = (ins ROXJ:$xj, ImmOp:$ui6); ++ string AsmString = !strconcat(instr_asm, "\t$xd, $xj, $ui6"); ++ list Pattern = [(set ROXD:$xd, (OpNode ROXJ:$xj, Imm:$ui6))]; ++} ++ ++class LASX_BIT_3_DESC_BASE { ++ dag OutOperandList = (outs ROXD:$xd); ++ dag InOperandList = (ins ROXJ:$xj, 
ImmOp:$ui3); ++ string AsmString = !strconcat(instr_asm, "\t$xd, $xj, $ui3"); ++ list Pattern = [(set ROXD:$xd, (OpNode ROXJ:$xj, Imm:$ui3))]; ++} ++ ++class LASX_BIT_4_DESC_BASE { ++ dag OutOperandList = (outs ROXD:$xd); ++ dag InOperandList = (ins ROXJ:$xj, ImmOp:$ui4); ++ string AsmString = !strconcat(instr_asm, "\t$xd, $xj, $ui4"); ++ list Pattern = [(set ROXD:$xd, (OpNode ROXJ:$xj, Imm:$ui4))]; ++} ++ ++class LASX_BIT_5_DESC_BASE { ++ dag OutOperandList = (outs ROXD:$xd); ++ dag InOperandList = (ins ROXJ:$xj, ImmOp:$ui5); ++ string AsmString = !strconcat(instr_asm, "\t$xd, $xj, $ui5"); ++ list Pattern = [(set ROXD:$xd, (OpNode ROXJ:$xj, Imm:$ui5))]; ++} ++ ++class LASX_BIT_6_DESC_BASE { ++ dag OutOperandList = (outs ROXD:$xd); ++ dag InOperandList = (ins ROXJ:$xj, ImmOp:$ui6); ++ string AsmString = !strconcat(instr_asm, "\t$xd, $xj, $ui6"); ++ list Pattern = [(set ROXD:$xd, (OpNode ROXJ:$xj, Imm:$ui6))]; ++} ++ ++class LASX_BIT_2_4O_DESC_BASE { ++ dag OutOperandList = (outs ROXD:$xd); ++ dag InOperandList = (ins ROXD:$xd_in, ROXJ:$xj, ImmOp:$ui2); ++ string AsmString = !strconcat(instr_asm, "\t$xd, $xj, $ui2"); ++ list Pattern = [(set ROXD:$xd, (OpNode ROXD:$xd_in, ROXJ:$xj, Imm:$ui2))]; ++ string Constraints = "$xd = $xd_in"; ++} ++ ++class LASX_BIT_2_4ON : ++ LASX_BIT_2_4O_DESC_BASE; ++ ++class LASX_BIT_3_4O_DESC_BASE { ++ dag OutOperandList = (outs ROXD:$xd); ++ dag InOperandList = (ins ROXD:$xd_in, ROXJ:$xj, ImmOp:$ui3); ++ string AsmString = !strconcat(instr_asm, "\t$xd, $xj, $ui3"); ++ list Pattern = [(set ROXD:$xd, (OpNode ROXD:$xd_in, ROXJ:$xj, Imm:$ui3))]; ++ string Constraints = "$xd = $xd_in"; ++} ++ ++class LASX_BIT_3_4ON : ++ LASX_BIT_3_4O_DESC_BASE; ++ ++class LASX_INSERT_U3_DESC_BASE { ++ dag OutOperandList = (outs ROXD:$xd); ++ dag InOperandList = (ins ROXD:$xd_in, ROS:$rj, ImmOp:$ui3); ++ string AsmString = !strconcat(instr_asm, "\t$xd, $rj, $ui3"); ++ list Pattern = [(set ROXD:$xd, (VTy (insertelt (VTy ROXD:$xd_in), ROS:$rj, Imm:$ui3)))]; ++ string Constraints = "$xd = $xd_in"; ++} ++ ++class LASX_INSERT_U2_DESC_BASE { ++ dag OutOperandList = (outs ROXD:$xd); ++ dag InOperandList = (ins ROXD:$xd_in, ROS:$rj, ImmOp:$ui2); ++ string AsmString = !strconcat(instr_asm, "\t$xd, $rj, $ui2"); ++ list Pattern = [(set ROXD:$xd, (VTy (insertelt (VTy ROXD:$xd_in), ROS:$rj, Imm:$ui2)))]; ++ string Constraints = "$xd = $xd_in"; ++} ++ ++class LASX_COPY_U2_DESC_BASE { ++ dag OutOperandList = (outs ROD:$rd); ++ dag InOperandList = (ins ROXJ:$xj, ImmOp:$ui2); ++ string AsmString = !strconcat(instr_asm, "\t$rd, $xj, $ui2"); ++ list Pattern = [(set ROD:$rd, (OpNode (VecTy ROXJ:$xj), Imm:$ui2))]; ++} ++ ++class LASX_COPY_U3_DESC_BASE { ++ dag OutOperandList = (outs ROD:$rd); ++ dag InOperandList = (ins ROXJ:$xj, ImmOp:$ui3); ++ string AsmString = !strconcat(instr_asm, "\t$rd, $xj, $ui3"); ++ list Pattern = [(set ROD:$rd, (OpNode (VecTy ROXJ:$xj), Imm:$ui3))]; ++} ++ ++class LASX_ELM_U4_VREPLVE_DESC_BASE_Intrinsic { ++ dag OutOperandList = (outs ROXD:$xd); ++ dag InOperandList = (ins ROXJ:$xj, uimm4:$ui4); ++ string AsmString = !strconcat(instr_asm, "\t$xd, $xj, $ui4"); ++ list Pattern = [(set ROXD:$xd, (OpNode ROXJ:$xj, immZExt4:$ui4))]; ++} ++ ++class LASX_ELM_U3_VREPLVE_DESC_BASE_Intrinsic { ++ dag OutOperandList = (outs ROXD:$xd); ++ dag InOperandList = (ins ROXJ:$xj, uimm3:$ui3); ++ string AsmString = !strconcat(instr_asm, "\t$xd, $xj, $ui3"); ++ list Pattern = [(set ROXD:$xd, (OpNode ROXJ:$xj, immZExt3:$ui3))]; ++} ++ ++class LASX_ELM_U2_VREPLVE_DESC_BASE_Intrinsic { ++ dag 
OutOperandList = (outs ROXD:$xd); ++ dag InOperandList = (ins ROXJ:$xj, uimm2:$ui2); ++ string AsmString = !strconcat(instr_asm, "\t$xd, $xj, $ui2"); ++ list Pattern = [(set ROXD:$xd, (OpNode ROXJ:$xj, immZExt2:$ui2))]; ++} ++ ++class LASX_ELM_U1_VREPLVE_DESC_BASE_Intrinsic { ++ dag OutOperandList = (outs ROXD:$xd); ++ dag InOperandList = (ins ROXJ:$xj, uimm1:$ui1); ++ string AsmString = !strconcat(instr_asm, "\t$xd, $xj, $ui1"); ++ list Pattern = [(set ROXD:$xd, (OpNode ROXJ:$xj, immZExt1:$ui1))]; ++} ++ ++class LASX_XVBROADCAST_DESC_BASE { ++ dag OutOperandList = (outs ROXD:$xd); ++ dag InOperandList = (ins ROXJ:$xj); ++ string AsmString = !strconcat(instr_asm, "\t$xd, $xj"); ++ list Pattern = [(set ROXD:$xd, (OpNode (TyNode ROXJ:$xj)))]; ++} ++ ++class LASX_2R_U3_DESC_BASE { ++ dag OutOperandList = (outs ROXD:$xd); ++ dag InOperandList = (ins ROXJ:$xj, uimm3:$ui3); ++ string AsmString = !strconcat(instr_asm, "\t$xd, $xj, $ui3"); ++ list Pattern = [(set ROXD:$xd, (OpNode ROXJ:$xj, immZExt3:$ui3))]; ++} ++ ++class LASX_2R_U4_DESC_BASE { ++ dag OutOperandList = (outs ROXD:$xd); ++ dag InOperandList = (ins ROXJ:$xj, uimm4:$ui4); ++ string AsmString = !strconcat(instr_asm, "\t$xd, $xj, $ui4"); ++ list Pattern = [(set ROXD:$xd, (OpNode ROXJ:$xj, immZExt4:$ui4))]; ++} ++ ++class LASX_2R_U5_DESC_BASE { ++ dag OutOperandList = (outs ROXD:$xd); ++ dag InOperandList = (ins ROXJ:$xj, uimm5:$ui5); ++ string AsmString = !strconcat(instr_asm, "\t$xd, $xj, $ui5"); ++ list Pattern = [(set ROXD:$xd, (OpNode ROXJ:$xj, immZExt5:$ui5))]; ++} ++ ++class LASX_2R_U6_DESC_BASE { ++ dag OutOperandList = (outs ROXD:$xd); ++ dag InOperandList = (ins ROXJ:$xj, uimm6:$ui6); ++ string AsmString = !strconcat(instr_asm, "\t$xd, $xj, $ui6"); ++ list Pattern = [(set ROXD:$xd, (OpNode ROXJ:$xj, immZExt6:$ui6))]; ++} ++ ++class LASX_BIT_U3_VREPLVE_DESC_BASE { ++ dag OutOperandList = (outs ROXD:$xd); ++ dag InOperandList = (ins ROXJ:$xj, SplatImm.OpClass:$ui3); ++ string AsmString = !strconcat(instr_asm, "\t$xd, $xj, $ui3"); ++ list Pattern = [(set ROXD:$xd, (OpNode ROXJ:$xj, SplatImm:$ui3))]; ++} ++ ++class LASX_BIT_U4_VREPLVE_DESC_BASE { ++ dag OutOperandList = (outs ROXD:$xd); ++ dag InOperandList = (ins ROXJ:$xj, SplatImm.OpClass:$ui4); ++ string AsmString = !strconcat(instr_asm, "\t$xd, $xj, $ui4"); ++ list Pattern = [(set ROXD:$xd, (OpNode ROXJ:$xj, SplatImm:$ui4))]; ++} ++ ++class LASX_BIT_U5_VREPLVE_DESC_BASE { ++ dag OutOperandList = (outs ROXD:$xd); ++ dag InOperandList = (ins ROXJ:$xj, SplatImm.OpClass:$ui5); ++ string AsmString = !strconcat(instr_asm, "\t$xd, $xj, $ui5"); ++ list Pattern = [(set ROXD:$xd, (OpNode ROXJ:$xj, SplatImm:$ui5))]; ++} ++ ++class LASX_BIT_U6_VREPLVE_DESC_BASE { ++ dag OutOperandList = (outs ROXD:$xd); ++ dag InOperandList = (ins ROXJ:$xj, SplatImm.OpClass:$ui6); ++ string AsmString = !strconcat(instr_asm, "\t$xd, $xj, $ui6"); ++ list Pattern = [(set ROXD:$xd, (OpNode ROXJ:$xj, SplatImm:$ui6))]; ++} ++ ++class LASX_BIT_U3_VREPLVE_DESC_BASE_Intrinsic { ++ dag OutOperandList = (outs ROXD:$xd); ++ dag InOperandList = (ins ROXJ:$xj, ImmOp:$ui3); ++ string AsmString = !strconcat(instr_asm, "\t$xd, $xj, $ui3"); ++ list Pattern = [(set ROXD:$xd, (OpNode ROXJ:$xj, Imm:$ui3))]; ++} ++ ++class LASX_BIT_U4_VREPLVE_DESC_BASE_Intrinsic { ++ dag OutOperandList = (outs ROXD:$xd); ++ dag InOperandList = (ins ROXJ:$xj, ImmOp:$ui4); ++ string AsmString = !strconcat(instr_asm, "\t$xd, $xj, $ui4"); ++ list Pattern = [(set ROXD:$xd, (OpNode ROXJ:$xj, Imm:$ui4))]; ++} ++ ++class 
LASX_BIT_U5_VREPLVE_DESC_BASE_Intrinsic { ++ dag OutOperandList = (outs ROXD:$xd); ++ dag InOperandList = (ins ROXJ:$xj, ImmOp:$ui5); ++ string AsmString = !strconcat(instr_asm, "\t$xd, $xj, $ui5"); ++ list Pattern = [(set ROXD:$xd, (OpNode ROXJ:$xj, Imm:$ui5))]; ++} ++ ++class LASX_BIT_U6_VREPLVE_DESC_BASE_Intrinsic { ++ dag OutOperandList = (outs ROXD:$xd); ++ dag InOperandList = (ins ROXJ:$xj, ImmOp:$ui6); ++ string AsmString = !strconcat(instr_asm, "\t$xd, $xj, $ui6"); ++ list Pattern = [(set ROXD:$xd, (OpNode ROXJ:$xj, Imm:$ui6))]; ++} ++ ++class LASX_U4_DESC_BASE { ++ dag OutOperandList = (outs ROXD:$xd); ++ dag InOperandList = (ins ROXD:$xd_in,ROXJ:$xj, ImmOp:$ui4); ++ string AsmString = !strconcat(instr_asm, "\t$xd, $xj, $ui4"); ++ list Pattern = [(set ROXD:$xd, (OpNode ROXD:$xd_in,ROXJ:$xj, Imm:$ui4))]; ++ string Constraints = "$xd = $xd_in"; ++} ++ ++class LASX_N4_U5_DESC_BASE { ++ dag OutOperandList = (outs ROXD:$xd); ++ dag InOperandList = (ins ROXD:$xd_in, ROXJ:$xj, ImmOp:$ui5); ++ string AsmString = !strconcat(instr_asm, "\t$xd, $xj, $ui5"); ++ list Pattern = [(set ROXD:$xd, (OpNode ROXD:$xd_in, ROXJ:$xj, Imm:$ui5))]; ++ string Constraints = "$xd = $xd_in"; ++} ++ ++class LASX_U6_DESC_BASE { ++ dag OutOperandList = (outs ROXD:$xd); ++ dag InOperandList = (ins ROXD:$xd_in, ROXJ:$xj, ImmOp:$ui6); ++ string AsmString = !strconcat(instr_asm, "\t$xd, $xj, $ui6"); ++ list Pattern = [(set ROXD:$xd, (OpNode ROXD:$xd_in, ROXJ:$xj, Imm:$ui6))]; ++ string Constraints = "$xd = $xd_in"; ++} ++ ++class LASX_D_DESC_BASE { ++ dag OutOperandList = (outs ROXD:$xd); ++ dag InOperandList = (ins ROXD:$xd_in, ROXJ:$xj, uimm7:$ui7); ++ string AsmString = !strconcat(instr_asm, "\t$xd, $xj, $ui7"); ++ list Pattern = [(set ROXD:$xd, (OpNode ROXD:$xd_in, ROXJ:$xj, immZExt7:$ui7))]; ++ string Constraints = "$xd = $xd_in"; ++} ++ ++class LASX_2R_3R_U4_DESC_BASE { ++ dag OutOperandList = (outs ROXD:$xd); ++ dag InOperandList = (ins ROXD:$xd_in, ROXJ:$xj, uimm4:$ui4); ++ string AsmString = !strconcat(instr_asm, "\t$xd, $xj, $ui4"); ++ list Pattern = [(set ROXD:$xd, (OpNode ROXD:$xd_in, ROXJ:$xj, immZExt4:$ui4))]; ++ string Constraints = "$xd = $xd_in"; ++} ++ ++class LASX_2R_3R_U5_DESC_BASE { ++ dag OutOperandList = (outs ROXD:$xd); ++ dag InOperandList = (ins ROXD:$xd_in, ROXJ:$xj, uimm5:$ui5); ++ string AsmString = !strconcat(instr_asm, "\t$xd, $xj, $ui5"); ++ list Pattern = [(set ROXD:$xd, (OpNode ROXD:$xd_in, ROXJ:$xj, immZExt5:$ui5))]; ++ string Constraints = "$xd = $xd_in"; ++} ++ ++class LASX_2R_3R_U6_DESC_BASE { ++ dag OutOperandList = (outs ROXD:$xd); ++ dag InOperandList = (ins ROXD:$xd_in, ROXJ:$xj, uimm6:$ui6); ++ string AsmString = !strconcat(instr_asm, "\t$xd, $xj, $ui6"); ++ list Pattern = [(set ROXD:$xd, (OpNode ROXD:$xd_in, ROXJ:$xj, immZExt6:$ui6))]; ++ string Constraints = "$xd = $xd_in"; ++} ++ ++class LASX_2R_3R_U7_DESC_BASE { ++ dag OutOperandList = (outs ROXD:$xd); ++ dag InOperandList = (ins ROXD:$xd_in, ROXJ:$xj, uimm7:$ui7); ++ string AsmString = !strconcat(instr_asm, "\t$xd, $xj, $ui7"); ++ list Pattern = [(set ROXD:$xd, (OpNode ROXD:$xd_in, ROXJ:$xj, immZExt7:$ui7))]; ++ string Constraints = "$xd = $xd_in"; ++} ++ ++class LASX_2R_3R_U8_DESC_BASE { ++ dag OutOperandList = (outs ROXD:$xd); ++ dag InOperandList = (ins ROXD:$xd_in, ROXJ:$xj, uimm8:$ui8); ++ string AsmString = !strconcat(instr_asm, "\t$xd, $xj, $ui8"); ++ list Pattern = [(set ROXD:$xd, (OpNode ROXD:$xd_in, ROXJ:$xj, immZExt8:$ui8))]; ++ string Constraints = "$xd = $xd_in"; ++} ++ ++class LASX_2R_3R_U8_SELECT { ++ dag 
OutOperandList = (outs ROXD:$xd); ++ dag InOperandList = (ins ROXD:$xd_in, ROXJ:$xj, vsplat_uimm8:$ui8); ++ string AsmString = !strconcat(instr_asm, "\t$xd, $xj, $ui8"); ++ list Pattern = [(set ROXD:$xd, (OpNode ROXD:$xd_in, xvsplati8_uimm8:$ui8, ROXJ:$xj))]; ++ string Constraints = "$xd = $xd_in"; ++} ++ ++class LASX_I8_O4_SHF_DESC_BASE { ++ dag OutOperandList = (outs ROXD:$xd); ++ dag InOperandList = (ins ROXD:$xd_in, ROXJ:$xj, uimm8:$ui8); ++ string AsmString = !strconcat(instr_asm, "\t$xd, $xj, $ui8"); ++ list Pattern = [(set ROXD:$xd, (OpNode ROXD:$xd_in, ROXJ:$xj, immZExt8:$ui8))]; ++ string Constraints = "$xd = $xd_in"; ++} ++ ++class LASX_I8_SHF_DESC_BASE { ++ dag OutOperandList = (outs ROXD:$xd); ++ dag InOperandList = (ins ROXJ:$xj, uimm8:$ui8); ++ string AsmString = !strconcat(instr_asm, "\t$xd, $xj, $ui8"); ++ list Pattern = [(set ROXD:$xd, (OpNode ROXJ:$xj, immZExt8:$ui8))]; ++} ++ ++class LASX_2R_U8_DESC_BASE { ++ dag OutOperandList = (outs ROXD:$xd); ++ dag InOperandList = (ins ROXJ:$xj, uimm8:$ui8); ++ string AsmString = !strconcat(instr_asm, "\t$xd, $xj, $ui8"); ++ list Pattern = [(set ROXD:$xd, (OpNode ROXJ:$xj, immZExt8:$ui8))]; ++} ++ ++class LASX_I13_DESC_BASE { ++ dag OutOperandList = (outs ROXD:$xd); ++ dag InOperandList = (ins immOp:$i13); ++ string AsmString = !strconcat(instr_asm, "\t$xd, $i13"); ++ list Pattern = [(set ROXD:$xd, (OpNode (Ty simm13:$i13)))]; ++ string DecoderMethod = "DecodeLASX256Mem13"; ++} ++ ++class LASX_I13_DESC_BASE_tmp { ++ dag OutOperandList = (outs ROXD:$xd); ++ dag InOperandList = (ins vsplat_simm10:$i10); ++ string AsmString = !strconcat(instr_asm, "\t$xd, $i10"); ++ list Pattern = []; ++ bit hasSideEffects = 0; ++ string DecoderMethod = "DecodeLASX256Mem10"; ++} ++ ++class LASX_BIT_U8_DESC_BASE { ++ dag OutOperandList = (outs ROXD:$xd); ++ dag InOperandList = (ins ROXJ:$xj, SplatImm.OpClass:$ui8); ++ string AsmString = !strconcat(instr_asm, "\t$xd, $xj, $ui8"); ++ list Pattern = [(set ROXD:$xd, (OpNode ROXJ:$xj, SplatImm:$ui8))]; ++} ++ ++class LASX_2RN_3R_U8_DESC_BASE { ++ dag OutOperandList = (outs ROXD:$xd); ++ dag InOperandList = (ins ROXD:$xd_in, ROXJ:$xj, uimm8:$ui8); ++ string AsmString = !strconcat(instr_asm, "\t$xd, $xj, $ui8"); ++ list Pattern = []; ++ string Constraints = "$xd = $xd_in"; ++} ++ ++ ++//encoding ++ ++def XVFMADD_S : LASX_4R<0b000010100001>, ++ LASX_4RF<"xvfmadd.s", int_loongarch_lasx_xvfmadd_s, LASX256WOpnd>; ++ ++def XVFMADD_D : LASX_4R<0b000010100010>, ++ LASX_4RF<"xvfmadd.d", int_loongarch_lasx_xvfmadd_d, LASX256DOpnd>; ++ ++ ++def XVFMSUB_S : LASX_4R<0b000010100101>, ++ LASX_4RF<"xvfmsub.s", int_loongarch_lasx_xvfmsub_s, LASX256WOpnd>; ++ ++def XVFMSUB_D : LASX_4R<0b000010100110>, ++ LASX_4RF<"xvfmsub.d", int_loongarch_lasx_xvfmsub_d, LASX256DOpnd>; ++ ++ ++def XVFNMADD_S : LASX_4R<0b000010101001>, ++ LASX_4RF<"xvfnmadd.s", int_loongarch_lasx_xvfnmadd_s, LASX256WOpnd>; ++ ++def XVFNMADD_D : LASX_4R<0b000010101010>, ++ LASX_4RF<"xvfnmadd.d", int_loongarch_lasx_xvfnmadd_d, LASX256DOpnd>; ++ ++ ++def XVFNMSUB_S : LASX_4R<0b000010101101>, ++ LASX_4RF<"xvfnmsub.s", int_loongarch_lasx_xvfnmsub_s, LASX256WOpnd>; ++ ++def XVFNMSUB_D : LASX_4R<0b000010101110>, ++ LASX_4RF<"xvfnmsub.d", int_loongarch_lasx_xvfnmsub_d, LASX256DOpnd>; ++ ++ ++// xvfmadd: xj * xk + xa ++def : LASXPat<(fma v4f64:$xj, v4f64:$xk, v4f64:$xa), ++ (XVFMADD_D $xj, $xk, $xa)>; ++ ++def : LASXPat<(fma v8f32:$xj, v8f32:$xk, v8f32:$xa), ++ (XVFMADD_S $xj, $xk, $xa)>; ++ ++ ++// xvfmsub: xj * xk - xa ++def : LASXPat<(fma v4f64:$xj, v4f64:$xk, (fneg 
v4f64:$xa)), ++ (XVFMSUB_D v4f64:$xj, v4f64:$xk, v4f64:$xa)>; ++ ++def : LASXPat<(fma v8f32:$xj, v8f32:$xk, (fneg v8f32:$xa)), ++ (XVFMSUB_S v8f32:$xj, v8f32:$xk, v8f32:$xa)>; ++ ++ ++// xvfnmadd: -(xj * xk + xa) ++def : LASXPat<(fma (fneg v4f64:$xj), v4f64:$xk, (fneg v4f64:$xa)), ++ (XVFNMADD_D v4f64:$xj, v4f64:$xk, v4f64:$xa)>; ++ ++def : LASXPat<(fma (fneg v8f32:$xj), v8f32:$xk, (fneg v8f32:$xa)), ++ (XVFNMADD_S v8f32:$xj, v8f32:$xk, v8f32:$xa)>; ++ ++// xvfnmsub: -(xj * xk - xa) ++def : LASXPat<(fma (fneg v4f64:$xj), v4f64:$xk, v4f64:$xa), ++ (XVFNMSUB_D v4f64:$xj, v4f64:$xk, v4f64:$xa)>; ++ ++def : LASXPat<(fma (fneg v8f32:$xj), v8f32:$xk, v8f32:$xa), ++ (XVFNMSUB_S v8f32:$xj, v8f32:$xk, v8f32:$xa)>; ++ ++ ++def XVFCMP_CAF_S : LASX_XVFCMP<0b000011001001>, ++ LASX_3RF<"xvfcmp.caf.s", int_loongarch_lasx_xvfcmp_caf_s, LASX256WOpnd>{ ++ bits<5> cond=0x0; ++ } ++ ++def XVFCMP_CAF_D : LASX_XVFCMP<0b000011001010>, ++ LASX_3RF<"xvfcmp.caf.d", int_loongarch_lasx_xvfcmp_caf_d, LASX256DOpnd>{ ++ bits<5> cond=0x0; ++ } ++ ++def XVFCMP_COR_S : LASX_XVFCMP<0b000011001001>, ++ LASX_3RF<"xvfcmp.cor.s", vfsetord_v8f32, LASX256WOpnd>{ ++ bits<5> cond=0x14; ++ } ++ ++def XVFCMP_COR_D : LASX_XVFCMP<0b000011001010>, ++ LASX_3RF<"xvfcmp.cor.d", vfsetord_v4f64, LASX256DOpnd>{ ++ bits<5> cond=0x14; ++ } ++ ++def XVFCMP_CUN_S : LASX_XVFCMP<0b000011001001>, ++ LASX_3RF<"xvfcmp.cun.s", vfsetun_v8f32, LASX256WOpnd>{ ++ bits<5> cond=0x8; ++ } ++ ++def XVFCMP_CUN_D : LASX_XVFCMP<0b000011001010>, ++ LASX_3RF<"xvfcmp.cun.d", vfsetun_v4f64, LASX256DOpnd>{ ++ bits<5> cond=0x8; ++ } ++ ++def XVFCMP_CUNE_S : LASX_XVFCMP<0b000011001001>, ++ LASX_3RF<"xvfcmp.cune.s", vfsetune_v8f32, LASX256WOpnd>{ ++ bits<5> cond=0x18; ++ } ++ ++def XVFCMP_CUNE_D : LASX_XVFCMP<0b000011001010>, ++ LASX_3RF<"xvfcmp.cune.d", vfsetune_v4f64, LASX256DOpnd>{ ++ bits<5> cond=0x18; ++ } ++ ++def XVFCMP_CUEQ_S : LASX_XVFCMP<0b000011001001>, ++ LASX_3RF<"xvfcmp.cueq.s", vfsetueq_v8f32, LASX256WOpnd>{ ++ bits<5> cond=0xc; ++ } ++ ++def XVFCMP_CUEQ_D : LASX_XVFCMP<0b000011001010>, ++ LASX_3RF<"xvfcmp.cueq.d", vfsetueq_v4f64, LASX256DOpnd>{ ++ bits<5> cond=0xc; ++ } ++ ++def XVFCMP_CEQ_S : LASX_XVFCMP<0b000011001001>, ++ LASX_3RF<"xvfcmp.ceq.s", vfsetoeq_v8f32, LASX256WOpnd>{ ++ bits<5> cond=0x4; ++ } ++ ++def XVFCMP_CEQ_D : LASX_XVFCMP<0b000011001010>, ++ LASX_3RF<"xvfcmp.ceq.d", vfsetoeq_v4f64, LASX256DOpnd>{ ++ bits<5> cond=0x4; ++ } ++ ++def XVFCMP_CNE_S : LASX_XVFCMP<0b000011001001>, ++ LASX_3RF<"xvfcmp.cne.s", vfsetone_v8f32, LASX256WOpnd>{ ++ bits<5> cond=0x10; ++ } ++ ++def XVFCMP_CNE_D : LASX_XVFCMP<0b000011001010>, ++ LASX_3RF<"xvfcmp.cne.d", vfsetone_v4f64, LASX256DOpnd>{ ++ bits<5> cond=0x10; ++ } ++ ++def XVFCMP_CLT_S : LASX_XVFCMP<0b000011001001>, ++ LASX_3RF<"xvfcmp.clt.s", vfsetolt_v8f32, LASX256WOpnd>{ ++ bits<5> cond=0x2; ++ } ++ ++def XVFCMP_CLT_D : LASX_XVFCMP<0b000011001010>, ++ LASX_3RF<"xvfcmp.clt.d", vfsetolt_v4f64, LASX256DOpnd>{ ++ bits<5> cond=0x2; ++ } ++ ++def XVFCMP_CULT_S : LASX_XVFCMP<0b000011001001>, ++ LASX_3RF<"xvfcmp.cult.s", vfsetult_v8f32, LASX256WOpnd>{ ++ bits<5> cond=0xa; ++ } ++ ++def XVFCMP_CULT_D : LASX_XVFCMP<0b000011001010>, ++ LASX_3RF<"xvfcmp.cult.d", vfsetult_v4f64, LASX256DOpnd>{ ++ bits<5> cond=0xa; ++ } ++ ++def XVFCMP_CLE_S : LASX_XVFCMP<0b000011001001>, ++ LASX_3RF<"xvfcmp.cle.s", vfsetole_v8f32, LASX256WOpnd>{ ++ bits<5> cond=0x6; ++ } ++ ++def XVFCMP_CLE_D : LASX_XVFCMP<0b000011001010>, ++ LASX_3RF<"xvfcmp.cle.d", vfsetole_v4f64, LASX256DOpnd>{ ++ bits<5> cond=0x6; ++ } ++ ++def XVFCMP_CULE_S : 
LASX_XVFCMP<0b000011001001>, ++ LASX_3RF<"xvfcmp.cule.s", vfsetule_v8f32, LASX256WOpnd>{ ++ bits<5> cond=0xe; ++ } ++ ++def XVFCMP_CULE_D : LASX_XVFCMP<0b000011001010>, ++ LASX_3RF<"xvfcmp.cule.d", vfsetule_v4f64, LASX256DOpnd>{ ++ bits<5> cond=0xe; ++ } ++ ++def XVFCMP_SAF_S : LASX_XVFCMP<0b000011001001>, ++ LASX_3RF<"xvfcmp.saf.s", int_loongarch_lasx_xvfcmp_saf_s, LASX256WOpnd>{ ++ bits<5> cond=0x1; ++ } ++ ++def XVFCMP_SAF_D : LASX_XVFCMP<0b000011001010>, ++ LASX_3RF<"xvfcmp.saf.d", int_loongarch_lasx_xvfcmp_saf_d, LASX256DOpnd>{ ++ bits<5> cond=0x1; ++ } ++ ++def XVFCMP_SOR_S : LASX_XVFCMP<0b000011001001>, ++ LASX_3RF<"xvfcmp.sor.s", int_loongarch_lasx_xvfcmp_sor_s, LASX256WOpnd>{ ++ bits<5> cond=0x15; ++ } ++ ++def XVFCMP_SOR_D : LASX_XVFCMP<0b000011001010>, ++ LASX_3RF<"xvfcmp.sor.d", int_loongarch_lasx_xvfcmp_sor_d, LASX256DOpnd>{ ++ bits<5> cond=0x15; ++ } ++ ++def XVFCMP_SUN_S : LASX_XVFCMP<0b000011001001>, ++ LASX_3RF<"xvfcmp.sun.s", int_loongarch_lasx_xvfcmp_sun_s, LASX256WOpnd>{ ++ bits<5> cond=0x9; ++ } ++ ++def XVFCMP_SUN_D : LASX_XVFCMP<0b000011001010>, ++ LASX_3RF<"xvfcmp.sun.d", int_loongarch_lasx_xvfcmp_sun_d, LASX256DOpnd>{ ++ bits<5> cond=0x9; ++ } ++ ++def XVFCMP_SUNE_S : LASX_XVFCMP<0b000011001001>, ++ LASX_3RF<"xvfcmp.sune.s", int_loongarch_lasx_xvfcmp_sune_s, LASX256WOpnd>{ ++ bits<5> cond=0x19; ++ } ++ ++def XVFCMP_SUNE_D : LASX_XVFCMP<0b000011001010>, ++ LASX_3RF<"xvfcmp.sune.d", int_loongarch_lasx_xvfcmp_sune_d, LASX256DOpnd>{ ++ bits<5> cond=0x19; ++ } ++ ++def XVFCMP_SUEQ_S : LASX_XVFCMP<0b000011001001>, ++ LASX_3RF<"xvfcmp.sueq.s", int_loongarch_lasx_xvfcmp_sueq_s, LASX256WOpnd>{ ++ bits<5> cond=0xd; ++ } ++ ++def XVFCMP_SUEQ_D : LASX_XVFCMP<0b000011001010>, ++ LASX_3RF<"xvfcmp.sueq.d", int_loongarch_lasx_xvfcmp_sueq_d, LASX256DOpnd>{ ++ bits<5> cond=0xd; ++ } ++ ++def XVFCMP_SEQ_S : LASX_XVFCMP<0b000011001001>, ++ LASX_3RF<"xvfcmp.seq.s", int_loongarch_lasx_xvfcmp_seq_s, LASX256WOpnd>{ ++ bits<5> cond=0x5; ++ } ++ ++def XVFCMP_SEQ_D : LASX_XVFCMP<0b000011001010>, ++ LASX_3RF<"xvfcmp.seq.d", int_loongarch_lasx_xvfcmp_seq_d, LASX256DOpnd>{ ++ bits<5> cond=0x5; ++ } ++ ++def XVFCMP_SNE_S : LASX_XVFCMP<0b000011001001>, ++ LASX_3RF<"xvfcmp.sne.s", int_loongarch_lasx_xvfcmp_sne_s, LASX256WOpnd>{ ++ bits<5> cond=0x11; ++ } ++ ++def XVFCMP_SNE_D : LASX_XVFCMP<0b000011001010>, ++ LASX_3RF<"xvfcmp.sne.d", int_loongarch_lasx_xvfcmp_sne_d, LASX256DOpnd>{ ++ bits<5> cond=0x11; ++ } ++ ++def XVFCMP_SLT_S : LASX_XVFCMP<0b000011001001>, ++ LASX_3RF<"xvfcmp.slt.s", int_loongarch_lasx_xvfcmp_slt_s, LASX256WOpnd>{ ++ bits<5> cond=0x3; ++ } ++ ++def XVFCMP_SLT_D : LASX_XVFCMP<0b000011001010>, ++ LASX_3RF<"xvfcmp.slt.d", int_loongarch_lasx_xvfcmp_slt_d, LASX256DOpnd>{ ++ bits<5> cond=0x3; ++ } ++ ++def XVFCMP_SULT_S : LASX_XVFCMP<0b000011001001>, ++ LASX_3RF<"xvfcmp.sult.s", int_loongarch_lasx_xvfcmp_sult_s, LASX256WOpnd>{ ++ bits<5> cond=0xb; ++ } ++ ++def XVFCMP_SULT_D : LASX_XVFCMP<0b000011001010>, ++ LASX_3RF<"xvfcmp.sult.d", int_loongarch_lasx_xvfcmp_sult_d, LASX256DOpnd>{ ++ bits<5> cond=0xb; ++ } ++ ++def XVFCMP_SLE_S : LASX_XVFCMP<0b000011001001>, ++ LASX_3RF<"xvfcmp.sle.s", int_loongarch_lasx_xvfcmp_sle_s, LASX256WOpnd>{ ++ bits<5> cond=0x7; ++ } ++ ++def XVFCMP_SLE_D : LASX_XVFCMP<0b000011001010>, ++ LASX_3RF<"xvfcmp.sle.d", int_loongarch_lasx_xvfcmp_sle_d, LASX256DOpnd>{ ++ bits<5> cond=0x7; ++ } ++ ++def XVFCMP_SULE_S : LASX_XVFCMP<0b000011001001>, ++ LASX_3RF<"xvfcmp.sule.s", int_loongarch_lasx_xvfcmp_sule_s, LASX256WOpnd>{ ++ bits<5> cond=0xf; ++ } ++ ++def 
XVFCMP_SULE_D : LASX_XVFCMP<0b000011001010>, ++ LASX_3RF<"xvfcmp.sule.d", int_loongarch_lasx_xvfcmp_sule_d, LASX256DOpnd>{ ++ bits<5> cond=0xf; ++ } ++ ++ ++def XVBITSEL_V : LASX_4R<0b000011010010>, ++ LASX_4RF<"xvbitsel.v", int_loongarch_lasx_xvbitsel_v, LASX256BOpnd>; ++ ++class LASX_BSEL_PSEUDO_BASE : ++ LASXPseudo<(outs RO:$xd), (ins RO:$xd_in, RO:$xs, RO:$xt), ++ [(set RO:$xd, (Ty (vselect RO:$xd_in, RO:$xt, RO:$xs)))]>, ++ PseudoInstExpansion<(XVBITSEL_V LASX256BOpnd:$xd, LASX256BOpnd:$xs, ++ LASX256BOpnd:$xt, LASX256BOpnd:$xd_in)> { ++ let Constraints = "$xd_in = $xd"; ++} ++ ++def XBSEL_B_PSEUDO : LASX_BSEL_PSEUDO_BASE; ++def XBSEL_H_PSEUDO : LASX_BSEL_PSEUDO_BASE; ++def XBSEL_W_PSEUDO : LASX_BSEL_PSEUDO_BASE; ++def XBSEL_D_PSEUDO : LASX_BSEL_PSEUDO_BASE; ++def XBSEL_FW_PSEUDO : LASX_BSEL_PSEUDO_BASE; ++def XBSEL_FD_PSEUDO : LASX_BSEL_PSEUDO_BASE; ++ ++ ++ ++def XVSHUF_B : LASX_4R<0b000011010110>, ++ LASX_4RF<"xvshuf.b", int_loongarch_lasx_xvshuf_b, LASX256BOpnd>; ++ ++ ++def XVLD : LASX_I12_S<0b0010110010>, ++ LASX_LD<"xvld", load, v32i8, LASX256BOpnd, mem>; ++ ++def XVST : LASX_I12_S<0b0010110011>, ++ LASX_ST<"xvst", store, v32i8, LASX256BOpnd, mem_simm12>; ++ ++ ++class LASX_LD_DESC_BASE { ++ dag OutOperandList = (outs ROXD:$xd); ++ dag InOperandList = (ins MemOpnd:$addr); ++ string AsmString = !strconcat(instr_asm, "\t$xd, $addr"); ++ list Pattern = [(set ROXD:$xd, (OpNode (TyNode (load Addr:$addr))))]; ++ string DecoderMethod = "DecodeLASX256memlsl"; ++} ++ ++ ++def XVLDREPL_B : LASX_SI12_S<0b0011001010>, ++ LASX_LD_DESC_BASE<"xvldrepl.b", xvbroadcast_v32i8, v32i8, LASX256BOpnd>; ++ ++def XVLDREPL_H : LASX_SI11_S<0b00110010010>, ++ LASX_LD_DESC_BASE<"xvldrepl.h", xvbroadcast_v16i16, v16i16, LASX256HOpnd, mem_simm11_lsl1, addrimm11lsl1>; ++ ++def XVLDREPL_W : LASX_SI10_S<0b001100100010>, ++ LASX_LD_DESC_BASE<"xvldrepl.w", xvbroadcast_v8i32, v8i32, LASX256WOpnd, mem_simm10_lsl2, addrimm10lsl2>; ++ ++def XVLDREPL_D : LASX_SI9_S<0b0011001000010>, ++ LASX_LD_DESC_BASE<"xvldrepl.d", xvbroadcast_v4i64, v4i64, LASX256DOpnd, mem_simm9_lsl3, addrimm9lsl3>; ++ ++ ++def XVSTELM_B : LASX_SI8_idx5<0b001100111>, ++ LASX_I8_U5_DESC_BASE<"xvstelm.b", int_loongarch_lasx_xvstelm_b, simm8_32, immSExt8, LASX256BOpnd, GPR32Opnd>; ++ ++def XVSTELM_H : LASX_SI8_idx4<0b0011001101>, ++ LASX_I8_U4_DESC_BASE<"xvstelm.h", int_loongarch_lasx_xvstelm_h, immSExt8_1_O, immSExt8, LASX256HOpnd, GPR32Opnd>; ++ ++def XVSTELM_W : LASX_SI8_idx3<0b00110011001>, ++ LASX_I8_U3_DESC_BASE<"xvstelm.w", int_loongarch_lasx_xvstelm_w, immSExt8_2_O, immSExt8, LASX256WOpnd, GPR32Opnd>; ++ ++def XVSTELM_D : LASX_SI8_idx2<0b001100110001>, ++ LASX_I8_U2_DESC_BASE<"xvstelm.d", int_loongarch_lasx_xvstelm_d, immSExt8_3_O, immSExt8, LASX256DOpnd, GPR32Opnd>; ++ ++let mayLoad = 1, canFoldAsLoad = 1 in { ++ def XVLDX : LASX_3R_2GP<0b00111000010010000>, ++ LASX_LDX_LA<"xvldx", int_loongarch_lasx_xvldx, GPR64Opnd, LASX256BOpnd>; ++} ++ ++let mayStore = 1 in{ ++ def XVSTX : LASX_3R_2GP<0b00111000010011000>, ++ LASX_SDX_LA<"xvstx", int_loongarch_lasx_xvstx, GPR64Opnd, LASX256BOpnd>; ++} ++ ++ ++def XVSEQ_B : LASX_3R<0b01110100000000000>, IsCommutable, ++ LASX_3R_SETCC_DESC_BASE<"xvseq.b", SETEQ, v32i8, LASX256BOpnd>; ++ ++def XVSEQ_H : LASX_3R<0b01110100000000001>, IsCommutable, ++ LASX_3R_SETCC_DESC_BASE<"xvseq.h", SETEQ, v16i16, LASX256HOpnd>; ++ ++def XVSEQ_W : LASX_3R<0b01110100000000010>, IsCommutable, ++ LASX_3R_SETCC_DESC_BASE<"xvseq.w", SETEQ, v8i32, LASX256WOpnd> ; ++ ++def XVSEQ_D : LASX_3R<0b01110100000000011>, IsCommutable, ++ 
LASX_3R_SETCC_DESC_BASE<"xvseq.d", SETEQ, v4i64, LASX256DOpnd>; ++ ++ ++def XVSLE_B : LASX_3R<0b01110100000000100>, ++ LASX_3R_SETCC_DESC_BASE<"xvsle.b", SETLE, v32i8, LASX256BOpnd>; ++ ++def XVSLE_H : LASX_3R<0b01110100000000101>, ++ LASX_3R_SETCC_DESC_BASE<"xvsle.h", SETLE, v16i16, LASX256HOpnd>; ++ ++def XVSLE_W : LASX_3R<0b01110100000000110>, ++ LASX_3R_SETCC_DESC_BASE<"xvsle.w", SETLE, v8i32, LASX256WOpnd>; ++ ++def XVSLE_D : LASX_3R<0b01110100000000111>, ++ LASX_3R_SETCC_DESC_BASE<"xvsle.d", SETLE, v4i64, LASX256DOpnd>; ++ ++ ++def XVSLE_BU : LASX_3R<0b01110100000001000>, ++ LASX_3R_SETCC_DESC_BASE<"xvsle.bu", SETULE, v32i8, LASX256BOpnd>; ++ ++def XVSLE_HU : LASX_3R<0b01110100000001001>, ++ LASX_3R_SETCC_DESC_BASE<"xvsle.hu", SETULE, v16i16, LASX256HOpnd>; ++ ++def XVSLE_WU : LASX_3R<0b01110100000001010>, ++ LASX_3R_SETCC_DESC_BASE<"xvsle.wu", SETULE, v8i32, LASX256WOpnd>; ++ ++def XVSLE_DU : LASX_3R<0b01110100000001011>, ++ LASX_3R_SETCC_DESC_BASE<"xvsle.du", SETULE, v4i64, LASX256DOpnd>; ++ ++ ++def XVSLT_B : LASX_3R<0b01110100000001100>, ++ LASX_3R_SETCC_DESC_BASE<"xvslt.b", SETLT, v32i8, LASX256BOpnd>; ++ ++def XVSLT_H : LASX_3R<0b01110100000001101>, ++ LASX_3R_SETCC_DESC_BASE<"xvslt.h", SETLT, v16i16, LASX256HOpnd>; ++ ++def XVSLT_W : LASX_3R<0b01110100000001110>, ++ LASX_3R_SETCC_DESC_BASE<"xvslt.w", SETLT, v8i32, LASX256WOpnd>; ++ ++def XVSLT_D : LASX_3R<0b01110100000001111>, ++ LASX_3R_SETCC_DESC_BASE<"xvslt.d", SETLT, v4i64, LASX256DOpnd>; ++ ++ ++def XVSLT_BU : LASX_3R<0b01110100000010000>, ++ LASX_3R_SETCC_DESC_BASE<"xvslt.bu", SETULT, v32i8, LASX256BOpnd>; ++ ++def XVSLT_HU : LASX_3R<0b01110100000010001>, ++ LASX_3R_SETCC_DESC_BASE<"xvslt.hu", SETULT, v16i16, LASX256HOpnd>; ++ ++def XVSLT_WU : LASX_3R<0b01110100000010010>, ++ LASX_3R_SETCC_DESC_BASE<"xvslt.wu", SETULT, v8i32, LASX256WOpnd>; ++ ++def XVSLT_DU : LASX_3R<0b01110100000010011>, ++ LASX_3R_SETCC_DESC_BASE<"xvslt.du", SETULT, v4i64, LASX256DOpnd>; ++ ++ ++def XVADD_B : LASX_3R<0b01110100000010100>, IsCommutable, ++ LASX_3R_DESC_BASE<"xvadd.b", add, LASX256BOpnd>; ++ ++def XVADD_H : LASX_3R<0b01110100000010101>, IsCommutable, ++ LASX_3R_DESC_BASE<"xvadd.h", add, LASX256HOpnd>; ++ ++def XVADD_W : LASX_3R<0b01110100000010110>, IsCommutable, ++ LASX_3R_DESC_BASE<"xvadd.w", add, LASX256WOpnd>; ++ ++def XVADD_D : LASX_3R<0b01110100000010111>, IsCommutable, ++ LASX_3R_DESC_BASE<"xvadd.d", add, LASX256DOpnd>; ++ ++ ++def XVSUB_B : LASX_3R<0b01110100000011000>, ++ LASX_3R_DESC_BASE<"xvsub.b", sub, LASX256BOpnd>; ++ ++def XVSUB_H : LASX_3R<0b01110100000011001>, ++ LASX_3R_DESC_BASE<"xvsub.h", sub, LASX256HOpnd>; ++ ++def XVSUB_W : LASX_3R<0b01110100000011010>, ++ LASX_3R_DESC_BASE<"xvsub.w", sub, LASX256WOpnd>; ++ ++def XVSUB_D : LASX_3R<0b01110100000011011>, ++ LASX_3R_DESC_BASE<"xvsub.d", sub, LASX256DOpnd>; ++ ++ ++def XVADDWEV_H_B : LASX_3R<0b01110100000111100>, ++ LASX_3R_DESC_BASE<"xvaddwev.h.b", int_loongarch_lasx_xvaddwev_h_b, LASX256HOpnd, LASX256BOpnd, LASX256BOpnd>; ++ ++def XVADDWEV_W_H : LASX_3R<0b01110100000111101>, ++ LASX_3R_DESC_BASE<"xvaddwev.w.h", int_loongarch_lasx_xvaddwev_w_h, LASX256WOpnd, LASX256HOpnd, LASX256HOpnd>; ++ ++def XVADDWEV_D_W : LASX_3R<0b01110100000111110>, ++ LASX_3R_DESC_BASE<"xvaddwev.d.w", int_loongarch_lasx_xvaddwev_d_w, LASX256DOpnd, LASX256WOpnd, LASX256WOpnd> ; ++ ++def XVADDWEV_Q_D : LASX_3R<0b01110100000111111>, ++ LASX_3R_DESC_BASE<"xvaddwev.q.d", int_loongarch_lasx_xvaddwev_q_d, LASX256DOpnd, LASX256DOpnd, LASX256DOpnd>; ++ ++ ++def XVSUBWEV_H_B : 
LASX_3R<0b01110100001000000>, ++ LASX_3R_DESC_BASE<"xvsubwev.h.b", int_loongarch_lasx_xvsubwev_h_b, LASX256HOpnd, LASX256BOpnd, LASX256BOpnd>; ++ ++def XVSUBWEV_W_H : LASX_3R<0b01110100001000001>, ++ LASX_3R_DESC_BASE<"xvsubwev.w.h", int_loongarch_lasx_xvsubwev_w_h, LASX256WOpnd, LASX256HOpnd, LASX256HOpnd>; ++ ++def XVSUBWEV_D_W : LASX_3R<0b01110100001000010>, ++ LASX_3R_DESC_BASE<"xvsubwev.d.w", int_loongarch_lasx_xvsubwev_d_w, LASX256DOpnd, LASX256WOpnd, LASX256WOpnd> ; ++ ++def XVSUBWEV_Q_D : LASX_3R<0b01110100001000011>, ++ LASX_3R_DESC_BASE<"xvsubwev.q.d", int_loongarch_lasx_xvsubwev_q_d, LASX256DOpnd, LASX256DOpnd, LASX256DOpnd>; ++ ++ ++def XVADDWOD_H_B : LASX_3R<0b01110100001000100>, ++ LASX_3R_DESC_BASE<"xvaddwod.h.b", int_loongarch_lasx_xvaddwod_h_b, LASX256HOpnd, LASX256BOpnd, LASX256BOpnd>; ++ ++def XVADDWOD_W_H : LASX_3R<0b01110100001000101>, ++ LASX_3R_DESC_BASE<"xvaddwod.w.h", int_loongarch_lasx_xvaddwod_w_h, LASX256WOpnd, LASX256HOpnd, LASX256HOpnd>; ++ ++def XVADDWOD_D_W : LASX_3R<0b01110100001000110>, ++ LASX_3R_DESC_BASE<"xvaddwod.d.w", int_loongarch_lasx_xvaddwod_d_w, LASX256DOpnd, LASX256WOpnd, LASX256WOpnd> ; ++ ++def XVADDWOD_Q_D : LASX_3R<0b01110100001000111>, ++ LASX_3R_DESC_BASE<"xvaddwod.q.d", int_loongarch_lasx_xvaddwod_q_d, LASX256DOpnd, LASX256DOpnd, LASX256DOpnd>; ++ ++ ++def XVSUBWOD_H_B : LASX_3R<0b01110100001001000>, ++ LASX_3R_DESC_BASE<"xvsubwod.h.b", int_loongarch_lasx_xvsubwod_h_b, LASX256HOpnd, LASX256BOpnd, LASX256BOpnd>; ++ ++def XVSUBWOD_W_H : LASX_3R<0b01110100001001001>, ++ LASX_3R_DESC_BASE<"xvsubwod.w.h", int_loongarch_lasx_xvsubwod_w_h, LASX256WOpnd, LASX256HOpnd, LASX256HOpnd>; ++ ++def XVSUBWOD_D_W : LASX_3R<0b01110100001001010>, ++ LASX_3R_DESC_BASE<"xvsubwod.d.w", int_loongarch_lasx_xvsubwod_d_w, LASX256DOpnd, LASX256WOpnd, LASX256WOpnd> ; ++ ++def XVSUBWOD_Q_D : LASX_3R<0b01110100001001011>, ++ LASX_3R_DESC_BASE<"xvsubwod.q.d", int_loongarch_lasx_xvsubwod_q_d, LASX256DOpnd, LASX256DOpnd, LASX256DOpnd>; ++ ++ ++def XVADDWEV_H_BU : LASX_3R<0b01110100001011100>, ++ LASX_3R_DESC_BASE<"xvaddwev.h.bu", int_loongarch_lasx_xvaddwev_h_bu, LASX256HOpnd, LASX256BOpnd, LASX256BOpnd>; ++ ++def XVADDWEV_W_HU : LASX_3R<0b01110100001011101>, ++ LASX_3R_DESC_BASE<"xvaddwev.w.hu", int_loongarch_lasx_xvaddwev_w_hu, LASX256WOpnd, LASX256HOpnd, LASX256HOpnd>; ++ ++def XVADDWEV_D_WU : LASX_3R<0b01110100001011110>, ++ LASX_3R_DESC_BASE<"xvaddwev.d.wu", int_loongarch_lasx_xvaddwev_d_wu, LASX256DOpnd, LASX256WOpnd, LASX256WOpnd> ; ++ ++def XVADDWEV_Q_DU : LASX_3R<0b01110100001011111>, ++ LASX_3R_DESC_BASE<"xvaddwev.q.du", int_loongarch_lasx_xvaddwev_q_du, LASX256DOpnd, LASX256DOpnd, LASX256DOpnd>; ++ ++ ++def XVSUBWEV_H_BU : LASX_3R<0b01110100001100000>, ++ LASX_3R_DESC_BASE<"xvsubwev.h.bu", int_loongarch_lasx_xvsubwev_h_bu, LASX256HOpnd, LASX256BOpnd, LASX256BOpnd>; ++ ++def XVSUBWEV_W_HU : LASX_3R<0b01110100001100001>, ++ LASX_3R_DESC_BASE<"xvsubwev.w.hu", int_loongarch_lasx_xvsubwev_w_hu, LASX256WOpnd, LASX256HOpnd, LASX256HOpnd>; ++ ++def XVSUBWEV_D_WU : LASX_3R<0b01110100001100010>, ++ LASX_3R_DESC_BASE<"xvsubwev.d.wu", int_loongarch_lasx_xvsubwev_d_wu, LASX256DOpnd, LASX256WOpnd, LASX256WOpnd> ; ++ ++def XVSUBWEV_Q_DU : LASX_3R<0b01110100001100011>, ++ LASX_3R_DESC_BASE<"xvsubwev.q.du", int_loongarch_lasx_xvsubwev_q_du, LASX256DOpnd, LASX256DOpnd, LASX256DOpnd>; ++ ++ ++def XVADDWOD_H_BU : LASX_3R<0b01110100001100100>, ++ LASX_3R_DESC_BASE<"xvaddwod.h.bu", int_loongarch_lasx_xvaddwod_h_bu, LASX256HOpnd, LASX256BOpnd, LASX256BOpnd>; ++ ++def XVADDWOD_W_HU : 
LASX_3R<0b01110100001100101>, ++ LASX_3R_DESC_BASE<"xvaddwod.w.hu", int_loongarch_lasx_xvaddwod_w_hu, LASX256WOpnd, LASX256HOpnd, LASX256HOpnd>; ++ ++def XVADDWOD_D_WU : LASX_3R<0b01110100001100110>, ++ LASX_3R_DESC_BASE<"xvaddwod.d.wu", int_loongarch_lasx_xvaddwod_d_wu, LASX256DOpnd, LASX256WOpnd, LASX256WOpnd> ; ++ ++def XVADDWOD_Q_DU : LASX_3R<0b01110100001100111>, ++ LASX_3R_DESC_BASE<"xvaddwod.q.du", int_loongarch_lasx_xvaddwod_q_du, LASX256DOpnd, LASX256DOpnd, LASX256DOpnd>; ++ ++ ++def XVSUBWOD_H_BU : LASX_3R<0b01110100001101000>, ++ LASX_3R_DESC_BASE<"xvsubwod.h.bu", int_loongarch_lasx_xvsubwod_h_bu, LASX256HOpnd, LASX256BOpnd, LASX256BOpnd>; ++ ++def XVSUBWOD_W_HU : LASX_3R<0b01110100001101001>, ++ LASX_3R_DESC_BASE<"xvsubwod.w.hu", int_loongarch_lasx_xvsubwod_w_hu, LASX256WOpnd, LASX256HOpnd, LASX256HOpnd>; ++ ++def XVSUBWOD_D_WU : LASX_3R<0b01110100001101010>, ++ LASX_3R_DESC_BASE<"xvsubwod.d.wu", int_loongarch_lasx_xvsubwod_d_wu, LASX256DOpnd, LASX256WOpnd, LASX256WOpnd> ; ++ ++def XVSUBWOD_Q_DU : LASX_3R<0b01110100001101011>, ++ LASX_3R_DESC_BASE<"xvsubwod.q.du", int_loongarch_lasx_xvsubwod_q_du, LASX256DOpnd, LASX256DOpnd, LASX256DOpnd>; ++ ++ ++def XVADDWEV_H_BU_B : LASX_3R<0b01110100001111100>, ++ LASX_3R_DESC_BASE<"xvaddwev.h.bu.b", int_loongarch_lasx_xvaddwev_h_bu_b, LASX256HOpnd, LASX256BOpnd, LASX256BOpnd>; ++ ++def XVADDWEV_W_HU_H : LASX_3R<0b01110100001111101>, ++ LASX_3R_DESC_BASE<"xvaddwev.w.hu.h", int_loongarch_lasx_xvaddwev_w_hu_h, LASX256WOpnd, LASX256HOpnd, LASX256HOpnd>; ++ ++def XVADDWEV_D_WU_W : LASX_3R<0b01110100001111110>, ++ LASX_3R_DESC_BASE<"xvaddwev.d.wu.w", int_loongarch_lasx_xvaddwev_d_wu_w, LASX256DOpnd, LASX256WOpnd, LASX256WOpnd> ; ++ ++def XVADDWEV_Q_DU_D : LASX_3R<0b01110100001111111>, ++ LASX_3R_DESC_BASE<"xvaddwev.q.du.d", int_loongarch_lasx_xvaddwev_q_du_d, LASX256DOpnd, LASX256DOpnd, LASX256DOpnd>; ++ ++ ++def XVADDWOD_H_BU_B : LASX_3R<0b01110100010000000>, ++ LASX_3R_DESC_BASE<"xvaddwod.h.bu.b", int_loongarch_lasx_xvaddwod_h_bu_b, LASX256HOpnd, LASX256BOpnd, LASX256BOpnd>; ++ ++def XVADDWOD_W_HU_H : LASX_3R<0b01110100010000001>, ++ LASX_3R_DESC_BASE<"xvaddwod.w.hu.h", int_loongarch_lasx_xvaddwod_w_hu_h, LASX256WOpnd, LASX256HOpnd, LASX256HOpnd>; ++ ++def XVADDWOD_D_WU_W : LASX_3R<0b01110100010000010>, ++ LASX_3R_DESC_BASE<"xvaddwod.d.wu.w", int_loongarch_lasx_xvaddwod_d_wu_w, LASX256DOpnd, LASX256WOpnd, LASX256WOpnd> ; ++ ++def XVADDWOD_Q_DU_D : LASX_3R<0b01110100010000011>, ++ LASX_3R_DESC_BASE<"xvaddwod.q.du.d", int_loongarch_lasx_xvaddwod_q_du_d, LASX256DOpnd, LASX256DOpnd, LASX256DOpnd>; ++ ++ ++def XVSADD_B : LASX_3R<0b01110100010001100>, IsCommutable, ++ LASX_3R_DESC_BASE<"xvsadd.b", saddsat, LASX256BOpnd>; ++ ++def XVSADD_H : LASX_3R<0b01110100010001101>, IsCommutable, ++ LASX_3R_DESC_BASE<"xvsadd.h", saddsat, LASX256HOpnd>; ++ ++def XVSADD_W : LASX_3R<0b01110100010001110>, IsCommutable, ++ LASX_3R_DESC_BASE<"xvsadd.w", saddsat, LASX256WOpnd>; ++ ++def XVSADD_D : LASX_3R<0b01110100010001111>, IsCommutable, ++ LASX_3R_DESC_BASE<"xvsadd.d", saddsat, LASX256DOpnd>; ++ ++ ++def XVSSUB_B : LASX_3R<0b01110100010010000>, ++ LASX_3R_DESC_BASE<"xvssub.b", ssubsat, LASX256BOpnd>; ++ ++def XVSSUB_H : LASX_3R<0b01110100010010001>, ++ LASX_3R_DESC_BASE<"xvssub.h", ssubsat, LASX256HOpnd>; ++ ++def XVSSUB_W : LASX_3R<0b01110100010010010>, ++ LASX_3R_DESC_BASE<"xvssub.w", ssubsat, LASX256WOpnd>; ++ ++def XVSSUB_D : LASX_3R<0b01110100010010011>, ++ LASX_3R_DESC_BASE<"xvssub.d", ssubsat, LASX256DOpnd>; ++ ++ ++def XVSADD_BU : 
LASX_3R<0b01110100010010100>, IsCommutable, ++ LASX_3R_DESC_BASE<"xvsadd.bu", uaddsat, LASX256BOpnd>; ++ ++def XVSADD_HU : LASX_3R<0b01110100010010101>, IsCommutable, ++ LASX_3R_DESC_BASE<"xvsadd.hu", uaddsat, LASX256HOpnd>; ++ ++def XVSADD_WU : LASX_3R<0b01110100010010110>, IsCommutable, ++ LASX_3R_DESC_BASE<"xvsadd.wu", uaddsat, LASX256WOpnd>; ++ ++def XVSADD_DU : LASX_3R<0b01110100010010111>, IsCommutable, ++ LASX_3R_DESC_BASE<"xvsadd.du", uaddsat, LASX256DOpnd>; ++ ++ ++def XVSSUB_BU : LASX_3R<0b01110100010011000>, ++ LASX_3R_DESC_BASE<"xvssub.bu", usubsat, LASX256BOpnd>; ++ ++def XVSSUB_HU : LASX_3R<0b01110100010011001>, ++ LASX_3R_DESC_BASE<"xvssub.hu", usubsat, LASX256HOpnd>; ++ ++def XVSSUB_WU : LASX_3R<0b01110100010011010>, ++ LASX_3R_DESC_BASE<"xvssub.wu", usubsat, LASX256WOpnd>; ++ ++def XVSSUB_DU : LASX_3R<0b01110100010011011>, ++ LASX_3R_DESC_BASE<"xvssub.du", usubsat, LASX256DOpnd>; ++ ++ ++def XVHADDW_H_B : LASX_3R<0b01110100010101000>, ++ LASX_3R_DESC_BASE<"xvhaddw.h.b", int_loongarch_lasx_xvhaddw_h_b, LASX256HOpnd, LASX256BOpnd, LASX256BOpnd>; ++ ++def XVHADDW_W_H : LASX_3R<0b01110100010101001>, ++ LASX_3R_DESC_BASE<"xvhaddw.w.h", int_loongarch_lasx_xvhaddw_w_h, LASX256WOpnd, LASX256HOpnd, LASX256HOpnd>; ++ ++def XVHADDW_D_W : LASX_3R<0b01110100010101010>, ++ LASX_3R_DESC_BASE<"xvhaddw.d.w", int_loongarch_lasx_xvhaddw_d_w, LASX256DOpnd, LASX256WOpnd, LASX256WOpnd>; ++ ++def XVHADDW_Q_D : LASX_3R<0b01110100010101011>, ++ LASX_3R_DESC_BASE<"xvhaddw.q.d", int_loongarch_lasx_xvhaddw_q_d, LASX256DOpnd, LASX256DOpnd, LASX256DOpnd>; ++ ++ ++ ++def XVHSUBW_H_B : LASX_3R<0b01110100010101100>, ++ LASX_3R_DESC_BASE<"xvhsubw.h.b", int_loongarch_lasx_xvhsubw_h_b, LASX256HOpnd, LASX256BOpnd, LASX256BOpnd>; ++ ++def XVHSUBW_W_H : LASX_3R<0b01110100010101101>, ++ LASX_3R_DESC_BASE<"xvhsubw.w.h", int_loongarch_lasx_xvhsubw_w_h, LASX256WOpnd, LASX256HOpnd, LASX256HOpnd>; ++ ++def XVHSUBW_D_W : LASX_3R<0b01110100010101110>, ++ LASX_3R_DESC_BASE<"xvhsubw.d.w", int_loongarch_lasx_xvhsubw_d_w, LASX256DOpnd, LASX256WOpnd, LASX256WOpnd>; ++ ++def XVHSUBW_Q_D : LASX_3R<0b01110100010101111>, ++ LASX_3R_DESC_BASE<"xvhsubw.q.d", int_loongarch_lasx_xvhsubw_q_d, LASX256DOpnd, LASX256DOpnd, LASX256DOpnd>; ++ ++ ++def XVHADDW_HU_BU : LASX_3R<0b01110100010110000>, ++ LASX_3R_DESC_BASE<"xvhaddw.hu.bu", int_loongarch_lasx_xvhaddw_hu_bu, LASX256HOpnd, LASX256BOpnd, LASX256BOpnd>; ++ ++def XVHADDW_WU_HU : LASX_3R<0b01110100010110001>, ++ LASX_3R_DESC_BASE<"xvhaddw.wu.hu", int_loongarch_lasx_xvhaddw_wu_hu, LASX256WOpnd, LASX256HOpnd, LASX256HOpnd>; ++ ++def XVHADDW_DU_WU : LASX_3R<0b01110100010110010>, ++ LASX_3R_DESC_BASE<"xvhaddw.du.wu", int_loongarch_lasx_xvhaddw_du_wu, LASX256DOpnd, LASX256WOpnd, LASX256WOpnd>; ++ ++def XVHADDW_QU_DU : LASX_3R<0b01110100010110011>, ++ LASX_3R_DESC_BASE<"xvhaddw.qu.du", int_loongarch_lasx_xvhaddw_qu_du, LASX256DOpnd, LASX256DOpnd, LASX256DOpnd>; ++ ++ ++ ++def XVHSUBW_HU_BU : LASX_3R<0b01110100010110100>, ++ LASX_3R_DESC_BASE<"xvhsubw.hu.bu", int_loongarch_lasx_xvhsubw_hu_bu, LASX256HOpnd, LASX256BOpnd, LASX256BOpnd>; ++ ++def XVHSUBW_WU_HU : LASX_3R<0b01110100010110101>, ++ LASX_3R_DESC_BASE<"xvhsubw.wu.hu", int_loongarch_lasx_xvhsubw_wu_hu, LASX256WOpnd, LASX256HOpnd, LASX256HOpnd>; ++ ++def XVHSUBW_DU_WU : LASX_3R<0b01110100010110110>, ++ LASX_3R_DESC_BASE<"xvhsubw.du.wu", int_loongarch_lasx_xvhsubw_du_wu, LASX256DOpnd, LASX256WOpnd, LASX256WOpnd>; ++ ++def XVHSUBW_QU_DU : LASX_3R<0b01110100010110111>, ++ LASX_3R_DESC_BASE<"xvhsubw.qu.du", 
int_loongarch_lasx_xvhsubw_qu_du, LASX256DOpnd, LASX256DOpnd, LASX256DOpnd>; ++ ++ ++def XVADDA_B : LASX_3R<0b01110100010111000>, IsCommutable, ++ LASX_3R_DESC_BASE<"xvadda.b", int_loongarch_lasx_xvadda_b, LASX256BOpnd>; ++ ++def XVADDA_H : LASX_3R<0b01110100010111001>, IsCommutable, ++ LASX_3R_DESC_BASE<"xvadda.h", int_loongarch_lasx_xvadda_h, LASX256HOpnd>; ++ ++def XVADDA_W : LASX_3R<0b01110100010111010>, IsCommutable, ++ LASX_3R_DESC_BASE<"xvadda.w", int_loongarch_lasx_xvadda_w, LASX256WOpnd>; ++ ++def XVADDA_D : LASX_3R<0b01110100010111011>, IsCommutable, ++ LASX_3R_DESC_BASE<"xvadda.d", int_loongarch_lasx_xvadda_d, LASX256DOpnd>; ++ ++ ++def XVABSD_B : LASX_3R<0b01110100011000000>, ++ LASX_3R_DESC_BASE<"xvabsd.b", int_loongarch_lasx_xvabsd_b, LASX256BOpnd>; ++ ++def XVABSD_H : LASX_3R<0b01110100011000001>, ++ LASX_3R_DESC_BASE<"xvabsd.h", int_loongarch_lasx_xvabsd_h, LASX256HOpnd>; ++ ++def XVABSD_W : LASX_3R<0b01110100011000010>, ++ LASX_3R_DESC_BASE<"xvabsd.w", int_loongarch_lasx_xvabsd_w, LASX256WOpnd>; ++ ++def XVABSD_D : LASX_3R<0b01110100011000011>, ++ LASX_3R_DESC_BASE<"xvabsd.d", int_loongarch_lasx_xvabsd_d, LASX256DOpnd>; ++ ++ ++def XVABSD_BU : LASX_3R<0b01110100011000100>, ++ LASX_3R_DESC_BASE<"xvabsd.bu", int_loongarch_lasx_xvabsd_bu, LASX256BOpnd>; ++ ++def XVABSD_HU : LASX_3R<0b01110100011000101>, ++ LASX_3R_DESC_BASE<"xvabsd.hu", int_loongarch_lasx_xvabsd_hu, LASX256HOpnd>; ++ ++def XVABSD_WU : LASX_3R<0b01110100011000110>, ++ LASX_3R_DESC_BASE<"xvabsd.wu", int_loongarch_lasx_xvabsd_wu, LASX256WOpnd>; ++ ++def XVABSD_DU : LASX_3R<0b01110100011000111>, ++ LASX_3R_DESC_BASE<"xvabsd.du", int_loongarch_lasx_xvabsd_du, LASX256DOpnd>; ++ ++ ++def XVAVG_B : LASX_3R<0b01110100011001000>, IsCommutable, ++ LASX_3R_DESC_BASE<"xvavg.b", int_loongarch_lasx_xvavg_b, LASX256BOpnd>; ++ ++def XVAVG_H : LASX_3R<0b01110100011001001>, IsCommutable, ++ LASX_3R_DESC_BASE<"xvavg.h", int_loongarch_lasx_xvavg_h, LASX256HOpnd>; ++ ++def XVAVG_W : LASX_3R<0b01110100011001010>, IsCommutable, ++ LASX_3R_DESC_BASE<"xvavg.w", int_loongarch_lasx_xvavg_w, LASX256WOpnd>; ++ ++def XVAVG_D : LASX_3R<0b01110100011001011>, IsCommutable, ++ LASX_3R_DESC_BASE<"xvavg.d", int_loongarch_lasx_xvavg_d, LASX256DOpnd>; ++ ++ ++def XVAVG_BU : LASX_3R<0b01110100011001100>, IsCommutable, ++ LASX_3R_DESC_BASE<"xvavg.bu", int_loongarch_lasx_xvavg_bu, LASX256BOpnd>; ++ ++def XVAVG_HU : LASX_3R<0b01110100011001101>, IsCommutable, ++ LASX_3R_DESC_BASE<"xvavg.hu", int_loongarch_lasx_xvavg_hu, LASX256HOpnd>; ++ ++def XVAVG_WU : LASX_3R<0b01110100011001110>, IsCommutable, ++ LASX_3R_DESC_BASE<"xvavg.wu", int_loongarch_lasx_xvavg_wu, LASX256WOpnd>; ++ ++def XVAVG_DU : LASX_3R<0b01110100011001111>, IsCommutable, ++ LASX_3R_DESC_BASE<"xvavg.du", int_loongarch_lasx_xvavg_du, LASX256DOpnd>; ++ ++ ++def XVAVGR_B : LASX_3R<0b01110100011010000>, IsCommutable, ++ LASX_3R_DESC_BASE<"xvavgr.b", int_loongarch_lasx_xvavgr_b, LASX256BOpnd>; ++ ++def XVAVGR_H : LASX_3R<0b01110100011010001>, IsCommutable, ++ LASX_3R_DESC_BASE<"xvavgr.h", int_loongarch_lasx_xvavgr_h, LASX256HOpnd>; ++ ++def XVAVGR_W : LASX_3R<0b01110100011010010>, IsCommutable, ++ LASX_3R_DESC_BASE<"xvavgr.w", int_loongarch_lasx_xvavgr_w, LASX256WOpnd>; ++ ++def XVAVGR_D : LASX_3R<0b01110100011010011>, IsCommutable, ++ LASX_3R_DESC_BASE<"xvavgr.d", int_loongarch_lasx_xvavgr_d, LASX256DOpnd>; ++ ++ ++def XVAVGR_BU : LASX_3R<0b01110100011010100>, IsCommutable, ++ LASX_3R_DESC_BASE<"xvavgr.bu", int_loongarch_lasx_xvavgr_bu, LASX256BOpnd>; ++ ++def XVAVGR_HU : 
LASX_3R<0b01110100011010101>, IsCommutable, ++ LASX_3R_DESC_BASE<"xvavgr.hu", int_loongarch_lasx_xvavgr_hu, LASX256HOpnd>; ++ ++def XVAVGR_WU : LASX_3R<0b01110100011010110>, IsCommutable, ++ LASX_3R_DESC_BASE<"xvavgr.wu", int_loongarch_lasx_xvavgr_wu, LASX256WOpnd>; ++ ++def XVAVGR_DU : LASX_3R<0b01110100011010111>, IsCommutable, ++ LASX_3R_DESC_BASE<"xvavgr.du", int_loongarch_lasx_xvavgr_du, LASX256DOpnd>; ++ ++ ++def XVMAX_B : LASX_3R<0b01110100011100000>, ++ LASX_3R_DESC_BASE<"xvmax.b", smax, LASX256BOpnd>; ++ ++def XVMAX_H : LASX_3R<0b01110100011100001>, ++ LASX_3R_DESC_BASE<"xvmax.h", smax, LASX256HOpnd>; ++ ++def XVMAX_W : LASX_3R<0b01110100011100010>, ++ LASX_3R_DESC_BASE<"xvmax.w", smax, LASX256WOpnd>; ++ ++def XVMAX_D : LASX_3R<0b01110100011100011>, ++ LASX_3R_DESC_BASE<"xvmax.d", smax, LASX256DOpnd>; ++ ++ ++def XVMIN_B : LASX_3R<0b01110100011100100>, ++ LASX_3R_DESC_BASE<"xvmin.b", smin, LASX256BOpnd>; ++ ++def XVMIN_H : LASX_3R<0b01110100011100101>, ++ LASX_3R_DESC_BASE<"xvmin.h", smin, LASX256HOpnd>; ++ ++def XVMIN_W : LASX_3R<0b01110100011100110>, ++ LASX_3R_DESC_BASE<"xvmin.w", smin, LASX256WOpnd>; ++ ++def XVMIN_D : LASX_3R<0b01110100011100111>, ++ LASX_3R_DESC_BASE<"xvmin.d", smin, LASX256DOpnd>; ++ ++ ++def XVMAX_BU : LASX_3R<0b01110100011101000>, ++ LASX_3R_DESC_BASE<"xvmax.bu", umax, LASX256BOpnd>; ++ ++def XVMAX_HU : LASX_3R<0b01110100011101001>, ++ LASX_3R_DESC_BASE<"xvmax.hu", umax, LASX256HOpnd>; ++ ++def XVMAX_WU : LASX_3R<0b01110100011101010>, ++ LASX_3R_DESC_BASE<"xvmax.wu", umax, LASX256WOpnd>; ++ ++def XVMAX_DU : LASX_3R<0b01110100011101011>, ++ LASX_3R_DESC_BASE<"xvmax.du", umax, LASX256DOpnd>; ++ ++ ++def XVMIN_BU : LASX_3R<0b01110100011101100>, ++ LASX_3R_DESC_BASE<"xvmin.bu", umin, LASX256BOpnd>; ++ ++def XVMIN_HU : LASX_3R<0b01110100011101101>, ++ LASX_3R_DESC_BASE<"xvmin.hu", umin, LASX256HOpnd>; ++ ++def XVMIN_WU : LASX_3R<0b01110100011101110>, ++ LASX_3R_DESC_BASE<"xvmin.wu", umin, LASX256WOpnd>; ++ ++def XVMIN_DU : LASX_3R<0b01110100011101111>, ++ LASX_3R_DESC_BASE<"xvmin.du", umin, LASX256DOpnd>; ++ ++ ++def XVMUL_B : LASX_3R<0b01110100100001000>, ++ LASX_3R_DESC_BASE<"xvmul.b", mul, LASX256BOpnd>, IsCommutable; ++ ++def XVMUL_H : LASX_3R<0b01110100100001001>, ++ LASX_3R_DESC_BASE<"xvmul.h", mul, LASX256HOpnd>, IsCommutable; ++ ++def XVMUL_W : LASX_3R<0b01110100100001010>, ++ LASX_3R_DESC_BASE<"xvmul.w", mul, LASX256WOpnd>, IsCommutable; ++ ++def XVMUL_D : LASX_3R<0b01110100100001011>, ++ LASX_3R_DESC_BASE<"xvmul.d", mul, LASX256DOpnd>, IsCommutable; ++ ++ ++def XVMUH_B : LASX_3R<0b01110100100001100>, ++ LASX_3R_DESC_BASE<"xvmuh.b", int_loongarch_lasx_xvmuh_b, LASX256BOpnd, LASX256BOpnd, LASX256BOpnd>; ++ ++def XVMUH_H : LASX_3R<0b01110100100001101>, ++ LASX_3R_DESC_BASE<"xvmuh.h", int_loongarch_lasx_xvmuh_h, LASX256HOpnd, LASX256HOpnd, LASX256HOpnd>; ++ ++def XVMUH_W : LASX_3R<0b01110100100001110>, ++ LASX_3R_DESC_BASE<"xvmuh.w", int_loongarch_lasx_xvmuh_w, LASX256WOpnd, LASX256WOpnd, LASX256WOpnd> ; ++ ++def XVMUH_D : LASX_3R<0b01110100100001111>, ++ LASX_3R_DESC_BASE<"xvmuh.d", int_loongarch_lasx_xvmuh_d, LASX256DOpnd, LASX256DOpnd, LASX256DOpnd>; ++ ++ ++def XVMUH_BU : LASX_3R<0b01110100100010000>, ++ LASX_3R_DESC_BASE<"xvmuh.bu", int_loongarch_lasx_xvmuh_bu, LASX256BOpnd, LASX256BOpnd, LASX256BOpnd>; ++ ++def XVMUH_HU : LASX_3R<0b01110100100010001>, ++ LASX_3R_DESC_BASE<"xvmuh.hu", int_loongarch_lasx_xvmuh_hu, LASX256HOpnd, LASX256HOpnd, LASX256HOpnd>; ++ ++def XVMUH_WU : LASX_3R<0b01110100100010010>, ++ LASX_3R_DESC_BASE<"xvmuh.wu", 
int_loongarch_lasx_xvmuh_wu, LASX256WOpnd, LASX256WOpnd, LASX256WOpnd> ; ++ ++def XVMUH_DU : LASX_3R<0b01110100100010011>, ++ LASX_3R_DESC_BASE<"xvmuh.du", int_loongarch_lasx_xvmuh_du, LASX256DOpnd, LASX256DOpnd, LASX256DOpnd>; ++ ++ ++def XVMULWEV_H_B : LASX_3R<0b01110100100100000>, ++ LASX_3R_DESC_BASE<"xvmulwev.h.b", int_loongarch_lasx_xvmulwev_h_b, LASX256HOpnd, LASX256BOpnd, LASX256BOpnd>; ++ ++def XVMULWEV_W_H : LASX_3R<0b01110100100100001>, ++ LASX_3R_DESC_BASE<"xvmulwev.w.h", int_loongarch_lasx_xvmulwev_w_h, LASX256WOpnd, LASX256HOpnd, LASX256HOpnd>; ++ ++def XVMULWEV_D_W : LASX_3R<0b01110100100100010>, ++ LASX_3R_DESC_BASE<"xvmulwev.d.w", int_loongarch_lasx_xvmulwev_d_w, LASX256DOpnd, LASX256WOpnd, LASX256WOpnd> ; ++ ++def XVMULWEV_Q_D : LASX_3R<0b01110100100100011>, ++ LASX_3R_DESC_BASE<"xvmulwev.q.d", int_loongarch_lasx_xvmulwev_q_d, LASX256DOpnd, LASX256DOpnd, LASX256DOpnd>; ++ ++ ++def XVMULWOD_H_B : LASX_3R<0b01110100100100100>, ++ LASX_3R_DESC_BASE<"xvmulwod.h.b", int_loongarch_lasx_xvmulwod_h_b, LASX256HOpnd, LASX256BOpnd, LASX256BOpnd>; ++ ++def XVMULWOD_W_H : LASX_3R<0b01110100100100101>, ++ LASX_3R_DESC_BASE<"xvmulwod.w.h", int_loongarch_lasx_xvmulwod_w_h, LASX256WOpnd, LASX256HOpnd, LASX256HOpnd>; ++ ++def XVMULWOD_D_W : LASX_3R<0b01110100100100110>, ++ LASX_3R_DESC_BASE<"xvmulwod.d.w", int_loongarch_lasx_xvmulwod_d_w, LASX256DOpnd, LASX256WOpnd, LASX256WOpnd> ; ++ ++def XVMULWOD_Q_D : LASX_3R<0b01110100100100111>, ++ LASX_3R_DESC_BASE<"xvmulwod.q.d", int_loongarch_lasx_xvmulwod_q_d, LASX256DOpnd, LASX256DOpnd, LASX256DOpnd>; ++ ++ ++def XVMULWEV_H_BU : LASX_3R<0b01110100100110000>, ++ LASX_3R_DESC_BASE<"xvmulwev.h.bu", int_loongarch_lasx_xvmulwev_h_bu, LASX256HOpnd, LASX256BOpnd, LASX256BOpnd>; ++ ++def XVMULWEV_W_HU : LASX_3R<0b01110100100110001>, ++ LASX_3R_DESC_BASE<"xvmulwev.w.hu", int_loongarch_lasx_xvmulwev_w_hu, LASX256WOpnd, LASX256HOpnd, LASX256HOpnd>; ++ ++def XVMULWEV_D_WU : LASX_3R<0b01110100100110010>, ++ LASX_3R_DESC_BASE<"xvmulwev.d.wu", int_loongarch_lasx_xvmulwev_d_wu, LASX256DOpnd, LASX256WOpnd, LASX256WOpnd> ; ++ ++def XVMULWEV_Q_DU : LASX_3R<0b01110100100110011>, ++ LASX_3R_DESC_BASE<"xvmulwev.q.du", int_loongarch_lasx_xvmulwev_q_du, LASX256DOpnd, LASX256DOpnd, LASX256DOpnd>; ++ ++ ++def XVMULWOD_H_BU : LASX_3R<0b01110100100110100>, ++ LASX_3R_DESC_BASE<"xvmulwod.h.bu", int_loongarch_lasx_xvmulwod_h_bu, LASX256HOpnd, LASX256BOpnd, LASX256BOpnd>; ++ ++def XVMULWOD_W_HU : LASX_3R<0b01110100100110101>, ++ LASX_3R_DESC_BASE<"xvmulwod.w.hu", int_loongarch_lasx_xvmulwod_w_hu, LASX256WOpnd, LASX256HOpnd, LASX256HOpnd>; ++ ++def XVMULWOD_D_WU : LASX_3R<0b01110100100110110>, ++ LASX_3R_DESC_BASE<"xvmulwod.d.wu", int_loongarch_lasx_xvmulwod_d_wu, LASX256DOpnd, LASX256WOpnd, LASX256WOpnd> ; ++ ++def XVMULWOD_Q_DU : LASX_3R<0b01110100100110111>, ++ LASX_3R_DESC_BASE<"xvmulwod.q.du", int_loongarch_lasx_xvmulwod_q_du, LASX256DOpnd, LASX256DOpnd, LASX256DOpnd>; ++ ++ ++def XVMULWEV_H_BU_B : LASX_3R<0b01110100101000000>, ++ LASX_3R_DESC_BASE<"xvmulwev.h.bu.b", int_loongarch_lasx_xvmulwev_h_bu_b, LASX256HOpnd, LASX256BOpnd, LASX256BOpnd>; ++ ++def XVMULWEV_W_HU_H : LASX_3R<0b01110100101000001>, ++ LASX_3R_DESC_BASE<"xvmulwev.w.hu.h", int_loongarch_lasx_xvmulwev_w_hu_h, LASX256WOpnd, LASX256HOpnd, LASX256HOpnd>; ++ ++def XVMULWEV_D_WU_W : LASX_3R<0b01110100101000010>, ++ LASX_3R_DESC_BASE<"xvmulwev.d.wu.w", int_loongarch_lasx_xvmulwev_d_wu_w, LASX256DOpnd, LASX256WOpnd, LASX256WOpnd> ; ++ ++def XVMULWEV_Q_DU_D : LASX_3R<0b01110100101000011>, ++ 
LASX_3R_DESC_BASE<"xvmulwev.q.du.d", int_loongarch_lasx_xvmulwev_q_du_d, LASX256DOpnd, LASX256DOpnd, LASX256DOpnd>; ++ ++ ++def XVMULWOD_H_BU_B : LASX_3R<0b01110100101000100>, ++ LASX_3R_DESC_BASE<"xvmulwod.h.bu.b", int_loongarch_lasx_xvmulwod_h_bu_b, LASX256HOpnd, LASX256BOpnd, LASX256BOpnd>; ++ ++def XVMULWOD_W_HU_H : LASX_3R<0b01110100101000101>, ++ LASX_3R_DESC_BASE<"xvmulwod.w.hu.h", int_loongarch_lasx_xvmulwod_w_hu_h, LASX256WOpnd, LASX256HOpnd, LASX256HOpnd>; ++ ++def XVMULWOD_D_WU_W : LASX_3R<0b01110100101000110>, ++ LASX_3R_DESC_BASE<"xvmulwod.d.wu.w", int_loongarch_lasx_xvmulwod_d_wu_w, LASX256DOpnd, LASX256WOpnd, LASX256WOpnd> ; ++ ++def XVMULWOD_Q_DU_D : LASX_3R<0b01110100101000111>, ++ LASX_3R_DESC_BASE<"xvmulwod.q.du.d", int_loongarch_lasx_xvmulwod_q_du_d, LASX256DOpnd, LASX256DOpnd, LASX256DOpnd>; ++ ++ ++def XVMADD_B : LASX_3R<0b01110100101010000>, ++ LASX_3R_4R_DESC_BASE<"xvmadd.b", muladd, LASX256BOpnd>; ++ ++def XVMADD_H : LASX_3R<0b01110100101010001>, ++ LASX_3R_4R_DESC_BASE<"xvmadd.h", muladd, LASX256HOpnd>; ++ ++def XVMADD_W : LASX_3R<0b01110100101010010>, ++ LASX_3R_4R_DESC_BASE<"xvmadd.w", muladd, LASX256WOpnd>; ++ ++def XVMADD_D : LASX_3R<0b01110100101010011>, ++ LASX_3R_4R_DESC_BASE<"xvmadd.d", muladd, LASX256DOpnd>; ++ ++ ++def XVMSUB_B : LASX_3R<0b01110100101010100>, ++ LASX_3R_4R_DESC_BASE<"xvmsub.b", mulsub, LASX256BOpnd>; ++ ++def XVMSUB_H : LASX_3R<0b01110100101010101>, ++ LASX_3R_4R_DESC_BASE<"xvmsub.h", mulsub, LASX256HOpnd>; ++ ++def XVMSUB_W : LASX_3R<0b01110100101010110>, ++ LASX_3R_4R_DESC_BASE<"xvmsub.w", mulsub, LASX256WOpnd>; ++ ++def XVMSUB_D : LASX_3R<0b01110100101010111>, ++ LASX_3R_4R_DESC_BASE<"xvmsub.d", mulsub, LASX256DOpnd>; ++ ++ ++def XVMADDWEV_H_B : LASX_3R<0b01110100101011000>, ++ LASX_3R_4R_DESC_BASE<"xvmaddwev.h.b", int_loongarch_lasx_xvmaddwev_h_b, LASX256HOpnd, LASX256BOpnd, LASX256BOpnd>; ++ ++def XVMADDWEV_W_H : LASX_3R<0b01110100101011001>, ++ LASX_3R_4R_DESC_BASE<"xvmaddwev.w.h", int_loongarch_lasx_xvmaddwev_w_h, LASX256WOpnd, LASX256HOpnd, LASX256HOpnd>; ++ ++def XVMADDWEV_D_W : LASX_3R<0b01110100101011010>, ++ LASX_3R_4R_DESC_BASE<"xvmaddwev.d.w", int_loongarch_lasx_xvmaddwev_d_w, LASX256DOpnd, LASX256WOpnd, LASX256WOpnd>; ++ ++def XVMADDWEV_Q_D : LASX_3R<0b01110100101011011>, ++ LASX_3R_4R_DESC_BASE<"xvmaddwev.q.d", int_loongarch_lasx_xvmaddwev_q_d, LASX256DOpnd, LASX256DOpnd, LASX256DOpnd>; ++ ++ ++def XVMADDWOD_H_B : LASX_3R<0b01110100101011100>, ++ LASX_3R_4R_DESC_BASE<"xvmaddwod.h.b", int_loongarch_lasx_xvmaddwod_h_b, LASX256HOpnd, LASX256BOpnd, LASX256BOpnd>; ++ ++def XVMADDWOD_W_H : LASX_3R<0b01110100101011101>, ++ LASX_3R_4R_DESC_BASE<"xvmaddwod.w.h", int_loongarch_lasx_xvmaddwod_w_h, LASX256WOpnd, LASX256HOpnd, LASX256HOpnd>; ++ ++def XVMADDWOD_D_W : LASX_3R<0b01110100101011110>, ++ LASX_3R_4R_DESC_BASE<"xvmaddwod.d.w", int_loongarch_lasx_xvmaddwod_d_w, LASX256DOpnd, LASX256WOpnd, LASX256WOpnd>; ++ ++def XVMADDWOD_Q_D : LASX_3R<0b01110100101011111>, ++ LASX_3R_4R_DESC_BASE<"xvmaddwod.q.d", int_loongarch_lasx_xvmaddwod_q_d, LASX256DOpnd, LASX256DOpnd, LASX256DOpnd>; ++ ++ ++def XVMADDWEV_H_BU : LASX_3R<0b01110100101101000>, ++ LASX_3R_4R_DESC_BASE<"xvmaddwev.h.bu", int_loongarch_lasx_xvmaddwev_h_bu, LASX256HOpnd, LASX256BOpnd, LASX256BOpnd>; ++ ++def XVMADDWEV_W_HU : LASX_3R<0b01110100101101001>, ++ LASX_3R_4R_DESC_BASE<"xvmaddwev.w.hu", int_loongarch_lasx_xvmaddwev_w_hu, LASX256WOpnd, LASX256HOpnd, LASX256HOpnd>; ++ ++def XVMADDWEV_D_WU : LASX_3R<0b01110100101101010>, ++ LASX_3R_4R_DESC_BASE<"xvmaddwev.d.wu", 
int_loongarch_lasx_xvmaddwev_d_wu, LASX256DOpnd, LASX256WOpnd, LASX256WOpnd>; ++ ++def XVMADDWEV_Q_DU : LASX_3R<0b01110100101101011>, ++ LASX_3R_4R_DESC_BASE<"xvmaddwev.q.du", int_loongarch_lasx_xvmaddwev_q_du, LASX256DOpnd, LASX256DOpnd, LASX256DOpnd>; ++ ++ ++def XVMADDWOD_H_BU : LASX_3R<0b01110100101101100>, ++ LASX_3R_4R_DESC_BASE<"xvmaddwod.h.bu", int_loongarch_lasx_xvmaddwod_h_bu, LASX256HOpnd, LASX256BOpnd, LASX256BOpnd>; ++ ++def XVMADDWOD_W_HU : LASX_3R<0b01110100101101101>, ++ LASX_3R_4R_DESC_BASE<"xvmaddwod.w.hu", int_loongarch_lasx_xvmaddwod_w_hu, LASX256WOpnd, LASX256HOpnd, LASX256HOpnd>; ++ ++def XVMADDWOD_D_WU : LASX_3R<0b01110100101101110>, ++ LASX_3R_4R_DESC_BASE<"xvmaddwod.d.wu", int_loongarch_lasx_xvmaddwod_d_wu, LASX256DOpnd, LASX256WOpnd, LASX256WOpnd>; ++ ++def XVMADDWOD_Q_DU : LASX_3R<0b01110100101101111>, ++ LASX_3R_4R_DESC_BASE<"xvmaddwod.q.du", int_loongarch_lasx_xvmaddwod_q_du, LASX256DOpnd, LASX256DOpnd, LASX256DOpnd>; ++ ++ ++def XVMADDWEV_H_BU_B : LASX_3R<0b01110100101111000>, ++ LASX_3R_4R_DESC_BASE<"xvmaddwev.h.bu.b", int_loongarch_lasx_xvmaddwev_h_bu_b, LASX256HOpnd, LASX256BOpnd, LASX256BOpnd>; ++ ++def XVMADDWEV_W_HU_H : LASX_3R<0b01110100101111001>, ++ LASX_3R_4R_DESC_BASE<"xvmaddwev.w.hu.h", int_loongarch_lasx_xvmaddwev_w_hu_h, LASX256WOpnd, LASX256HOpnd, LASX256HOpnd>; ++ ++def XVMADDWEV_D_WU_W : LASX_3R<0b01110100101111010>, ++ LASX_3R_4R_DESC_BASE<"xvmaddwev.d.wu.w", int_loongarch_lasx_xvmaddwev_d_wu_w, LASX256DOpnd, LASX256WOpnd, LASX256WOpnd>; ++ ++def XVMADDWEV_Q_DU_D : LASX_3R<0b01110100101111011>, ++ LASX_3R_4R_DESC_BASE<"xvmaddwev.q.du.d", int_loongarch_lasx_xvmaddwev_q_du_d, LASX256DOpnd, LASX256DOpnd, LASX256DOpnd>; ++ ++ ++def XVMADDWOD_H_BU_B : LASX_3R<0b01110100101111100>, ++ LASX_3R_4R_DESC_BASE<"xvmaddwod.h.bu.b", int_loongarch_lasx_xvmaddwod_h_bu_b, LASX256HOpnd, LASX256BOpnd, LASX256BOpnd>; ++ ++def XVMADDWOD_W_HU_H : LASX_3R<0b01110100101111101>, ++ LASX_3R_4R_DESC_BASE<"xvmaddwod.w.hu.h", int_loongarch_lasx_xvmaddwod_w_hu_h, LASX256WOpnd, LASX256HOpnd, LASX256HOpnd>; ++ ++def XVMADDWOD_D_WU_W : LASX_3R<0b01110100101111110>, ++ LASX_3R_4R_DESC_BASE<"xvmaddwod.d.wu.w", int_loongarch_lasx_xvmaddwod_d_wu_w, LASX256DOpnd, LASX256WOpnd, LASX256WOpnd>; ++ ++def XVMADDWOD_Q_DU_D : LASX_3R<0b01110100101111111>, ++ LASX_3R_4R_DESC_BASE<"xvmaddwod.q.du.d", int_loongarch_lasx_xvmaddwod_q_du_d, LASX256DOpnd, LASX256DOpnd, LASX256DOpnd>; ++ ++ ++def XVDIV_B : LASX_3R<0b01110100111000000>, ++ LASX_3R_DESC_BASE<"xvdiv.b", sdiv, LASX256BOpnd>; ++ ++def XVDIV_H : LASX_3R<0b01110100111000001>, ++ LASX_3R_DESC_BASE<"xvdiv.h", sdiv, LASX256HOpnd>; ++ ++def XVDIV_W : LASX_3R<0b01110100111000010>, ++ LASX_3R_DESC_BASE<"xvdiv.w", sdiv, LASX256WOpnd>; ++ ++def XVDIV_D : LASX_3R<0b01110100111000011>, ++ LASX_3R_DESC_BASE<"xvdiv.d", sdiv, LASX256DOpnd>; ++ ++ ++def XVMOD_B : LASX_3R<0b01110100111000100>, ++ LASX_3R_DESC_BASE<"xvmod.b", srem, LASX256BOpnd>; ++ ++def XVMOD_H : LASX_3R<0b01110100111000101>, ++ LASX_3R_DESC_BASE<"xvmod.h", srem, LASX256HOpnd>; ++ ++def XVMOD_W : LASX_3R<0b01110100111000110>, ++ LASX_3R_DESC_BASE<"xvmod.w", srem, LASX256WOpnd>; ++ ++def XVMOD_D : LASX_3R<0b01110100111000111>, ++ LASX_3R_DESC_BASE<"xvmod.d", srem, LASX256DOpnd>; ++ ++ ++def XVDIV_BU : LASX_3R<0b01110100111001000>, ++ LASX_3R_DESC_BASE<"xvdiv.bu", udiv, LASX256BOpnd>; ++ ++def XVDIV_HU : LASX_3R<0b01110100111001001>, ++ LASX_3R_DESC_BASE<"xvdiv.hu", udiv, LASX256HOpnd>; ++ ++def XVDIV_WU : LASX_3R<0b01110100111001010>, ++ LASX_3R_DESC_BASE<"xvdiv.wu", udiv, 
LASX256WOpnd>; ++ ++def XVDIV_DU : LASX_3R<0b01110100111001011>, ++ LASX_3R_DESC_BASE<"xvdiv.du", udiv, LASX256DOpnd>; ++ ++ ++def XVMOD_BU : LASX_3R<0b01110100111001100>, ++ LASX_3R_DESC_BASE<"xvmod.bu", urem, LASX256BOpnd>; ++ ++def XVMOD_HU : LASX_3R<0b01110100111001101>, ++ LASX_3R_DESC_BASE<"xvmod.hu", urem, LASX256HOpnd>; ++ ++def XVMOD_WU : LASX_3R<0b01110100111001110>, ++ LASX_3R_DESC_BASE<"xvmod.wu", urem, LASX256WOpnd>; ++ ++def XVMOD_DU : LASX_3R<0b01110100111001111>, ++ LASX_3R_DESC_BASE<"xvmod.du", urem, LASX256DOpnd>; ++ ++ ++def XVSLL_B : LASX_3R<0b01110100111010000>, ++ LASX_3R_DESC_BASE<"xvsll.b", shl, LASX256BOpnd>; ++ ++def XVSLL_H : LASX_3R<0b01110100111010001>, ++ LASX_3R_DESC_BASE<"xvsll.h", shl, LASX256HOpnd>; ++ ++def XVSLL_W : LASX_3R<0b01110100111010010>, ++ LASX_3R_DESC_BASE<"xvsll.w", shl, LASX256WOpnd>; ++ ++def XVSLL_D : LASX_3R<0b01110100111010011>, ++ LASX_3R_DESC_BASE<"xvsll.d", shl, LASX256DOpnd>; ++ ++ ++def XVSRL_B : LASX_3R<0b01110100111010100>, ++ LASX_3R_DESC_BASE<"xvsrl.b", srl, LASX256BOpnd>; ++ ++def XVSRL_H : LASX_3R<0b01110100111010101>, ++ LASX_3R_DESC_BASE<"xvsrl.h", srl, LASX256HOpnd>; ++ ++def XVSRL_W : LASX_3R<0b01110100111010110>, ++ LASX_3R_DESC_BASE<"xvsrl.w", srl, LASX256WOpnd>; ++ ++def XVSRL_D : LASX_3R<0b01110100111010111>, ++ LASX_3R_DESC_BASE<"xvsrl.d", srl, LASX256DOpnd>; ++ ++ ++def XVSRA_B : LASX_3R<0b01110100111011000>, ++ LASX_3R_DESC_BASE<"xvsra.b", sra, LASX256BOpnd>; ++ ++def XVSRA_H : LASX_3R<0b01110100111011001>, ++ LASX_3R_DESC_BASE<"xvsra.h", sra, LASX256HOpnd>; ++ ++def XVSRA_W : LASX_3R<0b01110100111011010>, ++ LASX_3R_DESC_BASE<"xvsra.w", sra, LASX256WOpnd>; ++ ++def XVSRA_D : LASX_3R<0b01110100111011011>, ++ LASX_3R_DESC_BASE<"xvsra.d", sra, LASX256DOpnd>; ++ ++ ++def XVROTR_B : LASX_3R<0b01110100111011100>, ++ LASX_3R_DESC_BASE<"xvrotr.b", int_loongarch_lasx_xvrotr_b, LASX256BOpnd>; ++ ++def XVROTR_H : LASX_3R<0b01110100111011101>, ++ LASX_3R_DESC_BASE<"xvrotr.h", int_loongarch_lasx_xvrotr_h, LASX256HOpnd>; ++ ++def XVROTR_W : LASX_3R<0b01110100111011110>, ++ LASX_3R_DESC_BASE<"xvrotr.w", int_loongarch_lasx_xvrotr_w, LASX256WOpnd>; ++ ++def XVROTR_D : LASX_3R<0b01110100111011111>, ++ LASX_3R_DESC_BASE<"xvrotr.d", int_loongarch_lasx_xvrotr_d, LASX256DOpnd>; ++ ++ ++def XVSRLR_B : LASX_3R<0b01110100111100000>, ++ LASX_3R_DESC_BASE<"xvsrlr.b", int_loongarch_lasx_xvsrlr_b, LASX256BOpnd>; ++ ++def XVSRLR_H : LASX_3R<0b01110100111100001>, ++ LASX_3R_DESC_BASE<"xvsrlr.h", int_loongarch_lasx_xvsrlr_h, LASX256HOpnd>; ++ ++def XVSRLR_W : LASX_3R<0b01110100111100010>, ++ LASX_3R_DESC_BASE<"xvsrlr.w", int_loongarch_lasx_xvsrlr_w, LASX256WOpnd>; ++ ++def XVSRLR_D : LASX_3R<0b01110100111100011>, ++ LASX_3R_DESC_BASE<"xvsrlr.d", int_loongarch_lasx_xvsrlr_d, LASX256DOpnd>; ++ ++ ++def XVSRAR_B : LASX_3R<0b01110100111100100>, ++ LASX_3R_DESC_BASE<"xvsrar.b", int_loongarch_lasx_xvsrar_b, LASX256BOpnd>; ++ ++def XVSRAR_H : LASX_3R<0b01110100111100101>, ++ LASX_3R_DESC_BASE<"xvsrar.h", int_loongarch_lasx_xvsrar_h, LASX256HOpnd>; ++ ++def XVSRAR_W : LASX_3R<0b01110100111100110>, ++ LASX_3R_DESC_BASE<"xvsrar.w", int_loongarch_lasx_xvsrar_w, LASX256WOpnd>; ++ ++def XVSRAR_D : LASX_3R<0b01110100111100111>, ++ LASX_3R_DESC_BASE<"xvsrar.d", int_loongarch_lasx_xvsrar_d, LASX256DOpnd>; ++ ++ ++def XVSRLN_B_H : LASX_3R<0b01110100111101001>, ++ LASX_3R_DESC_BASE<"xvsrln.b.h", int_loongarch_lasx_xvsrln_b_h, LASX256BOpnd, LASX256HOpnd, LASX256HOpnd>; ++ ++def XVSRLN_H_W : LASX_3R<0b01110100111101010>, ++ LASX_3R_DESC_BASE<"xvsrln.h.w", 
int_loongarch_lasx_xvsrln_h_w, LASX256HOpnd, LASX256WOpnd, LASX256WOpnd>; ++ ++def XVSRLN_W_D : LASX_3R<0b01110100111101011>, ++ LASX_3R_DESC_BASE<"xvsrln.w.d", int_loongarch_lasx_xvsrln_w_d, LASX256WOpnd, LASX256DOpnd, LASX256DOpnd>; ++ ++ ++def XVSRAN_B_H : LASX_3R<0b01110100111101101>, ++ LASX_3R_DESC_BASE<"xvsran.b.h", int_loongarch_lasx_xvsran_b_h, LASX256BOpnd, LASX256HOpnd, LASX256HOpnd>; ++ ++def XVSRAN_H_W : LASX_3R<0b01110100111101110>, ++ LASX_3R_DESC_BASE<"xvsran.h.w", int_loongarch_lasx_xvsran_h_w, LASX256HOpnd, LASX256WOpnd, LASX256WOpnd>; ++ ++def XVSRAN_W_D : LASX_3R<0b01110100111101111>, ++ LASX_3R_DESC_BASE<"xvsran.w.d", int_loongarch_lasx_xvsran_w_d, LASX256WOpnd, LASX256DOpnd, LASX256DOpnd>; ++ ++ ++def XVSRLRN_B_H : LASX_3R<0b01110100111110001>, ++ LASX_3R_DESC_BASE<"xvsrlrn.b.h", int_loongarch_lasx_xvsrlrn_b_h, LASX256BOpnd, LASX256HOpnd, LASX256HOpnd>; ++ ++def XVSRLRN_H_W : LASX_3R<0b01110100111110010>, ++ LASX_3R_DESC_BASE<"xvsrlrn.h.w", int_loongarch_lasx_xvsrlrn_h_w, LASX256HOpnd, LASX256WOpnd, LASX256WOpnd>; ++ ++def XVSRLRN_W_D : LASX_3R<0b01110100111110011>, ++ LASX_3R_DESC_BASE<"xvsrlrn.w.d", int_loongarch_lasx_xvsrlrn_w_d, LASX256WOpnd, LASX256DOpnd, LASX256DOpnd>; ++ ++ ++def XVSRARN_B_H : LASX_3R<0b01110100111110101>, ++ LASX_3R_DESC_BASE<"xvsrarn.b.h", int_loongarch_lasx_xvsrarn_b_h, LASX256BOpnd, LASX256HOpnd, LASX256HOpnd>; ++ ++def XVSRARN_H_W : LASX_3R<0b01110100111110110>, ++ LASX_3R_DESC_BASE<"xvsrarn.h.w", int_loongarch_lasx_xvsrarn_h_w, LASX256HOpnd, LASX256WOpnd, LASX256WOpnd>; ++ ++def XVSRARN_W_D : LASX_3R<0b01110100111110111>, ++ LASX_3R_DESC_BASE<"xvsrarn.w.d", int_loongarch_lasx_xvsrarn_w_d, LASX256WOpnd, LASX256DOpnd, LASX256DOpnd>; ++ ++ ++def XVSSRLN_B_H : LASX_3R<0b01110100111111001>, ++ LASX_3R_DESC_BASE<"xvssrln.b.h", int_loongarch_lasx_xvssrln_b_h, LASX256BOpnd, LASX256HOpnd, LASX256HOpnd>; ++ ++def XVSSRLN_H_W : LASX_3R<0b01110100111111010>, ++ LASX_3R_DESC_BASE<"xvssrln.h.w", int_loongarch_lasx_xvssrln_h_w, LASX256HOpnd, LASX256WOpnd, LASX256WOpnd>; ++ ++def XVSSRLN_W_D : LASX_3R<0b01110100111111011>, ++ LASX_3R_DESC_BASE<"xvssrln.w.d", int_loongarch_lasx_xvssrln_w_d, LASX256WOpnd, LASX256DOpnd, LASX256DOpnd>; ++ ++ ++def XVSSRAN_B_H : LASX_3R<0b01110100111111101>, ++ LASX_3R_DESC_BASE<"xvssran.b.h", int_loongarch_lasx_xvssran_b_h, LASX256BOpnd, LASX256HOpnd, LASX256HOpnd>; ++ ++def XVSSRAN_H_W : LASX_3R<0b01110100111111110>, ++ LASX_3R_DESC_BASE<"xvssran.h.w", int_loongarch_lasx_xvssran_h_w, LASX256HOpnd, LASX256WOpnd, LASX256WOpnd>; ++ ++def XVSSRAN_W_D : LASX_3R<0b01110100111111111>, ++ LASX_3R_DESC_BASE<"xvssran.w.d", int_loongarch_lasx_xvssran_w_d, LASX256WOpnd, LASX256DOpnd, LASX256DOpnd>; ++ ++ ++def XVSSRLRN_B_H : LASX_3R<0b01110101000000001>, ++ LASX_3R_DESC_BASE<"xvssrlrn.b.h", int_loongarch_lasx_xvssrlrn_b_h, LASX256BOpnd, LASX256HOpnd, LASX256HOpnd>; ++ ++def XVSSRLRN_H_W : LASX_3R<0b01110101000000010>, ++ LASX_3R_DESC_BASE<"xvssrlrn.h.w", int_loongarch_lasx_xvssrlrn_h_w, LASX256HOpnd, LASX256WOpnd, LASX256WOpnd>; ++ ++def XVSSRLRN_W_D : LASX_3R<0b01110101000000011>, ++ LASX_3R_DESC_BASE<"xvssrlrn.w.d", int_loongarch_lasx_xvssrlrn_w_d, LASX256WOpnd, LASX256DOpnd, LASX256DOpnd>; ++ ++ ++def XVSSRARN_B_H : LASX_3R<0b01110101000000101>, ++ LASX_3R_DESC_BASE<"xvssrarn.b.h", int_loongarch_lasx_xvssrarn_b_h, LASX256BOpnd, LASX256HOpnd, LASX256HOpnd>; ++ ++def XVSSRARN_H_W : LASX_3R<0b01110101000000110>, ++ LASX_3R_DESC_BASE<"xvssrarn.h.w", int_loongarch_lasx_xvssrarn_h_w, LASX256HOpnd, LASX256WOpnd, LASX256WOpnd>; ++ ++def 
XVSSRARN_W_D : LASX_3R<0b01110101000000111>, ++ LASX_3R_DESC_BASE<"xvssrarn.w.d", int_loongarch_lasx_xvssrarn_w_d, LASX256WOpnd, LASX256DOpnd, LASX256DOpnd>; ++ ++ ++def XVSSRLN_BU_H : LASX_3R<0b01110101000001001>, ++ LASX_3R_DESC_BASE<"xvssrln.bu.h", int_loongarch_lasx_xvssrln_bu_h, LASX256BOpnd, LASX256HOpnd, LASX256HOpnd>; ++ ++def XVSSRLN_HU_W : LASX_3R<0b01110101000001010>, ++ LASX_3R_DESC_BASE<"xvssrln.hu.w", int_loongarch_lasx_xvssrln_hu_w, LASX256HOpnd, LASX256WOpnd, LASX256WOpnd>; ++ ++def XVSSRLN_WU_D : LASX_3R<0b01110101000001011>, ++ LASX_3R_DESC_BASE<"xvssrln.wu.d", int_loongarch_lasx_xvssrln_wu_d, LASX256WOpnd, LASX256DOpnd, LASX256DOpnd>; ++ ++ ++def XVSSRAN_BU_H : LASX_3R<0b01110101000001101>, ++ LASX_3R_DESC_BASE<"xvssran.bu.h", int_loongarch_lasx_xvssran_bu_h, LASX256BOpnd, LASX256HOpnd, LASX256HOpnd>; ++ ++def XVSSRAN_HU_W : LASX_3R<0b01110101000001110>, ++ LASX_3R_DESC_BASE<"xvssran.hu.w", int_loongarch_lasx_xvssran_hu_w, LASX256HOpnd, LASX256WOpnd, LASX256WOpnd>; ++ ++def XVSSRAN_WU_D : LASX_3R<0b01110101000001111>, ++ LASX_3R_DESC_BASE<"xvssran.wu.d", int_loongarch_lasx_xvssran_wu_d, LASX256WOpnd, LASX256DOpnd, LASX256DOpnd>; ++ ++ ++def XVSSRLRN_BU_H : LASX_3R<0b01110101000010001>, ++ LASX_3R_DESC_BASE<"xvssrlrn.bu.h", int_loongarch_lasx_xvssrlrn_bu_h, LASX256BOpnd, LASX256HOpnd, LASX256HOpnd>; ++ ++def XVSSRLRN_HU_W : LASX_3R<0b01110101000010010>, ++ LASX_3R_DESC_BASE<"xvssrlrn.hu.w", int_loongarch_lasx_xvssrlrn_hu_w, LASX256HOpnd, LASX256WOpnd, LASX256WOpnd>; ++ ++def XVSSRLRN_WU_D : LASX_3R<0b01110101000010011>, ++ LASX_3R_DESC_BASE<"xvssrlrn.wu.d", int_loongarch_lasx_xvssrlrn_wu_d, LASX256WOpnd, LASX256DOpnd, LASX256DOpnd>; ++ ++ ++def XVSSRARN_BU_H : LASX_3R<0b01110101000010101>, ++ LASX_3R_DESC_BASE<"xvssrarn.bu.h", int_loongarch_lasx_xvssrarn_bu_h, LASX256BOpnd, LASX256HOpnd, LASX256HOpnd>; ++ ++def XVSSRARN_HU_W : LASX_3R<0b01110101000010110>, ++ LASX_3R_DESC_BASE<"xvssrarn.hu.w", int_loongarch_lasx_xvssrarn_hu_w, LASX256HOpnd, LASX256WOpnd, LASX256WOpnd>; ++ ++def XVSSRARN_WU_D : LASX_3R<0b01110101000010111>, ++ LASX_3R_DESC_BASE<"xvssrarn.wu.d", int_loongarch_lasx_xvssrarn_wu_d, LASX256WOpnd, LASX256DOpnd, LASX256DOpnd>; ++ ++ ++def XVBITCLR_B : LASX_3R<0b01110101000011000>, ++ LASX_3R_DESC_BASE<"xvbitclr.b", xvbitclr_b, LASX256BOpnd>; ++ ++def XVBITCLR_H : LASX_3R<0b01110101000011001>, ++ LASX_3R_DESC_BASE<"xvbitclr.h", xvbitclr_h, LASX256HOpnd>; ++ ++def XVBITCLR_W : LASX_3R<0b01110101000011010>, ++ LASX_3R_DESC_BASE<"xvbitclr.w", xvbitclr_w, LASX256WOpnd>; ++ ++def XVBITCLR_D : LASX_3R<0b01110101000011011>, ++ LASX_3R_DESC_BASE<"xvbitclr.d", xvbitclr_d, LASX256DOpnd>; ++ ++ ++def XVBITSET_B : LASX_3R<0b01110101000011100>, ++ LASX_3R_DESC_BASE<"xvbitset.b", int_loongarch_lasx_xvbitset_b, LASX256BOpnd>; ++ ++def XVBITSET_H : LASX_3R<0b01110101000011101>, ++ LASX_3R_DESC_BASE<"xvbitset.h", int_loongarch_lasx_xvbitset_h, LASX256HOpnd>; ++ ++def XVBITSET_W : LASX_3R<0b01110101000011110>, ++ LASX_3R_DESC_BASE<"xvbitset.w", int_loongarch_lasx_xvbitset_w, LASX256WOpnd>; ++ ++def XVBITSET_D : LASX_3R<0b01110101000011111>, ++ LASX_3R_DESC_BASE<"xvbitset.d", int_loongarch_lasx_xvbitset_d, LASX256DOpnd>; ++ ++ ++def XVBITREV_B : LASX_3R<0b01110101000100000>, ++ LASX_3R_DESC_BASE<"xvbitrev.b", int_loongarch_lasx_xvbitrev_b, LASX256BOpnd>; ++ ++def XVBITREV_H : LASX_3R<0b01110101000100001>, ++ LASX_3R_DESC_BASE<"xvbitrev.h", int_loongarch_lasx_xvbitrev_h, LASX256HOpnd>; ++ ++def XVBITREV_W : LASX_3R<0b01110101000100010>, ++ LASX_3R_DESC_BASE<"xvbitrev.w", 
int_loongarch_lasx_xvbitrev_w, LASX256WOpnd>; ++ ++def XVBITREV_D : LASX_3R<0b01110101000100011>, ++ LASX_3R_DESC_BASE<"xvbitrev.d", int_loongarch_lasx_xvbitrev_d, LASX256DOpnd>; ++ ++ ++def XVPACKEV_B : LASX_3R<0b01110101000101100>, ++ LASX_3R_DESC_BASE<"xvpackev.b", LoongArchVPACKEV, LASX256BOpnd>; ++ ++def XVPACKEV_H : LASX_3R<0b01110101000101101>, ++ LASX_3R_DESC_BASE<"xvpackev.h", LoongArchVPACKEV, LASX256HOpnd>; ++ ++def XVPACKEV_W : LASX_3R<0b01110101000101110>, ++ LASX_3R_DESC_BASE<"xvpackev.w", LoongArchVPACKEV, LASX256WOpnd>; ++ ++def XVPACKEV_D : LASX_3R<0b01110101000101111>, ++ LASX_3R_DESC_BASE<"xvpackev.d", LoongArchVPACKEV, LASX256DOpnd>; ++ ++ ++def XVPACKOD_B : LASX_3R<0b01110101000110000>, ++ LASX_3R_DESC_BASE<"xvpackod.b", LoongArchVPACKOD, LASX256BOpnd>; ++ ++def XVPACKOD_H : LASX_3R<0b01110101000110001>, ++ LASX_3R_DESC_BASE<"xvpackod.h", LoongArchVPACKOD, LASX256HOpnd>; ++ ++def XVPACKOD_W : LASX_3R<0b01110101000110010>, ++ LASX_3R_DESC_BASE<"xvpackod.w", LoongArchVPACKOD, LASX256WOpnd>; ++ ++def XVPACKOD_D : LASX_3R<0b01110101000110011>, ++ LASX_3R_DESC_BASE<"xvpackod.d", LoongArchVPACKOD, LASX256DOpnd>; ++ ++ ++def XVILVL_B : LASX_3R<0b01110101000110100>, ++ LASX_3R_DESC_BASE<"xvilvl.b", LoongArchVILVL, LASX256BOpnd>; ++ ++def XVILVL_H : LASX_3R<0b01110101000110101>, ++ LASX_3R_DESC_BASE<"xvilvl.h", LoongArchVILVL, LASX256HOpnd>; ++ ++def XVILVL_W : LASX_3R<0b01110101000110110>, ++ LASX_3R_DESC_BASE<"xvilvl.w", LoongArchVILVL, LASX256WOpnd>; ++ ++def XVILVL_D : LASX_3R<0b01110101000110111>, ++ LASX_3R_DESC_BASE<"xvilvl.d", LoongArchVILVL, LASX256DOpnd>; ++ ++ ++def XVILVH_B : LASX_3R<0b01110101000111000>, ++ LASX_3R_DESC_BASE<"xvilvh.b", LoongArchVILVH, LASX256BOpnd>; ++ ++def XVILVH_H : LASX_3R<0b01110101000111001>, ++ LASX_3R_DESC_BASE<"xvilvh.h", LoongArchVILVH, LASX256HOpnd>; ++ ++def XVILVH_W : LASX_3R<0b01110101000111010>, ++ LASX_3R_DESC_BASE<"xvilvh.w", LoongArchVILVH, LASX256WOpnd>; ++ ++def XVILVH_D : LASX_3R<0b01110101000111011>, ++ LASX_3R_DESC_BASE<"xvilvh.d", LoongArchVILVH, LASX256DOpnd>; ++ ++ ++def XVPICKEV_B : LASX_3R<0b01110101000111100>, ++ LASX_3R_DESC_BASE<"xvpickev.b", LoongArchVPICKEV, LASX256BOpnd>; ++ ++def XVPICKEV_H : LASX_3R<0b01110101000111101>, ++ LASX_3R_DESC_BASE<"xvpickev.h", LoongArchVPICKEV, LASX256HOpnd>; ++ ++def XVPICKEV_W : LASX_3R<0b01110101000111110>, ++ LASX_3R_DESC_BASE<"xvpickev.w", LoongArchVPICKEV, LASX256WOpnd>; ++ ++def XVPICKEV_D : LASX_3R<0b01110101000111111>, ++ LASX_3R_DESC_BASE<"xvpickev.d", LoongArchVPICKEV, LASX256DOpnd>; ++ ++ ++def XVPICKOD_B : LASX_3R<0b01110101001000000>, ++ LASX_3R_DESC_BASE<"xvpickod.b", LoongArchVPICKOD, LASX256BOpnd>; ++ ++def XVPICKOD_H : LASX_3R<0b01110101001000001>, ++ LASX_3R_DESC_BASE<"xvpickod.h", LoongArchVPICKOD, LASX256HOpnd>; ++ ++def XVPICKOD_W : LASX_3R<0b01110101001000010>, ++ LASX_3R_DESC_BASE<"xvpickod.w", LoongArchVPICKOD, LASX256WOpnd>; ++ ++def XVPICKOD_D : LASX_3R<0b01110101001000011>, ++ LASX_3R_DESC_BASE<"xvpickod.d", LoongArchVPICKOD, LASX256DOpnd>; ++ ++ ++def XVREPLVE_B : LASX_3R_1GP<0b01110101001000100>, ++ LASX_3R_VREPLVE_DESC_BASE<"xvreplve.b", int_loongarch_lasx_xvreplve_b, LASX256BOpnd>; ++ ++def XVREPLVE_H : LASX_3R_1GP<0b01110101001000101>, ++ LASX_3R_VREPLVE_DESC_BASE<"xvreplve.h", int_loongarch_lasx_xvreplve_h, LASX256HOpnd>; ++ ++def XVREPLVE_W : LASX_3R_1GP<0b01110101001000110>, ++ LASX_3R_VREPLVE_DESC_BASE<"xvreplve.w", int_loongarch_lasx_xvreplve_w, LASX256WOpnd>; ++ ++def XVREPLVE_D : LASX_3R_1GP<0b01110101001000111>, ++ 
LASX_3R_VREPLVE_DESC_BASE<"xvreplve.d", int_loongarch_lasx_xvreplve_d, LASX256DOpnd>; ++ ++ ++def XVAND_V : LASX_3R<0b01110101001001100>, ++ LASX_VEC_DESC_BASE<"xvand.v", and, LASX256BOpnd>; ++class XAND_V_H_PSEUDO_DESC : LASX_VEC_PSEUDO_BASE; ++class XAND_V_W_PSEUDO_DESC : LASX_VEC_PSEUDO_BASE; ++class XAND_V_D_PSEUDO_DESC : LASX_VEC_PSEUDO_BASE; ++ ++def XAND_V_H_PSEUDO : XAND_V_H_PSEUDO_DESC, ++ PseudoInstExpansion<(XVAND_V LASX256BOpnd:$xd, ++ LASX256BOpnd:$xj, ++ LASX256BOpnd:$xk)>; ++def XAND_V_W_PSEUDO : XAND_V_W_PSEUDO_DESC, ++ PseudoInstExpansion<(XVAND_V LASX256BOpnd:$xd, ++ LASX256BOpnd:$xj, ++ LASX256BOpnd:$xk)>; ++def XAND_V_D_PSEUDO : XAND_V_D_PSEUDO_DESC, ++ PseudoInstExpansion<(XVAND_V LASX256BOpnd:$xd, ++ LASX256BOpnd:$xj, ++ LASX256BOpnd:$xk)>; ++ ++ ++def XVOR_V : LASX_3R<0b01110101001001101>, ++ LASX_VEC_DESC_BASE<"xvor.v", or, LASX256BOpnd>; ++class X_OR_V_H_PSEUDO_DESC : LASX_VEC_PSEUDO_BASE; ++class X_OR_V_W_PSEUDO_DESC : LASX_VEC_PSEUDO_BASE; ++class X_OR_V_D_PSEUDO_DESC : LASX_VEC_PSEUDO_BASE; ++ ++def X_OR_V_H_PSEUDO : X_OR_V_H_PSEUDO_DESC, ++ PseudoInstExpansion<(XVOR_V LASX256BOpnd:$xd, ++ LASX256BOpnd:$xj, ++ LASX256BOpnd:$xk)>; ++def X_OR_V_W_PSEUDO : X_OR_V_W_PSEUDO_DESC, ++ PseudoInstExpansion<(XVOR_V LASX256BOpnd:$xd, ++ LASX256BOpnd:$xj, ++ LASX256BOpnd:$xk)>; ++def X_OR_V_D_PSEUDO : X_OR_V_D_PSEUDO_DESC, ++ PseudoInstExpansion<(XVOR_V LASX256BOpnd:$xd, ++ LASX256BOpnd:$xj, ++ LASX256BOpnd:$xk)>; ++ ++ ++def XVXOR_V : LASX_3R<0b01110101001001110>, ++ LASX_VEC_DESC_BASE<"xvxor.v", xor, LASX256BOpnd>; ++class XXOR_V_H_PSEUDO_DESC : LASX_VEC_PSEUDO_BASE; ++class XXOR_V_W_PSEUDO_DESC : LASX_VEC_PSEUDO_BASE; ++class XXOR_V_D_PSEUDO_DESC : LASX_VEC_PSEUDO_BASE; ++ ++def XXOR_V_H_PSEUDO : XXOR_V_H_PSEUDO_DESC, ++ PseudoInstExpansion<(XVXOR_V LASX256BOpnd:$xd, ++ LASX256BOpnd:$xj, ++ LASX256BOpnd:$xk)>; ++def XXOR_V_W_PSEUDO : XXOR_V_W_PSEUDO_DESC, ++ PseudoInstExpansion<(XVXOR_V LASX256BOpnd:$xd, ++ LASX256BOpnd:$xj, ++ LASX256BOpnd:$xk)>; ++def XXOR_V_D_PSEUDO : XXOR_V_D_PSEUDO_DESC, ++ PseudoInstExpansion<(XVXOR_V LASX256BOpnd:$xd, ++ LASX256BOpnd:$xj, ++ LASX256BOpnd:$xk)>; ++ ++ ++def XVNOR_V : LASX_3R<0b01110101001001111>, ++ LASX_VEC_DESC_BASE<"xvnor.v", LoongArchVNOR, LASX256BOpnd>; ++ ++class XNOR_V_H_PSEUDO_DESC : LASX_VEC_PSEUDO_BASE; ++class XNOR_V_W_PSEUDO_DESC : LASX_VEC_PSEUDO_BASE; ++class XNOR_V_D_PSEUDO_DESC : LASX_VEC_PSEUDO_BASE; ++ ++def XNOR_V_H_PSEUDO : XNOR_V_H_PSEUDO_DESC, ++ PseudoInstExpansion<(XVNOR_V LASX256BOpnd:$xd, ++ LASX256BOpnd:$xj, ++ LASX256BOpnd:$xk)>; ++def XNOR_V_W_PSEUDO : XNOR_V_W_PSEUDO_DESC, ++ PseudoInstExpansion<(XVNOR_V LASX256BOpnd:$xd, ++ LASX256BOpnd:$xj, ++ LASX256BOpnd:$xk)>; ++def XNOR_V_D_PSEUDO : XNOR_V_D_PSEUDO_DESC, ++ PseudoInstExpansion<(XVNOR_V LASX256BOpnd:$xd, ++ LASX256BOpnd:$xj, ++ LASX256BOpnd:$xk)>; ++ ++ ++def XVANDN_V : LASX_3R<0b01110101001010000>, ++ LASX_3R_DESC_BASE<"xvandn.v", int_loongarch_lasx_xvandn_v, LASX256BOpnd>; ++ ++ ++class LASX_ANDN_PSEUDO_BASE : ++ LASXPseudo<(outs RO:$xd), (ins RO:$xj, RO:$xk), ++ []>, ++ PseudoInstExpansion<(XVANDN_V LASX256BOpnd:$xd, ++ LASX256BOpnd:$xj, ++ LASX256BOpnd:$xk)>; ++ ++def XVANDN_H_PSEUDO : LASX_ANDN_PSEUDO_BASE; ++def XVANDN_W_PSEUDO : LASX_ANDN_PSEUDO_BASE; ++def XVANDN_D_PSEUDO : LASX_ANDN_PSEUDO_BASE; ++ ++ ++def XVORN_V : LASX_3R<0b01110101001010001>, ++ LASX_3R_DESC_BASE<"xvorn.v", int_loongarch_lasx_xvorn_v, LASX256BOpnd>; ++ ++ ++class LASX_ORN_PSEUDO_BASE : ++ LASXPseudo<(outs RO:$xd), (ins RO:$xj, RO:$xk), ++ []>, ++ 
PseudoInstExpansion<(XVORN_V LASX256BOpnd:$xd, ++ LASX256BOpnd:$xj, ++ LASX256BOpnd:$xk)>; ++ ++def XVORN_H_PSEUDO : LASX_ORN_PSEUDO_BASE; ++def XVORN_W_PSEUDO : LASX_ORN_PSEUDO_BASE; ++def XVORN_D_PSEUDO : LASX_ORN_PSEUDO_BASE; ++ ++ ++def XVFRSTP_B : LASX_3R<0b01110101001010110>, ++ LASX_3R_4R_DESC_BASE<"xvfrstp.b", int_loongarch_lasx_xvfrstp_b, LASX256BOpnd>; ++ ++def XVFRSTP_H : LASX_3R<0b01110101001010111>, ++ LASX_3R_4R_DESC_BASE<"xvfrstp.h", int_loongarch_lasx_xvfrstp_h, LASX256HOpnd>; ++ ++ ++def XVADD_Q : LASX_3R<0b01110101001011010>, IsCommutable, ++ LASX_3R_DESC_BASE<"xvadd.q", int_loongarch_lasx_xvadd_q, LASX256DOpnd>; ++ ++def XVSUB_Q : LASX_3R<0b01110101001011011>, ++ LASX_3R_DESC_BASE<"xvsub.q", int_loongarch_lasx_xvsub_q, LASX256DOpnd>; ++ ++ ++def XVSIGNCOV_B : LASX_3R<0b01110101001011100>, ++ LASX_3R_DESC_BASE<"xvsigncov.b", int_loongarch_lasx_xvsigncov_b, LASX256BOpnd>; ++ ++def XVSIGNCOV_H : LASX_3R<0b01110101001011101>, ++ LASX_3R_DESC_BASE<"xvsigncov.h", int_loongarch_lasx_xvsigncov_h, LASX256HOpnd>; ++ ++def XVSIGNCOV_W : LASX_3R<0b01110101001011110>, ++ LASX_3R_DESC_BASE<"xvsigncov.w", int_loongarch_lasx_xvsigncov_w, LASX256WOpnd>; ++ ++def XVSIGNCOV_D : LASX_3R<0b01110101001011111>, ++ LASX_3R_DESC_BASE<"xvsigncov.d", int_loongarch_lasx_xvsigncov_d, LASX256DOpnd>; ++ ++ ++def XVFADD_S : LASX_3R<0b01110101001100001>, IsCommutable, ++ LASX_3RF_DESC_BASE<"xvfadd.s", fadd, LASX256WOpnd>; ++ ++def XVFADD_D : LASX_3R<0b01110101001100010>, IsCommutable, ++ LASX_3RF_DESC_BASE<"xvfadd.d", fadd, LASX256DOpnd>; ++ ++ ++def XVFSUB_S : LASX_3R<0b01110101001100101>, ++ LASX_3RF_DESC_BASE<"xvfsub.s", fsub, LASX256WOpnd>; ++ ++def XVFSUB_D : LASX_3R<0b01110101001100110>, ++ LASX_3RF_DESC_BASE<"xvfsub.d", fsub, LASX256DOpnd>; ++ ++ ++def XVFMUL_S : LASX_3R<0b01110101001110001>, ++ LASX_3RF_DESC_BASE<"xvfmul.s", fmul, LASX256WOpnd>; ++ ++def XVFMUL_D : LASX_3R<0b01110101001110010>, ++ LASX_3RF_DESC_BASE<"xvfmul.d", fmul, LASX256DOpnd>; ++ ++ ++def XVFDIV_S : LASX_3R<0b01110101001110101>, ++ LASX_3RF_DESC_BASE<"xvfdiv.s", fdiv, LASX256WOpnd>; ++ ++def XVFDIV_D : LASX_3R<0b01110101001110110>, ++ LASX_3RF_DESC_BASE<"xvfdiv.d", fdiv, LASX256DOpnd>; ++ ++ ++def XVFMAX_S : LASX_3R<0b01110101001111001>, ++ LASX_3RF_DESC_BASE<"xvfmax.s", int_loongarch_lasx_xvfmax_s, LASX256WOpnd>; ++ ++def XVFMAX_D : LASX_3R<0b01110101001111010>, ++ LASX_3RF_DESC_BASE<"xvfmax.d", int_loongarch_lasx_xvfmax_d, LASX256DOpnd>; ++ ++ ++def XVFMIN_S : LASX_3R<0b01110101001111101>, ++ LASX_3RF_DESC_BASE<"xvfmin.s", int_loongarch_lasx_xvfmin_s, LASX256WOpnd>; ++ ++def XVFMIN_D : LASX_3R<0b01110101001111110>, ++ LASX_3RF_DESC_BASE<"xvfmin.d", int_loongarch_lasx_xvfmin_d, LASX256DOpnd>; ++ ++ ++def XVFMAXA_S : LASX_3R<0b01110101010000001>, ++ LASX_3RF_DESC_BASE<"xvfmaxa.s", int_loongarch_lasx_xvfmaxa_s, LASX256WOpnd>; ++ ++def XVFMAXA_D : LASX_3R<0b01110101010000010>, ++ LASX_3RF_DESC_BASE<"xvfmaxa.d", int_loongarch_lasx_xvfmaxa_d, LASX256DOpnd>; ++ ++ ++def XVFMINA_S : LASX_3R<0b01110101010000101>, ++ LASX_3RF_DESC_BASE<"xvfmina.s", int_loongarch_lasx_xvfmina_s, LASX256WOpnd>; ++ ++def XVFMINA_D : LASX_3R<0b01110101010000110>, ++ LASX_3RF_DESC_BASE<"xvfmina.d", int_loongarch_lasx_xvfmina_d, LASX256DOpnd>; ++ ++ ++def XVFCVT_H_S : LASX_3R<0b01110101010001100>, ++ LASX_3RF_DESC_BASE<"xvfcvt.h.s", int_loongarch_lasx_xvfcvt_h_s, LASX256HOpnd, LASX256WOpnd, LASX256WOpnd>; ++ ++def XVFCVT_S_D : LASX_3R<0b01110101010001101>, ++ LASX_3RF_DESC_BASE1<"xvfcvt.s.d", int_loongarch_lasx_xvfcvt_s_d, LASX256WOpnd, LASX256DOpnd, 
LASX256DOpnd>; ++ ++ ++def XVFFINT_S_L : LASX_3R<0b01110101010010000>, ++ LASX_3RF_DESC_BASE<"xvffint.s.l", int_loongarch_lasx_xvffint_s_l, LASX256WOpnd, LASX256DOpnd, LASX256DOpnd>; ++ ++def XVFTINT_W_D : LASX_3R<0b01110101010010011>, ++ LASX_3RF_DESC_BASE<"xvftint.w.d", int_loongarch_lasx_xvftint_w_d, LASX256WOpnd, LASX256DOpnd, LASX256DOpnd>; ++ ++ ++def XVFTINTRM_W_D : LASX_3R<0b01110101010010100>, ++ LASX_3RF_DESC_BASE<"xvftintrm.w.d", int_loongarch_lasx_xvftintrm_w_d, LASX256WOpnd, LASX256DOpnd, LASX256DOpnd>; ++ ++def XVFTINTRP_W_D : LASX_3R<0b01110101010010101>, ++ LASX_3RF_DESC_BASE<"xvftintrp.w.d", int_loongarch_lasx_xvftintrp_w_d, LASX256WOpnd, LASX256DOpnd, LASX256DOpnd>; ++ ++def XVFTINTRZ_W_D : LASX_3R<0b01110101010010110>, ++ LASX_3RF_DESC_BASE<"xvftintrz.w.d", int_loongarch_lasx_xvftintrz_w_d, LASX256WOpnd, LASX256DOpnd, LASX256DOpnd>; ++ ++def XVFTINTRNE_W_D : LASX_3R<0b01110101010010111>, ++ LASX_3RF_DESC_BASE<"xvftintrne.w.d", int_loongarch_lasx_xvftintrne_w_d, LASX256WOpnd, LASX256DOpnd, LASX256DOpnd>; ++ ++ ++def XVSHUF_H : LASX_3R<0b01110101011110101>, ++ LASX_3R_VSHF_DESC_BASE<"xvshuf.h", LASX256HOpnd>; ++ ++def XVSHUF_W : LASX_3R<0b01110101011110110>, ++ LASX_3R_VSHF_DESC_BASE<"xvshuf.w", LASX256WOpnd>; ++ ++def XVSHUF_D : LASX_3R<0b01110101011110111>, ++ LASX_3R_VSHF_DESC_BASE<"xvshuf.d", LASX256DOpnd>; ++ ++ ++def XVPERM_W : LASX_3R<0b01110101011111010>, ++ LASX_3R_DESC_BASE<"xvperm.w", int_loongarch_lasx_xvperm_w, LASX256WOpnd>; ++ ++ ++def XVSEQI_B : LASX_I5<0b01110110100000000>, ++ LASX_I5_SETCC_DESC_BASE_Intrinsic<"xvseqi.b", int_loongarch_lasx_xvseqi_b, simm5_32, immSExt5, LASX256BOpnd>; ++ ++def XVSEQI_H : LASX_I5<0b01110110100000001>, ++ LASX_I5_SETCC_DESC_BASE_Intrinsic<"xvseqi.h", int_loongarch_lasx_xvseqi_h, simm5_32, immSExt5, LASX256HOpnd>; ++ ++def XVSEQI_W : LASX_I5<0b01110110100000010>, ++ LASX_I5_SETCC_DESC_BASE_Intrinsic<"xvseqi.w", int_loongarch_lasx_xvseqi_w, simm5_32, immSExt5, LASX256WOpnd>; ++ ++def XVSEQI_D : LASX_I5<0b01110110100000011>, ++ LASX_I5_SETCC_DESC_BASE_Intrinsic<"xvseqi.d", int_loongarch_lasx_xvseqi_d, simm5_32, immSExt5, LASX256DOpnd>; ++ ++ ++def XVSLEI_B : LASX_I5<0b01110110100000100>, ++ LASX_I5_SETCC_DESC_BASE_Intrinsic<"xvslei.b", int_loongarch_lasx_xvslei_b, simm5_32, immSExt5, LASX256BOpnd>; ++ ++def XVSLEI_H : LASX_I5<0b01110110100000101>, ++ LASX_I5_SETCC_DESC_BASE_Intrinsic<"xvslei.h", int_loongarch_lasx_xvslei_h, simm5_32, immSExt5, LASX256HOpnd>; ++ ++def XVSLEI_W : LASX_I5<0b01110110100000110>, ++ LASX_I5_SETCC_DESC_BASE_Intrinsic<"xvslei.w", int_loongarch_lasx_xvslei_w, simm5_32, immSExt5, LASX256WOpnd>; ++ ++def XVSLEI_D : LASX_I5<0b01110110100000111>, ++ LASX_I5_SETCC_DESC_BASE_Intrinsic<"xvslei.d", int_loongarch_lasx_xvslei_d, simm5_32, immSExt5, LASX256DOpnd>; ++ ++ ++def XVSLEI_BU : LASX_I5_U<0b01110110100001000>, ++ LASX_I5_U_SETCC_DESC_BASE_Intrinsic<"xvslei.bu", int_loongarch_lasx_xvslei_bu, uimm5, immZExt5, LASX256BOpnd>; ++ ++def XVSLEI_HU : LASX_I5_U<0b01110110100001001>, ++ LASX_I5_U_SETCC_DESC_BASE_Intrinsic<"xvslei.hu", int_loongarch_lasx_xvslei_hu, uimm5, immZExt5, LASX256HOpnd>; ++ ++def XVSLEI_WU : LASX_I5_U<0b01110110100001010>, ++ LASX_I5_U_SETCC_DESC_BASE_Intrinsic<"xvslei.wu", int_loongarch_lasx_xvslei_wu, uimm5, immZExt5, LASX256WOpnd>; ++ ++def XVSLEI_DU : LASX_I5_U<0b01110110100001011>, ++ LASX_I5_U_SETCC_DESC_BASE_Intrinsic<"xvslei.du", int_loongarch_lasx_xvslei_du, uimm5, immZExt5, LASX256DOpnd>; ++ ++ ++def XVSLTI_B : LASX_I5<0b01110110100001100>, ++ 
LASX_I5_SETCC_DESC_BASE_Intrinsic<"xvslti.b", int_loongarch_lasx_xvslti_b, simm5_32, immSExt5, LASX256BOpnd>; ++ ++def XVSLTI_H : LASX_I5<0b01110110100001101>, ++ LASX_I5_SETCC_DESC_BASE_Intrinsic<"xvslti.h", int_loongarch_lasx_xvslti_h, simm5_32, immSExt5, LASX256HOpnd>; ++ ++def XVSLTI_W : LASX_I5<0b01110110100001110>, ++ LASX_I5_SETCC_DESC_BASE_Intrinsic<"xvslti.w", int_loongarch_lasx_xvslti_w, simm5_32, immSExt5, LASX256WOpnd>; ++ ++def XVSLTI_D : LASX_I5<0b01110110100001111>, ++ LASX_I5_SETCC_DESC_BASE_Intrinsic<"xvslti.d", int_loongarch_lasx_xvslti_d, simm5_32, immSExt5, LASX256DOpnd>; ++ ++ ++def XVSLTI_BU : LASX_I5_U<0b01110110100010000>, ++ LASX_I5_U_SETCC_DESC_BASE_Intrinsic<"xvslti.bu", int_loongarch_lasx_xvslti_bu, uimm5, immZExt5, LASX256BOpnd>; ++ ++def XVSLTI_HU : LASX_I5_U<0b01110110100010001>, ++ LASX_I5_U_SETCC_DESC_BASE_Intrinsic<"xvslti.hu", int_loongarch_lasx_xvslti_hu, uimm5, immZExt5, LASX256HOpnd>; ++ ++def XVSLTI_WU : LASX_I5_U<0b01110110100010010>, ++ LASX_I5_U_SETCC_DESC_BASE_Intrinsic<"xvslti.wu", int_loongarch_lasx_xvslti_wu, uimm5, immZExt5, LASX256WOpnd>; ++ ++def XVSLTI_DU : LASX_I5_U<0b01110110100010011>, ++ LASX_I5_U_SETCC_DESC_BASE_Intrinsic<"xvslti.du", int_loongarch_lasx_xvslti_du, uimm5, immZExt5, LASX256DOpnd>; ++ ++ ++def XVADDI_BU : LASX_I5_U<0b01110110100010100>, ++ LASX_I5_U_DESC_BASE_Intrinsic<"xvaddi.bu", int_loongarch_lasx_xvaddi_bu, uimm5, immZExt5, LASX256BOpnd>; ++ ++def XVADDI_HU : LASX_I5_U<0b01110110100010101>, ++ LASX_I5_U_DESC_BASE_Intrinsic<"xvaddi.hu", int_loongarch_lasx_xvaddi_hu, uimm5, immZExt5, LASX256HOpnd>; ++ ++def XVADDI_WU : LASX_I5_U<0b01110110100010110>, ++ LASX_I5_U_DESC_BASE_Intrinsic<"xvaddi.wu", int_loongarch_lasx_xvaddi_wu, uimm5, immZExt5, LASX256WOpnd>; ++ ++def XVADDI_DU : LASX_I5_U<0b01110110100010111>, ++ LASX_I5_U_DESC_BASE_Intrinsic<"xvaddi.du", int_loongarch_lasx_xvaddi_du, uimm5, immZExt5, LASX256DOpnd>; ++ ++ ++def XVSUBI_BU : LASX_I5_U<0b01110110100011000>, ++ LASX_I5_U_DESC_BASE_Intrinsic<"xvsubi.bu", int_loongarch_lasx_xvsubi_bu, uimm5, immZExt5, LASX256BOpnd>; ++ ++def XVSUBI_HU : LASX_I5_U<0b01110110100011001>, ++ LASX_I5_U_DESC_BASE_Intrinsic<"xvsubi.hu", int_loongarch_lasx_xvsubi_hu, uimm5, immZExt5, LASX256HOpnd>; ++ ++def XVSUBI_WU : LASX_I5_U<0b01110110100011010>, ++ LASX_I5_U_DESC_BASE_Intrinsic<"xvsubi.wu", int_loongarch_lasx_xvsubi_wu, uimm5, immZExt5, LASX256WOpnd>; ++ ++def XVSUBI_DU : LASX_I5_U<0b01110110100011011>, ++ LASX_I5_U_DESC_BASE_Intrinsic<"xvsubi.du", int_loongarch_lasx_xvsubi_du, uimm5, immZExt5, LASX256DOpnd>; ++ ++ ++def XVBSLL_V : LASX_I5_U<0b01110110100011100>, ++ LASX_U5_DESC_BASE<"xvbsll.v", int_loongarch_lasx_xvbsll_v, LASX256BOpnd>; ++ ++def XVBSRL_V : LASX_I5_U<0b01110110100011101>, ++ LASX_U5_DESC_BASE<"xvbsrl.v", int_loongarch_lasx_xvbsrl_v, LASX256BOpnd>; ++ ++ ++def XVMAXI_B : LASX_I5<0b01110110100100000>, ++ LASX_I5_DESC_BASE_Intrinsic<"xvmaxi.b", int_loongarch_lasx_xvmaxi_b, simm5_32, immSExt5, LASX256BOpnd>; ++ ++def XVMAXI_H : LASX_I5<0b01110110100100001>, ++ LASX_I5_DESC_BASE_Intrinsic<"xvmaxi.h", int_loongarch_lasx_xvmaxi_h, simm5_32, immSExt5, LASX256HOpnd>; ++ ++def XVMAXI_W : LASX_I5<0b01110110100100010>, ++ LASX_I5_DESC_BASE_Intrinsic<"xvmaxi.w", int_loongarch_lasx_xvmaxi_w, simm5_32, immSExt5, LASX256WOpnd>; ++ ++def XVMAXI_D : LASX_I5<0b01110110100100011>, ++ LASX_I5_DESC_BASE_Intrinsic<"xvmaxi.d", int_loongarch_lasx_xvmaxi_d, simm5_32, immSExt5, LASX256DOpnd>; ++ ++ ++def XVMINI_B : LASX_I5<0b01110110100100100>, ++ LASX_I5_DESC_BASE_Intrinsic<"xvmini.b", 
int_loongarch_lasx_xvmini_b, simm5_32, immSExt5, LASX256BOpnd>; ++ ++def XVMINI_H : LASX_I5<0b01110110100100101>, ++ LASX_I5_DESC_BASE_Intrinsic<"xvmini.h", int_loongarch_lasx_xvmini_h, simm5_32, immSExt5, LASX256HOpnd>; ++ ++def XVMINI_W : LASX_I5<0b01110110100100110>, ++ LASX_I5_DESC_BASE_Intrinsic<"xvmini.w", int_loongarch_lasx_xvmini_w, simm5_32, immSExt5, LASX256WOpnd>; ++ ++def XVMINI_D : LASX_I5<0b01110110100100111>, ++ LASX_I5_DESC_BASE_Intrinsic<"xvmini.d", int_loongarch_lasx_xvmini_d, simm5_32, immSExt5, LASX256DOpnd>; ++ ++ ++def XVMAXI_BU : LASX_I5_U<0b01110110100101000>, ++ LASX_I5_U_DESC_BASE_Intrinsic<"xvmaxi.bu", int_loongarch_lasx_xvmaxi_bu, uimm5, immZExt5, LASX256BOpnd>; ++ ++def XVMAXI_HU : LASX_I5_U<0b01110110100101001>, ++ LASX_I5_U_DESC_BASE_Intrinsic<"xvmaxi.hu", int_loongarch_lasx_xvmaxi_hu, uimm5, immZExt5, LASX256HOpnd>; ++ ++def XVMAXI_WU : LASX_I5_U<0b01110110100101010>, ++ LASX_I5_U_DESC_BASE_Intrinsic<"xvmaxi.wu", int_loongarch_lasx_xvmaxi_wu, uimm5, immZExt5, LASX256WOpnd>; ++ ++def XVMAXI_DU : LASX_I5_U<0b01110110100101011>, ++ LASX_I5_U_DESC_BASE_Intrinsic<"xvmaxi.du", int_loongarch_lasx_xvmaxi_du, uimm5, immZExt5, LASX256DOpnd>; ++ ++ ++def XVMINI_BU : LASX_I5_U<0b01110110100101100>, ++ LASX_I5_U_DESC_BASE_Intrinsic<"xvmini.bu", int_loongarch_lasx_xvmini_bu, uimm5, immZExt5, LASX256BOpnd>; ++ ++def XVMINI_HU : LASX_I5_U<0b01110110100101101>, ++ LASX_I5_U_DESC_BASE_Intrinsic<"xvmini.hu", int_loongarch_lasx_xvmini_hu, uimm5, immZExt5, LASX256HOpnd>; ++ ++def XVMINI_WU : LASX_I5_U<0b01110110100101110>, ++ LASX_I5_U_DESC_BASE_Intrinsic<"xvmini.wu", int_loongarch_lasx_xvmini_wu, uimm5, immZExt5, LASX256WOpnd>; ++ ++def XVMINI_DU : LASX_I5_U<0b01110110100101111>, ++ LASX_I5_U_DESC_BASE_Intrinsic<"xvmini.du", int_loongarch_lasx_xvmini_du, uimm5, immZExt5, LASX256DOpnd>; ++ ++ ++def XVFRSTPI_B : LASX_I5_U<0b01110110100110100>, ++ LASX_U5_4R_DESC_BASE<"xvfrstpi.b", int_loongarch_lasx_xvfrstpi_b, LASX256BOpnd>; ++ ++def XVFRSTPI_H : LASX_I5_U<0b01110110100110101>, ++ LASX_U5_4R_DESC_BASE<"xvfrstpi.h", int_loongarch_lasx_xvfrstpi_h, LASX256HOpnd>; ++ ++ ++def XVCLO_B : LASX_2R<0b0111011010011100000000>, ++ LASX_2R_DESC_BASE<"xvclo.b", int_loongarch_lasx_xvclo_b, LASX256BOpnd>; ++ ++def XVCLO_H : LASX_2R<0b0111011010011100000001>, ++ LASX_2R_DESC_BASE<"xvclo.h", int_loongarch_lasx_xvclo_h, LASX256HOpnd>; ++ ++def XVCLO_W : LASX_2R<0b0111011010011100000010>, ++ LASX_2R_DESC_BASE<"xvclo.w", int_loongarch_lasx_xvclo_w, LASX256WOpnd>; ++ ++def XVCLO_D : LASX_2R<0b0111011010011100000011>, ++ LASX_2R_DESC_BASE<"xvclo.d", int_loongarch_lasx_xvclo_d, LASX256DOpnd>; ++ ++ ++def XVCLZ_B : LASX_2R<0b0111011010011100000100>, ++ LASX_2R_DESC_BASE<"xvclz.b", ctlz, LASX256BOpnd>; ++ ++def XVCLZ_H : LASX_2R<0b0111011010011100000101>, ++ LASX_2R_DESC_BASE<"xvclz.h", ctlz, LASX256HOpnd>; ++ ++def XVCLZ_W : LASX_2R<0b0111011010011100000110>, ++ LASX_2R_DESC_BASE<"xvclz.w", ctlz, LASX256WOpnd>; ++ ++def XVCLZ_D : LASX_2R<0b0111011010011100000111>, ++ LASX_2R_DESC_BASE<"xvclz.d", ctlz, LASX256DOpnd>; ++ ++ ++def XVPCNT_B : LASX_2R<0b0111011010011100001000>, ++ LASX_2R_DESC_BASE<"xvpcnt.b", ctpop, LASX256BOpnd>; ++ ++def XVPCNT_H : LASX_2R<0b0111011010011100001001>, ++ LASX_2R_DESC_BASE<"xvpcnt.h", ctpop, LASX256HOpnd>; ++ ++def XVPCNT_W : LASX_2R<0b0111011010011100001010>, ++ LASX_2R_DESC_BASE<"xvpcnt.w", ctpop, LASX256WOpnd>; ++ ++def XVPCNT_D : LASX_2R<0b0111011010011100001011>, ++ LASX_2R_DESC_BASE<"xvpcnt.d", ctpop, LASX256DOpnd>; ++ ++ ++def XVNEG_B : 
LASX_2R<0b0111011010011100001100>, ++ LASX_2R_DESC_BASE<"xvneg.b", int_loongarch_lasx_xvneg_b, LASX256BOpnd>; ++ ++def XVNEG_H : LASX_2R<0b0111011010011100001101>, ++ LASX_2R_DESC_BASE<"xvneg.h", int_loongarch_lasx_xvneg_h, LASX256HOpnd>; ++ ++def XVNEG_W : LASX_2R<0b0111011010011100001110>, ++ LASX_2R_DESC_BASE<"xvneg.w", int_loongarch_lasx_xvneg_w, LASX256WOpnd>; ++ ++def XVNEG_D : LASX_2R<0b0111011010011100001111>, ++ LASX_2R_DESC_BASE<"xvneg.d", int_loongarch_lasx_xvneg_d, LASX256DOpnd>; ++ ++ ++def XVMSKLTZ_B : LASX_2R<0b0111011010011100010000>, ++ LASX_2R_DESC_BASE<"xvmskltz.b", int_loongarch_lasx_xvmskltz_b, LASX256BOpnd>; ++ ++def XVMSKLTZ_H : LASX_2R<0b0111011010011100010001>, ++ LASX_2R_DESC_BASE<"xvmskltz.h", int_loongarch_lasx_xvmskltz_h, LASX256HOpnd>; ++ ++def XVMSKLTZ_W : LASX_2R<0b0111011010011100010010>, ++ LASX_2R_DESC_BASE<"xvmskltz.w", int_loongarch_lasx_xvmskltz_w, LASX256WOpnd>; ++ ++def XVMSKLTZ_D : LASX_2R<0b0111011010011100010011>, ++ LASX_2R_DESC_BASE<"xvmskltz.d", int_loongarch_lasx_xvmskltz_d, LASX256DOpnd>; ++ ++ ++def XVMSKGEZ_B : LASX_2R<0b0111011010011100010100>, ++ LASX_2R_DESC_BASE<"xvmskgez.b", int_loongarch_lasx_xvmskgez_b, LASX256BOpnd>; ++ ++def XVMSKNZ_B : LASX_2R<0b0111011010011100011000>, ++ LASX_2R_DESC_BASE<"xvmsknz.b", int_loongarch_lasx_xvmsknz_b, LASX256BOpnd>; ++ ++ ++def XVSETEQZ_V : LASX_SET<0b0111011010011100100110>, ++ LASX_SET_DESC_BASE<"xvseteqz.v", LASX256BOpnd>; ++ ++def XVSETNEZ_V : LASX_SET<0b0111011010011100100111>, ++ LASX_SET_DESC_BASE<"xvsetnez.v", LASX256BOpnd>; ++ ++ ++def XVSETANYEQZ_B : LASX_SET<0b0111011010011100101000>, ++ LASX_SET_DESC_BASE<"xvsetanyeqz.b", LASX256BOpnd>; ++ ++def XVSETANYEQZ_H : LASX_SET<0b0111011010011100101001>, ++ LASX_SET_DESC_BASE<"xvsetanyeqz.h", LASX256HOpnd>; ++ ++def XVSETANYEQZ_W : LASX_SET<0b0111011010011100101010>, ++ LASX_SET_DESC_BASE<"xvsetanyeqz.w", LASX256WOpnd>; ++ ++def XVSETANYEQZ_D : LASX_SET<0b0111011010011100101011>, ++ LASX_SET_DESC_BASE<"xvsetanyeqz.d", LASX256DOpnd>; ++ ++ ++def XVSETALLNEZ_B : LASX_SET<0b0111011010011100101100>, ++ LASX_SET_DESC_BASE<"xvsetallnez.b", LASX256BOpnd>; ++ ++def XVSETALLNEZ_H : LASX_SET<0b0111011010011100101101>, ++ LASX_SET_DESC_BASE<"xvsetallnez.h", LASX256HOpnd>; ++ ++def XVSETALLNEZ_W : LASX_SET<0b0111011010011100101110>, ++ LASX_SET_DESC_BASE<"xvsetallnez.w", LASX256WOpnd>; ++ ++def XVSETALLNEZ_D : LASX_SET<0b0111011010011100101111>, ++ LASX_SET_DESC_BASE<"xvsetallnez.d", LASX256DOpnd>; ++ ++class LASX_CBRANCH_PSEUDO_DESC_BASE : ++ LoongArchPseudo<(outs GPR32:$dst), ++ (ins RCWS:$xj), ++ [(set GPR32:$dst, (OpNode (TyNode RCWS:$xj)))]> { ++ bit usesCustomInserter = 1; ++} ++ ++def XSNZ_B_PSEUDO : LASX_CBRANCH_PSEUDO_DESC_BASE; ++def XSNZ_H_PSEUDO : LASX_CBRANCH_PSEUDO_DESC_BASE; ++def XSNZ_W_PSEUDO : LASX_CBRANCH_PSEUDO_DESC_BASE; ++def XSNZ_D_PSEUDO : LASX_CBRANCH_PSEUDO_DESC_BASE; ++def XSNZ_V_PSEUDO : LASX_CBRANCH_PSEUDO_DESC_BASE; ++ ++def XSZ_B_PSEUDO : LASX_CBRANCH_PSEUDO_DESC_BASE; ++def XSZ_H_PSEUDO : LASX_CBRANCH_PSEUDO_DESC_BASE; ++def XSZ_W_PSEUDO : LASX_CBRANCH_PSEUDO_DESC_BASE; ++def XSZ_D_PSEUDO : LASX_CBRANCH_PSEUDO_DESC_BASE; ++def XSZ_V_PSEUDO : LASX_CBRANCH_PSEUDO_DESC_BASE; ++ ++ ++def XVFLOGB_S : LASX_2R<0b0111011010011100110001>, ++ LASX_2RF_DESC_BASE<"xvflogb.s", int_loongarch_lasx_xvflogb_s, LASX256WOpnd>; ++ ++def XVFLOGB_D : LASX_2R<0b0111011010011100110010>, ++ LASX_2RF_DESC_BASE<"xvflogb.d", int_loongarch_lasx_xvflogb_d, LASX256DOpnd>; ++ ++ ++def XVFCLASS_S : LASX_2R<0b0111011010011100110101>, ++ 
LASX_2RF_DESC_BASE<"xvfclass.s", int_loongarch_lasx_xvfclass_s, LASX256WOpnd>; ++ ++def XVFCLASS_D : LASX_2R<0b0111011010011100110110>, ++ LASX_2RF_DESC_BASE<"xvfclass.d", int_loongarch_lasx_xvfclass_d, LASX256DOpnd>; ++ ++ ++def XVFSQRT_S : LASX_2R<0b0111011010011100111001>, ++ LASX_2RF_DESC_BASE<"xvfsqrt.s", fsqrt, LASX256WOpnd>; ++ ++def XVFSQRT_D : LASX_2R<0b0111011010011100111010>, ++ LASX_2RF_DESC_BASE<"xvfsqrt.d", fsqrt, LASX256DOpnd>; ++ ++ ++def XVFRECIP_S : LASX_2R<0b0111011010011100111101>, ++ LASX_2RF_DESC_BASE<"xvfrecip.s", int_loongarch_lasx_xvfrecip_s, LASX256WOpnd>; ++ ++def XVFRECIP_D : LASX_2R<0b0111011010011100111110>, ++ LASX_2RF_DESC_BASE<"xvfrecip.d", int_loongarch_lasx_xvfrecip_d, LASX256DOpnd>; ++ ++ ++def XVFRSQRT_S : LASX_2R<0b0111011010011101000001>, ++ LASX_2RF_DESC_BASE<"xvfrsqrt.s", int_loongarch_lasx_xvfrsqrt_s, LASX256WOpnd>; ++ ++def XVFRSQRT_D : LASX_2R<0b0111011010011101000010>, ++ LASX_2RF_DESC_BASE<"xvfrsqrt.d", int_loongarch_lasx_xvfrsqrt_d, LASX256DOpnd>; ++ ++ ++def XVFRINT_S : LASX_2R<0b0111011010011101001101>, ++ LASX_2RF_DESC_BASE<"xvfrint.s", frint, LASX256WOpnd>; ++ ++def XVFRINT_D : LASX_2R<0b0111011010011101001110>, ++ LASX_2RF_DESC_BASE<"xvfrint.d", frint, LASX256DOpnd>; ++ ++ ++def XVFRINTRM_S : LASX_2R<0b0111011010011101010001>, ++ LASX_2RF_DESC_BASE<"xvfrintrm.s", int_loongarch_lasx_xvfrintrm_s, LASX256WOpnd>; ++ ++def XVFRINTRM_D : LASX_2R<0b0111011010011101010010>, ++ LASX_2RF_DESC_BASE<"xvfrintrm.d", int_loongarch_lasx_xvfrintrm_d, LASX256DOpnd>; ++ ++ ++def XVFRINTRP_S : LASX_2R<0b0111011010011101010101>, ++ LASX_2RF_DESC_BASE<"xvfrintrp.s", int_loongarch_lasx_xvfrintrp_s, LASX256WOpnd>; ++ ++def XVFRINTRP_D : LASX_2R<0b0111011010011101010110>, ++ LASX_2RF_DESC_BASE<"xvfrintrp.d", int_loongarch_lasx_xvfrintrp_d, LASX256DOpnd>; ++ ++ ++def XVFRINTRZ_S : LASX_2R<0b0111011010011101011001>, ++ LASX_2RF_DESC_BASE<"xvfrintrz.s", int_loongarch_lasx_xvfrintrz_s, LASX256WOpnd>; ++ ++def XVFRINTRZ_D : LASX_2R<0b0111011010011101011010>, ++ LASX_2RF_DESC_BASE<"xvfrintrz.d", int_loongarch_lasx_xvfrintrz_d, LASX256DOpnd>; ++ ++ ++def XVFRINTRNE_S : LASX_2R<0b0111011010011101011101>, ++ LASX_2RF_DESC_BASE<"xvfrintrne.s", int_loongarch_lasx_xvfrintrne_s, LASX256WOpnd>; ++ ++def XVFRINTRNE_D : LASX_2R<0b0111011010011101011110>, ++ LASX_2RF_DESC_BASE<"xvfrintrne.d", int_loongarch_lasx_xvfrintrne_d, LASX256DOpnd>; ++ ++ ++def XVFCVTL_S_H : LASX_2R<0b0111011010011101111010>, ++ LASX_2RF_DESC_BASE<"xvfcvtl.s.h", int_loongarch_lasx_xvfcvtl_s_h, LASX256WOpnd, LASX256HOpnd>; ++ ++def XVFCVTH_S_H : LASX_2R<0b0111011010011101111011>, ++ LASX_2RF_DESC_BASE<"xvfcvth.s.h", int_loongarch_lasx_xvfcvth_s_h, LASX256WOpnd, LASX256HOpnd>; ++ ++ ++def XVFCVTL_D_S : LASX_2R<0b0111011010011101111100>, ++ LASX_2RF_DESC_BASE<"xvfcvtl.d.s", int_loongarch_lasx_xvfcvtl_d_s, LASX256DOpnd, LASX256WOpnd>; ++ ++def XVFCVTH_D_S : LASX_2R<0b0111011010011101111101>, ++ LASX_2RF_DESC_BASE<"xvfcvth.d.s", int_loongarch_lasx_xvfcvth_d_s, LASX256DOpnd, LASX256WOpnd>; ++ ++ ++def XVFFINT_S_W : LASX_2R<0b0111011010011110000000>, ++ LASX_2RF_DESC_BASE<"xvffint.s.w", sint_to_fp, LASX256WOpnd>; ++ ++def XVFFINT_S_WU : LASX_2R<0b0111011010011110000001>, ++ LASX_2RF_DESC_BASE<"xvffint.s.wu", uint_to_fp, LASX256WOpnd>; ++ ++ ++def XVFFINT_D_L : LASX_2R<0b0111011010011110000010>, ++ LASX_2RF_DESC_BASE<"xvffint.d.l", sint_to_fp, LASX256DOpnd>; ++ ++def XVFFINT_D_LU : LASX_2R<0b0111011010011110000011>, ++ LASX_2RF_DESC_BASE<"xvffint.d.lu", uint_to_fp, LASX256DOpnd>; ++ ++ ++def XVFFINTL_D_W : 
LASX_2R<0b0111011010011110000100>, ++ LASX_2RF_DESC_BASE<"xvffintl.d.w", int_loongarch_lasx_xvffintl_d_w, LASX256DOpnd, LASX256WOpnd>; ++ ++def XVFFINTH_D_W : LASX_2R<0b0111011010011110000101>, ++ LASX_2RF_DESC_BASE<"xvffinth.d.w", int_loongarch_lasx_xvffinth_d_w, LASX256DOpnd, LASX256WOpnd>; ++ ++ ++def XVFTINT_W_S : LASX_2R<0b0111011010011110001100>, ++ LASX_2RF_DESC_BASE<"xvftint.w.s", int_loongarch_lasx_xvftint_w_s, LASX256WOpnd>; ++ ++def XVFTINT_L_D : LASX_2R<0b0111011010011110001101>, ++ LASX_2RF_DESC_BASE<"xvftint.l.d", int_loongarch_lasx_xvftint_l_d, LASX256DOpnd>; ++ ++ ++def XVFTINTRM_W_S : LASX_2R<0b0111011010011110001110>, ++ LASX_2RF_DESC_BASE<"xvftintrm.w.s", int_loongarch_lasx_xvftintrm_w_s, LASX256WOpnd>; ++ ++def XVFTINTRM_L_D : LASX_2R<0b0111011010011110001111>, ++ LASX_2RF_DESC_BASE<"xvftintrm.l.d", int_loongarch_lasx_xvftintrm_l_d, LASX256DOpnd>; ++ ++ ++def XVFTINTRP_W_S : LASX_2R<0b0111011010011110010000>, ++ LASX_2RF_DESC_BASE<"xvftintrp.w.s", int_loongarch_lasx_xvftintrp_w_s, LASX256WOpnd>; ++ ++def XVFTINTRP_L_D : LASX_2R<0b0111011010011110010001>, ++ LASX_2RF_DESC_BASE<"xvftintrp.l.d", int_loongarch_lasx_xvftintrp_l_d, LASX256DOpnd>; ++ ++ ++def XVFTINTRZ_W_S : LASX_2R<0b0111011010011110010010>, ++ LASX_2RF_DESC_BASE<"xvftintrz.w.s", fp_to_sint, LASX256WOpnd>; ++ ++def XVFTINTRZ_L_D : LASX_2R<0b0111011010011110010011>, ++ LASX_2RF_DESC_BASE<"xvftintrz.l.d", fp_to_sint, LASX256DOpnd>; ++ ++ ++def XVFTINTRNE_W_S : LASX_2R<0b0111011010011110010100>, ++ LASX_2RF_DESC_BASE<"xvftintrne.w.s", int_loongarch_lasx_xvftintrne_w_s, LASX256WOpnd>; ++ ++def XVFTINTRNE_L_D : LASX_2R<0b0111011010011110010101>, ++ LASX_2RF_DESC_BASE<"xvftintrne.l.d", int_loongarch_lasx_xvftintrne_l_d, LASX256DOpnd>; ++ ++ ++def XVFTINT_WU_S : LASX_2R<0b0111011010011110010110>, ++ LASX_2RF_DESC_BASE<"xvftint.wu.s", int_loongarch_lasx_xvftint_wu_s, LASX256WOpnd>; ++ ++def XVFTINT_LU_D : LASX_2R<0b0111011010011110010111>, ++ LASX_2RF_DESC_BASE<"xvftint.lu.d", int_loongarch_lasx_xvftint_lu_d, LASX256DOpnd>; ++ ++ ++def XVFTINTRZ_WU_S : LASX_2R<0b0111011010011110011100>, ++ LASX_2RF_DESC_BASE<"xvftintrz.wu.s", fp_to_uint, LASX256WOpnd>; ++ ++def XVFTINTRZ_LU_D : LASX_2R<0b0111011010011110011101>, ++ LASX_2RF_DESC_BASE<"xvftintrz.lu.d", fp_to_uint, LASX256DOpnd>; ++ ++ ++def XVFTINTL_L_S : LASX_2R<0b0111011010011110100000>, ++ LASX_2RF_DESC_BASE<"xvftintl.l.s", int_loongarch_lasx_xvftintl_l_s, LASX256DOpnd, LASX256WOpnd>; ++ ++def XVFTINTH_L_S : LASX_2R<0b0111011010011110100001>, ++ LASX_2RF_DESC_BASE<"xvftinth.l.s", int_loongarch_lasx_xvftinth_l_s, LASX256DOpnd, LASX256WOpnd>; ++ ++ ++def XVFTINTRML_L_S : LASX_2R<0b0111011010011110100010>, ++ LASX_2RF_DESC_BASE<"xvftintrml.l.s", int_loongarch_lasx_xvftintrml_l_s, LASX256DOpnd, LASX256WOpnd>; ++ ++def XVFTINTRMH_L_S : LASX_2R<0b0111011010011110100011>, ++ LASX_2RF_DESC_BASE<"xvftintrmh.l.s", int_loongarch_lasx_xvftintrmh_l_s, LASX256DOpnd, LASX256WOpnd>; ++ ++ ++def XVFTINTRPL_L_S : LASX_2R<0b0111011010011110100100>, ++ LASX_2RF_DESC_BASE<"xvftintrpl.l.s", int_loongarch_lasx_xvftintrpl_l_s, LASX256DOpnd, LASX256WOpnd>; ++ ++def XVFTINTRPH_L_S : LASX_2R<0b0111011010011110100101>, ++ LASX_2RF_DESC_BASE<"xvftintrph.l.s", int_loongarch_lasx_xvftintrph_l_s, LASX256DOpnd, LASX256WOpnd>; ++ ++ ++def XVFTINTRZL_L_S : LASX_2R<0b0111011010011110100110>, ++ LASX_2RF_DESC_BASE<"xvftintrzl.l.s", int_loongarch_lasx_xvftintrzl_l_s, LASX256DOpnd, LASX256WOpnd>; ++ ++def XVFTINTRZH_L_S : LASX_2R<0b0111011010011110100111>, ++ LASX_2RF_DESC_BASE<"xvftintrzh.l.s", 
int_loongarch_lasx_xvftintrzh_l_s, LASX256DOpnd, LASX256WOpnd>; ++ ++ ++def XVFTINTRNEL_L_S : LASX_2R<0b0111011010011110101000>, ++ LASX_2RF_DESC_BASE<"xvftintrnel.l.s", int_loongarch_lasx_xvftintrnel_l_s, LASX256DOpnd, LASX256WOpnd>; ++ ++def XVFTINTRNEH_L_S : LASX_2R<0b0111011010011110101001>, ++ LASX_2RF_DESC_BASE<"xvftintrneh.l.s", int_loongarch_lasx_xvftintrneh_l_s, LASX256DOpnd, LASX256WOpnd>; ++ ++ ++def XVEXTH_H_B : LASX_2R<0b0111011010011110111000>, ++ LASX_2R_DESC_BASE<"xvexth.h.b", int_loongarch_lasx_xvexth_h_b, LASX256HOpnd, LASX256BOpnd>; ++ ++def XVEXTH_W_H : LASX_2R<0b0111011010011110111001>, ++ LASX_2R_DESC_BASE<"xvexth.w.h", int_loongarch_lasx_xvexth_w_h, LASX256WOpnd, LASX256HOpnd>; ++ ++def XVEXTH_D_W : LASX_2R<0b0111011010011110111010>, ++ LASX_2R_DESC_BASE<"xvexth.d.w", int_loongarch_lasx_xvexth_d_w, LASX256DOpnd, LASX256WOpnd> ; ++ ++def XVEXTH_Q_D : LASX_2R<0b0111011010011110111011>, ++ LASX_2R_DESC_BASE<"xvexth.q.d", int_loongarch_lasx_xvexth_q_d, LASX256DOpnd, LASX256DOpnd>; ++ ++ ++def XVEXTH_HU_BU : LASX_2R<0b0111011010011110111100>, ++ LASX_2R_DESC_BASE<"xvexth.hu.bu", int_loongarch_lasx_xvexth_hu_bu, LASX256HOpnd, LASX256BOpnd>; ++ ++def XVEXTH_WU_HU : LASX_2R<0b0111011010011110111101>, ++ LASX_2R_DESC_BASE<"xvexth.wu.hu", int_loongarch_lasx_xvexth_wu_hu, LASX256WOpnd, LASX256HOpnd>; ++ ++def XVEXTH_DU_WU : LASX_2R<0b0111011010011110111110>, ++ LASX_2R_DESC_BASE<"xvexth.du.wu", int_loongarch_lasx_xvexth_du_wu, LASX256DOpnd, LASX256WOpnd> ; ++ ++def XVEXTH_QU_DU : LASX_2R<0b0111011010011110111111>, ++ LASX_2R_DESC_BASE<"xvexth.qu.du", int_loongarch_lasx_xvexth_qu_du, LASX256DOpnd, LASX256DOpnd>; ++ ++ ++def XVREPLGR2VR_B : LASX_2R_1GP<0b0111011010011111000000>, ++ LASX_2R_REPL_DESC_BASE<"xvreplgr2vr.b", v32i8, xvsplati8, LASX256BOpnd, GPR32Opnd>; ++ ++def XVREPLGR2VR_H : LASX_2R_1GP<0b0111011010011111000001>, ++ LASX_2R_REPL_DESC_BASE<"xvreplgr2vr.h", v16i16, xvsplati16, LASX256HOpnd, GPR32Opnd>; ++ ++def XVREPLGR2VR_W : LASX_2R_1GP<0b0111011010011111000010>, ++ LASX_2R_REPL_DESC_BASE<"xvreplgr2vr.w", v8i32, xvsplati32, LASX256WOpnd, GPR32Opnd>; ++ ++def XVREPLGR2VR_D : LASX_2R_1GP<0b0111011010011111000011>, ++ LASX_2R_REPL_DESC_BASE<"xvreplgr2vr.d", v4i64, xvsplati64, LASX256DOpnd, GPR64Opnd>; ++ ++ ++def VEXT2XV_H_B : LASX_2R<0b0111011010011111000100>, ++ LASX_XVEXTEND_DESC_BASE<"vext2xv.h.b", int_loongarch_lasx_vext2xv_h_b, v32i8, v16i16, LASX256BOpnd, LASX256HOpnd>; ++ ++def VEXT2XV_W_B : LASX_2R<0b0111011010011111000101>, ++ LASX_XVEXTEND_DESC_BASE<"vext2xv.w.b", int_loongarch_lasx_vext2xv_w_b, v32i8, v8i32, LASX256BOpnd, LASX256WOpnd>; ++ ++def VEXT2XV_D_B : LASX_2R<0b0111011010011111000110>, ++ LASX_XVEXTEND_DESC_BASE<"vext2xv.d.b", int_loongarch_lasx_vext2xv_d_b, v32i8, v4i64, LASX256BOpnd, LASX256DOpnd> ; ++ ++def VEXT2XV_W_H : LASX_2R<0b0111011010011111000111>, ++ LASX_XVEXTEND_DESC_BASE<"vext2xv.w.h", int_loongarch_lasx_vext2xv_w_h, v16i16, v8i32, LASX256HOpnd, LASX256WOpnd>; ++ ++def VEXT2XV_D_H : LASX_2R<0b0111011010011111001000>, ++ LASX_XVEXTEND_DESC_BASE<"vext2xv.d.h", int_loongarch_lasx_vext2xv_d_h, v16i16, v4i64, LASX256HOpnd, LASX256DOpnd> ; ++ ++def VEXT2XV_D_W : LASX_2R<0b0111011010011111001001>, ++ LASX_XVEXTEND_DESC_BASE<"vext2xv.d.w", int_loongarch_lasx_vext2xv_d_w, v8i32, v4i64, LASX256WOpnd, LASX256DOpnd>; ++ ++ ++def VEXT2XV_HU_BU : LASX_2R<0b0111011010011111001010>, ++ LASX_XVEXTEND_DESC_BASE<"vext2xv.hu.bu", int_loongarch_lasx_vext2xv_hu_bu, v32i8, v16i16, LASX256BOpnd, LASX256HOpnd>; ++ ++def VEXT2XV_WU_BU : 
LASX_2R<0b0111011010011111001011>, ++ LASX_XVEXTEND_DESC_BASE<"vext2xv.wu.bu", int_loongarch_lasx_vext2xv_wu_bu, v32i8, v8i32, LASX256BOpnd, LASX256WOpnd>; ++ ++def VEXT2XV_DU_BU : LASX_2R<0b0111011010011111001100>, ++ LASX_XVEXTEND_DESC_BASE<"vext2xv.du.bu", int_loongarch_lasx_vext2xv_du_bu, v32i8, v4i64, LASX256BOpnd, LASX256DOpnd> ; ++ ++def VEXT2XV_WU_HU : LASX_2R<0b0111011010011111001101>, ++ LASX_XVEXTEND_DESC_BASE<"vext2xv.wu.hu", int_loongarch_lasx_vext2xv_wu_hu, v16i16, v8i32, LASX256HOpnd, LASX256WOpnd>; ++ ++def VEXT2XV_DU_HU : LASX_2R<0b0111011010011111001110>, ++ LASX_XVEXTEND_DESC_BASE<"vext2xv.du.hu", int_loongarch_lasx_vext2xv_du_hu, v16i16, v4i64, LASX256HOpnd, LASX256DOpnd> ; ++ ++def VEXT2XV_DU_WU : LASX_2R<0b0111011010011111001111>, ++ LASX_XVEXTEND_DESC_BASE<"vext2xv.du.wu", int_loongarch_lasx_vext2xv_du_wu, v8i32, v4i64, LASX256WOpnd, LASX256DOpnd>; ++ ++ ++def XVHSELI_D : LASX_I5_U<0b01110110100111111>, ++ LASX_U5N_DESC_BASE<"xvhseli.d", LASX256DOpnd>; ++ ++ ++def XVROTRI_B : LASX_I3_U<0b0111011010100000001>, ++ LASX_RORI_U3_DESC_BASE_Intrinsic<"xvrotri.b", int_loongarch_lasx_xvrotri_b, uimm3, immZExt3, LASX256BOpnd>; ++ ++def XVROTRI_H : LASX_I4_U<0b011101101010000001>, ++ LASX_RORI_U4_DESC_BASE_Intrinsic<"xvrotri.h", int_loongarch_lasx_xvrotri_h, uimm4, immZExt4, LASX256HOpnd>; ++ ++def XVROTRI_W : LASX_I5_U<0b01110110101000001>, ++ LASX_RORI_U5_DESC_BASE_Intrinsic<"xvrotri.w", int_loongarch_lasx_xvrotri_w, uimm5, immZExt5, LASX256WOpnd>; ++ ++def XVROTRI_D : LASX_I6_U<0b0111011010100001>, ++ LASX_RORI_U6_DESC_BASE_Intrinsic<"xvrotri.d", int_loongarch_lasx_xvrotri_d, uimm6, immZExt6, LASX256DOpnd>; ++ ++ ++def XVSRLRI_B : LASX_I3_U<0b0111011010100100001>, ++ LASX_BIT_3_DESC_BASE<"xvsrlri.b", int_loongarch_lasx_xvsrlri_b, uimm3, immZExt3, LASX256BOpnd>; ++ ++def XVSRLRI_H : LASX_I4_U<0b011101101010010001>, ++ LASX_BIT_4_DESC_BASE<"xvsrlri.h", int_loongarch_lasx_xvsrlri_h, uimm4, immZExt4, LASX256HOpnd>; ++ ++def XVSRLRI_W : LASX_I5_U<0b01110110101001001>, ++ LASX_BIT_5_DESC_BASE<"xvsrlri.w", int_loongarch_lasx_xvsrlri_w, uimm5, immZExt5, LASX256WOpnd>; ++ ++def XVSRLRI_D : LASX_I6_U<0b0111011010100101>, ++ LASX_BIT_6_DESC_BASE<"xvsrlri.d", int_loongarch_lasx_xvsrlri_d, uimm6, immZExt6, LASX256DOpnd>; ++ ++ ++def XVSRARI_B : LASX_I3_U<0b0111011010101000001>, ++ LASX_BIT_3_DESC_BASE<"xvsrari.b", int_loongarch_lasx_xvsrari_b, uimm3, immZExt3, LASX256BOpnd>; ++ ++def XVSRARI_H : LASX_I4_U<0b011101101010100001>, ++ LASX_BIT_4_DESC_BASE<"xvsrari.h", int_loongarch_lasx_xvsrari_h, uimm4, immZExt4, LASX256HOpnd>; ++ ++def XVSRARI_W : LASX_I5_U<0b01110110101010001>, ++ LASX_BIT_5_DESC_BASE<"xvsrari.w", int_loongarch_lasx_xvsrari_w, uimm5, immZExt5, LASX256WOpnd>; ++ ++def XVSRARI_D : LASX_I6_U<0b0111011010101001>, ++ LASX_BIT_6_DESC_BASE<"xvsrari.d", int_loongarch_lasx_xvsrari_d, uimm6, immZExt6, LASX256DOpnd>; ++ ++ ++def XVINSGR2VR_W : LASX_I3_R_U<0b0111011011101011110>, ++ LASX_INSERT_U3_DESC_BASE<"xvinsgr2vr.w", v8i32, uimm3_ptr, immZExt3Ptr, LASX256WOpnd, GPR32Opnd>; ++ ++def XVINSGR2VR_D : LASX_I2_R_U<0b01110110111010111110>, ++ LASX_INSERT_U2_DESC_BASE<"xvinsgr2vr.d", v4i64, uimm2_ptr, immZExt2Ptr, LASX256DOpnd, GPR64Opnd>; ++ ++ ++def XVPICKVE2GR_W : LASX_ELM_COPY_U3<0b0111011011101111110>, ++ LASX_COPY_U3_DESC_BASE<"xvpickve2gr.w", vextract_sext_i32, v8i32, uimm3_ptr, immZExt3Ptr, GPR32Opnd, LASX256WOpnd>; ++ ++def XVPICKVE2GR_D : LASX_ELM_COPY_U2<0b01110110111011111110>, ++ LASX_COPY_U2_DESC_BASE<"xvpickve2gr.d", vextract_sext_i64, v4i64, uimm2_ptr, immZExt2Ptr, 
GPR64Opnd, LASX256DOpnd>; ++ ++ ++def XVPICKVE2GR_WU : LASX_ELM_COPY_U3<0b0111011011110011110>, ++ LASX_COPY_U3_DESC_BASE<"xvpickve2gr.wu", vextract_zext_i32, v8i32, uimm3_ptr, immZExt3Ptr, GPR32Opnd, LASX256WOpnd>; ++ ++def XVPICKVE2GR_DU : LASX_ELM_COPY_U2<0b01110110111100111110>, ++ LASX_COPY_U2_DESC_BASE<"xvpickve2gr.du", vextract_zext_i64, v4i64, uimm2_ptr, immZExt2Ptr, GPR64Opnd, LASX256DOpnd>; ++ ++ ++def XVREPL128VEI_B : LASX_I4_U<0b011101101111011110>, ++ LASX_ELM_U4_VREPLVE_DESC_BASE_Intrinsic<"xvrepl128vei.b", int_loongarch_lasx_xvrepl128vei_b, LASX256BOpnd>; ++ ++def XVREPL128VEI_H : LASX_I3_U<0b0111011011110111110>, ++ LASX_ELM_U3_VREPLVE_DESC_BASE_Intrinsic<"xvrepl128vei.h", int_loongarch_lasx_xvrepl128vei_h, LASX256HOpnd>; ++ ++def XVREPL128VEI_W : LASX_I2_U<0b01110110111101111110>, ++ LASX_ELM_U2_VREPLVE_DESC_BASE_Intrinsic<"xvrepl128vei.w", int_loongarch_lasx_xvrepl128vei_w, LASX256WOpnd>; ++ ++def XVREPL128VEI_D : LASX_I1_U<0b011101101111011111110>, ++ LASX_ELM_U1_VREPLVE_DESC_BASE_Intrinsic<"xvrepl128vei.d", int_loongarch_lasx_xvrepl128vei_d, LASX256DOpnd>; ++ ++ ++def XVINSVE0_W : LASX_I3_U<0b0111011011111111110>, ++ LASX_BIT_3_4O_DESC_BASE<"xvinsve0.w", int_loongarch_lasx_xvinsve0_w, uimm3, immZExt3, LASX256WOpnd>; ++ ++def XVINSVE0_D : LASX_I2_U<0b01110110111111111110>, ++ LASX_BIT_2_4O_DESC_BASE<"xvinsve0.d", int_loongarch_lasx_xvinsve0_d, uimm2, immZExt2, LASX256DOpnd>; ++ ++ ++def XVPICKVE_W : LASX_I3_U<0b0111011100000011110>, ++ LASX_BIT_3_4ON<"xvpickve.w", uimm3, immZExt3, LASX256WOpnd>; ++ ++def XVPICKVE_D : LASX_I2_U<0b01110111000000111110>, ++ LASX_BIT_2_4ON<"xvpickve.d", uimm2, immZExt2, LASX256DOpnd>; ++ ++ ++def XVREPLVE0_B : LASX_2R<0b0111011100000111000000>, ++ LASX_XVBROADCAST_DESC_BASE<"xvreplve0.b", int_loongarch_lasx_xvreplve0_b, v32i8, LASX256BOpnd>; ++ ++def XVREPLVE0_H : LASX_2R<0b0111011100000111100000>, ++ LASX_XVBROADCAST_DESC_BASE<"xvreplve0.h", int_loongarch_lasx_xvreplve0_h, v16i16, LASX256HOpnd>; ++ ++def XVREPLVE0_W : LASX_2R<0b0111011100000111110000>, ++ LASX_XVBROADCAST_DESC_BASE<"xvreplve0.w", int_loongarch_lasx_xvreplve0_w, v8i32, LASX256WOpnd> ; ++ ++def XVREPLVE0_D : LASX_2R<0b0111011100000111111000>, ++ LASX_XVBROADCAST_DESC_BASE<"xvreplve0.d", xvbroadcast_v4i64, v4i64, LASX256DOpnd>; ++ ++def XVREPLVE0_Q : LASX_2R<0b0111011100000111111100>, ++ LASX_XVBROADCAST_DESC_BASE<"xvreplve0.q", int_loongarch_lasx_xvreplve0_q, v32i8, LASX256BOpnd>; ++ ++ ++def XVSLLWIL_H_B : LASX_I3_U<0b0111011100001000001>, ++ LASX_2R_U3_DESC_BASE<"xvsllwil.h.b", int_loongarch_lasx_xvsllwil_h_b, LASX256HOpnd, LASX256BOpnd>; ++ ++def XVSLLWIL_W_H : LASX_I4_U<0b011101110000100001>, ++ LASX_2R_U4_DESC_BASE<"xvsllwil.w.h", int_loongarch_lasx_xvsllwil_w_h, LASX256WOpnd, LASX256HOpnd>; ++ ++def XVSLLWIL_D_W : LASX_I5_U<0b01110111000010001>, ++ LASX_2R_U5_DESC_BASE<"xvsllwil.d.w", int_loongarch_lasx_xvsllwil_d_w, LASX256DOpnd, LASX256WOpnd> ; ++ ++ ++def XVEXTL_Q_D : LASX_2R<0b0111011100001001000000>, ++ LASX_2R_DESC_BASE<"xvextl.q.d", int_loongarch_lasx_xvextl_q_d, LASX256DOpnd, LASX256DOpnd>; ++ ++ ++def XVSLLWIL_HU_BU : LASX_I3_U<0b0111011100001100001>, ++ LASX_2R_U3_DESC_BASE<"xvsllwil.hu.bu", int_loongarch_lasx_xvsllwil_hu_bu, LASX256HOpnd, LASX256BOpnd>; ++ ++def XVSLLWIL_WU_HU : LASX_I4_U<0b011101110000110001>, ++ LASX_2R_U4_DESC_BASE<"xvsllwil.wu.hu", int_loongarch_lasx_xvsllwil_wu_hu, LASX256WOpnd, LASX256HOpnd>; ++ ++def XVSLLWIL_DU_WU : LASX_I5_U<0b01110111000011001>, ++ LASX_2R_U5_DESC_BASE<"xvsllwil.du.wu", int_loongarch_lasx_xvsllwil_du_wu, 
LASX256DOpnd, LASX256WOpnd> ; ++ ++ ++def XVEXTL_QU_DU : LASX_2R<0b0111011100001101000000>, ++ LASX_2R_DESC_BASE<"xvextl.qu.du", int_loongarch_lasx_xvextl_qu_du, LASX256DOpnd, LASX256DOpnd>; ++ ++ ++def XVBITCLRI_B : LASX_I3_U<0b0111011100010000001>, ++ LASX_2R_U3_DESC_BASE<"xvbitclri.b", int_loongarch_lasx_xvbitclri_b, LASX256BOpnd, LASX256BOpnd>; ++ ++def XVBITCLRI_H : LASX_I4_U<0b011101110001000001>, ++ LASX_2R_U4_DESC_BASE<"xvbitclri.h", int_loongarch_lasx_xvbitclri_h, LASX256HOpnd, LASX256HOpnd>; ++ ++def XVBITCLRI_W : LASX_I5_U<0b01110111000100001>, ++ LASX_2R_U5_DESC_BASE<"xvbitclri.w", int_loongarch_lasx_xvbitclri_w, LASX256WOpnd, LASX256WOpnd>; ++ ++def XVBITCLRI_D : LASX_I6_U<0b0111011100010001>, ++ LASX_2R_U6_DESC_BASE<"xvbitclri.d", int_loongarch_lasx_xvbitclri_d, LASX256DOpnd, LASX256DOpnd>; ++ ++ ++def XVBITSETI_B : LASX_I3_U<0b0111011100010100001>, ++ LASX_2R_U3_DESC_BASE<"xvbitseti.b", int_loongarch_lasx_xvbitseti_b, LASX256BOpnd, LASX256BOpnd>; ++ ++def XVBITSETI_H : LASX_I4_U<0b011101110001010001>, ++ LASX_2R_U4_DESC_BASE<"xvbitseti.h", int_loongarch_lasx_xvbitseti_h, LASX256HOpnd, LASX256HOpnd>; ++ ++def XVBITSETI_W : LASX_I5_U<0b01110111000101001>, ++ LASX_2R_U5_DESC_BASE<"xvbitseti.w", int_loongarch_lasx_xvbitseti_w, LASX256WOpnd, LASX256WOpnd>; ++ ++def XVBITSETI_D : LASX_I6_U<0b0111011100010101>, ++ LASX_2R_U6_DESC_BASE<"xvbitseti.d", int_loongarch_lasx_xvbitseti_d, LASX256DOpnd, LASX256DOpnd>; ++ ++ ++def XVBITREVI_B : LASX_I3_U<0b0111011100011000001>, ++ LASX_2R_U3_DESC_BASE<"xvbitrevi.b", int_loongarch_lasx_xvbitrevi_b, LASX256BOpnd, LASX256BOpnd>; ++ ++def XVBITREVI_H : LASX_I4_U<0b011101110001100001>, ++ LASX_2R_U4_DESC_BASE<"xvbitrevi.h", int_loongarch_lasx_xvbitrevi_h, LASX256HOpnd, LASX256HOpnd>; ++ ++def XVBITREVI_W : LASX_I5_U<0b01110111000110001>, ++ LASX_2R_U5_DESC_BASE<"xvbitrevi.w", int_loongarch_lasx_xvbitrevi_w, LASX256WOpnd, LASX256WOpnd>; ++ ++def XVBITREVI_D : LASX_I6_U<0b0111011100011001>, ++ LASX_2R_U6_DESC_BASE<"xvbitrevi.d", int_loongarch_lasx_xvbitrevi_d, LASX256DOpnd, LASX256DOpnd>; ++ ++ ++def XVSAT_B : LASX_I3_U<0b0111011100100100001>, ++ LASX_BIT_3_DESC_BASE<"xvsat.b", int_loongarch_lasx_xvsat_b, uimm3, immZExt3, LASX256BOpnd>; ++ ++def XVSAT_H : LASX_I4_U<0b011101110010010001>, ++ LASX_BIT_4_DESC_BASE<"xvsat.h", int_loongarch_lasx_xvsat_h, uimm4, immZExt4, LASX256HOpnd>; ++ ++def XVSAT_W : LASX_I5_U<0b01110111001001001>, ++ LASX_BIT_5_DESC_BASE<"xvsat.w", int_loongarch_lasx_xvsat_w, uimm5, immZExt5, LASX256WOpnd>; ++ ++def XVSAT_D : LASX_I6_U<0b0111011100100101>, ++ LASX_BIT_6_DESC_BASE<"xvsat.d", int_loongarch_lasx_xvsat_d, uimm6, immZExt6, LASX256DOpnd>; ++ ++ ++def XVSAT_BU : LASX_I3_U<0b0111011100101000001>, ++ LASX_BIT_3_DESC_BASE<"xvsat.bu", int_loongarch_lasx_xvsat_bu, uimm3, immZExt3, LASX256BOpnd>; ++ ++def XVSAT_HU : LASX_I4_U<0b011101110010100001>, ++ LASX_BIT_4_DESC_BASE<"xvsat.hu", int_loongarch_lasx_xvsat_hu, uimm4, immZExt4, LASX256HOpnd>; ++ ++def XVSAT_WU : LASX_I5_U<0b01110111001010001>, ++ LASX_BIT_5_DESC_BASE<"xvsat.wu", int_loongarch_lasx_xvsat_wu, uimm5, immZExt5, LASX256WOpnd>; ++ ++def XVSAT_DU : LASX_I6_U<0b0111011100101001>, ++ LASX_BIT_6_DESC_BASE<"xvsat.du", int_loongarch_lasx_xvsat_du, uimm6, immZExt6, LASX256DOpnd>; ++ ++ ++def XVSLLI_B : LASX_I3_U<0b0111011100101100001>, ++ LASX_BIT_U3_VREPLVE_DESC_BASE_Intrinsic<"xvslli.b", int_loongarch_lasx_xvslli_b, uimm3, immZExt3, LASX256BOpnd>; ++ ++def XVSLLI_H : LASX_I4_U<0b011101110010110001>, ++ LASX_BIT_U4_VREPLVE_DESC_BASE_Intrinsic<"xvslli.h", 
int_loongarch_lasx_xvslli_h, uimm4, immZExt4, LASX256HOpnd>; ++ ++def XVSLLI_W : LASX_I5_U<0b01110111001011001>, ++ LASX_BIT_U5_VREPLVE_DESC_BASE_Intrinsic<"xvslli.w", int_loongarch_lasx_xvslli_w, uimm5, immZExt5, LASX256WOpnd>; ++ ++def XVSLLI_D : LASX_I6_U<0b0111011100101101>, ++ LASX_BIT_U6_VREPLVE_DESC_BASE_Intrinsic<"xvslli.d", int_loongarch_lasx_xvslli_d, uimm6, immZExt6, LASX256DOpnd>; ++ ++ ++def XVSRLI_B : LASX_I3_U<0b0111011100110000001>, ++ LASX_BIT_U3_VREPLVE_DESC_BASE_Intrinsic<"xvsrli.b", int_loongarch_lasx_xvsrli_b, uimm3, immZExt3, LASX256BOpnd>; ++ ++def XVSRLI_H : LASX_I4_U<0b011101110011000001>, ++ LASX_BIT_U4_VREPLVE_DESC_BASE_Intrinsic<"xvsrli.h", int_loongarch_lasx_xvsrli_h, uimm4, immZExt4, LASX256HOpnd>; ++ ++def XVSRLI_W : LASX_I5_U<0b01110111001100001>, ++ LASX_BIT_U5_VREPLVE_DESC_BASE_Intrinsic<"xvsrli.w", int_loongarch_lasx_xvsrli_w, uimm5, immZExt5, LASX256WOpnd>; ++ ++def XVSRLI_D : LASX_I6_U<0b0111011100110001>, ++ LASX_BIT_U6_VREPLVE_DESC_BASE_Intrinsic<"xvsrli.d", int_loongarch_lasx_xvsrli_d, uimm6, immZExt6, LASX256DOpnd>; ++ ++ ++def XVSRAI_B : LASX_I3_U<0b0111011100110100001>, ++ LASX_BIT_U3_VREPLVE_DESC_BASE_Intrinsic<"xvsrai.b", int_loongarch_lasx_xvsrai_b, uimm3, immZExt3, LASX256BOpnd>; ++ ++def XVSRAI_H : LASX_I4_U<0b011101110011010001>, ++ LASX_BIT_U4_VREPLVE_DESC_BASE_Intrinsic<"xvsrai.h", int_loongarch_lasx_xvsrai_h, uimm4, immZExt4, LASX256HOpnd>; ++ ++def XVSRAI_W : LASX_I5_U<0b01110111001101001>, ++ LASX_BIT_U5_VREPLVE_DESC_BASE_Intrinsic<"xvsrai.w", int_loongarch_lasx_xvsrai_w, uimm5, immZExt5, LASX256WOpnd>; ++ ++def XVSRAI_D : LASX_I6_U<0b0111011100110101>, ++ LASX_BIT_U6_VREPLVE_DESC_BASE_Intrinsic<"xvsrai.d", int_loongarch_lasx_xvsrai_d, uimm6, immZExt6, LASX256DOpnd>; ++ ++ ++def XVSRLNI_B_H : LASX_I4_U<0b011101110100000001>, ++ LASX_U4_DESC_BASE<"xvsrlni.b.h", int_loongarch_lasx_xvsrlni_b_h, uimm4, immZExt4, LASX256BOpnd>; ++ ++def XVSRLNI_H_W : LASX_I5_U<0b01110111010000001>, ++ LASX_N4_U5_DESC_BASE<"xvsrlni.h.w", int_loongarch_lasx_xvsrlni_h_w, uimm5, immZExt5, LASX256HOpnd>; ++ ++def XVSRLNI_W_D : LASX_I6_U<0b0111011101000001>, ++ LASX_U6_DESC_BASE<"xvsrlni.w.d", int_loongarch_lasx_xvsrlni_w_d, uimm6, immZExt6, LASX256WOpnd>; ++ ++def XVSRLNI_D_Q : LASX_I7_U<0b011101110100001>, ++ LASX_D_DESC_BASE<"xvsrlni.d.q", int_loongarch_lasx_xvsrlni_d_q, LASX256DOpnd>; ++ ++ ++def XVSRLRNI_B_H : LASX_I4_U<0b011101110100010001>, ++ LASX_U4_DESC_BASE<"xvsrlrni.b.h", int_loongarch_lasx_xvsrlrni_b_h, uimm4, immZExt4, LASX256BOpnd>; ++ ++def XVSRLRNI_H_W : LASX_I5_U<0b01110111010001001>, ++ LASX_N4_U5_DESC_BASE<"xvsrlrni.h.w", int_loongarch_lasx_xvsrlrni_h_w, uimm5, immZExt5, LASX256HOpnd>; ++ ++def XVSRLRNI_W_D : LASX_I6_U<0b0111011101000101>, ++ LASX_U6_DESC_BASE<"xvsrlrni.w.d", int_loongarch_lasx_xvsrlrni_w_d, uimm6, immZExt6, LASX256WOpnd>; ++ ++def XVSRLRNI_D_Q : LASX_I7_U<0b011101110100011>, ++ LASX_D_DESC_BASE<"xvsrlrni.d.q", int_loongarch_lasx_xvsrlrni_d_q, LASX256DOpnd>; ++ ++ ++def XVSSRLNI_B_H : LASX_I4_U<0b011101110100100001>, ++ LASX_U4_DESC_BASE<"xvssrlni.b.h", int_loongarch_lasx_xvssrlni_b_h, uimm4, immZExt4, LASX256BOpnd>; ++ ++def XVSSRLNI_H_W : LASX_I5_U<0b01110111010010001>, ++ LASX_N4_U5_DESC_BASE<"xvssrlni.h.w", int_loongarch_lasx_xvssrlni_h_w, uimm5, immZExt5, LASX256HOpnd>; ++ ++def XVSSRLNI_W_D : LASX_I6_U<0b0111011101001001>, ++ LASX_U6_DESC_BASE<"xvssrlni.w.d", int_loongarch_lasx_xvssrlni_w_d, uimm6, immZExt6, LASX256WOpnd>; ++ ++def XVSSRLNI_D_Q : LASX_I7_U<0b011101110100101>, ++ LASX_D_DESC_BASE<"xvssrlni.d.q", 
int_loongarch_lasx_xvssrlni_d_q, LASX256DOpnd>; ++ ++ ++def XVSSRLNI_BU_H : LASX_I4_U<0b011101110100110001>, ++ LASX_U4_DESC_BASE<"xvssrlni.bu.h", int_loongarch_lasx_xvssrlni_bu_h, uimm4, immZExt4, LASX256BOpnd>; ++ ++def XVSSRLNI_HU_W : LASX_I5_U<0b01110111010011001>, ++ LASX_N4_U5_DESC_BASE<"xvssrlni.hu.w", int_loongarch_lasx_xvssrlni_hu_w, uimm5, immZExt5, LASX256HOpnd>; ++ ++def XVSSRLNI_WU_D : LASX_I6_U<0b0111011101001101>, ++ LASX_U6_DESC_BASE<"xvssrlni.wu.d", int_loongarch_lasx_xvssrlni_wu_d, uimm6, immZExt6, LASX256WOpnd>; ++ ++def XVSSRLNI_DU_Q : LASX_I7_U<0b011101110100111>, ++ LASX_D_DESC_BASE<"xvssrlni.du.q", int_loongarch_lasx_xvssrlni_du_q, LASX256DOpnd>; ++ ++ ++def XVSSRLRNI_B_H : LASX_I4_U<0b011101110101000001>, ++ LASX_2R_3R_U4_DESC_BASE<"xvssrlrni.b.h", int_loongarch_lasx_xvssrlrni_b_h, LASX256BOpnd, LASX256BOpnd>; ++ ++def XVSSRLRNI_H_W : LASX_I5_U<0b01110111010100001>, ++ LASX_2R_3R_U5_DESC_BASE<"xvssrlrni.h.w", int_loongarch_lasx_xvssrlrni_h_w, LASX256HOpnd, LASX256HOpnd>; ++ ++def XVSSRLRNI_W_D : LASX_I6_U<0b0111011101010001>, ++ LASX_2R_3R_U6_DESC_BASE<"xvssrlrni.w.d", int_loongarch_lasx_xvssrlrni_w_d, LASX256WOpnd, LASX256WOpnd>; ++ ++def XVSSRLRNI_D_Q : LASX_I7_U<0b011101110101001>, ++ LASX_2R_3R_U7_DESC_BASE<"xvssrlrni.d.q", int_loongarch_lasx_xvssrlrni_d_q, LASX256DOpnd, LASX256DOpnd>; ++ ++ ++def XVSSRLRNI_BU_H : LASX_I4_U<0b011101110101010001>, ++ LASX_U4_DESC_BASE<"xvssrlrni.bu.h", int_loongarch_lasx_xvssrlrni_bu_h, uimm4, immZExt4, LASX256BOpnd>; ++ ++def XVSSRLRNI_HU_W : LASX_I5_U<0b01110111010101001>, ++ LASX_N4_U5_DESC_BASE<"xvssrlrni.hu.w", int_loongarch_lasx_xvssrlrni_hu_w, uimm5, immZExt5, LASX256HOpnd>; ++ ++def XVSSRLRNI_WU_D : LASX_I6_U<0b0111011101010101>, ++ LASX_U6_DESC_BASE<"xvssrlrni.wu.d", int_loongarch_lasx_xvssrlrni_wu_d, uimm6, immZExt6, LASX256WOpnd>; ++ ++def XVSSRLRNI_DU_Q : LASX_I7_U<0b011101110101011>, ++ LASX_D_DESC_BASE<"xvssrlrni.du.q", int_loongarch_lasx_xvssrlrni_du_q, LASX256DOpnd>; ++ ++ ++def XVSRANI_B_H : LASX_I4_U<0b011101110101100001>, ++ LASX_2R_3R_U4_DESC_BASE<"xvsrani.b.h", int_loongarch_lasx_xvsrani_b_h, LASX256BOpnd, LASX256BOpnd>; ++ ++def XVSRANI_H_W : LASX_I5_U<0b01110111010110001>, ++ LASX_2R_3R_U5_DESC_BASE<"xvsrani.h.w", int_loongarch_lasx_xvsrani_h_w, LASX256HOpnd, LASX256HOpnd>; ++ ++def XVSRANI_W_D : LASX_I6_U<0b0111011101011001>, ++ LASX_2R_3R_U6_DESC_BASE<"xvsrani.w.d", int_loongarch_lasx_xvsrani_w_d, LASX256WOpnd, LASX256WOpnd>; ++ ++def XVSRANI_D_Q : LASX_I7_U<0b011101110101101>, ++ LASX_2R_3R_U7_DESC_BASE<"xvsrani.d.q", int_loongarch_lasx_xvsrani_d_q, LASX256DOpnd, LASX256DOpnd>; ++ ++ ++def XVSRARNI_B_H : LASX_I4_U<0b011101110101110001>, ++ LASX_U4_DESC_BASE<"xvsrarni.b.h", int_loongarch_lasx_xvsrarni_b_h, uimm4, immZExt4, LASX256BOpnd>; ++ ++def XVSRARNI_H_W : LASX_I5_U<0b01110111010111001>, ++ LASX_N4_U5_DESC_BASE<"xvsrarni.h.w", int_loongarch_lasx_xvsrarni_h_w, uimm5, immZExt5, LASX256HOpnd>; ++ ++def XVSRARNI_W_D : LASX_I6_U<0b0111011101011101>, ++ LASX_U6_DESC_BASE<"xvsrarni.w.d", int_loongarch_lasx_xvsrarni_w_d, uimm6, immZExt6, LASX256WOpnd>; ++ ++def XVSRARNI_D_Q : LASX_I7_U<0b011101110101111>, ++ LASX_D_DESC_BASE<"xvsrarni.d.q", int_loongarch_lasx_xvsrarni_d_q, LASX256DOpnd>; ++ ++ ++def XVSSRANI_B_H : LASX_I4_U<0b011101110110000001>, ++ LASX_U4_DESC_BASE<"xvssrani.b.h", int_loongarch_lasx_xvssrani_b_h, uimm4, immZExt4, LASX256BOpnd>; ++ ++def XVSSRANI_H_W : LASX_I5_U<0b01110111011000001>, ++ LASX_N4_U5_DESC_BASE<"xvssrani.h.w", int_loongarch_lasx_xvssrani_h_w, uimm5, immZExt5, LASX256HOpnd>; ++ 
++def XVSSRANI_W_D : LASX_I6_U<0b0111011101100001>, ++ LASX_U6_DESC_BASE<"xvssrani.w.d", int_loongarch_lasx_xvssrani_w_d, uimm6, immZExt6, LASX256WOpnd>; ++ ++def XVSSRANI_D_Q : LASX_I7_U<0b011101110110001>, ++ LASX_D_DESC_BASE<"xvssrani.d.q", int_loongarch_lasx_xvssrani_d_q, LASX256DOpnd>; ++ ++ ++def XVSSRANI_BU_H : LASX_I4_U<0b011101110110010001>, ++ LASX_U4_DESC_BASE<"xvssrani.bu.h", int_loongarch_lasx_xvssrani_bu_h, uimm4, immZExt4, LASX256BOpnd>; ++ ++def XVSSRANI_HU_W : LASX_I5_U<0b01110111011001001>, ++ LASX_N4_U5_DESC_BASE<"xvssrani.hu.w", int_loongarch_lasx_xvssrani_hu_w, uimm5, immZExt5, LASX256HOpnd>; ++ ++def XVSSRANI_WU_D : LASX_I6_U<0b0111011101100101>, ++ LASX_U6_DESC_BASE<"xvssrani.wu.d", int_loongarch_lasx_xvssrani_wu_d, uimm6, immZExt6, LASX256WOpnd>; ++ ++def XVSSRANI_DU_Q : LASX_I7_U<0b011101110110011>, ++ LASX_D_DESC_BASE<"xvssrani.du.q", int_loongarch_lasx_xvssrani_du_q, LASX256DOpnd>; ++ ++ ++def XVSSRARNI_B_H : LASX_I4_U<0b011101110110100001>, ++ LASX_U4_DESC_BASE<"xvssrarni.b.h", int_loongarch_lasx_xvssrarni_b_h, uimm4, immZExt4, LASX256BOpnd>; ++ ++def XVSSRARNI_H_W : LASX_I5_U<0b01110111011010001>, ++ LASX_N4_U5_DESC_BASE<"xvssrarni.h.w", int_loongarch_lasx_xvssrarni_h_w, uimm5, immZExt5, LASX256HOpnd>; ++ ++def XVSSRARNI_W_D : LASX_I6_U<0b0111011101101001>, ++ LASX_U6_DESC_BASE<"xvssrarni.w.d", int_loongarch_lasx_xvssrarni_w_d, uimm6, immZExt6, LASX256WOpnd>; ++ ++def XVSSRARNI_D_Q : LASX_I7_U<0b011101110110101>, ++ LASX_D_DESC_BASE<"xvssrarni.d.q", int_loongarch_lasx_xvssrarni_d_q, LASX256DOpnd>; ++ ++ ++def XVSSRARNI_BU_H : LASX_I4_U<0b011101110110110001>, ++ LASX_U4_DESC_BASE<"xvssrarni.bu.h", int_loongarch_lasx_xvssrarni_bu_h, uimm4, immZExt4, LASX256BOpnd>; ++ ++def XVSSRARNI_HU_W : LASX_I5_U<0b01110111011011001>, ++ LASX_N4_U5_DESC_BASE<"xvssrarni.hu.w", int_loongarch_lasx_xvssrarni_hu_w, uimm5, immZExt5, LASX256HOpnd>; ++ ++def XVSSRARNI_WU_D : LASX_I6_U<0b0111011101101101>, ++ LASX_U6_DESC_BASE<"xvssrarni.wu.d", int_loongarch_lasx_xvssrarni_wu_d, uimm6, immZExt6, LASX256WOpnd>; ++ ++def XVSSRARNI_DU_Q : LASX_I7_U<0b011101110110111>, ++ LASX_D_DESC_BASE<"xvssrarni.du.q", int_loongarch_lasx_xvssrarni_du_q, LASX256DOpnd>; ++ ++ ++def XVEXTRINS_B : LASX_I8_U<0b01110111100011>, ++ LASX_2R_3R_U8_DESC_BASE<"xvextrins.b", int_loongarch_lasx_xvextrins_b, LASX256BOpnd, LASX256BOpnd>; ++ ++def XVEXTRINS_H : LASX_I8_U<0b01110111100010>, ++ LASX_2R_3R_U8_DESC_BASE<"xvextrins.h", int_loongarch_lasx_xvextrins_h, LASX256HOpnd, LASX256HOpnd>; ++ ++def XVEXTRINS_W : LASX_I8_U<0b01110111100001>, ++ LASX_2R_3R_U8_DESC_BASE<"xvextrins.w", int_loongarch_lasx_xvextrins_w, LASX256WOpnd, LASX256WOpnd>; ++ ++def XVEXTRINS_D : LASX_I8_U<0b01110111100000>, ++ LASX_2R_3R_U8_DESC_BASE<"xvextrins.d", int_loongarch_lasx_xvextrins_d, LASX256DOpnd, LASX256DOpnd>; ++ ++ ++def XVSHUF4I_B : LASX_I8_U<0b01110111100100>, ++ LASX_I8_SHF_DESC_BASE<"xvshuf4i.b", int_loongarch_lasx_xvshuf4i_b, LASX256BOpnd>; ++ ++def XVSHUF4I_H : LASX_I8_U<0b01110111100101>, ++ LASX_I8_SHF_DESC_BASE<"xvshuf4i.h", int_loongarch_lasx_xvshuf4i_h, LASX256HOpnd>; ++ ++def XVSHUF4I_W : LASX_I8_U<0b01110111100110>, ++ LASX_I8_SHF_DESC_BASE<"xvshuf4i.w", int_loongarch_lasx_xvshuf4i_w, LASX256WOpnd>; ++ ++def XVSHUF4I_D : LASX_I8_U<0b01110111100111>, ++ LASX_I8_O4_SHF_DESC_BASE<"xvshuf4i.d", int_loongarch_lasx_xvshuf4i_d, LASX256DOpnd>; ++ ++ ++def XVBITSELI_B : LASX_I8_U<0b01110111110001>, ++ LASX_2R_3R_U8_DESC_BASE<"xvbitseli.b", int_loongarch_lasx_xvbitseli_b, LASX256BOpnd, LASX256BOpnd>; ++ ++ ++def XVANDI_B : 
LASX_I8_U<0b01110111110100>, ++ LASX_2R_U8_DESC_BASE<"xvandi.b", int_loongarch_lasx_xvandi_b, LASX256BOpnd, LASX256BOpnd>; ++ ++ ++def XVORI_B : LASX_I8_U<0b01110111110101>, ++ LASX_2R_U8_DESC_BASE<"xvori.b", int_loongarch_lasx_xvori_b, LASX256BOpnd, LASX256BOpnd>; ++ ++ ++def XVXORI_B : LASX_I8_U<0b01110111110110>, ++ LASX_2R_U8_DESC_BASE<"xvxori.b", int_loongarch_lasx_xvxori_b, LASX256BOpnd, LASX256BOpnd>; ++ ++ ++def XVNORI_B : LASX_I8_U<0b01110111110111>, ++ LASX_2R_U8_DESC_BASE<"xvnori.b", int_loongarch_lasx_xvnori_b, LASX256BOpnd, LASX256BOpnd>; ++ ++ ++def XVLDI : LASX_1R_I13<0b01110111111000>, ++ LASX_I13_DESC_BASE<"xvldi", int_loongarch_lasx_xvldi, i32, simm13Op, LASX256DOpnd>; ++ ++def XVLDI_B : LASX_1R_I13_I10<0b01110111111000000>, ++ LASX_I13_DESC_BASE_tmp<"xvldi", LASX256BOpnd>; ++ ++def XVLDI_H : LASX_1R_I13_I10<0b01110111111000001>, ++ LASX_I13_DESC_BASE_tmp<"xvldi", LASX256HOpnd>; ++ ++def XVLDI_W : LASX_1R_I13_I10<0b01110111111000010>, ++ LASX_I13_DESC_BASE_tmp<"xvldi", LASX256WOpnd>; ++ ++def XVLDI_D : LASX_1R_I13_I10<0b01110111111000011>, ++ LASX_I13_DESC_BASE_tmp<"xvldi", LASX256DOpnd>; ++ ++ ++def XVPERMI_W : LASX_I8_U<0b01110111111001>, ++ LASX_2R_3R_U8_DESC_BASE<"xvpermi.w", int_loongarch_lasx_xvpermi_w, LASX256WOpnd, LASX256WOpnd>; ++ ++def XVPERMI_D : LASX_I8_U<0b01110111111010>, ++ LASX_2R_U8_DESC_BASE<"xvpermi.d", int_loongarch_lasx_xvpermi_d, LASX256DOpnd, LASX256DOpnd>; ++ ++def XVPERMI_Q : LASX_I8_U<0b01110111111011>, ++ LASX_2R_3R_U8_DESC_BASE<"xvpermi.q", int_loongarch_lasx_xvpermi_q, LASX256BOpnd, LASX256BOpnd>; ++ ++ ++//Pat ++ ++class LASXBitconvertPat preds = [HasLASX]> : ++ LASXPat<(DstVT (bitconvert SrcVT:$src)), ++ (COPY_TO_REGCLASS SrcVT:$src, DstRC), preds>; ++ ++// These are endian-independent because the element size doesnt change ++def : LASXBitconvertPat; ++def : LASXBitconvertPat; ++def : LASXBitconvertPat; ++def : LASXBitconvertPat; ++ ++def : LASXBitconvertPat; ++def : LASXBitconvertPat; ++def : LASXBitconvertPat; ++def : LASXBitconvertPat; ++def : LASXBitconvertPat; ++ ++def : LASXBitconvertPat; ++def : LASXBitconvertPat; ++def : LASXBitconvertPat; ++def : LASXBitconvertPat; ++def : LASXBitconvertPat; ++ ++def : LASXBitconvertPat; ++def : LASXBitconvertPat; ++def : LASXBitconvertPat; ++def : LASXBitconvertPat; ++ ++def : LASXBitconvertPat; ++def : LASXBitconvertPat; ++def : LASXBitconvertPat; ++def : LASXBitconvertPat; ++ ++def : LASXBitconvertPat; ++def : LASXBitconvertPat; ++def : LASXBitconvertPat; ++def : LASXBitconvertPat; ++ ++def : LASXBitconvertPat; ++def : LASXBitconvertPat; ++def : LASXBitconvertPat; ++def : LASXBitconvertPat; ++ ++class LASX_XINSERT_PSEUDO_BASE : ++ LASXPseudo<(outs ROXD:$xd), (ins ROXD:$xd_in, ImmOp:$n, ROFS:$fs), ++ [(set ROXD:$xd, (OpNode (Ty ROXD:$xd_in), ROFS:$fs, Imm:$n))]> { ++ bit usesCustomInserter = 1; ++ string Constraints = "$xd = $xd_in"; ++} ++ ++class XINSERT_H_PSEUDO_DESC : LASX_XINSERT_PSEUDO_BASE; ++ ++class XINSERT_H64_PSEUDO_DESC : LASX_XINSERT_PSEUDO_BASE; ++ ++def XINSERT_H_PSEUDO : XINSERT_H_PSEUDO_DESC; ++def XINSERT_H64_PSEUDO : XINSERT_H64_PSEUDO_DESC; ++ ++class XINSERT_B_PSEUDO_DESC : LASX_XINSERT_PSEUDO_BASE; ++def XINSERT_B_PSEUDO : XINSERT_B_PSEUDO_DESC; ++ ++ ++class LASX_COPY_PSEUDO_BASE : ++ LASXPseudo<(outs RCD:$xd), (ins RCWS:$xj, ImmOp:$n), ++ [(set RCD:$xd, (OpNode (VecTy RCWS:$xj), Imm:$n))]> { ++ bit usesCustomInserter = 1; ++} ++ ++class XCOPY_FW_PSEUDO_DESC : LASX_COPY_PSEUDO_BASE; ++class XCOPY_FD_PSEUDO_DESC : LASX_COPY_PSEUDO_BASE; ++def XCOPY_FW_PSEUDO : 
XCOPY_FW_PSEUDO_DESC; ++def XCOPY_FD_PSEUDO : XCOPY_FD_PSEUDO_DESC; ++ ++ ++ ++class LASX_XINSERT_VIDX_PSEUDO_BASE : ++ LASXPseudo<(outs ROXD:$xd), (ins ROXD:$xd_in, ROIdx:$n, ROFS:$fs), ++ [(set ROXD:$xd, (OpNode (Ty ROXD:$xd_in), ROFS:$fs, ROIdx:$n))]> { ++ bit usesCustomInserter = 1; ++ string Constraints = "$xd = $xd_in"; ++} ++ ++ ++class XINSERT_FW_PSEUDO_DESC : LASX_XINSERT_PSEUDO_BASE; ++def XINSERT_FW_PSEUDO : XINSERT_FW_PSEUDO_DESC; ++ ++class XINSERT_FW_VIDX_PSEUDO_DESC : ++ LASX_XINSERT_VIDX_PSEUDO_BASE; ++class XINSERT_FW_VIDX64_PSEUDO_DESC : ++ LASX_XINSERT_VIDX_PSEUDO_BASE; ++ ++def XINSERT_FW_VIDX_PSEUDO : XINSERT_FW_VIDX_PSEUDO_DESC; ++def XINSERT_FW_VIDX64_PSEUDO : XINSERT_FW_VIDX64_PSEUDO_DESC; ++ ++class XINSERT_B_VIDX64_PSEUDO_DESC : ++ LASX_XINSERT_VIDX_PSEUDO_BASE; ++ ++def XINSERT_B_VIDX64_PSEUDO : XINSERT_B_VIDX64_PSEUDO_DESC; ++ ++ ++class XINSERT_B_VIDX_PSEUDO_DESC : ++ LASX_XINSERT_VIDX_PSEUDO_BASE; ++ ++def XINSERT_B_VIDX_PSEUDO : XINSERT_B_VIDX_PSEUDO_DESC; ++ ++ ++class XINSERTPostRA : ++ LoongArchPseudo<(outs RC:$xd), (ins RC:$xd_in, RD:$n, RE:$fs), []> { ++ let mayLoad = 1; ++ let mayStore = 1; ++} ++ ++def XINSERT_B_VIDX_PSEUDO_POSTRA : XINSERTPostRA; ++def XINSERT_B_VIDX64_PSEUDO_POSTRA : XINSERTPostRA; ++def XINSERT_FW_VIDX_PSEUDO_POSTRA : XINSERTPostRA; ++def XINSERT_FW_VIDX64_PSEUDO_POSTRA : XINSERTPostRA; ++ ++class XINSERT_FD_PSEUDO_DESC : LASX_XINSERT_PSEUDO_BASE; ++ ++def XINSERT_FD_PSEUDO : XINSERT_FD_PSEUDO_DESC; ++ ++class LASX_2R_FILL_PSEUDO_BASE : ++ LASXPseudo<(outs RCWD:$xd), (ins RCWS:$fs), ++ [(set RCWD:$xd, (OpNode RCWS:$fs))]> { ++ let usesCustomInserter = 1; ++} ++ ++class XFILL_FW_PSEUDO_DESC : LASX_2R_FILL_PSEUDO_BASE; ++class XFILL_FD_PSEUDO_DESC : LASX_2R_FILL_PSEUDO_BASE; ++def XFILL_FW_PSEUDO : XFILL_FW_PSEUDO_DESC; ++def XFILL_FD_PSEUDO : XFILL_FD_PSEUDO_DESC; ++ ++class LASX_CONCAT_VECTORS_PSEUDO_BASE : ++ LASXPseudo<(outs ROXD:$xd), (ins ROXJ:$xs, ROXK:$xt), ++ [(set ROXD:$xd, (Ty (concat_vectors (SubTy ROXJ:$xs), (SubTy ROXK:$xt))))]> { ++ bit usesCustomInserter = 1; ++} ++ ++class CONCAT_VECTORS_B_PSEUDO_DESC : ++ LASX_CONCAT_VECTORS_PSEUDO_BASE; ++class CONCAT_VECTORS_H_PSEUDO_DESC : ++ LASX_CONCAT_VECTORS_PSEUDO_BASE; ++class CONCAT_VECTORS_W_PSEUDO_DESC : ++ LASX_CONCAT_VECTORS_PSEUDO_BASE; ++class CONCAT_VECTORS_D_PSEUDO_DESC : ++ LASX_CONCAT_VECTORS_PSEUDO_BASE; ++ ++class CONCAT_VECTORS_FW_PSEUDO_DESC : ++ LASX_CONCAT_VECTORS_PSEUDO_BASE; ++class CONCAT_VECTORS_FD_PSEUDO_DESC : ++ LASX_CONCAT_VECTORS_PSEUDO_BASE; ++ ++def CONCAT_VECTORS_B_PSEUDO : CONCAT_VECTORS_B_PSEUDO_DESC; ++def CONCAT_VECTORS_H_PSEUDO : CONCAT_VECTORS_H_PSEUDO_DESC; ++def CONCAT_VECTORS_W_PSEUDO : CONCAT_VECTORS_W_PSEUDO_DESC; ++def CONCAT_VECTORS_D_PSEUDO : CONCAT_VECTORS_D_PSEUDO_DESC; ++def CONCAT_VECTORS_FW_PSEUDO : CONCAT_VECTORS_FW_PSEUDO_DESC; ++def CONCAT_VECTORS_FD_PSEUDO : CONCAT_VECTORS_FD_PSEUDO_DESC; ++ ++ ++class LASX_COPY_GPR_PSEUDO_BASE : ++ LASXPseudo<(outs ROXD:$xd), (ins ROFS:$xj, ROIdx:$n), ++ [(set ROXD:$xd, (OpNode (VecTy ROFS:$xj), ROIdx:$n))]> { ++ bit usesCustomInserter = 1; ++} ++ ++class XCOPY_FW_GPR_PSEUDO_DESC : LASX_COPY_GPR_PSEUDO_BASE; ++def XCOPY_FW_GPR_PSEUDO : XCOPY_FW_GPR_PSEUDO_DESC; ++ ++ ++let isCodeGenOnly = 1 in { ++ ++def XVLD_H : LASX_I12_S<0b0010110010>, ++ LASX_LD<"xvld", load, v16i16, LASX256HOpnd, mem>; ++ ++def XVLD_W : LASX_I12_S<0b0010110010>, ++ LASX_LD<"xvld", load, v8i32, LASX256WOpnd, mem>; ++ ++def XVLD_D : LASX_I12_S<0b0010110010>, ++ LASX_LD<"xvld", load, v4i64, LASX256DOpnd, mem>; ++ ++ 
++def XVST_H : LASX_I12_S<0b0010110011>, ++ LASX_ST<"xvst", store, v16i16, LASX256HOpnd, mem_simm12>; ++ ++def XVST_W : LASX_I12_S<0b0010110011>, ++ LASX_ST<"xvst", store, v8i32, LASX256WOpnd, mem_simm12>; ++ ++def XVST_D : LASX_I12_S<0b0010110011>, ++ LASX_ST<"xvst", store, v4i64, LASX256DOpnd, mem_simm12>; ++ ++ ++def XVREPLVE_W_N : LASX_3R_1GP<0b01110101001000110>, ++ LASX_3R_VREPLVE_DESC_BASE_N<"xvreplve.w", LASX256WOpnd>; ++ ++ ++def XVANDI_B_N : LASX_I8_U<0b01110111110100>, ++ LASX_BIT_U8_DESC_BASE<"xvandi.b", and, xvsplati8_uimm8, LASX256BOpnd>; ++ ++ ++def XVXORI_B_N : LASX_I8_U<0b01110111110110>, ++ LASX_BIT_U8_DESC_BASE<"xvxori.b", xor, xvsplati8_uimm8, LASX256BOpnd>; ++ ++ ++def XVSRAI_B_N : LASX_I3_U<0b0111011100110100001>, ++ LASX_BIT_U3_VREPLVE_DESC_BASE<"xvsrai.b", sra, xvsplati8_uimm3, LASX256BOpnd>; ++ ++def XVSRAI_H_N : LASX_I4_U<0b011101110011010001>, ++ LASX_BIT_U4_VREPLVE_DESC_BASE<"xvsrai.h", sra, xvsplati16_uimm4, LASX256HOpnd>; ++ ++def XVSRAI_W_N : LASX_I5_U<0b01110111001101001>, ++ LASX_BIT_U5_VREPLVE_DESC_BASE<"xvsrai.w", sra, xvsplati32_uimm5, LASX256WOpnd>; ++ ++def XVSRAI_D_N : LASX_I6_U<0b0111011100110101>, ++ LASX_BIT_U6_VREPLVE_DESC_BASE<"xvsrai.d", sra, xvsplati64_uimm6, LASX256DOpnd>; ++ ++ ++def XVSLLI_B_N : LASX_I3_U<0b0111011100101100001>, ++ LASX_BIT_U3_VREPLVE_DESC_BASE<"xvslli.b", shl, xvsplati8_uimm3, LASX256BOpnd>; ++ ++def XVSLLI_H_N : LASX_I4_U<0b011101110010110001>, ++ LASX_BIT_U4_VREPLVE_DESC_BASE<"xvslli.h", shl, xvsplati16_uimm4, LASX256HOpnd>; ++ ++def XVSLLI_W_N : LASX_I5_U<0b01110111001011001>, ++ LASX_BIT_U5_VREPLVE_DESC_BASE<"xvslli.w", shl, xvsplati32_uimm5, LASX256WOpnd>; ++ ++def XVSLLI_D_N : LASX_I6_U<0b0111011100101101>, ++ LASX_BIT_U6_VREPLVE_DESC_BASE<"xvslli.d", shl, xvsplati64_uimm6, LASX256DOpnd>; ++ ++ ++def XVSRLI_B_N : LASX_I3_U<0b0111011100110000001>, ++ LASX_BIT_U3_VREPLVE_DESC_BASE<"xvsrli.b", srl, xvsplati8_uimm3, LASX256BOpnd>; ++ ++def XVSRLI_H_N : LASX_I4_U<0b011101110011000001>, ++ LASX_BIT_U4_VREPLVE_DESC_BASE<"xvsrli.h", srl, xvsplati16_uimm4, LASX256HOpnd>; ++ ++def XVSRLI_W_N : LASX_I5_U<0b01110111001100001>, ++ LASX_BIT_U5_VREPLVE_DESC_BASE<"xvsrli.w", srl, xvsplati32_uimm5, LASX256WOpnd>; ++ ++def XVSRLI_D_N : LASX_I6_U<0b0111011100110001>, ++ LASX_BIT_U6_VREPLVE_DESC_BASE<"xvsrli.d", srl, xvsplati64_uimm6, LASX256DOpnd>; ++ ++ ++def XVMAXI_B_N : LASX_I5<0b01110110100100000>, ++ LASX_I5_DESC_BASE<"xvmaxi.b", smax, xvsplati8_simm5, LASX256BOpnd>; ++ ++def XVMAXI_H_N : LASX_I5<0b01110110100100001>, ++ LASX_I5_DESC_BASE<"xvmaxi.h", smax, xvsplati16_simm5, LASX256HOpnd>; ++ ++def XVMAXI_W_N : LASX_I5<0b01110110100100010>, ++ LASX_I5_DESC_BASE<"xvmaxi.w", smax, xvsplati32_simm5, LASX256WOpnd>; ++ ++def XVMAXI_D_N : LASX_I5<0b01110110100100011>, ++ LASX_I5_DESC_BASE<"xvmaxi.d", smax, xvsplati64_simm5, LASX256DOpnd>; ++ ++ ++def XVMINI_B_N : LASX_I5<0b01110110100100100>, ++ LASX_I5_DESC_BASE<"xvmini.b", smin, xvsplati8_simm5, LASX256BOpnd>; ++ ++def XVMINI_H_N : LASX_I5<0b01110110100100101>, ++ LASX_I5_DESC_BASE<"xvmini.h", smin, xvsplati16_simm5, LASX256HOpnd>; ++ ++def XVMINI_W_N : LASX_I5<0b01110110100100110>, ++ LASX_I5_DESC_BASE<"xvmini.w", smin, xvsplati32_simm5, LASX256WOpnd>; ++ ++def XVMINI_D_N : LASX_I5<0b01110110100100111>, ++ LASX_I5_DESC_BASE<"xvmini.d", smin, xvsplati64_simm5, LASX256DOpnd>; ++ ++ ++def XVMAXI_BU_N : LASX_I5_U<0b01110110100101000>, ++ LASX_I5_U_DESC_BASE<"xvmaxi.bu", umax, xvsplati8_uimm5, LASX256BOpnd>; ++ ++def XVMAXI_HU_N : LASX_I5_U<0b01110110100101001>, ++ 
LASX_I5_U_DESC_BASE<"xvmaxi.hu", umax, xvsplati16_uimm5, LASX256HOpnd>; ++ ++def XVMAXI_WU_N : LASX_I5_U<0b01110110100101010>, ++ LASX_I5_U_DESC_BASE<"xvmaxi.wu", umax, xvsplati32_uimm5, LASX256WOpnd>; ++ ++def XVMAXI_DU_N : LASX_I5_U<0b01110110100101011>, ++ LASX_I5_U_DESC_BASE<"xvmaxi.du", umax, xvsplati64_uimm5, LASX256DOpnd>; ++ ++ ++def XVMINI_BU_N : LASX_I5_U<0b01110110100101100>, ++ LASX_I5_U_DESC_BASE<"xvmini.bu", umin, xvsplati8_uimm5, LASX256BOpnd>; ++ ++def XVMINI_HU_N : LASX_I5_U<0b01110110100101101>, ++ LASX_I5_U_DESC_BASE<"xvmini.hu", umin, xvsplati16_uimm5, LASX256HOpnd>; ++ ++def XVMINI_WU_N : LASX_I5_U<0b01110110100101110>, ++ LASX_I5_U_DESC_BASE<"xvmini.wu", umin, xvsplati32_uimm5, LASX256WOpnd>; ++ ++def XVMINI_DU_N : LASX_I5_U<0b01110110100101111>, ++ LASX_I5_U_DESC_BASE<"xvmini.du", umin, xvsplati64_uimm5, LASX256DOpnd>; ++ ++ ++def XVSEQI_B_N : LASX_I5<0b01110110100000000>, ++ LASX_I5_SETCC_DESC_BASE<"xvseqi.b", SETEQ, v32i8, xvsplati8_simm5, LASX256BOpnd>; ++ ++def XVSEQI_H_N : LASX_I5<0b01110110100000001>, ++ LASX_I5_SETCC_DESC_BASE<"xvseqi.h", SETEQ, v16i16, xvsplati16_simm5, LASX256HOpnd>; ++ ++def XVSEQI_W_N : LASX_I5<0b01110110100000010>, ++ LASX_I5_SETCC_DESC_BASE<"xvseqi.w", SETEQ, v8i32, xvsplati32_simm5, LASX256WOpnd>; ++ ++def XVSEQI_D_N : LASX_I5<0b01110110100000011>, ++ LASX_I5_SETCC_DESC_BASE<"xvseqi.d", SETEQ, v4i64, xvsplati64_simm5, LASX256DOpnd>; ++ ++ ++def XVSLEI_B_N : LASX_I5<0b01110110100000100>, ++ LASX_I5_SETCC_DESC_BASE<"xvslei.b", SETLE, v32i8, xvsplati8_simm5, LASX256BOpnd>; ++ ++def XVSLEI_H_N : LASX_I5<0b01110110100000101>, ++ LASX_I5_SETCC_DESC_BASE<"xvslei.h", SETLE, v16i16, xvsplati16_simm5, LASX256HOpnd>; ++ ++def XVSLEI_W_N : LASX_I5<0b01110110100000110>, ++ LASX_I5_SETCC_DESC_BASE<"xvslei.w", SETLE, v8i32, xvsplati32_simm5, LASX256WOpnd>; ++ ++def XVSLEI_D_N : LASX_I5<0b01110110100000111>, ++ LASX_I5_SETCC_DESC_BASE<"xvslei.d", SETLE, v4i64, xvsplati64_simm5, LASX256DOpnd>; ++ ++ ++def XVSLEI_BU_N : LASX_I5_U<0b01110110100001000>, ++ LASX_I5_U_SETCC_DESC_BASE<"xvslei.bu", SETULE, v32i8, xvsplati8_uimm5, LASX256BOpnd>; ++ ++def XVSLEI_HU_N : LASX_I5_U<0b01110110100001001>, ++ LASX_I5_U_SETCC_DESC_BASE<"xvslei.hu", SETULE, v16i16, xvsplati16_uimm5, LASX256HOpnd>; ++ ++def XVSLEI_WU_N : LASX_I5_U<0b01110110100001010>, ++ LASX_I5_U_SETCC_DESC_BASE<"xvslei.wu", SETULE, v8i32, xvsplati32_uimm5, LASX256WOpnd>; ++ ++def XVSLEI_DU_N : LASX_I5_U<0b01110110100001011>, ++ LASX_I5_U_SETCC_DESC_BASE<"xvslei.du", SETULE, v4i64, xvsplati64_uimm5, LASX256DOpnd>; ++ ++ ++def XVSLTI_B_N : LASX_I5<0b01110110100001100>, ++ LASX_I5_SETCC_DESC_BASE<"xvslti.b", SETLT, v32i8, xvsplati8_simm5, LASX256BOpnd>; ++ ++def XVSLTI_H_N : LASX_I5<0b01110110100001101>, ++ LASX_I5_SETCC_DESC_BASE<"xvslti.h", SETLT, v16i16, xvsplati16_simm5, LASX256HOpnd>; ++ ++def XVSLTI_W_N : LASX_I5<0b01110110100001110>, ++ LASX_I5_SETCC_DESC_BASE<"xvslti.w", SETLT, v8i32, xvsplati32_simm5, LASX256WOpnd>; ++ ++def XVSLTI_D_N : LASX_I5<0b01110110100001111>, ++ LASX_I5_SETCC_DESC_BASE<"xvslti.d", SETLT, v4i64, xvsplati64_simm5, LASX256DOpnd>; ++ ++ ++def XVSLTI_BU_N : LASX_I5_U<0b01110110100010000>, ++ LASX_I5_U_SETCC_DESC_BASE<"xvslti.bu", SETULT, v32i8, xvsplati8_uimm5, LASX256BOpnd>; ++ ++def XVSLTI_HU_N : LASX_I5_U<0b01110110100010001>, ++ LASX_I5_U_SETCC_DESC_BASE<"xvslti.hu", SETULT, v16i16, xvsplati16_uimm5, LASX256HOpnd>; ++ ++def XVSLTI_WU_N : LASX_I5_U<0b01110110100010010>, ++ LASX_I5_U_SETCC_DESC_BASE<"xvslti.wu", SETULT, v8i32, xvsplati32_uimm5, LASX256WOpnd>; ++ ++def 
XVSLTI_DU_N : LASX_I5_U<0b01110110100010011>, ++ LASX_I5_U_SETCC_DESC_BASE<"xvslti.du", SETULT, v4i64, xvsplati64_uimm5, LASX256DOpnd>; ++ ++ ++def XVADDI_BU_N : LASX_I5_U<0b01110110100010100>, ++ LASX_I5_U_DESC_BASE<"xvaddi.bu", add, xvsplati8_uimm5, LASX256BOpnd>; ++ ++def XVADDI_HU_N : LASX_I5_U<0b01110110100010101>, ++ LASX_I5_U_DESC_BASE<"xvaddi.hu", add, xvsplati16_uimm5, LASX256HOpnd>; ++ ++def XVADDI_WU_N : LASX_I5_U<0b01110110100010110>, ++ LASX_I5_U_DESC_BASE<"xvaddi.wu", add, xvsplati32_uimm5, LASX256WOpnd>; ++ ++def XVADDI_DU_N : LASX_I5_U<0b01110110100010111>, ++ LASX_I5_U_DESC_BASE<"xvaddi.du", add, xvsplati64_uimm5, LASX256DOpnd>; ++ ++ ++def XVSUBI_BU_N : LASX_I5_U<0b01110110100011000>, ++ LASX_I5_U_DESC_BASE<"xvsubi.bu", sub, xvsplati8_uimm5, LASX256BOpnd>; ++ ++def XVSUBI_HU_N : LASX_I5_U<0b01110110100011001>, ++ LASX_I5_U_DESC_BASE<"xvsubi.hu", sub, xvsplati16_uimm5, LASX256HOpnd>; ++ ++def XVSUBI_WU_N : LASX_I5_U<0b01110110100011010>, ++ LASX_I5_U_DESC_BASE<"xvsubi.wu", sub, xvsplati32_uimm5, LASX256WOpnd>; ++ ++def XVSUBI_DU_N : LASX_I5_U<0b01110110100011011>, ++ LASX_I5_U_DESC_BASE<"xvsubi.du", sub, xvsplati64_uimm5, LASX256DOpnd>; ++ ++ ++def XVPERMI_QH : LASX_I8_U<0b01110111111011>, ++ LASX_2RN_3R_U8_DESC_BASE<"xvpermi.q", LASX256HOpnd, LASX256HOpnd>; ++ ++def XVPERMI_QW : LASX_I8_U<0b01110111111011>, ++ LASX_2RN_3R_U8_DESC_BASE<"xvpermi.q", LASX256WOpnd, LASX256WOpnd>; ++ ++def XVPERMI_QD : LASX_I8_U<0b01110111111011>, ++ LASX_2RN_3R_U8_DESC_BASE<"xvpermi.q", LASX256DOpnd, LASX256DOpnd>; ++ ++ ++def XVBITSELI_B_N : LASX_I8_U<0b01110111110001>, ++ LASX_2R_3R_U8_SELECT<"xvbitseli.b", vselect, LASX256BOpnd, LASX256BOpnd>; ++ ++} ++ ++ ++def : LASXPat<(v8f32 (load addrimm12:$addr)), (XVLD_W addrimm12:$addr)>; ++def : LASXPat<(v4f64 (load addrimm12:$addr)), (XVLD_D addrimm12:$addr)>; ++ ++def XVST_FW : LASXPat<(store (v8f32 LASX256W:$xj), addrimm12:$addr), ++ (XVST_W LASX256W:$xj, addrimm12:$addr)>; ++def XVST_FD : LASXPat<(store (v4f64 LASX256D:$xj), addrimm12:$addr), ++ (XVST_D LASX256D:$xj, addrimm12:$addr)>; ++ ++def XVNEG_FW : LASXPat<(fneg (v8f32 LASX256W:$xj)), ++ (XVBITREVI_W LASX256W:$xj, 31)>; ++def XVNEG_FD : LASXPat<(fneg (v4f64 LASX256D:$xj)), ++ (XVBITREVI_D LASX256D:$xj, 63)>; ++ ++ ++def : LASXPat<(v4i64 (LoongArchVABSD v4i64:$xj, v4i64:$xk, (i32 0))), ++ (v4i64 (XVABSD_D $xj, $xk))>; ++ ++def : LASXPat<(v8i32 (LoongArchVABSD v8i32:$xj, v8i32:$xk, (i32 0))), ++ (v8i32 (XVABSD_W $xj, $xk))>; ++ ++def : LASXPat<(v16i16 (LoongArchVABSD v16i16:$xj, v16i16:$xk, (i32 0))), ++ (v16i16 (XVABSD_H $xj, $xk))>; ++ ++def : LASXPat<(v32i8 (LoongArchVABSD v32i8:$xj, v32i8:$xk, (i32 0))), ++ (v32i8 (XVABSD_B $xj, $xk))>; ++ ++def : LASXPat<(v4i64 (LoongArchUVABSD v4i64:$xj, v4i64:$xk, (i32 0))), ++ (v4i64 (XVABSD_DU $xj, $xk))>; ++ ++def : LASXPat<(v8i32 (LoongArchUVABSD v8i32:$xj, v8i32:$xk, (i32 0))), ++ (v8i32 (XVABSD_WU $xj, $xk))>; ++ ++def : LASXPat<(v16i16 (LoongArchUVABSD v16i16:$xj, v16i16:$xk, (i32 0))), ++ (v16i16 (XVABSD_HU $xj, $xk))>; ++ ++def : LASXPat<(v32i8 (LoongArchUVABSD v32i8:$xj, v32i8:$xk, (i32 0))), ++ (v32i8 (XVABSD_BU $xj, $xk))>; ++ ++ ++def : LASXPat<(or v32i8:$vj, (shl vsplat_imm_eq_1, v32i8:$vk)), ++ (XVBITSET_B v32i8:$vj, v32i8:$vk)>; ++def : LASXPat<(or v16i16:$vj, (shl vsplat_imm_eq_1, v16i16:$vk)), ++ (XVBITSET_H v16i16:$vj, v16i16:$vk)>; ++def : LASXPat<(or v8i32:$vj, (shl vsplat_imm_eq_1, v8i32:$vk)), ++ (XVBITSET_W v8i32:$vj, v8i32:$vk)>; ++def : LASXPat<(or v4i64:$vj, (shl vsplat_imm_eq_1, v4i64:$vk)), ++ (XVBITSET_D v4i64:$vj, 
v4i64:$vk)>; ++ ++def : LASXPat<(xor v32i8:$vj, (shl xvsplat_imm_eq_1, v32i8:$vk)), ++ (XVBITREV_B v32i8:$vj, v32i8:$vk)>; ++def : LASXPat<(xor v16i16:$vj, (shl xvsplat_imm_eq_1, v16i16:$vk)), ++ (XVBITREV_H v16i16:$vj, v16i16:$vk)>; ++def : LASXPat<(xor v8i32:$vj, (shl xvsplat_imm_eq_1, v8i32:$vk)), ++ (XVBITREV_W v8i32:$vj, v8i32:$vk)>; ++def : LASXPat<(xor v4i64:$vj, (shl (v4i64 xvsplati64_imm_eq_1), v4i64:$vk)), ++ (XVBITREV_D v4i64:$vj, v4i64:$vk)>; ++ ++def : LASXPat<(and v32i8:$vj, (xor (shl vsplat_imm_eq_1, v32i8:$vk), immAllOnesV)), ++ (XVBITCLR_B v32i8:$vj, v32i8:$vk)>; ++def : LASXPat<(and v16i16:$vj, (xor (shl vsplat_imm_eq_1, v16i16:$vk), immAllOnesV)), ++ (XVBITCLR_H v16i16:$vj, v16i16:$vk)>; ++def : LASXPat<(and v8i32:$vj, (xor (shl vsplat_imm_eq_1, v8i32:$vk), immAllOnesV)), ++ (XVBITCLR_W v8i32:$vj, v8i32:$vk)>; ++def : LASXPat<(and v4i64:$vj, (xor (shl (v4i64 vsplati64_imm_eq_1), v4i64:$vk), (bitconvert (v8i32 immAllOnesV)))), ++ (XVBITCLR_D v4i64:$vj, v4i64:$vk)>; ++ ++def xvsplati64_imm_eq_63 : PatLeaf<(bitconvert (v8i32 (build_vector))), [{ ++ APInt Imm; ++ SDNode *BV = N->getOperand(0).getNode(); ++ EVT EltTy = N->getValueType(0).getVectorElementType(); ++ ++ return selectVSplat(BV, Imm, EltTy.getSizeInBits()) && ++ Imm.getBitWidth() == EltTy.getSizeInBits() && Imm == 63; ++}]>; ++ ++def xvsplati8imm7 : PatFrag<(ops node:$wt), ++ (and node:$wt, (xvsplati8 immi32Cst7))>; ++def xvsplati16imm15 : PatFrag<(ops node:$wt), ++ (and node:$wt, (xvsplati16 immi32Cst15))>; ++def xvsplati32imm31 : PatFrag<(ops node:$wt), ++ (and node:$wt, (xvsplati32 immi32Cst31))>; ++def xvsplati64imm63 : PatFrag<(ops node:$wt), ++ (and node:$wt, xvsplati64_imm_eq_63)>; ++ ++ ++class LASXShiftPat : ++ LASXPat<(VT (Node VT:$vs, (VT (and VT:$vt, Vec)))), ++ (VT (Insn VT:$vs, VT:$vt))>; ++ ++class LASXBitPat : ++ LASXPat<(VT (Node VT:$vs, (shl vsplat_imm_eq_1, (Frag VT:$vt)))), ++ (VT (Insn VT:$vs, VT:$vt))>; ++ ++multiclass LASXShiftPats { ++ def : LASXShiftPat(Insn#_B), ++ (xvsplati8 immi32Cst7)>; ++ def : LASXShiftPat(Insn#_H), ++ (xvsplati16 immi32Cst15)>; ++ def : LASXShiftPat(Insn#_W), ++ (xvsplati32 immi32Cst31)>; ++ def : LASXPat<(v4i64 (Node v4i64:$vs, (v4i64 (and v4i64:$vt, ++ xvsplati64_imm_eq_63)))), ++ (v4i64 (!cast(Insn#_D) v4i64:$vs, v4i64:$vt))>; ++} ++ ++multiclass LASXBitPats { ++ def : LASXBitPat(Insn#_B), xvsplati8imm7>; ++ def : LASXBitPat(Insn#_H), xvsplati16imm15>; ++ def : LASXBitPat(Insn#_W), xvsplati32imm31>; ++ def : LASXPat<(Node v4i64:$vs, (shl (v4i64 xvsplati64_imm_eq_1), ++ (xvsplati64imm63 v4i64:$vt))), ++ (v4i64 (!cast(Insn#_D) v4i64:$vs, v4i64:$vt))>; ++} ++ ++defm : LASXShiftPats; ++defm : LASXShiftPats; ++defm : LASXShiftPats; ++defm : LASXBitPats; ++defm : LASXBitPats; ++ ++def : LASXPat<(and v32i8:$vs, (xor (shl xvsplat_imm_eq_1, ++ (xvsplati8imm7 v32i8:$vt)), ++ immAllOnesV)), ++ (v32i8 (XVBITCLR_B v32i8:$vs, v32i8:$vt))>; ++def : LASXPat<(and v16i16:$vs, (xor (shl xvsplat_imm_eq_1, ++ (xvsplati16imm15 v16i16:$vt)), ++ immAllOnesV)), ++ (v16i16 (XVBITCLR_H v16i16:$vs, v16i16:$vt))>; ++def : LASXPat<(and v8i32:$vs, (xor (shl xvsplat_imm_eq_1, ++ (xvsplati32imm31 v8i32:$vt)), ++ immAllOnesV)), ++ (v8i32 (XVBITCLR_W v8i32:$vs, v8i32:$vt))>; ++def : LASXPat<(and v4i64:$vs, (xor (shl (v4i64 xvsplati64_imm_eq_1), ++ (xvsplati64imm63 v4i64:$vt)), ++ (bitconvert (v8i32 immAllOnesV)))), ++ (v4i64 (XVBITCLR_D v4i64:$vs, v4i64:$vt))>; ++ ++ ++def : LASXPat<(fdiv (v8f32 (build_vector (f32 fpimm1), (f32 fpimm1), (f32 fpimm1), ++ (f32 fpimm1),(f32 fpimm1), (f32 fpimm1), 
(f32 fpimm1), (f32 fpimm1))), v8f32:$v), ++ (XVFRECIP_S v8f32:$v)>; ++ ++def : LASXPat<(fdiv (v4f64 (build_vector (f64 fpimm1), (f64 fpimm1), (f64 fpimm1), (f64 fpimm1))), v4f64:$v), ++ (XVFRECIP_D v4f64:$v)>; ++ ++def : LASXPat<(fdiv (v8f32 fpimm1), v8f32:$v), ++ (XVFRECIP_S v8f32:$v)>; ++ ++def : LASXPat<(fdiv (v4f64 fpimm1), v4f64:$v), ++ (XVFRECIP_D v4f64:$v)>; ++ ++ ++def : LASXPat<(fdiv (v8f32 (build_vector (f32 fpimm1), (f32 fpimm1), (f32 fpimm1), (f32 fpimm1), ++ (f32 fpimm1), (f32 fpimm1), (f32 fpimm1), (f32 fpimm1))), (fsqrt v8f32:$v)), ++ (XVFRSQRT_S v8f32:$v)>; ++ ++def : LASXPat<(fdiv (v4f64 (build_vector (f64 fpimm1), (f64 fpimm1), (f64 fpimm1), (f64 fpimm1))), (fsqrt v4f64:$v)), ++ (XVFRSQRT_D v4f64:$v)>; ++ ++def : LASXPat<(fdiv (v8f32 fpimm1), (fsqrt v8f32:$v)), ++ (XVFRSQRT_S v8f32:$v)>; ++ ++def : LASXPat<(fdiv (v4f64 fpimm1), (fsqrt v4f64:$v)), ++ (XVFRSQRT_D v4f64:$v)>; ++ ++ ++def : LASXPat <(extract_subvector v4f64:$vec, (i32 0)), ++ (v2f64 (EXTRACT_SUBREG v4f64:$vec, sub_128))>; ++ ++def : LASXPat <(extract_subvector v8f32:$vec, (i32 0)), ++ (v4f32 (EXTRACT_SUBREG v8f32:$vec, sub_128))>; ++ ++def : LASXPat <(extract_subvector v4i64:$vec, (i32 0)), ++ (v2i64 (EXTRACT_SUBREG v4i64:$vec, sub_128))>; ++ ++def : LASXPat <(extract_subvector v8i32:$vec, (i32 0)), ++ (v4i32 (EXTRACT_SUBREG v8i32:$vec, sub_128))>; ++ ++def : LASXPat <(extract_subvector v16i16:$vec, (i32 0)), ++ (v8i16 (EXTRACT_SUBREG v16i16:$vec, sub_128))>; ++ ++def : LASXPat <(extract_subvector v32i8:$vec, (i32 0)), ++ (v16i8 (EXTRACT_SUBREG v32i8:$vec, sub_128))>; ++ ++ ++ ++def : LASXPat <(extract_subvector v4f64:$vec, (i64 0)), ++ (v2f64 (EXTRACT_SUBREG v4f64:$vec, sub_128))>; ++ ++def : LASXPat <(extract_subvector v8f32:$vec, (i64 0)), ++ (v4f32 (EXTRACT_SUBREG v8f32:$vec, sub_128))>; ++ ++def : LASXPat <(extract_subvector v4i64:$vec, (i64 0)), ++ (v2i64 (EXTRACT_SUBREG v4i64:$vec, sub_128))>; ++ ++def : LASXPat <(extract_subvector v8i32:$vec, (i64 0)), ++ (v4i32 (EXTRACT_SUBREG v8i32:$vec, sub_128))>; ++ ++def : LASXPat <(extract_subvector v16i16:$vec, (i64 0)), ++ (v8i16 (EXTRACT_SUBREG v16i16:$vec, sub_128))>; ++ ++def : LASXPat <(extract_subvector v32i8:$vec, (i64 0)), ++ (v16i8 (EXTRACT_SUBREG v32i8:$vec, sub_128))>; ++ ++ ++def : LASXPat <(extract_subvector v4i64:$vec, (i32 2)), ++ (v2i64 (EXTRACT_SUBREG (v4i64 (XVPERMI_QD v4i64:$vec, v4i64:$vec, (i32 1))), sub_128))>; ++ ++def : LASXPat <(extract_subvector v8i32:$vec, (i32 4)), ++ (v4i32 (EXTRACT_SUBREG (v8i32 (XVPERMI_QW v8i32:$vec, v8i32:$vec, (i32 1))), sub_128))>; ++ ++def : LASXPat <(extract_subvector v16i16:$vec, (i32 8)), ++ (v8i16 (EXTRACT_SUBREG (v16i16 (XVPERMI_QH v16i16:$vec, v16i16:$vec, (i32 1))), sub_128))>; ++ ++def : LASXPat <(extract_subvector v32i8:$vec, (i32 16)), ++ (v16i8 (EXTRACT_SUBREG (v32i8 (XVPERMI_Q v32i8:$vec, v32i8:$vec, (i32 1))), sub_128))>; ++ ++ ++def : LASXPat <(extract_subvector v4i64:$vec, (i64 2)), ++ (v2i64 (EXTRACT_SUBREG (v4i64 (XVPERMI_QD v4i64:$vec, v4i64:$vec, (i32 1))), sub_128))>; ++ ++def : LASXPat <(extract_subvector v8i32:$vec, (i64 4)), ++ (v4i32 (EXTRACT_SUBREG (v8i32 (XVPERMI_QW v8i32:$vec, v8i32:$vec, (i32 1))), sub_128))>; ++ ++def : LASXPat <(extract_subvector v16i16:$vec, (i64 8)), ++ (v8i16 (EXTRACT_SUBREG (v16i16 (XVPERMI_QH v16i16:$vec, v16i16:$vec, (i32 1))), sub_128))>; ++ ++def : LASXPat <(extract_subvector v32i8:$vec, (i64 16)), ++ (v16i8 (EXTRACT_SUBREG (v32i8 (XVPERMI_Q v32i8:$vec, v32i8:$vec, (i32 1))), sub_128))>; ++ ++ ++def : LASXPat<(abs v4i64:$v), ++ (XVMAX_D v4i64:$v, 
(XVNEG_D v4i64:$v))>; ++ ++def : LASXPat<(abs v8i32:$v), ++ (XVMAX_W v8i32:$v, (XVNEG_W v8i32:$v))>; ++ ++def : LASXPat<(abs v16i16:$v), ++ (XVMAX_H v16i16:$v, (XVNEG_H v16i16:$v))>; ++ ++def : LASXPat<(abs v32i8:$v), ++ (XVMAX_B v32i8:$v, (XVNEG_B v32i8:$v))>; ++ ++ ++def : LASXPat<(sub (v32i8 immAllZerosV), v32i8:$v), ++ (XVNEG_B v32i8:$v)>; ++ ++def : LASXPat<(sub (v16i16 immAllZerosV), v16i16:$v), ++ (XVNEG_H v16i16:$v)>; ++ ++def : LASXPat<(sub (v8i32 immAllZerosV), v8i32:$v), ++ (XVNEG_W v8i32:$v)>; ++ ++def : LASXPat<(sub (v4i64 immAllZerosV), v4i64:$v), ++ (XVNEG_D v4i64:$v)>; ++ ++ ++ ++def : LASXPat<(insert_subvector undef, (v2i64 LSX128D:$src), (i32 0)), ++ (INSERT_SUBREG (v4i64 (IMPLICIT_DEF)), LSX128D:$src, sub_128)>; ++ ++def : LASXPat<(insert_subvector undef, (v4i32 LSX128W:$src), (i32 0)), ++ (INSERT_SUBREG (v8i32 (IMPLICIT_DEF)), LSX128W:$src, sub_128)>; ++ ++def : LASXPat<(insert_subvector undef, (v8i16 LSX128H:$src), (i32 0)), ++ (INSERT_SUBREG (v16i16 (IMPLICIT_DEF)), LSX128H:$src, sub_128)>; ++ ++def : LASXPat<(insert_subvector undef, (v16i8 LSX128B:$src), (i32 0)), ++ (INSERT_SUBREG (v32i8 (IMPLICIT_DEF)), LSX128B:$src, sub_128)>; ++ ++ ++def : LASXPat<(insert_subvector undef, (v2i64 LSX128D:$src), (i64 0)), ++ (INSERT_SUBREG (v4i64 (IMPLICIT_DEF)), LSX128D:$src, sub_128)>; ++ ++def : LASXPat<(insert_subvector undef, (v4i32 LSX128W:$src), (i64 0)), ++ (INSERT_SUBREG (v8i32 (IMPLICIT_DEF)), LSX128W:$src, sub_128)>; ++ ++def : LASXPat<(insert_subvector undef, (v8i16 LSX128H:$src), (i64 0)), ++ (INSERT_SUBREG (v16i16 (IMPLICIT_DEF)), LSX128H:$src, sub_128)>; ++ ++def : LASXPat<(insert_subvector undef, (v16i8 LSX128B:$src), (i64 0)), ++ (INSERT_SUBREG (v32i8 (IMPLICIT_DEF)), LSX128B:$src, sub_128)>; ++ ++ ++def : LASXPat<(insert_subvector ++ (v4i64 immAllZerosV), (v2i64 LSX128D:$src), (i32 0)), ++ (INSERT_SUBREG (v4i64 (IMPLICIT_DEF)), LSX128D:$src, sub_128)>; ++ ++def : LASXPat<(insert_subvector ++ (v8i32 immAllZerosV), (v4i32 LSX128W:$src), (i32 0)), ++ (INSERT_SUBREG (v8i32 (IMPLICIT_DEF)), LSX128W:$src, sub_128)>; ++ ++def : LASXPat<(insert_subvector ++ (v16i16 immAllZerosV), (v8i16 LSX128H:$src), (i32 0)), ++ (INSERT_SUBREG (v16i16 (IMPLICIT_DEF)), LSX128H:$src, sub_128)>; ++ ++def : LASXPat<(insert_subvector ++ (v32i8 immAllZerosV), (v16i8 LSX128B:$src), (i32 0)), ++ (INSERT_SUBREG (v32i8 (IMPLICIT_DEF)), LSX128B:$src, sub_128)>; ++ ++def : LASXPat<(insert_subvector ++ (v4i64 immAllZerosV), (v2i64 LSX128D:$src), (i64 0)), ++ (INSERT_SUBREG (v4i64 (IMPLICIT_DEF)), LSX128D:$src, sub_128)>; ++ ++def : LASXPat<(insert_subvector ++ (v8i32 immAllZerosV), (v4i32 LSX128W:$src), (i64 0)), ++ (INSERT_SUBREG (v8i32 (IMPLICIT_DEF)), LSX128W:$src, sub_128)>; ++ ++def : LASXPat<(insert_subvector ++ (v16i16 immAllZerosV), (v8i16 LSX128H:$src), (i64 0)), ++ (INSERT_SUBREG (v16i16 (IMPLICIT_DEF)), LSX128H:$src, sub_128)>; ++ ++def : LASXPat<(insert_subvector ++ (v32i8 immAllZerosV), (v16i8 LSX128B:$src), (i64 0)), ++ (INSERT_SUBREG (v32i8 (IMPLICIT_DEF)), LSX128B:$src, sub_128)>; ++ ++ ++def : LASXPat<(insert_subvector ++ (v4i64 immAllZerosV), (v2i64 LSX128D:$src), (i32 2)), ++ (XVPERMI_QD (v4i64 (XVREPLGR2VR_D ZERO_64)), ++ (v4i64 (INSERT_SUBREG (v4i64 (IMPLICIT_DEF)), ++ LSX128D:$src, sub_128)), (i32 32))>; ++ ++def : LASXPat<(insert_subvector (v8i32 immAllZerosV), ++ (v4i32 LSX128W:$src), (i32 4)), ++ (XVPERMI_QW (v8i32 (XVREPLGR2VR_W ZERO)), ++ (v8i32 (INSERT_SUBREG (v8i32 (IMPLICIT_DEF)), ++ LSX128W:$src, sub_128)), (i32 32))>; ++ ++def : LASXPat<(insert_subvector ++ (v16i16 
immAllZerosV), (v8i16 LSX128H:$src), (i32 8)), ++ (XVPERMI_QH (v16i16 (XVREPLGR2VR_H ZERO)), ++ (v16i16 (INSERT_SUBREG (v16i16 (IMPLICIT_DEF)), ++ LSX128H:$src, sub_128)), (i32 32))>; ++ ++def : LASXPat<(insert_subvector ++ (v32i8 immAllZerosV), (v16i8 LSX128B:$src), (i32 16)), ++ (XVPERMI_Q (v32i8 (XVREPLGR2VR_B ZERO)), ++ (v32i8 (INSERT_SUBREG (v32i8 (IMPLICIT_DEF)), ++ LSX128B:$src, sub_128)), (i32 32))>; ++ ++ ++def : LASXPat<(insert_subvector ++ (v4i64 immAllZerosV), (v2i64 LSX128D:$src), (i64 2)), ++ (XVPERMI_QD (v4i64 (XVREPLGR2VR_D ZERO_64)), ++ (v4i64 (INSERT_SUBREG (v4i64 (IMPLICIT_DEF)), ++ LSX128D:$src, sub_128)), (i32 32))>; ++ ++def : LASXPat<(insert_subvector ++ (v8i32 immAllZerosV), (v4i32 LSX128W:$src), (i64 4)), ++ (XVPERMI_QW (v8i32 (XVREPLGR2VR_W ZERO)), ++ (v8i32 (INSERT_SUBREG (v8i32 (IMPLICIT_DEF)), ++ LSX128W:$src, sub_128)), (i32 32))>; ++ ++def : LASXPat<(insert_subvector ++ (v16i16 immAllZerosV), (v8i16 LSX128H:$src), (i64 8)), ++ (XVPERMI_QH (v16i16 (XVREPLGR2VR_H ZERO)), ++ (v16i16 (INSERT_SUBREG (v16i16 (IMPLICIT_DEF)), ++ LSX128H:$src, sub_128)), (i32 32))>; ++ ++def : LASXPat<(insert_subvector ++ (v32i8 immAllZerosV), (v16i8 LSX128B:$src), (i64 16)), ++ (XVPERMI_Q (v32i8 (XVREPLGR2VR_B ZERO)), ++ (v32i8 (INSERT_SUBREG (v32i8 (IMPLICIT_DEF)), ++ LSX128B:$src, sub_128)), (i32 32))>; ++ ++ ++def : LASXPat<(insert_subvector undef, (v2i64 LSX128D:$src), (i32 2)), ++ (XVPERMI_QD (v4i64 (IMPLICIT_DEF)), ++ (v4i64 (INSERT_SUBREG (v4i64 (IMPLICIT_DEF)), ++ LSX128D:$src, sub_128)), (i32 32))>; ++ ++def : LASXPat<(insert_subvector undef, (v4i32 LSX128W:$src), (i32 4)), ++ (XVPERMI_QW (v8i32 (IMPLICIT_DEF)), ++ (v8i32 (INSERT_SUBREG (v8i32 (IMPLICIT_DEF)), ++ LSX128W:$src, sub_128)), (i32 32))>; ++ ++def : LASXPat<(insert_subvector undef, (v8i16 LSX128H:$src), (i32 8)), ++ (XVPERMI_QH (v16i16 (IMPLICIT_DEF)), ++ (v16i16 (INSERT_SUBREG (v16i16 (IMPLICIT_DEF)), ++ LSX128H:$src, sub_128)), (i32 32))>; ++ ++def : LASXPat<(insert_subvector undef, (v16i8 LSX128B:$src), (i32 16)), ++ (XVPERMI_Q (v32i8 (IMPLICIT_DEF)), ++ (v32i8 (INSERT_SUBREG (v32i8 (IMPLICIT_DEF)), ++ LSX128B:$src, sub_128)), (i32 32))>; ++ ++ ++def : LASXPat<(insert_subvector undef, (v2i64 LSX128D:$src), (i64 2)), ++ (XVPERMI_QD (v4i64 (IMPLICIT_DEF)), ++ (v4i64 (INSERT_SUBREG (v4i64 (IMPLICIT_DEF)), ++ LSX128D:$src, sub_128)), (i32 32))>; ++ ++def : LASXPat<(insert_subvector undef, (v4i32 LSX128W:$src), (i64 4)), ++ (XVPERMI_QW (v8i32 (IMPLICIT_DEF)), ++ (v8i32 (INSERT_SUBREG (v8i32 (IMPLICIT_DEF)), ++ LSX128W:$src, sub_128)), (i32 32))>; ++ ++def : LASXPat<(insert_subvector undef, (v8i16 LSX128H:$src), (i64 8)), ++ (XVPERMI_QH (v16i16 (IMPLICIT_DEF)), ++ (v16i16 (INSERT_SUBREG (v16i16 (IMPLICIT_DEF)), ++ LSX128H:$src, sub_128)), (i32 32))>; ++ ++def : LASXPat<(insert_subvector undef, (v16i8 LSX128B:$src), (i64 16)), ++ (XVPERMI_Q (v32i8 (IMPLICIT_DEF)), ++ (v32i8 (INSERT_SUBREG (v32i8 (IMPLICIT_DEF)), ++ LSX128B:$src, sub_128)), (i32 32))>; ++ ++ ++def : LASXPat<(sra ++ (v32i8 (add ++ (v32i8 (add LASX256B:$a, LASX256B:$b)), ++ (v32i8 (srl ++ (v32i8 (add LASX256B:$a, LASX256B:$b)), ++ (v32i8 (build_vector (i32 7),(i32 7),(i32 7),(i32 7), ++ (i32 7),(i32 7),(i32 7),(i32 7), ++ (i32 7),(i32 7),(i32 7),(i32 7), ++ (i32 7),(i32 7),(i32 7),(i32 7), ++ (i32 7),(i32 7),(i32 7),(i32 7), ++ (i32 7),(i32 7),(i32 7),(i32 7), ++ (i32 7),(i32 7),(i32 7),(i32 7), ++ (i32 7),(i32 7),(i32 7),(i32 7)) ++ ) ++ ) ++ ) ++ ) ++ ), ++ (v32i8 (build_vector (i32 1),(i32 1),(i32 1),(i32 1), ++ (i32 1),(i32 1),(i32 1),(i32 1), ++ 
(i32 1),(i32 1),(i32 1),(i32 1), ++ (i32 1),(i32 1),(i32 1),(i32 1), ++ (i32 1),(i32 1),(i32 1),(i32 1), ++ (i32 1),(i32 1),(i32 1),(i32 1), ++ (i32 1),(i32 1),(i32 1),(i32 1), ++ (i32 1),(i32 1),(i32 1),(i32 1) ++ ))), ++ (XVAVG_B (v32i8 LASX256B:$a), (v32i8 LASX256B:$b))>; ++ ++def : LASXPat<(sra ++ (v16i16 (add ++ (v16i16 (add LASX256H:$a, LASX256H:$b)), ++ (v16i16 (srl ++ (v16i16 (add LASX256H:$a, LASX256H:$b)), ++ (v16i16 (build_vector (i32 15),(i32 15),(i32 15),(i32 15), ++ (i32 15),(i32 15),(i32 15),(i32 15), ++ (i32 15),(i32 15),(i32 15),(i32 15), ++ (i32 15),(i32 15),(i32 15),(i32 15)) ++ ) ++ ) ++ ) ++ ) ++ ), ++ (v16i16 (build_vector (i32 1),(i32 1),(i32 1),(i32 1), ++ (i32 1),(i32 1),(i32 1),(i32 1), ++ (i32 1),(i32 1),(i32 1),(i32 1), ++ (i32 1),(i32 1),(i32 1),(i32 1) ++ ))), ++ (XVAVG_H (v16i16 LASX256H:$a), (v16i16 LASX256H:$b))>; ++ ++def : LASXPat<(sra ++ (v8i32 (add ++ (v8i32 (add LASX256W:$a, LASX256W:$b)), ++ (v8i32 (srl ++ (v8i32 (add LASX256W:$a, LASX256W:$b)), ++ (v8i32 (build_vector (i32 31),(i32 31),(i32 31),(i32 31), ++ (i32 31),(i32 31),(i32 31),(i32 31)) ++ ) ++ ) ++ ) ++ ) ++ ), ++ (v8i32 (build_vector (i32 1),(i32 1),(i32 1),(i32 1), ++ (i32 1),(i32 1),(i32 1),(i32 1) ++ ))), ++ (XVAVG_W (v8i32 LASX256W:$a), (v8i32 LASX256W:$b))>; ++ ++def : LASXPat<(sra ++ (v4i64 (add ++ (v4i64 (add LASX256D:$a, LASX256D:$b)), ++ (v4i64 (srl ++ (v4i64 (add LASX256D:$a, LASX256D:$b)), ++ (v4i64 (build_vector (i64 63),(i64 63),(i64 63),(i64 63))) ++ ) ++ ) ++ ) ++ ), ++ (v4i64 (build_vector (i64 1),(i64 1),(i64 1),(i64 1)))), ++ (XVAVG_D (v4i64 LASX256D:$a), (v4i64 LASX256D:$b))>; ++ ++ ++ ++def : LASXPat<(srl ++ (v32i8 (add LASX256B:$a, LASX256B:$b)), ++ (v32i8 (build_vector (i32 1),(i32 1),(i32 1),(i32 1), ++ (i32 1),(i32 1),(i32 1),(i32 1), ++ (i32 1),(i32 1),(i32 1),(i32 1), ++ (i32 1),(i32 1),(i32 1),(i32 1), ++ (i32 1),(i32 1),(i32 1),(i32 1), ++ (i32 1),(i32 1),(i32 1),(i32 1), ++ (i32 1),(i32 1),(i32 1),(i32 1), ++ (i32 1),(i32 1),(i32 1),(i32 1)) ++ ) ++ ), ++ (XVAVG_BU (v32i8 LASX256B:$a), (v32i8 LASX256B:$b))>; ++ ++def : LASXPat<(srl ++ (v16i16 (add LASX256H:$a, LASX256H:$b)), ++ (v16i16 (build_vector (i32 1),(i32 1),(i32 1),(i32 1), ++ (i32 1),(i32 1),(i32 1),(i32 1), ++ (i32 1),(i32 1),(i32 1),(i32 1), ++ (i32 1),(i32 1),(i32 1),(i32 1)) ++ ) ++ ), ++ (XVAVG_HU (v16i16 LASX256H:$a), (v16i16 LASX256H:$b))>; ++ ++def : LASXPat<(srl ++ (v8i32 (add LASX256W:$a, LASX256W:$b)), ++ (v8i32 (build_vector (i32 1),(i32 1),(i32 1),(i32 1), ++ (i32 1),(i32 1),(i32 1),(i32 1)) ++ ) ++ ), ++ (XVAVG_WU (v8i32 LASX256W:$a), (v8i32 LASX256W:$b))>; ++ ++def : LASXPat<(srl ++ (v4i64 (add LASX256D:$a, LASX256D:$b)), ++ (v4i64 (build_vector (i64 1),(i64 1),(i64 1),(i64 1)) ++ ) ++ ), ++ (XVAVG_DU (v4i64 LASX256D:$a), (v4i64 LASX256D:$b))>; ++ ++ ++ ++def : LASXPat<(sra ++ (v32i8 (add ++ (v32i8 (add (v32i8 ( ++ build_vector (i32 1),(i32 1),(i32 1),(i32 1), ++ (i32 1),(i32 1),(i32 1),(i32 1), ++ (i32 1),(i32 1),(i32 1),(i32 1), ++ (i32 1),(i32 1),(i32 1),(i32 1), ++ (i32 1),(i32 1),(i32 1),(i32 1), ++ (i32 1),(i32 1),(i32 1),(i32 1), ++ (i32 1),(i32 1),(i32 1),(i32 1), ++ (i32 1),(i32 1),(i32 1),(i32 1) ++ )), ++ (v32i8 (add LASX256B:$a, LASX256B:$b)) ++ )), ++ (v32i8 (srl ++ (v32i8 ( add (v32i8( ++ build_vector (i32 1),(i32 1),(i32 1),(i32 1), ++ (i32 1),(i32 1),(i32 1),(i32 1), ++ (i32 1),(i32 1),(i32 1),(i32 1), ++ (i32 1),(i32 1),(i32 1),(i32 1), ++ (i32 1),(i32 1),(i32 1),(i32 1), ++ (i32 1),(i32 1),(i32 1),(i32 1), ++ (i32 1),(i32 1),(i32 1),(i32 1), ++ (i32 1),(i32 1),(i32 
1),(i32 1) ++ )), ++ (v32i8 (add LASX256B:$a, LASX256B:$b)) ++ )), ++ (v32i8 (build_vector (i32 7),(i32 7),(i32 7),(i32 7), ++ (i32 7),(i32 7),(i32 7),(i32 7), ++ (i32 7),(i32 7),(i32 7),(i32 7), ++ (i32 7),(i32 7),(i32 7),(i32 7), ++ (i32 7),(i32 7),(i32 7),(i32 7), ++ (i32 7),(i32 7),(i32 7),(i32 7), ++ (i32 7),(i32 7),(i32 7),(i32 7), ++ (i32 7),(i32 7),(i32 7),(i32 7)) ++ ) ++ ) ++ ) ++ ) ++ ), ++ (v32i8 (build_vector (i32 1),(i32 1),(i32 1),(i32 1), ++ (i32 1),(i32 1),(i32 1),(i32 1), ++ (i32 1),(i32 1),(i32 1),(i32 1), ++ (i32 1),(i32 1),(i32 1),(i32 1), ++ (i32 1),(i32 1),(i32 1),(i32 1), ++ (i32 1),(i32 1),(i32 1),(i32 1), ++ (i32 1),(i32 1),(i32 1),(i32 1), ++ (i32 1),(i32 1),(i32 1),(i32 1) ++ ))), ++ (XVAVGR_B (v32i8 LASX256B:$a), (v32i8 LASX256B:$b))>; ++ ++ ++def : LASXPat<(sra ++ (v16i16 (add ++ (v16i16 (add (v16i16 ( ++ build_vector (i32 1),(i32 1),(i32 1),(i32 1), ++ (i32 1),(i32 1),(i32 1),(i32 1), ++ (i32 1),(i32 1),(i32 1),(i32 1), ++ (i32 1),(i32 1),(i32 1),(i32 1) ++ )), ++ (v16i16 (add LASX256H:$a, LASX256H:$b)) ++ )), ++ (v16i16 (srl ++ (v16i16 (add (v16i16 ( ++ build_vector (i32 1),(i32 1),(i32 1),(i32 1), ++ (i32 1),(i32 1),(i32 1),(i32 1), ++ (i32 1),(i32 1),(i32 1),(i32 1), ++ (i32 1),(i32 1),(i32 1),(i32 1) ++ )), ++ (v16i16 (add LASX256H:$a, LASX256H:$b)) ++ )), ++ (v16i16 (build_vector ++ (i32 15),(i32 15),(i32 15),(i32 15), ++ (i32 15),(i32 15),(i32 15),(i32 15), ++ (i32 15),(i32 15),(i32 15),(i32 15), ++ (i32 15),(i32 15),(i32 15),(i32 15)) ++ ) ++ ) ++ ) ++ ) ++ ), ++ (v16i16 (build_vector (i32 1),(i32 1),(i32 1),(i32 1), ++ (i32 1),(i32 1),(i32 1),(i32 1), ++ (i32 1),(i32 1),(i32 1),(i32 1), ++ (i32 1),(i32 1),(i32 1),(i32 1) ++ ))), ++ (XVAVGR_H (v16i16 LASX256H:$a), (v16i16 LASX256H:$b))>; ++ ++ ++def : LASXPat<(sra ++ (v8i32 (add ++ (v8i32 (add (v8i32 ( ++ build_vector (i32 1),(i32 1),(i32 1),(i32 1), ++ (i32 1),(i32 1),(i32 1),(i32 1) ++ )), ++ (v8i32 (add LASX256W:$a, LASX256W:$b)) ++ )), ++ (v8i32 (srl ++ (v8i32 (add (v8i32 ( ++ build_vector (i32 1),(i32 1),(i32 1),(i32 1), ++ (i32 1),(i32 1),(i32 1),(i32 1) ++ )), ++ (v8i32 (add LASX256W:$a, LASX256W:$b)) ++ )), ++ (v8i32 (build_vector ++ (i32 31),(i32 31),(i32 31),(i32 31), ++ (i32 31),(i32 31),(i32 31),(i32 31) ++ ) ++ ) ++ ) ++ ) ++ ) ++ ), ++ (v8i32 (build_vector (i32 1),(i32 1),(i32 1),(i32 1), ++ (i32 1),(i32 1),(i32 1),(i32 1)))), ++ (XVAVGR_W (v8i32 LASX256W:$a), (v8i32 LASX256W:$b))>; ++ ++def : LASXPat<(sra ++ (v4i64 (add ++ (v4i64 (add (v4i64 ( ++ build_vector (i64 1),(i64 1),(i64 1),(i64 1) ++ )), ++ (v4i64 (add LASX256D:$a, LASX256D:$b)) ++ )), ++ (v4i64 (srl ++ (v4i64 (add (v4i64 ( ++ build_vector (i64 1),(i64 1),(i64 1),(i64 1) ++ )), ++ (v4i64 (add LASX256D:$a, LASX256D:$b)) ++ )), ++ (v4i64 (build_vector ++ (i64 63),(i64 63),(i64 63),(i64 63))) ++ ) ++ ) ++ ) ++ ), ++ (v4i64 (build_vector (i64 1),(i64 1),(i64 1),(i64 1)))), ++ (XVAVGR_D (v4i64 LASX256D:$a), (v4i64 LASX256D:$b))>; ++ ++ ++ ++def : LASXPat<(srl ++ (v32i8 (add (v32i8 ( ++ build_vector (i32 1),(i32 1),(i32 1),(i32 1), ++ (i32 1),(i32 1),(i32 1),(i32 1), ++ (i32 1),(i32 1),(i32 1),(i32 1), ++ (i32 1),(i32 1),(i32 1),(i32 1), ++ (i32 1),(i32 1),(i32 1),(i32 1), ++ (i32 1),(i32 1),(i32 1),(i32 1), ++ (i32 1),(i32 1),(i32 1),(i32 1), ++ (i32 1),(i32 1),(i32 1),(i32 1) ++ )), ++ (v32i8 (add LASX256B:$a, LASX256B:$b)) ++ )), ++ (v32i8 (build_vector (i32 1),(i32 1),(i32 1),(i32 1), ++ (i32 1),(i32 1),(i32 1),(i32 1), ++ (i32 1),(i32 1),(i32 1),(i32 1), ++ (i32 1),(i32 1),(i32 1),(i32 1), ++ (i32 1),(i32 1),(i32 1),(i32 1), ++ 
(i32 1),(i32 1),(i32 1),(i32 1), ++ (i32 1),(i32 1),(i32 1),(i32 1), ++ (i32 1),(i32 1),(i32 1),(i32 1)) ++ ) ++ ), ++ (XVAVGR_BU (v32i8 LASX256B:$a), (v32i8 LASX256B:$b))>; ++ ++def : LASXPat<(srl ++ (v16i16 (add (v16i16 ( ++ build_vector (i32 1),(i32 1),(i32 1),(i32 1), ++ (i32 1),(i32 1),(i32 1),(i32 1), ++ (i32 1),(i32 1),(i32 1),(i32 1), ++ (i32 1),(i32 1),(i32 1),(i32 1) ++ )), ++ (v16i16 (add LASX256H:$a, LASX256H:$b)) ++ )), ++ (v16i16 (build_vector (i32 1),(i32 1),(i32 1),(i32 1), ++ (i32 1),(i32 1),(i32 1),(i32 1), ++ (i32 1),(i32 1),(i32 1),(i32 1), ++ (i32 1),(i32 1),(i32 1),(i32 1)) ++ ) ++ ), ++ (XVAVGR_HU (v16i16 LASX256H:$a), (v16i16 LASX256H:$b))>; ++ ++def : LASXPat<(srl ++ (v8i32 (add (v8i32 ( ++ build_vector (i32 1),(i32 1),(i32 1),(i32 1), ++ (i32 1),(i32 1),(i32 1),(i32 1) ++ )), ++ (v8i32 (add LASX256W:$a, LASX256W:$b)) ++ )), ++ (v8i32 (build_vector (i32 1),(i32 1),(i32 1),(i32 1), ++ (i32 1),(i32 1),(i32 1),(i32 1)) ++ ) ++ ), ++ (XVAVGR_WU (v8i32 LASX256W:$a), (v8i32 LASX256W:$b))>; ++ ++def : LASXPat<(srl ++ (v4i64 (add (v4i64 ( ++ build_vector (i64 1),(i64 1),(i64 1),(i64 1) ++ )), ++ (v4i64 (add LASX256D:$a, LASX256D:$b)) ++ )), ++ (v4i64 (build_vector (i64 1),(i64 1),(i64 1),(i64 1)) ++ ) ++ ), ++ (XVAVGR_DU (v4i64 LASX256D:$a), (v4i64 LASX256D:$b))>; ++ ++ ++def : LASXPat<(mulhs LASX256D:$a, LASX256D:$b), ++ (XVMUH_D LASX256D:$a, LASX256D:$b)>; ++ ++def : LASXPat<(mulhs LASX256W:$a, LASX256W:$b), ++ (XVMUH_W LASX256W:$a, LASX256W:$b)>; ++ ++def : LASXPat<(mulhs LASX256H:$a, LASX256H:$b), ++ (XVMUH_H LASX256H:$a, LASX256H:$b)>; ++ ++def : LASXPat<(mulhs LASX256B:$a, LASX256B:$b), ++ (XVMUH_B LASX256B:$a, LASX256B:$b)>; ++ ++ ++def : LASXPat<(mulhu LASX256D:$a, LASX256D:$b), ++ (XVMUH_DU LASX256D:$a, LASX256D:$b)>; ++ ++def : LASXPat<(mulhu LASX256W:$a, LASX256W:$b), ++ (XVMUH_WU LASX256W:$a, LASX256W:$b)>; ++ ++def : LASXPat<(mulhu LASX256H:$a, LASX256H:$b), ++ (XVMUH_HU LASX256H:$a, LASX256H:$b)>; ++ ++def : LASXPat<(mulhu LASX256B:$a, LASX256B:$b), ++ (XVMUH_BU LASX256B:$a, LASX256B:$b)>; ++ ++ ++def : LASXPat<(LoongArchINSVE (v8i32 LASX256W:$a), (v8i32 LASX256W:$b), uimm3:$ui3), ++ (XVINSVE0_W LASX256W:$a, LASX256W:$b, uimm3:$ui3)>; ++ ++def : LASXPat<(LoongArchINSVE (v4i64 LASX256D:$a), (v4i64 LASX256D:$b), uimm2:$ui2), ++ (XVINSVE0_D LASX256D:$a, LASX256D:$b, uimm2:$ui2)>; ++ ++ ++def : LASXPat<(LoongArchXVPICKVE (v8i32 (bitconvert (v32i8 (build_vector ++ (i32 0),(i32 0),(i32 0),(i32 0), ++ (i32 0),(i32 0),(i32 0),(i32 0), ++ (i32 0),(i32 0),(i32 0),(i32 0), ++ (i32 0),(i32 0),(i32 0),(i32 0), ++ (i32 0),(i32 0),(i32 0),(i32 0), ++ (i32 0),(i32 0),(i32 0),(i32 0), ++ (i32 0),(i32 0),(i32 0),(i32 0), ++ (i32 0),(i32 0),(i32 0),(i32 0) ++ )))), (v8i32 LASX256W:$b), uimm3:$ui3), ++ (XVPICKVE_W (v8i32 (IMPLICIT_DEF)), LASX256W:$b, uimm3:$ui3)>; ++ ++def : LASXPat<(LoongArchXVPICKVE (v4i64 (bitconvert (v32i8 (build_vector ++ (i32 0),(i32 0),(i32 0),(i32 0), ++ (i32 0),(i32 0),(i32 0),(i32 0), ++ (i32 0),(i32 0),(i32 0),(i32 0), ++ (i32 0),(i32 0),(i32 0),(i32 0), ++ (i32 0),(i32 0),(i32 0),(i32 0), ++ (i32 0),(i32 0),(i32 0),(i32 0), ++ (i32 0),(i32 0),(i32 0),(i32 0), ++ (i32 0),(i32 0),(i32 0),(i32 0) ++ )))), (v4i64 LASX256D:$b), uimm2:$ui2), ++ (XVPICKVE_D (v4i64 (IMPLICIT_DEF)), LASX256D:$b, uimm2:$ui2)>; ++ ++ ++def : LASXPat<(LoongArchXVPICKVE (v8i32 (build_vector ++ (i32 0),(i32 0),(i32 0),(i32 0), ++ (i32 0),(i32 0),(i32 0),(i32 0) ++ )), (v8i32 LASX256W:$b), uimm3:$ui3), ++ (XVPICKVE_W (v8i32 (IMPLICIT_DEF)), LASX256W:$b, uimm3:$ui3)>; ++ ++def : 
LASXPat<(LoongArchXVPICKVE (v4i64 (build_vector ++ (i64 0),(i64 0),(i64 0),(i64 0) ++ )), (v4i64 LASX256D:$b), uimm2:$ui2), ++ (XVPICKVE_D (v4i64 (IMPLICIT_DEF)), LASX256D:$b, uimm2:$ui2)>; ++ ++ ++def : LASXPat<(LoongArchXVPICKVE (v8i32 LASX256W:$a), (v8i32 LASX256W:$b), uimm3:$ui3), ++ (XVPICKVE_W LASX256W:$a, LASX256W:$b, uimm3:$ui3)>; ++ ++def : LASXPat<(LoongArchXVPICKVE (v4i64 LASX256D:$a), (v4i64 LASX256D:$b), uimm2:$ui2), ++ (XVPICKVE_D LASX256D:$a, LASX256D:$b, uimm2:$ui2)>; ++ ++ ++def : LASXPat<(LoongArchXVSHUF4I (v4i64 LASX256D:$a), (v4i64 LASX256D:$b), uimm8_32:$ui8), ++ (XVSHUF4I_D LASX256D:$a, LASX256D:$b, uimm8_32:$ui8)>; ++ ++def : LASXPat<(LoongArchXVPERMI (v4i64 LASX256D:$a), uimm8_32:$ui8), ++ (XVPERMI_D LASX256D:$a, uimm8_32:$ui8)>; ++ ++ ++ ++ ++//===----------------------------------------------------------------------===// ++// Intrinsics ++//===----------------------------------------------------------------------===// ++ ++def : LASXPat<(int_loongarch_lasx_xvfcmp_cor_s (v8f32 LASX256W:$xj), (v8f32 LASX256W:$xk)), ++ (XVFCMP_COR_S LASX256W:$xj, LASX256W:$xk)>; ++def : LASXPat<(int_loongarch_lasx_xvfcmp_cor_d (v4f64 LASX256D:$xj), (v4f64 LASX256D:$xk)), ++ (XVFCMP_COR_D LASX256D:$xj, LASX256D:$xk)>; ++ ++def : LASXPat<(int_loongarch_lasx_xvfcmp_cun_s (v8f32 LASX256W:$xj), (v8f32 LASX256W:$xk)), ++ (XVFCMP_CUN_S LASX256W:$xj, LASX256W:$xk)>; ++def : LASXPat<(int_loongarch_lasx_xvfcmp_cun_d (v4f64 LASX256D:$xj), (v4f64 LASX256D:$xk)), ++ (XVFCMP_CUN_D LASX256D:$xj, LASX256D:$xk)>; ++ ++def : LASXPat<(int_loongarch_lasx_xvfcmp_cune_s (v8f32 LASX256W:$xj), (v8f32 LASX256W:$xk)), ++ (XVFCMP_CUNE_S LASX256W:$xj, LASX256W:$xk)>; ++def : LASXPat<(int_loongarch_lasx_xvfcmp_cune_d (v4f64 LASX256D:$xj), (v4f64 LASX256D:$xk)), ++ (XVFCMP_CUNE_D LASX256D:$xj, LASX256D:$xk)>; ++ ++def : LASXPat<(int_loongarch_lasx_xvfcmp_cueq_s (v8f32 LASX256W:$xj), (v8f32 LASX256W:$xk)), ++ (XVFCMP_CUEQ_S LASX256W:$xj, LASX256W:$xk)>; ++def : LASXPat<(int_loongarch_lasx_xvfcmp_cueq_d (v4f64 LASX256D:$xj), (v4f64 LASX256D:$xk)), ++ (XVFCMP_CUEQ_D LASX256D:$xj, LASX256D:$xk)>; ++ ++def : LASXPat<(int_loongarch_lasx_xvfcmp_ceq_s (v8f32 LASX256W:$xj), (v8f32 LASX256W:$xk)), ++ (XVFCMP_CEQ_S LASX256W:$xj, LASX256W:$xk)>; ++def : LASXPat<(int_loongarch_lasx_xvfcmp_ceq_d (v4f64 LASX256D:$xj), (v4f64 LASX256D:$xk)), ++ (XVFCMP_CEQ_D LASX256D:$xj, LASX256D:$xk)>; ++ ++def : LASXPat<(int_loongarch_lasx_xvfcmp_cne_s (v8f32 LASX256W:$xj), (v8f32 LASX256W:$xk)), ++ (XVFCMP_CNE_S LASX256W:$xj, LASX256W:$xk)>; ++def : LASXPat<(int_loongarch_lasx_xvfcmp_cne_d (v4f64 LASX256D:$xj), (v4f64 LASX256D:$xk)), ++ (XVFCMP_CNE_D LASX256D:$xj, LASX256D:$xk)>; ++ ++def : LASXPat<(int_loongarch_lasx_xvfcmp_clt_s (v8f32 LASX256W:$xj), (v8f32 LASX256W:$xk)), ++ (XVFCMP_CLT_S LASX256W:$xj, LASX256W:$xk)>; ++def : LASXPat<(int_loongarch_lasx_xvfcmp_clt_d (v4f64 LASX256D:$xj), (v4f64 LASX256D:$xk)), ++ (XVFCMP_CLT_D LASX256D:$xj, LASX256D:$xk)>; ++ ++def : LASXPat<(int_loongarch_lasx_xvfcmp_cult_s (v8f32 LASX256W:$xj), (v8f32 LASX256W:$xk)), ++ (XVFCMP_CULT_S LASX256W:$xj, LASX256W:$xk)>; ++def : LASXPat<(int_loongarch_lasx_xvfcmp_cult_d (v4f64 LASX256D:$xj), (v4f64 LASX256D:$xk)), ++ (XVFCMP_CULT_D LASX256D:$xj, LASX256D:$xk)>; ++ ++def : LASXPat<(int_loongarch_lasx_xvfcmp_cle_s (v8f32 LASX256W:$xj), (v8f32 LASX256W:$xk)), ++ (XVFCMP_CLE_S LASX256W:$xj, LASX256W:$xk)>; ++def : LASXPat<(int_loongarch_lasx_xvfcmp_cle_d (v4f64 LASX256D:$xj), (v4f64 LASX256D:$xk)), ++ (XVFCMP_CLE_D LASX256D:$xj, LASX256D:$xk)>; ++ ++def : 
LASXPat<(int_loongarch_lasx_xvfcmp_cule_s (v8f32 LASX256W:$xj), (v8f32 LASX256W:$xk)), ++ (XVFCMP_CULE_S LASX256W:$xj, LASX256W:$xk)>; ++def : LASXPat<(int_loongarch_lasx_xvfcmp_cule_d (v4f64 LASX256D:$xj), (v4f64 LASX256D:$xk)), ++ (XVFCMP_CULE_D LASX256D:$xj, LASX256D:$xk)>; ++ ++def : LASXPat<(int_loongarch_lasx_xvseq_b (v32i8 LASX256B:$xj), (v32i8 LASX256B:$xk)), ++ (XVSEQ_B LASX256B:$xj, LASX256B:$xk)>; ++def : LASXPat<(int_loongarch_lasx_xvseq_h (v16i16 LASX256H:$xj), (v16i16 LASX256H:$xk)), ++ (XVSEQ_H LASX256H:$xj, LASX256H:$xk)>; ++def : LASXPat<(int_loongarch_lasx_xvseq_w (v8i32 LASX256W:$xj), (v8i32 LASX256W:$xk)), ++ (XVSEQ_W LASX256W:$xj, LASX256W:$xk)>; ++def : LASXPat<(int_loongarch_lasx_xvseq_d (v4i64 LASX256D:$xj), (v4i64 LASX256D:$xk)), ++ (XVSEQ_D LASX256D:$xj, LASX256D:$xk)>; ++ ++def : LASXPat<(int_loongarch_lasx_xvsle_b (v32i8 LASX256B:$xj), (v32i8 LASX256B:$xk)), ++ (XVSLE_B LASX256B:$xj, LASX256B:$xk)>; ++def : LASXPat<(int_loongarch_lasx_xvsle_h (v16i16 LASX256H:$xj), (v16i16 LASX256H:$xk)), ++ (XVSLE_H LASX256H:$xj, LASX256H:$xk)>; ++def : LASXPat<(int_loongarch_lasx_xvsle_w (v8i32 LASX256W:$xj), (v8i32 LASX256W:$xk)), ++ (XVSLE_W LASX256W:$xj, LASX256W:$xk)>; ++def : LASXPat<(int_loongarch_lasx_xvsle_d (v4i64 LASX256D:$xj), (v4i64 LASX256D:$xk)), ++ (XVSLE_D LASX256D:$xj, LASX256D:$xk)>; ++ ++def : LASXPat<(int_loongarch_lasx_xvsle_bu (v32i8 LASX256B:$xj), (v32i8 LASX256B:$xk)), ++ (XVSLE_BU LASX256B:$xj, LASX256B:$xk)>; ++def : LASXPat<(int_loongarch_lasx_xvsle_hu (v16i16 LASX256H:$xj), (v16i16 LASX256H:$xk)), ++ (XVSLE_HU LASX256H:$xj, LASX256H:$xk)>; ++def : LASXPat<(int_loongarch_lasx_xvsle_wu (v8i32 LASX256W:$xj), (v8i32 LASX256W:$xk)), ++ (XVSLE_WU LASX256W:$xj, LASX256W:$xk)>; ++def : LASXPat<(int_loongarch_lasx_xvsle_du (v4i64 LASX256D:$xj), (v4i64 LASX256D:$xk)), ++ (XVSLE_DU LASX256D:$xj, LASX256D:$xk)>; ++ ++def : LASXPat<(int_loongarch_lasx_xvslt_b (v32i8 LASX256B:$xj), (v32i8 LASX256B:$xk)), ++ (XVSLT_B LASX256B:$xj, LASX256B:$xk)>; ++def : LASXPat<(int_loongarch_lasx_xvslt_h (v16i16 LASX256H:$xj), (v16i16 LASX256H:$xk)), ++ (XVSLT_H LASX256H:$xj, LASX256H:$xk)>; ++def : LASXPat<(int_loongarch_lasx_xvslt_w (v8i32 LASX256W:$xj), (v8i32 LASX256W:$xk)), ++ (XVSLT_W LASX256W:$xj, LASX256W:$xk)>; ++def : LASXPat<(int_loongarch_lasx_xvslt_d (v4i64 LASX256D:$xj), (v4i64 LASX256D:$xk)), ++ (XVSLT_D LASX256D:$xj, LASX256D:$xk)>; ++ ++def : LASXPat<(int_loongarch_lasx_xvslt_bu (v32i8 LASX256B:$xj), (v32i8 LASX256B:$xk)), ++ (XVSLT_BU LASX256B:$xj, LASX256B:$xk)>; ++def : LASXPat<(int_loongarch_lasx_xvslt_hu (v16i16 LASX256H:$xj), (v16i16 LASX256H:$xk)), ++ (XVSLT_HU LASX256H:$xj, LASX256H:$xk)>; ++def : LASXPat<(int_loongarch_lasx_xvslt_wu (v8i32 LASX256W:$xj), (v8i32 LASX256W:$xk)), ++ (XVSLT_WU LASX256W:$xj, LASX256W:$xk)>; ++def : LASXPat<(int_loongarch_lasx_xvslt_du (v4i64 LASX256D:$xj), (v4i64 LASX256D:$xk)), ++ (XVSLT_DU LASX256D:$xj, LASX256D:$xk)>; ++ ++def : LASXPat<(int_loongarch_lasx_xvadd_b (v32i8 LASX256B:$xj), (v32i8 LASX256B:$xk)), ++ (XVADD_B LASX256B:$xj, LASX256B:$xk)>; ++def : LASXPat<(int_loongarch_lasx_xvadd_h (v16i16 LASX256H:$xj), (v16i16 LASX256H:$xk)), ++ (XVADD_H LASX256H:$xj, LASX256H:$xk)>; ++def : LASXPat<(int_loongarch_lasx_xvadd_w (v8i32 LASX256W:$xj), (v8i32 LASX256W:$xk)), ++ (XVADD_W LASX256W:$xj, LASX256W:$xk)>; ++def : LASXPat<(int_loongarch_lasx_xvadd_d (v4i64 LASX256D:$xj), (v4i64 LASX256D:$xk)), ++ (XVADD_D LASX256D:$xj, LASX256D:$xk)>; ++ ++def : LASXPat<(int_loongarch_lasx_xvsub_b (v32i8 LASX256B:$xj), (v32i8 
LASX256B:$xk)), ++ (XVSUB_B LASX256B:$xj, LASX256B:$xk)>; ++def : LASXPat<(int_loongarch_lasx_xvsub_h (v16i16 LASX256H:$xj), (v16i16 LASX256H:$xk)), ++ (XVSUB_H LASX256H:$xj, LASX256H:$xk)>; ++def : LASXPat<(int_loongarch_lasx_xvsub_w (v8i32 LASX256W:$xj), (v8i32 LASX256W:$xk)), ++ (XVSUB_W LASX256W:$xj, LASX256W:$xk)>; ++def : LASXPat<(int_loongarch_lasx_xvsub_d (v4i64 LASX256D:$xj), (v4i64 LASX256D:$xk)), ++ (XVSUB_D LASX256D:$xj, LASX256D:$xk)>; ++ ++def : LASXPat<(int_loongarch_lasx_xvmax_b (v32i8 LASX256B:$xj), (v32i8 LASX256B:$xk)), ++ (XVMAX_B LASX256B:$xj, LASX256B:$xk)>; ++def : LASXPat<(int_loongarch_lasx_xvmax_h (v16i16 LASX256H:$xj), (v16i16 LASX256H:$xk)), ++ (XVMAX_H LASX256H:$xj, LASX256H:$xk)>; ++def : LASXPat<(int_loongarch_lasx_xvmax_w (v8i32 LASX256W:$xj), (v8i32 LASX256W:$xk)), ++ (XVMAX_W LASX256W:$xj, LASX256W:$xk)>; ++def : LASXPat<(int_loongarch_lasx_xvmax_d (v4i64 LASX256D:$xj), (v4i64 LASX256D:$xk)), ++ (XVMAX_D LASX256D:$xj, LASX256D:$xk)>; ++ ++def : LASXPat<(int_loongarch_lasx_xvmin_b (v32i8 LASX256B:$xj), (v32i8 LASX256B:$xk)), ++ (XVMIN_B LASX256B:$xj, LASX256B:$xk)>; ++def : LASXPat<(int_loongarch_lasx_xvmin_h (v16i16 LASX256H:$xj), (v16i16 LASX256H:$xk)), ++ (XVMIN_H LASX256H:$xj, LASX256H:$xk)>; ++def : LASXPat<(int_loongarch_lasx_xvmin_w (v8i32 LASX256W:$xj), (v8i32 LASX256W:$xk)), ++ (XVMIN_W LASX256W:$xj, LASX256W:$xk)>; ++def : LASXPat<(int_loongarch_lasx_xvmin_d (v4i64 LASX256D:$xj), (v4i64 LASX256D:$xk)), ++ (XVMIN_D LASX256D:$xj, LASX256D:$xk)>; ++ ++def : LASXPat<(int_loongarch_lasx_xvmin_bu (v32i8 LASX256B:$xj), (v32i8 LASX256B:$xk)), ++ (XVMIN_BU LASX256B:$xj, LASX256B:$xk)>; ++def : LASXPat<(int_loongarch_lasx_xvmin_hu (v16i16 LASX256H:$xj), (v16i16 LASX256H:$xk)), ++ (XVMIN_HU LASX256H:$xj, LASX256H:$xk)>; ++def : LASXPat<(int_loongarch_lasx_xvmin_wu (v8i32 LASX256W:$xj), (v8i32 LASX256W:$xk)), ++ (XVMIN_WU LASX256W:$xj, LASX256W:$xk)>; ++def : LASXPat<(int_loongarch_lasx_xvmin_du (v4i64 LASX256D:$xj), (v4i64 LASX256D:$xk)), ++ (XVMIN_DU LASX256D:$xj, LASX256D:$xk)>; ++ ++def : LASXPat<(int_loongarch_lasx_xvmul_b (v32i8 LASX256B:$xj), (v32i8 LASX256B:$xk)), ++ (XVMUL_B LASX256B:$xj, LASX256B:$xk)>; ++def : LASXPat<(int_loongarch_lasx_xvmul_h (v16i16 LASX256H:$xj), (v16i16 LASX256H:$xk)), ++ (XVMUL_H LASX256H:$xj, LASX256H:$xk)>; ++def : LASXPat<(int_loongarch_lasx_xvmul_w (v8i32 LASX256W:$xj), (v8i32 LASX256W:$xk)), ++ (XVMUL_W LASX256W:$xj, LASX256W:$xk)>; ++def : LASXPat<(int_loongarch_lasx_xvmul_d (v4i64 LASX256D:$xj), (v4i64 LASX256D:$xk)), ++ (XVMUL_D LASX256D:$xj, LASX256D:$xk)>; ++ ++def : LASXPat<(int_loongarch_lasx_xvdiv_bu (v32i8 LASX256B:$xj), (v32i8 LASX256B:$xk)), ++ (XVDIV_BU LASX256B:$xj, LASX256B:$xk)>; ++def : LASXPat<(int_loongarch_lasx_xvdiv_hu (v16i16 LASX256H:$xj), (v16i16 LASX256H:$xk)), ++ (XVDIV_HU LASX256H:$xj, LASX256H:$xk)>; ++def : LASXPat<(int_loongarch_lasx_xvdiv_wu (v8i32 LASX256W:$xj), (v8i32 LASX256W:$xk)), ++ (XVDIV_WU LASX256W:$xj, LASX256W:$xk)>; ++def : LASXPat<(int_loongarch_lasx_xvdiv_du (v4i64 LASX256D:$xj), (v4i64 LASX256D:$xk)), ++ (XVDIV_DU LASX256D:$xj, LASX256D:$xk)>; ++ ++def : LASXPat<(int_loongarch_lasx_xvsll_b (v32i8 LASX256B:$xj), (v32i8 LASX256B:$xk)), ++ (XVSLL_B LASX256B:$xj, LASX256B:$xk)>; ++def : LASXPat<(int_loongarch_lasx_xvsll_h (v16i16 LASX256H:$xj), (v16i16 LASX256H:$xk)), ++ (XVSLL_H LASX256H:$xj, LASX256H:$xk)>; ++def : LASXPat<(int_loongarch_lasx_xvsll_w (v8i32 LASX256W:$xj), (v8i32 LASX256W:$xk)), ++ (XVSLL_W LASX256W:$xj, LASX256W:$xk)>; ++def : 
LASXPat<(int_loongarch_lasx_xvsll_d (v4i64 LASX256D:$xj), (v4i64 LASX256D:$xk)), ++ (XVSLL_D LASX256D:$xj, LASX256D:$xk)>; ++ ++def : LASXPat<(int_loongarch_lasx_xvsrl_b (v32i8 LASX256B:$xj), (v32i8 LASX256B:$xk)), ++ (XVSRL_B LASX256B:$xj, LASX256B:$xk)>; ++def : LASXPat<(int_loongarch_lasx_xvsrl_h (v16i16 LASX256H:$xj), (v16i16 LASX256H:$xk)), ++ (XVSRL_H LASX256H:$xj, LASX256H:$xk)>; ++def : LASXPat<(int_loongarch_lasx_xvsrl_w (v8i32 LASX256W:$xj), (v8i32 LASX256W:$xk)), ++ (XVSRL_W LASX256W:$xj, LASX256W:$xk)>; ++def : LASXPat<(int_loongarch_lasx_xvsrl_d (v4i64 LASX256D:$xj), (v4i64 LASX256D:$xk)), ++ (XVSRL_D LASX256D:$xj, LASX256D:$xk)>; ++ ++def : LASXPat<(int_loongarch_lasx_xvsra_b (v32i8 LASX256B:$xj), (v32i8 LASX256B:$xk)), ++ (XVSRA_B LASX256B:$xj, LASX256B:$xk)>; ++def : LASXPat<(int_loongarch_lasx_xvsra_h (v16i16 LASX256H:$xj), (v16i16 LASX256H:$xk)), ++ (XVSRA_H LASX256H:$xj, LASX256H:$xk)>; ++def : LASXPat<(int_loongarch_lasx_xvsra_w (v8i32 LASX256W:$xj), (v8i32 LASX256W:$xk)), ++ (XVSRA_W LASX256W:$xj, LASX256W:$xk)>; ++def : LASXPat<(int_loongarch_lasx_xvsra_d (v4i64 LASX256D:$xj), (v4i64 LASX256D:$xk)), ++ (XVSRA_D LASX256D:$xj, LASX256D:$xk)>; ++ ++def : LASXPat<(int_loongarch_lasx_xvfadd_s (v8f32 LASX256W:$xj), (v8f32 LASX256W:$xk)), ++ (XVFADD_S LASX256W:$xj, LASX256W:$xk)>; ++def : LASXPat<(int_loongarch_lasx_xvfadd_d (v4f64 LASX256D:$xj), (v4f64 LASX256D:$xk)), ++ (XVFADD_D LASX256D:$xj, LASX256D:$xk)>; ++ ++def : LASXPat<(int_loongarch_lasx_xvfsub_s (v8f32 LASX256W:$xj), (v8f32 LASX256W:$xk)), ++ (XVFSUB_S LASX256W:$xj, LASX256W:$xk)>; ++def : LASXPat<(int_loongarch_lasx_xvfsub_d (v4f64 LASX256D:$xj), (v4f64 LASX256D:$xk)), ++ (XVFSUB_D LASX256D:$xj, LASX256D:$xk)>; ++ ++def : LASXPat<(int_loongarch_lasx_xvfmul_s (v8f32 LASX256W:$xj), (v8f32 LASX256W:$xk)), ++ (XVFMUL_S LASX256W:$xj, LASX256W:$xk)>; ++def : LASXPat<(int_loongarch_lasx_xvfmul_d (v4f64 LASX256D:$xj), (v4f64 LASX256D:$xk)), ++ (XVFMUL_D LASX256D:$xj, LASX256D:$xk)>; ++ ++def : LASXPat<(int_loongarch_lasx_xvfdiv_s (v8f32 LASX256W:$xj), (v8f32 LASX256W:$xk)), ++ (XVFDIV_S LASX256W:$xj, LASX256W:$xk)>; ++def : LASXPat<(int_loongarch_lasx_xvfdiv_d (v4f64 LASX256D:$xj), (v4f64 LASX256D:$xk)), ++ (XVFDIV_D LASX256D:$xj, LASX256D:$xk)>; ++ ++def : LASXPat<(int_loongarch_lasx_xvfsqrt_s (v8f32 LASX256W:$xj)), ++ (XVFSQRT_S LASX256W:$xj)>; ++def : LASXPat<(int_loongarch_lasx_xvfsqrt_d (v4f64 LASX256D:$xj)), ++ (XVFSQRT_D LASX256D:$xj)>; ++ ++def : LASXPat<(v8f32 (int_loongarch_lasx_xvffint_s_w (v8i32 LASX256W:$xj))), ++ (XVFFINT_S_W (v8i32 LASX256W:$xj))>; ++def : LASXPat<(v8f32 (int_loongarch_lasx_xvffint_s_wu (v8i32 LASX256W:$xj))), ++ (XVFFINT_S_WU (v8i32 LASX256W:$xj))>; ++ ++def : LASXPat<(v4f64 (int_loongarch_lasx_xvffint_d_l (v4i64 LASX256D:$xj))), ++ (XVFFINT_D_L (v4i64 LASX256D:$xj))>; ++def : LASXPat<(v4f64 (int_loongarch_lasx_xvffint_d_lu (v4i64 LASX256D:$xj))), ++ (XVFFINT_D_LU (v4i64 LASX256D:$xj))>; ++ ++def : LASXPat<(int_loongarch_lasx_xvreplgr2vr_b GPR32Opnd:$rj), ++ (XVREPLGR2VR_B GPR32Opnd:$rj)>; ++def : LASXPat<(int_loongarch_lasx_xvreplgr2vr_h GPR32Opnd:$rj), ++ (XVREPLGR2VR_H GPR32Opnd:$rj)>; ++def : LASXPat<(int_loongarch_lasx_xvreplgr2vr_w GPR32Opnd:$rj), ++ (XVREPLGR2VR_W GPR32Opnd:$rj)>; ++def : LASXPat<(int_loongarch_lasx_xvreplgr2vr_d GPR64Opnd:$rj), ++ (XVREPLGR2VR_D GPR64Opnd:$rj)>; ++ ++def : LASXPat<(int_loongarch_lasx_xvpickve2gr_w (v8i32 LASX256W:$xj), (immZExt3:$ui3)), ++ (XVPICKVE2GR_W LASX256W:$xj, uimm3:$ui3)>; ++def : LASXPat<(int_loongarch_lasx_xvpickve2gr_d (v4i64 
LASX256D:$xj), (immZExt2:$ui2)),
++ (XVPICKVE2GR_D LASX256D:$xj, uimm2:$ui2)>;
++
++def : LASXPat<(int_loongarch_lasx_xvpickve2gr_wu (v8i32 LASX256W:$xj), (immZExt3:$ui3)),
++ (XVPICKVE2GR_WU LASX256W:$xj, uimm3:$ui3)>;
++def : LASXPat<(int_loongarch_lasx_xvpickve2gr_du (v4i64 LASX256D:$xj), (immZExt2:$ui2)),
++ (XVPICKVE2GR_DU LASX256D:$xj, uimm2:$ui2)>;
++
++def : LASXPat<(int_loongarch_lasx_xvreplve0_d (v4i64 LASX256D:$xj)),
++ (XVREPLVE0_D (v4i64 LASX256D:$xj))>;
++
++def : LASXPat<(int_loongarch_lasx_xvinsgr2vr_w (v8i32 LASX256W:$xj), GPR32Opnd:$rj, (immZExt3:$ui3)),
++ (XVINSGR2VR_W LASX256W:$xj, GPR32Opnd:$rj, uimm3:$ui3)>;
++def : LASXPat<(int_loongarch_lasx_xvinsgr2vr_d (v4i64 LASX256D:$xj), GPR64Opnd:$rj, (immZExt2:$ui2)),
++ (XVINSGR2VR_D LASX256D:$xj, GPR64Opnd:$rj, uimm2:$ui2)>;
++
++def : LASXPat<(int_loongarch_lasx_xvpickve_w (v8i32 LASX256W:$xj), (immZExt3:$ui3)),
++ (XVPICKVE_W (v8i32 (IMPLICIT_DEF)), LASX256W:$xj, uimm3:$ui3)>;
++def : LASXPat<(int_loongarch_lasx_xvpickve_d (v4i64 LASX256D:$xj), (immZExt2:$ui2)),
++ (XVPICKVE_D (v4i64 (IMPLICIT_DEF)), LASX256D:$xj, uimm2:$ui2)>;
++
++def : LASXPat<(int_loongarch_lasx_xvdiv_b (v32i8 LASX256B:$xj), (v32i8 LASX256B:$xk)),
++ (XVDIV_B LASX256B:$xj, LASX256B:$xk)>;
++def : LASXPat<(int_loongarch_lasx_xvdiv_h (v16i16 LASX256H:$xj), (v16i16 LASX256H:$xk)),
++ (XVDIV_H LASX256H:$xj, LASX256H:$xk)>;
++def : LASXPat<(int_loongarch_lasx_xvdiv_w (v8i32 LASX256W:$xj), (v8i32 LASX256W:$xk)),
++ (XVDIV_W LASX256W:$xj, LASX256W:$xk)>;
++def : LASXPat<(int_loongarch_lasx_xvdiv_d (v4i64 LASX256D:$xj), (v4i64 LASX256D:$xk)),
++ (XVDIV_D LASX256D:$xj, LASX256D:$xk)>;
++
++def : LASXPat<(int_loongarch_lasx_xvmod_bu (v32i8 LASX256B:$xj), (v32i8 LASX256B:$xk)),
++ (XVMOD_BU LASX256B:$xj, LASX256B:$xk)>;
++def : LASXPat<(int_loongarch_lasx_xvmod_hu (v16i16 LASX256H:$xj), (v16i16 LASX256H:$xk)),
++ (XVMOD_HU LASX256H:$xj, LASX256H:$xk)>;
++def : LASXPat<(int_loongarch_lasx_xvmod_wu (v8i32 LASX256W:$xj), (v8i32 LASX256W:$xk)),
++ (XVMOD_WU LASX256W:$xj, LASX256W:$xk)>;
++def : LASXPat<(int_loongarch_lasx_xvmod_du (v4i64 LASX256D:$xj), (v4i64 LASX256D:$xk)),
++ (XVMOD_DU LASX256D:$xj, LASX256D:$xk)>;
++
++def : LASXPat<(int_loongarch_lasx_xvmod_b (v32i8 LASX256B:$xj), (v32i8 LASX256B:$xk)),
++ (XVMOD_B LASX256B:$xj, LASX256B:$xk)>;
++def : LASXPat<(int_loongarch_lasx_xvmod_h (v16i16 LASX256H:$xj), (v16i16 LASX256H:$xk)),
++ (XVMOD_H LASX256H:$xj, LASX256H:$xk)>;
++def : LASXPat<(int_loongarch_lasx_xvmod_w (v8i32 LASX256W:$xj), (v8i32 LASX256W:$xk)),
++ (XVMOD_W LASX256W:$xj, LASX256W:$xk)>;
++def : LASXPat<(int_loongarch_lasx_xvmod_d (v4i64 LASX256D:$xj), (v4i64 LASX256D:$xk)),
++ (XVMOD_D LASX256D:$xj, LASX256D:$xk)>;
++
++def : LASXPat<(int_loongarch_lasx_xvmax_bu (v32i8 LASX256B:$xj), (v32i8 LASX256B:$xk)),
++ (XVMAX_BU LASX256B:$xj, LASX256B:$xk)>;
++def : LASXPat<(int_loongarch_lasx_xvmax_hu (v16i16 LASX256H:$xj), (v16i16 LASX256H:$xk)),
++ (XVMAX_HU LASX256H:$xj, LASX256H:$xk)>;
++def : LASXPat<(int_loongarch_lasx_xvmax_wu (v8i32 LASX256W:$xj), (v8i32 LASX256W:$xk)),
++ (XVMAX_WU LASX256W:$xj, LASX256W:$xk)>;
++def : LASXPat<(int_loongarch_lasx_xvmax_du (v4i64 LASX256D:$xj), (v4i64 LASX256D:$xk)),
++ (XVMAX_DU LASX256D:$xj, LASX256D:$xk)>;
++
++def : LASXPat<(int_loongarch_lasx_xvfrint_s (v8f32 LASX256W:$xj)),
++ (XVFRINT_S LASX256W:$xj)>;
++def : LASXPat<(int_loongarch_lasx_xvfrint_d (v4f64 LASX256D:$xj)),
++ (XVFRINT_D LASX256D:$xj)>;
++
++def : LASXPat<(int_loongarch_lasx_xvpackod_b (v32i8 LASX256B:$xj), (v32i8 LASX256B:$xk)),
++
(XVPACKOD_B LASX256B:$xj, LASX256B:$xk)>; ++def : LASXPat<(int_loongarch_lasx_xvpackod_h (v16i16 LASX256H:$xj), (v16i16 LASX256H:$xk)), ++ (XVPACKOD_H LASX256H:$xj, LASX256H:$xk)>; ++def : LASXPat<(int_loongarch_lasx_xvpackod_w (v8i32 LASX256W:$xj), (v8i32 LASX256W:$xk)), ++ (XVPACKOD_W LASX256W:$xj, LASX256W:$xk)>; ++def : LASXPat<(int_loongarch_lasx_xvpackod_d (v4i64 LASX256D:$xj), (v4i64 LASX256D:$xk)), ++ (XVPACKOD_D LASX256D:$xj, LASX256D:$xk)>; ++ ++def : LASXPat<(int_loongarch_lasx_xvpackev_b (v32i8 LASX256B:$xj), (v32i8 LASX256B:$xk)), ++ (XVPACKEV_B LASX256B:$xj, LASX256B:$xk)>; ++def : LASXPat<(int_loongarch_lasx_xvpackev_h (v16i16 LASX256H:$xj), (v16i16 LASX256H:$xk)), ++ (XVPACKEV_H LASX256H:$xj, LASX256H:$xk)>; ++def : LASXPat<(int_loongarch_lasx_xvpackev_w (v8i32 LASX256W:$xj), (v8i32 LASX256W:$xk)), ++ (XVPACKEV_W LASX256W:$xj, LASX256W:$xk)>; ++def : LASXPat<(int_loongarch_lasx_xvpackev_d (v4i64 LASX256D:$xj), (v4i64 LASX256D:$xk)), ++ (XVPACKEV_D LASX256D:$xj, LASX256D:$xk)>; ++ ++def : LASXPat<(int_loongarch_lasx_xvilvh_b (v32i8 LASX256B:$xj), (v32i8 LASX256B:$xk)), ++ (XVILVH_B LASX256B:$xj, LASX256B:$xk)>; ++def : LASXPat<(int_loongarch_lasx_xvilvh_h (v16i16 LASX256H:$xj), (v16i16 LASX256H:$xk)), ++ (XVILVH_H LASX256H:$xj, LASX256H:$xk)>; ++def : LASXPat<(int_loongarch_lasx_xvilvh_w (v8i32 LASX256W:$xj), (v8i32 LASX256W:$xk)), ++ (XVILVH_W LASX256W:$xj, LASX256W:$xk)>; ++def : LASXPat<(int_loongarch_lasx_xvilvh_d (v4i64 LASX256D:$xj), (v4i64 LASX256D:$xk)), ++ (XVILVH_D LASX256D:$xj, LASX256D:$xk)>; ++ ++def : LASXPat<(int_loongarch_lasx_xvilvl_b (v32i8 LASX256B:$xj), (v32i8 LASX256B:$xk)), ++ (XVILVL_B LASX256B:$xj, LASX256B:$xk)>; ++def : LASXPat<(int_loongarch_lasx_xvilvl_h (v16i16 LASX256H:$xj), (v16i16 LASX256H:$xk)), ++ (XVILVL_H LASX256H:$xj, LASX256H:$xk)>; ++def : LASXPat<(int_loongarch_lasx_xvilvl_w (v8i32 LASX256W:$xj), (v8i32 LASX256W:$xk)), ++ (XVILVL_W LASX256W:$xj, LASX256W:$xk)>; ++def : LASXPat<(int_loongarch_lasx_xvilvl_d (v4i64 LASX256D:$xj), (v4i64 LASX256D:$xk)), ++ (XVILVL_D LASX256D:$xj, LASX256D:$xk)>; ++ ++def : LASXPat<(int_loongarch_lasx_xvpickev_b (v32i8 LASX256B:$xj), (v32i8 LASX256B:$xk)), ++ (XVPICKEV_B LASX256B:$xj, LASX256B:$xk)>; ++def : LASXPat<(int_loongarch_lasx_xvpickev_h (v16i16 LASX256H:$xj), (v16i16 LASX256H:$xk)), ++ (XVPICKEV_H LASX256H:$xj, LASX256H:$xk)>; ++def : LASXPat<(int_loongarch_lasx_xvpickev_w (v8i32 LASX256W:$xj), (v8i32 LASX256W:$xk)), ++ (XVPICKEV_W LASX256W:$xj, LASX256W:$xk)>; ++def : LASXPat<(int_loongarch_lasx_xvpickev_d (v4i64 LASX256D:$xj), (v4i64 LASX256D:$xk)), ++ (XVPICKEV_D LASX256D:$xj, LASX256D:$xk)>; ++ ++def : LASXPat<(int_loongarch_lasx_xvpickod_b (v32i8 LASX256B:$xj), (v32i8 LASX256B:$xk)), ++ (XVPICKOD_B LASX256B:$xj, LASX256B:$xk)>; ++def : LASXPat<(int_loongarch_lasx_xvpickod_h (v16i16 LASX256H:$xj), (v16i16 LASX256H:$xk)), ++ (XVPICKOD_H LASX256H:$xj, LASX256H:$xk)>; ++def : LASXPat<(int_loongarch_lasx_xvpickod_w (v8i32 LASX256W:$xj), (v8i32 LASX256W:$xk)), ++ (XVPICKOD_W LASX256W:$xj, LASX256W:$xk)>; ++def : LASXPat<(int_loongarch_lasx_xvpickod_d (v4i64 LASX256D:$xj), (v4i64 LASX256D:$xk)), ++ (XVPICKOD_D LASX256D:$xj, LASX256D:$xk)>; ++ ++def : LASXPat<(int_loongarch_lasx_xvsadd_b (v32i8 LASX256B:$xj), (v32i8 LASX256B:$xk)), ++ (XVSADD_B LASX256B:$xj, LASX256B:$xk)>; ++def : LASXPat<(int_loongarch_lasx_xvsadd_h (v16i16 LASX256H:$xj), (v16i16 LASX256H:$xk)), ++ (XVSADD_H LASX256H:$xj, LASX256H:$xk)>; ++def : LASXPat<(int_loongarch_lasx_xvsadd_w (v8i32 LASX256W:$xj), (v8i32 LASX256W:$xk)), ++ 
(XVSADD_W LASX256W:$xj, LASX256W:$xk)>; ++def : LASXPat<(int_loongarch_lasx_xvsadd_d (v4i64 LASX256D:$xj), (v4i64 LASX256D:$xk)), ++ (XVSADD_D LASX256D:$xj, LASX256D:$xk)>; ++ ++def : LASXPat<(int_loongarch_lasx_xvssub_b (v32i8 LASX256B:$xj), (v32i8 LASX256B:$xk)), ++ (XVSSUB_B LASX256B:$xj, LASX256B:$xk)>; ++def : LASXPat<(int_loongarch_lasx_xvssub_h (v16i16 LASX256H:$xj), (v16i16 LASX256H:$xk)), ++ (XVSSUB_H LASX256H:$xj, LASX256H:$xk)>; ++def : LASXPat<(int_loongarch_lasx_xvssub_w (v8i32 LASX256W:$xj), (v8i32 LASX256W:$xk)), ++ (XVSSUB_W LASX256W:$xj, LASX256W:$xk)>; ++def : LASXPat<(int_loongarch_lasx_xvssub_d (v4i64 LASX256D:$xj), (v4i64 LASX256D:$xk)), ++ (XVSSUB_D LASX256D:$xj, LASX256D:$xk)>; ++ ++def : LASXPat<(int_loongarch_lasx_xvsadd_bu (v32i8 LASX256B:$xj), (v32i8 LASX256B:$xk)), ++ (XVSADD_BU LASX256B:$xj, LASX256B:$xk)>; ++def : LASXPat<(int_loongarch_lasx_xvsadd_hu (v16i16 LASX256H:$xj), (v16i16 LASX256H:$xk)), ++ (XVSADD_HU LASX256H:$xj, LASX256H:$xk)>; ++def : LASXPat<(int_loongarch_lasx_xvsadd_wu (v8i32 LASX256W:$xj), (v8i32 LASX256W:$xk)), ++ (XVSADD_WU LASX256W:$xj, LASX256W:$xk)>; ++def : LASXPat<(int_loongarch_lasx_xvsadd_du (v4i64 LASX256D:$xj), (v4i64 LASX256D:$xk)), ++ (XVSADD_DU LASX256D:$xj, LASX256D:$xk)>; ++ ++def : LASXPat<(int_loongarch_lasx_xvssub_bu (v32i8 LASX256B:$xj), (v32i8 LASX256B:$xk)), ++ (XVSSUB_BU LASX256B:$xj, LASX256B:$xk)>; ++def : LASXPat<(int_loongarch_lasx_xvssub_hu (v16i16 LASX256H:$xj), (v16i16 LASX256H:$xk)), ++ (XVSSUB_HU LASX256H:$xj, LASX256H:$xk)>; ++def : LASXPat<(int_loongarch_lasx_xvssub_wu (v8i32 LASX256W:$xj), (v8i32 LASX256W:$xk)), ++ (XVSSUB_WU LASX256W:$xj, LASX256W:$xk)>; ++def : LASXPat<(int_loongarch_lasx_xvssub_du (v4i64 LASX256D:$xj), (v4i64 LASX256D:$xk)), ++ (XVSSUB_DU LASX256D:$xj, LASX256D:$xk)>; ++ ++def : LASXPat<(int_loongarch_lasx_xvmadd_b (v32i8 LASX256B:$xd_in), (v32i8 LASX256B:$xj), (v32i8 LASX256B:$xk)), ++ (XVMADD_B LASX256B:$xd_in, LASX256B:$xj, LASX256B:$xk)>; ++def : LASXPat<(int_loongarch_lasx_xvmadd_h (v16i16 LASX256H:$xd_in), (v16i16 LASX256H:$xj), (v16i16 LASX256H:$xk)), ++ (XVMADD_H LASX256H:$xd_in, LASX256H:$xj, LASX256H:$xk)>; ++def : LASXPat<(int_loongarch_lasx_xvmadd_w (v8i32 LASX256W:$xd_in), (v8i32 LASX256W:$xj), (v8i32 LASX256W:$xk)), ++ (XVMADD_W LASX256W:$xd_in, LASX256W:$xj, LASX256W:$xk)>; ++def : LASXPat<(int_loongarch_lasx_xvmadd_d (v4i64 LASX256D:$xd_in), (v4i64 LASX256D:$xj), (v4i64 LASX256D:$xk)), ++ (XVMADD_D LASX256D:$xd_in, LASX256D:$xj, LASX256D:$xk)>; ++ ++def : LASXPat<(int_loongarch_lasx_xvmsub_b (v32i8 LASX256B:$xd_in), (v32i8 LASX256B:$xj), (v32i8 LASX256B:$xk)), ++ (XVMSUB_B LASX256B:$xd_in, LASX256B:$xj, LASX256B:$xk)>; ++def : LASXPat<(int_loongarch_lasx_xvmsub_h (v16i16 LASX256H:$xd_in), (v16i16 LASX256H:$xj), (v16i16 LASX256H:$xk)), ++ (XVMSUB_H LASX256H:$xd_in, LASX256H:$xj, LASX256H:$xk)>; ++def : LASXPat<(int_loongarch_lasx_xvmsub_w (v8i32 LASX256W:$xd_in), (v8i32 LASX256W:$xj), (v8i32 LASX256W:$xk)), ++ (XVMSUB_W LASX256W:$xd_in, LASX256W:$xj, LASX256W:$xk)>; ++def : LASXPat<(int_loongarch_lasx_xvmsub_d (v4i64 LASX256D:$xd_in), (v4i64 LASX256D:$xj), (v4i64 LASX256D:$xk)), ++ (XVMSUB_D LASX256D:$xd_in, LASX256D:$xj, LASX256D:$xk)>; ++ ++def : LASXPat<(v8i32 (int_loongarch_lasx_xvftintrz_wu_s (v8f32 LASX256W:$xj))), ++ (XVFTINTRZ_WU_S (v8f32 LASX256W:$xj))>; ++def : LASXPat<(v4i64 (int_loongarch_lasx_xvftintrz_lu_d (v4f64 LASX256D:$xj))), ++ (XVFTINTRZ_LU_D (v4f64 LASX256D:$xj))>; ++ ++def : LASXPat<(v8i32 (int_loongarch_lasx_xvftintrz_w_s (v8f32 LASX256W:$xj))), ++ 
(XVFTINTRZ_W_S (v8f32 LASX256W:$xj))>; ++def : LASXPat<(v4i64 (int_loongarch_lasx_xvftintrz_l_d (v4f64 LASX256D:$xj))), ++ (XVFTINTRZ_L_D (v4f64 LASX256D:$xj))>; ++ ++def : LASXPat<(int_loongarch_lasx_xvbitclr_b (v32i8 LASX256B:$xj), (v32i8 LASX256B:$xk)), ++ (XVBITCLR_B LASX256B:$xj, LASX256B:$xk)>; ++def : LASXPat<(int_loongarch_lasx_xvbitclr_h (v16i16 LASX256H:$xj), (v16i16 LASX256H:$xk)), ++ (XVBITCLR_H LASX256H:$xj, LASX256H:$xk)>; ++def : LASXPat<(int_loongarch_lasx_xvbitclr_w (v8i32 LASX256W:$xj), (v8i32 LASX256W:$xk)), ++ (XVBITCLR_W LASX256W:$xj, LASX256W:$xk)>; ++def : LASXPat<(int_loongarch_lasx_xvbitclr_d (v4i64 LASX256D:$xj), (v4i64 LASX256D:$xk)), ++ (XVBITCLR_D LASX256D:$xj, LASX256D:$xk)>; ++ ++def : LASXPat<(int_loongarch_lasx_xvclz_b (v32i8 LASX256B:$xj)), ++ (XVCLZ_B LASX256B:$xj)>; ++def : LASXPat<(int_loongarch_lasx_xvclz_h (v16i16 LASX256H:$xj)), ++ (XVCLZ_H LASX256H:$xj)>; ++def : LASXPat<(int_loongarch_lasx_xvclz_w (v8i32 LASX256W:$xj)), ++ (XVCLZ_W LASX256W:$xj)>; ++def : LASXPat<(int_loongarch_lasx_xvclz_d (v4i64 LASX256D:$xj)), ++ (XVCLZ_D LASX256D:$xj)>; ++ ++def : LASXPat<(int_loongarch_lasx_xvpcnt_b (v32i8 LASX256B:$xj)), ++ (XVPCNT_B LASX256B:$xj)>; ++def : LASXPat<(int_loongarch_lasx_xvpcnt_h (v16i16 LASX256H:$xj)), ++ (XVPCNT_H LASX256H:$xj)>; ++def : LASXPat<(int_loongarch_lasx_xvpcnt_w (v8i32 LASX256W:$xj)), ++ (XVPCNT_W LASX256W:$xj)>; ++def : LASXPat<(int_loongarch_lasx_xvpcnt_d (v4i64 LASX256D:$xj)), ++ (XVPCNT_D LASX256D:$xj)>; ++ ++ ++def : LASXPat<(v32i8 (load (add iPTR:$xj, iPTR:$xk))), ++ (XVLDX PtrRC:$xj, PtrRC:$xk)>; ++ ++def : LASXPat<(store (v32i8 LASX256B:$xd), (add iPTR:$xj, iPTR:$xk)), ++ (XVSTX LASX256B:$xd, PtrRC:$xj, PtrRC:$xk)>; ++ ++ ++def : LASXPat<(v4i64 (sext_invec (v8i32 LASX256W:$xj))), ++ (VEXT2XV_D_W LASX256W:$xj)>; ++def : LASXPat<(v8i32 (sext_invec (v16i16 LASX256H:$xj))), ++ (VEXT2XV_W_H LASX256H:$xj)>; ++def : LASXPat<(v16i16 (sext_invec (v32i8 LASX256B:$xj))), ++ (VEXT2XV_H_B LASX256B:$xj)>; ++ ++ ++def : LASXPat<(v4i64 (zext_invec (v8i32 LASX256W:$xj))), ++ (VEXT2XV_DU_WU LASX256W:$xj)>; ++def : LASXPat<(v8i32 (zext_invec (v16i16 LASX256H:$xj))), ++ (VEXT2XV_WU_HU LASX256H:$xj)>; ++def : LASXPat<(v16i16 (zext_invec (v32i8 LASX256B:$xj))), ++ (VEXT2XV_HU_BU LASX256B:$xj)>; ++ ++ ++def : LASXPat<(v4i64 (sext_invec (v16i16 LASX256H:$xj))), ++ (VEXT2XV_D_H LASX256H:$xj)>; ++def : LASXPat<(v4i64 (sext_invec (v32i8 LASX256B:$xj))), ++ (VEXT2XV_D_B LASX256B:$xj)>; ++def : LASXPat<(v8i32 (sext_invec (v32i8 LASX256B:$xj))), ++ (VEXT2XV_W_B LASX256B:$xj)>; ++ ++ ++def : LASXPat<(v4i64 (zext_invec (v16i16 LASX256H:$xj))), ++ (VEXT2XV_DU_HU LASX256H:$xj)>; ++def : LASXPat<(v4i64 (zext_invec (v32i8 LASX256B:$xj))), ++ (VEXT2XV_DU_BU LASX256B:$xj)>; ++def : LASXPat<(v8i32 (zext_invec (v32i8 LASX256B:$xj))), ++ (VEXT2XV_WU_BU LASX256B:$xj)>; ++ ++ ++def : LASXPat<(v4i64 (sext_invec (v16i16 LASX256H:$xj))), ++ (VEXT2XV_D_H LASX256H:$xj)>; ++def : LASXPat<(v4i64 (sext_invec (v32i8 LASX256B:$xj))), ++ (VEXT2XV_D_B LASX256B:$xj)>; ++def : LASXPat<(v8i32 (sext_invec (v32i8 LASX256B:$xj))), ++ (VEXT2XV_W_B LASX256B:$xj)>; ++ ++def : LASXPat<(v4i64 (zext_invec (v16i16 LASX256H:$xj))), ++ (VEXT2XV_DU_HU LASX256H:$xj)>; ++def : LASXPat<(v4i64 (zext_invec (v32i8 LASX256B:$xj))), ++ (VEXT2XV_DU_BU LASX256B:$xj)>; ++def : LASXPat<(v8i32 (zext_invec (v32i8 LASX256B:$xj))), ++ (VEXT2XV_WU_BU LASX256B:$xj)>; ++ ++ ++def : LASXPat<(v16i16 (sext (v16i8 LSX128B:$vj))), ++ (VEXT2XV_H_B ++ (INSERT_SUBREG (v32i8 (IMPLICIT_DEF)), LSX128B:$vj, sub_128))>; ++ 
++def : LASXPat<(v8i32 (sext (v8i16 LSX128H:$vj))), ++ (VEXT2XV_W_H ++ (INSERT_SUBREG (v16i16 (IMPLICIT_DEF)), LSX128H:$vj, sub_128))>; ++ ++def : LASXPat<(v4i64 (sext (v4i32 LSX128W:$vj))), ++ (VEXT2XV_D_W ++ (INSERT_SUBREG (v8i32 (IMPLICIT_DEF)), LSX128W:$vj, sub_128))>; ++ ++def : LASXPat<(v16i16 (zext (v16i8 LSX128B:$vj))), ++ (VEXT2XV_HU_BU ++ (INSERT_SUBREG (v32i8 (IMPLICIT_DEF)), LSX128B:$vj, sub_128))>; ++ ++def : LASXPat<(v8i32 (zext (v8i16 LSX128H:$vj))), ++ (VEXT2XV_WU_HU ++ (INSERT_SUBREG (v16i16 (IMPLICIT_DEF)), LSX128H:$vj, sub_128))>; ++ ++def : LASXPat<(v4i64 (zext (v4i32 LSX128W:$vj))), ++ (VEXT2XV_DU_WU ++ (INSERT_SUBREG (v8i32 (IMPLICIT_DEF)), LSX128W:$vj, sub_128))>; ++ ++ ++def : LASXPat<(xor ++ (v16i16 LASX256H:$xj), (xvsplati16 imm_mask) ++ ), ++ (XNOR_V_H_PSEUDO (v16i16 LASX256H:$xj), (v16i16 LASX256H:$xj))>; ++ ++def : LASXPat<(xor ++ (v8i32 LASX256W:$xj), (xvsplati32 imm_mask) ++ ), ++ (XNOR_V_W_PSEUDO (v8i32 LASX256W:$xj), (v8i32 LASX256W:$xj))>; ++ ++def : LASXPat<(xor ++ (v4i64 LASX256D:$xj), (xvsplati64 imm_mask_64) ++ ), ++ (XNOR_V_D_PSEUDO (v4i64 LASX256D:$xj), (v4i64 LASX256D:$xj))>; ++ ++ ++def : LASXPat<(and ++ (v32i8 (xor (v32i8 LASX256B:$xj), (xvsplati8 imm_mask))), ++ (v32i8 LASX256B:$xk) ++ ), ++ (XVANDN_V (v32i8 LASX256B:$xj), (v32i8 LASX256B:$xk))>; ++ ++def : LASXPat<(and ++ (v16i16 (xor (v16i16 LASX256H:$xj), (xvsplati16 imm_mask))), ++ (v16i16 LASX256H:$xk) ++ ), ++ (XVANDN_H_PSEUDO (v16i16 LASX256H:$xj), (v16i16 LASX256H:$xk))>; ++ ++def : LASXPat<(and ++ (v8i32 (xor (v8i32 LASX256W:$xj), (xvsplati32 imm_mask))), ++ (v8i32 LASX256W:$xk) ++ ), ++ (XVANDN_W_PSEUDO (v8i32 LASX256W:$xj), (v8i32 LASX256W:$xk))>; ++ ++def : LASXPat<(and ++ (v4i64 (xor (v4i64 LASX256D:$xj), (xvsplati64 imm_mask_64))), ++ (v4i64 LASX256D:$xk) ++ ), ++ (XVANDN_D_PSEUDO (v4i64 LASX256D:$xj), (v4i64 LASX256D:$xk))>; ++ ++ ++def : LASXPat<(or ++ (v32i8 LASX256B:$xj), ++ (v32i8 (xor (v32i8 LASX256B:$xk), (xvsplati8 imm_mask))) ++ ), ++ (XVORN_V (v32i8 LASX256B:$xj), (v32i8 LASX256B:$xk))>; ++ ++def : LASXPat<(or ++ (v16i16 LASX256H:$xj), ++ (v16i16 (xor (v16i16 LASX256H:$xk), (xvsplati16 imm_mask))) ++ ), ++ (XVORN_H_PSEUDO (v16i16 LASX256H:$xj), (v16i16 LASX256H:$xk))>; ++ ++def : LASXPat<(or ++ (v8i32 LASX256W:$xj), ++ (v8i32 (xor (v8i32 LASX256W:$xk), (xvsplati32 imm_mask))) ++ ), ++ (XVORN_W_PSEUDO (v8i32 LASX256W:$xj), (v8i32 LASX256W:$xk))>; ++ ++def : LASXPat<(or ++ (v4i64 LASX256D:$xj), ++ (v4i64 (xor (v4i64 LASX256D:$xk), (xvsplati64 imm_mask_64))) ++ ), ++ (XVORN_D_PSEUDO (v4i64 LASX256D:$xj), (v4i64 LASX256D:$xk))>; ++ ++ ++def : LASXPat<(add (v4i64 (abs LASX256D:$a)), (v4i64 (abs LASX256D:$b))), ++ (XVADDA_D (v4i64 LASX256D:$a),(v4i64 LASX256D:$b))>; ++ ++def : LASXPat<(add (v8i32 (abs LASX256W:$a)), (v8i32 (abs LASX256W:$b))), ++ (XVADDA_W (v8i32 LASX256W:$a),(v8i32 LASX256W:$b))>; ++ ++def : LASXPat<(add (v16i16 (abs LASX256H:$a)), (v16i16 (abs LASX256H:$b))), ++ (XVADDA_H (v16i16 LASX256H:$a),(v16i16 LASX256H:$b))>; ++ ++def : LASXPat<(add (v32i8 (abs LASX256B:$a)), (v32i8 (abs LASX256B:$b))), ++ (XVADDA_B (v32i8 LASX256B:$a),(v32i8 LASX256B:$b))>; ++ ++ ++def : LASXPat<(and v32i8:$xj, (xor (shl xvsplat_imm_eq_1, v32i8:$xk), ++ (xvsplati8 imm_mask))), ++ (XVBITCLR_B v32i8:$xj, v32i8:$xk)>; ++ ++def : LASXPat<(and v16i16:$xj, (xor (shl xvsplat_imm_eq_1, v16i16:$xk), ++ (xvsplati16 imm_mask))), ++ (XVBITCLR_H v16i16:$xj, v16i16:$xk)>; ++ ++def : LASXPat<(and v8i32:$xj, (xor (shl xvsplat_imm_eq_1, v8i32:$xk), ++ (xvsplati32 imm_mask))), ++ (XVBITCLR_W v8i32:$xj, 
v8i32:$xk)>; ++ ++def : LASXPat<(and v4i64:$xj, (xor (shl xvsplat_imm_eq_1, v4i64:$xk), ++ (xvsplati64 imm_mask_64))), ++ (XVBITCLR_D v4i64:$xj, v4i64:$xk)>; ++ ++ ++def : LASXPat<(insert_subvector (v16i16 LASX256H:$dst), ++ (v8i16 LSX128H:$src), (i64 0)), ++ (XVPERMI_QH (v16i16 LASX256H:$dst), ++ (v16i16 (INSERT_SUBREG (v16i16 (IMPLICIT_DEF)), ++ LSX128H:$src, sub_128)), ++ (i32 48))>; ++ ++def : LASXPat<(insert_subvector (v8i32 LASX256W:$dst), ++ (v4i32 LSX128W:$src), (i64 0)), ++ (XVPERMI_QW (v8i32 LASX256W:$dst), ++ (v8i32 (INSERT_SUBREG (v8i32 (IMPLICIT_DEF)), ++ LSX128W:$src, sub_128)), ++ (i32 48))>; ++ ++def : LASXPat<(insert_subvector (v4i64 LASX256D:$dst), ++ (v2i64 LSX128D:$src), (i64 0)), ++ (XVPERMI_QD (v4i64 LASX256D:$dst), ++ (v4i64 (INSERT_SUBREG (v4i64 (IMPLICIT_DEF)), ++ LSX128D:$src, sub_128)), ++ (i32 48))>; +diff --git a/lib/Target/LoongArch/LoongArchLSXInstrFormats.td b/lib/Target/LoongArch/LoongArchLSXInstrFormats.td +new file mode 100644 +index 00000000..50df4d72 +--- /dev/null ++++ b/lib/Target/LoongArch/LoongArchLSXInstrFormats.td +@@ -0,0 +1,449 @@ ++//===- LoongArchLSXInstrFormats.td - LoongArch LSX Instruction Formats ---*- tablegen -*-===// ++// ++// The LLVM Compiler Infrastructure ++// ++// This file is distributed under the University of Illinois Open Source ++// License. See LICENSE.TXT for details. ++// ++//===----------------------------------------------------------------------===// ++ ++class LSXInst : InstLA<(outs), (ins), "", [], FrmOther>, ++ EXT_LSX { ++} ++ ++class LSXCBranch : LSXInst { ++} ++ ++class LSXSpecial : LSXInst { ++} ++ ++class LSXPseudo pattern>: ++ LoongArchPseudo { ++ let Predicates = [HasLSX]; ++} ++ ++class LSX_3R op>: LSXInst { ++ bits<5> vk; ++ bits<5> vj; ++ bits<5> vd; ++ ++ let Inst{31-15} = op; ++ let Inst{14-10} = vk; ++ let Inst{9-5} = vj; ++ let Inst{4-0} = vd; ++} ++ ++class LSX_3R_1GP op>: LSXInst { ++ bits<5> rk; ++ bits<5> vj; ++ bits<5> vd; ++ ++ let Inst{31-15} = op; ++ let Inst{14-10} = rk; ++ let Inst{9-5} = vj; ++ let Inst{4-0} = vd; ++} ++ ++class LSX_I5 op>: LSXInst { ++ bits<5> vd; ++ bits<5> vj; ++ bits<5> si5; ++ ++ let Inst{31-15} = op; ++ let Inst{14-10} = si5; ++ let Inst{9-5} = vj; ++ let Inst{4-0} = vd; ++} ++ ++class LSX_I5_U op>: LSXInst { ++ bits<5> vd; ++ bits<5> vj; ++ bits<5> ui5; ++ ++ let Inst{31-15} = op; ++ let Inst{14-10} = ui5; ++ let Inst{9-5} = vj; ++ let Inst{4-0} = vd; ++} ++ ++class LSX_2R op>: LSXInst { ++ bits<5> vj; ++ bits<5> vd; ++ ++ let Inst{31-10} = op; ++ let Inst{9-5} = vj; ++ let Inst{4-0} = vd; ++} ++ ++class LSX_2R_1GP op>: LSXInst { ++ bits<5> rj; ++ bits<5> vd; ++ ++ let Inst{31-10} = op; ++ let Inst{9-5} = rj; ++ let Inst{4-0} = vd; ++} ++ ++class LSX_I1_U op>: LSXInst { ++ bits<5> vd; ++ bits<5> vj; ++ bits<1> ui1; ++ ++ let Inst{31-11} = op; ++ let Inst{10} = ui1; ++ let Inst{9-5} = vj; ++ let Inst{4-0} = vd; ++} ++ ++class LSX_I2_U op>: LSXInst { ++ bits<5> vd; ++ bits<5> vj; ++ bits<2> ui2; ++ ++ let Inst{31-12} = op; ++ let Inst{11-10} = ui2; ++ let Inst{9-5} = vj; ++ let Inst{4-0} = vd; ++} ++ ++class LSX_I3_U op>: LSXInst { ++ bits<5> vd; ++ bits<5> vj; ++ bits<3> ui3; ++ ++ let Inst{31-13} = op; ++ let Inst{12-10} = ui3; ++ let Inst{9-5} = vj; ++ let Inst{4-0} = vd; ++} ++ ++class LSX_I4_U op>: LSXInst { ++ bits<5> vd; ++ bits<5> vj; ++ bits<4> ui4; ++ ++ let Inst{31-14} = op; ++ let Inst{13-10} = ui4; ++ let Inst{9-5} = vj; ++ let Inst{4-0} = vd; ++} ++ ++class LSX_I6_U op>: LSXInst { ++ bits<5> vd; ++ bits<5> vj; ++ bits<6> ui6; ++ ++ let Inst{31-16} = op; ++ 
let Inst{15-10} = ui6; ++ let Inst{9-5} = vj; ++ let Inst{4-0} = vd; ++} ++ ++class LSX_I1_R_U op>: LSXInst { ++ bits<5> vd; ++ bits<5> rj; ++ bits<1> ui1; ++ ++ let Inst{31-11} = op; ++ let Inst{10} = ui1; ++ let Inst{9-5} = rj; ++ let Inst{4-0} = vd; ++} ++ ++class LSX_I2_R_U op>: LSXInst { ++ bits<5> vd; ++ bits<5> rj; ++ bits<2> ui2; ++ ++ let Inst{31-12} = op; ++ let Inst{11-10} = ui2; ++ let Inst{9-5} = rj; ++ let Inst{4-0} = vd; ++} ++ ++class LSX_I3_R_U op>: LSXInst { ++ bits<5> vd; ++ bits<5> rj; ++ bits<3> ui3; ++ ++ let Inst{31-13} = op; ++ let Inst{12-10} = ui3; ++ let Inst{9-5} = rj; ++ let Inst{4-0} = vd; ++} ++ ++class LSX_I4_R_U op>: LSXInst { ++ bits<5> vd; ++ bits<5> rj; ++ bits<4> ui4; ++ ++ let Inst{31-14} = op; ++ let Inst{13-10} = ui4; ++ let Inst{9-5} = rj; ++ let Inst{4-0} = vd; ++} ++ ++class LSX_ELM_COPY_B op>: LSXInst { ++ bits<5> rd; ++ bits<5> vj; ++ bits<4> ui4; ++ ++ let Inst{31-14} = op; ++ let Inst{13-10} = ui4; ++ let Inst{9-5} = vj; ++ let Inst{4-0} = rd; ++} ++ ++class LSX_ELM_COPY_H op>: LSXInst { ++ bits<5> rd; ++ bits<5> vj; ++ bits<3> ui3; ++ ++ let Inst{31-13} = op; ++ let Inst{12-10} = ui3; ++ let Inst{9-5} = vj; ++ let Inst{4-0} = rd; ++} ++ ++class LSX_ELM_COPY_W op>: LSXInst { ++ bits<5> rd; ++ bits<5> vj; ++ bits<2> ui2; ++ ++ let Inst{31-12} = op; ++ let Inst{11-10} = ui2; ++ let Inst{9-5} = vj; ++ let Inst{4-0} = rd; ++} ++ ++class LSX_ELM_COPY_D op>: LSXInst { ++ bits<5> rd; ++ bits<5> vj; ++ bits<1> ui1; ++ ++ let Inst{31-11} = op; ++ let Inst{10} = ui1; ++ let Inst{9-5} = vj; ++ let Inst{4-0} = rd; ++} ++ ++class LSX_I8_U op>: LSXInst { ++ bits<5> vd; ++ bits<5> vj; ++ bits<8> ui8; ++ ++ let Inst{31-18} = op; ++ let Inst{17-10} = ui8; ++ let Inst{9-5} = vj; ++ let Inst{4-0} = vd; ++} ++ ++class LSX_I7_U op>: LSXInst { ++ bits<5> vd; ++ bits<5> vj; ++ bits<7> ui7; ++ ++ let Inst{31-17} = op; ++ let Inst{16-10} = ui7; ++ let Inst{9-5} = vj; ++ let Inst{4-0} = vd; ++} ++ ++class LSX_I12_S op>: LSXInst { ++ bits<5> vd; ++// bits<5> rj; ++// bits<12> si12; ++ bits<17> addr; ++ ++ let Inst{31-22} = op; ++ let Inst{21-10} = addr{11-0}; ++ let Inst{9-5} = addr{16-12}; ++ let Inst{4-0} = vd; ++} ++ ++class LSX_SI12_S op>: LSXInst { ++ bits<5> vd; ++ bits<17> addr; ++ ++ let Inst{31-22} = op; ++ let Inst{21-10} = addr{11-0}; ++ let Inst{9-5} = addr{16-12}; ++ let Inst{4-0} = vd; ++} ++ ++class LSX_SI11_S op>: LSXInst { ++ bits<5> vd; ++ bits<16> addr; ++ ++ let Inst{31-21} = op; ++ let Inst{20-10} = addr{10-0}; ++ let Inst{9-5} = addr{15-11}; ++ let Inst{4-0} = vd; ++} ++ ++class LSX_SI10_S op>: LSXInst { ++ bits<5> vd; ++ bits<15> addr; ++ ++ let Inst{31-20} = op; ++ let Inst{19-10} = addr{9-0}; ++ let Inst{9-5} = addr{14-10}; ++ let Inst{4-0} = vd; ++} ++ ++class LSX_SI9_S op>: LSXInst { ++ bits<5> vd; ++ bits<14> addr; ++ ++ let Inst{31-19} = op; ++ let Inst{18-10} = addr{8-0}; ++ let Inst{9-5} = addr{13-9}; ++ let Inst{4-0} = vd; ++} ++ ++class LSX_SET op>: LSXInst { ++ bits<5> vj; ++ bits<3> cd; ++ ++ let Inst{31-10} = op; ++ let Inst{9-5} = vj; ++ let Inst{4-3} = 0b00; ++ let Inst{2-0} = cd; ++} ++ ++class LSX_VR4MUL op>: LSXInst { ++ bits<5> vd; ++ bits<5> vj; ++ bits<5> vk; ++ bits<5> va; ++ ++ let Inst{31-20} = op; ++ let Inst{19-15} = va; ++ let Inst{14-10} = vk; ++ let Inst{9-5} = vj; ++ let Inst{4-0} = vd; ++} ++ ++class LSX_VFCMP op>: LSXInst { ++ bits<5> vd; ++ bits<5> vj; ++ bits<5> vk; ++ bits<5> cond; ++ ++ let Inst{31-20} = op; ++ let Inst{19-15} = cond; ++ let Inst{14-10} = vk; ++ let Inst{9-5} = vj; ++ let Inst{4-0} = vd; ++} ++ 
++class LSX_Addr_SI8_idx1 op>: LSXInst { ++ bits<5> vd; ++ bits<13> addr; ++ bits<1> idx; ++ ++ let Inst{31-19} = op; ++ let Inst{18-11} = addr{7-0}; ++ let Inst{10} = idx; ++ let Inst{9-5} = addr{12-8}; ++ let Inst{4-0} = vd; ++} ++ ++class LSX_SI8_idx1 op>: LSXInst { ++ bits<5> vd; ++ bits<5> rj; ++ bits<8> si8; ++ bits<1> idx; ++ ++ let Inst{31-19} = op; ++ let Inst{18} = idx; ++ let Inst{17-10} = si8; ++ let Inst{9-5} = rj; ++ let Inst{4-0} = vd; ++} ++ ++class LSX_SI8_idx2 op>: LSXInst { ++ bits<5> vd; ++ bits<5> rj; ++ bits<8> si8; ++ bits<2> idx; ++ ++ let Inst{31-20} = op; ++ let Inst{19-18} = idx; ++ let Inst{17-10} = si8; ++ let Inst{9-5} = rj; ++ let Inst{4-0} = vd; ++} ++ ++class LSX_SI8_idx3 op>: LSXInst { ++ bits<5> vd; ++ bits<5> rj; ++ bits<8> si8; ++ bits<3> idx; ++ ++ let Inst{31-21} = op; ++ let Inst{20-18} = idx; ++ let Inst{17-10} = si8; ++ let Inst{9-5} = rj; ++ let Inst{4-0} = vd; ++} ++ ++class LSX_SI8_idx4 op>: LSXInst { ++ bits<5> vd; ++ bits<5> rj; ++ bits<8> si8; ++ bits<4> idx; ++ ++ let Inst{31-22} = op; ++ let Inst{21-18} = idx; ++ let Inst{17-10} = si8; ++ let Inst{9-5} = rj; ++ let Inst{4-0} = vd; ++} ++ ++class LSX_3R_2GP op>: LSXInst { ++ bits<5> rk; ++ bits<5> rj; ++ bits<5> vd; ++ ++ let Inst{31-15} = op; ++ let Inst{14-10} = rk; ++ let Inst{9-5} = rj; ++ let Inst{4-0} = vd; ++} ++ ++class LSX_I5_mode_U op>: LSXInst { ++ bits<5> vd; ++ bits<5> mode; ++ bits<5> ui5; ++ ++ let Inst{31-15} = op; ++ let Inst{14-10} = ui5; ++ let Inst{9-5} = mode; ++ let Inst{4-0} = vd; ++} ++ ++class LSX_1R_I13 op>: LSXInst { ++ bits<13> i13; ++ bits<5> vd; ++ ++ let Inst{31-18} = op; ++ let Inst{17-5} = i13; ++ let Inst{4-0} = vd; ++} ++ ++class LSX_1R_I13_I10 op>: LSXInst { ++ bits<10> i10; ++ bits<5> vd; ++ ++ let Inst{31-15} = op; ++ let Inst{14-5} = i10; ++ let Inst{4-0} = vd; ++} ++ ++ ++ ++ ++ ++ ++ +diff --git a/lib/Target/LoongArch/LoongArchLSXInstrInfo.td b/lib/Target/LoongArch/LoongArchLSXInstrInfo.td +new file mode 100644 +index 00000000..69fdc3a8 +--- /dev/null ++++ b/lib/Target/LoongArch/LoongArchLSXInstrInfo.td +@@ -0,0 +1,5904 @@ ++//===- LoongArchLSXInstrInfo.td - LSX instructions -*- tablegen ------------*-=// ++// ++// The LLVM Compiler Infrastructure ++// ++// This file is distributed under the University of Illinois Open Source ++// License. See LICENSE.TXT for details. ++// ++//===----------------------------------------------------------------------===// ++// ++// This file describes LoongArch LSX instructions. 
++// ++//===----------------------------------------------------------------------===// ++ ++def SDT_LoongArchVecCond : SDTypeProfile<1, 1, [SDTCisInt<0>, SDTCisVec<1>]>; ++def SDT_VSetCC : SDTypeProfile<1, 3, [SDTCisInt<0>, ++ SDTCisInt<1>, ++ SDTCisSameAs<1, 2>, ++ SDTCisVT<3, OtherVT>]>; ++def SDT_VFSetCC : SDTypeProfile<1, 3, [SDTCisInt<0>, ++ SDTCisFP<1>, ++ SDTCisSameAs<1, 2>, ++ SDTCisVT<3, OtherVT>]>; ++def SDT_VSHF : SDTypeProfile<1, 3, [SDTCisInt<0>, SDTCisVec<0>, ++ SDTCisInt<1>, SDTCisVec<1>, ++ SDTCisSameAs<0, 2>, SDTCisSameAs<2, 3>]>; ++def SDT_SHF : SDTypeProfile<1, 2, [SDTCisInt<0>, SDTCisVec<0>, ++ SDTCisVT<1, i32>, SDTCisSameAs<0, 2>]>; ++def SDT_ILV : SDTypeProfile<1, 2, [SDTCisInt<0>, SDTCisVec<0>, ++ SDTCisSameAs<0, 1>, SDTCisSameAs<1, 2>]>; ++def SDTVABSD : SDTypeProfile<1, 3, [SDTCisVec<0>, SDTCisSameAs<0, 1>, ++ SDTCisSameAs<0, 2>, SDTCisVT<3, i32>]>; ++ ++def SDT_VBROADCAST : SDTypeProfile<1, 1, [SDTCisVec<0>]>; ++def LoongArchVBROADCAST : SDNode<"LoongArchISD::VBROADCAST", SDT_VBROADCAST>; ++ ++def LoongArchVAllNonZero : SDNode<"LoongArchISD::VALL_NONZERO", SDT_LoongArchVecCond>; ++def LoongArchVAnyNonZero : SDNode<"LoongArchISD::VANY_NONZERO", SDT_LoongArchVecCond>; ++def LoongArchVAllZero : SDNode<"LoongArchISD::VALL_ZERO", SDT_LoongArchVecCond>; ++def LoongArchVAnyZero : SDNode<"LoongArchISD::VANY_ZERO", SDT_LoongArchVecCond>; ++def LoongArchVNOR : SDNode<"LoongArchISD::VNOR", SDTIntBinOp, ++ [SDNPCommutative, SDNPAssociative]>; ++def LoongArchVSHF : SDNode<"LoongArchISD::VSHF", SDT_VSHF>; ++def LoongArchSHF : SDNode<"LoongArchISD::SHF", SDT_SHF>; ++def LoongArchVPACKEV : SDNode<"LoongArchISD::VPACKEV", SDT_ILV>; ++def LoongArchVPACKOD : SDNode<"LoongArchISD::VPACKOD", SDT_ILV>; ++def LoongArchVILVH : SDNode<"LoongArchISD::VILVH", SDT_ILV>; ++def LoongArchVILVL : SDNode<"LoongArchISD::VILVL", SDT_ILV>; ++def LoongArchVPICKEV : SDNode<"LoongArchISD::VPICKEV", SDT_ILV>; ++def LoongArchVPICKOD : SDNode<"LoongArchISD::VPICKOD", SDT_ILV>; ++def LoongArchVABSD : SDNode<"LoongArchISD::VABSD", SDTVABSD>; ++def LoongArchUVABSD : SDNode<"LoongArchISD::UVABSD", SDTVABSD>; ++ ++def vsetcc : SDNode<"ISD::SETCC", SDT_VSetCC>; ++def vfsetcc : SDNode<"ISD::SETCC", SDT_VFSetCC>; ++ ++def LoongArchVExtractSExt : SDNode<"LoongArchISD::VEXTRACT_SEXT_ELT", ++ SDTypeProfile<1, 3, [SDTCisPtrTy<2>]>, []>; ++def LoongArchVExtractZExt : SDNode<"LoongArchISD::VEXTRACT_ZEXT_ELT", ++ SDTypeProfile<1, 3, [SDTCisPtrTy<2>]>, []>; ++ ++def immZExt1Ptr : ImmLeaf(Imm);}]>; ++def immZExt2Ptr : ImmLeaf(Imm);}]>; ++def immZExt3Ptr : ImmLeaf(Imm);}]>; ++def immZExt4Ptr : ImmLeaf(Imm);}]>; ++def immZExt5Ptr : ImmLeaf(Imm);}]>; ++def immZExt8 : ImmLeaf(Imm);}]>; ++def immZExt7 : PatLeaf<(imm), [{ return isUInt<7>(N->getZExtValue()); }]>; ++def immZExt6 : ImmLeaf; ++def immZExt4 : ImmLeaf(Imm);}]>; ++def immZExt3 : ImmLeaf(Imm);}]>; ++def immZExt2 : ImmLeaf(Imm);}]>; ++def immZExt1 : ImmLeaf(Imm);}]>; ++def immSExt12_l : ImmLeaf(Imm);}]>; ++def immSExt11Ptr : ImmLeaf(Imm);}]>; ++ ++def immSExt11_1 : ImmLeaf(Imm<<1);}]>; ++def immSExt10Ptr : ImmLeaf(Imm);}]>; ++def immSExt10_2 : ImmLeaf(Imm<<2);}]>; ++def immSExt9Ptr : ImmLeaf(Imm);}]>; ++def immSExt9_3 : ImmLeaf(Imm<<3);}]>; ++def immSExt8 : ImmLeaf(Imm);}]>; ++def immSExt5 : ImmLeaf(Imm);}]>; ++def immSExt8_1 : ImmLeaf(Imm<<1);}]>; ++def immSExt8_2 : ImmLeaf(Imm<<2);}]>; ++def immSExt8_3 : ImmLeaf(Imm<<3);}]>; ++ ++def addrimm10 : ComplexPattern; ++def addrimm10lsl2 : ComplexPattern; ++def addrimm9lsl3 : ComplexPattern; ++def addrimm11lsl1 : 
ComplexPattern; ++ ++ ++class SimmLslAsmOperandClass Supers = [], ++ int Shift = 0> : AsmOperandClass { ++ let Name = "Simm" # Bits # "_Lsl" # Shift; ++ let RenderMethod = "addImmOperands"; ++ let PredicateMethod = "isScaledSImm<" # Bits # ", " # Shift # ">"; ++ let SuperClasses = Supers; ++ let DiagnosticType = "SImm" # Bits # "_Lsl" # Shift; ++} ++ ++def Simm11Lsl1AsmOperand ++ : SimmLslAsmOperandClass<11, [], 1>; ++ ++def immSExt11_1_O : Operand { ++ let EncoderMethod = "getSImm11Lsl1Encoding"; ++ let DecoderMethod = "DecodeSImmWithOffsetAndScale<2>"; ++ let ParserMatchClass = Simm11Lsl1AsmOperand; ++} ++ ++def Simm10Lsl2AsmOperand ++ : SimmLslAsmOperandClass<10, [], 2>; ++ ++def immSExt10_2_O : Operand { ++ let EncoderMethod = "getSImm10Lsl2Encoding"; ++ let DecoderMethod = "DecodeSImmWithOffsetAndScale<4>"; ++ let ParserMatchClass = Simm10Lsl2AsmOperand; ++} ++ ++def Simm9Lsl3AsmOperand ++ : SimmLslAsmOperandClass<9, [], 3>; ++ ++def immSExt9_3_O : Operand { ++ let EncoderMethod = "getSImm9Lsl3Encoding"; ++ let DecoderMethod = "DecodeSImmWithOffsetAndScale<8>"; ++ let ParserMatchClass = Simm9Lsl3AsmOperand; ++} ++ ++def Simm8Lsl3AsmOperand ++ : SimmLslAsmOperandClass<8, [], 3>; ++ ++def immSExt8_3_O : Operand { ++ let EncoderMethod = "getSImm8Lsl3Encoding"; ++ let DecoderMethod = "DecodeSImmWithOffsetAndScale<8>"; ++ let ParserMatchClass = Simm8Lsl3AsmOperand; ++} ++ ++def Simm8Lsl2AsmOperand ++ : SimmLslAsmOperandClass<8, [], 2>; ++ ++def immSExt8_2_O : Operand { ++ let EncoderMethod = "getSImm8Lsl2Encoding"; ++ let DecoderMethod = "DecodeSImmWithOffsetAndScale<4>"; ++ let ParserMatchClass = Simm8Lsl2AsmOperand; ++} ++ ++def Simm8Lsl1AsmOperand ++ : SimmLslAsmOperandClass<8, [], 1>; ++ ++def immSExt8_1_O : Operand { ++ let EncoderMethod = "getSImm8Lsl1Encoding"; ++ let DecoderMethod = "DecodeSImmWithOffsetAndScale<2>"; ++ let ParserMatchClass = Simm8Lsl1AsmOperand; ++} ++ ++ ++class ConstantSImmAsmOperandClass Supers = [], ++ int Offset = 0> : AsmOperandClass { ++ let Name = "ConstantSImm" # Bits # "_" # Offset; ++ let RenderMethod = "addConstantSImmOperands<" # Bits # ", " # Offset # ">"; ++ let PredicateMethod = "isConstantSImm<" # Bits # ", " # Offset # ">"; ++ let SuperClasses = Supers; ++ let DiagnosticType = "SImm" # Bits # "_" # Offset; ++} ++ ++class ConstantUImmRangeAsmOperandClass Supers = []> ++ : AsmOperandClass { ++ let Name = "ConstantUImmRange" # Bottom # "_" # Top; ++ let RenderMethod = "addImmOperands"; ++ let PredicateMethod = "isConstantUImmRange<" # Bottom # ", " # Top # ">"; ++ let SuperClasses = Supers; ++ let DiagnosticType = "UImmRange" # Bottom # "_" # Top; ++} ++ ++def SImm16RelaxedAsmOperandClass ++ : SImmAsmOperandClass<16, [UImm16RelaxedAsmOperandClass]> { ++ let Name = "SImm16_Relaxed"; ++ let PredicateMethod = "isAnyImm<16>"; ++ let DiagnosticType = "SImm16_Relaxed"; ++} ++ ++def ConstantSImm11Lsl1AsmOperandClass : AsmOperandClass { ++ let Name = "SImm11Lsl1"; ++ let RenderMethod = "addImmOperands"; ++ let PredicateMethod = "isScaledSImm<11, 1>"; ++ let SuperClasses = [SImm12Operand]; ++ let DiagnosticType = "SImm11_Lsl1"; ++} ++ ++def ConstantSImm9Lsl3AsmOperandClass : AsmOperandClass { ++ let Name = "SImm9Lsl3"; ++ let RenderMethod = "addImmOperands"; ++ let PredicateMethod = "isScaledSImm<9, 3>"; ++ let SuperClasses = [SImm12Operand]; ++ let DiagnosticType = "SImm9_Lsl3"; ++} ++ ++def ConstantSImm10Lsl2AsmOperandClass : AsmOperandClass { ++ let Name = "SImm10Lsl2"; ++ let RenderMethod = "addImmOperands"; ++ let PredicateMethod = "isScaledSImm<10, 
2>"; ++ let SuperClasses = [SImm12Operand]; ++ let DiagnosticType = "SImm10_Lsl2"; ++} ++def ConstantSImm11AsmOperandClass ++ : ConstantSImmAsmOperandClass<11, [ConstantSImm10Lsl2AsmOperandClass]>; ++def ConstantSImm10Lsl1AsmOperandClass : AsmOperandClass { ++ let Name = "SImm10Lsl1"; ++ let RenderMethod = "addImmOperands"; ++ let PredicateMethod = "isScaledSImm<10, 1>"; ++ let SuperClasses = [ConstantSImm11AsmOperandClass]; ++ let DiagnosticType = "SImm10_Lsl1"; ++} ++def ConstantUImm10AsmOperandClass ++ : ConstantUImmAsmOperandClass<10, [ConstantSImm10Lsl1AsmOperandClass]>; ++def ConstantSImm10AsmOperandClass ++ : ConstantSImmAsmOperandClass<10, [ConstantUImm10AsmOperandClass]>; ++def ConstantSImm9AsmOperandClass ++ : ConstantSImmAsmOperandClass<9, [ConstantSImm10AsmOperandClass]>; ++def ConstantSImm7Lsl2AsmOperandClass : AsmOperandClass { ++ let Name = "SImm7Lsl2"; ++ let RenderMethod = "addImmOperands"; ++ let PredicateMethod = "isScaledSImm<7, 2>"; ++ let SuperClasses = [ConstantSImm9AsmOperandClass]; ++ let DiagnosticType = "SImm7_Lsl2"; ++} ++def ConstantUImm8AsmOperandClass ++ : ConstantUImmAsmOperandClass<8, [ConstantSImm7Lsl2AsmOperandClass]>; ++def ConstantUImm7Sub1AsmOperandClass ++ : ConstantUImmAsmOperandClass<7, [ConstantUImm8AsmOperandClass], -1> { ++ // Specify the names since the -1 offset causes invalid identifiers otherwise. ++ let Name = "UImm7_N1"; ++ let DiagnosticType = "UImm7_N1"; ++} ++def ConstantUImm7AsmOperandClass ++ : ConstantUImmAsmOperandClass<7, [ConstantUImm7Sub1AsmOperandClass]>; ++def ConstantUImm6Lsl2AsmOperandClass : AsmOperandClass { ++ let Name = "UImm6Lsl2"; ++ let RenderMethod = "addImmOperands"; ++ let PredicateMethod = "isScaledUImm<6, 2>"; ++ let SuperClasses = [ConstantUImm7AsmOperandClass]; ++ let DiagnosticType = "UImm6_Lsl2"; ++} ++def ConstantUImm6AsmOperandClass ++ : ConstantUImmAsmOperandClass<6, [ConstantUImm6Lsl2AsmOperandClass]>; ++def ConstantSImm6AsmOperandClass ++ : ConstantSImmAsmOperandClass<6, [ConstantUImm6AsmOperandClass]>; ++def ConstantUImm5Lsl2AsmOperandClass : AsmOperandClass { ++ let Name = "UImm5Lsl2"; ++ let RenderMethod = "addImmOperands"; ++ let PredicateMethod = "isScaledUImm<5, 2>"; ++ let SuperClasses = [ConstantSImm6AsmOperandClass]; ++ let DiagnosticType = "UImm5_Lsl2"; ++} ++def ConstantUImm5_Range2_64AsmOperandClass ++ : ConstantUImmRangeAsmOperandClass<2, 64, [ConstantUImm5Lsl2AsmOperandClass]>; ++def ConstantUImm5Plus33AsmOperandClass ++ : ConstantUImmAsmOperandClass<5, [ConstantUImm5_Range2_64AsmOperandClass], ++ 33>; ++def ConstantUImm5ReportUImm6AsmOperandClass ++ : ConstantUImmAsmOperandClass<5, [ConstantUImm5Plus33AsmOperandClass]> { ++ let Name = "ConstantUImm5_0_Report_UImm6"; ++ let DiagnosticType = "UImm5_0_Report_UImm6"; ++} ++def ConstantUImm5Plus32AsmOperandClass ++ : ConstantUImmAsmOperandClass< ++ 5, [ConstantUImm5ReportUImm6AsmOperandClass], 32>; ++def ConstantUImm5Plus32NormalizeAsmOperandClass ++ : ConstantUImmAsmOperandClass<5, [ConstantUImm5Plus32AsmOperandClass], 32> { ++ let Name = "ConstantUImm5_32_Norm"; ++ // We must also subtract 32 when we render the operand. 
++ let RenderMethod = "addConstantUImmOperands<5, 32, -32>"; ++} ++def ConstantUImm5Plus1ReportUImm6AsmOperandClass ++ : ConstantUImmAsmOperandClass< ++ 5, [ConstantUImm5Plus32NormalizeAsmOperandClass], 1>{ ++ let Name = "ConstantUImm5_Plus1_Report_UImm6"; ++} ++def ConstantUImm5Plus1AsmOperandClass ++ : ConstantUImmAsmOperandClass< ++ 5, [ConstantUImm5Plus1ReportUImm6AsmOperandClass], 1>; ++def ConstantUImm5AsmOperandClass ++ : ConstantUImmAsmOperandClass<5, [ConstantUImm5Plus1AsmOperandClass]>; ++def ConstantSImm5AsmOperandClass ++ : ConstantSImmAsmOperandClass<5, [ConstantUImm5AsmOperandClass]>; ++def ConstantUImm4AsmOperandClass ++ : ConstantUImmAsmOperandClass<4, [ConstantSImm5AsmOperandClass]>; ++def ConstantSImm4AsmOperandClass ++ : ConstantSImmAsmOperandClass<4, [ConstantUImm4AsmOperandClass]>; ++def ConstantUImm3AsmOperandClass ++ : ConstantUImmAsmOperandClass<3, [ConstantSImm4AsmOperandClass]>; ++def ConstantUImm2AsmOperandClass ++ : ConstantUImmAsmOperandClass<2, [ConstantUImm3AsmOperandClass]>; ++def ConstantUImm1AsmOperandClass ++ : ConstantUImmAsmOperandClass<1, [ConstantUImm2AsmOperandClass]>; ++def ConstantImmzAsmOperandClass : AsmOperandClass { ++ let Name = "ConstantImmz"; ++ let RenderMethod = "addConstantUImmOperands<1>"; ++ let PredicateMethod = "isConstantImmz"; ++ let SuperClasses = [ConstantUImm1AsmOperandClass]; ++ let DiagnosticType = "Immz"; ++} ++ ++foreach I = {1, 2, 3, 4, 5, 6, 8} in ++ def vsplat_uimm # I : Operand { ++ let PrintMethod = "printUImm<" # I # ">"; ++ let ParserMatchClass = ++ !cast("ConstantUImm" # I # "AsmOperandClass"); ++ } ++ ++foreach I = {5, 10} in ++ def vsplat_simm # I : Operand { ++ let ParserMatchClass = ++ !cast("ConstantSImm" # I # "AsmOperandClass"); ++ } ++ ++foreach I = {1, 4, 7, 8, 10, 20, 26} in ++ def uimm # I : Operand { ++ let PrintMethod = "printUImm<" # I # ">"; ++ let ParserMatchClass = ++ !cast("ConstantUImm" # I # "AsmOperandClass"); ++ } ++ ++foreach I = {1, 2, 3, 4, 5, 6, 7, 8} in ++ def uimm # I # _ptr : Operand { ++ let PrintMethod = "printUImm<" # I # ">"; ++ let ParserMatchClass = ++ !cast("ConstantUImm" # I # "AsmOperandClass"); ++ } ++ ++ ++def addrimm12 : ComplexPattern; ++ ++ ++def LoongArchMemSimm12AsmOperand : AsmOperandClass { ++ let Name = "MemOffsetSimm12"; ++ let SuperClasses = [LoongArchMemAsmOperand]; ++ let RenderMethod = "addMemOperands"; ++ let ParserMethod = "parseMemOperand"; ++ let PredicateMethod = "isMemWithSimmOffset<12>"; ++ let DiagnosticType = "MemSImm12"; ++} ++ ++def mem_simm12 : mem_generic { ++ let MIOperandInfo = (ops ptr_rc, simm12); ++ let EncoderMethod = "getMemEncoding"; ++ let ParserMatchClass = LoongArchMemSimm12AsmOperand; ++} ++ ++foreach I = {4, 6, 9, 10, 11} in ++ def simm # I : Operand { ++ let DecoderMethod = "DecodeSImmWithOffsetAndScale<" # I # ">"; ++ let ParserMatchClass = ++ !cast("ConstantSImm" # I # "AsmOperandClass"); ++ } ++ ++def LoongArchMemSimm9AsmOperand : AsmOperandClass { ++ let Name = "MemOffsetSimm9"; ++ let SuperClasses = [LoongArchMemAsmOperand]; ++ let RenderMethod = "addMemOperands"; ++ let ParserMethod = "parseMemOperand"; ++ let PredicateMethod = "isMemWithSimmOffset<9>"; ++ let DiagnosticType = "MemSImm9"; ++} ++ ++def LoongArchMemSimm10AsmOperand : AsmOperandClass { ++ let Name = "MemOffsetSimm10"; ++ let SuperClasses = [LoongArchMemAsmOperand]; ++ let RenderMethod = "addMemOperands"; ++ let ParserMethod = "parseMemOperand"; ++ let PredicateMethod = "isMemWithSimmOffset<10>"; ++ let DiagnosticType = "MemSImm10"; ++} ++ ++def 
LoongArchMemSimm11AsmOperand : AsmOperandClass { ++ let Name = "MemOffsetSimm11"; ++ let SuperClasses = [LoongArchMemAsmOperand]; ++ let RenderMethod = "addMemOperands"; ++ let ParserMethod = "parseMemOperand"; ++ let PredicateMethod = "isMemWithSimmOffset<11>"; ++ let DiagnosticType = "MemSImm11"; ++} ++ ++def simm13 : PatLeaf<(imm), [{ return isInt<13>(N->getSExtValue()); }]>; ++ ++def simm10Op : Operand { ++ let DecoderMethod = "DecodeSIMM10"; ++} ++ ++def simm13Op : Operand { ++ let DecoderMethod = "DecodeSIMM13"; ++} ++ ++def LoongArchMemSimm10Lsl2AsmOperand : AsmOperandClass { ++ let Name = "MemOffsetSimm10_2"; ++ let SuperClasses = [LoongArchMemAsmOperand]; ++ let RenderMethod = "addMemOperands"; ++ let ParserMethod = "parseMemOperand"; ++ let PredicateMethod = "isMemWithSimmOffset<10, 2>"; ++ let DiagnosticType = "MemSImm10Lsl2"; ++} ++ ++ ++def simm10_lsl2 : Operand { ++// let DecoderMethod = "DecodeSImmWithOffsetAndScale<10, 2>"; ++ let ParserMatchClass = ++ !cast("ConstantSImm10Lsl2AsmOperandClass"); ++} ++ ++def mem_simm10_lsl2 : mem_generic { ++ let MIOperandInfo = (ops ptr_rc, !cast("simm10_lsl2")); ++ let EncoderMethod = "getMemEncoding10l2"; ++ let ParserMatchClass = ++ !cast("LoongArchMemSimm10Lsl2AsmOperand"); ++} ++ ++ ++def LoongArchMemSimm11Lsl1AsmOperand : AsmOperandClass { ++ let Name = "MemOffsetSimm11_1"; ++ let SuperClasses = [LoongArchMemAsmOperand]; ++ let RenderMethod = "addMemOperands"; ++ let ParserMethod = "parseMemOperand"; ++ let PredicateMethod = "isMemWithSimmOffset<11, 1>"; ++ let DiagnosticType = "MemSImm11Lsl1"; ++} ++ ++ ++def simm11_lsl1 : Operand { ++ // let DecoderMethod = "DecodeSImmWithOffsetAndScale<11, 1>"; ++ let ParserMatchClass = ++ !cast("ConstantSImm11Lsl1AsmOperandClass"); ++} ++ ++def mem_simm11_lsl1 : mem_generic { ++ let MIOperandInfo = (ops ptr_rc, !cast("simm11_lsl1")); ++ let EncoderMethod = "getMemEncoding11l1"; ++ let ParserMatchClass = ++ !cast("LoongArchMemSimm11Lsl1AsmOperand"); ++} ++ ++def LoongArchMemSimm9Lsl3AsmOperand : AsmOperandClass { ++ let Name = "MemOffsetSimm9_3"; ++ let SuperClasses = [LoongArchMemAsmOperand]; ++ let RenderMethod = "addMemOperands"; ++ let ParserMethod = "parseMemOperand"; ++ let PredicateMethod = "isMemWithSimmOffset<9, 3>"; ++ let DiagnosticType = "MemSImm9Lsl3"; ++} ++ ++ ++def simm9_lsl3 : Operand { ++ // let DecoderMethod = "DecodeSImmWithOffsetAndScale<9, 3>"; ++ let ParserMatchClass = ++ !cast("ConstantSImm9Lsl3AsmOperandClass"); ++} ++ ++def mem_simm9_lsl3 : mem_generic { ++ let MIOperandInfo = (ops ptr_rc, !cast("simm9_lsl3")); ++ let EncoderMethod = "getMemEncoding9l3"; ++ let ParserMatchClass = ++ !cast("LoongArchMemSimm9Lsl3AsmOperand"); ++} ++ ++ ++ ++ ++// Operands ++ ++def immZExt2Lsa : ImmLeaf(Imm - 1);}]>; ++ ++// Pattern fragments ++def vextract_sext_i8 : PatFrag<(ops node:$vec, node:$idx), ++ (LoongArchVExtractSExt node:$vec, node:$idx, i8)>; ++def vextract_sext_i16 : PatFrag<(ops node:$vec, node:$idx), ++ (LoongArchVExtractSExt node:$vec, node:$idx, i16)>; ++def vextract_sext_i32 : PatFrag<(ops node:$vec, node:$idx), ++ (LoongArchVExtractSExt node:$vec, node:$idx, i32)>; ++def vextract_sext_i64 : PatFrag<(ops node:$vec, node:$idx), ++ (LoongArchVExtractSExt node:$vec, node:$idx, i64)>; ++ ++def vextract_zext_i8 : PatFrag<(ops node:$vec, node:$idx), ++ (LoongArchVExtractZExt node:$vec, node:$idx, i8)>; ++def vextract_zext_i16 : PatFrag<(ops node:$vec, node:$idx), ++ (LoongArchVExtractZExt node:$vec, node:$idx, i16)>; ++def vextract_zext_i32 : PatFrag<(ops node:$vec, node:$idx), 
++ (LoongArchVExtractZExt node:$vec, node:$idx, i32)>; ++def vextract_zext_i64 : PatFrag<(ops node:$vec, node:$idx), ++ (LoongArchVExtractZExt node:$vec, node:$idx, i64)>; ++ ++def vldrepl_v16i8 : PatFrag<(ops node:$v1), ++ (v16i8 (LoongArchVBROADCAST node:$v1))>; ++def vldrepl_v8i16 : PatFrag<(ops node:$v1), ++ (v8i16 (LoongArchVBROADCAST node:$v1))>; ++def vldrepl_v4i32 : PatFrag<(ops node:$v1), ++ (v4i32 (LoongArchVBROADCAST node:$v1))>; ++def vldrepl_v2i64 : PatFrag<(ops node:$v1), ++ (v2i64 (LoongArchVBROADCAST node:$v1))>; ++ ++def vinsert_v16i8 : PatFrag<(ops node:$vec, node:$val, node:$idx), ++ (v16i8 (vector_insert node:$vec, node:$val, node:$idx))>; ++def vinsert_v8i16 : PatFrag<(ops node:$vec, node:$val, node:$idx), ++ (v8i16 (vector_insert node:$vec, node:$val, node:$idx))>; ++def vinsert_v4i32 : PatFrag<(ops node:$vec, node:$val, node:$idx), ++ (v4i32 (vector_insert node:$vec, node:$val, node:$idx))>; ++def vinsert_v2i64 : PatFrag<(ops node:$vec, node:$val, node:$idx), ++ (v2i64 (vector_insert node:$vec, node:$val, node:$idx))>; ++ ++class vfsetcc_type : ++ PatFrag<(ops node:$lhs, node:$rhs), ++ (ResTy (vfsetcc (OpTy node:$lhs), (OpTy node:$rhs), CC))>; ++ ++// ISD::SETFALSE cannot occur ++def vfseteq_v4f32 : vfsetcc_type; ++def vfseteq_v2f64 : vfsetcc_type; ++def vfsetge_v4f32 : vfsetcc_type; ++def vfsetge_v2f64 : vfsetcc_type; ++def vfsetgt_v4f32 : vfsetcc_type; ++def vfsetgt_v2f64 : vfsetcc_type; ++def vfsetle_v4f32 : vfsetcc_type; ++def vfsetle_v2f64 : vfsetcc_type; ++def vfsetlt_v4f32 : vfsetcc_type; ++def vfsetlt_v2f64 : vfsetcc_type; ++def vfsetne_v4f32 : vfsetcc_type; ++def vfsetne_v2f64 : vfsetcc_type; ++def vfsetoeq_v4f32 : vfsetcc_type; ++def vfsetoeq_v2f64 : vfsetcc_type; ++def vfsetoge_v4f32 : vfsetcc_type; ++def vfsetoge_v2f64 : vfsetcc_type; ++def vfsetogt_v4f32 : vfsetcc_type; ++def vfsetogt_v2f64 : vfsetcc_type; ++def vfsetole_v4f32 : vfsetcc_type; ++def vfsetole_v2f64 : vfsetcc_type; ++def vfsetolt_v4f32 : vfsetcc_type; ++def vfsetolt_v2f64 : vfsetcc_type; ++def vfsetone_v4f32 : vfsetcc_type; ++def vfsetone_v2f64 : vfsetcc_type; ++def vfsetord_v4f32 : vfsetcc_type; ++def vfsetord_v2f64 : vfsetcc_type; ++def vfsetun_v4f32 : vfsetcc_type; ++def vfsetun_v2f64 : vfsetcc_type; ++def vfsetueq_v4f32 : vfsetcc_type; ++def vfsetueq_v2f64 : vfsetcc_type; ++def vfsetuge_v4f32 : vfsetcc_type; ++def vfsetuge_v2f64 : vfsetcc_type; ++def vfsetugt_v4f32 : vfsetcc_type; ++def vfsetugt_v2f64 : vfsetcc_type; ++def vfsetule_v4f32 : vfsetcc_type; ++def vfsetule_v2f64 : vfsetcc_type; ++def vfsetult_v4f32 : vfsetcc_type; ++def vfsetult_v2f64 : vfsetcc_type; ++def vfsetune_v4f32 : vfsetcc_type; ++def vfsetune_v2f64 : vfsetcc_type; ++ ++ ++ ++// ISD::SETTRUE cannot occur ++// ISD::SETFALSE2 cannot occur ++// ISD::SETTRUE2 cannot occur ++ ++class vsetcc_type : ++ PatFrag<(ops node:$lhs, node:$rhs), ++ (ResTy (vsetcc node:$lhs, node:$rhs, CC))>; ++ ++def vseteq_v16i8 : vsetcc_type; ++def vseteq_v8i16 : vsetcc_type; ++def vseteq_v4i32 : vsetcc_type; ++def vseteq_v2i64 : vsetcc_type; ++def vsetle_v16i8 : vsetcc_type; ++def vsetle_v8i16 : vsetcc_type; ++def vsetle_v4i32 : vsetcc_type; ++def vsetle_v2i64 : vsetcc_type; ++def vsetlt_v16i8 : vsetcc_type; ++def vsetlt_v8i16 : vsetcc_type; ++def vsetlt_v4i32 : vsetcc_type; ++def vsetlt_v2i64 : vsetcc_type; ++def vsetule_v16i8 : vsetcc_type; ++def vsetule_v8i16 : vsetcc_type; ++def vsetule_v4i32 : vsetcc_type; ++def vsetule_v2i64 : vsetcc_type; ++def vsetult_v16i8 : vsetcc_type; ++def vsetult_v8i16 : vsetcc_type; ++def vsetult_v4i32 : 
vsetcc_type; ++def vsetult_v2i64 : vsetcc_type; ++ ++def vsplati8 : PatFrag<(ops node:$e0), ++ (v16i8 (build_vector node:$e0, node:$e0, ++ node:$e0, node:$e0, ++ node:$e0, node:$e0, ++ node:$e0, node:$e0, ++ node:$e0, node:$e0, ++ node:$e0, node:$e0, ++ node:$e0, node:$e0, ++ node:$e0, node:$e0))>; ++def vsplati16 : PatFrag<(ops node:$e0), ++ (v8i16 (build_vector node:$e0, node:$e0, ++ node:$e0, node:$e0, ++ node:$e0, node:$e0, ++ node:$e0, node:$e0))>; ++def vsplati32 : PatFrag<(ops node:$e0), ++ (v4i32 (build_vector node:$e0, node:$e0, ++ node:$e0, node:$e0))>; ++ ++def vsplati64_imm_eq_1 : PatLeaf<(bitconvert (v4i32 (build_vector))), [{ ++ APInt Imm; ++ SDNode *BV = N->getOperand(0).getNode(); ++ EVT EltTy = N->getValueType(0).getVectorElementType(); ++ ++ return selectVSplat(BV, Imm, EltTy.getSizeInBits()) && ++ Imm.getBitWidth() == EltTy.getSizeInBits() && Imm == 1; ++}]>; ++ ++def vsplati64 : PatFrag<(ops node:$e0), ++ (v2i64 (build_vector node:$e0, node:$e0))>; ++ ++def vsplati64_splat_d : PatFrag<(ops node:$e0), ++ (v2i64 (bitconvert ++ (v4i32 (and ++ (v4i32 (build_vector node:$e0, ++ node:$e0, ++ node:$e0, ++ node:$e0)), ++ vsplati64_imm_eq_1))))>; ++ ++def vsplatf32 : PatFrag<(ops node:$e0), ++ (v4f32 (build_vector node:$e0, node:$e0, ++ node:$e0, node:$e0))>; ++def vsplatf64 : PatFrag<(ops node:$e0), ++ (v2f64 (build_vector node:$e0, node:$e0))>; ++ ++def vsplati8_elt : PatFrag<(ops node:$v, node:$i), ++ (LoongArchVSHF (vsplati8 node:$i), node:$v, node:$v)>; ++def vsplati16_elt : PatFrag<(ops node:$v, node:$i), ++ (LoongArchVSHF (vsplati16 node:$i), node:$v, node:$v)>; ++def vsplati32_elt : PatFrag<(ops node:$v, node:$i), ++ (LoongArchVSHF (vsplati32 node:$i), node:$v, node:$v)>; ++def vsplati64_elt : PatFrag<(ops node:$v, node:$i), ++ (LoongArchVSHF (vsplati64_splat_d node:$i),node:$v, node:$v)>; ++ ++class SplatPatLeaf ++ : PatLeaf { ++ Operand OpClass = opclass; ++} ++ ++class SplatComplexPattern roots = [], ++ list props = []> : ++ ComplexPattern { ++ Operand OpClass = opclass; ++} ++ ++def vsplati8_uimm3 : SplatComplexPattern; ++ ++def vsplati8_uimm4 : SplatComplexPattern; ++ ++def vsplati8_uimm5 : SplatComplexPattern; ++ ++def vsplati8_uimm8 : SplatComplexPattern; ++ ++def vsplati8_simm5 : SplatComplexPattern; ++ ++def vsplati16_uimm3 : SplatComplexPattern; ++ ++def vsplati16_uimm4 : SplatComplexPattern; ++ ++def vsplati16_uimm5 : SplatComplexPattern; ++ ++def vsplati16_simm5 : SplatComplexPattern; ++ ++def vsplati32_uimm2 : SplatComplexPattern; ++ ++def vsplati32_uimm5 : SplatComplexPattern; ++ ++def vsplati32_simm5 : SplatComplexPattern; ++ ++def vsplati64_uimm1 : SplatComplexPattern; ++ ++def vsplati64_uimm5 : SplatComplexPattern; ++ ++def vsplati64_uimm6 : SplatComplexPattern; ++ ++def vsplati64_simm5 : SplatComplexPattern; ++ ++ ++// Any build_vector that is a constant splat with a value that equals 1 ++// FIXME: These should be a ComplexPattern but we can't use them because the ++// ISel generator requires the uses to have a name, but providing a name ++// causes other errors ("used in pattern but not operand list") ++def vsplat_imm_eq_1 : PatLeaf<(build_vector), [{ ++ APInt Imm; ++ EVT EltTy = N->getValueType(0).getVectorElementType(); ++ ++ return selectVSplat(N, Imm, EltTy.getSizeInBits()) && ++ Imm.getBitWidth() == EltTy.getSizeInBits() && Imm == 1; ++}]>; ++ ++def vbitclr_b : PatFrag<(ops node:$vj, node:$vk), ++ (and node:$vj, (xor (shl vsplat_imm_eq_1, node:$vk), ++ immAllOnesV))>; ++def vbitclr_h : PatFrag<(ops node:$vj, node:$vk), ++ (and node:$vj, (xor (shl 
vsplat_imm_eq_1, node:$vk), ++ immAllOnesV))>; ++def vbitclr_w : PatFrag<(ops node:$vj, node:$vk), ++ (and node:$vj, (xor (shl vsplat_imm_eq_1, node:$vk), ++ immAllOnesV))>; ++def vbitclr_d : PatFrag<(ops node:$vj, node:$vk), ++ (and node:$vj, (xor (shl (v2i64 vsplati64_imm_eq_1), ++ node:$vk), ++ (bitconvert (v4i32 immAllOnesV))))>; ++ ++def vbneg_b : PatFrag<(ops node:$vj, node:$vk), ++ (xor node:$vj, (shl vsplat_imm_eq_1, node:$vk))>; ++def vbneg_h : PatFrag<(ops node:$vj, node:$vk), ++ (xor node:$vj, (shl vsplat_imm_eq_1, node:$vk))>; ++def vbneg_w : PatFrag<(ops node:$vj, node:$vk), ++ (xor node:$vj, (shl vsplat_imm_eq_1, node:$vk))>; ++def vbneg_d : PatFrag<(ops node:$vj, node:$vk), ++ (xor node:$vj, (shl (v2i64 vsplati64_imm_eq_1), ++ node:$vk))>; ++ ++def vbset_b : PatFrag<(ops node:$vj, node:$vk), ++ (or node:$vj, (shl vsplat_imm_eq_1, node:$vk))>; ++def vbset_h : PatFrag<(ops node:$vj, node:$vk), ++ (or node:$vj, (shl vsplat_imm_eq_1, node:$vk))>; ++def vbset_w : PatFrag<(ops node:$vj, node:$vk), ++ (or node:$vj, (shl vsplat_imm_eq_1, node:$vk))>; ++def vbset_d : PatFrag<(ops node:$vj, node:$vk), ++ (or node:$vj, (shl (v2i64 vsplati64_imm_eq_1), ++ node:$vk))>; ++ ++def muladd : PatFrag<(ops node:$vd, node:$vj, node:$vk), ++ (add node:$vd, (mul node:$vj, node:$vk))>; ++ ++def mulsub : PatFrag<(ops node:$vd, node:$vj, node:$vk), ++ (sub node:$vd, (mul node:$vj, node:$vk))>; ++ ++class IsCommutable { ++ bit isCommutable = 1; ++} ++ ++ ++ ++//class ++class LSX_3R_DESC_BASE { ++ dag OutOperandList = (outs ROVD:$vd); ++ dag InOperandList = (ins ROVJ:$vj, ROVK:$vk); ++ string AsmString = !strconcat(instr_asm, "\t$vd, $vj, $vk"); ++ list Pattern = [(set ROVD:$vd, (OpNode ROVJ:$vj, ROVK:$vk))]; ++} ++ ++class LSX_3RN_DESC_BASE : ++ LSX_3R_DESC_BASE; ++ ++class LSX_3R_4R_DESC_BASE { ++ dag OutOperandList = (outs ROVD:$vd); ++ dag InOperandList = (ins ROVD:$vd_in, ROVJ:$vj, ROVK:$vk); ++ string AsmString = !strconcat(instr_asm, "\t$vd, $vj, $vk"); ++ list Pattern = [(set ROVD:$vd, (OpNode ROVD:$vd_in, ROVJ:$vj, ++ ROVK:$vk))]; ++ string Constraints = "$vd = $vd_in"; ++} ++ ++class LSX_3R_VREPLVE_DESC_BASE { ++ dag OutOperandList = (outs ROVD:$vd); ++ dag InOperandList = (ins ROVJ:$vj, GPR32Opnd:$rk); ++ string AsmString = !strconcat(instr_asm, "\t$vd, $vj, $rk"); ++ list Pattern = [(set ROVD:$vd, (OpNode ROVJ:$vj, GPR32Opnd:$rk))]; ++} ++ ++class LSX_VEC_DESC_BASE { ++ dag OutOperandList = (outs ROVD:$vd); ++ dag InOperandList = (ins ROVJ:$vj, ROVK:$vk); ++ string AsmString = !strconcat(instr_asm, "\t$vd, $vj, $vk"); ++ list Pattern = [(set ROVD:$vd, (OpNode ROVJ:$vj, ROVK:$vk))]; ++} ++ ++class LSX_VEC_PSEUDO_BASE : ++ LSXPseudo<(outs ROVD:$vd), (ins ROVJ:$vj, ROVK:$vk), ++ [(set ROVD:$vd, (OpNode ROVJ:$vj, ROVK:$vk))]>; ++ ++class LSX_3RF_DESC_BASE : ++ LSX_3R_DESC_BASE; ++ ++class LSX_3RFN_DESC_BASE : ++ LSX_3R_DESC_BASE; ++ ++class LSX_3R_DESC_BASE1 { ++ dag OutOperandList = (outs ROVD:$vd); ++ dag InOperandList = (ins ROVJ:$vj, ROVK:$vk); ++ string AsmString = !strconcat(instr_asm, "\t$vd, $vj, $vk"); ++ list Pattern = [(set ROVD:$vd, (OpNode ROVJ:$vk, ROVK:$vj))]; ++} ++ ++class LSX_3RF_DESC_BASE1 : ++ LSX_3R_DESC_BASE1; ++ ++class LSX_3R_VSHF_DESC_BASE { ++ dag OutOperandList = (outs ROVD:$vd); ++ dag InOperandList = (ins ROVD:$vd_in, ROVJ:$vj, ROVK:$vk); ++ string AsmString = !strconcat(instr_asm, "\t$vd, $vj, $vk"); ++ list Pattern = [(set ROVD:$vd, (LoongArchVSHF ROVD:$vd_in, ROVJ:$vj, ++ ROVK:$vk))]; ++ string Constraints = "$vd = $vd_in"; ++} ++ ++class LSX_3R_4R_VSHF_DESC_BASE 
{ ++ dag OutOperandList = (outs ROVD:$vd); ++ dag InOperandList = (ins ROVJ:$vj, ROVK:$vk, ROVD:$va); ++ string AsmString = !strconcat(instr_asm, "\t$vd, $vj, $vk, $va"); ++ list Pattern = [(set ROVD:$vd, (LoongArchVSHF ROVD:$va, ROVJ:$vj, ++ ROVK:$vk))]; ++} ++ ++class LSX_I5_DESC_BASE { ++ dag OutOperandList = (outs ROVD:$vd); ++ dag InOperandList = (ins ROVJ:$vj, SplatImm.OpClass:$si5); ++ string AsmString = !strconcat(instr_asm, "\t$vd, $vj, $si5"); ++ list Pattern = [(set ROVD:$vd, (OpNode ROVJ:$vj, SplatImm:$si5))]; ++} ++ ++class LSX_I5_U_DESC_BASE { ++ dag OutOperandList = (outs ROVD:$vd); ++ dag InOperandList = (ins ROVJ:$vj, SplatImm.OpClass:$ui5); ++ string AsmString = !strconcat(instr_asm, "\t$vd, $vj, $ui5"); ++ list Pattern = [(set ROVD:$vd, (OpNode ROVJ:$vj, SplatImm:$ui5))]; ++} ++ ++class LSX_BIT_3_DESC_BASE { ++ dag OutOperandList = (outs ROVD:$vd); ++ dag InOperandList = (ins ROVJ:$vj, ImmOp:$ui3); ++ string AsmString = !strconcat(instr_asm, "\t$vd, $vj, $ui3"); ++ list Pattern = [(set ROVD:$vd, (OpNode ROVJ:$vj, Imm:$ui3))]; ++} ++ ++class LSX_BIT_3N_DESC_BASE : ++ LSX_BIT_3_DESC_BASE; ++ ++class LSX_BIT_4_DESC_BASE { ++ dag OutOperandList = (outs ROVD:$vd); ++ dag InOperandList = (ins ROVJ:$vj, ImmOp:$ui4); ++ string AsmString = !strconcat(instr_asm, "\t$vd, $vj, $ui4"); ++ list Pattern = [(set ROVD:$vd, (OpNode ROVJ:$vj, Imm:$ui4))]; ++} ++ ++class LSX_BIT_4N_DESC_BASE : ++ LSX_BIT_4_DESC_BASE; ++ ++class LSX_BIT_5_DESC_BASE { ++ dag OutOperandList = (outs ROVD:$vd); ++ dag InOperandList = (ins ROVJ:$vj, ImmOp:$ui5); ++ string AsmString = !strconcat(instr_asm, "\t$vd, $vj, $ui5"); ++ list Pattern = [(set ROVD:$vd, (OpNode ROVJ:$vj, Imm:$ui5))]; ++} ++ ++class LSX_BIT_5N_DESC_BASE : ++ LSX_BIT_5_DESC_BASE; ++ ++class LSX_BIT_6_DESC_BASE { ++ dag OutOperandList = (outs ROVD:$vd); ++ dag InOperandList = (ins ROVJ:$vj, ImmOp:$ui6); ++ string AsmString = !strconcat(instr_asm, "\t$vd, $vj, $ui6"); ++ list Pattern = [(set ROVD:$vd, (OpNode ROVJ:$vj, Imm:$ui6))]; ++} ++ ++class LSX_BIT_6N_DESC_BASE : ++ LSX_BIT_6_DESC_BASE; ++ ++class LSX_2R_DESC_BASE { ++ dag OutOperandList = (outs ROVD:$vd); ++ dag InOperandList = (ins ROVJ:$vj); ++ string AsmString = !strconcat(instr_asm, "\t$vd, $vj"); ++ list Pattern = [(set ROVD:$vd, (OpNode ROVJ:$vj))]; ++} ++ ++class LSX_2RN_DESC_BASE : ++ LSX_2R_DESC_BASE; ++ ++class LSX_2RF_DESC_BASE { ++ dag OutOperandList = (outs ROVD:$vd); ++ dag InOperandList = (ins ROVJ:$vj); ++ string AsmString = !strconcat(instr_asm, "\t$vd, $vj"); ++ list Pattern = [(set ROVD:$vd, (OpNode ROVJ:$vj))]; ++} ++ ++class LSX_2RFN_DESC_BASE : ++ LSX_2R_DESC_BASE; ++ ++class LSX_2RF_DESC_BASE_CVT { ++ dag OutOperandList = (outs ROVD:$vd); ++ dag InOperandList = (ins ROVJ:$vj); ++ string AsmString = !strconcat(instr_asm, "\t$vd, $vj"); ++ list Pattern = [(set ROVD:$vd, (OpNode ROVJ:$vj))]; ++} ++ ++class LSX_2RFN_DESC_BASE_CVT : ++ LSX_2RF_DESC_BASE_CVT; ++ ++class LSX_2RF_DESC_BASE_tmp { ++ dag OutOperandList = (outs ROVD:$vd); ++ dag InOperandList = (ins ROVJ:$vj); ++ string AsmString = !strconcat(instr_asm, "\t$vd, $vj"); ++ list Pattern = []; ++} ++ ++class LSX_2R_REPL_DESC_BASE { ++ dag OutOperandList = (outs ROVD:$vd); ++ dag InOperandList = (ins ROS:$rj); ++ string AsmString = !strconcat(instr_asm, "\t$vd, $rj"); ++ list Pattern = [(set ROVD:$vd, (VT (OpNode ROS:$rj)))]; ++} ++ ++class LSX_INSERT_U4_DESC_BASE { ++ dag OutOperandList = (outs ROVD:$vd); ++ dag InOperandList = (ins ROVD:$vd_in, ROVJ:$rj, ImmOp:$ui4); ++ string AsmString = !strconcat(instr_asm, 
"\t$vd, $rj, $ui4"); ++ list Pattern = [(set ROVD:$vd, (OpNode ROVD:$vd_in, ROVJ:$rj, Imm:$ui4))]; ++ string Constraints = "$vd = $vd_in"; ++} ++ ++class LSX_INSERT_U4N_DESC_BASE : ++ LSX_INSERT_U4_DESC_BASE; ++ ++class LSX_INSERT_U3_DESC_BASE { ++ dag OutOperandList = (outs ROVD:$vd); ++ dag InOperandList = (ins ROVD:$vd_in, ROS:$rj, ImmOp:$ui3); ++ string AsmString = !strconcat(instr_asm, "\t$vd, $rj, $ui3"); ++ list Pattern = [(set ROVD:$vd, (OpNode ROVD:$vd_in, ROS:$rj, Imm:$ui3))]; ++ string Constraints = "$vd = $vd_in"; ++} ++ ++class LSX_INSERT_U3N_DESC_BASE : ++ LSX_INSERT_U3_DESC_BASE; ++ ++class LSX_INSERT_U2_DESC_BASE { ++ dag OutOperandList = (outs ROVD:$vd); ++ dag InOperandList = (ins ROVD:$vd_in, ROS:$rj, ImmOp:$ui2); ++ string AsmString = !strconcat(instr_asm, "\t$vd, $rj, $ui2"); ++ list Pattern = [(set ROVD:$vd, (OpNode ROVD:$vd_in, ROS:$rj, Imm:$ui2))]; ++ string Constraints = "$vd = $vd_in"; ++} ++ ++class LSX_INSERT_U2N_DESC_BASE : ++ LSX_INSERT_U2_DESC_BASE; ++ ++class LSX_INSERT_U1_DESC_BASE { ++ dag OutOperandList = (outs ROVD:$vd); ++ dag InOperandList = (ins ROVD:$vd_in, ROS:$rj, ImmOp:$ui1); ++ string AsmString = !strconcat(instr_asm, "\t$vd, $rj, $ui1"); ++ list Pattern = [(set ROVD:$vd, (OpNode ROVD:$vd_in, ROS:$rj, Imm:$ui1))]; ++ string Constraints = "$vd = $vd_in"; ++} ++ ++class LSX_INSERT_U1N_DESC_BASE : ++ LSX_INSERT_U1_DESC_BASE; ++ ++class LSX_PICK_U1_DESC_BASE { ++ dag OutOperandList = (outs ROD:$rd); ++ dag InOperandList = (ins ROVJ:$vj, ImmOp:$ui1); ++ string AsmString = !strconcat(instr_asm, "\t$rd, $vj, $ui1"); ++ list Pattern = [(set ROD:$rd, (OpNode (VecTy ROVJ:$vj), Imm:$ui1))]; ++} ++ ++class LSX_PICK_U2_DESC_BASE { ++ dag OutOperandList = (outs ROD:$rd); ++ dag InOperandList = (ins ROVJ:$vj, ImmOp:$ui2); ++ string AsmString = !strconcat(instr_asm, "\t$rd, $vj, $ui2"); ++ list Pattern = [(set ROD:$rd, (OpNode (VecTy ROVJ:$vj), Imm:$ui2))]; ++} ++ ++class LSX_PICK_U3_DESC_BASE { ++ dag OutOperandList = (outs ROD:$rd); ++ dag InOperandList = (ins ROVJ:$vj, ImmOp:$ui3); ++ string AsmString = !strconcat(instr_asm, "\t$rd, $vj, $ui3"); ++ list Pattern = [(set ROD:$rd, (OpNode (VecTy ROVJ:$vj), Imm:$ui3))]; ++} ++ ++class LSX_PICK_U4_DESC_BASE { ++ dag OutOperandList = (outs ROD:$rd); ++ dag InOperandList = (ins ROVJ:$vj, ImmOp:$ui4); ++ string AsmString = !strconcat(instr_asm, "\t$rd, $vj, $ui4"); ++ list Pattern = [(set ROD:$rd, (OpNode (VecTy ROVJ:$vj), Imm:$ui4))]; ++} ++ ++class LSX_ELM_U3_VREPLVE_DESC_BASE { ++ dag OutOperandList = (outs ROVD:$vd); ++ dag InOperandList = (ins ROVJ:$vj, SplatImm.OpClass:$ui3); ++ string AsmString = !strconcat(instr_asm, "\t$vd, $vj, $ui3"); ++ list Pattern = [(set ROVD:$vd, (LoongArchVSHF SplatImm:$ui3, ROVJ:$vj, ++ ROVJ:$vj))]; ++} ++ ++class LSX_ELM_U2_VREPLVE_DESC_BASE { ++ dag OutOperandList = (outs ROVD:$vd); ++ dag InOperandList = (ins ROVJ:$vj, SplatImm.OpClass:$ui2); ++ string AsmString = !strconcat(instr_asm, "\t$vd, $vj, $ui2"); ++ list Pattern = [(set ROVD:$vd, (LoongArchVSHF SplatImm:$ui2, ROVJ:$vj, ++ ROVJ:$vj))]; ++} ++ ++class LSX_ELM_U1_VREPLVE_DESC_BASE { ++ dag OutOperandList = (outs ROVD:$vd); ++ dag InOperandList = (ins ROVJ:$vj, SplatImm.OpClass:$ui1); ++ string AsmString = !strconcat(instr_asm, "\t$vd, $vj, $ui1"); ++ list Pattern = [(set ROVD:$vd, (LoongArchVSHF SplatImm:$ui1, ROVJ:$vj, ++ ROVJ:$vj))]; ++} ++ ++class LSX_ELM_U4_VREPLVE_DESC_BASE { ++ dag OutOperandList = (outs ROVD:$vd); ++ dag InOperandList = (ins ROVJ:$vj, SplatImm.OpClass:$ui4); ++ string AsmString = 
!strconcat(instr_asm, "\t$vd, $vj, $ui4"); ++ list Pattern = [(set ROVD:$vd, (LoongArchVSHF SplatImm:$ui4, ROVJ:$vj, ++ ROVJ:$vj))]; ++} ++ ++class LSX_ELM_U4_SLD_DESC_BASE { ++ dag OutOperandList = (outs ROVD:$vd); ++ dag InOperandList = (ins ROVD:$vd_in, ROVJ:$vj, ImmOp:$ui4); ++ string AsmString = !strconcat(instr_asm, "\t$vd, $vj, $ui4"); ++ list Pattern = [(set ROVD:$vd, (OpNode ROVD:$vd_in, ROVJ:$vj, ++ Imm:$ui4))]; ++ string Constraints = "$vd = $vd_in"; ++} ++ ++class LSX_ELM_U3_SLD_DESC_BASE { ++ dag OutOperandList = (outs ROVD:$vd); ++ dag InOperandList = (ins ROVD:$vd_in, ROVJ:$vj, ImmOp:$ui3); ++ string AsmString = !strconcat(instr_asm, "\t$vd, $vj, $ui3"); ++ list Pattern = [(set ROVD:$vd, (OpNode ROVD:$vd_in, ROVJ:$vj, ++ Imm:$ui3))]; ++ string Constraints = "$vd = $vd_in"; ++} ++ ++class LSX_ELM_U2_SLD_DESC_BASE { ++ dag OutOperandList = (outs ROVD:$vd); ++ dag InOperandList = (ins ROVD:$vd_in, ROVJ:$vj, ImmOp:$ui2); ++ string AsmString = !strconcat(instr_asm, "\t$vd, $vj, $ui2"); ++ list Pattern = [(set ROVD:$vd, (OpNode ROVD:$vd_in, ROVJ:$vj, ++ Imm:$ui2))]; ++ string Constraints = "$vd = $vd_in"; ++} ++ ++class LSX_ELM_U1_SLD_DESC_BASE { ++ dag OutOperandList = (outs ROVD:$vd); ++ dag InOperandList = (ins ROVD:$vd_in, ROVJ:$vj, ImmOp:$ui1); ++ string AsmString = !strconcat(instr_asm, "\t$vd, $vj, $ui1"); ++ list Pattern = [(set ROVD:$vd, (OpNode ROVD:$vd_in, ROVJ:$vj, ++ Imm:$ui1))]; ++ string Constraints = "$vd = $vd_in"; ++} ++ ++class LSX_BIT_U3_VREPLVE_DESC_BASE { ++ dag OutOperandList = (outs ROVD:$vd); ++ dag InOperandList = (ins ROVJ:$vj, SplatImm.OpClass:$ui3); ++ string AsmString = !strconcat(instr_asm, "\t$vd, $vj, $ui3"); ++ list Pattern = [(set ROVD:$vd, (OpNode ROVJ:$vj, SplatImm:$ui3))]; ++} ++ ++class LSX_BIT_U4_VREPLVE_DESC_BASE { ++ dag OutOperandList = (outs ROVD:$vd); ++ dag InOperandList = (ins ROVJ:$vj, SplatImm.OpClass:$ui4); ++ string AsmString = !strconcat(instr_asm, "\t$vd, $vj, $ui4"); ++ list Pattern = [(set ROVD:$vd, (OpNode ROVJ:$vj, SplatImm:$ui4))]; ++} ++ ++class LSX_BIT_U5_VREPLVE_DESC_BASE { ++ dag OutOperandList = (outs ROVD:$vd); ++ dag InOperandList = (ins ROVJ:$vj, SplatImm.OpClass:$ui5); ++ string AsmString = !strconcat(instr_asm, "\t$vd, $vj, $ui5"); ++ list Pattern = [(set ROVD:$vd, (OpNode ROVJ:$vj, SplatImm:$ui5))]; ++} ++ ++class LSX_BIT_U6_VREPLVE_DESC_BASE { ++ dag OutOperandList = (outs ROVD:$vd); ++ dag InOperandList = (ins ROVJ:$vj, SplatImm.OpClass:$ui6); ++ string AsmString = !strconcat(instr_asm, "\t$vd, $vj, $ui6"); ++ list Pattern = [(set ROVD:$vd, (OpNode ROVJ:$vj, SplatImm:$ui6))]; ++} ++ ++class LSX_BIT_U6_VREPLVE_DESC_BASE_Intrinsic { ++ dag OutOperandList = (outs ROVD:$vd); ++ dag InOperandList = (ins ROVJ:$vj, uimm6:$ui6); ++ string AsmString = !strconcat(instr_asm, "\t$vd, $vj, $ui6"); ++ list Pattern = [(set ROVD:$vd, (OpNode ROVJ:$vj, immZExt6:$ui6))]; ++} ++ ++class LSX_BIT_U3_VREPLVE_DESC_BASE_Intrinsic { ++ dag OutOperandList = (outs ROVD:$vd); ++ dag InOperandList = (ins ROVJ:$vj, uimm3:$ui3); ++ string AsmString = !strconcat(instr_asm, "\t$vd, $vj, $ui3"); ++ list Pattern = [(set ROVD:$vd, (OpNode ROVJ:$vj, immZExt3:$ui3))]; ++} ++ ++class LSX_BIT_U4_VREPLVE_DESC_BASE_Intrinsic { ++ dag OutOperandList = (outs ROVD:$vd); ++ dag InOperandList = (ins ROVJ:$vj, uimm4:$ui4); ++ string AsmString = !strconcat(instr_asm, "\t$vd, $vj, $ui4"); ++ list Pattern = [(set ROVD:$vd, (OpNode ROVJ:$vj, immZExt4:$ui4))]; ++} ++ ++class LSX_BIT_U5_VREPLVE_DESC_BASE_Intrinsic { ++ dag OutOperandList = (outs ROVD:$vd); ++ dag 
InOperandList = (ins ROVJ:$vj, uimm5:$ui5); ++ string AsmString = !strconcat(instr_asm, "\t$vd, $vj, $ui5"); ++ list Pattern = [(set ROVD:$vd, (OpNode ROVJ:$vj, immZExt5:$ui5))]; ++} ++ ++class LSX_I8_SHF_DESC_BASE { ++ dag OutOperandList = (outs ROVD:$vd); ++ dag InOperandList = (ins ROVJ:$vj, uimm8:$ui8); ++ string AsmString = !strconcat(instr_asm, "\t$vd, $vj, $ui8"); ++ list Pattern = [(set ROVD:$vd, (LoongArchSHF immZExt8:$ui8, ROVJ:$vj))]; ++} ++ ++class LSX_I8_SHUF_DESC_BASE_D { ++ dag OutOperandList = (outs ROVD:$vd); ++ dag InOperandList = (ins ROVD:$vd_in, ROVJ:$vj, uimm8:$ui8); ++ string AsmString = !strconcat(instr_asm, "\t$vd, $vj, $ui8"); ++ list Pattern = [(set ROVD:$vd, (OpNode ROVD:$vd_in, ROVJ:$vj, immZExt8:$ui8))]; ++ string Constraints = "$vd = $vd_in"; ++} ++ ++def LoongArchSelect : SDNode<"LoongArchISD::VSELECT" ,SDTSelect>; ++def LoongArchVROR : SDNode<"LoongArchISD::VROR", ++ SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisInt<0>, ++ SDTCisSameAs<0, 1>, SDTCisSameAs<0, 2>]>, []>; ++def LoongArchVRORI : SDNode<"LoongArchISD::VRORI", ++ SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisInt<0>, ++ SDTCisSameAs<0, 1>, SDTCisVT<2, i32>]>, []>; ++ ++class LSX2_RORI_U3_DESC_BASE { ++ dag OutOperandList = (outs ROVD:$vd); ++ dag InOperandList = (ins ROVJ:$vj, ImmOp:$ui3); ++ string AsmString = !strconcat(instr_asm, "\t$vd, $vj, $ui3"); ++ list Pattern = [(set ROVD:$vd, (LoongArchVRORI ROVJ:$vj, Imm:$ui3))]; ++} ++ ++class LSX2_RORI_U4_DESC_BASE { ++ dag OutOperandList = (outs ROVD:$vd); ++ dag InOperandList = (ins ROVJ:$vj, ImmOp:$ui4); ++ string AsmString = !strconcat(instr_asm, "\t$vd, $vj, $ui4"); ++ list Pattern = [(set ROVD:$vd, (LoongArchVRORI ROVJ:$vj, Imm:$ui4))]; ++} ++ ++class LSX2_RORI_U5_DESC_BASE { ++ dag OutOperandList = (outs ROVD:$vd); ++ dag InOperandList = (ins ROVJ:$vj, ImmOp:$ui5); ++ string AsmString = !strconcat(instr_asm, "\t$vd, $vj, $ui5"); ++ list Pattern = [(set ROVD:$vd, (LoongArchVRORI ROVJ:$vj, Imm:$ui5))]; ++} ++ ++class LSX2_RORI_U6_DESC_BASE { ++ dag OutOperandList = (outs ROVD:$vd); ++ dag InOperandList = (ins ROVJ:$vj, ImmOp:$ui6); ++ string AsmString = !strconcat(instr_asm, "\t$vd, $vj, $ui6"); ++ list Pattern = [(set ROVD:$vd, (LoongArchVRORI ROVJ:$vj, Imm:$ui6))]; ++} ++ ++class LSX_BIND_U4_DESC_BASE { ++ dag OutOperandList = (outs ROVD:$vd); ++ dag InOperandList = (ins ROVD:$vd_in, ROVJ:$vj, ImmOp:$ui4); ++ string AsmString = !strconcat(instr_asm, "\t$vd, $vj, $ui4"); ++ list Pattern = [(set ROVD:$vd, (OpNode ROVD:$vd_in, ROVJ:$vj, Imm:$ui4))]; ++ string Constraints = "$vd = $vd_in"; ++} ++ ++class LSX_BIND_U4N_DESC_BASE : ++ LSX_BIND_U4_DESC_BASE; ++ ++class LSX_BIND_U5_DESC_BASE { ++ dag OutOperandList = (outs ROVD:$vd); ++ dag InOperandList = (ins ROVD:$vd_in, ROVJ:$vj, ImmOp:$ui5); ++ string AsmString = !strconcat(instr_asm, "\t$vd, $vj, $ui5"); ++ list Pattern = [(set ROVD:$vd, (OpNode ROVD:$vd_in, ROVJ:$vj, Imm:$ui5))]; ++ string Constraints = "$vd = $vd_in"; ++} ++ ++class LSX_BIND_U5N_DESC_BASE : ++ LSX_BIND_U5_DESC_BASE; ++ ++class LSX_BIND_U6_DESC_BASE { ++ dag OutOperandList = (outs ROVD:$vd); ++ dag InOperandList = (ins ROVD:$vd_in, ROVJ:$vj, ImmOp:$ui6); ++ string AsmString = !strconcat(instr_asm, "\t$vd, $vj, $ui6"); ++ list Pattern = [(set ROVD:$vd, (OpNode ROVD:$vd_in, ROVJ:$vj, Imm:$ui6))]; ++ string Constraints = "$vd = $vd_in"; ++} ++ ++class LSX_BIND_U6N_DESC_BASE : ++ LSX_BIND_U6_DESC_BASE; ++ ++class LSX_BIND_U7_DESC_BASE { ++ dag OutOperandList = (outs ROVD:$vd); ++ dag InOperandList = (ins ROVD:$vd_in, ROVJ:$vj, uimm7:$ui7); ++ 
string AsmString = !strconcat(instr_asm, "\t$vd, $vj, $ui7"); ++ list Pattern = [(set ROVD:$vd, (OpNode ROVD:$vd_in, ROVJ:$vj, immZExt7:$ui7))]; ++ string Constraints = "$vd = $vd_in"; ++} ++ ++class LSX_BIND_U7N_DESC_BASE : ++ LSX_BIND_U7_DESC_BASE; ++ ++ ++class LD_DESC_BASE { ++ dag OutOperandList = (outs ROVD:$vd); ++ dag InOperandList = (ins MemOpnd:$addr); ++ string AsmString = !strconcat(instr_asm, "\t$vd, $addr"); ++ list Pattern = [(set ROVD:$vd, (TyNode (OpNode Addr:$addr)))]; ++ string DecoderMethod = "DecodeLSX128Mem"; ++} ++ ++class ST_DESC_BASE { ++ dag OutOperandList = (outs); ++ dag InOperandList = (ins ROVD:$vd, MemOpnd:$addr); ++ string AsmString = !strconcat(instr_asm, "\t$vd, $addr"); ++ list Pattern = [(OpNode (TyNode ROVD:$vd), Addr:$addr)]; ++ string DecoderMethod = "DecodeLSX128Mem"; ++} ++ ++class LSX_VEC_ADDR_PSEUDO_BASE : ++ LSXPseudo<(outs), (ins ROVD:$vd, MemOpnd:$addr), ++ [(OpNode (TyNode ROVD:$vd), MemOpnd:$addr)]>; ++ ++ ++class LSX_SET_DESC_BASE { ++ dag OutOperandList = (outs FCFROpnd:$cd); ++ dag InOperandList = (ins ROVD:$vj); ++ string AsmString = !strconcat(instr_asm, "\t$cd, $vj"); ++ list Pattern = []; ++} ++ ++class LSX_SET_DESC_BASE_tmp { ++ dag OutOperandList = (outs FCFROpnd:$cd); ++ dag InOperandList = (ins ROVD:$vj); ++ string AsmString = !strconcat(instr_asm, "\t$cd, $vj"); ++ list Pattern = []; ++} ++ ++class LSX_VMul_Reg4 { ++ dag OutOperandList = (outs ROVD:$vd); ++ dag InOperandList = (ins ROVJ:$vj, ROVK:$vk, ROVA:$va); ++ string AsmString = !strconcat(instr_asm, "\t$vd, $vj, $vk, $va"); ++ list Pattern = [(set ROVD:$vd, (OpNode ROVJ:$vj, ROVK:$vk, ROVA:$va))]; ++} ++ ++class LSX_4RF { ++ dag OutOperandList = (outs ROVD:$vd); ++ dag InOperandList = (ins ROVJ:$vj, ROVK:$vk, ROVA:$va); ++ string AsmString = !strconcat(instr_asm, "\t$vd, $vj, $vk, $va"); ++ list Pattern = [(set ROVD:$vd, (OpNode ROVJ:$vj, ROVK:$vk, ROVA:$va))]; ++} ++ ++ ++class LSX_VFCMP_Reg3 { ++ dag OutOperandList = (outs ROVD:$vd); ++ dag InOperandList = (ins ROVJ:$vj, ROVK:$vk); ++ string AsmString = !strconcat(instr_asm, "\t$vd, $vj, $vk"); ++ list Pattern = [(set ROVD:$vd, (OpNode ROVJ:$vj, ROVK:$vk))]; ++} ++ ++class LSX_I12_DESC_BASE { ++ dag OutOperandList = (outs ROVD:$vd); ++ dag InOperandList = (ins PtrRC:$rj, ImmOp:$si12); ++ string AsmString = !strconcat(instr_asm, "\t$vd, $rj, $si12"); ++ list Pattern = [(set ROVD:$vd, (OpNode iPTR:$rj, Imm:$si12))]; ++} ++ ++class LSX_I11_DESC_BASE { ++ dag OutOperandList = (outs ROVD:$vd); ++ dag InOperandList = (ins PtrRC:$rj, ImmOp:$si11); ++ string AsmString = !strconcat(instr_asm, "\t$vd, $rj, $si11"); ++ list Pattern = [(set ROVD:$vd, (OpNode iPTR:$rj, Imm:$si11))]; ++} ++ ++class LSX_I10_DESC_BASE { ++ dag OutOperandList = (outs ROVD:$vd); ++ dag InOperandList = (ins PtrRC:$rj, ImmOp:$si10); ++ string AsmString = !strconcat(instr_asm, "\t$vd, $rj, $si10"); ++ list Pattern = [(set ROVD:$vd, (OpNode iPTR:$rj, Imm:$si10))]; ++} ++ ++class LSX_I9_DESC_BASE { ++ dag OutOperandList = (outs ROVD:$vd); ++ dag InOperandList = (ins PtrRC:$rj, ImmOp:$si9); ++ string AsmString = !strconcat(instr_asm, "\t$vd, $rj, $si9"); ++ list Pattern = [(set ROVD:$vd, (OpNode iPTR:$rj, Imm:$si9))]; ++} ++ ++ ++class LSX_I8_U1_DESC_BASE { ++ dag OutOperandList = (outs); ++ dag InOperandList = (ins ROVD:$vd, PtrRC:$rj, ImmOp:$si8, uimm1:$idx); ++ string AsmString = !strconcat(instr_asm, "\t$vd, $rj, $si8, $idx"); ++ list Pattern = [(OpNode ROVD:$vd, iPTR:$rj, Imm:$si8, immZExt1:$idx)]; ++ string DecoderMethod = "DecodeLSX128memstl"; ++} ++ ++ 
++class LSX_I8_U2_DESC_BASE { ++ dag OutOperandList = (outs); ++ dag InOperandList = (ins ROVD:$vd, PtrRC:$rj, ImmOp:$si8, uimm2:$idx); ++ string AsmString = !strconcat(instr_asm, "\t$vd, $rj, $si8, $idx"); ++ list Pattern = [(OpNode ROVD:$vd, iPTR:$rj, Imm:$si8, immZExt2:$idx)]; ++ string DecoderMethod = "DecodeLSX128memstl"; ++} ++ ++class LSX_I8_U3_DESC_BASE { ++ dag OutOperandList = (outs); ++ dag InOperandList = (ins ROVD:$vd, PtrRC:$rj, ImmOp:$si8, uimm3:$idx); ++ string AsmString = !strconcat(instr_asm, "\t$vd, $rj, $si8, $idx"); ++ list Pattern = [(OpNode ROVD:$vd, iPTR:$rj, Imm:$si8, immZExt3:$idx)]; ++ string DecoderMethod = "DecodeLSX128memstl"; ++} ++ ++class LSX_I8_U4_DESC_BASE { ++ dag OutOperandList = (outs); ++ dag InOperandList = (ins ROVD:$vd, PtrRC:$rj, ImmOp:$si8, uimm4:$idx); ++ string AsmString = !strconcat(instr_asm, "\t$vd, $rj, $si8, $idx"); ++ list Pattern = [(OpNode ROVD:$vd, iPTR:$rj, Imm:$si8, immZExt4:$idx)]; ++ string DecoderMethod = "DecodeLSX128memstl"; ++} ++ ++class LSX_I5_U_DESC_BASE_Intrinsic { ++ dag OutOperandList = (outs ROVD:$vd); ++ dag InOperandList = (ins ROVJ:$vj, ImmOp:$ui5); ++ string AsmString = !strconcat(instr_asm, "\t$vd, $vj, $ui5"); ++ list Pattern = [(set ROVD:$vd, (OpNode ROVJ:$vj, Imm:$ui5))]; ++} ++ ++class LSX_I5_DESC_BASE_Intrinsic { ++ dag OutOperandList = (outs ROVD:$vd); ++ dag InOperandList = (ins ROVJ:$vj, ImmOp:$si5); ++ string AsmString = !strconcat(instr_asm, "\t$vd, $vj, $si5"); ++ list Pattern = [(set ROVD:$vd, (OpNode ROVJ:$vj, Imm:$si5))]; ++} ++ ++class LSX_LDX_LA { ++ dag OutOperandList = (outs ROVD:$vd); ++ dag InOperandList = (ins PtrRC:$rj, RORK:$rk); ++ string AsmString = !strconcat(instr_asm, "\t$vd, $rj, $rk"); ++ list Pattern = [(set ROVD:$vd, (OpNode iPTR:$rj, RORK:$rk))]; ++} ++ ++class LSX_SDX_LA { ++ dag OutOperandList = (outs); ++ dag InOperandList = (ins ROVD:$vd, PtrRC:$rj, RORK:$rk); ++ string AsmString = !strconcat(instr_asm, "\t$vd, $rj, $rk"); ++ list Pattern = [(OpNode ROVD:$vd, iPTR:$rj, RORK:$rk)]; ++} ++ ++class LSX_U5_DESC_BASE { ++ dag OutOperandList = (outs ROVD:$vd); ++ dag InOperandList = (ins ROVJ:$vj, uimm5:$ui5); ++ string AsmString = !strconcat(instr_asm, "\t$vd, $vj, $ui5"); ++ list Pattern = [(set ROVD:$vd, (OpNode ROVJ:$vj, immZExt5:$ui5))]; ++} ++ ++class LSX_U5_4R_DESC_BASE { ++ dag OutOperandList = (outs ROVD:$vd); ++ dag InOperandList = (ins ROVD:$vd_in, ROVJ:$vj, uimm5:$ui5); ++ string AsmString = !strconcat(instr_asm, "\t$vd, $vj, $ui5"); ++ list Pattern = [(set ROVD:$vd, (OpNode ROVD:$vd_in, ROVJ:$vj, immZExt5:$ui5))]; ++ string Constraints = "$vd = $vd_in"; ++} ++ ++class LSX_2R_U3_DESC_BASE { ++ dag OutOperandList = (outs ROVD:$vd); ++ dag InOperandList = (ins ROVJ:$vj, uimm3:$ui3); ++ string AsmString = !strconcat(instr_asm, "\t$vd, $vj, $ui3"); ++ list Pattern = [(set ROVD:$vd, (OpNode ROVJ:$vj, immZExt3:$ui3))]; ++} ++ ++class LSX_2R_U4_DESC_BASE { ++ dag OutOperandList = (outs ROVD:$vd); ++ dag InOperandList = (ins ROVJ:$vj, uimm4:$ui4); ++ string AsmString = !strconcat(instr_asm, "\t$vd, $vj, $ui4"); ++ list Pattern = [(set ROVD:$vd, (OpNode ROVJ:$vj, immZExt4:$ui4))]; ++} ++ ++class LSX_2R_U5_DESC_BASE { ++ dag OutOperandList = (outs ROVD:$vd); ++ dag InOperandList = (ins ROVJ:$vj, uimm5:$ui5); ++ string AsmString = !strconcat(instr_asm, "\t$vd, $vj, $ui5"); ++ list Pattern = [(set ROVD:$vd, (OpNode ROVJ:$vj, immZExt5:$ui5))]; ++} ++ ++class LSX_2R_U6_DESC_BASE { ++ dag OutOperandList = (outs ROVD:$vd); ++ dag InOperandList = (ins ROVJ:$vj, uimm6:$ui6); ++ string 
AsmString = !strconcat(instr_asm, "\t$vd, $vj, $ui6"); ++ list Pattern = [(set ROVD:$vd, (OpNode ROVJ:$vj, immZExt6:$ui6))]; ++} ++ ++class LSX_2R_3R_U4_DESC_BASE { ++ dag OutOperandList = (outs ROVD:$vd); ++ dag InOperandList = (ins ROVD:$vd_in, ROVJ:$vj, uimm4:$ui4); ++ string AsmString = !strconcat(instr_asm, "\t$vd, $vj, $ui4"); ++ list Pattern = [(set ROVD:$vd, (OpNode ROVD:$vd_in, ROVJ:$vj, immZExt4:$ui4))]; ++ string Constraints = "$vd = $vd_in"; ++} ++ ++class LSX_2R_3R_U5_DESC_BASE { ++ dag OutOperandList = (outs ROVD:$vd); ++ dag InOperandList = (ins ROVD:$vd_in, ROVJ:$vj, uimm5:$ui5); ++ string AsmString = !strconcat(instr_asm, "\t$vd, $vj, $ui5"); ++ list Pattern = [(set ROVD:$vd, (OpNode ROVD:$vd_in, ROVJ:$vj, immZExt5:$ui5))]; ++ string Constraints = "$vd = $vd_in"; ++} ++ ++class LSX_2R_3R_U6_DESC_BASE { ++ dag OutOperandList = (outs ROVD:$vd); ++ dag InOperandList = (ins ROVD:$vd_in, ROVJ:$vj, uimm6:$ui6); ++ string AsmString = !strconcat(instr_asm, "\t$vd, $vj, $ui6"); ++ list Pattern = [(set ROVD:$vd, (OpNode ROVD:$vd_in, ROVJ:$vj, immZExt6:$ui6))]; ++ string Constraints = "$vd = $vd_in"; ++} ++ ++class LSX_2R_3R_U7_DESC_BASE { ++ dag OutOperandList = (outs ROVD:$vd); ++ dag InOperandList = (ins ROVD:$vd_in, ROVJ:$vj, uimm7:$ui7); ++ string AsmString = !strconcat(instr_asm, "\t$vd, $vj, $ui7"); ++ list Pattern = [(set ROVD:$vd, (OpNode ROVD:$vd_in, ROVJ:$vj, immZExt7:$ui7))]; ++ string Constraints = "$vd = $vd_in"; ++} ++ ++class LSX_2R_3R_U8_DESC_BASE { ++ dag OutOperandList = (outs ROVD:$vd); ++ dag InOperandList = (ins ROVD:$vd_in, ROVJ:$vj, uimm8:$ui8); ++ string AsmString = !strconcat(instr_asm, "\t$vd, $vj, $ui8"); ++ list Pattern = [(set ROVD:$vd, (OpNode ROVD:$vd_in, ROVJ:$vj, immZExt8:$ui8))]; ++ string Constraints = "$vd = $vd_in"; ++} ++ ++class LSX_2R_3R_SELECT { ++ dag OutOperandList = (outs ROVD:$vd); ++ dag InOperandList = (ins ROVD:$vd_in, ROVJ:$vj, vsplat_uimm8:$ui8); ++ string AsmString = !strconcat(instr_asm, "\t$vd, $vj, $ui8"); ++ list Pattern = [(set ROVD:$vd, (OpNode ROVD:$vd_in, vsplati8_uimm8:$ui8, ROVJ:$vj))]; ++ string Constraints = "$vd = $vd_in"; ++} ++ ++class LSX_2R_U8_DESC_BASE { ++ dag OutOperandList = (outs ROVD:$vd); ++ dag InOperandList = (ins ROVJ:$vj, uimm8:$ui8); ++ string AsmString = !strconcat(instr_asm, "\t$vd, $vj, $ui8"); ++ list Pattern = [(set ROVD:$vd, (OpNode ROVJ:$vj, immZExt8:$ui8))]; ++} ++ ++class LSX_I13_DESC_BASE { ++ dag OutOperandList = (outs ROVD:$vd); ++ dag InOperandList = (ins immOp:$i13); ++ string AsmString = !strconcat(instr_asm, "\t$vd, $i13"); ++ list Pattern = [(set ROVD:$vd, (OpNode (Ty simm13:$i13)))]; ++ string DecoderMethod = "DecodeLSX128Mem13"; ++} ++ ++class LSX_I13_DESC_BASE_10 { ++ dag OutOperandList = (outs ROVD:$vd); ++ dag InOperandList = (ins vsplat_simm10:$i10); ++ string AsmString = !strconcat(instr_asm, "\t$vd, $i10"); ++ list Pattern = []; ++ bit hasSideEffects = 0; ++ string DecoderMethod = "DecodeLSX128Mem10"; ++} ++ ++class LSX_BIT_U8_VREPLVE_DESC_BASE { ++ dag OutOperandList = (outs ROVD:$vd); ++ dag InOperandList = (ins ROVJ:$vj, SplatImm.OpClass:$ui8); ++ string AsmString = !strconcat(instr_asm, "\t$vd, $vj, $ui8"); ++ list Pattern = [(set ROVD:$vd, (OpNode ROVJ:$vj, SplatImm:$ui8))]; ++} ++ ++ ++class LSXPat pred = [HasLSX]> : ++ Pat, Requires; ++ ++// Instruction encoding. 
++ ++ ++def VSADD_B : LSX_3R<0b01110000010001100>, IsCommutable, ++ LSX_3RN_DESC_BASE<"vsadd.b", LSX128BOpnd>; ++ ++def VSADD_H : LSX_3R<0b01110000010001101>, IsCommutable, ++ LSX_3RN_DESC_BASE<"vsadd.h", LSX128HOpnd>; ++ ++def VSADD_W : LSX_3R<0b01110000010001110>, IsCommutable, ++ LSX_3RN_DESC_BASE<"vsadd.w", LSX128WOpnd>; ++ ++def VSADD_D : LSX_3R<0b01110000010001111>, IsCommutable, ++ LSX_3RN_DESC_BASE<"vsadd.d", LSX128DOpnd>; ++ ++ ++def VSSUB_B : LSX_3R<0b01110000010010000>, ++ LSX_3RN_DESC_BASE<"vssub.b", LSX128BOpnd>; ++ ++def VSSUB_H : LSX_3R<0b01110000010010001>, ++ LSX_3RN_DESC_BASE<"vssub.h", LSX128HOpnd>; ++ ++def VSSUB_W : LSX_3R<0b01110000010010010>, ++ LSX_3RN_DESC_BASE<"vssub.w", LSX128WOpnd>; ++ ++def VSSUB_D : LSX_3R<0b01110000010010011>, ++ LSX_3RN_DESC_BASE<"vssub.d", LSX128DOpnd>; ++ ++ ++def VSADD_BU : LSX_3R<0b01110000010010100>, IsCommutable, ++ LSX_3RN_DESC_BASE<"vsadd.bu", LSX128BOpnd>; ++ ++def VSADD_HU : LSX_3R<0b01110000010010101>, IsCommutable, ++ LSX_3RN_DESC_BASE<"vsadd.hu", LSX128HOpnd>; ++ ++def VSADD_WU : LSX_3R<0b01110000010010110>, IsCommutable, ++ LSX_3RN_DESC_BASE<"vsadd.wu", LSX128WOpnd>; ++ ++def VSADD_DU : LSX_3R<0b01110000010010111>, IsCommutable, ++ LSX_3RN_DESC_BASE<"vsadd.du", LSX128DOpnd>; ++ ++ ++def VSSUB_BU : LSX_3R<0b01110000010011000>, ++ LSX_3RN_DESC_BASE<"vssub.bu", LSX128BOpnd>; ++ ++def VSSUB_HU : LSX_3R<0b01110000010011001>, ++ LSX_3RN_DESC_BASE<"vssub.hu", LSX128HOpnd>; ++ ++def VSSUB_WU : LSX_3R<0b01110000010011010>, ++ LSX_3RN_DESC_BASE<"vssub.wu", LSX128WOpnd>; ++ ++def VSSUB_DU : LSX_3R<0b01110000010011011>, ++ LSX_3RN_DESC_BASE<"vssub.du", LSX128DOpnd>; ++ ++ ++def VHADDW_H_B : LSX_3R<0b01110000010101000>, ++ LSX_3RN_DESC_BASE<"vhaddw.h.b", LSX128HOpnd, LSX128BOpnd, LSX128BOpnd>; ++ ++def VHADDW_W_H : LSX_3R<0b01110000010101001>, ++ LSX_3RN_DESC_BASE<"vhaddw.w.h", LSX128WOpnd, LSX128HOpnd, LSX128HOpnd>; ++ ++def VHADDW_D_W : LSX_3R<0b01110000010101010>, ++ LSX_3RN_DESC_BASE<"vhaddw.d.w", LSX128DOpnd, LSX128WOpnd, LSX128WOpnd>; ++ ++ ++def VHSUBW_H_B : LSX_3R<0b01110000010101100>, ++ LSX_3RN_DESC_BASE<"vhsubw.h.b", LSX128HOpnd, LSX128BOpnd, LSX128BOpnd>; ++ ++def VHSUBW_W_H : LSX_3R<0b01110000010101101>, ++ LSX_3RN_DESC_BASE<"vhsubw.w.h", LSX128WOpnd, LSX128HOpnd, LSX128HOpnd>; ++ ++def VHSUBW_D_W : LSX_3R<0b01110000010101110>, ++ LSX_3RN_DESC_BASE<"vhsubw.d.w", LSX128DOpnd, LSX128WOpnd, LSX128WOpnd>; ++ ++ ++def VHADDW_HU_BU : LSX_3R<0b01110000010110000>, ++ LSX_3RN_DESC_BASE<"vhaddw.hu.bu", LSX128HOpnd, LSX128BOpnd, LSX128BOpnd>; ++ ++def VHADDW_WU_HU : LSX_3R<0b01110000010110001>, ++ LSX_3RN_DESC_BASE<"vhaddw.wu.hu", LSX128WOpnd, LSX128HOpnd, LSX128HOpnd>; ++ ++def VHADDW_DU_WU : LSX_3R<0b01110000010110010>, ++ LSX_3RN_DESC_BASE<"vhaddw.du.wu", LSX128DOpnd, LSX128WOpnd, LSX128WOpnd>; ++ ++ ++def VHSUBW_HU_BU : LSX_3R<0b01110000010110100>, ++ LSX_3RN_DESC_BASE<"vhsubw.hu.bu", LSX128HOpnd, LSX128BOpnd, LSX128BOpnd>; ++ ++def VHSUBW_WU_HU : LSX_3R<0b01110000010110101>, ++ LSX_3RN_DESC_BASE<"vhsubw.wu.hu", LSX128WOpnd, LSX128HOpnd, LSX128HOpnd>; ++ ++def VHSUBW_DU_WU : LSX_3R<0b01110000010110110>, ++ LSX_3RN_DESC_BASE<"vhsubw.du.wu", LSX128DOpnd, LSX128WOpnd, LSX128WOpnd>; ++ ++ ++def VADDA_B : LSX_3R<0b01110000010111000>, IsCommutable, ++ LSX_3RN_DESC_BASE<"vadda.b", LSX128BOpnd>; ++ ++def VADDA_H : LSX_3R<0b01110000010111001>, IsCommutable, ++ LSX_3RN_DESC_BASE<"vadda.h", LSX128HOpnd>; ++ ++def VADDA_W : LSX_3R<0b01110000010111010>, IsCommutable, ++ LSX_3RN_DESC_BASE<"vadda.w", LSX128WOpnd>; ++ ++def VADDA_D : 
LSX_3R<0b01110000010111011>, IsCommutable, ++ LSX_3RN_DESC_BASE<"vadda.d", LSX128DOpnd>; ++ ++ ++def VABSD_B : LSX_3R<0b01110000011000000>, ++ LSX_3RN_DESC_BASE<"vabsd.b", LSX128BOpnd>; ++ ++def VABSD_H : LSX_3R<0b01110000011000001>, ++ LSX_3RN_DESC_BASE<"vabsd.h", LSX128HOpnd>; ++ ++def VABSD_W : LSX_3R<0b01110000011000010>, ++ LSX_3RN_DESC_BASE<"vabsd.w", LSX128WOpnd>; ++ ++def VABSD_D : LSX_3R<0b01110000011000011>, ++ LSX_3RN_DESC_BASE<"vabsd.d", LSX128DOpnd>; ++ ++ ++def VABSD_BU : LSX_3R<0b01110000011000100>, ++ LSX_3RN_DESC_BASE<"vabsd.bu", LSX128BOpnd>; ++ ++def VABSD_HU : LSX_3R<0b01110000011000101>, ++ LSX_3RN_DESC_BASE<"vabsd.hu", LSX128HOpnd>; ++ ++def VABSD_WU : LSX_3R<0b01110000011000110>, ++ LSX_3RN_DESC_BASE<"vabsd.wu", LSX128WOpnd>; ++ ++def VABSD_DU : LSX_3R<0b01110000011000111>, ++ LSX_3RN_DESC_BASE<"vabsd.du", LSX128DOpnd>; ++ ++ ++def VAVG_B : LSX_3R<0b01110000011001000>, IsCommutable, ++ LSX_3RN_DESC_BASE<"vavg.b", LSX128BOpnd>; ++ ++def VAVG_H : LSX_3R<0b01110000011001001>, IsCommutable, ++ LSX_3RN_DESC_BASE<"vavg.h", LSX128HOpnd>; ++ ++def VAVG_W : LSX_3R<0b01110000011001010>, IsCommutable, ++ LSX_3RN_DESC_BASE<"vavg.w", LSX128WOpnd>; ++ ++def VAVG_D : LSX_3R<0b01110000011001011>, IsCommutable, ++ LSX_3RN_DESC_BASE<"vavg.d", LSX128DOpnd>; ++ ++ ++def VAVG_BU : LSX_3R<0b01110000011001100>, IsCommutable, ++ LSX_3RN_DESC_BASE<"vavg.bu", LSX128BOpnd>; ++ ++def VAVG_HU : LSX_3R<0b01110000011001101>, IsCommutable, ++ LSX_3RN_DESC_BASE<"vavg.hu", LSX128HOpnd>; ++ ++def VAVG_WU : LSX_3R<0b01110000011001110>, IsCommutable, ++ LSX_3RN_DESC_BASE<"vavg.wu", LSX128WOpnd>; ++ ++def VAVG_DU : LSX_3R<0b01110000011001111>, IsCommutable, ++ LSX_3RN_DESC_BASE<"vavg.du", LSX128DOpnd>; ++ ++ ++def VAVGR_B : LSX_3R<0b01110000011010000>, IsCommutable, ++ LSX_3RN_DESC_BASE<"vavgr.b", LSX128BOpnd>; ++ ++def VAVGR_H : LSX_3R<0b01110000011010001>, IsCommutable, ++ LSX_3RN_DESC_BASE<"vavgr.h", LSX128HOpnd>; ++ ++def VAVGR_W : LSX_3R<0b01110000011010010>, IsCommutable, ++ LSX_3RN_DESC_BASE<"vavgr.w", LSX128WOpnd>; ++ ++def VAVGR_D : LSX_3R<0b01110000011010011>, IsCommutable, ++ LSX_3RN_DESC_BASE<"vavgr.d", LSX128DOpnd>; ++ ++ ++def VAVGR_BU : LSX_3R<0b01110000011010100>, IsCommutable, ++ LSX_3RN_DESC_BASE<"vavgr.bu", LSX128BOpnd>; ++ ++def VAVGR_HU : LSX_3R<0b01110000011010101>, IsCommutable, ++ LSX_3RN_DESC_BASE<"vavgr.hu", LSX128HOpnd>; ++ ++def VAVGR_WU : LSX_3R<0b01110000011010110>, IsCommutable, ++ LSX_3RN_DESC_BASE<"vavgr.wu", LSX128WOpnd>; ++ ++def VAVGR_DU : LSX_3R<0b01110000011010111>, IsCommutable, ++ LSX_3RN_DESC_BASE<"vavgr.du", LSX128DOpnd>; ++ ++ ++def VMAX_B : LSX_3R<0b01110000011100000>, ++ LSX_3R_DESC_BASE<"vmax.b", smax, LSX128BOpnd>; ++ ++def VMAX_H : LSX_3R<0b01110000011100001>, ++ LSX_3R_DESC_BASE<"vmax.h", smax, LSX128HOpnd>; ++ ++def VMAX_W : LSX_3R<0b01110000011100010>, ++ LSX_3R_DESC_BASE<"vmax.w", smax, LSX128WOpnd>; ++ ++def VMAX_D : LSX_3R<0b01110000011100011>, ++ LSX_3R_DESC_BASE<"vmax.d", smax, LSX128DOpnd>; ++ ++ ++def VMIN_B : LSX_3R<0b01110000011100100>, ++ LSX_3R_DESC_BASE<"vmin.b", smin, LSX128BOpnd>; ++ ++def VMIN_H : LSX_3R<0b01110000011100101>, ++ LSX_3R_DESC_BASE<"vmin.h", smin, LSX128HOpnd>; ++ ++def VMIN_W : LSX_3R<0b01110000011100110>, ++ LSX_3R_DESC_BASE<"vmin.w", smin, LSX128WOpnd>; ++ ++def VMIN_D : LSX_3R<0b01110000011100111>, ++ LSX_3R_DESC_BASE<"vmin.d", smin, LSX128DOpnd>; ++ ++ ++def VMAX_BU : LSX_3R<0b01110000011101000>, ++ LSX_3R_DESC_BASE<"vmax.bu", umax, LSX128BOpnd>; ++ ++def VMAX_HU : LSX_3R<0b01110000011101001>, ++ 
LSX_3R_DESC_BASE<"vmax.hu", umax, LSX128HOpnd>; ++ ++def VMAX_WU : LSX_3R<0b01110000011101010>, ++ LSX_3R_DESC_BASE<"vmax.wu", umax, LSX128WOpnd>; ++ ++def VMAX_DU : LSX_3R<0b01110000011101011>, ++ LSX_3R_DESC_BASE<"vmax.du", umax, LSX128DOpnd>; ++ ++ ++def VMIN_BU : LSX_3R<0b01110000011101100>, ++ LSX_3R_DESC_BASE<"vmin.bu", umin, LSX128BOpnd>; ++ ++def VMIN_HU : LSX_3R<0b01110000011101101>, ++ LSX_3R_DESC_BASE<"vmin.hu", umin, LSX128HOpnd>; ++ ++def VMIN_WU : LSX_3R<0b01110000011101110>, ++ LSX_3R_DESC_BASE<"vmin.wu", umin, LSX128WOpnd>; ++ ++def VMIN_DU : LSX_3R<0b01110000011101111>, ++ LSX_3R_DESC_BASE<"vmin.du", umin, LSX128DOpnd>; ++ ++ ++def VMUL_B : LSX_3R<0b01110000100001000>, ++ LSX_3R_DESC_BASE<"vmul.b", mul, LSX128BOpnd>; ++ ++def VMUL_H : LSX_3R<0b01110000100001001>, ++ LSX_3R_DESC_BASE<"vmul.h", mul, LSX128HOpnd>; ++ ++def VMUL_W : LSX_3R<0b01110000100001010>, ++ LSX_3R_DESC_BASE<"vmul.w", mul, LSX128WOpnd>; ++ ++def VMUL_D : LSX_3R<0b01110000100001011>, ++ LSX_3R_DESC_BASE<"vmul.d", mul, LSX128DOpnd>; ++ ++ ++def VMADD_B : LSX_3R<0b01110000101010000>, ++ LSX_3R_4R_DESC_BASE<"vmadd.b", muladd, LSX128BOpnd>; ++ ++def VMADD_H : LSX_3R<0b01110000101010001>, ++ LSX_3R_4R_DESC_BASE<"vmadd.h", muladd, LSX128HOpnd>; ++ ++def VMADD_W : LSX_3R<0b01110000101010010>, ++ LSX_3R_4R_DESC_BASE<"vmadd.w", muladd, LSX128WOpnd>; ++ ++def VMADD_D : LSX_3R<0b01110000101010011>, ++ LSX_3R_4R_DESC_BASE<"vmadd.d", muladd, LSX128DOpnd>; ++ ++ ++def VMSUB_B : LSX_3R<0b01110000101010100>, ++ LSX_3R_4R_DESC_BASE<"vmsub.b", mulsub, LSX128BOpnd>; ++ ++def VMSUB_H : LSX_3R<0b01110000101010101>, ++ LSX_3R_4R_DESC_BASE<"vmsub.h", mulsub, LSX128HOpnd>; ++ ++def VMSUB_W : LSX_3R<0b01110000101010110>, ++ LSX_3R_4R_DESC_BASE<"vmsub.w", mulsub, LSX128WOpnd>; ++ ++def VMSUB_D : LSX_3R<0b01110000101010111>, ++ LSX_3R_4R_DESC_BASE<"vmsub.d", mulsub, LSX128DOpnd>; ++ ++ ++def VDIV_B : LSX_3R<0b01110000111000000>, ++ LSX_3R_DESC_BASE<"vdiv.b", sdiv, LSX128BOpnd>; ++ ++def VDIV_H : LSX_3R<0b01110000111000001>, ++ LSX_3R_DESC_BASE<"vdiv.h", sdiv, LSX128HOpnd>; ++ ++def VDIV_W : LSX_3R<0b01110000111000010>, ++ LSX_3R_DESC_BASE<"vdiv.w", sdiv, LSX128WOpnd>; ++ ++def VDIV_D : LSX_3R<0b01110000111000011>, ++ LSX_3R_DESC_BASE<"vdiv.d", sdiv, LSX128DOpnd>; ++ ++ ++def VMOD_B : LSX_3R<0b01110000111000100>, ++ LSX_3R_DESC_BASE<"vmod.b", srem, LSX128BOpnd>; ++ ++def VMOD_H : LSX_3R<0b01110000111000101>, ++ LSX_3R_DESC_BASE<"vmod.h", srem, LSX128HOpnd>; ++ ++def VMOD_W : LSX_3R<0b01110000111000110>, ++ LSX_3R_DESC_BASE<"vmod.w", srem, LSX128WOpnd>; ++ ++def VMOD_D : LSX_3R<0b01110000111000111>, ++ LSX_3R_DESC_BASE<"vmod.d", srem, LSX128DOpnd>; ++ ++ ++def VDIV_BU : LSX_3R<0b01110000111001000>, ++ LSX_3R_DESC_BASE<"vdiv.bu", udiv, LSX128BOpnd>; ++ ++def VDIV_HU : LSX_3R<0b01110000111001001>, ++ LSX_3R_DESC_BASE<"vdiv.hu", udiv, LSX128HOpnd>; ++ ++def VDIV_WU : LSX_3R<0b01110000111001010>, ++ LSX_3R_DESC_BASE<"vdiv.wu", udiv, LSX128WOpnd>; ++ ++def VDIV_DU : LSX_3R<0b01110000111001011>, ++ LSX_3R_DESC_BASE<"vdiv.du", udiv, LSX128DOpnd>; ++ ++ ++def VMOD_BU : LSX_3R<0b01110000111001100>, ++ LSX_3R_DESC_BASE<"vmod.bu", urem, LSX128BOpnd>; ++ ++def VMOD_HU : LSX_3R<0b01110000111001101>, ++ LSX_3R_DESC_BASE<"vmod.hu", urem, LSX128HOpnd>; ++ ++def VMOD_WU : LSX_3R<0b01110000111001110>, ++ LSX_3R_DESC_BASE<"vmod.wu", urem, LSX128WOpnd>; ++ ++def VMOD_DU : LSX_3R<0b01110000111001111>, ++ LSX_3R_DESC_BASE<"vmod.du", urem, LSX128DOpnd>; ++ ++ ++def VSLL_B : LSX_3R<0b01110000111010000>, ++ LSX_3R_DESC_BASE<"vsll.b", shl, LSX128BOpnd>; ++ 
++def VSLL_H : LSX_3R<0b01110000111010001>, ++ LSX_3R_DESC_BASE<"vsll.h", shl, LSX128HOpnd>; ++ ++def VSLL_W : LSX_3R<0b01110000111010010>, ++ LSX_3R_DESC_BASE<"vsll.w", shl, LSX128WOpnd>; ++ ++def VSLL_D : LSX_3R<0b01110000111010011>, ++ LSX_3R_DESC_BASE<"vsll.d", shl, LSX128DOpnd>; ++ ++ ++def VSRL_B : LSX_3R<0b01110000111010100>, ++ LSX_3R_DESC_BASE<"vsrl.b", srl, LSX128BOpnd>; ++ ++def VSRL_H : LSX_3R<0b01110000111010101>, ++ LSX_3R_DESC_BASE<"vsrl.h", srl, LSX128HOpnd>; ++ ++def VSRL_W : LSX_3R<0b01110000111010110>, ++ LSX_3R_DESC_BASE<"vsrl.w", srl, LSX128WOpnd>; ++ ++def VSRL_D : LSX_3R<0b01110000111010111>, ++ LSX_3R_DESC_BASE<"vsrl.d", srl, LSX128DOpnd>; ++ ++ ++def VSRA_B : LSX_3R<0b01110000111011000>, ++ LSX_3R_DESC_BASE<"vsra.b", sra, LSX128BOpnd>; ++ ++def VSRA_H : LSX_3R<0b01110000111011001>, ++ LSX_3R_DESC_BASE<"vsra.h", sra, LSX128HOpnd>; ++ ++def VSRA_W : LSX_3R<0b01110000111011010>, ++ LSX_3R_DESC_BASE<"vsra.w", sra, LSX128WOpnd>; ++ ++def VSRA_D : LSX_3R<0b01110000111011011>, ++ LSX_3R_DESC_BASE<"vsra.d", sra, LSX128DOpnd>; ++ ++ ++def VSRLR_B : LSX_3R<0b01110000111100000>, ++ LSX_3RN_DESC_BASE<"vsrlr.b", LSX128BOpnd>; ++ ++def VSRLR_H : LSX_3R<0b01110000111100001>, ++ LSX_3RN_DESC_BASE<"vsrlr.h", LSX128HOpnd>; ++ ++def VSRLR_W : LSX_3R<0b01110000111100010>, ++ LSX_3RN_DESC_BASE<"vsrlr.w", LSX128WOpnd>; ++ ++def VSRLR_D : LSX_3R<0b01110000111100011>, ++ LSX_3RN_DESC_BASE<"vsrlr.d", LSX128DOpnd>; ++ ++ ++def VSRAR_B : LSX_3R<0b01110000111100100>, ++ LSX_3RN_DESC_BASE<"vsrar.b", LSX128BOpnd>; ++ ++def VSRAR_H : LSX_3R<0b01110000111100101>, ++ LSX_3RN_DESC_BASE<"vsrar.h", LSX128HOpnd>; ++ ++def VSRAR_W : LSX_3R<0b01110000111100110>, ++ LSX_3RN_DESC_BASE<"vsrar.w", LSX128WOpnd>; ++ ++def VSRAR_D : LSX_3R<0b01110000111100111>, ++ LSX_3RN_DESC_BASE<"vsrar.d", LSX128DOpnd>; ++ ++ ++def VBITCLR_B : LSX_3R<0b01110001000011000>, ++ LSX_3R_DESC_BASE<"vbitclr.b", vbitclr_b, LSX128BOpnd>; ++ ++def VBITCLR_H : LSX_3R<0b01110001000011001>, ++ LSX_3R_DESC_BASE<"vbitclr.h", vbitclr_h, LSX128HOpnd>; ++ ++def VBITCLR_W : LSX_3R<0b01110001000011010>, ++ LSX_3R_DESC_BASE<"vbitclr.w", vbitclr_w, LSX128WOpnd>; ++ ++def VBITCLR_D : LSX_3R<0b01110001000011011>, ++ LSX_3R_DESC_BASE<"vbitclr.d", vbitclr_d, LSX128DOpnd>; ++ ++ ++def VBITSET_B : LSX_3R<0b01110001000011100>, ++ LSX_3RN_DESC_BASE<"vbitset.b", LSX128BOpnd>; ++ ++def VBITSET_H : LSX_3R<0b01110001000011101>, ++ LSX_3RN_DESC_BASE<"vbitset.h", LSX128HOpnd>; ++ ++def VBITSET_W : LSX_3R<0b01110001000011110>, ++ LSX_3RN_DESC_BASE<"vbitset.w", LSX128WOpnd>; ++ ++def VBITSET_D : LSX_3R<0b01110001000011111>, ++ LSX_3RN_DESC_BASE<"vbitset.d", LSX128DOpnd>; ++ ++ ++def VBITREV_B : LSX_3R<0b01110001000100000>, ++ LSX_3RN_DESC_BASE<"vbitrev.b", LSX128BOpnd>; ++ ++def VBITREV_H : LSX_3R<0b01110001000100001>, ++ LSX_3RN_DESC_BASE<"vbitrev.h", LSX128HOpnd>; ++ ++def VBITREV_W : LSX_3R<0b01110001000100010>, ++ LSX_3RN_DESC_BASE<"vbitrev.w", LSX128WOpnd>; ++ ++def VBITREV_D : LSX_3R<0b01110001000100011>, ++ LSX_3RN_DESC_BASE<"vbitrev.d", LSX128DOpnd>; ++ ++ ++def VPACKEV_B : LSX_3R<0b01110001000101100>, ++ LSX_3R_DESC_BASE<"vpackev.b", LoongArchVPACKEV, LSX128BOpnd>; ++ ++def VPACKEV_H : LSX_3R<0b01110001000101101>, ++ LSX_3R_DESC_BASE<"vpackev.h", LoongArchVPACKEV, LSX128HOpnd>; ++ ++def VPACKEV_W : LSX_3R<0b01110001000101110>, ++ LSX_3R_DESC_BASE<"vpackev.w", LoongArchVPACKEV, LSX128WOpnd>; ++ ++def VPACKEV_D : LSX_3R<0b01110001000101111>, ++ LSX_3R_DESC_BASE<"vpackev.d", LoongArchVPACKEV, LSX128DOpnd>; ++ ++ ++def VPACKOD_B : 
LSX_3R<0b01110001000110000>, ++ LSX_3R_DESC_BASE<"vpackod.b", LoongArchVPACKOD, LSX128BOpnd>; ++ ++def VPACKOD_H : LSX_3R<0b01110001000110001>, ++ LSX_3R_DESC_BASE<"vpackod.h", LoongArchVPACKOD, LSX128HOpnd>; ++ ++def VPACKOD_W : LSX_3R<0b01110001000110010>, ++ LSX_3R_DESC_BASE<"vpackod.w", LoongArchVPACKOD, LSX128WOpnd>; ++ ++def VPACKOD_D : LSX_3R<0b01110001000110011>, ++ LSX_3R_DESC_BASE<"vpackod.d", LoongArchVPACKOD, LSX128DOpnd>; ++ ++ ++def VILVL_B : LSX_3R<0b01110001000110100>, ++ LSX_3R_DESC_BASE<"vilvl.b", LoongArchVILVL, LSX128BOpnd>; ++ ++def VILVL_H : LSX_3R<0b01110001000110101>, ++ LSX_3R_DESC_BASE<"vilvl.h", LoongArchVILVL, LSX128HOpnd>; ++ ++def VILVL_W : LSX_3R<0b01110001000110110>, ++ LSX_3R_DESC_BASE<"vilvl.w", LoongArchVILVL, LSX128WOpnd>; ++ ++def VILVL_D : LSX_3R<0b01110001000110111>, ++ LSX_3R_DESC_BASE<"vilvl.d", LoongArchVILVL, LSX128DOpnd>; ++ ++ ++def VILVH_B : LSX_3R<0b01110001000111000>, ++ LSX_3R_DESC_BASE<"vilvh.b", LoongArchVILVH, LSX128BOpnd>; ++ ++def VILVH_H : LSX_3R<0b01110001000111001>, ++ LSX_3R_DESC_BASE<"vilvh.h", LoongArchVILVH, LSX128HOpnd>; ++ ++def VILVH_W : LSX_3R<0b01110001000111010>, ++ LSX_3R_DESC_BASE<"vilvh.w", LoongArchVILVH, LSX128WOpnd>; ++ ++def VILVH_D : LSX_3R<0b01110001000111011>, ++ LSX_3R_DESC_BASE<"vilvh.d", LoongArchVILVH, LSX128DOpnd>; ++ ++ ++def VPICKEV_B : LSX_3R<0b01110001000111100>, ++ LSX_3R_DESC_BASE<"vpickev.b", LoongArchVPICKEV, LSX128BOpnd>; ++ ++def VPICKEV_H : LSX_3R<0b01110001000111101>, ++ LSX_3R_DESC_BASE<"vpickev.h", LoongArchVPICKEV, LSX128HOpnd>; ++ ++def VPICKEV_W : LSX_3R<0b01110001000111110>, ++ LSX_3R_DESC_BASE<"vpickev.w", LoongArchVPICKEV, LSX128WOpnd>; ++ ++def VPICKEV_D : LSX_3R<0b01110001000111111>, ++ LSX_3R_DESC_BASE<"vpickev.d", LoongArchVPICKEV, LSX128DOpnd>; ++ ++ ++def VPICKOD_B : LSX_3R<0b01110001001000000>, ++ LSX_3R_DESC_BASE<"vpickod.b", LoongArchVPICKOD, LSX128BOpnd>; ++ ++def VPICKOD_H : LSX_3R<0b01110001001000001>, ++ LSX_3R_DESC_BASE<"vpickod.h", LoongArchVPICKOD, LSX128HOpnd>; ++ ++def VPICKOD_W : LSX_3R<0b01110001001000010>, ++ LSX_3R_DESC_BASE<"vpickod.w", LoongArchVPICKOD, LSX128WOpnd>; ++ ++def VPICKOD_D : LSX_3R<0b01110001001000011>, ++ LSX_3R_DESC_BASE<"vpickod.d", LoongArchVPICKOD, LSX128DOpnd>; ++ ++ ++def VREPLVE_B : LSX_3R_1GP<0b01110001001000100>, ++ LSX_3R_VREPLVE_DESC_BASE<"vreplve.b", vsplati8_elt, LSX128BOpnd>; ++ ++def VREPLVE_H : LSX_3R_1GP<0b01110001001000101>, ++ LSX_3R_VREPLVE_DESC_BASE<"vreplve.h", vsplati16_elt, LSX128HOpnd>; ++ ++def VREPLVE_W : LSX_3R_1GP<0b01110001001000110>, ++ LSX_3R_VREPLVE_DESC_BASE<"vreplve.w", vsplati32_elt, LSX128WOpnd>; ++ ++def VREPLVE_D : LSX_3R_1GP<0b01110001001000111>, ++ LSX_3R_VREPLVE_DESC_BASE<"vreplve.d", vsplati64_elt, LSX128DOpnd>; ++ ++ ++def VAND_V : LSX_3R<0b01110001001001100>, ++ LSX_VEC_DESC_BASE<"vand.v", and, LSX128BOpnd>; ++class AND_V_H_PSEUDO_DESC : LSX_VEC_PSEUDO_BASE; ++class AND_V_W_PSEUDO_DESC : LSX_VEC_PSEUDO_BASE; ++class AND_V_D_PSEUDO_DESC : LSX_VEC_PSEUDO_BASE; ++ ++def AND_V_H_PSEUDO : AND_V_H_PSEUDO_DESC, ++ PseudoInstExpansion<(VAND_V LSX128BOpnd:$vd, ++ LSX128BOpnd:$vj, ++ LSX128BOpnd:$vk)>; ++def AND_V_W_PSEUDO : AND_V_W_PSEUDO_DESC, ++ PseudoInstExpansion<(VAND_V LSX128BOpnd:$vd, ++ LSX128BOpnd:$vj, ++ LSX128BOpnd:$vk)>; ++def AND_V_D_PSEUDO : AND_V_D_PSEUDO_DESC, ++ PseudoInstExpansion<(VAND_V LSX128BOpnd:$vd, ++ LSX128BOpnd:$vj, ++ LSX128BOpnd:$vk)>; ++ ++ ++def VOR_V : LSX_3R<0b01110001001001101>, ++ LSX_VEC_DESC_BASE<"vor.v", or, LSX128BOpnd>; ++class OR_V_H_PSEUDO_DESC : LSX_VEC_PSEUDO_BASE; ++class 
OR_V_W_PSEUDO_DESC : LSX_VEC_PSEUDO_BASE; ++class OR_V_D_PSEUDO_DESC : LSX_VEC_PSEUDO_BASE; ++ ++def OR_V_H_PSEUDO : OR_V_H_PSEUDO_DESC, ++ PseudoInstExpansion<(VOR_V LSX128BOpnd:$vd, ++ LSX128BOpnd:$vj, ++ LSX128BOpnd:$vk)>; ++def OR_V_W_PSEUDO : OR_V_W_PSEUDO_DESC, ++ PseudoInstExpansion<(VOR_V LSX128BOpnd:$vd, ++ LSX128BOpnd:$vj, ++ LSX128BOpnd:$vk)>; ++def OR_V_D_PSEUDO : OR_V_D_PSEUDO_DESC, ++ PseudoInstExpansion<(VOR_V LSX128BOpnd:$vd, ++ LSX128BOpnd:$vj, ++ LSX128BOpnd:$vk)>; ++ ++ ++def VXOR_V : LSX_3R<0b01110001001001110>, ++ LSX_VEC_DESC_BASE<"vxor.v", xor, LSX128BOpnd>; ++class XOR_V_H_PSEUDO_DESC : LSX_VEC_PSEUDO_BASE; ++class XOR_V_W_PSEUDO_DESC : LSX_VEC_PSEUDO_BASE; ++class XOR_V_D_PSEUDO_DESC : LSX_VEC_PSEUDO_BASE; ++ ++def XOR_V_H_PSEUDO : XOR_V_H_PSEUDO_DESC, ++ PseudoInstExpansion<(VXOR_V LSX128BOpnd:$vd, ++ LSX128BOpnd:$vj, ++ LSX128BOpnd:$vk)>; ++def XOR_V_W_PSEUDO : XOR_V_W_PSEUDO_DESC, ++ PseudoInstExpansion<(VXOR_V LSX128BOpnd:$vd, ++ LSX128BOpnd:$vj, ++ LSX128BOpnd:$vk)>; ++def XOR_V_D_PSEUDO : XOR_V_D_PSEUDO_DESC, ++ PseudoInstExpansion<(VXOR_V LSX128BOpnd:$vd, ++ LSX128BOpnd:$vj, ++ LSX128BOpnd:$vk)>; ++ ++ ++def VNOR_V : LSX_3R<0b01110001001001111>, ++ LSX_VEC_DESC_BASE<"vnor.v", LoongArchVNOR, LSX128BOpnd>; ++class NOR_V_H_PSEUDO_DESC : LSX_VEC_PSEUDO_BASE; ++class NOR_V_W_PSEUDO_DESC : LSX_VEC_PSEUDO_BASE; ++class NOR_V_D_PSEUDO_DESC : LSX_VEC_PSEUDO_BASE; ++ ++def NOR_V_H_PSEUDO : NOR_V_H_PSEUDO_DESC, ++ PseudoInstExpansion<(VNOR_V LSX128BOpnd:$vd, ++ LSX128BOpnd:$vj, ++ LSX128BOpnd:$vk)>; ++def NOR_V_W_PSEUDO : NOR_V_W_PSEUDO_DESC, ++ PseudoInstExpansion<(VNOR_V LSX128BOpnd:$vd, ++ LSX128BOpnd:$vj, ++ LSX128BOpnd:$vk)>; ++def NOR_V_D_PSEUDO : NOR_V_D_PSEUDO_DESC, ++ PseudoInstExpansion<(VNOR_V LSX128BOpnd:$vd, ++ LSX128BOpnd:$vj, ++ LSX128BOpnd:$vk)>; ++ ++ ++def VFADD_S : LSX_3R<0b01110001001100001>, IsCommutable, ++ LSX_3RF_DESC_BASE<"vfadd.s", fadd, LSX128WOpnd>; ++ ++def VFADD_D : LSX_3R<0b01110001001100010>, IsCommutable, ++ LSX_3RF_DESC_BASE<"vfadd.d", fadd, LSX128DOpnd>; ++ ++ ++def VFSUB_S : LSX_3R<0b01110001001100101>, ++ LSX_3RF_DESC_BASE<"vfsub.s", fsub, LSX128WOpnd>; ++ ++def VFSUB_D : LSX_3R<0b01110001001100110>, ++ LSX_3RF_DESC_BASE<"vfsub.d", fsub, LSX128DOpnd>; ++ ++ ++def VFMUL_S : LSX_3R<0b01110001001110001>, ++ LSX_3RF_DESC_BASE<"vfmul.s", fmul, LSX128WOpnd>; ++ ++def VFMUL_D : LSX_3R<0b01110001001110010>, ++ LSX_3RF_DESC_BASE<"vfmul.d", fmul, LSX128DOpnd>; ++ ++ ++def VFDIV_S : LSX_3R<0b01110001001110101>, ++ LSX_3RF_DESC_BASE<"vfdiv.s", fdiv, LSX128WOpnd>; ++ ++def VFDIV_D : LSX_3R<0b01110001001110110>, ++ LSX_3RF_DESC_BASE<"vfdiv.d", fdiv, LSX128DOpnd>; ++ ++ ++def VFMAX_S : LSX_3R<0b01110001001111001>, ++ LSX_3RFN_DESC_BASE<"vfmax.s", LSX128WOpnd>; ++ ++def VFMAX_D : LSX_3R<0b01110001001111010>, ++ LSX_3RFN_DESC_BASE<"vfmax.d", LSX128DOpnd>; ++ ++ ++def VFMIN_S : LSX_3R<0b01110001001111101>, ++ LSX_3RFN_DESC_BASE<"vfmin.s", LSX128WOpnd>; ++ ++def VFMIN_D : LSX_3R<0b01110001001111110>, ++ LSX_3RFN_DESC_BASE<"vfmin.d", LSX128DOpnd>; ++ ++ ++def VFMAXA_S : LSX_3R<0b01110001010000001>, ++ LSX_3RFN_DESC_BASE<"vfmaxa.s", LSX128WOpnd>; ++ ++def VFMAXA_D : LSX_3R<0b01110001010000010>, ++ LSX_3RFN_DESC_BASE<"vfmaxa.d", LSX128DOpnd>; ++ ++ ++def VFMINA_S : LSX_3R<0b01110001010000101>, ++ LSX_3RFN_DESC_BASE<"vfmina.s", LSX128WOpnd>; ++ ++def VFMINA_D : LSX_3R<0b01110001010000110>, ++ LSX_3RFN_DESC_BASE<"vfmina.d", LSX128DOpnd>; ++ ++ ++def VSHUF_H : LSX_3R<0b01110001011110101>, ++ LSX_3R_VSHF_DESC_BASE<"vshuf.h", LSX128HOpnd>; ++ ++def VSHUF_W : 
LSX_3R<0b01110001011110110>, ++ LSX_3R_VSHF_DESC_BASE<"vshuf.w", LSX128WOpnd>; ++ ++def VSHUF_D : LSX_3R<0b01110001011110111>, ++ LSX_3R_VSHF_DESC_BASE<"vshuf.d", LSX128DOpnd>; ++ ++ ++def VSEQI_B : LSX_I5<0b01110010100000000>, ++ LSX_I5_DESC_BASE_Intrinsic<"vseqi.b", int_loongarch_lsx_vseqi_b, simm5_32, immSExt5, LSX128BOpnd>; ++ ++def VSEQI_H : LSX_I5<0b01110010100000001>, ++ LSX_I5_DESC_BASE_Intrinsic<"vseqi.h", int_loongarch_lsx_vseqi_h, simm5_32, immSExt5, LSX128HOpnd>; ++ ++def VSEQI_W : LSX_I5<0b01110010100000010>, ++ LSX_I5_DESC_BASE_Intrinsic<"vseqi.w", int_loongarch_lsx_vseqi_w, simm5_32, immSExt5, LSX128WOpnd>; ++ ++def VSEQI_D : LSX_I5<0b01110010100000011>, ++ LSX_I5_DESC_BASE_Intrinsic<"vseqi.d", int_loongarch_lsx_vseqi_d, simm5_32, immSExt5, LSX128DOpnd>; ++ ++ ++def VSLEI_B : LSX_I5<0b01110010100000100>, ++ LSX_I5_DESC_BASE_Intrinsic<"vslei.b", int_loongarch_lsx_vslei_b, simm5_32, immSExt5, LSX128BOpnd>; ++ ++def VSLEI_H : LSX_I5<0b01110010100000101>, ++ LSX_I5_DESC_BASE_Intrinsic<"vslei.h", int_loongarch_lsx_vslei_h, simm5_32, immSExt5, LSX128HOpnd>; ++ ++def VSLEI_W : LSX_I5<0b01110010100000110>, ++ LSX_I5_DESC_BASE_Intrinsic<"vslei.w", int_loongarch_lsx_vslei_w, simm5_32, immSExt5, LSX128WOpnd>; ++ ++def VSLEI_D : LSX_I5<0b01110010100000111>, ++ LSX_I5_DESC_BASE_Intrinsic<"vslei.d", int_loongarch_lsx_vslei_d, simm5_32, immSExt5, LSX128DOpnd>; ++ ++ ++def VSLEI_BU : LSX_I5_U<0b01110010100001000>, ++ LSX_I5_U_DESC_BASE_Intrinsic<"vslei.bu", int_loongarch_lsx_vslei_bu, uimm5, immZExt5, LSX128BOpnd>; ++ ++def VSLEI_HU : LSX_I5_U<0b01110010100001001>, ++ LSX_I5_U_DESC_BASE_Intrinsic<"vslei.hu", int_loongarch_lsx_vslei_hu, uimm5, immZExt5, LSX128HOpnd>; ++ ++def VSLEI_WU : LSX_I5_U<0b01110010100001010>, ++ LSX_I5_U_DESC_BASE_Intrinsic<"vslei.wu", int_loongarch_lsx_vslei_wu, uimm5, immZExt5, LSX128WOpnd>; ++ ++def VSLEI_DU : LSX_I5_U<0b01110010100001011>, ++ LSX_I5_U_DESC_BASE_Intrinsic<"vslei.du", int_loongarch_lsx_vslei_du, uimm5, immZExt5, LSX128DOpnd>; ++ ++ ++def VSLTI_B : LSX_I5<0b01110010100001100>, ++ LSX_I5_DESC_BASE_Intrinsic<"vslti.b", int_loongarch_lsx_vslti_b, simm5_32, immSExt5, LSX128BOpnd>; ++ ++def VSLTI_H : LSX_I5<0b01110010100001101>, ++ LSX_I5_DESC_BASE_Intrinsic<"vslti.h", int_loongarch_lsx_vslti_h, simm5_32, immSExt5, LSX128HOpnd>; ++ ++def VSLTI_W : LSX_I5<0b01110010100001110>, ++ LSX_I5_DESC_BASE_Intrinsic<"vslti.w", int_loongarch_lsx_vslti_w, simm5_32, immSExt5, LSX128WOpnd>; ++ ++def VSLTI_D : LSX_I5<0b01110010100001111>, ++ LSX_I5_DESC_BASE_Intrinsic<"vslti.d", int_loongarch_lsx_vslti_d, simm5_32, immSExt5, LSX128DOpnd>; ++ ++ ++def VSLTI_BU : LSX_I5_U<0b01110010100010000>, ++ LSX_I5_U_DESC_BASE_Intrinsic<"vslti.bu", int_loongarch_lsx_vslti_bu, uimm5, immZExt5, LSX128BOpnd>; ++ ++def VSLTI_HU : LSX_I5_U<0b01110010100010001>, ++ LSX_I5_U_DESC_BASE_Intrinsic<"vslti.hu", int_loongarch_lsx_vslti_hu, uimm5, immZExt5, LSX128HOpnd>; ++ ++def VSLTI_WU : LSX_I5_U<0b01110010100010010>, ++ LSX_I5_U_DESC_BASE_Intrinsic<"vslti.wu", int_loongarch_lsx_vslti_wu, uimm5, immZExt5, LSX128WOpnd>; ++ ++def VSLTI_DU : LSX_I5_U<0b01110010100010011>, ++ LSX_I5_U_DESC_BASE_Intrinsic<"vslti.du", int_loongarch_lsx_vslti_du, uimm5, immZExt5, LSX128DOpnd>; ++ ++ ++def VADDI_BU : LSX_I5_U<0b01110010100010100>, ++ LSX_I5_U_DESC_BASE<"vaddi.bu", add, vsplati8_uimm5, LSX128BOpnd>; ++ ++def VADDI_HU : LSX_I5_U<0b01110010100010101>, ++ LSX_I5_U_DESC_BASE<"vaddi.hu", add, vsplati16_uimm5, LSX128HOpnd>; ++ ++def VADDI_WU : LSX_I5_U<0b01110010100010110>, ++ LSX_I5_U_DESC_BASE<"vaddi.wu", 
add, vsplati32_uimm5, LSX128WOpnd>; ++ ++def VADDI_DU : LSX_I5_U<0b01110010100010111>, ++ LSX_I5_U_DESC_BASE<"vaddi.du", add, vsplati64_uimm5, LSX128DOpnd>; ++ ++ ++def VSUBI_BU : LSX_I5_U<0b01110010100011000>, ++ LSX_I5_U_DESC_BASE<"vsubi.bu", sub, vsplati8_uimm5, LSX128BOpnd>; ++ ++def VSUBI_HU : LSX_I5_U<0b01110010100011001>, ++ LSX_I5_U_DESC_BASE<"vsubi.hu", sub, vsplati16_uimm5, LSX128HOpnd>; ++ ++def VSUBI_WU : LSX_I5_U<0b01110010100011010>, ++ LSX_I5_U_DESC_BASE<"vsubi.wu", sub, vsplati32_uimm5, LSX128WOpnd>; ++ ++def VSUBI_DU : LSX_I5_U<0b01110010100011011>, ++ LSX_I5_U_DESC_BASE<"vsubi.du", sub, vsplati64_uimm5, LSX128DOpnd>; ++ ++ ++def VMAXI_B : LSX_I5<0b01110010100100000>, ++ LSX_I5_DESC_BASE_Intrinsic<"vmaxi.b", int_loongarch_lsx_vmaxi_b, simm5_32, immSExt5, LSX128BOpnd>; ++ ++def VMAXI_H : LSX_I5<0b01110010100100001>, ++ LSX_I5_DESC_BASE_Intrinsic<"vmaxi.h", int_loongarch_lsx_vmaxi_h, simm5_32, immSExt5, LSX128HOpnd>; ++ ++def VMAXI_W : LSX_I5<0b01110010100100010>, ++ LSX_I5_DESC_BASE_Intrinsic<"vmaxi.w", int_loongarch_lsx_vmaxi_w, simm5_32, immSExt5, LSX128WOpnd>; ++ ++def VMAXI_D : LSX_I5<0b01110010100100011>, ++ LSX_I5_DESC_BASE_Intrinsic<"vmaxi.d", int_loongarch_lsx_vmaxi_d, simm5_32, immSExt5, LSX128DOpnd>; ++ ++ ++def VMINI_B : LSX_I5<0b01110010100100100>, ++ LSX_I5_DESC_BASE_Intrinsic<"vmini.b", int_loongarch_lsx_vmini_b, simm5_32, immSExt5, LSX128BOpnd>; ++ ++def VMINI_H : LSX_I5<0b01110010100100101>, ++ LSX_I5_DESC_BASE_Intrinsic<"vmini.h", int_loongarch_lsx_vmini_h, simm5_32, immSExt5, LSX128HOpnd>; ++ ++def VMINI_W : LSX_I5<0b01110010100100110>, ++ LSX_I5_DESC_BASE_Intrinsic<"vmini.w", int_loongarch_lsx_vmini_w, simm5_32, immSExt5, LSX128WOpnd>; ++ ++def VMINI_D : LSX_I5<0b01110010100100111>, ++ LSX_I5_DESC_BASE_Intrinsic<"vmini.d", int_loongarch_lsx_vmini_d, simm5_32, immSExt5, LSX128DOpnd>; ++ ++ ++def VMAXI_BU : LSX_I5_U<0b01110010100101000>, ++ LSX_I5_U_DESC_BASE_Intrinsic<"vmaxi.bu", int_loongarch_lsx_vmaxi_bu, uimm5, immZExt5, LSX128BOpnd>; ++ ++def VMAXI_HU : LSX_I5_U<0b01110010100101001>, ++ LSX_I5_U_DESC_BASE_Intrinsic<"vmaxi.hu", int_loongarch_lsx_vmaxi_hu, uimm5, immZExt5, LSX128HOpnd>; ++ ++def VMAXI_WU : LSX_I5_U<0b01110010100101010>, ++ LSX_I5_U_DESC_BASE_Intrinsic<"vmaxi.wu", int_loongarch_lsx_vmaxi_wu, uimm5, immZExt5, LSX128WOpnd>; ++ ++def VMAXI_DU : LSX_I5_U<0b01110010100101011>, ++ LSX_I5_U_DESC_BASE_Intrinsic<"vmaxi.du", int_loongarch_lsx_vmaxi_du, uimm5, immZExt5, LSX128DOpnd>; ++ ++ ++def VMINI_BU : LSX_I5_U<0b01110010100101100>, ++ LSX_I5_U_DESC_BASE<"vmini.bu", umin, vsplati8_uimm5, LSX128BOpnd>; ++ ++def VMINI_HU : LSX_I5_U<0b01110010100101101>, ++ LSX_I5_U_DESC_BASE<"vmini.hu", umin, vsplati16_uimm5, LSX128HOpnd>; ++ ++def VMINI_WU : LSX_I5_U<0b01110010100101110>, ++ LSX_I5_U_DESC_BASE<"vmini.wu", umin, vsplati32_uimm5, LSX128WOpnd>; ++ ++def VMINI_DU : LSX_I5_U<0b01110010100101111>, ++ LSX_I5_U_DESC_BASE<"vmini.du", umin, vsplati64_uimm5, LSX128DOpnd>; ++ ++ ++def VCLO_B : LSX_2R<0b0111001010011100000000>, ++ LSX_2RN_DESC_BASE<"vclo.b", LSX128BOpnd>; ++ ++def VCLO_H : LSX_2R<0b0111001010011100000001>, ++ LSX_2RN_DESC_BASE<"vclo.h", LSX128HOpnd>; ++ ++def VCLO_W : LSX_2R<0b0111001010011100000010>, ++ LSX_2RN_DESC_BASE<"vclo.w", LSX128WOpnd>; ++ ++def VCLO_D : LSX_2R<0b0111001010011100000011>, ++ LSX_2RN_DESC_BASE<"vclo.d", LSX128DOpnd>; ++ ++ ++def VCLZ_B : LSX_2R<0b0111001010011100000100>, ++ LSX_2R_DESC_BASE<"vclz.b", ctlz, LSX128BOpnd>; ++ ++def VCLZ_H : LSX_2R<0b0111001010011100000101>, ++ LSX_2R_DESC_BASE<"vclz.h", ctlz, 
LSX128HOpnd>; ++ ++def VCLZ_W : LSX_2R<0b0111001010011100000110>, ++ LSX_2R_DESC_BASE<"vclz.w", ctlz, LSX128WOpnd>; ++ ++def VCLZ_D : LSX_2R<0b0111001010011100000111>, ++ LSX_2R_DESC_BASE<"vclz.d", ctlz, LSX128DOpnd>; ++ ++ ++def VPCNT_B : LSX_2R<0b0111001010011100001000>, ++ LSX_2R_DESC_BASE<"vpcnt.b", ctpop, LSX128BOpnd>; ++ ++def VPCNT_H : LSX_2R<0b0111001010011100001001>, ++ LSX_2R_DESC_BASE<"vpcnt.h", ctpop, LSX128HOpnd>; ++ ++def VPCNT_W : LSX_2R<0b0111001010011100001010>, ++ LSX_2R_DESC_BASE<"vpcnt.w", ctpop, LSX128WOpnd>; ++ ++def VPCNT_D : LSX_2R<0b0111001010011100001011>, ++ LSX_2R_DESC_BASE<"vpcnt.d", ctpop, LSX128DOpnd>; ++ ++ ++def VFLOGB_S : LSX_2R<0b0111001010011100110001>, ++ LSX_2RFN_DESC_BASE<"vflogb.s", LSX128WOpnd>; ++ ++def VFLOGB_D : LSX_2R<0b0111001010011100110010>, ++ LSX_2RFN_DESC_BASE<"vflogb.d", LSX128DOpnd>; ++ ++ ++def VFCLASS_S : LSX_2R<0b0111001010011100110101>, ++ LSX_2RFN_DESC_BASE<"vfclass.s", LSX128WOpnd>; ++ ++def VFCLASS_D : LSX_2R<0b0111001010011100110110>, ++ LSX_2RFN_DESC_BASE<"vfclass.d", LSX128DOpnd>; ++ ++ ++def VFSQRT_S : LSX_2R<0b0111001010011100111001>, ++ LSX_2RF_DESC_BASE<"vfsqrt.s", fsqrt, LSX128WOpnd>; ++ ++def VFSQRT_D : LSX_2R<0b0111001010011100111010>, ++ LSX_2RF_DESC_BASE<"vfsqrt.d", fsqrt, LSX128DOpnd>; ++ ++ ++def VFRECIP_S : LSX_2R<0b0111001010011100111101>, ++ LSX_2RFN_DESC_BASE<"vfrecip.s", LSX128WOpnd>; ++ ++def VFRECIP_D : LSX_2R<0b0111001010011100111110>, ++ LSX_2RFN_DESC_BASE<"vfrecip.d", LSX128DOpnd>; ++ ++ ++def VFRSQRT_S : LSX_2R<0b0111001010011101000001>, ++ LSX_2RFN_DESC_BASE<"vfrsqrt.s", LSX128WOpnd>; ++ ++def VFRSQRT_D : LSX_2R<0b0111001010011101000010>, ++ LSX_2RFN_DESC_BASE<"vfrsqrt.d", LSX128DOpnd>; ++ ++ ++def VFRINT_S : LSX_2R<0b0111001010011101001101>, ++ LSX_2RF_DESC_BASE<"vfrint.s", frint, LSX128WOpnd>; ++ ++def VFRINT_D : LSX_2R<0b0111001010011101001110>, ++ LSX_2RF_DESC_BASE<"vfrint.d", frint, LSX128DOpnd>; ++ ++ ++def VFCVTL_S_H : LSX_2R<0b0111001010011101111010>, ++ LSX_2RFN_DESC_BASE_CVT<"vfcvtl.s.h", LSX128WOpnd, LSX128HOpnd>; ++ ++def VFCVTH_S_H : LSX_2R<0b0111001010011101111011>, ++ LSX_2RFN_DESC_BASE_CVT<"vfcvth.s.h", LSX128WOpnd, LSX128HOpnd>; ++ ++ ++def VFCVTL_D_S : LSX_2R<0b0111001010011101111100>, ++ LSX_2RFN_DESC_BASE_CVT<"vfcvtl.d.s", LSX128DOpnd, LSX128WOpnd>; ++ ++def VFCVTH_D_S : LSX_2R<0b0111001010011101111101>, ++ LSX_2RFN_DESC_BASE_CVT<"vfcvth.d.s", LSX128DOpnd, LSX128WOpnd>; ++ ++ ++def VFFINT_S_W : LSX_2R<0b0111001010011110000000>, ++ LSX_2RF_DESC_BASE<"vffint.s.w", sint_to_fp, LSX128WOpnd>; ++ ++def VFFINT_S_WU : LSX_2R<0b0111001010011110000001>, ++ LSX_2RF_DESC_BASE<"vffint.s.wu", uint_to_fp, LSX128WOpnd>; ++ ++ ++def VFFINT_D_L : LSX_2R<0b0111001010011110000010>, ++ LSX_2RF_DESC_BASE<"vffint.d.l", sint_to_fp, LSX128DOpnd>; ++ ++def VFFINT_D_LU : LSX_2R<0b0111001010011110000011>, ++ LSX_2RF_DESC_BASE<"vffint.d.lu", uint_to_fp, LSX128DOpnd>; ++ ++ ++def VFTINT_W_S : LSX_2R<0b0111001010011110001100>, ++ LSX_2RFN_DESC_BASE<"vftint.w.s", LSX128WOpnd>; ++ ++def VFTINT_L_D : LSX_2R<0b0111001010011110001101>, ++ LSX_2RFN_DESC_BASE<"vftint.l.d", LSX128DOpnd>; ++ ++ ++def VFTINT_WU_S : LSX_2R<0b0111001010011110010110>, ++ LSX_2RFN_DESC_BASE<"vftint.wu.s", LSX128WOpnd>; ++ ++def VFTINT_LU_D : LSX_2R<0b0111001010011110010111>, ++ LSX_2RFN_DESC_BASE<"vftint.lu.d", LSX128DOpnd>; ++ ++ ++def VFTINTRZ_WU_S : LSX_2R<0b0111001010011110011100>, ++ LSX_2RF_DESC_BASE<"vftintrz.wu.s", fp_to_uint, LSX128WOpnd>; ++ ++def VFTINTRZ_LU_D : LSX_2R<0b0111001010011110011101>, ++ LSX_2RF_DESC_BASE<"vftintrz.lu.d", 
fp_to_uint, LSX128DOpnd>; ++ ++ ++def VREPLGR2VR_B : LSX_2R_1GP<0b0111001010011111000000>, ++ LSX_2R_REPL_DESC_BASE<"vreplgr2vr.b", v16i8, vsplati8, LSX128BOpnd, GPR32Opnd>; ++ ++def VREPLGR2VR_H : LSX_2R_1GP<0b0111001010011111000001>, ++ LSX_2R_REPL_DESC_BASE<"vreplgr2vr.h", v8i16, vsplati16, LSX128HOpnd, GPR32Opnd>; ++ ++def VREPLGR2VR_W : LSX_2R_1GP<0b0111001010011111000010>, ++ LSX_2R_REPL_DESC_BASE<"vreplgr2vr.w", v4i32, vsplati32, LSX128WOpnd, GPR32Opnd>; ++ ++def VREPLGR2VR_D : LSX_2R_1GP<0b0111001010011111000011>, ++ LSX_2R_REPL_DESC_BASE<"vreplgr2vr.d", v2i64, vsplati64, LSX128DOpnd, GPR64Opnd>; ++ ++ ++class LSX_2R_FILL_PSEUDO_BASE : ++ LSXPseudo<(outs RCVD:$vd), (ins RCVS:$fs), ++ [(set RCVD:$vd, (OpNode RCVS:$fs))]> { ++ let usesCustomInserter = 1; ++} ++ ++class FILL_FW_PSEUDO_DESC : LSX_2R_FILL_PSEUDO_BASE; ++class FILL_FD_PSEUDO_DESC : LSX_2R_FILL_PSEUDO_BASE; ++ ++def FILL_FW_PSEUDO : FILL_FW_PSEUDO_DESC; ++def FILL_FD_PSEUDO : FILL_FD_PSEUDO_DESC; ++ ++ ++def VSRLRI_B : LSX_I3_U<0b0111001010100100001>, ++ LSX_BIT_3N_DESC_BASE<"vsrlri.b", uimm3, immZExt3, LSX128BOpnd>; ++ ++def VSRLRI_H : LSX_I4_U<0b011100101010010001>, ++ LSX_BIT_4N_DESC_BASE<"vsrlri.h", uimm4, immZExt4, LSX128HOpnd>; ++ ++def VSRLRI_W : LSX_I5_U<0b01110010101001001>, ++ LSX_BIT_5N_DESC_BASE<"vsrlri.w", uimm5, immZExt5, LSX128WOpnd>; ++ ++def VSRLRI_D : LSX_I6_U<0b0111001010100101>, ++ LSX_BIT_6N_DESC_BASE<"vsrlri.d", uimm6, immZExt6, LSX128DOpnd>; ++ ++ ++def VSRARI_B : LSX_I3_U<0b0111001010101000001>, ++ LSX_BIT_3N_DESC_BASE<"vsrari.b", uimm3, immZExt3, LSX128BOpnd>; ++ ++def VSRARI_H : LSX_I4_U<0b011100101010100001>, ++ LSX_BIT_4N_DESC_BASE<"vsrari.h", uimm4, immZExt4, LSX128HOpnd>; ++ ++def VSRARI_W : LSX_I5_U<0b01110010101010001>, ++ LSX_BIT_5N_DESC_BASE<"vsrari.w", uimm5, immZExt5, LSX128WOpnd>; ++ ++def VSRARI_D : LSX_I6_U<0b0111001010101001>, ++ LSX_BIT_6N_DESC_BASE<"vsrari.d", uimm6, immZExt6, LSX128DOpnd>; ++ ++ ++def VINSGR2VR_B : LSX_I4_R_U<0b011100101110101110>, ++ LSX_INSERT_U4_DESC_BASE<"vinsgr2vr.b", vinsert_v16i8, uimm4, immZExt4Ptr, LSX128BOpnd, GPR32Opnd>; ++ ++def VINSGR2VR_H : LSX_I3_R_U<0b0111001011101011110>, ++ LSX_INSERT_U3_DESC_BASE<"vinsgr2vr.h", vinsert_v8i16, uimm3, immZExt3Ptr, LSX128HOpnd, GPR32Opnd>; ++ ++def VINSGR2VR_W : LSX_I2_R_U<0b01110010111010111110>, ++ LSX_INSERT_U2_DESC_BASE<"vinsgr2vr.w", vinsert_v4i32, uimm2, immZExt2Ptr, LSX128WOpnd, GPR32Opnd>; ++ ++def VINSGR2VR_D : LSX_I1_R_U<0b011100101110101111110>, ++ LSX_INSERT_U1_DESC_BASE<"vinsgr2vr.d", vinsert_v2i64, uimm1, immZExt1Ptr, LSX128DOpnd, GPR64Opnd>; ++ ++ ++def VPICKVE2GR_B : LSX_ELM_COPY_B<0b011100101110111110>, ++ LSX_PICK_U4_DESC_BASE<"vpickve2gr.b", vextract_sext_i8, v16i8, uimm4_ptr, immZExt4Ptr, GPR32Opnd, LSX128BOpnd>; ++ ++def VPICKVE2GR_H : LSX_ELM_COPY_H<0b0111001011101111110>, ++ LSX_PICK_U3_DESC_BASE<"vpickve2gr.h", vextract_sext_i16, v8i16, uimm3_ptr, immZExt3Ptr, GPR32Opnd, LSX128HOpnd>; ++ ++def VPICKVE2GR_W : LSX_ELM_COPY_W<0b01110010111011111110>, ++ LSX_PICK_U2_DESC_BASE<"vpickve2gr.w", vextract_sext_i32, v4i32, uimm2_ptr, immZExt2Ptr, GPR32Opnd, LSX128WOpnd>; ++ ++def VPICKVE2GR_D : LSX_ELM_COPY_D<0b011100101110111111110>, ++ LSX_PICK_U1_DESC_BASE<"vpickve2gr.d", vextract_sext_i64, v2i64, uimm1_ptr, immZExt1Ptr, GPR64Opnd, LSX128DOpnd>; ++ ++ ++def VPICKVE2GR_BU : LSX_ELM_COPY_B<0b011100101111001110>, ++ LSX_PICK_U4_DESC_BASE<"vpickve2gr.bu", vextract_zext_i8, v16i8, uimm4_ptr, immZExt4Ptr, GPR32Opnd, LSX128BOpnd>; ++ ++def VPICKVE2GR_HU : LSX_ELM_COPY_H<0b0111001011110011110>, ++ 
LSX_PICK_U3_DESC_BASE<"vpickve2gr.hu", vextract_zext_i16, v8i16, uimm3_ptr, immZExt3Ptr, GPR32Opnd, LSX128HOpnd>; ++ ++def VPICKVE2GR_WU : LSX_ELM_COPY_W<0b01110010111100111110>, ++ LSX_PICK_U2_DESC_BASE<"vpickve2gr.wu", vextract_zext_i32, v4i32, uimm2_ptr, immZExt2Ptr, GPR32Opnd, LSX128WOpnd>; ++ ++def VPICKVE2GR_DU : LSX_ELM_COPY_D<0b011100101111001111110>, ++ LSX_PICK_U1_DESC_BASE<"vpickve2gr.du", int_loongarch_lsx_vpickve2gr_du, v2i64, uimm1, immZExt1, GPR64Opnd, LSX128DOpnd>; ++ ++ ++def : LSXPat<(vextract_zext_i64 (v2i64 LSX128D:$vj), immZExt1Ptr:$idx), ++ (VPICKVE2GR_D LSX128D:$vj, immZExt1:$idx)>; ++def : LSXPat<(vextract_zext_i64 (v2f64 LSX128D:$vj), immZExt1Ptr:$idx), ++ (VPICKVE2GR_D LSX128D:$vj, immZExt1:$idx)>; ++ ++ ++def VREPLVEI_B : LSX_I4_U<0b011100101111011110>, ++ LSX_ELM_U4_VREPLVE_DESC_BASE<"vreplvei.b", vsplati8_uimm4, LSX128BOpnd>; ++ ++def VREPLVEI_H : LSX_I3_U<0b0111001011110111110>, ++ LSX_ELM_U3_VREPLVE_DESC_BASE<"vreplvei.h", vsplati16_uimm3, LSX128HOpnd>; ++ ++def VREPLVEI_W : LSX_I2_U<0b01110010111101111110>, ++ LSX_ELM_U2_VREPLVE_DESC_BASE<"vreplvei.w", vsplati32_uimm2, LSX128WOpnd>; ++ ++def VREPLVEI_D : LSX_I1_U<0b011100101111011111110>, ++ LSX_ELM_U1_VREPLVE_DESC_BASE<"vreplvei.d", vsplati64_uimm1, LSX128DOpnd>; ++ ++ ++def VSAT_B : LSX_I3_U<0b0111001100100100001>, ++ LSX_BIT_3N_DESC_BASE<"vsat.b", uimm3, immZExt3, LSX128BOpnd>; ++ ++def VSAT_H : LSX_I4_U<0b011100110010010001>, ++ LSX_BIT_4N_DESC_BASE<"vsat.h", uimm4, immZExt4, LSX128HOpnd>; ++ ++def VSAT_W : LSX_I5_U<0b01110011001001001>, ++ LSX_BIT_5N_DESC_BASE<"vsat.w", uimm5, immZExt5, LSX128WOpnd>; ++ ++def VSAT_D : LSX_I6_U<0b0111001100100101>, ++ LSX_BIT_6N_DESC_BASE<"vsat.d", uimm6, immZExt6, LSX128DOpnd>; ++ ++ ++def VSAT_BU : LSX_I3_U<0b0111001100101000001>, ++ LSX_BIT_3N_DESC_BASE<"vsat.bu", uimm3, immZExt3, LSX128BOpnd>; ++ ++def VSAT_HU : LSX_I4_U<0b011100110010100001>, ++ LSX_BIT_4N_DESC_BASE<"vsat.hu", uimm4, immZExt4, LSX128HOpnd>; ++ ++def VSAT_WU : LSX_I5_U<0b01110011001010001>, ++ LSX_BIT_5N_DESC_BASE<"vsat.wu", uimm5, immZExt5, LSX128WOpnd>; ++ ++def VSAT_DU : LSX_I6_U<0b0111001100101001>, ++ LSX_BIT_6N_DESC_BASE<"vsat.du", uimm6, immZExt6, LSX128DOpnd>; ++ ++ ++def VSLLI_B : LSX_I3_U<0b0111001100101100001>, ++ LSX_BIT_U3_VREPLVE_DESC_BASE<"vslli.b", shl, vsplati8_uimm3, LSX128BOpnd>; ++ ++def VSLLI_H : LSX_I4_U<0b011100110010110001>, ++ LSX_BIT_U4_VREPLVE_DESC_BASE<"vslli.h", shl, vsplati16_uimm4, LSX128HOpnd>; ++ ++def VSLLI_W : LSX_I5_U<0b01110011001011001>, ++ LSX_BIT_U5_VREPLVE_DESC_BASE<"vslli.w", shl, vsplati32_uimm5, LSX128WOpnd>; ++ ++def VSLLI_D : LSX_I6_U<0b0111001100101101>, ++ LSX_BIT_U6_VREPLVE_DESC_BASE<"vslli.d", shl, vsplati64_uimm6, LSX128DOpnd>; ++ ++ ++def VSRLI_B : LSX_I3_U<0b0111001100110000001>, ++ LSX_BIT_U3_VREPLVE_DESC_BASE<"vsrli.b", srl, vsplati8_uimm3, LSX128BOpnd>; ++ ++def VSRLI_H : LSX_I4_U<0b011100110011000001>, ++ LSX_BIT_U4_VREPLVE_DESC_BASE<"vsrli.h", srl, vsplati16_uimm4, LSX128HOpnd>; ++ ++def VSRLI_W : LSX_I5_U<0b01110011001100001>, ++ LSX_BIT_U5_VREPLVE_DESC_BASE<"vsrli.w", srl, vsplati32_uimm5, LSX128WOpnd>; ++ ++def VSRLI_D : LSX_I6_U<0b0111001100110001>, ++ LSX_BIT_U6_VREPLVE_DESC_BASE<"vsrli.d", srl, vsplati64_uimm6, LSX128DOpnd>; ++ ++ ++def VSRAI_B : LSX_I3_U<0b0111001100110100001>, ++ LSX_BIT_U3_VREPLVE_DESC_BASE_Intrinsic<"vsrai.b", int_loongarch_lsx_vsrai_b, LSX128BOpnd>; ++ ++def VSRAI_H : LSX_I4_U<0b011100110011010001>, ++ LSX_BIT_U4_VREPLVE_DESC_BASE_Intrinsic<"vsrai.h", int_loongarch_lsx_vsrai_h, LSX128HOpnd>; ++ ++def VSRAI_W : 
LSX_I5_U<0b01110011001101001>, ++ LSX_BIT_U5_VREPLVE_DESC_BASE_Intrinsic<"vsrai.w", int_loongarch_lsx_vsrai_w, LSX128WOpnd>; ++ ++def VSRAI_D : LSX_I6_U<0b0111001100110101>, ++ LSX_BIT_U6_VREPLVE_DESC_BASE_Intrinsic<"vsrai.d", int_loongarch_lsx_vsrai_d, LSX128DOpnd>; ++ ++ ++def VSHUF4I_B : LSX_I8_U<0b01110011100100>, ++ LSX_I8_SHF_DESC_BASE<"vshuf4i.b", LSX128BOpnd>; ++ ++def VSHUF4I_H : LSX_I8_U<0b01110011100101>, ++ LSX_I8_SHF_DESC_BASE<"vshuf4i.h", LSX128HOpnd>; ++ ++def VSHUF4I_W : LSX_I8_U<0b01110011100110>, ++ LSX_I8_SHF_DESC_BASE<"vshuf4i.w", LSX128WOpnd>; ++ ++def VSHUF4I_D : LSX_I8_U<0b01110011100111>, ++ LSX_I8_SHUF_DESC_BASE_D<"vshuf4i.d", int_loongarch_lsx_vshuf4i_d, LSX128DOpnd>; ++ ++ ++def VROTR_B : LSX_3R<0b01110000111011100>, ++ LSX_3R_DESC_BASE<"vrotr.b", LoongArchVROR, LSX128BOpnd>; ++ ++def VROTR_H : LSX_3R<0b01110000111011101>, ++ LSX_3R_DESC_BASE<"vrotr.h", LoongArchVROR, LSX128HOpnd>; ++ ++def VROTR_W : LSX_3R<0b01110000111011110>, ++ LSX_3R_DESC_BASE<"vrotr.w", LoongArchVROR, LSX128WOpnd>; ++ ++def VROTR_D : LSX_3R<0b01110000111011111>, ++ LSX_3R_DESC_BASE<"vrotr.d", LoongArchVROR, LSX128DOpnd>; ++ ++ ++def VMSKLTZ_B : LSX_2R<0b0111001010011100010000>, ++ LSX_2RN_DESC_BASE<"vmskltz.b", LSX128BOpnd>; ++ ++def VMSKLTZ_H : LSX_2R<0b0111001010011100010001>, ++ LSX_2RN_DESC_BASE<"vmskltz.h", LSX128HOpnd>; ++ ++def VMSKLTZ_W : LSX_2R<0b0111001010011100010010>, ++ LSX_2RN_DESC_BASE<"vmskltz.w", LSX128WOpnd>; ++ ++def VMSKLTZ_D : LSX_2R<0b0111001010011100010011>, ++ LSX_2RN_DESC_BASE<"vmskltz.d", LSX128DOpnd>; ++ ++ ++def VROTRI_B : LSX_I3_U<0b0111001010100000001>, ++ LSX2_RORI_U3_DESC_BASE<"vrotri.b", uimm3, immZExt3, LSX128BOpnd>; ++ ++def VROTRI_H : LSX_I4_U<0b011100101010000001>, ++ LSX2_RORI_U4_DESC_BASE<"vrotri.h", uimm4, immZExt4, LSX128HOpnd>; ++ ++def VROTRI_W : LSX_I5_U<0b01110010101000001>, ++ LSX2_RORI_U5_DESC_BASE<"vrotri.w", uimm5, immZExt5, LSX128WOpnd>; ++ ++def VROTRI_D : LSX_I6_U<0b0111001010100001>, ++ LSX2_RORI_U6_DESC_BASE<"vrotri.d", uimm6, immZExt6, LSX128DOpnd>; ++ ++ ++def VSRLNI_B_H : LSX_I4_U<0b011100110100000001>, ++ LSX_BIND_U4N_DESC_BASE<"vsrlni.b.h", uimm4, immZExt4, LSX128BOpnd>; ++ ++def VSRLNI_H_W : LSX_I5_U<0b01110011010000001>, ++ LSX_BIND_U5N_DESC_BASE<"vsrlni.h.w", uimm5, immZExt5, LSX128HOpnd>; ++ ++def VSRLNI_W_D : LSX_I6_U<0b0111001101000001>, ++ LSX_BIND_U6N_DESC_BASE<"vsrlni.w.d", uimm6, immZExt6, LSX128WOpnd>; ++ ++def VSRLNI_D_Q : LSX_I7_U<0b011100110100001>, ++ LSX_BIND_U7N_DESC_BASE<"vsrlni.d.q", LSX128DOpnd>; ++ ++ ++def VSRLRNI_B_H : LSX_I4_U<0b011100110100010001>, ++ LSX_BIND_U4N_DESC_BASE<"vsrlrni.b.h", uimm4, immZExt4, LSX128BOpnd>; ++ ++def VSRLRNI_H_W : LSX_I5_U<0b01110011010001001>, ++ LSX_BIND_U5N_DESC_BASE<"vsrlrni.h.w", uimm5, immZExt5, LSX128HOpnd>; ++ ++def VSRLRNI_W_D : LSX_I6_U<0b0111001101000101>, ++ LSX_BIND_U6N_DESC_BASE<"vsrlrni.w.d", uimm6, immZExt6, LSX128WOpnd>; ++ ++def VSRLRNI_D_Q : LSX_I7_U<0b011100110100011>, ++ LSX_BIND_U7N_DESC_BASE<"vsrlrni.d.q", LSX128DOpnd>; ++ ++ ++def VSSRLNI_B_H : LSX_I4_U<0b011100110100100001>, ++ LSX_BIND_U4N_DESC_BASE<"vssrlni.b.h", uimm4, immZExt4, LSX128BOpnd>; ++ ++def VSSRLNI_H_W : LSX_I5_U<0b01110011010010001>, ++ LSX_BIND_U5N_DESC_BASE<"vssrlni.h.w", uimm5, immZExt5, LSX128HOpnd>; ++ ++def VSSRLNI_W_D : LSX_I6_U<0b0111001101001001>, ++ LSX_BIND_U6N_DESC_BASE<"vssrlni.w.d", uimm6, immZExt6, LSX128WOpnd>; ++ ++def VSSRLNI_D_Q : LSX_I7_U<0b011100110100101>, ++ LSX_BIND_U7N_DESC_BASE<"vssrlni.d.q", LSX128DOpnd>; ++ ++ ++def VSSRLNI_BU_H : LSX_I4_U<0b011100110100110001>, 
++ LSX_BIND_U4N_DESC_BASE<"vssrlni.bu.h", uimm4, immZExt4, LSX128BOpnd> ; ++ ++def VSSRLNI_HU_W : LSX_I5_U<0b01110011010011001>, ++ LSX_BIND_U5N_DESC_BASE<"vssrlni.hu.w", uimm5, immZExt5, LSX128HOpnd>; ++ ++def VSSRLNI_WU_D : LSX_I6_U<0b0111001101001101>, ++ LSX_BIND_U6N_DESC_BASE<"vssrlni.wu.d", uimm6, immZExt6, LSX128WOpnd>; ++ ++def VSSRLNI_DU_Q : LSX_I7_U<0b011100110100111>, ++ LSX_BIND_U7N_DESC_BASE<"vssrlni.du.q", LSX128DOpnd>; ++ ++ ++def VSSRLRNI_BU_H : LSX_I4_U<0b011100110101010001>, ++ LSX_BIND_U4N_DESC_BASE<"vssrlrni.bu.h", uimm4, immZExt4, LSX128BOpnd>; ++ ++def VSSRLRNI_HU_W : LSX_I5_U<0b01110011010101001>, ++ LSX_BIND_U5N_DESC_BASE<"vssrlrni.hu.w", uimm5, immZExt5, LSX128HOpnd>; ++ ++def VSSRLRNI_WU_D : LSX_I6_U<0b0111001101010101>, ++ LSX_BIND_U6N_DESC_BASE<"vssrlrni.wu.d", uimm6, immZExt6, LSX128WOpnd>; ++ ++def VSSRLRNI_DU_Q : LSX_I7_U<0b011100110101011>, ++ LSX_BIND_U7N_DESC_BASE<"vssrlrni.du.q", LSX128DOpnd>; ++ ++ ++def VSRARNI_B_H : LSX_I4_U<0b011100110101110001>, ++ LSX_BIND_U4N_DESC_BASE<"vsrarni.b.h", uimm4, immZExt4, LSX128BOpnd>; ++ ++def VSRARNI_H_W : LSX_I5_U<0b01110011010111001>, ++ LSX_BIND_U5N_DESC_BASE<"vsrarni.h.w", uimm5, immZExt5, LSX128HOpnd>; ++ ++def VSRARNI_W_D : LSX_I6_U<0b0111001101011101>, ++ LSX_BIND_U6N_DESC_BASE<"vsrarni.w.d", uimm6, immZExt6, LSX128WOpnd>; ++ ++def VSRARNI_D_Q : LSX_I7_U<0b011100110101111>, ++ LSX_BIND_U7N_DESC_BASE<"vsrarni.d.q", LSX128DOpnd>; ++ ++ ++def VSSRANI_B_H : LSX_I4_U<0b011100110110000001>, ++ LSX_BIND_U4N_DESC_BASE<"vssrani.b.h", uimm4, immZExt4, LSX128BOpnd>; ++ ++def VSSRANI_H_W : LSX_I5_U<0b01110011011000001>, ++ LSX_BIND_U5N_DESC_BASE<"vssrani.h.w", uimm5, immZExt5, LSX128HOpnd>; ++ ++def VSSRANI_W_D : LSX_I6_U<0b0111001101100001>, ++ LSX_BIND_U6N_DESC_BASE<"vssrani.w.d", uimm6, immZExt6, LSX128WOpnd>; ++ ++def VSSRANI_D_Q : LSX_I7_U<0b011100110110001>, ++ LSX_BIND_U7N_DESC_BASE<"vssrani.d.q", LSX128DOpnd>; ++ ++ ++def VSSRANI_BU_H : LSX_I4_U<0b011100110110010001>, ++ LSX_BIND_U4N_DESC_BASE<"vssrani.bu.h", uimm4, immZExt4, LSX128BOpnd>; ++ ++def VSSRANI_HU_W : LSX_I5_U<0b01110011011001001>, ++ LSX_BIND_U5N_DESC_BASE<"vssrani.hu.w", uimm5, immZExt5, LSX128HOpnd>; ++ ++def VSSRANI_WU_D : LSX_I6_U<0b0111001101100101>, ++ LSX_BIND_U6N_DESC_BASE<"vssrani.wu.d", uimm6, immZExt6, LSX128WOpnd>; ++ ++def VSSRANI_DU_Q : LSX_I7_U<0b011100110110011>, ++ LSX_BIND_U7N_DESC_BASE<"vssrani.du.q", LSX128DOpnd>; ++ ++ ++def VSSRARNI_B_H : LSX_I4_U<0b011100110110100001>, ++ LSX_BIND_U4N_DESC_BASE<"vssrarni.b.h", uimm4, immZExt4, LSX128BOpnd>; ++ ++def VSSRARNI_H_W : LSX_I5_U<0b01110011011010001>, ++ LSX_BIND_U5N_DESC_BASE<"vssrarni.h.w", uimm5, immZExt5, LSX128HOpnd>; ++ ++def VSSRARNI_W_D : LSX_I6_U<0b0111001101101001>, ++ LSX_BIND_U6N_DESC_BASE<"vssrarni.w.d", uimm6, immZExt6, LSX128WOpnd>; ++ ++def VSSRARNI_D_Q : LSX_I7_U<0b011100110110101>, ++ LSX_BIND_U7N_DESC_BASE<"vssrarni.d.q", LSX128DOpnd>; ++ ++ ++def VSSRARNI_BU_H : LSX_I4_U<0b011100110110110001>, ++ LSX_BIND_U4N_DESC_BASE<"vssrarni.bu.h", uimm4, immZExt4, LSX128BOpnd>; ++ ++def VSSRARNI_HU_W : LSX_I5_U<0b01110011011011001>, ++ LSX_BIND_U5N_DESC_BASE<"vssrarni.hu.w", uimm5, immZExt5, LSX128HOpnd>; ++ ++def VSSRARNI_WU_D : LSX_I6_U<0b0111001101101101>, ++ LSX_BIND_U6N_DESC_BASE<"vssrarni.wu.d", uimm6, immZExt6, LSX128WOpnd>; ++ ++def VSSRARNI_DU_Q : LSX_I7_U<0b011100110110111>, ++ LSX_BIND_U7N_DESC_BASE<"vssrarni.du.q", LSX128DOpnd>; ++ ++ ++ ++def VLD : LSX_I12_S<0b0010110000>, ++ LD_DESC_BASE<"vld", load, v16i8, LSX128BOpnd, mem>; ++ ++def VST : LSX_I12_S<0b0010110001>, 
++ ST_DESC_BASE<"vst", store, v16i8, LSX128BOpnd, mem_simm12>; ++ ++ ++def VSETEQZ_V : LSX_SET<0b0111001010011100100110>, ++ LSX_SET_DESC_BASE<"vseteqz.v", LSX128BOpnd>; ++ ++def VSETNEZ_V : LSX_SET<0b0111001010011100100111>, ++ LSX_SET_DESC_BASE<"vsetnez.v", LSX128BOpnd>; ++ ++ ++def VSETANYEQZ_B : LSX_SET<0b0111001010011100101000>, ++ LSX_SET_DESC_BASE<"vsetanyeqz.b", LSX128BOpnd>; ++ ++def VSETANYEQZ_H : LSX_SET<0b0111001010011100101001>, ++ LSX_SET_DESC_BASE<"vsetanyeqz.h", LSX128HOpnd>; ++ ++def VSETANYEQZ_W : LSX_SET<0b0111001010011100101010>, ++ LSX_SET_DESC_BASE<"vsetanyeqz.w", LSX128WOpnd>; ++ ++def VSETANYEQZ_D : LSX_SET<0b0111001010011100101011>, ++ LSX_SET_DESC_BASE<"vsetanyeqz.d", LSX128DOpnd>; ++ ++ ++def VSETALLNEZ_B : LSX_SET<0b0111001010011100101100>, ++ LSX_SET_DESC_BASE<"vsetallnez.b", LSX128BOpnd>; ++ ++def VSETALLNEZ_H : LSX_SET<0b0111001010011100101101>, ++ LSX_SET_DESC_BASE<"vsetallnez.h", LSX128HOpnd>; ++ ++def VSETALLNEZ_W : LSX_SET<0b0111001010011100101110>, ++ LSX_SET_DESC_BASE<"vsetallnez.w", LSX128WOpnd>; ++ ++def VSETALLNEZ_D : LSX_SET<0b0111001010011100101111>, ++ LSX_SET_DESC_BASE<"vsetallnez.d", LSX128DOpnd>; ++ ++class LSX_CBRANCH_PSEUDO_DESC_BASE : ++ LoongArchPseudo<(outs GPR32Opnd:$rd), ++ (ins RCVS:$vj), ++ [(set GPR32Opnd:$rd, (OpNode (TyNode RCVS:$vj)))]> { ++ bit usesCustomInserter = 1; ++} ++ ++def SNZ_B_PSEUDO : LSX_CBRANCH_PSEUDO_DESC_BASE; ++def SNZ_H_PSEUDO : LSX_CBRANCH_PSEUDO_DESC_BASE; ++def SNZ_W_PSEUDO : LSX_CBRANCH_PSEUDO_DESC_BASE; ++def SNZ_D_PSEUDO : LSX_CBRANCH_PSEUDO_DESC_BASE; ++def SNZ_V_PSEUDO : LSX_CBRANCH_PSEUDO_DESC_BASE; ++ ++def SZ_B_PSEUDO : LSX_CBRANCH_PSEUDO_DESC_BASE; ++def SZ_H_PSEUDO : LSX_CBRANCH_PSEUDO_DESC_BASE; ++def SZ_W_PSEUDO : LSX_CBRANCH_PSEUDO_DESC_BASE; ++def SZ_D_PSEUDO : LSX_CBRANCH_PSEUDO_DESC_BASE; ++def SZ_V_PSEUDO : LSX_CBRANCH_PSEUDO_DESC_BASE; ++ ++ ++def VFMADD_S : LSX_VR4MUL<0b000010010001>, ++ LSX_4RF<"vfmadd.s", int_loongarch_lsx_vfmadd_s, LSX128WOpnd>; ++ ++def VFMADD_D : LSX_VR4MUL<0b000010010010>, ++ LSX_4RF<"vfmadd.d", int_loongarch_lsx_vfmadd_d, LSX128DOpnd>; ++ ++def VFMSUB_S : LSX_VR4MUL<0b000010010101>, ++ LSX_4RF<"vfmsub.s", int_loongarch_lsx_vfmsub_s, LSX128WOpnd>; ++ ++def VFMSUB_D : LSX_VR4MUL<0b000010010110>, ++ LSX_4RF<"vfmsub.d", int_loongarch_lsx_vfmsub_d, LSX128DOpnd>; ++ ++def VFNMADD_S : LSX_VR4MUL<0b000010011001>, ++ LSX_4RF<"vfnmadd.s", int_loongarch_lsx_vfnmadd_s, LSX128WOpnd>; ++ ++def VFNMADD_D : LSX_VR4MUL<0b000010011010>, ++ LSX_4RF<"vfnmadd.d", int_loongarch_lsx_vfnmadd_d, LSX128DOpnd>; ++ ++def VFNMSUB_S : LSX_VR4MUL<0b000010011101>, ++ LSX_4RF<"vfnmsub.s", int_loongarch_lsx_vfnmsub_s, LSX128WOpnd>; ++ ++def VFNMSUB_D : LSX_VR4MUL<0b000010011110>, ++ LSX_4RF<"vfnmsub.d", int_loongarch_lsx_vfnmsub_d, LSX128DOpnd>; ++ ++ ++// vfmadd: vj * vk + va ++def : LSXPat<(fma v2f64:$vj, v2f64:$vk, v2f64:$va), ++ (VFMADD_D $vj, $vk, $va)>; ++ ++def : LSXPat<(fma v4f32:$vj, v4f32:$vk, v4f32:$va), ++ (VFMADD_S $vj, $vk, $va)>; ++ ++ ++// vfmsub: vj * vk - va ++def : LSXPat<(fma v2f64:$vj, v2f64:$vk, (fneg v2f64:$va)), ++ (VFMSUB_D v2f64:$vj, v2f64:$vk, v2f64:$va)>; ++ ++def : LSXPat<(fma v4f32:$vj, v4f32:$vk, (fneg v4f32:$va)), ++ (VFMSUB_S v4f32:$vj, v4f32:$vk, v4f32:$va)>; ++ ++ ++// vfnmadd: -(vj * vk + va) ++def : LSXPat<(fma (fneg v2f64:$vj), v2f64:$vk, (fneg v2f64:$va)), ++ (VFNMADD_D v2f64:$vj, v2f64:$vk, v2f64:$va)>; ++ ++def : LSXPat<(fma (fneg v4f32:$vj), v4f32:$vk, (fneg v4f32:$va)), ++ (VFNMADD_S v4f32:$vj, v4f32:$vk, v4f32:$va)>; ++ ++// vfnmsub: -(vj * vk - va) ++def : 
LSXPat<(fma (fneg v2f64:$vj), v2f64:$vk, v2f64:$va), ++ (VFNMSUB_D v2f64:$vj, v2f64:$vk, v2f64:$va)>; ++ ++def : LSXPat<(fma (fneg v4f32:$vj), v4f32:$vk, v4f32:$va), ++ (VFNMSUB_S v4f32:$vj, v4f32:$vk, v4f32:$va)>; ++ ++ ++def VFCMP_CAF_S : LSX_VFCMP<0b000011000101>, ++ LSX_VFCMP_Reg3<"vfcmp.caf.s", LSX128WOpnd, LSX128WOpnd, LSX128WOpnd, int_loongarch_lsx_vfcmp_caf_s>{ ++ bits<5> cond=0x0; ++ } ++ ++def VFCMP_CAF_D : LSX_VFCMP<0b000011000110>, ++ LSX_VFCMP_Reg3<"vfcmp.caf.d", LSX128DOpnd, LSX128DOpnd, LSX128DOpnd, int_loongarch_lsx_vfcmp_caf_d>{ ++ bits<5> cond=0x0; ++ } ++ ++ ++def VFCMP_COR_S : LSX_VFCMP<0b000011000101>, ++ LSX_VFCMP_Reg3<"vfcmp.cor.s", LSX128WOpnd, LSX128WOpnd, LSX128WOpnd, vfsetord_v4f32>{ ++ bits<5> cond=0x14; ++ } ++ ++def VFCMP_COR_D : LSX_VFCMP<0b000011000110>, ++ LSX_VFCMP_Reg3<"vfcmp.cor.d", LSX128DOpnd, LSX128DOpnd, LSX128DOpnd, vfsetord_v2f64>{ ++ bits<5> cond=0x14; ++ } ++ ++ ++def VFCMP_CUN_S : LSX_VFCMP<0b000011000101>, ++ LSX_VFCMP_Reg3<"vfcmp.cun.s", LSX128WOpnd, LSX128WOpnd, LSX128WOpnd, vfsetun_v4f32>{ ++ bits<5> cond=0x8; ++ } ++ ++def VFCMP_CUN_D : LSX_VFCMP<0b000011000110>, ++ LSX_VFCMP_Reg3<"vfcmp.cun.d", LSX128DOpnd, LSX128DOpnd, LSX128DOpnd, vfsetun_v2f64>{ ++ bits<5> cond=0x8; ++ } ++ ++ ++def VFCMP_CUNE_S : LSX_VFCMP<0b000011000101>, ++ LSX_VFCMP_Reg3<"vfcmp.cune.s", LSX128WOpnd, LSX128WOpnd, LSX128WOpnd, vfsetune_v4f32>{ ++ bits<5> cond=0x18; ++ } ++ ++def VFCMP_CUNE_D : LSX_VFCMP<0b000011000110>, ++ LSX_VFCMP_Reg3<"vfcmp.cune.d", LSX128DOpnd, LSX128DOpnd, LSX128DOpnd, vfsetune_v2f64>{ ++ bits<5> cond=0x18; ++ } ++ ++ ++def VFCMP_CUEQ_S : LSX_VFCMP<0b000011000101>, ++ LSX_VFCMP_Reg3<"vfcmp.cueq.s", LSX128WOpnd, LSX128WOpnd, LSX128WOpnd, vfsetueq_v4f32>{ ++ bits<5> cond=0xc; ++ } ++ ++def VFCMP_CUEQ_D : LSX_VFCMP<0b000011000110>, ++ LSX_VFCMP_Reg3<"vfcmp.cueq.d", LSX128DOpnd, LSX128DOpnd, LSX128DOpnd, vfsetueq_v2f64>{ ++ bits<5> cond=0xc; ++ } ++ ++def VFCMP_CEQ_S : LSX_VFCMP<0b000011000101>, ++ LSX_VFCMP_Reg3<"vfcmp.ceq.s", LSX128WOpnd, LSX128WOpnd, LSX128WOpnd, vfsetoeq_v4f32>{ ++ bits<5> cond=0x4; ++ } ++ ++def VFCMP_CEQ_D : LSX_VFCMP<0b000011000110>, ++ LSX_VFCMP_Reg3<"vfcmp.ceq.d", LSX128DOpnd, LSX128DOpnd, LSX128DOpnd, vfsetoeq_v2f64>{ ++ bits<5> cond=0x4; ++ } ++ ++ ++def VFCMP_CNE_S : LSX_VFCMP<0b000011000101>, ++ LSX_VFCMP_Reg3<"vfcmp.cne.s", LSX128WOpnd, LSX128WOpnd, LSX128WOpnd, vfsetone_v4f32>{ ++ bits<5> cond=0x10; ++ } ++ ++def VFCMP_CNE_D : LSX_VFCMP<0b000011000110>, ++ LSX_VFCMP_Reg3<"vfcmp.cne.d", LSX128DOpnd, LSX128DOpnd, LSX128DOpnd, vfsetone_v2f64>{ ++ bits<5> cond=0x10; ++ } ++ ++ ++def VFCMP_CLT_S : LSX_VFCMP<0b000011000101>, ++ LSX_VFCMP_Reg3<"vfcmp.clt.s", LSX128WOpnd, LSX128WOpnd, LSX128WOpnd, vfsetolt_v4f32>{ ++ bits<5> cond=0x2; ++ } ++ ++def VFCMP_CLT_D : LSX_VFCMP<0b000011000110>, ++ LSX_VFCMP_Reg3<"vfcmp.clt.d", LSX128DOpnd, LSX128DOpnd, LSX128DOpnd, vfsetolt_v2f64>{ ++ bits<5> cond=0x2; ++ } ++ ++ ++def VFCMP_CULT_S : LSX_VFCMP<0b000011000101>, ++ LSX_VFCMP_Reg3<"vfcmp.cult.s", LSX128WOpnd, LSX128WOpnd, LSX128WOpnd, vfsetult_v4f32>{ ++ bits<5> cond=0xa; ++ } ++ ++def VFCMP_CULT_D : LSX_VFCMP<0b000011000110>, ++ LSX_VFCMP_Reg3<"vfcmp.cult.d", LSX128DOpnd, LSX128DOpnd, LSX128DOpnd, vfsetult_v2f64>{ ++ bits<5> cond=0xa; ++ } ++ ++ ++def VFCMP_CLE_S : LSX_VFCMP<0b000011000101>, ++ LSX_VFCMP_Reg3<"vfcmp.cle.s", LSX128WOpnd, LSX128WOpnd, LSX128WOpnd, vfsetole_v4f32>{ ++ bits<5> cond=0x6; ++ } ++ ++def VFCMP_CLE_D : LSX_VFCMP<0b000011000110>, ++ LSX_VFCMP_Reg3<"vfcmp.cle.d", LSX128DOpnd, LSX128DOpnd, LSX128DOpnd, 
vfsetole_v2f64>{ ++ bits<5> cond=0x6; ++ } ++ ++ ++def VFCMP_CULE_S : LSX_VFCMP<0b000011000101>, ++ LSX_VFCMP_Reg3<"vfcmp.cule.s", LSX128WOpnd, LSX128WOpnd, LSX128WOpnd, vfsetule_v4f32>{ ++ bits<5> cond=0xe; ++ } ++ ++def VFCMP_CULE_D : LSX_VFCMP<0b000011000110>, ++ LSX_VFCMP_Reg3<"vfcmp.cule.d", LSX128DOpnd, LSX128DOpnd, LSX128DOpnd, vfsetule_v2f64>{ ++ bits<5> cond=0xe; ++ } ++ ++ ++def VFCMP_SAF_S : LSX_VFCMP<0b000011000101>, ++ LSX_VFCMP_Reg3<"vfcmp.saf.s", LSX128WOpnd, LSX128WOpnd, LSX128WOpnd, int_loongarch_lsx_vfcmp_saf_s>{ ++ bits<5> cond=0x1; ++ } ++ ++def VFCMP_SAF_D : LSX_VFCMP<0b000011000110>, ++ LSX_VFCMP_Reg3<"vfcmp.saf.d", LSX128DOpnd, LSX128DOpnd, LSX128DOpnd, int_loongarch_lsx_vfcmp_saf_d>{ ++ bits<5> cond=0x1; ++ } ++ ++def VFCMP_SOR_S : LSX_VFCMP<0b000011000101>, ++ LSX_VFCMP_Reg3<"vfcmp.sor.s", LSX128WOpnd, LSX128WOpnd, LSX128WOpnd, int_loongarch_lsx_vfcmp_sor_s>{ ++ bits<5> cond=0x15; ++ } ++ ++def VFCMP_SOR_D : LSX_VFCMP<0b000011000110>, ++ LSX_VFCMP_Reg3<"vfcmp.sor.d", LSX128DOpnd, LSX128DOpnd, LSX128DOpnd, int_loongarch_lsx_vfcmp_sor_d>{ ++ bits<5> cond=0x15; ++ } ++ ++def VFCMP_SUN_S : LSX_VFCMP<0b000011000101>, ++ LSX_VFCMP_Reg3<"vfcmp.sun.s", LSX128WOpnd, LSX128WOpnd, LSX128WOpnd, int_loongarch_lsx_vfcmp_sun_s>{ ++ bits<5> cond=0x9; ++ } ++ ++def VFCMP_SUN_D : LSX_VFCMP<0b000011000110>, ++ LSX_VFCMP_Reg3<"vfcmp.sun.d", LSX128DOpnd, LSX128DOpnd, LSX128DOpnd, int_loongarch_lsx_vfcmp_sun_d>{ ++ bits<5> cond=0x9; ++ } ++ ++def VFCMP_SUNE_S : LSX_VFCMP<0b000011000101>, ++ LSX_VFCMP_Reg3<"vfcmp.sune.s", LSX128WOpnd, LSX128WOpnd, LSX128WOpnd, int_loongarch_lsx_vfcmp_sune_s>{ ++ bits<5> cond=0x19; ++ } ++ ++def VFCMP_SUNE_D : LSX_VFCMP<0b000011000110>, ++ LSX_VFCMP_Reg3<"vfcmp.sune.d", LSX128DOpnd, LSX128DOpnd, LSX128DOpnd, int_loongarch_lsx_vfcmp_sune_d>{ ++ bits<5> cond=0x19; ++ } ++ ++def VFCMP_SUEQ_S : LSX_VFCMP<0b000011000101>, ++ LSX_VFCMP_Reg3<"vfcmp.sueq.s", LSX128WOpnd, LSX128WOpnd, LSX128WOpnd, int_loongarch_lsx_vfcmp_sueq_s>{ ++ bits<5> cond=0xd; ++ } ++ ++def VFCMP_SUEQ_D : LSX_VFCMP<0b000011000110>, ++ LSX_VFCMP_Reg3<"vfcmp.sueq.d", LSX128DOpnd, LSX128DOpnd, LSX128DOpnd, int_loongarch_lsx_vfcmp_sueq_d>{ ++ bits<5> cond=0xd; ++ } ++ ++def VFCMP_SEQ_S : LSX_VFCMP<0b000011000101>, ++ LSX_VFCMP_Reg3<"vfcmp.seq.s", LSX128WOpnd, LSX128WOpnd, LSX128WOpnd, int_loongarch_lsx_vfcmp_seq_s>{ ++ bits<5> cond=0x5; ++ } ++ ++def VFCMP_SEQ_D : LSX_VFCMP<0b000011000110>, ++ LSX_VFCMP_Reg3<"vfcmp.seq.d", LSX128DOpnd, LSX128DOpnd, LSX128DOpnd, int_loongarch_lsx_vfcmp_seq_d>{ ++ bits<5> cond=0x5; ++ } ++ ++def VFCMP_SNE_S : LSX_VFCMP<0b000011000101>, ++ LSX_VFCMP_Reg3<"vfcmp.sne.s", LSX128WOpnd, LSX128WOpnd, LSX128WOpnd, int_loongarch_lsx_vfcmp_sne_s>{ ++ bits<5> cond=0x11; ++ } ++ ++def VFCMP_SNE_D : LSX_VFCMP<0b000011000110>, ++ LSX_VFCMP_Reg3<"vfcmp.sne.d", LSX128DOpnd, LSX128DOpnd, LSX128DOpnd, int_loongarch_lsx_vfcmp_sne_d>{ ++ bits<5> cond=0x11; ++ } ++ ++def VFCMP_SLT_S : LSX_VFCMP<0b000011000101>, ++ LSX_VFCMP_Reg3<"vfcmp.slt.s", LSX128WOpnd, LSX128WOpnd, LSX128WOpnd, int_loongarch_lsx_vfcmp_slt_s>{ ++ bits<5> cond=0x3; ++ } ++ ++def VFCMP_SLT_D : LSX_VFCMP<0b000011000110>, ++ LSX_VFCMP_Reg3<"vfcmp.slt.d", LSX128DOpnd, LSX128DOpnd, LSX128DOpnd, int_loongarch_lsx_vfcmp_slt_d>{ ++ bits<5> cond=0x3; ++ } ++ ++def VFCMP_SULT_S : LSX_VFCMP<0b000011000101>, ++ LSX_VFCMP_Reg3<"vfcmp.sult.s", LSX128WOpnd, LSX128WOpnd, LSX128WOpnd, int_loongarch_lsx_vfcmp_sult_s>{ ++ bits<5> cond=0xb; ++ } ++ ++def VFCMP_SULT_D : LSX_VFCMP<0b000011000110>, ++ LSX_VFCMP_Reg3<"vfcmp.sult.d", 
LSX128DOpnd, LSX128DOpnd, LSX128DOpnd, int_loongarch_lsx_vfcmp_sult_d>{ ++ bits<5> cond=0xb; ++ } ++ ++def VFCMP_SLE_S : LSX_VFCMP<0b000011000101>, ++ LSX_VFCMP_Reg3<"vfcmp.sle.s", LSX128WOpnd, LSX128WOpnd, LSX128WOpnd, int_loongarch_lsx_vfcmp_sle_s>{ ++ bits<5> cond=0x7; ++ } ++ ++def VFCMP_SLE_D : LSX_VFCMP<0b000011000110>, ++ LSX_VFCMP_Reg3<"vfcmp.sle.d", LSX128DOpnd, LSX128DOpnd, LSX128DOpnd, int_loongarch_lsx_vfcmp_sle_d>{ ++ bits<5> cond=0x7; ++ } ++ ++def VFCMP_SULE_S : LSX_VFCMP<0b000011000101>, ++ LSX_VFCMP_Reg3<"vfcmp.sule.s", LSX128WOpnd, LSX128WOpnd, LSX128WOpnd, int_loongarch_lsx_vfcmp_sule_s>{ ++ bits<5> cond=0xf; ++ } ++ ++def VFCMP_SULE_D : LSX_VFCMP<0b000011000110>, ++ LSX_VFCMP_Reg3<"vfcmp.sule.d", LSX128DOpnd, LSX128DOpnd, LSX128DOpnd, int_loongarch_lsx_vfcmp_sule_d>{ ++ bits<5> cond=0xf; ++ } ++ ++def VBITSEL_V : LSX_VR4MUL<0b000011010001>, ++ LSX_VMul_Reg4<"vbitsel.v", LSX128BOpnd, LSX128BOpnd, LSX128BOpnd, LSX128BOpnd, int_loongarch_lsx_vbitsel_v>; ++ ++def VSHUF_B : LSX_VR4MUL<0b000011010101>, ++ LSX_3R_4R_VSHF_DESC_BASE<"vshuf.b", LSX128BOpnd>; ++ ++ ++class LSX_BSEL_PSEUDO_BASE : ++ LSXPseudo<(outs RO:$vd), (ins RO:$vd_in, RO:$vs, RO:$vt), ++ [(set RO:$vd, (Ty (vselect RO:$vd_in, RO:$vt, RO:$vs)))]>, ++ PseudoInstExpansion<(VBITSEL_V LSX128BOpnd:$vd, LSX128BOpnd:$vs, ++ LSX128BOpnd:$vt, LSX128BOpnd:$vd_in)> { ++ let Constraints = "$vd_in = $vd"; ++} ++ ++def BSEL_B_PSEUDO : LSX_BSEL_PSEUDO_BASE; ++def BSEL_H_PSEUDO : LSX_BSEL_PSEUDO_BASE; ++def BSEL_W_PSEUDO : LSX_BSEL_PSEUDO_BASE; ++def BSEL_D_PSEUDO : LSX_BSEL_PSEUDO_BASE; ++def BSEL_FW_PSEUDO : LSX_BSEL_PSEUDO_BASE; ++def BSEL_FD_PSEUDO : LSX_BSEL_PSEUDO_BASE; ++ ++ ++class LSX_LD_DESC_BASE { ++ dag OutOperandList = (outs ROVD:$vd); ++ dag InOperandList = (ins MemOpnd:$addr); ++ string AsmString = !strconcat(instr_asm, "\t$vd, $addr"); ++ list Pattern = [(set ROVD:$vd, (OpNode (TyNode (load Addr:$addr))))]; ++ string DecoderMethod = "DecodeLSX128memlsl"; ++} ++ ++def VLDREPL_B : LSX_SI12_S<0b0011000010>, ++ LSX_LD_DESC_BASE<"vldrepl.b", vldrepl_v16i8, v16i8, LSX128BOpnd>; ++ ++def VLDREPL_H : LSX_SI11_S<0b00110000010>, ++ LSX_LD_DESC_BASE<"vldrepl.h", vldrepl_v8i16, v8i16, LSX128HOpnd, mem_simm11_lsl1, addrimm11lsl1>; ++ ++def VLDREPL_W : LSX_SI10_S<0b001100000010>, ++ LSX_LD_DESC_BASE<"vldrepl.w", vldrepl_v4i32, v4i32, LSX128WOpnd, mem_simm10_lsl2, addrimm10lsl2>; ++ ++def VLDREPL_D : LSX_SI9_S<0b0011000000010>, ++ LSX_LD_DESC_BASE<"vldrepl.d", vldrepl_v2i64, v2i64, LSX128DOpnd, mem_simm9_lsl3, addrimm9lsl3>; ++ ++ ++def VSTELM_B : LSX_SI8_idx4<0b0011000110>, ++ LSX_I8_U4_DESC_BASE<"vstelm.b", int_loongarch_lsx_vstelm_b, simm8_32, immSExt8, LSX128BOpnd>; ++ ++def VSTELM_H : LSX_SI8_idx3<0b00110001010>, ++ LSX_I8_U3_DESC_BASE<"vstelm.h", int_loongarch_lsx_vstelm_h, immSExt8_1_O, immSExt8, LSX128HOpnd>; ++ ++def VSTELM_W : LSX_SI8_idx2<0b001100010010>, ++ LSX_I8_U2_DESC_BASE<"vstelm.w", int_loongarch_lsx_vstelm_w, immSExt8_2_O, immSExt8, LSX128WOpnd>; ++ ++def VSTELM_D : LSX_SI8_idx1<0b0011000100010>, ++ LSX_I8_U1_DESC_BASE<"vstelm.d", int_loongarch_lsx_vstelm_d, immSExt8_3_O, immSExt8, LSX128DOpnd>; ++ ++ ++let mayLoad = 1, canFoldAsLoad = 1 in { ++ def VLDX : LSX_3R_2GP<0b00111000010000000>, ++ LSX_LDX_LA<"vldx", int_loongarch_lsx_vldx, GPR64Opnd, LSX128BOpnd>; ++} ++ ++let mayStore = 1 in{ ++ def VSTX : LSX_3R_2GP<0b00111000010001000>, ++ LSX_SDX_LA<"vstx", int_loongarch_lsx_vstx, GPR64Opnd, LSX128BOpnd>; ++} ++ ++ ++def VADDWEV_H_B : LSX_3R<0b01110000000111100>, ++ LSX_3R_DESC_BASE<"vaddwev.h.b", 
int_loongarch_lsx_vaddwev_h_b, LSX128HOpnd, LSX128BOpnd, LSX128BOpnd>; ++ ++def VADDWEV_W_H : LSX_3R<0b01110000000111101>, ++ LSX_3R_DESC_BASE<"vaddwev.w.h", int_loongarch_lsx_vaddwev_w_h, LSX128WOpnd, LSX128HOpnd, LSX128HOpnd>; ++ ++def VADDWEV_D_W : LSX_3R<0b01110000000111110>, ++ LSX_3R_DESC_BASE<"vaddwev.d.w", int_loongarch_lsx_vaddwev_d_w, LSX128DOpnd, LSX128WOpnd, LSX128WOpnd> ; ++ ++def VADDWEV_Q_D : LSX_3R<0b01110000000111111>, ++ LSX_3R_DESC_BASE<"vaddwev.q.d", int_loongarch_lsx_vaddwev_q_d, LSX128DOpnd, LSX128DOpnd, LSX128DOpnd>; ++ ++ ++def VSUBWEV_H_B : LSX_3R<0b01110000001000000>, ++ LSX_3R_DESC_BASE<"vsubwev.h.b", int_loongarch_lsx_vsubwev_h_b, LSX128HOpnd, LSX128BOpnd, LSX128BOpnd>; ++ ++def VSUBWEV_W_H : LSX_3R<0b01110000001000001>, ++ LSX_3R_DESC_BASE<"vsubwev.w.h", int_loongarch_lsx_vsubwev_w_h, LSX128WOpnd, LSX128HOpnd, LSX128HOpnd>; ++ ++def VSUBWEV_D_W : LSX_3R<0b01110000001000010>, ++ LSX_3R_DESC_BASE<"vsubwev.d.w", int_loongarch_lsx_vsubwev_d_w, LSX128DOpnd, LSX128WOpnd, LSX128WOpnd> ; ++ ++def VSUBWEV_Q_D : LSX_3R<0b01110000001000011>, ++ LSX_3R_DESC_BASE<"vsubwev.q.d", int_loongarch_lsx_vsubwev_q_d, LSX128DOpnd, LSX128DOpnd, LSX128DOpnd>; ++ ++ ++def VADDWOD_H_B : LSX_3R<0b01110000001000100>, ++ LSX_3R_DESC_BASE<"vaddwod.h.b", int_loongarch_lsx_vaddwod_h_b, LSX128HOpnd, LSX128BOpnd, LSX128BOpnd>; ++ ++def VADDWOD_W_H : LSX_3R<0b01110000001000101>, ++ LSX_3R_DESC_BASE<"vaddwod.w.h", int_loongarch_lsx_vaddwod_w_h, LSX128WOpnd, LSX128HOpnd, LSX128HOpnd>; ++ ++def VADDWOD_D_W : LSX_3R<0b01110000001000110>, ++ LSX_3R_DESC_BASE<"vaddwod.d.w", int_loongarch_lsx_vaddwod_d_w, LSX128DOpnd, LSX128WOpnd, LSX128WOpnd> ; ++ ++def VADDWOD_Q_D : LSX_3R<0b01110000001000111>, ++ LSX_3R_DESC_BASE<"vaddwod.q.d", int_loongarch_lsx_vaddwod_q_d, LSX128DOpnd, LSX128DOpnd, LSX128DOpnd>; ++ ++ ++def VSUBWOD_H_B : LSX_3R<0b01110000001001000>, ++ LSX_3R_DESC_BASE<"vsubwod.h.b", int_loongarch_lsx_vsubwod_h_b, LSX128HOpnd, LSX128BOpnd, LSX128BOpnd>; ++ ++def VSUBWOD_W_H : LSX_3R<0b01110000001001001>, ++ LSX_3R_DESC_BASE<"vsubwod.w.h", int_loongarch_lsx_vsubwod_w_h, LSX128WOpnd, LSX128HOpnd, LSX128HOpnd>; ++ ++def VSUBWOD_D_W : LSX_3R<0b01110000001001010>, ++ LSX_3R_DESC_BASE<"vsubwod.d.w", int_loongarch_lsx_vsubwod_d_w, LSX128DOpnd, LSX128WOpnd, LSX128WOpnd> ; ++ ++def VSUBWOD_Q_D : LSX_3R<0b01110000001001011>, ++ LSX_3R_DESC_BASE<"vsubwod.q.d", int_loongarch_lsx_vsubwod_q_d, LSX128DOpnd, LSX128DOpnd, LSX128DOpnd>; ++ ++ ++def VADDWEV_H_BU : LSX_3R<0b01110000001011100>, ++ LSX_3R_DESC_BASE<"vaddwev.h.bu", int_loongarch_lsx_vaddwev_h_bu, LSX128HOpnd, LSX128BOpnd, LSX128BOpnd>; ++ ++def VADDWEV_W_HU : LSX_3R<0b01110000001011101>, ++ LSX_3R_DESC_BASE<"vaddwev.w.hu", int_loongarch_lsx_vaddwev_w_hu, LSX128WOpnd, LSX128HOpnd, LSX128HOpnd>; ++ ++def VADDWEV_D_WU : LSX_3R<0b01110000001011110>, ++ LSX_3R_DESC_BASE<"vaddwev.d.wu", int_loongarch_lsx_vaddwev_d_wu, LSX128DOpnd, LSX128WOpnd, LSX128WOpnd> ; ++ ++def VADDWEV_Q_DU : LSX_3R<0b01110000001011111>, ++ LSX_3R_DESC_BASE<"vaddwev.q.du", int_loongarch_lsx_vaddwev_q_du, LSX128DOpnd, LSX128DOpnd, LSX128DOpnd>; ++ ++ ++def VSUBWEV_H_BU : LSX_3R<0b01110000001100000>, ++ LSX_3R_DESC_BASE<"vsubwev.h.bu", int_loongarch_lsx_vsubwev_h_bu, LSX128HOpnd, LSX128BOpnd, LSX128BOpnd>; ++ ++def VSUBWEV_W_HU : LSX_3R<0b01110000001100001>, ++ LSX_3R_DESC_BASE<"vsubwev.w.hu", int_loongarch_lsx_vsubwev_w_hu, LSX128WOpnd, LSX128HOpnd, LSX128HOpnd>; ++ ++def VSUBWEV_D_WU : LSX_3R<0b01110000001100010>, ++ LSX_3R_DESC_BASE<"vsubwev.d.wu", int_loongarch_lsx_vsubwev_d_wu, 
LSX128DOpnd, LSX128WOpnd, LSX128WOpnd> ; ++ ++def VSUBWEV_Q_DU : LSX_3R<0b01110000001100011>, ++ LSX_3R_DESC_BASE<"vsubwev.q.du", int_loongarch_lsx_vsubwev_q_du, LSX128DOpnd, LSX128DOpnd, LSX128DOpnd>; ++ ++ ++def VADDWOD_H_BU : LSX_3R<0b01110000001100100>, ++ LSX_3R_DESC_BASE<"vaddwod.h.bu", int_loongarch_lsx_vaddwod_h_bu, LSX128HOpnd, LSX128BOpnd, LSX128BOpnd>; ++ ++def VADDWOD_W_HU : LSX_3R<0b01110000001100101>, ++ LSX_3R_DESC_BASE<"vaddwod.w.hu", int_loongarch_lsx_vaddwod_w_hu, LSX128WOpnd, LSX128HOpnd, LSX128HOpnd>; ++ ++def VADDWOD_D_WU : LSX_3R<0b01110000001100110>, ++ LSX_3R_DESC_BASE<"vaddwod.d.wu", int_loongarch_lsx_vaddwod_d_wu, LSX128DOpnd, LSX128WOpnd, LSX128WOpnd> ; ++ ++def VADDWOD_Q_DU : LSX_3R<0b01110000001100111>, ++ LSX_3R_DESC_BASE<"vaddwod.q.du", int_loongarch_lsx_vaddwod_q_du, LSX128DOpnd, LSX128DOpnd, LSX128DOpnd>; ++ ++ ++def VSUBWOD_H_BU : LSX_3R<0b01110000001101000>, ++ LSX_3R_DESC_BASE<"vsubwod.h.bu", int_loongarch_lsx_vsubwod_h_bu, LSX128HOpnd, LSX128BOpnd, LSX128BOpnd>; ++ ++def VSUBWOD_W_HU : LSX_3R<0b01110000001101001>, ++ LSX_3R_DESC_BASE<"vsubwod.w.hu", int_loongarch_lsx_vsubwod_w_hu, LSX128WOpnd, LSX128HOpnd, LSX128HOpnd>; ++ ++def VSUBWOD_D_WU : LSX_3R<0b01110000001101010>, ++ LSX_3R_DESC_BASE<"vsubwod.d.wu", int_loongarch_lsx_vsubwod_d_wu, LSX128DOpnd, LSX128WOpnd, LSX128WOpnd> ; ++ ++def VSUBWOD_Q_DU : LSX_3R<0b01110000001101011>, ++ LSX_3R_DESC_BASE<"vsubwod.q.du", int_loongarch_lsx_vsubwod_q_du, LSX128DOpnd, LSX128DOpnd, LSX128DOpnd>; ++ ++ ++def VADDWEV_H_BU_B : LSX_3R<0b01110000001111100>, ++ LSX_3R_DESC_BASE<"vaddwev.h.bu.b", int_loongarch_lsx_vaddwev_h_bu_b, LSX128HOpnd, LSX128BOpnd, LSX128BOpnd>; ++ ++def VADDWEV_W_HU_H : LSX_3R<0b01110000001111101>, ++ LSX_3R_DESC_BASE<"vaddwev.w.hu.h", int_loongarch_lsx_vaddwev_w_hu_h, LSX128WOpnd, LSX128HOpnd, LSX128HOpnd>; ++ ++def VADDWEV_D_WU_W : LSX_3R<0b01110000001111110>, ++ LSX_3R_DESC_BASE<"vaddwev.d.wu.w", int_loongarch_lsx_vaddwev_d_wu_w, LSX128DOpnd, LSX128WOpnd, LSX128WOpnd> ; ++ ++def VADDWEV_Q_DU_D : LSX_3R<0b01110000001111111>, ++ LSX_3R_DESC_BASE<"vaddwev.q.du.d", int_loongarch_lsx_vaddwev_q_du_d, LSX128DOpnd, LSX128DOpnd, LSX128DOpnd>; ++ ++ ++def VADDWOD_H_BU_B : LSX_3R<0b01110000010000000>, ++ LSX_3R_DESC_BASE<"vaddwod.h.bu.b", int_loongarch_lsx_vaddwod_h_bu_b, LSX128HOpnd, LSX128BOpnd, LSX128BOpnd>; ++ ++def VADDWOD_W_HU_H : LSX_3R<0b01110000010000001>, ++ LSX_3R_DESC_BASE<"vaddwod.w.hu.h", int_loongarch_lsx_vaddwod_w_hu_h, LSX128WOpnd, LSX128HOpnd, LSX128HOpnd>; ++ ++def VADDWOD_D_WU_W : LSX_3R<0b01110000010000010>, ++ LSX_3R_DESC_BASE<"vaddwod.d.wu.w", int_loongarch_lsx_vaddwod_d_wu_w, LSX128DOpnd, LSX128WOpnd, LSX128WOpnd> ; ++ ++def VADDWOD_Q_DU_D : LSX_3R<0b01110000010000011>, ++ LSX_3R_DESC_BASE<"vaddwod.q.du.d", int_loongarch_lsx_vaddwod_q_du_d, LSX128DOpnd, LSX128DOpnd, LSX128DOpnd>; ++ ++ ++def VHADDW_Q_D : LSX_3R<0b01110000010101011>, ++ LSX_3R_DESC_BASE<"vhaddw.q.d", int_loongarch_lsx_vhaddw_q_d, LSX128DOpnd, LSX128DOpnd, LSX128DOpnd>; ++ ++def VHSUBW_Q_D : LSX_3R<0b01110000010101111>, ++ LSX_3R_DESC_BASE<"vhsubw.q.d", int_loongarch_lsx_vhsubw_q_d, LSX128DOpnd, LSX128DOpnd, LSX128DOpnd>; ++ ++ ++def VHADDW_QU_DU : LSX_3R<0b01110000010110011>, ++ LSX_3R_DESC_BASE<"vhaddw.qu.du", int_loongarch_lsx_vhaddw_qu_du, LSX128DOpnd, LSX128DOpnd, LSX128DOpnd>; ++ ++def VHSUBW_QU_DU : LSX_3R<0b01110000010110111>, ++ LSX_3R_DESC_BASE<"vhsubw.qu.du", int_loongarch_lsx_vhsubw_qu_du, LSX128DOpnd, LSX128DOpnd, LSX128DOpnd>; ++ ++ ++def VMUH_B : LSX_3R<0b01110000100001100>, ++ 
LSX_3R_DESC_BASE<"vmuh.b", int_loongarch_lsx_vmuh_b, LSX128BOpnd, LSX128BOpnd, LSX128BOpnd>; ++ ++def VMUH_H : LSX_3R<0b01110000100001101>, ++ LSX_3R_DESC_BASE<"vmuh.h", int_loongarch_lsx_vmuh_h, LSX128HOpnd, LSX128HOpnd, LSX128HOpnd>; ++ ++def VMUH_W : LSX_3R<0b01110000100001110>, ++ LSX_3R_DESC_BASE<"vmuh.w", int_loongarch_lsx_vmuh_w, LSX128WOpnd, LSX128WOpnd, LSX128WOpnd> ; ++ ++def VMUH_D : LSX_3R<0b01110000100001111>, ++ LSX_3R_DESC_BASE<"vmuh.d", int_loongarch_lsx_vmuh_d, LSX128DOpnd, LSX128DOpnd, LSX128DOpnd>; ++ ++ ++def VMUH_BU : LSX_3R<0b01110000100010000>, ++ LSX_3R_DESC_BASE<"vmuh.bu", int_loongarch_lsx_vmuh_bu, LSX128BOpnd, LSX128BOpnd, LSX128BOpnd>; ++ ++def VMUH_HU : LSX_3R<0b01110000100010001>, ++ LSX_3R_DESC_BASE<"vmuh.hu", int_loongarch_lsx_vmuh_hu, LSX128HOpnd, LSX128HOpnd, LSX128HOpnd>; ++ ++def VMUH_WU : LSX_3R<0b01110000100010010>, ++ LSX_3R_DESC_BASE<"vmuh.wu", int_loongarch_lsx_vmuh_wu, LSX128WOpnd, LSX128WOpnd, LSX128WOpnd> ; ++ ++def VMUH_DU : LSX_3R<0b01110000100010011>, ++ LSX_3R_DESC_BASE<"vmuh.du", int_loongarch_lsx_vmuh_du, LSX128DOpnd, LSX128DOpnd, LSX128DOpnd>; ++ ++ ++def VMULWEV_H_B : LSX_3R<0b01110000100100000>, ++ LSX_3R_DESC_BASE<"vmulwev.h.b", int_loongarch_lsx_vmulwev_h_b, LSX128HOpnd, LSX128BOpnd, LSX128BOpnd>; ++ ++def VMULWEV_W_H : LSX_3R<0b01110000100100001>, ++ LSX_3R_DESC_BASE<"vmulwev.w.h", int_loongarch_lsx_vmulwev_w_h, LSX128WOpnd, LSX128HOpnd, LSX128HOpnd>; ++ ++def VMULWEV_D_W : LSX_3R<0b01110000100100010>, ++ LSX_3R_DESC_BASE<"vmulwev.d.w", int_loongarch_lsx_vmulwev_d_w, LSX128DOpnd, LSX128WOpnd, LSX128WOpnd> ; ++ ++def VMULWEV_Q_D : LSX_3R<0b01110000100100011>, ++ LSX_3R_DESC_BASE<"vmulwev.q.d", int_loongarch_lsx_vmulwev_q_d, LSX128DOpnd, LSX128DOpnd, LSX128DOpnd>; ++ ++ ++def VMULWOD_H_B : LSX_3R<0b01110000100100100>, ++ LSX_3R_DESC_BASE<"vmulwod.h.b", int_loongarch_lsx_vmulwod_h_b, LSX128HOpnd, LSX128BOpnd, LSX128BOpnd>; ++ ++def VMULWOD_W_H : LSX_3R<0b01110000100100101>, ++ LSX_3R_DESC_BASE<"vmulwod.w.h", int_loongarch_lsx_vmulwod_w_h, LSX128WOpnd, LSX128HOpnd, LSX128HOpnd>; ++ ++def VMULWOD_D_W : LSX_3R<0b01110000100100110>, ++ LSX_3R_DESC_BASE<"vmulwod.d.w", int_loongarch_lsx_vmulwod_d_w, LSX128DOpnd, LSX128WOpnd, LSX128WOpnd> ; ++ ++def VMULWOD_Q_D : LSX_3R<0b01110000100100111>, ++ LSX_3R_DESC_BASE<"vmulwod.q.d", int_loongarch_lsx_vmulwod_q_d, LSX128DOpnd, LSX128DOpnd, LSX128DOpnd>; ++ ++ ++def VMULWEV_H_BU : LSX_3R<0b01110000100110000>, ++ LSX_3R_DESC_BASE<"vmulwev.h.bu", int_loongarch_lsx_vmulwev_h_bu, LSX128HOpnd, LSX128BOpnd, LSX128BOpnd>; ++ ++def VMULWEV_W_HU : LSX_3R<0b01110000100110001>, ++ LSX_3R_DESC_BASE<"vmulwev.w.hu", int_loongarch_lsx_vmulwev_w_hu, LSX128WOpnd, LSX128HOpnd, LSX128HOpnd>; ++ ++def VMULWEV_D_WU : LSX_3R<0b01110000100110010>, ++ LSX_3R_DESC_BASE<"vmulwev.d.wu", int_loongarch_lsx_vmulwev_d_wu, LSX128DOpnd, LSX128WOpnd, LSX128WOpnd> ; ++ ++def VMULWEV_Q_DU : LSX_3R<0b01110000100110011>, ++ LSX_3R_DESC_BASE<"vmulwev.q.du", int_loongarch_lsx_vmulwev_q_du, LSX128DOpnd, LSX128DOpnd, LSX128DOpnd>; ++ ++ ++def VMULWOD_H_BU : LSX_3R<0b01110000100110100>, ++ LSX_3R_DESC_BASE<"vmulwod.h.bu", int_loongarch_lsx_vmulwod_h_bu, LSX128HOpnd, LSX128BOpnd, LSX128BOpnd>; ++ ++def VMULWOD_W_HU : LSX_3R<0b01110000100110101>, ++ LSX_3R_DESC_BASE<"vmulwod.w.hu", int_loongarch_lsx_vmulwod_w_hu, LSX128WOpnd, LSX128HOpnd, LSX128HOpnd>; ++ ++def VMULWOD_D_WU : LSX_3R<0b01110000100110110>, ++ LSX_3R_DESC_BASE<"vmulwod.d.wu", int_loongarch_lsx_vmulwod_d_wu, LSX128DOpnd, LSX128WOpnd, LSX128WOpnd> ; ++ ++def VMULWOD_Q_DU : 
LSX_3R<0b01110000100110111>, ++ LSX_3R_DESC_BASE<"vmulwod.q.du", int_loongarch_lsx_vmulwod_q_du, LSX128DOpnd, LSX128DOpnd, LSX128DOpnd>; ++ ++ ++def VMULWEV_H_BU_B : LSX_3R<0b01110000101000000>, ++ LSX_3R_DESC_BASE<"vmulwev.h.bu.b", int_loongarch_lsx_vmulwev_h_bu_b, LSX128HOpnd, LSX128BOpnd, LSX128BOpnd>; ++ ++def VMULWEV_W_HU_H : LSX_3R<0b01110000101000001>, ++ LSX_3R_DESC_BASE<"vmulwev.w.hu.h", int_loongarch_lsx_vmulwev_w_hu_h, LSX128WOpnd, LSX128HOpnd, LSX128HOpnd>; ++ ++def VMULWEV_D_WU_W : LSX_3R<0b01110000101000010>, ++ LSX_3R_DESC_BASE<"vmulwev.d.wu.w", int_loongarch_lsx_vmulwev_d_wu_w, LSX128DOpnd, LSX128WOpnd, LSX128WOpnd> ; ++ ++def VMULWEV_Q_DU_D : LSX_3R<0b01110000101000011>, ++ LSX_3R_DESC_BASE<"vmulwev.q.du.d", int_loongarch_lsx_vmulwev_q_du_d, LSX128DOpnd, LSX128DOpnd, LSX128DOpnd>; ++ ++ ++def VMULWOD_H_BU_B : LSX_3R<0b01110000101000100>, ++ LSX_3R_DESC_BASE<"vmulwod.h.bu.b", int_loongarch_lsx_vmulwod_h_bu_b, LSX128HOpnd, LSX128BOpnd, LSX128BOpnd>; ++ ++def VMULWOD_W_HU_H : LSX_3R<0b01110000101000101>, ++ LSX_3R_DESC_BASE<"vmulwod.w.hu.h", int_loongarch_lsx_vmulwod_w_hu_h, LSX128WOpnd, LSX128HOpnd, LSX128HOpnd>; ++ ++def VMULWOD_D_WU_W : LSX_3R<0b01110000101000110>, ++ LSX_3R_DESC_BASE<"vmulwod.d.wu.w", int_loongarch_lsx_vmulwod_d_wu_w, LSX128DOpnd, LSX128WOpnd, LSX128WOpnd> ; ++ ++def VMULWOD_Q_DU_D : LSX_3R<0b01110000101000111>, ++ LSX_3R_DESC_BASE<"vmulwod.q.du.d", int_loongarch_lsx_vmulwod_q_du_d, LSX128DOpnd, LSX128DOpnd, LSX128DOpnd>; ++ ++ ++def VMADDWEV_H_B : LSX_3R<0b01110000101011000>, ++ LSX_3R_4R_DESC_BASE<"vmaddwev.h.b", int_loongarch_lsx_vmaddwev_h_b, LSX128HOpnd, LSX128BOpnd, LSX128BOpnd>; ++ ++def VMADDWEV_W_H : LSX_3R<0b01110000101011001>, ++ LSX_3R_4R_DESC_BASE<"vmaddwev.w.h", int_loongarch_lsx_vmaddwev_w_h, LSX128WOpnd, LSX128HOpnd, LSX128HOpnd>; ++ ++def VMADDWEV_D_W : LSX_3R<0b01110000101011010>, ++ LSX_3R_4R_DESC_BASE<"vmaddwev.d.w", int_loongarch_lsx_vmaddwev_d_w, LSX128DOpnd, LSX128WOpnd, LSX128WOpnd>; ++ ++def VMADDWEV_Q_D : LSX_3R<0b01110000101011011>, ++ LSX_3R_4R_DESC_BASE<"vmaddwev.q.d", int_loongarch_lsx_vmaddwev_q_d, LSX128DOpnd, LSX128DOpnd, LSX128DOpnd>; ++ ++ ++def VMADDWOD_H_B : LSX_3R<0b01110000101011100>, ++ LSX_3R_4R_DESC_BASE<"vmaddwod.h.b", int_loongarch_lsx_vmaddwod_h_b, LSX128HOpnd, LSX128BOpnd, LSX128BOpnd>; ++ ++def VMADDWOD_W_H : LSX_3R<0b01110000101011101>, ++ LSX_3R_4R_DESC_BASE<"vmaddwod.w.h", int_loongarch_lsx_vmaddwod_w_h, LSX128WOpnd, LSX128HOpnd, LSX128HOpnd>; ++ ++def VMADDWOD_D_W : LSX_3R<0b01110000101011110>, ++ LSX_3R_4R_DESC_BASE<"vmaddwod.d.w", int_loongarch_lsx_vmaddwod_d_w, LSX128DOpnd, LSX128WOpnd, LSX128WOpnd>; ++ ++def VMADDWOD_Q_D : LSX_3R<0b01110000101011111>, ++ LSX_3R_4R_DESC_BASE<"vmaddwod.q.d", int_loongarch_lsx_vmaddwod_q_d, LSX128DOpnd, LSX128DOpnd, LSX128DOpnd>; ++ ++ ++def VMADDWEV_H_BU : LSX_3R<0b01110000101101000>, ++ LSX_3R_4R_DESC_BASE<"vmaddwev.h.bu", int_loongarch_lsx_vmaddwev_h_bu, LSX128HOpnd, LSX128BOpnd, LSX128BOpnd>; ++ ++def VMADDWEV_W_HU : LSX_3R<0b01110000101101001>, ++ LSX_3R_4R_DESC_BASE<"vmaddwev.w.hu", int_loongarch_lsx_vmaddwev_w_hu, LSX128WOpnd, LSX128HOpnd, LSX128HOpnd>; ++ ++def VMADDWEV_D_WU : LSX_3R<0b01110000101101010>, ++ LSX_3R_4R_DESC_BASE<"vmaddwev.d.wu", int_loongarch_lsx_vmaddwev_d_wu, LSX128DOpnd, LSX128WOpnd, LSX128WOpnd>; ++ ++def VMADDWEV_Q_DU : LSX_3R<0b01110000101101011>, ++ LSX_3R_4R_DESC_BASE<"vmaddwev.q.du", int_loongarch_lsx_vmaddwev_q_du, LSX128DOpnd, LSX128DOpnd, LSX128DOpnd>; ++ ++ ++def VMADDWOD_H_BU : LSX_3R<0b01110000101101100>, ++ 
LSX_3R_4R_DESC_BASE<"vmaddwod.h.bu", int_loongarch_lsx_vmaddwod_h_bu, LSX128HOpnd, LSX128BOpnd, LSX128BOpnd>; ++ ++def VMADDWOD_W_HU : LSX_3R<0b01110000101101101>, ++ LSX_3R_4R_DESC_BASE<"vmaddwod.w.hu", int_loongarch_lsx_vmaddwod_w_hu, LSX128WOpnd, LSX128HOpnd, LSX128HOpnd>; ++ ++def VMADDWOD_D_WU : LSX_3R<0b01110000101101110>, ++ LSX_3R_4R_DESC_BASE<"vmaddwod.d.wu", int_loongarch_lsx_vmaddwod_d_wu, LSX128DOpnd, LSX128WOpnd, LSX128WOpnd>; ++ ++def VMADDWOD_Q_DU : LSX_3R<0b01110000101101111>, ++ LSX_3R_4R_DESC_BASE<"vmaddwod.q.du", int_loongarch_lsx_vmaddwod_q_du, LSX128DOpnd, LSX128DOpnd, LSX128DOpnd>; ++ ++ ++def VMADDWEV_H_BU_B : LSX_3R<0b01110000101111000>, ++ LSX_3R_4R_DESC_BASE<"vmaddwev.h.bu.b", int_loongarch_lsx_vmaddwev_h_bu_b, LSX128HOpnd, LSX128BOpnd, LSX128BOpnd>; ++ ++def VMADDWEV_W_HU_H : LSX_3R<0b01110000101111001>, ++ LSX_3R_4R_DESC_BASE<"vmaddwev.w.hu.h", int_loongarch_lsx_vmaddwev_w_hu_h, LSX128WOpnd, LSX128HOpnd, LSX128HOpnd>; ++ ++def VMADDWEV_D_WU_W : LSX_3R<0b01110000101111010>, ++ LSX_3R_4R_DESC_BASE<"vmaddwev.d.wu.w", int_loongarch_lsx_vmaddwev_d_wu_w, LSX128DOpnd, LSX128WOpnd, LSX128WOpnd> ; ++ ++def VMADDWEV_Q_DU_D : LSX_3R<0b01110000101111011>, ++ LSX_3R_4R_DESC_BASE<"vmaddwev.q.du.d", int_loongarch_lsx_vmaddwev_q_du_d, LSX128DOpnd, LSX128DOpnd, LSX128DOpnd>; ++ ++ ++def VMADDWOD_H_BU_B : LSX_3R<0b01110000101111100>, ++ LSX_3R_4R_DESC_BASE<"vmaddwod.h.bu.b", int_loongarch_lsx_vmaddwod_h_bu_b, LSX128HOpnd, LSX128BOpnd, LSX128BOpnd>; ++ ++def VMADDWOD_W_HU_H : LSX_3R<0b01110000101111101>, ++ LSX_3R_4R_DESC_BASE<"vmaddwod.w.hu.h", int_loongarch_lsx_vmaddwod_w_hu_h, LSX128WOpnd, LSX128HOpnd, LSX128HOpnd>; ++ ++def VMADDWOD_D_WU_W : LSX_3R<0b01110000101111110>, ++ LSX_3R_4R_DESC_BASE<"vmaddwod.d.wu.w", int_loongarch_lsx_vmaddwod_d_wu_w, LSX128DOpnd, LSX128WOpnd, LSX128WOpnd> ; ++ ++def VMADDWOD_Q_DU_D : LSX_3R<0b01110000101111111>, ++ LSX_3R_4R_DESC_BASE<"vmaddwod.q.du.d", int_loongarch_lsx_vmaddwod_q_du_d, LSX128DOpnd, LSX128DOpnd, LSX128DOpnd>; ++ ++ ++def VSRLN_B_H : LSX_3R<0b01110000111101001>, ++ LSX_3R_DESC_BASE<"vsrln.b.h", int_loongarch_lsx_vsrln_b_h, LSX128BOpnd, LSX128HOpnd, LSX128HOpnd>; ++ ++def VSRLN_H_W : LSX_3R<0b01110000111101010>, ++ LSX_3R_DESC_BASE<"vsrln.h.w", int_loongarch_lsx_vsrln_h_w, LSX128HOpnd, LSX128WOpnd, LSX128WOpnd>; ++ ++def VSRLN_W_D : LSX_3R<0b01110000111101011>, ++ LSX_3R_DESC_BASE<"vsrln.w.d", int_loongarch_lsx_vsrln_w_d, LSX128WOpnd, LSX128DOpnd, LSX128DOpnd>; ++ ++ ++def VSRAN_B_H : LSX_3R<0b01110000111101101>, ++ LSX_3R_DESC_BASE<"vsran.b.h", int_loongarch_lsx_vsran_b_h, LSX128BOpnd, LSX128HOpnd, LSX128HOpnd>; ++ ++def VSRAN_H_W : LSX_3R<0b01110000111101110>, ++ LSX_3R_DESC_BASE<"vsran.h.w", int_loongarch_lsx_vsran_h_w, LSX128HOpnd, LSX128WOpnd, LSX128WOpnd>; ++ ++def VSRAN_W_D : LSX_3R<0b01110000111101111>, ++ LSX_3R_DESC_BASE<"vsran.w.d", int_loongarch_lsx_vsran_w_d, LSX128WOpnd, LSX128DOpnd, LSX128DOpnd>; ++ ++ ++def VSRLRN_B_H : LSX_3R<0b01110000111110001>, ++ LSX_3R_DESC_BASE<"vsrlrn.b.h", int_loongarch_lsx_vsrlrn_b_h, LSX128BOpnd, LSX128HOpnd, LSX128HOpnd>; ++ ++def VSRLRN_H_W : LSX_3R<0b01110000111110010>, ++ LSX_3R_DESC_BASE<"vsrlrn.h.w", int_loongarch_lsx_vsrlrn_h_w, LSX128HOpnd, LSX128WOpnd, LSX128WOpnd>; ++ ++def VSRLRN_W_D : LSX_3R<0b01110000111110011>, ++ LSX_3R_DESC_BASE<"vsrlrn.w.d", int_loongarch_lsx_vsrlrn_w_d, LSX128WOpnd, LSX128DOpnd, LSX128DOpnd>; ++ ++ ++def VSRARN_B_H : LSX_3R<0b01110000111110101>, ++ LSX_3R_DESC_BASE<"vsrarn.b.h", int_loongarch_lsx_vsrarn_b_h, LSX128BOpnd, LSX128HOpnd, LSX128HOpnd>; ++ 
++def VSRARN_H_W : LSX_3R<0b01110000111110110>, ++ LSX_3R_DESC_BASE<"vsrarn.h.w", int_loongarch_lsx_vsrarn_h_w, LSX128HOpnd, LSX128WOpnd, LSX128WOpnd>; ++ ++def VSRARN_W_D : LSX_3R<0b01110000111110111>, ++ LSX_3R_DESC_BASE<"vsrarn.w.d", int_loongarch_lsx_vsrarn_w_d, LSX128WOpnd, LSX128DOpnd, LSX128DOpnd>; ++ ++ ++def VSSRLN_B_H : LSX_3R<0b01110000111111001>, ++ LSX_3R_DESC_BASE<"vssrln.b.h", int_loongarch_lsx_vssrln_b_h, LSX128BOpnd, LSX128HOpnd, LSX128HOpnd>; ++ ++def VSSRLN_H_W : LSX_3R<0b01110000111111010>, ++ LSX_3R_DESC_BASE<"vssrln.h.w", int_loongarch_lsx_vssrln_h_w, LSX128HOpnd, LSX128WOpnd, LSX128WOpnd>; ++ ++def VSSRLN_W_D : LSX_3R<0b01110000111111011>, ++ LSX_3R_DESC_BASE<"vssrln.w.d", int_loongarch_lsx_vssrln_w_d, LSX128WOpnd, LSX128DOpnd, LSX128DOpnd>; ++ ++ ++def VSSRAN_B_H : LSX_3R<0b01110000111111101>, ++ LSX_3R_DESC_BASE<"vssran.b.h", int_loongarch_lsx_vssran_b_h, LSX128BOpnd, LSX128HOpnd, LSX128HOpnd>; ++ ++def VSSRAN_H_W : LSX_3R<0b01110000111111110>, ++ LSX_3R_DESC_BASE<"vssran.h.w", int_loongarch_lsx_vssran_h_w, LSX128HOpnd, LSX128WOpnd, LSX128WOpnd>; ++ ++def VSSRAN_W_D : LSX_3R<0b01110000111111111>, ++ LSX_3R_DESC_BASE<"vssran.w.d", int_loongarch_lsx_vssran_w_d, LSX128WOpnd, LSX128DOpnd, LSX128DOpnd>; ++ ++ ++def VSSRLRN_B_H : LSX_3R<0b01110001000000001>, ++ LSX_3R_DESC_BASE<"vssrlrn.b.h", int_loongarch_lsx_vssrlrn_b_h, LSX128BOpnd, LSX128HOpnd, LSX128HOpnd>; ++ ++def VSSRLRN_H_W : LSX_3R<0b01110001000000010>, ++ LSX_3R_DESC_BASE<"vssrlrn.h.w", int_loongarch_lsx_vssrlrn_h_w, LSX128HOpnd, LSX128WOpnd, LSX128WOpnd>; ++ ++def VSSRLRN_W_D : LSX_3R<0b01110001000000011>, ++ LSX_3R_DESC_BASE<"vssrlrn.w.d", int_loongarch_lsx_vssrlrn_w_d, LSX128WOpnd, LSX128DOpnd, LSX128DOpnd>; ++ ++ ++def VSSRARN_B_H : LSX_3R<0b01110001000000101>, ++ LSX_3R_DESC_BASE<"vssrarn.b.h", int_loongarch_lsx_vssrarn_b_h, LSX128BOpnd, LSX128HOpnd, LSX128HOpnd>; ++ ++def VSSRARN_H_W : LSX_3R<0b01110001000000110>, ++ LSX_3R_DESC_BASE<"vssrarn.h.w", int_loongarch_lsx_vssrarn_h_w, LSX128HOpnd, LSX128WOpnd, LSX128WOpnd>; ++ ++def VSSRARN_W_D : LSX_3R<0b01110001000000111>, ++ LSX_3R_DESC_BASE<"vssrarn.w.d", int_loongarch_lsx_vssrarn_w_d, LSX128WOpnd, LSX128DOpnd, LSX128DOpnd>; ++ ++ ++def VSSRLN_BU_H : LSX_3R<0b01110001000001001>, ++ LSX_3R_DESC_BASE<"vssrln.bu.h", int_loongarch_lsx_vssrln_bu_h, LSX128BOpnd, LSX128HOpnd, LSX128HOpnd>; ++ ++def VSSRLN_HU_W : LSX_3R<0b01110001000001010>, ++ LSX_3R_DESC_BASE<"vssrln.hu.w", int_loongarch_lsx_vssrln_hu_w, LSX128HOpnd, LSX128WOpnd, LSX128WOpnd>; ++ ++def VSSRLN_WU_D : LSX_3R<0b01110001000001011>, ++ LSX_3R_DESC_BASE<"vssrln.wu.d", int_loongarch_lsx_vssrln_wu_d, LSX128WOpnd, LSX128DOpnd, LSX128DOpnd>; ++ ++ ++def VSSRAN_BU_H : LSX_3R<0b01110001000001101>, ++ LSX_3R_DESC_BASE<"vssran.bu.h", int_loongarch_lsx_vssran_bu_h, LSX128BOpnd, LSX128HOpnd, LSX128HOpnd>; ++ ++def VSSRAN_HU_W : LSX_3R<0b01110001000001110>, ++ LSX_3R_DESC_BASE<"vssran.hu.w", int_loongarch_lsx_vssran_hu_w, LSX128HOpnd, LSX128WOpnd, LSX128WOpnd>; ++ ++def VSSRAN_WU_D : LSX_3R<0b01110001000001111>, ++ LSX_3R_DESC_BASE<"vssran.wu.d", int_loongarch_lsx_vssran_wu_d, LSX128WOpnd, LSX128DOpnd, LSX128DOpnd>; ++ ++ ++def VSSRLRN_BU_H : LSX_3R<0b01110001000010001>, ++ LSX_3R_DESC_BASE<"vssrlrn.bu.h", int_loongarch_lsx_vssrlrn_bu_h, LSX128BOpnd, LSX128HOpnd, LSX128HOpnd>; ++ ++def VSSRLRN_HU_W : LSX_3R<0b01110001000010010>, ++ LSX_3R_DESC_BASE<"vssrlrn.hu.w", int_loongarch_lsx_vssrlrn_hu_w, LSX128HOpnd, LSX128WOpnd, LSX128WOpnd>; ++ ++def VSSRLRN_WU_D : LSX_3R<0b01110001000010011>, ++ 
LSX_3R_DESC_BASE<"vssrlrn.wu.d", int_loongarch_lsx_vssrlrn_wu_d, LSX128WOpnd, LSX128DOpnd, LSX128DOpnd>; ++ ++ ++def VSSRARN_BU_H : LSX_3R<0b01110001000010101>, ++ LSX_3R_DESC_BASE<"vssrarn.bu.h", int_loongarch_lsx_vssrarn_bu_h, LSX128BOpnd, LSX128HOpnd, LSX128HOpnd>; ++ ++def VSSRARN_HU_W : LSX_3R<0b01110001000010110>, ++ LSX_3R_DESC_BASE<"vssrarn.hu.w", int_loongarch_lsx_vssrarn_hu_w, LSX128HOpnd, LSX128WOpnd, LSX128WOpnd>; ++ ++def VSSRARN_WU_D : LSX_3R<0b01110001000010111>, ++ LSX_3R_DESC_BASE<"vssrarn.wu.d", int_loongarch_lsx_vssrarn_wu_d, LSX128WOpnd, LSX128DOpnd, LSX128DOpnd>; ++ ++ ++def VANDN_V : LSX_3R<0b01110001001010000>, ++ LSX_3R_DESC_BASE<"vandn.v", int_loongarch_lsx_vandn_v, LSX128BOpnd>; ++ ++ ++class LSX_VANDN_PSEUDO_BASE : ++ LSXPseudo<(outs RO:$vd), (ins RO:$vj, RO:$vk), ++ []>, ++ PseudoInstExpansion<(VANDN_V LSX128BOpnd:$vd, ++ LSX128BOpnd:$vj, ++ LSX128BOpnd:$vk)>; ++ ++def VANDN_H_PSEUDO : LSX_VANDN_PSEUDO_BASE; ++def VANDN_W_PSEUDO : LSX_VANDN_PSEUDO_BASE; ++def VANDN_D_PSEUDO : LSX_VANDN_PSEUDO_BASE; ++ ++ ++ ++def VORN_V : LSX_3R<0b01110001001010001>, ++ LSX_3R_DESC_BASE<"vorn.v", int_loongarch_lsx_vorn_v, LSX128BOpnd>; ++ ++ ++class LSX_VORN_PSEUDO_BASE : ++ LSXPseudo<(outs RO:$vd), (ins RO:$vj, RO:$vk), ++ []>, ++ PseudoInstExpansion<(VORN_V LSX128BOpnd:$vd, ++ LSX128BOpnd:$vj, ++ LSX128BOpnd:$vk)>; ++ ++def VORN_H_PSEUDO : LSX_VORN_PSEUDO_BASE; ++def VORN_W_PSEUDO : LSX_VORN_PSEUDO_BASE; ++def VORN_D_PSEUDO : LSX_VORN_PSEUDO_BASE; ++ ++ ++def VFRSTP_B : LSX_3R<0b01110001001010110>, ++ LSX_3R_4R_DESC_BASE<"vfrstp.b", int_loongarch_lsx_vfrstp_b, LSX128BOpnd>; ++ ++def VFRSTP_H : LSX_3R<0b01110001001010111>, ++ LSX_3R_4R_DESC_BASE<"vfrstp.h", int_loongarch_lsx_vfrstp_h, LSX128HOpnd>; ++ ++ ++def VADD_Q : LSX_3R<0b01110001001011010>, IsCommutable, ++ LSX_3R_DESC_BASE<"vadd.q", int_loongarch_lsx_vadd_q, LSX128DOpnd>; ++ ++def VSUB_Q : LSX_3R<0b01110001001011011>, ++ LSX_3R_DESC_BASE<"vsub.q", int_loongarch_lsx_vsub_q, LSX128DOpnd>; ++ ++ ++def VSIGNCOV_B : LSX_3R<0b01110001001011100>, ++ LSX_3R_DESC_BASE<"vsigncov.b", int_loongarch_lsx_vsigncov_b, LSX128BOpnd>; ++ ++def VSIGNCOV_H : LSX_3R<0b01110001001011101>, ++ LSX_3R_DESC_BASE<"vsigncov.h", int_loongarch_lsx_vsigncov_h, LSX128HOpnd>; ++ ++def VSIGNCOV_W : LSX_3R<0b01110001001011110>, ++ LSX_3R_DESC_BASE<"vsigncov.w", int_loongarch_lsx_vsigncov_w, LSX128WOpnd>; ++ ++def VSIGNCOV_D : LSX_3R<0b01110001001011111>, ++ LSX_3R_DESC_BASE<"vsigncov.d", int_loongarch_lsx_vsigncov_d, LSX128DOpnd>; ++ ++ ++def VFCVT_H_S : LSX_3R<0b01110001010001100>, ++ LSX_3RF_DESC_BASE<"vfcvt.h.s", int_loongarch_lsx_vfcvt_h_s, LSX128HOpnd, LSX128WOpnd, LSX128WOpnd>; ++ ++def VFCVT_S_D : LSX_3R<0b01110001010001101>, ++ LSX_3RF_DESC_BASE1<"vfcvt.s.d", int_loongarch_lsx_vfcvt_s_d, LSX128WOpnd, LSX128DOpnd, LSX128DOpnd>; ++ ++ ++def VFFINT_S_L : LSX_3R<0b01110001010010000>, ++ LSX_3RF_DESC_BASE<"vffint.s.l", int_loongarch_lsx_vffint_s_l, LSX128WOpnd, LSX128DOpnd, LSX128DOpnd>; ++ ++def VFTINT_W_D : LSX_3R<0b01110001010010011>, ++ LSX_3RF_DESC_BASE<"vftint.w.d", int_loongarch_lsx_vftint_w_d, LSX128WOpnd, LSX128DOpnd, LSX128DOpnd>; ++ ++ ++def VFTINTRZ_W_D : LSX_3R<0b01110001010010110>, ++ LSX_3RF_DESC_BASE<"vftintrz.w.d", int_loongarch_lsx_vftintrz_w_d, LSX128WOpnd, LSX128DOpnd, LSX128DOpnd>; ++ ++def VFTINTRP_W_D : LSX_3R<0b01110001010010101>, ++ LSX_3RF_DESC_BASE<"vftintrp.w.d", int_loongarch_lsx_vftintrp_w_d, LSX128WOpnd, LSX128DOpnd, LSX128DOpnd>; ++ ++def VFTINTRM_W_D : LSX_3R<0b01110001010010100>, ++ LSX_3RF_DESC_BASE<"vftintrm.w.d", 
int_loongarch_lsx_vftintrm_w_d, LSX128WOpnd, LSX128DOpnd, LSX128DOpnd>; ++ ++def VFTINTRNE_W_D : LSX_3R<0b01110001010010111>, ++ LSX_3RF_DESC_BASE<"vftintrne.w.d", int_loongarch_lsx_vftintrne_w_d, LSX128WOpnd, LSX128DOpnd, LSX128DOpnd>; ++ ++ ++def VBSRL_V : LSX_I5_U<0b01110010100011101>, ++ LSX_U5_DESC_BASE<"vbsrl.v", int_loongarch_lsx_vbsrl_v, LSX128BOpnd>; ++ ++def VBSLL_V : LSX_I5_U<0b01110010100011100>, ++ LSX_U5_DESC_BASE<"vbsll.v", int_loongarch_lsx_vbsll_v, LSX128BOpnd>; ++ ++ ++def VFRSTPI_B : LSX_I5_U<0b01110010100110100>, ++ LSX_U5_4R_DESC_BASE<"vfrstpi.b", int_loongarch_lsx_vfrstpi_b, LSX128BOpnd>; ++ ++def VFRSTPI_H : LSX_I5_U<0b01110010100110101>, ++ LSX_U5_4R_DESC_BASE<"vfrstpi.h", int_loongarch_lsx_vfrstpi_h, LSX128HOpnd>; ++ ++ ++def VNEG_B : LSX_2R<0b0111001010011100001100>, ++ LSX_2R_DESC_BASE<"vneg.b", int_loongarch_lsx_vneg_b, LSX128BOpnd>; ++ ++def VNEG_H : LSX_2R<0b0111001010011100001101>, ++ LSX_2R_DESC_BASE<"vneg.h", int_loongarch_lsx_vneg_h, LSX128HOpnd>; ++ ++def VNEG_W : LSX_2R<0b0111001010011100001110>, ++ LSX_2R_DESC_BASE<"vneg.w", int_loongarch_lsx_vneg_w, LSX128WOpnd>; ++ ++def VNEG_D : LSX_2R<0b0111001010011100001111>, ++ LSX_2R_DESC_BASE<"vneg.d", int_loongarch_lsx_vneg_d, LSX128DOpnd>; ++ ++ ++def VMSKGEZ_B : LSX_2R<0b0111001010011100010100>, ++ LSX_2R_DESC_BASE<"vmskgez.b", int_loongarch_lsx_vmskgez_b, LSX128BOpnd>; ++ ++def VMSKNZ_B : LSX_2R<0b0111001010011100011000>, ++ LSX_2R_DESC_BASE<"vmsknz.b", int_loongarch_lsx_vmsknz_b, LSX128BOpnd>; ++ ++ ++def VFRINTRM_S : LSX_2R<0b0111001010011101010001>, ++ LSX_2RF_DESC_BASE<"vfrintrm.s", int_loongarch_lsx_vfrintrm_s, LSX128WOpnd>; ++ ++def VFRINTRM_D : LSX_2R<0b0111001010011101010010>, ++ LSX_2RF_DESC_BASE<"vfrintrm.d", int_loongarch_lsx_vfrintrm_d, LSX128DOpnd>; ++ ++ ++def VFRINTRP_S : LSX_2R<0b0111001010011101010101>, ++ LSX_2RF_DESC_BASE<"vfrintrp.s", int_loongarch_lsx_vfrintrp_s, LSX128WOpnd>; ++ ++def VFRINTRP_D : LSX_2R<0b0111001010011101010110>, ++ LSX_2RF_DESC_BASE<"vfrintrp.d", int_loongarch_lsx_vfrintrp_d, LSX128DOpnd>; ++ ++ ++def VFRINTRZ_S : LSX_2R<0b0111001010011101011001>, ++ LSX_2RF_DESC_BASE<"vfrintrz.s", int_loongarch_lsx_vfrintrz_s, LSX128WOpnd>; ++ ++def VFRINTRZ_D : LSX_2R<0b0111001010011101011010>, ++ LSX_2RF_DESC_BASE<"vfrintrz.d", int_loongarch_lsx_vfrintrz_d, LSX128DOpnd>; ++ ++ ++def VFRINTRNE_S : LSX_2R<0b0111001010011101011101>, ++ LSX_2RF_DESC_BASE<"vfrintrne.s", int_loongarch_lsx_vfrintrne_s, LSX128WOpnd>; ++ ++def VFRINTRNE_D : LSX_2R<0b0111001010011101011110>, ++ LSX_2RF_DESC_BASE<"vfrintrne.d", int_loongarch_lsx_vfrintrne_d, LSX128DOpnd>; ++ ++ ++def VFFINTL_D_W : LSX_2R<0b0111001010011110000100>, ++ LSX_2RF_DESC_BASE<"vffintl.d.w", int_loongarch_lsx_vffintl_d_w, LSX128DOpnd, LSX128WOpnd>; ++ ++def VFFINTH_D_W : LSX_2R<0b0111001010011110000101>, ++ LSX_2RF_DESC_BASE<"vffinth.d.w", int_loongarch_lsx_vffinth_d_w, LSX128DOpnd, LSX128WOpnd>; ++ ++ ++def VFTINTRM_W_S : LSX_2R<0b0111001010011110001110>, ++ LSX_2RF_DESC_BASE<"vftintrm.w.s", int_loongarch_lsx_vftintrm_w_s, LSX128WOpnd>; ++ ++def VFTINTRM_L_D : LSX_2R<0b0111001010011110001111>, ++ LSX_2RF_DESC_BASE<"vftintrm.l.d", int_loongarch_lsx_vftintrm_l_d, LSX128DOpnd>; ++ ++ ++def VFTINTRP_W_S : LSX_2R<0b0111001010011110010000>, ++ LSX_2RF_DESC_BASE<"vftintrp.w.s", int_loongarch_lsx_vftintrp_w_s, LSX128WOpnd>; ++ ++def VFTINTRP_L_D : LSX_2R<0b0111001010011110010001>, ++ LSX_2RF_DESC_BASE<"vftintrp.l.d", int_loongarch_lsx_vftintrp_l_d, LSX128DOpnd>; ++ ++ ++def VFTINTRZ_W_S : LSX_2R<0b0111001010011110010010>, ++ 
LSX_2RF_DESC_BASE<"vftintrz.w.s", fp_to_sint, LSX128WOpnd>; ++ ++def VFTINTRZ_L_D : LSX_2R<0b0111001010011110010011>, ++ LSX_2RF_DESC_BASE<"vftintrz.l.d", fp_to_sint, LSX128DOpnd>; ++ ++ ++def VFTINTRNE_W_S : LSX_2R<0b0111001010011110010100>, ++ LSX_2RF_DESC_BASE<"vftintrne.w.s", int_loongarch_lsx_vftintrne_w_s, LSX128WOpnd>; ++ ++def VFTINTRNE_L_D : LSX_2R<0b0111001010011110010101>, ++ LSX_2RF_DESC_BASE<"vftintrne.l.d", int_loongarch_lsx_vftintrne_l_d, LSX128DOpnd>; ++ ++ ++def VFTINTL_L_S : LSX_2R<0b0111001010011110100000>, ++ LSX_2RF_DESC_BASE<"vftintl.l.s", int_loongarch_lsx_vftintl_l_s, LSX128DOpnd, LSX128WOpnd>; ++ ++def VFTINTH_L_S : LSX_2R<0b0111001010011110100001>, ++ LSX_2RF_DESC_BASE<"vftinth.l.s", int_loongarch_lsx_vftinth_l_s, LSX128DOpnd, LSX128WOpnd>; ++ ++ ++def VFTINTRML_L_S : LSX_2R<0b0111001010011110100010>, ++ LSX_2RF_DESC_BASE<"vftintrml.l.s", int_loongarch_lsx_vftintrml_l_s, LSX128DOpnd, LSX128WOpnd>; ++ ++def VFTINTRMH_L_S : LSX_2R<0b0111001010011110100011>, ++ LSX_2RF_DESC_BASE<"vftintrmh.l.s", int_loongarch_lsx_vftintrmh_l_s, LSX128DOpnd, LSX128WOpnd>; ++ ++ ++def VFTINTRPL_L_S : LSX_2R<0b0111001010011110100100>, ++ LSX_2RF_DESC_BASE<"vftintrpl.l.s", int_loongarch_lsx_vftintrpl_l_s, LSX128DOpnd, LSX128WOpnd>; ++ ++def VFTINTRPH_L_S : LSX_2R<0b0111001010011110100101>, ++ LSX_2RF_DESC_BASE<"vftintrph.l.s", int_loongarch_lsx_vftintrph_l_s, LSX128DOpnd, LSX128WOpnd>; ++ ++ ++def VFTINTRZL_L_S : LSX_2R<0b0111001010011110100110>, ++ LSX_2RF_DESC_BASE<"vftintrzl.l.s", int_loongarch_lsx_vftintrzl_l_s, LSX128DOpnd, LSX128WOpnd>; ++ ++def VFTINTRZH_L_S : LSX_2R<0b0111001010011110100111>, ++ LSX_2RF_DESC_BASE<"vftintrzh.l.s", int_loongarch_lsx_vftintrzh_l_s, LSX128DOpnd, LSX128WOpnd>; ++ ++ ++def VFTINTRNEL_L_S : LSX_2R<0b0111001010011110101000>, ++ LSX_2RF_DESC_BASE<"vftintrnel.l.s", int_loongarch_lsx_vftintrnel_l_s, LSX128DOpnd, LSX128WOpnd>; ++ ++def VFTINTRNEH_L_S : LSX_2R<0b0111001010011110101001>, ++ LSX_2RF_DESC_BASE<"vftintrneh.l.s", int_loongarch_lsx_vftintrneh_l_s, LSX128DOpnd, LSX128WOpnd>; ++ ++ ++def VEXTH_H_B : LSX_2R<0b0111001010011110111000>, ++ LSX_2R_DESC_BASE<"vexth.h.b", int_loongarch_lsx_vexth_h_b, LSX128HOpnd, LSX128BOpnd>; ++ ++def VEXTH_W_H : LSX_2R<0b0111001010011110111001>, ++ LSX_2R_DESC_BASE<"vexth.w.h", int_loongarch_lsx_vexth_w_h, LSX128WOpnd, LSX128HOpnd>; ++ ++def VEXTH_D_W : LSX_2R<0b0111001010011110111010>, ++ LSX_2R_DESC_BASE<"vexth.d.w", int_loongarch_lsx_vexth_d_w, LSX128DOpnd, LSX128WOpnd> ; ++ ++def VEXTH_Q_D : LSX_2R<0b0111001010011110111011>, ++ LSX_2R_DESC_BASE<"vexth.q.d", int_loongarch_lsx_vexth_q_d, LSX128DOpnd, LSX128DOpnd>; ++ ++ ++def VEXTH_HU_BU : LSX_2R<0b0111001010011110111100>, ++ LSX_2R_DESC_BASE<"vexth.hu.bu", int_loongarch_lsx_vexth_hu_bu, LSX128HOpnd, LSX128BOpnd>; ++ ++def VEXTH_WU_HU : LSX_2R<0b0111001010011110111101>, ++ LSX_2R_DESC_BASE<"vexth.wu.hu", int_loongarch_lsx_vexth_wu_hu, LSX128WOpnd, LSX128HOpnd>; ++ ++def VEXTH_DU_WU : LSX_2R<0b0111001010011110111110>, ++ LSX_2R_DESC_BASE<"vexth.du.wu", int_loongarch_lsx_vexth_du_wu, LSX128DOpnd, LSX128WOpnd> ; ++ ++def VEXTH_QU_DU : LSX_2R<0b0111001010011110111111>, ++ LSX_2R_DESC_BASE<"vexth.qu.du", int_loongarch_lsx_vexth_qu_du, LSX128DOpnd, LSX128DOpnd>; ++ ++ ++def VSLLWIL_H_B : LSX_I3_U<0b0111001100001000001>, ++ LSX_2R_U3_DESC_BASE<"vsllwil.h.b", int_loongarch_lsx_vsllwil_h_b, LSX128HOpnd, LSX128BOpnd>; ++ ++def VSLLWIL_W_H : LSX_I4_U<0b011100110000100001>, ++ LSX_2R_U4_DESC_BASE<"vsllwil.w.h", int_loongarch_lsx_vsllwil_w_h, LSX128WOpnd, LSX128HOpnd>; ++ ++def 
VSLLWIL_D_W : LSX_I5_U<0b01110011000010001>, ++ LSX_2R_U5_DESC_BASE<"vsllwil.d.w", int_loongarch_lsx_vsllwil_d_w, LSX128DOpnd, LSX128WOpnd> ; ++ ++ ++def VEXTL_Q_D : LSX_2R<0b0111001100001001000000>, ++ LSX_2R_DESC_BASE<"vextl.q.d", int_loongarch_lsx_vextl_q_d, LSX128DOpnd, LSX128DOpnd>; ++ ++ ++def VSLLWIL_HU_BU : LSX_I3_U<0b0111001100001100001>, ++ LSX_2R_U3_DESC_BASE<"vsllwil.hu.bu", int_loongarch_lsx_vsllwil_hu_bu, LSX128HOpnd, LSX128BOpnd>; ++ ++def VSLLWIL_WU_HU : LSX_I4_U<0b011100110000110001>, ++ LSX_2R_U4_DESC_BASE<"vsllwil.wu.hu", int_loongarch_lsx_vsllwil_wu_hu, LSX128WOpnd, LSX128HOpnd>; ++ ++def VSLLWIL_DU_WU : LSX_I5_U<0b01110011000011001>, ++ LSX_2R_U5_DESC_BASE<"vsllwil.du.wu", int_loongarch_lsx_vsllwil_du_wu, LSX128DOpnd, LSX128WOpnd> ; ++ ++ ++def VEXTL_QU_DU : LSX_2R<0b0111001100001101000000>, ++ LSX_2R_DESC_BASE<"vextl.qu.du", int_loongarch_lsx_vextl_qu_du, LSX128DOpnd, LSX128DOpnd>; ++ ++ ++def VBITCLRI_B : LSX_I3_U<0b0111001100010000001>, ++ LSX_2R_U3_DESC_BASE<"vbitclri.b", int_loongarch_lsx_vbitclri_b, LSX128BOpnd, LSX128BOpnd>; ++ ++def VBITCLRI_H : LSX_I4_U<0b011100110001000001>, ++ LSX_2R_U4_DESC_BASE<"vbitclri.h", int_loongarch_lsx_vbitclri_h, LSX128HOpnd, LSX128HOpnd>; ++ ++def VBITCLRI_W : LSX_I5_U<0b01110011000100001>, ++ LSX_2R_U5_DESC_BASE<"vbitclri.w", int_loongarch_lsx_vbitclri_w, LSX128WOpnd, LSX128WOpnd>; ++ ++def VBITCLRI_D : LSX_I6_U<0b0111001100010001>, ++ LSX_2R_U6_DESC_BASE<"vbitclri.d", int_loongarch_lsx_vbitclri_d, LSX128DOpnd, LSX128DOpnd>; ++ ++ ++def VBITSETI_B : LSX_I3_U<0b0111001100010100001>, ++ LSX_2R_U3_DESC_BASE<"vbitseti.b", int_loongarch_lsx_vbitseti_b, LSX128BOpnd, LSX128BOpnd>; ++ ++def VBITSETI_H : LSX_I4_U<0b011100110001010001>, ++ LSX_2R_U4_DESC_BASE<"vbitseti.h", int_loongarch_lsx_vbitseti_h, LSX128HOpnd, LSX128HOpnd>; ++ ++def VBITSETI_W : LSX_I5_U<0b01110011000101001>, ++ LSX_2R_U5_DESC_BASE<"vbitseti.w", int_loongarch_lsx_vbitseti_w, LSX128WOpnd, LSX128WOpnd>; ++ ++def VBITSETI_D : LSX_I6_U<0b0111001100010101>, ++ LSX_2R_U6_DESC_BASE<"vbitseti.d", int_loongarch_lsx_vbitseti_d, LSX128DOpnd, LSX128DOpnd>; ++ ++ ++def VBITREVI_B : LSX_I3_U<0b0111001100011000001>, ++ LSX_2R_U3_DESC_BASE<"vbitrevi.b", int_loongarch_lsx_vbitrevi_b, LSX128BOpnd, LSX128BOpnd>; ++ ++def VBITREVI_H : LSX_I4_U<0b011100110001100001>, ++ LSX_2R_U4_DESC_BASE<"vbitrevi.h", int_loongarch_lsx_vbitrevi_h, LSX128HOpnd, LSX128HOpnd>; ++ ++def VBITREVI_W : LSX_I5_U<0b01110011000110001>, ++ LSX_2R_U5_DESC_BASE<"vbitrevi.w", int_loongarch_lsx_vbitrevi_w, LSX128WOpnd, LSX128WOpnd>; ++ ++def VBITREVI_D : LSX_I6_U<0b0111001100011001>, ++ LSX_2R_U6_DESC_BASE<"vbitrevi.d", int_loongarch_lsx_vbitrevi_d, LSX128DOpnd, LSX128DOpnd>; ++ ++ ++def VSSRLRNI_B_H : LSX_I4_U<0b011100110101000001>, ++ LSX_2R_3R_U4_DESC_BASE<"vssrlrni.b.h", int_loongarch_lsx_vssrlrni_b_h, LSX128BOpnd, LSX128BOpnd>; ++ ++def VSSRLRNI_H_W : LSX_I5_U<0b01110011010100001>, ++ LSX_2R_3R_U5_DESC_BASE<"vssrlrni.h.w", int_loongarch_lsx_vssrlrni_h_w, LSX128HOpnd, LSX128HOpnd>; ++ ++def VSSRLRNI_W_D : LSX_I6_U<0b0111001101010001>, ++ LSX_2R_3R_U6_DESC_BASE<"vssrlrni.w.d", int_loongarch_lsx_vssrlrni_w_d, LSX128WOpnd, LSX128WOpnd>; ++ ++def VSSRLRNI_D_Q : LSX_I7_U<0b011100110101001>, ++ LSX_2R_3R_U7_DESC_BASE<"vssrlrni.d.q", int_loongarch_lsx_vssrlrni_d_q, LSX128DOpnd, LSX128DOpnd>; ++ ++ ++def VSRANI_B_H : LSX_I4_U<0b011100110101100001>, ++ LSX_2R_3R_U4_DESC_BASE<"vsrani.b.h", int_loongarch_lsx_vsrani_b_h, LSX128BOpnd, LSX128BOpnd>; ++ ++def VSRANI_H_W : LSX_I5_U<0b01110011010110001>, ++ 
LSX_2R_3R_U5_DESC_BASE<"vsrani.h.w", int_loongarch_lsx_vsrani_h_w, LSX128HOpnd, LSX128HOpnd>; ++ ++def VSRANI_W_D : LSX_I6_U<0b0111001101011001>, ++ LSX_2R_3R_U6_DESC_BASE<"vsrani.w.d", int_loongarch_lsx_vsrani_w_d, LSX128WOpnd, LSX128WOpnd>; ++ ++def VSRANI_D_Q : LSX_I7_U<0b011100110101101>, ++ LSX_2R_3R_U7_DESC_BASE<"vsrani.d.q", int_loongarch_lsx_vsrani_d_q, LSX128DOpnd, LSX128DOpnd>; ++ ++ ++def VEXTRINS_B : LSX_I8_U<0b01110011100011>, ++ LSX_2R_3R_U8_DESC_BASE<"vextrins.b", int_loongarch_lsx_vextrins_b, LSX128BOpnd, LSX128BOpnd>; ++ ++def VEXTRINS_H : LSX_I8_U<0b01110011100010>, ++ LSX_2R_3R_U8_DESC_BASE<"vextrins.h", int_loongarch_lsx_vextrins_h, LSX128HOpnd, LSX128HOpnd>; ++ ++def VEXTRINS_W : LSX_I8_U<0b01110011100001>, ++ LSX_2R_3R_U8_DESC_BASE<"vextrins.w", int_loongarch_lsx_vextrins_w, LSX128WOpnd, LSX128WOpnd>; ++ ++def VEXTRINS_D : LSX_I8_U<0b01110011100000>, ++ LSX_2R_3R_U8_DESC_BASE<"vextrins.d", int_loongarch_lsx_vextrins_d, LSX128DOpnd, LSX128DOpnd>; ++ ++ ++def VBITSELI_B : LSX_I8_U<0b01110011110001>, ++ LSX_2R_3R_U8_DESC_BASE<"vbitseli.b", int_loongarch_lsx_vbitseli_b, LSX128BOpnd, LSX128BOpnd>; ++ ++ ++def VANDI_B : LSX_I8_U<0b01110011110100>, ++ LSX_2R_U8_DESC_BASE<"vandi.b", int_loongarch_lsx_vandi_b, LSX128BOpnd, LSX128BOpnd>; ++ ++ ++def VORI_B : LSX_I8_U<0b01110011110101>, ++ LSX_2R_U8_DESC_BASE<"vori.b", int_loongarch_lsx_vori_b, LSX128BOpnd, LSX128BOpnd>; ++ ++ ++def VXORI_B : LSX_I8_U<0b01110011110110>, ++ LSX_2R_U8_DESC_BASE<"vxori.b", int_loongarch_lsx_vxori_b, LSX128BOpnd, LSX128BOpnd>; ++ ++ ++def VNORI_B : LSX_I8_U<0b01110011110111>, ++ LSX_2R_U8_DESC_BASE<"vnori.b", int_loongarch_lsx_vnori_b, LSX128BOpnd, LSX128BOpnd>; ++ ++ ++def VLDI : LSX_1R_I13<0b01110011111000>, ++ LSX_I13_DESC_BASE<"vldi", int_loongarch_lsx_vldi, i32, simm13Op, LSX128DOpnd>; ++ ++def VLDI_B : LSX_1R_I13_I10<0b01110011111000000>, ++ LSX_I13_DESC_BASE_10<"vldi", LSX128BOpnd>; ++ ++def VLDI_H : LSX_1R_I13_I10<0b01110011111000001>, ++ LSX_I13_DESC_BASE_10<"vldi", LSX128HOpnd>; ++ ++def VLDI_W : LSX_1R_I13_I10<0b01110011111000010>, ++ LSX_I13_DESC_BASE_10<"vldi", LSX128WOpnd>; ++ ++def VLDI_D : LSX_1R_I13_I10<0b01110011111000011>, ++ LSX_I13_DESC_BASE_10<"vldi", LSX128DOpnd>; ++ ++def VPERMI_W : LSX_I8_U<0b01110011111001>, ++ LSX_2R_3R_U8_DESC_BASE<"vpermi.w", int_loongarch_lsx_vpermi_w, LSX128WOpnd, LSX128WOpnd>; ++ ++ ++def VSEQ_B : LSX_3R<0b01110000000000000>, IsCommutable, ++ LSX_3R_DESC_BASE<"vseq.b", vseteq_v16i8, LSX128BOpnd>; ++ ++def VSEQ_H : LSX_3R<0b01110000000000001>, IsCommutable, ++ LSX_3R_DESC_BASE<"vseq.h", vseteq_v8i16, LSX128HOpnd>; ++ ++def VSEQ_W : LSX_3R<0b01110000000000010>, IsCommutable, ++ LSX_3R_DESC_BASE<"vseq.w", vseteq_v4i32, LSX128WOpnd> ; ++ ++def VSEQ_D : LSX_3R<0b01110000000000011>, IsCommutable, ++ LSX_3R_DESC_BASE<"vseq.d", vseteq_v2i64, LSX128DOpnd>; ++ ++ ++def VSLE_B : LSX_3R<0b01110000000000100>, ++ LSX_3R_DESC_BASE<"vsle.b", vsetle_v16i8, LSX128BOpnd>; ++ ++def VSLE_H : LSX_3R<0b01110000000000101>, ++ LSX_3R_DESC_BASE<"vsle.h", vsetle_v8i16, LSX128HOpnd>; ++ ++def VSLE_W : LSX_3R<0b01110000000000110>, ++ LSX_3R_DESC_BASE<"vsle.w", vsetle_v4i32, LSX128WOpnd>; ++ ++def VSLE_D : LSX_3R<0b01110000000000111>, ++ LSX_3R_DESC_BASE<"vsle.d", vsetle_v2i64, LSX128DOpnd>; ++ ++ ++def VSLE_BU : LSX_3R<0b01110000000001000>, ++ LSX_3R_DESC_BASE<"vsle.bu", vsetule_v16i8, LSX128BOpnd>; ++ ++def VSLE_HU : LSX_3R<0b01110000000001001>, ++ LSX_3R_DESC_BASE<"vsle.hu", vsetule_v8i16, LSX128HOpnd>; ++ ++def VSLE_WU : LSX_3R<0b01110000000001010>, ++ 
LSX_3R_DESC_BASE<"vsle.wu", vsetule_v4i32, LSX128WOpnd>; ++ ++def VSLE_DU : LSX_3R<0b01110000000001011>, ++ LSX_3R_DESC_BASE<"vsle.du", vsetule_v2i64, LSX128DOpnd>; ++ ++ ++def VSLT_B : LSX_3R<0b01110000000001100>, ++ LSX_3R_DESC_BASE<"vslt.b", vsetlt_v16i8, LSX128BOpnd>; ++ ++def VSLT_H : LSX_3R<0b01110000000001101>, ++ LSX_3R_DESC_BASE<"vslt.h", vsetlt_v8i16, LSX128HOpnd>; ++ ++def VSLT_W : LSX_3R<0b01110000000001110>, ++ LSX_3R_DESC_BASE<"vslt.w", vsetlt_v4i32, LSX128WOpnd>; ++ ++def VSLT_D : LSX_3R<0b01110000000001111>, ++ LSX_3R_DESC_BASE<"vslt.d", vsetlt_v2i64, LSX128DOpnd>; ++ ++ ++def VSLT_BU : LSX_3R<0b01110000000010000>, ++ LSX_3R_DESC_BASE<"vslt.bu", vsetult_v16i8, LSX128BOpnd>; ++ ++def VSLT_HU : LSX_3R<0b01110000000010001>, ++ LSX_3R_DESC_BASE<"vslt.hu", vsetult_v8i16, LSX128HOpnd>; ++ ++def VSLT_WU : LSX_3R<0b01110000000010010>, ++ LSX_3R_DESC_BASE<"vslt.wu", vsetult_v4i32, LSX128WOpnd>; ++ ++def VSLT_DU : LSX_3R<0b01110000000010011>, ++ LSX_3R_DESC_BASE<"vslt.du", vsetult_v2i64, LSX128DOpnd>; ++ ++ ++def VADD_B : LSX_3R<0b01110000000010100>, IsCommutable, ++ LSX_3R_DESC_BASE<"vadd.b", add, LSX128BOpnd>; ++ ++def VADD_H : LSX_3R<0b01110000000010101>, IsCommutable, ++ LSX_3R_DESC_BASE<"vadd.h", add, LSX128HOpnd>; ++ ++def VADD_W : LSX_3R<0b01110000000010110>, IsCommutable, ++ LSX_3R_DESC_BASE<"vadd.w", add, LSX128WOpnd>; ++ ++def VADD_D : LSX_3R<0b01110000000010111>, IsCommutable, ++ LSX_3R_DESC_BASE<"vadd.d", add, LSX128DOpnd>; ++ ++ ++def VSUB_B : LSX_3R<0b01110000000011000>, ++ LSX_3R_DESC_BASE<"vsub.b", sub, LSX128BOpnd>; ++ ++def VSUB_H : LSX_3R<0b01110000000011001>, ++ LSX_3R_DESC_BASE<"vsub.h", sub, LSX128HOpnd>; ++ ++def VSUB_W : LSX_3R<0b01110000000011010>, ++ LSX_3R_DESC_BASE<"vsub.w", sub, LSX128WOpnd>; ++ ++def VSUB_D : LSX_3R<0b01110000000011011>, ++ LSX_3R_DESC_BASE<"vsub.d", sub, LSX128DOpnd>; ++ ++ ++ ++//Pat ++class LSXBitconvertPat preds = [HasLSX]> : ++ LSXPat<(DstVT (bitconvert SrcVT:$src)), ++ (COPY_TO_REGCLASS SrcVT:$src, DstRC), preds>; ++ ++def : LSXBitconvertPat; ++def : LSXBitconvertPat; ++def : LSXBitconvertPat; ++def : LSXBitconvertPat; ++ ++def : LSXBitconvertPat; ++def : LSXBitconvertPat; ++def : LSXBitconvertPat; ++def : LSXBitconvertPat; ++def : LSXBitconvertPat; ++ ++def : LSXBitconvertPat; ++def : LSXBitconvertPat; ++def : LSXBitconvertPat; ++def : LSXBitconvertPat; ++def : LSXBitconvertPat; ++ ++def : LSXBitconvertPat; ++def : LSXBitconvertPat; ++def : LSXBitconvertPat; ++def : LSXBitconvertPat; ++ ++def : LSXBitconvertPat; ++def : LSXBitconvertPat; ++def : LSXBitconvertPat; ++def : LSXBitconvertPat; ++ ++def : LSXBitconvertPat; ++def : LSXBitconvertPat; ++def : LSXBitconvertPat; ++def : LSXBitconvertPat; ++ ++def : LSXBitconvertPat; ++def : LSXBitconvertPat; ++def : LSXBitconvertPat; ++def : LSXBitconvertPat; ++ ++ ++ ++ ++def : LSXPat<(i32 (vextract_sext_i8 v16i8:$vj, i32:$idx)), ++ (SRAI_W (COPY_TO_REGCLASS (i32 (EXTRACT_SUBREG (VREPLVE_B v16i8:$vj, ++ i32:$idx), ++ sub_lo)), ++ GPR32), (i32 24))>; ++def : LSXPat<(i32 (vextract_sext_i16 v8i16:$vj, i32:$idx)), ++ (SRAI_W (COPY_TO_REGCLASS (i32 (EXTRACT_SUBREG (VREPLVE_H v8i16:$vj, ++ i32:$idx), ++ sub_lo)), ++ GPR32), (i32 16))>; ++def : LSXPat<(i32 (vextract_sext_i32 v4i32:$vj, i32:$idx)), ++ (COPY_TO_REGCLASS (i32 (EXTRACT_SUBREG (VREPLVE_W v4i32:$vj, ++ i32:$idx), ++ sub_lo)), ++ GPR32)>; ++def : LSXPat<(i64 (vextract_sext_i64 v2i64:$vj, i32:$idx)), ++ (COPY_TO_REGCLASS (i64 (EXTRACT_SUBREG (VREPLVE_D v2i64:$vj, ++ i32:$idx), ++ sub_64)), ++ GPR64)>; ++ ++def : LSXPat<(i32 
(vextract_zext_i8 v16i8:$vj, i32:$idx)), ++ (SRLI_W (COPY_TO_REGCLASS (i32 (EXTRACT_SUBREG (VREPLVE_B v16i8:$vj, ++ i32:$idx), ++ sub_lo)), ++ GPR32), (i32 24))>; ++def : LSXPat<(i32 (vextract_zext_i16 v8i16:$vj, i32:$idx)), ++ (SRLI_W (COPY_TO_REGCLASS (i32 (EXTRACT_SUBREG (VREPLVE_H v8i16:$vj, ++ i32:$idx), ++ sub_lo)), ++ GPR32), (i32 16))>; ++def : LSXPat<(i32 (vextract_zext_i32 v4i32:$vj, i32:$idx)), ++ (COPY_TO_REGCLASS (i32 (EXTRACT_SUBREG (VREPLVE_W v4i32:$vj, ++ i32:$idx), ++ sub_lo)), ++ GPR32)>; ++ ++def : LSXPat<(i64 (vextract_zext_i64 v2i64:$vj, i32:$idx)), ++ (COPY_TO_REGCLASS (i64 (EXTRACT_SUBREG (VREPLVE_D v2i64:$vj, ++ i32:$idx), ++ sub_64)), ++ GPR64)>; ++ ++def : LSXPat<(f32 (vector_extract v4f32:$vj, i32:$idx)), ++ (f32 (EXTRACT_SUBREG (VREPLVE_W v4f32:$vj, ++ i32:$idx), ++ sub_lo))>; ++def : LSXPat<(f64 (vector_extract v2f64:$vj, i32:$idx)), ++ (f64 (EXTRACT_SUBREG (VREPLVE_D v2f64:$vj, ++ i32:$idx), ++ sub_64))>; ++ ++def : LSXPat< ++ (i32 (vextract_sext_i8 v16i8:$vj, i64:$idx)), ++ (SRAI_W (COPY_TO_REGCLASS ++ (i32 (EXTRACT_SUBREG ++ (VREPLVE_B v16i8:$vj, ++ (COPY_TO_REGCLASS ++ (i32 (EXTRACT_SUBREG i64:$idx, sub_32)), GPR32)), ++ sub_lo)), ++ GPR32), ++ (i32 24))>; ++def : LSXPat< ++ (i32 (vextract_sext_i16 v8i16:$vj, i64:$idx)), ++ (SRAI_W (COPY_TO_REGCLASS ++ (i32 (EXTRACT_SUBREG ++ (VREPLVE_H v8i16:$vj, ++ (COPY_TO_REGCLASS ++ (i32 (EXTRACT_SUBREG i64:$idx, sub_32)), GPR32)), ++ sub_lo)), ++ GPR32), ++ (i32 16))>; ++ ++def : LSXPat< ++ (i32 (vextract_sext_i32 v4i32:$vj, i64:$idx)), ++ (COPY_TO_REGCLASS ++ (i32 (EXTRACT_SUBREG ++ (VREPLVE_W v4i32:$vj, ++ (COPY_TO_REGCLASS ++ (i32 (EXTRACT_SUBREG i64:$idx, sub_32)), GPR32)), ++ sub_lo)), ++ GPR32)>; ++ ++def : LSXPat< ++ (i64 (vextract_sext_i64 v2i64:$vj, i64:$idx)), ++ (COPY_TO_REGCLASS ++ (i64 (EXTRACT_SUBREG ++ (VREPLVE_D v2i64:$vj, ++ (COPY_TO_REGCLASS (i32 (EXTRACT_SUBREG i64:$idx, sub_32)), GPR32)), ++ sub_64)), ++ GPR64)>; ++ ++def : LSXPat< ++ (i32 (vextract_zext_i8 v16i8:$vj, i64:$idx)), ++ (SRLI_W (COPY_TO_REGCLASS ++ (i32 (EXTRACT_SUBREG ++ (VREPLVE_B v16i8:$vj, ++ (COPY_TO_REGCLASS ++ (i32 (EXTRACT_SUBREG i64:$idx, sub_32)), GPR32)), ++ sub_lo)), ++ GPR32), ++ (i32 24))>; ++def : LSXPat< ++ (i32 (vextract_zext_i16 v8i16:$vj, i64:$idx)), ++ (SRLI_W (COPY_TO_REGCLASS ++ (i32 (EXTRACT_SUBREG ++ (VREPLVE_H v8i16:$vj, ++ (COPY_TO_REGCLASS ++ (i32 (EXTRACT_SUBREG i64:$idx, sub_32)), GPR32)), ++ sub_lo)), ++ GPR32), ++ (i32 16))>; ++def : LSXPat< ++ (i32 (vextract_zext_i32 v4i32:$vj, i64:$idx)), ++ (COPY_TO_REGCLASS ++ (i32 (EXTRACT_SUBREG ++ (VREPLVE_W v4i32:$vj, ++ (COPY_TO_REGCLASS (i32 (EXTRACT_SUBREG i64:$idx, sub_32)), GPR32)), ++ sub_lo)), ++ GPR32)>; ++def : LSXPat< ++ (i64 (vextract_zext_i64 v2i64:$vj, i64:$idx)), ++ (COPY_TO_REGCLASS ++ (i64 (EXTRACT_SUBREG ++ (VREPLVE_D v2i64:$vj, ++ (COPY_TO_REGCLASS (i32 (EXTRACT_SUBREG i64:$idx, sub_32)), GPR32)), ++ sub_64)), ++ GPR64)>; ++ ++ def : LSXPat< ++ (f32 (vector_extract v4f32:$vj, i64:$idx)), ++ (f32 (EXTRACT_SUBREG ++ (VREPLVE_W v4f32:$vj, ++ (COPY_TO_REGCLASS (i32 (EXTRACT_SUBREG i64:$idx, sub_32)), GPR32)), ++ sub_lo))>; ++def : LSXPat< ++ (f64 (vector_extract v2f64:$vj, i64:$idx)), ++ (f64 (EXTRACT_SUBREG ++ (VREPLVE_D v2f64:$vj, ++ (COPY_TO_REGCLASS (i32 (EXTRACT_SUBREG i64:$idx, sub_32)), GPR32)), ++ sub_64))>; ++ ++ ++def : LSXPat<(vfseteq_v4f32 LSX128WOpnd:$a, LSX128WOpnd:$b), ++ (VFCMP_CEQ_S LSX128WOpnd:$a, LSX128WOpnd:$b)>; ++ ++def : LSXPat<(vfseteq_v2f64 LSX128DOpnd:$a, LSX128DOpnd:$b), ++ (VFCMP_CEQ_D LSX128DOpnd:$a, LSX128DOpnd:$b)>; ++ 
++def : LSXPat<(vfsetle_v4f32 LSX128WOpnd:$a, LSX128WOpnd:$b), ++ (VFCMP_CLE_S LSX128WOpnd:$a, LSX128WOpnd:$b)>; ++ ++def : LSXPat<(vfsetle_v2f64 LSX128DOpnd:$a, LSX128DOpnd:$b), ++ (VFCMP_CLE_D LSX128DOpnd:$a, LSX128DOpnd:$b)>; ++ ++def : LSXPat<(vfsetlt_v4f32 LSX128WOpnd:$a, LSX128WOpnd:$b), ++ (VFCMP_CLT_S LSX128WOpnd:$a, LSX128WOpnd:$b)>; ++ ++def : LSXPat<(vfsetlt_v2f64 LSX128DOpnd:$a, LSX128DOpnd:$b), ++ (VFCMP_CLT_D LSX128DOpnd:$a, LSX128DOpnd:$b)>; ++ ++def : LSXPat<(vfsetne_v4f32 LSX128WOpnd:$a, LSX128WOpnd:$b), ++ (VFCMP_CNE_S LSX128WOpnd:$a, LSX128WOpnd:$b)>; ++ ++def : LSXPat<(vfsetne_v2f64 LSX128DOpnd:$a, LSX128DOpnd:$b), ++ (VFCMP_CNE_D LSX128DOpnd:$a, LSX128DOpnd:$b)>; ++ ++ ++class LSX_INSERT_PSEUDO_BASE : ++ LSXPseudo<(outs ROVD:$vd), (ins ROVD:$vd_in, ImmOp:$n, ROFS:$fs), ++ [(set ROVD:$vd, (OpNode (Ty ROVD:$vd_in), ROFS:$fs, Imm:$n))]> { ++ bit usesCustomInserter = 1; ++ string Constraints = "$vd = $vd_in"; ++} ++ ++ ++class INSERT_FW_PSEUDO_DESC : LSX_INSERT_PSEUDO_BASE; ++class INSERT_FD_PSEUDO_DESC : LSX_INSERT_PSEUDO_BASE; ++ ++def INSERT_FW_PSEUDO : INSERT_FW_PSEUDO_DESC; ++def INSERT_FD_PSEUDO : INSERT_FD_PSEUDO_DESC; ++ ++ ++class LSX_INSERT_VIDX_PSEUDO_BASE : ++ LSXPseudo<(outs ROVD:$vd), (ins ROVD:$vd_in, ROIdx:$n, ROFS:$fs), ++ [(set ROVD:$vd, (OpNode (Ty ROVD:$vd_in), ROFS:$fs, ++ ROIdx:$n))]> { ++ bit usesCustomInserter = 1; ++ string Constraints = "$vd = $vd_in"; ++} ++ ++class INSERT_H_VIDX64_PSEUDO_DESC : ++ LSX_INSERT_VIDX_PSEUDO_BASE; ++def INSERT_H_VIDX64_PSEUDO : INSERT_H_VIDX64_PSEUDO_DESC; ++ ++class INSERTPostRA : ++ LoongArchPseudo<(outs RC:$xd), (ins RC:$xd_in, RD:$n, RE:$fs), []> { ++ let mayLoad = 1; ++ let mayStore = 1; ++} ++ ++def INSERT_H_VIDX64_PSEUDO_POSTRA : INSERTPostRA; ++ ++class LSX_COPY_PSEUDO_BASE : ++ LSXPseudo<(outs RCD:$vd), (ins RCVS:$vj, ImmOp:$n), ++ [(set RCD:$vd, (OpNode (VecTy RCVS:$vj), Imm:$n))]> { ++ bit usesCustomInserter = 1; ++} ++ ++ ++class COPY_FW_PSEUDO_DESC : LSX_COPY_PSEUDO_BASE; ++class COPY_FD_PSEUDO_DESC : LSX_COPY_PSEUDO_BASE; ++def COPY_FW_PSEUDO : COPY_FW_PSEUDO_DESC; ++def COPY_FD_PSEUDO : COPY_FD_PSEUDO_DESC; ++ ++ ++let isCodeGenOnly = 1 in { ++ ++def VST_H : LSX_I12_S<0b0010110001>, ++ ST_DESC_BASE<"vst", store, v8i16, LSX128HOpnd, mem_simm12>; ++def VST_W : LSX_I12_S<0b0010110001>, ++ ST_DESC_BASE<"vst", store, v4i32, LSX128WOpnd, mem_simm12>; ++def VST_D : LSX_I12_S<0b0010110001>, ++ ST_DESC_BASE<"vst", store, v2i64, LSX128DOpnd, mem_simm12>; ++ ++ ++def VLD_H : LSX_I12_S<0b0010110000>, ++ LD_DESC_BASE<"vld", load, v8i16, LSX128HOpnd, mem_simm12>; ++def VLD_W : LSX_I12_S<0b0010110000>, ++ LD_DESC_BASE<"vld", load, v4i32, LSX128WOpnd, mem_simm12>; ++def VLD_D : LSX_I12_S<0b0010110000>, ++ LD_DESC_BASE<"vld", load, v2i64, LSX128DOpnd, mem_simm12>; ++ ++ ++ ++def VANDI_B_N : LSX_I8_U<0b01110011110100>, ++ LSX_BIT_U8_VREPLVE_DESC_BASE<"vandi.b", and, vsplati8_uimm8, LSX128BOpnd>; ++ ++ ++def VXORI_B_N : LSX_I8_U<0b01110011110110>, ++ LSX_BIT_U8_VREPLVE_DESC_BASE<"vxori.b", xor, vsplati8_uimm8, LSX128BOpnd>; ++ ++ ++def VSRAI_B_N : LSX_I3_U<0b0111001100110100001>, ++ LSX_BIT_U3_VREPLVE_DESC_BASE<"vsrai.b", sra, vsplati8_uimm3, LSX128BOpnd>; ++ ++def VSRAI_H_N : LSX_I4_U<0b011100110011010001>, ++ LSX_BIT_U4_VREPLVE_DESC_BASE<"vsrai.h", sra, vsplati16_uimm4, LSX128HOpnd>; ++ ++def VSRAI_W_N : LSX_I5_U<0b01110011001101001>, ++ LSX_BIT_U5_VREPLVE_DESC_BASE<"vsrai.w", sra, vsplati32_uimm5, LSX128WOpnd>; ++ ++def VSRAI_D_N : LSX_I6_U<0b0111001100110101>, ++ LSX_BIT_U6_VREPLVE_DESC_BASE<"vsrai.d", sra, 
vsplati64_uimm6, LSX128DOpnd>; ++ ++ ++def VMAXI_BU_N : LSX_I5_U<0b01110010100101000>, ++ LSX_I5_U_DESC_BASE<"vmaxi.bu", umax, vsplati8_uimm5, LSX128BOpnd>; ++ ++def VMAXI_HU_N : LSX_I5_U<0b01110010100101001>, ++ LSX_I5_U_DESC_BASE<"vmaxi.hu", umax, vsplati16_uimm5, LSX128HOpnd>; ++ ++def VMAXI_WU_N : LSX_I5_U<0b01110010100101010>, ++ LSX_I5_U_DESC_BASE<"vmaxi.wu", umax, vsplati32_uimm5, LSX128WOpnd>; ++ ++def VMAXI_DU_N : LSX_I5_U<0b01110010100101011>, ++ LSX_I5_U_DESC_BASE<"vmaxi.du", umax, vsplati64_uimm5, LSX128DOpnd>; ++ ++ ++def VMINI_B_N : LSX_I5<0b01110010100100100>, ++ LSX_I5_DESC_BASE<"vmini.b", smin, vsplati8_simm5, LSX128BOpnd>; ++ ++def VMINI_H_N : LSX_I5<0b01110010100100101>, ++ LSX_I5_DESC_BASE<"vmini.h", smin, vsplati16_simm5, LSX128HOpnd>; ++ ++def VMINI_W_N : LSX_I5<0b01110010100100110>, ++ LSX_I5_DESC_BASE<"vmini.w", smin, vsplati32_simm5, LSX128WOpnd>; ++ ++def VMINI_D_N : LSX_I5<0b01110010100100111>, ++ LSX_I5_DESC_BASE<"vmini.d", smin, vsplati64_simm5, LSX128DOpnd>; ++ ++ ++def VMAXI_B_N : LSX_I5<0b01110010100100000>, ++ LSX_I5_DESC_BASE<"vmaxi.b", smax, vsplati8_simm5, LSX128BOpnd>; ++ ++def VMAXI_H_N : LSX_I5<0b01110010100100001>, ++ LSX_I5_DESC_BASE<"vmaxi.h", smax, vsplati16_simm5, LSX128HOpnd>; ++ ++def VMAXI_W_N : LSX_I5<0b01110010100100010>, ++ LSX_I5_DESC_BASE<"vmaxi.w", smax, vsplati32_simm5, LSX128WOpnd>; ++ ++def VMAXI_D_N : LSX_I5<0b01110010100100011>, ++ LSX_I5_DESC_BASE<"vmaxi.d", smax, vsplati64_simm5, LSX128DOpnd>; ++ ++ ++def VSEQI_B_N : LSX_I5<0b01110010100000000>, ++ LSX_I5_DESC_BASE<"vseqi.b", vseteq_v16i8, vsplati8_simm5, LSX128BOpnd>; ++ ++def VSEQI_H_N : LSX_I5<0b01110010100000001>, ++ LSX_I5_DESC_BASE<"vseqi.h", vseteq_v8i16, vsplati16_simm5, LSX128HOpnd>; ++ ++def VSEQI_W_N : LSX_I5<0b01110010100000010>, ++ LSX_I5_DESC_BASE<"vseqi.w", vseteq_v4i32, vsplati32_simm5, LSX128WOpnd>; ++ ++def VSEQI_D_N : LSX_I5<0b01110010100000011>, ++ LSX_I5_DESC_BASE<"vseqi.d", vseteq_v2i64, vsplati64_simm5, LSX128DOpnd>; ++ ++ ++def VSLEI_B_N : LSX_I5<0b01110010100000100>, ++ LSX_I5_DESC_BASE<"vslei.b", vsetle_v16i8, vsplati8_simm5, LSX128BOpnd>; ++ ++def VSLEI_H_N : LSX_I5<0b01110010100000101>, ++ LSX_I5_DESC_BASE<"vslei.h", vsetle_v8i16, vsplati16_simm5, LSX128HOpnd>; ++ ++def VSLEI_W_N : LSX_I5<0b01110010100000110>, ++ LSX_I5_DESC_BASE<"vslei.w", vsetle_v4i32, vsplati32_simm5, LSX128WOpnd>; ++ ++def VSLEI_D_N : LSX_I5<0b01110010100000111>, ++ LSX_I5_DESC_BASE<"vslei.d", vsetle_v2i64, vsplati64_simm5, LSX128DOpnd>; ++ ++def VSLEI_BU_N : LSX_I5_U<0b01110010100001000>, ++ LSX_I5_U_DESC_BASE<"vslei.bu", vsetule_v16i8, vsplati8_uimm5, LSX128BOpnd>; ++ ++def VSLEI_HU_N : LSX_I5_U<0b01110010100001001>, ++ LSX_I5_U_DESC_BASE<"vslei.hu", vsetule_v8i16, vsplati16_uimm5, LSX128HOpnd>; ++ ++def VSLEI_WU_N : LSX_I5_U<0b01110010100001010>, ++ LSX_I5_U_DESC_BASE<"vslei.wu", vsetule_v4i32, vsplati32_uimm5, LSX128WOpnd>; ++ ++def VSLEI_DU_N : LSX_I5_U<0b01110010100001011>, ++ LSX_I5_U_DESC_BASE<"vslei.du", vsetule_v2i64, vsplati64_uimm5, LSX128DOpnd>; ++ ++ ++def VSLTI_B_N : LSX_I5<0b01110010100001100>, ++ LSX_I5_DESC_BASE<"vslti.b", vsetlt_v16i8, vsplati8_simm5, LSX128BOpnd>; ++ ++def VSLTI_H_N : LSX_I5<0b01110010100001101>, ++ LSX_I5_DESC_BASE<"vslti.h", vsetlt_v8i16, vsplati16_simm5, LSX128HOpnd>; ++ ++def VSLTI_W_N : LSX_I5<0b01110010100001110>, ++ LSX_I5_DESC_BASE<"vslti.w", vsetlt_v4i32, vsplati32_simm5, LSX128WOpnd>; ++ ++def VSLTI_D_N : LSX_I5<0b01110010100001111>, ++ LSX_I5_DESC_BASE<"vslti.d", vsetlt_v2i64, vsplati64_simm5, LSX128DOpnd>; ++ ++ ++def VSLTI_BU_N : 
LSX_I5_U<0b01110010100010000>, ++ LSX_I5_U_DESC_BASE<"vslti.bu", vsetult_v16i8, vsplati8_uimm5, LSX128BOpnd>; ++ ++def VSLTI_HU_N : LSX_I5_U<0b01110010100010001>, ++ LSX_I5_U_DESC_BASE<"vslti.hu", vsetult_v8i16, vsplati16_uimm5, LSX128HOpnd>; ++ ++def VSLTI_WU_N : LSX_I5_U<0b01110010100010010>, ++ LSX_I5_U_DESC_BASE<"vslti.wu", vsetult_v4i32, vsplati32_uimm5, LSX128WOpnd>; ++ ++def VSLTI_DU_N : LSX_I5_U<0b01110010100010011>, ++ LSX_I5_U_DESC_BASE<"vslti.du", vsetult_v2i64, vsplati64_uimm5, LSX128DOpnd>; ++ ++ ++def VBITSELI_B_N : LSX_I8_U<0b01110011110001>, ++ LSX_2R_3R_SELECT<"vbitseli.b", vselect, LSX128BOpnd, LSX128BOpnd>; ++ ++} ++ ++ ++def : LSXPat<(v4f32 (load addrimm12:$addr)), (VLD_W addrimm12:$addr)>; ++def : LSXPat<(v2f64 (load addrimm12:$addr)), (VLD_D addrimm12:$addr)>; ++ ++def VST_FW : LSXPat<(store (v4f32 LSX128W:$vj), addrimm12:$addr), ++ (VST_W LSX128W:$vj, addrimm12:$addr)>; ++def VST_FD : LSXPat<(store (v2f64 LSX128D:$vj), addrimm12:$addr), ++ (VST_D LSX128D:$vj, addrimm12:$addr)>; ++ ++def VNEG_FW : LSXPat<(fneg (v4f32 LSX128W:$vj)), ++ (VBITREVI_W LSX128W:$vj, 31)>; ++def VNEG_FD : LSXPat<(fneg (v2f64 LSX128D:$vj)), ++ (VBITREVI_D LSX128D:$vj, 63)>; ++ ++ ++def : LSXPat<(v2i64 (LoongArchVABSD v2i64:$vj, v2i64:$vk, (i32 0))), ++ (v2i64 (VABSD_D $vj, $vk))>; ++ ++def : LSXPat<(v4i32 (LoongArchVABSD v4i32:$vj, v4i32:$vk, (i32 0))), ++ (v4i32 (VABSD_W $vj, $vk))>; ++ ++def : LSXPat<(v8i16 (LoongArchVABSD v8i16:$vj, v8i16:$vk, (i32 0))), ++ (v8i16 (VABSD_H $vj, $vk))>; ++ ++def : LSXPat<(v16i8 (LoongArchVABSD v16i8:$vj, v16i8:$vk, (i32 0))), ++ (v16i8 (VABSD_B $vj, $vk))>; ++ ++def : LSXPat<(v2i64 (LoongArchUVABSD v2i64:$vj, v2i64:$vk, (i32 0))), ++ (v2i64 (VABSD_DU $vj, $vk))>; ++ ++def : LSXPat<(v4i32 (LoongArchUVABSD v4i32:$vj, v4i32:$vk, (i32 0))), ++ (v4i32 (VABSD_WU $vj, $vk))>; ++ ++def : LSXPat<(v8i16 (LoongArchUVABSD v8i16:$vj, v8i16:$vk, (i32 0))), ++ (v8i16 (VABSD_HU $vj, $vk))>; ++ ++def : LSXPat<(v16i8 (LoongArchUVABSD v16i8:$vj, v16i8:$vk, (i32 0))), ++ (v16i8 (VABSD_BU $vj, $vk))>; ++ ++ ++def : LSXPat<(or v16i8:$vj, (shl vsplat_imm_eq_1, v16i8:$vk)), ++ (VBITSET_B v16i8:$vj, v16i8:$vk)>; ++def : LSXPat<(or v8i16:$vj, (shl vsplat_imm_eq_1, v8i16:$vk)), ++ (VBITSET_H v8i16:$vj, v8i16:$vk)>; ++def : LSXPat<(or v4i32:$vj, (shl vsplat_imm_eq_1, v4i32:$vk)), ++ (VBITSET_W v4i32:$vj, v4i32:$vk)>; ++def : LSXPat<(or v2i64:$vj, (shl vsplat_imm_eq_1, v2i64:$vk)), ++ (VBITSET_D v2i64:$vj, v2i64:$vk)>; ++ ++def : LSXPat<(xor v16i8:$vj, (shl vsplat_imm_eq_1, v16i8:$vk)), ++ (VBITREV_B v16i8:$vj, v16i8:$vk)>; ++def : LSXPat<(xor v8i16:$vj, (shl vsplat_imm_eq_1, v8i16:$vk)), ++ (VBITREV_H v8i16:$vj, v8i16:$vk)>; ++def : LSXPat<(xor v4i32:$vj, (shl vsplat_imm_eq_1, v4i32:$vk)), ++ (VBITREV_W v4i32:$vj, v4i32:$vk)>; ++def : LSXPat<(xor v2i64:$vj, (shl (v2i64 vsplati64_imm_eq_1), v2i64:$vk)), ++ (VBITREV_D v2i64:$vj, v2i64:$vk)>; ++ ++def : LSXPat<(and v16i8:$vj, (xor (shl vsplat_imm_eq_1, v16i8:$vk), immAllOnesV)), ++ (VBITCLR_B v16i8:$vj, v16i8:$vk)>; ++def : LSXPat<(and v8i16:$vj, (xor (shl vsplat_imm_eq_1, v8i16:$vk), immAllOnesV)), ++ (VBITCLR_H v8i16:$vj, v8i16:$vk)>; ++def : LSXPat<(and v4i32:$vj, (xor (shl vsplat_imm_eq_1, v4i32:$vk), immAllOnesV)), ++ (VBITCLR_W v4i32:$vj, v4i32:$vk)>; ++def : LSXPat<(and v2i64:$vj, (xor (shl (v2i64 vsplati64_imm_eq_1), v2i64:$vk), (bitconvert (v4i32 immAllOnesV)))), ++ (VBITCLR_D v2i64:$vj, v2i64:$vk)>; ++def vsplati64_imm_eq_63 : PatLeaf<(bitconvert (v4i32 (build_vector))), [{ ++ APInt Imm; ++ SDNode *BV = 
N->getOperand(0).getNode(); ++ EVT EltTy = N->getValueType(0).getVectorElementType(); ++ ++ return selectVSplat(BV, Imm, EltTy.getSizeInBits()) && ++ Imm.getBitWidth() == EltTy.getSizeInBits() && Imm == 63; ++}]>; ++ ++def immi32Cst7 : ImmLeaf(Imm) && Imm == 7;}]>; ++def immi32Cst15 : ImmLeaf(Imm) && Imm == 15;}]>; ++def immi32Cst31 : ImmLeaf(Imm) && Imm == 31;}]>; ++ ++def vsplati8imm7 : PatFrag<(ops node:$vt), ++ (and node:$vt, (vsplati8 immi32Cst7))>; ++def vsplati16imm15 : PatFrag<(ops node:$vt), ++ (and node:$vt, (vsplati16 immi32Cst15))>; ++def vsplati32imm31 : PatFrag<(ops node:$vt), ++ (and node:$vt, (vsplati32 immi32Cst31))>; ++def vsplati64imm63 : PatFrag<(ops node:$vt), ++ (and node:$vt, vsplati64_imm_eq_63)>; ++ ++class LSXShiftPat : ++ LSXPat<(VT (Node VT:$vs, (VT (and VT:$vt, Vec)))), ++ (VT (Insn VT:$vs, VT:$vt))>; ++ ++class LSXBitPat : ++ LSXPat<(VT (Node VT:$vs, (shl vsplat_imm_eq_1, (Frag VT:$vt)))), ++ (VT (Insn VT:$vs, VT:$vt))>; ++ ++multiclass LSXShiftPats { ++ def : LSXShiftPat(Insn#_B), ++ (vsplati8 immi32Cst7)>; ++ def : LSXShiftPat(Insn#_H), ++ (vsplati16 immi32Cst15)>; ++ def : LSXShiftPat(Insn#_W), ++ (vsplati32 immi32Cst31)>; ++ def : LSXPat<(v2i64 (Node v2i64:$vs, (v2i64 (and v2i64:$vt, ++ vsplati64_imm_eq_63)))), ++ (v2i64 (!cast(Insn#_D) v2i64:$vs, v2i64:$vt))>; ++} ++ ++multiclass LSXBitPats { ++ def : LSXBitPat(Insn#_B), vsplati8imm7>; ++ def : LSXBitPat(Insn#_H), vsplati16imm15>; ++ def : LSXBitPat(Insn#_W), vsplati32imm31>; ++ def : LSXPat<(Node v2i64:$vs, (shl (v2i64 vsplati64_imm_eq_1), ++ (vsplati64imm63 v2i64:$vt))), ++ (v2i64 (!cast(Insn#_D) v2i64:$vs, v2i64:$vt))>; ++} ++ ++defm : LSXShiftPats; ++defm : LSXShiftPats; ++defm : LSXShiftPats; ++defm : LSXBitPats; ++defm : LSXBitPats; ++ ++def : LSXPat<(and v16i8:$vs, (xor (shl vsplat_imm_eq_1, ++ (vsplati8imm7 v16i8:$vt)), ++ immAllOnesV)), ++ (v16i8 (VBITCLR_B v16i8:$vs, v16i8:$vt))>; ++def : LSXPat<(and v8i16:$vs, (xor (shl vsplat_imm_eq_1, ++ (vsplati16imm15 v8i16:$vt)), ++ immAllOnesV)), ++ (v8i16 (VBITCLR_H v8i16:$vs, v8i16:$vt))>; ++def : LSXPat<(and v4i32:$vs, (xor (shl vsplat_imm_eq_1, ++ (vsplati32imm31 v4i32:$vt)), ++ immAllOnesV)), ++ (v4i32 (VBITCLR_W v4i32:$vs, v4i32:$vt))>; ++def : LSXPat<(and v2i64:$vs, (xor (shl (v2i64 vsplati64_imm_eq_1), ++ (vsplati64imm63 v2i64:$vt)), ++ (bitconvert (v4i32 immAllOnesV)))), ++ (v2i64 (VBITCLR_D v2i64:$vs, v2i64:$vt))>; ++ ++ ++def : LSXPat<(fdiv (v4f32 (build_vector (f32 fpimm1), (f32 fpimm1), (f32 fpimm1), (f32 fpimm1))), v4f32:$v), ++ (VFRECIP_S v4f32:$v)>; ++ ++def : LSXPat<(fdiv (v2f64 (build_vector (f64 fpimm1), (f64 fpimm1))), v2f64:$v), ++ (VFRECIP_D v2f64:$v)>; ++ ++def : LSXPat<(fdiv (v4f32 fpimm1), v4f32:$v), ++ (VFRECIP_S v4f32:$v)>; ++ ++def : LSXPat<(fdiv (v2f64 fpimm1), v2f64:$v), ++ (VFRECIP_D v2f64:$v)>; ++ ++ ++def : LSXPat<(fdiv (v4f32 (build_vector (f32 fpimm1), (f32 fpimm1), (f32 fpimm1), (f32 fpimm1))), (fsqrt v4f32:$v)), ++ (VFRSQRT_S v4f32:$v)>; ++ ++def : LSXPat<(fdiv (v2f64 (build_vector (f64 fpimm1), (f64 fpimm1))), (fsqrt v2f64:$v)), ++ (VFRSQRT_D v2f64:$v)>; ++ ++def : LSXPat<(fdiv (v4f32 fpimm1), (fsqrt v4f32:$v)), ++ (VFRSQRT_S v4f32:$v)>; ++ ++def : LSXPat<(fdiv (v2f64 fpimm1), (fsqrt v2f64:$v)), ++ (VFRSQRT_D v2f64:$v)>; ++ ++ ++def : LSXPat<(abs v2i64:$v), ++ (VMAX_D v2i64:$v, (VNEG_D v2i64:$v))>; ++ ++def : LSXPat<(abs v4i32:$v), ++ (VMAX_W v4i32:$v, (VNEG_W v4i32:$v))>; ++ ++def : LSXPat<(abs v8i16:$v), ++ (VMAX_H v8i16:$v, (VNEG_H v8i16:$v))>; ++ ++def : LSXPat<(abs v16i8:$v), ++ (VMAX_B v16i8:$v, (VNEG_B 
v16i8:$v))>; ++ ++ ++def : LSXPat<(sub (v16i8 immAllZerosV), v16i8:$v), ++ (VNEG_B v16i8:$v)>; ++ ++def : LSXPat<(sub (v8i16 immAllZerosV), v8i16:$v), ++ (VNEG_H v8i16:$v)>; ++ ++def : LSXPat<(sub (v4i32 immAllZerosV), v4i32:$v), ++ (VNEG_W v4i32:$v)>; ++ ++def : LSXPat<(sub (v2i64 immAllZerosV), v2i64:$v), ++ (VNEG_D v2i64:$v)>; ++ ++ ++def : LSXPat<(sra ++ (v16i8 (add ++ (v16i8 (add LSX128B:$a, LSX128B:$b)), ++ (v16i8 (srl ++ (v16i8 (add LSX128B:$a, LSX128B:$b)), ++ (v16i8 (build_vector (i32 7),(i32 7),(i32 7),(i32 7), ++ (i32 7),(i32 7),(i32 7),(i32 7), ++ (i32 7),(i32 7),(i32 7),(i32 7), ++ (i32 7),(i32 7),(i32 7),(i32 7)) ++ ) ++ ) ++ ) ++ ) ++ ), ++ (v16i8 (build_vector (i32 1),(i32 1),(i32 1),(i32 1), ++ (i32 1),(i32 1),(i32 1),(i32 1), ++ (i32 1),(i32 1),(i32 1),(i32 1), ++ (i32 1),(i32 1),(i32 1),(i32 1) ++ ))), ++ (VAVG_B (v16i8 LSX128B:$a), (v16i8 LSX128B:$b))>; ++ ++def : LSXPat<(sra ++ (v8i16 (add ++ (v8i16 (add LSX128H:$a, LSX128H:$b)), ++ (v8i16 (srl ++ (v8i16 (add LSX128H:$a, LSX128H:$b)), ++ (v8i16 (build_vector (i32 15),(i32 15),(i32 15),(i32 15), ++ (i32 15),(i32 15),(i32 15),(i32 15)) ++ ) ++ ) ++ ) ++ ) ++ ), ++ (v8i16 (build_vector (i32 1),(i32 1),(i32 1),(i32 1), ++ (i32 1),(i32 1),(i32 1),(i32 1) ++ ))), ++ (VAVG_H (v8i16 LSX128H:$a), (v8i16 LSX128H:$b))>; ++ ++def : LSXPat<(sra ++ (v4i32 (add ++ (v4i32 (add LSX128W:$a, LSX128W:$b)), ++ (v4i32 (srl ++ (v4i32 (add LSX128W:$a, LSX128W:$b)), ++ (v4i32 (build_vector (i32 31),(i32 31),(i32 31),(i32 31)) ++ ) ++ ) ++ ) ++ ) ++ ), ++ (v4i32 (build_vector (i32 1),(i32 1),(i32 1),(i32 1)))), ++ (VAVG_W (v4i32 LSX128W:$a), (v4i32 LSX128W:$b))>; ++ ++def : LSXPat<(sra ++ (v2i64 (add ++ (v2i64 (add LSX128D:$a, LSX128D:$b)), ++ (v2i64 (srl ++ (v2i64 (add LSX128D:$a, LSX128D:$b)), ++ (v2i64 (build_vector (i64 63),(i64 63))) ++ ) ++ ) ++ ) ++ ), ++ (v2i64 (build_vector (i64 1),(i64 1)))), ++ (VAVG_D (v2i64 LSX128D:$a), (v2i64 LSX128D:$b))>; ++ ++ ++ ++def : LSXPat<(srl ++ (v16i8 (add LSX128B:$a, LSX128B:$b)), ++ (v16i8 (build_vector (i32 1),(i32 1),(i32 1),(i32 1), ++ (i32 1),(i32 1),(i32 1),(i32 1), ++ (i32 1),(i32 1),(i32 1),(i32 1), ++ (i32 1),(i32 1),(i32 1),(i32 1)) ++ ) ++ ), ++ (VAVG_BU (v16i8 LSX128B:$a), (v16i8 LSX128B:$b))>; ++ ++def : LSXPat<(srl ++ (v8i16 (add LSX128H:$a, LSX128H:$b)), ++ (v8i16 (build_vector (i32 1),(i32 1),(i32 1),(i32 1), ++ (i32 1),(i32 1),(i32 1),(i32 1)) ++ ) ++ ), ++ (VAVG_HU (v8i16 LSX128H:$a), (v8i16 LSX128H:$b))>; ++ ++def : LSXPat<(srl ++ (v4i32 (add LSX128W:$a, LSX128W:$b)), ++ (v4i32 (build_vector (i32 1),(i32 1),(i32 1),(i32 1)) ++ ) ++ ), ++ (VAVG_WU (v4i32 LSX128W:$a), (v4i32 LSX128W:$b))>; ++ ++def : LSXPat<(srl ++ (v2i64 (add LSX128D:$a, LSX128D:$b)), ++ (v2i64 (build_vector (i64 1),(i64 1)) ++ ) ++ ), ++ (VAVG_DU (v2i64 LSX128D:$a), (v2i64 LSX128D:$b))>; ++ ++ ++ ++ ++def : LSXPat<(sra ++ (v16i8 (add ++ (v16i8 (add (v16i8 ( ++ build_vector (i32 1),(i32 1),(i32 1),(i32 1), ++ (i32 1),(i32 1),(i32 1),(i32 1), ++ (i32 1),(i32 1),(i32 1),(i32 1), ++ (i32 1),(i32 1),(i32 1),(i32 1) ++ )), ++ (v16i8 (add LSX128B:$a, LSX128B:$b)) ++ )), ++ (v16i8 (srl ++ (v16i8 ( add (v16i8( ++ build_vector (i32 1),(i32 1),(i32 1),(i32 1), ++ (i32 1),(i32 1),(i32 1),(i32 1), ++ (i32 1),(i32 1),(i32 1),(i32 1), ++ (i32 1),(i32 1),(i32 1),(i32 1) ++ )), ++ (v16i8 (add LSX128B:$a, LSX128B:$b)) ++ )), ++ (v16i8 (build_vector (i32 7),(i32 7),(i32 7),(i32 7), ++ (i32 7),(i32 7),(i32 7),(i32 7), ++ (i32 7),(i32 7),(i32 7),(i32 7), ++ (i32 7),(i32 7),(i32 7),(i32 7)) ++ ) ++ ) ++ ) ++ ) ++ ), ++ (v16i8 
(build_vector (i32 1),(i32 1),(i32 1),(i32 1), ++ (i32 1),(i32 1),(i32 1),(i32 1), ++ (i32 1),(i32 1),(i32 1),(i32 1), ++ (i32 1),(i32 1),(i32 1),(i32 1) ++ ))), ++ (VAVGR_B (v16i8 LSX128B:$a), (v16i8 LSX128B:$b))>; ++ ++def : LSXPat<(sra ++ (v8i16 (add ++ (v8i16 (add (v8i16 ( ++ build_vector (i32 1),(i32 1),(i32 1),(i32 1), ++ (i32 1),(i32 1),(i32 1),(i32 1) ++ )), ++ (v8i16 (add LSX128H:$a, LSX128H:$b)) ++ )), ++ (v8i16 (srl ++ (v8i16 (add (v8i16 ( ++ build_vector (i32 1),(i32 1),(i32 1),(i32 1), ++ (i32 1),(i32 1),(i32 1),(i32 1) ++ )), ++ (v8i16 (add LSX128H:$a, LSX128H:$b)) ++ )), ++ (v8i16 (build_vector (i32 15),(i32 15),(i32 15),(i32 15), ++ (i32 15),(i32 15),(i32 15),(i32 15)) ++ ) ++ ) ++ ) ++ ) ++ ), ++ (v8i16 (build_vector (i32 1),(i32 1),(i32 1),(i32 1), ++ (i32 1),(i32 1),(i32 1),(i32 1) ++ ))), ++ (VAVGR_H (v8i16 LSX128H:$a), (v8i16 LSX128H:$b))>; ++ ++def : LSXPat<(sra ++ (v4i32 (add ++ (v4i32 (add (v4i32 ( ++ build_vector (i32 1),(i32 1),(i32 1),(i32 1) ++ )), ++ (v4i32 (add LSX128W:$a, LSX128W:$b)) ++ )), ++ (v4i32 (srl ++ (v4i32 (add (v4i32 ( ++ build_vector (i32 1),(i32 1),(i32 1),(i32 1) ++ )), ++ (v4i32 (add LSX128W:$a, LSX128W:$b)) ++ )), ++ (v4i32 (build_vector (i32 31),(i32 31),(i32 31),(i32 31)) ++ ) ++ ) ++ ) ++ ) ++ ), ++ (v4i32 (build_vector (i32 1),(i32 1),(i32 1),(i32 1)))), ++ (VAVGR_W (v4i32 LSX128W:$a), (v4i32 LSX128W:$b))>; ++ ++def : LSXPat<(sra ++ (v2i64 (add ++ (v2i64 (add (v2i64 ( ++ build_vector (i64 1),(i64 1) ++ )), ++ (v2i64 (add LSX128D:$a, LSX128D:$b)) ++ )), ++ (v2i64 (srl ++ (v2i64 (add (v2i64 ( ++ build_vector (i64 1),(i64 1) ++ )), ++ (v2i64 (add LSX128D:$a, LSX128D:$b)) ++ )), ++ (v2i64 (build_vector (i64 63),(i64 63))) ++ ) ++ ) ++ ) ++ ), ++ (v2i64 (build_vector (i64 1),(i64 1)))), ++ (VAVGR_D (v2i64 LSX128D:$a), (v2i64 LSX128D:$b))>; ++ ++ ++ ++ ++def : LSXPat<(srl ++ (v16i8 (add (v16i8 ( ++ build_vector (i32 1),(i32 1),(i32 1),(i32 1), ++ (i32 1),(i32 1),(i32 1),(i32 1), ++ (i32 1),(i32 1),(i32 1),(i32 1), ++ (i32 1),(i32 1),(i32 1),(i32 1) ++ )), ++ (v16i8 (add LSX128B:$a, LSX128B:$b)) ++ )), ++ (v16i8 (build_vector (i32 1),(i32 1),(i32 1),(i32 1), ++ (i32 1),(i32 1),(i32 1),(i32 1), ++ (i32 1),(i32 1),(i32 1),(i32 1), ++ (i32 1),(i32 1),(i32 1),(i32 1)) ++ ) ++ ), ++ (VAVGR_BU (v16i8 LSX128B:$a), (v16i8 LSX128B:$b))>; ++ ++def : LSXPat<(srl ++ (v8i16 (add (v8i16 ( ++ build_vector (i32 1),(i32 1),(i32 1),(i32 1), ++ (i32 1),(i32 1),(i32 1),(i32 1) ++ )), ++ (v8i16 (add LSX128H:$a, LSX128H:$b)) ++ )), ++ (v8i16 (build_vector (i32 1),(i32 1),(i32 1),(i32 1), ++ (i32 1),(i32 1),(i32 1),(i32 1)) ++ ) ++ ), ++ (VAVGR_HU (v8i16 LSX128H:$a), (v8i16 LSX128H:$b))>; ++ ++def : LSXPat<(srl ++ (v4i32 (add (v4i32 ( ++ build_vector (i32 1),(i32 1),(i32 1),(i32 1) ++ )), ++ (v4i32 (add LSX128W:$a, LSX128W:$b)) ++ )), ++ (v4i32 (build_vector (i32 1),(i32 1),(i32 1),(i32 1)) ++ ) ++ ), ++ (VAVGR_WU (v4i32 LSX128W:$a), (v4i32 LSX128W:$b))>; ++ ++def : LSXPat<(srl ++ (v2i64 (add (v2i64 ( ++ build_vector (i64 1),(i64 1) ++ )), ++ (v2i64 (add LSX128D:$a, LSX128D:$b)) ++ )), ++ (v2i64 (build_vector (i64 1),(i64 1)) ++ ) ++ ), ++ (VAVGR_DU (v2i64 LSX128D:$a), (v2i64 LSX128D:$b))>; ++ ++ ++def : LSXPat<(mulhs LSX128D:$a, LSX128D:$b), ++ (VMUH_D LSX128D:$a, LSX128D:$b)>; ++ ++def : LSXPat<(mulhs LSX128W:$a, LSX128W:$b), ++ (VMUH_W LSX128W:$a, LSX128W:$b)>; ++ ++def : LSXPat<(mulhs LSX128H:$a, LSX128H:$b), ++ (VMUH_H LSX128H:$a, LSX128H:$b)>; ++ ++def : LSXPat<(mulhs LSX128B:$a, LSX128B:$b), ++ (VMUH_B LSX128B:$a, LSX128B:$b)>; ++ ++ ++def : LSXPat<(mulhu 
LSX128D:$a, LSX128D:$b), ++ (VMUH_DU LSX128D:$a, LSX128D:$b)>; ++ ++def : LSXPat<(mulhu LSX128W:$a, LSX128W:$b), ++ (VMUH_WU LSX128W:$a, LSX128W:$b)>; ++ ++def : LSXPat<(mulhu LSX128H:$a, LSX128H:$b), ++ (VMUH_HU LSX128H:$a, LSX128H:$b)>; ++ ++def : LSXPat<(mulhu LSX128B:$a, LSX128B:$b), ++ (VMUH_BU LSX128B:$a, LSX128B:$b)>; ++ ++ ++ ++//===----------------------------------------------------------------------===// ++// Intrinsics ++//===----------------------------------------------------------------------===// ++ ++def : LSXPat<(int_loongarch_lsx_vseq_b (v16i8 LSX128B:$vj), (v16i8 LSX128B:$vk)), ++ (VSEQ_B LSX128B:$vj, LSX128B:$vk)>; ++def : LSXPat<(int_loongarch_lsx_vseq_h (v8i16 LSX128H:$vj), (v8i16 LSX128H:$vk)), ++ (VSEQ_H LSX128H:$vj, LSX128H:$vk)>; ++def : LSXPat<(int_loongarch_lsx_vseq_w (v4i32 LSX128W:$vj), (v4i32 LSX128W:$vk)), ++ (VSEQ_W LSX128W:$vj, LSX128W:$vk)>; ++def : LSXPat<(int_loongarch_lsx_vseq_d (v2i64 LSX128D:$vj), (v2i64 LSX128D:$vk)), ++ (VSEQ_D LSX128D:$vj, LSX128D:$vk)>; ++ ++def : LSXPat<(int_loongarch_lsx_vsle_b (v16i8 LSX128B:$vj), (v16i8 LSX128B:$vk)), ++ (VSLE_B LSX128B:$vj, LSX128B:$vk)>; ++def : LSXPat<(int_loongarch_lsx_vsle_h (v8i16 LSX128H:$vj), (v8i16 LSX128H:$vk)), ++ (VSLE_H LSX128H:$vj, LSX128H:$vk)>; ++def : LSXPat<(int_loongarch_lsx_vsle_w (v4i32 LSX128W:$vj), (v4i32 LSX128W:$vk)), ++ (VSLE_W LSX128W:$vj, LSX128W:$vk)>; ++def : LSXPat<(int_loongarch_lsx_vsle_d (v2i64 LSX128D:$vj), (v2i64 LSX128D:$vk)), ++ (VSLE_D LSX128D:$vj, LSX128D:$vk)>; ++ ++def : LSXPat<(int_loongarch_lsx_vsle_bu (v16i8 LSX128B:$vj), (v16i8 LSX128B:$vk)), ++ (VSLE_BU LSX128B:$vj, LSX128B:$vk)>; ++def : LSXPat<(int_loongarch_lsx_vsle_hu (v8i16 LSX128H:$vj), (v8i16 LSX128H:$vk)), ++ (VSLE_HU LSX128H:$vj, LSX128H:$vk)>; ++def : LSXPat<(int_loongarch_lsx_vsle_wu (v4i32 LSX128W:$vj), (v4i32 LSX128W:$vk)), ++ (VSLE_WU LSX128W:$vj, LSX128W:$vk)>; ++def : LSXPat<(int_loongarch_lsx_vsle_du (v2i64 LSX128D:$vj), (v2i64 LSX128D:$vk)), ++ (VSLE_DU LSX128D:$vj, LSX128D:$vk)>; ++ ++def : LSXPat<(int_loongarch_lsx_vslt_b (v16i8 LSX128B:$vj), (v16i8 LSX128B:$vk)), ++ (VSLT_B LSX128B:$vj, LSX128B:$vk)>; ++def : LSXPat<(int_loongarch_lsx_vslt_h (v8i16 LSX128H:$vj), (v8i16 LSX128H:$vk)), ++ (VSLT_H LSX128H:$vj, LSX128H:$vk)>; ++def : LSXPat<(int_loongarch_lsx_vslt_w (v4i32 LSX128W:$vj), (v4i32 LSX128W:$vk)), ++ (VSLT_W LSX128W:$vj, LSX128W:$vk)>; ++def : LSXPat<(int_loongarch_lsx_vslt_d (v2i64 LSX128D:$vj), (v2i64 LSX128D:$vk)), ++ (VSLT_D LSX128D:$vj, LSX128D:$vk)>; ++ ++def : LSXPat<(int_loongarch_lsx_vslt_bu (v16i8 LSX128B:$vj), (v16i8 LSX128B:$vk)), ++ (VSLT_BU LSX128B:$vj, LSX128B:$vk)>; ++def : LSXPat<(int_loongarch_lsx_vslt_hu (v8i16 LSX128H:$vj), (v8i16 LSX128H:$vk)), ++ (VSLT_HU LSX128H:$vj, LSX128H:$vk)>; ++def : LSXPat<(int_loongarch_lsx_vslt_wu (v4i32 LSX128W:$vj), (v4i32 LSX128W:$vk)), ++ (VSLT_WU LSX128W:$vj, LSX128W:$vk)>; ++def : LSXPat<(int_loongarch_lsx_vslt_du (v2i64 LSX128D:$vj), (v2i64 LSX128D:$vk)), ++ (VSLT_DU LSX128D:$vj, LSX128D:$vk)>; ++ ++def : LSXPat<(int_loongarch_lsx_vadd_b (v16i8 LSX128B:$vj), (v16i8 LSX128B:$vk)), ++ (VADD_B LSX128B:$vj, LSX128B:$vk)>; ++def : LSXPat<(int_loongarch_lsx_vadd_h (v8i16 LSX128H:$vj), (v8i16 LSX128H:$vk)), ++ (VADD_H LSX128H:$vj, LSX128H:$vk)>; ++def : LSXPat<(int_loongarch_lsx_vadd_w (v4i32 LSX128W:$vj), (v4i32 LSX128W:$vk)), ++ (VADD_W LSX128W:$vj, LSX128W:$vk)>; ++def : LSXPat<(int_loongarch_lsx_vadd_d (v2i64 LSX128D:$vj), (v2i64 LSX128D:$vk)), ++ (VADD_D LSX128D:$vj, LSX128D:$vk)>; ++ ++def : LSXPat<(int_loongarch_lsx_vsub_b (v16i8 
LSX128B:$vj), (v16i8 LSX128B:$vk)), ++ (VSUB_B LSX128B:$vj, LSX128B:$vk)>; ++def : LSXPat<(int_loongarch_lsx_vsub_h (v8i16 LSX128H:$vj), (v8i16 LSX128H:$vk)), ++ (VSUB_H LSX128H:$vj, LSX128H:$vk)>; ++def : LSXPat<(int_loongarch_lsx_vsub_w (v4i32 LSX128W:$vj), (v4i32 LSX128W:$vk)), ++ (VSUB_W LSX128W:$vj, LSX128W:$vk)>; ++def : LSXPat<(int_loongarch_lsx_vsub_d (v2i64 LSX128D:$vj), (v2i64 LSX128D:$vk)), ++ (VSUB_D LSX128D:$vj, LSX128D:$vk)>; ++ ++def : LSXPat<(int_loongarch_lsx_vsadd_b (v16i8 LSX128B:$vj), (v16i8 LSX128B:$vk)), ++ (VSADD_B LSX128B:$vj, LSX128B:$vk)>; ++def : LSXPat<(int_loongarch_lsx_vsadd_h (v8i16 LSX128H:$vj), (v8i16 LSX128H:$vk)), ++ (VSADD_H LSX128H:$vj, LSX128H:$vk)>; ++def : LSXPat<(int_loongarch_lsx_vsadd_w (v4i32 LSX128W:$vj), (v4i32 LSX128W:$vk)), ++ (VSADD_W LSX128W:$vj, LSX128W:$vk)>; ++def : LSXPat<(int_loongarch_lsx_vsadd_d (v2i64 LSX128D:$vj), (v2i64 LSX128D:$vk)), ++ (VSADD_D LSX128D:$vj, LSX128D:$vk)>; ++ ++def : LSXPat<(int_loongarch_lsx_vssub_b (v16i8 LSX128B:$vj), (v16i8 LSX128B:$vk)), ++ (VSSUB_B LSX128B:$vj, LSX128B:$vk)>; ++def : LSXPat<(int_loongarch_lsx_vssub_h (v8i16 LSX128H:$vj), (v8i16 LSX128H:$vk)), ++ (VSSUB_H LSX128H:$vj, LSX128H:$vk)>; ++def : LSXPat<(int_loongarch_lsx_vssub_w (v4i32 LSX128W:$vj), (v4i32 LSX128W:$vk)), ++ (VSSUB_W LSX128W:$vj, LSX128W:$vk)>; ++def : LSXPat<(int_loongarch_lsx_vssub_d (v2i64 LSX128D:$vj), (v2i64 LSX128D:$vk)), ++ (VSSUB_D LSX128D:$vj, LSX128D:$vk)>; ++ ++def : LSXPat<(int_loongarch_lsx_vsadd_bu (v16i8 LSX128B:$vj), (v16i8 LSX128B:$vk)), ++ (VSADD_BU LSX128B:$vj, LSX128B:$vk)>; ++def : LSXPat<(int_loongarch_lsx_vsadd_hu (v8i16 LSX128H:$vj), (v8i16 LSX128H:$vk)), ++ (VSADD_HU LSX128H:$vj, LSX128H:$vk)>; ++def : LSXPat<(int_loongarch_lsx_vsadd_wu (v4i32 LSX128W:$vj), (v4i32 LSX128W:$vk)), ++ (VSADD_WU LSX128W:$vj, LSX128W:$vk)>; ++def : LSXPat<(int_loongarch_lsx_vsadd_du (v2i64 LSX128D:$vj), (v2i64 LSX128D:$vk)), ++ (VSADD_DU LSX128D:$vj, LSX128D:$vk)>; ++ ++def : LSXPat<(int_loongarch_lsx_vssub_bu (v16i8 LSX128B:$vj), (v16i8 LSX128B:$vk)), ++ (VSSUB_BU LSX128B:$vj, LSX128B:$vk)>; ++def : LSXPat<(int_loongarch_lsx_vssub_hu (v8i16 LSX128H:$vj), (v8i16 LSX128H:$vk)), ++ (VSSUB_HU LSX128H:$vj, LSX128H:$vk)>; ++def : LSXPat<(int_loongarch_lsx_vssub_wu (v4i32 LSX128W:$vj), (v4i32 LSX128W:$vk)), ++ (VSSUB_WU LSX128W:$vj, LSX128W:$vk)>; ++def : LSXPat<(int_loongarch_lsx_vssub_du (v2i64 LSX128D:$vj), (v2i64 LSX128D:$vk)), ++ (VSSUB_DU LSX128D:$vj, LSX128D:$vk)>; ++ ++def : LSXPat<(int_loongarch_lsx_vhaddw_h_b (v16i8 LSX128B:$vj), (v16i8 LSX128B:$vk)), ++ (VHADDW_H_B LSX128B:$vj, LSX128B:$vk)>; ++def : LSXPat<(int_loongarch_lsx_vhaddw_w_h (v8i16 LSX128H:$vj), (v8i16 LSX128H:$vk)), ++ (VHADDW_W_H LSX128H:$vj, LSX128H:$vk)>; ++def : LSXPat<(int_loongarch_lsx_vhaddw_d_w (v4i32 LSX128W:$vj), (v4i32 LSX128W:$vk)), ++ (VHADDW_D_W LSX128W:$vj, LSX128W:$vk)>; ++ ++def : LSXPat<(int_loongarch_lsx_vhsubw_h_b (v16i8 LSX128B:$vj), (v16i8 LSX128B:$vk)), ++ (VHSUBW_H_B LSX128B:$vj, LSX128B:$vk)>; ++def : LSXPat<(int_loongarch_lsx_vhsubw_w_h (v8i16 LSX128H:$vj), (v8i16 LSX128H:$vk)), ++ (VHSUBW_W_H LSX128H:$vj, LSX128H:$vk)>; ++def : LSXPat<(int_loongarch_lsx_vhsubw_d_w (v4i32 LSX128W:$vj), (v4i32 LSX128W:$vk)), ++ (VHSUBW_D_W LSX128W:$vj, LSX128W:$vk)>; ++ ++def : LSXPat<(int_loongarch_lsx_vhaddw_hu_bu (v16i8 LSX128B:$vj), (v16i8 LSX128B:$vk)), ++ (VHADDW_HU_BU LSX128B:$vj, LSX128B:$vk)>; ++def : LSXPat<(int_loongarch_lsx_vhaddw_wu_hu (v8i16 LSX128H:$vj), (v8i16 LSX128H:$vk)), ++ (VHADDW_WU_HU LSX128H:$vj, LSX128H:$vk)>; ++def : 
LSXPat<(int_loongarch_lsx_vhaddw_du_wu (v4i32 LSX128W:$vj), (v4i32 LSX128W:$vk)), ++ (VHADDW_DU_WU LSX128W:$vj, LSX128W:$vk)>; ++ ++def : LSXPat<(int_loongarch_lsx_vhsubw_hu_bu (v16i8 LSX128B:$vj), (v16i8 LSX128B:$vk)), ++ (VHSUBW_HU_BU LSX128B:$vj, LSX128B:$vk)>; ++def : LSXPat<(int_loongarch_lsx_vhsubw_wu_hu (v8i16 LSX128H:$vj), (v8i16 LSX128H:$vk)), ++ (VHSUBW_WU_HU LSX128H:$vj, LSX128H:$vk)>; ++def : LSXPat<(int_loongarch_lsx_vhsubw_du_wu (v4i32 LSX128W:$vj), (v4i32 LSX128W:$vk)), ++ (VHSUBW_DU_WU LSX128W:$vj, LSX128W:$vk)>; ++ ++def : LSXPat<(int_loongarch_lsx_vadda_b (v16i8 LSX128B:$vj), (v16i8 LSX128B:$vk)), ++ (VADDA_B LSX128B:$vj, LSX128B:$vk)>; ++def : LSXPat<(int_loongarch_lsx_vadda_h (v8i16 LSX128H:$vj), (v8i16 LSX128H:$vk)), ++ (VADDA_H LSX128H:$vj, LSX128H:$vk)>; ++def : LSXPat<(int_loongarch_lsx_vadda_w (v4i32 LSX128W:$vj), (v4i32 LSX128W:$vk)), ++ (VADDA_W LSX128W:$vj, LSX128W:$vk)>; ++def : LSXPat<(int_loongarch_lsx_vadda_d (v2i64 LSX128D:$vj), (v2i64 LSX128D:$vk)), ++ (VADDA_D LSX128D:$vj, LSX128D:$vk)>; ++ ++def : LSXPat<(int_loongarch_lsx_vabsd_b (v16i8 LSX128B:$vj), (v16i8 LSX128B:$vk)), ++ (VABSD_B LSX128B:$vj, LSX128B:$vk)>; ++def : LSXPat<(int_loongarch_lsx_vabsd_h (v8i16 LSX128H:$vj), (v8i16 LSX128H:$vk)), ++ (VABSD_H LSX128H:$vj, LSX128H:$vk)>; ++def : LSXPat<(int_loongarch_lsx_vabsd_w (v4i32 LSX128W:$vj), (v4i32 LSX128W:$vk)), ++ (VABSD_W LSX128W:$vj, LSX128W:$vk)>; ++def : LSXPat<(int_loongarch_lsx_vabsd_d (v2i64 LSX128D:$vj), (v2i64 LSX128D:$vk)), ++ (VABSD_D LSX128D:$vj, LSX128D:$vk)>; ++ ++def : LSXPat<(int_loongarch_lsx_vabsd_bu (v16i8 LSX128B:$vj), (v16i8 LSX128B:$vk)), ++ (VABSD_BU LSX128B:$vj, LSX128B:$vk)>; ++def : LSXPat<(int_loongarch_lsx_vabsd_hu (v8i16 LSX128H:$vj), (v8i16 LSX128H:$vk)), ++ (VABSD_HU LSX128H:$vj, LSX128H:$vk)>; ++def : LSXPat<(int_loongarch_lsx_vabsd_wu (v4i32 LSX128W:$vj), (v4i32 LSX128W:$vk)), ++ (VABSD_WU LSX128W:$vj, LSX128W:$vk)>; ++def : LSXPat<(int_loongarch_lsx_vabsd_du (v2i64 LSX128D:$vj), (v2i64 LSX128D:$vk)), ++ (VABSD_DU LSX128D:$vj, LSX128D:$vk)>; ++ ++def : LSXPat<(int_loongarch_lsx_vavg_b (v16i8 LSX128B:$vj), (v16i8 LSX128B:$vk)), ++ (VAVG_B LSX128B:$vj, LSX128B:$vk)>; ++def : LSXPat<(int_loongarch_lsx_vavg_h (v8i16 LSX128H:$vj), (v8i16 LSX128H:$vk)), ++ (VAVG_H LSX128H:$vj, LSX128H:$vk)>; ++def : LSXPat<(int_loongarch_lsx_vavg_w (v4i32 LSX128W:$vj), (v4i32 LSX128W:$vk)), ++ (VAVG_W LSX128W:$vj, LSX128W:$vk)>; ++def : LSXPat<(int_loongarch_lsx_vavg_d (v2i64 LSX128D:$vj), (v2i64 LSX128D:$vk)), ++ (VAVG_D LSX128D:$vj, LSX128D:$vk)>; ++ ++def : LSXPat<(int_loongarch_lsx_vavg_bu (v16i8 LSX128B:$vj), (v16i8 LSX128B:$vk)), ++ (VAVG_BU LSX128B:$vj, LSX128B:$vk)>; ++def : LSXPat<(int_loongarch_lsx_vavg_hu (v8i16 LSX128H:$vj), (v8i16 LSX128H:$vk)), ++ (VAVG_HU LSX128H:$vj, LSX128H:$vk)>; ++def : LSXPat<(int_loongarch_lsx_vavg_wu (v4i32 LSX128W:$vj), (v4i32 LSX128W:$vk)), ++ (VAVG_WU LSX128W:$vj, LSX128W:$vk)>; ++def : LSXPat<(int_loongarch_lsx_vavg_du (v2i64 LSX128D:$vj), (v2i64 LSX128D:$vk)), ++ (VAVG_DU LSX128D:$vj, LSX128D:$vk)>; ++ ++def : LSXPat<(int_loongarch_lsx_vavgr_b (v16i8 LSX128B:$vj), (v16i8 LSX128B:$vk)), ++ (VAVGR_B LSX128B:$vj, LSX128B:$vk)>; ++def : LSXPat<(int_loongarch_lsx_vavgr_h (v8i16 LSX128H:$vj), (v8i16 LSX128H:$vk)), ++ (VAVGR_H LSX128H:$vj, LSX128H:$vk)>; ++def : LSXPat<(int_loongarch_lsx_vavgr_w (v4i32 LSX128W:$vj), (v4i32 LSX128W:$vk)), ++ (VAVGR_W LSX128W:$vj, LSX128W:$vk)>; ++def : LSXPat<(int_loongarch_lsx_vavgr_d (v2i64 LSX128D:$vj), (v2i64 LSX128D:$vk)), ++ (VAVGR_D LSX128D:$vj, LSX128D:$vk)>; 
++ ++def : LSXPat<(int_loongarch_lsx_vavgr_bu (v16i8 LSX128B:$vj), (v16i8 LSX128B:$vk)), ++ (VAVGR_BU LSX128B:$vj, LSX128B:$vk)>; ++def : LSXPat<(int_loongarch_lsx_vavgr_hu (v8i16 LSX128H:$vj), (v8i16 LSX128H:$vk)), ++ (VAVGR_HU LSX128H:$vj, LSX128H:$vk)>; ++def : LSXPat<(int_loongarch_lsx_vavgr_wu (v4i32 LSX128W:$vj), (v4i32 LSX128W:$vk)), ++ (VAVGR_WU LSX128W:$vj, LSX128W:$vk)>; ++def : LSXPat<(int_loongarch_lsx_vavgr_du (v2i64 LSX128D:$vj), (v2i64 LSX128D:$vk)), ++ (VAVGR_DU LSX128D:$vj, LSX128D:$vk)>; ++ ++def : LSXPat<(int_loongarch_lsx_vsrlr_b (v16i8 LSX128B:$vj), (v16i8 LSX128B:$vk)), ++ (VSRLR_B LSX128B:$vj, LSX128B:$vk)>; ++def : LSXPat<(int_loongarch_lsx_vsrlr_h (v8i16 LSX128H:$vj), (v8i16 LSX128H:$vk)), ++ (VSRLR_H LSX128H:$vj, LSX128H:$vk)>; ++def : LSXPat<(int_loongarch_lsx_vsrlr_w (v4i32 LSX128W:$vj), (v4i32 LSX128W:$vk)), ++ (VSRLR_W LSX128W:$vj, LSX128W:$vk)>; ++def : LSXPat<(int_loongarch_lsx_vsrlr_d (v2i64 LSX128D:$vj), (v2i64 LSX128D:$vk)), ++ (VSRLR_D LSX128D:$vj, LSX128D:$vk)>; ++ ++def : LSXPat<(int_loongarch_lsx_vsrar_b (v16i8 LSX128B:$vj), (v16i8 LSX128B:$vk)), ++ (VSRAR_B LSX128B:$vj, LSX128B:$vk)>; ++def : LSXPat<(int_loongarch_lsx_vsrar_h (v8i16 LSX128H:$vj), (v8i16 LSX128H:$vk)), ++ (VSRAR_H LSX128H:$vj, LSX128H:$vk)>; ++def : LSXPat<(int_loongarch_lsx_vsrar_w (v4i32 LSX128W:$vj), (v4i32 LSX128W:$vk)), ++ (VSRAR_W LSX128W:$vj, LSX128W:$vk)>; ++def : LSXPat<(int_loongarch_lsx_vsrar_d (v2i64 LSX128D:$vj), (v2i64 LSX128D:$vk)), ++ (VSRAR_D LSX128D:$vj, LSX128D:$vk)>; ++ ++def : LSXPat<(int_loongarch_lsx_vbitset_b (v16i8 LSX128B:$vj), (v16i8 LSX128B:$vk)), ++ (VBITSET_B LSX128B:$vj, LSX128B:$vk)>; ++def : LSXPat<(int_loongarch_lsx_vbitset_h (v8i16 LSX128H:$vj), (v8i16 LSX128H:$vk)), ++ (VBITSET_H LSX128H:$vj, LSX128H:$vk)>; ++def : LSXPat<(int_loongarch_lsx_vbitset_w (v4i32 LSX128W:$vj), (v4i32 LSX128W:$vk)), ++ (VBITSET_W LSX128W:$vj, LSX128W:$vk)>; ++def : LSXPat<(int_loongarch_lsx_vbitset_d (v2i64 LSX128D:$vj), (v2i64 LSX128D:$vk)), ++ (VBITSET_D LSX128D:$vj, LSX128D:$vk)>; ++ ++def : LSXPat<(int_loongarch_lsx_vbitrev_b (v16i8 LSX128B:$vj), (v16i8 LSX128B:$vk)), ++ (VBITREV_B LSX128B:$vj, LSX128B:$vk)>; ++def : LSXPat<(int_loongarch_lsx_vbitrev_h (v8i16 LSX128H:$vj), (v8i16 LSX128H:$vk)), ++ (VBITREV_H LSX128H:$vj, LSX128H:$vk)>; ++def : LSXPat<(int_loongarch_lsx_vbitrev_w (v4i32 LSX128W:$vj), (v4i32 LSX128W:$vk)), ++ (VBITREV_W LSX128W:$vj, LSX128W:$vk)>; ++def : LSXPat<(int_loongarch_lsx_vbitrev_d (v2i64 LSX128D:$vj), (v2i64 LSX128D:$vk)), ++ (VBITREV_D LSX128D:$vj, LSX128D:$vk)>; ++ ++def : LSXPat<(int_loongarch_lsx_vfadd_s (v4f32 LSX128W:$vj), (v4f32 LSX128W:$vk)), ++ (VFADD_S LSX128W:$vj, LSX128W:$vk)>; ++def : LSXPat<(int_loongarch_lsx_vfadd_d (v2f64 LSX128D:$vj), (v2f64 LSX128D:$vk)), ++ (VFADD_D LSX128D:$vj, LSX128D:$vk)>; ++ ++def : LSXPat<(int_loongarch_lsx_vfsub_s (v4f32 LSX128W:$vj), (v4f32 LSX128W:$vk)), ++ (VFSUB_S LSX128W:$vj, LSX128W:$vk)>; ++def : LSXPat<(int_loongarch_lsx_vfsub_d (v2f64 LSX128D:$vj), (v2f64 LSX128D:$vk)), ++ (VFSUB_D LSX128D:$vj, LSX128D:$vk)>; ++ ++def : LSXPat<(int_loongarch_lsx_vfmax_s (v4f32 LSX128W:$vj), (v4f32 LSX128W:$vk)), ++ (VFMAX_S LSX128W:$vj, LSX128W:$vk)>; ++def : LSXPat<(int_loongarch_lsx_vfmax_d (v2f64 LSX128D:$vj), (v2f64 LSX128D:$vk)), ++ (VFMAX_D LSX128D:$vj, LSX128D:$vk)>; ++ ++def : LSXPat<(int_loongarch_lsx_vfmin_s (v4f32 LSX128W:$vj), (v4f32 LSX128W:$vk)), ++ (VFMIN_S LSX128W:$vj, LSX128W:$vk)>; ++def : LSXPat<(int_loongarch_lsx_vfmin_d (v2f64 LSX128D:$vj), (v2f64 LSX128D:$vk)), ++ (VFMIN_D LSX128D:$vj, 
LSX128D:$vk)>; ++ ++def : LSXPat<(int_loongarch_lsx_vfmaxa_s (v4f32 LSX128W:$vj), (v4f32 LSX128W:$vk)), ++ (VFMAXA_S LSX128W:$vj, LSX128W:$vk)>; ++def : LSXPat<(int_loongarch_lsx_vfmaxa_d (v2f64 LSX128D:$vj), (v2f64 LSX128D:$vk)), ++ (VFMAXA_D LSX128D:$vj, LSX128D:$vk)>; ++ ++def : LSXPat<(int_loongarch_lsx_vfmina_s (v4f32 LSX128W:$vj), (v4f32 LSX128W:$vk)), ++ (VFMINA_S LSX128W:$vj, LSX128W:$vk)>; ++def : LSXPat<(int_loongarch_lsx_vfmina_d (v2f64 LSX128D:$vj), (v2f64 LSX128D:$vk)), ++ (VFMINA_D LSX128D:$vj, LSX128D:$vk)>; ++ ++def : LSXPat<(int_loongarch_lsx_vclo_b (v16i8 LSX128B:$vj)), ++ (VCLO_B LSX128B:$vj)>; ++def : LSXPat<(int_loongarch_lsx_vclo_h (v8i16 LSX128H:$vj)), ++ (VCLO_H LSX128H:$vj)>; ++def : LSXPat<(int_loongarch_lsx_vclo_w (v4i32 LSX128W:$vj)), ++ (VCLO_W LSX128W:$vj)>; ++def : LSXPat<(int_loongarch_lsx_vclo_d (v2i64 LSX128D:$vj)), ++ (VCLO_D LSX128D:$vj)>; ++ ++def : LSXPat<(int_loongarch_lsx_vflogb_s (v4f32 LSX128W:$vj)), ++ (VFLOGB_S LSX128W:$vj)>; ++def : LSXPat<(int_loongarch_lsx_vflogb_d (v2f64 LSX128D:$vj)), ++ (VFLOGB_D LSX128D:$vj)>; ++ ++def : LSXPat<(int_loongarch_lsx_vfclass_s (v4f32 LSX128W:$vj)), ++ (VFCLASS_S LSX128W:$vj)>; ++def : LSXPat<(int_loongarch_lsx_vfclass_d (v2f64 LSX128D:$vj)), ++ (VFCLASS_D LSX128D:$vj)>; ++ ++def : LSXPat<(int_loongarch_lsx_vfrecip_s (v4f32 LSX128W:$vj)), ++ (VFRECIP_S LSX128W:$vj)>; ++def : LSXPat<(int_loongarch_lsx_vfrecip_d (v2f64 LSX128D:$vj)), ++ (VFRECIP_D LSX128D:$vj)>; ++ ++def : LSXPat<(int_loongarch_lsx_vfrsqrt_s (v4f32 LSX128W:$vj)), ++ (VFRSQRT_S LSX128W:$vj)>; ++def : LSXPat<(int_loongarch_lsx_vfrsqrt_d (v2f64 LSX128D:$vj)), ++ (VFRSQRT_D LSX128D:$vj)>; ++ ++def : LSXPat<(int_loongarch_lsx_vfcvtl_s_h (v8i16 LSX128H:$vk)), ++ (VFCVTL_S_H LSX128H:$vk)>; ++def : LSXPat<(int_loongarch_lsx_vfcvth_s_h (v8i16 LSX128H:$vk)), ++ (VFCVTH_S_H LSX128H:$vk)>; ++ ++def : LSXPat<(int_loongarch_lsx_vfcvtl_d_s (v4f32 LSX128W:$vj)), ++ (VFCVTL_D_S LSX128W:$vj)>; ++def : LSXPat<(int_loongarch_lsx_vfcvth_d_s (v4f32 LSX128W:$vj)), ++ (VFCVTH_D_S LSX128W:$vj)>; ++ ++def : LSXPat<(int_loongarch_lsx_vftint_w_s (v4f32 LSX128W:$vj)), ++ (VFTINT_W_S LSX128W:$vj)>; ++def : LSXPat<(int_loongarch_lsx_vftint_l_d (v2f64 LSX128D:$vj)), ++ (VFTINT_L_D LSX128D:$vj)>; ++ ++def : LSXPat<(int_loongarch_lsx_vftint_wu_s (v4f32 LSX128W:$vj)), ++ (VFTINT_WU_S LSX128W:$vj)>; ++def : LSXPat<(int_loongarch_lsx_vftint_lu_d (v2f64 LSX128D:$vj)), ++ (VFTINT_LU_D LSX128D:$vj)>; ++ ++def : LSXPat<(int_loongarch_lsx_vreplgr2vr_b GPR32Opnd:$rj), ++ (VREPLGR2VR_B GPR32Opnd:$rj)>; ++def : LSXPat<(int_loongarch_lsx_vreplgr2vr_h GPR32Opnd:$rj), ++ (VREPLGR2VR_H GPR32Opnd:$rj)>; ++def : LSXPat<(int_loongarch_lsx_vreplgr2vr_w GPR32Opnd:$rj), ++ (VREPLGR2VR_W GPR32Opnd:$rj)>; ++def : LSXPat<(int_loongarch_lsx_vreplgr2vr_d GPR64Opnd:$rj), ++ (VREPLGR2VR_D GPR64Opnd:$rj)>; ++ ++def : LSXPat<(int_loongarch_lsx_vsrlri_b (v16i8 LSX128B:$vj), (immZExt3:$ui3)), ++ (VSRLRI_B LSX128B:$vj, uimm3:$ui3)>; ++def : LSXPat<(int_loongarch_lsx_vsrlri_h (v8i16 LSX128H:$vj), (immZExt4:$ui4)), ++ (VSRLRI_H LSX128H:$vj, uimm4i:$ui4)>; ++def : LSXPat<(int_loongarch_lsx_vsrlri_w (v4i32 LSX128W:$vj), (immZExt5:$ui5)), ++ (VSRLRI_W LSX128W:$vj, uimm5:$ui5)>; ++def : LSXPat<(int_loongarch_lsx_vsrlri_d (v2i64 LSX128D:$vj), (immZExt6:$ui6)), ++ (VSRLRI_D LSX128D:$vj, uimm6:$ui6)>; ++ ++def : LSXPat<(int_loongarch_lsx_vsrari_b (v16i8 LSX128B:$vj), (immZExt3:$ui3)), ++ (VSRARI_B LSX128B:$vj, uimm3:$ui3)>; ++def : LSXPat<(int_loongarch_lsx_vsrari_h (v8i16 LSX128H:$vj), (immZExt4:$ui4)), ++ (VSRARI_H 
LSX128H:$vj, uimm4i:$ui4)>; ++def : LSXPat<(int_loongarch_lsx_vsrari_w (v4i32 LSX128W:$vj), (immZExt5:$ui5)), ++ (VSRARI_W LSX128W:$vj, uimm5:$ui5)>; ++def : LSXPat<(int_loongarch_lsx_vsrari_d (v2i64 LSX128D:$vj), (immZExt6:$ui6)), ++ (VSRARI_D LSX128D:$vj, uimm6:$ui6)>; ++ ++def : LSXPat<(int_loongarch_lsx_vinsgr2vr_b (v16i8 LSX128B:$vj), GPR32Opnd:$rj, (immZExt4:$ui4)), ++ (VINSGR2VR_B LSX128B:$vj, GPR32Opnd:$rj, (uimm4i:$ui4))>; ++def : LSXPat<(int_loongarch_lsx_vinsgr2vr_h (v8i16 LSX128H:$vj), GPR32Opnd:$rj, (immZExt3:$ui3)), ++ (VINSGR2VR_H LSX128H:$vj, GPR32Opnd:$rj, uimm3:$ui3)>; ++def : LSXPat<(int_loongarch_lsx_vinsgr2vr_w (v4i32 LSX128W:$vj), GPR32Opnd:$rj, (immZExt2:$ui2)), ++ (VINSGR2VR_W LSX128W:$vj, GPR32Opnd:$rj, uimm2:$ui2)>; ++def : LSXPat<(int_loongarch_lsx_vinsgr2vr_d (v2i64 LSX128D:$vj), GPR64Opnd:$rj, (immZExt1:$ui1)), ++ (VINSGR2VR_D LSX128D:$vj, GPR64Opnd:$rj, uimm1i:$ui1)>; ++ ++def : LSXPat<(int_loongarch_lsx_vpickve2gr_b (v16i8 LSX128B:$vj), (immZExt4:$ui4)), ++ (VPICKVE2GR_B LSX128B:$vj, (uimm4i:$ui4))>; ++def : LSXPat<(int_loongarch_lsx_vpickve2gr_h (v8i16 LSX128H:$vj), (immZExt3:$ui3)), ++ (VPICKVE2GR_H LSX128H:$vj, uimm3:$ui3)>; ++def : LSXPat<(int_loongarch_lsx_vpickve2gr_w (v4i32 LSX128W:$vj), (immZExt2:$ui2)), ++ (VPICKVE2GR_W LSX128W:$vj, uimm2:$ui2)>; ++def : LSXPat<(int_loongarch_lsx_vpickve2gr_d (v2i64 LSX128D:$vj), (immZExt1:$ui1)), ++ (VPICKVE2GR_D LSX128D:$vj, uimm1i:$ui1)>; ++ ++def : LSXPat<(int_loongarch_lsx_vpickve2gr_bu (v16i8 LSX128B:$vj), (immZExt4:$ui4)), ++ (VPICKVE2GR_BU LSX128B:$vj, (uimm4i:$ui4))>; ++def : LSXPat<(int_loongarch_lsx_vpickve2gr_hu (v8i16 LSX128H:$vj), (immZExt3:$ui3)), ++ (VPICKVE2GR_HU LSX128H:$vj, uimm3:$ui3)>; ++def : LSXPat<(int_loongarch_lsx_vpickve2gr_wu (v4i32 LSX128W:$vj), (immZExt2:$ui2)), ++ (VPICKVE2GR_WU LSX128W:$vj, uimm2:$ui2)>; ++ ++def : LSXPat<(int_loongarch_lsx_vsat_b (v16i8 LSX128B:$vj), (immZExt3:$ui3)), ++ (VSAT_B LSX128B:$vj, uimm3:$ui3)>; ++def : LSXPat<(int_loongarch_lsx_vsat_h (v8i16 LSX128H:$vj), (immZExt4:$ui4)), ++ (VSAT_H LSX128H:$vj, uimm4i:$ui4)>; ++def : LSXPat<(int_loongarch_lsx_vsat_w (v4i32 LSX128W:$vj), (immZExt5:$ui5)), ++ (VSAT_W LSX128W:$vj, uimm5:$ui5)>; ++def : LSXPat<(int_loongarch_lsx_vsat_d (v2i64 LSX128D:$vj), (immZExt6:$ui6)), ++ (VSAT_D LSX128D:$vj, uimm6:$ui6)>; ++ ++def : LSXPat<(int_loongarch_lsx_vsat_bu (v16i8 LSX128B:$vj), (immZExt3:$ui3)), ++ (VSAT_BU LSX128B:$vj, uimm3:$ui3)>; ++def : LSXPat<(int_loongarch_lsx_vsat_hu (v8i16 LSX128H:$vj), (immZExt4:$ui4)), ++ (VSAT_HU LSX128H:$vj, uimm4i:$ui4)>; ++def : LSXPat<(int_loongarch_lsx_vsat_wu (v4i32 LSX128W:$vj), (immZExt5:$ui5)), ++ (VSAT_WU LSX128W:$vj, uimm5:$ui5)>; ++def : LSXPat<(int_loongarch_lsx_vsat_du (v2i64 LSX128D:$vj), (immZExt6:$ui6)), ++ (VSAT_DU LSX128D:$vj, uimm6:$ui6)>; ++ ++def : LSXPat<(int_loongarch_lsx_vmskltz_b (v16i8 LSX128B:$vj)), ++ (VMSKLTZ_B LSX128B:$vj)>; ++def : LSXPat<(int_loongarch_lsx_vmskltz_h (v8i16 LSX128H:$vj)), ++ (VMSKLTZ_H LSX128H:$vj)>; ++def : LSXPat<(int_loongarch_lsx_vmskltz_w (v4i32 LSX128W:$vj)), ++ (VMSKLTZ_W LSX128W:$vj)>; ++def : LSXPat<(int_loongarch_lsx_vmskltz_d (v2i64 LSX128D:$vj)), ++ (VMSKLTZ_D LSX128D:$vj)>; ++ ++def : LSXPat<(int_loongarch_lsx_vsrlni_b_h (v16i8 LSX128B:$vd_in), (v16i8 LSX128B:$vj), (immZExt4:$ui4)), ++ (VSRLNI_B_H LSX128B:$vd_in, LSX128B:$vj, uimm4i:$ui4)>; ++def : LSXPat<(int_loongarch_lsx_vsrlni_h_w (v8i16 LSX128H:$vd_in), (v8i16 LSX128H:$vj), (immZExt5:$ui5)), ++ (VSRLNI_H_W LSX128H:$vd_in, LSX128H:$vj, uimm5:$ui5)>; ++def : 
LSXPat<(int_loongarch_lsx_vsrlni_w_d (v4i32 LSX128W:$vd_in), (v4i32 LSX128W:$vj), (immZExt6:$ui6)), ++ (VSRLNI_W_D LSX128W:$vd_in, LSX128W:$vj, uimm6:$ui6)>; ++def : LSXPat<(int_loongarch_lsx_vsrlni_d_q (v2i64 LSX128D:$vd_in), (v2i64 LSX128D:$vj), (immZExt7:$ui7)), ++ (VSRLNI_D_Q LSX128D:$vd_in, LSX128D:$vj, uimm7i:$ui7)>; ++ ++def : LSXPat<(int_loongarch_lsx_vssrlni_b_h (v16i8 LSX128B:$vd_in), (v16i8 LSX128B:$vj), (immZExt4:$ui4)), ++ (VSSRLNI_B_H LSX128B:$vd_in, LSX128B:$vj, uimm4i:$ui4)>; ++def : LSXPat<(int_loongarch_lsx_vssrlni_h_w (v8i16 LSX128H:$vd_in), (v8i16 LSX128H:$vj), (immZExt5:$ui5)), ++ (VSSRLNI_H_W LSX128H:$vd_in, LSX128H:$vj, uimm5:$ui5)>; ++def : LSXPat<(int_loongarch_lsx_vssrlni_w_d (v4i32 LSX128W:$vd_in), (v4i32 LSX128W:$vj), (immZExt6:$ui6)), ++ (VSSRLNI_W_D LSX128W:$vd_in, LSX128W:$vj, uimm6:$ui6)>; ++def : LSXPat<(int_loongarch_lsx_vssrlni_d_q (v2i64 LSX128D:$vd_in), (v2i64 LSX128D:$vj), (immZExt7:$ui7)), ++ (VSSRLNI_D_Q LSX128D:$vd_in, LSX128D:$vj, uimm7i:$ui7)>; ++ ++def : LSXPat<(int_loongarch_lsx_vssrlni_bu_h (v16i8 LSX128B:$vd_in), (v16i8 LSX128B:$vj), (immZExt4:$ui4)), ++ (VSSRLNI_BU_H LSX128B:$vd_in, LSX128B:$vj, uimm4i:$ui4)>; ++def : LSXPat<(int_loongarch_lsx_vssrlni_hu_w (v8i16 LSX128H:$vd_in), (v8i16 LSX128H:$vj), (immZExt5:$ui5)), ++ (VSSRLNI_HU_W LSX128H:$vd_in, LSX128H:$vj, uimm5:$ui5)>; ++def : LSXPat<(int_loongarch_lsx_vssrlni_wu_d (v4i32 LSX128W:$vd_in), (v4i32 LSX128W:$vj), (immZExt6:$ui6)), ++ (VSSRLNI_WU_D LSX128W:$vd_in, LSX128W:$vj, uimm6:$ui6)>; ++def : LSXPat<(int_loongarch_lsx_vssrlni_du_q (v2i64 LSX128D:$vd_in), (v2i64 LSX128D:$vj), (immZExt7:$ui7)), ++ (VSSRLNI_DU_Q LSX128D:$vd_in, LSX128D:$vj, uimm7i:$ui7)>; ++ ++def : LSXPat<(int_loongarch_lsx_vssrlrni_bu_h (v16i8 LSX128B:$vd_in), (v16i8 LSX128B:$vj), (immZExt4:$ui4)), ++ (VSSRLRNI_BU_H LSX128B:$vd_in, LSX128B:$vj, uimm4i:$ui4)>; ++def : LSXPat<(int_loongarch_lsx_vssrlrni_hu_w (v8i16 LSX128H:$vd_in), (v8i16 LSX128H:$vj), (immZExt5:$ui5)), ++ (VSSRLRNI_HU_W LSX128H:$vd_in, LSX128H:$vj, uimm5:$ui5)>; ++def : LSXPat<(int_loongarch_lsx_vssrlrni_wu_d (v4i32 LSX128W:$vd_in), (v4i32 LSX128W:$vj), (immZExt6:$ui6)), ++ (VSSRLRNI_WU_D LSX128W:$vd_in, LSX128W:$vj, uimm6:$ui6)>; ++def : LSXPat<(int_loongarch_lsx_vssrlrni_du_q (v2i64 LSX128D:$vd_in), (v2i64 LSX128D:$vj), (immZExt7:$ui7)), ++ (VSSRLRNI_DU_Q LSX128D:$vd_in, LSX128D:$vj, uimm7i:$ui7)>; ++ ++def : LSXPat<(int_loongarch_lsx_vsrarni_b_h (v16i8 LSX128B:$vd_in), (v16i8 LSX128B:$vj), (immZExt4:$ui4)), ++ (VSRARNI_B_H LSX128B:$vd_in, LSX128B:$vj, uimm4i:$ui4)>; ++def : LSXPat<(int_loongarch_lsx_vsrarni_h_w (v8i16 LSX128H:$vd_in), (v8i16 LSX128H:$vj), (immZExt5:$ui5)), ++ (VSRARNI_H_W LSX128H:$vd_in, LSX128H:$vj, uimm5:$ui5)>; ++def : LSXPat<(int_loongarch_lsx_vsrarni_w_d (v4i32 LSX128W:$vd_in), (v4i32 LSX128W:$vj), (immZExt6:$ui6)), ++ (VSRARNI_W_D LSX128W:$vd_in, LSX128W:$vj, uimm6:$ui6)>; ++def : LSXPat<(int_loongarch_lsx_vsrarni_d_q (v2i64 LSX128D:$vd_in), (v2i64 LSX128D:$vj), (immZExt7:$ui7)), ++ (VSRARNI_D_Q LSX128D:$vd_in, LSX128D:$vj, uimm7i:$ui7)>; ++ ++def : LSXPat<(int_loongarch_lsx_vssrani_b_h (v16i8 LSX128B:$vd_in), (v16i8 LSX128B:$vj), (immZExt4:$ui4)), ++ (VSSRANI_B_H LSX128B:$vd_in, LSX128B:$vj, uimm4i:$ui4)>; ++def : LSXPat<(int_loongarch_lsx_vssrani_h_w (v8i16 LSX128H:$vd_in), (v8i16 LSX128H:$vj), (immZExt5:$ui5)), ++ (VSSRANI_H_W LSX128H:$vd_in, LSX128H:$vj, uimm5:$ui5)>; ++def : LSXPat<(int_loongarch_lsx_vssrani_w_d (v4i32 LSX128W:$vd_in), (v4i32 LSX128W:$vj), (immZExt6:$ui6)), ++ (VSSRANI_W_D LSX128W:$vd_in, LSX128W:$vj, 
uimm6:$ui6)>; ++def : LSXPat<(int_loongarch_lsx_vssrani_d_q (v2i64 LSX128D:$vd_in), (v2i64 LSX128D:$vj), (immZExt7:$ui7)), ++ (VSSRANI_D_Q LSX128D:$vd_in, LSX128D:$vj, uimm7i:$ui7)>; ++ ++def : LSXPat<(int_loongarch_lsx_vssrani_bu_h (v16i8 LSX128B:$vd_in), (v16i8 LSX128B:$vj), (immZExt4:$ui4)), ++ (VSSRANI_BU_H LSX128B:$vd_in, LSX128B:$vj, uimm4i:$ui4)>; ++def : LSXPat<(int_loongarch_lsx_vssrani_hu_w (v8i16 LSX128H:$vd_in), (v8i16 LSX128H:$vj), (immZExt5:$ui5)), ++ (VSSRANI_HU_W LSX128H:$vd_in, LSX128H:$vj, uimm5:$ui5)>; ++def : LSXPat<(int_loongarch_lsx_vssrani_wu_d (v4i32 LSX128W:$vd_in), (v4i32 LSX128W:$vj), (immZExt6:$ui6)), ++ (VSSRANI_WU_D LSX128W:$vd_in, LSX128W:$vj, uimm6:$ui6)>; ++def : LSXPat<(int_loongarch_lsx_vssrani_du_q (v2i64 LSX128D:$vd_in), (v2i64 LSX128D:$vj), (immZExt7:$ui7)), ++ (VSSRANI_DU_Q LSX128D:$vd_in, LSX128D:$vj, uimm7i:$ui7)>; ++ ++def : LSXPat<(int_loongarch_lsx_vssrarni_b_h (v16i8 LSX128B:$vd_in), (v16i8 LSX128B:$vj), (immZExt4:$ui4)), ++ (VSSRARNI_B_H LSX128B:$vd_in, LSX128B:$vj, uimm4i:$ui4)>; ++def : LSXPat<(int_loongarch_lsx_vssrarni_h_w (v8i16 LSX128H:$vd_in), (v8i16 LSX128H:$vj), (immZExt5:$ui5)), ++ (VSSRARNI_H_W LSX128H:$vd_in, LSX128H:$vj, uimm5:$ui5)>; ++def : LSXPat<(int_loongarch_lsx_vssrarni_w_d (v4i32 LSX128W:$vd_in), (v4i32 LSX128W:$vj), (immZExt6:$ui6)), ++ (VSSRARNI_W_D LSX128W:$vd_in, LSX128W:$vj, uimm6:$ui6)>; ++def : LSXPat<(int_loongarch_lsx_vssrarni_d_q (v2i64 LSX128D:$vd_in), (v2i64 LSX128D:$vj), (immZExt7:$ui7)), ++ (VSSRARNI_D_Q LSX128D:$vd_in, LSX128D:$vj, uimm7i:$ui7)>; ++ ++def : LSXPat<(int_loongarch_lsx_vssrarni_bu_h (v16i8 LSX128B:$vd_in), (v16i8 LSX128B:$vj), (immZExt4:$ui4)), ++ (VSSRARNI_BU_H LSX128B:$vd_in, LSX128B:$vj, uimm4i:$ui4)>; ++def : LSXPat<(int_loongarch_lsx_vssrarni_hu_w (v8i16 LSX128H:$vd_in), (v8i16 LSX128H:$vj), (immZExt5:$ui5)), ++ (VSSRARNI_HU_W LSX128H:$vd_in, LSX128H:$vj, uimm5:$ui5)>; ++def : LSXPat<(int_loongarch_lsx_vssrarni_wu_d (v4i32 LSX128W:$vd_in), (v4i32 LSX128W:$vj), (immZExt6:$ui6)), ++ (VSSRARNI_WU_D LSX128W:$vd_in, LSX128W:$vj, uimm6:$ui6)>; ++def : LSXPat<(int_loongarch_lsx_vssrarni_du_q (v2i64 LSX128D:$vd_in), (v2i64 LSX128D:$vj), (immZExt7:$ui7)), ++ (VSSRARNI_DU_Q LSX128D:$vd_in, LSX128D:$vj, uimm7i:$ui7)>; ++ ++def : LSXPat<(load (add iPTR:$vj, GPR64Opnd:$vk)), ++ (VLDX PtrRC:$vj, GPR64Opnd:$vk)>; ++ ++def : LSXPat<(store (v16i8 LSX128B:$vd), (add iPTR:$vj, GPR64Opnd:$vk)), ++ (VSTX LSX128B:$vd, PtrRC:$vj, GPR64Opnd:$vk)>; ++ ++def : LSXPat<(int_loongarch_lsx_vshuf_b (v16i8 LSX128B:$vj), (v16i8 LSX128B:$vk), (v16i8 LSX128B:$va)), ++ (VSHUF_B LSX128B:$vj, LSX128B:$vk, LSX128B:$va)>; ++ ++def : LSXPat<(int_loongarch_lsx_vfcmp_ceq_s (v4f32 LSX128W:$vj), (v4f32 LSX128W:$vk)), ++ (VFCMP_CEQ_S LSX128W:$vj, LSX128W:$vk)>; ++def : LSXPat<(int_loongarch_lsx_vfcmp_ceq_d (v2f64 LSX128D:$vj), (v2f64 LSX128D:$vk)), ++ (VFCMP_CEQ_D LSX128D:$vj, LSX128D:$vk)>; ++ ++def : LSXPat<(int_loongarch_lsx_vfcmp_cor_s (v4f32 LSX128W:$vj), (v4f32 LSX128W:$vk)), ++ (VFCMP_COR_S LSX128W:$vj, LSX128W:$vk)>; ++def : LSXPat<(int_loongarch_lsx_vfcmp_cor_d (v2f64 LSX128D:$vj), (v2f64 LSX128D:$vk)), ++ (VFCMP_COR_D LSX128D:$vj, LSX128D:$vk)>; ++ ++def : LSXPat<(int_loongarch_lsx_vfcmp_cun_s (v4f32 LSX128W:$vj), (v4f32 LSX128W:$vk)), ++ (VFCMP_CUN_S LSX128W:$vj, LSX128W:$vk)>; ++def : LSXPat<(int_loongarch_lsx_vfcmp_cun_d (v2f64 LSX128D:$vj), (v2f64 LSX128D:$vk)), ++ (VFCMP_CUN_D LSX128D:$vj, LSX128D:$vk)>; ++ ++def : LSXPat<(int_loongarch_lsx_vfcmp_cune_s (v4f32 LSX128W:$vj), (v4f32 LSX128W:$vk)), ++ (VFCMP_CUNE_S 
LSX128W:$vj, LSX128W:$vk)>; ++def : LSXPat<(int_loongarch_lsx_vfcmp_cune_d (v2f64 LSX128D:$vj), (v2f64 LSX128D:$vk)), ++ (VFCMP_CUNE_D LSX128D:$vj, LSX128D:$vk)>; ++ ++def : LSXPat<(int_loongarch_lsx_vfcmp_cueq_s (v4f32 LSX128W:$vj), (v4f32 LSX128W:$vk)), ++ (VFCMP_CUEQ_S LSX128W:$vj, LSX128W:$vk)>; ++def : LSXPat<(int_loongarch_lsx_vfcmp_cueq_d (v2f64 LSX128D:$vj), (v2f64 LSX128D:$vk)), ++ (VFCMP_CUEQ_D LSX128D:$vj, LSX128D:$vk)>; ++ ++def : LSXPat<(int_loongarch_lsx_vfcmp_cne_s (v4f32 LSX128W:$vj), (v4f32 LSX128W:$vk)), ++ (VFCMP_CNE_S LSX128W:$vj, LSX128W:$vk)>; ++def : LSXPat<(int_loongarch_lsx_vfcmp_cne_d (v2f64 LSX128D:$vj), (v2f64 LSX128D:$vk)), ++ (VFCMP_CNE_D LSX128D:$vj, LSX128D:$vk)>; ++ ++def : LSXPat<(int_loongarch_lsx_vfcmp_clt_s (v4f32 LSX128W:$vj), (v4f32 LSX128W:$vk)), ++ (VFCMP_CLT_S LSX128W:$vj, LSX128W:$vk)>; ++def : LSXPat<(int_loongarch_lsx_vfcmp_clt_d (v2f64 LSX128D:$vj), (v2f64 LSX128D:$vk)), ++ (VFCMP_CLT_D LSX128D:$vj, LSX128D:$vk)>; ++ ++def : LSXPat<(int_loongarch_lsx_vfcmp_cult_s (v4f32 LSX128W:$vj), (v4f32 LSX128W:$vk)), ++ (VFCMP_CULT_S LSX128W:$vj, LSX128W:$vk)>; ++def : LSXPat<(int_loongarch_lsx_vfcmp_cult_d (v2f64 LSX128D:$vj), (v2f64 LSX128D:$vk)), ++ (VFCMP_CULT_D LSX128D:$vj, LSX128D:$vk)>; ++ ++def : LSXPat<(int_loongarch_lsx_vfcmp_cle_s (v4f32 LSX128W:$vj), (v4f32 LSX128W:$vk)), ++ (VFCMP_CLE_S LSX128W:$vj, LSX128W:$vk)>; ++def : LSXPat<(int_loongarch_lsx_vfcmp_cle_d (v2f64 LSX128D:$vj), (v2f64 LSX128D:$vk)), ++ (VFCMP_CLE_D LSX128D:$vj, LSX128D:$vk)>; ++ ++def : LSXPat<(int_loongarch_lsx_vfcmp_cule_s (v4f32 LSX128W:$vj), (v4f32 LSX128W:$vk)), ++ (VFCMP_CULE_S LSX128W:$vj, LSX128W:$vk)>; ++def : LSXPat<(int_loongarch_lsx_vfcmp_cule_d (v2f64 LSX128D:$vj), (v2f64 LSX128D:$vk)), ++ (VFCMP_CULE_D LSX128D:$vj, LSX128D:$vk)>; ++ ++def : LSXPat<(int_loongarch_lsx_vftintrz_w_s (v4f32 LSX128W:$vj)), ++ (VFTINTRZ_W_S LSX128W:$vj)>; ++def : LSXPat<(int_loongarch_lsx_vftintrz_l_d (v2f64 LSX128D:$vj)), ++ (VFTINTRZ_L_D LSX128D:$vj)>; ++ ++ ++def imm_mask : ImmLeaf(Imm) && Imm == -1;}]>; ++def imm_mask_64 : ImmLeaf(Imm) && Imm == -1;}]>; ++ ++ ++def : LSXPat<(xor (v8i16 LSX128H:$vj), (vsplati16 imm_mask)), ++ (NOR_V_H_PSEUDO (v8i16 LSX128H:$vj), (v8i16 LSX128H:$vj))>; ++ ++def : LSXPat<(xor (v4i32 LSX128W:$vj), (vsplati32 imm_mask)), ++ (NOR_V_W_PSEUDO (v4i32 LSX128W:$vj), (v4i32 LSX128W:$vj))>; ++ ++def : LSXPat<(xor (v2i64 LSX128D:$vj), (vsplati64 imm_mask_64)), ++ (NOR_V_D_PSEUDO (v2i64 LSX128D:$vj), (v2i64 LSX128D:$vj))>; ++ ++ ++def : LSXPat<(and ++ (v16i8 (xor (v16i8 LSX128B:$vj),(vsplati8 imm_mask))), ++ (v16i8 LSX128B:$vk) ++ ), ++ (VANDN_V (v16i8 LSX128B:$vj), (v16i8 LSX128B:$vk))>; ++ ++def : LSXPat<(and ++ (v8i16 (xor (v8i16 LSX128H:$vj), (vsplati16 imm_mask))), ++ (v8i16 LSX128H:$vk) ++ ), ++ (VANDN_H_PSEUDO (v8i16 LSX128H:$vj), (v8i16 LSX128H:$vk))>; ++ ++def : LSXPat<(and ++ (v4i32 (xor (v4i32 LSX128W:$vj), (vsplati32 imm_mask))), ++ (v4i32 LSX128W:$vk) ++ ), ++ (VANDN_W_PSEUDO (v4i32 LSX128W:$vj), (v4i32 LSX128W:$vk))>; ++ ++def : LSXPat<(and ++ (v2i64 (xor (v2i64 LSX128D:$vj), (vsplati64 imm_mask_64))), ++ (v2i64 LSX128D:$vk) ++ ), ++ (VANDN_D_PSEUDO (v2i64 LSX128D:$vj), (v2i64 LSX128D:$vk))>; ++ ++ ++def : LSXPat<(or ++ (v16i8 LSX128B:$vj), ++ (v16i8 (xor (v16i8 LSX128B:$vk), (vsplati8 imm_mask))) ++ ), ++ (VORN_V (v16i8 LSX128B:$vj), (v16i8 LSX128B:$vk))>; ++ ++def : LSXPat<(or ++ (v8i16 LSX128H:$vj), ++ (v8i16 (xor (v8i16 LSX128H:$vk), (vsplati16 imm_mask))) ++ ), ++ (VORN_H_PSEUDO (v8i16 LSX128H:$vj), (v8i16 LSX128H:$vk))>; ++ ++def : LSXPat<(or 
++ (v4i32 LSX128W:$vj), ++ (v4i32 (xor (v4i32 LSX128W:$vk), (vsplati32 imm_mask))) ++ ), ++ (VORN_W_PSEUDO (v4i32 LSX128W:$vj), (v4i32 LSX128W:$vk))>; ++ ++def : LSXPat<(or ++ (v2i64 LSX128D:$vj), ++ (v2i64 (xor (v2i64 LSX128D:$vk), (vsplati64 imm_mask_64))) ++ ), ++ (VORN_D_PSEUDO (v2i64 LSX128D:$vj), (v2i64 LSX128D:$vk))>; ++ ++ ++def : LSXPat<(add (v2i64 (abs LSX128D:$a)), (v2i64 (abs LSX128D:$b))), ++ (VADDA_D (v2i64 LSX128D:$a),(v2i64 LSX128D:$b))>; ++ ++def : LSXPat<(add (v4i32 (abs LSX128W:$a)), (v4i32 (abs LSX128W:$b))), ++ (VADDA_W (v4i32 LSX128W:$a),(v4i32 LSX128W:$b))>; ++ ++def : LSXPat<(add (v8i16 (abs LSX128H:$a)), (v8i16 (abs LSX128H:$b))), ++ (VADDA_H (v8i16 LSX128H:$a),(v8i16 LSX128H:$b))>; ++ ++def : LSXPat<(add (v16i8 (abs LSX128B:$a)), (v16i8 (abs LSX128B:$b))), ++ (VADDA_B (v16i8 LSX128B:$a),(v16i8 LSX128B:$b))>; ++ ++ ++def : LSXPat<(and v16i8:$vj, (xor (shl vsplat_imm_eq_1, v16i8:$vk), ++ (vsplati8 imm_mask))), ++ (VBITCLR_B v16i8:$vj, v16i8:$vk)>; ++ ++def : LSXPat<(and v8i16:$vj, (xor (shl vsplat_imm_eq_1, v8i16:$vk), ++ (vsplati16 imm_mask))), ++ (VBITCLR_H v8i16:$vj, v8i16:$vk)>; ++ ++def : LSXPat<(and v4i32:$vj, (xor (shl vsplat_imm_eq_1, v4i32:$vk), ++ (vsplati32 imm_mask))), ++ (VBITCLR_W v4i32:$vj, v4i32:$vk)>; ++ ++def : LSXPat<(and v2i64:$vj, (xor (shl vsplat_imm_eq_1, v2i64:$vk), ++ (vsplati64 imm_mask_64))), ++ (VBITCLR_D v2i64:$vj, v2i64:$vk)>; +diff --git a/lib/Target/LoongArch/LoongArchMCInstLower.cpp b/lib/Target/LoongArch/LoongArchMCInstLower.cpp +new file mode 100644 +index 00000000..bf70b09d +--- /dev/null ++++ b/lib/Target/LoongArch/LoongArchMCInstLower.cpp +@@ -0,0 +1,342 @@ ++//===- LoongArchMCInstLower.cpp - Convert LoongArch MachineInstr to MCInst ----------===// ++// ++// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. ++// See https://llvm.org/LICENSE.txt for license information. ++// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception ++// ++//===----------------------------------------------------------------------===// ++// ++// This file contains code to lower LoongArch MachineInstrs to their corresponding ++// MCInst records. 
++// ++//===----------------------------------------------------------------------===// ++ ++#include "LoongArchMCInstLower.h" ++#include "MCTargetDesc/LoongArchBaseInfo.h" ++#include "MCTargetDesc/LoongArchMCExpr.h" ++#include "LoongArchAsmPrinter.h" ++#include "llvm/CodeGen/MachineBasicBlock.h" ++#include "llvm/CodeGen/MachineInstr.h" ++#include "llvm/CodeGen/MachineOperand.h" ++#include "llvm/MC/MCExpr.h" ++#include "llvm/MC/MCInst.h" ++#include "llvm/Support/ErrorHandling.h" ++#include ++ ++using namespace llvm; ++ ++LoongArchMCInstLower::LoongArchMCInstLower(LoongArchAsmPrinter &asmprinter) ++ : AsmPrinter(asmprinter) {} ++ ++void LoongArchMCInstLower::Initialize(MCContext *C) { ++ Ctx = C; ++} ++ ++MCOperand LoongArchMCInstLower::LowerSymbolOperand(const MachineOperand &MO, ++ MachineOperandType MOTy, ++ unsigned Offset) const { ++ MCSymbolRefExpr::VariantKind Kind = MCSymbolRefExpr::VK_None; ++ LoongArchMCExpr::LoongArchExprKind TargetKind = LoongArchMCExpr::MEK_None; ++ const MCSymbol *Symbol; ++ ++ switch(MO.getTargetFlags()) { ++ default: ++ llvm_unreachable("Invalid target flag!"); ++ case LoongArchII::MO_NO_FLAG: ++ break; ++ case LoongArchII::MO_GOT_HI: ++ TargetKind = LoongArchMCExpr::MEK_GOT_HI; ++ break; ++ case LoongArchII::MO_GOT_LO: ++ TargetKind = LoongArchMCExpr::MEK_GOT_LO; ++ break; ++ case LoongArchII::MO_GOT_RRHI: ++ TargetKind = LoongArchMCExpr::MEK_GOT_RRHI; ++ break; ++ case LoongArchII::MO_GOT_RRHIGHER: ++ TargetKind = LoongArchMCExpr::MEK_GOT_RRHIGHER; ++ break; ++ case LoongArchII::MO_GOT_RRHIGHEST: ++ TargetKind = LoongArchMCExpr::MEK_GOT_RRHIGHEST; ++ break; ++ case LoongArchII::MO_GOT_RRLO: ++ TargetKind = LoongArchMCExpr::MEK_GOT_RRLO; ++ break; ++ case LoongArchII::MO_PCREL_HI: ++ TargetKind = LoongArchMCExpr::MEK_PCREL_HI; ++ break; ++ case LoongArchII::MO_PCREL_LO: ++ TargetKind = LoongArchMCExpr::MEK_PCREL_LO; ++ break; ++ case LoongArchII::MO_PCREL_RRHI: ++ TargetKind = LoongArchMCExpr::MEK_PCREL_RRHI; ++ break; ++ case LoongArchII::MO_PCREL_RRHIGHER: ++ TargetKind = LoongArchMCExpr::MEK_PCREL_RRHIGHER; ++ break; ++ case LoongArchII::MO_PCREL_RRHIGHEST: ++ TargetKind = LoongArchMCExpr::MEK_PCREL_RRHIGHEST; ++ break; ++ case LoongArchII::MO_PCREL_RRLO: ++ TargetKind = LoongArchMCExpr::MEK_PCREL_RRLO; ++ break; ++ case LoongArchII::MO_TLSIE_HI: ++ TargetKind = LoongArchMCExpr::MEK_TLSIE_HI; ++ break; ++ case LoongArchII::MO_TLSIE_LO: ++ TargetKind = LoongArchMCExpr::MEK_TLSIE_LO; ++ break; ++ case LoongArchII::MO_TLSIE_RRHI: ++ TargetKind = LoongArchMCExpr::MEK_TLSIE_RRHI; ++ break; ++ case LoongArchII::MO_TLSIE_RRHIGHER: ++ TargetKind = LoongArchMCExpr::MEK_TLSIE_RRHIGHER; ++ break; ++ case LoongArchII::MO_TLSIE_RRHIGHEST: ++ TargetKind = LoongArchMCExpr::MEK_TLSIE_RRHIGHEST; ++ break; ++ case LoongArchII::MO_TLSIE_RRLO: ++ TargetKind = LoongArchMCExpr::MEK_TLSIE_RRLO; ++ break; ++ case LoongArchII::MO_TLSLE_HI: ++ TargetKind = LoongArchMCExpr::MEK_TLSLE_HI; ++ break; ++ case LoongArchII::MO_TLSLE_HIGHER: ++ TargetKind = LoongArchMCExpr::MEK_TLSLE_HIGHER; ++ break; ++ case LoongArchII::MO_TLSLE_HIGHEST: ++ TargetKind = LoongArchMCExpr::MEK_TLSLE_HIGHEST; ++ break; ++ case LoongArchII::MO_TLSLE_LO: ++ TargetKind = LoongArchMCExpr::MEK_TLSLE_LO; ++ break; ++ case LoongArchII::MO_TLSGD_HI: ++ TargetKind = LoongArchMCExpr::MEK_TLSGD_HI; ++ break; ++ case LoongArchII::MO_TLSGD_LO: ++ TargetKind = LoongArchMCExpr::MEK_TLSGD_LO; ++ break; ++ case LoongArchII::MO_TLSGD_RRHI: ++ TargetKind = LoongArchMCExpr::MEK_TLSGD_RRHI; ++ break; ++ case 
LoongArchII::MO_TLSGD_RRHIGHER: ++ TargetKind = LoongArchMCExpr::MEK_TLSGD_RRHIGHER; ++ break; ++ case LoongArchII::MO_TLSGD_RRHIGHEST: ++ TargetKind = LoongArchMCExpr::MEK_TLSGD_RRHIGHEST; ++ break; ++ case LoongArchII::MO_TLSGD_RRLO: ++ TargetKind = LoongArchMCExpr::MEK_TLSGD_RRLO; ++ break; ++ case LoongArchII::MO_ABS_HI: ++ TargetKind = LoongArchMCExpr::MEK_ABS_HI; ++ break; ++ case LoongArchII::MO_ABS_HIGHER: ++ TargetKind = LoongArchMCExpr::MEK_ABS_HIGHER; ++ break; ++ case LoongArchII::MO_ABS_HIGHEST: ++ TargetKind = LoongArchMCExpr::MEK_ABS_HIGHEST; ++ break; ++ case LoongArchII::MO_ABS_LO: ++ TargetKind = LoongArchMCExpr::MEK_ABS_LO; ++ break; ++ case LoongArchII::MO_CALL_HI: ++ TargetKind = LoongArchMCExpr::MEK_CALL_HI; ++ break; ++ case LoongArchII::MO_CALL_LO: ++ TargetKind = LoongArchMCExpr::MEK_CALL_LO; ++ break; ++ } ++ ++ switch (MOTy) { ++ case MachineOperand::MO_MachineBasicBlock: ++ Symbol = MO.getMBB()->getSymbol(); ++ break; ++ ++ case MachineOperand::MO_GlobalAddress: ++ Symbol = AsmPrinter.getSymbol(MO.getGlobal()); ++ Offset += MO.getOffset(); ++ break; ++ ++ case MachineOperand::MO_BlockAddress: ++ Symbol = AsmPrinter.GetBlockAddressSymbol(MO.getBlockAddress()); ++ Offset += MO.getOffset(); ++ break; ++ ++ case MachineOperand::MO_ExternalSymbol: ++ Symbol = AsmPrinter.GetExternalSymbolSymbol(MO.getSymbolName()); ++ Offset += MO.getOffset(); ++ break; ++ ++ case MachineOperand::MO_MCSymbol: ++ Symbol = MO.getMCSymbol(); ++ Offset += MO.getOffset(); ++ break; ++ ++ case MachineOperand::MO_JumpTableIndex: ++ Symbol = AsmPrinter.GetJTISymbol(MO.getIndex()); ++ break; ++ ++ case MachineOperand::MO_ConstantPoolIndex: ++ Symbol = AsmPrinter.GetCPISymbol(MO.getIndex()); ++ Offset += MO.getOffset(); ++ break; ++ ++ default: ++ llvm_unreachable(""); ++ } ++ ++ const MCExpr *Expr = MCSymbolRefExpr::create(Symbol, Kind, *Ctx); ++ ++ if (Offset) { ++ // Assume offset is never negative. ++ assert(Offset > 0); ++ ++ Expr = MCBinaryExpr::createAdd(Expr, MCConstantExpr::create(Offset, *Ctx), ++ *Ctx); ++ } ++ ++ if (TargetKind != LoongArchMCExpr::MEK_None) ++ Expr = LoongArchMCExpr::create(TargetKind, Expr, *Ctx); ++ ++ return MCOperand::createExpr(Expr); ++} ++ ++MCOperand LoongArchMCInstLower::LowerOperand(const MachineOperand &MO, ++ unsigned offset) const { ++ MachineOperandType MOTy = MO.getType(); ++ ++ switch (MOTy) { ++ default: llvm_unreachable("unknown operand type"); ++ case MachineOperand::MO_Register: ++ // Ignore all implicit register operands. 
++    if (MO.isImplicit()) break;
++    return MCOperand::createReg(MO.getReg());
++  case MachineOperand::MO_Immediate:
++    return MCOperand::createImm(MO.getImm() + offset);
++  case MachineOperand::MO_MachineBasicBlock:
++  case MachineOperand::MO_GlobalAddress:
++  case MachineOperand::MO_ExternalSymbol:
++  case MachineOperand::MO_MCSymbol:
++  case MachineOperand::MO_JumpTableIndex:
++  case MachineOperand::MO_ConstantPoolIndex:
++  case MachineOperand::MO_BlockAddress:
++    return LowerSymbolOperand(MO, MOTy, offset);
++  case MachineOperand::MO_RegisterMask:
++    break;
++  }
++
++  return MCOperand();
++}
++
++MCOperand LoongArchMCInstLower::createSub(MachineBasicBlock *BB1,
++                                          MachineBasicBlock *BB2,
++                                          LoongArchMCExpr::LoongArchExprKind Kind) const {
++  const MCSymbolRefExpr *Sym1 = MCSymbolRefExpr::create(BB1->getSymbol(), *Ctx);
++  const MCSymbolRefExpr *Sym2 = MCSymbolRefExpr::create(BB2->getSymbol(), *Ctx);
++  const MCBinaryExpr *Sub = MCBinaryExpr::createSub(Sym1, Sym2, *Ctx);
++
++  return MCOperand::createExpr(LoongArchMCExpr::create(Kind, Sub, *Ctx));
++}
++
++void LoongArchMCInstLower::lowerLongBranchADDI(const MachineInstr *MI,
++                                               MCInst &OutMI, int Opcode) const {
++  OutMI.setOpcode(Opcode);
++
++  LoongArchMCExpr::LoongArchExprKind Kind;
++  unsigned TargetFlags = MI->getOperand(2).getTargetFlags();
++  switch (TargetFlags) {
++  case LoongArchII::MO_ABS_HIGHEST:
++    Kind = LoongArchMCExpr::MEK_ABS_HIGHEST;
++    break;
++  case LoongArchII::MO_ABS_HIGHER:
++    Kind = LoongArchMCExpr::MEK_ABS_HIGHER;
++    break;
++  case LoongArchII::MO_ABS_HI:
++    Kind = LoongArchMCExpr::MEK_ABS_HI;
++    break;
++  case LoongArchII::MO_ABS_LO:
++    Kind = LoongArchMCExpr::MEK_ABS_LO;
++    break;
++  default:
++    report_fatal_error("Unexpected flags for lowerLongBranchADDI");
++  }
++
++  // Lower two register operands.
++  for (unsigned I = 0, E = 2; I != E; ++I) {
++    const MachineOperand &MO = MI->getOperand(I);
++    OutMI.addOperand(LowerOperand(MO));
++  }
++
++  if (MI->getNumOperands() == 3) {
++    // Lower register operand.
++    const MCExpr *Expr =
++        MCSymbolRefExpr::create(MI->getOperand(2).getMBB()->getSymbol(), *Ctx);
++    const LoongArchMCExpr *LoongArchExpr = LoongArchMCExpr::create(Kind, Expr, *Ctx);
++    OutMI.addOperand(MCOperand::createExpr(LoongArchExpr));
++  } else if (MI->getNumOperands() == 4) {
++    // Create %lo($tgt-$baltgt) or %hi($tgt-$baltgt).
++    OutMI.addOperand(createSub(MI->getOperand(2).getMBB(),
++                               MI->getOperand(3).getMBB(), Kind));
++  }
++}
++
++void LoongArchMCInstLower::lowerLongBranchPCADDU12I(const MachineInstr *MI,
++                                                    MCInst &OutMI, int Opcode) const {
++  OutMI.setOpcode(Opcode);
++
++  LoongArchMCExpr::LoongArchExprKind Kind;
++  unsigned TargetFlags = MI->getOperand(1).getTargetFlags();
++  switch (TargetFlags) {
++  case LoongArchII::MO_PCREL_HI:
++    Kind = LoongArchMCExpr::MEK_PCREL_HI;
++    break;
++  case LoongArchII::MO_PCREL_LO:
++    Kind = LoongArchMCExpr::MEK_PCREL_LO;
++    break;
++  default:
++    report_fatal_error("Unexpected flags for lowerLongBranchPCADDU12I");
++  }
++
++  // Lower the register operand.
++ const MachineOperand &MO = MI->getOperand(0); ++ OutMI.addOperand(LowerOperand(MO)); ++ ++ const MCExpr *Expr = ++ MCSymbolRefExpr::create(MI->getOperand(1).getMBB()->getSymbol(), *Ctx); ++ const LoongArchMCExpr *LoongArchExpr = LoongArchMCExpr::create(Kind, Expr, *Ctx); ++ OutMI.addOperand(MCOperand::createExpr(LoongArchExpr)); ++} ++bool LoongArchMCInstLower::lowerLongBranch(const MachineInstr *MI, ++ MCInst &OutMI) const { ++ switch (MI->getOpcode()) { ++ default: ++ return false; ++ case LoongArch::LONG_BRANCH_ADDIW: ++ case LoongArch::LONG_BRANCH_ADDIW2Op: ++ lowerLongBranchADDI(MI, OutMI, LoongArch::ADDI_W); ++ return true; ++ case LoongArch::LONG_BRANCH_ADDID: ++ case LoongArch::LONG_BRANCH_ADDID2Op: ++ lowerLongBranchADDI(MI, OutMI, LoongArch::ADDI_D); ++ return true; ++ case LoongArch::LONG_BRANCH_PCADDU12I: ++ lowerLongBranchPCADDU12I(MI, OutMI, LoongArch::PCADDU12I); ++ return true; ++ } ++} ++ ++void LoongArchMCInstLower::Lower(const MachineInstr *MI, MCInst &OutMI) const { ++ if (lowerLongBranch(MI, OutMI)) ++ return; ++ ++ OutMI.setOpcode(MI->getOpcode()); ++ ++ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { ++ const MachineOperand &MO = MI->getOperand(i); ++ MCOperand MCOp = LowerOperand(MO); ++ ++ if (MCOp.isValid()) ++ OutMI.addOperand(MCOp); ++ } ++} +diff --git a/lib/Target/LoongArch/LoongArchMCInstLower.h b/lib/Target/LoongArch/LoongArchMCInstLower.h +new file mode 100644 +index 00000000..6463a7b6 +--- /dev/null ++++ b/lib/Target/LoongArch/LoongArchMCInstLower.h +@@ -0,0 +1,55 @@ ++//===- LoongArchMCInstLower.h - Lower MachineInstr to MCInst --------*- C++ -*--===// ++// ++// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. ++// See https://llvm.org/LICENSE.txt for license information. ++// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception ++// ++//===----------------------------------------------------------------------===// ++ ++#ifndef LLVM_LIB_TARGET_LOONGARCH_LOONGARCHMCINSTLOWER_H ++#define LLVM_LIB_TARGET_LOONGARCH_LOONGARCHMCINSTLOWER_H ++ ++#include "MCTargetDesc/LoongArchMCExpr.h" ++#include "llvm/CodeGen/MachineOperand.h" ++#include "llvm/Support/Compiler.h" ++ ++namespace llvm { ++ ++class MachineBasicBlock; ++class MachineInstr; ++class MCContext; ++class MCInst; ++class MCOperand; ++class LoongArchAsmPrinter; ++ ++/// LoongArchMCInstLower - This class is used to lower an MachineInstr into an ++/// MCInst. 
++class LLVM_LIBRARY_VISIBILITY LoongArchMCInstLower { ++ using MachineOperandType = MachineOperand::MachineOperandType; ++ ++ MCContext *Ctx; ++ LoongArchAsmPrinter &AsmPrinter; ++ ++public: ++ LoongArchMCInstLower(LoongArchAsmPrinter &asmprinter); ++ ++ void Initialize(MCContext *C); ++ void Lower(const MachineInstr *MI, MCInst &OutMI) const; ++ MCOperand LowerOperand(const MachineOperand& MO, unsigned offset = 0) const; ++ ++private: ++ MCOperand LowerSymbolOperand(const MachineOperand &MO, ++ MachineOperandType MOTy, unsigned Offset) const; ++ MCOperand createSub(MachineBasicBlock *BB1, MachineBasicBlock *BB2, ++ LoongArchMCExpr::LoongArchExprKind Kind) const; ++ void lowerLongBranchLUi(const MachineInstr *MI, MCInst &OutMI) const; ++ void lowerLongBranchADDI(const MachineInstr *MI, MCInst &OutMI, ++ int Opcode) const; ++ void lowerLongBranchPCADDU12I(const MachineInstr *MI, MCInst &OutMI, ++ int Opcode) const; ++ bool lowerLongBranch(const MachineInstr *MI, MCInst &OutMI) const; ++}; ++ ++} // end namespace llvm ++ ++#endif // LLVM_LIB_TARGET_LOONGARCH_LOONGARCHMCINSTLOWER_H +diff --git a/lib/Target/LoongArch/LoongArchMachineFunction.cpp b/lib/Target/LoongArch/LoongArchMachineFunction.cpp +new file mode 100644 +index 00000000..90baa8fd +--- /dev/null ++++ b/lib/Target/LoongArch/LoongArchMachineFunction.cpp +@@ -0,0 +1,51 @@ ++//===-- LoongArchMachineFunctionInfo.cpp - Private data used for LoongArch ----------===// ++// ++// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. ++// See https://llvm.org/LICENSE.txt for license information. ++// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception ++// ++//===----------------------------------------------------------------------===// ++ ++#include "LoongArchMachineFunction.h" ++#include "MCTargetDesc/LoongArchABIInfo.h" ++#include "LoongArchSubtarget.h" ++#include "LoongArchTargetMachine.h" ++#include "llvm/CodeGen/MachineFrameInfo.h" ++#include "llvm/CodeGen/MachineRegisterInfo.h" ++#include "llvm/CodeGen/PseudoSourceValue.h" ++#include "llvm/CodeGen/TargetRegisterInfo.h" ++#include "llvm/Support/CommandLine.h" ++ ++using namespace llvm; ++ ++LoongArchFunctionInfo::~LoongArchFunctionInfo() = default; ++ ++void LoongArchFunctionInfo::createEhDataRegsFI() { ++ const TargetRegisterInfo &TRI = *MF.getSubtarget().getRegisterInfo(); ++ for (int I = 0; I < 4; ++I) { ++ const TargetRegisterClass &RC = ++ static_cast(MF.getTarget()) ++ .getABI() ++ .IsLP64() ++ ? 
LoongArch::GPR64RegClass ++ : LoongArch::GPR32RegClass; ++ ++ EhDataRegFI[I] = MF.getFrameInfo().CreateStackObject(TRI.getSpillSize(RC), ++ TRI.getSpillAlign(RC), false); ++ } ++} ++ ++bool LoongArchFunctionInfo::isEhDataRegFI(int FI) const { ++ return CallsEhReturn && (FI == EhDataRegFI[0] || FI == EhDataRegFI[1] ++ || FI == EhDataRegFI[2] || FI == EhDataRegFI[3]); ++} ++ ++MachinePointerInfo LoongArchFunctionInfo::callPtrInfo(const char *ES) { ++ return MachinePointerInfo(MF.getPSVManager().getExternalSymbolCallEntry(ES)); ++} ++ ++MachinePointerInfo LoongArchFunctionInfo::callPtrInfo(const GlobalValue *GV) { ++ return MachinePointerInfo(MF.getPSVManager().getGlobalValueCallEntry(GV)); ++} ++ ++void LoongArchFunctionInfo::anchor() {} +diff --git a/lib/Target/LoongArch/LoongArchMachineFunction.h b/lib/Target/LoongArch/LoongArchMachineFunction.h +new file mode 100644 +index 00000000..b1c805c0 +--- /dev/null ++++ b/lib/Target/LoongArch/LoongArchMachineFunction.h +@@ -0,0 +1,98 @@ ++//===- LoongArchMachineFunctionInfo.h - Private data used for LoongArch ---*- C++ -*-===// ++// ++// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. ++// See https://llvm.org/LICENSE.txt for license information. ++// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception ++// ++//===----------------------------------------------------------------------===// ++// ++// This file declares the LoongArch specific subclass of MachineFunctionInfo. ++// ++//===----------------------------------------------------------------------===// ++ ++#ifndef LLVM_LIB_TARGET_LOONGARCH_LOONGARCHMACHINEFUNCTION_H ++#define LLVM_LIB_TARGET_LOONGARCH_LOONGARCHMACHINEFUNCTION_H ++ ++#include "llvm/CodeGen/MachineFunction.h" ++#include "llvm/CodeGen/MachineMemOperand.h" ++#include ++ ++namespace llvm { ++ ++/// LoongArchFunctionInfo - This class is derived from MachineFunction private ++/// LoongArch target-specific information for each MachineFunction. ++class LoongArchFunctionInfo : public MachineFunctionInfo { ++public: ++ LoongArchFunctionInfo(MachineFunction &MF) : MF(MF) {} ++ ++ ~LoongArchFunctionInfo() override; ++ ++ unsigned getSRetReturnReg() const { return SRetReturnReg; } ++ void setSRetReturnReg(unsigned Reg) { SRetReturnReg = Reg; } ++ ++ int getVarArgsFrameIndex() const { return VarArgsFrameIndex; } ++ void setVarArgsFrameIndex(int Index) { VarArgsFrameIndex = Index; } ++ ++ unsigned getVarArgsSaveSize() const { return VarArgsSaveSize; } ++ void setVarArgsSaveSize(int Size) { VarArgsSaveSize = Size; } ++ ++ bool hasByvalArg() const { return HasByvalArg; } ++ void setFormalArgInfo(unsigned Size, bool HasByval) { ++ IncomingArgSize = Size; ++ HasByvalArg = HasByval; ++ } ++ ++ unsigned getIncomingArgSize() const { return IncomingArgSize; } ++ ++ bool callsEhReturn() const { return CallsEhReturn; } ++ void setCallsEhReturn() { CallsEhReturn = true; } ++ ++ void createEhDataRegsFI(); ++ int getEhDataRegFI(unsigned Reg) const { return EhDataRegFI[Reg]; } ++ bool isEhDataRegFI(int FI) const; ++ ++ /// Create a MachinePointerInfo that has an ExternalSymbolPseudoSourceValue ++ /// object representing a GOT entry for an external function. ++ MachinePointerInfo callPtrInfo(const char *ES); ++ ++ /// Create a MachinePointerInfo that has a GlobalValuePseudoSourceValue object ++ /// representing a GOT entry for a global function. 
++ MachinePointerInfo callPtrInfo(const GlobalValue *GV); ++ ++ void setSaveS2() { SaveS2 = true; } ++ bool hasSaveS2() const { return SaveS2; } ++ ++private: ++ virtual void anchor(); ++ ++ MachineFunction& MF; ++ ++ /// SRetReturnReg - Some subtargets require that sret lowering includes ++ /// returning the value of the returned struct in a register. This field ++ /// holds the virtual register into which the sret argument is passed. ++ unsigned SRetReturnReg = 0; ++ ++ /// VarArgsFrameIndex - FrameIndex for start of varargs area. ++ int VarArgsFrameIndex = 0; ++ int VarArgsSaveSize = 0; ++ ++ /// True if function has a byval argument. ++ bool HasByvalArg; ++ ++ /// Size of incoming argument area. ++ unsigned IncomingArgSize; ++ ++ /// CallsEhReturn - Whether the function calls llvm.eh.return. ++ bool CallsEhReturn = false; ++ ++ /// Frame objects for spilling eh data registers. ++ int EhDataRegFI[4]; ++ ++ // saveS2 ++ bool SaveS2 = false; ++ ++}; ++ ++} // end namespace llvm ++ ++#endif // LLVM_LIB_TARGET_LOONGARCH_LOONGARCHMACHINEFUNCTION_H +diff --git a/lib/Target/LoongArch/LoongArchModuleISelDAGToDAG.cpp b/lib/Target/LoongArch/LoongArchModuleISelDAGToDAG.cpp +new file mode 100644 +index 00000000..8dbf30f2 +--- /dev/null ++++ b/lib/Target/LoongArch/LoongArchModuleISelDAGToDAG.cpp +@@ -0,0 +1,53 @@ ++//===----------------------------------------------------------------------===// ++// Instruction Selector Subtarget Control ++//===----------------------------------------------------------------------===// ++ ++//===----------------------------------------------------------------------===// ++// This file defines a pass used to change the subtarget for the ++// LoongArch Instruction selector. ++// ++//===----------------------------------------------------------------------===// ++ ++#include "LoongArch.h" ++#include "LoongArchTargetMachine.h" ++#include "llvm/CodeGen/TargetPassConfig.h" ++#include "llvm/CodeGen/StackProtector.h" ++#include "llvm/Support/Debug.h" ++#include "llvm/Support/raw_ostream.h" ++ ++using namespace llvm; ++ ++#define DEBUG_TYPE "loongarch-isel" ++ ++namespace { ++ class LoongArchModuleDAGToDAGISel : public MachineFunctionPass { ++ public: ++ static char ID; ++ ++ LoongArchModuleDAGToDAGISel() : MachineFunctionPass(ID) {} ++ ++ // Pass Name ++ StringRef getPassName() const override { ++ return "LoongArch DAG->DAG Pattern Instruction Selection"; ++ } ++ ++ void getAnalysisUsage(AnalysisUsage &AU) const override { ++ AU.addRequired(); ++ AU.addPreserved(); ++ MachineFunctionPass::getAnalysisUsage(AU); ++ } ++ ++ bool runOnMachineFunction(MachineFunction &MF) override; ++ }; ++ ++ char LoongArchModuleDAGToDAGISel::ID = 0; ++} ++ ++bool LoongArchModuleDAGToDAGISel::runOnMachineFunction(MachineFunction &MF) { ++ LLVM_DEBUG(errs() << "In LoongArchModuleDAGToDAGISel::runMachineFunction\n"); ++ return false; ++} ++ ++llvm::FunctionPass *llvm::createLoongArchModuleISelDagPass() { ++ return new LoongArchModuleDAGToDAGISel(); ++} +diff --git a/lib/Target/LoongArch/LoongArchRegisterInfo.cpp b/lib/Target/LoongArch/LoongArchRegisterInfo.cpp +new file mode 100644 +index 00000000..af5362c3 +--- /dev/null ++++ b/lib/Target/LoongArch/LoongArchRegisterInfo.cpp +@@ -0,0 +1,355 @@ ++//===- LoongArchRegisterInfo.cpp - LoongArch Register Information -------------------===// ++// ++// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. ++// See https://llvm.org/LICENSE.txt for license information. 
++// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception ++// ++//===----------------------------------------------------------------------===// ++// ++// This file contains the LoongArch implementation of the TargetRegisterInfo class. ++// ++//===----------------------------------------------------------------------===// ++ ++#include "LoongArchRegisterInfo.h" ++#include "MCTargetDesc/LoongArchABIInfo.h" ++#include "LoongArch.h" ++#include "LoongArchMachineFunction.h" ++#include "LoongArchSubtarget.h" ++#include "LoongArchTargetMachine.h" ++#include "llvm/ADT/BitVector.h" ++#include "llvm/ADT/STLExtras.h" ++#include "llvm/CodeGen/MachineFrameInfo.h" ++#include "llvm/CodeGen/MachineFunction.h" ++#include "llvm/CodeGen/MachineInstr.h" ++#include "llvm/CodeGen/MachineRegisterInfo.h" ++#include "llvm/CodeGen/TargetFrameLowering.h" ++#include "llvm/CodeGen/TargetRegisterInfo.h" ++#include "llvm/CodeGen/TargetSubtargetInfo.h" ++#include "llvm/IR/Function.h" ++#include "llvm/MC/MCRegisterInfo.h" ++#include "llvm/Support/Debug.h" ++#include "llvm/Support/ErrorHandling.h" ++#include "llvm/Support/raw_ostream.h" ++#include ++ ++using namespace llvm; ++ ++#define DEBUG_TYPE "loongarch-reg-info" ++ ++#define GET_REGINFO_TARGET_DESC ++#include "LoongArchGenRegisterInfo.inc" ++ ++LoongArchRegisterInfo::LoongArchRegisterInfo() : LoongArchGenRegisterInfo(LoongArch::RA) {} ++ ++unsigned LoongArchRegisterInfo::getPICCallReg() { return LoongArch::T8; } ++ ++const TargetRegisterClass * ++LoongArchRegisterInfo::getPointerRegClass(const MachineFunction &MF, ++ unsigned Kind) const { ++ LoongArchABIInfo ABI = MF.getSubtarget().getABI(); ++ LoongArchPtrClass PtrClassKind = static_cast(Kind); ++ ++ switch (PtrClassKind) { ++ case LoongArchPtrClass::Default: ++ return ABI.ArePtrs64bit() ? &LoongArch::GPR64RegClass : &LoongArch::GPR32RegClass; ++ case LoongArchPtrClass::StackPointer: ++ return ABI.ArePtrs64bit() ? 
&LoongArch::SP64RegClass : &LoongArch::SP32RegClass; ++ } ++ ++ llvm_unreachable("Unknown pointer kind"); ++} ++ ++unsigned ++LoongArchRegisterInfo::getRegPressureLimit(const TargetRegisterClass *RC, ++ MachineFunction &MF) const { ++ switch (RC->getID()) { ++ default: ++ return 0; ++ case LoongArch::GPR32RegClassID: ++ case LoongArch::GPR64RegClassID: ++ { ++ const TargetFrameLowering *TFI = MF.getSubtarget().getFrameLowering(); ++ return 28 - TFI->hasFP(MF); ++ } ++ case LoongArch::FGR32RegClassID: ++ return 32; ++ case LoongArch::FGR64RegClassID: ++ return 32; ++ } ++} ++ ++//===----------------------------------------------------------------------===// ++// Callee Saved Registers methods ++//===----------------------------------------------------------------------===// ++ ++/// LoongArch Callee Saved Registers ++const MCPhysReg * ++LoongArchRegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const { ++ const LoongArchSubtarget &Subtarget = MF->getSubtarget(); ++ ++ if ((Subtarget.hasBasicF() && !Subtarget.hasBasicD())) ++ return CSR_SingleFloatOnly_SaveList; ++ ++ if (Subtarget.isABI_LP64()) ++ return CSR_LP64_SaveList; ++ ++ return CSR_ILP32_SaveList; ++} ++ ++const uint32_t * ++LoongArchRegisterInfo::getCallPreservedMask(const MachineFunction &MF, ++ CallingConv::ID) const { ++ const LoongArchSubtarget &Subtarget = MF.getSubtarget(); ++ ++ if ((Subtarget.hasBasicF() && !Subtarget.hasBasicD())) ++ return CSR_SingleFloatOnly_RegMask; ++ ++ if (Subtarget.isABI_LP64()) ++ return CSR_LP64_RegMask; ++ ++ return CSR_ILP32_RegMask; ++} ++ ++BitVector LoongArchRegisterInfo:: ++getReservedRegs(const MachineFunction &MF) const { ++ static const MCPhysReg ReservedGPR32[] = { ++ LoongArch::ZERO, LoongArch::SP, LoongArch::TP, LoongArch::T9 ++ }; ++ ++ static const MCPhysReg ReservedGPR64[] = { ++ LoongArch::ZERO_64, LoongArch::SP_64, LoongArch::TP_64, LoongArch::T9_64 ++ }; ++ ++ BitVector Reserved(getNumRegs()); ++ const LoongArchSubtarget &Subtarget = MF.getSubtarget(); ++ ++ for (unsigned I = 0; I < array_lengthof(ReservedGPR32); ++I) ++ Reserved.set(ReservedGPR32[I]); ++ ++ for (unsigned I = 0; I < array_lengthof(ReservedGPR64); ++I) ++ Reserved.set(ReservedGPR64[I]); ++ ++ // Reserve FP if this function should have a dedicated frame pointer register. ++ if (Subtarget.getFrameLowering()->hasFP(MF)) { ++ Reserved.set(LoongArch::FP); ++ Reserved.set(LoongArch::FP_64); ++ ++ // Reserve the base register if we need to both realign the stack and ++ // allocate variable-sized objects at runtime. This should test the ++ // same conditions as LoongArchFrameLowering::hasBP(). ++ if (hasStackRealignment(MF) && MF.getFrameInfo().hasVarSizedObjects()) { ++ Reserved.set(LoongArch::S7); ++ Reserved.set(LoongArch::S7_64); ++ } ++ } ++ ++ return Reserved; ++} ++ ++bool ++LoongArchRegisterInfo::requiresRegisterScavenging(const MachineFunction &MF) const { ++ return true; ++} ++ ++bool LoongArchRegisterInfo:: ++requiresFrameIndexScavenging(const MachineFunction &MF) const { ++ return true; ++} ++ ++bool ++LoongArchRegisterInfo::trackLivenessAfterRegAlloc(const MachineFunction &MF) const { ++ return true; ++} ++ ++/// Get the size of the offset supported by the given load/store/inline asm. ++/// The result includes the effects of any scale factors applied to the ++/// instruction immediate. 
++static inline unsigned getLoadStoreOffsetSizeInBits(const unsigned Opcode,
++                                                    MachineOperand MO) {
++  switch (Opcode) {
++  case LoongArch::LDPTR_W:
++  case LoongArch::LDPTR_W32:
++  case LoongArch::LDPTR_D:
++  case LoongArch::STPTR_W:
++  case LoongArch::STPTR_W32:
++  case LoongArch::STPTR_D:
++  case LoongArch::LL_W:
++  case LoongArch::LL_D:
++  case LoongArch::SC_W:
++  case LoongArch::SC_D:
++    return 14 + 2 /* scale factor */;
++  case LoongArch::INLINEASM: {
++    unsigned ConstraintID = InlineAsm::getMemoryConstraintID(MO.getImm());
++    switch (ConstraintID) {
++    case InlineAsm::Constraint_ZC: {
++      return 14 + 2 /* scale factor */;
++    }
++    default:
++      return 12;
++    }
++  }
++  default:
++    return 12;
++  }
++}
++
++/// Get the scale factor applied to the immediate in the given load/store.
++static inline unsigned getLoadStoreOffsetAlign(const unsigned Opcode) {
++  switch (Opcode) {
++  case LoongArch::LDPTR_W:
++  case LoongArch::LDPTR_W32:
++  case LoongArch::LDPTR_D:
++  case LoongArch::STPTR_W:
++  case LoongArch::STPTR_W32:
++  case LoongArch::STPTR_D:
++  case LoongArch::LL_W:
++  case LoongArch::LL_D:
++  case LoongArch::SC_W:
++  case LoongArch::SC_D:
++    return 4;
++  default:
++    return 1;
++  }
++}
++
++// FrameIndex operands represent objects inside an abstract stack.
++// We must replace each FrameIndex with a direct stack or frame pointer
++// reference.
++void LoongArchRegisterInfo::
++eliminateFrameIndex(MachineBasicBlock::iterator II, int SPAdj,
++                    unsigned FIOperandNum, RegScavenger *RS) const {
++  MachineInstr &MI = *II;
++  MachineFunction &MF = *MI.getParent()->getParent();
++  const LoongArchFrameLowering *TFI = getFrameLowering(MF);
++
++  LLVM_DEBUG(errs() << "\nFunction : " << MF.getName() << "\n";
++             errs() << "<--------->\n"
++                    << MI);
++
++  int FrameIndex = MI.getOperand(FIOperandNum).getIndex();
++  uint64_t stackSize = MF.getFrameInfo().getStackSize();
++  int64_t spOffset = MF.getFrameInfo().getObjectOffset(FrameIndex);
++
++  LLVM_DEBUG(errs() << "FrameIndex : " << FrameIndex << "\n"
++                    << "spOffset : " << spOffset << "\n"
++                    << "stackSize : " << stackSize << "\n"
++                    << "SPAdj : " << SPAdj << "\n"
++                    << "alignment : "
++                    << DebugStr(MF.getFrameInfo().getObjectAlign(FrameIndex))
++                    << "\n");
++
++  LoongArchABIInfo ABI =
++      static_cast<const LoongArchTargetMachine &>(MF.getTarget()).getABI();
++
++  // Everything else is referenced relative to whatever register
++  // getFrameIndexReference() returns.
++  Register FrameReg;
++  StackOffset Offset =
++      TFI->getFrameIndexReference(MF, FrameIndex, FrameReg) +
++      StackOffset::getFixed(MI.getOperand(FIOperandNum + 1).getImm());
++
++  LLVM_DEBUG(errs() << "Location : "
++                    << "FrameReg<" << FrameReg << "> + " << Offset.getFixed()
++                    << "\n<--------->\n");
++
++  MachineBasicBlock &MBB = *MI.getParent();
++  DebugLoc DL = II->getDebugLoc();
++  bool IsKill = false;
++
++  if (!MI.isDebugValue()) {
++    // Make sure Offset fits within the field available.
++    // For ldptr/stptr/ll/sc instructions, this is a 14-bit signed immediate
++    // (scaled by 2), otherwise it is a 12-bit signed immediate.
++    unsigned OffsetBitSize = getLoadStoreOffsetSizeInBits(
++        MI.getOpcode(), MI.getOperand(FIOperandNum - 1));
++    const Align OffsetAlign(getLoadStoreOffsetAlign(MI.getOpcode()));
++
++    if (OffsetBitSize == 16 && isInt<12>(Offset.getFixed()) &&
++        !isAligned(OffsetAlign, Offset.getFixed())) {
++      // If the offset has to fit into a signed 16-bit (scaled) immediate but
++      // is not suitably aligned for it, and it does fit into 12 bits, then
++      // materialize the address with an ADDI.
++      const TargetRegisterClass *PtrRC = ABI.ArePtrs64bit() ?
&LoongArch::GPR64RegClass ++ : &LoongArch::GPR32RegClass; ++ MachineRegisterInfo &RegInfo = MBB.getParent()->getRegInfo(); ++ unsigned Reg = RegInfo.createVirtualRegister(PtrRC); ++ const LoongArchInstrInfo &TII = *static_cast( ++ MBB.getParent()->getSubtarget().getInstrInfo()); ++ BuildMI(MBB, II, DL, TII.get(ABI.GetPtrAddiOp()), Reg) ++ .addReg(FrameReg) ++ .addImm(Offset.getFixed()); ++ ++ FrameReg = Reg; ++ Offset = StackOffset::getFixed(0); ++ IsKill = true; ++ } else if (!isInt<12>(Offset.getFixed())) { ++ // Otherwise split the offset into several pieces and add it in multiple ++ // instructions. ++ const LoongArchInstrInfo &TII = *static_cast( ++ MBB.getParent()->getSubtarget().getInstrInfo()); ++ unsigned Reg = TII.loadImmediate(Offset.getFixed(), MBB, II, DL); ++ BuildMI(MBB, II, DL, TII.get(ABI.GetPtrAddOp()), Reg) ++ .addReg(FrameReg) ++ .addReg(Reg, RegState::Kill); ++ ++ FrameReg = Reg; ++ Offset = StackOffset::getFixed(0); ++ IsKill = true; ++ } ++ } ++ ++ MI.getOperand(FIOperandNum).ChangeToRegister(FrameReg, false, false, IsKill); ++ MI.getOperand(FIOperandNum + 1).ChangeToImmediate(Offset.getFixed()); ++} ++ ++Register LoongArchRegisterInfo:: ++getFrameRegister(const MachineFunction &MF) const { ++ const LoongArchSubtarget &Subtarget = MF.getSubtarget(); ++ const TargetFrameLowering *TFI = Subtarget.getFrameLowering(); ++ bool IsLP64 = static_cast(MF.getTarget()) ++ .getABI() ++ .IsLP64(); ++ ++ return TFI->hasFP(MF) ? (IsLP64 ? LoongArch::FP_64 : LoongArch::FP) ++ : (IsLP64 ? LoongArch::SP_64 : LoongArch::SP); ++} ++ ++const TargetRegisterClass * ++LoongArchRegisterInfo::intRegClass(unsigned Size) const { ++ if (Size == 4) ++ return &LoongArch::GPR32RegClass; ++ ++ assert(Size == 8); ++ return &LoongArch::GPR64RegClass; ++} ++ ++bool LoongArchRegisterInfo::canRealignStack(const MachineFunction &MF) const { ++ // Avoid realigning functions that explicitly do not want to be realigned. ++ // Normally, we should report an error when a function should be dynamically ++ // realigned but also has the attribute no-realign-stack. Unfortunately, ++ // with this attribute, MachineFrameInfo clamps each new object's alignment ++ // to that of the stack's alignment as specified by the ABI. As a result, ++ // the information of whether we have objects with larger alignment ++ // requirement than the stack's alignment is already lost at this point. ++ if (!TargetRegisterInfo::canRealignStack(MF)) ++ return false; ++ ++ const LoongArchSubtarget &Subtarget = MF.getSubtarget(); ++ unsigned FP = Subtarget.is64Bit() ? LoongArch::FP_64 : LoongArch::FP; ++ unsigned BP = Subtarget.is64Bit() ? LoongArch::S7_64 : LoongArch::S7; ++ ++ // We can't perform dynamic stack realignment if we can't reserve the ++ // frame pointer register. ++ if (!MF.getRegInfo().canReserveReg(FP)) ++ return false; ++ ++ // We can realign the stack if we know the maximum call frame size and we ++ // don't have variable sized objects. ++ if (Subtarget.getFrameLowering()->hasReservedCallFrame(MF)) ++ return true; ++ ++ // We have to reserve the base pointer register in the presence of variable ++ // sized objects. 
++ return MF.getRegInfo().canReserveReg(BP); ++} +diff --git a/lib/Target/LoongArch/LoongArchRegisterInfo.h b/lib/Target/LoongArch/LoongArchRegisterInfo.h +new file mode 100644 +index 00000000..dd3be916 +--- /dev/null ++++ b/lib/Target/LoongArch/LoongArchRegisterInfo.h +@@ -0,0 +1,80 @@ ++//===- LoongArchRegisterInfo.h - LoongArch Register Information Impl ------*- C++ -*-===// ++// ++// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. ++// See https://llvm.org/LICENSE.txt for license information. ++// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception ++// ++//===----------------------------------------------------------------------===// ++// ++// This file contains the LoongArch implementation of the TargetRegisterInfo class. ++// ++//===----------------------------------------------------------------------===// ++ ++#ifndef LLVM_LIB_TARGET_LOONGARCH_LOONGARCHREGISTERINFO_H ++#define LLVM_LIB_TARGET_LOONGARCH_LOONGARCHREGISTERINFO_H ++ ++#include "LoongArch.h" ++#include "llvm/CodeGen/MachineBasicBlock.h" ++#include ++ ++#define GET_REGINFO_HEADER ++#include "LoongArchGenRegisterInfo.inc" ++ ++namespace llvm { ++ ++class TargetRegisterClass; ++ ++class LoongArchRegisterInfo : public LoongArchGenRegisterInfo { ++public: ++ enum class LoongArchPtrClass { ++ /// The default register class for integer values. ++ Default = 0, ++ /// The stack pointer only. ++ StackPointer = 1, ++ }; ++ ++ LoongArchRegisterInfo(); ++ ++ /// Get PIC indirect call register ++ static unsigned getPICCallReg(); ++ ++ /// Code Generation virtual methods... ++ const TargetRegisterClass *getPointerRegClass(const MachineFunction &MF, ++ unsigned Kind) const override; ++ ++ unsigned getRegPressureLimit(const TargetRegisterClass *RC, ++ MachineFunction &MF) const override; ++ const MCPhysReg *getCalleeSavedRegs(const MachineFunction *MF) const override; ++ const uint32_t *getCallPreservedMask(const MachineFunction &MF, ++ CallingConv::ID) const override; ++ BitVector getReservedRegs(const MachineFunction &MF) const override; ++ ++ bool requiresRegisterScavenging(const MachineFunction &MF) const override; ++ ++ bool requiresFrameIndexScavenging(const MachineFunction &MF) const override; ++ ++ bool trackLivenessAfterRegAlloc(const MachineFunction &MF) const override; ++ ++ /// Stack Frame Processing Methods ++ void eliminateFrameIndex(MachineBasicBlock::iterator II, ++ int SPAdj, unsigned FIOperandNum, ++ RegScavenger *RS = nullptr) const override; ++ ++ // Stack realignment queries. ++ bool canRealignStack(const MachineFunction &MF) const override; ++ ++ /// Debug information queries. ++ Register getFrameRegister(const MachineFunction &MF) const override; ++ ++ /// Return GPR register class. ++ const TargetRegisterClass *intRegClass(unsigned Size) const; ++ ++private: ++ void eliminateFI(MachineBasicBlock::iterator II, unsigned OpNo, ++ int FrameIndex, uint64_t StackSize, ++ int SPAdj, int64_t SPOffset) const; ++}; ++ ++} // end namespace llvm ++ ++#endif // LLVM_LIB_TARGET_LOONGARCH_LOONGARCHREGISTERINFO_H +diff --git a/lib/Target/LoongArch/LoongArchRegisterInfo.td b/lib/Target/LoongArch/LoongArchRegisterInfo.td +new file mode 100644 +index 00000000..96569e07 +--- /dev/null ++++ b/lib/Target/LoongArch/LoongArchRegisterInfo.td +@@ -0,0 +1,373 @@ ++//===-- LoongArchRegisterInfo.td - LoongArch Register defs -----------*- tablegen -*-===// ++// ++// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. ++// See https://llvm.org/LICENSE.txt for license information. 
++// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception ++// ++//===----------------------------------------------------------------------===// ++ ++//===----------------------------------------------------------------------===// ++// Declarations that describe the LoongArch register file ++//===----------------------------------------------------------------------===// ++let Namespace = "LoongArch" in { ++def sub_32 : SubRegIndex<32>; ++def sub_64 : SubRegIndex<64>; ++def sub_128 : SubRegIndex<128>; ++def sub_fcsr1 : SubRegIndex<5>; ++def sub_fcsr2 : SubRegIndex<13, 16>; ++def sub_fcsr3 : SubRegIndex<2, 8>; ++def sub_lo : SubRegIndex<32>; ++def sub_hi : SubRegIndex<32, 32>; ++def PC : Register<"pc">; ++} ++ ++class Unallocatable { ++ bit isAllocatable = 0; ++} ++ ++/// We have banks of registers each. ++class LoongArchReg Enc, string n> : Register { ++ let HWEncoding = Enc; ++ let Namespace = "LoongArch"; ++} ++ ++class LoongArchRegWithSubRegs Enc, string n, list subregs> ++ : RegisterWithSubRegs { ++ let HWEncoding = Enc; ++ let Namespace = "LoongArch"; ++} ++ ++/// LoongArch 32-bit CPU Registers. ++class LoongArch32GPR Enc, string n> : LoongArchReg; ++ ++/// LoongArch 64-bit CPU Registers. ++class LoongArch64GPR Enc, string n, list subregs> ++ : LoongArchRegWithSubRegs { ++ let SubRegIndices = [sub_32]; ++} ++ ++/// LoongArch 64-bit Floating-point Registers ++class FGR32 Enc, string n> : LoongArchReg; ++class FGR64 Enc, string n, list subregs> ++ : LoongArchRegWithSubRegs { ++ let SubRegIndices = [sub_lo]; ++} ++ ++// LoongArch 128-bit (aliased) LSX Registers ++class LSX128 Enc, string n, list subregs> ++ : LoongArchRegWithSubRegs { ++ let SubRegIndices = [sub_64]; ++} ++ ++// LoongArch 256-bit (aliased) LASX Registers ++class LASX256 Enc, string n, list subregs> ++ : LoongArchRegWithSubRegs { ++ let SubRegIndices = [sub_128]; ++} ++ ++//===----------------------------------------------------------------------===// ++// Registers ++//===----------------------------------------------------------------------===// ++ ++/// General Purpose 32-bit Registers ++def ZERO : LoongArch32GPR<0, "zero">, ++ DwarfRegNum<[0]>; ++def RA : LoongArch32GPR<1, "ra">, DwarfRegNum<[1]>; ++def TP : LoongArch32GPR<2, "tp">, DwarfRegNum<[2]>; ++def SP : LoongArch32GPR<3, "sp">, DwarfRegNum<[3]>; ++def A0 : LoongArch32GPR<4, "r4">, DwarfRegNum<[4]>; ++def A1 : LoongArch32GPR<5, "r5">, DwarfRegNum<[5]>; ++def A2 : LoongArch32GPR<6, "r6">, DwarfRegNum<[6]>; ++def A3 : LoongArch32GPR<7, "r7">, DwarfRegNum<[7]>; ++def A4 : LoongArch32GPR<8, "r8">, DwarfRegNum<[8]>; ++def A5 : LoongArch32GPR<9, "r9">, DwarfRegNum<[9]>; ++def A6 : LoongArch32GPR<10, "r10">, DwarfRegNum<[10]>; ++def A7 : LoongArch32GPR<11, "r11">, DwarfRegNum<[11]>; ++def T0 : LoongArch32GPR<12, "r12">, DwarfRegNum<[12]>; ++def T1 : LoongArch32GPR<13, "r13">, DwarfRegNum<[13]>; ++def T2 : LoongArch32GPR<14, "r14">, DwarfRegNum<[14]>; ++def T3 : LoongArch32GPR<15, "r15">, DwarfRegNum<[15]>; ++def T4 : LoongArch32GPR<16, "r16">, DwarfRegNum<[16]>; ++def T5 : LoongArch32GPR<17, "r17">, DwarfRegNum<[17]>; ++def T6 : LoongArch32GPR<18, "r18">, DwarfRegNum<[18]>; ++def T7 : LoongArch32GPR<19, "r19">, DwarfRegNum<[19]>; ++def T8 : LoongArch32GPR<20, "r20">, DwarfRegNum<[20]>; ++def T9 : LoongArch32GPR<21, "r21">, DwarfRegNum<[21]>; ++def FP : LoongArch32GPR<22, "r22">, DwarfRegNum<[22]>; ++def S0 : LoongArch32GPR<23, "r23">, DwarfRegNum<[23]>; ++def S1 : LoongArch32GPR<24, "r24">, DwarfRegNum<[24]>; ++def S2 : LoongArch32GPR<25, "r25">, 
DwarfRegNum<[25]>; ++def S3 : LoongArch32GPR<26, "r26">, DwarfRegNum<[26]>; ++def S4 : LoongArch32GPR<27, "r27">, DwarfRegNum<[27]>; ++def S5 : LoongArch32GPR<28, "r28">, DwarfRegNum<[28]>; ++def S6 : LoongArch32GPR<29, "r29">, DwarfRegNum<[29]>; ++def S7 : LoongArch32GPR<30, "r30">, DwarfRegNum<[30]>; ++def S8 : LoongArch32GPR<31, "r31">, DwarfRegNum<[31]>; ++ ++let SubRegIndices = [sub_32] in { ++def V0 : LoongArchRegWithSubRegs<4, "r4", [A0]>, DwarfRegNum<[4]>; ++def V1 : LoongArchRegWithSubRegs<5, "r5", [A1]>, DwarfRegNum<[5]>; ++} ++ ++/// General Purpose 64-bit Registers ++def ZERO_64 : LoongArch64GPR<0, "zero", [ZERO]>, DwarfRegNum<[0]>; ++def RA_64 : LoongArch64GPR<1, "ra", [RA]>, DwarfRegNum<[1]>; ++def TP_64 : LoongArch64GPR<2, "tp", [TP]>, DwarfRegNum<[2]>; ++def SP_64 : LoongArch64GPR<3, "sp", [SP]>, DwarfRegNum<[3]>; ++def A0_64 : LoongArch64GPR<4, "r4", [A0]>, DwarfRegNum<[4]>; ++def A1_64 : LoongArch64GPR<5, "r5", [A1]>, DwarfRegNum<[5]>; ++def A2_64 : LoongArch64GPR<6, "r6", [A2]>, DwarfRegNum<[6]>; ++def A3_64 : LoongArch64GPR<7, "r7", [A3]>, DwarfRegNum<[7]>; ++def A4_64 : LoongArch64GPR<8, "r8", [A4]>, DwarfRegNum<[8]>; ++def A5_64 : LoongArch64GPR<9, "r9", [A5]>, DwarfRegNum<[9]>; ++def A6_64 : LoongArch64GPR<10, "r10", [A6]>, DwarfRegNum<[10]>; ++def A7_64 : LoongArch64GPR<11, "r11", [A7]>, DwarfRegNum<[11]>; ++def T0_64 : LoongArch64GPR<12, "r12", [T0]>, DwarfRegNum<[12]>; ++def T1_64 : LoongArch64GPR<13, "r13", [T1]>, DwarfRegNum<[13]>; ++def T2_64 : LoongArch64GPR<14, "r14", [T2]>, DwarfRegNum<[14]>; ++def T3_64 : LoongArch64GPR<15, "r15", [T3]>, DwarfRegNum<[15]>; ++def T4_64 : LoongArch64GPR<16, "r16", [T4]>, DwarfRegNum<[16]>; ++def T5_64 : LoongArch64GPR<17, "r17", [T5]>, DwarfRegNum<[17]>; ++def T6_64 : LoongArch64GPR<18, "r18", [T6]>, DwarfRegNum<[18]>; ++def T7_64 : LoongArch64GPR<19, "r19", [T7]>, DwarfRegNum<[19]>; ++def T8_64 : LoongArch64GPR<20, "r20", [T8]>, DwarfRegNum<[20]>; ++def T9_64 : LoongArch64GPR<21, "r21", [T9]>, DwarfRegNum<[21]>; ++def FP_64 : LoongArch64GPR<22, "r22", [FP]>, DwarfRegNum<[22]>; ++def S0_64 : LoongArch64GPR<23, "r23", [S0]>, DwarfRegNum<[23]>; ++def S1_64 : LoongArch64GPR<24, "r24", [S1]>, DwarfRegNum<[24]>; ++def S2_64 : LoongArch64GPR<25, "r25", [S2]>, DwarfRegNum<[25]>; ++def S3_64 : LoongArch64GPR<26, "r26", [S3]>, DwarfRegNum<[26]>; ++def S4_64 : LoongArch64GPR<27, "r27", [S4]>, DwarfRegNum<[27]>; ++def S5_64 : LoongArch64GPR<28, "r28", [S5]>, DwarfRegNum<[28]>; ++def S6_64 : LoongArch64GPR<29, "r29", [S6]>, DwarfRegNum<[29]>; ++def S7_64 : LoongArch64GPR<30, "r30", [S7]>, DwarfRegNum<[30]>; ++def S8_64 : LoongArch64GPR<31, "r31", [S8]>, DwarfRegNum<[31]>; ++ ++let SubRegIndices = [sub_64] in { ++def V0_64 : LoongArch64GPR<4, "r4", [A0_64]>, DwarfRegNum<[4]>; ++def V1_64 : LoongArch64GPR<5, "r5", [A1_64]>, DwarfRegNum<[5]>; ++} ++ ++/// FP registers ++foreach I = 0-31 in ++def F#I : FGR32, DwarfRegNum<[!add(I, 32)]>; ++ ++foreach I = 0-31 in ++def F#I#_64 : FGR64("F"#I)]>, DwarfRegNum<[!add(I, 32)]>; ++ ++/// FP Condition Flag 0~7 ++foreach I = 0-7 in ++def FCC#I : LoongArchReg; ++ ++/// FP Control and Status Registers, FCSR 1~3 ++foreach I = 1-3 in ++def FCSR#I : LoongArchReg; ++ ++class FCSRReg Enc, string n, list subregs> : ++ RegisterWithSubRegs { ++// field bits<2> chan_encoding = 0; ++ let Namespace = "LoongArch"; ++ let SubRegIndices = [sub_fcsr1, sub_fcsr2, sub_fcsr3]; ++// let HWEncoding{8-0} = encoding{8-0}; ++// let HWEncoding{10-9} = chan_encoding; ++} ++ ++def FCSR0 : FCSRReg<0, "fcsr0", [FCSR1, FCSR2, 
FCSR3]>; ++ ++/// PC register ++//let NameSpace = "LoongArch" in ++//def PC : Register<"pc">; ++ ++//===----------------------------------------------------------------------===// ++// Register Classes ++//===----------------------------------------------------------------------===// ++ ++def GPR32 : RegisterClass<"LoongArch", [i32], 32, (add ++ // Reserved ++ ZERO, ++ // Return Values and Arguments ++ A0, A1, A2, A3, A4, A5, A6, A7, ++ // Not preserved across procedure calls ++ T0, T1, T2, T3, T4, T5, T6, T7, T8, ++ // Callee save ++ S0, S1, S2, S3, S4, S5, S6, S7, S8, ++ // Reserved ++ RA, TP, SP, ++ // Reserved ++ T9, FP)>; ++ ++def GPR64 : RegisterClass<"LoongArch", [i64], 64, (add ++ // Reserved ++ ZERO_64, ++ // Return Values and Arguments ++ A0_64, A1_64, A2_64, A3_64, A4_64, A5_64, A6_64, A7_64, ++ // Not preserved across procedure calls ++ T0_64, T1_64, T2_64, T3_64, T4_64, T5_64, T6_64, T7_64, T8_64, ++ // Callee save ++ S0_64, S1_64, S2_64, S3_64, S4_64, S5_64, S6_64, S7_64, S8_64, ++ // Reserved ++ RA_64, TP_64, SP_64, ++ // Reserved ++ T9_64, FP_64)>; ++ ++def GPRTC64 : RegisterClass<"LoongArch", [i64], 64, (add ++ // Return Values and Arguments ++ A0_64, A1_64, A2_64, A3_64, A4_64, A5_64, A6_64, A7_64, ++ // Not preserved across procedure calls ++ T0_64, T1_64, T2_64, T3_64, T4_64, T5_64, T6_64, T7_64, T8_64)>; ++ ++/// FP Registers. ++def FGR64 : RegisterClass<"LoongArch", [f64], 64, (sequence "F%u_64", 0, 31)>; ++def FGR32 : RegisterClass<"LoongArch", [f32], 64, (sequence "F%u", 0, 31)>; ++ ++/// FP condition Flag registers. ++def FCFR : RegisterClass<"LoongArch", [i32], 32, (sequence "FCC%u", 0, 7)>, ++ Unallocatable; ++ ++def SP32 : RegisterClass<"LoongArch", [i32], 32, (add SP)>, Unallocatable; ++def SP64 : RegisterClass<"LoongArch", [i64], 64, (add SP_64)>, Unallocatable; ++def TP32 : RegisterClass<"LoongArch", [i32], 32, (add TP)>, Unallocatable; ++def TP64 : RegisterClass<"LoongArch", [i64], 64, (add TP_64)>, Unallocatable; ++ ++/// FP control and Status registers. ++def FCSR : RegisterClass<"LoongArch", [i32], 4, (sequence "FCSR%u", 0, 3)>, ++ Unallocatable; ++ ++//LSX ++foreach I = 0-31 in ++def VR#I : LSX128("F"#I#"_64")]>, ++ DwarfRegNum<[!add(I, 32)]>; ++ ++//LASX ++foreach I = 0-31 in ++def XR#I : LASX256("VR"#I)]>, ++ DwarfRegNum<[!add(I, 32)]>; ++ ++def LSX128B: RegisterClass<"LoongArch", [v16i8], 128, ++ (sequence "VR%u", 0, 31)>; ++ ++def LSX128H: RegisterClass<"LoongArch", [v8i16], 128, ++ (sequence "VR%u", 0, 31)>; ++ ++def LSX128W: RegisterClass<"LoongArch", [v4i32, v4f32], 128, ++ (sequence "VR%u", 0, 31)>; ++ ++def LSX128D: RegisterClass<"LoongArch", [v2i64, v2f64], 128, ++ (sequence "VR%u", 0, 31)>; ++ ++def LASX256B: RegisterClass<"LoongArch", [v32i8], 256, ++ (sequence "XR%u", 0, 31)>; ++def LASX256H: RegisterClass<"LoongArch", [v16i16], 256, ++ (sequence "XR%u", 0, 31)>; ++def LASX256W: RegisterClass<"LoongArch", [v8i32, v8f32], 256, ++ (sequence "XR%u", 0, 31)>; ++def LASX256D: RegisterClass<"LoongArch", [v4i64, v4f64], 256, ++ (sequence "XR%u", 0, 31)>; ++ ++//===----------------------------------------------------------------------===// ++// Register Operands. 
++//===----------------------------------------------------------------------===// ++ ++class LoongArchAsmRegOperand : AsmOperandClass { ++ let ParserMethod = "parseAnyRegister"; ++} ++ ++def GPR32AsmOperand : LoongArchAsmRegOperand { ++ let Name = "GPR32AsmReg"; ++ let PredicateMethod = "isGPRAsmReg"; ++} ++ ++def GPR64AsmOperand : LoongArchAsmRegOperand { ++ let Name = "GPR64AsmReg"; ++ let PredicateMethod = "isGPRAsmReg"; ++} ++ ++def FGR32AsmOperand : LoongArchAsmRegOperand { ++ let Name = "FGR32AsmReg"; ++ let PredicateMethod = "isFGRAsmReg"; ++} ++ ++def FGR64AsmOperand : LoongArchAsmRegOperand { ++ let Name = "FGR64AsmReg"; ++ let PredicateMethod = "isFGRAsmReg"; ++} ++ ++def FCSRAsmOperand : LoongArchAsmRegOperand { ++ let Name = "FCSRAsmReg"; ++} ++ ++def FCFRAsmOperand : LoongArchAsmRegOperand { ++ let Name = "FCFRAsmReg"; ++} ++ ++//LSX ++def LSX128AsmOperand : LoongArchAsmRegOperand { ++ let Name = "LSX128AsmReg"; ++} ++ ++//LASX ++def LASX256AsmOperand : LoongArchAsmRegOperand { ++ let Name = "LASX256AsmReg"; ++} ++ ++def GPR32Opnd : RegisterOperand { ++ let ParserMatchClass = GPR32AsmOperand; ++} ++ ++def GPR64Opnd : RegisterOperand { ++ let ParserMatchClass = GPR64AsmOperand; ++} ++ ++def GPRTC64Opnd : RegisterOperand { ++ let ParserMatchClass = GPR64AsmOperand; ++} ++ ++def FGR32Opnd : RegisterOperand { ++ let ParserMatchClass = FGR32AsmOperand; ++} ++ ++def FGR64Opnd : RegisterOperand { ++ let ParserMatchClass = FGR64AsmOperand; ++} ++ ++def FCSROpnd : RegisterOperand { ++ let ParserMatchClass = FCSRAsmOperand; ++} ++ ++def FCFROpnd : RegisterOperand { ++ let ParserMatchClass = FCFRAsmOperand; ++} ++ ++//LSX ++def LSX128BOpnd : RegisterOperand { ++ let ParserMatchClass = LSX128AsmOperand; ++} ++ ++def LSX128HOpnd : RegisterOperand { ++ let ParserMatchClass = LSX128AsmOperand; ++} ++ ++def LSX128WOpnd : RegisterOperand { ++ let ParserMatchClass = LSX128AsmOperand; ++} ++ ++def LSX128DOpnd : RegisterOperand { ++ let ParserMatchClass = LSX128AsmOperand; ++} ++ ++//LASX ++def LASX256BOpnd : RegisterOperand { ++ let ParserMatchClass = LASX256AsmOperand; ++} ++ ++def LASX256HOpnd : RegisterOperand { ++ let ParserMatchClass = LASX256AsmOperand; ++} ++ ++def LASX256WOpnd : RegisterOperand { ++ let ParserMatchClass = LASX256AsmOperand; ++} ++ ++def LASX256DOpnd : RegisterOperand { ++ let ParserMatchClass = LASX256AsmOperand; ++} +diff --git a/lib/Target/LoongArch/LoongArchSubtarget.cpp b/lib/Target/LoongArch/LoongArchSubtarget.cpp +new file mode 100644 +index 00000000..ebc7a514 +--- /dev/null ++++ b/lib/Target/LoongArch/LoongArchSubtarget.cpp +@@ -0,0 +1,112 @@ ++//===-- LoongArchSubtarget.cpp - LoongArch Subtarget Information --------------------===// ++// ++// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. ++// See https://llvm.org/LICENSE.txt for license information. ++// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception ++// ++//===----------------------------------------------------------------------===// ++// ++// This file implements the LoongArch specific subclass of TargetSubtargetInfo. 
++// ++//===----------------------------------------------------------------------===// ++ ++#include "LoongArchSubtarget.h" ++#include "LoongArch.h" ++#include "LoongArchMachineFunction.h" ++#include "LoongArchRegisterInfo.h" ++#include "LoongArchTargetMachine.h" ++#include "llvm/IR/Attributes.h" ++#include "llvm/IR/Function.h" ++#include "llvm/Support/CommandLine.h" ++#include "llvm/Support/Debug.h" ++#include "llvm/MC/TargetRegistry.h" ++#include "llvm/Support/raw_ostream.h" ++ ++using namespace llvm; ++ ++#define DEBUG_TYPE "loongarch-subtarget" ++ ++#define GET_SUBTARGETINFO_TARGET_DESC ++#define GET_SUBTARGETINFO_CTOR ++#include "LoongArchGenSubtargetInfo.inc" ++ ++void LoongArchSubtarget::anchor() {} ++ ++LoongArchSubtarget::LoongArchSubtarget(const Triple &TT, StringRef CPU, ++ StringRef FS, ++ const LoongArchTargetMachine &TM, ++ MaybeAlign StackAlignOverride) ++ : LoongArchGenSubtargetInfo(TT, CPU, /*TuneCPU*/ CPU, FS), HasLA64(false), ++ HasBasicF(false), HasBasicD(false), HasLSX(false), HasLASX(false), ++ UnalignedAccess(false), StackAlignOverride(StackAlignOverride), TM(TM), ++ TargetTriple(TT), TSInfo(), ++ InstrInfo(initializeSubtargetDependencies(CPU, FS, TM)), ++ FrameLowering(*this), TLInfo(TM, *this) { ++ ++ // Check if Architecture and ABI are compatible. ++ assert(((!is64Bit() && isABI_ILP32()) || (is64Bit() && isABI_LP64())) && ++ "Invalid Arch & ABI pair."); ++ ++ if (hasLSX() && !hasBasicD()) ++ report_fatal_error("LSX requires 64-bit floating point register." ++ "See -mattr=+d.", ++ false); ++ ++} ++ ++bool LoongArchSubtarget::isPositionIndependent() const { ++ return TM.isPositionIndependent(); ++} ++ ++/// This overrides the PostRAScheduler bit in the SchedModel for any CPU. ++bool LoongArchSubtarget::enablePostRAScheduler() const { return true; } ++ ++void LoongArchSubtarget::getCriticalPathRCs(RegClassVector &CriticalPathRCs) const { ++ CriticalPathRCs.clear(); ++ CriticalPathRCs.push_back(is64Bit() ? &LoongArch::GPR64RegClass ++ : &LoongArch::GPR32RegClass); ++} ++ ++CodeGenOpt::Level LoongArchSubtarget::getOptLevelToEnablePostRAScheduler() const { ++ return CodeGenOpt::Aggressive; ++} ++ ++LoongArchSubtarget & ++LoongArchSubtarget::initializeSubtargetDependencies(StringRef CPU, StringRef FS, ++ const TargetMachine &TM) { ++ StringRef CPUName = LoongArch_MC::selectLoongArchCPU(TM.getTargetTriple(), CPU); ++ ++ // Parse features string. ++ ParseSubtargetFeatures(CPUName, /*TuneCPU*/ CPUName, FS); ++ // Initialize scheduling itinerary for the specified CPU. 
++ InstrItins = getInstrItineraryForCPU(CPUName); ++ ++ if (StackAlignOverride) ++ stackAlignment = *StackAlignOverride; ++ else if (isABI_LP64()) ++ stackAlignment = Align(16); ++ else { ++ assert(isABI_ILP32() && "Unknown ABI for stack alignment!"); ++ stackAlignment = Align(8); ++ } ++ ++ return *this; ++} ++ ++Reloc::Model LoongArchSubtarget::getRelocationModel() const { ++ return TM.getRelocationModel(); ++} ++ ++bool LoongArchSubtarget::isABI_LP64D() const { return getABI().IsLP64D(); } ++bool LoongArchSubtarget::isABI_LP64S() const { return getABI().IsLP64S(); } ++bool LoongArchSubtarget::isABI_LP64F() const { return getABI().IsLP64F(); } ++bool LoongArchSubtarget::isABI_LP64() const { ++ return isABI_LP64D() || isABI_LP64S() || isABI_LP64F(); ++} ++bool LoongArchSubtarget::isABI_ILP32D() const { return getABI().IsILP32D(); } ++bool LoongArchSubtarget::isABI_ILP32F() const { return getABI().IsILP32F(); } ++bool LoongArchSubtarget::isABI_ILP32S() const { return getABI().IsILP32S(); } ++bool LoongArchSubtarget::isABI_ILP32() const { ++ return isABI_ILP32D() || isABI_ILP32F() || isABI_ILP32S(); ++} ++const LoongArchABIInfo &LoongArchSubtarget::getABI() const { return TM.getABI(); } +diff --git a/lib/Target/LoongArch/LoongArchSubtarget.h b/lib/Target/LoongArch/LoongArchSubtarget.h +new file mode 100644 +index 00000000..7b1d3061 +--- /dev/null ++++ b/lib/Target/LoongArch/LoongArchSubtarget.h +@@ -0,0 +1,146 @@ ++//===-- LoongArchSubtarget.h - Define Subtarget for the LoongArch ---------*- C++ -*-===// ++// ++// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. ++// See https://llvm.org/LICENSE.txt for license information. ++// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception ++// ++//===----------------------------------------------------------------------===// ++// ++// This file declares the LoongArch specific subclass of TargetSubtargetInfo. ++// ++//===----------------------------------------------------------------------===// ++ ++#ifndef LLVM_LIB_TARGET_LOONGARCH_LOONGARCHSUBTARGET_H ++#define LLVM_LIB_TARGET_LOONGARCH_LOONGARCHSUBTARGET_H ++ ++#include "MCTargetDesc/LoongArchABIInfo.h" ++#include "LoongArchFrameLowering.h" ++#include "LoongArchISelLowering.h" ++#include "LoongArchInstrInfo.h" ++#include "llvm/CodeGen/SelectionDAGTargetInfo.h" ++#include "llvm/CodeGen/TargetSubtargetInfo.h" ++#include "llvm/IR/DataLayout.h" ++#include "llvm/MC/MCInstrItineraries.h" ++#include "llvm/Support/ErrorHandling.h" ++#include ++ ++#define GET_SUBTARGETINFO_HEADER ++#include "LoongArchGenSubtargetInfo.inc" ++ ++namespace llvm { ++class StringRef; ++ ++class LoongArchTargetMachine; ++ ++class LoongArchSubtarget : public LoongArchGenSubtargetInfo { ++ virtual void anchor(); ++ ++ // HasLA64 - The target processor has LA64 ISA support. ++ bool HasLA64; ++ ++ // HasBasicF - The target restricts the use of hardware floating-point ++ // instructions to 32-bit operations. ++ bool HasBasicF; ++ ++ // HasBasicD - The target allows hardware floating-point instructions to ++ // cover both 32-bit and 64-bit operations. ++ bool HasBasicD; ++ ++ /// Features related to the presence of specific instructions. ++ ++ // HasLSX - Supports LSX. ++ bool HasLSX; ++ ++ // HasLASX - Supports LASX. ++ bool HasLASX; ++ ++ /// The minimum alignment known to hold of the stack frame on ++ /// entry to the function and which must be maintained by every function. ++ Align stackAlignment; ++ ++ // Allow unaligned memory accesses. 
++ bool UnalignedAccess; ++ ++ /// The overridden stack alignment. ++ MaybeAlign StackAlignOverride; ++ ++ InstrItineraryData InstrItins; ++ ++ const LoongArchTargetMachine &TM; ++ ++ Triple TargetTriple; ++ ++ const SelectionDAGTargetInfo TSInfo; ++ const LoongArchInstrInfo InstrInfo; ++ const LoongArchFrameLowering FrameLowering; ++ const LoongArchTargetLowering TLInfo; ++ ++public: ++ bool isPositionIndependent() const; ++ /// This overrides the PostRAScheduler bit in the SchedModel for each CPU. ++ bool enablePostRAScheduler() const override; ++ void getCriticalPathRCs(RegClassVector &CriticalPathRCs) const override; ++ CodeGenOpt::Level getOptLevelToEnablePostRAScheduler() const override; ++ ++ bool isABI_LP64() const; ++ bool isABI_LP64D() const; ++ bool isABI_LP64S() const; ++ bool isABI_LP64F() const; ++ bool isABI_ILP32() const; ++ bool isABI_ILP32D() const; ++ bool isABI_ILP32F() const; ++ bool isABI_ILP32S() const; ++ const LoongArchABIInfo &getABI() const; ++ ++ /// This constructor initializes the data members to match that ++ /// of the specified triple. ++ LoongArchSubtarget(const Triple &TT, StringRef CPU, StringRef FS, ++ const LoongArchTargetMachine &TM, MaybeAlign StackAlignOverride); ++ ++ /// ParseSubtargetFeatures - Parses features string setting specified ++ /// subtarget options. Definition of function is auto generated by tblgen. ++ void ParseSubtargetFeatures(StringRef CPU, StringRef TuneCPU, StringRef FS); ++ ++ bool is64Bit() const { return HasLA64; } ++ bool hasBasicD() const { return HasBasicD; } ++ unsigned getGPRSizeInBytes() const { return is64Bit() ? 8 : 4; } ++ bool hasLSX() const { return HasLSX; } ++ bool hasLASX() const { return HasLASX; } ++ bool hasBasicF() const { return HasBasicF; } ++ bool useSoftFloat() const { return (!HasBasicD && !HasBasicF); } ++ ++ bool allowUnalignedAccess() const { return UnalignedAccess; } ++ ++ // After compiler-rt is supported in LA, this returns true. ++ bool isXRaySupported() const override { return false; } ++ ++ Align getStackAlignment() const { return stackAlignment; } ++ ++ // Grab relocation model ++ Reloc::Model getRelocationModel() const; ++ ++ LoongArchSubtarget &initializeSubtargetDependencies(StringRef CPU, StringRef FS, ++ const TargetMachine &TM); ++ ++ const SelectionDAGTargetInfo *getSelectionDAGInfo() const override { ++ return &TSInfo; ++ } ++ const LoongArchInstrInfo *getInstrInfo() const override { ++ return &InstrInfo; ++ } ++ const TargetFrameLowering *getFrameLowering() const override { ++ return &FrameLowering; ++ } ++ const LoongArchRegisterInfo *getRegisterInfo() const override { ++ return &InstrInfo.getRegisterInfo(); ++ } ++ const LoongArchTargetLowering *getTargetLowering() const override { ++ return &TLInfo; ++ } ++ const InstrItineraryData *getInstrItineraryData() const override { ++ return &InstrItins; ++ } ++}; ++} // End llvm namespace ++ ++#endif +diff --git a/lib/Target/LoongArch/LoongArchTargetMachine.cpp b/lib/Target/LoongArch/LoongArchTargetMachine.cpp +new file mode 100644 +index 00000000..2aa86a65 +--- /dev/null ++++ b/lib/Target/LoongArch/LoongArchTargetMachine.cpp +@@ -0,0 +1,186 @@ ++//===-- LoongArchTargetMachine.cpp - Define TargetMachine for LoongArch -------------===// ++// ++// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. ++// See https://llvm.org/LICENSE.txt for license information. 
++// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception ++// ++//===----------------------------------------------------------------------===// ++// ++// Implements the info about LoongArch target spec. ++// ++//===----------------------------------------------------------------------===// ++ ++#include "LoongArchTargetMachine.h" ++#include "LoongArch.h" ++#include "LoongArchISelDAGToDAG.h" ++#include "LoongArchSubtarget.h" ++#include "LoongArchTargetObjectFile.h" ++#include "LoongArchTargetTransformInfo.h" ++#include "MCTargetDesc/LoongArchABIInfo.h" ++#include "MCTargetDesc/LoongArchMCTargetDesc.h" ++#include "llvm/ADT/Optional.h" ++#include "llvm/ADT/STLExtras.h" ++#include "llvm/ADT/StringRef.h" ++#include "llvm/Analysis/TargetTransformInfo.h" ++#include "llvm/CodeGen/BasicTTIImpl.h" ++#include "llvm/CodeGen/MachineFunction.h" ++#include "llvm/CodeGen/Passes.h" ++#include "llvm/CodeGen/TargetPassConfig.h" ++#include "llvm/IR/Attributes.h" ++#include "llvm/IR/Function.h" ++#include "llvm/MC/TargetRegistry.h" ++#include "llvm/Support/CodeGen.h" ++#include "llvm/Support/Debug.h" ++#include "llvm/Support/raw_ostream.h" ++#include "llvm/Target/TargetOptions.h" ++#include ++#include ++ ++using namespace llvm; ++ ++#define DEBUG_TYPE "loongarch" ++ ++extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeLoongArchTarget() { ++ // Register the target. ++ RegisterTargetMachine X(getTheLoongArch32Target()); ++ RegisterTargetMachine A(getTheLoongArch64Target()); ++} ++ ++static std::string computeDataLayout(const Triple &TT, StringRef CPU, ++ const TargetOptions &Options) { ++ std::string Ret; ++ LoongArchABIInfo ABI = LoongArchABIInfo::computeTargetABI(TT, CPU, Options.MCOptions); ++ ++ Ret += "e"; ++ ++ if (ABI.IsILP32()) ++ // TODO ++ llvm_unreachable("Unimplemented ABI"); ++ else ++ Ret += "-m:e"; ++ ++ // Pointers are 32 bit on some ABIs. ++ if (!ABI.IsLP64()) ++ Ret += "-p:32:32"; ++ ++ // 8 and 16 bit integers only need to have natural alignment, but try to ++ // align them to 32 bits. 64 bit integers have natural alignment. ++ Ret += "-i8:8:32-i16:16:32-i64:64"; ++ ++ // 32 bit registers are always available and the stack is at least 64 bit ++ // aligned. On LP64 64 bit registers are also available and the stack is ++ // 128 bit aligned. ++ if (ABI.IsLP64()) ++ Ret += "-n32:64-S128"; ++ else ++ Ret += "-n32-S64"; ++ ++ return Ret; ++} ++ ++static Reloc::Model getEffectiveRelocModel(bool JIT, ++ Optional RM) { ++ if (!RM.hasValue() || JIT) ++ return Reloc::Static; ++ return *RM; ++} ++ ++// On function prologue, the stack is created by decrementing ++// its pointer. Once decremented, all references are done with positive ++// offset from the stack/frame pointer, using StackGrowsUp enables ++// an easier handling. ++// Using CodeModel::Large enables different CALL behavior. 
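++// For reference, with the default 64-bit lp64d configuration the string
++// assembled by computeDataLayout() above evaluates to:
++//   e-m:e-i8:8:32-i16:16:32-i64:64-n32:64-S128
++// i.e. little-endian, ELF-style mangling, 32/64-bit native integer widths
++// and a 128-bit aligned stack.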
++LoongArchTargetMachine::LoongArchTargetMachine(const Target &T, const Triple &TT, ++ StringRef CPU, StringRef FS, ++ const TargetOptions &Options, ++ Optional RM, ++ Optional CM, ++ CodeGenOpt::Level OL, bool JIT) ++ : LLVMTargetMachine(T, computeDataLayout(TT, CPU, Options), TT, ++ CPU, FS, Options, getEffectiveRelocModel(JIT, RM), ++ getEffectiveCodeModel(CM, CodeModel::Small), OL), ++ TLOF(std::make_unique()), ++ ABI(LoongArchABIInfo::computeTargetABI(TT, CPU, Options.MCOptions)) { ++ initAsmInfo(); ++} ++ ++LoongArchTargetMachine::~LoongArchTargetMachine() = default; ++ ++const LoongArchSubtarget * ++LoongArchTargetMachine::getSubtargetImpl(const Function &F) const { ++ Attribute CPUAttr = F.getFnAttribute("target-cpu"); ++ Attribute FSAttr = F.getFnAttribute("target-features"); ++ ++ std::string CPU = !CPUAttr.hasAttribute(Attribute::None) ++ ? CPUAttr.getValueAsString().str() ++ : TargetCPU; ++ std::string FS = !FSAttr.hasAttribute(Attribute::None) ++ ? FSAttr.getValueAsString().str() ++ : TargetFS; ++ ++ auto &I = SubtargetMap[CPU + FS]; ++ if (!I) { ++ // This needs to be done before we create a new subtarget since any ++ // creation will depend on the TM and the code generation flags on the ++ // function that reside in TargetOptions. ++ resetTargetOptions(F); ++ I = std::make_unique(TargetTriple, CPU, FS, *this, ++ MaybeAlign(F.getParent()->getOverrideStackAlignment())); ++ } ++ return I.get(); ++} ++ ++namespace { ++ ++/// LoongArch Code Generator Pass Configuration Options. ++class LoongArchPassConfig : public TargetPassConfig { ++public: ++ LoongArchPassConfig(LoongArchTargetMachine &TM, PassManagerBase &PM) ++ : TargetPassConfig(TM, PM) { ++ } ++ ++ LoongArchTargetMachine &getLoongArchTargetMachine() const { ++ return getTM(); ++ } ++ ++ void addIRPasses() override; ++ bool addInstSelector() override; ++ void addPreEmitPass() override; ++}; ++ ++} // end anonymous namespace ++ ++TargetPassConfig *LoongArchTargetMachine::createPassConfig(PassManagerBase &PM) { ++ return new LoongArchPassConfig(*this, PM); ++} ++ ++void LoongArchPassConfig::addIRPasses() { ++ TargetPassConfig::addIRPasses(); ++ addPass(createAtomicExpandPass()); ++} ++// Install an instruction selector pass using ++// the ISelDag to gen LoongArch code. ++bool LoongArchPassConfig::addInstSelector() { ++ addPass(createLoongArchModuleISelDagPass()); ++ addPass(createLoongArchISelDag(getLoongArchTargetMachine(), getOptLevel())); ++ return false; ++} ++ ++TargetTransformInfo ++LoongArchTargetMachine::getTargetTransformInfo(const Function &F) { ++ LLVM_DEBUG(errs() << "Target Transform Info Pass Added\n"); ++ return TargetTransformInfo(BasicTTIImpl(this, F)); ++} ++ ++// Implemented by targets that want to run passes immediately before ++// machine code is emitted. return true if -print-machineinstrs should ++// print out the code after the passes. ++void LoongArchPassConfig::addPreEmitPass() { ++ // Expand pseudo instructions that are sensitive to register allocation. ++ addPass(createLoongArchExpandPseudoPass()); ++ ++ // Relax conditional branch instructions if they're otherwise out of ++ // range of their destination. 
++ // This pass must be run after any pseudo instruction expansion ++ addPass(&BranchRelaxationPassID); ++} +diff --git a/lib/Target/LoongArch/LoongArchTargetMachine.h b/lib/Target/LoongArch/LoongArchTargetMachine.h +new file mode 100644 +index 00000000..ae09adf7 +--- /dev/null ++++ b/lib/Target/LoongArch/LoongArchTargetMachine.h +@@ -0,0 +1,68 @@ ++//===- LoongArchTargetMachine.h - Define TargetMachine for LoongArch ------*- C++ -*-===// ++// ++// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. ++// See https://llvm.org/LICENSE.txt for license information. ++// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception ++// ++//===----------------------------------------------------------------------===// ++// ++// This file declares the LoongArch specific subclass of TargetMachine. ++// ++//===----------------------------------------------------------------------===// ++ ++#ifndef LLVM_LIB_TARGET_LOONGARCH_LOONGARCHTARGETMACHINE_H ++#define LLVM_LIB_TARGET_LOONGARCH_LOONGARCHTARGETMACHINE_H ++ ++#include "MCTargetDesc/LoongArchABIInfo.h" ++#include "LoongArchSubtarget.h" ++#include "llvm/ADT/Optional.h" ++#include "llvm/ADT/StringMap.h" ++#include "llvm/ADT/StringRef.h" ++#include "llvm/Support/CodeGen.h" ++#include "llvm/Target/TargetMachine.h" ++#include ++ ++namespace llvm { ++ ++class LoongArchTargetMachine : public LLVMTargetMachine { ++ std::unique_ptr TLOF; ++ // Selected ABI ++ LoongArchABIInfo ABI; ++ ++ mutable StringMap> SubtargetMap; ++ ++public: ++ LoongArchTargetMachine(const Target &T, const Triple &TT, StringRef CPU, ++ StringRef FS, const TargetOptions &Options, ++ Optional RM, Optional CM, ++ CodeGenOpt::Level OL, bool JIT); ++ ~LoongArchTargetMachine() override; ++ ++ TargetTransformInfo getTargetTransformInfo(const Function &F) override; ++ const LoongArchSubtarget *getSubtargetImpl(const Function &F) const override; ++ ++ // Pass Pipeline Configuration ++ TargetPassConfig *createPassConfig(PassManagerBase &PM) override; ++ ++ TargetLoweringObjectFile *getObjFileLowering() const override { ++ return TLOF.get(); ++ } ++ ++ /// Returns true if a cast between SrcAS and DestAS is a noop. ++ bool isNoopAddrSpaceCast(unsigned SrcAS, unsigned DestAS) const override { ++ // Mips doesn't have any special address spaces so we just reserve ++ // the first 256 for software use (e.g. OpenCL) and treat casts ++ // between them as noops. ++ return SrcAS < 256 && DestAS < 256; ++ } ++ ++ const LoongArchABIInfo &getABI() const { return ABI; } ++ ++ bool isMachineVerifierClean() const override { ++ return false; ++ } ++}; ++ ++} // end namespace llvm ++ ++#endif // LLVM_LIB_TARGET_LOONGARCH_LOONGARCHTARGETMACHINE_H +diff --git a/lib/Target/LoongArch/LoongArchTargetObjectFile.cpp b/lib/Target/LoongArch/LoongArchTargetObjectFile.cpp +new file mode 100644 +index 00000000..9c6250d2 +--- /dev/null ++++ b/lib/Target/LoongArch/LoongArchTargetObjectFile.cpp +@@ -0,0 +1,26 @@ ++//===-- LoongArchTargetObjectFile.cpp - LoongArch Object Files ----------------------===// ++// ++// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. ++// See https://llvm.org/LICENSE.txt for license information. 
++// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception ++// ++//===----------------------------------------------------------------------===// ++ ++#include "LoongArchTargetObjectFile.h" ++#include "LoongArchSubtarget.h" ++#include "LoongArchTargetMachine.h" ++#include "MCTargetDesc/LoongArchMCExpr.h" ++#include "llvm/BinaryFormat/ELF.h" ++#include "llvm/IR/DataLayout.h" ++#include "llvm/IR/DerivedTypes.h" ++#include "llvm/IR/GlobalVariable.h" ++#include "llvm/MC/MCContext.h" ++#include "llvm/MC/MCSectionELF.h" ++#include "llvm/Support/CommandLine.h" ++#include "llvm/Target/TargetMachine.h" ++using namespace llvm; ++ ++void LoongArchTargetObjectFile::Initialize(MCContext &Ctx, const TargetMachine &TM){ ++ TargetLoweringObjectFileELF::Initialize(Ctx, TM); ++ InitializeELF(TM.Options.UseInitArray); ++} +diff --git a/lib/Target/LoongArch/LoongArchTargetObjectFile.h b/lib/Target/LoongArch/LoongArchTargetObjectFile.h +new file mode 100644 +index 00000000..a50c5717 +--- /dev/null ++++ b/lib/Target/LoongArch/LoongArchTargetObjectFile.h +@@ -0,0 +1,24 @@ ++//===-- llvm/Target/LoongArchTargetObjectFile.h - LoongArch Object Info ---*- C++ -*-===// ++// ++// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. ++// See https://llvm.org/LICENSE.txt for license information. ++// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception ++// ++//===----------------------------------------------------------------------===// ++ ++#ifndef LLVM_LIB_TARGET_LOONGARCH_LOONGARCHTARGETOBJECTFILE_H ++#define LLVM_LIB_TARGET_LOONGARCH_LOONGARCHTARGETOBJECTFILE_H ++ ++#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h" ++ ++namespace llvm { ++class LoongArchTargetMachine; ++ class LoongArchTargetObjectFile : public TargetLoweringObjectFileELF { ++ ++ public: ++ ++ void Initialize(MCContext &Ctx, const TargetMachine &TM) override; ++ }; ++} // end namespace llvm ++ ++#endif +diff --git a/lib/Target/LoongArch/LoongArchTargetStreamer.h b/lib/Target/LoongArch/LoongArchTargetStreamer.h +new file mode 100644 +index 00000000..a9adc32d +--- /dev/null ++++ b/lib/Target/LoongArch/LoongArchTargetStreamer.h +@@ -0,0 +1,130 @@ ++//===-- LoongArchTargetStreamer.h - LoongArch Target Streamer ------------*- C++ -*--===// ++// ++// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. ++// See https://llvm.org/LICENSE.txt for license information. 
++// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception ++// ++//===----------------------------------------------------------------------===// ++ ++#ifndef LLVM_LIB_TARGET_LOONGARCH_LOONGARCHTARGETSTREAMER_H ++#define LLVM_LIB_TARGET_LOONGARCH_LOONGARCHTARGETSTREAMER_H ++ ++#include "MCTargetDesc/LoongArchABIInfo.h" ++#include "llvm/ADT/Optional.h" ++#include "llvm/ADT/STLExtras.h" ++#include "llvm/MC/MCELFStreamer.h" ++#include "llvm/MC/MCRegisterInfo.h" ++#include "llvm/MC/MCStreamer.h" ++ ++namespace llvm { ++ ++class formatted_raw_ostream; ++ ++struct LoongArchFPABIInfo; ++ ++class LoongArchTargetStreamer : public MCTargetStreamer { ++public: ++ LoongArchTargetStreamer(MCStreamer &S); ++ ++ virtual void setPic(bool Value) {} ++ ++ virtual void emitDirectiveOptionPic0(); ++ virtual void emitDirectiveOptionPic2(); ++ ++ virtual void emitDirectiveSetArch(StringRef Arch); ++ virtual void emitDirectiveSetLoongArch32(); ++ virtual void emitDirectiveSetloongarch64(); ++ ++ void emitR(unsigned Opcode, unsigned Reg0, SMLoc IDLoc, ++ const MCSubtargetInfo *STI); ++ void emitII(unsigned Opcode, int16_t Imm1, int16_t Imm2, SMLoc IDLoc, ++ const MCSubtargetInfo *STI); ++ void emitRX(unsigned Opcode, unsigned Reg0, MCOperand Op1, SMLoc IDLoc, ++ const MCSubtargetInfo *STI); ++ void emitRI(unsigned Opcode, unsigned Reg0, int32_t Imm, SMLoc IDLoc, ++ const MCSubtargetInfo *STI); ++ void emitRR(unsigned Opcode, unsigned Reg0, unsigned Reg1, SMLoc IDLoc, ++ const MCSubtargetInfo *STI); ++ void emitRXX(unsigned Opcode, unsigned Reg0, MCOperand Op1, MCOperand Op2, ++ SMLoc IDLoc, const MCSubtargetInfo *STI); ++ void emitRRX(unsigned Opcode, unsigned Reg0, unsigned Reg1, MCOperand Op2, ++ SMLoc IDLoc, const MCSubtargetInfo *STI); ++ void emitRRR(unsigned Opcode, unsigned Reg0, unsigned Reg1, unsigned Reg2, ++ SMLoc IDLoc, const MCSubtargetInfo *STI); ++ void emitRRI(unsigned Opcode, unsigned Reg0, unsigned Reg1, int32_t Imm, ++ SMLoc IDLoc, const MCSubtargetInfo *STI); ++ void emitRRXX(unsigned Opcode, unsigned Reg0, unsigned Reg1, MCOperand Op2, ++ MCOperand Op3, SMLoc IDLoc, const MCSubtargetInfo *STI); ++ void emitRRIII(unsigned Opcode, unsigned Reg0, unsigned Reg1, int16_t Imm0, ++ int16_t Imm1, int16_t Imm2, SMLoc IDLoc, ++ const MCSubtargetInfo *STI); ++ void emitAdd(unsigned DstReg, unsigned SrcReg, unsigned TrgReg, bool Is64Bit, ++ const MCSubtargetInfo *STI); ++ void emitDSLL(unsigned DstReg, unsigned SrcReg, int16_t ShiftAmount, ++ SMLoc IDLoc, const MCSubtargetInfo *STI); ++ void emitNop(SMLoc IDLoc, const MCSubtargetInfo *STI); ++ ++ void forbidModuleDirective() { ModuleDirectiveAllowed = false; } ++ void reallowModuleDirective() { ModuleDirectiveAllowed = true; } ++ bool isModuleDirectiveAllowed() { return ModuleDirectiveAllowed; } ++ ++ template ++ void updateABIInfo(const PredicateLibrary &P) { ++ ABI = P.getABI(); ++ } ++ ++ const LoongArchABIInfo &getABI() const { ++ assert(ABI.hasValue() && "ABI hasn't been set!"); ++ return *ABI; ++ } ++ ++protected: ++ llvm::Optional ABI; ++ ++ bool GPRInfoSet; ++ ++ bool FPRInfoSet; ++ ++ bool FrameInfoSet; ++ int FrameOffset; ++ unsigned FrameReg; ++ unsigned ReturnReg; ++ ++private: ++ bool ModuleDirectiveAllowed; ++}; ++ ++// This part is for ascii assembly output ++class LoongArchTargetAsmStreamer : public LoongArchTargetStreamer { ++ formatted_raw_ostream &OS; ++ ++public: ++ LoongArchTargetAsmStreamer(MCStreamer &S, formatted_raw_ostream &OS); ++ ++ void emitDirectiveOptionPic0() override; ++ void emitDirectiveOptionPic2() override; ++ ++ 
void emitDirectiveSetArch(StringRef Arch) override; ++ void emitDirectiveSetLoongArch32() override; ++ void emitDirectiveSetloongarch64() override; ++}; ++ ++// This part is for ELF object output ++class LoongArchTargetELFStreamer : public LoongArchTargetStreamer { ++ const MCSubtargetInfo &STI; ++ bool Pic; ++ ++public: ++ MCELFStreamer &getStreamer(); ++ LoongArchTargetELFStreamer(MCStreamer &S, const MCSubtargetInfo &STI); ++ ++ void setPic(bool Value) override { Pic = Value; } ++ ++ void emitLabel(MCSymbol *Symbol) override; ++ void finish() override; ++ ++ void emitDirectiveOptionPic0() override; ++ void emitDirectiveOptionPic2() override; ++}; ++} ++#endif +diff --git a/lib/Target/LoongArch/LoongArchTargetTransformInfo.cpp b/lib/Target/LoongArch/LoongArchTargetTransformInfo.cpp +new file mode 100644 +index 00000000..9510dc02 +--- /dev/null ++++ b/lib/Target/LoongArch/LoongArchTargetTransformInfo.cpp +@@ -0,0 +1,325 @@ ++//===-- LoongArchTargetTransformInfo.cpp - LoongArch specific TTI pass ++//----------------===// ++// ++// The LLVM Compiler Infrastructure ++// ++// This file is distributed under the University of Illinois Open Source ++// License. See LICENSE.TXT for details. ++// ++//===----------------------------------------------------------------------===// ++/// \file ++/// This file implements a TargetTransformInfo analysis pass specific to the ++/// LoongArch target machine. It uses the target's detailed information to ++/// provide more precise answers to certain TTI queries, while letting the ++/// target independent and default TTI implementations handle the rest. ++/// ++//===----------------------------------------------------------------------===// ++ ++#include "LoongArchTargetTransformInfo.h" ++#include "llvm/Analysis/TargetTransformInfo.h" ++#include "llvm/CodeGen/BasicTTIImpl.h" ++#include "llvm/CodeGen/CostTable.h" ++#include "llvm/CodeGen/TargetLowering.h" ++#include "llvm/IR/IntrinsicInst.h" ++#include "llvm/Support/Debug.h" ++ ++using namespace llvm; ++ ++#define DEBUG_TYPE "LoongArchtti" ++ ++//===----------------------------------------------------------------------===// ++// ++// LoongArch cost model. ++// ++//===----------------------------------------------------------------------===// ++ ++bool LoongArchTTIImpl::areInlineCompatible(const Function *Caller, ++ const Function *Callee) const { ++ const TargetMachine &TM = getTLI()->getTargetMachine(); ++ ++ const FeatureBitset &CallerBits = ++ TM.getSubtargetImpl(*Caller)->getFeatureBits(); ++ const FeatureBitset &CalleeBits = ++ TM.getSubtargetImpl(*Callee)->getFeatureBits(); ++ ++ // Inline a callee if its target-features are a subset of the callers ++ // target-features. 
++ return (CallerBits & CalleeBits) == CalleeBits; ++} ++ ++TargetTransformInfo::PopcntSupportKind ++LoongArchTTIImpl::getPopcntSupport(unsigned TyWidth) { ++ assert(isPowerOf2_32(TyWidth) && "Ty width must be power of 2"); ++ if (TyWidth == 32 || TyWidth == 64) ++ return TTI::PSK_FastHardware; ++ return TTI::PSK_Software; ++} ++ ++unsigned LoongArchTTIImpl::getNumberOfRegisters(bool Vector) { ++ if (Vector && !ST->hasLSX()) ++ return 0; ++ ++ return 32; ++} ++ ++unsigned LoongArchTTIImpl::getRegisterBitWidth(bool Vector) const { ++ if (Vector) { ++ if (ST->hasLASX()) ++ return 256; ++ ++ if (ST->hasLSX()) ++ return 128; ++ ++ return 0; ++ } ++ return 64; ++} ++ ++unsigned LoongArchTTIImpl::getMaxInterleaveFactor(unsigned VF) { ++ if (VF == 1) ++ return 1; ++ return 2; ++} ++ ++InstructionCost LoongArchTTIImpl::getArithmeticInstrCost( ++ unsigned Opcode, Type *Ty, TTI::TargetCostKind CostKind, ++ TTI::OperandValueKind Op1Info, TTI::OperandValueKind Op2Info, ++ TTI::OperandValueProperties Opd1PropInfo, ++ TTI::OperandValueProperties Opd2PropInfo, ArrayRef Args, ++ const Instruction *CxtI) { ++ ++ std::pair LT = TLI->getTypeLegalizationCost(DL, Ty); ++ ++ int ISD = TLI->InstructionOpcodeToISD(Opcode); ++ assert(ISD && "Invalid opcode"); ++ ++ static const CostTblEntry LASXCostTable[] = { ++ ++ {ISD::SHL, MVT::v32i8, 1}, ++ {ISD::SHL, MVT::v16i16, 1}, ++ {ISD::SHL, MVT::v8i32, 1}, ++ {ISD::SHL, MVT::v4i64, 1}, ++ ++ {ISD::SRL, MVT::v32i8, 1}, ++ {ISD::SRL, MVT::v16i16, 1}, ++ {ISD::SRL, MVT::v8i32, 1}, ++ {ISD::SRL, MVT::v4i64, 1}, ++ ++ {ISD::SRA, MVT::v32i8, 1}, ++ {ISD::SRA, MVT::v16i16, 1}, ++ {ISD::SRA, MVT::v8i32, 1}, ++ {ISD::SRA, MVT::v4i64, 1}, ++ ++ {ISD::SUB, MVT::v32i8, 1}, ++ {ISD::SUB, MVT::v16i16, 1}, ++ {ISD::SUB, MVT::v8i32, 1}, ++ {ISD::SUB, MVT::v4i64, 1}, ++ ++ {ISD::ADD, MVT::v32i8, 1}, ++ {ISD::ADD, MVT::v16i16, 1}, ++ {ISD::ADD, MVT::v8i32, 1}, ++ {ISD::ADD, MVT::v4i64, 1}, ++ ++ {ISD::MUL, MVT::v32i8, 1}, ++ {ISD::MUL, MVT::v16i16, 1}, ++ {ISD::MUL, MVT::v8i32, 1}, ++ {ISD::MUL, MVT::v4i64, 1}, ++ ++ {ISD::SDIV, MVT::v32i8, 29}, ++ {ISD::SDIV, MVT::v16i16, 19}, ++ {ISD::SDIV, MVT::v8i32, 14}, ++ {ISD::SDIV, MVT::v4i64, 13}, ++ ++ {ISD::UDIV, MVT::v32i8, 29}, ++ {ISD::UDIV, MVT::v16i16, 19}, ++ {ISD::UDIV, MVT::v8i32, 14}, ++ {ISD::UDIV, MVT::v4i64, 13}, ++ ++ {ISD::SREM, MVT::v32i8, 33}, ++ {ISD::SREM, MVT::v16i16, 21}, ++ {ISD::SREM, MVT::v8i32, 15}, ++ {ISD::SREM, MVT::v4i64, 13}, ++ ++ {ISD::UREM, MVT::v32i8, 29}, ++ {ISD::UREM, MVT::v16i16, 19}, ++ {ISD::UREM, MVT::v8i32, 14}, ++ {ISD::UREM, MVT::v4i64, 13}, ++ ++ {ISD::FADD, MVT::f64, 1}, ++ {ISD::FADD, MVT::f32, 1}, ++ {ISD::FADD, MVT::v4f64, 1}, ++ {ISD::FADD, MVT::v8f32, 1}, ++ ++ {ISD::FSUB, MVT::f64, 1}, ++ {ISD::FSUB, MVT::f32, 1}, ++ {ISD::FSUB, MVT::v4f64, 1}, ++ {ISD::FSUB, MVT::v8f32, 1}, ++ ++ {ISD::FMUL, MVT::f64, 1}, ++ {ISD::FMUL, MVT::f32, 1}, ++ {ISD::FMUL, MVT::v4f64, 1}, ++ {ISD::FMUL, MVT::v8f32, 1}, ++ ++ {ISD::FDIV, MVT::f32, 12}, ++ {ISD::FDIV, MVT::f64, 10}, ++ {ISD::FDIV, MVT::v8f32, 12}, ++ {ISD::FDIV, MVT::v4f64, 10} ++ ++ }; ++ ++ if (ST->hasLASX()) ++ if (const auto *Entry = CostTableLookup(LASXCostTable, ISD, LT.second)) ++ return LT.first * Entry->Cost; ++ ++ static const CostTblEntry LSXCostTable[] = { ++ ++ {ISD::SHL, MVT::v16i8, 1}, ++ {ISD::SHL, MVT::v8i16, 1}, ++ {ISD::SHL, MVT::v4i32, 1}, ++ {ISD::SHL, MVT::v2i64, 1}, ++ ++ {ISD::SRL, MVT::v16i8, 1}, ++ {ISD::SRL, MVT::v8i16, 1}, ++ {ISD::SRL, MVT::v4i32, 1}, ++ {ISD::SRL, MVT::v2i64, 1}, ++ ++ {ISD::SRA, MVT::v16i8, 1}, ++ 
{ISD::SRA, MVT::v8i16, 1}, ++ {ISD::SRA, MVT::v4i32, 1}, ++ {ISD::SRA, MVT::v2i64, 1}, ++ ++ {ISD::SUB, MVT::v16i8, 1}, ++ {ISD::SUB, MVT::v8i16, 1}, ++ {ISD::SUB, MVT::v4i32, 1}, ++ {ISD::SUB, MVT::v2i64, 1}, ++ ++ {ISD::ADD, MVT::v16i8, 1}, ++ {ISD::ADD, MVT::v8i16, 1}, ++ {ISD::ADD, MVT::v4i32, 1}, ++ {ISD::ADD, MVT::v2i64, 1}, ++ ++ {ISD::MUL, MVT::v16i8, 1}, ++ {ISD::MUL, MVT::v8i16, 1}, ++ {ISD::MUL, MVT::v4i32, 1}, ++ {ISD::MUL, MVT::v2i64, 1}, ++ ++ {ISD::SDIV, MVT::v16i8, 29}, ++ {ISD::SDIV, MVT::v8i16, 19}, ++ {ISD::SDIV, MVT::v4i32, 14}, ++ {ISD::SDIV, MVT::v2i64, 13}, ++ ++ {ISD::UDIV, MVT::v16i8, 29}, ++ {ISD::UDIV, MVT::v8i16, 19}, ++ {ISD::UDIV, MVT::v4i32, 14}, ++ {ISD::UDIV, MVT::v2i64, 13}, ++ ++ {ISD::SREM, MVT::v16i8, 33}, ++ {ISD::SREM, MVT::v8i16, 21}, ++ {ISD::SREM, MVT::v4i32, 15}, ++ {ISD::SREM, MVT::v2i64, 13}, ++ ++ {ISD::UREM, MVT::v16i8, 29}, ++ {ISD::UREM, MVT::v8i16, 19}, ++ {ISD::UREM, MVT::v4i32, 14}, ++ {ISD::UREM, MVT::v2i64, 13}, ++ ++ {ISD::FADD, MVT::f64, 1}, ++ {ISD::FADD, MVT::f32, 1}, ++ {ISD::FADD, MVT::v2f64, 1}, ++ {ISD::FADD, MVT::v4f32, 1}, ++ ++ {ISD::FSUB, MVT::f64, 1}, ++ {ISD::FSUB, MVT::f32, 1}, ++ {ISD::FSUB, MVT::v2f64, 1}, ++ {ISD::FSUB, MVT::v4f32, 1}, ++ ++ {ISD::FMUL, MVT::f64, 1}, ++ {ISD::FMUL, MVT::f32, 1}, ++ {ISD::FMUL, MVT::v2f64, 1}, ++ {ISD::FMUL, MVT::v4f32, 1}, ++ ++ {ISD::FDIV, MVT::f32, 12}, ++ {ISD::FDIV, MVT::f64, 10}, ++ {ISD::FDIV, MVT::v4f32, 12}, ++ {ISD::FDIV, MVT::v2f64, 10} ++ ++ }; ++ ++ if (ST->hasLSX()) ++ if (const auto *Entry = CostTableLookup(LSXCostTable, ISD, LT.second)) ++ return LT.first * Entry->Cost; ++ ++ // Fallback to the default implementation. ++ return BaseT::getArithmeticInstrCost(Opcode, Ty, CostKind, Op1Info, Op2Info); ++} ++ ++InstructionCost LoongArchTTIImpl::getVectorInstrCost(unsigned Opcode, Type *Val, ++ unsigned Index) { ++ assert(Val->isVectorTy() && "This must be a vector type"); ++ ++ Type *ScalarType = Val->getScalarType(); ++ ++ if (Index != -1U) { ++ // Legalize the type. ++ std::pair LT = TLI->getTypeLegalizationCost(DL, Val); ++ ++ // This type is legalized to a scalar type. ++ if (!LT.second.isVector()) ++ return 0; ++ ++ // The type may be split. Normalize the index to the new type. ++ unsigned Width = LT.second.getVectorNumElements(); ++ Index = Index % Width; ++ ++ // The element at index zero is already inside the vector. ++ if (Index == 0) // if (ScalarType->isFloatingPointTy() && Index == 0) ++ return 0; ++ } ++ ++ // Add to the base cost if we know that the extracted element of a vector is ++ // destined to be moved to and used in the integer register file. 
++ int RegisterFileMoveCost = 0; ++ if (Opcode == Instruction::ExtractElement && ScalarType->isPointerTy()) ++ RegisterFileMoveCost = 1; ++ ++ return BaseT::getVectorInstrCost(Opcode, Val, Index) + RegisterFileMoveCost; ++} ++ ++unsigned LoongArchTTIImpl::getLoadStoreVecRegBitWidth(unsigned) const { ++ return getRegisterBitWidth(true); ++} ++ ++InstructionCost LoongArchTTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst, ++ Type *Src, ++ TTI::CastContextHint CCH, ++ TTI::TargetCostKind CostKind, ++ const Instruction *I) { ++ int ISD = TLI->InstructionOpcodeToISD(Opcode); ++ assert(ISD && "Invalid opcode"); ++ ++ static const TypeConversionCostTblEntry LASXConversionTbl[] = { ++ ++ // TODO:The cost requires more granular testing ++ {ISD::SIGN_EXTEND, MVT::v16i16, MVT::v16i8, 3}, ++ {ISD::ZERO_EXTEND, MVT::v16i16, MVT::v16i8, 3}, ++ {ISD::SIGN_EXTEND, MVT::v8i32, MVT::v8i16, 3}, ++ {ISD::ZERO_EXTEND, MVT::v8i32, MVT::v8i16, 3}, ++ {ISD::SIGN_EXTEND, MVT::v4i64, MVT::v4i32, 3}, ++ {ISD::ZERO_EXTEND, MVT::v4i64, MVT::v4i32, 3}, ++ ++ }; ++ ++ EVT SrcTy = TLI->getValueType(DL, Src); ++ EVT DstTy = TLI->getValueType(DL, Dst); ++ ++ if (!SrcTy.isSimple() || !DstTy.isSimple()) ++ return BaseT::getCastInstrCost(Opcode, Dst, Src, CCH, CostKind, I); ++ ++ if (ST->hasLASX()) { ++ if (const auto *Entry = ConvertCostTableLookup( ++ LASXConversionTbl, ISD, DstTy.getSimpleVT(), SrcTy.getSimpleVT())) ++ return Entry->Cost; ++ } ++ ++ return BaseT::getCastInstrCost(Opcode, Dst, Src, CCH, CostKind, I); ++} +diff --git a/lib/Target/LoongArch/LoongArchTargetTransformInfo.h b/lib/Target/LoongArch/LoongArchTargetTransformInfo.h +new file mode 100644 +index 00000000..3a93fc8e +--- /dev/null ++++ b/lib/Target/LoongArch/LoongArchTargetTransformInfo.h +@@ -0,0 +1,91 @@ ++//===-- LoongArchTargetTransformInfo.h - LoongArch specific TTI -------------*- ++// C++ -*-===// ++// ++// The LLVM Compiler Infrastructure ++// ++// This file is distributed under the University of Illinois Open Source ++// License. See LICENSE.TXT for details. ++// ++//===----------------------------------------------------------------------===// ++// \file ++// This file a TargetTransformInfo::Concept conforming object specific to the ++// LoongArch target machine. It uses the target's detailed information to ++// provide more precise answers to certain TTI queries, while letting the ++// target independent and default TTI implementations handle the rest. 
++// ++//===----------------------------------------------------------------------===// ++ ++#ifndef LLVM_LIB_TARGET_LoongArch_LoongArchTARGETTRANSFORMINFO_H ++#define LLVM_LIB_TARGET_LoongArch_LoongArchTARGETTRANSFORMINFO_H ++ ++#include "LoongArch.h" ++#include "LoongArchSubtarget.h" ++#include "LoongArchTargetMachine.h" ++#include "llvm/Analysis/TargetTransformInfo.h" ++#include "llvm/CodeGen/BasicTTIImpl.h" ++#include "llvm/CodeGen/TargetLowering.h" ++ ++namespace llvm { ++ ++class LoongArchTTIImpl : public BasicTTIImplBase { ++ typedef BasicTTIImplBase BaseT; ++ typedef TargetTransformInfo TTI; ++ friend BaseT; ++ ++ const LoongArchSubtarget *ST; ++ const LoongArchTargetLowering *TLI; ++ ++ const LoongArchSubtarget *getST() const { return ST; } ++ const LoongArchTargetLowering *getTLI() const { return TLI; } ++ ++public: ++ explicit LoongArchTTIImpl(const LoongArchTargetMachine *TM, const Function &F) ++ : BaseT(TM, F.getParent()->getDataLayout()), ST(TM->getSubtargetImpl(F)), ++ TLI(ST->getTargetLowering()) {} ++ ++ bool areInlineCompatible(const Function *Caller, ++ const Function *Callee) const; ++ ++ /// \name Scalar TTI Implementations ++ // /// @{ ++ ++ TTI::PopcntSupportKind getPopcntSupport(unsigned TyWidth); ++ ++ /// @} ++ ++ /// \name Vector TTI Implementations ++ /// @{ ++ ++ bool enableInterleavedAccessVectorization() { return true; } ++ ++ unsigned getNumberOfRegisters(bool Vector); ++ ++ unsigned getRegisterBitWidth(bool Vector) const; ++ ++ unsigned getMaxInterleaveFactor(unsigned VF); ++ ++ InstructionCost getVectorInstrCost(unsigned Opcode, Type *Val, ++ unsigned Index); ++ ++ InstructionCost getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src, ++ TTI::CastContextHint CCH, ++ TTI::TargetCostKind CostKind, ++ const Instruction *I = nullptr); ++ ++ unsigned getLoadStoreVecRegBitWidth(unsigned AS) const; ++ ++ InstructionCost getArithmeticInstrCost( ++ unsigned Opcode, Type *Ty, TTI::TargetCostKind CostKind, ++ TTI::OperandValueKind Opd1Info = TTI::OK_AnyValue, ++ TTI::OperandValueKind Opd2Info = TTI::OK_AnyValue, ++ TTI::OperandValueProperties Opd1PropInfo = TTI::OP_None, ++ TTI::OperandValueProperties Opd2PropInfo = TTI::OP_None, ++ ArrayRef Args = ArrayRef(), ++ const Instruction *CxtI = nullptr); ++ ++ /// @} ++}; ++ ++} // end namespace llvm ++ ++#endif +diff --git a/lib/Target/LoongArch/MCTargetDesc/CMakeLists.txt b/lib/Target/LoongArch/MCTargetDesc/CMakeLists.txt +new file mode 100644 +index 00000000..927fa7d5 +--- /dev/null ++++ b/lib/Target/LoongArch/MCTargetDesc/CMakeLists.txt +@@ -0,0 +1,21 @@ ++ add_llvm_component_library(LLVMLoongArchDesc ++ LoongArchABIInfo.cpp ++ LoongArchAnalyzeImmediate.cpp ++ LoongArchAsmBackend.cpp ++ LoongArchELFObjectWriter.cpp ++ LoongArchELFStreamer.cpp ++ LoongArchInstPrinter.cpp ++ LoongArchMCAsmInfo.cpp ++ LoongArchMCCodeEmitter.cpp ++ LoongArchMCExpr.cpp ++ LoongArchMCTargetDesc.cpp ++ LoongArchTargetStreamer.cpp ++ ++ LINK_COMPONENTS ++ MC ++ LoongArchInfo ++ Support ++ ++ ADD_TO_COMPONENT ++ LoongArch ++ ) +diff --git a/lib/Target/LoongArch/MCTargetDesc/LoongArchABIInfo.cpp b/lib/Target/LoongArch/MCTargetDesc/LoongArchABIInfo.cpp +new file mode 100644 +index 00000000..86aab1e3 +--- /dev/null ++++ b/lib/Target/LoongArch/MCTargetDesc/LoongArchABIInfo.cpp +@@ -0,0 +1,113 @@ ++//===---- LoongArchABIInfo.cpp - Information about LoongArch ABI's ------------------===// ++// ++// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. ++// See https://llvm.org/LICENSE.txt for license information. 
++// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception ++// ++//===----------------------------------------------------------------------===// ++ ++#include "LoongArchABIInfo.h" ++#include "LoongArchRegisterInfo.h" ++#include "llvm/ADT/StringRef.h" ++#include "llvm/ADT/StringSwitch.h" ++#include "llvm/MC/MCTargetOptions.h" ++ ++using namespace llvm; ++ ++namespace { ++ ++static const MCPhysReg LoongArch64IntRegs[8] = { ++ LoongArch::A0_64, LoongArch::A1_64, LoongArch::A2_64, LoongArch::A3_64, ++ LoongArch::A4_64, LoongArch::A5_64, LoongArch::A6_64, LoongArch::A7_64}; ++} ++ ++ArrayRef LoongArchABIInfo::GetByValArgRegs() const { ++ if (IsILP32()) ++ // TODO ++ llvm_unreachable("Unimplemented ABI"); ++ if (IsLP64()) ++ return makeArrayRef(LoongArch64IntRegs); ++ llvm_unreachable("Unhandled ABI"); ++} ++ ++ArrayRef LoongArchABIInfo::GetVarArgRegs() const { ++ if (IsILP32()) ++ // TODO ++ llvm_unreachable("Unimplemented ABI"); ++ if (IsLP64()) ++ return makeArrayRef(LoongArch64IntRegs); ++ llvm_unreachable("Unhandled ABI"); ++} ++ ++LoongArchABIInfo LoongArchABIInfo::computeTargetABI(const Triple &TT, StringRef CPU, ++ const MCTargetOptions &Options) { ++ if (Options.getABIName().startswith("ilp32d")) ++ return LoongArchABIInfo::ILP32D(); ++ if (Options.getABIName().startswith("ilp32f")) ++ return LoongArchABIInfo::ILP32F(); ++ if (Options.getABIName().startswith("ilp32s")) ++ return LoongArchABIInfo::ILP32S(); ++ if (Options.getABIName().startswith("lp64d")) ++ return LoongArchABIInfo::LP64D(); ++ if (Options.getABIName().startswith("lp64s")) ++ return LoongArchABIInfo::LP64S(); ++ if (Options.getABIName().startswith("lp64f")) ++ return LoongArchABIInfo::LP64F(); ++ assert(Options.getABIName().empty() && "Unknown ABI option for LoongArch"); ++ ++ if (TT.isLoongArch64()) ++ return LoongArchABIInfo::LP64D(); ++ return LoongArchABIInfo::ILP32D(); ++} ++ ++unsigned LoongArchABIInfo::GetStackPtr() const { ++ return ArePtrs64bit() ? LoongArch::SP_64 : LoongArch::SP; ++} ++ ++unsigned LoongArchABIInfo::GetFramePtr() const { ++ return ArePtrs64bit() ? LoongArch::FP_64 : LoongArch::FP; ++} ++ ++unsigned LoongArchABIInfo::GetBasePtr() const { ++ return ArePtrs64bit() ? LoongArch::S7_64 : LoongArch::S7; ++} ++ ++unsigned LoongArchABIInfo::GetNullPtr() const { ++ return ArePtrs64bit() ? LoongArch::ZERO_64 : LoongArch::ZERO; ++} ++ ++unsigned LoongArchABIInfo::GetZeroReg() const { ++ return AreGprs64bit() ? LoongArch::ZERO_64 : LoongArch::ZERO; ++} ++ ++unsigned LoongArchABIInfo::GetPtrAddOp() const { ++ return ArePtrs64bit() ? LoongArch::ADD_D : LoongArch::ADD_W; ++} ++ ++unsigned LoongArchABIInfo::GetPtrAddiOp() const { ++ return ArePtrs64bit() ? LoongArch::ADDI_D : LoongArch::ADDI_W; ++} ++ ++unsigned LoongArchABIInfo::GetPtrSubOp() const { ++ return ArePtrs64bit() ? LoongArch::SUB_D : LoongArch::SUB_W; ++} ++ ++unsigned LoongArchABIInfo::GetPtrAndOp() const { ++ return ArePtrs64bit() ? LoongArch::AND : LoongArch::AND32; ++} ++ ++unsigned LoongArchABIInfo::GetGPRMoveOp() const { ++ return ArePtrs64bit() ? LoongArch::OR : LoongArch::OR32; ++} ++ ++unsigned LoongArchABIInfo::GetEhDataReg(unsigned I) const { ++ static const unsigned EhDataReg[] = { ++ LoongArch::A0, LoongArch::A1, LoongArch::A2, LoongArch::A3 ++ }; ++ static const unsigned EhDataReg64[] = { ++ LoongArch::A0_64, LoongArch::A1_64, LoongArch::A2_64, LoongArch::A3_64 ++ }; ++ ++ return IsLP64() ? 
EhDataReg64[I] : EhDataReg[I]; ++} ++ +diff --git a/lib/Target/LoongArch/MCTargetDesc/LoongArchABIInfo.h b/lib/Target/LoongArch/MCTargetDesc/LoongArchABIInfo.h +new file mode 100644 +index 00000000..7ce36fd2 +--- /dev/null ++++ b/lib/Target/LoongArch/MCTargetDesc/LoongArchABIInfo.h +@@ -0,0 +1,88 @@ ++//===---- LoongArchABIInfo.h - Information about LoongArch ABI's --------------------===// ++// ++// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. ++// See https://llvm.org/LICENSE.txt for license information. ++// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception ++// ++//===----------------------------------------------------------------------===// ++ ++#ifndef LLVM_LIB_TARGET_LOONGARCH_MCTARGETDESC_LOONGARCHABIINFO_H ++#define LLVM_LIB_TARGET_LOONGARCH_MCTARGETDESC_LOONGARCHABIINFO_H ++ ++#include "llvm/ADT/Triple.h" ++#include "llvm/IR/CallingConv.h" ++#include "llvm/MC/MCRegisterInfo.h" ++ ++namespace llvm { ++ ++template class ArrayRef; ++class MCTargetOptions; ++class StringRef; ++class TargetRegisterClass; ++ ++class LoongArchABIInfo { ++public: ++ enum class ABI { Unknown, ILP32D, ILP32F, ILP32S, LP64D, LP64F, LP64S }; ++ ++protected: ++ ABI ThisABI; ++ ++public: ++ LoongArchABIInfo(ABI ThisABI) : ThisABI(ThisABI) {} ++ ++ static LoongArchABIInfo Unknown() { return LoongArchABIInfo(ABI::Unknown); } ++ static LoongArchABIInfo ILP32D() { return LoongArchABIInfo(ABI::ILP32D); } ++ static LoongArchABIInfo ILP32F() { return LoongArchABIInfo(ABI::ILP32F); } ++ static LoongArchABIInfo ILP32S() { return LoongArchABIInfo(ABI::ILP32S); } ++ static LoongArchABIInfo LP64D() { return LoongArchABIInfo(ABI::LP64D); } ++ static LoongArchABIInfo LP64S() { return LoongArchABIInfo(ABI::LP64S); } ++ static LoongArchABIInfo LP64F() { return LoongArchABIInfo(ABI::LP64F); } ++ static LoongArchABIInfo computeTargetABI(const Triple &TT, StringRef CPU, ++ const MCTargetOptions &Options); ++ ++ bool IsKnown() const { return ThisABI != ABI::Unknown; } ++ bool IsILP32D() const { return ThisABI == ABI::ILP32D; } ++ bool IsILP32F() const { return ThisABI == ABI::ILP32F; } ++ bool IsILP32S() const { return ThisABI == ABI::ILP32S; } ++ bool IsILP32() const { return IsILP32D() || IsILP32F() || IsILP32S(); } ++ bool IsLP64D() const { return ThisABI == ABI::LP64D; } ++ bool IsLP64S() const { return ThisABI == ABI::LP64S; } ++ bool IsLP64F() const { return ThisABI == ABI::LP64F; } ++ bool IsLP64() const { return IsLP64D() || IsLP64S() || IsLP64F(); } ++ ABI GetEnumValue() const { return ThisABI; } ++ ++ /// The registers to use for byval arguments. ++ ArrayRef GetByValArgRegs() const; ++ ++ /// The registers to use for the variable argument list. ++ ArrayRef GetVarArgRegs() const; ++ ++ /// Ordering of ABI's ++ /// LoongArchGenSubtargetInfo.inc will use this to resolve conflicts when given ++ /// multiple ABI options. 
++ bool operator<(const LoongArchABIInfo Other) const { ++ return ThisABI < Other.GetEnumValue(); ++ } ++ ++ unsigned GetStackPtr() const; ++ unsigned GetFramePtr() const; ++ unsigned GetBasePtr() const; ++ unsigned GetNullPtr() const; ++ unsigned GetZeroReg() const; ++ unsigned GetPtrAddOp() const; ++ unsigned GetPtrAddiOp() const; ++ unsigned GetPtrSubOp() const; ++ unsigned GetPtrAndOp() const; ++ unsigned GetGPRMoveOp() const; ++ inline bool ArePtrs64bit() const { ++ return IsLP64D() || IsLP64S() || IsLP64F(); ++ } ++ inline bool AreGprs64bit() const { ++ return IsLP64D() || IsLP64S() || IsLP64F(); ++ } ++ ++ unsigned GetEhDataReg(unsigned I) const; ++}; ++} ++ ++#endif +diff --git a/lib/Target/LoongArch/MCTargetDesc/LoongArchAnalyzeImmediate.cpp b/lib/Target/LoongArch/MCTargetDesc/LoongArchAnalyzeImmediate.cpp +new file mode 100644 +index 00000000..96e43b2d +--- /dev/null ++++ b/lib/Target/LoongArch/MCTargetDesc/LoongArchAnalyzeImmediate.cpp +@@ -0,0 +1,64 @@ ++//===- LoongArchAnalyzeImmediate.cpp - Analyze Immediates -----------------===// ++// ++// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. ++// See https://llvm.org/LICENSE.txt for license information. ++// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception ++// ++//===----------------------------------------------------------------------===// ++ ++#include "LoongArchAnalyzeImmediate.h" ++#include "LoongArch.h" ++#include "MCTargetDesc/LoongArchMCTargetDesc.h" ++#include "llvm/Support/MathExtras.h" ++ ++using namespace llvm; ++ ++LoongArchAnalyzeImmediate::InstSeq ++LoongArchAnalyzeImmediate::generateInstSeq(int64_t Val, bool Is64Bit) { ++ // Val: ++ // | hi32 | lo32 | ++ // +------------+------------------+------------------+-----------+ ++ // | Bits_52_63 | Bits_32_51 | Bits_12_31 | Bits_0_11 | ++ // +------------+------------------+------------------+-----------+ ++ // 63 52 51 32 31 12 11 0 ++ unsigned ORIOp = Is64Bit ? LoongArch::ORI : LoongArch::ORI32; ++ unsigned LU12IOp = Is64Bit ? LoongArch::LU12I_W : LoongArch::LU12I_W32; ++ unsigned ADDIOp = Is64Bit ? 
LoongArch::ADDI_W64 : LoongArch::ADDI_W; ++ unsigned LU32IOp = LoongArch::LU32I_D_R2; ++ unsigned LU52IOp = LoongArch::LU52I_D; ++ ++ int64_t Bits_52_63 = Val >> 52 & 0xFFF; ++ int64_t Bits_32_51 = Val >> 32 & 0xFFFFF; ++ int64_t Bits_12_31 = Val >> 12 & 0xFFFFF; ++ int64_t Bits_0_11 = Val & 0xFFF; ++ ++ InstSeq Insts; ++ ++ if (isInt<12>(Val) && Is64Bit) { ++ Insts.push_back(Inst(LoongArch::ADDI_D, SignExtend64<12>(Bits_0_11))); ++ return Insts; ++ } ++ ++ if (Bits_52_63 != 0 && SignExtend64<52>(Val) == 0) { ++ Insts.push_back(Inst(LU52IOp, SignExtend64<12>(Bits_52_63))); ++ return Insts; ++ } ++ ++ if (Bits_12_31 == 0) ++ Insts.push_back(Inst(ORIOp, Bits_0_11)); ++ else if (SignExtend32<1>(Bits_0_11 >> 11) == SignExtend32<20>(Bits_12_31)) ++ Insts.push_back(Inst(ADDIOp, SignExtend64<12>(Bits_0_11))); ++ else { ++ Insts.push_back(Inst(LU12IOp, SignExtend64<20>(Bits_12_31))); ++ if (Bits_0_11 != 0) ++ Insts.push_back(Inst(ORIOp, Bits_0_11)); ++ } ++ ++ if (SignExtend32<1>(Bits_12_31 >> 19) != SignExtend32<20>(Bits_32_51)) ++ Insts.push_back(Inst(LU32IOp, SignExtend64<20>(Bits_32_51))); ++ ++ if (SignExtend32<1>(Bits_32_51 >> 19) != SignExtend32<12>(Bits_52_63)) ++ Insts.push_back(Inst(LU52IOp, SignExtend64<12>(Bits_52_63))); ++ ++ return Insts; ++} +diff --git a/lib/Target/LoongArch/MCTargetDesc/LoongArchAnalyzeImmediate.h b/lib/Target/LoongArch/MCTargetDesc/LoongArchAnalyzeImmediate.h +new file mode 100644 +index 00000000..3ff00f25 +--- /dev/null ++++ b/lib/Target/LoongArch/MCTargetDesc/LoongArchAnalyzeImmediate.h +@@ -0,0 +1,29 @@ ++//===- LoongArchAnalyzeImmediate.h - Analyze Immediates --------*- C++ -*--===// ++// ++// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. ++// See https://llvm.org/LICENSE.txt for license information. ++// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception ++// ++//===----------------------------------------------------------------------===// ++ ++#ifndef LLVM_LIB_TARGET_LOONGARCH_LOONGARCHANALYZEIMMEDIATE_H ++#define LLVM_LIB_TARGET_LOONGARCH_LOONGARCHANALYZEIMMEDIATE_H ++ ++#include "llvm/ADT/SmallVector.h" ++ ++namespace llvm { ++namespace LoongArchAnalyzeImmediate { ++struct Inst { ++ unsigned Opc; ++ int64_t Imm; ++ Inst(unsigned Opc, int64_t Imm) : Opc(Opc), Imm(Imm) {} ++}; ++using InstSeq = SmallVector; ++ ++// Helper to generate an instruction sequence that will materialise the given ++// immediate value into a register. ++InstSeq generateInstSeq(int64_t Val, bool Is64Bit); ++} // end namespace LoongArchAnalyzeImmediate ++} // end namespace llvm ++ ++#endif // LLVM_LIB_TARGET_LOONGARCH_LOONGARCHANALYZEIMMEDIATE_H +diff --git a/lib/Target/LoongArch/MCTargetDesc/LoongArchAsmBackend.cpp b/lib/Target/LoongArch/MCTargetDesc/LoongArchAsmBackend.cpp +new file mode 100644 +index 00000000..9bec9b20 +--- /dev/null ++++ b/lib/Target/LoongArch/MCTargetDesc/LoongArchAsmBackend.cpp +@@ -0,0 +1,325 @@ ++//===-- LoongArchAsmBackend.cpp - LoongArch Asm Backend ----------------------------===// ++// ++// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. ++// See https://llvm.org/LICENSE.txt for license information. ++// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception ++// ++//===----------------------------------------------------------------------===// ++// ++// This file implements the LoongArchAsmBackend class. 
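++// As an illustration of the fixup handling below: for fixup_loongarch_b26 a
++// PC-relative byte offset of 0x1000 (word offset 0x400) is packed by
++// adjustFixupValue() as ((0x1000 & 0x3fffc) << 8) | ((0x1000 >> 18) & 0x3ff)
++// = 0x100000, placing offset bits [17:2] in instruction bits [25:10] and
++// offset bits [27:18] in instruction bits [9:0].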
++// ++//===----------------------------------------------------------------------===// ++// ++ ++#include "MCTargetDesc/LoongArchAsmBackend.h" ++#include "MCTargetDesc/LoongArchABIInfo.h" ++#include "MCTargetDesc/LoongArchFixupKinds.h" ++#include "MCTargetDesc/LoongArchMCExpr.h" ++#include "MCTargetDesc/LoongArchMCTargetDesc.h" ++#include "llvm/ADT/STLExtras.h" ++#include "llvm/MC/MCAsmBackend.h" ++#include "llvm/MC/MCAssembler.h" ++#include "llvm/MC/MCContext.h" ++#include "llvm/MC/MCDirectives.h" ++#include "llvm/MC/MCELFObjectWriter.h" ++#include "llvm/MC/MCFixupKindInfo.h" ++#include "llvm/MC/MCObjectWriter.h" ++#include "llvm/MC/MCSubtargetInfo.h" ++#include "llvm/MC/MCTargetOptions.h" ++#include "llvm/MC/MCValue.h" ++#include "llvm/Support/EndianStream.h" ++#include "llvm/Support/ErrorHandling.h" ++#include "llvm/Support/Format.h" ++#include "llvm/Support/MathExtras.h" ++#include "llvm/Support/raw_ostream.h" ++ ++using namespace llvm; ++ ++std::unique_ptr ++LoongArchAsmBackend::createObjectTargetWriter() const { ++ return createLoongArchELFObjectWriter(TheTriple); ++} ++ ++static uint64_t adjustFixupValue(const MCFixup &Fixup, uint64_t Value, ++ MCContext &Ctx) { ++ switch (Fixup.getTargetKind()) { ++ default: ++ llvm_unreachable("Unknown fixup kind"); ++ case LoongArch::fixup_loongarch_got_pc_hi20: ++ case LoongArch::fixup_loongarch_got_pc_lo12: ++ case LoongArch::fixup_loongarch_got64_pc_lo20: ++ case LoongArch::fixup_loongarch_got64_pc_hi12: ++ case LoongArch::fixup_loongarch_got_hi20: ++ case LoongArch::fixup_loongarch_got_lo12: ++ case LoongArch::fixup_loongarch_got64_lo20: ++ case LoongArch::fixup_loongarch_got64_hi12: ++ case LoongArch::fixup_loongarch_tls_ld_pc_hi20: ++ case LoongArch::fixup_loongarch_tls_ld_hi20: ++ case LoongArch::fixup_loongarch_tls_gd_pc_hi20: ++ case LoongArch::fixup_loongarch_tls_gd_hi20: ++ case LoongArch::fixup_loongarch_tls_ie_pc_hi20: ++ case LoongArch::fixup_loongarch_tls_ie_pc_lo12: ++ case LoongArch::fixup_loongarch_tls_ie64_pc_lo20: ++ case LoongArch::fixup_loongarch_tls_ie64_pc_hi12: ++ case LoongArch::fixup_loongarch_tls_ie_hi20: ++ case LoongArch::fixup_loongarch_tls_ie_lo12: ++ case LoongArch::fixup_loongarch_tls_ie64_lo20: ++ case LoongArch::fixup_loongarch_tls_ie64_hi12: ++ // FIXME: Sometimes, these fixup_*pcala* relocations can be evaluated ++ // directly, left to the linker for now. 
++ case LoongArch::fixup_loongarch_pcala_hi20: ++ case LoongArch::fixup_loongarch_pcala_lo12: ++ case LoongArch::fixup_loongarch_pcala64_lo20: ++ case LoongArch::fixup_loongarch_pcala64_hi12: ++ llvm_unreachable("Relocation should be unconditionally forced"); ++ case FK_Data_1: ++ case FK_Data_2: ++ case FK_Data_4: ++ case FK_Data_8: ++ return Value; ++ case LoongArch::fixup_loongarch_b16: { ++ if (!isInt<18>(Value)) ++ Ctx.reportError(Fixup.getLoc(), "fixup value out of range"); ++ if (Value & 0x3) ++ Ctx.reportError(Fixup.getLoc(), "fixup value must be 4-byte aligned"); ++ return (Value >> 2) & 0xffff; ++ } ++ case LoongArch::fixup_loongarch_b21: { ++ if (!isInt<23>(Value)) ++ Ctx.reportError(Fixup.getLoc(), "fixup value out of range"); ++ if (Value & 0x3) ++ Ctx.reportError(Fixup.getLoc(), "fixup value must be 4-byte aligned"); ++ return ((Value & 0x3fffc) << 8) | ((Value >> 18) & 0x1f); ++ } ++ case LoongArch::fixup_loongarch_b26: { ++ if (!isInt<28>(Value)) ++ Ctx.reportError(Fixup.getLoc(), "fixup value out of range"); ++ if (Value & 0x3) ++ Ctx.reportError(Fixup.getLoc(), "fixup value must be 4-byte aligned"); ++ return ((Value & 0x3fffc) << 8) | ((Value >> 18) & 0x3ff); ++ } ++ case LoongArch::fixup_loongarch_abs_hi20: ++ case LoongArch::fixup_loongarch_tls_le_hi20: ++ return (Value >> 12) & 0xfffff; ++ case LoongArch::fixup_loongarch_abs_lo12: ++ case LoongArch::fixup_loongarch_tls_le_lo12: ++ return Value & 0xfff; ++ case LoongArch::fixup_loongarch_abs64_lo20: ++ case LoongArch::fixup_loongarch_tls_le64_lo20: ++ return (Value >> 32) & 0xfffff; ++ case LoongArch::fixup_loongarch_abs64_hi12: ++ case LoongArch::fixup_loongarch_tls_le64_hi12: ++ return (Value >> 52) & 0xfff; ++ } ++} ++ ++/// ApplyFixup - Apply the \p Value for given \p Fixup into the provided ++/// data fragment, at the offset specified by the fixup and following the ++/// fixup kind as appropriate. ++void LoongArchAsmBackend::applyFixup(const MCAssembler &Asm, const MCFixup &Fixup, ++ const MCValue &Target, ++ MutableArrayRef Data, uint64_t Value, ++ bool IsResolved, ++ const MCSubtargetInfo *STI) const { ++ MCFixupKind Kind = Fixup.getKind(); ++ if (Kind > FirstLiteralRelocationKind) ++ return; ++ ++ MCContext &Ctx = Asm.getContext(); ++ MCFixupKindInfo Info = getFixupKindInfo(Kind); ++ if (!Value) ++ return; // Doesn't change encoding. ++ // Apply any target-specific value adjustments. ++ Value = adjustFixupValue(Fixup, Value, Ctx); ++ ++ // Shift the value into position. ++ Value <<= Info.TargetOffset; ++ ++ unsigned Offset = Fixup.getOffset(); ++ unsigned NumBytes = alignTo(Info.TargetSize + Info.TargetOffset, 8) / 8; ++ ++ assert(Offset + NumBytes <= Data.size() && "Invalid fixup offset!"); ++ // For each byte of the fragment that the fixup touches, mask in the ++ // bits from the fixup value. 
++ for (unsigned I = 0; I != NumBytes; ++I) { ++ Data[Offset + I] |= uint8_t((Value >> (I * 8)) & 0xff); ++ } ++} ++ ++Optional LoongArchAsmBackend::getFixupKind(StringRef Name) const { ++ if (STI.getTargetTriple().isOSBinFormatELF()) { ++ auto Type = llvm::StringSwitch(Name) ++#define ELF_RELOC(X, Y) .Case(#X, Y) ++#include "llvm/BinaryFormat/ELFRelocs/LoongArch.def" ++#undef ELF_RELOC ++ .Case("BFD_RELOC_NONE", ELF::R_LARCH_NONE) ++ .Case("BFD_RELOC_32", ELF::R_LARCH_32) ++ .Case("BFD_RELOC_64", ELF::R_LARCH_64) ++ .Default(-1u); ++ if (Type != -1u) ++ return static_cast(FirstLiteralRelocationKind + Type); ++ } ++ return None; ++} ++ ++const MCFixupKindInfo &LoongArchAsmBackend:: ++getFixupKindInfo(MCFixupKind Kind) const { ++ const static MCFixupKindInfo Infos[] = { ++ // This table *must* be in same the order of fixup_* kinds in ++ // LoongArchFixupKinds.h. ++ // ++ // name offset bits flags ++ {"fixup_LARCH_NONE", 0, 0, 0}, ++ {"fixup_LARCH_32", 0, 0, 0}, ++ {"fixup_LARCH_64", 0, 0, 0}, ++ {"fixup_LARCH_RELATIVE", 0, 0, 0}, ++ {"fixup_LARCH_COPY", 0, 0, 0}, ++ {"fixup_LARCH_JUMP_SLOT", 0, 0, 0}, ++ {"fixup_LARCH_TLS_DTPMOD32", 0, 0, 0}, ++ {"fixup_LARCH_TLS_DTPMOD64", 0, 0, 0}, ++ {"fixup_LARCH_TLS_DTPREL32", 0, 0, 0}, ++ {"fixup_LARCH_TLS_DTPREL64", 0, 0, 0}, ++ {"fixup_LARCH_TLS_TPREL32", 0, 0, 0}, ++ {"fixup_LARCH_TLS_TPREL64", 0, 0, 0}, ++ {"fixup_LARCH_IRELATIVE", 0, 0, 0}, ++ {"fixup_LARCH_MARK_LA", 0, 0, 0}, ++ {"fixup_LARCH_MARK_PCREL", 0, 0, 0}, ++ {"fixup_LARCH_ADD8", 0, 0, 0}, ++ {"fixup_LARCH_ADD16", 0, 0, 0}, ++ {"fixup_LARCH_ADD24", 0, 0, 0}, ++ {"fixup_LARCH_ADD32", 0, 0, 0}, ++ {"fixup_LARCH_ADD64", 0, 0, 0}, ++ {"fixup_LARCH_SUB8", 0, 0, 0}, ++ {"fixup_LARCH_SUB16", 0, 0, 0}, ++ {"fixup_LARCH_SUB24", 0, 0, 0}, ++ {"fixup_LARCH_SUB32", 0, 0, 0}, ++ {"fixup_LARCH_SUB64", 0, 0, 0}, ++ {"fixup_LARCH_GNU_VTINHERIT", 0, 0, 0}, ++ {"fixup_LARCH_GNU_VTENTRY", 0, 0, 0}, ++ {"fixup_loongarch_b16", 10, 16, MCFixupKindInfo::FKF_IsPCRel}, ++ {"fixup_loongarch_b21", 0, 26, MCFixupKindInfo::FKF_IsPCRel}, ++ {"fixup_loongarch_b26", 0, 26, MCFixupKindInfo::FKF_IsPCRel}, ++ {"fixup_loongarch_abs_hi20", 5, 20, 0}, ++ {"fixup_loongarch_abs_lo12", 10, 12, 0}, ++ {"fixup_loongarch_abs64_lo20", 5, 20, 0}, ++ {"fixup_loongarch_abs64_hi12", 10, 12, 0}, ++ {"fixup_loongarch_pcala_hi20", 5, 20, MCFixupKindInfo::FKF_IsPCRel}, ++ {"fixup_loongarch_pcala_lo12", 10, 12, MCFixupKindInfo::FKF_IsPCRel}, ++ {"fixup_loongarch_pcala64_lo20", 5, 20, MCFixupKindInfo::FKF_IsPCRel}, ++ {"fixup_loongarch_pcala64_hi12", 10, 12, MCFixupKindInfo::FKF_IsPCRel}, ++ {"fixup_loongarch_got_pc_hi20", 5, 20, MCFixupKindInfo::FKF_IsPCRel}, ++ {"fixup_loongarch_got_pc_lo12", 10, 12, MCFixupKindInfo::FKF_IsPCRel}, ++ {"fixup_loongarch_got64_pc_lo20", 5, 20, MCFixupKindInfo::FKF_IsPCRel}, ++ {"fixup_loongarch_got64_pc_hi12", 10, 12, MCFixupKindInfo::FKF_IsPCRel}, ++ {"fixup_loongarch_got_hi20", 5, 20, 0}, ++ {"fixup_loongarch_got_lo12", 10, 12, 0}, ++ {"fixup_loongarch_got64_lo20", 5, 20, 0}, ++ {"fixup_loongarch_got64_hi12", 10, 12, 0}, ++ {"fixup_loongarch_tls_le_hi20", 5, 20, 0}, ++ {"fixup_loongarch_tls_le_lo12", 10, 12, 0}, ++ {"fixup_loongarch_tls_le64_lo20", 5, 20, 0}, ++ {"fixup_loongarch_tls_le64_hi12", 10, 12, 0}, ++ {"fixup_loongarch_tls_ie_pc_hi20", 5, 20, MCFixupKindInfo::FKF_IsPCRel}, ++ {"fixup_loongarch_tls_ie_pc_lo12", 10, 12, MCFixupKindInfo::FKF_IsPCRel}, ++ {"fixup_loongarch_tls_ie64_pc_lo20", 5, 20, MCFixupKindInfo::FKF_IsPCRel}, ++ {"fixup_loongarch_tls_ie64_pc_hi12", 10, 12, ++ 
MCFixupKindInfo::FKF_IsPCRel}, ++ {"fixup_loongarch_tls_ie_hi20", 5, 20, 0}, ++ {"fixup_loongarch_tls_ie_lo12", 10, 12, 0}, ++ {"fixup_loongarch_tls_ie64_lo20", 5, 20, 0}, ++ {"fixup_loongarch_tls_ie64_hi12", 10, 12, 0}, ++ {"fixup_loongarch_tls_ld_pc_hi20", 5, 20, MCFixupKindInfo::FKF_IsPCRel}, ++ {"fixup_loongarch_tls_ld_hi20", 5, 20, 0}, ++ {"fixup_loongarch_tls_gd_pc_hi20", 5, 20, MCFixupKindInfo::FKF_IsPCRel}, ++ {"fixup_loongarch_tls_gd_hi20", 5, 20, 0}, ++ }; ++ ++ static_assert((array_lengthof(Infos)) == LoongArch::NumTargetFixupKinds, ++ "Not all fixup kinds added to Infos array"); ++ ++ // Fixup kinds from .reloc directive are like R_LARCH_NONE. They ++ // do not require any extra processing. ++ if (Kind >= FirstLiteralRelocationKind) ++ return MCAsmBackend::getFixupKindInfo(FK_NONE); ++ ++ if (Kind < FirstTargetFixupKind) ++ return MCAsmBackend::getFixupKindInfo(Kind); ++ ++ assert(unsigned(Kind - FirstTargetFixupKind) < getNumFixupKinds() && ++ "Invalid kind!"); ++ ++ return Infos[Kind - FirstTargetFixupKind]; ++} ++ ++/// WriteNopData - Write an (optimal) nop sequence of Count bytes ++/// to the given output. If the target cannot generate such a sequence, ++/// it should return an error. ++/// ++/// \return - True on success. ++bool LoongArchAsmBackend::writeNopData(raw_ostream &OS, uint64_t Count, ++ const MCSubtargetInfo *STI) const { ++ // Check for a less than instruction size number of bytes ++ if ((Count % 4) != 0) ++ return false; ++ ++ // The nop on LoongArch is andi r0, r0, 0. ++ for (; Count >= 4; Count -= 4) ++ support::endian::write(OS, 0x03400000, support::little); ++ ++ return true; ++} ++ ++bool LoongArchAsmBackend::shouldForceRelocation(const MCAssembler &Asm, ++ const MCFixup &Fixup, ++ const MCValue &Target) { ++ if (Fixup.getKind() >= FirstLiteralRelocationKind) ++ return true; ++ const unsigned FixupKind = Fixup.getKind(); ++ switch (FixupKind) { ++ default: ++ return false; ++ // All these relocations require special processing ++ // at linking time. Delegate this work to a linker. ++ case FK_Data_1: ++ case FK_Data_2: ++ case FK_Data_4: ++ case FK_Data_8: ++ return !Target.isAbsolute(); ++ // These relocations require special processing at linking time. 
++ case LoongArch::fixup_loongarch_pcala_hi20: ++ case LoongArch::fixup_loongarch_pcala_lo12: ++ case LoongArch::fixup_loongarch_pcala64_lo20: ++ case LoongArch::fixup_loongarch_pcala64_hi12: ++ case LoongArch::fixup_loongarch_got_pc_hi20: ++ case LoongArch::fixup_loongarch_got_pc_lo12: ++ case LoongArch::fixup_loongarch_got64_pc_lo20: ++ case LoongArch::fixup_loongarch_got64_pc_hi12: ++ case LoongArch::fixup_loongarch_got_hi20: ++ case LoongArch::fixup_loongarch_got_lo12: ++ case LoongArch::fixup_loongarch_got64_lo20: ++ case LoongArch::fixup_loongarch_got64_hi12: ++ case LoongArch::fixup_loongarch_tls_ld_pc_hi20: ++ case LoongArch::fixup_loongarch_tls_ld_hi20: ++ case LoongArch::fixup_loongarch_tls_gd_pc_hi20: ++ case LoongArch::fixup_loongarch_tls_gd_hi20: ++ case LoongArch::fixup_loongarch_tls_ie_pc_hi20: ++ case LoongArch::fixup_loongarch_tls_ie_pc_lo12: ++ case LoongArch::fixup_loongarch_tls_ie64_pc_lo20: ++ case LoongArch::fixup_loongarch_tls_ie64_pc_hi12: ++ case LoongArch::fixup_loongarch_tls_ie_hi20: ++ case LoongArch::fixup_loongarch_tls_ie_lo12: ++ case LoongArch::fixup_loongarch_tls_ie64_lo20: ++ case LoongArch::fixup_loongarch_tls_ie64_hi12: ++ return true; ++ } ++} ++ ++MCAsmBackend *llvm::createLoongArchAsmBackend(const Target &T, ++ const MCSubtargetInfo &STI, ++ const MCRegisterInfo &MRI, ++ const MCTargetOptions &Options) { ++ LoongArchABIInfo ABI = LoongArchABIInfo::computeTargetABI( ++ STI.getTargetTriple(), STI.getCPU(), Options); ++ return new LoongArchAsmBackend(STI, T, MRI, STI.getTargetTriple(), ++ STI.getCPU()); ++} +diff --git a/lib/Target/LoongArch/MCTargetDesc/LoongArchAsmBackend.h b/lib/Target/LoongArch/MCTargetDesc/LoongArchAsmBackend.h +new file mode 100644 +index 00000000..db0fbb19 +--- /dev/null ++++ b/lib/Target/LoongArch/MCTargetDesc/LoongArchAsmBackend.h +@@ -0,0 +1,91 @@ ++//===-- LoongArchAsmBackend.h - LoongArch Asm Backend ------------------------------===// ++// ++// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. ++// See https://llvm.org/LICENSE.txt for license information. ++// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception ++// ++//===----------------------------------------------------------------------===// ++// ++// This file defines the LoongArchAsmBackend class. 
++//
++//===----------------------------------------------------------------------===//
++//
++
++#ifndef LLVM_LIB_TARGET_LOONGARCH_MCTARGETDESC_LOONGARCHASMBACKEND_H
++#define LLVM_LIB_TARGET_LOONGARCH_MCTARGETDESC_LOONGARCHASMBACKEND_H
++
++#include "MCTargetDesc/LoongArchFixupKinds.h"
++#include "llvm/ADT/Triple.h"
++#include "llvm/MC/MCAsmBackend.h"
++
++namespace llvm {
++
++class MCAssembler;
++struct MCFixupKindInfo;
++class MCObjectWriter;
++class MCRegisterInfo;
++class MCSymbolELF;
++class Target;
++
++class LoongArchAsmBackend : public MCAsmBackend {
++  const MCSubtargetInfo &STI;
++  Triple TheTriple;
++
++public:
++  LoongArchAsmBackend(const MCSubtargetInfo &STI, const Target &T,
++                      const MCRegisterInfo &MRI, const Triple &TT,
++                      StringRef CPU)
++      : MCAsmBackend(support::little), STI(STI), TheTriple(TT) {
++    assert(TT.isLittleEndian());
++  }
++
++  std::unique_ptr<MCObjectTargetWriter>
++  createObjectTargetWriter() const override;
++
++  void applyFixup(const MCAssembler &Asm, const MCFixup &Fixup,
++                  const MCValue &Target, MutableArrayRef<char> Data,
++                  uint64_t Value, bool IsResolved,
++                  const MCSubtargetInfo *STI) const override;
++
++  Optional<MCFixupKind> getFixupKind(StringRef Name) const override;
++  const MCFixupKindInfo &getFixupKindInfo(MCFixupKind Kind) const override;
++
++  unsigned getNumFixupKinds() const override {
++    return LoongArch::NumTargetFixupKinds;
++  }
++
++  /// @name Target Relaxation Interfaces
++  /// @{
++
++  /// MayNeedRelaxation - Check whether the given instruction may need
++  /// relaxation.
++  ///
++  /// \param Inst - The instruction to test.
++  bool mayNeedRelaxation(const MCInst &Inst,
++                         const MCSubtargetInfo &STI) const override {
++    return false;
++  }
++
++  /// fixupNeedsRelaxation - Target specific predicate for whether a given
++  /// fixup requires the associated instruction to be relaxed.
++  bool fixupNeedsRelaxation(const MCFixup &Fixup, uint64_t Value,
++                            const MCRelaxableFragment *DF,
++                            const MCAsmLayout &Layout) const override {
++    // FIXME.
++    llvm_unreachable("RelaxInstruction() unimplemented");
++    return false;
++  }
++
++  /// @}
++
++  bool writeNopData(raw_ostream &OS, uint64_t Count,
++                    const MCSubtargetInfo *STI) const override;
++
++  bool shouldForceRelocation(const MCAssembler &Asm, const MCFixup &Fixup,
++                             const MCValue &Target) override;
++
++}; // class LoongArchAsmBackend
++
++} // namespace
++
++#endif
+diff --git a/lib/Target/LoongArch/MCTargetDesc/LoongArchBaseInfo.h b/lib/Target/LoongArch/MCTargetDesc/LoongArchBaseInfo.h
+new file mode 100644
+index 00000000..707333c1
+--- /dev/null
++++ b/lib/Target/LoongArch/MCTargetDesc/LoongArchBaseInfo.h
+@@ -0,0 +1,128 @@
++//===-- LoongArchBaseInfo.h - Top level definitions for LoongArch MC ------*- C++ -*-===//
++//
++// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
++// See https://llvm.org/LICENSE.txt for license information.
++// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
++//
++//===----------------------------------------------------------------------===//
++//
++// This file contains small standalone helper functions and enum definitions for
++// the LoongArch target useful for the compiler back-end and the MC libraries.
++// ++//===----------------------------------------------------------------------===// ++#ifndef LLVM_LIB_TARGET_LOONGARCH_MCTARGETDESC_LOONGARCHBASEINFO_H ++#define LLVM_LIB_TARGET_LOONGARCH_MCTARGETDESC_LOONGARCHBASEINFO_H ++ ++#include "LoongArchFixupKinds.h" ++#include "LoongArchMCTargetDesc.h" ++#include "llvm/MC/MCExpr.h" ++#include "llvm/Support/DataTypes.h" ++#include "llvm/Support/ErrorHandling.h" ++ ++namespace llvm { ++ ++/// LoongArchII - This namespace holds all of the target specific flags that ++/// instruction info tracks. ++/// ++namespace LoongArchII { ++ /// Target Operand Flag enum. ++ enum TOF { ++ //===------------------------------------------------------------------===// ++ // LoongArch Specific MachineOperand flags. ++ ++ MO_NO_FLAG, ++ ++ /// MO_ABS_XXX - Represents the hi or low part of an absolute symbol ++ /// address. ++ MO_ABS_HI, ++ MO_ABS_LO, ++ MO_ABS_HIGHER, ++ MO_ABS_HIGHEST, ++ ++ /// MO_PCREL_XXX - Represents the hi or low part of an pc relative symbol ++ /// address. ++ MO_PCREL_HI, ++ MO_PCREL_LO, ++ // with tmp reg ++ MO_PCREL_RRHI, ++ MO_PCREL_RRLO, ++ MO_PCREL_RRHIGHER, ++ MO_PCREL_RRHIGHEST, ++ ++ // LArch Tls gd and ld ++ MO_TLSGD_HI, ++ MO_TLSGD_LO, ++ // with tmp reg ++ MO_TLSGD_RRHI, ++ MO_TLSGD_RRLO, ++ MO_TLSGD_RRHIGHER, ++ MO_TLSGD_RRHIGHEST, ++ ++ // LArch thread tprel (ie/le) ++ // LArch Tls ie ++ MO_TLSIE_HI, ++ MO_TLSIE_LO, ++ // with tmp reg ++ MO_TLSIE_RRHI, ++ MO_TLSIE_RRLO, ++ MO_TLSIE_RRHIGHER, ++ MO_TLSIE_RRHIGHEST, ++ // LArch Tls le ++ MO_TLSLE_HI, ++ MO_TLSLE_LO, ++ MO_TLSLE_HIGHER, ++ MO_TLSLE_HIGHEST, ++ ++ // Loongarch got ++ MO_GOT_HI, ++ MO_GOT_LO, ++ // with tmp reg ++ MO_GOT_RRHI, ++ MO_GOT_RRLO, ++ MO_GOT_RRHIGHER, ++ MO_GOT_RRHIGHEST, ++ ++ MO_CALL_HI, ++ MO_CALL_LO, ++ }; ++ ++ enum { ++ //===------------------------------------------------------------------===// ++ // Instruction encodings. These are the standard/most common forms for ++ // LoongArch instructions. ++ // ++ ++ // Pseudo - This represents an instruction that is a pseudo instruction ++ // or one that has not been implemented yet. It is illegal to code generate ++ // it, but tolerated for intermediate implementation stages. ++ Pseudo = 0, ++ ++ /// FrmR - This form is for instructions of the format R. ++ FrmR = 1, ++ /// FrmI - This form is for instructions of the format I. ++ FrmI = 2, ++ /// FrmJ - This form is for instructions of the format J. ++ FrmJ = 3, ++ /// FrmFR - This form is for instructions of the format FR. ++ FrmFR = 4, ++ /// FrmFI - This form is for instructions of the format FI. ++ FrmFI = 5, ++ /// FrmOther - This form is for instructions that have no specific format. ++ FrmOther = 6, ++ ++ FormMask = 15, ++ /// IsCTI - Instruction is a Control Transfer Instruction. ++ IsCTI = 1 << 4, ++ /// HasForbiddenSlot - Instruction has a forbidden slot. ++ HasForbiddenSlot = 1 << 5, ++ /// IsPCRelativeLoad - A Load instruction with implicit source register ++ /// ($pc) with explicit offset and destination register ++ IsPCRelativeLoad = 1 << 6, ++ /// HasFCCRegOperand - Instruction uses an $fcc register. 
++ HasFCCRegOperand = 1 << 7 ++ ++ }; ++} ++} ++ ++#endif +diff --git a/lib/Target/LoongArch/MCTargetDesc/LoongArchELFObjectWriter.cpp b/lib/Target/LoongArch/MCTargetDesc/LoongArchELFObjectWriter.cpp +new file mode 100644 +index 00000000..3f5b115e +--- /dev/null ++++ b/lib/Target/LoongArch/MCTargetDesc/LoongArchELFObjectWriter.cpp +@@ -0,0 +1,213 @@ ++//===-- LoongArchELFObjectWriter.cpp - LoongArch ELF Writer -------------------------===// ++// ++// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. ++// See https://llvm.org/LICENSE.txt for license information. ++// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception ++// ++//===----------------------------------------------------------------------===// ++ ++#include "MCTargetDesc/LoongArchFixupKinds.h" ++#include "MCTargetDesc/LoongArchMCExpr.h" ++#include "MCTargetDesc/LoongArchMCTargetDesc.h" ++#include "llvm/ADT/STLExtras.h" ++#include "llvm/BinaryFormat/ELF.h" ++#include "llvm/MC/MCContext.h" ++#include "llvm/MC/MCELFObjectWriter.h" ++#include "llvm/MC/MCFixup.h" ++#include "llvm/MC/MCObjectWriter.h" ++#include "llvm/MC/MCSymbolELF.h" ++#include "llvm/Support/Casting.h" ++#include "llvm/Support/Compiler.h" ++#include "llvm/Support/Debug.h" ++#include "llvm/Support/ErrorHandling.h" ++#include "llvm/Support/MathExtras.h" ++#include "llvm/Support/raw_ostream.h" ++#include ++#include ++#include ++#include ++#include ++#include ++ ++#define DEBUG_TYPE "loongarch-elf-object-writer" ++ ++using namespace llvm; ++ ++namespace { ++ ++class LoongArchELFObjectWriter : public MCELFObjectTargetWriter { ++public: ++ LoongArchELFObjectWriter(uint8_t OSABI, bool HasRelocationAddend, bool Is64); ++ ++ ~LoongArchELFObjectWriter() override = default; ++ ++ unsigned getRelocType(MCContext &Ctx, const MCValue &Target, ++ const MCFixup &Fixup, bool IsPCRel) const override; ++ bool needsRelocateWithSymbol(const MCSymbol &Sym, ++ unsigned Type) const override { ++ return true; ++ } ++}; ++ ++} // end anonymous namespace ++ ++LoongArchELFObjectWriter::LoongArchELFObjectWriter(uint8_t OSABI, ++ bool HasRelocationAddend, bool Is64) ++ : MCELFObjectTargetWriter(Is64, OSABI, ELF::EM_LOONGARCH, HasRelocationAddend) {} ++ ++unsigned LoongArchELFObjectWriter::getRelocType(MCContext &Ctx, ++ const MCValue &Target, ++ const MCFixup &Fixup, ++ bool IsPCRel) const { ++ // Determine the type of the relocation. ++ ///XXX:Reloc ++ unsigned Kind = (unsigned)Fixup.getKind(); ++ const MCExpr *Expr = Fixup.getValue(); ++ ++ if (Kind >= FirstLiteralRelocationKind) ++ return Kind - FirstLiteralRelocationKind; ++ ++ switch (Kind) { ++ default: ++ return ELF::R_LARCH_NONE; ++ //llvm_unreachable("invalid fixup kind!"); ++ case FK_Data_4: ++ case LoongArch::fixup_LARCH_32: ++ if (Expr->getKind() == MCExpr::Target && ++ cast(Expr)->getKind() == ++ LoongArchMCExpr::MEK_32_PCREL) ++ return ELF::R_LARCH_32_PCREL; ++ return IsPCRel ? 
ELF::R_LARCH_32_PCREL : ELF::R_LARCH_32; ++ case FK_GPRel_4: ++ case FK_Data_8: ++ case LoongArch::fixup_LARCH_64: ++ return ELF::R_LARCH_64; ++ case LoongArch::fixup_LARCH_NONE: ++ return ELF::R_LARCH_NONE; ++ case LoongArch::fixup_LARCH_RELATIVE: ++ return ELF::R_LARCH_RELATIVE; ++ case LoongArch::fixup_LARCH_COPY: ++ return ELF::R_LARCH_COPY; ++ case LoongArch::fixup_LARCH_JUMP_SLOT: ++ return ELF::R_LARCH_JUMP_SLOT; ++ case LoongArch::fixup_LARCH_TLS_DTPMOD32: ++ return ELF::R_LARCH_TLS_DTPMOD32; ++ case LoongArch::fixup_LARCH_TLS_DTPMOD64: ++ return ELF::R_LARCH_TLS_DTPMOD64; ++ case LoongArch::fixup_LARCH_TLS_DTPREL32: ++ return ELF::R_LARCH_TLS_DTPREL32; ++ case LoongArch::fixup_LARCH_TLS_DTPREL64: ++ return ELF::R_LARCH_TLS_DTPREL64; ++ case LoongArch::fixup_LARCH_TLS_TPREL32: ++ return ELF::R_LARCH_TLS_TPREL32; ++ case LoongArch::fixup_LARCH_TLS_TPREL64: ++ return ELF::R_LARCH_TLS_TPREL64; ++ case LoongArch::fixup_LARCH_IRELATIVE: ++ return ELF::R_LARCH_IRELATIVE; ++ case LoongArch::fixup_LARCH_MARK_LA: ++ return ELF::R_LARCH_MARK_LA; ++ case LoongArch::fixup_LARCH_MARK_PCREL: ++ return ELF::R_LARCH_MARK_PCREL; ++ case LoongArch::fixup_LARCH_ADD8: ++ return ELF::R_LARCH_ADD8; ++ case LoongArch::fixup_LARCH_ADD16: ++ return ELF::R_LARCH_ADD16; ++ case LoongArch::fixup_LARCH_ADD32: ++ return ELF::R_LARCH_ADD32; ++ case LoongArch::fixup_LARCH_ADD64: ++ return ELF::R_LARCH_ADD64; ++ case LoongArch::fixup_LARCH_SUB8: ++ return ELF::R_LARCH_SUB8; ++ case LoongArch::fixup_LARCH_SUB16: ++ return ELF::R_LARCH_SUB16; ++ case LoongArch::fixup_LARCH_SUB24: ++ return ELF::R_LARCH_SUB24; ++ case LoongArch::fixup_LARCH_SUB32: ++ return ELF::R_LARCH_SUB32; ++ case LoongArch::fixup_LARCH_SUB64: ++ return ELF::R_LARCH_SUB64; ++ case LoongArch::fixup_LARCH_GNU_VTINHERIT: ++ return ELF::R_LARCH_GNU_VTINHERIT; ++ case LoongArch::fixup_LARCH_GNU_VTENTRY: ++ return ELF::R_LARCH_GNU_VTENTRY; ++ case LoongArch::fixup_loongarch_b16: ++ return ELF::R_LARCH_B16; ++ case LoongArch::fixup_loongarch_b21: ++ return ELF::R_LARCH_B21; ++ case LoongArch::fixup_loongarch_b26: ++ return ELF::R_LARCH_B26; ++ case LoongArch::fixup_loongarch_abs_hi20: ++ return ELF::R_LARCH_ABS_HI20; ++ case LoongArch::fixup_loongarch_abs_lo12: ++ return ELF::R_LARCH_ABS_LO12; ++ case LoongArch::fixup_loongarch_abs64_lo20: ++ return ELF::R_LARCH_ABS64_LO20; ++ case LoongArch::fixup_loongarch_abs64_hi12: ++ return ELF::R_LARCH_ABS64_HI12; ++ case LoongArch::fixup_loongarch_pcala_hi20: ++ return ELF::R_LARCH_PCALA_HI20; ++ case LoongArch::fixup_loongarch_pcala_lo12: ++ return ELF::R_LARCH_PCALA_LO12; ++ case LoongArch::fixup_loongarch_pcala64_lo20: ++ return ELF::R_LARCH_PCALA64_LO20; ++ case LoongArch::fixup_loongarch_pcala64_hi12: ++ return ELF::R_LARCH_PCALA64_HI12; ++ case LoongArch::fixup_loongarch_got_pc_hi20: ++ return ELF::R_LARCH_GOT_PC_HI20; ++ case LoongArch::fixup_loongarch_got_pc_lo12: ++ return ELF::R_LARCH_GOT_PC_LO12; ++ case LoongArch::fixup_loongarch_got64_pc_lo20: ++ return ELF::R_LARCH_GOT64_PC_LO20; ++ case LoongArch::fixup_loongarch_got64_pc_hi12: ++ return ELF::R_LARCH_GOT64_PC_HI12; ++ case LoongArch::fixup_loongarch_got_hi20: ++ return ELF::R_LARCH_GOT_HI20; ++ case LoongArch::fixup_loongarch_got_lo12: ++ return ELF::R_LARCH_GOT_LO12; ++ case LoongArch::fixup_loongarch_got64_lo20: ++ return ELF::R_LARCH_GOT64_LO20; ++ case LoongArch::fixup_loongarch_got64_hi12: ++ return ELF::R_LARCH_GOT64_HI12; ++ case LoongArch::fixup_loongarch_tls_le_hi20: ++ return ELF::R_LARCH_TLS_LE_HI20; ++ case 
LoongArch::fixup_loongarch_tls_le_lo12: ++ return ELF::R_LARCH_TLS_LE_LO12; ++ case LoongArch::fixup_loongarch_tls_le64_lo20: ++ return ELF::R_LARCH_TLS_LE64_LO20; ++ case LoongArch::fixup_loongarch_tls_le64_hi12: ++ return ELF::R_LARCH_TLS_LE64_HI12; ++ case LoongArch::fixup_loongarch_tls_ie_pc_hi20: ++ return ELF::R_LARCH_TLS_IE_PC_HI20; ++ case LoongArch::fixup_loongarch_tls_ie_pc_lo12: ++ return ELF::R_LARCH_TLS_IE_PC_LO12; ++ case LoongArch::fixup_loongarch_tls_ie64_pc_lo20: ++ return ELF::R_LARCH_TLS_IE64_PC_LO20; ++ case LoongArch::fixup_loongarch_tls_ie64_pc_hi12: ++ return ELF::R_LARCH_TLS_IE64_PC_HI12; ++ case LoongArch::fixup_loongarch_tls_ie_hi20: ++ return ELF::R_LARCH_TLS_IE_HI20; ++ case LoongArch::fixup_loongarch_tls_ie_lo12: ++ return ELF::R_LARCH_TLS_IE_LO12; ++ case LoongArch::fixup_loongarch_tls_ie64_lo20: ++ return ELF::R_LARCH_TLS_IE64_LO20; ++ case LoongArch::fixup_loongarch_tls_ie64_hi12: ++ return ELF::R_LARCH_TLS_IE64_HI12; ++ case LoongArch::fixup_loongarch_tls_ld_pc_hi20: ++ return ELF::R_LARCH_TLS_LD_PC_HI20; ++ case LoongArch::fixup_loongarch_tls_ld_hi20: ++ return ELF::R_LARCH_TLS_LD_HI20; ++ case LoongArch::fixup_loongarch_tls_gd_pc_hi20: ++ return ELF::R_LARCH_TLS_GD_PC_HI20; ++ case LoongArch::fixup_loongarch_tls_gd_hi20: ++ return ELF::R_LARCH_TLS_GD_HI20; ++ } ++} ++ ++std::unique_ptr ++llvm::createLoongArchELFObjectWriter(const Triple &TT) { ++ uint8_t OSABI = MCELFObjectTargetWriter::getOSABI(TT.getOS()); ++ bool IsLP64 = TT.isArch64Bit(); ++ bool HasRelocationAddend = TT.isArch64Bit(); ++ return std::make_unique(OSABI, HasRelocationAddend, ++ IsLP64); ++} +diff --git a/lib/Target/LoongArch/MCTargetDesc/LoongArchELFStreamer.cpp b/lib/Target/LoongArch/MCTargetDesc/LoongArchELFStreamer.cpp +new file mode 100644 +index 00000000..39fc4d77 +--- /dev/null ++++ b/lib/Target/LoongArch/MCTargetDesc/LoongArchELFStreamer.cpp +@@ -0,0 +1,131 @@ ++//===-------- LoongArchELFStreamer.cpp - ELF Object Output ---------------------===// ++// ++// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. ++// See https://llvm.org/LICENSE.txt for license information. 
++// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
++//
++//===----------------------------------------------------------------------===//
++
++#include "LoongArchELFStreamer.h"
++#include "LoongArchFixupKinds.h"
++#include "LoongArchTargetStreamer.h"
++#include "llvm/BinaryFormat/ELF.h"
++#include "llvm/MC/MCAsmBackend.h"
++#include "llvm/MC/MCAssembler.h"
++#include "llvm/MC/MCCodeEmitter.h"
++#include "llvm/MC/MCContext.h"
++#include "llvm/MC/MCDwarf.h"
++#include "llvm/MC/MCInst.h"
++#include "llvm/MC/MCObjectWriter.h"
++#include "llvm/MC/MCSymbolELF.h"
++#include "llvm/MC/MCValue.h"
++#include "llvm/Support/Casting.h"
++
++using namespace llvm;
++
++static std::pair<unsigned, unsigned> getRelocPairForSize(unsigned Size) {
++  switch (Size) {
++  default:
++    llvm_unreachable("unsupported fixup size");
++  case 1:
++    return std::make_pair(LoongArch::fixup_LARCH_ADD8,
++                          LoongArch::fixup_LARCH_SUB8);
++  case 2:
++    return std::make_pair(LoongArch::fixup_LARCH_ADD16,
++                          LoongArch::fixup_LARCH_SUB16);
++  case 4:
++    return std::make_pair(LoongArch::fixup_LARCH_ADD32,
++                          LoongArch::fixup_LARCH_SUB32);
++  case 8:
++    return std::make_pair(LoongArch::fixup_LARCH_ADD64,
++                          LoongArch::fixup_LARCH_SUB64);
++  }
++}
++
++static bool requiresFixups(MCContext &C, const MCExpr *Value,
++                           const MCExpr *&LHS, const MCExpr *&RHS) {
++  const auto *MBE = dyn_cast<MCBinaryExpr>(Value);
++  if (MBE == nullptr)
++    return false;
++
++  MCValue E;
++  if (!Value->evaluateAsRelocatable(E, nullptr, nullptr))
++    return false;
++  if (E.getSymA() == nullptr || E.getSymB() == nullptr)
++    return false;
++
++  const auto &A = E.getSymA()->getSymbol();
++  const auto &B = E.getSymB()->getSymbol();
++
++  if (A.getName().empty() && B.getName().empty())
++    return false;
++
++  if (!A.isInSection() && !B.isInSection() &&
++      !A.getName().empty() && !B.getName().empty())
++    return false;
++
++  LHS =
++      MCBinaryExpr::create(MCBinaryExpr::Add, MCSymbolRefExpr::create(&A, C),
++                           MCConstantExpr::create(E.getConstant(), C), C);
++  RHS = E.getSymB();
++
++  return (A.isInSection() ? true : !A.getName().empty()) ||
++         (B.isInSection() ?
B.getSection().hasInstructions() ++ : !B.getName().empty()); ++} ++ ++ ++LoongArchELFStreamer::LoongArchELFStreamer(MCContext &Context, ++ std::unique_ptr MAB, ++ std::unique_ptr OW, ++ std::unique_ptr Emitter) ++ : MCELFStreamer(Context, std::move(MAB), std::move(OW), ++ std::move(Emitter)) { ++ } ++ ++void LoongArchELFStreamer::emitCFIStartProcImpl(MCDwarfFrameInfo &Frame) { ++ Frame.Begin = getContext().createTempSymbol(); ++ MCELFStreamer::emitLabel(Frame.Begin); ++} ++ ++MCSymbol *LoongArchELFStreamer::emitCFILabel() { ++ MCSymbol *Label = getContext().createTempSymbol("cfi", true); ++ MCELFStreamer::emitLabel(Label); ++ return Label; ++} ++ ++void LoongArchELFStreamer::emitCFIEndProcImpl(MCDwarfFrameInfo &Frame) { ++ Frame.End = getContext().createTempSymbol(); ++ MCELFStreamer::emitLabel(Frame.End); ++} ++ ++void LoongArchELFStreamer::emitValueImpl(const MCExpr *Value, unsigned Size, ++ SMLoc Loc) { ++ const MCExpr *A, *B; ++ if (!requiresFixups(getContext(), Value, A, B)) ++ return MCELFStreamer::emitValueImpl(Value, Size, Loc); ++ ++ MCStreamer::emitValueImpl(Value, Size, Loc); ++ ++ MCDataFragment *DF = getOrCreateDataFragment(); ++ flushPendingLabels(DF, DF->getContents().size()); ++ MCDwarfLineEntry::make(this, getCurrentSectionOnly()); ++ ++ unsigned Add, Sub; ++ std::tie(Add, Sub) = getRelocPairForSize(Size); ++ ++ DF->getFixups().push_back(MCFixup::create( ++ DF->getContents().size(), A, static_cast(Add), Loc)); ++ DF->getFixups().push_back(MCFixup::create( ++ DF->getContents().size(), B, static_cast(Sub), Loc)); ++ ++ DF->getContents().resize(DF->getContents().size() + Size, 0); ++} ++ ++MCELFStreamer *llvm::createLoongArchELFStreamer( ++ MCContext &Context, std::unique_ptr MAB, ++ std::unique_ptr OW, std::unique_ptr Emitter, ++ bool RelaxAll) { ++ return new LoongArchELFStreamer(Context, std::move(MAB), std::move(OW), ++ std::move(Emitter)); ++} +diff --git a/lib/Target/LoongArch/MCTargetDesc/LoongArchELFStreamer.h b/lib/Target/LoongArch/MCTargetDesc/LoongArchELFStreamer.h +new file mode 100644 +index 00000000..875cebcb +--- /dev/null ++++ b/lib/Target/LoongArch/MCTargetDesc/LoongArchELFStreamer.h +@@ -0,0 +1,53 @@ ++//===- LoongArchELFStreamer.h - ELF Object Output --------------------*- C++ -*-===// ++// ++// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. ++// See https://llvm.org/LICENSE.txt for license information. ++// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception ++// ++//===----------------------------------------------------------------------===// ++// ++// This is a custom MCELFStreamer which allows us to insert some hooks before ++// emitting data into an actual object file. ++// ++//===----------------------------------------------------------------------===// ++ ++#ifndef LLVM_LIB_TARGET_LOONGARCH_MCTARGETDESC_LOONGARCHELFSTREAMER_H ++#define LLVM_LIB_TARGET_LOONGARCH_MCTARGETDESC_LOONGARCHELFSTREAMER_H ++ ++#include "llvm/ADT/SmallVector.h" ++#include "llvm/MC/MCELFStreamer.h" ++#include ++ ++namespace llvm { ++ ++class MCAsmBackend; ++class MCCodeEmitter; ++class MCContext; ++class MCSubtargetInfo; ++struct MCDwarfFrameInfo; ++ ++class LoongArchELFStreamer : public MCELFStreamer { ++ ++public: ++ LoongArchELFStreamer(MCContext &Context, std::unique_ptr MAB, ++ std::unique_ptr OW, ++ std::unique_ptr Emitter); ++ ++ /// Overriding these functions allows us to dismiss all labels. 
++ void emitValueImpl(const MCExpr *Value, unsigned Size, SMLoc Loc) override; ++ ++ // Overriding these functions allows us to avoid recording of these labels ++ // in emitLabel. ++ void emitCFIStartProcImpl(MCDwarfFrameInfo &Frame) override; ++ void emitCFIEndProcImpl(MCDwarfFrameInfo &Frame) override; ++ MCSymbol *emitCFILabel() override; ++}; ++ ++MCELFStreamer *createLoongArchELFStreamer(MCContext &Context, ++ std::unique_ptr MAB, ++ std::unique_ptr OW, ++ std::unique_ptr Emitter, ++ bool RelaxAll); ++} // end namespace llvm ++ ++#endif // LLVM_LIB_TARGET_LOONGARCH_MCTARGETDESC_LOONGARCHELFSTREAMER_H +diff --git a/lib/Target/LoongArch/MCTargetDesc/LoongArchFixupKinds.h b/lib/Target/LoongArch/MCTargetDesc/LoongArchFixupKinds.h +new file mode 100644 +index 00000000..5ee83c84 +--- /dev/null ++++ b/lib/Target/LoongArch/MCTargetDesc/LoongArchFixupKinds.h +@@ -0,0 +1,136 @@ ++//===-- LoongArchFixupKinds.h - LoongArch Specific Fixup Entries ----------*- C++ -*-===// ++// ++// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. ++// See https://llvm.org/LICENSE.txt for license information. ++// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception ++// ++//===----------------------------------------------------------------------===// ++ ++#ifndef LLVM_LIB_TARGET_LOONGARCH_MCTARGETDESC_LOONGARCHFIXUPKINDS_H ++#define LLVM_LIB_TARGET_LOONGARCH_MCTARGETDESC_LOONGARCHFIXUPKINDS_H ++ ++#include "llvm/MC/MCFixup.h" ++ ++namespace llvm { ++namespace LoongArch { ++// Although most of the current fixup types reflect a unique relocation ++// one can have multiple fixup types for a given relocation and thus need ++// to be uniquely named. ++// ++// This table *must* be in the same order of ++// MCFixupKindInfo Infos[LoongArch::NumTargetFixupKinds] ++// in LoongArchAsmBackend.cpp. ++// ++enum Fixups { ++ // R_LARCH_NONE. ++ fixup_LARCH_NONE = FirstTargetFixupKind, ++ ++ // reloc_hint ++ // fixup methods ++ fixup_LARCH_32, ++ fixup_LARCH_64, ++ fixup_LARCH_RELATIVE, ++ fixup_LARCH_COPY, ++ fixup_LARCH_JUMP_SLOT, ++ fixup_LARCH_TLS_DTPMOD32, ++ fixup_LARCH_TLS_DTPMOD64, ++ fixup_LARCH_TLS_DTPREL32, ++ fixup_LARCH_TLS_DTPREL64, ++ fixup_LARCH_TLS_TPREL32, ++ fixup_LARCH_TLS_TPREL64, ++ fixup_LARCH_IRELATIVE, ++ fixup_LARCH_MARK_LA, ++ fixup_LARCH_MARK_PCREL, ++ fixup_LARCH_ADD8, ++ fixup_LARCH_ADD16, ++ fixup_LARCH_ADD24, ++ fixup_LARCH_ADD32, ++ fixup_LARCH_ADD64, ++ fixup_LARCH_SUB8, ++ fixup_LARCH_SUB16, ++ fixup_LARCH_SUB24, ++ fixup_LARCH_SUB32, ++ fixup_LARCH_SUB64, ++ fixup_LARCH_GNU_VTINHERIT, ++ fixup_LARCH_GNU_VTENTRY, ++ // 16-bit fixup corresponding to %b16(foo) for instructions like bne. ++ fixup_loongarch_b16, ++ // 21-bit fixup corresponding to %b21(foo) for instructions like bnez. ++ fixup_loongarch_b21, ++ // 26-bit fixup corresponding to %b26(foo)/%plt(foo) for instructions b/bl. ++ fixup_loongarch_b26, ++ // 20-bit fixup corresponding to %abs_hi20(foo) for instruction lu12i.w. ++ fixup_loongarch_abs_hi20, ++ // 12-bit fixup corresponding to %abs_lo12(foo) for instruction ori. ++ fixup_loongarch_abs_lo12, ++ // 20-bit fixup corresponding to %abs64_lo20(foo) for instruction lu32i.d. ++ fixup_loongarch_abs64_lo20, ++ // 12-bit fixup corresponding to %abs_hi12(foo) for instruction lu52i.d. ++ fixup_loongarch_abs64_hi12, ++ // 20-bit fixup corresponding to %pc_hi20(foo) for instruction pcalau12i. ++ fixup_loongarch_pcala_hi20, ++ // 12-bit fixup corresponding to %pc_lo12(foo) for instructions like addi.w/d. 
++ fixup_loongarch_pcala_lo12, ++ // 20-bit fixup corresponding to %pc64_lo20(foo) for instruction lu32i.d. ++ fixup_loongarch_pcala64_lo20, ++ // 12-bit fixup corresponding to %pc64_hi12(foo) for instruction lu52i.d. ++ fixup_loongarch_pcala64_hi12, ++ // 20-bit fixup corresponding to %got_pc_hi20(foo) for instruction pcalau12i. ++ fixup_loongarch_got_pc_hi20, ++ // 12-bit fixup corresponding to %got_pc_lo12(foo) for instructions ++ // ld.w/ld.d/add.d. ++ fixup_loongarch_got_pc_lo12, ++ // 20-bit fixup corresponding to %got64_pc_lo20(foo) for instruction lu32i.d. ++ fixup_loongarch_got64_pc_lo20, ++ // 12-bit fixup corresponding to %got64_pc_hi12(foo) for instruction lu52i.d. ++ fixup_loongarch_got64_pc_hi12, ++ // 20-bit fixup corresponding to %got_hi20(foo) for instruction lu12i.w. ++ fixup_loongarch_got_hi20, ++ // 12-bit fixup corresponding to %got_lo12(foo) for instruction ori. ++ fixup_loongarch_got_lo12, ++ // 20-bit fixup corresponding to %got64_lo20(foo) for instruction lu32i.d. ++ fixup_loongarch_got64_lo20, ++ // 12-bit fixup corresponding to %got64_hi12(foo) for instruction lu52i.d. ++ fixup_loongarch_got64_hi12, ++ // 20-bit fixup corresponding to %le_hi20(foo) for instruction lu12i.w. ++ fixup_loongarch_tls_le_hi20, ++ // 12-bit fixup corresponding to %le_lo12(foo) for instruction ori. ++ fixup_loongarch_tls_le_lo12, ++ // 20-bit fixup corresponding to %le64_lo20(foo) for instruction lu32i.d. ++ fixup_loongarch_tls_le64_lo20, ++ // 12-bit fixup corresponding to %le64_hi12(foo) for instruction lu52i.d. ++ fixup_loongarch_tls_le64_hi12, ++ // 20-bit fixup corresponding to %ie_pc_hi20(foo) for instruction pcalau12i. ++ fixup_loongarch_tls_ie_pc_hi20, ++ // 12-bit fixup corresponding to %ie_pc_lo12(foo) for instructions ++ // ld.w/ld.d/add.d. ++ fixup_loongarch_tls_ie_pc_lo12, ++ // 20-bit fixup corresponding to %ie64_pc_lo20(foo) for instruction lu32i.d. ++ fixup_loongarch_tls_ie64_pc_lo20, ++ // 12-bit fixup corresponding to %ie64_pc_hi12(foo) for instruction lu52i.d. ++ fixup_loongarch_tls_ie64_pc_hi12, ++ // 20-bit fixup corresponding to %ie_hi20(foo) for instruction lu12i.w. ++ fixup_loongarch_tls_ie_hi20, ++ // 12-bit fixup corresponding to %ie_lo12(foo) for instruction ori. ++ fixup_loongarch_tls_ie_lo12, ++ // 20-bit fixup corresponding to %ie64_lo20(foo) for instruction lu32i.d. ++ fixup_loongarch_tls_ie64_lo20, ++ // 12-bit fixup corresponding to %ie64_hi12(foo) for instruction lu52i.d. ++ fixup_loongarch_tls_ie64_hi12, ++ // 20-bit fixup corresponding to %ld_pc_hi20(foo) for instruction pcalau12i. ++ fixup_loongarch_tls_ld_pc_hi20, ++ // 20-bit fixup corresponding to %ld_hi20(foo) for instruction lu12i.w. ++ fixup_loongarch_tls_ld_hi20, ++ // 20-bit fixup corresponding to %gd_pc_hi20(foo) for instruction pcalau12i. ++ fixup_loongarch_tls_gd_pc_hi20, ++ // 20-bit fixup corresponding to %gd_hi20(foo) for instruction lu12i.w. 
++ fixup_loongarch_tls_gd_hi20, ++ ++ // Marker ++ LastTargetFixupKind, ++ NumTargetFixupKinds = LastTargetFixupKind - FirstTargetFixupKind ++}; ++} // namespace LoongArch ++} // namespace llvm ++ ++#endif +diff --git a/lib/Target/LoongArch/MCTargetDesc/LoongArchInstPrinter.cpp b/lib/Target/LoongArch/MCTargetDesc/LoongArchInstPrinter.cpp +new file mode 100644 +index 00000000..96cbb20c +--- /dev/null ++++ b/lib/Target/LoongArch/MCTargetDesc/LoongArchInstPrinter.cpp +@@ -0,0 +1,239 @@ ++//===-- LoongArchInstPrinter.cpp - Convert LoongArch MCInst to assembly syntax ------===// ++// ++// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. ++// See https://llvm.org/LICENSE.txt for license information. ++// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception ++// ++//===----------------------------------------------------------------------===// ++// ++// This class prints an LoongArch MCInst to a .s file. ++// ++//===----------------------------------------------------------------------===// ++ ++#include "LoongArchInstPrinter.h" ++#include "MCTargetDesc/LoongArchMCExpr.h" ++#include "LoongArchInstrInfo.h" ++#include "MCTargetDesc/LoongArchMCTargetDesc.h" ++#include "llvm/ADT/StringExtras.h" ++#include "llvm/MC/MCExpr.h" ++#include "llvm/MC/MCInst.h" ++#include "llvm/MC/MCInstrInfo.h" ++#include "llvm/MC/MCSymbol.h" ++#include "llvm/Support/ErrorHandling.h" ++#include "llvm/Support/raw_ostream.h" ++using namespace llvm; ++ ++#define DEBUG_TYPE "asm-printer" ++ ++#define PRINT_ALIAS_INSTR ++#include "LoongArchGenAsmWriter.inc" ++ ++template ++static bool isReg(const MCInst &MI, unsigned OpNo) { ++ assert(MI.getOperand(OpNo).isReg() && "Register operand expected."); ++ return MI.getOperand(OpNo).getReg() == R; ++} ++ ++const char* LoongArch::LoongArchFCCToString(LoongArch::CondCode CC) { ++ switch (CC) { ++ case FCOND_T: ++ case FCOND_F: return "caf"; ++ case FCOND_OR: ++ case FCOND_UN: return "cun"; ++ case FCOND_UNE: ++ case FCOND_OEQ: return "ceq"; ++ case FCOND_ONE: ++ case FCOND_UEQ: return "cueq"; ++ case FCOND_UGE: ++ case FCOND_OLT: return "clt"; ++ case FCOND_OGE: ++ case FCOND_ULT: return "cult"; ++ case FCOND_UGT: ++ case FCOND_OLE: return "cle"; ++ case FCOND_OGT: ++ case FCOND_ULE: return "cule"; ++ case FCOND_ST: ++ case FCOND_SF: return "saf"; ++ case FCOND_GLE: ++ case FCOND_NGLE:return "sun"; ++ case FCOND_SEQ: return "seq"; ++ case FCOND_SNE: return "sne"; ++ case FCOND_GL: ++ case FCOND_NGL: return "sueq"; ++ case FCOND_NLT: ++ case FCOND_LT: return "slt"; ++ case FCOND_GE: ++ case FCOND_NGE: return "sult"; ++ case FCOND_NLE: ++ case FCOND_LE: return "sle"; ++ case FCOND_GT: ++ case FCOND_NGT: return "sule"; ++ case FCOND_CNE: return "cne"; ++ case FCOND_COR: return "cor"; ++ case FCOND_SOR: return "sor"; ++ case FCOND_CUNE: return "cune"; ++ case FCOND_SUNE: return "sune"; ++ } ++ llvm_unreachable("Impossible condition code!"); ++} ++ ++void LoongArchInstPrinter::printRegName(raw_ostream &OS, unsigned RegNo) const { ++ OS << '$' << StringRef(getRegisterName(RegNo)).lower(); ++} ++ ++void LoongArchInstPrinter::printInst(const MCInst *MI, uint64_t Address, ++ StringRef Annot, ++ const MCSubtargetInfo &STI, ++ raw_ostream &O) { ++ switch (MI->getOpcode()) { ++ default: ++ break; ++ case LoongArch::PCALAU12I_ri: ++ case LoongArch::LU12I_W_ri: ++ printLoadAddr(MI, O); ++ return; ++ case LoongArch::ADD_D_rrr: ++ case LoongArch::LDX_D_rrr: ++ case LoongArch::ADDI_D_rri: ++ case LoongArch::LD_D_rri: ++ case LoongArch::ORI_rri: ++ case 
LoongArch::LU32I_D_ri: ++ case LoongArch::LU52I_D_rri: ++ O << "\t# la expanded slot"; ++ return; ++ } ++ ++ // Try to print any aliases first. ++ if (!printAliasInstr(MI, Address, O) && !printAlias(*MI, O)) ++ printInstruction(MI, Address, O); ++ printAnnotation(O, Annot); ++} ++ ++void LoongArchInstPrinter::printOperand(const MCInst *MI, unsigned OpNo, ++ raw_ostream &O) { ++ const MCOperand &Op = MI->getOperand(OpNo); ++ if (Op.isReg()) { ++ printRegName(O, Op.getReg()); ++ return; ++ } ++ ++ if (Op.isImm()) { ++ O << formatImm(Op.getImm()); ++ return; ++ } ++ ++ assert(Op.isExpr() && "unknown operand kind in printOperand"); ++ Op.getExpr()->print(O, &MAI, true); ++} ++ ++template ++void LoongArchInstPrinter::printUImm(const MCInst *MI, int opNum, raw_ostream &O) { ++ const MCOperand &MO = MI->getOperand(opNum); ++ if (MO.isImm()) { ++ uint64_t Imm = MO.getImm(); ++ Imm -= Offset; ++ Imm &= (1 << Bits) - 1; ++ Imm += Offset; ++ O << formatImm(Imm); ++ return; ++ } ++ ++ printOperand(MI, opNum, O); ++} ++ ++void LoongArchInstPrinter:: ++printMemOperand(const MCInst *MI, int opNum, raw_ostream &O) { ++ // Load/Store memory operands -- $reg, imm ++ printOperand(MI, opNum, O); ++ O << ", "; ++ printOperand(MI, opNum+1, O); ++} ++ ++void LoongArchInstPrinter:: ++printMemOperandEA(const MCInst *MI, int opNum, raw_ostream &O) { ++ // when using stack locations for not load/store instructions ++ // print the same way as all normal 3 operand instructions. ++ printOperand(MI, opNum, O); ++ O << ", "; ++ printOperand(MI, opNum+1, O); ++} ++ ++void LoongArchInstPrinter:: ++printFCCOperand(const MCInst *MI, int opNum, raw_ostream &O) { ++ const MCOperand& MO = MI->getOperand(opNum); ++ O << LoongArchFCCToString((LoongArch::CondCode)MO.getImm()); ++} ++ ++bool LoongArchInstPrinter::printAlias(const char *Str, const MCInst &MI, ++ unsigned OpNo, raw_ostream &OS) { ++ OS << "\t" << Str << "\t"; ++ if(MI.getOpcode() == LoongArch::JIRL) { ++ printOperand(&MI, OpNo, OS); ++ OS << "@plt"; ++ }else ++ printOperand(&MI, OpNo, OS); ++ return true; ++} ++ ++bool LoongArchInstPrinter::printAlias(const char *Str, const MCInst &MI, ++ unsigned OpNo0, unsigned OpNo1, ++ raw_ostream &OS) { ++ printAlias(Str, MI, OpNo0, OS); ++ OS << ", "; ++ printOperand(&MI, OpNo1, OS); ++ return true; ++} ++ ++bool LoongArchInstPrinter::printAlias(const MCInst &MI, raw_ostream &OS) { ++ switch (MI.getOpcode()) { ++ case LoongArch::OR: ++ // or $r0, $r1, $zero => move $r0, $r1 ++ return isReg(MI, 2) && printAlias("move", MI, 0, 1, OS); ++ default: return false; ++ } ++} ++ ++void LoongArchInstPrinter:: ++printRegisterList(const MCInst *MI, int opNum, raw_ostream &O) { ++ // - 2 because register List is always first operand of instruction and it is ++ // always followed by memory operand (base + offset). 
++ for (int i = opNum, e = MI->getNumOperands() - 2; i != e; ++i) { ++ if (i != opNum) ++ O << ", "; ++ printRegName(O, MI->getOperand(i).getReg()); ++ } ++} ++ ++void LoongArchInstPrinter:: ++printLoadAddr(const MCInst *MI, raw_ostream &O) { ++ const MCOperand &Op = MI->getOperand(1); ++ const MCExpr *Expr = Op.getExpr(); ++ const LoongArchMCExpr *LoongArchExpr = cast(Expr); ++ switch (LoongArchExpr->getKind()) { ++ default: ++ llvm_unreachable("invalid handled!"); ++ return; ++ case LoongArchMCExpr::MEK_ABS_HI: ++ O << "\tla.abs\t"; ++ break; ++ case LoongArchMCExpr::MEK_GOT_HI: ++ O << "\tla.got\t"; ++ break; ++ case LoongArchMCExpr::MEK_PCREL_HI: ++ O << "\tla.pcrel\t"; ++ break; ++ case LoongArchMCExpr::MEK_TLSGD_HI: ++ O << "\tla.tls.gd\t"; ++ break; ++ case LoongArchMCExpr::MEK_TLSIE_HI: ++ O << "\tla.tls.ie\t"; ++ break; ++ case LoongArchMCExpr::MEK_TLSLE_HI: ++ O << "\tla.tls.le\t"; ++ break; ++ } ++ printRegName(O, MI->getOperand(0).getReg()); ++ O << ", "; ++ Expr->print(O, nullptr); ++ return; ++} +diff --git a/lib/Target/LoongArch/MCTargetDesc/LoongArchInstPrinter.h b/lib/Target/LoongArch/MCTargetDesc/LoongArchInstPrinter.h +new file mode 100644 +index 00000000..01d6d272 +--- /dev/null ++++ b/lib/Target/LoongArch/MCTargetDesc/LoongArchInstPrinter.h +@@ -0,0 +1,118 @@ ++//=== LoongArchInstPrinter.h - Convert LoongArch MCInst to assembly syntax -*- C++ -*-==// ++// ++// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. ++// See https://llvm.org/LICENSE.txt for license information. ++// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception ++// ++//===----------------------------------------------------------------------===// ++// ++// This class prints a LoongArch MCInst to a .s file. ++// ++//===----------------------------------------------------------------------===// ++ ++#ifndef LLVM_LIB_TARGET_LOONGARCH_INSTPRINTER_LOONGARCHINSTPRINTER_H ++#define LLVM_LIB_TARGET_LOONGARCH_INSTPRINTER_LOONGARCHINSTPRINTER_H ++#include "llvm/MC/MCInstPrinter.h" ++ ++namespace llvm { ++ ++namespace LoongArch { ++// LoongArch Branch Codes ++enum FPBranchCode { ++ BRANCH_F, ++ BRANCH_T, ++ BRANCH_INVALID ++}; ++ ++// LoongArch Condition Codes ++enum CondCode { ++ FCOND_F = 0x0, ++ FCOND_SF, ++ FCOND_OLT, ++ FCOND_LT, ++ FCOND_OEQ, ++ FCOND_SEQ, ++ FCOND_OLE, ++ FCOND_LE, ++ FCOND_UN, ++ FCOND_NGLE, ++ FCOND_ULT, ++ FCOND_NGE, ++ FCOND_UEQ, ++ FCOND_NGL, ++ FCOND_ULE, ++ FCOND_NGT, ++ FCOND_CNE, ++ FCOND_SNE, ++ FCOND_COR = 0x14, ++ FCOND_SOR = 0x15, ++ FCOND_CUNE = 0x18, ++ FCOND_SUNE = 0x19, ++ ++ // To be used with float branch False ++ // This conditions have the same mnemonic as the ++ // above ones, but are used with a branch False; ++ FCOND_T, ++ FCOND_UNE, ++ FCOND_ST, ++ FCOND_UGE, ++ FCOND_NLT, ++ FCOND_UGT, ++ FCOND_NLE, ++ FCOND_OR, ++ FCOND_GLE, ++ FCOND_OGE, ++ FCOND_GE, ++ FCOND_ONE, ++ FCOND_GL, ++ FCOND_OGT, ++ FCOND_GT ++}; ++ ++const char *LoongArchFCCToString(LoongArch::CondCode CC); ++} // end namespace LoongArch ++ ++class LoongArchInstPrinter : public MCInstPrinter { ++public: ++ LoongArchInstPrinter(const MCAsmInfo &MAI, const MCInstrInfo &MII, ++ const MCRegisterInfo &MRI) ++ : MCInstPrinter(MAI, MII, MRI) {} ++ ++ // Autogenerated by tblgen. 
++ std::pair getMnemonic(const MCInst *MI) override; ++ void printInstruction(const MCInst *MI, uint64_t Address, raw_ostream &O); ++ static const char *getRegisterName(unsigned RegNo); ++ ++ void printRegName(raw_ostream &OS, unsigned RegNo) const override; ++ void printInst(const MCInst *MI, uint64_t Address, StringRef Annot, ++ const MCSubtargetInfo &STI, raw_ostream &O) override; ++ ++ bool printAliasInstr(const MCInst *MI, uint64_t Address, raw_ostream &OS); ++ void printCustomAliasOperand(const MCInst *MI, uint64_t Address, ++ unsigned OpIdx, unsigned PrintMethodIdx, ++ raw_ostream &O); ++ ++private: ++ void printOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O); ++ void printOperand(const MCInst *MI, uint64_t /*Address*/, unsigned OpNum, ++ raw_ostream &O) { ++ printOperand(MI, OpNum, O); ++ } ++ template ++ void printUImm(const MCInst *MI, int opNum, raw_ostream &O); ++ void printMemOperand(const MCInst *MI, int opNum, raw_ostream &O); ++ void printMemOperandEA(const MCInst *MI, int opNum, raw_ostream &O); ++ void printFCCOperand(const MCInst *MI, int opNum, raw_ostream &O); ++ ++ bool printAlias(const char *Str, const MCInst &MI, unsigned OpNo, ++ raw_ostream &OS); ++ bool printAlias(const char *Str, const MCInst &MI, unsigned OpNo0, ++ unsigned OpNo1, raw_ostream &OS); ++ bool printAlias(const MCInst &MI, raw_ostream &OS); ++ void printSaveRestore(const MCInst *MI, raw_ostream &O); ++ void printRegisterList(const MCInst *MI, int opNum, raw_ostream &O); ++ void printLoadAddr(const MCInst *MI, raw_ostream &O); ++}; ++} // end namespace llvm ++ ++#endif +diff --git a/lib/Target/LoongArch/MCTargetDesc/LoongArchMCAsmInfo.cpp b/lib/Target/LoongArch/MCTargetDesc/LoongArchMCAsmInfo.cpp +new file mode 100644 +index 00000000..81939927 +--- /dev/null ++++ b/lib/Target/LoongArch/MCTargetDesc/LoongArchMCAsmInfo.cpp +@@ -0,0 +1,59 @@ ++//===-- LoongArchMCAsmInfo.cpp - LoongArch Asm Properties ---------------------------===// ++// ++// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. ++// See https://llvm.org/LICENSE.txt for license information. ++// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception ++// ++//===----------------------------------------------------------------------===// ++// ++// This file contains the declarations of the LoongArchMCAsmInfo properties. 
++// ++//===----------------------------------------------------------------------===// ++ ++#include "LoongArchMCAsmInfo.h" ++#include "MCTargetDesc/LoongArchMCExpr.h" ++#include "llvm/ADT/Triple.h" ++#include "llvm/BinaryFormat/Dwarf.h" ++#include "llvm/MC/MCStreamer.h" ++ ++using namespace llvm; ++ ++void LoongArchMCAsmInfo::anchor() { } ++ ++LoongArchMCAsmInfo::LoongArchMCAsmInfo(const Triple &TheTriple, ++ const MCTargetOptions &Options) { ++ ++ if (TheTriple.isLoongArch64() ++ && TheTriple.getEnvironment() != Triple::GNUABILPX32) ++ CodePointerSize = CalleeSaveStackSlotSize = 8; ++ ++ AlignmentIsInBytes = false; ++ Data16bitsDirective = "\t.half\t"; ++ Data32bitsDirective = "\t.word\t"; ++ Data64bitsDirective = "\t.dword\t"; ++ CommentString = "#"; ++ ZeroDirective = "\t.space\t"; ++ SupportsDebugInformation = true; ++ ExceptionsType = ExceptionHandling::DwarfCFI; ++ DwarfRegNumForCFI = true; ++ //HasLoongArchExpressions = true; ++ UseIntegratedAssembler = true; ++ UsesELFSectionDirectiveForBSS = true; ++} ++ ++const MCExpr * ++LoongArchMCAsmInfo::getExprForFDESymbol(const MCSymbol *Sym, unsigned Encoding, ++ MCStreamer &Streamer) const { ++ if (!(Encoding & dwarf::DW_EH_PE_pcrel)) ++ return MCAsmInfo::getExprForFDESymbol(Sym, Encoding, Streamer); ++ ++ // The default symbol subtraction results in an ADD/SUB relocation pair. ++ // Processing this relocation pair is problematic when linker relaxation is ++ // enabled, so we follow binutils in using the R_LARCH_32_PCREL relocation ++ // for the FDE initial location. ++ MCContext &Ctx = Streamer.getContext(); ++ const MCExpr *ME = ++ MCSymbolRefExpr::create(Sym, MCSymbolRefExpr::VK_None, Ctx); ++ assert(Encoding & dwarf::DW_EH_PE_sdata4 && "Unexpected encoding"); ++ return LoongArchMCExpr::create(LoongArchMCExpr::MEK_32_PCREL, ME, Ctx); ++} +diff --git a/lib/Target/LoongArch/MCTargetDesc/LoongArchMCAsmInfo.h b/lib/Target/LoongArch/MCTargetDesc/LoongArchMCAsmInfo.h +new file mode 100644 +index 00000000..f8ca6833 +--- /dev/null ++++ b/lib/Target/LoongArch/MCTargetDesc/LoongArchMCAsmInfo.h +@@ -0,0 +1,34 @@ ++//===-- LoongArchMCAsmInfo.h - LoongArch Asm Info ------------------------*- C++ -*--===// ++// ++// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. ++// See https://llvm.org/LICENSE.txt for license information. ++// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception ++// ++//===----------------------------------------------------------------------===// ++// ++// This file contains the declaration of the LoongArchMCAsmInfo class. 
++// ++//===----------------------------------------------------------------------===// ++ ++#ifndef LLVM_LIB_TARGET_LOONGARCH_MCTARGETDESC_LOONGARCHMCASMINFO_H ++#define LLVM_LIB_TARGET_LOONGARCH_MCTARGETDESC_LOONGARCHMCASMINFO_H ++ ++#include "llvm/MC/MCAsmInfoELF.h" ++ ++namespace llvm { ++class Triple; ++ ++class LoongArchMCAsmInfo : public MCAsmInfoELF { ++ void anchor() override; ++ ++public: ++ explicit LoongArchMCAsmInfo(const Triple &TheTriple, ++ const MCTargetOptions &Options); ++ ++ const MCExpr *getExprForFDESymbol(const MCSymbol *Sym, unsigned Encoding, ++ MCStreamer &Streamer) const override; ++}; ++ ++} // namespace llvm ++ ++#endif +diff --git a/lib/Target/LoongArch/MCTargetDesc/LoongArchMCCodeEmitter.cpp b/lib/Target/LoongArch/MCTargetDesc/LoongArchMCCodeEmitter.cpp +new file mode 100644 +index 00000000..32ce6633 +--- /dev/null ++++ b/lib/Target/LoongArch/MCTargetDesc/LoongArchMCCodeEmitter.cpp +@@ -0,0 +1,508 @@ ++//===-- LoongArchMCCodeEmitter.cpp - Convert LoongArch Code to Machine Code ---------===// ++// ++// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. ++// See https://llvm.org/LICENSE.txt for license information. ++// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception ++// ++//===----------------------------------------------------------------------===// ++// ++// This file implements the LoongArchMCCodeEmitter class. ++// ++//===----------------------------------------------------------------------===// ++ ++#include "LoongArchMCCodeEmitter.h" ++#include "MCTargetDesc/LoongArchFixupKinds.h" ++#include "MCTargetDesc/LoongArchMCExpr.h" ++#include "MCTargetDesc/LoongArchMCTargetDesc.h" ++#include "MCTargetDesc/LoongArchInstPrinter.h" ++#include "llvm/ADT/APFloat.h" ++#include "llvm/ADT/APInt.h" ++#include "llvm/ADT/SmallVector.h" ++#include "llvm/MC/MCContext.h" ++#include "llvm/MC/MCExpr.h" ++#include "llvm/MC/MCFixup.h" ++#include "llvm/MC/MCInst.h" ++#include "llvm/MC/MCInstrDesc.h" ++#include "llvm/MC/MCInstrInfo.h" ++#include "llvm/MC/MCRegisterInfo.h" ++#include "llvm/MC/MCSubtargetInfo.h" ++#include "llvm/Support/Casting.h" ++#include "llvm/Support/ErrorHandling.h" ++#include "llvm/Support/raw_ostream.h" ++#include ++#include ++ ++using namespace llvm; ++ ++#define DEBUG_TYPE "mccodeemitter" ++ ++#define GET_INSTRMAP_INFO ++#include "LoongArchGenInstrInfo.inc" ++#undef GET_INSTRMAP_INFO ++ ++namespace llvm { ++ ++MCCodeEmitter *createLoongArchMCCodeEmitter(const MCInstrInfo &MCII, ++ const MCRegisterInfo &MRI, ++ MCContext &Ctx) { ++ return new LoongArchMCCodeEmitter(MCII, Ctx); ++} ++ ++} // end namespace llvm ++ ++void LoongArchMCCodeEmitter::EmitByte(unsigned char C, raw_ostream &OS) const { ++ OS << (char)C; ++} ++ ++void LoongArchMCCodeEmitter::EmitInstruction(uint64_t Val, unsigned Size, ++ const MCSubtargetInfo &STI, ++ raw_ostream &OS) const { ++ for (unsigned i = 0; i < Size; ++i) { ++ unsigned Shift = i * 8; ++ EmitByte((Val >> Shift) & 0xff, OS); ++ } ++} ++ ++/// encodeInstruction - Emit the instruction. ++/// Size the instruction with Desc.getSize(). 
++void LoongArchMCCodeEmitter:: ++encodeInstruction(const MCInst &MI, raw_ostream &OS, ++ SmallVectorImpl &Fixups, ++ const MCSubtargetInfo &STI) const ++{ ++ MCInst TmpInst = MI; ++ ++ uint32_t Binary = getBinaryCodeForInstr(TmpInst, Fixups, STI); ++ ++ const MCInstrDesc &Desc = MCII.get(TmpInst.getOpcode()); ++ ++ // Get byte count of instruction ++ unsigned Size = Desc.getSize(); ++ if (!Size) ++ llvm_unreachable("Desc.getSize() returns 0"); ++ ++ EmitInstruction(Binary, Size, STI, OS); ++} ++ ++/// getBranchTargetOpValue - Return binary encoding of the branch ++/// target operand. If the machine operand requires relocation, ++/// record the relocation and return zero. ++unsigned LoongArchMCCodeEmitter:: ++getBranchTargetOpValue(const MCInst &MI, unsigned OpNo, ++ SmallVectorImpl &Fixups, ++ const MCSubtargetInfo &STI) const { ++ const MCOperand &MO = MI.getOperand(OpNo); ++ ++ // If the destination is an immediate, divide by 4. ++ if (MO.isImm()) return MO.getImm() >> 2; ++ ++ assert(MO.isExpr() && ++ "getBranchTargetOpValue expects only expressions or immediates"); ++ ++ // XXX: brtarget reloc EncoderMethod. ++ const MCExpr *Expr = MO.getExpr(); ++ switch (MI.getOpcode()) { ++ default: ++ llvm_unreachable("Unhandled reloc instruction!"); ++ break; ++ case LoongArch::BEQZ: ++ case LoongArch::BEQZ32: ++ case LoongArch::BNEZ: ++ case LoongArch::BNEZ32: ++ case LoongArch::BCEQZ: ++ case LoongArch::BCNEZ: ++ Fixups.push_back( ++ MCFixup::create(0, Expr, MCFixupKind(LoongArch::fixup_loongarch_b21))); ++ break; ++ case LoongArch::BEQ: ++ case LoongArch::BEQ32: ++ case LoongArch::BNE: ++ case LoongArch::BNE32: ++ case LoongArch::BLT: ++ case LoongArch::BLT32: ++ case LoongArch::BGE: ++ case LoongArch::BGE32: ++ case LoongArch::BLTU: ++ case LoongArch::BLTU32: ++ case LoongArch::BGEU: ++ case LoongArch::BGEU32: ++ Fixups.push_back( ++ MCFixup::create(0, Expr, MCFixupKind(LoongArch::fixup_loongarch_b16))); ++ break; ++ } ++ return 0; ++} ++ ++/// getJumpTargetOpValue - Return binary encoding of the jump ++/// target operand. If the machine operand requires relocation, ++/// record the relocation and return zero. ++unsigned LoongArchMCCodeEmitter:: ++getJumpTargetOpValue(const MCInst &MI, unsigned OpNo, ++ SmallVectorImpl &Fixups, ++ const MCSubtargetInfo &STI) const { ++ const MCOperand &MO = MI.getOperand(OpNo); ++ // If the destination is an immediate, divide by 4. 
++ if (MO.isImm()) return MO.getImm()>>2; ++ ++ assert(MO.isExpr() && ++ "getJumpTargetOpValue expects only expressions or an immediate"); ++ ++ const MCExpr *Expr = MO.getExpr(); ++ Fixups.push_back( ++ MCFixup::create(0, Expr, MCFixupKind(LoongArch::fixup_loongarch_b26))); ++ return 0; ++} ++ ++unsigned LoongArchMCCodeEmitter:: ++getSImm11Lsl1Encoding(const MCInst &MI, unsigned OpNo, ++ SmallVectorImpl &Fixups, ++ const MCSubtargetInfo &STI) const { ++ const MCOperand &MO = MI.getOperand(OpNo); ++ if (MO.isImm()) { ++ unsigned Value = MO.getImm(); ++ return Value >> 1; ++ } ++ ++ return 0; ++} ++ ++unsigned LoongArchMCCodeEmitter:: ++getSImm10Lsl2Encoding(const MCInst &MI, unsigned OpNo, ++ SmallVectorImpl &Fixups, ++ const MCSubtargetInfo &STI) const { ++ const MCOperand &MO = MI.getOperand(OpNo); ++ if (MO.isImm()) { ++ unsigned Value = MO.getImm(); ++ return Value >> 2; ++ } ++ ++ return 0; ++} ++ ++unsigned LoongArchMCCodeEmitter:: ++getSImm9Lsl3Encoding(const MCInst &MI, unsigned OpNo, ++ SmallVectorImpl &Fixups, ++ const MCSubtargetInfo &STI) const { ++ const MCOperand &MO = MI.getOperand(OpNo); ++ if (MO.isImm()) { ++ unsigned Value = MO.getImm(); ++ return Value >> 3; ++ } ++ ++ return 0; ++} ++ ++unsigned LoongArchMCCodeEmitter:: ++getSImm8Lsl1Encoding(const MCInst &MI, unsigned OpNo, ++ SmallVectorImpl &Fixups, ++ const MCSubtargetInfo &STI) const { ++ const MCOperand &MO = MI.getOperand(OpNo); ++ if (MO.isImm()) { ++ unsigned Value = MO.getImm(); ++ return Value >> 1; ++ } ++ ++ return 0; ++} ++ ++unsigned LoongArchMCCodeEmitter:: ++getSImm8Lsl2Encoding(const MCInst &MI, unsigned OpNo, ++ SmallVectorImpl &Fixups, ++ const MCSubtargetInfo &STI) const { ++ const MCOperand &MO = MI.getOperand(OpNo); ++ if (MO.isImm()) { ++ unsigned Value = MO.getImm(); ++ return Value >> 2; ++ } ++ ++ return 0; ++} ++ ++unsigned LoongArchMCCodeEmitter:: ++getSImm8Lsl3Encoding(const MCInst &MI, unsigned OpNo, ++ SmallVectorImpl &Fixups, ++ const MCSubtargetInfo &STI) const { ++ const MCOperand &MO = MI.getOperand(OpNo); ++ if (MO.isImm()) { ++ unsigned Value = MO.getImm(); ++ return Value >> 3; ++ } ++ ++ return 0; ++} ++ ++unsigned LoongArchMCCodeEmitter:: ++getExprOpValue(const MCInst &MI, const MCExpr *Expr, ++ SmallVectorImpl &Fixups, ++ const MCSubtargetInfo &STI) const { ++ int64_t Res; ++ ++ if (Expr->evaluateAsAbsolute(Res)) ++ return Res; ++ ++ MCExpr::ExprKind Kind = Expr->getKind(); ++ if (Kind == MCExpr::Constant) { ++ return cast(Expr)->getValue(); ++ } ++ ++ if (Kind == MCExpr::Binary) { ++ unsigned Res = getExprOpValue(MI, cast(Expr)->getLHS(), Fixups, STI); ++ Res += getExprOpValue(MI, cast(Expr)->getRHS(), Fixups, STI); ++ return Res; ++ } ++ ++ if (Kind == MCExpr::Target) { ++ const LoongArchMCExpr *LoongArchExpr = cast(Expr); ++ ++ LoongArch::Fixups FixupKind = LoongArch::Fixups(0); ++ switch (LoongArchExpr->getKind()) { ++ case LoongArchMCExpr::MEK_32_PCREL: ++ case LoongArchMCExpr::MEK_None: ++ case LoongArchMCExpr::MEK_Special: ++ llvm_unreachable("Unhandled fixup kind!"); ++ Fixups.push_back(MCFixup::create(0, LoongArchExpr, MCFixupKind(FixupKind))); ++ break; ++ case LoongArchMCExpr::MEK_PLT: ++ FixupKind = LoongArch::fixup_loongarch_b26; ++ break; ++ case LoongArchMCExpr::MEK_GOT_HI: ++ case LoongArchMCExpr::MEK_GOT_RRHI: ++ FixupKind = LoongArch::fixup_loongarch_got_pc_hi20; ++ break; ++ case LoongArchMCExpr::MEK_GOT_LO: ++ case LoongArchMCExpr::MEK_GOT_RRLO: ++ case LoongArchMCExpr::MEK_TLSGD_LO: ++ case LoongArchMCExpr::MEK_TLSGD_RRLO: ++ FixupKind = 
LoongArch::fixup_loongarch_got_pc_lo12; ++ break; ++ case LoongArchMCExpr::MEK_GOT_RRHIGHER: ++ case LoongArchMCExpr::MEK_TLSGD_RRHIGHER: ++ FixupKind = LoongArch::fixup_loongarch_got64_pc_lo20; ++ break; ++ case LoongArchMCExpr::MEK_GOT_RRHIGHEST: ++ case LoongArchMCExpr::MEK_TLSGD_RRHIGHEST: ++ FixupKind = LoongArch::fixup_loongarch_got64_pc_hi12; ++ break; ++ case LoongArchMCExpr::MEK_ABS_HI: ++ FixupKind = LoongArch::fixup_loongarch_abs_hi20; ++ break; ++ case LoongArchMCExpr::MEK_ABS_LO: ++ FixupKind = LoongArch::fixup_loongarch_abs_lo12; ++ break; ++ case LoongArchMCExpr::MEK_ABS_HIGHER: ++ FixupKind = LoongArch::fixup_loongarch_abs64_lo20; ++ break; ++ case LoongArchMCExpr::MEK_ABS_HIGHEST: ++ FixupKind = LoongArch::fixup_loongarch_abs64_hi12; ++ break; ++ case LoongArchMCExpr::MEK_PCREL_HI: ++ case LoongArchMCExpr::MEK_PCREL_RRHI: ++ FixupKind = LoongArch::fixup_loongarch_pcala_hi20; ++ break; ++ case LoongArchMCExpr::MEK_PCREL_LO: ++ case LoongArchMCExpr::MEK_PCREL_RRLO: ++ FixupKind = LoongArch::fixup_loongarch_pcala_lo12; ++ break; ++ case LoongArchMCExpr::MEK_PCREL_RRHIGHER: ++ FixupKind = LoongArch::fixup_loongarch_pcala64_lo20; ++ break; ++ case LoongArchMCExpr::MEK_PCREL_RRHIGHEST: ++ FixupKind = LoongArch::fixup_loongarch_pcala64_hi12; ++ break; ++ case LoongArchMCExpr::MEK_TLSGD_HI: ++ case LoongArchMCExpr::MEK_TLSGD_RRHI: ++ FixupKind = LoongArch::fixup_loongarch_tls_gd_pc_hi20; ++ break; ++ case LoongArchMCExpr::MEK_TLSIE_HI: ++ case LoongArchMCExpr::MEK_TLSIE_RRHI: ++ FixupKind = LoongArch::fixup_loongarch_tls_ie_pc_hi20; ++ break; ++ case LoongArchMCExpr::MEK_TLSIE_LO: ++ case LoongArchMCExpr::MEK_TLSIE_RRLO: ++ FixupKind = LoongArch::fixup_loongarch_tls_ie_pc_lo12; ++ break; ++ case LoongArchMCExpr::MEK_TLSIE_RRHIGHER: ++ FixupKind = LoongArch::fixup_loongarch_tls_ie64_lo20; ++ break; ++ case LoongArchMCExpr::MEK_TLSIE_RRHIGHEST: ++ FixupKind = LoongArch::fixup_loongarch_tls_ie64_hi12; ++ break; ++ case LoongArchMCExpr::MEK_TLSLE_HI: ++ FixupKind = LoongArch::fixup_loongarch_tls_le_hi20; ++ break; ++ case LoongArchMCExpr::MEK_TLSLE_LO: ++ FixupKind = LoongArch::fixup_loongarch_tls_le_lo12; ++ break; ++ case LoongArchMCExpr::MEK_TLSLE_HIGHER: ++ FixupKind = LoongArch::fixup_loongarch_tls_le64_lo20; ++ break; ++ case LoongArchMCExpr::MEK_TLSLE_HIGHEST: ++ FixupKind = LoongArch::fixup_loongarch_tls_le64_hi12; ++ break; ++ } ++ Fixups.push_back(MCFixup::create(0, LoongArchExpr, MCFixupKind(FixupKind))); ++ return 0; ++ } ++ ++ if (Kind == MCExpr::SymbolRef) { ++ LoongArch::Fixups FixupKind = LoongArch::Fixups(0); ++ ++ switch(cast(Expr)->getKind()) { ++ default: llvm_unreachable("Unknown fixup kind!"); ++ break; ++ } ++ Fixups.push_back(MCFixup::create(0, Expr, MCFixupKind(FixupKind))); ++ return 0; ++ } ++ return 0; ++} ++ ++/// getMachineOpValue - Return binary encoding of operand. If the machine ++/// operand requires relocation, record the relocation and return zero. ++unsigned LoongArchMCCodeEmitter:: ++getMachineOpValue(const MCInst &MI, const MCOperand &MO, ++ SmallVectorImpl &Fixups, ++ const MCSubtargetInfo &STI) const { ++ if (MO.isReg()) { ++ unsigned Reg = MO.getReg(); ++ unsigned RegNo = Ctx.getRegisterInfo()->getEncodingValue(Reg); ++ return RegNo; ++ } else if (MO.isImm()) { ++ return static_cast(MO.getImm()); ++ } else if (MO.isDFPImm()) { ++ return static_cast(bit_cast(MO.getDFPImm())); ++ } ++ // MO must be an Expr. 
++ assert(MO.isExpr()); ++ return getExprOpValue(MI, MO.getExpr(),Fixups, STI); ++} ++ ++/// Return binary encoding of memory related operand. ++/// If the offset operand requires relocation, record the relocation. ++template ++unsigned LoongArchMCCodeEmitter::getMemEncoding(const MCInst &MI, unsigned OpNo, ++ SmallVectorImpl &Fixups, ++ const MCSubtargetInfo &STI) const { ++ // Base register is encoded in bits 16-12, offset is encoded in bits 11-0. ++ assert(MI.getOperand(OpNo).isReg()); ++ unsigned RegBits = getMachineOpValue(MI, MI.getOperand(OpNo),Fixups, STI) << 12; ++ unsigned OffBits = getMachineOpValue(MI, MI.getOperand(OpNo+1), Fixups, STI); ++ ++ // Apply the scale factor if there is one. ++ OffBits >>= ShiftAmount; ++ ++ return (OffBits & 0xFFF) | RegBits; ++} ++ ++unsigned LoongArchMCCodeEmitter::getMemEncoding10l2(const MCInst &MI, unsigned OpNo, ++ SmallVectorImpl &Fixups, ++ const MCSubtargetInfo &STI) const { ++ // Base register is encoded in bits 16-12, offset is encoded in bits 11-0. ++ assert(MI.getOperand(OpNo).isReg()); ++ unsigned RegBits = getMachineOpValue(MI, MI.getOperand(OpNo),Fixups, STI) << 10; ++ unsigned OffBits = getMachineOpValue(MI, MI.getOperand(OpNo+1), Fixups, STI); ++ ++ // Apply the scale factor if there is one. ++ OffBits >>= 2; ++ ++ return (OffBits & 0x3FF) | RegBits; ++} ++ ++unsigned LoongArchMCCodeEmitter::getMemEncoding11l1(const MCInst &MI, unsigned OpNo, ++ SmallVectorImpl &Fixups, ++ const MCSubtargetInfo &STI) const { ++ // Base register is encoded in bits 16-12, offset is encoded in bits 11-0. ++ assert(MI.getOperand(OpNo).isReg()); ++ unsigned RegBits = getMachineOpValue(MI, MI.getOperand(OpNo),Fixups, STI) << 11; ++ unsigned OffBits = getMachineOpValue(MI, MI.getOperand(OpNo+1), Fixups, STI); ++ ++ // Apply the scale factor if there is one. ++ OffBits >>= 1; ++ ++ return (OffBits & 0x7FF) | RegBits; ++} ++ ++unsigned LoongArchMCCodeEmitter::getMemEncoding9l3(const MCInst &MI, unsigned OpNo, ++ SmallVectorImpl &Fixups, ++ const MCSubtargetInfo &STI) const { ++ // Base register is encoded in bits 16-12, offset is encoded in bits 11-0. ++ assert(MI.getOperand(OpNo).isReg()); ++ unsigned RegBits = getMachineOpValue(MI, MI.getOperand(OpNo),Fixups, STI) << 9; ++ unsigned OffBits = getMachineOpValue(MI, MI.getOperand(OpNo+1), Fixups, STI); ++ ++ // Apply the scale factor if there is one. ++ OffBits >>= 3; ++ ++ return (OffBits & 0x1FF) | RegBits; ++} ++ ++/// Return binary encoding of simm14 memory related operand. Such as LL/SC instructions. ++/// If the offset operand requires relocation, record the relocation. ++template ++unsigned LoongArchMCCodeEmitter::getSimm14MemEncoding(const MCInst &MI, unsigned OpNo, ++ SmallVectorImpl &Fixups, ++ const MCSubtargetInfo &STI) const { ++ // Base register is encoded in bits 18-14, offset is encoded in bits 13-0. ++ assert(MI.getOperand(OpNo).isReg()); ++ unsigned RegBits = getMachineOpValue(MI, MI.getOperand(OpNo),Fixups, STI) << 14; ++ unsigned OffBits = getMachineOpValue(MI, MI.getOperand(OpNo+1), Fixups, STI); ++ ++ // Apply the scale factor if there is one. 
++ OffBits >>= ShiftAmount; ++ ++ return (OffBits & 0x3FFF) | RegBits; ++} ++ ++unsigned ++LoongArchMCCodeEmitter::getFCMPEncoding(const MCInst &MI, unsigned OpNo, ++ SmallVectorImpl &Fixups, ++ const MCSubtargetInfo &STI) const { ++ const MCOperand& MO = MI.getOperand(OpNo); ++ switch((LoongArch::CondCode)MO.getImm()){ ++ case LoongArch::FCOND_T: ++ return 0x0; ++ case LoongArch::FCOND_OR: ++ return 0x8; ++ case LoongArch::FCOND_UNE: ++ return 0x4; ++ case LoongArch::FCOND_ONE: ++ return 0xC; ++ case LoongArch::FCOND_UGE: ++ return 0x2; ++ case LoongArch::FCOND_OGE: ++ return 0xA; ++ case LoongArch::FCOND_UGT: ++ return 0x6; ++ case LoongArch::FCOND_OGT: ++ return 0xE; ++ case LoongArch::FCOND_ST: ++ return 0x1; ++ case LoongArch::FCOND_GLE: ++ return 0x9; ++ case LoongArch::FCOND_GL: ++ return 0xD; ++ case LoongArch::FCOND_NLT: ++ return 0x3; ++ case LoongArch::FCOND_GE: ++ return 0xB; ++ case LoongArch::FCOND_NLE: ++ return 0x7; ++ case LoongArch::FCOND_GT: ++ return 0xF; ++ default: ++ return MO.getImm(); ++ } ++} ++ ++template ++unsigned ++LoongArchMCCodeEmitter::getUImmWithOffsetEncoding(const MCInst &MI, unsigned OpNo, ++ SmallVectorImpl &Fixups, ++ const MCSubtargetInfo &STI) const { ++ assert(MI.getOperand(OpNo).isImm()); ++ unsigned Value = getMachineOpValue(MI, MI.getOperand(OpNo), Fixups, STI); ++ Value -= Offset; ++ return Value; ++} ++ ++#include "LoongArchGenMCCodeEmitter.inc" +diff --git a/lib/Target/LoongArch/MCTargetDesc/LoongArchMCCodeEmitter.h b/lib/Target/LoongArch/MCTargetDesc/LoongArchMCCodeEmitter.h +new file mode 100644 +index 00000000..cb932164 +--- /dev/null ++++ b/lib/Target/LoongArch/MCTargetDesc/LoongArchMCCodeEmitter.h +@@ -0,0 +1,142 @@ ++//===- LoongArchMCCodeEmitter.h - Convert LoongArch Code to Machine Code --*- C++ -*-===// ++// ++// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. ++// See https://llvm.org/LICENSE.txt for license information. ++// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception ++// ++//===----------------------------------------------------------------------===// ++// ++// This file defines the LoongArchMCCodeEmitter class. ++// ++//===----------------------------------------------------------------------===// ++ ++#ifndef LLVM_LIB_TARGET_LOONGARCH_MCTARGETDESC_LOONGARCHMCCODEEMITTER_H ++#define LLVM_LIB_TARGET_LOONGARCH_MCTARGETDESC_LOONGARCHMCCODEEMITTER_H ++ ++#include "llvm/MC/MCCodeEmitter.h" ++#include "llvm/ADT/StringRef.h" ++#include ++#include ++ ++namespace llvm { ++ ++class MCContext; ++class MCExpr; ++class MCFixup; ++class MCInst; ++class MCInstrInfo; ++class MCOperand; ++class MCSubtargetInfo; ++class raw_ostream; ++ ++class LoongArchMCCodeEmitter : public MCCodeEmitter { ++ const MCInstrInfo &MCII; ++ MCContext &Ctx; ++ ++public: ++ LoongArchMCCodeEmitter(const MCInstrInfo &mcii, MCContext &Ctx_) ++ : MCII(mcii), Ctx(Ctx_) {} ++ LoongArchMCCodeEmitter(const LoongArchMCCodeEmitter &) = delete; ++ LoongArchMCCodeEmitter &operator=(const LoongArchMCCodeEmitter &) = delete; ++ ~LoongArchMCCodeEmitter() override = default; ++ ++ void EmitByte(unsigned char C, raw_ostream &OS) const; ++ ++ void EmitInstruction(uint64_t Val, unsigned Size, const MCSubtargetInfo &STI, ++ raw_ostream &OS) const; ++ ++ void encodeInstruction(const MCInst &MI, raw_ostream &OS, ++ SmallVectorImpl &Fixups, ++ const MCSubtargetInfo &STI) const override; ++ ++ // getBinaryCodeForInstr - TableGen'erated function for getting the ++ // binary encoding for an instruction. 
++ uint64_t getBinaryCodeForInstr(const MCInst &MI, ++ SmallVectorImpl<MCFixup> &Fixups, ++ const MCSubtargetInfo &STI) const; ++ ++ // getJumpTargetOpValue - Return binary encoding of the jump ++ // target operand. If the machine operand requires relocation, ++ // record the relocation and return zero. ++ unsigned getJumpTargetOpValue(const MCInst &MI, unsigned OpNo, ++ SmallVectorImpl<MCFixup> &Fixups, ++ const MCSubtargetInfo &STI) const; ++ ++ // getBranchTargetOpValue - Return binary encoding of the branch ++ // target operand. If the machine operand requires relocation, ++ // record the relocation and return zero. ++ unsigned getBranchTargetOpValue(const MCInst &MI, unsigned OpNo, ++ SmallVectorImpl<MCFixup> &Fixups, ++ const MCSubtargetInfo &STI) const; ++ ++ // getMachineOpValue - Return binary encoding of operand. If the machine ++ // operand requires relocation, record the relocation and return zero. ++ unsigned getMachineOpValue(const MCInst &MI, const MCOperand &MO, ++ SmallVectorImpl<MCFixup> &Fixups, ++ const MCSubtargetInfo &STI) const; ++ ++ template <unsigned ShiftAmount> ++ unsigned getMemEncoding(const MCInst &MI, unsigned OpNo, ++ SmallVectorImpl<MCFixup> &Fixups, ++ const MCSubtargetInfo &STI) const; ++ ++ unsigned getMemEncoding10l2(const MCInst &MI, unsigned OpNo, ++ SmallVectorImpl<MCFixup> &Fixups, ++ const MCSubtargetInfo &STI) const; ++ ++ unsigned getMemEncoding11l1(const MCInst &MI, unsigned OpNo, ++ SmallVectorImpl<MCFixup> &Fixups, ++ const MCSubtargetInfo &STI) const; ++ ++ unsigned getMemEncoding9l3(const MCInst &MI, unsigned OpNo, ++ SmallVectorImpl<MCFixup> &Fixups, ++ const MCSubtargetInfo &STI) const; ++ ++ template <unsigned ShiftAmount> ++ unsigned getSimm14MemEncoding(const MCInst &MI, unsigned OpNo, ++ SmallVectorImpl<MCFixup> &Fixups, ++ const MCSubtargetInfo &STI) const; ++ ++ unsigned getFCMPEncoding(const MCInst &MI, unsigned OpNo, ++ SmallVectorImpl<MCFixup> &Fixups, ++ const MCSubtargetInfo &STI) const; ++ ++ /// Subtract Offset, then encode as an N-bit unsigned integer.
++ template ++ unsigned getUImmWithOffsetEncoding(const MCInst &MI, unsigned OpNo, ++ SmallVectorImpl &Fixups, ++ const MCSubtargetInfo &STI) const; ++ ++ unsigned getExprOpValue(const MCInst &MI, const MCExpr *Expr, ++ SmallVectorImpl &Fixups, ++ const MCSubtargetInfo &STI) const; ++ ++ unsigned getSImm11Lsl1Encoding(const MCInst &MI, unsigned OpNo, ++ SmallVectorImpl &Fixups, ++ const MCSubtargetInfo &STI) const; ++ ++ unsigned getSImm10Lsl2Encoding(const MCInst &MI, unsigned OpNo, ++ SmallVectorImpl &Fixups, ++ const MCSubtargetInfo &STI) const; ++ ++ unsigned getSImm9Lsl3Encoding(const MCInst &MI, unsigned OpNo, ++ SmallVectorImpl &Fixups, ++ const MCSubtargetInfo &STI) const; ++ ++ unsigned getSImm8Lsl1Encoding(const MCInst &MI, unsigned OpNo, ++ SmallVectorImpl &Fixups, ++ const MCSubtargetInfo &STI) const; ++ ++ unsigned getSImm8Lsl2Encoding(const MCInst &MI, unsigned OpNo, ++ SmallVectorImpl &Fixups, ++ const MCSubtargetInfo &STI) const; ++ ++ unsigned getSImm8Lsl3Encoding(const MCInst &MI, unsigned OpNo, ++ SmallVectorImpl &Fixups, ++ const MCSubtargetInfo &STI) const; ++ ++}; ++ ++} // end namespace llvm ++ ++#endif // LLVM_LIB_TARGET_LOONGARCH_MCTARGETDESC_LOONGARCHMCCODEEMITTER_H +diff --git a/lib/Target/LoongArch/MCTargetDesc/LoongArchMCExpr.cpp b/lib/Target/LoongArch/MCTargetDesc/LoongArchMCExpr.cpp +new file mode 100644 +index 00000000..bb842538 +--- /dev/null ++++ b/lib/Target/LoongArch/MCTargetDesc/LoongArchMCExpr.cpp +@@ -0,0 +1,134 @@ ++//===-- LoongArchMCExpr.cpp - LoongArch specific MC expression classes --------------===// ++// ++// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. ++// See https://llvm.org/LICENSE.txt for license information. ++// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception ++// ++//===----------------------------------------------------------------------===// ++ ++#include "LoongArchMCExpr.h" ++#include "llvm/BinaryFormat/ELF.h" ++#include "llvm/MC/MCAsmInfo.h" ++#include "llvm/MC/MCAssembler.h" ++#include "llvm/MC/MCContext.h" ++#include "llvm/MC/MCStreamer.h" ++#include "llvm/MC/MCSymbolELF.h" ++#include "llvm/MC/MCValue.h" ++#include "llvm/Support/Casting.h" ++#include "llvm/Support/ErrorHandling.h" ++#include "llvm/Support/MathExtras.h" ++#include "llvm/Support/raw_ostream.h" ++#include ++ ++using namespace llvm; ++ ++#define DEBUG_TYPE "loongarchmcexpr" ++ ++const LoongArchMCExpr *LoongArchMCExpr::create(LoongArchMCExpr::LoongArchExprKind Kind, ++ const MCExpr *Expr, MCContext &Ctx) { ++ return new (Ctx) LoongArchMCExpr(Kind, Expr); ++} ++ ++void LoongArchMCExpr::printImpl(raw_ostream &OS, const MCAsmInfo *MAI) const { ++ int64_t AbsVal; ++ if (Expr->evaluateAsAbsolute(AbsVal)) ++ OS << AbsVal; ++ else ++ Expr->print(OS, MAI, true); ++} ++ ++bool ++LoongArchMCExpr::evaluateAsRelocatableImpl(MCValue &Res, ++ const MCAsmLayout *Layout, ++ const MCFixup *Fixup) const { ++ if (!getSubExpr()->evaluateAsRelocatable(Res, nullptr, nullptr)) ++ return false; ++ ++ Res = ++ MCValue::get(Res.getSymA(), Res.getSymB(), Res.getConstant(), getKind()); ++ // Custom fixup types are not valid with symbol difference expressions. ++ return Res.getSymB() ? 
getKind() == MEK_None : true; ++} ++ ++void LoongArchMCExpr::visitUsedExpr(MCStreamer &Streamer) const { ++ Streamer.visitUsedExpr(*getSubExpr()); ++} ++ ++static void fixELFSymbolsInTLSFixupsImpl(const MCExpr *Expr, MCAssembler &Asm) { ++ switch (Expr->getKind()) { ++ case MCExpr::Target: ++ fixELFSymbolsInTLSFixupsImpl(cast(Expr)->getSubExpr(), Asm); ++ break; ++ case MCExpr::Constant: ++ break; ++ case MCExpr::Binary: { ++ const MCBinaryExpr *BE = cast(Expr); ++ fixELFSymbolsInTLSFixupsImpl(BE->getLHS(), Asm); ++ fixELFSymbolsInTLSFixupsImpl(BE->getRHS(), Asm); ++ break; ++ } ++ case MCExpr::SymbolRef: { ++ // We're known to be under a TLS fixup, so any symbol should be ++ // modified. There should be only one. ++ const MCSymbolRefExpr &SymRef = *cast(Expr); ++ cast(SymRef.getSymbol()).setType(ELF::STT_TLS); ++ break; ++ } ++ case MCExpr::Unary: ++ fixELFSymbolsInTLSFixupsImpl(cast(Expr)->getSubExpr(), Asm); ++ break; ++ } ++} ++ ++void LoongArchMCExpr::fixELFSymbolsInTLSFixups(MCAssembler &Asm) const { ++ switch (getKind()) { ++ default: ++ break; ++ case MEK_None: ++ case MEK_Special: ++ llvm_unreachable("MEK_None and MEK_Special are invalid"); ++ break; ++ case MEK_CALL_HI: ++ case MEK_CALL_LO: ++ case MEK_GOT_HI: ++ case MEK_GOT_LO: ++ case MEK_GOT_RRHI: ++ case MEK_GOT_RRLO: ++ case MEK_GOT_RRHIGHER: ++ case MEK_GOT_RRHIGHEST: ++ case MEK_ABS_HI: ++ case MEK_ABS_LO: ++ case MEK_ABS_HIGHER: ++ case MEK_ABS_HIGHEST: ++ case MEK_PCREL_HI: ++ case MEK_PCREL_LO: ++ case MEK_PCREL_RRHI: ++ case MEK_PCREL_RRHIGHER: ++ case MEK_PCREL_RRHIGHEST: ++ case MEK_PCREL_RRLO: ++ case MEK_PLT: ++ // If we do have nested target-specific expressions, they will be in ++ // a consecutive chain. ++ if (const LoongArchMCExpr *E = dyn_cast(getSubExpr())) ++ E->fixELFSymbolsInTLSFixups(Asm); ++ break; ++ case MEK_TLSGD_HI: ++ case MEK_TLSGD_LO: ++ case MEK_TLSGD_RRHI: ++ case MEK_TLSGD_RRHIGHER: ++ case MEK_TLSGD_RRHIGHEST: ++ case MEK_TLSGD_RRLO: ++ case MEK_TLSLE_HI: ++ case MEK_TLSLE_HIGHER: ++ case MEK_TLSLE_HIGHEST: ++ case MEK_TLSLE_LO: ++ case MEK_TLSIE_HI: ++ case MEK_TLSIE_LO: ++ case MEK_TLSIE_RRHI: ++ case MEK_TLSIE_RRHIGHER: ++ case MEK_TLSIE_RRHIGHEST: ++ case MEK_TLSIE_RRLO: ++ fixELFSymbolsInTLSFixupsImpl(getSubExpr(), Asm); ++ break; ++ } ++} +diff --git a/lib/Target/LoongArch/MCTargetDesc/LoongArchMCExpr.h b/lib/Target/LoongArch/MCTargetDesc/LoongArchMCExpr.h +new file mode 100644 +index 00000000..80592ead +--- /dev/null ++++ b/lib/Target/LoongArch/MCTargetDesc/LoongArchMCExpr.h +@@ -0,0 +1,98 @@ ++//===- LoongArchMCExpr.h - LoongArch specific MC expression classes -------*- C++ -*-===// ++// ++// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. ++// See https://llvm.org/LICENSE.txt for license information. 
++// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception ++// ++//===----------------------------------------------------------------------===// ++ ++#ifndef LLVM_LIB_TARGET_LOONGARCH_MCTARGETDESC_LOONGARCHMCEXPR_H ++#define LLVM_LIB_TARGET_LOONGARCH_MCTARGETDESC_LOONGARCHMCEXPR_H ++ ++#include "llvm/MC/MCAsmLayout.h" ++#include "llvm/MC/MCExpr.h" ++#include "llvm/MC/MCValue.h" ++ ++namespace llvm { ++ ++class LoongArchMCExpr : public MCTargetExpr { ++public: ++ enum LoongArchExprKind { ++ MEK_None, ++ MEK_CALL_HI, ++ MEK_CALL_LO, ++ MEK_GOT_HI, ++ MEK_GOT_LO, ++ MEK_GOT_RRHI, ++ MEK_GOT_RRHIGHER, ++ MEK_GOT_RRHIGHEST, ++ MEK_GOT_RRLO, ++ MEK_ABS_HI, ++ MEK_ABS_HIGHER, ++ MEK_ABS_HIGHEST, ++ MEK_ABS_LO, ++ MEK_PCREL_HI, ++ MEK_PCREL_LO, ++ MEK_PCREL_RRHI, ++ MEK_PCREL_RRHIGHER, ++ MEK_PCREL_RRHIGHEST, ++ MEK_PCREL_RRLO, ++ MEK_TLSLE_HI, ++ MEK_TLSLE_HIGHER, ++ MEK_TLSLE_HIGHEST, ++ MEK_TLSLE_LO, ++ MEK_TLSIE_HI, ++ MEK_TLSIE_LO, ++ MEK_TLSIE_RRHI, ++ MEK_TLSIE_RRHIGHER, ++ MEK_TLSIE_RRHIGHEST, ++ MEK_TLSIE_RRLO, ++ MEK_TLSGD_HI, ++ MEK_TLSGD_LO, ++ MEK_TLSGD_RRHI, ++ MEK_TLSGD_RRHIGHER, ++ MEK_TLSGD_RRHIGHEST, ++ MEK_TLSGD_RRLO, ++ MEK_PLT, ++ MEK_32_PCREL, ++ MEK_Special, ++ }; ++ ++private: ++ const LoongArchExprKind Kind; ++ const MCExpr *Expr; ++ ++ explicit LoongArchMCExpr(LoongArchExprKind Kind, const MCExpr *Expr) ++ : Kind(Kind), Expr(Expr) {} ++ ++public: ++ static const LoongArchMCExpr *create(LoongArchExprKind Kind, const MCExpr *Expr, ++ MCContext &Ctx); ++ static const LoongArchMCExpr *createGpOff(LoongArchExprKind Kind, const MCExpr *Expr, ++ MCContext &Ctx); ++ ++ /// Get the kind of this expression. ++ LoongArchExprKind getKind() const { return Kind; } ++ ++ /// Get the child of this expression. ++ const MCExpr *getSubExpr() const { return Expr; } ++ ++ void printImpl(raw_ostream &OS, const MCAsmInfo *MAI) const override; ++ bool evaluateAsRelocatableImpl(MCValue &Res, const MCAsmLayout *Layout, ++ const MCFixup *Fixup) const override; ++ void visitUsedExpr(MCStreamer &Streamer) const override; ++ ++ MCFragment *findAssociatedFragment() const override { ++ return getSubExpr()->findAssociatedFragment(); ++ } ++ ++ void fixELFSymbolsInTLSFixups(MCAssembler &Asm) const override; ++ ++ static bool classof(const MCExpr *E) { ++ return E->getKind() == MCExpr::Target; ++ } ++}; ++ ++} // end namespace llvm ++ ++#endif // LLVM_LIB_TARGET_LOONGARCH_MCTARGETDESC_LOONGARCHMCEXPR_H +diff --git a/lib/Target/LoongArch/MCTargetDesc/LoongArchMCTargetDesc.cpp b/lib/Target/LoongArch/MCTargetDesc/LoongArchMCTargetDesc.cpp +new file mode 100644 +index 00000000..3d953d43 +--- /dev/null ++++ b/lib/Target/LoongArch/MCTargetDesc/LoongArchMCTargetDesc.cpp +@@ -0,0 +1,187 @@ ++//===-- LoongArchMCTargetDesc.cpp - LoongArch Target Descriptions -------------------===// ++// ++// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. ++// See https://llvm.org/LICENSE.txt for license information. ++// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception ++// ++//===----------------------------------------------------------------------===// ++// ++// This file provides LoongArch specific target descriptions. 
++// ++//===----------------------------------------------------------------------===// ++ ++#include "LoongArchMCTargetDesc.h" ++#include "LoongArchTargetStreamer.h" ++#include "MCTargetDesc/LoongArchAsmBackend.h" ++#include "MCTargetDesc/LoongArchELFStreamer.h" ++#include "MCTargetDesc/LoongArchInstPrinter.h" ++#include "MCTargetDesc/LoongArchMCAsmInfo.h" ++#include "TargetInfo/LoongArchTargetInfo.h" ++#include "llvm/ADT/Triple.h" ++#include "llvm/MC/MCCodeEmitter.h" ++#include "llvm/MC/MCELFStreamer.h" ++#include "llvm/MC/MCInstrAnalysis.h" ++#include "llvm/MC/MCInstrInfo.h" ++#include "llvm/MC/MCObjectWriter.h" ++#include "llvm/MC/MCRegisterInfo.h" ++#include "llvm/MC/MCSubtargetInfo.h" ++#include "llvm/MC/MCSymbol.h" ++#include "llvm/MC/MachineLocation.h" ++#include "llvm/Support/ErrorHandling.h" ++#include "llvm/Support/FormattedStream.h" ++#include "llvm/MC/TargetRegistry.h" ++ ++using namespace llvm; ++ ++#define GET_INSTRINFO_MC_DESC ++#include "LoongArchGenInstrInfo.inc" ++ ++#define GET_SUBTARGETINFO_MC_DESC ++#include "LoongArchGenSubtargetInfo.inc" ++ ++#define GET_REGINFO_MC_DESC ++#include "LoongArchGenRegisterInfo.inc" ++ ++/// Select the LoongArch CPU for the given triple and cpu name. ++/// FIXME: Merge with the copy in LoongArchSubtarget.cpp ++StringRef LoongArch_MC::selectLoongArchCPU(const Triple &TT, StringRef CPU) { ++ if (CPU.empty() || CPU == "generic") { ++ if (TT.isLoongArch32()) ++ CPU = "generic-la32"; ++ else ++ CPU = "la464"; ++ } ++ return CPU; ++} ++ ++static MCInstrInfo *createLoongArchMCInstrInfo() { ++ MCInstrInfo *X = new MCInstrInfo(); ++ InitLoongArchMCInstrInfo(X); ++ return X; ++} ++ ++static MCRegisterInfo *createLoongArchMCRegisterInfo(const Triple &TT) { ++ MCRegisterInfo *X = new MCRegisterInfo(); ++ InitLoongArchMCRegisterInfo(X, LoongArch::RA); ++ return X; ++} ++ ++static MCSubtargetInfo *createLoongArchMCSubtargetInfo(const Triple &TT, ++ StringRef CPU, StringRef FS) { ++ CPU = LoongArch_MC::selectLoongArchCPU(TT, CPU); ++ return createLoongArchMCSubtargetInfoImpl(TT, CPU, /*TuneCPU*/ CPU, FS); ++} ++ ++static MCAsmInfo *createLoongArchMCAsmInfo(const MCRegisterInfo &MRI, ++ const Triple &TT, ++ const MCTargetOptions &Options) { ++ MCAsmInfo *MAI = new LoongArchMCAsmInfo(TT, Options); ++ ++ unsigned SP = MRI.getDwarfRegNum(LoongArch::SP, true); ++ MCCFIInstruction Inst = MCCFIInstruction::cfiDefCfa(nullptr, SP, 0); ++ MAI->addInitialFrameState(Inst); ++ ++ return MAI; ++} ++ ++static MCInstPrinter *createLoongArchMCInstPrinter(const Triple &T, ++ unsigned SyntaxVariant, ++ const MCAsmInfo &MAI, ++ const MCInstrInfo &MII, ++ const MCRegisterInfo &MRI) { ++ return new LoongArchInstPrinter(MAI, MII, MRI); ++} ++ ++static MCStreamer *createMCStreamer(const Triple &T, MCContext &Context, ++ std::unique_ptr &&MAB, ++ std::unique_ptr &&OW, ++ std::unique_ptr &&Emitter, ++ bool RelaxAll) { ++ MCStreamer *S; ++ S = createLoongArchELFStreamer(Context, std::move(MAB), std::move(OW), ++ std::move(Emitter), RelaxAll); ++ return S; ++} ++ ++static MCTargetStreamer *createLoongArchAsmTargetStreamer(MCStreamer &S, ++ formatted_raw_ostream &OS, ++ MCInstPrinter *InstPrint, ++ bool isVerboseAsm) { ++ return new LoongArchTargetAsmStreamer(S, OS); ++} ++ ++static MCTargetStreamer *createLoongArchNullTargetStreamer(MCStreamer &S) { ++ return new LoongArchTargetStreamer(S); ++} ++ ++static MCTargetStreamer * ++createLoongArchObjectTargetStreamer(MCStreamer &S, const MCSubtargetInfo &STI) { ++ return new LoongArchTargetELFStreamer(S, STI); ++} ++ ++namespace { ++ 
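++// LoongArchMCInstrAnalysis lets MC-layer tools (e.g. llvm-objdump) resolve ++// branch targets: direct branches and BL encode a PC-relative offset as their ++// last operand, so the target is Addr plus that immediate. Register-indirect ++// jumps such as JIRL are not handled here, since their target is not encoded ++// in the instruction.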
++class LoongArchMCInstrAnalysis : public MCInstrAnalysis { ++public: ++ LoongArchMCInstrAnalysis(const MCInstrInfo *Info) : MCInstrAnalysis(Info) {} ++ ++ bool evaluateBranch(const MCInst &Inst, uint64_t Addr, uint64_t Size, ++ uint64_t &Target) const override { ++ unsigned NumOps = Inst.getNumOperands(); ++ if (NumOps == 0) ++ return false; ++ if (Info->get(Inst.getOpcode()).isBranch() || Inst.getOpcode() == LoongArch::BL) { ++ // just not jirl ++ Target = Addr + Inst.getOperand(NumOps - 1).getImm(); ++ return true; ++ } else { ++ return false; ++ } ++ } ++}; ++} ++ ++static MCInstrAnalysis *createLoongArchMCInstrAnalysis(const MCInstrInfo *Info) { ++ return new LoongArchMCInstrAnalysis(Info); ++} ++ ++extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeLoongArchTargetMC() { ++ for (Target *T : {&getTheLoongArch32Target(), &getTheLoongArch64Target()}) { ++ // Register the MC asm info. ++ RegisterMCAsmInfoFn X(*T, createLoongArchMCAsmInfo); ++ ++ // Register the MC instruction info. ++ TargetRegistry::RegisterMCInstrInfo(*T, createLoongArchMCInstrInfo); ++ ++ // Register the MC register info. ++ TargetRegistry::RegisterMCRegInfo(*T, createLoongArchMCRegisterInfo); ++ ++ // Register the elf streamer. ++ TargetRegistry::RegisterELFStreamer(*T, createMCStreamer); ++ ++ // Register the asm target streamer. ++ TargetRegistry::RegisterAsmTargetStreamer(*T, createLoongArchAsmTargetStreamer); ++ ++ TargetRegistry::RegisterNullTargetStreamer(*T, ++ createLoongArchNullTargetStreamer); ++ ++ // Register the MC subtarget info. ++ TargetRegistry::RegisterMCSubtargetInfo(*T, createLoongArchMCSubtargetInfo); ++ ++ // Register the MC instruction analyzer. ++ TargetRegistry::RegisterMCInstrAnalysis(*T, createLoongArchMCInstrAnalysis); ++ ++ // Register the MCInstPrinter. ++ TargetRegistry::RegisterMCInstPrinter(*T, createLoongArchMCInstPrinter); ++ ++ TargetRegistry::RegisterObjectTargetStreamer( ++ *T, createLoongArchObjectTargetStreamer); ++ ++ // Register the asm backend. ++ TargetRegistry::RegisterMCAsmBackend(*T, createLoongArchAsmBackend); ++ } ++ ++ // Register the MC Code Emitter ++ for (Target *T : {&getTheLoongArch32Target(), &getTheLoongArch64Target()}) ++ TargetRegistry::RegisterMCCodeEmitter(*T, createLoongArchMCCodeEmitter); ++} +diff --git a/lib/Target/LoongArch/MCTargetDesc/LoongArchMCTargetDesc.h b/lib/Target/LoongArch/MCTargetDesc/LoongArchMCTargetDesc.h +new file mode 100644 +index 00000000..56949ef1 +--- /dev/null ++++ b/lib/Target/LoongArch/MCTargetDesc/LoongArchMCTargetDesc.h +@@ -0,0 +1,68 @@ ++//===-- LoongArchMCTargetDesc.h - LoongArch Target Descriptions -----------*- C++ -*-===// ++// ++// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. ++// See https://llvm.org/LICENSE.txt for license information. ++// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception ++// ++//===----------------------------------------------------------------------===// ++// ++// This file provides LoongArch specific target descriptions. 
++// ++//===----------------------------------------------------------------------===// ++ ++#ifndef LLVM_LIB_TARGET_LOONGARCH_MCTARGETDESC_LOONGARCHMCTARGETDESC_H ++#define LLVM_LIB_TARGET_LOONGARCH_MCTARGETDESC_LOONGARCHMCTARGETDESC_H ++ ++#include "llvm/Support/DataTypes.h" ++ ++#include ++ ++namespace llvm { ++class MCAsmBackend; ++class MCCodeEmitter; ++class MCContext; ++class MCInstrInfo; ++class MCObjectTargetWriter; ++class MCRegisterInfo; ++class MCSubtargetInfo; ++class MCTargetOptions; ++class StringRef; ++class Target; ++class Triple; ++class raw_ostream; ++class raw_pwrite_stream; ++ ++Target &getTheLoongArch32Target(); ++Target &getTheLoongArch64Target(); ++ ++MCCodeEmitter *createLoongArchMCCodeEmitter(const MCInstrInfo &MCII, ++ const MCRegisterInfo &MRI, ++ MCContext &Ctx); ++ ++MCAsmBackend *createLoongArchAsmBackend(const Target &T, ++ const MCSubtargetInfo &STI, ++ const MCRegisterInfo &MRI, ++ const MCTargetOptions &Options); ++ ++std::unique_ptr ++createLoongArchELFObjectWriter(const Triple &TT); ++ ++namespace LoongArch_MC { ++StringRef selectLoongArchCPU(const Triple &TT, StringRef CPU); ++} ++ ++} // End llvm namespace ++ ++// Defines symbolic names for LoongArch registers. This defines a mapping from ++// register name to register number. ++#define GET_REGINFO_ENUM ++#include "LoongArchGenRegisterInfo.inc" ++ ++// Defines symbolic names for the LoongArch instructions. ++#define GET_INSTRINFO_ENUM ++#include "LoongArchGenInstrInfo.inc" ++ ++#define GET_SUBTARGETINFO_ENUM ++#include "LoongArchGenSubtargetInfo.inc" ++ ++#endif +diff --git a/lib/Target/LoongArch/MCTargetDesc/LoongArchTargetStreamer.cpp b/lib/Target/LoongArch/MCTargetDesc/LoongArchTargetStreamer.cpp +new file mode 100644 +index 00000000..d5b6c95a +--- /dev/null ++++ b/lib/Target/LoongArch/MCTargetDesc/LoongArchTargetStreamer.cpp +@@ -0,0 +1,319 @@ ++//===-- LoongArchTargetStreamer.cpp - LoongArch Target Streamer Methods -------------===// ++// ++// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. ++// See https://llvm.org/LICENSE.txt for license information. ++// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception ++// ++//===----------------------------------------------------------------------===// ++// ++// This file provides LoongArch specific target streamer methods. 
++// ++//===----------------------------------------------------------------------===// ++ ++#include "LoongArchABIInfo.h" ++#include "LoongArchELFStreamer.h" ++#include "LoongArchInstPrinter.h" ++#include "LoongArchMCExpr.h" ++#include "LoongArchMCTargetDesc.h" ++#include "LoongArchTargetObjectFile.h" ++#include "LoongArchTargetStreamer.h" ++#include "llvm/BinaryFormat/ELF.h" ++#include "llvm/MC/MCContext.h" ++#include "llvm/MC/MCSectionELF.h" ++#include "llvm/MC/MCSubtargetInfo.h" ++#include "llvm/MC/MCSymbolELF.h" ++#include "llvm/Support/CommandLine.h" ++#include "llvm/Support/ErrorHandling.h" ++#include "llvm/Support/FormattedStream.h" ++ ++using namespace llvm; ++ ++namespace { ++static cl::opt RoundSectionSizes( ++ "loongarch-round-section-sizes", cl::init(false), ++ cl::desc("Round section sizes up to the section alignment"), cl::Hidden); ++} // end anonymous namespace ++ ++LoongArchTargetStreamer::LoongArchTargetStreamer(MCStreamer &S) ++ : MCTargetStreamer(S), ModuleDirectiveAllowed(true) { ++ GPRInfoSet = FPRInfoSet = FrameInfoSet = false; ++} ++void LoongArchTargetStreamer::emitDirectiveOptionPic0() {} ++void LoongArchTargetStreamer::emitDirectiveOptionPic2() {} ++void LoongArchTargetStreamer::emitDirectiveSetArch(StringRef Arch) { ++ forbidModuleDirective(); ++} ++void LoongArchTargetStreamer::emitDirectiveSetLoongArch32() { forbidModuleDirective(); } ++void LoongArchTargetStreamer::emitDirectiveSetloongarch64() { forbidModuleDirective(); } ++ ++void LoongArchTargetStreamer::emitR(unsigned Opcode, unsigned Reg0, SMLoc IDLoc, ++ const MCSubtargetInfo *STI) { ++ MCInst TmpInst; ++ TmpInst.setOpcode(Opcode); ++ TmpInst.addOperand(MCOperand::createReg(Reg0)); ++ TmpInst.setLoc(IDLoc); ++ getStreamer().emitInstruction(TmpInst, *STI); ++} ++ ++void LoongArchTargetStreamer::emitRXX(unsigned Opcode, unsigned Reg0, MCOperand Op1, ++ MCOperand Op2, SMLoc IDLoc, const MCSubtargetInfo *STI) { ++ MCInst TmpInst; ++ TmpInst.setOpcode(Opcode); ++ TmpInst.addOperand(MCOperand::createReg(Reg0)); ++ TmpInst.addOperand(Op1); ++ TmpInst.addOperand(Op2); ++ TmpInst.setLoc(IDLoc); ++ getStreamer().emitInstruction(TmpInst, *STI); ++} ++ ++void LoongArchTargetStreamer::emitRRXX(unsigned Opcode, unsigned Reg0, unsigned Reg1, ++ MCOperand Op2, MCOperand Op3, SMLoc IDLoc, ++ const MCSubtargetInfo *STI) { ++ MCInst TmpInst; ++ TmpInst.setOpcode(Opcode); ++ TmpInst.addOperand(MCOperand::createReg(Reg0)); ++ TmpInst.addOperand(MCOperand::createReg(Reg1)); ++ TmpInst.addOperand(Op2); ++ TmpInst.addOperand(Op3); ++ TmpInst.setLoc(IDLoc); ++ getStreamer().emitInstruction(TmpInst, *STI); ++} ++ ++void LoongArchTargetStreamer::emitRX(unsigned Opcode, unsigned Reg0, MCOperand Op1, ++ SMLoc IDLoc, const MCSubtargetInfo *STI) { ++ MCInst TmpInst; ++ TmpInst.setOpcode(Opcode); ++ TmpInst.addOperand(MCOperand::createReg(Reg0)); ++ TmpInst.addOperand(Op1); ++ TmpInst.setLoc(IDLoc); ++ getStreamer().emitInstruction(TmpInst, *STI); ++} ++ ++void LoongArchTargetStreamer::emitRI(unsigned Opcode, unsigned Reg0, int32_t Imm, ++ SMLoc IDLoc, const MCSubtargetInfo *STI) { ++ emitRX(Opcode, Reg0, MCOperand::createImm(Imm), IDLoc, STI); ++} ++ ++void LoongArchTargetStreamer::emitRR(unsigned Opcode, unsigned Reg0, unsigned Reg1, ++ SMLoc IDLoc, const MCSubtargetInfo *STI) { ++ emitRX(Opcode, Reg0, MCOperand::createReg(Reg1), IDLoc, STI); ++} ++ ++void LoongArchTargetStreamer::emitII(unsigned Opcode, int16_t Imm1, int16_t Imm2, ++ SMLoc IDLoc, const MCSubtargetInfo *STI) { ++ MCInst TmpInst; ++ TmpInst.setOpcode(Opcode); ++ 
TmpInst.addOperand(MCOperand::createImm(Imm1)); ++ TmpInst.addOperand(MCOperand::createImm(Imm2)); ++ TmpInst.setLoc(IDLoc); ++ getStreamer().emitInstruction(TmpInst, *STI); ++} ++ ++void LoongArchTargetStreamer::emitRRX(unsigned Opcode, unsigned Reg0, unsigned Reg1, ++ MCOperand Op2, SMLoc IDLoc, ++ const MCSubtargetInfo *STI) { ++ MCInst TmpInst; ++ TmpInst.setOpcode(Opcode); ++ TmpInst.addOperand(MCOperand::createReg(Reg0)); ++ TmpInst.addOperand(MCOperand::createReg(Reg1)); ++ TmpInst.addOperand(Op2); ++ TmpInst.setLoc(IDLoc); ++ getStreamer().emitInstruction(TmpInst, *STI); ++} ++ ++void LoongArchTargetStreamer::emitRRR(unsigned Opcode, unsigned Reg0, unsigned Reg1, ++ unsigned Reg2, SMLoc IDLoc, ++ const MCSubtargetInfo *STI) { ++ emitRRX(Opcode, Reg0, Reg1, MCOperand::createReg(Reg2), IDLoc, STI); ++} ++ ++void LoongArchTargetStreamer::emitRRI(unsigned Opcode, unsigned Reg0, ++ unsigned Reg1, int32_t Imm, SMLoc IDLoc, ++ const MCSubtargetInfo *STI) { ++ emitRRX(Opcode, Reg0, Reg1, MCOperand::createImm(Imm), IDLoc, STI); ++} ++ ++void LoongArchTargetStreamer::emitRRIII(unsigned Opcode, unsigned Reg0, ++ unsigned Reg1, int16_t Imm0, int16_t Imm1, ++ int16_t Imm2, SMLoc IDLoc, ++ const MCSubtargetInfo *STI) { ++ MCInst TmpInst; ++ TmpInst.setOpcode(Opcode); ++ TmpInst.addOperand(MCOperand::createReg(Reg0)); ++ TmpInst.addOperand(MCOperand::createReg(Reg1)); ++ TmpInst.addOperand(MCOperand::createImm(Imm0)); ++ TmpInst.addOperand(MCOperand::createImm(Imm1)); ++ TmpInst.addOperand(MCOperand::createImm(Imm2)); ++ TmpInst.setLoc(IDLoc); ++ getStreamer().emitInstruction(TmpInst, *STI); ++} ++ ++void LoongArchTargetStreamer::emitAdd(unsigned DstReg, unsigned SrcReg, ++ unsigned TrgReg, bool Is64Bit, ++ const MCSubtargetInfo *STI) { ++ emitRRR(Is64Bit ? LoongArch::ADD_D : LoongArch::ADD_W, DstReg, SrcReg, TrgReg, SMLoc(), ++ STI); ++} ++ ++void LoongArchTargetStreamer::emitDSLL(unsigned DstReg, unsigned SrcReg, ++ int16_t ShiftAmount, SMLoc IDLoc, ++ const MCSubtargetInfo *STI) { ++ if (ShiftAmount >= 32) { ++ emitRRI(LoongArch::SLLI_D, DstReg, SrcReg, ShiftAmount - 32, IDLoc, STI); ++ return; ++ } ++ ++ emitRRI(LoongArch::SLLI_D, DstReg, SrcReg, ShiftAmount, IDLoc, STI); ++} ++ ++void LoongArchTargetStreamer::emitNop(SMLoc IDLoc, const MCSubtargetInfo *STI) { ++ emitRRI(LoongArch::ANDI, LoongArch::ZERO, LoongArch::ZERO, 0, IDLoc, STI); ++} ++ ++LoongArchTargetAsmStreamer::LoongArchTargetAsmStreamer(MCStreamer &S, ++ formatted_raw_ostream &OS) ++ : LoongArchTargetStreamer(S), OS(OS) {} ++ ++void LoongArchTargetAsmStreamer::emitDirectiveOptionPic0() { ++ OS << "\t.option\tpic0\n"; ++} ++ ++void LoongArchTargetAsmStreamer::emitDirectiveOptionPic2() { ++ OS << "\t.option\tpic2\n"; ++} ++ ++void LoongArchTargetAsmStreamer::emitDirectiveSetArch(StringRef Arch) { ++ OS << "\t.set arch=" << Arch << "\n"; ++ LoongArchTargetStreamer::emitDirectiveSetArch(Arch); ++} ++ ++void LoongArchTargetAsmStreamer::emitDirectiveSetLoongArch32() { ++ //OS << "\t.set\tloongarch32\n"; ++ LoongArchTargetStreamer::emitDirectiveSetLoongArch32(); ++} ++ ++void LoongArchTargetAsmStreamer::emitDirectiveSetloongarch64() { ++ //OS << "\t.set\tloongarch64\n"; ++ LoongArchTargetStreamer::emitDirectiveSetloongarch64(); ++} ++ ++// This part is for ELF object output. 
++LoongArchTargetELFStreamer::LoongArchTargetELFStreamer(MCStreamer &S, ++ const MCSubtargetInfo &STI) ++ : LoongArchTargetStreamer(S), STI(STI) { ++ MCAssembler &MCA = getStreamer().getAssembler(); ++ ++ // It's possible that MCObjectFileInfo isn't fully initialized at this point ++ // due to an initialization order problem where LLVMTargetMachine creates the ++ // target streamer before TargetLoweringObjectFile calls ++ // InitializeMCObjectFileInfo. There doesn't seem to be a single place that ++ // covers all cases so this statement covers most cases and direct object ++ // emission must call setPic() once MCObjectFileInfo has been initialized. The ++ // cases we don't handle here are covered by LoongArchAsmPrinter. ++ Pic = MCA.getContext().getObjectFileInfo()->isPositionIndependent(); ++ ++ // FIXME: Fix a dependency issue by instantiating the ABI object to some ++ // default based off the triple. The triple doesn't describe the target ++ // fully, but any external user of the API that uses the MCTargetStreamer ++ // would otherwise crash on assertion failure. ++ ++ ABI = LoongArchABIInfo( ++ STI.getTargetTriple().getArch() == Triple::ArchType::loongarch32 ++ ? LoongArchABIInfo::ILP32D() ++ : LoongArchABIInfo::LP64D()); ++ ++} ++ ++void LoongArchTargetELFStreamer::emitLabel(MCSymbol *S) { ++ auto *Symbol = cast(S); ++ getStreamer().getAssembler().registerSymbol(*Symbol); ++ uint8_t Type = Symbol->getType(); ++ if (Type != ELF::STT_FUNC) ++ return; ++ ++} ++ ++void LoongArchTargetELFStreamer::finish() { ++ MCAssembler &MCA = getStreamer().getAssembler(); ++ const MCObjectFileInfo &OFI = *MCA.getContext().getObjectFileInfo(); ++ ++ // .bss, .text and .data are always at least 16-byte aligned. ++ MCSection &TextSection = *OFI.getTextSection(); ++ MCA.registerSection(TextSection); ++ MCSection &DataSection = *OFI.getDataSection(); ++ MCA.registerSection(DataSection); ++ MCSection &BSSSection = *OFI.getBSSSection(); ++ MCA.registerSection(BSSSection); ++ ++ TextSection.setAlignment(Align(std::max(16u, TextSection.getAlignment()))); ++ DataSection.setAlignment(Align(std::max(16u, DataSection.getAlignment()))); ++ BSSSection.setAlignment(Align(std::max(16u, BSSSection.getAlignment()))); ++ ++ if (RoundSectionSizes) { ++ // Make sections sizes a multiple of the alignment. This is useful for ++ // verifying the output of IAS against the output of other assemblers but ++ // it's not necessary to produce a correct object and increases section ++ // size. ++ MCStreamer &OS = getStreamer(); ++ for (MCSection &S : MCA) { ++ MCSectionELF &Section = static_cast(S); ++ ++ unsigned Alignment = Section.getAlignment(); ++ if (Alignment) { ++ OS.SwitchSection(&Section); ++ if (Section.UseCodeAlign()) ++ OS.emitCodeAlignment(Alignment, &STI, Alignment); ++ else ++ OS.emitValueToAlignment(Alignment, 0, 1, Alignment); ++ } ++ } ++ } ++ ++ // Update e_header flags. See the FIXME and comment above in ++ // the constructor for a full rundown on this. ++ unsigned EFlags = MCA.getELFHeaderEFlags(); ++ ++ // ABI ++ // LP64D does not require any ABI bits. 
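++ // Record the selected base ABI in e_flags using the EF_LARCH_BASE_ABI_* ++ // encodings; exactly one of the branches below applies.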
++ if (getABI().IsILP32S()) ++ EFlags |= ELF::EF_LARCH_BASE_ABI_ILP32S; ++ else if (getABI().IsILP32F()) ++ EFlags |= ELF::EF_LARCH_BASE_ABI_ILP32F; ++ else if (getABI().IsILP32D()) ++ EFlags |= ELF::EF_LARCH_BASE_ABI_ILP32D; ++ else if (getABI().IsLP64S()) ++ EFlags |= ELF::EF_LARCH_BASE_ABI_LP64S; ++ else if (getABI().IsLP64F()) ++ EFlags |= ELF::EF_LARCH_BASE_ABI_LP64F; ++ else if (getABI().IsLP64D()) ++ EFlags |= ELF::EF_LARCH_BASE_ABI_LP64D; ++ ++ MCA.setELFHeaderEFlags(EFlags); ++} ++ ++MCELFStreamer &LoongArchTargetELFStreamer::getStreamer() { ++ return static_cast(Streamer); ++} ++ ++void LoongArchTargetELFStreamer::emitDirectiveOptionPic0() { ++ MCAssembler &MCA = getStreamer().getAssembler(); ++ unsigned Flags = MCA.getELFHeaderEFlags(); ++ // This option overrides other PIC options like -KPIC. ++ Pic = false; ++ ///XXX:Reloc no this flags ++ //Flags &= ~ELF::EF_LOONGARCH_PIC; ++ MCA.setELFHeaderEFlags(Flags); ++} ++ ++void LoongArchTargetELFStreamer::emitDirectiveOptionPic2() { ++ MCAssembler &MCA = getStreamer().getAssembler(); ++ unsigned Flags = MCA.getELFHeaderEFlags(); ++ Pic = true; ++ // NOTE: We are following the GAS behaviour here which means the directive ++ // 'pic2' also sets the CPIC bit in the ELF header. This is different from ++ // what is stated in the SYSV ABI which consider the bits EF_LOONGARCH_PIC and ++ // EF_LOONGARCH_CPIC to be mutually exclusive. ++ ///XXX:Reloc no this flags ++ //Flags |= ELF::EF_LOONGARCH_PIC | ELF::EF_LOONGARCH_CPIC; ++ MCA.setELFHeaderEFlags(Flags); ++} +diff --git a/lib/Target/LoongArch/TargetInfo/CMakeLists.txt b/lib/Target/LoongArch/TargetInfo/CMakeLists.txt +new file mode 100644 +index 00000000..f53ddba4 +--- /dev/null ++++ b/lib/Target/LoongArch/TargetInfo/CMakeLists.txt +@@ -0,0 +1,9 @@ ++add_llvm_component_library(LLVMLoongArchInfo ++ LoongArchTargetInfo.cpp ++ ++ LINK_COMPONENTS ++ Support ++ ++ ADD_TO_COMPONENT ++ LoongArch ++ ) +diff --git a/lib/Target/LoongArch/TargetInfo/LoongArchTargetInfo.cpp b/lib/Target/LoongArch/TargetInfo/LoongArchTargetInfo.cpp +new file mode 100644 +index 00000000..e6b84518 +--- /dev/null ++++ b/lib/Target/LoongArch/TargetInfo/LoongArchTargetInfo.cpp +@@ -0,0 +1,34 @@ ++//===-- LoongArchTargetInfo.cpp - LoongArch Target Implementation -------------------===// ++// ++// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. ++// See https://llvm.org/LICENSE.txt for license information. 
++// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception ++// ++//===----------------------------------------------------------------------===// ++ ++#include "TargetInfo/LoongArchTargetInfo.h" ++#include "llvm/MC/TargetRegistry.h" ++using namespace llvm; ++ ++Target &llvm::getTheLoongArch32Target() { ++ static Target TheLoongArch32Target; ++ return TheLoongArch32Target; ++} ++ ++Target &llvm::getTheLoongArch64Target() { ++ static Target TheLoongArch64Target; ++ return TheLoongArch64Target; ++} ++ ++extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeLoongArchTargetInfo() { ++#if 0 ++ // TODO: support it in the future ++ RegisterTarget<Triple::loongarch32> ++ X(getTheLoongArch32Target(), "loongarch32", "LoongArch (32-bit)", "LoongArch"); ++#endif ++ RegisterTarget<Triple::loongarch64> ++ A(getTheLoongArch64Target(), "loongarch64", "LoongArch (64-bit)", ++ "LoongArch"); ++} +diff --git a/lib/Target/LoongArch/TargetInfo/LoongArchTargetInfo.h b/lib/Target/LoongArch/TargetInfo/LoongArchTargetInfo.h +new file mode 100644 +index 00000000..7dce2497 +--- /dev/null ++++ b/lib/Target/LoongArch/TargetInfo/LoongArchTargetInfo.h +@@ -0,0 +1,21 @@ ++//===-- LoongArchTargetInfo.h - LoongArch Target Implementation -----------*- C++ -*-===// ++// ++// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. ++// See https://llvm.org/LICENSE.txt for license information. ++// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception ++// ++//===----------------------------------------------------------------------===// ++ ++#ifndef LLVM_LIB_TARGET_LOONGARCH_TARGETINFO_LOONGARCHTARGETINFO_H ++#define LLVM_LIB_TARGET_LOONGARCH_TARGETINFO_LOONGARCHTARGETINFO_H ++ ++namespace llvm { ++ ++class Target; ++ ++Target &getTheLoongArch32Target(); ++Target &getTheLoongArch64Target(); ++ ++} // namespace llvm ++ ++#endif // LLVM_LIB_TARGET_LOONGARCH_TARGETINFO_LOONGARCHTARGETINFO_H +diff --git a/lib/Transforms/Instrumentation/AddressSanitizer.cpp b/lib/Transforms/Instrumentation/AddressSanitizer.cpp +index 8f94172a..b5de804a 100644 +--- a/lib/Transforms/Instrumentation/AddressSanitizer.cpp ++++ b/lib/Transforms/Instrumentation/AddressSanitizer.cpp +@@ -108,6 +108,7 @@ static const uint64_t kPPC64_ShadowOffset64 = 1ULL << 44; + static const uint64_t kSystemZ_ShadowOffset64 = 1ULL << 52; + static const uint64_t kMIPS32_ShadowOffset32 = 0x0aaa0000; + static const uint64_t kMIPS64_ShadowOffset64 = 1ULL << 37; ++static const uint64_t kLoongArch64_ShadowOffset64 = 1ULL << 37; + static const uint64_t kAArch64_ShadowOffset64 = 1ULL << 36; + static const uint64_t kRISCV64_ShadowOffset64 = 0xd55550000; + static const uint64_t kFreeBSD_ShadowOffset32 = 1ULL << 30; +@@ -477,6 +478,7 @@ static ShadowMapping getShadowMapping(const Triple &TargetTriple, int LongSize, + bool IsX86_64 = TargetTriple.getArch() == Triple::x86_64; + bool IsMIPS32 = TargetTriple.isMIPS32(); + bool IsMIPS64 = TargetTriple.isMIPS64(); ++ bool IsLoongArch64 = TargetTriple.isLoongArch64(); + bool IsArmOrThumb = TargetTriple.isARM() || TargetTriple.isThumb(); + bool IsAArch64 = TargetTriple.getArch() == Triple::aarch64; + bool IsRISCV64 = TargetTriple.getArch() == Triple::riscv64; +@@ -540,7 +542,9 @@ static ShadowMapping getShadowMapping(const Triple &TargetTriple, int LongSize, + Mapping.Offset = kWindowsShadowOffset64; + } else if (IsMIPS64) + Mapping.Offset = kMIPS64_ShadowOffset64; +- else if (IsIOS) ++ else if (IsLoongArch64) { ++ Mapping.Offset = kLoongArch64_ShadowOffset64; ++ } else if (IsIOS) + Mapping.Offset = kDynamicShadowSentinel; + else if (IsMacOS && IsAArch64) +
Mapping.Offset = kDynamicShadowSentinel; +diff --git a/lib/Transforms/Instrumentation/MemorySanitizer.cpp b/lib/Transforms/Instrumentation/MemorySanitizer.cpp +index c51acdf5..606ba036 100644 +--- a/lib/Transforms/Instrumentation/MemorySanitizer.cpp ++++ b/lib/Transforms/Instrumentation/MemorySanitizer.cpp +@@ -390,6 +390,14 @@ static const MemoryMapParams Linux_X86_64_MemoryMapParams = { + #endif + }; + ++// loongarch64 Linux ++static const MemoryMapParams Linux_LOONGARCH64_MemoryMapParams = { ++ 0, // AndMask (not used) ++ 0x008000000000, // XorMask ++ 0, // ShadowBase (not used) ++ 0x002000000000, // OriginBase ++}; ++ + // mips64 Linux + static const MemoryMapParams Linux_MIPS64_MemoryMapParams = { + 0, // AndMask (not used) +@@ -451,6 +459,11 @@ static const PlatformMemoryMapParams Linux_X86_MemoryMapParams = { + &Linux_X86_64_MemoryMapParams, + }; + ++static const PlatformMemoryMapParams Linux_LOONGARCH_MemoryMapParams = { ++ nullptr, ++ &Linux_LOONGARCH64_MemoryMapParams, ++}; ++ + static const PlatformMemoryMapParams Linux_MIPS_MemoryMapParams = { + nullptr, + &Linux_MIPS64_MemoryMapParams, +@@ -508,6 +521,7 @@ public: + private: + friend struct MemorySanitizerVisitor; + friend struct VarArgAMD64Helper; ++ friend struct VarArgLoongArch64Helper; + friend struct VarArgMIPS64Helper; + friend struct VarArgAArch64Helper; + friend struct VarArgPowerPC64Helper; +@@ -968,6 +982,9 @@ void MemorySanitizer::initializeModule(Module &M) { + case Triple::x86: + MapParams = Linux_X86_MemoryMapParams.bits32; + break; ++ case Triple::loongarch64: ++ MapParams = Linux_LOONGARCH_MemoryMapParams.bits64; ++ break; + case Triple::mips64: + case Triple::mips64el: + MapParams = Linux_MIPS_MemoryMapParams.bits64; +@@ -4466,6 +4483,117 @@ struct VarArgAMD64Helper : public VarArgHelper { + } + }; + ++/// LoongArch64-specific implementation of VarArgHelper. ++struct VarArgLoongArch64Helper : public VarArgHelper { ++ Function &F; ++ MemorySanitizer &MS; ++ MemorySanitizerVisitor &MSV; ++ Value *VAArgTLSCopy = nullptr; ++ Value *VAArgSize = nullptr; ++ ++ SmallVector VAStartInstrumentationList; ++ ++ VarArgLoongArch64Helper(Function &F, MemorySanitizer &MS, ++ MemorySanitizerVisitor &MSV) : F(F), MS(MS), MSV(MSV) {} ++ ++ void visitCallBase(CallBase &CB, IRBuilder<> &IRB) override { ++ unsigned VAArgOffset = 0; ++ const DataLayout &DL = F.getParent()->getDataLayout(); ++ for (auto ArgIt = CB.arg_begin() + CB.getFunctionType()->getNumParams(), ++ End = CB.arg_end(); ++ ArgIt != End; ++ArgIt) { ++ Triple TargetTriple(F.getParent()->getTargetTriple()); ++ Value *A = *ArgIt; ++ Value *Base; ++ uint64_t ArgSize = DL.getTypeAllocSize(A->getType()); ++ Base = getShadowPtrForVAArgument(A->getType(), IRB, VAArgOffset, ArgSize); ++ VAArgOffset += ArgSize; ++ VAArgOffset = alignTo(VAArgOffset, 8); ++ if (!Base) ++ continue; ++ IRB.CreateAlignedStore(MSV.getShadow(A), Base, kShadowTLSAlignment); ++ } ++ ++ Constant *TotalVAArgSize = ConstantInt::get(IRB.getInt64Ty(), VAArgOffset); ++ // Here using VAArgOverflowSizeTLS as VAArgSizeTLS to avoid creation of ++ // a new class member i.e. it is the total size of all VarArgs. ++ IRB.CreateStore(TotalVAArgSize, MS.VAArgOverflowSizeTLS); ++ } ++ ++ /// Compute the shadow address for a given va_arg. ++ Value *getShadowPtrForVAArgument(Type *Ty, IRBuilder<> &IRB, ++ unsigned ArgOffset, unsigned ArgSize) { ++ // Make sure we don't overflow __msan_va_arg_tls. 
++ if (ArgOffset + ArgSize > kParamTLSSize) ++ return nullptr; ++ Value *Base = IRB.CreatePointerCast(MS.VAArgTLS, MS.IntptrTy); ++ Base = IRB.CreateAdd(Base, ConstantInt::get(MS.IntptrTy, ArgOffset)); ++ return IRB.CreateIntToPtr(Base, PointerType::get(MSV.getShadowTy(Ty), 0), ++ "_msarg"); ++ } ++ ++ void visitVAStartInst(VAStartInst &I) override { ++ IRBuilder<> IRB(&I); ++ VAStartInstrumentationList.push_back(&I); ++ Value *VAListTag = I.getArgOperand(0); ++ Value *ShadowPtr, *OriginPtr; ++ const Align Alignment = Align(8); ++ std::tie(ShadowPtr, OriginPtr) = MSV.getShadowOriginPtr( ++ VAListTag, IRB, IRB.getInt8Ty(), Alignment, /*isStore*/ true); ++ IRB.CreateMemSet(ShadowPtr, Constant::getNullValue(IRB.getInt8Ty()), ++ /* size */ 8, Alignment, false); ++ } ++ ++ void visitVACopyInst(VACopyInst &I) override { ++ IRBuilder<> IRB(&I); ++ VAStartInstrumentationList.push_back(&I); ++ Value *VAListTag = I.getArgOperand(0); ++ Value *ShadowPtr, *OriginPtr; ++ const Align Alignment = Align(8); ++ std::tie(ShadowPtr, OriginPtr) = MSV.getShadowOriginPtr( ++ VAListTag, IRB, IRB.getInt8Ty(), Alignment, /*isStore*/ true); ++ IRB.CreateMemSet(ShadowPtr, Constant::getNullValue(IRB.getInt8Ty()), ++ /* size */ 8, Alignment, false); ++ } ++ ++ void finalizeInstrumentation() override { ++ assert(!VAArgSize && !VAArgTLSCopy && ++ "finalizeInstrumentation called twice"); ++ IRBuilder<> IRB(MSV.FnPrologueEnd); ++ VAArgSize = IRB.CreateLoad(IRB.getInt64Ty(), MS.VAArgOverflowSizeTLS); ++ Value *CopySize = IRB.CreateAdd(ConstantInt::get(MS.IntptrTy, 0), ++ VAArgSize); ++ ++ if (!VAStartInstrumentationList.empty()) { ++ // If there is a va_start in this function, make a backup copy of ++ // va_arg_tls somewhere in the function entry block. ++ VAArgTLSCopy = IRB.CreateAlloca(Type::getInt8Ty(*MS.C), CopySize); ++ IRB.CreateMemCpy(VAArgTLSCopy, Align(8), MS.VAArgTLS, Align(8), CopySize); ++ } ++ ++ // Instrument va_start. ++ // Copy va_list shadow from the backup copy of the TLS contents. ++ for (size_t i = 0, n = VAStartInstrumentationList.size(); i < n; i++) { ++ CallInst *OrigInst = VAStartInstrumentationList[i]; ++ IRBuilder<> IRB(OrigInst->getNextNode()); ++ Value *VAListTag = OrigInst->getArgOperand(0); ++ Type *RegSaveAreaPtrTy = Type::getInt64PtrTy(*MS.C); ++ Value *RegSaveAreaPtrPtr = ++ IRB.CreateIntToPtr(IRB.CreatePtrToInt(VAListTag, MS.IntptrTy), ++ PointerType::get(Type::getInt64PtrTy(*MS.C), 0)); ++ Value *RegSaveAreaPtr = ++ IRB.CreateLoad(RegSaveAreaPtrTy, RegSaveAreaPtrPtr); ++ Value *RegSaveAreaShadowPtr, *RegSaveAreaOriginPtr; ++ const Align Alignment = Align(8); ++ std::tie(RegSaveAreaShadowPtr, RegSaveAreaOriginPtr) = ++ MSV.getShadowOriginPtr(RegSaveAreaPtr, IRB, IRB.getInt8Ty(), ++ Alignment, /*isStore*/ true); ++ IRB.CreateMemCpy(RegSaveAreaShadowPtr, Alignment, VAArgTLSCopy, Alignment, ++ CopySize); ++ } ++ } ++}; ++ + /// MIPS64-specific implementation of VarArgHelper. 
+ struct VarArgMIPS64Helper : public VarArgHelper { + Function &F; +@@ -5365,6 +5493,8 @@ static VarArgHelper *CreateVarArgHelper(Function &Func, MemorySanitizer &Msan, + return new VarArgPowerPC64Helper(Func, Msan, Visitor); + else if (TargetTriple.getArch() == Triple::systemz) + return new VarArgSystemZHelper(Func, Msan, Visitor); ++ else if (TargetTriple.getArch() == Triple::loongarch64) ++ return new VarArgLoongArch64Helper(Func, Msan, Visitor); + else + return new VarArgNoOpHelper(Func, Msan, Visitor); + } +diff --git a/test/ExecutionEngine/JITLink/X86/MachO_GOTAndStubsOptimization.s b/test/ExecutionEngine/JITLink/X86/MachO_GOTAndStubsOptimization.s +index caeae4fa..56f391b7 100644 +--- a/test/ExecutionEngine/JITLink/X86/MachO_GOTAndStubsOptimization.s ++++ b/test/ExecutionEngine/JITLink/X86/MachO_GOTAndStubsOptimization.s +@@ -1,3 +1,4 @@ ++# UNSUPPORTED: loongarch64 + # RUN: rm -rf %t && mkdir -p %t + # RUN: llvm-mc -triple=x86_64-apple-macos10.9 -filetype=obj \ + # RUN: -o %t/helper.o %S/Inputs/MachO_GOTAndStubsOptimizationHelper.s +diff --git a/test/ExecutionEngine/MCJIT/eh-lg-pic.ll b/test/ExecutionEngine/MCJIT/eh-lg-pic.ll +index f2d6a54a..86c7374f 100644 +--- a/test/ExecutionEngine/MCJIT/eh-lg-pic.ll ++++ b/test/ExecutionEngine/MCJIT/eh-lg-pic.ll +@@ -1,4 +1,5 @@ + ; REQUIRES: cxx-shared-library ++; UNSUPPORTED: loongarch64 + ; RUN: %lli -jit-kind=mcjit -relocation-model=pic -code-model=large %s + ; XFAIL: cygwin, windows-msvc, windows-gnu, mips-, mipsel-, i686, i386, aarch64, arm + declare i8* @__cxa_allocate_exception(i64) +diff --git a/test/ExecutionEngine/MCJIT/eh.ll b/test/ExecutionEngine/MCJIT/eh.ll +index ed5ff644..f419c875 100644 +--- a/test/ExecutionEngine/MCJIT/eh.ll ++++ b/test/ExecutionEngine/MCJIT/eh.ll +@@ -1,4 +1,5 @@ + ; REQUIRES: cxx-shared-library ++; UNSUPPORTED: loongarch64 + ; RUN: %lli -jit-kind=mcjit %s + + ; XFAIL: arm, cygwin, windows-msvc, windows-gnu +diff --git a/test/ExecutionEngine/MCJIT/lit.local.cfg b/test/ExecutionEngine/MCJIT/lit.local.cfg +index e2535ef1..09f1a2ab 100644 +--- a/test/ExecutionEngine/MCJIT/lit.local.cfg ++++ b/test/ExecutionEngine/MCJIT/lit.local.cfg +@@ -1,7 +1,8 @@ + root = config.root + targets = root.targets + if ('X86' in targets) | ('AArch64' in targets) | ('ARM' in targets) | \ +- ('Mips' in targets) | ('PowerPC' in targets) | ('SystemZ' in targets): ++ ('Mips' in targets) | ('PowerPC' in targets) | ('SystemZ' in targets) | \ ++ ('LoongArch' in targets) : + config.unsupported = False + else: + config.unsupported = True +@@ -9,7 +10,7 @@ else: + # FIXME: autoconf and cmake produce different arch names. We should normalize + # them before getting here. 
+ if root.host_arch not in ['i386', 'x86', 'x86_64', 'AMD64', +- 'AArch64', 'ARM', 'Mips', ++ 'AArch64', 'ARM', 'Mips', 'loongarch64', + 'PowerPC', 'ppc64', 'ppc64le', 'SystemZ']: + config.unsupported = True + +diff --git a/test/ExecutionEngine/MCJIT/multi-module-eh-a.ll b/test/ExecutionEngine/MCJIT/multi-module-eh-a.ll +index 3709aa44..afd38c00 100644 +--- a/test/ExecutionEngine/MCJIT/multi-module-eh-a.ll ++++ b/test/ExecutionEngine/MCJIT/multi-module-eh-a.ll +@@ -1,4 +1,5 @@ + ; REQUIRES: cxx-shared-library ++; UNSUPPORTED: loongarch64 + ; RUN: %lli -jit-kind=mcjit -extra-module=%p/Inputs/multi-module-eh-b.ll %s + + ; XFAIL: arm, cygwin, windows-msvc, windows-gnu +diff --git a/test/ExecutionEngine/MCJIT/remote/cross-module-a.ll b/test/ExecutionEngine/MCJIT/remote/cross-module-a.ll +index 7a1731e7..d7b7e697 100644 +--- a/test/ExecutionEngine/MCJIT/remote/cross-module-a.ll ++++ b/test/ExecutionEngine/MCJIT/remote/cross-module-a.ll +@@ -1,3 +1,4 @@ ++; UNSUPPORTED: loongarch64 + ; RUN: %lli -jit-kind=mcjit -extra-module=%p/Inputs/cross-module-b.ll -disable-lazy-compilation=true -remote-mcjit -mcjit-remote-process=lli-child-target%exeext %s > /dev/null + ; XFAIL: windows-gnu,windows-msvc + ; UNSUPPORTED: powerpc64-unknown-linux-gnu +diff --git a/test/ExecutionEngine/MCJIT/remote/eh.ll b/test/ExecutionEngine/MCJIT/remote/eh.ll +index e25fd710..6aea1311 100644 +--- a/test/ExecutionEngine/MCJIT/remote/eh.ll ++++ b/test/ExecutionEngine/MCJIT/remote/eh.ll +@@ -1,4 +1,5 @@ + ; REQUIRES: cxx-shared-library ++; UNSUPPORTED: loongarch64 + ; RUN: %lli -jit-kind=mcjit -remote-mcjit -mcjit-remote-process=lli-child-target%exeext %s + ; XFAIL: arm, cygwin, windows-msvc, windows-gnu + ; UNSUPPORTED: powerpc64-unknown-linux-gnu +diff --git a/test/ExecutionEngine/MCJIT/remote/multi-module-a.ll b/test/ExecutionEngine/MCJIT/remote/multi-module-a.ll +index 37b74de2..d26936cb 100644 +--- a/test/ExecutionEngine/MCJIT/remote/multi-module-a.ll ++++ b/test/ExecutionEngine/MCJIT/remote/multi-module-a.ll +@@ -1,3 +1,4 @@ ++; UNSUPPORTED: loongarch64 + ; RUN: %lli -jit-kind=mcjit -extra-module=%p/Inputs/multi-module-b.ll -extra-module=%p/Inputs/multi-module-c.ll -disable-lazy-compilation=true -remote-mcjit -mcjit-remote-process=lli-child-target%exeext %s > /dev/null + ; XFAIL: windows-gnu,windows-msvc + ; UNSUPPORTED: powerpc64-unknown-linux-gnu +diff --git a/test/ExecutionEngine/MCJIT/remote/simpletest-remote.ll b/test/ExecutionEngine/MCJIT/remote/simpletest-remote.ll +index f458ab79..aefe0b3d 100644 +--- a/test/ExecutionEngine/MCJIT/remote/simpletest-remote.ll ++++ b/test/ExecutionEngine/MCJIT/remote/simpletest-remote.ll +@@ -1,3 +1,4 @@ ++; UNSUPPORTED: loongarch64 + ; RUN: %lli -jit-kind=mcjit -remote-mcjit -mcjit-remote-process=lli-child-target%exeext %s > /dev/null + ; XFAIL: windows-gnu,windows-msvc + ; UNSUPPORTED: powerpc64-unknown-linux-gnu +diff --git a/test/ExecutionEngine/MCJIT/remote/stubs-remote.ll b/test/ExecutionEngine/MCJIT/remote/stubs-remote.ll +index b8684a17..6776e081 100644 +--- a/test/ExecutionEngine/MCJIT/remote/stubs-remote.ll ++++ b/test/ExecutionEngine/MCJIT/remote/stubs-remote.ll +@@ -1,3 +1,4 @@ ++; UNSUPPORTED: loongarch64 + ; RUN: %lli -jit-kind=mcjit -remote-mcjit -disable-lazy-compilation=false -mcjit-remote-process=lli-child-target%exeext %s + ; XFAIL: windows-gnu,windows-msvc + ; UNSUPPORTED: powerpc64-unknown-linux-gnu +diff --git a/test/ExecutionEngine/MCJIT/remote/test-common-symbols-remote.ll b/test/ExecutionEngine/MCJIT/remote/test-common-symbols-remote.ll +index 
060b5e13..ed080372 100644 +--- a/test/ExecutionEngine/MCJIT/remote/test-common-symbols-remote.ll ++++ b/test/ExecutionEngine/MCJIT/remote/test-common-symbols-remote.ll +@@ -1,3 +1,4 @@ ++; UNSUPPORTED: loongarch64 + ; RUN: %lli -jit-kind=mcjit -remote-mcjit -O0 -disable-lazy-compilation=false -mcjit-remote-process=lli-child-target%exeext %s + ; XFAIL: windows-gnu,windows-msvc + ; UNSUPPORTED: powerpc64-unknown-linux-gnu +diff --git a/test/ExecutionEngine/MCJIT/remote/test-fp-no-external-funcs-remote.ll b/test/ExecutionEngine/MCJIT/remote/test-fp-no-external-funcs-remote.ll +index 6e60396e..a2aee9c9 100644 +--- a/test/ExecutionEngine/MCJIT/remote/test-fp-no-external-funcs-remote.ll ++++ b/test/ExecutionEngine/MCJIT/remote/test-fp-no-external-funcs-remote.ll +@@ -1,3 +1,4 @@ ++; UNSUPPORTED: loongarch64 + ; RUN: %lli -jit-kind=mcjit -remote-mcjit -mcjit-remote-process=lli-child-target%exeext %s > /dev/null + ; XFAIL: windows-gnu,windows-msvc + ; UNSUPPORTED: powerpc64-unknown-linux-gnu +diff --git a/test/ExecutionEngine/MCJIT/remote/test-global-init-nonzero-remote.ll b/test/ExecutionEngine/MCJIT/remote/test-global-init-nonzero-remote.ll +index b6fae460..753c6273 100644 +--- a/test/ExecutionEngine/MCJIT/remote/test-global-init-nonzero-remote.ll ++++ b/test/ExecutionEngine/MCJIT/remote/test-global-init-nonzero-remote.ll +@@ -1,3 +1,4 @@ ++; UNSUPPORTED: loongarch64 + ; RUN: %lli -jit-kind=mcjit -remote-mcjit -mcjit-remote-process=lli-child-target%exeext %s > /dev/null + ; XFAIL: windows-gnu,windows-msvc + ; UNSUPPORTED: powerpc64-unknown-linux-gnu +diff --git a/test/ExecutionEngine/MCJIT/remote/test-global-init-nonzero-sm-pic.ll b/test/ExecutionEngine/MCJIT/remote/test-global-init-nonzero-sm-pic.ll +index 34f72bc9..4d565426 100644 +--- a/test/ExecutionEngine/MCJIT/remote/test-global-init-nonzero-sm-pic.ll ++++ b/test/ExecutionEngine/MCJIT/remote/test-global-init-nonzero-sm-pic.ll +@@ -1,3 +1,4 @@ ++; UNSUPPORTED: loongarch64 + ; RUN: %lli -jit-kind=mcjit -remote-mcjit -mcjit-remote-process=lli-child-target%exeext \ + ; RUN: -relocation-model=pic -code-model=small %s > /dev/null + ; XFAIL: mips-, mipsel-, aarch64, arm, i686, i386, windows-gnu, windows-msvc +diff --git a/test/ExecutionEngine/MCJIT/remote/test-ptr-reloc-remote.ll b/test/ExecutionEngine/MCJIT/remote/test-ptr-reloc-remote.ll +index 9e76601c..61a898fe 100644 +--- a/test/ExecutionEngine/MCJIT/remote/test-ptr-reloc-remote.ll ++++ b/test/ExecutionEngine/MCJIT/remote/test-ptr-reloc-remote.ll +@@ -1,3 +1,4 @@ ++; UNSUPPORTED: loongarch64 + ; RUN: %lli -jit-kind=mcjit -remote-mcjit -O0 -mcjit-remote-process=lli-child-target%exeext %s + ; XFAIL: windows-gnu,windows-msvc + ; UNSUPPORTED: powerpc64-unknown-linux-gnu +diff --git a/test/ExecutionEngine/MCJIT/remote/test-ptr-reloc-sm-pic.ll b/test/ExecutionEngine/MCJIT/remote/test-ptr-reloc-sm-pic.ll +index 20f232ad..1d737b87 100644 +--- a/test/ExecutionEngine/MCJIT/remote/test-ptr-reloc-sm-pic.ll ++++ b/test/ExecutionEngine/MCJIT/remote/test-ptr-reloc-sm-pic.ll +@@ -1,3 +1,4 @@ ++; UNSUPPORTED: loongarch64 + ; RUN: %lli -jit-kind=mcjit -remote-mcjit -mcjit-remote-process=lli-child-target%exeext \ + ; RUN: -O0 -relocation-model=pic -code-model=small %s + ; XFAIL: mips-, mipsel-, aarch64, arm, i686, i386, windows-gnu, windows-msvc +diff --git a/tools/llvm-readobj/ELFDumper.cpp b/tools/llvm-readobj/ELFDumper.cpp +index 3d43d1a7..3fcd812a 100644 +--- a/tools/llvm-readobj/ELFDumper.cpp ++++ b/tools/llvm-readobj/ELFDumper.cpp +@@ -1204,6 +1204,7 @@ const EnumEntry ElfMachineType[] = { + 
ENUM_ENT(EM_LANAI, "EM_LANAI"), + ENUM_ENT(EM_BPF, "EM_BPF"), + ENUM_ENT(EM_VE, "NEC SX-Aurora Vector Engine"), ++ ENUM_ENT(EM_LOONGARCH, "LoongArch"), + }; + + const EnumEntry<unsigned> ElfSymbolBindings[] = { +@@ -1611,6 +1612,13 @@ const EnumEntry<unsigned> ElfHeaderAVRFlags[] = { + ENUM_ENT(EF_AVR_LINKRELAX_PREPARED, "relaxable"), + }; + ++static const EnumEntry<unsigned> ElfHeaderLoongArchFlags[] = { ++ ENUM_ENT(EF_LARCH_BASE_ABI_LP64D, "LP64D"), ++ ENUM_ENT(EF_LARCH_BASE_ABI_LP64S, "LP64S"), ENUM_ENT(EF_LARCH_BASE_ABI_LP64F, "LP64F") ++ // FIXME: Revise these and add more flags once all ABI definitions are ++ // finalized. See the current definitions: ++ // https://loongson.github.io/LoongArch-Documentation/LoongArch-ELF-ABI-EN.html#_e_flags_identifies_abi_type_and_version ++}; + + const EnumEntry<unsigned> ElfSymOtherFlags[] = { + LLVM_READOBJ_ENUM_ENT(ELF, STV_INTERNAL), +@@ -3320,6 +3328,9 @@ template <class ELFT> void GNUELFDumper<ELFT>::printFileHeaders() { + else if (e.e_machine == EM_AVR) + ElfFlags = printFlags(e.e_flags, makeArrayRef(ElfHeaderAVRFlags), + unsigned(ELF::EF_AVR_ARCH_MASK)); ++ else if (e.e_machine == EM_LOONGARCH) ++ ElfFlags = printFlags(e.e_flags, makeArrayRef(ElfHeaderLoongArchFlags), ++ unsigned(ELF::EF_LARCH_BASE_ABI)); + Str = "0x" + to_hexString(e.e_flags); + if (!ElfFlags.empty()) + Str = Str + ", " + ElfFlags; +@@ -6407,6 +6418,9 @@ template <class ELFT> void LLVMELFDumper<ELFT>::printFileHeaders() { + else if (E.e_machine == EM_AVR) + W.printFlags("Flags", E.e_flags, makeArrayRef(ElfHeaderAVRFlags), + unsigned(ELF::EF_AVR_ARCH_MASK)); ++ else if (E.e_machine == EM_LOONGARCH) ++ W.printFlags("Flags", E.e_flags, makeArrayRef(ElfHeaderLoongArchFlags), ++ unsigned(ELF::EF_LARCH_BASE_ABI)); + else + W.printFlags("Flags", E.e_flags); + W.printNumber("HeaderSize", E.e_ehsize); +diff --git a/tools/sancov/sancov.cpp b/tools/sancov/sancov.cpp +index c997154b..3dabd4fd 100644 +--- a/tools/sancov/sancov.cpp ++++ b/tools/sancov/sancov.cpp +@@ -691,7 +691,7 @@ static uint64_t getPreviousInstructionPc(uint64_t PC, + Triple TheTriple) { + if (TheTriple.isARM()) { + return (PC - 3) & (~1); +- } else if (TheTriple.isAArch64()) { ++ } else if (TheTriple.isAArch64() || TheTriple.isLoongArch64()) { + return PC - 4; + } else if (TheTriple.isMIPS()) { + return PC - 8; +diff --git a/utils/UpdateTestChecks/asm.py b/utils/UpdateTestChecks/asm.py +index 95d17baa..44cea9d4 100644 +--- a/utils/UpdateTestChecks/asm.py ++++ b/utils/UpdateTestChecks/asm.py +@@ -80,6 +80,12 @@ ASM_FUNCTION_AVR_RE = re.compile( + r'.Lfunc_end[0-9]+:\n', + flags=(re.M | re.S)) + ++ASM_FUNCTION_LOONGARCH_RE = re.compile( ++ r'^_?(?P<func>[^:]+):[ \t]*#+[ \t]*@(?P=func)\n[^:]*?' # f: (name of func) ++ r'(?P<body>^##?[ \t]+[^:]+:.*?)\s*' # (body of the function) ++ r'.Lfunc_end[0-9]+:\n', # .Lfunc_end[0-9]: ++ flags=(re.M | re.S)) ++ + ASM_FUNCTION_PPC_RE = re.compile( + r'#[ \-\t]*Begin function (?P<func>[^.:]+)\n' + r'.*?' +@@ -316,6 +322,16 @@ def scrub_asm_avr(asm, args): + asm = common.SCRUB_TRAILING_WHITESPACE_RE.sub(r'', asm) + return asm + ++def scrub_asm_loongarch(asm, args): ++ # Scrub runs of whitespace out of the assembly, but leave the leading ++ # whitespace in place. ++ asm = common.SCRUB_WHITESPACE_RE.sub(r' ', asm) ++ # Expand the tabs used for indentation. ++ asm = string.expandtabs(asm, 2) ++ # Strip trailing whitespace. ++ asm = common.SCRUB_TRAILING_WHITESPACE_RE.sub(r'', asm) ++ return asm ++ + def scrub_asm_riscv(asm, args): + # Scrub runs of whitespace out of the assembly, but leave the leading + # whitespace in place.
+@@ -433,6 +449,7 @@ def get_run_handler(triple): + 'avr': (scrub_asm_avr, ASM_FUNCTION_AVR_RE), + 'ppc32': (scrub_asm_powerpc, ASM_FUNCTION_PPC_RE), + 'powerpc': (scrub_asm_powerpc, ASM_FUNCTION_PPC_RE), ++ 'loongarch64': (scrub_asm_loongarch, ASM_FUNCTION_LOONGARCH_RE), + 'riscv32': (scrub_asm_riscv, ASM_FUNCTION_RISCV_RE), + 'riscv64': (scrub_asm_riscv, ASM_FUNCTION_RISCV_RE), + 'lanai': (scrub_asm_lanai, ASM_FUNCTION_LANAI_RE), +diff --git a/utils/gn/secondary/clang/lib/Basic/BUILD.gn b/utils/gn/secondary/clang/lib/Basic/BUILD.gn +index 09afa57a..e5c7fd77 100644 +--- a/utils/gn/secondary/clang/lib/Basic/BUILD.gn ++++ b/utils/gn/secondary/clang/lib/Basic/BUILD.gn +@@ -94,6 +94,7 @@ static_library("Basic") { + "Targets/Hexagon.cpp", + "Targets/Lanai.cpp", + "Targets/Le64.cpp", ++ "Targets/LoongArch.cpp", + "Targets/M68k.cpp", + "Targets/MSP430.cpp", + "Targets/Mips.cpp", +diff --git a/utils/gn/secondary/clang/lib/Driver/BUILD.gn b/utils/gn/secondary/clang/lib/Driver/BUILD.gn +index 6e34fcc2..4c66fde3 100644 +--- a/utils/gn/secondary/clang/lib/Driver/BUILD.gn ++++ b/utils/gn/secondary/clang/lib/Driver/BUILD.gn +@@ -47,6 +47,7 @@ static_library("Driver") { + "ToolChains/Ananas.cpp", + "ToolChains/Arch/AArch64.cpp", + "ToolChains/Arch/ARM.cpp", ++ "ToolChains/Arch/LoongArch.cpp", + "ToolChains/Arch/M68k.cpp", + "ToolChains/Arch/Mips.cpp", + "ToolChains/Arch/PPC.cpp", +diff --git a/utils/gn/secondary/llvm/include/llvm/IR/BUILD.gn b/utils/gn/secondary/llvm/include/llvm/IR/BUILD.gn +index f12d39ad..1280c748 100644 +--- a/utils/gn/secondary/llvm/include/llvm/IR/BUILD.gn ++++ b/utils/gn/secondary/llvm/include/llvm/IR/BUILD.gn +@@ -67,6 +67,16 @@ tablegen("IntrinsicsHexagon") { + td_file = "Intrinsics.td" + } + ++tablegen("IntrinsicsLoongArch") { ++ visibility = [ ":public_tablegen" ] ++ output_name = "IntrinsicsLoongArch.h" ++ args = [ ++ "-gen-intrinsic-enums", ++ "-intrinsic-prefix=loongarch", ++ ] ++ td_file = "Intrinsics.td" ++} ++ + tablegen("IntrinsicsMips") { + visibility = [ ":public_tablegen" ] + output_name = "IntrinsicsMips.h" +@@ -186,6 +196,7 @@ group("public_tablegen") { + ":IntrinsicsARM", + ":IntrinsicsBPF", + ":IntrinsicsHexagon", ++ ":IntrinsicsLoongArch", + ":IntrinsicsMips", + ":IntrinsicsNVPTX", + ":IntrinsicsPowerPC", +diff --git a/utils/gn/secondary/llvm/lib/Target/LoongArch/AsmParser/BUILD.gn b/utils/gn/secondary/llvm/lib/Target/LoongArch/AsmParser/BUILD.gn +new file mode 100644 +index 00000000..cc3bb49a +--- /dev/null ++++ b/utils/gn/secondary/llvm/lib/Target/LoongArch/AsmParser/BUILD.gn +@@ -0,0 +1,24 @@ ++import("//llvm/utils/TableGen/tablegen.gni") ++ ++tablegen("LoongArchGenAsmMatcher") { ++ visibility = [ ":AsmParser" ] ++ args = [ "-gen-asm-matcher" ] ++ td_file = "../LoongArch.td" ++} ++ ++static_library("AsmParser") { ++ output_name = "LLVMLoongArchAsmParser" ++ deps = [ ++ ":LoongArchGenAsmMatcher", ++ "//llvm/lib/MC", ++ "//llvm/lib/MC/MCParser", ++ "//llvm/lib/Support", ++ "//llvm/lib/Target/LoongArch/MCTargetDesc", ++ "//llvm/lib/Target/LoongArch/TargetInfo", ++ ] ++ include_dirs = [ ".." ] ++ sources = [ ++ # Make `gn format` not collapse this, for sync_source_lists_from_cmake.py. 
++ "LoongArchAsmParser.cpp", ++ ] ++} +diff --git a/utils/gn/secondary/llvm/lib/Target/LoongArch/BUILD.gn b/utils/gn/secondary/llvm/lib/Target/LoongArch/BUILD.gn +new file mode 100644 +index 00000000..e89db520 +--- /dev/null ++++ b/utils/gn/secondary/llvm/lib/Target/LoongArch/BUILD.gn +@@ -0,0 +1,102 @@ ++import("//llvm/utils/TableGen/tablegen.gni") ++ ++tablegen("LoongArchGenCallingConv") { ++ visibility = [ ":LLVMLoongArchCodeGen" ] ++ args = [ "-gen-callingconv" ] ++ td_file = "LoongArch.td" ++} ++ ++tablegen("LoongArchGenDAGISel") { ++ visibility = [ ":LLVMLoongArchCodeGen" ] ++ args = [ "-gen-dag-isel" ] ++ td_file = "LoongArch.td" ++} ++ ++tablegen("LoongArchGenFastISel") { ++ visibility = [ ":LLVMLoongArchCodeGen" ] ++ args = [ "-gen-fast-isel" ] ++ td_file = "LoongArch.td" ++} ++ ++tablegen("LoongArchGenGlobalISel") { ++ visibility = [ ":LLVMLoongArchCodeGen" ] ++ args = [ "-gen-global-isel" ] ++ td_file = "LoongArch.td" ++} ++ ++tablegen("LoongArchGenMCPseudoLowering") { ++ visibility = [ ":LLVMLoongArchCodeGen" ] ++ args = [ "-gen-pseudo-lowering" ] ++ td_file = "LoongArch.td" ++} ++ ++tablegen("LoongArchGenRegisterBank") { ++ visibility = [ ":LLVMLoongArchCodeGen" ] ++ args = [ "-gen-register-bank" ] ++ td_file = "LoongArch.td" ++} ++ ++static_library("LLVMLoongArchCodeGen") { ++ deps = [ ++ ":LoongArchGenCallingConv", ++ ":LoongArchGenDAGISel", ++ ":LoongArchGenFastISel", ++ ":LoongArchGenGlobalISel", ++ ":LoongArchGenMCPseudoLowering", ++ ":LoongArchGenRegisterBank", ++ "MCTargetDesc", ++ "TargetInfo", ++ "//llvm/include/llvm/Config:llvm-config", ++ "//llvm/lib/Analysis", ++ "//llvm/lib/CodeGen", ++ "//llvm/lib/CodeGen/AsmPrinter", ++ "//llvm/lib/CodeGen/GlobalISel", ++ "//llvm/lib/CodeGen/SelectionDAG", ++ "//llvm/lib/IR", ++ "//llvm/lib/MC", ++ "//llvm/lib/Support", ++ "//llvm/lib/Target", ++ ] ++ include_dirs = [ "." ] ++ sources = [ ++ "LoongArchAnalyzeImmediate.cpp", ++ "LoongArchAsmPrinter.cpp", ++ "LoongArchCCState.cpp", ++ "LoongArchCallLowering.cpp", ++ "LoongArchConstantIslandPass.cpp", ++ "LoongArchDelaySlotFiller.cpp", ++ "LoongArchExpandPseudo.cpp", ++ "LoongArchFrameLowering.cpp", ++ "LoongArchISelDAGToDAG.cpp", ++ "LoongArchISelLowering.cpp", ++ "LoongArchInstrInfo.cpp", ++ "LoongArchInstructionSelector.cpp", ++ "LoongArchLegalizerInfo.cpp", ++ "LoongArchMCInstLower.cpp", ++ "LoongArchMachineFunction.cpp", ++ "LoongArchModuleISelDAGToDAG.cpp", ++ "LoongArchOptimizePICCall.cpp", ++ "LoongArchPreLegalizerCombiner.cpp", ++ "LoongArchRegisterBankInfo.cpp", ++ "LoongArchRegisterInfo.cpp", ++ "LoongArchSubtarget.cpp", ++ "LoongArchTargetMachine.cpp", ++ "LoongArchTargetObjectFile.cpp", ++ ] ++} ++ ++# This is a bit different from most build files: Due to this group ++# having the directory's name, "//llvm/lib/Target/LoongArch" will refer to this ++# target, which pulls in the code in this directory *and all subdirectories*. ++# For most other directories, "//llvm/lib/Foo" only pulls in the code directly ++# in "llvm/lib/Foo". The forwarding targets in //llvm/lib/Target expect this ++# different behavior. 
++group("LoongArch") { ++ deps = [ ++ ":LLVMLoongArchCodeGen", ++ "AsmParser", ++ "Disassembler", ++ "MCTargetDesc", ++ "TargetInfo", ++ ] ++} +diff --git a/utils/gn/secondary/llvm/lib/Target/LoongArch/Disassembler/BUILD.gn b/utils/gn/secondary/llvm/lib/Target/LoongArch/Disassembler/BUILD.gn +new file mode 100644 +index 00000000..0a9b4cf5 +--- /dev/null ++++ b/utils/gn/secondary/llvm/lib/Target/LoongArch/Disassembler/BUILD.gn +@@ -0,0 +1,23 @@ ++import("//llvm/utils/TableGen/tablegen.gni") ++ ++tablegen("LoongArchGenDisassemblerTables") { ++ visibility = [ ":Disassembler" ] ++ args = [ "-gen-disassembler" ] ++ td_file = "../LoongArch.td" ++} ++ ++static_library("Disassembler") { ++ output_name = "LLVMLoongArchDisassembler" ++ deps = [ ++ ":LoongArchGenDisassemblerTables", ++ "//llvm/lib/MC/MCDisassembler", ++ "//llvm/lib/Support", ++ "//llvm/lib/Target/LoongArch/MCTargetDesc", ++ "//llvm/lib/Target/LoongArch/TargetInfo", ++ ] ++ include_dirs = [ ".." ] ++ sources = [ ++ # Make `gn format` not collapse this, for sync_source_lists_from_cmake.py. ++ "LoongArchDisassembler.cpp", ++ ] ++} +diff --git a/utils/gn/secondary/llvm/lib/Target/LoongArch/MCTargetDesc/BUILD.gn b/utils/gn/secondary/llvm/lib/Target/LoongArch/MCTargetDesc/BUILD.gn +new file mode 100644 +index 00000000..f0b96c96 +--- /dev/null ++++ b/utils/gn/secondary/llvm/lib/Target/LoongArch/MCTargetDesc/BUILD.gn +@@ -0,0 +1,74 @@ ++import("//llvm/utils/TableGen/tablegen.gni") ++ ++tablegen("LoongArchGenAsmWriter") { ++ visibility = [ ":MCTargetDesc" ] ++ args = [ "-gen-asm-writer" ] ++ td_file = "../LoongArch.td" ++} ++ ++tablegen("LoongArchGenInstrInfo") { ++ visibility = [ ":tablegen" ] ++ args = [ "-gen-instr-info" ] ++ td_file = "../LoongArch.td" ++} ++ ++tablegen("LoongArchGenMCCodeEmitter") { ++ visibility = [ ":MCTargetDesc" ] ++ args = [ "-gen-emitter" ] ++ td_file = "../LoongArch.td" ++} ++ ++tablegen("LoongArchGenRegisterInfo") { ++ visibility = [ ":tablegen" ] ++ args = [ "-gen-register-info" ] ++ td_file = "../LoongArch.td" ++} ++ ++tablegen("LoongArchGenSubtargetInfo") { ++ visibility = [ ":tablegen" ] ++ args = [ "-gen-subtarget" ] ++ td_file = "../LoongArch.td" ++} ++ ++# This should contain tablegen targets generating .inc files included ++# by other targets. .inc files only used by .cpp files in this directory ++# should be in deps on the static_library instead. ++group("tablegen") { ++ visibility = [ ++ ":MCTargetDesc", ++ "../TargetInfo", ++ ] ++ public_deps = [ ++ ":LoongArchGenInstrInfo", ++ ":LoongArchGenRegisterInfo", ++ ":LoongArchGenSubtargetInfo", ++ ] ++} ++ ++static_library("MCTargetDesc") { ++ output_name = "LLVMLoongArchDesc" ++ public_deps = [ ":tablegen" ] ++ deps = [ ++ ":LoongArchGenAsmWriter", ++ ":LoongArchGenMCCodeEmitter", ++ "//llvm/lib/MC", ++ "//llvm/lib/Support", ++ "//llvm/lib/Target/LoongArch/TargetInfo", ++ ] ++ include_dirs = [ ".." 
] ++ sources = [ ++ "LoongArchABIFlagsSection.cpp", ++ "LoongArchABIInfo.cpp", ++ "LoongArchAsmBackend.cpp", ++ "LoongArchELFObjectWriter.cpp", ++ "LoongArchELFStreamer.cpp", ++ "LoongArchInstPrinter.cpp", ++ "LoongArchMCAsmInfo.cpp", ++ "LoongArchMCCodeEmitter.cpp", ++ "LoongArchMCExpr.cpp", ++ "LoongArchMCTargetDesc.cpp", ++ "LoongArchNaClELFStreamer.cpp", ++ "LoongArchOptionRecord.cpp", ++ "LoongArchTargetStreamer.cpp", ++ ] ++} +diff --git a/utils/gn/secondary/llvm/lib/Target/LoongArch/TargetInfo/BUILD.gn b/utils/gn/secondary/llvm/lib/Target/LoongArch/TargetInfo/BUILD.gn +new file mode 100644 +index 00000000..a476bdd5 +--- /dev/null ++++ b/utils/gn/secondary/llvm/lib/Target/LoongArch/TargetInfo/BUILD.gn +@@ -0,0 +1,9 @@ ++static_library("TargetInfo") { ++ output_name = "LLVMLoongArchInfo" ++ deps = [ "//llvm/lib/Support" ] ++ include_dirs = [ ".." ] ++ sources = [ ++ # Make `gn format` not collapse this, for sync_source_lists_from_cmake.py. ++ "LoongArchTargetInfo.cpp", ++ ] ++} +diff --git a/utils/gn/secondary/llvm/lib/Target/targets.gni b/utils/gn/secondary/llvm/lib/Target/targets.gni +index 699b01fb..cdef6f2a 100644 +--- a/utils/gn/secondary/llvm/lib/Target/targets.gni ++++ b/utils/gn/secondary/llvm/lib/Target/targets.gni +@@ -16,6 +16,7 @@ llvm_all_targets = [ + "BPF", + "Hexagon", + "Lanai", ++ "LoongArch", + "Mips", + "NVPTX", + "PowerPC", +@@ -49,6 +50,7 @@ llvm_build_AArch64 = false + llvm_build_AMDGPU = false + llvm_build_ARM = false + llvm_build_BPF = false ++llvm_build_LoongArch = false + llvm_build_Mips = false + llvm_build_PowerPC = false + llvm_build_SystemZ = false +@@ -63,6 +65,8 @@ foreach(target, llvm_targets_to_build) { + llvm_build_ARM = true + } else if (target == "BPF") { + llvm_build_BPF = true ++ } else if (target == "LoongArch") { ++ llvm_build_LoongArch = true + } else if (target == "Mips") { + llvm_build_Mips = true + } else if (target == "PowerPC") { diff --git a/lm_sensors/PKGBUILD b/lm_sensors/PKGBUILD index 52871eac83..28f4e15ee8 100644 --- a/lm_sensors/PKGBUILD +++ b/lm_sensors/PKGBUILD @@ -5,7 +5,7 @@ pkgname=lm_sensors pkgver=3.6.0.r41.g31d1f125 _commit=31d1f125d8076f1c8c8f3224b31d240e6e6a1763 #_pkgver=${pkgver//./-} -pkgrel=2 +pkgrel=5 epoch=1 pkgdesc="Collection of user space tools for general SMBus access and hardware monitoring" arch=('loong64' 'x86_64') diff --git a/lsd/PKGBUILD b/lsd/PKGBUILD index 34a6287945..bac04d17f0 100644 --- a/lsd/PKGBUILD +++ b/lsd/PKGBUILD @@ -23,7 +23,7 @@ options=('!lto') prepare() { cd ${pkgname}-${pkgver} - cargo fetch --locked --target "$CARCH-unknown-linux-gnu" + cargo fetch --locked --target "`uname -m`-unknown-linux-gnu" } build() { diff --git a/lua-compat53/PKGBUILD b/lua-compat53/PKGBUILD index 21ebdaa4cf..fdafb84924 100644 --- a/lua-compat53/PKGBUILD +++ b/lua-compat53/PKGBUILD @@ -4,7 +4,7 @@ pkgbase='lua-compat53' pkgname=('lua51-compat53' 'lua52-compat53') pkgver=0.12 -pkgrel=1 +pkgrel=2 pkgdesc='Compatibility module providing Lua-5.3-style APIs' arch=('loong64' 'x86_64') url='https://github.com/keplerproject/lua-compat-5.3' diff --git a/lua-system/PKGBUILD b/lua-system/PKGBUILD index 439bbedccf..a7570cd3a4 100644 --- a/lua-system/PKGBUILD +++ b/lua-system/PKGBUILD @@ -18,7 +18,7 @@ makedepends=(lua checkdepends=(busted) options=(debug) _archive="${pkgbase/-/}-$pkgver" -_rock="$_archive-$_rockrel.linux-$CARCH.rock" +_rock="$_archive-$_rockrel.linux-`uname -m`.rock" _rockspec="$_archive-$_rockrel.rockspec" source=("$url/archive/v$pkgver/$_archive.tar.gz") 
sha256sums=('0b83f68e9edbba92bef11ec0ccf1e5bb779a7337653f7bb77e0240c8e85c0b94') diff --git a/lua-term/PKGBUILD b/lua-term/PKGBUILD index 552af991dc..f1cd448d73 100644 --- a/lua-term/PKGBUILD +++ b/lua-term/PKGBUILD @@ -17,7 +17,7 @@ makedepends=(lua lua53 luarocks) _archive="$pkgbase-$_pkgver" -_rock="$pkgbase-$pkgver-$_rockrel.linux-$CARCH.rock" +_rock="$pkgbase-$pkgver-$_rockrel.linux-`uname -m`.rock" _rockspec="$pkgbase-$pkgver-$_rockrel.rockspec" source=("$url/archive/$_pkgver/$_archive.tar.gz") sha256sums=('8ff94f390ea9d98c734699373ca3b0ce500d651b2ab1cb8d7d2336fc5b79cded') diff --git a/luaexpat/PKGBUILD b/luaexpat/PKGBUILD index 400196d50d..b2a9837e32 100644 --- a/luaexpat/PKGBUILD +++ b/luaexpat/PKGBUILD @@ -21,7 +21,7 @@ makedepends=(lua luarocks) options=(debug) _archive="$pkgbase-$pkgver" -_rock="$_archive-$_rockrel.linux-$CARCH.rock" +_rock="$_archive-$_rockrel.linux-`uname -m`.rock" _rockspec="$_archive-$_rockrel.rockspec" source=("$url/archive/$pkgver/$_archive.tar.gz") sha512sums=('45834b02d863a65c6de2ad3c5c671935ae512dd906233a647269438aeeb37c7bbd6e7166aefd8da72480b33e1aa79a86ed070ed8800a8e0f303ccc74918f3bb8') diff --git a/luajit/PKGBUILD b/luajit/PKGBUILD index dee048bf20..33140378b4 100644 --- a/luajit/PKGBUILD +++ b/luajit/PKGBUILD @@ -7,20 +7,20 @@ pkgname=luajit # LuaJIT has a "rolling release" where you should follow git HEAD -_commit=ff204d0350575cf710f6f4af982db146cb454e1a +_commit=7a526c78949b6cd9d6b7cc83c3ad908df0dc6e1c # The patch version is the timestamp of the above git commit, obtain via `git show -s --format=%ct` -_ct=1702233742 +_ct=1658991363 pkgver="2.1.${_ct}" -pkgrel=1 +pkgrel=11 pkgdesc='Just-in-time compiler and drop-in replacement for Lua 5.1' arch=('loong64' 'x86_64') url='https://luajit.org/' license=('MIT') depends=('gcc-libs') -source=("LuaJIT-${_commit}.tar.gz::https://github.com/LuaJIT/LuaJIT/archive/${_commit}.tar.gz") -md5sums=('97486356d223510a6e3c31a20bcd32ed') -sha256sums=('3ec37f78ab3b1afd4c3af0fde743c332da3da32eadc8500489c1cc2e4f0ec7eb') -b2sums=('6ba03fa107baadf0ac980d515debd638b1a166014ee46c6fa95865a12678a831fbae04d14ccb737723a69874af2b0637bbaa516973830ca4c7e5311aa3f91b76') +source=("LuaJIT-${_commit}.tar.gz::https://github.com/sophie-zhao/LuaJIT/archive/${_commit}.tar.gz") +md5sums=('39069473555de4c3da74f856e8def899') +sha256sums=('8f09bdce136b020ad9a50501b4fa64128309cb633fa2888e51210b38652ab85a') +b2sums=('baabf7fb403a5e143a7019c047dde5d3f0d03f03265d8eb1939e6b0e9cc8268b5cf98adc4a8463a6fa0a519054027696f6ddc7ee1fb3150d10f0d53d4c62957e') build() { cd "LuaJIT-${_commit}" diff --git a/luarocks/PKGBUILD b/luarocks/PKGBUILD index c26ff6009e..c11a88a4d9 100644 --- a/luarocks/PKGBUILD +++ b/luarocks/PKGBUILD @@ -30,7 +30,8 @@ source=("https://luarocks.org/releases/$pkgname-$pkgver.tar.gz"{,.asc} "luarocks.zsh" "luarocks-admin.bash" "luarocks-admin.fish" - "luarocks-admin.zsh") + "luarocks-admin.zsh" + "luarocks-la64.patch") sha512sums=('7e93164bdcf35338e993822931a8e59957ab69c6e23236a7a075649c2f7cd173305c4ca6e9d115fc5e282cb76c21754d7adac92b21885006e5ee3bc06a9d0059' 'SKIP' 'cd90cba877c0dd435b44328a0869e3102c52eb36ed5b764c3ee8a78de95772fe094419dc032db2e2c91d9a0aa35ab7c6e8a13e29b9feeb3a3172d7a129d005c0' @@ -42,9 +43,15 @@ sha512sums=('7e93164bdcf35338e993822931a8e59957ab69c6e23236a7a075649c2f7cd173305 '4a24e6f44c13711ba42eab155d7e5699707e9b8d00158104065ce01d8cfdbe5ea9c1e4f12ed10b324caa076b6862a630a4638092827698a5455181e38d821869' 
'b43ccd818c3022b63690b6d060940bbbfd2775b1c174b9e99c8162ea0e746b276b4414c2489156e6bb9eae9ce41d83481022a5a757ea94b62074a6c8c57e9c5a' '6b52bd6f446586c3e7d5dbe1ba870e5e37c5bb9cdd5e5878959fa1c151b5a43d8b37c1dc43de46ff2c70c1f57c0429a85c382f53390179609a2880e5aa089497' - 'f6ab6c65aaaa2680ce83a3277a2aec4400d234ddfc17c62f44190928a53b7146c38f8a779aedc1c71fed3dd2f1833e1ca924080e98ecf7462274b0d9acb07f90') + 'f6ab6c65aaaa2680ce83a3277a2aec4400d234ddfc17c62f44190928a53b7146c38f8a779aedc1c71fed3dd2f1833e1ca924080e98ecf7462274b0d9acb07f90' + '6d704581d6f21a5887e2c272fea58fa13a345a702234b0cc9f2141bfb7b9f51759b256aec57560222021f0cebaf8ff4a3ac9a92e0e06492de0a1b3458eb0a7ed') validpgpkeys=('8460980B2B79786DE0C7FCC83FD8F43C2BB3C478') +prepare() { + cd "$pkgname-$pkgver" + patch -p1 -i $srcdir/luarocks-la64.patch +} + build() { cd "$pkgname-$pkgver" ./configure --prefix=/usr diff --git a/luarocks/luarocks-la64.patch b/luarocks/luarocks-la64.patch new file mode 100644 index 0000000000..2c3f67e7da --- /dev/null +++ b/luarocks/luarocks-la64.patch @@ -0,0 +1,24 @@ +Index: luarocks-3.9.1/src/luarocks/core/sysdetect.lua +=================================================================== +--- luarocks-3.9.1.orig/src/luarocks/core/sysdetect.lua ++++ luarocks-3.9.1/src/luarocks/core/sysdetect.lua +@@ -91,6 +91,7 @@ local e_machines = { + [0xB6] = "alpha", + [0xB7] = "aarch64", + [0xF3] = "riscv64", ++ [0x102] = "loongarch", + [0x9026] = "alpha", + } + +@@ -219,6 +220,11 @@ local function read_elf_header(fd) + if endian == 1 and processor == "ppc64" then + processor = "ppc64le" + end ++ if processor == "loongarch" and hdr.bits == 1 then ++ processor = "loongarch32" ++ elseif processor == "loongarch" and hdr.bits == 2 then ++ processor = "loongarch64" ++ end + + local elfversion = read(fd, 4, endian) + if elfversion ~= 1 then diff --git a/luasocket/PKGBUILD b/luasocket/PKGBUILD index 4fb116d758..24923d43af 100644 --- a/luasocket/PKGBUILD +++ b/luasocket/PKGBUILD @@ -19,7 +19,7 @@ makedepends=(lua lua53 luarocks) _archive="$pkgbase-$pkgver" -_rock="$_archive-$_rockrel.linux-$CARCH.rock" +_rock="$_archive-$_rockrel.linux-`uname -m`.rock" source=("$url/archive/v$pkgver/$_archive.tar.gz") sha256sums=('bf033aeb9e62bcaa8d007df68c119c966418e8c9ef7e4f2d7e96bddeca9cca6e') diff --git a/lucky-commit/PKGBUILD b/lucky-commit/PKGBUILD index 96c685dbf0..4656a44cb0 100644 --- a/lucky-commit/PKGBUILD +++ b/lucky-commit/PKGBUILD @@ -15,7 +15,7 @@ sha512sums=('cf382a760dd948d3cc4cef8901c97d2a8e3305e877d619cd38a9331bccfd924c6f5 prepare() { cd "$pkgname-$pkgver" - cargo fetch --locked --target "$CARCH-unknown-linux-gnu" + cargo fetch --locked --target "`uname -m`-unknown-linux-gnu" } build() { diff --git a/lurk/PKGBUILD b/lurk/PKGBUILD index 215713d43b..4dd6f07561 100644 --- a/lurk/PKGBUILD +++ b/lurk/PKGBUILD @@ -15,7 +15,7 @@ sha512sums=('f545b83f5f6fc84399419394c606e3d7c9c4a5ed094ae171f4f226768609ee7c1d3 prepare() { cd "$pkgname-$pkgver" - cargo fetch --locked --target "$CARCH-unknown-linux-gnu" + cargo fetch --locked --target "`uname -m`-unknown-linux-gnu" } build() { diff --git a/lxc/4363.patch b/lxc/4363.patch new file mode 100644 index 0000000000..9bdd362937 --- /dev/null +++ b/lxc/4363.patch @@ -0,0 +1,235 @@ +From c0957ba88e9bdb7cbde54b28fd8f9e801bfeb496 Mon Sep 17 00:00:00 2001 +From: zhaixiaojuan +Date: Thu, 2 Nov 2023 16:14:45 +0800 +Subject: [PATCH] Add loongarch64 support + +Signed-off-by: zhaixiaojuan +--- + config/bash/_lxc.in | 2 +- + src/lxc/confile.c | 57 ++++++++++++++++++++------------------- + src/lxc/syscall_numbers.h | 
26 ++++++++++++++++++ + src/tests/arch_parse.c | 9 ++++--- + 4 files changed, 61 insertions(+), 33 deletions(-) + +diff --git a/config/bash/_lxc.in b/config/bash/_lxc.in +index 6672bf02d9..8a7ce16af8 100644 +--- a/config/bash/_lxc.in ++++ b/config/bash/_lxc.in +@@ -285,7 +285,7 @@ _lxc_attach() { + ;; + --arch | -a ) + # https://github.com/lxc/lxc/blob/stable-4.0/src/tests/arch_parse.c#L37 +- COMPREPLY=( $( compgen -W 'arm armel armhf armv7l athlon i386 i486 i586 i686 linux32 mips mipsel ppc powerpc x86 aarch64 amd64 arm64 linux64 mips64 mips64el ppc64 ppc64el ppc64le powerpc64 riscv64 s390x x86_64' -- "${cur}" ) ) ++ COMPREPLY=( $( compgen -W 'arm armel armhf armv7l athlon i386 i486 i586 i686 linux32 mips mipsel ppc powerpc x86 aarch64 amd64 arm64 linux64 loongarch64 mips64 mips64el ppc64 ppc64el ppc64le powerpc64 riscv64 s390x x86_64' -- "${cur}" ) ) + return + ;; + --elevated-privileges | -e ) +diff --git a/src/lxc/confile.c b/src/lxc/confile.c +index 7a8a534186..bccc54f1a2 100644 +--- a/src/lxc/confile.c ++++ b/src/lxc/confile.c +@@ -3254,34 +3254,35 @@ int lxc_config_parse_arch(const char *arch, signed long *persona) + char *name; + unsigned long per; + } pername[] = { +- { "arm", PER_LINUX32 }, +- { "armel", PER_LINUX32 }, +- { "armhf", PER_LINUX32 }, +- { "armv7l", PER_LINUX32 }, +- { "athlon", PER_LINUX32 }, +- { "i386", PER_LINUX32 }, +- { "i486", PER_LINUX32 }, +- { "i586", PER_LINUX32 }, +- { "i686", PER_LINUX32 }, +- { "linux32", PER_LINUX32 }, +- { "mips", PER_LINUX32 }, +- { "mipsel", PER_LINUX32 }, +- { "ppc", PER_LINUX32 }, +- { "powerpc", PER_LINUX32 }, +- { "x86", PER_LINUX32 }, +- { "aarch64", PER_LINUX }, +- { "amd64", PER_LINUX }, +- { "arm64", PER_LINUX }, +- { "linux64", PER_LINUX }, +- { "mips64", PER_LINUX }, +- { "mips64el", PER_LINUX }, +- { "ppc64", PER_LINUX }, +- { "ppc64el", PER_LINUX }, +- { "ppc64le", PER_LINUX }, +- { "powerpc64", PER_LINUX }, +- { "riscv64", PER_LINUX }, +- { "s390x", PER_LINUX }, +- { "x86_64", PER_LINUX }, ++ { "arm", PER_LINUX32 }, ++ { "armel", PER_LINUX32 }, ++ { "armhf", PER_LINUX32 }, ++ { "armv7l", PER_LINUX32 }, ++ { "athlon", PER_LINUX32 }, ++ { "i386", PER_LINUX32 }, ++ { "i486", PER_LINUX32 }, ++ { "i586", PER_LINUX32 }, ++ { "i686", PER_LINUX32 }, ++ { "linux32", PER_LINUX32 }, ++ { "mips", PER_LINUX32 }, ++ { "mipsel", PER_LINUX32 }, ++ { "ppc", PER_LINUX32 }, ++ { "powerpc", PER_LINUX32 }, ++ { "x86", PER_LINUX32 }, ++ { "aarch64", PER_LINUX }, ++ { "amd64", PER_LINUX }, ++ { "arm64", PER_LINUX }, ++ { "linux64", PER_LINUX }, ++ { "loongarch64", PER_LINUX }, ++ { "mips64", PER_LINUX }, ++ { "mips64el", PER_LINUX }, ++ { "ppc64", PER_LINUX }, ++ { "ppc64el", PER_LINUX }, ++ { "ppc64le", PER_LINUX }, ++ { "powerpc64", PER_LINUX }, ++ { "riscv64", PER_LINUX }, ++ { "s390x", PER_LINUX }, ++ { "x86_64", PER_LINUX }, + }; + + for (size_t i = 0; i < ARRAY_SIZE(pername); i++) { +diff --git a/src/lxc/syscall_numbers.h b/src/lxc/syscall_numbers.h +index ae5fdd0641..58840a5981 100644 +--- a/src/lxc/syscall_numbers.h ++++ b/src/lxc/syscall_numbers.h +@@ -50,6 +50,8 @@ + #if _MIPS_SIM == _MIPS_SIM_ABI64 /* n64 */ + #define __NR_keyctl 5241 + #endif ++ #elif defined __loongarch64 ++ #define __NR_keyctl 219 + #else + #define -1 + #warning "__NR_keyctl not defined for your architecture" +@@ -87,6 +89,8 @@ + #if _MIPS_SIM == _MIPS_SIM_ABI64 + #define __NR_memfd_create 5314 + #endif ++ #elif defined __loongarch64 ++ #define __NR_memfd_create 279 + #else + #define -1 + #warning "__NR_memfd_create not defined for your architecture" +@@ -122,6 
+126,8 @@ + #if _MIPS_SIM == _MIPS_SIM_ABI64 /* n64 */ + #define __NR_pivot_root 5151 + #endif ++ #elif defined __loongarch64 ++ #define __NR_pivot_root 41 + #else + #define -1 + #warning "__NR_pivot_root not defined for your architecture" +@@ -157,6 +163,8 @@ + #if _MIPS_SIM == _MIPS_SIM_ABI64 /* n64 */ + #define __NR_setns 5303 + #endif ++ #elif defined __loongarch64 ++ #define __NR_setns 268 + #else + #define -1 + #warning "__NR_setns not defined for your architecture" +@@ -192,6 +200,8 @@ + #if _MIPS_SIM == _MIPS_SIM_ABI64 /* n64 */ + #define __NR_sethostname 5165 + #endif ++ #elif defined __loongarch64 ++ #define __NR_sethostname 161 + #else + #define -1 + #warning "__NR_sethostname not defined for your architecture" +@@ -259,6 +269,8 @@ + #if _MIPS_SIM == _MIPS_SIM_ABI64 /* n64 */ + #define __NR_signalfd4 5283 + #endif ++ #elif defined __loongarch64 ++ #define __NR_signalfd4 74 + #else + #define -1 + #warning "__NR_signalfd4 not defined for your architecture" +@@ -294,6 +306,8 @@ + #if _MIPS_SIM == _MIPS_SIM_ABI64 /* n64 */ + #define __NR_unshare 5262 + #endif ++ #elif defined __loongarch64 ++ #define __NR_unshare 97 + #else + #define -1 + #warning "__NR_unshare not defined for your architecture" +@@ -329,6 +343,8 @@ + #if _MIPS_SIM == _MIPS_SIM_ABI64 /* n64 */ + #define __NR_bpf 5315 + #endif ++ #elif defined __loongarch64 ++ #define __NR_bpf 280 + #else + #define -1 + #warning "__NR_bpf not defined for your architecture" +@@ -364,6 +380,8 @@ + #if _MIPS_SIM == _MIPS_SIM_ABI64 /* n64 */ + #define __NR_faccessat 5259 + #endif ++ #elif defined __loongarch64 ++ #define __NR_faccessat 48 + #else + #define -1 + #warning "__NR_faccessat not defined for your architecture" +@@ -419,6 +437,8 @@ + #if _MIPS_SIM == _MIPS_SIM_ABI64 /* n64 */ + #define __NR_seccomp 5312 + #endif ++ #elif defined __loongarch64 ++ #define __NR_seccomp 277 + #else + #define -1 + #warning "__NR_seccomp not defined for your architecture" +@@ -454,6 +474,8 @@ + #if _MIPS_SIM == _MIPS_SIM_ABI64 /* n64 */ + #define __NR_gettid 5178 + #endif ++ #elif defined __loongarch64 ++ #define __NR_gettid 178 + #else + #define -1 + #warning "__NR_gettid not defined for your architecture" +@@ -493,6 +515,8 @@ + #if _MIPS_SIM == _MIPS_SIM_ABI64 /* n64 */ + #define __NR_execveat 5316 + #endif ++ #elif defined __loongarch64 ++ #define __NR_execveat 281 + #else + #define -1 + #warning "__NR_execveat not defined for your architecture" +@@ -732,6 +756,8 @@ + #if _MIPS_SIM == _MIPS_SIM_ABI64 /* n64 */ + #define __NR_personality (132 + 5000) + #endif ++ #elif defined __loongarch64 ++ #define __NR_personality 92 + #else + #define -1 + #warning "__NR_personality not defined for your architecture" +diff --git a/src/tests/arch_parse.c b/src/tests/arch_parse.c +index 6d72d664de..f8b4d8a0a1 100644 +--- a/src/tests/arch_parse.c ++++ b/src/tests/arch_parse.c +@@ -37,10 +37,11 @@ + #endif + + static const char *const arches[] = { +- "arm", "armel", "armhf", "armv7l", "athlon", "i386", "i486", +- "i586", "i686", "linux32", "mips", "mipsel", "ppc", "powerpc", +- "x86", "aarch64", "amd64", "arm64", "linux64", "mips64", "mips64el", +- "ppc64", "ppc64el", "ppc64le", "powerpc64", "riscv64", "s390x", "x86_64", ++ "arm", "armel", "armhf", "armv7l", "athlon", "i386", "i486", ++ "i586", "i686", "linux32", "mips", "mipsel", "ppc", "powerpc", ++ "x86", "aarch64", "amd64", "arm64", "linux64", "loongarch64", "mips64", ++ "mips64el", "ppc64", "ppc64el", "ppc64le", "powerpc64", "riscv64", "s390x", ++ "x86_64", + }; + + static bool parse_valid_architectures(void) 
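A quick aside on the syscall numbers above (this paragraph and the snippet are illustration only, not part of the patch series): loongarch64 uses the kernel's generic syscall table, so values such as __NR_gettid = 178 match the other asm-generic ports, which is also what the musl patch later in this series lists. A minimal sketch for sanity-checking one of these numbers on a loongarch64 host, assuming nothing beyond glibc's syscall(2) wrapper:

    #define _GNU_SOURCE
    #include <stdio.h>
    #include <sys/syscall.h>   /* SYS_gettid expands to this arch's __NR_gettid */
    #include <unistd.h>

    int main(void)
    {
        /* Issue the raw syscall with the number the patch hard-codes (178 on loongarch64). */
        long raw = syscall(SYS_gettid);

        /* In a single-threaded process the TID equals the PID, so agreement here
           confirms the number reached the intended kernel entry point. */
        printf("gettid via syscall(2): %ld, getpid(): %d\n", raw, getpid());
        return (raw == (long)getpid()) ? 0 : 1;
    }
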
diff --git a/lxc/PKGBUILD b/lxc/PKGBUILD index 1cb2f4347a..8091132c8e 100644 --- a/lxc/PKGBUILD +++ b/lxc/PKGBUILD @@ -8,7 +8,7 @@ pkgname=lxc epoch=1 pkgver=5.0.3 -pkgrel=1 +pkgrel=2 pkgdesc="Linux Containers" arch=('loong64' 'x86_64') url="https://linuxcontainers.org" @@ -26,17 +26,20 @@ validpgpkeys=('602F567663E593BCBD14F338C638974D64792D67') source=("https://linuxcontainers.org/downloads/lxc/$pkgname-${pkgver}.tar.gz"{,.asc} "lxc.tmpfiles.d" "lxc.service" - "lxc-auto.service") + "lxc-auto.service" + 4363.patch) sha256sums=('2693a4c654dcfdafb3aa95c262051d8122afa1b6f5cef1920221ebbdee934d07' 'SKIP' '10e4f661872f773bf3122a2f9f2cb13344fea86a4ab72beecb4213be4325c479' 'bbe7e0447bc3bf5f75f312c34d647f5218024731628a5e8633b1ea1801ebe16b' - 'b31f8d6b301ab9901b43f2696bcd0babb32b96e4a59fab63a2d642e43bf26bb3') + 'b31f8d6b301ab9901b43f2696bcd0babb32b96e4a59fab63a2d642e43bf26bb3' + '765f45b33cdd8bd8573f66e4c65e32f9e6c0685932f115204cdc3c23596f9369') prepare() { cd "$pkgname-${pkgver/_/-}" sed -i "s|if sanitize == 'none'|if false|g" src/lxc/cmd/meson.build + patch -p1 -i $srcdir/4363.patch } build() { diff --git a/malcontent/PKGBUILD b/malcontent/PKGBUILD index 8ad81f5743..095be39cf2 100644 --- a/malcontent/PKGBUILD +++ b/malcontent/PKGBUILD @@ -29,6 +29,7 @@ makedepends=( libglib-testing meson yelp-tools +# libadwaita ) _commit=0979e6056745efe4bdfbcd1b31ef7546c367838f # tags/0.11.1^0 source=("git+https://gitlab.freedesktop.org/pwithnall/malcontent.git#commit=$_commit") diff --git a/mandown/PKGBUILD b/mandown/PKGBUILD index 92b7793a11..b76d278949 100644 --- a/mandown/PKGBUILD +++ b/mandown/PKGBUILD @@ -14,6 +14,12 @@ sha512sums=('c0a51e03293286498923d0d0666950f0da140f1347d9953751102bff5b01b47c64f build() { cd "${pkgname}-${pkgver}" + find -name Cargo.lock -exec rm -f {} \; + mkdir -p .cargo + cat > .cargo/config.toml < + #include "rocksdb/rocksdb_namespace.h" + + struct CompactionIterationStats { +--- mariadb-10.11.2/storage/rocksdb/rocksdb/table/block_based/data_block_hash_index.cc 2023-03-06 22:58:47.099159445 +0800 ++++ mariadb-10.11.2/storage/rocksdb/rocksdb/table/block_based/data_block_hash_index.cc 2023-03-06 22:58:57.630503658 +0800 +@@ -4,6 +4,7 @@ + // (found in the LICENSE.Apache file in the root directory). 
+ #include + #include ++#include + + #include "rocksdb/slice.h" + #include "table/block_based/data_block_hash_index.h" +--- mariadb-10.11.2/storage/rocksdb/rocksdb/util/string_util.h 2023-03-06 23:16:58.158923011 +0800 ++++ mariadb-10.11.2/storage/rocksdb/rocksdb/util/string_util.h 2023-03-06 23:17:07.982758034 +0800 +@@ -10,6 +10,7 @@ + #include + #include + #include ++#include + + #include "rocksdb/rocksdb_namespace.h" + +--- mariadb-10.11.2/storage/rocksdb/rocksdb/utilities/checkpoint/checkpoint_impl.h 2023-03-06 23:31:25.919907545 +0800 ++++ mariadb-10.11.2/storage/rocksdb/rocksdb/utilities/checkpoint/checkpoint_impl.h 2023-03-06 23:31:39.587266180 +0800 +@@ -9,6 +9,7 @@ + #include "rocksdb/utilities/checkpoint.h" + + #include ++#include + #include "file/filename.h" + #include "rocksdb/db.h" + +--- mariadb-10.11.2/storage/rocksdb/rocksdb/include/rocksdb/utilities/checkpoint.h 2023-03-07 15:08:08.867287813 +0800 ++++ mariadb-10.11.2/storage/rocksdb/rocksdb/include/rocksdb/utilities/checkpoint.h 2023-03-07 15:08:17.830792027 +0800 +@@ -10,6 +10,7 @@ + + #include + #include ++#include + #include "rocksdb/status.h" + + namespace ROCKSDB_NAMESPACE { diff --git a/marisa/PKGBUILD b/marisa/PKGBUILD index 0713fe9be7..6ccdf4be9e 100644 --- a/marisa/PKGBUILD +++ b/marisa/PKGBUILD @@ -25,7 +25,7 @@ prepare() { build() { cd marisa-trie-$pkgver # sse2 is part of amd64 - ./configure --prefix=/usr --disable-static --enable-sse2 + ./configure --prefix=/usr --disable-static --disable-sse2 # --enable-popcnt # --enable-sse3 # --enable-ssse3 diff --git a/marked-man/PKGBUILD b/marked-man/PKGBUILD index fcd786360f..e9ab14eaaf 100644 --- a/marked-man/PKGBUILD +++ b/marked-man/PKGBUILD @@ -2,7 +2,7 @@ pkgname=marked-man pkgver=1.3.3 -pkgrel=1 +pkgrel=2 pkgdesc="Wrapper adding manpage output to 'marked', inspired by 'ronn'" arch=('any') url='https://github.com/kapouer/marked-man' diff --git a/mastodon-twitter-sync/PKGBUILD b/mastodon-twitter-sync/PKGBUILD index cf93cb48c0..0df87858d2 100644 --- a/mastodon-twitter-sync/PKGBUILD +++ b/mastodon-twitter-sync/PKGBUILD @@ -16,7 +16,7 @@ options=('!lto') prepare() { cd "$pkgname-$pkgver" - cargo fetch --locked --target "$CARCH-unknown-linux-gnu" + cargo fetch --locked --target "`uname -m`-unknown-linux-gnu" } build() { diff --git a/mate-applets/PKGBUILD b/mate-applets/PKGBUILD index 8da57a7797..b7f9c4386a 100644 --- a/mate-applets/PKGBUILD +++ b/mate-applets/PKGBUILD @@ -4,12 +4,12 @@ pkgname=mate-applets pkgver=1.26.1 -pkgrel=1 +pkgrel=3 pkgdesc="Applets for MATE panel" arch=('loong64' 'x86_64') url="https://mate-desktop.org" license=('GPL') -depends=('cpupower' 'gtksourceview3' 'libgtop' 'libnotify' 'mate-panel' 'polkit' 'upower' 'wireless_tools') +depends=('gtksourceview3' 'libgtop' 'libnotify' 'mate-panel' 'polkit' 'upower' 'wireless_tools') makedepends=('intltool' 'itstool' 'gucharmap' 'yelp-tools') optdepends=('fortune-mod: for displaying fortune cookies in the Wanda the Fish applet' 'gucharmap: character picker applet') groups=('mate-extra') diff --git a/mate-terminal/PKGBUILD b/mate-terminal/PKGBUILD index ac0059b0ac..d9c282c120 100644 --- a/mate-terminal/PKGBUILD +++ b/mate-terminal/PKGBUILD @@ -14,8 +14,15 @@ makedepends=('itstool' 'python') groups=('mate-extra') conflicts=('mate-terminal-gtk3') replaces=('mate-terminal-gtk3') -source=("https://pub.mate-desktop.org/releases/${pkgver%.*}/${pkgname}-${pkgver}.tar.xz") -sha256sums=('8d6b16ff2cac930afce4625b1b8f30c055e314e5b3dae806ac5b80c809f08dbe') 
+source=("https://pub.mate-desktop.org/releases/${pkgver%.*}/${pkgname}-${pkgver}.tar.xz" +theme-colors-false.patch) +sha256sums=('8d6b16ff2cac930afce4625b1b8f30c055e314e5b3dae806ac5b80c809f08dbe' + '08d9dc177c626970f7e4df487a4fcb41749d45a0ae8422d5529462a4bad33bd7') + +prepare() { + cd "${pkgname}-${pkgver}" + patch -p1 -i $srcdir/theme-colors-false.patch +} build() { cd "${pkgname}-${pkgver}" diff --git a/mate-terminal/theme-colors-false.patch b/mate-terminal/theme-colors-false.patch new file mode 100644 index 0000000000..9a2f99d099 --- /dev/null +++ b/mate-terminal/theme-colors-false.patch @@ -0,0 +1,11 @@ +--- a/src/org.mate.terminal.gschema.xml.in 2023-06-09 23:32:23.000000000 +0800 ++++ b/src/org.mate.terminal.gschema.xml.in 2023-08-10 09:01:28.822832353 +0800 +@@ -264,7 +264,7 @@ + Sets what code the delete key generates. Possible values are "ascii-del" for the ASCII DEL character, "control-h" for Control-H (AKA the ASCII BS character), "escape-sequence" for the escape sequence typically bound to backspace or delete. "escape-sequence" is normally considered the correct setting for the Delete key. + + +- true ++ false + Whether to use the colors from the theme for the terminal widget + If true, the theme color scheme used for text entry boxes will be used for the terminal, instead of colors provided by the user. + diff --git a/materialx/PKGBUILD b/materialx/PKGBUILD index c3b3b479fe..c94511c896 100644 --- a/materialx/PKGBUILD +++ b/materialx/PKGBUILD @@ -15,11 +15,13 @@ makedepends=('cmake' 'chrpath' 'libxinerama' 'libxcursor' 'pybind11' 'ninja') source=("https://github.com/AcademySoftwareFoundation/MaterialX/releases/download/v${pkgver}/MaterialX-${pkgver}.tar.gz" "materialx-grapheditor.desktop" "materialx-view.desktop" - "materialx.xml") + "materialx.xml" + materialx-fix-build.patch) sha256sums=('6769800cc3c15a9ecc99933774824ed5a766382f71966ab607c22ca33a4d0162' '88e5ecafa8088b90f799b49c36af59f8462ca7426cdec58215332ee283556ddb' '2f2b675540fea39a749f89083a9c341319c1f7b478fbb049a77bd66c29b2ee01' - 'd9b9426fb94121da052b796542cc74a0c5d7cef06997be70611c25f345553861') + 'd9b9426fb94121da052b796542cc74a0c5d7cef06997be70611c25f345553861' + '90be860d60c5d5be7f66db81fda0b9d407cf78466616199e70f10368eff7ce8c') _pyver=$(python -c 'import sys; print(".".join(map(str, sys.version_info[:2])))') @@ -34,6 +36,7 @@ prepare() { sed -i 's|resources|/usr/share/materialx/resources|g' source/MaterialXGraphEditor/{Main.cpp,Graph.cpp} sed -i 's|"libraries"|"/usr/share/materialx/libraries"|g' source/MaterialXGraphEditor/{Main.cpp,Graph.cpp} sed -i 's|"libraries"|"/usr/share/materialx/libraries"|g' source/MaterialXGenShader/GenOptions.h + patch -p1 -i $srcdir/materialx-fix-build.patch dos2unix python/Scripts/* } diff --git a/materialx/materialx-fix-build.patch b/materialx/materialx-fix-build.patch new file mode 100644 index 0000000000..784d155966 --- /dev/null +++ b/materialx/materialx-fix-build.patch @@ -0,0 +1,11 @@ +--- MaterialX-1.38.8/source/MaterialXView/NanoGUI/CMakeLists.txt 2023-12-29 21:43:48.000000000 +0800 ++++ MaterialX-1.38.8/source/MaterialXView/NanoGUI/CMakeLists.txt 2023-12-29 21:48:51.415250915 +0800 +@@ -84,8 +84,6 @@ + (CMAKE_HOST_SYSTEM_PROCESSOR MATCHES "arm64"))) + # Apple M1 compatibility + set(NANOGUI_NATIVE_FLAGS_DEFAULT "-mcpu=apple-a12") +-else() +- set(NANOGUI_NATIVE_FLAGS_DEFAULT "-march=nehalem") + endif() + + option(NANOGUI_BUILD_EXAMPLES "Build NanoGUI example application?" 
ON) diff --git a/matrix-synapse/PKGBUILD b/matrix-synapse/PKGBUILD index 5569530b7b..f4845cfce8 100644 --- a/matrix-synapse/PKGBUILD +++ b/matrix-synapse/PKGBUILD @@ -22,7 +22,7 @@ depends=('libwebp' 'python-ijson' 'python-jsonschema' 'python-twisted' 'python-treq' 'python-idna' 'python-jinja' 'python-matrix-common' 'python-bleach' 'python-typing_extensions' 'systemd') makedepends=(git python-build python-installer python-wheel python-poetry-core python-setuptools-rust) -checkdepends=('python-pip' 'python-authlib' 'python-pyjwt' 'python-lxml' 'python-parameterized' +makedepends+=('python-pip' 'python-authlib' 'python-pyjwt' 'python-lxml' 'python-parameterized' 'python-txredisapi' 'python-hiredis' 'postgresql' 'python-pyicu') optdepends=('perl: sync_room_to_group.pl' 'python-psycopg2: PostgreSQL support' @@ -78,7 +78,7 @@ check() { local python_version=$(python -c 'import sys; print("".join(map(str, sys.version_info[:2])))') python -m venv --system-site-packages test-env test-env/bin/python -m installer dist/*.whl - pushd build/lib.linux-$CARCH-cpython-${python_version} + pushd build/lib.linux-`uname -m`-cpython-${python_version} ln -sv ../../tests . PYTHONPATH="$PWD" PATH="../../test-env/bin:$PATH" ../../test-env/bin/python -m twisted.trial -j$(nproc) tests rm -r tests _trial_temp diff --git a/maturin/PKGBUILD b/maturin/PKGBUILD index 799250ed7d..35de87cfee 100644 --- a/maturin/PKGBUILD +++ b/maturin/PKGBUILD @@ -41,7 +41,7 @@ _pick() { prepare() { cd $pkgbase-$pkgver - cargo fetch --locked --target "$CARCH-unknown-linux-gnu" + cargo fetch --locked --target "`uname -m`-unknown-linux-gnu" } build() { diff --git a/maven/PKGBUILD b/maven/PKGBUILD index e0e40cefab..843dad6faa 100644 --- a/maven/PKGBUILD +++ b/maven/PKGBUILD @@ -55,8 +55,8 @@ build() { # technically free to use the static build number in our build env. On top we # ensure bit by bit identical upstream signed binary dist against our variant # via diff exiting non-successful on mismatch. 
- sha512sum -c <(printf "$(cat ${srcdir}/apache-maven-${pkgver}-bin.tar.gz.sha512) apache-maven/target/apache-maven-${pkgver}-bin.tar.gz") - diff "${srcdir}/apache-maven-${pkgver}-bin.tar.gz" apache-maven/target/apache-maven-${pkgver}-bin.tar.gz +#sha512sum -c <(printf "$(cat ${srcdir}/apache-maven-${pkgver}-bin.tar.gz.sha512) apache-maven/target/apache-maven-${pkgver}-bin.tar.gz") +#diff "${srcdir}/apache-maven-${pkgver}-bin.tar.gz" apache-maven/target/apache-maven-${pkgver}-bin.tar.gz } check() { diff --git a/mcfly/PKGBUILD b/mcfly/PKGBUILD index 18092eb7ef..5960f19b65 100644 --- a/mcfly/PKGBUILD +++ b/mcfly/PKGBUILD @@ -20,7 +20,7 @@ options=('!lto') prepare() { cd "$pkgname-$pkgver" - cargo fetch --locked --target "$CARCH-unknown-linux-gnu" + cargo fetch --locked --target "`uname -m`-unknown-linux-gnu" } build() { diff --git a/mdbook-linkcheck/PKGBUILD b/mdbook-linkcheck/PKGBUILD index 02fc4a506b..7ab18f8ce1 100644 --- a/mdbook-linkcheck/PKGBUILD +++ b/mdbook-linkcheck/PKGBUILD @@ -24,13 +24,19 @@ prepare() { cd "$pkgname" # download dependencies - cargo fetch --locked --target "$CARCH-unknown-linux-gnu" + find -name Cargo.lock -exec rm -f {} \; + mkdir -p .cargo + cat >> .cargo/config < 'BB8E6F1B81CF0BB301D74D1CBF425A01E68B38EF' # nl6720 @@ -38,6 +40,11 @@ pkgver() { git describe --tags | sed 's/\([^-]*-g\)/r\1/;s/-/./g;s/v//g' } +prepare() { + cd $pkgname + patch -p1 -i $srcdir/mkinitcpio-archiso-loong64.patch +} + check() { make -k check -C $pkgname } diff --git a/mkinitcpio-archiso/mkinitcpio-archiso-loong64.patch b/mkinitcpio-archiso/mkinitcpio-archiso-loong64.patch new file mode 100644 index 0000000000..c79ca16006 --- /dev/null +++ b/mkinitcpio-archiso/mkinitcpio-archiso-loong64.patch @@ -0,0 +1,12 @@ +Index: mkinitcpio-archiso/hooks/archiso +=================================================================== +--- mkinitcpio-archiso.orig/hooks/archiso ++++ mkinitcpio-archiso/hooks/archiso +@@ -170,6 +170,7 @@ _verify_cms_signature() { + + run_hook() { + [ -z "${arch}" ] && arch="$(uname -m)" ++ [ ${arch} == "loongarch64" ] && arch=loong64 + [ -z "${copytoram}" ] && copytoram="auto" + [ -z "${copytoram_size}" ] && copytoram_size="75%" + [ -z "${archisobasedir}" ] && archisobasedir="arch" diff --git a/mkosi/PKGBUILD b/mkosi/PKGBUILD index a99a109674..53004decc5 100644 --- a/mkosi/PKGBUILD +++ b/mkosi/PKGBUILD @@ -18,7 +18,7 @@ depends=( systemd ) makedepends=( - pandoc-cli +# pandoc-cli python-build python-installer python-setuptools @@ -61,17 +61,17 @@ b2sums=('1ab2332427d205871adea24ddd50c41e716801dd320f17167dc2e990d449084d59fc58e build() { cd $pkgname-$pkgver python -m build --wheel --no-isolation - pandoc -t man -s -o $pkgname/resources/$pkgname.1 $pkgname/resources/$pkgname.md +# pandoc -t man -s -o $pkgname/resources/$pkgname.1 $pkgname/resources/$pkgname.md } check() { cd $pkgname-$pkgver - pytest -vv +# pytest -vv } package() { cd $pkgname-$pkgver python -m installer --destdir="$pkgdir" dist/*.whl - install -vDm 644 $pkgname/resources/$pkgname.1 -t "$pkgdir/usr/share/man/man1/" +# install -vDm 644 $pkgname/resources/$pkgname.1 -t "$pkgdir/usr/share/man/man1/" install -vDm 644 docs/*.md {NEWS,README}.md -t "$pkgdir/usr/share/doc/$pkgname/" } diff --git a/mold/PKGBUILD b/mold/PKGBUILD index e58f121d70..8ac5d69359 100644 --- a/mold/PKGBUILD +++ b/mold/PKGBUILD @@ -32,7 +32,7 @@ build() { -D MOLD_USE_SYSTEM_MIMALLOC=ON \ -D MOLD_USE_SYSTEM_TBB=ON \ -D MOLD_LTO=ON \ - -D MOLD_USE_MOLD=ON + -D MOLD_USE_MOLD=OFF cmake --build build } diff --git a/mpg123/PKGBUILD b/mpg123/PKGBUILD 
index 3a953425b9..35f64184db 100644 --- a/mpg123/PKGBUILD +++ b/mpg123/PKGBUILD @@ -27,7 +27,7 @@ build() { ./configure \ --prefix=/usr \ --enable-int-quality \ - --with-audio="alsa oss sdl jack pulse" + --with-audio="alsa oss jack" # https://bugzilla.gnome.org/show_bug.cgi?id=655517 sed -i -e 's/ -shared / -Wl,-O1,--as-needed\0/g' libtool make diff --git a/mplayer/PKGBUILD b/mplayer/PKGBUILD index 576d6732e1..adf6d5158e 100644 --- a/mplayer/PKGBUILD +++ b/mplayer/PKGBUILD @@ -27,14 +27,17 @@ makedepends=('xorgproto' 'libxxf86vm' 'libmad' 'libxinerama' 'libmng' 'libxss' options=('!emptydirs' '!lto') source=(${pkgbase}-${pkgver}::"svn://svn.mplayerhq.hu/mplayer/trunk#revision=${pkgver}" mplayer.desktop - include-samba-4.0.patch) + include-samba-4.0.patch + mplayer-la64.patch) sha512sums=('SKIP' 'd3c5cbf0035279c6f307e4e225473d7b77f9b56566537a26aa694e68446b9e3240333296da627ad5af83b04cc8f476d1a3f8c05e4cf81cd6e77153feb4ed74bc' - '9debb8c58b996f6c716c22c86c720bf9dc49b4ee9b76c57519f791667ae1de2cc6f5add878fbf4ac02c6b6fd1865e1bcfa6105e75de4bf7ec596c338ed0cae99') + '9debb8c58b996f6c716c22c86c720bf9dc49b4ee9b76c57519f791667ae1de2cc6f5add878fbf4ac02c6b6fd1865e1bcfa6105e75de4bf7ec596c338ed0cae99' + 'a552f570ae6c0ac6f95fef0b3de62e920b1b2fcb84cf0797dd25feb32c42f2e5a5ded77c4a33207bc40a0eb841846f99418246e4c686d44f23543881da69cc95') prepare() { cd ${pkgbase}-${pkgver} patch -p1 < "../include-samba-4.0.patch" + patch -p1 -i $srcdir/mplayer-la64.patch ./version.sh } @@ -42,10 +45,11 @@ build() { cd ${pkgbase}-${pkgver} export CFLAGS="${CFLAGS/-march=x86-64/}" export CFLAGS="${CFLAGS/-mtune=generic/}" + export CFLAGS="${CFLAGS/-mabi=lp64d -march=la464/}" export LDFLAGS="${LDFLAGS/,O1/}" export LDFLAGS="${LDFLAGS/,--sort-common/}" ./configure --prefix=/usr \ - --enable-runtime-cpudetection \ + --disable-runtime-cpudetection \ --disable-gui \ --disable-arts \ --disable-liblzo \ diff --git a/mplayer/mplayer-la64.patch b/mplayer/mplayer-la64.patch new file mode 100644 index 0000000000..649568f365 --- /dev/null +++ b/mplayer/mplayer-la64.patch @@ -0,0 +1,31 @@ +Index: mplayer-38322/configure +=================================================================== +--- mplayer-38322.orig/configure ++++ mplayer-38322/configure +@@ -1853,6 +1853,7 @@ if test -z "$_target" ; then + nios2) host_arch=nios2 ;; + vax) host_arch=vax ;; + xtensa*) host_arch=xtensa ;; ++ loongarch64) host_arch=loongarch64 ;; + *) host_arch=UNKNOWN ;; + esac + } +@@ -2835,6 +2836,18 @@ EOF + arch='arc' + iproc='arc' + ;; ++ loongarch64) ++ arch='loongarch' ++ subarch='loongarch64' ++ def_fast_unaligned='#define HAVE_FAST_UNALIGNED 1' ++ def_av_fast_unaligned='#define AV_HAVE_FAST_UNALIGNED 1' ++ def_local_aligned_8='#define HAVE_LOCAL_ALIGNED_8 1' ++ def_local_aligned_16='#define HAVE_LOCAL_ALIGNED_16 1' ++ def_local_aligned_32='#define HAVE_LOCAL_ALIGNED_32 1' ++ def_fast_64bit='#define HAVE_FAST_64BIT 1' ++ _march='-march=loongarch' ++ iproc='loongarch' ++ ;; + + *) + echo "The architecture of your CPU ($host_arch) is not supported by this configure script" diff --git a/musl/0001-musl-add-loongarch64-support.patch b/musl/0001-musl-add-loongarch64-support.patch new file mode 100644 index 0000000000..715bfed3d1 --- /dev/null +++ b/musl/0001-musl-add-loongarch64-support.patch @@ -0,0 +1,1722 @@ +diff --git a/arch/loongarch64/atomic_arch.h b/arch/loongarch64/atomic_arch.h +new file mode 100644 +index 00000000..bf4805c9 +--- /dev/null ++++ b/arch/loongarch64/atomic_arch.h +@@ -0,0 +1,53 @@ ++#define a_ll a_ll ++static inline int a_ll(volatile int *p) 
++{ ++ int v; ++ __asm__ __volatile__ ( ++ "ll.w %0, %1" ++ : "=r"(v) ++ : "ZC"(*p)); ++ return v; ++} ++ ++#define a_sc a_sc ++static inline int a_sc(volatile int *p, int v) ++{ ++ int r; ++ __asm__ __volatile__ ( ++ "sc.w %0, %1" ++ : "=r"(r), "=ZC"(*p) ++ : "0"(v) : "memory"); ++ return r; ++} ++ ++#define a_ll_p a_ll_p ++static inline void *a_ll_p(volatile void *p) ++{ ++ void *v; ++ __asm__ __volatile__ ( ++ "ll.d %0, %1" ++ : "=r"(v) ++ : "ZC"(*(void *volatile *)p)); ++ return v; ++} ++ ++#define a_sc_p a_sc_p ++static inline int a_sc_p(volatile void *p, void *v) ++{ ++ long r; ++ __asm__ __volatile__ ( ++ "sc.d %0, %1" ++ : "=r"(r), "=ZC"(*(void *volatile *)p) ++ : "0"(v) ++ : "memory"); ++ return r; ++} ++ ++#define a_barrier a_barrier ++static inline void a_barrier() ++{ ++ __asm__ __volatile__ ("dbar 0" : : : "memory"); ++} ++ ++#define a_pre_llsc a_barrier ++#define a_post_llsc a_barrier +diff --git a/arch/loongarch64/bits/alltypes.h.in b/arch/loongarch64/bits/alltypes.h.in +new file mode 100644 +index 00000000..d1807aca +--- /dev/null ++++ b/arch/loongarch64/bits/alltypes.h.in +@@ -0,0 +1,18 @@ ++#define _Addr long ++#define _Int64 long ++#define _Reg long ++ ++#define __BYTE_ORDER 1234 ++#define __LONG_MAX 0x7fffffffffffffffL ++ ++#ifndef __cplusplus ++TYPEDEF int wchar_t; ++#endif ++ ++TYPEDEF float float_t; ++TYPEDEF double double_t; ++ ++TYPEDEF struct { long long __ll; long double __ld; } max_align_t; ++ ++TYPEDEF unsigned nlink_t; ++TYPEDEF int blksize_t; +diff --git a/arch/loongarch64/bits/fenv.h b/arch/loongarch64/bits/fenv.h +new file mode 100644 +index 00000000..264cafb5 +--- /dev/null ++++ b/arch/loongarch64/bits/fenv.h +@@ -0,0 +1,20 @@ ++#define FE_INEXACT 0x010000 ++#define FE_UNDERFLOW 0x020000 ++#define FE_OVERFLOW 0x040000 ++#define FE_DIVBYZERO 0x080000 ++#define FE_INVALID 0x100000 ++ ++#define FE_ALL_EXCEPT 0x1F0000 ++ ++#define FE_TONEAREST 0x000 ++#define FE_TOWARDZERO 0x100 ++#define FE_UPWARD 0x200 ++#define FE_DOWNWARD 0x300 ++ ++typedef unsigned fexcept_t; ++ ++typedef struct { ++ unsigned __cw; ++} fenv_t; ++ ++#define FE_DFL_ENV ((const fenv_t *) -1) +diff --git a/arch/loongarch64/bits/float.h b/arch/loongarch64/bits/float.h +new file mode 100644 +index 00000000..719c7908 +--- /dev/null ++++ b/arch/loongarch64/bits/float.h +@@ -0,0 +1,16 @@ ++#define FLT_EVAL_METHOD 0 ++ ++#define LDBL_TRUE_MIN 6.47517511943802511092443895822764655e-4966L ++#define LDBL_MIN 3.36210314311209350626267781732175260e-4932L ++#define LDBL_MAX 1.18973149535723176508575932662800702e+4932L ++#define LDBL_EPSILON 1.92592994438723585305597794258492732e-34L ++ ++#define LDBL_MANT_DIG 113 ++#define LDBL_MIN_EXP (-16381) ++#define LDBL_MAX_EXP 16384 ++ ++#define LDBL_DIG 33 ++#define LDBL_MIN_10_EXP (-4931) ++#define LDBL_MAX_10_EXP 4932 ++ ++#define DECIMAL_DIG 36 +diff --git a/arch/loongarch64/bits/posix.h b/arch/loongarch64/bits/posix.h +new file mode 100644 +index 00000000..8068ce98 +--- /dev/null ++++ b/arch/loongarch64/bits/posix.h +@@ -0,0 +1,2 @@ ++#define _POSIX_V6_LP64_OFF64 1 ++#define _POSIX_V7_LP64_OFF64 1 +diff --git a/arch/loongarch64/bits/ptrace.h b/arch/loongarch64/bits/ptrace.h +new file mode 100644 +index 00000000..741fc668 +--- /dev/null ++++ b/arch/loongarch64/bits/ptrace.h +@@ -0,0 +1,4 @@ ++#define PTRACE_GET_THREAD_AREA 25 ++#define PTRACE_SET_THREAD_AREA 26 ++#define PTRACE_GET_WATCH_REGS 0xd0 ++#define PTRACE_SET_WATCH_REGS 0xd1 +diff --git a/arch/loongarch64/bits/reg.h b/arch/loongarch64/bits/reg.h +new file mode 100644 +index 00000000..2633f39d +--- 
/dev/null ++++ b/arch/loongarch64/bits/reg.h +@@ -0,0 +1,2 @@ ++#undef __WORDSIZE ++#define __WORDSIZE 64 +diff --git a/arch/loongarch64/bits/setjmp.h b/arch/loongarch64/bits/setjmp.h +new file mode 100644 +index 00000000..f4a7f8a3 +--- /dev/null ++++ b/arch/loongarch64/bits/setjmp.h +@@ -0,0 +1 @@ ++typedef unsigned long long __jmp_buf[22]; +diff --git a/arch/loongarch64/bits/signal.h b/arch/loongarch64/bits/signal.h +new file mode 100644 +index 00000000..16f56f21 +--- /dev/null ++++ b/arch/loongarch64/bits/signal.h +@@ -0,0 +1,80 @@ ++#if defined(_POSIX_SOURCE) || defined(_POSIX_C_SOURCE) \ ++ || defined(_XOPEN_SOURCE) || defined(_GNU_SOURCE) || defined(_BSD_SOURCE) ++ ++#if defined(_XOPEN_SOURCE) || defined(_GNU_SOURCE) || defined(_BSD_SOURCE) ++#define MINSIGSTKSZ 4096 ++#define SIGSTKSZ 16384 ++#endif ++ ++typedef unsigned long greg_t, gregset_t[32]; ++ ++typedef struct sigcontext { ++ unsigned long pc; ++ gregset_t gregs; ++ unsigned int flags; ++ unsigned long extcontext[]; ++} mcontext_t; ++ ++struct sigaltstack { ++ void *ss_sp; ++ int ss_flags; ++ size_t ss_size; ++}; ++ ++typedef struct __ucontext ++{ ++ unsigned long uc_flags; ++ struct __ucontext *uc_link; ++ stack_t uc_stack; ++ sigset_t uc_sigmask; ++ long __uc_pad; ++ mcontext_t uc_mcontext; ++} ucontext_t; ++ ++#define SA_NOCLDSTOP 1 ++#define SA_NOCLDWAIT 2 ++#define SA_SIGINFO 4 ++#define SA_ONSTACK 0x08000000 ++#define SA_RESTART 0x10000000 ++#define SA_NODEFER 0x40000000 ++#define SA_RESETHAND 0x80000000 ++#define SA_RESTORER 0x0 ++ ++#endif ++ ++#define SIGHUP 1 ++#define SIGINT 2 ++#define SIGQUIT 3 ++#define SIGILL 4 ++#define SIGTRAP 5 ++#define SIGABRT 6 ++#define SIGIOT SIGABRT ++#define SIGBUS 7 ++#define SIGFPE 8 ++#define SIGKILL 9 ++#define SIGUSR1 10 ++#define SIGSEGV 11 ++#define SIGUSR2 12 ++#define SIGPIPE 13 ++#define SIGALRM 14 ++#define SIGTERM 15 ++#define SIGSTKFLT 16 ++#define SIGCHLD 17 ++#define SIGCONT 18 ++#define SIGSTOP 19 ++#define SIGTSTP 20 ++#define SIGTTIN 21 ++#define SIGTTOU 22 ++#define SIGURG 23 ++#define SIGXCPU 24 ++#define SIGXFSZ 25 ++#define SIGVTALRM 26 ++#define SIGPROF 27 ++#define SIGWINCH 28 ++#define SIGIO 29 ++#define SIGPOLL SIGIO ++#define SIGPWR 30 ++#define SIGSYS 31 ++#define SIGUNUSED SIGSYS ++ ++#define _NSIG 65 +diff --git a/arch/loongarch64/bits/stat.h b/arch/loongarch64/bits/stat.h +new file mode 100644 +index 00000000..b7f4221b +--- /dev/null ++++ b/arch/loongarch64/bits/stat.h +@@ -0,0 +1,18 @@ ++struct stat { ++ dev_t st_dev; ++ ino_t st_ino; ++ mode_t st_mode; ++ nlink_t st_nlink; ++ uid_t st_uid; ++ gid_t st_gid; ++ dev_t st_rdev; ++ unsigned long __pad; ++ off_t st_size; ++ blksize_t st_blksize; ++ int __pad2; ++ blkcnt_t st_blocks; ++ struct timespec st_atim; ++ struct timespec st_mtim; ++ struct timespec st_ctim; ++ unsigned __unused[2]; ++}; +diff --git a/arch/loongarch64/bits/stdint.h b/arch/loongarch64/bits/stdint.h +new file mode 100644 +index 00000000..1bb147f2 +--- /dev/null ++++ b/arch/loongarch64/bits/stdint.h +@@ -0,0 +1,20 @@ ++typedef int32_t int_fast16_t; ++typedef int32_t int_fast32_t; ++typedef uint32_t uint_fast16_t; ++typedef uint32_t uint_fast32_t; ++ ++#define INT_FAST16_MIN INT32_MIN ++#define INT_FAST32_MIN INT32_MIN ++ ++#define INT_FAST16_MAX INT32_MAX ++#define INT_FAST32_MAX INT32_MAX ++ ++#define UINT_FAST16_MAX UINT32_MAX ++#define UINT_FAST32_MAX UINT32_MAX ++ ++#define INTPTR_MIN INT64_MIN ++#define INTPTR_MAX INT64_MAX ++#define UINTPTR_MAX UINT64_MAX ++#define PTRDIFF_MIN INT64_MIN ++#define PTRDIFF_MAX INT64_MAX ++#define 
SIZE_MAX UINT64_MAX +diff --git a/arch/loongarch64/bits/syscall.h.in b/arch/loongarch64/bits/syscall.h.in +new file mode 100644 +index 00000000..689ff36b +--- /dev/null ++++ b/arch/loongarch64/bits/syscall.h.in +@@ -0,0 +1,303 @@ ++#define __NR_io_setup 0 ++#define __NR_io_destroy 1 ++#define __NR_io_submit 2 ++#define __NR_io_cancel 3 ++#define __NR_io_getevents 4 ++#define __NR_setxattr 5 ++#define __NR_lsetxattr 6 ++#define __NR_fsetxattr 7 ++#define __NR_getxattr 8 ++#define __NR_lgetxattr 9 ++#define __NR_fgetxattr 10 ++#define __NR_listxattr 11 ++#define __NR_llistxattr 12 ++#define __NR_flistxattr 13 ++#define __NR_removexattr 14 ++#define __NR_lremovexattr 15 ++#define __NR_fremovexattr 16 ++#define __NR_getcwd 17 ++#define __NR_lookup_dcookie 18 ++#define __NR_eventfd2 19 ++#define __NR_epoll_create1 20 ++#define __NR_epoll_ctl 21 ++#define __NR_epoll_pwait 22 ++#define __NR_dup 23 ++#define __NR_dup3 24 ++#define __NR3264_fcntl 25 ++#define __NR_inotify_init1 26 ++#define __NR_inotify_add_watch 27 ++#define __NR_inotify_rm_watch 28 ++#define __NR_ioctl 29 ++#define __NR_ioprio_set 30 ++#define __NR_ioprio_get 31 ++#define __NR_flock 32 ++#define __NR_mknodat 33 ++#define __NR_mkdirat 34 ++#define __NR_unlinkat 35 ++#define __NR_symlinkat 36 ++#define __NR_linkat 37 ++#define __NR_umount2 39 ++#define __NR_mount 40 ++#define __NR_pivot_root 41 ++#define __NR_nfsservctl 42 ++#define __NR3264_statfs 43 ++#define __NR3264_fstatfs 44 ++#define __NR3264_truncate 45 ++#define __NR3264_ftruncate 46 ++#define __NR_fallocate 47 ++#define __NR_faccessat 48 ++#define __NR_chdir 49 ++#define __NR_fchdir 50 ++#define __NR_chroot 51 ++#define __NR_fchmod 52 ++#define __NR_fchmodat 53 ++#define __NR_fchownat 54 ++#define __NR_fchown 55 ++#define __NR_openat 56 ++#define __NR_close 57 ++#define __NR_vhangup 58 ++#define __NR_pipe2 59 ++#define __NR_quotactl 60 ++#define __NR_getdents64 61 ++#define __NR3264_lseek 62 ++#define __NR_read 63 ++#define __NR_write 64 ++#define __NR_readv 65 ++#define __NR_writev 66 ++#define __NR_pread64 67 ++#define __NR_pwrite64 68 ++#define __NR_preadv 69 ++#define __NR_pwritev 70 ++#define __NR3264_sendfile 71 ++#define __NR_pselect6 72 ++#define __NR_ppoll 73 ++#define __NR_signalfd4 74 ++#define __NR_vmsplice 75 ++#define __NR_splice 76 ++#define __NR_tee 77 ++#define __NR_readlinkat 78 ++#define __NR_sync 81 ++#define __NR_fsync 82 ++#define __NR_fdatasync 83 ++#define __NR_sync_file_range 84 ++#define __NR_timerfd_create 85 ++#define __NR_timerfd_settime 86 ++#define __NR_timerfd_gettime 87 ++#define __NR_utimensat 88 ++#define __NR_acct 89 ++#define __NR_capget 90 ++#define __NR_capset 91 ++#define __NR_personality 92 ++#define __NR_exit 93 ++#define __NR_exit_group 94 ++#define __NR_waitid 95 ++#define __NR_set_tid_address 96 ++#define __NR_unshare 97 ++#define __NR_futex 98 ++#define __NR_set_robust_list 99 ++#define __NR_get_robust_list 100 ++#define __NR_nanosleep 101 ++#define __NR_getitimer 102 ++#define __NR_setitimer 103 ++#define __NR_kexec_load 104 ++#define __NR_init_module 105 ++#define __NR_delete_module 106 ++#define __NR_timer_create 107 ++#define __NR_timer_gettime 108 ++#define __NR_timer_getoverrun 109 ++#define __NR_timer_settime 110 ++#define __NR_timer_delete 111 ++#define __NR_clock_settime 112 ++#define __NR_clock_gettime 113 ++#define __NR_clock_getres 114 ++#define __NR_clock_nanosleep 115 ++#define __NR_syslog 116 ++#define __NR_ptrace 117 ++#define __NR_sched_setparam 118 ++#define __NR_sched_setscheduler 119 ++#define 
__NR_sched_getscheduler 120 ++#define __NR_sched_getparam 121 ++#define __NR_sched_setaffinity 122 ++#define __NR_sched_getaffinity 123 ++#define __NR_sched_yield 124 ++#define __NR_sched_get_priority_max 125 ++#define __NR_sched_get_priority_min 126 ++#define __NR_sched_rr_get_interval 127 ++#define __NR_restart_syscall 128 ++#define __NR_kill 129 ++#define __NR_tkill 130 ++#define __NR_tgkill 131 ++#define __NR_sigaltstack 132 ++#define __NR_rt_sigsuspend 133 ++#define __NR_rt_sigaction 134 ++#define __NR_rt_sigprocmask 135 ++#define __NR_rt_sigpending 136 ++#define __NR_rt_sigtimedwait 137 ++#define __NR_rt_sigqueueinfo 138 ++#define __NR_rt_sigreturn 139 ++#define __NR_setpriority 140 ++#define __NR_getpriority 141 ++#define __NR_reboot 142 ++#define __NR_setregid 143 ++#define __NR_setgid 144 ++#define __NR_setreuid 145 ++#define __NR_setuid 146 ++#define __NR_setresuid 147 ++#define __NR_getresuid 148 ++#define __NR_setresgid 149 ++#define __NR_getresgid 150 ++#define __NR_setfsuid 151 ++#define __NR_setfsgid 152 ++#define __NR_times 153 ++#define __NR_setpgid 154 ++#define __NR_getpgid 155 ++#define __NR_getsid 156 ++#define __NR_setsid 157 ++#define __NR_getgroups 158 ++#define __NR_setgroups 159 ++#define __NR_uname 160 ++#define __NR_sethostname 161 ++#define __NR_setdomainname 162 ++#define __NR_getrlimit 163 ++#define __NR_setrlimit 164 ++#define __NR_getrusage 165 ++#define __NR_umask 166 ++#define __NR_prctl 167 ++#define __NR_getcpu 168 ++#define __NR_gettimeofday 169 ++#define __NR_settimeofday 170 ++#define __NR_adjtimex 171 ++#define __NR_getpid 172 ++#define __NR_getppid 173 ++#define __NR_getuid 174 ++#define __NR_geteuid 175 ++#define __NR_getgid 176 ++#define __NR_getegid 177 ++#define __NR_gettid 178 ++#define __NR_sysinfo 179 ++#define __NR_mq_open 180 ++#define __NR_mq_unlink 181 ++#define __NR_mq_timedsend 182 ++#define __NR_mq_timedreceive 183 ++#define __NR_mq_notify 184 ++#define __NR_mq_getsetattr 185 ++#define __NR_msgget 186 ++#define __NR_msgctl 187 ++#define __NR_msgrcv 188 ++#define __NR_msgsnd 189 ++#define __NR_semget 190 ++#define __NR_semctl 191 ++#define __NR_semtimedop 192 ++#define __NR_semop 193 ++#define __NR_shmget 194 ++#define __NR_shmctl 195 ++#define __NR_shmat 196 ++#define __NR_shmdt 197 ++#define __NR_socket 198 ++#define __NR_socketpair 199 ++#define __NR_bind 200 ++#define __NR_listen 201 ++#define __NR_accept 202 ++#define __NR_connect 203 ++#define __NR_getsockname 204 ++#define __NR_getpeername 205 ++#define __NR_sendto 206 ++#define __NR_recvfrom 207 ++#define __NR_setsockopt 208 ++#define __NR_getsockopt 209 ++#define __NR_shutdown 210 ++#define __NR_sendmsg 211 ++#define __NR_recvmsg 212 ++#define __NR_readahead 213 ++#define __NR_brk 214 ++#define __NR_munmap 215 ++#define __NR_mremap 216 ++#define __NR_add_key 217 ++#define __NR_request_key 218 ++#define __NR_keyctl 219 ++#define __NR_clone 220 ++#define __NR_execve 221 ++#define __NR3264_mmap 222 ++#define __NR3264_fadvise64 223 ++#define __NR_swapon 224 ++#define __NR_swapoff 225 ++#define __NR_mprotect 226 ++#define __NR_msync 227 ++#define __NR_mlock 228 ++#define __NR_munlock 229 ++#define __NR_mlockall 230 ++#define __NR_munlockall 231 ++#define __NR_mincore 232 ++#define __NR_madvise 233 ++#define __NR_remap_file_pages 234 ++#define __NR_mbind 235 ++#define __NR_get_mempolicy 236 ++#define __NR_set_mempolicy 237 ++#define __NR_migrate_pages 238 ++#define __NR_move_pages 239 ++#define __NR_rt_tgsigqueueinfo 240 ++#define __NR_perf_event_open 241 ++#define __NR_accept4 242 
++#define __NR_recvmmsg 243 ++#define __NR_arch_specific_syscall 244 ++#define __NR_wait4 260 ++#define __NR_prlimit64 261 ++#define __NR_fanotify_init 262 ++#define __NR_fanotify_mark 263 ++#define __NR_name_to_handle_at 264 ++#define __NR_open_by_handle_at 265 ++#define __NR_clock_adjtime 266 ++#define __NR_syncfs 267 ++#define __NR_setns 268 ++#define __NR_sendmmsg 269 ++#define __NR_process_vm_readv 270 ++#define __NR_process_vm_writev 271 ++#define __NR_kcmp 272 ++#define __NR_finit_module 273 ++#define __NR_sched_setattr 274 ++#define __NR_sched_getattr 275 ++#define __NR_renameat2 276 ++#define __NR_seccomp 277 ++#define __NR_getrandom 278 ++#define __NR_memfd_create 279 ++#define __NR_bpf 280 ++#define __NR_execveat 281 ++#define __NR_userfaultfd 282 ++#define __NR_membarrier 283 ++#define __NR_mlock2 284 ++#define __NR_copy_file_range 285 ++#define __NR_preadv2 286 ++#define __NR_pwritev2 287 ++#define __NR_pkey_mprotect 288 ++#define __NR_pkey_alloc 289 ++#define __NR_pkey_free 290 ++#define __NR_statx 291 ++#define __NR_io_pgetevents 292 ++#define __NR_rseq 293 ++#define __NR_kexec_file_load 294 ++#define __NR_pidfd_send_signal 424 ++#define __NR_io_uring_setup 425 ++#define __NR_io_uring_enter 426 ++#define __NR_io_uring_register 427 ++#define __NR_open_tree 428 ++#define __NR_move_mount 429 ++#define __NR_fsopen 430 ++#define __NR_fsconfig 431 ++#define __NR_fsmount 432 ++#define __NR_fspick 433 ++#define __NR_pidfd_open 434 ++#define __NR_clone3 435 ++#define __NR_close_range 436 ++#define __NR_openat2 437 ++#define __NR_pidfd_getfd 438 ++#define __NR_faccessat2 439 ++#define __NR_process_madvise 440 ++#define __NR_fcntl __NR3264_fcntl ++#define __NR_statfs __NR3264_statfs ++#define __NR_fstatfs __NR3264_fstatfs ++#define __NR_truncate __NR3264_truncate ++#define __NR_ftruncate __NR3264_ftruncate ++#define __NR_lseek __NR3264_lseek ++#define __NR_sendfile __NR3264_sendfile ++#define __NR_mmap __NR3264_mmap ++#define __NR_fadvise64 __NR3264_fadvise64 +diff --git a/arch/loongarch64/bits/user.h b/arch/loongarch64/bits/user.h +new file mode 100644 +index 00000000..4d4cd534 +--- /dev/null ++++ b/arch/loongarch64/bits/user.h +@@ -0,0 +1,5 @@ ++#define ELF_NGREG 45 ++#define ELF_NFPREG 33 ++ ++typedef unsigned long elf_greg_t, elf_gregset_t[ELF_NGREG]; ++typedef double elf_fpreg_t, elf_fpregset_t[ELF_NFPREG]; +diff --git a/arch/loongarch64/crt_arch.h b/arch/loongarch64/crt_arch.h +new file mode 100644 +index 00000000..e0760d9e +--- /dev/null ++++ b/arch/loongarch64/crt_arch.h +@@ -0,0 +1,13 @@ ++__asm__( ++".text \n" ++".global " START "\n" ++".type " START ", @function\n" ++START ":\n" ++" move $fp, $zero\n" ++" move $a0, $sp\n" ++".weak _DYNAMIC\n" ++".hidden _DYNAMIC\n" ++" la.local $a1, _DYNAMIC\n" ++" bstrins.d $sp, $zero, 3, 0\n" ++" b " START "_c\n" ++); +diff --git a/arch/loongarch64/pthread_arch.h b/arch/loongarch64/pthread_arch.h +new file mode 100644 +index 00000000..95ee4c7a +--- /dev/null ++++ b/arch/loongarch64/pthread_arch.h +@@ -0,0 +1,13 @@ ++static inline uintptr_t __get_tp() ++{ ++ register uintptr_t tp __asm__("tp"); ++ __asm__ ("" : "=r" (tp) ); ++ return tp; ++} ++ ++#define TLS_ABOVE_TP ++#define GAP_ABOVE_TP 0 ++ ++#define DTP_OFFSET 0 ++ ++#define MC_PC pc +diff --git a/arch/loongarch64/reloc.h b/arch/loongarch64/reloc.h +new file mode 100644 +index 00000000..865a648d +--- /dev/null ++++ b/arch/loongarch64/reloc.h +@@ -0,0 +1,27 @@ ++#ifdef __loongarch64_soft_float ++#define FP_SUFFIX "-sf" ++#else ++#define FP_SUFFIX "" ++#endif ++ ++#define LDSO_ARCH 
"loongarch64" FP_SUFFIX ++ ++#define TPOFF_K (0x0) ++ ++#define REL_PLT R_LARCH_JUMP_SLOT ++#define REL_COPY R_LARCH_COPY ++#define REL_DTPMOD R_LARCH_TLS_DTPMOD64 ++#define REL_DTPOFF R_LARCH_TLS_DTPREL64 ++#define REL_TPOFF R_LARCH_TLS_TPREL64 ++#define REL_RELATIVE R_LARCH_RELATIVE ++#define REL_SYMBOLIC R_LARCH_64 ++ ++#define CRTJMP(pc,sp) __asm__ __volatile__( \ ++ "move $sp,%1 ; jr %0" : : "r"(pc), "r"(sp) : "memory" ) ++ ++#define GETFUNCSYM(fp, sym, got) __asm__ ( \ ++ ".hidden " #sym "\n" \ ++ ".align 8 \n" \ ++ " la.local $t1, "#sym" \n" \ ++ " move %0, $t1 \n" \ ++ : "=r"(*(fp)) : : "memory" ) +diff --git a/arch/loongarch64/syscall_arch.h b/arch/loongarch64/syscall_arch.h +new file mode 100644 +index 00000000..4d5e1885 +--- /dev/null ++++ b/arch/loongarch64/syscall_arch.h +@@ -0,0 +1,137 @@ ++#define __SYSCALL_LL_E(x) (x) ++#define __SYSCALL_LL_O(x) (x) ++ ++#define SYSCALL_CLOBBERLIST \ ++ "$t0", "$t1", "$t2", "$t3", \ ++ "$t4", "$t5", "$t6", "$t7", "$t8", "memory" ++ ++static inline long __syscall0(long n) ++{ ++ register long a7 __asm__("$a7") = n; ++ register long a0 __asm__("$a0"); ++ ++ __asm__ __volatile__ ( ++ "syscall 0" ++ : "=r"(a0) ++ : "r"(a7) ++ : SYSCALL_CLOBBERLIST); ++ return a0; ++} ++ ++static inline long __syscall1(long n, long a) ++{ ++ register long a7 __asm__("$a7") = n; ++ register long a0 __asm__("$a0") = a; ++ ++ __asm__ __volatile__ ( ++ "syscall 0" ++ : "+r"(a0) ++ : "r"(a7) ++ : SYSCALL_CLOBBERLIST); ++ return a0; ++} ++ ++static inline long __syscall2(long n, long a, long b) ++{ ++ register long a7 __asm__("$a7") = n; ++ register long a0 __asm__("$a0") = a; ++ register long a1 __asm__("$a1") = b; ++ ++ __asm__ __volatile__ ( ++ "syscall 0" ++ : "+r"(a0) ++ : "r"(a7), "r"(a1) ++ : SYSCALL_CLOBBERLIST); ++ return a0; ++} ++ ++static inline long __syscall3(long n, long a, long b, long c) ++{ ++ register long a7 __asm__("$a7") = n; ++ register long a0 __asm__("$a0") = a; ++ register long a1 __asm__("$a1") = b; ++ register long a2 __asm__("$a2") = c; ++ ++ __asm__ __volatile__ ( ++ "syscall 0" ++ : "+r"(a0) ++ : "r"(a7), "r"(a1), "r"(a2) ++ : SYSCALL_CLOBBERLIST); ++ return a0; ++} ++ ++static inline long __syscall4(long n, long a, long b, long c, long d) ++{ ++ register long a7 __asm__("$a7") = n; ++ register long a0 __asm__("$a0") = a; ++ register long a1 __asm__("$a1") = b; ++ register long a2 __asm__("$a2") = c; ++ register long a3 __asm__("$a3") = d; ++ ++ __asm__ __volatile__ ( ++ "syscall 0" ++ : "+r"(a0) ++ : "r"(a7), "r"(a1), "r"(a2), "r"(a3) ++ : SYSCALL_CLOBBERLIST); ++ return a0; ++} ++ ++static inline long __syscall5(long n, long a, long b, long c, long d, long e) ++{ ++ register long a7 __asm__("$a7") = n; ++ register long a0 __asm__("$a0") = a; ++ register long a1 __asm__("$a1") = b; ++ register long a2 __asm__("$a2") = c; ++ register long a3 __asm__("$a3") = d; ++ register long a4 __asm__("$a4") = e; ++ ++ __asm__ __volatile__ ( ++ "syscall 0" ++ : "+r"(a0) ++ : "r"(a7), "r"(a1), "r"(a2), "r"(a3), "r"(a4) ++ : SYSCALL_CLOBBERLIST); ++ return a0; ++} ++ ++static inline long __syscall6(long n, long a, long b, long c, long d, long e, long f) ++{ ++ register long a7 __asm__("$a7") = n; ++ register long a0 __asm__("$a0") = a; ++ register long a1 __asm__("$a1") = b; ++ register long a2 __asm__("$a2") = c; ++ register long a3 __asm__("$a3") = d; ++ register long a4 __asm__("$a4") = e; ++ register long a5 __asm__("$a5") = f; ++ ++ __asm__ __volatile__ ( ++ "syscall 0" ++ : "+r"(a0) ++ : "r"(a7), "r"(a1), "r"(a2), "r"(a3), "r"(a4), "r"(a5) ++ : 
SYSCALL_CLOBBERLIST); ++ return a0; ++} ++ ++static inline long __syscall7(long n, long a, long b, long c, long d, long e, long f, long g) ++{ ++ register long a7 __asm__("$a7") = n; ++ register long a0 __asm__("$a0") = a; ++ register long a1 __asm__("$a1") = b; ++ register long a2 __asm__("$a2") = c; ++ register long a3 __asm__("$a3") = d; ++ register long a4 __asm__("$a4") = e; ++ register long a5 __asm__("$a5") = f; ++ register long a6 __asm__("$a6") = g; ++ ++ __asm__ __volatile__ ( ++ "syscall 0" ++ : "+r"(a0) ++ : "r"(a7), "r"(a1), "r"(a2), "r"(a3), "r"(a4), "r"(a5), "r"(a6) ++ : SYSCALL_CLOBBERLIST); ++ return a0; ++} ++ ++#define VDSO_USEFUL ++#define VDSO_CGT_SYM "__vdso_clock_gettime" ++#define VDSO_CGT_VER "LINUX_5.10" ++ ++#define IPC_64 0 +diff --git a/configure b/configure +index ca5cbc0b..68062071 100755 +--- a/configure ++++ b/configure +@@ -328,6 +328,7 @@ i?86*) ARCH=i386 ;; + x86_64-x32*|x32*|x86_64*x32) ARCH=x32 ;; + x86_64-nt64*) ARCH=nt64 ;; + x86_64*) ARCH=x86_64 ;; ++loongarch64*) ARCH=loongarch64 ;; + m68k*) ARCH=m68k ;; + mips64*|mipsisa64*) ARCH=mips64 ;; + mips*) ARCH=mips ;; +@@ -723,11 +724,6 @@ fi + test "$SUBARCH" \ + && printf "configured for %s variant: %s\n" "$ARCH" "$ARCH$SUBARCH" + +-case "$ARCH$SUBARCH" in +-arm) ASMSUBARCH=el ;; +-*) ASMSUBARCH=$SUBARCH ;; +-esac +- + # + # Some archs (powerpc) have different possible long double formats + # that the compiler can be configured for. The logic for whether this +diff --git a/crt/loongarch64/crti.s b/crt/loongarch64/crti.s +new file mode 100644 +index 00000000..81c43e6e +--- /dev/null ++++ b/crt/loongarch64/crti.s +@@ -0,0 +1,15 @@ ++.section .init ++.global _init ++_init: ++ addi.d $sp,$sp,-16 ++ st.d $fp,$sp,0 ++ st.d $ra,$sp,8 ++ addi.d $fp,$sp,16 ++ ++.section .fini ++.global _fini ++_fini: ++ addi.d $sp,$sp,-16 ++ st.d $fp,$sp,0 ++ st.d $ra,$sp,8 ++ addi.d $fp,$sp,16 +diff --git a/crt/loongarch64/crtn.s b/crt/loongarch64/crtn.s +new file mode 100644 +index 00000000..ca3fe80e +--- /dev/null ++++ b/crt/loongarch64/crtn.s +@@ -0,0 +1,12 @@ ++.section .init ++ ld.d $fp,$sp,0 ++ ld.d $ra,$sp,8 ++ addi.d $sp,$sp,16 ++ jr $ra ++ ++ ++.section .fini ++ ld.d $fp,$sp,0 ++ ld.d $ra,$sp,8 ++ addi.d $sp,$sp,16 ++ jr $ra +diff --git a/include/elf.h b/include/elf.h +index 86e2f0bb..1b0e9e71 100644 +--- a/include/elf.h ++++ b/include/elf.h +@@ -697,6 +697,11 @@ typedef struct { + #define NT_MIPS_FP_MODE 0x801 + #define NT_MIPS_MSA 0x802 + #define NT_VERSION 1 ++#define NT_LOONGARCH_CPUCFG 0xa00 ++#define NT_LOONGARCH_CSR 0xa01 ++#define NT_LOONGARCH_LSX 0xa02 ++#define NT_LOONGARCH_LASX 0xa03 ++#define NT_LOONGARCH_LBT 0xa04 + + + +@@ -3288,6 +3293,66 @@ enum + #define R_RISCV_SET32 56 + #define R_RISCV_32_PCREL 57 + ++/* LoongArch ELF Flags */ ++#define EM_LOONGARCH 258 ++ ++#define EF_LARCH_ABI 0x07 ++#define EF_LARCH_ABI_LP64D 0x03 ++ ++/* LoongArch specific dynamic relocations. 
*/ ++#define R_LARCH_NONE 0 ++#define R_LARCH_32 1 ++#define R_LARCH_64 2 ++#define R_LARCH_RELATIVE 3 ++#define R_LARCH_COPY 4 ++#define R_LARCH_JUMP_SLOT 5 ++#define R_LARCH_TLS_DTPMOD32 6 ++#define R_LARCH_TLS_DTPMOD64 7 ++#define R_LARCH_TLS_DTPREL32 8 ++#define R_LARCH_TLS_DTPREL64 9 ++#define R_LARCH_TLS_TPREL32 10 ++#define R_LARCH_TLS_TPREL64 11 ++#define R_LARCH_IRELATIVE 12 ++#define R_LARCH_MARK_LA 20 ++#define R_LARCH_MARK_PCREL 21 ++#define R_LARCH_SOP_PUSH_PCREL 22 ++#define R_LARCH_SOP_PUSH_ABSOLUTE 23 ++#define R_LARCH_SOP_PUSH_DUP 24 ++#define R_LARCH_SOP_PUSH_GPREL 25 ++#define R_LARCH_SOP_PUSH_TLS_TPREL 26 ++#define R_LARCH_SOP_PUSH_TLS_GOT 27 ++#define R_LARCH_SOP_PUSH_TLS_GD 28 ++#define R_LARCH_SOP_PUSH_PLT_PCREL 29 ++#define R_LARCH_SOP_ASSERT 30 ++#define R_LARCH_SOP_NOT 31 ++#define R_LARCH_SOP_SUB 32 ++#define R_LARCH_SOP_SL 33 ++#define R_LARCH_SOP_SR 34 ++#define R_LARCH_SOP_ADD 35 ++#define R_LARCH_SOP_AND 36 ++#define R_LARCH_SOP_IF_ELSE 37 ++#define R_LARCH_SOP_POP_32_S_10_5 38 ++#define R_LARCH_SOP_POP_32_U_10_12 39 ++#define R_LARCH_SOP_POP_32_S_10_12 40 ++#define R_LARCH_SOP_POP_32_S_10_16 41 ++#define R_LARCH_SOP_POP_32_S_10_16_S2 42 ++#define R_LARCH_SOP_POP_32_S_5_20 43 ++#define R_LARCH_SOP_POP_32_S_0_5_10_16_S2 44 ++#define R_LARCH_SOP_POP_32_S_0_10_10_16_S2 45 ++#define R_LARCH_SOP_POP_32_U 46 ++#define R_LARCH_ADD8 47 ++#define R_LARCH_ADD16 48 ++#define R_LARCH_ADD24 49 ++#define R_LARCH_ADD32 50 ++#define R_LARCH_ADD64 51 ++#define R_LARCH_SUB8 52 ++#define R_LARCH_SUB16 53 ++#define R_LARCH_SUB24 54 ++#define R_LARCH_SUB32 55 ++#define R_LARCH_SUB64 56 ++#define R_LARCH_GNU_VTINHERIT 57 ++#define R_LARCH_GNU_VTENTRY 58 ++ + #ifdef __cplusplus + } + #endif +diff --git a/include/unistd.h b/include/unistd.h +index 212263a7..80be3b26 100644 +--- a/include/unistd.h ++++ b/include/unistd.h +@@ -467,6 +467,8 @@ pid_t gettid(void); + #define _CS_POSIX_V7_LPBIG_OFFBIG_LINTFLAGS 1147 + #define _CS_V6_ENV 1148 + #define _CS_V7_ENV 1149 ++#define _CS_POSIX_V7_THREADS_CFLAGS 1150 ++#define _CS_POSIX_V7_THREADS_LDFLAGS 1151 + + #ifdef __cplusplus + } +diff --git a/src/conf/confstr.c b/src/conf/confstr.c +index 02cb1aa2..3d417284 100644 +--- a/src/conf/confstr.c ++++ b/src/conf/confstr.c +@@ -7,7 +7,7 @@ size_t confstr(int name, char *buf, size_t len) + const char *s = ""; + if (!name) { + s = "/bin:/usr/bin"; +- } else if ((name&~4U)!=1 && name-_CS_POSIX_V6_ILP32_OFF32_CFLAGS>33U) { ++ } else if ((name&~4U)!=1 && name-_CS_POSIX_V6_ILP32_OFF32_CFLAGS>35U) { + errno = EINVAL; + return 0; + } +diff --git a/src/fenv/loongarch64/fenv.S b/src/fenv/loongarch64/fenv.S +new file mode 100644 +index 00000000..aa012c97 +--- /dev/null ++++ b/src/fenv/loongarch64/fenv.S +@@ -0,0 +1,72 @@ ++#ifndef __loongarch_soft_float ++ ++.global feclearexcept ++.type feclearexcept,@function ++feclearexcept: ++ li.w $a1, 0x1f0000 ++ and $a0, $a0, $a1 ++ movfcsr2gr $a1, $r0 ++ or $a1, $a1, $a0 ++ xor $a1, $a1, $a0 ++ movgr2fcsr $r0, $a1 ++ li.w $a0, 0 ++ jr $ra ++ ++.global feraiseexcept ++.type feraiseexcept,@function ++feraiseexcept: ++ li.w $a1, 0x1f0000 ++ and $a0, $a0, $a1 ++ movfcsr2gr $a1, $r0 ++ or $a1, $a1, $a0 ++ movgr2fcsr $r0, $a1 ++ li.w $a0, 0 ++ jr $ra ++ ++.global fetestexcept ++.type fetestexcept,@function ++fetestexcept: ++ li.w $a1, 0x1f0000 ++ and $a0, $a0, $a1 ++ movfcsr2gr $a1, $r0 ++ and $a0, $a1, $a0 ++ jr $ra ++ ++.global fegetround ++.type fegetround,@function ++fegetround: ++ movfcsr2gr $a0, $r0 ++ andi $a0, $a0, 0x300 //fcsr0.RM ++ jr $ra ++ ++.global 
__fesetround ++.hidden __fesetround ++.type __fesetround,@function ++__fesetround: ++ movfcsr2gr $a1, $r0 ++ li.w $a2, -769 //0xfffffcff ++ and $a1, $a1, $a2 ++ or $a1, $a1, $a0 ++ movgr2fcsr $r0, $a1 ++ li.w $a0, 0 ++ jr $ra ++ ++.global fegetenv ++.type fegetenv,@function ++fegetenv: ++ movfcsr2gr $a1, $r0 ++ st.w $a1, $a0, 0 ++ li.w $a0, 0 ++ jr $ra ++ ++.global fesetenv ++.type fesetenv,@function ++fesetenv: ++ addi.d $a1, $a0, 1 ++ beq $a1, $r0, 1f ++ ld.w $a1, $a0, 0 ++1: movgr2fcsr $r0, $a1 ++ li.w $a0, 0 ++ jr $ra ++ ++#endif +diff --git a/src/include/sys/stat.h b/src/include/sys/stat.h +new file mode 100644 +index 00000000..59339bee +--- /dev/null ++++ b/src/include/sys/stat.h +@@ -0,0 +1,9 @@ ++#ifndef SYS_STAT_H ++#define SYS_STAT_H ++ ++#include "../../../include/sys/stat.h" ++ ++hidden int __fstat(int, struct stat *); ++hidden int __fstatat(int, const char *restrict, struct stat *restrict, int); ++ ++#endif +diff --git a/src/internal/syscall.h b/src/internal/syscall.h +index d5f294d4..4f41e1dc 100644 +--- a/src/internal/syscall.h ++++ b/src/internal/syscall.h +@@ -201,43 +201,43 @@ static inline long __alt_socketcall(int sys, int sock, int cp, long a, long b, l + #define SYS_sendfile SYS_sendfile64 + #endif + +-#ifndef SYS_timer_settime ++#ifdef SYS_timer_settime32 + #define SYS_timer_settime SYS_timer_settime32 + #endif + +-#ifndef SYS_timer_gettime ++#ifdef SYS_timer_gettime32 + #define SYS_timer_gettime SYS_timer_gettime32 + #endif + +-#ifndef SYS_timerfd_settime ++#ifdef SYS_timerfd_settime32 + #define SYS_timerfd_settime SYS_timerfd_settime32 + #endif + +-#ifndef SYS_timerfd_gettime ++#ifdef SYS_timerfd_gettime32 + #define SYS_timerfd_gettime SYS_timerfd_gettime32 + #endif + +-#ifndef SYS_clock_settime ++#ifdef SYS_clock_settime32 + #define SYS_clock_settime SYS_clock_settime32 + #endif + +-#ifndef SYS_clock_gettime ++#ifdef SYS_clock_gettime32 + #define SYS_clock_gettime SYS_clock_gettime32 + #endif + +-#ifndef SYS_clock_getres ++#ifdef SYS_clock_getres_time32 + #define SYS_clock_getres SYS_clock_getres_time32 + #endif + +-#ifndef SYS_clock_nanosleep ++#ifdef SYS_clock_nanosleep_time32 + #define SYS_clock_nanosleep SYS_clock_nanosleep_time32 + #endif + +-#ifndef SYS_gettimeofday ++#ifdef SYS_gettimeofday_time32 + #define SYS_gettimeofday SYS_gettimeofday_time32 + #endif + +-#ifndef SYS_settimeofday ++#ifdef SYS_settimeofday_time32 + #define SYS_settimeofday SYS_settimeofday_time32 + #endif + +diff --git a/src/ldso/loongarch64/dlsym.s b/src/ldso/loongarch64/dlsym.s +new file mode 100644 +index 00000000..edb8214c +--- /dev/null ++++ b/src/ldso/loongarch64/dlsym.s +@@ -0,0 +1,7 @@ ++.global dlsym ++.hidden __dlsym ++.type dlsym,@function ++dlsym: ++ move $a2, $ra ++ la.global $t0, __dlsym ++ jr $t0 +diff --git a/src/misc/getrlimit.c b/src/misc/getrlimit.c +index 2ab2f0f4..bf676307 100644 +--- a/src/misc/getrlimit.c ++++ b/src/misc/getrlimit.c +@@ -6,12 +6,13 @@ + + int getrlimit(int resource, struct rlimit *rlim) + { +- unsigned long k_rlim[2]; + int ret = syscall(SYS_prlimit64, 0, resource, 0, rlim); + if (!ret) { + FIX(rlim->rlim_cur); + FIX(rlim->rlim_max); + } ++#ifdef SYS_getrlimit ++ unsigned long k_rlim[2]; + if (!ret || errno != ENOSYS) + return ret; + if (syscall(SYS_getrlimit, resource, k_rlim) < 0) +@@ -21,6 +22,9 @@ int getrlimit(int resource, struct rlimit *rlim) + FIX(rlim->rlim_cur); + FIX(rlim->rlim_max); + return 0; ++#else ++ return ret; ++#endif + } + + weak_alias(getrlimit, getrlimit64); +diff --git a/src/misc/setrlimit.c b/src/misc/setrlimit.c +index 
8340aee0..5b713cf3 100644 +--- a/src/misc/setrlimit.c ++++ b/src/misc/setrlimit.c +@@ -12,12 +12,14 @@ struct ctx { + int err; + }; + ++#ifdef SYS_setrlimit + static void do_setrlimit(void *p) + { + struct ctx *c = p; + if (c->err>0) return; + c->err = -__syscall(SYS_setrlimit, c->res, c->lim); + } ++#endif + + int setrlimit(int resource, const struct rlimit *rlim) + { +@@ -29,6 +31,7 @@ int setrlimit(int resource, const struct rlimit *rlim) + rlim = &tmp; + } + int ret = __syscall(SYS_prlimit64, 0, resource, rlim, 0); ++#ifdef SYS_setrlimit + if (ret != -ENOSYS) return __syscall_ret(ret); + + struct ctx c = { +@@ -42,6 +45,9 @@ int setrlimit(int resource, const struct rlimit *rlim) + return -1; + } + return 0; ++#else ++ return __syscall_ret(ret); ++#endif + } + + weak_alias(setrlimit, setrlimit64); +diff --git a/src/network/netlink.h b/src/network/netlink.h +index 38acb178..873fabe2 100644 +--- a/src/network/netlink.h ++++ b/src/network/netlink.h +@@ -86,7 +86,7 @@ struct ifaddrmsg { + #define RTA_DATALEN(rta) ((rta)->rta_len-sizeof(struct rtattr)) + #define RTA_DATAEND(rta) ((char*)(rta)+(rta)->rta_len) + #define RTA_NEXT(rta) (struct rtattr*)((char*)(rta)+NETLINK_ALIGN((rta)->rta_len)) +-#define RTA_OK(nlh,end) ((char*)(end)-(char*)(rta) >= sizeof(struct rtattr)) ++#define RTA_OK(rta,end) ((char*)(end)-(char*)(rta) >= sizeof(struct rtattr)) + + #define NLMSG_RTA(nlh,len) ((void*)((char*)(nlh)+sizeof(struct nlmsghdr)+NETLINK_ALIGN(len))) + #define NLMSG_RTAOK(rta,nlh) RTA_OK(rta,NLMSG_DATAEND(nlh)) +diff --git a/src/setjmp/loongarch64/longjmp.S b/src/setjmp/loongarch64/longjmp.S +new file mode 100644 +index 00000000..4186974f +--- /dev/null ++++ b/src/setjmp/loongarch64/longjmp.S +@@ -0,0 +1,37 @@ ++.global _longjmp ++.global longjmp ++.type _longjmp,@function ++.type longjmp,@function ++_longjmp: ++longjmp: ++ move $t5, $a0 ++ move $a0, $a1 ++ ++ bne $a0, $zero, 1f ++ addi.d $a0, $a0, 1 ++ ++1: ++ ld.d $ra, $t5, 0 ++ ld.d $sp, $t5, 8 ++ ld.d $r21,$t5, 16 ++ ld.d $fp, $t5, 24 ++ ld.d $s0, $t5, 32 ++ ld.d $s1, $t5, 40 ++ ld.d $s2, $t5, 48 ++ ld.d $s3, $t5, 56 ++ ld.d $s4, $t5, 64 ++ ld.d $s5, $t5, 72 ++ ld.d $s6, $t5, 80 ++ ld.d $s7, $t5, 88 ++ ld.d $s8, $t5, 96 ++#ifndef __loongarch64_soft_float ++ fld.d $fs0, $t5, 104 ++ fld.d $fs1, $t5, 112 ++ fld.d $fs2, $t5, 120 ++ fld.d $fs3, $t5, 128 ++ fld.d $fs4, $t5, 136 ++ fld.d $fs5, $t5, 144 ++ fld.d $fs6, $t5, 152 ++ fld.d $fs7, $t5, 160 ++#endif ++ jr $ra +diff --git a/src/setjmp/loongarch64/setjmp.S b/src/setjmp/loongarch64/setjmp.S +new file mode 100644 +index 00000000..f3bb7c70 +--- /dev/null ++++ b/src/setjmp/loongarch64/setjmp.S +@@ -0,0 +1,34 @@ ++.global __setjmp ++.global _setjmp ++.global setjmp ++.type __setjmp,@function ++.type _setjmp,@function ++.type setjmp,@function ++__setjmp: ++_setjmp: ++setjmp: ++ st.d $ra, $a0, 0 ++ st.d $sp, $a0, 8 ++ st.d $r21,$a0, 16 ++ st.d $fp, $a0, 24 ++ st.d $s0, $a0, 32 ++ st.d $s1, $a0, 40 ++ st.d $s2, $a0, 48 ++ st.d $s3, $a0, 56 ++ st.d $s4, $a0, 64 ++ st.d $s5, $a0, 72 ++ st.d $s6, $a0, 80 ++ st.d $s7, $a0, 88 ++ st.d $s8, $a0, 96 ++#ifndef __loongarch64_soft_float ++ fst.d $fs0, $a0, 104 ++ fst.d $fs1, $a0, 112 ++ fst.d $fs2, $a0, 120 ++ fst.d $fs3, $a0, 128 ++ fst.d $fs4, $a0, 136 ++ fst.d $fs5, $a0, 144 ++ fst.d $fs6, $a0, 152 ++ fst.d $fs7, $a0, 160 ++#endif ++ xor $a0, $a0, $a0 ++ jr $ra +diff --git a/src/signal/loongarch64/restore.s b/src/signal/loongarch64/restore.s +new file mode 100644 +index 00000000..bca17eb7 +--- /dev/null ++++ b/src/signal/loongarch64/restore.s +@@ -0,0 +1,10 @@ 
++.global __restore_rt ++.global __restore ++.hidden __restore_rt ++.hidden __restore ++.type __restore_rt,@function ++.type __restore,@function ++__restore_rt: ++__restore: ++ li.w $a7, 139 ++ syscall 0 +diff --git a/src/signal/loongarch64/sigsetjmp.s b/src/signal/loongarch64/sigsetjmp.s +new file mode 100644 +index 00000000..abd96c62 +--- /dev/null ++++ b/src/signal/loongarch64/sigsetjmp.s +@@ -0,0 +1,29 @@ ++.global sigsetjmp ++.global __sigsetjmp ++.type sigsetjmp,@function ++.type __sigsetjmp,@function ++sigsetjmp: ++__sigsetjmp: ++ move $t5, $a0 ++ move $t6, $a1 ++ ++ # comparing save mask with 0, if equals to 0 then ++ # sigsetjmp is equal to setjmp. ++ beq $t6, $zero, 1f ++ st.d $ra, $t5, 168 ++ ++ # save base of got so that we can use it later ++ # once we return from 'longjmp' ++ la.global $t8, setjmp ++ jirl $ra, $t8, 0 ++ ++ move $a1, $a0 # Return from 'setjmp' or 'longjmp' ++ ld.d $ra, $t5, 168 # Restore ra of sigsetjmp ++ move $a0, $t5 ++ ++.hidden __sigsetjmp_tail ++ la.global $t8, __sigsetjmp_tail ++ jr $t8 ++1: ++ la.global $t8, setjmp ++ jr $t8 +diff --git a/src/stat/fchmodat.c b/src/stat/fchmodat.c +index 4ee00b0a..bc581050 100644 +--- a/src/stat/fchmodat.c ++++ b/src/stat/fchmodat.c +@@ -2,7 +2,6 @@ + #include + #include + #include "syscall.h" +-#include "kstat.h" + + int fchmodat(int fd, const char *path, mode_t mode, int flag) + { +@@ -11,12 +10,12 @@ int fchmodat(int fd, const char *path, mode_t mode, int flag) + if (flag != AT_SYMLINK_NOFOLLOW) + return __syscall_ret(-EINVAL); + +- struct kstat st; ++ struct stat st; + int ret, fd2; + char proc[15+3*sizeof(int)]; + +- if ((ret = __syscall(SYS_fstatat, fd, path, &st, flag))) +- return __syscall_ret(ret); ++ if (fstatat(fd, path, &st, flag)) ++ return -1; + if (S_ISLNK(st.st_mode)) + return __syscall_ret(-EOPNOTSUPP); + +@@ -27,12 +26,12 @@ int fchmodat(int fd, const char *path, mode_t mode, int flag) + } + + __procfdname(proc, fd2); +- ret = __syscall(SYS_fstatat, AT_FDCWD, proc, &st, 0); ++ ret = stat(proc, &st); + if (!ret) { +- if (S_ISLNK(st.st_mode)) ret = -EOPNOTSUPP; +- else ret = __syscall(SYS_fchmodat, AT_FDCWD, proc, mode); ++ if (S_ISLNK(st.st_mode)) ret = __syscall_ret(-EOPNOTSUPP); ++ else ret = syscall(SYS_fchmodat, AT_FDCWD, proc, mode); + } + + __syscall(SYS_close, fd2); +- return __syscall_ret(ret); ++ return ret; + } +diff --git a/src/stat/fstat.c b/src/stat/fstat.c +index 9bbb46de..27db0ccb 100644 +--- a/src/stat/fstat.c ++++ b/src/stat/fstat.c +@@ -4,12 +4,14 @@ + #include + #include "syscall.h" + +-int fstat(int fd, struct stat *st) ++int __fstat(int fd, struct stat *st) + { + if (fd<0) return __syscall_ret(-EBADF); +- return fstatat(fd, "", st, AT_EMPTY_PATH); ++ return __fstatat(fd, "", st, AT_EMPTY_PATH); + } + ++weak_alias(__fstat, fstat); ++ + #if !_REDIR_TIME64 + weak_alias(fstat, fstat64); + #endif +diff --git a/src/stat/fstatat.c b/src/stat/fstatat.c +index de165b5c..74c51cf5 100644 +--- a/src/stat/fstatat.c ++++ b/src/stat/fstatat.c +@@ -6,7 +6,6 @@ + #include + #include + #include "syscall.h" +-#include "kstat.h" + + struct statx { + uint32_t stx_mask; +@@ -69,6 +68,10 @@ static int fstatat_statx(int fd, const char *restrict path, struct stat *restric + return 0; + } + ++#ifdef SYS_fstatat ++ ++#include "kstat.h" ++ + static int fstatat_kstat(int fd, const char *restrict path, struct stat *restrict st, int flag) + { + int ret; +@@ -130,18 +133,25 @@ static int fstatat_kstat(int fd, const char *restrict path, struct stat *restric + + return 0; + } ++#endif + +-int fstatat(int fd, const 
char *restrict path, struct stat *restrict st, int flag) ++int __fstatat(int fd, const char *restrict path, struct stat *restrict st, int flag) + { + int ret; ++#ifdef SYS_fstatat + if (sizeof((struct kstat){0}.st_atime_sec) < sizeof(time_t)) { + ret = fstatat_statx(fd, path, st, flag); + if (ret!=-ENOSYS) return __syscall_ret(ret); + } + ret = fstatat_kstat(fd, path, st, flag); ++#else ++ ret = fstatat_statx(fd, path, st, flag); ++#endif + return __syscall_ret(ret); + } + ++weak_alias(__fstatat, fstatat); ++ + #if !_REDIR_TIME64 + weak_alias(fstatat, fstatat64); + #endif +diff --git a/src/stdio/tempnam.c b/src/stdio/tempnam.c +index 565df6b6..0c65b1f0 100644 +--- a/src/stdio/tempnam.c ++++ b/src/stdio/tempnam.c +@@ -6,7 +6,6 @@ + #include + #include + #include "syscall.h" +-#include "kstat.h" + + #define MAXTRIES 100 + +@@ -37,11 +36,10 @@ char *tempnam(const char *dir, const char *pfx) + + for (try=0; try + #include + #include "syscall.h" +-#include "kstat.h" + + #define MAXTRIES 100 + +@@ -17,11 +16,10 @@ char *tmpnam(char *buf) + int r; + for (try=0; try + #include + #include "syscall.h" +-#include "kstat.h" + + const char unsigned *__map_file(const char *pathname, size_t *size) + { +- struct kstat st; ++ struct stat st; + const unsigned char *map = MAP_FAILED; + int fd = sys_open(pathname, O_RDONLY|O_CLOEXEC|O_NONBLOCK); + if (fd < 0) return 0; +- if (!syscall(SYS_fstat, fd, &st)) { ++ if (!__fstat(fd, &st)) { + map = __mmap(0, st.st_size, PROT_READ, MAP_SHARED, fd, 0); + *size = st.st_size; + } +diff --git a/src/time/clock_gettime.c b/src/time/clock_gettime.c +index 3e1d0975..c7e66a51 100644 +--- a/src/time/clock_gettime.c ++++ b/src/time/clock_gettime.c +@@ -80,10 +80,12 @@ int __clock_gettime(clockid_t clk, struct timespec *ts) + return __syscall_ret(r); + long ts32[2]; + r = __syscall(SYS_clock_gettime, clk, ts32); ++#ifdef SYS_gettimeofday + if (r==-ENOSYS && clk==CLOCK_REALTIME) { + r = __syscall(SYS_gettimeofday, ts32, 0); + ts32[1] *= 1000; + } ++#endif + if (!r) { + ts->tv_sec = ts32[0]; + ts->tv_nsec = ts32[1]; +@@ -92,6 +94,7 @@ int __clock_gettime(clockid_t clk, struct timespec *ts) + return __syscall_ret(r); + #else + r = __syscall(SYS_clock_gettime, clk, ts); ++#ifdef SYS_gettimeofday + if (r == -ENOSYS) { + if (clk == CLOCK_REALTIME) { + __syscall(SYS_gettimeofday, ts, 0); +@@ -100,6 +103,7 @@ int __clock_gettime(clockid_t clk, struct timespec *ts) + } + r = -EINVAL; + } ++#endif + return __syscall_ret(r); + #endif + } diff --git a/musl/PKGBUILD b/musl/PKGBUILD index 4c2027d1e9..cd910e68a3 100644 --- a/musl/PKGBUILD +++ b/musl/PKGBUILD @@ -11,9 +11,16 @@ url='https://www.musl-libc.org/' license=('MIT') options=('staticlibs' '!buildflags') validpgpkeys=('836489290BB6B70F99FFDA0556BCDB593020450F') -source=(https://www.musl-libc.org/releases/musl-$pkgver.tar.gz{,.asc}) +source=(https://www.musl-libc.org/releases/musl-$pkgver.tar.gz{,.asc} + 0001-musl-add-loongarch64-support.patch) sha256sums=('7a35eae33d5372a7c0da1188de798726f68825513b7ae3ebe97aaaa52114f039' - 'SKIP') + 'SKIP' + '3c49faf5ec25f2b0591f5d6c2ea73d11f3257802e696493a45d1293d85108ccb') + +prepare() { + cd "$pkgname-$pkgver" + patch -p1 -i "$srcdir/0001-musl-add-loongarch64-support.patch" +} build() { cd $pkgname-$pkgver diff --git a/mutter/PKGBUILD b/mutter/PKGBUILD index 83fef66875..6c534c147d 100644 --- a/mutter/PKGBUILD +++ b/mutter/PKGBUILD @@ -47,7 +47,7 @@ makedepends=( xorg-server xorg-server-xvfb ) -checkdepends=( +makedepends+=( gnome-session python-dbusmock wireplumber diff --git 
a/nautilus-sendto/PKGBUILD b/nautilus-sendto/PKGBUILD index 06578e52a3..0d5b061caa 100644 --- a/nautilus-sendto/PKGBUILD +++ b/nautilus-sendto/PKGBUILD @@ -13,8 +13,10 @@ license=(GPL) depends=(glib2) makedepends=(gobject-introspection git meson appstream-glib) _commit=c87aac46c4893e09b1ced1cca8bb86b0a6823124 # master -source=("git+https://gitlab.gnome.org/Archive/nautilus-sendto.git#commit=$_commit") -sha256sums=('SKIP') +source=("git+https://gitlab.gnome.org/Archive/nautilus-sendto.git#commit=$_commit" + nautilus-sendto-fix-meson.patch) +sha256sums=('SKIP' + '2ba3e793e49b67866e70e65e6cd7fb8029dee62681a5dcd2d9ba1fb494a24eeb') pkgver() { cd $pkgname @@ -23,6 +25,7 @@ pkgver() { prepare() { cd $pkgname + patch -p1 -i $srcdir/nautilus-sendto-fix-meson.patch } build() { diff --git a/nautilus-sendto/nautilus-sendto-fix-meson.patch b/nautilus-sendto/nautilus-sendto-fix-meson.patch new file mode 100644 index 0000000000..9257119f2d --- /dev/null +++ b/nautilus-sendto/nautilus-sendto-fix-meson.patch @@ -0,0 +1,13 @@ +diff --git a/src/meson.build b/src/meson.build +index 069eaf8..a9638c1 100644 +--- a/src/meson.build ++++ b/src/meson.build +@@ -7,7 +7,7 @@ executable('nautilus-sendto', + + po_dir = join_paths(meson.source_root(), 'po') + +-i18n.merge_file ('appdata', ++i18n.merge_file ( + input: 'nautilus-sendto.metainfo.xml.in', + output: 'nautilus-sendto.metainfo.xml', + install: true, diff --git a/navi/PKGBUILD b/navi/PKGBUILD index 2834bad712..4a042af120 100644 --- a/navi/PKGBUILD +++ b/navi/PKGBUILD @@ -17,7 +17,7 @@ sha256sums=('579a72814e7ba07dae697a58dc13b0f7d853532ec07229aff07a11e5828f3799') prepare() { cd "$pkgname-$pkgver" - cargo fetch --locked --target "$CARCH-unknown-linux-gnu" + cargo fetch --locked --target "`uname -m`-unknown-linux-gnu" mkdir completions } diff --git a/ncspot/PKGBUILD b/ncspot/PKGBUILD index 13dd202142..27fe2fbfba 100644 --- a/ncspot/PKGBUILD +++ b/ncspot/PKGBUILD @@ -17,7 +17,7 @@ options=('!lto') prepare() { cd "${srcdir}/${pkgname}-${pkgver}" - cargo fetch --locked --target "$CARCH-unknown-linux-gnu" + cargo fetch --locked --target "`uname -m`-unknown-linux-gnu" } build() { diff --git a/neofetch/PKGBUILD b/neofetch/PKGBUILD index 346e94e367..41817b73da 100644 --- a/neofetch/PKGBUILD +++ b/neofetch/PKGBUILD @@ -25,8 +25,15 @@ optdepends=( 'xorg-xrandr: Resolution detection (Multi Monitor + Refresh rates)' 'xorg-xwininfo: See https://github.com/dylanaraps/neofetch/wiki/Images-in-the-terminal' ) -source=("${pkgname}-${pkgver}.tar.gz::https://github.com/dylanaraps/neofetch/archive/${pkgver}.tar.gz") -sha256sums=('58a95e6b714e41efc804eca389a223309169b2def35e57fa934482a6b47c27e7') +source=("${pkgname}-${pkgver}.tar.gz::https://github.com/dylanaraps/neofetch/archive/${pkgver}.tar.gz" + neofetch-la64.patch) +sha256sums=('58a95e6b714e41efc804eca389a223309169b2def35e57fa934482a6b47c27e7' + 'ef425de4bd5350b32af8596346201ad9531b72614a4cbbe14128e472131b6ec9') + +prepare() { + cd "${pkgname}-${pkgver}/" + patch -p1 -i "$srcdir/neofetch-la64.patch" +} package() { cd "${pkgname}-${pkgver}/" diff --git a/neofetch/neofetch-la64.patch b/neofetch/neofetch-la64.patch new file mode 100644 index 0000000000..0e78376eb5 --- /dev/null +++ b/neofetch/neofetch-la64.patch @@ -0,0 +1,15 @@ +Index: neofetch-7.1.0/neofetch +=================================================================== +--- neofetch-7.1.0.orig/neofetch ++++ neofetch-7.1.0/neofetch +@@ -2088,8 +2089,8 @@ get_cpu() { + + *) + cpu="$(awk -F '\\s*: | @' \ +- '/model name|Hardware|Processor|^cpu model|chip type|^cpu type/ { +- 
cpu=$2; if ($1 == "Hardware") exit } END { print cpu }' "$cpu_file")" ++ '/[mM]odel [nN]ame|Hardware|Processor|^cpu model|chip type|^cpu type/ { ++ cpu=$2; if ($1 == "Hardware" || $1 == "Model Name") exit } END { print cpu }' "$cpu_file")" + ;; + esac + diff --git a/netavark/PKGBUILD b/netavark/PKGBUILD index f50df46f86..4bffa4c697 100644 --- a/netavark/PKGBUILD +++ b/netavark/PKGBUILD @@ -35,7 +35,7 @@ pkgver() { prepare() { cd $pkgname - cargo fetch --locked --target "$CARCH-unknown-linux-gnu" + cargo fetch --target "`uname -m`-unknown-linux-gnu" } build() { diff --git a/netpbm/PKGBUILD b/netpbm/PKGBUILD index 30d47288c7..839e53a053 100644 --- a/netpbm/PKGBUILD +++ b/netpbm/PKGBUILD @@ -37,6 +37,8 @@ validpgpkeys=('5357F3B111688D88C1D88119FCF2CB179205AC90') prepare() { cd $pkgname-$pkgver + CFLAGS=${CFLAGS/-Wformat -Werror=format-security/} + CXXFLAGS=${CXXFLAGS/-Wformat -Werror=format-security/} patch -p1 < ../netpbm-CAN-2005-2471.patch patch -p1 < ../netpbm-security-code.patch @@ -45,6 +47,7 @@ prepare() { cp config.mk.in config.mk [ "${CARCH}" = 'x86_64' ] && echo 'CFLAGS_SHLIB = -fPIC' >> config.mk + [ "${CARCH}" = 'loong64' ] && echo 'CFLAGS_SHLIB = -fPIC' >> config.mk echo "NETPBM_DOCURL = file://${srcdir}/doc" >> config.mk echo 'TIFFLIB = libtiff.so' >> config.mk echo 'JPEGLIB = libjpeg.so' >> config.mk diff --git a/netplan/PKGBUILD b/netplan/PKGBUILD index 14265ef30c..9ffac7d7ed 100644 --- a/netplan/PKGBUILD +++ b/netplan/PKGBUILD @@ -28,12 +28,14 @@ makedepends=( 'bash-completion' 'cmocka' 'meson' - 'pandoc' +#'pandoc' 'python-coverage' 'python-pycodestyle' 'python-pyflakes' 'python-pytest' 'python-pytest-cov' + 'python' + 'pkgconf' ) checkdepends=( 'openvswitch' @@ -44,13 +46,16 @@ source=( "$pkgname-$pkgver.tar.gz::$url/archive/${pkgver}.tar.gz" "$pkgname-0.107-disable_tests.patch" "$pkgname-0.107-use_lib.patch" + netplan-disable-pandoc.patch ) sha512sums=('6f6e3e4f179a2a74b46239cbd86919555a769d001b6fc8ba8daba3894b26415dbb8124f6a594de9afc14dc50d6979e58f195adfba2a53854b882d7cb6621fea7' '7f0b512a29c458e7e25f1387ce7f6730df47af48b35a5f2915a9f6a57fddf53a2c1d0690dfd81cd2dbbf40efa16f332bac63180ab3e589dcf5087f13775d040b' - '64d6e97cb7c330fc2e28872a34e0be4190c6e75e5235de6ff97ef77c4c287f814b85503c798d959b88037fb53af0459c6a38768b7420f12aac79db64db127378') + '64d6e97cb7c330fc2e28872a34e0be4190c6e75e5235de6ff97ef77c4c287f814b85503c798d959b88037fb53af0459c6a38768b7420f12aac79db64db127378' + '7ef1441fcb4d6703ebe8298a9f7c9c5583aebfb65894f291864e745186b5646770a2c8db199e0dc945080e8ea05db4f262723190a43357a279fdd5d2f23fd8d8') b2sums=('8fcd3ae48e0f3e1eb0486cf4f7d76d17f61b3b8212363c9f00a1c1e6d265b1093cd7bc283e659091a44ef01c1346f47fc663d43387d72b95305ce2bb2c5b808a' 'd8f5e36ef67fa43fa7c93eed2fbec54a4e934b9e994178820abda2473be646df009fc537ebc5fc067f8400c0ce3e28f66dfcf681b378cf2363f82acd79930149' - '7ca1aa7b5449e23b310f32d16bf0f50525c4175d26eb591489143d0acef8cda7969502bff26402dbba10d3ea45c3c6768c28bf22c74b9f0a807bc953acf5871a') + '7ca1aa7b5449e23b310f32d16bf0f50525c4175d26eb591489143d0acef8cda7969502bff26402dbba10d3ea45c3c6768c28bf22c74b9f0a807bc953acf5871a' + 'ecbb600bf9acc18d26ed55c3e1b26ff4189665d1721a386aab5615b71beadb6089e948e316fa76c1c96870a2fed426cf76b559677c857004d765c3d55c15c3b4') prepare() { # adapt names of python things to our names @@ -61,6 +66,7 @@ prepare() { patch -Np1 -d $pkgname-$pkgver -i ../$pkgname-0.107-disable_tests.patch # use /usr/lib instead of /usr/libexec patch -Np1 -d $pkgname-$pkgver -i ../$pkgname-0.107-use_lib.patch + patch -p1 -i $srcdir/netplan-disable-pandoc.patch } 
build() { diff --git a/netplan/netplan-disable-pandoc.patch b/netplan/netplan-disable-pandoc.patch new file mode 100644 index 0000000000..96ca36b8f2 --- /dev/null +++ b/netplan/netplan-disable-pandoc.patch @@ -0,0 +1,27 @@ +Index: netplan-0.106/Makefile +=================================================================== +--- netplan-0.106.orig/Makefile ++++ netplan-0.106/Makefile +@@ -67,7 +67,7 @@ PYCODESTYLE3 ?= $(shell command -v pycod + PYTEST3 ?= $(shell command -v pytest-3 || command -v pytest3 || echo true) + PYCOVERAGE ?= $(shell command -v python3-coverage || echo true) + +-default: netplan/_features.py generate netplan-dbus dbus/io.netplan.Netplan.service doc/netplan.html doc/netplan.5 doc/netplan-generate.8 doc/netplan-apply.8 doc/netplan-try.8 doc/netplan-dbus.8 doc/netplan-get.8 doc/netplan-set.8 ++default: netplan/_features.py generate netplan-dbus dbus/io.netplan.Netplan.service #doc/netplan.html doc/netplan.5 doc/netplan-generate.8 doc/netplan-apply.8 doc/netplan-try.8 doc/netplan-dbus.8 doc/netplan-get.8 doc/netplan-set.8 + + %.o: src/%.c src/_features.h + $(CC) $(BUILDFLAGS) $(CFLAGS) $(LDFLAGS) -c $^ `pkg-config --cflags --libs glib-2.0 gio-2.0 yaml-0.1 uuid` +@@ -160,10 +160,10 @@ install: default + install -m 644 include/*.h $(DESTDIR)/$(INCLUDEDIR)/netplan/ + # TODO: install pkg-config once available + # docs, data +- install -m 644 doc/*.html $(DESTDIR)/$(DOCDIR)/netplan/ ++ #install -m 644 doc/*.html $(DESTDIR)/$(DOCDIR)/netplan/ + install -m 644 examples/*.yaml $(DESTDIR)/$(DOCDIR)/netplan/examples/ +- install -m 644 doc/*.5 $(DESTDIR)/$(MANDIR)/man5/ +- install -m 644 doc/*.8 $(DESTDIR)/$(MANDIR)/man8/ ++ #install -m 644 doc/*.5 $(DESTDIR)/$(MANDIR)/man5/ ++ #install -m 644 doc/*.8 $(DESTDIR)/$(MANDIR)/man8/ + install -T -D -m 644 netplan.completions $(DESTDIR)/$(BASH_COMPLETIONS_DIR)/netplan + # dbus + mkdir -p $(DESTDIR)/$(DATADIR)/dbus-1/system.d $(DESTDIR)/$(DATADIR)/dbus-1/system-services diff --git a/newsboat/PKGBUILD b/newsboat/PKGBUILD index bc16ff6485..9020adf087 100644 --- a/newsboat/PKGBUILD +++ b/newsboat/PKGBUILD @@ -28,7 +28,7 @@ validpgpkeys=('B8B1756A0DDBF0760CE67CCF4ED6CD61932B9EBE') # Newsboat project +Date: Sat, 31 Jul 2021 10:51:41 -0700 +Subject: [PATCH] libntp: Do not use PTHREAD_STACK_MIN on glibc + +In glibc 2.34+ PTHREAD_STACK_MIN is not a compile-time constant which +could mean different stack sizes at runtime on different architectures +and it also causes compile failure. Default glibc thread stack size +or 64Kb set by ntp should be good in glibc these days. 
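[Editor's aside, not part of the ntp patch above: a minimal C sketch of the runtime approach the commit message alludes to. On glibc 2.34 and later PTHREAD_STACK_MIN is no longer a compile-time constant, so a portable caller queries the floor with sysconf(_SC_THREAD_STACK_MIN) instead of comparing against the macro at build time; the 64 KiB fallback mirrors ntp's THREAD_MINSTACKSIZE default mentioned above. The helper name pick_stack_size is illustrative only.]

    #include <pthread.h>
    #include <unistd.h>

    /* Choose a thread stack size without assuming PTHREAD_STACK_MIN is a
     * compile-time constant (on glibc >= 2.34 it is sysconf-based). */
    static size_t pick_stack_size(void)
    {
        size_t want = 64 * 1024;                  /* ntp's 64 KiB default */
        long hard_min = sysconf(_SC_THREAD_STACK_MIN);
        if (hard_min > 0 && (size_t)hard_min > want)
            want = (size_t)hard_min;              /* never go below the OS floor */
        return want;
    }

    int main(void)
    {
        pthread_attr_t attr;
        pthread_attr_init(&attr);
        pthread_attr_setstacksize(&attr, pick_stack_size());
        /* ... pthread_create(&tid, &attr, worker, arg) would go here ... */
        pthread_attr_destroy(&attr);
        return 0;
    }

[This is only a sketch of the general technique; the patch itself simply stops comparing THREAD_MINSTACKSIZE against PTHREAD_STACK_MIN on glibc, as shown in the hunk above.]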
+ +Upstream-Status: Pending +Signed-off-by: Khem Raj +--- + libntp/work_thread.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/libntp/work_thread.c b/libntp/work_thread.c +index 03a5647..3ddd751 100644 +--- a/libntp/work_thread.c ++++ b/libntp/work_thread.c +@@ -41,7 +41,7 @@ + #ifndef THREAD_MINSTACKSIZE + # define THREAD_MINSTACKSIZE (64U * 1024) + #endif +-#ifndef __sun ++#if !defined(__sun) && !defined(__GLIBC__) + #if defined(PTHREAD_STACK_MIN) && THREAD_MINSTACKSIZE < PTHREAD_STACK_MIN + # undef THREAD_MINSTACKSIZE + # define THREAD_MINSTACKSIZE PTHREAD_STACK_MIN +-- +2.32.0 + diff --git a/ntp/ntp-ssp-la.patch b/ntp/ntp-ssp-la.patch new file mode 100644 index 0000000000..fca524ff43 --- /dev/null +++ b/ntp/ntp-ssp-la.patch @@ -0,0 +1,10 @@ +Index: ntp-4.2.8p15/sntp/harden/linux +=================================================================== +--- ntp-4.2.8p15.orig/sntp/harden/linux ++++ ntp-4.2.8p15/sntp/harden/linux +@@ -1,4 +1,4 @@ + # generic linux hardening flags +-NTP_HARD_CFLAGS="-fPIE -fPIC -fstack-protector-all -O1" ++NTP_HARD_CFLAGS="-fPIE -fPIC -O1" + NTP_HARD_CPPFLAGS="-D_FORTIFY_SOURCE=2" + NTP_HARD_LDFLAGS="-pie -Wl,-z,relro -Wl,-z,now" diff --git a/nushell/PKGBUILD b/nushell/PKGBUILD index f278d8e87d..4c951cdeec 100644 --- a/nushell/PKGBUILD +++ b/nushell/PKGBUILD @@ -33,7 +33,7 @@ pkgver() { prepare() { cd "$pkgname" - cargo fetch --locked --target "$CARCH-unknown-linux-gnu" + cargo fetch --locked --target "`uname -m`-unknown-linux-gnu" } build() { diff --git a/nuspell/PKGBUILD b/nuspell/PKGBUILD index 11db1cb577..27a2c4827c 100644 --- a/nuspell/PKGBUILD +++ b/nuspell/PKGBUILD @@ -9,7 +9,7 @@ arch=('loong64' 'x86_64') url='https://nuspell.github.io/' license=('LGPL3') depends=('icu' 'glibc' 'gcc-libs') -makedepends=('cmake' 'pandoc') +makedepends=('cmake') checkdepends=('catch2') source=(${pkgname}-${pkgver}.tar.gz::"https://github.com/nuspell/nuspell/archive/v${pkgver}.tar.gz") sha512sums=('f4119b3fe5944be8f5bc35ccff8d7a93b0f4fa9f129bc97a7b96879a11b5b35bd714b41dd209267417e94c5fed45fd3a74b349f94424f4b90bde07d9694d1d7d') diff --git a/nvidia-cg-toolkit/PKGBUILD b/nvidia-cg-toolkit/PKGBUILD index f4c6831879..2c81668108 100644 --- a/nvidia-cg-toolkit/PKGBUILD +++ b/nvidia-cg-toolkit/PKGBUILD @@ -20,6 +20,7 @@ package() { install -dm755 "${pkgdir}"/usr/lib [ "$CARCH" = "i686" ] && install -m644 "${srcdir}"/usr/lib/* "${pkgdir}"/usr/lib [ "$CARCH" = "x86_64" ] && install -m644 "${srcdir}"/usr/lib64/* "${pkgdir}"/usr/lib + [ "$CARCH" = "loong64" ] && install -m644 "${srcdir}"/usr/lib64/* "${pkgdir}"/usr/lib install -Dm644 "${srcdir}"/usr//local/Cg/docs/license.txt "${pkgdir}"/usr/share/licenses/nvidia-cg-toolkit/license.txt cp -r "${srcdir}"/usr/local/Cg "${pkgdir}"/usr/share/ find $pkgdir/usr/share/ -type d -exec chmod -R 755 '{}' ';' diff --git a/nvidia-lts/PKGBUILD b/nvidia-lts/PKGBUILD index 421a04157c..97466e7ba7 100644 --- a/nvidia-lts/PKGBUILD +++ b/nvidia-lts/PKGBUILD @@ -23,7 +23,7 @@ package() { _kernver="$(=5.6-7') +makedepends=('ncurses>=5.6-7' autoconf) optdepends=('ncurses: advanced ncurses features' 'tk: advanced tk features') -source=(https://caml.inria.fr/distrib/ocaml-${pkgver%.*}/${pkgname}-${pkgver}.tar.xz) -sha512sums=('23579b76592e225f2ddec58d78084dfd11befede18b61be71d3896fd72a90cc0fe4fb1f64a7dcbc83239ed69ec4254e13ab86fd810671851044c2a353da3adc5') +source=(https://caml.inria.fr/distrib/ocaml-${pkgver%.*}/${pkgname}-${pkgver}.tar.xz +ocaml-5.0.0-la64.patch) 
+sha512sums=('23579b76592e225f2ddec58d78084dfd11befede18b61be71d3896fd72a90cc0fe4fb1f64a7dcbc83239ed69ec4254e13ab86fd810671851044c2a353da3adc5' + 'a95f2e02b318183d76b858b0a1d66ad5c23977d72f6d964b95a8851edf4170ed3971602e031842ef04615d2f6b36198f62aa4ff7e57c188af052d45f22192f65') options=('!makeflags' '!emptydirs' 'staticlibs') +prepare() { + cd "${srcdir}/${pkgname}-${pkgver}" + patch -p1 -i $srcdir/ocaml-5.0.0-la64.patch + autoconf +} build() { cd "${srcdir}/${pkgname}-${pkgver}" CFLAGS+=' -ffat-lto-objects' CXXFLAGS+=' -ffat-lto-objects' - ./configure --prefix /usr --mandir /usr/share/man -enable-frame-pointers + ./configure --prefix /usr --mandir /usr/share/man --enable-frame-pointers make --debug=v world.opt } diff --git a/ocaml/ocaml-5.0.0-la64.patch b/ocaml/ocaml-5.0.0-la64.patch new file mode 100644 index 0000000000..6f7678ccf2 --- /dev/null +++ b/ocaml/ocaml-5.0.0-la64.patch @@ -0,0 +1,2389 @@ +diff --git a/Makefile b/Makefile +index bb2c245ea..db03683fb 100644 +--- a/Makefile ++++ b/Makefile +@@ -528,6 +528,14 @@ partialclean:: + + beforedepend:: lambda/runtimedef.ml + ++asmcomp/loongarch64/CSE.ml: asmcomp/riscv/CSE.ml ++ cp $< $@ ++asmcomp/loongarch64/reload.ml: asmcomp/riscv/reload.ml ++ cp $< $@ ++asmcomp/loongarch64/scheduling.ml: asmcomp/riscv/scheduling.ml ++ cp $< $@ ++ ++ + # Choose the right machine-dependent files + + asmcomp/arch.ml: asmcomp/$(ARCH)/arch.ml +@@ -1031,6 +1039,7 @@ clean:: + rm -f runtime/domain_state*.inc + rm -rf $(DEPDIR) + rm -f stdlib/libcamlrun.a stdlib/libcamlrun.lib ++ rm -f asmcomp/loongarch64/CSE.ml asmcomp/loongarch64/reload.ml asmcomp/loongarch64/scheduling.ml + + .PHONY: runtimeopt + runtimeopt: stdlib/libasmrun.$(A) +diff --git a/asmcomp/loongarch64/NOTES.md b/asmcomp/loongarch64/NOTES.md +new file mode 100644 +index 000000000..f9b63dd62 +--- /dev/null ++++ b/asmcomp/loongarch64/NOTES.md +@@ -0,0 +1,11 @@ ++# Supported platforms ++ ++LoongArch in 64-bit mode ++ ++# Reference documents ++ ++* Instruction set specification: ++ - https://loongson.github.io/LoongArch-Documentation/LoongArch-Vol1-EN.html ++ ++* ELF ABI specification: ++ - https://loongson.github.io/LoongArch-Documentation/LoongArch-ELF-ABI-EN.html +diff --git a/asmcomp/loongarch64/arch.ml b/asmcomp/loongarch64/arch.ml +new file mode 100644 +index 000000000..fee052684 +--- /dev/null ++++ b/asmcomp/loongarch64/arch.ml +@@ -0,0 +1,96 @@ ++# 2 "asmcomp/loongarch64/arch.ml" ++(**************************************************************************) ++(* *) ++(* OCaml *) ++(* *) ++(* yala *) ++(* *) ++(* Copyright © 2008-2023 LOONGSON *) ++(* *) ++(* All rights reserved. This file is distributed under the terms of *) ++(* the GNU Lesser General Public License version 2.1, with the *) ++(* special exception on linking described in the file LICENSE. 
*) ++(* *) ++(**************************************************************************) ++ ++(* Specific operations for the Loongarch processor *) ++ ++open Format ++ ++(* Machine-specific command-line options *) ++ ++let command_line_options = [] ++ ++(* Specific operations *) ++ ++type specific_operation = ++ | Imultaddf of bool (* multiply, optionally negate, and add *) ++ | Imultsubf of bool (* multiply, optionally negate, and subtract *) ++ | Isqrtf (* floating-point square root *) ++ ++(* Addressing modes *) ++ ++type addressing_mode = ++ | Iindexed of int (* reg + displ *) ++ ++let is_immediate n = ++ (n <= 0x7FF) && (n >= -0x800) ++ ++(* Sizes, endianness *) ++ ++let big_endian = false ++ ++let size_addr = 8 ++let size_int = size_addr ++let size_float = 8 ++ ++let allow_unaligned_access = false ++ ++(* Behavior of division *) ++ ++let division_crashes_on_overflow = false ++ ++(* Operations on addressing modes *) ++ ++let identity_addressing = Iindexed 0 ++ ++let offset_addressing addr delta = ++ match addr with ++ | Iindexed n -> Iindexed(n + delta) ++ ++let num_args_addressing = function ++ | Iindexed _ -> 1 ++ ++(* Printing operations and addressing modes *) ++ ++let print_addressing printreg addr ppf arg = ++ match addr with ++ | Iindexed n -> ++ let idx = if n <> 0 then Printf.sprintf " + %i" n else "" in ++ fprintf ppf "%a%s" printreg arg.(0) idx ++ ++let print_specific_operation printreg op ppf arg = ++ match op with ++ | Imultaddf false -> ++ fprintf ppf "%a *f %a +f %a" ++ printreg arg.(0) printreg arg.(1) printreg arg.(2) ++ | Imultaddf true -> ++ fprintf ppf "-f (%a *f %a +f %a)" ++ printreg arg.(0) printreg arg.(1) printreg arg.(2) ++ | Imultsubf false -> ++ fprintf ppf "%a *f %a -f %a" ++ printreg arg.(0) printreg arg.(1) printreg arg.(2) ++ | Imultsubf true -> ++ fprintf ppf "-f (%a *f %a -f %a)" ++ printreg arg.(0) printreg arg.(1) printreg arg.(2) ++ | Isqrtf -> ++ fprintf ppf "sqrtf %a" ++ printreg arg.(0) ++ ++(* Specific operations that are pure *) ++ ++let operation_is_pure _ = true ++ ++(* Specific operations that can raise *) ++ ++let operation_can_raise _ = false +diff --git a/asmcomp/loongarch64/arch.mli b/asmcomp/loongarch64/arch.mli +new file mode 100644 +index 000000000..57174fabe +--- /dev/null ++++ b/asmcomp/loongarch64/arch.mli +@@ -0,0 +1,76 @@ ++# 2 "asmcomp/loongarch64/arch.mli" ++(**************************************************************************) ++(* *) ++(* OCaml *) ++(* *) ++(* yala *) ++(* *) ++(* Copyright © 2008-2023 LOONGSON *) ++(* *) ++(* All rights reserved. This file is distributed under the terms of *) ++(* the GNU Lesser General Public License version 2.1, with the *) ++(* special exception on linking described in the file LICENSE. 
*) ++(* *) ++(**************************************************************************) ++ ++(* Specific operations for the LoongArch processor *) ++ ++(* Machine-specific command-line options *) ++ ++val command_line_options : (string * Arg.spec * string) list ++ ++(* Specific operations *) ++ ++type specific_operation = ++ | Imultaddf of bool (* multiply, optionally negate, and add *) ++ | Imultsubf of bool (* multiply, optionally negate, and subtract *) ++ | Isqrtf (* floating-point square root *) ++ ++(* Addressing modes *) ++ ++type addressing_mode = ++ | Iindexed of int (* reg + displ *) ++ ++val is_immediate : int -> bool ++ ++(* Sizes, endianness *) ++ ++val big_endian : bool ++ ++val size_addr : int ++ ++val size_int : int ++ ++val size_float : int ++ ++val allow_unaligned_access : bool ++ ++(* Behavior of division *) ++ ++val division_crashes_on_overflow : bool ++ ++(* Operations on addressing modes *) ++ ++val identity_addressing : addressing_mode ++ ++val offset_addressing : addressing_mode -> int -> addressing_mode ++ ++val num_args_addressing : addressing_mode -> int ++ ++(* Printing operations and addressing modes *) ++ ++val print_addressing : ++ (Format.formatter -> 'a -> unit) -> addressing_mode -> ++ Format.formatter -> 'a array -> unit ++ ++val print_specific_operation : ++ (Format.formatter -> 'a -> unit) -> specific_operation -> ++ Format.formatter -> 'a array -> unit ++ ++(* Specific operations that are pure *) ++ ++val operation_is_pure : specific_operation -> bool ++ ++(* Specific operations that can raise *) ++ ++val operation_can_raise : specific_operation -> bool +diff --git a/asmcomp/loongarch64/emit.mlp b/asmcomp/loongarch64/emit.mlp +new file mode 100644 +index 000000000..b80b4f172 +--- /dev/null ++++ b/asmcomp/loongarch64/emit.mlp +@@ -0,0 +1,772 @@ ++(**************************************************************************) ++(* *) ++(* OCaml *) ++(* *) ++(* yala *) ++(* *) ++(* Copyright © 2008-2023 LOONGSON *) ++(* *) ++(* All rights reserved. This file is distributed under the terms of *) ++(* the GNU Lesser General Public License version 2.1, with the *) ++(* special exception on linking described in the file LICENSE. *) ++(* *) ++(**************************************************************************) ++ ++(* Emission of LoongArch assembly code *) ++ ++open Cmm ++open Arch ++open Proc ++open Reg ++open Mach ++open Linear ++open Emitaux ++open Emitenv ++ ++(* Layout of the stack. The stack is kept 16-aligned. 
*) ++ ++let frame_size env = ++ let size = ++ env.stack_offset + (* Trap frame, outgoing parameters *) ++ size_int * env.f.fun_num_stack_slots.(0) + (* Local int variables *) ++ size_float * env.f.fun_num_stack_slots.(1)+ (* Local float variables *) ++ (if env.f.fun_contains_calls then size_addr else 0) (* Return address *) ++ in ++ Misc.align size 16 ++ ++let slot_offset env loc cls = ++ match loc with ++ | Local n -> ++ ("$sp", ++ if cls = 0 ++ then env.stack_offset + env.f.fun_num_stack_slots.(1) * size_float ++ + n * size_int ++ else env.stack_offset + n * size_float) ++ | Incoming n -> ++ ("$sp", frame_size env + n) ++ | Outgoing n -> ++ ("$sp", n) ++ | Domainstate n -> ++ ("$s8", n + Domainstate.(idx_of_field Domain_extra_params) * 8) ++ ++(* Output a symbol *) ++ ++let emit_symbol s = ++ emit_symbol '$' s ++ ++let emit_jump op s = ++ if !Clflags.dlcode || !Clflags.pic_code ++ then `{emit_string op} %plt({emit_symbol s})` ++ else `{emit_string op} {emit_symbol s}` ++ ++let emit_call = emit_jump "bl" ++let emit_tail = emit_jump "b" ++ ++(* Output a label *) ++ ++let emit_label lbl = ++ emit_string ".L"; emit_int lbl ++ ++(* Section switching *) ++ ++let data_space = ++ ".section .data" ++ ++let code_space = ++ ".section .text" ++ ++let rodata_space = ++ ".section .rodata" ++ ++(* Names for special regs *) ++ ++let reg_tmp = phys_reg 22 (* t1 *) ++let reg_tmp2 = phys_reg 21 (* t0 *) ++let reg_t2 = phys_reg 13 (* t2 *) ++let reg_domain_state_ptr = phys_reg 25 (* s8 *) ++let reg_trap_ptr = phys_reg 23 (* s1 *) ++let reg_alloc_ptr = phys_reg 24 (* s7 *) ++let reg_stack_arg_begin = phys_reg 9 (* s3 *) ++let reg_stack_arg_end = phys_reg 10 (* s4 *) ++ ++(* Output a pseudo-register *) ++ ++let reg_name = function ++ | {loc = Reg r} -> register_name r ++ | _ -> Misc.fatal_error "Emit.reg_name" ++ ++let emit_reg r = ++ emit_string (reg_name r) ++ ++(* Adjust sp by the given byte amount, clobbers reg_tmp *) ++ ++let emit_stack_adjustment n = ++ if n <> 0 then begin ++ if is_immediate n then ++ ` addi.d $sp, $sp, {emit_int n} \n` ++ else begin ++ ` li.d {emit_reg reg_tmp}, {emit_int n}\n`; ++ ` add.d $sp, $sp, {emit_reg reg_tmp}\n` ++ end; ++ cfi_adjust_cfa_offset (-n) ++ end ++ ++(* Output add.d-immediate instruction, clobbers reg_tmp2 *) ++ ++let emit_addimm rd rs n = ++ if is_immediate n then ++ ` addi.d {emit_reg rd}, {emit_reg rs}, {emit_int n}\n` ++ else begin ++ ` li.d {emit_reg reg_tmp2}, {emit_int n}\n`; ++ ` add.d {emit_reg rd}, {emit_reg rs}, {emit_reg reg_tmp2}\n` ++ end ++ ++(* Output memory operation with a possibly non-immediate offset, ++ clobbers reg_tmp *) ++ ++let emit_mem_op op reg ofs addr = ++ if is_immediate ofs then ++ ` {emit_string op} {emit_string reg}, {emit_string addr}, {emit_int ofs}\n` ++ else begin ++ ` li.d {emit_reg reg_tmp}, {emit_int ofs}\n`; ++ ` add.d {emit_reg reg_tmp}, {emit_string addr}, {emit_reg reg_tmp}\n`; ++ ` {emit_string op} {emit_string reg}, {emit_reg reg_tmp}, 0\n` ++ end ++ ++let reload_ra n = ++ emit_mem_op "ld.d" "$ra" (n - 8) "$sp" ++ ++let store_ra n = ++ emit_mem_op "st.d" "$ra" (n - 8) "$sp" ++ ++let emit_store rs ofs rd = ++ emit_mem_op "st.d" (reg_name rs) ofs rd ++ ++let emit_load rd ofs rs = ++ emit_mem_op "ld.d" (reg_name rd) ofs rs ++ ++let emit_float_load rd ofs rs = ++ emit_mem_op "fld.d" (reg_name rd) ofs rs ++ ++let emit_float_store rs ofs rd = ++ emit_mem_op "fst.d" (reg_name rs) ofs rd ++ ++let emit_float_test cmp ~arg ~res = ++ let negated = ++ match cmp with ++ | CFneq | CFnlt | CFngt | CFnle | CFnge -> true ++ | CFeq | 
CFlt | CFgt | CFle | CFge -> false ++ in ++ begin match cmp with ++ | CFeq | CFneq -> ` fcmp.ceq.d $fcc0, {emit_reg arg.(0)}, {emit_reg arg.(1)}\n movcf2gr {emit_reg res}, $fcc0\n` ++ | CFlt | CFnlt -> ` fcmp.clt.d $fcc0, {emit_reg arg.(0)}, {emit_reg arg.(1)}\n movcf2gr {emit_reg res}, $fcc0\n` ++ | CFgt | CFngt -> ` fcmp.clt.d $fcc0, {emit_reg arg.(1)}, {emit_reg arg.(0)}\n movcf2gr {emit_reg res}, $fcc0\n` ++ | CFle | CFnle -> ` fcmp.cle.d $fcc0, {emit_reg arg.(0)}, {emit_reg arg.(1)}\n movcf2gr {emit_reg res}, $fcc0\n` ++ | CFge | CFnge -> ` fcmp.cle.d $fcc0, {emit_reg arg.(1)}, {emit_reg arg.(0)}\n movcf2gr {emit_reg res}, $fcc0\n` ++ end; ++ negated ++ ++(* Record live pointers at call points *) ++ ++let record_frame_label env live dbg = ++ let lbl = new_label () in ++ let live_offset = ref [] in ++ Reg.Set.iter ++ (function ++ {typ = Val; loc = Reg r} -> ++ live_offset := (r lsl 1) + 1 :: !live_offset ++ | {typ = Val; loc = Stack s} as reg -> ++ let (base, ofs) = slot_offset env s (register_class reg) in ++ assert (base = "$sp"); ++ live_offset := ofs :: !live_offset ++ | {typ = Addr} as r -> ++ Misc.fatal_error ("bad GC root " ^ Reg.name r) ++ | _ -> () ++ ) ++ live; ++ record_frame_descr ~label:lbl ~frame_size:(frame_size env) ++ ~live_offset:!live_offset dbg; ++ lbl ++ ++let record_frame env live dbg = ++ let lbl = record_frame_label env live dbg in ++ `{emit_label lbl}:\n` ++ ++let emit_call_gc gc = ++ `{emit_label gc.gc_lbl}:\n`; ++ ` {emit_call "caml_call_gc"}\n`; ++ `{emit_label gc.gc_frame_lbl}:\n`; ++ ` b {emit_label gc.gc_return_lbl}\n` ++ ++let bound_error_label env dbg = ++ if !Clflags.debug || env.bound_error_sites = [] then begin ++ let lbl_bound_error = new_label() in ++ let lbl_frame = record_frame_label env Reg.Set.empty (Dbg_other dbg) in ++ env.bound_error_sites <- ++ { bd_lbl = lbl_bound_error; ++ bd_frame = lbl_frame; } :: env.bound_error_sites; ++ lbl_bound_error ++ end else ++ let bd = List.hd env.bound_error_sites in ++ bd.bd_lbl ++ ++let emit_call_bound_error bd = ++ `{emit_label bd.bd_lbl}:\n`; ++ ` {emit_call "caml_ml_array_bound_error"}\n`; ++ `{emit_label bd.bd_frame}:\n` ++ ++(* Names for various instructions *) ++ ++let name_for_intop = function ++ | Iadd -> "add.d" ++ | Isub -> "sub.d" ++ | Imul -> "mul.d" ++ | Imulh -> "mulh.d" ++ | Idiv -> "div.d" ++ | Iand -> "and" ++ | Ior -> "or" ++ | Ixor -> "xor" ++ | Ilsl -> "sll.d" ++ | Ilsr -> "srl.d" ++ | Iasr -> "sra.d" ++ | Imod -> "mod.d" ++ | _ -> Misc.fatal_error "Emit.Intop" ++ ++let name_for_intop_imm = function ++ | Iadd -> "addi.d" ++ | Iand -> "andi" ++ | Ior -> "ori" ++ | Ixor -> "xori" ++ | Ilsl -> "slli.d" ++ | Ilsr -> "srli.d" ++ | Iasr -> "srai.d" ++ | _ -> Misc.fatal_error "Emit.Intop_imm" ++ ++let name_for_floatop1 = function ++ | Inegf -> "fneg.d" ++ | Iabsf -> "fabs.d" ++ | Ispecific Isqrtf -> "fsqrt.d" ++ | _ -> Misc.fatal_error "Emit.Iopf1" ++ ++let name_for_floatop2 = function ++ | Iaddf -> "fadd.d" ++ | Isubf -> "fsub.d" ++ | Imulf -> "fmul.d" ++ | Idivf -> "fdiv.d" ++ | _ -> Misc.fatal_error "Emit.Iopf2" ++ ++let name_for_specific = function ++ | Imultaddf false -> "fmadd.d" ++ | Imultaddf true -> "fnmadd.d" ++ | Imultsubf false -> "fmsub.d" ++ | Imultsubf true -> "fnmsub.d" ++ | _ -> Misc.fatal_error "Emit.Iopf3" ++ ++(* Output the assembly code for an instruction *) ++ ++let emit_instr env i = ++ emit_debug_info i.dbg; ++ match i.desc with ++ Lend -> () ++ | Lprologue -> ++ assert (env.f.fun_prologue_required); ++ let n = frame_size env in ++ emit_stack_adjustment (-n); ++ if 
env.f.fun_contains_calls then begin ++ store_ra n; ++ cfi_offset ~reg:1 (* ra *) ~offset:(-8) ++ end; ++ | Lop(Imove | Ispill | Ireload) -> ++ let src = i.arg.(0) and dst = i.res.(0) in ++ if src.loc <> dst.loc then begin ++ match (src, dst) with ++ | {loc = Reg _; typ = (Val | Int | Addr)}, {loc = Reg _} -> ++ ` move {emit_reg dst}, {emit_reg src}\n` ++ | {loc = Reg _; typ = Float}, {loc = Reg _; typ = Float} -> ++ ` fmov.d {emit_reg dst}, {emit_reg src}\n` ++ | {loc = Reg _; typ = (Val | Int | Addr)}, {loc = Stack s} -> ++ let (base, ofs) = slot_offset env s (register_class dst) in ++ emit_store src ofs base ++ | {loc = Reg _; typ = Float}, {loc = Stack s} -> ++ let (base, ofs) = slot_offset env s (register_class dst) in ++ emit_float_store src ofs base ++ | {loc = Stack s; typ = (Val | Int | Addr)}, {loc = Reg _} -> ++ let (base, ofs) = slot_offset env s (register_class src) in ++ emit_load dst ofs base ++ | {loc = Stack s; typ = Float}, {loc = Reg _} -> ++ let (base, ofs) = slot_offset env s (register_class src) in ++ emit_float_load dst ofs base ++ | {loc = Reg _; typ = Float}, {loc = Reg _; typ = (Val | Int | Addr)} ++ | {loc = Stack _}, {loc = Stack _} ++ | {loc = Unknown}, _ | _, {loc = Unknown} -> ++ Misc.fatal_error "Emit: Imove" ++ end ++ | Lop(Iconst_int n) -> ++ ` li.d {emit_reg i.res.(0)}, {emit_nativeint n}\n` ++ | Lop(Iconst_float f) -> ++ let lbl = new_label() in ++ env.float_literals <- {fl=f; lbl} :: env.float_literals; ++ `la.local {emit_reg reg_tmp}, {emit_label lbl} \n`; ++ ` fld.d {emit_reg i.res.(0)}, {emit_reg reg_tmp}, 0\n` ++ | Lop(Iconst_symbol s) -> (* FIXME la.global assert error in binutils*) ++ `pcaddi {emit_reg i.res.(0)}, 0 \n`; ++ `b 7112233f\n`; ++ `.dword {emit_symbol s}\n`; ++ `7112233: ld.d {emit_reg i.res.(0)}, {emit_reg i.res.(0)}, 8\n` ++ | Lop(Icall_ind) -> ++ ` jirl $ra, {emit_reg i.arg.(0)}, 0\n`; ++ record_frame env i.live (Dbg_other i.dbg) ++ | Lop(Icall_imm {func}) -> ++ ` {emit_call func}\n`; ++ record_frame env i.live (Dbg_other i.dbg) ++ | Lop(Itailcall_ind) -> ++ let n = frame_size env in ++ if env.f.fun_contains_calls then reload_ra n; ++ emit_stack_adjustment n; ++ ` jr {emit_reg i.arg.(0)}\n` ++ | Lop(Itailcall_imm {func}) -> ++ if func = env.f.fun_name then begin ++ ` b {emit_label env.f.fun_tailrec_entry_point_label}\n` ++ end else begin ++ let n = frame_size env in ++ if env.f.fun_contains_calls then reload_ra n; ++ emit_stack_adjustment n; ++ ` {emit_tail func}\n` ++ end ++ | Lop(Iextcall{func; alloc; stack_ofs}) -> ++ if stack_ofs > 0 then begin ++ ` move {emit_reg reg_stack_arg_begin}, $sp\n`; ++ ` addi.d {emit_reg reg_stack_arg_end}, $sp, {emit_int (Misc.align stack_ofs 16)}\n`; ++ ` la.global {emit_reg reg_t2}, {emit_symbol func}\n`; ++ ` {emit_call "caml_c_call_stack_args"}\n`; ++ record_frame env i.live (Dbg_other i.dbg) ++ end else if alloc then begin ++ ` la.global {emit_reg reg_t2}, {emit_symbol func}\n`; ++ ` {emit_call "caml_c_call"}\n`; ++ record_frame env i.live (Dbg_other i.dbg) ++ end else begin ++ (* store ocaml stack in s0, which is marked as being destroyed ++ at noalloc calls *) ++ ` move $s0, $sp\n`; ++ cfi_remember_state (); ++ cfi_def_cfa_register ~reg:21; ++ let ofs = Domainstate.(idx_of_field Domain_c_stack) * 8 in ++ ` ld.d $sp, {emit_reg reg_domain_state_ptr}, {emit_int ofs}\n`; ++ ` {emit_call func}\n`; ++ ` move $sp, $s0\n`; ++ cfi_restore_state () ++ end ++ | Lop(Istackoffset n) -> ++ assert (n mod 16 = 0); ++ emit_stack_adjustment (-n); ++ env.stack_offset <- env.stack_offset + n ++ | Lop(Iload { 
memory_chunk = Single; addressing_mode = Iindexed ofs; is_atomic } ) -> ++ assert (not is_atomic); ++ ` fld.s {emit_reg i.res.(0)}, {emit_reg i.arg.(0)}, {emit_int ofs}\n`; ++ ` fcvt.d.s {emit_reg i.res.(0)}, {emit_reg i.res.(0)}\n` ++ | Lop(Iload { memory_chunk = Word_int | Word_val; addressing_mode = Iindexed ofs; is_atomic } ) -> ++ if is_atomic then ` dbar 0\n`; ++ ` ld.d {emit_reg i.res.(0)}, {emit_reg i.arg.(0)}, {emit_int ofs}\n`; ++ if is_atomic then ` dbar 0\n` ++ | Lop(Iload { memory_chunk; addressing_mode = Iindexed ofs; is_atomic } ) -> ++ assert (not is_atomic); ++ let instr = ++ match memory_chunk with ++ | Byte_unsigned -> "ld.bu" ++ | Byte_signed -> "ld.b" ++ | Sixteen_unsigned -> "ld.hu" ++ | Sixteen_signed -> "ld.h" ++ | Thirtytwo_unsigned -> "ld.wu" ++ | Thirtytwo_signed -> "ld.w" ++ | Word_int | Word_val | Single -> assert false ++ | Double -> "fld.d" ++ in ++ ` {emit_string instr} {emit_reg i.res.(0)}, {emit_reg i.arg.(0)}, {emit_int ofs}\n` ++ | Lop(Istore(Single, Iindexed ofs, _)) -> ++ (* ft0 is marked as destroyed for this operation *) ++ ` fcvt.s.d $ft0, {emit_reg i.arg.(0)}\n`; ++ ` fst.s $ft0, {emit_reg i.arg.(1)}, {emit_int ofs}\n` ++ | Lop(Istore((Word_int | Word_val), Iindexed ofs, assignement)) -> ++ if assignement then begin ++ ` dbar 0\n`; ++ ` st.d {emit_reg i.arg.(0)}, {emit_reg i.arg.(1)}, {emit_int ofs}\n` ++ end else ++ ` st.d {emit_reg i.arg.(0)}, {emit_reg i.arg.(1)}, {emit_int ofs}\n`; ++ | Lop(Istore(chunk, Iindexed ofs, _)) -> ++ let instr = ++ match chunk with ++ | Byte_unsigned | Byte_signed -> "st.b" ++ | Sixteen_unsigned | Sixteen_signed -> "st.h" ++ | Thirtytwo_unsigned | Thirtytwo_signed -> "st.w" ++ | Word_int | Word_val | Single -> assert false ++ | Double -> "fst.d" ++ in ++ ` {emit_string instr} {emit_reg i.arg.(0)}, {emit_reg i.arg.(1)}, {emit_int ofs}\n` ++ | Lop(Ialloc {bytes; dbginfo}) -> ++ let lbl_frame_lbl = record_frame_label env i.live (Dbg_alloc dbginfo) in ++ if env.f.fun_fast then begin ++ let lbl_after_alloc = new_label () in ++ let lbl_call_gc = new_label () in ++ let n = -bytes in ++ let offset = Domainstate.(idx_of_field Domain_young_limit) * 8 in ++ emit_addimm reg_alloc_ptr reg_alloc_ptr n; ++ ` ld.d {emit_reg reg_tmp}, {emit_reg reg_domain_state_ptr}, {emit_int offset}\n`; ++ ` bltu {emit_reg reg_alloc_ptr}, {emit_reg reg_tmp}, {emit_label lbl_call_gc}\n`; ++ `{emit_label lbl_after_alloc}:\n`; ++ ` addi.d {emit_reg i.res.(0)}, {emit_reg reg_alloc_ptr}, 8\n`; ++ env.call_gc_sites <- ++ { gc_lbl = lbl_call_gc; ++ gc_return_lbl = lbl_after_alloc; ++ gc_frame_lbl = lbl_frame_lbl } :: env.call_gc_sites ++ end else begin ++ begin match bytes with ++ | 16 -> ` {emit_call "caml_alloc1"}\n` ++ | 24 -> ` {emit_call "caml_alloc2"}\n` ++ | 32 -> ` {emit_call "caml_alloc3"}\n` ++ | _ -> ++ ` li.d {emit_reg reg_t2}, {emit_int bytes}\n`; ++ ` {emit_call "caml_allocN"}\n` ++ end; ++ `{emit_label lbl_frame_lbl}:\n`; ++ ` addi.d {emit_reg i.res.(0)}, {emit_reg reg_alloc_ptr}, 8\n` ++ end ++ | Lop(Ipoll { return_label }) -> ++ let lbl_frame_lbl = record_frame_label env i.live (Dbg_alloc []) in ++ let lbl_after_poll = match return_label with ++ | None -> new_label() ++ | Some(lbl) -> lbl in ++ let lbl_call_gc = new_label () in ++ let offset = Domainstate.(idx_of_field Domain_young_limit) * 8 in ++ ` ld.d {emit_reg reg_tmp}, {emit_reg reg_domain_state_ptr}, {emit_int offset}\n`; ++ begin match return_label with ++ | None -> ` bltu {emit_reg reg_alloc_ptr}, {emit_reg reg_tmp}, {emit_label lbl_call_gc}\n`; ++ `{emit_label 
lbl_after_poll}:\n`; ++ | Some lbl -> ` bgeu {emit_reg reg_alloc_ptr}, {emit_reg reg_tmp}, {emit_label lbl}\n`; ++ ` b {emit_label lbl_call_gc}\n` ++ end; ++ env.call_gc_sites <- ++ { gc_lbl = lbl_call_gc; ++ gc_return_lbl = lbl_after_poll; ++ gc_frame_lbl = lbl_frame_lbl } :: env.call_gc_sites ++ | Lop(Iintop(Icomp cmp)) -> ++ begin match cmp with ++ | Isigned Clt -> ++ ` slt {emit_reg i.res.(0)}, {emit_reg i.arg.(0)}, {emit_reg i.arg.(1)}\n` ++ | Isigned Cge -> ++ ` slt {emit_reg i.res.(0)}, {emit_reg i.arg.(0)}, {emit_reg i.arg.(1)}\n`; ++ ` xori {emit_reg i.res.(0)}, {emit_reg i.res.(0)}, 1\n`; ++ | Isigned Cgt -> ++ ` slt {emit_reg i.res.(0)}, {emit_reg i.arg.(1)}, {emit_reg i.arg.(0)}\n` ++ | Isigned Cle -> ++ ` slt {emit_reg i.res.(0)}, {emit_reg i.arg.(1)}, {emit_reg i.arg.(0)}\n`; ++ ` xori {emit_reg i.res.(0)}, {emit_reg i.res.(0)}, 1\n`; ++ | Isigned Ceq | Iunsigned Ceq -> ++ ` sub.d {emit_reg i.res.(0)}, {emit_reg i.arg.(0)}, {emit_reg i.arg.(1)}\n`; ++ ` sltui {emit_reg i.res.(0)}, {emit_reg i.res.(0)}, 1\n` ++ | Isigned Cne | Iunsigned Cne -> ++ ` sub.d {emit_reg i.res.(0)}, {emit_reg i.arg.(0)}, {emit_reg i.arg.(1)}\n`; ++ ` sltu {emit_reg i.res.(0)}, $zero, {emit_reg i.res.(0)}\n` ++ | Iunsigned Clt -> ++ ` sltu {emit_reg i.res.(0)}, {emit_reg i.arg.(0)}, {emit_reg i.arg.(1)}\n` ++ | Iunsigned Cge -> ++ ` sltu {emit_reg i.res.(0)}, {emit_reg i.arg.(0)}, {emit_reg i.arg.(1)}\n`; ++ ` xori {emit_reg i.res.(0)}, {emit_reg i.res.(0)}, 1\n`; ++ | Iunsigned Cgt -> ++ ` sltu {emit_reg i.res.(0)}, {emit_reg i.arg.(1)}, {emit_reg i.arg.(0)}\n` ++ | Iunsigned Cle -> ++ ` sltu {emit_reg i.res.(0)}, {emit_reg i.arg.(1)}, {emit_reg i.arg.(0)}\n`; ++ ` xori {emit_reg i.res.(0)}, {emit_reg i.res.(0)}, 1\n`; ++ end ++ | Lop(Iintop (Icheckbound)) -> ++ let lbl = bound_error_label env i.dbg in ++ ` bleu {emit_reg i.arg.(0)}, {emit_reg i.arg.(1)}, {emit_label lbl}\n` ++ | Lop(Iintop op) -> ++ let instr = name_for_intop op in ++ ` {emit_string instr} {emit_reg i.res.(0)}, {emit_reg i.arg.(0)}, {emit_reg i.arg.(1)}\n` ++ | Lop(Iintop_imm(Isub, n)) -> ++ ` addi.d {emit_reg i.res.(0)}, {emit_reg i.arg.(0)}, {emit_int(-n)}\n` ++ | Lop(Iintop_imm(Iadd, n)) -> ++ ` addi.d {emit_reg i.res.(0)}, {emit_reg i.arg.(0)}, {emit_int(n)}\n` ++ | Lop(Iintop_imm(op, n)) -> ++ let instri = name_for_intop_imm op in ++ if n<0 then (* FIXME *) ++ let instr = name_for_intop op in ++ ` addi.d {emit_reg reg_tmp2}, $zero, {emit_int n}\n {emit_string instr} {emit_reg i.res.(0)}, {emit_reg i.arg.(0)}, {emit_reg reg_tmp2} \n` ++ else ++ ` {emit_string instri} {emit_reg i.res.(0)}, {emit_reg i.arg.(0)}, {emit_int n}\n` ++ | Lop(Inegf | Iabsf | Ispecific Isqrtf as op) -> ++ let instr = name_for_floatop1 op in ++ ` {emit_string instr} {emit_reg i.res.(0)}, {emit_reg i.arg.(0)}\n` ++ | Lop(Iaddf | Isubf | Imulf | Idivf as op) -> ++ let instr = name_for_floatop2 op in ++ ` {emit_string instr} {emit_reg i.res.(0)}, {emit_reg i.arg.(0)}, {emit_reg i.arg.(1)}\n` ++ | Lop(Ifloatofint) -> ++ ` movgr2fr.d $ft0, {emit_reg i.arg.(0)} \n`; ++ ` ffint.d.l {emit_reg i.res.(0)}, $ft0\n` ++ | Lop(Iintoffloat) -> ++ ` ftintrz.l.d $ft0, {emit_reg i.arg.(0)}\n`; ++ ` movfr2gr.d {emit_reg i.res.(0)}, $ft0 \n` ++ | Lop(Iopaque) -> ++ assert (i.arg.(0).loc = i.res.(0).loc) ++ | Lop(Ispecific sop) -> ++ let instr = name_for_specific sop in ++ ` {emit_string instr} {emit_reg i.res.(0)}, {emit_reg i.arg.(0)}, {emit_reg i.arg.(1)}, {emit_reg i.arg.(2)}\n` ++ | Lop (Idls_get) -> ++ let ofs = Domainstate.(idx_of_field Domain_dls_root) * 8 in ++ ` 
ld.d {emit_reg i.res.(0)}, {emit_reg reg_domain_state_ptr}, {emit_int ofs}\n` ++ | Lreloadretaddr -> ++ let n = frame_size env in ++ reload_ra n ++ | Lreturn -> ++ let n = frame_size env in ++ emit_stack_adjustment n; ++ ` jr $ra\n` ++ | Llabel lbl -> ++ `{emit_label lbl}:\n` ++ | Lbranch lbl -> ++ ` b {emit_label lbl}\n` ++ | Lcondbranch(tst, lbl) -> ++ begin match tst with ++ | Itruetest -> ++ ` bnez {emit_reg i.arg.(0)}, {emit_label lbl}\n` ++ | Ifalsetest -> ++ ` beqz {emit_reg i.arg.(0)}, {emit_label lbl}\n` ++ | Iinttest cmp -> ++ let name = match cmp with ++ | Iunsigned Ceq | Isigned Ceq -> "beq" ++ | Iunsigned Cne | Isigned Cne -> "bne" ++ | Iunsigned Cle -> "bleu" | Isigned Cle -> "ble" ++ | Iunsigned Cge -> "bgeu" | Isigned Cge -> "bge" ++ | Iunsigned Clt -> "bltu" | Isigned Clt -> "blt" ++ | Iunsigned Cgt -> "bgtu" | Isigned Cgt -> "bgt" ++ in ++ ` {emit_string name} {emit_reg i.arg.(0)}, {emit_reg i.arg.(1)}, {emit_label lbl}\n` ++ | Iinttest_imm _ -> ++ Misc.fatal_error "Emit.emit_instr (Iinttest_imm _)" ++ | Ifloattest cmp -> ++ let negated = emit_float_test cmp ~arg:i.arg ~res:reg_tmp in ++ let branch = ++ if negated ++ then "beqz" ++ else "bnez" ++ in ++ ` {emit_string branch} {emit_reg reg_tmp}, {emit_label lbl}\n` ++ | Ioddtest -> ++ ` andi {emit_reg reg_tmp}, {emit_reg i.arg.(0)}, 1\n`; ++ ` bnez {emit_reg reg_tmp}, {emit_label lbl}\n` ++ | Ieventest -> ++ ` andi {emit_reg reg_tmp}, {emit_reg i.arg.(0)}, 1\n`; ++ ` beqz {emit_reg reg_tmp}, {emit_label lbl}\n` ++ end ++ | Lcondbranch3(lbl0, lbl1, lbl2) -> ++ ` addi.d {emit_reg reg_tmp}, {emit_reg i.arg.(0)}, -1\n`; ++ begin match lbl0 with ++ | None -> () ++ | Some lbl -> ` bltz {emit_reg reg_tmp}, {emit_label lbl}\n` ++ end; ++ begin match lbl1 with ++ | None -> () ++ | Some lbl -> ` beqz {emit_reg reg_tmp}, {emit_label lbl}\n` ++ end; ++ begin match lbl2 with ++ | None -> () ++ | Some lbl -> ` bgtz {emit_reg reg_tmp}, {emit_label lbl}\n` ++ end ++ | Lswitch jumptbl -> ++ let lbl = new_label() in ++ ` la.local {emit_reg reg_tmp}, {emit_label lbl}\n`; ++ ` slli.d {emit_reg reg_tmp2}, {emit_reg i.arg.(0)}, 2\n`; ++ ` add.d {emit_reg reg_tmp}, {emit_reg reg_tmp}, {emit_reg reg_tmp2}\n`; ++ ` jr {emit_reg reg_tmp}\n`; ++ `{emit_label lbl}:\n`; ++ for i = 0 to Array.length jumptbl - 1 do ++ ` b {emit_label jumptbl.(i)}\n` ++ done ++ | Lentertrap -> ++ () ++ | Ladjust_trap_depth { delta_traps } -> ++ (* each trap occupes 16 bytes on the stack *) ++ let delta = 16 * delta_traps in ++ cfi_adjust_cfa_offset delta; ++ env.stack_offset <- env.stack_offset + delta ++ | Lpushtrap {lbl_handler} -> ++ ` la.local {emit_reg reg_tmp}, {emit_label lbl_handler}\n`; ++ ` addi.d $sp, $sp, -16\n`; ++ env.stack_offset <- env.stack_offset + 16; ++ ` st.d {emit_reg reg_trap_ptr}, $sp, 0\n`; ++ ` st.d {emit_reg reg_tmp}, $sp, 8\n`; ++ cfi_adjust_cfa_offset 16; ++ ` move {emit_reg reg_trap_ptr}, $sp\n` ++ | Lpoptrap -> ++ ` ld.d {emit_reg reg_trap_ptr}, $sp, 0\n`; ++ ` addi.d $sp, $sp, 16\n`; ++ cfi_adjust_cfa_offset (-16); ++ env.stack_offset <- env.stack_offset - 16 ++ | Lraise k -> ++ begin match k with ++ | Lambda.Raise_regular -> ++ ` {emit_call "caml_raise_exn"}\n`; ++ record_frame env Reg.Set.empty (Dbg_raise i.dbg) ++ | Lambda.Raise_reraise -> ++ ` {emit_call "caml_reraise_exn"}\n`; ++ record_frame env Reg.Set.empty (Dbg_raise i.dbg) ++ | Lambda.Raise_notrace -> ++ ` move $sp, {emit_reg reg_trap_ptr}\n`; ++ ` ld.d {emit_reg reg_tmp}, $sp, 8\n`; ++ ` ld.d {emit_reg reg_trap_ptr}, $sp, 0\n`; ++ ` addi.d $sp, $sp, 16\n`; ++ ` jr {emit_reg 
reg_tmp}\n` ++ end ++ ++(* Emit a sequence of instructions *) ++ ++let rec emit_all env = function ++ | {desc = Lend} -> () | i -> emit_instr env i; emit_all env i.next ++ ++(* Emission of a function declaration *) ++ ++let fundecl fundecl = ++ let env = mk_env fundecl in ++ ` .globl {emit_symbol fundecl.fun_name}\n`; ++ ` .type {emit_symbol fundecl.fun_name}, @function\n`; ++ ` {emit_string code_space}\n`; ++ ` .align 2\n`; ++ `{emit_symbol fundecl.fun_name}:\n`; ++ emit_debug_info fundecl.fun_dbg; ++ cfi_startproc(); ++ ++ (* Dynamic stack checking *) ++ let stack_threshold_size = Config.stack_threshold * 8 in (* bytes *) ++ let { max_frame_size; contains_nontail_calls } = ++ preproc_stack_check ++ ~fun_body:fundecl.fun_body ~frame_size:(frame_size env) ~trap_size:16 ++ in ++ let handle_overflow = ref None in ++ if contains_nontail_calls || max_frame_size >= stack_threshold_size then begin ++ let overflow = new_label () and ret = new_label () in ++ let threshold_offset = Domainstate.stack_ctx_words * 8 + stack_threshold_size in ++ let f = max_frame_size + threshold_offset in ++ let offset = Domainstate.(idx_of_field Domain_current_stack) * 8 in ++ ` ld.d {emit_reg reg_tmp}, {emit_reg reg_domain_state_ptr}, {emit_int offset}\n`; ++ emit_addimm reg_tmp reg_tmp f; ++ ` bltu $sp, {emit_reg reg_tmp}, {emit_label overflow}\n`; ++ `{emit_label ret}:\n`; ++ handle_overflow := Some (overflow, ret) ++ end; ++ ++ emit_all env fundecl.fun_body; ++ List.iter emit_call_gc env.call_gc_sites; ++ List.iter emit_call_bound_error env.bound_error_sites; ++ ++ begin match !handle_overflow with ++ | None -> () ++ | Some (overflow, ret) -> ++ `{emit_label overflow}:\n`; ++ (* Pass the desired frame size on the stack, since all of the ++ argument-passing registers may be in use. 
*) ++ let s = Config.stack_threshold + max_frame_size / 8 in ++ ` li.d {emit_reg reg_tmp}, {emit_int s}\n`; ++ ` addi.d $sp, $sp, -16\n`; ++ ` st.d {emit_reg reg_tmp}, $sp, 0\n`; ++ ` st.d $ra, $sp, 8\n`; ++ ` {emit_call "caml_call_realloc_stack"}\n`; ++ ` ld.d $ra, $sp, 8\n`; ++ ` addi.d $sp, $sp, 16\n`; ++ ` b {emit_label ret}\n` ++ end; ++ ++ cfi_endproc(); ++ ` .size {emit_symbol fundecl.fun_name}, .-{emit_symbol fundecl.fun_name}\n`; ++ (* Emit the float literals *) ++ if env.float_literals <> [] then begin ++ ` {emit_string rodata_space}\n`; ++ ` .align 3\n`; ++ List.iter ++ (fun {fl; lbl} -> ++ `{emit_label lbl}:\n`; ++ emit_float64_directive ".quad" fl) ++ env.float_literals; ++ end ++ ++(* Emission of data *) ++ ++let declare_global_data s = ++ ` .globl {emit_symbol s}\n`; ++ ` .type {emit_symbol s}, @object\n` ++ ++let emit_item = function ++ | Cglobal_symbol s -> ++ declare_global_data s ++ | Cdefine_symbol s -> ++ `{emit_symbol s}:\n`; ++ | Cint8 n -> ++ ` .byte {emit_int n}\n` ++ | Cint16 n -> ++ ` .short {emit_int n}\n` ++ | Cint32 n -> ++ ` .long {emit_nativeint n}\n` ++ | Cint n -> ++ ` .quad {emit_nativeint n}\n` ++ | Csingle f -> ++ emit_float32_directive ".long" (Int32.bits_of_float f) ++ | Cdouble f -> ++ emit_float64_directive ".quad" (Int64.bits_of_float f) ++ | Csymbol_address s -> ++ ` .quad {emit_symbol s}\n` ++ | Cstring s -> ++ emit_bytes_directive " .byte " s ++ | Cskip n -> ++ if n > 0 then ` .space {emit_int n}\n` ++ | Calign n -> ++ ` .align {emit_int (Misc.log2 n)}\n` ++ ++let data l = ++ ` {emit_string data_space}\n`; ++ List.iter emit_item l ++ ++(* Beginning / end of an assembly file *) ++ ++let begin_assembly() = ++ if !Clflags.dlcode || !Clflags.pic_code then ` \n`; (* FIXME *) ++ ` .file \"\"\n`; (* PR#7073 *) ++ reset_debug_info (); ++ (* Emit the beginning of the segments *) ++ let lbl_begin = Compilenv.make_symbol (Some "data_begin") in ++ ` {emit_string data_space}\n`; ++ declare_global_data lbl_begin; ++ `{emit_symbol lbl_begin}:\n`; ++ let lbl_begin = Compilenv.make_symbol (Some "code_begin") in ++ ` {emit_string code_space}\n`; ++ declare_global_data lbl_begin; ++ `{emit_symbol lbl_begin}:\n` ++ ++let end_assembly() = ++ ` {emit_string code_space}\n`; ++ let lbl_end = Compilenv.make_symbol (Some "code_end") in ++ declare_global_data lbl_end; ++ `{emit_symbol lbl_end}:\n`; ++ ` .long 0\n`; ++ ` {emit_string data_space}\n`; ++ let lbl_end = Compilenv.make_symbol (Some "data_end") in ++ declare_global_data lbl_end; ++ ` .quad 0\n`; (* PR#6329 *) ++ `{emit_symbol lbl_end}:\n`; ++ ` .quad 0\n`; ++ (* Emit the frame descriptors *) ++ ` {emit_string data_space}\n`; (* not rodata because relocations inside *) ++ let lbl = Compilenv.make_symbol (Some "frametable") in ++ declare_global_data lbl; ++ `{emit_symbol lbl}:\n`; ++ emit_frames ++ { efa_code_label = (fun l -> ` .quad {emit_label l}\n`); ++ efa_data_label = (fun l -> ` .quad {emit_label l}\n`); ++ efa_8 = (fun n -> ` .byte {emit_int n}\n`); ++ efa_16 = (fun n -> ` .short {emit_int n}\n`); ++ efa_32 = (fun n -> ` .long {emit_int32 n}\n`); ++ efa_word = (fun n -> ` .quad {emit_int n}\n`); ++ efa_align = (fun n -> ` .align {emit_int (Misc.log2 n)}\n`); ++ efa_label_rel = (fun lbl ofs -> ++ ` .long ({emit_label lbl} - .) 
+ {emit_int32 ofs}\n`); ++ efa_def_label = (fun l -> `{emit_label l}:\n`); ++ efa_string = (fun s -> emit_bytes_directive " .byte " (s ^ "\000")) ++ } +diff --git a/asmcomp/loongarch64/proc.ml b/asmcomp/loongarch64/proc.ml +new file mode 100644 +index 000000000..62666c748 +--- /dev/null ++++ b/asmcomp/loongarch64/proc.ml +@@ -0,0 +1,319 @@ ++# 2 "asmcomp/loongarch64/proc.ml" ++(**************************************************************************) ++(* *) ++(* OCaml *) ++(* *) ++(* yala *) ++(* *) ++(* Copyright © 2008-2023 LOONGSON *) ++(* *) ++(* All rights reserved. This file is distributed under the terms of *) ++(* the GNU Lesser General Public License version 2.1, with the *) ++(* special exception on linking described in the file LICENSE. *) ++(* *) ++(**************************************************************************) ++ ++(* Description of the LoongArch *) ++ ++open Misc ++open Cmm ++open Reg ++open Arch ++open Mach ++ ++(* Instruction selection *) ++ ++let word_addressed = false ++ ++(* Registers available for register allocation *) ++ ++(* Integer register map ++ -------------------- ++ ++ zero always zero ++ ra return address ++ sp, gp, tp stack pointer, global pointer, thread pointer ++ a0-a7 0-7 arguments/results ++ s2-s6 8-12 arguments/results (preserved by C) ++ t2-t6 13-17 temporary ++ s0 18 general purpose (preserved by C) ++ t0, t1 19-20 temporaries (used by call veneers) ++ s1 21 trap pointer (preserved by C) ++ s7 22 allocation pointer (preserved by C) ++ s8 23 domain pointer (preserved by C) ++ ++ Floating-point register map ++ --------------------------- ++ ++ f0-f7 100-107 arguments ++ f0-f1 100-101 arguments/results ++ f8-f23 108-123 temporary ++ f24-f31 124-131 subroutine register variables ++ ++ Additional notes ++ ---------------- ++ ++ - t1 is used by the code generator, so not available for register ++ allocation. ++ ++ - t0-t6 may be used by PLT stubs, so should not be used to pass ++ arguments and may be clobbered by [Ialloc] in the presence of dynamic ++ linking. 
++*) ++ ++let int_reg_name = ++ [|"$a0"; "$a1"; "$a2"; "$a3"; "$a4"; "$a5"; "$a6"; "$a7"; (* 0- 7 *) ++ "$s2"; "$s3"; "$s4"; "$s5"; "$s6"; (* 8-12*) ++ "$t2"; "$t3"; "$t4"; "$t5"; "$t6"; "$t7"; "$t8"; (*13-19*) ++ "$s0"; (*20*) ++ "$t0"; "$t1"; (*21-22*) ++ "$s1"; "$s7"; "$s8"; (*23-25*) ++ |] ++ ++let float_reg_name = ++ [| "$ft0"; "$ft1"; "$ft2"; "$ft3"; "$ft4"; "$ft5"; "$ft6";"$ft7"; ++ "$fs0"; "$fs1"; ++ "$fa0"; "$fa1"; "$fa2"; "$fa3"; "$fa4"; "$fa5"; "$fa6"; "$fa7"; ++ "$fs2"; "$fs3"; "$fs4"; "$fs5"; "$fs6"; "$fs7"; ++ "$ft8"; "$ft9"; "$ft10"; "$ft11";"$ft12";"$ft13";"$ft14";"$ft15"; |] ++let num_register_classes = 2 ++ ++let register_class r = ++ match r.typ with ++ | Val | Int | Addr -> 0 ++ | Float -> 1 ++ ++(* first 19 int regs allocatable; all float regs allocatable *) ++let num_available_registers = [| 21; 32 |] ++ ++let first_available_register = [| 0; 100 |] ++ ++let register_name r = ++ if r < 100 then int_reg_name.(r) else float_reg_name.(r - 100) ++ ++let rotate_registers = true ++ ++(* Representation of hard registers by pseudo-registers *) ++ ++let hard_int_reg = ++ let v = Array.make 26 Reg.dummy in ++ for i = 0 to 25 do ++ v.(i) <- Reg.at_location Int (Reg i) ++ done; ++ v ++ ++let hard_float_reg = ++ let v = Array.make 32 Reg.dummy in ++ for i = 0 to 31 do ++ v.(i) <- Reg.at_location Float (Reg(100 + i)) ++ done; ++ v ++ ++let all_phys_regs = ++ Array.append hard_int_reg hard_float_reg ++ ++let phys_reg n = ++ if n < 100 then hard_int_reg.(n) else hard_float_reg.(n - 100) ++ ++let stack_slot slot ty = ++ Reg.at_location ty (Stack slot) ++ ++(* Calling conventions *) ++ ++let size_domainstate_args = 64 * size_int ++ ++let calling_conventions ++ first_int last_int first_float last_float make_stack first_stack arg = ++ let loc = Array.make (Array.length arg) Reg.dummy in ++ let int = ref first_int in ++ let float = ref first_float in ++ let ofs = ref first_stack in ++ for i = 0 to Array.length arg - 1 do ++ match arg.(i) with ++ | Val | Int | Addr as ty -> ++ if !int <= last_int then begin ++ loc.(i) <- phys_reg !int; ++ incr int ++ end else begin ++ loc.(i) <- stack_slot (make_stack !ofs) ty; ++ ofs := !ofs + size_int ++ end ++ | Float -> ++ if !float <= last_float then begin ++ loc.(i) <- phys_reg !float; ++ incr float ++ end else begin ++ loc.(i) <- stack_slot (make_stack !ofs) Float; ++ ofs := !ofs + size_float ++ end ++ done; ++ (loc, Misc.align (max 0 !ofs) 16) (* Keep stack 16-aligned. *) ++ ++let incoming ofs = ++ if ofs >= 0 ++ then Incoming ofs ++ else Domainstate (ofs + size_domainstate_args) ++let outgoing ofs = ++ if ofs >= 0 ++ then Outgoing ofs ++ else Domainstate (ofs + size_domainstate_args) ++let not_supported _ = fatal_error "Proc.loc_results: cannot call" ++ ++let max_arguments_for_tailcalls = 13 (* in regs *) + 64 (* in domain state *) ++ ++(* OCaml calling convention: ++ first integer args in a0 .. a7, s2 .. s6 ++ first float args in fa0 .. fa7, fs2 .. fs9 ++ remaining args in domain state area, then on stack. ++ Return values in a0 .. a7, s2 .. s6 or fa0 .. fa7, fs2 .. fs9. *) ++ ++let loc_arguments arg = ++ calling_conventions 0 12 110 121 outgoing (- size_domainstate_args) arg ++ ++let loc_parameters arg = ++ let (loc, _ofs) = ++ calling_conventions 0 12 110 121 incoming (- size_domainstate_args) arg ++ in ++ loc ++ ++let loc_results res = ++ let (loc, _ofs) = ++ calling_conventions 0 12 110 121 not_supported 0 res ++ in ++ loc ++ ++(* C calling convention: ++ first integer args in a0 .. a7 ++ first float args in fa0 .. 
fa7 ++ remaining args on stack. ++ A FP argument can be passed in an integer register if all FP registers ++ are exhausted but integer registers remain. ++ Return values in a0 .. a1 or fa0 .. fa1. *) ++ ++let external_calling_conventions ++ first_int last_int first_float last_float make_stack arg = ++ let loc = Array.make (Array.length arg) [| Reg.dummy |] in ++ let int = ref first_int in ++ let float = ref first_float in ++ let ofs = ref 0 in ++ for i = 0 to Array.length arg - 1 do ++ match arg.(i) with ++ | Val | Int | Addr as ty -> ++ if !int <= last_int then begin ++ loc.(i) <- [| phys_reg !int |]; ++ incr int ++ end else begin ++ loc.(i) <- [| stack_slot (make_stack !ofs) ty |]; ++ ofs := !ofs + size_int ++ end ++ | Float -> ++ if !float <= last_float then begin ++ loc.(i) <- [| phys_reg !float |]; ++ incr float ++ end else begin ++ loc.(i) <- [| stack_slot (make_stack !ofs) Float |]; ++ ofs := !ofs + size_float ++ end ++ done; ++ (loc, Misc.align !ofs 16) (* Keep stack 16-aligned. *) ++ ++let loc_external_arguments ty_args = ++ let arg = Cmm.machtype_of_exttype_list ty_args in ++ external_calling_conventions 0 7 110 117 outgoing arg ++ ++let loc_external_results res = ++ let (loc, _ofs) = calling_conventions 0 1 110 111 not_supported 0 res ++ in loc ++ ++(* Exceptions are in a0 *) ++ ++let loc_exn_bucket = phys_reg 0 ++ ++(* Volatile registers: none *) ++ ++let regs_are_volatile _ = false ++ ++(* Registers destroyed by operations *) ++ ++let destroyed_at_c_noalloc_call = ++ (* s0-s8 and fs0-fs7 are callee-save, but s0 is ++ used to preserve OCaml sp. *) ++ Array.of_list(List.map phys_reg ++ [0; 1; 2; 3; 4; 5; 6; 7; 13; 14; 15; 16; 17; 18; 19; 20;(*s0*) ++ 100; 101; 102; 103; 104; 105; 106; 107; 110; 111; 112; 113; 114; 115; 116; ++ 117; 124; 125; 126; 127; 128; 129; 130; 131]) ++ ++let destroyed_at_alloc = ++ (* t0-t6 are used for PLT stubs *) ++ if !Clflags.dlcode then Array.map phys_reg [|13; 14; 15; 16; 17; 18; 19|] ++ else [| phys_reg 13 |] (* t2 is used to pass the argument to caml_allocN *) ++ ++let destroyed_at_oper = function ++ | Iop(Icall_ind | Icall_imm _) -> all_phys_regs ++ | Iop(Iextcall{alloc; stack_ofs; _}) -> ++ assert (stack_ofs >= 0); ++ if alloc || stack_ofs > 0 then all_phys_regs ++ else destroyed_at_c_noalloc_call ++ | Iop(Ialloc _) | Iop(Ipoll _) -> destroyed_at_alloc ++ | Iop(Istore(Single, _, _)) -> [| phys_reg 100 |] ++ | Iop(Ifloatofint | Iintoffloat) -> [| phys_reg 100 |] ++ | _ -> [| |] ++ ++let destroyed_at_raise = all_phys_regs ++ ++let destroyed_at_reloadretaddr = [| |] ++ ++(* Maximal register pressure *) ++ ++let safe_register_pressure = function ++ | Iextcall _ -> 5 (*9-3 s0~s8 - s7 - s8 - s1 - s0*) ++ | _ -> 21 ++ ++let max_register_pressure = function ++ | Iextcall _ -> [| 5; 8 |] (* 6 integer callee-saves, 8 FP callee-saves *) ++ | _ -> [| 21; 30 |] ++ ++(* Layout of the stack *) ++ ++let frame_required fd = ++ fd.fun_contains_calls ++ || fd.fun_num_stack_slots.(0) > 0 ++ || fd.fun_num_stack_slots.(1) > 0 ++ ++let prologue_required fd = ++ frame_required fd ++ ++ (* FIXME *) ++let int_dwarf_reg_numbers = ++ [| 4; 5; 6; 7; 8; 9; 10; 11; ++ 23; 24; 25; 26; 27; 28; 29; 30; ++ 14; 15; 16; 17; 18; ++ 31; ++ 12; 13; ++ 19; 20; ++ |] ++ ++let float_dwarf_reg_numbers = ++ [| 32; 33; 34; 35; 36; 37; 38; 39; ++ 40; 41; ++ 42; 43; 44; 45; 46; 47; 48; 49; ++ 50; 51; 52; 53; 54; 55; 56; 57; ++ 58; 59; ++ 60; 61; 62; 63; ++ |] ++ ++let dwarf_register_numbers ~reg_class = ++ match reg_class with ++ | 0 -> int_dwarf_reg_numbers ++ | 1 -> float_dwarf_reg_numbers 
++ | _ -> Misc.fatal_errorf "Bad register class %d" reg_class ++ ++let stack_ptr_dwarf_register_number = 2 ++ ++(* Calling the assembler *) ++ ++let assemble_file infile outfile = ++ Ccomp.command ++ (Config.asm ^ " -o " ^ Filename.quote outfile ^ " " ^ Filename.quote infile) ++ ++let init () = () +diff --git a/asmcomp/loongarch64/selection.ml b/asmcomp/loongarch64/selection.ml +new file mode 100644 +index 000000000..be29364c1 +--- /dev/null ++++ b/asmcomp/loongarch64/selection.ml +@@ -0,0 +1,70 @@ ++# 2 "asmcomp/loongarch64/selection.ml" ++(**************************************************************************) ++(* *) ++(* OCaml *) ++(* *) ++(* yala *) ++(* *) ++(* Copyright © 2008-2023 LOONGSON *) ++(* *) ++(* All rights reserved. This file is distributed under the terms of *) ++(* the GNU Lesser General Public License version 2.1, with the *) ++(* special exception on linking described in the file LICENSE. *) ++(* *) ++(**************************************************************************) ++ ++(* Instruction selection for the LoongArch processor *) ++ ++open Cmm ++open Arch ++open Mach ++ ++(* Instruction selection *) ++ ++class selector = object ++ ++inherit Selectgen.selector_generic as super ++ ++(* LoongArch does not support immediate operands for comparison operators *) ++method is_immediate_test _cmp _n = false ++ ++method! is_immediate op n = ++ match op with ++ | Iadd | Iand | Ior | Ixor -> is_immediate n ++ (* sub immediate is turned into add immediate opposite *) ++ | Isub -> is_immediate (-n) ++ | _ -> super#is_immediate op n ++ ++method select_addressing _ = function ++ | Cop(Cadda, [arg; Cconst_int (n, _)], _) when is_immediate n -> ++ (Iindexed n, arg) ++ | Cop(Cadda, [arg1; Cop(Caddi, [arg2; Cconst_int (n, _)], _)], dbg) ++ when is_immediate n -> ++ (Iindexed n, Cop(Caddi, [arg1; arg2], dbg)) ++ | arg -> ++ (Iindexed 0, arg) ++ ++method! 
select_operation op args dbg = ++ match (op, args) with ++ (* Recognize (neg-)mult-add and (neg-)mult-sub instructions *) ++ | (Caddf, [Cop(Cmulf, [arg1; arg2], _); arg3]) ++ | (Caddf, [arg3; Cop(Cmulf, [arg1; arg2], _)]) -> ++ (Ispecific (Imultaddf false), [arg1; arg2; arg3]) ++ | (Csubf, [Cop(Cmulf, [arg1; arg2], _); arg3]) -> ++ (Ispecific (Imultsubf false), [arg1; arg2; arg3]) ++ | (Cnegf, [Cop(Csubf, [Cop(Cmulf, [arg1; arg2], _); arg3], _)]) -> ++ (Ispecific (Imultsubf true), [arg1; arg2; arg3]) ++ | (Cnegf, [Cop(Caddf, [Cop(Cmulf, [arg1; arg2], _); arg3], _)]) -> ++ (Ispecific (Imultaddf true), [arg1; arg2; arg3]) ++ | (Cstore (Word_int | Word_val as memory_chunk, Assignment), [arg1; arg2]) -> ++ (* Use trivial addressing mode for non-initializing stores *) ++ (Istore (memory_chunk, Iindexed 0, true), [arg2; arg1]) ++ | (Cextcall("sqrt", _, _, _), []) -> ++ (Ispecific Isqrtf, args) ++ | _ -> ++ super#select_operation op args dbg ++ ++end ++ ++let fundecl ~future_funcnames f = ++ (new selector)#emit_fundecl ~future_funcnames f +diff --git a/configure b/configure +index 19764d19a..6415b4cc1 100755 +Binary files a/configure and b/configure differ +diff --git a/configure.ac b/configure.ac +index a7974b042..069a931d7 100644 +--- a/configure.ac ++++ b/configure.ac +@@ -1079,7 +1079,8 @@ AS_IF([test x"$supports_shared_libraries" = 'xtrue'], + [aarch64-*-freebsd*], [natdynlink=true], + [aarch64-*-openbsd*], [natdynlink=true], + [aarch64-*-netbsd*], [natdynlink=true], +- [riscv*-*-linux*], [natdynlink=true])]) ++ [riscv*-*-linux*], [natdynlink=true], ++ [loongarch*-*-linux*], [natdynlink=true])]) + + AS_CASE([$enable_native_toplevel,$natdynlink], + [yes,false], +@@ -1199,7 +1200,9 @@ AS_CASE([$host], + [x86_64-*-cygwin*], + [arch=amd64; system=cygwin], + [riscv64-*-linux*], +- [arch=riscv; model=riscv64; system=linux] ++ [arch=riscv; model=riscv64; system=linux], ++ [loongarch64-*-linux*], ++ [has_native_backend=yes; arch=loongarch64; system=linux] + ) + + AS_CASE([$ccomptype], +@@ -1302,7 +1305,7 @@ default_aspp="$CC -c" + AS_CASE([$as_target,$ocaml_cv_cc_vendor], + [*-*-linux*,gcc-*], + [AS_CASE([$as_cpu], +- [x86_64|arm*|aarch64*|i[[3-6]]86|riscv*], ++ [x86_64|arm*|aarch64*|i[[3-6]]86|riscv*|loongarch*], + [default_as="${toolpref}as"])], + [i686-pc-windows,*], + [default_as="ml -nologo -coff -Cp -c -Fo" +@@ -1940,7 +1943,7 @@ AS_IF([$native_compiler], + + AS_IF([test x"$enable_frame_pointers" = "xyes"], + [AS_CASE(["$host,$cc_basename"], +- [x86_64-*-linux*,gcc*|x86_64-*-linux*,clang*], ++ [x86_64-*-linux*,gcc*|x86_64-*-linux*,clang*|loongarch64-*-linux*,gcc*], + [common_cflags="$common_cflags -g -fno-omit-frame-pointer" + frame_pointers=true + AC_DEFINE([WITH_FRAME_POINTERS]) +diff --git a/runtime/caml/stack.h b/runtime/caml/stack.h +index 0c2e0b2fe..ebdc1d55a 100644 +--- a/runtime/caml/stack.h ++++ b/runtime/caml/stack.h +@@ -70,6 +70,17 @@ + #define Saved_return_address(sp) *((intnat *)((sp) - 8)) + #endif + ++#ifdef TARGET_loongarch64 ++/* Size of the gc_regs structure, in words. ++ See loongarch64.S and loongarch64/proc.ml for the indices */ ++#define Wosize_gc_regs (2 + 23 /* int regs */ + 24 /* float regs */) ++#define Saved_return_address(sp) *((intnat *)((sp) - 8)) ++/* LoongArch does not use a frame pointer, but requires the stack to be ++ 16-aligned, so when pushing the return address to the stack there ++ is an extra word of padding after it that needs to be skipped when ++ walking the stack. 
*/ ++#define Pop_frame_pointer(sp) sp += sizeof(value) ++#endif + /* Declaration of variables used in the asm code */ + extern value * caml_globals[]; + extern intnat caml_globals_inited; +diff --git a/runtime/loongarch64.S b/runtime/loongarch64.S +new file mode 100644 +index 000000000..d2289f821 +--- /dev/null ++++ b/runtime/loongarch64.S +@@ -0,0 +1,827 @@ ++/**************************************************************************/ ++/* */ ++/* OCaml */ ++/* */ ++/* yala */ ++/* */ ++/* Copyright © 2008-2023 LOONGSON */ ++/* */ ++/* All rights reserved. This file is distributed under the terms of */ ++/* the GNU Lesser General Public License version 2.1, with the */ ++/* $special exception on linking described in the file LICENSE. */ ++/* */ ++/**************************************************************************/ ++ ++/* Asm part of the runtime system, LoongArch processor, 64-bit mode */ ++/* Must be preprocessed by cpp */ ++ ++#include "caml/m.h" ++ ++#define DOMAIN_STATE_PTR $s8 ++#define TRAP_PTR $s1 ++#define ALLOC_PTR $s7 ++#define ADDITIONAL_ARG $t2 ++#define STACK_ARG_BEGIN $s3 ++#define STACK_ARG_END $s4 ++#define TMP $t0 ++#define TMP2 $t1 ++ ++#define C_ARG_1 $a0 ++#define C_ARG_2 $a1 ++#define C_ARG_3 $a2 ++#define C_ARG_4 $a3 ++ ++/* Support for CFI directives */ ++//FIXME ++#define CFI_STARTPROC ++#define CFI_ENDPROC ++#define CFI_ADJUST(n) ++#define CFI_REGISTER(r1,r2) ++#define CFI_OFFSET(r,n) ++#define CFI_DEF_CFA_REGISTER(r) ++#define CFI_REMEMBER_STATE ++#define CFI_RESTORE_STATE ++ ++ .set domain_curr_field, 0 ++ .set domain_curr_cnt, 0 ++#define DOMAIN_STATE(c_type, name) \ ++ .equ domain_field_caml_##name, domain_curr_field ; \ ++ .set domain_curr_cnt, domain_curr_cnt + 1; \ ++ .set domain_curr_field, domain_curr_cnt*8 ++#include "../runtime/caml/domain_state.tbl" ++#undef DOMAIN_STATE ++ ++#define Caml_state(var) DOMAIN_STATE_PTR, domain_field_caml_##var ++ ++/* Globals and labels */ ++#define L(lbl) .L##lbl ++ ++#define FUNCTION(name) \ ++ .align 2; \ ++ .globl name; \ ++ .type name, @function; \ ++name:; \ ++ CFI_STARTPROC ++ ++#define END_FUNCTION(name) \ ++ CFI_ENDPROC; \ ++ .size name, .-name ++ ++#if defined(__PIC__) ++#define PLT(r) %plt(r) ++#else ++#define PLT(r) r ++#endif ++ ++#define OBJECT(name) \ ++ .data; \ ++ .align 3; \ ++ .globl name; \ ++ .type name, @object; \ ++name: ++#define END_OBJECT(name) \ ++ .size name, .-name ++ ++/* Stack switching operations */ ++ ++/* struct stack_info */ ++#define Stack_sp(reg) reg, 0 ++#define Stack_exception(reg) reg, 8 ++#define Stack_handler(reg) reg, 16 ++#define Stack_handler_from_cont(reg) reg, 15 ++ ++/* struct c_stack_link */ ++#define Cstack_stack(reg) reg, 0 ++#define Cstack_sp(reg) reg, 8 ++#define Cstack_prev(reg) reg, 16 ++ ++/* struct stack_handler */ ++#define Handler_value(reg) reg, 0 ++#define Handler_exception(reg) reg, 8 ++#define Handler_effect(reg) reg, 16 ++#define Handler_parent(reg) reg, 24 ++ ++/* Switch from OCaml to C stack. */ ++.macro SWITCH_OCAML_TO_C ++ /* Fill in Caml_state->current_stack->$sp */ ++ ld.d TMP, Caml_state(current_stack) ++ st.d $sp, Stack_sp(TMP) ++ /* Fill in Caml_state->c_stack */ ++ ld.d TMP2, Caml_state(c_stack) ++ st.d TMP, Cstack_stack(TMP2) ++ st.d $sp, Cstack_sp(TMP2) ++ /* Switch to C stack */ ++ move $sp, TMP2 ++ CFI_REMEMBER_STATE ++.endm ++ ++/* Switch from C to OCaml stack. 
*/ ++.macro SWITCH_C_TO_OCAML ++ ld.d $sp, Cstack_sp($sp) ++ CFI_RESTORE_STATE ++.endm ++ ++/* Save all of the registers that may be in use to a free gc_regs bucket ++ and store ALLOC_PTR and TRAP_PTR back to Caml_state ++ At the end the saved registers are placed in Caml_state(gc_regs) ++ */ ++.macro SAVE_ALL_REGS ++ /* First, save the young_ptr & exn_handler */ ++ st.d ALLOC_PTR, Caml_state(young_ptr) ++ st.d TRAP_PTR, Caml_state(exn_handler) ++ /* Now, use TMP to point to the gc_regs bucket */ ++ ld.d TMP, Caml_state(gc_regs_buckets) ++ ld.d TMP2, TMP, 0 /* next ptr */ ++ st.d TMP2, Caml_state(gc_regs_buckets) ++ /* Save allocatable integer registers Must be in ++ the same order as proc.ml int_reg_name*/ ++ st.d $a0, TMP, 2*8 ++ st.d $a1, TMP, 3*8 ++ st.d $a2, TMP, 4*8 ++ st.d $a3, TMP, 5*8 ++ st.d $a4, TMP, 6*8 ++ st.d $a5, TMP, 7*8 ++ st.d $a6, TMP, 8*8 ++ st.d $a7, TMP, 9*8 ++ st.d $s2, TMP, 10*8 ++ st.d $s3, TMP, 11*8 ++ st.d $s4, TMP, 12*8 ++ st.d $s5, TMP, 13*8 ++ st.d $s6, TMP, 14*8 ++ st.d $t2, TMP, 15*8 ++ st.d $t3, TMP, 16*8 ++ st.d $t4, TMP, 17*8 ++ st.d $t5, TMP, 18*8 ++ st.d $t6, TMP, 19*8 ++ st.d $t7, TMP, 20*8 ++ st.d $t8, TMP, 21*8 ++ st.d $s0, TMP, 22*8 ++ /* Save caller-save floating-point registers ++ (callee-saves are preserved by C functions) */ ++ fst.d $ft0, TMP, 23*8 ++ fst.d $ft1, TMP, 24*8 ++ fst.d $ft2, TMP, 25*8 ++ fst.d $ft3, TMP, 26*8 ++ fst.d $ft4, TMP, 27*8 ++ fst.d $ft5, TMP, 28*8 ++ fst.d $ft6, TMP, 29*8 ++ fst.d $ft7, TMP, 30*8 ++ fst.d $fa0, TMP, 31*8 ++ fst.d $fa1, TMP, 32*8 ++ fst.d $fa2, TMP, 33*8 ++ fst.d $fa3, TMP, 34*8 ++ fst.d $fa4, TMP, 35*8 ++ fst.d $fa5, TMP, 36*8 ++ fst.d $fa6, TMP, 37*8 ++ fst.d $fa7, TMP, 38*8 ++ fst.d $ft8, TMP, 39*8 ++ fst.d $ft9, TMP, 40*8 ++ fst.d $ft10, TMP, 41*8 ++ fst.d $ft11, TMP, 42*8 ++ fst.d $ft12, TMP, 43*8 ++ fst.d $ft13, TMP, 44*8 ++ fst.d $ft14, TMP, 45*8 ++ fst.d $ft15, TMP, 46*8 ++ addi.d TMP, TMP, 16 ++ st.d TMP, Caml_state(gc_regs) ++.endm ++ ++/* Undo SAVE_ALL_REGS by loading the registers saved in Caml_state(gc_regs) ++ and refreshing ALLOC_PTR & TRAP_PTR from Caml_state */ ++.macro RESTORE_ALL_REGS ++ /* Restore $a0, $a1, freeing up the next ptr slot */ ++ ld.d TMP, Caml_state(gc_regs) ++ addi.d TMP, TMP, -16 ++ /* Restore registers */ ++ ld.d $a0, TMP, 2*8 ++ ld.d $a1, TMP, 3*8 ++ ld.d $a2, TMP, 4*8 ++ ld.d $a3, TMP, 5*8 ++ ld.d $a4, TMP, 6*8 ++ ld.d $a5, TMP, 7*8 ++ ld.d $a6, TMP, 8*8 ++ ld.d $a7, TMP, 9*8 ++ ld.d $s2, TMP, 10*8 ++ ld.d $s3, TMP, 11*8 ++ ld.d $s4, TMP, 12*8 ++ ld.d $s5, TMP, 13*8 ++ ld.d $s6, TMP, 14*8 ++ ld.d $t2, TMP, 15*8 ++ ld.d $t3, TMP, 16*8 ++ ld.d $t4, TMP, 17*8 ++ ld.d $t5, TMP, 18*8 ++ ld.d $t6, TMP, 19*8 ++ ld.d $t7, TMP, 20*8 ++ ld.d $t8, TMP, 21*8 ++ ld.d $s0, TMP, 22*8 ++ fld.d $ft0, TMP, 23*8 ++ fld.d $ft1, TMP, 24*8 ++ fld.d $ft2, TMP, 25*8 ++ fld.d $ft3, TMP, 26*8 ++ fld.d $ft4, TMP, 27*8 ++ fld.d $ft5, TMP, 28*8 ++ fld.d $ft6, TMP, 29*8 ++ fld.d $ft7, TMP, 30*8 ++ fld.d $fa0, TMP, 31*8 ++ fld.d $fa1, TMP, 32*8 ++ fld.d $fa2, TMP, 33*8 ++ fld.d $fa3, TMP, 34*8 ++ fld.d $fa4, TMP, 35*8 ++ fld.d $fa5, TMP, 36*8 ++ fld.d $fa6, TMP, 37*8 ++ fld.d $fa7, TMP, 38*8 ++ fld.d $ft8, TMP, 39*8 ++ fld.d $ft9, TMP, 40*8 ++ fld.d $ft10, TMP, 41*8 ++ fld.d $ft11, TMP, 42*8 ++ fld.d $ft12, TMP, 43*8 ++ fld.d $ft13, TMP, 44*8 ++ fld.d $ft14, TMP, 45*8 ++ fld.d $ft15, TMP, 46*8 ++ /* Put gc_regs struct back in bucket linked list */ ++ ld.d TMP2, Caml_state(gc_regs_buckets) ++ st.d TMP2, TMP, 0 /* next ptr */ ++ st.d TMP, Caml_state(gc_regs_buckets) ++ /* Reload new allocation 
pointer & exn handler */ ++ ld.d ALLOC_PTR, Caml_state(young_ptr) ++ ld.d TRAP_PTR, Caml_state(exn_handler) ++.endm ++ ++ .section .text ++/* Invoke the garbage collector. */ ++ ++ .globl caml_system__code_begin ++caml_system__code_begin: ++ ++FUNCTION(caml_call_realloc_stack) ++ /* Save return address */ ++ CFI_OFFSET($ra, -8) ++ addi.d $sp, $sp, -16 ++ st.d $ra, $sp, 8 ++ //CFI_ADJUST(16) ++ /* Save all registers (including ALLOC_PTR & TRAP_PTR) */ ++ SAVE_ALL_REGS ++ ld.d C_ARG_1, $sp, 16 /* argument */ ++ SWITCH_OCAML_TO_C ++ bl PLT(caml_try_realloc_stack) ++ SWITCH_C_TO_OCAML ++ beqz $a0, 1f ++ RESTORE_ALL_REGS ++ /* Free stack $space and return to caller */ ++ ld.d $ra, $sp, 8 ++ addi.d $sp, $sp, 16 ++ jr $ra ++1: RESTORE_ALL_REGS ++ /* Raise the Stack_overflow exception */ ++ ld.d $ra, $sp, 8 ++ addi.d $sp, $sp, 16 ++ addi.d $sp, $sp, 16 /* pop argument */ ++ la.global $a0, caml_exn_Stack_overflow ++ b caml_raise_exn ++END_FUNCTION(caml_call_realloc_stack) ++ ++FUNCTION(caml_call_gc) ++L(caml_call_gc): ++ /* Save return address */ ++ CFI_OFFSET($ra, -8) ++ addi.d $sp, $sp, -16 ++ st.d $ra, $sp, 8 ++ CFI_ADJUST(16) ++ /* Store all registers (including ALLOC_PTR & TRAP_PTR) */ ++ SAVE_ALL_REGS ++ SWITCH_OCAML_TO_C ++ /* Call the garbage collector */ ++ bl PLT(caml_garbage_collection) ++ SWITCH_C_TO_OCAML ++ RESTORE_ALL_REGS ++ /* Free stack $space and return to caller */ ++ ld.d $ra, $sp, 8 ++ addi.d $sp, $sp, 16 ++ jr $ra ++END_FUNCTION(caml_call_gc) ++ ++FUNCTION(caml_alloc1) ++ ld.d TMP, Caml_state(young_limit) ++ addi.d ALLOC_PTR, ALLOC_PTR, -16 ++ bltu ALLOC_PTR, TMP, L(caml_call_gc) ++ jr $ra ++END_FUNCTION(caml_alloc1) ++ ++FUNCTION(caml_alloc2) ++ ld.d TMP, Caml_state(young_limit) ++ addi.d ALLOC_PTR, ALLOC_PTR, -24 ++ bltu ALLOC_PTR, TMP, L(caml_call_gc) ++ jr $ra ++END_FUNCTION(caml_alloc2) ++ ++FUNCTION(caml_alloc3) ++ ld.d TMP, Caml_state(young_limit) ++ addi.d ALLOC_PTR, ALLOC_PTR, -32 ++ bltu ALLOC_PTR, TMP, L(caml_call_gc) ++ jr $ra ++END_FUNCTION(caml_alloc3) ++ ++FUNCTION(caml_allocN) ++ ld.d TMP, Caml_state(young_limit) ++ sub.d ALLOC_PTR, ALLOC_PTR, ADDITIONAL_ARG ++ bltu ALLOC_PTR, TMP, L(caml_call_gc) ++ jr $ra ++END_FUNCTION(caml_allocN) ++ ++/* Call a C function from OCaml */ ++/* Function to call is in ADDITIONAL_ARG */ ++ ++FUNCTION(caml_c_call) ++ CFI_OFFSET($ra, -8) ++ addi.d $sp, $sp, -16 ++ st.d $ra, $sp, 8 ++ CFI_ADJUST(16) ++ /* Switch form OCaml to C */ ++ SWITCH_OCAML_TO_C ++ /* Make the exception handler alloc ptr available to the C code */ ++ st.d ALLOC_PTR, Caml_state(young_ptr) ++ st.d TRAP_PTR, Caml_state(exn_handler) ++ /* Call the function */ ++ jirl $ra, ADDITIONAL_ARG, 0 ++ /* Reload alloc ptr */ ++ ld.d ALLOC_PTR, Caml_state(young_ptr) ++ /* Load ocaml stack */ ++ SWITCH_C_TO_OCAML ++ /* Return */ ++ ld.d $ra, $sp, 8 ++ addi.d $sp, $sp, 16 ++ jr $ra ++END_FUNCTION(caml_c_call) ++ ++FUNCTION(caml_c_call_stack_args) ++ /* Arguments: ++ C arguments : $a0 to a7, fa0 to fa7 ++ C function : ADDITIONAL_ARG ++ C stack args : begin=STACK_ARG_BEGIN ++ end=STACK_ARG_END */ ++ CFI_OFFSET($ra, -8) ++ addi.d $sp, $sp, -16 ++ st.d $ra, $sp, 8 ++ CFI_ADJUST(16) ++ /* Switch from OCaml to C */ ++ SWITCH_OCAML_TO_C ++ /* Make the exception handler alloc ptr available to the C code */ ++ st.d ALLOC_PTR, Caml_state(young_ptr) ++ st.d TRAP_PTR, Caml_state(exn_handler) ++ /* Store $sp to restore after call */ ++ move $s2, $sp ++ /* Copy arguments from OCaml to C stack ++ NB: STACK_ARG_{BEGIN,END} are 16-byte aligned */ ++1: addi.d STACK_ARG_END, STACK_ARG_END, -16 
++ bltu STACK_ARG_END, STACK_ARG_BEGIN, 2f ++ ld.d TMP, STACK_ARG_END, 0 ++ ld.d TMP2, STACK_ARG_END, 8 ++ addi.d $sp, $sp, -16 ++ st.d TMP, $sp, 0 ++ st.d TMP2, $sp, 8 ++ b 1b ++2: /* Call the function */ ++ jirl $ra, ADDITIONAL_ARG, 0 ++ /* Restore stack */ ++ move $sp, $s2 ++ /* Reload alloc ptr */ ++ ld.d ALLOC_PTR, Caml_state(young_ptr) ++ /* Switch from C to OCaml */ ++ SWITCH_C_TO_OCAML ++ /* Return */ ++ ld.d $ra, $sp, 8 ++ addi.d $sp, $sp, 16 ++ jr $ra ++END_FUNCTION(caml_c_call_stack_args) ++ ++/* Start the OCaml program */ ++ ++FUNCTION(caml_start_program) ++ /* domain state is passed as arg from C */ ++ move TMP, C_ARG_1 ++ la.global TMP2, caml_program ++ ++/* Code shared with caml_callback* */ ++/* Address of domain state is in TMP */ ++/* Address of OCaml code to call is in TMP2 */ ++/* Arguments to the OCaml code are in $a0...a7 */ ++ ++L(jump_to_caml): ++ /* Set up stack frame and save callee-save registers */ ++ CFI_OFFSET($ra, -200) ++ addi.d $sp, $sp, -208 ++ st.d $ra, $sp, 8 ++ CFI_ADJUST(208) ++ st.d $s0, $sp, 2*8 ++ st.d $s1, $sp, 3*8 ++ st.d $s2, $sp, 4*8 ++ st.d $s3, $sp, 5*8 ++ st.d $s4, $sp, 6*8 ++ st.d $s5, $sp, 7*8 ++ st.d $s6, $sp, 8*8 ++ st.d $s7, $sp, 9*8 ++ st.d $s8, $sp, 10*8 ++ st.d $fp, $sp, 11*8 ++ fst.d $fs0, $sp, 14*8 ++ fst.d $fs1, $sp, 15*8 ++ fst.d $fs2, $sp, 16*8 ++ fst.d $fs3, $sp, 17*8 ++ fst.d $fs4, $sp, 18*8 ++ fst.d $fs5, $sp, 19*8 ++ fst.d $fs6, $sp, 20*8 ++ fst.d $fs7, $sp, 21*8 ++ /* Load domain state pointer from argument */ ++ move DOMAIN_STATE_PTR, TMP ++ /* Reload allocation pointer */ ++ ld.d ALLOC_PTR, Caml_state(young_ptr) ++ /* Build (16-byte aligned) struct c_stack_link on the C stack */ ++ ld.d $t2, Caml_state(c_stack) ++ addi.d $sp, $sp, -32 ++ st.d $t2, Cstack_prev($sp) ++ st.d $zero, Cstack_stack($sp) ++ st.d $zero, Cstack_sp($sp) ++ CFI_ADJUST(32) ++ st.d $sp, Caml_state(c_stack) ++ /* Load the OCaml stack */ ++ ld.d $t2, Caml_state(current_stack) ++ ld.d $t2, Stack_sp($t2) ++ /* Store the gc_regs for callbacks during a GC */ ++ ld.d $t3, Caml_state(gc_regs) ++ addi.d $t2, $t2, -8 ++ st.d $t3, $t2, 0 ++ /* Store the stack pointer to allow DWARF unwind */ ++ addi.d $t2, $t2, -8 ++ st.d $sp, $t2, 0 /* C_stack_sp */ ++ /* Setup a trap frame to catch exceptions escaping the OCaml code */ ++ ld.d $t3, Caml_state(exn_handler) ++ la.local $t4, L(trap_handler) ++ addi.d $t2, $t2, -16 ++ st.d $t3, $t2, 0 ++ st.d $t4, $t2, 8 ++ move TRAP_PTR, $t2 ++ /* Switch stacks and call the OCaml code */ ++ move $sp, $t2 ++ CFI_REMEMBER_STATE ++ /* Call the OCaml code */ ++ jirl $ra, TMP2, 0 ++L(caml_retaddr): ++ /* Pop the trap frame, restoring Caml_state->exn_handler */ ++ ld.d $t2, $sp, 0 ++ addi.d $sp, $sp, 16 ++ CFI_ADJUST(-16) ++ st.d $t2, Caml_state(exn_handler) ++L(return_result): ++ /* Restore GC regs */ ++ ld.d $t2, $sp, 0 ++ ld.d $t3, $sp, 8 ++ addi.d $sp, $sp, 16 ++ CFI_ADJUST(-16) ++ st.d $t3, Caml_state(gc_regs) ++ /* Update allocation pointer */ ++ st.d ALLOC_PTR, Caml_state(young_ptr) ++ /* Return to C stack */ ++ ld.d $t2, Caml_state(current_stack) ++ st.d $sp, Stack_sp($t2) ++ ld.d $t3, Caml_state(c_stack) ++ move $sp, $t3 ++ CFI_RESTORE_STATE ++ /* Pop the struct c_stack_link */ ++ ld.d $t2, Cstack_prev($sp) ++ addi.d $sp, $sp, 32 ++ CFI_ADJUST(-32) ++ st.d $t2, Caml_state(c_stack) ++ /* Reload callee-save register and return address */ ++ ld.d $s0, $sp, 2*8 ++ ld.d $s1, $sp, 3*8 ++ ld.d $s2, $sp, 4*8 ++ ld.d $s3, $sp, 5*8 ++ ld.d $s4, $sp, 6*8 ++ ld.d $s5, $sp, 7*8 ++ ld.d $s6, $sp, 8*8 ++ ld.d $s7, $sp, 9*8 ++ ld.d $s8, $sp, 
10*8 ++ ld.d $fp, $sp, 11*8 ++ fld.d $fs0, $sp, 14*8 ++ fld.d $fs1, $sp, 15*8 ++ fld.d $fs2, $sp, 16*8 ++ fld.d $fs3, $sp, 17*8 ++ fld.d $fs4, $sp, 18*8 ++ fld.d $fs5, $sp, 19*8 ++ fld.d $fs6, $sp, 20*8 ++ fld.d $fs7, $sp, 21*8 ++ ld.d $ra, $sp, 8 ++ addi.d $sp, $sp, 208 ++ CFI_ADJUST(-208) ++ /* Return to C caller */ ++ jr $ra ++END_FUNCTION(caml_start_program) ++ ++/* The trap handler */ ++ ++ .align 2 ++L(trap_handler): ++ CFI_STARTPROC ++ /* Save exception pointer */ ++ st.d TRAP_PTR, Caml_state(exn_handler) ++ /* Encode exception pointer */ ++ ori $a0, $a0, 2 ++ /* Return it */ ++ b L(return_result) ++ CFI_ENDPROC ++ ++/* Exceptions */ ++ ++.macro JUMP_TO_TRAP_PTR ++ /* Cut stack at current trap handler */ ++ move $sp, TRAP_PTR ++ /* Pop previous handler and jump to it */ ++ ld.d TMP, $sp, 8 ++ ld.d TRAP_PTR, $sp, 0 ++ addi.d $sp, $sp, 16 ++ jr TMP ++.endm ++ ++/* Raise an exception from OCaml */ ++FUNCTION(caml_raise_exn) ++ /* Test if backtrace is active */ ++ ld.d TMP, Caml_state(backtrace_active) ++ bnez TMP, 2f ++1: ++ JUMP_TO_TRAP_PTR ++2: /* Zero backtrace_pos */ ++ st.d $zero, Caml_state(backtrace_pos) ++L(caml_reraise_exn_stash): ++ /* Preserve exception bucket in callee-save register $s2 */ ++ move $s2, $a0 ++ /* Stash the backtrace */ ++ /* arg1: exn bucket, already in $a0 */ ++ move $a1, $ra /* arg2: pc of $raise */ ++ move $a2, $sp /* arg3: $sp of $raise */ ++ move $a3, TRAP_PTR /* arg4: $sp of handler */ ++ /* Switch to C stack */ ++ ld.d TMP, Caml_state(c_stack) ++ move $sp, TMP ++ bl PLT(caml_stash_backtrace) ++ /* Restore exception bucket and $raise */ ++ move $a0, $s2 ++ b 1b ++END_FUNCTION(caml_raise_exn) ++ ++FUNCTION(caml_reraise_exn) ++ ld.d TMP, Caml_state(backtrace_active) ++ bnez TMP, L(caml_reraise_exn_stash) ++ JUMP_TO_TRAP_PTR ++END_FUNCTION(caml_reraise_exn) ++ ++/* Raise an exception from C */ ++ ++FUNCTION(caml_raise_exception) ++ /* Load the domain state ptr */ ++ move DOMAIN_STATE_PTR, C_ARG_1 ++ /* Load the exception bucket */ ++ move $a0, C_ARG_2 ++ /* Reload trap ptr and alloc ptr */ ++ ld.d TRAP_PTR, Caml_state(exn_handler) ++ ld.d ALLOC_PTR, Caml_state(young_ptr) ++ /* Discard the C stack pointer and reset to ocaml stack */ ++ ld.d TMP, Caml_state(current_stack) ++ ld.d TMP, Stack_sp(TMP) ++ move $sp, TMP ++ /* Restore frame and link on return to OCaml */ ++ ld.d $ra, $sp, 8 ++ addi.d $sp, $sp, 16 ++ b caml_raise_exn ++END_FUNCTION(caml_raise_exception) ++ ++/* Callback from C to OCaml */ ++ ++FUNCTION(caml_callback_asm) ++ /* Initial shuffling of arguments */ ++ /* ($a0 = Caml_state, $a1 = closure, 0(a2) = first arg) */ ++ move TMP, $a0 ++ ld.d $a0, $a2, 0 /* $a0 = first arg */ ++ /* $a1 = closure environment */ ++ ld.d TMP2, $a1, 0 /* code pointer */ ++ b L(jump_to_caml) ++END_FUNCTION(caml_callback_asm) ++ ++FUNCTION(caml_callback2_asm) ++ /* Initial shuffling of arguments */ ++ /* ($a0 = Caml_state, $a1 = closure, 0(a2) = arg1, 8(a2) = arg2) */ ++ move TMP, $a0 ++ move TMP2, $a1 ++ ld.d $a0, $a2, 0 /* $a0 = first arg */ ++ ld.d $a1, $a2, 8 /* $a1 = second arg */ ++ move $a2, TMP2 /* a2 = closure environment */ ++ la.global TMP2, caml_apply2 ++ b L(jump_to_caml) ++END_FUNCTION(caml_callback2_asm) ++ ++FUNCTION(caml_callback3_asm) ++ /* Initial shuffling of arguments */ ++ /* ($a0 = Caml_state, $a1 = closure, 0(a2) = arg1, 8(a2) = arg2, ++ 16(a2) = arg3) */ ++ move TMP, $a0 ++ move $a3, $a1 /* a3 = closure environment */ ++ ld.d $a0, $a2, 0 /* $a0 = first arg */ ++ ld.d $a1, $a2, 8 /* $a1 = second arg */ ++ ld.d $a2, $a2, 16 /* a2 = third arg */ 
++ la.global TMP2, caml_apply3 ++ b L(jump_to_caml) ++END_FUNCTION(caml_callback3_asm) ++ ++/* Fibers */ ++ ++/* Switch between OCaml stacks. Clobbers TMP and switches TRAP_PTR ++ Preserves old_stack and new_stack registers */ ++.macro SWITCH_OCAML_STACKS old_stack, new_stack ++ /* Save frame pointer and return address for old_stack */ ++ addi.d $sp, $sp, -16 ++ st.d $ra, $sp, 8 ++ CFI_ADJUST(16) ++ /* Save OCaml SP and exn_handler in the stack info */ ++ st.d $sp, Stack_sp(\old_stack) ++ st.d TRAP_PTR, Stack_exception(\old_stack) ++ /* switch stacks */ ++ st.d \new_stack, Caml_state(current_stack) ++ ld.d TMP, Stack_sp(\new_stack) ++ move $sp, TMP ++ /* restore exn_handler for new stack */ ++ ld.d TRAP_PTR, Stack_exception(\new_stack) ++ /* Restore frame pointer and return address for new_stack */ ++ ld.d $ra, $sp, 8 ++ addi.d $sp, $sp, 16 ++.endm ++ ++/* ++ * A continuation is a one word object that points to a fiber. A fiber [f] will ++ * point to its parent at Handler_parent(Stack_handler(f)). In the following, ++ * the [last_fiber] refers to the last fiber in the linked-list formed by the ++ * parent pointer. ++ */ ++ ++FUNCTION(caml_perform) ++ /* $a0: effect to perform ++ $a1: freshly allocated continuation */ ++ ld.d $a2, Caml_state(current_stack) /* a2 := old stack */ ++ addi.d $a3, $a2, 1 /* a3 := Val_ptr(old stack) */ ++ st.d $a3, $a1, 0 /* Iniitalize continuation */ ++L(do_perform): ++ /* $a0: effect to perform ++ $a1: continuation ++ a2: old_stack ++ a3: last_fiber */ ++ ++ ld.d $t3, Stack_handler($a2) /* $t3 := old stack -> handler */ ++ ld.d $t4, Handler_parent($t3) /* t4 := parent stack */ ++ beqz $t4, 1f ++ SWITCH_OCAML_STACKS $a2, $t4 ++ /* we have to null the Handler_parent after the switch because ++ the Handler_parent is needed to unwind the stack for backtraces */ ++ st.d $zero, Handler_parent($t3) /* Set parent of performer to NULL */ ++ ld.d TMP, Handler_effect($t3) ++ move $a2, $a3 /* a2 := last_fiber */ ++ move $a3, TMP /* a3 := effect handler */ ++ b PLT(caml_apply3) ++1: ++ /* switch back to original performer before $raising Effect.Unhandled ++ (no-op unless this is a reperform) */ ++ ld.d $t4, $a1, 0 /* load performer stack from continuation */ ++ addi.d $t4, $t4, -1 /* t4 := Ptr_val(t4) */ ++ ld.d $t3, Caml_state(current_stack) ++ SWITCH_OCAML_STACKS $t3, $t4 ++ /* No parent stack. Raise Effect.Unhandled. 
*/ ++ la.global ADDITIONAL_ARG, caml_raise_unhandled_effect ++ b caml_c_call ++END_FUNCTION(caml_perform) ++ ++FUNCTION(caml_reperform) ++ /* $a0: effect to perform ++ $a1: continuation ++ a2: last_fiber */ ++ ld.d TMP, Stack_handler_from_cont($a2) ++ ld.d $a2, Caml_state(current_stack) /* a2 := old stack */ ++ st.d $a2, Handler_parent(TMP) /* Append to last_fiber */ ++ addi.d $a3, $a2, 1 /* a3 (last_fiber) := Val_ptr(old stack) */ ++ b L(do_perform) ++END_FUNCTION(caml_reperform) ++ ++FUNCTION(caml_resume) ++ /* $a0: new fiber ++ $a1: fun ++ a2: arg */ ++ addi.d $a0, $a0, -1 /* $a0 = Ptr_val($a0) */ ++ ld.d $a3, $a1, 0 /* code pointer */ ++ /* Check if stack null, then already used */ ++ beqz $a0, 2f ++ /* Find end of list of stacks (put in $t2) */ ++ move TMP, $a0 ++1: ld.d $t2, Stack_handler(TMP) ++ ld.d TMP, Handler_parent($t2) ++ bnez TMP, 1b ++ /* Add current stack to the end */ ++ ld.d $t3, Caml_state(current_stack) ++ st.d $t3, Handler_parent($t2) ++ SWITCH_OCAML_STACKS $t3, $a0 ++ move $a0, $a2 ++ jr $a3 ++2: la.global ADDITIONAL_ARG, caml_raise_continuation_already_resumed ++ b caml_c_call ++END_FUNCTION(caml_resume) ++ ++/* Run a function on a new stack, then either ++ return the value or invoke exception handler */ ++FUNCTION(caml_runstack) ++ /* $a0: fiber ++ $a1: fun ++ a2: arg */ ++ CFI_OFFSET($ra, -8) ++ addi.d $sp, $sp, -16 ++ st.d $ra, $sp, 8 ++ CFI_ADJUST(16) ++ addi.d $a0, $a0, -1 /* $a0 := Ptr_val($a0) */ ++ ld.d $a3, $a1, 0 /* code pointer */ ++ /* save old stack pointer and exception handler */ ++ ld.d $t2, Caml_state(current_stack) /* $t2 := old stack */ ++ st.d $sp, Stack_sp($t2) ++ st.d TRAP_PTR, Stack_exception($t2) ++ /* Load new stack pointer and set parent */ ++ ld.d TMP, Stack_handler($a0) ++ st.d $t2, Handler_parent(TMP) ++ st.d $a0, Caml_state(current_stack) ++ ld.d $t3, Stack_sp($a0) /* $t3 := $sp of new stack */ ++ /* Create an exception handler on the target stack ++ after 16byte DWARF & gc_regs block (which is unused here) */ ++ addi.d $t3, $t3, -32 ++ la.local TMP, L(fiber_exn_handler) ++ st.d TMP, $t3, 8 ++ /* link the previous exn_handler so that copying stacks works */ ++ ld.d TMP, Stack_exception($a0) ++ st.d TMP, $t3, 0 ++ move TRAP_PTR, $t3 ++ /* Switch to the new stack */ ++ move $sp, $t3 ++ CFI_REMEMBER_STATE ++ /* Call the function on the new stack */ ++ move $a0, $a2 ++ jirl $ra, $a3, 0 ++L(frame_runstack): ++ addi.d $t2, $sp, 32 /* $t2 := stack_handler */ ++ ld.d $s2, Handler_value($t2) /* saved across C call */ ++1: ++ move $s3, $a0 /* save return across C call */ ++ ld.d $a0, Caml_state(current_stack) /* arg to caml_free_stack */ ++ /* restore parent stack and exn_handler into Caml_state */ ++ ld.d TMP, Handler_parent($t2) ++ st.d TMP, Caml_state(current_stack) ++ ld.d TRAP_PTR, Stack_exception(TMP) ++ st.d TRAP_PTR, Caml_state(exn_handler) ++ /* free old stack by switching directly to c_stack; ++ is a no-alloc call */ ++ ld.d $s4, Stack_sp(TMP) /* saved across C call */ ++ CFI_RESTORE_STATE ++ CFI_REMEMBER_STATE ++ ld.d TMP, Caml_state(c_stack) ++ move $sp, TMP ++ bl PLT(caml_free_stack) ++ /* switch directly to parent stack with correct return */ ++ move $a0, $s3 ++ move $a1, $s2 ++ move $sp, $s4 ++ CFI_RESTORE_STATE ++ ld.d TMP, $s2, 0 /* code pointer */ ++ /* Invoke handle_value (or handle_exn) */ ++ ld.d $ra, $sp, 8 ++ addi.d $sp, $sp, 16 ++ CFI_ADJUST(-16) ++ jr TMP ++L(fiber_exn_handler): ++ addi.d $t2, $sp, 16 /* $t2 := stack_handler */ ++ ld.d $s2, Handler_exception($t2) ++ b 1b ++END_FUNCTION(caml_runstack) ++ 
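As an illustrative aside (not part of the patch): the caml_perform / caml_reperform / caml_resume / caml_runstack entry points above implement the fiber switching behind OCaml 5 effect handlers. A minimal sketch that should exercise them on this port, assuming a stock OCaml >= 5.0 Effect API (the effect name Ask and the reply value 21 are invented for the demo):

    (* Sketch only: built natively with ocamlopt 5.x, match_with runs comp on a
       fresh fiber (caml_runstack), each [perform Ask] enters caml_perform, and
       each [continue k 21] resumes through caml_resume. *)
    open Effect
    open Effect.Deep

    type _ Effect.t += Ask : int Effect.t  (* hypothetical effect for the demo *)

    let comp () = perform Ask + perform Ask

    let () =
      let n =
        match_with comp ()
          { retc = (fun v -> v);
            exnc = raise;
            effc = (fun (type a) (eff : a Effect.t) ->
              match eff with
              | Ask -> Some (fun (k : (a, _) continuation) -> continue k 21)
              | _ -> None) }
      in
      Printf.printf "%d\n" n  (* expected to print 42 *)
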
++FUNCTION(caml_ml_array_bound_error) ++ /* Load address of [caml_array_bound_error_asm] in ADDITIONAL_ARG */ ++ la.global ADDITIONAL_ARG, caml_array_bound_error_asm ++ /* Call that function */ ++ b caml_c_call ++END_FUNCTION(caml_ml_array_bound_error) ++ ++ .globl caml_system__code_end ++caml_system__code_end: ++ ++/* GC roots for callback */ ++ ++ ++ .section .data ++ .align 3 ++ .globl caml_system__frametable ++ .type caml_system__frametable, @object ++caml_system__frametable: ++ .quad 1 /* one descriptor */ ++ .quad .Lcaml_retaddr /* return address into callback */ ++ .short -1 /* negative frame size => use callback link */ ++ .short 0 /* no roots */ ++ .align 3 ++ .size caml_system__frametable, .-caml_system__frametable +diff --git a/testsuite/tools/asmgen_loongarch64.S b/testsuite/tools/asmgen_loongarch64.S +new file mode 100644 +index 000000000..97fbeae04 +--- /dev/null ++++ b/testsuite/tools/asmgen_loongarch64.S +@@ -0,0 +1,75 @@ ++/**************************************************************************/ ++/* */ ++/* OCaml */ ++/* */ ++/* Nicolas Ojeda Bar */ ++/* */ ++/* Copyright 2019 Institut National de Recherche en Informatique et */ ++/* en Automatique. */ ++/* */ ++/* All rights reserved. This file is distributed under the terms of */ ++/* the GNU Lesser General Public License version 2.1, with the */ ++/* special exception on linking described in the file LICENSE. */ ++/* */ ++/**************************************************************************/ ++ ++#define STORE st.d ++#define LOAD ld.d ++ ++ .globl call_gen_code ++ .align 2 ++call_gen_code: ++ /* Set up stack frame and save callee-save registers */ ++ addi.d $sp, $sp, -208 ++ STORE $ra, $sp, 192 ++ STORE $s0, $sp, 0 ++ STORE $s1, $sp, 8 ++ STORE $s2, $sp, 16 ++ STORE $s3, $sp, 24 ++ STORE $s4, $sp, 32 ++ STORE $s5, $sp, 40 ++ STORE $s6, $sp, 48 ++ STORE $s7, $sp, 56 ++ STORE $s8, $sp, 64 ++ fst.d $fs0, $sp, 96 ++ fst.d $fs1, $sp, 104 ++ fst.d $fs2, $sp, 112 ++ fst.d $fs3, $sp, 120 ++ fst.d $fs4, $sp, 128 ++ fst.d $fs5, $sp, 136 ++ fst.d $fs6, $sp, 144 ++ fst.d $fs7, $sp, 152 ++ /* Shuffle arguments */ ++ move $t0, $a0 ++ move $a0, $a1 ++ move $a1, $a2 ++ move $a2, $a3 ++ move $a3, $a4 ++ /* Call generated asm */ ++ jirl $ra, $t0, 0 ++ /* Reload callee-save registers and return address */ ++ LOAD $ra, $sp, 192 ++ LOAD $s0, $sp, 0 ++ LOAD $s1, $sp, 8 ++ LOAD $s2, $sp ,16 ++ LOAD $s3, $sp ,24 ++ LOAD $s4, $sp ,32 ++ LOAD $s5, $sp ,40 ++ LOAD $s6, $sp ,48 ++ LOAD $s7, $sp ,56 ++ LOAD $s8, $sp ,64 ++ fld.d $fs0, $sp, 96 ++ fld.d $fs1, $sp, 104 ++ fld.d $fs2, $sp, 112 ++ fld.d $fs3, $sp, 120 ++ fld.d $fs4, $sp, 128 ++ fld.d $fs5, $sp, 136 ++ fld.d $fs6, $sp, 144 ++ fld.d $fs7, $sp, 152 ++ addi.d $sp, $sp, 208 ++ jr $ra ++ ++ .globl caml_c_call ++ .align 2 ++caml_c_call: ++ jr $t2 diff --git a/onefetch/PKGBUILD b/onefetch/PKGBUILD index 36e54952ee..e386ae13ba 100644 --- a/onefetch/PKGBUILD +++ b/onefetch/PKGBUILD @@ -19,7 +19,7 @@ sha512sums=('SKIP') prepare() { cd "$pkgname" - cargo fetch --locked --target "$CARCH-unknown-linux-gnu" + cargo fetch --locked --target "`uname -m`-unknown-linux-gnu" mkdir -p completions } diff --git a/open-iscsi/PKGBUILD b/open-iscsi/PKGBUILD index eb58ad86a8..89a227e7b5 100644 --- a/open-iscsi/PKGBUILD +++ b/open-iscsi/PKGBUILD @@ -24,7 +24,7 @@ build() { ) arch-meson "${pkgname}-${pkgver}" build "${_meson_options[@]}" - +# arch-meson -D rulesdir="/usr/lib/udev/rules.d build" -Discsi_sbindir=/usr/bin build meson compile -C build } @@ -33,6 +33,7 @@ package() { # no initiatorname in 
package, initialized in install script echo -n > "${pkgdir}"/etc/iscsi/initiatorname.iscsi +# meson install -C build --destdir "$pkgdir" # install docs install -D -m0644 -t "${pkgdir}"/usr/share/doc/${pkgname}/ "${pkgname}-${pkgver}"/{Changelog,README} diff --git a/open-iscsi/open-iscsi-fix-build.patch b/open-iscsi/open-iscsi-fix-build.patch new file mode 100644 index 0000000000..519116010b --- /dev/null +++ b/open-iscsi/open-iscsi-fix-build.patch @@ -0,0 +1,25 @@ +Index: open-iscsi-2.1.6/utils/fwparam_ibft/fwparam.h +=================================================================== +--- open-iscsi-2.1.6.orig/utils/fwparam_ibft/fwparam.h ++++ open-iscsi-2.1.6/utils/fwparam_ibft/fwparam.h +@@ -17,7 +17,6 @@ + #ifndef FWPARAM_H_ + #define FWPARAM_H_ + +-#include + #include "fw_context.h" + + #define FILENAMESZ (1024) +Index: open-iscsi-2.1.6/include/iscsi_proto.h +=================================================================== +--- open-iscsi-2.1.6.orig/include/iscsi_proto.h ++++ open-iscsi-2.1.6/include/iscsi_proto.h +@@ -63,7 +63,7 @@ + #endif + + /* initiator tags; opaque for target */ +-typedef uint32_t __bitwise__ itt_t; ++typedef uint32_t itt_t; + /* below makes sense only for initiator that created this tag */ + #define build_itt(itt, age) ((__force itt_t)\ + ((itt) | ((age) << ISCSI_AGE_SHIFT))) diff --git a/open-isns/PKGBUILD b/open-isns/PKGBUILD index a29f6f1b4d..02faf71159 100644 --- a/open-isns/PKGBUILD +++ b/open-isns/PKGBUILD @@ -3,7 +3,7 @@ pkgname=open-isns pkgver=0.102 -pkgrel=3 +pkgrel=4 pkgdesc='iSNS server and client for Linux' arch=('loong64' 'x86_64') depends=('openssl') diff --git a/openal/PKGBUILD b/openal/PKGBUILD index 3870624beb..21a173276e 100644 --- a/openal/PKGBUILD +++ b/openal/PKGBUILD @@ -78,7 +78,7 @@ package_openal() { install -Dt "$pkgdir/usr/share/doc/openal" -m644 openal-soft/docs/* cd "$pkgdir" - _pick examples usr/bin/al{ffplay,hrtf,latency,loopback,multireverb} + _pick examples usr/bin/al{hrtf,latency,loopback,multireverb} _pick examples usr/bin/al{play,record,reverb,stream,tonegen} } diff --git a/openblas/PKGBUILD b/openblas/PKGBUILD index b5ca23eda3..ca66c556e8 100644 --- a/openblas/PKGBUILD +++ b/openblas/PKGBUILD @@ -13,8 +13,15 @@ url="https://www.openblas.net/" license=('BSD') depends=('gcc-libs') makedepends=('cmake' 'perl' 'gcc-fortran') -source=(${_pkgname}-v${pkgver}.tar.gz::https://github.com/xianyi/OpenBLAS/archive/v${pkgver}.tar.gz) -sha512sums=('01d3a536fbfa62f276fd6b1ad0e218fb3d91f41545fc83ddc74979fa26372d8389f0baa20334badfe0adacd77bd944c50a47ac920577373fcc1d495553084373') +source=(${_pkgname}-v${pkgver}.tar.gz::https://github.com/xianyi/OpenBLAS/archive/v${pkgver}.tar.gz +fix-loong.patch) +sha512sums=('01d3a536fbfa62f276fd6b1ad0e218fb3d91f41545fc83ddc74979fa26372d8389f0baa20334badfe0adacd77bd944c50a47ac920577373fcc1d495553084373' + '195dc3c3daa56c55912831161bd9e73532c1a06b38c894a6eceb8d49befddda1b94e71dcd36e1d3403e2e5f70ded83febdee493059b16adc85ea52fb32e58f81') + +prepare() { + cd "$_pkgname-$pkgver" + patch -p1 -i "$srcdir/fix-loong.patch" +} build() { # Setting FC manually to avoid picking up f95 and breaking the cmake build diff --git a/openblas/fix-loong.patch b/openblas/fix-loong.patch new file mode 100644 index 0000000000..fc0489b826 --- /dev/null +++ b/openblas/fix-loong.patch @@ -0,0 +1,47 @@ +diff --git a/.github/workflows/loongarch64.yml b/.github/workflows/loongarch64.yml +index 4a9bf98b6..b310d6938 100644 +--- a/.github/workflows/loongarch64.yml ++++ b/.github/workflows/loongarch64.yml +@@ -40,8 +40,8 @@ jobs: + + - 
name: Download and install loongarch64-toolchain + run: | +- wget https://github.com/loongson/build-tools/releases/download/2022.09.06/loongarch64-clfs-7.3-cross-tools-gcc-glibc.tar.xz +- tar -xf loongarch64-clfs-7.3-cross-tools-gcc-glibc.tar.xz -C /opt ++ wget https://github.com/loongson/build-tools/releases/download/2023.08.08/CLFS-loongarch64-8.1-x86_64-cross-tools-gcc-glibc.tar.xz ++ tar -xf CLFS-loongarch64-8.1-x86_64-cross-tools-gcc-glibc.tar.xz -C /opt + + - name: Set env + run: | +diff --git a/cmake/cc.cmake b/cmake/cc.cmake +index 00952e810..242b03b5f 100644 +--- a/cmake/cc.cmake ++++ b/cmake/cc.cmake +@@ -36,9 +36,9 @@ if (${CMAKE_C_COMPILER_ID} STREQUAL "GNU" OR ${CMAKE_C_COMPILER_ID} STREQUAL "LS + + if (LOONGARCH64) + if (BINARY64) +- set(CCOMMON_OPT "${CCOMMON_OPT} -mabi=lp64") ++ set(CCOMMON_OPT "${CCOMMON_OPT} -mabi=lp64d") + else () +- set(CCOMMON_OPT "${CCOMMON_OPT} -mabi=lp32") ++ set(CCOMMON_OPT "${CCOMMON_OPT} -mabi=ilp32d") + endif () + set(BINARY_DEFINED 1) + endif () +diff --git a/cmake/fc.cmake b/cmake/fc.cmake +index c496f6368..b356dfda3 100644 +--- a/cmake/fc.cmake ++++ b/cmake/fc.cmake +@@ -61,9 +61,9 @@ if (${F_COMPILER} STREQUAL "GFORTRAN" OR ${F_COMPILER} STREQUAL "F95" OR CMAKE_F + endif () + if (LOONGARCH64) + if (BINARY64) +- set(FCOMMON_OPT "${FCOMMON_OPT} -mabi=lp64") ++ set(FCOMMON_OPT "${FCOMMON_OPT} -mabi=lp64d") + else () +- set(FCOMMON_OPT "${FCOMMON_OPT} -mabi=lp32") ++ set(FCOMMON_OPT "${FCOMMON_OPT} -mabi=ilp32d") + endif () + endif () + if (RISCV64) diff --git a/opencv/PKGBUILD b/opencv/PKGBUILD index 8eacdaf59c..655969a4d7 100644 --- a/opencv/PKGBUILD +++ b/opencv/PKGBUILD @@ -90,8 +90,7 @@ build() { -DINSTALL_C_EXAMPLES=ON \ -DINSTALL_PYTHON_EXAMPLES=ON \ -DCMAKE_INSTALL_PREFIX=/usr \ - -DCPU_BASELINE_DISABLE=SSE3 \ - -DCPU_BASELINE_REQUIRE=SSE2 \ + -DCPU_BASELINE_DISABLE=LASX,SSE2,SSE3 \ -DOPENCV_EXTRA_MODULES_PATH=$srcdir/opencv_contrib-$pkgver/modules \ -DOPENCV_SKIP_PYTHON_LOADER=ON \ -DLAPACK_LIBRARIES=/usr/lib/liblapack.so;/usr/lib/libblas.so;/usr/lib/libcblas.so \ @@ -114,8 +113,8 @@ build() { -DBUILD_WITH_DEBUG_INFO=OFF \ -DWITH_CUDA=ON \ -DWITH_CUDNN=ON \ - -DCMAKE_C_COMPILER=gcc-12 \ - -DCMAKE_CXX_COMPILER=g++-12 \ + -DCMAKE_C_COMPILER=gcc \ + -DCMAKE_CXX_COMPILER=g++ \ -DCUDA_ARCH_BIN='52-real;53-real;60-real;61-real;62-real;70-real;72-real;75-real;80-real;86-real;87-real;89-real;90-real;90-virtual' \ -DCUDA_ARCH_PTX='90-virtual' cmake --build build-cuda diff --git a/openh264/PKGBUILD b/openh264/PKGBUILD index 043973fe84..5f2847376d 100644 --- a/openh264/PKGBUILD +++ b/openh264/PKGBUILD @@ -16,6 +16,7 @@ source=("https://github.com/cisco/openh264/archive/v${pkgver}/${pkgname}-${pkgve "https://github.com/google/googletest/archive/release-${_gtestver}/googletest-${_gtestver}.tar.gz") sha256sums=('a44d1ccc348a790f9a272bba2d1c2eb9a9bbd0302e4e9b655d709e1c32f92691' '9bf1fe5182a604b4135edc1a425ae356c9ad15e9b23f9f12a02e80184c3a249c') +options=(!lto) prepare() { ln -sf "../googletest-release-${_gtestver}" "${pkgname}-${pkgver}/gtest" diff --git a/openimagedenoise/PKGBUILD b/openimagedenoise/PKGBUILD index 1cf4a05640..7ea16673b0 100644 --- a/openimagedenoise/PKGBUILD +++ b/openimagedenoise/PKGBUILD @@ -9,7 +9,7 @@ arch=('loong64' 'x86_64') url='https://openimagedenoise.github.io' license=('Apache') depends=('intel-tbb') -makedepends=('cmake' 'ninja' 'ispc' 'python') +makedepends=('cmake' 'ninja' 'python') source=("https://github.com/OpenImageDenoise/oidn/releases/download/v${pkgver}/oidn-${pkgver}.src.tar.gz") 
sha512sums=('508cb100f1a0a825774c2c01e0fc983e697341745fa0bfa48a99bfa70fc431f66fcdf17c3f170e40baefd7c3796a25d147f49cb17efbf1a1886556367c5c4566') diff --git a/openjade/PKGBUILD b/openjade/PKGBUILD index 775eeb57d5..7a19dcfa17 100644 --- a/openjade/PKGBUILD +++ b/openjade/PKGBUILD @@ -14,19 +14,25 @@ install=${pkgname}.install conflicts=('jade') provides=('jade') source=(https://downloads.sourceforge.net/project/${pkgname}/${pkgname}/${pkgver}/${pkgname}-${pkgver}.tar.gz - https://deb.debian.org/debian/pool/main/o/${pkgname}1.3/${pkgname}1.3_${pkgver}-${_debpatch}.diff.gz) + https://deb.debian.org/debian/pool/main/o/${pkgname}1.3/${pkgname}1.3_${pkgver}-${_debpatch}.diff.gz + openjade-nola.patch) sha256sums=('1d2d7996cc94f9b87d0c51cf0e028070ac177c4123ecbfd7ac1cb8d0b7d322d1' - '11d90e242eae60ce06bf27fd234adbd8efd7d4a9a4a2da058faa4e8336dc423a') + '11d90e242eae60ce06bf27fd234adbd8efd7d4a9a4a2da058faa4e8336dc423a' + 'd177f5e18970561ff500c42759f82e91ef245dda5559459091b875b7560b5688') +SKIPCONFIG=1 prepare() { cd ${pkgname}-$pkgver - patch -Np1 -i "$srcdir"/${pkgname}1.3_${pkgver}-${_debpatch}.diff + patch -Np1 -i "$srcdir"/openjade-nola.patch # https://bugs.archlinux.org/task/55331 / https://gcc.gnu.org/bugzilla/show_bug.cgi?id=69534#c9 export CXXFLAGS+=' -fno-lifetime-dse' + cp /usr/share/automake-1.16/config.* config/. } build() { cd ${pkgname}-$pkgver + CFLAGS=${CFLAGS/-Werror=format-security/} + CXXFLAGS=${CXXFLAGS/-Werror=format-security/} ./configure --prefix=/usr \ --mandir=/usr/share/man \ --enable-default-catalog=/etc/sgml/catalog \ diff --git a/openjade/openjade-nola.patch b/openjade/openjade-nola.patch new file mode 100644 index 0000000000..1e23ca52a4 --- /dev/null +++ b/openjade/openjade-nola.patch @@ -0,0 +1,12 @@ +diff -urNp openjade-1.3.2-orig/jade/Makefile.sub openjade-1.3.2/jade/Makefile.sub +--- openjade-1.3.2-orig/jade/Makefile.sub 2002-11-15 23:46:50.000000000 +0100 ++++ openjade-1.3.2/jade/Makefile.sub 2009-02-13 11:52:30.000000000 +0100 +@@ -4,7 +4,7 @@ OBJS=jade.o SgmlFOTBuilder.o RtfFOTBuild + INCLUDE=-I$(srcdir)/../grove -I$(srcdir)/../spgrove -I$(srcdir)/../style + # XLIBS=../style/libostyle.a ../spgrove/libospgrove.a ../grove/libogrove.a \ + # ../lib/libosp.a +-XLIBS=../style/libostyle.a ../spgrove/libospgrove.a ../grove/libogrove.a $(splibdir)/libosp.a ++XLIBS=../style/libostyle.a ../spgrove/libospgrove.a ../grove/libogrove.a $(splibdir)/libosp.so + GENSRCS=JadeMessages.h HtmlMessages.h RtfMessages.h TeXMessages.h \ + HtmlFOTBuilder_inst.cxx RtfFOTBuilder_inst.cxx TeXFOTBuilder_inst.cxx \ + TransformFOTBuilder_inst.cxx MifMessages.h MifFOTBuilder_inst.cxx diff --git a/openmp/PKGBUILD b/openmp/PKGBUILD index 5a747fd2a4..79d4818875 100644 --- a/openmp/PKGBUILD +++ b/openmp/PKGBUILD @@ -47,7 +47,7 @@ package() { DESTDIR="$pkgdir" ninja install install -Dm644 ../LICENSE.TXT "$pkgdir/usr/share/licenses/$pkgname/LICENSE" - rm "$pkgdir/usr/lib/libarcher_static.a" + #rm "$pkgdir/usr/lib/libarcher_static.a" } # vim:set ts=2 sw=2 et: diff --git a/openmp/openmp-loong64.patch b/openmp/openmp-loong64.patch new file mode 100644 index 0000000000..8ff7095ab6 --- /dev/null +++ b/openmp/openmp-loong64.patch @@ -0,0 +1,514 @@ +diff --git a/README.rst b/README.rst +index ffa49e4..a12c628 100644 +--- a/README.rst ++++ b/README.rst +@@ -137,7 +137,7 @@ Options for all Libraries + Options for ``libomp`` + ---------------------- + +-**LIBOMP_ARCH** = ``aarch64|arm|i386|mic|mips|mips64|ppc64|ppc64le|x86_64|riscv64`` ++**LIBOMP_ARCH** = 
``aarch64|arm|i386|mic|mips|mips64|ppc64|ppc64le|x86_64|riscv64|loongarch64`` + The default value for this option is chosen based on probing the compiler for + architecture macros (e.g., is ``__x86_64__`` predefined by compiler?). + +@@ -194,7 +194,7 @@ Optional Features + **LIBOMP_OMPT_SUPPORT** = ``ON|OFF`` + Include support for the OpenMP Tools Interface (OMPT). + This option is supported and ``ON`` by default for x86, x86_64, AArch64, +- PPC64 and RISCV64 on Linux* and macOS*. ++ PPC64, RISCV64 and loongarch64 on Linux* and macOS*. + This option is ``OFF`` if this feature is not supported for the platform. + + **LIBOMP_OMPT_OPTIONAL** = ``ON|OFF`` +diff --git a/runtime/CMakeLists.txt b/runtime/CMakeLists.txt +index c9dbcb2..3199536 100644 +--- a/runtime/CMakeLists.txt ++++ b/runtime/CMakeLists.txt +@@ -30,7 +30,7 @@ if(${OPENMP_STANDALONE_BUILD}) + # If adding a new architecture, take a look at cmake/LibompGetArchitecture.cmake + libomp_get_architecture(LIBOMP_DETECTED_ARCH) + set(LIBOMP_ARCH ${LIBOMP_DETECTED_ARCH} CACHE STRING +- "The architecture to build for (x86_64/i386/arm/ppc64/ppc64le/aarch64/mic/mips/mips64/riscv64).") ++ "The architecture to build for (x86_64/i386/arm/ppc64/ppc64le/aarch64/mic/mips/mips64/riscv64/loongarch64).") + # Should assertions be enabled? They are on by default. + set(LIBOMP_ENABLE_ASSERTIONS TRUE CACHE BOOL + "enable assertions?") +@@ -61,6 +61,8 @@ else() # Part of LLVM build + set(LIBOMP_ARCH arm) + elseif(LIBOMP_NATIVE_ARCH MATCHES "riscv64") + set(LIBOMP_ARCH riscv64) ++ elseif(LIBOMP_NATIVE_ARCH MATCHES "loongarch64") ++ set(LIBOMP_ARCH loongarch64) + else() + # last ditch effort + libomp_get_architecture(LIBOMP_ARCH) +@@ -81,7 +83,7 @@ if(LIBOMP_ARCH STREQUAL "aarch64") + endif() + endif() + +-libomp_check_variable(LIBOMP_ARCH 32e x86_64 32 i386 arm ppc64 ppc64le aarch64 aarch64_a64fx mic mips mips64 riscv64) ++libomp_check_variable(LIBOMP_ARCH 32e x86_64 32 i386 arm ppc64 ppc64le aarch64 aarch64_a64fx mic mips mips64 riscv64 loongarch64) + + set(LIBOMP_LIB_TYPE normal CACHE STRING + "Performance,Profiling,Stubs library (normal/profile/stubs)") +@@ -159,6 +161,7 @@ set(MIC FALSE) + set(MIPS64 FALSE) + set(MIPS FALSE) + set(RISCV64 FALSE) ++set(LoongArch64 FALSE) + if("${LIBOMP_ARCH}" STREQUAL "i386" OR "${LIBOMP_ARCH}" STREQUAL "32") # IA-32 architecture + set(IA32 TRUE) + elseif("${LIBOMP_ARCH}" STREQUAL "x86_64" OR "${LIBOMP_ARCH}" STREQUAL "32e") # Intel(R) 64 architecture +@@ -183,6 +186,8 @@ elseif("${LIBOMP_ARCH}" STREQUAL "mips64") # MIPS64 architecture + set(MIPS64 TRUE) + elseif("${LIBOMP_ARCH}" STREQUAL "riscv64") # RISCV64 architecture + set(RISCV64 TRUE) ++elseif("${LIBOMP_ARCH}" STREQUAL "loongarch64") # LoongArch64 architecture ++ set(LoongArch64 TRUE) + endif() + + # Set some flags based on build_type +diff --git a/runtime/README.txt b/runtime/README.txt +index 874a563..ddd8b0e 100644 +--- a/runtime/README.txt ++++ b/runtime/README.txt +@@ -54,6 +54,7 @@ Architectures Supported + * IBM(R) Power architecture (little endian) + * MIPS and MIPS64 architecture + * RISCV64 architecture ++* LoongArch64 architecture + + Supported RTL Build Configurations + ================================== +diff --git a/runtime/cmake/LibompGetArchitecture.cmake b/runtime/cmake/LibompGetArchitecture.cmake +index dd60a2d..72cbf64 100644 +--- a/runtime/cmake/LibompGetArchitecture.cmake ++++ b/runtime/cmake/LibompGetArchitecture.cmake +@@ -47,6 +47,8 @@ function(libomp_get_architecture return_arch) + #error ARCHITECTURE=mips + #elif defined(__riscv) && 
__riscv_xlen == 64 + #error ARCHITECTURE=riscv64 ++ #elif defined(__loongarch__) && defined(__loongarch64) ++ #error ARCHITECTURE=loongarch64 + #else + #error ARCHITECTURE=UnknownArchitecture + #endif +diff --git a/runtime/cmake/LibompMicroTests.cmake b/runtime/cmake/LibompMicroTests.cmake +index 1ca3412..d344056 100644 +--- a/runtime/cmake/LibompMicroTests.cmake ++++ b/runtime/cmake/LibompMicroTests.cmake +@@ -214,6 +214,9 @@ else() + elseif(${RISCV64}) + libomp_append(libomp_expected_library_deps libc.so.6) + libomp_append(libomp_expected_library_deps ld.so.1) ++ elseif(${LoongArch64}) ++ libomp_append(libomp_expected_library_deps libc.so.6) ++ libomp_append(libomp_expected_library_deps ld.so.1) + endif() + libomp_append(libomp_expected_library_deps libpthread.so.0 IF_FALSE STUBS_LIBRARY) + libomp_append(libomp_expected_library_deps libhwloc.so.5 LIBOMP_USE_HWLOC) +diff --git a/runtime/cmake/LibompUtils.cmake b/runtime/cmake/LibompUtils.cmake +index b1de242..8d6b6ef 100644 +--- a/runtime/cmake/LibompUtils.cmake ++++ b/runtime/cmake/LibompUtils.cmake +@@ -109,6 +109,8 @@ function(libomp_get_legal_arch return_arch_string) + set(${return_arch_string} "MIPS64" PARENT_SCOPE) + elseif(${RISCV64}) + set(${return_arch_string} "RISCV64" PARENT_SCOPE) ++ elseif(${LoongArch64}) ++ set(${return_arch_string} "LoongArch64" PARENT_SCOPE) + else() + set(${return_arch_string} "${LIBOMP_ARCH}" PARENT_SCOPE) + libomp_warning_say("libomp_get_legal_arch(): Warning: Unknown architecture: Using ${LIBOMP_ARCH}") +diff --git a/runtime/src/kmp_affinity.h b/runtime/src/kmp_affinity.h +index ce00362..06fd323 100644 +--- a/runtime/src/kmp_affinity.h ++++ b/runtime/src/kmp_affinity.h +@@ -254,6 +254,18 @@ public: + #elif __NR_sched_getaffinity != 5196 + #error Wrong code for getaffinity system call. + #endif /* __NR_sched_getaffinity */ ++#elif KMP_ARCH_LOONGARCH64 ++#ifndef __NR_sched_setaffinity ++#define __NR_sched_setaffinity 122 ++#elif __NR_sched_setaffinity != 122 ++#error Wrong code for setaffinity system call. ++#endif /* __NR_sched_setaffinity */ ++#ifndef __NR_sched_getaffinity ++#define __NR_sched_getaffinity 123 ++#elif __NR_sched_getaffinity != 123 ++#error Wrong code for getaffinity system call. ++#endif /* __NR_sched_getaffinity */ ++#else + #error Unknown or unsupported architecture + #endif /* KMP_ARCH_* */ + #elif KMP_OS_FREEBSD +diff --git a/runtime/src/kmp_csupport.cpp b/runtime/src/kmp_csupport.cpp +index e263558..ca42148 100644 +--- a/runtime/src/kmp_csupport.cpp ++++ b/runtime/src/kmp_csupport.cpp +@@ -695,7 +695,7 @@ void __kmpc_flush(ident_t *loc) { + } + #endif // KMP_MIC + #elif (KMP_ARCH_ARM || KMP_ARCH_AARCH64 || KMP_ARCH_MIPS || KMP_ARCH_MIPS64 || \ +- KMP_ARCH_RISCV64) ++ KMP_ARCH_RISCV64 || KMP_ARCH_LOONGARCH64) + // Nothing to see here move along + #elif KMP_ARCH_PPC64 + // Nothing needed here (we have a real MB above). +diff --git a/runtime/src/kmp_os.h b/runtime/src/kmp_os.h +index d71e9ae..33eb269 100644 +--- a/runtime/src/kmp_os.h ++++ b/runtime/src/kmp_os.h +@@ -167,7 +167,7 @@ typedef unsigned long long kmp_uint64; + #if KMP_ARCH_X86 || KMP_ARCH_ARM || KMP_ARCH_MIPS + #define KMP_SIZE_T_SPEC KMP_UINT32_SPEC + #elif KMP_ARCH_X86_64 || KMP_ARCH_PPC64 || KMP_ARCH_AARCH64 || \ +- KMP_ARCH_MIPS64 || KMP_ARCH_RISCV64 ++ KMP_ARCH_MIPS64 || KMP_ARCH_RISCV64 || KMP_ARCH_LOONGARCH64 + #define KMP_SIZE_T_SPEC KMP_UINT64_SPEC + #else + #error "Can't determine size_t printf format specifier." 
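The libomp hunks above wire LoongArch into two detection layers: LibompGetArchitecture.cmake compiles a throwaway source file whose only job is to trip one of the #error ARCHITECTURE=... branches so CMake can scrape the architecture name out of the compiler diagnostics, while kmp_platform.h and kmp_affinity.h then key runtime behaviour (including the generic sched_setaffinity/sched_getaffinity syscall numbers 122/123) off KMP_ARCH_LOONGARCH64. Below is a minimal standalone sketch of the same predefine probe; it is not taken from the patch and assumes only the usual GCC/Clang predefines __loongarch__ and __loongarch64.

/* probe-arch.c -- hedged sketch, not part of the libomp patch.
 * Mirrors the compile-time test used by LibompGetArchitecture.cmake,
 * but prints the detected name instead of forcing an #error. */
#include <stdio.h>

int main(void)
{
#if defined(__x86_64__)
    puts("x86_64");
#elif defined(__riscv) && __riscv_xlen == 64
    puts("riscv64");
#elif defined(__loongarch__) && defined(__loongarch64)
    puts("loongarch64");          /* predefined by GCC/Clang when targeting LoongArch64 */
#else
    puts("UnknownArchitecture");
#endif
    return 0;
}

Built with a loongarch64 cross compiler (or natively), this should print loongarch64; the CMake helper extracts exactly the same token from the forced error message.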
+@@ -1012,7 +1012,7 @@ extern kmp_real64 __kmp_xchg_real64(volatile kmp_real64 *p, kmp_real64 v); + #endif /* KMP_OS_WINDOWS */ + + #if KMP_ARCH_PPC64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64 || KMP_ARCH_MIPS || \ +- KMP_ARCH_MIPS64 || KMP_ARCH_RISCV64 ++ KMP_ARCH_MIPS64 || KMP_ARCH_RISCV64 || KMP_ARCH_LOONGARCH64 + #if KMP_OS_WINDOWS + #undef KMP_MB + #define KMP_MB() std::atomic_thread_fence(std::memory_order_seq_cst) +diff --git a/runtime/src/kmp_platform.h b/runtime/src/kmp_platform.h +index bbbd72d..6392d17 100644 +--- a/runtime/src/kmp_platform.h ++++ b/runtime/src/kmp_platform.h +@@ -92,6 +92,7 @@ + #define KMP_ARCH_MIPS 0 + #define KMP_ARCH_MIPS64 0 + #define KMP_ARCH_RISCV64 0 ++#define KMP_ARCH_LOONGARCH64 0 + + #if KMP_OS_WINDOWS + #if defined(_M_AMD64) || defined(__x86_64) +@@ -135,6 +136,9 @@ + #elif defined __riscv && __riscv_xlen == 64 + #undef KMP_ARCH_RISCV64 + #define KMP_ARCH_RISCV64 1 ++#elif defined __loongarch__ && defined __loongarch64 ++#undef KMP_ARCH_LOONGARCH64 ++#define KMP_ARCH_LOONGARCH64 1 + #endif + #endif + +@@ -199,7 +203,7 @@ + // TODO: Fixme - This is clever, but really fugly + #if (1 != KMP_ARCH_X86 + KMP_ARCH_X86_64 + KMP_ARCH_ARM + KMP_ARCH_PPC64 + \ + KMP_ARCH_AARCH64 + KMP_ARCH_MIPS + KMP_ARCH_MIPS64 + \ +- KMP_ARCH_RISCV64) ++ KMP_ARCH_RISCV64 + KMP_ARCH_LOONGARCH64) + #error Unknown or unsupported architecture + #endif + +diff --git a/runtime/src/kmp_runtime.cpp b/runtime/src/kmp_runtime.cpp +index e1af2f4..9dbc196 100644 +--- a/runtime/src/kmp_runtime.cpp ++++ b/runtime/src/kmp_runtime.cpp +@@ -8723,7 +8723,7 @@ __kmp_determine_reduction_method( + int atomic_available = FAST_REDUCTION_ATOMIC_METHOD_GENERATED; + + #if KMP_ARCH_X86_64 || KMP_ARCH_PPC64 || KMP_ARCH_AARCH64 || \ +- KMP_ARCH_MIPS64 || KMP_ARCH_RISCV64 ++ KMP_ARCH_MIPS64 || KMP_ARCH_RISCV64 || KMP_ARCH_LOONGARCH64 + + #if KMP_OS_LINUX || KMP_OS_DRAGONFLY || KMP_OS_FREEBSD || KMP_OS_NETBSD || \ + KMP_OS_OPENBSD || KMP_OS_WINDOWS || KMP_OS_DARWIN || KMP_OS_HURD +diff --git a/runtime/src/thirdparty/ittnotify/ittnotify_config.h b/runtime/src/thirdparty/ittnotify/ittnotify_config.h +index a452b76..d6d2cb0 100644 +--- a/runtime/src/thirdparty/ittnotify/ittnotify_config.h ++++ b/runtime/src/thirdparty/ittnotify/ittnotify_config.h +@@ -162,6 +162,10 @@ + #define ITT_ARCH_ARM64 6 + #endif /* ITT_ARCH_ARM64 */ + ++#ifndef ITT_ARCH_LOONGARCH64 ++#define ITT_ARCH_LOONGARCH64 7 ++#endif /* ITT_ARCH_LOONGARCH64 */ ++ + #ifndef ITT_ARCH + #if defined _M_IX86 || defined __i386__ + #define ITT_ARCH ITT_ARCH_IA32 +@@ -175,6 +179,8 @@ + #define ITT_ARCH ITT_ARCH_ARM64 + #elif defined __powerpc64__ + #define ITT_ARCH ITT_ARCH_PPC64 ++#elif defined __loongarch__ && defined __loongarch64 ++#define ITT_ARCH ITT_ARCH_LOONGARCH64 + #endif + #endif + +diff --git a/runtime/src/z_Linux_asm.S b/runtime/src/z_Linux_asm.S +index b4a45c1..4f80a81 100644 +--- a/runtime/src/z_Linux_asm.S ++++ b/runtime/src/z_Linux_asm.S +@@ -1725,6 +1725,157 @@ __kmp_invoke_microtask: + + #endif /* KMP_ARCH_RISCV64 */ + ++#if KMP_ARCH_LOONGARCH64 ++ ++//------------------------------------------------------------------------ ++// ++// typedef void (*microtask_t)( int *gtid, int *tid, ... ); ++// ++// int __kmp_invoke_microtask(microtask_t pkfn, int gtid, int tid, int argc, ++// void *p_argv[] ++// #if OMPT_SUPPORT ++// , ++// void **exit_frame_ptr ++// #endif ++// ) { ++// #if OMPT_SUPPORT ++// *exit_frame_ptr = OMPT_GET_FRAME_ADDRESS(0); ++// #endif ++// (*pkfn)( & gtid, & tid, argv[0], ... 
); ++ ++ return 1; ++ } ++ ++// parameters: ++// a0: pkfn ++// a1: gtid ++// a2: tid ++// a3: argc ++// a4: p_argv ++// a5: exit_frame_ptr ++// ++// Temp. registers: ++// ++// t0: used to calculate the dynamic stack size / used to hold pkfn address ++// t1: used as temporary for stack placement calculation ++// t2: used as temporary for stack arguments ++// t3: used as temporary for number of remaining pkfn parms ++// t4: used to traverse p_argv array ++// ++// return: a0 (always 1/TRUE) ++// ++ ++// -- Begin __kmp_invoke_microtask ++// mark_begin; ++ .text ++ .globl __kmp_invoke_microtask ++ .p2align 3 ++ .type __kmp_invoke_microtask,@function ++__kmp_invoke_microtask: ++ ++ // First, save ra and fp ++ addi.d $sp, $sp, -16 ++ st.d $ra, $sp, 8 ++ st.d $fp, $sp, 0 ++ addi.d $fp, $sp, 16 ++ ++ // Compute the dynamic stack size: ++ // ++ // - We need 8 bytes for storing 'gtid' and 'tid', so we can pass them by ++ // reference ++ // - We need 8 bytes for each argument that cannot be passed to the 'pkfn' ++ // function by register. Given that we have 8 of such registers (a[0-7]) ++ // and two + 'argc' arguments (consider &gtid and &tid), we need to ++ // reserve max(0, argc - 6)*8 extra bytes ++ // ++ // The total number of bytes is then max(0, argc - 6)*8 + 8 ++ ++ // Compute max(0, argc - 6) using the following bithack: ++ // max(0, x) = x - (x & (x >> 63)), where x := argc - 6 ++ // Source: http://graphics.stanford.edu/~seander/bithacks.html#IntegerMinOrMax ++ addi.d $t0, $a3, -6 ++ srai.d $t1, $t0, 63 ++ and $t1, $t0, $t1 ++ sub.d $t0, $t0, $t1 ++ ++ addi.d $t0, $t0, 1 ++ ++ slli.d $t0, $t0, 3 // t0: total number of bytes for stack storing arguments. ++ sub.d $sp, $sp, $t0 ++ ++ move $t0, $a0 ++ move $t3, $a3 ++ move $t4, $a4 ++ ++#if OMPT_SUPPORT ++ // Save frame pointer into exit_frame ++ st.d $fp, $a5, 0 ++#endif ++ ++ // Prepare arguments for the pkfn function (first 8 using $a0-$a7 registers) ++ ++ st.w $a1, $fp, -20 // gtid ++ st.w $a2, $fp, -24 // tid ++ ++ addi.d $a0, $fp, -20 // &gtid ++ addi.d $a1, $fp, -24 // &tid ++ ++ beqz $t3, .L_kmp_3 ++ ld.d $a2, $t4, 0 // argv[0] ++ ++ addi.d $t3, $t3, -1 ++ beqz $t3, .L_kmp_3 ++ ld.d $a3, $t4, 8 // argv[1] ++ ++ addi.d $t3, $t3, -1 ++ beqz $t3, .L_kmp_3 ++ ld.d $a4, $t4, 16 // argv[2] ++ ++ addi.d $t3, $t3, -1 ++ beqz $t3, .L_kmp_3 ++ ld.d $a5, $t4, 24 // argv[3] ++ ++ addi.d $t3, $t3, -1 ++ beqz $t3, .L_kmp_3 ++ ld.d $a6, $t4, 32 // argv[4] ++ ++ addi.d $t3, $t3, -1 ++ beqz $t3, .L_kmp_3 ++ ld.d $a7, $t4, 40 // argv[5] ++ ++ // Prepare any additional argument passed through the stack ++ addi.d $t4, $t4, 48 ++ move $t1, $sp ++ b .L_kmp_2 ++.L_kmp_1: ++ ld.d $t2, $t4, 0 ++ st.d $t2, $t1, 0 ++ addi.d $t4, $t4, 8 ++ addi.d $t1, $t1, 8 ++.L_kmp_2: ++ addi.d $t3, $t3, -1 ++ bnez $t3, .L_kmp_1 ++ ++.L_kmp_3: ++ // Call pkfn function ++ jirl $ra, $t0, 0 ++ ++ // Restore stack and return ++ ++ addi.d $a0, $zero, 1 ++ ++ addi.d $sp, $fp, -16 ++ ld.d $fp, $sp, 0 ++ ld.d $ra, $sp, 8 ++ addi.d $sp, $sp, 16 ++ jr $ra ++.Lfunc_end0: ++ .size __kmp_invoke_microtask, .Lfunc_end0-__kmp_invoke_microtask ++ ++// -- End __kmp_invoke_microtask ++ ++#endif /* KMP_ARCH_LOONGARCH64 */ ++ + #if KMP_ARCH_ARM || KMP_ARCH_MIPS + .data + .comm .gomp_critical_user_,32,8 +@@ -1736,7 +1887,8 @@ __kmp_unnamed_critical_addr: + .size __kmp_unnamed_critical_addr,4 + #endif /* KMP_ARCH_ARM */ + +-#if KMP_ARCH_PPC64 || KMP_ARCH_AARCH64 || KMP_ARCH_MIPS64 || KMP_ARCH_RISCV64 ++#if KMP_ARCH_PPC64 || KMP_ARCH_AARCH64 || KMP_ARCH_MIPS64 || \ ++ KMP_ARCH_RISCV64 || 
KMP_ARCH_LOONGARCH64 + #ifndef KMP_PREFIX_UNDERSCORE + # define KMP_PREFIX_UNDERSCORE(x) x + #endif +@@ -1751,7 +1903,7 @@ KMP_PREFIX_UNDERSCORE(__kmp_unnamed_critical_addr): + .size KMP_PREFIX_UNDERSCORE(__kmp_unnamed_critical_addr),8 + #endif + #endif /* KMP_ARCH_PPC64 || KMP_ARCH_AARCH64 || KMP_ARCH_MIPS64 || +- KMP_ARCH_RISCV64 */ ++ KMP_ARCH_RISCV64 || KMP_ARCH_LOONGARCH64 */ + + #if KMP_OS_LINUX + # if KMP_ARCH_ARM +diff --git a/runtime/src/z_Linux_util.cpp b/runtime/src/z_Linux_util.cpp +index 5cd6ad6..bdadbd6 100644 +--- a/runtime/src/z_Linux_util.cpp ++++ b/runtime/src/z_Linux_util.cpp +@@ -2441,7 +2441,7 @@ finish: // Clean up and exit. + + #if !(KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_MIC || \ + ((KMP_OS_LINUX || KMP_OS_DARWIN) && KMP_ARCH_AARCH64) || \ +- KMP_ARCH_PPC64 || KMP_ARCH_RISCV64) ++ KMP_ARCH_PPC64 || KMP_ARCH_RISCV64 || KMP_ARCH_LOONGARCH64) + + // we really only need the case with 1 argument, because CLANG always build + // a struct of pointers to shared variables referenced in the outlined function +diff --git a/runtime/test/atomic/kmp_atomic_float10_max_min.c b/runtime/test/atomic/kmp_atomic_float10_max_min.c +index 4a833e7..cc54e1a 100644 +--- a/runtime/test/atomic/kmp_atomic_float10_max_min.c ++++ b/runtime/test/atomic/kmp_atomic_float10_max_min.c +@@ -1,6 +1,6 @@ + // RUN: %libomp-compile -mlong-double-80 && %libomp-run + // UNSUPPORTED: gcc +-// UNSUPPORTED: powerpc ++// REQUIRES: x86-registered-target + + #include + #include +diff --git a/runtime/tools/lib/Platform.pm b/runtime/tools/lib/Platform.pm +index 38593a1..2d262ae 100644 +--- a/runtime/tools/lib/Platform.pm ++++ b/runtime/tools/lib/Platform.pm +@@ -63,6 +63,8 @@ sub canon_arch($) { + $arch = "mips"; + } elsif ( $arch =~ m{\Ariscv64} ) { + $arch = "riscv64"; ++ } elsif ( $arch =~ m{\Aloongarch64} ) { ++ $arch = "loongarch64"; + } else { + $arch = undef; + }; # if +@@ -97,6 +99,7 @@ sub canon_mic_arch($) { + "mips" => "MIPS", + "mips64" => "MIPS64", + "riscv64" => "RISC-V (64-bit)", ++ "loongarch64" => "LoongArch64", + ); + + sub legal_arch($) { +@@ -119,6 +122,7 @@ sub canon_mic_arch($) { + "mic" => "intel64", + "mips" => "mips", + "mips64" => "MIPS64", ++ "loongarch64" => "loongarch64", + ); + + sub arch_opt($) { +@@ -225,6 +229,8 @@ sub target_options() { + $_host_arch = "mips"; + } elsif ( $hardware_platform eq "riscv64" ) { + $_host_arch = "riscv64"; ++ } elsif ( $hardware_platform eq "loongarch64" ) { ++ $_host_arch = "loongarch64"; + } else { + die "Unsupported host hardware platform: \"$hardware_platform\"; stopped"; + }; # if +@@ -414,7 +420,7 @@ the script assumes host architecture is target one. + + Input string is an architecture name to canonize. The function recognizes many variants, for example: + C<32e>, C, C, etc. Returned string is a canonized architecture name, +-one of: C<32>, C<32e>, C<64>, C, C, C, C, C, C, C or C is input string is not recognized. ++one of: C<32>, C<32e>, C<64>, C, C, C, C, C, C, C, C or C is input string is not recognized. 
+ + =item B + +diff --git a/runtime/tools/lib/Uname.pm b/runtime/tools/lib/Uname.pm +index 99fe1cd..8a976ad 100644 +--- a/runtime/tools/lib/Uname.pm ++++ b/runtime/tools/lib/Uname.pm +@@ -158,6 +158,8 @@ if ( 0 ) { + $values{ hardware_platform } = "mips"; + } elsif ( $values{ machine } =~ m{\Ariscv64\z} ) { + $values{ hardware_platform } = "riscv64"; ++ } elsif ( $values{ machine } =~ m{\Aloongarch64\z} ) { ++ $values{ hardware_platform } = "loongarch64"; + } else { + die "Unsupported machine (\"$values{ machine }\") returned by POSIX::uname(); stopped"; + }; # if diff --git a/openpgp-ca/PKGBUILD b/openpgp-ca/PKGBUILD index 05318f8643..3b902d4b85 100644 --- a/openpgp-ca/PKGBUILD +++ b/openpgp-ca/PKGBUILD @@ -47,7 +47,7 @@ b2sums=('86cdf27447c003415348705745990899c507b326bc2bc191302cd32f2d6df23b28bfe25 prepare() { cd $pkgbase-$pkgbase-v$pkgver # NOTE: add --locked as soon as upstream has split out the components - cargo fetch --target "$CARCH-unknown-linux-gnu" + cargo fetch --target "`uname -m`-unknown-linux-gnu" } build() { diff --git a/openpgp-card-tools/PKGBUILD b/openpgp-card-tools/PKGBUILD index c6ec64600c..f6aa23d357 100644 --- a/openpgp-card-tools/PKGBUILD +++ b/openpgp-card-tools/PKGBUILD @@ -26,7 +26,7 @@ b2sums=('1fc6e02038d7cd9ba561c00f0c9b11cd27f282931b1fa70cb2436917224a1fd63c0e130 prepare() { cd $pkgname - cargo fetch --locked --target "$CARCH-unknown-linux-gnu" + cargo fetch --locked --target "`uname -m`-unknown-linux-gnu" } build() { diff --git a/opus/PKGBUILD b/opus/PKGBUILD index e547ef53c1..b6da5ca882 100644 --- a/opus/PKGBUILD +++ b/opus/PKGBUILD @@ -39,6 +39,8 @@ build() { local meson_options=( -D asm=disabled -D custom-modes=true + -D rtcd=disabled + -D intrinsics=disabled ) arch-meson opus build "${meson_options[@]}" diff --git a/ouch/PKGBUILD b/ouch/PKGBUILD index 4b1c639a1f..40c2a176a5 100644 --- a/ouch/PKGBUILD +++ b/ouch/PKGBUILD @@ -16,7 +16,7 @@ options=('!lto') prepare() { cd "$srcdir/$pkgname-$pkgver" - cargo fetch --locked --target "$CARCH-unknown-linux-gnu" + cargo fetch --locked --target "`uname -m`-unknown-linux-gnu" } build() { diff --git a/paccat/PKGBUILD b/paccat/PKGBUILD index 9bac22d73a..045363b5c3 100644 --- a/paccat/PKGBUILD +++ b/paccat/PKGBUILD @@ -15,7 +15,7 @@ sha256sums=('f4478240063a00500a9fb45a1571b24519901fae86e5dddffa9f59191fcbada5') prepare() { cd "$pkgname-$pkgver" export RUSTUP_TOOLCHAIN=stable - cargo fetch --locked --target "$CARCH-unknown-linux-gnu" + cargo fetch --locked --target "`uname -m`-unknown-linux-gnu" } build () { diff --git a/pacman-bintrans/PKGBUILD b/pacman-bintrans/PKGBUILD index 75a5578e78..32e539c294 100644 --- a/pacman-bintrans/PKGBUILD +++ b/pacman-bintrans/PKGBUILD @@ -22,7 +22,7 @@ validpgpkeys=("64B13F7117D6E07D661BBCE0FE763A64F5E54FD6") prepare() { cd "${pkgbase}-${pkgver}" - cargo fetch --locked --target "$CARCH-unknown-linux-gnu" + cargo fetch --locked --target "`uname -m`-unknown-linux-gnu" } build() { diff --git a/pacman-contrib/PKGBUILD b/pacman-contrib/PKGBUILD index a856c9afde..19b4cf8694 100644 --- a/pacman-contrib/PKGBUILD +++ b/pacman-contrib/PKGBUILD @@ -18,12 +18,15 @@ optdepends=( 'vim: default merge program for pacdiff' ) source=("git+$url.git#tag=v$pkgver") +source+=(pkgbuild-vim-la64.patch) b2sums=('SKIP') +b2sums+=('2cc52b392ef20be4b32a273c6ba8c7d1616e846e2a00dc846b1238bb5db973d25b8873f1e75ef4e2289a50860ec1b5a86208c35d6699e6cf547116d224cef52e') validpgpkeys=('04DC3FB1445FECA813C27EFAEA4F7B321A906AD9') # Daniel M. 
Capella # '5134EF9EAF65F95B6BB1608E50FB9B273A9D0BB5') # Johannes Löthberg prepare() { cd $pkgname + patch -p1 -i $srcdir/pkgbuild-vim-la64.patch ./autogen.sh } diff --git a/pacman-contrib/pkgbuild-vim-la64.patch b/pacman-contrib/pkgbuild-vim-la64.patch new file mode 100644 index 0000000000..6e66399ca5 --- /dev/null +++ b/pacman-contrib/pkgbuild-vim-la64.patch @@ -0,0 +1,13 @@ +diff --git a/src/vim/syntax/PKGBUILD.vim b/src/vim/syntax/PKGBUILD.vim +index e5c6c49..a1cbd33 100644 +--- a/src/vim/syntax/PKGBUILD.vim ++++ b/src/vim/syntax/PKGBUILD.vim +@@ -81,7 +81,7 @@ syn region pbBackupGroup start=/^backup=(/ end=/)/ contains=pb_k_backup,pbValidB + + " arch + syn keyword pb_k_arch arch contained +-syn keyword pbArch i686 x86_64 ppc pentium4 armv7h aarch64 any contained ++syn keyword pbArch i686 x86_64 loong64 ppc pentium4 armv7h aarch64 any contained + syn match pbIllegalArch /[^='"() ]/ contained contains=pbArch + syn region pbArchGroup start=/^arch=(/ end=/)/ contains=pb_k_arch,pbArch,pbIllegalArch,pbComment + diff --git a/pari/PKGBUILD b/pari/PKGBUILD index 4028c96c5c..31f57a55a5 100644 --- a/pari/PKGBUILD +++ b/pari/PKGBUILD @@ -35,7 +35,7 @@ build() { --mt=pthread \ --with-gmp make all - make -C Olinux-x86_64 gp-sta +# make -C Olinux-x86_64 gp-sta } check() { @@ -46,7 +46,7 @@ check() { package() { cd $pkgname-$pkgver make DESTDIR="$pkgdir" install - make DESTDIR="$pkgdir" -C Olinux-x86_64 install-bin-sta +# make DESTDIR="$pkgdir" -C Olinux-x86_64 install-bin-sta ln -sf gp.1.gz "$pkgdir"/usr/share/man/man1/pari.1 chrpath -d "$pkgdir"/usr/bin/gp-* } diff --git a/pastel/PKGBUILD b/pastel/PKGBUILD index e30b45508e..13a07b127e 100644 --- a/pastel/PKGBUILD +++ b/pastel/PKGBUILD @@ -30,7 +30,7 @@ prepare() { cd "$pkgbase" # download dependencies - cargo fetch --locked --target "$CARCH-unknown-linux-gnu" + cargo fetch --target "`uname -m`-unknown-linux-gnu" } build() { diff --git a/perl-image-sane/PKGBUILD b/perl-image-sane/PKGBUILD index 5937fb5760..4d868585a1 100644 --- a/perl-image-sane/PKGBUILD +++ b/perl-image-sane/PKGBUILD @@ -19,7 +19,7 @@ conflicts=('perl-sane') source=("https://www.cpan.org/modules/by-module/${_perl_namespace}/${_perl_namespace}-${_perl_module}-${pkgver}.tar.gz" https://rt.cpan.org/Public/Ticket/Attachment/2538823/1107284/Image-Sane-5-Replace-deprecated-given-and-when-operators.patch) sha256sums=('229aa0e9f049efa760f3c2f6e61d9d539af43d8f764b50a6e03064b4729a35ff' - 'b81caa036aabe4bcb67ca2729854c8e1dabb62d17b17214e41c930937edbf488') + 'e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855') options=('!emptydirs') prepare() { diff --git a/phonon/PKGBUILD b/phonon/PKGBUILD index 1d63c5971d..eb82973303 100644 --- a/phonon/PKGBUILD +++ b/phonon/PKGBUILD @@ -6,7 +6,7 @@ pkgname=(phonon-qt5 phonon-qt6) pkgdesc='The multimedia framework by KDE' pkgver=4.12.0 -pkgrel=3 +pkgrel=4 arch=(loong64 x86_64) url='https://community.kde.org/Phonon' license=(LGPL) diff --git a/pipe-rename/PKGBUILD b/pipe-rename/PKGBUILD index 2e155b5c1d..9d7ffe78f3 100644 --- a/pipe-rename/PKGBUILD +++ b/pipe-rename/PKGBUILD @@ -29,7 +29,7 @@ pkgver() { prepare() { cd "$pkgname" - cargo fetch --locked --target "$CARCH-unknown-linux-gnu" + cargo fetch --locked --target "`uname -m`-unknown-linux-gnu" } build() { diff --git a/pixman/83.patch b/pixman/83.patch new file mode 100644 index 0000000000..34c100bbe7 --- /dev/null +++ b/pixman/83.patch @@ -0,0 +1,11078 @@ +From 4e95946d115af4aa0a358af963982f9e0c32d39c Mon Sep 17 00:00:00 2001 +From: Song Ding +Date: Fri, 25 Aug 2023 16:13:27 +0800 +Subject: 
[PATCH] LoongArch: Add LoongArch SIMD support. Add LSX and + LASX optimizations. + +Benchmark results, before is upstream/master 47d3fbe38fc88085e644b737f3eff92865ebd65a, + +LSX build: ./autogen.sh --disable-lasx && make -j4 +LASX build: ./autogen.sh && make -j4 + +For example, the highest improvement is add_n_888. + +./tests/lowlevel-blt-bench add_n_888 + +before: add_n_8 = L1: 186.07 L2: 200.18 M:198.43 ( 1.41%) HT:161.37 VT:156.22 R:156.65 RT:103.67 ( 654Kops/s) +LSX: add_n_8 = L1:13782.81 L2:21067.23 M:14209.75 ( 98.95%) HT:1712.74 VT:3345.05 R:1661.89 RT:469.35 (2054Kops/s) +LASX: add_n_8 = L1:13034.63 L2:19725.46 M:16530.90 (117.71%) HT:1104.39 VT:2264.26 R:1077.33 RT:442.79 (2020Kops/s) + +./test/lowlevel-blt-bench all, 10 iterations: + +2.5 GHz LoongArch 3A5000, Linux, 64-bit, MEAN: + LSX LASX +L1 +336.97% +488.91% +L2 +340.57% +484.78% +M +307.29% +420.48% +HT +214.05% +225.17% +VT +201.28% +208.94% +R +202.48% +213.19% +RT +146.14% +140.95% +--- + meson.build | 54 + + meson_options.txt | 10 + + pixman/loongson_intrinsics.h | 2085 ++++++++++++++ + pixman/meson.build | 3 + + pixman/pixman-implementation.c | 1 + + pixman/pixman-lasx.c | 4887 ++++++++++++++++++++++++++++++++ + pixman/pixman-loongarch.c | 94 + + pixman/pixman-lsx.c | 3783 ++++++++++++++++++++++++ + pixman/pixman-private.h | 19 + + 9 files changed, 10936 insertions(+) + create mode 100644 pixman/loongson_intrinsics.h + create mode 100644 pixman/pixman-lasx.c + create mode 100644 pixman/pixman-loongarch.c + create mode 100644 pixman/pixman-lsx.c + +diff --git a/meson.build b/meson.build +index f822fb5..6f9eac4 100644 +--- a/meson.build ++++ b/meson.build +@@ -89,6 +89,60 @@ elif use_loongson_mmi.enabled() + error('Loongson MMI Support unavailable, but required') + endif + ++use_lsx = get_option('lsx') ++have_lsx = false ++lsx_flags = ['-mlsx'] ++if not use_lsx.disabled() ++ if host_machine.cpu_family() == 'loongarch64' and cc.compiles(''' ++ #ifndef __loongarch__ ++ #error "LSX is only available on LoongArch" ++ #endif ++ #include ++ int main () { ++ __m128i tmp0, tmp1; ++ tmp0 = __lsx_vadd_w(tmp0, tmp1); ++ return 0; ++ }''', ++ args : lsx_flags, ++ include_directories : include_directories('.'), ++ name : 'LoongArch LSX Intrinsic Support') ++ have_lsx = true ++ endif ++endif ++ ++if have_lsx ++ config.set10('USE_LOONGARCH_LSX', true) ++elif use_lsx.enabled() ++ error('LoongArch LSX Support unavailable, but required') ++endif ++ ++use_lasx = get_option('lasx') ++have_lasx = false ++lasx_flags = ['-mlasx'] ++if not use_lasx.disabled() ++ if host_machine.cpu_family() == 'loongarch64' and cc.compiles(''' ++ #ifndef __loongarch__ ++ #error "LASX is only available on LoongArch" ++ #endif ++ #include ++ int main () { ++ __m256i tmp0, tmp1; ++ tmp0 = __lasx_xvadd_w(tmp0, tmp1); ++ return 0; ++ }''', ++ args : lasx_flags, ++ include_directories : include_directories('.'), ++ name : 'LoongArch LASX Intrinsic Support') ++ have_lasx = true ++ endif ++endif ++ ++if have_lasx ++ config.set10('USE_LOONGARCH_LASX', true) ++elif use_lasx.enabled() ++ error('LoongArch LASX Support unavailable, but required') ++endif ++ + use_mmx = get_option('mmx') + have_mmx = false + mmx_flags = [] +diff --git a/meson_options.txt b/meson_options.txt +index df10889..05962be 100644 +--- a/meson_options.txt ++++ b/meson_options.txt +@@ -23,6 +23,16 @@ option( + type : 'feature', + description : 'Use Loongson MMI intrinsic optimized paths', + ) ++option( ++ 'lsx', ++ type : 'feature', ++ description : 'Use LoongArch LSX intrinsic optimized paths', ++) 
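The two meson feature checks added above decide at configure time whether -mlsx / -mlasx code paths can be built at all: each compiles a tiny translation unit that touches one vector intrinsic and sets USE_LOONGARCH_LSX or USE_LOONGARCH_LASX only on success. Below is a minimal runnable sketch of the LSX variant, under the assumption that the intrinsics come from GCC/Clang's lsxintrin.h header (the #include target is not visible in the excerpt) and that the file is compiled with -mlsx, matching lsx_flags.

/* lsx-probe.c -- hedged sketch, not part of the pixman patch.
 * Roughly what the new 'lsx' meson option compiles; build with: cc -mlsx lsx-probe.c */
#include <lsxintrin.h>

int main(void)
{
    __m128i a = __lsx_vreplgr2vr_w(1);   /* broadcast 1 into four 32-bit lanes */
    __m128i b = __lsx_vreplgr2vr_w(2);   /* broadcast 2 */
    __m128i c = __lsx_vadd_w(a, b);      /* lane-wise 32-bit add, the same call the meson check uses */
    return __lsx_vpickve2gr_w(c, 0) == 3 ? 0 : 1;   /* extract lane 0; expect 3 */
}

The LASX check works the same way with __m256i and __lasx_xvadd_w; if a probe fails while the corresponding option was forced on, configuration aborts with the "Support unavailable, but required" error shown above.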
++option( ++ 'lasx', ++ type : 'feature', ++ description : 'Use LoongArch LASX intrinsic optimized paths', ++) + option( + 'mmx', + type : 'feature', +diff --git a/pixman/loongson_intrinsics.h b/pixman/loongson_intrinsics.h +new file mode 100644 +index 0000000..b692308 +--- /dev/null ++++ b/pixman/loongson_intrinsics.h +@@ -0,0 +1,2085 @@ ++/* ++ * Copyright (c) 2021 Loongson Technology Corporation Limited ++ * Contributed by Shiyou Yin ++ * Xiwei Gu ++ * Lu Wang ++ * ++ * Permission to use, copy, modify, and/or distribute this software for any ++ * purpose with or without fee is hereby granted, provided that the above ++ * copyright notice and this permission notice appear in all copies. ++ * ++ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES ++ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF ++ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ++ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES ++ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ++ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF ++ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. ++ * ++ */ ++ ++#ifndef LOONGSON_INTRINSICS_H ++#define LOONGSON_INTRINSICS_H ++ ++/* ++ * This file is a header file of LoongArch builtin extension and ++ * available under ISC license. It provides a large number of macros ++ * to simplify writing LSX and LASX builtin optimizations. ++ * ++ * Any one can modify it or add new features for his/her own purposes. ++ * Contributing a patch will be appreciated as it might be useful for ++ * others as well. Send patches to loongson contributor mentioned above. ++ * ++ * MAJOR version: Usage changes, incompatible with previous version. ++ * MINOR version: Add new macros/functions, or bug fixes. ++ * MICRO version: Comment changes or implementation changes. 
++ */ ++#define LML_VERSION_MAJOR 1 ++#define LML_VERSION_MINOR 2 ++#define LML_VERSION_MICRO 2 ++ ++#define DUP2_ARG1(_INS, _IN0, _IN1, _OUT0, _OUT1) \ ++ { \ ++ _OUT0 = _INS(_IN0); \ ++ _OUT1 = _INS(_IN1); \ ++ } ++ ++#define DUP2_ARG2(_INS, _IN0, _IN1, _IN2, _IN3, _OUT0, _OUT1) \ ++ { \ ++ _OUT0 = _INS(_IN0, _IN1); \ ++ _OUT1 = _INS(_IN2, _IN3); \ ++ } ++ ++#define DUP2_ARG3(_INS, _IN0, _IN1, _IN2, _IN3, _IN4, _IN5, _OUT0, _OUT1) \ ++ { \ ++ _OUT0 = _INS(_IN0, _IN1, _IN2); \ ++ _OUT1 = _INS(_IN3, _IN4, _IN5); \ ++ } ++ ++#define DUP4_ARG1(_INS, _IN0, _IN1, _IN2, _IN3, _OUT0, _OUT1, _OUT2, _OUT3) \ ++ { \ ++ DUP2_ARG1(_INS, _IN0, _IN1, _OUT0, _OUT1); \ ++ DUP2_ARG1(_INS, _IN2, _IN3, _OUT2, _OUT3); \ ++ } ++ ++#define DUP4_ARG2(_INS, _IN0, _IN1, _IN2, _IN3, _IN4, _IN5, _IN6, _IN7, _OUT0, \ ++ _OUT1, _OUT2, _OUT3) \ ++ { \ ++ DUP2_ARG2(_INS, _IN0, _IN1, _IN2, _IN3, _OUT0, _OUT1); \ ++ DUP2_ARG2(_INS, _IN4, _IN5, _IN6, _IN7, _OUT2, _OUT3); \ ++ } ++ ++#define DUP4_ARG3(_INS, _IN0, _IN1, _IN2, _IN3, _IN4, _IN5, _IN6, _IN7, _IN8, \ ++ _IN9, _IN10, _IN11, _OUT0, _OUT1, _OUT2, _OUT3) \ ++ { \ ++ DUP2_ARG3(_INS, _IN0, _IN1, _IN2, _IN3, _IN4, _IN5, _OUT0, _OUT1); \ ++ DUP2_ARG3(_INS, _IN6, _IN7, _IN8, _IN9, _IN10, _IN11, _OUT2, _OUT3); \ ++ } ++ ++#ifdef __loongarch_sx ++#include ++/* ++ * ============================================================================= ++ * Description : Dot product & addition of byte vector elements ++ * Arguments : Inputs - in_c, in_h, in_l ++ * Outputs - out ++ * Return Type - halfword ++ * Details : Signed byte elements from in_h are multiplied by ++ * signed byte elements from in_l, and then added adjacent to ++ * each other to get a result twice the size of input. Then ++ * the results are added to signed half-word elements from in_c. ++ * Example : out = __lsx_vdp2add_h_b(in_c, in_h, in_l) ++ * in_c : 1,2,3,4, 1,2,3,4 ++ * in_h : 1,2,3,4, 5,6,7,8, 1,2,3,4, 5,6,7,8 ++ * in_l : 8,7,6,5, 4,3,2,1, 8,7,6,5, 4,3,2,1 ++ * out : 23,40,41,26, 23,40,41,26 ++ * ============================================================================= ++ */ ++static inline __m128i __lsx_vdp2add_h_b(__m128i in_c, __m128i in_h, ++ __m128i in_l) { ++ __m128i out; ++ ++ out = __lsx_vmaddwev_h_b(in_c, in_h, in_l); ++ out = __lsx_vmaddwod_h_b(out, in_h, in_l); ++ return out; ++} ++ ++/* ++ * ============================================================================= ++ * Description : Dot product & addition of byte vector elements ++ * Arguments : Inputs - in_c, in_h, in_l ++ * Outputs - out ++ * Return Type - halfword ++ * Details : Unsigned byte elements from in_h are multiplied by ++ * unsigned byte elements from in_l, and then added adjacent to ++ * each other to get a result twice the size of input. ++ * The results are added to signed half-word elements from in_c. 
++ * Example : out = __lsx_vdp2add_h_bu(in_c, in_h, in_l) ++ * in_c : 1,2,3,4, 1,2,3,4 ++ * in_h : 1,2,3,4, 5,6,7,8, 1,2,3,4, 5,6,7,8 ++ * in_l : 8,7,6,5, 4,3,2,1, 8,7,6,5, 4,3,2,1 ++ * out : 23,40,41,26, 23,40,41,26 ++ * ============================================================================= ++ */ ++static inline __m128i __lsx_vdp2add_h_bu(__m128i in_c, __m128i in_h, ++ __m128i in_l) { ++ __m128i out; ++ ++ out = __lsx_vmaddwev_h_bu(in_c, in_h, in_l); ++ out = __lsx_vmaddwod_h_bu(out, in_h, in_l); ++ return out; ++} ++ ++/* ++ * ============================================================================= ++ * Description : Dot product & addition of byte vector elements ++ * Arguments : Inputs - in_c, in_h, in_l ++ * Outputs - out ++ * Return Type - halfword ++ * Details : Unsigned byte elements from in_h are multiplied by ++ * signed byte elements from in_l, and then added adjacent to ++ * each other to get a result twice the size of input. ++ * The results are added to signed half-word elements from in_c. ++ * Example : out = __lsx_vdp2add_h_bu_b(in_c, in_h, in_l) ++ * in_c : 1,1,1,1, 1,1,1,1 ++ * in_h : 1,2,3,4, 5,6,7,8, 1,2,3,4, 5,6,7,8 ++ * in_l : -1,-2,-3,-4, -5,-6,-7,-8, 1,2,3,4, 5,6,7,8 ++ * out : -4,-24,-60,-112, 6,26,62,114 ++ * ============================================================================= ++ */ ++static inline __m128i __lsx_vdp2add_h_bu_b(__m128i in_c, __m128i in_h, ++ __m128i in_l) { ++ __m128i out; ++ ++ out = __lsx_vmaddwev_h_bu_b(in_c, in_h, in_l); ++ out = __lsx_vmaddwod_h_bu_b(out, in_h, in_l); ++ return out; ++} ++ ++/* ++ * ============================================================================= ++ * Description : Dot product & addition of half-word vector elements ++ * Arguments : Inputs - in_c, in_h, in_l ++ * Outputs - out ++ * Return Type - __m128i ++ * Details : Signed half-word elements from in_h are multiplied by ++ * signed half-word elements from in_l, and then added adjacent to ++ * each other to get a result twice the size of input. ++ * Then the results are added to signed word elements from in_c. ++ * Example : out = __lsx_vdp2add_h_b(in_c, in_h, in_l) ++ * in_c : 1,2,3,4 ++ * in_h : 1,2,3,4, 5,6,7,8 ++ * in_l : 8,7,6,5, 4,3,2,1 ++ * out : 23,40,41,26 ++ * ============================================================================= ++ */ ++static inline __m128i __lsx_vdp2add_w_h(__m128i in_c, __m128i in_h, ++ __m128i in_l) { ++ __m128i out; ++ ++ out = __lsx_vmaddwev_w_h(in_c, in_h, in_l); ++ out = __lsx_vmaddwod_w_h(out, in_h, in_l); ++ return out; ++} ++ ++/* ++ * ============================================================================= ++ * Description : Dot product of byte vector elements ++ * Arguments : Inputs - in_h, in_l ++ * Outputs - out ++ * Return Type - halfword ++ * Details : Signed byte elements from in_h are multiplied by ++ * signed byte elements from in_l, and then added adjacent to ++ * each other to get a result twice the size of input. 
++ * Example : out = __lsx_vdp2_h_b(in_h, in_l) ++ * in_h : 1,2,3,4, 5,6,7,8, 1,2,3,4, 5,6,7,8 ++ * in_l : 8,7,6,5, 4,3,2,1, 8,7,6,5, 4,3,2,1 ++ * out : 22,38,38,22, 22,38,38,22 ++ * ============================================================================= ++ */ ++static inline __m128i __lsx_vdp2_h_b(__m128i in_h, __m128i in_l) { ++ __m128i out; ++ ++ out = __lsx_vmulwev_h_b(in_h, in_l); ++ out = __lsx_vmaddwod_h_b(out, in_h, in_l); ++ return out; ++} ++ ++/* ++ * ============================================================================= ++ * Description : Dot product of byte vector elements ++ * Arguments : Inputs - in_h, in_l ++ * Outputs - out ++ * Return Type - halfword ++ * Details : Unsigned byte elements from in_h are multiplied by ++ * unsigned byte elements from in_l, and then added adjacent to ++ * each other to get a result twice the size of input. ++ * Example : out = __lsx_vdp2_h_bu(in_h, in_l) ++ * in_h : 1,2,3,4, 5,6,7,8, 1,2,3,4, 5,6,7,8 ++ * in_l : 8,7,6,5, 4,3,2,1, 8,7,6,5, 4,3,2,1 ++ * out : 22,38,38,22, 22,38,38,22 ++ * ============================================================================= ++ */ ++static inline __m128i __lsx_vdp2_h_bu(__m128i in_h, __m128i in_l) { ++ __m128i out; ++ ++ out = __lsx_vmulwev_h_bu(in_h, in_l); ++ out = __lsx_vmaddwod_h_bu(out, in_h, in_l); ++ return out; ++} ++ ++/* ++ * ============================================================================= ++ * Description : Dot product of byte vector elements ++ * Arguments : Inputs - in_h, in_l ++ * Outputs - out ++ * Return Type - halfword ++ * Details : Unsigned byte elements from in_h are multiplied by ++ * signed byte elements from in_l, and then added adjacent to ++ * each other to get a result twice the size of input. ++ * Example : out = __lsx_vdp2_h_bu_b(in_h, in_l) ++ * in_h : 1,2,3,4, 5,6,7,8, 1,2,3,4, 5,6,7,8 ++ * in_l : 8,7,6,5, 4,3,2,1, 8,7,6,5, 4,3,2,-1 ++ * out : 22,38,38,22, 22,38,38,6 ++ * ============================================================================= ++ */ ++static inline __m128i __lsx_vdp2_h_bu_b(__m128i in_h, __m128i in_l) { ++ __m128i out; ++ ++ out = __lsx_vmulwev_h_bu_b(in_h, in_l); ++ out = __lsx_vmaddwod_h_bu_b(out, in_h, in_l); ++ return out; ++} ++ ++/* ++ * ============================================================================= ++ * Description : Dot product of byte vector elements ++ * Arguments : Inputs - in_h, in_l ++ * Outputs - out ++ * Return Type - halfword ++ * Details : Signed byte elements from in_h are multiplied by ++ * signed byte elements from in_l, and then added adjacent to ++ * each other to get a result twice the size of input. ++ * Example : out = __lsx_vdp2_w_h(in_h, in_l) ++ * in_h : 1,2,3,4, 5,6,7,8 ++ * in_l : 8,7,6,5, 4,3,2,1 ++ * out : 22,38,38,22 ++ * ============================================================================= ++ */ ++static inline __m128i __lsx_vdp2_w_h(__m128i in_h, __m128i in_l) { ++ __m128i out; ++ ++ out = __lsx_vmulwev_w_h(in_h, in_l); ++ out = __lsx_vmaddwod_w_h(out, in_h, in_l); ++ return out; ++} ++ ++/* ++ * ============================================================================= ++ * Description : Dot product of byte vector elements ++ * Arguments : Inputs - in_h, in_l ++ * Outputs - out ++ * Return Type - double ++ * Details : Signed byte elements from in_h are multiplied by ++ * signed byte elements from in_l, and then added adjacent to ++ * each other to get a result twice the size of input. 
++ * Example : out = __lsx_vdp2_d_w(in_h, in_l) ++ * in_h : 1,2,3,4 ++ * in_l : 8,7,6,5 ++ * out : 22,38 ++ * ============================================================================= ++ */ ++static inline __m128i __lsx_vdp2_d_w(__m128i in_h, __m128i in_l) { ++ __m128i out; ++ ++ out = __lsx_vmulwev_d_w(in_h, in_l); ++ out = __lsx_vmaddwod_d_w(out, in_h, in_l); ++ return out; ++} ++ ++/* ++ * ============================================================================= ++ * Description : Clip all halfword elements of input vector between min & max ++ * out = ((_in) < (min)) ? (min) : (((_in) > (max)) ? (max) : ++ * (_in)) ++ * Arguments : Inputs - _in (input vector) ++ * - min (min threshold) ++ * - max (max threshold) ++ * Outputs - out (output vector with clipped elements) ++ * Return Type - signed halfword ++ * Example : out = __lsx_vclip_h(_in) ++ * _in : -8,2,280,249, -8,255,280,249 ++ * min : 1,1,1,1, 1,1,1,1 ++ * max : 9,9,9,9, 9,9,9,9 ++ * out : 1,2,9,9, 1,9,9,9 ++ * ============================================================================= ++ */ ++static inline __m128i __lsx_vclip_h(__m128i _in, __m128i min, __m128i max) { ++ __m128i out; ++ ++ out = __lsx_vmax_h(min, _in); ++ out = __lsx_vmin_h(max, out); ++ return out; ++} ++ ++/* ++ * ============================================================================= ++ * Description : Set each element of vector between 0 and 255 ++ * Arguments : Inputs - _in ++ * Outputs - out ++ * Return Type - halfword ++ * Details : Signed byte elements from _in are clamped between 0 and 255. ++ * Example : out = __lsx_vclip255_h(_in) ++ * _in : -8,255,280,249, -8,255,280,249 ++ * out : 0,255,255,249, 0,255,255,249 ++ * ============================================================================= ++ */ ++static inline __m128i __lsx_vclip255_h(__m128i _in) { ++ __m128i out; ++ ++ out = __lsx_vmaxi_h(_in, 0); ++ out = __lsx_vsat_hu(out, 7); ++ return out; ++} ++ ++/* ++ * ============================================================================= ++ * Description : Set each element of vector between 0 and 255 ++ * Arguments : Inputs - _in ++ * Outputs - out ++ * Return Type - word ++ * Details : Signed byte elements from _in are clamped between 0 and 255. 
++ * Example : out = __lsx_vclip255_w(_in) ++ * _in : -8,255,280,249 ++ * out : 0,255,255,249 ++ * ============================================================================= ++ */ ++static inline __m128i __lsx_vclip255_w(__m128i _in) { ++ __m128i out; ++ ++ out = __lsx_vmaxi_w(_in, 0); ++ out = __lsx_vsat_wu(out, 7); ++ return out; ++} ++ ++/* ++ * ============================================================================= ++ * Description : Swap two variables ++ * Arguments : Inputs - _in0, _in1 ++ * Outputs - _in0, _in1 (in-place) ++ * Details : Swapping of two input variables using xor ++ * Example : LSX_SWAP(_in0, _in1) ++ * _in0 : 1,2,3,4 ++ * _in1 : 5,6,7,8 ++ * _in0(out) : 5,6,7,8 ++ * _in1(out) : 1,2,3,4 ++ * ============================================================================= ++ */ ++#define LSX_SWAP(_in0, _in1) \ ++ { \ ++ _in0 = __lsx_vxor_v(_in0, _in1); \ ++ _in1 = __lsx_vxor_v(_in0, _in1); \ ++ _in0 = __lsx_vxor_v(_in0, _in1); \ ++ } ++ ++/* ++ * ============================================================================= ++ * Description : Transpose 4x4 block with word elements in vectors ++ * Arguments : Inputs - in0, in1, in2, in3 ++ * Outputs - out0, out1, out2, out3 ++ * Details : ++ * Example : ++ * 1, 2, 3, 4 1, 5, 9,13 ++ * 5, 6, 7, 8 to 2, 6,10,14 ++ * 9,10,11,12 =====> 3, 7,11,15 ++ * 13,14,15,16 4, 8,12,16 ++ * ============================================================================= ++ */ ++#define LSX_TRANSPOSE4x4_W(_in0, _in1, _in2, _in3, _out0, _out1, _out2, _out3) \ ++ { \ ++ __m128i _t0, _t1, _t2, _t3; \ ++ \ ++ _t0 = __lsx_vilvl_w(_in1, _in0); \ ++ _t1 = __lsx_vilvh_w(_in1, _in0); \ ++ _t2 = __lsx_vilvl_w(_in3, _in2); \ ++ _t3 = __lsx_vilvh_w(_in3, _in2); \ ++ _out0 = __lsx_vilvl_d(_t2, _t0); \ ++ _out1 = __lsx_vilvh_d(_t2, _t0); \ ++ _out2 = __lsx_vilvl_d(_t3, _t1); \ ++ _out3 = __lsx_vilvh_d(_t3, _t1); \ ++ } ++ ++/* ++ * ============================================================================= ++ * Description : Transpose 8x8 block with byte elements in vectors ++ * Arguments : Inputs - _in0, _in1, _in2, _in3, _in4, _in5, _in6, _in7 ++ * Outputs - _out0, _out1, _out2, _out3, _out4, _out5, _out6, ++ * _out7 ++ * Details : The rows of the matrix become columns, and the columns ++ * become rows. 
++ * Example : LSX_TRANSPOSE8x8_B ++ * _in0 : 00,01,02,03,04,05,06,07, 00,00,00,00,00,00,00,00 ++ * _in1 : 10,11,12,13,14,15,16,17, 00,00,00,00,00,00,00,00 ++ * _in2 : 20,21,22,23,24,25,26,27, 00,00,00,00,00,00,00,00 ++ * _in3 : 30,31,32,33,34,35,36,37, 00,00,00,00,00,00,00,00 ++ * _in4 : 40,41,42,43,44,45,46,47, 00,00,00,00,00,00,00,00 ++ * _in5 : 50,51,52,53,54,55,56,57, 00,00,00,00,00,00,00,00 ++ * _in6 : 60,61,62,63,64,65,66,67, 00,00,00,00,00,00,00,00 ++ * _in7 : 70,71,72,73,74,75,76,77, 00,00,00,00,00,00,00,00 ++ * ++ * _ out0 : 00,10,20,30,40,50,60,70, 00,00,00,00,00,00,00,00 ++ * _ out1 : 01,11,21,31,41,51,61,71, 00,00,00,00,00,00,00,00 ++ * _ out2 : 02,12,22,32,42,52,62,72, 00,00,00,00,00,00,00,00 ++ * _ out3 : 03,13,23,33,43,53,63,73, 00,00,00,00,00,00,00,00 ++ * _ out4 : 04,14,24,34,44,54,64,74, 00,00,00,00,00,00,00,00 ++ * _ out5 : 05,15,25,35,45,55,65,75, 00,00,00,00,00,00,00,00 ++ * _ out6 : 06,16,26,36,46,56,66,76, 00,00,00,00,00,00,00,00 ++ * _ out7 : 07,17,27,37,47,57,67,77, 00,00,00,00,00,00,00,00 ++ * ============================================================================= ++ */ ++#define LSX_TRANSPOSE8x8_B(_in0, _in1, _in2, _in3, _in4, _in5, _in6, _in7, \ ++ _out0, _out1, _out2, _out3, _out4, _out5, _out6, \ ++ _out7) \ ++ { \ ++ __m128i zero = { 0 }; \ ++ __m128i shuf8 = { 0x0F0E0D0C0B0A0908, 0x1716151413121110 }; \ ++ __m128i _t0, _t1, _t2, _t3, _t4, _t5, _t6, _t7; \ ++ \ ++ _t0 = __lsx_vilvl_b(_in2, _in0); \ ++ _t1 = __lsx_vilvl_b(_in3, _in1); \ ++ _t2 = __lsx_vilvl_b(_in6, _in4); \ ++ _t3 = __lsx_vilvl_b(_in7, _in5); \ ++ _t4 = __lsx_vilvl_b(_t1, _t0); \ ++ _t5 = __lsx_vilvh_b(_t1, _t0); \ ++ _t6 = __lsx_vilvl_b(_t3, _t2); \ ++ _t7 = __lsx_vilvh_b(_t3, _t2); \ ++ _out0 = __lsx_vilvl_w(_t6, _t4); \ ++ _out2 = __lsx_vilvh_w(_t6, _t4); \ ++ _out4 = __lsx_vilvl_w(_t7, _t5); \ ++ _out6 = __lsx_vilvh_w(_t7, _t5); \ ++ _out1 = __lsx_vshuf_b(zero, _out0, shuf8); \ ++ _out3 = __lsx_vshuf_b(zero, _out2, shuf8); \ ++ _out5 = __lsx_vshuf_b(zero, _out4, shuf8); \ ++ _out7 = __lsx_vshuf_b(zero, _out6, shuf8); \ ++ } ++ ++/* ++ * ============================================================================= ++ * Description : Transpose 8x8 block with half-word elements in vectors ++ * Arguments : Inputs - in0, in1, in2, in3, in4, in5, in6, in7 ++ * Outputs - out0, out1, out2, out3, out4, out5, out6, out7 ++ * Details : ++ * Example : ++ * 00,01,02,03,04,05,06,07 00,10,20,30,40,50,60,70 ++ * 10,11,12,13,14,15,16,17 01,11,21,31,41,51,61,71 ++ * 20,21,22,23,24,25,26,27 02,12,22,32,42,52,62,72 ++ * 30,31,32,33,34,35,36,37 to 03,13,23,33,43,53,63,73 ++ * 40,41,42,43,44,45,46,47 ======> 04,14,24,34,44,54,64,74 ++ * 50,51,52,53,54,55,56,57 05,15,25,35,45,55,65,75 ++ * 60,61,62,63,64,65,66,67 06,16,26,36,46,56,66,76 ++ * 70,71,72,73,74,75,76,77 07,17,27,37,47,57,67,77 ++ * ============================================================================= ++ */ ++#define LSX_TRANSPOSE8x8_H(_in0, _in1, _in2, _in3, _in4, _in5, _in6, _in7, \ ++ _out0, _out1, _out2, _out3, _out4, _out5, _out6, \ ++ _out7) \ ++ { \ ++ __m128i _s0, _s1, _t0, _t1, _t2, _t3, _t4, _t5, _t6, _t7; \ ++ \ ++ _s0 = __lsx_vilvl_h(_in6, _in4); \ ++ _s1 = __lsx_vilvl_h(_in7, _in5); \ ++ _t0 = __lsx_vilvl_h(_s1, _s0); \ ++ _t1 = __lsx_vilvh_h(_s1, _s0); \ ++ _s0 = __lsx_vilvh_h(_in6, _in4); \ ++ _s1 = __lsx_vilvh_h(_in7, _in5); \ ++ _t2 = __lsx_vilvl_h(_s1, _s0); \ ++ _t3 = __lsx_vilvh_h(_s1, _s0); \ ++ _s0 = __lsx_vilvl_h(_in2, _in0); \ ++ _s1 = __lsx_vilvl_h(_in3, _in1); \ ++ _t4 = __lsx_vilvl_h(_s1, _s0); \ ++ _t5 = 
__lsx_vilvh_h(_s1, _s0); \ ++ _s0 = __lsx_vilvh_h(_in2, _in0); \ ++ _s1 = __lsx_vilvh_h(_in3, _in1); \ ++ _t6 = __lsx_vilvl_h(_s1, _s0); \ ++ _t7 = __lsx_vilvh_h(_s1, _s0); \ ++ \ ++ _out0 = __lsx_vpickev_d(_t0, _t4); \ ++ _out2 = __lsx_vpickev_d(_t1, _t5); \ ++ _out4 = __lsx_vpickev_d(_t2, _t6); \ ++ _out6 = __lsx_vpickev_d(_t3, _t7); \ ++ _out1 = __lsx_vpickod_d(_t0, _t4); \ ++ _out3 = __lsx_vpickod_d(_t1, _t5); \ ++ _out5 = __lsx_vpickod_d(_t2, _t6); \ ++ _out7 = __lsx_vpickod_d(_t3, _t7); \ ++ } ++ ++/* ++ * ============================================================================= ++ * Description : Transpose input 8x4 byte block into 4x8 ++ * Arguments : Inputs - _in0, _in1, _in2, _in3 (input 8x4 byte block) ++ * Outputs - _out0, _out1, _out2, _out3 (output 4x8 byte block) ++ * Return Type - as per RTYPE ++ * Details : The rows of the matrix become columns, and the columns become ++ * rows. ++ * Example : LSX_TRANSPOSE8x4_B ++ * _in0 : 00,01,02,03,00,00,00,00, 00,00,00,00,00,00,00,00 ++ * _in1 : 10,11,12,13,00,00,00,00, 00,00,00,00,00,00,00,00 ++ * _in2 : 20,21,22,23,00,00,00,00, 00,00,00,00,00,00,00,00 ++ * _in3 : 30,31,32,33,00,00,00,00, 00,00,00,00,00,00,00,00 ++ * _in4 : 40,41,42,43,00,00,00,00, 00,00,00,00,00,00,00,00 ++ * _in5 : 50,51,52,53,00,00,00,00, 00,00,00,00,00,00,00,00 ++ * _in6 : 60,61,62,63,00,00,00,00, 00,00,00,00,00,00,00,00 ++ * _in7 : 70,71,72,73,00,00,00,00, 00,00,00,00,00,00,00,00 ++ * ++ * _out0 : 00,10,20,30,40,50,60,70, 00,00,00,00,00,00,00,00 ++ * _out1 : 01,11,21,31,41,51,61,71, 00,00,00,00,00,00,00,00 ++ * _out2 : 02,12,22,32,42,52,62,72, 00,00,00,00,00,00,00,00 ++ * _out3 : 03,13,23,33,43,53,63,73, 00,00,00,00,00,00,00,00 ++ * ============================================================================= ++ */ ++#define LSX_TRANSPOSE8x4_B(_in0, _in1, _in2, _in3, _in4, _in5, _in6, _in7, \ ++ _out0, _out1, _out2, _out3) \ ++ { \ ++ __m128i _tmp0_m, _tmp1_m, _tmp2_m, _tmp3_m; \ ++ \ ++ _tmp0_m = __lsx_vpackev_w(_in4, _in0); \ ++ _tmp1_m = __lsx_vpackev_w(_in5, _in1); \ ++ _tmp2_m = __lsx_vilvl_b(_tmp1_m, _tmp0_m); \ ++ _tmp0_m = __lsx_vpackev_w(_in6, _in2); \ ++ _tmp1_m = __lsx_vpackev_w(_in7, _in3); \ ++ \ ++ _tmp3_m = __lsx_vilvl_b(_tmp1_m, _tmp0_m); \ ++ _tmp0_m = __lsx_vilvl_h(_tmp3_m, _tmp2_m); \ ++ _tmp1_m = __lsx_vilvh_h(_tmp3_m, _tmp2_m); \ ++ \ ++ _out0 = __lsx_vilvl_w(_tmp1_m, _tmp0_m); \ ++ _out2 = __lsx_vilvh_w(_tmp1_m, _tmp0_m); \ ++ _out1 = __lsx_vilvh_d(_out2, _out0); \ ++ _out3 = __lsx_vilvh_d(_out0, _out2); \ ++ } ++ ++/* ++ * ============================================================================= ++ * Description : Transpose 16x8 block with byte elements in vectors ++ * Arguments : Inputs - in0, in1, in2, in3, in4, in5, in6, in7, in8 ++ * in9, in10, in11, in12, in13, in14, in15 ++ * Outputs - out0, out1, out2, out3, out4, out5, out6, out7 ++ * Details : ++ * Example : ++ * 000,001,002,003,004,005,006,007 ++ * 008,009,010,011,012,013,014,015 ++ * 016,017,018,019,020,021,022,023 ++ * 024,025,026,027,028,029,030,031 ++ * 032,033,034,035,036,037,038,039 ++ * 040,041,042,043,044,045,046,047 000,008,...,112,120 ++ * 048,049,050,051,052,053,054,055 001,009,...,113,121 ++ * 056,057,058,059,060,061,062,063 to 002,010,...,114,122 ++ * 064,068,066,067,068,069,070,071 =====> 003,011,...,115,123 ++ * 072,073,074,075,076,077,078,079 004,012,...,116,124 ++ * 080,081,082,083,084,085,086,087 005,013,...,117,125 ++ * 088,089,090,091,092,093,094,095 006,014,...,118,126 ++ * 096,097,098,099,100,101,102,103 007,015,...,119,127 ++ * 
104,105,106,107,108,109,110,111 ++ * 112,113,114,115,116,117,118,119 ++ * 120,121,122,123,124,125,126,127 ++ * ============================================================================= ++ */ ++#define LSX_TRANSPOSE16x8_B(_in0, _in1, _in2, _in3, _in4, _in5, _in6, _in7, \ ++ _in8, _in9, _in10, _in11, _in12, _in13, _in14, \ ++ _in15, _out0, _out1, _out2, _out3, _out4, _out5, \ ++ _out6, _out7) \ ++ { \ ++ __m128i _tmp0, _tmp1, _tmp2, _tmp3, _tmp4, _tmp5, _tmp6, _tmp7; \ ++ __m128i _t0, _t1, _t2, _t3, _t4, _t5, _t6, _t7; \ ++ DUP4_ARG2(__lsx_vilvl_b, _in2, _in0, _in3, _in1, _in6, _in4, _in7, _in5, \ ++ _tmp0, _tmp1, _tmp2, _tmp3); \ ++ DUP4_ARG2(__lsx_vilvl_b, _in10, _in8, _in11, _in9, _in14, _in12, _in15, \ ++ _in13, _tmp4, _tmp5, _tmp6, _tmp7); \ ++ DUP2_ARG2(__lsx_vilvl_b, _tmp1, _tmp0, _tmp3, _tmp2, _t0, _t2); \ ++ DUP2_ARG2(__lsx_vilvh_b, _tmp1, _tmp0, _tmp3, _tmp2, _t1, _t3); \ ++ DUP2_ARG2(__lsx_vilvl_b, _tmp5, _tmp4, _tmp7, _tmp6, _t4, _t6); \ ++ DUP2_ARG2(__lsx_vilvh_b, _tmp5, _tmp4, _tmp7, _tmp6, _t5, _t7); \ ++ DUP2_ARG2(__lsx_vilvl_w, _t2, _t0, _t3, _t1, _tmp0, _tmp4); \ ++ DUP2_ARG2(__lsx_vilvh_w, _t2, _t0, _t3, _t1, _tmp2, _tmp6); \ ++ DUP2_ARG2(__lsx_vilvl_w, _t6, _t4, _t7, _t5, _tmp1, _tmp5); \ ++ DUP2_ARG2(__lsx_vilvh_w, _t6, _t4, _t7, _t5, _tmp3, _tmp7); \ ++ DUP2_ARG2(__lsx_vilvl_d, _tmp1, _tmp0, _tmp3, _tmp2, _out0, _out2); \ ++ DUP2_ARG2(__lsx_vilvh_d, _tmp1, _tmp0, _tmp3, _tmp2, _out1, _out3); \ ++ DUP2_ARG2(__lsx_vilvl_d, _tmp5, _tmp4, _tmp7, _tmp6, _out4, _out6); \ ++ DUP2_ARG2(__lsx_vilvh_d, _tmp5, _tmp4, _tmp7, _tmp6, _out5, _out7); \ ++ } ++ ++/* ++ * ============================================================================= ++ * Description : Butterfly of 4 input vectors ++ * Arguments : Inputs - in0, in1, in2, in3 ++ * Outputs - out0, out1, out2, out3 ++ * Details : Butterfly operation ++ * Example : ++ * out0 = in0 + in3; ++ * out1 = in1 + in2; ++ * out2 = in1 - in2; ++ * out3 = in0 - in3; ++ * ============================================================================= ++ */ ++#define LSX_BUTTERFLY_4_B(_in0, _in1, _in2, _in3, _out0, _out1, _out2, _out3) \ ++ { \ ++ _out0 = __lsx_vadd_b(_in0, _in3); \ ++ _out1 = __lsx_vadd_b(_in1, _in2); \ ++ _out2 = __lsx_vsub_b(_in1, _in2); \ ++ _out3 = __lsx_vsub_b(_in0, _in3); \ ++ } ++#define LSX_BUTTERFLY_4_H(_in0, _in1, _in2, _in3, _out0, _out1, _out2, _out3) \ ++ { \ ++ _out0 = __lsx_vadd_h(_in0, _in3); \ ++ _out1 = __lsx_vadd_h(_in1, _in2); \ ++ _out2 = __lsx_vsub_h(_in1, _in2); \ ++ _out3 = __lsx_vsub_h(_in0, _in3); \ ++ } ++#define LSX_BUTTERFLY_4_W(_in0, _in1, _in2, _in3, _out0, _out1, _out2, _out3) \ ++ { \ ++ _out0 = __lsx_vadd_w(_in0, _in3); \ ++ _out1 = __lsx_vadd_w(_in1, _in2); \ ++ _out2 = __lsx_vsub_w(_in1, _in2); \ ++ _out3 = __lsx_vsub_w(_in0, _in3); \ ++ } ++#define LSX_BUTTERFLY_4_D(_in0, _in1, _in2, _in3, _out0, _out1, _out2, _out3) \ ++ { \ ++ _out0 = __lsx_vadd_d(_in0, _in3); \ ++ _out1 = __lsx_vadd_d(_in1, _in2); \ ++ _out2 = __lsx_vsub_d(_in1, _in2); \ ++ _out3 = __lsx_vsub_d(_in0, _in3); \ ++ } ++ ++/* ++ * ============================================================================= ++ * Description : Butterfly of 8 input vectors ++ * Arguments : Inputs - _in0, _in1, _in2, _in3, ~ ++ * Outputs - _out0, _out1, _out2, _out3, ~ ++ * Details : Butterfly operation ++ * Example : ++ * _out0 = _in0 + _in7; ++ * _out1 = _in1 + _in6; ++ * _out2 = _in2 + _in5; ++ * _out3 = _in3 + _in4; ++ * _out4 = _in3 - _in4; ++ * _out5 = _in2 - _in5; ++ * _out6 = _in1 - _in6; ++ * _out7 = _in0 - _in7; ++ * 
============================================================================= ++ */ ++#define LSX_BUTTERFLY_8_B(_in0, _in1, _in2, _in3, _in4, _in5, _in6, _in7, \ ++ _out0, _out1, _out2, _out3, _out4, _out5, _out6, \ ++ _out7) \ ++ { \ ++ _out0 = __lsx_vadd_b(_in0, _in7); \ ++ _out1 = __lsx_vadd_b(_in1, _in6); \ ++ _out2 = __lsx_vadd_b(_in2, _in5); \ ++ _out3 = __lsx_vadd_b(_in3, _in4); \ ++ _out4 = __lsx_vsub_b(_in3, _in4); \ ++ _out5 = __lsx_vsub_b(_in2, _in5); \ ++ _out6 = __lsx_vsub_b(_in1, _in6); \ ++ _out7 = __lsx_vsub_b(_in0, _in7); \ ++ } ++ ++#define LSX_BUTTERFLY_8_H(_in0, _in1, _in2, _in3, _in4, _in5, _in6, _in7, \ ++ _out0, _out1, _out2, _out3, _out4, _out5, _out6, \ ++ _out7) \ ++ { \ ++ _out0 = __lsx_vadd_h(_in0, _in7); \ ++ _out1 = __lsx_vadd_h(_in1, _in6); \ ++ _out2 = __lsx_vadd_h(_in2, _in5); \ ++ _out3 = __lsx_vadd_h(_in3, _in4); \ ++ _out4 = __lsx_vsub_h(_in3, _in4); \ ++ _out5 = __lsx_vsub_h(_in2, _in5); \ ++ _out6 = __lsx_vsub_h(_in1, _in6); \ ++ _out7 = __lsx_vsub_h(_in0, _in7); \ ++ } ++ ++#define LSX_BUTTERFLY_8_W(_in0, _in1, _in2, _in3, _in4, _in5, _in6, _in7, \ ++ _out0, _out1, _out2, _out3, _out4, _out5, _out6, \ ++ _out7) \ ++ { \ ++ _out0 = __lsx_vadd_w(_in0, _in7); \ ++ _out1 = __lsx_vadd_w(_in1, _in6); \ ++ _out2 = __lsx_vadd_w(_in2, _in5); \ ++ _out3 = __lsx_vadd_w(_in3, _in4); \ ++ _out4 = __lsx_vsub_w(_in3, _in4); \ ++ _out5 = __lsx_vsub_w(_in2, _in5); \ ++ _out6 = __lsx_vsub_w(_in1, _in6); \ ++ _out7 = __lsx_vsub_w(_in0, _in7); \ ++ } ++ ++#define LSX_BUTTERFLY_8_D(_in0, _in1, _in2, _in3, _in4, _in5, _in6, _in7, \ ++ _out0, _out1, _out2, _out3, _out4, _out5, _out6, \ ++ _out7) \ ++ { \ ++ _out0 = __lsx_vadd_d(_in0, _in7); \ ++ _out1 = __lsx_vadd_d(_in1, _in6); \ ++ _out2 = __lsx_vadd_d(_in2, _in5); \ ++ _out3 = __lsx_vadd_d(_in3, _in4); \ ++ _out4 = __lsx_vsub_d(_in3, _in4); \ ++ _out5 = __lsx_vsub_d(_in2, _in5); \ ++ _out6 = __lsx_vsub_d(_in1, _in6); \ ++ _out7 = __lsx_vsub_d(_in0, _in7); \ ++ } ++ ++/* ++ * ============================================================================= ++ * Description : Butterfly of 16 input vectors ++ * Arguments : Inputs - _in0, _in1, _in2, _in3, ~ ++ * Outputs - _out0, _out1, _out2, _out3, ~ ++ * Details : Butterfly operation ++ * Example : ++ * _out0 = _in0 + _in15; ++ * _out1 = _in1 + _in14; ++ * _out2 = _in2 + _in13; ++ * _out3 = _in3 + _in12; ++ * _out4 = _in4 + _in11; ++ * _out5 = _in5 + _in10; ++ * _out6 = _in6 + _in9; ++ * _out7 = _in7 + _in8; ++ * _out8 = _in7 - _in8; ++ * _out9 = _in6 - _in9; ++ * _out10 = _in5 - _in10; ++ * _out11 = _in4 - _in11; ++ * _out12 = _in3 - _in12; ++ * _out13 = _in2 - _in13; ++ * _out14 = _in1 - _in14; ++ * _out15 = _in0 - _in15; ++ * ============================================================================= ++ */ ++ ++#define LSX_BUTTERFLY_16_B(_in0, _in1, _in2, _in3, _in4, _in5, _in6, _in7, \ ++ _in8, _in9, _in10, _in11, _in12, _in13, _in14, \ ++ _in15, _out0, _out1, _out2, _out3, _out4, _out5, \ ++ _out6, _out7, _out8, _out9, _out10, _out11, _out12, \ ++ _out13, _out14, _out15) \ ++ { \ ++ _out0 = __lsx_vadd_b(_in0, _in15); \ ++ _out1 = __lsx_vadd_b(_in1, _in14); \ ++ _out2 = __lsx_vadd_b(_in2, _in13); \ ++ _out3 = __lsx_vadd_b(_in3, _in12); \ ++ _out4 = __lsx_vadd_b(_in4, _in11); \ ++ _out5 = __lsx_vadd_b(_in5, _in10); \ ++ _out6 = __lsx_vadd_b(_in6, _in9); \ ++ _out7 = __lsx_vadd_b(_in7, _in8); \ ++ \ ++ _out8 = __lsx_vsub_b(_in7, _in8); \ ++ _out9 = __lsx_vsub_b(_in6, _in9); \ ++ _out10 = __lsx_vsub_b(_in5, _in10); \ ++ _out11 = __lsx_vsub_b(_in4, _in11); \ ++ _out12 = 
__lsx_vsub_b(_in3, _in12); \ ++ _out13 = __lsx_vsub_b(_in2, _in13); \ ++ _out14 = __lsx_vsub_b(_in1, _in14); \ ++ _out15 = __lsx_vsub_b(_in0, _in15); \ ++ } ++ ++#define LSX_BUTTERFLY_16_H(_in0, _in1, _in2, _in3, _in4, _in5, _in6, _in7, \ ++ _in8, _in9, _in10, _in11, _in12, _in13, _in14, \ ++ _in15, _out0, _out1, _out2, _out3, _out4, _out5, \ ++ _out6, _out7, _out8, _out9, _out10, _out11, _out12, \ ++ _out13, _out14, _out15) \ ++ { \ ++ _out0 = __lsx_vadd_h(_in0, _in15); \ ++ _out1 = __lsx_vadd_h(_in1, _in14); \ ++ _out2 = __lsx_vadd_h(_in2, _in13); \ ++ _out3 = __lsx_vadd_h(_in3, _in12); \ ++ _out4 = __lsx_vadd_h(_in4, _in11); \ ++ _out5 = __lsx_vadd_h(_in5, _in10); \ ++ _out6 = __lsx_vadd_h(_in6, _in9); \ ++ _out7 = __lsx_vadd_h(_in7, _in8); \ ++ \ ++ _out8 = __lsx_vsub_h(_in7, _in8); \ ++ _out9 = __lsx_vsub_h(_in6, _in9); \ ++ _out10 = __lsx_vsub_h(_in5, _in10); \ ++ _out11 = __lsx_vsub_h(_in4, _in11); \ ++ _out12 = __lsx_vsub_h(_in3, _in12); \ ++ _out13 = __lsx_vsub_h(_in2, _in13); \ ++ _out14 = __lsx_vsub_h(_in1, _in14); \ ++ _out15 = __lsx_vsub_h(_in0, _in15); \ ++ } ++ ++#define LSX_BUTTERFLY_16_W(_in0, _in1, _in2, _in3, _in4, _in5, _in6, _in7, \ ++ _in8, _in9, _in10, _in11, _in12, _in13, _in14, \ ++ _in15, _out0, _out1, _out2, _out3, _out4, _out5, \ ++ _out6, _out7, _out8, _out9, _out10, _out11, _out12, \ ++ _out13, _out14, _out15) \ ++ { \ ++ _out0 = __lsx_vadd_w(_in0, _in15); \ ++ _out1 = __lsx_vadd_w(_in1, _in14); \ ++ _out2 = __lsx_vadd_w(_in2, _in13); \ ++ _out3 = __lsx_vadd_w(_in3, _in12); \ ++ _out4 = __lsx_vadd_w(_in4, _in11); \ ++ _out5 = __lsx_vadd_w(_in5, _in10); \ ++ _out6 = __lsx_vadd_w(_in6, _in9); \ ++ _out7 = __lsx_vadd_w(_in7, _in8); \ ++ \ ++ _out8 = __lsx_vsub_w(_in7, _in8); \ ++ _out9 = __lsx_vsub_w(_in6, _in9); \ ++ _out10 = __lsx_vsub_w(_in5, _in10); \ ++ _out11 = __lsx_vsub_w(_in4, _in11); \ ++ _out12 = __lsx_vsub_w(_in3, _in12); \ ++ _out13 = __lsx_vsub_w(_in2, _in13); \ ++ _out14 = __lsx_vsub_w(_in1, _in14); \ ++ _out15 = __lsx_vsub_w(_in0, _in15); \ ++ } ++ ++#define LSX_BUTTERFLY_16_D(_in0, _in1, _in2, _in3, _in4, _in5, _in6, _in7, \ ++ _in8, _in9, _in10, _in11, _in12, _in13, _in14, \ ++ _in15, _out0, _out1, _out2, _out3, _out4, _out5, \ ++ _out6, _out7, _out8, _out9, _out10, _out11, _out12, \ ++ _out13, _out14, _out15) \ ++ { \ ++ _out0 = __lsx_vadd_d(_in0, _in15); \ ++ _out1 = __lsx_vadd_d(_in1, _in14); \ ++ _out2 = __lsx_vadd_d(_in2, _in13); \ ++ _out3 = __lsx_vadd_d(_in3, _in12); \ ++ _out4 = __lsx_vadd_d(_in4, _in11); \ ++ _out5 = __lsx_vadd_d(_in5, _in10); \ ++ _out6 = __lsx_vadd_d(_in6, _in9); \ ++ _out7 = __lsx_vadd_d(_in7, _in8); \ ++ \ ++ _out8 = __lsx_vsub_d(_in7, _in8); \ ++ _out9 = __lsx_vsub_d(_in6, _in9); \ ++ _out10 = __lsx_vsub_d(_in5, _in10); \ ++ _out11 = __lsx_vsub_d(_in4, _in11); \ ++ _out12 = __lsx_vsub_d(_in3, _in12); \ ++ _out13 = __lsx_vsub_d(_in2, _in13); \ ++ _out14 = __lsx_vsub_d(_in1, _in14); \ ++ _out15 = __lsx_vsub_d(_in0, _in15); \ ++ } ++ ++#endif // LSX ++ ++#ifdef __loongarch_asx ++#include ++/* ++ * ============================================================================= ++ * Description : Dot product of byte vector elements ++ * Arguments : Inputs - in_h, in_l ++ * Output - out ++ * Return Type - signed halfword ++ * Details : Unsigned byte elements from in_h are multiplied with ++ * unsigned byte elements from in_l producing a result ++ * twice the size of input i.e. signed halfword. 
++ * Then these multiplied results of adjacent odd-even elements ++ * are added to the out vector ++ * Example : See out = __lasx_xvdp2_w_h(in_h, in_l) ++ * ============================================================================= ++ */ ++static inline __m256i __lasx_xvdp2_h_bu(__m256i in_h, __m256i in_l) { ++ __m256i out; ++ ++ out = __lasx_xvmulwev_h_bu(in_h, in_l); ++ out = __lasx_xvmaddwod_h_bu(out, in_h, in_l); ++ return out; ++} ++ ++/* ++ * ============================================================================= ++ * Description : Dot product of byte vector elements ++ * Arguments : Inputs - in_h, in_l ++ * Output - out ++ * Return Type - signed halfword ++ * Details : Signed byte elements from in_h are multiplied with ++ * signed byte elements from in_l producing a result ++ * twice the size of input i.e. signed halfword. ++ * Then these multiplication results of adjacent odd-even elements ++ * are added to the out vector ++ * Example : See out = __lasx_xvdp2_w_h(in_h, in_l) ++ * ============================================================================= ++ */ ++static inline __m256i __lasx_xvdp2_h_b(__m256i in_h, __m256i in_l) { ++ __m256i out; ++ ++ out = __lasx_xvmulwev_h_b(in_h, in_l); ++ out = __lasx_xvmaddwod_h_b(out, in_h, in_l); ++ return out; ++} ++ ++/* ++ * ============================================================================= ++ * Description : Dot product of halfword vector elements ++ * Arguments : Inputs - in_h, in_l ++ * Output - out ++ * Return Type - signed word ++ * Details : Signed halfword elements from in_h are multiplied with ++ * signed halfword elements from in_l producing a result ++ * twice the size of input i.e. signed word. ++ * Then these multiplied results of adjacent odd-even elements ++ * are added to the out vector. ++ * Example : out = __lasx_xvdp2_w_h(in_h, in_l) ++ * in_h : 1,2,3,4, 5,6,7,8, 1,2,3,4, 5,6,7,8 ++ * in_l : 8,7,6,5, 4,3,2,1, 8,7,6,5, 4,3,2,1 ++ * out : 22,38,38,22, 22,38,38,22 ++ * ============================================================================= ++ */ ++static inline __m256i __lasx_xvdp2_w_h(__m256i in_h, __m256i in_l) { ++ __m256i out; ++ ++ out = __lasx_xvmulwev_w_h(in_h, in_l); ++ out = __lasx_xvmaddwod_w_h(out, in_h, in_l); ++ return out; ++} ++ ++/* ++ * ============================================================================= ++ * Description : Dot product of word vector elements ++ * Arguments : Inputs - in_h, in_l ++ * Output - out ++ * Return Type - signed double ++ * Details : Signed word elements from in_h are multiplied with ++ * signed word elements from in_l producing a result ++ * twice the size of input i.e. signed double-word. ++ * Then these multiplied results of adjacent odd-even elements ++ * are added to the out vector. ++ * Example : See out = __lasx_xvdp2_w_h(in_h, in_l) ++ * ============================================================================= ++ */ ++static inline __m256i __lasx_xvdp2_d_w(__m256i in_h, __m256i in_l) { ++ __m256i out; ++ ++ out = __lasx_xvmulwev_d_w(in_h, in_l); ++ out = __lasx_xvmaddwod_d_w(out, in_h, in_l); ++ return out; ++} ++ ++/* ++ * ============================================================================= ++ * Description : Dot product of halfword vector elements ++ * Arguments : Inputs - in_h, in_l ++ * Output - out ++ * Return Type - signed word ++ * Details : Unsigned halfword elements from in_h are multiplied with ++ * signed halfword elements from in_l producing a result ++ * twice the size of input i.e. unsigned word. 
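The __lasx_xvdp2_* helpers above all follow the same even/odd pairing: result element i of the widened output is in_h[2i]*in_l[2i] + in_h[2i+1]*in_l[2i+1]. As a plain-C cross-check of the __lasx_xvdp2_w_h worked example (an editor's aside, not part of the patch; the name dp2_w_h_ref and the standalone program are illustrative only and assume nothing beyond standard C):

/* Scalar model of the even/odd widening dot product used by the
 * __lasx_xvdp2_* helpers: 32-bit output i is
 *   in_h[2i] * in_l[2i] + in_h[2i+1] * in_l[2i+1]
 * computed on 16-bit inputs.  dp2_w_h_ref is an illustrative name. */
#include <stdint.h>
#include <stdio.h>

static void
dp2_w_h_ref (const int16_t in_h[16], const int16_t in_l[16], int32_t out[8])
{
    for (int i = 0; i < 8; i++)
        out[i] = (int32_t) in_h[2 * i] * in_l[2 * i] +
                 (int32_t) in_h[2 * i + 1] * in_l[2 * i + 1];
}

int
main (void)
{
    /* Values taken from the __lasx_xvdp2_w_h example in the header above. */
    int16_t h[16] = { 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8 };
    int16_t l[16] = { 8, 7, 6, 5, 4, 3, 2, 1, 8, 7, 6, 5, 4, 3, 2, 1 };
    int32_t o[8];

    dp2_w_h_ref (h, l, o);
    for (int i = 0; i < 8; i++)
        printf ("%d%c", o[i], i == 7 ? '\n' : ',');  /* 22,38,38,22,22,38,38,22 */
    return 0;
}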
++ * Multiplication result of adjacent odd-even elements ++ * are added to the out vector ++ * Example : See out = __lasx_xvdp2_w_h(in_h, in_l) ++ * ============================================================================= ++ */ ++static inline __m256i __lasx_xvdp2_w_hu_h(__m256i in_h, __m256i in_l) { ++ __m256i out; ++ ++ out = __lasx_xvmulwev_w_hu_h(in_h, in_l); ++ out = __lasx_xvmaddwod_w_hu_h(out, in_h, in_l); ++ return out; ++} ++ ++/* ++ * ============================================================================= ++ * Description : Dot product & addition of byte vector elements ++ * Arguments : Inputs - in_h, in_l ++ * Output - out ++ * Return Type - halfword ++ * Details : Signed byte elements from in_h are multiplied with ++ * signed byte elements from in_l producing a result ++ * twice the size of input i.e. signed halfword. ++ * Then these multiplied results of adjacent odd-even elements ++ * are added to the in_c vector. ++ * Example : See out = __lasx_xvdp2add_w_h(in_c, in_h, in_l) ++ * ============================================================================= ++ */ ++static inline __m256i __lasx_xvdp2add_h_b(__m256i in_c, __m256i in_h, ++ __m256i in_l) { ++ __m256i out; ++ ++ out = __lasx_xvmaddwev_h_b(in_c, in_h, in_l); ++ out = __lasx_xvmaddwod_h_b(out, in_h, in_l); ++ return out; ++} ++ ++/* ++ * ============================================================================= ++ * Description : Dot product & addition of byte vector elements ++ * Arguments : Inputs - in_h, in_l ++ * Output - out ++ * Return Type - halfword ++ * Details : Unsigned byte elements from in_h are multiplied with ++ * unsigned byte elements from in_l producing a result ++ * twice the size of input i.e. signed halfword. ++ * Then these multiplied results of adjacent odd-even elements ++ * are added to the in_c vector. ++ * Example : See out = __lasx_xvdp2add_w_h(in_c, in_h, in_l) ++ * ============================================================================= ++ */ ++static inline __m256i __lasx_xvdp2add_h_bu(__m256i in_c, __m256i in_h, ++ __m256i in_l) { ++ __m256i out; ++ ++ out = __lasx_xvmaddwev_h_bu(in_c, in_h, in_l); ++ out = __lasx_xvmaddwod_h_bu(out, in_h, in_l); ++ return out; ++} ++ ++/* ++ * ============================================================================= ++ * Description : Dot product & addition of byte vector elements ++ * Arguments : Inputs - in_h, in_l ++ * Output - out ++ * Return Type - halfword ++ * Details : Unsigned byte elements from in_h are multiplied with ++ * signed byte elements from in_l producing a result ++ * twice the size of input i.e. signed halfword. ++ * Then these multiplied results of adjacent odd-even elements ++ * are added to the in_c vector. ++ * Example : See out = __lasx_xvdp2add_w_h(in_c, in_h, in_l) ++ * ============================================================================= ++ */ ++static inline __m256i __lasx_xvdp2add_h_bu_b(__m256i in_c, __m256i in_h, ++ __m256i in_l) { ++ __m256i out; ++ ++ out = __lasx_xvmaddwev_h_bu_b(in_c, in_h, in_l); ++ out = __lasx_xvmaddwod_h_bu_b(out, in_h, in_l); ++ return out; ++} ++ ++/* ++ * ============================================================================= ++ * Description : Dot product of halfword vector elements ++ * Arguments : Inputs - in_c, in_h, in_l ++ * Output - out ++ * Return Type - per RTYPE ++ * Details : Signed halfword elements from in_h are multiplied with ++ * signed halfword elements from in_l producing a result ++ * twice the size of input i.e. signed word. 
++ * Multiplication result of adjacent odd-even elements ++ * are added to the in_c vector. ++ * Example : out = __lasx_xvdp2add_w_h(in_c, in_h, in_l) ++ * in_c : 1,2,3,4, 1,2,3,4 ++ * in_h : 1,2,3,4, 5,6,7,8, 1,2,3,4, 5,6,7,8, ++ * in_l : 8,7,6,5, 4,3,2,1, 8,7,6,5, 4,3,2,1, ++ * out : 23,40,41,26, 23,40,41,26 ++ * ============================================================================= ++ */ ++static inline __m256i __lasx_xvdp2add_w_h(__m256i in_c, __m256i in_h, ++ __m256i in_l) { ++ __m256i out; ++ ++ out = __lasx_xvmaddwev_w_h(in_c, in_h, in_l); ++ out = __lasx_xvmaddwod_w_h(out, in_h, in_l); ++ return out; ++} ++ ++/* ++ * ============================================================================= ++ * Description : Dot product of halfword vector elements ++ * Arguments : Inputs - in_c, in_h, in_l ++ * Output - out ++ * Return Type - signed word ++ * Details : Unsigned halfword elements from in_h are multiplied with ++ * unsigned halfword elements from in_l producing a result ++ * twice the size of input i.e. signed word. ++ * Multiplication result of adjacent odd-even elements ++ * are added to the in_c vector. ++ * Example : See out = __lasx_xvdp2add_w_h(in_c, in_h, in_l) ++ * ============================================================================= ++ */ ++static inline __m256i __lasx_xvdp2add_w_hu(__m256i in_c, __m256i in_h, ++ __m256i in_l) { ++ __m256i out; ++ ++ out = __lasx_xvmaddwev_w_hu(in_c, in_h, in_l); ++ out = __lasx_xvmaddwod_w_hu(out, in_h, in_l); ++ return out; ++} ++ ++/* ++ * ============================================================================= ++ * Description : Dot product of halfword vector elements ++ * Arguments : Inputs - in_c, in_h, in_l ++ * Output - out ++ * Return Type - signed word ++ * Details : Unsigned halfword elements from in_h are multiplied with ++ * signed halfword elements from in_l producing a result ++ * twice the size of input i.e. signed word. ++ * Multiplication result of adjacent odd-even elements ++ * are added to the in_c vector ++ * Example : See out = __lasx_xvdp2add_w_h(in_c, in_h, in_l) ++ * ============================================================================= ++ */ ++static inline __m256i __lasx_xvdp2add_w_hu_h(__m256i in_c, __m256i in_h, ++ __m256i in_l) { ++ __m256i out; ++ ++ out = __lasx_xvmaddwev_w_hu_h(in_c, in_h, in_l); ++ out = __lasx_xvmaddwod_w_hu_h(out, in_h, in_l); ++ return out; ++} ++ ++/* ++ * ============================================================================= ++ * Description : Vector Unsigned Dot Product and Subtract ++ * Arguments : Inputs - in_c, in_h, in_l ++ * Output - out ++ * Return Type - signed halfword ++ * Details : Unsigned byte elements from in_h are multiplied with ++ * unsigned byte elements from in_l producing a result ++ * twice the size of input i.e. signed halfword. ++ * Multiplication result of adjacent odd-even elements ++ * are added together and subtracted from double width elements ++ * in_c vector. 
++ * Example : See out = __lasx_xvdp2sub_w_h(in_c, in_h, in_l) ++ * ============================================================================= ++ */ ++static inline __m256i __lasx_xvdp2sub_h_bu(__m256i in_c, __m256i in_h, ++ __m256i in_l) { ++ __m256i out; ++ ++ out = __lasx_xvmulwev_h_bu(in_h, in_l); ++ out = __lasx_xvmaddwod_h_bu(out, in_h, in_l); ++ out = __lasx_xvsub_h(in_c, out); ++ return out; ++} ++ ++/* ++ * ============================================================================= ++ * Description : Vector Signed Dot Product and Subtract ++ * Arguments : Inputs - in_c, in_h, in_l ++ * Output - out ++ * Return Type - signed word ++ * Details : Signed halfword elements from in_h are multiplied with ++ * Signed halfword elements from in_l producing a result ++ * twice the size of input i.e. signed word. ++ * Multiplication result of adjacent odd-even elements ++ * are added together and subtracted from double width elements ++ * in_c vector. ++ * Example : out = __lasx_xvdp2sub_w_h(in_c, in_h, in_l) ++ * in_c : 0,0,0,0, 0,0,0,0 ++ * in_h : 3,1,3,0, 0,0,0,1, 0,0,1,1, 0,0,0,1 ++ * in_l : 2,1,1,0, 1,0,0,0, 0,0,1,0, 1,0,0,1 ++ * out : -7,-3,0,0, 0,-1,0,-1 ++ * ============================================================================= ++ */ ++static inline __m256i __lasx_xvdp2sub_w_h(__m256i in_c, __m256i in_h, ++ __m256i in_l) { ++ __m256i out; ++ ++ out = __lasx_xvmulwev_w_h(in_h, in_l); ++ out = __lasx_xvmaddwod_w_h(out, in_h, in_l); ++ out = __lasx_xvsub_w(in_c, out); ++ return out; ++} ++ ++/* ++ * ============================================================================= ++ * Description : Dot product of halfword vector elements ++ * Arguments : Inputs - in_h, in_l ++ * Output - out ++ * Return Type - signed word ++ * Details : Signed halfword elements from in_h are multiplied with ++ * signed halfword elements from in_l producing a result ++ * four times the size of input i.e. signed doubleword. ++ * Then these multiplication results of four adjacent elements ++ * are added together and stored to the out vector. ++ * Example : out = __lasx_xvdp4_d_h(in_h, in_l) ++ * in_h : 3,1,3,0, 0,0,0,1, 0,0,1,-1, 0,0,0,1 ++ * in_l : -2,1,1,0, 1,0,0,0, 0,0,1, 0, 1,0,0,1 ++ * out : -2,0,1,1 ++ * ============================================================================= ++ */ ++static inline __m256i __lasx_xvdp4_d_h(__m256i in_h, __m256i in_l) { ++ __m256i out; ++ ++ out = __lasx_xvmulwev_w_h(in_h, in_l); ++ out = __lasx_xvmaddwod_w_h(out, in_h, in_l); ++ out = __lasx_xvhaddw_d_w(out, out); ++ return out; ++} ++ ++/* ++ * ============================================================================= ++ * Description : The high half of the vector elements are expanded and ++ * added after being doubled. ++ * Arguments : Inputs - in_h, in_l ++ * Output - out ++ * Details : The in_h vector and the in_l vector are added after the ++ * higher half of the two-fold sign extension (signed byte ++ * to signed halfword) and stored to the out vector. ++ * Example : See out = __lasx_xvaddwh_w_h(in_h, in_l) ++ * ============================================================================= ++ */ ++static inline __m256i __lasx_xvaddwh_h_b(__m256i in_h, __m256i in_l) { ++ __m256i out; ++ ++ out = __lasx_xvilvh_b(in_h, in_l); ++ out = __lasx_xvhaddw_h_b(out, out); ++ return out; ++} ++ ++/* ++ * ============================================================================= ++ * Description : The high half of the vector elements are expanded and ++ * added after being doubled. 
++ * Arguments : Inputs - in_h, in_l ++ * Output - out ++ * Details : The in_h vector and the in_l vector are added after the ++ * higher half of the two-fold sign extension (signed halfword ++ * to signed word) and stored to the out vector. ++ * Example : out = __lasx_xvaddwh_w_h(in_h, in_l) ++ * in_h : 3, 0,3,0, 0,0,0,-1, 0,0,1,-1, 0,0,0,1 ++ * in_l : 2,-1,1,2, 1,0,0, 0, 1,0,1, 0, 1,0,0,1 ++ * out : 1,0,0,-1, 1,0,0, 2 ++ * ============================================================================= ++ */ ++static inline __m256i __lasx_xvaddwh_w_h(__m256i in_h, __m256i in_l) { ++ __m256i out; ++ ++ out = __lasx_xvilvh_h(in_h, in_l); ++ out = __lasx_xvhaddw_w_h(out, out); ++ return out; ++} ++ ++/* ++ * ============================================================================= ++ * Description : The low half of the vector elements are expanded and ++ * added after being doubled. ++ * Arguments : Inputs - in_h, in_l ++ * Output - out ++ * Details : The in_h vector and the in_l vector are added after the ++ * lower half of the two-fold sign extension (signed byte ++ * to signed halfword) and stored to the out vector. ++ * Example : See out = __lasx_xvaddwl_w_h(in_h, in_l) ++ * ============================================================================= ++ */ ++static inline __m256i __lasx_xvaddwl_h_b(__m256i in_h, __m256i in_l) { ++ __m256i out; ++ ++ out = __lasx_xvilvl_b(in_h, in_l); ++ out = __lasx_xvhaddw_h_b(out, out); ++ return out; ++} ++ ++/* ++ * ============================================================================= ++ * Description : The low half of the vector elements are expanded and ++ * added after being doubled. ++ * Arguments : Inputs - in_h, in_l ++ * Output - out ++ * Details : The in_h vector and the in_l vector are added after the ++ * lower half of the two-fold sign extension (signed halfword ++ * to signed word) and stored to the out vector. ++ * Example : out = __lasx_xvaddwl_w_h(in_h, in_l) ++ * in_h : 3, 0,3,0, 0,0,0,-1, 0,0,1,-1, 0,0,0,1 ++ * in_l : 2,-1,1,2, 1,0,0, 0, 1,0,1, 0, 1,0,0,1 ++ * out : 5,-1,4,2, 1,0,2,-1 ++ * ============================================================================= ++ */ ++static inline __m256i __lasx_xvaddwl_w_h(__m256i in_h, __m256i in_l) { ++ __m256i out; ++ ++ out = __lasx_xvilvl_h(in_h, in_l); ++ out = __lasx_xvhaddw_w_h(out, out); ++ return out; ++} ++ ++/* ++ * ============================================================================= ++ * Description : The low half of the vector elements are expanded and ++ * added after being doubled. ++ * Arguments : Inputs - in_h, in_l ++ * Output - out ++ * Details : The out vector and the out vector are added after the ++ * lower half of the two-fold zero extension (unsigned byte ++ * to unsigned halfword) and stored to the out vector. ++ * Example : See out = __lasx_xvaddwl_w_h(in_h, in_l) ++ * ============================================================================= ++ */ ++static inline __m256i __lasx_xvaddwl_h_bu(__m256i in_h, __m256i in_l) { ++ __m256i out; ++ ++ out = __lasx_xvilvl_b(in_h, in_l); ++ out = __lasx_xvhaddw_hu_bu(out, out); ++ return out; ++} ++ ++/* ++ * ============================================================================= ++ * Description : The low half of the vector elements are expanded and ++ * added after being doubled. ++ * Arguments : Inputs - in_h, in_l ++ * Output - out ++ * Details : The in_l vector after double zero extension (unsigned byte to ++ * signed halfword),added to the in_h vector. 
++ * Example : See out = __lasx_xvaddw_w_w_h(in_h, in_l) ++ * ============================================================================= ++ */ ++static inline __m256i __lasx_xvaddw_h_h_bu(__m256i in_h, __m256i in_l) { ++ __m256i out; ++ ++ out = __lasx_xvsllwil_hu_bu(in_l, 0); ++ out = __lasx_xvadd_h(in_h, out); ++ return out; ++} ++ ++/* ++ * ============================================================================= ++ * Description : The low half of the vector elements are expanded and ++ * added after being doubled. ++ * Arguments : Inputs - in_h, in_l ++ * Output - out ++ * Details : The in_l vector after double sign extension (signed halfword to ++ * signed word), added to the in_h vector. ++ * Example : out = __lasx_xvaddw_w_w_h(in_h, in_l) ++ * in_h : 0, 1,0,0, -1,0,0,1, ++ * in_l : 2,-1,1,2, 1,0,0,0, 0,0,1,0, 1,0,0,1, ++ * out : 2, 0,1,2, -1,0,1,1, ++ * ============================================================================= ++ */ ++static inline __m256i __lasx_xvaddw_w_w_h(__m256i in_h, __m256i in_l) { ++ __m256i out; ++ ++ out = __lasx_xvsllwil_w_h(in_l, 0); ++ out = __lasx_xvadd_w(in_h, out); ++ return out; ++} ++ ++/* ++ * ============================================================================= ++ * Description : Multiplication and addition calculation after expansion ++ * of the lower half of the vector. ++ * Arguments : Inputs - in_c, in_h, in_l ++ * Output - out ++ * Details : The in_h vector and the in_l vector are multiplied after ++ * the lower half of the two-fold sign extension (signed halfword ++ * to signed word), and the result is added to the vector in_c, ++ * then stored to the out vector. ++ * Example : out = __lasx_xvmaddwl_w_h(in_c, in_h, in_l) ++ * in_c : 1,2,3,4, 5,6,7,8 ++ * in_h : 1,2,3,4, 1,2,3,4, 5,6,7,8, 5,6,7,8 ++ * in_l : 200, 300, 400, 500, 2000, 3000, 4000, 5000, ++ * -200,-300,-400,-500, -2000,-3000,-4000,-5000 ++ * out : 201, 602,1203,2004, -995, -1794,-2793,-3992 ++ * ============================================================================= ++ */ ++static inline __m256i __lasx_xvmaddwl_w_h(__m256i in_c, __m256i in_h, ++ __m256i in_l) { ++ __m256i tmp0, tmp1, out; ++ ++ tmp0 = __lasx_xvsllwil_w_h(in_h, 0); ++ tmp1 = __lasx_xvsllwil_w_h(in_l, 0); ++ tmp0 = __lasx_xvmul_w(tmp0, tmp1); ++ out = __lasx_xvadd_w(tmp0, in_c); ++ return out; ++} ++ ++/* ++ * ============================================================================= ++ * Description : Multiplication and addition calculation after expansion ++ * of the higher half of the vector. ++ * Arguments : Inputs - in_c, in_h, in_l ++ * Output - out ++ * Details : The in_h vector and the in_l vector are multiplied after ++ * the higher half of the two-fold sign extension (signed ++ * halfword to signed word), and the result is added to ++ * the vector in_c, then stored to the out vector. ++ * Example : See out = __lasx_xvmaddwl_w_h(in_c, in_h, in_l) ++ * ============================================================================= ++ */ ++static inline __m256i __lasx_xvmaddwh_w_h(__m256i in_c, __m256i in_h, ++ __m256i in_l) { ++ __m256i tmp0, tmp1, out; ++ ++ tmp0 = __lasx_xvilvh_h(in_h, in_h); ++ tmp1 = __lasx_xvilvh_h(in_l, in_l); ++ tmp0 = __lasx_xvmulwev_w_h(tmp0, tmp1); ++ out = __lasx_xvadd_w(tmp0, in_c); ++ return out; ++} ++ ++/* ++ * ============================================================================= ++ * Description : Multiplication calculation after expansion of the lower ++ * half of the vector. 
++ * Arguments : Inputs - in_h, in_l ++ * Output - out ++ * Details : The in_h vector and the in_l vector are multiplied after ++ * the lower half of the two-fold sign extension (signed ++ * halfword to signed word), then stored to the out vector. ++ * Example : out = __lasx_xvmulwl_w_h(in_h, in_l) ++ * in_h : 3,-1,3,0, 0,0,0,-1, 0,0,1,-1, 0,0,0,1 ++ * in_l : 2,-1,1,2, 1,0,0, 0, 0,0,1, 0, 1,0,0,1 ++ * out : 6,1,3,0, 0,0,1,0 ++ * ============================================================================= ++ */ ++static inline __m256i __lasx_xvmulwl_w_h(__m256i in_h, __m256i in_l) { ++ __m256i tmp0, tmp1, out; ++ ++ tmp0 = __lasx_xvsllwil_w_h(in_h, 0); ++ tmp1 = __lasx_xvsllwil_w_h(in_l, 0); ++ out = __lasx_xvmul_w(tmp0, tmp1); ++ return out; ++} ++ ++/* ++ * ============================================================================= ++ * Description : Multiplication calculation after expansion of the lower ++ * half of the vector. ++ * Arguments : Inputs - in_h, in_l ++ * Output - out ++ * Details : The in_h vector and the in_l vector are multiplied after ++ * the lower half of the two-fold sign extension (signed ++ * halfword to signed word), then stored to the out vector. ++ * Example : out = __lasx_xvmulwh_w_h(in_h, in_l) ++ * in_h : 3,-1,3,0, 0,0,0,-1, 0,0,1,-1, 0,0,0,1 ++ * in_l : 2,-1,1,2, 1,0,0, 0, 0,0,1, 0, 1,0,0,1 ++ * out : 0,0,0,0, 0,0,0,1 ++ * ============================================================================= ++ */ ++static inline __m256i __lasx_xvmulwh_w_h(__m256i in_h, __m256i in_l) { ++ __m256i tmp0, tmp1, out; ++ ++ tmp0 = __lasx_xvilvh_h(in_h, in_h); ++ tmp1 = __lasx_xvilvh_h(in_l, in_l); ++ out = __lasx_xvmulwev_w_h(tmp0, tmp1); ++ return out; ++} ++ ++/* ++ * ============================================================================= ++ * Description : The low half of the vector elements are added to the high half ++ * after being doubled, then saturated. ++ * Arguments : Inputs - in_h, in_l ++ * Output - out ++ * Details : The in_h vector adds the in_l vector after the lower half of ++ * the two-fold zero extension (unsigned byte to unsigned ++ * halfword) and then saturated. The results are stored to the out ++ * vector. ++ * Example : out = __lasx_xvsaddw_hu_hu_bu(in_h, in_l) ++ * in_h : 2,65532,1,2, 1,0,0,0, 0,0,1,0, 1,0,0,1 ++ * in_l : 3,6,3,0, 0,0,0,1, 0,0,1,1, 0,0,0,1, 3,18,3,0, 0,0,0,1, 0,0,1,1, ++ * 0,0,0,1 ++ * out : 5,65535,4,2, 1,0,0,1, 3,18,4,0, 1,0,0,2, ++ * ============================================================================= ++ */ ++static inline __m256i __lasx_xvsaddw_hu_hu_bu(__m256i in_h, __m256i in_l) { ++ __m256i tmp1, out; ++ __m256i zero = { 0 }; ++ ++ tmp1 = __lasx_xvilvl_b(zero, in_l); ++ out = __lasx_xvsadd_hu(in_h, tmp1); ++ return out; ++} ++ ++/* ++ * ============================================================================= ++ * Description : Clip all halfword elements of input vector between min & max ++ * out = ((in) < (min)) ? (min) : (((in) > (max)) ? 
(max) : (in)) ++ * Arguments : Inputs - in (input vector) ++ * - min (min threshold) ++ * - max (max threshold) ++ * Outputs - in (output vector with clipped elements) ++ * Return Type - signed halfword ++ * Example : out = __lasx_xvclip_h(in, min, max) ++ * in : -8,2,280,249, -8,255,280,249, 4,4,4,4, 5,5,5,5 ++ * min : 1,1,1,1, 1,1,1,1, 1,1,1,1, 1,1,1,1 ++ * max : 9,9,9,9, 9,9,9,9, 9,9,9,9, 9,9,9,9 ++ * out : 1,2,9,9, 1,9,9,9, 4,4,4,4, 5,5,5,5 ++ * ============================================================================= ++ */ ++static inline __m256i __lasx_xvclip_h(__m256i in, __m256i min, __m256i max) { ++ __m256i out; ++ ++ out = __lasx_xvmax_h(min, in); ++ out = __lasx_xvmin_h(max, out); ++ return out; ++} ++ ++/* ++ * ============================================================================= ++ * Description : Clip all signed halfword elements of input vector ++ * between 0 & 255 ++ * Arguments : Inputs - in (input vector) ++ * Outputs - out (output vector with clipped elements) ++ * Return Type - signed halfword ++ * Example : See out = __lasx_xvclip255_w(in) ++ * ============================================================================= ++ */ ++static inline __m256i __lasx_xvclip255_h(__m256i in) { ++ __m256i out; ++ ++ out = __lasx_xvmaxi_h(in, 0); ++ out = __lasx_xvsat_hu(out, 7); ++ return out; ++} ++ ++/* ++ * ============================================================================= ++ * Description : Clip all signed word elements of input vector ++ * between 0 & 255 ++ * Arguments : Inputs - in (input vector) ++ * Output - out (output vector with clipped elements) ++ * Return Type - signed word ++ * Example : out = __lasx_xvclip255_w(in) ++ * in : -8,255,280,249, -8,255,280,249 ++ * out : 0,255,255,249, 0,255,255,249 ++ * ============================================================================= ++ */ ++static inline __m256i __lasx_xvclip255_w(__m256i in) { ++ __m256i out; ++ ++ out = __lasx_xvmaxi_w(in, 0); ++ out = __lasx_xvsat_wu(out, 7); ++ return out; ++} ++ ++/* ++ * ============================================================================= ++ * Description : Indexed halfword element values are replicated to all ++ * elements in output vector. If 'idx < 8' use xvsplati_l_*, ++ * if 'idx >= 8' use xvsplati_h_*. ++ * Arguments : Inputs - in, idx ++ * Output - out ++ * Details : Idx element value from in vector is replicated to all ++ * elements in out vector. ++ * Valid index range for halfword operation is 0-7 ++ * Example : out = __lasx_xvsplati_l_h(in, idx) ++ * in : 20,10,11,12, 13,14,15,16, 0,0,2,0, 0,0,0,0 ++ * idx : 0x02 ++ * out : 11,11,11,11, 11,11,11,11, 11,11,11,11, 11,11,11,11 ++ * ============================================================================= ++ */ ++static inline __m256i __lasx_xvsplati_l_h(__m256i in, int idx) { ++ __m256i out; ++ ++ out = __lasx_xvpermi_q(in, in, 0x02); ++ out = __lasx_xvreplve_h(out, idx); ++ return out; ++} ++ ++/* ++ * ============================================================================= ++ * Description : Indexed halfword element values are replicated to all ++ * elements in output vector. If 'idx < 8' use xvsplati_l_*, ++ * if 'idx >= 8' use xvsplati_h_*. ++ * Arguments : Inputs - in, idx ++ * Output - out ++ * Details : Idx element value from in vector is replicated to all ++ * elements in out vector. 
++ * Valid index range for halfword operation is 0-7 ++ * Example : out = __lasx_xvsplati_h_h(in, idx) ++ * in : 20,10,11,12, 13,14,15,16, 0,2,0,0, 0,0,0,0 ++ * idx : 0x09 ++ * out : 2,2,2,2, 2,2,2,2, 2,2,2,2, 2,2,2,2 ++ * ============================================================================= ++ */ ++static inline __m256i __lasx_xvsplati_h_h(__m256i in, int idx) { ++ __m256i out; ++ ++ out = __lasx_xvpermi_q(in, in, 0x13); ++ out = __lasx_xvreplve_h(out, idx); ++ return out; ++} ++ ++/* ++ * ============================================================================= ++ * Description : Transpose 4x4 block with double-word elements in vectors ++ * Arguments : Inputs - _in0, _in1, _in2, _in3 ++ * Outputs - _out0, _out1, _out2, _out3 ++ * Example : LASX_TRANSPOSE4x4_D ++ * _in0 : 1,2,3,4 ++ * _in1 : 1,2,3,4 ++ * _in2 : 1,2,3,4 ++ * _in3 : 1,2,3,4 ++ * ++ * _out0 : 1,1,1,1 ++ * _out1 : 2,2,2,2 ++ * _out2 : 3,3,3,3 ++ * _out3 : 4,4,4,4 ++ * ============================================================================= ++ */ ++#define LASX_TRANSPOSE4x4_D(_in0, _in1, _in2, _in3, _out0, _out1, _out2, \ ++ _out3) \ ++ { \ ++ __m256i _tmp0, _tmp1, _tmp2, _tmp3; \ ++ _tmp0 = __lasx_xvilvl_d(_in1, _in0); \ ++ _tmp1 = __lasx_xvilvh_d(_in1, _in0); \ ++ _tmp2 = __lasx_xvilvl_d(_in3, _in2); \ ++ _tmp3 = __lasx_xvilvh_d(_in3, _in2); \ ++ _out0 = __lasx_xvpermi_q(_tmp2, _tmp0, 0x20); \ ++ _out2 = __lasx_xvpermi_q(_tmp2, _tmp0, 0x31); \ ++ _out1 = __lasx_xvpermi_q(_tmp3, _tmp1, 0x20); \ ++ _out3 = __lasx_xvpermi_q(_tmp3, _tmp1, 0x31); \ ++ } ++ ++/* ++ * ============================================================================= ++ * Description : Transpose 8x8 block with word elements in vectors ++ * Arguments : Inputs - _in0, _in1, _in2, _in3, _in4, _in5, _in6, _in7 ++ * Outputs - _out0, _out1, _out2, _out3, _out4, _out5, _out6, ++ * _out7 ++ * Example : LASX_TRANSPOSE8x8_W ++ * _in0 : 1,2,3,4,5,6,7,8 ++ * _in1 : 2,2,3,4,5,6,7,8 ++ * _in2 : 3,2,3,4,5,6,7,8 ++ * _in3 : 4,2,3,4,5,6,7,8 ++ * _in4 : 5,2,3,4,5,6,7,8 ++ * _in5 : 6,2,3,4,5,6,7,8 ++ * _in6 : 7,2,3,4,5,6,7,8 ++ * _in7 : 8,2,3,4,5,6,7,8 ++ * ++ * _out0 : 1,2,3,4,5,6,7,8 ++ * _out1 : 2,2,2,2,2,2,2,2 ++ * _out2 : 3,3,3,3,3,3,3,3 ++ * _out3 : 4,4,4,4,4,4,4,4 ++ * _out4 : 5,5,5,5,5,5,5,5 ++ * _out5 : 6,6,6,6,6,6,6,6 ++ * _out6 : 7,7,7,7,7,7,7,7 ++ * _out7 : 8,8,8,8,8,8,8,8 ++ * ============================================================================= ++ */ ++#define LASX_TRANSPOSE8x8_W(_in0, _in1, _in2, _in3, _in4, _in5, _in6, _in7, \ ++ _out0, _out1, _out2, _out3, _out4, _out5, _out6, \ ++ _out7) \ ++ { \ ++ __m256i _s0_m, _s1_m; \ ++ __m256i _tmp0_m, _tmp1_m, _tmp2_m, _tmp3_m; \ ++ __m256i _tmp4_m, _tmp5_m, _tmp6_m, _tmp7_m; \ ++ \ ++ _s0_m = __lasx_xvilvl_w(_in2, _in0); \ ++ _s1_m = __lasx_xvilvl_w(_in3, _in1); \ ++ _tmp0_m = __lasx_xvilvl_w(_s1_m, _s0_m); \ ++ _tmp1_m = __lasx_xvilvh_w(_s1_m, _s0_m); \ ++ _s0_m = __lasx_xvilvh_w(_in2, _in0); \ ++ _s1_m = __lasx_xvilvh_w(_in3, _in1); \ ++ _tmp2_m = __lasx_xvilvl_w(_s1_m, _s0_m); \ ++ _tmp3_m = __lasx_xvilvh_w(_s1_m, _s0_m); \ ++ _s0_m = __lasx_xvilvl_w(_in6, _in4); \ ++ _s1_m = __lasx_xvilvl_w(_in7, _in5); \ ++ _tmp4_m = __lasx_xvilvl_w(_s1_m, _s0_m); \ ++ _tmp5_m = __lasx_xvilvh_w(_s1_m, _s0_m); \ ++ _s0_m = __lasx_xvilvh_w(_in6, _in4); \ ++ _s1_m = __lasx_xvilvh_w(_in7, _in5); \ ++ _tmp6_m = __lasx_xvilvl_w(_s1_m, _s0_m); \ ++ _tmp7_m = __lasx_xvilvh_w(_s1_m, _s0_m); \ ++ _out0 = __lasx_xvpermi_q(_tmp4_m, _tmp0_m, 0x20); \ ++ _out1 = __lasx_xvpermi_q(_tmp5_m, _tmp1_m, 0x20); \ ++ _out2 
= __lasx_xvpermi_q(_tmp6_m, _tmp2_m, 0x20); \ ++ _out3 = __lasx_xvpermi_q(_tmp7_m, _tmp3_m, 0x20); \ ++ _out4 = __lasx_xvpermi_q(_tmp4_m, _tmp0_m, 0x31); \ ++ _out5 = __lasx_xvpermi_q(_tmp5_m, _tmp1_m, 0x31); \ ++ _out6 = __lasx_xvpermi_q(_tmp6_m, _tmp2_m, 0x31); \ ++ _out7 = __lasx_xvpermi_q(_tmp7_m, _tmp3_m, 0x31); \ ++ } ++ ++/* ++ * ============================================================================= ++ * Description : Transpose input 16x8 byte block ++ * Arguments : Inputs - _in0, _in1, _in2, _in3, _in4, _in5, _in6, _in7, ++ * _in8, _in9, _in10, _in11, _in12, _in13, _in14, _in15 ++ * (input 16x8 byte block) ++ * Outputs - _out0, _out1, _out2, _out3, _out4, _out5, _out6, ++ * _out7 (output 8x16 byte block) ++ * Details : The rows of the matrix become columns, and the columns become ++ * rows. ++ * Example : See LASX_TRANSPOSE16x8_H ++ * ============================================================================= ++ */ ++#define LASX_TRANSPOSE16x8_B(_in0, _in1, _in2, _in3, _in4, _in5, _in6, _in7, \ ++ _in8, _in9, _in10, _in11, _in12, _in13, _in14, \ ++ _in15, _out0, _out1, _out2, _out3, _out4, _out5, \ ++ _out6, _out7) \ ++ { \ ++ __m256i _tmp0_m, _tmp1_m, _tmp2_m, _tmp3_m; \ ++ __m256i _tmp4_m, _tmp5_m, _tmp6_m, _tmp7_m; \ ++ \ ++ _tmp0_m = __lasx_xvilvl_b(_in2, _in0); \ ++ _tmp1_m = __lasx_xvilvl_b(_in3, _in1); \ ++ _tmp2_m = __lasx_xvilvl_b(_in6, _in4); \ ++ _tmp3_m = __lasx_xvilvl_b(_in7, _in5); \ ++ _tmp4_m = __lasx_xvilvl_b(_in10, _in8); \ ++ _tmp5_m = __lasx_xvilvl_b(_in11, _in9); \ ++ _tmp6_m = __lasx_xvilvl_b(_in14, _in12); \ ++ _tmp7_m = __lasx_xvilvl_b(_in15, _in13); \ ++ _out0 = __lasx_xvilvl_b(_tmp1_m, _tmp0_m); \ ++ _out1 = __lasx_xvilvh_b(_tmp1_m, _tmp0_m); \ ++ _out2 = __lasx_xvilvl_b(_tmp3_m, _tmp2_m); \ ++ _out3 = __lasx_xvilvh_b(_tmp3_m, _tmp2_m); \ ++ _out4 = __lasx_xvilvl_b(_tmp5_m, _tmp4_m); \ ++ _out5 = __lasx_xvilvh_b(_tmp5_m, _tmp4_m); \ ++ _out6 = __lasx_xvilvl_b(_tmp7_m, _tmp6_m); \ ++ _out7 = __lasx_xvilvh_b(_tmp7_m, _tmp6_m); \ ++ _tmp0_m = __lasx_xvilvl_w(_out2, _out0); \ ++ _tmp2_m = __lasx_xvilvh_w(_out2, _out0); \ ++ _tmp4_m = __lasx_xvilvl_w(_out3, _out1); \ ++ _tmp6_m = __lasx_xvilvh_w(_out3, _out1); \ ++ _tmp1_m = __lasx_xvilvl_w(_out6, _out4); \ ++ _tmp3_m = __lasx_xvilvh_w(_out6, _out4); \ ++ _tmp5_m = __lasx_xvilvl_w(_out7, _out5); \ ++ _tmp7_m = __lasx_xvilvh_w(_out7, _out5); \ ++ _out0 = __lasx_xvilvl_d(_tmp1_m, _tmp0_m); \ ++ _out1 = __lasx_xvilvh_d(_tmp1_m, _tmp0_m); \ ++ _out2 = __lasx_xvilvl_d(_tmp3_m, _tmp2_m); \ ++ _out3 = __lasx_xvilvh_d(_tmp3_m, _tmp2_m); \ ++ _out4 = __lasx_xvilvl_d(_tmp5_m, _tmp4_m); \ ++ _out5 = __lasx_xvilvh_d(_tmp5_m, _tmp4_m); \ ++ _out6 = __lasx_xvilvl_d(_tmp7_m, _tmp6_m); \ ++ _out7 = __lasx_xvilvh_d(_tmp7_m, _tmp6_m); \ ++ } ++ ++/* ++ * ============================================================================= ++ * Description : Transpose input 16x8 byte block ++ * Arguments : Inputs - _in0, _in1, _in2, _in3, _in4, _in5, _in6, _in7, ++ * _in8, _in9, _in10, _in11, _in12, _in13, _in14, _in15 ++ * (input 16x8 byte block) ++ * Outputs - _out0, _out1, _out2, _out3, _out4, _out5, _out6, ++ * _out7 (output 8x16 byte block) ++ * Details : The rows of the matrix become columns, and the columns become ++ * rows. 
++ * Example : LASX_TRANSPOSE16x8_H ++ * _in0 : 1,2,3,4,5,6,7,8,0,0,0,0,0,0,0,0 ++ * _in1 : 2,2,3,4,5,6,7,8,0,0,0,0,0,0,0,0 ++ * _in2 : 3,2,3,4,5,6,7,8,0,0,0,0,0,0,0,0 ++ * _in3 : 4,2,3,4,5,6,7,8,0,0,0,0,0,0,0,0 ++ * _in4 : 5,2,3,4,5,6,7,8,0,0,0,0,0,0,0,0 ++ * _in5 : 6,2,3,4,5,6,7,8,0,0,0,0,0,0,0,0 ++ * _in6 : 7,2,3,4,5,6,7,8,0,0,0,0,0,0,0,0 ++ * _in7 : 8,2,3,4,5,6,7,8,0,0,0,0,0,0,0,0 ++ * _in8 : 9,2,3,4,5,6,7,8,0,0,0,0,0,0,0,0 ++ * _in9 : 1,2,3,4,5,6,7,8,0,0,0,0,0,0,0,0 ++ * _in10 : 0,2,3,4,5,6,7,8,0,0,0,0,0,0,0,0 ++ * _in11 : 2,2,3,4,5,6,7,8,0,0,0,0,0,0,0,0 ++ * _in12 : 3,2,3,4,5,6,7,8,0,0,0,0,0,0,0,0 ++ * _in13 : 7,2,3,4,5,6,7,8,0,0,0,0,0,0,0,0 ++ * _in14 : 5,2,3,4,5,6,7,8,0,0,0,0,0,0,0,0 ++ * _in15 : 6,2,3,4,5,6,7,8,0,0,0,0,0,0,0,0 ++ * ++ * _out0 : 1,2,3,4,5,6,7,8,9,1,0,2,3,7,5,6 ++ * _out1 : 2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2 ++ * _out2 : 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3 ++ * _out3 : 4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4 ++ * _out4 : 5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5 ++ * _out5 : 6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6 ++ * _out6 : 7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7 ++ * _out7 : 8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8 ++ * ============================================================================= ++ */ ++#define LASX_TRANSPOSE16x8_H(_in0, _in1, _in2, _in3, _in4, _in5, _in6, _in7, \ ++ _in8, _in9, _in10, _in11, _in12, _in13, _in14, \ ++ _in15, _out0, _out1, _out2, _out3, _out4, _out5, \ ++ _out6, _out7) \ ++ { \ ++ __m256i _tmp0_m, _tmp1_m, _tmp2_m, _tmp3_m; \ ++ __m256i _tmp4_m, _tmp5_m, _tmp6_m, _tmp7_m; \ ++ __m256i _t0, _t1, _t2, _t3, _t4, _t5, _t6, _t7; \ ++ \ ++ _tmp0_m = __lasx_xvilvl_h(_in2, _in0); \ ++ _tmp1_m = __lasx_xvilvl_h(_in3, _in1); \ ++ _tmp2_m = __lasx_xvilvl_h(_in6, _in4); \ ++ _tmp3_m = __lasx_xvilvl_h(_in7, _in5); \ ++ _tmp4_m = __lasx_xvilvl_h(_in10, _in8); \ ++ _tmp5_m = __lasx_xvilvl_h(_in11, _in9); \ ++ _tmp6_m = __lasx_xvilvl_h(_in14, _in12); \ ++ _tmp7_m = __lasx_xvilvl_h(_in15, _in13); \ ++ _t0 = __lasx_xvilvl_h(_tmp1_m, _tmp0_m); \ ++ _t1 = __lasx_xvilvh_h(_tmp1_m, _tmp0_m); \ ++ _t2 = __lasx_xvilvl_h(_tmp3_m, _tmp2_m); \ ++ _t3 = __lasx_xvilvh_h(_tmp3_m, _tmp2_m); \ ++ _t4 = __lasx_xvilvl_h(_tmp5_m, _tmp4_m); \ ++ _t5 = __lasx_xvilvh_h(_tmp5_m, _tmp4_m); \ ++ _t6 = __lasx_xvilvl_h(_tmp7_m, _tmp6_m); \ ++ _t7 = __lasx_xvilvh_h(_tmp7_m, _tmp6_m); \ ++ _tmp0_m = __lasx_xvilvl_d(_t2, _t0); \ ++ _tmp2_m = __lasx_xvilvh_d(_t2, _t0); \ ++ _tmp4_m = __lasx_xvilvl_d(_t3, _t1); \ ++ _tmp6_m = __lasx_xvilvh_d(_t3, _t1); \ ++ _tmp1_m = __lasx_xvilvl_d(_t6, _t4); \ ++ _tmp3_m = __lasx_xvilvh_d(_t6, _t4); \ ++ _tmp5_m = __lasx_xvilvl_d(_t7, _t5); \ ++ _tmp7_m = __lasx_xvilvh_d(_t7, _t5); \ ++ _out0 = __lasx_xvpermi_q(_tmp1_m, _tmp0_m, 0x20); \ ++ _out1 = __lasx_xvpermi_q(_tmp3_m, _tmp2_m, 0x20); \ ++ _out2 = __lasx_xvpermi_q(_tmp5_m, _tmp4_m, 0x20); \ ++ _out3 = __lasx_xvpermi_q(_tmp7_m, _tmp6_m, 0x20); \ ++ \ ++ _tmp0_m = __lasx_xvilvh_h(_in2, _in0); \ ++ _tmp1_m = __lasx_xvilvh_h(_in3, _in1); \ ++ _tmp2_m = __lasx_xvilvh_h(_in6, _in4); \ ++ _tmp3_m = __lasx_xvilvh_h(_in7, _in5); \ ++ _tmp4_m = __lasx_xvilvh_h(_in10, _in8); \ ++ _tmp5_m = __lasx_xvilvh_h(_in11, _in9); \ ++ _tmp6_m = __lasx_xvilvh_h(_in14, _in12); \ ++ _tmp7_m = __lasx_xvilvh_h(_in15, _in13); \ ++ _t0 = __lasx_xvilvl_h(_tmp1_m, _tmp0_m); \ ++ _t1 = __lasx_xvilvh_h(_tmp1_m, _tmp0_m); \ ++ _t2 = __lasx_xvilvl_h(_tmp3_m, _tmp2_m); \ ++ _t3 = __lasx_xvilvh_h(_tmp3_m, _tmp2_m); \ ++ _t4 = __lasx_xvilvl_h(_tmp5_m, _tmp4_m); \ ++ _t5 = __lasx_xvilvh_h(_tmp5_m, _tmp4_m); \ ++ _t6 = __lasx_xvilvl_h(_tmp7_m, _tmp6_m); \ ++ _t7 = 
__lasx_xvilvh_h(_tmp7_m, _tmp6_m); \ ++ _tmp0_m = __lasx_xvilvl_d(_t2, _t0); \ ++ _tmp2_m = __lasx_xvilvh_d(_t2, _t0); \ ++ _tmp4_m = __lasx_xvilvl_d(_t3, _t1); \ ++ _tmp6_m = __lasx_xvilvh_d(_t3, _t1); \ ++ _tmp1_m = __lasx_xvilvl_d(_t6, _t4); \ ++ _tmp3_m = __lasx_xvilvh_d(_t6, _t4); \ ++ _tmp5_m = __lasx_xvilvl_d(_t7, _t5); \ ++ _tmp7_m = __lasx_xvilvh_d(_t7, _t5); \ ++ _out4 = __lasx_xvpermi_q(_tmp1_m, _tmp0_m, 0x20); \ ++ _out5 = __lasx_xvpermi_q(_tmp3_m, _tmp2_m, 0x20); \ ++ _out6 = __lasx_xvpermi_q(_tmp5_m, _tmp4_m, 0x20); \ ++ _out7 = __lasx_xvpermi_q(_tmp7_m, _tmp6_m, 0x20); \ ++ } ++ ++/* ++ * ============================================================================= ++ * Description : Transpose 4x4 block with halfword elements in vectors ++ * Arguments : Inputs - _in0, _in1, _in2, _in3 ++ * Outputs - _out0, _out1, _out2, _out3 ++ * Return Type - signed halfword ++ * Details : The rows of the matrix become columns, and the columns become ++ * rows. ++ * Example : See LASX_TRANSPOSE8x8_H ++ * ============================================================================= ++ */ ++#define LASX_TRANSPOSE4x4_H(_in0, _in1, _in2, _in3, _out0, _out1, _out2, \ ++ _out3) \ ++ { \ ++ __m256i _s0_m, _s1_m; \ ++ \ ++ _s0_m = __lasx_xvilvl_h(_in1, _in0); \ ++ _s1_m = __lasx_xvilvl_h(_in3, _in2); \ ++ _out0 = __lasx_xvilvl_w(_s1_m, _s0_m); \ ++ _out2 = __lasx_xvilvh_w(_s1_m, _s0_m); \ ++ _out1 = __lasx_xvilvh_d(_out0, _out0); \ ++ _out3 = __lasx_xvilvh_d(_out2, _out2); \ ++ } ++ ++/* ++ * ============================================================================= ++ * Description : Transpose input 8x8 byte block ++ * Arguments : Inputs - _in0, _in1, _in2, _in3, _in4, _in5, _in6, _in7 ++ * (input 8x8 byte block) ++ * Outputs - _out0, _out1, _out2, _out3, _out4, _out5, _out6, ++ * _out7 (output 8x8 byte block) ++ * Example : See LASX_TRANSPOSE8x8_H ++ * ============================================================================= ++ */ ++#define LASX_TRANSPOSE8x8_B(_in0, _in1, _in2, _in3, _in4, _in5, _in6, _in7, \ ++ _out0, _out1, _out2, _out3, _out4, _out5, _out6, \ ++ _out7) \ ++ { \ ++ __m256i _tmp0_m, _tmp1_m, _tmp2_m, _tmp3_m; \ ++ __m256i _tmp4_m, _tmp5_m, _tmp6_m, _tmp7_m; \ ++ _tmp0_m = __lasx_xvilvl_b(_in2, _in0); \ ++ _tmp1_m = __lasx_xvilvl_b(_in3, _in1); \ ++ _tmp2_m = __lasx_xvilvl_b(_in6, _in4); \ ++ _tmp3_m = __lasx_xvilvl_b(_in7, _in5); \ ++ _tmp4_m = __lasx_xvilvl_b(_tmp1_m, _tmp0_m); \ ++ _tmp5_m = __lasx_xvilvh_b(_tmp1_m, _tmp0_m); \ ++ _tmp6_m = __lasx_xvilvl_b(_tmp3_m, _tmp2_m); \ ++ _tmp7_m = __lasx_xvilvh_b(_tmp3_m, _tmp2_m); \ ++ _out0 = __lasx_xvilvl_w(_tmp6_m, _tmp4_m); \ ++ _out2 = __lasx_xvilvh_w(_tmp6_m, _tmp4_m); \ ++ _out4 = __lasx_xvilvl_w(_tmp7_m, _tmp5_m); \ ++ _out6 = __lasx_xvilvh_w(_tmp7_m, _tmp5_m); \ ++ _out1 = __lasx_xvbsrl_v(_out0, 8); \ ++ _out3 = __lasx_xvbsrl_v(_out2, 8); \ ++ _out5 = __lasx_xvbsrl_v(_out4, 8); \ ++ _out7 = __lasx_xvbsrl_v(_out6, 8); \ ++ } ++ ++/* ++ * ============================================================================= ++ * Description : Transpose 8x8 block with halfword elements in vectors. ++ * Arguments : Inputs - _in0, _in1, ~ ++ * Outputs - _out0, _out1, ~ ++ * Details : The rows of the matrix become columns, and the columns become ++ * rows. 
++ * Example : LASX_TRANSPOSE8x8_H ++ * _in0 : 1,2,3,4, 5,6,7,8, 1,2,3,4, 5,6,7,8 ++ * _in1 : 8,2,3,4, 5,6,7,8, 8,2,3,4, 5,6,7,8 ++ * _in2 : 8,2,3,4, 5,6,7,8, 8,2,3,4, 5,6,7,8 ++ * _in3 : 1,2,3,4, 5,6,7,8, 1,2,3,4, 5,6,7,8 ++ * _in4 : 9,2,3,4, 5,6,7,8, 9,2,3,4, 5,6,7,8 ++ * _in5 : 1,2,3,4, 5,6,7,8, 1,2,3,4, 5,6,7,8 ++ * _in6 : 1,2,3,4, 5,6,7,8, 1,2,3,4, 5,6,7,8 ++ * _in7 : 9,2,3,4, 5,6,7,8, 9,2,3,4, 5,6,7,8 ++ * ++ * _out0 : 1,8,8,1, 9,1,1,9, 1,8,8,1, 9,1,1,9 ++ * _out1 : 2,2,2,2, 2,2,2,2, 2,2,2,2, 2,2,2,2 ++ * _out2 : 3,3,3,3, 3,3,3,3, 3,3,3,3, 3,3,3,3 ++ * _out3 : 4,4,4,4, 4,4,4,4, 4,4,4,4, 4,4,4,4 ++ * _out4 : 5,5,5,5, 5,5,5,5, 5,5,5,5, 5,5,5,5 ++ * _out5 : 6,6,6,6, 6,6,6,6, 6,6,6,6, 6,6,6,6 ++ * _out6 : 7,7,7,7, 7,7,7,7, 7,7,7,7, 7,7,7,7 ++ * _out7 : 8,8,8,8, 8,8,8,8, 8,8,8,8, 8,8,8,8 ++ * ============================================================================= ++ */ ++#define LASX_TRANSPOSE8x8_H(_in0, _in1, _in2, _in3, _in4, _in5, _in6, _in7, \ ++ _out0, _out1, _out2, _out3, _out4, _out5, _out6, \ ++ _out7) \ ++ { \ ++ __m256i _s0_m, _s1_m; \ ++ __m256i _tmp0_m, _tmp1_m, _tmp2_m, _tmp3_m; \ ++ __m256i _tmp4_m, _tmp5_m, _tmp6_m, _tmp7_m; \ ++ \ ++ _s0_m = __lasx_xvilvl_h(_in6, _in4); \ ++ _s1_m = __lasx_xvilvl_h(_in7, _in5); \ ++ _tmp0_m = __lasx_xvilvl_h(_s1_m, _s0_m); \ ++ _tmp1_m = __lasx_xvilvh_h(_s1_m, _s0_m); \ ++ _s0_m = __lasx_xvilvh_h(_in6, _in4); \ ++ _s1_m = __lasx_xvilvh_h(_in7, _in5); \ ++ _tmp2_m = __lasx_xvilvl_h(_s1_m, _s0_m); \ ++ _tmp3_m = __lasx_xvilvh_h(_s1_m, _s0_m); \ ++ \ ++ _s0_m = __lasx_xvilvl_h(_in2, _in0); \ ++ _s1_m = __lasx_xvilvl_h(_in3, _in1); \ ++ _tmp4_m = __lasx_xvilvl_h(_s1_m, _s0_m); \ ++ _tmp5_m = __lasx_xvilvh_h(_s1_m, _s0_m); \ ++ _s0_m = __lasx_xvilvh_h(_in2, _in0); \ ++ _s1_m = __lasx_xvilvh_h(_in3, _in1); \ ++ _tmp6_m = __lasx_xvilvl_h(_s1_m, _s0_m); \ ++ _tmp7_m = __lasx_xvilvh_h(_s1_m, _s0_m); \ ++ \ ++ _out0 = __lasx_xvpickev_d(_tmp0_m, _tmp4_m); \ ++ _out2 = __lasx_xvpickev_d(_tmp1_m, _tmp5_m); \ ++ _out4 = __lasx_xvpickev_d(_tmp2_m, _tmp6_m); \ ++ _out6 = __lasx_xvpickev_d(_tmp3_m, _tmp7_m); \ ++ _out1 = __lasx_xvpickod_d(_tmp0_m, _tmp4_m); \ ++ _out3 = __lasx_xvpickod_d(_tmp1_m, _tmp5_m); \ ++ _out5 = __lasx_xvpickod_d(_tmp2_m, _tmp6_m); \ ++ _out7 = __lasx_xvpickod_d(_tmp3_m, _tmp7_m); \ ++ } ++ ++/* ++ * ============================================================================= ++ * Description : Butterfly of 4 input vectors ++ * Arguments : Inputs - _in0, _in1, _in2, _in3 ++ * Outputs - _out0, _out1, _out2, _out3 ++ * Details : Butterfly operation ++ * Example : LASX_BUTTERFLY_4 ++ * _out0 = _in0 + _in3; ++ * _out1 = _in1 + _in2; ++ * _out2 = _in1 - _in2; ++ * _out3 = _in0 - _in3; ++ * ============================================================================= ++ */ ++#define LASX_BUTTERFLY_4_B(_in0, _in1, _in2, _in3, _out0, _out1, _out2, _out3) \ ++ { \ ++ _out0 = __lasx_xvadd_b(_in0, _in3); \ ++ _out1 = __lasx_xvadd_b(_in1, _in2); \ ++ _out2 = __lasx_xvsub_b(_in1, _in2); \ ++ _out3 = __lasx_xvsub_b(_in0, _in3); \ ++ } ++#define LASX_BUTTERFLY_4_H(_in0, _in1, _in2, _in3, _out0, _out1, _out2, _out3) \ ++ { \ ++ _out0 = __lasx_xvadd_h(_in0, _in3); \ ++ _out1 = __lasx_xvadd_h(_in1, _in2); \ ++ _out2 = __lasx_xvsub_h(_in1, _in2); \ ++ _out3 = __lasx_xvsub_h(_in0, _in3); \ ++ } ++#define LASX_BUTTERFLY_4_W(_in0, _in1, _in2, _in3, _out0, _out1, _out2, _out3) \ ++ { \ ++ _out0 = __lasx_xvadd_w(_in0, _in3); \ ++ _out1 = __lasx_xvadd_w(_in1, _in2); \ ++ _out2 = __lasx_xvsub_w(_in1, _in2); \ ++ _out3 = __lasx_xvsub_w(_in0, _in3); \ 
++ } ++#define LASX_BUTTERFLY_4_D(_in0, _in1, _in2, _in3, _out0, _out1, _out2, _out3) \ ++ { \ ++ _out0 = __lasx_xvadd_d(_in0, _in3); \ ++ _out1 = __lasx_xvadd_d(_in1, _in2); \ ++ _out2 = __lasx_xvsub_d(_in1, _in2); \ ++ _out3 = __lasx_xvsub_d(_in0, _in3); \ ++ } ++ ++/* ++ * ============================================================================= ++ * Description : Butterfly of 8 input vectors ++ * Arguments : Inputs - _in0, _in1, _in2, _in3, ~ ++ * Outputs - _out0, _out1, _out2, _out3, ~ ++ * Details : Butterfly operation ++ * Example : LASX_BUTTERFLY_8 ++ * _out0 = _in0 + _in7; ++ * _out1 = _in1 + _in6; ++ * _out2 = _in2 + _in5; ++ * _out3 = _in3 + _in4; ++ * _out4 = _in3 - _in4; ++ * _out5 = _in2 - _in5; ++ * _out6 = _in1 - _in6; ++ * _out7 = _in0 - _in7; ++ * ============================================================================= ++ */ ++#define LASX_BUTTERFLY_8_B(_in0, _in1, _in2, _in3, _in4, _in5, _in6, _in7, \ ++ _out0, _out1, _out2, _out3, _out4, _out5, _out6, \ ++ _out7) \ ++ { \ ++ _out0 = __lasx_xvadd_b(_in0, _in7); \ ++ _out1 = __lasx_xvadd_b(_in1, _in6); \ ++ _out2 = __lasx_xvadd_b(_in2, _in5); \ ++ _out3 = __lasx_xvadd_b(_in3, _in4); \ ++ _out4 = __lasx_xvsub_b(_in3, _in4); \ ++ _out5 = __lasx_xvsub_b(_in2, _in5); \ ++ _out6 = __lasx_xvsub_b(_in1, _in6); \ ++ _out7 = __lasx_xvsub_b(_in0, _in7); \ ++ } ++ ++#define LASX_BUTTERFLY_8_H(_in0, _in1, _in2, _in3, _in4, _in5, _in6, _in7, \ ++ _out0, _out1, _out2, _out3, _out4, _out5, _out6, \ ++ _out7) \ ++ { \ ++ _out0 = __lasx_xvadd_h(_in0, _in7); \ ++ _out1 = __lasx_xvadd_h(_in1, _in6); \ ++ _out2 = __lasx_xvadd_h(_in2, _in5); \ ++ _out3 = __lasx_xvadd_h(_in3, _in4); \ ++ _out4 = __lasx_xvsub_h(_in3, _in4); \ ++ _out5 = __lasx_xvsub_h(_in2, _in5); \ ++ _out6 = __lasx_xvsub_h(_in1, _in6); \ ++ _out7 = __lasx_xvsub_h(_in0, _in7); \ ++ } ++ ++#define LASX_BUTTERFLY_8_W(_in0, _in1, _in2, _in3, _in4, _in5, _in6, _in7, \ ++ _out0, _out1, _out2, _out3, _out4, _out5, _out6, \ ++ _out7) \ ++ { \ ++ _out0 = __lasx_xvadd_w(_in0, _in7); \ ++ _out1 = __lasx_xvadd_w(_in1, _in6); \ ++ _out2 = __lasx_xvadd_w(_in2, _in5); \ ++ _out3 = __lasx_xvadd_w(_in3, _in4); \ ++ _out4 = __lasx_xvsub_w(_in3, _in4); \ ++ _out5 = __lasx_xvsub_w(_in2, _in5); \ ++ _out6 = __lasx_xvsub_w(_in1, _in6); \ ++ _out7 = __lasx_xvsub_w(_in0, _in7); \ ++ } ++ ++#define LASX_BUTTERFLY_8_D(_in0, _in1, _in2, _in3, _in4, _in5, _in6, _in7, \ ++ _out0, _out1, _out2, _out3, _out4, _out5, _out6, \ ++ _out7) \ ++ { \ ++ _out0 = __lasx_xvadd_d(_in0, _in7); \ ++ _out1 = __lasx_xvadd_d(_in1, _in6); \ ++ _out2 = __lasx_xvadd_d(_in2, _in5); \ ++ _out3 = __lasx_xvadd_d(_in3, _in4); \ ++ _out4 = __lasx_xvsub_d(_in3, _in4); \ ++ _out5 = __lasx_xvsub_d(_in2, _in5); \ ++ _out6 = __lasx_xvsub_d(_in1, _in6); \ ++ _out7 = __lasx_xvsub_d(_in0, _in7); \ ++ } ++ ++#endif // LASX ++ ++/* ++ * ============================================================================= ++ * Description : Print out elements in vector. ++ * Arguments : Inputs - RTYPE, _element_num, _in0, _enter ++ * Outputs - ++ * Details : Print out '_element_num' elements in 'RTYPE' vector '_in0', if ++ * '_enter' is TRUE, prefix "\nVP:" will be added first. 
++ * Example : VECT_PRINT(v4i32,4,in0,1); // in0: 1,2,3,4 ++ * VP:1,2,3,4, ++ * ============================================================================= ++ */ ++#define VECT_PRINT(RTYPE, element_num, in0, enter) \ ++ { \ ++ RTYPE _tmp0 = (RTYPE)in0; \ ++ int _i = 0; \ ++ if (enter) printf("\nVP:"); \ ++ for (_i = 0; _i < element_num; _i++) printf("%d,", _tmp0[_i]); \ ++ } ++ ++#endif /* LOONGSON_INTRINSICS_H */ +diff --git a/pixman/meson.build b/pixman/meson.build +index 62ec66b..2f515d3 100644 +--- a/pixman/meson.build ++++ b/pixman/meson.build +@@ -59,6 +59,8 @@ simds = [ + ['pixman-arma64-neon-asm.S', 'pixman-arma64-neon-asm-bilinear.S']], + ['mips-dspr2', have_mips_dspr2, mips_dspr2_flags, + ['pixman-mips-dspr2-asm.S', 'pixman-mips-memcpy-asm.S']], ++ ['lsx', have_lsx, lsx_flags, []], ++ ['lasx', have_lasx, lasx_flags, []], + ] + + foreach simd : simds +@@ -85,6 +87,7 @@ pixman_files = files( + 'pixman-mips.c', + 'pixman-arm.c', + 'pixman-ppc.c', ++ 'pixman-loongarch.c', + 'pixman-edge.c', + 'pixman-edge-accessors.c', + 'pixman-fast-path.c', +diff --git a/pixman/pixman-implementation.c b/pixman/pixman-implementation.c +index 69fa70b..c769311 100644 +--- a/pixman/pixman-implementation.c ++++ b/pixman/pixman-implementation.c +@@ -399,6 +399,7 @@ _pixman_choose_implementation (void) + imp = _pixman_arm_get_implementations (imp); + imp = _pixman_ppc_get_implementations (imp); + imp = _pixman_mips_get_implementations (imp); ++ imp = _pixman_loongarch_get_implementations (imp); + + imp = _pixman_implementation_create_noop (imp); + +diff --git a/pixman/pixman-lasx.c b/pixman/pixman-lasx.c +new file mode 100644 +index 0000000..d6d0169 +--- /dev/null ++++ b/pixman/pixman-lasx.c +@@ -0,0 +1,4887 @@ ++/* ++ * Copyright © 2023 Loongson Technology Corporation Limited ++ * Contributed by Shiyou Yin(yinshiyou-hf@loongson.cn) ++ * Lu Wang(wanglu@loongson.cn) ++ * Song Ding(songding@loongson.cn) ++ * ++ * Permission to use, copy, modify, distribute, and sell this software and its ++ * documentation for any purpose is hereby granted without fee, provided that ++ * the above copyright notice appear in all copies and that both that ++ * copyright notice and this permission notice appear in supporting ++ * documentation, and that the name of Red Hat not be used in advertising or ++ * publicity pertaining to distribution of the software without specific, ++ * written prior permission. Red Hat makes no representations about the ++ * suitability of this software for any purpose. It is provided "as is" ++ * without implied warranty. ++ * ++ * THE COPYRIGHT HOLDERS DISCLAIM ALL WARRANTIES WITH REGARD TO THIS ++ * SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND ++ * FITNESS, IN NO EVENT SHALL THE COPYRIGHT HOLDERS BE LIABLE FOR ANY ++ * SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES ++ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN ++ * AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING ++ * OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS ++ * SOFTWARE. 
++ * ++ */ ++ ++#ifdef HAVE_CONFIG_H ++#include <config.h> ++#endif ++ ++#include "pixman-private.h" ++#include "pixman-combine32.h" ++#include "loongson_intrinsics.h" ++ ++static __m256i mask_0080; ++static __m256i mask_00ff; ++static __m256i mask_0101; ++static __m256i mask_ffff; ++static __m256i mask_ff000000; ++static __m256i mask_alpha; ++ ++static __m256i mask_565_r; ++static __m256i mask_565_g1, mask_565_g2; ++static __m256i mask_565_b; ++static __m256i mask_red; ++static __m256i mask_green; ++static __m256i mask_blue; ++ ++static __m256i mask_565_fix_rb; ++static __m256i mask_565_fix_g; ++ ++static __m256i mask_565_rb; ++static __m256i mask_565_pack_multiplier; ++ ++static force_inline __m256i ++create_mask_16_256 (uint16_t mask) ++{ ++ return __lasx_xvrepli_h (mask); ++} ++ ++static force_inline __m256i ++create_mask_1x32_256 (uint32_t mask) ++{ ++ return __lasx_xvreplgr2vr_w (mask); ++} ++ ++static force_inline __m256i ++create_mask_1x64_256 (int64_t mask) ++{ ++ return __lasx_xvreplgr2vr_d (mask); ++} ++ ++static force_inline uint32_t ++over (uint32_t src, uint32_t dest) ++{ ++ uint32_t a = ~src >> 24; ++ ++ UN8x4_MUL_UN8_ADD_UN8x4(dest, a, src); ++ ++ return dest; ++} ++ ++static force_inline uint32_t ++in (uint32_t x, uint8_t y) ++{ ++ uint16_t a = y; ++ ++ UN8x4_MUL_UN8(x, a); ++ ++ return x; ++} ++ ++static force_inline uint32_t ++combine_mask(const uint32_t *src, const uint32_t *mask, int i) ++{ ++ uint32_t s, m; ++ ++ if (mask) { ++ m = *(mask + i) >> A_SHIFT; ++ if (!m) ++ return 0; ++ } ++ s = *(src + i); ++ if (mask) ++ UN8x4_MUL_UN8(s, m); ++ return s; ++} ++ ++static void ++combine_mask_ca(uint32_t *src, uint32_t *mask) ++{ ++ uint32_t a = *mask; ++ uint32_t x; ++ uint16_t xa; ++ ++ if (!a) { ++ *(src) = 0; ++ return; ++ } ++ ++ x = *(src); ++ if (a == ~0) { ++ x = x >> A_SHIFT; ++ x |= x << G_SHIFT; ++ x |= x << R_SHIFT; ++ *(mask) = x; ++ return; ++ } ++ xa = x >> A_SHIFT; ++ UN8x4_MUL_UN8x4(x, a); ++ *(src) = x; ++ ++ UN8x4_MUL_UN8(a, xa); ++ *(mask) = a; ++} ++ ++static void ++combine_mask_value_ca(uint32_t *src, const uint32_t *mask) ++{ ++ uint32_t a = *mask; ++ uint32_t x; ++ ++ if (!a) { ++ *(src) = 0; ++ return; ++ } ++ ++ if (a == ~0) ++ return; ++ ++ x = *(src); ++ UN8x4_MUL_UN8x4(x, a); ++ *(src) = x; ++} ++ ++static void ++combine_mask_alpha_ca(const uint32_t *src, uint32_t *mask) ++{ ++ uint32_t a = *(mask); ++ uint32_t x; ++ ++ if (!a) ++ return; ++ x = *(src) >> A_SHIFT; ++ ++ if (x == MASK) ++ return; ++ ++ if (a == -1) { ++ x |= x << G_SHIFT; ++ x |= x << R_SHIFT; ++ *(mask) = x; ++ return; ++ } ++ UN8x4_MUL_UN8(a, x); ++ *(mask) = a; ++} ++ ++/* Compute the product of two unsigned fixed-point 8-bit values from 0 to 1 ++ * and map its result to the same range. ++ * ++ * Jim Blinn gives multiple ways to compute this in "Jim Blinn's Corner: ++ * Notation, Notation, Notation", the first of which is ++ * ++ * prod(a, b) = (a * b + 128) / 255. ++ * ++ * By approximating the division by 255 as 257/65536, it can be replaced by a ++ * multiply and a right shift. This is the implementation that we use in ++ * pix_multiply(), but we _mm_mulhi_pu16() by 257 (part of SSE1 or Extended ++ * 3DNow!, and unavailable at the time of the book's publication) to perform ++ * the multiplication and right shift in a single operation. ++ * ++ * prod(a, b) = ((a * b + 128) * 257) >> 16. ++ * ++ * A third way (how pix_multiply() was implemented prior to 14208344) exists ++ * also that performs the multiplication by 257 with adds and shifts.
++ * ++ * Where temp = a * b + 128 ++ * ++ * prod(a, b) = (temp + (temp >> 8)) >> 8. ++ * ++ * The lasx_pix_multiply(src, mask) implemented with the third way, and caculates ++ * two sets of data each time. ++ */ ++ ++static force_inline __m256i ++lasx_pix_multiply (__m256i data, __m256i alpha) ++{ ++ return __lasx_xvmuh_hu (__lasx_xvmadd_h(mask_0080, data, alpha), ++ mask_0101); ++} ++ ++static force_inline __m256i ++lasx_over_u(__m256i src, __m256i dest) ++{ ++ __m256i r1, r2, r3, t; ++ __m256i rb_mask = __lasx_xvreplgr2vr_w(0x00ff00ff); ++ __m256i rb_one_half = __lasx_xvreplgr2vr_w(0x00800080); ++ __m256i rb_mask_plus_one = __lasx_xvreplgr2vr_w(0x10000100); ++ __m256i a = __lasx_xvsrli_w(__lasx_xvnor_v(src, src), 24); ++ ++ r1 = __lasx_xvand_v(dest, rb_mask); ++ r1 = __lasx_xvmadd_w(rb_one_half, r1, a); ++ t = __lasx_xvand_v(rb_mask, __lasx_xvsrli_w(r1, 8)); ++ r1 = __lasx_xvadd_w(r1, t); ++ r1 = __lasx_xvsrli_w(r1, 8); ++ r1 = __lasx_xvand_v(r1, rb_mask); ++ r2 = __lasx_xvand_v(src, rb_mask); ++ ++ r1 = __lasx_xvadd_w(r1, r2); ++ t = __lasx_xvand_v(rb_mask, __lasx_xvsrli_w(r1, 8)); ++ r1 = __lasx_xvor_v(r1, __lasx_xvsub_w(rb_mask_plus_one, t)); ++ r1 = __lasx_xvand_v(r1, rb_mask); ++ ++ r2 = __lasx_xvsrli_w(dest, 8); ++ r2 = __lasx_xvand_v(r2, rb_mask); ++ r2 = __lasx_xvmadd_w(rb_one_half, r2, a); ++ t = __lasx_xvand_v(rb_mask, __lasx_xvsrli_w(r2, 8)); ++ r2 = __lasx_xvadd_w(r2, t); ++ r2 = __lasx_xvsrli_w(r2, 8); ++ r2 = __lasx_xvand_v(r2, rb_mask); ++ r3 = __lasx_xvand_v(rb_mask, __lasx_xvsrli_w(src, 8)); ++ ++ r2 = __lasx_xvadd_w(r2, r3); ++ t = __lasx_xvand_v(rb_mask, __lasx_xvsrli_w(r2, 8)); ++ r2 = __lasx_xvor_v(r2, __lasx_xvsub_w(rb_mask_plus_one, t)); ++ r2 = __lasx_xvand_v(r2, rb_mask); ++ ++ t = __lasx_xvor_v(r1, __lasx_xvslli_w(r2, 8)); ++ ++ return t; ++} ++ ++static force_inline __m256i ++lasx_in_u(__m256i x, __m256i a) ++{ ++ __m256i r1, r2, t; ++ __m256i rb_mask = __lasx_xvreplgr2vr_w(0xff00ff); ++ __m256i rb_one_half = __lasx_xvreplgr2vr_w(0x800080); ++ ++ r1 = __lasx_xvand_v(x, rb_mask); ++ r1 = __lasx_xvmadd_w(rb_one_half, r1, a); ++ t = __lasx_xvand_v(__lasx_xvsrli_w(r1, 8), rb_mask); ++ r1 = __lasx_xvadd_w(r1, t); ++ r1 = __lasx_xvsrli_w(r1, 8); ++ r1 = __lasx_xvand_v(r1, rb_mask); ++ r2 = __lasx_xvsrli_w(x, 8); ++ ++ r2 = __lasx_xvand_v(r2, rb_mask); ++ r2 = __lasx_xvmadd_w(rb_one_half, r2, a); ++ t = __lasx_xvand_v(__lasx_xvsrli_w(r2, 8), rb_mask); ++ r2 = __lasx_xvadd_w(r2, t); ++ r2 = __lasx_xvsrli_w(r2, 8); ++ r2 = __lasx_xvand_v(r2, rb_mask); ++ ++ t = __lasx_xvor_v(r1, __lasx_xvslli_w(r2, 8)); ++ ++ return t; ++} ++ ++static void ++lasx_combine_src_u (pixman_implementation_t *imp, ++ pixman_op_t op, ++ uint32_t * dest, ++ const uint32_t * src, ++ const uint32_t * mask, ++ int width) ++{ ++ __m256i src0, mask0, dest0; ++ __m256i zero = __lasx_xvldi(0); ++ __m256i out0, out1, out2, out3, tmp0, tmp1; ++ ++ if(mask) { ++ while (width >= 8) { ++ src0 = __lasx_xvld(src, 0); ++ mask0 = __lasx_xvld(mask, 0); ++ tmp0 = __lasx_xvilvl_b(zero, src0); ++ tmp1 = __lasx_xvilvh_b(zero, src0); ++ out0 = __lasx_xvpermi_q(tmp0, tmp1, 0x02); ++ out2 = __lasx_xvpermi_q(tmp0, tmp1, 0x13); ++ tmp0 = __lasx_xvilvl_b(zero, mask0); ++ tmp1 = __lasx_xvilvh_b(zero, mask0); ++ out1 = __lasx_xvpermi_q(tmp0, tmp1, 0x02); ++ out3 = __lasx_xvpermi_q(tmp0, tmp1, 0x13); ++ out1 = __lasx_xvshuf4i_h(out1, 0xff); ++ out3 = __lasx_xvshuf4i_h(out3, 0xff); ++ out0 = lasx_pix_multiply(out0, out1); ++ out2 = lasx_pix_multiply(out2, out3); ++ dest0 = __lasx_xvpickev_b(out2, out0); ++ dest0 = 
__lasx_xvpermi_d(dest0, 0xd8); ++ __lasx_xvst(dest0, dest, 0); ++ mask += 8; ++ width -= 8; ++ src += 8; ++ dest += 8; ++ } ++ for (int i = 0; i < width; ++i) { ++ uint32_t s = combine_mask(src, mask, i); ++ *dest++ = s; ++ } ++ } else { ++ while (width >= 8) { ++ src0 = __lasx_xvld(src, 0); ++ __lasx_xvst(src0, dest, 0); ++ width -= 8; ++ src += 8; ++ dest += 8; ++ } ++ if (width) { ++ memcpy (dest, src, width * sizeof (uint32_t)); ++ } ++ } ++} ++ ++static void ++lasx_combine_over_u_mask (uint32_t *dest, ++ const uint32_t *src, ++ const uint32_t *mask, ++ int width) ++{ ++ __m256i bit_set = __lasx_xvreplgr2vr_h(0xff); ++ __m256i src0, mask0, dest0, dest1; ++ __m256i tmp0, tmp1; ++ __m256i zero = __lasx_xvldi(0); ++ __m256i out0, out1, out2, out3, out4, out5; ++ ++ while (width > 7) { ++ src0 = __lasx_xvld(src, 0); ++ dest0 = __lasx_xvld(dest, 0); ++ mask0 = __lasx_xvld(mask, 0); ++ tmp0 = __lasx_xvilvl_b(zero, src0); ++ tmp1 = __lasx_xvilvh_b(zero, src0); ++ out0 = __lasx_xvpermi_q(tmp0, tmp1, 0x02); ++ out2 = __lasx_xvpermi_q(tmp0, tmp1, 0x13); ++ tmp0 = __lasx_xvilvl_b(zero, mask0); ++ tmp1 = __lasx_xvilvh_b(zero, mask0); ++ out1 = __lasx_xvpermi_q(tmp0, tmp1, 0x02); ++ out3 = __lasx_xvpermi_q(tmp0, tmp1, 0x13); ++ out1 = __lasx_xvshuf4i_h(out1, 0xff); ++ out3 = __lasx_xvshuf4i_h(out3, 0xff); ++ out0 = lasx_pix_multiply(out0, out1); ++ out2 = lasx_pix_multiply(out2, out3); ++ out1 = __lasx_xvxor_v(out0, bit_set); ++ out3 = __lasx_xvxor_v(out2, bit_set); ++ out1 = __lasx_xvshuf4i_h(out1, 0xff); ++ out3 = __lasx_xvshuf4i_h(out3, 0xff); ++ tmp0 = __lasx_xvilvl_b(zero, dest0); ++ tmp1 = __lasx_xvilvh_b(zero, dest0); ++ out4 = __lasx_xvpermi_q(tmp0, tmp1, 0x02); ++ out5 = __lasx_xvpermi_q(tmp0, tmp1, 0x13); ++ out4 = lasx_pix_multiply(out4, out1); ++ out5 = lasx_pix_multiply(out5, out3); ++ ++ dest0 = __lasx_xvpickev_b(out2, out0); ++ dest0 = __lasx_xvpermi_d(dest0, 0xd8); ++ dest1 = __lasx_xvpickev_b(out5, out4); ++ dest1 = __lasx_xvpermi_d(dest1, 0xd8); ++ dest0 = __lasx_xvsadd_bu(dest0, dest1); ++ __lasx_xvst(dest0, dest, 0); ++ width -= 8; ++ mask += 8; ++ src += 8; ++ dest += 8; ++ } ++ ++ for (int i = 0; i < width; ++i) { ++ uint32_t m = ALPHA_8 (*(mask + i)); ++ if (m == 0xFF) { ++ uint32_t s = *(src + i); ++ uint32_t a = ALPHA_8 (s); ++ if (a == 0xFF) { ++ *(dest + i) = s; ++ } else if (s) { ++ uint32_t d = *(dest + i); ++ uint32_t ia = a ^ 0xFF; ++ UN8x4_MUL_UN8_ADD_UN8x4(d, ia, s); ++ *(dest + i) = d; ++ } ++ } else if (m) { ++ uint32_t s = *(src + i); ++ if (s) { ++ uint32_t d = *(dest + i); ++ UN8x4_MUL_UN8(s, m); ++ UN8x4_MUL_UN8_ADD_UN8x4(d, ALPHA_8 (~s), s); ++ *(dest + i) = d; ++ } ++ } ++ } ++} ++ ++static force_inline __m256i ++over_1x256 (__m256i src, __m256i alpha, __m256i dst) ++{ ++ alpha = __lasx_xvxor_v(alpha, mask_00ff); ++ alpha = lasx_pix_multiply(dst, alpha); ++ return __lasx_xvsadd_bu (src, alpha); ++} ++ ++static force_inline uint32_t ++core_combine_over_u32 (uint32_t src, uint32_t dst) ++{ ++ uint8_t a = src >> 24; ++ ++ if (a == 0xff) { ++ return src; ++ } ++ else if (src) { ++ __m256i zero = __lasx_xvldi(0); ++ __m256i xr_src = __lasx_xvinsgr2vr_w (zero, src, 0); ++ __m256i xr_dst = __lasx_xvinsgr2vr_w (zero, dst, 0); ++ __m256i xr_alpha; ++ __m256i tmp; ++ ++ xr_src = __lasx_xvilvl_b (zero, xr_src); ++ xr_dst = __lasx_xvilvl_b (zero, xr_dst); ++ xr_alpha = __lasx_xvshuf4i_h (xr_src, 0xff); ++ ++ tmp = __lasx_xvpickev_b (zero, over_1x256 (xr_src, xr_alpha, xr_dst)); ++ ++ return __lasx_xvpickve2gr_wu (tmp, 0); ++ } ++ ++ return dst; ++} ++ ++static void 
++lasx_combine_over_u_no_mask (uint32_t *dst, const uint32_t *src, int width) ++{ ++ __m256i zero = __lasx_xvldi(0); ++ ++ while (width >= 8) { ++ __m256i xv_src, xv_dst; ++ __m256i xv_src_ev, xv_src_od; ++ __m256i alpha; ++ __m256i xv_dst_ev, xv_dst_od; ++ ++ xv_src = __lasx_xvld(src, 0); ++ xv_dst = __lasx_xvld(dst, 0); ++ ++ /* unpack src: 1x256 to 2x256 */ ++ xv_src_ev = __lasx_xvpackev_b(zero, xv_src); ++ xv_src_od = __lasx_xvpackod_b(zero, xv_src); ++ ++ /* expand alpha */ ++ alpha = __lasx_xvshuf4i_h(xv_src_od, 0xf5); ++ ++ /* unpack dst: 1x256 to 2x256 */ ++ xv_dst_ev = __lasx_xvpackev_b(zero, xv_dst); ++ xv_dst_od = __lasx_xvpackod_b(zero, xv_dst); ++ ++ xv_dst_ev = over_1x256(xv_src_ev, alpha, xv_dst_ev); ++ xv_dst_od = over_1x256(xv_src_od, alpha, xv_dst_od); ++ ++ xv_dst = __lasx_xvpackev_b(xv_dst_od, xv_dst_ev); ++ ++ __lasx_xvst(xv_dst, dst, 0); ++ width -= 8; ++ src += 8; ++ dst += 8; ++ } ++ ++ while (width--) { ++ uint32_t s = *src; ++ uint32_t d = *dst; ++ ++ *dst = core_combine_over_u32 (s, d); ++ ++ ++src; ++ ++dst; ++ } ++} ++ ++static void ++lasx_combine_over_u (pixman_implementation_t *imp, ++ pixman_op_t op, ++ uint32_t * dest, ++ const uint32_t * src, ++ const uint32_t * mask, ++ int width) ++{ ++ if (mask) { ++ lasx_combine_over_u_mask (dest, src, mask, width); ++ } ++ else { ++ lasx_combine_over_u_no_mask (dest, src, width); ++ } ++} ++ ++ ++static void ++lasx_combine_over_reverse_u (pixman_implementation_t *imp, ++ pixman_op_t op, ++ uint32_t * dest, ++ const uint32_t * src, ++ const uint32_t * mask, ++ int width) ++{ ++ __m256i src0, mask0, dest0, dest1; ++ __m256i zero = __lasx_xvldi(0); ++ __m256i out0, out1, out2, out3, out4, out5; ++ __m256i tmp0, tmp1; ++ ++ if (mask) { ++ while (width > 7) { ++ src0 = __lasx_xvld(src, 0); ++ dest0 = __lasx_xvld(dest, 0); ++ mask0 = __lasx_xvld(mask, 0); ++ ++ tmp0 = __lasx_xvilvl_b(zero, src0); ++ tmp1 = __lasx_xvilvh_b(zero, src0); ++ out0 = __lasx_xvpermi_q(tmp0, tmp1, 0x02); ++ out2 = __lasx_xvpermi_q(tmp0, tmp1, 0x13); ++ tmp0 = __lasx_xvilvl_b(zero, mask0); ++ tmp1 = __lasx_xvilvh_b(zero, mask0); ++ out1 = __lasx_xvpermi_q(tmp0, tmp1, 0x02); ++ out3 = __lasx_xvpermi_q(tmp0, tmp1, 0x13); ++ out1 = __lasx_xvshuf4i_h(out1, 0xff); ++ out3 = __lasx_xvshuf4i_h(out3, 0xff); ++ out0 = lasx_pix_multiply(out0, out1); ++ out2 = lasx_pix_multiply(out2, out3); ++ ++ dest1 = __lasx_xvxori_b(dest0, 0xff); ++ tmp0 = __lasx_xvilvl_b(zero, dest0); ++ tmp1 = __lasx_xvilvh_b(zero, dest0); ++ out1 = __lasx_xvpermi_q(tmp0, tmp1, 0x02); ++ out3 = __lasx_xvpermi_q(tmp0, tmp1, 0x13); ++ tmp0 = __lasx_xvilvl_b(zero, dest1); ++ tmp1 = __lasx_xvilvh_b(zero, dest1); ++ out4 = __lasx_xvpermi_q(tmp0, tmp1, 0x02); ++ out5 = __lasx_xvpermi_q(tmp0, tmp1, 0x13); ++ out4 = __lasx_xvshuf4i_h(out4, 0xff); ++ out5 = __lasx_xvshuf4i_h(out5, 0xff); ++ out0 = lasx_pix_multiply(out0, out4); ++ out2 = lasx_pix_multiply(out2, out5); ++ dest0 = __lasx_xvpickev_b(out2, out0); ++ dest0 = __lasx_xvpermi_d(dest0, 0xd8); ++ dest1 = __lasx_xvpickev_b(out3, out1); ++ dest1 = __lasx_xvpermi_d(dest1, 0xd8); ++ dest0 = __lasx_xvsadd_bu(dest0, dest1); ++ __lasx_xvst(dest0, dest, 0); ++ mask += 8; ++ width -= 8; ++ src += 8; ++ dest += 8; ++ } ++ } else { ++ while (width > 7) { ++ src0 = __lasx_xvld(src, 0); ++ dest0 = __lasx_xvld(dest, 0); ++ dest1 = __lasx_xvxori_b(dest0, 0xff); ++ tmp0 = __lasx_xvilvl_b(zero, src0); ++ tmp1 = __lasx_xvilvh_b(zero, src0); ++ out0 = __lasx_xvpermi_q(tmp0, tmp1, 0x02); ++ out2 = __lasx_xvpermi_q(tmp0, tmp1, 0x13); ++ tmp0 = 
__lasx_xvilvl_b(zero, dest0); ++ tmp1 = __lasx_xvilvh_b(zero, dest0); ++ out1 = __lasx_xvpermi_q(tmp0, tmp1, 0x02); ++ out3 = __lasx_xvpermi_q(tmp0, tmp1, 0x13); ++ tmp0 = __lasx_xvilvl_b(zero, dest1); ++ tmp1 = __lasx_xvilvh_b(zero, dest1); ++ out4 = __lasx_xvpermi_q(tmp0, tmp1, 0x02); ++ out5 = __lasx_xvpermi_q(tmp0, tmp1, 0x13); ++ out4 = __lasx_xvshuf4i_h(out4, 0xff); ++ out5 = __lasx_xvshuf4i_h(out5, 0xff); ++ out0 = lasx_pix_multiply(out0, out4); ++ out2 = lasx_pix_multiply(out2, out5); ++ dest0 = __lasx_xvpickev_b(out2, out0); ++ dest0 = __lasx_xvpermi_d(dest0, 0xd8); ++ dest1 = __lasx_xvpickev_b(out3, out1); ++ dest1 = __lasx_xvpermi_d(dest1, 0xd8); ++ dest0 = __lasx_xvsadd_bu(dest0, dest1); ++ __lasx_xvst(dest0, dest, 0); ++ width -= 8; ++ src += 8; ++ dest += 8; ++ } ++ } ++ ++ for (int i = 0; i < width; ++i) { ++ uint32_t s = combine_mask(src, mask, i); ++ uint32_t d = *(dest + i); ++ uint32_t ia = ALPHA_8 (~*(dest + i)); ++ UN8x4_MUL_UN8_ADD_UN8x4(s, ia, d); ++ *(dest + i) = s; ++ } ++} ++ ++static void ++lasx_combine_out_u (pixman_implementation_t *imp, ++ pixman_op_t op, ++ uint32_t * dest, ++ const uint32_t * src, ++ const uint32_t * mask, ++ int width) ++{ ++ __m256i src0, mask0, dest0; ++ __m256i zero = __lasx_xvldi(0); ++ __m256i out0, out1, out2, out3; ++ __m256i tmp0, tmp1; ++ ++ if(mask) { ++ while (width > 7) { ++ src0 = __lasx_xvld(src, 0); ++ dest0 = __lasx_xvld(dest, 0); ++ mask0 = __lasx_xvld(mask, 0); ++ ++ tmp0 = __lasx_xvilvl_b(zero, src0); ++ tmp1 = __lasx_xvilvh_b(zero, src0); ++ out0 = __lasx_xvpermi_q(tmp0, tmp1, 0x02); ++ out2 = __lasx_xvpermi_q(tmp0, tmp1, 0x13); ++ tmp0 = __lasx_xvilvl_b(zero, mask0); ++ tmp1 = __lasx_xvilvh_b(zero, mask0); ++ out1 = __lasx_xvpermi_q(tmp0, tmp1, 0x02); ++ out3 = __lasx_xvpermi_q(tmp0, tmp1, 0x13); ++ out1 = __lasx_xvshuf4i_h(out1, 0xff); ++ out3 = __lasx_xvshuf4i_h(out3, 0xff); ++ out0 = lasx_pix_multiply(out0, out1); ++ out2 = lasx_pix_multiply(out2, out3); ++ ++ dest0 = __lasx_xvxori_b(dest0, 0xff); ++ tmp0 = __lasx_xvilvl_b(zero, dest0); ++ tmp1 = __lasx_xvilvh_b(zero, dest0); ++ out1 = __lasx_xvpermi_q(tmp0, tmp1, 0x02); ++ out3 = __lasx_xvpermi_q(tmp0, tmp1, 0x13); ++ out1 = __lasx_xvshuf4i_h(out1, 0xff); ++ out3 = __lasx_xvshuf4i_h(out3, 0xff); ++ out0 = lasx_pix_multiply(out0, out1); ++ out2 = lasx_pix_multiply(out2, out3); ++ dest0 = __lasx_xvpickev_b(out2, out0); ++ dest0 = __lasx_xvpermi_d(dest0, 0xd8); ++ __lasx_xvst(dest0, dest, 0); ++ mask += 8; ++ width -= 8; ++ src += 8; ++ dest += 8; ++ } ++ } else { ++ while (width > 7) { ++ src0 = __lasx_xvld(src, 0); ++ dest0 = __lasx_xvld(dest, 0); ++ tmp0 = __lasx_xvilvl_b(zero, src0); ++ tmp1 = __lasx_xvilvh_b(zero, src0); ++ out0 = __lasx_xvpermi_q(tmp0, tmp1, 0x02); ++ out2 = __lasx_xvpermi_q(tmp0, tmp1, 0x13); ++ dest0 = __lasx_xvxori_b(dest0, 0xff); ++ tmp0 = __lasx_xvilvl_b(zero, dest0); ++ tmp1 = __lasx_xvilvh_b(zero, dest0); ++ out1 = __lasx_xvpermi_q(tmp0, tmp1, 0x02); ++ out3 = __lasx_xvpermi_q(tmp0, tmp1, 0x13); ++ out1 = __lasx_xvshuf4i_h(out1, 0xff); ++ out3 = __lasx_xvshuf4i_h(out3, 0xff); ++ out0 = lasx_pix_multiply(out0, out1); ++ out2 = lasx_pix_multiply(out2, out3); ++ dest0 = __lasx_xvpickev_b(out2, out0); ++ dest0 = __lasx_xvpermi_d(dest0, 0xd8); ++ __lasx_xvst(dest0, dest, 0); ++ width -= 8; ++ src += 8; ++ dest += 8; ++ } ++ } ++ ++ for (int i = 0; i < width; ++i) { ++ uint32_t s = combine_mask(src, mask, i); ++ uint32_t a = ALPHA_8 (~*(dest + i)); ++ UN8x4_MUL_UN8(s, a); ++ *(dest + i) = s; ++ } ++} ++ ++static void ++lasx_combine_out_reverse_u 
(pixman_implementation_t *imp, ++ pixman_op_t op, ++ uint32_t * dest, ++ const uint32_t * src, ++ const uint32_t * mask, ++ int width) ++{ ++ __m256i bit_set = __lasx_xvreplgr2vr_h(0xff); ++ __m256i src0, mask0, dest0; ++ __m256i zero = __lasx_xvldi(0); ++ __m256i out0, out1, out2, out3; ++ __m256i tmp0, tmp1; ++ ++ if(mask) { ++ while (width > 7) { ++ src0 = __lasx_xvld(src, 0); ++ dest0 = __lasx_xvld(dest, 0); ++ mask0 = __lasx_xvld(mask, 0); ++ ++ tmp0 = __lasx_xvilvl_b(zero, src0); ++ tmp1 = __lasx_xvilvh_b(zero, src0); ++ out0 = __lasx_xvpermi_q(tmp0, tmp1, 0x02); ++ out2 = __lasx_xvpermi_q(tmp0, tmp1, 0x13); ++ tmp0 = __lasx_xvilvl_b(zero, mask0); ++ tmp1 = __lasx_xvilvh_b(zero, mask0); ++ out1 = __lasx_xvpermi_q(tmp0, tmp1, 0x02); ++ out3 = __lasx_xvpermi_q(tmp0, tmp1, 0x13); ++ out1 = __lasx_xvshuf4i_h(out1, 0xff); ++ out3 = __lasx_xvshuf4i_h(out3, 0xff); ++ out0 = lasx_pix_multiply(out0, out1); ++ out2 = lasx_pix_multiply(out2, out3); ++ out1 = __lasx_xvxor_v(out0, bit_set); ++ out3 = __lasx_xvxor_v(out2, bit_set); ++ out1 = __lasx_xvshuf4i_h(out1, 0xff); ++ out3 = __lasx_xvshuf4i_h(out3, 0xff); ++ tmp0 = __lasx_xvilvl_b(zero, dest0); ++ tmp1 = __lasx_xvilvh_b(zero, dest0); ++ out0 = __lasx_xvpermi_q(tmp0, tmp1, 0x02); ++ out2 = __lasx_xvpermi_q(tmp0, tmp1, 0x13); ++ out0 = lasx_pix_multiply(out0, out1); ++ out2 = lasx_pix_multiply(out2, out3); ++ dest0 = __lasx_xvpickev_b(out2, out0); ++ dest0 = __lasx_xvpermi_d(dest0, 0xd8); ++ __lasx_xvst(dest0, dest, 0); ++ mask += 8; ++ width -= 8; ++ src += 8; ++ dest += 8; ++ } ++ } else { ++ while (width > 7) { ++ src0 = __lasx_xvld(src, 0); ++ dest0 = __lasx_xvld(dest, 0); ++ tmp0 = __lasx_xvilvl_b(zero, src0); ++ tmp1 = __lasx_xvilvh_b(zero, src0); ++ out0 = __lasx_xvpermi_q(tmp0, tmp1, 0x02); ++ out2 = __lasx_xvpermi_q(tmp0, tmp1, 0x13); ++ out1 = __lasx_xvxor_v(out0, bit_set); ++ out3 = __lasx_xvxor_v(out2, bit_set); ++ out1 = __lasx_xvshuf4i_h(out1, 0xff); ++ out3 = __lasx_xvshuf4i_h(out3, 0xff); ++ tmp0 = __lasx_xvilvl_b(zero, dest0); ++ tmp1 = __lasx_xvilvh_b(zero, dest0); ++ out0 = __lasx_xvpermi_q(tmp0, tmp1, 0x02); ++ out2 = __lasx_xvpermi_q(tmp0, tmp1, 0x13); ++ out0 = lasx_pix_multiply(out0, out1); ++ out2 = lasx_pix_multiply(out2, out3); ++ dest0 = __lasx_xvpickev_b(out2, out0); ++ dest0 = __lasx_xvpermi_d(dest0, 0xd8); ++ __lasx_xvst(dest0, dest, 0); ++ width -= 8; ++ src += 8; ++ dest += 8; ++ } ++ } ++ for (int i = 0; i < width; ++i) { ++ uint32_t s = combine_mask(src, mask, i); ++ uint32_t d = *(dest + i); ++ uint32_t a = ALPHA_8 (~s); ++ UN8x4_MUL_UN8 (d, a); ++ *(dest + i) = d; ++ } ++} ++ ++static void ++lasx_combine_add_u (pixman_implementation_t *imp, ++ pixman_op_t op, ++ uint32_t * dest, ++ const uint32_t * src, ++ const uint32_t * mask, ++ int width) ++{ ++ __m256i src0, mask0, dest0, dest1; ++ __m256i zero = __lasx_xvldi(0); ++ __m256i out0, out1, out2, out3; ++ __m256i tmp0, tmp1; ++ ++ if (mask) { ++ while (width > 7) { ++ src0 = __lasx_xvld(src, 0); ++ dest0 = __lasx_xvld(dest, 0); ++ mask0 = __lasx_xvld(mask, 0); ++ ++ tmp0 = __lasx_xvilvl_b(zero, src0); ++ tmp1 = __lasx_xvilvh_b(zero, src0); ++ out0 = __lasx_xvpermi_q(tmp0, tmp1, 0x02); ++ out2 = __lasx_xvpermi_q(tmp0, tmp1, 0x13); ++ tmp0 = __lasx_xvilvl_b(zero, mask0); ++ tmp1 = __lasx_xvilvh_b(zero, mask0); ++ out1 = __lasx_xvpermi_q(tmp0, tmp1, 0x02); ++ out3 = __lasx_xvpermi_q(tmp0, tmp1, 0x13); ++ out1 = __lasx_xvshuf4i_h(out1, 0xff); ++ out3 = __lasx_xvshuf4i_h(out3, 0xff); ++ out0 = lasx_pix_multiply(out0, out1); ++ out2 = lasx_pix_multiply(out2, 
out3); ++ ++ dest1 = __lasx_xvpickev_b(out2, out0); ++ dest1 = __lasx_xvpermi_d(dest1, 0xd8); ++ dest0 = __lasx_xvsadd_bu(dest0, dest1); ++ __lasx_xvst(dest0, dest, 0); ++ mask += 8; ++ width -= 8; ++ src += 8; ++ dest += 8; ++ } ++ } else { ++ while (width > 7) { ++ src0 = __lasx_xvld(src, 0); ++ dest0 = __lasx_xvld(dest, 0); ++ tmp0 = __lasx_xvilvl_b(zero, src0); ++ tmp1 = __lasx_xvilvh_b(zero, src0); ++ out0 = __lasx_xvpermi_q(tmp0, tmp1, 0x02); ++ out2 = __lasx_xvpermi_q(tmp0, tmp1, 0x13); ++ dest1 = __lasx_xvpickev_b(out2, out0); ++ dest1 = __lasx_xvpermi_d(dest1, 0xd8); ++ dest0 = __lasx_xvsadd_bu(dest0, dest1); ++ __lasx_xvst(dest0, dest, 0); ++ width -= 8; ++ src += 8; ++ dest += 8; ++ } ++ } ++ ++ for (int i = 0; i < width; ++i) { ++ uint32_t s = combine_mask(src, mask, i); ++ uint32_t d = *(dest + i); ++ UN8x4_ADD_UN8x4(d, s); ++ *(dest + i) = d; ++ } ++} ++ ++/* ++ * Multiply ++ * ++ * ad * as * B(d / ad, s / as) ++ * = ad * as * d/ad * s/as ++ * = d * s ++ * ++ */ ++static void ++lasx_combine_multiply_u (pixman_implementation_t *imp, ++ pixman_op_t op, ++ uint32_t * dest, ++ const uint32_t * src, ++ const uint32_t * mask, ++ int width) ++{ ++ __m256i bit_set = __lasx_xvreplgr2vr_h(0xff); ++ __m256i src0, mask0, dest0, dest1; ++ __m256i zero = __lasx_xvldi(0); ++ __m256i out0, out1, out2, out3, out4, out5, out6, out7; ++ __m256i tmp0, tmp1; ++ ++ if (mask) { ++ while (width > 7) { ++ src0 = __lasx_xvld(src, 0); ++ dest0 = __lasx_xvld(dest, 0); ++ mask0 = __lasx_xvld(mask, 0); ++ ++ tmp0 = __lasx_xvilvl_b(zero, src0); ++ tmp1 = __lasx_xvilvh_b(zero, src0); ++ out0 = __lasx_xvpermi_q(tmp0, tmp1, 0x02); ++ out2 = __lasx_xvpermi_q(tmp0, tmp1, 0x13); ++ tmp0 = __lasx_xvilvl_b(zero, mask0); ++ tmp1 = __lasx_xvilvh_b(zero, mask0); ++ out1 = __lasx_xvpermi_q(tmp0, tmp1, 0x02); ++ out3 = __lasx_xvpermi_q(tmp0, tmp1, 0x13); ++ out1 = __lasx_xvshuf4i_h(out1, 0xff); ++ out3 = __lasx_xvshuf4i_h(out3, 0xff); ++ out0 = lasx_pix_multiply(out0, out1); ++ out2 = lasx_pix_multiply(out2, out3); ++ ++ out1 = __lasx_xvxor_v(out0, bit_set); ++ out3 = __lasx_xvxor_v(out2, bit_set); ++ out1 = __lasx_xvshuf4i_h(out1, 0xff); ++ out3 = __lasx_xvshuf4i_h(out3, 0xff); ++ dest1 = __lasx_xvxori_b(dest0, 0xff); ++ dest1 = __lasx_xvshuf4i_b(dest1, 0xff); ++ tmp0 = __lasx_xvilvl_b(zero, dest0); ++ tmp1 = __lasx_xvilvh_b(zero, dest0); ++ out4 = __lasx_xvpermi_q(tmp0, tmp1, 0x02); ++ out5 = __lasx_xvpermi_q(tmp0, tmp1, 0x13); ++ tmp0 = __lasx_xvilvl_b(zero, dest1); ++ tmp1 = __lasx_xvilvh_b(zero, dest1); ++ out6 = __lasx_xvpermi_q(tmp0, tmp1, 0x02); ++ out7 = __lasx_xvpermi_q(tmp0, tmp1, 0x13); ++ out6 = lasx_pix_multiply(out0, out6); ++ out7 = lasx_pix_multiply(out2, out7); ++ out1 = lasx_pix_multiply(out4, out1); ++ out3 = lasx_pix_multiply(out5, out3); ++ dest0 = __lasx_xvpickev_b(out7, out6); ++ dest0 = __lasx_xvpermi_d(dest0, 0xd8); ++ dest1 = __lasx_xvpickev_b(out3, out1); ++ dest1 = __lasx_xvpermi_d(dest1, 0xd8); ++ dest0 = __lasx_xvsadd_bu(dest0, dest1); ++ ++ out4 = lasx_pix_multiply(out4, out0); ++ out5 = lasx_pix_multiply(out5, out2); ++ dest1 = __lasx_xvpickev_b(out5, out4); ++ dest1 = __lasx_xvpermi_d(dest1, 0xd8); ++ dest0 = __lasx_xvsadd_bu(dest0, dest1); ++ __lasx_xvst(dest0, dest, 0); ++ mask += 8; ++ width -= 8; ++ src += 8; ++ dest += 8; ++ } ++ } else { ++ while (width > 7) { ++ src0 = __lasx_xvld(src, 0); ++ dest0 = __lasx_xvld(dest, 0); ++ tmp0 = __lasx_xvilvl_b(zero, src0); ++ tmp1 = __lasx_xvilvh_b(zero, src0); ++ out0 = __lasx_xvpermi_q(tmp0, tmp1, 0x02); ++ out2 = __lasx_xvpermi_q(tmp0, 
tmp1, 0x13); ++ out1 = __lasx_xvxor_v(out0, bit_set); ++ out3 = __lasx_xvxor_v(out2, bit_set); ++ out1 = __lasx_xvshuf4i_h(out1, 0xff); ++ out3 = __lasx_xvshuf4i_h(out3, 0xff); ++ dest1 = __lasx_xvxori_b(dest0, 0xff); ++ dest1 = __lasx_xvshuf4i_b(dest1, 0xff); ++ tmp0 = __lasx_xvilvl_b(zero, dest0); ++ tmp1 = __lasx_xvilvh_b(zero, dest0); ++ out4 = __lasx_xvpermi_q(tmp0, tmp1, 0x02); ++ out5 = __lasx_xvpermi_q(tmp0, tmp1, 0x13); ++ tmp0 = __lasx_xvilvl_b(zero, dest1); ++ tmp1 = __lasx_xvilvh_b(zero, dest1); ++ out6 = __lasx_xvpermi_q(tmp0, tmp1, 0x02); ++ out7 = __lasx_xvpermi_q(tmp0, tmp1, 0x13); ++ out6 = lasx_pix_multiply(out0, out6); ++ out7 = lasx_pix_multiply(out2, out7); ++ out1 = lasx_pix_multiply(out4, out1); ++ out3 = lasx_pix_multiply(out5, out3); ++ dest0 = __lasx_xvpickev_b(out7, out6); ++ dest0 = __lasx_xvpermi_d(dest0, 0xd8); ++ dest1 = __lasx_xvpickev_b(out3, out1); ++ dest1 = __lasx_xvpermi_d(dest1, 0xd8); ++ dest0 = __lasx_xvsadd_bu(dest0, dest1); ++ ++ out4 = lasx_pix_multiply(out4, out0); ++ out5 = lasx_pix_multiply(out5, out2); ++ dest1 = __lasx_xvpickev_b(out5, out4); ++ dest1 = __lasx_xvpermi_d(dest1, 0xd8); ++ dest0 = __lasx_xvsadd_bu(dest0, dest1); ++ __lasx_xvst(dest0, dest, 0); ++ width -= 8; ++ src += 8; ++ dest += 8; ++ } ++ } ++ ++ for (int i = 0; i < width; ++i) { ++ uint32_t s = combine_mask(src, mask, i); ++ uint32_t d = *(dest + i); ++ uint32_t ss = s; ++ uint32_t src_ia = ALPHA_8(~s); ++ uint32_t dest_ia = ALPHA_8(~d); ++ ++ UN8x4_MUL_UN8_ADD_UN8x4_MUL_UN8(ss, dest_ia, d, src_ia); ++ UN8x4_MUL_UN8x4(d, s); ++ UN8x4_ADD_UN8x4(d, ss); ++ ++ *(dest + i) = d; ++ } ++} ++ ++static void ++lasx_combine_src_ca (pixman_implementation_t *imp, ++ pixman_op_t op, ++ uint32_t * dest, ++ const uint32_t * src, ++ const uint32_t * mask, ++ int width) ++{ ++ __m256i src0, mask0, dest0; ++ __m256i zero = __lasx_xvldi(0); ++ __m256i out0, out1, out2, out3; ++ __m256i tmp0, tmp1; ++ ++ while (width > 7) { ++ src0 = __lasx_xvld(src, 0); ++ mask0 = __lasx_xvld(mask, 0); ++ tmp0 = __lasx_xvilvl_b(zero, src0); ++ tmp1 = __lasx_xvilvh_b(zero, src0); ++ out0 = __lasx_xvpermi_q(tmp0, tmp1, 0x02); ++ out2 = __lasx_xvpermi_q(tmp0, tmp1, 0x13); ++ tmp0 = __lasx_xvilvl_b(zero, mask0); ++ tmp1 = __lasx_xvilvh_b(zero, mask0); ++ out1 = __lasx_xvpermi_q(tmp0, tmp1, 0x02); ++ out3 = __lasx_xvpermi_q(tmp0, tmp1, 0x13); ++ out0 = lasx_pix_multiply(out0, out1); ++ out2 = lasx_pix_multiply(out2, out3); ++ dest0 = __lasx_xvpickev_b(out2, out0); ++ dest0 = __lasx_xvpermi_d(dest0, 0xd8); ++ __lasx_xvst(dest0, dest, 0); ++ mask += 8; ++ width -= 8; ++ src += 8; ++ dest += 8; ++ } ++ ++ for (int i = 0; i < width; ++i) { ++ uint32_t s = *(src + i); ++ uint32_t m = *(mask + i); ++ combine_mask_value_ca(&s, &m); ++ *(dest + i) = s; ++ } ++} ++ ++static void ++lasx_combine_over_ca (pixman_implementation_t *imp, ++ pixman_op_t op, ++ uint32_t * dest, ++ const uint32_t * src, ++ const uint32_t * mask, ++ int width) ++{ ++ __m256i bit_set = __lasx_xvreplgr2vr_h(0xff); ++ __m256i src0, mask0, dest0, dest1; ++ __m256i zero = __lasx_xvldi(0); ++ __m256i out0, out1, out2, out3, out4, out5; ++ __m256i tmp0, tmp1; ++ ++ while (width > 7) { ++ src0 = __lasx_xvld(src, 0); ++ dest0 = __lasx_xvld(dest, 0); ++ mask0 = __lasx_xvld(mask, 0); ++ ++ tmp0 = __lasx_xvilvl_b(zero, src0); ++ tmp1 = __lasx_xvilvh_b(zero, src0); ++ out0 = __lasx_xvpermi_q(tmp0, tmp1, 0x02); ++ out2 = __lasx_xvpermi_q(tmp0, tmp1, 0x13); ++ tmp0 = __lasx_xvilvl_b(zero, mask0); ++ tmp1 = __lasx_xvilvh_b(zero, mask0); ++ out1 = 
__lasx_xvpermi_q(tmp0, tmp1, 0x02); ++ out3 = __lasx_xvpermi_q(tmp0, tmp1, 0x13); ++ out4 = lasx_pix_multiply(out0, out1); ++ out5 = lasx_pix_multiply(out2, out3); ++ out0 = __lasx_xvshuf4i_h(out0, 0xff); ++ out2 = __lasx_xvshuf4i_h(out2, 0xff); ++ out1 = lasx_pix_multiply(out1, out0); ++ out3 = lasx_pix_multiply(out3, out2); ++ ++ out1 = __lasx_xvxor_v(out1, bit_set); ++ out3 = __lasx_xvxor_v(out3, bit_set); ++ tmp0 = __lasx_xvilvl_b(zero, dest0); ++ tmp1 = __lasx_xvilvh_b(zero, dest0); ++ out0 = __lasx_xvpermi_q(tmp0, tmp1, 0x02); ++ out2 = __lasx_xvpermi_q(tmp0, tmp1, 0x13); ++ out1 = lasx_pix_multiply(out1, out0); ++ out3 = lasx_pix_multiply(out3, out2); ++ ++ dest0 = __lasx_xvpickev_b(out5, out4); ++ dest0 = __lasx_xvpermi_d(dest0, 0xd8); ++ dest1 = __lasx_xvpickev_b(out3, out1); ++ dest1 = __lasx_xvpermi_d(dest1, 0xd8); ++ dest0 = __lasx_xvsadd_bu(dest0, dest1); ++ __lasx_xvst(dest0, dest, 0); ++ mask += 8; ++ width -= 8; ++ src += 8; ++ dest += 8; ++ } ++ ++ for (int i = 0; i < width; ++i) { ++ uint32_t s = *(src + i); ++ uint32_t m = *(mask + i); ++ uint32_t a; ++ ++ combine_mask_ca (&s, &m); ++ a = ~m; ++ if (a) { ++ uint32_t d = *(dest + i); ++ UN8x4_MUL_UN8x4_ADD_UN8x4(d, a, s); ++ s = d; ++ } ++ *(dest + i) = s; ++ } ++} ++ ++static void ++lasx_combine_out_reverse_ca (pixman_implementation_t *imp, ++ pixman_op_t op, ++ uint32_t * dest, ++ const uint32_t * src, ++ const uint32_t * mask, ++ int width) ++{ ++ __m256i bit_set = __lasx_xvreplgr2vr_h(0xff); ++ __m256i src0, mask0, dest0; ++ __m256i zero = __lasx_xvldi(0); ++ __m256i out0, out1, out2, out3; ++ __m256i tmp0, tmp1; ++ ++ while (width > 7) { ++ src0 = __lasx_xvld(src, 0); ++ dest0 = __lasx_xvld(dest, 0); ++ mask0 = __lasx_xvld(mask, 0); ++ ++ tmp0 = __lasx_xvilvl_b(zero, src0); ++ tmp1 = __lasx_xvilvh_b(zero, src0); ++ out0 = __lasx_xvpermi_q(tmp0, tmp1, 0x02); ++ out2 = __lasx_xvpermi_q(tmp0, tmp1, 0x13); ++ tmp0 = __lasx_xvilvl_b(zero, mask0); ++ tmp1 = __lasx_xvilvh_b(zero, mask0); ++ out1 = __lasx_xvpermi_q(tmp0, tmp1, 0x02); ++ out3 = __lasx_xvpermi_q(tmp0, tmp1, 0x13); ++ out0 = __lasx_xvshuf4i_h(out0, 0xff); ++ out2 = __lasx_xvshuf4i_h(out2, 0xff); ++ out1 = lasx_pix_multiply(out1, out0); ++ out3 = lasx_pix_multiply(out3, out2); ++ ++ out1 = __lasx_xvxor_v(out1, bit_set); ++ out3 = __lasx_xvxor_v(out3, bit_set); ++ tmp0 = __lasx_xvilvl_b(zero, dest0); ++ tmp1 = __lasx_xvilvh_b(zero, dest0); ++ out0 = __lasx_xvpermi_q(tmp0, tmp1, 0x02); ++ out2 = __lasx_xvpermi_q(tmp0, tmp1, 0x13); ++ out1 = lasx_pix_multiply(out1, out0); ++ out3 = lasx_pix_multiply(out3, out2); ++ dest0 = __lasx_xvpickev_b(out3, out1); ++ dest0 = __lasx_xvpermi_d(dest0, 0xd8); ++ __lasx_xvst(dest0, dest, 0); ++ mask += 8; ++ width -= 8; ++ src += 8; ++ dest += 8; ++ } ++ ++ for (int i = 0; i < width; ++i) { ++ uint32_t s = *(src + i); ++ uint32_t m = *(mask + i); ++ uint32_t a; ++ ++ combine_mask_alpha_ca(&s, &m); ++ a = ~m; ++ ++ if (a != ~0) { ++ uint32_t d = 0; ++ ++ if (a) { ++ d = *(dest + i); ++ UN8x4_MUL_UN8x4(d, a); ++ } ++ *(dest + i) = d; ++ } ++ } ++} ++ ++/* ++ * w : length in bytes ++ */ ++static void force_inline ++lasx_blt_one_line_u8 (uint8_t *pDst, uint8_t *pSrc, int w) ++{ ++ while (((uintptr_t)pDst & 31) && w) { ++ *pDst = *pSrc; ++ pSrc += 1; ++ pDst += 1; ++ w -= 1; ++ } ++ ++ while (w >= 64) { ++ __m256i src0, src1; ++ src0 = __lasx_xvld(pSrc, 0); ++ src1 = __lasx_xvld(pSrc, 32); ++ __lasx_xvst(src0, pDst, 0); ++ __lasx_xvst(src1, pDst, 32); ++ ++ w -= 64; ++ pSrc += 64; ++ pDst += 64; ++ } ++ ++ if (w >= 32) { ++ 
__lasx_xvst(__lasx_xvld(pSrc, 0), pDst, 0); ++ ++ w -= 32; ++ pSrc += 32; ++ pDst += 32; ++ } ++ ++ while (w >= 8) { ++ *(uint64_t *)pDst = *(uint64_t *)pSrc; ++ ++ w -= 8; ++ pSrc += 8; ++ pDst += 8; ++ } ++ ++ while (w--) { ++ /* copy one bytes once a time */ ++ *pDst++ = *pSrc++; ++ } ++} ++ ++/* ++ * w : length in half word ++ */ ++static void ++lasx_blt_one_line_u16 (uint16_t *pDst, uint16_t *pSrc, int w) ++{ ++ /* align the dst to 32 byte */ ++ while (((uintptr_t)pDst & 31) && w) { ++ *pDst++ = *pSrc++; ++ --w; ++ } ++ ++ while (w >= 64) { ++ /* copy 128 bytes */ ++ __m256i src0, src1, src2, src3; ++ ++ src0 = __lasx_xvld(pSrc, 0); ++ src1 = __lasx_xvld(pSrc, 32); ++ src2 = __lasx_xvld(pSrc, 64); ++ src3 = __lasx_xvld(pSrc, 96); ++ ++ __lasx_xvst(src0, pDst, 0); ++ __lasx_xvst(src1, pDst, 32); ++ __lasx_xvst(src2, pDst, 64); ++ __lasx_xvst(src3, pDst, 96); ++ ++ w -= 64; ++ pSrc += 64; ++ pDst += 64; ++ } ++ ++ if (w >= 32) { ++ /* copy 64 bytes */ ++ __m256i src0, src1; ++ ++ src0 = __lasx_xvld(pSrc, 0); ++ src1 = __lasx_xvld(pSrc, 32); ++ ++ __lasx_xvst(src0, pDst, 0); ++ __lasx_xvst(src1, pDst, 32); ++ ++ w -= 32; ++ pSrc += 32; ++ pDst += 32; ++ } ++ ++ if (w >= 16) { ++ /* copy 32 bytes */ ++ __lasx_xvst(__lasx_xvld(pSrc, 0), pDst, 0); ++ ++ w -= 16; ++ pSrc += 16; ++ pDst += 16; ++ } ++ ++ while (w--) { ++ /* copy 2 bytes once a time */ ++ *pDst++ = *pSrc++; ++ } ++} ++ ++/* ++ * w : length in word ++ */ ++static force_inline void ++lasx_blt_one_line_u32 (uint32_t *pDst, uint32_t *pSrc, int w) ++{ ++ /* align the dst to 32 byte */ ++ while (((uintptr_t)pDst & 31) && w) { ++ *pDst++ = *pSrc++; ++ --w; ++ } ++ ++ while (w >= 64) { ++ __m256i src0, src1, src2, src3; ++ __m256i src4, src5, src6, src7; ++ ++ src0 = __lasx_xvld(pSrc, 0); ++ src1 = __lasx_xvld(pSrc, 32); ++ src2 = __lasx_xvld(pSrc, 64); ++ src3 = __lasx_xvld(pSrc, 96); ++ src4 = __lasx_xvld(pSrc, 128); ++ src5 = __lasx_xvld(pSrc, 160); ++ src6 = __lasx_xvld(pSrc, 192); ++ src7 = __lasx_xvld(pSrc, 224); ++ ++ __lasx_xvst(src0, pDst, 0); ++ __lasx_xvst(src1, pDst, 32); ++ __lasx_xvst(src2, pDst, 64); ++ __lasx_xvst(src3, pDst, 96); ++ __lasx_xvst(src4, pDst, 128); ++ __lasx_xvst(src5, pDst, 160); ++ __lasx_xvst(src6, pDst, 192); ++ __lasx_xvst(src7, pDst, 224); ++ ++ w -= 64; ++ pSrc += 64; ++ pDst += 64; ++ } ++ ++ if (w >= 32) { ++ /* copy 32 bytes once a time */ ++ __m256i src0, src1, src2, src3; ++ ++ src0 = __lasx_xvld(pSrc, 0); ++ src1 = __lasx_xvld(pSrc, 32); ++ src2 = __lasx_xvld(pSrc, 64); ++ src3 = __lasx_xvld(pSrc, 96); ++ ++ __lasx_xvst(src0, pDst, 0); ++ __lasx_xvst(src1, pDst, 32); ++ __lasx_xvst(src2, pDst, 64); ++ __lasx_xvst(src3, pDst, 96); ++ ++ w -= 32; ++ pSrc += 32; ++ pDst += 32; ++ } ++ ++ if (w >= 16) { ++ /* copy 64 bytes once a time */ ++ __m256i src0, src1; ++ ++ src0 = __lasx_xvld(pSrc, 0); ++ src1 = __lasx_xvld(pSrc, 32); ++ ++ __lasx_xvst(src0, pDst, 0); ++ __lasx_xvst(src1, pDst, 32); ++ ++ w -= 16; ++ pSrc += 16; ++ pDst += 16; ++ } ++ ++ if (w >= 8) { ++ __m256i src; ++ /* copy 32 bytes once a time */ ++ src = __lasx_xvld(pSrc, 0); ++ __lasx_xvst(src, pDst, 0); ++ ++ w -= 8; ++ pSrc += 8; ++ pDst += 8; ++ } ++ ++ while (w--) { ++ /* copy 4 bytes once a time */ ++ *pDst++ = *pSrc++; ++ } ++} ++ ++static pixman_bool_t ++lasx_blt (pixman_implementation_t *imp, ++ uint32_t * src_bits, ++ uint32_t * dst_bits, ++ int src_stride, ++ int dst_stride, ++ int src_bpp, ++ int dst_bpp, ++ int src_x, ++ int src_y, ++ int dest_x, ++ int dest_y, ++ int width, ++ int height) ++{ ++ if (src_bpp != 
dst_bpp) ++ return FALSE; ++ ++ if (src_bpp == 8) { ++ uint8_t *src_b = (uint8_t *)src_bits; ++ uint8_t *dst_b = (uint8_t *)dst_bits; ++ ++ src_stride = src_stride * 4; ++ dst_stride = dst_stride * 4; ++ ++ src_b += src_stride * src_y + src_x; ++ dst_b += dst_stride * dest_y + dest_x; ++ ++ while (height--) { ++ lasx_blt_one_line_u8 (dst_b, src_b, width); ++ dst_b += dst_stride; ++ src_b += src_stride; ++ } ++ ++ return TRUE; ++ } ++ ++ if (src_bpp == 16) { ++ uint16_t *src_h = (uint16_t *)src_bits; ++ uint16_t *dst_h = (uint16_t *)dst_bits; ++ ++ src_stride = src_stride * 2; ++ dst_stride = dst_stride * 2; ++ ++ src_h += src_stride * src_y + src_x; ++ dst_h += dst_stride * dest_y + dest_x; ++ ++ while (height--) { ++ lasx_blt_one_line_u16 (dst_h, src_h, width); ++ dst_h += dst_stride; ++ src_h += src_stride; ++ } ++ ++ return TRUE; ++ } ++ ++ if (src_bpp == 32) { ++ src_bits += src_stride * src_y + src_x; ++ dst_bits += dst_stride * dest_y + dest_x; ++ ++ while (height--) { ++ lasx_blt_one_line_u32 (dst_bits, src_bits, width); ++ dst_bits += dst_stride; ++ src_bits += src_stride; ++ } ++ ++ return TRUE; ++ } ++ ++ return FALSE; ++} ++ ++static void ++lasx_fill_u8 (uint8_t *dst, ++ int stride, ++ int x, ++ int y, ++ int width, ++ int height, ++ uint8_t filler) ++{ ++ __m256i xvfill = __lasx_xvreplgr2vr_b(filler); ++ int byte_stride = stride * 4; ++ dst += y * byte_stride + x; ++ ++ while (height--) { ++ int w = width; ++ uint8_t *d = dst; ++ ++ while (w && ((uintptr_t)d & 31)) { ++ *d = filler; ++ w--; ++ d++; ++ } ++ ++ while (w >= 64) { ++ __lasx_xvst(xvfill, d, 0); ++ __lasx_xvst(xvfill, d, 32); ++ w -= 64; ++ d += 64; ++ } ++ ++ if (w >= 32) { ++ __lasx_xvst(xvfill, d, 0); ++ w -= 32; ++ d += 32; ++ } ++ ++ while (w) { ++ *d = filler; ++ w--; ++ d++; ++ } ++ ++ dst += byte_stride; ++ } ++} ++ ++static void ++lasx_fill_u16 (uint16_t *dst, ++ int stride, ++ int x, ++ int y, ++ int width, ++ int height, ++ uint16_t filler) ++{ ++ __m256i xvfill = __lasx_xvreplgr2vr_h(filler); ++ int short_stride = stride * 2; ++ dst += y * short_stride + x; ++ ++ while (height--) { ++ int w = width; ++ uint16_t *d = dst; ++ ++ while (w && ((uintptr_t)d & 31)) { ++ *d = filler; ++ w--; ++ d++; ++ } ++ ++ while (w >= 32) { ++ __lasx_xvst(xvfill, d, 0); ++ __lasx_xvst(xvfill, d, 32); ++ w -= 32; ++ d += 32; ++ } ++ ++ if (w >= 16) { ++ __lasx_xvst(xvfill, d, 0); ++ w -= 16; ++ d += 16; ++ } ++ ++ while (w) { ++ *d = filler; ++ w--; ++ d++; ++ } ++ ++ dst += short_stride; ++ } ++} ++ ++static void ++lasx_fill_u32 (uint32_t *bits, ++ int stride, ++ int x, ++ int y, ++ int width, ++ int height, ++ uint32_t filler) ++{ ++ __m256i xvfill = __lasx_xvreplgr2vr_w(filler); ++ bits += y * stride + x; ++ ++ while (height--) { ++ int w = width; ++ uint32_t *d = bits; ++ ++ while (w && ((uintptr_t)d & 31)) { ++ *d = filler; ++ w--; ++ d++; ++ } ++ ++ while (w >= 32) { ++ __lasx_xvst(xvfill, d, 0); ++ __lasx_xvst(xvfill, d, 32); ++ __lasx_xvst(xvfill, d, 64); ++ __lasx_xvst(xvfill, d, 96); ++ w -= 32; ++ d += 32; ++ } ++ ++ if (w >= 16) { ++ __lasx_xvst(xvfill, d, 0); ++ __lasx_xvst(xvfill, d, 32); ++ w -= 16; ++ d += 16; ++ } ++ ++ if (w >= 8) { ++ __lasx_xvst(xvfill, d, 0); ++ w -= 8; ++ d += 8; ++ } ++ ++ while (w) { ++ *d = filler; ++ w--; ++ d++; ++ } ++ ++ bits += stride; ++ } ++} ++ ++static pixman_bool_t ++lasx_fill (pixman_implementation_t *imp, ++ uint32_t * bits, ++ int stride, ++ int bpp, ++ int x, ++ int y, ++ int width, ++ int height, ++ uint32_t filler) ++{ ++ switch (bpp) { ++ case 8: ++ lasx_fill_u8 
((uint8_t *)bits, stride, x, y, width, height, (uint8_t)filler); ++ return TRUE; ++ ++ case 16: ++ lasx_fill_u16 ((uint16_t *)bits, stride, x, y, width, height, (uint16_t)filler); ++ return TRUE; ++ ++ case 32: ++ lasx_fill_u32 (bits, stride, x, y, width, height, filler); ++ return TRUE; ++ ++ default: ++ return FALSE; ++ } ++ ++ return TRUE; ++} ++ ++ ++static void ++lasx_composite_over_n_8_8888 (pixman_implementation_t *imp, ++ pixman_composite_info_t *info) ++{ ++ PIXMAN_COMPOSITE_ARGS (info); ++ uint32_t src, srca; ++ uint32_t *dst_line, *dst, d; ++ uint8_t *mask_line, *mask, m; ++ int dst_stride, mask_stride; ++ int32_t w; ++ v8u32 vsrca, vsrc; ++ __m256i vff; ++ ++ src = _pixman_image_get_solid(imp, src_image, dest_image->bits.format); ++ vsrc = (v8u32)__lasx_xvreplgr2vr_w(src); ++ srca = src >> 24; ++ vsrca = (v8u32)__lasx_xvreplgr2vr_w(srca); ++ vff = __lasx_xvreplgr2vr_w(0xff); ++ ++ if (src == 0) ++ return; ++ ++ PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1); ++ PIXMAN_IMAGE_GET_LINE (mask_image, mask_x, mask_y, uint8_t, mask_stride, mask_line, 1); ++ ++ while (height--) { ++ dst = dst_line; ++ dst_line += dst_stride; ++ mask = mask_line; ++ mask_line += mask_stride; ++ w = width; ++ ++ while (w >= 8) { ++ v8u32 ma = {mask[0], mask[1], mask[2], mask[3], ++ mask[4], mask[5], mask[6], mask[7]}; ++ ++ if (__lasx_xbnz_w(__lasx_xvseqi_w((__m256i)ma, 0xff))){ ++ if (__lasx_xbnz_w(__lasx_xvseqi_w(vsrca, 0xff))) ++ *(__m256i*) dst = (__m256i)vsrc; ++ else if (__lasx_xbnz_w(__lasx_xvsub_w((__m256i)ma, vff))) ++ *(__m256i*) dst = lasx_over_u((__m256i)vsrc, *(__m256i*)dst); ++ } else if (__lasx_xbnz_w((__m256i)ma)) { ++ __m256i d0 = lasx_in_u((__m256i)vsrc, (__m256i)ma); ++ *(__m256i*) dst = lasx_over_u(d0, *(__m256i*)dst); ++ } else { ++ for(int i = 0; i < 8; i++) { ++ if (mask[i] == 0xff) { ++ if (vsrca[i] == 0xff) ++ *(dst + i) = vsrc[i]; ++ else ++ *(dst + i) = over(vsrc[i], *(dst + i)); ++ } else if (mask[i]) { ++ m = mask[i]; ++ d = in(vsrc[i], m); ++ *(dst + i) = over(d, *(dst + i)); ++ } ++ } ++ } ++ dst += 8; ++ w -= 8; ++ mask += 8; ++ } ++ ++ while (w--) { ++ m = *mask++; ++ if (m == 0xff) { ++ if (srca == 0xff) ++ *dst = src; ++ else ++ *dst = over(src, *dst); ++ } else if (m) { ++ d = in(src, m); ++ *dst = over(d, *dst); ++ } ++ dst++; ++ } ++ } ++} ++ ++static void ++lasx_composite_add_8_8 (pixman_implementation_t *imp, ++ pixman_composite_info_t *info) ++{ ++ PIXMAN_COMPOSITE_ARGS (info); ++ uint8_t *dst_line, *dst; ++ uint8_t *src_line, *src; ++ int dst_stride, src_stride; ++ int32_t w; ++ uint16_t t; ++ ++ PIXMAN_IMAGE_GET_LINE (src_image, src_x, src_y, uint8_t, src_stride, src_line, 1); ++ PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint8_t, dst_stride, dst_line, 1); ++ ++ while (height--) { ++ dst = dst_line; ++ src = src_line; ++ ++ dst_line += dst_stride; ++ src_line += src_stride; ++ w = width; ++ ++ lasx_combine_add_u(imp, op, (uint32_t *)dst, (uint32_t *)src, NULL, w >> 2); ++ dst += w & 0xfffc; ++ src += w & 0xfffc; ++ w &= 3; ++ ++ while (w--) { ++ t = (*dst) + (*src++); ++ *dst++ = t | (0 - (t >> 8)); ++ } ++ } ++} ++ ++static void ++lasx_composite_add_8888_8888 (pixman_implementation_t *imp, ++ pixman_composite_info_t *info) ++{ ++ PIXMAN_COMPOSITE_ARGS (info); ++ uint32_t *dst_line; ++ uint32_t *src_line; ++ int dst_stride, src_stride; ++ ++ PIXMAN_IMAGE_GET_LINE (src_image, src_x, src_y, uint32_t, src_stride, src_line, 1); ++ PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1); ++ 
++ while (height--) { ++ lasx_combine_add_u(imp, op, dst_line, src_line, NULL, width); ++ dst_line += dst_stride; ++ src_line += src_stride; ++ } ++} ++ ++static void ++lasx_composite_over_8888_8888 (pixman_implementation_t *imp, ++ pixman_composite_info_t *info) ++{ ++ PIXMAN_COMPOSITE_ARGS (info); ++ int dst_stride, src_stride; ++ uint32_t *dst_line; ++ uint32_t *src_line; ++ ++ PIXMAN_IMAGE_GET_LINE (src_image, src_x, src_y, uint32_t, src_stride, src_line, 1); ++ PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1); ++ ++ while (height--) { ++ lasx_combine_over_u_no_mask (dst_line, src_line, width); ++ dst_line += dst_stride; ++ src_line += src_stride; ++ } ++} ++ ++static void ++lasx_composite_copy_area (pixman_implementation_t *imp, ++ pixman_composite_info_t *info) ++{ ++ bits_image_t src_bits, dst_bits; ++ src_bits = info->src_image->bits; ++ dst_bits = info->dest_image->bits; ++ lasx_blt (imp, src_bits.bits, ++ dst_bits.bits, ++ src_bits.rowstride, ++ dst_bits.rowstride, ++ PIXMAN_FORMAT_BPP (src_bits.format), ++ PIXMAN_FORMAT_BPP (dst_bits.format), ++ info->src_x, info->src_y, info->dest_x, ++ info->dest_y, info->width, info->height); ++} ++ ++static void ++lasx_composite_src_x888_0565 (pixman_implementation_t *imp, ++ pixman_composite_info_t *info) ++{ ++ PIXMAN_COMPOSITE_ARGS (info); ++ uint16_t *dst_line, *dst; ++ uint32_t *src_line, *src, s; ++ int dst_stride, src_stride; ++ int32_t w; ++ ++ __m256i src0, src1; ++ __m256i rb0, rb1, t0, t1, g0, g1; ++ __m256i tmp; ++ __m256i mask_565_rb = __lasx_xvreplgr2vr_w(0x00f800f8); ++ __m256i mask_multiplier = __lasx_xvreplgr2vr_w(0x20000004); ++ __m256i mask_green_4x32 = __lasx_xvreplgr2vr_w(0x0000fc00); ++ ++ PIXMAN_IMAGE_GET_LINE (src_image, src_x, src_y, uint32_t, src_stride, src_line, 1); ++ PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint16_t, dst_stride, dst_line, 1); ++ ++ while (height--) { ++ dst = dst_line; ++ dst_line += dst_stride; ++ src = src_line; ++ src_line += src_stride; ++ w = width; ++ ++ while (w && (uintptr_t)src & 31) { ++ s = *src++; ++ *dst = convert_8888_to_0565(s); ++ dst++; ++ w--; ++ } ++ ++ while (w >= 16) { ++ src0 = __lasx_xvld(src, 0); ++ src1 = __lasx_xvld(src, 32); ++ src += 16; ++ w -= 16; ++ ++ rb0 = src0 & mask_565_rb; ++ rb1 = src1 & mask_565_rb; ++ t0 = __lasx_xvdp2_w_h(rb0, mask_multiplier); ++ t1 = __lasx_xvdp2_w_h(rb1, mask_multiplier); ++ g0 = src0 & mask_green_4x32; ++ g1 = src1 & mask_green_4x32; ++ t0 |= g0; ++ t1 |= g1; ++ t0 = __lasx_xvslli_w(t0, 11); ++ t1 = __lasx_xvslli_w(t1, 11); ++ t0 = __lasx_xvsrai_w(t0, 16); ++ t1 = __lasx_xvsrai_w(t1, 16); ++ t0 = __lasx_xvsat_h(t0, 15); ++ t1 = __lasx_xvsat_h(t1, 15); ++ tmp = __lasx_xvpickev_h(t1, t0); ++ tmp = __lasx_xvpermi_d(tmp, 0xd8); ++ __lasx_xvst(tmp, dst, 0); ++ dst += 16; ++ } ++ ++ if (w >= 8) { ++ src0 = __lasx_xvld(src, 0); ++ src += 8; ++ w -= 8; ++ ++ rb0 = src0 & mask_565_rb; ++ t0 = __lasx_xvdp2_w_h(rb0, mask_multiplier); ++ g0 = src0 & mask_green_4x32; ++ t0 |= g0; ++ t0 = __lasx_xvslli_w(t0, 11); ++ t0 = __lasx_xvsrai_w(t0, 16); ++ t0 = __lasx_xvsat_h(t0, 15); ++ tmp = __lasx_xvpickev_h(t0, t0); ++ __lasx_xvstelm_d(tmp, dst, 0, 0); ++ __lasx_xvstelm_d(tmp, dst, 8, 2); ++ dst += 8; ++ } ++ ++ while (w--) { ++ s = *src++; ++ *dst = convert_8888_to_0565(s); ++ dst++; ++ } ++ } ++} ++ ++static void ++lasx_composite_in_n_8_8 (pixman_implementation_t *imp, ++ pixman_composite_info_t *info) ++{ ++ PIXMAN_COMPOSITE_ARGS(info); ++ uint8_t *dst_line, *dst; ++ uint8_t *mask_line, *mask; ++ int 
dst_stride, mask_stride; ++ uint32_t m, src, srca; ++ int32_t w; ++ uint16_t t; ++ ++ __m256i alpha, tmp; ++ __m256i vmask, vmask_lo, vmask_hi; ++ __m256i vdst, vdst_lo, vdst_hi; ++ __m256i mask_zero = __lasx_xvldi(0); ++ ++ PIXMAN_IMAGE_GET_LINE(dest_image, dest_x, dest_y, uint8_t, dst_stride, dst_line, 1); ++ PIXMAN_IMAGE_GET_LINE(mask_image, mask_x, mask_y, uint8_t, mask_stride, mask_line, 1); ++ ++ src = _pixman_image_get_solid(imp, src_image, dest_image->bits.format); ++ srca = src >> 24; ++ alpha = __lasx_xvreplgr2vr_w(src); ++ alpha = __lasx_xvilvl_b(mask_zero, alpha); ++ alpha = __lasx_xvshuf4i_w(alpha, 0x44); ++ alpha = __lasx_xvshuf4i_h(alpha, 0xff); ++ ++ while (height--) { ++ dst = dst_line; ++ dst_line += dst_stride; ++ mask = mask_line; ++ mask_line += mask_stride; ++ w = width; ++ ++ while (w >= 32) { ++ vmask = __lasx_xvld(mask, 0); ++ vdst = __lasx_xvld(dst, 0); ++ mask += 32; ++ w -= 32; ++ ++ vmask_lo = __lasx_vext2xv_hu_bu(vmask); ++ vdst_lo = __lasx_vext2xv_hu_bu(vdst); ++ vmask_hi = __lasx_xvpermi_q(vmask, vmask, 0x03); ++ vdst_hi = __lasx_xvpermi_q(vdst, vdst, 0x03); ++ vmask_hi = __lasx_vext2xv_hu_bu(vmask_hi); ++ vdst_hi = __lasx_vext2xv_hu_bu(vdst_hi); ++ vmask_lo = lasx_pix_multiply(alpha, vmask_lo); ++ vmask_hi = lasx_pix_multiply(alpha, vmask_hi); ++ vdst_lo = lasx_pix_multiply(vmask_lo, vdst_lo); ++ vdst_hi = lasx_pix_multiply(vmask_hi, vdst_hi); ++ vdst_lo = __lasx_xvsat_bu(vdst_lo, 7); ++ vdst_hi = __lasx_xvsat_bu(vdst_hi, 7); ++ tmp = __lasx_xvpickev_b(vdst_hi, vdst_lo); ++ tmp = __lasx_xvpermi_d(tmp, 0xd8); ++ __lasx_xvst(tmp, dst, 0); ++ dst += 32; ++ } ++ ++ if (w >= 16) { ++ vmask = __lasx_xvld(mask, 0); ++ vdst = __lasx_xvld(dst, 0); ++ mask += 16; ++ w -= 16; ++ ++ vmask_lo = __lasx_vext2xv_hu_bu(vmask); ++ vdst_lo = __lasx_vext2xv_hu_bu(vdst); ++ vmask_hi = __lasx_xvpermi_q(vmask, vmask, 0x03); ++ vdst_hi = __lasx_xvpermi_q(vdst, vdst, 0x03); ++ vmask_hi = __lasx_vext2xv_hu_bu(vmask_hi); ++ vdst_hi = __lasx_vext2xv_hu_bu(vdst_hi); ++ vmask_lo = lasx_pix_multiply(alpha, vmask_lo); ++ vmask_hi = lasx_pix_multiply(alpha, vmask_hi); ++ vdst_lo = lasx_pix_multiply(vmask_lo, vdst_lo); ++ vdst_hi = lasx_pix_multiply(vmask_hi, vdst_hi); ++ vdst_lo = __lasx_xvsat_bu(vdst_lo, 7); ++ vdst_hi = __lasx_xvsat_bu(vdst_hi, 7); ++ tmp = __lasx_xvpickev_b(vdst_hi, vdst_lo); ++ __lasx_xvstelm_d(tmp, dst, 0, 0); ++ __lasx_xvstelm_d(tmp, dst, 8, 2); ++ dst += 16; ++ } ++ ++ while (w--) { ++ m = *mask++; ++ m = MUL_UN8(m, srca, t); ++ if (m == 0) ++ *dst = 0; ++ else if (m != 0xff) ++ *dst = MUL_UN8(m, *dst, t); ++ dst++; ++ } ++ } ++} ++ ++static void ++lasx_composite_in_8_8 (pixman_implementation_t *imp, ++ pixman_composite_info_t *info) ++{ ++ PIXMAN_COMPOSITE_ARGS (info); ++ uint8_t *dst_line, *dst; ++ uint8_t *src_line, *src; ++ int src_stride, dst_stride; ++ int32_t w, s; ++ uint16_t t; ++ ++ __m256i tmp; ++ __m256i vsrc, vsrc_lo, vsrc_hi; ++ __m256i vdst, vdst_lo, vdst_hi; ++ ++ PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint8_t, dst_stride, dst_line, 1); ++ PIXMAN_IMAGE_GET_LINE (src_image, src_x, src_y, uint8_t, src_stride, src_line, 1); ++ ++ while (height--) { ++ dst = dst_line; ++ dst_line += dst_stride; ++ src = src_line; ++ src_line += src_stride; ++ w = width; ++ ++ while (w >= 32) { ++ vsrc = __lasx_xvld(src, 0); ++ vdst = __lasx_xvld(dst, 0); ++ src += 32; ++ w -= 32; ++ ++ vsrc_lo = __lasx_vext2xv_hu_bu(vsrc); ++ vdst_lo = __lasx_vext2xv_hu_bu(vdst); ++ vsrc_hi = __lasx_xvpermi_q(vsrc, vsrc, 0x03); ++ vdst_hi = __lasx_xvpermi_q(vdst, vdst, 0x03); 
++ vsrc_hi = __lasx_vext2xv_hu_bu(vsrc_hi); ++ vdst_hi = __lasx_vext2xv_hu_bu(vdst_hi); ++ vdst_lo = lasx_pix_multiply(vsrc_lo, vdst_lo); ++ vdst_hi = lasx_pix_multiply(vsrc_hi, vdst_hi); ++ vdst_lo = __lasx_xvsat_bu(vdst_lo, 7); ++ vdst_hi = __lasx_xvsat_bu(vdst_hi, 7); ++ tmp = __lasx_xvpickev_b(vdst_hi, vdst_lo); ++ tmp = __lasx_xvpermi_d(tmp, 0xd8); ++ __lasx_xvst(tmp, dst, 0); ++ dst += 32; ++ } ++ ++ if (w >= 16) { ++ vsrc = __lasx_xvld(src, 0); ++ vdst = __lasx_xvld(dst, 0); ++ src += 16; ++ w -= 16; ++ ++ vsrc_lo = __lasx_vext2xv_hu_bu(vsrc); ++ vdst_lo = __lasx_vext2xv_hu_bu(vdst); ++ vsrc_hi = __lasx_xvpermi_q(vsrc, vsrc, 0x03); ++ vdst_hi = __lasx_xvpermi_q(vdst, vdst, 0x03); ++ vsrc_hi = __lasx_vext2xv_hu_bu(vsrc_hi); ++ vdst_hi = __lasx_vext2xv_hu_bu(vdst_hi); ++ vdst_lo = lasx_pix_multiply(vsrc_lo, vdst_lo); ++ vdst_hi = lasx_pix_multiply(vsrc_hi, vdst_hi); ++ vdst_lo = __lasx_xvsat_bu(vdst_lo, 7); ++ vdst_hi = __lasx_xvsat_bu(vdst_hi, 7); ++ tmp = __lasx_xvpickev_b(vdst_hi, vdst_lo); ++ __lasx_xvstelm_d(tmp, dst, 0, 0); ++ __lasx_xvstelm_d(tmp, dst, 8, 2); ++ dst += 16; ++ } ++ ++ while (w--) { ++ s = *src++; ++ if (s == 0) ++ *dst = 0; ++ else if (s != 0xff) ++ *dst = MUL_UN8(s, *dst, t); ++ dst++; ++ } ++ } ++} ++ ++static void ++lasx_composite_over_n_8888_8888_ca (pixman_implementation_t *imp, ++ pixman_composite_info_t *info) ++{ ++ PIXMAN_COMPOSITE_ARGS (info); ++ uint32_t src, srca, ns; ++ uint32_t *dst_line, *dst, nd; ++ uint32_t *mask_line, *mask, ma; ++ int dst_stride, mask_stride; ++ int32_t w; ++ ++ __m256i d, m, t; ++ __m256i tmp0, tmp1; ++ __m256i s, sa, d0, d1, m0, m1, t0, t1; ++ __m256i zero = __lasx_xvldi(0); ++ __m256i bit_set = __lasx_xvreplgr2vr_h(0xff); ++ src = _pixman_image_get_solid (imp, src_image, dest_image->bits.format); ++ srca = src >> 24; ++ if (src == 0) ++ return; ++ ++ PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1); ++ PIXMAN_IMAGE_GET_LINE (mask_image, mask_x, mask_y, uint32_t, mask_stride, mask_line, 1); ++ s = __lasx_xvreplgr2vr_w(src); ++ tmp0 = __lasx_xvilvl_b(zero, s); ++ tmp1 = __lasx_xvilvh_b(zero, s); ++ s = __lasx_xvpermi_q(tmp0, tmp1, 0x02); ++ sa = __lasx_xvshuf4i_h(s, 0xff); ++ ++ while(height --) { ++ dst = dst_line; ++ dst_line += dst_stride; ++ mask = mask_line; ++ mask_line += mask_stride; ++ w = width; ++ ++ while (w && ((uintptr_t)dst & 31)) { ++ ma = *mask++; ++ if (ma == 0xffffffff) { ++ if (srca == 0xff) ++ *dst = src; ++ else ++ *dst = over(src, *dst); ++ } else if (ma) { ++ nd = *dst; ++ ns = src; ++ ++ UN8x4_MUL_UN8x4(ns, ma); ++ UN8x4_MUL_UN8(ma, srca); ++ ma = ~ma; ++ UN8x4_MUL_UN8x4_ADD_UN8x4(nd, ma, ns); ++ ++ *dst = nd; ++ } ++ dst++; ++ w--; ++ } ++ ++ while(w >= 8) { ++ m = __lasx_xvld(mask, 0); ++ mask += 8; ++ w -= 8; ++ ++ if (__lasx_xbnz_v(m)) { ++ d = __lasx_xvld(dst, 0); ++ d0 = __lasx_vext2xv_hu_bu(d); ++ m0 = __lasx_vext2xv_hu_bu(m); ++ d1 = __lasx_xvpermi_q(d, d, 0x03); ++ m1 = __lasx_xvpermi_q(m, m, 0x03); ++ d1 = __lasx_vext2xv_hu_bu(d1); ++ m1 = __lasx_vext2xv_hu_bu(m1); ++ ++ t0 = lasx_pix_multiply(s, m0); ++ t1 = lasx_pix_multiply(s, m1); ++ ++ m0 = lasx_pix_multiply(m0, sa); ++ m1 = lasx_pix_multiply(m1, sa); ++ m0 = __lasx_xvxor_v(m0, bit_set); ++ m1 = __lasx_xvxor_v(m1, bit_set); ++ d0 = lasx_pix_multiply(d0, m0); ++ d1 = lasx_pix_multiply(d1, m1); ++ ++ d = __lasx_xvpickev_b(d1, d0); ++ t = __lasx_xvpickev_b(t1, t0); ++ d = __lasx_xvpermi_d(d, 0xd8); ++ t = __lasx_xvpermi_d(t, 0xd8); ++ d = __lasx_xvsadd_bu(d, t); ++ __lasx_xvst(d, dst, 0); ++ } ++ dst += 
8; ++ } ++ ++ while(w--) { ++ ma = *mask++; ++ if (ma == 0xffffffff) { ++ if (srca == 0xff) ++ *dst = src; ++ else ++ *dst = over(src, *dst); ++ } else if (ma) { ++ nd = *dst; ++ ns = src; ++ ++ UN8x4_MUL_UN8x4(ns, ma); ++ UN8x4_MUL_UN8(ma, srca); ++ ma = ~ma; ++ UN8x4_MUL_UN8x4_ADD_UN8x4(nd, ma, ns); ++ ++ *dst = nd; ++ } ++ dst++; ++ } ++ } ++} ++ ++static void ++lasx_composite_over_reverse_n_8888 (pixman_implementation_t *imp, ++ pixman_composite_info_t *info) ++{ ++ PIXMAN_COMPOSITE_ARGS (info); ++ uint32_t src; ++ uint32_t *dst_line, *dst; ++ int dst_stride; ++ int32_t w; ++ ++ __m256i d, t; ++ __m256i s, d0, d1; ++ __m256i tmp0, tmp1; ++ __m256i zero = __lasx_xvldi(0); ++ __m256i bit_set = __lasx_xvreplgr2vr_h(0xff); ++ src = _pixman_image_get_solid (imp, src_image, dest_image->bits.format); ++ if (src == 0) ++ return; ++ ++ PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1); ++ s = __lasx_xvreplgr2vr_w(src); ++ tmp0 = __lasx_xvilvl_b(zero, s); ++ tmp1 = __lasx_xvilvh_b(zero, s); ++ s = __lasx_xvpermi_q(tmp0, tmp1, 0x02); ++ ++ while (height--) ++ { ++ dst = dst_line; ++ dst_line += dst_stride; ++ w = width; ++ ++ while (w && ((uintptr_t)dst & 31)) { ++ d = __lasx_xvldrepl_w(dst, 0); ++ d0 = __lasx_vext2xv_hu_bu(d); ++ d0 = __lasx_xvshuf4i_h(d0, 0xff); ++ d0 = __lasx_xvxor_v(d0, bit_set); ++ d0 = lasx_pix_multiply(d0, s); ++ t = __lasx_xvpickev_b(zero, d0); ++ t = __lasx_xvpermi_d(t, 0xd8); ++ d = __lasx_xvsadd_bu(d, t); ++ __lasx_xvstelm_w(d, dst, 0, 0); ++ dst += 1; ++ w--; ++ } ++ ++ while (w >= 8) { ++ d = __lasx_xvld(dst, 0); ++ w -= 8; ++ ++ d0 = __lasx_vext2xv_hu_bu(d); ++ d1 = __lasx_xvpermi_q(d, d, 0x03); ++ d1 = __lasx_vext2xv_hu_bu(d1); ++ d0 = __lasx_xvshuf4i_h(d0, 0xff); ++ d1 = __lasx_xvshuf4i_h(d1, 0xff); ++ d0 = __lasx_xvxor_v(d0, bit_set); ++ d1 = __lasx_xvxor_v(d1, bit_set); ++ d0 = lasx_pix_multiply(d0, s); ++ d1 = lasx_pix_multiply(d1, s); ++ t = __lasx_xvpickev_b(d1, d0); ++ t = __lasx_xvpermi_d(t, 0xd8); ++ d = __lasx_xvsadd_bu(d, t); ++ __lasx_xvst(d, dst, 0); ++ dst += 8; ++ } ++ ++ while (w--) { ++ d = __lasx_xvldrepl_w(dst, 0); ++ d0 = __lasx_vext2xv_hu_bu(d); ++ d0 = __lasx_xvshuf4i_h(d0, 0xff); ++ d0 = __lasx_xvxor_v(d0, bit_set); ++ d0 = lasx_pix_multiply(d0, s); ++ t = __lasx_xvpickev_b(zero, d0); ++ t = __lasx_xvpermi_d(t, 0xd8); ++ d = __lasx_xvsadd_bu(d, t); ++ __lasx_xvstelm_w(d, dst, 0, 0); ++ dst += 1; ++ } ++ } ++} ++ ++static void ++lasx_composite_src_x888_8888 (pixman_implementation_t *imp, ++ pixman_composite_info_t *info) ++{ ++ PIXMAN_COMPOSITE_ARGS (info); ++ uint32_t *dst_line, *dst; ++ uint32_t *src_line, *src; ++ int32_t w; ++ int dst_stride, src_stride; ++ __m256i mask = mask_ff000000; ++ __m256i vsrc0, vsrc1, vsrc2, vsrc3, vsrc4, vsrc5, vsrc6, vsrc7; ++ ++ PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1); ++ PIXMAN_IMAGE_GET_LINE (src_image, src_x, src_y, uint32_t, src_stride, src_line, 1); ++ ++ while (height--) { ++ dst = dst_line; ++ dst_line += dst_stride; ++ src = src_line; ++ src_line += src_stride; ++ w = width; ++ ++ while (w && ((uintptr_t)dst & 31)) { ++ *dst++ = *src++ | 0xff000000; ++ w--; ++ } ++ ++ while (w >= 64) { ++ vsrc0 = __lasx_xvld(src, 0); ++ vsrc1 = __lasx_xvld(src, 32); ++ vsrc2 = __lasx_xvld(src, 64); ++ vsrc3 = __lasx_xvld(src, 96); ++ vsrc4 = __lasx_xvld(src, 128); ++ vsrc5 = __lasx_xvld(src, 160); ++ vsrc6 = __lasx_xvld(src, 192); ++ vsrc7 = __lasx_xvld(src, 224); ++ vsrc0 = __lasx_xvor_v(vsrc0, mask); ++ vsrc1 = __lasx_xvor_v(vsrc1, 
mask); ++ vsrc2 = __lasx_xvor_v(vsrc2, mask); ++ vsrc3 = __lasx_xvor_v(vsrc3, mask); ++ vsrc4 = __lasx_xvor_v(vsrc4, mask); ++ vsrc5 = __lasx_xvor_v(vsrc5, mask); ++ vsrc6 = __lasx_xvor_v(vsrc6, mask); ++ vsrc7 = __lasx_xvor_v(vsrc7, mask); ++ __lasx_xvst(vsrc0, dst, 0); ++ __lasx_xvst(vsrc1, dst, 32); ++ __lasx_xvst(vsrc2, dst, 64); ++ __lasx_xvst(vsrc3, dst, 96); ++ __lasx_xvst(vsrc4, dst, 128); ++ __lasx_xvst(vsrc5, dst, 160); ++ __lasx_xvst(vsrc6, dst, 192); ++ __lasx_xvst(vsrc7, dst, 224); ++ ++ src += 64; ++ w -= 64; ++ dst += 64; ++ } ++ ++ if (w >= 32) { ++ vsrc0 = __lasx_xvld(src, 0); ++ vsrc1 = __lasx_xvld(src, 32); ++ vsrc2 = __lasx_xvld(src, 64); ++ vsrc3 = __lasx_xvld(src, 96); ++ vsrc0 = __lasx_xvor_v(vsrc0, mask); ++ vsrc1 = __lasx_xvor_v(vsrc1, mask); ++ vsrc2 = __lasx_xvor_v(vsrc2, mask); ++ vsrc3 = __lasx_xvor_v(vsrc3, mask); ++ __lasx_xvst(vsrc0, dst, 0); ++ __lasx_xvst(vsrc1, dst, 32); ++ __lasx_xvst(vsrc2, dst, 64); ++ __lasx_xvst(vsrc3, dst, 96); ++ ++ src += 32; ++ w -= 32; ++ dst += 32; ++ } ++ ++ if (w >= 16) { ++ vsrc0 = __lasx_xvld(src, 0); ++ vsrc1 = __lasx_xvld(src, 32); ++ vsrc0 = __lasx_xvor_v(vsrc0, mask); ++ vsrc1 = __lasx_xvor_v(vsrc1, mask); ++ __lasx_xvst(vsrc0, dst, 0); ++ __lasx_xvst(vsrc1, dst, 32); ++ ++ src += 16; ++ w -= 16; ++ dst += 16; ++ } ++ ++ if (w >= 8) { ++ vsrc0 = __lasx_xvld(src, 0); ++ vsrc0 = __lasx_xvor_v(vsrc0, mask); ++ __lasx_xvst(vsrc0, dst, 0); ++ ++ src += 8; ++ w -= 8; ++ dst += 8; ++ } ++ ++ while (w--) { ++ *dst++ = *src++ | 0xff000000; ++ } ++ } ++} ++ ++static void ++lasx_composite_add_n_8_8 (pixman_implementation_t *imp, ++ pixman_composite_info_t *info) ++{ ++ PIXMAN_COMPOSITE_ARGS (info); ++ uint8_t *dst_line, *dst; ++ uint8_t *mask_line, *mask; ++ int dst_stride, mask_stride; ++ int32_t w; ++ uint32_t src; ++ uint16_t sa; ++ ++ __m256i d0; ++ __m256i vsrc, t0, t1; ++ __m256i a0, a0_l, a0_h; ++ __m256i b0, b0_l, b0_h; ++ __m256i zero = __lasx_xvldi(0); ++ __m256i one_half = __lasx_xvreplgr2vr_h(0x80); ++ __m256i g_shift = __lasx_xvreplgr2vr_h(8); ++ ++ PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint8_t, dst_stride, dst_line, 1); ++ PIXMAN_IMAGE_GET_LINE (mask_image, mask_x, mask_y, uint8_t, mask_stride, mask_line, 1); ++ ++ src = _pixman_image_get_solid (imp, src_image, dest_image->bits.format); ++ ++ sa = (src >> 24); ++ vsrc = __lasx_xvreplgr2vr_h(sa); ++ ++ while (height--) { ++ dst = dst_line; ++ dst_line += dst_stride; ++ mask = mask_line; ++ mask_line += mask_stride; ++ w = width; ++ ++ while (w >= 32) { ++ a0 = __lasx_xvld(mask, 0); ++ w -= 32; ++ mask += 32; ++ ++ a0_l = __lasx_vext2xv_hu_bu(a0); ++ a0_h = __lasx_xvpermi_q(a0, a0, 0x03); ++ a0_h = __lasx_vext2xv_hu_bu(a0_h); ++ ++ a0_l = __lasx_xvmadd_h(one_half, a0_l, vsrc); ++ a0_h = __lasx_xvmadd_h(one_half, a0_h, vsrc); ++ ++ a0_l = __lasx_xvsadd_hu(__lasx_xvsrl_h(a0_l, g_shift), a0_l); ++ a0_h = __lasx_xvsadd_hu(__lasx_xvsrl_h(a0_h, g_shift), a0_h); ++ ++ a0_l = __lasx_xvsrl_h(a0_l, g_shift); ++ a0_h = __lasx_xvsrl_h(a0_h, g_shift); ++ ++ b0 = __lasx_xvld(dst, 0); ++ b0_l = __lasx_vext2xv_hu_bu(b0); ++ b0_h = __lasx_xvpermi_q(b0, b0, 0x03); ++ b0_h = __lasx_vext2xv_hu_bu(b0_h); ++ ++ t0 = __lasx_xvadd_h(a0_l, b0_l); ++ t1 = __lasx_xvadd_h(a0_h, b0_h); ++ ++ t0 = __lasx_xvor_v(t0, __lasx_xvsub_h(zero, __lasx_xvsrl_h(t0, g_shift))); ++ t1 = __lasx_xvor_v(t1, __lasx_xvsub_h(zero, __lasx_xvsrl_h(t1, g_shift))); ++ ++ t0 = __lasx_xvsat_hu(t0, 7); ++ t1 = __lasx_xvsat_hu(t1 ,7); ++ ++ d0 = __lasx_xvpickev_b(t1, t0); ++ d0 = __lasx_xvpermi_d(d0, 0xd8); ++ 
__lasx_xvst(d0, dst, 0); ++ dst += 32; ++ } ++ ++ while (w >= 16) { ++ a0 = __lasx_xvld(mask, 0); ++ w -= 16; ++ mask += 16; ++ ++ a0_l = __lasx_vext2xv_hu_bu(a0); ++ a0_h = __lasx_xvpermi_q(a0, a0, 0x03); ++ a0_h = __lasx_vext2xv_hu_bu(a0_h); ++ ++ a0_l = __lasx_xvmadd_h(one_half, a0_l, vsrc); ++ a0_h = __lasx_xvmadd_h(one_half, a0_h, vsrc); ++ ++ a0_l = __lasx_xvsadd_hu(__lasx_xvsrl_h(a0_l, g_shift), a0_l); ++ a0_h = __lasx_xvsadd_hu(__lasx_xvsrl_h(a0_h, g_shift), a0_h); ++ ++ a0_l = __lasx_xvsrl_h(a0_l, g_shift); ++ a0_h = __lasx_xvsrl_h(a0_h, g_shift); ++ ++ b0 = __lasx_xvld(dst, 0); ++ b0_l = __lasx_vext2xv_hu_bu(b0); ++ b0_h = __lasx_xvpermi_q(b0, b0, 0x03); ++ b0_h = __lasx_vext2xv_hu_bu(b0_h); ++ ++ t0 = __lasx_xvadd_h(a0_l, b0_l); ++ t1 = __lasx_xvadd_h(a0_h, b0_h); ++ ++ t0 = __lasx_xvor_v(t0, __lasx_xvsub_h(zero, __lasx_xvsrl_h(t0, g_shift))); ++ t1 = __lasx_xvor_v(t1, __lasx_xvsub_h(zero, __lasx_xvsrl_h(t1, g_shift))); ++ ++ t0 = __lasx_xvsat_hu(t0, 7); ++ t1 = __lasx_xvsat_hu(t1, 7); ++ ++ d0 = __lasx_xvpickev_b(t1, t0); ++ __lasx_xvstelm_d(d0, dst, 0, 0); ++ __lasx_xvstelm_d(d0, dst, 8, 2); ++ dst += 16; ++ } ++ ++ while (w >= 8) { ++ a0 = __lasx_xvld(mask, 0); ++ w -= 8; ++ mask += 8; ++ ++ a0_l = __lasx_vext2xv_hu_bu(a0); ++ a0_h = __lasx_xvpermi_q(a0, a0, 0x03); ++ a0_h = __lasx_vext2xv_hu_bu(a0_h); ++ ++ a0_l = __lasx_xvmadd_h(one_half, a0_l, vsrc); ++ a0_h = __lasx_xvmadd_h(one_half, a0_h, vsrc); ++ ++ a0_l = __lasx_xvsadd_hu(__lasx_xvsrl_h(a0_l, g_shift), a0_l); ++ a0_h = __lasx_xvsadd_hu(__lasx_xvsrl_h(a0_h, g_shift), a0_h); ++ ++ a0_l = __lasx_xvsrl_h(a0_l, g_shift); ++ a0_h = __lasx_xvsrl_h(a0_h, g_shift); ++ ++ b0 = __lasx_xvld(dst, 0); ++ b0_l = __lasx_vext2xv_hu_bu(b0); ++ b0_h = __lasx_xvpermi_q(b0, b0, 0x03); ++ b0_h = __lasx_vext2xv_hu_bu(b0_h); ++ ++ t0 = __lasx_xvadd_h(a0_l, b0_l); ++ t1 = __lasx_xvadd_h(a0_h, b0_h); ++ ++ t0 = __lasx_xvor_v(t0, __lasx_xvsub_h(zero, __lasx_xvsrl_h(t0, g_shift))); ++ t1 = __lasx_xvor_v(t1, __lasx_xvsub_h(zero, __lasx_xvsrl_h(t1, g_shift))); ++ ++ t0 = __lasx_xvsat_hu(t0, 7); ++ t1 = __lasx_xvsat_hu(t1, 7); ++ ++ d0 = __lasx_xvpickev_b(t1, t0); ++ __lasx_xvstelm_d(d0, dst, 0, 0); ++ dst += 8; ++ } ++ ++ while (w >= 4) { ++ a0 = __lasx_xvld(mask, 0); ++ w -= 4; ++ mask += 4; ++ ++ a0_l = __lasx_vext2xv_hu_bu(a0); ++ a0_h = __lasx_xvpermi_q(a0, a0, 0x03); ++ a0_h = __lasx_vext2xv_hu_bu(a0_h); ++ ++ a0_l = __lasx_xvmadd_h(one_half, a0_l, vsrc); ++ a0_h = __lasx_xvmadd_h(one_half, a0_h, vsrc); ++ ++ a0_l = __lasx_xvsadd_hu(__lasx_xvsrl_h(a0_l, g_shift), a0_l); ++ a0_h = __lasx_xvsadd_hu(__lasx_xvsrl_h(a0_h, g_shift), a0_h); ++ ++ a0_l = __lasx_xvsrl_h(a0_l, g_shift); ++ a0_h = __lasx_xvsrl_h(a0_h, g_shift); ++ ++ b0 = __lasx_xvld(dst, 0); ++ b0_l = __lasx_vext2xv_hu_bu(b0); ++ b0_h = __lasx_xvpermi_q(b0, b0, 0x03); ++ b0_h = __lasx_vext2xv_hu_bu(b0_h); ++ ++ t0 = __lasx_xvadd_h(a0_l, b0_l); ++ t1 = __lasx_xvadd_h(a0_h, b0_h); ++ ++ t0 = __lasx_xvor_v(t0, __lasx_xvsub_h(zero, __lasx_xvsrl_h(t0, g_shift))); ++ t1 = __lasx_xvor_v(t1, __lasx_xvsub_h(zero, __lasx_xvsrl_h(t1, g_shift))); ++ ++ t0 = __lasx_xvsat_hu(t0, 7); ++ t1 = __lasx_xvsat_hu(t1, 7); ++ ++ d0 = __lasx_xvpickev_b(t1, t0); ++ __lasx_xvstelm_w(d0, dst, 0, 0); ++ dst += 4; ++ } ++ ++ while (w--) { ++ uint16_t tmp; ++ uint16_t a; ++ uint32_t m, d; ++ uint32_t r; ++ ++ a = *mask++; ++ d = *dst; ++ ++ m = MUL_UN8 (sa, a, tmp); ++ r = ADD_UN8 (m, d, tmp); ++ ++ *dst++ = r; ++ } ++ } ++} ++ ++static void ++lasx_composite_add_n_8 (pixman_implementation_t *imp, ++ 
pixman_composite_info_t *info) ++{ ++ PIXMAN_COMPOSITE_ARGS (info); ++ uint8_t *dst_line, *dst; ++ int dst_stride; ++ int32_t w; ++ uint32_t src; ++ ++ __m256i vsrc, d0, d1; ++ ++ PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint8_t, dst_stride, dst_line, 1); ++ ++ src = _pixman_image_get_solid (imp, src_image, dest_image->bits.format); ++ src >>= 24; ++ ++ if (src == 0x00) ++ return; ++ ++ if (src == 0xff) { ++ pixman_fill (dest_image->bits.bits, dest_image->bits.rowstride, ++ 8, dest_x, dest_y, width, height, 0xff); ++ return; ++ } ++ ++ vsrc = __lasx_xvreplgr2vr_b(src); ++ ++ while (height--) { ++ dst = dst_line; ++ dst_line += dst_stride; ++ w = width; ++ ++ while (w && ((uintptr_t)dst & 31)) { ++ d0 = __lasx_xvldrepl_b(dst, 0); ++ d0 = __lasx_xvsadd_bu(vsrc, d0); ++ __lasx_xvstelm_b(d0, dst, 0, 0); ++ dst++; ++ w--; ++ } ++ ++ while (w >= 64) { ++ d0 = __lasx_xvld(dst, 0); ++ d1 = __lasx_xvld(dst, 32); ++ w -= 64; ++ d0 = __lasx_xvsadd_bu(vsrc, d0); ++ d1 = __lasx_xvsadd_bu(vsrc, d1); ++ __lasx_xvst(d0, dst, 0); ++ __lasx_xvst(d1, dst, 32); ++ dst += 64; ++ } ++ ++ if (w >= 32) { ++ d0 = __lasx_xvld(dst, 0); ++ w -= 32; ++ d0 = __lasx_xvsadd_bu(vsrc, d0); ++ __lasx_xvst(d0, dst, 0); ++ dst += 32; ++ } ++ ++ if (w >= 8) { ++ d0 = __lasx_xvldrepl_d(dst, 0); ++ w -= 8; ++ d0 = __lasx_xvsadd_bu(vsrc, d0); ++ __lasx_xvstelm_d(d0, dst, 0, 0); ++ dst += 8; ++ } ++ ++ if (w >= 4) { ++ d0 = __lasx_xvldrepl_w(dst, 0); ++ w -= 4; ++ d0 = __lasx_xvsadd_bu(vsrc, d0); ++ __lasx_xvstelm_w(d0, dst, 0, 0); ++ dst += 4; ++ } ++ ++ while (w--) { ++ d0 = __lasx_xvldrepl_b(dst, 0); ++ d0 = __lasx_xvsadd_bu(vsrc, d0); ++ __lasx_xvstelm_b(d0, dst, 0, 0); ++ dst++; ++ } ++ } ++} ++ ++static void ++lasx_composite_add_n_8888 (pixman_implementation_t *imp, ++ pixman_composite_info_t *info) ++{ ++ PIXMAN_COMPOSITE_ARGS (info); ++ uint32_t *dst_line, *dst, src; ++ int dst_stride, w; ++ ++ __m256i vsrc, d0, d1; ++ ++ PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1); ++ ++ src = _pixman_image_get_solid (imp, src_image, dest_image->bits.format); ++ if (src == 0) ++ return; ++ ++ if (src == ~0) { ++ pixman_fill (dest_image->bits.bits, dest_image->bits.rowstride, 32, ++ dest_x, dest_y, width, height, ~0); ++ return; ++ } ++ ++ vsrc = __lasx_xvreplgr2vr_w(src); ++ ++ while (height--) { ++ w = width; ++ ++ dst = dst_line; ++ dst_line += dst_stride; ++ ++ while (w && (uintptr_t)dst & 31) { ++ d0 = __lasx_xvldrepl_w(dst, 0); ++ d0 = __lasx_xvsadd_bu(vsrc, d0); ++ __lasx_xvstelm_w(d0, dst, 0, 0); ++ dst++; ++ w--; ++ } ++ ++ while (w >= 16) { ++ d0 = __lasx_xvld(dst, 0); ++ d1 = __lasx_xvld(dst, 32); ++ w -= 16; ++ d0 = __lasx_xvsadd_bu(vsrc, d0); ++ d1 = __lasx_xvsadd_bu(vsrc, d1); ++ __lasx_xvst(d0, dst, 0); ++ __lasx_xvst(d1, dst, 32); ++ dst += 16; ++ } ++ ++ if (w >= 8) { ++ d0 = __lasx_xvld(dst, 0); ++ w -= 8; ++ d0 = __lasx_xvsadd_bu(vsrc, d0); ++ __lasx_xvst(d0, dst, 0); ++ dst += 8; ++ } ++ ++ if (w >= 4) { ++ d0 = __lasx_xvld(dst, 0); ++ w -= 4; ++ d0 = __lasx_xvsadd_bu(vsrc, d0); ++ __lasx_xvstelm_d(d0, dst, 0, 0); ++ __lasx_xvstelm_d(d0, dst, 8, 1); ++ dst += 4; ++ } ++ ++ while (w--) { ++ d0 = __lasx_xvldrepl_w(dst, 0); ++ d0 = __lasx_xvsadd_bu(vsrc, d0); ++ __lasx_xvstelm_w(d0, dst, 0, 0); ++ dst++; ++ } ++ } ++} ++ ++static force_inline __m256i ++unpack_32_1x256(uint32_t data) ++{ ++ __m256i zero = __lasx_xvldi(0); ++ __m256i tmp = __lasx_xvinsgr2vr_w(zero, data, 0); ++ return __lasx_xvilvl_b(zero, tmp); ++} ++ ++static force_inline __m256i 
++unpack_32_2x256(uint32_t data) ++{ ++ __m256i tmp0, out0; ++ __m256i zero = __lasx_xvldi(0); ++ tmp0 = __lasx_xvinsgr2vr_w(tmp0, data, 0); ++ tmp0 = __lasx_xvpermi_q(tmp0, tmp0, 0x20); ++ out0 = __lasx_xvilvl_b(zero, tmp0); ++ ++ return out0; ++} ++ ++static force_inline __m256i ++expand_pixel_32_1x256(uint32_t data) ++{ ++ return __lasx_xvshuf4i_w(unpack_32_1x256(data), 0x44); ++} ++ ++static force_inline __m256i ++expand_pixel_32_2x256(uint32_t data) ++{ ++ return __lasx_xvshuf4i_w(unpack_32_2x256(data), 0x44); ++} ++ ++static force_inline __m256i ++expand_alpha_1x256(__m256i data) ++{ ++ return __lasx_xvshuf4i_h(data, 0xff); ++} ++ ++static force_inline __m256i ++expand_alphaa_2x256(__m256i data) ++{ ++ __m256i tmp0; ++ tmp0 = __lasx_xvshuf4i_h(data, 0xff); ++ tmp0 = __lasx_xvpermi_q(tmp0, tmp0, 0x20); ++ ++ return tmp0; ++} ++ ++static force_inline __m256i ++unpack_565_to_8888(__m256i lo) ++{ ++ __m256i r, g, b, rb, t; ++ __m256i mask_green_4x32 = __lasx_xvreplgr2vr_w(0x0000fc00); ++ __m256i mask_red_4x32 = __lasx_xvreplgr2vr_w(0x00f80000); ++ __m256i mask_blue_4x32 = __lasx_xvreplgr2vr_w(0x000000f8); ++ __m256i mask_565_fix_rb = __lasx_xvreplgr2vr_w(0x00e000e0); ++ __m256i mask_565_fix_g = __lasx_xvreplgr2vr_w(0x0000c000); ++ ++ r = __lasx_xvslli_w(lo, 8); ++ r = __lasx_xvand_v(r, mask_red_4x32); ++ g = __lasx_xvslli_w(lo, 5); ++ g = __lasx_xvand_v(g, mask_green_4x32); ++ b = __lasx_xvslli_w(lo, 3); ++ b = __lasx_xvand_v(b, mask_blue_4x32); ++ ++ rb = __lasx_xvor_v(r, b); ++ t = __lasx_xvand_v(rb, mask_565_fix_rb); ++ t = __lasx_xvsrli_w(t, 5); ++ rb = __lasx_xvor_v(rb, t); ++ ++ t = __lasx_xvand_v(g, mask_565_fix_g); ++ t = __lasx_xvsrli_w(t, 6); ++ g = __lasx_xvor_v(g, t); ++ ++ return (__lasx_xvor_v(rb, g)); ++} ++ ++static force_inline void ++unpack_256_2x256(__m256i data, __m256i *data_lo, __m256i *data_hi) ++{ ++ __m256i mask_zero = __lasx_xvldi(0); ++ *data_lo = __lasx_xvilvl_b(mask_zero, data); ++ *data_hi = __lasx_xvilvh_b(mask_zero, data); ++} ++ ++static force_inline void ++unpack_565_256_4x256(__m256i data, __m256i *data0, ++ __m256i *data1, __m256i *data2, __m256i *data3) ++{ ++ __m256i lo, hi; ++ __m256i zero = __lasx_xvldi(0); ++ lo = __lasx_xvilvl_h(zero, data); ++ hi = __lasx_xvilvh_h(zero, data); ++ lo = unpack_565_to_8888(lo); ++ hi = unpack_565_to_8888(hi); ++ ++ unpack_256_2x256((__m256i)lo, (__m256i*)data0, (__m256i*)data1); ++ unpack_256_2x256((__m256i)hi, (__m256i*)data2, (__m256i*)data3); ++} ++ ++static force_inline void ++negate_2x256(__m256i data_lo, __m256i data_hi, __m256i *neg_lo, __m256i *neg_hi) ++{ ++ *neg_lo = __lasx_xvxor_v(data_lo, mask_00ff); ++ *neg_hi = __lasx_xvxor_v(data_hi, mask_00ff); ++} ++ ++static force_inline void ++over_2x256(__m256i *src_lo, __m256i *src_hi, __m256i *alpha_lo, ++ __m256i *alpha_hi, __m256i *dst_lo, __m256i *dst_hi) ++{ ++ __m256i t1, t2; ++ negate_2x256(*alpha_lo, *alpha_hi, &t1, &t2); ++ *dst_lo = lasx_pix_multiply(*dst_lo, t1); ++ *dst_hi = lasx_pix_multiply(*dst_hi, t2); ++ *dst_lo = __lasx_xvsadd_bu(*src_lo, *dst_lo); ++ *dst_hi = __lasx_xvsadd_bu(*src_hi, *dst_hi); ++} ++ ++static force_inline __m256i ++pack_2x256_256(__m256i lo, __m256i hi) ++{ ++ __m256i tmp0 = __lasx_xvsat_bu(lo, 7); ++ __m256i tmp1 = __lasx_xvsat_bu(hi, 7); ++ __m256i tmp2 = __lasx_xvpickev_b(tmp1, tmp0); ++ ++ return tmp2; ++} ++ ++static force_inline __m256i ++pack_565_2x256_256(__m256i lo, __m256i hi) ++{ ++ __m256i data; ++ __m256i r, g1, g2, b; ++ __m256i mask_565_r = __lasx_xvreplgr2vr_w(0x00f80000); ++ __m256i mask_565_g1 = 
__lasx_xvreplgr2vr_w(0x00070000); ++ __m256i mask_565_g2 = __lasx_xvreplgr2vr_w(0x000000e0); ++ __m256i mask_565_b = __lasx_xvreplgr2vr_w(0x0000001f); ++ ++ data = pack_2x256_256 (lo, hi); ++ r = __lasx_xvand_v(data, mask_565_r); ++ g1 = __lasx_xvslli_w(data, 3) & mask_565_g1; ++ g2 = __lasx_xvsrli_w(data, 5) & mask_565_g2; ++ b = __lasx_xvsrli_w(data, 3) & mask_565_b; ++ ++ return (((r|g1)|g2)|b); ++} ++ ++static force_inline __m256i ++expand565_16_1x256(uint16_t pixel) ++{ ++ __m256i m; ++ __m256i zero = __lasx_xvldi(0); ++ ++ m = __lasx_xvinsgr2vr_w(m, pixel, 0); ++ m = unpack_565_to_8888(m); ++ m = __lasx_xvilvl_b(zero, m); ++ ++ return m; ++} ++ ++static force_inline uint32_t ++pack_1x256_32(__m256i data) ++{ ++ __m256i tmp0, tmp1; ++ __m256i zero = __lasx_xvldi(0); ++ ++ tmp0 = __lasx_xvsat_bu(data, 7); ++ tmp1 = __lasx_xvpickev_b(zero, tmp0); ++ ++ return (__lasx_xvpickve2gr_wu(tmp1, 0)); ++} ++ ++static force_inline uint16_t ++pack_565_32_16(uint32_t pixel) ++{ ++ return (uint16_t)(((pixel >> 8) & 0xf800) | ++ ((pixel >> 5) & 0x07e0) | ++ ((pixel >> 3) & 0x001f)); ++} ++ ++static force_inline __m256i ++pack_565_4x256_256(__m256i *v0, __m256i *v1, __m256i *v2, __m256i *v3) ++{ ++ return pack_2x256_256(pack_565_2x256_256(*v0, *v1), ++ pack_565_2x256_256(*v2, *v3)); ++} ++ ++static force_inline void ++expand_alpha_2x256(__m256i data_lo, __m256i data_hi, __m256i *alpha_lo, __m256i *alpha_hi) ++{ ++ *alpha_lo = __lasx_xvshuf4i_h(data_lo, 0xff); ++ *alpha_hi = __lasx_xvshuf4i_h(data_hi, 0xff); ++} ++ ++static force_inline void ++expand_alpha_rev_2x256(__m256i data_lo, __m256i data_hi, __m256i *alpha_lo, __m256i *alpha_hi) ++{ ++ *alpha_lo = __lasx_xvshuf4i_h(data_lo, 0x00); ++ *alpha_hi = __lasx_xvshuf4i_h(data_hi, 0x00); ++} ++ ++static force_inline uint16_t ++composite_over_8888_0565pixel(uint32_t src, uint16_t dst) ++{ ++ __m256i ms; ++ ms = unpack_32_1x256(src); ++ ++ return pack_565_32_16(pack_1x256_32((__m256i)over_1x256((__m256i)ms, ++ (__m256i)expand_alpha_1x256((__m256i)ms), expand565_16_1x256(dst)))); ++} ++ ++static force_inline void ++in_over_2x256(__m256i *src_lo, __m256i *src_hi, __m256i *alpha_lo, __m256i *alpha_hi, ++ __m256i *mask_lo, __m256i *mask_hi, __m256i *dst_lo, __m256i *dst_hi) ++{ ++ __m256i s_lo, s_hi; ++ __m256i a_lo, a_hi; ++ s_lo = lasx_pix_multiply(*src_lo, *mask_lo); ++ s_hi = lasx_pix_multiply(*src_hi, *mask_hi); ++ a_lo = lasx_pix_multiply(*alpha_lo, *mask_lo); ++ a_hi = lasx_pix_multiply(*alpha_hi, *mask_hi); ++ over_2x256(&s_lo, &s_hi, &a_lo, &a_hi, dst_lo, dst_hi); ++} ++ ++static force_inline __m256i ++in_over_1x256(__m256i *src, __m256i *alpha, __m256i *mask, __m256i *dst) ++{ ++ return over_1x256(lasx_pix_multiply(*src, *mask), ++ lasx_pix_multiply(*alpha, *mask), *dst); ++} ++ ++static force_inline __m256i ++expand_alpha_rev_1x256(__m256i data) ++{ ++ __m256i v0 = {0x00000000, 0x00000000, 0xffffffff, 0xffffffff}; ++ __m256i v_hi = __lasx_xvand_v(data, v0); ++ data = __lasx_xvshuf4i_h(data, 0x00); ++ v0 = __lasx_xvnor_v(v0, v0); ++ data = __lasx_xvand_v(data, v0); ++ data = __lasx_xvor_v(data, v_hi); ++ ++ return data; ++} ++ ++static void ++lasx_composite_over_n_0565 (pixman_implementation_t *imp, ++ pixman_composite_info_t *info) ++{ ++ PIXMAN_COMPOSITE_ARGS (info); ++ uint32_t src; ++ uint16_t *dst_line, *dst, d; ++ int32_t w; ++ int dst_stride; ++ __m256i vsrc, valpha; ++ __m256i vdst, vdst0, vdst1, vdst2, vdst3; ++ ++ src = _pixman_image_get_solid (imp, src_image, dest_image->bits.format); ++ ++ if (src == 0) ++ return; ++ ++ 
PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint16_t, dst_stride, dst_line, 1); ++ ++ vsrc = expand_pixel_32_1x256(src); ++ valpha = expand_alpha_1x256(vsrc); ++ ++ while (height--) { ++ dst = dst_line; ++ ++ dst_line += dst_stride; ++ w = width; ++ ++ while (w >= 16) { ++ vdst = __lasx_xvld(dst, 0); ++ w -= 16; ++ vsrc = __lasx_xvpermi_q(vsrc, vsrc, 0x20); ++ valpha = __lasx_xvpermi_q(valpha, valpha, 0x20); ++ ++ unpack_565_256_4x256(vdst, &vdst0, &vdst1, &vdst2, &vdst3); ++ ++ over_2x256(&vsrc, &vsrc, &valpha, &valpha, &vdst0, &vdst1); ++ over_2x256(&vsrc, &vsrc, &valpha, &valpha, &vdst2, &vdst3); ++ ++ vdst = pack_565_4x256_256(&vdst0, &vdst1, &vdst2, &vdst3); ++ __lasx_xvst(vdst, dst, 0); ++ dst += 16; ++ } ++ ++ if (w >= 8) { ++ vdst = __lasx_xvld(dst, 0); ++ w -= 8; ++ vsrc = __lasx_xvpermi_q(vsrc, vsrc, 0x20); ++ valpha = __lasx_xvpermi_q(valpha, valpha, 0x20); ++ ++ unpack_565_256_4x256(vdst, &vdst0, &vdst1, &vdst2, &vdst3); ++ ++ over_2x256(&vsrc, &vsrc, &valpha, &valpha, &vdst0, &vdst1); ++ over_2x256(&vsrc, &vsrc, &valpha, &valpha, &vdst2, &vdst3); ++ ++ vdst = pack_565_4x256_256(&vdst0, &vdst1, &vdst2, &vdst3); ++ __lasx_xvstelm_d(vdst, dst, 0, 0); ++ __lasx_xvstelm_d(vdst, dst, 8, 1); ++ dst += 8; ++ } ++ ++ while (w--) { ++ d = *dst; ++ *dst++ = pack_565_32_16(pack_1x256_32( ++ (over_1x256(vsrc,valpha, expand565_16_1x256(d))))); ++ } ++ } ++} ++ ++static void ++lasx_composite_over_8888_0565 (pixman_implementation_t *imp, ++ pixman_composite_info_t *info) ++{ ++ PIXMAN_COMPOSITE_ARGS (info); ++ uint16_t *dst_line, *dst, d; ++ uint32_t *src_line, *src, s; ++ int dst_stride, src_stride; ++ int32_t w; ++ ++ __m256i valpha_lo, valpha_hi; ++ __m256i vsrc, vsrc_lo, vsrc_hi; ++ __m256i vdst, vdst0, vdst1, vdst2, vdst3; ++ __m256i dst0, dst1, dst2, dst3; ++ ++ PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint16_t, dst_stride, dst_line, 1); ++ PIXMAN_IMAGE_GET_LINE (src_image, src_x, src_y, uint32_t, src_stride, src_line, 1); ++ ++ while (height--) { ++ dst = dst_line; ++ src = src_line; ++ ++ dst_line += dst_stride; ++ src_line += src_stride; ++ w = width; ++ ++ while (w >= 16) { ++ vsrc = __lasx_xvld(src, 0); ++ vdst = __lasx_xvld(dst, 0); ++ w -= 16; ++ ++ unpack_565_256_4x256(vdst, &vdst0, &vdst1, &vdst2, &vdst3); ++ dst0 = __lasx_xvpermi_q(vdst2, vdst0, 0x20); ++ dst1 = __lasx_xvpermi_q(vdst3, vdst1, 0x20); ++ dst2 = __lasx_xvpermi_q(vdst2, vdst0, 0x31); ++ dst3 = __lasx_xvpermi_q(vdst3, vdst1, 0x31); ++ ++ unpack_256_2x256((__m256i)vsrc, (__m256i*)&vsrc_lo, (__m256i*)&vsrc_hi); ++ expand_alpha_2x256(vsrc_lo, vsrc_hi, &valpha_lo, &valpha_hi); ++ over_2x256(&vsrc_lo, &vsrc_hi, &valpha_lo, &valpha_hi, &dst0, &dst1); ++ ++ vsrc = __lasx_xvld(src, 32); ++ unpack_256_2x256((__m256i)vsrc, (__m256i*)&vsrc_lo, (__m256i*)&vsrc_hi); ++ expand_alpha_2x256(vsrc_lo, vsrc_hi, &valpha_lo, &valpha_hi); ++ over_2x256(&vsrc_lo, &vsrc_hi, &valpha_lo, &valpha_hi, &dst2, &dst3); ++ ++ vdst0 = __lasx_xvpermi_q(dst2, dst0, 0x20); ++ vdst1 = __lasx_xvpermi_q(dst3, dst1, 0x20); ++ vdst2 = __lasx_xvpermi_q(dst2, dst0, 0x31); ++ vdst3 = __lasx_xvpermi_q(dst3, dst1, 0x31); ++ ++ __lasx_xvst(pack_565_4x256_256(&vdst0, &vdst1, &vdst2, &vdst3), dst, 0); ++ ++ dst += 16; ++ src += 16; ++ } ++ ++ while (w--) { ++ s = *src++; ++ d = *dst; ++ *dst++ = composite_over_8888_0565pixel(s, d); ++ } ++ } ++} ++ ++static void ++lasx_composite_over_n_8_0565 (pixman_implementation_t *imp, ++ pixman_composite_info_t *info) ++{ ++ PIXMAN_COMPOSITE_ARGS (info); ++ uint32_t src; ++ uint16_t *dst_line, *dst, d; ++ 
uint8_t *mask_line, *p; ++ uint32_t *mask; ++ int dst_stride, mask_stride; ++ int32_t w; ++ uint32_t m; ++ ++ __m256i mask_zero = __lasx_xvldi(0); ++ __m256i lasx_src, lasx_alpha, lasx_mask, lasx_dest; ++ __m256i vsrc, valpha; ++ __m256i vmask, vmaska, vmask_lo, vmask_hi; ++ __m256i vdst, vdst0, vdst1, vdst2, vdst3; ++ __m256i dst0, dst1, dst2, dst3; ++ ++ src = _pixman_image_get_solid (imp, src_image, dest_image->bits.format); ++ ++ if (src == 0) ++ return; ++ ++ PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint16_t, dst_stride, dst_line, 1); ++ PIXMAN_IMAGE_GET_LINE (mask_image, mask_x, mask_y, uint8_t, mask_stride, mask_line, 1); ++ ++ lasx_src = expand_pixel_32_1x256(src); ++ lasx_alpha = expand_alpha_1x256(lasx_src); ++ ++ vsrc = expand_pixel_32_2x256(src); ++ valpha = expand_alphaa_2x256(vsrc); ++ ++ while (height--) { ++ dst = dst_line; ++ dst_line += dst_stride; ++ mask = (void*)mask_line; ++ mask_line += mask_stride; ++ w = width; ++ ++ while (w >= 16) { ++ vdst = __lasx_xvld(dst, 0); ++ w -= 16; ++ unpack_565_256_4x256(vdst, &vdst0, &vdst1, &vdst2, &vdst3); ++ dst0 = __lasx_xvpermi_q(vdst2, vdst0, 0x20); ++ dst1 = __lasx_xvpermi_q(vdst3, vdst1, 0x20); ++ dst2 = __lasx_xvpermi_q(vdst2, vdst0, 0x31); ++ dst3 = __lasx_xvpermi_q(vdst3, vdst1, 0x31); ++ ++ m = *mask; ++ vmaska = unpack_32_1x256(m); ++ mask += 1; ++ m = *mask; ++ vmask = unpack_32_1x256(m); ++ vmask = __lasx_xvpermi_q(vmask, vmaska, 0x20); ++ mask += 1; ++ vmask = __lasx_xvilvl_b(mask_zero, vmask); ++ ++ unpack_256_2x256(vmask, (__m256i*)&vmask_lo, (__m256i*)&vmask_hi); ++ expand_alpha_rev_2x256(vmask_lo, vmask_hi, &vmask_lo, &vmask_hi); ++ in_over_2x256(&vsrc, &vsrc, &valpha, &valpha, &vmask_lo, &vmask_hi, ++ &dst0, &dst1); ++ ++ m = *mask; ++ vmaska = unpack_32_1x256(m); ++ mask += 1; ++ m = *mask; ++ vmask = unpack_32_1x256(m); ++ vmask = __lasx_xvpermi_q(vmask, vmaska, 0x20); ++ mask += 1; ++ vmask = __lasx_xvilvl_b(mask_zero, vmask); ++ ++ unpack_256_2x256(vmask, (__m256i*)&vmask_lo, (__m256i*)&vmask_hi); ++ expand_alpha_rev_2x256(vmask_lo, vmask_hi, &vmask_lo, &vmask_hi); ++ in_over_2x256(&vsrc, &vsrc, &valpha, &valpha, &vmask_lo, &vmask_hi, ++ &dst2, &dst3); ++ ++ vdst0 = __lasx_xvpermi_q(dst2, dst0, 0x20); ++ vdst1 = __lasx_xvpermi_q(dst3, dst1, 0x20); ++ vdst2 = __lasx_xvpermi_q(dst2, dst0, 0x31); ++ vdst3 = __lasx_xvpermi_q(dst3, dst1, 0x31); ++ ++ __lasx_xvst(pack_565_4x256_256(&vdst0, &vdst1, &vdst2, &vdst3), dst, 0); ++ ++ dst += 16; ++ } ++ ++ p = (void*)mask; ++ while (w--) { ++ m = *p++; ++ ++ if (m) { ++ d = *dst; ++ lasx_mask = expand_alpha_rev_1x256(unpack_32_1x256 (m)); ++ lasx_dest = expand565_16_1x256(d); ++ ++ *dst = pack_565_32_16(pack_1x256_32(in_over_1x256 (&lasx_src, ++ &lasx_alpha, &lasx_mask, &lasx_dest))); ++ } ++ dst++; ++ } ++ } ++} ++ ++static void ++lasx_composite_over_x888_8_8888 (pixman_implementation_t *imp, ++ pixman_composite_info_t *info) ++{ ++ PIXMAN_COMPOSITE_ARGS (info); ++ uint32_t *src, *src_line, s; ++ uint32_t *dst, *dst_line, d; ++ uint8_t *mask_line, *p; ++ uint32_t *mask; ++ uint32_t m, w; ++ int src_stride, mask_stride, dst_stride; ++ ++ __m256i mask_zero = __lasx_xvldi(0); ++ __m256i mask_4x32 = mask_ff000000; ++ __m256i vsrc, vsrc_lo, vsrc_hi; ++ __m256i vdst, vdst_lo, vdst_hi; ++ __m256i vmask, vmask_lo, vmask_hi, vmaska; ++ ++ PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1); ++ PIXMAN_IMAGE_GET_LINE (mask_image, mask_x, mask_y, uint8_t, mask_stride, mask_line, 1); ++ PIXMAN_IMAGE_GET_LINE (src_image, src_x, src_y, 
uint32_t, src_stride, src_line, 1); ++ ++ while (height--) { ++ src = src_line; ++ src_line += src_stride; ++ dst = dst_line; ++ dst_line += dst_stride; ++ mask = (void*)mask_line; ++ mask_line += mask_stride; ++ w = width; ++ ++ while (w >= 8) { ++ m = *mask; ++ vsrc = __lasx_xvld(src, 0); ++ src += 8; ++ w -= 8; ++ vsrc |= mask_4x32; ++ ++ if (m == 0xffffffff) { ++ __lasx_xvst(vsrc, dst, 0); ++ } else { ++ vdst = __lasx_xvld(dst, 0); ++ vmask = __lasx_xvilvl_b(mask_zero, unpack_32_1x256(m)); ++ m = *(mask + 1); ++ vmaska = __lasx_xvilvl_b(mask_zero, unpack_32_1x256(m)); ++ vmask = __lasx_xvpermi_q(vmaska, vmask, 0x20); ++ ++ unpack_256_2x256(vsrc, (__m256i*)&vsrc_lo, (__m256i*)&vsrc_hi); ++ unpack_256_2x256(vmask, (__m256i*)&vmask_lo, (__m256i*)&vmask_hi); ++ expand_alpha_rev_2x256(vmask_lo, vmask_hi, &vmask_lo, &vmask_hi); ++ unpack_256_2x256(vdst, (__m256i*)&vdst_lo, (__m256i*)&vdst_hi); ++ ++ in_over_2x256(&vsrc_lo, &vsrc_hi, &mask_00ff, &mask_00ff, ++ &vmask_lo, &vmask_hi, &vdst_lo, &vdst_hi); ++ ++ __lasx_xvst(pack_2x256_256(vdst_lo, vdst_hi), dst, 0); ++ } ++ dst += 8; ++ mask += 2; ++ } ++ ++ p = (void*)mask; ++ while (w--) { ++ m = *p++; ++ ++ if (m) { ++ s = 0xff000000 | *src; ++ ++ if (m == 0xff) { ++ *dst = s; ++ } ++ else { ++ __m256i ma, md, ms; ++ d = *dst; ++ ma = expand_alpha_rev_1x256(unpack_32_1x256(m)); ++ md = unpack_32_1x256(d); ++ ms = unpack_32_1x256(s); ++ *dst = pack_1x256_32(in_over_1x256(&ms, &mask_00ff, &ma, &md)); ++ } ++ } ++ src++; ++ dst++; ++ } ++ } ++} ++ ++static void ++lasx_composite_over_8888_n_8888 (pixman_implementation_t *imp, ++ pixman_composite_info_t *info) ++{ ++ PIXMAN_COMPOSITE_ARGS (info); ++ uint32_t *dst_line, *dst; ++ uint32_t *src_line, *src; ++ uint32_t mask, maska; ++ int32_t w; ++ int dst_stride, src_stride; ++ ++ __m256i vmask; ++ __m256i vsrc, vsrc_lo, vsrc_hi; ++ __m256i vdst, vdst_lo, vdst_hi; ++ __m256i valpha_lo, valpha_hi; ++ ++ PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1); ++ PIXMAN_IMAGE_GET_LINE (src_image, src_x, src_y, uint32_t, src_stride, src_line, 1); ++ ++ mask = _pixman_image_get_solid (imp, mask_image, PIXMAN_a8r8g8b8); ++ maska = mask >> 24; ++ vmask = __lasx_xvreplgr2vr_h(maska); ++ ++ while (height--) { ++ dst = dst_line; ++ dst_line += dst_stride; ++ src = src_line; ++ src_line += src_stride; ++ w = width; ++ ++ while (w >= 8) { ++ vsrc = __lasx_xvld(src, 0); ++ src += 8; ++ w -= 8; ++ ++ if (__lasx_xbnz_v(vsrc)) { ++ vdst = __lasx_xvld(dst, 0); ++ unpack_256_2x256(vsrc, (__m256i*)&vsrc_lo, (__m256i*)&vsrc_hi); ++ unpack_256_2x256(vdst, (__m256i*)&vdst_lo, (__m256i*)&vdst_hi); ++ expand_alpha_2x256(vsrc_lo, vsrc_hi, &valpha_lo, &valpha_hi); ++ ++ in_over_2x256(&vsrc_lo, &vsrc_hi, &valpha_lo, &valpha_hi, ++ &vmask, &vmask, &vdst_lo, &vdst_hi); ++ __lasx_xvst(pack_2x256_256(vdst_lo, vdst_hi), dst, 0); ++ } ++ dst += 8; ++ } ++ ++ if (w >= 4) { ++ vsrc = __lasx_xvld(src, 0); ++ src += 4; ++ w -= 4; ++ ++ vsrc = __lasx_xvpermi_q(vsrc, vsrc, 0x20); ++ if (__lasx_xbnz_v(vsrc)) { ++ vdst = __lasx_xvld(dst, 0); ++ unpack_256_2x256(vsrc, (__m256i*)&vsrc_lo, (__m256i*)&vsrc_hi); ++ unpack_256_2x256(vdst, (__m256i*)&vdst_lo, (__m256i*)&vdst_hi); ++ expand_alpha_2x256(vsrc_lo, vsrc_hi, &valpha_lo, &valpha_hi); ++ in_over_2x256(&vsrc_lo, &vsrc_hi, &valpha_lo, &valpha_hi, ++ &vmask, &vmask, &vdst_lo, &vdst_hi); ++ vdst = pack_2x256_256(vdst_lo, vdst_hi); ++ __lasx_xvstelm_d(vdst, dst, 0, 0); ++ __lasx_xvstelm_d(vdst, dst, 8, 1); ++ } ++ dst += 4; ++ } ++ ++ while (w--) { ++ uint32_t s = 
*src++; ++ ++ if (s) { ++ uint32_t d = *dst; ++ __m256i ms = unpack_32_1x256(s); ++ __m256i alpha = expand_alpha_1x256(ms); ++ __m256i mask = vmask; ++ __m256i dest = unpack_32_1x256(d); ++ *dst = pack_1x256_32(in_over_1x256(&ms, &alpha, &mask, &dest)); ++ } ++ dst++; ++ } ++ } ++} ++ ++static void ++lasx_composite_over_x888_n_8888 (pixman_implementation_t *imp, ++ pixman_composite_info_t *info) ++{ ++ PIXMAN_COMPOSITE_ARGS (info); ++ uint32_t *dst_line, *dst; ++ uint32_t *src_line, *src; ++ uint32_t mask, maska; ++ int dst_stride, src_stride; ++ int32_t w; ++ ++ __m256i vmask, valpha, mask_4x32; ++ __m256i vsrc, vsrc_lo, vsrc_hi; ++ __m256i vdst, vdst_lo, vdst_hi; ++ __m256i zero = __lasx_xvldi(0); ++ ++ mask_4x32 = __lasx_xvreplgr2vr_w(0xff000000); ++ ++ PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1); ++ PIXMAN_IMAGE_GET_LINE (src_image, src_x, src_y, uint32_t, src_stride, src_line, 1); ++ ++ mask = _pixman_image_get_solid (imp, mask_image, PIXMAN_a8r8g8b8); ++ maska = mask >> 24; ++ ++ vmask = __lasx_xvreplgr2vr_h(maska); ++ valpha = mask_00ff; ++ ++ while (height--) { ++ dst = dst_line; ++ dst_line += dst_stride; ++ src = src_line; ++ src_line += src_stride; ++ w = width; ++ ++ while (w >= 8) { ++ vsrc = __lasx_xvld(src, 0); ++ src += 8; ++ w -= 8; ++ vsrc = __lasx_xvor_v(vsrc, mask_4x32); ++ vdst = __lasx_xvld(dst, 0); ++ ++ unpack_256_2x256(vsrc, (__m256i*)&vsrc_lo, (__m256i*)&vsrc_hi); ++ unpack_256_2x256(vdst, (__m256i*)&vdst_lo, (__m256i*)&vdst_hi); ++ ++ in_over_2x256(&vsrc_lo, &vsrc_hi, &valpha, &valpha, ++ &vmask, &vmask, &vdst_lo, &vdst_hi); ++ ++ __lasx_xvst(pack_2x256_256(vdst_lo, vdst_hi), dst, 0); ++ dst += 8; ++ } ++ ++ if (w >= 4) { ++ vsrc = __lasx_xvld(src, 0); ++ src += 4; ++ w -= 4; ++ vsrc = __lasx_xvor_v(vsrc, mask_4x32); ++ vdst = __lasx_xvld(dst, 0); ++ ++ unpack_256_2x256(vsrc, (__m256i*)&vsrc_lo, (__m256i*)&vsrc_hi); ++ unpack_256_2x256(vdst, (__m256i*)&vdst_lo, (__m256i*)&vdst_hi); ++ ++ in_over_2x256(&vsrc_lo, &vsrc_hi, &valpha, &valpha, ++ &vmask, &vmask, &vdst_lo, &vdst_hi); ++ ++ vdst = pack_2x256_256(vdst_lo, vdst_hi); ++ __lasx_xvstelm_d(vdst, dst, 0, 0); ++ __lasx_xvstelm_d(vdst, dst, 8, 1); ++ dst += 4; ++ } ++ ++ while (w--) { ++ uint32_t s = (*src++) | 0xff000000; ++ uint32_t d = *dst; ++ ++ __m256i alpha, tmask; ++ __m256i src = unpack_32_1x256 (s); ++ __m256i dest = unpack_32_1x256 (d); ++ ++ alpha = __lasx_xvpermi_q(zero, valpha, 0x20); ++ tmask = __lasx_xvpermi_q(zero, vmask, 0x20); ++ ++ *dst = pack_1x256_32(in_over_1x256(&src, &alpha, &tmask, &dest)); ++ ++ dst++; ++ } ++ } ++} ++ ++static void ++lasx_composite_over_n_8888_0565_ca (pixman_implementation_t *imp, ++ pixman_composite_info_t *info) ++{ ++ PIXMAN_COMPOSITE_ARGS (info); ++ uint32_t src; ++ uint16_t *dst_line, *dst, d; ++ uint32_t *mask_line, *mask, m; ++ int dst_stride, mask_stride; ++ int w, flag; ++ ++ __m256i vsrc, valpha; ++ __m256i lasx_src, lasx_alpha, lasx_mask, lasx_dest; ++ __m256i vmask, vmask_lo, vmask_hi; ++ __m256i vdst, vdst0, vdst1, vdst2, vdst3; ++ __m256i dst0, dst1, dst2, dst3; ++ ++ src = _pixman_image_get_solid (imp, src_image, dest_image->bits.format); ++ ++ if (src == 0) ++ return; ++ ++ PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint16_t, dst_stride, dst_line, 1); ++ PIXMAN_IMAGE_GET_LINE (mask_image, mask_x, mask_y, uint32_t, mask_stride, mask_line, 1); ++ ++ lasx_src = expand_pixel_32_1x256(src); ++ lasx_alpha = expand_alpha_1x256(lasx_src); ++ ++ vsrc = expand_pixel_32_2x256(src); ++ valpha = 
expand_alphaa_2x256(vsrc); ++ ++ while (height--) { ++ mask = mask_line; ++ dst = dst_line; ++ mask_line += mask_stride; ++ dst_line += dst_stride; ++ w = width; ++ ++ while (w >= 16) { ++ vmask = __lasx_xvld(mask, 0); ++ vdst = __lasx_xvld(dst, 0); ++ w -= 16; ++ ++ unpack_565_256_4x256(vdst, &vdst0, &vdst1, &vdst2, &vdst3); ++ dst0 = __lasx_xvpermi_q(vdst2, vdst0, 0x20); ++ dst1 = __lasx_xvpermi_q(vdst3, vdst1, 0x20); ++ dst2 = __lasx_xvpermi_q(vdst2, vdst0, 0x31); ++ dst3 = __lasx_xvpermi_q(vdst3, vdst1, 0x31); ++ ++ flag = __lasx_xbnz_v(vmask); ++ unpack_256_2x256(vmask, (__m256i*)&vmask_lo, (__m256i*)&vmask_hi); ++ vmask = __lasx_xvld(mask, 32); ++ if (flag) { ++ in_over_2x256(&vsrc, &vsrc, &valpha, &valpha, &vmask_lo, &vmask_hi, ++ &dst0, &dst1); ++ } ++ ++ flag = __lasx_xbnz_v(vmask); ++ unpack_256_2x256(vmask, (__m256i*)&vmask_lo, (__m256i*)&vmask_hi); ++ if (flag) { ++ in_over_2x256(&vsrc, &vsrc, &valpha, &valpha, &vmask_lo, &vmask_hi, ++ &dst2, &dst3); ++ } ++ ++ vdst0 = __lasx_xvpermi_q(dst2, dst0, 0x20); ++ vdst1 = __lasx_xvpermi_q(dst3, dst1, 0x20); ++ vdst2 = __lasx_xvpermi_q(dst2, dst0, 0x31); ++ vdst3 = __lasx_xvpermi_q(dst3, dst1, 0x31); ++ ++ __lasx_xvst(pack_565_4x256_256(&vdst0, &vdst1, &vdst2, &vdst3), dst, 0); ++ dst += 16; ++ mask += 16; ++ } ++ ++ while (w--) { ++ m = *(uint32_t *) mask; ++ ++ if (m) { ++ d = *dst; ++ lasx_mask = unpack_32_1x256(m); ++ lasx_dest = expand565_16_1x256(d); ++ *dst = pack_565_32_16(pack_1x256_32(in_over_1x256(&lasx_src, &lasx_alpha, ++ &lasx_mask, &lasx_dest))); ++ } ++ dst++; ++ mask++; ++ } ++ } ++} ++ ++static uint32_t * ++lasx_fetch_x8r8g8b8 (pixman_iter_t *iter, const uint32_t *mask) ++{ ++ __m256i mask_4x32 = mask_ff000000; ++ int w = iter->width; ++ uint32_t *dst = iter->buffer; ++ uint32_t *src = (uint32_t *)iter->bits; ++ iter->bits += iter->stride; ++ ++ while (w >= 8) { ++ __lasx_xvst(__lasx_xvor_v(__lasx_xvld(src, 0), mask_4x32), dst, 0); ++ dst += 8; ++ src += 8; ++ w -= 8; ++ } ++ ++ while (w--) { ++ *dst++ = (*src++) | 0xff000000; ++ } ++ ++ return iter->buffer; ++} ++ ++static uint32_t * ++lasx_fetch_r5g6b5 (pixman_iter_t *iter, const uint32_t *mask) ++{ ++ __m256i a, sa, s0, s1, s2, s3, s4; ++ __m256i mask_red, mask_green, mask_blue; ++ __m256i tmp0, tmp1; ++ ++ int w = iter->width; ++ uint32_t *dst = iter->buffer; ++ uint16_t *src = (uint16_t *)iter->bits; ++ iter->bits += iter->stride; ++ ++ mask_red = __lasx_xvreplgr2vr_h(248); ++ mask_green = __lasx_xvreplgr2vr_h(252); ++ mask_blue = mask_red; ++ a = __lasx_xvreplgr2vr_h(255) << 8; ++ ++ while (w >= 16) { ++ s0 = __lasx_xvld(src, 0); ++ src += 16; ++ w -= 16; ++ ++ //r ++ s1 = __lasx_xvsrli_h(s0, 8); ++ s1 &= mask_red; ++ s2 = __lasx_xvsrli_h(s1, 5); ++ s1 |= s2; ++ ++ //g ++ s2 = __lasx_xvsrli_h(s0, 3); ++ s2 &= mask_green; ++ s3 = __lasx_xvsrli_h(s2, 6); ++ s2 |= s3; ++ ++ //b ++ s3 = s0 << 3; ++ s3 &= mask_blue; ++ s4 = __lasx_xvsrli_h(s3, 5); ++ s3 |= s4; ++ ++ //ar ++ sa = a | s1; ++ ++ //gb ++ s2 <<= 8; ++ s2 |= s3; ++ ++ tmp0 = __lasx_xvilvl_h(sa, s2); ++ tmp1 = __lasx_xvilvh_h(sa, s2); ++ s1 = __lasx_xvpermi_q(tmp0, tmp1, 0x02); ++ s3 = __lasx_xvpermi_q(tmp0, tmp1, 0x13); ++ __lasx_xvst(s1, dst, 0); ++ __lasx_xvst(s3, dst, 32); ++ dst += 16; ++ } ++ ++ if (w >= 8) { ++ s0 = __lasx_xvld(src, 0); ++ src += 8; ++ w -= 8; ++ //r ++ s1 = __lasx_xvsrli_h(s0, 8); ++ s1 &= mask_red; ++ s2 = __lasx_xvsrli_h(s1, 5); ++ s1 |= s2; ++ ++ //g ++ s2 = __lasx_xvsrli_h(s0, 3); ++ s2 &= mask_green; ++ s3 = __lasx_xvsrli_h(s2, 6); ++ s2 |= s3; ++ ++ //b ++ s3 = s0 << 3; ++ s3 
&= mask_blue; ++ s4 = __lasx_xvsrli_h(s3, 5); ++ s3 |= s4; ++ ++ //ar ++ sa = a | s1; ++ ++ //gb ++ s2 <<= 8; ++ s2 |= s3; ++ ++ tmp0 = __lasx_xvilvl_h(sa, s2); ++ tmp1 = __lasx_xvilvh_h(sa, s2); ++ s1 = __lasx_xvpermi_q(tmp0, tmp1, 0x02); ++ __lasx_xvst(s1, dst, 0); ++ dst += 8; ++ } ++ ++ while (w--) { ++ uint16_t s = *src++; ++ *dst++ = convert_0565_to_8888(s); ++ } ++ ++ return iter->buffer; ++} ++ ++static uint32_t * ++lasx_fetch_a8 (pixman_iter_t *iter, const uint32_t *mask) ++{ ++ __m256i srcv; ++ __m256i t0, t1, t2, t3; ++ __m256i dst0, dst1, dst2, dst3; ++ __m256i zero = __lasx_xvldi(0); ++ int w = iter->width; ++ uint32_t *dst = iter->buffer; ++ uint8_t *src = iter->bits; ++ ++ iter->bits += iter->stride; ++ ++ while (w >= 32) { ++ srcv = __lasx_xvld(src, 0); ++ src += 32; ++ w -= 32; ++ dst0 = __lasx_xvilvl_b(srcv, zero); ++ dst1 = __lasx_xvilvh_b(srcv, zero); ++ t0 = __lasx_xvilvl_h(dst0, zero); ++ t1 = __lasx_xvilvh_h(dst0, zero); ++ t2 = __lasx_xvilvl_h(dst1, zero); ++ t3 = __lasx_xvilvh_h(dst1, zero); ++ dst0 = __lasx_xvpermi_q(t1, t0, 0x20); ++ dst1 = __lasx_xvpermi_q(t3, t2, 0x20); ++ dst2 = __lasx_xvpermi_q(t1, t0, 0x31); ++ dst3 = __lasx_xvpermi_q(t3, t2, 0x31); ++ __lasx_xvst(dst0, dst, 0); ++ __lasx_xvst(dst1, dst, 32); ++ __lasx_xvst(dst2, dst, 64); ++ __lasx_xvst(dst3, dst, 96); ++ dst += 32; ++ } ++ ++ if (w >= 16) { ++ srcv = __lasx_xvld(src, 0); ++ src += 16; ++ w -= 16; ++ dst0 = __lasx_xvilvl_b(srcv, zero); ++ dst1 = __lasx_xvilvh_b(srcv, zero); ++ dst0 = __lasx_xvpermi_q(dst1, dst0, 0x20); ++ t0 = __lasx_xvilvl_h(dst0, zero); ++ t1 = __lasx_xvilvh_h(dst0, zero); ++ dst0 = __lasx_xvpermi_q(t1, t0, 0x20); ++ dst1 = __lasx_xvpermi_q(t1, t0, 0x31); ++ __lasx_xvst(dst0, dst, 0); ++ __lasx_xvst(dst1, dst, 32); ++ dst += 16; ++ } ++ ++ if (w >= 8) { ++ srcv = __lasx_xvld(src, 0); ++ src += 8; ++ w -= 8; ++ dst0 = __lasx_xvilvl_b(srcv, zero); ++ dst1 = __lasx_xvilvh_b(srcv, zero); ++ dst0 = __lasx_xvpermi_q(dst1, dst0, 0x20); ++ t0 = __lasx_xvilvl_h(dst0, zero); ++ t1 = __lasx_xvilvh_h(dst0, zero); ++ dst0 = __lasx_xvpermi_q(t1, t0, 0x20); ++ __lasx_xvst(dst0, dst, 0); ++ dst += 8; ++ } ++ ++ while (w--) { ++ *dst++ = *(src++) << 24; ++ } ++ ++ return iter->buffer; ++} ++ ++// fetch/store 8 bits ++static void lasx_fetch_scanline_a8 (bits_image_t *image, int x, int y, int width, ++ uint32_t *buffer, const uint32_t *mask) ++{ ++ uint8_t *bits = (uint8_t *)(image->bits + y * image->rowstride); ++ __m256i src; ++ __m256i t0, t1; ++ __m256i temp0, temp1, temp2, temp3; ++ __m256i dst0, dst1, dst2, dst3; ++ __m256i zero = __lasx_xvldi(0); ++ bits += x; ++ ++ while (width >= 32) { ++ src = __lasx_xvld(bits, 0); ++ t0 = __lasx_xvilvl_b(src, zero); ++ t1 = __lasx_xvilvh_b(src, zero); ++ temp0 = __lasx_xvilvl_h(t0, zero); ++ temp1 = __lasx_xvilvh_h(t0, zero); ++ temp2 = __lasx_xvilvl_h(t1, zero); ++ temp3 = __lasx_xvilvh_h(t1, zero); ++ dst0 = __lasx_xvpermi_q(temp1, temp0, 0x20); ++ dst1 = __lasx_xvpermi_q(temp3, temp2, 0x20); ++ dst2 = __lasx_xvpermi_q(temp1, temp0, 0x31); ++ dst3 = __lasx_xvpermi_q(temp3, temp2, 0x31); ++ __lasx_xvst(dst0, buffer, 0); ++ __lasx_xvst(dst1, buffer, 32); ++ __lasx_xvst(dst2, buffer, 64); ++ __lasx_xvst(dst3, buffer, 96); ++ bits += 32, width -= 32, buffer += 32; ++ } ++ if (width >= 16) { ++ src = __lasx_xvld(bits, 0); ++ src = __lasx_xvpermi_d(src, 0xd8); ++ t0 = __lasx_xvilvl_b(src, zero); ++ temp0 = __lasx_xvilvl_h(t0, zero); ++ temp1 = __lasx_xvilvh_h(t0, zero); ++ dst0 = __lasx_xvpermi_q(temp1, temp0, 0x20); ++ dst1 = __lasx_xvpermi_q(temp1, 
temp0, 0x31); ++ __lasx_xvst(dst0, buffer, 0); ++ __lasx_xvst(dst1, buffer, 32); ++ bits += 16, width -= 16, buffer += 16; ++ } ++ if (width >= 8) { ++ src = __lasx_xvldrepl_d(bits, 0); ++ t0 = __lasx_xvilvl_b(src, zero); ++ t0 = __lasx_xvpermi_d(t0, 0xd8); ++ dst0 = __lasx_xvilvl_h(t0, zero); ++ __lasx_xvst(dst0, buffer, 0); ++ bits += 8; width -= 8; buffer += 8; ++ } ++ while(width--) { ++ *buffer++ = ((*bits++) << 24); ++ } ++} ++ ++static void lasx_store_scanline_a8 (bits_image_t *image, int x, int y, int width, ++ const uint32_t *values) ++{ ++ uint8_t *dest = (uint8_t *)(image->bits + y * image->rowstride); ++ __m256i src0, src1, src2, src3; ++ __m256i cont = {0x0000000400000000, 0x0000000500000001, 0x0000000600000002, 0x0000000700000003}; ++ dest += x; ++ while (width >= 32) { ++ src0 = __lasx_xvld(values, 0); ++ src1 = __lasx_xvld(values, 32); ++ src2 = __lasx_xvld(values, 64); ++ src3 = __lasx_xvld(values, 96); ++ src0 = __lasx_xvsrli_w(src0, 24); ++ src1 = __lasx_xvsrli_w(src1, 24); ++ src2 = __lasx_xvsrli_w(src2, 24); ++ src3 = __lasx_xvsrli_w(src3, 24); ++ src0 = __lasx_xvpickev_h(src1, src0); ++ src1 = __lasx_xvpickev_h(src3, src2); ++ src0 = __lasx_xvpickev_b(src1, src0); ++ src0 = __lasx_xvperm_w(src0, cont); ++ __lasx_xvst(src0, dest, 0); ++ values += 32, width -= 32, dest += 32; ++ } ++ if (width >= 16) { ++ src0 = __lasx_xvld(values, 0); ++ src1 = __lasx_xvld(values, 32); ++ src0 = __lasx_xvsrli_w(src0, 24); ++ src1 = __lasx_xvsrli_w(src1, 24); ++ src0 = __lasx_xvpickev_h(src1, src0); ++ src0 = __lasx_xvpickev_b(src0, src0); ++ src0 = __lasx_xvperm_w(src0, cont); ++ __lasx_xvstelm_d(src0, dest, 0, 0); ++ __lasx_xvstelm_d(src0, dest, 8, 1); ++ values += 16; width -= 16; dest += 16; ++ } ++ if (width >= 8) { ++ src0 = __lasx_xvld(values, 0); ++ src0 = __lasx_xvsrli_w(src0, 24); ++ src1 = __lasx_xvpermi_q(src0, src0, 0x01); ++ src0 = __lasx_xvpickev_h(src1, src0); ++ src0 = __lasx_xvpickev_b(src0, src0); ++ __lasx_xvstelm_d(src0, dest, 0, 0); ++ values += 8; width -= 8; dest += 8; ++ } ++ while (width--) { ++ *dest++ = ((*values++) >> 24); ++ } ++} ++ ++static void lasx_fetch_scanline_a2r2g2b2 (bits_image_t *image, int x, int y, ++ int width, uint32_t *buffer, ++ const uint32_t *mask) ++{ ++ uint8_t *bits = (uint8_t *)(image->bits + y * image->rowstride); ++ uint32_t pixel, pixel0, pixel1, pixel2, pixel3; ++ ++ __m256i src; ++ __m256i t0, t1, t2, t3, t4, t5, t6, t7; ++ __m256i temp0, temp1, temp2, temp3; ++ __m256i mask0 = __lasx_xvreplgr2vr_b(0xc0); ++ __m256i mask1 = __lasx_xvreplgr2vr_b(0x30); ++ __m256i mask2 = __lasx_xvreplgr2vr_b(0x0c); ++ __m256i mask3 = __lasx_xvreplgr2vr_b(0x03); ++ bits += x; ++ ++ while (width >= 32) { ++ src = __lasx_xvld(bits, 0); ++ t0 = (src & mask0); t1 = (src & mask1); ++ t2 = (src & mask2); t3 = (src & mask3); ++ t0 |= __lasx_xvsrli_b(t0, 2), t0 |= __lasx_xvsrli_b(t0, 4); ++ t1 |= __lasx_xvslli_b(t1, 2), t1 |= __lasx_xvsrli_b(t1, 4); ++ t2 |= __lasx_xvsrli_b(t2, 2), t2 |= __lasx_xvslli_b(t2, 4); ++ t3 |= __lasx_xvslli_b(t3, 2), t3 |= __lasx_xvslli_b(t3, 4); ++ t4 = __lasx_xvilvl_b(t0, t1); ++ t5 = __lasx_xvilvh_b(t0, t1); ++ t6 = __lasx_xvilvl_b(t2, t3); ++ t7 = __lasx_xvilvh_b(t2, t3); ++ t0 = __lasx_xvilvl_h(t4, t6); ++ t1 = __lasx_xvilvh_h(t4, t6); ++ t2 = __lasx_xvilvl_h(t5, t7); ++ t3 = __lasx_xvilvh_h(t5, t7); ++ temp0 = __lasx_xvpermi_q(t1, t0, 0x20); ++ temp1 = __lasx_xvpermi_q(t3, t2, 0x20); ++ temp2 = __lasx_xvpermi_q(t1, t0, 0x31); ++ temp3 = __lasx_xvpermi_q(t3, t2, 0x31); ++ __lasx_xvst(temp0, buffer, 0); ++ __lasx_xvst(temp1, 
buffer, 32); ++ __lasx_xvst(temp2, buffer, 64); ++ __lasx_xvst(temp3, buffer, 96); ++ bits += 32, width -= 32, buffer += 32; ++ } ++ if (width >= 16) { ++ src = __lasx_xvld(bits, 0); ++ src = __lasx_xvpermi_d(src, 0xd8); ++ t0 = (src & mask0); t1 = (src & mask1); ++ t2 = (src & mask2); t3 = (src & mask3); ++ t0 |= __lasx_xvsrli_b(t0, 2), t0 |= __lasx_xvsrli_b(t0, 4); ++ t1 |= __lasx_xvslli_b(t1, 2), t1 |= __lasx_xvsrli_b(t1, 4); ++ t2 |= __lasx_xvsrli_b(t2, 2), t2 |= __lasx_xvslli_b(t2, 4); ++ t3 |= __lasx_xvslli_b(t3, 2), t3 |= __lasx_xvslli_b(t3, 4); ++ t4 = __lasx_xvilvl_b(t0, t1); ++ t5 = __lasx_xvilvl_b(t2, t3); ++ t2 = __lasx_xvilvl_h(t4, t5); ++ t3 = __lasx_xvilvh_h(t4, t5); ++ t0 = __lasx_xvpermi_q(t3, t2, 0x20); ++ t1 = __lasx_xvpermi_q(t3, t2, 0x31); ++ __lasx_xvst(t0, buffer, 0); ++ __lasx_xvst(t1, buffer, 32); ++ bits += 16, width -= 16, buffer += 16; ++ } ++ if (width >= 8) { ++ src = __lasx_xvldrepl_d(bits, 0); ++ t0 = (src & mask0); t1 = (src & mask1); ++ t2 = (src & mask2); t3 = (src & mask3); ++ t0 |= __lasx_xvsrli_b(t0, 2), t0 |= __lasx_xvsrli_b(t0, 4); ++ t1 |= __lasx_xvslli_b(t1, 2), t1 |= __lasx_xvsrli_b(t1, 4); ++ t2 |= __lasx_xvsrli_b(t2, 2), t2 |= __lasx_xvslli_b(t2, 4); ++ t3 |= __lasx_xvslli_b(t3, 2), t3 |= __lasx_xvslli_b(t3, 4); ++ t4 = __lasx_xvilvl_b(t0, t1); ++ t5 = __lasx_xvilvl_b(t2, t3); ++ t4 = __lasx_xvpermi_d(t4, 0xd8); ++ t5 = __lasx_xvpermi_d(t5, 0xd8); ++ t0 = __lasx_xvilvl_h(t4, t5); ++ __lasx_xvst(t0, buffer, 0); ++ bits += 8; width -= 8; buffer += 8; ++ } ++ while (width--) { ++ pixel = *bits++; ++ // a ++ pixel0 = pixel & 192; ++ pixel0 |= (pixel0 >> 2); ++ pixel0 |= (pixel0 >> 4); ++ pixel0 <<= 24; ++ // r ++ pixel1 = pixel & 48; ++ pixel1 |= (pixel1 << 2); ++ pixel1 |= (pixel1 >> 4); ++ pixel1 <<= 16; ++ // g ++ pixel2 = pixel & 12; ++ pixel2 |= (pixel2 >> 2); ++ pixel2 |= (pixel2 << 4); ++ pixel2 <<= 8; ++ // b ++ pixel3 = pixel & 3; ++ pixel3 |= (pixel3 << 2); ++ pixel3 |= (pixel3 << 4); ++ *buffer++ = (pixel3 | pixel2 | pixel1 | pixel0); ++ } ++} ++ ++static void lasx_store_scanline_a2r2g2b2 (bits_image_t *image, int x, int y, ++ int width, const uint32_t *values) ++{ ++ uint8_t *dest = (uint8_t *)(image->bits + y * image->rowstride); ++ __m256i in0, in1, in2, in3, in4, in5, in6, in7; ++ __m256i in8, in9, in10, in11, in12, in13, in14, in15; ++ __m256i tt0, tt1, tt2, tt3, tt4, tt5, tt6, tt7; ++ __m256i tt8, tt9, tt10, tt11, tt12, tt13, tt14, tt15; ++ __m256i d0, d1; ++ __m256i mask = __lasx_xvreplgr2vr_b(0xc0); ++ ++ dest += x; ++ ++ while (width >= 128) { ++ in0 = __lasx_xvld(values, 0); ++ in1 = __lasx_xvld(values, 32); ++ in2 = __lasx_xvld(values, 64); ++ in3 = __lasx_xvld(values, 96); ++ in4 = __lasx_xvld(values, 128); ++ in5 = __lasx_xvld(values, 160); ++ in6 = __lasx_xvld(values, 192); ++ in7 = __lasx_xvld(values, 224); ++ values += 64; ++ in8 = __lasx_xvld(values, 0); ++ in9 = __lasx_xvld(values, 32); ++ in10 = __lasx_xvld(values, 64); ++ in11 = __lasx_xvld(values, 96); ++ in12 = __lasx_xvld(values, 128); ++ in13 = __lasx_xvld(values, 160); ++ in14 = __lasx_xvld(values, 192); ++ in15 = __lasx_xvld(values, 224); ++ ++ tt0 = __lasx_xvpermi_q(in8, in0, 0x20); ++ tt2 = __lasx_xvpermi_q(in9, in1, 0x20); ++ tt4 = __lasx_xvpermi_q(in10, in2, 0x20); ++ tt6 = __lasx_xvpermi_q(in11, in3, 0x20); ++ tt8 = __lasx_xvpermi_q(in12, in4, 0x20); ++ tt10 = __lasx_xvpermi_q(in13, in5, 0x20); ++ tt12 = __lasx_xvpermi_q(in14, in6, 0x20); ++ tt14 = __lasx_xvpermi_q(in15, in7, 0x20); ++ ++ tt1 = __lasx_xvpermi_q(in8, in0, 0x31); ++ tt3 = __lasx_xvpermi_q(in9, 
in1, 0x31); ++ tt5 = __lasx_xvpermi_q(in10, in2, 0x31); ++ tt7 = __lasx_xvpermi_q(in11, in3, 0x31); ++ tt9 = __lasx_xvpermi_q(in12, in4, 0x31); ++ tt11 = __lasx_xvpermi_q(in13, in5, 0x31); ++ tt13 = __lasx_xvpermi_q(in14, in6, 0x31); ++ tt15 = __lasx_xvpermi_q(in15, in7, 0x31); ++ ++ LASX_TRANSPOSE16x8_H(tt0, tt1, tt2, tt3, tt4, tt5, tt6, tt7, ++ tt8, tt9, tt10, tt11, tt12, tt13, tt14, tt15, ++ in0, in1, in2, in3, in4, in5, in6, in7); ++ in8 = __lasx_xvpickev_b(in4, in0); ++ in8 = __lasx_xvpermi_d(in8, 0xd8); ++ in9 = __lasx_xvpickod_b(in4, in0); ++ in9 = __lasx_xvpermi_d(in9, 0xd8); ++ in10 = __lasx_xvpickev_b(in5, in1); ++ in10 = __lasx_xvpermi_d(in10, 0xd8); ++ in11 = __lasx_xvpickod_b(in5, in1); ++ in11 = __lasx_xvpermi_d(in11, 0xd8); ++ in12 = __lasx_xvpickev_b(in6, in2); ++ in12 = __lasx_xvpermi_d(in12, 0xd8); ++ in13 = __lasx_xvpickod_b(in6, in2); ++ in13 = __lasx_xvpermi_d(in13, 0xd8); ++ in14 = __lasx_xvpickev_b(in7, in3); ++ in14 = __lasx_xvpermi_d(in14, 0xd8); ++ in15 = __lasx_xvpickod_b(in7, in3); ++ in15 = __lasx_xvpermi_d(in15, 0xd8); ++ ++ in8 &= mask, in9 &= mask, in10 &= mask, in11 &= mask; ++ in12 &= mask, in13 &= mask, in14 &= mask, in15 &= mask; ++ in8 = __lasx_xvsrli_b(in8, 6), in12 = __lasx_xvsrli_b(in12, 6); ++ in9 = __lasx_xvsrli_b(in9, 4), in13 = __lasx_xvsrli_b(in13, 4); ++ in10 = __lasx_xvsrli_b(in10, 2), in14 = __lasx_xvsrli_b(in14, 2); ++ d0 = in8, d0 |= in9, d0 |= in10, d0 |= in11; ++ d1 = in12, d1 |= in13, d1 |= in14, d1 |= in15; ++ ++ tt0 = __lasx_xvpermi_q(tt0, tt0, 0x31); ++ tt1 = __lasx_xvpermi_q(tt1, tt1, 0x31); ++ tt2 = __lasx_xvpermi_q(tt2, tt2, 0x31); ++ tt3 = __lasx_xvpermi_q(tt3, tt3, 0x31); ++ tt4 = __lasx_xvpermi_q(tt4, tt4, 0x31); ++ tt5 = __lasx_xvpermi_q(tt5, tt5, 0x31); ++ tt6 = __lasx_xvpermi_q(tt6, tt6, 0x31); ++ tt7 = __lasx_xvpermi_q(tt7, tt7, 0x31); ++ tt8 = __lasx_xvpermi_q(tt8, tt8, 0x31); ++ tt9 = __lasx_xvpermi_q(tt9, tt9, 0x31); ++ tt10 = __lasx_xvpermi_q(tt10, tt10, 0x31); ++ tt11 = __lasx_xvpermi_q(tt11, tt11, 0x31); ++ tt12 = __lasx_xvpermi_q(tt12, tt12, 0x31); ++ tt13 = __lasx_xvpermi_q(tt13, tt13, 0x31); ++ tt14 = __lasx_xvpermi_q(tt14, tt14, 0x31); ++ tt15 = __lasx_xvpermi_q(tt15, tt15, 0x31); ++ ++ LASX_TRANSPOSE16x8_H(tt0, tt1, tt2, tt3, tt4, tt5, tt6, tt7, ++ tt8, tt9, tt10, tt11, tt12, tt13, tt14, tt15, ++ in0, in1, in2, in3, in4, in5, in6, in7); ++ in8 = __lasx_xvpickev_b(in4, in0); ++ in8 = __lasx_xvpermi_d(in8, 0xd8); ++ in9 = __lasx_xvpickod_b(in4, in0); ++ in9 = __lasx_xvpermi_d(in9, 0xd8); ++ in10 = __lasx_xvpickev_b(in5, in1); ++ in10 = __lasx_xvpermi_d(in10, 0xd8); ++ in11 = __lasx_xvpickod_b(in5, in1); ++ in11 = __lasx_xvpermi_d(in11, 0xd8); ++ in12 = __lasx_xvpickev_b(in6, in2); ++ in12 = __lasx_xvpermi_d(in12, 0xd8); ++ in13 = __lasx_xvpickod_b(in6, in2); ++ in13 = __lasx_xvpermi_d(in13, 0xd8); ++ in14 = __lasx_xvpickev_b(in7, in3); ++ in14 = __lasx_xvpermi_d(in14, 0xd8); ++ in15 = __lasx_xvpickod_b(in7, in3); ++ in15 = __lasx_xvpermi_d(in15, 0xd8); ++ ++ in8 &= mask, in9 &= mask, in10 &= mask, in11 &= mask; ++ in12 &= mask, in13 &= mask, in14 &= mask, in15 &= mask; ++ in8 = __lasx_xvsrli_b(in8, 6), in12 = __lasx_xvsrli_b(in12, 6); ++ in9 = __lasx_xvsrli_b(in9, 4), in13 = __lasx_xvsrli_b(in13, 4); ++ in10 = __lasx_xvsrli_b(in10, 2), in14 = __lasx_xvsrli_b(in14, 2); ++ tt0 = in8, tt0 |= in9, tt0 |= in10, tt0 |= in11; ++ tt1 = in12, tt1 |= in13, tt1 |= in14, tt1 |= in15; ++ ++ in0 = __lasx_xvpermi_q(tt0, d0, 0x20); ++ in2 = __lasx_xvpermi_q(tt0, d0, 0x31); ++ in1 = __lasx_xvpermi_q(tt1, d1, 0x20); ++ in3 = 
__lasx_xvpermi_q(tt1, d1, 0x31); ++ ++ in8 = __lasx_xvilvl_b(in1, in0); ++ in9 = __lasx_xvilvh_b(in1, in0); ++ in10 = __lasx_xvilvl_b(in3, in2); ++ in11 = __lasx_xvilvh_b(in3, in2); ++ ++ in0 = __lasx_xvilvl_h(in10, in8); ++ in1 = __lasx_xvilvh_h(in10, in8); ++ in2 = __lasx_xvilvl_h(in11, in9); ++ in3 = __lasx_xvilvh_h(in11, in9); ++ ++ d0 = __lasx_xvpermi_q(in1, in0, 0x20); ++ tt0 = __lasx_xvpermi_q(in1, in0, 0x31); ++ d1 = __lasx_xvpermi_q(in3, in2, 0x20); ++ tt1 = __lasx_xvpermi_q(in3, in2, 0x31); ++ ++ __lasx_xvst(d0, dest, 0); ++ __lasx_xvst(d1, dest, 32); ++ __lasx_xvst(tt0, dest, 64); ++ __lasx_xvst(tt1, dest, 96); ++ width -= 128, values += 64, dest += 128; ++ } ++ ++ while (width >= 32) { ++ in0 = __lasx_xvld(values, 0); ++ in2 = __lasx_xvld(values, 32); ++ in4 = __lasx_xvld(values, 64); ++ in6 = __lasx_xvld(values, 96); ++ ++ in1 = __lasx_xvpackod_d(in0, in0); ++ in3 = __lasx_xvpackod_d(in2, in2); ++ in5 = __lasx_xvpackod_d(in4, in4); ++ in7 = __lasx_xvpackod_d(in6, in6); ++ tt0 = __lasx_xvpermi_q(in4, in0, 0x20); ++ tt2 = __lasx_xvpermi_q(in4, in0, 0x31); ++ tt1 = __lasx_xvpermi_q(in5, in1, 0x20); ++ tt3 = __lasx_xvpermi_q(in5, in1, 0x31); ++ tt4 = __lasx_xvpermi_q(in6, in2, 0x20); ++ tt6 = __lasx_xvpermi_q(in6, in2, 0x31); ++ tt5 = __lasx_xvpermi_q(in7, in3, 0x20); ++ tt7 = __lasx_xvpermi_q(in7, in3, 0x31); ++ ++ LASX_TRANSPOSE8x8_H(tt0, tt1, tt2, tt3, tt4, tt5, tt6, tt7, ++ tt0, tt1, tt2, tt3, tt4, tt5, tt6, tt7); ++ tt8 = __lasx_xvpickev_b(tt1, tt0); ++ tt8 = __lasx_xvpermi_d(tt8, 0xd8); ++ tt9 = __lasx_xvpickod_b(tt1, tt0); ++ tt9 = __lasx_xvpermi_d(tt9, 0xd8); ++ tt10 = __lasx_xvpickev_b(tt3, tt2); ++ tt10 = __lasx_xvpermi_d(tt10, 0xd8); ++ tt11 = __lasx_xvpickod_b(tt3, tt2); ++ tt11 = __lasx_xvpermi_d(tt11, 0xd8); ++ tt12 = __lasx_xvpickev_b(tt5, tt4); ++ tt12 = __lasx_xvpermi_d(tt12, 0xd8); ++ tt13 = __lasx_xvpickod_b(tt5, tt4); ++ tt13 = __lasx_xvpermi_d(tt13, 0xd8); ++ tt14 = __lasx_xvpickev_b(tt7, tt6); ++ tt14 = __lasx_xvpermi_d(tt14, 0xd8); ++ tt15 = __lasx_xvpickod_b(tt7, tt6); ++ tt15 = __lasx_xvpermi_d(tt15, 0xd8); ++ ++ tt0 = __lasx_xvpermi_q(tt12, tt8, 0x20); ++ tt2 = __lasx_xvpermi_q(tt12, tt8, 0x31); ++ tt1 = __lasx_xvpermi_q(tt13, tt9, 0x20); ++ tt3 = __lasx_xvpermi_q(tt13, tt9, 0x31); ++ tt4 = __lasx_xvpermi_q(tt14, tt10, 0x20); ++ tt6 = __lasx_xvpermi_q(tt14, tt10, 0x31); ++ tt5 = __lasx_xvpermi_q(tt15, tt11, 0x20); ++ tt7 = __lasx_xvpermi_q(tt15, tt11, 0x31); ++ ++ tt0 &= mask, tt1 &= mask, tt2 &= mask, tt3 &= mask; ++ tt4 &= mask, tt5 &= mask, tt6 &= mask, tt7 &= mask; ++ tt0 = __lasx_xvsrli_b(tt0, 6), tt4 = __lasx_xvsrli_b(tt4, 6); ++ tt1 = __lasx_xvsrli_b(tt1, 4), tt5 = __lasx_xvsrli_b(tt5, 4); ++ tt2 = __lasx_xvsrli_b(tt2, 2), tt6 = __lasx_xvsrli_b(tt6, 2); ++ d0 = tt0, d0 |= tt1, d0 |= tt2, d0 |= tt3; ++ d1 = tt4, d1 |= tt5, d1 |= tt6, d1 |= tt7; ++ ++ tt0 = __lasx_xvilvl_b(d1, d0); ++ tt1 = __lasx_xvilvh_b(d1, d0); ++ d0 = __lasx_xvpermi_q(tt0, tt1, 0x02); ++ __lasx_xvst(d0, dest, 0); ++ width -= 32, values += 32, dest += 32; ++ } ++ ++ while (width--) { ++ uint32_t pixel = *values++; ++ pixel &= 0xc0c0c0c0; ++ pixel |= (pixel << 6); ++ pixel |= (pixel << 12); ++ pixel >>= 24; ++ *dest++ = pixel; ++ } ++} ++ ++// fetch/store 16 bits ++static void lasx_fetch_scanline_a1r5g5b5 (bits_image_t *image, int x, int y, ++ int width, uint32_t *buffer, ++ const uint32_t *mask) ++{ ++ uint16_t *bits = (uint16_t *)(image->bits + y * image->rowstride); ++ uint32_t pixel, pixel0, pixel1, pixel2; ++ ++ __m256i src, tmp0, tmp1; ++ __m256i t, t0, t1, t2, t3; ++ 
__m256i mask0 = __lasx_xvreplgr2vr_h(0x001f); ++ bits += x; ++ ++ while (width >= 16) { ++ src = __lasx_xvld(bits, 0); ++ t0 = (src & mask0); ++ t0 = __lasx_xvslli_h(t0, 3); ++ t = __lasx_xvsrli_h(t0, 5); ++ t0 |= t; ++ t1 = __lasx_xvsrli_h(src, 5); ++ t1 &= mask0; ++ t1 = __lasx_xvslli_h(t1, 3); ++ t = __lasx_xvsrli_h(t1, 5); ++ t1 |= t; ++ t2 = __lasx_xvsrli_h(src, 10); ++ t2 &= mask0; ++ t2 = __lasx_xvslli_h(t2, 3); ++ t = __lasx_xvsrli_h(t2, 5); ++ t2 |= t; ++ t3 = __lasx_xvsrli_h(src, 15); ++ t = __lasx_xvslli_h(t3, 1); ++ t3 |= t; ++ t = __lasx_xvslli_h(t3, 2); ++ t3 |= t; ++ t = __lasx_xvslli_h(t3, 4); ++ t3 |= t; ++ t1 <<= 8; ++ t0 |= t1; ++ t3 <<= 8; ++ t2 |= t3; ++ tmp0 = __lasx_xvilvl_h(t2, t0); ++ tmp1 = __lasx_xvilvh_h(t2, t0); ++ t1 = __lasx_xvpermi_q(tmp0, tmp1, 0x02); ++ t3 = __lasx_xvpermi_q(tmp0, tmp1, 0x13); ++ __lasx_xvst(t1, buffer, 0); ++ __lasx_xvst(t3, buffer, 32); ++ bits += 16, width -= 16, buffer += 16; ++ } ++ ++ if (width >= 8) { ++ src = __lasx_xvld(bits, 0); ++ t0 = (src & mask0); ++ t0 = __lasx_xvslli_h(t0, 3); ++ t = __lasx_xvsrli_h(t0, 5); ++ t0 |= t; ++ t1 = __lasx_xvsrli_h(src, 5); ++ t1 &= mask0; ++ t1 = __lasx_xvslli_h(t1, 3); ++ t = __lasx_xvsrli_h(t1, 5); ++ t1 |= t; ++ t2 = __lasx_xvsrli_h(src, 10); ++ t2 &= mask0; ++ t2 = __lasx_xvslli_h(t2, 3); ++ t = __lasx_xvsrli_h(t2, 5); ++ t2 |= t; ++ t3 = __lasx_xvsrli_h(src, 15); ++ t = __lasx_xvslli_h(t3, 1); ++ t3 |= t; ++ t = __lasx_xvslli_h(t3, 2); ++ t3 |= t; ++ t = __lasx_xvslli_h(t3, 4); ++ t3 |= t; ++ t1 <<= 8; ++ t0 |= t1; ++ t3 <<= 8; ++ t2 |= t3; ++ tmp0 = __lasx_xvilvl_h(t2, t0); ++ tmp1 = __lasx_xvilvh_h(t2, t0); ++ t1 = __lasx_xvpermi_q(tmp0, tmp1, 0x02); ++ __lasx_xvst(t1, buffer, 0); ++ bits += 8, width -= 8, buffer += 8; ++ } ++ ++ while (width--) { ++ pixel = *bits++; ++ // a ++ pixel0 = pixel >> 15; ++ pixel0 <<= 7; ++ pixel0 |= (pixel0 >> 1); ++ pixel0 |= (pixel0 >> 2); ++ pixel0 |= (pixel0 >> 4); ++ pixel0 <<= 24; ++ // r ++ pixel1 = pixel >> 10; ++ pixel1 &= 31; ++ pixel1 <<= 3; ++ pixel1 |= (pixel1 >> 5); ++ pixel1 <<= 16; ++ // g ++ pixel2 = pixel >> 5; ++ pixel2 &= 31; ++ pixel2 <<= 3; ++ pixel2 |= (pixel2 >> 5); ++ pixel2 <<= 8; ++ // b ++ pixel &= 31; ++ pixel <<= 3; ++ pixel |= (pixel >> 5); ++ *buffer++ = (pixel0 | pixel1 | pixel2 | pixel); ++ } ++} ++ ++static void lasx_store_scanline_a1r5g5b5 (bits_image_t *image, int x, int y, ++ int width, const uint32_t *values) ++{ ++ uint16_t *dest = (uint16_t *)(image->bits + y * image->rowstride); ++ uint32_t pixel, pixel0, pixel1, pixel2, pixel3; ++ __m256i in0, in1, in2, in3; ++ __m256i tmp0, tmp1; ++ __m256i t0, t1, t2, t3, t4, t5, t6, t7; ++ __m256i t8, t9, t10, t11, t12, t13, t14, t15; ++ __m256i zero = __lasx_xvldi(0); ++ __m256i mask = { 0x80f8f8f880f8f8f8, 0x80f8f8f880f8f8f8, ++ 0x80f8f8f880f8f8f8, 0x80f8f8f880f8f8f8 }; ++ dest += x; ++ ++ while(width >= 32) { ++ in0 = __lasx_xvld(values, 0); ++ in1 = __lasx_xvld(values, 32); ++ in2 = __lasx_xvld(values, 64); ++ in3 = __lasx_xvld(values, 96); ++ ++ in0 = __lasx_xvand_v(in0, mask); ++ in1 = __lasx_xvand_v(in1, mask); ++ in2 = __lasx_xvand_v(in2, mask); ++ in3 = __lasx_xvand_v(in3, mask); ++ tmp0 = __lasx_xvilvl_b(in0, zero); ++ tmp1 = __lasx_xvilvh_b(in0, zero); ++ t0 = __lasx_xvpermi_q(tmp0, tmp1, 0x02); ++ t1 = __lasx_xvpermi_q(tmp0, tmp1, 0x13); ++ tmp0 = __lasx_xvilvl_b(in1, zero); ++ tmp1 = __lasx_xvilvh_b(in1, zero); ++ t2 = __lasx_xvpermi_q(tmp0, tmp1, 0x02); ++ t3 = __lasx_xvpermi_q(tmp0, tmp1, 0x13); ++ tmp0 = __lasx_xvilvl_b(in2, zero); ++ tmp1 = __lasx_xvilvh_b(in2, 
zero); ++ t4 = __lasx_xvpermi_q(tmp0, tmp1, 0x02); ++ t5 = __lasx_xvpermi_q(tmp0, tmp1, 0x13); ++ tmp0 = __lasx_xvilvl_b(in3, zero); ++ tmp1 = __lasx_xvilvh_b(in3, zero); ++ t6 = __lasx_xvpermi_q(tmp0, tmp1, 0x02); ++ t7 = __lasx_xvpermi_q(tmp0, tmp1, 0x13); ++ ++ tmp0 = __lasx_xvilvl_h(zero, t7); ++ tmp1 = __lasx_xvilvh_h(zero, t7); ++ t14 = __lasx_xvpermi_q(tmp0, tmp1, 0x02); ++ t15 = __lasx_xvpermi_q(tmp0, tmp1, 0x13); ++ tmp0 = __lasx_xvilvl_h(zero, t6); ++ tmp1 = __lasx_xvilvh_h(zero, t6); ++ t12 = __lasx_xvpermi_q(tmp0, tmp1, 0x02); ++ t13 = __lasx_xvpermi_q(tmp0, tmp1, 0x13); ++ tmp0 = __lasx_xvilvl_h(zero, t5); ++ tmp1 = __lasx_xvilvh_h(zero, t5); ++ t10 = __lasx_xvpermi_q(tmp0, tmp1, 0x02); ++ t11 = __lasx_xvpermi_q(tmp0, tmp1, 0x13); ++ tmp0 = __lasx_xvilvl_h(zero, t4); ++ tmp1 = __lasx_xvilvh_h(zero, t4); ++ t8 = __lasx_xvpermi_q(tmp0, tmp1, 0x02); ++ t9 = __lasx_xvpermi_q(tmp0, tmp1, 0x13); ++ tmp0 = __lasx_xvilvl_h(zero, t3); ++ tmp1 = __lasx_xvilvh_h(zero, t3); ++ t6 = __lasx_xvpermi_q(tmp0, tmp1, 0x02); ++ t7 = __lasx_xvpermi_q(tmp0, tmp1, 0x13); ++ tmp0 = __lasx_xvilvl_h(zero, t2); ++ tmp1 = __lasx_xvilvh_h(zero, t2); ++ t4 = __lasx_xvpermi_q(tmp0, tmp1, 0x02); ++ t5 = __lasx_xvpermi_q(tmp0, tmp1, 0x13); ++ tmp0 = __lasx_xvilvl_h(zero, t1); ++ tmp1 = __lasx_xvilvh_h(zero, t1); ++ t2 = __lasx_xvpermi_q(tmp0, tmp1, 0x02); ++ t3 = __lasx_xvpermi_q(tmp0, tmp1, 0x13); ++ tmp0 = __lasx_xvilvl_h(zero, t0); ++ tmp1 = __lasx_xvilvh_h(zero, t0); ++ t0 = __lasx_xvpermi_q(tmp0, tmp1, 0x02); ++ t1 = __lasx_xvpermi_q(tmp0, tmp1, 0x13); ++ ++ LASX_TRANSPOSE8x8_W(t0, t1, t2, t3, t4, t5, t6, t7, ++ t0, t1, t2, t3, t4, t5, t6, t7); ++ LASX_TRANSPOSE8x8_W(t8, t9, t10, t11, t12, t13, t14, t15, ++ t8, t9, t10, t11, t12, t13, t14, t15); ++ ++ t0 = __lasx_xvsrli_h(t0, 11); ++ t1 = __lasx_xvsrli_h(t1, 6); ++ t2 = __lasx_xvsrli_h(t2, 1); ++ t4 = __lasx_xvsrli_h(t4, 11); ++ t5 = __lasx_xvsrli_h(t5, 6); ++ t6 = __lasx_xvsrli_h(t6, 1); ++ ++ t8 = __lasx_xvsrli_h(t8, 11); ++ t9 = __lasx_xvsrli_h(t9, 6); ++ t10 = __lasx_xvsrli_h(t10, 1); ++ t12 = __lasx_xvsrli_h(t12, 11); ++ t13 = __lasx_xvsrli_h(t13, 6); ++ t14 = __lasx_xvsrli_h(t14, 1); ++ ++ t3 = __lasx_xvor_v(t3, t2); ++ t3 = __lasx_xvor_v(t3, t1); ++ t3 = __lasx_xvor_v(t3, t0); ++ t7 = __lasx_xvor_v(t7, t6); ++ t7 = __lasx_xvor_v(t7, t5); ++ t7 = __lasx_xvor_v(t7, t4); ++ ++ t11 = __lasx_xvor_v(t11, t10); ++ t11 = __lasx_xvor_v(t11, t9); ++ t11 = __lasx_xvor_v(t11, t8); ++ t15 = __lasx_xvor_v(t15, t14); ++ t15 = __lasx_xvor_v(t15, t13); ++ t15 = __lasx_xvor_v(t15, t12); ++ ++ tmp0 = __lasx_xvilvl_w(t7, t3); ++ tmp1 = __lasx_xvilvh_w(t7, t3); ++ t0 = __lasx_xvpermi_q(tmp0, tmp1, 0x02); ++ t1 = __lasx_xvpermi_q(tmp0, tmp1, 0x13); ++ t0 = __lasx_xvpickev_h(t1, t0); ++ t0 = __lasx_xvpermi_d(t0, 0xd8); ++ ++ tmp0 = __lasx_xvilvl_w(t15, t11); ++ tmp1 = __lasx_xvilvh_w(t15, t11); ++ t8 = __lasx_xvpermi_q(tmp0, tmp1, 0x02); ++ t9 = __lasx_xvpermi_q(tmp0, tmp1, 0x13); ++ t1 = __lasx_xvpickev_h(t9, t8); ++ t1 = __lasx_xvpermi_d(t1, 0xd8); ++ ++ __lasx_xvst(t0, dest, 0); ++ __lasx_xvst(t1, dest, 32); ++ values += 32, width -= 32, dest += 32; ++ } ++ ++ if (width >= 16) { ++ in0 = __lasx_xvld(values, 0); ++ in1 = __lasx_xvld(values, 32); ++ ++ in0 = __lasx_xvand_v(in0, mask); ++ in1 = __lasx_xvand_v(in1, mask); ++ tmp0 = __lasx_xvilvl_b(in0, zero); ++ tmp1 = __lasx_xvilvh_b(in0, zero); ++ t0 = __lasx_xvpermi_q(tmp0, tmp1, 0x02); ++ t1 = __lasx_xvpermi_q(tmp0, tmp1, 0x13); ++ tmp0 = __lasx_xvilvl_b(in1, zero); ++ tmp1 = __lasx_xvilvh_b(in1, zero); ++ t2 = 
__lasx_xvpermi_q(tmp0, tmp1, 0x02); ++ t3 = __lasx_xvpermi_q(tmp0, tmp1, 0x13); ++ tmp0 = __lasx_xvilvl_h(zero, t3); ++ tmp1 = __lasx_xvilvh_h(zero, t3); ++ t6 = __lasx_xvpermi_q(tmp0, tmp1, 0x02); ++ t7 = __lasx_xvpermi_q(tmp0, tmp1, 0x13); ++ tmp0 = __lasx_xvilvl_h(zero, t2); ++ tmp1 = __lasx_xvilvh_h(zero, t2); ++ t4 = __lasx_xvpermi_q(tmp0, tmp1, 0x02); ++ t5 = __lasx_xvpermi_q(tmp0, tmp1, 0x13); ++ tmp0 = __lasx_xvilvl_h(zero, t1); ++ tmp1 = __lasx_xvilvh_h(zero, t1); ++ t2 = __lasx_xvpermi_q(tmp0, tmp1, 0x02); ++ t3 = __lasx_xvpermi_q(tmp0, tmp1, 0x13); ++ tmp0 = __lasx_xvilvl_h(zero, t0); ++ tmp1 = __lasx_xvilvh_h(zero, t0); ++ t0 = __lasx_xvpermi_q(tmp0, tmp1, 0x02); ++ t1 = __lasx_xvpermi_q(tmp0, tmp1, 0x13); ++ LASX_TRANSPOSE8x8_W(t0, t1, t2, t3, t4, t5, t6, t7, ++ t0, t1, t2, t3, t4, t5, t6, t7); ++ ++ t0 = __lasx_xvsrli_h(t0, 11); ++ t1 = __lasx_xvsrli_h(t1, 6); ++ t2 = __lasx_xvsrli_h(t2, 1); ++ t4 = __lasx_xvsrli_h(t4, 11); ++ t5 = __lasx_xvsrli_h(t5, 6); ++ t6 = __lasx_xvsrli_h(t6, 1); ++ ++ t3 = __lasx_xvor_v(t3, t2); ++ t3 = __lasx_xvor_v(t3, t1); ++ t3 = __lasx_xvor_v(t3, t0); ++ t7 = __lasx_xvor_v(t7, t6); ++ t7 = __lasx_xvor_v(t7, t5); ++ t7 = __lasx_xvor_v(t7, t4); ++ ++ tmp0 = __lasx_xvilvl_w(t7, t3); ++ tmp1 = __lasx_xvilvh_w(t7, t3); ++ t0 = __lasx_xvpermi_q(tmp0, tmp1, 0x02); ++ t1 = __lasx_xvpermi_q(tmp0, tmp1, 0x13); ++ t0 = __lasx_xvpickev_h(t1, t0); ++ t0 = __lasx_xvpermi_d(t0, 0xd8); ++ ++ __lasx_xvst(t0, dest, 0); ++ values += 16, width -= 16, dest += 16; ++ } ++ ++ while(width--) { ++ pixel = *values++; ++ pixel0 = pixel >> 16; ++ pixel1 = pixel >> 9; ++ pixel2 = pixel >> 6; ++ pixel3 = pixel >> 3; ++ pixel0 &= 0x8000; ++ pixel1 &= 0x7c00; ++ pixel2 &= 0x03e0; ++ pixel3 &= 0x001f; ++ *dest++ = (pixel0 | pixel1 | pixel2 | pixel3); ++ } ++} ++ ++static void lasx_fetch_scanline_a4r4g4b4 (bits_image_t *image, int x, int y, ++ int width, uint32_t *buffer, ++ const uint32_t *mask) ++{ ++ uint16_t *bits = (uint16_t *)(image->bits + y * image->rowstride); ++ uint32_t pixel, pixel0, pixel1, pixel2; ++ ++ __m256i src, tmp0, tmp1; ++ __m256i t, t0, t1, t2, t3; ++ ++ __m256i mask0 = __lasx_xvreplgr2vr_h(0x000f); ++ bits += x; ++ ++ while (width >= 16) { ++ src = __lasx_xvld(bits, 0); ++ t0 = __lasx_xvsrli_h(src, 12); ++ t = (t0 << 4), t0 |= t; ++ t1 = __lasx_xvsrli_h(src, 8); ++ t1 &= mask0, t = (t1 << 4), t1 |= t; ++ t2 = __lasx_xvsrli_h(src, 4); ++ t2 &= mask0, t = (t2 << 4), t2 |= t; ++ t3 = (src & mask0), t = (t3 << 4), t3 |= t; ++ t0 <<= 8, t2 <<= 8, t0 |= t1, t2 |= t3; ++ tmp0 = __lasx_xvilvl_h(t0, t2); ++ tmp1 = __lasx_xvilvh_h(t0, t2); ++ t1 = __lasx_xvpermi_q(tmp0, tmp1, 0x02); ++ t3 = __lasx_xvpermi_q(tmp0, tmp1, 0x13); ++ __lasx_xvst(t1, buffer, 0); ++ __lasx_xvst(t3, buffer, 32); ++ bits += 16, width -= 16, buffer += 16; ++ } ++ ++ if (width >= 8) { ++ src = __lasx_xvld(bits, 0); ++ t0 = __lasx_xvsrli_h(src, 12); ++ t = (t0 << 4), t0 |= t; ++ t1 = __lasx_xvsrli_h(src, 8); ++ t1 &= mask0, t = (t1 << 4), t1 |= t; ++ t2 = __lasx_xvsrli_h(src, 4); ++ t2 &= mask0, t = (t2 << 4), t2 |= t; ++ t3 = (src & mask0), t = (t3 << 4), t3 |= t; ++ t0 <<= 8, t2 <<= 8, t0 |= t1, t2 |= t3; ++ tmp0 = __lasx_xvilvl_h(t0, t2); ++ tmp1 = __lasx_xvilvh_h(t0, t2); ++ t1 = __lasx_xvpermi_q(tmp0, tmp1, 0x02); ++ __lasx_xvst(t1, buffer, 0); ++ bits += 8, width -= 8, buffer += 8; ++ } ++ ++ while (width--) { ++ pixel = *bits++; ++ // a ++ pixel0 = pixel >> 12; ++ pixel0 |= (pixel0 << 4); ++ pixel0 <<= 24; ++ // r ++ pixel1 = pixel >> 8; ++ pixel1 &= 15; ++ pixel1 |= (pixel1 << 4); ++ 
pixel1 <<= 16; ++ // g ++ pixel2 = pixel >> 4; ++ pixel2 &= 15; ++ pixel2 |= (pixel2 << 4); ++ pixel2 <<= 8; ++ // b ++ pixel &= 15; ++ pixel |= (pixel << 4); ++ *buffer++ = (pixel0 | pixel1 | pixel2 | pixel); ++ } ++} ++ ++static void lasx_store_scanline_a4r4g4b4 (bits_image_t *image, int x, int y, ++ int width, const uint32_t *values) ++{ ++ uint16_t *dest = (uint16_t *)(image->bits + y * image->rowstride); ++ uint32_t pixel, pixel0, pixel1; ++ __m256i in0, in1, in2, in3; ++ __m256i tmp0, tmp1; ++ __m256i t0, t1, t2, t3, t4, t5, t6, t7; ++ __m256i t8, t9, t10, t11, t12, t13, t14, t15; ++ __m256i zero = __lasx_xvldi(0); ++ __m256i mask = __lasx_xvreplgr2vr_h(0xf0f0); ++ dest += x; ++ ++ while(width >= 32) { ++ in0 = __lasx_xvld(values, 0); ++ in1 = __lasx_xvld(values, 32); ++ in2 = __lasx_xvld(values, 64); ++ in3 = __lasx_xvld(values, 96); ++ ++ in0 = __lasx_xvand_v(in0, mask); ++ in1 = __lasx_xvand_v(in1, mask); ++ in2 = __lasx_xvand_v(in2, mask); ++ in3 = __lasx_xvand_v(in3, mask); ++ ++ tmp0 = __lasx_xvilvl_b(in0, zero); ++ tmp1 = __lasx_xvilvh_b(in0, zero); ++ t0 = __lasx_xvpermi_q(tmp0, tmp1, 0x02); ++ t1 = __lasx_xvpermi_q(tmp0, tmp1, 0x13); ++ tmp0 = __lasx_xvilvl_b(in1, zero); ++ tmp1 = __lasx_xvilvh_b(in1, zero); ++ t2 = __lasx_xvpermi_q(tmp0, tmp1, 0x02); ++ t3 = __lasx_xvpermi_q(tmp0, tmp1, 0x13); ++ tmp0 = __lasx_xvilvl_b(in2, zero); ++ tmp1 = __lasx_xvilvh_b(in2, zero); ++ t4 = __lasx_xvpermi_q(tmp0, tmp1, 0x02); ++ t5 = __lasx_xvpermi_q(tmp0, tmp1, 0x13); ++ tmp0 = __lasx_xvilvl_b(in3, zero); ++ tmp1 = __lasx_xvilvh_b(in3, zero); ++ t6 = __lasx_xvpermi_q(tmp0, tmp1, 0x02); ++ t7 = __lasx_xvpermi_q(tmp0, tmp1, 0x13); ++ ++ tmp0 = __lasx_xvilvl_h(zero, t7); ++ tmp1 = __lasx_xvilvh_h(zero, t7); ++ t14 = __lasx_xvpermi_q(tmp0, tmp1, 0x02); ++ t15 = __lasx_xvpermi_q(tmp0, tmp1, 0x13); ++ tmp0 = __lasx_xvilvl_h(zero, t6); ++ tmp1 = __lasx_xvilvh_h(zero, t6); ++ t12 = __lasx_xvpermi_q(tmp0, tmp1, 0x02); ++ t13 = __lasx_xvpermi_q(tmp0, tmp1, 0x13); ++ tmp0 = __lasx_xvilvl_h(zero, t5); ++ tmp1 = __lasx_xvilvh_h(zero, t5); ++ t10 = __lasx_xvpermi_q(tmp0, tmp1, 0x02); ++ t11 = __lasx_xvpermi_q(tmp0, tmp1, 0x13); ++ tmp0 = __lasx_xvilvl_h(zero, t4); ++ tmp1 = __lasx_xvilvh_h(zero, t4); ++ t8 = __lasx_xvpermi_q(tmp0, tmp1, 0x02); ++ t9 = __lasx_xvpermi_q(tmp0, tmp1, 0x13); ++ tmp0 = __lasx_xvilvl_h(zero, t3); ++ tmp1 = __lasx_xvilvh_h(zero, t3); ++ t6 = __lasx_xvpermi_q(tmp0, tmp1, 0x02); ++ t7 = __lasx_xvpermi_q(tmp0, tmp1, 0x13); ++ tmp0 = __lasx_xvilvl_h(zero, t2); ++ tmp1 = __lasx_xvilvh_h(zero, t2); ++ t4 = __lasx_xvpermi_q(tmp0, tmp1, 0x02); ++ t5 = __lasx_xvpermi_q(tmp0, tmp1, 0x13); ++ tmp0 = __lasx_xvilvl_h(zero, t1); ++ tmp1 = __lasx_xvilvh_h(zero, t1); ++ t2 = __lasx_xvpermi_q(tmp0, tmp1, 0x02); ++ t3 = __lasx_xvpermi_q(tmp0, tmp1, 0x13); ++ tmp0 = __lasx_xvilvl_h(zero, t0); ++ tmp1 = __lasx_xvilvh_h(zero, t0); ++ t0 = __lasx_xvpermi_q(tmp0, tmp1, 0x02); ++ t1 = __lasx_xvpermi_q(tmp0, tmp1, 0x13); ++ ++ LASX_TRANSPOSE8x8_W(t0, t1, t2, t3, t4, t5, t6, t7, ++ t0, t1, t2, t3, t4, t5, t6, t7); ++ LASX_TRANSPOSE8x8_W(t8, t9, t10, t11, t12, t13, t14, t15, ++ t8, t9, t10, t11, t12, t13, t14, t15); ++ ++ t0 = __lasx_xvsrli_h(t0, 12); ++ t1 = __lasx_xvsrli_h(t1, 8); ++ t2 = __lasx_xvsrli_h(t2, 4); ++ t4 = __lasx_xvsrli_h(t4, 12); ++ t5 = __lasx_xvsrli_h(t5, 8); ++ t6 = __lasx_xvsrli_h(t6, 4); ++ ++ t8 = __lasx_xvsrli_h(t8, 12); ++ t9 = __lasx_xvsrli_h(t9, 8); ++ t10 = __lasx_xvsrli_h(t10, 4); ++ t12 = __lasx_xvsrli_h(t12, 12); ++ t13 = __lasx_xvsrli_h(t13, 8); ++ t14 = 
__lasx_xvsrli_h(t14, 4); ++ ++ t3 = __lasx_xvor_v(t3, t2); ++ t3 = __lasx_xvor_v(t3, t1); ++ t3 = __lasx_xvor_v(t3, t0); ++ t7 = __lasx_xvor_v(t7, t6); ++ t7 = __lasx_xvor_v(t7, t5); ++ t7 = __lasx_xvor_v(t7, t4); ++ ++ t11 = __lasx_xvor_v(t11, t10); ++ t11 = __lasx_xvor_v(t11, t9); ++ t11 = __lasx_xvor_v(t11, t8); ++ t15 = __lasx_xvor_v(t15, t14); ++ t15 = __lasx_xvor_v(t15, t13); ++ t15 = __lasx_xvor_v(t15, t12); ++ ++ tmp0 = __lasx_xvilvl_w(t7, t3); ++ tmp1 = __lasx_xvilvh_w(t7, t3); ++ t0 = __lasx_xvpermi_q(tmp0, tmp1, 0x02); ++ t1 = __lasx_xvpermi_q(tmp0, tmp1, 0x13); ++ t0 = __lasx_xvpickev_h(t1, t0); ++ t0 = __lasx_xvpermi_d(t0, 0xd8); ++ ++ tmp0 = __lasx_xvilvl_w(t15, t11); ++ tmp1 = __lasx_xvilvh_w(t15, t11); ++ t8 = __lasx_xvpermi_q(tmp0, tmp1, 0x02); ++ t9 = __lasx_xvpermi_q(tmp0, tmp1, 0x13); ++ t1 = __lasx_xvpickev_h(t9, t8); ++ t1 = __lasx_xvpermi_d(t1, 0xd8); ++ ++ __lasx_xvst(t0, dest, 0); ++ __lasx_xvst(t1, dest, 32); ++ values += 32, width -= 32, dest += 32; ++ } ++ ++ if (width >= 16) { ++ ++ in0 = __lasx_xvld(values, 0); ++ in1 = __lasx_xvld(values, 32); ++ ++ in1 = __lasx_xvand_v(in1, mask); ++ in0 = __lasx_xvand_v(in0, mask); ++ tmp0 = __lasx_xvilvl_b(in0, zero); ++ tmp1 = __lasx_xvilvh_b(in0, zero); ++ t0 = __lasx_xvpermi_q(tmp0, tmp1, 0x02); ++ t1 = __lasx_xvpermi_q(tmp0, tmp1, 0x13); ++ tmp0 = __lasx_xvilvl_b(in1, zero); ++ tmp1 = __lasx_xvilvh_b(in1, zero); ++ t2 = __lasx_xvpermi_q(tmp0, tmp1, 0x02); ++ t3 = __lasx_xvpermi_q(tmp0, tmp1, 0x13); ++ tmp0 = __lasx_xvilvl_h(zero, t3); ++ tmp1 = __lasx_xvilvh_h(zero, t3); ++ t6 = __lasx_xvpermi_q(tmp0, tmp1, 0x02); ++ t7 = __lasx_xvpermi_q(tmp0, tmp1, 0x13); ++ tmp0 = __lasx_xvilvl_h(zero, t2); ++ tmp1 = __lasx_xvilvh_h(zero, t2); ++ t4 = __lasx_xvpermi_q(tmp0, tmp1, 0x02); ++ t5 = __lasx_xvpermi_q(tmp0, tmp1, 0x13); ++ tmp0 = __lasx_xvilvl_h(zero, t1); ++ tmp1 = __lasx_xvilvh_h(zero, t1); ++ t2 = __lasx_xvpermi_q(tmp0, tmp1, 0x02); ++ t3 = __lasx_xvpermi_q(tmp0, tmp1, 0x13); ++ tmp0 = __lasx_xvilvl_h(zero, t0); ++ tmp1 = __lasx_xvilvh_h(zero, t0); ++ t0 = __lasx_xvpermi_q(tmp0, tmp1, 0x02); ++ t1 = __lasx_xvpermi_q(tmp0, tmp1, 0x13); ++ ++ LASX_TRANSPOSE8x8_W(t0, t1, t2, t3, t4, t5, t6, t7, ++ t0, t1, t2, t3, t4, t5, t6, t7); ++ ++ t0 = __lasx_xvsrli_h(t0, 12); ++ t1 = __lasx_xvsrli_h(t1, 8); ++ t2 = __lasx_xvsrli_h(t2, 4); ++ t4 = __lasx_xvsrli_h(t4, 12); ++ t5 = __lasx_xvsrli_h(t5, 8); ++ t6 = __lasx_xvsrli_h(t6, 4); ++ ++ t3 = __lasx_xvor_v(t3, t2); ++ t3 = __lasx_xvor_v(t3, t1); ++ t3 = __lasx_xvor_v(t3, t0); ++ t7 = __lasx_xvor_v(t7, t6); ++ t7 = __lasx_xvor_v(t7, t5); ++ t7 = __lasx_xvor_v(t7, t4); ++ ++ tmp0 = __lasx_xvilvl_w(t7, t3); ++ tmp1 = __lasx_xvilvh_w(t7, t3); ++ t0 = __lasx_xvpermi_q(tmp0, tmp1, 0x02); ++ t1 = __lasx_xvpermi_q(tmp0, tmp1, 0x13); ++ t0 = __lasx_xvpickev_h(t1, t0); ++ t0 = __lasx_xvpermi_d(t0, 0xd8); ++ __lasx_xvst(t0, dest, 0); ++ values += 16, width -= 16, dest += 16; ++ } ++ ++ while(width--) { ++ pixel = *values++; ++ pixel &= 0xf0f0f0f0; ++ pixel0 = (pixel >> 4); ++ pixel1 = (pixel >> 8); ++ pixel0 |= pixel1; ++ pixel0 &= 0x00ff00ff; ++ pixel0 |= (pixel0 >> 8); ++ pixel0 &= 0xffff; ++ *dest++ = pixel0; ++ } ++} ++ ++static const pixman_fast_path_t lasx_fast_paths[] = ++{ ++ PIXMAN_STD_FAST_PATH (OVER, solid, a8, a8r8g8b8, lasx_composite_over_n_8_8888), ++ PIXMAN_STD_FAST_PATH (OVER, solid, a8, x8r8g8b8, lasx_composite_over_n_8_8888), ++ PIXMAN_STD_FAST_PATH (OVER, solid, a8, a8b8g8r8, lasx_composite_over_n_8_8888), ++ PIXMAN_STD_FAST_PATH (OVER, solid, a8, x8b8g8r8, 
lasx_composite_over_n_8_8888), ++ PIXMAN_STD_FAST_PATH_CA (OVER, solid, a8r8g8b8, r5g6b5, lasx_composite_over_n_8888_0565_ca), ++ PIXMAN_STD_FAST_PATH_CA (OVER, solid, a8b8g8r8, b5g6r5, lasx_composite_over_n_8888_0565_ca), ++ PIXMAN_STD_FAST_PATH (OVER, x8r8g8b8, solid, a8r8g8b8, lasx_composite_over_x888_n_8888), ++ PIXMAN_STD_FAST_PATH (OVER, x8r8g8b8, solid, x8r8g8b8, lasx_composite_over_x888_n_8888), ++ PIXMAN_STD_FAST_PATH (OVER, x8b8g8r8, solid, a8b8g8r8, lasx_composite_over_x888_n_8888), ++ PIXMAN_STD_FAST_PATH (OVER, x8b8g8r8, solid, x8b8g8r8, lasx_composite_over_x888_n_8888), ++ PIXMAN_STD_FAST_PATH (OVER, a8r8g8b8, solid, a8r8g8b8, lasx_composite_over_8888_n_8888), ++ PIXMAN_STD_FAST_PATH (OVER, a8r8g8b8, solid, x8r8g8b8, lasx_composite_over_8888_n_8888), ++ PIXMAN_STD_FAST_PATH (OVER, a8b8g8r8, solid, a8b8g8r8, lasx_composite_over_8888_n_8888), ++ PIXMAN_STD_FAST_PATH (OVER, a8b8g8r8, solid, x8b8g8r8, lasx_composite_over_8888_n_8888), ++ PIXMAN_STD_FAST_PATH (OVER, x8r8g8b8, a8, x8r8g8b8, lasx_composite_over_x888_8_8888), ++ PIXMAN_STD_FAST_PATH (OVER, x8r8g8b8, a8, a8r8g8b8, lasx_composite_over_x888_8_8888), ++ PIXMAN_STD_FAST_PATH (OVER, x8b8g8r8, a8, x8b8g8r8, lasx_composite_over_x888_8_8888), ++ PIXMAN_STD_FAST_PATH (OVER, x8b8g8r8, a8, a8b8g8r8, lasx_composite_over_x888_8_8888), ++ PIXMAN_STD_FAST_PATH (OVER, solid, a8, r5g6b5, lasx_composite_over_n_8_0565), ++ PIXMAN_STD_FAST_PATH (OVER, solid, a8, b5g6r5, lasx_composite_over_n_8_0565), ++ PIXMAN_STD_FAST_PATH (SRC, x8r8g8b8, null, a8r8g8b8, lasx_composite_src_x888_8888), ++ PIXMAN_STD_FAST_PATH (SRC, x8b8g8r8, null, a8b8g8r8, lasx_composite_src_x888_8888), ++ PIXMAN_STD_FAST_PATH (OVER, a8r8g8b8, null, r5g6b5, lasx_composite_over_8888_0565), ++ PIXMAN_STD_FAST_PATH (OVER, a8b8g8r8, null, b5g6r5, lasx_composite_over_8888_0565), ++ PIXMAN_STD_FAST_PATH (OVER, solid, null, r5g6b5, lasx_composite_over_n_0565), ++ PIXMAN_STD_FAST_PATH (OVER, solid, null, b5g6r5, lasx_composite_over_n_0565), ++ PIXMAN_STD_FAST_PATH (OVER, a8r8g8b8, null, a8r8g8b8, lasx_composite_over_8888_8888), ++ PIXMAN_STD_FAST_PATH (OVER, a8r8g8b8, null, x8r8g8b8, lasx_composite_over_8888_8888), ++ PIXMAN_STD_FAST_PATH (OVER, a8b8g8r8, null, a8b8g8r8, lasx_composite_over_8888_8888), ++ PIXMAN_STD_FAST_PATH (OVER, a8b8g8r8, null, x8b8g8r8, lasx_composite_over_8888_8888), ++ PIXMAN_STD_FAST_PATH (OVER, x8r8g8b8, null, x8r8g8b8, lasx_composite_copy_area), ++ PIXMAN_STD_FAST_PATH (OVER, x8b8g8r8, null, x8b8g8r8, lasx_composite_copy_area), ++ PIXMAN_STD_FAST_PATH_CA (OVER, solid, a8r8g8b8, a8r8g8b8, lasx_composite_over_n_8888_8888_ca), ++ PIXMAN_STD_FAST_PATH_CA (OVER, solid, a8r8g8b8, x8r8g8b8, lasx_composite_over_n_8888_8888_ca), ++ PIXMAN_STD_FAST_PATH_CA (OVER, solid, a8b8g8r8, a8b8g8r8, lasx_composite_over_n_8888_8888_ca), ++ PIXMAN_STD_FAST_PATH_CA (OVER, solid, a8b8g8r8, x8b8g8r8, lasx_composite_over_n_8888_8888_ca), ++ PIXMAN_STD_FAST_PATH (OVER_REVERSE, solid, null, a8r8g8b8, lasx_composite_over_reverse_n_8888), ++ PIXMAN_STD_FAST_PATH (OVER_REVERSE, solid, null, a8b8g8r8, lasx_composite_over_reverse_n_8888), ++ PIXMAN_STD_FAST_PATH (ADD, a8, null, a8, lasx_composite_add_8_8), ++ PIXMAN_STD_FAST_PATH (ADD, solid, a8, a8, lasx_composite_add_n_8_8), ++ PIXMAN_STD_FAST_PATH (ADD, solid, null, a8, lasx_composite_add_n_8), ++ PIXMAN_STD_FAST_PATH (ADD, solid, null, x8r8g8b8, lasx_composite_add_n_8888), ++ PIXMAN_STD_FAST_PATH (ADD, solid, null, a8r8g8b8, lasx_composite_add_n_8888), ++ PIXMAN_STD_FAST_PATH (ADD, solid, null, x8b8g8r8, 
lasx_composite_add_n_8888), ++ PIXMAN_STD_FAST_PATH (ADD, solid, null, a8b8g8r8, lasx_composite_add_n_8888), ++ PIXMAN_STD_FAST_PATH (ADD, a8r8g8b8, null, a8r8g8b8, lasx_composite_add_8888_8888), ++ PIXMAN_STD_FAST_PATH (ADD, a8b8g8r8, null, a8b8g8r8, lasx_composite_add_8888_8888), ++ PIXMAN_STD_FAST_PATH (SRC, a8r8g8b8, null, a8r8g8b8, lasx_composite_copy_area), ++ PIXMAN_STD_FAST_PATH (SRC, a8b8g8r8, null, a8b8g8r8, lasx_composite_copy_area), ++ PIXMAN_STD_FAST_PATH (SRC, a8r8g8b8, null, x8r8g8b8, lasx_composite_copy_area), ++ PIXMAN_STD_FAST_PATH (SRC, a8b8g8r8, null, x8b8g8r8, lasx_composite_copy_area), ++ PIXMAN_STD_FAST_PATH (SRC, x8r8g8b8, null, x8r8g8b8, lasx_composite_copy_area), ++ PIXMAN_STD_FAST_PATH (SRC, x8b8g8r8, null, x8b8g8r8, lasx_composite_copy_area), ++ PIXMAN_STD_FAST_PATH (SRC, b8g8r8a8, null, b8g8r8x8, lasx_composite_copy_area), ++ PIXMAN_STD_FAST_PATH (SRC, b8g8r8a8, null, b8g8r8a8, lasx_composite_copy_area), ++ PIXMAN_STD_FAST_PATH (SRC, b8g8r8x8, null, b8g8r8x8, lasx_composite_copy_area), ++ PIXMAN_STD_FAST_PATH (SRC, r5g6b5, null, r5g6b5, lasx_composite_copy_area), ++ PIXMAN_STD_FAST_PATH (SRC, b5g6r5, null, b5g6r5, lasx_composite_copy_area), ++ PIXMAN_STD_FAST_PATH (SRC, a8, null, a8, lasx_composite_copy_area), ++ PIXMAN_STD_FAST_PATH (SRC, a8r8g8b8, null, r5g6b5, lasx_composite_src_x888_0565), ++ PIXMAN_STD_FAST_PATH (SRC, a8b8g8r8, null, b5g6r5, lasx_composite_src_x888_0565), ++ PIXMAN_STD_FAST_PATH (SRC, x8r8g8b8, null, r5g6b5, lasx_composite_src_x888_0565), ++ PIXMAN_STD_FAST_PATH (SRC, x8b8g8r8, null, b5g6r5, lasx_composite_src_x888_0565), ++ PIXMAN_STD_FAST_PATH (IN, solid, a8, a8, lasx_composite_in_n_8_8), ++ PIXMAN_STD_FAST_PATH (IN, a8, null, a8, lasx_composite_in_8_8), ++ ++ { PIXMAN_OP_NONE }, ++}; ++ ++#define IMAGE_FLAGS \ ++ (FAST_PATH_STANDARD_FLAGS | FAST_PATH_ID_TRANSFORM | \ ++ FAST_PATH_BITS_IMAGE | FAST_PATH_SAMPLES_COVER_CLIP_NEAREST) ++static const pixman_iter_info_t lasx_iters[] = ++{ ++ { ++ PIXMAN_x8r8g8b8, IMAGE_FLAGS, ITER_NARROW, ++ _pixman_iter_init_bits_stride, lasx_fetch_x8r8g8b8, NULL ++ }, ++ { ++ PIXMAN_r5g6b5, IMAGE_FLAGS, ITER_NARROW, ++ _pixman_iter_init_bits_stride, lasx_fetch_r5g6b5, NULL ++ }, ++ { ++ PIXMAN_a8, IMAGE_FLAGS, ITER_NARROW, ++ _pixman_iter_init_bits_stride, lasx_fetch_a8, NULL ++ }, ++ { PIXMAN_null }, ++}; ++ ++pixman_implementation_t * ++_pixman_implementation_create_lasx (pixman_implementation_t *fallback) ++{ ++ pixman_implementation_t *imp = ++ _pixman_implementation_create (fallback, lasx_fast_paths); ++ ++ /* LoongArch LASX constants */ ++ mask_565_r = create_mask_1x32_256 (0x00f80000); ++ mask_565_g1 = create_mask_1x32_256 (0x00070000); ++ mask_565_g2 = create_mask_1x32_256 (0x000000e0); ++ mask_565_b = create_mask_1x32_256 (0x0000001f); ++ mask_red = create_mask_1x32_256 (0x00f80000); ++ mask_green = create_mask_1x32_256 (0x0000fc00); ++ mask_blue = create_mask_1x32_256 (0x000000f8); ++ mask_565_fix_rb = create_mask_1x32_256 (0x00e000e0); ++ mask_565_fix_g = create_mask_1x32_256 (0x0000c000); ++ mask_0080 = create_mask_16_256 (0x0080); ++ mask_00ff = create_mask_16_256 (0x00ff); ++ mask_0101 = create_mask_16_256 (0x0101); ++ mask_ffff = create_mask_16_256 (0xffff); ++ mask_ff000000 = create_mask_1x32_256 (0xff000000); ++ mask_alpha = create_mask_1x64_256 (0x00ff000000000000); ++ mask_565_rb = create_mask_1x32_256 (0x00f800f8); ++ mask_565_pack_multiplier = create_mask_1x32_256 (0x20000004); ++ ++ /* Set up function pointers */ ++ imp->combine_32[PIXMAN_OP_SRC] = lasx_combine_src_u; ++ 
imp->combine_32[PIXMAN_OP_OVER] = lasx_combine_over_u;
++    imp->combine_32[PIXMAN_OP_OVER_REVERSE] = lasx_combine_over_reverse_u;
++    imp->combine_32[PIXMAN_OP_OUT] = lasx_combine_out_u;
++    imp->combine_32[PIXMAN_OP_OUT_REVERSE] = lasx_combine_out_reverse_u;
++    imp->combine_32[PIXMAN_OP_ADD] = lasx_combine_add_u;
++    imp->combine_32[PIXMAN_OP_DISJOINT_SRC] = lasx_combine_src_u;
++    imp->combine_32[PIXMAN_OP_CONJOINT_SRC] = lasx_combine_src_u;
++    imp->combine_32[PIXMAN_OP_MULTIPLY] = lasx_combine_multiply_u;
++    imp->combine_32_ca[PIXMAN_OP_SRC] = lasx_combine_src_ca;
++    imp->combine_32_ca[PIXMAN_OP_OVER] = lasx_combine_over_ca;
++    imp->combine_32_ca[PIXMAN_OP_OUT_REVERSE] = lasx_combine_out_reverse_ca;
++
++    imp->blt = lasx_blt;
++    imp->fill = lasx_fill;
++    imp->iter_info = lasx_iters;
++
++    return imp;
++}
++
++void setup_accessors_lasx (bits_image_t *image)
++{
++    if (image->format == PIXMAN_a8) { // 8 bits
++        image->fetch_scanline_32 = lasx_fetch_scanline_a8;
++        image->store_scanline_32 = lasx_store_scanline_a8;
++    } else if (image->format == PIXMAN_a2r2g2b2) {
++        image->fetch_scanline_32 = lasx_fetch_scanline_a2r2g2b2;
++        image->store_scanline_32 = lasx_store_scanline_a2r2g2b2;
++    } else if (image->format == PIXMAN_a1r5g5b5) { // 16 bits
++        image->fetch_scanline_32 = lasx_fetch_scanline_a1r5g5b5;
++        image->store_scanline_32 = lasx_store_scanline_a1r5g5b5;
++    } else if (image->format == PIXMAN_a4r4g4b4) {
++        image->fetch_scanline_32 = lasx_fetch_scanline_a4r4g4b4;
++        image->store_scanline_32 = lasx_store_scanline_a4r4g4b4;
++    }
++}
+diff --git a/pixman/pixman-loongarch.c b/pixman/pixman-loongarch.c
+new file mode 100644
+index 0000000..a77211c
+--- /dev/null
++++ b/pixman/pixman-loongarch.c
+@@ -0,0 +1,94 @@
++/*
++ * Copyright (c) 2023 Loongson Technology Corporation Limited
++ * Contributed by Lu Wang
++ *                Song Ding
++ *
++ * Pixman is free software; you can redistribute it and/or
++ * modify it under the terms of the GNU Lesser General Public
++ * License as published by the Free Software Foundation; either
++ * version 0.36.0 of the License, or (at your option) any later version.
++ *
++ * Pixman is distributed in the hope that it will be useful,
++ * but WITHOUT ANY WARRANTY; without even the implied warranty of
++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
++ * Lesser General Public License for more details.
++ *
++ * You should have received a copy of the GNU Lesser General Public
++ * License along with Pixman; if not, write to the Free Software
++ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
++ */
++
++#ifdef HAVE_CONFIG_H
++#include
++#endif
++
++#include "pixman-private.h"
++
++#if defined(USE_LOONGARCH_LSX) || defined(USE_LOONGARCH_LASX)
++#include
++#include
++#include
++
++#ifdef USE_LOONGARCH_LSX
++static int have_lsx = 0;
++#endif
++#ifdef USE_LOONGARCH_LASX
++static int have_lasx = 0;
++#endif
++
++static uint64_t detect_cpu_features(void)
++{
++    uint64_t hwcap = 0;
++    hwcap = getauxval(AT_HWCAP);
++
++    return hwcap;
++}
++
++static pixman_bool_t
++have_feature (uint64_t feature)
++{
++    static pixman_bool_t initialized;
++    static uint64_t features;
++
++    if (!initialized)
++    {
++        features = detect_cpu_features();
++        initialized = TRUE;
++    }
++
++    return (features & feature) == feature;
++}
++
++#endif
++
++pixman_implementation_t *
++_pixman_loongarch_get_implementations (pixman_implementation_t *imp)
++{
++#ifdef USE_LOONGARCH_LSX
++    if (!_pixman_disabled ("loongarch-lsx") && have_feature (HWCAP_LOONGARCH_LSX))
++    {
++        imp = _pixman_implementation_create_lsx (imp);
++        have_lsx = 1;
++    }
++#endif
++#ifdef USE_LOONGARCH_LASX
++    if (!_pixman_disabled ("loongarch-lasx") && have_feature (HWCAP_LOONGARCH_LASX))
++    {
++        imp = _pixman_implementation_create_lasx (imp);
++        have_lasx = 1;
++    }
++#endif
++    return imp;
++}
++
++void setup_loongarch_accessors (bits_image_t *image)
++{
++#ifdef USE_LOONGARCH_LSX
++    if (have_lsx)
++        setup_accessors_lsx(image);
++#endif
++#ifdef USE_LOONGARCH_LASX
++    if (have_lasx)
++        setup_accessors_lasx(image);
++#endif
++}
+diff --git a/pixman/pixman-lsx.c b/pixman/pixman-lsx.c
+new file mode 100644
+index 0000000..a4c261a
+--- /dev/null
++++ b/pixman/pixman-lsx.c
+@@ -0,0 +1,3783 @@
++/*
++ * Loongson LSX optimizations.
++ *
++ * Copyright © 2023 Loongson Technology Corporation Limited
++ * Contributed by Song Ding(songding@loongson.cn)
++ *
++ * Permission to use, copy, modify, distribute, and sell this software and its
++ * documentation for any purpose is hereby granted without fee, provided that
++ * the above copyright notice appear in all copies and that both that
++ * copyright notice and this permission notice appear in supporting
++ * documentation, and that the name of Red Hat not be used in advertising or
++ * publicity pertaining to distribution of the software without specific,
++ * written prior permission. Red Hat makes no representations about the
++ * suitability of this software for any purpose. It is provided "as is"
++ * without express or implied warranty.
++ *
++ * THE COPYRIGHT HOLDERS DISCLAIM ALL WARRANTIES WITH REGARD TO THIS
++ * SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND
++ * FITNESS, IN NO EVENT SHALL THE COPYRIGHT HOLDERS BE LIABLE FOR ANY
++ * SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
++ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN
++ * AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING
++ * OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS
++ * SOFTWARE.
++ *
++ */
++
++#ifdef HAVE_CONFIG_H
++#include
++#endif
++
++#include "pixman-private.h"
++#include "pixman-combine32.h"
++#include "loongson_intrinsics.h"
++
++static force_inline uint32_t
++over(uint32_t src, uint32_t dest)
++{
++    uint32_t a = ~src >> 24;
++
++    UN8x4_MUL_UN8_ADD_UN8x4(dest, a, src);
++
++    return dest;
++}
++
++static force_inline uint32_t
++in(uint32_t x, uint8_t y)
++{
++    uint16_t a = y;
++
++    UN8x4_MUL_UN8(x, a);
++
++    return x;
++}
++
++static force_inline uint32_t
++combine_mask(const uint32_t *src, const uint32_t *mask, int i)
++{
++    uint32_t s, m;
++
++    if (mask) {
++        m = *(mask + i) >> A_SHIFT;
++        if (!m)
++            return 0;
++    }
++    s = *(src + i);
++    if (mask)
++        UN8x4_MUL_UN8(s, m);
++    return s;
++}
++
++static void
++combine_mask_ca(uint32_t *src, uint32_t *mask)
++{
++    uint32_t a = *mask;
++    uint32_t x;
++    uint16_t xa;
++
++    if (!a) {
++        *(src) = 0;
++        return;
++    }
++
++    x = *(src);
++    if (a == ~0) {
++        x = x >> A_SHIFT;
++        x |= x << G_SHIFT;
++        x |= x << R_SHIFT;
++        *(mask) = x;
++        return;
++    }
++    xa = x >> A_SHIFT;
++    UN8x4_MUL_UN8x4(x, a);
++    *(src) = x;
++
++    UN8x4_MUL_UN8(a, xa);
++    *(mask) = a;
++}
++
++static void
++combine_mask_value_ca(uint32_t *src, const uint32_t *mask)
++{
++    uint32_t a = *mask;
++    uint32_t x;
++
++    if (!a) {
++        *(src) = 0;
++        return;
++    }
++
++    if (a == ~0)
++        return;
++
++    x = *(src);
++    UN8x4_MUL_UN8x4(x, a);
++    *(src) = x;
++}
++
++static void
++combine_mask_alpha_ca(const uint32_t *src, uint32_t *mask)
++{
++    uint32_t a = *(mask);
++    uint32_t x;
++
++    if (!a)
++        return;
++    x = *(src) >> A_SHIFT;
++
++    if (x == MASK)
++        return;
++
++    if (a == -1) {
++        x |= x << G_SHIFT;
++        x |= x << R_SHIFT;
++        *(mask) = x;
++        return;
++    }
++    UN8x4_MUL_UN8(a, x);
++    *(mask) = a;
++}
++
++/* Compute the product of two unsigned fixed-point 8-bit values from 0 to 1
++ * and map its result to the same range.
++ *
++ * Jim Blinn gives multiple ways to compute this in "Jim Blinn's Corner:
++ * Notation, Notation, Notation", the first of which is
++ *
++ *   prod(a, b) = (a * b + 128) / 255.
++ *
++ * By approximating the division by 255 as 257/65536, it can be replaced by a
++ * multiply and a right shift. This is the implementation that we use in
++ * pix_multiply(), but we _mm_mulhi_pu16() by 257 (part of SSE1 or Extended
++ * 3DNow!, and unavailable at the time of the book's publication) to perform
++ * the multiplication and right shift in a single operation.
++ *
++ *   prod(a, b) = ((a * b + 128) * 257) >> 16.
++ *
++ * A third way (how pix_multiply() was implemented prior to 14208344) also
++ * exists that performs the multiplication by 257 with adds and shifts.
++ *
++ * Where temp = a * b + 128
++ *
++ *   prod(a, b) = (temp + (temp >> 8)) >> 8.
++ *
++ * The lsx_pix_multiply(src, mask) is implemented using the third way, and calculates
++ * two sets of data each time.
++ */ ++ ++static force_inline __m128i ++lsx_pix_multiply(__m128i src, __m128i mask) ++{ ++ __m128i tmp0, tmp1; ++ __m128i vec; ++ ++ vec = __lsx_vreplgr2vr_h(0x80); ++ tmp0 = __lsx_vmadd_h(vec, src, mask); ++ tmp1 = __lsx_vsrli_h(tmp0, 8); ++ tmp0 = __lsx_vadd_h(tmp0, tmp1); ++ tmp1 = __lsx_vsrli_h(tmp0, 8); ++ ++ return tmp1; ++} ++ ++static force_inline __m128i ++over_1x128(__m128i src, __m128i alpha, __m128i dst) ++{ ++ __m128i mask_00ff = __lsx_vreplgr2vr_h(0x00ff); ++ ++ alpha = __lsx_vxor_v(alpha, mask_00ff); ++ alpha = lsx_pix_multiply(dst, alpha); ++ ++ return (__lsx_vsadd_bu(src, alpha)); ++} ++ ++static force_inline uint32_t ++core_combine_over_u32 (uint32_t src, uint32_t dst) ++{ ++ uint8_t a = src >> 24; ++ ++ if (a == 0xff) { ++ return src; ++ } ++ else if (src) { ++ __m128i zero = __lsx_vldi(0); ++ __m128i vr_src = __lsx_vinsgr2vr_w(zero, src, 0); ++ __m128i vr_dst = __lsx_vinsgr2vr_w(zero, dst, 0); ++ __m128i vr_alpha; ++ __m128i tmp; ++ ++ vr_src = __lsx_vilvl_b(zero, vr_src); ++ vr_dst = __lsx_vilvl_b(zero, vr_dst); ++ vr_alpha = __lsx_vshuf4i_h(vr_src, 0xff); ++ ++ tmp = __lsx_vpickev_b(zero, over_1x128(vr_src, vr_alpha, vr_dst)); ++ ++ return __lsx_vpickve2gr_wu(tmp, 0); ++ } ++ ++ return dst; ++} ++ ++static force_inline __m128i ++lsx_over_u(__m128i src, __m128i dest) ++{ ++ __m128i r1, r2, r3, t; ++ __m128i rb_mask = __lsx_vreplgr2vr_w(0xff00ff); ++ __m128i rb_one_half = __lsx_vreplgr2vr_w(0x800080); ++ __m128i rb_mask_plus_one = __lsx_vreplgr2vr_w(0x10000100); ++ __m128i a = __lsx_vsrli_w(__lsx_vnor_v(src, src), 24); ++ ++ r1 = __lsx_vand_v(dest, rb_mask); ++ r1 = __lsx_vmadd_w(rb_one_half, r1, a); ++ t = __lsx_vand_v(rb_mask, __lsx_vsrli_w(r1, 8)); ++ r1 = __lsx_vadd_w(r1, t); ++ r1 = __lsx_vsrli_w(r1, 8); ++ r1 = __lsx_vand_v(r1, rb_mask); ++ r2 = __lsx_vand_v(src, rb_mask); ++ ++ r1 = __lsx_vadd_w(r1, r2); ++ t = __lsx_vand_v(rb_mask, __lsx_vsrli_w(r1, 8)); ++ r1 = __lsx_vor_v(r1, __lsx_vsub_w(rb_mask_plus_one, t)); ++ r1 = __lsx_vand_v(r1, rb_mask); ++ ++ r2 = __lsx_vsrli_w(dest, 8); ++ r2 = __lsx_vand_v(r2, rb_mask); ++ r2 = __lsx_vmadd_w(rb_one_half, r2, a); ++ t = __lsx_vand_v(rb_mask, __lsx_vsrli_w(r2, 8)); ++ r2 = __lsx_vadd_w(r2, t); ++ r2 = __lsx_vsrli_w(r2, 8); ++ r2 = __lsx_vand_v(r2, rb_mask); ++ r3 = __lsx_vand_v(rb_mask, __lsx_vsrli_w(src, 8)); ++ ++ r2 = __lsx_vadd_w(r2, r3); ++ t = __lsx_vand_v(rb_mask, __lsx_vsrli_w(r2, 8)); ++ r2 = __lsx_vor_v(r2, __lsx_vsub_w(rb_mask_plus_one, t)); ++ r2 = __lsx_vand_v(r2, rb_mask); ++ ++ t = __lsx_vor_v(r1, __lsx_vslli_w(r2, 8)); ++ ++ return t; ++} ++ ++static force_inline __m128i ++lsx_in_u(__m128i x, __m128i a) ++{ ++ __m128i r1, r2, t; ++ __m128i rb_mask = __lsx_vreplgr2vr_w(0xff00ff); ++ __m128i rb_one_half = __lsx_vreplgr2vr_w(0x800080); ++ ++ r1 = __lsx_vand_v(x, rb_mask); ++ r1 = __lsx_vmadd_w(rb_one_half, r1, a); ++ t = __lsx_vand_v(__lsx_vsrli_w(r1, 8), rb_mask); ++ r1 = __lsx_vadd_w(r1, t); ++ r1 = __lsx_vsrli_w(r1, 8); ++ r1 = __lsx_vand_v(r1, rb_mask); ++ r2 = __lsx_vsrli_w(x, 8); ++ ++ r2 = __lsx_vand_v(r2, rb_mask); ++ r2 = __lsx_vmadd_w(rb_one_half, r2, a); ++ t = __lsx_vand_v(__lsx_vsrli_w(r2, 8), rb_mask); ++ r2 = __lsx_vadd_w(r2, t); ++ r2 = __lsx_vsrli_w(r2, 8); ++ r2 = __lsx_vand_v(r2, rb_mask); ++ ++ t = __lsx_vor_v(r1, __lsx_vslli_w(r2, 8)); ++ ++ return t; ++} ++ ++static void ++lsx_combine_src_u (pixman_implementation_t *imp, ++ pixman_op_t op, ++ uint32_t * dest, ++ const uint32_t * src, ++ const uint32_t * mask, ++ int width) ++{ ++ __m128i src0, mask0, dest0; ++ __m128i zero = 
__lsx_vldi(0); ++ __m128i out0, out1, out2, out3; ++ ++ if(mask) { ++ while (width >= 4) { ++ src0 = __lsx_vld(src, 0); ++ mask0 = __lsx_vld(mask, 0); ++ out0 = __lsx_vilvl_b(zero, src0); ++ out2 = __lsx_vilvh_b(zero, src0); ++ out1 = __lsx_vilvl_b(zero, mask0); ++ out3 = __lsx_vilvh_b(zero, mask0); ++ out1 = __lsx_vshuf4i_h(out1, 0xff); ++ out3 = __lsx_vshuf4i_h(out3, 0xff); ++ out0 = lsx_pix_multiply(out0, out1); ++ out2 = lsx_pix_multiply(out2, out3); ++ dest0 = __lsx_vpickev_b(out2, out0); ++ __lsx_vst(dest0, dest, 0); ++ mask += 4; ++ width -= 4; ++ src += 4; ++ dest += 4; ++ } ++ for (int i = 0; i < width; ++i) { ++ uint32_t s = combine_mask(src, mask, i); ++ *dest++ = s; ++ } ++ } else { ++ while (width >= 4) { ++ src0 = __lsx_vld(src, 0); ++ __lsx_vst(src0, dest, 0); ++ width -= 4; ++ src += 4; ++ dest += 4; ++ } ++ if (width) { ++ memcpy (dest, src, width * sizeof (uint32_t)); ++ } ++ } ++} ++ ++static void ++lsx_combine_over_u_mask (uint32_t *dest, ++ const uint32_t *src, ++ const uint32_t *mask, ++ int width) ++{ ++ __m128i bit_set = __lsx_vreplgr2vr_h(0xff); ++ __m128i src0, mask0, dest0, dest1; ++ __m128i zero = __lsx_vldi(0); ++ __m128i out0, out1, out2, out3, out4, out5; ++ ++ while (width > 3) { ++ src0 = __lsx_vld(src, 0); ++ dest0 = __lsx_vld(dest, 0); ++ mask0 = __lsx_vld(mask, 0); ++ out0 = __lsx_vilvl_b(zero, src0); ++ out2 = __lsx_vilvh_b(zero, src0); ++ out1 = __lsx_vilvl_b(zero, mask0); ++ out3 = __lsx_vilvh_b(zero, mask0); ++ out1 = __lsx_vshuf4i_h(out1, 0xff); ++ out3 = __lsx_vshuf4i_h(out3, 0xff); ++ out0 = lsx_pix_multiply(out0, out1); ++ out2 = lsx_pix_multiply(out2, out3); ++ out1 = __lsx_vxor_v(out0, bit_set); ++ out3 = __lsx_vxor_v(out2, bit_set); ++ out1 = __lsx_vshuf4i_h(out1, 0xff); ++ out3 = __lsx_vshuf4i_h(out3, 0xff); ++ out4 = __lsx_vilvl_b(zero, dest0); ++ out5 = __lsx_vilvh_b(zero, dest0); ++ out4 = lsx_pix_multiply(out4, out1); ++ out5 = lsx_pix_multiply(out5, out3); ++ ++ dest0 = __lsx_vpickev_b(out2, out0); ++ dest1 = __lsx_vpickev_b(out5, out4); ++ dest0 = __lsx_vsadd_bu(dest0, dest1); ++ __lsx_vst(dest0, dest, 0); ++ width -= 4; ++ mask += 4; ++ src += 4; ++ dest += 4; ++ } ++ ++ for (int i = 0; i < width; ++i) { ++ uint32_t m = ALPHA_8 (*(mask + i)); ++ if (m == 0xFF) { ++ uint32_t s = *(src + i); ++ uint32_t a = ALPHA_8 (s); ++ if (a == 0xFF) { ++ *(dest + i) = s; ++ } else if (s) { ++ uint32_t d = *(dest + i); ++ uint32_t ia = a ^ 0xFF; ++ UN8x4_MUL_UN8_ADD_UN8x4(d, ia, s); ++ *(dest + i) = d; ++ } ++ } else if (m) { ++ uint32_t s = *(src + i); ++ if (s) { ++ uint32_t d = *(dest + i); ++ UN8x4_MUL_UN8(s, m); ++ UN8x4_MUL_UN8_ADD_UN8x4(d, ALPHA_8 (~s), s); ++ *(dest + i) = d; ++ } ++ } ++ } ++} ++ ++static void ++lsx_combine_over_u_no_mask (uint32_t *dst, const uint32_t *src, int width) ++{ ++ __m128i zero = __lsx_vldi(0); ++ ++ while (width >= 4) { ++ __m128i v_src, v_dst; ++ __m128i v_src_ev, v_src_od; ++ __m128i alpha; ++ __m128i v_dst_ev, v_dst_od; ++ ++ v_src = __lsx_vld(src, 0); ++ v_dst = __lsx_vld(dst, 0); ++ ++ /* unpack src: 1x128 to 2x128 */ ++ v_src_ev = __lsx_vpackev_b(zero, v_src); ++ v_src_od = __lsx_vpackod_b(zero, v_src); ++ ++ /* expand alpha */ ++ alpha = __lsx_vshuf4i_h(v_src_od, 0xf5); ++ ++ /* unpack dst: 1x128 to 2x128 */ ++ v_dst_ev = __lsx_vpackev_b(zero, v_dst); ++ v_dst_od = __lsx_vpackod_b(zero, v_dst); ++ ++ v_dst_ev = over_1x128(v_src_ev, alpha, v_dst_ev); ++ v_dst_od = over_1x128(v_src_od, alpha, v_dst_od); ++ ++ v_dst = __lsx_vpackev_b(v_dst_od, v_dst_ev); ++ ++ __lsx_vst(v_dst, dst, 0); ++ width -= 4; ++ src 
+= 4; ++ dst += 4; ++ } ++ ++ while (width--) { ++ uint32_t s = *src; ++ uint32_t d = *dst; ++ ++ *dst = core_combine_over_u32(s, d); ++ ++ ++src; ++ ++dst; ++ } ++} ++ ++static void ++lsx_combine_over_u (pixman_implementation_t *imp, ++ pixman_op_t op, ++ uint32_t * dest, ++ const uint32_t * src, ++ const uint32_t * mask, ++ int width) ++{ ++ if (mask) { ++ lsx_combine_over_u_mask(dest, src, mask, width); ++ } ++ else { ++ lsx_combine_over_u_no_mask(dest, src, width); ++ } ++} ++ ++static void ++lsx_combine_over_reverse_u (pixman_implementation_t *imp, ++ pixman_op_t op, ++ uint32_t * dest, ++ const uint32_t * src, ++ const uint32_t * mask, ++ int width) ++{ ++ __m128i src0, mask0, dest0, dest1; ++ __m128i zero = __lsx_vldi(0); ++ __m128i out0, out1, out2, out3, out4, out5; ++ ++ if (mask) { ++ while (width > 3) { ++ src0 = __lsx_vld(src, 0); ++ mask0 = __lsx_vld(mask, 0); ++ dest0 = __lsx_vld(dest, 0); ++ ++ out0 = __lsx_vilvl_b(zero, src0); ++ out2 = __lsx_vilvh_b(zero, src0); ++ out1 = __lsx_vilvl_b(zero, mask0); ++ out3 = __lsx_vilvh_b(zero, mask0); ++ out1 = __lsx_vshuf4i_h(out1, 0xff); ++ out3 = __lsx_vshuf4i_h(out3, 0xff); ++ out0 = lsx_pix_multiply(out0, out1); ++ out2 = lsx_pix_multiply(out2, out3); ++ ++ dest1 = __lsx_vxori_b(dest0, 0xff); ++ out1 = __lsx_vilvl_b(zero, dest0); ++ out3 = __lsx_vilvh_b(zero, dest0); ++ out4 = __lsx_vilvl_b(zero, dest1); ++ out5 = __lsx_vilvh_b(zero, dest1); ++ out4 = __lsx_vshuf4i_h(out4, 0xff); ++ out5 = __lsx_vshuf4i_h(out5, 0xff); ++ out0 = lsx_pix_multiply(out0, out4); ++ out2 = lsx_pix_multiply(out2, out5); ++ dest0 = __lsx_vpickev_b(out2, out0); ++ dest1 = __lsx_vpickev_b(out3, out1); ++ dest0 = __lsx_vsadd_bu(dest0, dest1); ++ __lsx_vst(dest0, dest, 0); ++ mask += 4; ++ width -= 4; ++ src += 4; ++ dest += 4; ++ } ++ } else { ++ while (width > 3) { ++ src0 = __lsx_vld(src, 0); ++ dest0 = __lsx_vld(dest, 0); ++ dest1 = __lsx_vxori_b(dest0, 0xff); ++ out0 = __lsx_vilvl_b(zero, src0); ++ out2 = __lsx_vilvh_b(zero, src0); ++ out1 = __lsx_vilvl_b(zero, dest0); ++ out3 = __lsx_vilvh_b(zero, dest0); ++ out4 = __lsx_vilvl_b(zero, dest1); ++ out5 = __lsx_vilvh_b(zero, dest1); ++ out4 = __lsx_vshuf4i_h(out4, 0xff); ++ out5 = __lsx_vshuf4i_h(out5, 0xff); ++ out0 = lsx_pix_multiply(out0, out4); ++ out2 = lsx_pix_multiply(out2, out5); ++ dest0 = __lsx_vpickev_b(out2, out0); ++ dest1 = __lsx_vpickev_b(out3, out1); ++ dest0 = __lsx_vsadd_bu(dest0, dest1); ++ __lsx_vst(dest0, dest, 0); ++ width -= 4; ++ src += 4; ++ dest += 4; ++ } ++ } ++ ++ for (int i = 0; i < width; ++i) { ++ uint32_t s = combine_mask(src, mask, i); ++ uint32_t d = *(dest + i); ++ uint32_t ia = ALPHA_8 (~*(dest + i)); ++ UN8x4_MUL_UN8_ADD_UN8x4(s, ia, d); ++ *(dest + i) = s; ++ } ++} ++ ++static void ++lsx_combine_out_u (pixman_implementation_t *imp, ++ pixman_op_t op, ++ uint32_t * dest, ++ const uint32_t * src, ++ const uint32_t * mask, ++ int width) ++{ ++ __m128i src0, mask0, dest0; ++ __m128i zero = __lsx_vldi(0); ++ __m128i out0, out1, out2, out3; ++ ++ if(mask) { ++ while (width > 3) { ++ src0 = __lsx_vld(src, 0); ++ mask0 = __lsx_vld(mask, 0); ++ dest0 = __lsx_vld(dest, 0); ++ ++ out0 = __lsx_vilvl_b(zero, src0); ++ out2 = __lsx_vilvh_b(zero, src0); ++ out1 = __lsx_vilvl_b(zero, mask0); ++ out3 = __lsx_vilvh_b(zero, mask0); ++ out1 = __lsx_vshuf4i_h(out1, 0xff); ++ out3 = __lsx_vshuf4i_h(out3, 0xff); ++ out0 = lsx_pix_multiply(out0, out1); ++ out2 = lsx_pix_multiply(out2, out3); ++ ++ dest0 = __lsx_vxori_b(dest0, 0xff); ++ out1 = __lsx_vilvl_b(zero, dest0); ++ out3 = 
__lsx_vilvh_b(zero, dest0); ++ out1 = __lsx_vshuf4i_h(out1, 0xff); ++ out3 = __lsx_vshuf4i_h(out3, 0xff); ++ out0 = lsx_pix_multiply(out0, out1); ++ out2 = lsx_pix_multiply(out2, out3); ++ dest0 = __lsx_vpickev_b(out2, out0); ++ __lsx_vst(dest0, dest, 0); ++ mask += 4; ++ width -= 4; ++ src += 4; ++ dest += 4; ++ } ++ } else { ++ while (width > 3) { ++ src0 = __lsx_vld(src, 0); ++ dest0 = __lsx_vld(dest, 0); ++ out0 = __lsx_vilvl_b(zero, src0); ++ out2 = __lsx_vilvh_b(zero, src0); ++ dest0 = __lsx_vxori_b(dest0, 0xff); ++ out1 = __lsx_vilvl_b(zero, dest0); ++ out3 = __lsx_vilvh_b(zero, dest0); ++ out1 = __lsx_vshuf4i_h(out1, 0xff); ++ out3 = __lsx_vshuf4i_h(out3, 0xff); ++ out0 = lsx_pix_multiply(out0, out1); ++ out2 = lsx_pix_multiply(out2, out3); ++ dest0 = __lsx_vpickev_b(out2, out0); ++ __lsx_vst(dest0, dest, 0); ++ width -= 4; ++ src += 4; ++ dest += 4; ++ } ++ } ++ ++ for (int i = 0; i < width; ++i) { ++ uint32_t s = combine_mask(src, mask, i); ++ uint32_t a = ALPHA_8 (~*(dest + i)); ++ UN8x4_MUL_UN8(s, a); ++ *(dest + i) = s; ++ } ++} ++ ++static void ++lsx_combine_out_reverse_u (pixman_implementation_t *imp, ++ pixman_op_t op, ++ uint32_t * dest, ++ const uint32_t * src, ++ const uint32_t * mask, ++ int width) ++{ ++ __m128i bit_set = __lsx_vreplgr2vr_h(0xff); ++ __m128i src0, mask0, dest0; ++ __m128i zero = __lsx_vldi(0); ++ __m128i out0, out1, out2, out3; ++ ++ if(mask) { ++ while (width > 3) { ++ src0 = __lsx_vld(src, 0); ++ dest0 = __lsx_vld(dest, 0); ++ mask0 = __lsx_vld(mask, 0); ++ ++ out0 = __lsx_vilvl_b(zero, src0); ++ out2 = __lsx_vilvh_b(zero, src0); ++ out1 = __lsx_vilvl_b(zero, mask0); ++ out3 = __lsx_vilvh_b(zero, mask0); ++ out1 = __lsx_vshuf4i_h(out1, 0xff); ++ out3 = __lsx_vshuf4i_h(out3, 0xff); ++ out0 = lsx_pix_multiply(out0, out1); ++ out2 = lsx_pix_multiply(out2, out3); ++ out1 = __lsx_vxor_v(out0, bit_set); ++ out3 = __lsx_vxor_v(out2, bit_set); ++ out1 = __lsx_vshuf4i_h(out1, 0xff); ++ out3 = __lsx_vshuf4i_h(out3, 0xff); ++ out0 = __lsx_vilvl_b(zero, dest0); ++ out2 = __lsx_vilvh_b(zero, dest0); ++ out0 = lsx_pix_multiply(out0, out1); ++ out2 = lsx_pix_multiply(out2, out3); ++ dest0 = __lsx_vpickev_b(out2, out0); ++ __lsx_vst(dest0, dest, 0); ++ mask += 4; ++ width -= 4; ++ src += 4; ++ dest += 4; ++ } ++ } else { ++ while (width > 3) { ++ src0 = __lsx_vld(src, 0); ++ dest0 = __lsx_vld(dest, 0); ++ out0 = __lsx_vilvl_b(zero, src0); ++ out2 = __lsx_vilvh_b(zero, src0); ++ out1 = __lsx_vxor_v(out0, bit_set); ++ out3 = __lsx_vxor_v(out2, bit_set); ++ out1 = __lsx_vshuf4i_h(out1, 0xff); ++ out3 = __lsx_vshuf4i_h(out3, 0xff); ++ out0 = __lsx_vilvl_b(zero, dest0); ++ out2 = __lsx_vilvh_b(zero, dest0); ++ out0 = lsx_pix_multiply(out0, out1); ++ out2 = lsx_pix_multiply(out2, out3); ++ dest0 = __lsx_vpickev_b(out2, out0); ++ __lsx_vst(dest0, dest, 0); ++ width -= 4; ++ src += 4; ++ dest += 4; ++ } ++ } ++ for (int i = 0; i < width; ++i) { ++ uint32_t s = combine_mask(src, mask, i); ++ uint32_t d = *(dest + i); ++ uint32_t a = ALPHA_8 (~s); ++ UN8x4_MUL_UN8 (d, a); ++ *(dest + i) = d; ++ } ++} ++ ++static void ++lsx_combine_add_u (pixman_implementation_t *imp, ++ pixman_op_t op, ++ uint32_t * dest, ++ const uint32_t * src, ++ const uint32_t * mask, ++ int width) ++{ ++ __m128i src0, mask0, dest0, dest1; ++ __m128i zero = __lsx_vldi(0); ++ __m128i out0, out1, out2, out3; ++ ++ if (mask) { ++ while (width > 3) { ++ src0 = __lsx_vld(src, 0); ++ dest0 = __lsx_vld(dest, 0); ++ mask0 = __lsx_vld(mask, 0); ++ ++ out0 = __lsx_vilvl_b(zero, src0); ++ out2 = __lsx_vilvh_b(zero, 
src0); ++ out1 = __lsx_vilvl_b(zero, mask0); ++ out3 = __lsx_vilvh_b(zero, mask0); ++ out1 = __lsx_vshuf4i_h(out1, 0xff); ++ out3 = __lsx_vshuf4i_h(out3, 0xff); ++ out0 = lsx_pix_multiply(out0, out1); ++ out2 = lsx_pix_multiply(out2, out3); ++ ++ dest1 = __lsx_vpickev_b(out2, out0); ++ dest0 = __lsx_vsadd_bu(dest0, dest1); ++ __lsx_vst(dest0, dest, 0); ++ mask += 4; ++ width -= 4; ++ src += 4; ++ dest += 4; ++ } ++ } else { ++ while (width > 3) { ++ src0 = __lsx_vld(src, 0); ++ dest0 = __lsx_vld(dest, 0); ++ out0 = __lsx_vilvl_b(zero, src0); ++ out2 = __lsx_vilvh_b(zero, src0); ++ dest1 = __lsx_vpickev_b(out2, out0); ++ dest0 = __lsx_vsadd_bu(dest0, dest1); ++ __lsx_vst(dest0, dest, 0); ++ width -= 4; ++ src += 4; ++ dest += 4; ++ } ++ } ++ ++ for (int i = 0; i < width; ++i) { ++ uint32_t s = combine_mask(src, mask, i); ++ uint32_t d = *(dest + i); ++ UN8x4_ADD_UN8x4(d, s); ++ *(dest + i) = d; ++ } ++} ++ ++/* ++ * Multiply ++ * ++ * ad * as * B(d / ad, s / as) ++ * = ad * as * d/ad * s/as ++ * = d * s ++ * ++ */ ++static void ++lsx_combine_multiply_u (pixman_implementation_t *imp, ++ pixman_op_t op, ++ uint32_t * dest, ++ const uint32_t * src, ++ const uint32_t * mask, ++ int width) ++{ ++ __m128i bit_set = __lsx_vreplgr2vr_h(0xff); ++ __m128i src0, mask0, dest0, dest1; ++ __m128i zero = __lsx_vldi(0); ++ __m128i out0, out1, out2, out3, out4, out5, out6, out7; ++ ++ if (mask) { ++ while (width > 3) { ++ src0 = __lsx_vld(src, 0); ++ dest0 = __lsx_vld(dest, 0); ++ mask0 = __lsx_vld(mask, 0); ++ ++ out0 = __lsx_vilvl_b(zero, src0); ++ out2 = __lsx_vilvh_b(zero, src0); ++ out1 = __lsx_vilvl_b(zero, mask0); ++ out3 = __lsx_vilvh_b(zero, mask0); ++ out1 = __lsx_vshuf4i_h(out1, 0xff); ++ out3 = __lsx_vshuf4i_h(out3, 0xff); ++ out0 = lsx_pix_multiply(out0, out1); ++ out2 = lsx_pix_multiply(out2, out3); ++ ++ out1 = __lsx_vxor_v(out0, bit_set); ++ out3 = __lsx_vxor_v(out2, bit_set); ++ out1 = __lsx_vshuf4i_h(out1, 0xff); ++ out3 = __lsx_vshuf4i_h(out3, 0xff); ++ dest1 = __lsx_vxori_b(dest0, 0xff); ++ dest1 = __lsx_vshuf4i_b(dest1, 0xff); ++ out4 = __lsx_vilvl_b(zero, dest0); ++ out5 = __lsx_vilvh_b(zero, dest0); ++ out6 = __lsx_vilvl_b(zero, dest1); ++ out7 = __lsx_vilvh_b(zero, dest1); ++ out6 = lsx_pix_multiply(out0, out6); ++ out7 = lsx_pix_multiply(out2, out7); ++ out1 = lsx_pix_multiply(out4, out1); ++ out3 = lsx_pix_multiply(out5, out3); ++ dest0 = __lsx_vpickev_b(out7, out6); ++ dest1 = __lsx_vpickev_b(out3, out1); ++ dest0 = __lsx_vsadd_bu(dest0, dest1); ++ ++ out4 = lsx_pix_multiply(out4, out0); ++ out5 = lsx_pix_multiply(out5, out2); ++ dest1 = __lsx_vpickev_b(out5, out4); ++ dest0 = __lsx_vsadd_bu(dest0, dest1); ++ __lsx_vst(dest0, dest, 0); ++ mask += 4; ++ width -= 4; ++ src += 4; ++ dest += 4; ++ } ++ } else { ++ while (width > 3) { ++ src0 = __lsx_vld(src, 0); ++ dest0 = __lsx_vld(dest, 0); ++ out0 = __lsx_vilvl_b(zero, src0); ++ out2 = __lsx_vilvh_b(zero, src0); ++ out1 = __lsx_vxor_v(out0, bit_set); ++ out3 = __lsx_vxor_v(out2, bit_set); ++ out1 = __lsx_vshuf4i_h(out1, 0xff); ++ out3 = __lsx_vshuf4i_h(out3, 0xff); ++ dest1 = __lsx_vxori_b(dest0, 0xff); ++ dest1 = __lsx_vshuf4i_b(dest1, 0xff); ++ out4 = __lsx_vilvl_b(zero, dest0); ++ out5 = __lsx_vilvh_b(zero, dest0); ++ out6 = __lsx_vilvl_b(zero, dest1); ++ out7 = __lsx_vilvh_b(zero, dest1); ++ out6 = lsx_pix_multiply(out0, out6); ++ out7 = lsx_pix_multiply(out2, out7); ++ out1 = lsx_pix_multiply(out4, out1); ++ out3 = lsx_pix_multiply(out5, out3); ++ dest0 = __lsx_vpickev_b(out7, out6); ++ dest1 = __lsx_vpickev_b(out3, out1); ++ 
dest0 = __lsx_vsadd_bu(dest0, dest1); ++ ++ out4 = lsx_pix_multiply(out4, out0); ++ out5 = lsx_pix_multiply(out5, out2); ++ dest1 = __lsx_vpickev_b(out5, out4); ++ dest0 = __lsx_vsadd_bu(dest0, dest1); ++ __lsx_vst(dest0, dest, 0); ++ width -= 4; ++ src += 4; ++ dest += 4; ++ } ++ } ++ ++ for (int i = 0; i < width; ++i) { ++ uint32_t s = combine_mask(src, mask, i); ++ uint32_t d = *(dest + i); ++ uint32_t ss = s; ++ uint32_t src_ia = ALPHA_8(~s); ++ uint32_t dest_ia = ALPHA_8(~d); ++ ++ UN8x4_MUL_UN8_ADD_UN8x4_MUL_UN8(ss, dest_ia, d, src_ia); ++ UN8x4_MUL_UN8x4(d, s); ++ UN8x4_ADD_UN8x4(d, ss); ++ ++ *(dest + i) = d; ++ } ++} ++ ++static void ++lsx_combine_src_ca (pixman_implementation_t *imp, ++ pixman_op_t op, ++ uint32_t * dest, ++ const uint32_t * src, ++ const uint32_t * mask, ++ int width) ++{ ++ __m128i src0, mask0, dest0; ++ __m128i zero = __lsx_vldi(0); ++ __m128i out0, out1, out2, out3; ++ ++ while (width > 3) { ++ src0 = __lsx_vld(src, 0); ++ mask0 = __lsx_vld(mask, 0); ++ out0 = __lsx_vilvl_b(zero, src0); ++ out2 = __lsx_vilvh_b(zero, src0); ++ out1 = __lsx_vilvl_b(zero, mask0); ++ out3 = __lsx_vilvh_b(zero, mask0); ++ out0 = lsx_pix_multiply(out0, out1); ++ out2 = lsx_pix_multiply(out2, out3); ++ dest0 = __lsx_vpickev_b(out2, out0); ++ __lsx_vst(dest0, dest, 0); ++ mask += 4; ++ width -= 4; ++ src += 4; ++ dest += 4; ++ } ++ ++ for (int i = 0; i < width; ++i) { ++ uint32_t s = *(src + i); ++ uint32_t m = *(mask + i); ++ combine_mask_value_ca(&s, &m); ++ *(dest + i) = s; ++ } ++} ++ ++static void ++lsx_combine_over_ca (pixman_implementation_t *imp, ++ pixman_op_t op, ++ uint32_t * dest, ++ const uint32_t * src, ++ const uint32_t * mask, ++ int width) ++{ ++ __m128i bit_set = __lsx_vreplgr2vr_h(0xff); ++ __m128i src0, mask0, dest0, dest1; ++ __m128i zero = __lsx_vldi(0); ++ __m128i out0, out1, out2, out3, out4, out5; ++ ++ while (width > 3) { ++ src0 = __lsx_vld(src, 0); ++ dest0 = __lsx_vld(dest, 0); ++ mask0 = __lsx_vld(mask, 0); ++ ++ out0 = __lsx_vilvl_b(zero, src0); ++ out2 = __lsx_vilvh_b(zero, src0); ++ out1 = __lsx_vilvl_b(zero, mask0); ++ out3 = __lsx_vilvh_b(zero, mask0); ++ out4 = lsx_pix_multiply(out0, out1); ++ out5 = lsx_pix_multiply(out2, out3); ++ out0 = __lsx_vshuf4i_h(out0, 0xff); ++ out2 = __lsx_vshuf4i_h(out2, 0xff); ++ out1 = lsx_pix_multiply(out1, out0); ++ out3 = lsx_pix_multiply(out3, out2); ++ ++ out1 = __lsx_vxor_v(out1, bit_set); ++ out3 = __lsx_vxor_v(out3, bit_set); ++ out0 = __lsx_vilvl_b(zero, dest0); ++ out2 = __lsx_vilvh_b(zero, dest0); ++ out1 = lsx_pix_multiply(out1, out0); ++ out3 = lsx_pix_multiply(out3, out2); ++ ++ dest0 = __lsx_vpickev_b(out5, out4); ++ dest1 = __lsx_vpickev_b(out3, out1); ++ dest0 = __lsx_vsadd_bu(dest0, dest1); ++ __lsx_vst(dest0, dest, 0); ++ mask += 4; ++ width -= 4; ++ src += 4; ++ dest += 4; ++ } ++ ++ for (int i = 0; i < width; ++i) { ++ uint32_t s = *(src + i); ++ uint32_t m = *(mask + i); ++ uint32_t a; ++ ++ combine_mask_ca (&s, &m); ++ a = ~m; ++ if (a) { ++ uint32_t d = *(dest + i); ++ UN8x4_MUL_UN8x4_ADD_UN8x4(d, a, s); ++ s = d; ++ } ++ *(dest + i) = s; ++ } ++} ++ ++static void ++lsx_combine_out_reverse_ca (pixman_implementation_t *imp, ++ pixman_op_t op, ++ uint32_t * dest, ++ const uint32_t * src, ++ const uint32_t * mask, ++ int width) ++{ ++ __m128i bit_set = __lsx_vreplgr2vr_h(0xff); ++ __m128i src0, mask0, dest0; ++ __m128i zero = __lsx_vldi(0); ++ __m128i out0, out1, out2, out3; ++ ++ while (width > 3) { ++ src0 = __lsx_vld(src, 0); ++ dest0 = __lsx_vld(dest, 0); ++ mask0 = __lsx_vld(mask, 0); ++ ++ 
out0 = __lsx_vilvl_b(zero, src0); ++ out2 = __lsx_vilvh_b(zero, src0); ++ out1 = __lsx_vilvl_b(zero, mask0); ++ out3 = __lsx_vilvh_b(zero, mask0); ++ out0 = __lsx_vshuf4i_h(out0, 0xff); ++ out2 = __lsx_vshuf4i_h(out2, 0xff); ++ out1 = lsx_pix_multiply(out1, out0); ++ out3 = lsx_pix_multiply(out3, out2); ++ ++ out1 = __lsx_vxor_v(out1, bit_set); ++ out3 = __lsx_vxor_v(out3, bit_set); ++ out0 = __lsx_vilvl_b(zero, dest0); ++ out2 = __lsx_vilvh_b(zero, dest0); ++ out1 = lsx_pix_multiply(out1, out0); ++ out3 = lsx_pix_multiply(out3, out2); ++ dest0 = __lsx_vpickev_b(out3, out1); ++ __lsx_vst(dest0, dest, 0); ++ mask += 4; ++ width -= 4; ++ src += 4; ++ dest += 4; ++ } ++ ++ for (int i = 0; i < width; ++i) { ++ uint32_t s = *(src + i); ++ uint32_t m = *(mask + i); ++ uint32_t a; ++ ++ combine_mask_alpha_ca(&s, &m); ++ a = ~m; ++ ++ if (a != ~0) { ++ uint32_t d = 0; ++ ++ if (a) { ++ d = *(dest + i); ++ UN8x4_MUL_UN8x4(d, a); ++ } ++ *(dest + i) = d; ++ } ++ } ++} ++ ++/* ++ * w : length in bytes ++ */ ++static void force_inline ++lsx_blt_one_line_u8 (uint8_t *pDst, uint8_t *pSrc, int w) ++{ ++ /* align the dst to 16 byte */ ++ while (((uintptr_t)pDst & 15) && w) { ++ *pDst = *pSrc; ++ pSrc += 1; ++ pDst += 1; ++ w -= 1; ++ } ++ ++ while (w >= 32) { ++ __m128i src0, src1; ++ src0 = __lsx_vld(pSrc, 0); ++ src1 = __lsx_vld(pSrc, 16); ++ __lsx_vst(src0, pDst, 0); ++ __lsx_vst(src1, pDst, 16); ++ ++ w -= 32; ++ pSrc += 32; ++ pDst += 32; ++ } ++ ++ if (w >= 16) { ++ __lsx_vst(__lsx_vld(pSrc, 0), pDst, 0); ++ ++ w -= 16; ++ pSrc += 16; ++ pDst += 16; ++ } ++ ++ if (w >= 8) { ++ *(uint64_t *)pDst = *(uint64_t *)pSrc; ++ ++ w -= 8; ++ pSrc += 8; ++ pDst += 8; ++ } ++ ++ while (w--) { ++ /* copy one bytes once a time */ ++ *pDst++ = *pSrc++; ++ } ++} ++ ++/* ++ * w : length in half word ++ */ ++static void ++lsx_blt_one_line_u16 (uint16_t *pDst, uint16_t *pSrc, int w) ++{ ++ /* align the dst to 16 byte */ ++ while (((uintptr_t)pDst & 15) && w) { ++ *pDst++ = *pSrc++; ++ --w; ++ } ++ ++ while (w >= 32) { ++ __m128i src0, src1, src2, src3; ++ /* copy 64 bytes */ ++ src0 = __lsx_vld(pSrc, 0); ++ src1 = __lsx_vld(pSrc, 16); ++ src2 = __lsx_vld(pSrc, 32); ++ src3 = __lsx_vld(pSrc, 48); ++ __lsx_vst(src0, pDst, 0); ++ __lsx_vst(src1, pDst, 16); ++ __lsx_vst(src2, pDst, 32); ++ __lsx_vst(src3, pDst, 48); ++ ++ w -= 32; ++ pSrc += 32; ++ pDst += 32; ++ } ++ ++ if (w >= 16) { ++ __m128i src0, src1; ++ /* copy 32 bytes */ ++ src0 = __lsx_vld(pSrc, 0); ++ src1 = __lsx_vld(pSrc, 16); ++ __lsx_vst(src0, pDst, 0); ++ __lsx_vst(src1, pDst, 16); ++ ++ w -= 16; ++ pSrc += 16; ++ pDst += 16; ++ } ++ ++ if (w >= 8) { ++ /* copy 16 bytes */ ++ __lsx_vst(__lsx_vld(pSrc, 0), pDst, 0); ++ ++ w -= 8; ++ pSrc += 8; ++ pDst += 8; ++ } ++ ++ while (w--) { ++ /* copy 2 bytes once a time */ ++ *pDst++ = *pSrc++; ++ } ++} ++ ++/* ++ * w : length in word ++ */ ++static force_inline void ++lsx_blt_one_line_u32 (uint32_t *pDst, uint32_t *pSrc, int w) ++{ ++ /* align the dst to 16 byte */ ++ while (((uintptr_t)pDst & 15) && w) { ++ *pDst++ = *pSrc++; ++ --w; ++ } ++ ++ while (w >= 32) { ++ __m128i src0, src1, src2, src3; ++ __m128i src4, src5, src6, src7; ++ /* copy 128 bytes */ ++ src0 = __lsx_vld(pSrc, 0); ++ src1 = __lsx_vld(pSrc, 16); ++ src2 = __lsx_vld(pSrc, 32); ++ src3 = __lsx_vld(pSrc, 48); ++ src4 = __lsx_vld(pSrc, 64); ++ src5 = __lsx_vld(pSrc, 80); ++ src6 = __lsx_vld(pSrc, 96); ++ src7 = __lsx_vld(pSrc, 112); ++ __lsx_vst(src0, pDst, 0); ++ __lsx_vst(src1, pDst, 16); ++ __lsx_vst(src2, pDst, 32); ++ __lsx_vst(src3, pDst, 
48); ++ __lsx_vst(src4, pDst, 64); ++ __lsx_vst(src5, pDst, 80); ++ __lsx_vst(src6, pDst, 96); ++ __lsx_vst(src7, pDst, 112); ++ ++ w -= 32; ++ pSrc += 32; ++ pDst += 32; ++ } ++ ++ if (w >= 16) { ++ __m128i src0, src1, src2, src3; ++ /* copy 64 bytes */ ++ src0 = __lsx_vld(pSrc, 0); ++ src1 = __lsx_vld(pSrc, 16); ++ src2 = __lsx_vld(pSrc, 32); ++ src3 = __lsx_vld(pSrc, 48); ++ __lsx_vst(src0, pDst, 0); ++ __lsx_vst(src1, pDst, 16); ++ __lsx_vst(src2, pDst, 32); ++ __lsx_vst(src3, pDst, 48); ++ ++ w -= 16; ++ pSrc += 16; ++ pDst += 16; ++ } ++ ++ if (w >= 8) { ++ __m128i src0, src1; ++ /* copy 32 bytes */ ++ src0 = __lsx_vld(pSrc, 0); ++ src1 = __lsx_vld(pSrc, 16); ++ __lsx_vst(src0, pDst, 0); ++ __lsx_vst(src1, pDst, 16); ++ ++ w -= 8; ++ pSrc += 8; ++ pDst += 8; ++ } ++ ++ if (w >= 4) { ++ /* copy 16 bytes once a time */ ++ __lsx_vst(__lsx_vld(pSrc, 0), pDst, 0); ++ ++ w -= 4; ++ pSrc += 4; ++ pDst += 4; ++ } ++ ++ while (w--) { ++ /* copy 4 bytes once a time */ ++ *pDst++ = *pSrc++; ++ } ++} ++ ++static pixman_bool_t ++lsx_blt (pixman_implementation_t *imp, ++ uint32_t * src_bits, ++ uint32_t * dst_bits, ++ int src_stride, ++ int dst_stride, ++ int src_bpp, ++ int dst_bpp, ++ int src_x, ++ int src_y, ++ int dest_x, ++ int dest_y, ++ int width, ++ int height) ++{ ++ if (src_bpp != dst_bpp) ++ return FALSE; ++ ++ if (src_bpp == 8) { ++ uint8_t *src_b = (uint8_t *)src_bits; ++ uint8_t *dst_b = (uint8_t *)dst_bits; ++ ++ src_stride = src_stride * 4; ++ dst_stride = dst_stride * 4; ++ ++ src_b += src_stride * src_y + src_x; ++ dst_b += dst_stride * dest_y + dest_x; ++ ++ while (height--) { ++ lsx_blt_one_line_u8 (dst_b, src_b, width); ++ dst_b += dst_stride; ++ src_b += src_stride; ++ } ++ ++ return TRUE; ++ } ++ ++ if (src_bpp == 16) { ++ uint16_t *src_h = (uint16_t *)src_bits; ++ uint16_t *dst_h = (uint16_t *)dst_bits; ++ ++ src_stride = src_stride * 2; ++ dst_stride = dst_stride * 2; ++ ++ src_h += src_stride * src_y + src_x; ++ dst_h += dst_stride * dest_y + dest_x; ++ ++ while (height--) { ++ lsx_blt_one_line_u16 (dst_h, src_h, width); ++ dst_h += dst_stride; ++ src_h += src_stride; ++ } ++ ++ return TRUE; ++ } ++ ++ if (src_bpp == 32) { ++ src_bits += src_stride * src_y + src_x; ++ dst_bits += dst_stride * dest_y + dest_x; ++ ++ while (height--) { ++ lsx_blt_one_line_u32 (dst_bits, src_bits, width); ++ dst_bits += dst_stride; ++ src_bits += src_stride; ++ } ++ ++ return TRUE; ++ } ++ ++ return FALSE; ++} ++ ++static void ++lsx_fill_u8 (uint8_t *dst, ++ int stride, ++ int x, ++ int y, ++ int width, ++ int height, ++ uint8_t filler) ++{ ++ __m128i vfill = __lsx_vreplgr2vr_b(filler); ++ int byte_stride = stride * 4; ++ dst += y * byte_stride + x; ++ ++ while (height--) { ++ int w = width; ++ uint8_t *d = dst; ++ ++ while (w && ((uintptr_t)d & 15)) { ++ *d = filler; ++ w--; ++ d++; ++ } ++ ++ while (w >= 64) { ++ __lsx_vst(vfill, d, 0); ++ __lsx_vst(vfill, d, 16); ++ __lsx_vst(vfill, d, 32); ++ __lsx_vst(vfill, d, 48); ++ w -= 64; ++ d += 64; ++ } ++ ++ if (w >= 32) { ++ __lsx_vst(vfill, d, 0); ++ __lsx_vst(vfill, d, 16); ++ w -= 32; ++ d += 32; ++ } ++ ++ if (w >= 16) { ++ __lsx_vst(vfill, d, 0); ++ w -= 16; ++ d += 16; ++ } ++ ++ while (w) { ++ *d = filler; ++ w--; ++ d++; ++ } ++ ++ dst += byte_stride; ++ } ++} ++ ++static void ++lsx_fill_u16 (uint16_t *dst, ++ int stride, ++ int x, ++ int y, ++ int width, ++ int height, ++ uint16_t filler) ++{ ++ __m128i vfill = __lsx_vreplgr2vr_h(filler); ++ int short_stride = stride * 2; ++ dst += y * short_stride + x; ++ ++ while (height--) { ++ int 
w = width; ++ uint16_t *d = dst; ++ ++ while (w && ((uintptr_t)d & 15)) { ++ *d = filler; ++ w--; ++ d++; ++ } ++ ++ while (w >= 32) { ++ __lsx_vst(vfill, d, 0); ++ __lsx_vst(vfill, d, 16); ++ __lsx_vst(vfill, d, 32); ++ __lsx_vst(vfill, d, 48); ++ w -= 32; ++ d += 32; ++ } ++ ++ if (w >= 16) { ++ __lsx_vst(vfill, d, 0); ++ __lsx_vst(vfill, d, 16); ++ w -= 16; ++ d += 16; ++ } ++ ++ if (w >= 8) { ++ __lsx_vst(vfill, d, 0); ++ w -= 8; ++ d += 8; ++ } ++ ++ while (w) { ++ *d = filler; ++ w--; ++ d++; ++ } ++ ++ dst += short_stride; ++ } ++} ++ ++static void ++lsx_fill_u32 (uint32_t *bits, ++ int stride, ++ int x, ++ int y, ++ int width, ++ int height, ++ uint32_t filler) ++{ ++ __m128i vfill = __lsx_vreplgr2vr_w(filler); ++ bits += y * stride + x; ++ ++ while (height--) { ++ int w = width; ++ uint32_t *d = bits; ++ ++ while (w && ((uintptr_t)d & 15)) { ++ *d = filler; ++ w--; ++ d++; ++ } ++ ++ while (w >= 32) { ++ __lsx_vst(vfill, d, 0); ++ __lsx_vst(vfill, d, 16); ++ __lsx_vst(vfill, d, 32); ++ __lsx_vst(vfill, d, 48); ++ __lsx_vst(vfill, d, 64); ++ __lsx_vst(vfill, d, 80); ++ __lsx_vst(vfill, d, 96); ++ __lsx_vst(vfill, d, 112); ++ w -= 32; ++ d += 32; ++ } ++ ++ while (w >= 16) { ++ __lsx_vst(vfill, d, 0); ++ __lsx_vst(vfill, d, 16); ++ __lsx_vst(vfill, d, 32); ++ __lsx_vst(vfill, d, 48); ++ w -= 16; ++ d += 16; ++ } ++ ++ if (w >= 8) { ++ __lsx_vst(vfill, d, 0); ++ __lsx_vst(vfill, d, 16); ++ w -= 8; ++ d += 8; ++ } ++ ++ if (w >= 4) { ++ __lsx_vst(vfill, d, 0); ++ w -= 4; ++ d += 4; ++ } ++ ++ while (w) { ++ *d = filler; ++ w--; ++ d++; ++ } ++ ++ bits += stride; ++ } ++} ++ ++static pixman_bool_t ++lsx_fill (pixman_implementation_t *imp, ++ uint32_t * bits, ++ int stride, ++ int bpp, ++ int x, ++ int y, ++ int width, ++ int height, ++ uint32_t filler) ++{ ++ switch (bpp) { ++ case 8: ++ lsx_fill_u8 ((uint8_t *)bits, stride, x, y, width, height, (uint8_t)filler); ++ return TRUE; ++ ++ case 16: ++ lsx_fill_u16 ((uint16_t *)bits, stride, x, y, width, height, (uint16_t)filler); ++ return TRUE; ++ ++ case 32: ++ lsx_fill_u32 (bits, stride, x, y, width, height, filler); ++ return TRUE; ++ ++ default: ++ return FALSE; ++ } ++ ++ return TRUE; ++} ++ ++static void ++lsx_composite_over_n_8_8888 (pixman_implementation_t *imp, ++ pixman_composite_info_t *info) ++{ ++ PIXMAN_COMPOSITE_ARGS (info); ++ uint32_t src, srca; ++ uint32_t *dst_line, *dst, d; ++ uint8_t *mask_line, *mask, m; ++ int dst_stride, mask_stride; ++ int32_t w; ++ v4u32 vsrca, vsrc; ++ __m128i vff; ++ ++ src = _pixman_image_get_solid(imp, src_image, dest_image->bits.format); ++ vsrc = (v4u32)__lsx_vreplgr2vr_w(src); ++ srca = src >> 24; ++ vsrca = (v4u32)__lsx_vreplgr2vr_w(srca); ++ vff = __lsx_vreplgr2vr_w(0xff); ++ ++ if (src == 0) ++ return; ++ ++ PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1); ++ PIXMAN_IMAGE_GET_LINE (mask_image, mask_x, mask_y, uint8_t, mask_stride, mask_line, 1); ++ ++ while (height--) { ++ dst = dst_line; ++ dst_line += dst_stride; ++ mask = mask_line; ++ mask_line += mask_stride; ++ w = width; ++ ++ while (w >= 4) { ++ v4u32 ma = {mask[0], mask[1], mask[2], mask[3]}; ++ ++ if (__lsx_bnz_w(__lsx_vseqi_w((__m128i)ma, 0xff))){ ++ if (__lsx_bnz_w(__lsx_vseqi_w(vsrca, 0xff))) ++ *(__m128i*) dst = (__m128i)vsrc; ++ else if (__lsx_bnz_w(__lsx_vsub_w((__m128i)ma, vff))) ++ *(__m128i*) dst = lsx_over_u((__m128i)vsrc, *(__m128i*)dst); ++ } else if (__lsx_bnz_w((__m128i)ma)) { ++ __m128i d0 = lsx_in_u((__m128i)vsrc, (__m128i)ma); ++ *(__m128i*) dst = lsx_over_u(d0, 
*(__m128i*)dst); ++ } else { ++ for(int i = 0; i < 4; i++) { ++ if (mask[i] == 0xff) { ++ if (vsrca[i] == 0xff) ++ *(dst + i) = vsrc[i]; ++ else ++ *(dst + i) = over(vsrc[i], *(dst + i)); ++ } else if (mask[i]) { ++ m = mask[i]; ++ d = in(vsrc[i], m); ++ *(dst + i) = over(d, *(dst + i)); ++ } ++ } ++ } ++ dst += 4; ++ w -= 4; ++ mask += 4; ++ } ++ ++ while (w--) { ++ m = *mask++; ++ if (m == 0xff) { ++ if (srca == 0xff) ++ *dst = src; ++ else ++ *dst = over(src, *dst); ++ } else if (m) { ++ d = in(src, m); ++ *dst = over(d, *dst); ++ } ++ dst++; ++ } ++ } ++} ++ ++static void ++lsx_composite_add_8_8 (pixman_implementation_t *imp, ++ pixman_composite_info_t *info) ++{ ++ PIXMAN_COMPOSITE_ARGS (info); ++ uint8_t *dst_line, *dst; ++ uint8_t *src_line, *src; ++ int dst_stride, src_stride; ++ int32_t w; ++ uint16_t t; ++ ++ PIXMAN_IMAGE_GET_LINE (src_image, src_x, src_y, uint8_t, src_stride, src_line, 1); ++ PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint8_t, dst_stride, dst_line, 1); ++ ++ while (height--) { ++ dst = dst_line; ++ src = src_line; ++ ++ dst_line += dst_stride; ++ src_line += src_stride; ++ w = width; ++ ++ lsx_combine_add_u(imp, op, (uint32_t *)dst, (uint32_t *)src, NULL, w >> 2); ++ dst += w & 0xfffc; ++ src += w & 0xfffc; ++ w &= 3; ++ ++ while (w--) { ++ t = (*dst) + (*src++); ++ *dst++ = t | (0 - (t >> 8)); ++ } ++ } ++} ++ ++static void ++lsx_composite_add_8888_8888 (pixman_implementation_t *imp, ++ pixman_composite_info_t *info) ++{ ++ PIXMAN_COMPOSITE_ARGS (info); ++ uint32_t *dst_line; ++ uint32_t *src_line; ++ int dst_stride, src_stride; ++ ++ PIXMAN_IMAGE_GET_LINE (src_image, src_x, src_y, uint32_t, src_stride, src_line, 1); ++ PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1); ++ ++ while (height--) { ++ lsx_combine_add_u(imp, op, dst_line, src_line, NULL, width); ++ dst_line += dst_stride; ++ src_line += src_stride; ++ } ++} ++ ++static void ++lsx_composite_over_8888_8888 (pixman_implementation_t *imp, ++ pixman_composite_info_t *info) ++{ ++ PIXMAN_COMPOSITE_ARGS (info); ++ int dst_stride, src_stride; ++ uint32_t *dst_line; ++ uint32_t *src_line; ++ ++ PIXMAN_IMAGE_GET_LINE (src_image, src_x, src_y, uint32_t, src_stride, src_line, 1); ++ PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1); ++ ++ while (height--) { ++ lsx_combine_over_u_no_mask (dst_line, src_line, width); ++ dst_line += dst_stride; ++ src_line += src_stride; ++ } ++} ++ ++static void ++lsx_composite_copy_area (pixman_implementation_t *imp, ++ pixman_composite_info_t *info) ++{ ++ bits_image_t src_bits, dst_bits; ++ src_bits = info->src_image->bits; ++ dst_bits = info->dest_image->bits; ++ lsx_blt (imp, src_bits.bits, ++ dst_bits.bits, ++ src_bits.rowstride, ++ dst_bits.rowstride, ++ PIXMAN_FORMAT_BPP (src_bits.format), ++ PIXMAN_FORMAT_BPP (dst_bits.format), ++ info->src_x, info->src_y, info->dest_x, ++ info->dest_y, info->width, info->height); ++} ++ ++static void ++lsx_composite_src_x888_0565 (pixman_implementation_t *imp, ++ pixman_composite_info_t *info) ++{ ++ PIXMAN_COMPOSITE_ARGS (info); ++ uint16_t *dst_line, *dst; ++ uint32_t *src_line, *src, s; ++ int dst_stride, src_stride; ++ int32_t w; ++ ++ __m128i src0, tmp; ++ __m128i rb0, t0, g0; ++ __m128i mask_565_rb = __lsx_vreplgr2vr_w(0x001f001f); ++ __m128i mask_green_4x32 = __lsx_vreplgr2vr_w(0x0000fc00); ++ ++ PIXMAN_IMAGE_GET_LINE (src_image, src_x, src_y, uint32_t, src_stride, src_line, 1); ++ PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint16_t, 
dst_stride, dst_line, 1); ++ ++ while (height--) { ++ dst = dst_line; ++ dst_line += dst_stride; ++ src = src_line; ++ src_line += src_stride; ++ w = width; ++ ++ while (w && (uintptr_t)src & 15) { ++ s = *src++; ++ *dst = convert_8888_to_0565(s); ++ dst++; ++ w--; ++ } ++ ++ while (w >= 4) { ++ src0 = __lsx_vld(src, 0); ++ src += 4; ++ w -= 4; ++ ++ rb0 = __lsx_vsrli_w(src0, 3) & mask_565_rb; ++ g0 = src0 & mask_green_4x32; ++ rb0 = rb0 | __lsx_vsrli_w(rb0, 5); ++ t0 = rb0 | __lsx_vsrli_w(g0, 5); ++ tmp = __lsx_vpickev_h(t0, t0); ++ __lsx_vstelm_d(tmp, dst, 0, 0); ++ dst += 4; ++ } ++ ++ while (w--) { ++ s = *src++; ++ *dst = convert_8888_to_0565(s); ++ dst++; ++ } ++ } ++} ++ ++static void ++lsx_composite_in_n_8_8 (pixman_implementation_t *imp, ++ pixman_composite_info_t *info) ++{ ++ PIXMAN_COMPOSITE_ARGS(info); ++ uint8_t *dst_line, *dst; ++ uint8_t *mask_line, *mask; ++ int dst_stride, mask_stride; ++ uint32_t m, src, srca; ++ int32_t w; ++ uint16_t t; ++ ++ __m128i alpha, tmp; ++ __m128i vmask, vmask_lo, vmask_hi; ++ __m128i vdst, vdst_lo, vdst_hi; ++ __m128i mask_zero = __lsx_vldi(0); ++ ++ PIXMAN_IMAGE_GET_LINE(dest_image, dest_x, dest_y, uint8_t, dst_stride, dst_line, 1); ++ PIXMAN_IMAGE_GET_LINE(mask_image, mask_x, mask_y, uint8_t, mask_stride, mask_line, 1); ++ ++ src = _pixman_image_get_solid(imp, src_image, dest_image->bits.format); ++ srca = src >> 24; ++ alpha = __lsx_vreplgr2vr_w(src); ++ alpha = __lsx_vilvl_b(mask_zero, alpha); ++ alpha = __lsx_vshuf4i_w(alpha, 0x44); ++ alpha = __lsx_vshuf4i_h(alpha, 0xff); ++ ++ while (height--) { ++ dst = dst_line; ++ dst_line += dst_stride; ++ mask = mask_line; ++ mask_line += mask_stride; ++ w = width; ++ ++ while (w >= 16) { ++ vmask = __lsx_vld(mask, 0); ++ vdst = __lsx_vld(dst, 0); ++ mask += 16; ++ w -= 16; ++ ++ vmask_lo = __lsx_vsllwil_hu_bu(vmask, 0); ++ vmask_hi = __lsx_vexth_hu_bu(vmask); ++ vdst_lo = __lsx_vsllwil_hu_bu(vdst, 0); ++ vdst_hi = __lsx_vexth_hu_bu(vdst); ++ vmask_lo = lsx_pix_multiply(alpha, vmask_lo); ++ vmask_hi = lsx_pix_multiply(alpha, vmask_hi); ++ vdst_lo = lsx_pix_multiply(vmask_lo, vdst_lo); ++ vdst_hi = lsx_pix_multiply(vmask_hi, vdst_hi); ++ vdst_lo = __lsx_vsat_bu(vdst_lo, 7); ++ vdst_hi = __lsx_vsat_bu(vdst_hi, 7); ++ tmp = __lsx_vpickev_b(vdst_hi, vdst_lo); ++ __lsx_vst(tmp, dst, 0); ++ dst += 16; ++ } ++ ++ while (w--) { ++ m = *mask++; ++ m = MUL_UN8(m, srca, t); ++ if (m == 0) ++ *dst = 0; ++ else if (m != 0xff) ++ *dst = MUL_UN8(m, *dst, t); ++ dst++; ++ } ++ } ++} ++ ++static void ++lsx_composite_in_8_8 (pixman_implementation_t *imp, ++ pixman_composite_info_t *info) ++{ ++ PIXMAN_COMPOSITE_ARGS (info); ++ uint8_t *dst_line, *dst; ++ uint8_t *src_line, *src; ++ int src_stride, dst_stride; ++ int32_t w, s; ++ uint16_t t; ++ ++ __m128i tmp; ++ __m128i vsrc, vsrc_lo, vsrc_hi; ++ __m128i vdst, vdst_lo, vdst_hi; ++ ++ PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint8_t, dst_stride, dst_line, 1); ++ PIXMAN_IMAGE_GET_LINE (src_image, src_x, src_y, uint8_t, src_stride, src_line, 1); ++ ++ while (height--) { ++ dst = dst_line; ++ dst_line += dst_stride; ++ src = src_line; ++ src_line += src_stride; ++ w = width; ++ ++ while (w >= 16) { ++ vsrc = __lsx_vld(src, 0); ++ vdst = __lsx_vld(dst, 0); ++ src += 16; ++ w -= 16; ++ ++ vsrc_lo = __lsx_vsllwil_hu_bu(vsrc, 0); ++ vsrc_hi = __lsx_vexth_hu_bu(vsrc); ++ vdst_lo = __lsx_vsllwil_hu_bu(vdst, 0); ++ vdst_hi = __lsx_vexth_hu_bu(vdst); ++ vdst_lo = lsx_pix_multiply(vsrc_lo, vdst_lo); ++ vdst_hi = lsx_pix_multiply(vsrc_hi, vdst_hi); ++ vdst_lo = 
__lsx_vsat_bu(vdst_lo, 7); ++ vdst_hi = __lsx_vsat_bu(vdst_hi, 7); ++ tmp = __lsx_vpickev_b(vdst_hi, vdst_lo); ++ __lsx_vst(tmp, dst, 0); ++ dst += 16; ++ } ++ ++ while (w--) { ++ s = *src++; ++ if (s == 0) ++ *dst = 0; ++ else if (s != 0xff) ++ *dst = MUL_UN8(s, *dst, t); ++ dst++; ++ } ++ } ++} ++ ++static void ++lsx_composite_over_n_8888_8888_ca (pixman_implementation_t *imp, ++ pixman_composite_info_t *info) ++{ ++ PIXMAN_COMPOSITE_ARGS (info); ++ uint32_t src, srca, ns; ++ uint32_t *dst_line, *dst, nd; ++ uint32_t *mask_line, *mask, ma; ++ int dst_stride, mask_stride; ++ int32_t w; ++ ++ __m128i d, m, t; ++ __m128i s, sa, d0, d1, m0, m1, t0, t1; ++ __m128i zero = __lsx_vldi(0); ++ __m128i bit_set = __lsx_vreplgr2vr_h(0xff); ++ src = _pixman_image_get_solid (imp, src_image, dest_image->bits.format); ++ srca = src >> 24; ++ if (src == 0) ++ return; ++ ++ PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1); ++ PIXMAN_IMAGE_GET_LINE (mask_image, mask_x, mask_y, uint32_t, mask_stride, mask_line, 1); ++ s = __lsx_vreplgr2vr_w(src); ++ s = __lsx_vilvl_b(zero, s); ++ sa = __lsx_vshuf4i_h(s, 0xff); ++ ++ while(height --) { ++ dst = dst_line; ++ dst_line += dst_stride; ++ mask = mask_line; ++ mask_line += mask_stride; ++ w = width; ++ ++ while (w && ((uintptr_t)dst & 15)) { ++ ma = *mask++; ++ if (ma == 0xffffffff) { ++ if (srca == 0xff) ++ *dst = src; ++ else ++ *dst = over(src, *dst); ++ } else if (ma) { ++ nd = *dst; ++ ns = src; ++ ++ UN8x4_MUL_UN8x4(ns, ma); ++ UN8x4_MUL_UN8(ma, srca); ++ ma = ~ma; ++ UN8x4_MUL_UN8x4_ADD_UN8x4(nd, ma, ns); ++ ++ *dst = nd; ++ } ++ dst++; ++ w--; ++ } ++ ++ while(w >= 4) { ++ m = __lsx_vld(mask, 0); ++ mask += 4; ++ w -= 4; ++ ++ if (__lsx_bnz_v(m)) { ++ d = __lsx_vld(dst, 0); ++ d0 = __lsx_vsllwil_hu_bu(d, 0); ++ d1 = __lsx_vexth_hu_bu(d); ++ m0 = __lsx_vsllwil_hu_bu(m, 0); ++ m1 = __lsx_vexth_hu_bu(m); ++ ++ t0 = lsx_pix_multiply(s, m0); ++ t1 = lsx_pix_multiply(s, m1); ++ ++ m0 = lsx_pix_multiply(m0, sa); ++ m1 = lsx_pix_multiply(m1, sa); ++ m0 = __lsx_vxor_v(m0, bit_set); ++ m1 = __lsx_vxor_v(m1, bit_set); ++ d0 = lsx_pix_multiply(d0, m0); ++ d1 = lsx_pix_multiply(d1, m1); ++ ++ d = __lsx_vpickev_b(d1, d0); ++ t = __lsx_vpickev_b(t1, t0); ++ d = __lsx_vsadd_bu(d, t); ++ __lsx_vst(d, dst, 0); ++ } ++ dst += 4; ++ } ++ ++ while(w--) { ++ ma = *mask++; ++ if (ma == 0xffffffff) { ++ if (srca == 0xff) ++ *dst = src; ++ else ++ *dst = over(src, *dst); ++ } else if (ma) { ++ nd = *dst; ++ ns = src; ++ ++ UN8x4_MUL_UN8x4(ns, ma); ++ UN8x4_MUL_UN8(ma, srca); ++ ma = ~ma; ++ UN8x4_MUL_UN8x4_ADD_UN8x4(nd, ma, ns); ++ ++ *dst = nd; ++ } ++ dst++; ++ } ++ } ++} ++ ++static void ++lsx_composite_over_reverse_n_8888 (pixman_implementation_t *imp, ++ pixman_composite_info_t *info) ++{ ++ PIXMAN_COMPOSITE_ARGS (info); ++ uint32_t src; ++ uint32_t *dst_line, *dst; ++ int dst_stride; ++ int32_t w; ++ ++ __m128i d, t; ++ __m128i s, d0, d1; ++ __m128i zero = __lsx_vldi(0); ++ __m128i bit_set = __lsx_vreplgr2vr_h(0xff); ++ src = _pixman_image_get_solid (imp, src_image, dest_image->bits.format); ++ if (src == 0) ++ return; ++ ++ PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1); ++ s = __lsx_vreplgr2vr_w(src); ++ s = __lsx_vilvl_b(zero, s); ++ ++ while (height--) ++ { ++ dst = dst_line; ++ dst_line += dst_stride; ++ w = width; ++ ++ while (w && ((uintptr_t)dst & 15)) { ++ d = __lsx_vldrepl_w(dst, 0); ++ d0 = __lsx_vsllwil_hu_bu(d, 0); ++ d0 = __lsx_vshuf4i_h(d0, 0xff); ++ d0 = __lsx_vxor_v(d0, bit_set); ++ 
d0 = lsx_pix_multiply(d0, s); ++ t = __lsx_vpickev_b(zero, d0); ++ d = __lsx_vsadd_bu(d, t); ++ __lsx_vstelm_w(d, dst, 0, 0); ++ dst += 1; ++ w--; ++ } ++ ++ while (w >= 4) { ++ d = __lsx_vld(dst, 0); ++ w -= 4; ++ ++ d0 = __lsx_vsllwil_hu_bu(d, 0); ++ d1 = __lsx_vexth_hu_bu(d); ++ d0 = __lsx_vshuf4i_h(d0, 0xff); ++ d1 = __lsx_vshuf4i_h(d1, 0xff); ++ d0 = __lsx_vxor_v(d0, bit_set); ++ d1 = __lsx_vxor_v(d1, bit_set); ++ d0 = lsx_pix_multiply(d0, s); ++ d1 = lsx_pix_multiply(d1, s); ++ t = __lsx_vpickev_b(d1, d0); ++ d = __lsx_vsadd_bu(d, t); ++ __lsx_vst(d, dst, 0); ++ dst += 4; ++ } ++ ++ while (w--) { ++ d = __lsx_vldrepl_w(dst, 0); ++ d0 = __lsx_vsllwil_hu_bu(d, 0); ++ d0 = __lsx_vshuf4i_h(d0, 0xff); ++ d0 = __lsx_vxor_v(d0, bit_set); ++ d0 = lsx_pix_multiply(d0, s); ++ t = __lsx_vpickev_b(zero, d0); ++ d = __lsx_vsadd_bu(d, t); ++ __lsx_vstelm_w(d, dst, 0, 0); ++ dst += 1; ++ } ++ } ++} ++ ++static void ++lsx_composite_src_x888_8888 (pixman_implementation_t *imp, ++ pixman_composite_info_t *info) ++{ ++ PIXMAN_COMPOSITE_ARGS (info); ++ uint32_t *dst_line, *dst; ++ uint32_t *src_line, *src; ++ int32_t w; ++ int dst_stride, src_stride; ++ __m128i mask = __lsx_vreplgr2vr_w(0xff000000); ++ __m128i vsrc0, vsrc1, vsrc2, vsrc3, vsrc4, vsrc5, vsrc6, vsrc7; ++ ++ PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1); ++ PIXMAN_IMAGE_GET_LINE (src_image, src_x, src_y, uint32_t, src_stride, src_line, 1); ++ ++ while (height--) { ++ dst = dst_line; ++ dst_line += dst_stride; ++ src = src_line; ++ src_line += src_stride; ++ w = width; ++ ++ while (w && ((uintptr_t)dst & 15)) { ++ *dst++ = *src++ | 0xff000000; ++ w--; ++ } ++ ++ while (w >= 32) { ++ vsrc0 = __lsx_vld(src, 0); ++ vsrc1 = __lsx_vld(src, 16); ++ vsrc2 = __lsx_vld(src, 32); ++ vsrc3 = __lsx_vld(src, 48); ++ vsrc4 = __lsx_vld(src, 64); ++ vsrc5 = __lsx_vld(src, 80); ++ vsrc6 = __lsx_vld(src, 96); ++ vsrc7 = __lsx_vld(src, 112); ++ vsrc0 = __lsx_vor_v(vsrc0, mask); ++ vsrc1 = __lsx_vor_v(vsrc1, mask); ++ vsrc2 = __lsx_vor_v(vsrc2, mask); ++ vsrc3 = __lsx_vor_v(vsrc3, mask); ++ vsrc4 = __lsx_vor_v(vsrc4, mask); ++ vsrc5 = __lsx_vor_v(vsrc5, mask); ++ vsrc6 = __lsx_vor_v(vsrc6, mask); ++ vsrc7 = __lsx_vor_v(vsrc7, mask); ++ __lsx_vst(vsrc0, dst, 0); ++ __lsx_vst(vsrc1, dst, 16); ++ __lsx_vst(vsrc2, dst, 32); ++ __lsx_vst(vsrc3, dst, 48); ++ __lsx_vst(vsrc4, dst, 64); ++ __lsx_vst(vsrc5, dst, 80); ++ __lsx_vst(vsrc6, dst, 96); ++ __lsx_vst(vsrc7, dst, 112); ++ ++ src += 32; ++ w -= 32; ++ dst += 32; ++ } ++ ++ if (w >= 16) { ++ vsrc0 = __lsx_vld(src, 0); ++ vsrc1 = __lsx_vld(src, 16); ++ vsrc2 = __lsx_vld(src, 32); ++ vsrc3 = __lsx_vld(src, 48); ++ vsrc0 = __lsx_vor_v(vsrc0, mask); ++ vsrc1 = __lsx_vor_v(vsrc1, mask); ++ vsrc2 = __lsx_vor_v(vsrc2, mask); ++ vsrc3 = __lsx_vor_v(vsrc3, mask); ++ __lsx_vst(vsrc0, dst, 0); ++ __lsx_vst(vsrc1, dst, 16); ++ __lsx_vst(vsrc2, dst, 32); ++ __lsx_vst(vsrc3, dst, 48); ++ ++ src += 16; ++ w -= 16; ++ dst += 16; ++ } ++ ++ if (w >= 8) { ++ vsrc0 = __lsx_vld(src, 0); ++ vsrc1 = __lsx_vld(src, 16); ++ vsrc0 = __lsx_vor_v(vsrc0, mask); ++ vsrc1 = __lsx_vor_v(vsrc1, mask); ++ __lsx_vst(vsrc0, dst, 0); ++ __lsx_vst(vsrc1, dst, 16); ++ ++ src += 8; ++ w -= 8; ++ dst += 8; ++ } ++ ++ if (w >= 4) { ++ vsrc0 = __lsx_vld(src, 0); ++ vsrc0 = __lsx_vor_v(vsrc0, mask); ++ __lsx_vst(vsrc0, dst, 0); ++ ++ src += 4; ++ w -= 4; ++ dst += 4; ++ } ++ ++ while (w--) { ++ *dst++ = *src++ | 0xff000000; ++ } ++ } ++} ++ ++static void ++lsx_composite_add_n_8_8 (pixman_implementation_t *imp, ++ 
pixman_composite_info_t *info) ++{ ++ PIXMAN_COMPOSITE_ARGS (info); ++ uint8_t *dst_line, *dst; ++ uint8_t *mask_line, *mask; ++ int dst_stride, mask_stride; ++ int32_t w; ++ uint32_t src; ++ uint16_t sa; ++ ++ __m128i d0; ++ __m128i vsrc, t0, t1; ++ __m128i a0, a0_l, a0_h; ++ __m128i b0, b0_l, b0_h; ++ __m128i zero = __lsx_vldi(0); ++ __m128i one_half = __lsx_vreplgr2vr_h(0x80); ++ __m128i g_shift = __lsx_vreplgr2vr_h(8); ++ ++ PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint8_t, dst_stride, dst_line, 1); ++ PIXMAN_IMAGE_GET_LINE (mask_image, mask_x, mask_y, uint8_t, mask_stride, mask_line, 1); ++ ++ src = _pixman_image_get_solid (imp, src_image, dest_image->bits.format); ++ ++ sa = (src >> 24); ++ vsrc = __lsx_vreplgr2vr_h(sa); ++ ++ while (height--) { ++ dst = dst_line; ++ dst_line += dst_stride; ++ mask = mask_line; ++ mask_line += mask_stride; ++ w = width; ++ ++ while (w >= 16) { ++ a0 = __lsx_vld(mask, 0); ++ w -= 16; ++ mask += 16; ++ ++ a0_l = __lsx_vsllwil_hu_bu(a0, 0); ++ a0_h = __lsx_vexth_hu_bu(a0); ++ ++ a0_l = __lsx_vmadd_h(one_half, a0_l, vsrc); ++ a0_h = __lsx_vmadd_h(one_half, a0_h, vsrc); ++ ++ a0_l = __lsx_vsadd_hu(__lsx_vsrl_h(a0_l, g_shift), a0_l); ++ a0_h = __lsx_vsadd_hu(__lsx_vsrl_h(a0_h, g_shift), a0_h); ++ ++ a0_l = __lsx_vsrl_h(a0_l, g_shift); ++ a0_h = __lsx_vsrl_h(a0_h, g_shift); ++ ++ b0 = __lsx_vld(dst, 0); ++ b0_l = __lsx_vsllwil_hu_bu(b0, 0); ++ b0_h = __lsx_vexth_hu_bu(b0); ++ ++ t0 = __lsx_vadd_h(a0_l, b0_l); ++ t1 = __lsx_vadd_h(a0_h, b0_h); ++ ++ t0 = __lsx_vor_v(t0, __lsx_vsub_h(zero, __lsx_vsrl_h(t0, g_shift))); ++ t1 = __lsx_vor_v(t1, __lsx_vsub_h(zero, __lsx_vsrl_h(t1, g_shift))); ++ ++ t0 = __lsx_vsat_hu(t0, 7); ++ t1 = __lsx_vsat_hu(t1 ,7); ++ ++ d0 = __lsx_vpickev_b(t1, t0); ++ __lsx_vst(d0, dst, 0); ++ dst += 16; ++ } ++ ++ while (w--) { ++ uint16_t tmp; ++ uint16_t a; ++ uint32_t m, d; ++ uint32_t r; ++ ++ a = *mask++; ++ d = *dst; ++ ++ m = MUL_UN8 (sa, a, tmp); ++ r = ADD_UN8 (m, d, tmp); ++ ++ *dst++ = r; ++ } ++ } ++} ++ ++static void ++lsx_composite_add_n_8 (pixman_implementation_t *imp, ++ pixman_composite_info_t *info) ++{ ++ PIXMAN_COMPOSITE_ARGS (info); ++ uint8_t *dst_line, *dst; ++ int dst_stride; ++ int32_t w; ++ uint32_t src; ++ ++ __m128i vsrc, d0, d1; ++ ++ PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint8_t, dst_stride, dst_line, 1); ++ ++ src = _pixman_image_get_solid (imp, src_image, dest_image->bits.format); ++ src >>= 24; ++ ++ if (src == 0x00) ++ return; ++ ++ if (src == 0xff) { ++ pixman_fill (dest_image->bits.bits, dest_image->bits.rowstride, ++ 8, dest_x, dest_y, width, height, 0xff); ++ return; ++ } ++ ++ vsrc = __lsx_vreplgr2vr_b(src); ++ ++ while (height--) { ++ dst = dst_line; ++ dst_line += dst_stride; ++ w = width; ++ ++ while (w && ((uintptr_t)dst & 15)) { ++ d0 = __lsx_vldrepl_b(dst, 0); ++ d0 = __lsx_vsadd_bu(vsrc, d0); ++ __lsx_vstelm_b(d0, dst, 0, 0); ++ w--; ++ dst++; ++ } ++ ++ while (w >= 32) { ++ d0 = __lsx_vld(dst, 0); ++ d1 = __lsx_vld(dst, 16); ++ w -= 32; ++ d0 = __lsx_vsadd_bu(vsrc, d0); ++ d1 = __lsx_vsadd_bu(vsrc, d1); ++ __lsx_vst(d0, dst, 0); ++ __lsx_vst(d1, dst, 16); ++ dst += 32; ++ } ++ ++ if (w >= 16) { ++ d0 = __lsx_vld(dst, 0); ++ w -= 16; ++ d0 = __lsx_vsadd_bu(vsrc, d0); ++ __lsx_vst(d0, dst, 0); ++ dst += 16; ++ } ++ ++ if (w >= 8) { ++ d0 = __lsx_vldrepl_d(dst, 0); ++ w -= 8; ++ d0 = __lsx_vsadd_bu(vsrc, d0); ++ __lsx_vstelm_d(d0, dst, 0, 0); ++ dst += 8; ++ } ++ ++ if (w >= 4) { ++ d0 = __lsx_vldrepl_w(dst, 0); ++ w -= 4; ++ d0 = __lsx_vsadd_bu(vsrc, d0); ++ 
__lsx_vstelm_w(d0, dst, 0, 0); ++ dst += 4; ++ } ++ ++ while (w--) { ++ d0 = __lsx_vldrepl_b(dst, 0); ++ d0 = __lsx_vsadd_bu(vsrc, d0); ++ __lsx_vstelm_b(d0, dst, 0, 0); ++ dst++; ++ } ++ } ++} ++ ++static void ++lsx_composite_add_n_8888 (pixman_implementation_t *imp, ++ pixman_composite_info_t *info) ++{ ++ PIXMAN_COMPOSITE_ARGS (info); ++ uint32_t *dst_line, *dst, src; ++ int dst_stride, w; ++ ++ __m128i vsrc, d0, d1; ++ ++ PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1); ++ ++ src = _pixman_image_get_solid (imp, src_image, dest_image->bits.format); ++ if (src == 0) ++ return; ++ ++ if (src == ~0) { ++ pixman_fill (dest_image->bits.bits, dest_image->bits.rowstride, 32, ++ dest_x, dest_y, width, height, ~0); ++ return; ++ } ++ ++ vsrc = __lsx_vreplgr2vr_w(src); ++ ++ while (height--) { ++ w = width; ++ ++ dst = dst_line; ++ dst_line += dst_stride; ++ ++ while (w && (uintptr_t)dst & 15) { ++ d0 = __lsx_vldrepl_w(dst, 0); ++ d0 = __lsx_vsadd_bu(vsrc, d0); ++ __lsx_vstelm_w(d0, dst, 0, 0); ++ dst++; ++ w--; ++ } ++ ++ while (w >= 8) { ++ d0 = __lsx_vld(dst, 0); ++ d1 = __lsx_vld(dst, 16); ++ w -= 8; ++ d0 = __lsx_vsadd_bu(vsrc, d0); ++ d1 = __lsx_vsadd_bu(vsrc, d1); ++ __lsx_vst(d0, dst, 0); ++ __lsx_vst(d1, dst, 16); ++ dst += 8; ++ } ++ ++ while (w >= 4) { ++ d0 = __lsx_vld(dst, 0); ++ w -= 4; ++ d0 = __lsx_vsadd_bu(vsrc, d0); ++ __lsx_vst(d0, dst, 0); ++ dst += 4; ++ } ++ ++ while (w--) { ++ d0 = __lsx_vldrepl_w(dst, 0); ++ d0 = __lsx_vsadd_bu(vsrc, d0); ++ __lsx_vstelm_w(d0, dst, 0, 0); ++ dst++; ++ } ++ } ++} ++ ++static force_inline __m128i ++unpack_32_1x128(uint32_t data) ++{ ++ __m128i zero = __lsx_vldi(0); ++ __m128i tmp = __lsx_vinsgr2vr_w(zero, data, 0); ++ return __lsx_vilvl_b(zero, tmp); ++} ++ ++static force_inline __m128i ++unpack_32_2x128(uint32_t data) ++{ ++ __m128i tmp0, out0; ++ __m128i zero = __lsx_vldi(0); ++ tmp0 = __lsx_vinsgr2vr_w(tmp0, data, 0); ++ tmp0 = __lsx_vpickev_d(tmp0, tmp0); ++ out0 = __lsx_vilvl_b(zero, tmp0); ++ ++ return out0; ++} ++ ++static force_inline __m128i ++expand_pixel_32_1x128(uint32_t data) ++{ ++ return __lsx_vshuf4i_w(unpack_32_1x128(data), 0x44); ++} ++ ++static force_inline __m128i ++expand_pixel_32_2x128(uint32_t data) ++{ ++ return __lsx_vshuf4i_w(unpack_32_2x128(data), 0x44); ++} ++ ++static force_inline __m128i ++expand_alpha_1x128(__m128i data) ++{ ++ return __lsx_vshuf4i_h(data, 0xff); ++} ++ ++static force_inline __m128i ++expand_alphaa_2x128(__m128i data) ++{ ++ __m128i tmp0; ++ tmp0 = __lsx_vshuf4i_h(data, 0xff); ++ tmp0 = __lsx_vpickev_d(tmp0, tmp0); ++ ++ return tmp0; ++} ++ ++static force_inline __m128i ++unpack_565_to_8888(__m128i lo) ++{ ++ __m128i r, g, b, rb, t; ++ __m128i mask_green_4x32 = __lsx_vreplgr2vr_w(0x0000fc00); ++ __m128i mask_red_4x32 = __lsx_vreplgr2vr_w(0x00f80000); ++ __m128i mask_blue_4x32 = __lsx_vreplgr2vr_w(0x000000f8); ++ __m128i mask_565_fix_rb = __lsx_vreplgr2vr_w(0x00e000e0); ++ __m128i mask_565_fix_g = __lsx_vreplgr2vr_w(0x0000c000); ++ ++ r = __lsx_vslli_w(lo, 8); ++ r = __lsx_vand_v(r, mask_red_4x32); ++ g = __lsx_vslli_w(lo, 5); ++ g = __lsx_vand_v(g, mask_green_4x32); ++ b = __lsx_vslli_w(lo, 3); ++ b = __lsx_vand_v(b, mask_blue_4x32); ++ ++ rb = __lsx_vor_v(r, b); ++ t = __lsx_vand_v(rb, mask_565_fix_rb); ++ t = __lsx_vsrli_w(t, 5); ++ rb = __lsx_vor_v(rb, t); ++ ++ t = __lsx_vand_v(g, mask_565_fix_g); ++ t = __lsx_vsrli_w(t, 6); ++ g = __lsx_vor_v(g, t); ++ ++ return (__lsx_vor_v(rb, g)); ++} ++ ++static force_inline void ++unpack_128_2x128(__m128i data, 
__m128i *data_lo, __m128i *data_hi) ++{ ++ __m128i mask_zero = __lsx_vldi(0); ++ *data_lo = __lsx_vilvl_b(mask_zero, data); ++ *data_hi = __lsx_vilvh_b(mask_zero, data); ++} ++ ++static force_inline void ++unpack_565_128_4x128(__m128i data, __m128i *data0, ++ __m128i *data1, __m128i *data2, __m128i *data3) ++{ ++ __m128i lo, hi; ++ __m128i mask_zero = __lsx_vldi(0); ++ lo = __lsx_vilvl_h(mask_zero, data); ++ hi = __lsx_vilvh_h(mask_zero, data); ++ lo = unpack_565_to_8888(lo); ++ hi = unpack_565_to_8888(hi); ++ ++ unpack_128_2x128((__m128i)lo, (__m128i*)data0, (__m128i*)data1); ++ unpack_128_2x128((__m128i)hi, (__m128i*)data2, (__m128i*)data3); ++} ++ ++static force_inline void ++negate_2x128(__m128i data_lo, __m128i data_hi, __m128i *neg_lo, __m128i *neg_hi) ++{ ++ __m128i mask_00ff = __lsx_vreplgr2vr_h(0x00ff); ++ *neg_lo = __lsx_vxor_v(data_lo, mask_00ff); ++ *neg_hi = __lsx_vxor_v(data_hi, mask_00ff); ++} ++ ++static force_inline void ++over_2x128(__m128i *src_lo, __m128i *src_hi, __m128i *alpha_lo, ++ __m128i *alpha_hi, __m128i *dst_lo, __m128i *dst_hi) ++{ ++ __m128i t1, t2; ++ negate_2x128(*alpha_lo, *alpha_hi, &t1, &t2); ++ *dst_lo = lsx_pix_multiply(*dst_lo, t1); ++ *dst_hi = lsx_pix_multiply(*dst_hi, t2); ++ *dst_lo = __lsx_vsadd_bu(*src_lo, *dst_lo); ++ *dst_hi = __lsx_vsadd_bu(*src_hi, *dst_hi); ++} ++ ++static force_inline __m128i ++pack_2x128_128(__m128i lo, __m128i hi) ++{ ++ __m128i tmp0 = __lsx_vsat_bu(lo, 7); ++ __m128i tmp1 = __lsx_vsat_bu(hi, 7); ++ __m128i tmp2 = __lsx_vpickev_b(tmp1, tmp0); ++ ++ return tmp2; ++} ++ ++static force_inline __m128i ++pack_565_2x128_128(__m128i lo, __m128i hi) ++{ ++ __m128i data; ++ __m128i r, g1, g2, b; ++ __m128i mask_565_r = __lsx_vreplgr2vr_w(0x00f80000); ++ __m128i mask_565_g1 = __lsx_vreplgr2vr_w(0x00070000); ++ __m128i mask_565_g2 = __lsx_vreplgr2vr_w(0x000000e0); ++ __m128i mask_565_b = __lsx_vreplgr2vr_w(0x0000001f); ++ ++ data = pack_2x128_128 (lo, hi); ++ r = __lsx_vand_v(data, mask_565_r); ++ g1 = __lsx_vslli_w(data, 3) & mask_565_g1; ++ g2 = __lsx_vsrli_w(data, 5) & mask_565_g2; ++ b = __lsx_vsrli_w(data, 3) & mask_565_b; ++ ++ return (((r|g1)|g2)|b); ++} ++ ++static force_inline __m128i ++expand565_16_1x128(uint16_t pixel) ++{ ++ __m128i m; ++ __m128i zero = __lsx_vldi(0); ++ ++ m = __lsx_vinsgr2vr_w(m, pixel, 0); ++ m = unpack_565_to_8888(m); ++ m = __lsx_vilvl_b(zero, m); ++ ++ return m; ++} ++ ++static force_inline uint32_t ++pack_1x128_32(__m128i data) ++{ ++ __m128i tmp0, tmp1; ++ __m128i zero = __lsx_vldi(0); ++ ++ tmp0 = __lsx_vsat_bu(data, 7); ++ tmp1 = __lsx_vpickev_b(zero, tmp0); ++ ++ return (__lsx_vpickve2gr_wu(tmp1, 0)); ++} ++ ++static force_inline uint16_t ++pack_565_32_16(uint32_t pixel) ++{ ++ return (uint16_t)(((pixel >> 8) & 0xf800) | ++ ((pixel >> 5) & 0x07e0) | ++ ((pixel >> 3) & 0x001f)); ++} ++ ++static force_inline __m128i ++pack_565_4x128_128(__m128i *v0, __m128i *v1, __m128i *v2, __m128i *v3) ++{ ++ return pack_2x128_128(pack_565_2x128_128(*v0, *v1), ++ pack_565_2x128_128(*v2, *v3)); ++} ++ ++static force_inline void ++expand_alpha_2x128(__m128i data_lo, __m128i data_hi, __m128i *alpha_lo, __m128i *alpha_hi) ++{ ++ *alpha_lo = __lsx_vshuf4i_h(data_lo, 0xff); ++ *alpha_hi = __lsx_vshuf4i_h(data_hi, 0xff); ++} ++ ++static force_inline void ++expand_alpha_rev_2x128(__m128i data_lo, __m128i data_hi, __m128i *alpha_lo, __m128i *alpha_hi) ++{ ++ *alpha_lo = __lsx_vshuf4i_h(data_lo, 0x00); ++ *alpha_hi = __lsx_vshuf4i_h(data_hi, 0x00); ++} ++ ++static force_inline uint16_t 
++composite_over_8888_0565pixel(uint32_t src, uint16_t dst) ++{ ++ __m128i ms; ++ ms = unpack_32_1x128(src); ++ ++ return pack_565_32_16(pack_1x128_32((__m128i)over_1x128((__m128i)ms, ++ (__m128i)expand_alpha_1x128((__m128i)ms), expand565_16_1x128(dst)))); ++} ++ ++static force_inline void ++in_over_2x128(__m128i *src_lo, __m128i *src_hi, __m128i *alpha_lo, __m128i *alpha_hi, ++ __m128i *mask_lo, __m128i *mask_hi, __m128i *dst_lo, __m128i *dst_hi) ++{ ++ __m128i s_lo, s_hi; ++ __m128i a_lo, a_hi; ++ s_lo = lsx_pix_multiply(*src_lo, *mask_lo); ++ s_hi = lsx_pix_multiply(*src_hi, *mask_hi); ++ a_lo = lsx_pix_multiply(*alpha_lo, *mask_lo); ++ a_hi = lsx_pix_multiply(*alpha_hi, *mask_hi); ++ over_2x128(&s_lo, &s_hi, &a_lo, &a_hi, dst_lo, dst_hi); ++} ++ ++static force_inline __m128i ++in_over_1x128(__m128i *src, __m128i *alpha, __m128i *mask, __m128i *dst) ++{ ++ return over_1x128(lsx_pix_multiply(*src, *mask), ++ lsx_pix_multiply(*alpha, *mask), *dst); ++} ++ ++static force_inline __m128i ++expand_alpha_rev_1x128(__m128i data) ++{ ++ __m128i v0 = {0x00000000, 0xffffffff}; ++ __m128i v_hi = __lsx_vand_v(data, v0); ++ data = __lsx_vshuf4i_h(data, 0x00); ++ v0 = __lsx_vnor_v(v0, v0); ++ data = __lsx_vand_v(data, v0); ++ data = __lsx_vor_v(data, v_hi); ++ ++ return data; ++} ++ ++static void ++lsx_composite_over_n_0565 (pixman_implementation_t *imp, ++ pixman_composite_info_t *info) ++{ ++ PIXMAN_COMPOSITE_ARGS (info); ++ uint32_t src; ++ uint16_t *dst_line, *dst, d; ++ int32_t w; ++ int dst_stride; ++ __m128i vsrc, valpha; ++ __m128i vdst, vdst0, vdst1, vdst2, vdst3; ++ ++ src = _pixman_image_get_solid (imp, src_image, dest_image->bits.format); ++ ++ if (src == 0) ++ return; ++ ++ PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint16_t, dst_stride, dst_line, 1); ++ ++ vsrc = expand_pixel_32_1x128(src); ++ valpha = expand_alpha_1x128(vsrc); ++ ++ while (height--) { ++ dst = dst_line; ++ ++ dst_line += dst_stride; ++ w = width; ++ ++ while (w >= 8) { ++ vdst = __lsx_vld(dst, 0); ++ w -= 8; ++ ++ unpack_565_128_4x128(vdst, &vdst0, &vdst1, &vdst2, &vdst3); ++ ++ over_2x128(&vsrc, &vsrc, &valpha, &valpha, &vdst0, &vdst1); ++ over_2x128(&vsrc, &vsrc, &valpha, &valpha, &vdst2, &vdst3); ++ ++ vdst = pack_565_4x128_128(&vdst0, &vdst1, &vdst2, &vdst3); ++ __lsx_vst(vdst, dst, 0); ++ dst += 8; ++ } ++ ++ while (w--) { ++ d = *dst; ++ *dst++ = pack_565_32_16(pack_1x128_32(over_1x128(vsrc,valpha, expand565_16_1x128(d)))); ++ } ++ } ++} ++ ++static void ++lsx_composite_over_8888_0565 (pixman_implementation_t *imp, ++ pixman_composite_info_t *info) ++{ ++ PIXMAN_COMPOSITE_ARGS (info); ++ uint16_t *dst_line, *dst, d; ++ uint32_t *src_line, *src, s; ++ int dst_stride, src_stride; ++ int32_t w; ++ ++ __m128i valpha_lo, valpha_hi; ++ __m128i vsrc, vsrc_lo, vsrc_hi; ++ __m128i vdst, vdst0, vdst1, vdst2, vdst3; ++ ++ PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint16_t, dst_stride, dst_line, 1); ++ PIXMAN_IMAGE_GET_LINE (src_image, src_x, src_y, uint32_t, src_stride, src_line, 1); ++ ++ while (height--) { ++ dst = dst_line; ++ src = src_line; ++ ++ dst_line += dst_stride; ++ src_line += src_stride; ++ w = width; ++ ++ while (w >= 8) { ++ vsrc = __lsx_vld(src, 0); ++ vdst = __lsx_vld(dst, 0); ++ ++ unpack_128_2x128(vsrc, &vsrc_lo, &vsrc_hi); ++ unpack_565_128_4x128(vdst, &vdst0, &vdst1, &vdst2, &vdst3); ++ ++ expand_alpha_2x128(vsrc_lo, vsrc_hi, &valpha_lo, &valpha_hi); ++ ++ vsrc = __lsx_vld(src, 16); ++ over_2x128(&vsrc_lo, &vsrc_hi, &valpha_lo, &valpha_hi, &vdst0, &vdst1); ++ ++ unpack_128_2x128(vsrc, 
&vsrc_lo, &vsrc_hi); ++ expand_alpha_2x128(vsrc_lo, vsrc_hi, &valpha_lo, &valpha_hi); ++ over_2x128(&vsrc_lo, &vsrc_hi, &valpha_lo, &valpha_hi, &vdst2, &vdst3); ++ ++ __lsx_vst(pack_565_4x128_128(&vdst0, &vdst1, &vdst2, &vdst3), dst, 0); ++ ++ w -= 8; ++ dst += 8; ++ src += 8; ++ } ++ ++ while (w--) { ++ s = *src++; ++ d = *dst; ++ *dst++ = composite_over_8888_0565pixel(s, d); ++ } ++ } ++} ++ ++static void ++lsx_composite_over_n_8_0565 (pixman_implementation_t *imp, ++ pixman_composite_info_t *info) ++{ ++ PIXMAN_COMPOSITE_ARGS (info); ++ uint32_t src; ++ uint16_t *dst_line, *dst, d; ++ uint8_t *mask_line, *p; ++ uint32_t *mask; ++ int dst_stride, mask_stride; ++ int32_t w; ++ uint32_t m; ++ ++ __m128i mask_zero = __lsx_vldi(0); ++ __m128i lsx_src, lsx_alpha, lsx_mask, lsx_dest; ++ __m128i vsrc, valpha; ++ __m128i vmask, vmask_lo, vmask_hi; ++ __m128i vdst, vdst0, vdst1, vdst2, vdst3; ++ ++ src = _pixman_image_get_solid (imp, src_image, dest_image->bits.format); ++ ++ if (src == 0) ++ return; ++ ++ PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint16_t, dst_stride, dst_line, 1); ++ PIXMAN_IMAGE_GET_LINE (mask_image, mask_x, mask_y, uint8_t, mask_stride, mask_line, 1); ++ ++ lsx_src = expand_pixel_32_1x128(src); ++ lsx_alpha = expand_alpha_1x128(lsx_src); ++ ++ vsrc = expand_pixel_32_2x128(src); ++ valpha = expand_alphaa_2x128(vsrc); ++ ++ while (height--) { ++ dst = dst_line; ++ dst_line += dst_stride; ++ mask = (void*)mask_line; ++ mask_line += mask_stride; ++ w = width; ++ ++ while (w >= 8) { ++ vdst = __lsx_vld(dst, 0); ++ w -= 8; ++ unpack_565_128_4x128(vdst, &vdst0, &vdst1, &vdst2, &vdst3); ++ ++ m = *((uint32_t*)mask); ++ mask += 1; ++ ++ if (m) ++ { ++ vmask = unpack_32_1x128(m); ++ vmask = __lsx_vilvl_b(mask_zero, vmask); ++ unpack_128_2x128(vmask, (__m128i*)&vmask_lo, (__m128i*)&vmask_hi); ++ expand_alpha_rev_2x128(vmask_lo, vmask_hi, &vmask_lo, &vmask_hi); ++ in_over_2x128(&vsrc, &vsrc, &valpha, &valpha, &vmask_lo, &vmask_hi, ++ &vdst0, &vdst1); ++ } ++ ++ m = *((uint32_t*)mask); ++ mask += 1; ++ ++ if (m) ++ { ++ vmask = unpack_32_1x128(m); ++ vmask = __lsx_vilvl_b(mask_zero, vmask); ++ unpack_128_2x128(vmask, (__m128i*)&vmask_lo, (__m128i*)&vmask_hi); ++ expand_alpha_rev_2x128(vmask_lo, vmask_hi, &vmask_lo, &vmask_hi); ++ in_over_2x128(&vsrc, &vsrc, &valpha, &valpha, &vmask_lo, &vmask_hi, ++ &vdst2, &vdst3); ++ } ++ ++ __lsx_vst(pack_565_4x128_128(&vdst0, &vdst1, &vdst2, &vdst3), dst, 0); ++ ++ dst += 8; ++ } ++ ++ p = (void*)mask; ++ while (w--) { ++ m = *p++; ++ ++ if (m) { ++ d = *dst; ++ lsx_mask = expand_alpha_rev_1x128(unpack_32_1x128 (m)); ++ lsx_dest = expand565_16_1x128(d); ++ ++ *dst = pack_565_32_16(pack_1x128_32(in_over_1x128 (&lsx_src, ++ &lsx_alpha, &lsx_mask, &lsx_dest))); ++ } ++ dst++; ++ } ++ } ++} ++ ++static void ++lsx_composite_over_x888_8_8888 (pixman_implementation_t *imp, ++ pixman_composite_info_t *info) ++{ ++ PIXMAN_COMPOSITE_ARGS (info); ++ uint32_t *src, *src_line, s; ++ uint32_t *dst, *dst_line, d; ++ uint8_t *mask_line, *p; ++ uint32_t *mask; ++ uint32_t m, w; ++ int src_stride, mask_stride, dst_stride; ++ ++ __m128i mask_zero = __lsx_vldi(0); ++ __m128i mask_00ff; ++ __m128i mask_4x32; ++ __m128i vsrc, vsrc_lo, vsrc_hi; ++ __m128i vdst, vdst_lo, vdst_hi; ++ __m128i vmask, vmask_lo, vmask_hi; ++ ++ mask_4x32 = __lsx_vreplgr2vr_w(0xff000000); ++ mask_00ff = __lsx_vreplgr2vr_h(0x00ff); ++ ++ PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1); ++ PIXMAN_IMAGE_GET_LINE (mask_image, mask_x, mask_y, uint8_t, 
mask_stride, mask_line, 1); ++ PIXMAN_IMAGE_GET_LINE (src_image, src_x, src_y, uint32_t, src_stride, src_line, 1); ++ ++ while (height--) { ++ src = src_line; ++ src_line += src_stride; ++ dst = dst_line; ++ dst_line += dst_stride; ++ mask = (void*)mask_line; ++ mask_line += mask_stride; ++ w = width; ++ ++ while (w >= 4) { ++ m = *mask; ++ vsrc = __lsx_vld(src, 0); ++ src += 4; ++ w -= 4; ++ vsrc |= mask_4x32; ++ ++ if (m == 0xffffffff) { ++ __lsx_vst(vsrc, dst, 0); ++ } else { ++ vdst = __lsx_vld(dst, 0); ++ vmask = __lsx_vilvl_b(mask_zero, unpack_32_1x128(m)); ++ ++ unpack_128_2x128(vsrc, (__m128i*)&vsrc_lo, (__m128i*)&vsrc_hi); ++ unpack_128_2x128(vmask, (__m128i*)&vmask_lo, (__m128i*)&vmask_hi); ++ expand_alpha_rev_2x128(vmask_lo, vmask_hi, &vmask_lo, &vmask_hi); ++ unpack_128_2x128(vdst, (__m128i*)&vdst_lo, (__m128i*)&vdst_hi); ++ ++ in_over_2x128(&vsrc_lo, &vsrc_hi, &mask_00ff, &mask_00ff, ++ &vmask_lo, &vmask_hi, &vdst_lo, &vdst_hi); ++ ++ __lsx_vst(pack_2x128_128(vdst_lo, vdst_hi), dst, 0); ++ } ++ dst += 4; ++ mask += 1; ++ } ++ ++ p = (void*)mask; ++ while (w--) { ++ m = *p++; ++ ++ if (m) { ++ s = 0xff000000 | *src; ++ ++ if (m == 0xff) { ++ *dst = s; ++ } ++ else { ++ __m128i ma, md, ms; ++ d = *dst; ++ ma = expand_alpha_rev_1x128(unpack_32_1x128(m)); ++ md = unpack_32_1x128(d); ++ ms = unpack_32_1x128(s); ++ *dst = pack_1x128_32(in_over_1x128(&ms, &mask_00ff, &ma, &md)); ++ } ++ } ++ src++; ++ dst++; ++ } ++ } ++} ++ ++static void ++lsx_composite_over_8888_n_8888 (pixman_implementation_t *imp, ++ pixman_composite_info_t *info) ++{ ++ PIXMAN_COMPOSITE_ARGS (info); ++ uint32_t *dst_line, *dst; ++ uint32_t *src_line, *src; ++ uint32_t mask, maska; ++ int32_t w; ++ int dst_stride, src_stride; ++ ++ __m128i vmask; ++ __m128i vsrc, vsrc_lo, vsrc_hi; ++ __m128i vdst, vdst_lo, vdst_hi; ++ __m128i valpha_lo, valpha_hi; ++ ++ PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1); ++ PIXMAN_IMAGE_GET_LINE (src_image, src_x, src_y, uint32_t, src_stride, src_line, 1); ++ ++ mask = _pixman_image_get_solid (imp, mask_image, PIXMAN_a8r8g8b8); ++ maska = mask >> 24; ++ vmask = __lsx_vreplgr2vr_h(maska); ++ ++ while (height--) { ++ dst = dst_line; ++ dst_line += dst_stride; ++ src = src_line; ++ src_line += src_stride; ++ w = width; ++ ++ while (w >= 4) { ++ vsrc = __lsx_vld(src, 0); ++ src += 4; ++ w -= 4; ++ ++ if (__lsx_bnz_v(vsrc)) { ++ vdst = __lsx_vld(dst, 0); ++ unpack_128_2x128(vsrc, (__m128i*)&vsrc_lo, (__m128i*)&vsrc_hi); ++ unpack_128_2x128(vdst, (__m128i*)&vdst_lo, (__m128i*)&vdst_hi); ++ expand_alpha_2x128(vsrc_lo, vsrc_hi, &valpha_lo, &valpha_hi); ++ ++ in_over_2x128(&vsrc_lo, &vsrc_hi, &valpha_lo, &valpha_hi, ++ &vmask, &vmask, &vdst_lo, &vdst_hi); ++ ++ __lsx_vst(pack_2x128_128(vdst_lo, vdst_hi), dst, 0); ++ } ++ dst += 4; ++ } ++ ++ while (w--) { ++ uint32_t s = *src++; ++ ++ if (s) { ++ uint32_t d = *dst; ++ __m128i ms = unpack_32_1x128(s); ++ __m128i alpha = expand_alpha_1x128(ms); ++ __m128i mask = vmask; ++ __m128i dest = unpack_32_1x128(d); ++ *dst = pack_1x128_32(in_over_1x128(&ms, &alpha, &mask, &dest)); ++ } ++ dst++; ++ } ++ } ++} ++ ++static void ++lsx_composite_over_x888_n_8888 (pixman_implementation_t *imp, ++ pixman_composite_info_t *info) ++{ ++ PIXMAN_COMPOSITE_ARGS (info); ++ uint32_t *dst_line, *dst; ++ uint32_t *src_line, *src; ++ uint32_t mask, maska; ++ int dst_stride, src_stride; ++ int32_t w; ++ ++ __m128i vmask, valpha, mask_4x32, mask_00ff; ++ __m128i vsrc, vsrc_lo, vsrc_hi; ++ __m128i vdst, vdst_lo, vdst_hi; ++ __m128i 
zero = __lsx_vldi(0); ++ ++ mask_4x32 = __lsx_vreplgr2vr_w(0xff000000); ++ mask_00ff = __lsx_vreplgr2vr_h(0x00ff); ++ ++ PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1); ++ PIXMAN_IMAGE_GET_LINE (src_image, src_x, src_y, uint32_t, src_stride, src_line, 1); ++ ++ mask = _pixman_image_get_solid (imp, mask_image, PIXMAN_a8r8g8b8); ++ maska = mask >> 24; ++ ++ vmask = __lsx_vreplgr2vr_h(maska); ++ valpha = mask_00ff; ++ ++ while (height--) { ++ dst = dst_line; ++ dst_line += dst_stride; ++ src = src_line; ++ src_line += src_stride; ++ w = width; ++ ++ while (w >= 4) { ++ vsrc = __lsx_vld(src, 0); ++ src += 4; ++ w -= 4; ++ vsrc = __lsx_vor_v(vsrc, mask_4x32); ++ vdst = __lsx_vld(dst, 0); ++ ++ unpack_128_2x128(vsrc, (__m128i*)&vsrc_lo, (__m128i*)&vsrc_hi); ++ unpack_128_2x128(vdst, (__m128i*)&vdst_lo, (__m128i*)&vdst_hi); ++ ++ in_over_2x128(&vsrc_lo, &vsrc_hi, &valpha, &valpha, ++ &vmask, &vmask, &vdst_lo, &vdst_hi); ++ ++ __lsx_vst(pack_2x128_128(vdst_lo, vdst_hi), dst, 0); ++ dst += 4; ++ } ++ ++ while (w--) { ++ uint32_t s = (*src++) | 0xff000000; ++ uint32_t d = *dst; ++ ++ __m128i alpha, tmask; ++ __m128i src = unpack_32_1x128 (s); ++ __m128i dest = unpack_32_1x128 (d); ++ ++ alpha = __lsx_vpickev_d(zero, valpha); ++ tmask = __lsx_vpickev_d(zero, vmask); ++ ++ *dst = pack_1x128_32(in_over_1x128(&src, &alpha, &tmask, &dest)); ++ ++ dst++; ++ } ++ } ++} ++ ++static void ++lsx_composite_over_n_8888_0565_ca (pixman_implementation_t *imp, ++ pixman_composite_info_t *info) ++{ ++ PIXMAN_COMPOSITE_ARGS (info); ++ uint32_t src; ++ uint16_t *dst_line, *dst, d; ++ uint32_t *mask_line, *mask, m; ++ int dst_stride, mask_stride; ++ int w, flag; ++ ++ __m128i vsrc, valpha; ++ __m128i lsx_src, lsx_alpha, lsx_mask, lsx_dest; ++ __m128i vmask, vmask_lo, vmask_hi; ++ __m128i vdst, vdst0, vdst1, vdst2, vdst3; ++ ++ src = _pixman_image_get_solid (imp, src_image, dest_image->bits.format); ++ ++ if (src == 0) ++ return; ++ ++ PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint16_t, dst_stride, dst_line, 1); ++ PIXMAN_IMAGE_GET_LINE (mask_image, mask_x, mask_y, uint32_t, mask_stride, mask_line, 1); ++ ++ lsx_src = expand_pixel_32_1x128(src); ++ lsx_alpha = expand_alpha_1x128(lsx_src); ++ ++ vsrc = expand_pixel_32_2x128(src); ++ valpha = expand_alphaa_2x128(vsrc); ++ ++ while (height--) { ++ mask = mask_line; ++ dst = dst_line; ++ mask_line += mask_stride; ++ dst_line += dst_stride; ++ w = width; ++ ++ while (w >= 8) { ++ vmask = __lsx_vld(mask, 0); ++ vdst = __lsx_vld(dst, 0); ++ w -= 8; ++ ++ unpack_565_128_4x128(vdst, &vdst0, &vdst1, &vdst2, &vdst3); ++ ++ flag = __lsx_bnz_v(vmask); ++ unpack_128_2x128(vmask, (__m128i*)&vmask_lo, (__m128i*)&vmask_hi); ++ vmask = __lsx_vld(mask, 16); ++ if (flag) { ++ in_over_2x128(&vsrc, &vsrc, &valpha, &valpha, &vmask_lo, &vmask_hi, ++ &vdst0, &vdst1); ++ } ++ ++ flag = __lsx_bnz_v(vmask); ++ unpack_128_2x128(vmask, (__m128i*)&vmask_lo, (__m128i*)&vmask_hi); ++ if (flag) { ++ in_over_2x128(&vsrc, &vsrc, &valpha, &valpha, &vmask_lo, &vmask_hi, ++ &vdst2, &vdst3); ++ } ++ ++ __lsx_vst(pack_565_4x128_128(&vdst0, &vdst1, &vdst2, &vdst3), dst, 0); ++ dst += 8; ++ mask += 8; ++ } ++ ++ while (w--) { ++ m = *(uint32_t *) mask; ++ ++ if (m) { ++ d = *dst; ++ lsx_mask = unpack_32_1x128(m); ++ lsx_dest = expand565_16_1x128(d); ++ *dst = pack_565_32_16(pack_1x128_32(in_over_1x128(&lsx_src, &lsx_alpha, ++ &lsx_mask, &lsx_dest))); ++ } ++ dst++; ++ mask++; ++ } ++ } ++} ++ ++static uint32_t * ++lsx_fetch_x8r8g8b8 (pixman_iter_t *iter, const 
uint32_t *mask) ++{ ++ __m128i out0, out1, mask_4x32; ++ int w = iter->width; ++ uint32_t *dst = iter->buffer; ++ uint32_t *src = (uint32_t *)iter->bits; ++ iter->bits += iter->stride; ++ mask_4x32 = __lsx_vreplgr2vr_w(0xff000000); ++ ++ while (w >= 8) { ++ out0 = __lsx_vld(src, 0); ++ out1 = __lsx_vld(src, 16); ++ out0 = __lsx_vor_v(out0, mask_4x32); ++ out1 = __lsx_vor_v(out1, mask_4x32); ++ __lsx_vst(out0, dst, 0); ++ __lsx_vst(out1, dst, 16); ++ src += 8; ++ dst += 8; ++ w -= 8; ++ } ++ ++ if (w >= 4) { ++ __lsx_vst(__lsx_vor_v(__lsx_vld(src, 0), mask_4x32), dst, 0); ++ src += 4; ++ dst += 4; ++ w -= 4; ++ } ++ ++ while (w--) { ++ *dst++ = (*src++) | 0xff000000; ++ } ++ ++ return iter->buffer; ++} ++ ++static uint32_t * ++lsx_fetch_r5g6b5 (pixman_iter_t *iter, const uint32_t *mask) ++{ ++ __m128i a, sa, s0, s1, s2, s3, s4; ++ __m128i mask_red, mask_green, mask_blue; ++ ++ int w = iter->width; ++ uint32_t *dst = iter->buffer; ++ uint16_t *src = (uint16_t *)iter->bits; ++ iter->bits += iter->stride; ++ ++ mask_red = __lsx_vreplgr2vr_h(248); ++ mask_green = __lsx_vreplgr2vr_h(252); ++ mask_blue = mask_red; ++ a = __lsx_vreplgr2vr_h(255) << 8; ++ ++ while (w >= 4) { ++ s0 = __lsx_vld(src, 0); ++ src += 4; ++ w -= 4; ++ //r ++ s1 = __lsx_vsrli_h(s0, 8); ++ s1 &= mask_red; ++ s2 = __lsx_vsrli_h(s1, 5); ++ s1 |= s2; ++ ++ //g ++ s2 = __lsx_vsrli_h(s0, 3); ++ s2 &= mask_green; ++ s3 = __lsx_vsrli_h(s2, 6); ++ s2 |= s3; ++ ++ //b ++ s3 = s0 << 3; ++ s3 &= mask_blue; ++ s4 = __lsx_vsrli_h(s3, 5); ++ s3 |= s4; ++ ++ //ar ++ sa = a | s1; ++ ++ //gb ++ s2 <<= 8; ++ s2 |= s3; ++ ++ s1 = __lsx_vilvl_h(sa, s2); ++ __lsx_vst(s1, dst, 0); ++ dst += 4; ++ } ++ ++ while (w--) { ++ uint16_t s = *src++; ++ *dst++ = convert_0565_to_8888(s); ++ } ++ ++ return iter->buffer; ++} ++ ++static uint32_t * ++lsx_fetch_a8 (pixman_iter_t *iter, const uint32_t *mask) ++{ ++ __m128i srcv; ++ __m128i t0, t1, t2, t3; ++ __m128i dst0, dst1; ++ __m128i zero = __lsx_vldi(0); ++ int w = iter->width; ++ uint32_t *dst = iter->buffer; ++ uint8_t *src = iter->bits; ++ ++ iter->bits += iter->stride; ++ ++ while (w >= 16) { ++ srcv = __lsx_vld(src, 0); ++ src += 16; ++ w -= 16; ++ dst0 = __lsx_vilvl_b(srcv, zero); ++ dst1 = __lsx_vilvh_b(srcv, zero); ++ t0 = __lsx_vilvl_h(dst0, zero); ++ t1 = __lsx_vilvh_h(dst0, zero); ++ t2 = __lsx_vilvl_h(dst1, zero); ++ t3 = __lsx_vilvh_h(dst1, zero); ++ __lsx_vst(t0, dst, 0); ++ __lsx_vst(t1, dst, 16); ++ __lsx_vst(t2, dst, 32); ++ __lsx_vst(t3, dst, 48); ++ dst += 16; ++ } ++ ++ while (w >= 4) { ++ srcv = __lsx_vld(src, 0); ++ src += 4; ++ w -= 4; ++ dst0 = __lsx_vilvl_b(srcv, zero); ++ dst0 = __lsx_vilvl_h(dst0, zero); ++ __lsx_vst(dst0, dst, 0); ++ dst += 4; ++ } ++ ++ while (w--) { ++ *dst++ = *(src++) << 24; ++ } ++ ++ return iter->buffer; ++} ++ ++// fetch/store 8 bits ++static void lsx_fetch_scanline_a8 (bits_image_t *image, int x, int y, int width, ++ uint32_t *buffer, const uint32_t *mask) ++{ ++ uint8_t *bits = (uint8_t *)(image->bits + y * image->rowstride); ++ __m128i src; ++ __m128i t0, t1; ++ __m128i temp0, temp1, temp2, temp3; ++ __m128i zero = __lsx_vldi(0); ++ bits += x; ++ ++ while (width >= 16) { ++ src = __lsx_vld(bits, 0); ++ t0 = __lsx_vilvl_b(src, zero); ++ t1 = __lsx_vilvh_b(src, zero); ++ temp0 = __lsx_vilvl_h(t0, zero); ++ temp1 = __lsx_vilvh_h(t0, zero); ++ temp2 = __lsx_vilvl_h(t1, zero); ++ temp3 = __lsx_vilvh_h(t1, zero); ++ __lsx_vst(temp0, buffer, 0); ++ __lsx_vst(temp1, buffer, 16); ++ __lsx_vst(temp2, buffer, 32); ++ __lsx_vst(temp3, buffer, 48); ++ bits += 16, 
width -= 16, buffer += 16; ++ } ++ while (width >= 8) { ++ src = __lsx_vld(bits, 0); ++ t0 = __lsx_vilvl_b(src, zero); ++ temp0 = __lsx_vilvl_h(t0, zero); ++ temp1 = __lsx_vilvh_h(t0, zero); ++ __lsx_vst(temp0, buffer, 0); ++ __lsx_vst(temp1, buffer, 16); ++ bits += 8, width -= 8, buffer += 8; ++ } ++ while (width >= 4) { ++ src = __lsx_vld(bits, 0); ++ t0 = __lsx_vilvl_b(src, zero); ++ temp0 = __lsx_vilvl_h(t0, zero); ++ __lsx_vst(temp0, buffer, 0); ++ bits += 4; width -= 4; buffer += 4; ++ } ++ while(width--) { ++ *buffer++ = ((*bits++) << 24); ++ } ++} ++ ++static void lsx_store_scanline_a8 (bits_image_t *image, int x, int y, int width, ++ const uint32_t *values) ++{ ++ uint8_t *dest = (uint8_t *)(image->bits + y * image->rowstride); ++ __m128i src0, src1, src2, src3; ++ dest += x; ++ ++ while (width >= 16) { ++ src0 = __lsx_vld(values, 0); ++ src1 = __lsx_vld(values, 16); ++ src2 = __lsx_vld(values, 32); ++ src3 = __lsx_vld(values, 48); ++ src0 = __lsx_vsrli_w(src0, 24); ++ src1 = __lsx_vsrli_w(src1, 24); ++ src2 = __lsx_vsrli_w(src2, 24); ++ src3 = __lsx_vsrli_w(src3, 24); ++ src0 = __lsx_vpickev_h(src1, src0); ++ src1 = __lsx_vpickev_h(src3, src2); ++ src0 = __lsx_vpickev_b(src1, src0); ++ __lsx_vst(src0, dest, 0); ++ values += 16, width -= 16, dest += 16; ++ } ++ while (width >= 8) { ++ src0 = __lsx_vld(values, 0); ++ src1 = __lsx_vld(values, 16); ++ src0 = __lsx_vsrli_w(src0, 24); ++ src1 = __lsx_vsrli_w(src1, 24); ++ src0 = __lsx_vpickev_h(src1, src0); ++ src0 = __lsx_vpickev_b(src0, src0); ++ __lsx_vstelm_d(src0, dest, 0, 0); ++ values += 8; width -= 8; dest += 8; ++ } ++ while (width >= 4) { ++ src0 = __lsx_vld(values, 0); ++ src0 = __lsx_vsrli_w(src0, 24); ++ src0 = __lsx_vpickev_h(src0, src0); ++ src0 = __lsx_vpickev_b(src0, src0); ++ __lsx_vstelm_w(src0, dest, 0, 0); ++ values += 4; width -= 4; dest += 4; ++ } ++ while (width--) { ++ *dest++ = ((*values++) >> 24); ++ } ++} ++ ++static void lsx_fetch_scanline_a2r2g2b2 (bits_image_t *image, int x, int y, ++ int width, uint32_t *buffer, ++ const uint32_t *mask) ++{ ++ uint8_t *bits = (uint8_t *)(image->bits + y * image->rowstride); ++ uint32_t pixel, pixel0, pixel1, pixel2, pixel3; ++ ++ __m128i src; ++ __m128i t0, t1, t2, t3, t4, t5, t6, t7; ++ __m128i mask0 = __lsx_vreplgr2vr_b(0xc0); ++ __m128i mask1 = __lsx_vreplgr2vr_b(0x30); ++ __m128i mask2 = __lsx_vreplgr2vr_b(0x0c); ++ __m128i mask3 = __lsx_vreplgr2vr_b(0x03); ++ bits += x; ++ ++ while (width >= 16) { ++ src = __lsx_vld(bits, 0); ++ t0 = (src & mask0); t1 = (src & mask1); ++ t2 = (src & mask2); t3 = (src & mask3); ++ t0 |= __lsx_vsrli_b(t0, 2), t0 |= __lsx_vsrli_b(t0, 4); ++ t1 |= __lsx_vslli_b(t1, 2), t1 |= __lsx_vsrli_b(t1, 4); ++ t2 |= __lsx_vsrli_b(t2, 2), t2 |= __lsx_vslli_b(t2, 4); ++ t3 |= __lsx_vslli_b(t3, 2), t3 |= __lsx_vslli_b(t3, 4); ++ t4 = __lsx_vilvl_b(t0, t1); ++ t5 = __lsx_vilvh_b(t0, t1); ++ t6 = __lsx_vilvl_b(t2, t3); ++ t7 = __lsx_vilvh_b(t2, t3); ++ t0 = __lsx_vilvl_h(t4, t6); ++ t1 = __lsx_vilvh_h(t4, t6); ++ t2 = __lsx_vilvl_h(t5, t7); ++ t3 = __lsx_vilvh_h(t5, t7); ++ __lsx_vst(t0, buffer, 0); ++ __lsx_vst(t1, buffer, 16); ++ __lsx_vst(t2, buffer, 32); ++ __lsx_vst(t3, buffer, 48); ++ bits += 16, width -= 16, buffer += 16; ++ } ++ while (width >= 4) { ++ src = __lsx_vld(bits, 0); ++ t0 = (src & mask0); t1 = (src & mask1); ++ t2 = (src & mask2); t3 = (src & mask3); ++ t0 |= __lsx_vsrli_b(t0, 2), t0 |= __lsx_vsrli_b(t0, 4); ++ t1 |= __lsx_vslli_b(t1, 2), t1 |= __lsx_vsrli_b(t1, 4); ++ t2 |= __lsx_vsrli_b(t2, 2), t2 |= __lsx_vslli_b(t2, 4); ++ t3 |= 
__lsx_vslli_b(t3, 2), t3 |= __lsx_vslli_b(t3, 4); ++ t4 = __lsx_vilvl_b(t0, t1); ++ t5 = __lsx_vilvl_b(t2, t3); ++ t0 = __lsx_vilvl_h(t4, t5); ++ __lsx_vst(t0, buffer, 0); ++ bits += 4, width -= 4, buffer += 4; ++ } ++ while (width--) { ++ pixel = *bits++; ++ // a ++ pixel0 = pixel & 192; ++ pixel0 |= (pixel0 >> 2); ++ pixel0 |= (pixel0 >> 4); ++ pixel0 <<= 24; ++ // r ++ pixel1 = pixel & 48; ++ pixel1 |= (pixel1 << 2); ++ pixel1 |= (pixel1 >> 4); ++ pixel1 <<= 16; ++ // g ++ pixel2 = pixel & 12; ++ pixel2 |= (pixel2 >> 2); ++ pixel2 |= (pixel2 << 4); ++ pixel2 <<= 8; ++ // b ++ pixel3 = pixel & 3; ++ pixel3 |= (pixel3 << 2); ++ pixel3 |= (pixel3 << 4); ++ *buffer++ = (pixel3 | pixel2 | pixel1 | pixel0); ++ } ++} ++ ++static void lsx_store_scanline_a2r2g2b2 (bits_image_t *image, int x, int y, ++ int width, const uint32_t *values) ++{ ++ uint8_t *dest = (uint8_t *)(image->bits + y * image->rowstride); ++ __m128i src, d0; ++ __m128i mask = __lsx_vreplgr2vr_b(0xc0); ++ __m128i shuf = __lsx_vreplgr2vr_w(0x0F0B0703); ++ ++ dest += x; ++ ++ while (width >= 4) { ++ src = __lsx_vld(values, 0); ++ src = __lsx_vand_v(src, mask); ++ src = __lsx_vor_v(src, __lsx_vslli_w(src, 6)); ++ src = __lsx_vor_v(src, __lsx_vslli_w(src, 12)); ++ d0 = __lsx_vshuf_b(src, src, shuf); ++ __lsx_vstelm_w(d0, dest, 0, 0); ++ dest += 4; ++ values += 4; ++ width -= 4; ++ } ++ ++ while (width--) { ++ uint32_t pixel = *values++; ++ pixel &= 0xc0c0c0c0; ++ pixel |= (pixel << 6); ++ pixel |= (pixel << 12); ++ pixel >>= 24; ++ *dest++ = pixel; ++ } ++} ++ ++// fetch/store 16 bits ++static void lsx_fetch_scanline_a1r5g5b5 (bits_image_t *image, int x, int y, ++ int width, uint32_t *buffer, ++ const uint32_t *mask) ++{ ++ uint16_t *bits = (uint16_t *)(image->bits + y * image->rowstride); ++ uint32_t pixel, pixel0, pixel1, pixel2; ++ ++ __m128i src; ++ __m128i t, t0, t1, t2, t3; ++ __m128i mask0 = __lsx_vreplgr2vr_h(0x001f); ++ bits += x; ++ ++ while (width >= 4) { ++ src = __lsx_vld(bits, 0); ++ t0 = (src & mask0); ++ t0 = __lsx_vslli_h(t0, 3); ++ t = __lsx_vsrli_h(t0, 5); ++ t0 |= t; ++ t1 = __lsx_vsrli_h(src, 5); ++ t1 &= mask0; ++ t1 = __lsx_vslli_h(t1, 3); ++ t = __lsx_vsrli_h(t1, 5); ++ t1 |= t; ++ t2 = __lsx_vsrli_h(src, 10); ++ t2 &= mask0; ++ t2 = __lsx_vslli_h(t2, 3); ++ t = __lsx_vsrli_h(t2, 5); ++ t2 |= t; ++ t3 = __lsx_vsrli_h(src, 15); ++ t = __lsx_vslli_h(t3, 1); ++ t3 |= t; ++ t = __lsx_vslli_h(t3, 2); ++ t3 |= t; ++ t = __lsx_vslli_h(t3, 4); ++ t3 |= t; ++ t1 <<= 8; ++ t0 |= t1; ++ t3 <<= 8; ++ t2 |= t3; ++ t1 = __lsx_vilvl_h(t2, t0); ++ __lsx_vst(t1, buffer, 0); ++ bits += 4, width -= 4, buffer += 4; ++ } ++ ++ while (width--) { ++ pixel = *bits++; ++ // a ++ pixel0 = pixel >> 15; ++ pixel0 <<= 7; ++ pixel0 |= (pixel0 >> 1); ++ pixel0 |= (pixel0 >> 2); ++ pixel0 |= (pixel0 >> 4); ++ pixel0 <<= 24; ++ // r ++ pixel1 = pixel >> 10; ++ pixel1 &= 31; ++ pixel1 <<= 3; ++ pixel1 |= (pixel1 >> 5); ++ pixel1 <<= 16; ++ // g ++ pixel2 = pixel >> 5; ++ pixel2 &= 31; ++ pixel2 <<= 3; ++ pixel2 |= (pixel2 >> 5); ++ pixel2 <<= 8; ++ // b ++ pixel &= 31; ++ pixel <<= 3; ++ pixel |= (pixel >> 5); ++ *buffer++ = (pixel0 | pixel1 | pixel2 | pixel); ++ } ++} ++ ++static void lsx_store_scanline_a1r5g5b5 (bits_image_t *image, int x, int y, ++ int width, const uint32_t *values) ++{ ++ uint16_t *dest = (uint16_t *)(image->bits + y * image->rowstride); ++ uint32_t pixel, pixel0, pixel1, pixel2, pixel3; ++ __m128i in0, d0, t0, t1, t2, t3; ++ __m128i mask0 = { 0x0000800000008000, 0x0000800000008000}; ++ __m128i mask1 = { 
0x00007c0000007c00, 0x00007c0000007c00}; ++ __m128i mask2 = { 0x000003e0000003e0, 0x000003e0000003e0}; ++ __m128i mask3 = { 0x0000001f0000001f, 0x0000001f0000001f}; ++ __m128i mask4 = { 0x0006000400020000, 0x0006000400020000}; ++ ++ dest += x; ++ ++ while(width >= 4) { ++ in0 = __lsx_vld(values, 0); ++ t0 = __lsx_vsrli_w(in0, 16); ++ t1 = __lsx_vsrli_w(in0, 9); ++ t2 = __lsx_vsrli_w(in0, 6); ++ t3 = __lsx_vsrli_w(in0, 3); ++ t0 = __lsx_vand_v(t0, mask0); ++ t1 = __lsx_vand_v(t1, mask1); ++ t2 = __lsx_vand_v(t2, mask2); ++ t3 = __lsx_vand_v(t3, mask3); ++ d0 = __lsx_vor_v(__lsx_vor_v(t0, t1), __lsx_vor_v(t2, t3)); ++ d0 = __lsx_vshuf_h(mask4, d0, d0); ++ __lsx_vstelm_d(d0, dest, 0, 0); ++ values += 4, width -= 4, dest += 4; ++ } ++ ++ while(width--) { ++ pixel = *values++; ++ pixel0 = pixel >> 16; ++ pixel1 = pixel >> 9; ++ pixel2 = pixel >> 6; ++ pixel3 = pixel >> 3; ++ pixel0 &= 0x8000; ++ pixel1 &= 0x7c00; ++ pixel2 &= 0x03e0; ++ pixel3 &= 0x001f; ++ *dest++ = (pixel0 | pixel1 | pixel2 | pixel3); ++ } ++} ++ ++static void lsx_fetch_scanline_a4r4g4b4 (bits_image_t *image, int x, int y, ++ int width, uint32_t *buffer, ++ const uint32_t *mask) ++{ ++ uint16_t *bits = (uint16_t *)(image->bits + y * image->rowstride); ++ uint32_t pixel, pixel0, pixel1, pixel2; ++ ++ __m128i src; ++ __m128i t, t0, t1, t2, t3; ++ ++ __m128i mask0 = __lsx_vreplgr2vr_h(0x000f); ++ bits += x; ++ ++ while (width >= 4) { ++ src = __lsx_vld(bits, 0); ++ t0 = __lsx_vsrli_h(src, 12); ++ t = (t0 << 4), t0 |= t; ++ t1 = __lsx_vsrli_h(src, 8); ++ t1 &= mask0, t = (t1 << 4), t1 |= t; ++ t2 = __lsx_vsrli_h(src, 4); ++ t2 &= mask0, t = (t2 << 4), t2 |= t; ++ t3 = (src & mask0), t = (t3 << 4), t3 |= t; ++ t0 <<= 8, t2 <<= 8, t0 |= t1, t2 |= t3; ++ t1 = __lsx_vilvl_h(t0, t2); ++ __lsx_vst(t1, buffer, 0); ++ bits += 4, width -= 4, buffer += 4; ++ } ++ ++ while (width--) { ++ pixel = *bits++; ++ // a ++ pixel0 = pixel >> 12; ++ pixel0 |= (pixel0 << 4); ++ pixel0 <<= 24; ++ // r ++ pixel1 = pixel >> 8; ++ pixel1 &= 15; ++ pixel1 |= (pixel1 << 4); ++ pixel1 <<= 16; ++ // g ++ pixel2 = pixel >> 4; ++ pixel2 &= 15; ++ pixel2 |= (pixel2 << 4); ++ pixel2 <<= 8; ++ // b ++ pixel &= 15; ++ pixel |= (pixel << 4); ++ *buffer++ = (pixel0 | pixel1 | pixel2 | pixel); ++ } ++} ++ ++static void lsx_store_scanline_a4r4g4b4 (bits_image_t *image, int x, int y, ++ int width, const uint32_t *values) ++{ ++ uint16_t *dest = (uint16_t *)(image->bits + y * image->rowstride); ++ uint32_t pixel, pixel0, pixel1; ++ __m128i in0, d0, t0, t1; ++ __m128i mask0 = __lsx_vreplgr2vr_h(0xf0f0); ++ __m128i mask1 = __lsx_vreplgr2vr_h(0x00ff); ++ __m128i mask2 = { 0x0006000400020000, 0x0006000400020000 }; ++ dest += x; ++ ++ while(width >= 4) { ++ in0 = __lsx_vld(values, 0); ++ in0 = __lsx_vand_v(in0, mask0); ++ t0 = __lsx_vsrli_w(in0, 4); ++ t1 = __lsx_vsrli_w(in0, 8); ++ t0 = __lsx_vor_v(t0, t1); ++ t0 = __lsx_vand_v(t0, mask1); ++ t0 = __lsx_vor_v(t0, __lsx_vsrli_w(t0, 8)); ++ d0 = __lsx_vshuf_h(mask2, t0, t0); ++ __lsx_vstelm_d(d0, dest, 0, 0); ++ values += 4, width -= 4, dest += 4; ++ } ++ ++ while(width--) { ++ pixel = *values++; ++ pixel &= 0xf0f0f0f0; ++ pixel0 = (pixel >> 4); ++ pixel1 = (pixel >> 8); ++ pixel0 |= pixel1; ++ pixel0 &= 0x00ff00ff; ++ pixel0 |= (pixel0 >> 8); ++ pixel0 &= 0xffff; ++ *dest++ = pixel0; ++ } ++} ++ ++static const pixman_fast_path_t lsx_fast_paths[] = ++{ ++ PIXMAN_STD_FAST_PATH (OVER, solid, a8, a8r8g8b8, lsx_composite_over_n_8_8888), ++ PIXMAN_STD_FAST_PATH (OVER, solid, a8, x8r8g8b8, lsx_composite_over_n_8_8888), ++ 
PIXMAN_STD_FAST_PATH (OVER, solid, a8, a8b8g8r8, lsx_composite_over_n_8_8888), ++ PIXMAN_STD_FAST_PATH (OVER, solid, a8, x8b8g8r8, lsx_composite_over_n_8_8888), ++ PIXMAN_STD_FAST_PATH_CA (OVER, solid, a8r8g8b8, r5g6b5, lsx_composite_over_n_8888_0565_ca), ++ PIXMAN_STD_FAST_PATH_CA (OVER, solid, a8b8g8r8, b5g6r5, lsx_composite_over_n_8888_0565_ca), ++ PIXMAN_STD_FAST_PATH (OVER, x8r8g8b8, solid, a8r8g8b8, lsx_composite_over_x888_n_8888), ++ PIXMAN_STD_FAST_PATH (OVER, x8r8g8b8, solid, x8r8g8b8, lsx_composite_over_x888_n_8888), ++ PIXMAN_STD_FAST_PATH (OVER, x8b8g8r8, solid, a8b8g8r8, lsx_composite_over_x888_n_8888), ++ PIXMAN_STD_FAST_PATH (OVER, x8b8g8r8, solid, x8b8g8r8, lsx_composite_over_x888_n_8888), ++ PIXMAN_STD_FAST_PATH (OVER, a8r8g8b8, solid, a8r8g8b8, lsx_composite_over_8888_n_8888), ++ PIXMAN_STD_FAST_PATH (OVER, a8r8g8b8, solid, x8r8g8b8, lsx_composite_over_8888_n_8888), ++ PIXMAN_STD_FAST_PATH (OVER, a8b8g8r8, solid, a8b8g8r8, lsx_composite_over_8888_n_8888), ++ PIXMAN_STD_FAST_PATH (OVER, a8b8g8r8, solid, x8b8g8r8, lsx_composite_over_8888_n_8888), ++ PIXMAN_STD_FAST_PATH (OVER, x8r8g8b8, a8, x8r8g8b8, lsx_composite_over_x888_8_8888), ++ PIXMAN_STD_FAST_PATH (OVER, x8r8g8b8, a8, a8r8g8b8, lsx_composite_over_x888_8_8888), ++ PIXMAN_STD_FAST_PATH (OVER, x8b8g8r8, a8, x8b8g8r8, lsx_composite_over_x888_8_8888), ++ PIXMAN_STD_FAST_PATH (OVER, x8b8g8r8, a8, a8b8g8r8, lsx_composite_over_x888_8_8888), ++ PIXMAN_STD_FAST_PATH (OVER, solid, a8, r5g6b5, lsx_composite_over_n_8_0565), ++ PIXMAN_STD_FAST_PATH (OVER, solid, a8, b5g6r5, lsx_composite_over_n_8_0565), ++ PIXMAN_STD_FAST_PATH (SRC, x8r8g8b8, null, a8r8g8b8, lsx_composite_src_x888_8888), ++ PIXMAN_STD_FAST_PATH (SRC, x8b8g8r8, null, a8b8g8r8, lsx_composite_src_x888_8888), ++ PIXMAN_STD_FAST_PATH (OVER, a8r8g8b8, null, r5g6b5, lsx_composite_over_8888_0565), ++ PIXMAN_STD_FAST_PATH (OVER, a8b8g8r8, null, b5g6r5, lsx_composite_over_8888_0565), ++ PIXMAN_STD_FAST_PATH (OVER, solid, null, r5g6b5, lsx_composite_over_n_0565), ++ PIXMAN_STD_FAST_PATH (OVER, solid, null, b5g6r5, lsx_composite_over_n_0565), ++ PIXMAN_STD_FAST_PATH (OVER, a8r8g8b8, null, a8r8g8b8, lsx_composite_over_8888_8888), ++ PIXMAN_STD_FAST_PATH (OVER, a8r8g8b8, null, x8r8g8b8, lsx_composite_over_8888_8888), ++ PIXMAN_STD_FAST_PATH (OVER, a8b8g8r8, null, a8b8g8r8, lsx_composite_over_8888_8888), ++ PIXMAN_STD_FAST_PATH (OVER, a8b8g8r8, null, x8b8g8r8, lsx_composite_over_8888_8888), ++ PIXMAN_STD_FAST_PATH (OVER, x8r8g8b8, null, x8r8g8b8, lsx_composite_copy_area), ++ PIXMAN_STD_FAST_PATH (OVER, x8b8g8r8, null, x8b8g8r8, lsx_composite_copy_area), ++ PIXMAN_STD_FAST_PATH_CA (OVER, solid, a8r8g8b8, a8r8g8b8, lsx_composite_over_n_8888_8888_ca), ++ PIXMAN_STD_FAST_PATH_CA (OVER, solid, a8r8g8b8, x8r8g8b8, lsx_composite_over_n_8888_8888_ca), ++ PIXMAN_STD_FAST_PATH_CA (OVER, solid, a8b8g8r8, a8b8g8r8, lsx_composite_over_n_8888_8888_ca), ++ PIXMAN_STD_FAST_PATH_CA (OVER, solid, a8b8g8r8, x8b8g8r8, lsx_composite_over_n_8888_8888_ca), ++ PIXMAN_STD_FAST_PATH (OVER_REVERSE, solid, null, a8r8g8b8, lsx_composite_over_reverse_n_8888), ++ PIXMAN_STD_FAST_PATH (OVER_REVERSE, solid, null, a8b8g8r8, lsx_composite_over_reverse_n_8888), ++ PIXMAN_STD_FAST_PATH (ADD, a8, null, a8, lsx_composite_add_8_8), ++ PIXMAN_STD_FAST_PATH (ADD, solid, a8, a8, lsx_composite_add_n_8_8), ++ PIXMAN_STD_FAST_PATH (ADD, solid, null, a8, lsx_composite_add_n_8), ++ PIXMAN_STD_FAST_PATH (ADD, solid, null, x8r8g8b8, lsx_composite_add_n_8888), ++ PIXMAN_STD_FAST_PATH (ADD, solid, null, a8r8g8b8, 
lsx_composite_add_n_8888), ++ PIXMAN_STD_FAST_PATH (ADD, solid, null, x8b8g8r8, lsx_composite_add_n_8888), ++ PIXMAN_STD_FAST_PATH (ADD, solid, null, a8b8g8r8, lsx_composite_add_n_8888), ++ PIXMAN_STD_FAST_PATH (ADD, a8r8g8b8, null, a8r8g8b8, lsx_composite_add_8888_8888), ++ PIXMAN_STD_FAST_PATH (ADD, a8b8g8r8, null, a8b8g8r8, lsx_composite_add_8888_8888), ++ PIXMAN_STD_FAST_PATH (SRC, a8r8g8b8, null, a8r8g8b8, lsx_composite_copy_area), ++ PIXMAN_STD_FAST_PATH (SRC, a8b8g8r8, null, a8b8g8r8, lsx_composite_copy_area), ++ PIXMAN_STD_FAST_PATH (SRC, a8r8g8b8, null, x8r8g8b8, lsx_composite_copy_area), ++ PIXMAN_STD_FAST_PATH (SRC, a8b8g8r8, null, x8b8g8r8, lsx_composite_copy_area), ++ PIXMAN_STD_FAST_PATH (SRC, x8r8g8b8, null, x8r8g8b8, lsx_composite_copy_area), ++ PIXMAN_STD_FAST_PATH (SRC, x8b8g8r8, null, x8b8g8r8, lsx_composite_copy_area), ++ PIXMAN_STD_FAST_PATH (SRC, b8g8r8a8, null, b8g8r8x8, lsx_composite_copy_area), ++ PIXMAN_STD_FAST_PATH (SRC, b8g8r8a8, null, b8g8r8a8, lsx_composite_copy_area), ++ PIXMAN_STD_FAST_PATH (SRC, b8g8r8x8, null, b8g8r8x8, lsx_composite_copy_area), ++ PIXMAN_STD_FAST_PATH (SRC, r5g6b5, null, r5g6b5, lsx_composite_copy_area), ++ PIXMAN_STD_FAST_PATH (SRC, b5g6r5, null, b5g6r5, lsx_composite_copy_area), ++ PIXMAN_STD_FAST_PATH (SRC, a8, null, a8, lsx_composite_copy_area), ++ PIXMAN_STD_FAST_PATH (SRC, a8r8g8b8, null, r5g6b5, lsx_composite_src_x888_0565), ++ PIXMAN_STD_FAST_PATH (SRC, a8b8g8r8, null, b5g6r5, lsx_composite_src_x888_0565), ++ PIXMAN_STD_FAST_PATH (SRC, x8r8g8b8, null, r5g6b5, lsx_composite_src_x888_0565), ++ PIXMAN_STD_FAST_PATH (SRC, x8b8g8r8, null, b5g6r5, lsx_composite_src_x888_0565), ++ PIXMAN_STD_FAST_PATH (IN, solid, a8, a8, lsx_composite_in_n_8_8), ++ PIXMAN_STD_FAST_PATH (IN, a8, null, a8, lsx_composite_in_8_8), ++ { PIXMAN_OP_NONE }, ++}; ++ ++#define IMAGE_FLAGS \ ++ (FAST_PATH_STANDARD_FLAGS | FAST_PATH_ID_TRANSFORM | \ ++ FAST_PATH_BITS_IMAGE | FAST_PATH_SAMPLES_COVER_CLIP_NEAREST) ++ ++static const pixman_iter_info_t lsx_iters[] = ++{ ++ { ++ PIXMAN_x8r8g8b8, IMAGE_FLAGS, ITER_NARROW, ++ _pixman_iter_init_bits_stride, lsx_fetch_x8r8g8b8, NULL ++ }, ++ { ++ PIXMAN_r5g6b5, IMAGE_FLAGS, ITER_NARROW, ++ _pixman_iter_init_bits_stride, lsx_fetch_r5g6b5, NULL ++ }, ++ { ++ PIXMAN_a8, IMAGE_FLAGS, ITER_NARROW, ++ _pixman_iter_init_bits_stride, lsx_fetch_a8, NULL ++ }, ++ { PIXMAN_null }, ++}; ++ ++pixman_implementation_t * ++_pixman_implementation_create_lsx (pixman_implementation_t *fallback) ++{ ++ pixman_implementation_t *imp = ++ _pixman_implementation_create (fallback, lsx_fast_paths); ++ ++ /* Set up function pointers */ ++ imp->combine_32[PIXMAN_OP_SRC] = lsx_combine_src_u; ++ imp->combine_32[PIXMAN_OP_OVER] = lsx_combine_over_u; ++ imp->combine_32[PIXMAN_OP_OVER_REVERSE] = lsx_combine_over_reverse_u; ++ imp->combine_32[PIXMAN_OP_OUT] = lsx_combine_out_u; ++ imp->combine_32[PIXMAN_OP_OUT_REVERSE] = lsx_combine_out_reverse_u; ++ imp->combine_32[PIXMAN_OP_ADD] = lsx_combine_add_u; ++ imp->combine_32[PIXMAN_OP_DISJOINT_SRC] = lsx_combine_src_u; ++ imp->combine_32[PIXMAN_OP_CONJOINT_SRC] = lsx_combine_src_u; ++ imp->combine_32[PIXMAN_OP_MULTIPLY] = lsx_combine_multiply_u; ++ imp->combine_32_ca[PIXMAN_OP_SRC] = lsx_combine_src_ca; ++ imp->combine_32_ca[PIXMAN_OP_OVER] = lsx_combine_over_ca; ++ imp->combine_32_ca[PIXMAN_OP_OUT_REVERSE] = lsx_combine_out_reverse_ca; ++ ++ imp->blt = lsx_blt; ++ imp->fill = lsx_fill; ++ imp->iter_info = lsx_iters; ++ ++ return imp; ++} ++ ++void setup_accessors_lsx (bits_image_t *image) ++{ ++ if 
(image->format == PIXMAN_a8) { // 8 bits ++ image->fetch_scanline_32 = lsx_fetch_scanline_a8; ++ image->store_scanline_32 = lsx_store_scanline_a8; ++ } else if (image->format == PIXMAN_a2r2g2b2) { ++ image->fetch_scanline_32 = lsx_fetch_scanline_a2r2g2b2; ++ image->store_scanline_32 = lsx_store_scanline_a2r2g2b2; ++ } else if (image->format == PIXMAN_a1r5g5b5) { // 16 bits ++ image->fetch_scanline_32 = lsx_fetch_scanline_a1r5g5b5; ++ image->store_scanline_32 = lsx_store_scanline_a1r5g5b5; ++ } else if (image->format == PIXMAN_a4r4g4b4) { ++ image->fetch_scanline_32 = lsx_fetch_scanline_a4r4g4b4; ++ image->store_scanline_32 = lsx_store_scanline_a4r4g4b4; ++ } ++} +diff --git a/pixman/pixman-private.h b/pixman/pixman-private.h +index f43e87f..7b43d7e 100644 +--- a/pixman/pixman-private.h ++++ b/pixman/pixman-private.h +@@ -655,6 +655,20 @@ pixman_implementation_t * + _pixman_implementation_create_vmx (pixman_implementation_t *fallback); + #endif + ++#ifdef USE_LOONGARCH_LSX ++pixman_implementation_t * ++_pixman_implementation_create_lsx (pixman_implementation_t *fallback); ++ ++void setup_accessors_lsx (bits_image_t *image); ++#endif ++ ++#ifdef USE_LOONGARCH_LASX ++pixman_implementation_t * ++_pixman_implementation_create_lasx (pixman_implementation_t *fallback); ++ ++void setup_accessors_lasx (bits_image_t *image); ++#endif ++ + pixman_bool_t + _pixman_implementation_disabled (const char *name); + +@@ -670,6 +684,11 @@ _pixman_ppc_get_implementations (pixman_implementation_t *imp); + pixman_implementation_t * + _pixman_mips_get_implementations (pixman_implementation_t *imp); + ++pixman_implementation_t * ++_pixman_loongarch_get_implementations (pixman_implementation_t *imp); ++ ++void setup_loongarch_accessors (bits_image_t *image); ++ + pixman_implementation_t * + _pixman_choose_implementation (void); + +-- +GitLab + diff --git a/pixman/PKGBUILD b/pixman/PKGBUILD index 85c3f5b945..5358758bb8 100644 --- a/pixman/PKGBUILD +++ b/pixman/PKGBUILD @@ -12,10 +12,17 @@ license=('MIT') depends=('glibc') makedepends=('meson' 'libpng') provides=('libpixman-1.so') -source=(https://xorg.freedesktop.org/releases/individual/lib/${pkgname}-${pkgver}.tar.xz) -sha512sums=('1a1d21b86b3c6784c4c2606b7069723b4dbb747eac9fce95bca86516787840379ffd49abc42d11e7143e85c32c85496f33c2562c7a910ca6f963599affdc3224') +source=(https://xorg.freedesktop.org/releases/individual/lib/${pkgname}-${pkgver}.tar.xz + 83.patch) +sha512sums=('1a1d21b86b3c6784c4c2606b7069723b4dbb747eac9fce95bca86516787840379ffd49abc42d11e7143e85c32c85496f33c2562c7a910ca6f963599affdc3224' + '0b7970cd955e31211fa55882974bbb321b0e7421da5c06a2cff196ea3e0efcefce50b4ba5f19a23417aaea2145529c762800dbb5887dfdcfa240efe400d44f7f') #validpgpkeys=('') # Maarten Lankhorst +prepare() { + cd "$pkgname-$pkgver" + patch -p1 -i "$srcdir/83.patch" +} + build() { arch-meson $pkgname-$pkgver build \ -D loongson-mmi=disabled \ @@ -25,7 +32,12 @@ build() { -D a64-neon=disabled \ -D iwmmxt=disabled \ -D mips-dspr2=disabled \ - -D gtk=disabled + -D gtk=disabled \ + -D lsx=enabled \ + -D b_lto=false \ + -D mmx=disabled \ + -D ssse3=disabled \ + -D sse2=disabled ninja -C build } diff --git a/pkgfile/PKGBUILD b/pkgfile/PKGBUILD index b64b343932..299785ffd1 100644 --- a/pkgfile/PKGBUILD +++ b/pkgfile/PKGBUILD @@ -8,11 +8,18 @@ arch=('loong64' 'x86_64') url="https://github.com/falconindy/pkgfile" license=('MIT') depends=('libarchive' 'curl' 'pcre' 'pacman') -makedepends=('git' 'meson') -source=("git+https://github.com/falconindy/pkgfile?signed#tag=v$pkgver") +makedepends=('git' 
'meson' 'clang' 'systemd') +source=("git+https://github.com/falconindy/pkgfile?signed#tag=v$pkgver" + pkgfile-use-loong64.patch) validpgpkeys=('487EACC08557AD082088DABA1EB2638FF56C0C53') # Dave Reisner install=pkgfile.install -md5sums=('SKIP') +md5sums=('SKIP' + 'b300e43eaa26d4b46ee37ae21a92d262') + +prepare() { + cd "$pkgname" + patch -p1 -i $srcdir/pkgfile-use-loong64.patch +} build() { cd "$pkgname" diff --git a/pkgfile/pkgfile-use-loong64.patch b/pkgfile/pkgfile-use-loong64.patch new file mode 100644 index 0000000000..53b8ce8ad3 --- /dev/null +++ b/pkgfile/pkgfile-use-loong64.patch @@ -0,0 +1,17 @@ +diff --git a/src/update.c b/src/update.c +index 7a225b5..b2d84b7 100644 +--- a/src/update.c ++++ b/src/update.c +@@ -628,7 +628,11 @@ int pkgfile_update(struct repovec_t *repos, struct config_t *config) { + if (repos->architecture == NULL) { + struct utsname un; + uname(&un); +- repos->architecture = strdup(un.machine); ++ if (strcmp(un.machine, "loongarch64") == 0) { ++ repos->architecture = strdup("loong64"); ++ } else { ++ repos->architecture = strdup(un.machine); ++ } + } + + /* ensure all our DBs are 0644 */ diff --git a/plasma-desktop/PKGBUILD b/plasma-desktop/PKGBUILD index fd8c3de896..bc5201562a 100644 --- a/plasma-desktop/PKGBUILD +++ b/plasma-desktop/PKGBUILD @@ -19,7 +19,8 @@ optdepends=('plasma-nm: Network manager applet' 'kaccounts-integration: OpenDesktop integration plugin' 'packagekit-qt5: to install new krunner plugins') makedepends=(extra-cmake-modules kdoctools5 xf86-input-evdev xf86-input-synaptics xf86-input-libinput xorg-server-devel - scim kdesignerplugin kaccounts-integration intltool packagekit-qt5 kinit wayland-protocols libibus) + scim kdesignerplugin kaccounts-integration intltool packagekit-qt5 kinit wayland-protocols libibus + libxkbfile) groups=(plasma) sha256sums=('de015fc921d34da23d85998a03afa7c81d935f5d9c55261ff7a2b413c9cfd09f' 'SKIP') diff --git a/pngquant/PKGBUILD b/pngquant/PKGBUILD index 5c634c98a8..e6bd06cf9f 100644 --- a/pngquant/PKGBUILD +++ b/pngquant/PKGBUILD @@ -16,7 +16,7 @@ sha256sums=('ddd8889a9c269ba454d0c5e4f7167948d55d77c4570b23f671809fd3a68b6822') prepare() { cd "$srcdir/$pkgname-$pkgver" sed -i 's|, path = "lib/imagequant-sys"||' Cargo.toml - cargo fetch --target "$CARCH-unknown-linux-gnu" + cargo fetch --target "`uname -m`-unknown-linux-gnu" } build() { diff --git a/polkit-qt/PKGBUILD b/polkit-qt/PKGBUILD index 64fc806339..b4c2d7b602 100644 --- a/polkit-qt/PKGBUILD +++ b/polkit-qt/PKGBUILD @@ -6,7 +6,7 @@ pkgbase=polkit-qt pkgname=(polkit-qt5 polkit-qt6) pkgver=0.175.0 -pkgrel=1 +pkgrel=2 pkgdesc='A library that allows developers to access PolicyKit API with a nice Qt-style API' arch=(loong64 x86_64) url='https://www.kde.org/' diff --git a/polkit/PKGBUILD b/polkit/PKGBUILD index bfaae776b6..328e6e8967 100644 --- a/polkit/PKGBUILD +++ b/polkit/PKGBUILD @@ -47,6 +47,7 @@ build() { -D examples=true -D gtk_doc=true -D man=true + -D js_engine=duktape -D os_type=redhat -D polkitd_uid=102 -D polkitd_user=polkitd diff --git a/portmidi/PKGBUILD b/portmidi/PKGBUILD index 2747d797ed..ebfb53d597 100644 --- a/portmidi/PKGBUILD +++ b/portmidi/PKGBUILD @@ -32,6 +32,8 @@ build() { -W no-dev ) + CFLAGS=${CFLAGS/-Werror=format-security/} + CXXFLAGS=${CXXFLAGS/-Werror=format-security/} cmake "${cmake_options[@]}" cmake --build build --verbose } diff --git a/postgresql/PKGBUILD b/postgresql/PKGBUILD index 7aee2aaf96..e265ffb0de 100644 --- a/postgresql/PKGBUILD +++ b/postgresql/PKGBUILD @@ -22,7 +22,8 @@ 
source=(https://ftp.postgresql.org/pub/source/v${pkgver}/postgresql-${pkgver}.ta postgresql.sysusers postgresql.tmpfiles libxml2-2.12.patch - openssl3.2.patch) + openssl3.2.patch + add-loongarch-support.patch) md5sums=('9cbfb9076ed06384471802b850698a6d' '6ce1dab3da98a10f9190e6b3037f93aa' '632e22e96d6ace85b76a380487cfbf8c' @@ -65,6 +66,7 @@ prepare() { patch -p1 < ../0002-Force-RPATH-to-be-used-for-the-PL-Perl-plugin.patch patch -p1 < ../libxml2-2.12.patch patch -p1 < ../openssl3.2.patch + patch -p1 < ../add-loongarch-support.patch } build() { diff --git a/postgresql/add-loongarch-support.patch b/postgresql/add-loongarch-support.patch new file mode 100644 index 0000000000..50d23f13cc --- /dev/null +++ b/postgresql/add-loongarch-support.patch @@ -0,0 +1,13 @@ +Index: postgresql-13.6/src/include/storage/s_lock.h +=================================================================== +--- postgresql-13.6.orig/src/include/storage/s_lock.h ++++ postgresql-13.6/src/include/storage/s_lock.h +@@ -321,7 +321,7 @@ + * We use the int-width variant of the builtin because it works on more chips + * than other widths. + */ +-#if defined(__arm__) || defined(__arm) || defined(__aarch64__) || defined(__aarch64) ++#if defined(__arm__) || defined(__arm) || defined(__aarch64__) || defined(__aarch64) || defined(__loongarch64) + #ifdef HAVE_GCC__SYNC_INT32_TAS + #define HAS_TEST_AND_SET + diff --git a/ppsspp/PKGBUILD b/ppsspp/PKGBUILD index 006c4e1d75..bc149f958c 100644 --- a/ppsspp/PKGBUILD +++ b/ppsspp/PKGBUILD @@ -50,6 +50,7 @@ source=( git+https://github.com/KhronosGroup/SPIRV-Cross.git ppsspp-sdl.desktop ppsspp-qt.desktop + ppsspp-la64.patch ) b2sums=('SKIP' 'SKIP' @@ -83,6 +84,8 @@ prepare() { git config submodule.${submodule}.url ../${submodule#*/} git -c protocol.file.allow=always submodule update ${submodule} done + patch -p1 -i $srcdir/ppsspp-la64.patch + cd ext/armips for submodule in ext/filesystem; do git submodule init ${submodule} @@ -94,6 +97,8 @@ prepare() { build() { export CC=clang export CXX=clang++ + CFLAGS=${CFLAGS/-fstack-clash-protection/} + CXXFLAGS=${CXXFLAGS/-fstack-clash-protection/} cmake -S ppsspp -B build-sdl -G Ninja \ -DCMAKE_BUILD_TYPE=None \ -DCMAKE_SKIP_RPATH=ON \ @@ -101,6 +106,7 @@ build() { -DOpenGL_GL_PREFERENCE=GLVND \ -DUSE_SYSTEM_LIBZIP=ON \ -DUSE_SYSTEM_SNAPPY=ON \ + -DUSE_SYSTEM_FFMPEG=ON \ -DUSE_SYSTEM_ZSTD=ON \ -DUSING_QT_UI=OFF \ -Wno-dev @@ -153,3 +159,16 @@ package_ppsspp-assets() { } # vim: ts=2 sw=2 et: +b2sums=('SKIP' + 'SKIP' + 'SKIP' + 'SKIP' + 'SKIP' + 'SKIP' + 'SKIP' + 'SKIP' + 'SKIP' + 'SKIP' + 'c6bcdfedee866dfdcc82a8c333c31ff73ed0beec65b63acec8bc8186383c0bc9f0912f21bb9715b665e8dc1793b1a85599761f9037856fa54ad8aa3bfdbfd468' + '328e2ba47b78d242b0ec6ba6bfa039c77a36d1ef7246e5c2c2432d8e976e9360baf505eb05f48408ede1a30545cbbb7f875bf5ebd0252cef35523d449b8254a0' + '7010f5fed4a64ffba2479d2c190b3dcc0b7518343cb01973347ca1c8e577af07fe01a24a5864403d77c632a0e7324c5cc0c5dd244328a3422cdab97eb665ba9d') diff --git a/primecount/PKGBUILD b/primecount/PKGBUILD index f20b02fd98..556584455c 100644 --- a/primecount/PKGBUILD +++ b/primecount/PKGBUILD @@ -21,7 +21,7 @@ build() { -DBUILD_STATIC_LIBS=OFF \ -DBUILD_SHARED_LIBS=ON \ -DWITH_POPCNT=OFF \ - -DWITH_FLOAT128=ON + -DWITH_FLOAT128=OFF cmake --build build } diff --git a/procs/PKGBUILD b/procs/PKGBUILD index b4d5fa5d03..a37feee078 100644 --- a/procs/PKGBUILD +++ b/procs/PKGBUILD @@ -17,7 +17,7 @@ sha256sums=('77c5f5d3bdfc9cef870732500ef58c203a1464f924b12f79c7d9e301b4dd5b16') prepare() { cd "$pkgname-$pkgver" - cargo fetch --locked 
--target "$CARCH-unknown-linux-gnu" + cargo fetch --target "`uname -m`-unknown-linux-gnu" } build() { diff --git a/progpick/PKGBUILD b/progpick/PKGBUILD index 3c3951f267..feb622d885 100644 --- a/progpick/PKGBUILD +++ b/progpick/PKGBUILD @@ -16,7 +16,7 @@ validpgpkeys=("64B13F7117D6E07D661BBCE0FE763A64F5E54FD6") prepare() { cd "${pkgname}-${pkgver}" - cargo fetch --locked --target "$CARCH-unknown-linux-gnu" + cargo fetch --locked --target "`uname -m`-unknown-linux-gnu" } build() { diff --git a/prometheus-memcached-exporter/PKGBUILD b/prometheus-memcached-exporter/PKGBUILD index 6a5ad8b737..30f785d13d 100644 --- a/prometheus-memcached-exporter/PKGBUILD +++ b/prometheus-memcached-exporter/PKGBUILD @@ -31,6 +31,9 @@ check() { build() { cd memcached_exporter-$pkgver + go mod edit -replace=golang.org/x/sys=github.com/golang/sys@v0.0.0-20220622161953-175b2fd9d664 + go mod edit -replace=golang.org/x/net=github.com/golang/net@v0.0.0-20220622184535-263ec571b305 + go mod tidy go build \ -trimpath \ -buildmode=pie \ diff --git a/prometheus-mysqld-exporter/PKGBUILD b/prometheus-mysqld-exporter/PKGBUILD index ec02b1f2b5..46da148c9e 100644 --- a/prometheus-mysqld-exporter/PKGBUILD +++ b/prometheus-mysqld-exporter/PKGBUILD @@ -22,6 +22,9 @@ sha512sums=('015f116f7da8da2b74605b3b8f2ce693d1c99673834a63e8c003a7ddf7718b27cfc build() { cd "mysqld_exporter-${pkgver}" + go mod edit -replace=golang.org/x/sys=github.com/golang/sys@v0.0.0-20220622161953-175b2fd9d664 + go mod edit -replace=golang.org/x/net=github.com/golang/net@v0.0.0-20220622184535-263ec571b305 + go mod tidy go build \ -trimpath \ -buildmode=pie \ diff --git a/prometheus-wireguard-exporter/PKGBUILD b/prometheus-wireguard-exporter/PKGBUILD index a113c091d4..90cf148bbd 100644 --- a/prometheus-wireguard-exporter/PKGBUILD +++ b/prometheus-wireguard-exporter/PKGBUILD @@ -23,7 +23,7 @@ sha512sums=('81b6fa1b0fa0bef17a97d841b2fe42c88a80d4c5d0bca8d82aaf24e24b2eeefdac6 prepare() { cd "prometheus_wireguard_exporter-${pkgver}" - cargo fetch --locked --target "$CARCH-unknown-linux-gnu" + cargo fetch --locked --target "`uname -m`-unknown-linux-gnu" } build() { diff --git a/protobuf-c/PKGBUILD b/protobuf-c/PKGBUILD index 3fe73c29f3..6a4a643967 100644 --- a/protobuf-c/PKGBUILD +++ b/protobuf-c/PKGBUILD @@ -17,6 +17,7 @@ sha256sums=('7b404c63361ed35b3667aec75cc37b54298d56dd2bcf369de3373212cc06fd98') build() { cd "$pkgname-$pkgver" + export LDFLAGS="-Wl,--no-as-needed" ./configure --prefix=/usr --disable-static make } diff --git a/psiconv/PKGBUILD b/psiconv/PKGBUILD index 3a908ce3d7..0e10c08b6a 100644 --- a/psiconv/PKGBUILD +++ b/psiconv/PKGBUILD @@ -12,7 +12,8 @@ license=('GPL') depends=('graphicsmagick') makedepends=('bc') backup=('etc/psiconv/psiconv.conf') -source=("http://www.frodo.looijaard.name/system/files/software/${pkgname}/${pkgname}-${pkgver}.tar.gz" psiconv.patch) +source=("http://www.frodo.looijaard.name/system/files/software/${pkgname}/${pkgname}-${pkgver}.tar.gz" psiconv.patch + psiconv-fix-build.patch) md5sums=('286e427b10f4d10aaeef1944210a2ea6' 'SKIP') sha512sums=('ec21d1e4734ad79dc19146223d7016bd53b3a3bb602f3a55489663ccbf3a4c6f5c6417728e798b4448acc2e49ff5145f58c812edfdd6c11ee9060128b6157d3a' @@ -21,6 +22,7 @@ sha512sums=('ec21d1e4734ad79dc19146223d7016bd53b3a3bb602f3a55489663ccbf3a4c6f5c6 prepare() { cd $pkgname-$pkgver sed -e 's|Magick-config|GraphicsMagick-config|g' -i configure.in + patch -p1 -i $srcdir/psiconv-fix-build.patch autoreconf -vi patch -p1 <"$srcdir"/psiconv.patch diff --git a/psiconv/psiconv-fix-build.patch 
b/psiconv/psiconv-fix-build.patch new file mode 100644 index 0000000000..c5a5e89f99 --- /dev/null +++ b/psiconv/psiconv-fix-build.patch @@ -0,0 +1,74 @@ +Index: psiconv-0.9.9/program/psiconv/gen_html4.c +=================================================================== +--- psiconv-0.9.9.orig/program/psiconv/gen_html4.c ++++ psiconv-0.9.9/program/psiconv/gen_html4.c +@@ -32,6 +32,8 @@ + + #define TEMPSTR_LEN 100 + ++static psiconv_list fileformat_list; /* of struct psiconv_fileformat */ ++ + static void text(const psiconv_config config,psiconv_list list, + psiconv_string_t data,const encoding enc); + static void header(const psiconv_config config, psiconv_list list, +Index: psiconv-0.9.9/program/psiconv/gen_image.c +=================================================================== +--- psiconv-0.9.9.orig/program/psiconv/gen_image.c ++++ psiconv-0.9.9/program/psiconv/gen_image.c +@@ -33,6 +33,7 @@ + #endif + + #ifdef IMAGEMAGICK ++static psiconv_list fileformat_list; /* of struct psiconv_fileformat */ + static Image *get_paint_data_section(psiconv_paint_data_section sec); + static void image_to_list(psiconv_list list,Image *image,const char *dest); + static void gen_image_list(const psiconv_config config,psiconv_list list, +Index: psiconv-0.9.9/program/psiconv/gen_txt.c +=================================================================== +--- psiconv-0.9.9.orig/program/psiconv/gen_txt.c ++++ psiconv-0.9.9/program/psiconv/gen_txt.c +@@ -31,6 +31,7 @@ + #ifdef DMALLOC + #include "dmalloc.h" + #endif ++static psiconv_list fileformat_list; /* of struct psiconv_fileformat */ + + static void output_para(const psiconv_config config,psiconv_list list, + const psiconv_paragraph para,encoding encoding_type); +Index: psiconv-0.9.9/program/psiconv/gen_xhtml.c +=================================================================== +--- psiconv-0.9.9.orig/program/psiconv/gen_xhtml.c ++++ psiconv-0.9.9/program/psiconv/gen_xhtml.c +@@ -33,6 +33,8 @@ + + #define TEMPSTR_LEN 100 + ++static psiconv_list fileformat_list; /* of struct psiconv_fileformat */ ++ + static void text(const psiconv_config config,psiconv_list list, + psiconv_string_t data,const encoding enc); + static void color(const psiconv_config config, psiconv_list list, +Index: psiconv-0.9.9/program/psiconv/psiconv.c +=================================================================== +--- psiconv-0.9.9.orig/program/psiconv/psiconv.c ++++ psiconv-0.9.9/program/psiconv/psiconv.c +@@ -47,6 +47,7 @@ + static void print_help(void); + static void print_version(void); + static void strtoupper(char *str); ++static psiconv_list fileformat_list; /* of struct psiconv_fileformat */ + + void print_help(void) + { +Index: psiconv-0.9.9/program/psiconv/psiconv.h +=================================================================== +--- psiconv-0.9.9.orig/program/psiconv/psiconv.h ++++ psiconv-0.9.9/program/psiconv/psiconv.h +@@ -52,7 +52,5 @@ typedef struct fileformat_s { + output_function *output; + } *fileformat; + +-psiconv_list fileformat_list; /* of struct psiconv_fileformat */ +- + + #endif /* PSICONV_H */ diff --git a/pueue/PKGBUILD b/pueue/PKGBUILD index f4dba74703..aa2a47105c 100644 --- a/pueue/PKGBUILD +++ b/pueue/PKGBUILD @@ -15,7 +15,7 @@ sha256sums=('ad7b760d4bed5a946acbdb6e3985d94d03944e3c0eb2221aea65da0aa001c636') prepare() { cd "$pkgname-$pkgver" - cargo fetch --locked --target "$CARCH-unknown-linux-gnu" + cargo fetch --locked --target "`uname -m`-unknown-linux-gnu" mkdir -p utils/completions/ } diff --git a/pyalpm/PKGBUILD b/pyalpm/PKGBUILD 
index 1c3367e402..b454290579 100644 --- a/pyalpm/PKGBUILD +++ b/pyalpm/PKGBUILD @@ -24,7 +24,7 @@ build() { check() { cd "${pkgname}" - PYTHONPATH="$PWD/build/lib.linux-$CARCH-cpython-311" pytest + PYTHONPATH="$PWD/build/lib.linux-`uname -m`-cpython-311" pytest } package() { diff --git a/pycups/PKGBUILD b/pycups/PKGBUILD index bbaae2f713..7ec3e96a00 100644 --- a/pycups/PKGBUILD +++ b/pycups/PKGBUILD @@ -11,7 +11,7 @@ license=('GPL') pkgdesc="Python bindings for libcups" depends=('libcups' 'python') provides=('pycups') -source=(https://github.com/OpenPrinting/pycups/archive/v${pkgver}.tar.gz) +source=(https://github.com/OpenPrinting/pycups/archive/refs/tags/v${pkgver}.tar.gz) sha256sums=('cf7e63b07e2dbc6811e77f55cc11d7191799298a6565b83fc028ee3c9da0ad78') build() { diff --git a/pyflow/PKGBUILD b/pyflow/PKGBUILD index 73f2fb1f23..d55a75ba8c 100644 --- a/pyflow/PKGBUILD +++ b/pyflow/PKGBUILD @@ -15,7 +15,7 @@ b2sums=('218b27e1a375bf5f7e486b4ab5cb0b1b8a4fc0ce12a763e504abf12ebc7c4e28064960b prepare() { cd "${pkgname}-${pkgver}" - cargo fetch --locked --target "$CARCH-unknown-linux-gnu" + cargo fetch --locked --target "`uname -m`-unknown-linux-gnu" } build() { diff --git a/pygobject/PKGBUILD b/pygobject/PKGBUILD index 2f51f4d71b..eaa26e1b11 100644 --- a/pygobject/PKGBUILD +++ b/pygobject/PKGBUILD @@ -24,7 +24,7 @@ makedepends=( python-sphinx python-sphinx_rtd_theme ) -checkdepends=( +makedepends+=( gtk3 python-pytest xorg-server-xvfb diff --git a/pyqt5/PKGBUILD b/pyqt5/PKGBUILD index c8933d1076..e710e224e1 100644 --- a/pyqt5/PKGBUILD +++ b/pyqt5/PKGBUILD @@ -9,7 +9,7 @@ pkgbase=pyqt5 pkgname=('python-pyqt5') pkgdesc="A set of Python bindings for the Qt5 toolkit" pkgver=5.15.10 -pkgrel=1 +pkgrel=2 arch=('loong64' 'x86_64') url="https://riverbankcomputing.com/software/pyqt/intro" license=('GPL') diff --git a/pyqt6-3d/PKGBUILD b/pyqt6-3d/PKGBUILD index f059241d3d..4eba9af48b 100644 --- a/pyqt6-3d/PKGBUILD +++ b/pyqt6-3d/PKGBUILD @@ -3,7 +3,7 @@ pkgbase=pyqt6-3d pkgname=(python-pyqt6-3d) pkgver=6.6.0 -pkgrel=1 +pkgrel=2 pkgdesc='Python bindings for Qt3D' arch=(loong64 x86_64) url='https://www.riverbankcomputing.com/software/pyqt3d/intro' diff --git a/pyqt6-charts/PKGBUILD b/pyqt6-charts/PKGBUILD index a1e6952a96..0e0d7171e0 100644 --- a/pyqt6-charts/PKGBUILD +++ b/pyqt6-charts/PKGBUILD @@ -3,7 +3,7 @@ pkgbase=pyqt6-charts pkgname=(python-pyqt6-charts) pkgver=6.6.0 -pkgrel=1 +pkgrel=2 pkgdesc='Python bindings for QtChart' arch=(loong64 x86_64) url='https://www.riverbankcomputing.com/software/pyqtchart/intro' diff --git a/pyqt6-datavisualization/PKGBUILD b/pyqt6-datavisualization/PKGBUILD index b65754d95d..ce022b861a 100644 --- a/pyqt6-datavisualization/PKGBUILD +++ b/pyqt6-datavisualization/PKGBUILD @@ -3,7 +3,7 @@ pkgbase=pyqt6-datavisualization pkgname=(python-pyqt6-datavisualization) pkgver=6.6.0 -pkgrel=1 +pkgrel=2 pkgdesc='Python bindings for QtDataVisualization' arch=(loong64 x86_64) url='https://www.riverbankcomputing.com/software/pyqtdatavisualization/intro' diff --git a/pyqt6-networkauth/PKGBUILD b/pyqt6-networkauth/PKGBUILD index e6e82e7ef1..c7dd46f49f 100644 --- a/pyqt6-networkauth/PKGBUILD +++ b/pyqt6-networkauth/PKGBUILD @@ -3,7 +3,7 @@ pkgbase=pyqt6-networkauth pkgname=(python-pyqt6-networkauth) pkgver=6.6.0 -pkgrel=1 +pkgrel=2 pkgdesc='Python bindings for QtNetworkAuth' arch=(loong64 x86_64) url='https://www.riverbankcomputing.com/software/pyqtnetworkauth/intro' diff --git a/python-aiohttp/PKGBUILD b/python-aiohttp/PKGBUILD index 6bb4bbc1ef..1d3075be9a 100644 --- 
a/python-aiohttp/PKGBUILD +++ b/python-aiohttp/PKGBUILD @@ -87,7 +87,7 @@ check() { cd ${pkgname} local _python_version=$(python -c 'import sys; print("".join(map(str, sys.version_info[:2])))') mv tests/autobahn/test_autobahn.py{,.bak} # Docker tests - PYTHONPATH="$PWD/build/lib.linux-$CARCH-cpython-${_python_version}" pytest + PYTHONPATH="$PWD/build/lib.linux-`uname -m`-cpython-${_python_version}" pytest mv tests/autobahn/test_autobahn.py{.bak,} } diff --git a/python-ansiwrap/PKGBUILD b/python-ansiwrap/PKGBUILD index 1be71c2e75..91ae373976 100644 --- a/python-ansiwrap/PKGBUILD +++ b/python-ansiwrap/PKGBUILD @@ -1,4 +1,5 @@ # Maintainer: Felix Yan +export CHECKFUNC=1 pkgname=python-ansiwrap pkgver=0.8.4 diff --git a/python-appdirs/PKGBUILD b/python-appdirs/PKGBUILD index 89cc2b7c3b..bbe9244121 100644 --- a/python-appdirs/PKGBUILD +++ b/python-appdirs/PKGBUILD @@ -17,8 +17,8 @@ makedepends=( 'python-wheel' ) source=($_name-$pkgver.tar.gz::$url/archive/refs/tags/$pkgver.tar.gz) -sha512sums=('4c0e1e8dcd3f91b8b2d215b3f1e2ffaa85137fe054d07d3a2d442b1419e3b44e96fdea1620bd000bd3f4744f71b71f07280094f073df0ff008fac902af614656') -b2sums=('cb9466f4a7f7c1d6f5b6d7ca031820ec4d3450afcaa8ba571e35387c3109ede4e2afbf2c1141a9d01d13798f55524d5efd3fa12546a9378abbda405353938d79') +sha512sums=('8b0cdd9fd471d45b186aa47607691cf378dabd3edc7b7026a57bd6d6f57698e86f440818a5e23ba4288b35d6bb8cb6eb0106eae8aab09d8863ee15025d300883') +b2sums=('f5c91a7ba7b9da75259a25359b5d6d6ae2a563efcb1483a6febb7f1e3c1801a64c05474c8fd76ec6e73c9a5a145e8b460bef4e447c69eaaeb88ae542d153a40b') build() { cd $_name-$pkgver diff --git a/python-apsw/PKGBUILD b/python-apsw/PKGBUILD index 10a23df300..225f77a64d 100644 --- a/python-apsw/PKGBUILD +++ b/python-apsw/PKGBUILD @@ -28,7 +28,7 @@ check() { gcc ${CFLAGS} ${CPPFLAGS} ${LDFLAGS} -fPIC -shared -o testextension.sqlext src/testextension.c # do glob expansion in variable assignment local python_version=$(python -c 'import sys; print("".join(map(str, sys.version_info[:2])))') - PYTHONPATH="$PWD/build/lib.linux-$CARCH-cpython-${python_version}" python -m unittest discover -vs . + PYTHONPATH="$PWD/build/lib.linux-`uname -m`-cpython-${python_version}" python -m unittest discover -vs . 
} package() { diff --git a/python-binaryornot/PKGBUILD b/python-binaryornot/PKGBUILD index f7e52bc962..4b6ee9f2e9 100644 --- a/python-binaryornot/PKGBUILD +++ b/python-binaryornot/PKGBUILD @@ -24,19 +24,19 @@ source=( $pkgname-0.4.4-hypothesis_tests.patch::https://github.com/audreyr/binaryornot/pull/52.patch $pkgname-0.4.4-set_version.patch::https://github.com/binaryornot/binaryornot/commit/cff1a0a4478c17d4f970d133c06abbf6945b6a5e.patch ) -sha512sums=('31dfb79bb5847e12487d94519a357dece4572f7ed064686d53a49c2de5a51d6441be64523c98cca6221ed89be5bf26e54866dd3b79ac8d89fd5019a5b4d75a45' +sha512sums=('379e71b41824b9389ea02c64223e26c57694d07b749c197e3a6b9f10558c8d502e9b93a13ae6c3a4bca49064ffc650ce822073dc312fb06e50e06b8a3f04f419' '8a0f1066a580f08778434ed7d30c2ebf2764dbfd746b561ffce2fb8dd8d77cafaf4a58b03504cf5b1e4d37e0a6ffe3038dcaa5611cdfd7d42ada86edd1e47f3f' 'a56266b54b5000e4cdaadcca2119f1822ab1de1b45adee1095ab8841dc0289cc853b4e3e2be1079786f18dde84424a78909f33130f3081d3fa5cf352026ce1c0') -b2sums=('cb2099313f602915bacd5b463642f16430fcb0ab62dcaae546cb854780996526fb777c0b730b4b89e664ec7f995ddd2d2f632cfbac2dadca45958cfd1dd7a410' +b2sums=('4a70e22eba51a266987308e8d1b4571ace5760f945920194a1a61f4336f33dcb655c4f0b1f760aa3502834d29a1a64ac62657e5d73eac8f1d7d02edf71db8af9' 'f07730709f11bf0732b60b81603c40bea1fa6da2d8df545b088072e0f710a7e123e438b03d2390122b6000c14343e932b38d4b6f8956ea87b72e29deae1a4715' 'd56fb5a064be3ce810f274bb2e49e9afd921b536b9194da07a87fedd7e403af0edf0e4e05e975efa55b3d7deefd2ad71a2b36a1f1da45c653344f64478b1a613') -prepare() { - # fix tests using python-hypothesis: https://github.com/audreyr/binaryornot/issues/46 - patch -Np1 -d $_name-$pkgver -i ../$pkgname-0.4.4-hypothesis_tests.patch - # fix version as the wrong commit was tagged: https://github.com/binaryornot/binaryornot/issues/210 - patch -Np1 -d $_name-$pkgver -i ../$pkgname-0.4.4-set_version.patch -} +#prepare() { +# # fix tests using python-hypothesis: https://github.com/audreyr/binaryornot/issues/46 +##patch -Np1 -d $_name-$pkgver -i ../$pkgname-0.4.4-hypothesis_tests.patch +# # fix version as the wrong commit was tagged: https://github.com/binaryornot/binaryornot/issues/210 +##patch -Np1 -d $_name-$pkgver -i ../$pkgname-0.4.4-set_version.patch +#} build() { cd $_name-$pkgver diff --git a/python-black/PKGBUILD b/python-black/PKGBUILD index cc0d6fa776..d2487076db 100644 --- a/python-black/PKGBUILD +++ b/python-black/PKGBUILD @@ -1,6 +1,7 @@ # Maintainer: Maxim Baz # Maintainer: Daniel M. 
Capella # Contributor: James Zhu +export CHECKFUNC=1 pkgname=python-black pkgver=23.12.1 diff --git a/python-cachy/PKGBUILD b/python-cachy/PKGBUILD index b482cc308f..03602725e2 100644 --- a/python-cachy/PKGBUILD +++ b/python-cachy/PKGBUILD @@ -1,5 +1,6 @@ # Maintainer: Caleb Maclennan # Contributor: Eli Schwartz +export CHECKFUNC=1 _pkgname=cachy pkgname=python-cachy diff --git a/python-cryptography/PKGBUILD b/python-cryptography/PKGBUILD index 92f502ce07..dc1abe312a 100644 --- a/python-cryptography/PKGBUILD +++ b/python-cryptography/PKGBUILD @@ -9,8 +9,9 @@ arch=('loong64' 'x86_64') license=('Apache') url="https://pypi.python.org/pypi/cryptography" depends=('python-cffi') +options=(!lto) makedepends=('git' 'python-setuptools-rust' 'llvm' 'clang' 'lld') -checkdepends=('python-pytest' 'python-pytest-subtests' 'python-iso8601' 'python-pretend' +makedepends+=('python-pytest' 'python-pytest-subtests' 'python-iso8601' 'python-pretend' 'python-hypothesis' 'python-pytz' 'python-pytest-benchmark') source=("git+https://github.com/pyca/cryptography.git#commit=$_commit") sha512sums=('SKIP') @@ -18,8 +19,7 @@ sha512sums=('SKIP') build() { cd cryptography echo $RUSTFLAGS - # https://github.com/pyca/cryptography/issues/9023 - CC=clang RUSTFLAGS+="-Clinker-plugin-lto -Clinker=clang -Clink-arg=-fuse-ld=lld" python setup.py build + python setup.py build } check() { diff --git a/python-debugpy/PKGBUILD b/python-debugpy/PKGBUILD index 5f26ad999b..47e3cb4823 100644 --- a/python-debugpy/PKGBUILD +++ b/python-debugpy/PKGBUILD @@ -25,7 +25,7 @@ build() { python setup.py build # Compile attach libraries cd build/lib*/debugpy/_vendored/pydevd/pydevd_attach_to_process - g++ ${CXXFLAGS} -m64 -shared -o attach_linux_amd64.so -fPIC -nostartfiles linux_and_mac/attach.cpp ${LDFLAGS} + g++ ${CXXFLAGS} -shared -o attach_linux_amd64.so -fPIC -nostartfiles linux_and_mac/attach.cpp ${LDFLAGS} } package() { diff --git a/python-et-xmlfile/PKGBUILD b/python-et-xmlfile/PKGBUILD index 73fc437f2c..ed9df5d7e4 100644 --- a/python-et-xmlfile/PKGBUILD +++ b/python-et-xmlfile/PKGBUILD @@ -12,7 +12,7 @@ depends=('python') makedepends=('python-setuptools') checkdepends=('python-pytest-runner' 'python-lxml') source=("https://foss.heptapod.net/openpyxl/et_xmlfile/-/archive/${pkgver}/et_xmlfile-${pkgver}.tar.gz") -sha256sums=('8d6705c2f97b2d6195c95e5f3781a1ed44a59d43cf1263e04034767e5db65131') +sha256sums=('707c2211ba4a041fd866ef6a60966f4ff82c89f4d4dfabf5aea59aaf97f9be4f') prepare() { cd "$srcdir"/et_xmlfile-${pkgver} diff --git a/python-greenlet/PKGBUILD b/python-greenlet/PKGBUILD index 505dbaf61c..f6882ca891 100644 --- a/python-greenlet/PKGBUILD +++ b/python-greenlet/PKGBUILD @@ -13,8 +13,15 @@ depends=('python') makedepends=('python-build' 'python-installer' 'python-setuptools' 'python-wheel') checkdepends=('python-objgraph' 'python-psutil') -source=("https://files.pythonhosted.org/packages/source/g/greenlet/greenlet-${pkgver}.tar.gz") -sha512sums=('67d74352802331642eba0917550a75e9bc2a7d223bc0ce4ee7993d05197b4d0650813439e7c495baf2309303740cd21e60a157e634aafff470332a685603ffec') +source=("https://files.pythonhosted.org/packages/source/g/greenlet/greenlet-${pkgver}.tar.gz" + python-greenlet-la64.patch) +sha512sums=('67d74352802331642eba0917550a75e9bc2a7d223bc0ce4ee7993d05197b4d0650813439e7c495baf2309303740cd21e60a157e634aafff470332a685603ffec' + 'a427f6121802409a0d5d83a44eeafa50e19e5b1a02041220b2ee98f90d262e4acc0affc8d7f9ae5bbec05a738293dbf5c6d37adca0291ab753c0f41f7a7d5c2a') + +prepare() { + cd greenlet-$pkgver + patch -p1 -i 
$srcdir/python-greenlet-la64.patch +} build() { cd greenlet-$pkgver diff --git a/python-greenlet/python-greenlet-la64.patch b/python-greenlet/python-greenlet-la64.patch new file mode 100644 index 0000000000..39c801106f --- /dev/null +++ b/python-greenlet/python-greenlet-la64.patch @@ -0,0 +1,61 @@ +From c2bd5118ec44752450c63fd8b1a47802f5c0cf0e Mon Sep 17 00:00:00 2001 +From: merore +Date: Mon, 23 Aug 2021 18:00:20 +0000 +Subject: [PATCH] Port to LoongArch64 + +--- + .../platform/switch_loongarch64_linux.h | 31 +++++++++++++++++++ + src/greenlet/slp_platformselect.h | 2 ++ + 2 files changed, 33 insertions(+) + create mode 100644 src/greenlet/platform/switch_loongarch64_linux.h + +diff --git a/src/greenlet/platform/switch_loongarch64_linux.h b/src/greenlet/platform/switch_loongarch64_linux.h +new file mode 100644 +index 00000000..03a5ce9c +--- /dev/null ++++ b/src/greenlet/platform/switch_loongarch64_linux.h +@@ -0,0 +1,31 @@ ++#define STACK_REFPLUS 1 ++ ++#ifdef SLP_EVAL ++#define STACK_MAGIC 0 ++ ++#define REGS_TO_SAVE "s0", "s1", "s2", "s3", "s4", "s5", \ ++ "s6", "s7", "s8", "fp", \ ++ "f24", "f25", "f26", "f27", "f28", "f29", "f30", "f31" ++ ++static int ++slp_switch(void) ++{ ++ register int ret; ++ register long *stackref, stsizediff; ++ __asm__ volatile ("" : : : REGS_TO_SAVE); ++ __asm__ volatile ("move %0, $sp" : "=r" (stackref) : ); ++ { ++ SLP_SAVE_STATE(stackref, stsizediff); ++ __asm__ volatile ( ++ "add.d $sp, $sp, %0\n\t" ++ : /* no outputs */ ++ : "r" (stsizediff) ++ ); ++ SLP_RESTORE_STATE(); ++ } ++ __asm__ volatile ("" : : : REGS_TO_SAVE); ++ __asm__ volatile ("move %0, $zero" : "=r" (ret) : ); ++ return ret; ++} ++ ++#endif +diff --git a/src/greenlet/slp_platformselect.h b/src/greenlet/slp_platformselect.h +index b5e8eb6e..f3be7ad9 100644 +--- a/src/greenlet/slp_platformselect.h ++++ b/src/greenlet/slp_platformselect.h +@@ -34,6 +34,8 @@ + #include "platform/switch_s390_unix.h" /* Linux/S390 */ + #elif defined(__GNUC__) && defined(__s390x__) && defined(__linux__) + #include "platform/switch_s390_unix.h" /* Linux/S390 zSeries (64-bit) */ ++#elif defined(__GNUC__) && defined(__loongarch64) && defined(__linux__) ++#include "platform/switch_loongarch64_linux.h" /* Linux/LoongArch64 */ + #elif defined(__GNUC__) && defined(__arm__) + #ifdef __APPLE__ + #include diff --git a/python-libcst/PKGBUILD b/python-libcst/PKGBUILD index 0fb052f37e..a9b57a177a 100644 --- a/python-libcst/PKGBUILD +++ b/python-libcst/PKGBUILD @@ -1,4 +1,5 @@ # Maintainer: Filipe Laíns (FFY00) +export CHECKFUNC=1 _pkgname=libcst pkgname=python-libcst diff --git a/python-mss/PKGBUILD b/python-mss/PKGBUILD index 96b5b3c573..8cdd5780ff 100644 --- a/python-mss/PKGBUILD +++ b/python-mss/PKGBUILD @@ -1,4 +1,5 @@ # Maintainer: Bruno Pagani +export CHECKFUNC=1 _pkg=mss pkgname=python-${_pkg} diff --git a/python-nodeenv/PKGBUILD b/python-nodeenv/PKGBUILD index b2b10577fe..baa7e390e8 100644 --- a/python-nodeenv/PKGBUILD +++ b/python-nodeenv/PKGBUILD @@ -9,9 +9,16 @@ license=('BSD') arch=('any') depends=('python-setuptools' 'make') optdepends=('nodejs: for --node=system') -checkdepends=('nodejs' 'python-pytest-runner' 'python-coverage') -source=("$pkgname-$pkgver.tar.gz::https://github.com/ekalinin/nodeenv/archive/$pkgver.tar.gz") -sha512sums=('96dce219e00d3837b2b0083af9fe6d94ed4e3cd029e3da564263ad8656dcb9c52440c2df6a6954095e5cacd03e44437f08695603dea82c28122713045183014f') +makedepends+=('nodejs' 'python-pytest-runner' 'python-coverage') 
+source=("$pkgname-$pkgver.tar.gz::https://github.com/ekalinin/nodeenv/archive/$pkgver.tar.gz" + nodeenv-loong64.patch) +sha512sums=('96dce219e00d3837b2b0083af9fe6d94ed4e3cd029e3da564263ad8656dcb9c52440c2df6a6954095e5cacd03e44437f08695603dea82c28122713045183014f' + 'be071c28ec37ed9063b3d34a0f54bc56635e6a3bbce51ce50c6584e4d2c50f2b5875af734304c7d76bb881d6e0b94cf1f9ab49b24be5f72125a2a6a3000ae14d') + +prepare() { + cd nodeenv-$pkgver + patch -p1 -i $srcdir/nodeenv-loong64.patch +} build() { cd nodeenv-$pkgver diff --git a/python-nodeenv/nodeenv-loong64.patch b/python-nodeenv/nodeenv-loong64.patch new file mode 100644 index 0000000000..2dfb274fe6 --- /dev/null +++ b/python-nodeenv/nodeenv-loong64.patch @@ -0,0 +1,12 @@ +Index: nodeenv-1.7.0/nodeenv.py +=================================================================== +--- nodeenv-1.7.0.orig/nodeenv.py ++++ nodeenv-1.7.0/nodeenv.py +@@ -545,6 +545,7 @@ def get_node_bin_url(version): + 'arm64/v8': 'arm64', + 'armv8': 'arm64', + 'armv8.4': 'arm64', ++ 'loongarch64': 'loong64', + 'ppc64le': 'ppc64le', # Power PC + 's390x': 's390x', # IBM S390x + } diff --git a/python-numpy/PKGBUILD b/python-numpy/PKGBUILD index adff121b42..ce1277ca7a 100755 --- a/python-numpy/PKGBUILD +++ b/python-numpy/PKGBUILD @@ -14,8 +14,15 @@ depends=('cblas' 'lapack' 'python') optdepends=('blas-openblas: faster linear algebra') makedepends=('python-build' 'python-installer' 'meson-python' 'cmake' 'gcc-fortran' 'cython') checkdepends=('python-pytest' 'python-hypothesis') -source=("https://github.com/numpy/numpy/releases/download/v$pkgver/numpy-$pkgver.tar.gz") -sha512sums=('25556b41e2db9cfc52c1dfa61b05e4fc1b7b6df3b169f365375575d1146857fdb5ff91ca1508b968c296b7a06e5c6d95e82c41cdc3561587a46d3aa178f6305d') +source=("https://github.com/numpy/numpy/releases/download/v$pkgver/numpy-$pkgver.tar.gz" + "add-loongarch-support.patch") +sha512sums=('25556b41e2db9cfc52c1dfa61b05e4fc1b7b6df3b169f365375575d1146857fdb5ff91ca1508b968c296b7a06e5c6d95e82c41cdc3561587a46d3aa178f6305d' + '06e4ec4f893e29d78156b4e8acaa5294b8340926b7d67f4b9f5d29113a404cf23b7c048f9d64de8a5907f7181306e50768546e64f1b53d038685d00a58e9c93a') + +prepare() { + cd numpy-$pkgver + patch -p1 -i "$srcdir/add-loongarch-support.patch" +} build() { cd numpy-$pkgver diff --git a/python-numpy/add-loongarch-support.patch b/python-numpy/add-loongarch-support.patch new file mode 100644 index 0000000000..3292dfd819 --- /dev/null +++ b/python-numpy/add-loongarch-support.patch @@ -0,0 +1,33 @@ +diff --git a/numpy/core/include/numpy/npy_cpu.h b/numpy/core/include/numpy/npy_cpu.h +index 509e23a..34ecb20 100644 +--- a/numpy/core/include/numpy/npy_cpu.h ++++ b/numpy/core/include/numpy/npy_cpu.h +@@ -17,6 +17,7 @@ + * NPY_CPU_SH_BE + * NPY_CPU_ARCEL + * NPY_CPU_ARCEB ++ * NPY_CPU_LOONGARCH + * NPY_CPU_RISCV64 + * NPY_CPU_WASM + */ +@@ -95,6 +96,8 @@ + #define NPY_CPU_MIPSEB + #elif defined(__or1k__) + #define NPY_CPU_OR1K ++#elif defined(__loongarch__) ++ #define NPY_CPU_LOONGARCH + #elif defined(__mc68000__) + #define NPY_CPU_M68K + #elif defined(__arc__) && defined(__LITTLE_ENDIAN__) +diff --git a/numpy/core/include/numpy/npy_endian.h b/numpy/core/include/numpy/npy_endian.h +index aa367a0..d5905ae 100644 +--- a/numpy/core/include/numpy/npy_endian.h ++++ b/numpy/core/include/numpy/npy_endian.h +@@ -45,6 +45,7 @@ + || defined(NPY_CPU_ARMEL_AARCH32) \ + || defined(NPY_CPU_ARMEL_AARCH64) \ + || defined(NPY_CPU_SH_LE) \ ++ || defined(NPY_CPU_LOONGARCH) \ + || defined(NPY_CPU_MIPSEL) \ + || defined(NPY_CPU_PPC64LE) \ + || defined(NPY_CPU_ARCEL) \ 
diff --git a/python-parso/PKGBUILD b/python-parso/PKGBUILD index 9bac50b2e7..a9e7cf9049 100644 --- a/python-parso/PKGBUILD +++ b/python-parso/PKGBUILD @@ -37,7 +37,7 @@ package() { python setup.py install --root="$pkgdir" --optimize=1 --skip-build install -Dm 644 LICENSE.txt -t "$pkgdir/usr/share/licenses/$pkgname" install -Dm 644 CHANGELOG.rst README.rst docs/_build/text/*.txt -t "$pkgdir/usr/share/doc/$pkgname" - install -Dm 644 docs/_build/man/parso.1 "$pkgdir/usr/share/man/man1/$pkgname.1" + #install -Dm 644 docs/_build/man/parso.1 "$pkgdir/usr/share/man/man1/$pkgname.1" } # vim: ts=2 sw=2 et: diff --git a/python-poetry/PKGBUILD b/python-poetry/PKGBUILD index be16c635a1..5eb18a5335 100644 --- a/python-poetry/PKGBUILD +++ b/python-poetry/PKGBUILD @@ -41,7 +41,7 @@ _deps=(build virtualenv) depends=(python "${_deps[@]/#/python-}") -checkdepends=(python-deepdiff # not mentioned but required +makedepends+=(python-deepdiff # not mentioned but required python-psutil # for python-pytest-xdist python-httpretty python-pip # not mentioned but required diff --git a/python-pyelftools/PKGBUILD b/python-pyelftools/PKGBUILD index 6e30d8f311..8bcd18f51c 100644 --- a/python-pyelftools/PKGBUILD +++ b/python-pyelftools/PKGBUILD @@ -12,10 +12,17 @@ license=('custom:Public Domain') depends=('python') makedepends=('python-build' 'python-installer' 'python-wheel' 'python-setuptools') options=('!strip') -source=(https://github.com/eliben/${_pkgname}/archive/v${pkgver}/${_pkgname}-${pkgver}.tar.gz) -sha512sums=('5bd4c797f90307e351d541b8de8f76124c66e497b68b811f7012e1271c902beb6ab530a424b338777d12277d44f9b5f89f049e05d9fc2ec36a90b6fa16f1c1a4') -b2sums=('b7974bc1a51ff5ba6ced17aac44e3911ff8d892564a3bef07ae6a5fc261eb7d2eb02170678c4d0bafedaa9e393fa4ca6d2059c420436ea73aa4f6aa7b49a894e') +source=(https://github.com/eliben/${_pkgname}/archive/v${pkgver}/${_pkgname}-${pkgver}.tar.gz +pyelftools-0.29.patch) +sha512sums=('5bd4c797f90307e351d541b8de8f76124c66e497b68b811f7012e1271c902beb6ab530a424b338777d12277d44f9b5f89f049e05d9fc2ec36a90b6fa16f1c1a4' + 'a66285a59cefd86f168a277bfe1dcd648b6bd8ff054183ff1a4768818757e17c7a6412f0d1c262836e2f53ec49f2c8975b46c18ba240a2b5923f6ff88c2ee325') +b2sums=('b7974bc1a51ff5ba6ced17aac44e3911ff8d892564a3bef07ae6a5fc261eb7d2eb02170678c4d0bafedaa9e393fa4ca6d2059c420436ea73aa4f6aa7b49a894e' + '2785903c2265766f51a1b213132a95e27256248f662d98478c5220b5f623c2040123fe850fa6affe316a45cafac01606518f2594d93444acc9a1f4b77541b639') +prepare() { + cd ${_pkgname}-${pkgver} + patch -p1 -i $srcdir/pyelftools-0.29.patch +} build() { cd ${_pkgname}-${pkgver} diff --git a/python-pyelftools/pyelftools-0.29.patch b/python-pyelftools/pyelftools-0.29.patch new file mode 100644 index 0000000000..f8b14c49e2 --- /dev/null +++ b/python-pyelftools/pyelftools-0.29.patch @@ -0,0 +1,452 @@ +diff --git a/elftools/elf/constants.py b/elftools/elf/constants.py +index fc55aac..567f1e3 100644 +--- a/elftools/elf/constants.py ++++ b/elftools/elf/constants.py +@@ -51,6 +51,31 @@ class E_FLAGS(object): + EF_MIPS_ARCH_32R2=0x70000000 + EF_MIPS_ARCH_64R2=0x80000000 + ++ EF_RISCV_RVC=0x00000001 ++ EF_RISCV_FLOAT_ABI=0x00000006 ++ EF_RISCV_FLOAT_ABI_SOFT=0x00000000 ++ EF_RISCV_FLOAT_ABI_SINGLE=0x00000002 ++ EF_RISCV_FLOAT_ABI_DOUBLE=0x00000004 ++ EF_RISCV_FLOAT_ABI_QUAD=0x00000006 ++ EF_RISCV_RVE=0x00000008 ++ EF_RISCV_TSO=0x00000010 ++ ++ EF_LOONGARCH_OBJABI_MASK=0x000000C0 ++ EF_LOONGARCH_OBJABI_V0=0x00000000 ++ EF_LOONGARCH_OBJABI_V1=0x00000040 ++ EF_LOONGARCH_ABI_MODIFIER_MASK=0x00000007 ++ EF_LOONGARCH_ABI_SOFT_FLOAT=0x00000001 ++ 
EF_LOONGARCH_ABI_SINGLE_FLOAT=0x00000002 ++ EF_LOONGARCH_ABI_DOUBLE_FLOAT=0x00000003 ++ # The names in the glibc elf.h say "LARCH" instead of "LOONGARCH", ++ # provide these names for users' convenience. ++ EF_LARCH_OBJABI_MASK = EF_LOONGARCH_OBJABI_MASK ++ EF_LARCH_OBJABI_V0 = EF_LOONGARCH_OBJABI_V0 ++ EF_LARCH_OBJABI_V1 = EF_LOONGARCH_OBJABI_V1 ++ EF_LARCH_ABI_MODIFIER_MASK = EF_LOONGARCH_ABI_MODIFIER_MASK ++ EF_LARCH_ABI_SOFT_FLOAT = EF_LOONGARCH_ABI_SOFT_FLOAT ++ EF_LARCH_ABI_SINGLE_FLOAT = EF_LOONGARCH_ABI_SINGLE_FLOAT ++ EF_LARCH_ABI_DOUBLE_FLOAT = EF_LOONGARCH_ABI_DOUBLE_FLOAT + + class E_FLAGS_MASKS(object): + """Masks to be used for convenience when working with E_FLAGS +diff --git a/elftools/elf/descriptions.py b/elftools/elf/descriptions.py +index 38c80b6..b6a615e 100644 +--- a/elftools/elf/descriptions.py ++++ b/elftools/elf/descriptions.py +@@ -10,7 +10,8 @@ from .enums import ( + ENUM_D_TAG, ENUM_E_VERSION, ENUM_P_TYPE_BASE, ENUM_SH_TYPE_BASE, + ENUM_RELOC_TYPE_i386, ENUM_RELOC_TYPE_x64, + ENUM_RELOC_TYPE_ARM, ENUM_RELOC_TYPE_AARCH64, ENUM_RELOC_TYPE_PPC64, +- ENUM_RELOC_TYPE_MIPS, ENUM_ATTR_TAG_ARM, ENUM_DT_FLAGS, ENUM_DT_FLAGS_1) ++ ENUM_RELOC_TYPE_MIPS, ENUM_ATTR_TAG_ARM, ENUM_RELOC_TYPE_LOONGARCH, ++ ENUM_DT_FLAGS, ENUM_DT_FLAGS_1) + from .constants import ( + P_FLAGS, RH_FLAGS, SH_FLAGS, SUNW_SYMINFO_FLAGS, VER_FLAGS) + from ..common.py3compat import bytes2hex, iteritems +@@ -151,6 +152,8 @@ def describe_reloc_type(x, elffile): + return _DESCR_RELOC_TYPE_PPC64.get(x, _unknown) + elif arch == 'MIPS': + return _DESCR_RELOC_TYPE_MIPS.get(x, _unknown) ++ elif arch == 'LoongArch': ++ return _DESCR_RELOC_TYPE_LOONGARCH.get(x, _unknown) + else: + return 'unrecognized: %-7x' % (x & 0xFFFFFFFF) + +@@ -389,6 +392,7 @@ _DESCR_E_MACHINE = dict( + EM_BLACKFIN='Analog Devices Blackfin', + EM_PPC='PowerPC', + EM_PPC64='PowerPC64', ++ EM_LOONGARCH='LoongArch', + RESERVED='RESERVED', + ) + +@@ -672,6 +676,7 @@ _DESCR_RELOC_TYPE_ARM = _reverse_dict(ENUM_RELOC_TYPE_ARM) + _DESCR_RELOC_TYPE_AARCH64 = _reverse_dict(ENUM_RELOC_TYPE_AARCH64) + _DESCR_RELOC_TYPE_PPC64 = _reverse_dict(ENUM_RELOC_TYPE_PPC64) + _DESCR_RELOC_TYPE_MIPS = _reverse_dict(ENUM_RELOC_TYPE_MIPS) ++_DESCR_RELOC_TYPE_LOONGARCH = _reverse_dict(ENUM_RELOC_TYPE_LOONGARCH) + + _low_priority_D_TAG = ( + # these are 'meta-tags' marking semantics of numeric ranges of the enum +diff --git a/elftools/elf/elffile.py b/elftools/elf/elffile.py +index d228db7..90418a0 100644 +--- a/elftools/elf/elffile.py ++++ b/elftools/elf/elffile.py +@@ -533,6 +533,7 @@ class ELFFile(object): + 'EM_RISCV' : 'RISC-V', + 'EM_BPF' : 'Linux BPF - in-kernel virtual machine', + 'EM_CSKY' : 'C-SKY', ++ 'EM_LOONGARCH' : 'LoongArch', + 'EM_FRV' : 'Fujitsu FR-V' + } + +diff --git a/elftools/elf/enums.py b/elftools/elf/enums.py +index 745aefc..5116523 100644 +--- a/elftools/elf/enums.py ++++ b/elftools/elf/enums.py +@@ -257,6 +257,7 @@ ENUM_E_MACHINE = dict( + EM_RISCV = 243, # RISC-V + EM_BPF = 247, # Linux BPF - in-kernel virtual machine + EM_CSKY = 252, # C-SKY ++ EM_LOONGARCH = 258, # LoongArch + EM_FRV = 0x5441, # Fujitsu FR-V + # Reservations + # reserved 11-14 Reserved for future use +@@ -812,6 +813,119 @@ ENUM_RELOC_TYPE_x64 = dict( + _default_=Pass, + ) + ++ENUM_RELOC_TYPE_BPF = dict( ++ R_BPF_NONE=0, ++ R_BPF_64_64=1, ++ R_BPF_64_ABS64=2, ++ R_BPF_64_ABS32=3, ++ R_BPF_64_NODYLD32=4, ++ R_BPF_64_32=10, ++ _default_=Pass, ++) ++ ++# https://github.com/loongson/la-abi-specs/blob/release/laelf.adoc ++ENUM_RELOC_TYPE_LOONGARCH = dict( ++ R_LARCH_NONE=0, ++ 
R_LARCH_32=1, ++ R_LARCH_64=2, ++ R_LARCH_RELATIVE=3, ++ R_LARCH_COPY=4, ++ R_LARCH_JUMP_SLOT=5, ++ R_LARCH_TLS_DTPMOD32=6, ++ R_LARCH_TLS_DTPMOD64=7, ++ R_LARCH_TLS_DTPREL32=8, ++ R_LARCH_TLS_DTPREL64=9, ++ R_LARCH_TLS_TPREL32=10, ++ R_LARCH_TLS_TPREL64=11, ++ R_LARCH_IRELATIVE=12, ++ R_LARCH_MARK_LA=20, ++ R_LARCH_MARK_PCREL=21, ++ R_LARCH_SOP_PUSH_PCREL=22, ++ R_LARCH_SOP_PUSH_ABSOLUTE=23, ++ R_LARCH_SOP_PUSH_DUP=24, ++ R_LARCH_SOP_PUSH_GPREL=25, ++ R_LARCH_SOP_PUSH_TLS_TPREL=26, ++ R_LARCH_SOP_PUSH_TLS_GOT=27, ++ R_LARCH_SOP_PUSH_TLS_GD=28, ++ R_LARCH_SOP_PUSH_PLT_PCREL=29, ++ R_LARCH_SOP_ASSERT=30, ++ R_LARCH_SOP_NOT=31, ++ R_LARCH_SOP_SUB=32, ++ R_LARCH_SOP_SL=33, ++ R_LARCH_SOP_SR=34, ++ R_LARCH_SOP_ADD=35, ++ R_LARCH_SOP_AND=36, ++ R_LARCH_SOP_IF_ELSE=37, ++ R_LARCH_SOP_POP_32_S_10_5=38, ++ R_LARCH_SOP_POP_32_U_10_12=39, ++ R_LARCH_SOP_POP_32_S_10_12=40, ++ R_LARCH_SOP_POP_32_S_10_16=41, ++ R_LARCH_SOP_POP_32_S_10_16_S2=42, ++ R_LARCH_SOP_POP_32_S_5_20=43, ++ R_LARCH_SOP_POP_32_S_0_5_10_16_S2=44, ++ R_LARCH_SOP_POP_32_S_0_10_10_16_S2=45, ++ R_LARCH_SOP_POP_32_U=46, ++ R_LARCH_ADD8=47, ++ R_LARCH_ADD16=48, ++ R_LARCH_ADD24=49, ++ R_LARCH_ADD32=50, ++ R_LARCH_ADD64=51, ++ R_LARCH_SUB8=52, ++ R_LARCH_SUB16=53, ++ R_LARCH_SUB24=54, ++ R_LARCH_SUB32=55, ++ R_LARCH_SUB64=56, ++ R_LARCH_GNU_VTINHERIT=57, ++ R_LARCH_GNU_VTENTRY=58, ++ R_LARCH_B16=64, ++ R_LARCH_B21=65, ++ R_LARCH_B26=66, ++ R_LARCH_ABS_HI20=67, ++ R_LARCH_ABS_LO12=68, ++ R_LARCH_ABS64_LO20=69, ++ R_LARCH_ABS64_HI12=70, ++ R_LARCH_PCALA_HI20=71, ++ R_LARCH_PCALA_LO12=72, ++ R_LARCH_PCALA64_LO20=73, ++ R_LARCH_PCALA64_HI12=74, ++ R_LARCH_GOT_PC_HI20=75, ++ R_LARCH_GOT_PC_LO12=76, ++ R_LARCH_GOT64_PC_LO20=77, ++ R_LARCH_GOT64_PC_HI12=78, ++ R_LARCH_GOT_HI20=79, ++ R_LARCH_GOT_LO12=80, ++ R_LARCH_GOT64_LO20=81, ++ R_LARCH_GOT64_HI12=82, ++ R_LARCH_TLS_LE_HI20=83, ++ R_LARCH_TLS_LE_LO12=84, ++ R_LARCH_TLS_LE64_LO20=85, ++ R_LARCH_TLS_LE64_HI12=86, ++ R_LARCH_TLS_IE_PC_HI20=87, ++ R_LARCH_TLS_IE_PC_LO12=88, ++ R_LARCH_TLS_IE64_PC_LO20=89, ++ R_LARCH_TLS_IE64_PC_HI12=90, ++ R_LARCH_TLS_IE_HI20=91, ++ R_LARCH_TLS_IE_LO12=92, ++ R_LARCH_TLS_IE64_LO20=93, ++ R_LARCH_TLS_IE64_HI12=94, ++ R_LARCH_TLS_LD_PC_HI20=95, ++ R_LARCH_TLS_LD_HI20=96, ++ R_LARCH_TLS_GD_PC_HI20=97, ++ R_LARCH_TLS_GD_HI20=98, ++ R_LARCH_32_PCREL=99, ++ R_LARCH_RELAX=100, ++ R_LARCH_DELETE=101, ++ R_LARCH_ALIGN=102, ++ R_LARCH_PCREL20_S2=103, ++ R_LARCH_CFA=104, ++ R_LARCH_ADD6=105, ++ R_LARCH_SUB6=106, ++ R_LARCH_ADD_ULEB128=107, ++ R_LARCH_SUB_ULEB128=108, ++ R_LARCH_64_PCREL=109, ++ _default_=Pass, ++) ++ + # Sunw Syminfo Bound To special values + ENUM_SUNW_SYMINFO_BOUNDTO = dict( + SYMINFO_BT_SELF=0xffff, +diff --git a/elftools/elf/relocation.py b/elftools/elf/relocation.py +index 4008e28..028858e 100644 +--- a/elftools/elf/relocation.py ++++ b/elftools/elf/relocation.py +@@ -14,7 +14,7 @@ from .sections import Section + from .enums import ( + ENUM_RELOC_TYPE_i386, ENUM_RELOC_TYPE_x64, ENUM_RELOC_TYPE_MIPS, + ENUM_RELOC_TYPE_ARM, ENUM_RELOC_TYPE_AARCH64, ENUM_RELOC_TYPE_PPC64, +- ENUM_D_TAG) ++ ENUM_RELOC_TYPE_BPF, ENUM_RELOC_TYPE_LOONGARCH, ENUM_D_TAG) + from ..construct import Container + + +@@ -253,6 +253,13 @@ class RelocationHandler(object): + recipe = self._RELOCATION_RECIPES_AARCH64.get(reloc_type, None) + elif self.elffile.get_machine_arch() == '64-bit PowerPC': + recipe = self._RELOCATION_RECIPES_PPC64.get(reloc_type, None) ++ elif self.elffile.get_machine_arch() == 'Linux BPF - in-kernel virtual machine': ++ recipe = 
self._RELOCATION_RECIPES_EBPF.get(reloc_type, None) ++ elif self.elffile.get_machine_arch() == 'LoongArch': ++ if not reloc.is_RELA(): ++ raise ELFRelocationError( ++ 'Unexpected REL relocation for LoongArch: %s' % reloc) ++ recipe = self._RELOCATION_RECIPES_LOONGARCH.get(reloc_type, None) + + if recipe is None: + raise ELFRelocationError( +@@ -267,6 +274,10 @@ class RelocationHandler(object): + value_struct = self.elffile.structs.Elf_word('') + elif recipe.bytesize == 8: + value_struct = self.elffile.structs.Elf_word64('') ++ elif recipe.bytesize == 1: ++ value_struct = self.elffile.structs.Elf_byte('') ++ elif recipe.bytesize == 2: ++ value_struct = self.elffile.structs.Elf_half('') + else: + raise ELFRelocationError('Invalid bytesize %s for relocation' % + recipe.bytesize) +@@ -316,6 +327,9 @@ class RelocationHandler(object): + def _reloc_calc_sym_plus_addend_pcrel(value, sym_value, offset, addend=0): + return sym_value + addend - offset + ++ def _reloc_calc_value_minus_sym_addend(value, sym_value, offset, addend=0): ++ return value - sym_value - addend ++ + def _arm_reloc_calc_sym_plus_value_pcrel(value, sym_value, offset, addend=0): + return sym_value // 4 + value - offset // 4 + +@@ -381,4 +395,46 @@ class RelocationHandler(object): + bytesize=4, has_addend=True, calc_func=_reloc_calc_sym_plus_addend), + } + ++ # https://github.com/loongson/la-abi-specs/blob/release/laelf.adoc ++ _RELOCATION_RECIPES_LOONGARCH = { ++ ENUM_RELOC_TYPE_LOONGARCH['R_LARCH_NONE']: _RELOCATION_RECIPE_TYPE( ++ bytesize=4, has_addend=False, calc_func=_reloc_calc_identity), ++ ENUM_RELOC_TYPE_LOONGARCH['R_LARCH_32']: _RELOCATION_RECIPE_TYPE( ++ bytesize=4, has_addend=True, ++ calc_func=_reloc_calc_sym_plus_addend), ++ ENUM_RELOC_TYPE_LOONGARCH['R_LARCH_64']: _RELOCATION_RECIPE_TYPE( ++ bytesize=8, has_addend=True, ++ calc_func=_reloc_calc_sym_plus_addend), ++ ENUM_RELOC_TYPE_LOONGARCH['R_LARCH_ADD8']: _RELOCATION_RECIPE_TYPE( ++ bytesize=1, has_addend=True, ++ calc_func=_reloc_calc_sym_plus_value), ++ ENUM_RELOC_TYPE_LOONGARCH['R_LARCH_SUB8']: _RELOCATION_RECIPE_TYPE( ++ bytesize=1, has_addend=True, ++ calc_func=_reloc_calc_value_minus_sym_addend), ++ ENUM_RELOC_TYPE_LOONGARCH['R_LARCH_ADD16']: _RELOCATION_RECIPE_TYPE( ++ bytesize=2, has_addend=True, ++ calc_func=_reloc_calc_sym_plus_value), ++ ENUM_RELOC_TYPE_LOONGARCH['R_LARCH_SUB16']: _RELOCATION_RECIPE_TYPE( ++ bytesize=2, has_addend=True, ++ calc_func=_reloc_calc_value_minus_sym_addend), ++ ENUM_RELOC_TYPE_LOONGARCH['R_LARCH_ADD32']: _RELOCATION_RECIPE_TYPE( ++ bytesize=4, has_addend=True, ++ calc_func=_reloc_calc_sym_plus_value), ++ ENUM_RELOC_TYPE_LOONGARCH['R_LARCH_SUB32']: _RELOCATION_RECIPE_TYPE( ++ bytesize=4, has_addend=True, ++ calc_func=_reloc_calc_value_minus_sym_addend), ++ ENUM_RELOC_TYPE_LOONGARCH['R_LARCH_ADD64']: _RELOCATION_RECIPE_TYPE( ++ bytesize=8, has_addend=True, ++ calc_func=_reloc_calc_sym_plus_value), ++ ENUM_RELOC_TYPE_LOONGARCH['R_LARCH_SUB64']: _RELOCATION_RECIPE_TYPE( ++ bytesize=8, has_addend=True, ++ calc_func=_reloc_calc_value_minus_sym_addend), ++ ENUM_RELOC_TYPE_LOONGARCH['R_LARCH_32_PCREL']: _RELOCATION_RECIPE_TYPE( ++ bytesize=4, has_addend=True, ++ calc_func=_reloc_calc_sym_plus_addend_pcrel), ++ ENUM_RELOC_TYPE_LOONGARCH['R_LARCH_64_PCREL']: _RELOCATION_RECIPE_TYPE( ++ bytesize=8, has_addend=True, ++ calc_func=_reloc_calc_sym_plus_addend_pcrel), ++ } ++ + +diff --git a/scripts/dwarfdump.py b/scripts/dwarfdump.py +index ca6bac3..552e530 100644 +--- a/scripts/dwarfdump.py ++++ b/scripts/dwarfdump.py +@@ -342,7 +342,7 @@ 
class ReadElf(object): + self.elffile = ELFFile(file) + self.output = output + self._dwarfinfo = self.elffile.get_dwarf_info() +- arches = {"EM_386": "i386", "EM_X86_64": "x86-64"} ++ arches = {"EM_386": "i386", "EM_X86_64": "x86-64", "EM_ARM": "littlearm", "EM_AARCH64": "littleaarch64", "EM_LOONGARCH": "loongarch", "EM_RISCV": "littleriscv", "EM_MIPS": "mips"} + arch = arches[self.elffile['e_machine']] + bits = self.elffile.elfclass + self._emitline("%s: file format elf%d-%s" % (filename, bits, arch)) +diff --git a/scripts/readelf.py b/scripts/readelf.py +index 2095c91..e84d89b 100755 +--- a/scripts/readelf.py ++++ b/scripts/readelf.py +@@ -9,6 +9,7 @@ + #------------------------------------------------------------------------------- + import argparse + import os, sys ++import re + import string + import traceback + import itertools +@@ -96,6 +97,13 @@ def _get_cu_base(cu): + else: + raise ValueError("Can't find the base IP (low_pc) for a CU") + ++# Matcher for all control characters, for transforming them into "^X" form when ++# formatting symbol names for display. ++_CONTROL_CHAR_RE = re.compile(r'[\x01-\x1f]') ++ ++def _format_symbol_name(s): ++ return _CONTROL_CHAR_RE.sub(lambda match: '^' + chr(0x40 + ord(match[0])), s) ++ + class ReadElf(object): + """ display_* methods are used to emit output into the output stream + """ +@@ -244,6 +252,18 @@ class ReadElf(object): + if (flags & E_FLAGS.EF_MIPS_ARCH) == E_FLAGS.EF_MIPS_ARCH_64: + description += ", mips64" + ++ elif self.elffile['e_machine'] == "EM_LOONGARCH": ++ if (flags & E_FLAGS.EF_LOONGARCH_ABI_MODIFIER_MASK) == E_FLAGS.EF_LOONGARCH_ABI_SOFT_FLOAT: ++ description += ", SOFT-FLOAT" ++ if (flags & E_FLAGS.EF_LOONGARCH_ABI_MODIFIER_MASK) == E_FLAGS.EF_LOONGARCH_ABI_SINGLE_FLOAT: ++ description += ", SINGLE-FLOAT" ++ if (flags & E_FLAGS.EF_LOONGARCH_ABI_MODIFIER_MASK) == E_FLAGS.EF_LOONGARCH_ABI_DOUBLE_FLOAT: ++ description += ", DOUBLE-FLOAT" ++ if (flags & E_FLAGS.EF_LOONGARCH_OBJABI_MASK) == E_FLAGS.EF_LOONGARCH_OBJABI_V0: ++ description += ", OBJ-v0" ++ if (flags & E_FLAGS.EF_LOONGARCH_OBJABI_MASK) == E_FLAGS.EF_LOONGARCH_OBJABI_V1: ++ description += ", OBJ-v1" ++ + return description + + def display_program_headers(self, show_heading=True): +@@ -468,7 +488,7 @@ class ReadElf(object): + describe_symbol_shndx(self._get_symbol_shndx(symbol, + nsym, + section_index)), +- symbol_name, ++ _format_symbol_name(symbol_name), + version_info)) + + def display_dynamic_tags(self): +@@ -605,7 +625,7 @@ class ReadElf(object): + self._format_hex( + symbol['st_value'], + fullhex=True, lead0x=False), +- symbol_name)) ++ _format_symbol_name(symbol_name))) + if section.is_RELA(): + self._emit(' %s %x' % ( + '+' if rel['r_addend'] >= 0 else '-', +@@ -1403,24 +1423,20 @@ class ReadElf(object): + + # Look at the registers the decoded table describes. + # We build reg_order here to match readelf's order. In particular, +- # registers are sorted by their number, and the register matching +- # ra_regnum is always listed last with a special heading. ++ # registers are sorted by their number, so that the register ++ # matching ra_regnum is usually listed last with a special heading. ++ # (LoongArch is a notable exception in that its return register's ++ # DWARF register number is not greater than other GPRs.) 
+ decoded_table = entry.get_decoded() +- reg_order = sorted(ifilter( +- lambda r: r != ra_regnum, +- decoded_table.reg_order)) ++ reg_order = sorted(decoded_table.reg_order) + if len(decoded_table.reg_order): +- + # Headings for the registers + for regnum in reg_order: ++ if regnum == ra_regnum: ++ self._emit('ra ') ++ continue + self._emit('%-6s' % describe_reg_name(regnum)) +- self._emitline('ra ') +- +- # Now include ra_regnum in reg_order to print its values +- # similarly to the other registers. +- reg_order.append(ra_regnum) +- else: +- self._emitline() ++ self._emitline() + + for line in decoded_table.table: + self._emit(self._format_hex( +diff --git a/test/testfiles_for_readelf/loongarch-relocs.c b/test/testfiles_for_readelf/loongarch-relocs.c +new file mode 100644 +index 0000000..ad347d8 +--- /dev/null ++++ b/test/testfiles_for_readelf/loongarch-relocs.c +@@ -0,0 +1,18 @@ ++/* This source was compiled for LoongArch64. ++ loongarch64-unknown-linux-gnu-gcc -c -o loongarch64-relocs.o.elf loongarch-relocs.c -g ++ Upstream support for LoongArch32 is not yet mature, so it is not covered. ++*/ ++ ++extern struct { ++ int i, j; ++} data; ++ ++extern int bar (void); ++ ++int ++foo (int a) ++{ ++ data.i += a; ++ data.j -= bar(); ++ return 0; ++} +diff --git a/test/testfiles_for_readelf/loongarch64-relocs.o.elf b/test/testfiles_for_readelf/loongarch64-relocs.o.elf +new file mode 100644 +index 0000000..12fafa2 +Binary files /dev/null and b/test/testfiles_for_readelf/loongarch64-relocs.o.elf differ diff --git a/python-pylint/PKGBUILD b/python-pylint/PKGBUILD index a7cde4c713..1f56eacd16 100644 --- a/python-pylint/PKGBUILD +++ b/python-pylint/PKGBUILD @@ -4,6 +4,7 @@ # Contributor: Felix Yan # Contributor: Stéphane Gaudreault # Contributor: Alexander Fehr +export CHECKFUNC=1 _pyname=pylint pkgname=python-$_pyname diff --git a/python-pyopenssl/PKGBUILD b/python-pyopenssl/PKGBUILD index d2e718b7b4..2d2a5f24b4 100644 --- a/python-pyopenssl/PKGBUILD +++ b/python-pyopenssl/PKGBUILD @@ -18,7 +18,7 @@ makedepends=( 'python-setuptools' 'python-wheel' ) -checkdepends=('python-pytest' 'python-pretend' 'python-flaky') +makedepends+=('python-pytest' 'python-pretend' 'python-flaky') _commit='7f3e4f94701a5e19ec66e3601119dd6d62043cec' source=("$pkgname::git+https://github.com/pyca/pyopenssl#commit=$_commit") b2sums=('SKIP') diff --git a/python-pypandoc/PKGBUILD b/python-pypandoc/PKGBUILD index d59ec0de7a..d4963748f1 100644 --- a/python-pypandoc/PKGBUILD +++ b/python-pypandoc/PKGBUILD @@ -8,7 +8,7 @@ pkgdesc="Thin wrapper for pandoc" arch=('any') license=('MIT') url="https://github.com/JessicaTegner/pypandoc" -depends=('pandoc') +#depends=('pandoc') makedepends=('python-build' 'python-installer' 'python-poetry-core' 'python-wheel') checkdepends=('texlive-basic' 'texlive-latexextra' 'texlive-fontsrecommended' 'python-pandocfilters') source=("https://github.com/JessicaTegner/pypandoc/archive/v$pkgver/$pkgname-$pkgver.tar.gz") diff --git a/python-rpds-py/PKGBUILD b/python-rpds-py/PKGBUILD index ebcdf14655..eb53873db3 100644 --- a/python-rpds-py/PKGBUILD +++ b/python-rpds-py/PKGBUILD @@ -32,7 +32,7 @@ prepare() { cd "$pkgname" # download dependencies - cargo fetch --locked --target "$CARCH-unknown-linux-gnu" +#cargo fetch --locked --target "$CARCH-unknown-linux-gnu" } build() { diff --git a/python-simple-term-menu/PKGBUILD b/python-simple-term-menu/PKGBUILD index 1b7a86f9f2..3c47e45fba 100644 --- a/python-simple-term-menu/PKGBUILD +++ b/python-simple-term-menu/PKGBUILD @@ -22,13 +22,21 @@ makedepends=( 
checkdepends=() optdepends=() source=("https://pypi.python.org/packages/source/s/simple-term-menu/simple-term-menu-${pkgver}.tar.gz") +source+=(cjk-preview.patch) sha512sums=('7787c557467c0e8a44fdf61cc094de1e1171c2f9fc82f0607128ca9ad14ce95037b2f4d1cd7e258480415f2704256af36421d389c5c0ca5280d01d5cc675f05a') b2sums=('6d4e498ee342fe4634944f0281b45ef7af7914eb8b3ceab7e7291b451e26757100c0002f9959150445eb752ca8b5e44ee00436d75ea326bafa2767894dbf7ead') +sha512sums+=('0d7d6366a01a4264f0d4061835998beb9d4fd1cd5bb0ca7c59230c3876def7192210984d208583df5b1ad28df22c27836066886c8926e6e28bb98f082127b4fb') +b2sums+=('027d66daec14cb96081cfa840e5da9d8c4394f0767f9e759fe6596d024aa0e61c234a8e467d6d8b27e66fa54a8c7d7237ea19adeb3f5ce7ca0f83816d78cd463') pkgver() { echo $pkgver } +prepare() { + cd "simple-term-menu-$pkgver" + patch -p1 -i $srcdir/cjk-preview.patch +} + build() { cd "simple-term-menu-$pkgver" diff --git a/python-simple-term-menu/cjk-preview.patch b/python-simple-term-menu/cjk-preview.patch new file mode 100644 index 0000000000..fc9f9ed446 --- /dev/null +++ b/python-simple-term-menu/cjk-preview.patch @@ -0,0 +1,11 @@ +--- a/simple_term_menu.py 2023-07-19 22:41:08.000000000 +0800 ++++ b/tmp/simple_term_menu.py 2023-08-15 14:44:22.410664526 +0800 +@@ -1270,7 +1270,7 @@ + BoxDrawingCharacters.upper_left + + (2 * BoxDrawingCharacters.horizontal + " " + self._preview_title)[: num_cols - 3] + + " " +- + (num_cols - len(self._preview_title) - 6) * BoxDrawingCharacters.horizontal ++ + (num_cols - wcswidth(self._preview_title) - 6) * BoxDrawingCharacters.horizontal + + BoxDrawingCharacters.upper_right + )[:num_cols] + + "\n" diff --git a/python-stone/PKGBUILD b/python-stone/PKGBUILD index cffd33923e..294edab362 100644 --- a/python-stone/PKGBUILD +++ b/python-stone/PKGBUILD @@ -1,4 +1,5 @@ # Maintainer: +export CHECKFUNC=1 pkgname=python-stone _name=${pkgname#python-} diff --git a/python-urllib3/PKGBUILD b/python-urllib3/PKGBUILD index dd227edefe..d8714ee314 100644 --- a/python-urllib3/PKGBUILD +++ b/python-urllib3/PKGBUILD @@ -20,7 +20,7 @@ makedepends=( 'python-sphinx-furo' 'python-wheel' ) -checkdepends=( +makedepends+=( 'python-brotli' 'python-certifi' 'python-cryptography' diff --git a/python-virtualenv/PKGBUILD b/python-virtualenv/PKGBUILD index c4980e25f0..b22484ca8e 100644 --- a/python-virtualenv/PKGBUILD +++ b/python-virtualenv/PKGBUILD @@ -47,11 +47,11 @@ _commit='1941c1d5abf81814992b68bbc86c0020dc75a3ad' source=("$pkgname::git+https://github.com/pypa/virtualenv#commit=$_commit") b2sums=('SKIP') -pkgver() { - cd "$pkgname" - - git describe --tags | sed 's/^v//' -} +#pkgver() { +# cd "$pkgname" +# +# git describe --tags | sed 's/^v//' +#} build() { cd "$pkgname" diff --git a/python-wstools/PKGBUILD b/python-wstools/PKGBUILD index f3dc7115ad..4c00e593ce 100644 --- a/python-wstools/PKGBUILD +++ b/python-wstools/PKGBUILD @@ -8,7 +8,7 @@ arch=('any') url="https://github.com/pycontribs/wstools" license=('custom') depends=('python-six') -makedepends=('python-pbr' 'python-setuptools') +makedepends=('python-pbr' 'python-setuptools' 'python-pip') checkdepends=('python-pytest-runner' 'autopep8' 'python-pytest-cov') source=("https://github.com/pycontribs/wstools/archive/$pkgver/$pkgname-$pkgver.tar.gz" python310.patch) diff --git a/qd/PKGBUILD b/qd/PKGBUILD index cecefbd850..e03c7ce5e7 100644 --- a/qd/PKGBUILD +++ b/qd/PKGBUILD @@ -13,7 +13,7 @@ depends=(gcc-libs glibc) makedepends=(gcc-fortran) source=(https://crd-legacy.lbl.gov/~dhbailey/mpdist/$pkgname-$pkgver.tar.gz) 
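For reference, the cjk-preview.patch above swaps len() for wcswidth() when sizing the preview-box rule: a double-width CJK title occupies two terminal cells per character, so counting code points draws the top border too long. A minimal sketch, assuming the third-party wcwidth package (the patched simple_term_menu.py already calls a wcswidth() helper):

    from wcwidth import wcswidth

    title = "预览"                               # 2 code points, 4 terminal cells
    num_cols = 40
    fill_old = num_cols - len(title) - 6        # 32: border overshoots by 2 cells
    fill_new = num_cols - wcswidth(title) - 6   # 30: border ends at the box corner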
-sha256sums=('ad6738e8330928308e10346ff7fd357ed17386408f8fb7a23704cd6f5d52a6c8') +sha256sums=('a47b6c73f86e6421e86a883568dd08e299b20e36c11a99bdfbe50e01bde60e38') build() { cd $pkgname-$pkgver diff --git a/qemu/PKGBUILD b/qemu/PKGBUILD index 60f53217db..60445c5e41 100644 --- a/qemu/PKGBUILD +++ b/qemu/PKGBUILD @@ -127,6 +127,8 @@ source=( $pkgbase-8.1.1-static_regression.patch $pkgbase-8.2.0-virtio-gpu_redrawing.patch::https://gitlab.com/qemu-project/qemu/-/commit/9d5b42beb6978dc6219d5dc029c9d453c6b8d503.patch $pkgbase-8.2.0-fix_tcg.patch + qemu-kvm-la64.patch + qemu-4k-pagesize.patch ) sha512sums=('92ec41196ff145cdbb98948f6b6e43214fa4b4419554a8a1927fb4527080c8212ccb703e184baf8ee0bdfa50ad7a84689e8f5a69eba1bd7bbbdfd69e3b91256c' 'SKIP' @@ -137,7 +139,9 @@ sha512sums=('92ec41196ff145cdbb98948f6b6e43214fa4b4419554a8a1927fb4527080c8212cc '93b905046fcea8a0a89513b9259c222494ab3b91319dde23baebcb40dc17376a56661b159b99785d6e816831974a0f3cbd7b2f7d89e5fc3c258f88f4492f3839' 'c7d086a951e9a378434ea95a843a4b01f0eb2ae430135a81365147cf6806a7ba1b49014a3aa66904970853ba84a4a28dbaded7bccb99a0bc3730572c80fb8b12' '3f052f87406c47849def7e21900cd9773ed061658e6f568b5918157650e0803c6b9c3bbfec69b577202cc2ce224d1d0339b615e419112f2ac351e44cd9f33539' - 'eb6a9c9ba7143b1ff937aeff06b07d6b4b718d92e6623743ecf7f1e50f01d259c2d8f7543f526948a14a865d4478b6dd3dfac56e17f5b27d78fda3380767615e') + 'eb6a9c9ba7143b1ff937aeff06b07d6b4b718d92e6623743ecf7f1e50f01d259c2d8f7543f526948a14a865d4478b6dd3dfac56e17f5b27d78fda3380767615e' + '45ec57f314f12ea1dd2e0989ff78b98452446ff040d3806a09df40ab21a4dcc0515a5e28b2294941e802b2bfd57b1c6408db6179a01d1b0331a54b1d7d459872' + 'e0b1b41c643e437548b2dadcd37697143e1424b0f5912347dd6cb3fbcb11a65c6f69a5206d9df600728ebf9bf3c12ec7aaa4280459a252f4488ada42c49336a7') b2sums=('a63667042e1e19c635568072d8dcc117320117e81e374a93cfb79e2363ebf505df3217fb098638e53c899eb6f83435221e8031f2aae003c27ec25af8654683b3' 'SKIP' 'b1eca364aa60f130ff5e649f5d004d3fcb75356d3421a4542efdfc410d39b40d9434d15e1dd7bbdbd315cb72b5290d3ea5f77f9c41961a5601cd28ef7bbe72e8' @@ -147,7 +151,9 @@ b2sums=('a63667042e1e19c635568072d8dcc117320117e81e374a93cfb79e2363ebf505df3217f 'a9a2bdfeeb44eb86cbe88ac7c65f72800bdb2fd5cecb02f3a258cf9470b52832180aab43c89d481f7fd4d067342a9a27dd6c8a94d625b95d6e2b912e47d274e7' '209ec05e161d157aaa08a9fcbea45cf87aa22fe9360f9b3c477a78a274e4ecee989c16121f9e6b7765bb479c9db718c98db047c27fd426c127c4c95e28877a16' '0d5ea661bfc2afe0bb68dce7504f872a2d30a2f46e2463bc7bfb4a0d63f01b3090c42780a221cca00a64b0e5c9a6970d8ba444c6d182d6f4867541da9a993512' - '5dcd51530db4253a3787d49be34988fbe2d240e50f8771123b853bf3413d26fd3a442580f89c009f15dfc48521279bd2302be1eaa329726330c0cf964e3ea6b2') + '5dcd51530db4253a3787d49be34988fbe2d240e50f8771123b853bf3413d26fd3a442580f89c009f15dfc48521279bd2302be1eaa329726330c0cf964e3ea6b2' + 'b109cfd804e316115c657ef0ca00da5d0e8f957ea0cbebd4c8ba11790bf941319ea0e601b81495ed2ebfd2ae9b5fdbb3b2795ca48db7392c7335c71c4a88f652' + 'c52f9320dcd59e6f21070a3d13255572ae8bf2e4bc13d989adf8e344413e4bea3d089f5d0306e7fa8050999eb81680638b3d4feef9e7aa29a0132830784e39ef') validpgpkeys=('CEACC9E15534EBABB82D3FA03353C9CEF108B584') # Michael Roth _qemu_system_deps=( @@ -271,6 +277,8 @@ _pick() { prepare() { # fix crash with static binaries: https://gitlab.com/qemu-project/qemu/-/issues/1913 patch -Np1 -d $pkgbase-$pkgver -i ../$pkgbase-8.1.1-static_regression.patch + patch -p1 -d $pkgbase-$pkgver -i ../qemu-kvm-la64.patch + patch -p1 -d $pkgbase-$pkgver -i ../qemu-4k-pagesize.patch # fix virtio-gpu redrawingi issues: 
https://gitlab.com/qemu-project/qemu/-/issues/2051 patch -Np1 -d $pkgbase-$pkgver -i ../$pkgbase-8.2.0-virtio-gpu_redrawing.patch @@ -305,6 +313,7 @@ build() { --enable-sdl --enable-slirp --enable-tpm + --enable-kvm --smbd=/usr/bin/smbd --with-coroutine=ucontext ) @@ -822,7 +831,7 @@ package_qemu-system-hppa-firmware() { package_qemu-system-loongarch64() { pkgdesc="QEMU system emulator for LoongArch64" - depends=("${_qemu_system_deps[@]}" systemd-libs) + depends=("${_qemu_system_deps[@]}" edk2-loongarch64 systemd-libs) mv -v $pkgname/* "$pkgdir" } diff --git a/qemu/qemu-4k-pagesize.patch b/qemu/qemu-4k-pagesize.patch new file mode 100644 index 0000000000..eb6d8f4bd4 --- /dev/null +++ b/qemu/qemu-4k-pagesize.patch @@ -0,0 +1,58 @@ +commit 281001f601e289d7c807de73ad0e70761938977b +Author: Song Gao +Date: Mon Oct 23 10:40:59 2023 +0800 + + target/loongarch: Support 4K page size + + The LoongArch kernel supports 4K page size. + Change TARGET_PAGE_BITS to 12. + + Signed-off-by: Song Gao + Message-Id: <20231023024059.3858349-1-gaosong@loongson.cn> + +diff --git a/target/loongarch/cpu-param.h b/target/loongarch/cpu-param.h +index 1265dc7cb5..cfe195db4e 100644 +--- a/target/loongarch/cpu-param.h ++++ b/target/loongarch/cpu-param.h +@@ -12,6 +12,6 @@ + #define TARGET_PHYS_ADDR_SPACE_BITS 48 + #define TARGET_VIRT_ADDR_SPACE_BITS 48 + +-#define TARGET_PAGE_BITS 14 ++#define TARGET_PAGE_BITS 12 + + #endif +diff --git a/target/loongarch/tlb_helper.c b/target/loongarch/tlb_helper.c +index 6e00190547..903bb76bda 100644 +--- a/target/loongarch/tlb_helper.c ++++ b/target/loongarch/tlb_helper.c +@@ -53,6 +53,9 @@ static int loongarch_map_tlb_entry(CPULoongArchState *env, hwaddr *physical, + tlb_nr = FIELD_EX64(tlb_entry, TLBENTRY, NR); + tlb_rplv = FIELD_EX64(tlb_entry, TLBENTRY, RPLV); + ++ /* Remove sw bit between bit12 -- bit PS*/ ++ tlb_ppn = tlb_ppn & ~(((0x1UL << (tlb_ps - 12)) -1)); ++ + /* Check access rights */ + if (!tlb_v) { + return TLBRET_INVALID; +@@ -75,10 +78,6 @@ static int loongarch_map_tlb_entry(CPULoongArchState *env, hwaddr *physical, + return TLBRET_DIRTY; + } + +- /* +- * tlb_entry contains ppn[47:12] while 16KiB ppn is [47:15] +- * need adjust. 
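A small worked example of the masking and address formation in the hunk above, assuming R_TLBENTRY_PPN_SHIFT is 12: once TARGET_PAGE_BITS drops to 12, a TLB entry that maps a larger page (e.g. 16 KiB, tlb_ps = 14) can carry software bits in the PPN bits below tlb_ps, and those bits must be cleared before the physical address is built.

    ppn_shift = 12                    # R_TLBENTRY_PPN_SHIFT (assumed)
    tlb_ps    = 14                    # this entry maps a 16 KiB page
    address   = 0x12345678
    tlb_ppn   = 0x40003               # low 2 bits are software bits, not address bits

    tlb_ppn &= ~((1 << (tlb_ps - 12)) - 1)        # -> 0x40000, as in the hunk above
    physical  = (tlb_ppn << ppn_shift) | (address & ((1 << tlb_ps) - 1))
    print(hex(physical))                          # 0x40001678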
+- */ + *physical = (tlb_ppn << R_TLBENTRY_PPN_SHIFT) | + (address & MAKE_64BIT_MASK(0, tlb_ps)); + *prot = PAGE_READ; +@@ -734,7 +733,7 @@ void helper_ldpte(CPULoongArchState *env, target_ulong base, target_ulong odd, + /* Move Global bit */ + tmp0 = ((tmp0 & (1 << LOONGARCH_HGLOBAL_SHIFT)) >> + LOONGARCH_HGLOBAL_SHIFT) << R_TLBENTRY_G_SHIFT | +- (tmp0 & (~(1 << R_TLBENTRY_G_SHIFT))); ++ (tmp0 & (~(1 << LOONGARCH_HGLOBAL_SHIFT))); + ps = ptbase + ptwidth - 1; + if (odd) { + tmp0 += MAKE_64BIT_MASK(ps, 1); diff --git a/qemu/qemu-kvm-la64.patch b/qemu/qemu-kvm-la64.patch new file mode 100644 index 0000000000..6340a698b1 --- /dev/null +++ b/qemu/qemu-kvm-la64.patch @@ -0,0 +1,1680 @@ +diff --git a/include/standard-headers/linux/fuse.h b/include/standard-headers/linux/fuse.h +index 35c131a107..6b9793842c 100644 +--- a/include/standard-headers/linux/fuse.h ++++ b/include/standard-headers/linux/fuse.h +@@ -206,6 +206,11 @@ + * - add extension header + * - add FUSE_EXT_GROUPS + * - add FUSE_CREATE_SUPP_GROUP ++ * - add FUSE_HAS_EXPIRE_ONLY ++ * ++ * 7.39 ++ * - add FUSE_DIRECT_IO_RELAX ++ * - add FUSE_STATX and related structures + */ + + #ifndef _LINUX_FUSE_H +@@ -237,7 +242,7 @@ + #define FUSE_KERNEL_VERSION 7 + + /** Minor version number of this interface */ +-#define FUSE_KERNEL_MINOR_VERSION 38 ++#define FUSE_KERNEL_MINOR_VERSION 39 + + /** The node ID of the root inode */ + #define FUSE_ROOT_ID 1 +@@ -264,6 +269,40 @@ struct fuse_attr { + uint32_t flags; + }; + ++/* ++ * The following structures are bit-for-bit compatible with the statx(2) ABI in ++ * Linux. ++ */ ++struct fuse_sx_time { ++ int64_t tv_sec; ++ uint32_t tv_nsec; ++ int32_t __reserved; ++}; ++ ++struct fuse_statx { ++ uint32_t mask; ++ uint32_t blksize; ++ uint64_t attributes; ++ uint32_t nlink; ++ uint32_t uid; ++ uint32_t gid; ++ uint16_t mode; ++ uint16_t __spare0[1]; ++ uint64_t ino; ++ uint64_t size; ++ uint64_t blocks; ++ uint64_t attributes_mask; ++ struct fuse_sx_time atime; ++ struct fuse_sx_time btime; ++ struct fuse_sx_time ctime; ++ struct fuse_sx_time mtime; ++ uint32_t rdev_major; ++ uint32_t rdev_minor; ++ uint32_t dev_major; ++ uint32_t dev_minor; ++ uint64_t __spare2[14]; ++}; ++ + struct fuse_kstatfs { + uint64_t blocks; + uint64_t bfree; +@@ -365,6 +404,9 @@ struct fuse_file_lock { + * FUSE_HAS_INODE_DAX: use per inode DAX + * FUSE_CREATE_SUPP_GROUP: add supplementary group info to create, mkdir, + * symlink and mknod (single group that matches parent) ++ * FUSE_HAS_EXPIRE_ONLY: kernel supports expiry-only entry invalidation ++ * FUSE_DIRECT_IO_RELAX: relax restrictions in FOPEN_DIRECT_IO mode, for now ++ * allow shared mmap + */ + #define FUSE_ASYNC_READ (1 << 0) + #define FUSE_POSIX_LOCKS (1 << 1) +@@ -402,6 +444,8 @@ struct fuse_file_lock { + #define FUSE_SECURITY_CTX (1ULL << 32) + #define FUSE_HAS_INODE_DAX (1ULL << 33) + #define FUSE_CREATE_SUPP_GROUP (1ULL << 34) ++#define FUSE_HAS_EXPIRE_ONLY (1ULL << 35) ++#define FUSE_DIRECT_IO_RELAX (1ULL << 36) + + /** + * CUSE INIT request/reply flags +@@ -568,6 +612,7 @@ enum fuse_opcode { + FUSE_REMOVEMAPPING = 49, + FUSE_SYNCFS = 50, + FUSE_TMPFILE = 51, ++ FUSE_STATX = 52, + + /* CUSE specific operations */ + CUSE_INIT = 4096, +@@ -632,6 +677,22 @@ struct fuse_attr_out { + struct fuse_attr attr; + }; + ++struct fuse_statx_in { ++ uint32_t getattr_flags; ++ uint32_t reserved; ++ uint64_t fh; ++ uint32_t sx_flags; ++ uint32_t sx_mask; ++}; ++ ++struct fuse_statx_out { ++ uint64_t attr_valid; /* Cache timeout for the attributes */ ++ uint32_t attr_valid_nsec; 
++ uint32_t flags; ++ uint64_t spare[2]; ++ struct fuse_statx stat; ++}; ++ + #define FUSE_COMPAT_MKNOD_IN_SIZE 8 + + struct fuse_mknod_in { +diff --git a/include/standard-headers/linux/vhost_types.h b/include/standard-headers/linux/vhost_types.h +index 6691a3ce24..5ad07e134a 100644 +--- a/include/standard-headers/linux/vhost_types.h ++++ b/include/standard-headers/linux/vhost_types.h +@@ -181,5 +181,9 @@ struct vhost_vdpa_iova_range { + #define VHOST_BACKEND_F_SUSPEND 0x4 + /* Device can be resumed */ + #define VHOST_BACKEND_F_RESUME 0x5 ++/* Device supports the driver enabling virtqueues both before and after ++ * DRIVER_OK ++ */ ++#define VHOST_BACKEND_F_ENABLE_AFTER_DRIVER_OK 0x6 + + #endif +diff --git a/include/standard-headers/linux/virtio_net.h b/include/standard-headers/linux/virtio_net.h +index 2325485f2c..0f88417742 100644 +--- a/include/standard-headers/linux/virtio_net.h ++++ b/include/standard-headers/linux/virtio_net.h +@@ -56,6 +56,7 @@ + #define VIRTIO_NET_F_MQ 22 /* Device supports Receive Flow + * Steering */ + #define VIRTIO_NET_F_CTRL_MAC_ADDR 23 /* Set MAC address */ ++#define VIRTIO_NET_F_VQ_NOTF_COAL 52 /* Device supports virtqueue notification coalescing */ + #define VIRTIO_NET_F_NOTF_COAL 53 /* Device supports notifications coalescing */ + #define VIRTIO_NET_F_GUEST_USO4 54 /* Guest can handle USOv4 in. */ + #define VIRTIO_NET_F_GUEST_USO6 55 /* Guest can handle USOv6 in. */ +@@ -391,5 +392,18 @@ struct virtio_net_ctrl_coal_rx { + }; + + #define VIRTIO_NET_CTRL_NOTF_COAL_RX_SET 1 ++#define VIRTIO_NET_CTRL_NOTF_COAL_VQ_SET 2 ++#define VIRTIO_NET_CTRL_NOTF_COAL_VQ_GET 3 ++ ++struct virtio_net_ctrl_coal { ++ uint32_t max_packets; ++ uint32_t max_usecs; ++}; ++ ++struct virtio_net_ctrl_coal_vq { ++ uint16_t vqn; ++ uint16_t reserved; ++ struct virtio_net_ctrl_coal coal; ++}; + + #endif /* _LINUX_VIRTIO_NET_H */ +diff --git a/linux-headers/asm-arm64/bitsperlong.h b/linux-headers/asm-arm64/bitsperlong.h +index 6dc0bb0c13..485d60bee2 100644 +--- a/linux-headers/asm-arm64/bitsperlong.h ++++ b/linux-headers/asm-arm64/bitsperlong.h +@@ -1 +1,24 @@ ++/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ ++/* ++ * Copyright (C) 2012 ARM Ltd. ++ * ++ * This program is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License version 2 as ++ * published by the Free Software Foundation. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program. If not, see . 
++ */ ++#ifndef __ASM_BITSPERLONG_H ++#define __ASM_BITSPERLONG_H ++ ++#define __BITS_PER_LONG 64 ++ + #include ++ ++#endif /* __ASM_BITSPERLONG_H */ +diff --git a/linux-headers/asm-generic/unistd.h b/linux-headers/asm-generic/unistd.h +index fd6c1cb585..abe087c53b 100644 +--- a/linux-headers/asm-generic/unistd.h ++++ b/linux-headers/asm-generic/unistd.h +@@ -820,8 +820,11 @@ __SYSCALL(__NR_set_mempolicy_home_node, sys_set_mempolicy_home_node) + #define __NR_cachestat 451 + __SYSCALL(__NR_cachestat, sys_cachestat) + ++#define __NR_fchmodat2 452 ++__SYSCALL(__NR_fchmodat2, sys_fchmodat2) ++ + #undef __NR_syscalls +-#define __NR_syscalls 452 ++#define __NR_syscalls 453 + + /* + * 32 bit systems traditionally used different +diff --git a/linux-headers/asm-loongarch/kvm.h b/linux-headers/asm-loongarch/kvm.h +new file mode 100644 +index 0000000000..5e72b83372 +--- /dev/null ++++ b/linux-headers/asm-loongarch/kvm.h +@@ -0,0 +1,100 @@ ++/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ ++/* ++ * Copyright (C) 2023 Loongson Technology Corporation Limited ++ */ ++ ++#ifndef __UAPI_ASM_LOONGARCH_KVM_H ++#define __UAPI_ASM_LOONGARCH_KVM_H ++ ++#include ++ ++/* ++ * KVM Loongarch specific structures and definitions. ++ */ ++ ++#define __KVM_HAVE_READONLY_MEM ++ ++#define KVM_COALESCED_MMIO_PAGE_OFFSET 1 ++ ++/* ++ * for KVM_GET_REGS and KVM_SET_REGS ++ */ ++struct kvm_regs { ++ /* out (KVM_GET_REGS) / in (KVM_SET_REGS) */ ++ __u64 gpr[32]; ++ __u64 pc; ++}; ++ ++/* ++ * for KVM_GET_FPU and KVM_SET_FPU ++ */ ++struct kvm_fpu { ++ __u32 fcsr; ++ __u32 none; ++ __u64 fcc; /* 8x8 */ ++ struct kvm_fpureg { ++ __u64 val64[4]; ++ } fpr[32]; ++}; ++ ++/* ++ * For LoongArch, we use KVM_SET_ONE_REG and KVM_GET_ONE_REG to access various ++ * registers. The id field is broken down as follows: ++ * ++ * bits[63..52] - As per linux/kvm.h ++ * bits[51..32] - Must be zero. ++ * bits[31..16] - Register set. ++ * ++ * Register set = 0: GP registers from kvm_regs (see definitions below). ++ * ++ * Register set = 1: CSR registers. ++ * ++ * Register set = 2: KVM specific registers (see definitions below). ++ * ++ * Register set = 3: FPU / SIMD registers (see definitions below). ++ * ++ * Other sets registers may be added in the future. Each set would ++ * have its own identifier in bits[31..16]. ++ */ ++ ++#define KVM_REG_LOONGARCH_GP (KVM_REG_LOONGARCH | 0x00000ULL) ++#define KVM_REG_LOONGARCH_CSR (KVM_REG_LOONGARCH | 0x10000ULL) ++#define KVM_REG_LOONGARCH_KVM (KVM_REG_LOONGARCH | 0x20000ULL) ++#define KVM_REG_LOONGARCH_FPU (KVM_REG_LOONGARCH | 0x30000ULL) ++#define KVM_REG_LOONGARCH_CPUCFG (KVM_REG_LOONGARCH | 0x40000ULL) ++#define KVM_REG_LOONGARCH_MASK (KVM_REG_LOONGARCH | 0x70000ULL) ++#define KVM_CSR_IDX_MASK 0x7fff ++#define KVM_CPUCFG_IDX_MASK 0x7fff ++ ++/* ++ * KVM_REG_LOONGARCH_KVM - KVM specific control registers. 
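To make the id layout described above concrete (and to preview the LOONGARCH_REG_64/KVM_IOC_CSRID macros defined just below), a sketch that builds the KVM_GET_ONE_REG/KVM_SET_ONE_REG id for a 64-bit CSR; treating CSR number 0x1 as PRMD is an assumption for the example.

    KVM_REG_LOONGARCH     = 0x9 << 60            # arch field, added by this patch
    KVM_REG_SIZE_U64      = 0x3 << 52            # size field from linux/kvm.h
    KVM_REG_LOONGARCH_CSR = KVM_REG_LOONGARCH | 0x10000
    LOONGARCH_REG_SHIFT   = 3

    def kvm_ioc_csrid(csr):
        # mirrors LOONGARCH_REG_64(KVM_REG_LOONGARCH_CSR, csr)
        return KVM_REG_LOONGARCH_CSR | KVM_REG_SIZE_U64 | (csr << LOONGARCH_REG_SHIFT)

    print(hex(kvm_ioc_csrid(0x1)))               # 0x9030000000010008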
++ */ ++ ++#define KVM_REG_LOONGARCH_COUNTER (KVM_REG_LOONGARCH_KVM | KVM_REG_SIZE_U64 | 3) ++#define KVM_REG_LOONGARCH_VCPU_RESET (KVM_REG_LOONGARCH_KVM | KVM_REG_SIZE_U64 | 4) ++ ++#define LOONGARCH_REG_SHIFT 3 ++#define LOONGARCH_REG_64(TYPE, REG) (TYPE | KVM_REG_SIZE_U64 | (REG << LOONGARCH_REG_SHIFT)) ++#define KVM_IOC_CSRID(REG) LOONGARCH_REG_64(KVM_REG_LOONGARCH_CSR, REG) ++#define KVM_IOC_CPUCFG(REG) LOONGARCH_REG_64(KVM_REG_LOONGARCH_CPUCFG, REG) ++ ++struct kvm_debug_exit_arch { ++}; ++ ++/* for KVM_SET_GUEST_DEBUG */ ++struct kvm_guest_debug_arch { ++}; ++ ++/* definition of registers in kvm_run */ ++struct kvm_sync_regs { ++}; ++ ++/* dummy definition */ ++struct kvm_sregs { ++}; ++ ++#define KVM_NR_IRQCHIPS 1 ++#define KVM_IRQCHIP_NUM_PINS 64 ++#define KVM_MAX_CORES 256 ++ ++#endif /* __UAPI_ASM_LOONGARCH_KVM_H */ +diff --git a/linux-headers/asm-mips/unistd_n32.h b/linux-headers/asm-mips/unistd_n32.h +index 8233f061c4..46d8500654 100644 +--- a/linux-headers/asm-mips/unistd_n32.h ++++ b/linux-headers/asm-mips/unistd_n32.h +@@ -380,5 +380,6 @@ + #define __NR_futex_waitv (__NR_Linux + 449) + #define __NR_set_mempolicy_home_node (__NR_Linux + 450) + #define __NR_cachestat (__NR_Linux + 451) ++#define __NR_fchmodat2 (__NR_Linux + 452) + + #endif /* _ASM_UNISTD_N32_H */ +diff --git a/linux-headers/asm-mips/unistd_n64.h b/linux-headers/asm-mips/unistd_n64.h +index a174edc768..c2f7ac673b 100644 +--- a/linux-headers/asm-mips/unistd_n64.h ++++ b/linux-headers/asm-mips/unistd_n64.h +@@ -356,5 +356,6 @@ + #define __NR_futex_waitv (__NR_Linux + 449) + #define __NR_set_mempolicy_home_node (__NR_Linux + 450) + #define __NR_cachestat (__NR_Linux + 451) ++#define __NR_fchmodat2 (__NR_Linux + 452) + + #endif /* _ASM_UNISTD_N64_H */ +diff --git a/linux-headers/asm-mips/unistd_o32.h b/linux-headers/asm-mips/unistd_o32.h +index c1a5351d9b..757c68f2ad 100644 +--- a/linux-headers/asm-mips/unistd_o32.h ++++ b/linux-headers/asm-mips/unistd_o32.h +@@ -426,5 +426,6 @@ + #define __NR_futex_waitv (__NR_Linux + 449) + #define __NR_set_mempolicy_home_node (__NR_Linux + 450) + #define __NR_cachestat (__NR_Linux + 451) ++#define __NR_fchmodat2 (__NR_Linux + 452) + + #endif /* _ASM_UNISTD_O32_H */ +diff --git a/linux-headers/asm-powerpc/unistd_32.h b/linux-headers/asm-powerpc/unistd_32.h +index 8206758691..8ef94bbac1 100644 +--- a/linux-headers/asm-powerpc/unistd_32.h ++++ b/linux-headers/asm-powerpc/unistd_32.h +@@ -433,6 +433,7 @@ + #define __NR_futex_waitv 449 + #define __NR_set_mempolicy_home_node 450 + #define __NR_cachestat 451 ++#define __NR_fchmodat2 452 + + + #endif /* _ASM_UNISTD_32_H */ +diff --git a/linux-headers/asm-powerpc/unistd_64.h b/linux-headers/asm-powerpc/unistd_64.h +index 7be98c15f0..0e7ee43e88 100644 +--- a/linux-headers/asm-powerpc/unistd_64.h ++++ b/linux-headers/asm-powerpc/unistd_64.h +@@ -405,6 +405,7 @@ + #define __NR_futex_waitv 449 + #define __NR_set_mempolicy_home_node 450 + #define __NR_cachestat 451 ++#define __NR_fchmodat2 452 + + + #endif /* _ASM_UNISTD_64_H */ +diff --git a/linux-headers/asm-riscv/bitsperlong.h b/linux-headers/asm-riscv/bitsperlong.h +index 6dc0bb0c13..cc5c45a9ce 100644 +--- a/linux-headers/asm-riscv/bitsperlong.h ++++ b/linux-headers/asm-riscv/bitsperlong.h +@@ -1 +1,14 @@ ++/* SPDX-License-Identifier: GPL-2.0-only WITH Linux-syscall-note */ ++/* ++ * Copyright (C) 2012 ARM Ltd. 
++ * Copyright (C) 2015 Regents of the University of California ++ */ ++ ++#ifndef _ASM_RISCV_BITSPERLONG_H ++#define _ASM_RISCV_BITSPERLONG_H ++ ++#define __BITS_PER_LONG (__SIZEOF_POINTER__ * 8) ++ + #include ++ ++#endif /* _ASM_RISCV_BITSPERLONG_H */ +diff --git a/linux-headers/asm-riscv/kvm.h b/linux-headers/asm-riscv/kvm.h +index 930fdc4101..992c5e4071 100644 +--- a/linux-headers/asm-riscv/kvm.h ++++ b/linux-headers/asm-riscv/kvm.h +@@ -55,6 +55,7 @@ struct kvm_riscv_config { + unsigned long marchid; + unsigned long mimpid; + unsigned long zicboz_block_size; ++ unsigned long satp_mode; + }; + + /* CORE registers for KVM_GET_ONE_REG and KVM_SET_ONE_REG */ +@@ -124,6 +125,12 @@ enum KVM_RISCV_ISA_EXT_ID { + KVM_RISCV_ISA_EXT_SSAIA, + KVM_RISCV_ISA_EXT_V, + KVM_RISCV_ISA_EXT_SVNAPOT, ++ KVM_RISCV_ISA_EXT_ZBA, ++ KVM_RISCV_ISA_EXT_ZBS, ++ KVM_RISCV_ISA_EXT_ZICNTR, ++ KVM_RISCV_ISA_EXT_ZICSR, ++ KVM_RISCV_ISA_EXT_ZIFENCEI, ++ KVM_RISCV_ISA_EXT_ZIHPM, + KVM_RISCV_ISA_EXT_MAX, + }; + +@@ -193,6 +200,15 @@ enum KVM_RISCV_SBI_EXT_ID { + + /* ISA Extension registers are mapped as type 7 */ + #define KVM_REG_RISCV_ISA_EXT (0x07 << KVM_REG_RISCV_TYPE_SHIFT) ++#define KVM_REG_RISCV_ISA_SINGLE (0x0 << KVM_REG_RISCV_SUBTYPE_SHIFT) ++#define KVM_REG_RISCV_ISA_MULTI_EN (0x1 << KVM_REG_RISCV_SUBTYPE_SHIFT) ++#define KVM_REG_RISCV_ISA_MULTI_DIS (0x2 << KVM_REG_RISCV_SUBTYPE_SHIFT) ++#define KVM_REG_RISCV_ISA_MULTI_REG(__ext_id) \ ++ ((__ext_id) / __BITS_PER_LONG) ++#define KVM_REG_RISCV_ISA_MULTI_MASK(__ext_id) \ ++ (1UL << ((__ext_id) % __BITS_PER_LONG)) ++#define KVM_REG_RISCV_ISA_MULTI_REG_LAST \ ++ KVM_REG_RISCV_ISA_MULTI_REG(KVM_RISCV_ISA_EXT_MAX - 1) + + /* SBI extension registers are mapped as type 8 */ + #define KVM_REG_RISCV_SBI_EXT (0x08 << KVM_REG_RISCV_TYPE_SHIFT) +diff --git a/linux-headers/asm-s390/kvm.h b/linux-headers/asm-s390/kvm.h +index e2afd95420..023a2763a9 100644 +--- a/linux-headers/asm-s390/kvm.h ++++ b/linux-headers/asm-s390/kvm.h +@@ -159,6 +159,22 @@ struct kvm_s390_vm_cpu_subfunc { + __u8 reserved[1728]; + }; + ++#define KVM_S390_VM_CPU_PROCESSOR_UV_FEAT_GUEST 6 ++#define KVM_S390_VM_CPU_MACHINE_UV_FEAT_GUEST 7 ++ ++#define KVM_S390_VM_CPU_UV_FEAT_NR_BITS 64 ++struct kvm_s390_vm_cpu_uv_feat { ++ union { ++ struct { ++ __u64 : 4; ++ __u64 ap : 1; /* bit 4 */ ++ __u64 ap_intr : 1; /* bit 5 */ ++ __u64 : 58; ++ }; ++ __u64 feat; ++ }; ++}; ++ + /* kvm attributes for crypto */ + #define KVM_S390_VM_CRYPTO_ENABLE_AES_KW 0 + #define KVM_S390_VM_CRYPTO_ENABLE_DEA_KW 1 +diff --git a/linux-headers/asm-s390/unistd_32.h b/linux-headers/asm-s390/unistd_32.h +index ef772cc5f8..716fa368ca 100644 +--- a/linux-headers/asm-s390/unistd_32.h ++++ b/linux-headers/asm-s390/unistd_32.h +@@ -424,5 +424,6 @@ + #define __NR_futex_waitv 449 + #define __NR_set_mempolicy_home_node 450 + #define __NR_cachestat 451 ++#define __NR_fchmodat2 452 + + #endif /* _ASM_S390_UNISTD_32_H */ +diff --git a/linux-headers/asm-s390/unistd_64.h b/linux-headers/asm-s390/unistd_64.h +index 32354a0459..b2a11b1d13 100644 +--- a/linux-headers/asm-s390/unistd_64.h ++++ b/linux-headers/asm-s390/unistd_64.h +@@ -372,5 +372,6 @@ + #define __NR_futex_waitv 449 + #define __NR_set_mempolicy_home_node 450 + #define __NR_cachestat 451 ++#define __NR_fchmodat2 452 + + #endif /* _ASM_S390_UNISTD_64_H */ +diff --git a/linux-headers/asm-x86/mman.h b/linux-headers/asm-x86/mman.h +index 775dbd3aff..46cdc941f9 100644 +--- a/linux-headers/asm-x86/mman.h ++++ b/linux-headers/asm-x86/mman.h +@@ -3,14 +3,10 @@ + #define _ASM_X86_MMAN_H + + 
#define MAP_32BIT 0x40 /* only give out 32bit addresses */ ++#define MAP_ABOVE4G 0x80 /* only map above 4GB */ + +-#ifdef CONFIG_X86_INTEL_MEMORY_PROTECTION_KEYS +-#define arch_calc_vm_prot_bits(prot, key) ( \ +- ((key) & 0x1 ? VM_PKEY_BIT0 : 0) | \ +- ((key) & 0x2 ? VM_PKEY_BIT1 : 0) | \ +- ((key) & 0x4 ? VM_PKEY_BIT2 : 0) | \ +- ((key) & 0x8 ? VM_PKEY_BIT3 : 0)) +-#endif ++/* Flags for map_shadow_stack(2) */ ++#define SHADOW_STACK_SET_TOKEN (1ULL << 0) /* Set up a restore token in the shadow stack */ + + #include + +diff --git a/linux-headers/asm-x86/unistd_32.h b/linux-headers/asm-x86/unistd_32.h +index 37b32d8139..d749ad1c24 100644 +--- a/linux-headers/asm-x86/unistd_32.h ++++ b/linux-headers/asm-x86/unistd_32.h +@@ -442,6 +442,7 @@ + #define __NR_futex_waitv 449 + #define __NR_set_mempolicy_home_node 450 + #define __NR_cachestat 451 ++#define __NR_fchmodat2 452 + + + #endif /* _ASM_UNISTD_32_H */ +diff --git a/linux-headers/asm-x86/unistd_64.h b/linux-headers/asm-x86/unistd_64.h +index 5b55d6729a..cea67282eb 100644 +--- a/linux-headers/asm-x86/unistd_64.h ++++ b/linux-headers/asm-x86/unistd_64.h +@@ -364,6 +364,8 @@ + #define __NR_futex_waitv 449 + #define __NR_set_mempolicy_home_node 450 + #define __NR_cachestat 451 ++#define __NR_fchmodat2 452 ++#define __NR_map_shadow_stack 453 + + + #endif /* _ASM_UNISTD_64_H */ +diff --git a/linux-headers/asm-x86/unistd_x32.h b/linux-headers/asm-x86/unistd_x32.h +index e8a007543d..5b2e79bf4c 100644 +--- a/linux-headers/asm-x86/unistd_x32.h ++++ b/linux-headers/asm-x86/unistd_x32.h +@@ -317,6 +317,7 @@ + #define __NR_futex_waitv (__X32_SYSCALL_BIT + 449) + #define __NR_set_mempolicy_home_node (__X32_SYSCALL_BIT + 450) + #define __NR_cachestat (__X32_SYSCALL_BIT + 451) ++#define __NR_fchmodat2 (__X32_SYSCALL_BIT + 452) + #define __NR_rt_sigaction (__X32_SYSCALL_BIT + 512) + #define __NR_rt_sigreturn (__X32_SYSCALL_BIT + 513) + #define __NR_ioctl (__X32_SYSCALL_BIT + 514) +diff --git a/linux-headers/linux/kvm.h b/linux-headers/linux/kvm.h +index 1f3f3333a4..0e378bbcbf 100644 +--- a/linux-headers/linux/kvm.h ++++ b/linux-headers/linux/kvm.h +@@ -264,6 +264,7 @@ struct kvm_xen_exit { + #define KVM_EXIT_RISCV_SBI 35 + #define KVM_EXIT_RISCV_CSR 36 + #define KVM_EXIT_NOTIFY 37 ++#define KVM_EXIT_LOONGARCH_IOCSR 38 + + /* For KVM_EXIT_INTERNAL_ERROR */ + /* Emulate instruction failed. 
*/ +@@ -336,6 +337,13 @@ struct kvm_run { + __u32 len; + __u8 is_write; + } mmio; ++ /* KVM_EXIT_LOONGARCH_IOCSR */ ++ struct { ++ __u64 phys_addr; ++ __u8 data[8]; ++ __u32 len; ++ __u8 is_write; ++ } iocsr_io; + /* KVM_EXIT_HYPERCALL */ + struct { + __u64 nr; +@@ -1358,6 +1366,7 @@ struct kvm_dirty_tlb { + #define KVM_REG_ARM64 0x6000000000000000ULL + #define KVM_REG_MIPS 0x7000000000000000ULL + #define KVM_REG_RISCV 0x8000000000000000ULL ++#define KVM_REG_LOONGARCH 0x9000000000000000ULL + + #define KVM_REG_SIZE_SHIFT 52 + #define KVM_REG_SIZE_MASK 0x00f0000000000000ULL +@@ -1414,9 +1423,16 @@ struct kvm_device_attr { + __u64 addr; /* userspace address of attr data */ + }; + +-#define KVM_DEV_VFIO_GROUP 1 +-#define KVM_DEV_VFIO_GROUP_ADD 1 +-#define KVM_DEV_VFIO_GROUP_DEL 2 ++#define KVM_DEV_VFIO_FILE 1 ++ ++#define KVM_DEV_VFIO_FILE_ADD 1 ++#define KVM_DEV_VFIO_FILE_DEL 2 ++ ++/* KVM_DEV_VFIO_GROUP aliases are for compile time uapi compatibility */ ++#define KVM_DEV_VFIO_GROUP KVM_DEV_VFIO_FILE ++ ++#define KVM_DEV_VFIO_GROUP_ADD KVM_DEV_VFIO_FILE_ADD ++#define KVM_DEV_VFIO_GROUP_DEL KVM_DEV_VFIO_FILE_DEL + #define KVM_DEV_VFIO_GROUP_SET_SPAPR_TCE 3 + + enum kvm_device_type { +diff --git a/linux-headers/linux/stddef.h b/linux-headers/linux/stddef.h +index bb6ea517ef..9bb07083ac 100644 +--- a/linux-headers/linux/stddef.h ++++ b/linux-headers/linux/stddef.h +@@ -45,3 +45,7 @@ + TYPE NAME[]; \ + } + #endif ++ ++#ifndef __counted_by ++#define __counted_by(m) ++#endif +diff --git a/linux-headers/linux/userfaultfd.h b/linux-headers/linux/userfaultfd.h +index 14e402263a..59978fbaae 100644 +--- a/linux-headers/linux/userfaultfd.h ++++ b/linux-headers/linux/userfaultfd.h +@@ -39,7 +39,8 @@ + UFFD_FEATURE_MINOR_SHMEM | \ + UFFD_FEATURE_EXACT_ADDRESS | \ + UFFD_FEATURE_WP_HUGETLBFS_SHMEM | \ +- UFFD_FEATURE_WP_UNPOPULATED) ++ UFFD_FEATURE_WP_UNPOPULATED | \ ++ UFFD_FEATURE_POISON) + #define UFFD_API_IOCTLS \ + ((__u64)1 << _UFFDIO_REGISTER | \ + (__u64)1 << _UFFDIO_UNREGISTER | \ +@@ -49,12 +50,14 @@ + (__u64)1 << _UFFDIO_COPY | \ + (__u64)1 << _UFFDIO_ZEROPAGE | \ + (__u64)1 << _UFFDIO_WRITEPROTECT | \ +- (__u64)1 << _UFFDIO_CONTINUE) ++ (__u64)1 << _UFFDIO_CONTINUE | \ ++ (__u64)1 << _UFFDIO_POISON) + #define UFFD_API_RANGE_IOCTLS_BASIC \ + ((__u64)1 << _UFFDIO_WAKE | \ + (__u64)1 << _UFFDIO_COPY | \ ++ (__u64)1 << _UFFDIO_WRITEPROTECT | \ + (__u64)1 << _UFFDIO_CONTINUE | \ +- (__u64)1 << _UFFDIO_WRITEPROTECT) ++ (__u64)1 << _UFFDIO_POISON) + + /* + * Valid ioctl command number range with this API is from 0x00 to +@@ -71,6 +74,7 @@ + #define _UFFDIO_ZEROPAGE (0x04) + #define _UFFDIO_WRITEPROTECT (0x06) + #define _UFFDIO_CONTINUE (0x07) ++#define _UFFDIO_POISON (0x08) + #define _UFFDIO_API (0x3F) + + /* userfaultfd ioctl ids */ +@@ -91,6 +95,8 @@ + struct uffdio_writeprotect) + #define UFFDIO_CONTINUE _IOWR(UFFDIO, _UFFDIO_CONTINUE, \ + struct uffdio_continue) ++#define UFFDIO_POISON _IOWR(UFFDIO, _UFFDIO_POISON, \ ++ struct uffdio_poison) + + /* read() structure */ + struct uffd_msg { +@@ -225,6 +231,7 @@ struct uffdio_api { + #define UFFD_FEATURE_EXACT_ADDRESS (1<<11) + #define UFFD_FEATURE_WP_HUGETLBFS_SHMEM (1<<12) + #define UFFD_FEATURE_WP_UNPOPULATED (1<<13) ++#define UFFD_FEATURE_POISON (1<<14) + __u64 features; + + __u64 ioctls; +@@ -321,6 +328,18 @@ struct uffdio_continue { + __s64 mapped; + }; + ++struct uffdio_poison { ++ struct uffdio_range range; ++#define UFFDIO_POISON_MODE_DONTWAKE ((__u64)1<<0) ++ __u64 mode; ++ ++ /* ++ * Fields below here are written by the ioctl and must be at 
the end: ++ * the copy_from_user will not read past here. ++ */ ++ __s64 updated; ++}; ++ + /* + * Flags for the userfaultfd(2) system call itself. + */ +diff --git a/linux-headers/linux/vfio.h b/linux-headers/linux/vfio.h +index 16db89071e..acf72b4999 100644 +--- a/linux-headers/linux/vfio.h ++++ b/linux-headers/linux/vfio.h +@@ -217,6 +217,7 @@ struct vfio_device_info { + __u32 num_regions; /* Max region index + 1 */ + __u32 num_irqs; /* Max IRQ index + 1 */ + __u32 cap_offset; /* Offset within info struct of first cap */ ++ __u32 pad; + }; + #define VFIO_DEVICE_GET_INFO _IO(VFIO_TYPE, VFIO_BASE + 7) + +@@ -677,11 +678,60 @@ enum { + * VFIO_DEVICE_GET_PCI_HOT_RESET_INFO - _IOWR(VFIO_TYPE, VFIO_BASE + 12, + * struct vfio_pci_hot_reset_info) + * ++ * This command is used to query the affected devices in the hot reset for ++ * a given device. ++ * ++ * This command always reports the segment, bus, and devfn information for ++ * each affected device, and selectively reports the group_id or devid per ++ * the way how the calling device is opened. ++ * ++ * - If the calling device is opened via the traditional group/container ++ * API, group_id is reported. User should check if it has owned all ++ * the affected devices and provides a set of group fds to prove the ++ * ownership in VFIO_DEVICE_PCI_HOT_RESET ioctl. ++ * ++ * - If the calling device is opened as a cdev, devid is reported. ++ * Flag VFIO_PCI_HOT_RESET_FLAG_DEV_ID is set to indicate this ++ * data type. All the affected devices should be represented in ++ * the dev_set, ex. bound to a vfio driver, and also be owned by ++ * this interface which is determined by the following conditions: ++ * 1) Has a valid devid within the iommufd_ctx of the calling device. ++ * Ownership cannot be determined across separate iommufd_ctx and ++ * the cdev calling conventions do not support a proof-of-ownership ++ * model as provided in the legacy group interface. In this case ++ * valid devid with value greater than zero is provided in the return ++ * structure. ++ * 2) Does not have a valid devid within the iommufd_ctx of the calling ++ * device, but belongs to the same IOMMU group as the calling device ++ * or another opened device that has a valid devid within the ++ * iommufd_ctx of the calling device. This provides implicit ownership ++ * for devices within the same DMA isolation context. In this case ++ * the devid value of VFIO_PCI_DEVID_OWNED is provided in the return ++ * structure. ++ * ++ * A devid value of VFIO_PCI_DEVID_NOT_OWNED is provided in the return ++ * structure for affected devices where device is NOT represented in the ++ * dev_set or ownership is not available. Such devices prevent the use ++ * of VFIO_DEVICE_PCI_HOT_RESET ioctl outside of the proof-of-ownership ++ * calling conventions (ie. via legacy group accessed devices). Flag ++ * VFIO_PCI_HOT_RESET_FLAG_DEV_ID_OWNED would be set when all the ++ * affected devices are represented in the dev_set and also owned by ++ * the user. This flag is available only when ++ * flag VFIO_PCI_HOT_RESET_FLAG_DEV_ID is set, otherwise reserved. ++ * When set, user could invoke VFIO_DEVICE_PCI_HOT_RESET with a zero ++ * length fd array on the calling device as the ownership is validated ++ * by iommufd_ctx. ++ * + * Return: 0 on success, -errno on failure: + * -enospc = insufficient buffer, -enodev = unsupported for device. 
+ */ + struct vfio_pci_dependent_device { +- __u32 group_id; ++ union { ++ __u32 group_id; ++ __u32 devid; ++#define VFIO_PCI_DEVID_OWNED 0 ++#define VFIO_PCI_DEVID_NOT_OWNED -1 ++ }; + __u16 segment; + __u8 bus; + __u8 devfn; /* Use PCI_SLOT/PCI_FUNC */ +@@ -690,6 +740,8 @@ struct vfio_pci_dependent_device { + struct vfio_pci_hot_reset_info { + __u32 argsz; + __u32 flags; ++#define VFIO_PCI_HOT_RESET_FLAG_DEV_ID (1 << 0) ++#define VFIO_PCI_HOT_RESET_FLAG_DEV_ID_OWNED (1 << 1) + __u32 count; + struct vfio_pci_dependent_device devices[]; + }; +@@ -700,6 +752,24 @@ struct vfio_pci_hot_reset_info { + * VFIO_DEVICE_PCI_HOT_RESET - _IOW(VFIO_TYPE, VFIO_BASE + 13, + * struct vfio_pci_hot_reset) + * ++ * A PCI hot reset results in either a bus or slot reset which may affect ++ * other devices sharing the bus/slot. The calling user must have ++ * ownership of the full set of affected devices as determined by the ++ * VFIO_DEVICE_GET_PCI_HOT_RESET_INFO ioctl. ++ * ++ * When called on a device file descriptor acquired through the vfio ++ * group interface, the user is required to provide proof of ownership ++ * of those affected devices via the group_fds array in struct ++ * vfio_pci_hot_reset. ++ * ++ * When called on a direct cdev opened vfio device, the flags field of ++ * struct vfio_pci_hot_reset_info reports the ownership status of the ++ * affected devices and this ioctl must be called with an empty group_fds ++ * array. See above INFO ioctl definition for ownership requirements. ++ * ++ * Mixed usage of legacy groups and cdevs across the set of affected ++ * devices is not supported. ++ * + * Return: 0 on success, -errno on failure. + */ + struct vfio_pci_hot_reset { +@@ -828,6 +898,83 @@ struct vfio_device_feature { + + #define VFIO_DEVICE_FEATURE _IO(VFIO_TYPE, VFIO_BASE + 17) + ++/* ++ * VFIO_DEVICE_BIND_IOMMUFD - _IOR(VFIO_TYPE, VFIO_BASE + 18, ++ * struct vfio_device_bind_iommufd) ++ * @argsz: User filled size of this data. ++ * @flags: Must be 0. ++ * @iommufd: iommufd to bind. ++ * @out_devid: The device id generated by this bind. devid is a handle for ++ * this device/iommufd bond and can be used in IOMMUFD commands. ++ * ++ * Bind a vfio_device to the specified iommufd. ++ * ++ * User is restricted from accessing the device before the binding operation ++ * is completed. Only allowed on cdev fds. ++ * ++ * Unbind is automatically conducted when device fd is closed. ++ * ++ * Return: 0 on success, -errno on failure. ++ */ ++struct vfio_device_bind_iommufd { ++ __u32 argsz; ++ __u32 flags; ++ __s32 iommufd; ++ __u32 out_devid; ++}; ++ ++#define VFIO_DEVICE_BIND_IOMMUFD _IO(VFIO_TYPE, VFIO_BASE + 18) ++ ++/* ++ * VFIO_DEVICE_ATTACH_IOMMUFD_PT - _IOW(VFIO_TYPE, VFIO_BASE + 19, ++ * struct vfio_device_attach_iommufd_pt) ++ * @argsz: User filled size of this data. ++ * @flags: Must be 0. ++ * @pt_id: Input the target id which can represent an ioas or a hwpt ++ * allocated via iommufd subsystem. ++ * Output the input ioas id or the attached hwpt id which could ++ * be the specified hwpt itself or a hwpt automatically created ++ * for the specified ioas by kernel during the attachment. ++ * ++ * Associate the device with an address space within the bound iommufd. ++ * Undo by VFIO_DEVICE_DETACH_IOMMUFD_PT or device fd close. This is only ++ * allowed on cdev fds. ++ * ++ * If a vfio device is currently attached to a valid hw_pagetable, without doing ++ * a VFIO_DEVICE_DETACH_IOMMUFD_PT, a second VFIO_DEVICE_ATTACH_IOMMUFD_PT ioctl ++ * passing in another hw_pagetable (hwpt) id is allowed. 
This action, also known ++ * as a hw_pagetable replacement, will replace the device's currently attached ++ * hw_pagetable with a new hw_pagetable corresponding to the given pt_id. ++ * ++ * Return: 0 on success, -errno on failure. ++ */ ++struct vfio_device_attach_iommufd_pt { ++ __u32 argsz; ++ __u32 flags; ++ __u32 pt_id; ++}; ++ ++#define VFIO_DEVICE_ATTACH_IOMMUFD_PT _IO(VFIO_TYPE, VFIO_BASE + 19) ++ ++/* ++ * VFIO_DEVICE_DETACH_IOMMUFD_PT - _IOW(VFIO_TYPE, VFIO_BASE + 20, ++ * struct vfio_device_detach_iommufd_pt) ++ * @argsz: User filled size of this data. ++ * @flags: Must be 0. ++ * ++ * Remove the association of the device and its current associated address ++ * space. After it, the device should be in a blocking DMA state. This is only ++ * allowed on cdev fds. ++ * ++ * Return: 0 on success, -errno on failure. ++ */ ++struct vfio_device_detach_iommufd_pt { ++ __u32 argsz; ++ __u32 flags; ++}; ++ ++#define VFIO_DEVICE_DETACH_IOMMUFD_PT _IO(VFIO_TYPE, VFIO_BASE + 20) ++ + /* + * Provide support for setting a PCI VF Token, which is used as a shared + * secret between PF and VF drivers. This feature may only be set on a +@@ -1304,6 +1451,7 @@ struct vfio_iommu_type1_info { + #define VFIO_IOMMU_INFO_CAPS (1 << 1) /* Info supports caps */ + __u64 iova_pgsizes; /* Bitmap of supported page sizes */ + __u32 cap_offset; /* Offset within info struct of first cap */ ++ __u32 pad; + }; + + /* +diff --git a/meson.build b/meson.build +index 98e68ef0b1..1e43c6e887 100644 +--- a/meson.build ++++ b/meson.build +@@ -114,6 +114,8 @@ elif cpu in ['riscv32'] + kvm_targets = ['riscv32-softmmu'] + elif cpu in ['riscv64'] + kvm_targets = ['riscv64-softmmu'] ++elif cpu in ['loongarch64'] ++ kvm_targets = ['loongarch64-softmmu'] + else + kvm_targets = [] + endif +@@ -3299,6 +3301,7 @@ if have_system or have_user + 'target/hppa', + 'target/i386', + 'target/i386/kvm', ++ 'target/loongarch', + 'target/mips/tcg', + 'target/nios2', + 'target/ppc', +diff --git a/target/loongarch/cpu.c b/target/loongarch/cpu.c +index ad93ecac92..138acb8100 100644 +--- a/target/loongarch/cpu.c ++++ b/target/loongarch/cpu.c +@@ -18,6 +18,11 @@ + #include "cpu-csr.h" + #include "sysemu/reset.h" + #include "tcg/tcg.h" ++#include "sysemu/kvm.h" ++#include "kvm_loongarch.h" ++#ifdef CONFIG_KVM ++#include ++#endif + + const char * const regnames[32] = { + "r0", "r1", "r2", "r3", "r4", "r5", "r6", "r7", +@@ -105,12 +110,15 @@ void loongarch_cpu_set_irq(void *opaque, int irq, int level) + return; + } + +- env->CSR_ESTAT = deposit64(env->CSR_ESTAT, irq, 1, level != 0); +- +- if (FIELD_EX64(env->CSR_ESTAT, CSR_ESTAT, IS)) { +- cpu_interrupt(cs, CPU_INTERRUPT_HARD); ++ if (kvm_enabled()) { ++ kvm_loongarch_set_interrupt(cpu, irq, level); + } else { +- cpu_reset_interrupt(cs, CPU_INTERRUPT_HARD); ++ env->CSR_ESTAT = deposit64(env->CSR_ESTAT, irq, 1, level != 0); ++ if (FIELD_EX64(env->CSR_ESTAT, CSR_ESTAT, IS)) { ++ cpu_interrupt(cs, CPU_INTERRUPT_HARD); ++ } else { ++ cpu_reset_interrupt(cs, CPU_INTERRUPT_HARD); ++ } + } + } + +@@ -489,10 +497,12 @@ static void loongarch_cpu_reset_hold(Object *obj) + + env->CSR_ESTAT = env->CSR_ESTAT & (~MAKE_64BIT_MASK(0, 2)); + env->CSR_RVACFG = FIELD_DP64(env->CSR_RVACFG, CSR_RVACFG, RBITS, 0); ++ env->CSR_CPUID = cs->cpu_index; + env->CSR_TCFG = FIELD_DP64(env->CSR_TCFG, CSR_TCFG, EN, 0); + env->CSR_LLBCTL = FIELD_DP64(env->CSR_LLBCTL, CSR_LLBCTL, KLO, 0); + env->CSR_TLBRERA = FIELD_DP64(env->CSR_TLBRERA, CSR_TLBRERA, ISTLBR, 0); + env->CSR_MERRCTL = FIELD_DP64(env->CSR_MERRCTL, CSR_MERRCTL, ISMERR, 
0); ++ env->CSR_TID = cs->cpu_index; + + env->CSR_PRCFG3 = FIELD_DP64(env->CSR_PRCFG3, CSR_PRCFG3, TLB_TYPE, 2); + env->CSR_PRCFG3 = FIELD_DP64(env->CSR_PRCFG3, CSR_PRCFG3, MTLB_ENTRY, 63); +@@ -509,6 +519,9 @@ static void loongarch_cpu_reset_hold(Object *obj) + #ifndef CONFIG_USER_ONLY + env->pc = 0x1c000000; + memset(env->tlb, 0, sizeof(env->tlb)); ++ if (kvm_enabled()) { ++ kvm_arch_reset_vcpu(env); ++ } + #endif + + restore_fp_status(env); +diff --git a/target/loongarch/cpu.h b/target/loongarch/cpu.h +index fa371ca8ba..6ae753def2 100644 +--- a/target/loongarch/cpu.h ++++ b/target/loongarch/cpu.h +@@ -311,6 +311,7 @@ typedef struct CPUArchState { + uint64_t CSR_PWCH; + uint64_t CSR_STLBPS; + uint64_t CSR_RVACFG; ++ uint64_t CSR_CPUID; + uint64_t CSR_PRCFG1; + uint64_t CSR_PRCFG2; + uint64_t CSR_PRCFG3; +@@ -342,7 +343,6 @@ typedef struct CPUArchState { + uint64_t CSR_DBG; + uint64_t CSR_DERA; + uint64_t CSR_DSAVE; +- uint64_t CSR_CPUID; + + #ifndef CONFIG_USER_ONLY + LoongArchTLB tlb[LOONGARCH_TLB_MAX]; +@@ -352,6 +352,7 @@ typedef struct CPUArchState { + MemoryRegion iocsr_mem; + bool load_elf; + uint64_t elf_address; ++ uint32_t mp_state; + /* Store ipistate to access from this struct */ + DeviceState *ipistate; + #endif +@@ -374,6 +375,8 @@ struct ArchCPU { + + /* 'compatible' string for this CPU for Linux device trees */ + const char *dtb_compatible; ++ /* used by KVM_REG_LOONGARCH_COUNTER ioctl to access guest time counters */ ++ uint64_t kvm_state_counter; + }; + + #define TYPE_LOONGARCH_CPU "loongarch-cpu" +@@ -439,6 +442,7 @@ static inline void cpu_get_tb_cpu_state(CPULoongArchState *env, vaddr *pc, + } + + void loongarch_cpu_list(void); ++void kvm_arch_reset_vcpu(CPULoongArchState *env); + + #define cpu_list loongarch_cpu_list + +diff --git a/target/loongarch/kvm-stub.c b/target/loongarch/kvm-stub.c +new file mode 100644 +index 0000000000..04534f55b0 +--- /dev/null ++++ b/target/loongarch/kvm-stub.c +@@ -0,0 +1,14 @@ ++/* ++ * QEMU KVM LoongArch specific function stubs ++ * ++ * Copyright (c) 2023 Loongson Technology Corporation Limited ++ */ ++#include "qemu/osdep.h" ++#include "cpu.h" ++#include "kvm_loongarch.h" ++ ++int kvm_loongarch_set_interrupt(LoongArchCPU *cpu, int irq, int level) ++{ ++ g_assert_not_reached(); ++ return 0; ++} +diff --git a/target/loongarch/kvm.c b/target/loongarch/kvm.c +new file mode 100644 +index 0000000000..4dca207f6d +--- /dev/null ++++ b/target/loongarch/kvm.c +@@ -0,0 +1,588 @@ ++/* SPDX-License-Identifier: GPL-2.0-or-later */ ++/* ++ * QEMU LoongArch KVM ++ * ++ * Copyright (c) 2023 Loongson Technology Corporation Limited ++ */ ++ ++#include "qemu/osdep.h" ++#include ++#include ++ ++#include "qemu/timer.h" ++#include "qemu/error-report.h" ++#include "qemu/main-loop.h" ++#include "sysemu/sysemu.h" ++#include "sysemu/kvm.h" ++#include "sysemu/kvm_int.h" ++#include "hw/pci/pci.h" ++#include "exec/memattrs.h" ++#include "exec/address-spaces.h" ++#include "hw/boards.h" ++#include "hw/irq.h" ++#include "qemu/log.h" ++#include "hw/loader.h" ++#include "migration/migration.h" ++#include "sysemu/runstate.h" ++#include "cpu-csr.h" ++#include "kvm_loongarch.h" ++#include "trace.h" ++ ++static bool cap_has_mp_state; ++const KVMCapabilityInfo kvm_arch_required_capabilities[] = { ++ KVM_CAP_LAST_INFO ++}; ++ ++static int kvm_loongarch_get_regs_core(CPUState *cs) ++{ ++ int ret = 0; ++ int i; ++ struct kvm_regs regs; ++ LoongArchCPU *cpu = LOONGARCH_CPU(cs); ++ CPULoongArchState *env = &cpu->env; ++ ++ /* Get the current register set as KVM seems it */ ++ 
ret = kvm_vcpu_ioctl(cs, KVM_GET_REGS, ®s); ++ if (ret < 0) { ++ trace_kvm_failed_get_regs_core(strerror(errno)); ++ return ret; ++ } ++ /* gpr[0] value is always 0 */ ++ env->gpr[0] = 0; ++ for (i = 1; i < 32; i++) { ++ env->gpr[i] = regs.gpr[i]; ++ } ++ ++ env->pc = regs.pc; ++ return ret; ++} ++ ++static int kvm_loongarch_put_regs_core(CPUState *cs) ++{ ++ int ret = 0; ++ int i; ++ struct kvm_regs regs; ++ LoongArchCPU *cpu = LOONGARCH_CPU(cs); ++ CPULoongArchState *env = &cpu->env; ++ ++ /* Set the registers based on QEMU's view of things */ ++ for (i = 0; i < 32; i++) { ++ regs.gpr[i] = env->gpr[i]; ++ } ++ ++ regs.pc = env->pc; ++ ret = kvm_vcpu_ioctl(cs, KVM_SET_REGS, ®s); ++ if (ret < 0) { ++ trace_kvm_failed_put_regs_core(strerror(errno)); ++ } ++ ++ return ret; ++} ++ ++static int kvm_larch_getq(CPUState *cs, uint64_t reg_id, ++ uint64_t *addr) ++{ ++ struct kvm_one_reg csrreg = { ++ .id = reg_id, ++ .addr = (uintptr_t)addr ++ }; ++ ++ return kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &csrreg); ++} ++ ++static int kvm_larch_putq(CPUState *cs, uint64_t reg_id, ++ uint64_t *addr) ++{ ++ struct kvm_one_reg csrreg = { ++ .id = reg_id, ++ .addr = (uintptr_t)addr ++ }; ++ ++ return kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &csrreg); ++} ++ ++#define KVM_GET_ONE_UREG64(cs, ret, regidx, addr) \ ++ ({ \ ++ err = kvm_larch_getq(cs, KVM_IOC_CSRID(regidx), addr); \ ++ if (err < 0) { \ ++ ret = err; \ ++ trace_kvm_failed_get_csr(regidx, strerror(errno)); \ ++ } \ ++ }) ++ ++#define KVM_PUT_ONE_UREG64(cs, ret, regidx, addr) \ ++ ({ \ ++ err = kvm_larch_putq(cs, KVM_IOC_CSRID(regidx), addr); \ ++ if (err < 0) { \ ++ ret = err; \ ++ trace_kvm_failed_put_csr(regidx, strerror(errno)); \ ++ } \ ++ }) ++ ++static int kvm_loongarch_get_csr(CPUState *cs) ++{ ++ int err, ret = 0; ++ LoongArchCPU *cpu = LOONGARCH_CPU(cs); ++ CPULoongArchState *env = &cpu->env; ++ ++ KVM_GET_ONE_UREG64(cs, ret, LOONGARCH_CSR_CRMD, &env->CSR_CRMD); ++ KVM_GET_ONE_UREG64(cs, ret, LOONGARCH_CSR_PRMD, &env->CSR_PRMD); ++ KVM_GET_ONE_UREG64(cs, ret, LOONGARCH_CSR_EUEN, &env->CSR_EUEN); ++ KVM_GET_ONE_UREG64(cs, ret, LOONGARCH_CSR_MISC, &env->CSR_MISC); ++ KVM_GET_ONE_UREG64(cs, ret, LOONGARCH_CSR_ECFG, &env->CSR_ECFG); ++ KVM_GET_ONE_UREG64(cs, ret, LOONGARCH_CSR_ESTAT, &env->CSR_ESTAT); ++ KVM_GET_ONE_UREG64(cs, ret, LOONGARCH_CSR_ERA, &env->CSR_ERA); ++ KVM_GET_ONE_UREG64(cs, ret, LOONGARCH_CSR_BADV, &env->CSR_BADV); ++ KVM_GET_ONE_UREG64(cs, ret, LOONGARCH_CSR_BADI, &env->CSR_BADI); ++ KVM_GET_ONE_UREG64(cs, ret, LOONGARCH_CSR_EENTRY, &env->CSR_EENTRY); ++ KVM_GET_ONE_UREG64(cs, ret, LOONGARCH_CSR_TLBIDX, &env->CSR_TLBIDX); ++ KVM_GET_ONE_UREG64(cs, ret, LOONGARCH_CSR_TLBEHI, &env->CSR_TLBEHI); ++ KVM_GET_ONE_UREG64(cs, ret, LOONGARCH_CSR_TLBELO0, &env->CSR_TLBELO0); ++ KVM_GET_ONE_UREG64(cs, ret, LOONGARCH_CSR_TLBELO1, &env->CSR_TLBELO1); ++ KVM_GET_ONE_UREG64(cs, ret, LOONGARCH_CSR_ASID, &env->CSR_ASID); ++ KVM_GET_ONE_UREG64(cs, ret, LOONGARCH_CSR_PGDL, &env->CSR_PGDL); ++ KVM_GET_ONE_UREG64(cs, ret, LOONGARCH_CSR_PGDH, &env->CSR_PGDH); ++ KVM_GET_ONE_UREG64(cs, ret, LOONGARCH_CSR_PGD, &env->CSR_PGD); ++ KVM_GET_ONE_UREG64(cs, ret, LOONGARCH_CSR_PWCL, &env->CSR_PWCL); ++ KVM_GET_ONE_UREG64(cs, ret, LOONGARCH_CSR_PWCH, &env->CSR_PWCH); ++ KVM_GET_ONE_UREG64(cs, ret, LOONGARCH_CSR_STLBPS, &env->CSR_STLBPS); ++ KVM_GET_ONE_UREG64(cs, ret, LOONGARCH_CSR_RVACFG, &env->CSR_RVACFG); ++ KVM_GET_ONE_UREG64(cs, ret, LOONGARCH_CSR_CPUID, &env->CSR_CPUID); ++ KVM_GET_ONE_UREG64(cs, ret, LOONGARCH_CSR_PRCFG1, &env->CSR_PRCFG1); ++ 
KVM_GET_ONE_UREG64(cs, ret, LOONGARCH_CSR_PRCFG2, &env->CSR_PRCFG2); ++ KVM_GET_ONE_UREG64(cs, ret, LOONGARCH_CSR_PRCFG3, &env->CSR_PRCFG3); ++ KVM_GET_ONE_UREG64(cs, ret, LOONGARCH_CSR_SAVE(0), &env->CSR_SAVE[0]); ++ KVM_GET_ONE_UREG64(cs, ret, LOONGARCH_CSR_SAVE(1), &env->CSR_SAVE[1]); ++ KVM_GET_ONE_UREG64(cs, ret, LOONGARCH_CSR_SAVE(2), &env->CSR_SAVE[2]); ++ KVM_GET_ONE_UREG64(cs, ret, LOONGARCH_CSR_SAVE(3), &env->CSR_SAVE[3]); ++ KVM_GET_ONE_UREG64(cs, ret, LOONGARCH_CSR_SAVE(4), &env->CSR_SAVE[4]); ++ KVM_GET_ONE_UREG64(cs, ret, LOONGARCH_CSR_SAVE(5), &env->CSR_SAVE[5]); ++ KVM_GET_ONE_UREG64(cs, ret, LOONGARCH_CSR_SAVE(6), &env->CSR_SAVE[6]); ++ KVM_GET_ONE_UREG64(cs, ret, LOONGARCH_CSR_SAVE(7), &env->CSR_SAVE[7]); ++ KVM_GET_ONE_UREG64(cs, ret, LOONGARCH_CSR_TID, &env->CSR_TID); ++ KVM_GET_ONE_UREG64(cs, ret, LOONGARCH_CSR_CNTC, &env->CSR_CNTC); ++ KVM_GET_ONE_UREG64(cs, ret, LOONGARCH_CSR_TICLR, &env->CSR_TICLR); ++ KVM_GET_ONE_UREG64(cs, ret, LOONGARCH_CSR_LLBCTL, &env->CSR_LLBCTL); ++ KVM_GET_ONE_UREG64(cs, ret, LOONGARCH_CSR_IMPCTL1, &env->CSR_IMPCTL1); ++ KVM_GET_ONE_UREG64(cs, ret, LOONGARCH_CSR_IMPCTL2, &env->CSR_IMPCTL2); ++ KVM_GET_ONE_UREG64(cs, ret, LOONGARCH_CSR_TLBRENTRY, &env->CSR_TLBRENTRY); ++ KVM_GET_ONE_UREG64(cs, ret, LOONGARCH_CSR_TLBRBADV, &env->CSR_TLBRBADV); ++ KVM_GET_ONE_UREG64(cs, ret, LOONGARCH_CSR_TLBRERA, &env->CSR_TLBRERA); ++ KVM_GET_ONE_UREG64(cs, ret, LOONGARCH_CSR_TLBRSAVE, &env->CSR_TLBRSAVE); ++ KVM_GET_ONE_UREG64(cs, ret, LOONGARCH_CSR_TLBRELO0, &env->CSR_TLBRELO0); ++ KVM_GET_ONE_UREG64(cs, ret, LOONGARCH_CSR_TLBRELO1, &env->CSR_TLBRELO1); ++ KVM_GET_ONE_UREG64(cs, ret, LOONGARCH_CSR_TLBREHI, &env->CSR_TLBREHI); ++ KVM_GET_ONE_UREG64(cs, ret, LOONGARCH_CSR_TLBRPRMD, &env->CSR_TLBRPRMD); ++ KVM_GET_ONE_UREG64(cs, ret, LOONGARCH_CSR_DMW(0), &env->CSR_DMW[0]); ++ KVM_GET_ONE_UREG64(cs, ret, LOONGARCH_CSR_DMW(1), &env->CSR_DMW[1]); ++ KVM_GET_ONE_UREG64(cs, ret, LOONGARCH_CSR_DMW(2), &env->CSR_DMW[2]); ++ KVM_GET_ONE_UREG64(cs, ret, LOONGARCH_CSR_DMW(3), &env->CSR_DMW[3]); ++ KVM_GET_ONE_UREG64(cs, ret, LOONGARCH_CSR_TVAL, &env->CSR_TVAL); ++ KVM_GET_ONE_UREG64(cs, ret, LOONGARCH_CSR_TCFG, &env->CSR_TCFG); ++ ++ return ret; ++} ++ ++static int kvm_loongarch_put_csr(CPUState *cs) ++{ ++ int err, ret = 0; ++ LoongArchCPU *cpu = LOONGARCH_CPU(cs); ++ CPULoongArchState *env = &cpu->env; ++ ++ KVM_PUT_ONE_UREG64(cs, ret, LOONGARCH_CSR_CRMD, &env->CSR_CRMD); ++ KVM_PUT_ONE_UREG64(cs, ret, LOONGARCH_CSR_PRMD, &env->CSR_PRMD); ++ KVM_PUT_ONE_UREG64(cs, ret, LOONGARCH_CSR_EUEN, &env->CSR_EUEN); ++ KVM_PUT_ONE_UREG64(cs, ret, LOONGARCH_CSR_MISC, &env->CSR_MISC); ++ KVM_PUT_ONE_UREG64(cs, ret, LOONGARCH_CSR_ECFG, &env->CSR_ECFG); ++ KVM_PUT_ONE_UREG64(cs, ret, LOONGARCH_CSR_ESTAT, &env->CSR_ESTAT); ++ KVM_PUT_ONE_UREG64(cs, ret, LOONGARCH_CSR_ERA, &env->CSR_ERA); ++ KVM_PUT_ONE_UREG64(cs, ret, LOONGARCH_CSR_BADV, &env->CSR_BADV); ++ KVM_PUT_ONE_UREG64(cs, ret, LOONGARCH_CSR_BADI, &env->CSR_BADI); ++ KVM_PUT_ONE_UREG64(cs, ret, LOONGARCH_CSR_EENTRY, &env->CSR_EENTRY); ++ KVM_PUT_ONE_UREG64(cs, ret, LOONGARCH_CSR_TLBIDX, &env->CSR_TLBIDX); ++ KVM_PUT_ONE_UREG64(cs, ret, LOONGARCH_CSR_TLBEHI, &env->CSR_TLBEHI); ++ KVM_PUT_ONE_UREG64(cs, ret, LOONGARCH_CSR_TLBELO0, &env->CSR_TLBELO0); ++ KVM_PUT_ONE_UREG64(cs, ret, LOONGARCH_CSR_TLBELO1, &env->CSR_TLBELO1); ++ KVM_PUT_ONE_UREG64(cs, ret, LOONGARCH_CSR_ASID, &env->CSR_ASID); ++ KVM_PUT_ONE_UREG64(cs, ret, LOONGARCH_CSR_PGDL, &env->CSR_PGDL); ++ KVM_PUT_ONE_UREG64(cs, ret, LOONGARCH_CSR_PGDH, &env->CSR_PGDH); ++ 
KVM_PUT_ONE_UREG64(cs, ret, LOONGARCH_CSR_PGD, &env->CSR_PGD); ++ KVM_PUT_ONE_UREG64(cs, ret, LOONGARCH_CSR_PWCL, &env->CSR_PWCL); ++ KVM_PUT_ONE_UREG64(cs, ret, LOONGARCH_CSR_PWCH, &env->CSR_PWCH); ++ KVM_PUT_ONE_UREG64(cs, ret, LOONGARCH_CSR_STLBPS, &env->CSR_STLBPS); ++ KVM_PUT_ONE_UREG64(cs, ret, LOONGARCH_CSR_RVACFG, &env->CSR_RVACFG); ++ KVM_PUT_ONE_UREG64(cs, ret, LOONGARCH_CSR_CPUID, &env->CSR_CPUID); ++ KVM_PUT_ONE_UREG64(cs, ret, LOONGARCH_CSR_PRCFG1, &env->CSR_PRCFG1); ++ KVM_PUT_ONE_UREG64(cs, ret, LOONGARCH_CSR_PRCFG2, &env->CSR_PRCFG2); ++ KVM_PUT_ONE_UREG64(cs, ret, LOONGARCH_CSR_PRCFG3, &env->CSR_PRCFG3); ++ KVM_PUT_ONE_UREG64(cs, ret, LOONGARCH_CSR_SAVE(0), &env->CSR_SAVE[0]); ++ KVM_PUT_ONE_UREG64(cs, ret, LOONGARCH_CSR_SAVE(1), &env->CSR_SAVE[1]); ++ KVM_PUT_ONE_UREG64(cs, ret, LOONGARCH_CSR_SAVE(2), &env->CSR_SAVE[2]); ++ KVM_PUT_ONE_UREG64(cs, ret, LOONGARCH_CSR_SAVE(3), &env->CSR_SAVE[3]); ++ KVM_PUT_ONE_UREG64(cs, ret, LOONGARCH_CSR_SAVE(4), &env->CSR_SAVE[4]); ++ KVM_PUT_ONE_UREG64(cs, ret, LOONGARCH_CSR_SAVE(5), &env->CSR_SAVE[5]); ++ KVM_PUT_ONE_UREG64(cs, ret, LOONGARCH_CSR_SAVE(6), &env->CSR_SAVE[6]); ++ KVM_PUT_ONE_UREG64(cs, ret, LOONGARCH_CSR_SAVE(7), &env->CSR_SAVE[7]); ++ KVM_PUT_ONE_UREG64(cs, ret, LOONGARCH_CSR_TID, &env->CSR_TID); ++ KVM_PUT_ONE_UREG64(cs, ret, LOONGARCH_CSR_CNTC, &env->CSR_CNTC); ++ KVM_PUT_ONE_UREG64(cs, ret, LOONGARCH_CSR_TICLR, &env->CSR_TICLR); ++ KVM_PUT_ONE_UREG64(cs, ret, LOONGARCH_CSR_LLBCTL, &env->CSR_LLBCTL); ++ KVM_PUT_ONE_UREG64(cs, ret, LOONGARCH_CSR_IMPCTL1, &env->CSR_IMPCTL1); ++ KVM_PUT_ONE_UREG64(cs, ret, LOONGARCH_CSR_IMPCTL2, &env->CSR_IMPCTL2); ++ KVM_PUT_ONE_UREG64(cs, ret, LOONGARCH_CSR_TLBRENTRY, &env->CSR_TLBRENTRY); ++ KVM_PUT_ONE_UREG64(cs, ret, LOONGARCH_CSR_TLBRBADV, &env->CSR_TLBRBADV); ++ KVM_PUT_ONE_UREG64(cs, ret, LOONGARCH_CSR_TLBRERA, &env->CSR_TLBRERA); ++ KVM_PUT_ONE_UREG64(cs, ret, LOONGARCH_CSR_TLBRSAVE, &env->CSR_TLBRSAVE); ++ KVM_PUT_ONE_UREG64(cs, ret, LOONGARCH_CSR_TLBRELO0, &env->CSR_TLBRELO0); ++ KVM_PUT_ONE_UREG64(cs, ret, LOONGARCH_CSR_TLBRELO1, &env->CSR_TLBRELO1); ++ KVM_PUT_ONE_UREG64(cs, ret, LOONGARCH_CSR_TLBREHI, &env->CSR_TLBREHI); ++ KVM_PUT_ONE_UREG64(cs, ret, LOONGARCH_CSR_TLBRPRMD, &env->CSR_TLBRPRMD); ++ KVM_PUT_ONE_UREG64(cs, ret, LOONGARCH_CSR_DMW(0), &env->CSR_DMW[0]); ++ KVM_PUT_ONE_UREG64(cs, ret, LOONGARCH_CSR_DMW(1), &env->CSR_DMW[1]); ++ KVM_PUT_ONE_UREG64(cs, ret, LOONGARCH_CSR_DMW(2), &env->CSR_DMW[2]); ++ KVM_PUT_ONE_UREG64(cs, ret, LOONGARCH_CSR_DMW(3), &env->CSR_DMW[3]); ++ /* ++ * timer cfg must be put at last since it is used to enable ++ * guest timer ++ */ ++ KVM_PUT_ONE_UREG64(cs, ret, LOONGARCH_CSR_TVAL, &env->CSR_TVAL); ++ KVM_PUT_ONE_UREG64(cs, ret, LOONGARCH_CSR_TCFG, &env->CSR_TCFG); ++ return ret; ++} ++ ++static int kvm_loongarch_get_regs_fp(CPUState *cs) ++{ ++ int ret, i; ++ struct kvm_fpu fpu; ++ ++ LoongArchCPU *cpu = LOONGARCH_CPU(cs); ++ CPULoongArchState *env = &cpu->env; ++ ++ ret = kvm_vcpu_ioctl(cs, KVM_GET_FPU, &fpu); ++ if (ret < 0) { ++ trace_kvm_failed_get_fpu(strerror(errno)); ++ return ret; ++ } ++ ++ env->fcsr0 = fpu.fcsr; ++ for (i = 0; i < 32; i++) { ++ env->fpr[i].vreg.UD[0] = fpu.fpr[i].val64[0]; ++ } ++ for (i = 0; i < 8; i++) { ++ env->cf[i] = fpu.fcc & 0xFF; ++ fpu.fcc = fpu.fcc >> 8; ++ } ++ ++ return ret; ++} ++ ++static int kvm_loongarch_put_regs_fp(CPUState *cs) ++{ ++ int ret, i; ++ struct kvm_fpu fpu; ++ ++ LoongArchCPU *cpu = LOONGARCH_CPU(cs); ++ CPULoongArchState *env = &cpu->env; ++ ++ fpu.fcsr = env->fcsr0; ++ fpu.fcc = 
0; ++ for (i = 0; i < 32; i++) { ++ fpu.fpr[i].val64[0] = env->fpr[i].vreg.UD[0]; ++ } ++ ++ for (i = 0; i < 8; i++) { ++ fpu.fcc |= env->cf[i] << (8 * i); ++ } ++ ++ ret = kvm_vcpu_ioctl(cs, KVM_SET_FPU, &fpu); ++ if (ret < 0) { ++ trace_kvm_failed_put_fpu(strerror(errno)); ++ } ++ ++ return ret; ++} ++ ++void kvm_arch_reset_vcpu(CPULoongArchState *env) ++{ ++ env->mp_state = KVM_MP_STATE_RUNNABLE; ++} ++ ++static int kvm_loongarch_get_mpstate(CPUState *cs) ++{ ++ int ret = 0; ++ struct kvm_mp_state mp_state; ++ LoongArchCPU *cpu = LOONGARCH_CPU(cs); ++ CPULoongArchState *env = &cpu->env; ++ ++ if (cap_has_mp_state) { ++ ret = kvm_vcpu_ioctl(cs, KVM_GET_MP_STATE, &mp_state); ++ if (ret) { ++ trace_kvm_failed_get_mpstate(strerror(errno)); ++ return ret; ++ } ++ env->mp_state = mp_state.mp_state; ++ } ++ ++ return ret; ++} ++ ++static int kvm_loongarch_put_mpstate(CPUState *cs) ++{ ++ int ret = 0; ++ ++ LoongArchCPU *cpu = LOONGARCH_CPU(cs); ++ CPULoongArchState *env = &cpu->env; ++ ++ struct kvm_mp_state mp_state = { ++ .mp_state = env->mp_state ++ }; ++ ++ if (cap_has_mp_state) { ++ ret = kvm_vcpu_ioctl(cs, KVM_SET_MP_STATE, &mp_state); ++ if (ret) { ++ trace_kvm_failed_put_mpstate(strerror(errno)); ++ } ++ } ++ ++ return ret; ++} ++ ++static int kvm_loongarch_get_cpucfg(CPUState *cs) ++{ ++ int i, ret = 0; ++ uint64_t val; ++ LoongArchCPU *cpu = LOONGARCH_CPU(cs); ++ CPULoongArchState *env = &cpu->env; ++ ++ for (i = 0; i < 21; i++) { ++ ret = kvm_larch_getq(cs, KVM_IOC_CPUCFG(i), &val); ++ if (ret < 0) { ++ trace_kvm_failed_get_cpucfg(strerror(errno)); ++ } ++ env->cpucfg[i] = (uint32_t)val; ++ } ++ return ret; ++} ++ ++static int kvm_loongarch_put_cpucfg(CPUState *cs) ++{ ++ int i, ret = 0; ++ LoongArchCPU *cpu = LOONGARCH_CPU(cs); ++ CPULoongArchState *env = &cpu->env; ++ uint64_t val; ++ ++ for (i = 0; i < 21; i++) { ++ val = env->cpucfg[i]; ++ ret = kvm_larch_putq(cs, KVM_IOC_CPUCFG(i), &val); ++ if (ret < 0) { ++ trace_kvm_failed_put_cpucfg(strerror(errno)); ++ } ++ } ++ return ret; ++} ++ ++int kvm_arch_get_registers(CPUState *cs) ++{ ++ int ret; ++ ++ ret = kvm_loongarch_get_regs_core(cs); ++ if (ret) { ++ return ret; ++ } ++ ++ ret = kvm_loongarch_get_csr(cs); ++ if (ret) { ++ return ret; ++ } ++ ++ ret = kvm_loongarch_get_regs_fp(cs); ++ if (ret) { ++ return ret; ++ } ++ ++ ret = kvm_loongarch_get_mpstate(cs); ++ if (ret) { ++ return ret; ++ } ++ ++ ret = kvm_loongarch_get_cpucfg(cs); ++ return ret; ++} ++ ++int kvm_arch_put_registers(CPUState *cs, int level) ++{ ++ int ret; ++ ++ ret = kvm_loongarch_put_regs_core(cs); ++ if (ret) { ++ return ret; ++ } ++ ++ ret = kvm_loongarch_put_csr(cs); ++ if (ret) { ++ return ret; ++ } ++ ++ ret = kvm_loongarch_put_regs_fp(cs); ++ if (ret) { ++ return ret; ++ } ++ ++ ret = kvm_loongarch_put_mpstate(cs); ++ if (ret) { ++ return ret; ++ } ++ ++ ret = kvm_loongarch_put_cpucfg(cs); ++ return ret; ++} ++ ++static void kvm_loongarch_vm_stage_change(void *opaque, bool running, ++ RunState state) ++{ ++ int ret; ++ CPUState *cs = opaque; ++ LoongArchCPU *cpu = LOONGARCH_CPU(cs); ++ ++ if (running) { ++ ret = kvm_larch_putq(cs, KVM_REG_LOONGARCH_COUNTER, ++ &cpu->kvm_state_counter); ++ if (ret < 0) { ++ trace_kvm_failed_put_counter(strerror(errno)); ++ } ++ } else { ++ ret = kvm_larch_getq(cs, KVM_REG_LOONGARCH_COUNTER, ++ &cpu->kvm_state_counter); ++ if (ret < 0) { ++ trace_kvm_failed_get_counter(strerror(errno)); ++ } ++ } ++} ++ ++int kvm_arch_init_vcpu(CPUState *cs) ++{ ++ qemu_add_vm_change_state_handler(kvm_loongarch_vm_stage_change, cs); ++ 
return 0; ++} ++ ++int kvm_arch_destroy_vcpu(CPUState *cs) ++{ ++ return 0; ++} ++ ++unsigned long kvm_arch_vcpu_id(CPUState *cs) ++{ ++ return cs->cpu_index; ++} ++ ++int kvm_arch_release_virq_post(int virq) ++{ ++ return 0; ++} ++ ++int kvm_arch_msi_data_to_gsi(uint32_t data) ++{ ++ abort(); ++} ++ ++int kvm_arch_fixup_msi_route(struct kvm_irq_routing_entry *route, ++ uint64_t address, uint32_t data, PCIDevice *dev) ++{ ++ return 0; ++} ++ ++int kvm_arch_add_msi_route_post(struct kvm_irq_routing_entry *route, ++ int vector, PCIDevice *dev) ++{ ++ return 0; ++} ++ ++void kvm_arch_init_irq_routing(KVMState *s) ++{ ++} ++ ++int kvm_arch_get_default_type(MachineState *ms) ++{ ++ return 0; ++} ++ ++int kvm_arch_init(MachineState *ms, KVMState *s) ++{ ++ cap_has_mp_state = kvm_check_extension(s, KVM_CAP_MP_STATE); ++ return 0; ++} ++ ++int kvm_arch_irqchip_create(KVMState *s) ++{ ++ return 0; ++} ++ ++void kvm_arch_pre_run(CPUState *cs, struct kvm_run *run) ++{ ++} ++ ++MemTxAttrs kvm_arch_post_run(CPUState *cs, struct kvm_run *run) ++{ ++ return MEMTXATTRS_UNSPECIFIED; ++} ++ ++int kvm_arch_process_async_events(CPUState *cs) ++{ ++ return cs->halted; ++} ++ ++bool kvm_arch_stop_on_emulation_error(CPUState *cs) ++{ ++ return true; ++} ++ ++bool kvm_arch_cpu_check_are_resettable(void) ++{ ++ return true; ++} ++ ++int kvm_arch_handle_exit(CPUState *cs, struct kvm_run *run) ++{ ++ int ret = 0; ++ LoongArchCPU *cpu = LOONGARCH_CPU(cs); ++ CPULoongArchState *env = &cpu->env; ++ MemTxAttrs attrs = {}; ++ ++ attrs.requester_id = env_cpu(env)->cpu_index; ++ ++ trace_kvm_arch_handle_exit(run->exit_reason); ++ switch (run->exit_reason) { ++ case KVM_EXIT_LOONGARCH_IOCSR: ++ address_space_rw(&env->address_space_iocsr, ++ run->iocsr_io.phys_addr, ++ attrs, ++ run->iocsr_io.data, ++ run->iocsr_io.len, ++ run->iocsr_io.is_write); ++ break; ++ default: ++ ret = -1; ++ warn_report("KVM: unknown exit reason %d", run->exit_reason); ++ break; ++ } ++ return ret; ++} ++ ++int kvm_loongarch_set_interrupt(LoongArchCPU *cpu, int irq, int level) ++{ ++ struct kvm_interrupt intr; ++ CPUState *cs = CPU(cpu); ++ ++ if (level) { ++ intr.irq = irq; ++ } else { ++ intr.irq = -irq; ++ } ++ ++ trace_kvm_set_intr(irq, level); ++ return kvm_vcpu_ioctl(cs, KVM_INTERRUPT, &intr); ++} ++ ++void kvm_arch_accel_class_init(ObjectClass *oc) ++{ ++} +diff --git a/target/loongarch/kvm_loongarch.h b/target/loongarch/kvm_loongarch.h +new file mode 100644 +index 0000000000..cdef980eec +--- /dev/null ++++ b/target/loongarch/kvm_loongarch.h +@@ -0,0 +1,13 @@ ++/* SPDX-License-Identifier: GPL-2.0-or-later */ ++/* ++ * QEMU LoongArch kvm interface ++ * ++ * Copyright (c) 2023 Loongson Technology Corporation Limited ++ */ ++ ++#ifndef QEMU_KVM_LOONGARCH_H ++#define QEMU_KVM_LOONGARCH_H ++ ++int kvm_loongarch_set_interrupt(LoongArchCPU *cpu, int irq, int level); ++ ++#endif +diff --git a/target/loongarch/meson.build b/target/loongarch/meson.build +index b7a27df5a9..b3cceccc95 100644 +--- a/target/loongarch/meson.build ++++ b/target/loongarch/meson.build +@@ -27,6 +27,7 @@ loongarch_system_ss.add(files( + + common_ss.add(when: 'CONFIG_LOONGARCH_DIS', if_true: [files('disas.c'), gen]) + ++loongarch_ss.add(when: 'CONFIG_KVM', if_true: files('kvm.c'), if_false: files('kvm-stub.c')) + loongarch_ss.add_all(when: 'CONFIG_TCG', if_true: [loongarch_tcg_ss]) + + target_arch += {'loongarch': loongarch_ss} +diff --git a/target/loongarch/trace-events b/target/loongarch/trace-events +new file mode 100644 +index 0000000000..3263406ebe +--- /dev/null ++++ 
b/target/loongarch/trace-events +@@ -0,0 +1,17 @@ ++# See docs/devel/tracing.rst for syntax documentation. ++ ++#kvm.c ++kvm_failed_get_regs_core(const char *msg) "Failed to get core regs from KVM: %s" ++kvm_failed_put_regs_core(const char *msg) "Failed to put core regs into KVM: %s" ++kvm_failed_get_csr(int csr, const char *msg) "Failed to get csr 0x%x from KVM: %s" ++kvm_failed_put_csr(int csr, const char *msg) "Failed to put csr 0x%x into KVM: %s" ++kvm_failed_get_fpu(const char *msg) "Failed to get fpu from KVM: %s" ++kvm_failed_put_fpu(const char *msg) "Failed to put fpu into KVM: %s" ++kvm_failed_get_mpstate(const char *msg) "Failed to get mp_state from KVM: %s" ++kvm_failed_put_mpstate(const char *msg) "Failed to put mp_state into KVM: %s" ++kvm_failed_get_counter(const char *msg) "Failed to get counter from KVM: %s" ++kvm_failed_put_counter(const char *msg) "Failed to put counter into KVM: %s" ++kvm_failed_get_cpucfg(const char *msg) "Failed to get cpucfg from KVM: %s" ++kvm_failed_put_cpucfg(const char *msg) "Failed to put cpucfg into KVM: %s" ++kvm_arch_handle_exit(int num) "kvm arch handle exit, the reason number: %d" ++kvm_set_intr(int irq, int level) "kvm set interrupt, irq num: %d, level: %d" +diff --git a/target/loongarch/trace.h b/target/loongarch/trace.h +new file mode 100644 +index 0000000000..c2ecb78f08 +--- /dev/null ++++ b/target/loongarch/trace.h +@@ -0,0 +1 @@ ++#include "trace/trace-target_loongarch.h" diff --git a/qt5-base/PKGBUILD b/qt5-base/PKGBUILD index 3a3f56d73a..ac235d4b46 100644 --- a/qt5-base/PKGBUILD +++ b/qt5-base/PKGBUILD @@ -36,6 +36,7 @@ sha256sums=('SKIP' '6a4ec2bfcf4e7cff73346762b252cc28be5ca0ed79fde5e69350efe229b43adc' '5411edbe215c24b30448fac69bd0ba7c882f545e8cf05027b2b6e2227abc5e78' '4abc22150fa3e06b2fdcec32146abc9be4e316692aa4d5bd5aa53b4b726783fa') +options=(debug) pkgver() { cd $_pkgfqn diff --git a/qt5-doc/PKGBUILD b/qt5-doc/PKGBUILD index 65efe0fffd..40ecc3edf0 100644 --- a/qt5-doc/PKGBUILD +++ b/qt5-doc/PKGBUILD @@ -16,10 +16,12 @@ groups=('qt5') _pkgfqn="qt-everywhere-opensource-src-${pkgver}" source=("https://download.qt.io/official_releases/qt/${pkgver%.*}/${pkgver}/single/${_pkgfqn}.tar.xz" qt5-webengine-python3.patch - no-qmake.patch) + no-qmake.patch + qt5-base-la64.patch) sha256sums=('93f2c0889ee2e9cdf30c170d353c3f829de5f29ba21c119167dee5995e48ccce' '398c996cb5b606695ac93645143df39e23fa67e768b09e0da6dbd37342a43f32' - 'db90fa31381fa0814c9c8c803c9e2f9b36bdd6f52da753399e500c0692352498') + 'db90fa31381fa0814c9c8c803c9e2f9b36bdd6f52da753399e500c0692352498' + '0ea0ced24a71d6abe7d323c4bfb80c33c77592873b8660fe413cb074978c653a') prepare() { cd ${_pkgfqn/opensource-/} @@ -29,6 +31,7 @@ prepare() { patch -d qtbase -p1 < "$srcdir"/no-qmake.patch # Use system qmake patch -d qtwebengine -p1 < "$srcdir"/qt5-webengine-python3.patch # Fix build with Python 3 + patch -d qtbase -p1 < "$srcdir"/qt5-base-la64.patch } build() { diff --git a/qt5-doc/qt5-base-la64.patch b/qt5-doc/qt5-base-la64.patch new file mode 100644 index 0000000000..eb183a26e1 --- /dev/null +++ b/qt5-doc/qt5-base-la64.patch @@ -0,0 +1,12 @@ +Index: qtbase/src/3rdparty/double-conversion/include/double-conversion/utils.h +=================================================================== +--- qtbase.orig/src/3rdparty/double-conversion/include/double-conversion/utils.h ++++ qtbase/src/3rdparty/double-conversion/include/double-conversion/utils.h +@@ -98,6 +98,7 @@ int main(int argc, char** argv) { + defined(_POWER) || defined(_ARCH_PPC) || defined(_ARCH_PPC64) || \ + defined(__sparc__) || 
defined(__sparc) || defined(__s390__) || \ + defined(__SH4__) || defined(__alpha__) || \ ++ defined(__loongarch64) || \ + defined(_MIPS_ARCH_MIPS32R2) || defined(__ARMEB__) ||\ + defined(__AARCH64EL__) || defined(__aarch64__) || defined(__AARCH64EB__) || \ + defined(__riscv) || \ diff --git a/qt5-script/PKGBUILD b/qt5-script/PKGBUILD index 5ef201e6f3..77260f2ff1 100644 --- a/qt5-script/PKGBUILD +++ b/qt5-script/PKGBUILD @@ -13,12 +13,15 @@ depends=('qt5-base') makedepends=('git') groups=('qt5') _pkgfqn=${pkgname/5-/} -source=(git+https://code.qt.io/qt/$_pkgfqn.git#tag=v${pkgver}-lts) -sha256sums=('SKIP') +source=(git+https://code.qt.io/qt/$_pkgfqn.git#tag=v${pkgver}-lts + loongarch_ports.patch) +sha256sums=('SKIP' + 'e6b6bd257375ba0982f6be8cd9c704a8cf3101d275ee1c40cad8466c695d3800') options=(!lto) prepare() { mkdir -p build + patch -p1 -d qtscript/src -i "$srcdir"/loongarch_ports.patch } build() { diff --git a/qt5-script/loongarch_ports.patch b/qt5-script/loongarch_ports.patch new file mode 100644 index 0000000000..6b9cb77ab0 --- /dev/null +++ b/qt5-script/loongarch_ports.patch @@ -0,0 +1,60 @@ +diff -Naurp a/3rdparty/javascriptcore/JavaScriptCore/runtime/JSValue.h b/3rdparty/javascriptcore/JavaScriptCore/runtime/JSValue.h +--- a/3rdparty/javascriptcore/JavaScriptCore/runtime/JSValue.h 2022-06-26 20:47:06.744000000 +0800 ++++ b/3rdparty/javascriptcore/JavaScriptCore/runtime/JSValue.h 2022-06-26 22:29:55.096000000 +0800 +@@ -490,7 +490,11 @@ namespace JSC { + u.asBits.tag = CellTag; + else + u.asBits.tag = EmptyValueTag; ++#if defined(__loongarch64) || defined(__mips64) ++ u.asBits.payload = reinterpret_cast(ptr); ++#else + u.asBits.payload = reinterpret_cast(ptr); ++#endif + #if ENABLE(JSC_ZOMBIES) + ASSERT(!isZombie()); + #endif +@@ -502,7 +506,11 @@ namespace JSC { + u.asBits.tag = CellTag; + else + u.asBits.tag = EmptyValueTag; ++#if defined(__loongarch64) || defined(__mips64) ++ u.asBits.payload = reinterpret_cast(const_cast(ptr)); ++#else + u.asBits.payload = reinterpret_cast(const_cast(ptr)); ++#endif + #if ENABLE(JSC_ZOMBIES) + ASSERT(!isZombie()); + #endif +diff -Naurp a/3rdparty/javascriptcore/JavaScriptCore/wtf/Platform.h b/3rdparty/javascriptcore/JavaScriptCore/wtf/Platform.h +--- a/3rdparty/javascriptcore/JavaScriptCore/wtf/Platform.h 2022-06-26 20:47:06.776000000 +0800 ++++ b/3rdparty/javascriptcore/JavaScriptCore/wtf/Platform.h 2022-06-26 22:15:32.048000000 +0800 +@@ -226,6 +226,11 @@ + #define WTF_CPU_SPARC 1 + #endif + ++/* CPU(3A5000) */ ++#if defined(__loongarch64) ++#define WTF_CPU_LA64 1 ++#endif ++ + /* CPU(X86) - i386 / x86 32-bit */ + #if defined(__i386__) \ + || defined(i386) \ +@@ -958,7 +963,7 @@ + #endif + + #if !defined(WTF_USE_JSVALUE64) && !defined(WTF_USE_JSVALUE32) && !defined(WTF_USE_JSVALUE32_64) +-#if (CPU(X86_64) && !CPU(X32) && (OS(UNIX) || OS(WINDOWS) || OS(SOLARIS) || OS(HPUX))) || (CPU(IA64) && !CPU(IA64_32)) || CPU(ALPHA) || CPU(AIX64) || CPU(SPARC64) || CPU(MIPS64) || CPU(AARCH64) || CPU(RISCV64) ++#if (CPU(X86_64) && !CPU(X32) && (OS(UNIX) || OS(WINDOWS) || OS(SOLARIS) || OS(HPUX))) || (CPU(IA64) && !CPU(IA64_32)) || CPU(ALPHA) || CPU(AIX64) || CPU(SPARC64) || CPU(MIPS64) || CPU(AARCH64) || CPU(RISCV64) || defined(__loongarch64) + #define WTF_USE_JSVALUE64 1 + #elif CPU(ARM) || CPU(PPC64) || CPU(RISCV32) + #define WTF_USE_JSVALUE32 1 +@@ -976,7 +981,7 @@ on MinGW. 
See https://bugs.webkit.org/sh + #endif + + /* Disable JIT on x32 */ +-#if CPU(X32) ++#if CPU(X32) || defined(__loongarch64) + #define ENABLE_JIT 0 + #endif + diff --git a/qt5-script/qt5-base-la64.patch b/qt5-script/qt5-base-la64.patch new file mode 100644 index 0000000000..84c87b4c5d --- /dev/null +++ b/qt5-script/qt5-base-la64.patch @@ -0,0 +1,12 @@ +Index: qtscript/src/3rdparty/double-conversion/include/double-conversion/utils.h +=================================================================== +--- qtscript.orig/src/3rdparty/double-conversion/include/double-conversion/utils.h ++++ qtscript/src/3rdparty/double-conversion/include/double-conversion/utils.h +@@ -98,6 +98,7 @@ int main(int argc, char** argv) { + defined(_POWER) || defined(_ARCH_PPC) || defined(_ARCH_PPC64) || \ + defined(__sparc__) || defined(__sparc) || defined(__s390__) || \ + defined(__SH4__) || defined(__alpha__) || \ ++ defined(__loongarch64) || \ + defined(_MIPS_ARCH_MIPS32R2) || defined(__ARMEB__) ||\ + defined(__AARCH64EL__) || defined(__aarch64__) || defined(__AARCH64EB__) || \ + defined(__riscv) || \ diff --git a/qt5-webengine/PKGBUILD b/qt5-webengine/PKGBUILD index 9a35d4b276..1032b08fbd 100644 --- a/qt5-webengine/PKGBUILD +++ b/qt5-webengine/PKGBUILD @@ -17,6 +17,7 @@ groups=('qt5') _pkgfqn=${pkgname/5-/} source=(git+https://code.qt.io/qt/qtwebengine.git#tag=v${pkgver}-lts git+https://code.qt.io/qt/qtwebengine-chromium.git + http://loongarch.zhcn.cc/list/qtwebengine/qtwebengine-5.15.2-la64.patch/59168fd4d0e9a62535f8118511fb71fa/qtwebengine-5.15.2-la64.patch qt5-webengine-python3.patch qt5-webengine-chromium-python3.patch qt5-webengine-ffmpeg5.patch @@ -25,6 +26,7 @@ source=(git+https://code.qt.io/qt/qtwebengine.git#tag=v${pkgver}-lts qt5-webengine-icu-74.patch) sha256sums=('SKIP' 'SKIP' + 'cace5fc41204e2679d577b6dd1030656826bbf69a6a4db126036fad47c7db2d1' '0ad5d1660886f7bbf5108b071bf5d7bbbabf1cd1258ce9d4587a01dec4a1aa89' 'd8beb3d65a1aaf927285e6f055a9d1facd0f9c3fd851f91ba568389fb3137399' 'c50d3019626183e753c53a997dc8a55938847543aa3178d4c51f377be741c693' @@ -52,7 +54,7 @@ prepare() { build() { cd build - qmake ../${_pkgfqn} CONFIG+=force_debug_info -- \ + qmake ../${_pkgfqn} CONFIG+=force_debug_info QT_ARCH=loongarch64 -- \ -proprietary-codecs \ -system-ffmpeg \ -webp \ diff --git a/qt5-webengine/qtwebengine-5.15.2-la64.patch b/qt5-webengine/qtwebengine-5.15.2-la64.patch new file mode 100644 index 0000000000..1fdbbb3a84 --- /dev/null +++ b/qt5-webengine/qtwebengine-5.15.2-la64.patch @@ -0,0 +1,63993 @@ +diff --git a/configure.pri b/configure.pri +index d3ba9b147..ded9bdbf0 100644 +--- a/configure.pri ++++ b/configure.pri +@@ -142,7 +142,8 @@ defineTest(qtConfTest_detectPlatform) { + defineTest(qtConfTest_detectArch) { + contains(QT_ARCH, "i386")|contains(QT_ARCH, "x86_64"): return(true) + contains(QT_ARCH, "arm")|contains(QT_ARCH, "arm64"): return(true) +- contains(QT_ARCH, "mips"): return(true) ++ contains(QT_ARCH, "mips")|contains(QT_ARCH, "mips64"): return(true) ++ contains(QT_ARCH, "loongarch")|contains(QT_ARCH, "loongarch64"): return(true) + qtLog("Architecture not supported.") + return(false) + } +diff --git a/mkspecs/features/functions.prf b/mkspecs/features/functions.prf +index d3ceb4c5e..674b97fa4 100644 +--- a/mkspecs/features/functions.prf ++++ b/mkspecs/features/functions.prf +@@ -106,6 +106,7 @@ defineReplace(gnArch) { + contains(qtArch, "arm64"): return(arm64) + contains(qtArch, "mips"): return(mipsel) + contains(qtArch, "mips64"): return(mips64el) ++ contains(qtArch, "loongarch64"): return(la64) + 
return(unknown) + } + +Submodule src/3rdparty fb6ab5e48..8efdf7360: +diff --git a/src/3rdparty/chromium/DEPS b/src/3rdparty/chromium/DEPS +index 530e4a46661..09d10d143c2 100644 +--- a/src/3rdparty/chromium/DEPS ++++ b/src/3rdparty/chromium/DEPS +@@ -85,7 +85,7 @@ vars = { + + # Check out and download nacl by default. This can be disabled e.g. with + # custom_vars. +- 'checkout_nacl': True, ++ 'checkout_nacl': true, + + # By default, do not check out src-internal. This can be overridden e.g. with + # custom_vars. +diff --git a/src/3rdparty/chromium/base/allocator/partition_allocator/page_allocator_constants.h b/src/3rdparty/chromium/base/allocator/partition_allocator/page_allocator_constants.h +index 555700a7d0c..b4b2a0ab054 100644 +--- a/src/3rdparty/chromium/base/allocator/partition_allocator/page_allocator_constants.h ++++ b/src/3rdparty/chromium/base/allocator/partition_allocator/page_allocator_constants.h +@@ -12,7 +12,7 @@ + namespace base { + #if defined(OS_WIN) || defined(ARCH_CPU_PPC64) + static constexpr size_t kPageAllocationGranularityShift = 16; // 64KB +-#elif defined(_MIPS_ARCH_LOONGSON) ++#elif defined(_MIPS_ARCH_LOONGSON) || defined(ARCH_CPU_LA64) + static constexpr size_t kPageAllocationGranularityShift = 14; // 16KB + #else + static constexpr size_t kPageAllocationGranularityShift = 12; // 4KB +@@ -24,7 +24,7 @@ static constexpr size_t kPageAllocationGranularityOffsetMask = + static constexpr size_t kPageAllocationGranularityBaseMask = + ~kPageAllocationGranularityOffsetMask; + +-#if defined(_MIPS_ARCH_LOONGSON) ++#if defined(_MIPS_ARCH_LOONGSON) || defined(ARCH_CPU_LA64) + static constexpr size_t kSystemPageSize = 16384; + #elif defined(ARCH_CPU_PPC64) + // Modern ppc64 systems support 4KB and 64KB page sizes. +diff --git a/src/3rdparty/chromium/base/allocator/partition_allocator/partition_alloc_constants.h b/src/3rdparty/chromium/base/allocator/partition_allocator/partition_alloc_constants.h +index fbc851c15f9..cae8865025f 100644 +--- a/src/3rdparty/chromium/base/allocator/partition_allocator/partition_alloc_constants.h ++++ b/src/3rdparty/chromium/base/allocator/partition_allocator/partition_alloc_constants.h +@@ -35,7 +35,7 @@ static const size_t kBucketShift = (kAllocationGranularity == 8) ? 3 : 2; + // other constant values, we pack _all_ `PartitionRootGeneric::Alloc` sizes + // perfectly up against the end of a system page. + +-#if defined(_MIPS_ARCH_LOONGSON) ++#if defined(_MIPS_ARCH_LOONGSON) || defined(ARCH_CPU_LA64) + static const size_t kPartitionPageShift = 16; // 64 KiB + #elif defined(ARCH_CPU_PPC64) + static const size_t kPartitionPageShift = 18; // 256 KiB +diff --git a/src/3rdparty/chromium/base/process/launch_posix.cc b/src/3rdparty/chromium/base/process/launch_posix.cc +index 9b7573fdc65..fffa3c0e6e0 100644 +--- a/src/3rdparty/chromium/base/process/launch_posix.cc ++++ b/src/3rdparty/chromium/base/process/launch_posix.cc +@@ -702,7 +702,7 @@ NOINLINE pid_t CloneAndLongjmpInChild(unsigned long flags, + // fork-like behavior. + alignas(16) char stack_buf[PTHREAD_STACK_MIN]; + #if defined(ARCH_CPU_X86_FAMILY) || defined(ARCH_CPU_ARM_FAMILY) || \ +- defined(ARCH_CPU_MIPS_FAMILY) || defined(ARCH_CPU_S390_FAMILY) || \ ++ defined(ARCH_CPU_MIPS_FAMILY) || defined(ARCH_CPU_S390_FAMILY) || defined(ARCH_CPU_LA64) || \ + defined(ARCH_CPU_PPC64_FAMILY) + // The stack grows downward. 
+ void* stack = stack_buf + sizeof(stack_buf); +diff --git a/src/3rdparty/chromium/base/third_party/double_conversion/double-conversion/utils.h b/src/3rdparty/chromium/base/third_party/double_conversion/double-conversion/utils.h +index 471c3da84cd..163ef3adf56 100644 +--- a/src/3rdparty/chromium/base/third_party/double_conversion/double-conversion/utils.h ++++ b/src/3rdparty/chromium/base/third_party/double_conversion/double-conversion/utils.h +@@ -99,7 +99,7 @@ int main(int argc, char** argv) { + #if defined(_M_X64) || defined(__x86_64__) || \ + defined(__ARMEL__) || defined(__avr32__) || defined(_M_ARM) || defined(_M_ARM64) || \ + defined(__hppa__) || defined(__ia64__) || \ +- defined(__mips__) || \ ++ defined(__mips__) || defined(__loongarch__) || \ + defined(__nios2__) || \ + defined(__powerpc__) || defined(__ppc__) || defined(__ppc64__) || \ + defined(_POWER) || defined(_ARCH_PPC) || defined(_ARCH_PPC64) || \ +diff --git a/src/3rdparty/chromium/build/build_config.h b/src/3rdparty/chromium/build/build_config.h +index d3cdd2db4a6..d97f8dbf2e6 100644 +--- a/src/3rdparty/chromium/build/build_config.h ++++ b/src/3rdparty/chromium/build/build_config.h +@@ -43,6 +43,7 @@ + #define OS_LINUX 1 + // include a system header to pull in features.h for glibc/uclibc macros. + #include ++#include + #if defined(__GLIBC__) && !defined(__UCLIBC__) + // we really are using glibc, not uClibc pretending to be glibc + #define LIBC_GLIBC 1 +@@ -129,6 +130,10 @@ + #define ARCH_CPU_PPC64 1 + #define ARCH_CPU_64_BITS 1 + #define ARCH_CPU_LITTLE_ENDIAN 1 ++#elif defined(__loongarch64) ++#define ARCH_CPU_LA64 1 ++#define ARCH_CPU_64_BITS 1 ++#define ARCH_CPU_LITTLE_ENDIAN 1 + #elif defined(__ARMEL__) + #define ARCH_CPU_ARM_FAMILY 1 + #define ARCH_CPU_ARMEL 1 +diff --git a/src/3rdparty/chromium/build/config/compiler/BUILD.gn b/src/3rdparty/chromium/build/config/compiler/BUILD.gn +index ca81bd8ce0f..e6cc68616ad 100644 +--- a/src/3rdparty/chromium/build/config/compiler/BUILD.gn ++++ b/src/3rdparty/chromium/build/config/compiler/BUILD.gn +@@ -241,7 +241,7 @@ config("default_include_dirs") { + + config("compiler") { + asmflags = [] +- cflags = [] ++ cflags = ["-mcmodel=large"] + cflags_c = [] + cflags_cc = [] + cflags_objc = [] +diff --git a/src/3rdparty/chromium/build/toolchain/linux/BUILD.gn b/src/3rdparty/chromium/build/toolchain/linux/BUILD.gn +index fa8b17e9db3..f67bebc028c 100644 +--- a/src/3rdparty/chromium/build/toolchain/linux/BUILD.gn ++++ b/src/3rdparty/chromium/build/toolchain/linux/BUILD.gn +@@ -185,6 +185,13 @@ clang_toolchain("clang_mips64el") { + } + } + ++clang_toolchain("clang_la64") { ++ toolchain_args = { ++ current_cpu = "la64" ++ current_os = "linux" ++ } ++} ++ + gcc_toolchain("mipsel") { + toolprefix = "mipsel-linux-gnu-" + +@@ -223,6 +230,25 @@ gcc_toolchain("mips64el") { + } + } + ++gcc_toolchain("la64") { ++ toolprefix = "" ++ ++ cc = "${toolprefix}gcc" ++ cxx = "${toolprefix}g++" ++ ar = "${toolprefix}ar" ++ ld = cxx ++ readelf = "${toolprefix}readelf" ++ nm = "${toolprefix}nm" ++ ++ toolchain_args = { ++ cc_wrapper = "" ++ current_cpu = "la64" ++ current_os = "linux" ++ is_clang = false ++ use_goma = false ++ } ++} ++ + clang_toolchain("clang_s390x") { + toolchain_args = { + current_cpu = "s390x" +diff --git a/src/3rdparty/chromium/cc/animation/worklet_animation_unittest.cc b/src/3rdparty/chromium/cc/animation/worklet_animation_unittest.cc +index e827c3740e3..50a169fd628 100644 +--- a/src/3rdparty/chromium/cc/animation/worklet_animation_unittest.cc ++++ 
b/src/3rdparty/chromium/cc/animation/worklet_animation_unittest.cc +@@ -69,15 +69,15 @@ class MockScrollTimeline : public ScrollTimeline { + ~MockScrollTimeline() override = default; + }; + +-TEST_F(WorkletAnimationTest, NonImplInstanceDoesNotTickKeyframe) { ++/*TEST_F(WorkletAnimationTest, NonImplInstanceDoesNotTickKeyframe) { + std::unique_ptr effect = + std::make_unique(worklet_animation_.get()); + MockKeyframeEffect* mock_effect = effect.get(); + + scoped_refptr worklet_animation = +- WrapRefCounted(new WorkletAnimation( ++ base::WrapRefCounted(new WorkletAnimation( + 1, worklet_animation_id_, "test_name", 1, nullptr, nullptr, +- false /* not impl instance*/, std::move(effect))); ++ false / not impl instance/, std::move(effect))); + + EXPECT_CALL(*mock_effect, Tick(_)).Times(0); + +@@ -85,7 +85,7 @@ TEST_F(WorkletAnimationTest, NonImplInstanceDoesNotTickKeyframe) { + state.local_times.push_back(base::TimeDelta::FromSecondsD(1)); + worklet_animation->SetOutputState(state); + worklet_animation->Tick(base::TimeTicks()); +-} ++}*/ + + TEST_F(WorkletAnimationTest, LocalTimeIsUsedWhenTicking) { + AttachWorkletAnimation(); +diff --git a/src/3rdparty/chromium/components/crash/core/app/breakpad_linux.cc b/src/3rdparty/chromium/components/crash/core/app/breakpad_linux.cc +index 192b0a7f137..34fddc8d5f2 100644 +--- a/src/3rdparty/chromium/components/crash/core/app/breakpad_linux.cc ++++ b/src/3rdparty/chromium/components/crash/core/app/breakpad_linux.cc +@@ -1030,11 +1030,14 @@ class NonBrowserCrashHandler : public google_breakpad::CrashGenerationClient { + bool RequestDump(const void* crash_context, + size_t crash_context_size) override { + int fds[2] = { -1, -1 }; ++// TODO:LA64 ++#ifndef ARCH_CPU_LA64 + if (sys_socketpair(AF_UNIX, SOCK_STREAM, 0, fds) < 0) { + static const char msg[] = "Failed to create socket for crash dumping.\n"; + WriteLog(msg, sizeof(msg) - 1); + return false; + } ++#endif + + // Start constructing the message to send to the browser. + char b; // Dummy variable for sys_read below. 
+diff --git a/src/3rdparty/chromium/components/paint_preview/browser/paint_preview_client.cc b/src/3rdparty/chromium/components/paint_preview/browser/paint_preview_client.cc +index 3728efa1f5f..b76dfe3453f 100644 +--- a/src/3rdparty/chromium/components/paint_preview/browser/paint_preview_client.cc ++++ b/src/3rdparty/chromium/components/paint_preview/browser/paint_preview_client.cc +@@ -106,7 +106,7 @@ PaintPreviewClient::PaintPreviewData& PaintPreviewClient::PaintPreviewData:: + operator=(PaintPreviewData&& rhs) = default; + + PaintPreviewClient::PaintPreviewData::PaintPreviewData( +- PaintPreviewData&& other) noexcept = default; ++ PaintPreviewData&& other) /*noexcept*/ = default; + + PaintPreviewClient::CreateResult::CreateResult(base::File file, + base::File::Error error) +diff --git a/src/3rdparty/chromium/components/update_client/update_query_params.cc b/src/3rdparty/chromium/components/update_client/update_query_params.cc +index 56aea40c92a..fea2792228c 100644 +--- a/src/3rdparty/chromium/components/update_client/update_query_params.cc ++++ b/src/3rdparty/chromium/components/update_client/update_query_params.cc +@@ -58,6 +58,8 @@ const char kArch[] = + "mipsel"; + #elif defined(__powerpc64__) + "ppc64"; ++#elif defined(__loongarch64) ++ "la64"; + #else + #error "unknown arch" + #endif +@@ -128,6 +130,8 @@ const char* UpdateQueryParams::GetNaclArch() { + return "mips64"; + #elif defined(ARCH_CPU_PPC64) + return "ppc64"; ++#elif defined(ARCH_CPU_LA64) ++ return "la64"; + #else + // NOTE: when adding new values here, please remember to update the + // comment in the .h file about possible return values from this function. +diff --git a/src/3rdparty/chromium/extensions/common/api/runtime.json b/src/3rdparty/chromium/extensions/common/api/runtime.json +index 5b009eabdd2..bd5d133354d 100644 +--- a/src/3rdparty/chromium/extensions/common/api/runtime.json ++++ b/src/3rdparty/chromium/extensions/common/api/runtime.json +@@ -73,7 +73,7 @@ + { + "id": "PlatformArch", + "type": "string", +- "enum": ["arm", "arm64", "x86-32", "x86-64", "mips", "mips64"], ++ "enum": ["arm", "arm64", "x86-32", "x86-64", "mips", "mips64", "la64"], + "description": "The machine's processor architecture." + }, + { +diff --git a/src/3rdparty/chromium/media/base/media_serializers.h b/src/3rdparty/chromium/media/base/media_serializers.h +index 6333c44170f..0e3f223afda 100644 +--- a/src/3rdparty/chromium/media/base/media_serializers.h ++++ b/src/3rdparty/chromium/media/base/media_serializers.h +@@ -377,7 +377,7 @@ template <> + struct MediaSerializer { + static base::Value Serialize(const base::Location& value) { + base::Value result(base::Value::Type::DICTIONARY); +- FIELD_SERIALIZE("file", value.file_name()); ++ FIELD_SERIALIZE("file", value.file_name() ? value.file_name() : "unknown"); + FIELD_SERIALIZE("line", value.line_number()); + return result; + } +diff --git a/src/3rdparty/chromium/media/media_options.gni b/src/3rdparty/chromium/media/media_options.gni +index 011bd47ca2c..2ba3899097c 100644 +--- a/src/3rdparty/chromium/media/media_options.gni ++++ b/src/3rdparty/chromium/media/media_options.gni +@@ -93,7 +93,7 @@ declare_args() { + # are combined and we could override more logging than expected. + enable_logging_override = !use_jumbo_build && is_chromecast + +- enable_dav1d_decoder = !is_android && !is_ios ++ enable_dav1d_decoder = !is_android && !is_ios && target_cpu != "la64" + + # Enable browser managed persistent metadata storage for EME persistent + # session and persistent usage record session. 
+diff --git a/src/3rdparty/chromium/sandbox/features.gni b/src/3rdparty/chromium/sandbox/features.gni +index 09280d35f6a..d87ee96e182 100644 +--- a/src/3rdparty/chromium/sandbox/features.gni ++++ b/src/3rdparty/chromium/sandbox/features.gni +@@ -11,6 +11,7 @@ import("//build/config/nacl/config.gni") + use_seccomp_bpf = (is_linux || is_android) && + (current_cpu == "x86" || current_cpu == "x64" || + current_cpu == "arm" || current_cpu == "arm64" || +- current_cpu == "mipsel" || current_cpu == "mips64el") ++ current_cpu == "mipsel" || current_cpu == "mips64el" || ++ current_cpu == "la64") + + use_seccomp_bpf = use_seccomp_bpf || is_nacl_nonsfi +diff --git a/src/3rdparty/chromium/sandbox/linux/BUILD.gn b/src/3rdparty/chromium/sandbox/linux/BUILD.gn +index c27351f9a6a..ab45fc38193 100644 +--- a/src/3rdparty/chromium/sandbox/linux/BUILD.gn ++++ b/src/3rdparty/chromium/sandbox/linux/BUILD.gn +@@ -422,6 +422,7 @@ source_set("sandbox_services_headers") { + "system_headers/mips_linux_syscalls.h", + "system_headers/x86_32_linux_syscalls.h", + "system_headers/x86_64_linux_syscalls.h", ++ "system_headers/la64_linux_syscalls.h", + ] + } + +diff --git a/src/3rdparty/chromium/sandbox/linux/bpf_dsl/linux_syscall_ranges.h b/src/3rdparty/chromium/sandbox/linux/bpf_dsl/linux_syscall_ranges.h +index 313511f22e9..062825902f8 100644 +--- a/src/3rdparty/chromium/sandbox/linux/bpf_dsl/linux_syscall_ranges.h ++++ b/src/3rdparty/chromium/sandbox/linux/bpf_dsl/linux_syscall_ranges.h +@@ -42,7 +42,14 @@ + #define MAX_PUBLIC_SYSCALL (MIN_SYSCALL + __NR_Linux_syscalls) + #define MAX_SYSCALL MAX_PUBLIC_SYSCALL + +-#elif defined(ARCH_CPU_MIPS_FAMILY) && defined(ARCH_CPU_64_BITS) ++#elif defined(ARCH_CPU_LA64) ++ ++#include ++#define MIN_SYSCALL 0u ++#define MAX_PUBLIC_SYSCALL __NR_syscalls ++#define MAX_SYSCALL MAX_PUBLIC_SYSCALL ++ ++#elif (defined(ARCH_CPU_MIPS_FAMILY) && defined(ARCH_CPU_64_BITS)) + + #include // for __NR_64_Linux and __NR_64_Linux_syscalls + #define MIN_SYSCALL __NR_64_Linux +diff --git a/src/3rdparty/chromium/sandbox/linux/bpf_dsl/policy_compiler.cc b/src/3rdparty/chromium/sandbox/linux/bpf_dsl/policy_compiler.cc +index 347304889ea..ef8500ff46a 100644 +--- a/src/3rdparty/chromium/sandbox/linux/bpf_dsl/policy_compiler.cc ++++ b/src/3rdparty/chromium/sandbox/linux/bpf_dsl/policy_compiler.cc +@@ -141,6 +141,10 @@ CodeGen::Node PolicyCompiler::AssemblePolicy() { + } + + CodeGen::Node PolicyCompiler::CheckArch(CodeGen::Node passed) { ++// TODO:LA64: Kernel doesn't support AUDIT? ++#ifdef ARCH_CPU_LA64 ++ return passed; ++#endif + // If the architecture doesn't match SECCOMP_ARCH, disallow the + // system call. 
+ return gen_.MakeInstruction( +diff --git a/src/3rdparty/chromium/sandbox/linux/bpf_dsl/seccomp_macros.h b/src/3rdparty/chromium/sandbox/linux/bpf_dsl/seccomp_macros.h +index 1a407b95237..f500fab43bf 100644 +--- a/src/3rdparty/chromium/sandbox/linux/bpf_dsl/seccomp_macros.h ++++ b/src/3rdparty/chromium/sandbox/linux/bpf_dsl/seccomp_macros.h +@@ -346,6 +346,47 @@ struct regs_struct { + #define SECCOMP_PT_PARM4(_regs) (_regs).regs[3] + #define SECCOMP_PT_PARM5(_regs) (_regs).regs[4] + #define SECCOMP_PT_PARM6(_regs) (_regs).regs[5] ++ ++#elif defined(ARCH_CPU_LA64) ++struct regs_struct { ++ uint64_t gpr[32]; ++ uint64_t pc; ++}; ++ ++#define SECCOMP_ARCH AUDIT_ARCH_LOONGARCH64 ++ ++#define SECCOMP_REG(_ctx, _reg) ((_ctx)->uc_mcontext.__gregs[_reg]) ++ ++#define SECCOMP_RESULT(_ctx) SECCOMP_REG(_ctx, 4) ++#define SECCOMP_SYSCALL(_ctx) SECCOMP_REG(_ctx, 11) ++#define SECCOMP_IP(_ctx) (_ctx)->uc_mcontext.__pc ++#define SECCOMP_PARM1(_ctx) SECCOMP_REG(_ctx, 4) ++#define SECCOMP_PARM2(_ctx) SECCOMP_REG(_ctx, 5) ++#define SECCOMP_PARM3(_ctx) SECCOMP_REG(_ctx, 6) ++#define SECCOMP_PARM4(_ctx) SECCOMP_REG(_ctx, 7) ++#define SECCOMP_PARM5(_ctx) SECCOMP_REG(_ctx, 8) ++#define SECCOMP_PARM6(_ctx) SECCOMP_REG(_ctx, 9) ++ ++#define SECCOMP_NR_IDX (offsetof(struct arch_seccomp_data, nr)) ++#define SECCOMP_ARCH_IDX (offsetof(struct arch_seccomp_data, arch)) ++#define SECCOMP_IP_MSB_IDX \ ++ (offsetof(struct arch_seccomp_data, instruction_pointer) + 4) ++#define SECCOMP_IP_LSB_IDX \ ++ (offsetof(struct arch_seccomp_data, instruction_pointer) + 0) ++#define SECCOMP_ARG_MSB_IDX(nr) \ ++ (offsetof(struct arch_seccomp_data, args) + 8 * (nr) + 4) ++#define SECCOMP_ARG_LSB_IDX(nr) \ ++ (offsetof(struct arch_seccomp_data, args) + 8 * (nr) + 0) ++ ++#define SECCOMP_PT_RESULT(_regs) (_regs).regs[4] ++#define SECCOMP_PT_SYSCALL(_regs) (_regs).regs[11] ++#define SECCOMP_PT_IP(_regs) (_regs).pc ++#define SECCOMP_PT_PARM1(_regs) (_regs).regs[4] ++#define SECCOMP_PT_PARM2(_regs) (_regs).regs[5] ++#define SECCOMP_PT_PARM3(_regs) (_regs).regs[6] ++#define SECCOMP_PT_PARM4(_regs) (_regs).regs[7] ++#define SECCOMP_PT_PARM5(_regs) (_regs).regs[8] ++#define SECCOMP_PT_PARM6(_regs) (_regs).regs[9] + #else + #error Unsupported target platform + +diff --git a/src/3rdparty/chromium/sandbox/linux/integration_tests/bpf_dsl_seccomp_unittest.cc b/src/3rdparty/chromium/sandbox/linux/integration_tests/bpf_dsl_seccomp_unittest.cc +index d30e15560a4..dc18b67944e 100644 +--- a/src/3rdparty/chromium/sandbox/linux/integration_tests/bpf_dsl_seccomp_unittest.cc ++++ b/src/3rdparty/chromium/sandbox/linux/integration_tests/bpf_dsl_seccomp_unittest.cc +@@ -1915,7 +1915,7 @@ BPF_TEST_C(SandboxBPF, PthreadBitMask, PthreadPolicyBitMask) { + // + // Depending on the architecture, this may modify regs, so the caller is + // responsible for committing these changes using PTRACE_SETREGS. +-#if !defined(__arm__) && !defined(__aarch64__) && !defined(__mips__) ++#if !defined(__arm__) && !defined(__aarch64__) && !defined(__mips__) && !defined(__loongarch__) + long SetSyscall(pid_t pid, regs_struct* regs, int syscall_number) { + #if defined(__arm__) + // On ARM, the syscall is changed using PTRACE_SET_SYSCALL. We cannot use the +@@ -1952,7 +1952,7 @@ SANDBOX_TEST(SandboxBPF, DISABLE_ON_TSAN(SeccompRetTrace)) { + + // This test is disabled on arm due to a kernel bug. 
+ // See https://code.google.com/p/chromium/issues/detail?id=383977 +-#if defined(__arm__) || defined(__aarch64__) ++#if defined(__arm__) || defined(__aarch64__) || defined(__loongarch__) + printf("This test is currently disabled on ARM32/64 due to a kernel bug."); + #elif defined(__mips__) + // TODO: Figure out how to support specificity of handling indirect syscalls +diff --git a/src/3rdparty/chromium/sandbox/linux/seccomp-bpf-helpers/baseline_policy.cc b/src/3rdparty/chromium/sandbox/linux/seccomp-bpf-helpers/baseline_policy.cc +index 712f9699a94..615b8fa08e2 100644 +--- a/src/3rdparty/chromium/sandbox/linux/seccomp-bpf-helpers/baseline_policy.cc ++++ b/src/3rdparty/chromium/sandbox/linux/seccomp-bpf-helpers/baseline_policy.cc +@@ -165,7 +165,7 @@ ResultExpr EvaluateSyscallImpl(int fs_denied_errno, + return RestrictFcntlCommands(); + #endif + +-#if !defined(__aarch64__) ++#if !defined(__aarch64__) && !defined(__loongarch__) + // fork() is never used as a system call (clone() is used instead), but we + // have seen it in fallback code on Android. + if (sysno == __NR_fork) { +@@ -210,7 +210,7 @@ ResultExpr EvaluateSyscallImpl(int fs_denied_errno, + } + + #if defined(__i386__) || defined(__x86_64__) || defined(__mips__) || \ +- defined(__aarch64__) ++ defined(__aarch64__) || defined(__loongarch64) + if (sysno == __NR_mmap) + return RestrictMmapFlags(); + #endif +@@ -228,7 +228,7 @@ ResultExpr EvaluateSyscallImpl(int fs_denied_errno, + return RestrictPrctl(); + + #if defined(__x86_64__) || defined(__arm__) || defined(__mips__) || \ +- defined(__aarch64__) ++ defined(__aarch64__) || defined(__loongarch64) + if (sysno == __NR_socketpair) { + // Only allow AF_UNIX, PF_UNIX. Crash if anything else is seen. + static_assert(AF_UNIX == PF_UNIX, +diff --git a/src/3rdparty/chromium/sandbox/linux/seccomp-bpf-helpers/baseline_policy_unittest.cc b/src/3rdparty/chromium/sandbox/linux/seccomp-bpf-helpers/baseline_policy_unittest.cc +index fc36187c945..fb35d5a3ac6 100644 +--- a/src/3rdparty/chromium/sandbox/linux/seccomp-bpf-helpers/baseline_policy_unittest.cc ++++ b/src/3rdparty/chromium/sandbox/linux/seccomp-bpf-helpers/baseline_policy_unittest.cc +@@ -215,7 +215,7 @@ BPF_TEST_C(BaselinePolicy, GetRandom, BaselinePolicy) { + } + + // Not all architectures can restrict the domain for socketpair(). 
+-#if defined(__x86_64__) || defined(__arm__) || defined(__aarch64__) ++#if defined(__x86_64__) || defined(__arm__) || defined(__aarch64__) || defined(__loongarch64) + BPF_DEATH_TEST_C(BaselinePolicy, + SocketpairWrongDomain, + DEATH_SEGV_MESSAGE(GetErrorMessageContentForTests()), +@@ -224,7 +224,7 @@ BPF_DEATH_TEST_C(BaselinePolicy, + ignore_result(socketpair(AF_INET, SOCK_STREAM, 0, sv)); + _exit(1); + } +-#endif // defined(__x86_64__) || defined(__arm__) || defined(__aarch64__) ++#endif // defined(__x86_64__) || defined(__arm__) || defined(__aarch64__) || defined(__loongarch64) + + BPF_TEST_C(BaselinePolicy, EPERM_open, BaselinePolicy) { + errno = 0; +@@ -288,7 +288,7 @@ TEST_BASELINE_SIGSYS(__NR_sysinfo) + TEST_BASELINE_SIGSYS(__NR_syslog) + TEST_BASELINE_SIGSYS(__NR_timer_create) + +-#if !defined(__aarch64__) ++#if !defined(__aarch64__) && !defined(__loongarch64) + TEST_BASELINE_SIGSYS(__NR_eventfd) + TEST_BASELINE_SIGSYS(__NR_inotify_init) + TEST_BASELINE_SIGSYS(__NR_vserver) +diff --git a/src/3rdparty/chromium/sandbox/linux/seccomp-bpf-helpers/syscall_parameters_restrictions.cc b/src/3rdparty/chromium/sandbox/linux/seccomp-bpf-helpers/syscall_parameters_restrictions.cc +index 5e0131ac4bc..8337b33ac88 100644 +--- a/src/3rdparty/chromium/sandbox/linux/seccomp-bpf-helpers/syscall_parameters_restrictions.cc ++++ b/src/3rdparty/chromium/sandbox/linux/seccomp-bpf-helpers/syscall_parameters_restrictions.cc +@@ -36,7 +36,7 @@ + #include + #include + #if defined(OS_LINUX) && !defined(OS_CHROMEOS) && !defined(__arm__) && \ +- !defined(__aarch64__) && !defined(PTRACE_GET_THREAD_AREA) ++ !defined(__aarch64__) && !defined(__loongarch__) && !defined(PTRACE_GET_THREAD_AREA) + // Also include asm/ptrace-abi.h since ptrace.h in older libc (for instance + // the one in Ubuntu 16.04 LTS) is missing PTRACE_GET_THREAD_AREA. + // asm/ptrace-abi.h doesn't exist on arm32 and PTRACE_GET_THREAD_AREA isn't +@@ -418,7 +418,7 @@ ResultExpr RestrictPrlimitToGetrlimit(pid_t target_pid) { + ResultExpr RestrictPtrace() { + const Arg request(0); + return Switch(request).CASES(( +-#if !defined(__aarch64__) ++#if !defined(__aarch64__) && !defined(__loongarch64) + PTRACE_GETREGS, + PTRACE_GETFPREGS, + #if defined(TRACE_GET_THREAD_AREA) +diff --git a/src/3rdparty/chromium/sandbox/linux/seccomp-bpf-helpers/syscall_sets.cc b/src/3rdparty/chromium/sandbox/linux/seccomp-bpf-helpers/syscall_sets.cc +index d9d18822f67..1f5e39b91a7 100644 +--- a/src/3rdparty/chromium/sandbox/linux/seccomp-bpf-helpers/syscall_sets.cc ++++ b/src/3rdparty/chromium/sandbox/linux/seccomp-bpf-helpers/syscall_sets.cc +@@ -80,7 +80,7 @@ bool SyscallSets::IsUmask(int sysno) { + // Both EPERM and ENOENT are valid errno unless otherwise noted in comment. + bool SyscallSets::IsFileSystem(int sysno) { + switch (sysno) { +-#if !defined(__aarch64__) ++#if !defined(__aarch64__) && !defined(__loongarch64) + case __NR_access: // EPERM not a valid errno. + case __NR_chmod: + case __NR_chown: +@@ -106,13 +106,13 @@ bool SyscallSets::IsFileSystem(int sysno) { + #endif + case __NR_ustat: // Same as above. Deprecated. + case __NR_utimes: +-#endif // !defined(__aarch64__) ++#endif // !defined(__aarch64__) || defined(__loongarch64) + + case __NR_execve: + case __NR_faccessat: // EPERM not a valid errno. + case __NR_fchmodat: + case __NR_fchownat: // Should be called chownat ? +-#if defined(__x86_64__) || defined(__aarch64__) ++#if defined(__x86_64__) || defined(__aarch64__) || defined(__loongarch64) + case __NR_newfstatat: // fstatat(). EPERM not a valid errno. 
+ #elif defined(__i386__) || defined(__arm__) || \ + (defined(ARCH_CPU_MIPS_FAMILY) && defined(ARCH_CPU_32_BITS)) +@@ -195,7 +195,7 @@ bool SyscallSets::IsAllowedFileSystemAccessViaFd(int sysno) { + case __NR_oldfstat: + #endif + #if defined(__i386__) || defined(__x86_64__) || defined(__mips__) || \ +- defined(__aarch64__) ++ defined(__aarch64__) || defined (__loongarch64) + case __NR_sync_file_range: // EPERM not a valid errno. + #elif defined(__arm__) + case __NR_arm_sync_file_range: // EPERM not a valid errno. +@@ -219,7 +219,7 @@ bool SyscallSets::IsDeniedFileSystemAccessViaFd(int sysno) { + (defined(ARCH_CPU_MIPS_FAMILY) && defined(ARCH_CPU_32_BITS)) + case __NR_ftruncate64: + #endif +-#if !defined(__aarch64__) ++#if !defined(__aarch64__) && !defined(__loongarch64) + case __NR_getdents: // EPERM not a valid errno. + #endif + case __NR_getdents64: // EPERM not a valid errno. +@@ -298,7 +298,7 @@ bool SyscallSets::IsProcessPrivilegeChange(int sysno) { + bool SyscallSets::IsProcessGroupOrSession(int sysno) { + switch (sysno) { + case __NR_setpgid: +-#if !defined(__aarch64__) ++#if !defined(__aarch64__) && !defined(__loongarch64) + case __NR_getpgrp: + #endif + case __NR_setsid: +@@ -327,7 +327,7 @@ bool SyscallSets::IsAllowedSignalHandling(int sysno) { + case __NR_rt_sigsuspend: + case __NR_rt_tgsigqueueinfo: + case __NR_sigaltstack: +-#if !defined(__aarch64__) ++#if !defined(__aarch64__) && !defined(__loongarch64) + case __NR_signalfd: + #endif + case __NR_signalfd4: +@@ -351,12 +351,12 @@ bool SyscallSets::IsAllowedOperationOnFd(int sysno) { + switch (sysno) { + case __NR_close: + case __NR_dup: +-#if !defined(__aarch64__) ++#if !defined(__aarch64__) && !defined(__loongarch64) + case __NR_dup2: + #endif + case __NR_dup3: + #if defined(__x86_64__) || defined(__arm__) || defined(__mips__) || \ +- defined(__aarch64__) ++ defined(__aarch64__) || defined(__loongarch__) + case __NR_shutdown: + #endif + return true; +@@ -395,7 +395,7 @@ bool SyscallSets::IsAllowedProcessStartOrDeath(int sysno) { + return true; + case __NR_clone: // Should be parameter-restricted. + case __NR_setns: // Privileged. +-#if !defined(__aarch64__) ++#if !defined(__aarch64__) && !defined(__loongarch64) + case __NR_fork: + #endif + #if defined(__i386__) || defined(__x86_64__) +@@ -406,7 +406,7 @@ bool SyscallSets::IsAllowedProcessStartOrDeath(int sysno) { + #endif + case __NR_set_tid_address: + case __NR_unshare: +-#if !defined(__mips__) && !defined(__aarch64__) ++#if !defined(__mips__) && !defined(__aarch64__) && !defined(__loongarch64) + case __NR_vfork: + #endif + default: +@@ -427,7 +427,7 @@ bool SyscallSets::IsAllowedFutex(int sysno) { + + bool SyscallSets::IsAllowedEpoll(int sysno) { + switch (sysno) { +-#if !defined(__aarch64__) ++#if !defined(__aarch64__) && !defined(__loongarch64) + case __NR_epoll_create: + case __NR_epoll_wait: + #endif +@@ -448,14 +448,14 @@ bool SyscallSets::IsAllowedEpoll(int sysno) { + + bool SyscallSets::IsAllowedGetOrModifySocket(int sysno) { + switch (sysno) { +-#if !defined(__aarch64__) ++#if !defined(__aarch64__) && !defined(__loongarch64) + case __NR_pipe: + #endif + case __NR_pipe2: + return true; + default: + #if defined(__x86_64__) || defined(__arm__) || defined(__mips__) || \ +- defined(__aarch64__) ++ defined(__aarch64__) || defined(__loongarch__) + case __NR_socketpair: // We will want to inspect its argument. 
+ #endif + return false; +@@ -465,7 +465,7 @@ bool SyscallSets::IsAllowedGetOrModifySocket(int sysno) { + bool SyscallSets::IsDeniedGetOrModifySocket(int sysno) { + switch (sysno) { + #if defined(__x86_64__) || defined(__arm__) || defined(__mips__) || \ +- defined(__aarch64__) ++ defined(__aarch64__) || defined(__loongarch__) + case __NR_accept: + case __NR_accept4: + case __NR_bind: +@@ -519,7 +519,7 @@ bool SyscallSets::IsAllowedAddressSpaceAccess(int sysno) { + case __NR_mincore: + case __NR_mlockall: + #if defined(__i386__) || defined(__x86_64__) || defined(__mips__) || \ +- defined(__aarch64__) ++ defined(__aarch64__) || defined(__loongarch__) + case __NR_mmap: + #endif + #if defined(__i386__) || defined(__arm__) || \ +@@ -552,7 +552,7 @@ bool SyscallSets::IsAllowedGeneralIo(int sysno) { + (defined(ARCH_CPU_MIPS_FAMILY) && defined(ARCH_CPU_32_BITS)) + case __NR__llseek: + #endif +-#if !defined(__aarch64__) ++#if !defined(__aarch64__) && !defined(__loongarch64) + case __NR_poll: + #endif + case __NR_ppoll: +@@ -565,7 +565,7 @@ bool SyscallSets::IsAllowedGeneralIo(int sysno) { + case __NR_recv: + #endif + #if defined(__x86_64__) || defined(__arm__) || defined(__mips__) || \ +- defined(__aarch64__) ++ defined(__aarch64__) || defined(__loongarch__) + case __NR_recvfrom: // Could specify source. + case __NR_recvmsg: // Could specify source. + #endif +@@ -580,7 +580,7 @@ bool SyscallSets::IsAllowedGeneralIo(int sysno) { + case __NR_send: + #endif + #if defined(__x86_64__) || defined(__arm__) || defined(__mips__) || \ +- defined(__aarch64__) ++ defined(__aarch64__) || defined(__loongarch__) + case __NR_sendmsg: // Could specify destination. + case __NR_sendto: // Could specify destination. + #endif +@@ -630,7 +630,7 @@ bool SyscallSets::IsSeccomp(int sysno) { + bool SyscallSets::IsAllowedBasicScheduler(int sysno) { + switch (sysno) { + case __NR_sched_yield: +-#if !defined(__aarch64__) ++#if !defined(__aarch64__) && !defined(__loongarch64) + case __NR_pause: + #endif + case __NR_nanosleep: +@@ -714,7 +714,7 @@ bool SyscallSets::IsNuma(int sysno) { + case __NR_getcpu: + case __NR_mbind: + #if defined(__i386__) || defined(__x86_64__) || defined(__mips__) || \ +- defined(__aarch64__) ++ defined(__aarch64__) || defined(__loongarch__) + case __NR_migrate_pages: + #endif + case __NR_move_pages: +@@ -743,7 +743,7 @@ bool SyscallSets::IsGlobalProcessEnvironment(int sysno) { + switch (sysno) { + case __NR_acct: // Privileged. 
+ #if defined(__i386__) || defined(__x86_64__) || defined(__mips__) || \ +- defined(__aarch64__) ++ defined(__aarch64__) || defined(__loongarch__) + case __NR_getrlimit: + #endif + #if defined(__i386__) || defined(__arm__) +@@ -778,7 +778,7 @@ bool SyscallSets::IsDebug(int sysno) { + + bool SyscallSets::IsGlobalSystemStatus(int sysno) { + switch (sysno) { +-#if !defined(__aarch64__) ++#if !defined(__aarch64__) && !defined(__loongarch64) + case __NR__sysctl: + case __NR_sysfs: + #endif +@@ -796,7 +796,7 @@ bool SyscallSets::IsGlobalSystemStatus(int sysno) { + + bool SyscallSets::IsEventFd(int sysno) { + switch (sysno) { +-#if !defined(__aarch64__) ++#if !defined(__aarch64__) && !defined(__loongarch64) + case __NR_eventfd: + #endif + case __NR_eventfd2: +@@ -832,7 +832,8 @@ bool SyscallSets::IsKeyManagement(int sysno) { + } + + #if defined(__x86_64__) || defined(__arm__) || defined(__aarch64__) || \ +- (defined(ARCH_CPU_MIPS_FAMILY) && defined(ARCH_CPU_64_BITS)) ++ (defined(ARCH_CPU_MIPS_FAMILY) && defined(ARCH_CPU_64_BITS)) || \ ++ defined(__loongarch64) + bool SyscallSets::IsSystemVSemaphores(int sysno) { + switch (sysno) { + case __NR_semctl: +@@ -847,7 +848,7 @@ bool SyscallSets::IsSystemVSemaphores(int sysno) { + #endif + + #if defined(__i386__) || defined(__x86_64__) || defined(__arm__) || \ +- defined(__aarch64__) || \ ++ defined(__aarch64__) || defined(__loongarch64) || \ + (defined(ARCH_CPU_MIPS_FAMILY) && defined(ARCH_CPU_64_BITS)) + // These give a lot of ambient authority and bypass the setuid sandbox. + bool SyscallSets::IsSystemVSharedMemory(int sysno) { +@@ -864,7 +865,8 @@ bool SyscallSets::IsSystemVSharedMemory(int sysno) { + #endif + + #if defined(__x86_64__) || defined(__arm__) || defined(__aarch64__) || \ +- (defined(ARCH_CPU_MIPS_FAMILY) && defined(ARCH_CPU_64_BITS)) ++ (defined(ARCH_CPU_MIPS_FAMILY) && defined(ARCH_CPU_64_BITS)) || \ ++ defined(__loongarch64) + bool SyscallSets::IsSystemVMessageQueue(int sysno) { + switch (sysno) { + case __NR_msgctl: +@@ -895,7 +897,8 @@ bool SyscallSets::IsSystemVIpc(int sysno) { + + bool SyscallSets::IsAnySystemV(int sysno) { + #if defined(__x86_64__) || defined(__arm__) || defined(__aarch64__) || \ +- (defined(ARCH_CPU_MIPS_FAMILY) && defined(ARCH_CPU_64_BITS)) ++ (defined(ARCH_CPU_MIPS_FAMILY) && defined(ARCH_CPU_64_BITS)) || \ ++ defined(__loongarch64) + return IsSystemVMessageQueue(sysno) || IsSystemVSemaphores(sysno) || + IsSystemVSharedMemory(sysno); + #elif defined(__i386__) || \ +@@ -928,7 +931,7 @@ bool SyscallSets::IsAdvancedScheduler(int sysno) { + bool SyscallSets::IsInotify(int sysno) { + switch (sysno) { + case __NR_inotify_add_watch: +-#if !defined(__aarch64__) ++#if !defined(__aarch64__) && !defined(__loongarch64) + case __NR_inotify_init: + #endif + case __NR_inotify_init1: +@@ -1043,7 +1046,7 @@ bool SyscallSets::IsMisc(int sysno) { + #if defined(__x86_64__) + case __NR_tuxcall: + #endif +-#if !defined(__aarch64__) ++#if !defined(__aarch64__) && !defined(__loongarch64) + case __NR_vserver: + #endif + return true; +diff --git a/src/3rdparty/chromium/sandbox/linux/seccomp-bpf-helpers/syscall_sets.h b/src/3rdparty/chromium/sandbox/linux/seccomp-bpf-helpers/syscall_sets.h +index 923533ec9fd..fbe7c7910b7 100644 +--- a/src/3rdparty/chromium/sandbox/linux/seccomp-bpf-helpers/syscall_sets.h ++++ b/src/3rdparty/chromium/sandbox/linux/seccomp-bpf-helpers/syscall_sets.h +@@ -49,7 +49,7 @@ class SANDBOX_EXPORT SyscallSets { + #endif + + #if defined(__x86_64__) || defined(__arm__) || defined(__mips__) || \ +- 
defined(__aarch64__) ++ defined(__aarch64__) || defined(__loongarch__) + static bool IsNetworkSocketInformation(int sysno); + #endif + +@@ -72,18 +72,20 @@ class SANDBOX_EXPORT SyscallSets { + static bool IsAsyncIo(int sysno); + static bool IsKeyManagement(int sysno); + #if defined(__x86_64__) || defined(__arm__) || defined(__aarch64__) || \ +- (defined(ARCH_CPU_MIPS_FAMILY) && defined(ARCH_CPU_64_BITS)) ++ (defined(ARCH_CPU_MIPS_FAMILY) && defined(ARCH_CPU_64_BITS)) || \ ++ defined(__loongarch__) + static bool IsSystemVSemaphores(int sysno); + #endif + #if defined(__i386__) || defined(__x86_64__) || defined(__arm__) || \ +- defined(__aarch64__) || \ ++ defined(__aarch64__) || defined(__loongarch__) || \ + (defined(ARCH_CPU_MIPS_FAMILY) && defined(ARCH_CPU_64_BITS)) + // These give a lot of ambient authority and bypass the setuid sandbox. + static bool IsSystemVSharedMemory(int sysno); + #endif + + #if defined(__x86_64__) || defined(__arm__) || defined(__aarch64__) || \ +- (defined(ARCH_CPU_MIPS_FAMILY) && defined(ARCH_CPU_64_BITS)) ++ (defined(ARCH_CPU_MIPS_FAMILY) && defined(ARCH_CPU_64_BITS)) || \ ++ defined(__loongarch__) + static bool IsSystemVMessageQueue(int sysno); + #endif + +diff --git a/src/3rdparty/chromium/sandbox/linux/seccomp-bpf/syscall.cc b/src/3rdparty/chromium/sandbox/linux/seccomp-bpf/syscall.cc +index 34edabd2b82..ddc2446ef4a 100644 +--- a/src/3rdparty/chromium/sandbox/linux/seccomp-bpf/syscall.cc ++++ b/src/3rdparty/chromium/sandbox/linux/seccomp-bpf/syscall.cc +@@ -16,7 +16,7 @@ namespace sandbox { + namespace { + + #if defined(ARCH_CPU_X86_FAMILY) || defined(ARCH_CPU_ARM_FAMILY) || \ +- defined(ARCH_CPU_MIPS_FAMILY) ++ defined(ARCH_CPU_MIPS_FAMILY) || defined(ARCH_CPU_LA64) + // Number that's not currently used by any Linux kernel ABIs. + const int kInvalidSyscallNumber = 0x351d3; + #else +@@ -310,6 +310,25 @@ asm(// We need to be able to tell the kernel exactly where we made a + "2:ret\n" + ".cfi_endproc\n" + ".size SyscallAsm, .-SyscallAsm\n" ++#elif defined(__loongarch64) ++ ".text\n" ++ ".global SyscallAsm\n" ++ ".type SyscallAsm, %function\n" ++ "SyscallAsm:\n" ++ "bge $a0, $zero, 1f\n" ++ "la $a0, 2f\n" ++ "b 2f\n" ++ "1:ld.d $a5, $a6, 40\n" ++ "ld.d $a4, $a6, 32\n" ++ "ld.d $a3, $a6, 24\n" ++ "ld.d $a2, $a6, 16\n" ++ "ld.d $a1, $a6, 8\n" ++ "move $a7, $a0\n" ++ "ld.d $a0, $a6, 0\n" ++ // Enter the kernel ++ "syscall 0\n" ++ "2:jirl $zero, $ra, 0\n" ++ ".size SyscallAsm, .-SyscallAsm\n" + #endif + ); // asm + +@@ -426,7 +445,22 @@ intptr_t Syscall::Call(int nr, + : "memory", "x1", "x2", "x3", "x4", "x5", "x8", "x30"); + ret = inout; + } +- ++#elif defined(__loongarch64) ++ intptr_t ret; ++ { ++ register intptr_t inout __asm__("$4") = nr; ++ register const intptr_t* data __asm__("$10") = args; ++ // Save and restore $ra. ++ __asm__ volatile("addi.d $sp, $sp, 8\n" ++ "st.d $ra, $sp, 0\n" ++ "bl SyscallAsm\n" ++ "ld.d $ra, $sp, 0\n" ++ "addi.d $sp, $sp, -8\n" ++ : "=r"(inout) ++ : "0"(inout), "r"(data) ++ : "memory", "$5", "$6", "$7", "$8", "$9", "$11", "$2"); ++ ret = inout; ++ } + #else + #error "Unimplemented architecture" + #endif +diff --git a/src/3rdparty/chromium/sandbox/linux/seccomp-bpf/trap.cc b/src/3rdparty/chromium/sandbox/linux/seccomp-bpf/trap.cc +index 9884be8bb2c..7fa6e188dbe 100644 +--- a/src/3rdparty/chromium/sandbox/linux/seccomp-bpf/trap.cc ++++ b/src/3rdparty/chromium/sandbox/linux/seccomp-bpf/trap.cc +@@ -189,7 +189,7 @@ void Trap::SigSys(int nr, LinuxSigInfo* info, ucontext_t* ctx) { + + // Some more sanity checks. 
+ if (sigsys.ip != reinterpret_cast(SECCOMP_IP(ctx)) || +- sigsys_nr_is_bad || sigsys.arch != SECCOMP_ARCH) { ++ sigsys_nr_is_bad) { // TODO:LA64 || sigsys.arch != SECCOMP_ARCH) { + // TODO(markus): + // SANDBOX_DIE() can call LOG(FATAL). This is not normally async-signal + // safe and can lead to bugs. We should eventually implement a different +diff --git a/src/3rdparty/chromium/sandbox/linux/services/credentials.cc b/src/3rdparty/chromium/sandbox/linux/services/credentials.cc +index d7b5d8c4413..70ace0b0467 100644 +--- a/src/3rdparty/chromium/sandbox/linux/services/credentials.cc ++++ b/src/3rdparty/chromium/sandbox/linux/services/credentials.cc +@@ -81,7 +81,7 @@ bool ChrootToSafeEmptyDir() { + pid_t pid = -1; + alignas(16) char stack_buf[PTHREAD_STACK_MIN]; + #if defined(ARCH_CPU_X86_FAMILY) || defined(ARCH_CPU_ARM_FAMILY) || \ +- defined(ARCH_CPU_MIPS_FAMILY) ++ defined(ARCH_CPU_MIPS_FAMILY) || defined(ARCH_CPU_LA64) + // The stack grows downward. + void* stack = stack_buf + sizeof(stack_buf); + #else +@@ -90,7 +90,7 @@ bool ChrootToSafeEmptyDir() { + + int clone_flags = CLONE_FS | LINUX_SIGCHLD; + void* tls = nullptr; +-#if defined(ARCH_CPU_X86_64) || defined(ARCH_CPU_ARM_FAMILY) ++#if defined(ARCH_CPU_X86_64) || defined(ARCH_CPU_ARM_FAMILY) || defined(ARCH_CPU_LA64) + // Use CLONE_VM | CLONE_VFORK as an optimization to avoid copying page tables. + // Since clone writes to the new child's TLS before returning, we must set a + // new TLS to avoid corrupting the current process's TLS. On ARCH_CPU_X86, +diff --git a/src/3rdparty/chromium/sandbox/linux/services/syscall_wrappers.cc b/src/3rdparty/chromium/sandbox/linux/services/syscall_wrappers.cc +index fcfd2aa129d..bd936b0a374 100644 +--- a/src/3rdparty/chromium/sandbox/linux/services/syscall_wrappers.cc ++++ b/src/3rdparty/chromium/sandbox/linux/services/syscall_wrappers.cc +@@ -58,7 +58,7 @@ long sys_clone(unsigned long flags, + #if defined(ARCH_CPU_X86_64) + return syscall(__NR_clone, flags, child_stack, ptid, ctid, tls); + #elif defined(ARCH_CPU_X86) || defined(ARCH_CPU_ARM_FAMILY) || \ +- defined(ARCH_CPU_MIPS_FAMILY) ++ defined(ARCH_CPU_MIPS_FAMILY) || defined(ARCH_CPU_LA64) + // CONFIG_CLONE_BACKWARDS defined. 
+ return syscall(__NR_clone, flags, child_stack, ptid, tls, ctid); + #endif +diff --git a/src/3rdparty/chromium/sandbox/linux/syscall_broker/broker_process.cc b/src/3rdparty/chromium/sandbox/linux/syscall_broker/broker_process.cc +index 8321d23798d..8f3eced1761 100644 +--- a/src/3rdparty/chromium/sandbox/linux/syscall_broker/broker_process.cc ++++ b/src/3rdparty/chromium/sandbox/linux/syscall_broker/broker_process.cc +@@ -111,53 +111,55 @@ bool BrokerProcess::Init( + + bool BrokerProcess::IsSyscallAllowed(int sysno) const { + switch (sysno) { +-#if !defined(__aarch64__) ++#if !defined(__aarch64__) && !defined(__loongarch__) + case __NR_access: + #endif + case __NR_faccessat: + return !fast_check_in_client_ || + allowed_command_set_.test(COMMAND_ACCESS); + +-#if !defined(__aarch64__) ++#if !defined(__aarch64__) && !defined(__loongarch__) + case __NR_mkdir: + #endif + case __NR_mkdirat: + return !fast_check_in_client_ || allowed_command_set_.test(COMMAND_MKDIR); + +-#if !defined(__aarch64__) ++#if !defined(__aarch64__) && !defined(__loongarch__) + case __NR_open: + #endif + case __NR_openat: + return !fast_check_in_client_ || allowed_command_set_.test(COMMAND_OPEN); + +-#if !defined(__aarch64__) ++#if !defined(__aarch64__) && !defined(__loongarch__) + case __NR_readlink: + #endif + case __NR_readlinkat: + return !fast_check_in_client_ || + allowed_command_set_.test(COMMAND_READLINK); + +-#if !defined(__aarch64__) ++#if !defined(__aarch64__) && !defined(__loongarch__) + case __NR_rename: + #endif ++#if !defined(__loongarch__) + case __NR_renameat: ++#endif + case __NR_renameat2: + return !fast_check_in_client_ || + allowed_command_set_.test(COMMAND_RENAME); + +-#if !defined(__aarch64__) ++#if !defined(__aarch64__) && !defined(__loongarch__) + case __NR_rmdir: + return !fast_check_in_client_ || allowed_command_set_.test(COMMAND_RMDIR); + #endif + +-#if !defined(__aarch64__) ++#if !defined(__aarch64__) && !defined(__loongarch__) + case __NR_stat: + case __NR_lstat: + #endif + #if defined(__NR_fstatat) + case __NR_fstatat: + #endif +-#if defined(__x86_64__) || defined(__aarch64__) ++#if defined(__x86_64__) || defined(__aarch64__) || defined(__loongarch64) + case __NR_newfstatat: + #endif + return !fast_check_in_client_ || allowed_command_set_.test(COMMAND_STAT); +@@ -172,7 +174,7 @@ bool BrokerProcess::IsSyscallAllowed(int sysno) const { + return !fast_check_in_client_ || allowed_command_set_.test(COMMAND_STAT); + #endif + +-#if !defined(__aarch64__) ++#if !defined(__aarch64__) && !defined(__loongarch__) + case __NR_unlink: + #endif + case __NR_unlinkat: +diff --git a/src/3rdparty/chromium/sandbox/linux/syscall_broker/broker_process_unittest.cc b/src/3rdparty/chromium/sandbox/linux/syscall_broker/broker_process_unittest.cc +index e1144da6e78..d2a7cab40ca 100644 +--- a/src/3rdparty/chromium/sandbox/linux/syscall_broker/broker_process_unittest.cc ++++ b/src/3rdparty/chromium/sandbox/linux/syscall_broker/broker_process_unittest.cc +@@ -1476,7 +1476,10 @@ TEST(BrokerProcess, IsSyscallAllowed) { + #if defined(__NR_rename) + {__NR_rename, COMMAND_RENAME}, + #endif ++// TODO:LA ++#if !defined(__loongarch__) + {__NR_renameat, COMMAND_RENAME}, ++#endif + #if defined(__NR_rmdir) + {__NR_rmdir, COMMAND_RMDIR}, + #endif +diff --git a/src/3rdparty/chromium/sandbox/linux/system_headers/la64_linux_syscalls.h b/src/3rdparty/chromium/sandbox/linux/system_headers/la64_linux_syscalls.h +new file mode 100644 +index 00000000000..12159cf61b5 +--- /dev/null ++++ 
b/src/3rdparty/chromium/sandbox/linux/system_headers/la64_linux_syscalls.h +@@ -0,0 +1,1120 @@ ++// Copyright 2021 The Chromium Authors. All rights reserved. ++// Use of this source code is governed by a BSD-style license that can be ++// found in the LICENSE file. ++ ++#ifndef SANDBOX_LINUX_SYSTEM_HEADERS_LA64_LINUX_SYSCALLS_H_ ++#define SANDBOX_LINUX_SYSTEM_HEADERS_LA64_LINUX_SYSCALLS_H_ ++ ++#include ++ ++#if !defined(__NR_io_setup) ++#define __NR_io_setup 0 ++#endif ++ ++#if !defined(__NR_io_destroy) ++#define __NR_io_destroy 1 ++#endif ++ ++#if !defined(__NR_io_submit) ++#define __NR_io_submit 2 ++#endif ++ ++#if !defined(__NR_io_cancel) ++#define __NR_io_cancel 3 ++#endif ++ ++#if !defined(__NR_io_getevents) ++#define __NR_io_getevents 4 ++#endif ++ ++#if !defined(__NR_setxattr) ++#define __NR_setxattr 5 ++#endif ++ ++#if !defined(__NR_lsetxattr) ++#define __NR_lsetxattr 6 ++#endif ++ ++#if !defined(__NR_fsetxattr) ++#define __NR_fsetxattr 7 ++#endif ++ ++#if !defined(__NR_getxattr) ++#define __NR_getxattr 8 ++#endif ++ ++#if !defined(__NR_lgetxattr) ++#define __NR_lgetxattr 9 ++#endif ++ ++#if !defined(__NR_fgetxattr) ++#define __NR_fgetxattr 10 ++#endif ++ ++#if !defined(__NR_listxattr) ++#define __NR_listxattr 11 ++#endif ++ ++#if !defined(__NR_llistxattr) ++#define __NR_llistxattr 12 ++#endif ++ ++#if !defined(__NR_flistxattr) ++#define __NR_flistxattr 13 ++#endif ++ ++#if !defined(__NR_removexattr) ++#define __NR_removexattr 14 ++#endif ++ ++#if !defined(__NR_lremovexattr) ++#define __NR_lremovexattr 15 ++#endif ++ ++#if !defined(__NR_fremovexattr) ++#define __NR_fremovexattr 16 ++#endif ++ ++#if !defined(__NR_getcwd) ++#define __NR_getcwd 17 ++#endif ++ ++#if !defined(__NR_lookup_dcookie) ++#define __NR_lookup_dcookie 18 ++#endif ++ ++#if !defined(__NR_eventfd2) ++#define __NR_eventfd2 19 ++#endif ++ ++#if !defined(__NR_epoll_create1) ++#define __NR_epoll_create1 20 ++#endif ++ ++#if !defined(__NR_epoll_ctl) ++#define __NR_epoll_ctl 21 ++#endif ++ ++#if !defined(__NR_epoll_pwait) ++#define __NR_epoll_pwait 22 ++#endif ++ ++#if !defined(__NR_dup) ++#define __NR_dup 23 ++#endif ++ ++#if !defined(__NR_dup3) ++#define __NR_dup3 24 ++#endif ++ ++#if !defined(__NR_fcntl) ++#define __NR_fcntl 25 ++#endif ++ ++#if !defined(__NR_inotify_init1) ++#define __NR_inotify_init1 26 ++#endif ++ ++#if !defined(__NR_inotify_add_watch) ++#define __NR_inotify_add_watch 27 ++#endif ++ ++#if !defined(__NR_inotify_rm_watch) ++#define __NR_inotify_rm_watch 28 ++#endif ++ ++#if !defined(__NR_ioctl) ++#define __NR_ioctl 29 ++#endif ++ ++#if !defined(__NR_ioprio_set) ++#define __NR_ioprio_set 30 ++#endif ++ ++#if !defined(__NR_ioprio_get) ++#define __NR_ioprio_get 31 ++#endif ++ ++#if !defined(__NR_flock) ++#define __NR_flock 32 ++#endif ++ ++#if !defined(__NR_mknodat) ++#define __NR_mknodat 33 ++#endif ++ ++#if !defined(__NR_mkdirat) ++#define __NR_mkdirat 34 ++#endif ++ ++#if !defined(__NR_unlinkat) ++#define __NR_unlinkat 35 ++#endif ++ ++#if !defined(__NR_symlinkat) ++#define __NR_symlinkat 36 ++#endif ++ ++#if !defined(__NR_linkat) ++#define __NR_linkat 37 ++#endif ++ ++#if !defined(__NR_renameat) ++#define __NR_renameat 38 ++#endif ++ ++#if !defined(__NR_umount2) ++#define __NR_umount2 39 ++#endif ++ ++#if !defined(__NR_mount) ++#define __NR_mount 40 ++#endif ++ ++#if !defined(__NR_pivot_root) ++#define __NR_pivot_root 41 ++#endif ++ ++#if !defined(__NR_nfsservctl) ++#define __NR_nfsservctl 42 ++#endif ++ ++#if !defined(__NR_statfs) ++#define __NR_statfs 43 ++#endif ++ ++#if !defined(__NR_fstatfs) 
++#define __NR_fstatfs 44 ++#endif ++ ++#if !defined(__NR_truncate) ++#define __NR_truncate 45 ++#endif ++ ++#if !defined(__NR_ftruncate) ++#define __NR_ftruncate 46 ++#endif ++ ++#if !defined(__NR_fallocate) ++#define __NR_fallocate 47 ++#endif ++ ++#if !defined(__NR_faccessat) ++#define __NR_faccessat 48 ++#endif ++ ++#if !defined(__NR_chdir) ++#define __NR_chdir 49 ++#endif ++ ++#if !defined(__NR_fchdir) ++#define __NR_fchdir 50 ++#endif ++ ++#if !defined(__NR_chroot) ++#define __NR_chroot 51 ++#endif ++ ++#if !defined(__NR_fchmod) ++#define __NR_fchmod 52 ++#endif ++ ++#if !defined(__NR_fchmodat) ++#define __NR_fchmodat 53 ++#endif ++ ++#if !defined(__NR_fchownat) ++#define __NR_fchownat 54 ++#endif ++ ++#if !defined(__NR_fchown) ++#define __NR_fchown 55 ++#endif ++ ++#if !defined(__NR_openat) ++#define __NR_openat 56 ++#endif ++ ++#if !defined(__NR_close) ++#define __NR_close 57 ++#endif ++ ++#if !defined(__NR_vhangup) ++#define __NR_vhangup 58 ++#endif ++ ++#if !defined(__NR_pipe2) ++#define __NR_pipe2 59 ++#endif ++ ++#if !defined(__NR_quotactl) ++#define __NR_quotactl 60 ++#endif ++ ++#if !defined(__NR_getdents64) ++#define __NR_getdents64 61 ++#endif ++ ++#if !defined(__NR_lseek) ++#define __NR_lseek 62 ++#endif ++ ++#if !defined(__NR_read) ++#define __NR_read 63 ++#endif ++ ++#if !defined(__NR_write) ++#define __NR_write 64 ++#endif ++ ++#if !defined(__NR_readv) ++#define __NR_readv 65 ++#endif ++ ++#if !defined(__NR_writev) ++#define __NR_writev 66 ++#endif ++ ++#if !defined(__NR_pread64) ++#define __NR_pread64 67 ++#endif ++ ++#if !defined(__NR_pwrite64) ++#define __NR_pwrite64 68 ++#endif ++ ++#if !defined(__NR_preadv) ++#define __NR_preadv 69 ++#endif ++ ++#if !defined(__NR_pwritev) ++#define __NR_pwritev 70 ++#endif ++ ++#if !defined(__NR_sendfile) ++#define __NR_sendfile 71 ++#endif ++ ++#if !defined(__NR_pselect6) ++#define __NR_pselect6 72 ++#endif ++ ++#if !defined(__NR_ppoll) ++#define __NR_ppoll 73 ++#endif ++ ++#if !defined(__NR_signalfd4) ++#define __NR_signalfd4 74 ++#endif ++ ++#if !defined(__NR_vmsplice) ++#define __NR_vmsplice 75 ++#endif ++ ++#if !defined(__NR_splice) ++#define __NR_splice 76 ++#endif ++ ++#if !defined(__NR_tee) ++#define __NR_tee 77 ++#endif ++ ++#if !defined(__NR_readlinkat) ++#define __NR_readlinkat 78 ++#endif ++ ++// __NR3264_fstatat is not declared on old system ++#define __NR_newfstatat 79 ++ ++// __NR3264_fstat is not declared on old system ++#define __NR_fstat 80 ++ ++#if !defined(__NR_sync) ++#define __NR_sync 81 ++#endif ++ ++#if !defined(__NR_fsync) ++#define __NR_fsync 82 ++#endif ++ ++#if !defined(__NR_fdatasync) ++#define __NR_fdatasync 83 ++#endif ++ ++#if !defined(__NR_sync_file_range) ++#define __NR_sync_file_range 84 ++#endif ++ ++#if !defined(__NR_timerfd_create) ++#define __NR_timerfd_create 85 ++#endif ++ ++#if !defined(__NR_timerfd_settime) ++#define __NR_timerfd_settime 86 ++#endif ++ ++#if !defined(__NR_timerfd_gettime) ++#define __NR_timerfd_gettime 87 ++#endif ++ ++#if !defined(__NR_utimensat) ++#define __NR_utimensat 88 ++#endif ++ ++#if !defined(__NR_acct) ++#define __NR_acct 89 ++#endif ++ ++#if !defined(__NR_capget) ++#define __NR_capget 90 ++#endif ++ ++#if !defined(__NR_capset) ++#define __NR_capset 91 ++#endif ++ ++#if !defined(__NR_personality) ++#define __NR_personality 92 ++#endif ++ ++#if !defined(__NR_exit) ++#define __NR_exit 93 ++#endif ++ ++#if !defined(__NR_exit_group) ++#define __NR_exit_group 94 ++#endif ++ ++#if !defined(__NR_waitid) ++#define __NR_waitid 95 ++#endif ++ ++#if 
!defined(__NR_set_tid_address) ++#define __NR_set_tid_address 96 ++#endif ++ ++#if !defined(__NR_unshare) ++#define __NR_unshare 97 ++#endif ++ ++#if !defined(__NR_futex) ++#define __NR_futex 98 ++#endif ++ ++#if !defined(__NR_set_robust_list) ++#define __NR_set_robust_list 99 ++#endif ++ ++#if !defined(__NR_get_robust_list) ++#define __NR_get_robust_list 100 ++#endif ++ ++#if !defined(__NR_nanosleep) ++#define __NR_nanosleep 101 ++#endif ++ ++#if !defined(__NR_getitimer) ++#define __NR_getitimer 102 ++#endif ++ ++#if !defined(__NR_setitimer) ++#define __NR_setitimer 103 ++#endif ++ ++#if !defined(__NR_kexec_load) ++#define __NR_kexec_load 104 ++#endif ++ ++#if !defined(__NR_init_module) ++#define __NR_init_module 105 ++#endif ++ ++#if !defined(__NR_delete_module) ++#define __NR_delete_module 106 ++#endif ++ ++#if !defined(__NR_timer_create) ++#define __NR_timer_create 107 ++#endif ++ ++#if !defined(__NR_timer_gettime) ++#define __NR_timer_gettime 108 ++#endif ++ ++#if !defined(__NR_timer_getoverrun) ++#define __NR_timer_getoverrun 109 ++#endif ++ ++#if !defined(__NR_timer_settime) ++#define __NR_timer_settime 110 ++#endif ++ ++#if !defined(__NR_timer_delete) ++#define __NR_timer_delete 111 ++#endif ++ ++#if !defined(__NR_clock_settime) ++#define __NR_clock_settime 112 ++#endif ++ ++#if !defined(__NR_clock_gettime) ++#define __NR_clock_gettime 113 ++#endif ++ ++#if !defined(__NR_clock_getres) ++#define __NR_clock_getres 114 ++#endif ++ ++#if !defined(__NR_clock_nanosleep) ++#define __NR_clock_nanosleep 115 ++#endif ++ ++#if !defined(__NR_syslog) ++#define __NR_syslog 116 ++#endif ++ ++#if !defined(__NR_ptrace) ++#define __NR_ptrace 117 ++#endif ++ ++#if !defined(__NR_sched_setparam) ++#define __NR_sched_setparam 118 ++#endif ++ ++#if !defined(__NR_sched_setscheduler) ++#define __NR_sched_setscheduler 119 ++#endif ++ ++#if !defined(__NR_sched_getscheduler) ++#define __NR_sched_getscheduler 120 ++#endif ++ ++#if !defined(__NR_sched_getparam) ++#define __NR_sched_getparam 121 ++#endif ++ ++#if !defined(__NR_sched_setaffinity) ++#define __NR_sched_setaffinity 122 ++#endif ++ ++#if !defined(__NR_sched_getaffinity) ++#define __NR_sched_getaffinity 123 ++#endif ++ ++#if !defined(__NR_sched_yield) ++#define __NR_sched_yield 124 ++#endif ++ ++#if !defined(__NR_sched_get_priority_max) ++#define __NR_sched_get_priority_max 125 ++#endif ++ ++#if !defined(__NR_sched_get_priority_min) ++#define __NR_sched_get_priority_min 126 ++#endif ++ ++#if !defined(__NR_sched_rr_get_interval) ++#define __NR_sched_rr_get_interval 127 ++#endif ++ ++#if !defined(__NR_restart_syscall) ++#define __NR_restart_syscall 128 ++#endif ++ ++#if !defined(__NR_kill) ++#define __NR_kill 129 ++#endif ++ ++#if !defined(__NR_tkill) ++#define __NR_tkill 130 ++#endif ++ ++#if !defined(__NR_tgkill) ++#define __NR_tgkill 131 ++#endif ++ ++#if !defined(__NR_sigaltstack) ++#define __NR_sigaltstack 132 ++#endif ++ ++#if !defined(__NR_rt_sigsuspend) ++#define __NR_rt_sigsuspend 133 ++#endif ++ ++#if !defined(__NR_rt_sigaction) ++#define __NR_rt_sigaction 134 ++#endif ++ ++#if !defined(__NR_rt_sigprocmask) ++#define __NR_rt_sigprocmask 135 ++#endif ++ ++#if !defined(__NR_rt_sigpending) ++#define __NR_rt_sigpending 136 ++#endif ++ ++#if !defined(__NR_rt_sigtimedwait) ++#define __NR_rt_sigtimedwait 137 ++#endif ++ ++#if !defined(__NR_rt_sigqueueinfo) ++#define __NR_rt_sigqueueinfo 138 ++#endif ++ ++#if !defined(__NR_rt_sigreturn) ++#define __NR_rt_sigreturn 139 ++#endif ++ ++#if !defined(__NR_setpriority) ++#define __NR_setpriority 140 ++#endif 
++ ++#if !defined(__NR_getpriority) ++#define __NR_getpriority 141 ++#endif ++ ++#if !defined(__NR_reboot) ++#define __NR_reboot 142 ++#endif ++ ++#if !defined(__NR_setregid) ++#define __NR_setregid 143 ++#endif ++ ++#if !defined(__NR_setgid) ++#define __NR_setgid 144 ++#endif ++ ++#if !defined(__NR_setreuid) ++#define __NR_setreuid 145 ++#endif ++ ++#if !defined(__NR_setuid) ++#define __NR_setuid 146 ++#endif ++ ++#if !defined(__NR_setresuid) ++#define __NR_setresuid 147 ++#endif ++ ++#if !defined(__NR_getresuid) ++#define __NR_getresuid 148 ++#endif ++ ++#if !defined(__NR_setresgid) ++#define __NR_setresgid 149 ++#endif ++ ++#if !defined(__NR_getresgid) ++#define __NR_getresgid 150 ++#endif ++ ++#if !defined(__NR_setfsuid) ++#define __NR_setfsuid 151 ++#endif ++ ++#if !defined(__NR_setfsgid) ++#define __NR_setfsgid 152 ++#endif ++ ++#if !defined(__NR_times) ++#define __NR_times 153 ++#endif ++ ++#if !defined(__NR_setpgid) ++#define __NR_setpgid 154 ++#endif ++ ++#if !defined(__NR_getpgid) ++#define __NR_getpgid 155 ++#endif ++ ++#if !defined(__NR_getsid) ++#define __NR_getsid 156 ++#endif ++ ++#if !defined(__NR_setsid) ++#define __NR_setsid 157 ++#endif ++ ++#if !defined(__NR_getgroups) ++#define __NR_getgroups 158 ++#endif ++ ++#if !defined(__NR_setgroups) ++#define __NR_setgroups 159 ++#endif ++ ++#if !defined(__NR_uname) ++#define __NR_uname 160 ++#endif ++ ++#if !defined(__NR_sethostname) ++#define __NR_sethostname 161 ++#endif ++ ++#if !defined(__NR_setdomainname) ++#define __NR_setdomainname 162 ++#endif ++ ++#if !defined(__NR_getrlimit) ++#define __NR_getrlimit 163 ++#endif ++ ++#if !defined(__NR_setrlimit) ++#define __NR_setrlimit 164 ++#endif ++ ++#if !defined(__NR_getrusage) ++#define __NR_getrusage 165 ++#endif ++ ++#if !defined(__NR_umask) ++#define __NR_umask 166 ++#endif ++ ++#if !defined(__NR_prctl) ++#define __NR_prctl 167 ++#endif ++ ++#if !defined(__NR_getcpu) ++#define __NR_getcpu 168 ++#endif ++ ++#if !defined(__NR_gettimeofday) ++#define __NR_gettimeofday 169 ++#endif ++ ++#if !defined(__NR_settimeofday) ++#define __NR_settimeofday 170 ++#endif ++ ++#if !defined(__NR_adjtimex) ++#define __NR_adjtimex 171 ++#endif ++ ++#if !defined(__NR_getpid) ++#define __NR_getpid 172 ++#endif ++ ++#if !defined(__NR_getppid) ++#define __NR_getppid 173 ++#endif ++ ++#if !defined(__NR_getuid) ++#define __NR_getuid 174 ++#endif ++ ++#if !defined(__NR_geteuid) ++#define __NR_geteuid 175 ++#endif ++ ++#if !defined(__NR_getgid) ++#define __NR_getgid 176 ++#endif ++ ++#if !defined(__NR_getegid) ++#define __NR_getegid 177 ++#endif ++ ++#if !defined(__NR_gettid) ++#define __NR_gettid 178 ++#endif ++ ++#if !defined(__NR_sysinfo) ++#define __NR_sysinfo 179 ++#endif ++ ++#if !defined(__NR_mq_open) ++#define __NR_mq_open 180 ++#endif ++ ++#if !defined(__NR_mq_unlink) ++#define __NR_mq_unlink 181 ++#endif ++ ++#if !defined(__NR_mq_timedsend) ++#define __NR_mq_timedsend 182 ++#endif ++ ++#if !defined(__NR_mq_timedreceive) ++#define __NR_mq_timedreceive 183 ++#endif ++ ++#if !defined(__NR_mq_notify) ++#define __NR_mq_notify 184 ++#endif ++ ++#if !defined(__NR_mq_getsetattr) ++#define __NR_mq_getsetattr 185 ++#endif ++ ++#if !defined(__NR_msgget) ++#define __NR_msgget 186 ++#endif ++ ++#if !defined(__NR_msgctl) ++#define __NR_msgctl 187 ++#endif ++ ++#if !defined(__NR_msgrcv) ++#define __NR_msgrcv 188 ++#endif ++ ++#if !defined(__NR_msgsnd) ++#define __NR_msgsnd 189 ++#endif ++ ++#if !defined(__NR_semget) ++#define __NR_semget 190 ++#endif ++ ++#if !defined(__NR_semctl) ++#define __NR_semctl 191 
++#endif ++ ++#if !defined(__NR_semtimedop) ++#define __NR_semtimedop 192 ++#endif ++ ++#if !defined(__NR_semop) ++#define __NR_semop 193 ++#endif ++ ++#if !defined(__NR_shmget) ++#define __NR_shmget 194 ++#endif ++ ++#if !defined(__NR_shmctl) ++#define __NR_shmctl 195 ++#endif ++ ++#if !defined(__NR_shmat) ++#define __NR_shmat 196 ++#endif ++ ++#if !defined(__NR_shmdt) ++#define __NR_shmdt 197 ++#endif ++ ++#if !defined(__NR_socket) ++#define __NR_socket 198 ++#endif ++ ++#if !defined(__NR_socketpair) ++#define __NR_socketpair 199 ++#endif ++ ++#if !defined(__NR_bind) ++#define __NR_bind 200 ++#endif ++ ++#if !defined(__NR_listen) ++#define __NR_listen 201 ++#endif ++ ++#if !defined(__NR_accept) ++#define __NR_accept 202 ++#endif ++ ++#if !defined(__NR_connect) ++#define __NR_connect 203 ++#endif ++ ++#if !defined(__NR_getsockname) ++#define __NR_getsockname 204 ++#endif ++ ++#if !defined(__NR_getpeername) ++#define __NR_getpeername 205 ++#endif ++ ++#if !defined(__NR_sendto) ++#define __NR_sendto 206 ++#endif ++ ++#if !defined(__NR_recvfrom) ++#define __NR_recvfrom 207 ++#endif ++ ++#if !defined(__NR_setsockopt) ++#define __NR_setsockopt 208 ++#endif ++ ++#if !defined(__NR_getsockopt) ++#define __NR_getsockopt 209 ++#endif ++ ++#if !defined(__NR_shutdown) ++#define __NR_shutdown 210 ++#endif ++ ++#if !defined(__NR_sendmsg) ++#define __NR_sendmsg 211 ++#endif ++ ++#if !defined(__NR_recvmsg) ++#define __NR_recvmsg 212 ++#endif ++ ++#if !defined(__NR_readahead) ++#define __NR_readahead 213 ++#endif ++ ++#if !defined(__NR_brk) ++#define __NR_brk 214 ++#endif ++ ++#if !defined(__NR_munmap) ++#define __NR_munmap 215 ++#endif ++ ++#if !defined(__NR_mremap) ++#define __NR_mremap 216 ++#endif ++ ++#if !defined(__NR_add_key) ++#define __NR_add_key 217 ++#endif ++ ++#if !defined(__NR_request_key) ++#define __NR_request_key 218 ++#endif ++ ++#if !defined(__NR_keyctl) ++#define __NR_keyctl 219 ++#endif ++ ++#if !defined(__NR_clone) ++#define __NR_clone 220 ++#endif ++ ++#if !defined(__NR_execve) ++#define __NR_execve 221 ++#endif ++ ++#if !defined(__NR_mmap) ++#define __NR_mmap 222 ++#endif ++ ++#if !defined(__NR_fadvise64) ++#define __NR_fadvise64 223 ++#endif ++ ++#if !defined(__NR_swapon) ++#define __NR_swapon 224 ++#endif ++ ++#if !defined(__NR_swapoff) ++#define __NR_swapoff 225 ++#endif ++ ++#if !defined(__NR_mprotect) ++#define __NR_mprotect 226 ++#endif ++ ++#if !defined(__NR_msync) ++#define __NR_msync 227 ++#endif ++ ++#if !defined(__NR_mlock) ++#define __NR_mlock 228 ++#endif ++ ++#if !defined(__NR_munlock) ++#define __NR_munlock 229 ++#endif ++ ++#if !defined(__NR_mlockall) ++#define __NR_mlockall 230 ++#endif ++ ++#if !defined(__NR_munlockall) ++#define __NR_munlockall 231 ++#endif ++ ++#if !defined(__NR_mincore) ++#define __NR_mincore 232 ++#endif ++ ++#if !defined(__NR_madvise) ++#define __NR_madvise 233 ++#endif ++ ++#if !defined(__NR_remap_file_pages) ++#define __NR_remap_file_pages 234 ++#endif ++ ++#if !defined(__NR_mbind) ++#define __NR_mbind 235 ++#endif ++ ++#if !defined(__NR_get_mempolicy) ++#define __NR_get_mempolicy 236 ++#endif ++ ++#if !defined(__NR_set_mempolicy) ++#define __NR_set_mempolicy 237 ++#endif ++ ++#if !defined(__NR_migrate_pages) ++#define __NR_migrate_pages 238 ++#endif ++ ++#if !defined(__NR_move_pages) ++#define __NR_move_pages 239 ++#endif ++ ++#if !defined(__NR_rt_tgsigqueueinfo) ++#define __NR_rt_tgsigqueueinfo 240 ++#endif ++ ++#if !defined(__NR_perf_event_open) ++#define __NR_perf_event_open 241 ++#endif ++ ++#if !defined(__NR_accept4) ++#define 
__NR_accept4 242
++#endif
++
++#if !defined(__NR_recvmmsg)
++#define __NR_recvmmsg 243
++#endif
++
++#if !defined(__NR_wait4)
++#define __NR_wait4 260
++#endif
++
++#if !defined(__NR_prlimit64)
++#define __NR_prlimit64 261
++#endif
++
++#if !defined(__NR_fanotify_init)
++#define __NR_fanotify_init 262
++#endif
++
++#if !defined(__NR_fanotify_mark)
++#define __NR_fanotify_mark 263
++#endif
++
++#if !defined(__NR_name_to_handle_at)
++#define __NR_name_to_handle_at 264
++#endif
++
++#if !defined(__NR_open_by_handle_at)
++#define __NR_open_by_handle_at 265
++#endif
++
++#if !defined(__NR_clock_adjtime)
++#define __NR_clock_adjtime 266
++#endif
++
++#if !defined(__NR_syncfs)
++#define __NR_syncfs 267
++#endif
++
++#if !defined(__NR_setns)
++#define __NR_setns 268
++#endif
++
++#if !defined(__NR_sendmmsg)
++#define __NR_sendmmsg 269
++#endif
++
++#if !defined(__NR_process_vm_readv)
++#define __NR_process_vm_readv 270
++#endif
++
++#if !defined(__NR_process_vm_writev)
++#define __NR_process_vm_writev 271
++#endif
++
++#if !defined(__NR_kcmp)
++#define __NR_kcmp 272
++#endif
++
++#if !defined(__NR_finit_module)
++#define __NR_finit_module 273
++#endif
++
++#if !defined(__NR_sched_setattr)
++#define __NR_sched_setattr 274
++#endif
++
++#if !defined(__NR_sched_getattr)
++#define __NR_sched_getattr 275
++#endif
++
++#if !defined(__NR_renameat2)
++#define __NR_renameat2 276
++#endif
++
++#if !defined(__NR_seccomp)
++#define __NR_seccomp 277
++#endif
++
++#if !defined(__NR_getrandom)
++#define __NR_getrandom 278
++#endif
++
++#if !defined(__NR_memfd_create)
++#define __NR_memfd_create 279
++#endif
++
++#if !defined(__NR_bpf)
++#define __NR_bpf 280
++#endif
++
++#if !defined(__NR_execveat)
++#define __NR_execveat 281
++#endif
++
++#if !defined(__NR_userfaultfd)
++#define __NR_userfaultfd 282
++#endif
++
++#if !defined(__NR_membarrier)
++#define __NR_membarrier 283
++#endif
++
++#if !defined(__NR_mlock2)
++#define __NR_mlock2 284
++#endif
++
++#if !defined(__NR_copy_file_range)
++#define __NR_copy_file_range 285
++#endif
++
++#if !defined(__NR_preadv2)
++#define __NR_preadv2 286
++#endif
++
++#if !defined(__NR_pwritev2)
++#define __NR_pwritev2 287
++#endif
++
++#if !defined(__NR_pkey_mprotect)
++#define __NR_pkey_mprotect 288
++#endif
++
++#if !defined(__NR_pkey_alloc)
++#define __NR_pkey_alloc 289
++#endif
++
++#if !defined(__NR_pkey_free)
++#define __NR_pkey_free 290
++#endif
++
++#if !defined(__NR_statx)
++#define __NR_statx 291
++#endif
++
++#if !defined(__NR_io_pgetevents)
++#define __NR_io_pgetevents 292
++#endif
++
++#if !defined(__NR_rseq)
++#define __NR_rseq 293
++#endif
++
++#endif // SANDBOX_LINUX_SYSTEM_HEADERS_LA64_LINUX_SYSCALLS_H_
+diff --git a/src/3rdparty/chromium/sandbox/linux/system_headers/linux_seccomp.h b/src/3rdparty/chromium/sandbox/linux/system_headers/linux_seccomp.h
+index a60fe2ad3dc..634be0d1c92 100644
+--- a/src/3rdparty/chromium/sandbox/linux/system_headers/linux_seccomp.h
++++ b/src/3rdparty/chromium/sandbox/linux/system_headers/linux_seccomp.h
+@@ -29,6 +29,9 @@
+ #ifndef EM_AARCH64
+ #define EM_AARCH64 183
+ #endif
++#ifndef EM_LOONGARCH
++#define EM_LOONGARCH 258
++#endif
+
+ #ifndef __AUDIT_ARCH_64BIT
+ #define __AUDIT_ARCH_64BIT 0x80000000
+@@ -54,6 +57,9 @@
+ #ifndef AUDIT_ARCH_AARCH64
+ #define AUDIT_ARCH_AARCH64 (EM_AARCH64 | __AUDIT_ARCH_64BIT | __AUDIT_ARCH_LE)
+ #endif
++#ifndef AUDIT_ARCH_LOONGARCH64
++#define AUDIT_ARCH_LOONGARCH64 (EM_LOONGARCH | __AUDIT_ARCH_64BIT | __AUDIT_ARCH_LE)
++#endif
+
+ // For prctl.h
+ #ifndef PR_SET_SECCOMP
+diff --git 
a/src/3rdparty/chromium/sandbox/linux/system_headers/linux_signal.h b/src/3rdparty/chromium/sandbox/linux/system_headers/linux_signal.h +index f5a73676174..4af5d249a57 100644 +--- a/src/3rdparty/chromium/sandbox/linux/system_headers/linux_signal.h ++++ b/src/3rdparty/chromium/sandbox/linux/system_headers/linux_signal.h +@@ -13,7 +13,7 @@ + // (not undefined, but defined different values and in different memory + // layouts). So, fill the gap here. + #if defined(__i386__) || defined(__x86_64__) || defined(__arm__) || \ +- defined(__aarch64__) ++ defined(__aarch64__) || defined(__loongarch64) + + #define LINUX_SIGHUP 1 + #define LINUX_SIGINT 2 +@@ -120,7 +120,7 @@ typedef siginfo_t LinuxSigInfo; + struct LinuxSigSet { + unsigned long sig[_NSIG_WORDS]; + }; +-#elif defined(ARCH_CPU_MIPS_FAMILY) && defined(ARCH_CPU_64_BITS) ++#elif (defined(ARCH_CPU_MIPS_FAMILY) && defined(ARCH_CPU_64_BITS)) || defined(ARCH_CPU_LA64) + #if !defined(_NSIG_WORDS) + #define _NSIG_WORDS 2 + #endif +diff --git a/src/3rdparty/chromium/sandbox/linux/system_headers/linux_syscalls.h b/src/3rdparty/chromium/sandbox/linux/system_headers/linux_syscalls.h +index 2b78a0cc3b9..eb66de9152e 100644 +--- a/src/3rdparty/chromium/sandbox/linux/system_headers/linux_syscalls.h ++++ b/src/3rdparty/chromium/sandbox/linux/system_headers/linux_syscalls.h +@@ -35,5 +35,9 @@ + #include "sandbox/linux/system_headers/arm64_linux_syscalls.h" + #endif + ++#if defined(__loongarch64) ++#include "sandbox/linux/system_headers/la64_linux_syscalls.h" ++#endif ++ + #endif // SANDBOX_LINUX_SYSTEM_HEADERS_LINUX_SYSCALLS_H_ + +diff --git a/src/3rdparty/chromium/services/service_manager/sandbox/linux/bpf_broker_policy_linux.cc b/src/3rdparty/chromium/services/service_manager/sandbox/linux/bpf_broker_policy_linux.cc +index 68af74e1fba..d01ae8b474e 100644 +--- a/src/3rdparty/chromium/services/service_manager/sandbox/linux/bpf_broker_policy_linux.cc ++++ b/src/3rdparty/chromium/services/service_manager/sandbox/linux/bpf_broker_policy_linux.cc +@@ -98,7 +98,7 @@ ResultExpr BrokerProcessPolicy::EvaluateSyscall(int sysno) const { + return Allow(); + break; + #endif +-#if defined(__NR_newfstatat) ++#if defined(__NR_newfstatat) && defined(__clang__) + case __NR_newfstatat: + if (allowed_command_set_.test(sandbox::syscall_broker::COMMAND_STAT)) + return Allow(); +diff --git a/src/3rdparty/chromium/services/service_manager/sandbox/linux/bpf_cdm_policy_linux.cc b/src/3rdparty/chromium/services/service_manager/sandbox/linux/bpf_cdm_policy_linux.cc +index 9d39e5d5de4..ce104e19e16 100644 +--- a/src/3rdparty/chromium/services/service_manager/sandbox/linux/bpf_cdm_policy_linux.cc ++++ b/src/3rdparty/chromium/services/service_manager/sandbox/linux/bpf_cdm_policy_linux.cc +@@ -33,7 +33,7 @@ ResultExpr CdmProcessPolicy::EvaluateSyscall(int sysno) const { + case __NR_ftruncate: + case __NR_fallocate: + #if defined(__i386__) || defined(__x86_64__) || defined(__mips__) || \ +- defined(__aarch64__) ++ defined(__aarch64__) || defined(__loongarch__) + case __NR_getrlimit: + #endif + #if defined(__i386__) || defined(__arm__) +diff --git a/src/3rdparty/chromium/services/service_manager/sandbox/linux/bpf_cros_amd_gpu_policy_linux.cc b/src/3rdparty/chromium/services/service_manager/sandbox/linux/bpf_cros_amd_gpu_policy_linux.cc +index 2a850ba8efa..e6ddf51e260 100644 +--- a/src/3rdparty/chromium/services/service_manager/sandbox/linux/bpf_cros_amd_gpu_policy_linux.cc ++++ b/src/3rdparty/chromium/services/service_manager/sandbox/linux/bpf_cros_amd_gpu_policy_linux.cc +@@ -37,7 +37,7 
@@ ResultExpr CrosAmdGpuProcessPolicy::EvaluateSyscall(int sysno) const { + case __NR_sched_setscheduler: + case __NR_sysinfo: + case __NR_uname: +-#if !defined(__aarch64__) ++#if !defined(__aarch64__) && !defined(__loongarch__) + case __NR_readlink: + case __NR_stat: + #endif +diff --git a/src/3rdparty/chromium/services/service_manager/sandbox/linux/bpf_gpu_policy_linux.cc b/src/3rdparty/chromium/services/service_manager/sandbox/linux/bpf_gpu_policy_linux.cc +index 66214334def..33114417045 100644 +--- a/src/3rdparty/chromium/services/service_manager/sandbox/linux/bpf_gpu_policy_linux.cc ++++ b/src/3rdparty/chromium/services/service_manager/sandbox/linux/bpf_gpu_policy_linux.cc +@@ -48,7 +48,7 @@ ResultExpr GpuProcessPolicy::EvaluateSyscall(int sysno) const { + (defined(ARCH_CPU_MIPS_FAMILY) && defined(ARCH_CPU_32_BITS)) + case __NR_ftruncate64: + #endif +-#if !defined(__aarch64__) ++#if !defined(__aarch64__) && !defined(__loongarch__) + case __NR_getdents: + #endif + case __NR_getdents64: +diff --git a/src/3rdparty/chromium/services/service_manager/sandbox/linux/bpf_renderer_policy_linux.cc b/src/3rdparty/chromium/services/service_manager/sandbox/linux/bpf_renderer_policy_linux.cc +index a85c0ea8678..1f8d044a3ff 100644 +--- a/src/3rdparty/chromium/services/service_manager/sandbox/linux/bpf_renderer_policy_linux.cc ++++ b/src/3rdparty/chromium/services/service_manager/sandbox/linux/bpf_renderer_policy_linux.cc +@@ -68,7 +68,7 @@ ResultExpr RendererProcessPolicy::EvaluateSyscall(int sysno) const { + case __NR_ftruncate64: + #endif + #if defined(__i386__) || defined(__x86_64__) || defined(__mips__) || \ +- defined(__aarch64__) ++ defined(__aarch64__) || defined(__loongarch__) + case __NR_getrlimit: + case __NR_setrlimit: + // We allow setrlimit to dynamically adjust the address space limit as +diff --git a/src/3rdparty/chromium/services/service_manager/sandbox/linux/sandbox_seccomp_bpf_linux.cc b/src/3rdparty/chromium/services/service_manager/sandbox/linux/sandbox_seccomp_bpf_linux.cc +index f5d096b1029..ac6133da5a0 100644 +--- a/src/3rdparty/chromium/services/service_manager/sandbox/linux/sandbox_seccomp_bpf_linux.cc ++++ b/src/3rdparty/chromium/services/service_manager/sandbox/linux/sandbox_seccomp_bpf_linux.cc +@@ -65,9 +65,9 @@ using sandbox::bpf_dsl::ResultExpr; + + // Make sure that seccomp-bpf does not get disabled by mistake. Also make sure + // that we think twice about this when adding a new architecture. +-#if !defined(ARCH_CPU_ARM64) && !defined(ARCH_CPU_MIPS64EL) ++#if !defined(ARCH_CPU_ARM64) && !defined(ARCH_CPU_MIPS64EL) && !defined(ARCH_CPU_LA64) + #error "Seccomp-bpf disabled on supported architecture!" 
+-#endif // !defined(ARCH_CPU_ARM64) && !defined(ARCH_CPU_MIPS64EL)
++#endif // !defined(ARCH_CPU_ARM64) && !defined(ARCH_CPU_MIPS64EL) && !defined(ARCH_CPU_LA64)
+
+ #endif // BUILDFLAG(USE_SECCOMP_BPF)
+
+diff --git a/src/3rdparty/chromium/skia/BUILD.gn b/src/3rdparty/chromium/skia/BUILD.gn
+index f5992c5059d..7a3bbcd78e2 100644
+--- a/src/3rdparty/chromium/skia/BUILD.gn
++++ b/src/3rdparty/chromium/skia/BUILD.gn
+@@ -796,6 +796,8 @@ skia_source_set("skia_opts") {
+ sources = skia_opts.none_sources
+ } else if (current_cpu == "s390x") {
+ sources = skia_opts.none_sources
++ } else if (current_cpu == "la64") {
++ sources = skia_opts.none_sources
+ } else {
+ assert(false, "Need to port cpu specific stuff from skia_library_opts.gyp")
+ }
+diff --git a/src/3rdparty/chromium/third_party/angle/gni/angle.gni b/src/3rdparty/chromium/third_party/angle/gni/angle.gni
+index 1c8ad4802fe..b3cef6b8a3b 100644
+--- a/src/3rdparty/chromium/third_party/angle/gni/angle.gni
++++ b/src/3rdparty/chromium/third_party/angle/gni/angle.gni
+@@ -54,7 +54,7 @@ angle_data_dir = "angledata"
+ declare_args() {
+ if (current_cpu == "arm64" || current_cpu == "x64" ||
+ current_cpu == "mips64el" || current_cpu == "s390x" ||
+- current_cpu == "ppc64") {
++ current_cpu == "ppc64" || current_cpu == "la64") {
+ angle_64bit_current_cpu = true
+ } else if (current_cpu == "arm" || current_cpu == "x86" ||
+ current_cpu == "mipsel" || current_cpu == "s390" ||
+diff --git a/src/3rdparty/chromium/third_party/blink/renderer/core/editing/commands/delete_selection_command.cc b/src/3rdparty/chromium/third_party/blink/renderer/core/editing/commands/delete_selection_command.cc
+index 05b74b53f48..1a56dda89b9 100644
+--- a/src/3rdparty/chromium/third_party/blink/renderer/core/editing/commands/delete_selection_command.cc
++++ b/src/3rdparty/chromium/third_party/blink/renderer/core/editing/commands/delete_selection_command.cc
+@@ -229,7 +229,7 @@ static Position TrailingWhitespacePosition(const Position& position,
+ // Workaround: GCC fails to resolve overloaded template functions, passed as
+ // parameters of EnclosingNodeType. But it works wrapping that in a utility
+ // function.
+-#if defined(COMPILER_GCC)
++#if defined(COMPILER_GCC) || !defined(__clang__)
+ static bool IsHTMLTableRowElement(const blink::Node* node) {
+ return IsA<HTMLTableRowElement>(node);
+ }
+@@ -263,7 +263,7 @@ void DeleteSelectionCommand::InitializePositionData(
+ start_root_ = RootEditableElementOf(start);
+ end_root_ = RootEditableElementOf(end);
+
+-#if defined(COMPILER_GCC)
++#if defined(COMPILER_GCC) || !defined(__clang__)
+ // Workaround. 
See declaration of IsHTMLTableRowElement
+ start_table_row_ = To<HTMLTableRowElement>(
+ EnclosingNodeOfType(start, &IsHTMLTableRowElement));
+diff --git a/src/3rdparty/chromium/third_party/blink/renderer/platform/heap/asm/BUILD.gn b/src/3rdparty/chromium/third_party/blink/renderer/platform/heap/asm/BUILD.gn
+index fe44daf27a5..9910244f5bf 100644
+--- a/src/3rdparty/chromium/third_party/blink/renderer/platform/heap/asm/BUILD.gn
++++ b/src/3rdparty/chromium/third_party/blink/renderer/platform/heap/asm/BUILD.gn
+@@ -36,6 +36,8 @@ if (current_cpu == "x86" || current_cpu == "x64") {
+ sources = [ "SaveRegisters_mips.S" ]
+ } else if (current_cpu == "mips64el") {
+ sources = [ "SaveRegisters_mips64.S" ]
++ } else if (current_cpu == "la64") {
++ sources = [ "SaveRegisters_la64.S" ]
+ } else if (current_cpu == "ppc64") {
+ sources = [ "SaveRegisters_ppc64.S" ]
+ }
+diff --git a/src/3rdparty/chromium/third_party/blink/renderer/platform/heap/asm/SaveRegisters_la64.S b/src/3rdparty/chromium/third_party/blink/renderer/platform/heap/asm/SaveRegisters_la64.S
+new file mode 100644
+index 00000000000..880201671af
+--- /dev/null
++++ b/src/3rdparty/chromium/third_party/blink/renderer/platform/heap/asm/SaveRegisters_la64.S
+@@ -0,0 +1,41 @@
++// Copyright 2014 The Chromium Authors. All rights reserved.
++// Use of this source code is governed by a BSD-style license that can be
++// found in the LICENSE file.
++
++/*
++ * typedef void (*PushAllRegistersCallback)(ThreadState*, intptr_t*);
++ * extern "C" void PushAllRegisters(ThreadState*, PushAllRegistersCallback)
++ */
++
++.type PushAllRegisters, %function
++.global PushAllRegisters
++.hidden PushAllRegisters
++PushAllRegisters:
++ // Push all callee-saves registers to get them
++ // on the stack for conservative stack scanning.
++ // Reserve space for callee-saved registers and return address.
++ addi.d $sp,$sp,-80
++ // Save the callee-saved registers and the return address.
++ st.d $s0, $sp, 0
++ st.d $s1, $sp, 8
++ st.d $s2, $sp, 16
++ st.d $s3, $sp, 24
++ st.d $s4, $sp, 32
++ st.d $s5, $sp, 40
++ st.d $s6, $sp, 48
++ st.d $s7, $sp, 56
++ st.d $ra, $sp, 64
++ // Note: the callee-saved floating point registers do not need to be
++ // copied to the stack, because fp registers never hold heap pointers
++ // and so do not need to be kept visible to the garbage collector.
++ // Pass the first argument untouched in a0 and the
++ // stack pointer to the callback.
++ move $t7,$a1
++ move $a1,$sp
++ jirl $ra, $t7, 0
++ // Restore return address, adjust stack and return.
++ // Note: the copied registers do not need to be reloaded here,
++ // because they were preserved by the called routine.
++ ld.d $ra, $sp, 64
++ addi.d $sp, $sp, 80
++ jirl $zero, $ra, 0
+diff --git a/src/3rdparty/chromium/third_party/blink/renderer/platform/wtf/hash_table.h b/src/3rdparty/chromium/third_party/blink/renderer/platform/wtf/hash_table.h
+index eb10c6964ca..127a415a5fe 100644
+--- a/src/3rdparty/chromium/third_party/blink/renderer/platform/wtf/hash_table.h
++++ b/src/3rdparty/chromium/third_party/blink/renderer/platform/wtf/hash_table.h
+@@ -674,7 +674,12 @@ struct HashTableHelper {
+ }
+ static constexpr size_t constexpr_max(size_t a, size_t b) { return a > b ? 
a : b; } + static bool IsEmptyOrDeletedBucketSafe(const Value& value) { ++// TODO:LA64 ++#if defined(ARCH_CPU_LA64) ++ char buf[sizeof(Key)]; ++#else + alignas(constexpr_max(alignof(Key), sizeof(size_t))) char buf[sizeof(Key)]; ++#endif + const Key& key = Extractor::ExtractSafe(value, &buf); + return IsEmptyBucket(key) || IsDeletedBucket(key); + } +diff --git a/src/3rdparty/chromium/third_party/boringssl/src/include/openssl/base.h b/src/3rdparty/chromium/third_party/boringssl/src/include/openssl/base.h +index 8d73f7747ca..1b79becbb49 100644 +--- a/src/3rdparty/chromium/third_party/boringssl/src/include/openssl/base.h ++++ b/src/3rdparty/chromium/third_party/boringssl/src/include/openssl/base.h +@@ -105,6 +105,9 @@ extern "C" { + #elif defined(__mips__) && defined(__LP64__) + #define OPENSSL_64_BIT + #define OPENSSL_MIPS64 ++#elif defined(__loongarch__) ++#define OPENSSL_64_BIT ++#define OPENSSL_LA64 + #elif defined(__pnacl__) + #define OPENSSL_32_BIT + #define OPENSSL_PNACL +diff --git a/src/3rdparty/chromium/third_party/breakpad/breakpad/src/client/linux/crash_generation/crash_generation_client.cc b/src/3rdparty/chromium/third_party/breakpad/breakpad/src/client/linux/crash_generation/crash_generation_client.cc +index d8bfbbad27a..9520c2183db 100644 +--- a/src/3rdparty/chromium/third_party/breakpad/breakpad/src/client/linux/crash_generation/crash_generation_client.cc ++++ b/src/3rdparty/chromium/third_party/breakpad/breakpad/src/client/linux/crash_generation/crash_generation_client.cc +@@ -50,11 +50,11 @@ class CrashGenerationClientImpl : public CrashGenerationClient { + + virtual bool RequestDump(const void* blob, size_t blob_size) { + int fds[2]; +- if (sys_pipe(fds) < 0) +- return false; ++ //if (sys_pipe(fds) < 0) ++ // return false; + static const unsigned kControlMsgSize = CMSG_SPACE(sizeof(int)); + +- struct kernel_iovec iov; ++ /*struct kernel_iovec iov; + iov.iov_base = const_cast(blob); + iov.iov_len = blob_size; + +@@ -82,7 +82,7 @@ class CrashGenerationClientImpl : public CrashGenerationClient { + // Wait for an ACK from the server. + char b; + IGNORE_RET(HANDLE_EINTR(sys_read(fds[0], &b, 1))); +- sys_close(fds[0]); ++ sys_close(fds[0]);*/ + + return true; + } +diff --git a/src/3rdparty/chromium/third_party/breakpad/breakpad/src/client/linux/dump_writer_common/raw_context_cpu.h b/src/3rdparty/chromium/third_party/breakpad/breakpad/src/client/linux/dump_writer_common/raw_context_cpu.h +index 07d9171a0a6..5fde64bd579 100644 +--- a/src/3rdparty/chromium/third_party/breakpad/breakpad/src/client/linux/dump_writer_common/raw_context_cpu.h ++++ b/src/3rdparty/chromium/third_party/breakpad/breakpad/src/client/linux/dump_writer_common/raw_context_cpu.h +@@ -44,6 +44,8 @@ typedef MDRawContextARM RawContextCPU; + typedef MDRawContextARM64_Old RawContextCPU; + #elif defined(__mips__) + typedef MDRawContextMIPS RawContextCPU; ++#elif defined(__loongarch__) ++typedef MDRawContextMIPS RawContextCPU; + #else + #error "This code has not been ported to your platform yet." 
+ #endif
+diff --git a/src/3rdparty/chromium/third_party/breakpad/breakpad/src/client/linux/dump_writer_common/thread_info.cc b/src/3rdparty/chromium/third_party/breakpad/breakpad/src/client/linux/dump_writer_common/thread_info.cc
+index aae1dc13b25..70f0eeaa6f0 100644
+--- a/src/3rdparty/chromium/third_party/breakpad/breakpad/src/client/linux/dump_writer_common/thread_info.cc
++++ b/src/3rdparty/chromium/third_party/breakpad/breakpad/src/client/linux/dump_writer_common/thread_info.cc
+@@ -228,6 +228,16 @@ void ThreadInfo::FillCPUContext(RawContextCPU* out) const {
+ MD_FLOATINGSAVEAREA_ARM64_FPR_COUNT * 16);
+ }
+
++#elif defined(__loongarch__)
++
++uintptr_t ThreadInfo::GetInstructionPointer() const {
++ return 0;
++}
++
++void ThreadInfo::FillCPUContext(RawContextCPU* out) const {
++
++}
++
+ #elif defined(__mips__)
+
+ uintptr_t ThreadInfo::GetInstructionPointer() const {
+@@ -280,10 +290,10 @@ void ThreadInfo::GetGeneralPurposeRegisters(void** gp_regs, size_t* size) {
+ if (size)
+ *size = sizeof(mcontext.gregs);
+ #else
+- if (gp_regs)
++ /*if (gp_regs)
+ *gp_regs = &regs;
+ if (size)
+- *size = sizeof(regs);
++ *size = sizeof(regs);*/
+ #endif
+ }
+
+@@ -295,10 +305,10 @@ void ThreadInfo::GetFloatingPointRegisters(void** fp_regs, size_t* size) {
+ if (size)
+ *size = sizeof(mcontext.fpregs);
+ #else
+- if (fp_regs)
++ /*if (fp_regs)
+ *fp_regs = &fpregs;
+ if (size)
+- *size = sizeof(fpregs);
++ *size = sizeof(fpregs);*/
+ #endif
+ }
+
+diff --git a/src/3rdparty/chromium/third_party/breakpad/breakpad/src/client/linux/dump_writer_common/thread_info.h b/src/3rdparty/chromium/third_party/breakpad/breakpad/src/client/linux/dump_writer_common/thread_info.h
+index fb216fa6d71..c58ec4cfb37 100644
+--- a/src/3rdparty/chromium/third_party/breakpad/breakpad/src/client/linux/dump_writer_common/thread_info.h
++++ b/src/3rdparty/chromium/third_party/breakpad/breakpad/src/client/linux/dump_writer_common/thread_info.h
+@@ -71,6 +71,9 @@ struct ThreadInfo {
+ #elif defined(__mips__)
+ // Use the structure defined in <sys/ucontext.h>.
+ mcontext_t mcontext;
++#elif defined(__loongarch__)
++ // Use the structure defined in <sys/ucontext.h>.
++ mcontext_t mcontext;
+ #endif
+
+ // Returns the instruction pointer (platform-dependent impl.). 
+diff --git a/src/3rdparty/chromium/third_party/breakpad/breakpad/src/client/linux/dump_writer_common/ucontext_reader.cc b/src/3rdparty/chromium/third_party/breakpad/breakpad/src/client/linux/dump_writer_common/ucontext_reader.cc +index 6ee6cc1e4cd..e53661b2856 100644 +--- a/src/3rdparty/chromium/third_party/breakpad/breakpad/src/client/linux/dump_writer_common/ucontext_reader.cc ++++ b/src/3rdparty/chromium/third_party/breakpad/breakpad/src/client/linux/dump_writer_common/ucontext_reader.cc +@@ -208,6 +208,15 @@ void UContextReader::FillCPUContext(RawContextCPU *out, const ucontext_t *uc, + MD_FLOATINGSAVEAREA_ARM64_FPR_COUNT * 16); + } + ++#elif defined(__loongarch__) ++uintptr_t UContextReader::GetStackPointer(const ucontext_t* uc) { ++ return 0; ++} ++ ++uintptr_t UContextReader::GetInstructionPointer(const ucontext_t* uc) { ++ return 0; ++} ++ + #elif defined(__mips__) + + uintptr_t UContextReader::GetStackPointer(const ucontext_t* uc) { +diff --git a/src/3rdparty/chromium/third_party/breakpad/breakpad/src/client/linux/handler/exception_handler.cc b/src/3rdparty/chromium/third_party/breakpad/breakpad/src/client/linux/handler/exception_handler.cc +index b895f6d7ada..a6b733875a7 100644 +--- a/src/3rdparty/chromium/third_party/breakpad/breakpad/src/client/linux/handler/exception_handler.cc ++++ b/src/3rdparty/chromium/third_party/breakpad/breakpad/src/client/linux/handler/exception_handler.cc +@@ -77,6 +77,7 @@ + #include + #include + #include ++#include + + #include + #include +@@ -105,6 +106,8 @@ + #define PR_SET_PTRACER 0x59616d61 + #endif + ++#define sys_sigaltstack sigaltstack ++ + namespace google_breakpad { + + namespace { +@@ -395,12 +398,12 @@ void ExceptionHandler::SignalHandler(int sig, siginfo_t* info, void* uc) { + // In order to retrigger it, we have to queue a new signal by calling + // kill() ourselves. The special case (si_pid == 0 && sig == SIGABRT) is + // due to the kernel sending a SIGABRT from a user request via SysRQ. +- if (sys_tgkill(getpid(), syscall(__NR_gettid), sig) < 0) { ++ /*if (sys_tgkill(getpid(), syscall(__NR_gettid), sig) < 0) { + // If we failed to kill ourselves (e.g. because a sandbox disallows us + // to do so), we instead resort to terminating our process. This will + // result in an incorrect exit code. + _exit(1); +- } ++ }*/ + } else { + // This was a synchronous signal triggered by a hard fault (e.g. SIGSEGV). + // No need to reissue the signal. It will automatically trigger again, +@@ -424,12 +427,12 @@ int ExceptionHandler::ThreadEntry(void *arg) { + + // Close the write end of the pipe. This allows us to fail if the parent dies + // while waiting for the continue signal. +- sys_close(thread_arg->handler->fdes[1]); ++ //sys_close(thread_arg->handler->fdes[1]); + + // Block here until the crashing process unblocks us when + // we're allowed to use ptrace + thread_arg->handler->WaitForContinueSignal(); +- sys_close(thread_arg->handler->fdes[0]); ++ //sys_close(thread_arg->handler->fdes[0]); + + return thread_arg->handler->DoDump(thread_arg->pid, thread_arg->context, + thread_arg->context_size) == false; +@@ -446,7 +449,7 @@ bool ExceptionHandler::HandleSignal(int /*sig*/, siginfo_t* info, void* uc) { + bool signal_pid_trusted = info->si_code == SI_USER || + info->si_code == SI_TKILL; + if (signal_trusted || (signal_pid_trusted && info->si_pid == getpid())) { +- sys_prctl(PR_SET_DUMPABLE, 1, 0, 0, 0); ++ //sys_prctl(PR_SET_DUMPABLE, 1, 0, 0, 0); + } + + // Fill in all the holes in the struct to make Valgrind happy. 
+@@ -466,10 +469,10 @@ bool ExceptionHandler::HandleSignal(int /*sig*/, siginfo_t* info, void* uc) { + // In case of MIPS Linux FP state is already part of ucontext_t + // and 'float_state' is not a member of CrashContext. + ucontext_t* uc_ptr = (ucontext_t*)uc; +- if (uc_ptr->uc_mcontext.fpregs) { ++ /*if (uc_ptr->uc_mcontext.fpregs) { + memcpy(&g_crash_context_.float_state, uc_ptr->uc_mcontext.fpregs, + sizeof(g_crash_context_.float_state)); +- } ++ }*/ + #endif + g_crash_context_.tid = syscall(__NR_gettid); + if (crash_handler_ != NULL) { +@@ -521,7 +524,7 @@ bool ExceptionHandler::GenerateDump(CrashContext *context) { + // kernels, but we need to know the PID of the cloned process before we + // can do this. Create a pipe here which we can use to block the + // cloned process after creating it, until we have explicitly enabled ptrace +- if (sys_pipe(fdes) == -1) { ++ /*if (sys_pipe(fdes) == -1) { + // Creating the pipe failed. We'll log an error but carry on anyway, + // as we'll probably still get a useful crash report. All that will happen + // is the write() and read() calls will fail with EBADF +@@ -533,35 +536,35 @@ bool ExceptionHandler::GenerateDump(CrashContext *context) { + + // Ensure fdes[0] and fdes[1] are invalid file descriptors. + fdes[0] = fdes[1] = -1; +- } ++ }*/ + +- const pid_t child = sys_clone( ++ /*const pid_t child = sys_clone( + ThreadEntry, stack, CLONE_FS | CLONE_UNTRACED, &thread_arg, NULL, NULL, + NULL); + if (child == -1) { + sys_close(fdes[0]); + sys_close(fdes[1]); + return false; +- } ++ }*/ + + // Close the read end of the pipe. +- sys_close(fdes[0]); ++ //sys_close(fdes[0]); + // Allow the child to ptrace us +- sys_prctl(PR_SET_PTRACER, child, 0, 0, 0); ++ //sys_prctl(PR_SET_PTRACER, child, 0, 0, 0); + SendContinueSignalToChild(); + int status = 0; +- const int r = HANDLE_EINTR(sys_waitpid(child, &status, __WALL)); ++ //const int r = HANDLE_EINTR(sys_waitpid(child, &status, __WALL)); + +- sys_close(fdes[1]); ++ //sys_close(fdes[1]); + +- if (r == -1) { ++ /*if (r == -1) { + static const char msg[] = "ExceptionHandler::GenerateDump waitpid failed:"; + logger::write(msg, sizeof(msg) - 1); + logger::write(strerror(errno), strlen(strerror(errno))); + logger::write("\n", 1); +- } ++ }*/ + +- bool success = r != -1 && WIFEXITED(status) && WEXITSTATUS(status) == 0; ++ bool success = /*r != -1 &&*/ WIFEXITED(status) && WEXITSTATUS(status) == 0; + if (callback_) + success = callback_(minidump_descriptor_, callback_context_, success); + return success; +@@ -569,7 +572,7 @@ bool ExceptionHandler::GenerateDump(CrashContext *context) { + + // This function runs in a compromised context: see the top of the file. + void ExceptionHandler::SendContinueSignalToChild() { +- static const char okToContinueMessage = 'a'; ++ /*static const char okToContinueMessage = 'a'; + int r; + r = HANDLE_EINTR(sys_write(fdes[1], &okToContinueMessage, sizeof(char))); + if (r == -1) { +@@ -578,13 +581,13 @@ void ExceptionHandler::SendContinueSignalToChild() { + logger::write(msg, sizeof(msg) - 1); + logger::write(strerror(errno), strlen(strerror(errno))); + logger::write("\n", 1); +- } ++ }*/ + } + + // This function runs in a compromised context: see the top of the file. + // Runs on the cloned process. 
+ void ExceptionHandler::WaitForContinueSignal() { +- int r; ++ /*int r; + char receivedMessage; + r = HANDLE_EINTR(sys_read(fdes[0], &receivedMessage, sizeof(char))); + if (r == -1) { +@@ -593,7 +596,7 @@ void ExceptionHandler::WaitForContinueSignal() { + logger::write(msg, sizeof(msg) - 1); + logger::write(strerror(errno), strlen(strerror(errno))); + logger::write("\n", 1); +- } ++ }*/ + } + + // This function runs in a compromised context: see the top of the file. +@@ -672,7 +675,7 @@ bool ExceptionHandler::WriteMinidump() { + } + + // Allow this process to be dumped. +- sys_prctl(PR_SET_DUMPABLE, 1, 0, 0, 0); ++ //sys_prctl(PR_SET_DUMPABLE, 1, 0, 0, 0); + + CrashContext context; + int getcontext_result = getcontext(&context.context); +@@ -701,12 +704,12 @@ bool ExceptionHandler::WriteMinidump() { + } + #endif + +-#if !defined(__ARM_EABI__) && !defined(__aarch64__) && !defined(__mips__) ++#if !defined(__ARM_EABI__) && !defined(__aarch64__) && !defined(__mips__) && !defined(__loongarch__) + // FPU state is not part of ARM EABI ucontext_t. + memcpy(&context.float_state, context.context.uc_mcontext.fpregs, + sizeof(context.float_state)); + #endif +- context.tid = sys_gettid(); ++ //context.tid = sys_gettid(); + + // Add an exception stream to the minidump for better reporting. + memset(&context.siginfo, 0, sizeof(context.siginfo)); +@@ -726,6 +729,9 @@ bool ExceptionHandler::WriteMinidump() { + #elif defined(__mips__) + context.siginfo.si_addr = + reinterpret_cast(context.context.uc_mcontext.pc); ++#elif defined(__loongarch__) ++ //context.siginfo.si_addr = ++ // reinterpret_cast(context.context.uc_mcontext.pc); + #else + #error "This code has not been ported to your platform yet." + #endif +diff --git a/src/3rdparty/chromium/third_party/breakpad/breakpad/src/client/linux/handler/exception_handler.h b/src/3rdparty/chromium/third_party/breakpad/breakpad/src/client/linux/handler/exception_handler.h +index f44483ff0fd..db94f41523e 100644 +--- a/src/3rdparty/chromium/third_party/breakpad/breakpad/src/client/linux/handler/exception_handler.h ++++ b/src/3rdparty/chromium/third_party/breakpad/breakpad/src/client/linux/handler/exception_handler.h +@@ -192,7 +192,7 @@ class ExceptionHandler { + siginfo_t siginfo; + pid_t tid; // the crashing thread. + ucontext_t context; +-#if !defined(__ARM_EABI__) && !defined(__mips__) ++#if !defined(__ARM_EABI__) && !defined(__mips__) && !defined(__loongarch__) + // #ifdef this out because FP state is not part of user ABI for Linux ARM. + // In case of MIPS Linux FP state is already part of ucontext_t so + // 'float_state' is not required. 
+diff --git a/src/3rdparty/chromium/third_party/breakpad/breakpad/src/client/linux/log/log.cc b/src/3rdparty/chromium/third_party/breakpad/breakpad/src/client/linux/log/log.cc +index fc23aa6d528..ffe19aeb203 100644 +--- a/src/3rdparty/chromium/third_party/breakpad/breakpad/src/client/linux/log/log.cc ++++ b/src/3rdparty/chromium/third_party/breakpad/breakpad/src/client/linux/log/log.cc +@@ -77,7 +77,8 @@ int write(const char* buf, size_t nbytes) { + #if defined(__ANDROID__) + return __android_log_write(ANDROID_LOG_WARN, kAndroidLogTag, buf); + #else +- return sys_write(2, buf, nbytes); ++ //return sys_write(2, buf, nbytes); ++ return 0; + #endif + } + +diff --git a/src/3rdparty/chromium/third_party/breakpad/breakpad/src/client/linux/microdump_writer/microdump_writer.cc b/src/3rdparty/chromium/third_party/breakpad/breakpad/src/client/linux/microdump_writer/microdump_writer.cc +index fa3c1713a56..ab8b35a30f1 100644 +--- a/src/3rdparty/chromium/third_party/breakpad/breakpad/src/client/linux/microdump_writer/microdump_writer.cc ++++ b/src/3rdparty/chromium/third_party/breakpad/breakpad/src/client/linux/microdump_writer/microdump_writer.cc +@@ -138,7 +138,7 @@ class MicrodumpWriter { + const MicrodumpExtraInfo& microdump_extra_info, + LinuxDumper* dumper) + : ucontext_(context ? &context->context : NULL), +-#if !defined(__ARM_EABI__) && !defined(__mips__) ++#if !defined(__ARM_EABI__) && !defined(__mips__) && !defined(__loongarch__) + float_state_(context ? &context->float_state : NULL), + #endif + dumper_(dumper), +@@ -337,6 +337,8 @@ class MicrodumpWriter { + # else + # error "This mips ABI is currently not supported (n32)" + #endif ++#elif defined(__loongarch__) ++ const char kArch[] = "la64"; + #else + #error "This code has not been ported to your platform yet" + #endif +@@ -409,7 +411,7 @@ class MicrodumpWriter { + void DumpCPUState() { + RawContextCPU cpu; + my_memset(&cpu, 0, sizeof(RawContextCPU)); +-#if !defined(__ARM_EABI__) && !defined(__mips__) ++#if !defined(__ARM_EABI__) && !defined(__mips__) && !defined(__loongarch__) + UContextReader::FillCPUContext(&cpu, ucontext_, float_state_); + #else + UContextReader::FillCPUContext(&cpu, ucontext_); +@@ -605,7 +607,7 @@ class MicrodumpWriter { + void* Alloc(unsigned bytes) { return dumper_->allocator()->Alloc(bytes); } + + const ucontext_t* const ucontext_; +-#if !defined(__ARM_EABI__) && !defined(__mips__) ++#if !defined(__ARM_EABI__) && !defined(__mips__) && !defined(__loongarch__) + const google_breakpad::fpstate_t* const float_state_; + #endif + LinuxDumper* dumper_; +@@ -648,7 +650,7 @@ bool WriteMicrodump(pid_t crashing_process, + if (blob_size != sizeof(ExceptionHandler::CrashContext)) + return false; + context = reinterpret_cast(blob); +- dumper.SetCrashInfoFromSigInfo(context->siginfo); ++ //dumper.SetCrashInfoFromSigInfo(context->siginfo); + dumper.set_crash_thread(context->tid); + } + MicrodumpWriter writer(context, mappings, +diff --git a/src/3rdparty/chromium/third_party/breakpad/breakpad/src/client/linux/minidump_writer/cpu_set.h b/src/3rdparty/chromium/third_party/breakpad/breakpad/src/client/linux/minidump_writer/cpu_set.h +index 1cca9aa5a0f..145b1b61ed7 100644 +--- a/src/3rdparty/chromium/third_party/breakpad/breakpad/src/client/linux/minidump_writer/cpu_set.h ++++ b/src/3rdparty/chromium/third_party/breakpad/breakpad/src/client/linux/minidump_writer/cpu_set.h +@@ -53,7 +53,7 @@ public: + + // Parse a sysfs file to extract the corresponding CPU set. 
+ bool ParseSysFile(int fd) { +- char buffer[512]; ++ /*char buffer[512]; + int ret = sys_read(fd, buffer, sizeof(buffer)-1); + if (ret < 0) + return false; +@@ -105,7 +105,7 @@ public: + + while (start <= end) + SetBit(start++); +- } ++ }*/ + return true; + } + +diff --git a/src/3rdparty/chromium/third_party/breakpad/breakpad/src/client/linux/minidump_writer/directory_reader.h b/src/3rdparty/chromium/third_party/breakpad/breakpad/src/client/linux/minidump_writer/directory_reader.h +index a4bde180313..869a1294a4e 100644 +--- a/src/3rdparty/chromium/third_party/breakpad/breakpad/src/client/linux/minidump_writer/directory_reader.h ++++ b/src/3rdparty/chromium/third_party/breakpad/breakpad/src/client/linux/minidump_writer/directory_reader.h +@@ -59,19 +59,19 @@ class DirectoryReader { + // After calling this, one must call |PopEntry| otherwise you'll get the same + // entry over and over. + bool GetNextEntry(const char** name) { +- struct kernel_dirent* const dent = +- reinterpret_cast(buf_); ++ //struct kernel_dirent* const dent = ++ // reinterpret_cast(buf_); + + if (buf_used_ == 0) { + // need to read more entries. +- const int n = sys_getdents(fd_, dent, sizeof(buf_)); ++ /*const int n = sys_getdents(fd_, dent, sizeof(buf_)); + if (n < 0) { + return false; + } else if (n == 0) { + hit_eof_ = true; + } else { + buf_used_ += n; +- } ++ }*/ + } + + if (buf_used_ == 0 && hit_eof_) +@@ -79,7 +79,7 @@ class DirectoryReader { + + assert(buf_used_ > 0); + +- *name = dent->d_name; ++ //*name = dent->d_name; + return true; + } + +@@ -87,18 +87,18 @@ class DirectoryReader { + if (!buf_used_) + return; + +- const struct kernel_dirent* const dent = +- reinterpret_cast(buf_); ++ //const struct kernel_dirent* const dent = ++ // reinterpret_cast(buf_); + +- buf_used_ -= dent->d_reclen; +- my_memmove(buf_, buf_ + dent->d_reclen, buf_used_); ++ //buf_used_ -= dent->d_reclen; ++ //my_memmove(buf_, buf_ + dent->d_reclen, buf_used_); + } + + private: + const int fd_; + bool hit_eof_; + unsigned buf_used_; +- uint8_t buf_[sizeof(struct kernel_dirent) + NAME_MAX + 1]; ++ //uint8_t buf_[sizeof(struct kernel_dirent) + NAME_MAX + 1]; + }; + + } // namespace google_breakpad +diff --git a/src/3rdparty/chromium/third_party/breakpad/breakpad/src/client/linux/minidump_writer/line_reader.h b/src/3rdparty/chromium/third_party/breakpad/breakpad/src/client/linux/minidump_writer/line_reader.h +index 779cfeb6039..2062300047c 100644 +--- a/src/3rdparty/chromium/third_party/breakpad/breakpad/src/client/linux/minidump_writer/line_reader.h ++++ b/src/3rdparty/chromium/third_party/breakpad/breakpad/src/client/linux/minidump_writer/line_reader.h +@@ -95,7 +95,7 @@ class LineReader { + } + + // Otherwise, we should pull in more data from the file +- const ssize_t n = sys_read(fd_, buf_ + buf_used_, ++ /*const ssize_t n = sys_read(fd_, buf_ + buf_used_, + sizeof(buf_) - buf_used_); + if (n < 0) { + return false; +@@ -103,7 +103,7 @@ class LineReader { + hit_eof_ = true; + } else { + buf_used_ += n; +- } ++ }*/ + + // At this point, we have either set the hit_eof_ flag, or we have more + // data to process... 
+diff --git a/src/3rdparty/chromium/third_party/breakpad/breakpad/src/client/linux/minidump_writer/linux_core_dumper.cc b/src/3rdparty/chromium/third_party/breakpad/breakpad/src/client/linux/minidump_writer/linux_core_dumper.cc +index 4150689839a..44fdadbfde4 100644 +--- a/src/3rdparty/chromium/third_party/breakpad/breakpad/src/client/linux/minidump_writer/linux_core_dumper.cc ++++ b/src/3rdparty/chromium/third_party/breakpad/breakpad/src/client/linux/minidump_writer/linux_core_dumper.cc +@@ -109,6 +109,8 @@ bool LinuxCoreDumper::GetThreadInfoByIndex(size_t index, ThreadInfo* info) { + memcpy(&stack_pointer, &info->regs.ARM_sp, sizeof(info->regs.ARM_sp)); + #elif defined(__aarch64__) + memcpy(&stack_pointer, &info->regs.sp, sizeof(info->regs.sp)); ++#elif defined(__loongarch__) ++ //memcpy(&stack_pointer, &info->regs.sp, sizeof(info->regs.sp)); + #elif defined(__mips__) + stack_pointer = + reinterpret_cast(info->mcontext.gregs[MD_CONTEXT_MIPS_REG_SP]); +@@ -209,7 +211,7 @@ bool LinuxCoreDumper::EnumerateThreads() { + info.mcontext.mdhi = status->pr_reg[EF_HI]; + info.mcontext.pc = status->pr_reg[EF_CP0_EPC]; + #else // __mips__ +- memcpy(&info.regs, status->pr_reg, sizeof(info.regs)); ++ //memcpy(&info.regs, status->pr_reg, sizeof(info.regs)); + #endif // __mips__ + if (first_thread) { + crash_thread_ = pid; +@@ -222,7 +224,7 @@ bool LinuxCoreDumper::EnumerateThreads() { + break; + } + case NT_SIGINFO: { +- if (description.length() != sizeof(siginfo_t)) { ++ /*if (description.length() != sizeof(siginfo_t)) { + fprintf(stderr, "Found NT_SIGINFO descriptor of unexpected size\n"); + return false; + } +@@ -259,7 +261,7 @@ bool LinuxCoreDumper::EnumerateThreads() { + }); + #endif + break; +- } ++ }*/ + break; + } + #if defined(__i386) || defined(__x86_64) +diff --git a/src/3rdparty/chromium/third_party/breakpad/breakpad/src/client/linux/minidump_writer/linux_dumper.cc b/src/3rdparty/chromium/third_party/breakpad/breakpad/src/client/linux/minidump_writer/linux_dumper.cc +index 1112035bc5a..f838abe02da 100644 +--- a/src/3rdparty/chromium/third_party/breakpad/breakpad/src/client/linux/minidump_writer/linux_dumper.cc ++++ b/src/3rdparty/chromium/third_party/breakpad/breakpad/src/client/linux/minidump_writer/linux_dumper.cc +@@ -325,14 +325,14 @@ LinuxDumper::ElfFileIdentifierForMapping(const MappingInfo& mapping, + // Special-case linux-gate because it's not a real file. 
+ if (my_strcmp(mapping.name, kLinuxGateLibraryName) == 0) { + void* linux_gate = NULL; +- if (pid_ == sys_getpid()) { ++ /*if (pid_ == sys_getpid()) { + linux_gate = reinterpret_cast(mapping.start_addr); + } else { + linux_gate = allocator_.Alloc(mapping.size); + CopyFromProcess(linux_gate, pid_, + reinterpret_cast(mapping.start_addr), + mapping.size); +- } ++ }*/ + return FileID::ElfFileIdentifierFromMappedFile(linux_gate, identifier); + } + +@@ -355,11 +355,11 @@ LinuxDumper::ElfFileIdentifierForMapping(const MappingInfo& mapping, + return success; + } + +-void LinuxDumper::SetCrashInfoFromSigInfo(const siginfo_t& siginfo) { ++/*void LinuxDumper::SetCrashInfoFromSigInfo(const siginfo_t& siginfo) { + set_crash_address(reinterpret_cast(siginfo.si_addr)); + set_crash_signal(siginfo.si_signo); + set_crash_signal_code(siginfo.si_code); +-} ++}*/ + + const char* LinuxDumper::GetCrashSignalString() const { + switch (static_cast(crash_signal_)) { +@@ -518,7 +518,7 @@ bool LinuxDumper::ReadAuxv() { + return false; + } + +- int fd = sys_open(auxv_path, O_RDONLY, 0); ++ /*int fd = sys_open(auxv_path, O_RDONLY, 0); + if (fd < 0) { + return false; + } +@@ -534,8 +534,8 @@ bool LinuxDumper::ReadAuxv() { + res = true; + } + } +- sys_close(fd); +- return res; ++ sys_close(fd);*/ ++ return false; + } + + bool LinuxDumper::EnumerateMappings() { +@@ -557,7 +557,7 @@ bool LinuxDumper::EnumerateMappings() { + // actual entry point to find the mapping. + const void* entry_point_loc = reinterpret_cast(auxv_[AT_ENTRY]); + +- const int fd = sys_open(maps_path, O_RDONLY, 0); ++ const int fd = -1;//sys_open(maps_path, O_RDONLY, 0); + if (fd < 0) + return false; + LineReader* const line_reader = new(allocator_) LineReader(fd); +@@ -641,7 +641,7 @@ bool LinuxDumper::EnumerateMappings() { + } + } + +- sys_close(fd); ++ //sys_close(fd); + + return !mappings_.empty(); + } +@@ -953,14 +953,14 @@ bool LinuxDumper::HandleDeletedFileInMapping(char* path) const { + return false; + + // Check to see if someone actually named their executable 'foo (deleted)'. +- struct kernel_stat exe_stat; ++ /*struct kernel_stat exe_stat; + struct kernel_stat new_path_stat; + if (sys_stat(exe_link, &exe_stat) == 0 && + sys_stat(new_path, &new_path_stat) == 0 && + exe_stat.st_dev == new_path_stat.st_dev && + exe_stat.st_ino == new_path_stat.st_ino) { + return false; +- } ++ }*/ + + my_memcpy(path, exe_link, NAME_MAX); + return true; +diff --git a/src/3rdparty/chromium/third_party/breakpad/breakpad/src/client/linux/minidump_writer/linux_dumper.h b/src/3rdparty/chromium/third_party/breakpad/breakpad/src/client/linux/minidump_writer/linux_dumper.h +index f4a75d90609..8e692559d6a 100644 +--- a/src/3rdparty/chromium/third_party/breakpad/breakpad/src/client/linux/minidump_writer/linux_dumper.h ++++ b/src/3rdparty/chromium/third_party/breakpad/breakpad/src/client/linux/minidump_writer/linux_dumper.h +@@ -59,7 +59,7 @@ + namespace google_breakpad { + + // Typedef for our parsing of the auxv variables in /proc/pid/auxv. 
+-#if defined(__i386) || defined(__ARM_EABI__) || \ ++#if defined(__i386) || defined(__ARM_EABI__) || defined(__loongarch__) || \ + (defined(__mips__) && _MIPS_SIM == _ABIO32) + typedef Elf32_auxv_t elf_aux_entry; + #elif defined(__x86_64) || defined(__aarch64__) || \ +@@ -173,7 +173,7 @@ class LinuxDumper { + unsigned int mapping_id, + wasteful_vector& identifier); + +- void SetCrashInfoFromSigInfo(const siginfo_t& siginfo); ++ //void SetCrashInfoFromSigInfo(const siginfo_t& siginfo); + + uintptr_t crash_address() const { return crash_address_; } + void set_crash_address(uintptr_t crash_address) { +diff --git a/src/3rdparty/chromium/third_party/breakpad/breakpad/src/client/linux/minidump_writer/linux_dumper_unittest_helper.cc b/src/3rdparty/chromium/third_party/breakpad/breakpad/src/client/linux/minidump_writer/linux_dumper_unittest_helper.cc +index 3ad48e50155..7b68905a3d4 100644 +--- a/src/3rdparty/chromium/third_party/breakpad/breakpad/src/client/linux/minidump_writer/linux_dumper_unittest_helper.cc ++++ b/src/3rdparty/chromium/third_party/breakpad/breakpad/src/client/linux/minidump_writer/linux_dumper_unittest_helper.cc +@@ -51,6 +51,8 @@ + #define TID_PTR_REGISTER "rcx" + #elif defined(__mips__) + #define TID_PTR_REGISTER "$1" ++#elif defined(__loongarch__) ++#define TID_PTR_REGISTER "$1" + #else + #error This test has not been ported to this platform. + #endif +diff --git a/src/3rdparty/chromium/third_party/breakpad/breakpad/src/client/linux/minidump_writer/linux_ptrace_dumper.cc b/src/3rdparty/chromium/third_party/breakpad/breakpad/src/client/linux/minidump_writer/linux_ptrace_dumper.cc +index e3ddb81a659..88de7ae3062 100644 +--- a/src/3rdparty/chromium/third_party/breakpad/breakpad/src/client/linux/minidump_writer/linux_ptrace_dumper.cc ++++ b/src/3rdparty/chromium/third_party/breakpad/breakpad/src/client/linux/minidump_writer/linux_ptrace_dumper.cc +@@ -38,7 +38,7 @@ + + #include "client/linux/minidump_writer/linux_ptrace_dumper.h" + +-#include ++//#include + #include + #include + #include +@@ -63,16 +63,16 @@ + static bool SuspendThread(pid_t pid) { + // This may fail if the thread has just died or debugged. + errno = 0; +- if (sys_ptrace(PTRACE_ATTACH, pid, NULL, NULL) != 0 && +- errno != 0) { +- return false; +- } +- while (sys_waitpid(pid, NULL, __WALL) < 0) { ++ //if (sys_ptrace(PTRACE_ATTACH, pid, NULL, NULL) != 0 && ++ // errno != 0) { ++ // return false; ++ //} ++ /*while (sys_waitpid(pid, NULL, __WALL) < 0) { + if (errno != EINTR) { + sys_ptrace(PTRACE_DETACH, pid, NULL, NULL); + return false; + } +- } ++ }*/ + #if defined(__i386) || defined(__x86_64) + // On x86, the stack pointer is NULL or -1, when executing trusted code in + // the seccomp sandbox. Not only does this cause difficulties down the line +@@ -98,7 +98,7 @@ static bool SuspendThread(pid_t pid) { + + // Resumes a thread by detaching from it. 
+ static bool ResumeThread(pid_t pid) { +- return sys_ptrace(PTRACE_DETACH, pid, NULL, NULL) >= 0; ++ return false;//sys_ptrace(PTRACE_DETACH, pid, NULL, NULL) >= 0; + } + + namespace google_breakpad { +@@ -132,7 +132,7 @@ bool LinuxPtraceDumper::BuildProcPath(char* path, pid_t pid, + + bool LinuxPtraceDumper::CopyFromProcess(void* dest, pid_t child, + const void* src, size_t length) { +- unsigned long tmp = 55; ++ /*unsigned long tmp = 55; + size_t done = 0; + static const size_t word_size = sizeof(tmp); + uint8_t* const local = (uint8_t*) dest; +@@ -145,14 +145,14 @@ bool LinuxPtraceDumper::CopyFromProcess(void* dest, pid_t child, + } + my_memcpy(local + done, &tmp, l); + done += l; +- } ++ }*/ + return true; + } + + bool LinuxPtraceDumper::ReadRegisterSet(ThreadInfo* info, pid_t tid) + { +-#ifdef PTRACE_GETREGSET +- struct iovec io; ++//#ifdef PTRACE_GETREGSET ++ /*struct iovec io; + info->GetGeneralPurposeRegisters(&io.iov_base, &io.iov_len); + if (sys_ptrace(PTRACE_GETREGSET, tid, (void*)NT_PRSTATUS, (void*)&io) == -1) { + return false; +@@ -161,36 +161,36 @@ bool LinuxPtraceDumper::ReadRegisterSet(ThreadInfo* info, pid_t tid) + info->GetFloatingPointRegisters(&io.iov_base, &io.iov_len); + if (sys_ptrace(PTRACE_GETREGSET, tid, (void*)NT_FPREGSET, (void*)&io) == -1) { + return false; +- } +- return true; +-#else ++ }*/ ++// return true; ++//#else + return false; +-#endif ++//#endif + } + + bool LinuxPtraceDumper::ReadRegisters(ThreadInfo* info, pid_t tid) { +-#ifdef PTRACE_GETREGS +- void* gp_addr; +- info->GetGeneralPurposeRegisters(&gp_addr, NULL); +- if (sys_ptrace(PTRACE_GETREGS, tid, NULL, gp_addr) == -1) { +- return false; +- } +- +-#if !(defined(__ANDROID__) && defined(__ARM_EABI__)) ++//#ifdef PTRACE_GETREGS ++// void* gp_addr; ++// info->GetGeneralPurposeRegisters(&gp_addr, NULL); ++// if (sys_ptrace(PTRACE_GETREGS, tid, NULL, gp_addr) == -1) { ++// return false; ++// } ++// ++//#if !(defined(__ANDROID__) && defined(__ARM_EABI__)) + // When running an arm build on an arm64 device, attempting to get the + // floating point registers fails. On Android, the floating point registers + // aren't written to the cpu context anyway, so just don't get them here. + // See http://crbug.com/508324 +- void* fp_addr; +- info->GetFloatingPointRegisters(&fp_addr, NULL); +- if (sys_ptrace(PTRACE_GETFPREGS, tid, NULL, fp_addr) == -1) { +- return false; +- } +-#endif // !(defined(__ANDROID__) && defined(__ARM_EABI__)) +- return true; +-#else // PTRACE_GETREGS ++// void* fp_addr; ++// info->GetFloatingPointRegisters(&fp_addr, NULL); ++// if (sys_ptrace(PTRACE_GETFPREGS, tid, NULL, fp_addr) == -1) { ++// return false; ++// } ++//#endif // !(defined(__ANDROID__) && defined(__ARM_EABI__)) ++// return true; ++//#else // PTRACE_GETREGS + return false; +-#endif ++//#endif + } + + // Read thread info from /proc/$pid/status. 
+@@ -208,7 +208,7 @@ bool LinuxPtraceDumper::GetThreadInfoByIndex(size_t index, ThreadInfo* info) { + if (!BuildProcPath(status_path, tid, "status")) + return false; + +- const int fd = sys_open(status_path, O_RDONLY, 0); ++ /*const int fd = sys_open(status_path, O_RDONLY, 0); + if (fd < 0) + return false; + +@@ -227,7 +227,7 @@ bool LinuxPtraceDumper::GetThreadInfoByIndex(size_t index, ThreadInfo* info) { + + line_reader->PopLine(line_len); + } +- sys_close(fd); ++ sys_close(fd);*/ + + if (info->ppid == -1 || info->tgid == -1) + return false; +@@ -295,6 +295,7 @@ bool LinuxPtraceDumper::GetThreadInfoByIndex(size_t index, ThreadInfo* info) { + my_memcpy(&stack_pointer, &info->regs.ARM_sp, sizeof(info->regs.ARM_sp)); + #elif defined(__aarch64__) + my_memcpy(&stack_pointer, &info->regs.sp, sizeof(info->regs.sp)); ++#elif defined(__loongarch__) + #elif defined(__mips__) + stack_pointer = + reinterpret_cast(info->mcontext.gregs[MD_CONTEXT_MIPS_REG_SP]); +@@ -347,7 +348,7 @@ bool LinuxPtraceDumper::EnumerateThreads() { + if (!BuildProcPath(task_path, pid_, "task")) + return false; + +- const int fd = sys_open(task_path, O_RDONLY | O_DIRECTORY, 0); ++ /*const int fd = sys_open(task_path, O_RDONLY | O_DIRECTORY, 0); + if (fd < 0) + return false; + DirectoryReader* dir_reader = new(allocator_) DirectoryReader(fd); +@@ -369,7 +370,7 @@ bool LinuxPtraceDumper::EnumerateThreads() { + dir_reader->PopEntry(); + } + +- sys_close(fd); ++ sys_close(fd);*/ + return true; + } + +diff --git a/src/3rdparty/chromium/third_party/breakpad/breakpad/src/client/linux/minidump_writer/minidump_writer.cc b/src/3rdparty/chromium/third_party/breakpad/breakpad/src/client/linux/minidump_writer/minidump_writer.cc +index f8cdf2a1c6a..6f5d4af9752 100644 +--- a/src/3rdparty/chromium/third_party/breakpad/breakpad/src/client/linux/minidump_writer/minidump_writer.cc ++++ b/src/3rdparty/chromium/third_party/breakpad/breakpad/src/client/linux/minidump_writer/minidump_writer.cc +@@ -136,7 +136,7 @@ class MinidumpWriter { + : fd_(minidump_fd), + path_(minidump_path), + ucontext_(context ? &context->context : NULL), +-#if !defined(__ARM_EABI__) && !defined(__mips__) ++#if !defined(__ARM_EABI__) && !defined(__mips__) && !defined(__loongarch__) + float_state_(context ? 
&context->float_state : NULL), + #endif + dumper_(dumper), +@@ -468,7 +468,7 @@ class MinidumpWriter { + if (!cpu.Allocate()) + return false; + my_memset(cpu.get(), 0, sizeof(RawContextCPU)); +-#if !defined(__ARM_EABI__) && !defined(__mips__) ++#if !defined(__ARM_EABI__) && !defined(__mips__) && !defined(__loongarch__) + UContextReader::FillCPUContext(cpu.get(), ucontext_, float_state_); + #else + UContextReader::FillCPUContext(cpu.get(), ucontext_); +@@ -1203,6 +1203,10 @@ class MinidumpWriter { + sys_close(fd); + } + ++ return true; ++ } ++#elif defined(__loongarch__) ++ bool WriteCPUInformation(MDRawSystemInfo* sys_info) { + return true; + } + #else +@@ -1210,7 +1214,7 @@ class MinidumpWriter { + #endif + + bool WriteFile(MDLocationDescriptor* result, const char* filename) { +- const int fd = sys_open(filename, O_RDONLY, 0); ++ const int fd = -1;//sys_open(filename, O_RDONLY, 0); + if (fd < 0) + return false; + +@@ -1227,7 +1231,7 @@ class MinidumpWriter { + buffers->len = 0; + + size_t total = 0; +- for (Buffers* bufptr = buffers;;) { ++ /*for (Buffers* bufptr = buffers;;) { + ssize_t r; + do { + r = sys_read(fd, &bufptr->data[bufptr->len], kBufSize - bufptr->len); +@@ -1245,7 +1249,7 @@ class MinidumpWriter { + bufptr->len = 0; + } + } +- sys_close(fd); ++ sys_close(fd);*/ + + if (!total) + return false; +@@ -1333,7 +1337,7 @@ class MinidumpWriter { + const char* path_; // Path to the file where the minidum should be written. + + const ucontext_t* const ucontext_; // also from the signal handler +-#if !defined(__ARM_EABI__) && !defined(__mips__) ++#if !defined(__ARM_EABI__) && !defined(__mips__) && !defined(__loongarch__) + const google_breakpad::fpstate_t* const float_state_; // ditto + #endif + LinuxDumper* dumper_; +@@ -1375,7 +1379,7 @@ bool WriteMinidumpImpl(const char* minidump_path, + if (blob_size != sizeof(ExceptionHandler::CrashContext)) + return false; + context = reinterpret_cast(blob); +- dumper.SetCrashInfoFromSigInfo(context->siginfo); ++ //dumper.SetCrashInfoFromSigInfo(context->siginfo); + dumper.set_crash_thread(context->tid); + } + MinidumpWriter writer(minidump_path, minidump_fd, context, mappings, +diff --git a/src/3rdparty/chromium/third_party/breakpad/breakpad/src/client/minidump_file_writer.cc b/src/3rdparty/chromium/third_party/breakpad/breakpad/src/client/minidump_file_writer.cc +index a1957f324a9..a267cc976ee 100644 +--- a/src/3rdparty/chromium/third_party/breakpad/breakpad/src/client/minidump_file_writer.cc ++++ b/src/3rdparty/chromium/third_party/breakpad/breakpad/src/client/minidump_file_writer.cc +@@ -104,7 +104,7 @@ MinidumpFileWriter::~MinidumpFileWriter() { + bool MinidumpFileWriter::Open(const char *path) { + assert(file_ == -1); + #if defined(__linux__) && __linux__ +- file_ = sys_open(path, O_WRONLY | O_CREAT | O_EXCL, 0600); ++ //file_ = sys_open(path, O_WRONLY | O_CREAT | O_EXCL, 0600); + #else + file_ = open(path, O_WRONLY | O_CREAT | O_EXCL, 0600); + #endif +@@ -135,7 +135,7 @@ bool MinidumpFileWriter::Close() { + } + #endif + #if defined(__linux__) && __linux__ +- result = (sys_close(file_) == 0); ++ //result = (sys_close(file_) == 0); + #else + result = (close(file_) == 0); + #endif +@@ -318,11 +318,11 @@ bool MinidumpFileWriter::Copy(MDRVA position, const void *src, ssize_t size) { + + // Seek and write the data + #if defined(__linux__) && __linux__ +- if (sys_lseek(file_, position, SEEK_SET) == static_cast(position)) { ++ /*if (sys_lseek(file_, position, SEEK_SET) == static_cast(position)) { + if (sys_write(file_, src, size) == size) { + 
return true; + } +- } ++ }*/ + #else + if (lseek(file_, position, SEEK_SET) == static_cast(position)) { + if (write(file_, src, size) == size) { +diff --git a/src/3rdparty/chromium/third_party/breakpad/breakpad/src/common/linux/memory_mapped_file.cc b/src/3rdparty/chromium/third_party/breakpad/breakpad/src/common/linux/memory_mapped_file.cc +index 4e938269f26..0a053d6af71 100644 +--- a/src/3rdparty/chromium/third_party/breakpad/breakpad/src/common/linux/memory_mapped_file.cc ++++ b/src/3rdparty/chromium/third_party/breakpad/breakpad/src/common/linux/memory_mapped_file.cc +@@ -59,12 +59,14 @@ MemoryMappedFile::~MemoryMappedFile() { + bool MemoryMappedFile::Map(const char* path, size_t offset) { + Unmap(); + ++ return false; ++ /* + int fd = sys_open(path, O_RDONLY, 0); + if (fd == -1) { + return false; + } + +-#if defined(__x86_64__) || defined(__aarch64__) || \ ++#if defined(__x86_64__) || defined(__aarch64__) || defined(__loongarch__) || \ + (defined(__mips__) && _MIPS_SIM == _ABI64) + + struct kernel_stat st; +@@ -94,12 +96,12 @@ bool MemoryMappedFile::Map(const char* path, size_t offset) { + } + + content_.Set(data, file_len - offset); +- return true; ++ return true;*/ + } + + void MemoryMappedFile::Unmap() { + if (content_.data()) { +- sys_munmap(const_cast(content_.data()), content_.length()); ++ //sys_munmap(const_cast(content_.data()), content_.length()); + content_.Set(NULL, 0); + } + } +diff --git a/src/3rdparty/chromium/third_party/breakpad/breakpad/src/common/linux/safe_readlink.cc b/src/3rdparty/chromium/third_party/breakpad/breakpad/src/common/linux/safe_readlink.cc +index 870c28af3b5..612d9d6064e 100644 +--- a/src/3rdparty/chromium/third_party/breakpad/breakpad/src/common/linux/safe_readlink.cc ++++ b/src/3rdparty/chromium/third_party/breakpad/breakpad/src/common/linux/safe_readlink.cc +@@ -42,11 +42,11 @@ bool SafeReadLink(const char* path, char* buffer, size_t buffer_size) { + // one byte longer than the expected path length. Also, sys_readlink() + // returns the actual path length on success, which does not count the + // NULL byte, so |result_size| should be less than |buffer_size|. 
+- ssize_t result_size = sys_readlink(path, buffer, buffer_size); ++ /*ssize_t result_size = sys_readlink(path, buffer, buffer_size); + if (result_size >= 0 && static_cast(result_size) < buffer_size) { + buffer[result_size] = '\0'; + return true; +- } ++ }*/ + return false; + } + +diff --git a/src/3rdparty/chromium/third_party/breakpad/breakpad/src/common/memory_allocator.h b/src/3rdparty/chromium/third_party/breakpad/breakpad/src/common/memory_allocator.h +index a3159ea46c8..949740e31d7 100644 +--- a/src/3rdparty/chromium/third_party/breakpad/breakpad/src/common/memory_allocator.h ++++ b/src/3rdparty/chromium/third_party/breakpad/breakpad/src/common/memory_allocator.h +@@ -42,7 +42,7 @@ + #include + #endif + +-#ifdef __APPLE__ ++#if defined(__APPLE__) + #define sys_mmap mmap + #define sys_munmap munmap + #define MAP_ANONYMOUS MAP_ANON +@@ -117,7 +117,7 @@ class PageAllocator { + private: + uint8_t *GetNPages(size_t num_pages) { + void *a = sys_mmap(NULL, page_size_ * num_pages, PROT_READ | PROT_WRITE, +- MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); ++ MAP_PRIVATE | 0x20 /*MAP_ANONYMOUS*/, -1, 0); + if (a == MAP_FAILED) + return NULL; + +diff --git a/src/3rdparty/chromium/third_party/crashpad/crashpad/client/crashpad_info_note.S b/src/3rdparty/chromium/third_party/crashpad/crashpad/client/crashpad_info_note.S +index b13d8642e7d..d6fbc7eb0fd 100644 +--- a/src/3rdparty/chromium/third_party/crashpad/crashpad/client/crashpad_info_note.S ++++ b/src/3rdparty/chromium/third_party/crashpad/crashpad/client/crashpad_info_note.S +@@ -42,7 +42,7 @@ name_end: + .balign NOTE_ALIGN + desc: + #if defined(__LP64__) +- .quad CRASHPAD_INFO_SYMBOL - desc ++ .quad CRASHPAD_INFO_SYMBOL + #else + .long CRASHPAD_INFO_SYMBOL - desc + #endif // __LP64__ +diff --git a/src/3rdparty/chromium/third_party/crashpad/crashpad/minidump/minidump_misc_info_writer.cc b/src/3rdparty/chromium/third_party/crashpad/crashpad/minidump/minidump_misc_info_writer.cc +index a13407605f7..143c65426e3 100644 +--- a/src/3rdparty/chromium/third_party/crashpad/crashpad/minidump/minidump_misc_info_writer.cc ++++ b/src/3rdparty/chromium/third_party/crashpad/crashpad/minidump/minidump_misc_info_writer.cc +@@ -126,6 +126,8 @@ std::string MinidumpMiscInfoDebugBuildString() { + static constexpr char kCPU[] = "mips"; + #elif defined(ARCH_CPU_MIPS64EL) + static constexpr char kCPU[] = "mips64"; ++#elif defined(ARCH_CPU_LA64) ++ static constexpr char kCPU[] = "la64"; + #else + #error define kCPU for this CPU + #endif +diff --git a/src/3rdparty/chromium/third_party/crashpad/crashpad/snapshot/capture_memory.cc b/src/3rdparty/chromium/third_party/crashpad/crashpad/snapshot/capture_memory.cc +index a51626ccdc0..38986b4a422 100644 +--- a/src/3rdparty/chromium/third_party/crashpad/crashpad/snapshot/capture_memory.cc ++++ b/src/3rdparty/chromium/third_party/crashpad/crashpad/snapshot/capture_memory.cc +@@ -107,7 +107,7 @@ void CaptureMemory::PointedToByContext(const CPUContext& context, + MaybeCaptureMemoryAround(delegate, context.arm->regs[i]); + } + } +-#elif defined(ARCH_CPU_MIPS_FAMILY) ++#elif defined(ARCH_CPU_MIPS_FAMILY) || defined(ARCH_CPU_LA64) + for (size_t i = 0; i < base::size(context.mipsel->regs); ++i) { + MaybeCaptureMemoryAround(delegate, context.mipsel->regs[i]); + } +diff --git a/src/3rdparty/chromium/third_party/crashpad/crashpad/snapshot/cpu_architecture.h b/src/3rdparty/chromium/third_party/crashpad/crashpad/snapshot/cpu_architecture.h +index 811a7209587..b5284a72e45 100644 +--- 
a/src/3rdparty/chromium/third_party/crashpad/crashpad/snapshot/cpu_architecture.h ++++ b/src/3rdparty/chromium/third_party/crashpad/crashpad/snapshot/cpu_architecture.h +@@ -43,7 +43,10 @@ enum CPUArchitecture { + kCPUArchitectureMIPSEL, + + //! \brief 64-bit MIPSEL. +- kCPUArchitectureMIPS64EL ++ kCPUArchitectureMIPS64EL, ++ ++ //! \brief 64-bit LoongArch. ++ kCPUArchitectureLA64 + }; + + } // namespace crashpad +diff --git a/src/3rdparty/chromium/third_party/crashpad/crashpad/snapshot/cpu_context.cc b/src/3rdparty/chromium/third_party/crashpad/crashpad/snapshot/cpu_context.cc +index 6fb8d7e719f..bda28212fb2 100644 +--- a/src/3rdparty/chromium/third_party/crashpad/crashpad/snapshot/cpu_context.cc ++++ b/src/3rdparty/chromium/third_party/crashpad/crashpad/snapshot/cpu_context.cc +@@ -196,6 +196,7 @@ bool CPUContext::Is64Bit() const { + case kCPUArchitectureX86_64: + case kCPUArchitectureARM64: + case kCPUArchitectureMIPS64EL: ++ case kCPUArchitectureLA64: + return true; + case kCPUArchitectureX86: + case kCPUArchitectureARM: +diff --git a/src/3rdparty/chromium/third_party/crashpad/crashpad/snapshot/crashpad_info_size_test_note.S b/src/3rdparty/chromium/third_party/crashpad/crashpad/snapshot/crashpad_info_size_test_note.S +index 16b5d499d7b..9ccf51733aa 100644 +--- a/src/3rdparty/chromium/third_party/crashpad/crashpad/snapshot/crashpad_info_size_test_note.S ++++ b/src/3rdparty/chromium/third_party/crashpad/crashpad/snapshot/crashpad_info_size_test_note.S +@@ -43,7 +43,7 @@ name_end: + .balign NOTE_ALIGN + desc: + #if defined(__LP64__) +- .quad TEST_CRASHPAD_INFO_SYMBOL - desc ++ .quad TEST_CRASHPAD_INFO_SYMBOL + #else + .long TEST_CRASHPAD_INFO_SYMBOL - desc + #endif // __LP64__ +diff --git a/src/3rdparty/chromium/third_party/crashpad/crashpad/snapshot/linux/exception_snapshot_linux.cc b/src/3rdparty/chromium/third_party/crashpad/crashpad/snapshot/linux/exception_snapshot_linux.cc +index cd40b3b12d6..af5e21b7874 100644 +--- a/src/3rdparty/chromium/third_party/crashpad/crashpad/snapshot/linux/exception_snapshot_linux.cc ++++ b/src/3rdparty/chromium/third_party/crashpad/crashpad/snapshot/linux/exception_snapshot_linux.cc +@@ -333,6 +333,8 @@ bool ExceptionSnapshotLinux::Initialize(ProcessReaderLinux* process_reader, + + thread_id_ = thread_id; + ++// TODO:LA ++#if !defined(ARCH_CPU_LA64) + if (process_reader->Is64Bit()) { + if (!ReadContext(process_reader, context_address) || + !ReadSiginfo(process_reader, siginfo_address)) { +@@ -344,6 +346,7 @@ bool ExceptionSnapshotLinux::Initialize(ProcessReaderLinux* process_reader, + return false; + } + } ++#endif + + INITIALIZATION_STATE_SET_VALID(initialized_); + return true; +diff --git a/src/3rdparty/chromium/third_party/crashpad/crashpad/snapshot/linux/process_reader_linux.cc b/src/3rdparty/chromium/third_party/crashpad/crashpad/snapshot/linux/process_reader_linux.cc +index b96abfe74fe..3d6591fad3f 100644 +--- a/src/3rdparty/chromium/third_party/crashpad/crashpad/snapshot/linux/process_reader_linux.cc ++++ b/src/3rdparty/chromium/third_party/crashpad/crashpad/snapshot/linux/process_reader_linux.cc +@@ -108,6 +108,8 @@ void ProcessReaderLinux::Thread::InitializeStack(ProcessReaderLinux* reader) { + #elif defined(ARCH_CPU_MIPS_FAMILY) + stack_pointer = reader->Is64Bit() ? thread_info.thread_context.t64.regs[29] + : thread_info.thread_context.t32.regs[29]; ++#elif defined(ARCH_CPU_LA64) ++// TODO:LA + #else + #error Port. 
+ #endif +diff --git a/src/3rdparty/chromium/third_party/crashpad/crashpad/snapshot/linux/signal_context.h b/src/3rdparty/chromium/third_party/crashpad/crashpad/snapshot/linux/signal_context.h +index 110024680bd..2fa76e9843e 100644 +--- a/src/3rdparty/chromium/third_party/crashpad/crashpad/snapshot/linux/signal_context.h ++++ b/src/3rdparty/chromium/third_party/crashpad/crashpad/snapshot/linux/signal_context.h +@@ -422,6 +422,7 @@ static_assert(offsetof(UContext, mcontext.fpregs) == + "context offset mismatch"); + #endif + ++#elif defined(ARCH_CPU_LA64) + #else + #error Port. + #endif // ARCH_CPU_X86_FAMILY +diff --git a/src/3rdparty/chromium/third_party/crashpad/crashpad/snapshot/linux/system_snapshot_linux.cc b/src/3rdparty/chromium/third_party/crashpad/crashpad/snapshot/linux/system_snapshot_linux.cc +index 8564d3d4557..820b0eae06a 100644 +--- a/src/3rdparty/chromium/third_party/crashpad/crashpad/snapshot/linux/system_snapshot_linux.cc ++++ b/src/3rdparty/chromium/third_party/crashpad/crashpad/snapshot/linux/system_snapshot_linux.cc +@@ -203,6 +203,7 @@ CPUArchitecture SystemSnapshotLinux::GetCPUArchitecture() const { + #elif defined(ARCH_CPU_MIPS_FAMILY) + return process_reader_->Is64Bit() ? kCPUArchitectureMIPS64EL + : kCPUArchitectureMIPSEL; ++#elif defined(ARCH_CPU_LA64) + #else + #error port to your architecture + #endif +@@ -218,6 +219,8 @@ uint32_t SystemSnapshotLinux::CPURevision() const { + #elif defined(ARCH_CPU_MIPS_FAMILY) + // Not implementable on MIPS + return 0; ++#elif defined(ARCH_CPU_LA64) ++ return 0; + #else + #error port to your architecture + #endif +@@ -238,6 +241,8 @@ std::string SystemSnapshotLinux::CPUVendor() const { + #elif defined(ARCH_CPU_MIPS_FAMILY) + // Not implementable on MIPS + return std::string(); ++#elif defined(ARCH_CPU_LA64) ++ return std::string(); + #else + #error port to your architecture + #endif +@@ -371,6 +376,8 @@ bool SystemSnapshotLinux::NXEnabled() const { + #elif defined(ARCH_CPU_MIPS_FAMILY) + // Not implementable on MIPS + return false; ++#elif defined(ARCH_CPU_LA64) ++ return false; + #else + #error Port. + #endif // ARCH_CPU_X86_FAMILY +diff --git a/src/3rdparty/chromium/third_party/crashpad/crashpad/snapshot/linux/thread_snapshot_linux.cc b/src/3rdparty/chromium/third_party/crashpad/crashpad/snapshot/linux/thread_snapshot_linux.cc +index e3e2bebddb9..c96d5b63383 100644 +--- a/src/3rdparty/chromium/third_party/crashpad/crashpad/snapshot/linux/thread_snapshot_linux.cc ++++ b/src/3rdparty/chromium/third_party/crashpad/crashpad/snapshot/linux/thread_snapshot_linux.cc +@@ -186,6 +186,7 @@ bool ThreadSnapshotLinux::Initialize(ProcessReaderLinux* process_reader, + thread.thread_info.float_context.f32, + context_.mipsel); + } ++#elif defined(ARCH_CPU_LA64) + #else + #error Port. 
+ #endif +diff --git a/src/3rdparty/chromium/third_party/crashpad/crashpad/snapshot/linux/thread_snapshot_linux.h b/src/3rdparty/chromium/third_party/crashpad/crashpad/snapshot/linux/thread_snapshot_linux.h +index 44cc6f6d973..8d6665b9bd3 100644 +--- a/src/3rdparty/chromium/third_party/crashpad/crashpad/snapshot/linux/thread_snapshot_linux.h ++++ b/src/3rdparty/chromium/third_party/crashpad/crashpad/snapshot/linux/thread_snapshot_linux.h +@@ -62,7 +62,7 @@ class ThreadSnapshotLinux final : public ThreadSnapshot { + #if defined(ARCH_CPU_X86_FAMILY) + CPUContextX86 x86; + CPUContextX86_64 x86_64; +-#elif defined(ARCH_CPU_ARM_FAMILY) ++#elif defined(ARCH_CPU_ARM_FAMILY) || defined(ARCH_CPU_LA64) + CPUContextARM arm; + CPUContextARM64 arm64; + #elif defined(ARCH_CPU_MIPS_FAMILY) +diff --git a/src/3rdparty/chromium/third_party/crashpad/crashpad/util/linux/ptracer.cc b/src/3rdparty/chromium/third_party/crashpad/crashpad/util/linux/ptracer.cc +index 557e0d36357..cbf2b2be0c6 100644 +--- a/src/3rdparty/chromium/third_party/crashpad/crashpad/util/linux/ptracer.cc ++++ b/src/3rdparty/chromium/third_party/crashpad/crashpad/util/linux/ptracer.cc +@@ -273,7 +273,7 @@ bool GetThreadArea64(pid_t tid, + } + return true; + } +-#elif defined(ARCH_CPU_MIPS_FAMILY) ++#elif defined(ARCH_CPU_MIPS_FAMILY) || defined(ARCH_CPU_LA64) + // PTRACE_GETREGSET, introduced in Linux 2.6.34 (2225a122ae26), requires kernel + // support enabled by HAVE_ARCH_TRACEHOOK. This has been set for x86 (including + // x86_64) since Linux 2.6.28 (99bbc4b1e677a), but for MIPS only since +@@ -296,7 +296,7 @@ bool GetGeneralPurposeRegistersLegacy(pid_t tid, + // ptrace unsupported on MIPS for kernels older than 3.13 + #if defined(ARCH_CPU_MIPSEL) + #define THREAD_CONTEXT_FIELD t32 +-#elif defined(ARCH_CPU_MIPS64EL) ++#elif defined(ARCH_CPU_MIPS64EL) || defined(ARCH_CPU_LA64) + #define THREAD_CONTEXT_FIELD t64 + #endif + for (size_t reg = 0; reg < 32; ++reg) { +@@ -385,6 +385,9 @@ bool GetThreadArea64(pid_t tid, + const ThreadContext& context, + LinuxVMAddress* address, + bool can_log) { ++// TODO:LA ++#if !defined(ARCH_CPU_LA64) ++ + void* result; + #if defined(ARCH_CPU_MIPSEL) + if (ptrace(PTRACE_GET_THREAD_AREA_3264, tid, nullptr, &result) != 0) { +@@ -395,6 +398,7 @@ bool GetThreadArea64(pid_t tid, + return false; + } + *address = FromPointerCast(result); ++#endif + return true; + } + +diff --git a/src/3rdparty/chromium/third_party/crashpad/crashpad/util/linux/thread_info.h b/src/3rdparty/chromium/third_party/crashpad/crashpad/util/linux/thread_info.h +index 5b55c24a76d..489e350c421 100644 +--- a/src/3rdparty/chromium/third_party/crashpad/crashpad/util/linux/thread_info.h ++++ b/src/3rdparty/chromium/third_party/crashpad/crashpad/util/linux/thread_info.h +@@ -67,7 +67,7 @@ union ThreadContext { + uint32_t pc; + uint32_t cpsr; + uint32_t orig_r0; +-#elif defined(ARCH_CPU_MIPS_FAMILY) ++#elif defined(ARCH_CPU_MIPS_FAMILY) || defined(ARCH_CPU_LA64) + // Reflects output format of static int gpr32_get(), defined in + // arch/mips/kernel/ptrace.c in kernel source + uint32_t padding0_[6]; +@@ -122,7 +122,7 @@ union ThreadContext { + uint64_t sp; + uint64_t pc; + uint64_t pstate; +-#elif defined(ARCH_CPU_MIPS_FAMILY) ++#elif defined(ARCH_CPU_MIPS_FAMILY) || defined(ARCH_CPU_LA64) + // Reflects output format of static int gpr64_get(), defined in + // arch/mips/kernel/ptrace.c in kernel source + uint64_t regs[32]; +@@ -141,13 +141,13 @@ union ThreadContext { + using NativeThreadContext = user_regs_struct; + #elif defined(ARCH_CPU_ARMEL) + using 
NativeThreadContext = user_regs; +-#elif defined(ARCH_CPU_MIPS_FAMILY) ++#elif defined(ARCH_CPU_MIPS_FAMILY) || defined(ARCH_CPU_LA64) + // No appropriate NativeThreadsContext type available for MIPS + #else + #error Port. + #endif // ARCH_CPU_X86_FAMILY || ARCH_CPU_ARM64 + +-#if !defined(ARCH_CPU_MIPS_FAMILY) ++#if !defined(ARCH_CPU_MIPS_FAMILY) && !defined(ARCH_CPU_LA64) + #if defined(ARCH_CPU_32_BITS) + static_assert(sizeof(t32_t) == sizeof(NativeThreadContext), "Size mismatch"); + #else // ARCH_CPU_64_BITS +@@ -209,7 +209,7 @@ union FloatContext { + + bool have_fpregs; + bool have_vfp; +-#elif defined(ARCH_CPU_MIPS_FAMILY) ++#elif defined(ARCH_CPU_MIPS_FAMILY) || defined(ARCH_CPU_LA64) + // Reflects data format filled by ptrace_getfpregs() in + // arch/mips/kernel/ptrace.c + struct { +@@ -246,7 +246,7 @@ union FloatContext { + uint32_t fpsr; + uint32_t fpcr; + uint8_t padding[8]; +-#elif defined(ARCH_CPU_MIPS_FAMILY) ++#elif defined(ARCH_CPU_MIPS_FAMILY) || defined(ARCH_CPU_LA64) + // Reflects data format filled by ptrace_getfpregs() in + // arch/mips/kernel/ptrace.c + double fpregs[32]; +@@ -278,7 +278,7 @@ union FloatContext { + #endif + #elif defined(ARCH_CPU_ARM64) + static_assert(sizeof(f64) == sizeof(user_fpsimd_struct), "Size mismatch"); +-#elif defined(ARCH_CPU_MIPS_FAMILY) ++#elif defined(ARCH_CPU_MIPS_FAMILY) || defined(ARCH_CPU_LA64) + // No appropriate floating point context native type for available MIPS. + #else + #error Port. +diff --git a/src/3rdparty/chromium/third_party/ffmpeg/CREDITS.chromium b/src/3rdparty/chromium/third_party/ffmpeg/CREDITS.chromium +index 0043df2c504..8cbb66d9d1c 100644 +--- a/src/3rdparty/chromium/third_party/ffmpeg/CREDITS.chromium ++++ b/src/3rdparty/chromium/third_party/ffmpeg/CREDITS.chromium +@@ -129,517 +129,6 @@ incompatible with the GPLv2 and v3. To the best of our knowledge, they are + compatible with the LGPL. + + +-******************************************************************************** +- +-libavcodec/arm/jrevdct_arm.S +- +-C-like prototype : +- void j_rev_dct_arm(DCTBLOCK data) +- +- With DCTBLOCK being a pointer to an array of 64 'signed shorts' +- +- Copyright (c) 2001 Lionel Ulmer (lionel.ulmer@free.fr / bbrox@bbrox.org) +- +- Permission is hereby granted, free of charge, to any person obtaining a copy +- of this software and associated documentation files (the "Software"), to deal +- in the Software without restriction, including without limitation the rights +- to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +- copies of the Software, and to permit persons to whom the Software is +- furnished to do so, subject to the following conditions: +- +- The above copyright notice and this permission notice shall be included in +- all copies or substantial portions of the Software. +- +- THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +- IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +- FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +- COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER +- IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN +- CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. +- +-******************************************************************************** +- +-libavcodec/arm/vp8dsp_armv6.S +- +-VP8 ARMv6 optimisations +- +-Copyright (c) 2010 Google Inc. 
+-Copyright (c) 2010 Rob Clark +-Copyright (c) 2011 Mans Rullgard +- +-This file is part of FFmpeg. +- +-FFmpeg is free software; you can redistribute it and/or +-modify it under the terms of the GNU Lesser General Public +-License as published by the Free Software Foundation; either +-version 2.1 of the License, or (at your option) any later version. +- +-FFmpeg is distributed in the hope that it will be useful, +-but WITHOUT ANY WARRANTY; without even the implied warranty of +-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +-Lesser General Public License for more details. +- +-You should have received a copy of the GNU Lesser General Public +-License along with FFmpeg; if not, write to the Free Software +-Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA +- +-This code was partially ported from libvpx, which uses this license: +- +-Redistribution and use in source and binary forms, with or without +-modification, are permitted provided that the following conditions are +-met: +- +-* Redistributions of source code must retain the above copyright +-notice, this list of conditions and the following disclaimer. +- +-* Redistributions in binary form must reproduce the above copyright +-notice, this list of conditions and the following disclaimer in +-the documentation and/or other materials provided with the +-distribution. +- +-* Neither the name of Google nor the names of its contributors may +-be used to endorse or promote products derived from this software +-without specific prior written permission. +- +-THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +-"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +-LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +-A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +-HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +-SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +-LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +-DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +-THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +-(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +-OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +- +-******************************************************************************** +- +-libavcodec/mips/acelp_filters_mips.c +- +-Copyright (c) 2012 +-MIPS Technologies, Inc., California. +- +-Redistribution and use in source and binary forms, with or without +-modification, are permitted provided that the following conditions +-are met: +-1. Redistributions of source code must retain the above copyright +-notice, this list of conditions and the following disclaimer. +-2. Redistributions in binary form must reproduce the above copyright +-notice, this list of conditions and the following disclaimer in the +-documentation and/or other materials provided with the distribution. +-3. Neither the name of the MIPS Technologies, Inc., nor the names of its +-contributors may be used to endorse or promote products derived from +-this software without specific prior written permission. +- +-THIS SOFTWARE IS PROVIDED BY THE MIPS TECHNOLOGIES, INC. ``AS IS'' AND +-ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +-IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +-ARE DISCLAIMED. IN NO EVENT SHALL THE MIPS TECHNOLOGIES, INC. 
BE LIABLE +-FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +-DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS +-OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) +-HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT +-LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY +-OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF +-SUCH DAMAGE. +- +-Author: Nedeljko Babic (nbabic@mips.com) +- +-various filters for ACELP-based codecs optimized for MIPS +- +-This file is part of FFmpeg. +- +-FFmpeg is free software; you can redistribute it and/or +-modify it under the terms of the GNU Lesser General Public +-License as published by the Free Software Foundation; either +-version 2.1 of the License, or (at your option) any later version. +- +-FFmpeg is distributed in the hope that it will be useful, +-but WITHOUT ANY WARRANTY; without even the implied warranty of +-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +-Lesser General Public License for more details. +- +-You should have received a copy of the GNU Lesser General Public +-License along with FFmpeg; if not, write to the Free Software +-Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA +- +-******************************************************************************** +- +-libavcodec/mips/acelp_vectors_mips.c +- +-Copyright (c) 2012 +-MIPS Technologies, Inc., California. +- +-Redistribution and use in source and binary forms, with or without +-modification, are permitted provided that the following conditions +-are met: +-1. Redistributions of source code must retain the above copyright +-notice, this list of conditions and the following disclaimer. +-2. Redistributions in binary form must reproduce the above copyright +-notice, this list of conditions and the following disclaimer in the +-documentation and/or other materials provided with the distribution. +-3. Neither the name of the MIPS Technologies, Inc., nor the names of its +-contributors may be used to endorse or promote products derived from +-this software without specific prior written permission. +- +-THIS SOFTWARE IS PROVIDED BY THE MIPS TECHNOLOGIES, INC. ``AS IS'' AND +-ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +-IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +-ARE DISCLAIMED. IN NO EVENT SHALL THE MIPS TECHNOLOGIES, INC. BE LIABLE +-FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +-DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS +-OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) +-HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT +-LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY +-OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF +-SUCH DAMAGE. +- +-Author: Nedeljko Babic (nbabic@mips.com) +- +-adaptive and fixed codebook vector operations for ACELP-based codecs +-optimized for MIPS +- +-This file is part of FFmpeg. +- +-FFmpeg is free software; you can redistribute it and/or +-modify it under the terms of the GNU Lesser General Public +-License as published by the Free Software Foundation; either +-version 2.1 of the License, or (at your option) any later version. 
+- +-FFmpeg is distributed in the hope that it will be useful, +-but WITHOUT ANY WARRANTY; without even the implied warranty of +-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +-Lesser General Public License for more details. +- +-You should have received a copy of the GNU Lesser General Public +-License along with FFmpeg; if not, write to the Free Software +-Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA +- +-******************************************************************************** +- +-libavcodec/mips/amrwbdec_mips.c +- +-Copyright (c) 2012 +-MIPS Technologies, Inc., California. +- +-Redistribution and use in source and binary forms, with or without +-modification, are permitted provided that the following conditions +-are met: +-1. Redistributions of source code must retain the above copyright +-notice, this list of conditions and the following disclaimer. +-2. Redistributions in binary form must reproduce the above copyright +-notice, this list of conditions and the following disclaimer in the +-documentation and/or other materials provided with the distribution. +-3. Neither the name of the MIPS Technologies, Inc., nor the names of its +-contributors may be used to endorse or promote products derived from +-this software without specific prior written permission. +- +-THIS SOFTWARE IS PROVIDED BY THE MIPS TECHNOLOGIES, INC. ``AS IS'' AND +-ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +-IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +-ARE DISCLAIMED. IN NO EVENT SHALL THE MIPS TECHNOLOGIES, INC. BE LIABLE +-FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +-DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS +-OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) +-HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT +-LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY +-OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF +-SUCH DAMAGE. +- +-Author: Nedeljko Babic (nbabic@mips.com) +- +-This file is part of FFmpeg. +- +-FFmpeg is free software; you can redistribute it and/or +-modify it under the terms of the GNU Lesser General Public +-License as published by the Free Software Foundation; either +-version 2.1 of the License, or (at your option) any later version. +- +-FFmpeg is distributed in the hope that it will be useful, +-but WITHOUT ANY WARRANTY; without even the implied warranty of +-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +-Lesser General Public License for more details. +- +-You should have received a copy of the GNU Lesser General Public +-License along with FFmpeg; if not, write to the Free Software +-Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA +- +-******************************************************************************** +- +-libavcodec/mips/celp_filters_mips.c +- +-Copyright (c) 2012 +-MIPS Technologies, Inc., California. +- +-Redistribution and use in source and binary forms, with or without +-modification, are permitted provided that the following conditions +-are met: +-1. Redistributions of source code must retain the above copyright +-notice, this list of conditions and the following disclaimer. +-2. 
Redistributions in binary form must reproduce the above copyright +-notice, this list of conditions and the following disclaimer in the +-documentation and/or other materials provided with the distribution. +-3. Neither the name of the MIPS Technologies, Inc., nor the names of its +-contributors may be used to endorse or promote products derived from +-this software without specific prior written permission. +- +-THIS SOFTWARE IS PROVIDED BY THE MIPS TECHNOLOGIES, INC. ``AS IS'' AND +-ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +-IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +-ARE DISCLAIMED. IN NO EVENT SHALL THE MIPS TECHNOLOGIES, INC. BE LIABLE +-FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +-DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS +-OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) +-HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT +-LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY +-OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF +-SUCH DAMAGE. +- +-Author: Nedeljko Babic (nbabic@mips.com) +- +-various filters for CELP-based codecs optimized for MIPS +- +-This file is part of FFmpeg. +- +-FFmpeg is free software; you can redistribute it and/or +-modify it under the terms of the GNU Lesser General Public +-License as published by the Free Software Foundation; either +-version 2.1 of the License, or (at your option) any later version. +- +-FFmpeg is distributed in the hope that it will be useful, +-but WITHOUT ANY WARRANTY; without even the implied warranty of +-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +-Lesser General Public License for more details. +- +-You should have received a copy of the GNU Lesser General Public +-License along with FFmpeg; if not, write to the Free Software +-Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA +- +-******************************************************************************** +- +-libavcodec/mips/celp_math_mips.c +- +-Copyright (c) 2012 +-MIPS Technologies, Inc., California. +- +-Redistribution and use in source and binary forms, with or without +-modification, are permitted provided that the following conditions +-are met: +-1. Redistributions of source code must retain the above copyright +-notice, this list of conditions and the following disclaimer. +-2. Redistributions in binary form must reproduce the above copyright +-notice, this list of conditions and the following disclaimer in the +-documentation and/or other materials provided with the distribution. +-3. Neither the name of the MIPS Technologies, Inc., nor the names of its +-contributors may be used to endorse or promote products derived from +-this software without specific prior written permission. +- +-THIS SOFTWARE IS PROVIDED BY THE MIPS TECHNOLOGIES, INC. ``AS IS'' AND +-ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +-IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +-ARE DISCLAIMED. IN NO EVENT SHALL THE MIPS TECHNOLOGIES, INC. 
BE LIABLE +-FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +-DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS +-OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) +-HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT +-LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY +-OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF +-SUCH DAMAGE. +- +-Author: Nedeljko Babic (nbabic@mips.com) +- +-Math operations optimized for MIPS +- +-This file is part of FFmpeg. +- +-FFmpeg is free software; you can redistribute it and/or +-modify it under the terms of the GNU Lesser General Public +-License as published by the Free Software Foundation; either +-version 2.1 of the License, or (at your option) any later version. +- +-FFmpeg is distributed in the hope that it will be useful, +-but WITHOUT ANY WARRANTY; without even the implied warranty of +-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +-Lesser General Public License for more details. +- +-You should have received a copy of the GNU Lesser General Public +-License along with FFmpeg; if not, write to the Free Software +-Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA +- +-******************************************************************************** +- +-libavcodec/mips/fft_mips.c +- +-Copyright (c) 2012 +-MIPS Technologies, Inc., California. +- +-Redistribution and use in source and binary forms, with or without +-modification, are permitted provided that the following conditions +-are met: +-1. Redistributions of source code must retain the above copyright +-notice, this list of conditions and the following disclaimer. +-2. Redistributions in binary form must reproduce the above copyright +-notice, this list of conditions and the following disclaimer in the +-documentation and/or other materials provided with the distribution. +-3. Neither the name of the MIPS Technologies, Inc., nor the names of its +-contributors may be used to endorse or promote products derived from +-this software without specific prior written permission. +- +-THIS SOFTWARE IS PROVIDED BY THE MIPS TECHNOLOGIES, INC. ``AS IS'' AND +-ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +-IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +-ARE DISCLAIMED. IN NO EVENT SHALL THE MIPS TECHNOLOGIES, INC. BE LIABLE +-FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +-DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS +-OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) +-HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT +-LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY +-OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF +-SUCH DAMAGE. +- +-Author: Stanislav Ocovaj (socovaj@mips.com) +-Author: Zoran Lukic (zoranl@mips.com) +- +-Optimized MDCT/IMDCT and FFT transforms +- +-This file is part of FFmpeg. +- +-FFmpeg is free software; you can redistribute it and/or +-modify it under the terms of the GNU Lesser General Public +-License as published by the Free Software Foundation; either +-version 2.1 of the License, or (at your option) any later version. 
+- +-FFmpeg is distributed in the hope that it will be useful, +-but WITHOUT ANY WARRANTY; without even the implied warranty of +-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +-Lesser General Public License for more details. +- +-You should have received a copy of the GNU Lesser General Public +-License along with FFmpeg; if not, write to the Free Software +-Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA +- +-******************************************************************************** +- +-libavcodec/mips/mpegaudiodsp_mips_float.c +- +-Copyright (c) 2012 +-MIPS Technologies, Inc., California. +- +-Redistribution and use in source and binary forms, with or without +-modification, are permitted provided that the following conditions +-are met: +-1. Redistributions of source code must retain the above copyright +-notice, this list of conditions and the following disclaimer. +-2. Redistributions in binary form must reproduce the above copyright +-notice, this list of conditions and the following disclaimer in the +-documentation and/or other materials provided with the distribution. +-3. Neither the name of the MIPS Technologies, Inc., nor the names of its +-contributors may be used to endorse or promote products derived from +-this software without specific prior written permission. +- +-THIS SOFTWARE IS PROVIDED BY THE MIPS TECHNOLOGIES, INC. ``AS IS'' AND +-ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +-IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +-ARE DISCLAIMED. IN NO EVENT SHALL THE MIPS TECHNOLOGIES, INC. BE LIABLE +-FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +-DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS +-OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) +-HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT +-LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY +-OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF +-SUCH DAMAGE. +- +-Author: Bojan Zivkovic (bojan@mips.com) +- +-MPEG Audio decoder optimized for MIPS floating-point architecture +- +-This file is part of FFmpeg. +- +-FFmpeg is free software; you can redistribute it and/or +-modify it under the terms of the GNU Lesser General Public +-License as published by the Free Software Foundation; either +-version 2.1 of the License, or (at your option) any later version. +- +-FFmpeg is distributed in the hope that it will be useful, +-but WITHOUT ANY WARRANTY; without even the implied warranty of +-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +-Lesser General Public License for more details. +- +-You should have received a copy of the GNU Lesser General Public +-License along with FFmpeg; if not, write to the Free Software +-Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA +- +-******************************************************************************** +- +-libavcodec/x86/xvididct.asm +- +-XVID MPEG-4 VIDEO CODEC +- +- Conversion from gcc syntax to x264asm syntax with modifications +- by Christophe Gisquet +- +- =========== SSE2 inverse discrete cosine transform =========== +- +- Copyright(C) 2003 Pascal Massimino +- +- Conversion to gcc syntax with modifications +- by Alexander Strange +- +- Originally from dct/x86_asm/fdct_sse2_skal.asm in Xvid. 
+- +- Vertical pass is an implementation of the scheme: +- Loeffler C., Ligtenberg A., and Moschytz C.S.: +- Practical Fast 1D DCT Algorithm with Eleven Multiplications, +- Proc. ICASSP 1989, 988-991. +- +- Horizontal pass is a double 4x4 vector/matrix multiplication, +- (see also Intel's Application Note 922: +- http://developer.intel.com/vtune/cbts/strmsimd/922down.htm +- Copyright (C) 1999 Intel Corporation) +- +- More details at http://skal.planet-d.net/coding/dct.html +- +- ======= MMX and XMM forward discrete cosine transform ======= +- +- Copyright(C) 2001 Peter Ross +- +- Originally provided by Intel at AP-922 +- http://developer.intel.com/vtune/cbts/strmsimd/922down.htm +- (See more app notes at http://developer.intel.com/vtune/cbts/strmsimd/appnotes.htm) +- but in a limited edition. +- New macro implements a column part for precise iDCT +- The routine precision now satisfies IEEE standard 1180-1990. +- +- Copyright(C) 2000-2001 Peter Gubanov +- Rounding trick Copyright(C) 2000 Michel Lespinasse +- +- http://www.elecard.com/peter/idct.html +- http://www.linuxvideo.org/mpeg2dec/ +- +- These examples contain code fragments for first stage iDCT 8x8 +- (for rows) and first stage DCT 8x8 (for columns) +- +- conversion to gcc syntax by Michael Niedermayer +- +- ====================================================================== +- +- This file is part of FFmpeg. +- +- FFmpeg is free software; you can redistribute it and/or +- modify it under the terms of the GNU Lesser General Public +- License as published by the Free Software Foundation; either +- version 2.1 of the License, or (at your option) any later version. +- +- FFmpeg is distributed in the hope that it will be useful, +- but WITHOUT ANY WARRANTY; without even the implied warranty of +- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +- Lesser General Public License for more details. +- +- You should have received a copy of the GNU Lesser General Public License +- along with FFmpeg; if not, write to the Free Software Foundation, +- Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA +- + ******************************************************************************** + + libavformat/oggparsetheora.c +@@ -784,19 +273,14 @@ libavcodec/fft_fixed_32.c + libavcodec/fft_init_table.c + libavcodec/fft_table.h + libavcodec/mdct_fixed_32.c +-libavcodec/mips/aacdec_mips.c + libavcodec/mips/aacdec_mips.h +-libavcodec/mips/aacpsdsp_mips.c +-libavcodec/mips/aacsbr_mips.c + libavcodec/mips/aacsbr_mips.h + libavcodec/mips/amrwbdec_mips.h + libavcodec/mips/compute_antialias_fixed.h + libavcodec/mips/compute_antialias_float.h + libavcodec/mips/lsp_mips.h +-libavcodec/mips/sbrdsp_mips.c + libavutil/fixed_dsp.c + libavutil/fixed_dsp.h +-libavutil/mips/float_dsp_mips.c + libavutil/mips/libm_mips.h + libavutil/softfloat_tables.h + +diff --git a/src/3rdparty/chromium/third_party/ffmpeg/chromium/config/Chrome/linux/la64/config.h b/src/3rdparty/chromium/third_party/ffmpeg/chromium/config/Chrome/linux/la64/config.h +new file mode 100644 +index 00000000000..a4351739eb4 +--- /dev/null ++++ b/src/3rdparty/chromium/third_party/ffmpeg/chromium/config/Chrome/linux/la64/config.h +@@ -0,0 +1,2589 @@ ++/* Automatically generated by configure - do not modify! 
*/ ++#ifndef FFMPEG_CONFIG_H ++#define FFMPEG_CONFIG_H ++/* #define FFMPEG_CONFIGURATION "--disable-everything --disable-all --disable-doc --disable-htmlpages --disable-manpages --disable-podpages --disable-txtpages --disable-static --enable-avcodec --enable-avformat --enable-avutil --enable-fft --enable-rdft --enable-static --enable-libopus --disable-debug --disable-bzlib --disable-error-resilience --disable-iconv --disable-lzo --disable-network --disable-schannel --disable-sdl2 --disable-symver --disable-xlib --disable-zlib --disable-securetransport --disable-faan --disable-alsa --disable-autodetect --enable-decoder='vorbis,libopus,flac' --enable-decoder='pcm_u8,pcm_s16le,pcm_s24le,pcm_s32le,pcm_f32le,mp3' --enable-decoder='pcm_s16be,pcm_s24be,pcm_mulaw,pcm_alaw' --enable-demuxer='ogg,matroska,wav,flac,mp3,mov' --enable-parser='opus,vorbis,flac,mpegaudio,vp9' --extra-cflags=-I/mnt/chromium/src/third_party/opus/src/include --disable-linux-perf --x86asmexe=nasm --optflags='\"-O2\"' --enable-decoder='theora,vp8' --enable-parser='vp3,vp8' --target-os=linux --enable-pic --cc=clang --cxx=clang++ --ld=clang --enable-decoder='aac,h264' --enable-demuxer=aac --enable-parser='aac,h264'" -- elide long configuration string from binary */ ++#define FFMPEG_LICENSE "LGPL version 2.1 or later" ++#define CONFIG_THIS_YEAR 2020 ++#define FFMPEG_DATADIR "/usr/local/share/ffmpeg" ++#define AVCONV_DATADIR "/usr/local/share/ffmpeg" ++#define CC_IDENT "clang version 8.0.1" ++#define av_restrict restrict ++#define EXTERN_PREFIX "" ++#define EXTERN_ASM ++#define BUILDSUF "" ++#define SLIBSUF ".so" ++#define HAVE_MMX2 HAVE_MMXEXT ++#define SWS_MAX_FILTER_SIZE 256 ++#define ARCH_AARCH64 0 ++#define ARCH_ALPHA 0 ++#define ARCH_ARM 0 ++#define ARCH_AVR32 0 ++#define ARCH_AVR32_AP 0 ++#define ARCH_AVR32_UC 0 ++#define ARCH_BFIN 0 ++#define ARCH_IA64 0 ++#define ARCH_M68K 0 ++#define ARCH_MIPS 0 ++#define ARCH_MIPS64 0 ++#define ARCH_PARISC 0 ++#define ARCH_PPC 0 ++#define ARCH_PPC64 0 ++#define ARCH_S390 0 ++#define ARCH_SH4 0 ++#define ARCH_SPARC 0 ++#define ARCH_SPARC64 0 ++#define ARCH_TILEGX 0 ++#define ARCH_TILEPRO 0 ++#define ARCH_TOMI 0 ++#define ARCH_X86 0 ++#define ARCH_X86_32 0 ++#define ARCH_X86_64 0 ++#define HAVE_ARMV5TE 0 ++#define HAVE_ARMV6 0 ++#define HAVE_ARMV6T2 0 ++#define HAVE_ARMV8 0 ++#define HAVE_NEON 0 ++#define HAVE_VFP 0 ++#define HAVE_VFPV3 0 ++#define HAVE_SETEND 0 ++#define HAVE_ALTIVEC 0 ++#define HAVE_DCBZL 0 ++#define HAVE_LDBRX 0 ++#define HAVE_POWER8 0 ++#define HAVE_PPC4XX 0 ++#define HAVE_VSX 0 ++#define HAVE_AESNI 0 ++#define HAVE_AMD3DNOW 0 ++#define HAVE_AMD3DNOWEXT 0 ++#define HAVE_AVX 0 ++#define HAVE_AVX2 0 ++#define HAVE_AVX512 0 ++#define HAVE_FMA3 0 ++#define HAVE_FMA4 0 ++#define HAVE_MMX 0 ++#define HAVE_MMXEXT 0 ++#define HAVE_SSE 0 ++#define HAVE_SSE2 0 ++#define HAVE_SSE3 0 ++#define HAVE_SSE4 0 ++#define HAVE_SSE42 0 ++#define HAVE_SSSE3 0 ++#define HAVE_XOP 0 ++#define HAVE_CPUNOP 0 ++#define HAVE_I686 0 ++#define HAVE_MIPSFPU 0 ++#define HAVE_MIPS32R2 0 ++#define HAVE_MIPS32R5 0 ++#define HAVE_MIPS64R2 0 ++#define HAVE_MIPS32R6 0 ++#define HAVE_MIPS64R6 0 ++#define HAVE_MIPSDSP 0 ++#define HAVE_MIPSDSPR2 0 ++#define HAVE_MSA 0 ++#define HAVE_MSA2 0 ++#define HAVE_LOONGSON2 0 ++#define HAVE_LOONGSON3 0 ++#define HAVE_MMI 0 ++#define HAVE_ARMV5TE_EXTERNAL 0 ++#define HAVE_ARMV6_EXTERNAL 0 ++#define HAVE_ARMV6T2_EXTERNAL 0 ++#define HAVE_ARMV8_EXTERNAL 0 ++#define HAVE_NEON_EXTERNAL 0 ++#define HAVE_VFP_EXTERNAL 0 ++#define HAVE_VFPV3_EXTERNAL 0 ++#define 
HAVE_SETEND_EXTERNAL 0 ++#define HAVE_ALTIVEC_EXTERNAL 0 ++#define HAVE_DCBZL_EXTERNAL 0 ++#define HAVE_LDBRX_EXTERNAL 0 ++#define HAVE_POWER8_EXTERNAL 0 ++#define HAVE_PPC4XX_EXTERNAL 0 ++#define HAVE_VSX_EXTERNAL 0 ++#define HAVE_AESNI_EXTERNAL 0 ++#define HAVE_AMD3DNOW_EXTERNAL 0 ++#define HAVE_AMD3DNOWEXT_EXTERNAL 0 ++#define HAVE_AVX_EXTERNAL 0 ++#define HAVE_AVX2_EXTERNAL 0 ++#define HAVE_AVX512_EXTERNAL 0 ++#define HAVE_FMA3_EXTERNAL 0 ++#define HAVE_FMA4_EXTERNAL 0 ++#define HAVE_MMX_EXTERNAL 0 ++#define HAVE_MMXEXT_EXTERNAL 0 ++#define HAVE_SSE_EXTERNAL 0 ++#define HAVE_SSE2_EXTERNAL 0 ++#define HAVE_SSE3_EXTERNAL 0 ++#define HAVE_SSE4_EXTERNAL 0 ++#define HAVE_SSE42_EXTERNAL 0 ++#define HAVE_SSSE3_EXTERNAL 0 ++#define HAVE_XOP_EXTERNAL 0 ++#define HAVE_CPUNOP_EXTERNAL 0 ++#define HAVE_I686_EXTERNAL 0 ++#define HAVE_MIPSFPU_EXTERNAL 0 ++#define HAVE_MIPS32R2_EXTERNAL 0 ++#define HAVE_MIPS32R5_EXTERNAL 0 ++#define HAVE_MIPS64R2_EXTERNAL 0 ++#define HAVE_MIPS32R6_EXTERNAL 0 ++#define HAVE_MIPS64R6_EXTERNAL 0 ++#define HAVE_MIPSDSP_EXTERNAL 0 ++#define HAVE_MIPSDSPR2_EXTERNAL 0 ++#define HAVE_MSA_EXTERNAL 0 ++#define HAVE_MSA2_EXTERNAL 0 ++#define HAVE_LOONGSON2_EXTERNAL 0 ++#define HAVE_LOONGSON3_EXTERNAL 0 ++#define HAVE_MMI_EXTERNAL 0 ++#define HAVE_ARMV5TE_INLINE 0 ++#define HAVE_ARMV6_INLINE 0 ++#define HAVE_ARMV6T2_INLINE 0 ++#define HAVE_ARMV8_INLINE 0 ++#define HAVE_NEON_INLINE 0 ++#define HAVE_VFP_INLINE 0 ++#define HAVE_VFPV3_INLINE 0 ++#define HAVE_SETEND_INLINE 0 ++#define HAVE_ALTIVEC_INLINE 0 ++#define HAVE_DCBZL_INLINE 0 ++#define HAVE_LDBRX_INLINE 0 ++#define HAVE_POWER8_INLINE 0 ++#define HAVE_PPC4XX_INLINE 0 ++#define HAVE_VSX_INLINE 0 ++#define HAVE_AESNI_INLINE 0 ++#define HAVE_AMD3DNOW_INLINE 0 ++#define HAVE_AMD3DNOWEXT_INLINE 0 ++#define HAVE_AVX_INLINE 0 ++#define HAVE_AVX2_INLINE 0 ++#define HAVE_AVX512_INLINE 0 ++#define HAVE_FMA3_INLINE 0 ++#define HAVE_FMA4_INLINE 0 ++#define HAVE_MMX_INLINE 0 ++#define HAVE_MMXEXT_INLINE 0 ++#define HAVE_SSE_INLINE 0 ++#define HAVE_SSE2_INLINE 0 ++#define HAVE_SSE3_INLINE 0 ++#define HAVE_SSE4_INLINE 0 ++#define HAVE_SSE42_INLINE 0 ++#define HAVE_SSSE3_INLINE 0 ++#define HAVE_XOP_INLINE 0 ++#define HAVE_CPUNOP_INLINE 0 ++#define HAVE_I686_INLINE 0 ++#define HAVE_MIPSFPU_INLINE 0 ++#define HAVE_MIPS32R2_INLINE 0 ++#define HAVE_MIPS32R5_INLINE 0 ++#define HAVE_MIPS64R2_INLINE 0 ++#define HAVE_MIPS32R6_INLINE 0 ++#define HAVE_MIPS64R6_INLINE 0 ++#define HAVE_MIPSDSP_INLINE 0 ++#define HAVE_MIPSDSPR2_INLINE 0 ++#define HAVE_MSA_INLINE 0 ++#define HAVE_MSA2_INLINE 0 ++#define HAVE_LOONGSON2_INLINE 0 ++#define HAVE_LOONGSON3_INLINE 0 ++#define HAVE_MMI_INLINE 0 ++#define HAVE_ALIGNED_STACK 0 ++#define HAVE_FAST_64BIT 0 ++#define HAVE_FAST_CLZ 0 ++#define HAVE_FAST_CMOV 0 ++#define HAVE_LOCAL_ALIGNED 0 ++#define HAVE_SIMD_ALIGN_16 0 ++#define HAVE_SIMD_ALIGN_32 0 ++#define HAVE_SIMD_ALIGN_64 0 ++#define HAVE_ATOMIC_CAS_PTR 0 ++#define HAVE_MACHINE_RW_BARRIER 0 ++#define HAVE_MEMORYBARRIER 0 ++#define HAVE_MM_EMPTY 0 ++#define HAVE_RDTSC 0 ++#define HAVE_SEM_TIMEDWAIT 1 ++#define HAVE_SYNC_VAL_COMPARE_AND_SWAP 1 ++#define HAVE_CABS 0 ++#define HAVE_CEXP 0 ++#define HAVE_INLINE_ASM 1 ++#define HAVE_SYMVER 0 ++#define HAVE_X86ASM 0 ++#define HAVE_BIGENDIAN 0 ++#define HAVE_FAST_UNALIGNED 0 ++#define HAVE_ARPA_INET_H 0 ++#define HAVE_ASM_TYPES_H 1 ++#define HAVE_CDIO_PARANOIA_H 0 ++#define HAVE_CDIO_PARANOIA_PARANOIA_H 0 ++#define HAVE_CUDA_H 0 ++#define HAVE_DISPATCH_DISPATCH_H 0 ++#define HAVE_DEV_BKTR_IOCTL_BT848_H 0 ++#define 
HAVE_DEV_BKTR_IOCTL_METEOR_H 0 ++#define HAVE_DEV_IC_BT8XX_H 0 ++#define HAVE_DEV_VIDEO_BKTR_IOCTL_BT848_H 0 ++#define HAVE_DEV_VIDEO_METEOR_IOCTL_METEOR_H 0 ++#define HAVE_DIRECT_H 0 ++#define HAVE_DIRENT_H 1 ++#define HAVE_DXGIDEBUG_H 0 ++#define HAVE_DXVA_H 0 ++#define HAVE_ES2_GL_H 0 ++#define HAVE_GSM_H 0 ++#define HAVE_IO_H 0 ++#define HAVE_LINUX_PERF_EVENT_H 1 ++#define HAVE_MACHINE_IOCTL_BT848_H 0 ++#define HAVE_MACHINE_IOCTL_METEOR_H 0 ++#define HAVE_MALLOC_H 1 ++#define HAVE_OPENCV2_CORE_CORE_C_H 0 ++#define HAVE_OPENGL_GL3_H 0 ++#define HAVE_POLL_H 1 ++#define HAVE_SYS_PARAM_H 1 ++#define HAVE_SYS_RESOURCE_H 1 ++#define HAVE_SYS_SELECT_H 1 ++#define HAVE_SYS_SOUNDCARD_H 1 ++#define HAVE_SYS_TIME_H 1 ++#define HAVE_SYS_UN_H 1 ++#define HAVE_SYS_VIDEOIO_H 0 ++#define HAVE_TERMIOS_H 1 ++#define HAVE_UDPLITE_H 0 ++#define HAVE_UNISTD_H 1 ++#define HAVE_VALGRIND_VALGRIND_H 0 /* #define HAVE_VALGRIND_VALGRIND_H 0 -- forced to 0. See https://crbug.com/590440 */ ++#define HAVE_WINDOWS_H 0 ++#define HAVE_WINSOCK2_H 0 ++#define HAVE_INTRINSICS_NEON 0 ++#define HAVE_ATANF 1 ++#define HAVE_ATAN2F 1 ++#define HAVE_CBRT 1 ++#define HAVE_CBRTF 1 ++#define HAVE_COPYSIGN 1 ++#define HAVE_COSF 1 ++#define HAVE_ERF 1 ++#define HAVE_EXP2 1 ++#define HAVE_EXP2F 1 ++#define HAVE_EXPF 1 ++#define HAVE_HYPOT 1 ++#define HAVE_ISFINITE 1 ++#define HAVE_ISINF 1 ++#define HAVE_ISNAN 1 ++#define HAVE_LDEXPF 1 ++#define HAVE_LLRINT 1 ++#define HAVE_LLRINTF 1 ++#define HAVE_LOG2 1 ++#define HAVE_LOG2F 1 ++#define HAVE_LOG10F 1 ++#define HAVE_LRINT 1 ++#define HAVE_LRINTF 1 ++#define HAVE_POWF 1 ++#define HAVE_RINT 1 ++#define HAVE_ROUND 1 ++#define HAVE_ROUNDF 1 ++#define HAVE_SINF 1 ++#define HAVE_TRUNC 1 ++#define HAVE_TRUNCF 1 ++#define HAVE_DOS_PATHS 0 ++#define HAVE_LIBC_MSVCRT 0 ++#define HAVE_MMAL_PARAMETER_VIDEO_MAX_NUM_CALLBACKS 0 ++#define HAVE_SECTION_DATA_REL_RO 1 ++#define HAVE_THREADS 1 ++#define HAVE_UWP 0 ++#define HAVE_WINRT 0 ++#define HAVE_ACCESS 1 ++#define HAVE_ALIGNED_MALLOC 0 ++#define HAVE_ARC4RANDOM 0 ++#define HAVE_CLOCK_GETTIME 1 ++#define HAVE_CLOSESOCKET 0 ++#define HAVE_COMMANDLINETOARGVW 0 ++#define HAVE_FCNTL 1 ++#define HAVE_GETADDRINFO 0 ++#define HAVE_GETHRTIME 0 ++#define HAVE_GETOPT 1 ++#define HAVE_GETMODULEHANDLE 0 ++#define HAVE_GETPROCESSAFFINITYMASK 0 ++#define HAVE_GETPROCESSMEMORYINFO 0 ++#define HAVE_GETPROCESSTIMES 0 ++#define HAVE_GETRUSAGE 1 ++#define HAVE_GETSTDHANDLE 0 ++#define HAVE_GETSYSTEMTIMEASFILETIME 0 ++#define HAVE_GETTIMEOFDAY 1 ++#define HAVE_GLOB 1 ++#define HAVE_GLXGETPROCADDRESS 0 ++#define HAVE_GMTIME_R 1 ++#define HAVE_INET_ATON 0 ++#define HAVE_ISATTY 1 ++#define HAVE_KBHIT 0 ++#define HAVE_LOCALTIME_R 1 ++#define HAVE_LSTAT 1 ++#define HAVE_LZO1X_999_COMPRESS 0 ++#define HAVE_MACH_ABSOLUTE_TIME 0 ++#define HAVE_MAPVIEWOFFILE 0 ++#define HAVE_MEMALIGN 1 ++#define HAVE_MKSTEMP 1 ++#define HAVE_MMAP 1 ++#define HAVE_MPROTECT 1 ++#define HAVE_NANOSLEEP 1 ++#define HAVE_PEEKNAMEDPIPE 0 ++#define HAVE_POSIX_MEMALIGN 1 ++#define HAVE_PTHREAD_CANCEL 1 ++#define HAVE_SCHED_GETAFFINITY 1 ++#define HAVE_SECITEMIMPORT 0 ++#define HAVE_SETCONSOLETEXTATTRIBUTE 0 ++#define HAVE_SETCONSOLECTRLHANDLER 0 ++#define HAVE_SETDLLDIRECTORY 0 ++#define HAVE_SETMODE 0 ++#define HAVE_SETRLIMIT 1 ++#define HAVE_SLEEP 0 ++#define HAVE_STRERROR_R 1 ++#define HAVE_SYSCONF 1 ++#define HAVE_SYSCTL 0 /* #define HAVE_SYSCTL 1 -- forced to 0 for Fuchsia */ ++#define HAVE_USLEEP 1 ++#define HAVE_UTGETOSTYPEFROMSTRING 0 ++#define HAVE_VIRTUALALLOC 0 ++#define 
HAVE_WGLGETPROCADDRESS 0 ++#define HAVE_BCRYPT 0 ++#define HAVE_VAAPI_DRM 0 ++#define HAVE_VAAPI_X11 0 ++#define HAVE_VDPAU_X11 0 ++#define HAVE_PTHREADS 1 ++#define HAVE_OS2THREADS 0 ++#define HAVE_W32THREADS 0 ++#define HAVE_AS_ARCH_DIRECTIVE 0 ++#define HAVE_AS_DN_DIRECTIVE 0 ++#define HAVE_AS_FPU_DIRECTIVE 0 ++#define HAVE_AS_FUNC 0 ++#define HAVE_AS_OBJECT_ARCH 0 ++#define HAVE_ASM_MOD_Q 0 ++#define HAVE_BLOCKS_EXTENSION 0 ++#define HAVE_EBP_AVAILABLE 0 ++#define HAVE_EBX_AVAILABLE 0 ++#define HAVE_GNU_AS 0 ++#define HAVE_GNU_WINDRES 0 ++#define HAVE_IBM_ASM 0 ++#define HAVE_INLINE_ASM_DIRECT_SYMBOL_REFS 0 ++#define HAVE_INLINE_ASM_LABELS 1 ++#define HAVE_INLINE_ASM_NONLOCAL_LABELS 1 ++#define HAVE_PRAGMA_DEPRECATED 1 ++#define HAVE_RSYNC_CONTIMEOUT 0 ++#define HAVE_SYMVER_ASM_LABEL 1 ++#define HAVE_SYMVER_GNU_ASM 1 ++#define HAVE_VFP_ARGS 0 ++#define HAVE_XFORM_ASM 0 ++#define HAVE_XMM_CLOBBERS 0 ++#define HAVE_KCMVIDEOCODECTYPE_HEVC 0 ++#define HAVE_KCVPIXELFORMATTYPE_420YPCBCR10BIPLANARVIDEORANGE 0 ++#define HAVE_KCVIMAGEBUFFERTRANSFERFUNCTION_SMPTE_ST_2084_PQ 0 ++#define HAVE_KCVIMAGEBUFFERTRANSFERFUNCTION_ITU_R_2100_HLG 0 ++#define HAVE_KCVIMAGEBUFFERTRANSFERFUNCTION_LINEAR 0 ++#define HAVE_SOCKLEN_T 0 ++#define HAVE_STRUCT_ADDRINFO 0 ++#define HAVE_STRUCT_GROUP_SOURCE_REQ 0 ++#define HAVE_STRUCT_IP_MREQ_SOURCE 0 ++#define HAVE_STRUCT_IPV6_MREQ 0 ++#define HAVE_STRUCT_MSGHDR_MSG_FLAGS 0 ++#define HAVE_STRUCT_POLLFD 0 ++#define HAVE_STRUCT_RUSAGE_RU_MAXRSS 1 ++#define HAVE_STRUCT_SCTP_EVENT_SUBSCRIBE 0 ++#define HAVE_STRUCT_SOCKADDR_IN6 0 ++#define HAVE_STRUCT_SOCKADDR_SA_LEN 0 ++#define HAVE_STRUCT_SOCKADDR_STORAGE 0 ++#define HAVE_STRUCT_STAT_ST_MTIM_TV_NSEC 1 ++#define HAVE_STRUCT_V4L2_FRMIVALENUM_DISCRETE 0 ++#define HAVE_MAKEINFO 1 ++#define HAVE_MAKEINFO_HTML 1 ++#define HAVE_OPENCL_D3D11 0 ++#define HAVE_OPENCL_DRM_ARM 0 ++#define HAVE_OPENCL_DRM_BEIGNET 0 ++#define HAVE_OPENCL_DXVA2 0 ++#define HAVE_OPENCL_VAAPI_BEIGNET 0 ++#define HAVE_OPENCL_VAAPI_INTEL_MEDIA 0 ++#define HAVE_PERL 1 ++#define HAVE_POD2MAN 1 ++#define HAVE_TEXI2HTML 0 ++#define CONFIG_DOC 0 ++#define CONFIG_HTMLPAGES 0 ++#define CONFIG_MANPAGES 0 ++#define CONFIG_PODPAGES 0 ++#define CONFIG_TXTPAGES 0 ++#define CONFIG_AVIO_LIST_DIR_EXAMPLE 1 ++#define CONFIG_AVIO_READING_EXAMPLE 1 ++#define CONFIG_DECODE_AUDIO_EXAMPLE 1 ++#define CONFIG_DECODE_VIDEO_EXAMPLE 1 ++#define CONFIG_DEMUXING_DECODING_EXAMPLE 1 ++#define CONFIG_ENCODE_AUDIO_EXAMPLE 1 ++#define CONFIG_ENCODE_VIDEO_EXAMPLE 1 ++#define CONFIG_EXTRACT_MVS_EXAMPLE 1 ++#define CONFIG_FILTER_AUDIO_EXAMPLE 0 ++#define CONFIG_FILTERING_AUDIO_EXAMPLE 0 ++#define CONFIG_FILTERING_VIDEO_EXAMPLE 0 ++#define CONFIG_HTTP_MULTICLIENT_EXAMPLE 1 ++#define CONFIG_HW_DECODE_EXAMPLE 1 ++#define CONFIG_METADATA_EXAMPLE 1 ++#define CONFIG_MUXING_EXAMPLE 0 ++#define CONFIG_QSVDEC_EXAMPLE 0 ++#define CONFIG_REMUXING_EXAMPLE 1 ++#define CONFIG_RESAMPLING_AUDIO_EXAMPLE 0 ++#define CONFIG_SCALING_VIDEO_EXAMPLE 0 ++#define CONFIG_TRANSCODE_AAC_EXAMPLE 0 ++#define CONFIG_TRANSCODING_EXAMPLE 0 ++#define CONFIG_VAAPI_ENCODE_EXAMPLE 0 ++#define CONFIG_VAAPI_TRANSCODE_EXAMPLE 0 ++#define CONFIG_AVISYNTH 0 ++#define CONFIG_FREI0R 0 ++#define CONFIG_LIBCDIO 0 ++#define CONFIG_LIBDAVS2 0 ++#define CONFIG_LIBRUBBERBAND 0 ++#define CONFIG_LIBVIDSTAB 0 ++#define CONFIG_LIBX264 0 ++#define CONFIG_LIBX265 0 ++#define CONFIG_LIBXAVS 0 ++#define CONFIG_LIBXAVS2 0 ++#define CONFIG_LIBXVID 0 ++#define CONFIG_DECKLINK 0 ++#define CONFIG_LIBFDK_AAC 0 ++#define CONFIG_OPENSSL 0 ++#define 
CONFIG_LIBTLS 0 ++#define CONFIG_GMP 0 ++#define CONFIG_LIBARIBB24 0 ++#define CONFIG_LIBLENSFUN 0 ++#define CONFIG_LIBOPENCORE_AMRNB 0 ++#define CONFIG_LIBOPENCORE_AMRWB 0 ++#define CONFIG_LIBVMAF 0 ++#define CONFIG_LIBVO_AMRWBENC 0 ++#define CONFIG_MBEDTLS 0 ++#define CONFIG_RKMPP 0 ++#define CONFIG_LIBSMBCLIENT 0 ++#define CONFIG_CHROMAPRINT 0 ++#define CONFIG_GCRYPT 0 ++#define CONFIG_GNUTLS 0 ++#define CONFIG_JNI 0 ++#define CONFIG_LADSPA 0 ++#define CONFIG_LIBAOM 0 ++#define CONFIG_LIBASS 0 ++#define CONFIG_LIBBLURAY 0 ++#define CONFIG_LIBBS2B 0 ++#define CONFIG_LIBCACA 0 ++#define CONFIG_LIBCELT 0 ++#define CONFIG_LIBCODEC2 0 ++#define CONFIG_LIBDAV1D 0 ++#define CONFIG_LIBDC1394 0 ++#define CONFIG_LIBDRM 0 ++#define CONFIG_LIBFLITE 0 ++#define CONFIG_LIBFONTCONFIG 0 ++#define CONFIG_LIBFREETYPE 0 ++#define CONFIG_LIBFRIBIDI 0 ++#define CONFIG_LIBGLSLANG 0 ++#define CONFIG_LIBGME 0 ++#define CONFIG_LIBGSM 0 ++#define CONFIG_LIBIEC61883 0 ++#define CONFIG_LIBILBC 0 ++#define CONFIG_LIBJACK 0 ++#define CONFIG_LIBKLVANC 0 ++#define CONFIG_LIBKVAZAAR 0 ++#define CONFIG_LIBMODPLUG 0 ++#define CONFIG_LIBMP3LAME 0 ++#define CONFIG_LIBMYSOFA 0 ++#define CONFIG_LIBOPENCV 0 ++#define CONFIG_LIBOPENH264 0 ++#define CONFIG_LIBOPENJPEG 0 ++#define CONFIG_LIBOPENMPT 0 ++#define CONFIG_LIBOPUS 1 ++#define CONFIG_LIBPULSE 0 ++#define CONFIG_LIBRABBITMQ 0 ++#define CONFIG_LIBRAV1E 0 ++#define CONFIG_LIBRSVG 0 ++#define CONFIG_LIBRTMP 0 ++#define CONFIG_LIBSHINE 0 ++#define CONFIG_LIBSMBCLIENT 0 ++#define CONFIG_LIBSNAPPY 0 ++#define CONFIG_LIBSOXR 0 ++#define CONFIG_LIBSPEEX 0 ++#define CONFIG_LIBSRT 0 ++#define CONFIG_LIBSSH 0 ++#define CONFIG_LIBTENSORFLOW 0 ++#define CONFIG_LIBTESSERACT 0 ++#define CONFIG_LIBTHEORA 0 ++#define CONFIG_LIBTWOLAME 0 ++#define CONFIG_LIBV4L2 0 ++#define CONFIG_LIBVORBIS 0 ++#define CONFIG_LIBVPX 0 ++#define CONFIG_LIBWAVPACK 0 ++#define CONFIG_LIBWEBP 0 ++#define CONFIG_LIBXML2 0 ++#define CONFIG_LIBZIMG 0 ++#define CONFIG_LIBZMQ 0 ++#define CONFIG_LIBZVBI 0 ++#define CONFIG_LV2 0 ++#define CONFIG_MEDIACODEC 0 ++#define CONFIG_OPENAL 0 ++#define CONFIG_OPENGL 0 ++#define CONFIG_POCKETSPHINX 0 ++#define CONFIG_VAPOURSYNTH 0 ++#define CONFIG_ALSA 0 ++#define CONFIG_APPKIT 0 ++#define CONFIG_AVFOUNDATION 0 ++#define CONFIG_BZLIB 0 ++#define CONFIG_COREIMAGE 0 ++#define CONFIG_ICONV 0 ++#define CONFIG_LIBXCB 0 ++#define CONFIG_LIBXCB_SHM 0 ++#define CONFIG_LIBXCB_SHAPE 0 ++#define CONFIG_LIBXCB_XFIXES 0 ++#define CONFIG_LZMA 0 ++#define CONFIG_SCHANNEL 0 ++#define CONFIG_SDL2 0 ++#define CONFIG_SECURETRANSPORT 0 ++#define CONFIG_SNDIO 0 ++#define CONFIG_XLIB 0 ++#define CONFIG_ZLIB 0 ++#define CONFIG_CUDA_NVCC 0 ++#define CONFIG_CUDA_SDK 0 ++#define CONFIG_LIBNPP 0 ++#define CONFIG_LIBMFX 0 ++#define CONFIG_MMAL 0 ++#define CONFIG_OMX 0 ++#define CONFIG_OPENCL 0 ++#define CONFIG_VULKAN 0 ++#define CONFIG_AMF 0 ++#define CONFIG_AUDIOTOOLBOX 0 ++#define CONFIG_CRYSTALHD 0 ++#define CONFIG_CUDA 0 ++#define CONFIG_CUDA_LLVM 0 ++#define CONFIG_CUVID 0 ++#define CONFIG_D3D11VA 0 ++#define CONFIG_DXVA2 0 ++#define CONFIG_FFNVCODEC 0 ++#define CONFIG_NVDEC 0 ++#define CONFIG_NVENC 0 ++#define CONFIG_VAAPI 0 ++#define CONFIG_VDPAU 0 ++#define CONFIG_VIDEOTOOLBOX 0 ++#define CONFIG_V4L2_M2M 0 ++#define CONFIG_XVMC 0 ++#define CONFIG_FTRAPV 0 ++#define CONFIG_GRAY 0 ++#define CONFIG_HARDCODED_TABLES 0 ++#define CONFIG_OMX_RPI 0 ++#define CONFIG_RUNTIME_CPUDETECT 1 ++#define CONFIG_SAFE_BITSTREAM_READER 1 ++#define CONFIG_SHARED 0 ++#define CONFIG_SMALL 0 ++#define CONFIG_STATIC 1 
++#define CONFIG_SWSCALE_ALPHA 1 ++#define CONFIG_GPL 0 ++#define CONFIG_NONFREE 0 ++#define CONFIG_VERSION3 0 ++#define CONFIG_AVDEVICE 0 ++#define CONFIG_AVFILTER 0 ++#define CONFIG_SWSCALE 0 ++#define CONFIG_POSTPROC 0 ++#define CONFIG_AVFORMAT 1 ++#define CONFIG_AVCODEC 1 ++#define CONFIG_SWRESAMPLE 0 ++#define CONFIG_AVRESAMPLE 0 ++#define CONFIG_AVUTIL 1 ++#define CONFIG_FFPLAY 0 ++#define CONFIG_FFPROBE 0 ++#define CONFIG_FFMPEG 0 ++#define CONFIG_DCT 1 ++#define CONFIG_DWT 0 ++#define CONFIG_ERROR_RESILIENCE 0 ++#define CONFIG_FAAN 0 ++#define CONFIG_FAST_UNALIGNED 0 ++#define CONFIG_FFT 1 ++#define CONFIG_LSP 0 ++#define CONFIG_LZO 0 ++#define CONFIG_MDCT 1 ++#define CONFIG_PIXELUTILS 0 ++#define CONFIG_NETWORK 0 ++#define CONFIG_RDFT 1 ++#define CONFIG_AUTODETECT 0 ++#define CONFIG_FONTCONFIG 0 ++#define CONFIG_LARGE_TESTS 1 ++#define CONFIG_LINUX_PERF 0 ++#define CONFIG_MEMORY_POISONING 0 ++#define CONFIG_NEON_CLOBBER_TEST 0 ++#define CONFIG_OSSFUZZ 0 ++#define CONFIG_PIC 1 ++#define CONFIG_THUMB 0 ++#define CONFIG_VALGRIND_BACKTRACE 0 ++#define CONFIG_XMM_CLOBBER_TEST 0 ++#define CONFIG_BSFS 1 ++#define CONFIG_DECODERS 1 ++#define CONFIG_ENCODERS 0 ++#define CONFIG_HWACCELS 0 ++#define CONFIG_PARSERS 1 ++#define CONFIG_INDEVS 0 ++#define CONFIG_OUTDEVS 0 ++#define CONFIG_FILTERS 0 ++#define CONFIG_DEMUXERS 1 ++#define CONFIG_MUXERS 0 ++#define CONFIG_PROTOCOLS 0 ++#define CONFIG_AANDCTTABLES 0 ++#define CONFIG_AC3DSP 0 ++#define CONFIG_ADTS_HEADER 1 ++#define CONFIG_AUDIO_FRAME_QUEUE 0 ++#define CONFIG_AUDIODSP 0 ++#define CONFIG_BLOCKDSP 0 ++#define CONFIG_BSWAPDSP 0 ++#define CONFIG_CABAC 1 ++#define CONFIG_CBS 0 ++#define CONFIG_CBS_AV1 0 ++#define CONFIG_CBS_H264 0 ++#define CONFIG_CBS_H265 0 ++#define CONFIG_CBS_JPEG 0 ++#define CONFIG_CBS_MPEG2 0 ++#define CONFIG_CBS_VP9 0 ++#define CONFIG_DIRAC_PARSE 1 ++#define CONFIG_DNN 0 ++#define CONFIG_DVPROFILE 0 ++#define CONFIG_EXIF 0 ++#define CONFIG_FAANDCT 0 ++#define CONFIG_FAANIDCT 0 ++#define CONFIG_FDCTDSP 0 ++#define CONFIG_FLACDSP 1 ++#define CONFIG_FMTCONVERT 0 ++#define CONFIG_FRAME_THREAD_ENCODER 0 ++#define CONFIG_G722DSP 0 ++#define CONFIG_GOLOMB 1 ++#define CONFIG_GPLV3 0 ++#define CONFIG_H263DSP 0 ++#define CONFIG_H264CHROMA 1 ++#define CONFIG_H264DSP 1 ++#define CONFIG_H264PARSE 1 ++#define CONFIG_H264PRED 1 ++#define CONFIG_H264QPEL 1 ++#define CONFIG_HEVCPARSE 0 ++#define CONFIG_HPELDSP 1 ++#define CONFIG_HUFFMAN 0 ++#define CONFIG_HUFFYUVDSP 0 ++#define CONFIG_HUFFYUVENCDSP 0 ++#define CONFIG_IDCTDSP 0 ++#define CONFIG_IIRFILTER 0 ++#define CONFIG_MDCT15 1 ++#define CONFIG_INTRAX8 0 ++#define CONFIG_ISO_MEDIA 1 ++#define CONFIG_IVIDSP 0 ++#define CONFIG_JPEGTABLES 0 ++#define CONFIG_LGPLV3 0 ++#define CONFIG_LIBX262 0 ++#define CONFIG_LLAUDDSP 0 ++#define CONFIG_LLVIDDSP 0 ++#define CONFIG_LLVIDENCDSP 0 ++#define CONFIG_LPC 0 ++#define CONFIG_LZF 0 ++#define CONFIG_ME_CMP 0 ++#define CONFIG_MPEG_ER 0 ++#define CONFIG_MPEGAUDIO 1 ++#define CONFIG_MPEGAUDIODSP 1 ++#define CONFIG_MPEGAUDIOHEADER 1 ++#define CONFIG_MPEGVIDEO 0 ++#define CONFIG_MPEGVIDEOENC 0 ++#define CONFIG_MSS34DSP 0 ++#define CONFIG_PIXBLOCKDSP 0 ++#define CONFIG_QPELDSP 0 ++#define CONFIG_QSV 0 ++#define CONFIG_QSVDEC 0 ++#define CONFIG_QSVENC 0 ++#define CONFIG_QSVVPP 0 ++#define CONFIG_RANGECODER 0 ++#define CONFIG_RIFFDEC 1 ++#define CONFIG_RIFFENC 0 ++#define CONFIG_RTPDEC 0 ++#define CONFIG_RTPENC_CHAIN 0 ++#define CONFIG_RV34DSP 0 ++#define CONFIG_SCENE_SAD 0 ++#define CONFIG_SINEWIN 1 ++#define CONFIG_SNAPPY 0 ++#define CONFIG_SRTP 0 
++#define CONFIG_STARTCODE 1 ++#define CONFIG_TEXTUREDSP 0 ++#define CONFIG_TEXTUREDSPENC 0 ++#define CONFIG_TPELDSP 0 ++#define CONFIG_VAAPI_1 0 ++#define CONFIG_VAAPI_ENCODE 0 ++#define CONFIG_VC1DSP 0 ++#define CONFIG_VIDEODSP 1 ++#define CONFIG_VP3DSP 1 ++#define CONFIG_VP56DSP 0 ++#define CONFIG_VP8DSP 1 ++#define CONFIG_WMA_FREQS 0 ++#define CONFIG_WMV2DSP 0 ++#define CONFIG_AAC_ADTSTOASC_BSF 0 ++#define CONFIG_AV1_FRAME_MERGE_BSF 0 ++#define CONFIG_AV1_FRAME_SPLIT_BSF 0 ++#define CONFIG_AV1_METADATA_BSF 0 ++#define CONFIG_CHOMP_BSF 0 ++#define CONFIG_DUMP_EXTRADATA_BSF 0 ++#define CONFIG_DCA_CORE_BSF 0 ++#define CONFIG_EAC3_CORE_BSF 0 ++#define CONFIG_EXTRACT_EXTRADATA_BSF 0 ++#define CONFIG_FILTER_UNITS_BSF 0 ++#define CONFIG_H264_METADATA_BSF 0 ++#define CONFIG_H264_MP4TOANNEXB_BSF 0 ++#define CONFIG_H264_REDUNDANT_PPS_BSF 0 ++#define CONFIG_HAPQA_EXTRACT_BSF 0 ++#define CONFIG_HEVC_METADATA_BSF 0 ++#define CONFIG_HEVC_MP4TOANNEXB_BSF 0 ++#define CONFIG_IMX_DUMP_HEADER_BSF 0 ++#define CONFIG_MJPEG2JPEG_BSF 0 ++#define CONFIG_MJPEGA_DUMP_HEADER_BSF 0 ++#define CONFIG_MP3_HEADER_DECOMPRESS_BSF 0 ++#define CONFIG_MPEG2_METADATA_BSF 0 ++#define CONFIG_MPEG4_UNPACK_BFRAMES_BSF 0 ++#define CONFIG_MOV2TEXTSUB_BSF 0 ++#define CONFIG_NOISE_BSF 0 ++#define CONFIG_NULL_BSF 1 ++#define CONFIG_PRORES_METADATA_BSF 0 ++#define CONFIG_REMOVE_EXTRADATA_BSF 0 ++#define CONFIG_TEXT2MOVSUB_BSF 0 ++#define CONFIG_TRACE_HEADERS_BSF 0 ++#define CONFIG_TRUEHD_CORE_BSF 0 ++#define CONFIG_VP9_METADATA_BSF 0 ++#define CONFIG_VP9_RAW_REORDER_BSF 0 ++#define CONFIG_VP9_SUPERFRAME_BSF 0 ++#define CONFIG_VP9_SUPERFRAME_SPLIT_BSF 0 ++#define CONFIG_AASC_DECODER 0 ++#define CONFIG_AIC_DECODER 0 ++#define CONFIG_ALIAS_PIX_DECODER 0 ++#define CONFIG_AGM_DECODER 0 ++#define CONFIG_AMV_DECODER 0 ++#define CONFIG_ANM_DECODER 0 ++#define CONFIG_ANSI_DECODER 0 ++#define CONFIG_APNG_DECODER 0 ++#define CONFIG_ARBC_DECODER 0 ++#define CONFIG_ASV1_DECODER 0 ++#define CONFIG_ASV2_DECODER 0 ++#define CONFIG_AURA_DECODER 0 ++#define CONFIG_AURA2_DECODER 0 ++#define CONFIG_AVRP_DECODER 0 ++#define CONFIG_AVRN_DECODER 0 ++#define CONFIG_AVS_DECODER 0 ++#define CONFIG_AVUI_DECODER 0 ++#define CONFIG_AYUV_DECODER 0 ++#define CONFIG_BETHSOFTVID_DECODER 0 ++#define CONFIG_BFI_DECODER 0 ++#define CONFIG_BINK_DECODER 0 ++#define CONFIG_BITPACKED_DECODER 0 ++#define CONFIG_BMP_DECODER 0 ++#define CONFIG_BMV_VIDEO_DECODER 0 ++#define CONFIG_BRENDER_PIX_DECODER 0 ++#define CONFIG_C93_DECODER 0 ++#define CONFIG_CAVS_DECODER 0 ++#define CONFIG_CDGRAPHICS_DECODER 0 ++#define CONFIG_CDTOONS_DECODER 0 ++#define CONFIG_CDXL_DECODER 0 ++#define CONFIG_CFHD_DECODER 0 ++#define CONFIG_CINEPAK_DECODER 0 ++#define CONFIG_CLEARVIDEO_DECODER 0 ++#define CONFIG_CLJR_DECODER 0 ++#define CONFIG_CLLC_DECODER 0 ++#define CONFIG_COMFORTNOISE_DECODER 0 ++#define CONFIG_CPIA_DECODER 0 ++#define CONFIG_CSCD_DECODER 0 ++#define CONFIG_CYUV_DECODER 0 ++#define CONFIG_DDS_DECODER 0 ++#define CONFIG_DFA_DECODER 0 ++#define CONFIG_DIRAC_DECODER 0 ++#define CONFIG_DNXHD_DECODER 0 ++#define CONFIG_DPX_DECODER 0 ++#define CONFIG_DSICINVIDEO_DECODER 0 ++#define CONFIG_DVAUDIO_DECODER 0 ++#define CONFIG_DVVIDEO_DECODER 0 ++#define CONFIG_DXA_DECODER 0 ++#define CONFIG_DXTORY_DECODER 0 ++#define CONFIG_DXV_DECODER 0 ++#define CONFIG_EACMV_DECODER 0 ++#define CONFIG_EAMAD_DECODER 0 ++#define CONFIG_EATGQ_DECODER 0 ++#define CONFIG_EATGV_DECODER 0 ++#define CONFIG_EATQI_DECODER 0 ++#define CONFIG_EIGHTBPS_DECODER 0 ++#define CONFIG_EIGHTSVX_EXP_DECODER 0 ++#define 
CONFIG_EIGHTSVX_FIB_DECODER 0 ++#define CONFIG_ESCAPE124_DECODER 0 ++#define CONFIG_ESCAPE130_DECODER 0 ++#define CONFIG_EXR_DECODER 0 ++#define CONFIG_FFV1_DECODER 0 ++#define CONFIG_FFVHUFF_DECODER 0 ++#define CONFIG_FIC_DECODER 0 ++#define CONFIG_FITS_DECODER 0 ++#define CONFIG_FLASHSV_DECODER 0 ++#define CONFIG_FLASHSV2_DECODER 0 ++#define CONFIG_FLIC_DECODER 0 ++#define CONFIG_FLV_DECODER 0 ++#define CONFIG_FMVC_DECODER 0 ++#define CONFIG_FOURXM_DECODER 0 ++#define CONFIG_FRAPS_DECODER 0 ++#define CONFIG_FRWU_DECODER 0 ++#define CONFIG_G2M_DECODER 0 ++#define CONFIG_GDV_DECODER 0 ++#define CONFIG_GIF_DECODER 0 ++#define CONFIG_H261_DECODER 0 ++#define CONFIG_H263_DECODER 0 ++#define CONFIG_H263I_DECODER 0 ++#define CONFIG_H263P_DECODER 0 ++#define CONFIG_H263_V4L2M2M_DECODER 0 ++#define CONFIG_H264_DECODER 1 ++#define CONFIG_H264_CRYSTALHD_DECODER 0 ++#define CONFIG_H264_V4L2M2M_DECODER 0 ++#define CONFIG_H264_MEDIACODEC_DECODER 0 ++#define CONFIG_H264_MMAL_DECODER 0 ++#define CONFIG_H264_QSV_DECODER 0 ++#define CONFIG_H264_RKMPP_DECODER 0 ++#define CONFIG_HAP_DECODER 0 ++#define CONFIG_HEVC_DECODER 0 ++#define CONFIG_HEVC_QSV_DECODER 0 ++#define CONFIG_HEVC_RKMPP_DECODER 0 ++#define CONFIG_HEVC_V4L2M2M_DECODER 0 ++#define CONFIG_HNM4_VIDEO_DECODER 0 ++#define CONFIG_HQ_HQA_DECODER 0 ++#define CONFIG_HQX_DECODER 0 ++#define CONFIG_HUFFYUV_DECODER 0 ++#define CONFIG_HYMT_DECODER 0 ++#define CONFIG_IDCIN_DECODER 0 ++#define CONFIG_IFF_ILBM_DECODER 0 ++#define CONFIG_IMM4_DECODER 0 ++#define CONFIG_IMM5_DECODER 0 ++#define CONFIG_INDEO2_DECODER 0 ++#define CONFIG_INDEO3_DECODER 0 ++#define CONFIG_INDEO4_DECODER 0 ++#define CONFIG_INDEO5_DECODER 0 ++#define CONFIG_INTERPLAY_VIDEO_DECODER 0 ++#define CONFIG_JPEG2000_DECODER 0 ++#define CONFIG_JPEGLS_DECODER 0 ++#define CONFIG_JV_DECODER 0 ++#define CONFIG_KGV1_DECODER 0 ++#define CONFIG_KMVC_DECODER 0 ++#define CONFIG_LAGARITH_DECODER 0 ++#define CONFIG_LOCO_DECODER 0 ++#define CONFIG_LSCR_DECODER 0 ++#define CONFIG_M101_DECODER 0 ++#define CONFIG_MAGICYUV_DECODER 0 ++#define CONFIG_MDEC_DECODER 0 ++#define CONFIG_MIMIC_DECODER 0 ++#define CONFIG_MJPEG_DECODER 0 ++#define CONFIG_MJPEGB_DECODER 0 ++#define CONFIG_MMVIDEO_DECODER 0 ++#define CONFIG_MOTIONPIXELS_DECODER 0 ++#define CONFIG_MPEG1VIDEO_DECODER 0 ++#define CONFIG_MPEG2VIDEO_DECODER 0 ++#define CONFIG_MPEG4_DECODER 0 ++#define CONFIG_MPEG4_CRYSTALHD_DECODER 0 ++#define CONFIG_MPEG4_V4L2M2M_DECODER 0 ++#define CONFIG_MPEG4_MMAL_DECODER 0 ++#define CONFIG_MPEGVIDEO_DECODER 0 ++#define CONFIG_MPEG1_V4L2M2M_DECODER 0 ++#define CONFIG_MPEG2_MMAL_DECODER 0 ++#define CONFIG_MPEG2_CRYSTALHD_DECODER 0 ++#define CONFIG_MPEG2_V4L2M2M_DECODER 0 ++#define CONFIG_MPEG2_QSV_DECODER 0 ++#define CONFIG_MPEG2_MEDIACODEC_DECODER 0 ++#define CONFIG_MSA1_DECODER 0 ++#define CONFIG_MSCC_DECODER 0 ++#define CONFIG_MSMPEG4V1_DECODER 0 ++#define CONFIG_MSMPEG4V2_DECODER 0 ++#define CONFIG_MSMPEG4V3_DECODER 0 ++#define CONFIG_MSMPEG4_CRYSTALHD_DECODER 0 ++#define CONFIG_MSRLE_DECODER 0 ++#define CONFIG_MSS1_DECODER 0 ++#define CONFIG_MSS2_DECODER 0 ++#define CONFIG_MSVIDEO1_DECODER 0 ++#define CONFIG_MSZH_DECODER 0 ++#define CONFIG_MTS2_DECODER 0 ++#define CONFIG_MV30_DECODER 0 ++#define CONFIG_MVC1_DECODER 0 ++#define CONFIG_MVC2_DECODER 0 ++#define CONFIG_MVDV_DECODER 0 ++#define CONFIG_MVHA_DECODER 0 ++#define CONFIG_MWSC_DECODER 0 ++#define CONFIG_MXPEG_DECODER 0 ++#define CONFIG_NUV_DECODER 0 ++#define CONFIG_PAF_VIDEO_DECODER 0 ++#define CONFIG_PAM_DECODER 0 ++#define CONFIG_PBM_DECODER 0 ++#define 
CONFIG_PCX_DECODER 0 ++#define CONFIG_PGM_DECODER 0 ++#define CONFIG_PGMYUV_DECODER 0 ++#define CONFIG_PICTOR_DECODER 0 ++#define CONFIG_PIXLET_DECODER 0 ++#define CONFIG_PNG_DECODER 0 ++#define CONFIG_PPM_DECODER 0 ++#define CONFIG_PRORES_DECODER 0 ++#define CONFIG_PROSUMER_DECODER 0 ++#define CONFIG_PSD_DECODER 0 ++#define CONFIG_PTX_DECODER 0 ++#define CONFIG_QDRAW_DECODER 0 ++#define CONFIG_QPEG_DECODER 0 ++#define CONFIG_QTRLE_DECODER 0 ++#define CONFIG_R10K_DECODER 0 ++#define CONFIG_R210_DECODER 0 ++#define CONFIG_RASC_DECODER 0 ++#define CONFIG_RAWVIDEO_DECODER 0 ++#define CONFIG_RL2_DECODER 0 ++#define CONFIG_ROQ_DECODER 0 ++#define CONFIG_RPZA_DECODER 0 ++#define CONFIG_RSCC_DECODER 0 ++#define CONFIG_RV10_DECODER 0 ++#define CONFIG_RV20_DECODER 0 ++#define CONFIG_RV30_DECODER 0 ++#define CONFIG_RV40_DECODER 0 ++#define CONFIG_S302M_DECODER 0 ++#define CONFIG_SANM_DECODER 0 ++#define CONFIG_SCPR_DECODER 0 ++#define CONFIG_SCREENPRESSO_DECODER 0 ++#define CONFIG_SGI_DECODER 0 ++#define CONFIG_SGIRLE_DECODER 0 ++#define CONFIG_SHEERVIDEO_DECODER 0 ++#define CONFIG_SMACKER_DECODER 0 ++#define CONFIG_SMC_DECODER 0 ++#define CONFIG_SMVJPEG_DECODER 0 ++#define CONFIG_SNOW_DECODER 0 ++#define CONFIG_SP5X_DECODER 0 ++#define CONFIG_SPEEDHQ_DECODER 0 ++#define CONFIG_SRGC_DECODER 0 ++#define CONFIG_SUNRAST_DECODER 0 ++#define CONFIG_SVQ1_DECODER 0 ++#define CONFIG_SVQ3_DECODER 0 ++#define CONFIG_TARGA_DECODER 0 ++#define CONFIG_TARGA_Y216_DECODER 0 ++#define CONFIG_TDSC_DECODER 0 ++#define CONFIG_THEORA_DECODER 1 ++#define CONFIG_THP_DECODER 0 ++#define CONFIG_TIERTEXSEQVIDEO_DECODER 0 ++#define CONFIG_TIFF_DECODER 0 ++#define CONFIG_TMV_DECODER 0 ++#define CONFIG_TRUEMOTION1_DECODER 0 ++#define CONFIG_TRUEMOTION2_DECODER 0 ++#define CONFIG_TRUEMOTION2RT_DECODER 0 ++#define CONFIG_TSCC_DECODER 0 ++#define CONFIG_TSCC2_DECODER 0 ++#define CONFIG_TXD_DECODER 0 ++#define CONFIG_ULTI_DECODER 0 ++#define CONFIG_UTVIDEO_DECODER 0 ++#define CONFIG_V210_DECODER 0 ++#define CONFIG_V210X_DECODER 0 ++#define CONFIG_V308_DECODER 0 ++#define CONFIG_V408_DECODER 0 ++#define CONFIG_V410_DECODER 0 ++#define CONFIG_VB_DECODER 0 ++#define CONFIG_VBLE_DECODER 0 ++#define CONFIG_VC1_DECODER 0 ++#define CONFIG_VC1_CRYSTALHD_DECODER 0 ++#define CONFIG_VC1IMAGE_DECODER 0 ++#define CONFIG_VC1_MMAL_DECODER 0 ++#define CONFIG_VC1_QSV_DECODER 0 ++#define CONFIG_VC1_V4L2M2M_DECODER 0 ++#define CONFIG_VCR1_DECODER 0 ++#define CONFIG_VMDVIDEO_DECODER 0 ++#define CONFIG_VMNC_DECODER 0 ++#define CONFIG_VP3_DECODER 1 ++#define CONFIG_VP4_DECODER 0 ++#define CONFIG_VP5_DECODER 0 ++#define CONFIG_VP6_DECODER 0 ++#define CONFIG_VP6A_DECODER 0 ++#define CONFIG_VP6F_DECODER 0 ++#define CONFIG_VP7_DECODER 0 ++#define CONFIG_VP8_DECODER 1 ++#define CONFIG_VP8_RKMPP_DECODER 0 ++#define CONFIG_VP8_V4L2M2M_DECODER 0 ++#define CONFIG_VP9_DECODER 0 ++#define CONFIG_VP9_RKMPP_DECODER 0 ++#define CONFIG_VP9_V4L2M2M_DECODER 0 ++#define CONFIG_VQA_DECODER 0 ++#define CONFIG_WEBP_DECODER 0 ++#define CONFIG_WCMV_DECODER 0 ++#define CONFIG_WRAPPED_AVFRAME_DECODER 0 ++#define CONFIG_WMV1_DECODER 0 ++#define CONFIG_WMV2_DECODER 0 ++#define CONFIG_WMV3_DECODER 0 ++#define CONFIG_WMV3_CRYSTALHD_DECODER 0 ++#define CONFIG_WMV3IMAGE_DECODER 0 ++#define CONFIG_WNV1_DECODER 0 ++#define CONFIG_XAN_WC3_DECODER 0 ++#define CONFIG_XAN_WC4_DECODER 0 ++#define CONFIG_XBM_DECODER 0 ++#define CONFIG_XFACE_DECODER 0 ++#define CONFIG_XL_DECODER 0 ++#define CONFIG_XPM_DECODER 0 ++#define CONFIG_XWD_DECODER 0 ++#define CONFIG_Y41P_DECODER 0 ++#define 
CONFIG_YLC_DECODER 0 ++#define CONFIG_YOP_DECODER 0 ++#define CONFIG_YUV4_DECODER 0 ++#define CONFIG_ZERO12V_DECODER 0 ++#define CONFIG_ZEROCODEC_DECODER 0 ++#define CONFIG_ZLIB_DECODER 0 ++#define CONFIG_ZMBV_DECODER 0 ++#define CONFIG_AAC_DECODER 1 ++#define CONFIG_AAC_FIXED_DECODER 0 ++#define CONFIG_AAC_LATM_DECODER 0 ++#define CONFIG_AC3_DECODER 0 ++#define CONFIG_AC3_FIXED_DECODER 0 ++#define CONFIG_ACELP_KELVIN_DECODER 0 ++#define CONFIG_ALAC_DECODER 0 ++#define CONFIG_ALS_DECODER 0 ++#define CONFIG_AMRNB_DECODER 0 ++#define CONFIG_AMRWB_DECODER 0 ++#define CONFIG_APE_DECODER 0 ++#define CONFIG_APTX_DECODER 0 ++#define CONFIG_APTX_HD_DECODER 0 ++#define CONFIG_ATRAC1_DECODER 0 ++#define CONFIG_ATRAC3_DECODER 0 ++#define CONFIG_ATRAC3AL_DECODER 0 ++#define CONFIG_ATRAC3P_DECODER 0 ++#define CONFIG_ATRAC3PAL_DECODER 0 ++#define CONFIG_ATRAC9_DECODER 0 ++#define CONFIG_BINKAUDIO_DCT_DECODER 0 ++#define CONFIG_BINKAUDIO_RDFT_DECODER 0 ++#define CONFIG_BMV_AUDIO_DECODER 0 ++#define CONFIG_COOK_DECODER 0 ++#define CONFIG_DCA_DECODER 0 ++#define CONFIG_DOLBY_E_DECODER 0 ++#define CONFIG_DSD_LSBF_DECODER 0 ++#define CONFIG_DSD_MSBF_DECODER 0 ++#define CONFIG_DSD_LSBF_PLANAR_DECODER 0 ++#define CONFIG_DSD_MSBF_PLANAR_DECODER 0 ++#define CONFIG_DSICINAUDIO_DECODER 0 ++#define CONFIG_DSS_SP_DECODER 0 ++#define CONFIG_DST_DECODER 0 ++#define CONFIG_EAC3_DECODER 0 ++#define CONFIG_EVRC_DECODER 0 ++#define CONFIG_FFWAVESYNTH_DECODER 0 ++#define CONFIG_FLAC_DECODER 1 ++#define CONFIG_G723_1_DECODER 0 ++#define CONFIG_G729_DECODER 0 ++#define CONFIG_GSM_DECODER 0 ++#define CONFIG_GSM_MS_DECODER 0 ++#define CONFIG_HCA_DECODER 0 ++#define CONFIG_HCOM_DECODER 0 ++#define CONFIG_IAC_DECODER 0 ++#define CONFIG_ILBC_DECODER 0 ++#define CONFIG_IMC_DECODER 0 ++#define CONFIG_INTERPLAY_ACM_DECODER 0 ++#define CONFIG_MACE3_DECODER 0 ++#define CONFIG_MACE6_DECODER 0 ++#define CONFIG_METASOUND_DECODER 0 ++#define CONFIG_MLP_DECODER 0 ++#define CONFIG_MP1_DECODER 0 ++#define CONFIG_MP1FLOAT_DECODER 0 ++#define CONFIG_MP2_DECODER 0 ++#define CONFIG_MP2FLOAT_DECODER 0 ++#define CONFIG_MP3FLOAT_DECODER 0 ++#define CONFIG_MP3_DECODER 1 ++#define CONFIG_MP3ADUFLOAT_DECODER 0 ++#define CONFIG_MP3ADU_DECODER 0 ++#define CONFIG_MP3ON4FLOAT_DECODER 0 ++#define CONFIG_MP3ON4_DECODER 0 ++#define CONFIG_MPC7_DECODER 0 ++#define CONFIG_MPC8_DECODER 0 ++#define CONFIG_NELLYMOSER_DECODER 0 ++#define CONFIG_ON2AVC_DECODER 0 ++#define CONFIG_OPUS_DECODER 0 ++#define CONFIG_PAF_AUDIO_DECODER 0 ++#define CONFIG_QCELP_DECODER 0 ++#define CONFIG_QDM2_DECODER 0 ++#define CONFIG_QDMC_DECODER 0 ++#define CONFIG_RA_144_DECODER 0 ++#define CONFIG_RA_288_DECODER 0 ++#define CONFIG_RALF_DECODER 0 ++#define CONFIG_SBC_DECODER 0 ++#define CONFIG_SHORTEN_DECODER 0 ++#define CONFIG_SIPR_DECODER 0 ++#define CONFIG_SIREN_DECODER 0 ++#define CONFIG_SMACKAUD_DECODER 0 ++#define CONFIG_SONIC_DECODER 0 ++#define CONFIG_TAK_DECODER 0 ++#define CONFIG_TRUEHD_DECODER 0 ++#define CONFIG_TRUESPEECH_DECODER 0 ++#define CONFIG_TTA_DECODER 0 ++#define CONFIG_TWINVQ_DECODER 0 ++#define CONFIG_VMDAUDIO_DECODER 0 ++#define CONFIG_VORBIS_DECODER 1 ++#define CONFIG_WAVPACK_DECODER 0 ++#define CONFIG_WMALOSSLESS_DECODER 0 ++#define CONFIG_WMAPRO_DECODER 0 ++#define CONFIG_WMAV1_DECODER 0 ++#define CONFIG_WMAV2_DECODER 0 ++#define CONFIG_WMAVOICE_DECODER 0 ++#define CONFIG_WS_SND1_DECODER 0 ++#define CONFIG_XMA1_DECODER 0 ++#define CONFIG_XMA2_DECODER 0 ++#define CONFIG_PCM_ALAW_DECODER 1 ++#define CONFIG_PCM_BLURAY_DECODER 0 ++#define CONFIG_PCM_DVD_DECODER 0 
++#define CONFIG_PCM_F16LE_DECODER 0 ++#define CONFIG_PCM_F24LE_DECODER 0 ++#define CONFIG_PCM_F32BE_DECODER 0 ++#define CONFIG_PCM_F32LE_DECODER 1 ++#define CONFIG_PCM_F64BE_DECODER 0 ++#define CONFIG_PCM_F64LE_DECODER 0 ++#define CONFIG_PCM_LXF_DECODER 0 ++#define CONFIG_PCM_MULAW_DECODER 1 ++#define CONFIG_PCM_S8_DECODER 0 ++#define CONFIG_PCM_S8_PLANAR_DECODER 0 ++#define CONFIG_PCM_S16BE_DECODER 1 ++#define CONFIG_PCM_S16BE_PLANAR_DECODER 0 ++#define CONFIG_PCM_S16LE_DECODER 1 ++#define CONFIG_PCM_S16LE_PLANAR_DECODER 0 ++#define CONFIG_PCM_S24BE_DECODER 1 ++#define CONFIG_PCM_S24DAUD_DECODER 0 ++#define CONFIG_PCM_S24LE_DECODER 1 ++#define CONFIG_PCM_S24LE_PLANAR_DECODER 0 ++#define CONFIG_PCM_S32BE_DECODER 0 ++#define CONFIG_PCM_S32LE_DECODER 1 ++#define CONFIG_PCM_S32LE_PLANAR_DECODER 0 ++#define CONFIG_PCM_S64BE_DECODER 0 ++#define CONFIG_PCM_S64LE_DECODER 0 ++#define CONFIG_PCM_U8_DECODER 1 ++#define CONFIG_PCM_U16BE_DECODER 0 ++#define CONFIG_PCM_U16LE_DECODER 0 ++#define CONFIG_PCM_U24BE_DECODER 0 ++#define CONFIG_PCM_U24LE_DECODER 0 ++#define CONFIG_PCM_U32BE_DECODER 0 ++#define CONFIG_PCM_U32LE_DECODER 0 ++#define CONFIG_PCM_VIDC_DECODER 0 ++#define CONFIG_DERF_DPCM_DECODER 0 ++#define CONFIG_GREMLIN_DPCM_DECODER 0 ++#define CONFIG_INTERPLAY_DPCM_DECODER 0 ++#define CONFIG_ROQ_DPCM_DECODER 0 ++#define CONFIG_SDX2_DPCM_DECODER 0 ++#define CONFIG_SOL_DPCM_DECODER 0 ++#define CONFIG_XAN_DPCM_DECODER 0 ++#define CONFIG_ADPCM_4XM_DECODER 0 ++#define CONFIG_ADPCM_ADX_DECODER 0 ++#define CONFIG_ADPCM_AFC_DECODER 0 ++#define CONFIG_ADPCM_AGM_DECODER 0 ++#define CONFIG_ADPCM_AICA_DECODER 0 ++#define CONFIG_ADPCM_ARGO_DECODER 0 ++#define CONFIG_ADPCM_CT_DECODER 0 ++#define CONFIG_ADPCM_DTK_DECODER 0 ++#define CONFIG_ADPCM_EA_DECODER 0 ++#define CONFIG_ADPCM_EA_MAXIS_XA_DECODER 0 ++#define CONFIG_ADPCM_EA_R1_DECODER 0 ++#define CONFIG_ADPCM_EA_R2_DECODER 0 ++#define CONFIG_ADPCM_EA_R3_DECODER 0 ++#define CONFIG_ADPCM_EA_XAS_DECODER 0 ++#define CONFIG_ADPCM_G722_DECODER 0 ++#define CONFIG_ADPCM_G726_DECODER 0 ++#define CONFIG_ADPCM_G726LE_DECODER 0 ++#define CONFIG_ADPCM_IMA_AMV_DECODER 0 ++#define CONFIG_ADPCM_IMA_ALP_DECODER 0 ++#define CONFIG_ADPCM_IMA_APC_DECODER 0 ++#define CONFIG_ADPCM_IMA_APM_DECODER 0 ++#define CONFIG_ADPCM_IMA_CUNNING_DECODER 0 ++#define CONFIG_ADPCM_IMA_DAT4_DECODER 0 ++#define CONFIG_ADPCM_IMA_DK3_DECODER 0 ++#define CONFIG_ADPCM_IMA_DK4_DECODER 0 ++#define CONFIG_ADPCM_IMA_EA_EACS_DECODER 0 ++#define CONFIG_ADPCM_IMA_EA_SEAD_DECODER 0 ++#define CONFIG_ADPCM_IMA_ISS_DECODER 0 ++#define CONFIG_ADPCM_IMA_MTF_DECODER 0 ++#define CONFIG_ADPCM_IMA_OKI_DECODER 0 ++#define CONFIG_ADPCM_IMA_QT_DECODER 0 ++#define CONFIG_ADPCM_IMA_RAD_DECODER 0 ++#define CONFIG_ADPCM_IMA_SSI_DECODER 0 ++#define CONFIG_ADPCM_IMA_SMJPEG_DECODER 0 ++#define CONFIG_ADPCM_IMA_WAV_DECODER 0 ++#define CONFIG_ADPCM_IMA_WS_DECODER 0 ++#define CONFIG_ADPCM_MS_DECODER 0 ++#define CONFIG_ADPCM_MTAF_DECODER 0 ++#define CONFIG_ADPCM_PSX_DECODER 0 ++#define CONFIG_ADPCM_SBPRO_2_DECODER 0 ++#define CONFIG_ADPCM_SBPRO_3_DECODER 0 ++#define CONFIG_ADPCM_SBPRO_4_DECODER 0 ++#define CONFIG_ADPCM_SWF_DECODER 0 ++#define CONFIG_ADPCM_THP_DECODER 0 ++#define CONFIG_ADPCM_THP_LE_DECODER 0 ++#define CONFIG_ADPCM_VIMA_DECODER 0 ++#define CONFIG_ADPCM_XA_DECODER 0 ++#define CONFIG_ADPCM_YAMAHA_DECODER 0 ++#define CONFIG_ADPCM_ZORK_DECODER 0 ++#define CONFIG_SSA_DECODER 0 ++#define CONFIG_ASS_DECODER 0 ++#define CONFIG_CCAPTION_DECODER 0 ++#define CONFIG_DVBSUB_DECODER 0 ++#define CONFIG_DVDSUB_DECODER 0 
++#define CONFIG_JACOSUB_DECODER 0 ++#define CONFIG_MICRODVD_DECODER 0 ++#define CONFIG_MOVTEXT_DECODER 0 ++#define CONFIG_MPL2_DECODER 0 ++#define CONFIG_PGSSUB_DECODER 0 ++#define CONFIG_PJS_DECODER 0 ++#define CONFIG_REALTEXT_DECODER 0 ++#define CONFIG_SAMI_DECODER 0 ++#define CONFIG_SRT_DECODER 0 ++#define CONFIG_STL_DECODER 0 ++#define CONFIG_SUBRIP_DECODER 0 ++#define CONFIG_SUBVIEWER_DECODER 0 ++#define CONFIG_SUBVIEWER1_DECODER 0 ++#define CONFIG_TEXT_DECODER 0 ++#define CONFIG_VPLAYER_DECODER 0 ++#define CONFIG_WEBVTT_DECODER 0 ++#define CONFIG_XSUB_DECODER 0 ++#define CONFIG_AAC_AT_DECODER 0 ++#define CONFIG_AC3_AT_DECODER 0 ++#define CONFIG_ADPCM_IMA_QT_AT_DECODER 0 ++#define CONFIG_ALAC_AT_DECODER 0 ++#define CONFIG_AMR_NB_AT_DECODER 0 ++#define CONFIG_EAC3_AT_DECODER 0 ++#define CONFIG_GSM_MS_AT_DECODER 0 ++#define CONFIG_ILBC_AT_DECODER 0 ++#define CONFIG_MP1_AT_DECODER 0 ++#define CONFIG_MP2_AT_DECODER 0 ++#define CONFIG_MP3_AT_DECODER 0 ++#define CONFIG_PCM_ALAW_AT_DECODER 0 ++#define CONFIG_PCM_MULAW_AT_DECODER 0 ++#define CONFIG_QDMC_AT_DECODER 0 ++#define CONFIG_QDM2_AT_DECODER 0 ++#define CONFIG_LIBARIBB24_DECODER 0 ++#define CONFIG_LIBCELT_DECODER 0 ++#define CONFIG_LIBCODEC2_DECODER 0 ++#define CONFIG_LIBDAV1D_DECODER 0 ++#define CONFIG_LIBDAVS2_DECODER 0 ++#define CONFIG_LIBFDK_AAC_DECODER 0 ++#define CONFIG_LIBGSM_DECODER 0 ++#define CONFIG_LIBGSM_MS_DECODER 0 ++#define CONFIG_LIBILBC_DECODER 0 ++#define CONFIG_LIBOPENCORE_AMRNB_DECODER 0 ++#define CONFIG_LIBOPENCORE_AMRWB_DECODER 0 ++#define CONFIG_LIBOPENJPEG_DECODER 0 ++#define CONFIG_LIBOPUS_DECODER 1 ++#define CONFIG_LIBRSVG_DECODER 0 ++#define CONFIG_LIBSPEEX_DECODER 0 ++#define CONFIG_LIBVORBIS_DECODER 0 ++#define CONFIG_LIBVPX_VP8_DECODER 0 ++#define CONFIG_LIBVPX_VP9_DECODER 0 ++#define CONFIG_LIBZVBI_TELETEXT_DECODER 0 ++#define CONFIG_BINTEXT_DECODER 0 ++#define CONFIG_XBIN_DECODER 0 ++#define CONFIG_IDF_DECODER 0 ++#define CONFIG_LIBAOM_AV1_DECODER 0 ++#define CONFIG_LIBOPENH264_DECODER 0 ++#define CONFIG_H264_CUVID_DECODER 0 ++#define CONFIG_HEVC_CUVID_DECODER 0 ++#define CONFIG_HEVC_MEDIACODEC_DECODER 0 ++#define CONFIG_MJPEG_CUVID_DECODER 0 ++#define CONFIG_MJPEG_QSV_DECODER 0 ++#define CONFIG_MPEG1_CUVID_DECODER 0 ++#define CONFIG_MPEG2_CUVID_DECODER 0 ++#define CONFIG_MPEG4_CUVID_DECODER 0 ++#define CONFIG_MPEG4_MEDIACODEC_DECODER 0 ++#define CONFIG_VC1_CUVID_DECODER 0 ++#define CONFIG_VP8_CUVID_DECODER 0 ++#define CONFIG_VP8_MEDIACODEC_DECODER 0 ++#define CONFIG_VP8_QSV_DECODER 0 ++#define CONFIG_VP9_CUVID_DECODER 0 ++#define CONFIG_VP9_MEDIACODEC_DECODER 0 ++#define CONFIG_VP9_QSV_DECODER 0 ++#define CONFIG_A64MULTI_ENCODER 0 ++#define CONFIG_A64MULTI5_ENCODER 0 ++#define CONFIG_ALIAS_PIX_ENCODER 0 ++#define CONFIG_AMV_ENCODER 0 ++#define CONFIG_APNG_ENCODER 0 ++#define CONFIG_ASV1_ENCODER 0 ++#define CONFIG_ASV2_ENCODER 0 ++#define CONFIG_AVRP_ENCODER 0 ++#define CONFIG_AVUI_ENCODER 0 ++#define CONFIG_AYUV_ENCODER 0 ++#define CONFIG_BMP_ENCODER 0 ++#define CONFIG_CINEPAK_ENCODER 0 ++#define CONFIG_CLJR_ENCODER 0 ++#define CONFIG_COMFORTNOISE_ENCODER 0 ++#define CONFIG_DNXHD_ENCODER 0 ++#define CONFIG_DPX_ENCODER 0 ++#define CONFIG_DVVIDEO_ENCODER 0 ++#define CONFIG_FFV1_ENCODER 0 ++#define CONFIG_FFVHUFF_ENCODER 0 ++#define CONFIG_FITS_ENCODER 0 ++#define CONFIG_FLASHSV_ENCODER 0 ++#define CONFIG_FLASHSV2_ENCODER 0 ++#define CONFIG_FLV_ENCODER 0 ++#define CONFIG_GIF_ENCODER 0 ++#define CONFIG_H261_ENCODER 0 ++#define CONFIG_H263_ENCODER 0 ++#define CONFIG_H263P_ENCODER 0 ++#define 
CONFIG_HAP_ENCODER 0 ++#define CONFIG_HUFFYUV_ENCODER 0 ++#define CONFIG_JPEG2000_ENCODER 0 ++#define CONFIG_JPEGLS_ENCODER 0 ++#define CONFIG_LJPEG_ENCODER 0 ++#define CONFIG_MAGICYUV_ENCODER 0 ++#define CONFIG_MJPEG_ENCODER 0 ++#define CONFIG_MPEG1VIDEO_ENCODER 0 ++#define CONFIG_MPEG2VIDEO_ENCODER 0 ++#define CONFIG_MPEG4_ENCODER 0 ++#define CONFIG_MSMPEG4V2_ENCODER 0 ++#define CONFIG_MSMPEG4V3_ENCODER 0 ++#define CONFIG_MSVIDEO1_ENCODER 0 ++#define CONFIG_PAM_ENCODER 0 ++#define CONFIG_PBM_ENCODER 0 ++#define CONFIG_PCX_ENCODER 0 ++#define CONFIG_PGM_ENCODER 0 ++#define CONFIG_PGMYUV_ENCODER 0 ++#define CONFIG_PNG_ENCODER 0 ++#define CONFIG_PPM_ENCODER 0 ++#define CONFIG_PRORES_ENCODER 0 ++#define CONFIG_PRORES_AW_ENCODER 0 ++#define CONFIG_PRORES_KS_ENCODER 0 ++#define CONFIG_QTRLE_ENCODER 0 ++#define CONFIG_R10K_ENCODER 0 ++#define CONFIG_R210_ENCODER 0 ++#define CONFIG_RAWVIDEO_ENCODER 0 ++#define CONFIG_ROQ_ENCODER 0 ++#define CONFIG_RV10_ENCODER 0 ++#define CONFIG_RV20_ENCODER 0 ++#define CONFIG_S302M_ENCODER 0 ++#define CONFIG_SGI_ENCODER 0 ++#define CONFIG_SNOW_ENCODER 0 ++#define CONFIG_SUNRAST_ENCODER 0 ++#define CONFIG_SVQ1_ENCODER 0 ++#define CONFIG_TARGA_ENCODER 0 ++#define CONFIG_TIFF_ENCODER 0 ++#define CONFIG_UTVIDEO_ENCODER 0 ++#define CONFIG_V210_ENCODER 0 ++#define CONFIG_V308_ENCODER 0 ++#define CONFIG_V408_ENCODER 0 ++#define CONFIG_V410_ENCODER 0 ++#define CONFIG_VC2_ENCODER 0 ++#define CONFIG_WRAPPED_AVFRAME_ENCODER 0 ++#define CONFIG_WMV1_ENCODER 0 ++#define CONFIG_WMV2_ENCODER 0 ++#define CONFIG_XBM_ENCODER 0 ++#define CONFIG_XFACE_ENCODER 0 ++#define CONFIG_XWD_ENCODER 0 ++#define CONFIG_Y41P_ENCODER 0 ++#define CONFIG_YUV4_ENCODER 0 ++#define CONFIG_ZLIB_ENCODER 0 ++#define CONFIG_ZMBV_ENCODER 0 ++#define CONFIG_AAC_ENCODER 0 ++#define CONFIG_AC3_ENCODER 0 ++#define CONFIG_AC3_FIXED_ENCODER 0 ++#define CONFIG_ALAC_ENCODER 0 ++#define CONFIG_APTX_ENCODER 0 ++#define CONFIG_APTX_HD_ENCODER 0 ++#define CONFIG_DCA_ENCODER 0 ++#define CONFIG_EAC3_ENCODER 0 ++#define CONFIG_FLAC_ENCODER 0 ++#define CONFIG_G723_1_ENCODER 0 ++#define CONFIG_MLP_ENCODER 0 ++#define CONFIG_MP2_ENCODER 0 ++#define CONFIG_MP2FIXED_ENCODER 0 ++#define CONFIG_NELLYMOSER_ENCODER 0 ++#define CONFIG_OPUS_ENCODER 0 ++#define CONFIG_RA_144_ENCODER 0 ++#define CONFIG_SBC_ENCODER 0 ++#define CONFIG_SONIC_ENCODER 0 ++#define CONFIG_SONIC_LS_ENCODER 0 ++#define CONFIG_TRUEHD_ENCODER 0 ++#define CONFIG_TTA_ENCODER 0 ++#define CONFIG_VORBIS_ENCODER 0 ++#define CONFIG_WAVPACK_ENCODER 0 ++#define CONFIG_WMAV1_ENCODER 0 ++#define CONFIG_WMAV2_ENCODER 0 ++#define CONFIG_PCM_ALAW_ENCODER 0 ++#define CONFIG_PCM_DVD_ENCODER 0 ++#define CONFIG_PCM_F32BE_ENCODER 0 ++#define CONFIG_PCM_F32LE_ENCODER 0 ++#define CONFIG_PCM_F64BE_ENCODER 0 ++#define CONFIG_PCM_F64LE_ENCODER 0 ++#define CONFIG_PCM_MULAW_ENCODER 0 ++#define CONFIG_PCM_S8_ENCODER 0 ++#define CONFIG_PCM_S8_PLANAR_ENCODER 0 ++#define CONFIG_PCM_S16BE_ENCODER 0 ++#define CONFIG_PCM_S16BE_PLANAR_ENCODER 0 ++#define CONFIG_PCM_S16LE_ENCODER 0 ++#define CONFIG_PCM_S16LE_PLANAR_ENCODER 0 ++#define CONFIG_PCM_S24BE_ENCODER 0 ++#define CONFIG_PCM_S24DAUD_ENCODER 0 ++#define CONFIG_PCM_S24LE_ENCODER 0 ++#define CONFIG_PCM_S24LE_PLANAR_ENCODER 0 ++#define CONFIG_PCM_S32BE_ENCODER 0 ++#define CONFIG_PCM_S32LE_ENCODER 0 ++#define CONFIG_PCM_S32LE_PLANAR_ENCODER 0 ++#define CONFIG_PCM_S64BE_ENCODER 0 ++#define CONFIG_PCM_S64LE_ENCODER 0 ++#define CONFIG_PCM_U8_ENCODER 0 ++#define CONFIG_PCM_U16BE_ENCODER 0 ++#define CONFIG_PCM_U16LE_ENCODER 0 ++#define 
CONFIG_PCM_U24BE_ENCODER 0 ++#define CONFIG_PCM_U24LE_ENCODER 0 ++#define CONFIG_PCM_U32BE_ENCODER 0 ++#define CONFIG_PCM_U32LE_ENCODER 0 ++#define CONFIG_PCM_VIDC_ENCODER 0 ++#define CONFIG_ROQ_DPCM_ENCODER 0 ++#define CONFIG_ADPCM_ADX_ENCODER 0 ++#define CONFIG_ADPCM_G722_ENCODER 0 ++#define CONFIG_ADPCM_G726_ENCODER 0 ++#define CONFIG_ADPCM_G726LE_ENCODER 0 ++#define CONFIG_ADPCM_IMA_QT_ENCODER 0 ++#define CONFIG_ADPCM_IMA_WAV_ENCODER 0 ++#define CONFIG_ADPCM_MS_ENCODER 0 ++#define CONFIG_ADPCM_SWF_ENCODER 0 ++#define CONFIG_ADPCM_YAMAHA_ENCODER 0 ++#define CONFIG_SSA_ENCODER 0 ++#define CONFIG_ASS_ENCODER 0 ++#define CONFIG_DVBSUB_ENCODER 0 ++#define CONFIG_DVDSUB_ENCODER 0 ++#define CONFIG_MOVTEXT_ENCODER 0 ++#define CONFIG_SRT_ENCODER 0 ++#define CONFIG_SUBRIP_ENCODER 0 ++#define CONFIG_TEXT_ENCODER 0 ++#define CONFIG_WEBVTT_ENCODER 0 ++#define CONFIG_XSUB_ENCODER 0 ++#define CONFIG_AAC_AT_ENCODER 0 ++#define CONFIG_ALAC_AT_ENCODER 0 ++#define CONFIG_ILBC_AT_ENCODER 0 ++#define CONFIG_PCM_ALAW_AT_ENCODER 0 ++#define CONFIG_PCM_MULAW_AT_ENCODER 0 ++#define CONFIG_LIBAOM_AV1_ENCODER 0 ++#define CONFIG_LIBCODEC2_ENCODER 0 ++#define CONFIG_LIBFDK_AAC_ENCODER 0 ++#define CONFIG_LIBGSM_ENCODER 0 ++#define CONFIG_LIBGSM_MS_ENCODER 0 ++#define CONFIG_LIBILBC_ENCODER 0 ++#define CONFIG_LIBMP3LAME_ENCODER 0 ++#define CONFIG_LIBOPENCORE_AMRNB_ENCODER 0 ++#define CONFIG_LIBOPENJPEG_ENCODER 0 ++#define CONFIG_LIBOPUS_ENCODER 0 ++#define CONFIG_LIBRAV1E_ENCODER 0 ++#define CONFIG_LIBSHINE_ENCODER 0 ++#define CONFIG_LIBSPEEX_ENCODER 0 ++#define CONFIG_LIBTHEORA_ENCODER 0 ++#define CONFIG_LIBTWOLAME_ENCODER 0 ++#define CONFIG_LIBVO_AMRWBENC_ENCODER 0 ++#define CONFIG_LIBVORBIS_ENCODER 0 ++#define CONFIG_LIBVPX_VP8_ENCODER 0 ++#define CONFIG_LIBVPX_VP9_ENCODER 0 ++#define CONFIG_LIBWAVPACK_ENCODER 0 ++#define CONFIG_LIBWEBP_ANIM_ENCODER 0 ++#define CONFIG_LIBWEBP_ENCODER 0 ++#define CONFIG_LIBX262_ENCODER 0 ++#define CONFIG_LIBX264_ENCODER 0 ++#define CONFIG_LIBX264RGB_ENCODER 0 ++#define CONFIG_LIBX265_ENCODER 0 ++#define CONFIG_LIBXAVS_ENCODER 0 ++#define CONFIG_LIBXAVS2_ENCODER 0 ++#define CONFIG_LIBXVID_ENCODER 0 ++#define CONFIG_H263_V4L2M2M_ENCODER 0 ++#define CONFIG_LIBOPENH264_ENCODER 0 ++#define CONFIG_H264_AMF_ENCODER 0 ++#define CONFIG_H264_NVENC_ENCODER 0 ++#define CONFIG_H264_OMX_ENCODER 0 ++#define CONFIG_H264_QSV_ENCODER 0 ++#define CONFIG_H264_V4L2M2M_ENCODER 0 ++#define CONFIG_H264_VAAPI_ENCODER 0 ++#define CONFIG_H264_VIDEOTOOLBOX_ENCODER 0 ++#define CONFIG_NVENC_ENCODER 0 ++#define CONFIG_NVENC_H264_ENCODER 0 ++#define CONFIG_NVENC_HEVC_ENCODER 0 ++#define CONFIG_HEVC_AMF_ENCODER 0 ++#define CONFIG_HEVC_NVENC_ENCODER 0 ++#define CONFIG_HEVC_QSV_ENCODER 0 ++#define CONFIG_HEVC_V4L2M2M_ENCODER 0 ++#define CONFIG_HEVC_VAAPI_ENCODER 0 ++#define CONFIG_HEVC_VIDEOTOOLBOX_ENCODER 0 ++#define CONFIG_LIBKVAZAAR_ENCODER 0 ++#define CONFIG_MJPEG_QSV_ENCODER 0 ++#define CONFIG_MJPEG_VAAPI_ENCODER 0 ++#define CONFIG_MPEG2_QSV_ENCODER 0 ++#define CONFIG_MPEG2_VAAPI_ENCODER 0 ++#define CONFIG_MPEG4_OMX_ENCODER 0 ++#define CONFIG_MPEG4_V4L2M2M_ENCODER 0 ++#define CONFIG_VP8_V4L2M2M_ENCODER 0 ++#define CONFIG_VP8_VAAPI_ENCODER 0 ++#define CONFIG_VP9_VAAPI_ENCODER 0 ++#define CONFIG_VP9_QSV_ENCODER 0 ++#define CONFIG_H263_VAAPI_HWACCEL 0 ++#define CONFIG_H263_VIDEOTOOLBOX_HWACCEL 0 ++#define CONFIG_H264_D3D11VA_HWACCEL 0 ++#define CONFIG_H264_D3D11VA2_HWACCEL 0 ++#define CONFIG_H264_DXVA2_HWACCEL 0 ++#define CONFIG_H264_NVDEC_HWACCEL 0 ++#define CONFIG_H264_VAAPI_HWACCEL 0 ++#define 
CONFIG_H264_VDPAU_HWACCEL 0 ++#define CONFIG_H264_VIDEOTOOLBOX_HWACCEL 0 ++#define CONFIG_HEVC_D3D11VA_HWACCEL 0 ++#define CONFIG_HEVC_D3D11VA2_HWACCEL 0 ++#define CONFIG_HEVC_DXVA2_HWACCEL 0 ++#define CONFIG_HEVC_NVDEC_HWACCEL 0 ++#define CONFIG_HEVC_VAAPI_HWACCEL 0 ++#define CONFIG_HEVC_VDPAU_HWACCEL 0 ++#define CONFIG_HEVC_VIDEOTOOLBOX_HWACCEL 0 ++#define CONFIG_MJPEG_NVDEC_HWACCEL 0 ++#define CONFIG_MJPEG_VAAPI_HWACCEL 0 ++#define CONFIG_MPEG1_NVDEC_HWACCEL 0 ++#define CONFIG_MPEG1_VDPAU_HWACCEL 0 ++#define CONFIG_MPEG1_VIDEOTOOLBOX_HWACCEL 0 ++#define CONFIG_MPEG1_XVMC_HWACCEL 0 ++#define CONFIG_MPEG2_D3D11VA_HWACCEL 0 ++#define CONFIG_MPEG2_D3D11VA2_HWACCEL 0 ++#define CONFIG_MPEG2_NVDEC_HWACCEL 0 ++#define CONFIG_MPEG2_DXVA2_HWACCEL 0 ++#define CONFIG_MPEG2_VAAPI_HWACCEL 0 ++#define CONFIG_MPEG2_VDPAU_HWACCEL 0 ++#define CONFIG_MPEG2_VIDEOTOOLBOX_HWACCEL 0 ++#define CONFIG_MPEG2_XVMC_HWACCEL 0 ++#define CONFIG_MPEG4_NVDEC_HWACCEL 0 ++#define CONFIG_MPEG4_VAAPI_HWACCEL 0 ++#define CONFIG_MPEG4_VDPAU_HWACCEL 0 ++#define CONFIG_MPEG4_VIDEOTOOLBOX_HWACCEL 0 ++#define CONFIG_VC1_D3D11VA_HWACCEL 0 ++#define CONFIG_VC1_D3D11VA2_HWACCEL 0 ++#define CONFIG_VC1_DXVA2_HWACCEL 0 ++#define CONFIG_VC1_NVDEC_HWACCEL 0 ++#define CONFIG_VC1_VAAPI_HWACCEL 0 ++#define CONFIG_VC1_VDPAU_HWACCEL 0 ++#define CONFIG_VP8_NVDEC_HWACCEL 0 ++#define CONFIG_VP8_VAAPI_HWACCEL 0 ++#define CONFIG_VP9_D3D11VA_HWACCEL 0 ++#define CONFIG_VP9_D3D11VA2_HWACCEL 0 ++#define CONFIG_VP9_DXVA2_HWACCEL 0 ++#define CONFIG_VP9_NVDEC_HWACCEL 0 ++#define CONFIG_VP9_VAAPI_HWACCEL 0 ++#define CONFIG_VP9_VDPAU_HWACCEL 0 ++#define CONFIG_WMV3_D3D11VA_HWACCEL 0 ++#define CONFIG_WMV3_D3D11VA2_HWACCEL 0 ++#define CONFIG_WMV3_DXVA2_HWACCEL 0 ++#define CONFIG_WMV3_NVDEC_HWACCEL 0 ++#define CONFIG_WMV3_VAAPI_HWACCEL 0 ++#define CONFIG_WMV3_VDPAU_HWACCEL 0 ++#define CONFIG_AAC_PARSER 1 ++#define CONFIG_AAC_LATM_PARSER 0 ++#define CONFIG_AC3_PARSER 0 ++#define CONFIG_ADX_PARSER 0 ++#define CONFIG_AV1_PARSER 0 ++#define CONFIG_AVS2_PARSER 0 ++#define CONFIG_BMP_PARSER 0 ++#define CONFIG_CAVSVIDEO_PARSER 0 ++#define CONFIG_COOK_PARSER 0 ++#define CONFIG_DCA_PARSER 0 ++#define CONFIG_DIRAC_PARSER 0 ++#define CONFIG_DNXHD_PARSER 0 ++#define CONFIG_DPX_PARSER 0 ++#define CONFIG_DVAUDIO_PARSER 0 ++#define CONFIG_DVBSUB_PARSER 0 ++#define CONFIG_DVDSUB_PARSER 0 ++#define CONFIG_DVD_NAV_PARSER 0 ++#define CONFIG_FLAC_PARSER 1 ++#define CONFIG_G723_1_PARSER 0 ++#define CONFIG_G729_PARSER 0 ++#define CONFIG_GIF_PARSER 0 ++#define CONFIG_GSM_PARSER 0 ++#define CONFIG_H261_PARSER 0 ++#define CONFIG_H263_PARSER 0 ++#define CONFIG_H264_PARSER 1 ++#define CONFIG_HEVC_PARSER 0 ++#define CONFIG_MJPEG_PARSER 0 ++#define CONFIG_MLP_PARSER 0 ++#define CONFIG_MPEG4VIDEO_PARSER 0 ++#define CONFIG_MPEGAUDIO_PARSER 1 ++#define CONFIG_MPEGVIDEO_PARSER 0 ++#define CONFIG_OPUS_PARSER 1 ++#define CONFIG_PNG_PARSER 0 ++#define CONFIG_PNM_PARSER 0 ++#define CONFIG_RV30_PARSER 0 ++#define CONFIG_RV40_PARSER 0 ++#define CONFIG_SBC_PARSER 0 ++#define CONFIG_SIPR_PARSER 0 ++#define CONFIG_TAK_PARSER 0 ++#define CONFIG_VC1_PARSER 0 ++#define CONFIG_VORBIS_PARSER 1 ++#define CONFIG_VP3_PARSER 1 ++#define CONFIG_VP8_PARSER 1 ++#define CONFIG_VP9_PARSER 1 ++#define CONFIG_WEBP_PARSER 0 ++#define CONFIG_XMA_PARSER 0 ++#define CONFIG_ALSA_INDEV 0 ++#define CONFIG_ANDROID_CAMERA_INDEV 0 ++#define CONFIG_AVFOUNDATION_INDEV 0 ++#define CONFIG_BKTR_INDEV 0 ++#define CONFIG_DECKLINK_INDEV 0 ++#define CONFIG_DSHOW_INDEV 0 ++#define CONFIG_FBDEV_INDEV 0 ++#define CONFIG_GDIGRAB_INDEV 0 
++#define CONFIG_IEC61883_INDEV 0 ++#define CONFIG_JACK_INDEV 0 ++#define CONFIG_KMSGRAB_INDEV 0 ++#define CONFIG_LAVFI_INDEV 0 ++#define CONFIG_OPENAL_INDEV 0 ++#define CONFIG_OSS_INDEV 0 ++#define CONFIG_PULSE_INDEV 0 ++#define CONFIG_SNDIO_INDEV 0 ++#define CONFIG_V4L2_INDEV 0 ++#define CONFIG_VFWCAP_INDEV 0 ++#define CONFIG_XCBGRAB_INDEV 0 ++#define CONFIG_LIBCDIO_INDEV 0 ++#define CONFIG_LIBDC1394_INDEV 0 ++#define CONFIG_ALSA_OUTDEV 0 ++#define CONFIG_CACA_OUTDEV 0 ++#define CONFIG_DECKLINK_OUTDEV 0 ++#define CONFIG_FBDEV_OUTDEV 0 ++#define CONFIG_OPENGL_OUTDEV 0 ++#define CONFIG_OSS_OUTDEV 0 ++#define CONFIG_PULSE_OUTDEV 0 ++#define CONFIG_SDL2_OUTDEV 0 ++#define CONFIG_SNDIO_OUTDEV 0 ++#define CONFIG_V4L2_OUTDEV 0 ++#define CONFIG_XV_OUTDEV 0 ++#define CONFIG_ABENCH_FILTER 0 ++#define CONFIG_ACOMPRESSOR_FILTER 0 ++#define CONFIG_ACONTRAST_FILTER 0 ++#define CONFIG_ACOPY_FILTER 0 ++#define CONFIG_ACUE_FILTER 0 ++#define CONFIG_ACROSSFADE_FILTER 0 ++#define CONFIG_ACROSSOVER_FILTER 0 ++#define CONFIG_ACRUSHER_FILTER 0 ++#define CONFIG_ADECLICK_FILTER 0 ++#define CONFIG_ADECLIP_FILTER 0 ++#define CONFIG_ADELAY_FILTER 0 ++#define CONFIG_ADERIVATIVE_FILTER 0 ++#define CONFIG_AECHO_FILTER 0 ++#define CONFIG_AEMPHASIS_FILTER 0 ++#define CONFIG_AEVAL_FILTER 0 ++#define CONFIG_AFADE_FILTER 0 ++#define CONFIG_AFFTDN_FILTER 0 ++#define CONFIG_AFFTFILT_FILTER 0 ++#define CONFIG_AFIR_FILTER 0 ++#define CONFIG_AFORMAT_FILTER 0 ++#define CONFIG_AGATE_FILTER 0 ++#define CONFIG_AIIR_FILTER 0 ++#define CONFIG_AINTEGRAL_FILTER 0 ++#define CONFIG_AINTERLEAVE_FILTER 0 ++#define CONFIG_ALIMITER_FILTER 0 ++#define CONFIG_ALLPASS_FILTER 0 ++#define CONFIG_ALOOP_FILTER 0 ++#define CONFIG_AMERGE_FILTER 0 ++#define CONFIG_AMETADATA_FILTER 0 ++#define CONFIG_AMIX_FILTER 0 ++#define CONFIG_AMULTIPLY_FILTER 0 ++#define CONFIG_ANEQUALIZER_FILTER 0 ++#define CONFIG_ANLMDN_FILTER 0 ++#define CONFIG_ANLMS_FILTER 0 ++#define CONFIG_ANULL_FILTER 0 ++#define CONFIG_APAD_FILTER 0 ++#define CONFIG_APERMS_FILTER 0 ++#define CONFIG_APHASER_FILTER 0 ++#define CONFIG_APULSATOR_FILTER 0 ++#define CONFIG_AREALTIME_FILTER 0 ++#define CONFIG_ARESAMPLE_FILTER 0 ++#define CONFIG_AREVERSE_FILTER 0 ++#define CONFIG_ARNNDN_FILTER 0 ++#define CONFIG_ASELECT_FILTER 0 ++#define CONFIG_ASENDCMD_FILTER 0 ++#define CONFIG_ASETNSAMPLES_FILTER 0 ++#define CONFIG_ASETPTS_FILTER 0 ++#define CONFIG_ASETRATE_FILTER 0 ++#define CONFIG_ASETTB_FILTER 0 ++#define CONFIG_ASHOWINFO_FILTER 0 ++#define CONFIG_ASIDEDATA_FILTER 0 ++#define CONFIG_ASOFTCLIP_FILTER 0 ++#define CONFIG_ASPLIT_FILTER 0 ++#define CONFIG_ASR_FILTER 0 ++#define CONFIG_ASTATS_FILTER 0 ++#define CONFIG_ASTREAMSELECT_FILTER 0 ++#define CONFIG_ATEMPO_FILTER 0 ++#define CONFIG_ATRIM_FILTER 0 ++#define CONFIG_AXCORRELATE_FILTER 0 ++#define CONFIG_AZMQ_FILTER 0 ++#define CONFIG_BANDPASS_FILTER 0 ++#define CONFIG_BANDREJECT_FILTER 0 ++#define CONFIG_BASS_FILTER 0 ++#define CONFIG_BIQUAD_FILTER 0 ++#define CONFIG_BS2B_FILTER 0 ++#define CONFIG_CHROMABER_VULKAN_FILTER 0 ++#define CONFIG_CHANNELMAP_FILTER 0 ++#define CONFIG_CHANNELSPLIT_FILTER 0 ++#define CONFIG_CHORUS_FILTER 0 ++#define CONFIG_COMPAND_FILTER 0 ++#define CONFIG_COMPENSATIONDELAY_FILTER 0 ++#define CONFIG_CROSSFEED_FILTER 0 ++#define CONFIG_CRYSTALIZER_FILTER 0 ++#define CONFIG_DCSHIFT_FILTER 0 ++#define CONFIG_DEESSER_FILTER 0 ++#define CONFIG_DRMETER_FILTER 0 ++#define CONFIG_DYNAUDNORM_FILTER 0 ++#define CONFIG_EARWAX_FILTER 0 ++#define CONFIG_EBUR128_FILTER 0 ++#define CONFIG_EQUALIZER_FILTER 0 ++#define 
CONFIG_EXTRASTEREO_FILTER 0 ++#define CONFIG_FIREQUALIZER_FILTER 0 ++#define CONFIG_FLANGER_FILTER 0 ++#define CONFIG_HAAS_FILTER 0 ++#define CONFIG_HDCD_FILTER 0 ++#define CONFIG_HEADPHONE_FILTER 0 ++#define CONFIG_HIGHPASS_FILTER 0 ++#define CONFIG_HIGHSHELF_FILTER 0 ++#define CONFIG_JOIN_FILTER 0 ++#define CONFIG_LADSPA_FILTER 0 ++#define CONFIG_LOUDNORM_FILTER 0 ++#define CONFIG_LOWPASS_FILTER 0 ++#define CONFIG_LOWSHELF_FILTER 0 ++#define CONFIG_LV2_FILTER 0 ++#define CONFIG_MCOMPAND_FILTER 0 ++#define CONFIG_PAN_FILTER 0 ++#define CONFIG_REPLAYGAIN_FILTER 0 ++#define CONFIG_RESAMPLE_FILTER 0 ++#define CONFIG_RUBBERBAND_FILTER 0 ++#define CONFIG_SIDECHAINCOMPRESS_FILTER 0 ++#define CONFIG_SIDECHAINGATE_FILTER 0 ++#define CONFIG_SILENCEDETECT_FILTER 0 ++#define CONFIG_SILENCEREMOVE_FILTER 0 ++#define CONFIG_SOFALIZER_FILTER 0 ++#define CONFIG_STEREOTOOLS_FILTER 0 ++#define CONFIG_STEREOWIDEN_FILTER 0 ++#define CONFIG_SUPEREQUALIZER_FILTER 0 ++#define CONFIG_SURROUND_FILTER 0 ++#define CONFIG_TREBLE_FILTER 0 ++#define CONFIG_TREMOLO_FILTER 0 ++#define CONFIG_VIBRATO_FILTER 0 ++#define CONFIG_VOLUME_FILTER 0 ++#define CONFIG_VOLUMEDETECT_FILTER 0 ++#define CONFIG_AEVALSRC_FILTER 0 ++#define CONFIG_AFIRSRC_FILTER 0 ++#define CONFIG_ANOISESRC_FILTER 0 ++#define CONFIG_ANULLSRC_FILTER 0 ++#define CONFIG_FLITE_FILTER 0 ++#define CONFIG_HILBERT_FILTER 0 ++#define CONFIG_SINC_FILTER 0 ++#define CONFIG_SINE_FILTER 0 ++#define CONFIG_ANULLSINK_FILTER 0 ++#define CONFIG_ADDROI_FILTER 0 ++#define CONFIG_ALPHAEXTRACT_FILTER 0 ++#define CONFIG_ALPHAMERGE_FILTER 0 ++#define CONFIG_AMPLIFY_FILTER 0 ++#define CONFIG_ASS_FILTER 0 ++#define CONFIG_ATADENOISE_FILTER 0 ++#define CONFIG_AVGBLUR_FILTER 0 ++#define CONFIG_AVGBLUR_OPENCL_FILTER 0 ++#define CONFIG_AVGBLUR_VULKAN_FILTER 0 ++#define CONFIG_BBOX_FILTER 0 ++#define CONFIG_BENCH_FILTER 0 ++#define CONFIG_BILATERAL_FILTER 0 ++#define CONFIG_BITPLANENOISE_FILTER 0 ++#define CONFIG_BLACKDETECT_FILTER 0 ++#define CONFIG_BLACKFRAME_FILTER 0 ++#define CONFIG_BLEND_FILTER 0 ++#define CONFIG_BM3D_FILTER 0 ++#define CONFIG_BOXBLUR_FILTER 0 ++#define CONFIG_BOXBLUR_OPENCL_FILTER 0 ++#define CONFIG_BWDIF_FILTER 0 ++#define CONFIG_CAS_FILTER 0 ++#define CONFIG_CHROMAHOLD_FILTER 0 ++#define CONFIG_CHROMAKEY_FILTER 0 ++#define CONFIG_CHROMASHIFT_FILTER 0 ++#define CONFIG_CIESCOPE_FILTER 0 ++#define CONFIG_CODECVIEW_FILTER 0 ++#define CONFIG_COLORBALANCE_FILTER 0 ++#define CONFIG_COLORCHANNELMIXER_FILTER 0 ++#define CONFIG_COLORKEY_FILTER 0 ++#define CONFIG_COLORKEY_OPENCL_FILTER 0 ++#define CONFIG_COLORHOLD_FILTER 0 ++#define CONFIG_COLORLEVELS_FILTER 0 ++#define CONFIG_COLORMATRIX_FILTER 0 ++#define CONFIG_COLORSPACE_FILTER 0 ++#define CONFIG_CONVOLUTION_FILTER 0 ++#define CONFIG_CONVOLUTION_OPENCL_FILTER 0 ++#define CONFIG_CONVOLVE_FILTER 0 ++#define CONFIG_COPY_FILTER 0 ++#define CONFIG_COREIMAGE_FILTER 0 ++#define CONFIG_COVER_RECT_FILTER 0 ++#define CONFIG_CROP_FILTER 0 ++#define CONFIG_CROPDETECT_FILTER 0 ++#define CONFIG_CUE_FILTER 0 ++#define CONFIG_CURVES_FILTER 0 ++#define CONFIG_DATASCOPE_FILTER 0 ++#define CONFIG_DCTDNOIZ_FILTER 0 ++#define CONFIG_DEBAND_FILTER 0 ++#define CONFIG_DEBLOCK_FILTER 0 ++#define CONFIG_DECIMATE_FILTER 0 ++#define CONFIG_DECONVOLVE_FILTER 0 ++#define CONFIG_DEDOT_FILTER 0 ++#define CONFIG_DEFLATE_FILTER 0 ++#define CONFIG_DEFLICKER_FILTER 0 ++#define CONFIG_DEINTERLACE_QSV_FILTER 0 ++#define CONFIG_DEINTERLACE_VAAPI_FILTER 0 ++#define CONFIG_DEJUDDER_FILTER 0 ++#define CONFIG_DELOGO_FILTER 0 ++#define 
CONFIG_DENOISE_VAAPI_FILTER 0 ++#define CONFIG_DERAIN_FILTER 0 ++#define CONFIG_DESHAKE_FILTER 0 ++#define CONFIG_DESHAKE_OPENCL_FILTER 0 ++#define CONFIG_DESPILL_FILTER 0 ++#define CONFIG_DETELECINE_FILTER 0 ++#define CONFIG_DILATION_FILTER 0 ++#define CONFIG_DILATION_OPENCL_FILTER 0 ++#define CONFIG_DISPLACE_FILTER 0 ++#define CONFIG_DNN_PROCESSING_FILTER 0 ++#define CONFIG_DOUBLEWEAVE_FILTER 0 ++#define CONFIG_DRAWBOX_FILTER 0 ++#define CONFIG_DRAWGRAPH_FILTER 0 ++#define CONFIG_DRAWGRID_FILTER 0 ++#define CONFIG_DRAWTEXT_FILTER 0 ++#define CONFIG_EDGEDETECT_FILTER 0 ++#define CONFIG_ELBG_FILTER 0 ++#define CONFIG_ENTROPY_FILTER 0 ++#define CONFIG_EQ_FILTER 0 ++#define CONFIG_EROSION_FILTER 0 ++#define CONFIG_EROSION_OPENCL_FILTER 0 ++#define CONFIG_EXTRACTPLANES_FILTER 0 ++#define CONFIG_FADE_FILTER 0 ++#define CONFIG_FFTDNOIZ_FILTER 0 ++#define CONFIG_FFTFILT_FILTER 0 ++#define CONFIG_FIELD_FILTER 0 ++#define CONFIG_FIELDHINT_FILTER 0 ++#define CONFIG_FIELDMATCH_FILTER 0 ++#define CONFIG_FIELDORDER_FILTER 0 ++#define CONFIG_FILLBORDERS_FILTER 0 ++#define CONFIG_FIND_RECT_FILTER 0 ++#define CONFIG_FLOODFILL_FILTER 0 ++#define CONFIG_FORMAT_FILTER 0 ++#define CONFIG_FPS_FILTER 0 ++#define CONFIG_FRAMEPACK_FILTER 0 ++#define CONFIG_FRAMERATE_FILTER 0 ++#define CONFIG_FRAMESTEP_FILTER 0 ++#define CONFIG_FREEZEDETECT_FILTER 0 ++#define CONFIG_FREEZEFRAMES_FILTER 0 ++#define CONFIG_FREI0R_FILTER 0 ++#define CONFIG_FSPP_FILTER 0 ++#define CONFIG_GBLUR_FILTER 0 ++#define CONFIG_GEQ_FILTER 0 ++#define CONFIG_GRADFUN_FILTER 0 ++#define CONFIG_GRAPHMONITOR_FILTER 0 ++#define CONFIG_GREYEDGE_FILTER 0 ++#define CONFIG_HALDCLUT_FILTER 0 ++#define CONFIG_HFLIP_FILTER 0 ++#define CONFIG_HISTEQ_FILTER 0 ++#define CONFIG_HISTOGRAM_FILTER 0 ++#define CONFIG_HQDN3D_FILTER 0 ++#define CONFIG_HQX_FILTER 0 ++#define CONFIG_HSTACK_FILTER 0 ++#define CONFIG_HUE_FILTER 0 ++#define CONFIG_HWDOWNLOAD_FILTER 0 ++#define CONFIG_HWMAP_FILTER 0 ++#define CONFIG_HWUPLOAD_FILTER 0 ++#define CONFIG_HWUPLOAD_CUDA_FILTER 0 ++#define CONFIG_HYSTERESIS_FILTER 0 ++#define CONFIG_IDET_FILTER 0 ++#define CONFIG_IL_FILTER 0 ++#define CONFIG_INFLATE_FILTER 0 ++#define CONFIG_INTERLACE_FILTER 0 ++#define CONFIG_INTERLEAVE_FILTER 0 ++#define CONFIG_KERNDEINT_FILTER 0 ++#define CONFIG_LAGFUN_FILTER 0 ++#define CONFIG_LENSCORRECTION_FILTER 0 ++#define CONFIG_LENSFUN_FILTER 0 ++#define CONFIG_LIBVMAF_FILTER 0 ++#define CONFIG_LIMITER_FILTER 0 ++#define CONFIG_LOOP_FILTER 0 ++#define CONFIG_LUMAKEY_FILTER 0 ++#define CONFIG_LUT_FILTER 0 ++#define CONFIG_LUT1D_FILTER 0 ++#define CONFIG_LUT2_FILTER 0 ++#define CONFIG_LUT3D_FILTER 0 ++#define CONFIG_LUTRGB_FILTER 0 ++#define CONFIG_LUTYUV_FILTER 0 ++#define CONFIG_MASKEDCLAMP_FILTER 0 ++#define CONFIG_MASKEDMAX_FILTER 0 ++#define CONFIG_MASKEDMERGE_FILTER 0 ++#define CONFIG_MASKEDMIN_FILTER 0 ++#define CONFIG_MASKEDTHRESHOLD_FILTER 0 ++#define CONFIG_MASKFUN_FILTER 0 ++#define CONFIG_MCDEINT_FILTER 0 ++#define CONFIG_MEDIAN_FILTER 0 ++#define CONFIG_MERGEPLANES_FILTER 0 ++#define CONFIG_MESTIMATE_FILTER 0 ++#define CONFIG_METADATA_FILTER 0 ++#define CONFIG_MIDEQUALIZER_FILTER 0 ++#define CONFIG_MINTERPOLATE_FILTER 0 ++#define CONFIG_MIX_FILTER 0 ++#define CONFIG_MPDECIMATE_FILTER 0 ++#define CONFIG_NEGATE_FILTER 0 ++#define CONFIG_NLMEANS_FILTER 0 ++#define CONFIG_NLMEANS_OPENCL_FILTER 0 ++#define CONFIG_NNEDI_FILTER 0 ++#define CONFIG_NOFORMAT_FILTER 0 ++#define CONFIG_NOISE_FILTER 0 ++#define CONFIG_NORMALIZE_FILTER 0 ++#define CONFIG_NULL_FILTER 0 ++#define CONFIG_OCR_FILTER 0 
++#define CONFIG_OCV_FILTER 0 ++#define CONFIG_OSCILLOSCOPE_FILTER 0 ++#define CONFIG_OVERLAY_FILTER 0 ++#define CONFIG_OVERLAY_OPENCL_FILTER 0 ++#define CONFIG_OVERLAY_QSV_FILTER 0 ++#define CONFIG_OVERLAY_VULKAN_FILTER 0 ++#define CONFIG_OVERLAY_CUDA_FILTER 0 ++#define CONFIG_OWDENOISE_FILTER 0 ++#define CONFIG_PAD_FILTER 0 ++#define CONFIG_PAD_OPENCL_FILTER 0 ++#define CONFIG_PALETTEGEN_FILTER 0 ++#define CONFIG_PALETTEUSE_FILTER 0 ++#define CONFIG_PERMS_FILTER 0 ++#define CONFIG_PERSPECTIVE_FILTER 0 ++#define CONFIG_PHASE_FILTER 0 ++#define CONFIG_PHOTOSENSITIVITY_FILTER 0 ++#define CONFIG_PIXDESCTEST_FILTER 0 ++#define CONFIG_PIXSCOPE_FILTER 0 ++#define CONFIG_PP_FILTER 0 ++#define CONFIG_PP7_FILTER 0 ++#define CONFIG_PREMULTIPLY_FILTER 0 ++#define CONFIG_PREWITT_FILTER 0 ++#define CONFIG_PREWITT_OPENCL_FILTER 0 ++#define CONFIG_PROCAMP_VAAPI_FILTER 0 ++#define CONFIG_PROGRAM_OPENCL_FILTER 0 ++#define CONFIG_PSEUDOCOLOR_FILTER 0 ++#define CONFIG_PSNR_FILTER 0 ++#define CONFIG_PULLUP_FILTER 0 ++#define CONFIG_QP_FILTER 0 ++#define CONFIG_RANDOM_FILTER 0 ++#define CONFIG_READEIA608_FILTER 0 ++#define CONFIG_READVITC_FILTER 0 ++#define CONFIG_REALTIME_FILTER 0 ++#define CONFIG_REMAP_FILTER 0 ++#define CONFIG_REMOVEGRAIN_FILTER 0 ++#define CONFIG_REMOVELOGO_FILTER 0 ++#define CONFIG_REPEATFIELDS_FILTER 0 ++#define CONFIG_REVERSE_FILTER 0 ++#define CONFIG_RGBASHIFT_FILTER 0 ++#define CONFIG_ROBERTS_FILTER 0 ++#define CONFIG_ROBERTS_OPENCL_FILTER 0 ++#define CONFIG_ROTATE_FILTER 0 ++#define CONFIG_SAB_FILTER 0 ++#define CONFIG_SCALE_FILTER 0 ++#define CONFIG_SCALE_CUDA_FILTER 0 ++#define CONFIG_SCALE_NPP_FILTER 0 ++#define CONFIG_SCALE_QSV_FILTER 0 ++#define CONFIG_SCALE_VAAPI_FILTER 0 ++#define CONFIG_SCALE_VULKAN_FILTER 0 ++#define CONFIG_SCALE2REF_FILTER 0 ++#define CONFIG_SCROLL_FILTER 0 ++#define CONFIG_SELECT_FILTER 0 ++#define CONFIG_SELECTIVECOLOR_FILTER 0 ++#define CONFIG_SENDCMD_FILTER 0 ++#define CONFIG_SEPARATEFIELDS_FILTER 0 ++#define CONFIG_SETDAR_FILTER 0 ++#define CONFIG_SETFIELD_FILTER 0 ++#define CONFIG_SETPARAMS_FILTER 0 ++#define CONFIG_SETPTS_FILTER 0 ++#define CONFIG_SETRANGE_FILTER 0 ++#define CONFIG_SETSAR_FILTER 0 ++#define CONFIG_SETTB_FILTER 0 ++#define CONFIG_SHARPNESS_VAAPI_FILTER 0 ++#define CONFIG_SHOWINFO_FILTER 0 ++#define CONFIG_SHOWPALETTE_FILTER 0 ++#define CONFIG_SHUFFLEFRAMES_FILTER 0 ++#define CONFIG_SHUFFLEPLANES_FILTER 0 ++#define CONFIG_SIDEDATA_FILTER 0 ++#define CONFIG_SIGNALSTATS_FILTER 0 ++#define CONFIG_SIGNATURE_FILTER 0 ++#define CONFIG_SMARTBLUR_FILTER 0 ++#define CONFIG_SOBEL_FILTER 0 ++#define CONFIG_SOBEL_OPENCL_FILTER 0 ++#define CONFIG_SPLIT_FILTER 0 ++#define CONFIG_SPP_FILTER 0 ++#define CONFIG_SR_FILTER 0 ++#define CONFIG_SSIM_FILTER 0 ++#define CONFIG_STEREO3D_FILTER 0 ++#define CONFIG_STREAMSELECT_FILTER 0 ++#define CONFIG_SUBTITLES_FILTER 0 ++#define CONFIG_SUPER2XSAI_FILTER 0 ++#define CONFIG_SWAPRECT_FILTER 0 ++#define CONFIG_SWAPUV_FILTER 0 ++#define CONFIG_TBLEND_FILTER 0 ++#define CONFIG_TELECINE_FILTER 0 ++#define CONFIG_THISTOGRAM_FILTER 0 ++#define CONFIG_THRESHOLD_FILTER 0 ++#define CONFIG_THUMBNAIL_FILTER 0 ++#define CONFIG_THUMBNAIL_CUDA_FILTER 0 ++#define CONFIG_TILE_FILTER 0 ++#define CONFIG_TINTERLACE_FILTER 0 ++#define CONFIG_TLUT2_FILTER 0 ++#define CONFIG_TMEDIAN_FILTER 0 ++#define CONFIG_TMIX_FILTER 0 ++#define CONFIG_TONEMAP_FILTER 0 ++#define CONFIG_TONEMAP_OPENCL_FILTER 0 ++#define CONFIG_TONEMAP_VAAPI_FILTER 0 ++#define CONFIG_TPAD_FILTER 0 ++#define CONFIG_TRANSPOSE_FILTER 0 ++#define 
CONFIG_TRANSPOSE_NPP_FILTER 0 ++#define CONFIG_TRANSPOSE_OPENCL_FILTER 0 ++#define CONFIG_TRANSPOSE_VAAPI_FILTER 0 ++#define CONFIG_TRIM_FILTER 0 ++#define CONFIG_UNPREMULTIPLY_FILTER 0 ++#define CONFIG_UNSHARP_FILTER 0 ++#define CONFIG_UNSHARP_OPENCL_FILTER 0 ++#define CONFIG_USPP_FILTER 0 ++#define CONFIG_V360_FILTER 0 ++#define CONFIG_VAGUEDENOISER_FILTER 0 ++#define CONFIG_VECTORSCOPE_FILTER 0 ++#define CONFIG_VFLIP_FILTER 0 ++#define CONFIG_VFRDET_FILTER 0 ++#define CONFIG_VIBRANCE_FILTER 0 ++#define CONFIG_VIDSTABDETECT_FILTER 0 ++#define CONFIG_VIDSTABTRANSFORM_FILTER 0 ++#define CONFIG_VIGNETTE_FILTER 0 ++#define CONFIG_VMAFMOTION_FILTER 0 ++#define CONFIG_VPP_QSV_FILTER 0 ++#define CONFIG_VSTACK_FILTER 0 ++#define CONFIG_W3FDIF_FILTER 0 ++#define CONFIG_WAVEFORM_FILTER 0 ++#define CONFIG_WEAVE_FILTER 0 ++#define CONFIG_XBR_FILTER 0 ++#define CONFIG_XFADE_FILTER 0 ++#define CONFIG_XFADE_OPENCL_FILTER 0 ++#define CONFIG_XMEDIAN_FILTER 0 ++#define CONFIG_XSTACK_FILTER 0 ++#define CONFIG_YADIF_FILTER 0 ++#define CONFIG_YADIF_CUDA_FILTER 0 ++#define CONFIG_YAEPBLUR_FILTER 0 ++#define CONFIG_ZMQ_FILTER 0 ++#define CONFIG_ZOOMPAN_FILTER 0 ++#define CONFIG_ZSCALE_FILTER 0 ++#define CONFIG_ALLRGB_FILTER 0 ++#define CONFIG_ALLYUV_FILTER 0 ++#define CONFIG_CELLAUTO_FILTER 0 ++#define CONFIG_COLOR_FILTER 0 ++#define CONFIG_COREIMAGESRC_FILTER 0 ++#define CONFIG_FREI0R_SRC_FILTER 0 ++#define CONFIG_HALDCLUTSRC_FILTER 0 ++#define CONFIG_LIFE_FILTER 0 ++#define CONFIG_MANDELBROT_FILTER 0 ++#define CONFIG_MPTESTSRC_FILTER 0 ++#define CONFIG_NULLSRC_FILTER 0 ++#define CONFIG_OPENCLSRC_FILTER 0 ++#define CONFIG_PAL75BARS_FILTER 0 ++#define CONFIG_PAL100BARS_FILTER 0 ++#define CONFIG_RGBTESTSRC_FILTER 0 ++#define CONFIG_SIERPINSKI_FILTER 0 ++#define CONFIG_SMPTEBARS_FILTER 0 ++#define CONFIG_SMPTEHDBARS_FILTER 0 ++#define CONFIG_TESTSRC_FILTER 0 ++#define CONFIG_TESTSRC2_FILTER 0 ++#define CONFIG_YUVTESTSRC_FILTER 0 ++#define CONFIG_NULLSINK_FILTER 0 ++#define CONFIG_ABITSCOPE_FILTER 0 ++#define CONFIG_ADRAWGRAPH_FILTER 0 ++#define CONFIG_AGRAPHMONITOR_FILTER 0 ++#define CONFIG_AHISTOGRAM_FILTER 0 ++#define CONFIG_APHASEMETER_FILTER 0 ++#define CONFIG_AVECTORSCOPE_FILTER 0 ++#define CONFIG_CONCAT_FILTER 0 ++#define CONFIG_SHOWCQT_FILTER 0 ++#define CONFIG_SHOWFREQS_FILTER 0 ++#define CONFIG_SHOWSPATIAL_FILTER 0 ++#define CONFIG_SHOWSPECTRUM_FILTER 0 ++#define CONFIG_SHOWSPECTRUMPIC_FILTER 0 ++#define CONFIG_SHOWVOLUME_FILTER 0 ++#define CONFIG_SHOWWAVES_FILTER 0 ++#define CONFIG_SHOWWAVESPIC_FILTER 0 ++#define CONFIG_SPECTRUMSYNTH_FILTER 0 ++#define CONFIG_AMOVIE_FILTER 0 ++#define CONFIG_MOVIE_FILTER 0 ++#define CONFIG_AFIFO_FILTER 0 ++#define CONFIG_FIFO_FILTER 0 ++#define CONFIG_AA_DEMUXER 0 ++#define CONFIG_AAC_DEMUXER 1 ++#define CONFIG_AC3_DEMUXER 0 ++#define CONFIG_ACM_DEMUXER 0 ++#define CONFIG_ACT_DEMUXER 0 ++#define CONFIG_ADF_DEMUXER 0 ++#define CONFIG_ADP_DEMUXER 0 ++#define CONFIG_ADS_DEMUXER 0 ++#define CONFIG_ADX_DEMUXER 0 ++#define CONFIG_AEA_DEMUXER 0 ++#define CONFIG_AFC_DEMUXER 0 ++#define CONFIG_AIFF_DEMUXER 0 ++#define CONFIG_AIX_DEMUXER 0 ++#define CONFIG_ALP_DEMUXER 0 ++#define CONFIG_AMR_DEMUXER 0 ++#define CONFIG_AMRNB_DEMUXER 0 ++#define CONFIG_AMRWB_DEMUXER 0 ++#define CONFIG_ANM_DEMUXER 0 ++#define CONFIG_APC_DEMUXER 0 ++#define CONFIG_APE_DEMUXER 0 ++#define CONFIG_APM_DEMUXER 0 ++#define CONFIG_APNG_DEMUXER 0 ++#define CONFIG_APTX_DEMUXER 0 ++#define CONFIG_APTX_HD_DEMUXER 0 ++#define CONFIG_AQTITLE_DEMUXER 0 ++#define CONFIG_ARGO_ASF_DEMUXER 0 ++#define 
CONFIG_ASF_DEMUXER 0 ++#define CONFIG_ASF_O_DEMUXER 0 ++#define CONFIG_ASS_DEMUXER 0 ++#define CONFIG_AST_DEMUXER 0 ++#define CONFIG_AU_DEMUXER 0 ++#define CONFIG_AV1_DEMUXER 0 ++#define CONFIG_AVI_DEMUXER 0 ++#define CONFIG_AVISYNTH_DEMUXER 0 ++#define CONFIG_AVR_DEMUXER 0 ++#define CONFIG_AVS_DEMUXER 0 ++#define CONFIG_AVS2_DEMUXER 0 ++#define CONFIG_BETHSOFTVID_DEMUXER 0 ++#define CONFIG_BFI_DEMUXER 0 ++#define CONFIG_BINTEXT_DEMUXER 0 ++#define CONFIG_BINK_DEMUXER 0 ++#define CONFIG_BIT_DEMUXER 0 ++#define CONFIG_BMV_DEMUXER 0 ++#define CONFIG_BFSTM_DEMUXER 0 ++#define CONFIG_BRSTM_DEMUXER 0 ++#define CONFIG_BOA_DEMUXER 0 ++#define CONFIG_C93_DEMUXER 0 ++#define CONFIG_CAF_DEMUXER 0 ++#define CONFIG_CAVSVIDEO_DEMUXER 0 ++#define CONFIG_CDG_DEMUXER 0 ++#define CONFIG_CDXL_DEMUXER 0 ++#define CONFIG_CINE_DEMUXER 0 ++#define CONFIG_CODEC2_DEMUXER 0 ++#define CONFIG_CODEC2RAW_DEMUXER 0 ++#define CONFIG_CONCAT_DEMUXER 0 ++#define CONFIG_DASH_DEMUXER 0 ++#define CONFIG_DATA_DEMUXER 0 ++#define CONFIG_DAUD_DEMUXER 0 ++#define CONFIG_DCSTR_DEMUXER 0 ++#define CONFIG_DERF_DEMUXER 0 ++#define CONFIG_DFA_DEMUXER 0 ++#define CONFIG_DHAV_DEMUXER 0 ++#define CONFIG_DIRAC_DEMUXER 0 ++#define CONFIG_DNXHD_DEMUXER 0 ++#define CONFIG_DSF_DEMUXER 0 ++#define CONFIG_DSICIN_DEMUXER 0 ++#define CONFIG_DSS_DEMUXER 0 ++#define CONFIG_DTS_DEMUXER 0 ++#define CONFIG_DTSHD_DEMUXER 0 ++#define CONFIG_DV_DEMUXER 0 ++#define CONFIG_DVBSUB_DEMUXER 0 ++#define CONFIG_DVBTXT_DEMUXER 0 ++#define CONFIG_DXA_DEMUXER 0 ++#define CONFIG_EA_DEMUXER 0 ++#define CONFIG_EA_CDATA_DEMUXER 0 ++#define CONFIG_EAC3_DEMUXER 0 ++#define CONFIG_EPAF_DEMUXER 0 ++#define CONFIG_FFMETADATA_DEMUXER 0 ++#define CONFIG_FILMSTRIP_DEMUXER 0 ++#define CONFIG_FITS_DEMUXER 0 ++#define CONFIG_FLAC_DEMUXER 1 ++#define CONFIG_FLIC_DEMUXER 0 ++#define CONFIG_FLV_DEMUXER 0 ++#define CONFIG_LIVE_FLV_DEMUXER 0 ++#define CONFIG_FOURXM_DEMUXER 0 ++#define CONFIG_FRM_DEMUXER 0 ++#define CONFIG_FSB_DEMUXER 0 ++#define CONFIG_FWSE_DEMUXER 0 ++#define CONFIG_G722_DEMUXER 0 ++#define CONFIG_G723_1_DEMUXER 0 ++#define CONFIG_G726_DEMUXER 0 ++#define CONFIG_G726LE_DEMUXER 0 ++#define CONFIG_G729_DEMUXER 0 ++#define CONFIG_GDV_DEMUXER 0 ++#define CONFIG_GENH_DEMUXER 0 ++#define CONFIG_GIF_DEMUXER 0 ++#define CONFIG_GSM_DEMUXER 0 ++#define CONFIG_GXF_DEMUXER 0 ++#define CONFIG_H261_DEMUXER 0 ++#define CONFIG_H263_DEMUXER 0 ++#define CONFIG_H264_DEMUXER 0 ++#define CONFIG_HCA_DEMUXER 0 ++#define CONFIG_HCOM_DEMUXER 0 ++#define CONFIG_HEVC_DEMUXER 0 ++#define CONFIG_HLS_DEMUXER 0 ++#define CONFIG_HNM_DEMUXER 0 ++#define CONFIG_ICO_DEMUXER 0 ++#define CONFIG_IDCIN_DEMUXER 0 ++#define CONFIG_IDF_DEMUXER 0 ++#define CONFIG_IFF_DEMUXER 0 ++#define CONFIG_IFV_DEMUXER 0 ++#define CONFIG_ILBC_DEMUXER 0 ++#define CONFIG_IMAGE2_DEMUXER 0 ++#define CONFIG_IMAGE2PIPE_DEMUXER 0 ++#define CONFIG_IMAGE2_ALIAS_PIX_DEMUXER 0 ++#define CONFIG_IMAGE2_BRENDER_PIX_DEMUXER 0 ++#define CONFIG_INGENIENT_DEMUXER 0 ++#define CONFIG_IPMOVIE_DEMUXER 0 ++#define CONFIG_IRCAM_DEMUXER 0 ++#define CONFIG_ISS_DEMUXER 0 ++#define CONFIG_IV8_DEMUXER 0 ++#define CONFIG_IVF_DEMUXER 0 ++#define CONFIG_IVR_DEMUXER 0 ++#define CONFIG_JACOSUB_DEMUXER 0 ++#define CONFIG_JV_DEMUXER 0 ++#define CONFIG_KUX_DEMUXER 0 ++#define CONFIG_KVAG_DEMUXER 0 ++#define CONFIG_LMLM4_DEMUXER 0 ++#define CONFIG_LOAS_DEMUXER 0 ++#define CONFIG_LRC_DEMUXER 0 ++#define CONFIG_LVF_DEMUXER 0 ++#define CONFIG_LXF_DEMUXER 0 ++#define CONFIG_M4V_DEMUXER 0 ++#define CONFIG_MATROSKA_DEMUXER 1 ++#define CONFIG_MGSTS_DEMUXER 0 
++#define CONFIG_MICRODVD_DEMUXER 0 ++#define CONFIG_MJPEG_DEMUXER 0 ++#define CONFIG_MJPEG_2000_DEMUXER 0 ++#define CONFIG_MLP_DEMUXER 0 ++#define CONFIG_MLV_DEMUXER 0 ++#define CONFIG_MM_DEMUXER 0 ++#define CONFIG_MMF_DEMUXER 0 ++#define CONFIG_MOV_DEMUXER 1 ++#define CONFIG_MP3_DEMUXER 1 ++#define CONFIG_MPC_DEMUXER 0 ++#define CONFIG_MPC8_DEMUXER 0 ++#define CONFIG_MPEGPS_DEMUXER 0 ++#define CONFIG_MPEGTS_DEMUXER 0 ++#define CONFIG_MPEGTSRAW_DEMUXER 0 ++#define CONFIG_MPEGVIDEO_DEMUXER 0 ++#define CONFIG_MPJPEG_DEMUXER 0 ++#define CONFIG_MPL2_DEMUXER 0 ++#define CONFIG_MPSUB_DEMUXER 0 ++#define CONFIG_MSF_DEMUXER 0 ++#define CONFIG_MSNWC_TCP_DEMUXER 0 ++#define CONFIG_MTAF_DEMUXER 0 ++#define CONFIG_MTV_DEMUXER 0 ++#define CONFIG_MUSX_DEMUXER 0 ++#define CONFIG_MV_DEMUXER 0 ++#define CONFIG_MVI_DEMUXER 0 ++#define CONFIG_MXF_DEMUXER 0 ++#define CONFIG_MXG_DEMUXER 0 ++#define CONFIG_NC_DEMUXER 0 ++#define CONFIG_NISTSPHERE_DEMUXER 0 ++#define CONFIG_NSP_DEMUXER 0 ++#define CONFIG_NSV_DEMUXER 0 ++#define CONFIG_NUT_DEMUXER 0 ++#define CONFIG_NUV_DEMUXER 0 ++#define CONFIG_OGG_DEMUXER 1 ++#define CONFIG_OMA_DEMUXER 0 ++#define CONFIG_PAF_DEMUXER 0 ++#define CONFIG_PCM_ALAW_DEMUXER 0 ++#define CONFIG_PCM_MULAW_DEMUXER 0 ++#define CONFIG_PCM_VIDC_DEMUXER 0 ++#define CONFIG_PCM_F64BE_DEMUXER 0 ++#define CONFIG_PCM_F64LE_DEMUXER 0 ++#define CONFIG_PCM_F32BE_DEMUXER 0 ++#define CONFIG_PCM_F32LE_DEMUXER 0 ++#define CONFIG_PCM_S32BE_DEMUXER 0 ++#define CONFIG_PCM_S32LE_DEMUXER 0 ++#define CONFIG_PCM_S24BE_DEMUXER 0 ++#define CONFIG_PCM_S24LE_DEMUXER 0 ++#define CONFIG_PCM_S16BE_DEMUXER 0 ++#define CONFIG_PCM_S16LE_DEMUXER 0 ++#define CONFIG_PCM_S8_DEMUXER 0 ++#define CONFIG_PCM_U32BE_DEMUXER 0 ++#define CONFIG_PCM_U32LE_DEMUXER 0 ++#define CONFIG_PCM_U24BE_DEMUXER 0 ++#define CONFIG_PCM_U24LE_DEMUXER 0 ++#define CONFIG_PCM_U16BE_DEMUXER 0 ++#define CONFIG_PCM_U16LE_DEMUXER 0 ++#define CONFIG_PCM_U8_DEMUXER 0 ++#define CONFIG_PJS_DEMUXER 0 ++#define CONFIG_PMP_DEMUXER 0 ++#define CONFIG_PVA_DEMUXER 0 ++#define CONFIG_PVF_DEMUXER 0 ++#define CONFIG_QCP_DEMUXER 0 ++#define CONFIG_R3D_DEMUXER 0 ++#define CONFIG_RAWVIDEO_DEMUXER 0 ++#define CONFIG_REALTEXT_DEMUXER 0 ++#define CONFIG_REDSPARK_DEMUXER 0 ++#define CONFIG_RL2_DEMUXER 0 ++#define CONFIG_RM_DEMUXER 0 ++#define CONFIG_ROQ_DEMUXER 0 ++#define CONFIG_RPL_DEMUXER 0 ++#define CONFIG_RSD_DEMUXER 0 ++#define CONFIG_RSO_DEMUXER 0 ++#define CONFIG_RTP_DEMUXER 0 ++#define CONFIG_RTSP_DEMUXER 0 ++#define CONFIG_S337M_DEMUXER 0 ++#define CONFIG_SAMI_DEMUXER 0 ++#define CONFIG_SAP_DEMUXER 0 ++#define CONFIG_SBC_DEMUXER 0 ++#define CONFIG_SBG_DEMUXER 0 ++#define CONFIG_SCC_DEMUXER 0 ++#define CONFIG_SDP_DEMUXER 0 ++#define CONFIG_SDR2_DEMUXER 0 ++#define CONFIG_SDS_DEMUXER 0 ++#define CONFIG_SDX_DEMUXER 0 ++#define CONFIG_SEGAFILM_DEMUXER 0 ++#define CONFIG_SER_DEMUXER 0 ++#define CONFIG_SHORTEN_DEMUXER 0 ++#define CONFIG_SIFF_DEMUXER 0 ++#define CONFIG_SLN_DEMUXER 0 ++#define CONFIG_SMACKER_DEMUXER 0 ++#define CONFIG_SMJPEG_DEMUXER 0 ++#define CONFIG_SMUSH_DEMUXER 0 ++#define CONFIG_SOL_DEMUXER 0 ++#define CONFIG_SOX_DEMUXER 0 ++#define CONFIG_SPDIF_DEMUXER 0 ++#define CONFIG_SRT_DEMUXER 0 ++#define CONFIG_STR_DEMUXER 0 ++#define CONFIG_STL_DEMUXER 0 ++#define CONFIG_SUBVIEWER1_DEMUXER 0 ++#define CONFIG_SUBVIEWER_DEMUXER 0 ++#define CONFIG_SUP_DEMUXER 0 ++#define CONFIG_SVAG_DEMUXER 0 ++#define CONFIG_SWF_DEMUXER 0 ++#define CONFIG_TAK_DEMUXER 0 ++#define CONFIG_TEDCAPTIONS_DEMUXER 0 ++#define CONFIG_THP_DEMUXER 0 ++#define CONFIG_THREEDOSTR_DEMUXER 0 
++#define CONFIG_TIERTEXSEQ_DEMUXER 0 ++#define CONFIG_TMV_DEMUXER 0 ++#define CONFIG_TRUEHD_DEMUXER 0 ++#define CONFIG_TTA_DEMUXER 0 ++#define CONFIG_TXD_DEMUXER 0 ++#define CONFIG_TTY_DEMUXER 0 ++#define CONFIG_TY_DEMUXER 0 ++#define CONFIG_V210_DEMUXER 0 ++#define CONFIG_V210X_DEMUXER 0 ++#define CONFIG_VAG_DEMUXER 0 ++#define CONFIG_VC1_DEMUXER 0 ++#define CONFIG_VC1T_DEMUXER 0 ++#define CONFIG_VIVIDAS_DEMUXER 0 ++#define CONFIG_VIVO_DEMUXER 0 ++#define CONFIG_VMD_DEMUXER 0 ++#define CONFIG_VOBSUB_DEMUXER 0 ++#define CONFIG_VOC_DEMUXER 0 ++#define CONFIG_VPK_DEMUXER 0 ++#define CONFIG_VPLAYER_DEMUXER 0 ++#define CONFIG_VQF_DEMUXER 0 ++#define CONFIG_W64_DEMUXER 0 ++#define CONFIG_WAV_DEMUXER 1 ++#define CONFIG_WC3_DEMUXER 0 ++#define CONFIG_WEBM_DASH_MANIFEST_DEMUXER 0 ++#define CONFIG_WEBVTT_DEMUXER 0 ++#define CONFIG_WSAUD_DEMUXER 0 ++#define CONFIG_WSD_DEMUXER 0 ++#define CONFIG_WSVQA_DEMUXER 0 ++#define CONFIG_WTV_DEMUXER 0 ++#define CONFIG_WVE_DEMUXER 0 ++#define CONFIG_WV_DEMUXER 0 ++#define CONFIG_XA_DEMUXER 0 ++#define CONFIG_XBIN_DEMUXER 0 ++#define CONFIG_XMV_DEMUXER 0 ++#define CONFIG_XVAG_DEMUXER 0 ++#define CONFIG_XWMA_DEMUXER 0 ++#define CONFIG_YOP_DEMUXER 0 ++#define CONFIG_YUV4MPEGPIPE_DEMUXER 0 ++#define CONFIG_IMAGE_BMP_PIPE_DEMUXER 0 ++#define CONFIG_IMAGE_DDS_PIPE_DEMUXER 0 ++#define CONFIG_IMAGE_DPX_PIPE_DEMUXER 0 ++#define CONFIG_IMAGE_EXR_PIPE_DEMUXER 0 ++#define CONFIG_IMAGE_GIF_PIPE_DEMUXER 0 ++#define CONFIG_IMAGE_J2K_PIPE_DEMUXER 0 ++#define CONFIG_IMAGE_JPEG_PIPE_DEMUXER 0 ++#define CONFIG_IMAGE_JPEGLS_PIPE_DEMUXER 0 ++#define CONFIG_IMAGE_PAM_PIPE_DEMUXER 0 ++#define CONFIG_IMAGE_PBM_PIPE_DEMUXER 0 ++#define CONFIG_IMAGE_PCX_PIPE_DEMUXER 0 ++#define CONFIG_IMAGE_PGMYUV_PIPE_DEMUXER 0 ++#define CONFIG_IMAGE_PGM_PIPE_DEMUXER 0 ++#define CONFIG_IMAGE_PICTOR_PIPE_DEMUXER 0 ++#define CONFIG_IMAGE_PNG_PIPE_DEMUXER 0 ++#define CONFIG_IMAGE_PPM_PIPE_DEMUXER 0 ++#define CONFIG_IMAGE_PSD_PIPE_DEMUXER 0 ++#define CONFIG_IMAGE_QDRAW_PIPE_DEMUXER 0 ++#define CONFIG_IMAGE_SGI_PIPE_DEMUXER 0 ++#define CONFIG_IMAGE_SVG_PIPE_DEMUXER 0 ++#define CONFIG_IMAGE_SUNRAST_PIPE_DEMUXER 0 ++#define CONFIG_IMAGE_TIFF_PIPE_DEMUXER 0 ++#define CONFIG_IMAGE_WEBP_PIPE_DEMUXER 0 ++#define CONFIG_IMAGE_XPM_PIPE_DEMUXER 0 ++#define CONFIG_IMAGE_XWD_PIPE_DEMUXER 0 ++#define CONFIG_LIBGME_DEMUXER 0 ++#define CONFIG_LIBMODPLUG_DEMUXER 0 ++#define CONFIG_LIBOPENMPT_DEMUXER 0 ++#define CONFIG_VAPOURSYNTH_DEMUXER 0 ++#define CONFIG_A64_MUXER 0 ++#define CONFIG_AC3_MUXER 0 ++#define CONFIG_ADTS_MUXER 0 ++#define CONFIG_ADX_MUXER 0 ++#define CONFIG_AIFF_MUXER 0 ++#define CONFIG_AMR_MUXER 0 ++#define CONFIG_APNG_MUXER 0 ++#define CONFIG_APTX_MUXER 0 ++#define CONFIG_APTX_HD_MUXER 0 ++#define CONFIG_ASF_MUXER 0 ++#define CONFIG_ASS_MUXER 0 ++#define CONFIG_AST_MUXER 0 ++#define CONFIG_ASF_STREAM_MUXER 0 ++#define CONFIG_AU_MUXER 0 ++#define CONFIG_AVI_MUXER 0 ++#define CONFIG_AVM2_MUXER 0 ++#define CONFIG_AVS2_MUXER 0 ++#define CONFIG_BIT_MUXER 0 ++#define CONFIG_CAF_MUXER 0 ++#define CONFIG_CAVSVIDEO_MUXER 0 ++#define CONFIG_CODEC2_MUXER 0 ++#define CONFIG_CODEC2RAW_MUXER 0 ++#define CONFIG_CRC_MUXER 0 ++#define CONFIG_DASH_MUXER 0 ++#define CONFIG_DATA_MUXER 0 ++#define CONFIG_DAUD_MUXER 0 ++#define CONFIG_DIRAC_MUXER 0 ++#define CONFIG_DNXHD_MUXER 0 ++#define CONFIG_DTS_MUXER 0 ++#define CONFIG_DV_MUXER 0 ++#define CONFIG_EAC3_MUXER 0 ++#define CONFIG_F4V_MUXER 0 ++#define CONFIG_FFMETADATA_MUXER 0 ++#define CONFIG_FIFO_MUXER 0 ++#define CONFIG_FIFO_TEST_MUXER 0 ++#define CONFIG_FILMSTRIP_MUXER 0 
++#define CONFIG_FITS_MUXER 0 ++#define CONFIG_FLAC_MUXER 0 ++#define CONFIG_FLV_MUXER 0 ++#define CONFIG_FRAMECRC_MUXER 0 ++#define CONFIG_FRAMEHASH_MUXER 0 ++#define CONFIG_FRAMEMD5_MUXER 0 ++#define CONFIG_G722_MUXER 0 ++#define CONFIG_G723_1_MUXER 0 ++#define CONFIG_G726_MUXER 0 ++#define CONFIG_G726LE_MUXER 0 ++#define CONFIG_GIF_MUXER 0 ++#define CONFIG_GSM_MUXER 0 ++#define CONFIG_GXF_MUXER 0 ++#define CONFIG_H261_MUXER 0 ++#define CONFIG_H263_MUXER 0 ++#define CONFIG_H264_MUXER 0 ++#define CONFIG_HASH_MUXER 0 ++#define CONFIG_HDS_MUXER 0 ++#define CONFIG_HEVC_MUXER 0 ++#define CONFIG_HLS_MUXER 0 ++#define CONFIG_ICO_MUXER 0 ++#define CONFIG_ILBC_MUXER 0 ++#define CONFIG_IMAGE2_MUXER 0 ++#define CONFIG_IMAGE2PIPE_MUXER 0 ++#define CONFIG_IPOD_MUXER 0 ++#define CONFIG_IRCAM_MUXER 0 ++#define CONFIG_ISMV_MUXER 0 ++#define CONFIG_IVF_MUXER 0 ++#define CONFIG_JACOSUB_MUXER 0 ++#define CONFIG_LATM_MUXER 0 ++#define CONFIG_LRC_MUXER 0 ++#define CONFIG_M4V_MUXER 0 ++#define CONFIG_MD5_MUXER 0 ++#define CONFIG_MATROSKA_MUXER 0 ++#define CONFIG_MATROSKA_AUDIO_MUXER 0 ++#define CONFIG_MICRODVD_MUXER 0 ++#define CONFIG_MJPEG_MUXER 0 ++#define CONFIG_MLP_MUXER 0 ++#define CONFIG_MMF_MUXER 0 ++#define CONFIG_MOV_MUXER 0 ++#define CONFIG_MP2_MUXER 0 ++#define CONFIG_MP3_MUXER 0 ++#define CONFIG_MP4_MUXER 0 ++#define CONFIG_MPEG1SYSTEM_MUXER 0 ++#define CONFIG_MPEG1VCD_MUXER 0 ++#define CONFIG_MPEG1VIDEO_MUXER 0 ++#define CONFIG_MPEG2DVD_MUXER 0 ++#define CONFIG_MPEG2SVCD_MUXER 0 ++#define CONFIG_MPEG2VIDEO_MUXER 0 ++#define CONFIG_MPEG2VOB_MUXER 0 ++#define CONFIG_MPEGTS_MUXER 0 ++#define CONFIG_MPJPEG_MUXER 0 ++#define CONFIG_MXF_MUXER 0 ++#define CONFIG_MXF_D10_MUXER 0 ++#define CONFIG_MXF_OPATOM_MUXER 0 ++#define CONFIG_NULL_MUXER 0 ++#define CONFIG_NUT_MUXER 0 ++#define CONFIG_OGA_MUXER 0 ++#define CONFIG_OGG_MUXER 0 ++#define CONFIG_OGV_MUXER 0 ++#define CONFIG_OMA_MUXER 0 ++#define CONFIG_OPUS_MUXER 0 ++#define CONFIG_PCM_ALAW_MUXER 0 ++#define CONFIG_PCM_MULAW_MUXER 0 ++#define CONFIG_PCM_VIDC_MUXER 0 ++#define CONFIG_PCM_F64BE_MUXER 0 ++#define CONFIG_PCM_F64LE_MUXER 0 ++#define CONFIG_PCM_F32BE_MUXER 0 ++#define CONFIG_PCM_F32LE_MUXER 0 ++#define CONFIG_PCM_S32BE_MUXER 0 ++#define CONFIG_PCM_S32LE_MUXER 0 ++#define CONFIG_PCM_S24BE_MUXER 0 ++#define CONFIG_PCM_S24LE_MUXER 0 ++#define CONFIG_PCM_S16BE_MUXER 0 ++#define CONFIG_PCM_S16LE_MUXER 0 ++#define CONFIG_PCM_S8_MUXER 0 ++#define CONFIG_PCM_U32BE_MUXER 0 ++#define CONFIG_PCM_U32LE_MUXER 0 ++#define CONFIG_PCM_U24BE_MUXER 0 ++#define CONFIG_PCM_U24LE_MUXER 0 ++#define CONFIG_PCM_U16BE_MUXER 0 ++#define CONFIG_PCM_U16LE_MUXER 0 ++#define CONFIG_PCM_U8_MUXER 0 ++#define CONFIG_PSP_MUXER 0 ++#define CONFIG_RAWVIDEO_MUXER 0 ++#define CONFIG_RM_MUXER 0 ++#define CONFIG_ROQ_MUXER 0 ++#define CONFIG_RSO_MUXER 0 ++#define CONFIG_RTP_MUXER 0 ++#define CONFIG_RTP_MPEGTS_MUXER 0 ++#define CONFIG_RTSP_MUXER 0 ++#define CONFIG_SAP_MUXER 0 ++#define CONFIG_SBC_MUXER 0 ++#define CONFIG_SCC_MUXER 0 ++#define CONFIG_SEGAFILM_MUXER 0 ++#define CONFIG_SEGMENT_MUXER 0 ++#define CONFIG_STREAM_SEGMENT_MUXER 0 ++#define CONFIG_SINGLEJPEG_MUXER 0 ++#define CONFIG_SMJPEG_MUXER 0 ++#define CONFIG_SMOOTHSTREAMING_MUXER 0 ++#define CONFIG_SOX_MUXER 0 ++#define CONFIG_SPX_MUXER 0 ++#define CONFIG_SPDIF_MUXER 0 ++#define CONFIG_SRT_MUXER 0 ++#define CONFIG_STREAMHASH_MUXER 0 ++#define CONFIG_SUP_MUXER 0 ++#define CONFIG_SWF_MUXER 0 ++#define CONFIG_TEE_MUXER 0 ++#define CONFIG_TG2_MUXER 0 ++#define CONFIG_TGP_MUXER 0 ++#define CONFIG_MKVTIMESTAMP_V2_MUXER 0 
++#define CONFIG_TRUEHD_MUXER 0 ++#define CONFIG_TTA_MUXER 0 ++#define CONFIG_UNCODEDFRAMECRC_MUXER 0 ++#define CONFIG_VC1_MUXER 0 ++#define CONFIG_VC1T_MUXER 0 ++#define CONFIG_VOC_MUXER 0 ++#define CONFIG_W64_MUXER 0 ++#define CONFIG_WAV_MUXER 0 ++#define CONFIG_WEBM_MUXER 0 ++#define CONFIG_WEBM_DASH_MANIFEST_MUXER 0 ++#define CONFIG_WEBM_CHUNK_MUXER 0 ++#define CONFIG_WEBP_MUXER 0 ++#define CONFIG_WEBVTT_MUXER 0 ++#define CONFIG_WTV_MUXER 0 ++#define CONFIG_WV_MUXER 0 ++#define CONFIG_YUV4MPEGPIPE_MUXER 0 ++#define CONFIG_CHROMAPRINT_MUXER 0 ++#define CONFIG_ASYNC_PROTOCOL 0 ++#define CONFIG_BLURAY_PROTOCOL 0 ++#define CONFIG_CACHE_PROTOCOL 0 ++#define CONFIG_CONCAT_PROTOCOL 0 ++#define CONFIG_CRYPTO_PROTOCOL 0 ++#define CONFIG_DATA_PROTOCOL 0 ++#define CONFIG_FFRTMPCRYPT_PROTOCOL 0 ++#define CONFIG_FFRTMPHTTP_PROTOCOL 0 ++#define CONFIG_FILE_PROTOCOL 0 ++#define CONFIG_FTP_PROTOCOL 0 ++#define CONFIG_GOPHER_PROTOCOL 0 ++#define CONFIG_HLS_PROTOCOL 0 ++#define CONFIG_HTTP_PROTOCOL 0 ++#define CONFIG_HTTPPROXY_PROTOCOL 0 ++#define CONFIG_HTTPS_PROTOCOL 0 ++#define CONFIG_ICECAST_PROTOCOL 0 ++#define CONFIG_MMSH_PROTOCOL 0 ++#define CONFIG_MMST_PROTOCOL 0 ++#define CONFIG_MD5_PROTOCOL 0 ++#define CONFIG_PIPE_PROTOCOL 0 ++#define CONFIG_PROMPEG_PROTOCOL 0 ++#define CONFIG_RTMP_PROTOCOL 0 ++#define CONFIG_RTMPE_PROTOCOL 0 ++#define CONFIG_RTMPS_PROTOCOL 0 ++#define CONFIG_RTMPT_PROTOCOL 0 ++#define CONFIG_RTMPTE_PROTOCOL 0 ++#define CONFIG_RTMPTS_PROTOCOL 0 ++#define CONFIG_RTP_PROTOCOL 0 ++#define CONFIG_SCTP_PROTOCOL 0 ++#define CONFIG_SRTP_PROTOCOL 0 ++#define CONFIG_SUBFILE_PROTOCOL 0 ++#define CONFIG_TEE_PROTOCOL 0 ++#define CONFIG_TCP_PROTOCOL 0 ++#define CONFIG_TLS_PROTOCOL 0 ++#define CONFIG_UDP_PROTOCOL 0 ++#define CONFIG_UDPLITE_PROTOCOL 0 ++#define CONFIG_UNIX_PROTOCOL 0 ++#define CONFIG_LIBAMQP_PROTOCOL 0 ++#define CONFIG_LIBRTMP_PROTOCOL 0 ++#define CONFIG_LIBRTMPE_PROTOCOL 0 ++#define CONFIG_LIBRTMPS_PROTOCOL 0 ++#define CONFIG_LIBRTMPT_PROTOCOL 0 ++#define CONFIG_LIBRTMPTE_PROTOCOL 0 ++#define CONFIG_LIBSRT_PROTOCOL 0 ++#define CONFIG_LIBSSH_PROTOCOL 0 ++#define CONFIG_LIBSMBCLIENT_PROTOCOL 0 ++#define CONFIG_LIBZMQ_PROTOCOL 0 ++#endif /* FFMPEG_CONFIG_H */ +diff --git a/src/3rdparty/chromium/third_party/ffmpeg/chromium/config/Chrome/linux/la64/libavcodec/bsf_list.c b/src/3rdparty/chromium/third_party/ffmpeg/chromium/config/Chrome/linux/la64/libavcodec/bsf_list.c +new file mode 100644 +index 00000000000..d31ece942a7 +--- /dev/null ++++ b/src/3rdparty/chromium/third_party/ffmpeg/chromium/config/Chrome/linux/la64/libavcodec/bsf_list.c +@@ -0,0 +1,3 @@ ++static const AVBitStreamFilter * const bitstream_filters[] = { ++ &ff_null_bsf, ++ NULL }; +diff --git a/src/3rdparty/chromium/third_party/ffmpeg/chromium/config/Chrome/linux/la64/libavcodec/codec_list.c b/src/3rdparty/chromium/third_party/ffmpeg/chromium/config/Chrome/linux/la64/libavcodec/codec_list.c +new file mode 100644 +index 00000000000..49f757b2d86 +--- /dev/null ++++ b/src/3rdparty/chromium/third_party/ffmpeg/chromium/config/Chrome/linux/la64/libavcodec/codec_list.c +@@ -0,0 +1,20 @@ ++static const AVCodec * const codec_list[] = { ++ &ff_h264_decoder, ++ &ff_theora_decoder, ++ &ff_vp3_decoder, ++ &ff_vp8_decoder, ++ &ff_aac_decoder, ++ &ff_flac_decoder, ++ &ff_mp3_decoder, ++ &ff_vorbis_decoder, ++ &ff_pcm_alaw_decoder, ++ &ff_pcm_f32le_decoder, ++ &ff_pcm_mulaw_decoder, ++ &ff_pcm_s16be_decoder, ++ &ff_pcm_s16le_decoder, ++ &ff_pcm_s24be_decoder, ++ &ff_pcm_s24le_decoder, ++ &ff_pcm_s32le_decoder, ++ 
&ff_pcm_u8_decoder,
++    &ff_libopus_decoder,
++    NULL };
+diff --git a/src/3rdparty/chromium/third_party/ffmpeg/chromium/config/Chrome/linux/la64/libavcodec/parser_list.c b/src/3rdparty/chromium/third_party/ffmpeg/chromium/config/Chrome/linux/la64/libavcodec/parser_list.c
+new file mode 100644
+index 00000000000..50acddb28e5
+--- /dev/null
++++ b/src/3rdparty/chromium/third_party/ffmpeg/chromium/config/Chrome/linux/la64/libavcodec/parser_list.c
+@@ -0,0 +1,11 @@
++static const AVCodecParser * const parser_list[] = {
++    &ff_aac_parser,
++    &ff_flac_parser,
++    &ff_h264_parser,
++    &ff_mpegaudio_parser,
++    &ff_opus_parser,
++    &ff_vorbis_parser,
++    &ff_vp3_parser,
++    &ff_vp8_parser,
++    &ff_vp9_parser,
++    NULL };
+diff --git a/src/3rdparty/chromium/third_party/ffmpeg/chromium/config/Chrome/linux/la64/libavformat/demuxer_list.c b/src/3rdparty/chromium/third_party/ffmpeg/chromium/config/Chrome/linux/la64/libavformat/demuxer_list.c
+new file mode 100644
+index 00000000000..920b22bfa7d
+--- /dev/null
++++ b/src/3rdparty/chromium/third_party/ffmpeg/chromium/config/Chrome/linux/la64/libavformat/demuxer_list.c
+@@ -0,0 +1,9 @@
++static const AVInputFormat * const demuxer_list[] = {
++    &ff_aac_demuxer,
++    &ff_flac_demuxer,
++    &ff_matroska_demuxer,
++    &ff_mov_demuxer,
++    &ff_mp3_demuxer,
++    &ff_ogg_demuxer,
++    &ff_wav_demuxer,
++    NULL };
+diff --git a/src/3rdparty/chromium/third_party/ffmpeg/chromium/config/Chrome/linux/la64/libavformat/muxer_list.c b/src/3rdparty/chromium/third_party/ffmpeg/chromium/config/Chrome/linux/la64/libavformat/muxer_list.c
+new file mode 100644
+index 00000000000..f36d9499c6f
+--- /dev/null
++++ b/src/3rdparty/chromium/third_party/ffmpeg/chromium/config/Chrome/linux/la64/libavformat/muxer_list.c
+@@ -0,0 +1,2 @@
++static const AVOutputFormat * const muxer_list[] = {
++    NULL };
+diff --git a/src/3rdparty/chromium/third_party/ffmpeg/chromium/config/Chrome/linux/la64/libavformat/protocol_list.c b/src/3rdparty/chromium/third_party/ffmpeg/chromium/config/Chrome/linux/la64/libavformat/protocol_list.c
+new file mode 100644
+index 00000000000..247e1e4c3a2
+--- /dev/null
++++ b/src/3rdparty/chromium/third_party/ffmpeg/chromium/config/Chrome/linux/la64/libavformat/protocol_list.c
+@@ -0,0 +1,2 @@
++static const URLProtocol * const url_protocols[] = {
++    NULL };
+diff --git a/src/3rdparty/chromium/third_party/ffmpeg/chromium/config/Chrome/linux/la64/libavutil/avconfig.h b/src/3rdparty/chromium/third_party/ffmpeg/chromium/config/Chrome/linux/la64/libavutil/avconfig.h
+new file mode 100644
+index 00000000000..8558b35027f
+--- /dev/null
++++ b/src/3rdparty/chromium/third_party/ffmpeg/chromium/config/Chrome/linux/la64/libavutil/avconfig.h
+@@ -0,0 +1,6 @@
++/* Generated by ffmpeg configure */
++#ifndef AVUTIL_AVCONFIG_H
++#define AVUTIL_AVCONFIG_H
++#define AV_HAVE_BIGENDIAN 0
++#define AV_HAVE_FAST_UNALIGNED 0
++#endif /* AVUTIL_AVCONFIG_H */
+diff --git a/src/3rdparty/chromium/third_party/ffmpeg/chromium/config/Chrome/linux/la64/libavutil/ffversion.h b/src/3rdparty/chromium/third_party/ffmpeg/chromium/config/Chrome/linux/la64/libavutil/ffversion.h
+new file mode 100644
+index 00000000000..31e5b5036dc
+--- /dev/null
++++ b/src/3rdparty/chromium/third_party/ffmpeg/chromium/config/Chrome/linux/la64/libavutil/ffversion.h
+@@ -0,0 +1,5 @@
++/* Automatically generated by version.sh, do not manually edit!
*/ ++#ifndef AVUTIL_FFVERSION_H ++#define AVUTIL_FFVERSION_H ++#define FFMPEG_VERSION "git-2020-06-16-23b2a15c25" ++#endif /* AVUTIL_FFVERSION_H */ +diff --git a/src/3rdparty/chromium/third_party/ffmpeg/chromium/config/ChromeOS/linux/la64/config.h b/src/3rdparty/chromium/third_party/ffmpeg/chromium/config/ChromeOS/linux/la64/config.h +new file mode 100644 +index 00000000000..23fc2d09fd9 +--- /dev/null ++++ b/src/3rdparty/chromium/third_party/ffmpeg/chromium/config/ChromeOS/linux/la64/config.h +@@ -0,0 +1,2589 @@ ++/* Automatically generated by configure - do not modify! */ ++#ifndef FFMPEG_CONFIG_H ++#define FFMPEG_CONFIG_H ++/* #define FFMPEG_CONFIGURATION "--disable-everything --disable-all --disable-doc --disable-htmlpages --disable-manpages --disable-podpages --disable-txtpages --disable-static --enable-avcodec --enable-avformat --enable-avutil --enable-fft --enable-rdft --enable-static --enable-libopus --disable-debug --disable-bzlib --disable-iconv --disable-lzo --disable-network --disable-schannel --disable-sdl2 --disable-symver --disable-xlib --disable-zlib --disable-securetransport --disable-faan --disable-alsa --disable-autodetect --enable-decoder='vorbis,libopus,flac' --enable-decoder='pcm_u8,pcm_s16le,pcm_s24le,pcm_s32le,pcm_f32le,mp3' --enable-decoder='pcm_s16be,pcm_s24be,pcm_mulaw,pcm_alaw' --enable-demuxer='ogg,matroska,wav,flac,mp3,mov' --enable-parser='opus,vorbis,flac,mpegaudio,vp9' --extra-cflags=-I/mnt/chromium/src/third_party/opus/src/include --disable-linux-perf --x86asmexe=nasm --optflags='\"-O2\"' --enable-decoder='theora,vp8' --enable-parser='vp3,vp8' --target-os=linux --enable-pic --cc=clang --cxx=clang++ --ld=clang --enable-decoder='aac,h264' --enable-demuxer=aac --enable-parser='aac,h264' --enable-decoder=mpeg4 --enable-parser='h263,mpeg4video' --enable-demuxer=avi --enable-demuxer=amr --enable-decoder='amrnb,amrwb' --enable-decoder=gsm_ms --enable-parser=gsm" -- elide long configuration string from binary */ ++#define FFMPEG_LICENSE "LGPL version 2.1 or later" ++#define CONFIG_THIS_YEAR 2020 ++#define FFMPEG_DATADIR "/usr/local/share/ffmpeg" ++#define AVCONV_DATADIR "/usr/local/share/ffmpeg" ++#define CC_IDENT "clang version 8.0.1" ++#define av_restrict restrict ++#define EXTERN_PREFIX "" ++#define EXTERN_ASM ++#define BUILDSUF "" ++#define SLIBSUF ".so" ++#define HAVE_MMX2 HAVE_MMXEXT ++#define SWS_MAX_FILTER_SIZE 256 ++#define ARCH_AARCH64 0 ++#define ARCH_ALPHA 0 ++#define ARCH_ARM 0 ++#define ARCH_AVR32 0 ++#define ARCH_AVR32_AP 0 ++#define ARCH_AVR32_UC 0 ++#define ARCH_BFIN 0 ++#define ARCH_IA64 0 ++#define ARCH_M68K 0 ++#define ARCH_MIPS 0 ++#define ARCH_MIPS64 0 ++#define ARCH_PARISC 0 ++#define ARCH_PPC 0 ++#define ARCH_PPC64 0 ++#define ARCH_S390 0 ++#define ARCH_SH4 0 ++#define ARCH_SPARC 0 ++#define ARCH_SPARC64 0 ++#define ARCH_TILEGX 0 ++#define ARCH_TILEPRO 0 ++#define ARCH_TOMI 0 ++#define ARCH_X86 0 ++#define ARCH_X86_32 0 ++#define ARCH_X86_64 0 ++#define HAVE_ARMV5TE 0 ++#define HAVE_ARMV6 0 ++#define HAVE_ARMV6T2 0 ++#define HAVE_ARMV8 0 ++#define HAVE_NEON 0 ++#define HAVE_VFP 0 ++#define HAVE_VFPV3 0 ++#define HAVE_SETEND 0 ++#define HAVE_ALTIVEC 0 ++#define HAVE_DCBZL 0 ++#define HAVE_LDBRX 0 ++#define HAVE_POWER8 0 ++#define HAVE_PPC4XX 0 ++#define HAVE_VSX 0 ++#define HAVE_AESNI 0 ++#define HAVE_AMD3DNOW 0 ++#define HAVE_AMD3DNOWEXT 0 ++#define HAVE_AVX 0 ++#define HAVE_AVX2 0 ++#define HAVE_AVX512 0 ++#define HAVE_FMA3 0 ++#define HAVE_FMA4 0 ++#define HAVE_MMX 0 ++#define HAVE_MMXEXT 0 ++#define HAVE_SSE 0 ++#define HAVE_SSE2 0 
++#define HAVE_SSE3 0 ++#define HAVE_SSE4 0 ++#define HAVE_SSE42 0 ++#define HAVE_SSSE3 0 ++#define HAVE_XOP 0 ++#define HAVE_CPUNOP 0 ++#define HAVE_I686 0 ++#define HAVE_MIPSFPU 0 ++#define HAVE_MIPS32R2 0 ++#define HAVE_MIPS32R5 0 ++#define HAVE_MIPS64R2 0 ++#define HAVE_MIPS32R6 0 ++#define HAVE_MIPS64R6 0 ++#define HAVE_MIPSDSP 0 ++#define HAVE_MIPSDSPR2 0 ++#define HAVE_MSA 0 ++#define HAVE_MSA2 0 ++#define HAVE_LOONGSON2 0 ++#define HAVE_LOONGSON3 0 ++#define HAVE_MMI 0 ++#define HAVE_ARMV5TE_EXTERNAL 0 ++#define HAVE_ARMV6_EXTERNAL 0 ++#define HAVE_ARMV6T2_EXTERNAL 0 ++#define HAVE_ARMV8_EXTERNAL 0 ++#define HAVE_NEON_EXTERNAL 0 ++#define HAVE_VFP_EXTERNAL 0 ++#define HAVE_VFPV3_EXTERNAL 0 ++#define HAVE_SETEND_EXTERNAL 0 ++#define HAVE_ALTIVEC_EXTERNAL 0 ++#define HAVE_DCBZL_EXTERNAL 0 ++#define HAVE_LDBRX_EXTERNAL 0 ++#define HAVE_POWER8_EXTERNAL 0 ++#define HAVE_PPC4XX_EXTERNAL 0 ++#define HAVE_VSX_EXTERNAL 0 ++#define HAVE_AESNI_EXTERNAL 0 ++#define HAVE_AMD3DNOW_EXTERNAL 0 ++#define HAVE_AMD3DNOWEXT_EXTERNAL 0 ++#define HAVE_AVX_EXTERNAL 0 ++#define HAVE_AVX2_EXTERNAL 0 ++#define HAVE_AVX512_EXTERNAL 0 ++#define HAVE_FMA3_EXTERNAL 0 ++#define HAVE_FMA4_EXTERNAL 0 ++#define HAVE_MMX_EXTERNAL 0 ++#define HAVE_MMXEXT_EXTERNAL 0 ++#define HAVE_SSE_EXTERNAL 0 ++#define HAVE_SSE2_EXTERNAL 0 ++#define HAVE_SSE3_EXTERNAL 0 ++#define HAVE_SSE4_EXTERNAL 0 ++#define HAVE_SSE42_EXTERNAL 0 ++#define HAVE_SSSE3_EXTERNAL 0 ++#define HAVE_XOP_EXTERNAL 0 ++#define HAVE_CPUNOP_EXTERNAL 0 ++#define HAVE_I686_EXTERNAL 0 ++#define HAVE_MIPSFPU_EXTERNAL 0 ++#define HAVE_MIPS32R2_EXTERNAL 0 ++#define HAVE_MIPS32R5_EXTERNAL 0 ++#define HAVE_MIPS64R2_EXTERNAL 0 ++#define HAVE_MIPS32R6_EXTERNAL 0 ++#define HAVE_MIPS64R6_EXTERNAL 0 ++#define HAVE_MIPSDSP_EXTERNAL 0 ++#define HAVE_MIPSDSPR2_EXTERNAL 0 ++#define HAVE_MSA_EXTERNAL 0 ++#define HAVE_MSA2_EXTERNAL 0 ++#define HAVE_LOONGSON2_EXTERNAL 0 ++#define HAVE_LOONGSON3_EXTERNAL 0 ++#define HAVE_MMI_EXTERNAL 0 ++#define HAVE_ARMV5TE_INLINE 0 ++#define HAVE_ARMV6_INLINE 0 ++#define HAVE_ARMV6T2_INLINE 0 ++#define HAVE_ARMV8_INLINE 0 ++#define HAVE_NEON_INLINE 0 ++#define HAVE_VFP_INLINE 0 ++#define HAVE_VFPV3_INLINE 0 ++#define HAVE_SETEND_INLINE 0 ++#define HAVE_ALTIVEC_INLINE 0 ++#define HAVE_DCBZL_INLINE 0 ++#define HAVE_LDBRX_INLINE 0 ++#define HAVE_POWER8_INLINE 0 ++#define HAVE_PPC4XX_INLINE 0 ++#define HAVE_VSX_INLINE 0 ++#define HAVE_AESNI_INLINE 0 ++#define HAVE_AMD3DNOW_INLINE 0 ++#define HAVE_AMD3DNOWEXT_INLINE 0 ++#define HAVE_AVX_INLINE 0 ++#define HAVE_AVX2_INLINE 0 ++#define HAVE_AVX512_INLINE 0 ++#define HAVE_FMA3_INLINE 0 ++#define HAVE_FMA4_INLINE 0 ++#define HAVE_MMX_INLINE 0 ++#define HAVE_MMXEXT_INLINE 0 ++#define HAVE_SSE_INLINE 0 ++#define HAVE_SSE2_INLINE 0 ++#define HAVE_SSE3_INLINE 0 ++#define HAVE_SSE4_INLINE 0 ++#define HAVE_SSE42_INLINE 0 ++#define HAVE_SSSE3_INLINE 0 ++#define HAVE_XOP_INLINE 0 ++#define HAVE_CPUNOP_INLINE 0 ++#define HAVE_I686_INLINE 0 ++#define HAVE_MIPSFPU_INLINE 0 ++#define HAVE_MIPS32R2_INLINE 0 ++#define HAVE_MIPS32R5_INLINE 0 ++#define HAVE_MIPS64R2_INLINE 0 ++#define HAVE_MIPS32R6_INLINE 0 ++#define HAVE_MIPS64R6_INLINE 0 ++#define HAVE_MIPSDSP_INLINE 0 ++#define HAVE_MIPSDSPR2_INLINE 0 ++#define HAVE_MSA_INLINE 0 ++#define HAVE_MSA2_INLINE 0 ++#define HAVE_LOONGSON2_INLINE 0 ++#define HAVE_LOONGSON3_INLINE 0 ++#define HAVE_MMI_INLINE 0 ++#define HAVE_ALIGNED_STACK 0 ++#define HAVE_FAST_64BIT 0 ++#define HAVE_FAST_CLZ 0 ++#define HAVE_FAST_CMOV 0 ++#define HAVE_LOCAL_ALIGNED 0 ++#define 
HAVE_SIMD_ALIGN_16 0 ++#define HAVE_SIMD_ALIGN_32 0 ++#define HAVE_SIMD_ALIGN_64 0 ++#define HAVE_ATOMIC_CAS_PTR 0 ++#define HAVE_MACHINE_RW_BARRIER 0 ++#define HAVE_MEMORYBARRIER 0 ++#define HAVE_MM_EMPTY 0 ++#define HAVE_RDTSC 0 ++#define HAVE_SEM_TIMEDWAIT 1 ++#define HAVE_SYNC_VAL_COMPARE_AND_SWAP 1 ++#define HAVE_CABS 0 ++#define HAVE_CEXP 0 ++#define HAVE_INLINE_ASM 1 ++#define HAVE_SYMVER 0 ++#define HAVE_X86ASM 0 ++#define HAVE_BIGENDIAN 0 ++#define HAVE_FAST_UNALIGNED 0 ++#define HAVE_ARPA_INET_H 0 ++#define HAVE_ASM_TYPES_H 1 ++#define HAVE_CDIO_PARANOIA_H 0 ++#define HAVE_CDIO_PARANOIA_PARANOIA_H 0 ++#define HAVE_CUDA_H 0 ++#define HAVE_DISPATCH_DISPATCH_H 0 ++#define HAVE_DEV_BKTR_IOCTL_BT848_H 0 ++#define HAVE_DEV_BKTR_IOCTL_METEOR_H 0 ++#define HAVE_DEV_IC_BT8XX_H 0 ++#define HAVE_DEV_VIDEO_BKTR_IOCTL_BT848_H 0 ++#define HAVE_DEV_VIDEO_METEOR_IOCTL_METEOR_H 0 ++#define HAVE_DIRECT_H 0 ++#define HAVE_DIRENT_H 1 ++#define HAVE_DXGIDEBUG_H 0 ++#define HAVE_DXVA_H 0 ++#define HAVE_ES2_GL_H 0 ++#define HAVE_GSM_H 0 ++#define HAVE_IO_H 0 ++#define HAVE_LINUX_PERF_EVENT_H 1 ++#define HAVE_MACHINE_IOCTL_BT848_H 0 ++#define HAVE_MACHINE_IOCTL_METEOR_H 0 ++#define HAVE_MALLOC_H 1 ++#define HAVE_OPENCV2_CORE_CORE_C_H 0 ++#define HAVE_OPENGL_GL3_H 0 ++#define HAVE_POLL_H 1 ++#define HAVE_SYS_PARAM_H 1 ++#define HAVE_SYS_RESOURCE_H 1 ++#define HAVE_SYS_SELECT_H 1 ++#define HAVE_SYS_SOUNDCARD_H 1 ++#define HAVE_SYS_TIME_H 1 ++#define HAVE_SYS_UN_H 1 ++#define HAVE_SYS_VIDEOIO_H 0 ++#define HAVE_TERMIOS_H 1 ++#define HAVE_UDPLITE_H 0 ++#define HAVE_UNISTD_H 1 ++#define HAVE_VALGRIND_VALGRIND_H 0 /* #define HAVE_VALGRIND_VALGRIND_H 0 -- forced to 0. See https://crbug.com/590440 */ ++#define HAVE_WINDOWS_H 0 ++#define HAVE_WINSOCK2_H 0 ++#define HAVE_INTRINSICS_NEON 0 ++#define HAVE_ATANF 1 ++#define HAVE_ATAN2F 1 ++#define HAVE_CBRT 1 ++#define HAVE_CBRTF 1 ++#define HAVE_COPYSIGN 1 ++#define HAVE_COSF 1 ++#define HAVE_ERF 1 ++#define HAVE_EXP2 1 ++#define HAVE_EXP2F 1 ++#define HAVE_EXPF 1 ++#define HAVE_HYPOT 1 ++#define HAVE_ISFINITE 1 ++#define HAVE_ISINF 1 ++#define HAVE_ISNAN 1 ++#define HAVE_LDEXPF 1 ++#define HAVE_LLRINT 1 ++#define HAVE_LLRINTF 1 ++#define HAVE_LOG2 1 ++#define HAVE_LOG2F 1 ++#define HAVE_LOG10F 1 ++#define HAVE_LRINT 1 ++#define HAVE_LRINTF 1 ++#define HAVE_POWF 1 ++#define HAVE_RINT 1 ++#define HAVE_ROUND 1 ++#define HAVE_ROUNDF 1 ++#define HAVE_SINF 1 ++#define HAVE_TRUNC 1 ++#define HAVE_TRUNCF 1 ++#define HAVE_DOS_PATHS 0 ++#define HAVE_LIBC_MSVCRT 0 ++#define HAVE_MMAL_PARAMETER_VIDEO_MAX_NUM_CALLBACKS 0 ++#define HAVE_SECTION_DATA_REL_RO 1 ++#define HAVE_THREADS 1 ++#define HAVE_UWP 0 ++#define HAVE_WINRT 0 ++#define HAVE_ACCESS 1 ++#define HAVE_ALIGNED_MALLOC 0 ++#define HAVE_ARC4RANDOM 0 ++#define HAVE_CLOCK_GETTIME 1 ++#define HAVE_CLOSESOCKET 0 ++#define HAVE_COMMANDLINETOARGVW 0 ++#define HAVE_FCNTL 1 ++#define HAVE_GETADDRINFO 0 ++#define HAVE_GETHRTIME 0 ++#define HAVE_GETOPT 1 ++#define HAVE_GETMODULEHANDLE 0 ++#define HAVE_GETPROCESSAFFINITYMASK 0 ++#define HAVE_GETPROCESSMEMORYINFO 0 ++#define HAVE_GETPROCESSTIMES 0 ++#define HAVE_GETRUSAGE 1 ++#define HAVE_GETSTDHANDLE 0 ++#define HAVE_GETSYSTEMTIMEASFILETIME 0 ++#define HAVE_GETTIMEOFDAY 1 ++#define HAVE_GLOB 1 ++#define HAVE_GLXGETPROCADDRESS 0 ++#define HAVE_GMTIME_R 1 ++#define HAVE_INET_ATON 0 ++#define HAVE_ISATTY 1 ++#define HAVE_KBHIT 0 ++#define HAVE_LOCALTIME_R 1 ++#define HAVE_LSTAT 1 ++#define HAVE_LZO1X_999_COMPRESS 0 ++#define HAVE_MACH_ABSOLUTE_TIME 0 ++#define HAVE_MAPVIEWOFFILE 0 
++#define HAVE_MEMALIGN 1 ++#define HAVE_MKSTEMP 1 ++#define HAVE_MMAP 1 ++#define HAVE_MPROTECT 1 ++#define HAVE_NANOSLEEP 1 ++#define HAVE_PEEKNAMEDPIPE 0 ++#define HAVE_POSIX_MEMALIGN 1 ++#define HAVE_PTHREAD_CANCEL 1 ++#define HAVE_SCHED_GETAFFINITY 1 ++#define HAVE_SECITEMIMPORT 0 ++#define HAVE_SETCONSOLETEXTATTRIBUTE 0 ++#define HAVE_SETCONSOLECTRLHANDLER 0 ++#define HAVE_SETDLLDIRECTORY 0 ++#define HAVE_SETMODE 0 ++#define HAVE_SETRLIMIT 1 ++#define HAVE_SLEEP 0 ++#define HAVE_STRERROR_R 1 ++#define HAVE_SYSCONF 1 ++#define HAVE_SYSCTL 0 /* #define HAVE_SYSCTL 1 -- forced to 0 for Fuchsia */ ++#define HAVE_USLEEP 1 ++#define HAVE_UTGETOSTYPEFROMSTRING 0 ++#define HAVE_VIRTUALALLOC 0 ++#define HAVE_WGLGETPROCADDRESS 0 ++#define HAVE_BCRYPT 0 ++#define HAVE_VAAPI_DRM 0 ++#define HAVE_VAAPI_X11 0 ++#define HAVE_VDPAU_X11 0 ++#define HAVE_PTHREADS 1 ++#define HAVE_OS2THREADS 0 ++#define HAVE_W32THREADS 0 ++#define HAVE_AS_ARCH_DIRECTIVE 0 ++#define HAVE_AS_DN_DIRECTIVE 0 ++#define HAVE_AS_FPU_DIRECTIVE 0 ++#define HAVE_AS_FUNC 0 ++#define HAVE_AS_OBJECT_ARCH 0 ++#define HAVE_ASM_MOD_Q 0 ++#define HAVE_BLOCKS_EXTENSION 0 ++#define HAVE_EBP_AVAILABLE 0 ++#define HAVE_EBX_AVAILABLE 0 ++#define HAVE_GNU_AS 0 ++#define HAVE_GNU_WINDRES 0 ++#define HAVE_IBM_ASM 0 ++#define HAVE_INLINE_ASM_DIRECT_SYMBOL_REFS 0 ++#define HAVE_INLINE_ASM_LABELS 1 ++#define HAVE_INLINE_ASM_NONLOCAL_LABELS 1 ++#define HAVE_PRAGMA_DEPRECATED 1 ++#define HAVE_RSYNC_CONTIMEOUT 0 ++#define HAVE_SYMVER_ASM_LABEL 1 ++#define HAVE_SYMVER_GNU_ASM 1 ++#define HAVE_VFP_ARGS 0 ++#define HAVE_XFORM_ASM 0 ++#define HAVE_XMM_CLOBBERS 0 ++#define HAVE_KCMVIDEOCODECTYPE_HEVC 0 ++#define HAVE_KCVPIXELFORMATTYPE_420YPCBCR10BIPLANARVIDEORANGE 0 ++#define HAVE_KCVIMAGEBUFFERTRANSFERFUNCTION_SMPTE_ST_2084_PQ 0 ++#define HAVE_KCVIMAGEBUFFERTRANSFERFUNCTION_ITU_R_2100_HLG 0 ++#define HAVE_KCVIMAGEBUFFERTRANSFERFUNCTION_LINEAR 0 ++#define HAVE_SOCKLEN_T 0 ++#define HAVE_STRUCT_ADDRINFO 0 ++#define HAVE_STRUCT_GROUP_SOURCE_REQ 0 ++#define HAVE_STRUCT_IP_MREQ_SOURCE 0 ++#define HAVE_STRUCT_IPV6_MREQ 0 ++#define HAVE_STRUCT_MSGHDR_MSG_FLAGS 0 ++#define HAVE_STRUCT_POLLFD 0 ++#define HAVE_STRUCT_RUSAGE_RU_MAXRSS 1 ++#define HAVE_STRUCT_SCTP_EVENT_SUBSCRIBE 0 ++#define HAVE_STRUCT_SOCKADDR_IN6 0 ++#define HAVE_STRUCT_SOCKADDR_SA_LEN 0 ++#define HAVE_STRUCT_SOCKADDR_STORAGE 0 ++#define HAVE_STRUCT_STAT_ST_MTIM_TV_NSEC 1 ++#define HAVE_STRUCT_V4L2_FRMIVALENUM_DISCRETE 0 ++#define HAVE_MAKEINFO 1 ++#define HAVE_MAKEINFO_HTML 1 ++#define HAVE_OPENCL_D3D11 0 ++#define HAVE_OPENCL_DRM_ARM 0 ++#define HAVE_OPENCL_DRM_BEIGNET 0 ++#define HAVE_OPENCL_DXVA2 0 ++#define HAVE_OPENCL_VAAPI_BEIGNET 0 ++#define HAVE_OPENCL_VAAPI_INTEL_MEDIA 0 ++#define HAVE_PERL 1 ++#define HAVE_POD2MAN 1 ++#define HAVE_TEXI2HTML 0 ++#define CONFIG_DOC 0 ++#define CONFIG_HTMLPAGES 0 ++#define CONFIG_MANPAGES 0 ++#define CONFIG_PODPAGES 0 ++#define CONFIG_TXTPAGES 0 ++#define CONFIG_AVIO_LIST_DIR_EXAMPLE 1 ++#define CONFIG_AVIO_READING_EXAMPLE 1 ++#define CONFIG_DECODE_AUDIO_EXAMPLE 1 ++#define CONFIG_DECODE_VIDEO_EXAMPLE 1 ++#define CONFIG_DEMUXING_DECODING_EXAMPLE 1 ++#define CONFIG_ENCODE_AUDIO_EXAMPLE 1 ++#define CONFIG_ENCODE_VIDEO_EXAMPLE 1 ++#define CONFIG_EXTRACT_MVS_EXAMPLE 1 ++#define CONFIG_FILTER_AUDIO_EXAMPLE 0 ++#define CONFIG_FILTERING_AUDIO_EXAMPLE 0 ++#define CONFIG_FILTERING_VIDEO_EXAMPLE 0 ++#define CONFIG_HTTP_MULTICLIENT_EXAMPLE 1 ++#define CONFIG_HW_DECODE_EXAMPLE 1 ++#define CONFIG_METADATA_EXAMPLE 1 ++#define CONFIG_MUXING_EXAMPLE 0 ++#define 
CONFIG_QSVDEC_EXAMPLE 0 ++#define CONFIG_REMUXING_EXAMPLE 1 ++#define CONFIG_RESAMPLING_AUDIO_EXAMPLE 0 ++#define CONFIG_SCALING_VIDEO_EXAMPLE 0 ++#define CONFIG_TRANSCODE_AAC_EXAMPLE 0 ++#define CONFIG_TRANSCODING_EXAMPLE 0 ++#define CONFIG_VAAPI_ENCODE_EXAMPLE 0 ++#define CONFIG_VAAPI_TRANSCODE_EXAMPLE 0 ++#define CONFIG_AVISYNTH 0 ++#define CONFIG_FREI0R 0 ++#define CONFIG_LIBCDIO 0 ++#define CONFIG_LIBDAVS2 0 ++#define CONFIG_LIBRUBBERBAND 0 ++#define CONFIG_LIBVIDSTAB 0 ++#define CONFIG_LIBX264 0 ++#define CONFIG_LIBX265 0 ++#define CONFIG_LIBXAVS 0 ++#define CONFIG_LIBXAVS2 0 ++#define CONFIG_LIBXVID 0 ++#define CONFIG_DECKLINK 0 ++#define CONFIG_LIBFDK_AAC 0 ++#define CONFIG_OPENSSL 0 ++#define CONFIG_LIBTLS 0 ++#define CONFIG_GMP 0 ++#define CONFIG_LIBARIBB24 0 ++#define CONFIG_LIBLENSFUN 0 ++#define CONFIG_LIBOPENCORE_AMRNB 0 ++#define CONFIG_LIBOPENCORE_AMRWB 0 ++#define CONFIG_LIBVMAF 0 ++#define CONFIG_LIBVO_AMRWBENC 0 ++#define CONFIG_MBEDTLS 0 ++#define CONFIG_RKMPP 0 ++#define CONFIG_LIBSMBCLIENT 0 ++#define CONFIG_CHROMAPRINT 0 ++#define CONFIG_GCRYPT 0 ++#define CONFIG_GNUTLS 0 ++#define CONFIG_JNI 0 ++#define CONFIG_LADSPA 0 ++#define CONFIG_LIBAOM 0 ++#define CONFIG_LIBASS 0 ++#define CONFIG_LIBBLURAY 0 ++#define CONFIG_LIBBS2B 0 ++#define CONFIG_LIBCACA 0 ++#define CONFIG_LIBCELT 0 ++#define CONFIG_LIBCODEC2 0 ++#define CONFIG_LIBDAV1D 0 ++#define CONFIG_LIBDC1394 0 ++#define CONFIG_LIBDRM 0 ++#define CONFIG_LIBFLITE 0 ++#define CONFIG_LIBFONTCONFIG 0 ++#define CONFIG_LIBFREETYPE 0 ++#define CONFIG_LIBFRIBIDI 0 ++#define CONFIG_LIBGLSLANG 0 ++#define CONFIG_LIBGME 0 ++#define CONFIG_LIBGSM 0 ++#define CONFIG_LIBIEC61883 0 ++#define CONFIG_LIBILBC 0 ++#define CONFIG_LIBJACK 0 ++#define CONFIG_LIBKLVANC 0 ++#define CONFIG_LIBKVAZAAR 0 ++#define CONFIG_LIBMODPLUG 0 ++#define CONFIG_LIBMP3LAME 0 ++#define CONFIG_LIBMYSOFA 0 ++#define CONFIG_LIBOPENCV 0 ++#define CONFIG_LIBOPENH264 0 ++#define CONFIG_LIBOPENJPEG 0 ++#define CONFIG_LIBOPENMPT 0 ++#define CONFIG_LIBOPUS 1 ++#define CONFIG_LIBPULSE 0 ++#define CONFIG_LIBRABBITMQ 0 ++#define CONFIG_LIBRAV1E 0 ++#define CONFIG_LIBRSVG 0 ++#define CONFIG_LIBRTMP 0 ++#define CONFIG_LIBSHINE 0 ++#define CONFIG_LIBSMBCLIENT 0 ++#define CONFIG_LIBSNAPPY 0 ++#define CONFIG_LIBSOXR 0 ++#define CONFIG_LIBSPEEX 0 ++#define CONFIG_LIBSRT 0 ++#define CONFIG_LIBSSH 0 ++#define CONFIG_LIBTENSORFLOW 0 ++#define CONFIG_LIBTESSERACT 0 ++#define CONFIG_LIBTHEORA 0 ++#define CONFIG_LIBTWOLAME 0 ++#define CONFIG_LIBV4L2 0 ++#define CONFIG_LIBVORBIS 0 ++#define CONFIG_LIBVPX 0 ++#define CONFIG_LIBWAVPACK 0 ++#define CONFIG_LIBWEBP 0 ++#define CONFIG_LIBXML2 0 ++#define CONFIG_LIBZIMG 0 ++#define CONFIG_LIBZMQ 0 ++#define CONFIG_LIBZVBI 0 ++#define CONFIG_LV2 0 ++#define CONFIG_MEDIACODEC 0 ++#define CONFIG_OPENAL 0 ++#define CONFIG_OPENGL 0 ++#define CONFIG_POCKETSPHINX 0 ++#define CONFIG_VAPOURSYNTH 0 ++#define CONFIG_ALSA 0 ++#define CONFIG_APPKIT 0 ++#define CONFIG_AVFOUNDATION 0 ++#define CONFIG_BZLIB 0 ++#define CONFIG_COREIMAGE 0 ++#define CONFIG_ICONV 0 ++#define CONFIG_LIBXCB 0 ++#define CONFIG_LIBXCB_SHM 0 ++#define CONFIG_LIBXCB_SHAPE 0 ++#define CONFIG_LIBXCB_XFIXES 0 ++#define CONFIG_LZMA 0 ++#define CONFIG_SCHANNEL 0 ++#define CONFIG_SDL2 0 ++#define CONFIG_SECURETRANSPORT 0 ++#define CONFIG_SNDIO 0 ++#define CONFIG_XLIB 0 ++#define CONFIG_ZLIB 0 ++#define CONFIG_CUDA_NVCC 0 ++#define CONFIG_CUDA_SDK 0 ++#define CONFIG_LIBNPP 0 ++#define CONFIG_LIBMFX 0 ++#define CONFIG_MMAL 0 ++#define CONFIG_OMX 0 ++#define CONFIG_OPENCL 0 ++#define 
CONFIG_VULKAN 0 ++#define CONFIG_AMF 0 ++#define CONFIG_AUDIOTOOLBOX 0 ++#define CONFIG_CRYSTALHD 0 ++#define CONFIG_CUDA 0 ++#define CONFIG_CUDA_LLVM 0 ++#define CONFIG_CUVID 0 ++#define CONFIG_D3D11VA 0 ++#define CONFIG_DXVA2 0 ++#define CONFIG_FFNVCODEC 0 ++#define CONFIG_NVDEC 0 ++#define CONFIG_NVENC 0 ++#define CONFIG_VAAPI 0 ++#define CONFIG_VDPAU 0 ++#define CONFIG_VIDEOTOOLBOX 0 ++#define CONFIG_V4L2_M2M 0 ++#define CONFIG_XVMC 0 ++#define CONFIG_FTRAPV 0 ++#define CONFIG_GRAY 0 ++#define CONFIG_HARDCODED_TABLES 0 ++#define CONFIG_OMX_RPI 0 ++#define CONFIG_RUNTIME_CPUDETECT 1 ++#define CONFIG_SAFE_BITSTREAM_READER 1 ++#define CONFIG_SHARED 0 ++#define CONFIG_SMALL 0 ++#define CONFIG_STATIC 1 ++#define CONFIG_SWSCALE_ALPHA 1 ++#define CONFIG_GPL 0 ++#define CONFIG_NONFREE 0 ++#define CONFIG_VERSION3 0 ++#define CONFIG_AVDEVICE 0 ++#define CONFIG_AVFILTER 0 ++#define CONFIG_SWSCALE 0 ++#define CONFIG_POSTPROC 0 ++#define CONFIG_AVFORMAT 1 ++#define CONFIG_AVCODEC 1 ++#define CONFIG_SWRESAMPLE 0 ++#define CONFIG_AVRESAMPLE 0 ++#define CONFIG_AVUTIL 1 ++#define CONFIG_FFPLAY 0 ++#define CONFIG_FFPROBE 0 ++#define CONFIG_FFMPEG 0 ++#define CONFIG_DCT 1 ++#define CONFIG_DWT 0 ++#define CONFIG_ERROR_RESILIENCE 1 ++#define CONFIG_FAAN 0 ++#define CONFIG_FAST_UNALIGNED 0 ++#define CONFIG_FFT 1 ++#define CONFIG_LSP 1 ++#define CONFIG_LZO 0 ++#define CONFIG_MDCT 1 ++#define CONFIG_PIXELUTILS 0 ++#define CONFIG_NETWORK 0 ++#define CONFIG_RDFT 1 ++#define CONFIG_AUTODETECT 0 ++#define CONFIG_FONTCONFIG 0 ++#define CONFIG_LARGE_TESTS 1 ++#define CONFIG_LINUX_PERF 0 ++#define CONFIG_MEMORY_POISONING 0 ++#define CONFIG_NEON_CLOBBER_TEST 0 ++#define CONFIG_OSSFUZZ 0 ++#define CONFIG_PIC 1 ++#define CONFIG_THUMB 0 ++#define CONFIG_VALGRIND_BACKTRACE 0 ++#define CONFIG_XMM_CLOBBER_TEST 0 ++#define CONFIG_BSFS 1 ++#define CONFIG_DECODERS 1 ++#define CONFIG_ENCODERS 0 ++#define CONFIG_HWACCELS 0 ++#define CONFIG_PARSERS 1 ++#define CONFIG_INDEVS 0 ++#define CONFIG_OUTDEVS 0 ++#define CONFIG_FILTERS 0 ++#define CONFIG_DEMUXERS 1 ++#define CONFIG_MUXERS 0 ++#define CONFIG_PROTOCOLS 0 ++#define CONFIG_AANDCTTABLES 0 ++#define CONFIG_AC3DSP 0 ++#define CONFIG_ADTS_HEADER 1 ++#define CONFIG_AUDIO_FRAME_QUEUE 0 ++#define CONFIG_AUDIODSP 0 ++#define CONFIG_BLOCKDSP 1 ++#define CONFIG_BSWAPDSP 0 ++#define CONFIG_CABAC 1 ++#define CONFIG_CBS 0 ++#define CONFIG_CBS_AV1 0 ++#define CONFIG_CBS_H264 0 ++#define CONFIG_CBS_H265 0 ++#define CONFIG_CBS_JPEG 0 ++#define CONFIG_CBS_MPEG2 0 ++#define CONFIG_CBS_VP9 0 ++#define CONFIG_DIRAC_PARSE 1 ++#define CONFIG_DNN 0 ++#define CONFIG_DVPROFILE 0 ++#define CONFIG_EXIF 1 ++#define CONFIG_FAANDCT 0 ++#define CONFIG_FAANIDCT 0 ++#define CONFIG_FDCTDSP 1 ++#define CONFIG_FLACDSP 1 ++#define CONFIG_FMTCONVERT 0 ++#define CONFIG_FRAME_THREAD_ENCODER 0 ++#define CONFIG_G722DSP 0 ++#define CONFIG_GOLOMB 1 ++#define CONFIG_GPLV3 0 ++#define CONFIG_H263DSP 1 ++#define CONFIG_H264CHROMA 1 ++#define CONFIG_H264DSP 1 ++#define CONFIG_H264PARSE 1 ++#define CONFIG_H264PRED 1 ++#define CONFIG_H264QPEL 1 ++#define CONFIG_HEVCPARSE 0 ++#define CONFIG_HPELDSP 1 ++#define CONFIG_HUFFMAN 0 ++#define CONFIG_HUFFYUVDSP 0 ++#define CONFIG_HUFFYUVENCDSP 0 ++#define CONFIG_IDCTDSP 1 ++#define CONFIG_IIRFILTER 0 ++#define CONFIG_MDCT15 1 ++#define CONFIG_INTRAX8 0 ++#define CONFIG_ISO_MEDIA 1 ++#define CONFIG_IVIDSP 0 ++#define CONFIG_JPEGTABLES 0 ++#define CONFIG_LGPLV3 0 ++#define CONFIG_LIBX262 0 ++#define CONFIG_LLAUDDSP 0 ++#define CONFIG_LLVIDDSP 0 ++#define CONFIG_LLVIDENCDSP 0 
++#define CONFIG_LPC 0 ++#define CONFIG_LZF 0 ++#define CONFIG_ME_CMP 1 ++#define CONFIG_MPEG_ER 1 ++#define CONFIG_MPEGAUDIO 1 ++#define CONFIG_MPEGAUDIODSP 1 ++#define CONFIG_MPEGAUDIOHEADER 1 ++#define CONFIG_MPEGVIDEO 1 ++#define CONFIG_MPEGVIDEOENC 0 ++#define CONFIG_MSS34DSP 0 ++#define CONFIG_PIXBLOCKDSP 1 ++#define CONFIG_QPELDSP 1 ++#define CONFIG_QSV 0 ++#define CONFIG_QSVDEC 0 ++#define CONFIG_QSVENC 0 ++#define CONFIG_QSVVPP 0 ++#define CONFIG_RANGECODER 0 ++#define CONFIG_RIFFDEC 1 ++#define CONFIG_RIFFENC 0 ++#define CONFIG_RTPDEC 0 ++#define CONFIG_RTPENC_CHAIN 0 ++#define CONFIG_RV34DSP 0 ++#define CONFIG_SCENE_SAD 0 ++#define CONFIG_SINEWIN 1 ++#define CONFIG_SNAPPY 0 ++#define CONFIG_SRTP 0 ++#define CONFIG_STARTCODE 1 ++#define CONFIG_TEXTUREDSP 0 ++#define CONFIG_TEXTUREDSPENC 0 ++#define CONFIG_TPELDSP 0 ++#define CONFIG_VAAPI_1 0 ++#define CONFIG_VAAPI_ENCODE 0 ++#define CONFIG_VC1DSP 0 ++#define CONFIG_VIDEODSP 1 ++#define CONFIG_VP3DSP 1 ++#define CONFIG_VP56DSP 0 ++#define CONFIG_VP8DSP 1 ++#define CONFIG_WMA_FREQS 0 ++#define CONFIG_WMV2DSP 0 ++#define CONFIG_AAC_ADTSTOASC_BSF 0 ++#define CONFIG_AV1_FRAME_MERGE_BSF 0 ++#define CONFIG_AV1_FRAME_SPLIT_BSF 0 ++#define CONFIG_AV1_METADATA_BSF 0 ++#define CONFIG_CHOMP_BSF 0 ++#define CONFIG_DUMP_EXTRADATA_BSF 0 ++#define CONFIG_DCA_CORE_BSF 0 ++#define CONFIG_EAC3_CORE_BSF 0 ++#define CONFIG_EXTRACT_EXTRADATA_BSF 0 ++#define CONFIG_FILTER_UNITS_BSF 0 ++#define CONFIG_H264_METADATA_BSF 0 ++#define CONFIG_H264_MP4TOANNEXB_BSF 0 ++#define CONFIG_H264_REDUNDANT_PPS_BSF 0 ++#define CONFIG_HAPQA_EXTRACT_BSF 0 ++#define CONFIG_HEVC_METADATA_BSF 0 ++#define CONFIG_HEVC_MP4TOANNEXB_BSF 0 ++#define CONFIG_IMX_DUMP_HEADER_BSF 0 ++#define CONFIG_MJPEG2JPEG_BSF 0 ++#define CONFIG_MJPEGA_DUMP_HEADER_BSF 0 ++#define CONFIG_MP3_HEADER_DECOMPRESS_BSF 0 ++#define CONFIG_MPEG2_METADATA_BSF 0 ++#define CONFIG_MPEG4_UNPACK_BFRAMES_BSF 0 ++#define CONFIG_MOV2TEXTSUB_BSF 0 ++#define CONFIG_NOISE_BSF 0 ++#define CONFIG_NULL_BSF 1 ++#define CONFIG_PRORES_METADATA_BSF 0 ++#define CONFIG_REMOVE_EXTRADATA_BSF 0 ++#define CONFIG_TEXT2MOVSUB_BSF 0 ++#define CONFIG_TRACE_HEADERS_BSF 0 ++#define CONFIG_TRUEHD_CORE_BSF 0 ++#define CONFIG_VP9_METADATA_BSF 0 ++#define CONFIG_VP9_RAW_REORDER_BSF 0 ++#define CONFIG_VP9_SUPERFRAME_BSF 0 ++#define CONFIG_VP9_SUPERFRAME_SPLIT_BSF 0 ++#define CONFIG_AASC_DECODER 0 ++#define CONFIG_AIC_DECODER 0 ++#define CONFIG_ALIAS_PIX_DECODER 0 ++#define CONFIG_AGM_DECODER 0 ++#define CONFIG_AMV_DECODER 0 ++#define CONFIG_ANM_DECODER 0 ++#define CONFIG_ANSI_DECODER 0 ++#define CONFIG_APNG_DECODER 0 ++#define CONFIG_ARBC_DECODER 0 ++#define CONFIG_ASV1_DECODER 0 ++#define CONFIG_ASV2_DECODER 0 ++#define CONFIG_AURA_DECODER 0 ++#define CONFIG_AURA2_DECODER 0 ++#define CONFIG_AVRP_DECODER 0 ++#define CONFIG_AVRN_DECODER 0 ++#define CONFIG_AVS_DECODER 0 ++#define CONFIG_AVUI_DECODER 0 ++#define CONFIG_AYUV_DECODER 0 ++#define CONFIG_BETHSOFTVID_DECODER 0 ++#define CONFIG_BFI_DECODER 0 ++#define CONFIG_BINK_DECODER 0 ++#define CONFIG_BITPACKED_DECODER 0 ++#define CONFIG_BMP_DECODER 0 ++#define CONFIG_BMV_VIDEO_DECODER 0 ++#define CONFIG_BRENDER_PIX_DECODER 0 ++#define CONFIG_C93_DECODER 0 ++#define CONFIG_CAVS_DECODER 0 ++#define CONFIG_CDGRAPHICS_DECODER 0 ++#define CONFIG_CDTOONS_DECODER 0 ++#define CONFIG_CDXL_DECODER 0 ++#define CONFIG_CFHD_DECODER 0 ++#define CONFIG_CINEPAK_DECODER 0 ++#define CONFIG_CLEARVIDEO_DECODER 0 ++#define CONFIG_CLJR_DECODER 0 ++#define CONFIG_CLLC_DECODER 0 ++#define CONFIG_COMFORTNOISE_DECODER 0 
++#define CONFIG_CPIA_DECODER 0 ++#define CONFIG_CSCD_DECODER 0 ++#define CONFIG_CYUV_DECODER 0 ++#define CONFIG_DDS_DECODER 0 ++#define CONFIG_DFA_DECODER 0 ++#define CONFIG_DIRAC_DECODER 0 ++#define CONFIG_DNXHD_DECODER 0 ++#define CONFIG_DPX_DECODER 0 ++#define CONFIG_DSICINVIDEO_DECODER 0 ++#define CONFIG_DVAUDIO_DECODER 0 ++#define CONFIG_DVVIDEO_DECODER 0 ++#define CONFIG_DXA_DECODER 0 ++#define CONFIG_DXTORY_DECODER 0 ++#define CONFIG_DXV_DECODER 0 ++#define CONFIG_EACMV_DECODER 0 ++#define CONFIG_EAMAD_DECODER 0 ++#define CONFIG_EATGQ_DECODER 0 ++#define CONFIG_EATGV_DECODER 0 ++#define CONFIG_EATQI_DECODER 0 ++#define CONFIG_EIGHTBPS_DECODER 0 ++#define CONFIG_EIGHTSVX_EXP_DECODER 0 ++#define CONFIG_EIGHTSVX_FIB_DECODER 0 ++#define CONFIG_ESCAPE124_DECODER 0 ++#define CONFIG_ESCAPE130_DECODER 0 ++#define CONFIG_EXR_DECODER 0 ++#define CONFIG_FFV1_DECODER 0 ++#define CONFIG_FFVHUFF_DECODER 0 ++#define CONFIG_FIC_DECODER 0 ++#define CONFIG_FITS_DECODER 0 ++#define CONFIG_FLASHSV_DECODER 0 ++#define CONFIG_FLASHSV2_DECODER 0 ++#define CONFIG_FLIC_DECODER 0 ++#define CONFIG_FLV_DECODER 0 ++#define CONFIG_FMVC_DECODER 0 ++#define CONFIG_FOURXM_DECODER 0 ++#define CONFIG_FRAPS_DECODER 0 ++#define CONFIG_FRWU_DECODER 0 ++#define CONFIG_G2M_DECODER 0 ++#define CONFIG_GDV_DECODER 0 ++#define CONFIG_GIF_DECODER 0 ++#define CONFIG_H261_DECODER 0 ++#define CONFIG_H263_DECODER 1 ++#define CONFIG_H263I_DECODER 0 ++#define CONFIG_H263P_DECODER 0 ++#define CONFIG_H263_V4L2M2M_DECODER 0 ++#define CONFIG_H264_DECODER 1 ++#define CONFIG_H264_CRYSTALHD_DECODER 0 ++#define CONFIG_H264_V4L2M2M_DECODER 0 ++#define CONFIG_H264_MEDIACODEC_DECODER 0 ++#define CONFIG_H264_MMAL_DECODER 0 ++#define CONFIG_H264_QSV_DECODER 0 ++#define CONFIG_H264_RKMPP_DECODER 0 ++#define CONFIG_HAP_DECODER 0 ++#define CONFIG_HEVC_DECODER 0 ++#define CONFIG_HEVC_QSV_DECODER 0 ++#define CONFIG_HEVC_RKMPP_DECODER 0 ++#define CONFIG_HEVC_V4L2M2M_DECODER 0 ++#define CONFIG_HNM4_VIDEO_DECODER 0 ++#define CONFIG_HQ_HQA_DECODER 0 ++#define CONFIG_HQX_DECODER 0 ++#define CONFIG_HUFFYUV_DECODER 0 ++#define CONFIG_HYMT_DECODER 0 ++#define CONFIG_IDCIN_DECODER 0 ++#define CONFIG_IFF_ILBM_DECODER 0 ++#define CONFIG_IMM4_DECODER 0 ++#define CONFIG_IMM5_DECODER 0 ++#define CONFIG_INDEO2_DECODER 0 ++#define CONFIG_INDEO3_DECODER 0 ++#define CONFIG_INDEO4_DECODER 0 ++#define CONFIG_INDEO5_DECODER 0 ++#define CONFIG_INTERPLAY_VIDEO_DECODER 0 ++#define CONFIG_JPEG2000_DECODER 0 ++#define CONFIG_JPEGLS_DECODER 0 ++#define CONFIG_JV_DECODER 0 ++#define CONFIG_KGV1_DECODER 0 ++#define CONFIG_KMVC_DECODER 0 ++#define CONFIG_LAGARITH_DECODER 0 ++#define CONFIG_LOCO_DECODER 0 ++#define CONFIG_LSCR_DECODER 0 ++#define CONFIG_M101_DECODER 0 ++#define CONFIG_MAGICYUV_DECODER 0 ++#define CONFIG_MDEC_DECODER 0 ++#define CONFIG_MIMIC_DECODER 0 ++#define CONFIG_MJPEG_DECODER 0 ++#define CONFIG_MJPEGB_DECODER 0 ++#define CONFIG_MMVIDEO_DECODER 0 ++#define CONFIG_MOTIONPIXELS_DECODER 0 ++#define CONFIG_MPEG1VIDEO_DECODER 0 ++#define CONFIG_MPEG2VIDEO_DECODER 0 ++#define CONFIG_MPEG4_DECODER 1 ++#define CONFIG_MPEG4_CRYSTALHD_DECODER 0 ++#define CONFIG_MPEG4_V4L2M2M_DECODER 0 ++#define CONFIG_MPEG4_MMAL_DECODER 0 ++#define CONFIG_MPEGVIDEO_DECODER 0 ++#define CONFIG_MPEG1_V4L2M2M_DECODER 0 ++#define CONFIG_MPEG2_MMAL_DECODER 0 ++#define CONFIG_MPEG2_CRYSTALHD_DECODER 0 ++#define CONFIG_MPEG2_V4L2M2M_DECODER 0 ++#define CONFIG_MPEG2_QSV_DECODER 0 ++#define CONFIG_MPEG2_MEDIACODEC_DECODER 0 ++#define CONFIG_MSA1_DECODER 0 ++#define CONFIG_MSCC_DECODER 0 
++#define CONFIG_MSMPEG4V1_DECODER 0 ++#define CONFIG_MSMPEG4V2_DECODER 0 ++#define CONFIG_MSMPEG4V3_DECODER 0 ++#define CONFIG_MSMPEG4_CRYSTALHD_DECODER 0 ++#define CONFIG_MSRLE_DECODER 0 ++#define CONFIG_MSS1_DECODER 0 ++#define CONFIG_MSS2_DECODER 0 ++#define CONFIG_MSVIDEO1_DECODER 0 ++#define CONFIG_MSZH_DECODER 0 ++#define CONFIG_MTS2_DECODER 0 ++#define CONFIG_MV30_DECODER 0 ++#define CONFIG_MVC1_DECODER 0 ++#define CONFIG_MVC2_DECODER 0 ++#define CONFIG_MVDV_DECODER 0 ++#define CONFIG_MVHA_DECODER 0 ++#define CONFIG_MWSC_DECODER 0 ++#define CONFIG_MXPEG_DECODER 0 ++#define CONFIG_NUV_DECODER 0 ++#define CONFIG_PAF_VIDEO_DECODER 0 ++#define CONFIG_PAM_DECODER 0 ++#define CONFIG_PBM_DECODER 0 ++#define CONFIG_PCX_DECODER 0 ++#define CONFIG_PGM_DECODER 0 ++#define CONFIG_PGMYUV_DECODER 0 ++#define CONFIG_PICTOR_DECODER 0 ++#define CONFIG_PIXLET_DECODER 0 ++#define CONFIG_PNG_DECODER 0 ++#define CONFIG_PPM_DECODER 0 ++#define CONFIG_PRORES_DECODER 0 ++#define CONFIG_PROSUMER_DECODER 0 ++#define CONFIG_PSD_DECODER 0 ++#define CONFIG_PTX_DECODER 0 ++#define CONFIG_QDRAW_DECODER 0 ++#define CONFIG_QPEG_DECODER 0 ++#define CONFIG_QTRLE_DECODER 0 ++#define CONFIG_R10K_DECODER 0 ++#define CONFIG_R210_DECODER 0 ++#define CONFIG_RASC_DECODER 0 ++#define CONFIG_RAWVIDEO_DECODER 0 ++#define CONFIG_RL2_DECODER 0 ++#define CONFIG_ROQ_DECODER 0 ++#define CONFIG_RPZA_DECODER 0 ++#define CONFIG_RSCC_DECODER 0 ++#define CONFIG_RV10_DECODER 0 ++#define CONFIG_RV20_DECODER 0 ++#define CONFIG_RV30_DECODER 0 ++#define CONFIG_RV40_DECODER 0 ++#define CONFIG_S302M_DECODER 0 ++#define CONFIG_SANM_DECODER 0 ++#define CONFIG_SCPR_DECODER 0 ++#define CONFIG_SCREENPRESSO_DECODER 0 ++#define CONFIG_SGI_DECODER 0 ++#define CONFIG_SGIRLE_DECODER 0 ++#define CONFIG_SHEERVIDEO_DECODER 0 ++#define CONFIG_SMACKER_DECODER 0 ++#define CONFIG_SMC_DECODER 0 ++#define CONFIG_SMVJPEG_DECODER 0 ++#define CONFIG_SNOW_DECODER 0 ++#define CONFIG_SP5X_DECODER 0 ++#define CONFIG_SPEEDHQ_DECODER 0 ++#define CONFIG_SRGC_DECODER 0 ++#define CONFIG_SUNRAST_DECODER 0 ++#define CONFIG_SVQ1_DECODER 0 ++#define CONFIG_SVQ3_DECODER 0 ++#define CONFIG_TARGA_DECODER 0 ++#define CONFIG_TARGA_Y216_DECODER 0 ++#define CONFIG_TDSC_DECODER 0 ++#define CONFIG_THEORA_DECODER 1 ++#define CONFIG_THP_DECODER 0 ++#define CONFIG_TIERTEXSEQVIDEO_DECODER 0 ++#define CONFIG_TIFF_DECODER 0 ++#define CONFIG_TMV_DECODER 0 ++#define CONFIG_TRUEMOTION1_DECODER 0 ++#define CONFIG_TRUEMOTION2_DECODER 0 ++#define CONFIG_TRUEMOTION2RT_DECODER 0 ++#define CONFIG_TSCC_DECODER 0 ++#define CONFIG_TSCC2_DECODER 0 ++#define CONFIG_TXD_DECODER 0 ++#define CONFIG_ULTI_DECODER 0 ++#define CONFIG_UTVIDEO_DECODER 0 ++#define CONFIG_V210_DECODER 0 ++#define CONFIG_V210X_DECODER 0 ++#define CONFIG_V308_DECODER 0 ++#define CONFIG_V408_DECODER 0 ++#define CONFIG_V410_DECODER 0 ++#define CONFIG_VB_DECODER 0 ++#define CONFIG_VBLE_DECODER 0 ++#define CONFIG_VC1_DECODER 0 ++#define CONFIG_VC1_CRYSTALHD_DECODER 0 ++#define CONFIG_VC1IMAGE_DECODER 0 ++#define CONFIG_VC1_MMAL_DECODER 0 ++#define CONFIG_VC1_QSV_DECODER 0 ++#define CONFIG_VC1_V4L2M2M_DECODER 0 ++#define CONFIG_VCR1_DECODER 0 ++#define CONFIG_VMDVIDEO_DECODER 0 ++#define CONFIG_VMNC_DECODER 0 ++#define CONFIG_VP3_DECODER 1 ++#define CONFIG_VP4_DECODER 0 ++#define CONFIG_VP5_DECODER 0 ++#define CONFIG_VP6_DECODER 0 ++#define CONFIG_VP6A_DECODER 0 ++#define CONFIG_VP6F_DECODER 0 ++#define CONFIG_VP7_DECODER 0 ++#define CONFIG_VP8_DECODER 1 ++#define CONFIG_VP8_RKMPP_DECODER 0 ++#define CONFIG_VP8_V4L2M2M_DECODER 0 ++#define 
CONFIG_VP9_DECODER 0 ++#define CONFIG_VP9_RKMPP_DECODER 0 ++#define CONFIG_VP9_V4L2M2M_DECODER 0 ++#define CONFIG_VQA_DECODER 0 ++#define CONFIG_WEBP_DECODER 0 ++#define CONFIG_WCMV_DECODER 0 ++#define CONFIG_WRAPPED_AVFRAME_DECODER 0 ++#define CONFIG_WMV1_DECODER 0 ++#define CONFIG_WMV2_DECODER 0 ++#define CONFIG_WMV3_DECODER 0 ++#define CONFIG_WMV3_CRYSTALHD_DECODER 0 ++#define CONFIG_WMV3IMAGE_DECODER 0 ++#define CONFIG_WNV1_DECODER 0 ++#define CONFIG_XAN_WC3_DECODER 0 ++#define CONFIG_XAN_WC4_DECODER 0 ++#define CONFIG_XBM_DECODER 0 ++#define CONFIG_XFACE_DECODER 0 ++#define CONFIG_XL_DECODER 0 ++#define CONFIG_XPM_DECODER 0 ++#define CONFIG_XWD_DECODER 0 ++#define CONFIG_Y41P_DECODER 0 ++#define CONFIG_YLC_DECODER 0 ++#define CONFIG_YOP_DECODER 0 ++#define CONFIG_YUV4_DECODER 0 ++#define CONFIG_ZERO12V_DECODER 0 ++#define CONFIG_ZEROCODEC_DECODER 0 ++#define CONFIG_ZLIB_DECODER 0 ++#define CONFIG_ZMBV_DECODER 0 ++#define CONFIG_AAC_DECODER 1 ++#define CONFIG_AAC_FIXED_DECODER 0 ++#define CONFIG_AAC_LATM_DECODER 0 ++#define CONFIG_AC3_DECODER 0 ++#define CONFIG_AC3_FIXED_DECODER 0 ++#define CONFIG_ACELP_KELVIN_DECODER 0 ++#define CONFIG_ALAC_DECODER 0 ++#define CONFIG_ALS_DECODER 0 ++#define CONFIG_AMRNB_DECODER 1 ++#define CONFIG_AMRWB_DECODER 1 ++#define CONFIG_APE_DECODER 0 ++#define CONFIG_APTX_DECODER 0 ++#define CONFIG_APTX_HD_DECODER 0 ++#define CONFIG_ATRAC1_DECODER 0 ++#define CONFIG_ATRAC3_DECODER 0 ++#define CONFIG_ATRAC3AL_DECODER 0 ++#define CONFIG_ATRAC3P_DECODER 0 ++#define CONFIG_ATRAC3PAL_DECODER 0 ++#define CONFIG_ATRAC9_DECODER 0 ++#define CONFIG_BINKAUDIO_DCT_DECODER 0 ++#define CONFIG_BINKAUDIO_RDFT_DECODER 0 ++#define CONFIG_BMV_AUDIO_DECODER 0 ++#define CONFIG_COOK_DECODER 0 ++#define CONFIG_DCA_DECODER 0 ++#define CONFIG_DOLBY_E_DECODER 0 ++#define CONFIG_DSD_LSBF_DECODER 0 ++#define CONFIG_DSD_MSBF_DECODER 0 ++#define CONFIG_DSD_LSBF_PLANAR_DECODER 0 ++#define CONFIG_DSD_MSBF_PLANAR_DECODER 0 ++#define CONFIG_DSICINAUDIO_DECODER 0 ++#define CONFIG_DSS_SP_DECODER 0 ++#define CONFIG_DST_DECODER 0 ++#define CONFIG_EAC3_DECODER 0 ++#define CONFIG_EVRC_DECODER 0 ++#define CONFIG_FFWAVESYNTH_DECODER 0 ++#define CONFIG_FLAC_DECODER 1 ++#define CONFIG_G723_1_DECODER 0 ++#define CONFIG_G729_DECODER 0 ++#define CONFIG_GSM_DECODER 0 ++#define CONFIG_GSM_MS_DECODER 1 ++#define CONFIG_HCA_DECODER 0 ++#define CONFIG_HCOM_DECODER 0 ++#define CONFIG_IAC_DECODER 0 ++#define CONFIG_ILBC_DECODER 0 ++#define CONFIG_IMC_DECODER 0 ++#define CONFIG_INTERPLAY_ACM_DECODER 0 ++#define CONFIG_MACE3_DECODER 0 ++#define CONFIG_MACE6_DECODER 0 ++#define CONFIG_METASOUND_DECODER 0 ++#define CONFIG_MLP_DECODER 0 ++#define CONFIG_MP1_DECODER 0 ++#define CONFIG_MP1FLOAT_DECODER 0 ++#define CONFIG_MP2_DECODER 0 ++#define CONFIG_MP2FLOAT_DECODER 0 ++#define CONFIG_MP3FLOAT_DECODER 0 ++#define CONFIG_MP3_DECODER 1 ++#define CONFIG_MP3ADUFLOAT_DECODER 0 ++#define CONFIG_MP3ADU_DECODER 0 ++#define CONFIG_MP3ON4FLOAT_DECODER 0 ++#define CONFIG_MP3ON4_DECODER 0 ++#define CONFIG_MPC7_DECODER 0 ++#define CONFIG_MPC8_DECODER 0 ++#define CONFIG_NELLYMOSER_DECODER 0 ++#define CONFIG_ON2AVC_DECODER 0 ++#define CONFIG_OPUS_DECODER 0 ++#define CONFIG_PAF_AUDIO_DECODER 0 ++#define CONFIG_QCELP_DECODER 0 ++#define CONFIG_QDM2_DECODER 0 ++#define CONFIG_QDMC_DECODER 0 ++#define CONFIG_RA_144_DECODER 0 ++#define CONFIG_RA_288_DECODER 0 ++#define CONFIG_RALF_DECODER 0 ++#define CONFIG_SBC_DECODER 0 ++#define CONFIG_SHORTEN_DECODER 0 ++#define CONFIG_SIPR_DECODER 0 ++#define CONFIG_SIREN_DECODER 0 ++#define 
CONFIG_SMACKAUD_DECODER 0 ++#define CONFIG_SONIC_DECODER 0 ++#define CONFIG_TAK_DECODER 0 ++#define CONFIG_TRUEHD_DECODER 0 ++#define CONFIG_TRUESPEECH_DECODER 0 ++#define CONFIG_TTA_DECODER 0 ++#define CONFIG_TWINVQ_DECODER 0 ++#define CONFIG_VMDAUDIO_DECODER 0 ++#define CONFIG_VORBIS_DECODER 1 ++#define CONFIG_WAVPACK_DECODER 0 ++#define CONFIG_WMALOSSLESS_DECODER 0 ++#define CONFIG_WMAPRO_DECODER 0 ++#define CONFIG_WMAV1_DECODER 0 ++#define CONFIG_WMAV2_DECODER 0 ++#define CONFIG_WMAVOICE_DECODER 0 ++#define CONFIG_WS_SND1_DECODER 0 ++#define CONFIG_XMA1_DECODER 0 ++#define CONFIG_XMA2_DECODER 0 ++#define CONFIG_PCM_ALAW_DECODER 1 ++#define CONFIG_PCM_BLURAY_DECODER 0 ++#define CONFIG_PCM_DVD_DECODER 0 ++#define CONFIG_PCM_F16LE_DECODER 0 ++#define CONFIG_PCM_F24LE_DECODER 0 ++#define CONFIG_PCM_F32BE_DECODER 0 ++#define CONFIG_PCM_F32LE_DECODER 1 ++#define CONFIG_PCM_F64BE_DECODER 0 ++#define CONFIG_PCM_F64LE_DECODER 0 ++#define CONFIG_PCM_LXF_DECODER 0 ++#define CONFIG_PCM_MULAW_DECODER 1 ++#define CONFIG_PCM_S8_DECODER 0 ++#define CONFIG_PCM_S8_PLANAR_DECODER 0 ++#define CONFIG_PCM_S16BE_DECODER 1 ++#define CONFIG_PCM_S16BE_PLANAR_DECODER 0 ++#define CONFIG_PCM_S16LE_DECODER 1 ++#define CONFIG_PCM_S16LE_PLANAR_DECODER 0 ++#define CONFIG_PCM_S24BE_DECODER 1 ++#define CONFIG_PCM_S24DAUD_DECODER 0 ++#define CONFIG_PCM_S24LE_DECODER 1 ++#define CONFIG_PCM_S24LE_PLANAR_DECODER 0 ++#define CONFIG_PCM_S32BE_DECODER 0 ++#define CONFIG_PCM_S32LE_DECODER 1 ++#define CONFIG_PCM_S32LE_PLANAR_DECODER 0 ++#define CONFIG_PCM_S64BE_DECODER 0 ++#define CONFIG_PCM_S64LE_DECODER 0 ++#define CONFIG_PCM_U8_DECODER 1 ++#define CONFIG_PCM_U16BE_DECODER 0 ++#define CONFIG_PCM_U16LE_DECODER 0 ++#define CONFIG_PCM_U24BE_DECODER 0 ++#define CONFIG_PCM_U24LE_DECODER 0 ++#define CONFIG_PCM_U32BE_DECODER 0 ++#define CONFIG_PCM_U32LE_DECODER 0 ++#define CONFIG_PCM_VIDC_DECODER 0 ++#define CONFIG_DERF_DPCM_DECODER 0 ++#define CONFIG_GREMLIN_DPCM_DECODER 0 ++#define CONFIG_INTERPLAY_DPCM_DECODER 0 ++#define CONFIG_ROQ_DPCM_DECODER 0 ++#define CONFIG_SDX2_DPCM_DECODER 0 ++#define CONFIG_SOL_DPCM_DECODER 0 ++#define CONFIG_XAN_DPCM_DECODER 0 ++#define CONFIG_ADPCM_4XM_DECODER 0 ++#define CONFIG_ADPCM_ADX_DECODER 0 ++#define CONFIG_ADPCM_AFC_DECODER 0 ++#define CONFIG_ADPCM_AGM_DECODER 0 ++#define CONFIG_ADPCM_AICA_DECODER 0 ++#define CONFIG_ADPCM_ARGO_DECODER 0 ++#define CONFIG_ADPCM_CT_DECODER 0 ++#define CONFIG_ADPCM_DTK_DECODER 0 ++#define CONFIG_ADPCM_EA_DECODER 0 ++#define CONFIG_ADPCM_EA_MAXIS_XA_DECODER 0 ++#define CONFIG_ADPCM_EA_R1_DECODER 0 ++#define CONFIG_ADPCM_EA_R2_DECODER 0 ++#define CONFIG_ADPCM_EA_R3_DECODER 0 ++#define CONFIG_ADPCM_EA_XAS_DECODER 0 ++#define CONFIG_ADPCM_G722_DECODER 0 ++#define CONFIG_ADPCM_G726_DECODER 0 ++#define CONFIG_ADPCM_G726LE_DECODER 0 ++#define CONFIG_ADPCM_IMA_AMV_DECODER 0 ++#define CONFIG_ADPCM_IMA_ALP_DECODER 0 ++#define CONFIG_ADPCM_IMA_APC_DECODER 0 ++#define CONFIG_ADPCM_IMA_APM_DECODER 0 ++#define CONFIG_ADPCM_IMA_CUNNING_DECODER 0 ++#define CONFIG_ADPCM_IMA_DAT4_DECODER 0 ++#define CONFIG_ADPCM_IMA_DK3_DECODER 0 ++#define CONFIG_ADPCM_IMA_DK4_DECODER 0 ++#define CONFIG_ADPCM_IMA_EA_EACS_DECODER 0 ++#define CONFIG_ADPCM_IMA_EA_SEAD_DECODER 0 ++#define CONFIG_ADPCM_IMA_ISS_DECODER 0 ++#define CONFIG_ADPCM_IMA_MTF_DECODER 0 ++#define CONFIG_ADPCM_IMA_OKI_DECODER 0 ++#define CONFIG_ADPCM_IMA_QT_DECODER 0 ++#define CONFIG_ADPCM_IMA_RAD_DECODER 0 ++#define CONFIG_ADPCM_IMA_SSI_DECODER 0 ++#define CONFIG_ADPCM_IMA_SMJPEG_DECODER 0 ++#define CONFIG_ADPCM_IMA_WAV_DECODER 0 
++#define CONFIG_ADPCM_IMA_WS_DECODER 0 ++#define CONFIG_ADPCM_MS_DECODER 0 ++#define CONFIG_ADPCM_MTAF_DECODER 0 ++#define CONFIG_ADPCM_PSX_DECODER 0 ++#define CONFIG_ADPCM_SBPRO_2_DECODER 0 ++#define CONFIG_ADPCM_SBPRO_3_DECODER 0 ++#define CONFIG_ADPCM_SBPRO_4_DECODER 0 ++#define CONFIG_ADPCM_SWF_DECODER 0 ++#define CONFIG_ADPCM_THP_DECODER 0 ++#define CONFIG_ADPCM_THP_LE_DECODER 0 ++#define CONFIG_ADPCM_VIMA_DECODER 0 ++#define CONFIG_ADPCM_XA_DECODER 0 ++#define CONFIG_ADPCM_YAMAHA_DECODER 0 ++#define CONFIG_ADPCM_ZORK_DECODER 0 ++#define CONFIG_SSA_DECODER 0 ++#define CONFIG_ASS_DECODER 0 ++#define CONFIG_CCAPTION_DECODER 0 ++#define CONFIG_DVBSUB_DECODER 0 ++#define CONFIG_DVDSUB_DECODER 0 ++#define CONFIG_JACOSUB_DECODER 0 ++#define CONFIG_MICRODVD_DECODER 0 ++#define CONFIG_MOVTEXT_DECODER 0 ++#define CONFIG_MPL2_DECODER 0 ++#define CONFIG_PGSSUB_DECODER 0 ++#define CONFIG_PJS_DECODER 0 ++#define CONFIG_REALTEXT_DECODER 0 ++#define CONFIG_SAMI_DECODER 0 ++#define CONFIG_SRT_DECODER 0 ++#define CONFIG_STL_DECODER 0 ++#define CONFIG_SUBRIP_DECODER 0 ++#define CONFIG_SUBVIEWER_DECODER 0 ++#define CONFIG_SUBVIEWER1_DECODER 0 ++#define CONFIG_TEXT_DECODER 0 ++#define CONFIG_VPLAYER_DECODER 0 ++#define CONFIG_WEBVTT_DECODER 0 ++#define CONFIG_XSUB_DECODER 0 ++#define CONFIG_AAC_AT_DECODER 0 ++#define CONFIG_AC3_AT_DECODER 0 ++#define CONFIG_ADPCM_IMA_QT_AT_DECODER 0 ++#define CONFIG_ALAC_AT_DECODER 0 ++#define CONFIG_AMR_NB_AT_DECODER 0 ++#define CONFIG_EAC3_AT_DECODER 0 ++#define CONFIG_GSM_MS_AT_DECODER 0 ++#define CONFIG_ILBC_AT_DECODER 0 ++#define CONFIG_MP1_AT_DECODER 0 ++#define CONFIG_MP2_AT_DECODER 0 ++#define CONFIG_MP3_AT_DECODER 0 ++#define CONFIG_PCM_ALAW_AT_DECODER 0 ++#define CONFIG_PCM_MULAW_AT_DECODER 0 ++#define CONFIG_QDMC_AT_DECODER 0 ++#define CONFIG_QDM2_AT_DECODER 0 ++#define CONFIG_LIBARIBB24_DECODER 0 ++#define CONFIG_LIBCELT_DECODER 0 ++#define CONFIG_LIBCODEC2_DECODER 0 ++#define CONFIG_LIBDAV1D_DECODER 0 ++#define CONFIG_LIBDAVS2_DECODER 0 ++#define CONFIG_LIBFDK_AAC_DECODER 0 ++#define CONFIG_LIBGSM_DECODER 0 ++#define CONFIG_LIBGSM_MS_DECODER 0 ++#define CONFIG_LIBILBC_DECODER 0 ++#define CONFIG_LIBOPENCORE_AMRNB_DECODER 0 ++#define CONFIG_LIBOPENCORE_AMRWB_DECODER 0 ++#define CONFIG_LIBOPENJPEG_DECODER 0 ++#define CONFIG_LIBOPUS_DECODER 1 ++#define CONFIG_LIBRSVG_DECODER 0 ++#define CONFIG_LIBSPEEX_DECODER 0 ++#define CONFIG_LIBVORBIS_DECODER 0 ++#define CONFIG_LIBVPX_VP8_DECODER 0 ++#define CONFIG_LIBVPX_VP9_DECODER 0 ++#define CONFIG_LIBZVBI_TELETEXT_DECODER 0 ++#define CONFIG_BINTEXT_DECODER 0 ++#define CONFIG_XBIN_DECODER 0 ++#define CONFIG_IDF_DECODER 0 ++#define CONFIG_LIBAOM_AV1_DECODER 0 ++#define CONFIG_LIBOPENH264_DECODER 0 ++#define CONFIG_H264_CUVID_DECODER 0 ++#define CONFIG_HEVC_CUVID_DECODER 0 ++#define CONFIG_HEVC_MEDIACODEC_DECODER 0 ++#define CONFIG_MJPEG_CUVID_DECODER 0 ++#define CONFIG_MJPEG_QSV_DECODER 0 ++#define CONFIG_MPEG1_CUVID_DECODER 0 ++#define CONFIG_MPEG2_CUVID_DECODER 0 ++#define CONFIG_MPEG4_CUVID_DECODER 0 ++#define CONFIG_MPEG4_MEDIACODEC_DECODER 0 ++#define CONFIG_VC1_CUVID_DECODER 0 ++#define CONFIG_VP8_CUVID_DECODER 0 ++#define CONFIG_VP8_MEDIACODEC_DECODER 0 ++#define CONFIG_VP8_QSV_DECODER 0 ++#define CONFIG_VP9_CUVID_DECODER 0 ++#define CONFIG_VP9_MEDIACODEC_DECODER 0 ++#define CONFIG_VP9_QSV_DECODER 0 ++#define CONFIG_A64MULTI_ENCODER 0 ++#define CONFIG_A64MULTI5_ENCODER 0 ++#define CONFIG_ALIAS_PIX_ENCODER 0 ++#define CONFIG_AMV_ENCODER 0 ++#define CONFIG_APNG_ENCODER 0 ++#define CONFIG_ASV1_ENCODER 0 ++#define 
CONFIG_ASV2_ENCODER 0 ++#define CONFIG_AVRP_ENCODER 0 ++#define CONFIG_AVUI_ENCODER 0 ++#define CONFIG_AYUV_ENCODER 0 ++#define CONFIG_BMP_ENCODER 0 ++#define CONFIG_CINEPAK_ENCODER 0 ++#define CONFIG_CLJR_ENCODER 0 ++#define CONFIG_COMFORTNOISE_ENCODER 0 ++#define CONFIG_DNXHD_ENCODER 0 ++#define CONFIG_DPX_ENCODER 0 ++#define CONFIG_DVVIDEO_ENCODER 0 ++#define CONFIG_FFV1_ENCODER 0 ++#define CONFIG_FFVHUFF_ENCODER 0 ++#define CONFIG_FITS_ENCODER 0 ++#define CONFIG_FLASHSV_ENCODER 0 ++#define CONFIG_FLASHSV2_ENCODER 0 ++#define CONFIG_FLV_ENCODER 0 ++#define CONFIG_GIF_ENCODER 0 ++#define CONFIG_H261_ENCODER 0 ++#define CONFIG_H263_ENCODER 0 ++#define CONFIG_H263P_ENCODER 0 ++#define CONFIG_HAP_ENCODER 0 ++#define CONFIG_HUFFYUV_ENCODER 0 ++#define CONFIG_JPEG2000_ENCODER 0 ++#define CONFIG_JPEGLS_ENCODER 0 ++#define CONFIG_LJPEG_ENCODER 0 ++#define CONFIG_MAGICYUV_ENCODER 0 ++#define CONFIG_MJPEG_ENCODER 0 ++#define CONFIG_MPEG1VIDEO_ENCODER 0 ++#define CONFIG_MPEG2VIDEO_ENCODER 0 ++#define CONFIG_MPEG4_ENCODER 0 ++#define CONFIG_MSMPEG4V2_ENCODER 0 ++#define CONFIG_MSMPEG4V3_ENCODER 0 ++#define CONFIG_MSVIDEO1_ENCODER 0 ++#define CONFIG_PAM_ENCODER 0 ++#define CONFIG_PBM_ENCODER 0 ++#define CONFIG_PCX_ENCODER 0 ++#define CONFIG_PGM_ENCODER 0 ++#define CONFIG_PGMYUV_ENCODER 0 ++#define CONFIG_PNG_ENCODER 0 ++#define CONFIG_PPM_ENCODER 0 ++#define CONFIG_PRORES_ENCODER 0 ++#define CONFIG_PRORES_AW_ENCODER 0 ++#define CONFIG_PRORES_KS_ENCODER 0 ++#define CONFIG_QTRLE_ENCODER 0 ++#define CONFIG_R10K_ENCODER 0 ++#define CONFIG_R210_ENCODER 0 ++#define CONFIG_RAWVIDEO_ENCODER 0 ++#define CONFIG_ROQ_ENCODER 0 ++#define CONFIG_RV10_ENCODER 0 ++#define CONFIG_RV20_ENCODER 0 ++#define CONFIG_S302M_ENCODER 0 ++#define CONFIG_SGI_ENCODER 0 ++#define CONFIG_SNOW_ENCODER 0 ++#define CONFIG_SUNRAST_ENCODER 0 ++#define CONFIG_SVQ1_ENCODER 0 ++#define CONFIG_TARGA_ENCODER 0 ++#define CONFIG_TIFF_ENCODER 0 ++#define CONFIG_UTVIDEO_ENCODER 0 ++#define CONFIG_V210_ENCODER 0 ++#define CONFIG_V308_ENCODER 0 ++#define CONFIG_V408_ENCODER 0 ++#define CONFIG_V410_ENCODER 0 ++#define CONFIG_VC2_ENCODER 0 ++#define CONFIG_WRAPPED_AVFRAME_ENCODER 0 ++#define CONFIG_WMV1_ENCODER 0 ++#define CONFIG_WMV2_ENCODER 0 ++#define CONFIG_XBM_ENCODER 0 ++#define CONFIG_XFACE_ENCODER 0 ++#define CONFIG_XWD_ENCODER 0 ++#define CONFIG_Y41P_ENCODER 0 ++#define CONFIG_YUV4_ENCODER 0 ++#define CONFIG_ZLIB_ENCODER 0 ++#define CONFIG_ZMBV_ENCODER 0 ++#define CONFIG_AAC_ENCODER 0 ++#define CONFIG_AC3_ENCODER 0 ++#define CONFIG_AC3_FIXED_ENCODER 0 ++#define CONFIG_ALAC_ENCODER 0 ++#define CONFIG_APTX_ENCODER 0 ++#define CONFIG_APTX_HD_ENCODER 0 ++#define CONFIG_DCA_ENCODER 0 ++#define CONFIG_EAC3_ENCODER 0 ++#define CONFIG_FLAC_ENCODER 0 ++#define CONFIG_G723_1_ENCODER 0 ++#define CONFIG_MLP_ENCODER 0 ++#define CONFIG_MP2_ENCODER 0 ++#define CONFIG_MP2FIXED_ENCODER 0 ++#define CONFIG_NELLYMOSER_ENCODER 0 ++#define CONFIG_OPUS_ENCODER 0 ++#define CONFIG_RA_144_ENCODER 0 ++#define CONFIG_SBC_ENCODER 0 ++#define CONFIG_SONIC_ENCODER 0 ++#define CONFIG_SONIC_LS_ENCODER 0 ++#define CONFIG_TRUEHD_ENCODER 0 ++#define CONFIG_TTA_ENCODER 0 ++#define CONFIG_VORBIS_ENCODER 0 ++#define CONFIG_WAVPACK_ENCODER 0 ++#define CONFIG_WMAV1_ENCODER 0 ++#define CONFIG_WMAV2_ENCODER 0 ++#define CONFIG_PCM_ALAW_ENCODER 0 ++#define CONFIG_PCM_DVD_ENCODER 0 ++#define CONFIG_PCM_F32BE_ENCODER 0 ++#define CONFIG_PCM_F32LE_ENCODER 0 ++#define CONFIG_PCM_F64BE_ENCODER 0 ++#define CONFIG_PCM_F64LE_ENCODER 0 ++#define CONFIG_PCM_MULAW_ENCODER 0 ++#define 
CONFIG_PCM_S8_ENCODER 0 ++#define CONFIG_PCM_S8_PLANAR_ENCODER 0 ++#define CONFIG_PCM_S16BE_ENCODER 0 ++#define CONFIG_PCM_S16BE_PLANAR_ENCODER 0 ++#define CONFIG_PCM_S16LE_ENCODER 0 ++#define CONFIG_PCM_S16LE_PLANAR_ENCODER 0 ++#define CONFIG_PCM_S24BE_ENCODER 0 ++#define CONFIG_PCM_S24DAUD_ENCODER 0 ++#define CONFIG_PCM_S24LE_ENCODER 0 ++#define CONFIG_PCM_S24LE_PLANAR_ENCODER 0 ++#define CONFIG_PCM_S32BE_ENCODER 0 ++#define CONFIG_PCM_S32LE_ENCODER 0 ++#define CONFIG_PCM_S32LE_PLANAR_ENCODER 0 ++#define CONFIG_PCM_S64BE_ENCODER 0 ++#define CONFIG_PCM_S64LE_ENCODER 0 ++#define CONFIG_PCM_U8_ENCODER 0 ++#define CONFIG_PCM_U16BE_ENCODER 0 ++#define CONFIG_PCM_U16LE_ENCODER 0 ++#define CONFIG_PCM_U24BE_ENCODER 0 ++#define CONFIG_PCM_U24LE_ENCODER 0 ++#define CONFIG_PCM_U32BE_ENCODER 0 ++#define CONFIG_PCM_U32LE_ENCODER 0 ++#define CONFIG_PCM_VIDC_ENCODER 0 ++#define CONFIG_ROQ_DPCM_ENCODER 0 ++#define CONFIG_ADPCM_ADX_ENCODER 0 ++#define CONFIG_ADPCM_G722_ENCODER 0 ++#define CONFIG_ADPCM_G726_ENCODER 0 ++#define CONFIG_ADPCM_G726LE_ENCODER 0 ++#define CONFIG_ADPCM_IMA_QT_ENCODER 0 ++#define CONFIG_ADPCM_IMA_WAV_ENCODER 0 ++#define CONFIG_ADPCM_MS_ENCODER 0 ++#define CONFIG_ADPCM_SWF_ENCODER 0 ++#define CONFIG_ADPCM_YAMAHA_ENCODER 0 ++#define CONFIG_SSA_ENCODER 0 ++#define CONFIG_ASS_ENCODER 0 ++#define CONFIG_DVBSUB_ENCODER 0 ++#define CONFIG_DVDSUB_ENCODER 0 ++#define CONFIG_MOVTEXT_ENCODER 0 ++#define CONFIG_SRT_ENCODER 0 ++#define CONFIG_SUBRIP_ENCODER 0 ++#define CONFIG_TEXT_ENCODER 0 ++#define CONFIG_WEBVTT_ENCODER 0 ++#define CONFIG_XSUB_ENCODER 0 ++#define CONFIG_AAC_AT_ENCODER 0 ++#define CONFIG_ALAC_AT_ENCODER 0 ++#define CONFIG_ILBC_AT_ENCODER 0 ++#define CONFIG_PCM_ALAW_AT_ENCODER 0 ++#define CONFIG_PCM_MULAW_AT_ENCODER 0 ++#define CONFIG_LIBAOM_AV1_ENCODER 0 ++#define CONFIG_LIBCODEC2_ENCODER 0 ++#define CONFIG_LIBFDK_AAC_ENCODER 0 ++#define CONFIG_LIBGSM_ENCODER 0 ++#define CONFIG_LIBGSM_MS_ENCODER 0 ++#define CONFIG_LIBILBC_ENCODER 0 ++#define CONFIG_LIBMP3LAME_ENCODER 0 ++#define CONFIG_LIBOPENCORE_AMRNB_ENCODER 0 ++#define CONFIG_LIBOPENJPEG_ENCODER 0 ++#define CONFIG_LIBOPUS_ENCODER 0 ++#define CONFIG_LIBRAV1E_ENCODER 0 ++#define CONFIG_LIBSHINE_ENCODER 0 ++#define CONFIG_LIBSPEEX_ENCODER 0 ++#define CONFIG_LIBTHEORA_ENCODER 0 ++#define CONFIG_LIBTWOLAME_ENCODER 0 ++#define CONFIG_LIBVO_AMRWBENC_ENCODER 0 ++#define CONFIG_LIBVORBIS_ENCODER 0 ++#define CONFIG_LIBVPX_VP8_ENCODER 0 ++#define CONFIG_LIBVPX_VP9_ENCODER 0 ++#define CONFIG_LIBWAVPACK_ENCODER 0 ++#define CONFIG_LIBWEBP_ANIM_ENCODER 0 ++#define CONFIG_LIBWEBP_ENCODER 0 ++#define CONFIG_LIBX262_ENCODER 0 ++#define CONFIG_LIBX264_ENCODER 0 ++#define CONFIG_LIBX264RGB_ENCODER 0 ++#define CONFIG_LIBX265_ENCODER 0 ++#define CONFIG_LIBXAVS_ENCODER 0 ++#define CONFIG_LIBXAVS2_ENCODER 0 ++#define CONFIG_LIBXVID_ENCODER 0 ++#define CONFIG_H263_V4L2M2M_ENCODER 0 ++#define CONFIG_LIBOPENH264_ENCODER 0 ++#define CONFIG_H264_AMF_ENCODER 0 ++#define CONFIG_H264_NVENC_ENCODER 0 ++#define CONFIG_H264_OMX_ENCODER 0 ++#define CONFIG_H264_QSV_ENCODER 0 ++#define CONFIG_H264_V4L2M2M_ENCODER 0 ++#define CONFIG_H264_VAAPI_ENCODER 0 ++#define CONFIG_H264_VIDEOTOOLBOX_ENCODER 0 ++#define CONFIG_NVENC_ENCODER 0 ++#define CONFIG_NVENC_H264_ENCODER 0 ++#define CONFIG_NVENC_HEVC_ENCODER 0 ++#define CONFIG_HEVC_AMF_ENCODER 0 ++#define CONFIG_HEVC_NVENC_ENCODER 0 ++#define CONFIG_HEVC_QSV_ENCODER 0 ++#define CONFIG_HEVC_V4L2M2M_ENCODER 0 ++#define CONFIG_HEVC_VAAPI_ENCODER 0 ++#define CONFIG_HEVC_VIDEOTOOLBOX_ENCODER 0 ++#define 
CONFIG_LIBKVAZAAR_ENCODER 0 ++#define CONFIG_MJPEG_QSV_ENCODER 0 ++#define CONFIG_MJPEG_VAAPI_ENCODER 0 ++#define CONFIG_MPEG2_QSV_ENCODER 0 ++#define CONFIG_MPEG2_VAAPI_ENCODER 0 ++#define CONFIG_MPEG4_OMX_ENCODER 0 ++#define CONFIG_MPEG4_V4L2M2M_ENCODER 0 ++#define CONFIG_VP8_V4L2M2M_ENCODER 0 ++#define CONFIG_VP8_VAAPI_ENCODER 0 ++#define CONFIG_VP9_VAAPI_ENCODER 0 ++#define CONFIG_VP9_QSV_ENCODER 0 ++#define CONFIG_H263_VAAPI_HWACCEL 0 ++#define CONFIG_H263_VIDEOTOOLBOX_HWACCEL 0 ++#define CONFIG_H264_D3D11VA_HWACCEL 0 ++#define CONFIG_H264_D3D11VA2_HWACCEL 0 ++#define CONFIG_H264_DXVA2_HWACCEL 0 ++#define CONFIG_H264_NVDEC_HWACCEL 0 ++#define CONFIG_H264_VAAPI_HWACCEL 0 ++#define CONFIG_H264_VDPAU_HWACCEL 0 ++#define CONFIG_H264_VIDEOTOOLBOX_HWACCEL 0 ++#define CONFIG_HEVC_D3D11VA_HWACCEL 0 ++#define CONFIG_HEVC_D3D11VA2_HWACCEL 0 ++#define CONFIG_HEVC_DXVA2_HWACCEL 0 ++#define CONFIG_HEVC_NVDEC_HWACCEL 0 ++#define CONFIG_HEVC_VAAPI_HWACCEL 0 ++#define CONFIG_HEVC_VDPAU_HWACCEL 0 ++#define CONFIG_HEVC_VIDEOTOOLBOX_HWACCEL 0 ++#define CONFIG_MJPEG_NVDEC_HWACCEL 0 ++#define CONFIG_MJPEG_VAAPI_HWACCEL 0 ++#define CONFIG_MPEG1_NVDEC_HWACCEL 0 ++#define CONFIG_MPEG1_VDPAU_HWACCEL 0 ++#define CONFIG_MPEG1_VIDEOTOOLBOX_HWACCEL 0 ++#define CONFIG_MPEG1_XVMC_HWACCEL 0 ++#define CONFIG_MPEG2_D3D11VA_HWACCEL 0 ++#define CONFIG_MPEG2_D3D11VA2_HWACCEL 0 ++#define CONFIG_MPEG2_NVDEC_HWACCEL 0 ++#define CONFIG_MPEG2_DXVA2_HWACCEL 0 ++#define CONFIG_MPEG2_VAAPI_HWACCEL 0 ++#define CONFIG_MPEG2_VDPAU_HWACCEL 0 ++#define CONFIG_MPEG2_VIDEOTOOLBOX_HWACCEL 0 ++#define CONFIG_MPEG2_XVMC_HWACCEL 0 ++#define CONFIG_MPEG4_NVDEC_HWACCEL 0 ++#define CONFIG_MPEG4_VAAPI_HWACCEL 0 ++#define CONFIG_MPEG4_VDPAU_HWACCEL 0 ++#define CONFIG_MPEG4_VIDEOTOOLBOX_HWACCEL 0 ++#define CONFIG_VC1_D3D11VA_HWACCEL 0 ++#define CONFIG_VC1_D3D11VA2_HWACCEL 0 ++#define CONFIG_VC1_DXVA2_HWACCEL 0 ++#define CONFIG_VC1_NVDEC_HWACCEL 0 ++#define CONFIG_VC1_VAAPI_HWACCEL 0 ++#define CONFIG_VC1_VDPAU_HWACCEL 0 ++#define CONFIG_VP8_NVDEC_HWACCEL 0 ++#define CONFIG_VP8_VAAPI_HWACCEL 0 ++#define CONFIG_VP9_D3D11VA_HWACCEL 0 ++#define CONFIG_VP9_D3D11VA2_HWACCEL 0 ++#define CONFIG_VP9_DXVA2_HWACCEL 0 ++#define CONFIG_VP9_NVDEC_HWACCEL 0 ++#define CONFIG_VP9_VAAPI_HWACCEL 0 ++#define CONFIG_VP9_VDPAU_HWACCEL 0 ++#define CONFIG_WMV3_D3D11VA_HWACCEL 0 ++#define CONFIG_WMV3_D3D11VA2_HWACCEL 0 ++#define CONFIG_WMV3_DXVA2_HWACCEL 0 ++#define CONFIG_WMV3_NVDEC_HWACCEL 0 ++#define CONFIG_WMV3_VAAPI_HWACCEL 0 ++#define CONFIG_WMV3_VDPAU_HWACCEL 0 ++#define CONFIG_AAC_PARSER 1 ++#define CONFIG_AAC_LATM_PARSER 0 ++#define CONFIG_AC3_PARSER 0 ++#define CONFIG_ADX_PARSER 0 ++#define CONFIG_AV1_PARSER 0 ++#define CONFIG_AVS2_PARSER 0 ++#define CONFIG_BMP_PARSER 0 ++#define CONFIG_CAVSVIDEO_PARSER 0 ++#define CONFIG_COOK_PARSER 0 ++#define CONFIG_DCA_PARSER 0 ++#define CONFIG_DIRAC_PARSER 0 ++#define CONFIG_DNXHD_PARSER 0 ++#define CONFIG_DPX_PARSER 0 ++#define CONFIG_DVAUDIO_PARSER 0 ++#define CONFIG_DVBSUB_PARSER 0 ++#define CONFIG_DVDSUB_PARSER 0 ++#define CONFIG_DVD_NAV_PARSER 0 ++#define CONFIG_FLAC_PARSER 1 ++#define CONFIG_G723_1_PARSER 0 ++#define CONFIG_G729_PARSER 0 ++#define CONFIG_GIF_PARSER 0 ++#define CONFIG_GSM_PARSER 1 ++#define CONFIG_H261_PARSER 0 ++#define CONFIG_H263_PARSER 1 ++#define CONFIG_H264_PARSER 1 ++#define CONFIG_HEVC_PARSER 0 ++#define CONFIG_MJPEG_PARSER 0 ++#define CONFIG_MLP_PARSER 0 ++#define CONFIG_MPEG4VIDEO_PARSER 1 ++#define CONFIG_MPEGAUDIO_PARSER 1 ++#define CONFIG_MPEGVIDEO_PARSER 0 ++#define CONFIG_OPUS_PARSER 1 
++#define CONFIG_PNG_PARSER 0 ++#define CONFIG_PNM_PARSER 0 ++#define CONFIG_RV30_PARSER 0 ++#define CONFIG_RV40_PARSER 0 ++#define CONFIG_SBC_PARSER 0 ++#define CONFIG_SIPR_PARSER 0 ++#define CONFIG_TAK_PARSER 0 ++#define CONFIG_VC1_PARSER 0 ++#define CONFIG_VORBIS_PARSER 1 ++#define CONFIG_VP3_PARSER 1 ++#define CONFIG_VP8_PARSER 1 ++#define CONFIG_VP9_PARSER 1 ++#define CONFIG_WEBP_PARSER 0 ++#define CONFIG_XMA_PARSER 0 ++#define CONFIG_ALSA_INDEV 0 ++#define CONFIG_ANDROID_CAMERA_INDEV 0 ++#define CONFIG_AVFOUNDATION_INDEV 0 ++#define CONFIG_BKTR_INDEV 0 ++#define CONFIG_DECKLINK_INDEV 0 ++#define CONFIG_DSHOW_INDEV 0 ++#define CONFIG_FBDEV_INDEV 0 ++#define CONFIG_GDIGRAB_INDEV 0 ++#define CONFIG_IEC61883_INDEV 0 ++#define CONFIG_JACK_INDEV 0 ++#define CONFIG_KMSGRAB_INDEV 0 ++#define CONFIG_LAVFI_INDEV 0 ++#define CONFIG_OPENAL_INDEV 0 ++#define CONFIG_OSS_INDEV 0 ++#define CONFIG_PULSE_INDEV 0 ++#define CONFIG_SNDIO_INDEV 0 ++#define CONFIG_V4L2_INDEV 0 ++#define CONFIG_VFWCAP_INDEV 0 ++#define CONFIG_XCBGRAB_INDEV 0 ++#define CONFIG_LIBCDIO_INDEV 0 ++#define CONFIG_LIBDC1394_INDEV 0 ++#define CONFIG_ALSA_OUTDEV 0 ++#define CONFIG_CACA_OUTDEV 0 ++#define CONFIG_DECKLINK_OUTDEV 0 ++#define CONFIG_FBDEV_OUTDEV 0 ++#define CONFIG_OPENGL_OUTDEV 0 ++#define CONFIG_OSS_OUTDEV 0 ++#define CONFIG_PULSE_OUTDEV 0 ++#define CONFIG_SDL2_OUTDEV 0 ++#define CONFIG_SNDIO_OUTDEV 0 ++#define CONFIG_V4L2_OUTDEV 0 ++#define CONFIG_XV_OUTDEV 0 ++#define CONFIG_ABENCH_FILTER 0 ++#define CONFIG_ACOMPRESSOR_FILTER 0 ++#define CONFIG_ACONTRAST_FILTER 0 ++#define CONFIG_ACOPY_FILTER 0 ++#define CONFIG_ACUE_FILTER 0 ++#define CONFIG_ACROSSFADE_FILTER 0 ++#define CONFIG_ACROSSOVER_FILTER 0 ++#define CONFIG_ACRUSHER_FILTER 0 ++#define CONFIG_ADECLICK_FILTER 0 ++#define CONFIG_ADECLIP_FILTER 0 ++#define CONFIG_ADELAY_FILTER 0 ++#define CONFIG_ADERIVATIVE_FILTER 0 ++#define CONFIG_AECHO_FILTER 0 ++#define CONFIG_AEMPHASIS_FILTER 0 ++#define CONFIG_AEVAL_FILTER 0 ++#define CONFIG_AFADE_FILTER 0 ++#define CONFIG_AFFTDN_FILTER 0 ++#define CONFIG_AFFTFILT_FILTER 0 ++#define CONFIG_AFIR_FILTER 0 ++#define CONFIG_AFORMAT_FILTER 0 ++#define CONFIG_AGATE_FILTER 0 ++#define CONFIG_AIIR_FILTER 0 ++#define CONFIG_AINTEGRAL_FILTER 0 ++#define CONFIG_AINTERLEAVE_FILTER 0 ++#define CONFIG_ALIMITER_FILTER 0 ++#define CONFIG_ALLPASS_FILTER 0 ++#define CONFIG_ALOOP_FILTER 0 ++#define CONFIG_AMERGE_FILTER 0 ++#define CONFIG_AMETADATA_FILTER 0 ++#define CONFIG_AMIX_FILTER 0 ++#define CONFIG_AMULTIPLY_FILTER 0 ++#define CONFIG_ANEQUALIZER_FILTER 0 ++#define CONFIG_ANLMDN_FILTER 0 ++#define CONFIG_ANLMS_FILTER 0 ++#define CONFIG_ANULL_FILTER 0 ++#define CONFIG_APAD_FILTER 0 ++#define CONFIG_APERMS_FILTER 0 ++#define CONFIG_APHASER_FILTER 0 ++#define CONFIG_APULSATOR_FILTER 0 ++#define CONFIG_AREALTIME_FILTER 0 ++#define CONFIG_ARESAMPLE_FILTER 0 ++#define CONFIG_AREVERSE_FILTER 0 ++#define CONFIG_ARNNDN_FILTER 0 ++#define CONFIG_ASELECT_FILTER 0 ++#define CONFIG_ASENDCMD_FILTER 0 ++#define CONFIG_ASETNSAMPLES_FILTER 0 ++#define CONFIG_ASETPTS_FILTER 0 ++#define CONFIG_ASETRATE_FILTER 0 ++#define CONFIG_ASETTB_FILTER 0 ++#define CONFIG_ASHOWINFO_FILTER 0 ++#define CONFIG_ASIDEDATA_FILTER 0 ++#define CONFIG_ASOFTCLIP_FILTER 0 ++#define CONFIG_ASPLIT_FILTER 0 ++#define CONFIG_ASR_FILTER 0 ++#define CONFIG_ASTATS_FILTER 0 ++#define CONFIG_ASTREAMSELECT_FILTER 0 ++#define CONFIG_ATEMPO_FILTER 0 ++#define CONFIG_ATRIM_FILTER 0 ++#define CONFIG_AXCORRELATE_FILTER 0 ++#define CONFIG_AZMQ_FILTER 0 ++#define CONFIG_BANDPASS_FILTER 0 ++#define 
CONFIG_BANDREJECT_FILTER 0 ++#define CONFIG_BASS_FILTER 0 ++#define CONFIG_BIQUAD_FILTER 0 ++#define CONFIG_BS2B_FILTER 0 ++#define CONFIG_CHROMABER_VULKAN_FILTER 0 ++#define CONFIG_CHANNELMAP_FILTER 0 ++#define CONFIG_CHANNELSPLIT_FILTER 0 ++#define CONFIG_CHORUS_FILTER 0 ++#define CONFIG_COMPAND_FILTER 0 ++#define CONFIG_COMPENSATIONDELAY_FILTER 0 ++#define CONFIG_CROSSFEED_FILTER 0 ++#define CONFIG_CRYSTALIZER_FILTER 0 ++#define CONFIG_DCSHIFT_FILTER 0 ++#define CONFIG_DEESSER_FILTER 0 ++#define CONFIG_DRMETER_FILTER 0 ++#define CONFIG_DYNAUDNORM_FILTER 0 ++#define CONFIG_EARWAX_FILTER 0 ++#define CONFIG_EBUR128_FILTER 0 ++#define CONFIG_EQUALIZER_FILTER 0 ++#define CONFIG_EXTRASTEREO_FILTER 0 ++#define CONFIG_FIREQUALIZER_FILTER 0 ++#define CONFIG_FLANGER_FILTER 0 ++#define CONFIG_HAAS_FILTER 0 ++#define CONFIG_HDCD_FILTER 0 ++#define CONFIG_HEADPHONE_FILTER 0 ++#define CONFIG_HIGHPASS_FILTER 0 ++#define CONFIG_HIGHSHELF_FILTER 0 ++#define CONFIG_JOIN_FILTER 0 ++#define CONFIG_LADSPA_FILTER 0 ++#define CONFIG_LOUDNORM_FILTER 0 ++#define CONFIG_LOWPASS_FILTER 0 ++#define CONFIG_LOWSHELF_FILTER 0 ++#define CONFIG_LV2_FILTER 0 ++#define CONFIG_MCOMPAND_FILTER 0 ++#define CONFIG_PAN_FILTER 0 ++#define CONFIG_REPLAYGAIN_FILTER 0 ++#define CONFIG_RESAMPLE_FILTER 0 ++#define CONFIG_RUBBERBAND_FILTER 0 ++#define CONFIG_SIDECHAINCOMPRESS_FILTER 0 ++#define CONFIG_SIDECHAINGATE_FILTER 0 ++#define CONFIG_SILENCEDETECT_FILTER 0 ++#define CONFIG_SILENCEREMOVE_FILTER 0 ++#define CONFIG_SOFALIZER_FILTER 0 ++#define CONFIG_STEREOTOOLS_FILTER 0 ++#define CONFIG_STEREOWIDEN_FILTER 0 ++#define CONFIG_SUPEREQUALIZER_FILTER 0 ++#define CONFIG_SURROUND_FILTER 0 ++#define CONFIG_TREBLE_FILTER 0 ++#define CONFIG_TREMOLO_FILTER 0 ++#define CONFIG_VIBRATO_FILTER 0 ++#define CONFIG_VOLUME_FILTER 0 ++#define CONFIG_VOLUMEDETECT_FILTER 0 ++#define CONFIG_AEVALSRC_FILTER 0 ++#define CONFIG_AFIRSRC_FILTER 0 ++#define CONFIG_ANOISESRC_FILTER 0 ++#define CONFIG_ANULLSRC_FILTER 0 ++#define CONFIG_FLITE_FILTER 0 ++#define CONFIG_HILBERT_FILTER 0 ++#define CONFIG_SINC_FILTER 0 ++#define CONFIG_SINE_FILTER 0 ++#define CONFIG_ANULLSINK_FILTER 0 ++#define CONFIG_ADDROI_FILTER 0 ++#define CONFIG_ALPHAEXTRACT_FILTER 0 ++#define CONFIG_ALPHAMERGE_FILTER 0 ++#define CONFIG_AMPLIFY_FILTER 0 ++#define CONFIG_ASS_FILTER 0 ++#define CONFIG_ATADENOISE_FILTER 0 ++#define CONFIG_AVGBLUR_FILTER 0 ++#define CONFIG_AVGBLUR_OPENCL_FILTER 0 ++#define CONFIG_AVGBLUR_VULKAN_FILTER 0 ++#define CONFIG_BBOX_FILTER 0 ++#define CONFIG_BENCH_FILTER 0 ++#define CONFIG_BILATERAL_FILTER 0 ++#define CONFIG_BITPLANENOISE_FILTER 0 ++#define CONFIG_BLACKDETECT_FILTER 0 ++#define CONFIG_BLACKFRAME_FILTER 0 ++#define CONFIG_BLEND_FILTER 0 ++#define CONFIG_BM3D_FILTER 0 ++#define CONFIG_BOXBLUR_FILTER 0 ++#define CONFIG_BOXBLUR_OPENCL_FILTER 0 ++#define CONFIG_BWDIF_FILTER 0 ++#define CONFIG_CAS_FILTER 0 ++#define CONFIG_CHROMAHOLD_FILTER 0 ++#define CONFIG_CHROMAKEY_FILTER 0 ++#define CONFIG_CHROMASHIFT_FILTER 0 ++#define CONFIG_CIESCOPE_FILTER 0 ++#define CONFIG_CODECVIEW_FILTER 0 ++#define CONFIG_COLORBALANCE_FILTER 0 ++#define CONFIG_COLORCHANNELMIXER_FILTER 0 ++#define CONFIG_COLORKEY_FILTER 0 ++#define CONFIG_COLORKEY_OPENCL_FILTER 0 ++#define CONFIG_COLORHOLD_FILTER 0 ++#define CONFIG_COLORLEVELS_FILTER 0 ++#define CONFIG_COLORMATRIX_FILTER 0 ++#define CONFIG_COLORSPACE_FILTER 0 ++#define CONFIG_CONVOLUTION_FILTER 0 ++#define CONFIG_CONVOLUTION_OPENCL_FILTER 0 ++#define CONFIG_CONVOLVE_FILTER 0 ++#define CONFIG_COPY_FILTER 0 ++#define 
CONFIG_COREIMAGE_FILTER 0 ++#define CONFIG_COVER_RECT_FILTER 0 ++#define CONFIG_CROP_FILTER 0 ++#define CONFIG_CROPDETECT_FILTER 0 ++#define CONFIG_CUE_FILTER 0 ++#define CONFIG_CURVES_FILTER 0 ++#define CONFIG_DATASCOPE_FILTER 0 ++#define CONFIG_DCTDNOIZ_FILTER 0 ++#define CONFIG_DEBAND_FILTER 0 ++#define CONFIG_DEBLOCK_FILTER 0 ++#define CONFIG_DECIMATE_FILTER 0 ++#define CONFIG_DECONVOLVE_FILTER 0 ++#define CONFIG_DEDOT_FILTER 0 ++#define CONFIG_DEFLATE_FILTER 0 ++#define CONFIG_DEFLICKER_FILTER 0 ++#define CONFIG_DEINTERLACE_QSV_FILTER 0 ++#define CONFIG_DEINTERLACE_VAAPI_FILTER 0 ++#define CONFIG_DEJUDDER_FILTER 0 ++#define CONFIG_DELOGO_FILTER 0 ++#define CONFIG_DENOISE_VAAPI_FILTER 0 ++#define CONFIG_DERAIN_FILTER 0 ++#define CONFIG_DESHAKE_FILTER 0 ++#define CONFIG_DESHAKE_OPENCL_FILTER 0 ++#define CONFIG_DESPILL_FILTER 0 ++#define CONFIG_DETELECINE_FILTER 0 ++#define CONFIG_DILATION_FILTER 0 ++#define CONFIG_DILATION_OPENCL_FILTER 0 ++#define CONFIG_DISPLACE_FILTER 0 ++#define CONFIG_DNN_PROCESSING_FILTER 0 ++#define CONFIG_DOUBLEWEAVE_FILTER 0 ++#define CONFIG_DRAWBOX_FILTER 0 ++#define CONFIG_DRAWGRAPH_FILTER 0 ++#define CONFIG_DRAWGRID_FILTER 0 ++#define CONFIG_DRAWTEXT_FILTER 0 ++#define CONFIG_EDGEDETECT_FILTER 0 ++#define CONFIG_ELBG_FILTER 0 ++#define CONFIG_ENTROPY_FILTER 0 ++#define CONFIG_EQ_FILTER 0 ++#define CONFIG_EROSION_FILTER 0 ++#define CONFIG_EROSION_OPENCL_FILTER 0 ++#define CONFIG_EXTRACTPLANES_FILTER 0 ++#define CONFIG_FADE_FILTER 0 ++#define CONFIG_FFTDNOIZ_FILTER 0 ++#define CONFIG_FFTFILT_FILTER 0 ++#define CONFIG_FIELD_FILTER 0 ++#define CONFIG_FIELDHINT_FILTER 0 ++#define CONFIG_FIELDMATCH_FILTER 0 ++#define CONFIG_FIELDORDER_FILTER 0 ++#define CONFIG_FILLBORDERS_FILTER 0 ++#define CONFIG_FIND_RECT_FILTER 0 ++#define CONFIG_FLOODFILL_FILTER 0 ++#define CONFIG_FORMAT_FILTER 0 ++#define CONFIG_FPS_FILTER 0 ++#define CONFIG_FRAMEPACK_FILTER 0 ++#define CONFIG_FRAMERATE_FILTER 0 ++#define CONFIG_FRAMESTEP_FILTER 0 ++#define CONFIG_FREEZEDETECT_FILTER 0 ++#define CONFIG_FREEZEFRAMES_FILTER 0 ++#define CONFIG_FREI0R_FILTER 0 ++#define CONFIG_FSPP_FILTER 0 ++#define CONFIG_GBLUR_FILTER 0 ++#define CONFIG_GEQ_FILTER 0 ++#define CONFIG_GRADFUN_FILTER 0 ++#define CONFIG_GRAPHMONITOR_FILTER 0 ++#define CONFIG_GREYEDGE_FILTER 0 ++#define CONFIG_HALDCLUT_FILTER 0 ++#define CONFIG_HFLIP_FILTER 0 ++#define CONFIG_HISTEQ_FILTER 0 ++#define CONFIG_HISTOGRAM_FILTER 0 ++#define CONFIG_HQDN3D_FILTER 0 ++#define CONFIG_HQX_FILTER 0 ++#define CONFIG_HSTACK_FILTER 0 ++#define CONFIG_HUE_FILTER 0 ++#define CONFIG_HWDOWNLOAD_FILTER 0 ++#define CONFIG_HWMAP_FILTER 0 ++#define CONFIG_HWUPLOAD_FILTER 0 ++#define CONFIG_HWUPLOAD_CUDA_FILTER 0 ++#define CONFIG_HYSTERESIS_FILTER 0 ++#define CONFIG_IDET_FILTER 0 ++#define CONFIG_IL_FILTER 0 ++#define CONFIG_INFLATE_FILTER 0 ++#define CONFIG_INTERLACE_FILTER 0 ++#define CONFIG_INTERLEAVE_FILTER 0 ++#define CONFIG_KERNDEINT_FILTER 0 ++#define CONFIG_LAGFUN_FILTER 0 ++#define CONFIG_LENSCORRECTION_FILTER 0 ++#define CONFIG_LENSFUN_FILTER 0 ++#define CONFIG_LIBVMAF_FILTER 0 ++#define CONFIG_LIMITER_FILTER 0 ++#define CONFIG_LOOP_FILTER 0 ++#define CONFIG_LUMAKEY_FILTER 0 ++#define CONFIG_LUT_FILTER 0 ++#define CONFIG_LUT1D_FILTER 0 ++#define CONFIG_LUT2_FILTER 0 ++#define CONFIG_LUT3D_FILTER 0 ++#define CONFIG_LUTRGB_FILTER 0 ++#define CONFIG_LUTYUV_FILTER 0 ++#define CONFIG_MASKEDCLAMP_FILTER 0 ++#define CONFIG_MASKEDMAX_FILTER 0 ++#define CONFIG_MASKEDMERGE_FILTER 0 ++#define CONFIG_MASKEDMIN_FILTER 0 ++#define 
CONFIG_MASKEDTHRESHOLD_FILTER 0 ++#define CONFIG_MASKFUN_FILTER 0 ++#define CONFIG_MCDEINT_FILTER 0 ++#define CONFIG_MEDIAN_FILTER 0 ++#define CONFIG_MERGEPLANES_FILTER 0 ++#define CONFIG_MESTIMATE_FILTER 0 ++#define CONFIG_METADATA_FILTER 0 ++#define CONFIG_MIDEQUALIZER_FILTER 0 ++#define CONFIG_MINTERPOLATE_FILTER 0 ++#define CONFIG_MIX_FILTER 0 ++#define CONFIG_MPDECIMATE_FILTER 0 ++#define CONFIG_NEGATE_FILTER 0 ++#define CONFIG_NLMEANS_FILTER 0 ++#define CONFIG_NLMEANS_OPENCL_FILTER 0 ++#define CONFIG_NNEDI_FILTER 0 ++#define CONFIG_NOFORMAT_FILTER 0 ++#define CONFIG_NOISE_FILTER 0 ++#define CONFIG_NORMALIZE_FILTER 0 ++#define CONFIG_NULL_FILTER 0 ++#define CONFIG_OCR_FILTER 0 ++#define CONFIG_OCV_FILTER 0 ++#define CONFIG_OSCILLOSCOPE_FILTER 0 ++#define CONFIG_OVERLAY_FILTER 0 ++#define CONFIG_OVERLAY_OPENCL_FILTER 0 ++#define CONFIG_OVERLAY_QSV_FILTER 0 ++#define CONFIG_OVERLAY_VULKAN_FILTER 0 ++#define CONFIG_OVERLAY_CUDA_FILTER 0 ++#define CONFIG_OWDENOISE_FILTER 0 ++#define CONFIG_PAD_FILTER 0 ++#define CONFIG_PAD_OPENCL_FILTER 0 ++#define CONFIG_PALETTEGEN_FILTER 0 ++#define CONFIG_PALETTEUSE_FILTER 0 ++#define CONFIG_PERMS_FILTER 0 ++#define CONFIG_PERSPECTIVE_FILTER 0 ++#define CONFIG_PHASE_FILTER 0 ++#define CONFIG_PHOTOSENSITIVITY_FILTER 0 ++#define CONFIG_PIXDESCTEST_FILTER 0 ++#define CONFIG_PIXSCOPE_FILTER 0 ++#define CONFIG_PP_FILTER 0 ++#define CONFIG_PP7_FILTER 0 ++#define CONFIG_PREMULTIPLY_FILTER 0 ++#define CONFIG_PREWITT_FILTER 0 ++#define CONFIG_PREWITT_OPENCL_FILTER 0 ++#define CONFIG_PROCAMP_VAAPI_FILTER 0 ++#define CONFIG_PROGRAM_OPENCL_FILTER 0 ++#define CONFIG_PSEUDOCOLOR_FILTER 0 ++#define CONFIG_PSNR_FILTER 0 ++#define CONFIG_PULLUP_FILTER 0 ++#define CONFIG_QP_FILTER 0 ++#define CONFIG_RANDOM_FILTER 0 ++#define CONFIG_READEIA608_FILTER 0 ++#define CONFIG_READVITC_FILTER 0 ++#define CONFIG_REALTIME_FILTER 0 ++#define CONFIG_REMAP_FILTER 0 ++#define CONFIG_REMOVEGRAIN_FILTER 0 ++#define CONFIG_REMOVELOGO_FILTER 0 ++#define CONFIG_REPEATFIELDS_FILTER 0 ++#define CONFIG_REVERSE_FILTER 0 ++#define CONFIG_RGBASHIFT_FILTER 0 ++#define CONFIG_ROBERTS_FILTER 0 ++#define CONFIG_ROBERTS_OPENCL_FILTER 0 ++#define CONFIG_ROTATE_FILTER 0 ++#define CONFIG_SAB_FILTER 0 ++#define CONFIG_SCALE_FILTER 0 ++#define CONFIG_SCALE_CUDA_FILTER 0 ++#define CONFIG_SCALE_NPP_FILTER 0 ++#define CONFIG_SCALE_QSV_FILTER 0 ++#define CONFIG_SCALE_VAAPI_FILTER 0 ++#define CONFIG_SCALE_VULKAN_FILTER 0 ++#define CONFIG_SCALE2REF_FILTER 0 ++#define CONFIG_SCROLL_FILTER 0 ++#define CONFIG_SELECT_FILTER 0 ++#define CONFIG_SELECTIVECOLOR_FILTER 0 ++#define CONFIG_SENDCMD_FILTER 0 ++#define CONFIG_SEPARATEFIELDS_FILTER 0 ++#define CONFIG_SETDAR_FILTER 0 ++#define CONFIG_SETFIELD_FILTER 0 ++#define CONFIG_SETPARAMS_FILTER 0 ++#define CONFIG_SETPTS_FILTER 0 ++#define CONFIG_SETRANGE_FILTER 0 ++#define CONFIG_SETSAR_FILTER 0 ++#define CONFIG_SETTB_FILTER 0 ++#define CONFIG_SHARPNESS_VAAPI_FILTER 0 ++#define CONFIG_SHOWINFO_FILTER 0 ++#define CONFIG_SHOWPALETTE_FILTER 0 ++#define CONFIG_SHUFFLEFRAMES_FILTER 0 ++#define CONFIG_SHUFFLEPLANES_FILTER 0 ++#define CONFIG_SIDEDATA_FILTER 0 ++#define CONFIG_SIGNALSTATS_FILTER 0 ++#define CONFIG_SIGNATURE_FILTER 0 ++#define CONFIG_SMARTBLUR_FILTER 0 ++#define CONFIG_SOBEL_FILTER 0 ++#define CONFIG_SOBEL_OPENCL_FILTER 0 ++#define CONFIG_SPLIT_FILTER 0 ++#define CONFIG_SPP_FILTER 0 ++#define CONFIG_SR_FILTER 0 ++#define CONFIG_SSIM_FILTER 0 ++#define CONFIG_STEREO3D_FILTER 0 ++#define CONFIG_STREAMSELECT_FILTER 0 ++#define CONFIG_SUBTITLES_FILTER 0 ++#define 
CONFIG_SUPER2XSAI_FILTER 0 ++#define CONFIG_SWAPRECT_FILTER 0 ++#define CONFIG_SWAPUV_FILTER 0 ++#define CONFIG_TBLEND_FILTER 0 ++#define CONFIG_TELECINE_FILTER 0 ++#define CONFIG_THISTOGRAM_FILTER 0 ++#define CONFIG_THRESHOLD_FILTER 0 ++#define CONFIG_THUMBNAIL_FILTER 0 ++#define CONFIG_THUMBNAIL_CUDA_FILTER 0 ++#define CONFIG_TILE_FILTER 0 ++#define CONFIG_TINTERLACE_FILTER 0 ++#define CONFIG_TLUT2_FILTER 0 ++#define CONFIG_TMEDIAN_FILTER 0 ++#define CONFIG_TMIX_FILTER 0 ++#define CONFIG_TONEMAP_FILTER 0 ++#define CONFIG_TONEMAP_OPENCL_FILTER 0 ++#define CONFIG_TONEMAP_VAAPI_FILTER 0 ++#define CONFIG_TPAD_FILTER 0 ++#define CONFIG_TRANSPOSE_FILTER 0 ++#define CONFIG_TRANSPOSE_NPP_FILTER 0 ++#define CONFIG_TRANSPOSE_OPENCL_FILTER 0 ++#define CONFIG_TRANSPOSE_VAAPI_FILTER 0 ++#define CONFIG_TRIM_FILTER 0 ++#define CONFIG_UNPREMULTIPLY_FILTER 0 ++#define CONFIG_UNSHARP_FILTER 0 ++#define CONFIG_UNSHARP_OPENCL_FILTER 0 ++#define CONFIG_USPP_FILTER 0 ++#define CONFIG_V360_FILTER 0 ++#define CONFIG_VAGUEDENOISER_FILTER 0 ++#define CONFIG_VECTORSCOPE_FILTER 0 ++#define CONFIG_VFLIP_FILTER 0 ++#define CONFIG_VFRDET_FILTER 0 ++#define CONFIG_VIBRANCE_FILTER 0 ++#define CONFIG_VIDSTABDETECT_FILTER 0 ++#define CONFIG_VIDSTABTRANSFORM_FILTER 0 ++#define CONFIG_VIGNETTE_FILTER 0 ++#define CONFIG_VMAFMOTION_FILTER 0 ++#define CONFIG_VPP_QSV_FILTER 0 ++#define CONFIG_VSTACK_FILTER 0 ++#define CONFIG_W3FDIF_FILTER 0 ++#define CONFIG_WAVEFORM_FILTER 0 ++#define CONFIG_WEAVE_FILTER 0 ++#define CONFIG_XBR_FILTER 0 ++#define CONFIG_XFADE_FILTER 0 ++#define CONFIG_XFADE_OPENCL_FILTER 0 ++#define CONFIG_XMEDIAN_FILTER 0 ++#define CONFIG_XSTACK_FILTER 0 ++#define CONFIG_YADIF_FILTER 0 ++#define CONFIG_YADIF_CUDA_FILTER 0 ++#define CONFIG_YAEPBLUR_FILTER 0 ++#define CONFIG_ZMQ_FILTER 0 ++#define CONFIG_ZOOMPAN_FILTER 0 ++#define CONFIG_ZSCALE_FILTER 0 ++#define CONFIG_ALLRGB_FILTER 0 ++#define CONFIG_ALLYUV_FILTER 0 ++#define CONFIG_CELLAUTO_FILTER 0 ++#define CONFIG_COLOR_FILTER 0 ++#define CONFIG_COREIMAGESRC_FILTER 0 ++#define CONFIG_FREI0R_SRC_FILTER 0 ++#define CONFIG_HALDCLUTSRC_FILTER 0 ++#define CONFIG_LIFE_FILTER 0 ++#define CONFIG_MANDELBROT_FILTER 0 ++#define CONFIG_MPTESTSRC_FILTER 0 ++#define CONFIG_NULLSRC_FILTER 0 ++#define CONFIG_OPENCLSRC_FILTER 0 ++#define CONFIG_PAL75BARS_FILTER 0 ++#define CONFIG_PAL100BARS_FILTER 0 ++#define CONFIG_RGBTESTSRC_FILTER 0 ++#define CONFIG_SIERPINSKI_FILTER 0 ++#define CONFIG_SMPTEBARS_FILTER 0 ++#define CONFIG_SMPTEHDBARS_FILTER 0 ++#define CONFIG_TESTSRC_FILTER 0 ++#define CONFIG_TESTSRC2_FILTER 0 ++#define CONFIG_YUVTESTSRC_FILTER 0 ++#define CONFIG_NULLSINK_FILTER 0 ++#define CONFIG_ABITSCOPE_FILTER 0 ++#define CONFIG_ADRAWGRAPH_FILTER 0 ++#define CONFIG_AGRAPHMONITOR_FILTER 0 ++#define CONFIG_AHISTOGRAM_FILTER 0 ++#define CONFIG_APHASEMETER_FILTER 0 ++#define CONFIG_AVECTORSCOPE_FILTER 0 ++#define CONFIG_CONCAT_FILTER 0 ++#define CONFIG_SHOWCQT_FILTER 0 ++#define CONFIG_SHOWFREQS_FILTER 0 ++#define CONFIG_SHOWSPATIAL_FILTER 0 ++#define CONFIG_SHOWSPECTRUM_FILTER 0 ++#define CONFIG_SHOWSPECTRUMPIC_FILTER 0 ++#define CONFIG_SHOWVOLUME_FILTER 0 ++#define CONFIG_SHOWWAVES_FILTER 0 ++#define CONFIG_SHOWWAVESPIC_FILTER 0 ++#define CONFIG_SPECTRUMSYNTH_FILTER 0 ++#define CONFIG_AMOVIE_FILTER 0 ++#define CONFIG_MOVIE_FILTER 0 ++#define CONFIG_AFIFO_FILTER 0 ++#define CONFIG_FIFO_FILTER 0 ++#define CONFIG_AA_DEMUXER 0 ++#define CONFIG_AAC_DEMUXER 1 ++#define CONFIG_AC3_DEMUXER 0 ++#define CONFIG_ACM_DEMUXER 0 ++#define CONFIG_ACT_DEMUXER 0 ++#define 
CONFIG_ADF_DEMUXER 0 ++#define CONFIG_ADP_DEMUXER 0 ++#define CONFIG_ADS_DEMUXER 0 ++#define CONFIG_ADX_DEMUXER 0 ++#define CONFIG_AEA_DEMUXER 0 ++#define CONFIG_AFC_DEMUXER 0 ++#define CONFIG_AIFF_DEMUXER 0 ++#define CONFIG_AIX_DEMUXER 0 ++#define CONFIG_ALP_DEMUXER 0 ++#define CONFIG_AMR_DEMUXER 1 ++#define CONFIG_AMRNB_DEMUXER 0 ++#define CONFIG_AMRWB_DEMUXER 0 ++#define CONFIG_ANM_DEMUXER 0 ++#define CONFIG_APC_DEMUXER 0 ++#define CONFIG_APE_DEMUXER 0 ++#define CONFIG_APM_DEMUXER 0 ++#define CONFIG_APNG_DEMUXER 0 ++#define CONFIG_APTX_DEMUXER 0 ++#define CONFIG_APTX_HD_DEMUXER 0 ++#define CONFIG_AQTITLE_DEMUXER 0 ++#define CONFIG_ARGO_ASF_DEMUXER 0 ++#define CONFIG_ASF_DEMUXER 0 ++#define CONFIG_ASF_O_DEMUXER 0 ++#define CONFIG_ASS_DEMUXER 0 ++#define CONFIG_AST_DEMUXER 0 ++#define CONFIG_AU_DEMUXER 0 ++#define CONFIG_AV1_DEMUXER 0 ++#define CONFIG_AVI_DEMUXER 1 ++#define CONFIG_AVISYNTH_DEMUXER 0 ++#define CONFIG_AVR_DEMUXER 0 ++#define CONFIG_AVS_DEMUXER 0 ++#define CONFIG_AVS2_DEMUXER 0 ++#define CONFIG_BETHSOFTVID_DEMUXER 0 ++#define CONFIG_BFI_DEMUXER 0 ++#define CONFIG_BINTEXT_DEMUXER 0 ++#define CONFIG_BINK_DEMUXER 0 ++#define CONFIG_BIT_DEMUXER 0 ++#define CONFIG_BMV_DEMUXER 0 ++#define CONFIG_BFSTM_DEMUXER 0 ++#define CONFIG_BRSTM_DEMUXER 0 ++#define CONFIG_BOA_DEMUXER 0 ++#define CONFIG_C93_DEMUXER 0 ++#define CONFIG_CAF_DEMUXER 0 ++#define CONFIG_CAVSVIDEO_DEMUXER 0 ++#define CONFIG_CDG_DEMUXER 0 ++#define CONFIG_CDXL_DEMUXER 0 ++#define CONFIG_CINE_DEMUXER 0 ++#define CONFIG_CODEC2_DEMUXER 0 ++#define CONFIG_CODEC2RAW_DEMUXER 0 ++#define CONFIG_CONCAT_DEMUXER 0 ++#define CONFIG_DASH_DEMUXER 0 ++#define CONFIG_DATA_DEMUXER 0 ++#define CONFIG_DAUD_DEMUXER 0 ++#define CONFIG_DCSTR_DEMUXER 0 ++#define CONFIG_DERF_DEMUXER 0 ++#define CONFIG_DFA_DEMUXER 0 ++#define CONFIG_DHAV_DEMUXER 0 ++#define CONFIG_DIRAC_DEMUXER 0 ++#define CONFIG_DNXHD_DEMUXER 0 ++#define CONFIG_DSF_DEMUXER 0 ++#define CONFIG_DSICIN_DEMUXER 0 ++#define CONFIG_DSS_DEMUXER 0 ++#define CONFIG_DTS_DEMUXER 0 ++#define CONFIG_DTSHD_DEMUXER 0 ++#define CONFIG_DV_DEMUXER 0 ++#define CONFIG_DVBSUB_DEMUXER 0 ++#define CONFIG_DVBTXT_DEMUXER 0 ++#define CONFIG_DXA_DEMUXER 0 ++#define CONFIG_EA_DEMUXER 0 ++#define CONFIG_EA_CDATA_DEMUXER 0 ++#define CONFIG_EAC3_DEMUXER 0 ++#define CONFIG_EPAF_DEMUXER 0 ++#define CONFIG_FFMETADATA_DEMUXER 0 ++#define CONFIG_FILMSTRIP_DEMUXER 0 ++#define CONFIG_FITS_DEMUXER 0 ++#define CONFIG_FLAC_DEMUXER 1 ++#define CONFIG_FLIC_DEMUXER 0 ++#define CONFIG_FLV_DEMUXER 0 ++#define CONFIG_LIVE_FLV_DEMUXER 0 ++#define CONFIG_FOURXM_DEMUXER 0 ++#define CONFIG_FRM_DEMUXER 0 ++#define CONFIG_FSB_DEMUXER 0 ++#define CONFIG_FWSE_DEMUXER 0 ++#define CONFIG_G722_DEMUXER 0 ++#define CONFIG_G723_1_DEMUXER 0 ++#define CONFIG_G726_DEMUXER 0 ++#define CONFIG_G726LE_DEMUXER 0 ++#define CONFIG_G729_DEMUXER 0 ++#define CONFIG_GDV_DEMUXER 0 ++#define CONFIG_GENH_DEMUXER 0 ++#define CONFIG_GIF_DEMUXER 0 ++#define CONFIG_GSM_DEMUXER 0 ++#define CONFIG_GXF_DEMUXER 0 ++#define CONFIG_H261_DEMUXER 0 ++#define CONFIG_H263_DEMUXER 0 ++#define CONFIG_H264_DEMUXER 0 ++#define CONFIG_HCA_DEMUXER 0 ++#define CONFIG_HCOM_DEMUXER 0 ++#define CONFIG_HEVC_DEMUXER 0 ++#define CONFIG_HLS_DEMUXER 0 ++#define CONFIG_HNM_DEMUXER 0 ++#define CONFIG_ICO_DEMUXER 0 ++#define CONFIG_IDCIN_DEMUXER 0 ++#define CONFIG_IDF_DEMUXER 0 ++#define CONFIG_IFF_DEMUXER 0 ++#define CONFIG_IFV_DEMUXER 0 ++#define CONFIG_ILBC_DEMUXER 0 ++#define CONFIG_IMAGE2_DEMUXER 0 ++#define CONFIG_IMAGE2PIPE_DEMUXER 0 ++#define CONFIG_IMAGE2_ALIAS_PIX_DEMUXER 
0 ++#define CONFIG_IMAGE2_BRENDER_PIX_DEMUXER 0 ++#define CONFIG_INGENIENT_DEMUXER 0 ++#define CONFIG_IPMOVIE_DEMUXER 0 ++#define CONFIG_IRCAM_DEMUXER 0 ++#define CONFIG_ISS_DEMUXER 0 ++#define CONFIG_IV8_DEMUXER 0 ++#define CONFIG_IVF_DEMUXER 0 ++#define CONFIG_IVR_DEMUXER 0 ++#define CONFIG_JACOSUB_DEMUXER 0 ++#define CONFIG_JV_DEMUXER 0 ++#define CONFIG_KUX_DEMUXER 0 ++#define CONFIG_KVAG_DEMUXER 0 ++#define CONFIG_LMLM4_DEMUXER 0 ++#define CONFIG_LOAS_DEMUXER 0 ++#define CONFIG_LRC_DEMUXER 0 ++#define CONFIG_LVF_DEMUXER 0 ++#define CONFIG_LXF_DEMUXER 0 ++#define CONFIG_M4V_DEMUXER 0 ++#define CONFIG_MATROSKA_DEMUXER 1 ++#define CONFIG_MGSTS_DEMUXER 0 ++#define CONFIG_MICRODVD_DEMUXER 0 ++#define CONFIG_MJPEG_DEMUXER 0 ++#define CONFIG_MJPEG_2000_DEMUXER 0 ++#define CONFIG_MLP_DEMUXER 0 ++#define CONFIG_MLV_DEMUXER 0 ++#define CONFIG_MM_DEMUXER 0 ++#define CONFIG_MMF_DEMUXER 0 ++#define CONFIG_MOV_DEMUXER 1 ++#define CONFIG_MP3_DEMUXER 1 ++#define CONFIG_MPC_DEMUXER 0 ++#define CONFIG_MPC8_DEMUXER 0 ++#define CONFIG_MPEGPS_DEMUXER 0 ++#define CONFIG_MPEGTS_DEMUXER 0 ++#define CONFIG_MPEGTSRAW_DEMUXER 0 ++#define CONFIG_MPEGVIDEO_DEMUXER 0 ++#define CONFIG_MPJPEG_DEMUXER 0 ++#define CONFIG_MPL2_DEMUXER 0 ++#define CONFIG_MPSUB_DEMUXER 0 ++#define CONFIG_MSF_DEMUXER 0 ++#define CONFIG_MSNWC_TCP_DEMUXER 0 ++#define CONFIG_MTAF_DEMUXER 0 ++#define CONFIG_MTV_DEMUXER 0 ++#define CONFIG_MUSX_DEMUXER 0 ++#define CONFIG_MV_DEMUXER 0 ++#define CONFIG_MVI_DEMUXER 0 ++#define CONFIG_MXF_DEMUXER 0 ++#define CONFIG_MXG_DEMUXER 0 ++#define CONFIG_NC_DEMUXER 0 ++#define CONFIG_NISTSPHERE_DEMUXER 0 ++#define CONFIG_NSP_DEMUXER 0 ++#define CONFIG_NSV_DEMUXER 0 ++#define CONFIG_NUT_DEMUXER 0 ++#define CONFIG_NUV_DEMUXER 0 ++#define CONFIG_OGG_DEMUXER 1 ++#define CONFIG_OMA_DEMUXER 0 ++#define CONFIG_PAF_DEMUXER 0 ++#define CONFIG_PCM_ALAW_DEMUXER 0 ++#define CONFIG_PCM_MULAW_DEMUXER 0 ++#define CONFIG_PCM_VIDC_DEMUXER 0 ++#define CONFIG_PCM_F64BE_DEMUXER 0 ++#define CONFIG_PCM_F64LE_DEMUXER 0 ++#define CONFIG_PCM_F32BE_DEMUXER 0 ++#define CONFIG_PCM_F32LE_DEMUXER 0 ++#define CONFIG_PCM_S32BE_DEMUXER 0 ++#define CONFIG_PCM_S32LE_DEMUXER 0 ++#define CONFIG_PCM_S24BE_DEMUXER 0 ++#define CONFIG_PCM_S24LE_DEMUXER 0 ++#define CONFIG_PCM_S16BE_DEMUXER 0 ++#define CONFIG_PCM_S16LE_DEMUXER 0 ++#define CONFIG_PCM_S8_DEMUXER 0 ++#define CONFIG_PCM_U32BE_DEMUXER 0 ++#define CONFIG_PCM_U32LE_DEMUXER 0 ++#define CONFIG_PCM_U24BE_DEMUXER 0 ++#define CONFIG_PCM_U24LE_DEMUXER 0 ++#define CONFIG_PCM_U16BE_DEMUXER 0 ++#define CONFIG_PCM_U16LE_DEMUXER 0 ++#define CONFIG_PCM_U8_DEMUXER 0 ++#define CONFIG_PJS_DEMUXER 0 ++#define CONFIG_PMP_DEMUXER 0 ++#define CONFIG_PVA_DEMUXER 0 ++#define CONFIG_PVF_DEMUXER 0 ++#define CONFIG_QCP_DEMUXER 0 ++#define CONFIG_R3D_DEMUXER 0 ++#define CONFIG_RAWVIDEO_DEMUXER 0 ++#define CONFIG_REALTEXT_DEMUXER 0 ++#define CONFIG_REDSPARK_DEMUXER 0 ++#define CONFIG_RL2_DEMUXER 0 ++#define CONFIG_RM_DEMUXER 0 ++#define CONFIG_ROQ_DEMUXER 0 ++#define CONFIG_RPL_DEMUXER 0 ++#define CONFIG_RSD_DEMUXER 0 ++#define CONFIG_RSO_DEMUXER 0 ++#define CONFIG_RTP_DEMUXER 0 ++#define CONFIG_RTSP_DEMUXER 0 ++#define CONFIG_S337M_DEMUXER 0 ++#define CONFIG_SAMI_DEMUXER 0 ++#define CONFIG_SAP_DEMUXER 0 ++#define CONFIG_SBC_DEMUXER 0 ++#define CONFIG_SBG_DEMUXER 0 ++#define CONFIG_SCC_DEMUXER 0 ++#define CONFIG_SDP_DEMUXER 0 ++#define CONFIG_SDR2_DEMUXER 0 ++#define CONFIG_SDS_DEMUXER 0 ++#define CONFIG_SDX_DEMUXER 0 ++#define CONFIG_SEGAFILM_DEMUXER 0 ++#define CONFIG_SER_DEMUXER 0 ++#define CONFIG_SHORTEN_DEMUXER 
0 ++#define CONFIG_SIFF_DEMUXER 0 ++#define CONFIG_SLN_DEMUXER 0 ++#define CONFIG_SMACKER_DEMUXER 0 ++#define CONFIG_SMJPEG_DEMUXER 0 ++#define CONFIG_SMUSH_DEMUXER 0 ++#define CONFIG_SOL_DEMUXER 0 ++#define CONFIG_SOX_DEMUXER 0 ++#define CONFIG_SPDIF_DEMUXER 0 ++#define CONFIG_SRT_DEMUXER 0 ++#define CONFIG_STR_DEMUXER 0 ++#define CONFIG_STL_DEMUXER 0 ++#define CONFIG_SUBVIEWER1_DEMUXER 0 ++#define CONFIG_SUBVIEWER_DEMUXER 0 ++#define CONFIG_SUP_DEMUXER 0 ++#define CONFIG_SVAG_DEMUXER 0 ++#define CONFIG_SWF_DEMUXER 0 ++#define CONFIG_TAK_DEMUXER 0 ++#define CONFIG_TEDCAPTIONS_DEMUXER 0 ++#define CONFIG_THP_DEMUXER 0 ++#define CONFIG_THREEDOSTR_DEMUXER 0 ++#define CONFIG_TIERTEXSEQ_DEMUXER 0 ++#define CONFIG_TMV_DEMUXER 0 ++#define CONFIG_TRUEHD_DEMUXER 0 ++#define CONFIG_TTA_DEMUXER 0 ++#define CONFIG_TXD_DEMUXER 0 ++#define CONFIG_TTY_DEMUXER 0 ++#define CONFIG_TY_DEMUXER 0 ++#define CONFIG_V210_DEMUXER 0 ++#define CONFIG_V210X_DEMUXER 0 ++#define CONFIG_VAG_DEMUXER 0 ++#define CONFIG_VC1_DEMUXER 0 ++#define CONFIG_VC1T_DEMUXER 0 ++#define CONFIG_VIVIDAS_DEMUXER 0 ++#define CONFIG_VIVO_DEMUXER 0 ++#define CONFIG_VMD_DEMUXER 0 ++#define CONFIG_VOBSUB_DEMUXER 0 ++#define CONFIG_VOC_DEMUXER 0 ++#define CONFIG_VPK_DEMUXER 0 ++#define CONFIG_VPLAYER_DEMUXER 0 ++#define CONFIG_VQF_DEMUXER 0 ++#define CONFIG_W64_DEMUXER 0 ++#define CONFIG_WAV_DEMUXER 1 ++#define CONFIG_WC3_DEMUXER 0 ++#define CONFIG_WEBM_DASH_MANIFEST_DEMUXER 0 ++#define CONFIG_WEBVTT_DEMUXER 0 ++#define CONFIG_WSAUD_DEMUXER 0 ++#define CONFIG_WSD_DEMUXER 0 ++#define CONFIG_WSVQA_DEMUXER 0 ++#define CONFIG_WTV_DEMUXER 0 ++#define CONFIG_WVE_DEMUXER 0 ++#define CONFIG_WV_DEMUXER 0 ++#define CONFIG_XA_DEMUXER 0 ++#define CONFIG_XBIN_DEMUXER 0 ++#define CONFIG_XMV_DEMUXER 0 ++#define CONFIG_XVAG_DEMUXER 0 ++#define CONFIG_XWMA_DEMUXER 0 ++#define CONFIG_YOP_DEMUXER 0 ++#define CONFIG_YUV4MPEGPIPE_DEMUXER 0 ++#define CONFIG_IMAGE_BMP_PIPE_DEMUXER 0 ++#define CONFIG_IMAGE_DDS_PIPE_DEMUXER 0 ++#define CONFIG_IMAGE_DPX_PIPE_DEMUXER 0 ++#define CONFIG_IMAGE_EXR_PIPE_DEMUXER 0 ++#define CONFIG_IMAGE_GIF_PIPE_DEMUXER 0 ++#define CONFIG_IMAGE_J2K_PIPE_DEMUXER 0 ++#define CONFIG_IMAGE_JPEG_PIPE_DEMUXER 0 ++#define CONFIG_IMAGE_JPEGLS_PIPE_DEMUXER 0 ++#define CONFIG_IMAGE_PAM_PIPE_DEMUXER 0 ++#define CONFIG_IMAGE_PBM_PIPE_DEMUXER 0 ++#define CONFIG_IMAGE_PCX_PIPE_DEMUXER 0 ++#define CONFIG_IMAGE_PGMYUV_PIPE_DEMUXER 0 ++#define CONFIG_IMAGE_PGM_PIPE_DEMUXER 0 ++#define CONFIG_IMAGE_PICTOR_PIPE_DEMUXER 0 ++#define CONFIG_IMAGE_PNG_PIPE_DEMUXER 0 ++#define CONFIG_IMAGE_PPM_PIPE_DEMUXER 0 ++#define CONFIG_IMAGE_PSD_PIPE_DEMUXER 0 ++#define CONFIG_IMAGE_QDRAW_PIPE_DEMUXER 0 ++#define CONFIG_IMAGE_SGI_PIPE_DEMUXER 0 ++#define CONFIG_IMAGE_SVG_PIPE_DEMUXER 0 ++#define CONFIG_IMAGE_SUNRAST_PIPE_DEMUXER 0 ++#define CONFIG_IMAGE_TIFF_PIPE_DEMUXER 0 ++#define CONFIG_IMAGE_WEBP_PIPE_DEMUXER 0 ++#define CONFIG_IMAGE_XPM_PIPE_DEMUXER 0 ++#define CONFIG_IMAGE_XWD_PIPE_DEMUXER 0 ++#define CONFIG_LIBGME_DEMUXER 0 ++#define CONFIG_LIBMODPLUG_DEMUXER 0 ++#define CONFIG_LIBOPENMPT_DEMUXER 0 ++#define CONFIG_VAPOURSYNTH_DEMUXER 0 ++#define CONFIG_A64_MUXER 0 ++#define CONFIG_AC3_MUXER 0 ++#define CONFIG_ADTS_MUXER 0 ++#define CONFIG_ADX_MUXER 0 ++#define CONFIG_AIFF_MUXER 0 ++#define CONFIG_AMR_MUXER 0 ++#define CONFIG_APNG_MUXER 0 ++#define CONFIG_APTX_MUXER 0 ++#define CONFIG_APTX_HD_MUXER 0 ++#define CONFIG_ASF_MUXER 0 ++#define CONFIG_ASS_MUXER 0 ++#define CONFIG_AST_MUXER 0 ++#define CONFIG_ASF_STREAM_MUXER 0 ++#define CONFIG_AU_MUXER 0 ++#define 
CONFIG_AVI_MUXER 0 ++#define CONFIG_AVM2_MUXER 0 ++#define CONFIG_AVS2_MUXER 0 ++#define CONFIG_BIT_MUXER 0 ++#define CONFIG_CAF_MUXER 0 ++#define CONFIG_CAVSVIDEO_MUXER 0 ++#define CONFIG_CODEC2_MUXER 0 ++#define CONFIG_CODEC2RAW_MUXER 0 ++#define CONFIG_CRC_MUXER 0 ++#define CONFIG_DASH_MUXER 0 ++#define CONFIG_DATA_MUXER 0 ++#define CONFIG_DAUD_MUXER 0 ++#define CONFIG_DIRAC_MUXER 0 ++#define CONFIG_DNXHD_MUXER 0 ++#define CONFIG_DTS_MUXER 0 ++#define CONFIG_DV_MUXER 0 ++#define CONFIG_EAC3_MUXER 0 ++#define CONFIG_F4V_MUXER 0 ++#define CONFIG_FFMETADATA_MUXER 0 ++#define CONFIG_FIFO_MUXER 0 ++#define CONFIG_FIFO_TEST_MUXER 0 ++#define CONFIG_FILMSTRIP_MUXER 0 ++#define CONFIG_FITS_MUXER 0 ++#define CONFIG_FLAC_MUXER 0 ++#define CONFIG_FLV_MUXER 0 ++#define CONFIG_FRAMECRC_MUXER 0 ++#define CONFIG_FRAMEHASH_MUXER 0 ++#define CONFIG_FRAMEMD5_MUXER 0 ++#define CONFIG_G722_MUXER 0 ++#define CONFIG_G723_1_MUXER 0 ++#define CONFIG_G726_MUXER 0 ++#define CONFIG_G726LE_MUXER 0 ++#define CONFIG_GIF_MUXER 0 ++#define CONFIG_GSM_MUXER 0 ++#define CONFIG_GXF_MUXER 0 ++#define CONFIG_H261_MUXER 0 ++#define CONFIG_H263_MUXER 0 ++#define CONFIG_H264_MUXER 0 ++#define CONFIG_HASH_MUXER 0 ++#define CONFIG_HDS_MUXER 0 ++#define CONFIG_HEVC_MUXER 0 ++#define CONFIG_HLS_MUXER 0 ++#define CONFIG_ICO_MUXER 0 ++#define CONFIG_ILBC_MUXER 0 ++#define CONFIG_IMAGE2_MUXER 0 ++#define CONFIG_IMAGE2PIPE_MUXER 0 ++#define CONFIG_IPOD_MUXER 0 ++#define CONFIG_IRCAM_MUXER 0 ++#define CONFIG_ISMV_MUXER 0 ++#define CONFIG_IVF_MUXER 0 ++#define CONFIG_JACOSUB_MUXER 0 ++#define CONFIG_LATM_MUXER 0 ++#define CONFIG_LRC_MUXER 0 ++#define CONFIG_M4V_MUXER 0 ++#define CONFIG_MD5_MUXER 0 ++#define CONFIG_MATROSKA_MUXER 0 ++#define CONFIG_MATROSKA_AUDIO_MUXER 0 ++#define CONFIG_MICRODVD_MUXER 0 ++#define CONFIG_MJPEG_MUXER 0 ++#define CONFIG_MLP_MUXER 0 ++#define CONFIG_MMF_MUXER 0 ++#define CONFIG_MOV_MUXER 0 ++#define CONFIG_MP2_MUXER 0 ++#define CONFIG_MP3_MUXER 0 ++#define CONFIG_MP4_MUXER 0 ++#define CONFIG_MPEG1SYSTEM_MUXER 0 ++#define CONFIG_MPEG1VCD_MUXER 0 ++#define CONFIG_MPEG1VIDEO_MUXER 0 ++#define CONFIG_MPEG2DVD_MUXER 0 ++#define CONFIG_MPEG2SVCD_MUXER 0 ++#define CONFIG_MPEG2VIDEO_MUXER 0 ++#define CONFIG_MPEG2VOB_MUXER 0 ++#define CONFIG_MPEGTS_MUXER 0 ++#define CONFIG_MPJPEG_MUXER 0 ++#define CONFIG_MXF_MUXER 0 ++#define CONFIG_MXF_D10_MUXER 0 ++#define CONFIG_MXF_OPATOM_MUXER 0 ++#define CONFIG_NULL_MUXER 0 ++#define CONFIG_NUT_MUXER 0 ++#define CONFIG_OGA_MUXER 0 ++#define CONFIG_OGG_MUXER 0 ++#define CONFIG_OGV_MUXER 0 ++#define CONFIG_OMA_MUXER 0 ++#define CONFIG_OPUS_MUXER 0 ++#define CONFIG_PCM_ALAW_MUXER 0 ++#define CONFIG_PCM_MULAW_MUXER 0 ++#define CONFIG_PCM_VIDC_MUXER 0 ++#define CONFIG_PCM_F64BE_MUXER 0 ++#define CONFIG_PCM_F64LE_MUXER 0 ++#define CONFIG_PCM_F32BE_MUXER 0 ++#define CONFIG_PCM_F32LE_MUXER 0 ++#define CONFIG_PCM_S32BE_MUXER 0 ++#define CONFIG_PCM_S32LE_MUXER 0 ++#define CONFIG_PCM_S24BE_MUXER 0 ++#define CONFIG_PCM_S24LE_MUXER 0 ++#define CONFIG_PCM_S16BE_MUXER 0 ++#define CONFIG_PCM_S16LE_MUXER 0 ++#define CONFIG_PCM_S8_MUXER 0 ++#define CONFIG_PCM_U32BE_MUXER 0 ++#define CONFIG_PCM_U32LE_MUXER 0 ++#define CONFIG_PCM_U24BE_MUXER 0 ++#define CONFIG_PCM_U24LE_MUXER 0 ++#define CONFIG_PCM_U16BE_MUXER 0 ++#define CONFIG_PCM_U16LE_MUXER 0 ++#define CONFIG_PCM_U8_MUXER 0 ++#define CONFIG_PSP_MUXER 0 ++#define CONFIG_RAWVIDEO_MUXER 0 ++#define CONFIG_RM_MUXER 0 ++#define CONFIG_ROQ_MUXER 0 ++#define CONFIG_RSO_MUXER 0 ++#define CONFIG_RTP_MUXER 0 ++#define CONFIG_RTP_MPEGTS_MUXER 0 
++#define CONFIG_RTSP_MUXER 0 ++#define CONFIG_SAP_MUXER 0 ++#define CONFIG_SBC_MUXER 0 ++#define CONFIG_SCC_MUXER 0 ++#define CONFIG_SEGAFILM_MUXER 0 ++#define CONFIG_SEGMENT_MUXER 0 ++#define CONFIG_STREAM_SEGMENT_MUXER 0 ++#define CONFIG_SINGLEJPEG_MUXER 0 ++#define CONFIG_SMJPEG_MUXER 0 ++#define CONFIG_SMOOTHSTREAMING_MUXER 0 ++#define CONFIG_SOX_MUXER 0 ++#define CONFIG_SPX_MUXER 0 ++#define CONFIG_SPDIF_MUXER 0 ++#define CONFIG_SRT_MUXER 0 ++#define CONFIG_STREAMHASH_MUXER 0 ++#define CONFIG_SUP_MUXER 0 ++#define CONFIG_SWF_MUXER 0 ++#define CONFIG_TEE_MUXER 0 ++#define CONFIG_TG2_MUXER 0 ++#define CONFIG_TGP_MUXER 0 ++#define CONFIG_MKVTIMESTAMP_V2_MUXER 0 ++#define CONFIG_TRUEHD_MUXER 0 ++#define CONFIG_TTA_MUXER 0 ++#define CONFIG_UNCODEDFRAMECRC_MUXER 0 ++#define CONFIG_VC1_MUXER 0 ++#define CONFIG_VC1T_MUXER 0 ++#define CONFIG_VOC_MUXER 0 ++#define CONFIG_W64_MUXER 0 ++#define CONFIG_WAV_MUXER 0 ++#define CONFIG_WEBM_MUXER 0 ++#define CONFIG_WEBM_DASH_MANIFEST_MUXER 0 ++#define CONFIG_WEBM_CHUNK_MUXER 0 ++#define CONFIG_WEBP_MUXER 0 ++#define CONFIG_WEBVTT_MUXER 0 ++#define CONFIG_WTV_MUXER 0 ++#define CONFIG_WV_MUXER 0 ++#define CONFIG_YUV4MPEGPIPE_MUXER 0 ++#define CONFIG_CHROMAPRINT_MUXER 0 ++#define CONFIG_ASYNC_PROTOCOL 0 ++#define CONFIG_BLURAY_PROTOCOL 0 ++#define CONFIG_CACHE_PROTOCOL 0 ++#define CONFIG_CONCAT_PROTOCOL 0 ++#define CONFIG_CRYPTO_PROTOCOL 0 ++#define CONFIG_DATA_PROTOCOL 0 ++#define CONFIG_FFRTMPCRYPT_PROTOCOL 0 ++#define CONFIG_FFRTMPHTTP_PROTOCOL 0 ++#define CONFIG_FILE_PROTOCOL 0 ++#define CONFIG_FTP_PROTOCOL 0 ++#define CONFIG_GOPHER_PROTOCOL 0 ++#define CONFIG_HLS_PROTOCOL 0 ++#define CONFIG_HTTP_PROTOCOL 0 ++#define CONFIG_HTTPPROXY_PROTOCOL 0 ++#define CONFIG_HTTPS_PROTOCOL 0 ++#define CONFIG_ICECAST_PROTOCOL 0 ++#define CONFIG_MMSH_PROTOCOL 0 ++#define CONFIG_MMST_PROTOCOL 0 ++#define CONFIG_MD5_PROTOCOL 0 ++#define CONFIG_PIPE_PROTOCOL 0 ++#define CONFIG_PROMPEG_PROTOCOL 0 ++#define CONFIG_RTMP_PROTOCOL 0 ++#define CONFIG_RTMPE_PROTOCOL 0 ++#define CONFIG_RTMPS_PROTOCOL 0 ++#define CONFIG_RTMPT_PROTOCOL 0 ++#define CONFIG_RTMPTE_PROTOCOL 0 ++#define CONFIG_RTMPTS_PROTOCOL 0 ++#define CONFIG_RTP_PROTOCOL 0 ++#define CONFIG_SCTP_PROTOCOL 0 ++#define CONFIG_SRTP_PROTOCOL 0 ++#define CONFIG_SUBFILE_PROTOCOL 0 ++#define CONFIG_TEE_PROTOCOL 0 ++#define CONFIG_TCP_PROTOCOL 0 ++#define CONFIG_TLS_PROTOCOL 0 ++#define CONFIG_UDP_PROTOCOL 0 ++#define CONFIG_UDPLITE_PROTOCOL 0 ++#define CONFIG_UNIX_PROTOCOL 0 ++#define CONFIG_LIBAMQP_PROTOCOL 0 ++#define CONFIG_LIBRTMP_PROTOCOL 0 ++#define CONFIG_LIBRTMPE_PROTOCOL 0 ++#define CONFIG_LIBRTMPS_PROTOCOL 0 ++#define CONFIG_LIBRTMPT_PROTOCOL 0 ++#define CONFIG_LIBRTMPTE_PROTOCOL 0 ++#define CONFIG_LIBSRT_PROTOCOL 0 ++#define CONFIG_LIBSSH_PROTOCOL 0 ++#define CONFIG_LIBSMBCLIENT_PROTOCOL 0 ++#define CONFIG_LIBZMQ_PROTOCOL 0 ++#endif /* FFMPEG_CONFIG_H */ +diff --git a/src/3rdparty/chromium/third_party/ffmpeg/chromium/config/ChromeOS/linux/la64/libavcodec/bsf_list.c b/src/3rdparty/chromium/third_party/ffmpeg/chromium/config/ChromeOS/linux/la64/libavcodec/bsf_list.c +new file mode 100644 +index 00000000000..d31ece942a7 +--- /dev/null ++++ b/src/3rdparty/chromium/third_party/ffmpeg/chromium/config/ChromeOS/linux/la64/libavcodec/bsf_list.c +@@ -0,0 +1,3 @@ ++static const AVBitStreamFilter * const bitstream_filters[] = { ++ &ff_null_bsf, ++ NULL }; +diff --git a/src/3rdparty/chromium/third_party/ffmpeg/chromium/config/ChromeOS/linux/la64/libavcodec/codec_list.c 
b/src/3rdparty/chromium/third_party/ffmpeg/chromium/config/ChromeOS/linux/la64/libavcodec/codec_list.c +new file mode 100644 +index 00000000000..8f4b18388c3 +--- /dev/null ++++ b/src/3rdparty/chromium/third_party/ffmpeg/chromium/config/ChromeOS/linux/la64/libavcodec/codec_list.c +@@ -0,0 +1,25 @@ ++static const AVCodec * const codec_list[] = { ++ &ff_h263_decoder, ++ &ff_h264_decoder, ++ &ff_mpeg4_decoder, ++ &ff_theora_decoder, ++ &ff_vp3_decoder, ++ &ff_vp8_decoder, ++ &ff_aac_decoder, ++ &ff_amrnb_decoder, ++ &ff_amrwb_decoder, ++ &ff_flac_decoder, ++ &ff_gsm_ms_decoder, ++ &ff_mp3_decoder, ++ &ff_vorbis_decoder, ++ &ff_pcm_alaw_decoder, ++ &ff_pcm_f32le_decoder, ++ &ff_pcm_mulaw_decoder, ++ &ff_pcm_s16be_decoder, ++ &ff_pcm_s16le_decoder, ++ &ff_pcm_s24be_decoder, ++ &ff_pcm_s24le_decoder, ++ &ff_pcm_s32le_decoder, ++ &ff_pcm_u8_decoder, ++ &ff_libopus_decoder, ++ NULL }; +diff --git a/src/3rdparty/chromium/third_party/ffmpeg/chromium/config/ChromeOS/linux/la64/libavcodec/parser_list.c b/src/3rdparty/chromium/third_party/ffmpeg/chromium/config/ChromeOS/linux/la64/libavcodec/parser_list.c +new file mode 100644 +index 00000000000..48dcf4122e6 +--- /dev/null ++++ b/src/3rdparty/chromium/third_party/ffmpeg/chromium/config/ChromeOS/linux/la64/libavcodec/parser_list.c +@@ -0,0 +1,14 @@ ++static const AVCodecParser * const parser_list[] = { ++ &ff_aac_parser, ++ &ff_flac_parser, ++ &ff_gsm_parser, ++ &ff_h263_parser, ++ &ff_h264_parser, ++ &ff_mpeg4video_parser, ++ &ff_mpegaudio_parser, ++ &ff_opus_parser, ++ &ff_vorbis_parser, ++ &ff_vp3_parser, ++ &ff_vp8_parser, ++ &ff_vp9_parser, ++ NULL }; +diff --git a/src/3rdparty/chromium/third_party/ffmpeg/chromium/config/ChromeOS/linux/la64/libavformat/demuxer_list.c b/src/3rdparty/chromium/third_party/ffmpeg/chromium/config/ChromeOS/linux/la64/libavformat/demuxer_list.c +new file mode 100644 +index 00000000000..0c96cf1ff7e +--- /dev/null ++++ b/src/3rdparty/chromium/third_party/ffmpeg/chromium/config/ChromeOS/linux/la64/libavformat/demuxer_list.c +@@ -0,0 +1,11 @@ ++static const AVInputFormat * const demuxer_list[] = { ++ &ff_aac_demuxer, ++ &ff_amr_demuxer, ++ &ff_avi_demuxer, ++ &ff_flac_demuxer, ++ &ff_matroska_demuxer, ++ &ff_mov_demuxer, ++ &ff_mp3_demuxer, ++ &ff_ogg_demuxer, ++ &ff_wav_demuxer, ++ NULL }; +diff --git a/src/3rdparty/chromium/third_party/ffmpeg/chromium/config/ChromeOS/linux/la64/libavformat/muxer_list.c b/src/3rdparty/chromium/third_party/ffmpeg/chromium/config/ChromeOS/linux/la64/libavformat/muxer_list.c +new file mode 100644 +index 00000000000..f36d9499c6f +--- /dev/null ++++ b/src/3rdparty/chromium/third_party/ffmpeg/chromium/config/ChromeOS/linux/la64/libavformat/muxer_list.c +@@ -0,0 +1,2 @@ ++static const AVOutputFormat * const muxer_list[] = { ++ NULL }; +diff --git a/src/3rdparty/chromium/third_party/ffmpeg/chromium/config/ChromeOS/linux/la64/libavformat/protocol_list.c b/src/3rdparty/chromium/third_party/ffmpeg/chromium/config/ChromeOS/linux/la64/libavformat/protocol_list.c +new file mode 100644 +index 00000000000..247e1e4c3a2 +--- /dev/null ++++ b/src/3rdparty/chromium/third_party/ffmpeg/chromium/config/ChromeOS/linux/la64/libavformat/protocol_list.c +@@ -0,0 +1,2 @@ ++static const URLProtocol * const url_protocols[] = { ++ NULL }; +diff --git a/src/3rdparty/chromium/third_party/ffmpeg/chromium/config/ChromeOS/linux/la64/libavutil/avconfig.h b/src/3rdparty/chromium/third_party/ffmpeg/chromium/config/ChromeOS/linux/la64/libavutil/avconfig.h +new file mode 100644 +index 00000000000..8558b35027f +--- /dev/null ++++ 
b/src/3rdparty/chromium/third_party/ffmpeg/chromium/config/ChromeOS/linux/la64/libavutil/avconfig.h +@@ -0,0 +1,6 @@ ++/* Generated by ffmpeg configure */ ++#ifndef AVUTIL_AVCONFIG_H ++#define AVUTIL_AVCONFIG_H ++#define AV_HAVE_BIGENDIAN 0 ++#define AV_HAVE_FAST_UNALIGNED 0 ++#endif /* AVUTIL_AVCONFIG_H */ +diff --git a/src/3rdparty/chromium/third_party/ffmpeg/chromium/config/ChromeOS/linux/la64/libavutil/ffversion.h b/src/3rdparty/chromium/third_party/ffmpeg/chromium/config/ChromeOS/linux/la64/libavutil/ffversion.h +new file mode 100644 +index 00000000000..31e5b5036dc +--- /dev/null ++++ b/src/3rdparty/chromium/third_party/ffmpeg/chromium/config/ChromeOS/linux/la64/libavutil/ffversion.h +@@ -0,0 +1,5 @@ ++/* Automatically generated by version.sh, do not manually edit! */ ++#ifndef AVUTIL_FFVERSION_H ++#define AVUTIL_FFVERSION_H ++#define FFMPEG_VERSION "git-2020-06-16-23b2a15c25" ++#endif /* AVUTIL_FFVERSION_H */ +diff --git a/src/3rdparty/chromium/third_party/ffmpeg/chromium/config/Chromium/linux/la64/config.h b/src/3rdparty/chromium/third_party/ffmpeg/chromium/config/Chromium/linux/la64/config.h +new file mode 100644 +index 00000000000..87f5fcb366d +--- /dev/null ++++ b/src/3rdparty/chromium/third_party/ffmpeg/chromium/config/Chromium/linux/la64/config.h +@@ -0,0 +1,2589 @@ ++/* Automatically generated by configure - do not modify! */ ++#ifndef FFMPEG_CONFIG_H ++#define FFMPEG_CONFIG_H ++/* #define FFMPEG_CONFIGURATION "--disable-everything --disable-all --disable-doc --disable-htmlpages --disable-manpages --disable-podpages --disable-txtpages --disable-static --enable-avcodec --enable-avformat --enable-avutil --enable-fft --enable-rdft --enable-static --enable-libopus --disable-debug --disable-bzlib --disable-error-resilience --disable-iconv --disable-lzo --disable-network --disable-schannel --disable-sdl2 --disable-symver --disable-xlib --disable-zlib --disable-securetransport --disable-faan --disable-alsa --disable-autodetect --enable-decoder='vorbis,libopus,flac' --enable-decoder='pcm_u8,pcm_s16le,pcm_s24le,pcm_s32le,pcm_f32le,mp3' --enable-decoder='pcm_s16be,pcm_s24be,pcm_mulaw,pcm_alaw' --enable-demuxer='ogg,matroska,wav,flac,mp3,mov' --enable-parser='opus,vorbis,flac,mpegaudio,vp9' --extra-cflags=-I/mnt/chromium/src/third_party/opus/src/include --disable-linux-perf --x86asmexe=nasm --optflags='\"-O2\"' --enable-decoder='theora,vp8' --enable-parser='vp3,vp8' --target-os=linux --enable-pic --cc=clang --cxx=clang++ --ld=clang" -- elide long configuration string from binary */ ++#define FFMPEG_LICENSE "LGPL version 2.1 or later" ++#define CONFIG_THIS_YEAR 2020 ++#define FFMPEG_DATADIR "/usr/local/share/ffmpeg" ++#define AVCONV_DATADIR "/usr/local/share/ffmpeg" ++#define CC_IDENT "clang version 8.0.1" ++#define av_restrict restrict ++#define EXTERN_PREFIX "" ++#define EXTERN_ASM ++#define BUILDSUF "" ++#define SLIBSUF ".so" ++#define HAVE_MMX2 HAVE_MMXEXT ++#define SWS_MAX_FILTER_SIZE 256 ++#define ARCH_AARCH64 0 ++#define ARCH_ALPHA 0 ++#define ARCH_ARM 0 ++#define ARCH_AVR32 0 ++#define ARCH_AVR32_AP 0 ++#define ARCH_AVR32_UC 0 ++#define ARCH_BFIN 0 ++#define ARCH_IA64 0 ++#define ARCH_M68K 0 ++#define ARCH_MIPS 0 ++#define ARCH_MIPS64 0 ++#define ARCH_PARISC 0 ++#define ARCH_PPC 0 ++#define ARCH_PPC64 0 ++#define ARCH_S390 0 ++#define ARCH_SH4 0 ++#define ARCH_SPARC 0 ++#define ARCH_SPARC64 0 ++#define ARCH_TILEGX 0 ++#define ARCH_TILEPRO 0 ++#define ARCH_TOMI 0 ++#define ARCH_X86 0 ++#define ARCH_X86_32 0 ++#define ARCH_X86_64 0 ++#define HAVE_ARMV5TE 0 ++#define HAVE_ARMV6 0 
++#define HAVE_ARMV6T2 0 ++#define HAVE_ARMV8 0 ++#define HAVE_NEON 0 ++#define HAVE_VFP 0 ++#define HAVE_VFPV3 0 ++#define HAVE_SETEND 0 ++#define HAVE_ALTIVEC 0 ++#define HAVE_DCBZL 0 ++#define HAVE_LDBRX 0 ++#define HAVE_POWER8 0 ++#define HAVE_PPC4XX 0 ++#define HAVE_VSX 0 ++#define HAVE_AESNI 0 ++#define HAVE_AMD3DNOW 0 ++#define HAVE_AMD3DNOWEXT 0 ++#define HAVE_AVX 0 ++#define HAVE_AVX2 0 ++#define HAVE_AVX512 0 ++#define HAVE_FMA3 0 ++#define HAVE_FMA4 0 ++#define HAVE_MMX 0 ++#define HAVE_MMXEXT 0 ++#define HAVE_SSE 0 ++#define HAVE_SSE2 0 ++#define HAVE_SSE3 0 ++#define HAVE_SSE4 0 ++#define HAVE_SSE42 0 ++#define HAVE_SSSE3 0 ++#define HAVE_XOP 0 ++#define HAVE_CPUNOP 0 ++#define HAVE_I686 0 ++#define HAVE_MIPSFPU 0 ++#define HAVE_MIPS32R2 0 ++#define HAVE_MIPS32R5 0 ++#define HAVE_MIPS64R2 0 ++#define HAVE_MIPS32R6 0 ++#define HAVE_MIPS64R6 0 ++#define HAVE_MIPSDSP 0 ++#define HAVE_MIPSDSPR2 0 ++#define HAVE_MSA 0 ++#define HAVE_MSA2 0 ++#define HAVE_LOONGSON2 0 ++#define HAVE_LOONGSON3 0 ++#define HAVE_MMI 0 ++#define HAVE_ARMV5TE_EXTERNAL 0 ++#define HAVE_ARMV6_EXTERNAL 0 ++#define HAVE_ARMV6T2_EXTERNAL 0 ++#define HAVE_ARMV8_EXTERNAL 0 ++#define HAVE_NEON_EXTERNAL 0 ++#define HAVE_VFP_EXTERNAL 0 ++#define HAVE_VFPV3_EXTERNAL 0 ++#define HAVE_SETEND_EXTERNAL 0 ++#define HAVE_ALTIVEC_EXTERNAL 0 ++#define HAVE_DCBZL_EXTERNAL 0 ++#define HAVE_LDBRX_EXTERNAL 0 ++#define HAVE_POWER8_EXTERNAL 0 ++#define HAVE_PPC4XX_EXTERNAL 0 ++#define HAVE_VSX_EXTERNAL 0 ++#define HAVE_AESNI_EXTERNAL 0 ++#define HAVE_AMD3DNOW_EXTERNAL 0 ++#define HAVE_AMD3DNOWEXT_EXTERNAL 0 ++#define HAVE_AVX_EXTERNAL 0 ++#define HAVE_AVX2_EXTERNAL 0 ++#define HAVE_AVX512_EXTERNAL 0 ++#define HAVE_FMA3_EXTERNAL 0 ++#define HAVE_FMA4_EXTERNAL 0 ++#define HAVE_MMX_EXTERNAL 0 ++#define HAVE_MMXEXT_EXTERNAL 0 ++#define HAVE_SSE_EXTERNAL 0 ++#define HAVE_SSE2_EXTERNAL 0 ++#define HAVE_SSE3_EXTERNAL 0 ++#define HAVE_SSE4_EXTERNAL 0 ++#define HAVE_SSE42_EXTERNAL 0 ++#define HAVE_SSSE3_EXTERNAL 0 ++#define HAVE_XOP_EXTERNAL 0 ++#define HAVE_CPUNOP_EXTERNAL 0 ++#define HAVE_I686_EXTERNAL 0 ++#define HAVE_MIPSFPU_EXTERNAL 0 ++#define HAVE_MIPS32R2_EXTERNAL 0 ++#define HAVE_MIPS32R5_EXTERNAL 0 ++#define HAVE_MIPS64R2_EXTERNAL 0 ++#define HAVE_MIPS32R6_EXTERNAL 0 ++#define HAVE_MIPS64R6_EXTERNAL 0 ++#define HAVE_MIPSDSP_EXTERNAL 0 ++#define HAVE_MIPSDSPR2_EXTERNAL 0 ++#define HAVE_MSA_EXTERNAL 0 ++#define HAVE_MSA2_EXTERNAL 0 ++#define HAVE_LOONGSON2_EXTERNAL 0 ++#define HAVE_LOONGSON3_EXTERNAL 0 ++#define HAVE_MMI_EXTERNAL 0 ++#define HAVE_ARMV5TE_INLINE 0 ++#define HAVE_ARMV6_INLINE 0 ++#define HAVE_ARMV6T2_INLINE 0 ++#define HAVE_ARMV8_INLINE 0 ++#define HAVE_NEON_INLINE 0 ++#define HAVE_VFP_INLINE 0 ++#define HAVE_VFPV3_INLINE 0 ++#define HAVE_SETEND_INLINE 0 ++#define HAVE_ALTIVEC_INLINE 0 ++#define HAVE_DCBZL_INLINE 0 ++#define HAVE_LDBRX_INLINE 0 ++#define HAVE_POWER8_INLINE 0 ++#define HAVE_PPC4XX_INLINE 0 ++#define HAVE_VSX_INLINE 0 ++#define HAVE_AESNI_INLINE 0 ++#define HAVE_AMD3DNOW_INLINE 0 ++#define HAVE_AMD3DNOWEXT_INLINE 0 ++#define HAVE_AVX_INLINE 0 ++#define HAVE_AVX2_INLINE 0 ++#define HAVE_AVX512_INLINE 0 ++#define HAVE_FMA3_INLINE 0 ++#define HAVE_FMA4_INLINE 0 ++#define HAVE_MMX_INLINE 0 ++#define HAVE_MMXEXT_INLINE 0 ++#define HAVE_SSE_INLINE 0 ++#define HAVE_SSE2_INLINE 0 ++#define HAVE_SSE3_INLINE 0 ++#define HAVE_SSE4_INLINE 0 ++#define HAVE_SSE42_INLINE 0 ++#define HAVE_SSSE3_INLINE 0 ++#define HAVE_XOP_INLINE 0 ++#define HAVE_CPUNOP_INLINE 0 ++#define HAVE_I686_INLINE 0 ++#define 
HAVE_MIPSFPU_INLINE 0 ++#define HAVE_MIPS32R2_INLINE 0 ++#define HAVE_MIPS32R5_INLINE 0 ++#define HAVE_MIPS64R2_INLINE 0 ++#define HAVE_MIPS32R6_INLINE 0 ++#define HAVE_MIPS64R6_INLINE 0 ++#define HAVE_MIPSDSP_INLINE 0 ++#define HAVE_MIPSDSPR2_INLINE 0 ++#define HAVE_MSA_INLINE 0 ++#define HAVE_MSA2_INLINE 0 ++#define HAVE_LOONGSON2_INLINE 0 ++#define HAVE_LOONGSON3_INLINE 0 ++#define HAVE_MMI_INLINE 0 ++#define HAVE_ALIGNED_STACK 0 ++#define HAVE_FAST_64BIT 0 ++#define HAVE_FAST_CLZ 0 ++#define HAVE_FAST_CMOV 0 ++#define HAVE_LOCAL_ALIGNED 0 ++#define HAVE_SIMD_ALIGN_16 0 ++#define HAVE_SIMD_ALIGN_32 0 ++#define HAVE_SIMD_ALIGN_64 0 ++#define HAVE_ATOMIC_CAS_PTR 0 ++#define HAVE_MACHINE_RW_BARRIER 0 ++#define HAVE_MEMORYBARRIER 0 ++#define HAVE_MM_EMPTY 0 ++#define HAVE_RDTSC 0 ++#define HAVE_SEM_TIMEDWAIT 1 ++#define HAVE_SYNC_VAL_COMPARE_AND_SWAP 1 ++#define HAVE_CABS 0 ++#define HAVE_CEXP 0 ++#define HAVE_INLINE_ASM 1 ++#define HAVE_SYMVER 0 ++#define HAVE_X86ASM 0 ++#define HAVE_BIGENDIAN 0 ++#define HAVE_FAST_UNALIGNED 0 ++#define HAVE_ARPA_INET_H 0 ++#define HAVE_ASM_TYPES_H 1 ++#define HAVE_CDIO_PARANOIA_H 0 ++#define HAVE_CDIO_PARANOIA_PARANOIA_H 0 ++#define HAVE_CUDA_H 0 ++#define HAVE_DISPATCH_DISPATCH_H 0 ++#define HAVE_DEV_BKTR_IOCTL_BT848_H 0 ++#define HAVE_DEV_BKTR_IOCTL_METEOR_H 0 ++#define HAVE_DEV_IC_BT8XX_H 0 ++#define HAVE_DEV_VIDEO_BKTR_IOCTL_BT848_H 0 ++#define HAVE_DEV_VIDEO_METEOR_IOCTL_METEOR_H 0 ++#define HAVE_DIRECT_H 0 ++#define HAVE_DIRENT_H 1 ++#define HAVE_DXGIDEBUG_H 0 ++#define HAVE_DXVA_H 0 ++#define HAVE_ES2_GL_H 0 ++#define HAVE_GSM_H 0 ++#define HAVE_IO_H 0 ++#define HAVE_LINUX_PERF_EVENT_H 1 ++#define HAVE_MACHINE_IOCTL_BT848_H 0 ++#define HAVE_MACHINE_IOCTL_METEOR_H 0 ++#define HAVE_MALLOC_H 1 ++#define HAVE_OPENCV2_CORE_CORE_C_H 0 ++#define HAVE_OPENGL_GL3_H 0 ++#define HAVE_POLL_H 1 ++#define HAVE_SYS_PARAM_H 1 ++#define HAVE_SYS_RESOURCE_H 1 ++#define HAVE_SYS_SELECT_H 1 ++#define HAVE_SYS_SOUNDCARD_H 1 ++#define HAVE_SYS_TIME_H 1 ++#define HAVE_SYS_UN_H 1 ++#define HAVE_SYS_VIDEOIO_H 0 ++#define HAVE_TERMIOS_H 1 ++#define HAVE_UDPLITE_H 0 ++#define HAVE_UNISTD_H 1 ++#define HAVE_VALGRIND_VALGRIND_H 0 /* #define HAVE_VALGRIND_VALGRIND_H 0 -- forced to 0. 
See https://crbug.com/590440 */ ++#define HAVE_WINDOWS_H 0 ++#define HAVE_WINSOCK2_H 0 ++#define HAVE_INTRINSICS_NEON 0 ++#define HAVE_ATANF 1 ++#define HAVE_ATAN2F 1 ++#define HAVE_CBRT 1 ++#define HAVE_CBRTF 1 ++#define HAVE_COPYSIGN 1 ++#define HAVE_COSF 1 ++#define HAVE_ERF 1 ++#define HAVE_EXP2 1 ++#define HAVE_EXP2F 1 ++#define HAVE_EXPF 1 ++#define HAVE_HYPOT 1 ++#define HAVE_ISFINITE 1 ++#define HAVE_ISINF 1 ++#define HAVE_ISNAN 1 ++#define HAVE_LDEXPF 1 ++#define HAVE_LLRINT 1 ++#define HAVE_LLRINTF 1 ++#define HAVE_LOG2 1 ++#define HAVE_LOG2F 1 ++#define HAVE_LOG10F 1 ++#define HAVE_LRINT 1 ++#define HAVE_LRINTF 1 ++#define HAVE_POWF 1 ++#define HAVE_RINT 1 ++#define HAVE_ROUND 1 ++#define HAVE_ROUNDF 1 ++#define HAVE_SINF 1 ++#define HAVE_TRUNC 1 ++#define HAVE_TRUNCF 1 ++#define HAVE_DOS_PATHS 0 ++#define HAVE_LIBC_MSVCRT 0 ++#define HAVE_MMAL_PARAMETER_VIDEO_MAX_NUM_CALLBACKS 0 ++#define HAVE_SECTION_DATA_REL_RO 1 ++#define HAVE_THREADS 1 ++#define HAVE_UWP 0 ++#define HAVE_WINRT 0 ++#define HAVE_ACCESS 1 ++#define HAVE_ALIGNED_MALLOC 0 ++#define HAVE_ARC4RANDOM 0 ++#define HAVE_CLOCK_GETTIME 1 ++#define HAVE_CLOSESOCKET 0 ++#define HAVE_COMMANDLINETOARGVW 0 ++#define HAVE_FCNTL 1 ++#define HAVE_GETADDRINFO 0 ++#define HAVE_GETHRTIME 0 ++#define HAVE_GETOPT 1 ++#define HAVE_GETMODULEHANDLE 0 ++#define HAVE_GETPROCESSAFFINITYMASK 0 ++#define HAVE_GETPROCESSMEMORYINFO 0 ++#define HAVE_GETPROCESSTIMES 0 ++#define HAVE_GETRUSAGE 1 ++#define HAVE_GETSTDHANDLE 0 ++#define HAVE_GETSYSTEMTIMEASFILETIME 0 ++#define HAVE_GETTIMEOFDAY 1 ++#define HAVE_GLOB 1 ++#define HAVE_GLXGETPROCADDRESS 0 ++#define HAVE_GMTIME_R 1 ++#define HAVE_INET_ATON 0 ++#define HAVE_ISATTY 1 ++#define HAVE_KBHIT 0 ++#define HAVE_LOCALTIME_R 1 ++#define HAVE_LSTAT 1 ++#define HAVE_LZO1X_999_COMPRESS 0 ++#define HAVE_MACH_ABSOLUTE_TIME 0 ++#define HAVE_MAPVIEWOFFILE 0 ++#define HAVE_MEMALIGN 1 ++#define HAVE_MKSTEMP 1 ++#define HAVE_MMAP 1 ++#define HAVE_MPROTECT 1 ++#define HAVE_NANOSLEEP 1 ++#define HAVE_PEEKNAMEDPIPE 0 ++#define HAVE_POSIX_MEMALIGN 1 ++#define HAVE_PTHREAD_CANCEL 1 ++#define HAVE_SCHED_GETAFFINITY 1 ++#define HAVE_SECITEMIMPORT 0 ++#define HAVE_SETCONSOLETEXTATTRIBUTE 0 ++#define HAVE_SETCONSOLECTRLHANDLER 0 ++#define HAVE_SETDLLDIRECTORY 0 ++#define HAVE_SETMODE 0 ++#define HAVE_SETRLIMIT 1 ++#define HAVE_SLEEP 0 ++#define HAVE_STRERROR_R 1 ++#define HAVE_SYSCONF 1 ++#define HAVE_SYSCTL 0 /* #define HAVE_SYSCTL 1 -- forced to 0 for Fuchsia */ ++#define HAVE_USLEEP 1 ++#define HAVE_UTGETOSTYPEFROMSTRING 0 ++#define HAVE_VIRTUALALLOC 0 ++#define HAVE_WGLGETPROCADDRESS 0 ++#define HAVE_BCRYPT 0 ++#define HAVE_VAAPI_DRM 0 ++#define HAVE_VAAPI_X11 0 ++#define HAVE_VDPAU_X11 0 ++#define HAVE_PTHREADS 1 ++#define HAVE_OS2THREADS 0 ++#define HAVE_W32THREADS 0 ++#define HAVE_AS_ARCH_DIRECTIVE 0 ++#define HAVE_AS_DN_DIRECTIVE 0 ++#define HAVE_AS_FPU_DIRECTIVE 0 ++#define HAVE_AS_FUNC 0 ++#define HAVE_AS_OBJECT_ARCH 0 ++#define HAVE_ASM_MOD_Q 0 ++#define HAVE_BLOCKS_EXTENSION 0 ++#define HAVE_EBP_AVAILABLE 0 ++#define HAVE_EBX_AVAILABLE 0 ++#define HAVE_GNU_AS 0 ++#define HAVE_GNU_WINDRES 0 ++#define HAVE_IBM_ASM 0 ++#define HAVE_INLINE_ASM_DIRECT_SYMBOL_REFS 0 ++#define HAVE_INLINE_ASM_LABELS 1 ++#define HAVE_INLINE_ASM_NONLOCAL_LABELS 1 ++#define HAVE_PRAGMA_DEPRECATED 1 ++#define HAVE_RSYNC_CONTIMEOUT 0 ++#define HAVE_SYMVER_ASM_LABEL 1 ++#define HAVE_SYMVER_GNU_ASM 1 ++#define HAVE_VFP_ARGS 0 ++#define HAVE_XFORM_ASM 0 ++#define HAVE_XMM_CLOBBERS 0 ++#define HAVE_KCMVIDEOCODECTYPE_HEVC 0 ++#define 
HAVE_KCVPIXELFORMATTYPE_420YPCBCR10BIPLANARVIDEORANGE 0 ++#define HAVE_KCVIMAGEBUFFERTRANSFERFUNCTION_SMPTE_ST_2084_PQ 0 ++#define HAVE_KCVIMAGEBUFFERTRANSFERFUNCTION_ITU_R_2100_HLG 0 ++#define HAVE_KCVIMAGEBUFFERTRANSFERFUNCTION_LINEAR 0 ++#define HAVE_SOCKLEN_T 0 ++#define HAVE_STRUCT_ADDRINFO 0 ++#define HAVE_STRUCT_GROUP_SOURCE_REQ 0 ++#define HAVE_STRUCT_IP_MREQ_SOURCE 0 ++#define HAVE_STRUCT_IPV6_MREQ 0 ++#define HAVE_STRUCT_MSGHDR_MSG_FLAGS 0 ++#define HAVE_STRUCT_POLLFD 0 ++#define HAVE_STRUCT_RUSAGE_RU_MAXRSS 1 ++#define HAVE_STRUCT_SCTP_EVENT_SUBSCRIBE 0 ++#define HAVE_STRUCT_SOCKADDR_IN6 0 ++#define HAVE_STRUCT_SOCKADDR_SA_LEN 0 ++#define HAVE_STRUCT_SOCKADDR_STORAGE 0 ++#define HAVE_STRUCT_STAT_ST_MTIM_TV_NSEC 1 ++#define HAVE_STRUCT_V4L2_FRMIVALENUM_DISCRETE 0 ++#define HAVE_MAKEINFO 1 ++#define HAVE_MAKEINFO_HTML 1 ++#define HAVE_OPENCL_D3D11 0 ++#define HAVE_OPENCL_DRM_ARM 0 ++#define HAVE_OPENCL_DRM_BEIGNET 0 ++#define HAVE_OPENCL_DXVA2 0 ++#define HAVE_OPENCL_VAAPI_BEIGNET 0 ++#define HAVE_OPENCL_VAAPI_INTEL_MEDIA 0 ++#define HAVE_PERL 1 ++#define HAVE_POD2MAN 1 ++#define HAVE_TEXI2HTML 0 ++#define CONFIG_DOC 0 ++#define CONFIG_HTMLPAGES 0 ++#define CONFIG_MANPAGES 0 ++#define CONFIG_PODPAGES 0 ++#define CONFIG_TXTPAGES 0 ++#define CONFIG_AVIO_LIST_DIR_EXAMPLE 1 ++#define CONFIG_AVIO_READING_EXAMPLE 1 ++#define CONFIG_DECODE_AUDIO_EXAMPLE 1 ++#define CONFIG_DECODE_VIDEO_EXAMPLE 1 ++#define CONFIG_DEMUXING_DECODING_EXAMPLE 1 ++#define CONFIG_ENCODE_AUDIO_EXAMPLE 1 ++#define CONFIG_ENCODE_VIDEO_EXAMPLE 1 ++#define CONFIG_EXTRACT_MVS_EXAMPLE 1 ++#define CONFIG_FILTER_AUDIO_EXAMPLE 0 ++#define CONFIG_FILTERING_AUDIO_EXAMPLE 0 ++#define CONFIG_FILTERING_VIDEO_EXAMPLE 0 ++#define CONFIG_HTTP_MULTICLIENT_EXAMPLE 1 ++#define CONFIG_HW_DECODE_EXAMPLE 1 ++#define CONFIG_METADATA_EXAMPLE 1 ++#define CONFIG_MUXING_EXAMPLE 0 ++#define CONFIG_QSVDEC_EXAMPLE 0 ++#define CONFIG_REMUXING_EXAMPLE 1 ++#define CONFIG_RESAMPLING_AUDIO_EXAMPLE 0 ++#define CONFIG_SCALING_VIDEO_EXAMPLE 0 ++#define CONFIG_TRANSCODE_AAC_EXAMPLE 0 ++#define CONFIG_TRANSCODING_EXAMPLE 0 ++#define CONFIG_VAAPI_ENCODE_EXAMPLE 0 ++#define CONFIG_VAAPI_TRANSCODE_EXAMPLE 0 ++#define CONFIG_AVISYNTH 0 ++#define CONFIG_FREI0R 0 ++#define CONFIG_LIBCDIO 0 ++#define CONFIG_LIBDAVS2 0 ++#define CONFIG_LIBRUBBERBAND 0 ++#define CONFIG_LIBVIDSTAB 0 ++#define CONFIG_LIBX264 0 ++#define CONFIG_LIBX265 0 ++#define CONFIG_LIBXAVS 0 ++#define CONFIG_LIBXAVS2 0 ++#define CONFIG_LIBXVID 0 ++#define CONFIG_DECKLINK 0 ++#define CONFIG_LIBFDK_AAC 0 ++#define CONFIG_OPENSSL 0 ++#define CONFIG_LIBTLS 0 ++#define CONFIG_GMP 0 ++#define CONFIG_LIBARIBB24 0 ++#define CONFIG_LIBLENSFUN 0 ++#define CONFIG_LIBOPENCORE_AMRNB 0 ++#define CONFIG_LIBOPENCORE_AMRWB 0 ++#define CONFIG_LIBVMAF 0 ++#define CONFIG_LIBVO_AMRWBENC 0 ++#define CONFIG_MBEDTLS 0 ++#define CONFIG_RKMPP 0 ++#define CONFIG_LIBSMBCLIENT 0 ++#define CONFIG_CHROMAPRINT 0 ++#define CONFIG_GCRYPT 0 ++#define CONFIG_GNUTLS 0 ++#define CONFIG_JNI 0 ++#define CONFIG_LADSPA 0 ++#define CONFIG_LIBAOM 0 ++#define CONFIG_LIBASS 0 ++#define CONFIG_LIBBLURAY 0 ++#define CONFIG_LIBBS2B 0 ++#define CONFIG_LIBCACA 0 ++#define CONFIG_LIBCELT 0 ++#define CONFIG_LIBCODEC2 0 ++#define CONFIG_LIBDAV1D 0 ++#define CONFIG_LIBDC1394 0 ++#define CONFIG_LIBDRM 0 ++#define CONFIG_LIBFLITE 0 ++#define CONFIG_LIBFONTCONFIG 0 ++#define CONFIG_LIBFREETYPE 0 ++#define CONFIG_LIBFRIBIDI 0 ++#define CONFIG_LIBGLSLANG 0 ++#define CONFIG_LIBGME 0 ++#define CONFIG_LIBGSM 0 ++#define CONFIG_LIBIEC61883 0 ++#define 
CONFIG_LIBILBC 0 ++#define CONFIG_LIBJACK 0 ++#define CONFIG_LIBKLVANC 0 ++#define CONFIG_LIBKVAZAAR 0 ++#define CONFIG_LIBMODPLUG 0 ++#define CONFIG_LIBMP3LAME 0 ++#define CONFIG_LIBMYSOFA 0 ++#define CONFIG_LIBOPENCV 0 ++#define CONFIG_LIBOPENH264 0 ++#define CONFIG_LIBOPENJPEG 0 ++#define CONFIG_LIBOPENMPT 0 ++#define CONFIG_LIBOPUS 1 ++#define CONFIG_LIBPULSE 0 ++#define CONFIG_LIBRABBITMQ 0 ++#define CONFIG_LIBRAV1E 0 ++#define CONFIG_LIBRSVG 0 ++#define CONFIG_LIBRTMP 0 ++#define CONFIG_LIBSHINE 0 ++#define CONFIG_LIBSMBCLIENT 0 ++#define CONFIG_LIBSNAPPY 0 ++#define CONFIG_LIBSOXR 0 ++#define CONFIG_LIBSPEEX 0 ++#define CONFIG_LIBSRT 0 ++#define CONFIG_LIBSSH 0 ++#define CONFIG_LIBTENSORFLOW 0 ++#define CONFIG_LIBTESSERACT 0 ++#define CONFIG_LIBTHEORA 0 ++#define CONFIG_LIBTWOLAME 0 ++#define CONFIG_LIBV4L2 0 ++#define CONFIG_LIBVORBIS 0 ++#define CONFIG_LIBVPX 0 ++#define CONFIG_LIBWAVPACK 0 ++#define CONFIG_LIBWEBP 0 ++#define CONFIG_LIBXML2 0 ++#define CONFIG_LIBZIMG 0 ++#define CONFIG_LIBZMQ 0 ++#define CONFIG_LIBZVBI 0 ++#define CONFIG_LV2 0 ++#define CONFIG_MEDIACODEC 0 ++#define CONFIG_OPENAL 0 ++#define CONFIG_OPENGL 0 ++#define CONFIG_POCKETSPHINX 0 ++#define CONFIG_VAPOURSYNTH 0 ++#define CONFIG_ALSA 0 ++#define CONFIG_APPKIT 0 ++#define CONFIG_AVFOUNDATION 0 ++#define CONFIG_BZLIB 0 ++#define CONFIG_COREIMAGE 0 ++#define CONFIG_ICONV 0 ++#define CONFIG_LIBXCB 0 ++#define CONFIG_LIBXCB_SHM 0 ++#define CONFIG_LIBXCB_SHAPE 0 ++#define CONFIG_LIBXCB_XFIXES 0 ++#define CONFIG_LZMA 0 ++#define CONFIG_SCHANNEL 0 ++#define CONFIG_SDL2 0 ++#define CONFIG_SECURETRANSPORT 0 ++#define CONFIG_SNDIO 0 ++#define CONFIG_XLIB 0 ++#define CONFIG_ZLIB 0 ++#define CONFIG_CUDA_NVCC 0 ++#define CONFIG_CUDA_SDK 0 ++#define CONFIG_LIBNPP 0 ++#define CONFIG_LIBMFX 0 ++#define CONFIG_MMAL 0 ++#define CONFIG_OMX 0 ++#define CONFIG_OPENCL 0 ++#define CONFIG_VULKAN 0 ++#define CONFIG_AMF 0 ++#define CONFIG_AUDIOTOOLBOX 0 ++#define CONFIG_CRYSTALHD 0 ++#define CONFIG_CUDA 0 ++#define CONFIG_CUDA_LLVM 0 ++#define CONFIG_CUVID 0 ++#define CONFIG_D3D11VA 0 ++#define CONFIG_DXVA2 0 ++#define CONFIG_FFNVCODEC 0 ++#define CONFIG_NVDEC 0 ++#define CONFIG_NVENC 0 ++#define CONFIG_VAAPI 0 ++#define CONFIG_VDPAU 0 ++#define CONFIG_VIDEOTOOLBOX 0 ++#define CONFIG_V4L2_M2M 0 ++#define CONFIG_XVMC 0 ++#define CONFIG_FTRAPV 0 ++#define CONFIG_GRAY 0 ++#define CONFIG_HARDCODED_TABLES 0 ++#define CONFIG_OMX_RPI 0 ++#define CONFIG_RUNTIME_CPUDETECT 1 ++#define CONFIG_SAFE_BITSTREAM_READER 1 ++#define CONFIG_SHARED 0 ++#define CONFIG_SMALL 0 ++#define CONFIG_STATIC 1 ++#define CONFIG_SWSCALE_ALPHA 1 ++#define CONFIG_GPL 0 ++#define CONFIG_NONFREE 0 ++#define CONFIG_VERSION3 0 ++#define CONFIG_AVDEVICE 0 ++#define CONFIG_AVFILTER 0 ++#define CONFIG_SWSCALE 0 ++#define CONFIG_POSTPROC 0 ++#define CONFIG_AVFORMAT 1 ++#define CONFIG_AVCODEC 1 ++#define CONFIG_SWRESAMPLE 0 ++#define CONFIG_AVRESAMPLE 0 ++#define CONFIG_AVUTIL 1 ++#define CONFIG_FFPLAY 0 ++#define CONFIG_FFPROBE 0 ++#define CONFIG_FFMPEG 0 ++#define CONFIG_DCT 1 ++#define CONFIG_DWT 0 ++#define CONFIG_ERROR_RESILIENCE 0 ++#define CONFIG_FAAN 0 ++#define CONFIG_FAST_UNALIGNED 0 ++#define CONFIG_FFT 1 ++#define CONFIG_LSP 0 ++#define CONFIG_LZO 0 ++#define CONFIG_MDCT 1 ++#define CONFIG_PIXELUTILS 0 ++#define CONFIG_NETWORK 0 ++#define CONFIG_RDFT 1 ++#define CONFIG_AUTODETECT 0 ++#define CONFIG_FONTCONFIG 0 ++#define CONFIG_LARGE_TESTS 1 ++#define CONFIG_LINUX_PERF 0 ++#define CONFIG_MEMORY_POISONING 0 ++#define CONFIG_NEON_CLOBBER_TEST 0 ++#define 
CONFIG_OSSFUZZ 0 ++#define CONFIG_PIC 1 ++#define CONFIG_THUMB 0 ++#define CONFIG_VALGRIND_BACKTRACE 0 ++#define CONFIG_XMM_CLOBBER_TEST 0 ++#define CONFIG_BSFS 1 ++#define CONFIG_DECODERS 1 ++#define CONFIG_ENCODERS 0 ++#define CONFIG_HWACCELS 0 ++#define CONFIG_PARSERS 1 ++#define CONFIG_INDEVS 0 ++#define CONFIG_OUTDEVS 0 ++#define CONFIG_FILTERS 0 ++#define CONFIG_DEMUXERS 1 ++#define CONFIG_MUXERS 0 ++#define CONFIG_PROTOCOLS 0 ++#define CONFIG_AANDCTTABLES 0 ++#define CONFIG_AC3DSP 0 ++#define CONFIG_ADTS_HEADER 0 ++#define CONFIG_AUDIO_FRAME_QUEUE 0 ++#define CONFIG_AUDIODSP 0 ++#define CONFIG_BLOCKDSP 0 ++#define CONFIG_BSWAPDSP 0 ++#define CONFIG_CABAC 0 ++#define CONFIG_CBS 0 ++#define CONFIG_CBS_AV1 0 ++#define CONFIG_CBS_H264 0 ++#define CONFIG_CBS_H265 0 ++#define CONFIG_CBS_JPEG 0 ++#define CONFIG_CBS_MPEG2 0 ++#define CONFIG_CBS_VP9 0 ++#define CONFIG_DIRAC_PARSE 1 ++#define CONFIG_DNN 0 ++#define CONFIG_DVPROFILE 0 ++#define CONFIG_EXIF 0 ++#define CONFIG_FAANDCT 0 ++#define CONFIG_FAANIDCT 0 ++#define CONFIG_FDCTDSP 0 ++#define CONFIG_FLACDSP 1 ++#define CONFIG_FMTCONVERT 0 ++#define CONFIG_FRAME_THREAD_ENCODER 0 ++#define CONFIG_G722DSP 0 ++#define CONFIG_GOLOMB 1 ++#define CONFIG_GPLV3 0 ++#define CONFIG_H263DSP 0 ++#define CONFIG_H264CHROMA 0 ++#define CONFIG_H264DSP 0 ++#define CONFIG_H264PARSE 0 ++#define CONFIG_H264PRED 1 ++#define CONFIG_H264QPEL 0 ++#define CONFIG_HEVCPARSE 0 ++#define CONFIG_HPELDSP 1 ++#define CONFIG_HUFFMAN 0 ++#define CONFIG_HUFFYUVDSP 0 ++#define CONFIG_HUFFYUVENCDSP 0 ++#define CONFIG_IDCTDSP 0 ++#define CONFIG_IIRFILTER 0 ++#define CONFIG_MDCT15 0 ++#define CONFIG_INTRAX8 0 ++#define CONFIG_ISO_MEDIA 1 ++#define CONFIG_IVIDSP 0 ++#define CONFIG_JPEGTABLES 0 ++#define CONFIG_LGPLV3 0 ++#define CONFIG_LIBX262 0 ++#define CONFIG_LLAUDDSP 0 ++#define CONFIG_LLVIDDSP 0 ++#define CONFIG_LLVIDENCDSP 0 ++#define CONFIG_LPC 0 ++#define CONFIG_LZF 0 ++#define CONFIG_ME_CMP 0 ++#define CONFIG_MPEG_ER 0 ++#define CONFIG_MPEGAUDIO 1 ++#define CONFIG_MPEGAUDIODSP 1 ++#define CONFIG_MPEGAUDIOHEADER 1 ++#define CONFIG_MPEGVIDEO 0 ++#define CONFIG_MPEGVIDEOENC 0 ++#define CONFIG_MSS34DSP 0 ++#define CONFIG_PIXBLOCKDSP 0 ++#define CONFIG_QPELDSP 0 ++#define CONFIG_QSV 0 ++#define CONFIG_QSVDEC 0 ++#define CONFIG_QSVENC 0 ++#define CONFIG_QSVVPP 0 ++#define CONFIG_RANGECODER 0 ++#define CONFIG_RIFFDEC 1 ++#define CONFIG_RIFFENC 0 ++#define CONFIG_RTPDEC 0 ++#define CONFIG_RTPENC_CHAIN 0 ++#define CONFIG_RV34DSP 0 ++#define CONFIG_SCENE_SAD 0 ++#define CONFIG_SINEWIN 0 ++#define CONFIG_SNAPPY 0 ++#define CONFIG_SRTP 0 ++#define CONFIG_STARTCODE 0 ++#define CONFIG_TEXTUREDSP 0 ++#define CONFIG_TEXTUREDSPENC 0 ++#define CONFIG_TPELDSP 0 ++#define CONFIG_VAAPI_1 0 ++#define CONFIG_VAAPI_ENCODE 0 ++#define CONFIG_VC1DSP 0 ++#define CONFIG_VIDEODSP 1 ++#define CONFIG_VP3DSP 1 ++#define CONFIG_VP56DSP 0 ++#define CONFIG_VP8DSP 1 ++#define CONFIG_WMA_FREQS 0 ++#define CONFIG_WMV2DSP 0 ++#define CONFIG_AAC_ADTSTOASC_BSF 0 ++#define CONFIG_AV1_FRAME_MERGE_BSF 0 ++#define CONFIG_AV1_FRAME_SPLIT_BSF 0 ++#define CONFIG_AV1_METADATA_BSF 0 ++#define CONFIG_CHOMP_BSF 0 ++#define CONFIG_DUMP_EXTRADATA_BSF 0 ++#define CONFIG_DCA_CORE_BSF 0 ++#define CONFIG_EAC3_CORE_BSF 0 ++#define CONFIG_EXTRACT_EXTRADATA_BSF 0 ++#define CONFIG_FILTER_UNITS_BSF 0 ++#define CONFIG_H264_METADATA_BSF 0 ++#define CONFIG_H264_MP4TOANNEXB_BSF 0 ++#define CONFIG_H264_REDUNDANT_PPS_BSF 0 ++#define CONFIG_HAPQA_EXTRACT_BSF 0 ++#define CONFIG_HEVC_METADATA_BSF 0 ++#define CONFIG_HEVC_MP4TOANNEXB_BSF 0 
++#define CONFIG_IMX_DUMP_HEADER_BSF 0 ++#define CONFIG_MJPEG2JPEG_BSF 0 ++#define CONFIG_MJPEGA_DUMP_HEADER_BSF 0 ++#define CONFIG_MP3_HEADER_DECOMPRESS_BSF 0 ++#define CONFIG_MPEG2_METADATA_BSF 0 ++#define CONFIG_MPEG4_UNPACK_BFRAMES_BSF 0 ++#define CONFIG_MOV2TEXTSUB_BSF 0 ++#define CONFIG_NOISE_BSF 0 ++#define CONFIG_NULL_BSF 1 ++#define CONFIG_PRORES_METADATA_BSF 0 ++#define CONFIG_REMOVE_EXTRADATA_BSF 0 ++#define CONFIG_TEXT2MOVSUB_BSF 0 ++#define CONFIG_TRACE_HEADERS_BSF 0 ++#define CONFIG_TRUEHD_CORE_BSF 0 ++#define CONFIG_VP9_METADATA_BSF 0 ++#define CONFIG_VP9_RAW_REORDER_BSF 0 ++#define CONFIG_VP9_SUPERFRAME_BSF 0 ++#define CONFIG_VP9_SUPERFRAME_SPLIT_BSF 0 ++#define CONFIG_AASC_DECODER 0 ++#define CONFIG_AIC_DECODER 0 ++#define CONFIG_ALIAS_PIX_DECODER 0 ++#define CONFIG_AGM_DECODER 0 ++#define CONFIG_AMV_DECODER 0 ++#define CONFIG_ANM_DECODER 0 ++#define CONFIG_ANSI_DECODER 0 ++#define CONFIG_APNG_DECODER 0 ++#define CONFIG_ARBC_DECODER 0 ++#define CONFIG_ASV1_DECODER 0 ++#define CONFIG_ASV2_DECODER 0 ++#define CONFIG_AURA_DECODER 0 ++#define CONFIG_AURA2_DECODER 0 ++#define CONFIG_AVRP_DECODER 0 ++#define CONFIG_AVRN_DECODER 0 ++#define CONFIG_AVS_DECODER 0 ++#define CONFIG_AVUI_DECODER 0 ++#define CONFIG_AYUV_DECODER 0 ++#define CONFIG_BETHSOFTVID_DECODER 0 ++#define CONFIG_BFI_DECODER 0 ++#define CONFIG_BINK_DECODER 0 ++#define CONFIG_BITPACKED_DECODER 0 ++#define CONFIG_BMP_DECODER 0 ++#define CONFIG_BMV_VIDEO_DECODER 0 ++#define CONFIG_BRENDER_PIX_DECODER 0 ++#define CONFIG_C93_DECODER 0 ++#define CONFIG_CAVS_DECODER 0 ++#define CONFIG_CDGRAPHICS_DECODER 0 ++#define CONFIG_CDTOONS_DECODER 0 ++#define CONFIG_CDXL_DECODER 0 ++#define CONFIG_CFHD_DECODER 0 ++#define CONFIG_CINEPAK_DECODER 0 ++#define CONFIG_CLEARVIDEO_DECODER 0 ++#define CONFIG_CLJR_DECODER 0 ++#define CONFIG_CLLC_DECODER 0 ++#define CONFIG_COMFORTNOISE_DECODER 0 ++#define CONFIG_CPIA_DECODER 0 ++#define CONFIG_CSCD_DECODER 0 ++#define CONFIG_CYUV_DECODER 0 ++#define CONFIG_DDS_DECODER 0 ++#define CONFIG_DFA_DECODER 0 ++#define CONFIG_DIRAC_DECODER 0 ++#define CONFIG_DNXHD_DECODER 0 ++#define CONFIG_DPX_DECODER 0 ++#define CONFIG_DSICINVIDEO_DECODER 0 ++#define CONFIG_DVAUDIO_DECODER 0 ++#define CONFIG_DVVIDEO_DECODER 0 ++#define CONFIG_DXA_DECODER 0 ++#define CONFIG_DXTORY_DECODER 0 ++#define CONFIG_DXV_DECODER 0 ++#define CONFIG_EACMV_DECODER 0 ++#define CONFIG_EAMAD_DECODER 0 ++#define CONFIG_EATGQ_DECODER 0 ++#define CONFIG_EATGV_DECODER 0 ++#define CONFIG_EATQI_DECODER 0 ++#define CONFIG_EIGHTBPS_DECODER 0 ++#define CONFIG_EIGHTSVX_EXP_DECODER 0 ++#define CONFIG_EIGHTSVX_FIB_DECODER 0 ++#define CONFIG_ESCAPE124_DECODER 0 ++#define CONFIG_ESCAPE130_DECODER 0 ++#define CONFIG_EXR_DECODER 0 ++#define CONFIG_FFV1_DECODER 0 ++#define CONFIG_FFVHUFF_DECODER 0 ++#define CONFIG_FIC_DECODER 0 ++#define CONFIG_FITS_DECODER 0 ++#define CONFIG_FLASHSV_DECODER 0 ++#define CONFIG_FLASHSV2_DECODER 0 ++#define CONFIG_FLIC_DECODER 0 ++#define CONFIG_FLV_DECODER 0 ++#define CONFIG_FMVC_DECODER 0 ++#define CONFIG_FOURXM_DECODER 0 ++#define CONFIG_FRAPS_DECODER 0 ++#define CONFIG_FRWU_DECODER 0 ++#define CONFIG_G2M_DECODER 0 ++#define CONFIG_GDV_DECODER 0 ++#define CONFIG_GIF_DECODER 0 ++#define CONFIG_H261_DECODER 0 ++#define CONFIG_H263_DECODER 0 ++#define CONFIG_H263I_DECODER 0 ++#define CONFIG_H263P_DECODER 0 ++#define CONFIG_H263_V4L2M2M_DECODER 0 ++#define CONFIG_H264_DECODER 0 ++#define CONFIG_H264_CRYSTALHD_DECODER 0 ++#define CONFIG_H264_V4L2M2M_DECODER 0 ++#define CONFIG_H264_MEDIACODEC_DECODER 0 ++#define 
CONFIG_H264_MMAL_DECODER 0 ++#define CONFIG_H264_QSV_DECODER 0 ++#define CONFIG_H264_RKMPP_DECODER 0 ++#define CONFIG_HAP_DECODER 0 ++#define CONFIG_HEVC_DECODER 0 ++#define CONFIG_HEVC_QSV_DECODER 0 ++#define CONFIG_HEVC_RKMPP_DECODER 0 ++#define CONFIG_HEVC_V4L2M2M_DECODER 0 ++#define CONFIG_HNM4_VIDEO_DECODER 0 ++#define CONFIG_HQ_HQA_DECODER 0 ++#define CONFIG_HQX_DECODER 0 ++#define CONFIG_HUFFYUV_DECODER 0 ++#define CONFIG_HYMT_DECODER 0 ++#define CONFIG_IDCIN_DECODER 0 ++#define CONFIG_IFF_ILBM_DECODER 0 ++#define CONFIG_IMM4_DECODER 0 ++#define CONFIG_IMM5_DECODER 0 ++#define CONFIG_INDEO2_DECODER 0 ++#define CONFIG_INDEO3_DECODER 0 ++#define CONFIG_INDEO4_DECODER 0 ++#define CONFIG_INDEO5_DECODER 0 ++#define CONFIG_INTERPLAY_VIDEO_DECODER 0 ++#define CONFIG_JPEG2000_DECODER 0 ++#define CONFIG_JPEGLS_DECODER 0 ++#define CONFIG_JV_DECODER 0 ++#define CONFIG_KGV1_DECODER 0 ++#define CONFIG_KMVC_DECODER 0 ++#define CONFIG_LAGARITH_DECODER 0 ++#define CONFIG_LOCO_DECODER 0 ++#define CONFIG_LSCR_DECODER 0 ++#define CONFIG_M101_DECODER 0 ++#define CONFIG_MAGICYUV_DECODER 0 ++#define CONFIG_MDEC_DECODER 0 ++#define CONFIG_MIMIC_DECODER 0 ++#define CONFIG_MJPEG_DECODER 0 ++#define CONFIG_MJPEGB_DECODER 0 ++#define CONFIG_MMVIDEO_DECODER 0 ++#define CONFIG_MOTIONPIXELS_DECODER 0 ++#define CONFIG_MPEG1VIDEO_DECODER 0 ++#define CONFIG_MPEG2VIDEO_DECODER 0 ++#define CONFIG_MPEG4_DECODER 0 ++#define CONFIG_MPEG4_CRYSTALHD_DECODER 0 ++#define CONFIG_MPEG4_V4L2M2M_DECODER 0 ++#define CONFIG_MPEG4_MMAL_DECODER 0 ++#define CONFIG_MPEGVIDEO_DECODER 0 ++#define CONFIG_MPEG1_V4L2M2M_DECODER 0 ++#define CONFIG_MPEG2_MMAL_DECODER 0 ++#define CONFIG_MPEG2_CRYSTALHD_DECODER 0 ++#define CONFIG_MPEG2_V4L2M2M_DECODER 0 ++#define CONFIG_MPEG2_QSV_DECODER 0 ++#define CONFIG_MPEG2_MEDIACODEC_DECODER 0 ++#define CONFIG_MSA1_DECODER 0 ++#define CONFIG_MSCC_DECODER 0 ++#define CONFIG_MSMPEG4V1_DECODER 0 ++#define CONFIG_MSMPEG4V2_DECODER 0 ++#define CONFIG_MSMPEG4V3_DECODER 0 ++#define CONFIG_MSMPEG4_CRYSTALHD_DECODER 0 ++#define CONFIG_MSRLE_DECODER 0 ++#define CONFIG_MSS1_DECODER 0 ++#define CONFIG_MSS2_DECODER 0 ++#define CONFIG_MSVIDEO1_DECODER 0 ++#define CONFIG_MSZH_DECODER 0 ++#define CONFIG_MTS2_DECODER 0 ++#define CONFIG_MV30_DECODER 0 ++#define CONFIG_MVC1_DECODER 0 ++#define CONFIG_MVC2_DECODER 0 ++#define CONFIG_MVDV_DECODER 0 ++#define CONFIG_MVHA_DECODER 0 ++#define CONFIG_MWSC_DECODER 0 ++#define CONFIG_MXPEG_DECODER 0 ++#define CONFIG_NUV_DECODER 0 ++#define CONFIG_PAF_VIDEO_DECODER 0 ++#define CONFIG_PAM_DECODER 0 ++#define CONFIG_PBM_DECODER 0 ++#define CONFIG_PCX_DECODER 0 ++#define CONFIG_PGM_DECODER 0 ++#define CONFIG_PGMYUV_DECODER 0 ++#define CONFIG_PICTOR_DECODER 0 ++#define CONFIG_PIXLET_DECODER 0 ++#define CONFIG_PNG_DECODER 0 ++#define CONFIG_PPM_DECODER 0 ++#define CONFIG_PRORES_DECODER 0 ++#define CONFIG_PROSUMER_DECODER 0 ++#define CONFIG_PSD_DECODER 0 ++#define CONFIG_PTX_DECODER 0 ++#define CONFIG_QDRAW_DECODER 0 ++#define CONFIG_QPEG_DECODER 0 ++#define CONFIG_QTRLE_DECODER 0 ++#define CONFIG_R10K_DECODER 0 ++#define CONFIG_R210_DECODER 0 ++#define CONFIG_RASC_DECODER 0 ++#define CONFIG_RAWVIDEO_DECODER 0 ++#define CONFIG_RL2_DECODER 0 ++#define CONFIG_ROQ_DECODER 0 ++#define CONFIG_RPZA_DECODER 0 ++#define CONFIG_RSCC_DECODER 0 ++#define CONFIG_RV10_DECODER 0 ++#define CONFIG_RV20_DECODER 0 ++#define CONFIG_RV30_DECODER 0 ++#define CONFIG_RV40_DECODER 0 ++#define CONFIG_S302M_DECODER 0 ++#define CONFIG_SANM_DECODER 0 ++#define CONFIG_SCPR_DECODER 0 ++#define 
CONFIG_SCREENPRESSO_DECODER 0 ++#define CONFIG_SGI_DECODER 0 ++#define CONFIG_SGIRLE_DECODER 0 ++#define CONFIG_SHEERVIDEO_DECODER 0 ++#define CONFIG_SMACKER_DECODER 0 ++#define CONFIG_SMC_DECODER 0 ++#define CONFIG_SMVJPEG_DECODER 0 ++#define CONFIG_SNOW_DECODER 0 ++#define CONFIG_SP5X_DECODER 0 ++#define CONFIG_SPEEDHQ_DECODER 0 ++#define CONFIG_SRGC_DECODER 0 ++#define CONFIG_SUNRAST_DECODER 0 ++#define CONFIG_SVQ1_DECODER 0 ++#define CONFIG_SVQ3_DECODER 0 ++#define CONFIG_TARGA_DECODER 0 ++#define CONFIG_TARGA_Y216_DECODER 0 ++#define CONFIG_TDSC_DECODER 0 ++#define CONFIG_THEORA_DECODER 1 ++#define CONFIG_THP_DECODER 0 ++#define CONFIG_TIERTEXSEQVIDEO_DECODER 0 ++#define CONFIG_TIFF_DECODER 0 ++#define CONFIG_TMV_DECODER 0 ++#define CONFIG_TRUEMOTION1_DECODER 0 ++#define CONFIG_TRUEMOTION2_DECODER 0 ++#define CONFIG_TRUEMOTION2RT_DECODER 0 ++#define CONFIG_TSCC_DECODER 0 ++#define CONFIG_TSCC2_DECODER 0 ++#define CONFIG_TXD_DECODER 0 ++#define CONFIG_ULTI_DECODER 0 ++#define CONFIG_UTVIDEO_DECODER 0 ++#define CONFIG_V210_DECODER 0 ++#define CONFIG_V210X_DECODER 0 ++#define CONFIG_V308_DECODER 0 ++#define CONFIG_V408_DECODER 0 ++#define CONFIG_V410_DECODER 0 ++#define CONFIG_VB_DECODER 0 ++#define CONFIG_VBLE_DECODER 0 ++#define CONFIG_VC1_DECODER 0 ++#define CONFIG_VC1_CRYSTALHD_DECODER 0 ++#define CONFIG_VC1IMAGE_DECODER 0 ++#define CONFIG_VC1_MMAL_DECODER 0 ++#define CONFIG_VC1_QSV_DECODER 0 ++#define CONFIG_VC1_V4L2M2M_DECODER 0 ++#define CONFIG_VCR1_DECODER 0 ++#define CONFIG_VMDVIDEO_DECODER 0 ++#define CONFIG_VMNC_DECODER 0 ++#define CONFIG_VP3_DECODER 1 ++#define CONFIG_VP4_DECODER 0 ++#define CONFIG_VP5_DECODER 0 ++#define CONFIG_VP6_DECODER 0 ++#define CONFIG_VP6A_DECODER 0 ++#define CONFIG_VP6F_DECODER 0 ++#define CONFIG_VP7_DECODER 0 ++#define CONFIG_VP8_DECODER 1 ++#define CONFIG_VP8_RKMPP_DECODER 0 ++#define CONFIG_VP8_V4L2M2M_DECODER 0 ++#define CONFIG_VP9_DECODER 0 ++#define CONFIG_VP9_RKMPP_DECODER 0 ++#define CONFIG_VP9_V4L2M2M_DECODER 0 ++#define CONFIG_VQA_DECODER 0 ++#define CONFIG_WEBP_DECODER 0 ++#define CONFIG_WCMV_DECODER 0 ++#define CONFIG_WRAPPED_AVFRAME_DECODER 0 ++#define CONFIG_WMV1_DECODER 0 ++#define CONFIG_WMV2_DECODER 0 ++#define CONFIG_WMV3_DECODER 0 ++#define CONFIG_WMV3_CRYSTALHD_DECODER 0 ++#define CONFIG_WMV3IMAGE_DECODER 0 ++#define CONFIG_WNV1_DECODER 0 ++#define CONFIG_XAN_WC3_DECODER 0 ++#define CONFIG_XAN_WC4_DECODER 0 ++#define CONFIG_XBM_DECODER 0 ++#define CONFIG_XFACE_DECODER 0 ++#define CONFIG_XL_DECODER 0 ++#define CONFIG_XPM_DECODER 0 ++#define CONFIG_XWD_DECODER 0 ++#define CONFIG_Y41P_DECODER 0 ++#define CONFIG_YLC_DECODER 0 ++#define CONFIG_YOP_DECODER 0 ++#define CONFIG_YUV4_DECODER 0 ++#define CONFIG_ZERO12V_DECODER 0 ++#define CONFIG_ZEROCODEC_DECODER 0 ++#define CONFIG_ZLIB_DECODER 0 ++#define CONFIG_ZMBV_DECODER 0 ++#define CONFIG_AAC_DECODER 0 ++#define CONFIG_AAC_FIXED_DECODER 0 ++#define CONFIG_AAC_LATM_DECODER 0 ++#define CONFIG_AC3_DECODER 0 ++#define CONFIG_AC3_FIXED_DECODER 0 ++#define CONFIG_ACELP_KELVIN_DECODER 0 ++#define CONFIG_ALAC_DECODER 0 ++#define CONFIG_ALS_DECODER 0 ++#define CONFIG_AMRNB_DECODER 0 ++#define CONFIG_AMRWB_DECODER 0 ++#define CONFIG_APE_DECODER 0 ++#define CONFIG_APTX_DECODER 0 ++#define CONFIG_APTX_HD_DECODER 0 ++#define CONFIG_ATRAC1_DECODER 0 ++#define CONFIG_ATRAC3_DECODER 0 ++#define CONFIG_ATRAC3AL_DECODER 0 ++#define CONFIG_ATRAC3P_DECODER 0 ++#define CONFIG_ATRAC3PAL_DECODER 0 ++#define CONFIG_ATRAC9_DECODER 0 ++#define CONFIG_BINKAUDIO_DCT_DECODER 0 ++#define 
CONFIG_BINKAUDIO_RDFT_DECODER 0 ++#define CONFIG_BMV_AUDIO_DECODER 0 ++#define CONFIG_COOK_DECODER 0 ++#define CONFIG_DCA_DECODER 0 ++#define CONFIG_DOLBY_E_DECODER 0 ++#define CONFIG_DSD_LSBF_DECODER 0 ++#define CONFIG_DSD_MSBF_DECODER 0 ++#define CONFIG_DSD_LSBF_PLANAR_DECODER 0 ++#define CONFIG_DSD_MSBF_PLANAR_DECODER 0 ++#define CONFIG_DSICINAUDIO_DECODER 0 ++#define CONFIG_DSS_SP_DECODER 0 ++#define CONFIG_DST_DECODER 0 ++#define CONFIG_EAC3_DECODER 0 ++#define CONFIG_EVRC_DECODER 0 ++#define CONFIG_FFWAVESYNTH_DECODER 0 ++#define CONFIG_FLAC_DECODER 1 ++#define CONFIG_G723_1_DECODER 0 ++#define CONFIG_G729_DECODER 0 ++#define CONFIG_GSM_DECODER 0 ++#define CONFIG_GSM_MS_DECODER 0 ++#define CONFIG_HCA_DECODER 0 ++#define CONFIG_HCOM_DECODER 0 ++#define CONFIG_IAC_DECODER 0 ++#define CONFIG_ILBC_DECODER 0 ++#define CONFIG_IMC_DECODER 0 ++#define CONFIG_INTERPLAY_ACM_DECODER 0 ++#define CONFIG_MACE3_DECODER 0 ++#define CONFIG_MACE6_DECODER 0 ++#define CONFIG_METASOUND_DECODER 0 ++#define CONFIG_MLP_DECODER 0 ++#define CONFIG_MP1_DECODER 0 ++#define CONFIG_MP1FLOAT_DECODER 0 ++#define CONFIG_MP2_DECODER 0 ++#define CONFIG_MP2FLOAT_DECODER 0 ++#define CONFIG_MP3FLOAT_DECODER 0 ++#define CONFIG_MP3_DECODER 1 ++#define CONFIG_MP3ADUFLOAT_DECODER 0 ++#define CONFIG_MP3ADU_DECODER 0 ++#define CONFIG_MP3ON4FLOAT_DECODER 0 ++#define CONFIG_MP3ON4_DECODER 0 ++#define CONFIG_MPC7_DECODER 0 ++#define CONFIG_MPC8_DECODER 0 ++#define CONFIG_NELLYMOSER_DECODER 0 ++#define CONFIG_ON2AVC_DECODER 0 ++#define CONFIG_OPUS_DECODER 0 ++#define CONFIG_PAF_AUDIO_DECODER 0 ++#define CONFIG_QCELP_DECODER 0 ++#define CONFIG_QDM2_DECODER 0 ++#define CONFIG_QDMC_DECODER 0 ++#define CONFIG_RA_144_DECODER 0 ++#define CONFIG_RA_288_DECODER 0 ++#define CONFIG_RALF_DECODER 0 ++#define CONFIG_SBC_DECODER 0 ++#define CONFIG_SHORTEN_DECODER 0 ++#define CONFIG_SIPR_DECODER 0 ++#define CONFIG_SIREN_DECODER 0 ++#define CONFIG_SMACKAUD_DECODER 0 ++#define CONFIG_SONIC_DECODER 0 ++#define CONFIG_TAK_DECODER 0 ++#define CONFIG_TRUEHD_DECODER 0 ++#define CONFIG_TRUESPEECH_DECODER 0 ++#define CONFIG_TTA_DECODER 0 ++#define CONFIG_TWINVQ_DECODER 0 ++#define CONFIG_VMDAUDIO_DECODER 0 ++#define CONFIG_VORBIS_DECODER 1 ++#define CONFIG_WAVPACK_DECODER 0 ++#define CONFIG_WMALOSSLESS_DECODER 0 ++#define CONFIG_WMAPRO_DECODER 0 ++#define CONFIG_WMAV1_DECODER 0 ++#define CONFIG_WMAV2_DECODER 0 ++#define CONFIG_WMAVOICE_DECODER 0 ++#define CONFIG_WS_SND1_DECODER 0 ++#define CONFIG_XMA1_DECODER 0 ++#define CONFIG_XMA2_DECODER 0 ++#define CONFIG_PCM_ALAW_DECODER 1 ++#define CONFIG_PCM_BLURAY_DECODER 0 ++#define CONFIG_PCM_DVD_DECODER 0 ++#define CONFIG_PCM_F16LE_DECODER 0 ++#define CONFIG_PCM_F24LE_DECODER 0 ++#define CONFIG_PCM_F32BE_DECODER 0 ++#define CONFIG_PCM_F32LE_DECODER 1 ++#define CONFIG_PCM_F64BE_DECODER 0 ++#define CONFIG_PCM_F64LE_DECODER 0 ++#define CONFIG_PCM_LXF_DECODER 0 ++#define CONFIG_PCM_MULAW_DECODER 1 ++#define CONFIG_PCM_S8_DECODER 0 ++#define CONFIG_PCM_S8_PLANAR_DECODER 0 ++#define CONFIG_PCM_S16BE_DECODER 1 ++#define CONFIG_PCM_S16BE_PLANAR_DECODER 0 ++#define CONFIG_PCM_S16LE_DECODER 1 ++#define CONFIG_PCM_S16LE_PLANAR_DECODER 0 ++#define CONFIG_PCM_S24BE_DECODER 1 ++#define CONFIG_PCM_S24DAUD_DECODER 0 ++#define CONFIG_PCM_S24LE_DECODER 1 ++#define CONFIG_PCM_S24LE_PLANAR_DECODER 0 ++#define CONFIG_PCM_S32BE_DECODER 0 ++#define CONFIG_PCM_S32LE_DECODER 1 ++#define CONFIG_PCM_S32LE_PLANAR_DECODER 0 ++#define CONFIG_PCM_S64BE_DECODER 0 ++#define CONFIG_PCM_S64LE_DECODER 0 ++#define CONFIG_PCM_U8_DECODER 1 
++#define CONFIG_PCM_U16BE_DECODER 0 ++#define CONFIG_PCM_U16LE_DECODER 0 ++#define CONFIG_PCM_U24BE_DECODER 0 ++#define CONFIG_PCM_U24LE_DECODER 0 ++#define CONFIG_PCM_U32BE_DECODER 0 ++#define CONFIG_PCM_U32LE_DECODER 0 ++#define CONFIG_PCM_VIDC_DECODER 0 ++#define CONFIG_DERF_DPCM_DECODER 0 ++#define CONFIG_GREMLIN_DPCM_DECODER 0 ++#define CONFIG_INTERPLAY_DPCM_DECODER 0 ++#define CONFIG_ROQ_DPCM_DECODER 0 ++#define CONFIG_SDX2_DPCM_DECODER 0 ++#define CONFIG_SOL_DPCM_DECODER 0 ++#define CONFIG_XAN_DPCM_DECODER 0 ++#define CONFIG_ADPCM_4XM_DECODER 0 ++#define CONFIG_ADPCM_ADX_DECODER 0 ++#define CONFIG_ADPCM_AFC_DECODER 0 ++#define CONFIG_ADPCM_AGM_DECODER 0 ++#define CONFIG_ADPCM_AICA_DECODER 0 ++#define CONFIG_ADPCM_ARGO_DECODER 0 ++#define CONFIG_ADPCM_CT_DECODER 0 ++#define CONFIG_ADPCM_DTK_DECODER 0 ++#define CONFIG_ADPCM_EA_DECODER 0 ++#define CONFIG_ADPCM_EA_MAXIS_XA_DECODER 0 ++#define CONFIG_ADPCM_EA_R1_DECODER 0 ++#define CONFIG_ADPCM_EA_R2_DECODER 0 ++#define CONFIG_ADPCM_EA_R3_DECODER 0 ++#define CONFIG_ADPCM_EA_XAS_DECODER 0 ++#define CONFIG_ADPCM_G722_DECODER 0 ++#define CONFIG_ADPCM_G726_DECODER 0 ++#define CONFIG_ADPCM_G726LE_DECODER 0 ++#define CONFIG_ADPCM_IMA_AMV_DECODER 0 ++#define CONFIG_ADPCM_IMA_ALP_DECODER 0 ++#define CONFIG_ADPCM_IMA_APC_DECODER 0 ++#define CONFIG_ADPCM_IMA_APM_DECODER 0 ++#define CONFIG_ADPCM_IMA_CUNNING_DECODER 0 ++#define CONFIG_ADPCM_IMA_DAT4_DECODER 0 ++#define CONFIG_ADPCM_IMA_DK3_DECODER 0 ++#define CONFIG_ADPCM_IMA_DK4_DECODER 0 ++#define CONFIG_ADPCM_IMA_EA_EACS_DECODER 0 ++#define CONFIG_ADPCM_IMA_EA_SEAD_DECODER 0 ++#define CONFIG_ADPCM_IMA_ISS_DECODER 0 ++#define CONFIG_ADPCM_IMA_MTF_DECODER 0 ++#define CONFIG_ADPCM_IMA_OKI_DECODER 0 ++#define CONFIG_ADPCM_IMA_QT_DECODER 0 ++#define CONFIG_ADPCM_IMA_RAD_DECODER 0 ++#define CONFIG_ADPCM_IMA_SSI_DECODER 0 ++#define CONFIG_ADPCM_IMA_SMJPEG_DECODER 0 ++#define CONFIG_ADPCM_IMA_WAV_DECODER 0 ++#define CONFIG_ADPCM_IMA_WS_DECODER 0 ++#define CONFIG_ADPCM_MS_DECODER 0 ++#define CONFIG_ADPCM_MTAF_DECODER 0 ++#define CONFIG_ADPCM_PSX_DECODER 0 ++#define CONFIG_ADPCM_SBPRO_2_DECODER 0 ++#define CONFIG_ADPCM_SBPRO_3_DECODER 0 ++#define CONFIG_ADPCM_SBPRO_4_DECODER 0 ++#define CONFIG_ADPCM_SWF_DECODER 0 ++#define CONFIG_ADPCM_THP_DECODER 0 ++#define CONFIG_ADPCM_THP_LE_DECODER 0 ++#define CONFIG_ADPCM_VIMA_DECODER 0 ++#define CONFIG_ADPCM_XA_DECODER 0 ++#define CONFIG_ADPCM_YAMAHA_DECODER 0 ++#define CONFIG_ADPCM_ZORK_DECODER 0 ++#define CONFIG_SSA_DECODER 0 ++#define CONFIG_ASS_DECODER 0 ++#define CONFIG_CCAPTION_DECODER 0 ++#define CONFIG_DVBSUB_DECODER 0 ++#define CONFIG_DVDSUB_DECODER 0 ++#define CONFIG_JACOSUB_DECODER 0 ++#define CONFIG_MICRODVD_DECODER 0 ++#define CONFIG_MOVTEXT_DECODER 0 ++#define CONFIG_MPL2_DECODER 0 ++#define CONFIG_PGSSUB_DECODER 0 ++#define CONFIG_PJS_DECODER 0 ++#define CONFIG_REALTEXT_DECODER 0 ++#define CONFIG_SAMI_DECODER 0 ++#define CONFIG_SRT_DECODER 0 ++#define CONFIG_STL_DECODER 0 ++#define CONFIG_SUBRIP_DECODER 0 ++#define CONFIG_SUBVIEWER_DECODER 0 ++#define CONFIG_SUBVIEWER1_DECODER 0 ++#define CONFIG_TEXT_DECODER 0 ++#define CONFIG_VPLAYER_DECODER 0 ++#define CONFIG_WEBVTT_DECODER 0 ++#define CONFIG_XSUB_DECODER 0 ++#define CONFIG_AAC_AT_DECODER 0 ++#define CONFIG_AC3_AT_DECODER 0 ++#define CONFIG_ADPCM_IMA_QT_AT_DECODER 0 ++#define CONFIG_ALAC_AT_DECODER 0 ++#define CONFIG_AMR_NB_AT_DECODER 0 ++#define CONFIG_EAC3_AT_DECODER 0 ++#define CONFIG_GSM_MS_AT_DECODER 0 ++#define CONFIG_ILBC_AT_DECODER 0 ++#define CONFIG_MP1_AT_DECODER 0 ++#define 
CONFIG_MP2_AT_DECODER 0 ++#define CONFIG_MP3_AT_DECODER 0 ++#define CONFIG_PCM_ALAW_AT_DECODER 0 ++#define CONFIG_PCM_MULAW_AT_DECODER 0 ++#define CONFIG_QDMC_AT_DECODER 0 ++#define CONFIG_QDM2_AT_DECODER 0 ++#define CONFIG_LIBARIBB24_DECODER 0 ++#define CONFIG_LIBCELT_DECODER 0 ++#define CONFIG_LIBCODEC2_DECODER 0 ++#define CONFIG_LIBDAV1D_DECODER 0 ++#define CONFIG_LIBDAVS2_DECODER 0 ++#define CONFIG_LIBFDK_AAC_DECODER 0 ++#define CONFIG_LIBGSM_DECODER 0 ++#define CONFIG_LIBGSM_MS_DECODER 0 ++#define CONFIG_LIBILBC_DECODER 0 ++#define CONFIG_LIBOPENCORE_AMRNB_DECODER 0 ++#define CONFIG_LIBOPENCORE_AMRWB_DECODER 0 ++#define CONFIG_LIBOPENJPEG_DECODER 0 ++#define CONFIG_LIBOPUS_DECODER 1 ++#define CONFIG_LIBRSVG_DECODER 0 ++#define CONFIG_LIBSPEEX_DECODER 0 ++#define CONFIG_LIBVORBIS_DECODER 0 ++#define CONFIG_LIBVPX_VP8_DECODER 0 ++#define CONFIG_LIBVPX_VP9_DECODER 0 ++#define CONFIG_LIBZVBI_TELETEXT_DECODER 0 ++#define CONFIG_BINTEXT_DECODER 0 ++#define CONFIG_XBIN_DECODER 0 ++#define CONFIG_IDF_DECODER 0 ++#define CONFIG_LIBAOM_AV1_DECODER 0 ++#define CONFIG_LIBOPENH264_DECODER 0 ++#define CONFIG_H264_CUVID_DECODER 0 ++#define CONFIG_HEVC_CUVID_DECODER 0 ++#define CONFIG_HEVC_MEDIACODEC_DECODER 0 ++#define CONFIG_MJPEG_CUVID_DECODER 0 ++#define CONFIG_MJPEG_QSV_DECODER 0 ++#define CONFIG_MPEG1_CUVID_DECODER 0 ++#define CONFIG_MPEG2_CUVID_DECODER 0 ++#define CONFIG_MPEG4_CUVID_DECODER 0 ++#define CONFIG_MPEG4_MEDIACODEC_DECODER 0 ++#define CONFIG_VC1_CUVID_DECODER 0 ++#define CONFIG_VP8_CUVID_DECODER 0 ++#define CONFIG_VP8_MEDIACODEC_DECODER 0 ++#define CONFIG_VP8_QSV_DECODER 0 ++#define CONFIG_VP9_CUVID_DECODER 0 ++#define CONFIG_VP9_MEDIACODEC_DECODER 0 ++#define CONFIG_VP9_QSV_DECODER 0 ++#define CONFIG_A64MULTI_ENCODER 0 ++#define CONFIG_A64MULTI5_ENCODER 0 ++#define CONFIG_ALIAS_PIX_ENCODER 0 ++#define CONFIG_AMV_ENCODER 0 ++#define CONFIG_APNG_ENCODER 0 ++#define CONFIG_ASV1_ENCODER 0 ++#define CONFIG_ASV2_ENCODER 0 ++#define CONFIG_AVRP_ENCODER 0 ++#define CONFIG_AVUI_ENCODER 0 ++#define CONFIG_AYUV_ENCODER 0 ++#define CONFIG_BMP_ENCODER 0 ++#define CONFIG_CINEPAK_ENCODER 0 ++#define CONFIG_CLJR_ENCODER 0 ++#define CONFIG_COMFORTNOISE_ENCODER 0 ++#define CONFIG_DNXHD_ENCODER 0 ++#define CONFIG_DPX_ENCODER 0 ++#define CONFIG_DVVIDEO_ENCODER 0 ++#define CONFIG_FFV1_ENCODER 0 ++#define CONFIG_FFVHUFF_ENCODER 0 ++#define CONFIG_FITS_ENCODER 0 ++#define CONFIG_FLASHSV_ENCODER 0 ++#define CONFIG_FLASHSV2_ENCODER 0 ++#define CONFIG_FLV_ENCODER 0 ++#define CONFIG_GIF_ENCODER 0 ++#define CONFIG_H261_ENCODER 0 ++#define CONFIG_H263_ENCODER 0 ++#define CONFIG_H263P_ENCODER 0 ++#define CONFIG_HAP_ENCODER 0 ++#define CONFIG_HUFFYUV_ENCODER 0 ++#define CONFIG_JPEG2000_ENCODER 0 ++#define CONFIG_JPEGLS_ENCODER 0 ++#define CONFIG_LJPEG_ENCODER 0 ++#define CONFIG_MAGICYUV_ENCODER 0 ++#define CONFIG_MJPEG_ENCODER 0 ++#define CONFIG_MPEG1VIDEO_ENCODER 0 ++#define CONFIG_MPEG2VIDEO_ENCODER 0 ++#define CONFIG_MPEG4_ENCODER 0 ++#define CONFIG_MSMPEG4V2_ENCODER 0 ++#define CONFIG_MSMPEG4V3_ENCODER 0 ++#define CONFIG_MSVIDEO1_ENCODER 0 ++#define CONFIG_PAM_ENCODER 0 ++#define CONFIG_PBM_ENCODER 0 ++#define CONFIG_PCX_ENCODER 0 ++#define CONFIG_PGM_ENCODER 0 ++#define CONFIG_PGMYUV_ENCODER 0 ++#define CONFIG_PNG_ENCODER 0 ++#define CONFIG_PPM_ENCODER 0 ++#define CONFIG_PRORES_ENCODER 0 ++#define CONFIG_PRORES_AW_ENCODER 0 ++#define CONFIG_PRORES_KS_ENCODER 0 ++#define CONFIG_QTRLE_ENCODER 0 ++#define CONFIG_R10K_ENCODER 0 ++#define CONFIG_R210_ENCODER 0 ++#define CONFIG_RAWVIDEO_ENCODER 0 ++#define 
CONFIG_ROQ_ENCODER 0 ++#define CONFIG_RV10_ENCODER 0 ++#define CONFIG_RV20_ENCODER 0 ++#define CONFIG_S302M_ENCODER 0 ++#define CONFIG_SGI_ENCODER 0 ++#define CONFIG_SNOW_ENCODER 0 ++#define CONFIG_SUNRAST_ENCODER 0 ++#define CONFIG_SVQ1_ENCODER 0 ++#define CONFIG_TARGA_ENCODER 0 ++#define CONFIG_TIFF_ENCODER 0 ++#define CONFIG_UTVIDEO_ENCODER 0 ++#define CONFIG_V210_ENCODER 0 ++#define CONFIG_V308_ENCODER 0 ++#define CONFIG_V408_ENCODER 0 ++#define CONFIG_V410_ENCODER 0 ++#define CONFIG_VC2_ENCODER 0 ++#define CONFIG_WRAPPED_AVFRAME_ENCODER 0 ++#define CONFIG_WMV1_ENCODER 0 ++#define CONFIG_WMV2_ENCODER 0 ++#define CONFIG_XBM_ENCODER 0 ++#define CONFIG_XFACE_ENCODER 0 ++#define CONFIG_XWD_ENCODER 0 ++#define CONFIG_Y41P_ENCODER 0 ++#define CONFIG_YUV4_ENCODER 0 ++#define CONFIG_ZLIB_ENCODER 0 ++#define CONFIG_ZMBV_ENCODER 0 ++#define CONFIG_AAC_ENCODER 0 ++#define CONFIG_AC3_ENCODER 0 ++#define CONFIG_AC3_FIXED_ENCODER 0 ++#define CONFIG_ALAC_ENCODER 0 ++#define CONFIG_APTX_ENCODER 0 ++#define CONFIG_APTX_HD_ENCODER 0 ++#define CONFIG_DCA_ENCODER 0 ++#define CONFIG_EAC3_ENCODER 0 ++#define CONFIG_FLAC_ENCODER 0 ++#define CONFIG_G723_1_ENCODER 0 ++#define CONFIG_MLP_ENCODER 0 ++#define CONFIG_MP2_ENCODER 0 ++#define CONFIG_MP2FIXED_ENCODER 0 ++#define CONFIG_NELLYMOSER_ENCODER 0 ++#define CONFIG_OPUS_ENCODER 0 ++#define CONFIG_RA_144_ENCODER 0 ++#define CONFIG_SBC_ENCODER 0 ++#define CONFIG_SONIC_ENCODER 0 ++#define CONFIG_SONIC_LS_ENCODER 0 ++#define CONFIG_TRUEHD_ENCODER 0 ++#define CONFIG_TTA_ENCODER 0 ++#define CONFIG_VORBIS_ENCODER 0 ++#define CONFIG_WAVPACK_ENCODER 0 ++#define CONFIG_WMAV1_ENCODER 0 ++#define CONFIG_WMAV2_ENCODER 0 ++#define CONFIG_PCM_ALAW_ENCODER 0 ++#define CONFIG_PCM_DVD_ENCODER 0 ++#define CONFIG_PCM_F32BE_ENCODER 0 ++#define CONFIG_PCM_F32LE_ENCODER 0 ++#define CONFIG_PCM_F64BE_ENCODER 0 ++#define CONFIG_PCM_F64LE_ENCODER 0 ++#define CONFIG_PCM_MULAW_ENCODER 0 ++#define CONFIG_PCM_S8_ENCODER 0 ++#define CONFIG_PCM_S8_PLANAR_ENCODER 0 ++#define CONFIG_PCM_S16BE_ENCODER 0 ++#define CONFIG_PCM_S16BE_PLANAR_ENCODER 0 ++#define CONFIG_PCM_S16LE_ENCODER 0 ++#define CONFIG_PCM_S16LE_PLANAR_ENCODER 0 ++#define CONFIG_PCM_S24BE_ENCODER 0 ++#define CONFIG_PCM_S24DAUD_ENCODER 0 ++#define CONFIG_PCM_S24LE_ENCODER 0 ++#define CONFIG_PCM_S24LE_PLANAR_ENCODER 0 ++#define CONFIG_PCM_S32BE_ENCODER 0 ++#define CONFIG_PCM_S32LE_ENCODER 0 ++#define CONFIG_PCM_S32LE_PLANAR_ENCODER 0 ++#define CONFIG_PCM_S64BE_ENCODER 0 ++#define CONFIG_PCM_S64LE_ENCODER 0 ++#define CONFIG_PCM_U8_ENCODER 0 ++#define CONFIG_PCM_U16BE_ENCODER 0 ++#define CONFIG_PCM_U16LE_ENCODER 0 ++#define CONFIG_PCM_U24BE_ENCODER 0 ++#define CONFIG_PCM_U24LE_ENCODER 0 ++#define CONFIG_PCM_U32BE_ENCODER 0 ++#define CONFIG_PCM_U32LE_ENCODER 0 ++#define CONFIG_PCM_VIDC_ENCODER 0 ++#define CONFIG_ROQ_DPCM_ENCODER 0 ++#define CONFIG_ADPCM_ADX_ENCODER 0 ++#define CONFIG_ADPCM_G722_ENCODER 0 ++#define CONFIG_ADPCM_G726_ENCODER 0 ++#define CONFIG_ADPCM_G726LE_ENCODER 0 ++#define CONFIG_ADPCM_IMA_QT_ENCODER 0 ++#define CONFIG_ADPCM_IMA_WAV_ENCODER 0 ++#define CONFIG_ADPCM_MS_ENCODER 0 ++#define CONFIG_ADPCM_SWF_ENCODER 0 ++#define CONFIG_ADPCM_YAMAHA_ENCODER 0 ++#define CONFIG_SSA_ENCODER 0 ++#define CONFIG_ASS_ENCODER 0 ++#define CONFIG_DVBSUB_ENCODER 0 ++#define CONFIG_DVDSUB_ENCODER 0 ++#define CONFIG_MOVTEXT_ENCODER 0 ++#define CONFIG_SRT_ENCODER 0 ++#define CONFIG_SUBRIP_ENCODER 0 ++#define CONFIG_TEXT_ENCODER 0 ++#define CONFIG_WEBVTT_ENCODER 0 ++#define CONFIG_XSUB_ENCODER 0 ++#define CONFIG_AAC_AT_ENCODER 0 ++#define 
CONFIG_ALAC_AT_ENCODER 0 ++#define CONFIG_ILBC_AT_ENCODER 0 ++#define CONFIG_PCM_ALAW_AT_ENCODER 0 ++#define CONFIG_PCM_MULAW_AT_ENCODER 0 ++#define CONFIG_LIBAOM_AV1_ENCODER 0 ++#define CONFIG_LIBCODEC2_ENCODER 0 ++#define CONFIG_LIBFDK_AAC_ENCODER 0 ++#define CONFIG_LIBGSM_ENCODER 0 ++#define CONFIG_LIBGSM_MS_ENCODER 0 ++#define CONFIG_LIBILBC_ENCODER 0 ++#define CONFIG_LIBMP3LAME_ENCODER 0 ++#define CONFIG_LIBOPENCORE_AMRNB_ENCODER 0 ++#define CONFIG_LIBOPENJPEG_ENCODER 0 ++#define CONFIG_LIBOPUS_ENCODER 0 ++#define CONFIG_LIBRAV1E_ENCODER 0 ++#define CONFIG_LIBSHINE_ENCODER 0 ++#define CONFIG_LIBSPEEX_ENCODER 0 ++#define CONFIG_LIBTHEORA_ENCODER 0 ++#define CONFIG_LIBTWOLAME_ENCODER 0 ++#define CONFIG_LIBVO_AMRWBENC_ENCODER 0 ++#define CONFIG_LIBVORBIS_ENCODER 0 ++#define CONFIG_LIBVPX_VP8_ENCODER 0 ++#define CONFIG_LIBVPX_VP9_ENCODER 0 ++#define CONFIG_LIBWAVPACK_ENCODER 0 ++#define CONFIG_LIBWEBP_ANIM_ENCODER 0 ++#define CONFIG_LIBWEBP_ENCODER 0 ++#define CONFIG_LIBX262_ENCODER 0 ++#define CONFIG_LIBX264_ENCODER 0 ++#define CONFIG_LIBX264RGB_ENCODER 0 ++#define CONFIG_LIBX265_ENCODER 0 ++#define CONFIG_LIBXAVS_ENCODER 0 ++#define CONFIG_LIBXAVS2_ENCODER 0 ++#define CONFIG_LIBXVID_ENCODER 0 ++#define CONFIG_H263_V4L2M2M_ENCODER 0 ++#define CONFIG_LIBOPENH264_ENCODER 0 ++#define CONFIG_H264_AMF_ENCODER 0 ++#define CONFIG_H264_NVENC_ENCODER 0 ++#define CONFIG_H264_OMX_ENCODER 0 ++#define CONFIG_H264_QSV_ENCODER 0 ++#define CONFIG_H264_V4L2M2M_ENCODER 0 ++#define CONFIG_H264_VAAPI_ENCODER 0 ++#define CONFIG_H264_VIDEOTOOLBOX_ENCODER 0 ++#define CONFIG_NVENC_ENCODER 0 ++#define CONFIG_NVENC_H264_ENCODER 0 ++#define CONFIG_NVENC_HEVC_ENCODER 0 ++#define CONFIG_HEVC_AMF_ENCODER 0 ++#define CONFIG_HEVC_NVENC_ENCODER 0 ++#define CONFIG_HEVC_QSV_ENCODER 0 ++#define CONFIG_HEVC_V4L2M2M_ENCODER 0 ++#define CONFIG_HEVC_VAAPI_ENCODER 0 ++#define CONFIG_HEVC_VIDEOTOOLBOX_ENCODER 0 ++#define CONFIG_LIBKVAZAAR_ENCODER 0 ++#define CONFIG_MJPEG_QSV_ENCODER 0 ++#define CONFIG_MJPEG_VAAPI_ENCODER 0 ++#define CONFIG_MPEG2_QSV_ENCODER 0 ++#define CONFIG_MPEG2_VAAPI_ENCODER 0 ++#define CONFIG_MPEG4_OMX_ENCODER 0 ++#define CONFIG_MPEG4_V4L2M2M_ENCODER 0 ++#define CONFIG_VP8_V4L2M2M_ENCODER 0 ++#define CONFIG_VP8_VAAPI_ENCODER 0 ++#define CONFIG_VP9_VAAPI_ENCODER 0 ++#define CONFIG_VP9_QSV_ENCODER 0 ++#define CONFIG_H263_VAAPI_HWACCEL 0 ++#define CONFIG_H263_VIDEOTOOLBOX_HWACCEL 0 ++#define CONFIG_H264_D3D11VA_HWACCEL 0 ++#define CONFIG_H264_D3D11VA2_HWACCEL 0 ++#define CONFIG_H264_DXVA2_HWACCEL 0 ++#define CONFIG_H264_NVDEC_HWACCEL 0 ++#define CONFIG_H264_VAAPI_HWACCEL 0 ++#define CONFIG_H264_VDPAU_HWACCEL 0 ++#define CONFIG_H264_VIDEOTOOLBOX_HWACCEL 0 ++#define CONFIG_HEVC_D3D11VA_HWACCEL 0 ++#define CONFIG_HEVC_D3D11VA2_HWACCEL 0 ++#define CONFIG_HEVC_DXVA2_HWACCEL 0 ++#define CONFIG_HEVC_NVDEC_HWACCEL 0 ++#define CONFIG_HEVC_VAAPI_HWACCEL 0 ++#define CONFIG_HEVC_VDPAU_HWACCEL 0 ++#define CONFIG_HEVC_VIDEOTOOLBOX_HWACCEL 0 ++#define CONFIG_MJPEG_NVDEC_HWACCEL 0 ++#define CONFIG_MJPEG_VAAPI_HWACCEL 0 ++#define CONFIG_MPEG1_NVDEC_HWACCEL 0 ++#define CONFIG_MPEG1_VDPAU_HWACCEL 0 ++#define CONFIG_MPEG1_VIDEOTOOLBOX_HWACCEL 0 ++#define CONFIG_MPEG1_XVMC_HWACCEL 0 ++#define CONFIG_MPEG2_D3D11VA_HWACCEL 0 ++#define CONFIG_MPEG2_D3D11VA2_HWACCEL 0 ++#define CONFIG_MPEG2_NVDEC_HWACCEL 0 ++#define CONFIG_MPEG2_DXVA2_HWACCEL 0 ++#define CONFIG_MPEG2_VAAPI_HWACCEL 0 ++#define CONFIG_MPEG2_VDPAU_HWACCEL 0 ++#define CONFIG_MPEG2_VIDEOTOOLBOX_HWACCEL 0 ++#define CONFIG_MPEG2_XVMC_HWACCEL 0 ++#define 
CONFIG_MPEG4_NVDEC_HWACCEL 0 ++#define CONFIG_MPEG4_VAAPI_HWACCEL 0 ++#define CONFIG_MPEG4_VDPAU_HWACCEL 0 ++#define CONFIG_MPEG4_VIDEOTOOLBOX_HWACCEL 0 ++#define CONFIG_VC1_D3D11VA_HWACCEL 0 ++#define CONFIG_VC1_D3D11VA2_HWACCEL 0 ++#define CONFIG_VC1_DXVA2_HWACCEL 0 ++#define CONFIG_VC1_NVDEC_HWACCEL 0 ++#define CONFIG_VC1_VAAPI_HWACCEL 0 ++#define CONFIG_VC1_VDPAU_HWACCEL 0 ++#define CONFIG_VP8_NVDEC_HWACCEL 0 ++#define CONFIG_VP8_VAAPI_HWACCEL 0 ++#define CONFIG_VP9_D3D11VA_HWACCEL 0 ++#define CONFIG_VP9_D3D11VA2_HWACCEL 0 ++#define CONFIG_VP9_DXVA2_HWACCEL 0 ++#define CONFIG_VP9_NVDEC_HWACCEL 0 ++#define CONFIG_VP9_VAAPI_HWACCEL 0 ++#define CONFIG_VP9_VDPAU_HWACCEL 0 ++#define CONFIG_WMV3_D3D11VA_HWACCEL 0 ++#define CONFIG_WMV3_D3D11VA2_HWACCEL 0 ++#define CONFIG_WMV3_DXVA2_HWACCEL 0 ++#define CONFIG_WMV3_NVDEC_HWACCEL 0 ++#define CONFIG_WMV3_VAAPI_HWACCEL 0 ++#define CONFIG_WMV3_VDPAU_HWACCEL 0 ++#define CONFIG_AAC_PARSER 0 ++#define CONFIG_AAC_LATM_PARSER 0 ++#define CONFIG_AC3_PARSER 0 ++#define CONFIG_ADX_PARSER 0 ++#define CONFIG_AV1_PARSER 0 ++#define CONFIG_AVS2_PARSER 0 ++#define CONFIG_BMP_PARSER 0 ++#define CONFIG_CAVSVIDEO_PARSER 0 ++#define CONFIG_COOK_PARSER 0 ++#define CONFIG_DCA_PARSER 0 ++#define CONFIG_DIRAC_PARSER 0 ++#define CONFIG_DNXHD_PARSER 0 ++#define CONFIG_DPX_PARSER 0 ++#define CONFIG_DVAUDIO_PARSER 0 ++#define CONFIG_DVBSUB_PARSER 0 ++#define CONFIG_DVDSUB_PARSER 0 ++#define CONFIG_DVD_NAV_PARSER 0 ++#define CONFIG_FLAC_PARSER 1 ++#define CONFIG_G723_1_PARSER 0 ++#define CONFIG_G729_PARSER 0 ++#define CONFIG_GIF_PARSER 0 ++#define CONFIG_GSM_PARSER 0 ++#define CONFIG_H261_PARSER 0 ++#define CONFIG_H263_PARSER 0 ++#define CONFIG_H264_PARSER 0 ++#define CONFIG_HEVC_PARSER 0 ++#define CONFIG_MJPEG_PARSER 0 ++#define CONFIG_MLP_PARSER 0 ++#define CONFIG_MPEG4VIDEO_PARSER 0 ++#define CONFIG_MPEGAUDIO_PARSER 1 ++#define CONFIG_MPEGVIDEO_PARSER 0 ++#define CONFIG_OPUS_PARSER 1 ++#define CONFIG_PNG_PARSER 0 ++#define CONFIG_PNM_PARSER 0 ++#define CONFIG_RV30_PARSER 0 ++#define CONFIG_RV40_PARSER 0 ++#define CONFIG_SBC_PARSER 0 ++#define CONFIG_SIPR_PARSER 0 ++#define CONFIG_TAK_PARSER 0 ++#define CONFIG_VC1_PARSER 0 ++#define CONFIG_VORBIS_PARSER 1 ++#define CONFIG_VP3_PARSER 1 ++#define CONFIG_VP8_PARSER 1 ++#define CONFIG_VP9_PARSER 1 ++#define CONFIG_WEBP_PARSER 0 ++#define CONFIG_XMA_PARSER 0 ++#define CONFIG_ALSA_INDEV 0 ++#define CONFIG_ANDROID_CAMERA_INDEV 0 ++#define CONFIG_AVFOUNDATION_INDEV 0 ++#define CONFIG_BKTR_INDEV 0 ++#define CONFIG_DECKLINK_INDEV 0 ++#define CONFIG_DSHOW_INDEV 0 ++#define CONFIG_FBDEV_INDEV 0 ++#define CONFIG_GDIGRAB_INDEV 0 ++#define CONFIG_IEC61883_INDEV 0 ++#define CONFIG_JACK_INDEV 0 ++#define CONFIG_KMSGRAB_INDEV 0 ++#define CONFIG_LAVFI_INDEV 0 ++#define CONFIG_OPENAL_INDEV 0 ++#define CONFIG_OSS_INDEV 0 ++#define CONFIG_PULSE_INDEV 0 ++#define CONFIG_SNDIO_INDEV 0 ++#define CONFIG_V4L2_INDEV 0 ++#define CONFIG_VFWCAP_INDEV 0 ++#define CONFIG_XCBGRAB_INDEV 0 ++#define CONFIG_LIBCDIO_INDEV 0 ++#define CONFIG_LIBDC1394_INDEV 0 ++#define CONFIG_ALSA_OUTDEV 0 ++#define CONFIG_CACA_OUTDEV 0 ++#define CONFIG_DECKLINK_OUTDEV 0 ++#define CONFIG_FBDEV_OUTDEV 0 ++#define CONFIG_OPENGL_OUTDEV 0 ++#define CONFIG_OSS_OUTDEV 0 ++#define CONFIG_PULSE_OUTDEV 0 ++#define CONFIG_SDL2_OUTDEV 0 ++#define CONFIG_SNDIO_OUTDEV 0 ++#define CONFIG_V4L2_OUTDEV 0 ++#define CONFIG_XV_OUTDEV 0 ++#define CONFIG_ABENCH_FILTER 0 ++#define CONFIG_ACOMPRESSOR_FILTER 0 ++#define CONFIG_ACONTRAST_FILTER 0 ++#define CONFIG_ACOPY_FILTER 0 ++#define 
CONFIG_ACUE_FILTER 0 ++#define CONFIG_ACROSSFADE_FILTER 0 ++#define CONFIG_ACROSSOVER_FILTER 0 ++#define CONFIG_ACRUSHER_FILTER 0 ++#define CONFIG_ADECLICK_FILTER 0 ++#define CONFIG_ADECLIP_FILTER 0 ++#define CONFIG_ADELAY_FILTER 0 ++#define CONFIG_ADERIVATIVE_FILTER 0 ++#define CONFIG_AECHO_FILTER 0 ++#define CONFIG_AEMPHASIS_FILTER 0 ++#define CONFIG_AEVAL_FILTER 0 ++#define CONFIG_AFADE_FILTER 0 ++#define CONFIG_AFFTDN_FILTER 0 ++#define CONFIG_AFFTFILT_FILTER 0 ++#define CONFIG_AFIR_FILTER 0 ++#define CONFIG_AFORMAT_FILTER 0 ++#define CONFIG_AGATE_FILTER 0 ++#define CONFIG_AIIR_FILTER 0 ++#define CONFIG_AINTEGRAL_FILTER 0 ++#define CONFIG_AINTERLEAVE_FILTER 0 ++#define CONFIG_ALIMITER_FILTER 0 ++#define CONFIG_ALLPASS_FILTER 0 ++#define CONFIG_ALOOP_FILTER 0 ++#define CONFIG_AMERGE_FILTER 0 ++#define CONFIG_AMETADATA_FILTER 0 ++#define CONFIG_AMIX_FILTER 0 ++#define CONFIG_AMULTIPLY_FILTER 0 ++#define CONFIG_ANEQUALIZER_FILTER 0 ++#define CONFIG_ANLMDN_FILTER 0 ++#define CONFIG_ANLMS_FILTER 0 ++#define CONFIG_ANULL_FILTER 0 ++#define CONFIG_APAD_FILTER 0 ++#define CONFIG_APERMS_FILTER 0 ++#define CONFIG_APHASER_FILTER 0 ++#define CONFIG_APULSATOR_FILTER 0 ++#define CONFIG_AREALTIME_FILTER 0 ++#define CONFIG_ARESAMPLE_FILTER 0 ++#define CONFIG_AREVERSE_FILTER 0 ++#define CONFIG_ARNNDN_FILTER 0 ++#define CONFIG_ASELECT_FILTER 0 ++#define CONFIG_ASENDCMD_FILTER 0 ++#define CONFIG_ASETNSAMPLES_FILTER 0 ++#define CONFIG_ASETPTS_FILTER 0 ++#define CONFIG_ASETRATE_FILTER 0 ++#define CONFIG_ASETTB_FILTER 0 ++#define CONFIG_ASHOWINFO_FILTER 0 ++#define CONFIG_ASIDEDATA_FILTER 0 ++#define CONFIG_ASOFTCLIP_FILTER 0 ++#define CONFIG_ASPLIT_FILTER 0 ++#define CONFIG_ASR_FILTER 0 ++#define CONFIG_ASTATS_FILTER 0 ++#define CONFIG_ASTREAMSELECT_FILTER 0 ++#define CONFIG_ATEMPO_FILTER 0 ++#define CONFIG_ATRIM_FILTER 0 ++#define CONFIG_AXCORRELATE_FILTER 0 ++#define CONFIG_AZMQ_FILTER 0 ++#define CONFIG_BANDPASS_FILTER 0 ++#define CONFIG_BANDREJECT_FILTER 0 ++#define CONFIG_BASS_FILTER 0 ++#define CONFIG_BIQUAD_FILTER 0 ++#define CONFIG_BS2B_FILTER 0 ++#define CONFIG_CHROMABER_VULKAN_FILTER 0 ++#define CONFIG_CHANNELMAP_FILTER 0 ++#define CONFIG_CHANNELSPLIT_FILTER 0 ++#define CONFIG_CHORUS_FILTER 0 ++#define CONFIG_COMPAND_FILTER 0 ++#define CONFIG_COMPENSATIONDELAY_FILTER 0 ++#define CONFIG_CROSSFEED_FILTER 0 ++#define CONFIG_CRYSTALIZER_FILTER 0 ++#define CONFIG_DCSHIFT_FILTER 0 ++#define CONFIG_DEESSER_FILTER 0 ++#define CONFIG_DRMETER_FILTER 0 ++#define CONFIG_DYNAUDNORM_FILTER 0 ++#define CONFIG_EARWAX_FILTER 0 ++#define CONFIG_EBUR128_FILTER 0 ++#define CONFIG_EQUALIZER_FILTER 0 ++#define CONFIG_EXTRASTEREO_FILTER 0 ++#define CONFIG_FIREQUALIZER_FILTER 0 ++#define CONFIG_FLANGER_FILTER 0 ++#define CONFIG_HAAS_FILTER 0 ++#define CONFIG_HDCD_FILTER 0 ++#define CONFIG_HEADPHONE_FILTER 0 ++#define CONFIG_HIGHPASS_FILTER 0 ++#define CONFIG_HIGHSHELF_FILTER 0 ++#define CONFIG_JOIN_FILTER 0 ++#define CONFIG_LADSPA_FILTER 0 ++#define CONFIG_LOUDNORM_FILTER 0 ++#define CONFIG_LOWPASS_FILTER 0 ++#define CONFIG_LOWSHELF_FILTER 0 ++#define CONFIG_LV2_FILTER 0 ++#define CONFIG_MCOMPAND_FILTER 0 ++#define CONFIG_PAN_FILTER 0 ++#define CONFIG_REPLAYGAIN_FILTER 0 ++#define CONFIG_RESAMPLE_FILTER 0 ++#define CONFIG_RUBBERBAND_FILTER 0 ++#define CONFIG_SIDECHAINCOMPRESS_FILTER 0 ++#define CONFIG_SIDECHAINGATE_FILTER 0 ++#define CONFIG_SILENCEDETECT_FILTER 0 ++#define CONFIG_SILENCEREMOVE_FILTER 0 ++#define CONFIG_SOFALIZER_FILTER 0 ++#define CONFIG_STEREOTOOLS_FILTER 0 ++#define CONFIG_STEREOWIDEN_FILTER 0 
++#define CONFIG_SUPEREQUALIZER_FILTER 0 ++#define CONFIG_SURROUND_FILTER 0 ++#define CONFIG_TREBLE_FILTER 0 ++#define CONFIG_TREMOLO_FILTER 0 ++#define CONFIG_VIBRATO_FILTER 0 ++#define CONFIG_VOLUME_FILTER 0 ++#define CONFIG_VOLUMEDETECT_FILTER 0 ++#define CONFIG_AEVALSRC_FILTER 0 ++#define CONFIG_AFIRSRC_FILTER 0 ++#define CONFIG_ANOISESRC_FILTER 0 ++#define CONFIG_ANULLSRC_FILTER 0 ++#define CONFIG_FLITE_FILTER 0 ++#define CONFIG_HILBERT_FILTER 0 ++#define CONFIG_SINC_FILTER 0 ++#define CONFIG_SINE_FILTER 0 ++#define CONFIG_ANULLSINK_FILTER 0 ++#define CONFIG_ADDROI_FILTER 0 ++#define CONFIG_ALPHAEXTRACT_FILTER 0 ++#define CONFIG_ALPHAMERGE_FILTER 0 ++#define CONFIG_AMPLIFY_FILTER 0 ++#define CONFIG_ASS_FILTER 0 ++#define CONFIG_ATADENOISE_FILTER 0 ++#define CONFIG_AVGBLUR_FILTER 0 ++#define CONFIG_AVGBLUR_OPENCL_FILTER 0 ++#define CONFIG_AVGBLUR_VULKAN_FILTER 0 ++#define CONFIG_BBOX_FILTER 0 ++#define CONFIG_BENCH_FILTER 0 ++#define CONFIG_BILATERAL_FILTER 0 ++#define CONFIG_BITPLANENOISE_FILTER 0 ++#define CONFIG_BLACKDETECT_FILTER 0 ++#define CONFIG_BLACKFRAME_FILTER 0 ++#define CONFIG_BLEND_FILTER 0 ++#define CONFIG_BM3D_FILTER 0 ++#define CONFIG_BOXBLUR_FILTER 0 ++#define CONFIG_BOXBLUR_OPENCL_FILTER 0 ++#define CONFIG_BWDIF_FILTER 0 ++#define CONFIG_CAS_FILTER 0 ++#define CONFIG_CHROMAHOLD_FILTER 0 ++#define CONFIG_CHROMAKEY_FILTER 0 ++#define CONFIG_CHROMASHIFT_FILTER 0 ++#define CONFIG_CIESCOPE_FILTER 0 ++#define CONFIG_CODECVIEW_FILTER 0 ++#define CONFIG_COLORBALANCE_FILTER 0 ++#define CONFIG_COLORCHANNELMIXER_FILTER 0 ++#define CONFIG_COLORKEY_FILTER 0 ++#define CONFIG_COLORKEY_OPENCL_FILTER 0 ++#define CONFIG_COLORHOLD_FILTER 0 ++#define CONFIG_COLORLEVELS_FILTER 0 ++#define CONFIG_COLORMATRIX_FILTER 0 ++#define CONFIG_COLORSPACE_FILTER 0 ++#define CONFIG_CONVOLUTION_FILTER 0 ++#define CONFIG_CONVOLUTION_OPENCL_FILTER 0 ++#define CONFIG_CONVOLVE_FILTER 0 ++#define CONFIG_COPY_FILTER 0 ++#define CONFIG_COREIMAGE_FILTER 0 ++#define CONFIG_COVER_RECT_FILTER 0 ++#define CONFIG_CROP_FILTER 0 ++#define CONFIG_CROPDETECT_FILTER 0 ++#define CONFIG_CUE_FILTER 0 ++#define CONFIG_CURVES_FILTER 0 ++#define CONFIG_DATASCOPE_FILTER 0 ++#define CONFIG_DCTDNOIZ_FILTER 0 ++#define CONFIG_DEBAND_FILTER 0 ++#define CONFIG_DEBLOCK_FILTER 0 ++#define CONFIG_DECIMATE_FILTER 0 ++#define CONFIG_DECONVOLVE_FILTER 0 ++#define CONFIG_DEDOT_FILTER 0 ++#define CONFIG_DEFLATE_FILTER 0 ++#define CONFIG_DEFLICKER_FILTER 0 ++#define CONFIG_DEINTERLACE_QSV_FILTER 0 ++#define CONFIG_DEINTERLACE_VAAPI_FILTER 0 ++#define CONFIG_DEJUDDER_FILTER 0 ++#define CONFIG_DELOGO_FILTER 0 ++#define CONFIG_DENOISE_VAAPI_FILTER 0 ++#define CONFIG_DERAIN_FILTER 0 ++#define CONFIG_DESHAKE_FILTER 0 ++#define CONFIG_DESHAKE_OPENCL_FILTER 0 ++#define CONFIG_DESPILL_FILTER 0 ++#define CONFIG_DETELECINE_FILTER 0 ++#define CONFIG_DILATION_FILTER 0 ++#define CONFIG_DILATION_OPENCL_FILTER 0 ++#define CONFIG_DISPLACE_FILTER 0 ++#define CONFIG_DNN_PROCESSING_FILTER 0 ++#define CONFIG_DOUBLEWEAVE_FILTER 0 ++#define CONFIG_DRAWBOX_FILTER 0 ++#define CONFIG_DRAWGRAPH_FILTER 0 ++#define CONFIG_DRAWGRID_FILTER 0 ++#define CONFIG_DRAWTEXT_FILTER 0 ++#define CONFIG_EDGEDETECT_FILTER 0 ++#define CONFIG_ELBG_FILTER 0 ++#define CONFIG_ENTROPY_FILTER 0 ++#define CONFIG_EQ_FILTER 0 ++#define CONFIG_EROSION_FILTER 0 ++#define CONFIG_EROSION_OPENCL_FILTER 0 ++#define CONFIG_EXTRACTPLANES_FILTER 0 ++#define CONFIG_FADE_FILTER 0 ++#define CONFIG_FFTDNOIZ_FILTER 0 ++#define CONFIG_FFTFILT_FILTER 0 ++#define CONFIG_FIELD_FILTER 0 ++#define 
CONFIG_FIELDHINT_FILTER 0 ++#define CONFIG_FIELDMATCH_FILTER 0 ++#define CONFIG_FIELDORDER_FILTER 0 ++#define CONFIG_FILLBORDERS_FILTER 0 ++#define CONFIG_FIND_RECT_FILTER 0 ++#define CONFIG_FLOODFILL_FILTER 0 ++#define CONFIG_FORMAT_FILTER 0 ++#define CONFIG_FPS_FILTER 0 ++#define CONFIG_FRAMEPACK_FILTER 0 ++#define CONFIG_FRAMERATE_FILTER 0 ++#define CONFIG_FRAMESTEP_FILTER 0 ++#define CONFIG_FREEZEDETECT_FILTER 0 ++#define CONFIG_FREEZEFRAMES_FILTER 0 ++#define CONFIG_FREI0R_FILTER 0 ++#define CONFIG_FSPP_FILTER 0 ++#define CONFIG_GBLUR_FILTER 0 ++#define CONFIG_GEQ_FILTER 0 ++#define CONFIG_GRADFUN_FILTER 0 ++#define CONFIG_GRAPHMONITOR_FILTER 0 ++#define CONFIG_GREYEDGE_FILTER 0 ++#define CONFIG_HALDCLUT_FILTER 0 ++#define CONFIG_HFLIP_FILTER 0 ++#define CONFIG_HISTEQ_FILTER 0 ++#define CONFIG_HISTOGRAM_FILTER 0 ++#define CONFIG_HQDN3D_FILTER 0 ++#define CONFIG_HQX_FILTER 0 ++#define CONFIG_HSTACK_FILTER 0 ++#define CONFIG_HUE_FILTER 0 ++#define CONFIG_HWDOWNLOAD_FILTER 0 ++#define CONFIG_HWMAP_FILTER 0 ++#define CONFIG_HWUPLOAD_FILTER 0 ++#define CONFIG_HWUPLOAD_CUDA_FILTER 0 ++#define CONFIG_HYSTERESIS_FILTER 0 ++#define CONFIG_IDET_FILTER 0 ++#define CONFIG_IL_FILTER 0 ++#define CONFIG_INFLATE_FILTER 0 ++#define CONFIG_INTERLACE_FILTER 0 ++#define CONFIG_INTERLEAVE_FILTER 0 ++#define CONFIG_KERNDEINT_FILTER 0 ++#define CONFIG_LAGFUN_FILTER 0 ++#define CONFIG_LENSCORRECTION_FILTER 0 ++#define CONFIG_LENSFUN_FILTER 0 ++#define CONFIG_LIBVMAF_FILTER 0 ++#define CONFIG_LIMITER_FILTER 0 ++#define CONFIG_LOOP_FILTER 0 ++#define CONFIG_LUMAKEY_FILTER 0 ++#define CONFIG_LUT_FILTER 0 ++#define CONFIG_LUT1D_FILTER 0 ++#define CONFIG_LUT2_FILTER 0 ++#define CONFIG_LUT3D_FILTER 0 ++#define CONFIG_LUTRGB_FILTER 0 ++#define CONFIG_LUTYUV_FILTER 0 ++#define CONFIG_MASKEDCLAMP_FILTER 0 ++#define CONFIG_MASKEDMAX_FILTER 0 ++#define CONFIG_MASKEDMERGE_FILTER 0 ++#define CONFIG_MASKEDMIN_FILTER 0 ++#define CONFIG_MASKEDTHRESHOLD_FILTER 0 ++#define CONFIG_MASKFUN_FILTER 0 ++#define CONFIG_MCDEINT_FILTER 0 ++#define CONFIG_MEDIAN_FILTER 0 ++#define CONFIG_MERGEPLANES_FILTER 0 ++#define CONFIG_MESTIMATE_FILTER 0 ++#define CONFIG_METADATA_FILTER 0 ++#define CONFIG_MIDEQUALIZER_FILTER 0 ++#define CONFIG_MINTERPOLATE_FILTER 0 ++#define CONFIG_MIX_FILTER 0 ++#define CONFIG_MPDECIMATE_FILTER 0 ++#define CONFIG_NEGATE_FILTER 0 ++#define CONFIG_NLMEANS_FILTER 0 ++#define CONFIG_NLMEANS_OPENCL_FILTER 0 ++#define CONFIG_NNEDI_FILTER 0 ++#define CONFIG_NOFORMAT_FILTER 0 ++#define CONFIG_NOISE_FILTER 0 ++#define CONFIG_NORMALIZE_FILTER 0 ++#define CONFIG_NULL_FILTER 0 ++#define CONFIG_OCR_FILTER 0 ++#define CONFIG_OCV_FILTER 0 ++#define CONFIG_OSCILLOSCOPE_FILTER 0 ++#define CONFIG_OVERLAY_FILTER 0 ++#define CONFIG_OVERLAY_OPENCL_FILTER 0 ++#define CONFIG_OVERLAY_QSV_FILTER 0 ++#define CONFIG_OVERLAY_VULKAN_FILTER 0 ++#define CONFIG_OVERLAY_CUDA_FILTER 0 ++#define CONFIG_OWDENOISE_FILTER 0 ++#define CONFIG_PAD_FILTER 0 ++#define CONFIG_PAD_OPENCL_FILTER 0 ++#define CONFIG_PALETTEGEN_FILTER 0 ++#define CONFIG_PALETTEUSE_FILTER 0 ++#define CONFIG_PERMS_FILTER 0 ++#define CONFIG_PERSPECTIVE_FILTER 0 ++#define CONFIG_PHASE_FILTER 0 ++#define CONFIG_PHOTOSENSITIVITY_FILTER 0 ++#define CONFIG_PIXDESCTEST_FILTER 0 ++#define CONFIG_PIXSCOPE_FILTER 0 ++#define CONFIG_PP_FILTER 0 ++#define CONFIG_PP7_FILTER 0 ++#define CONFIG_PREMULTIPLY_FILTER 0 ++#define CONFIG_PREWITT_FILTER 0 ++#define CONFIG_PREWITT_OPENCL_FILTER 0 ++#define CONFIG_PROCAMP_VAAPI_FILTER 0 ++#define CONFIG_PROGRAM_OPENCL_FILTER 0 ++#define 
CONFIG_PSEUDOCOLOR_FILTER 0 ++#define CONFIG_PSNR_FILTER 0 ++#define CONFIG_PULLUP_FILTER 0 ++#define CONFIG_QP_FILTER 0 ++#define CONFIG_RANDOM_FILTER 0 ++#define CONFIG_READEIA608_FILTER 0 ++#define CONFIG_READVITC_FILTER 0 ++#define CONFIG_REALTIME_FILTER 0 ++#define CONFIG_REMAP_FILTER 0 ++#define CONFIG_REMOVEGRAIN_FILTER 0 ++#define CONFIG_REMOVELOGO_FILTER 0 ++#define CONFIG_REPEATFIELDS_FILTER 0 ++#define CONFIG_REVERSE_FILTER 0 ++#define CONFIG_RGBASHIFT_FILTER 0 ++#define CONFIG_ROBERTS_FILTER 0 ++#define CONFIG_ROBERTS_OPENCL_FILTER 0 ++#define CONFIG_ROTATE_FILTER 0 ++#define CONFIG_SAB_FILTER 0 ++#define CONFIG_SCALE_FILTER 0 ++#define CONFIG_SCALE_CUDA_FILTER 0 ++#define CONFIG_SCALE_NPP_FILTER 0 ++#define CONFIG_SCALE_QSV_FILTER 0 ++#define CONFIG_SCALE_VAAPI_FILTER 0 ++#define CONFIG_SCALE_VULKAN_FILTER 0 ++#define CONFIG_SCALE2REF_FILTER 0 ++#define CONFIG_SCROLL_FILTER 0 ++#define CONFIG_SELECT_FILTER 0 ++#define CONFIG_SELECTIVECOLOR_FILTER 0 ++#define CONFIG_SENDCMD_FILTER 0 ++#define CONFIG_SEPARATEFIELDS_FILTER 0 ++#define CONFIG_SETDAR_FILTER 0 ++#define CONFIG_SETFIELD_FILTER 0 ++#define CONFIG_SETPARAMS_FILTER 0 ++#define CONFIG_SETPTS_FILTER 0 ++#define CONFIG_SETRANGE_FILTER 0 ++#define CONFIG_SETSAR_FILTER 0 ++#define CONFIG_SETTB_FILTER 0 ++#define CONFIG_SHARPNESS_VAAPI_FILTER 0 ++#define CONFIG_SHOWINFO_FILTER 0 ++#define CONFIG_SHOWPALETTE_FILTER 0 ++#define CONFIG_SHUFFLEFRAMES_FILTER 0 ++#define CONFIG_SHUFFLEPLANES_FILTER 0 ++#define CONFIG_SIDEDATA_FILTER 0 ++#define CONFIG_SIGNALSTATS_FILTER 0 ++#define CONFIG_SIGNATURE_FILTER 0 ++#define CONFIG_SMARTBLUR_FILTER 0 ++#define CONFIG_SOBEL_FILTER 0 ++#define CONFIG_SOBEL_OPENCL_FILTER 0 ++#define CONFIG_SPLIT_FILTER 0 ++#define CONFIG_SPP_FILTER 0 ++#define CONFIG_SR_FILTER 0 ++#define CONFIG_SSIM_FILTER 0 ++#define CONFIG_STEREO3D_FILTER 0 ++#define CONFIG_STREAMSELECT_FILTER 0 ++#define CONFIG_SUBTITLES_FILTER 0 ++#define CONFIG_SUPER2XSAI_FILTER 0 ++#define CONFIG_SWAPRECT_FILTER 0 ++#define CONFIG_SWAPUV_FILTER 0 ++#define CONFIG_TBLEND_FILTER 0 ++#define CONFIG_TELECINE_FILTER 0 ++#define CONFIG_THISTOGRAM_FILTER 0 ++#define CONFIG_THRESHOLD_FILTER 0 ++#define CONFIG_THUMBNAIL_FILTER 0 ++#define CONFIG_THUMBNAIL_CUDA_FILTER 0 ++#define CONFIG_TILE_FILTER 0 ++#define CONFIG_TINTERLACE_FILTER 0 ++#define CONFIG_TLUT2_FILTER 0 ++#define CONFIG_TMEDIAN_FILTER 0 ++#define CONFIG_TMIX_FILTER 0 ++#define CONFIG_TONEMAP_FILTER 0 ++#define CONFIG_TONEMAP_OPENCL_FILTER 0 ++#define CONFIG_TONEMAP_VAAPI_FILTER 0 ++#define CONFIG_TPAD_FILTER 0 ++#define CONFIG_TRANSPOSE_FILTER 0 ++#define CONFIG_TRANSPOSE_NPP_FILTER 0 ++#define CONFIG_TRANSPOSE_OPENCL_FILTER 0 ++#define CONFIG_TRANSPOSE_VAAPI_FILTER 0 ++#define CONFIG_TRIM_FILTER 0 ++#define CONFIG_UNPREMULTIPLY_FILTER 0 ++#define CONFIG_UNSHARP_FILTER 0 ++#define CONFIG_UNSHARP_OPENCL_FILTER 0 ++#define CONFIG_USPP_FILTER 0 ++#define CONFIG_V360_FILTER 0 ++#define CONFIG_VAGUEDENOISER_FILTER 0 ++#define CONFIG_VECTORSCOPE_FILTER 0 ++#define CONFIG_VFLIP_FILTER 0 ++#define CONFIG_VFRDET_FILTER 0 ++#define CONFIG_VIBRANCE_FILTER 0 ++#define CONFIG_VIDSTABDETECT_FILTER 0 ++#define CONFIG_VIDSTABTRANSFORM_FILTER 0 ++#define CONFIG_VIGNETTE_FILTER 0 ++#define CONFIG_VMAFMOTION_FILTER 0 ++#define CONFIG_VPP_QSV_FILTER 0 ++#define CONFIG_VSTACK_FILTER 0 ++#define CONFIG_W3FDIF_FILTER 0 ++#define CONFIG_WAVEFORM_FILTER 0 ++#define CONFIG_WEAVE_FILTER 0 ++#define CONFIG_XBR_FILTER 0 ++#define CONFIG_XFADE_FILTER 0 ++#define CONFIG_XFADE_OPENCL_FILTER 0 ++#define 
CONFIG_XMEDIAN_FILTER 0 ++#define CONFIG_XSTACK_FILTER 0 ++#define CONFIG_YADIF_FILTER 0 ++#define CONFIG_YADIF_CUDA_FILTER 0 ++#define CONFIG_YAEPBLUR_FILTER 0 ++#define CONFIG_ZMQ_FILTER 0 ++#define CONFIG_ZOOMPAN_FILTER 0 ++#define CONFIG_ZSCALE_FILTER 0 ++#define CONFIG_ALLRGB_FILTER 0 ++#define CONFIG_ALLYUV_FILTER 0 ++#define CONFIG_CELLAUTO_FILTER 0 ++#define CONFIG_COLOR_FILTER 0 ++#define CONFIG_COREIMAGESRC_FILTER 0 ++#define CONFIG_FREI0R_SRC_FILTER 0 ++#define CONFIG_HALDCLUTSRC_FILTER 0 ++#define CONFIG_LIFE_FILTER 0 ++#define CONFIG_MANDELBROT_FILTER 0 ++#define CONFIG_MPTESTSRC_FILTER 0 ++#define CONFIG_NULLSRC_FILTER 0 ++#define CONFIG_OPENCLSRC_FILTER 0 ++#define CONFIG_PAL75BARS_FILTER 0 ++#define CONFIG_PAL100BARS_FILTER 0 ++#define CONFIG_RGBTESTSRC_FILTER 0 ++#define CONFIG_SIERPINSKI_FILTER 0 ++#define CONFIG_SMPTEBARS_FILTER 0 ++#define CONFIG_SMPTEHDBARS_FILTER 0 ++#define CONFIG_TESTSRC_FILTER 0 ++#define CONFIG_TESTSRC2_FILTER 0 ++#define CONFIG_YUVTESTSRC_FILTER 0 ++#define CONFIG_NULLSINK_FILTER 0 ++#define CONFIG_ABITSCOPE_FILTER 0 ++#define CONFIG_ADRAWGRAPH_FILTER 0 ++#define CONFIG_AGRAPHMONITOR_FILTER 0 ++#define CONFIG_AHISTOGRAM_FILTER 0 ++#define CONFIG_APHASEMETER_FILTER 0 ++#define CONFIG_AVECTORSCOPE_FILTER 0 ++#define CONFIG_CONCAT_FILTER 0 ++#define CONFIG_SHOWCQT_FILTER 0 ++#define CONFIG_SHOWFREQS_FILTER 0 ++#define CONFIG_SHOWSPATIAL_FILTER 0 ++#define CONFIG_SHOWSPECTRUM_FILTER 0 ++#define CONFIG_SHOWSPECTRUMPIC_FILTER 0 ++#define CONFIG_SHOWVOLUME_FILTER 0 ++#define CONFIG_SHOWWAVES_FILTER 0 ++#define CONFIG_SHOWWAVESPIC_FILTER 0 ++#define CONFIG_SPECTRUMSYNTH_FILTER 0 ++#define CONFIG_AMOVIE_FILTER 0 ++#define CONFIG_MOVIE_FILTER 0 ++#define CONFIG_AFIFO_FILTER 0 ++#define CONFIG_FIFO_FILTER 0 ++#define CONFIG_AA_DEMUXER 0 ++#define CONFIG_AAC_DEMUXER 0 ++#define CONFIG_AC3_DEMUXER 0 ++#define CONFIG_ACM_DEMUXER 0 ++#define CONFIG_ACT_DEMUXER 0 ++#define CONFIG_ADF_DEMUXER 0 ++#define CONFIG_ADP_DEMUXER 0 ++#define CONFIG_ADS_DEMUXER 0 ++#define CONFIG_ADX_DEMUXER 0 ++#define CONFIG_AEA_DEMUXER 0 ++#define CONFIG_AFC_DEMUXER 0 ++#define CONFIG_AIFF_DEMUXER 0 ++#define CONFIG_AIX_DEMUXER 0 ++#define CONFIG_ALP_DEMUXER 0 ++#define CONFIG_AMR_DEMUXER 0 ++#define CONFIG_AMRNB_DEMUXER 0 ++#define CONFIG_AMRWB_DEMUXER 0 ++#define CONFIG_ANM_DEMUXER 0 ++#define CONFIG_APC_DEMUXER 0 ++#define CONFIG_APE_DEMUXER 0 ++#define CONFIG_APM_DEMUXER 0 ++#define CONFIG_APNG_DEMUXER 0 ++#define CONFIG_APTX_DEMUXER 0 ++#define CONFIG_APTX_HD_DEMUXER 0 ++#define CONFIG_AQTITLE_DEMUXER 0 ++#define CONFIG_ARGO_ASF_DEMUXER 0 ++#define CONFIG_ASF_DEMUXER 0 ++#define CONFIG_ASF_O_DEMUXER 0 ++#define CONFIG_ASS_DEMUXER 0 ++#define CONFIG_AST_DEMUXER 0 ++#define CONFIG_AU_DEMUXER 0 ++#define CONFIG_AV1_DEMUXER 0 ++#define CONFIG_AVI_DEMUXER 0 ++#define CONFIG_AVISYNTH_DEMUXER 0 ++#define CONFIG_AVR_DEMUXER 0 ++#define CONFIG_AVS_DEMUXER 0 ++#define CONFIG_AVS2_DEMUXER 0 ++#define CONFIG_BETHSOFTVID_DEMUXER 0 ++#define CONFIG_BFI_DEMUXER 0 ++#define CONFIG_BINTEXT_DEMUXER 0 ++#define CONFIG_BINK_DEMUXER 0 ++#define CONFIG_BIT_DEMUXER 0 ++#define CONFIG_BMV_DEMUXER 0 ++#define CONFIG_BFSTM_DEMUXER 0 ++#define CONFIG_BRSTM_DEMUXER 0 ++#define CONFIG_BOA_DEMUXER 0 ++#define CONFIG_C93_DEMUXER 0 ++#define CONFIG_CAF_DEMUXER 0 ++#define CONFIG_CAVSVIDEO_DEMUXER 0 ++#define CONFIG_CDG_DEMUXER 0 ++#define CONFIG_CDXL_DEMUXER 0 ++#define CONFIG_CINE_DEMUXER 0 ++#define CONFIG_CODEC2_DEMUXER 0 ++#define CONFIG_CODEC2RAW_DEMUXER 0 ++#define CONFIG_CONCAT_DEMUXER 0 ++#define 
CONFIG_DASH_DEMUXER 0 ++#define CONFIG_DATA_DEMUXER 0 ++#define CONFIG_DAUD_DEMUXER 0 ++#define CONFIG_DCSTR_DEMUXER 0 ++#define CONFIG_DERF_DEMUXER 0 ++#define CONFIG_DFA_DEMUXER 0 ++#define CONFIG_DHAV_DEMUXER 0 ++#define CONFIG_DIRAC_DEMUXER 0 ++#define CONFIG_DNXHD_DEMUXER 0 ++#define CONFIG_DSF_DEMUXER 0 ++#define CONFIG_DSICIN_DEMUXER 0 ++#define CONFIG_DSS_DEMUXER 0 ++#define CONFIG_DTS_DEMUXER 0 ++#define CONFIG_DTSHD_DEMUXER 0 ++#define CONFIG_DV_DEMUXER 0 ++#define CONFIG_DVBSUB_DEMUXER 0 ++#define CONFIG_DVBTXT_DEMUXER 0 ++#define CONFIG_DXA_DEMUXER 0 ++#define CONFIG_EA_DEMUXER 0 ++#define CONFIG_EA_CDATA_DEMUXER 0 ++#define CONFIG_EAC3_DEMUXER 0 ++#define CONFIG_EPAF_DEMUXER 0 ++#define CONFIG_FFMETADATA_DEMUXER 0 ++#define CONFIG_FILMSTRIP_DEMUXER 0 ++#define CONFIG_FITS_DEMUXER 0 ++#define CONFIG_FLAC_DEMUXER 1 ++#define CONFIG_FLIC_DEMUXER 0 ++#define CONFIG_FLV_DEMUXER 0 ++#define CONFIG_LIVE_FLV_DEMUXER 0 ++#define CONFIG_FOURXM_DEMUXER 0 ++#define CONFIG_FRM_DEMUXER 0 ++#define CONFIG_FSB_DEMUXER 0 ++#define CONFIG_FWSE_DEMUXER 0 ++#define CONFIG_G722_DEMUXER 0 ++#define CONFIG_G723_1_DEMUXER 0 ++#define CONFIG_G726_DEMUXER 0 ++#define CONFIG_G726LE_DEMUXER 0 ++#define CONFIG_G729_DEMUXER 0 ++#define CONFIG_GDV_DEMUXER 0 ++#define CONFIG_GENH_DEMUXER 0 ++#define CONFIG_GIF_DEMUXER 0 ++#define CONFIG_GSM_DEMUXER 0 ++#define CONFIG_GXF_DEMUXER 0 ++#define CONFIG_H261_DEMUXER 0 ++#define CONFIG_H263_DEMUXER 0 ++#define CONFIG_H264_DEMUXER 0 ++#define CONFIG_HCA_DEMUXER 0 ++#define CONFIG_HCOM_DEMUXER 0 ++#define CONFIG_HEVC_DEMUXER 0 ++#define CONFIG_HLS_DEMUXER 0 ++#define CONFIG_HNM_DEMUXER 0 ++#define CONFIG_ICO_DEMUXER 0 ++#define CONFIG_IDCIN_DEMUXER 0 ++#define CONFIG_IDF_DEMUXER 0 ++#define CONFIG_IFF_DEMUXER 0 ++#define CONFIG_IFV_DEMUXER 0 ++#define CONFIG_ILBC_DEMUXER 0 ++#define CONFIG_IMAGE2_DEMUXER 0 ++#define CONFIG_IMAGE2PIPE_DEMUXER 0 ++#define CONFIG_IMAGE2_ALIAS_PIX_DEMUXER 0 ++#define CONFIG_IMAGE2_BRENDER_PIX_DEMUXER 0 ++#define CONFIG_INGENIENT_DEMUXER 0 ++#define CONFIG_IPMOVIE_DEMUXER 0 ++#define CONFIG_IRCAM_DEMUXER 0 ++#define CONFIG_ISS_DEMUXER 0 ++#define CONFIG_IV8_DEMUXER 0 ++#define CONFIG_IVF_DEMUXER 0 ++#define CONFIG_IVR_DEMUXER 0 ++#define CONFIG_JACOSUB_DEMUXER 0 ++#define CONFIG_JV_DEMUXER 0 ++#define CONFIG_KUX_DEMUXER 0 ++#define CONFIG_KVAG_DEMUXER 0 ++#define CONFIG_LMLM4_DEMUXER 0 ++#define CONFIG_LOAS_DEMUXER 0 ++#define CONFIG_LRC_DEMUXER 0 ++#define CONFIG_LVF_DEMUXER 0 ++#define CONFIG_LXF_DEMUXER 0 ++#define CONFIG_M4V_DEMUXER 0 ++#define CONFIG_MATROSKA_DEMUXER 1 ++#define CONFIG_MGSTS_DEMUXER 0 ++#define CONFIG_MICRODVD_DEMUXER 0 ++#define CONFIG_MJPEG_DEMUXER 0 ++#define CONFIG_MJPEG_2000_DEMUXER 0 ++#define CONFIG_MLP_DEMUXER 0 ++#define CONFIG_MLV_DEMUXER 0 ++#define CONFIG_MM_DEMUXER 0 ++#define CONFIG_MMF_DEMUXER 0 ++#define CONFIG_MOV_DEMUXER 1 ++#define CONFIG_MP3_DEMUXER 1 ++#define CONFIG_MPC_DEMUXER 0 ++#define CONFIG_MPC8_DEMUXER 0 ++#define CONFIG_MPEGPS_DEMUXER 0 ++#define CONFIG_MPEGTS_DEMUXER 0 ++#define CONFIG_MPEGTSRAW_DEMUXER 0 ++#define CONFIG_MPEGVIDEO_DEMUXER 0 ++#define CONFIG_MPJPEG_DEMUXER 0 ++#define CONFIG_MPL2_DEMUXER 0 ++#define CONFIG_MPSUB_DEMUXER 0 ++#define CONFIG_MSF_DEMUXER 0 ++#define CONFIG_MSNWC_TCP_DEMUXER 0 ++#define CONFIG_MTAF_DEMUXER 0 ++#define CONFIG_MTV_DEMUXER 0 ++#define CONFIG_MUSX_DEMUXER 0 ++#define CONFIG_MV_DEMUXER 0 ++#define CONFIG_MVI_DEMUXER 0 ++#define CONFIG_MXF_DEMUXER 0 ++#define CONFIG_MXG_DEMUXER 0 ++#define CONFIG_NC_DEMUXER 0 ++#define CONFIG_NISTSPHERE_DEMUXER 0 
++#define CONFIG_NSP_DEMUXER 0 ++#define CONFIG_NSV_DEMUXER 0 ++#define CONFIG_NUT_DEMUXER 0 ++#define CONFIG_NUV_DEMUXER 0 ++#define CONFIG_OGG_DEMUXER 1 ++#define CONFIG_OMA_DEMUXER 0 ++#define CONFIG_PAF_DEMUXER 0 ++#define CONFIG_PCM_ALAW_DEMUXER 0 ++#define CONFIG_PCM_MULAW_DEMUXER 0 ++#define CONFIG_PCM_VIDC_DEMUXER 0 ++#define CONFIG_PCM_F64BE_DEMUXER 0 ++#define CONFIG_PCM_F64LE_DEMUXER 0 ++#define CONFIG_PCM_F32BE_DEMUXER 0 ++#define CONFIG_PCM_F32LE_DEMUXER 0 ++#define CONFIG_PCM_S32BE_DEMUXER 0 ++#define CONFIG_PCM_S32LE_DEMUXER 0 ++#define CONFIG_PCM_S24BE_DEMUXER 0 ++#define CONFIG_PCM_S24LE_DEMUXER 0 ++#define CONFIG_PCM_S16BE_DEMUXER 0 ++#define CONFIG_PCM_S16LE_DEMUXER 0 ++#define CONFIG_PCM_S8_DEMUXER 0 ++#define CONFIG_PCM_U32BE_DEMUXER 0 ++#define CONFIG_PCM_U32LE_DEMUXER 0 ++#define CONFIG_PCM_U24BE_DEMUXER 0 ++#define CONFIG_PCM_U24LE_DEMUXER 0 ++#define CONFIG_PCM_U16BE_DEMUXER 0 ++#define CONFIG_PCM_U16LE_DEMUXER 0 ++#define CONFIG_PCM_U8_DEMUXER 0 ++#define CONFIG_PJS_DEMUXER 0 ++#define CONFIG_PMP_DEMUXER 0 ++#define CONFIG_PVA_DEMUXER 0 ++#define CONFIG_PVF_DEMUXER 0 ++#define CONFIG_QCP_DEMUXER 0 ++#define CONFIG_R3D_DEMUXER 0 ++#define CONFIG_RAWVIDEO_DEMUXER 0 ++#define CONFIG_REALTEXT_DEMUXER 0 ++#define CONFIG_REDSPARK_DEMUXER 0 ++#define CONFIG_RL2_DEMUXER 0 ++#define CONFIG_RM_DEMUXER 0 ++#define CONFIG_ROQ_DEMUXER 0 ++#define CONFIG_RPL_DEMUXER 0 ++#define CONFIG_RSD_DEMUXER 0 ++#define CONFIG_RSO_DEMUXER 0 ++#define CONFIG_RTP_DEMUXER 0 ++#define CONFIG_RTSP_DEMUXER 0 ++#define CONFIG_S337M_DEMUXER 0 ++#define CONFIG_SAMI_DEMUXER 0 ++#define CONFIG_SAP_DEMUXER 0 ++#define CONFIG_SBC_DEMUXER 0 ++#define CONFIG_SBG_DEMUXER 0 ++#define CONFIG_SCC_DEMUXER 0 ++#define CONFIG_SDP_DEMUXER 0 ++#define CONFIG_SDR2_DEMUXER 0 ++#define CONFIG_SDS_DEMUXER 0 ++#define CONFIG_SDX_DEMUXER 0 ++#define CONFIG_SEGAFILM_DEMUXER 0 ++#define CONFIG_SER_DEMUXER 0 ++#define CONFIG_SHORTEN_DEMUXER 0 ++#define CONFIG_SIFF_DEMUXER 0 ++#define CONFIG_SLN_DEMUXER 0 ++#define CONFIG_SMACKER_DEMUXER 0 ++#define CONFIG_SMJPEG_DEMUXER 0 ++#define CONFIG_SMUSH_DEMUXER 0 ++#define CONFIG_SOL_DEMUXER 0 ++#define CONFIG_SOX_DEMUXER 0 ++#define CONFIG_SPDIF_DEMUXER 0 ++#define CONFIG_SRT_DEMUXER 0 ++#define CONFIG_STR_DEMUXER 0 ++#define CONFIG_STL_DEMUXER 0 ++#define CONFIG_SUBVIEWER1_DEMUXER 0 ++#define CONFIG_SUBVIEWER_DEMUXER 0 ++#define CONFIG_SUP_DEMUXER 0 ++#define CONFIG_SVAG_DEMUXER 0 ++#define CONFIG_SWF_DEMUXER 0 ++#define CONFIG_TAK_DEMUXER 0 ++#define CONFIG_TEDCAPTIONS_DEMUXER 0 ++#define CONFIG_THP_DEMUXER 0 ++#define CONFIG_THREEDOSTR_DEMUXER 0 ++#define CONFIG_TIERTEXSEQ_DEMUXER 0 ++#define CONFIG_TMV_DEMUXER 0 ++#define CONFIG_TRUEHD_DEMUXER 0 ++#define CONFIG_TTA_DEMUXER 0 ++#define CONFIG_TXD_DEMUXER 0 ++#define CONFIG_TTY_DEMUXER 0 ++#define CONFIG_TY_DEMUXER 0 ++#define CONFIG_V210_DEMUXER 0 ++#define CONFIG_V210X_DEMUXER 0 ++#define CONFIG_VAG_DEMUXER 0 ++#define CONFIG_VC1_DEMUXER 0 ++#define CONFIG_VC1T_DEMUXER 0 ++#define CONFIG_VIVIDAS_DEMUXER 0 ++#define CONFIG_VIVO_DEMUXER 0 ++#define CONFIG_VMD_DEMUXER 0 ++#define CONFIG_VOBSUB_DEMUXER 0 ++#define CONFIG_VOC_DEMUXER 0 ++#define CONFIG_VPK_DEMUXER 0 ++#define CONFIG_VPLAYER_DEMUXER 0 ++#define CONFIG_VQF_DEMUXER 0 ++#define CONFIG_W64_DEMUXER 0 ++#define CONFIG_WAV_DEMUXER 1 ++#define CONFIG_WC3_DEMUXER 0 ++#define CONFIG_WEBM_DASH_MANIFEST_DEMUXER 0 ++#define CONFIG_WEBVTT_DEMUXER 0 ++#define CONFIG_WSAUD_DEMUXER 0 ++#define CONFIG_WSD_DEMUXER 0 ++#define CONFIG_WSVQA_DEMUXER 0 ++#define CONFIG_WTV_DEMUXER 0 
++#define CONFIG_WVE_DEMUXER 0 ++#define CONFIG_WV_DEMUXER 0 ++#define CONFIG_XA_DEMUXER 0 ++#define CONFIG_XBIN_DEMUXER 0 ++#define CONFIG_XMV_DEMUXER 0 ++#define CONFIG_XVAG_DEMUXER 0 ++#define CONFIG_XWMA_DEMUXER 0 ++#define CONFIG_YOP_DEMUXER 0 ++#define CONFIG_YUV4MPEGPIPE_DEMUXER 0 ++#define CONFIG_IMAGE_BMP_PIPE_DEMUXER 0 ++#define CONFIG_IMAGE_DDS_PIPE_DEMUXER 0 ++#define CONFIG_IMAGE_DPX_PIPE_DEMUXER 0 ++#define CONFIG_IMAGE_EXR_PIPE_DEMUXER 0 ++#define CONFIG_IMAGE_GIF_PIPE_DEMUXER 0 ++#define CONFIG_IMAGE_J2K_PIPE_DEMUXER 0 ++#define CONFIG_IMAGE_JPEG_PIPE_DEMUXER 0 ++#define CONFIG_IMAGE_JPEGLS_PIPE_DEMUXER 0 ++#define CONFIG_IMAGE_PAM_PIPE_DEMUXER 0 ++#define CONFIG_IMAGE_PBM_PIPE_DEMUXER 0 ++#define CONFIG_IMAGE_PCX_PIPE_DEMUXER 0 ++#define CONFIG_IMAGE_PGMYUV_PIPE_DEMUXER 0 ++#define CONFIG_IMAGE_PGM_PIPE_DEMUXER 0 ++#define CONFIG_IMAGE_PICTOR_PIPE_DEMUXER 0 ++#define CONFIG_IMAGE_PNG_PIPE_DEMUXER 0 ++#define CONFIG_IMAGE_PPM_PIPE_DEMUXER 0 ++#define CONFIG_IMAGE_PSD_PIPE_DEMUXER 0 ++#define CONFIG_IMAGE_QDRAW_PIPE_DEMUXER 0 ++#define CONFIG_IMAGE_SGI_PIPE_DEMUXER 0 ++#define CONFIG_IMAGE_SVG_PIPE_DEMUXER 0 ++#define CONFIG_IMAGE_SUNRAST_PIPE_DEMUXER 0 ++#define CONFIG_IMAGE_TIFF_PIPE_DEMUXER 0 ++#define CONFIG_IMAGE_WEBP_PIPE_DEMUXER 0 ++#define CONFIG_IMAGE_XPM_PIPE_DEMUXER 0 ++#define CONFIG_IMAGE_XWD_PIPE_DEMUXER 0 ++#define CONFIG_LIBGME_DEMUXER 0 ++#define CONFIG_LIBMODPLUG_DEMUXER 0 ++#define CONFIG_LIBOPENMPT_DEMUXER 0 ++#define CONFIG_VAPOURSYNTH_DEMUXER 0 ++#define CONFIG_A64_MUXER 0 ++#define CONFIG_AC3_MUXER 0 ++#define CONFIG_ADTS_MUXER 0 ++#define CONFIG_ADX_MUXER 0 ++#define CONFIG_AIFF_MUXER 0 ++#define CONFIG_AMR_MUXER 0 ++#define CONFIG_APNG_MUXER 0 ++#define CONFIG_APTX_MUXER 0 ++#define CONFIG_APTX_HD_MUXER 0 ++#define CONFIG_ASF_MUXER 0 ++#define CONFIG_ASS_MUXER 0 ++#define CONFIG_AST_MUXER 0 ++#define CONFIG_ASF_STREAM_MUXER 0 ++#define CONFIG_AU_MUXER 0 ++#define CONFIG_AVI_MUXER 0 ++#define CONFIG_AVM2_MUXER 0 ++#define CONFIG_AVS2_MUXER 0 ++#define CONFIG_BIT_MUXER 0 ++#define CONFIG_CAF_MUXER 0 ++#define CONFIG_CAVSVIDEO_MUXER 0 ++#define CONFIG_CODEC2_MUXER 0 ++#define CONFIG_CODEC2RAW_MUXER 0 ++#define CONFIG_CRC_MUXER 0 ++#define CONFIG_DASH_MUXER 0 ++#define CONFIG_DATA_MUXER 0 ++#define CONFIG_DAUD_MUXER 0 ++#define CONFIG_DIRAC_MUXER 0 ++#define CONFIG_DNXHD_MUXER 0 ++#define CONFIG_DTS_MUXER 0 ++#define CONFIG_DV_MUXER 0 ++#define CONFIG_EAC3_MUXER 0 ++#define CONFIG_F4V_MUXER 0 ++#define CONFIG_FFMETADATA_MUXER 0 ++#define CONFIG_FIFO_MUXER 0 ++#define CONFIG_FIFO_TEST_MUXER 0 ++#define CONFIG_FILMSTRIP_MUXER 0 ++#define CONFIG_FITS_MUXER 0 ++#define CONFIG_FLAC_MUXER 0 ++#define CONFIG_FLV_MUXER 0 ++#define CONFIG_FRAMECRC_MUXER 0 ++#define CONFIG_FRAMEHASH_MUXER 0 ++#define CONFIG_FRAMEMD5_MUXER 0 ++#define CONFIG_G722_MUXER 0 ++#define CONFIG_G723_1_MUXER 0 ++#define CONFIG_G726_MUXER 0 ++#define CONFIG_G726LE_MUXER 0 ++#define CONFIG_GIF_MUXER 0 ++#define CONFIG_GSM_MUXER 0 ++#define CONFIG_GXF_MUXER 0 ++#define CONFIG_H261_MUXER 0 ++#define CONFIG_H263_MUXER 0 ++#define CONFIG_H264_MUXER 0 ++#define CONFIG_HASH_MUXER 0 ++#define CONFIG_HDS_MUXER 0 ++#define CONFIG_HEVC_MUXER 0 ++#define CONFIG_HLS_MUXER 0 ++#define CONFIG_ICO_MUXER 0 ++#define CONFIG_ILBC_MUXER 0 ++#define CONFIG_IMAGE2_MUXER 0 ++#define CONFIG_IMAGE2PIPE_MUXER 0 ++#define CONFIG_IPOD_MUXER 0 ++#define CONFIG_IRCAM_MUXER 0 ++#define CONFIG_ISMV_MUXER 0 ++#define CONFIG_IVF_MUXER 0 ++#define CONFIG_JACOSUB_MUXER 0 ++#define CONFIG_LATM_MUXER 0 ++#define 
CONFIG_LRC_MUXER 0 ++#define CONFIG_M4V_MUXER 0 ++#define CONFIG_MD5_MUXER 0 ++#define CONFIG_MATROSKA_MUXER 0 ++#define CONFIG_MATROSKA_AUDIO_MUXER 0 ++#define CONFIG_MICRODVD_MUXER 0 ++#define CONFIG_MJPEG_MUXER 0 ++#define CONFIG_MLP_MUXER 0 ++#define CONFIG_MMF_MUXER 0 ++#define CONFIG_MOV_MUXER 0 ++#define CONFIG_MP2_MUXER 0 ++#define CONFIG_MP3_MUXER 0 ++#define CONFIG_MP4_MUXER 0 ++#define CONFIG_MPEG1SYSTEM_MUXER 0 ++#define CONFIG_MPEG1VCD_MUXER 0 ++#define CONFIG_MPEG1VIDEO_MUXER 0 ++#define CONFIG_MPEG2DVD_MUXER 0 ++#define CONFIG_MPEG2SVCD_MUXER 0 ++#define CONFIG_MPEG2VIDEO_MUXER 0 ++#define CONFIG_MPEG2VOB_MUXER 0 ++#define CONFIG_MPEGTS_MUXER 0 ++#define CONFIG_MPJPEG_MUXER 0 ++#define CONFIG_MXF_MUXER 0 ++#define CONFIG_MXF_D10_MUXER 0 ++#define CONFIG_MXF_OPATOM_MUXER 0 ++#define CONFIG_NULL_MUXER 0 ++#define CONFIG_NUT_MUXER 0 ++#define CONFIG_OGA_MUXER 0 ++#define CONFIG_OGG_MUXER 0 ++#define CONFIG_OGV_MUXER 0 ++#define CONFIG_OMA_MUXER 0 ++#define CONFIG_OPUS_MUXER 0 ++#define CONFIG_PCM_ALAW_MUXER 0 ++#define CONFIG_PCM_MULAW_MUXER 0 ++#define CONFIG_PCM_VIDC_MUXER 0 ++#define CONFIG_PCM_F64BE_MUXER 0 ++#define CONFIG_PCM_F64LE_MUXER 0 ++#define CONFIG_PCM_F32BE_MUXER 0 ++#define CONFIG_PCM_F32LE_MUXER 0 ++#define CONFIG_PCM_S32BE_MUXER 0 ++#define CONFIG_PCM_S32LE_MUXER 0 ++#define CONFIG_PCM_S24BE_MUXER 0 ++#define CONFIG_PCM_S24LE_MUXER 0 ++#define CONFIG_PCM_S16BE_MUXER 0 ++#define CONFIG_PCM_S16LE_MUXER 0 ++#define CONFIG_PCM_S8_MUXER 0 ++#define CONFIG_PCM_U32BE_MUXER 0 ++#define CONFIG_PCM_U32LE_MUXER 0 ++#define CONFIG_PCM_U24BE_MUXER 0 ++#define CONFIG_PCM_U24LE_MUXER 0 ++#define CONFIG_PCM_U16BE_MUXER 0 ++#define CONFIG_PCM_U16LE_MUXER 0 ++#define CONFIG_PCM_U8_MUXER 0 ++#define CONFIG_PSP_MUXER 0 ++#define CONFIG_RAWVIDEO_MUXER 0 ++#define CONFIG_RM_MUXER 0 ++#define CONFIG_ROQ_MUXER 0 ++#define CONFIG_RSO_MUXER 0 ++#define CONFIG_RTP_MUXER 0 ++#define CONFIG_RTP_MPEGTS_MUXER 0 ++#define CONFIG_RTSP_MUXER 0 ++#define CONFIG_SAP_MUXER 0 ++#define CONFIG_SBC_MUXER 0 ++#define CONFIG_SCC_MUXER 0 ++#define CONFIG_SEGAFILM_MUXER 0 ++#define CONFIG_SEGMENT_MUXER 0 ++#define CONFIG_STREAM_SEGMENT_MUXER 0 ++#define CONFIG_SINGLEJPEG_MUXER 0 ++#define CONFIG_SMJPEG_MUXER 0 ++#define CONFIG_SMOOTHSTREAMING_MUXER 0 ++#define CONFIG_SOX_MUXER 0 ++#define CONFIG_SPX_MUXER 0 ++#define CONFIG_SPDIF_MUXER 0 ++#define CONFIG_SRT_MUXER 0 ++#define CONFIG_STREAMHASH_MUXER 0 ++#define CONFIG_SUP_MUXER 0 ++#define CONFIG_SWF_MUXER 0 ++#define CONFIG_TEE_MUXER 0 ++#define CONFIG_TG2_MUXER 0 ++#define CONFIG_TGP_MUXER 0 ++#define CONFIG_MKVTIMESTAMP_V2_MUXER 0 ++#define CONFIG_TRUEHD_MUXER 0 ++#define CONFIG_TTA_MUXER 0 ++#define CONFIG_UNCODEDFRAMECRC_MUXER 0 ++#define CONFIG_VC1_MUXER 0 ++#define CONFIG_VC1T_MUXER 0 ++#define CONFIG_VOC_MUXER 0 ++#define CONFIG_W64_MUXER 0 ++#define CONFIG_WAV_MUXER 0 ++#define CONFIG_WEBM_MUXER 0 ++#define CONFIG_WEBM_DASH_MANIFEST_MUXER 0 ++#define CONFIG_WEBM_CHUNK_MUXER 0 ++#define CONFIG_WEBP_MUXER 0 ++#define CONFIG_WEBVTT_MUXER 0 ++#define CONFIG_WTV_MUXER 0 ++#define CONFIG_WV_MUXER 0 ++#define CONFIG_YUV4MPEGPIPE_MUXER 0 ++#define CONFIG_CHROMAPRINT_MUXER 0 ++#define CONFIG_ASYNC_PROTOCOL 0 ++#define CONFIG_BLURAY_PROTOCOL 0 ++#define CONFIG_CACHE_PROTOCOL 0 ++#define CONFIG_CONCAT_PROTOCOL 0 ++#define CONFIG_CRYPTO_PROTOCOL 0 ++#define CONFIG_DATA_PROTOCOL 0 ++#define CONFIG_FFRTMPCRYPT_PROTOCOL 0 ++#define CONFIG_FFRTMPHTTP_PROTOCOL 0 ++#define CONFIG_FILE_PROTOCOL 0 ++#define CONFIG_FTP_PROTOCOL 0 ++#define CONFIG_GOPHER_PROTOCOL 0 
++#define CONFIG_HLS_PROTOCOL 0 ++#define CONFIG_HTTP_PROTOCOL 0 ++#define CONFIG_HTTPPROXY_PROTOCOL 0 ++#define CONFIG_HTTPS_PROTOCOL 0 ++#define CONFIG_ICECAST_PROTOCOL 0 ++#define CONFIG_MMSH_PROTOCOL 0 ++#define CONFIG_MMST_PROTOCOL 0 ++#define CONFIG_MD5_PROTOCOL 0 ++#define CONFIG_PIPE_PROTOCOL 0 ++#define CONFIG_PROMPEG_PROTOCOL 0 ++#define CONFIG_RTMP_PROTOCOL 0 ++#define CONFIG_RTMPE_PROTOCOL 0 ++#define CONFIG_RTMPS_PROTOCOL 0 ++#define CONFIG_RTMPT_PROTOCOL 0 ++#define CONFIG_RTMPTE_PROTOCOL 0 ++#define CONFIG_RTMPTS_PROTOCOL 0 ++#define CONFIG_RTP_PROTOCOL 0 ++#define CONFIG_SCTP_PROTOCOL 0 ++#define CONFIG_SRTP_PROTOCOL 0 ++#define CONFIG_SUBFILE_PROTOCOL 0 ++#define CONFIG_TEE_PROTOCOL 0 ++#define CONFIG_TCP_PROTOCOL 0 ++#define CONFIG_TLS_PROTOCOL 0 ++#define CONFIG_UDP_PROTOCOL 0 ++#define CONFIG_UDPLITE_PROTOCOL 0 ++#define CONFIG_UNIX_PROTOCOL 0 ++#define CONFIG_LIBAMQP_PROTOCOL 0 ++#define CONFIG_LIBRTMP_PROTOCOL 0 ++#define CONFIG_LIBRTMPE_PROTOCOL 0 ++#define CONFIG_LIBRTMPS_PROTOCOL 0 ++#define CONFIG_LIBRTMPT_PROTOCOL 0 ++#define CONFIG_LIBRTMPTE_PROTOCOL 0 ++#define CONFIG_LIBSRT_PROTOCOL 0 ++#define CONFIG_LIBSSH_PROTOCOL 0 ++#define CONFIG_LIBSMBCLIENT_PROTOCOL 0 ++#define CONFIG_LIBZMQ_PROTOCOL 0 ++#endif /* FFMPEG_CONFIG_H */ +diff --git a/src/3rdparty/chromium/third_party/ffmpeg/chromium/config/Chromium/linux/la64/libavcodec/bsf_list.c b/src/3rdparty/chromium/third_party/ffmpeg/chromium/config/Chromium/linux/la64/libavcodec/bsf_list.c +new file mode 100644 +index 00000000000..d31ece942a7 +--- /dev/null ++++ b/src/3rdparty/chromium/third_party/ffmpeg/chromium/config/Chromium/linux/la64/libavcodec/bsf_list.c +@@ -0,0 +1,3 @@ ++static const AVBitStreamFilter * const bitstream_filters[] = { ++ &ff_null_bsf, ++ NULL }; +diff --git a/src/3rdparty/chromium/third_party/ffmpeg/chromium/config/Chromium/linux/la64/libavcodec/codec_list.c b/src/3rdparty/chromium/third_party/ffmpeg/chromium/config/Chromium/linux/la64/libavcodec/codec_list.c +new file mode 100644 +index 00000000000..9407bd2775e +--- /dev/null ++++ b/src/3rdparty/chromium/third_party/ffmpeg/chromium/config/Chromium/linux/la64/libavcodec/codec_list.c +@@ -0,0 +1,18 @@ ++static const AVCodec * const codec_list[] = { ++ &ff_theora_decoder, ++ &ff_vp3_decoder, ++ &ff_vp8_decoder, ++ &ff_flac_decoder, ++ &ff_mp3_decoder, ++ &ff_vorbis_decoder, ++ &ff_pcm_alaw_decoder, ++ &ff_pcm_f32le_decoder, ++ &ff_pcm_mulaw_decoder, ++ &ff_pcm_s16be_decoder, ++ &ff_pcm_s16le_decoder, ++ &ff_pcm_s24be_decoder, ++ &ff_pcm_s24le_decoder, ++ &ff_pcm_s32le_decoder, ++ &ff_pcm_u8_decoder, ++ &ff_libopus_decoder, ++ NULL }; +diff --git a/src/3rdparty/chromium/third_party/ffmpeg/chromium/config/Chromium/linux/la64/libavcodec/parser_list.c b/src/3rdparty/chromium/third_party/ffmpeg/chromium/config/Chromium/linux/la64/libavcodec/parser_list.c +new file mode 100644 +index 00000000000..f81fbe8bbcf +--- /dev/null ++++ b/src/3rdparty/chromium/third_party/ffmpeg/chromium/config/Chromium/linux/la64/libavcodec/parser_list.c +@@ -0,0 +1,9 @@ ++static const AVCodecParser * const parser_list[] = { ++ &ff_flac_parser, ++ &ff_mpegaudio_parser, ++ &ff_opus_parser, ++ &ff_vorbis_parser, ++ &ff_vp3_parser, ++ &ff_vp8_parser, ++ &ff_vp9_parser, ++ NULL }; +diff --git a/src/3rdparty/chromium/third_party/ffmpeg/chromium/config/Chromium/linux/la64/libavformat/demuxer_list.c b/src/3rdparty/chromium/third_party/ffmpeg/chromium/config/Chromium/linux/la64/libavformat/demuxer_list.c +new file mode 100644 +index 00000000000..1908ba19e77 +--- /dev/null ++++ 
b/src/3rdparty/chromium/third_party/ffmpeg/chromium/config/Chromium/linux/la64/libavformat/demuxer_list.c +@@ -0,0 +1,8 @@ ++static const AVInputFormat * const demuxer_list[] = { ++ &ff_flac_demuxer, ++ &ff_matroska_demuxer, ++ &ff_mov_demuxer, ++ &ff_mp3_demuxer, ++ &ff_ogg_demuxer, ++ &ff_wav_demuxer, ++ NULL }; +diff --git a/src/3rdparty/chromium/third_party/ffmpeg/chromium/config/Chromium/linux/la64/libavformat/muxer_list.c b/src/3rdparty/chromium/third_party/ffmpeg/chromium/config/Chromium/linux/la64/libavformat/muxer_list.c +new file mode 100644 +index 00000000000..f36d9499c6f +--- /dev/null ++++ b/src/3rdparty/chromium/third_party/ffmpeg/chromium/config/Chromium/linux/la64/libavformat/muxer_list.c +@@ -0,0 +1,2 @@ ++static const AVOutputFormat * const muxer_list[] = { ++ NULL }; +diff --git a/src/3rdparty/chromium/third_party/ffmpeg/chromium/config/Chromium/linux/la64/libavformat/protocol_list.c b/src/3rdparty/chromium/third_party/ffmpeg/chromium/config/Chromium/linux/la64/libavformat/protocol_list.c +new file mode 100644 +index 00000000000..247e1e4c3a2 +--- /dev/null ++++ b/src/3rdparty/chromium/third_party/ffmpeg/chromium/config/Chromium/linux/la64/libavformat/protocol_list.c +@@ -0,0 +1,2 @@ ++static const URLProtocol * const url_protocols[] = { ++ NULL }; +diff --git a/src/3rdparty/chromium/third_party/ffmpeg/chromium/config/Chromium/linux/la64/libavutil/avconfig.h b/src/3rdparty/chromium/third_party/ffmpeg/chromium/config/Chromium/linux/la64/libavutil/avconfig.h +new file mode 100644 +index 00000000000..8558b35027f +--- /dev/null ++++ b/src/3rdparty/chromium/third_party/ffmpeg/chromium/config/Chromium/linux/la64/libavutil/avconfig.h +@@ -0,0 +1,6 @@ ++/* Generated by ffmpeg configure */ ++#ifndef AVUTIL_AVCONFIG_H ++#define AVUTIL_AVCONFIG_H ++#define AV_HAVE_BIGENDIAN 0 ++#define AV_HAVE_FAST_UNALIGNED 0 ++#endif /* AVUTIL_AVCONFIG_H */ +diff --git a/src/3rdparty/chromium/third_party/ffmpeg/chromium/config/Chromium/linux/la64/libavutil/ffversion.h b/src/3rdparty/chromium/third_party/ffmpeg/chromium/config/Chromium/linux/la64/libavutil/ffversion.h +new file mode 100644 +index 00000000000..31e5b5036dc +--- /dev/null ++++ b/src/3rdparty/chromium/third_party/ffmpeg/chromium/config/Chromium/linux/la64/libavutil/ffversion.h +@@ -0,0 +1,5 @@ ++/* Automatically generated by version.sh, do not manually edit! 
*/ ++#ifndef AVUTIL_FFVERSION_H ++#define AVUTIL_FFVERSION_H ++#define FFMPEG_VERSION "git-2020-06-16-23b2a15c25" ++#endif /* AVUTIL_FFVERSION_H */ +diff --git a/src/3rdparty/chromium/third_party/ffmpeg/chromium/scripts/build_ffmpeg.py b/src/3rdparty/chromium/third_party/ffmpeg/chromium/scripts/build_ffmpeg.py +index 23d5c0f5739..dfe821557de 100755 +--- a/src/3rdparty/chromium/third_party/ffmpeg/chromium/scripts/build_ffmpeg.py ++++ b/src/3rdparty/chromium/third_party/ffmpeg/chromium/scripts/build_ffmpeg.py +@@ -36,7 +36,7 @@ ARCH_MAP = { + 'android': ['ia32', 'x64', 'arm-neon', 'arm64'], + 'linux': [ + 'ia32', 'x64', 'mipsel', 'mips64el', 'noasm-x64', 'arm', 'arm-neon', +- 'arm64' ++ 'arm64', 'la64' + ], + 'mac': ['x64'], + 'win': ['ia32', 'x64', 'arm64'], +@@ -126,6 +126,8 @@ def DetermineHostOsAndArch(): + host_arch = 'mipsel' + elif platform.machine() == 'mips64': + host_arch = 'mips64el' ++ elif platform.machine() == 'loongarch64': ++ host_arch = 'la64' + elif platform.machine().startswith('arm'): + host_arch = 'arm' + else: +@@ -197,6 +199,10 @@ def SetupAndroidToolchain(target_arch): + elif target_arch == 'mipsel': + sysroot_arch = 'mips' + toolchain_bin_prefix = toolchain_dir_prefix = 'mipsel-linux-android' ++ elif target_arch == 'la64': ++ toolchain_level = api64_level ++ sysroot_arch = 'la64' ++ toolchain_bin_prefix = toolchain_dir_prefix = 'la64-linux-android' + elif target_arch == 'mips64el': + toolchain_level = api64_level + sysroot_arch = 'mips64' +@@ -789,6 +795,21 @@ def ConfigureAndBuild(target_arch, target_os, host_os, host_arch, parallel_jobs, + '--extra-cflags=--target=mips64el-linux-gnuabi64', + '--extra-ldflags=--target=mips64el-linux-gnuabi64', + ]) ++ elif target_arch == 'la64': ++ # These flags taken from android chrome build with target_cpu='mips64el' ++ configure_flags['Common'].extend([ ++ ]) ++ if target_os == 'android': ++ configure_flags['Common'].extend([ ++ '--enable-mips64r6', ++ '--extra-cflags=-mcpu=mips64r6', ++ '--disable-mips64r2', ++ '--enable-msa', ++ ]) ++ if target_os == 'linux': ++ configure_flags['Common'].extend([ ++ '--target-os=linux', ++ ]) + else: + print( + 'Error: Unknown target arch %r for target OS %r!' % (target_arch, +@@ -814,8 +835,8 @@ def ConfigureAndBuild(target_arch, target_os, host_os, host_arch, parallel_jobs, + # typically be the system one, so explicitly configure use of Clang's + # ld.lld, to ensure that things like cross-compilation and LTO work. + # This does not work for ia32 and is always used on mac. +- if target_arch != 'ia32' and target_os != 'mac': +- configure_flags['Common'].append('--extra-ldflags=-fuse-ld=lld') ++ #if target_arch != 'ia32' and target_os != 'mac': ++ # configure_flags['Common'].append('--extra-ldflags=-fuse-ld=lld') + + # Should be run on Mac, unless we're cross-compiling on Linux. 
+ if target_os == 'mac': +diff --git a/src/3rdparty/chromium/third_party/ffmpeg/chromium/scripts/copy_config.sh b/src/3rdparty/chromium/third_party/ffmpeg/chromium/scripts/copy_config.sh +index 0e5159d6f40..a982a3bd45d 100755 +--- a/src/3rdparty/chromium/third_party/ffmpeg/chromium/scripts/copy_config.sh ++++ b/src/3rdparty/chromium/third_party/ffmpeg/chromium/scripts/copy_config.sh +@@ -10,7 +10,7 @@ for os in android linux linux-noasm mac win; do + # Copy config files for various architectures: + # - ia32/x64 have config.asm, config.h + # - arm/arm-neon have config.h +- for arch in arm arm-neon arm64 ia32 x64 mipsel mips64el; do ++ for arch in arm arm-neon arm64 ia32 x64 mipsel mips64el la64; do + # Don't waste time on non-existent configs, if no config.h then skip. + [ ! -e "build.$arch.$os/$target/config.h" ] && continue + for f in config.h config.asm libavutil/avconfig.h libavutil/ffversion.h libavcodec/bsf_list.c libavcodec/codec_list.c libavcodec/parser_list.c libavformat/demuxer_list.c libavformat/muxer_list.c libavformat/protocol_list.c; do +diff --git a/src/3rdparty/chromium/third_party/ffmpeg/chromium/scripts/generate_gn.py b/src/3rdparty/chromium/third_party/ffmpeg/chromium/scripts/generate_gn.py +index d2b3d1052aa..5b4dd10e5c6 100755 +--- a/src/3rdparty/chromium/third_party/ffmpeg/chromium/scripts/generate_gn.py ++++ b/src/3rdparty/chromium/third_party/ffmpeg/chromium/scripts/generate_gn.py +@@ -77,7 +77,7 @@ GN_SOURCE_END = """] + Attr = enum('ARCHITECTURE', 'TARGET', 'PLATFORM') + SUPPORT_MATRIX = { + Attr.ARCHITECTURE: +- set(['ia32', 'x64', 'arm', 'arm64', 'arm-neon', 'mipsel', 'mips64el']), ++ set(['ia32', 'x64', 'arm', 'arm64', 'arm-neon', 'mipsel', 'mips64el', 'la64']), + Attr.TARGET: + set(['Chromium', 'Chrome', 'ChromeOS']), + Attr.PLATFORM: +diff --git a/src/3rdparty/chromium/third_party/ffmpeg/ffmpeg_generated.gni b/src/3rdparty/chromium/third_party/ffmpeg/ffmpeg_generated.gni +index fa2b74e0963..fdecb888d90 100644 +--- a/src/3rdparty/chromium/third_party/ffmpeg/ffmpeg_generated.gni ++++ b/src/3rdparty/chromium/third_party/ffmpeg/ffmpeg_generated.gni +@@ -14,17 +14,14 @@ ffmpeg_asm_sources = [] + + use_linux_config = is_linux || is_fuchsia + +-if ((is_android && current_cpu == "arm" && arm_use_neon) || (is_android && current_cpu == "arm64") || (is_android && current_cpu == "x64") || (is_android && current_cpu == "x86") || (is_mac) || (is_win) || (use_linux_config)) { ++if (use_linux_config && current_cpu == "la64") { + ffmpeg_c_sources += [ + "libavcodec/ac3_parser.c", + "libavcodec/ac3tab.c", + "libavcodec/adts_parser.c", + "libavcodec/allcodecs.c", + "libavcodec/autorename_libavcodec_flacdec.c", +- "libavcodec/autorename_libavcodec_flacdsp.c", +- "libavcodec/autorename_libavcodec_mpegaudiodsp.c", + "libavcodec/autorename_libavcodec_utils.c", +- "libavcodec/autorename_libavcodec_vorbisdsp.c", + "libavcodec/avdct.c", + "libavcodec/avfft.c", + "libavcodec/avpacket.c", +@@ -49,7 +46,10 @@ if ((is_android && current_cpu == "arm" && arm_use_neon) || (is_android && curre + "libavcodec/flac.c", + "libavcodec/flac_parser.c", + "libavcodec/flacdata.c", ++ "libavcodec/flacdsp.c", + "libavcodec/golomb.c", ++ "libavcodec/h264pred.c", ++ "libavcodec/hpeldsp.c", + "libavcodec/imgconvert.c", + "libavcodec/jni.c", + "libavcodec/libopus.c", +@@ -67,6 +67,7 @@ if ((is_android && current_cpu == "arm" && arm_use_neon) || (is_android && curre + "libavcodec/mpegaudiodata.c", + "libavcodec/mpegaudiodec_fixed.c", + "libavcodec/mpegaudiodecheader.c", ++ "libavcodec/mpegaudiodsp.c", + 
"libavcodec/mpegaudiodsp_data.c", + "libavcodec/mpegaudiodsp_fixed.c", + "libavcodec/mpegaudiodsp_float.c", +@@ -86,10 +87,19 @@ if ((is_android && current_cpu == "arm" && arm_use_neon) || (is_android && curre + "libavcodec/qsv_api.c", + "libavcodec/raw.c", + "libavcodec/rdft.c", ++ "libavcodec/videodsp.c", + "libavcodec/vorbis.c", + "libavcodec/vorbis_data.c", + "libavcodec/vorbis_parser.c", + "libavcodec/vorbisdec.c", ++ "libavcodec/vorbisdsp.c", ++ "libavcodec/vp3.c", ++ "libavcodec/vp3_parser.c", ++ "libavcodec/vp3dsp.c", ++ "libavcodec/vp56rac.c", ++ "libavcodec/vp8.c", ++ "libavcodec/vp8_parser.c", ++ "libavcodec/vp8dsp.c", + "libavcodec/vp9_parser.c", + "libavcodec/xiph.c", + "libavformat/allformats.c", +@@ -135,10 +145,6 @@ if ((is_android && current_cpu == "arm" && arm_use_neon) || (is_android && curre + "libavformat/wavdec.c", + "libavutil/aes.c", + "libavutil/aes_ctr.c", +- "libavutil/autorename_libavutil_cpu.c", +- "libavutil/autorename_libavutil_fixed_dsp.c", +- "libavutil/autorename_libavutil_float_dsp.c", +- "libavutil/autorename_libavutil_imgutils.c", + "libavutil/avsscanf.c", + "libavutil/avstring.c", + "libavutil/base64.c", +@@ -147,6 +153,7 @@ if ((is_android && current_cpu == "arm" && arm_use_neon) || (is_android && curre + "libavutil/camellia.c", + "libavutil/channel_layout.c", + "libavutil/color_utils.c", ++ "libavutil/cpu.c", + "libavutil/crc.c", + "libavutil/dict.c", + "libavutil/display.c", +@@ -156,9 +163,12 @@ if ((is_android && current_cpu == "arm" && arm_use_neon) || (is_android && curre + "libavutil/eval.c", + "libavutil/fifo.c", + "libavutil/file_open.c", ++ "libavutil/fixed_dsp.c", ++ "libavutil/float_dsp.c", + "libavutil/frame.c", + "libavutil/hdr_dynamic_metadata.c", + "libavutil/hwcontext.c", ++ "libavutil/imgutils.c", + "libavutil/integer.c", + "libavutil/intmath.c", + "libavutil/lfg.c", +@@ -193,22 +203,7 @@ if ((is_android && current_cpu == "arm" && arm_use_neon) || (is_android && curre + ] + } + +-if ((is_mac) || (is_win) || (use_linux_config)) { +- ffmpeg_c_sources += [ +- "libavcodec/autorename_libavcodec_hpeldsp.c", +- "libavcodec/autorename_libavcodec_videodsp.c", +- "libavcodec/autorename_libavcodec_vp3dsp.c", +- "libavcodec/autorename_libavcodec_vp8dsp.c", +- "libavcodec/h264pred.c", +- "libavcodec/vp3.c", +- "libavcodec/vp3_parser.c", +- "libavcodec/vp56rac.c", +- "libavcodec/vp8.c", +- "libavcodec/vp8_parser.c", +- ] +-} +- +-if ((current_cpu == "x64" && ffmpeg_branding == "Chrome") || (is_android && current_cpu == "arm" && arm_use_neon && ffmpeg_branding == "Chrome") || (is_android && current_cpu == "arm64" && ffmpeg_branding == "Chrome") || (is_android && current_cpu == "x86" && ffmpeg_branding == "Chrome") || (is_win && ffmpeg_branding == "Chrome") || (use_linux_config && ffmpeg_branding == "Chrome") || (use_linux_config && ffmpeg_branding == "ChromeOS")) { ++if ((use_linux_config && current_cpu == "la64" && ffmpeg_branding == "Chrome") || (use_linux_config && current_cpu == "la64" && ffmpeg_branding == "ChromeOS")) { + ffmpeg_c_sources += [ + "libavcodec/aac_ac3_parser.c", + "libavcodec/aac_parser.c", +@@ -218,21 +213,8 @@ if ((current_cpu == "x64" && ffmpeg_branding == "Chrome") || (is_android && curr + "libavcodec/aactab.c", + "libavcodec/adts_header.c", + "libavcodec/autorename_libavcodec_aacdec.c", +- "libavcodec/autorename_libavcodec_mdct15.c", +- "libavcodec/autorename_libavcodec_sbrdsp.c", +- "libavcodec/cbrt_data.c", +- "libavcodec/kbdwin.c", +- "libavcodec/sinewin.c", +- "libavcodec/sinewin_fixed.c", +- "libavformat/aacdec.c", +- 
"libavformat/apetag.c", +- "libavformat/img2.c", +- ] +-} +- +-if ((is_mac && ffmpeg_branding == "Chrome") || (is_win && ffmpeg_branding == "Chrome") || (use_linux_config && ffmpeg_branding == "Chrome") || (use_linux_config && ffmpeg_branding == "ChromeOS")) { +- ffmpeg_c_sources += [ + "libavcodec/cabac.c", ++ "libavcodec/cbrt_data.c", + "libavcodec/h2645_parse.c", + "libavcodec/h264_cabac.c", + "libavcodec/h264_cavlc.c", +@@ -252,122 +234,26 @@ if ((is_mac && ffmpeg_branding == "Chrome") || (is_win && ffmpeg_branding == "Ch + "libavcodec/h264dsp.c", + "libavcodec/h264idct.c", + "libavcodec/h264qpel.c", ++ "libavcodec/kbdwin.c", ++ "libavcodec/mdct15.c", ++ "libavcodec/sbrdsp.c", ++ "libavcodec/sinewin.c", ++ "libavcodec/sinewin_fixed.c", + "libavcodec/startcode.c", ++ "libavformat/aacdec.c", ++ "libavformat/apetag.c", ++ "libavformat/img2.c", + ] + } + +-if ((is_android && current_cpu == "x64") || (is_android && current_cpu == "x86") || (is_mac) || (is_win && current_cpu == "x64") || (is_win && current_cpu == "x86") || (use_linux_config && current_cpu == "x64") || (use_linux_config && current_cpu == "x86")) { +- ffmpeg_c_sources += [ +- "libavcodec/x86/autorename_libavcodec_x86_vorbisdsp_init.c", +- "libavcodec/x86/constants.c", +- "libavcodec/x86/dct_init.c", +- "libavcodec/x86/fft_init.c", +- "libavcodec/x86/flacdsp_init.c", +- "libavcodec/x86/mpegaudiodsp.c", +- "libavutil/x86/autorename_libavutil_x86_cpu.c", +- "libavutil/x86/autorename_libavutil_x86_float_dsp_init.c", +- "libavutil/x86/fixed_dsp_init.c", +- "libavutil/x86/imgutils_init.c", +- "libavutil/x86/lls_init.c", +- ] +-} +- +-if ((is_android && current_cpu == "arm" && arm_use_neon) || (use_linux_config && current_cpu == "arm" && arm_use_neon) || (use_linux_config && current_cpu == "arm")) { +- ffmpeg_c_sources += [ +- "libavcodec/arm/fft_fixed_init_arm.c", +- "libavcodec/arm/fft_init_arm.c", +- "libavcodec/arm/flacdsp_init_arm.c", +- "libavcodec/arm/mpegaudiodsp_init_arm.c", +- "libavcodec/arm/rdft_init_arm.c", +- "libavcodec/arm/vorbisdsp_init_arm.c", +- "libavutil/arm/autorename_libavutil_arm_cpu.c", +- "libavutil/arm/float_dsp_init_arm.c", +- "libavutil/arm/float_dsp_init_vfp.c", +- ] +- ffmpeg_gas_sources += [ +- "libavcodec/arm/fft_vfp.S", +- "libavcodec/arm/flacdsp_arm.S", +- "libavcodec/arm/mdct_vfp.S", +- "libavcodec/arm/mpegaudiodsp_fixed_armv6.S", +- "libavutil/arm/float_dsp_vfp.S", +- ] +-} +- +-if ((is_android && current_cpu == "x64") || (is_mac) || (is_win && current_cpu == "x64") || (is_win && current_cpu == "x86") || (use_linux_config && current_cpu == "x64") || (use_linux_config && current_cpu == "x86")) { +- ffmpeg_asm_sources += [ +- "libavcodec/x86/dct32.asm", +- "libavcodec/x86/fft.asm", +- "libavcodec/x86/flacdsp.asm", +- "libavcodec/x86/imdct36.asm", +- "libavcodec/x86/vorbisdsp.asm", +- "libavutil/x86/cpuid.asm", +- "libavutil/x86/fixed_dsp.asm", +- "libavutil/x86/float_dsp.asm", +- "libavutil/x86/imgutils.asm", +- "libavutil/x86/lls.asm", +- ] +-} +- +-if ((is_mac) || (is_win && current_cpu == "x64") || (is_win && current_cpu == "x86") || (use_linux_config && current_cpu == "x64") || (use_linux_config && current_cpu == "x86")) { +- ffmpeg_c_sources += [ +- "libavcodec/x86/autorename_libavcodec_x86_videodsp_init.c", +- "libavcodec/x86/h264_intrapred_init.c", +- "libavcodec/x86/hpeldsp_init.c", +- "libavcodec/x86/hpeldsp_vp3_init.c", +- "libavcodec/x86/vp3dsp_init.c", +- "libavcodec/x86/vp8dsp_init.c", +- ] +- ffmpeg_asm_sources += [ +- "libavcodec/x86/autorename_libavcodec_x86_videodsp.asm", +- 
"libavcodec/x86/fpel.asm", +- "libavcodec/x86/h264_intrapred.asm", +- "libavcodec/x86/h264_intrapred_10bit.asm", +- "libavcodec/x86/hpeldsp.asm", +- "libavcodec/x86/hpeldsp_vp3.asm", +- "libavcodec/x86/vp3dsp.asm", +- "libavcodec/x86/vp8dsp.asm", +- "libavcodec/x86/vp8dsp_loopfilter.asm", +- ] +-} +- +-if ((current_cpu == "x64" && ffmpeg_branding == "Chrome") || (is_android && current_cpu == "x86" && ffmpeg_branding == "Chrome") || (is_win && current_cpu == "x86" && ffmpeg_branding == "Chrome") || (use_linux_config && current_cpu == "x64" && ffmpeg_branding == "ChromeOS") || (use_linux_config && current_cpu == "x86" && ffmpeg_branding == "Chrome") || (use_linux_config && current_cpu == "x86" && ffmpeg_branding == "ChromeOS")) { +- ffmpeg_c_sources += [ +- "libavcodec/x86/aacpsdsp_init.c", +- "libavcodec/x86/mdct15_init.c", +- "libavcodec/x86/sbrdsp_init.c", +- ] +-} +- +-if ((is_android && current_cpu == "arm" && arm_use_neon) || (is_android && current_cpu == "arm64") || (is_android && current_cpu == "x64") || (is_android && current_cpu == "x86")) { +- ffmpeg_c_sources += [ +- "compat/strtod.c", +- ] +-} +- +-if ((current_cpu == "x64" && ffmpeg_branding == "Chrome") || (is_win && current_cpu == "x86" && ffmpeg_branding == "Chrome") || (use_linux_config && current_cpu == "x64" && ffmpeg_branding == "ChromeOS") || (use_linux_config && current_cpu == "x86" && ffmpeg_branding == "Chrome") || (use_linux_config && current_cpu == "x86" && ffmpeg_branding == "ChromeOS")) { +- ffmpeg_asm_sources += [ +- "libavcodec/x86/aacpsdsp.asm", +- "libavcodec/x86/mdct15.asm", +- "libavcodec/x86/sbrdsp.asm", +- ] +-} +- +-if (use_linux_config && ffmpeg_branding == "ChromeOS") { ++if (use_linux_config && current_cpu == "la64" && ffmpeg_branding == "ChromeOS") { + ffmpeg_c_sources += [ + "libavcodec/acelp_filters.c", + "libavcodec/acelp_pitch_delay.c", + "libavcodec/acelp_vectors.c", + "libavcodec/amrnbdec.c", + "libavcodec/amrwbdec.c", +- "libavcodec/autorename_libavcodec_blockdsp.c", +- "libavcodec/autorename_libavcodec_idctdsp.c", +- "libavcodec/autorename_libavcodec_me_cmp.c", +- "libavcodec/autorename_libavcodec_mpegvideo.c", +- "libavcodec/autorename_libavcodec_mpegvideodsp.c", +- "libavcodec/autorename_libavcodec_pixblockdsp.c", +- "libavcodec/autorename_libavcodec_qpeldsp.c", +- "libavcodec/autorename_libavcodec_simple_idct.c", +- "libavcodec/autorename_libavcodec_xvididct.c", ++ "libavcodec/blockdsp.c", + "libavcodec/celp_filters.c", + "libavcodec/celp_math.c", + "libavcodec/error_resilience.c", +@@ -382,23 +268,31 @@ if (use_linux_config && ffmpeg_branding == "ChromeOS") { + "libavcodec/h263data.c", + "libavcodec/h263dec.c", + "libavcodec/h263dsp.c", ++ "libavcodec/idctdsp.c", + "libavcodec/intelh263dec.c", + "libavcodec/ituh263dec.c", + "libavcodec/jfdctfst.c", + "libavcodec/jfdctint.c", + "libavcodec/jrevdct.c", + "libavcodec/lsp.c", ++ "libavcodec/me_cmp.c", + "libavcodec/mpeg4video.c", + "libavcodec/mpeg4video_parser.c", + "libavcodec/mpeg4videodec.c", + "libavcodec/mpeg_er.c", + "libavcodec/mpegpicture.c", + "libavcodec/mpegutils.c", ++ "libavcodec/mpegvideo.c", + "libavcodec/mpegvideo_motion.c", + "libavcodec/mpegvideodata.c", ++ "libavcodec/mpegvideodsp.c", + "libavcodec/msgsmdec.c", ++ "libavcodec/pixblockdsp.c", ++ "libavcodec/qpeldsp.c", + "libavcodec/rl.c", ++ "libavcodec/simple_idct.c", + "libavcodec/tiff_common.c", ++ "libavcodec/xvididct.c", + "libavformat/amr.c", + "libavformat/avidec.c", + ] +diff --git a/src/3rdparty/chromium/third_party/icu/source/i18n/double-conversion-utils.h 
b/src/3rdparty/chromium/third_party/icu/source/i18n/double-conversion-utils.h +index c5439ca150d..ed6fb54632b 100644 +--- a/src/3rdparty/chromium/third_party/icu/source/i18n/double-conversion-utils.h ++++ b/src/3rdparty/chromium/third_party/icu/source/i18n/double-conversion-utils.h +@@ -103,7 +103,7 @@ int main(int argc, char** argv) { + #if defined(_M_X64) || defined(__x86_64__) || \ + defined(__ARMEL__) || defined(__avr32__) || defined(_M_ARM) || defined(_M_ARM64) || \ + defined(__hppa__) || defined(__ia64__) || \ +- defined(__mips__) || \ ++ defined(__mips__) || defined(__loongarch__) || \ + defined(__powerpc__) || defined(__ppc__) || defined(__ppc64__) || \ + defined(_POWER) || defined(_ARCH_PPC) || defined(_ARCH_PPC64) || \ + defined(__sparc__) || defined(__sparc) || defined(__s390__) || \ +diff --git a/src/3rdparty/chromium/third_party/libvpx/BUILD.gn b/src/3rdparty/chromium/third_party/libvpx/BUILD.gn +index 9b92313b41e..75f0869ae10 100644 +--- a/src/3rdparty/chromium/third_party/libvpx/BUILD.gn ++++ b/src/3rdparty/chromium/third_party/libvpx/BUILD.gn +@@ -326,6 +326,8 @@ static_library("bundled_libvpx") { + } + } else if (current_cpu == "mipsel" || current_cpu == "mips64el") { + sources = libvpx_srcs_mips ++ } else if (current_cpu == "la64") { ++ sources = libvpx_srcs_generic + } else if (current_cpu == "arm") { + if (is_chromeos) { + sources = libvpx_srcs_arm_neon_highbd +diff --git a/src/3rdparty/chromium/third_party/libvpx/source/config/linux/la64/vp8_rtcd.h b/src/3rdparty/chromium/third_party/libvpx/source/config/linux/la64/vp8_rtcd.h +new file mode 100644 +index 00000000000..aa475b55faf +--- /dev/null ++++ b/src/3rdparty/chromium/third_party/libvpx/source/config/linux/la64/vp8_rtcd.h +@@ -0,0 +1,357 @@ ++// This file is generated. Do not edit. 
++#ifndef VP8_RTCD_H_ ++#define VP8_RTCD_H_ ++ ++#ifdef RTCD_C ++#define RTCD_EXTERN ++#else ++#define RTCD_EXTERN extern ++#endif ++ ++/* ++ * VP8 ++ */ ++ ++struct blockd; ++struct macroblockd; ++struct loop_filter_info; ++ ++/* Encoder forward decls */ ++struct block; ++struct macroblock; ++struct variance_vtable; ++union int_mv; ++struct yv12_buffer_config; ++ ++#ifdef __cplusplus ++extern "C" { ++#endif ++ ++void vp8_bilinear_predict16x16_c(unsigned char* src_ptr, ++ int src_pixels_per_line, ++ int xoffset, ++ int yoffset, ++ unsigned char* dst_ptr, ++ int dst_pitch); ++#define vp8_bilinear_predict16x16 vp8_bilinear_predict16x16_c ++ ++void vp8_bilinear_predict4x4_c(unsigned char* src_ptr, ++ int src_pixels_per_line, ++ int xoffset, ++ int yoffset, ++ unsigned char* dst_ptr, ++ int dst_pitch); ++#define vp8_bilinear_predict4x4 vp8_bilinear_predict4x4_c ++ ++void vp8_bilinear_predict8x4_c(unsigned char* src_ptr, ++ int src_pixels_per_line, ++ int xoffset, ++ int yoffset, ++ unsigned char* dst_ptr, ++ int dst_pitch); ++#define vp8_bilinear_predict8x4 vp8_bilinear_predict8x4_c ++ ++void vp8_bilinear_predict8x8_c(unsigned char* src_ptr, ++ int src_pixels_per_line, ++ int xoffset, ++ int yoffset, ++ unsigned char* dst_ptr, ++ int dst_pitch); ++#define vp8_bilinear_predict8x8 vp8_bilinear_predict8x8_c ++ ++void vp8_blend_b_c(unsigned char* y, ++ unsigned char* u, ++ unsigned char* v, ++ int y_1, ++ int u_1, ++ int v_1, ++ int alpha, ++ int stride); ++#define vp8_blend_b vp8_blend_b_c ++ ++void vp8_blend_mb_inner_c(unsigned char* y, ++ unsigned char* u, ++ unsigned char* v, ++ int y_1, ++ int u_1, ++ int v_1, ++ int alpha, ++ int stride); ++#define vp8_blend_mb_inner vp8_blend_mb_inner_c ++ ++void vp8_blend_mb_outer_c(unsigned char* y, ++ unsigned char* u, ++ unsigned char* v, ++ int y_1, ++ int u_1, ++ int v_1, ++ int alpha, ++ int stride); ++#define vp8_blend_mb_outer vp8_blend_mb_outer_c ++ ++int vp8_block_error_c(short* coeff, short* dqcoeff); ++#define vp8_block_error vp8_block_error_c ++ ++void vp8_copy32xn_c(const unsigned char* src_ptr, ++ int src_stride, ++ unsigned char* dst_ptr, ++ int dst_stride, ++ int height); ++#define vp8_copy32xn vp8_copy32xn_c ++ ++void vp8_copy_mem16x16_c(unsigned char* src, ++ int src_stride, ++ unsigned char* dst, ++ int dst_stride); ++#define vp8_copy_mem16x16 vp8_copy_mem16x16_c ++ ++void vp8_copy_mem8x4_c(unsigned char* src, ++ int src_stride, ++ unsigned char* dst, ++ int dst_stride); ++#define vp8_copy_mem8x4 vp8_copy_mem8x4_c ++ ++void vp8_copy_mem8x8_c(unsigned char* src, ++ int src_stride, ++ unsigned char* dst, ++ int dst_stride); ++#define vp8_copy_mem8x8 vp8_copy_mem8x8_c ++ ++void vp8_dc_only_idct_add_c(short input_dc, ++ unsigned char* pred_ptr, ++ int pred_stride, ++ unsigned char* dst_ptr, ++ int dst_stride); ++#define vp8_dc_only_idct_add vp8_dc_only_idct_add_c ++ ++int vp8_denoiser_filter_c(unsigned char* mc_running_avg_y, ++ int mc_avg_y_stride, ++ unsigned char* running_avg_y, ++ int avg_y_stride, ++ unsigned char* sig, ++ int sig_stride, ++ unsigned int motion_magnitude, ++ int increase_denoising); ++#define vp8_denoiser_filter vp8_denoiser_filter_c ++ ++int vp8_denoiser_filter_uv_c(unsigned char* mc_running_avg, ++ int mc_avg_stride, ++ unsigned char* running_avg, ++ int avg_stride, ++ unsigned char* sig, ++ int sig_stride, ++ unsigned int motion_magnitude, ++ int increase_denoising); ++#define vp8_denoiser_filter_uv vp8_denoiser_filter_uv_c ++ ++void vp8_dequant_idct_add_c(short* input, ++ short* dq, ++ unsigned char* dest, ++ int 
stride); ++#define vp8_dequant_idct_add vp8_dequant_idct_add_c ++ ++void vp8_dequant_idct_add_uv_block_c(short* q, ++ short* dq, ++ unsigned char* dst_u, ++ unsigned char* dst_v, ++ int stride, ++ char* eobs); ++#define vp8_dequant_idct_add_uv_block vp8_dequant_idct_add_uv_block_c ++ ++void vp8_dequant_idct_add_y_block_c(short* q, ++ short* dq, ++ unsigned char* dst, ++ int stride, ++ char* eobs); ++#define vp8_dequant_idct_add_y_block vp8_dequant_idct_add_y_block_c ++ ++void vp8_dequantize_b_c(struct blockd*, short* DQC); ++#define vp8_dequantize_b vp8_dequantize_b_c ++ ++int vp8_diamond_search_sad_c(struct macroblock* x, ++ struct block* b, ++ struct blockd* d, ++ union int_mv* ref_mv, ++ union int_mv* best_mv, ++ int search_param, ++ int sad_per_bit, ++ int* num00, ++ struct variance_vtable* fn_ptr, ++ int* mvcost[2], ++ union int_mv* center_mv); ++#define vp8_diamond_search_sad vp8_diamond_search_sad_c ++ ++void vp8_fast_quantize_b_c(struct block*, struct blockd*); ++#define vp8_fast_quantize_b vp8_fast_quantize_b_c ++ ++void vp8_filter_by_weight16x16_c(unsigned char* src, ++ int src_stride, ++ unsigned char* dst, ++ int dst_stride, ++ int src_weight); ++#define vp8_filter_by_weight16x16 vp8_filter_by_weight16x16_c ++ ++void vp8_filter_by_weight4x4_c(unsigned char* src, ++ int src_stride, ++ unsigned char* dst, ++ int dst_stride, ++ int src_weight); ++#define vp8_filter_by_weight4x4 vp8_filter_by_weight4x4_c ++ ++void vp8_filter_by_weight8x8_c(unsigned char* src, ++ int src_stride, ++ unsigned char* dst, ++ int dst_stride, ++ int src_weight); ++#define vp8_filter_by_weight8x8 vp8_filter_by_weight8x8_c ++ ++int vp8_full_search_sad_c(struct macroblock* x, ++ struct block* b, ++ struct blockd* d, ++ union int_mv* ref_mv, ++ int sad_per_bit, ++ int distance, ++ struct variance_vtable* fn_ptr, ++ int* mvcost[2], ++ union int_mv* center_mv); ++#define vp8_full_search_sad vp8_full_search_sad_c ++ ++void vp8_loop_filter_bh_c(unsigned char* y_ptr, ++ unsigned char* u_ptr, ++ unsigned char* v_ptr, ++ int y_stride, ++ int uv_stride, ++ struct loop_filter_info* lfi); ++#define vp8_loop_filter_bh vp8_loop_filter_bh_c ++ ++void vp8_loop_filter_bv_c(unsigned char* y_ptr, ++ unsigned char* u_ptr, ++ unsigned char* v_ptr, ++ int y_stride, ++ int uv_stride, ++ struct loop_filter_info* lfi); ++#define vp8_loop_filter_bv vp8_loop_filter_bv_c ++ ++void vp8_loop_filter_mbh_c(unsigned char* y_ptr, ++ unsigned char* u_ptr, ++ unsigned char* v_ptr, ++ int y_stride, ++ int uv_stride, ++ struct loop_filter_info* lfi); ++#define vp8_loop_filter_mbh vp8_loop_filter_mbh_c ++ ++void vp8_loop_filter_mbv_c(unsigned char* y_ptr, ++ unsigned char* u_ptr, ++ unsigned char* v_ptr, ++ int y_stride, ++ int uv_stride, ++ struct loop_filter_info* lfi); ++#define vp8_loop_filter_mbv vp8_loop_filter_mbv_c ++ ++void vp8_loop_filter_bhs_c(unsigned char* y_ptr, ++ int y_stride, ++ const unsigned char* blimit); ++#define vp8_loop_filter_simple_bh vp8_loop_filter_bhs_c ++ ++void vp8_loop_filter_bvs_c(unsigned char* y_ptr, ++ int y_stride, ++ const unsigned char* blimit); ++#define vp8_loop_filter_simple_bv vp8_loop_filter_bvs_c ++ ++void vp8_loop_filter_simple_horizontal_edge_c(unsigned char* y_ptr, ++ int y_stride, ++ const unsigned char* blimit); ++#define vp8_loop_filter_simple_mbh vp8_loop_filter_simple_horizontal_edge_c ++ ++void vp8_loop_filter_simple_vertical_edge_c(unsigned char* y_ptr, ++ int y_stride, ++ const unsigned char* blimit); ++#define vp8_loop_filter_simple_mbv vp8_loop_filter_simple_vertical_edge_c ++ ++int 
vp8_mbblock_error_c(struct macroblock* mb, int dc); ++#define vp8_mbblock_error vp8_mbblock_error_c ++ ++int vp8_mbuverror_c(struct macroblock* mb); ++#define vp8_mbuverror vp8_mbuverror_c ++ ++int vp8_refining_search_sad_c(struct macroblock* x, ++ struct block* b, ++ struct blockd* d, ++ union int_mv* ref_mv, ++ int error_per_bit, ++ int search_range, ++ struct variance_vtable* fn_ptr, ++ int* mvcost[2], ++ union int_mv* center_mv); ++#define vp8_refining_search_sad vp8_refining_search_sad_c ++ ++void vp8_regular_quantize_b_c(struct block*, struct blockd*); ++#define vp8_regular_quantize_b vp8_regular_quantize_b_c ++ ++void vp8_short_fdct4x4_c(short* input, short* output, int pitch); ++#define vp8_short_fdct4x4 vp8_short_fdct4x4_c ++ ++void vp8_short_fdct8x4_c(short* input, short* output, int pitch); ++#define vp8_short_fdct8x4 vp8_short_fdct8x4_c ++ ++void vp8_short_idct4x4llm_c(short* input, ++ unsigned char* pred_ptr, ++ int pred_stride, ++ unsigned char* dst_ptr, ++ int dst_stride); ++#define vp8_short_idct4x4llm vp8_short_idct4x4llm_c ++ ++void vp8_short_inv_walsh4x4_c(short* input, short* mb_dqcoeff); ++#define vp8_short_inv_walsh4x4 vp8_short_inv_walsh4x4_c ++ ++void vp8_short_inv_walsh4x4_1_c(short* input, short* mb_dqcoeff); ++#define vp8_short_inv_walsh4x4_1 vp8_short_inv_walsh4x4_1_c ++ ++void vp8_short_walsh4x4_c(short* input, short* output, int pitch); ++#define vp8_short_walsh4x4 vp8_short_walsh4x4_c ++ ++void vp8_sixtap_predict16x16_c(unsigned char* src_ptr, ++ int src_pixels_per_line, ++ int xoffset, ++ int yoffset, ++ unsigned char* dst_ptr, ++ int dst_pitch); ++#define vp8_sixtap_predict16x16 vp8_sixtap_predict16x16_c ++ ++void vp8_sixtap_predict4x4_c(unsigned char* src_ptr, ++ int src_pixels_per_line, ++ int xoffset, ++ int yoffset, ++ unsigned char* dst_ptr, ++ int dst_pitch); ++#define vp8_sixtap_predict4x4 vp8_sixtap_predict4x4_c ++ ++void vp8_sixtap_predict8x4_c(unsigned char* src_ptr, ++ int src_pixels_per_line, ++ int xoffset, ++ int yoffset, ++ unsigned char* dst_ptr, ++ int dst_pitch); ++#define vp8_sixtap_predict8x4 vp8_sixtap_predict8x4_c ++ ++void vp8_sixtap_predict8x8_c(unsigned char* src_ptr, ++ int src_pixels_per_line, ++ int xoffset, ++ int yoffset, ++ unsigned char* dst_ptr, ++ int dst_pitch); ++#define vp8_sixtap_predict8x8 vp8_sixtap_predict8x8_c ++ ++void vp8_rtcd(void); ++ ++#include "vpx_config.h" ++ ++#ifdef RTCD_C ++static void setup_rtcd_internal(void) {} ++#endif ++ ++#ifdef __cplusplus ++} // extern "C" ++#endif ++ ++#endif +diff --git a/src/3rdparty/chromium/third_party/libvpx/source/config/linux/la64/vp9_rtcd.h b/src/3rdparty/chromium/third_party/libvpx/source/config/linux/la64/vp9_rtcd.h +new file mode 100644 +index 00000000000..00913931484 +--- /dev/null ++++ b/src/3rdparty/chromium/third_party/libvpx/source/config/linux/la64/vp9_rtcd.h +@@ -0,0 +1,275 @@ ++// This file is generated. Do not edit. 
++#ifndef VP9_RTCD_H_ ++#define VP9_RTCD_H_ ++ ++#ifdef RTCD_C ++#define RTCD_EXTERN ++#else ++#define RTCD_EXTERN extern ++#endif ++ ++/* ++ * VP9 ++ */ ++ ++#include "vp9/common/vp9_common.h" ++#include "vp9/common/vp9_enums.h" ++#include "vp9/common/vp9_filter.h" ++#include "vpx/vpx_integer.h" ++ ++struct macroblockd; ++ ++/* Encoder forward decls */ ++struct macroblock; ++struct vp9_variance_vtable; ++struct search_site_config; ++struct mv; ++union int_mv; ++struct yv12_buffer_config; ++ ++#ifdef __cplusplus ++extern "C" { ++#endif ++ ++int64_t vp9_block_error_c(const tran_low_t* coeff, ++ const tran_low_t* dqcoeff, ++ intptr_t block_size, ++ int64_t* ssz); ++#define vp9_block_error vp9_block_error_c ++ ++int64_t vp9_block_error_fp_c(const tran_low_t* coeff, ++ const tran_low_t* dqcoeff, ++ int block_size); ++#define vp9_block_error_fp vp9_block_error_fp_c ++ ++int vp9_denoiser_filter_c(const uint8_t* sig, ++ int sig_stride, ++ const uint8_t* mc_avg, ++ int mc_avg_stride, ++ uint8_t* avg, ++ int avg_stride, ++ int increase_denoising, ++ BLOCK_SIZE bs, ++ int motion_magnitude); ++#define vp9_denoiser_filter vp9_denoiser_filter_c ++ ++int vp9_diamond_search_sad_c(const struct macroblock* x, ++ const struct search_site_config* cfg, ++ struct mv* ref_mv, ++ struct mv* best_mv, ++ int search_param, ++ int sad_per_bit, ++ int* num00, ++ const struct vp9_variance_vtable* fn_ptr, ++ const struct mv* center_mv); ++#define vp9_diamond_search_sad vp9_diamond_search_sad_c ++ ++void vp9_fht16x16_c(const int16_t* input, ++ tran_low_t* output, ++ int stride, ++ int tx_type); ++#define vp9_fht16x16 vp9_fht16x16_c ++ ++void vp9_fht4x4_c(const int16_t* input, ++ tran_low_t* output, ++ int stride, ++ int tx_type); ++#define vp9_fht4x4 vp9_fht4x4_c ++ ++void vp9_fht8x8_c(const int16_t* input, ++ tran_low_t* output, ++ int stride, ++ int tx_type); ++#define vp9_fht8x8 vp9_fht8x8_c ++ ++void vp9_filter_by_weight16x16_c(const uint8_t* src, ++ int src_stride, ++ uint8_t* dst, ++ int dst_stride, ++ int src_weight); ++#define vp9_filter_by_weight16x16 vp9_filter_by_weight16x16_c ++ ++void vp9_filter_by_weight8x8_c(const uint8_t* src, ++ int src_stride, ++ uint8_t* dst, ++ int dst_stride, ++ int src_weight); ++#define vp9_filter_by_weight8x8 vp9_filter_by_weight8x8_c ++ ++void vp9_fwht4x4_c(const int16_t* input, tran_low_t* output, int stride); ++#define vp9_fwht4x4 vp9_fwht4x4_c ++ ++int64_t vp9_highbd_block_error_c(const tran_low_t* coeff, ++ const tran_low_t* dqcoeff, ++ intptr_t block_size, ++ int64_t* ssz, ++ int bd); ++#define vp9_highbd_block_error vp9_highbd_block_error_c ++ ++void vp9_highbd_fht16x16_c(const int16_t* input, ++ tran_low_t* output, ++ int stride, ++ int tx_type); ++#define vp9_highbd_fht16x16 vp9_highbd_fht16x16_c ++ ++void vp9_highbd_fht4x4_c(const int16_t* input, ++ tran_low_t* output, ++ int stride, ++ int tx_type); ++#define vp9_highbd_fht4x4 vp9_highbd_fht4x4_c ++ ++void vp9_highbd_fht8x8_c(const int16_t* input, ++ tran_low_t* output, ++ int stride, ++ int tx_type); ++#define vp9_highbd_fht8x8 vp9_highbd_fht8x8_c ++ ++void vp9_highbd_fwht4x4_c(const int16_t* input, tran_low_t* output, int stride); ++#define vp9_highbd_fwht4x4 vp9_highbd_fwht4x4_c ++ ++void vp9_highbd_iht16x16_256_add_c(const tran_low_t* input, ++ uint16_t* dest, ++ int stride, ++ int tx_type, ++ int bd); ++#define vp9_highbd_iht16x16_256_add vp9_highbd_iht16x16_256_add_c ++ ++void vp9_highbd_iht4x4_16_add_c(const tran_low_t* input, ++ uint16_t* dest, ++ int stride, ++ int tx_type, ++ int bd); ++#define 
vp9_highbd_iht4x4_16_add vp9_highbd_iht4x4_16_add_c ++ ++void vp9_highbd_iht8x8_64_add_c(const tran_low_t* input, ++ uint16_t* dest, ++ int stride, ++ int tx_type, ++ int bd); ++#define vp9_highbd_iht8x8_64_add vp9_highbd_iht8x8_64_add_c ++ ++void vp9_highbd_mbpost_proc_across_ip_c(uint16_t* src, ++ int pitch, ++ int rows, ++ int cols, ++ int flimit); ++#define vp9_highbd_mbpost_proc_across_ip vp9_highbd_mbpost_proc_across_ip_c ++ ++void vp9_highbd_mbpost_proc_down_c(uint16_t* dst, ++ int pitch, ++ int rows, ++ int cols, ++ int flimit); ++#define vp9_highbd_mbpost_proc_down vp9_highbd_mbpost_proc_down_c ++ ++void vp9_highbd_post_proc_down_and_across_c(const uint16_t* src_ptr, ++ uint16_t* dst_ptr, ++ int src_pixels_per_line, ++ int dst_pixels_per_line, ++ int rows, ++ int cols, ++ int flimit); ++#define vp9_highbd_post_proc_down_and_across \ ++ vp9_highbd_post_proc_down_and_across_c ++ ++void vp9_highbd_quantize_fp_c(const tran_low_t* coeff_ptr, ++ intptr_t n_coeffs, ++ int skip_block, ++ const int16_t* round_ptr, ++ const int16_t* quant_ptr, ++ tran_low_t* qcoeff_ptr, ++ tran_low_t* dqcoeff_ptr, ++ const int16_t* dequant_ptr, ++ uint16_t* eob_ptr, ++ const int16_t* scan, ++ const int16_t* iscan); ++#define vp9_highbd_quantize_fp vp9_highbd_quantize_fp_c ++ ++void vp9_highbd_quantize_fp_32x32_c(const tran_low_t* coeff_ptr, ++ intptr_t n_coeffs, ++ int skip_block, ++ const int16_t* round_ptr, ++ const int16_t* quant_ptr, ++ tran_low_t* qcoeff_ptr, ++ tran_low_t* dqcoeff_ptr, ++ const int16_t* dequant_ptr, ++ uint16_t* eob_ptr, ++ const int16_t* scan, ++ const int16_t* iscan); ++#define vp9_highbd_quantize_fp_32x32 vp9_highbd_quantize_fp_32x32_c ++ ++void vp9_highbd_temporal_filter_apply_c(const uint8_t* frame1, ++ unsigned int stride, ++ const uint8_t* frame2, ++ unsigned int block_width, ++ unsigned int block_height, ++ int strength, ++ int* blk_fw, ++ int use_32x32, ++ uint32_t* accumulator, ++ uint16_t* count); ++#define vp9_highbd_temporal_filter_apply vp9_highbd_temporal_filter_apply_c ++ ++void vp9_iht16x16_256_add_c(const tran_low_t* input, ++ uint8_t* dest, ++ int stride, ++ int tx_type); ++#define vp9_iht16x16_256_add vp9_iht16x16_256_add_c ++ ++void vp9_iht4x4_16_add_c(const tran_low_t* input, ++ uint8_t* dest, ++ int stride, ++ int tx_type); ++#define vp9_iht4x4_16_add vp9_iht4x4_16_add_c ++ ++void vp9_iht8x8_64_add_c(const tran_low_t* input, ++ uint8_t* dest, ++ int stride, ++ int tx_type); ++#define vp9_iht8x8_64_add vp9_iht8x8_64_add_c ++ ++void vp9_quantize_fp_c(const tran_low_t* coeff_ptr, ++ intptr_t n_coeffs, ++ int skip_block, ++ const int16_t* round_ptr, ++ const int16_t* quant_ptr, ++ tran_low_t* qcoeff_ptr, ++ tran_low_t* dqcoeff_ptr, ++ const int16_t* dequant_ptr, ++ uint16_t* eob_ptr, ++ const int16_t* scan, ++ const int16_t* iscan); ++#define vp9_quantize_fp vp9_quantize_fp_c ++ ++void vp9_quantize_fp_32x32_c(const tran_low_t* coeff_ptr, ++ intptr_t n_coeffs, ++ int skip_block, ++ const int16_t* round_ptr, ++ const int16_t* quant_ptr, ++ tran_low_t* qcoeff_ptr, ++ tran_low_t* dqcoeff_ptr, ++ const int16_t* dequant_ptr, ++ uint16_t* eob_ptr, ++ const int16_t* scan, ++ const int16_t* iscan); ++#define vp9_quantize_fp_32x32 vp9_quantize_fp_32x32_c ++ ++void vp9_scale_and_extend_frame_c(const struct yv12_buffer_config* src, ++ struct yv12_buffer_config* dst, ++ INTERP_FILTER filter_type, ++ int phase_scaler); ++#define vp9_scale_and_extend_frame vp9_scale_and_extend_frame_c ++ ++void vp9_rtcd(void); ++ ++#include "vpx_config.h" ++ ++#ifdef RTCD_C ++static void 
setup_rtcd_internal(void) {} ++#endif ++ ++#ifdef __cplusplus ++} // extern "C" ++#endif ++ ++#endif +diff --git a/src/3rdparty/chromium/third_party/libvpx/source/config/linux/la64/vpx_config.asm b/src/3rdparty/chromium/third_party/libvpx/source/config/linux/la64/vpx_config.asm +new file mode 100644 +index 00000000000..00712e52bbb +--- /dev/null ++++ b/src/3rdparty/chromium/third_party/libvpx/source/config/linux/la64/vpx_config.asm +@@ -0,0 +1,98 @@ ++@ This file was created from a .asm file ++@ using the ads2gas.pl script. ++ .syntax unified ++.equ VPX_ARCH_ARM , 0 ++.equ ARCH_ARM , 0 ++.equ VPX_ARCH_MIPS , 0 ++.equ ARCH_MIPS , 0 ++.equ VPX_ARCH_X86 , 0 ++.equ ARCH_X86 , 0 ++.equ VPX_ARCH_X86_64 , 0 ++.equ ARCH_X86_64 , 0 ++.equ VPX_ARCH_PPC , 0 ++.equ ARCH_PPC , 0 ++.equ HAVE_NEON , 0 ++.equ HAVE_NEON_ASM , 0 ++.equ HAVE_MIPS32 , 0 ++.equ HAVE_DSPR2 , 0 ++.equ HAVE_MSA , 0 ++.equ HAVE_MIPS64 , 0 ++.equ HAVE_MMX , 0 ++.equ HAVE_SSE , 0 ++.equ HAVE_SSE2 , 0 ++.equ HAVE_SSE3 , 0 ++.equ HAVE_SSSE3 , 0 ++.equ HAVE_SSE4_1 , 0 ++.equ HAVE_AVX , 0 ++.equ HAVE_AVX2 , 0 ++.equ HAVE_AVX512 , 0 ++.equ HAVE_VSX , 0 ++.equ HAVE_MMI , 0 ++.equ HAVE_VPX_PORTS , 1 ++.equ HAVE_PTHREAD_H , 1 ++.equ HAVE_UNISTD_H , 0 ++.equ CONFIG_DEPENDENCY_TRACKING , 1 ++.equ CONFIG_EXTERNAL_BUILD , 1 ++.equ CONFIG_INSTALL_DOCS , 0 ++.equ CONFIG_INSTALL_BINS , 1 ++.equ CONFIG_INSTALL_LIBS , 1 ++.equ CONFIG_INSTALL_SRCS , 0 ++.equ CONFIG_DEBUG , 0 ++.equ CONFIG_GPROF , 0 ++.equ CONFIG_GCOV , 0 ++.equ CONFIG_RVCT , 0 ++.equ CONFIG_GCC , 1 ++.equ CONFIG_MSVS , 0 ++.equ CONFIG_PIC , 0 ++.equ CONFIG_BIG_ENDIAN , 0 ++.equ CONFIG_CODEC_SRCS , 0 ++.equ CONFIG_DEBUG_LIBS , 0 ++.equ CONFIG_DEQUANT_TOKENS , 0 ++.equ CONFIG_DC_RECON , 0 ++.equ CONFIG_RUNTIME_CPU_DETECT , 0 ++.equ CONFIG_POSTPROC , 1 ++.equ CONFIG_VP9_POSTPROC , 1 ++.equ CONFIG_MULTITHREAD , 1 ++.equ CONFIG_INTERNAL_STATS , 0 ++.equ CONFIG_VP8_ENCODER , 1 ++.equ CONFIG_VP8_DECODER , 1 ++.equ CONFIG_VP9_ENCODER , 1 ++.equ CONFIG_VP9_DECODER , 1 ++.equ CONFIG_VP8 , 1 ++.equ CONFIG_VP9 , 1 ++.equ CONFIG_ENCODERS , 1 ++.equ CONFIG_DECODERS , 1 ++.equ CONFIG_STATIC_MSVCRT , 0 ++.equ CONFIG_SPATIAL_RESAMPLING , 1 ++.equ CONFIG_REALTIME_ONLY , 1 ++.equ CONFIG_ONTHEFLY_BITPACKING , 0 ++.equ CONFIG_ERROR_CONCEALMENT , 0 ++.equ CONFIG_SHARED , 0 ++.equ CONFIG_STATIC , 1 ++.equ CONFIG_SMALL , 0 ++.equ CONFIG_POSTPROC_VISUALIZER , 0 ++.equ CONFIG_OS_SUPPORT , 1 ++.equ CONFIG_UNIT_TESTS , 1 ++.equ CONFIG_WEBM_IO , 1 ++.equ CONFIG_LIBYUV , 0 ++.equ CONFIG_DECODE_PERF_TESTS , 0 ++.equ CONFIG_ENCODE_PERF_TESTS , 0 ++.equ CONFIG_MULTI_RES_ENCODING , 1 ++.equ CONFIG_TEMPORAL_DENOISING , 1 ++.equ CONFIG_VP9_TEMPORAL_DENOISING , 1 ++.equ CONFIG_CONSISTENT_RECODE , 0 ++.equ CONFIG_COEFFICIENT_RANGE_CHECKING , 0 ++.equ CONFIG_VP9_HIGHBITDEPTH , 1 ++.equ CONFIG_BETTER_HW_COMPATIBILITY , 0 ++.equ CONFIG_EXPERIMENTAL , 0 ++.equ CONFIG_SIZE_LIMIT , 1 ++.equ CONFIG_ALWAYS_ADJUST_BPM , 0 ++.equ CONFIG_BITSTREAM_DEBUG , 0 ++.equ CONFIG_MISMATCH_DEBUG , 0 ++.equ CONFIG_FP_MB_STATS , 0 ++.equ CONFIG_EMULATE_HARDWARE , 0 ++.equ CONFIG_NON_GREEDY_MV , 0 ++.equ CONFIG_RATE_CTRL , 0 ++.equ DECODE_WIDTH_LIMIT , 16384 ++.equ DECODE_HEIGHT_LIMIT , 16384 ++ .section .note.GNU-stack,"",%progbits +diff --git a/src/3rdparty/chromium/third_party/libvpx/source/config/linux/la64/vpx_config.c b/src/3rdparty/chromium/third_party/libvpx/source/config/linux/la64/vpx_config.c +new file mode 100644 +index 00000000000..8aad25ff174 +--- /dev/null ++++ 
b/src/3rdparty/chromium/third_party/libvpx/source/config/linux/la64/vpx_config.c +@@ -0,0 +1,10 @@ ++/* Copyright (c) 2011 The WebM project authors. All Rights Reserved. */ ++/* */ ++/* Use of this source code is governed by a BSD-style license */ ++/* that can be found in the LICENSE file in the root of the source */ ++/* tree. An additional intellectual property rights grant can be found */ ++/* in the file PATENTS. All contributing project authors may */ ++/* be found in the AUTHORS file in the root of the source tree. */ ++#include "vpx/vpx_codec.h" ++static const char* const cfg = "--target=generic-gnu --enable-vp9-highbitdepth --enable-external-build --enable-postproc --enable-multi-res-encoding --enable-temporal-denoising --enable-vp9-temporal-denoising --enable-vp9-postproc --size-limit=16384x16384 --enable-realtime-only --disable-install-docs --disable-libyuv"; ++const char *vpx_codec_build_config(void) {return cfg;} +diff --git a/src/3rdparty/chromium/third_party/libvpx/source/config/linux/la64/vpx_config.h b/src/3rdparty/chromium/third_party/libvpx/source/config/linux/la64/vpx_config.h +new file mode 100644 +index 00000000000..fddb76bd2f9 +--- /dev/null ++++ b/src/3rdparty/chromium/third_party/libvpx/source/config/linux/la64/vpx_config.h +@@ -0,0 +1,107 @@ ++/* Copyright (c) 2011 The WebM project authors. All Rights Reserved. */ ++/* */ ++/* Use of this source code is governed by a BSD-style license */ ++/* that can be found in the LICENSE file in the root of the source */ ++/* tree. An additional intellectual property rights grant can be found */ ++/* in the file PATENTS. All contributing project authors may */ ++/* be found in the AUTHORS file in the root of the source tree. */ ++/* This file automatically generated by configure. Do not edit! 
*/ ++#ifndef VPX_CONFIG_H ++#define VPX_CONFIG_H ++#define RESTRICT ++#define INLINE inline ++#define VPX_ARCH_ARM 0 ++#define ARCH_ARM 0 ++#define VPX_ARCH_MIPS 0 ++#define ARCH_MIPS 0 ++#define VPX_ARCH_X86 0 ++#define ARCH_X86 0 ++#define VPX_ARCH_X86_64 0 ++#define ARCH_X86_64 0 ++#define VPX_ARCH_PPC 0 ++#define ARCH_PPC 0 ++#define HAVE_NEON 0 ++#define HAVE_NEON_ASM 0 ++#define HAVE_MIPS32 0 ++#define HAVE_DSPR2 0 ++#define HAVE_MSA 0 ++#define HAVE_MIPS64 0 ++#define HAVE_MMX 0 ++#define HAVE_SSE 0 ++#define HAVE_SSE2 0 ++#define HAVE_SSE3 0 ++#define HAVE_SSSE3 0 ++#define HAVE_SSE4_1 0 ++#define HAVE_AVX 0 ++#define HAVE_AVX2 0 ++#define HAVE_AVX512 0 ++#define HAVE_VSX 0 ++#define HAVE_MMI 0 ++#define HAVE_VPX_PORTS 1 ++#define HAVE_PTHREAD_H 1 ++#define HAVE_UNISTD_H 0 ++#define CONFIG_DEPENDENCY_TRACKING 1 ++#define CONFIG_EXTERNAL_BUILD 1 ++#define CONFIG_INSTALL_DOCS 0 ++#define CONFIG_INSTALL_BINS 1 ++#define CONFIG_INSTALL_LIBS 1 ++#define CONFIG_INSTALL_SRCS 0 ++#define CONFIG_DEBUG 0 ++#define CONFIG_GPROF 0 ++#define CONFIG_GCOV 0 ++#define CONFIG_RVCT 0 ++#define CONFIG_GCC 1 ++#define CONFIG_MSVS 0 ++#define CONFIG_PIC 0 ++#define CONFIG_BIG_ENDIAN 0 ++#define CONFIG_CODEC_SRCS 0 ++#define CONFIG_DEBUG_LIBS 0 ++#define CONFIG_DEQUANT_TOKENS 0 ++#define CONFIG_DC_RECON 0 ++#define CONFIG_RUNTIME_CPU_DETECT 0 ++#define CONFIG_POSTPROC 1 ++#define CONFIG_VP9_POSTPROC 1 ++#define CONFIG_MULTITHREAD 1 ++#define CONFIG_INTERNAL_STATS 0 ++#define CONFIG_VP8_ENCODER 1 ++#define CONFIG_VP8_DECODER 1 ++#define CONFIG_VP9_ENCODER 1 ++#define CONFIG_VP9_DECODER 1 ++#define CONFIG_VP8 1 ++#define CONFIG_VP9 1 ++#define CONFIG_ENCODERS 1 ++#define CONFIG_DECODERS 1 ++#define CONFIG_STATIC_MSVCRT 0 ++#define CONFIG_SPATIAL_RESAMPLING 1 ++#define CONFIG_REALTIME_ONLY 1 ++#define CONFIG_ONTHEFLY_BITPACKING 0 ++#define CONFIG_ERROR_CONCEALMENT 0 ++#define CONFIG_SHARED 0 ++#define CONFIG_STATIC 1 ++#define CONFIG_SMALL 0 ++#define CONFIG_POSTPROC_VISUALIZER 0 ++#define CONFIG_OS_SUPPORT 1 ++#define CONFIG_UNIT_TESTS 1 ++#define CONFIG_WEBM_IO 1 ++#define CONFIG_LIBYUV 0 ++#define CONFIG_DECODE_PERF_TESTS 0 ++#define CONFIG_ENCODE_PERF_TESTS 0 ++#define CONFIG_MULTI_RES_ENCODING 1 ++#define CONFIG_TEMPORAL_DENOISING 1 ++#define CONFIG_VP9_TEMPORAL_DENOISING 1 ++#define CONFIG_CONSISTENT_RECODE 0 ++#define CONFIG_COEFFICIENT_RANGE_CHECKING 0 ++#define CONFIG_VP9_HIGHBITDEPTH 1 ++#define CONFIG_BETTER_HW_COMPATIBILITY 0 ++#define CONFIG_EXPERIMENTAL 0 ++#define CONFIG_SIZE_LIMIT 1 ++#define CONFIG_ALWAYS_ADJUST_BPM 0 ++#define CONFIG_BITSTREAM_DEBUG 0 ++#define CONFIG_MISMATCH_DEBUG 0 ++#define CONFIG_FP_MB_STATS 0 ++#define CONFIG_EMULATE_HARDWARE 0 ++#define CONFIG_NON_GREEDY_MV 0 ++#define CONFIG_RATE_CTRL 0 ++#define DECODE_WIDTH_LIMIT 16384 ++#define DECODE_HEIGHT_LIMIT 16384 ++#endif /* VPX_CONFIG_H */ +diff --git a/src/3rdparty/chromium/third_party/libvpx/source/config/linux/la64/vpx_dsp_rtcd.h b/src/3rdparty/chromium/third_party/libvpx/source/config/linux/la64/vpx_dsp_rtcd.h +new file mode 100644 +index 00000000000..8ba4d88055d +--- /dev/null ++++ b/src/3rdparty/chromium/third_party/libvpx/source/config/linux/la64/vpx_dsp_rtcd.h +@@ -0,0 +1,3868 @@ ++// This file is generated. Do not edit. 
++#ifndef VPX_DSP_RTCD_H_ ++#define VPX_DSP_RTCD_H_ ++ ++#ifdef RTCD_C ++#define RTCD_EXTERN ++#else ++#define RTCD_EXTERN extern ++#endif ++ ++/* ++ * DSP ++ */ ++ ++#include "vpx/vpx_integer.h" ++#include "vpx_dsp/vpx_dsp_common.h" ++#include "vpx_dsp/vpx_filter.h" ++ ++#ifdef __cplusplus ++extern "C" { ++#endif ++ ++unsigned int vpx_avg_4x4_c(const uint8_t*, int p); ++#define vpx_avg_4x4 vpx_avg_4x4_c ++ ++unsigned int vpx_avg_8x8_c(const uint8_t*, int p); ++#define vpx_avg_8x8 vpx_avg_8x8_c ++ ++void vpx_comp_avg_pred_c(uint8_t* comp_pred, ++ const uint8_t* pred, ++ int width, ++ int height, ++ const uint8_t* ref, ++ int ref_stride); ++#define vpx_comp_avg_pred vpx_comp_avg_pred_c ++ ++void vpx_convolve8_c(const uint8_t* src, ++ ptrdiff_t src_stride, ++ uint8_t* dst, ++ ptrdiff_t dst_stride, ++ const InterpKernel* filter, ++ int x0_q4, ++ int x_step_q4, ++ int y0_q4, ++ int y_step_q4, ++ int w, ++ int h); ++#define vpx_convolve8 vpx_convolve8_c ++ ++void vpx_convolve8_avg_c(const uint8_t* src, ++ ptrdiff_t src_stride, ++ uint8_t* dst, ++ ptrdiff_t dst_stride, ++ const InterpKernel* filter, ++ int x0_q4, ++ int x_step_q4, ++ int y0_q4, ++ int y_step_q4, ++ int w, ++ int h); ++#define vpx_convolve8_avg vpx_convolve8_avg_c ++ ++void vpx_convolve8_avg_horiz_c(const uint8_t* src, ++ ptrdiff_t src_stride, ++ uint8_t* dst, ++ ptrdiff_t dst_stride, ++ const InterpKernel* filter, ++ int x0_q4, ++ int x_step_q4, ++ int y0_q4, ++ int y_step_q4, ++ int w, ++ int h); ++#define vpx_convolve8_avg_horiz vpx_convolve8_avg_horiz_c ++ ++void vpx_convolve8_avg_vert_c(const uint8_t* src, ++ ptrdiff_t src_stride, ++ uint8_t* dst, ++ ptrdiff_t dst_stride, ++ const InterpKernel* filter, ++ int x0_q4, ++ int x_step_q4, ++ int y0_q4, ++ int y_step_q4, ++ int w, ++ int h); ++#define vpx_convolve8_avg_vert vpx_convolve8_avg_vert_c ++ ++void vpx_convolve8_horiz_c(const uint8_t* src, ++ ptrdiff_t src_stride, ++ uint8_t* dst, ++ ptrdiff_t dst_stride, ++ const InterpKernel* filter, ++ int x0_q4, ++ int x_step_q4, ++ int y0_q4, ++ int y_step_q4, ++ int w, ++ int h); ++#define vpx_convolve8_horiz vpx_convolve8_horiz_c ++ ++void vpx_convolve8_vert_c(const uint8_t* src, ++ ptrdiff_t src_stride, ++ uint8_t* dst, ++ ptrdiff_t dst_stride, ++ const InterpKernel* filter, ++ int x0_q4, ++ int x_step_q4, ++ int y0_q4, ++ int y_step_q4, ++ int w, ++ int h); ++#define vpx_convolve8_vert vpx_convolve8_vert_c ++ ++void vpx_convolve_avg_c(const uint8_t* src, ++ ptrdiff_t src_stride, ++ uint8_t* dst, ++ ptrdiff_t dst_stride, ++ const InterpKernel* filter, ++ int x0_q4, ++ int x_step_q4, ++ int y0_q4, ++ int y_step_q4, ++ int w, ++ int h); ++#define vpx_convolve_avg vpx_convolve_avg_c ++ ++void vpx_convolve_copy_c(const uint8_t* src, ++ ptrdiff_t src_stride, ++ uint8_t* dst, ++ ptrdiff_t dst_stride, ++ const InterpKernel* filter, ++ int x0_q4, ++ int x_step_q4, ++ int y0_q4, ++ int y_step_q4, ++ int w, ++ int h); ++#define vpx_convolve_copy vpx_convolve_copy_c ++ ++void vpx_d117_predictor_16x16_c(uint8_t* dst, ++ ptrdiff_t stride, ++ const uint8_t* above, ++ const uint8_t* left); ++#define vpx_d117_predictor_16x16 vpx_d117_predictor_16x16_c ++ ++void vpx_d117_predictor_32x32_c(uint8_t* dst, ++ ptrdiff_t stride, ++ const uint8_t* above, ++ const uint8_t* left); ++#define vpx_d117_predictor_32x32 vpx_d117_predictor_32x32_c ++ ++void vpx_d117_predictor_4x4_c(uint8_t* dst, ++ ptrdiff_t stride, ++ const uint8_t* above, ++ const uint8_t* left); ++#define vpx_d117_predictor_4x4 vpx_d117_predictor_4x4_c ++ ++void 
vpx_d117_predictor_8x8_c(uint8_t* dst, ++ ptrdiff_t stride, ++ const uint8_t* above, ++ const uint8_t* left); ++#define vpx_d117_predictor_8x8 vpx_d117_predictor_8x8_c ++ ++void vpx_d135_predictor_16x16_c(uint8_t* dst, ++ ptrdiff_t stride, ++ const uint8_t* above, ++ const uint8_t* left); ++#define vpx_d135_predictor_16x16 vpx_d135_predictor_16x16_c ++ ++void vpx_d135_predictor_32x32_c(uint8_t* dst, ++ ptrdiff_t stride, ++ const uint8_t* above, ++ const uint8_t* left); ++#define vpx_d135_predictor_32x32 vpx_d135_predictor_32x32_c ++ ++void vpx_d135_predictor_4x4_c(uint8_t* dst, ++ ptrdiff_t stride, ++ const uint8_t* above, ++ const uint8_t* left); ++#define vpx_d135_predictor_4x4 vpx_d135_predictor_4x4_c ++ ++void vpx_d135_predictor_8x8_c(uint8_t* dst, ++ ptrdiff_t stride, ++ const uint8_t* above, ++ const uint8_t* left); ++#define vpx_d135_predictor_8x8 vpx_d135_predictor_8x8_c ++ ++void vpx_d153_predictor_16x16_c(uint8_t* dst, ++ ptrdiff_t stride, ++ const uint8_t* above, ++ const uint8_t* left); ++#define vpx_d153_predictor_16x16 vpx_d153_predictor_16x16_c ++ ++void vpx_d153_predictor_32x32_c(uint8_t* dst, ++ ptrdiff_t stride, ++ const uint8_t* above, ++ const uint8_t* left); ++#define vpx_d153_predictor_32x32 vpx_d153_predictor_32x32_c ++ ++void vpx_d153_predictor_4x4_c(uint8_t* dst, ++ ptrdiff_t stride, ++ const uint8_t* above, ++ const uint8_t* left); ++#define vpx_d153_predictor_4x4 vpx_d153_predictor_4x4_c ++ ++void vpx_d153_predictor_8x8_c(uint8_t* dst, ++ ptrdiff_t stride, ++ const uint8_t* above, ++ const uint8_t* left); ++#define vpx_d153_predictor_8x8 vpx_d153_predictor_8x8_c ++ ++void vpx_d207_predictor_16x16_c(uint8_t* dst, ++ ptrdiff_t stride, ++ const uint8_t* above, ++ const uint8_t* left); ++#define vpx_d207_predictor_16x16 vpx_d207_predictor_16x16_c ++ ++void vpx_d207_predictor_32x32_c(uint8_t* dst, ++ ptrdiff_t stride, ++ const uint8_t* above, ++ const uint8_t* left); ++#define vpx_d207_predictor_32x32 vpx_d207_predictor_32x32_c ++ ++void vpx_d207_predictor_4x4_c(uint8_t* dst, ++ ptrdiff_t stride, ++ const uint8_t* above, ++ const uint8_t* left); ++#define vpx_d207_predictor_4x4 vpx_d207_predictor_4x4_c ++ ++void vpx_d207_predictor_8x8_c(uint8_t* dst, ++ ptrdiff_t stride, ++ const uint8_t* above, ++ const uint8_t* left); ++#define vpx_d207_predictor_8x8 vpx_d207_predictor_8x8_c ++ ++void vpx_d45_predictor_16x16_c(uint8_t* dst, ++ ptrdiff_t stride, ++ const uint8_t* above, ++ const uint8_t* left); ++#define vpx_d45_predictor_16x16 vpx_d45_predictor_16x16_c ++ ++void vpx_d45_predictor_32x32_c(uint8_t* dst, ++ ptrdiff_t stride, ++ const uint8_t* above, ++ const uint8_t* left); ++#define vpx_d45_predictor_32x32 vpx_d45_predictor_32x32_c ++ ++void vpx_d45_predictor_4x4_c(uint8_t* dst, ++ ptrdiff_t stride, ++ const uint8_t* above, ++ const uint8_t* left); ++#define vpx_d45_predictor_4x4 vpx_d45_predictor_4x4_c ++ ++void vpx_d45_predictor_8x8_c(uint8_t* dst, ++ ptrdiff_t stride, ++ const uint8_t* above, ++ const uint8_t* left); ++#define vpx_d45_predictor_8x8 vpx_d45_predictor_8x8_c ++ ++void vpx_d45e_predictor_4x4_c(uint8_t* dst, ++ ptrdiff_t stride, ++ const uint8_t* above, ++ const uint8_t* left); ++#define vpx_d45e_predictor_4x4 vpx_d45e_predictor_4x4_c ++ ++void vpx_d63_predictor_16x16_c(uint8_t* dst, ++ ptrdiff_t stride, ++ const uint8_t* above, ++ const uint8_t* left); ++#define vpx_d63_predictor_16x16 vpx_d63_predictor_16x16_c ++ ++void vpx_d63_predictor_32x32_c(uint8_t* dst, ++ ptrdiff_t stride, ++ const uint8_t* above, ++ const uint8_t* left); ++#define 
vpx_d63_predictor_32x32 vpx_d63_predictor_32x32_c ++ ++void vpx_d63_predictor_4x4_c(uint8_t* dst, ++ ptrdiff_t stride, ++ const uint8_t* above, ++ const uint8_t* left); ++#define vpx_d63_predictor_4x4 vpx_d63_predictor_4x4_c ++ ++void vpx_d63_predictor_8x8_c(uint8_t* dst, ++ ptrdiff_t stride, ++ const uint8_t* above, ++ const uint8_t* left); ++#define vpx_d63_predictor_8x8 vpx_d63_predictor_8x8_c ++ ++void vpx_d63e_predictor_4x4_c(uint8_t* dst, ++ ptrdiff_t stride, ++ const uint8_t* above, ++ const uint8_t* left); ++#define vpx_d63e_predictor_4x4 vpx_d63e_predictor_4x4_c ++ ++void vpx_dc_128_predictor_16x16_c(uint8_t* dst, ++ ptrdiff_t stride, ++ const uint8_t* above, ++ const uint8_t* left); ++#define vpx_dc_128_predictor_16x16 vpx_dc_128_predictor_16x16_c ++ ++void vpx_dc_128_predictor_32x32_c(uint8_t* dst, ++ ptrdiff_t stride, ++ const uint8_t* above, ++ const uint8_t* left); ++#define vpx_dc_128_predictor_32x32 vpx_dc_128_predictor_32x32_c ++ ++void vpx_dc_128_predictor_4x4_c(uint8_t* dst, ++ ptrdiff_t stride, ++ const uint8_t* above, ++ const uint8_t* left); ++#define vpx_dc_128_predictor_4x4 vpx_dc_128_predictor_4x4_c ++ ++void vpx_dc_128_predictor_8x8_c(uint8_t* dst, ++ ptrdiff_t stride, ++ const uint8_t* above, ++ const uint8_t* left); ++#define vpx_dc_128_predictor_8x8 vpx_dc_128_predictor_8x8_c ++ ++void vpx_dc_left_predictor_16x16_c(uint8_t* dst, ++ ptrdiff_t stride, ++ const uint8_t* above, ++ const uint8_t* left); ++#define vpx_dc_left_predictor_16x16 vpx_dc_left_predictor_16x16_c ++ ++void vpx_dc_left_predictor_32x32_c(uint8_t* dst, ++ ptrdiff_t stride, ++ const uint8_t* above, ++ const uint8_t* left); ++#define vpx_dc_left_predictor_32x32 vpx_dc_left_predictor_32x32_c ++ ++void vpx_dc_left_predictor_4x4_c(uint8_t* dst, ++ ptrdiff_t stride, ++ const uint8_t* above, ++ const uint8_t* left); ++#define vpx_dc_left_predictor_4x4 vpx_dc_left_predictor_4x4_c ++ ++void vpx_dc_left_predictor_8x8_c(uint8_t* dst, ++ ptrdiff_t stride, ++ const uint8_t* above, ++ const uint8_t* left); ++#define vpx_dc_left_predictor_8x8 vpx_dc_left_predictor_8x8_c ++ ++void vpx_dc_predictor_16x16_c(uint8_t* dst, ++ ptrdiff_t stride, ++ const uint8_t* above, ++ const uint8_t* left); ++#define vpx_dc_predictor_16x16 vpx_dc_predictor_16x16_c ++ ++void vpx_dc_predictor_32x32_c(uint8_t* dst, ++ ptrdiff_t stride, ++ const uint8_t* above, ++ const uint8_t* left); ++#define vpx_dc_predictor_32x32 vpx_dc_predictor_32x32_c ++ ++void vpx_dc_predictor_4x4_c(uint8_t* dst, ++ ptrdiff_t stride, ++ const uint8_t* above, ++ const uint8_t* left); ++#define vpx_dc_predictor_4x4 vpx_dc_predictor_4x4_c ++ ++void vpx_dc_predictor_8x8_c(uint8_t* dst, ++ ptrdiff_t stride, ++ const uint8_t* above, ++ const uint8_t* left); ++#define vpx_dc_predictor_8x8 vpx_dc_predictor_8x8_c ++ ++void vpx_dc_top_predictor_16x16_c(uint8_t* dst, ++ ptrdiff_t stride, ++ const uint8_t* above, ++ const uint8_t* left); ++#define vpx_dc_top_predictor_16x16 vpx_dc_top_predictor_16x16_c ++ ++void vpx_dc_top_predictor_32x32_c(uint8_t* dst, ++ ptrdiff_t stride, ++ const uint8_t* above, ++ const uint8_t* left); ++#define vpx_dc_top_predictor_32x32 vpx_dc_top_predictor_32x32_c ++ ++void vpx_dc_top_predictor_4x4_c(uint8_t* dst, ++ ptrdiff_t stride, ++ const uint8_t* above, ++ const uint8_t* left); ++#define vpx_dc_top_predictor_4x4 vpx_dc_top_predictor_4x4_c ++ ++void vpx_dc_top_predictor_8x8_c(uint8_t* dst, ++ ptrdiff_t stride, ++ const uint8_t* above, ++ const uint8_t* left); ++#define vpx_dc_top_predictor_8x8 vpx_dc_top_predictor_8x8_c ++ ++void 
vpx_fdct16x16_c(const int16_t* input, tran_low_t* output, int stride); ++#define vpx_fdct16x16 vpx_fdct16x16_c ++ ++void vpx_fdct16x16_1_c(const int16_t* input, tran_low_t* output, int stride); ++#define vpx_fdct16x16_1 vpx_fdct16x16_1_c ++ ++void vpx_fdct32x32_c(const int16_t* input, tran_low_t* output, int stride); ++#define vpx_fdct32x32 vpx_fdct32x32_c ++ ++void vpx_fdct32x32_1_c(const int16_t* input, tran_low_t* output, int stride); ++#define vpx_fdct32x32_1 vpx_fdct32x32_1_c ++ ++void vpx_fdct32x32_rd_c(const int16_t* input, tran_low_t* output, int stride); ++#define vpx_fdct32x32_rd vpx_fdct32x32_rd_c ++ ++void vpx_fdct4x4_c(const int16_t* input, tran_low_t* output, int stride); ++#define vpx_fdct4x4 vpx_fdct4x4_c ++ ++void vpx_fdct4x4_1_c(const int16_t* input, tran_low_t* output, int stride); ++#define vpx_fdct4x4_1 vpx_fdct4x4_1_c ++ ++void vpx_fdct8x8_c(const int16_t* input, tran_low_t* output, int stride); ++#define vpx_fdct8x8 vpx_fdct8x8_c ++ ++void vpx_fdct8x8_1_c(const int16_t* input, tran_low_t* output, int stride); ++#define vpx_fdct8x8_1 vpx_fdct8x8_1_c ++ ++void vpx_get16x16var_c(const uint8_t* src_ptr, ++ int src_stride, ++ const uint8_t* ref_ptr, ++ int ref_stride, ++ unsigned int* sse, ++ int* sum); ++#define vpx_get16x16var vpx_get16x16var_c ++ ++unsigned int vpx_get4x4sse_cs_c(const unsigned char* src_ptr, ++ int src_stride, ++ const unsigned char* ref_ptr, ++ int ref_stride); ++#define vpx_get4x4sse_cs vpx_get4x4sse_cs_c ++ ++void vpx_get8x8var_c(const uint8_t* src_ptr, ++ int src_stride, ++ const uint8_t* ref_ptr, ++ int ref_stride, ++ unsigned int* sse, ++ int* sum); ++#define vpx_get8x8var vpx_get8x8var_c ++ ++unsigned int vpx_get_mb_ss_c(const int16_t*); ++#define vpx_get_mb_ss vpx_get_mb_ss_c ++ ++void vpx_h_predictor_16x16_c(uint8_t* dst, ++ ptrdiff_t stride, ++ const uint8_t* above, ++ const uint8_t* left); ++#define vpx_h_predictor_16x16 vpx_h_predictor_16x16_c ++ ++void vpx_h_predictor_32x32_c(uint8_t* dst, ++ ptrdiff_t stride, ++ const uint8_t* above, ++ const uint8_t* left); ++#define vpx_h_predictor_32x32 vpx_h_predictor_32x32_c ++ ++void vpx_h_predictor_4x4_c(uint8_t* dst, ++ ptrdiff_t stride, ++ const uint8_t* above, ++ const uint8_t* left); ++#define vpx_h_predictor_4x4 vpx_h_predictor_4x4_c ++ ++void vpx_h_predictor_8x8_c(uint8_t* dst, ++ ptrdiff_t stride, ++ const uint8_t* above, ++ const uint8_t* left); ++#define vpx_h_predictor_8x8 vpx_h_predictor_8x8_c ++ ++void vpx_hadamard_16x16_c(const int16_t* src_diff, ++ ptrdiff_t src_stride, ++ tran_low_t* coeff); ++#define vpx_hadamard_16x16 vpx_hadamard_16x16_c ++ ++void vpx_hadamard_32x32_c(const int16_t* src_diff, ++ ptrdiff_t src_stride, ++ tran_low_t* coeff); ++#define vpx_hadamard_32x32 vpx_hadamard_32x32_c ++ ++void vpx_hadamard_8x8_c(const int16_t* src_diff, ++ ptrdiff_t src_stride, ++ tran_low_t* coeff); ++#define vpx_hadamard_8x8 vpx_hadamard_8x8_c ++ ++void vpx_he_predictor_4x4_c(uint8_t* dst, ++ ptrdiff_t stride, ++ const uint8_t* above, ++ const uint8_t* left); ++#define vpx_he_predictor_4x4 vpx_he_predictor_4x4_c ++ ++void vpx_highbd_10_get16x16var_c(const uint8_t* src_ptr, ++ int src_stride, ++ const uint8_t* ref_ptr, ++ int ref_stride, ++ unsigned int* sse, ++ int* sum); ++#define vpx_highbd_10_get16x16var vpx_highbd_10_get16x16var_c ++ ++void vpx_highbd_10_get8x8var_c(const uint8_t* src_ptr, ++ int src_stride, ++ const uint8_t* ref_ptr, ++ int ref_stride, ++ unsigned int* sse, ++ int* sum); ++#define vpx_highbd_10_get8x8var vpx_highbd_10_get8x8var_c ++ ++unsigned int 
vpx_highbd_10_mse16x16_c(const uint8_t* src_ptr, ++ int src_stride, ++ const uint8_t* ref_ptr, ++ int ref_stride, ++ unsigned int* sse); ++#define vpx_highbd_10_mse16x16 vpx_highbd_10_mse16x16_c ++ ++unsigned int vpx_highbd_10_mse16x8_c(const uint8_t* src_ptr, ++ int src_stride, ++ const uint8_t* ref_ptr, ++ int ref_stride, ++ unsigned int* sse); ++#define vpx_highbd_10_mse16x8 vpx_highbd_10_mse16x8_c ++ ++unsigned int vpx_highbd_10_mse8x16_c(const uint8_t* src_ptr, ++ int src_stride, ++ const uint8_t* ref_ptr, ++ int ref_stride, ++ unsigned int* sse); ++#define vpx_highbd_10_mse8x16 vpx_highbd_10_mse8x16_c ++ ++unsigned int vpx_highbd_10_mse8x8_c(const uint8_t* src_ptr, ++ int src_stride, ++ const uint8_t* ref_ptr, ++ int ref_stride, ++ unsigned int* sse); ++#define vpx_highbd_10_mse8x8 vpx_highbd_10_mse8x8_c ++ ++uint32_t vpx_highbd_10_sub_pixel_avg_variance16x16_c( ++ const uint8_t* src_ptr, ++ int src_stride, ++ int x_offset, ++ int y_offset, ++ const uint8_t* ref_ptr, ++ int ref_stride, ++ uint32_t* sse, ++ const uint8_t* second_pred); ++#define vpx_highbd_10_sub_pixel_avg_variance16x16 \ ++ vpx_highbd_10_sub_pixel_avg_variance16x16_c ++ ++uint32_t vpx_highbd_10_sub_pixel_avg_variance16x32_c( ++ const uint8_t* src_ptr, ++ int src_stride, ++ int x_offset, ++ int y_offset, ++ const uint8_t* ref_ptr, ++ int ref_stride, ++ uint32_t* sse, ++ const uint8_t* second_pred); ++#define vpx_highbd_10_sub_pixel_avg_variance16x32 \ ++ vpx_highbd_10_sub_pixel_avg_variance16x32_c ++ ++uint32_t vpx_highbd_10_sub_pixel_avg_variance16x8_c(const uint8_t* src_ptr, ++ int src_stride, ++ int x_offset, ++ int y_offset, ++ const uint8_t* ref_ptr, ++ int ref_stride, ++ uint32_t* sse, ++ const uint8_t* second_pred); ++#define vpx_highbd_10_sub_pixel_avg_variance16x8 \ ++ vpx_highbd_10_sub_pixel_avg_variance16x8_c ++ ++uint32_t vpx_highbd_10_sub_pixel_avg_variance32x16_c( ++ const uint8_t* src_ptr, ++ int src_stride, ++ int x_offset, ++ int y_offset, ++ const uint8_t* ref_ptr, ++ int ref_stride, ++ uint32_t* sse, ++ const uint8_t* second_pred); ++#define vpx_highbd_10_sub_pixel_avg_variance32x16 \ ++ vpx_highbd_10_sub_pixel_avg_variance32x16_c ++ ++uint32_t vpx_highbd_10_sub_pixel_avg_variance32x32_c( ++ const uint8_t* src_ptr, ++ int src_stride, ++ int x_offset, ++ int y_offset, ++ const uint8_t* ref_ptr, ++ int ref_stride, ++ uint32_t* sse, ++ const uint8_t* second_pred); ++#define vpx_highbd_10_sub_pixel_avg_variance32x32 \ ++ vpx_highbd_10_sub_pixel_avg_variance32x32_c ++ ++uint32_t vpx_highbd_10_sub_pixel_avg_variance32x64_c( ++ const uint8_t* src_ptr, ++ int src_stride, ++ int x_offset, ++ int y_offset, ++ const uint8_t* ref_ptr, ++ int ref_stride, ++ uint32_t* sse, ++ const uint8_t* second_pred); ++#define vpx_highbd_10_sub_pixel_avg_variance32x64 \ ++ vpx_highbd_10_sub_pixel_avg_variance32x64_c ++ ++uint32_t vpx_highbd_10_sub_pixel_avg_variance4x4_c(const uint8_t* src_ptr, ++ int src_stride, ++ int x_offset, ++ int y_offset, ++ const uint8_t* ref_ptr, ++ int ref_stride, ++ uint32_t* sse, ++ const uint8_t* second_pred); ++#define vpx_highbd_10_sub_pixel_avg_variance4x4 \ ++ vpx_highbd_10_sub_pixel_avg_variance4x4_c ++ ++uint32_t vpx_highbd_10_sub_pixel_avg_variance4x8_c(const uint8_t* src_ptr, ++ int src_stride, ++ int x_offset, ++ int y_offset, ++ const uint8_t* ref_ptr, ++ int ref_stride, ++ uint32_t* sse, ++ const uint8_t* second_pred); ++#define vpx_highbd_10_sub_pixel_avg_variance4x8 \ ++ vpx_highbd_10_sub_pixel_avg_variance4x8_c ++ ++uint32_t vpx_highbd_10_sub_pixel_avg_variance64x32_c( ++ const 
uint8_t* src_ptr, ++ int src_stride, ++ int x_offset, ++ int y_offset, ++ const uint8_t* ref_ptr, ++ int ref_stride, ++ uint32_t* sse, ++ const uint8_t* second_pred); ++#define vpx_highbd_10_sub_pixel_avg_variance64x32 \ ++ vpx_highbd_10_sub_pixel_avg_variance64x32_c ++ ++uint32_t vpx_highbd_10_sub_pixel_avg_variance64x64_c( ++ const uint8_t* src_ptr, ++ int src_stride, ++ int x_offset, ++ int y_offset, ++ const uint8_t* ref_ptr, ++ int ref_stride, ++ uint32_t* sse, ++ const uint8_t* second_pred); ++#define vpx_highbd_10_sub_pixel_avg_variance64x64 \ ++ vpx_highbd_10_sub_pixel_avg_variance64x64_c ++ ++uint32_t vpx_highbd_10_sub_pixel_avg_variance8x16_c(const uint8_t* src_ptr, ++ int src_stride, ++ int x_offset, ++ int y_offset, ++ const uint8_t* ref_ptr, ++ int ref_stride, ++ uint32_t* sse, ++ const uint8_t* second_pred); ++#define vpx_highbd_10_sub_pixel_avg_variance8x16 \ ++ vpx_highbd_10_sub_pixel_avg_variance8x16_c ++ ++uint32_t vpx_highbd_10_sub_pixel_avg_variance8x4_c(const uint8_t* src_ptr, ++ int src_stride, ++ int x_offset, ++ int y_offset, ++ const uint8_t* ref_ptr, ++ int ref_stride, ++ uint32_t* sse, ++ const uint8_t* second_pred); ++#define vpx_highbd_10_sub_pixel_avg_variance8x4 \ ++ vpx_highbd_10_sub_pixel_avg_variance8x4_c ++ ++uint32_t vpx_highbd_10_sub_pixel_avg_variance8x8_c(const uint8_t* src_ptr, ++ int src_stride, ++ int x_offset, ++ int y_offset, ++ const uint8_t* ref_ptr, ++ int ref_stride, ++ uint32_t* sse, ++ const uint8_t* second_pred); ++#define vpx_highbd_10_sub_pixel_avg_variance8x8 \ ++ vpx_highbd_10_sub_pixel_avg_variance8x8_c ++ ++uint32_t vpx_highbd_10_sub_pixel_variance16x16_c(const uint8_t* src_ptr, ++ int src_stride, ++ int x_offset, ++ int y_offset, ++ const uint8_t* ref_ptr, ++ int ref_stride, ++ uint32_t* sse); ++#define vpx_highbd_10_sub_pixel_variance16x16 \ ++ vpx_highbd_10_sub_pixel_variance16x16_c ++ ++uint32_t vpx_highbd_10_sub_pixel_variance16x32_c(const uint8_t* src_ptr, ++ int src_stride, ++ int x_offset, ++ int y_offset, ++ const uint8_t* ref_ptr, ++ int ref_stride, ++ uint32_t* sse); ++#define vpx_highbd_10_sub_pixel_variance16x32 \ ++ vpx_highbd_10_sub_pixel_variance16x32_c ++ ++uint32_t vpx_highbd_10_sub_pixel_variance16x8_c(const uint8_t* src_ptr, ++ int src_stride, ++ int x_offset, ++ int y_offset, ++ const uint8_t* ref_ptr, ++ int ref_stride, ++ uint32_t* sse); ++#define vpx_highbd_10_sub_pixel_variance16x8 \ ++ vpx_highbd_10_sub_pixel_variance16x8_c ++ ++uint32_t vpx_highbd_10_sub_pixel_variance32x16_c(const uint8_t* src_ptr, ++ int src_stride, ++ int x_offset, ++ int y_offset, ++ const uint8_t* ref_ptr, ++ int ref_stride, ++ uint32_t* sse); ++#define vpx_highbd_10_sub_pixel_variance32x16 \ ++ vpx_highbd_10_sub_pixel_variance32x16_c ++ ++uint32_t vpx_highbd_10_sub_pixel_variance32x32_c(const uint8_t* src_ptr, ++ int src_stride, ++ int x_offset, ++ int y_offset, ++ const uint8_t* ref_ptr, ++ int ref_stride, ++ uint32_t* sse); ++#define vpx_highbd_10_sub_pixel_variance32x32 \ ++ vpx_highbd_10_sub_pixel_variance32x32_c ++ ++uint32_t vpx_highbd_10_sub_pixel_variance32x64_c(const uint8_t* src_ptr, ++ int src_stride, ++ int x_offset, ++ int y_offset, ++ const uint8_t* ref_ptr, ++ int ref_stride, ++ uint32_t* sse); ++#define vpx_highbd_10_sub_pixel_variance32x64 \ ++ vpx_highbd_10_sub_pixel_variance32x64_c ++ ++uint32_t vpx_highbd_10_sub_pixel_variance4x4_c(const uint8_t* src_ptr, ++ int src_stride, ++ int x_offset, ++ int y_offset, ++ const uint8_t* ref_ptr, ++ int ref_stride, ++ uint32_t* sse); ++#define 
vpx_highbd_10_sub_pixel_variance4x4 \ ++ vpx_highbd_10_sub_pixel_variance4x4_c ++ ++uint32_t vpx_highbd_10_sub_pixel_variance4x8_c(const uint8_t* src_ptr, ++ int src_stride, ++ int x_offset, ++ int y_offset, ++ const uint8_t* ref_ptr, ++ int ref_stride, ++ uint32_t* sse); ++#define vpx_highbd_10_sub_pixel_variance4x8 \ ++ vpx_highbd_10_sub_pixel_variance4x8_c ++ ++uint32_t vpx_highbd_10_sub_pixel_variance64x32_c(const uint8_t* src_ptr, ++ int src_stride, ++ int x_offset, ++ int y_offset, ++ const uint8_t* ref_ptr, ++ int ref_stride, ++ uint32_t* sse); ++#define vpx_highbd_10_sub_pixel_variance64x32 \ ++ vpx_highbd_10_sub_pixel_variance64x32_c ++ ++uint32_t vpx_highbd_10_sub_pixel_variance64x64_c(const uint8_t* src_ptr, ++ int src_stride, ++ int x_offset, ++ int y_offset, ++ const uint8_t* ref_ptr, ++ int ref_stride, ++ uint32_t* sse); ++#define vpx_highbd_10_sub_pixel_variance64x64 \ ++ vpx_highbd_10_sub_pixel_variance64x64_c ++ ++uint32_t vpx_highbd_10_sub_pixel_variance8x16_c(const uint8_t* src_ptr, ++ int src_stride, ++ int x_offset, ++ int y_offset, ++ const uint8_t* ref_ptr, ++ int ref_stride, ++ uint32_t* sse); ++#define vpx_highbd_10_sub_pixel_variance8x16 \ ++ vpx_highbd_10_sub_pixel_variance8x16_c ++ ++uint32_t vpx_highbd_10_sub_pixel_variance8x4_c(const uint8_t* src_ptr, ++ int src_stride, ++ int x_offset, ++ int y_offset, ++ const uint8_t* ref_ptr, ++ int ref_stride, ++ uint32_t* sse); ++#define vpx_highbd_10_sub_pixel_variance8x4 \ ++ vpx_highbd_10_sub_pixel_variance8x4_c ++ ++uint32_t vpx_highbd_10_sub_pixel_variance8x8_c(const uint8_t* src_ptr, ++ int src_stride, ++ int x_offset, ++ int y_offset, ++ const uint8_t* ref_ptr, ++ int ref_stride, ++ uint32_t* sse); ++#define vpx_highbd_10_sub_pixel_variance8x8 \ ++ vpx_highbd_10_sub_pixel_variance8x8_c ++ ++unsigned int vpx_highbd_10_variance16x16_c(const uint8_t* src_ptr, ++ int src_stride, ++ const uint8_t* ref_ptr, ++ int ref_stride, ++ unsigned int* sse); ++#define vpx_highbd_10_variance16x16 vpx_highbd_10_variance16x16_c ++ ++unsigned int vpx_highbd_10_variance16x32_c(const uint8_t* src_ptr, ++ int src_stride, ++ const uint8_t* ref_ptr, ++ int ref_stride, ++ unsigned int* sse); ++#define vpx_highbd_10_variance16x32 vpx_highbd_10_variance16x32_c ++ ++unsigned int vpx_highbd_10_variance16x8_c(const uint8_t* src_ptr, ++ int src_stride, ++ const uint8_t* ref_ptr, ++ int ref_stride, ++ unsigned int* sse); ++#define vpx_highbd_10_variance16x8 vpx_highbd_10_variance16x8_c ++ ++unsigned int vpx_highbd_10_variance32x16_c(const uint8_t* src_ptr, ++ int src_stride, ++ const uint8_t* ref_ptr, ++ int ref_stride, ++ unsigned int* sse); ++#define vpx_highbd_10_variance32x16 vpx_highbd_10_variance32x16_c ++ ++unsigned int vpx_highbd_10_variance32x32_c(const uint8_t* src_ptr, ++ int src_stride, ++ const uint8_t* ref_ptr, ++ int ref_stride, ++ unsigned int* sse); ++#define vpx_highbd_10_variance32x32 vpx_highbd_10_variance32x32_c ++ ++unsigned int vpx_highbd_10_variance32x64_c(const uint8_t* src_ptr, ++ int src_stride, ++ const uint8_t* ref_ptr, ++ int ref_stride, ++ unsigned int* sse); ++#define vpx_highbd_10_variance32x64 vpx_highbd_10_variance32x64_c ++ ++unsigned int vpx_highbd_10_variance4x4_c(const uint8_t* src_ptr, ++ int src_stride, ++ const uint8_t* ref_ptr, ++ int ref_stride, ++ unsigned int* sse); ++#define vpx_highbd_10_variance4x4 vpx_highbd_10_variance4x4_c ++ ++unsigned int vpx_highbd_10_variance4x8_c(const uint8_t* src_ptr, ++ int src_stride, ++ const uint8_t* ref_ptr, ++ int ref_stride, ++ unsigned int* sse); ++#define 
vpx_highbd_10_variance4x8 vpx_highbd_10_variance4x8_c ++ ++unsigned int vpx_highbd_10_variance64x32_c(const uint8_t* src_ptr, ++ int src_stride, ++ const uint8_t* ref_ptr, ++ int ref_stride, ++ unsigned int* sse); ++#define vpx_highbd_10_variance64x32 vpx_highbd_10_variance64x32_c ++ ++unsigned int vpx_highbd_10_variance64x64_c(const uint8_t* src_ptr, ++ int src_stride, ++ const uint8_t* ref_ptr, ++ int ref_stride, ++ unsigned int* sse); ++#define vpx_highbd_10_variance64x64 vpx_highbd_10_variance64x64_c ++ ++unsigned int vpx_highbd_10_variance8x16_c(const uint8_t* src_ptr, ++ int src_stride, ++ const uint8_t* ref_ptr, ++ int ref_stride, ++ unsigned int* sse); ++#define vpx_highbd_10_variance8x16 vpx_highbd_10_variance8x16_c ++ ++unsigned int vpx_highbd_10_variance8x4_c(const uint8_t* src_ptr, ++ int src_stride, ++ const uint8_t* ref_ptr, ++ int ref_stride, ++ unsigned int* sse); ++#define vpx_highbd_10_variance8x4 vpx_highbd_10_variance8x4_c ++ ++unsigned int vpx_highbd_10_variance8x8_c(const uint8_t* src_ptr, ++ int src_stride, ++ const uint8_t* ref_ptr, ++ int ref_stride, ++ unsigned int* sse); ++#define vpx_highbd_10_variance8x8 vpx_highbd_10_variance8x8_c ++ ++void vpx_highbd_12_get16x16var_c(const uint8_t* src_ptr, ++ int src_stride, ++ const uint8_t* ref_ptr, ++ int ref_stride, ++ unsigned int* sse, ++ int* sum); ++#define vpx_highbd_12_get16x16var vpx_highbd_12_get16x16var_c ++ ++void vpx_highbd_12_get8x8var_c(const uint8_t* src_ptr, ++ int src_stride, ++ const uint8_t* ref_ptr, ++ int ref_stride, ++ unsigned int* sse, ++ int* sum); ++#define vpx_highbd_12_get8x8var vpx_highbd_12_get8x8var_c ++ ++unsigned int vpx_highbd_12_mse16x16_c(const uint8_t* src_ptr, ++ int src_stride, ++ const uint8_t* ref_ptr, ++ int ref_stride, ++ unsigned int* sse); ++#define vpx_highbd_12_mse16x16 vpx_highbd_12_mse16x16_c ++ ++unsigned int vpx_highbd_12_mse16x8_c(const uint8_t* src_ptr, ++ int src_stride, ++ const uint8_t* ref_ptr, ++ int ref_stride, ++ unsigned int* sse); ++#define vpx_highbd_12_mse16x8 vpx_highbd_12_mse16x8_c ++ ++unsigned int vpx_highbd_12_mse8x16_c(const uint8_t* src_ptr, ++ int src_stride, ++ const uint8_t* ref_ptr, ++ int ref_stride, ++ unsigned int* sse); ++#define vpx_highbd_12_mse8x16 vpx_highbd_12_mse8x16_c ++ ++unsigned int vpx_highbd_12_mse8x8_c(const uint8_t* src_ptr, ++ int src_stride, ++ const uint8_t* ref_ptr, ++ int ref_stride, ++ unsigned int* sse); ++#define vpx_highbd_12_mse8x8 vpx_highbd_12_mse8x8_c ++ ++uint32_t vpx_highbd_12_sub_pixel_avg_variance16x16_c( ++ const uint8_t* src_ptr, ++ int src_stride, ++ int x_offset, ++ int y_offset, ++ const uint8_t* ref_ptr, ++ int ref_stride, ++ uint32_t* sse, ++ const uint8_t* second_pred); ++#define vpx_highbd_12_sub_pixel_avg_variance16x16 \ ++ vpx_highbd_12_sub_pixel_avg_variance16x16_c ++ ++uint32_t vpx_highbd_12_sub_pixel_avg_variance16x32_c( ++ const uint8_t* src_ptr, ++ int src_stride, ++ int x_offset, ++ int y_offset, ++ const uint8_t* ref_ptr, ++ int ref_stride, ++ uint32_t* sse, ++ const uint8_t* second_pred); ++#define vpx_highbd_12_sub_pixel_avg_variance16x32 \ ++ vpx_highbd_12_sub_pixel_avg_variance16x32_c ++ ++uint32_t vpx_highbd_12_sub_pixel_avg_variance16x8_c(const uint8_t* src_ptr, ++ int src_stride, ++ int x_offset, ++ int y_offset, ++ const uint8_t* ref_ptr, ++ int ref_stride, ++ uint32_t* sse, ++ const uint8_t* second_pred); ++#define vpx_highbd_12_sub_pixel_avg_variance16x8 \ ++ vpx_highbd_12_sub_pixel_avg_variance16x8_c ++ ++uint32_t vpx_highbd_12_sub_pixel_avg_variance32x16_c( ++ const uint8_t* src_ptr, 
++ int src_stride, ++ int x_offset, ++ int y_offset, ++ const uint8_t* ref_ptr, ++ int ref_stride, ++ uint32_t* sse, ++ const uint8_t* second_pred); ++#define vpx_highbd_12_sub_pixel_avg_variance32x16 \ ++ vpx_highbd_12_sub_pixel_avg_variance32x16_c ++ ++uint32_t vpx_highbd_12_sub_pixel_avg_variance32x32_c( ++ const uint8_t* src_ptr, ++ int src_stride, ++ int x_offset, ++ int y_offset, ++ const uint8_t* ref_ptr, ++ int ref_stride, ++ uint32_t* sse, ++ const uint8_t* second_pred); ++#define vpx_highbd_12_sub_pixel_avg_variance32x32 \ ++ vpx_highbd_12_sub_pixel_avg_variance32x32_c ++ ++uint32_t vpx_highbd_12_sub_pixel_avg_variance32x64_c( ++ const uint8_t* src_ptr, ++ int src_stride, ++ int x_offset, ++ int y_offset, ++ const uint8_t* ref_ptr, ++ int ref_stride, ++ uint32_t* sse, ++ const uint8_t* second_pred); ++#define vpx_highbd_12_sub_pixel_avg_variance32x64 \ ++ vpx_highbd_12_sub_pixel_avg_variance32x64_c ++ ++uint32_t vpx_highbd_12_sub_pixel_avg_variance4x4_c(const uint8_t* src_ptr, ++ int src_stride, ++ int x_offset, ++ int y_offset, ++ const uint8_t* ref_ptr, ++ int ref_stride, ++ uint32_t* sse, ++ const uint8_t* second_pred); ++#define vpx_highbd_12_sub_pixel_avg_variance4x4 \ ++ vpx_highbd_12_sub_pixel_avg_variance4x4_c ++ ++uint32_t vpx_highbd_12_sub_pixel_avg_variance4x8_c(const uint8_t* src_ptr, ++ int src_stride, ++ int x_offset, ++ int y_offset, ++ const uint8_t* ref_ptr, ++ int ref_stride, ++ uint32_t* sse, ++ const uint8_t* second_pred); ++#define vpx_highbd_12_sub_pixel_avg_variance4x8 \ ++ vpx_highbd_12_sub_pixel_avg_variance4x8_c ++ ++uint32_t vpx_highbd_12_sub_pixel_avg_variance64x32_c( ++ const uint8_t* src_ptr, ++ int src_stride, ++ int x_offset, ++ int y_offset, ++ const uint8_t* ref_ptr, ++ int ref_stride, ++ uint32_t* sse, ++ const uint8_t* second_pred); ++#define vpx_highbd_12_sub_pixel_avg_variance64x32 \ ++ vpx_highbd_12_sub_pixel_avg_variance64x32_c ++ ++uint32_t vpx_highbd_12_sub_pixel_avg_variance64x64_c( ++ const uint8_t* src_ptr, ++ int src_stride, ++ int x_offset, ++ int y_offset, ++ const uint8_t* ref_ptr, ++ int ref_stride, ++ uint32_t* sse, ++ const uint8_t* second_pred); ++#define vpx_highbd_12_sub_pixel_avg_variance64x64 \ ++ vpx_highbd_12_sub_pixel_avg_variance64x64_c ++ ++uint32_t vpx_highbd_12_sub_pixel_avg_variance8x16_c(const uint8_t* src_ptr, ++ int src_stride, ++ int x_offset, ++ int y_offset, ++ const uint8_t* ref_ptr, ++ int ref_stride, ++ uint32_t* sse, ++ const uint8_t* second_pred); ++#define vpx_highbd_12_sub_pixel_avg_variance8x16 \ ++ vpx_highbd_12_sub_pixel_avg_variance8x16_c ++ ++uint32_t vpx_highbd_12_sub_pixel_avg_variance8x4_c(const uint8_t* src_ptr, ++ int src_stride, ++ int x_offset, ++ int y_offset, ++ const uint8_t* ref_ptr, ++ int ref_stride, ++ uint32_t* sse, ++ const uint8_t* second_pred); ++#define vpx_highbd_12_sub_pixel_avg_variance8x4 \ ++ vpx_highbd_12_sub_pixel_avg_variance8x4_c ++ ++uint32_t vpx_highbd_12_sub_pixel_avg_variance8x8_c(const uint8_t* src_ptr, ++ int src_stride, ++ int x_offset, ++ int y_offset, ++ const uint8_t* ref_ptr, ++ int ref_stride, ++ uint32_t* sse, ++ const uint8_t* second_pred); ++#define vpx_highbd_12_sub_pixel_avg_variance8x8 \ ++ vpx_highbd_12_sub_pixel_avg_variance8x8_c ++ ++uint32_t vpx_highbd_12_sub_pixel_variance16x16_c(const uint8_t* src_ptr, ++ int src_stride, ++ int x_offset, ++ int y_offset, ++ const uint8_t* ref_ptr, ++ int ref_stride, ++ uint32_t* sse); ++#define vpx_highbd_12_sub_pixel_variance16x16 \ ++ vpx_highbd_12_sub_pixel_variance16x16_c ++ ++uint32_t 
vpx_highbd_12_sub_pixel_variance16x32_c(const uint8_t* src_ptr, ++ int src_stride, ++ int x_offset, ++ int y_offset, ++ const uint8_t* ref_ptr, ++ int ref_stride, ++ uint32_t* sse); ++#define vpx_highbd_12_sub_pixel_variance16x32 \ ++ vpx_highbd_12_sub_pixel_variance16x32_c ++ ++uint32_t vpx_highbd_12_sub_pixel_variance16x8_c(const uint8_t* src_ptr, ++ int src_stride, ++ int x_offset, ++ int y_offset, ++ const uint8_t* ref_ptr, ++ int ref_stride, ++ uint32_t* sse); ++#define vpx_highbd_12_sub_pixel_variance16x8 \ ++ vpx_highbd_12_sub_pixel_variance16x8_c ++ ++uint32_t vpx_highbd_12_sub_pixel_variance32x16_c(const uint8_t* src_ptr, ++ int src_stride, ++ int x_offset, ++ int y_offset, ++ const uint8_t* ref_ptr, ++ int ref_stride, ++ uint32_t* sse); ++#define vpx_highbd_12_sub_pixel_variance32x16 \ ++ vpx_highbd_12_sub_pixel_variance32x16_c ++ ++uint32_t vpx_highbd_12_sub_pixel_variance32x32_c(const uint8_t* src_ptr, ++ int src_stride, ++ int x_offset, ++ int y_offset, ++ const uint8_t* ref_ptr, ++ int ref_stride, ++ uint32_t* sse); ++#define vpx_highbd_12_sub_pixel_variance32x32 \ ++ vpx_highbd_12_sub_pixel_variance32x32_c ++ ++uint32_t vpx_highbd_12_sub_pixel_variance32x64_c(const uint8_t* src_ptr, ++ int src_stride, ++ int x_offset, ++ int y_offset, ++ const uint8_t* ref_ptr, ++ int ref_stride, ++ uint32_t* sse); ++#define vpx_highbd_12_sub_pixel_variance32x64 \ ++ vpx_highbd_12_sub_pixel_variance32x64_c ++ ++uint32_t vpx_highbd_12_sub_pixel_variance4x4_c(const uint8_t* src_ptr, ++ int src_stride, ++ int x_offset, ++ int y_offset, ++ const uint8_t* ref_ptr, ++ int ref_stride, ++ uint32_t* sse); ++#define vpx_highbd_12_sub_pixel_variance4x4 \ ++ vpx_highbd_12_sub_pixel_variance4x4_c ++ ++uint32_t vpx_highbd_12_sub_pixel_variance4x8_c(const uint8_t* src_ptr, ++ int src_stride, ++ int x_offset, ++ int y_offset, ++ const uint8_t* ref_ptr, ++ int ref_stride, ++ uint32_t* sse); ++#define vpx_highbd_12_sub_pixel_variance4x8 \ ++ vpx_highbd_12_sub_pixel_variance4x8_c ++ ++uint32_t vpx_highbd_12_sub_pixel_variance64x32_c(const uint8_t* src_ptr, ++ int src_stride, ++ int x_offset, ++ int y_offset, ++ const uint8_t* ref_ptr, ++ int ref_stride, ++ uint32_t* sse); ++#define vpx_highbd_12_sub_pixel_variance64x32 \ ++ vpx_highbd_12_sub_pixel_variance64x32_c ++ ++uint32_t vpx_highbd_12_sub_pixel_variance64x64_c(const uint8_t* src_ptr, ++ int src_stride, ++ int x_offset, ++ int y_offset, ++ const uint8_t* ref_ptr, ++ int ref_stride, ++ uint32_t* sse); ++#define vpx_highbd_12_sub_pixel_variance64x64 \ ++ vpx_highbd_12_sub_pixel_variance64x64_c ++ ++uint32_t vpx_highbd_12_sub_pixel_variance8x16_c(const uint8_t* src_ptr, ++ int src_stride, ++ int x_offset, ++ int y_offset, ++ const uint8_t* ref_ptr, ++ int ref_stride, ++ uint32_t* sse); ++#define vpx_highbd_12_sub_pixel_variance8x16 \ ++ vpx_highbd_12_sub_pixel_variance8x16_c ++ ++uint32_t vpx_highbd_12_sub_pixel_variance8x4_c(const uint8_t* src_ptr, ++ int src_stride, ++ int x_offset, ++ int y_offset, ++ const uint8_t* ref_ptr, ++ int ref_stride, ++ uint32_t* sse); ++#define vpx_highbd_12_sub_pixel_variance8x4 \ ++ vpx_highbd_12_sub_pixel_variance8x4_c ++ ++uint32_t vpx_highbd_12_sub_pixel_variance8x8_c(const uint8_t* src_ptr, ++ int src_stride, ++ int x_offset, ++ int y_offset, ++ const uint8_t* ref_ptr, ++ int ref_stride, ++ uint32_t* sse); ++#define vpx_highbd_12_sub_pixel_variance8x8 \ ++ vpx_highbd_12_sub_pixel_variance8x8_c ++ ++unsigned int vpx_highbd_12_variance16x16_c(const uint8_t* src_ptr, ++ int src_stride, ++ const uint8_t* ref_ptr, ++ int 
ref_stride, ++ unsigned int* sse); ++#define vpx_highbd_12_variance16x16 vpx_highbd_12_variance16x16_c ++ ++unsigned int vpx_highbd_12_variance16x32_c(const uint8_t* src_ptr, ++ int src_stride, ++ const uint8_t* ref_ptr, ++ int ref_stride, ++ unsigned int* sse); ++#define vpx_highbd_12_variance16x32 vpx_highbd_12_variance16x32_c ++ ++unsigned int vpx_highbd_12_variance16x8_c(const uint8_t* src_ptr, ++ int src_stride, ++ const uint8_t* ref_ptr, ++ int ref_stride, ++ unsigned int* sse); ++#define vpx_highbd_12_variance16x8 vpx_highbd_12_variance16x8_c ++ ++unsigned int vpx_highbd_12_variance32x16_c(const uint8_t* src_ptr, ++ int src_stride, ++ const uint8_t* ref_ptr, ++ int ref_stride, ++ unsigned int* sse); ++#define vpx_highbd_12_variance32x16 vpx_highbd_12_variance32x16_c ++ ++unsigned int vpx_highbd_12_variance32x32_c(const uint8_t* src_ptr, ++ int src_stride, ++ const uint8_t* ref_ptr, ++ int ref_stride, ++ unsigned int* sse); ++#define vpx_highbd_12_variance32x32 vpx_highbd_12_variance32x32_c ++ ++unsigned int vpx_highbd_12_variance32x64_c(const uint8_t* src_ptr, ++ int src_stride, ++ const uint8_t* ref_ptr, ++ int ref_stride, ++ unsigned int* sse); ++#define vpx_highbd_12_variance32x64 vpx_highbd_12_variance32x64_c ++ ++unsigned int vpx_highbd_12_variance4x4_c(const uint8_t* src_ptr, ++ int src_stride, ++ const uint8_t* ref_ptr, ++ int ref_stride, ++ unsigned int* sse); ++#define vpx_highbd_12_variance4x4 vpx_highbd_12_variance4x4_c ++ ++unsigned int vpx_highbd_12_variance4x8_c(const uint8_t* src_ptr, ++ int src_stride, ++ const uint8_t* ref_ptr, ++ int ref_stride, ++ unsigned int* sse); ++#define vpx_highbd_12_variance4x8 vpx_highbd_12_variance4x8_c ++ ++unsigned int vpx_highbd_12_variance64x32_c(const uint8_t* src_ptr, ++ int src_stride, ++ const uint8_t* ref_ptr, ++ int ref_stride, ++ unsigned int* sse); ++#define vpx_highbd_12_variance64x32 vpx_highbd_12_variance64x32_c ++ ++unsigned int vpx_highbd_12_variance64x64_c(const uint8_t* src_ptr, ++ int src_stride, ++ const uint8_t* ref_ptr, ++ int ref_stride, ++ unsigned int* sse); ++#define vpx_highbd_12_variance64x64 vpx_highbd_12_variance64x64_c ++ ++unsigned int vpx_highbd_12_variance8x16_c(const uint8_t* src_ptr, ++ int src_stride, ++ const uint8_t* ref_ptr, ++ int ref_stride, ++ unsigned int* sse); ++#define vpx_highbd_12_variance8x16 vpx_highbd_12_variance8x16_c ++ ++unsigned int vpx_highbd_12_variance8x4_c(const uint8_t* src_ptr, ++ int src_stride, ++ const uint8_t* ref_ptr, ++ int ref_stride, ++ unsigned int* sse); ++#define vpx_highbd_12_variance8x4 vpx_highbd_12_variance8x4_c ++ ++unsigned int vpx_highbd_12_variance8x8_c(const uint8_t* src_ptr, ++ int src_stride, ++ const uint8_t* ref_ptr, ++ int ref_stride, ++ unsigned int* sse); ++#define vpx_highbd_12_variance8x8 vpx_highbd_12_variance8x8_c ++ ++void vpx_highbd_8_get16x16var_c(const uint8_t* src_ptr, ++ int src_stride, ++ const uint8_t* ref_ptr, ++ int ref_stride, ++ unsigned int* sse, ++ int* sum); ++#define vpx_highbd_8_get16x16var vpx_highbd_8_get16x16var_c ++ ++void vpx_highbd_8_get8x8var_c(const uint8_t* src_ptr, ++ int src_stride, ++ const uint8_t* ref_ptr, ++ int ref_stride, ++ unsigned int* sse, ++ int* sum); ++#define vpx_highbd_8_get8x8var vpx_highbd_8_get8x8var_c ++ ++unsigned int vpx_highbd_8_mse16x16_c(const uint8_t* src_ptr, ++ int src_stride, ++ const uint8_t* ref_ptr, ++ int ref_stride, ++ unsigned int* sse); ++#define vpx_highbd_8_mse16x16 vpx_highbd_8_mse16x16_c ++ ++unsigned int vpx_highbd_8_mse16x8_c(const uint8_t* src_ptr, ++ int src_stride, ++ const 
uint8_t* ref_ptr, ++ int ref_stride, ++ unsigned int* sse); ++#define vpx_highbd_8_mse16x8 vpx_highbd_8_mse16x8_c ++ ++unsigned int vpx_highbd_8_mse8x16_c(const uint8_t* src_ptr, ++ int src_stride, ++ const uint8_t* ref_ptr, ++ int ref_stride, ++ unsigned int* sse); ++#define vpx_highbd_8_mse8x16 vpx_highbd_8_mse8x16_c ++ ++unsigned int vpx_highbd_8_mse8x8_c(const uint8_t* src_ptr, ++ int src_stride, ++ const uint8_t* ref_ptr, ++ int ref_stride, ++ unsigned int* sse); ++#define vpx_highbd_8_mse8x8 vpx_highbd_8_mse8x8_c ++ ++uint32_t vpx_highbd_8_sub_pixel_avg_variance16x16_c(const uint8_t* src_ptr, ++ int src_stride, ++ int x_offset, ++ int y_offset, ++ const uint8_t* ref_ptr, ++ int ref_stride, ++ uint32_t* sse, ++ const uint8_t* second_pred); ++#define vpx_highbd_8_sub_pixel_avg_variance16x16 \ ++ vpx_highbd_8_sub_pixel_avg_variance16x16_c ++ ++uint32_t vpx_highbd_8_sub_pixel_avg_variance16x32_c(const uint8_t* src_ptr, ++ int src_stride, ++ int x_offset, ++ int y_offset, ++ const uint8_t* ref_ptr, ++ int ref_stride, ++ uint32_t* sse, ++ const uint8_t* second_pred); ++#define vpx_highbd_8_sub_pixel_avg_variance16x32 \ ++ vpx_highbd_8_sub_pixel_avg_variance16x32_c ++ ++uint32_t vpx_highbd_8_sub_pixel_avg_variance16x8_c(const uint8_t* src_ptr, ++ int src_stride, ++ int x_offset, ++ int y_offset, ++ const uint8_t* ref_ptr, ++ int ref_stride, ++ uint32_t* sse, ++ const uint8_t* second_pred); ++#define vpx_highbd_8_sub_pixel_avg_variance16x8 \ ++ vpx_highbd_8_sub_pixel_avg_variance16x8_c ++ ++uint32_t vpx_highbd_8_sub_pixel_avg_variance32x16_c(const uint8_t* src_ptr, ++ int src_stride, ++ int x_offset, ++ int y_offset, ++ const uint8_t* ref_ptr, ++ int ref_stride, ++ uint32_t* sse, ++ const uint8_t* second_pred); ++#define vpx_highbd_8_sub_pixel_avg_variance32x16 \ ++ vpx_highbd_8_sub_pixel_avg_variance32x16_c ++ ++uint32_t vpx_highbd_8_sub_pixel_avg_variance32x32_c(const uint8_t* src_ptr, ++ int src_stride, ++ int x_offset, ++ int y_offset, ++ const uint8_t* ref_ptr, ++ int ref_stride, ++ uint32_t* sse, ++ const uint8_t* second_pred); ++#define vpx_highbd_8_sub_pixel_avg_variance32x32 \ ++ vpx_highbd_8_sub_pixel_avg_variance32x32_c ++ ++uint32_t vpx_highbd_8_sub_pixel_avg_variance32x64_c(const uint8_t* src_ptr, ++ int src_stride, ++ int x_offset, ++ int y_offset, ++ const uint8_t* ref_ptr, ++ int ref_stride, ++ uint32_t* sse, ++ const uint8_t* second_pred); ++#define vpx_highbd_8_sub_pixel_avg_variance32x64 \ ++ vpx_highbd_8_sub_pixel_avg_variance32x64_c ++ ++uint32_t vpx_highbd_8_sub_pixel_avg_variance4x4_c(const uint8_t* src_ptr, ++ int src_stride, ++ int x_offset, ++ int y_offset, ++ const uint8_t* ref_ptr, ++ int ref_stride, ++ uint32_t* sse, ++ const uint8_t* second_pred); ++#define vpx_highbd_8_sub_pixel_avg_variance4x4 \ ++ vpx_highbd_8_sub_pixel_avg_variance4x4_c ++ ++uint32_t vpx_highbd_8_sub_pixel_avg_variance4x8_c(const uint8_t* src_ptr, ++ int src_stride, ++ int x_offset, ++ int y_offset, ++ const uint8_t* ref_ptr, ++ int ref_stride, ++ uint32_t* sse, ++ const uint8_t* second_pred); ++#define vpx_highbd_8_sub_pixel_avg_variance4x8 \ ++ vpx_highbd_8_sub_pixel_avg_variance4x8_c ++ ++uint32_t vpx_highbd_8_sub_pixel_avg_variance64x32_c(const uint8_t* src_ptr, ++ int src_stride, ++ int x_offset, ++ int y_offset, ++ const uint8_t* ref_ptr, ++ int ref_stride, ++ uint32_t* sse, ++ const uint8_t* second_pred); ++#define vpx_highbd_8_sub_pixel_avg_variance64x32 \ ++ vpx_highbd_8_sub_pixel_avg_variance64x32_c ++ ++uint32_t vpx_highbd_8_sub_pixel_avg_variance64x64_c(const uint8_t* src_ptr, ++ 
int src_stride, ++ int x_offset, ++ int y_offset, ++ const uint8_t* ref_ptr, ++ int ref_stride, ++ uint32_t* sse, ++ const uint8_t* second_pred); ++#define vpx_highbd_8_sub_pixel_avg_variance64x64 \ ++ vpx_highbd_8_sub_pixel_avg_variance64x64_c ++ ++uint32_t vpx_highbd_8_sub_pixel_avg_variance8x16_c(const uint8_t* src_ptr, ++ int src_stride, ++ int x_offset, ++ int y_offset, ++ const uint8_t* ref_ptr, ++ int ref_stride, ++ uint32_t* sse, ++ const uint8_t* second_pred); ++#define vpx_highbd_8_sub_pixel_avg_variance8x16 \ ++ vpx_highbd_8_sub_pixel_avg_variance8x16_c ++ ++uint32_t vpx_highbd_8_sub_pixel_avg_variance8x4_c(const uint8_t* src_ptr, ++ int src_stride, ++ int x_offset, ++ int y_offset, ++ const uint8_t* ref_ptr, ++ int ref_stride, ++ uint32_t* sse, ++ const uint8_t* second_pred); ++#define vpx_highbd_8_sub_pixel_avg_variance8x4 \ ++ vpx_highbd_8_sub_pixel_avg_variance8x4_c ++ ++uint32_t vpx_highbd_8_sub_pixel_avg_variance8x8_c(const uint8_t* src_ptr, ++ int src_stride, ++ int x_offset, ++ int y_offset, ++ const uint8_t* ref_ptr, ++ int ref_stride, ++ uint32_t* sse, ++ const uint8_t* second_pred); ++#define vpx_highbd_8_sub_pixel_avg_variance8x8 \ ++ vpx_highbd_8_sub_pixel_avg_variance8x8_c ++ ++uint32_t vpx_highbd_8_sub_pixel_variance16x16_c(const uint8_t* src_ptr, ++ int src_stride, ++ int x_offset, ++ int y_offset, ++ const uint8_t* ref_ptr, ++ int ref_stride, ++ uint32_t* sse); ++#define vpx_highbd_8_sub_pixel_variance16x16 \ ++ vpx_highbd_8_sub_pixel_variance16x16_c ++ ++uint32_t vpx_highbd_8_sub_pixel_variance16x32_c(const uint8_t* src_ptr, ++ int src_stride, ++ int x_offset, ++ int y_offset, ++ const uint8_t* ref_ptr, ++ int ref_stride, ++ uint32_t* sse); ++#define vpx_highbd_8_sub_pixel_variance16x32 \ ++ vpx_highbd_8_sub_pixel_variance16x32_c ++ ++uint32_t vpx_highbd_8_sub_pixel_variance16x8_c(const uint8_t* src_ptr, ++ int src_stride, ++ int x_offset, ++ int y_offset, ++ const uint8_t* ref_ptr, ++ int ref_stride, ++ uint32_t* sse); ++#define vpx_highbd_8_sub_pixel_variance16x8 \ ++ vpx_highbd_8_sub_pixel_variance16x8_c ++ ++uint32_t vpx_highbd_8_sub_pixel_variance32x16_c(const uint8_t* src_ptr, ++ int src_stride, ++ int x_offset, ++ int y_offset, ++ const uint8_t* ref_ptr, ++ int ref_stride, ++ uint32_t* sse); ++#define vpx_highbd_8_sub_pixel_variance32x16 \ ++ vpx_highbd_8_sub_pixel_variance32x16_c ++ ++uint32_t vpx_highbd_8_sub_pixel_variance32x32_c(const uint8_t* src_ptr, ++ int src_stride, ++ int x_offset, ++ int y_offset, ++ const uint8_t* ref_ptr, ++ int ref_stride, ++ uint32_t* sse); ++#define vpx_highbd_8_sub_pixel_variance32x32 \ ++ vpx_highbd_8_sub_pixel_variance32x32_c ++ ++uint32_t vpx_highbd_8_sub_pixel_variance32x64_c(const uint8_t* src_ptr, ++ int src_stride, ++ int x_offset, ++ int y_offset, ++ const uint8_t* ref_ptr, ++ int ref_stride, ++ uint32_t* sse); ++#define vpx_highbd_8_sub_pixel_variance32x64 \ ++ vpx_highbd_8_sub_pixel_variance32x64_c ++ ++uint32_t vpx_highbd_8_sub_pixel_variance4x4_c(const uint8_t* src_ptr, ++ int src_stride, ++ int x_offset, ++ int y_offset, ++ const uint8_t* ref_ptr, ++ int ref_stride, ++ uint32_t* sse); ++#define vpx_highbd_8_sub_pixel_variance4x4 vpx_highbd_8_sub_pixel_variance4x4_c ++ ++uint32_t vpx_highbd_8_sub_pixel_variance4x8_c(const uint8_t* src_ptr, ++ int src_stride, ++ int x_offset, ++ int y_offset, ++ const uint8_t* ref_ptr, ++ int ref_stride, ++ uint32_t* sse); ++#define vpx_highbd_8_sub_pixel_variance4x8 vpx_highbd_8_sub_pixel_variance4x8_c ++ ++uint32_t vpx_highbd_8_sub_pixel_variance64x32_c(const uint8_t* 
src_ptr, ++ int src_stride, ++ int x_offset, ++ int y_offset, ++ const uint8_t* ref_ptr, ++ int ref_stride, ++ uint32_t* sse); ++#define vpx_highbd_8_sub_pixel_variance64x32 \ ++ vpx_highbd_8_sub_pixel_variance64x32_c ++ ++uint32_t vpx_highbd_8_sub_pixel_variance64x64_c(const uint8_t* src_ptr, ++ int src_stride, ++ int x_offset, ++ int y_offset, ++ const uint8_t* ref_ptr, ++ int ref_stride, ++ uint32_t* sse); ++#define vpx_highbd_8_sub_pixel_variance64x64 \ ++ vpx_highbd_8_sub_pixel_variance64x64_c ++ ++uint32_t vpx_highbd_8_sub_pixel_variance8x16_c(const uint8_t* src_ptr, ++ int src_stride, ++ int x_offset, ++ int y_offset, ++ const uint8_t* ref_ptr, ++ int ref_stride, ++ uint32_t* sse); ++#define vpx_highbd_8_sub_pixel_variance8x16 \ ++ vpx_highbd_8_sub_pixel_variance8x16_c ++ ++uint32_t vpx_highbd_8_sub_pixel_variance8x4_c(const uint8_t* src_ptr, ++ int src_stride, ++ int x_offset, ++ int y_offset, ++ const uint8_t* ref_ptr, ++ int ref_stride, ++ uint32_t* sse); ++#define vpx_highbd_8_sub_pixel_variance8x4 vpx_highbd_8_sub_pixel_variance8x4_c ++ ++uint32_t vpx_highbd_8_sub_pixel_variance8x8_c(const uint8_t* src_ptr, ++ int src_stride, ++ int x_offset, ++ int y_offset, ++ const uint8_t* ref_ptr, ++ int ref_stride, ++ uint32_t* sse); ++#define vpx_highbd_8_sub_pixel_variance8x8 vpx_highbd_8_sub_pixel_variance8x8_c ++ ++unsigned int vpx_highbd_8_variance16x16_c(const uint8_t* src_ptr, ++ int src_stride, ++ const uint8_t* ref_ptr, ++ int ref_stride, ++ unsigned int* sse); ++#define vpx_highbd_8_variance16x16 vpx_highbd_8_variance16x16_c ++ ++unsigned int vpx_highbd_8_variance16x32_c(const uint8_t* src_ptr, ++ int src_stride, ++ const uint8_t* ref_ptr, ++ int ref_stride, ++ unsigned int* sse); ++#define vpx_highbd_8_variance16x32 vpx_highbd_8_variance16x32_c ++ ++unsigned int vpx_highbd_8_variance16x8_c(const uint8_t* src_ptr, ++ int src_stride, ++ const uint8_t* ref_ptr, ++ int ref_stride, ++ unsigned int* sse); ++#define vpx_highbd_8_variance16x8 vpx_highbd_8_variance16x8_c ++ ++unsigned int vpx_highbd_8_variance32x16_c(const uint8_t* src_ptr, ++ int src_stride, ++ const uint8_t* ref_ptr, ++ int ref_stride, ++ unsigned int* sse); ++#define vpx_highbd_8_variance32x16 vpx_highbd_8_variance32x16_c ++ ++unsigned int vpx_highbd_8_variance32x32_c(const uint8_t* src_ptr, ++ int src_stride, ++ const uint8_t* ref_ptr, ++ int ref_stride, ++ unsigned int* sse); ++#define vpx_highbd_8_variance32x32 vpx_highbd_8_variance32x32_c ++ ++unsigned int vpx_highbd_8_variance32x64_c(const uint8_t* src_ptr, ++ int src_stride, ++ const uint8_t* ref_ptr, ++ int ref_stride, ++ unsigned int* sse); ++#define vpx_highbd_8_variance32x64 vpx_highbd_8_variance32x64_c ++ ++unsigned int vpx_highbd_8_variance4x4_c(const uint8_t* src_ptr, ++ int src_stride, ++ const uint8_t* ref_ptr, ++ int ref_stride, ++ unsigned int* sse); ++#define vpx_highbd_8_variance4x4 vpx_highbd_8_variance4x4_c ++ ++unsigned int vpx_highbd_8_variance4x8_c(const uint8_t* src_ptr, ++ int src_stride, ++ const uint8_t* ref_ptr, ++ int ref_stride, ++ unsigned int* sse); ++#define vpx_highbd_8_variance4x8 vpx_highbd_8_variance4x8_c ++ ++unsigned int vpx_highbd_8_variance64x32_c(const uint8_t* src_ptr, ++ int src_stride, ++ const uint8_t* ref_ptr, ++ int ref_stride, ++ unsigned int* sse); ++#define vpx_highbd_8_variance64x32 vpx_highbd_8_variance64x32_c ++ ++unsigned int vpx_highbd_8_variance64x64_c(const uint8_t* src_ptr, ++ int src_stride, ++ const uint8_t* ref_ptr, ++ int ref_stride, ++ unsigned int* sse); ++#define vpx_highbd_8_variance64x64 
vpx_highbd_8_variance64x64_c ++ ++unsigned int vpx_highbd_8_variance8x16_c(const uint8_t* src_ptr, ++ int src_stride, ++ const uint8_t* ref_ptr, ++ int ref_stride, ++ unsigned int* sse); ++#define vpx_highbd_8_variance8x16 vpx_highbd_8_variance8x16_c ++ ++unsigned int vpx_highbd_8_variance8x4_c(const uint8_t* src_ptr, ++ int src_stride, ++ const uint8_t* ref_ptr, ++ int ref_stride, ++ unsigned int* sse); ++#define vpx_highbd_8_variance8x4 vpx_highbd_8_variance8x4_c ++ ++unsigned int vpx_highbd_8_variance8x8_c(const uint8_t* src_ptr, ++ int src_stride, ++ const uint8_t* ref_ptr, ++ int ref_stride, ++ unsigned int* sse); ++#define vpx_highbd_8_variance8x8 vpx_highbd_8_variance8x8_c ++ ++unsigned int vpx_highbd_avg_4x4_c(const uint8_t* s8, int p); ++#define vpx_highbd_avg_4x4 vpx_highbd_avg_4x4_c ++ ++unsigned int vpx_highbd_avg_8x8_c(const uint8_t* s8, int p); ++#define vpx_highbd_avg_8x8 vpx_highbd_avg_8x8_c ++ ++void vpx_highbd_comp_avg_pred_c(uint16_t* comp_pred, ++ const uint16_t* pred, ++ int width, ++ int height, ++ const uint16_t* ref, ++ int ref_stride); ++#define vpx_highbd_comp_avg_pred vpx_highbd_comp_avg_pred_c ++ ++void vpx_highbd_convolve8_c(const uint16_t* src, ++ ptrdiff_t src_stride, ++ uint16_t* dst, ++ ptrdiff_t dst_stride, ++ const InterpKernel* filter, ++ int x0_q4, ++ int x_step_q4, ++ int y0_q4, ++ int y_step_q4, ++ int w, ++ int h, ++ int bd); ++#define vpx_highbd_convolve8 vpx_highbd_convolve8_c ++ ++void vpx_highbd_convolve8_avg_c(const uint16_t* src, ++ ptrdiff_t src_stride, ++ uint16_t* dst, ++ ptrdiff_t dst_stride, ++ const InterpKernel* filter, ++ int x0_q4, ++ int x_step_q4, ++ int y0_q4, ++ int y_step_q4, ++ int w, ++ int h, ++ int bd); ++#define vpx_highbd_convolve8_avg vpx_highbd_convolve8_avg_c ++ ++void vpx_highbd_convolve8_avg_horiz_c(const uint16_t* src, ++ ptrdiff_t src_stride, ++ uint16_t* dst, ++ ptrdiff_t dst_stride, ++ const InterpKernel* filter, ++ int x0_q4, ++ int x_step_q4, ++ int y0_q4, ++ int y_step_q4, ++ int w, ++ int h, ++ int bd); ++#define vpx_highbd_convolve8_avg_horiz vpx_highbd_convolve8_avg_horiz_c ++ ++void vpx_highbd_convolve8_avg_vert_c(const uint16_t* src, ++ ptrdiff_t src_stride, ++ uint16_t* dst, ++ ptrdiff_t dst_stride, ++ const InterpKernel* filter, ++ int x0_q4, ++ int x_step_q4, ++ int y0_q4, ++ int y_step_q4, ++ int w, ++ int h, ++ int bd); ++#define vpx_highbd_convolve8_avg_vert vpx_highbd_convolve8_avg_vert_c ++ ++void vpx_highbd_convolve8_horiz_c(const uint16_t* src, ++ ptrdiff_t src_stride, ++ uint16_t* dst, ++ ptrdiff_t dst_stride, ++ const InterpKernel* filter, ++ int x0_q4, ++ int x_step_q4, ++ int y0_q4, ++ int y_step_q4, ++ int w, ++ int h, ++ int bd); ++#define vpx_highbd_convolve8_horiz vpx_highbd_convolve8_horiz_c ++ ++void vpx_highbd_convolve8_vert_c(const uint16_t* src, ++ ptrdiff_t src_stride, ++ uint16_t* dst, ++ ptrdiff_t dst_stride, ++ const InterpKernel* filter, ++ int x0_q4, ++ int x_step_q4, ++ int y0_q4, ++ int y_step_q4, ++ int w, ++ int h, ++ int bd); ++#define vpx_highbd_convolve8_vert vpx_highbd_convolve8_vert_c ++ ++void vpx_highbd_convolve_avg_c(const uint16_t* src, ++ ptrdiff_t src_stride, ++ uint16_t* dst, ++ ptrdiff_t dst_stride, ++ const InterpKernel* filter, ++ int x0_q4, ++ int x_step_q4, ++ int y0_q4, ++ int y_step_q4, ++ int w, ++ int h, ++ int bd); ++#define vpx_highbd_convolve_avg vpx_highbd_convolve_avg_c ++ ++void vpx_highbd_convolve_copy_c(const uint16_t* src, ++ ptrdiff_t src_stride, ++ uint16_t* dst, ++ ptrdiff_t dst_stride, ++ const InterpKernel* filter, ++ int x0_q4, ++ int 
x_step_q4, ++ int y0_q4, ++ int y_step_q4, ++ int w, ++ int h, ++ int bd); ++#define vpx_highbd_convolve_copy vpx_highbd_convolve_copy_c ++ ++void vpx_highbd_d117_predictor_16x16_c(uint16_t* dst, ++ ptrdiff_t stride, ++ const uint16_t* above, ++ const uint16_t* left, ++ int bd); ++#define vpx_highbd_d117_predictor_16x16 vpx_highbd_d117_predictor_16x16_c ++ ++void vpx_highbd_d117_predictor_32x32_c(uint16_t* dst, ++ ptrdiff_t stride, ++ const uint16_t* above, ++ const uint16_t* left, ++ int bd); ++#define vpx_highbd_d117_predictor_32x32 vpx_highbd_d117_predictor_32x32_c ++ ++void vpx_highbd_d117_predictor_4x4_c(uint16_t* dst, ++ ptrdiff_t stride, ++ const uint16_t* above, ++ const uint16_t* left, ++ int bd); ++#define vpx_highbd_d117_predictor_4x4 vpx_highbd_d117_predictor_4x4_c ++ ++void vpx_highbd_d117_predictor_8x8_c(uint16_t* dst, ++ ptrdiff_t stride, ++ const uint16_t* above, ++ const uint16_t* left, ++ int bd); ++#define vpx_highbd_d117_predictor_8x8 vpx_highbd_d117_predictor_8x8_c ++ ++void vpx_highbd_d135_predictor_16x16_c(uint16_t* dst, ++ ptrdiff_t stride, ++ const uint16_t* above, ++ const uint16_t* left, ++ int bd); ++#define vpx_highbd_d135_predictor_16x16 vpx_highbd_d135_predictor_16x16_c ++ ++void vpx_highbd_d135_predictor_32x32_c(uint16_t* dst, ++ ptrdiff_t stride, ++ const uint16_t* above, ++ const uint16_t* left, ++ int bd); ++#define vpx_highbd_d135_predictor_32x32 vpx_highbd_d135_predictor_32x32_c ++ ++void vpx_highbd_d135_predictor_4x4_c(uint16_t* dst, ++ ptrdiff_t stride, ++ const uint16_t* above, ++ const uint16_t* left, ++ int bd); ++#define vpx_highbd_d135_predictor_4x4 vpx_highbd_d135_predictor_4x4_c ++ ++void vpx_highbd_d135_predictor_8x8_c(uint16_t* dst, ++ ptrdiff_t stride, ++ const uint16_t* above, ++ const uint16_t* left, ++ int bd); ++#define vpx_highbd_d135_predictor_8x8 vpx_highbd_d135_predictor_8x8_c ++ ++void vpx_highbd_d153_predictor_16x16_c(uint16_t* dst, ++ ptrdiff_t stride, ++ const uint16_t* above, ++ const uint16_t* left, ++ int bd); ++#define vpx_highbd_d153_predictor_16x16 vpx_highbd_d153_predictor_16x16_c ++ ++void vpx_highbd_d153_predictor_32x32_c(uint16_t* dst, ++ ptrdiff_t stride, ++ const uint16_t* above, ++ const uint16_t* left, ++ int bd); ++#define vpx_highbd_d153_predictor_32x32 vpx_highbd_d153_predictor_32x32_c ++ ++void vpx_highbd_d153_predictor_4x4_c(uint16_t* dst, ++ ptrdiff_t stride, ++ const uint16_t* above, ++ const uint16_t* left, ++ int bd); ++#define vpx_highbd_d153_predictor_4x4 vpx_highbd_d153_predictor_4x4_c ++ ++void vpx_highbd_d153_predictor_8x8_c(uint16_t* dst, ++ ptrdiff_t stride, ++ const uint16_t* above, ++ const uint16_t* left, ++ int bd); ++#define vpx_highbd_d153_predictor_8x8 vpx_highbd_d153_predictor_8x8_c ++ ++void vpx_highbd_d207_predictor_16x16_c(uint16_t* dst, ++ ptrdiff_t stride, ++ const uint16_t* above, ++ const uint16_t* left, ++ int bd); ++#define vpx_highbd_d207_predictor_16x16 vpx_highbd_d207_predictor_16x16_c ++ ++void vpx_highbd_d207_predictor_32x32_c(uint16_t* dst, ++ ptrdiff_t stride, ++ const uint16_t* above, ++ const uint16_t* left, ++ int bd); ++#define vpx_highbd_d207_predictor_32x32 vpx_highbd_d207_predictor_32x32_c ++ ++void vpx_highbd_d207_predictor_4x4_c(uint16_t* dst, ++ ptrdiff_t stride, ++ const uint16_t* above, ++ const uint16_t* left, ++ int bd); ++#define vpx_highbd_d207_predictor_4x4 vpx_highbd_d207_predictor_4x4_c ++ ++void vpx_highbd_d207_predictor_8x8_c(uint16_t* dst, ++ ptrdiff_t stride, ++ const uint16_t* above, ++ const uint16_t* left, ++ int bd); ++#define 
vpx_highbd_d207_predictor_8x8 vpx_highbd_d207_predictor_8x8_c ++ ++void vpx_highbd_d45_predictor_16x16_c(uint16_t* dst, ++ ptrdiff_t stride, ++ const uint16_t* above, ++ const uint16_t* left, ++ int bd); ++#define vpx_highbd_d45_predictor_16x16 vpx_highbd_d45_predictor_16x16_c ++ ++void vpx_highbd_d45_predictor_32x32_c(uint16_t* dst, ++ ptrdiff_t stride, ++ const uint16_t* above, ++ const uint16_t* left, ++ int bd); ++#define vpx_highbd_d45_predictor_32x32 vpx_highbd_d45_predictor_32x32_c ++ ++void vpx_highbd_d45_predictor_4x4_c(uint16_t* dst, ++ ptrdiff_t stride, ++ const uint16_t* above, ++ const uint16_t* left, ++ int bd); ++#define vpx_highbd_d45_predictor_4x4 vpx_highbd_d45_predictor_4x4_c ++ ++void vpx_highbd_d45_predictor_8x8_c(uint16_t* dst, ++ ptrdiff_t stride, ++ const uint16_t* above, ++ const uint16_t* left, ++ int bd); ++#define vpx_highbd_d45_predictor_8x8 vpx_highbd_d45_predictor_8x8_c ++ ++void vpx_highbd_d63_predictor_16x16_c(uint16_t* dst, ++ ptrdiff_t stride, ++ const uint16_t* above, ++ const uint16_t* left, ++ int bd); ++#define vpx_highbd_d63_predictor_16x16 vpx_highbd_d63_predictor_16x16_c ++ ++void vpx_highbd_d63_predictor_32x32_c(uint16_t* dst, ++ ptrdiff_t stride, ++ const uint16_t* above, ++ const uint16_t* left, ++ int bd); ++#define vpx_highbd_d63_predictor_32x32 vpx_highbd_d63_predictor_32x32_c ++ ++void vpx_highbd_d63_predictor_4x4_c(uint16_t* dst, ++ ptrdiff_t stride, ++ const uint16_t* above, ++ const uint16_t* left, ++ int bd); ++#define vpx_highbd_d63_predictor_4x4 vpx_highbd_d63_predictor_4x4_c ++ ++void vpx_highbd_d63_predictor_8x8_c(uint16_t* dst, ++ ptrdiff_t stride, ++ const uint16_t* above, ++ const uint16_t* left, ++ int bd); ++#define vpx_highbd_d63_predictor_8x8 vpx_highbd_d63_predictor_8x8_c ++ ++void vpx_highbd_dc_128_predictor_16x16_c(uint16_t* dst, ++ ptrdiff_t stride, ++ const uint16_t* above, ++ const uint16_t* left, ++ int bd); ++#define vpx_highbd_dc_128_predictor_16x16 vpx_highbd_dc_128_predictor_16x16_c ++ ++void vpx_highbd_dc_128_predictor_32x32_c(uint16_t* dst, ++ ptrdiff_t stride, ++ const uint16_t* above, ++ const uint16_t* left, ++ int bd); ++#define vpx_highbd_dc_128_predictor_32x32 vpx_highbd_dc_128_predictor_32x32_c ++ ++void vpx_highbd_dc_128_predictor_4x4_c(uint16_t* dst, ++ ptrdiff_t stride, ++ const uint16_t* above, ++ const uint16_t* left, ++ int bd); ++#define vpx_highbd_dc_128_predictor_4x4 vpx_highbd_dc_128_predictor_4x4_c ++ ++void vpx_highbd_dc_128_predictor_8x8_c(uint16_t* dst, ++ ptrdiff_t stride, ++ const uint16_t* above, ++ const uint16_t* left, ++ int bd); ++#define vpx_highbd_dc_128_predictor_8x8 vpx_highbd_dc_128_predictor_8x8_c ++ ++void vpx_highbd_dc_left_predictor_16x16_c(uint16_t* dst, ++ ptrdiff_t stride, ++ const uint16_t* above, ++ const uint16_t* left, ++ int bd); ++#define vpx_highbd_dc_left_predictor_16x16 vpx_highbd_dc_left_predictor_16x16_c ++ ++void vpx_highbd_dc_left_predictor_32x32_c(uint16_t* dst, ++ ptrdiff_t stride, ++ const uint16_t* above, ++ const uint16_t* left, ++ int bd); ++#define vpx_highbd_dc_left_predictor_32x32 vpx_highbd_dc_left_predictor_32x32_c ++ ++void vpx_highbd_dc_left_predictor_4x4_c(uint16_t* dst, ++ ptrdiff_t stride, ++ const uint16_t* above, ++ const uint16_t* left, ++ int bd); ++#define vpx_highbd_dc_left_predictor_4x4 vpx_highbd_dc_left_predictor_4x4_c ++ ++void vpx_highbd_dc_left_predictor_8x8_c(uint16_t* dst, ++ ptrdiff_t stride, ++ const uint16_t* above, ++ const uint16_t* left, ++ int bd); ++#define vpx_highbd_dc_left_predictor_8x8 vpx_highbd_dc_left_predictor_8x8_c 
++ ++void vpx_highbd_dc_predictor_16x16_c(uint16_t* dst, ++ ptrdiff_t stride, ++ const uint16_t* above, ++ const uint16_t* left, ++ int bd); ++#define vpx_highbd_dc_predictor_16x16 vpx_highbd_dc_predictor_16x16_c ++ ++void vpx_highbd_dc_predictor_32x32_c(uint16_t* dst, ++ ptrdiff_t stride, ++ const uint16_t* above, ++ const uint16_t* left, ++ int bd); ++#define vpx_highbd_dc_predictor_32x32 vpx_highbd_dc_predictor_32x32_c ++ ++void vpx_highbd_dc_predictor_4x4_c(uint16_t* dst, ++ ptrdiff_t stride, ++ const uint16_t* above, ++ const uint16_t* left, ++ int bd); ++#define vpx_highbd_dc_predictor_4x4 vpx_highbd_dc_predictor_4x4_c ++ ++void vpx_highbd_dc_predictor_8x8_c(uint16_t* dst, ++ ptrdiff_t stride, ++ const uint16_t* above, ++ const uint16_t* left, ++ int bd); ++#define vpx_highbd_dc_predictor_8x8 vpx_highbd_dc_predictor_8x8_c ++ ++void vpx_highbd_dc_top_predictor_16x16_c(uint16_t* dst, ++ ptrdiff_t stride, ++ const uint16_t* above, ++ const uint16_t* left, ++ int bd); ++#define vpx_highbd_dc_top_predictor_16x16 vpx_highbd_dc_top_predictor_16x16_c ++ ++void vpx_highbd_dc_top_predictor_32x32_c(uint16_t* dst, ++ ptrdiff_t stride, ++ const uint16_t* above, ++ const uint16_t* left, ++ int bd); ++#define vpx_highbd_dc_top_predictor_32x32 vpx_highbd_dc_top_predictor_32x32_c ++ ++void vpx_highbd_dc_top_predictor_4x4_c(uint16_t* dst, ++ ptrdiff_t stride, ++ const uint16_t* above, ++ const uint16_t* left, ++ int bd); ++#define vpx_highbd_dc_top_predictor_4x4 vpx_highbd_dc_top_predictor_4x4_c ++ ++void vpx_highbd_dc_top_predictor_8x8_c(uint16_t* dst, ++ ptrdiff_t stride, ++ const uint16_t* above, ++ const uint16_t* left, ++ int bd); ++#define vpx_highbd_dc_top_predictor_8x8 vpx_highbd_dc_top_predictor_8x8_c ++ ++void vpx_highbd_fdct16x16_c(const int16_t* input, ++ tran_low_t* output, ++ int stride); ++#define vpx_highbd_fdct16x16 vpx_highbd_fdct16x16_c ++ ++void vpx_highbd_fdct16x16_1_c(const int16_t* input, ++ tran_low_t* output, ++ int stride); ++#define vpx_highbd_fdct16x16_1 vpx_highbd_fdct16x16_1_c ++ ++void vpx_highbd_fdct32x32_c(const int16_t* input, ++ tran_low_t* output, ++ int stride); ++#define vpx_highbd_fdct32x32 vpx_highbd_fdct32x32_c ++ ++void vpx_highbd_fdct32x32_1_c(const int16_t* input, ++ tran_low_t* output, ++ int stride); ++#define vpx_highbd_fdct32x32_1 vpx_highbd_fdct32x32_1_c ++ ++void vpx_highbd_fdct32x32_rd_c(const int16_t* input, ++ tran_low_t* output, ++ int stride); ++#define vpx_highbd_fdct32x32_rd vpx_highbd_fdct32x32_rd_c ++ ++void vpx_highbd_fdct4x4_c(const int16_t* input, tran_low_t* output, int stride); ++#define vpx_highbd_fdct4x4 vpx_highbd_fdct4x4_c ++ ++void vpx_highbd_fdct8x8_c(const int16_t* input, tran_low_t* output, int stride); ++#define vpx_highbd_fdct8x8 vpx_highbd_fdct8x8_c ++ ++void vpx_highbd_fdct8x8_1_c(const int16_t* input, ++ tran_low_t* output, ++ int stride); ++#define vpx_highbd_fdct8x8_1 vpx_highbd_fdct8x8_1_c ++ ++void vpx_highbd_h_predictor_16x16_c(uint16_t* dst, ++ ptrdiff_t stride, ++ const uint16_t* above, ++ const uint16_t* left, ++ int bd); ++#define vpx_highbd_h_predictor_16x16 vpx_highbd_h_predictor_16x16_c ++ ++void vpx_highbd_h_predictor_32x32_c(uint16_t* dst, ++ ptrdiff_t stride, ++ const uint16_t* above, ++ const uint16_t* left, ++ int bd); ++#define vpx_highbd_h_predictor_32x32 vpx_highbd_h_predictor_32x32_c ++ ++void vpx_highbd_h_predictor_4x4_c(uint16_t* dst, ++ ptrdiff_t stride, ++ const uint16_t* above, ++ const uint16_t* left, ++ int bd); ++#define vpx_highbd_h_predictor_4x4 vpx_highbd_h_predictor_4x4_c ++ ++void 
vpx_highbd_h_predictor_8x8_c(uint16_t* dst, ++ ptrdiff_t stride, ++ const uint16_t* above, ++ const uint16_t* left, ++ int bd); ++#define vpx_highbd_h_predictor_8x8 vpx_highbd_h_predictor_8x8_c ++ ++void vpx_highbd_hadamard_16x16_c(const int16_t* src_diff, ++ ptrdiff_t src_stride, ++ tran_low_t* coeff); ++#define vpx_highbd_hadamard_16x16 vpx_highbd_hadamard_16x16_c ++ ++void vpx_highbd_hadamard_32x32_c(const int16_t* src_diff, ++ ptrdiff_t src_stride, ++ tran_low_t* coeff); ++#define vpx_highbd_hadamard_32x32 vpx_highbd_hadamard_32x32_c ++ ++void vpx_highbd_hadamard_8x8_c(const int16_t* src_diff, ++ ptrdiff_t src_stride, ++ tran_low_t* coeff); ++#define vpx_highbd_hadamard_8x8 vpx_highbd_hadamard_8x8_c ++ ++void vpx_highbd_idct16x16_10_add_c(const tran_low_t* input, ++ uint16_t* dest, ++ int stride, ++ int bd); ++#define vpx_highbd_idct16x16_10_add vpx_highbd_idct16x16_10_add_c ++ ++void vpx_highbd_idct16x16_1_add_c(const tran_low_t* input, ++ uint16_t* dest, ++ int stride, ++ int bd); ++#define vpx_highbd_idct16x16_1_add vpx_highbd_idct16x16_1_add_c ++ ++void vpx_highbd_idct16x16_256_add_c(const tran_low_t* input, ++ uint16_t* dest, ++ int stride, ++ int bd); ++#define vpx_highbd_idct16x16_256_add vpx_highbd_idct16x16_256_add_c ++ ++void vpx_highbd_idct16x16_38_add_c(const tran_low_t* input, ++ uint16_t* dest, ++ int stride, ++ int bd); ++#define vpx_highbd_idct16x16_38_add vpx_highbd_idct16x16_38_add_c ++ ++void vpx_highbd_idct32x32_1024_add_c(const tran_low_t* input, ++ uint16_t* dest, ++ int stride, ++ int bd); ++#define vpx_highbd_idct32x32_1024_add vpx_highbd_idct32x32_1024_add_c ++ ++void vpx_highbd_idct32x32_135_add_c(const tran_low_t* input, ++ uint16_t* dest, ++ int stride, ++ int bd); ++#define vpx_highbd_idct32x32_135_add vpx_highbd_idct32x32_135_add_c ++ ++void vpx_highbd_idct32x32_1_add_c(const tran_low_t* input, ++ uint16_t* dest, ++ int stride, ++ int bd); ++#define vpx_highbd_idct32x32_1_add vpx_highbd_idct32x32_1_add_c ++ ++void vpx_highbd_idct32x32_34_add_c(const tran_low_t* input, ++ uint16_t* dest, ++ int stride, ++ int bd); ++#define vpx_highbd_idct32x32_34_add vpx_highbd_idct32x32_34_add_c ++ ++void vpx_highbd_idct4x4_16_add_c(const tran_low_t* input, ++ uint16_t* dest, ++ int stride, ++ int bd); ++#define vpx_highbd_idct4x4_16_add vpx_highbd_idct4x4_16_add_c ++ ++void vpx_highbd_idct4x4_1_add_c(const tran_low_t* input, ++ uint16_t* dest, ++ int stride, ++ int bd); ++#define vpx_highbd_idct4x4_1_add vpx_highbd_idct4x4_1_add_c ++ ++void vpx_highbd_idct8x8_12_add_c(const tran_low_t* input, ++ uint16_t* dest, ++ int stride, ++ int bd); ++#define vpx_highbd_idct8x8_12_add vpx_highbd_idct8x8_12_add_c ++ ++void vpx_highbd_idct8x8_1_add_c(const tran_low_t* input, ++ uint16_t* dest, ++ int stride, ++ int bd); ++#define vpx_highbd_idct8x8_1_add vpx_highbd_idct8x8_1_add_c ++ ++void vpx_highbd_idct8x8_64_add_c(const tran_low_t* input, ++ uint16_t* dest, ++ int stride, ++ int bd); ++#define vpx_highbd_idct8x8_64_add vpx_highbd_idct8x8_64_add_c ++ ++void vpx_highbd_iwht4x4_16_add_c(const tran_low_t* input, ++ uint16_t* dest, ++ int stride, ++ int bd); ++#define vpx_highbd_iwht4x4_16_add vpx_highbd_iwht4x4_16_add_c ++ ++void vpx_highbd_iwht4x4_1_add_c(const tran_low_t* input, ++ uint16_t* dest, ++ int stride, ++ int bd); ++#define vpx_highbd_iwht4x4_1_add vpx_highbd_iwht4x4_1_add_c ++ ++void vpx_highbd_lpf_horizontal_16_c(uint16_t* s, ++ int pitch, ++ const uint8_t* blimit, ++ const uint8_t* limit, ++ const uint8_t* thresh, ++ int bd); ++#define vpx_highbd_lpf_horizontal_16 
vpx_highbd_lpf_horizontal_16_c ++ ++void vpx_highbd_lpf_horizontal_16_dual_c(uint16_t* s, ++ int pitch, ++ const uint8_t* blimit, ++ const uint8_t* limit, ++ const uint8_t* thresh, ++ int bd); ++#define vpx_highbd_lpf_horizontal_16_dual vpx_highbd_lpf_horizontal_16_dual_c ++ ++void vpx_highbd_lpf_horizontal_4_c(uint16_t* s, ++ int pitch, ++ const uint8_t* blimit, ++ const uint8_t* limit, ++ const uint8_t* thresh, ++ int bd); ++#define vpx_highbd_lpf_horizontal_4 vpx_highbd_lpf_horizontal_4_c ++ ++void vpx_highbd_lpf_horizontal_4_dual_c(uint16_t* s, ++ int pitch, ++ const uint8_t* blimit0, ++ const uint8_t* limit0, ++ const uint8_t* thresh0, ++ const uint8_t* blimit1, ++ const uint8_t* limit1, ++ const uint8_t* thresh1, ++ int bd); ++#define vpx_highbd_lpf_horizontal_4_dual vpx_highbd_lpf_horizontal_4_dual_c ++ ++void vpx_highbd_lpf_horizontal_8_c(uint16_t* s, ++ int pitch, ++ const uint8_t* blimit, ++ const uint8_t* limit, ++ const uint8_t* thresh, ++ int bd); ++#define vpx_highbd_lpf_horizontal_8 vpx_highbd_lpf_horizontal_8_c ++ ++void vpx_highbd_lpf_horizontal_8_dual_c(uint16_t* s, ++ int pitch, ++ const uint8_t* blimit0, ++ const uint8_t* limit0, ++ const uint8_t* thresh0, ++ const uint8_t* blimit1, ++ const uint8_t* limit1, ++ const uint8_t* thresh1, ++ int bd); ++#define vpx_highbd_lpf_horizontal_8_dual vpx_highbd_lpf_horizontal_8_dual_c ++ ++void vpx_highbd_lpf_vertical_16_c(uint16_t* s, ++ int pitch, ++ const uint8_t* blimit, ++ const uint8_t* limit, ++ const uint8_t* thresh, ++ int bd); ++#define vpx_highbd_lpf_vertical_16 vpx_highbd_lpf_vertical_16_c ++ ++void vpx_highbd_lpf_vertical_16_dual_c(uint16_t* s, ++ int pitch, ++ const uint8_t* blimit, ++ const uint8_t* limit, ++ const uint8_t* thresh, ++ int bd); ++#define vpx_highbd_lpf_vertical_16_dual vpx_highbd_lpf_vertical_16_dual_c ++ ++void vpx_highbd_lpf_vertical_4_c(uint16_t* s, ++ int pitch, ++ const uint8_t* blimit, ++ const uint8_t* limit, ++ const uint8_t* thresh, ++ int bd); ++#define vpx_highbd_lpf_vertical_4 vpx_highbd_lpf_vertical_4_c ++ ++void vpx_highbd_lpf_vertical_4_dual_c(uint16_t* s, ++ int pitch, ++ const uint8_t* blimit0, ++ const uint8_t* limit0, ++ const uint8_t* thresh0, ++ const uint8_t* blimit1, ++ const uint8_t* limit1, ++ const uint8_t* thresh1, ++ int bd); ++#define vpx_highbd_lpf_vertical_4_dual vpx_highbd_lpf_vertical_4_dual_c ++ ++void vpx_highbd_lpf_vertical_8_c(uint16_t* s, ++ int pitch, ++ const uint8_t* blimit, ++ const uint8_t* limit, ++ const uint8_t* thresh, ++ int bd); ++#define vpx_highbd_lpf_vertical_8 vpx_highbd_lpf_vertical_8_c ++ ++void vpx_highbd_lpf_vertical_8_dual_c(uint16_t* s, ++ int pitch, ++ const uint8_t* blimit0, ++ const uint8_t* limit0, ++ const uint8_t* thresh0, ++ const uint8_t* blimit1, ++ const uint8_t* limit1, ++ const uint8_t* thresh1, ++ int bd); ++#define vpx_highbd_lpf_vertical_8_dual vpx_highbd_lpf_vertical_8_dual_c ++ ++void vpx_highbd_minmax_8x8_c(const uint8_t* s8, ++ int p, ++ const uint8_t* d8, ++ int dp, ++ int* min, ++ int* max); ++#define vpx_highbd_minmax_8x8 vpx_highbd_minmax_8x8_c ++ ++void vpx_highbd_quantize_b_c(const tran_low_t* coeff_ptr, ++ intptr_t n_coeffs, ++ int skip_block, ++ const int16_t* zbin_ptr, ++ const int16_t* round_ptr, ++ const int16_t* quant_ptr, ++ const int16_t* quant_shift_ptr, ++ tran_low_t* qcoeff_ptr, ++ tran_low_t* dqcoeff_ptr, ++ const int16_t* dequant_ptr, ++ uint16_t* eob_ptr, ++ const int16_t* scan, ++ const int16_t* iscan); ++#define vpx_highbd_quantize_b vpx_highbd_quantize_b_c ++ ++void vpx_highbd_quantize_b_32x32_c(const 
tran_low_t* coeff_ptr, ++ intptr_t n_coeffs, ++ int skip_block, ++ const int16_t* zbin_ptr, ++ const int16_t* round_ptr, ++ const int16_t* quant_ptr, ++ const int16_t* quant_shift_ptr, ++ tran_low_t* qcoeff_ptr, ++ tran_low_t* dqcoeff_ptr, ++ const int16_t* dequant_ptr, ++ uint16_t* eob_ptr, ++ const int16_t* scan, ++ const int16_t* iscan); ++#define vpx_highbd_quantize_b_32x32 vpx_highbd_quantize_b_32x32_c ++ ++unsigned int vpx_highbd_sad16x16_c(const uint8_t* src_ptr, ++ int src_stride, ++ const uint8_t* ref_ptr, ++ int ref_stride); ++#define vpx_highbd_sad16x16 vpx_highbd_sad16x16_c ++ ++unsigned int vpx_highbd_sad16x16_avg_c(const uint8_t* src_ptr, ++ int src_stride, ++ const uint8_t* ref_ptr, ++ int ref_stride, ++ const uint8_t* second_pred); ++#define vpx_highbd_sad16x16_avg vpx_highbd_sad16x16_avg_c ++ ++void vpx_highbd_sad16x16x4d_c(const uint8_t* src_ptr, ++ int src_stride, ++ const uint8_t* const ref_array[], ++ int ref_stride, ++ uint32_t* sad_array); ++#define vpx_highbd_sad16x16x4d vpx_highbd_sad16x16x4d_c ++ ++unsigned int vpx_highbd_sad16x32_c(const uint8_t* src_ptr, ++ int src_stride, ++ const uint8_t* ref_ptr, ++ int ref_stride); ++#define vpx_highbd_sad16x32 vpx_highbd_sad16x32_c ++ ++unsigned int vpx_highbd_sad16x32_avg_c(const uint8_t* src_ptr, ++ int src_stride, ++ const uint8_t* ref_ptr, ++ int ref_stride, ++ const uint8_t* second_pred); ++#define vpx_highbd_sad16x32_avg vpx_highbd_sad16x32_avg_c ++ ++void vpx_highbd_sad16x32x4d_c(const uint8_t* src_ptr, ++ int src_stride, ++ const uint8_t* const ref_array[], ++ int ref_stride, ++ uint32_t* sad_array); ++#define vpx_highbd_sad16x32x4d vpx_highbd_sad16x32x4d_c ++ ++unsigned int vpx_highbd_sad16x8_c(const uint8_t* src_ptr, ++ int src_stride, ++ const uint8_t* ref_ptr, ++ int ref_stride); ++#define vpx_highbd_sad16x8 vpx_highbd_sad16x8_c ++ ++unsigned int vpx_highbd_sad16x8_avg_c(const uint8_t* src_ptr, ++ int src_stride, ++ const uint8_t* ref_ptr, ++ int ref_stride, ++ const uint8_t* second_pred); ++#define vpx_highbd_sad16x8_avg vpx_highbd_sad16x8_avg_c ++ ++void vpx_highbd_sad16x8x4d_c(const uint8_t* src_ptr, ++ int src_stride, ++ const uint8_t* const ref_array[], ++ int ref_stride, ++ uint32_t* sad_array); ++#define vpx_highbd_sad16x8x4d vpx_highbd_sad16x8x4d_c ++ ++unsigned int vpx_highbd_sad32x16_c(const uint8_t* src_ptr, ++ int src_stride, ++ const uint8_t* ref_ptr, ++ int ref_stride); ++#define vpx_highbd_sad32x16 vpx_highbd_sad32x16_c ++ ++unsigned int vpx_highbd_sad32x16_avg_c(const uint8_t* src_ptr, ++ int src_stride, ++ const uint8_t* ref_ptr, ++ int ref_stride, ++ const uint8_t* second_pred); ++#define vpx_highbd_sad32x16_avg vpx_highbd_sad32x16_avg_c ++ ++void vpx_highbd_sad32x16x4d_c(const uint8_t* src_ptr, ++ int src_stride, ++ const uint8_t* const ref_array[], ++ int ref_stride, ++ uint32_t* sad_array); ++#define vpx_highbd_sad32x16x4d vpx_highbd_sad32x16x4d_c ++ ++unsigned int vpx_highbd_sad32x32_c(const uint8_t* src_ptr, ++ int src_stride, ++ const uint8_t* ref_ptr, ++ int ref_stride); ++#define vpx_highbd_sad32x32 vpx_highbd_sad32x32_c ++ ++unsigned int vpx_highbd_sad32x32_avg_c(const uint8_t* src_ptr, ++ int src_stride, ++ const uint8_t* ref_ptr, ++ int ref_stride, ++ const uint8_t* second_pred); ++#define vpx_highbd_sad32x32_avg vpx_highbd_sad32x32_avg_c ++ ++void vpx_highbd_sad32x32x4d_c(const uint8_t* src_ptr, ++ int src_stride, ++ const uint8_t* const ref_array[], ++ int ref_stride, ++ uint32_t* sad_array); ++#define vpx_highbd_sad32x32x4d vpx_highbd_sad32x32x4d_c ++ ++unsigned int 
vpx_highbd_sad32x64_c(const uint8_t* src_ptr, ++ int src_stride, ++ const uint8_t* ref_ptr, ++ int ref_stride); ++#define vpx_highbd_sad32x64 vpx_highbd_sad32x64_c ++ ++unsigned int vpx_highbd_sad32x64_avg_c(const uint8_t* src_ptr, ++ int src_stride, ++ const uint8_t* ref_ptr, ++ int ref_stride, ++ const uint8_t* second_pred); ++#define vpx_highbd_sad32x64_avg vpx_highbd_sad32x64_avg_c ++ ++void vpx_highbd_sad32x64x4d_c(const uint8_t* src_ptr, ++ int src_stride, ++ const uint8_t* const ref_array[], ++ int ref_stride, ++ uint32_t* sad_array); ++#define vpx_highbd_sad32x64x4d vpx_highbd_sad32x64x4d_c ++ ++unsigned int vpx_highbd_sad4x4_c(const uint8_t* src_ptr, ++ int src_stride, ++ const uint8_t* ref_ptr, ++ int ref_stride); ++#define vpx_highbd_sad4x4 vpx_highbd_sad4x4_c ++ ++unsigned int vpx_highbd_sad4x4_avg_c(const uint8_t* src_ptr, ++ int src_stride, ++ const uint8_t* ref_ptr, ++ int ref_stride, ++ const uint8_t* second_pred); ++#define vpx_highbd_sad4x4_avg vpx_highbd_sad4x4_avg_c ++ ++void vpx_highbd_sad4x4x4d_c(const uint8_t* src_ptr, ++ int src_stride, ++ const uint8_t* const ref_array[], ++ int ref_stride, ++ uint32_t* sad_array); ++#define vpx_highbd_sad4x4x4d vpx_highbd_sad4x4x4d_c ++ ++unsigned int vpx_highbd_sad4x8_c(const uint8_t* src_ptr, ++ int src_stride, ++ const uint8_t* ref_ptr, ++ int ref_stride); ++#define vpx_highbd_sad4x8 vpx_highbd_sad4x8_c ++ ++unsigned int vpx_highbd_sad4x8_avg_c(const uint8_t* src_ptr, ++ int src_stride, ++ const uint8_t* ref_ptr, ++ int ref_stride, ++ const uint8_t* second_pred); ++#define vpx_highbd_sad4x8_avg vpx_highbd_sad4x8_avg_c ++ ++void vpx_highbd_sad4x8x4d_c(const uint8_t* src_ptr, ++ int src_stride, ++ const uint8_t* const ref_array[], ++ int ref_stride, ++ uint32_t* sad_array); ++#define vpx_highbd_sad4x8x4d vpx_highbd_sad4x8x4d_c ++ ++unsigned int vpx_highbd_sad64x32_c(const uint8_t* src_ptr, ++ int src_stride, ++ const uint8_t* ref_ptr, ++ int ref_stride); ++#define vpx_highbd_sad64x32 vpx_highbd_sad64x32_c ++ ++unsigned int vpx_highbd_sad64x32_avg_c(const uint8_t* src_ptr, ++ int src_stride, ++ const uint8_t* ref_ptr, ++ int ref_stride, ++ const uint8_t* second_pred); ++#define vpx_highbd_sad64x32_avg vpx_highbd_sad64x32_avg_c ++ ++void vpx_highbd_sad64x32x4d_c(const uint8_t* src_ptr, ++ int src_stride, ++ const uint8_t* const ref_array[], ++ int ref_stride, ++ uint32_t* sad_array); ++#define vpx_highbd_sad64x32x4d vpx_highbd_sad64x32x4d_c ++ ++unsigned int vpx_highbd_sad64x64_c(const uint8_t* src_ptr, ++ int src_stride, ++ const uint8_t* ref_ptr, ++ int ref_stride); ++#define vpx_highbd_sad64x64 vpx_highbd_sad64x64_c ++ ++unsigned int vpx_highbd_sad64x64_avg_c(const uint8_t* src_ptr, ++ int src_stride, ++ const uint8_t* ref_ptr, ++ int ref_stride, ++ const uint8_t* second_pred); ++#define vpx_highbd_sad64x64_avg vpx_highbd_sad64x64_avg_c ++ ++void vpx_highbd_sad64x64x4d_c(const uint8_t* src_ptr, ++ int src_stride, ++ const uint8_t* const ref_array[], ++ int ref_stride, ++ uint32_t* sad_array); ++#define vpx_highbd_sad64x64x4d vpx_highbd_sad64x64x4d_c ++ ++unsigned int vpx_highbd_sad8x16_c(const uint8_t* src_ptr, ++ int src_stride, ++ const uint8_t* ref_ptr, ++ int ref_stride); ++#define vpx_highbd_sad8x16 vpx_highbd_sad8x16_c ++ ++unsigned int vpx_highbd_sad8x16_avg_c(const uint8_t* src_ptr, ++ int src_stride, ++ const uint8_t* ref_ptr, ++ int ref_stride, ++ const uint8_t* second_pred); ++#define vpx_highbd_sad8x16_avg vpx_highbd_sad8x16_avg_c ++ ++void vpx_highbd_sad8x16x4d_c(const uint8_t* src_ptr, ++ int src_stride, ++ const 
uint8_t* const ref_array[], ++ int ref_stride, ++ uint32_t* sad_array); ++#define vpx_highbd_sad8x16x4d vpx_highbd_sad8x16x4d_c ++ ++unsigned int vpx_highbd_sad8x4_c(const uint8_t* src_ptr, ++ int src_stride, ++ const uint8_t* ref_ptr, ++ int ref_stride); ++#define vpx_highbd_sad8x4 vpx_highbd_sad8x4_c ++ ++unsigned int vpx_highbd_sad8x4_avg_c(const uint8_t* src_ptr, ++ int src_stride, ++ const uint8_t* ref_ptr, ++ int ref_stride, ++ const uint8_t* second_pred); ++#define vpx_highbd_sad8x4_avg vpx_highbd_sad8x4_avg_c ++ ++void vpx_highbd_sad8x4x4d_c(const uint8_t* src_ptr, ++ int src_stride, ++ const uint8_t* const ref_array[], ++ int ref_stride, ++ uint32_t* sad_array); ++#define vpx_highbd_sad8x4x4d vpx_highbd_sad8x4x4d_c ++ ++unsigned int vpx_highbd_sad8x8_c(const uint8_t* src_ptr, ++ int src_stride, ++ const uint8_t* ref_ptr, ++ int ref_stride); ++#define vpx_highbd_sad8x8 vpx_highbd_sad8x8_c ++ ++unsigned int vpx_highbd_sad8x8_avg_c(const uint8_t* src_ptr, ++ int src_stride, ++ const uint8_t* ref_ptr, ++ int ref_stride, ++ const uint8_t* second_pred); ++#define vpx_highbd_sad8x8_avg vpx_highbd_sad8x8_avg_c ++ ++void vpx_highbd_sad8x8x4d_c(const uint8_t* src_ptr, ++ int src_stride, ++ const uint8_t* const ref_array[], ++ int ref_stride, ++ uint32_t* sad_array); ++#define vpx_highbd_sad8x8x4d vpx_highbd_sad8x8x4d_c ++ ++int vpx_highbd_satd_c(const tran_low_t* coeff, int length); ++#define vpx_highbd_satd vpx_highbd_satd_c ++ ++void vpx_highbd_subtract_block_c(int rows, ++ int cols, ++ int16_t* diff_ptr, ++ ptrdiff_t diff_stride, ++ const uint8_t* src8_ptr, ++ ptrdiff_t src_stride, ++ const uint8_t* pred8_ptr, ++ ptrdiff_t pred_stride, ++ int bd); ++#define vpx_highbd_subtract_block vpx_highbd_subtract_block_c ++ ++void vpx_highbd_tm_predictor_16x16_c(uint16_t* dst, ++ ptrdiff_t stride, ++ const uint16_t* above, ++ const uint16_t* left, ++ int bd); ++#define vpx_highbd_tm_predictor_16x16 vpx_highbd_tm_predictor_16x16_c ++ ++void vpx_highbd_tm_predictor_32x32_c(uint16_t* dst, ++ ptrdiff_t stride, ++ const uint16_t* above, ++ const uint16_t* left, ++ int bd); ++#define vpx_highbd_tm_predictor_32x32 vpx_highbd_tm_predictor_32x32_c ++ ++void vpx_highbd_tm_predictor_4x4_c(uint16_t* dst, ++ ptrdiff_t stride, ++ const uint16_t* above, ++ const uint16_t* left, ++ int bd); ++#define vpx_highbd_tm_predictor_4x4 vpx_highbd_tm_predictor_4x4_c ++ ++void vpx_highbd_tm_predictor_8x8_c(uint16_t* dst, ++ ptrdiff_t stride, ++ const uint16_t* above, ++ const uint16_t* left, ++ int bd); ++#define vpx_highbd_tm_predictor_8x8 vpx_highbd_tm_predictor_8x8_c ++ ++void vpx_highbd_v_predictor_16x16_c(uint16_t* dst, ++ ptrdiff_t stride, ++ const uint16_t* above, ++ const uint16_t* left, ++ int bd); ++#define vpx_highbd_v_predictor_16x16 vpx_highbd_v_predictor_16x16_c ++ ++void vpx_highbd_v_predictor_32x32_c(uint16_t* dst, ++ ptrdiff_t stride, ++ const uint16_t* above, ++ const uint16_t* left, ++ int bd); ++#define vpx_highbd_v_predictor_32x32 vpx_highbd_v_predictor_32x32_c ++ ++void vpx_highbd_v_predictor_4x4_c(uint16_t* dst, ++ ptrdiff_t stride, ++ const uint16_t* above, ++ const uint16_t* left, ++ int bd); ++#define vpx_highbd_v_predictor_4x4 vpx_highbd_v_predictor_4x4_c ++ ++void vpx_highbd_v_predictor_8x8_c(uint16_t* dst, ++ ptrdiff_t stride, ++ const uint16_t* above, ++ const uint16_t* left, ++ int bd); ++#define vpx_highbd_v_predictor_8x8 vpx_highbd_v_predictor_8x8_c ++ ++void vpx_idct16x16_10_add_c(const tran_low_t* input, uint8_t* dest, int stride); ++#define vpx_idct16x16_10_add vpx_idct16x16_10_add_c ++ 
++void vpx_idct16x16_1_add_c(const tran_low_t* input, uint8_t* dest, int stride); ++#define vpx_idct16x16_1_add vpx_idct16x16_1_add_c ++ ++void vpx_idct16x16_256_add_c(const tran_low_t* input, ++ uint8_t* dest, ++ int stride); ++#define vpx_idct16x16_256_add vpx_idct16x16_256_add_c ++ ++void vpx_idct16x16_38_add_c(const tran_low_t* input, uint8_t* dest, int stride); ++#define vpx_idct16x16_38_add vpx_idct16x16_38_add_c ++ ++void vpx_idct32x32_1024_add_c(const tran_low_t* input, ++ uint8_t* dest, ++ int stride); ++#define vpx_idct32x32_1024_add vpx_idct32x32_1024_add_c ++ ++void vpx_idct32x32_135_add_c(const tran_low_t* input, ++ uint8_t* dest, ++ int stride); ++#define vpx_idct32x32_135_add vpx_idct32x32_135_add_c ++ ++void vpx_idct32x32_1_add_c(const tran_low_t* input, uint8_t* dest, int stride); ++#define vpx_idct32x32_1_add vpx_idct32x32_1_add_c ++ ++void vpx_idct32x32_34_add_c(const tran_low_t* input, uint8_t* dest, int stride); ++#define vpx_idct32x32_34_add vpx_idct32x32_34_add_c ++ ++void vpx_idct4x4_16_add_c(const tran_low_t* input, uint8_t* dest, int stride); ++#define vpx_idct4x4_16_add vpx_idct4x4_16_add_c ++ ++void vpx_idct4x4_1_add_c(const tran_low_t* input, uint8_t* dest, int stride); ++#define vpx_idct4x4_1_add vpx_idct4x4_1_add_c ++ ++void vpx_idct8x8_12_add_c(const tran_low_t* input, uint8_t* dest, int stride); ++#define vpx_idct8x8_12_add vpx_idct8x8_12_add_c ++ ++void vpx_idct8x8_1_add_c(const tran_low_t* input, uint8_t* dest, int stride); ++#define vpx_idct8x8_1_add vpx_idct8x8_1_add_c ++ ++void vpx_idct8x8_64_add_c(const tran_low_t* input, uint8_t* dest, int stride); ++#define vpx_idct8x8_64_add vpx_idct8x8_64_add_c ++ ++int16_t vpx_int_pro_col_c(const uint8_t* ref, const int width); ++#define vpx_int_pro_col vpx_int_pro_col_c ++ ++void vpx_int_pro_row_c(int16_t* hbuf, ++ const uint8_t* ref, ++ const int ref_stride, ++ const int height); ++#define vpx_int_pro_row vpx_int_pro_row_c ++ ++void vpx_iwht4x4_16_add_c(const tran_low_t* input, uint8_t* dest, int stride); ++#define vpx_iwht4x4_16_add vpx_iwht4x4_16_add_c ++ ++void vpx_iwht4x4_1_add_c(const tran_low_t* input, uint8_t* dest, int stride); ++#define vpx_iwht4x4_1_add vpx_iwht4x4_1_add_c ++ ++void vpx_lpf_horizontal_16_c(uint8_t* s, ++ int pitch, ++ const uint8_t* blimit, ++ const uint8_t* limit, ++ const uint8_t* thresh); ++#define vpx_lpf_horizontal_16 vpx_lpf_horizontal_16_c ++ ++void vpx_lpf_horizontal_16_dual_c(uint8_t* s, ++ int pitch, ++ const uint8_t* blimit, ++ const uint8_t* limit, ++ const uint8_t* thresh); ++#define vpx_lpf_horizontal_16_dual vpx_lpf_horizontal_16_dual_c ++ ++void vpx_lpf_horizontal_4_c(uint8_t* s, ++ int pitch, ++ const uint8_t* blimit, ++ const uint8_t* limit, ++ const uint8_t* thresh); ++#define vpx_lpf_horizontal_4 vpx_lpf_horizontal_4_c ++ ++void vpx_lpf_horizontal_4_dual_c(uint8_t* s, ++ int pitch, ++ const uint8_t* blimit0, ++ const uint8_t* limit0, ++ const uint8_t* thresh0, ++ const uint8_t* blimit1, ++ const uint8_t* limit1, ++ const uint8_t* thresh1); ++#define vpx_lpf_horizontal_4_dual vpx_lpf_horizontal_4_dual_c ++ ++void vpx_lpf_horizontal_8_c(uint8_t* s, ++ int pitch, ++ const uint8_t* blimit, ++ const uint8_t* limit, ++ const uint8_t* thresh); ++#define vpx_lpf_horizontal_8 vpx_lpf_horizontal_8_c ++ ++void vpx_lpf_horizontal_8_dual_c(uint8_t* s, ++ int pitch, ++ const uint8_t* blimit0, ++ const uint8_t* limit0, ++ const uint8_t* thresh0, ++ const uint8_t* blimit1, ++ const uint8_t* limit1, ++ const uint8_t* thresh1); ++#define vpx_lpf_horizontal_8_dual 
vpx_lpf_horizontal_8_dual_c ++ ++void vpx_lpf_vertical_16_c(uint8_t* s, ++ int pitch, ++ const uint8_t* blimit, ++ const uint8_t* limit, ++ const uint8_t* thresh); ++#define vpx_lpf_vertical_16 vpx_lpf_vertical_16_c ++ ++void vpx_lpf_vertical_16_dual_c(uint8_t* s, ++ int pitch, ++ const uint8_t* blimit, ++ const uint8_t* limit, ++ const uint8_t* thresh); ++#define vpx_lpf_vertical_16_dual vpx_lpf_vertical_16_dual_c ++ ++void vpx_lpf_vertical_4_c(uint8_t* s, ++ int pitch, ++ const uint8_t* blimit, ++ const uint8_t* limit, ++ const uint8_t* thresh); ++#define vpx_lpf_vertical_4 vpx_lpf_vertical_4_c ++ ++void vpx_lpf_vertical_4_dual_c(uint8_t* s, ++ int pitch, ++ const uint8_t* blimit0, ++ const uint8_t* limit0, ++ const uint8_t* thresh0, ++ const uint8_t* blimit1, ++ const uint8_t* limit1, ++ const uint8_t* thresh1); ++#define vpx_lpf_vertical_4_dual vpx_lpf_vertical_4_dual_c ++ ++void vpx_lpf_vertical_8_c(uint8_t* s, ++ int pitch, ++ const uint8_t* blimit, ++ const uint8_t* limit, ++ const uint8_t* thresh); ++#define vpx_lpf_vertical_8 vpx_lpf_vertical_8_c ++ ++void vpx_lpf_vertical_8_dual_c(uint8_t* s, ++ int pitch, ++ const uint8_t* blimit0, ++ const uint8_t* limit0, ++ const uint8_t* thresh0, ++ const uint8_t* blimit1, ++ const uint8_t* limit1, ++ const uint8_t* thresh1); ++#define vpx_lpf_vertical_8_dual vpx_lpf_vertical_8_dual_c ++ ++void vpx_mbpost_proc_across_ip_c(unsigned char* src, ++ int pitch, ++ int rows, ++ int cols, ++ int flimit); ++#define vpx_mbpost_proc_across_ip vpx_mbpost_proc_across_ip_c ++ ++void vpx_mbpost_proc_down_c(unsigned char* dst, ++ int pitch, ++ int rows, ++ int cols, ++ int flimit); ++#define vpx_mbpost_proc_down vpx_mbpost_proc_down_c ++ ++void vpx_minmax_8x8_c(const uint8_t* s, ++ int p, ++ const uint8_t* d, ++ int dp, ++ int* min, ++ int* max); ++#define vpx_minmax_8x8 vpx_minmax_8x8_c ++ ++unsigned int vpx_mse16x16_c(const uint8_t* src_ptr, ++ int src_stride, ++ const uint8_t* ref_ptr, ++ int ref_stride, ++ unsigned int* sse); ++#define vpx_mse16x16 vpx_mse16x16_c ++ ++unsigned int vpx_mse16x8_c(const uint8_t* src_ptr, ++ int src_stride, ++ const uint8_t* ref_ptr, ++ int ref_stride, ++ unsigned int* sse); ++#define vpx_mse16x8 vpx_mse16x8_c ++ ++unsigned int vpx_mse8x16_c(const uint8_t* src_ptr, ++ int src_stride, ++ const uint8_t* ref_ptr, ++ int ref_stride, ++ unsigned int* sse); ++#define vpx_mse8x16 vpx_mse8x16_c ++ ++unsigned int vpx_mse8x8_c(const uint8_t* src_ptr, ++ int src_stride, ++ const uint8_t* ref_ptr, ++ int ref_stride, ++ unsigned int* sse); ++#define vpx_mse8x8 vpx_mse8x8_c ++ ++void vpx_plane_add_noise_c(uint8_t* start, ++ const int8_t* noise, ++ int blackclamp, ++ int whiteclamp, ++ int width, ++ int height, ++ int pitch); ++#define vpx_plane_add_noise vpx_plane_add_noise_c ++ ++void vpx_post_proc_down_and_across_mb_row_c(unsigned char* src, ++ unsigned char* dst, ++ int src_pitch, ++ int dst_pitch, ++ int cols, ++ unsigned char* flimits, ++ int size); ++#define vpx_post_proc_down_and_across_mb_row \ ++ vpx_post_proc_down_and_across_mb_row_c ++ ++void vpx_quantize_b_c(const tran_low_t* coeff_ptr, ++ intptr_t n_coeffs, ++ int skip_block, ++ const int16_t* zbin_ptr, ++ const int16_t* round_ptr, ++ const int16_t* quant_ptr, ++ const int16_t* quant_shift_ptr, ++ tran_low_t* qcoeff_ptr, ++ tran_low_t* dqcoeff_ptr, ++ const int16_t* dequant_ptr, ++ uint16_t* eob_ptr, ++ const int16_t* scan, ++ const int16_t* iscan); ++#define vpx_quantize_b vpx_quantize_b_c ++ ++void vpx_quantize_b_32x32_c(const tran_low_t* coeff_ptr, ++ intptr_t n_coeffs, 
++ int skip_block, ++ const int16_t* zbin_ptr, ++ const int16_t* round_ptr, ++ const int16_t* quant_ptr, ++ const int16_t* quant_shift_ptr, ++ tran_low_t* qcoeff_ptr, ++ tran_low_t* dqcoeff_ptr, ++ const int16_t* dequant_ptr, ++ uint16_t* eob_ptr, ++ const int16_t* scan, ++ const int16_t* iscan); ++#define vpx_quantize_b_32x32 vpx_quantize_b_32x32_c ++ ++unsigned int vpx_sad16x16_c(const uint8_t* src_ptr, ++ int src_stride, ++ const uint8_t* ref_ptr, ++ int ref_stride); ++#define vpx_sad16x16 vpx_sad16x16_c ++ ++unsigned int vpx_sad16x16_avg_c(const uint8_t* src_ptr, ++ int src_stride, ++ const uint8_t* ref_ptr, ++ int ref_stride, ++ const uint8_t* second_pred); ++#define vpx_sad16x16_avg vpx_sad16x16_avg_c ++ ++void vpx_sad16x16x3_c(const uint8_t* src_ptr, ++ int src_stride, ++ const uint8_t* ref_ptr, ++ int ref_stride, ++ uint32_t* sad_array); ++#define vpx_sad16x16x3 vpx_sad16x16x3_c ++ ++void vpx_sad16x16x4d_c(const uint8_t* src_ptr, ++ int src_stride, ++ const uint8_t* const ref_array[], ++ int ref_stride, ++ uint32_t* sad_array); ++#define vpx_sad16x16x4d vpx_sad16x16x4d_c ++ ++void vpx_sad16x16x8_c(const uint8_t* src_ptr, ++ int src_stride, ++ const uint8_t* ref_ptr, ++ int ref_stride, ++ uint32_t* sad_array); ++#define vpx_sad16x16x8 vpx_sad16x16x8_c ++ ++unsigned int vpx_sad16x32_c(const uint8_t* src_ptr, ++ int src_stride, ++ const uint8_t* ref_ptr, ++ int ref_stride); ++#define vpx_sad16x32 vpx_sad16x32_c ++ ++unsigned int vpx_sad16x32_avg_c(const uint8_t* src_ptr, ++ int src_stride, ++ const uint8_t* ref_ptr, ++ int ref_stride, ++ const uint8_t* second_pred); ++#define vpx_sad16x32_avg vpx_sad16x32_avg_c ++ ++void vpx_sad16x32x4d_c(const uint8_t* src_ptr, ++ int src_stride, ++ const uint8_t* const ref_array[], ++ int ref_stride, ++ uint32_t* sad_array); ++#define vpx_sad16x32x4d vpx_sad16x32x4d_c ++ ++unsigned int vpx_sad16x8_c(const uint8_t* src_ptr, ++ int src_stride, ++ const uint8_t* ref_ptr, ++ int ref_stride); ++#define vpx_sad16x8 vpx_sad16x8_c ++ ++unsigned int vpx_sad16x8_avg_c(const uint8_t* src_ptr, ++ int src_stride, ++ const uint8_t* ref_ptr, ++ int ref_stride, ++ const uint8_t* second_pred); ++#define vpx_sad16x8_avg vpx_sad16x8_avg_c ++ ++void vpx_sad16x8x3_c(const uint8_t* src_ptr, ++ int src_stride, ++ const uint8_t* ref_ptr, ++ int ref_stride, ++ uint32_t* sad_array); ++#define vpx_sad16x8x3 vpx_sad16x8x3_c ++ ++void vpx_sad16x8x4d_c(const uint8_t* src_ptr, ++ int src_stride, ++ const uint8_t* const ref_array[], ++ int ref_stride, ++ uint32_t* sad_array); ++#define vpx_sad16x8x4d vpx_sad16x8x4d_c ++ ++void vpx_sad16x8x8_c(const uint8_t* src_ptr, ++ int src_stride, ++ const uint8_t* ref_ptr, ++ int ref_stride, ++ uint32_t* sad_array); ++#define vpx_sad16x8x8 vpx_sad16x8x8_c ++ ++unsigned int vpx_sad32x16_c(const uint8_t* src_ptr, ++ int src_stride, ++ const uint8_t* ref_ptr, ++ int ref_stride); ++#define vpx_sad32x16 vpx_sad32x16_c ++ ++unsigned int vpx_sad32x16_avg_c(const uint8_t* src_ptr, ++ int src_stride, ++ const uint8_t* ref_ptr, ++ int ref_stride, ++ const uint8_t* second_pred); ++#define vpx_sad32x16_avg vpx_sad32x16_avg_c ++ ++void vpx_sad32x16x4d_c(const uint8_t* src_ptr, ++ int src_stride, ++ const uint8_t* const ref_array[], ++ int ref_stride, ++ uint32_t* sad_array); ++#define vpx_sad32x16x4d vpx_sad32x16x4d_c ++ ++unsigned int vpx_sad32x32_c(const uint8_t* src_ptr, ++ int src_stride, ++ const uint8_t* ref_ptr, ++ int ref_stride); ++#define vpx_sad32x32 vpx_sad32x32_c ++ ++unsigned int vpx_sad32x32_avg_c(const uint8_t* src_ptr, ++ int src_stride, 
++ const uint8_t* ref_ptr, ++ int ref_stride, ++ const uint8_t* second_pred); ++#define vpx_sad32x32_avg vpx_sad32x32_avg_c ++ ++void vpx_sad32x32x4d_c(const uint8_t* src_ptr, ++ int src_stride, ++ const uint8_t* const ref_array[], ++ int ref_stride, ++ uint32_t* sad_array); ++#define vpx_sad32x32x4d vpx_sad32x32x4d_c ++ ++void vpx_sad32x32x8_c(const uint8_t* src_ptr, ++ int src_stride, ++ const uint8_t* ref_ptr, ++ int ref_stride, ++ uint32_t* sad_array); ++#define vpx_sad32x32x8 vpx_sad32x32x8_c ++ ++unsigned int vpx_sad32x64_c(const uint8_t* src_ptr, ++ int src_stride, ++ const uint8_t* ref_ptr, ++ int ref_stride); ++#define vpx_sad32x64 vpx_sad32x64_c ++ ++unsigned int vpx_sad32x64_avg_c(const uint8_t* src_ptr, ++ int src_stride, ++ const uint8_t* ref_ptr, ++ int ref_stride, ++ const uint8_t* second_pred); ++#define vpx_sad32x64_avg vpx_sad32x64_avg_c ++ ++void vpx_sad32x64x4d_c(const uint8_t* src_ptr, ++ int src_stride, ++ const uint8_t* const ref_array[], ++ int ref_stride, ++ uint32_t* sad_array); ++#define vpx_sad32x64x4d vpx_sad32x64x4d_c ++ ++unsigned int vpx_sad4x4_c(const uint8_t* src_ptr, ++ int src_stride, ++ const uint8_t* ref_ptr, ++ int ref_stride); ++#define vpx_sad4x4 vpx_sad4x4_c ++ ++unsigned int vpx_sad4x4_avg_c(const uint8_t* src_ptr, ++ int src_stride, ++ const uint8_t* ref_ptr, ++ int ref_stride, ++ const uint8_t* second_pred); ++#define vpx_sad4x4_avg vpx_sad4x4_avg_c ++ ++void vpx_sad4x4x3_c(const uint8_t* src_ptr, ++ int src_stride, ++ const uint8_t* ref_ptr, ++ int ref_stride, ++ uint32_t* sad_array); ++#define vpx_sad4x4x3 vpx_sad4x4x3_c ++ ++void vpx_sad4x4x4d_c(const uint8_t* src_ptr, ++ int src_stride, ++ const uint8_t* const ref_array[], ++ int ref_stride, ++ uint32_t* sad_array); ++#define vpx_sad4x4x4d vpx_sad4x4x4d_c ++ ++void vpx_sad4x4x8_c(const uint8_t* src_ptr, ++ int src_stride, ++ const uint8_t* ref_ptr, ++ int ref_stride, ++ uint32_t* sad_array); ++#define vpx_sad4x4x8 vpx_sad4x4x8_c ++ ++unsigned int vpx_sad4x8_c(const uint8_t* src_ptr, ++ int src_stride, ++ const uint8_t* ref_ptr, ++ int ref_stride); ++#define vpx_sad4x8 vpx_sad4x8_c ++ ++unsigned int vpx_sad4x8_avg_c(const uint8_t* src_ptr, ++ int src_stride, ++ const uint8_t* ref_ptr, ++ int ref_stride, ++ const uint8_t* second_pred); ++#define vpx_sad4x8_avg vpx_sad4x8_avg_c ++ ++void vpx_sad4x8x4d_c(const uint8_t* src_ptr, ++ int src_stride, ++ const uint8_t* const ref_array[], ++ int ref_stride, ++ uint32_t* sad_array); ++#define vpx_sad4x8x4d vpx_sad4x8x4d_c ++ ++unsigned int vpx_sad64x32_c(const uint8_t* src_ptr, ++ int src_stride, ++ const uint8_t* ref_ptr, ++ int ref_stride); ++#define vpx_sad64x32 vpx_sad64x32_c ++ ++unsigned int vpx_sad64x32_avg_c(const uint8_t* src_ptr, ++ int src_stride, ++ const uint8_t* ref_ptr, ++ int ref_stride, ++ const uint8_t* second_pred); ++#define vpx_sad64x32_avg vpx_sad64x32_avg_c ++ ++void vpx_sad64x32x4d_c(const uint8_t* src_ptr, ++ int src_stride, ++ const uint8_t* const ref_array[], ++ int ref_stride, ++ uint32_t* sad_array); ++#define vpx_sad64x32x4d vpx_sad64x32x4d_c ++ ++unsigned int vpx_sad64x64_c(const uint8_t* src_ptr, ++ int src_stride, ++ const uint8_t* ref_ptr, ++ int ref_stride); ++#define vpx_sad64x64 vpx_sad64x64_c ++ ++unsigned int vpx_sad64x64_avg_c(const uint8_t* src_ptr, ++ int src_stride, ++ const uint8_t* ref_ptr, ++ int ref_stride, ++ const uint8_t* second_pred); ++#define vpx_sad64x64_avg vpx_sad64x64_avg_c ++ ++void vpx_sad64x64x4d_c(const uint8_t* src_ptr, ++ int src_stride, ++ const uint8_t* const ref_array[], ++ int 
ref_stride, ++ uint32_t* sad_array); ++#define vpx_sad64x64x4d vpx_sad64x64x4d_c ++ ++unsigned int vpx_sad8x16_c(const uint8_t* src_ptr, ++ int src_stride, ++ const uint8_t* ref_ptr, ++ int ref_stride); ++#define vpx_sad8x16 vpx_sad8x16_c ++ ++unsigned int vpx_sad8x16_avg_c(const uint8_t* src_ptr, ++ int src_stride, ++ const uint8_t* ref_ptr, ++ int ref_stride, ++ const uint8_t* second_pred); ++#define vpx_sad8x16_avg vpx_sad8x16_avg_c ++ ++void vpx_sad8x16x3_c(const uint8_t* src_ptr, ++ int src_stride, ++ const uint8_t* ref_ptr, ++ int ref_stride, ++ uint32_t* sad_array); ++#define vpx_sad8x16x3 vpx_sad8x16x3_c ++ ++void vpx_sad8x16x4d_c(const uint8_t* src_ptr, ++ int src_stride, ++ const uint8_t* const ref_array[], ++ int ref_stride, ++ uint32_t* sad_array); ++#define vpx_sad8x16x4d vpx_sad8x16x4d_c ++ ++void vpx_sad8x16x8_c(const uint8_t* src_ptr, ++ int src_stride, ++ const uint8_t* ref_ptr, ++ int ref_stride, ++ uint32_t* sad_array); ++#define vpx_sad8x16x8 vpx_sad8x16x8_c ++ ++unsigned int vpx_sad8x4_c(const uint8_t* src_ptr, ++ int src_stride, ++ const uint8_t* ref_ptr, ++ int ref_stride); ++#define vpx_sad8x4 vpx_sad8x4_c ++ ++unsigned int vpx_sad8x4_avg_c(const uint8_t* src_ptr, ++ int src_stride, ++ const uint8_t* ref_ptr, ++ int ref_stride, ++ const uint8_t* second_pred); ++#define vpx_sad8x4_avg vpx_sad8x4_avg_c ++ ++void vpx_sad8x4x4d_c(const uint8_t* src_ptr, ++ int src_stride, ++ const uint8_t* const ref_array[], ++ int ref_stride, ++ uint32_t* sad_array); ++#define vpx_sad8x4x4d vpx_sad8x4x4d_c ++ ++unsigned int vpx_sad8x8_c(const uint8_t* src_ptr, ++ int src_stride, ++ const uint8_t* ref_ptr, ++ int ref_stride); ++#define vpx_sad8x8 vpx_sad8x8_c ++ ++unsigned int vpx_sad8x8_avg_c(const uint8_t* src_ptr, ++ int src_stride, ++ const uint8_t* ref_ptr, ++ int ref_stride, ++ const uint8_t* second_pred); ++#define vpx_sad8x8_avg vpx_sad8x8_avg_c ++ ++void vpx_sad8x8x3_c(const uint8_t* src_ptr, ++ int src_stride, ++ const uint8_t* ref_ptr, ++ int ref_stride, ++ uint32_t* sad_array); ++#define vpx_sad8x8x3 vpx_sad8x8x3_c ++ ++void vpx_sad8x8x4d_c(const uint8_t* src_ptr, ++ int src_stride, ++ const uint8_t* const ref_array[], ++ int ref_stride, ++ uint32_t* sad_array); ++#define vpx_sad8x8x4d vpx_sad8x8x4d_c ++ ++void vpx_sad8x8x8_c(const uint8_t* src_ptr, ++ int src_stride, ++ const uint8_t* ref_ptr, ++ int ref_stride, ++ uint32_t* sad_array); ++#define vpx_sad8x8x8 vpx_sad8x8x8_c ++ ++int vpx_satd_c(const tran_low_t* coeff, int length); ++#define vpx_satd vpx_satd_c ++ ++void vpx_scaled_2d_c(const uint8_t* src, ++ ptrdiff_t src_stride, ++ uint8_t* dst, ++ ptrdiff_t dst_stride, ++ const InterpKernel* filter, ++ int x0_q4, ++ int x_step_q4, ++ int y0_q4, ++ int y_step_q4, ++ int w, ++ int h); ++#define vpx_scaled_2d vpx_scaled_2d_c ++ ++void vpx_scaled_avg_2d_c(const uint8_t* src, ++ ptrdiff_t src_stride, ++ uint8_t* dst, ++ ptrdiff_t dst_stride, ++ const InterpKernel* filter, ++ int x0_q4, ++ int x_step_q4, ++ int y0_q4, ++ int y_step_q4, ++ int w, ++ int h); ++#define vpx_scaled_avg_2d vpx_scaled_avg_2d_c ++ ++void vpx_scaled_avg_horiz_c(const uint8_t* src, ++ ptrdiff_t src_stride, ++ uint8_t* dst, ++ ptrdiff_t dst_stride, ++ const InterpKernel* filter, ++ int x0_q4, ++ int x_step_q4, ++ int y0_q4, ++ int y_step_q4, ++ int w, ++ int h); ++#define vpx_scaled_avg_horiz vpx_scaled_avg_horiz_c ++ ++void vpx_scaled_avg_vert_c(const uint8_t* src, ++ ptrdiff_t src_stride, ++ uint8_t* dst, ++ ptrdiff_t dst_stride, ++ const InterpKernel* filter, ++ int x0_q4, ++ int x_step_q4, ++ int 
y0_q4, ++ int y_step_q4, ++ int w, ++ int h); ++#define vpx_scaled_avg_vert vpx_scaled_avg_vert_c ++ ++void vpx_scaled_horiz_c(const uint8_t* src, ++ ptrdiff_t src_stride, ++ uint8_t* dst, ++ ptrdiff_t dst_stride, ++ const InterpKernel* filter, ++ int x0_q4, ++ int x_step_q4, ++ int y0_q4, ++ int y_step_q4, ++ int w, ++ int h); ++#define vpx_scaled_horiz vpx_scaled_horiz_c ++ ++void vpx_scaled_vert_c(const uint8_t* src, ++ ptrdiff_t src_stride, ++ uint8_t* dst, ++ ptrdiff_t dst_stride, ++ const InterpKernel* filter, ++ int x0_q4, ++ int x_step_q4, ++ int y0_q4, ++ int y_step_q4, ++ int w, ++ int h); ++#define vpx_scaled_vert vpx_scaled_vert_c ++ ++uint32_t vpx_sub_pixel_avg_variance16x16_c(const uint8_t* src_ptr, ++ int src_stride, ++ int x_offset, ++ int y_offset, ++ const uint8_t* ref_ptr, ++ int ref_stride, ++ uint32_t* sse, ++ const uint8_t* second_pred); ++#define vpx_sub_pixel_avg_variance16x16 vpx_sub_pixel_avg_variance16x16_c ++ ++uint32_t vpx_sub_pixel_avg_variance16x32_c(const uint8_t* src_ptr, ++ int src_stride, ++ int x_offset, ++ int y_offset, ++ const uint8_t* ref_ptr, ++ int ref_stride, ++ uint32_t* sse, ++ const uint8_t* second_pred); ++#define vpx_sub_pixel_avg_variance16x32 vpx_sub_pixel_avg_variance16x32_c ++ ++uint32_t vpx_sub_pixel_avg_variance16x8_c(const uint8_t* src_ptr, ++ int src_stride, ++ int x_offset, ++ int y_offset, ++ const uint8_t* ref_ptr, ++ int ref_stride, ++ uint32_t* sse, ++ const uint8_t* second_pred); ++#define vpx_sub_pixel_avg_variance16x8 vpx_sub_pixel_avg_variance16x8_c ++ ++uint32_t vpx_sub_pixel_avg_variance32x16_c(const uint8_t* src_ptr, ++ int src_stride, ++ int x_offset, ++ int y_offset, ++ const uint8_t* ref_ptr, ++ int ref_stride, ++ uint32_t* sse, ++ const uint8_t* second_pred); ++#define vpx_sub_pixel_avg_variance32x16 vpx_sub_pixel_avg_variance32x16_c ++ ++uint32_t vpx_sub_pixel_avg_variance32x32_c(const uint8_t* src_ptr, ++ int src_stride, ++ int x_offset, ++ int y_offset, ++ const uint8_t* ref_ptr, ++ int ref_stride, ++ uint32_t* sse, ++ const uint8_t* second_pred); ++#define vpx_sub_pixel_avg_variance32x32 vpx_sub_pixel_avg_variance32x32_c ++ ++uint32_t vpx_sub_pixel_avg_variance32x64_c(const uint8_t* src_ptr, ++ int src_stride, ++ int x_offset, ++ int y_offset, ++ const uint8_t* ref_ptr, ++ int ref_stride, ++ uint32_t* sse, ++ const uint8_t* second_pred); ++#define vpx_sub_pixel_avg_variance32x64 vpx_sub_pixel_avg_variance32x64_c ++ ++uint32_t vpx_sub_pixel_avg_variance4x4_c(const uint8_t* src_ptr, ++ int src_stride, ++ int x_offset, ++ int y_offset, ++ const uint8_t* ref_ptr, ++ int ref_stride, ++ uint32_t* sse, ++ const uint8_t* second_pred); ++#define vpx_sub_pixel_avg_variance4x4 vpx_sub_pixel_avg_variance4x4_c ++ ++uint32_t vpx_sub_pixel_avg_variance4x8_c(const uint8_t* src_ptr, ++ int src_stride, ++ int x_offset, ++ int y_offset, ++ const uint8_t* ref_ptr, ++ int ref_stride, ++ uint32_t* sse, ++ const uint8_t* second_pred); ++#define vpx_sub_pixel_avg_variance4x8 vpx_sub_pixel_avg_variance4x8_c ++ ++uint32_t vpx_sub_pixel_avg_variance64x32_c(const uint8_t* src_ptr, ++ int src_stride, ++ int x_offset, ++ int y_offset, ++ const uint8_t* ref_ptr, ++ int ref_stride, ++ uint32_t* sse, ++ const uint8_t* second_pred); ++#define vpx_sub_pixel_avg_variance64x32 vpx_sub_pixel_avg_variance64x32_c ++ ++uint32_t vpx_sub_pixel_avg_variance64x64_c(const uint8_t* src_ptr, ++ int src_stride, ++ int x_offset, ++ int y_offset, ++ const uint8_t* ref_ptr, ++ int ref_stride, ++ uint32_t* sse, ++ const uint8_t* second_pred); ++#define 
vpx_sub_pixel_avg_variance64x64 vpx_sub_pixel_avg_variance64x64_c ++ ++uint32_t vpx_sub_pixel_avg_variance8x16_c(const uint8_t* src_ptr, ++ int src_stride, ++ int x_offset, ++ int y_offset, ++ const uint8_t* ref_ptr, ++ int ref_stride, ++ uint32_t* sse, ++ const uint8_t* second_pred); ++#define vpx_sub_pixel_avg_variance8x16 vpx_sub_pixel_avg_variance8x16_c ++ ++uint32_t vpx_sub_pixel_avg_variance8x4_c(const uint8_t* src_ptr, ++ int src_stride, ++ int x_offset, ++ int y_offset, ++ const uint8_t* ref_ptr, ++ int ref_stride, ++ uint32_t* sse, ++ const uint8_t* second_pred); ++#define vpx_sub_pixel_avg_variance8x4 vpx_sub_pixel_avg_variance8x4_c ++ ++uint32_t vpx_sub_pixel_avg_variance8x8_c(const uint8_t* src_ptr, ++ int src_stride, ++ int x_offset, ++ int y_offset, ++ const uint8_t* ref_ptr, ++ int ref_stride, ++ uint32_t* sse, ++ const uint8_t* second_pred); ++#define vpx_sub_pixel_avg_variance8x8 vpx_sub_pixel_avg_variance8x8_c ++ ++uint32_t vpx_sub_pixel_variance16x16_c(const uint8_t* src_ptr, ++ int src_stride, ++ int x_offset, ++ int y_offset, ++ const uint8_t* ref_ptr, ++ int ref_stride, ++ uint32_t* sse); ++#define vpx_sub_pixel_variance16x16 vpx_sub_pixel_variance16x16_c ++ ++uint32_t vpx_sub_pixel_variance16x32_c(const uint8_t* src_ptr, ++ int src_stride, ++ int x_offset, ++ int y_offset, ++ const uint8_t* ref_ptr, ++ int ref_stride, ++ uint32_t* sse); ++#define vpx_sub_pixel_variance16x32 vpx_sub_pixel_variance16x32_c ++ ++uint32_t vpx_sub_pixel_variance16x8_c(const uint8_t* src_ptr, ++ int src_stride, ++ int x_offset, ++ int y_offset, ++ const uint8_t* ref_ptr, ++ int ref_stride, ++ uint32_t* sse); ++#define vpx_sub_pixel_variance16x8 vpx_sub_pixel_variance16x8_c ++ ++uint32_t vpx_sub_pixel_variance32x16_c(const uint8_t* src_ptr, ++ int src_stride, ++ int x_offset, ++ int y_offset, ++ const uint8_t* ref_ptr, ++ int ref_stride, ++ uint32_t* sse); ++#define vpx_sub_pixel_variance32x16 vpx_sub_pixel_variance32x16_c ++ ++uint32_t vpx_sub_pixel_variance32x32_c(const uint8_t* src_ptr, ++ int src_stride, ++ int x_offset, ++ int y_offset, ++ const uint8_t* ref_ptr, ++ int ref_stride, ++ uint32_t* sse); ++#define vpx_sub_pixel_variance32x32 vpx_sub_pixel_variance32x32_c ++ ++uint32_t vpx_sub_pixel_variance32x64_c(const uint8_t* src_ptr, ++ int src_stride, ++ int x_offset, ++ int y_offset, ++ const uint8_t* ref_ptr, ++ int ref_stride, ++ uint32_t* sse); ++#define vpx_sub_pixel_variance32x64 vpx_sub_pixel_variance32x64_c ++ ++uint32_t vpx_sub_pixel_variance4x4_c(const uint8_t* src_ptr, ++ int src_stride, ++ int x_offset, ++ int y_offset, ++ const uint8_t* ref_ptr, ++ int ref_stride, ++ uint32_t* sse); ++#define vpx_sub_pixel_variance4x4 vpx_sub_pixel_variance4x4_c ++ ++uint32_t vpx_sub_pixel_variance4x8_c(const uint8_t* src_ptr, ++ int src_stride, ++ int x_offset, ++ int y_offset, ++ const uint8_t* ref_ptr, ++ int ref_stride, ++ uint32_t* sse); ++#define vpx_sub_pixel_variance4x8 vpx_sub_pixel_variance4x8_c ++ ++uint32_t vpx_sub_pixel_variance64x32_c(const uint8_t* src_ptr, ++ int src_stride, ++ int x_offset, ++ int y_offset, ++ const uint8_t* ref_ptr, ++ int ref_stride, ++ uint32_t* sse); ++#define vpx_sub_pixel_variance64x32 vpx_sub_pixel_variance64x32_c ++ ++uint32_t vpx_sub_pixel_variance64x64_c(const uint8_t* src_ptr, ++ int src_stride, ++ int x_offset, ++ int y_offset, ++ const uint8_t* ref_ptr, ++ int ref_stride, ++ uint32_t* sse); ++#define vpx_sub_pixel_variance64x64 vpx_sub_pixel_variance64x64_c ++ ++uint32_t vpx_sub_pixel_variance8x16_c(const uint8_t* src_ptr, ++ int src_stride, 
++ int x_offset, ++ int y_offset, ++ const uint8_t* ref_ptr, ++ int ref_stride, ++ uint32_t* sse); ++#define vpx_sub_pixel_variance8x16 vpx_sub_pixel_variance8x16_c ++ ++uint32_t vpx_sub_pixel_variance8x4_c(const uint8_t* src_ptr, ++ int src_stride, ++ int x_offset, ++ int y_offset, ++ const uint8_t* ref_ptr, ++ int ref_stride, ++ uint32_t* sse); ++#define vpx_sub_pixel_variance8x4 vpx_sub_pixel_variance8x4_c ++ ++uint32_t vpx_sub_pixel_variance8x8_c(const uint8_t* src_ptr, ++ int src_stride, ++ int x_offset, ++ int y_offset, ++ const uint8_t* ref_ptr, ++ int ref_stride, ++ uint32_t* sse); ++#define vpx_sub_pixel_variance8x8 vpx_sub_pixel_variance8x8_c ++ ++void vpx_subtract_block_c(int rows, ++ int cols, ++ int16_t* diff_ptr, ++ ptrdiff_t diff_stride, ++ const uint8_t* src_ptr, ++ ptrdiff_t src_stride, ++ const uint8_t* pred_ptr, ++ ptrdiff_t pred_stride); ++#define vpx_subtract_block vpx_subtract_block_c ++ ++uint64_t vpx_sum_squares_2d_i16_c(const int16_t* src, int stride, int size); ++#define vpx_sum_squares_2d_i16 vpx_sum_squares_2d_i16_c ++ ++void vpx_tm_predictor_16x16_c(uint8_t* dst, ++ ptrdiff_t stride, ++ const uint8_t* above, ++ const uint8_t* left); ++#define vpx_tm_predictor_16x16 vpx_tm_predictor_16x16_c ++ ++void vpx_tm_predictor_32x32_c(uint8_t* dst, ++ ptrdiff_t stride, ++ const uint8_t* above, ++ const uint8_t* left); ++#define vpx_tm_predictor_32x32 vpx_tm_predictor_32x32_c ++ ++void vpx_tm_predictor_4x4_c(uint8_t* dst, ++ ptrdiff_t stride, ++ const uint8_t* above, ++ const uint8_t* left); ++#define vpx_tm_predictor_4x4 vpx_tm_predictor_4x4_c ++ ++void vpx_tm_predictor_8x8_c(uint8_t* dst, ++ ptrdiff_t stride, ++ const uint8_t* above, ++ const uint8_t* left); ++#define vpx_tm_predictor_8x8 vpx_tm_predictor_8x8_c ++ ++void vpx_v_predictor_16x16_c(uint8_t* dst, ++ ptrdiff_t stride, ++ const uint8_t* above, ++ const uint8_t* left); ++#define vpx_v_predictor_16x16 vpx_v_predictor_16x16_c ++ ++void vpx_v_predictor_32x32_c(uint8_t* dst, ++ ptrdiff_t stride, ++ const uint8_t* above, ++ const uint8_t* left); ++#define vpx_v_predictor_32x32 vpx_v_predictor_32x32_c ++ ++void vpx_v_predictor_4x4_c(uint8_t* dst, ++ ptrdiff_t stride, ++ const uint8_t* above, ++ const uint8_t* left); ++#define vpx_v_predictor_4x4 vpx_v_predictor_4x4_c ++ ++void vpx_v_predictor_8x8_c(uint8_t* dst, ++ ptrdiff_t stride, ++ const uint8_t* above, ++ const uint8_t* left); ++#define vpx_v_predictor_8x8 vpx_v_predictor_8x8_c ++ ++unsigned int vpx_variance16x16_c(const uint8_t* src_ptr, ++ int src_stride, ++ const uint8_t* ref_ptr, ++ int ref_stride, ++ unsigned int* sse); ++#define vpx_variance16x16 vpx_variance16x16_c ++ ++unsigned int vpx_variance16x32_c(const uint8_t* src_ptr, ++ int src_stride, ++ const uint8_t* ref_ptr, ++ int ref_stride, ++ unsigned int* sse); ++#define vpx_variance16x32 vpx_variance16x32_c ++ ++unsigned int vpx_variance16x8_c(const uint8_t* src_ptr, ++ int src_stride, ++ const uint8_t* ref_ptr, ++ int ref_stride, ++ unsigned int* sse); ++#define vpx_variance16x8 vpx_variance16x8_c ++ ++unsigned int vpx_variance32x16_c(const uint8_t* src_ptr, ++ int src_stride, ++ const uint8_t* ref_ptr, ++ int ref_stride, ++ unsigned int* sse); ++#define vpx_variance32x16 vpx_variance32x16_c ++ ++unsigned int vpx_variance32x32_c(const uint8_t* src_ptr, ++ int src_stride, ++ const uint8_t* ref_ptr, ++ int ref_stride, ++ unsigned int* sse); ++#define vpx_variance32x32 vpx_variance32x32_c ++ ++unsigned int vpx_variance32x64_c(const uint8_t* src_ptr, ++ int src_stride, ++ const uint8_t* ref_ptr, ++ int 
ref_stride, ++ unsigned int* sse); ++#define vpx_variance32x64 vpx_variance32x64_c ++ ++unsigned int vpx_variance4x4_c(const uint8_t* src_ptr, ++ int src_stride, ++ const uint8_t* ref_ptr, ++ int ref_stride, ++ unsigned int* sse); ++#define vpx_variance4x4 vpx_variance4x4_c ++ ++unsigned int vpx_variance4x8_c(const uint8_t* src_ptr, ++ int src_stride, ++ const uint8_t* ref_ptr, ++ int ref_stride, ++ unsigned int* sse); ++#define vpx_variance4x8 vpx_variance4x8_c ++ ++unsigned int vpx_variance64x32_c(const uint8_t* src_ptr, ++ int src_stride, ++ const uint8_t* ref_ptr, ++ int ref_stride, ++ unsigned int* sse); ++#define vpx_variance64x32 vpx_variance64x32_c ++ ++unsigned int vpx_variance64x64_c(const uint8_t* src_ptr, ++ int src_stride, ++ const uint8_t* ref_ptr, ++ int ref_stride, ++ unsigned int* sse); ++#define vpx_variance64x64 vpx_variance64x64_c ++ ++unsigned int vpx_variance8x16_c(const uint8_t* src_ptr, ++ int src_stride, ++ const uint8_t* ref_ptr, ++ int ref_stride, ++ unsigned int* sse); ++#define vpx_variance8x16 vpx_variance8x16_c ++ ++unsigned int vpx_variance8x4_c(const uint8_t* src_ptr, ++ int src_stride, ++ const uint8_t* ref_ptr, ++ int ref_stride, ++ unsigned int* sse); ++#define vpx_variance8x4 vpx_variance8x4_c ++ ++unsigned int vpx_variance8x8_c(const uint8_t* src_ptr, ++ int src_stride, ++ const uint8_t* ref_ptr, ++ int ref_stride, ++ unsigned int* sse); ++#define vpx_variance8x8 vpx_variance8x8_c ++ ++void vpx_ve_predictor_4x4_c(uint8_t* dst, ++ ptrdiff_t stride, ++ const uint8_t* above, ++ const uint8_t* left); ++#define vpx_ve_predictor_4x4 vpx_ve_predictor_4x4_c ++ ++int vpx_vector_var_c(const int16_t* ref, const int16_t* src, const int bwl); ++#define vpx_vector_var vpx_vector_var_c ++ ++void vpx_dsp_rtcd(void); ++ ++#include "vpx_config.h" ++ ++#ifdef RTCD_C ++static void setup_rtcd_internal(void) {} ++#endif ++ ++#ifdef __cplusplus ++} // extern "C" ++#endif ++ ++#endif +diff --git a/src/3rdparty/chromium/third_party/libvpx/source/config/linux/la64/vpx_scale_rtcd.h b/src/3rdparty/chromium/third_party/libvpx/source/config/linux/la64/vpx_scale_rtcd.h +new file mode 100644 +index 00000000000..c5196db4dc6 +--- /dev/null ++++ b/src/3rdparty/chromium/third_party/libvpx/source/config/linux/la64/vpx_scale_rtcd.h +@@ -0,0 +1,96 @@ ++// This file is generated. Do not edit. 
++#ifndef VPX_SCALE_RTCD_H_ ++#define VPX_SCALE_RTCD_H_ ++ ++#ifdef RTCD_C ++#define RTCD_EXTERN ++#else ++#define RTCD_EXTERN extern ++#endif ++ ++struct yv12_buffer_config; ++ ++#ifdef __cplusplus ++extern "C" { ++#endif ++ ++void vp8_horizontal_line_2_1_scale_c(const unsigned char* source, ++ unsigned int source_width, ++ unsigned char* dest, ++ unsigned int dest_width); ++#define vp8_horizontal_line_2_1_scale vp8_horizontal_line_2_1_scale_c ++ ++void vp8_horizontal_line_5_3_scale_c(const unsigned char* source, ++ unsigned int source_width, ++ unsigned char* dest, ++ unsigned int dest_width); ++#define vp8_horizontal_line_5_3_scale vp8_horizontal_line_5_3_scale_c ++ ++void vp8_horizontal_line_5_4_scale_c(const unsigned char* source, ++ unsigned int source_width, ++ unsigned char* dest, ++ unsigned int dest_width); ++#define vp8_horizontal_line_5_4_scale vp8_horizontal_line_5_4_scale_c ++ ++void vp8_vertical_band_2_1_scale_c(unsigned char* source, ++ unsigned int src_pitch, ++ unsigned char* dest, ++ unsigned int dest_pitch, ++ unsigned int dest_width); ++#define vp8_vertical_band_2_1_scale vp8_vertical_band_2_1_scale_c ++ ++void vp8_vertical_band_2_1_scale_i_c(unsigned char* source, ++ unsigned int src_pitch, ++ unsigned char* dest, ++ unsigned int dest_pitch, ++ unsigned int dest_width); ++#define vp8_vertical_band_2_1_scale_i vp8_vertical_band_2_1_scale_i_c ++ ++void vp8_vertical_band_5_3_scale_c(unsigned char* source, ++ unsigned int src_pitch, ++ unsigned char* dest, ++ unsigned int dest_pitch, ++ unsigned int dest_width); ++#define vp8_vertical_band_5_3_scale vp8_vertical_band_5_3_scale_c ++ ++void vp8_vertical_band_5_4_scale_c(unsigned char* source, ++ unsigned int src_pitch, ++ unsigned char* dest, ++ unsigned int dest_pitch, ++ unsigned int dest_width); ++#define vp8_vertical_band_5_4_scale vp8_vertical_band_5_4_scale_c ++ ++void vp8_yv12_copy_frame_c(const struct yv12_buffer_config* src_ybc, ++ struct yv12_buffer_config* dst_ybc); ++#define vp8_yv12_copy_frame vp8_yv12_copy_frame_c ++ ++void vp8_yv12_extend_frame_borders_c(struct yv12_buffer_config* ybf); ++#define vp8_yv12_extend_frame_borders vp8_yv12_extend_frame_borders_c ++ ++void vpx_extend_frame_borders_c(struct yv12_buffer_config* ybf); ++#define vpx_extend_frame_borders vpx_extend_frame_borders_c ++ ++void vpx_extend_frame_inner_borders_c(struct yv12_buffer_config* ybf); ++#define vpx_extend_frame_inner_borders vpx_extend_frame_inner_borders_c ++ ++void vpx_yv12_copy_frame_c(const struct yv12_buffer_config* src_ybc, ++ struct yv12_buffer_config* dst_ybc); ++#define vpx_yv12_copy_frame vpx_yv12_copy_frame_c ++ ++void vpx_yv12_copy_y_c(const struct yv12_buffer_config* src_ybc, ++ struct yv12_buffer_config* dst_ybc); ++#define vpx_yv12_copy_y vpx_yv12_copy_y_c ++ ++void vpx_scale_rtcd(void); ++ ++#include "vpx_config.h" ++ ++#ifdef RTCD_C ++static void setup_rtcd_internal(void) {} ++#endif ++ ++#ifdef __cplusplus ++} // extern "C" ++#endif ++ ++#endif +diff --git a/src/3rdparty/chromium/third_party/lss/linux_syscall_support.h b/src/3rdparty/chromium/third_party/lss/linux_syscall_support.h +index d2baee9d243..fed8f2ed2c2 100644 +--- a/src/3rdparty/chromium/third_party/lss/linux_syscall_support.h ++++ b/src/3rdparty/chromium/third_party/lss/linux_syscall_support.h +@@ -88,7 +88,7 @@ + */ + #if (defined(__i386__) || defined(__x86_64__) || defined(__ARM_ARCH_3__) || \ + defined(__mips__) || defined(__PPC__) || defined(__ARM_EABI__) || \ +- defined(__aarch64__) || defined(__s390__)) \ ++ defined(__aarch64__) || 
defined(__s390__) || defined(__loongarch64)) \ + && (defined(__linux) || defined(__ANDROID__)) + + #ifndef SYS_CPLUSPLUS +@@ -299,7 +299,7 @@ struct kernel_old_sigaction { + } __attribute__((packed,aligned(4))); + #elif (defined(__mips__) && _MIPS_SIM == _MIPS_SIM_ABI32) + #define kernel_old_sigaction kernel_sigaction +-#elif defined(__aarch64__) ++#elif defined(__aarch64__) || defined(__loongarch64) + // No kernel_old_sigaction defined for arm64. + #endif + +@@ -312,7 +312,7 @@ struct kernel_old_sigaction { + * actual number of signals is obviously the same, but the constants + * differ by one. + */ +-#ifdef __mips__ ++#if defined(__mips__) || defined(__loongarch64) + #define KERNEL_NSIG 128 + #else + #define KERNEL_NSIG 64 +@@ -517,7 +517,7 @@ struct kernel_stat { + int st_blocks; + int st_pad4[14]; + }; +-#elif defined(__aarch64__) ++#elif defined(__aarch64__) || defined(__loongarch64) + struct kernel_stat { + unsigned long st_dev; + unsigned long st_ino; +@@ -604,7 +604,7 @@ struct kernel_statfs64 { + unsigned long f_spare[6]; + }; + #endif +-#elif defined(__s390__) ++#elif defined(__s390__) || defined(__loongarch64) + /* See also arch/s390/include/asm/compat.h */ + struct kernel_statfs64 { + unsigned int f_type; +@@ -668,7 +668,7 @@ struct kernel_statfs { + uint64_t f_frsize; + uint64_t f_spare[5]; + }; +-#elif defined(__s390__) ++#elif defined(__s390__) || defined(__loongarch64) + struct kernel_statfs { + unsigned int f_type; + unsigned int f_bsize; +@@ -1057,7 +1057,7 @@ struct kernel_statfs { + #define __NR_getcpu (__NR_SYSCALL_BASE + 345) + #endif + /* End of ARM 3/EABI definitions */ +-#elif defined(__aarch64__) ++#elif defined(__aarch64__) || defined(__loongarch64) + #ifndef __NR_setxattr + #define __NR_setxattr 5 + #endif +@@ -1860,7 +1860,8 @@ struct kernel_statfs { + + #undef LSS_RETURN + #if (defined(__i386__) || defined(__x86_64__) || defined(__ARM_ARCH_3__) \ +- || defined(__ARM_EABI__) || defined(__aarch64__) || defined(__s390__)) ++ || defined(__ARM_EABI__) || defined(__aarch64__) || defined(__s390__) \ ++ || defined(__loongarch64)) + /* Failing system calls return a negative result in the range of + * -1..-4095. These are "errno" values with the sign inverted. + */ +@@ -1960,7 +1961,7 @@ struct kernel_statfs { + LSS_ENTRYPOINT \ + "pop %%ebx" \ + args \ +- : "esp", "memory"); \ ++ : "memory"); \ + LSS_RETURN(type,__res) + #undef _syscall0 + #define _syscall0(type,name) \ +@@ -2017,7 +2018,7 @@ struct kernel_statfs { + : "i" (__NR_##name), "ri" ((long)(arg1)), \ + "c" ((long)(arg2)), "d" ((long)(arg3)), \ + "S" ((long)(arg4)), "D" ((long)(arg5)) \ +- : "esp", "memory"); \ ++ : "memory"); \ + LSS_RETURN(type,__res); \ + } + #undef _syscall6 +@@ -2039,7 +2040,7 @@ struct kernel_statfs { + : "i" (__NR_##name), "0" ((long)(&__s)), \ + "c" ((long)(arg2)), "d" ((long)(arg3)), \ + "S" ((long)(arg4)), "D" ((long)(arg5)) \ +- : "esp", "memory"); \ ++ : "memory"); \ + LSS_RETURN(type,__res); \ + } + LSS_INLINE int LSS_NAME(clone)(int (*fn)(void *), void *child_stack, +@@ -2125,7 +2126,7 @@ struct kernel_statfs { + : "0"(-EINVAL), "i"(__NR_clone), + "m"(fn), "m"(child_stack), "m"(flags), "m"(arg), + "m"(parent_tidptr), "m"(newtls), "m"(child_tidptr) +- : "esp", "memory", "ecx", "edx", "esi", "edi"); ++ : "memory", "ecx", "edx", "esi", "edi"); + LSS_RETURN(int, __res); + } + +@@ -2813,6 +2814,126 @@ struct kernel_statfs { + } + LSS_RETURN(int, __res); + } ++ #elif defined(__loongarch64) ++ /* Most definitions of _syscallX() neglect to mark "memory" as being ++ * clobbered. 
This causes problems with compilers, that do a better job ++ * at optimizing across __asm__ calls. ++ * So, we just have to redefine all of the _syscallX() macros. ++ */ ++ #undef LSS_REG ++ #define LSS_REG(ar,a) register int64_t __a##ar __asm__("a"#ar) = (int64_t)a ++ #undef LSS_BODY ++ #define LSS_BODY(type,name,args...) \ ++ register int64_t __res_a0 __asm__("a0"); \ ++ int64_t __res; \ ++ __asm__ __volatile__ ("li $a7, %1\n" \ ++ "syscall 0x0\n" \ ++ : "=r"(__res_a0) \ ++ : "i"(__NR_##name) , ## args \ ++ : "$a7", "memory"); \ ++ __res = __res_a0; \ ++ LSS_RETURN(type, __res) ++ #undef _syscall0 ++ #define _syscall0(type, name) \ ++ type LSS_NAME(name)(void) { \ ++ LSS_BODY(type, name); \ ++ } ++ #undef _syscall1 ++ #define _syscall1(type, name, type1, arg1) \ ++ type LSS_NAME(name)(type1 arg1) { \ ++ LSS_REG(0, arg1); LSS_BODY(type, name, "r"(__a0)); \ ++ } ++ #undef _syscall2 ++ #define _syscall2(type, name, type1, arg1, type2, arg2) \ ++ type LSS_NAME(name)(type1 arg1, type2 arg2) { \ ++ LSS_REG(0, arg1); LSS_REG(1, arg2); \ ++ LSS_BODY(type, name, "r"(__a0), "r"(__a1)); \ ++ } ++ #undef _syscall3 ++ #define _syscall3(type, name, type1, arg1, type2, arg2, type3, arg3) \ ++ type LSS_NAME(name)(type1 arg1, type2 arg2, type3 arg3) { \ ++ LSS_REG(0, arg1); LSS_REG(1, arg2); LSS_REG(2, arg3); \ ++ LSS_BODY(type, name, "r"(__a0), "r"(__a1), "r"(__a2)); \ ++ } ++ #undef _syscall4 ++ #define _syscall4(type,name,type1,arg1,type2,arg2,type3,arg3,type4,arg4) \ ++ type LSS_NAME(name)(type1 arg1, type2 arg2, type3 arg3, type4 arg4) { \ ++ LSS_REG(0, arg1); LSS_REG(1, arg2); LSS_REG(2, arg3); \ ++ LSS_REG(3, arg4); \ ++ LSS_BODY(type, name, "r"(__a0), "r"(__a1), "r"(__a2), "r"(__a3)); \ ++ } ++ #undef _syscall5 ++ #define _syscall5(type,name,type1,arg1,type2,arg2,type3,arg3,type4,arg4, \ ++ type5,arg5) \ ++ type LSS_NAME(name)(type1 arg1, type2 arg2, type3 arg3, type4 arg4, \ ++ type5 arg5) { \ ++ LSS_REG(0, arg1); LSS_REG(1, arg2); LSS_REG(2, arg3); \ ++ LSS_REG(3, arg4); LSS_REG(4, arg5); \ ++ LSS_BODY(type, name, "r"(__a0), "r"(__a1), "r"(__a2), "r"(__a3), \ ++ "r"(__a4)); \ ++ } ++ #undef _syscall6 ++ #define _syscall6(type,name,type1,arg1,type2,arg2,type3,arg3,type4,arg4, \ ++ type5,arg5,type6,arg6) \ ++ type LSS_NAME(name)(type1 arg1, type2 arg2, type3 arg3, type4 arg4, \ ++ type5 arg5, type6 arg6) { \ ++ LSS_REG(0, arg1); LSS_REG(1, arg2); LSS_REG(2, arg3); \ ++ LSS_REG(3, arg4); LSS_REG(4, arg5); LSS_REG(5, arg6); \ ++ LSS_BODY(type, name, "r"(__a0), "r"(__a1), "r"(__a2), "r"(__a3), \ ++ "r"(__a4), "r"(__a5)); \ ++ } ++ ++ LSS_INLINE int LSS_NAME(clone)(int (*fn)(void *), void *child_stack, ++ int flags, void *arg, int *parent_tidptr, ++ void *newtls, int *child_tidptr) { ++ int64_t __res; ++ { ++ register uint64_t __flags __asm__("a0") = flags; ++ register void *__stack __asm__("a1") = child_stack; ++ register void *__ptid __asm__("a2") = parent_tidptr; ++ register void *__tls __asm__("a3") = newtls; ++ register int *__ctid __asm__("a4") = child_tidptr; ++ __asm__ __volatile__(/* Push "arg" and "fn" onto the stack that will be ++ * used by the child. ++ */ ++ "sub.d $%2, 16\n" ++ "st.d %1, %2, 8\n" ++ "st.d %4, %2, 0\n" ++ ++ /* %a0 = syscall(%a0 = flags, ++ * %a1 = child_stack, ++ * %a2 = parent_tidptr, ++ * %a3 = newtls, ++ * %a4 = child_tidptr) ++ */ ++ "li a7, %8\n" ++ "syscall 0x0\n" ++ ++ /* if (%a0 != 0) ++ * return %a0; ++ */ ++ "bnz a0, 1f\n" ++ ++ /* In the child, now. Call "fn(arg)". 
++ */ ++ "ld.d a0, $sp, 0\n" ++ "ld.d a1, $sp, 8\n" ++ "add.d $sp, 16\n" ++ "bl $a1\n" ++ ++ /* Call _exit(%a0). ++ */ ++ "li $a7, %9\n" ++ "syscall 0x0\n" ++ "1:\n" ++ : "=r" (__res) ++ : "r"(fn), "r"(__stack), "r"(__flags), "r"(arg), ++ "r"(__ptid), "r"(__tls), "r"(__ctid), ++ "i"(__NR_clone), "i"(__NR_exit) ++ : "cc", "a7", "memory"); ++ } ++ LSS_RETURN(int, __res); ++ } + #elif defined(__mips__) + #undef LSS_REG + #define LSS_REG(r,a) register unsigned long __r##r __asm__("$"#r) = \ +@@ -3396,9 +3517,10 @@ struct kernel_statfs { + LSS_INLINE _syscall2(int, ftruncate, int, f, + off_t, l) + #endif +- LSS_INLINE _syscall4(int, futex, int*, a, +- int, o, int, v, +- struct kernel_timespec*, t) ++ LSS_INLINE _syscall6(int, futex, int*, u, ++ int, o, int, v, ++ struct kernel_timespec*, t, ++ int*, u2, int, v2) + LSS_INLINE _syscall3(int, getdents, int, f, + struct kernel_dirent*, d, int, c) + LSS_INLINE _syscall3(int, getdents64, int, f, +@@ -4156,7 +4278,7 @@ struct kernel_statfs { + LSS_SC_BODY(4, int, 8, d, type, protocol, sv); + } + #endif +- #if defined(__ARM_EABI__) || defined (__aarch64__) ++ #if defined(__ARM_EABI__) || defined (__aarch64__) || defined (__loongarch64) + LSS_INLINE _syscall3(ssize_t, recvmsg, int, s, struct kernel_msghdr*, msg, + int, flags) + LSS_INLINE _syscall3(ssize_t, sendmsg, int, s, const struct kernel_msghdr*, +@@ -4478,7 +4600,7 @@ struct kernel_statfs { + // TODO: define this in an arch-independant way instead of inlining the clone + // syscall body. + +-# if defined(__aarch64__) ++# if defined(__aarch64__) || defined(__loongarch64) + LSS_INLINE pid_t LSS_NAME(fork)(void) { + // No fork syscall on aarch64 - implement by means of the clone syscall. + // Note that this does not reset glibc's cached view of the PID/TID, so +@@ -4494,8 +4616,8 @@ struct kernel_statfs { + LSS_REG(2, parent_tidptr); + LSS_REG(3, newtls); + LSS_REG(4, child_tidptr); +- LSS_BODY(pid_t, clone, "r"(__r0), "r"(__r1), "r"(__r2), "r"(__r3), +- "r"(__r4)); ++ LSS_BODY(pid_t, clone, "r"(__a0), "r"(__a1), "r"(__a2), "r"(__a3), ++ "r"(__a4)); + } + # elif defined(__x86_64__) + LSS_INLINE pid_t LSS_NAME(fork)(void) { +diff --git a/src/3rdparty/chromium/third_party/pdfium/third_party/base/allocator/partition_allocator/page_allocator_constants.h b/src/3rdparty/chromium/third_party/pdfium/third_party/base/allocator/partition_allocator/page_allocator_constants.h +index 567e3a3b91c..2208d4f207a 100644 +--- a/src/3rdparty/chromium/third_party/pdfium/third_party/base/allocator/partition_allocator/page_allocator_constants.h ++++ b/src/3rdparty/chromium/third_party/pdfium/third_party/base/allocator/partition_allocator/page_allocator_constants.h +@@ -13,7 +13,7 @@ namespace pdfium { + namespace base { + #if defined(OS_WIN) || defined(ARCH_CPU_PPC64) + static constexpr size_t kPageAllocationGranularityShift = 16; // 64KB +-#elif defined(_MIPS_ARCH_LOONGSON) ++#elif defined(_MIPS_ARCH_LOONGSON) || defined(__loongarch__) + static constexpr size_t kPageAllocationGranularityShift = 14; // 16KB + #else + static constexpr size_t kPageAllocationGranularityShift = 12; // 4KB +@@ -25,7 +25,7 @@ static constexpr size_t kPageAllocationGranularityOffsetMask = + static constexpr size_t kPageAllocationGranularityBaseMask = + ~kPageAllocationGranularityOffsetMask; + +-#if defined(_MIPS_ARCH_LOONGSON) ++#if defined(_MIPS_ARCH_LOONGSON) || defined(__loongarch__) + static constexpr size_t kSystemPageSize = 16384; + #elif defined(ARCH_CPU_PPC64) + // Modern ppc64 systems support 4KB and 64KB page sizes. 
+diff --git a/src/3rdparty/chromium/third_party/pdfium/third_party/base/allocator/partition_allocator/partition_alloc_constants.h b/src/3rdparty/chromium/third_party/pdfium/third_party/base/allocator/partition_allocator/partition_alloc_constants.h +index 8ebc4f5bac7..600dbdb5134 100644 +--- a/src/3rdparty/chromium/third_party/pdfium/third_party/base/allocator/partition_allocator/partition_alloc_constants.h ++++ b/src/3rdparty/chromium/third_party/pdfium/third_party/base/allocator/partition_allocator/partition_alloc_constants.h +@@ -35,7 +35,7 @@ static const size_t kBucketShift = (kAllocationGranularity == 8) ? 3 : 2; + // other constant values, we pack _all_ `PartitionRootGeneric::Alloc` sizes + // perfectly up against the end of a system page. + +-#if defined(_MIPS_ARCH_LOONGSON) ++#if defined(_MIPS_ARCH_LOONGSON) || defined(__loongarch__) + static const size_t kPartitionPageShift = 16; // 64 KiB + #elif defined(ARCH_CPU_PPC64) + static const size_t kPartitionPageShift = 18; // 256 KiB +diff --git a/src/3rdparty/chromium/third_party/swiftshader/src/Reactor/BUILD.gn b/src/3rdparty/chromium/third_party/swiftshader/src/Reactor/BUILD.gn +index 1154dba4288..403d53aaae5 100644 +--- a/src/3rdparty/chromium/third_party/swiftshader/src/Reactor/BUILD.gn ++++ b/src/3rdparty/chromium/third_party/swiftshader/src/Reactor/BUILD.gn +@@ -19,7 +19,7 @@ declare_args() { + # PPC64. + use_swiftshader_with_subzero = + current_cpu != "arm64" && current_cpu != "mips64el" && current_cpu != "ppc64" +- supports_llvm = is_linux || is_fuchsia || is_win || is_android || is_mac ++ supports_llvm = (is_linux || is_fuchsia || is_win || is_android || is_mac) && current_cpu != "la64" + } + + config("swiftshader_reactor_private_config") { +diff --git a/src/3rdparty/chromium/third_party/swiftshader/src/Reactor/SubzeroReactor.cpp b/src/3rdparty/chromium/third_party/swiftshader/src/Reactor/SubzeroReactor.cpp +index 522b5668772..f34d37b73d5 100644 +--- a/src/3rdparty/chromium/third_party/swiftshader/src/Reactor/SubzeroReactor.cpp ++++ b/src/3rdparty/chromium/third_party/swiftshader/src/Reactor/SubzeroReactor.cpp +@@ -324,6 +324,8 @@ private: + return false; + #elif defined(__mips__) + return false; ++#elif defined(__loongarch__) ++ return false; + #else + # error "Unknown architecture" + #endif +@@ -665,6 +667,7 @@ std::vector loadImage(uint8_t *const elfImage, const std::vectore_machine == EM_AARCH64); + #elif defined(__mips__) + ASSERT(sizeof(void *) == 4 && elfHeader->e_machine == EM_MIPS); ++#elif defined(__loongarch__) + #else + # error "Unsupported platform" + #endif +diff --git a/src/3rdparty/chromium/third_party/swiftshader/third_party/marl/src/osfiber_asm_la64.S b/src/3rdparty/chromium/third_party/swiftshader/third_party/marl/src/osfiber_asm_la64.S +new file mode 100644 +index 00000000000..a41e0be09e3 +--- /dev/null ++++ b/src/3rdparty/chromium/third_party/swiftshader/third_party/marl/src/osfiber_asm_la64.S +@@ -0,0 +1,86 @@ ++// Copyright 2020 The Marl Authors. ++// ++// Licensed under the Apache License, Version 2.0 (the "License"); ++// you may not use this file except in compliance with the License. ++// You may obtain a copy of the License at ++// ++// https://www.apache.org/licenses/LICENSE-2.0 ++// ++// Unless required by applicable law or agreed to in writing, software ++// distributed under the License is distributed on an "AS IS" BASIS, ++// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
++// See the License for the specific language governing permissions and ++// limitations under the License. ++ ++#if defined(__loongarch__) ++ ++#define MARL_BUILD_ASM 1 ++#include "osfiber_asm_la64.h" ++ ++// void marl_fiber_swap(marl_fiber_context* from, const marl_fiber_context* to) ++// a0: from ++// v0: to ++.text ++.global MARL_ASM_SYMBOL(marl_fiber_swap) ++.align 4 ++MARL_ASM_SYMBOL(marl_fiber_swap): ++ ++ // Save context 'from' ++ ++ // Store callee-preserved registers ++ sd $s0, MARL_REG_s0($a0) ++ sd $s1, MARL_REG_s1($a0) ++ sd $s2, MARL_REG_s2($a0) ++ sd $s3, MARL_REG_s3($a0) ++ sd $s4, MARL_REG_s4($a0) ++ sd $s5, MARL_REG_s5($a0) ++ sd $s6, MARL_REG_s6($a0) ++ sd $s7, MARL_REG_s7($a0) ++ ++ s.d $f24, MARL_REG_f24($a0) ++ s.d $f25, MARL_REG_f25($a0) ++ s.d $f26, MARL_REG_f26($a0) ++ s.d $f27, MARL_REG_f27($a0) ++ s.d $f28, MARL_REG_f28($a0) ++ s.d $f29, MARL_REG_f29($a0) ++ s.d $f30, MARL_REG_f30($a0) ++ s.d $f31, MARL_REG_f31($a0) ++ ++ sd $gp, MARL_REG_gp($a0) ++ sd $sp, MARL_REG_sp($a0) ++ sd $fp, MARL_REG_fp($a0) ++ sd $ra, MARL_REG_ra($a0) ++ ++ move $v0, $a1 // Function has no return, so safe to touch v0 ++ ++ // Recover callee-preserved registers ++ ld $s0, MARL_REG_s0($v0) ++ ld $s1, MARL_REG_s1($v0) ++ ld $s2, MARL_REG_s2($v0) ++ ld $s3, MARL_REG_s3($v0) ++ ld $s4, MARL_REG_s4($v0) ++ ld $s5, MARL_REG_s5($v0) ++ ld $s6, MARL_REG_s6($v0) ++ ld $s7, MARL_REG_s7($v0) ++ ++ l.d $f24, MARL_REG_f24($v0) ++ l.d $f25, MARL_REG_f25($v0) ++ l.d $f26, MARL_REG_f26($v0) ++ l.d $f27, MARL_REG_f27($v0) ++ l.d $f28, MARL_REG_f28($v0) ++ l.d $f29, MARL_REG_f29($v0) ++ l.d $f30, MARL_REG_f30($v0) ++ l.d $f31, MARL_REG_f31($v0) ++ ++ ld $gp, MARL_REG_gp($v0) ++ ld $sp, MARL_REG_sp($v0) ++ ld $fp, MARL_REG_fp($v0) ++ ld $ra, MARL_REG_ra($v0) ++ ++ // Recover arguments ++ ld $a0, MARL_REG_a0($v0) ++ ld $a1, MARL_REG_a1($v0) ++ ++ jr $ra ++ ++#endif // defined(__loongarch__) +diff --git a/src/3rdparty/chromium/third_party/swiftshader/third_party/marl/src/osfiber_asm_la64.h b/src/3rdparty/chromium/third_party/swiftshader/third_party/marl/src/osfiber_asm_la64.h +new file mode 100644 +index 00000000000..e444e1c78bf +--- /dev/null ++++ b/src/3rdparty/chromium/third_party/swiftshader/third_party/marl/src/osfiber_asm_la64.h +@@ -0,0 +1,126 @@ ++// Copyright 2020 The Marl Authors. ++// ++// Licensed under the Apache License, Version 2.0 (the "License"); ++// you may not use this file except in compliance with the License. ++// You may obtain a copy of the License at ++// ++// https://www.apache.org/licenses/LICENSE-2.0 ++// ++// Unless required by applicable law or agreed to in writing, software ++// distributed under the License is distributed on an "AS IS" BASIS, ++// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. ++// See the License for the specific language governing permissions and ++// limitations under the License.
++ ++#define MARL_REG_a0 0x00 ++#define MARL_REG_a1 0x08 ++#define MARL_REG_s0 0x10 ++#define MARL_REG_s1 0x18 ++#define MARL_REG_s2 0x20 ++#define MARL_REG_s3 0x28 ++#define MARL_REG_s4 0x30 ++#define MARL_REG_s5 0x38 ++#define MARL_REG_s6 0x40 ++#define MARL_REG_s7 0x48 ++#define MARL_REG_f24 0x50 ++#define MARL_REG_f25 0x58 ++#define MARL_REG_f26 0x60 ++#define MARL_REG_f27 0x68 ++#define MARL_REG_f28 0x70 ++#define MARL_REG_f29 0x78 ++#define MARL_REG_f30 0x80 ++#define MARL_REG_f31 0x88 ++#define MARL_REG_gp 0x90 ++#define MARL_REG_sp 0x98 ++#define MARL_REG_fp 0xa0 ++#define MARL_REG_ra 0xa8 ++ ++#if defined(__APPLE__) ++#define MARL_ASM_SYMBOL(x) _##x ++#else ++#define MARL_ASM_SYMBOL(x) x ++#endif ++ ++#ifndef MARL_BUILD_ASM ++ ++#include ++ ++struct marl_fiber_context { ++ // parameter registers (First two) ++ uintptr_t a0; ++ uintptr_t a1; ++ ++ // callee-saved registers ++ uintptr_t s0; ++ uintptr_t s1; ++ uintptr_t s2; ++ uintptr_t s3; ++ uintptr_t s4; ++ uintptr_t s5; ++ uintptr_t s6; ++ uintptr_t s7; ++ ++ uintptr_t f24; ++ uintptr_t f25; ++ uintptr_t f26; ++ uintptr_t f27; ++ uintptr_t f28; ++ uintptr_t f29; ++ uintptr_t f30; ++ uintptr_t f31; ++ ++ uintptr_t gp; ++ uintptr_t sp; ++ uintptr_t fp; ++ uintptr_t ra; ++}; ++ ++#ifdef __cplusplus ++#include ++static_assert(offsetof(marl_fiber_context, a0) == MARL_REG_a0, ++ "Bad register offset"); ++static_assert(offsetof(marl_fiber_context, a1) == MARL_REG_a1, ++ "Bad register offset"); ++static_assert(offsetof(marl_fiber_context, s0) == MARL_REG_s0, ++ "Bad register offset"); ++static_assert(offsetof(marl_fiber_context, s1) == MARL_REG_s1, ++ "Bad register offset"); ++static_assert(offsetof(marl_fiber_context, s2) == MARL_REG_s2, ++ "Bad register offset"); ++static_assert(offsetof(marl_fiber_context, s3) == MARL_REG_s3, ++ "Bad register offset"); ++static_assert(offsetof(marl_fiber_context, s4) == MARL_REG_s4, ++ "Bad register offset"); ++static_assert(offsetof(marl_fiber_context, s5) == MARL_REG_s5, ++ "Bad register offset"); ++static_assert(offsetof(marl_fiber_context, s6) == MARL_REG_s6, ++ "Bad register offset"); ++static_assert(offsetof(marl_fiber_context, s7) == MARL_REG_s7, ++ "Bad register offset"); ++static_assert(offsetof(marl_fiber_context, f24) == MARL_REG_f24, ++ "Bad register offset"); ++static_assert(offsetof(marl_fiber_context, f25) == MARL_REG_f25, ++ "Bad register offset"); ++static_assert(offsetof(marl_fiber_context, f26) == MARL_REG_f26, ++ "Bad register offset"); ++static_assert(offsetof(marl_fiber_context, f27) == MARL_REG_f27, ++ "Bad register offset"); ++static_assert(offsetof(marl_fiber_context, f28) == MARL_REG_f28, ++ "Bad register offset"); ++static_assert(offsetof(marl_fiber_context, f29) == MARL_REG_f29, ++ "Bad register offset"); ++static_assert(offsetof(marl_fiber_context, f30) == MARL_REG_f30, ++ "Bad register offset"); ++static_assert(offsetof(marl_fiber_context, f31) == MARL_REG_f31, ++ "Bad register offset"); ++static_assert(offsetof(marl_fiber_context, gp) == MARL_REG_gp, ++ "Bad register offset"); ++static_assert(offsetof(marl_fiber_context, sp) == MARL_REG_sp, ++ "Bad register offset"); ++static_assert(offsetof(marl_fiber_context, fp) == MARL_REG_fp, ++ "Bad register offset"); ++static_assert(offsetof(marl_fiber_context, ra) == MARL_REG_ra, ++ "Bad register offset"); ++#endif // __cplusplus ++ ++#endif // MARL_BUILD_ASM +diff --git a/src/3rdparty/chromium/third_party/swiftshader/third_party/marl/src/osfiber_la64.c 
b/src/3rdparty/chromium/third_party/swiftshader/third_party/marl/src/osfiber_la64.c +new file mode 100644 +index 00000000000..4838f9f0964 +--- /dev/null ++++ b/src/3rdparty/chromium/third_party/swiftshader/third_party/marl/src/osfiber_la64.c +@@ -0,0 +1,35 @@ ++// Copyright 2020 The Marl Authors. ++// ++// Licensed under the Apache License, Version 2.0 (the "License"); ++// you may not use this file except in compliance with the License. ++// You may obtain a copy of the License at ++// ++// https://www.apache.org/licenses/LICENSE-2.0 ++// ++// Unless required by applicable law or agreed to in writing, software ++// distributed under the License is distributed on an "AS IS" BASIS, ++// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. ++// See the License for the specific language governing permissions and ++// limitations under the License. ++ ++#if defined(__loongarch__) ++ ++#include "osfiber_asm_la64.h" ++ ++void marl_fiber_trampoline(void (*target)(void*), void* arg) { ++ target(arg); ++} ++ ++void marl_fiber_set_target(struct marl_fiber_context* ctx, ++ void* stack, ++ uint32_t stack_size, ++ void (*target)(void*), ++ void* arg) { ++ uintptr_t* stack_top = (uintptr_t*)((uint8_t*)(stack) + stack_size); ++ ctx->ra = (uintptr_t)&marl_fiber_trampoline; ++ ctx->a0 = (uintptr_t)target; ++ ctx->a1 = (uintptr_t)arg; ++ ctx->sp = ((uintptr_t)stack_top) & ~(uintptr_t)15; ++} ++ ++#endif // defined(__loongarch__) +diff --git a/src/3rdparty/chromium/third_party/tcmalloc/chromium/src/base/basictypes.h b/src/3rdparty/chromium/third_party/tcmalloc/chromium/src/base/basictypes.h +index 3bf59f4e5f7..8e723831d63 100644 +--- a/src/3rdparty/chromium/third_party/tcmalloc/chromium/src/base/basictypes.h ++++ b/src/3rdparty/chromium/third_party/tcmalloc/chromium/src/base/basictypes.h +@@ -379,6 +379,8 @@ class AssignAttributeStartEnd { + // some ARMs have shorter cache lines (ARM1176JZF-S is 32 bytes for example) but obviously 64-byte aligned implies 32-byte aligned + # elif (defined(__mips__)) + # define CACHELINE_ALIGNED __attribute__((aligned(128))) ++# elif (defined(__loongarch__)) ++# define CACHELINE_ALIGNED __attribute__((aligned(128))) + # elif (defined(__aarch64__)) + # define CACHELINE_ALIGNED __attribute__((aligned(64))) + // implementation specific, Cortex-A53 and 57 should have 64 bytes +diff --git a/src/3rdparty/chromium/third_party/tcmalloc/chromium/src/base/spinlock_linux-inl.h b/src/3rdparty/chromium/third_party/tcmalloc/chromium/src/base/spinlock_linux-inl.h +index ece8477b972..c4a4ab715d8 100644 +--- a/src/3rdparty/chromium/third_party/tcmalloc/chromium/src/base/spinlock_linux-inl.h ++++ b/src/3rdparty/chromium/third_party/tcmalloc/chromium/src/base/spinlock_linux-inl.h +@@ -41,6 +41,7 @@ + #define FUTEX_WAIT 0 + #define FUTEX_WAKE 1 + #define FUTEX_PRIVATE_FLAG 128 ++#define __NR_futex 98 + + // Note: Instead of making direct system calls that are inlined, we rely + // on the syscall() function in glibc to do the right thing. 
This +diff --git a/src/3rdparty/chromium/third_party/webrtc/modules/desktop_capture/differ_block.cc b/src/3rdparty/chromium/third_party/webrtc/modules/desktop_capture/differ_block.cc +index dd9ab457e0f..ce0d26c953f 100644 +--- a/src/3rdparty/chromium/third_party/webrtc/modules/desktop_capture/differ_block.cc ++++ b/src/3rdparty/chromium/third_party/webrtc/modules/desktop_capture/differ_block.cc +@@ -29,7 +29,7 @@ bool VectorDifference_C(const uint8_t* image1, const uint8_t* image2) { + bool VectorDifference(const uint8_t* image1, const uint8_t* image2) { + static bool (*diff_proc)(const uint8_t*, const uint8_t*) = nullptr; + +- if (!diff_proc) { ++/* if (!diff_proc) { + #if defined(WEBRTC_ARCH_ARM_FAMILY) || defined(WEBRTC_ARCH_MIPS_FAMILY) + // For ARM and MIPS processors, always use C version. + // TODO(hclam): Implement a NEON version. +@@ -47,7 +47,8 @@ bool VectorDifference(const uint8_t* image1, const uint8_t* image2) { + #endif + } + +- return diff_proc(image1, image2); ++ return diff_proc(image1, image2);*/ ++ return false; + } + + bool BlockDifference(const uint8_t* image1, +diff --git a/src/3rdparty/chromium/third_party/webrtc/modules/video_coding/codecs/vp8/libvpx_interface.cc b/src/3rdparty/chromium/third_party/webrtc/modules/video_coding/codecs/vp8/libvpx_interface.cc +index 1a3df403aec..6bbede54d5e 100644 +--- a/src/3rdparty/chromium/third_party/webrtc/modules/video_coding/codecs/vp8/libvpx_interface.cc ++++ b/src/3rdparty/chromium/third_party/webrtc/modules/video_coding/codecs/vp8/libvpx_interface.cc +@@ -26,6 +26,9 @@ class LibvpxVp8Facade : public LibvpxInterface { + unsigned int d_w, + unsigned int d_h, + unsigned int align) const override { ++#if defined(__loongarch__) ++ return NULL; ++#endif + return ::vpx_img_alloc(img, fmt, d_w, d_h, align); + } + +@@ -35,20 +38,34 @@ class LibvpxVp8Facade : public LibvpxInterface { + unsigned int d_h, + unsigned int stride_align, + unsigned char* img_data) const override { ++#if defined(__loongarch__) ++ return NULL; ++#endif + return ::vpx_img_wrap(img, fmt, d_w, d_h, stride_align, img_data); + } + +- void img_free(vpx_image_t* img) const override { ::vpx_img_free(img); } ++ void img_free(vpx_image_t* img) const override { ++#if defined(__loongarch__) ++ return ; ++#endif ++ ::vpx_img_free(img); ++ } + + vpx_codec_err_t codec_enc_config_set( + vpx_codec_ctx_t* ctx, + const vpx_codec_enc_cfg_t* cfg) const override { ++#if defined(__loongarch__) ++ return VPX_CODEC_ERROR; ++#endif + return ::vpx_codec_enc_config_set(ctx, cfg); + } + + vpx_codec_err_t codec_enc_config_default(vpx_codec_iface_t* iface, + vpx_codec_enc_cfg_t* cfg, + unsigned int usage) const override { ++#if defined(__loongarch__) ++ return VPX_CODEC_ERROR; ++#endif + return ::vpx_codec_enc_config_default(iface, cfg, usage); + } + +@@ -56,6 +73,9 @@ class LibvpxVp8Facade : public LibvpxInterface { + vpx_codec_iface_t* iface, + const vpx_codec_enc_cfg_t* cfg, + vpx_codec_flags_t flags) const override { ++#if defined(__loongarch__) ++ return VPX_CODEC_ERROR; ++#endif + return ::vpx_codec_enc_init(ctx, iface, cfg, flags); + } + +@@ -65,10 +85,16 @@ class LibvpxVp8Facade : public LibvpxInterface { + int num_enc, + vpx_codec_flags_t flags, + vpx_rational_t* dsf) const override { ++#if defined(__loongarch__) ++ return VPX_CODEC_ERROR; ++#endif + return ::vpx_codec_enc_init_multi(ctx, iface, cfg, num_enc, flags, dsf); + } + + vpx_codec_err_t codec_destroy(vpx_codec_ctx_t* ctx) const override { ++#if defined(__loongarch__) ++ return VPX_CODEC_ERROR; ++#endif + return 
::vpx_codec_destroy(ctx); + } + +@@ -78,6 +104,9 @@ class LibvpxVp8Facade : public LibvpxInterface { + vpx_codec_err_t codec_control(vpx_codec_ctx_t* ctx, + vp8e_enc_control_id ctrl_id, + uint32_t param) const override { ++#if defined(__loongarch__) ++ return VPX_CODEC_ERROR; ++#endif + // We need an explicit call for each type since vpx_codec_control is a + // macro that gets expanded into another call based on the parameter name. + switch (ctrl_id) { +@@ -113,6 +142,9 @@ class LibvpxVp8Facade : public LibvpxInterface { + vpx_codec_err_t codec_control(vpx_codec_ctx_t* ctx, + vp8e_enc_control_id ctrl_id, + int param) const override { ++#if defined(__loongarch__) ++ return VPX_CODEC_ERROR; ++#endif + switch (ctrl_id) { + case VP8E_SET_FRAME_FLAGS: + return vpx_codec_control(ctx, VP8E_SET_FRAME_FLAGS, param); +@@ -134,6 +166,9 @@ class LibvpxVp8Facade : public LibvpxInterface { + vpx_codec_err_t codec_control(vpx_codec_ctx_t* ctx, + vp8e_enc_control_id ctrl_id, + int* param) const override { ++#if defined(__loongarch__) ++ return VPX_CODEC_ERROR; ++#endif + switch (ctrl_id) { + case VP8E_GET_LAST_QUANTIZER: + return vpx_codec_control(ctx, VP8E_GET_LAST_QUANTIZER, param); +@@ -148,6 +183,9 @@ class LibvpxVp8Facade : public LibvpxInterface { + vpx_codec_err_t codec_control(vpx_codec_ctx_t* ctx, + vp8e_enc_control_id ctrl_id, + vpx_roi_map* param) const override { ++#if defined(__loongarch__) ++ return VPX_CODEC_ERROR; ++#endif + switch (ctrl_id) { + case VP8E_SET_ROI_MAP: + return vpx_codec_control(ctx, VP8E_SET_ROI_MAP, param); +@@ -160,6 +198,9 @@ class LibvpxVp8Facade : public LibvpxInterface { + vpx_codec_err_t codec_control(vpx_codec_ctx_t* ctx, + vp8e_enc_control_id ctrl_id, + vpx_active_map* param) const override { ++#if defined(__loongarch__) ++ return VPX_CODEC_ERROR; ++#endif + switch (ctrl_id) { + case VP8E_SET_ACTIVEMAP: + return vpx_codec_control(ctx, VP8E_SET_ACTIVEMAP, param); +@@ -172,6 +213,9 @@ class LibvpxVp8Facade : public LibvpxInterface { + vpx_codec_err_t codec_control(vpx_codec_ctx_t* ctx, + vp8e_enc_control_id ctrl_id, + vpx_scaling_mode* param) const override { ++#if defined(__loongarch__) ++ return VPX_CODEC_ERROR; ++#endif + switch (ctrl_id) { + case VP8E_SET_SCALEMODE: + return vpx_codec_control(ctx, VP8E_SET_SCALEMODE, param); +@@ -187,12 +231,18 @@ class LibvpxVp8Facade : public LibvpxInterface { + uint64_t duration, + vpx_enc_frame_flags_t flags, + uint64_t deadline) const override { ++#if defined(__loongarch__) ++ return VPX_CODEC_ERROR; ++#endif + return ::vpx_codec_encode(ctx, img, pts, duration, flags, deadline); + } + + const vpx_codec_cx_pkt_t* codec_get_cx_data( + vpx_codec_ctx_t* ctx, + vpx_codec_iter_t* iter) const override { ++#if defined(__loongarch__) ++ return NULL; ++#endif + return ::vpx_codec_get_cx_data(ctx, iter); + } + }; +diff --git a/src/3rdparty/chromium/third_party/webrtc/modules/video_coding/codecs/vp8/libvpx_vp8_decoder.cc b/src/3rdparty/chromium/third_party/webrtc/modules/video_coding/codecs/vp8/libvpx_vp8_decoder.cc +index a3ee2c0c41d..782c5653b07 100644 +--- a/src/3rdparty/chromium/third_party/webrtc/modules/video_coding/codecs/vp8/libvpx_vp8_decoder.cc ++++ b/src/3rdparty/chromium/third_party/webrtc/modules/video_coding/codecs/vp8/libvpx_vp8_decoder.cc +@@ -118,6 +118,9 @@ LibvpxVp8Decoder::~LibvpxVp8Decoder() { + } + + int LibvpxVp8Decoder::InitDecode(const VideoCodec* inst, int number_of_cores) { ++#if defined(__loongarch__) ++ return WEBRTC_VIDEO_CODEC_OK; ++#endif + int ret_val = Release(); + if (ret_val < 0) { + return ret_val; 
+@@ -160,6 +163,9 @@ int LibvpxVp8Decoder::InitDecode(const VideoCodec* inst, int number_of_cores) { + int LibvpxVp8Decoder::Decode(const EncodedImage& input_image, + bool missing_frames, + int64_t /*render_time_ms*/) { ++#if defined(__loongarch__) ++ return WEBRTC_VIDEO_CODEC_UNINITIALIZED; ++#endif + if (!inited_) { + return WEBRTC_VIDEO_CODEC_UNINITIALIZED; + } +@@ -342,7 +348,9 @@ int LibvpxVp8Decoder::RegisterDecodeCompleteCallback( + + int LibvpxVp8Decoder::Release() { + int ret_val = WEBRTC_VIDEO_CODEC_OK; +- ++#if defined(__loongarch__) ++ return ret_val; ++#endif + if (decoder_ != NULL) { + if (inited_) { + if (vpx_codec_destroy(decoder_)) { +diff --git a/src/3rdparty/chromium/third_party/webrtc/modules/video_coding/codecs/vp8/libvpx_vp8_encoder.cc b/src/3rdparty/chromium/third_party/webrtc/modules/video_coding/codecs/vp8/libvpx_vp8_encoder.cc +index 7694dae6ece..0933cfa2d7c 100644 +--- a/src/3rdparty/chromium/third_party/webrtc/modules/video_coding/codecs/vp8/libvpx_vp8_encoder.cc ++++ b/src/3rdparty/chromium/third_party/webrtc/modules/video_coding/codecs/vp8/libvpx_vp8_encoder.cc +@@ -452,6 +452,9 @@ void LibvpxVp8Encoder::SetFecControllerOverride( + // TODO(eladalon): s/inst/codec_settings/g. + int LibvpxVp8Encoder::InitEncode(const VideoCodec* inst, + const VideoEncoder::Settings& settings) { ++#if defined(__loongarch__) ++ return WEBRTC_VIDEO_CODEC_ERR_PARAMETER; ++#endif + if (inst == NULL) { + return WEBRTC_VIDEO_CODEC_ERR_PARAMETER; + } +@@ -793,6 +796,9 @@ int LibvpxVp8Encoder::NumberOfThreads(int width, int height, int cpus) { + } + + int LibvpxVp8Encoder::InitAndSetControlSettings() { ++#if defined(__loongarch__) ++ return WEBRTC_VIDEO_CODEC_UNINITIALIZED; ++#endif + vpx_codec_flags_t flags = 0; + flags |= VPX_CODEC_USE_OUTPUT_PARTITION; + +@@ -943,6 +949,9 @@ int LibvpxVp8Encoder::Encode(const VideoFrame& frame, + const std::vector* frame_types) { + RTC_DCHECK_EQ(frame.width(), codec_.width); + RTC_DCHECK_EQ(frame.height(), codec_.height); ++#if defined(__loongarch__) ++ return WEBRTC_VIDEO_CODEC_UNINITIALIZED; ++#endif + + if (!inited_) + return WEBRTC_VIDEO_CODEC_UNINITIALIZED; +diff --git a/src/3rdparty/chromium/third_party/webrtc/rtc_base/system/arch.h b/src/3rdparty/chromium/third_party/webrtc/rtc_base/system/arch.h +index ed216e660fd..01cb78c186e 100644 +--- a/src/3rdparty/chromium/third_party/webrtc/rtc_base/system/arch.h ++++ b/src/3rdparty/chromium/third_party/webrtc/rtc_base/system/arch.h +@@ -38,6 +38,8 @@ + #define WEBRTC_ARCH_LITTLE_ENDIAN + #elif defined(__MIPSEL__) + #define WEBRTC_ARCH_MIPS_FAMILY ++#elif defined(__loongarch__) ++#define WEBRTC_ARCH_LOONGARCH_FAMILY + #if defined(__LP64__) + #define WEBRTC_ARCH_64_BITS + #else +diff --git a/src/3rdparty/chromium/ui/base/x/BUILD.gn b/src/3rdparty/chromium/ui/base/x/BUILD.gn +index 782f009ddcc..6941d79df85 100644 +--- a/src/3rdparty/chromium/ui/base/x/BUILD.gn ++++ b/src/3rdparty/chromium/ui/base/x/BUILD.gn +@@ -57,6 +57,8 @@ component("x") { + "//build/config/linux:xrandr", + ] + ++ cflags = ["-fpermissive", ] ++ + defines = [ "IS_UI_BASE_X_IMPL" ] + + deps = [ +diff --git a/src/3rdparty/chromium/ui/gl/BUILD.gn b/src/3rdparty/chromium/ui/gl/BUILD.gn +index adc9c08b0be..e3fbeb64b4b 100644 +--- a/src/3rdparty/chromium/ui/gl/BUILD.gn ++++ b/src/3rdparty/chromium/ui/gl/BUILD.gn +@@ -33,6 +33,7 @@ buildflag_header("buildflags") { + + config("gl_config") { + defines = [] ++ cflags = [] + if (use_glx) { + defines += [ + "GL_GLEXT_PROTOTYPES", +@@ -42,6 +43,8 @@ config("gl_config") { + if (use_egl) { + defines 
+= [ "USE_EGL" ] + } ++ ++ cflags += [ "-fpermissive", ] + } + + jumbo_component("gl") { +diff --git a/src/3rdparty/chromium/ui/views/layout/layout_types.h b/src/3rdparty/chromium/ui/views/layout/layout_types.h +index b349e63825f..35142a56277 100644 +--- a/src/3rdparty/chromium/ui/views/layout/layout_types.h ++++ b/src/3rdparty/chromium/ui/views/layout/layout_types.h +@@ -46,10 +46,10 @@ class VIEWS_EXPORT SizeBounds { + ~SizeBounds() = default; + + constexpr const base::Optional& width() const { return width_; } +- void set_width(base::Optional width) { width_ = std::move(width); } ++ void set_width(base::Optional width) { width_ = width; } + + constexpr const base::Optional& height() const { return height_; } +- void set_height(base::Optional height) { height_ = std::move(height); } ++ void set_height(base::Optional height) { height_ = height; } + + constexpr bool is_fully_bounded() const { return width_ && height_; } + +diff --git a/src/3rdparty/chromium/v8/BUILD.gn b/src/3rdparty/chromium/v8/BUILD.gn +index ab20142de9a..f7763f54c15 100644 +--- a/src/3rdparty/chromium/v8/BUILD.gn ++++ b/src/3rdparty/chromium/v8/BUILD.gn +@@ -670,6 +670,16 @@ config("toolchain") { + cflags += [ "-march=z196" ] + } + } ++ ++ # la64 simulators. ++ if (target_is_simulator && v8_current_cpu == "la64") { ++ defines += [ "_LA64_TARGET_SIMULATOR" ] ++ } ++ ++ if (v8_current_cpu == "la64") { ++ defines += [ "V8_TARGET_ARCH_LA64" ] ++ } ++ + if (v8_current_cpu == "ppc" || v8_current_cpu == "ppc64") { + if (v8_current_cpu == "ppc") { + defines += [ "V8_TARGET_ARCH_PPC" ] +@@ -1695,6 +1705,11 @@ v8_source_set("v8_initializers") { + ### gcmole(arch:mips64el) ### + "src/builtins/mips64/builtins-mips64.cc", + ] ++ } else if (v8_current_cpu == "la64") { ++ sources += [ ++ ### gcmole(arch:la64) ### ++ "src/builtins/la64/builtins-la64.cc", ++ ] + } else if (v8_current_cpu == "ppc") { + sources += [ + ### gcmole(arch:ppc) ### +@@ -3362,6 +3377,33 @@ v8_source_set("v8_base_without_compiler") { + "src/regexp/mips64/regexp-macro-assembler-mips64.h", + "src/wasm/baseline/mips64/liftoff-assembler-mips64.h", + ] ++ } else if (v8_current_cpu == "la64") { ++ sources += [ ### gcmole(arch:la64) ### ++ "src/codegen/la64/assembler-la64-inl.h", ++ "src/codegen/la64/assembler-la64.cc", ++ "src/codegen/la64/assembler-la64.h", ++ "src/codegen/la64/constants-la64.cc", ++ "src/codegen/la64/constants-la64.h", ++ "src/codegen/la64/cpu-la64.cc", ++ "src/codegen/la64/interface-descriptors-la64.cc", ++ "src/codegen/la64/macro-assembler-la64.cc", ++ "src/codegen/la64/macro-assembler-la64.h", ++ "src/codegen/la64/register-la64.h", ++ "src/compiler/backend/la64/code-generator-la64.cc", ++ "src/compiler/backend/la64/instruction-codes-la64.h", ++ "src/compiler/backend/la64/instruction-scheduler-la64.cc", ++ "src/compiler/backend/la64/instruction-selector-la64.cc", ++ "src/debug/la64/debug-la64.cc", ++ "src/deoptimizer/la64/deoptimizer-la64.cc", ++ "src/diagnostics/la64/disasm-la64.cc", ++ "src/execution/la64/frame-constants-la64.cc", ++ "src/execution/la64/frame-constants-la64.h", ++ "src/execution/la64/simulator-la64.cc", ++ "src/execution/la64/simulator-la64.h", ++ "src/regexp/la64/regexp-macro-assembler-la64.cc", ++ "src/regexp/la64/regexp-macro-assembler-la64.h", ++ "src/wasm/baseline/la64/liftoff-assembler-la64.h", ++ ] + } else if (v8_current_cpu == "ppc") { + sources += [ ### gcmole(arch:ppc) ### + "src/codegen/ppc/assembler-ppc-inl.h", +diff --git a/src/3rdparty/chromium/v8/gni/snapshot_toolchain.gni 
b/src/3rdparty/chromium/v8/gni/snapshot_toolchain.gni +index b5fb1823b38..8ada11d790d 100644 +--- a/src/3rdparty/chromium/v8/gni/snapshot_toolchain.gni ++++ b/src/3rdparty/chromium/v8/gni/snapshot_toolchain.gni +@@ -79,7 +79,8 @@ if (v8_snapshot_toolchain == "") { + + if (v8_current_cpu == "x64" || v8_current_cpu == "x86") { + _cpus = v8_current_cpu +- } else if (v8_current_cpu == "arm64" || v8_current_cpu == "mips64el") { ++ } else if (v8_current_cpu == "arm64" || v8_current_cpu == "mips64el" || ++ v8_current_cpu == "la64") { + if (is_win && v8_current_cpu == "arm64") { + # set _cpus to blank for Windows ARM64 so host_toolchain could be + # selected as snapshot toolchain later. +diff --git a/src/3rdparty/chromium/v8/src/base/build_config.h b/src/3rdparty/chromium/v8/src/base/build_config.h +index 8d142c456c9..a9a8a07b12e 100644 +--- a/src/3rdparty/chromium/v8/src/base/build_config.h ++++ b/src/3rdparty/chromium/v8/src/base/build_config.h +@@ -33,6 +33,9 @@ + #elif defined(__MIPSEB__) || defined(__MIPSEL__) + #define V8_HOST_ARCH_MIPS 1 + #define V8_HOST_ARCH_32_BIT 1 ++#elif defined(__loongarch64) ++#define V8_HOST_ARCH_LA64 1 ++#define V8_HOST_ARCH_64_BIT 1 + #elif defined(__PPC64__) || defined(_ARCH_PPC64) + #define V8_HOST_ARCH_PPC64 1 + #define V8_HOST_ARCH_64_BIT 1 +@@ -77,7 +80,8 @@ + // environment as presented by the compiler. + #if !V8_TARGET_ARCH_X64 && !V8_TARGET_ARCH_IA32 && !V8_TARGET_ARCH_ARM && \ + !V8_TARGET_ARCH_ARM64 && !V8_TARGET_ARCH_MIPS && !V8_TARGET_ARCH_MIPS64 && \ +- !V8_TARGET_ARCH_PPC && !V8_TARGET_ARCH_PPC64 && !V8_TARGET_ARCH_S390 ++ !V8_TARGET_ARCH_PPC && !V8_TARGET_ARCH_PPC64 && !V8_TARGET_ARCH_S390 && \ ++ !V8_TARGET_ARCH_LA64 + #if defined(_M_X64) || defined(__x86_64__) + #define V8_TARGET_ARCH_X64 1 + #elif defined(_M_IX86) || defined(__i386__) +@@ -118,6 +122,8 @@ + #define V8_TARGET_ARCH_32_BIT 1 + #elif V8_TARGET_ARCH_MIPS64 + #define V8_TARGET_ARCH_64_BIT 1 ++#elif V8_TARGET_ARCH_LA64 ++#define V8_TARGET_ARCH_64_BIT 1 + #elif V8_TARGET_ARCH_PPC + #define V8_TARGET_ARCH_32_BIT 1 + #elif V8_TARGET_ARCH_PPC64 +@@ -156,6 +162,9 @@ + #if (V8_TARGET_ARCH_MIPS64 && !(V8_HOST_ARCH_X64 || V8_HOST_ARCH_MIPS64)) + #error Target architecture mips64 is only supported on mips64 and x64 host + #endif ++#if (V8_TARGET_ARCH_LA64 && !(V8_HOST_ARCH_X64 || V8_HOST_ARCH_LA64)) ++#error Target architecture la64 is only supported on la64 and x64 host ++#endif + + // Determine architecture endianness. + #if V8_TARGET_ARCH_IA32 +@@ -166,6 +175,8 @@ + #define V8_TARGET_LITTLE_ENDIAN 1 + #elif V8_TARGET_ARCH_ARM64 + #define V8_TARGET_LITTLE_ENDIAN 1 ++#elif V8_TARGET_ARCH_LA64 ++#define V8_TARGET_LITTLE_ENDIAN 1 + #elif V8_TARGET_ARCH_MIPS + #if defined(__MIPSEB__) + #define V8_TARGET_BIG_ENDIAN 1 +diff --git a/src/3rdparty/chromium/v8/src/base/platform/platform-posix.cc b/src/3rdparty/chromium/v8/src/base/platform/platform-posix.cc +index 1e600c7891b..f0e95f6443b 100644 +--- a/src/3rdparty/chromium/v8/src/base/platform/platform-posix.cc ++++ b/src/3rdparty/chromium/v8/src/base/platform/platform-posix.cc +@@ -297,6 +297,10 @@ void* OS::GetRandomMmapAddr() { + // 42 bits of virtual addressing. Truncate to 40 bits to allow kernel chance + // to fulfill request. + raw_addr &= uint64_t{0xFFFFFF0000}; ++#elif V8_TARGET_ARCH_LA64 ++ // 42 bits of virtual addressing. Truncate to 40 bits to allow kernel chance ++ // to fulfill request. 
++ raw_addr &= uint64_t{0xFFFFFF0000}; + #else + raw_addr &= 0x3FFFF000; + +@@ -470,6 +474,8 @@ void OS::DebugBreak() { + asm("break"); + #elif V8_HOST_ARCH_MIPS64 + asm("break"); ++#elif V8_HOST_ARCH_LA64 ++ asm("break 0"); + #elif V8_HOST_ARCH_PPC || V8_HOST_ARCH_PPC64 + asm("twge 2,2"); + #elif V8_HOST_ARCH_IA32 +diff --git a/src/3rdparty/chromium/v8/src/builtins/builtins.cc b/src/3rdparty/chromium/v8/src/builtins/builtins.cc +index 34f7ddc18ad..72f28d08e98 100644 +--- a/src/3rdparty/chromium/v8/src/builtins/builtins.cc ++++ b/src/3rdparty/chromium/v8/src/builtins/builtins.cc +@@ -466,7 +466,7 @@ bool Builtins::CodeObjectIsExecutable(int builtin_index) { + case Builtins::kCEntry_Return1_DontSaveFPRegs_ArgvOnStack_NoBuiltinExit: + return true; + default: +-#if V8_TARGET_ARCH_MIPS || V8_TARGET_ARCH_MIPS64 ++#if V8_TARGET_ARCH_MIPS || V8_TARGET_ARCH_MIPS64 || V8_TARGET_ARCH_LA64 + // TODO(Loongson): Move non-JS linkage builtins code objects into RO_SPACE + // caused MIPS platform to crash, and we need some time to handle it. Now + // disable this change temporarily on MIPS platform. +diff --git a/src/3rdparty/chromium/v8/src/builtins/la64/builtins-la64.cc b/src/3rdparty/chromium/v8/src/builtins/la64/builtins-la64.cc +new file mode 100644 +index 00000000000..cdfb9abed10 +--- /dev/null ++++ b/src/3rdparty/chromium/v8/src/builtins/la64/builtins-la64.cc +@@ -0,0 +1,3173 @@ ++// Copyright 2012 the V8 project authors. All rights reserved. ++// Use of this source code is governed by a BSD-style license that can be ++// found in the LICENSE file. ++ ++#if V8_TARGET_ARCH_LA64 ++ ++#include "src/api/api-arguments.h" ++#include "src/codegen/code-factory.h" ++#include "src/debug/debug.h" ++#include "src/deoptimizer/deoptimizer.h" ++#include "src/execution/frame-constants.h" ++#include "src/execution/frames.h" ++#include "src/logging/counters.h" ++// For interpreter_entry_return_pc_offset. TODO(jkummerow): Drop. ++#include "src/codegen/la64/constants-la64.h" ++#include "src/codegen/macro-assembler-inl.h" ++#include "src/codegen/register-configuration.h" ++#include "src/heap/heap-inl.h" ++#include "src/objects/cell.h" ++#include "src/objects/foreign.h" ++#include "src/objects/heap-number.h" ++#include "src/objects/js-generator.h" ++#include "src/objects/objects-inl.h" ++#include "src/objects/smi.h" ++#include "src/runtime/runtime.h" ++#include "src/wasm/wasm-linkage.h" ++#include "src/wasm/wasm-objects.h" ++ ++namespace v8 { ++namespace internal { ++ ++#define __ ACCESS_MASM(masm) ++ ++void Builtins::Generate_Adaptor(MacroAssembler* masm, Address address) { ++ __ li(kJavaScriptCallExtraArg1Register, ExternalReference::Create(address)); ++ __ Jump(BUILTIN_CODE(masm->isolate(), AdaptorWithBuiltinExitFrame), ++ RelocInfo::CODE_TARGET); ++} ++ ++static void GenerateTailCallToReturnedCode(MacroAssembler* masm, ++ Runtime::FunctionId function_id) { ++ // ----------- S t a t e ------------- ++ // -- a1 : target function (preserved for callee) ++ // -- a3 : new target (preserved for callee) ++ // ----------------------------------- ++ { ++ FrameScope scope(masm, StackFrame::INTERNAL); ++ // Push a copy of the function onto the stack. ++ // Push a copy of the target function and the new target. ++ __ Push(a1, a3, a1); ++ ++ __ CallRuntime(function_id, 1); ++ // Restore target function and new target. 
++ __ Pop(a1, a3); ++ } ++ ++ static_assert(kJavaScriptCallCodeStartRegister == a2, "ABI mismatch"); ++ __ Add_d(a2, a0, Operand(Code::kHeaderSize - kHeapObjectTag)); ++ __ Jump(a2); ++} ++ ++namespace { ++ ++enum StackLimitKind { kInterruptStackLimit, kRealStackLimit }; ++ ++void LoadStackLimit(MacroAssembler* masm, Register destination, ++ StackLimitKind kind) { ++ DCHECK(masm->root_array_available()); ++ Isolate* isolate = masm->isolate(); ++ ExternalReference limit = ++ kind == StackLimitKind::kRealStackLimit ++ ? ExternalReference::address_of_real_jslimit(isolate) ++ : ExternalReference::address_of_jslimit(isolate); ++ DCHECK(TurboAssembler::IsAddressableThroughRootRegister(isolate, limit)); ++ ++ intptr_t offset = ++ TurboAssembler::RootRegisterOffsetForExternalReference(isolate, limit); ++ CHECK(is_int32(offset)); ++ __ Ld_d(destination, MemOperand(kRootRegister, static_cast(offset))); ++} ++ ++void Generate_JSBuiltinsConstructStubHelper(MacroAssembler* masm) { ++ // ----------- S t a t e ------------- ++ // -- a0 : number of arguments ++ // -- a1 : constructor function ++ // -- a3 : new target ++ // -- cp : context ++ // -- ra : return address ++ // -- sp[...]: constructor arguments ++ // ----------------------------------- ++ ++ // Enter a construct frame. ++ { ++ FrameScope scope(masm, StackFrame::CONSTRUCT); ++ ++ // Preserve the incoming parameters on the stack. ++ __ SmiTag(a0); ++ __ Push(cp, a0); ++ __ SmiUntag(a0); ++ ++ // The receiver for the builtin/api call. ++ __ PushRoot(RootIndex::kTheHoleValue); ++ ++ // Set up pointer to last argument. ++ __ Add_d(t2, fp, Operand(StandardFrameConstants::kCallerSPOffset)); ++ ++ // Copy arguments and receiver to the expression stack. ++ Label loop, entry; ++ __ mov(t3, a0); ++ // ----------- S t a t e ------------- ++ // -- a0: number of arguments (untagged) ++ // -- a3: new target ++ // -- t2: pointer to last argument ++ // -- t3: counter ++ // -- sp[0*kPointerSize]: the hole (receiver) ++ // -- sp[1*kPointerSize]: number of arguments (tagged) ++ // -- sp[2*kPointerSize]: context ++ // ----------------------------------- ++ __ jmp(&entry); ++ __ bind(&loop); ++ __ Alsl_d(t0, t3, t2, kPointerSizeLog2, t7); ++ __ Ld_d(t1, MemOperand(t0, 0)); ++ __ push(t1); ++ __ bind(&entry); ++ __ Add_d(t3, t3, Operand(-1)); ++ __ Branch(&loop, greater_equal, t3, Operand(zero_reg)); ++ ++ // Call the function. ++ // a0: number of arguments (untagged) ++ // a1: constructor function ++ // a3: new target ++ __ InvokeFunctionWithNewTarget(a1, a3, a0, CALL_FUNCTION); ++ ++ // Restore context from the frame. ++ __ Ld_d(cp, MemOperand(fp, ConstructFrameConstants::kContextOffset)); ++ // Restore smi-tagged arguments count from the frame. ++ __ Ld_d(a1, MemOperand(fp, ConstructFrameConstants::kLengthOffset)); ++ // Leave construct frame. ++ } ++ ++ // Remove caller arguments from the stack and return. ++ __ SmiScale(a4, a1, kPointerSizeLog2); ++ __ Add_d(sp, sp, a4); ++ __ Add_d(sp, sp, kPointerSize); ++ __ Ret(); ++} ++ ++static void Generate_StackOverflowCheck(MacroAssembler* masm, Register num_args, ++ Register scratch1, Register scratch2, ++ Label* stack_overflow) { ++ // Check the stack for overflow. We are not trying to catch ++ // interruptions (e.g. debug break and preemption) here, so the "real stack ++ // limit" is checked. ++ LoadStackLimit(masm, scratch1, StackLimitKind::kRealStackLimit); ++ // Make scratch1 the space we have left. The stack might already be overflowed ++ // here which will cause scratch1 to become negative. 
++ __ sub_d(scratch1, sp, scratch1); ++ // Check if the arguments will overflow the stack. ++ __ slli_d(scratch2, num_args, kPointerSizeLog2); ++ // Signed comparison. ++ __ Branch(stack_overflow, le, scratch1, Operand(scratch2)); ++} ++ ++} // namespace ++ ++// The construct stub for ES5 constructor functions and ES6 class constructors. ++void Builtins::Generate_JSConstructStubGeneric(MacroAssembler* masm) { ++ // ----------- S t a t e ------------- ++ // -- a0: number of arguments (untagged) ++ // -- a1: constructor function ++ // -- a3: new target ++ // -- cp: context ++ // -- ra: return address ++ // -- sp[...]: constructor arguments ++ // ----------------------------------- ++ ++ // Enter a construct frame. ++ { ++ FrameScope scope(masm, StackFrame::CONSTRUCT); ++ Label post_instantiation_deopt_entry, not_create_implicit_receiver; ++ ++ // Preserve the incoming parameters on the stack. ++ __ SmiTag(a0); ++ __ Push(cp, a0, a1); ++ __ PushRoot(RootIndex::kTheHoleValue); ++ __ Push(a3); ++ ++ // ----------- S t a t e ------------- ++ // -- sp[0*kPointerSize]: new target ++ // -- sp[1*kPointerSize]: padding ++ // -- a1 and sp[2*kPointerSize]: constructor function ++ // -- sp[3*kPointerSize]: number of arguments (tagged) ++ // -- sp[4*kPointerSize]: context ++ // ----------------------------------- ++ ++ __ Ld_d(t2, FieldMemOperand(a1, JSFunction::kSharedFunctionInfoOffset)); ++ __ Ld_wu(t2, FieldMemOperand(t2, SharedFunctionInfo::kFlagsOffset)); ++ __ DecodeField(t2); ++ __ JumpIfIsInRange(t2, kDefaultDerivedConstructor, kDerivedConstructor, ++ ¬_create_implicit_receiver); ++ ++ // If not derived class constructor: Allocate the new receiver object. ++ __ IncrementCounter(masm->isolate()->counters()->constructed_objects(), 1, ++ t2, t3); ++ __ Call(BUILTIN_CODE(masm->isolate(), FastNewObject), ++ RelocInfo::CODE_TARGET); ++ __ Branch(&post_instantiation_deopt_entry); ++ ++ // Else: use TheHoleValue as receiver for constructor call ++ __ bind(¬_create_implicit_receiver); ++ __ LoadRoot(a0, RootIndex::kTheHoleValue); ++ ++ // ----------- S t a t e ------------- ++ // -- a0: receiver ++ // -- Slot 4 / sp[0*kPointerSize]: new target ++ // -- Slot 3 / sp[1*kPointerSize]: padding ++ // -- Slot 2 / sp[2*kPointerSize]: constructor function ++ // -- Slot 1 / sp[3*kPointerSize]: number of arguments (tagged) ++ // -- Slot 0 / sp[4*kPointerSize]: context ++ // ----------------------------------- ++ // Deoptimizer enters here. ++ masm->isolate()->heap()->SetConstructStubCreateDeoptPCOffset( ++ masm->pc_offset()); ++ __ bind(&post_instantiation_deopt_entry); ++ ++ // Restore new target. ++ __ Pop(a3); ++ // Push the allocated receiver to the stack. We need two copies ++ // because we may have to return the original one and the calling ++ // conventions dictate that the called function pops the receiver. ++ __ Push(a0, a0); ++ ++ // ----------- S t a t e ------------- ++ // -- r3: new target ++ // -- sp[0*kPointerSize]: implicit receiver ++ // -- sp[1*kPointerSize]: implicit receiver ++ // -- sp[2*kPointerSize]: padding ++ // -- sp[3*kPointerSize]: constructor function ++ // -- sp[4*kPointerSize]: number of arguments (tagged) ++ // -- sp[5*kPointerSize]: context ++ // ----------------------------------- ++ ++ // Restore constructor function and argument count. ++ __ Ld_d(a1, MemOperand(fp, ConstructFrameConstants::kConstructorOffset)); ++ __ Ld_d(a0, MemOperand(fp, ConstructFrameConstants::kLengthOffset)); ++ __ SmiUntag(a0); ++ ++ // Set up pointer to last argument. 
++ __ Add_d(t2, fp, Operand(StandardFrameConstants::kCallerSPOffset)); ++ ++ Label enough_stack_space, stack_overflow; ++ Generate_StackOverflowCheck(masm, a0, t0, t1, &stack_overflow); ++ __ Branch(&enough_stack_space); ++ ++ __ bind(&stack_overflow); ++ // Restore the context from the frame. ++ __ Ld_d(cp, MemOperand(fp, ConstructFrameConstants::kContextOffset)); ++ __ CallRuntime(Runtime::kThrowStackOverflow); ++ // Unreachable code. ++ __ break_(0xCC); ++ ++ __ bind(&enough_stack_space); ++ ++ // Copy arguments and receiver to the expression stack. ++ Label loop, entry; ++ __ mov(t3, a0); ++ // ----------- S t a t e ------------- ++ // -- a0: number of arguments (untagged) ++ // -- a3: new target ++ // -- t2: pointer to last argument ++ // -- t3: counter ++ // -- sp[0*kPointerSize]: implicit receiver ++ // -- sp[1*kPointerSize]: implicit receiver ++ // -- sp[2*kPointerSize]: padding ++ // -- a1 and sp[3*kPointerSize]: constructor function ++ // -- sp[4*kPointerSize]: number of arguments (tagged) ++ // -- sp[5*kPointerSize]: context ++ // ----------------------------------- ++ __ jmp(&entry); ++ __ bind(&loop); ++ __ Alsl_d(t0, t3, t2, kPointerSizeLog2, t7); ++ __ Ld_d(t1, MemOperand(t0, 0)); ++ __ push(t1); ++ __ bind(&entry); ++ __ Add_d(t3, t3, Operand(-1)); ++ __ Branch(&loop, greater_equal, t3, Operand(zero_reg)); ++ ++ // Call the function. ++ __ InvokeFunctionWithNewTarget(a1, a3, a0, CALL_FUNCTION); ++ ++ // ----------- S t a t e ------------- ++ // -- t5: constructor result ++ // -- sp[0*kPointerSize]: implicit receiver ++ // -- sp[1*kPointerSize]: padding ++ // -- sp[2*kPointerSize]: constructor function ++ // -- sp[3*kPointerSize]: number of arguments ++ // -- sp[4*kPointerSize]: context ++ // ----------------------------------- ++ ++ // Store offset of return address for deoptimizer. ++ masm->isolate()->heap()->SetConstructStubInvokeDeoptPCOffset( ++ masm->pc_offset()); ++ ++ // Restore the context from the frame. ++ __ Ld_d(cp, MemOperand(fp, ConstructFrameConstants::kContextOffset)); ++ ++ // If the result is an object (in the ECMA sense), we should get rid ++ // of the receiver and use the result; see ECMA-262 section 13.2.2-7 ++ // on page 74. ++ Label use_receiver, do_throw, leave_frame; ++ ++ // If the result is undefined, we jump out to using the implicit receiver. ++ __ JumpIfRoot(a0, RootIndex::kUndefinedValue, &use_receiver); ++ ++ // Otherwise we do a smi check and fall through to check if the return value ++ // is a valid receiver. ++ ++ // If the result is a smi, it is *not* an object in the ECMA sense. ++ __ JumpIfSmi(a0, &use_receiver); ++ ++ // If the type of the result (stored in its map) is less than ++ // FIRST_JS_RECEIVER_TYPE, it is not an object in the ECMA sense. ++ __ GetObjectType(a0, t2, t2); ++ STATIC_ASSERT(LAST_JS_RECEIVER_TYPE == LAST_TYPE); ++ __ Branch(&leave_frame, greater_equal, t2, Operand(FIRST_JS_RECEIVER_TYPE)); ++ __ Branch(&use_receiver); ++ ++ __ bind(&do_throw); ++ __ CallRuntime(Runtime::kThrowConstructorReturnedNonObject); ++ ++ // Throw away the result of the constructor invocation and use the ++ // on-stack receiver as the result. ++ __ bind(&use_receiver); ++ __ Ld_d(a0, MemOperand(sp, 0 * kPointerSize)); ++ __ JumpIfRoot(a0, RootIndex::kTheHoleValue, &do_throw); ++ ++ __ bind(&leave_frame); ++ // Restore smi-tagged arguments count from the frame. ++ __ Ld_d(a1, MemOperand(fp, ConstructFrameConstants::kLengthOffset)); ++ // Leave construct frame. ++ } ++ // Remove caller arguments from the stack and return. 
++ __ SmiScale(a4, a1, kPointerSizeLog2); ++ __ Add_d(sp, sp, a4); ++ __ Add_d(sp, sp, kPointerSize); ++ __ Ret(); ++} ++ ++void Builtins::Generate_JSBuiltinsConstructStub(MacroAssembler* masm) { ++ Generate_JSBuiltinsConstructStubHelper(masm); ++} ++ ++static void GetSharedFunctionInfoBytecode(MacroAssembler* masm, ++ Register sfi_data, ++ Register scratch1) { ++ Label done; ++ ++ __ GetObjectType(sfi_data, scratch1, scratch1); ++ __ Branch(&done, ne, scratch1, Operand(INTERPRETER_DATA_TYPE)); ++ __ Ld_d(sfi_data, ++ FieldMemOperand(sfi_data, InterpreterData::kBytecodeArrayOffset)); ++ ++ __ bind(&done); ++} ++ ++// static ++void Builtins::Generate_ResumeGeneratorTrampoline(MacroAssembler* masm) { ++ // ----------- S t a t e ------------- ++ // -- a0 : the value to pass to the generator ++ // -- a1 : the JSGeneratorObject to resume ++ // -- ra : return address ++ // ----------------------------------- ++ __ AssertGeneratorObject(a1); ++ ++ // Store input value into generator object. ++ __ St_d(a0, FieldMemOperand(a1, JSGeneratorObject::kInputOrDebugPosOffset)); ++ __ RecordWriteField(a1, JSGeneratorObject::kInputOrDebugPosOffset, a0, a3, ++ kRAHasNotBeenSaved, kDontSaveFPRegs); ++ ++ // Load suspended function and context. ++ __ Ld_d(a4, FieldMemOperand(a1, JSGeneratorObject::kFunctionOffset)); ++ __ Ld_d(cp, FieldMemOperand(a4, JSFunction::kContextOffset)); ++ ++ // Flood function if we are stepping. ++ Label prepare_step_in_if_stepping, prepare_step_in_suspended_generator; ++ Label stepping_prepared; ++ ExternalReference debug_hook = ++ ExternalReference::debug_hook_on_function_call_address(masm->isolate()); ++ __ li(a5, debug_hook); ++ __ Ld_b(a5, MemOperand(a5, 0)); ++ __ Branch(&prepare_step_in_if_stepping, ne, a5, Operand(zero_reg)); ++ ++ // Flood function if we need to continue stepping in the suspended generator. ++ ExternalReference debug_suspended_generator = ++ ExternalReference::debug_suspended_generator_address(masm->isolate()); ++ __ li(a5, debug_suspended_generator); ++ __ Ld_d(a5, MemOperand(a5, 0)); ++ __ Branch(&prepare_step_in_suspended_generator, eq, a1, Operand(a5)); ++ __ bind(&stepping_prepared); ++ ++ // Check the stack for overflow. We are not trying to catch interruptions ++ // (i.e. debug break and preemption) here, so check the "real stack limit". ++ Label stack_overflow; ++ LoadStackLimit(masm, kScratchReg, StackLimitKind::kRealStackLimit); ++ __ Branch(&stack_overflow, lo, sp, Operand(kScratchReg)); ++ ++ // Push receiver. ++ __ Ld_d(a5, FieldMemOperand(a1, JSGeneratorObject::kReceiverOffset)); ++ __ Push(a5); ++ ++ // ----------- S t a t e ------------- ++ // -- a1 : the JSGeneratorObject to resume ++ // -- a4 : generator function ++ // -- cp : generator context ++ // -- ra : return address ++ // -- sp[0] : generator receiver ++ // ----------------------------------- ++ ++ // Push holes for arguments to generator function. Since the parser forced ++ // context allocation for any variables in generators, the actual argument ++ // values have already been copied into the context and these dummy values ++ // will never be used. 
++ __ Ld_d(a3, FieldMemOperand(a4, JSFunction::kSharedFunctionInfoOffset)); ++ __ Ld_hu( ++ a3, FieldMemOperand(a3, SharedFunctionInfo::kFormalParameterCountOffset)); ++ __ Ld_d(t1, FieldMemOperand( ++ a1, JSGeneratorObject::kParametersAndRegistersOffset)); ++ { ++ Label done_loop, loop; ++ __ Move(t2, zero_reg); ++ __ bind(&loop); ++ __ Sub_d(a3, a3, Operand(1)); ++ __ Branch(&done_loop, lt, a3, Operand(zero_reg)); ++ __ Alsl_d(kScratchReg, t2, t1, kPointerSizeLog2, t7); ++ __ Ld_d(kScratchReg, FieldMemOperand(kScratchReg, FixedArray::kHeaderSize)); ++ __ Push(kScratchReg); ++ __ Add_d(t2, t2, Operand(1)); ++ __ Branch(&loop); ++ __ bind(&done_loop); ++ } ++ ++ // Underlying function needs to have bytecode available. ++ if (FLAG_debug_code) { ++ __ Ld_d(a3, FieldMemOperand(a4, JSFunction::kSharedFunctionInfoOffset)); ++ __ Ld_d(a3, FieldMemOperand(a3, SharedFunctionInfo::kFunctionDataOffset)); ++ GetSharedFunctionInfoBytecode(masm, a3, t5); ++ __ GetObjectType(a3, a3, a3); ++ __ Assert(eq, AbortReason::kMissingBytecodeArray, a3, ++ Operand(BYTECODE_ARRAY_TYPE)); ++ } ++ ++ // Resume (Ignition/TurboFan) generator object. ++ { ++ __ Ld_d(a0, FieldMemOperand(a4, JSFunction::kSharedFunctionInfoOffset)); ++ __ Ld_hu(a0, FieldMemOperand( ++ a0, SharedFunctionInfo::kFormalParameterCountOffset)); ++ // We abuse new.target both to indicate that this is a resume call and to ++ // pass in the generator object. In ordinary calls, new.target is always ++ // undefined because generator functions are non-constructable. ++ __ Move(a3, a1); ++ __ Move(a1, a4); ++ static_assert(kJavaScriptCallCodeStartRegister == a2, "ABI mismatch"); ++ __ Ld_d(a2, FieldMemOperand(a1, JSFunction::kCodeOffset)); ++ __ Add_d(a2, a2, Operand(Code::kHeaderSize - kHeapObjectTag)); ++ __ Jump(a2); ++ } ++ ++ __ bind(&prepare_step_in_if_stepping); ++ { ++ FrameScope scope(masm, StackFrame::INTERNAL); ++ __ Push(a1, a4); ++ // Push hole as receiver since we do not use it for stepping. ++ __ PushRoot(RootIndex::kTheHoleValue); ++ __ CallRuntime(Runtime::kDebugOnFunctionCall); ++ __ Pop(a1); ++ } ++ __ Ld_d(a4, FieldMemOperand(a1, JSGeneratorObject::kFunctionOffset)); ++ __ Branch(&stepping_prepared); ++ ++ __ bind(&prepare_step_in_suspended_generator); ++ { ++ FrameScope scope(masm, StackFrame::INTERNAL); ++ __ Push(a1); ++ __ CallRuntime(Runtime::kDebugPrepareStepInSuspendedGenerator); ++ __ Pop(a1); ++ } ++ __ Ld_d(a4, FieldMemOperand(a1, JSGeneratorObject::kFunctionOffset)); ++ __ Branch(&stepping_prepared); ++ ++ __ bind(&stack_overflow); ++ { ++ FrameScope scope(masm, StackFrame::INTERNAL); ++ __ CallRuntime(Runtime::kThrowStackOverflow); ++ __ break_(0xCC); // This should be unreachable. ++ } ++} ++ ++void Builtins::Generate_ConstructedNonConstructable(MacroAssembler* masm) { ++ FrameScope scope(masm, StackFrame::INTERNAL); ++ __ Push(a1); ++ __ CallRuntime(Runtime::kThrowConstructedNonConstructable); ++} ++ ++// Clobbers scratch1 and scratch2; preserves all other registers. ++static void Generate_CheckStackOverflow(MacroAssembler* masm, Register argc, ++ Register scratch1, Register scratch2) { ++ // Check the stack for overflow. We are not trying to catch ++ // interruptions (e.g. debug break and preemption) here, so the "real stack ++ // limit" is checked. ++ Label okay; ++ LoadStackLimit(masm, scratch1, StackLimitKind::kRealStackLimit); ++ // Make a2 the space we have left. The stack might already be overflowed ++ // here which will cause r2 to become negative. 
++ __ sub_d(scratch1, sp, scratch1); ++ // Check if the arguments will overflow the stack. ++ __ slli_d(scratch2, argc, kPointerSizeLog2); ++ __ Branch(&okay, gt, scratch1, Operand(scratch2)); // Signed comparison. ++ ++ // Out of stack space. ++ __ CallRuntime(Runtime::kThrowStackOverflow); ++ ++ __ bind(&okay); ++} ++ ++namespace { ++ ++// Called with the native C calling convention. The corresponding function ++// signature is either: ++// ++// using JSEntryFunction = GeneratedCode; ++// or ++// using JSEntryFunction = GeneratedCode; ++void Generate_JSEntryVariant(MacroAssembler* masm, StackFrame::Type type, ++ Builtins::Name entry_trampoline) { ++ Label invoke, handler_entry, exit; ++ ++ { ++ NoRootArrayScope no_root_array(masm); ++ ++ // TODO(plind): unify the ABI description here. ++ // Registers: ++ // either ++ // a0: root register value ++ // a1: entry address ++ // a2: function ++ // a3: receiver ++ // a4: argc ++ // a5: argv ++ // or ++ // a0: root register value ++ // a1: microtask_queue ++ // ++ // Stack: ++ // 0 arg slots on mips64 (4 args slots on mips) ++ ++ // Save callee saved registers on the stack. ++ __ MultiPush(kCalleeSaved | ra.bit()); ++ ++ // Save callee-saved FPU registers. ++ __ MultiPushFPU(kCalleeSavedFPU); ++ // Set up the reserved register for 0.0. ++ __ Move(kDoubleRegZero, 0.0); ++ ++ // Initialize the root register. ++ // C calling convention. The first argument is passed in a0. ++ __ mov(kRootRegister, a0); ++ } ++ ++ // a1: entry address ++ // a2: function ++ // a3: receiver ++ // a4: argc ++ // a5: argv ++ ++ // We build an EntryFrame. ++ __ li(s1, Operand(-1)); // Push a bad frame pointer to fail if it is used. ++ __ li(s2, Operand(StackFrame::TypeToMarker(type))); ++ __ li(s3, Operand(StackFrame::TypeToMarker(type))); ++ ExternalReference c_entry_fp = ExternalReference::Create( ++ IsolateAddressId::kCEntryFPAddress, masm->isolate()); ++ __ li(s4, c_entry_fp); ++ __ Ld_d(s4, MemOperand(s4, 0)); ++ __ Push(s1, s2, s3, s4); ++ // Set up frame pointer for the frame to be pushed. ++ __ addi_d(fp, sp, -EntryFrameConstants::kCallerFPOffset); ++ ++ // Registers: ++ // either ++ // a1: entry address ++ // a2: function ++ // a3: receiver ++ // a4: argc ++ // a5: argv ++ // or ++ // a1: microtask_queue ++ // ++ // Stack: ++ // caller fp | ++ // function slot | entry frame ++ // context slot | ++ // bad fp (0xFF...F) | ++ // callee saved registers + ra ++ // [ O32: 4 args slots] ++ // args ++ ++ // If this is the outermost JS call, set js_entry_sp value. ++ Label non_outermost_js; ++ ExternalReference js_entry_sp = ExternalReference::Create( ++ IsolateAddressId::kJSEntrySPAddress, masm->isolate()); ++ __ li(s1, js_entry_sp); ++ __ Ld_d(s2, MemOperand(s1, 0)); ++ __ Branch(&non_outermost_js, ne, s2, Operand(zero_reg)); ++ __ St_d(fp, MemOperand(s1, 0)); ++ __ li(s3, Operand(StackFrame::OUTERMOST_JSENTRY_FRAME)); ++ Label cont; ++ __ b(&cont); ++ __ nop(); // Branch delay slot nop. ++ __ bind(&non_outermost_js); ++ __ li(s3, Operand(StackFrame::INNER_JSENTRY_FRAME)); ++ __ bind(&cont); ++ __ push(s3); ++ ++ // Jump to a faked try block that does the invoke, with a faked catch ++ // block that sets the pending exception. ++ __ jmp(&invoke); ++ __ bind(&handler_entry); ++ ++ // Store the current pc as the handler offset. It's used later to create the ++ // handler table. 
++ masm->isolate()->builtins()->SetJSEntryHandlerOffset(handler_entry.pos()); ++ ++ // Caught exception: Store result (exception) in the pending exception ++ // field in the JSEnv and return a failure sentinel. Coming in here the ++ // fp will be invalid because the PushStackHandler below sets it to 0 to ++ // signal the existence of the JSEntry frame. ++ __ li(s1, ExternalReference::Create( ++ IsolateAddressId::kPendingExceptionAddress, masm->isolate())); ++ __ St_d(a0, ++ MemOperand(s1, 0)); // We come back from 'invoke'. result is in a0. ++ __ LoadRoot(a0, RootIndex::kException); ++ __ b(&exit); // b exposes branch delay slot. ++ __ nop(); // Branch delay slot nop. ++ ++ // Invoke: Link this frame into the handler chain. ++ __ bind(&invoke); ++ __ PushStackHandler(); ++ // If an exception not caught by another handler occurs, this handler ++ // returns control to the code after the bal(&invoke) above, which ++ // restores all kCalleeSaved registers (including cp and fp) to their ++ // saved values before returning a failure to C. ++ // ++ // Registers: ++ // either ++ // a0: root register value ++ // a1: entry address ++ // a2: function ++ // a3: receiver ++ // a4: argc ++ // a5: argv ++ // or ++ // a0: root register value ++ // a1: microtask_queue ++ // ++ // Stack: ++ // handler frame ++ // entry frame ++ // callee saved registers + ra ++ // [ O32: 4 args slots] ++ // args ++ // ++ // Invoke the function by calling through JS entry trampoline builtin and ++ // pop the faked function when we return. ++ ++ Handle trampoline_code = ++ masm->isolate()->builtins()->builtin_handle(entry_trampoline); ++ __ Call(trampoline_code, RelocInfo::CODE_TARGET); ++ ++ // Unlink this frame from the handler chain. ++ __ PopStackHandler(); ++ ++ __ bind(&exit); // a0 holds result ++ // Check if the current stack frame is marked as the outermost JS frame. ++ Label non_outermost_js_2; ++ __ pop(a5); ++ __ Branch(&non_outermost_js_2, ne, a5, ++ Operand(StackFrame::OUTERMOST_JSENTRY_FRAME)); ++ __ li(a5, js_entry_sp); ++ __ St_d(zero_reg, MemOperand(a5, 0)); ++ __ bind(&non_outermost_js_2); ++ ++ // Restore the top frame descriptors from the stack. ++ __ pop(a5); ++ __ li(a4, ExternalReference::Create(IsolateAddressId::kCEntryFPAddress, ++ masm->isolate())); ++ __ St_d(a5, MemOperand(a4, 0)); ++ ++ // Reset the stack to the callee saved registers. ++ __ addi_d(sp, sp, -EntryFrameConstants::kCallerFPOffset); ++ ++ // Restore callee-saved fpu registers. ++ __ MultiPopFPU(kCalleeSavedFPU); ++ ++ // Restore callee saved registers from the stack. ++ __ MultiPop(kCalleeSaved | ra.bit()); ++ // Return. ++ __ Jump(ra); ++} ++ ++} // namespace ++ ++void Builtins::Generate_JSEntry(MacroAssembler* masm) { ++ Generate_JSEntryVariant(masm, StackFrame::ENTRY, ++ Builtins::kJSEntryTrampoline); ++} ++ ++void Builtins::Generate_JSConstructEntry(MacroAssembler* masm) { ++ Generate_JSEntryVariant(masm, StackFrame::CONSTRUCT_ENTRY, ++ Builtins::kJSConstructEntryTrampoline); ++} ++ ++void Builtins::Generate_JSRunMicrotasksEntry(MacroAssembler* masm) { ++ Generate_JSEntryVariant(masm, StackFrame::ENTRY, ++ Builtins::kRunMicrotasksTrampoline); ++} ++ ++static void Generate_JSEntryTrampolineHelper(MacroAssembler* masm, ++ bool is_construct) { ++ // ----------- S t a t e ------------- ++ // -- a1: new.target ++ // -- a2: function ++ // -- a3: receiver_pointer ++ // -- a4: argc ++ // -- a5: argv ++ // ----------------------------------- ++ ++ // Enter an internal frame. 
++ { ++ FrameScope scope(masm, StackFrame::INTERNAL); ++ ++ // Setup the context (we need to use the caller context from the isolate). ++ ExternalReference context_address = ExternalReference::Create( ++ IsolateAddressId::kContextAddress, masm->isolate()); ++ __ li(cp, context_address); ++ __ Ld_d(cp, MemOperand(cp, 0)); ++ ++ // Push the function and the receiver onto the stack. ++ __ Push(a2, a3); ++ ++ // Check if we have enough stack space to push all arguments. ++ // Clobbers a0 and a3. ++ Generate_CheckStackOverflow(masm, a4, t5, a3); ++ ++ // Setup new.target, function and argc. ++ __ mov(a3, a1); ++ __ mov(a1, a2); ++ __ mov(a0, a4); ++ ++ // a0: argc ++ // a1: function ++ // a3: new.target ++ // a5: argv ++ ++ // Copy arguments to the stack in a loop. ++ // a3: argc ++ // a5: argv, i.e. points to first arg ++ Label loop, entry; ++ __ Alsl_d(s1, a4, a5, kPointerSizeLog2, t7); ++ __ b(&entry); ++ __ nop(); // Branch delay slot nop. ++ // s1 points past last arg. ++ __ bind(&loop); ++ __ Ld_d(s2, MemOperand(a5, 0)); // Read next parameter. ++ __ addi_d(a5, a5, kPointerSize); ++ __ Ld_d(s2, MemOperand(s2, 0)); // Dereference handle. ++ __ push(s2); // Push parameter. ++ __ bind(&entry); ++ __ Branch(&loop, ne, a5, Operand(s1)); ++ ++ // a0: argc ++ // a1: function ++ // a3: new.target ++ ++ // Initialize all JavaScript callee-saved registers, since they will be seen ++ // by the garbage collector as part of handlers. ++ __ LoadRoot(a4, RootIndex::kUndefinedValue); ++ __ mov(a5, a4); ++ __ mov(s1, a4); ++ __ mov(s2, a4); ++ __ mov(s3, a4); ++ __ mov(s4, a4); ++ __ mov(s5, a4); ++ // s6 holds the root address. Do not clobber. ++ // s7 is cp. Do not init. ++ ++ // Invoke the code. ++ Handle builtin = is_construct ++ ? BUILTIN_CODE(masm->isolate(), Construct) ++ : masm->isolate()->builtins()->Call(); ++ __ Call(builtin, RelocInfo::CODE_TARGET); ++ ++ // Leave internal frame. ++ } ++ __ Jump(ra); ++} ++ ++void Builtins::Generate_JSEntryTrampoline(MacroAssembler* masm) { ++ Generate_JSEntryTrampolineHelper(masm, false); ++} ++ ++void Builtins::Generate_JSConstructEntryTrampoline(MacroAssembler* masm) { ++ Generate_JSEntryTrampolineHelper(masm, true); ++} ++ ++void Builtins::Generate_RunMicrotasksTrampoline(MacroAssembler* masm) { ++ // a1: microtask_queue ++ __ mov(RunMicrotasksDescriptor::MicrotaskQueueRegister(), a1); ++ __ Jump(BUILTIN_CODE(masm->isolate(), RunMicrotasks), RelocInfo::CODE_TARGET); ++} ++ ++static void ReplaceClosureCodeWithOptimizedCode(MacroAssembler* masm, ++ Register optimized_code, ++ Register closure, ++ Register scratch1, ++ Register scratch2) { ++ // Store code entry in the closure. ++ __ St_d(optimized_code, FieldMemOperand(closure, JSFunction::kCodeOffset)); ++ __ mov(scratch1, optimized_code); // Write barrier clobbers scratch1 below. ++ __ RecordWriteField(closure, JSFunction::kCodeOffset, scratch1, scratch2, ++ kRAHasNotBeenSaved, kDontSaveFPRegs, OMIT_REMEMBERED_SET, ++ OMIT_SMI_CHECK); ++} ++ ++static void LeaveInterpreterFrame(MacroAssembler* masm, Register scratch) { ++ Register args_count = scratch; ++ ++ // Get the arguments + receiver count. ++ __ Ld_d(args_count, ++ MemOperand(fp, InterpreterFrameConstants::kBytecodeArrayFromFp)); ++ __ Ld_w(t0, FieldMemOperand(args_count, BytecodeArray::kParameterSizeOffset)); ++ ++ // Leave the frame (also dropping the register file). ++ __ LeaveFrame(StackFrame::INTERPRETED); ++ ++ // Drop receiver + arguments. 
++ __ Add_d(sp, sp, args_count); ++} ++ ++// Tail-call |function_id| if |smi_entry| == |marker| ++static void TailCallRuntimeIfMarkerEquals(MacroAssembler* masm, ++ Register smi_entry, ++ OptimizationMarker marker, ++ Runtime::FunctionId function_id) { ++ Label no_match; ++ __ Branch(&no_match, ne, smi_entry, Operand(Smi::FromEnum(marker))); ++ GenerateTailCallToReturnedCode(masm, function_id); ++ __ bind(&no_match); ++} ++ ++static void TailCallOptimizedCodeSlot(MacroAssembler* masm, ++ Register optimized_code_entry, ++ Register scratch1, Register scratch2) { ++ // ----------- S t a t e ------------- ++ // -- a3 : new target (preserved for callee if needed, and caller) ++ // -- a1 : target function (preserved for callee if needed, and caller) ++ // ----------------------------------- ++ DCHECK(!AreAliased(optimized_code_entry, a1, a3, scratch1, scratch2)); ++ ++ Register closure = a1; ++ ++ // Check if the optimized code is marked for deopt. If it is, call the ++ // runtime to clear it. ++ Label found_deoptimized_code; ++ __ Ld_d(a5, FieldMemOperand(optimized_code_entry, ++ Code::kCodeDataContainerOffset)); ++ __ Ld_w(a5, FieldMemOperand(a5, CodeDataContainer::kKindSpecificFlagsOffset)); ++ __ And(a5, a5, Operand(1 << Code::kMarkedForDeoptimizationBit)); ++ __ Branch(&found_deoptimized_code, ne, a5, Operand(zero_reg)); ++ ++ // Optimized code is good, get it into the closure and link the closure into ++ // the optimized functions list, then tail call the optimized code. ++ // The feedback vector is no longer used, so re-use it as a scratch ++ // register. ++ ReplaceClosureCodeWithOptimizedCode(masm, optimized_code_entry, closure, ++ scratch1, scratch2); ++ ++ static_assert(kJavaScriptCallCodeStartRegister == a2, "ABI mismatch"); ++ __ Add_d(a2, optimized_code_entry, ++ Operand(Code::kHeaderSize - kHeapObjectTag)); ++ __ Jump(a2); ++ ++ // Optimized code slot contains deoptimized code, evict it and re-enter the ++ // closure's code. ++ __ bind(&found_deoptimized_code); ++ GenerateTailCallToReturnedCode(masm, Runtime::kEvictOptimizedCodeSlot); ++} ++ ++static void MaybeOptimizeCode(MacroAssembler* masm, Register feedback_vector, ++ Register optimization_marker) { ++ // ----------- S t a t e ------------- ++ // -- a3 : new target (preserved for callee if needed, and caller) ++ // -- a1 : target function (preserved for callee if needed, and caller) ++ // -- feedback vector (preserved for caller if needed) ++ // -- optimization_marker : a Smi containing a non-zero optimization marker. ++ // ----------------------------------- ++ DCHECK(!AreAliased(feedback_vector, a1, a3, optimization_marker)); ++ ++ // TODO(v8:8394): The logging of first execution will break if ++ // feedback vectors are not allocated. We need to find a different way of ++ // logging these events if required. ++ TailCallRuntimeIfMarkerEquals(masm, optimization_marker, ++ OptimizationMarker::kLogFirstExecution, ++ Runtime::kFunctionFirstExecution); ++ TailCallRuntimeIfMarkerEquals(masm, optimization_marker, ++ OptimizationMarker::kCompileOptimized, ++ Runtime::kCompileOptimized_NotConcurrent); ++ TailCallRuntimeIfMarkerEquals(masm, optimization_marker, ++ OptimizationMarker::kCompileOptimizedConcurrent, ++ Runtime::kCompileOptimized_Concurrent); ++ ++ // Otherwise, the marker is InOptimizationQueue, so fall through hoping ++ // that an interrupt will eventually update the slot with optimized code. 
++ if (FLAG_debug_code) { ++ __ Assert(eq, AbortReason::kExpectedOptimizationSentinel, ++ optimization_marker, ++ Operand(Smi::FromEnum(OptimizationMarker::kInOptimizationQueue))); ++ } ++} ++ ++// Advance the current bytecode offset. This simulates what all bytecode ++// handlers do upon completion of the underlying operation. Will bail out to a ++// label if the bytecode (without prefix) is a return bytecode. ++static void AdvanceBytecodeOffsetOrReturn(MacroAssembler* masm, ++ Register bytecode_array, ++ Register bytecode_offset, ++ Register bytecode, Register scratch1, ++ Register scratch2, Label* if_return) { ++ Register bytecode_size_table = scratch1; ++ DCHECK(!AreAliased(bytecode_array, bytecode_offset, bytecode_size_table, ++ bytecode)); ++ ++ __ li(bytecode_size_table, ExternalReference::bytecode_size_table_address()); ++ ++ // Check if the bytecode is a Wide or ExtraWide prefix bytecode. ++ Label process_bytecode, extra_wide; ++ STATIC_ASSERT(0 == static_cast(interpreter::Bytecode::kWide)); ++ STATIC_ASSERT(1 == static_cast(interpreter::Bytecode::kExtraWide)); ++ STATIC_ASSERT(2 == static_cast(interpreter::Bytecode::kDebugBreakWide)); ++ STATIC_ASSERT(3 == ++ static_cast(interpreter::Bytecode::kDebugBreakExtraWide)); ++ __ Branch(&process_bytecode, hi, bytecode, Operand(3)); ++ __ And(scratch2, bytecode, Operand(1)); ++ __ Branch(&extra_wide, ne, scratch2, Operand(zero_reg)); ++ ++ // Load the next bytecode and update table to the wide scaled table. ++ __ Add_d(bytecode_offset, bytecode_offset, Operand(1)); ++ __ Add_d(scratch2, bytecode_array, bytecode_offset); ++ __ Ld_bu(bytecode, MemOperand(scratch2, 0)); ++ __ Add_d(bytecode_size_table, bytecode_size_table, ++ Operand(kIntSize * interpreter::Bytecodes::kBytecodeCount)); ++ __ jmp(&process_bytecode); ++ ++ __ bind(&extra_wide); ++ // Load the next bytecode and update table to the extra wide scaled table. ++ __ Add_d(bytecode_offset, bytecode_offset, Operand(1)); ++ __ Add_d(scratch2, bytecode_array, bytecode_offset); ++ __ Ld_bu(bytecode, MemOperand(scratch2, 0)); ++ __ Add_d(bytecode_size_table, bytecode_size_table, ++ Operand(2 * kIntSize * interpreter::Bytecodes::kBytecodeCount)); ++ ++ __ bind(&process_bytecode); ++ ++// Bailout to the return label if this is a return bytecode. ++#define JUMP_IF_EQUAL(NAME) \ ++ __ Branch(if_return, eq, bytecode, \ ++ Operand(static_cast(interpreter::Bytecode::k##NAME))); ++ RETURN_BYTECODE_LIST(JUMP_IF_EQUAL) ++#undef JUMP_IF_EQUAL ++ ++ // Otherwise, load the size of the current bytecode and advance the offset. ++ __ Alsl_d(scratch2, bytecode, bytecode_size_table, 2, t7); ++ __ Ld_w(scratch2, MemOperand(scratch2, 0)); ++ __ Add_d(bytecode_offset, bytecode_offset, scratch2); ++} ++ ++// Generate code for entering a JS function with the interpreter. ++// On entry to the function the receiver and arguments have been pushed on the ++// stack left to right. The actual argument count matches the formal parameter ++// count expected by the function. ++// ++// The live registers are: ++// o a1: the JS function object being called. ++// o a3: the incoming new target or generator object ++// o cp: our context ++// o fp: the caller's frame pointer ++// o sp: stack pointer ++// o ra: return address ++// ++// The function builds an interpreter frame. See InterpreterFrameConstants in ++// frames.h for its layout. 
++void Builtins::Generate_InterpreterEntryTrampoline(MacroAssembler* masm) { ++ Register closure = a1; ++ Register feedback_vector = a2; ++ ++ // Get the bytecode array from the function object and load it into ++ // kInterpreterBytecodeArrayRegister. ++ __ Ld_d(t5, FieldMemOperand(closure, JSFunction::kSharedFunctionInfoOffset)); ++ __ Ld_d(kInterpreterBytecodeArrayRegister, ++ FieldMemOperand(t5, SharedFunctionInfo::kFunctionDataOffset)); ++ GetSharedFunctionInfoBytecode(masm, kInterpreterBytecodeArrayRegister, a4); ++ ++ // The bytecode array could have been flushed from the shared function info, ++ // if so, call into CompileLazy. ++ Label compile_lazy; ++ __ GetObjectType(kInterpreterBytecodeArrayRegister, t5, t5); ++ __ Branch(&compile_lazy, ne, t5, Operand(BYTECODE_ARRAY_TYPE)); ++ ++ // Load the feedback vector from the closure. ++ __ Ld_d(feedback_vector, ++ FieldMemOperand(closure, JSFunction::kFeedbackCellOffset)); ++ __ Ld_d(feedback_vector, ++ FieldMemOperand(feedback_vector, Cell::kValueOffset)); ++ ++ Label push_stack_frame; ++ // Check if feedback vector is valid. If valid, check for optimized code ++ // and update invocation count. Otherwise, setup the stack frame. ++ __ Ld_d(a4, FieldMemOperand(feedback_vector, HeapObject::kMapOffset)); ++ __ Ld_hu(a4, FieldMemOperand(a4, Map::kInstanceTypeOffset)); ++ __ Branch(&push_stack_frame, ne, a4, Operand(FEEDBACK_VECTOR_TYPE)); ++ ++ // Read off the optimized code slot in the feedback vector, and if there ++ // is optimized code or an optimization marker, call that instead. ++ Register optimized_code_entry = a4; ++ __ Ld_d(optimized_code_entry, ++ FieldMemOperand(feedback_vector, ++ FeedbackVector::kOptimizedCodeWeakOrSmiOffset)); ++ ++ // Check if the optimized code slot is not empty. ++ Label optimized_code_slot_not_empty; ++ ++ __ Branch(&optimized_code_slot_not_empty, ne, optimized_code_entry, ++ Operand(Smi::FromEnum(OptimizationMarker::kNone))); ++ ++ Label not_optimized; ++ __ bind(¬_optimized); ++ ++ // Increment invocation count for the function. ++ __ Ld_w(a4, FieldMemOperand(feedback_vector, ++ FeedbackVector::kInvocationCountOffset)); ++ __ Add_w(a4, a4, Operand(1)); ++ __ St_w(a4, FieldMemOperand(feedback_vector, ++ FeedbackVector::kInvocationCountOffset)); ++ ++ // Open a frame scope to indicate that there is a frame on the stack. The ++ // MANUAL indicates that the scope shouldn't actually generate code to set up ++ // the frame (that is done below). ++ __ bind(&push_stack_frame); ++ FrameScope frame_scope(masm, StackFrame::MANUAL); ++ __ PushStandardFrame(closure); ++ ++ // Reset code age and the OSR arming. The OSR field and BytecodeAgeOffset are ++ // 8-bit fields next to each other, so we could just optimize by writing a ++ // 16-bit. These static asserts guard our assumption is valid. ++ STATIC_ASSERT(BytecodeArray::kBytecodeAgeOffset == ++ BytecodeArray::kOsrNestingLevelOffset + kCharSize); ++ STATIC_ASSERT(BytecodeArray::kNoAgeBytecodeAge == 0); ++ __ St_h(zero_reg, FieldMemOperand(kInterpreterBytecodeArrayRegister, ++ BytecodeArray::kOsrNestingLevelOffset)); ++ ++ // Load initial bytecode offset. ++ __ li(kInterpreterBytecodeOffsetRegister, ++ Operand(BytecodeArray::kHeaderSize - kHeapObjectTag)); ++ ++ // Push bytecode array and Smi tagged bytecode array offset. ++ __ SmiTag(a4, kInterpreterBytecodeOffsetRegister); ++ __ Push(kInterpreterBytecodeArrayRegister, a4); ++ ++ // Allocate the local and temporary register file on the stack. 
++ Label stack_overflow; ++ { ++ // Load frame size (word) from the BytecodeArray object. ++ __ Ld_w(a4, FieldMemOperand(kInterpreterBytecodeArrayRegister, ++ BytecodeArray::kFrameSizeOffset)); ++ ++ // Do a stack check to ensure we don't go over the limit. ++ __ Sub_d(a5, sp, Operand(a4)); ++ LoadStackLimit(masm, a2, StackLimitKind::kRealStackLimit); ++ __ Branch(&stack_overflow, lo, a5, Operand(a2)); ++ ++ // If ok, push undefined as the initial value for all register file entries. ++ Label loop_header; ++ Label loop_check; ++ __ LoadRoot(a5, RootIndex::kUndefinedValue); ++ __ Branch(&loop_check); ++ __ bind(&loop_header); ++ // TODO(rmcilroy): Consider doing more than one push per loop iteration. ++ __ push(a5); ++ // Continue loop if not done. ++ __ bind(&loop_check); ++ __ Sub_d(a4, a4, Operand(kPointerSize)); ++ __ Branch(&loop_header, ge, a4, Operand(zero_reg)); ++ } ++ ++ // If the bytecode array has a valid incoming new target or generator object ++ // register, initialize it with incoming value which was passed in r3. ++ Label no_incoming_new_target_or_generator_register; ++ __ Ld_w(a5, FieldMemOperand( ++ kInterpreterBytecodeArrayRegister, ++ BytecodeArray::kIncomingNewTargetOrGeneratorRegisterOffset)); ++ __ Branch(&no_incoming_new_target_or_generator_register, eq, a5, ++ Operand(zero_reg)); ++ __ Alsl_d(a5, a5, fp, kPointerSizeLog2, t7); ++ __ St_d(a3, MemOperand(a5, 0)); ++ __ bind(&no_incoming_new_target_or_generator_register); ++ ++ // Perform interrupt stack check. ++ // TODO(solanes): Merge with the real stack limit check above. ++ Label stack_check_interrupt, after_stack_check_interrupt; ++ LoadStackLimit(masm, a5, StackLimitKind::kInterruptStackLimit); ++ __ Branch(&stack_check_interrupt, lo, sp, Operand(a5)); ++ __ bind(&after_stack_check_interrupt); ++ ++ // Load accumulator as undefined. ++ __ LoadRoot(kInterpreterAccumulatorRegister, RootIndex::kUndefinedValue); ++ ++ // Load the dispatch table into a register and dispatch to the bytecode ++ // handler at the current bytecode offset. ++ Label do_dispatch; ++ __ bind(&do_dispatch); ++ __ li(kInterpreterDispatchTableRegister, ++ ExternalReference::interpreter_dispatch_table_address(masm->isolate())); ++ __ Add_d(t5, kInterpreterBytecodeArrayRegister, ++ kInterpreterBytecodeOffsetRegister); ++ __ Ld_bu(a7, MemOperand(t5, 0)); ++ __ Alsl_d(kScratchReg, a7, kInterpreterDispatchTableRegister, ++ kPointerSizeLog2, t7); ++ __ Ld_d(kJavaScriptCallCodeStartRegister, MemOperand(kScratchReg, 0)); ++ __ Call(kJavaScriptCallCodeStartRegister); ++ masm->isolate()->heap()->SetInterpreterEntryReturnPCOffset(masm->pc_offset()); ++ ++ // Any returns to the entry trampoline are either due to the return bytecode ++ // or the interpreter tail calling a builtin and then a dispatch. ++ ++ // Get bytecode array and bytecode offset from the stack frame. ++ __ Ld_d(kInterpreterBytecodeArrayRegister, ++ MemOperand(fp, InterpreterFrameConstants::kBytecodeArrayFromFp)); ++ __ Ld_d(kInterpreterBytecodeOffsetRegister, ++ MemOperand(fp, InterpreterFrameConstants::kBytecodeOffsetFromFp)); ++ __ SmiUntag(kInterpreterBytecodeOffsetRegister); ++ ++ // Either return, or advance to the next bytecode and dispatch. 
++ Label do_return; ++ __ Add_d(a1, kInterpreterBytecodeArrayRegister, ++ kInterpreterBytecodeOffsetRegister); ++ __ Ld_bu(a1, MemOperand(a1, 0)); ++ AdvanceBytecodeOffsetOrReturn(masm, kInterpreterBytecodeArrayRegister, ++ kInterpreterBytecodeOffsetRegister, a1, a2, a3, ++ &do_return); ++ __ jmp(&do_dispatch); ++ ++ __ bind(&do_return); ++ // The return value is in a0. ++ LeaveInterpreterFrame(masm, t0); ++ __ Jump(ra); ++ ++ __ bind(&stack_check_interrupt); ++ // Modify the bytecode offset in the stack to be kFunctionEntryBytecodeOffset ++ // for the call to the StackGuard. ++ __ li(kInterpreterBytecodeOffsetRegister, ++ Operand(Smi::FromInt(BytecodeArray::kHeaderSize - kHeapObjectTag + ++ kFunctionEntryBytecodeOffset))); ++ __ St_d(kInterpreterBytecodeOffsetRegister, ++ MemOperand(fp, InterpreterFrameConstants::kBytecodeOffsetFromFp)); ++ __ CallRuntime(Runtime::kStackGuard); ++ ++ // After the call, restore the bytecode array, bytecode offset and accumulator ++ // registers again. Also, restore the bytecode offset in the stack to its ++ // previous value. ++ __ Ld_d(kInterpreterBytecodeArrayRegister, ++ MemOperand(fp, InterpreterFrameConstants::kBytecodeArrayFromFp)); ++ __ li(kInterpreterBytecodeOffsetRegister, ++ Operand(BytecodeArray::kHeaderSize - kHeapObjectTag)); ++ __ LoadRoot(kInterpreterAccumulatorRegister, RootIndex::kUndefinedValue); ++ ++ __ SmiTag(a5, kInterpreterBytecodeOffsetRegister); ++ __ St_d(a5, MemOperand(fp, InterpreterFrameConstants::kBytecodeOffsetFromFp)); ++ ++ __ jmp(&after_stack_check_interrupt); ++ ++ __ bind(&optimized_code_slot_not_empty); ++ Label maybe_has_optimized_code; ++ // Check if optimized code marker is actually a weak reference to the ++ // optimized code as opposed to an optimization marker. ++ __ JumpIfNotSmi(optimized_code_entry, &maybe_has_optimized_code, t7); ++ MaybeOptimizeCode(masm, feedback_vector, optimized_code_entry); ++ // Fall through if there's no runnable optimized code. ++ __ jmp(¬_optimized); ++ ++ __ bind(&maybe_has_optimized_code); ++ // Load code entry from the weak reference, if it was cleared, resume ++ // execution of unoptimized code. ++ __ LoadWeakValue(optimized_code_entry, optimized_code_entry, ¬_optimized); ++ TailCallOptimizedCodeSlot(masm, optimized_code_entry, t3, a5); ++ ++ __ bind(&compile_lazy); ++ GenerateTailCallToReturnedCode(masm, Runtime::kCompileLazy); ++ // Unreachable code. ++ __ break_(0xCC); ++ ++ __ bind(&stack_overflow); ++ __ CallRuntime(Runtime::kThrowStackOverflow); ++ // Unreachable code. ++ __ break_(0xCC); ++} ++ ++static void Generate_InterpreterPushArgs(MacroAssembler* masm, ++ Register num_args, Register index, ++ Register scratch, Register scratch2) { ++ // Find the address of the last argument. ++ __ mov(scratch2, num_args); ++ __ slli_d(scratch2, scratch2, kPointerSizeLog2); ++ __ Sub_d(scratch2, index, Operand(scratch2)); ++ ++ // Push the arguments. 
++ Label loop_header, loop_check; ++ __ Branch(&loop_check); ++ __ bind(&loop_header); ++ __ Ld_d(scratch, MemOperand(index, 0)); ++ __ Add_d(index, index, Operand(-kPointerSize)); ++ __ push(scratch); ++ __ bind(&loop_check); ++ __ Branch(&loop_header, hi, index, Operand(scratch2)); ++} ++ ++// static ++void Builtins::Generate_InterpreterPushArgsThenCallImpl( ++ MacroAssembler* masm, ConvertReceiverMode receiver_mode, ++ InterpreterPushArgsMode mode) { ++ DCHECK(mode != InterpreterPushArgsMode::kArrayFunction); ++ // ----------- S t a t e ------------- ++ // -- a0 : the number of arguments (not including the receiver) ++ // -- a2 : the address of the first argument to be pushed. Subsequent ++ // arguments should be consecutive above this, in the same order as ++ // they are to be pushed onto the stack. ++ // -- a1 : the target to call (can be any Object). ++ // ----------------------------------- ++ Label stack_overflow; ++ ++ __ Add_d(a3, a0, Operand(1)); // Add one for receiver. ++ ++ // Push "undefined" as the receiver arg if we need to. ++ if (receiver_mode == ConvertReceiverMode::kNullOrUndefined) { ++ __ PushRoot(RootIndex::kUndefinedValue); ++ __ Sub_d(a3, a3, Operand(1)); // Subtract one for receiver. ++ } ++ ++ Generate_StackOverflowCheck(masm, a3, a4, t0, &stack_overflow); ++ ++ // This function modifies a2, t0 and a4. ++ Generate_InterpreterPushArgs(masm, a3, a2, a4, t0); ++ ++ if (mode == InterpreterPushArgsMode::kWithFinalSpread) { ++ __ Pop(a2); // Pass the spread in a register ++ __ Sub_d(a0, a0, Operand(1)); // Subtract one for spread ++ } ++ ++ // Call the target. ++ if (mode == InterpreterPushArgsMode::kWithFinalSpread) { ++ __ Jump(BUILTIN_CODE(masm->isolate(), CallWithSpread), ++ RelocInfo::CODE_TARGET); ++ } else { ++ __ Jump(masm->isolate()->builtins()->Call(ConvertReceiverMode::kAny), ++ RelocInfo::CODE_TARGET); ++ } ++ ++ __ bind(&stack_overflow); ++ { ++ __ TailCallRuntime(Runtime::kThrowStackOverflow); ++ // Unreachable code. ++ __ break_(0xCC); ++ } ++} ++ ++// static ++void Builtins::Generate_InterpreterPushArgsThenConstructImpl( ++ MacroAssembler* masm, InterpreterPushArgsMode mode) { ++ // ----------- S t a t e ------------- ++ // -- a0 : argument count (not including receiver) ++ // -- a3 : new target ++ // -- a1 : constructor to call ++ // -- a2 : allocation site feedback if available, undefined otherwise. ++ // -- a4 : address of the first argument ++ // ----------------------------------- ++ Label stack_overflow; ++ ++ // Push a slot for the receiver. ++ __ push(zero_reg); ++ ++ Generate_StackOverflowCheck(masm, a0, a5, t0, &stack_overflow); ++ ++ // This function modifies t0, a4 and a5. ++ Generate_InterpreterPushArgs(masm, a0, a4, a5, t0); ++ ++ if (mode == InterpreterPushArgsMode::kWithFinalSpread) { ++ __ Pop(a2); // Pass the spread in a register ++ __ Sub_d(a0, a0, Operand(1)); // Subtract one for spread ++ } else { ++ __ AssertUndefinedOrAllocationSite(a2, t0); ++ } ++ ++ if (mode == InterpreterPushArgsMode::kArrayFunction) { ++ __ AssertFunction(a1); ++ ++ // Tail call to the function-specific construct stub (still in the caller ++ // context at this point). ++ __ Jump(BUILTIN_CODE(masm->isolate(), ArrayConstructorImpl), ++ RelocInfo::CODE_TARGET); ++ } else if (mode == InterpreterPushArgsMode::kWithFinalSpread) { ++ // Call the constructor with a0, a1, and a3 unmodified. 
++ __ Jump(BUILTIN_CODE(masm->isolate(), ConstructWithSpread), ++ RelocInfo::CODE_TARGET); ++ } else { ++ DCHECK_EQ(InterpreterPushArgsMode::kOther, mode); ++ // Call the constructor with a0, a1, and a3 unmodified. ++ __ Jump(BUILTIN_CODE(masm->isolate(), Construct), RelocInfo::CODE_TARGET); ++ } ++ ++ __ bind(&stack_overflow); ++ { ++ __ TailCallRuntime(Runtime::kThrowStackOverflow); ++ // Unreachable code. ++ __ break_(0xCC); ++ } ++} ++ ++static void Generate_InterpreterEnterBytecode(MacroAssembler* masm) { ++ // Set the return address to the correct point in the interpreter entry ++ // trampoline. ++ Label builtin_trampoline, trampoline_loaded; ++ Smi interpreter_entry_return_pc_offset( ++ masm->isolate()->heap()->interpreter_entry_return_pc_offset()); ++ DCHECK_NE(interpreter_entry_return_pc_offset, Smi::zero()); ++ ++ // If the SFI function_data is an InterpreterData, the function will have a ++ // custom copy of the interpreter entry trampoline for profiling. If so, ++ // get the custom trampoline, otherwise grab the entry address of the global ++ // trampoline. ++ __ Ld_d(t0, MemOperand(fp, StandardFrameConstants::kFunctionOffset)); ++ __ Ld_d(t0, FieldMemOperand(t0, JSFunction::kSharedFunctionInfoOffset)); ++ __ Ld_d(t0, FieldMemOperand(t0, SharedFunctionInfo::kFunctionDataOffset)); ++ __ GetObjectType(t0, kInterpreterDispatchTableRegister, ++ kInterpreterDispatchTableRegister); ++ __ Branch(&builtin_trampoline, ne, kInterpreterDispatchTableRegister, ++ Operand(INTERPRETER_DATA_TYPE)); ++ ++ __ Ld_d(t0, ++ FieldMemOperand(t0, InterpreterData::kInterpreterTrampolineOffset)); ++ __ Add_d(t0, t0, Operand(Code::kHeaderSize - kHeapObjectTag)); ++ __ Branch(&trampoline_loaded); ++ ++ __ bind(&builtin_trampoline); ++ __ li(t0, ExternalReference:: ++ address_of_interpreter_entry_trampoline_instruction_start( ++ masm->isolate())); ++ __ Ld_d(t0, MemOperand(t0, 0)); ++ ++ __ bind(&trampoline_loaded); ++ __ Add_d(ra, t0, Operand(interpreter_entry_return_pc_offset.value())); ++ ++ // Initialize the dispatch table register. ++ __ li(kInterpreterDispatchTableRegister, ++ ExternalReference::interpreter_dispatch_table_address(masm->isolate())); ++ ++ // Get the bytecode array pointer from the frame. ++ __ Ld_d(kInterpreterBytecodeArrayRegister, ++ MemOperand(fp, InterpreterFrameConstants::kBytecodeArrayFromFp)); ++ ++ if (FLAG_debug_code) { ++ // Check function data field is actually a BytecodeArray object. ++ __ SmiTst(kInterpreterBytecodeArrayRegister, kScratchReg); ++ __ Assert(ne, ++ AbortReason::kFunctionDataShouldBeBytecodeArrayOnInterpreterEntry, ++ kScratchReg, Operand(zero_reg)); ++ __ GetObjectType(kInterpreterBytecodeArrayRegister, a1, a1); ++ __ Assert(eq, ++ AbortReason::kFunctionDataShouldBeBytecodeArrayOnInterpreterEntry, ++ a1, Operand(BYTECODE_ARRAY_TYPE)); ++ } ++ ++ // Get the target bytecode offset from the frame. ++ __ SmiUntag(kInterpreterBytecodeOffsetRegister, ++ MemOperand(fp, InterpreterFrameConstants::kBytecodeOffsetFromFp)); ++ ++ if (FLAG_debug_code) { ++ Label okay; ++ __ Branch(&okay, ge, kInterpreterBytecodeOffsetRegister, ++ Operand(BytecodeArray::kHeaderSize - kHeapObjectTag)); ++ // Unreachable code. ++ __ break_(0xCC); ++ __ bind(&okay); ++ } ++ ++ // Dispatch to the target bytecode. 
++ __ Add_d(a1, kInterpreterBytecodeArrayRegister, ++ kInterpreterBytecodeOffsetRegister); ++ __ Ld_bu(a7, MemOperand(a1, 0)); ++ __ Alsl_d(a1, a7, kInterpreterDispatchTableRegister, kPointerSizeLog2, t7); ++ __ Ld_d(kJavaScriptCallCodeStartRegister, MemOperand(a1, 0)); ++ __ Jump(kJavaScriptCallCodeStartRegister); ++} ++ ++void Builtins::Generate_InterpreterEnterBytecodeAdvance(MacroAssembler* masm) { ++ // Advance the current bytecode offset stored within the given interpreter ++ // stack frame. This simulates what all bytecode handlers do upon completion ++ // of the underlying operation. ++ __ Ld_d(kInterpreterBytecodeArrayRegister, ++ MemOperand(fp, InterpreterFrameConstants::kBytecodeArrayFromFp)); ++ __ Ld_d(kInterpreterBytecodeOffsetRegister, ++ MemOperand(fp, InterpreterFrameConstants::kBytecodeOffsetFromFp)); ++ __ SmiUntag(kInterpreterBytecodeOffsetRegister); ++ ++ Label enter_bytecode, function_entry_bytecode; ++ __ Branch(&function_entry_bytecode, eq, kInterpreterBytecodeOffsetRegister, ++ Operand(BytecodeArray::kHeaderSize - kHeapObjectTag + ++ kFunctionEntryBytecodeOffset)); ++ ++ // Load the current bytecode. ++ __ Add_d(a1, kInterpreterBytecodeArrayRegister, ++ kInterpreterBytecodeOffsetRegister); ++ __ Ld_bu(a1, MemOperand(a1, 0)); ++ ++ // Advance to the next bytecode. ++ Label if_return; ++ AdvanceBytecodeOffsetOrReturn(masm, kInterpreterBytecodeArrayRegister, ++ kInterpreterBytecodeOffsetRegister, a1, a2, a3, ++ &if_return); ++ ++ __ bind(&enter_bytecode); ++ // Convert new bytecode offset to a Smi and save in the stackframe. ++ __ SmiTag(a2, kInterpreterBytecodeOffsetRegister); ++ __ St_d(a2, MemOperand(fp, InterpreterFrameConstants::kBytecodeOffsetFromFp)); ++ ++ Generate_InterpreterEnterBytecode(masm); ++ ++ __ bind(&function_entry_bytecode); ++ // If the code deoptimizes during the implicit function entry stack interrupt ++ // check, it will have a bailout ID of kFunctionEntryBytecodeOffset, which is ++ // not a valid bytecode offset. Detect this case and advance to the first ++ // actual bytecode. ++ __ li(kInterpreterBytecodeOffsetRegister, ++ Operand(BytecodeArray::kHeaderSize - kHeapObjectTag)); ++ __ Branch(&enter_bytecode); ++ ++ // We should never take the if_return path. ++ __ bind(&if_return); ++ __ Abort(AbortReason::kInvalidBytecodeAdvance); ++} ++ ++void Builtins::Generate_InterpreterEnterBytecodeDispatch(MacroAssembler* masm) { ++ Generate_InterpreterEnterBytecode(masm); ++} ++ ++namespace { ++void Generate_ContinueToBuiltinHelper(MacroAssembler* masm, ++ bool java_script_builtin, ++ bool with_result) { ++ const RegisterConfiguration* config(RegisterConfiguration::Default()); ++ int allocatable_register_count = config->num_allocatable_general_registers(); ++ if (with_result) { ++ // Overwrite the hole inserted by the deoptimizer with the return value from ++ // the LAZY deopt point. ++ __ St_d(a0, ++ MemOperand( ++ sp, config->num_allocatable_general_registers() * kPointerSize + ++ BuiltinContinuationFrameConstants::kFixedFrameSize)); ++ } ++ for (int i = allocatable_register_count - 1; i >= 0; --i) { ++ int code = config->GetAllocatableGeneralCode(i); ++ __ Pop(Register::from_code(code)); ++ if (java_script_builtin && code == kJavaScriptCallArgCountRegister.code()) { ++ __ SmiUntag(Register::from_code(code)); ++ } ++ } ++ __ Ld_d( ++ fp, ++ MemOperand(sp, BuiltinContinuationFrameConstants::kFixedFrameSizeFromFp)); ++ // Load builtin index (stored as a Smi) and use it to get the builtin start ++ // address from the builtins table. 
++ __ Pop(t0); ++ __ Add_d(sp, sp, ++ Operand(BuiltinContinuationFrameConstants::kFixedFrameSizeFromFp)); ++ __ Pop(ra); ++ __ LoadEntryFromBuiltinIndex(t0); ++ __ Jump(t0); ++} ++} // namespace ++ ++void Builtins::Generate_ContinueToCodeStubBuiltin(MacroAssembler* masm) { ++ Generate_ContinueToBuiltinHelper(masm, false, false); ++} ++ ++void Builtins::Generate_ContinueToCodeStubBuiltinWithResult( ++ MacroAssembler* masm) { ++ Generate_ContinueToBuiltinHelper(masm, false, true); ++} ++ ++void Builtins::Generate_ContinueToJavaScriptBuiltin(MacroAssembler* masm) { ++ Generate_ContinueToBuiltinHelper(masm, true, false); ++} ++ ++void Builtins::Generate_ContinueToJavaScriptBuiltinWithResult( ++ MacroAssembler* masm) { ++ Generate_ContinueToBuiltinHelper(masm, true, true); ++} ++ ++void Builtins::Generate_NotifyDeoptimized(MacroAssembler* masm) { ++ { ++ FrameScope scope(masm, StackFrame::INTERNAL); ++ __ CallRuntime(Runtime::kNotifyDeoptimized); ++ } ++ ++ DCHECK_EQ(kInterpreterAccumulatorRegister.code(), a0.code()); ++ __ Ld_d(a0, MemOperand(sp, 0 * kPointerSize)); ++ __ Add_d(sp, sp, Operand(1 * kPointerSize)); // Remove state. ++ __ Ret(); ++} ++ ++void Builtins::Generate_InterpreterOnStackReplacement(MacroAssembler* masm) { ++ { ++ FrameScope scope(masm, StackFrame::INTERNAL); ++ __ CallRuntime(Runtime::kCompileForOnStackReplacement); ++ } ++ ++ // If the code object is null, just return to the caller. ++ __ Ret(eq, a0, Operand(Smi::zero())); ++ ++ // Drop the handler frame that is be sitting on top of the actual ++ // JavaScript frame. This is the case then OSR is triggered from bytecode. ++ __ LeaveFrame(StackFrame::STUB); ++ ++ // Load deoptimization data from the code object. ++ // = [#deoptimization_data_offset] ++ __ Ld_d(a1, MemOperand(a0, Code::kDeoptimizationDataOffset - kHeapObjectTag)); ++ ++ // Load the OSR entrypoint offset from the deoptimization data. ++ // = [#header_size + #osr_pc_offset] ++ __ SmiUntag(a1, MemOperand(a1, FixedArray::OffsetOfElementAt( ++ DeoptimizationData::kOsrPcOffsetIndex) - ++ kHeapObjectTag)); ++ ++ // Compute the target address = code_obj + header_size + osr_offset ++ // = + #header_size + ++ __ Add_d(a0, a0, a1); ++ __ addi_d(ra, a0, Code::kHeaderSize - kHeapObjectTag); ++ ++ // And "return" to the OSR entry point of the function. ++ __ Ret(); ++} ++ ++// static ++void Builtins::Generate_FunctionPrototypeApply(MacroAssembler* masm) { ++ // ----------- S t a t e ------------- ++ // -- a0 : argc ++ // -- sp[0] : argArray ++ // -- sp[4] : thisArg ++ // -- sp[8] : receiver ++ // ----------------------------------- ++ ++ Register argc = a0; ++ Register arg_array = a2; ++ Register receiver = a1; ++ Register this_arg = a5; ++ Register undefined_value = a3; ++ Register scratch = a4; ++ ++ __ LoadRoot(undefined_value, RootIndex::kUndefinedValue); ++ ++ // 1. Load receiver into a1, argArray into a2 (if present), remove all ++ // arguments from the stack (including the receiver), and push thisArg (if ++ // present) instead. ++ { ++ // Claim (2 - argc) dummy arguments form the stack, to put the stack in a ++ // consistent state for a simple pop operation. 
++ ++ __ Sub_d(sp, sp, Operand(2 * kPointerSize)); ++ __ Alsl_d(sp, argc, sp, kPointerSizeLog2, t7); ++ __ mov(scratch, argc); ++ __ Pop(this_arg, arg_array); // Overwrite argc ++ __ Movz(arg_array, undefined_value, scratch); // if argc == 0 ++ __ Movz(this_arg, undefined_value, scratch); // if argc == 0 ++ __ Sub_d(scratch, scratch, Operand(1)); ++ __ Movz(arg_array, undefined_value, scratch); // if argc == 1 ++ __ Ld_d(receiver, MemOperand(sp, 0)); ++ __ St_d(this_arg, MemOperand(sp, 0)); ++ } ++ ++ // ----------- S t a t e ------------- ++ // -- a2 : argArray ++ // -- a1 : receiver ++ // -- a3 : undefined root value ++ // -- sp[0] : thisArg ++ // ----------------------------------- ++ ++ // 2. We don't need to check explicitly for callable receiver here, ++ // since that's the first thing the Call/CallWithArrayLike builtins ++ // will do. ++ ++ // 3. Tail call with no arguments if argArray is null or undefined. ++ Label no_arguments; ++ __ JumpIfRoot(arg_array, RootIndex::kNullValue, &no_arguments); ++ __ Branch(&no_arguments, eq, arg_array, Operand(undefined_value)); ++ ++ // 4a. Apply the receiver to the given argArray. ++ __ Jump(BUILTIN_CODE(masm->isolate(), CallWithArrayLike), ++ RelocInfo::CODE_TARGET); ++ ++ // 4b. The argArray is either null or undefined, so we tail call without any ++ // arguments to the receiver. ++ __ bind(&no_arguments); ++ { ++ __ mov(a0, zero_reg); ++ DCHECK(receiver == a1); ++ __ Jump(masm->isolate()->builtins()->Call(), RelocInfo::CODE_TARGET); ++ } ++} ++ ++// static ++void Builtins::Generate_FunctionPrototypeCall(MacroAssembler* masm) { ++ // 1. Make sure we have at least one argument. ++ // a0: actual number of arguments ++ { ++ Label done; ++ __ Branch(&done, ne, a0, Operand(zero_reg)); ++ __ PushRoot(RootIndex::kUndefinedValue); ++ __ Add_d(a0, a0, Operand(1)); ++ __ bind(&done); ++ } ++ ++ // 2. Get the function to call (passed as receiver) from the stack. ++ // a0: actual number of arguments ++ __ Alsl_d(kScratchReg, a0, sp, kPointerSizeLog2, t7); ++ __ Ld_d(a1, MemOperand(kScratchReg, 0)); ++ ++ // 3. Shift arguments and return address one slot down on the stack ++ // (overwriting the original receiver). Adjust argument count to make ++ // the original first argument the new receiver. ++ // a0: actual number of arguments ++ // a1: function ++ { ++ Label loop; ++ // Calculate the copy start address (destination). Copy end address is sp. ++ __ Alsl_d(a2, a0, sp, kPointerSizeLog2, t7); ++ ++ __ bind(&loop); ++ __ Ld_d(kScratchReg, MemOperand(a2, -kPointerSize)); ++ __ St_d(kScratchReg, MemOperand(a2, 0)); ++ __ Sub_d(a2, a2, Operand(kPointerSize)); ++ __ Branch(&loop, ne, a2, Operand(sp)); ++ // Adjust the actual number of arguments and remove the top element ++ // (which is a copy of the last argument). ++ __ Sub_d(a0, a0, Operand(1)); ++ __ Pop(); ++ } ++ ++ // 4. Call the callable. ++ __ Jump(masm->isolate()->builtins()->Call(), RelocInfo::CODE_TARGET); ++} ++ ++void Builtins::Generate_ReflectApply(MacroAssembler* masm) { ++ // ----------- S t a t e ------------- ++ // -- a0 : argc ++ // -- sp[0] : argumentsList (if argc ==3) ++ // -- sp[4] : thisArgument (if argc >=2) ++ // -- sp[8] : target (if argc >=1) ++ // -- sp[12] : receiver ++ // ----------------------------------- ++ ++ Register argc = a0; ++ Register arguments_list = a2; ++ Register target = a1; ++ Register this_argument = a5; ++ Register undefined_value = a3; ++ Register scratch = a4; ++ ++ __ LoadRoot(undefined_value, RootIndex::kUndefinedValue); ++ ++ // 1. 
Load target into a1 (if present), argumentsList into a2 (if present), ++ // remove all arguments from the stack (including the receiver), and push ++ // thisArgument (if present) instead. ++ { ++ // Claim (3 - argc) dummy arguments form the stack, to put the stack in a ++ // consistent state for a simple pop operation. ++ ++ __ Sub_d(sp, sp, Operand(3 * kPointerSize)); ++ __ Alsl_d(sp, argc, sp, kPointerSizeLog2, t7); ++ __ mov(scratch, argc); ++ __ Pop(target, this_argument, arguments_list); ++ __ Movz(arguments_list, undefined_value, scratch); // if argc == 0 ++ __ Movz(this_argument, undefined_value, scratch); // if argc == 0 ++ __ Movz(target, undefined_value, scratch); // if argc == 0 ++ __ Sub_d(scratch, scratch, Operand(1)); ++ __ Movz(arguments_list, undefined_value, scratch); // if argc == 1 ++ __ Movz(this_argument, undefined_value, scratch); // if argc == 1 ++ __ Sub_d(scratch, scratch, Operand(1)); ++ __ Movz(arguments_list, undefined_value, scratch); // if argc == 2 ++ ++ __ St_d(this_argument, MemOperand(sp, 0)); // Overwrite receiver ++ } ++ ++ // ----------- S t a t e ------------- ++ // -- a2 : argumentsList ++ // -- a1 : target ++ // -- a3 : undefined root value ++ // -- sp[0] : thisArgument ++ // ----------------------------------- ++ ++ // 2. We don't need to check explicitly for callable target here, ++ // since that's the first thing the Call/CallWithArrayLike builtins ++ // will do. ++ ++ // 3. Apply the target to the given argumentsList. ++ __ Jump(BUILTIN_CODE(masm->isolate(), CallWithArrayLike), ++ RelocInfo::CODE_TARGET); ++} ++ ++void Builtins::Generate_ReflectConstruct(MacroAssembler* masm) { ++ // ----------- S t a t e ------------- ++ // -- a0 : argc ++ // -- sp[0] : new.target (optional) (dummy value if argc <= 2) ++ // -- sp[4] : argumentsList (dummy value if argc <= 1) ++ // -- sp[8] : target (dummy value if argc == 0) ++ // -- sp[12] : receiver ++ // ----------------------------------- ++ Register argc = a0; ++ Register arguments_list = a2; ++ Register target = a1; ++ Register new_target = a3; ++ Register undefined_value = a4; ++ Register scratch = a5; ++ ++ __ LoadRoot(undefined_value, RootIndex::kUndefinedValue); ++ ++ // 1. Load target into a1 (if present), argumentsList into a2 (if present), ++ // new.target into a3 (if present, otherwise use target), remove all ++ // arguments from the stack (including the receiver), and push thisArgument ++ // (if present) instead. ++ { ++ // Claim (3 - argc) dummy arguments form the stack, to put the stack in a ++ // consistent state for a simple pop operation. ++ ++ __ Sub_d(sp, sp, Operand(3 * kPointerSize)); ++ __ Alsl_d(sp, argc, sp, kPointerSizeLog2, t7); ++ __ mov(scratch, argc); ++ __ Pop(target, arguments_list, new_target); ++ __ Movz(arguments_list, undefined_value, scratch); // if argc == 0 ++ __ Movz(new_target, undefined_value, scratch); // if argc == 0 ++ __ Movz(target, undefined_value, scratch); // if argc == 0 ++ __ Sub_d(scratch, scratch, Operand(1)); ++ __ Movz(arguments_list, undefined_value, scratch); // if argc == 1 ++ __ Movz(new_target, target, scratch); // if argc == 1 ++ __ Sub_d(scratch, scratch, Operand(1)); ++ __ Movz(new_target, target, scratch); // if argc == 2 ++ ++ __ St_d(undefined_value, MemOperand(sp, 0)); // Overwrite receiver ++ } ++ ++ // ----------- S t a t e ------------- ++ // -- a2 : argumentsList ++ // -- a1 : target ++ // -- a3 : new.target ++ // -- sp[0] : receiver (undefined) ++ // ----------------------------------- ++ ++ // 2. 
We don't need to check explicitly for constructor target here, ++ // since that's the first thing the Construct/ConstructWithArrayLike ++ // builtins will do. ++ ++ // 3. We don't need to check explicitly for constructor new.target here, ++ // since that's the second thing the Construct/ConstructWithArrayLike ++ // builtins will do. ++ ++ // 4. Construct the target with the given new.target and argumentsList. ++ __ Jump(BUILTIN_CODE(masm->isolate(), ConstructWithArrayLike), ++ RelocInfo::CODE_TARGET); ++} ++ ++static void EnterArgumentsAdaptorFrame(MacroAssembler* masm) { ++ __ SmiTag(a0); ++ __ li(a4, Operand(StackFrame::TypeToMarker(StackFrame::ARGUMENTS_ADAPTOR))); ++ __ Push(ra, fp, a4, a1, a0); ++ __ Push(Smi::zero()); // Padding. ++ __ Add_d(fp, sp, ++ Operand(ArgumentsAdaptorFrameConstants::kFixedFrameSizeFromFp)); ++} ++ ++static void LeaveArgumentsAdaptorFrame(MacroAssembler* masm) { ++ // ----------- S t a t e ------------- ++ // -- a0 : result being passed through ++ // ----------------------------------- ++ // Get the number of arguments passed (as a smi), tear down the frame and ++ // then tear down the parameters. ++ __ Ld_d(a1, MemOperand(fp, ArgumentsAdaptorFrameConstants::kLengthOffset)); ++ __ mov(sp, fp); ++ __ Pop(ra, fp); ++ __ SmiScale(a4, a1, kPointerSizeLog2); ++ __ Add_d(sp, sp, a4); ++ // Adjust for the receiver. ++ __ Add_d(sp, sp, Operand(kPointerSize)); ++} ++ ++// static ++void Builtins::Generate_CallOrConstructVarargs(MacroAssembler* masm, ++ Handle<Code> code) { ++ // ----------- S t a t e ------------- ++ // -- a1 : target ++ // -- a0 : number of parameters on the stack (not including the receiver) ++ // -- a2 : arguments list (a FixedArray) ++ // -- a4 : len (number of elements to push from args) ++ // -- a3 : new.target (for [[Construct]]) ++ // ----------------------------------- ++ if (masm->emit_debug_code()) { ++ // Allow a2 to be a FixedArray, or a FixedDoubleArray if a4 == 0. ++ Label ok, fail; ++ __ AssertNotSmi(a2); ++ __ GetObjectType(a2, t8, t8); ++ __ Branch(&ok, eq, t8, Operand(FIXED_ARRAY_TYPE)); ++ __ Branch(&fail, ne, t8, Operand(FIXED_DOUBLE_ARRAY_TYPE)); ++ __ Branch(&ok, eq, a4, Operand(zero_reg)); ++ // Fall through. ++ __ bind(&fail); ++ __ Abort(AbortReason::kOperandIsNotAFixedArray); ++ ++ __ bind(&ok); ++ } ++ ++ Register args = a2; ++ Register len = a4; ++ ++ // Check for stack overflow. ++ Label stack_overflow; ++ Generate_StackOverflowCheck(masm, len, kScratchReg, a5, &stack_overflow); ++ ++ // Push arguments onto the stack (thisArgument is already on the stack). ++ { ++ Label done, push, loop; ++ Register src = a6; ++ Register scratch = len; ++ ++ __ addi_d(src, args, FixedArray::kHeaderSize - kHeapObjectTag); ++ __ Add_d(a0, a0, len); // The 'len' argument for Call() or Construct(). ++ __ Branch(&done, eq, len, Operand(zero_reg)); ++ __ slli_d(scratch, len, kPointerSizeLog2); ++ __ Sub_d(scratch, sp, Operand(scratch)); ++ __ LoadRoot(t1, RootIndex::kTheHoleValue); ++ __ bind(&loop); ++ __ Ld_d(a5, MemOperand(src, 0)); ++ __ Branch(&push, ne, a5, Operand(t1)); ++ __ LoadRoot(a5, RootIndex::kUndefinedValue); ++ __ bind(&push); ++ __ addi_d(src, src, kPointerSize); ++ __ Push(a5); ++ __ Branch(&loop, ne, scratch, Operand(sp)); ++ __ bind(&done); ++ } ++ ++ // Tail-call to the actual Call or Construct builtin.
++ __ Jump(code, RelocInfo::CODE_TARGET); ++ ++ __ bind(&stack_overflow); ++ __ TailCallRuntime(Runtime::kThrowStackOverflow); ++} ++ ++// static ++void Builtins::Generate_CallOrConstructForwardVarargs(MacroAssembler* masm, ++ CallOrConstructMode mode, ++ Handle<Code> code) { ++ // ----------- S t a t e ------------- ++ // -- a0 : the number of arguments (not including the receiver) ++ // -- a3 : the new.target (for [[Construct]] calls) ++ // -- a1 : the target to call (can be any Object) ++ // -- a2 : start index (to support rest parameters) ++ // ----------------------------------- ++ ++ // Check if new.target has a [[Construct]] internal method. ++ if (mode == CallOrConstructMode::kConstruct) { ++ Label new_target_constructor, new_target_not_constructor; ++ __ JumpIfSmi(a3, &new_target_not_constructor); ++ __ Ld_d(t1, FieldMemOperand(a3, HeapObject::kMapOffset)); ++ __ Ld_bu(t1, FieldMemOperand(t1, Map::kBitFieldOffset)); ++ __ And(t1, t1, Operand(Map::Bits1::IsConstructorBit::kMask)); ++ __ Branch(&new_target_constructor, ne, t1, Operand(zero_reg)); ++ __ bind(&new_target_not_constructor); ++ { ++ FrameScope scope(masm, StackFrame::MANUAL); ++ __ EnterFrame(StackFrame::INTERNAL); ++ __ Push(a3); ++ __ CallRuntime(Runtime::kThrowNotConstructor); ++ } ++ __ bind(&new_target_constructor); ++ } ++ ++ // Check if we have an arguments adaptor frame below the function frame. ++ Label arguments_adaptor, arguments_done; ++ __ Ld_d(a6, MemOperand(fp, StandardFrameConstants::kCallerFPOffset)); ++ __ Ld_d(a7, MemOperand(a6, CommonFrameConstants::kContextOrFrameTypeOffset)); ++ __ Branch(&arguments_adaptor, eq, a7, ++ Operand(StackFrame::TypeToMarker(StackFrame::ARGUMENTS_ADAPTOR))); ++ { ++ __ Ld_d(a7, MemOperand(fp, StandardFrameConstants::kFunctionOffset)); ++ __ Ld_d(a7, FieldMemOperand(a7, JSFunction::kSharedFunctionInfoOffset)); ++ __ Ld_hu(a7, FieldMemOperand( ++ a7, SharedFunctionInfo::kFormalParameterCountOffset)); ++ __ mov(a6, fp); ++ } ++ __ Branch(&arguments_done); ++ __ bind(&arguments_adaptor); ++ { ++ // Just get the length from the ArgumentsAdaptorFrame. ++ __ SmiUntag(a7, ++ MemOperand(a6, ArgumentsAdaptorFrameConstants::kLengthOffset)); ++ } ++ __ bind(&arguments_done); ++ ++ Label stack_done, stack_overflow; ++ __ Sub_w(a7, a7, a2); ++ __ Branch(&stack_done, le, a7, Operand(zero_reg)); ++ { ++ // Check for stack overflow. ++ Generate_StackOverflowCheck(masm, a7, a4, a5, &stack_overflow); ++ ++ // Forward the arguments from the caller frame. ++ { ++ Label loop; ++ __ Add_d(a0, a0, a7); ++ __ bind(&loop); ++ { ++ __ Alsl_d(kScratchReg, a7, a6, kPointerSizeLog2, t7); ++ __ Ld_d(kScratchReg, MemOperand(kScratchReg, 1 * kPointerSize)); ++ __ push(kScratchReg); ++ __ Sub_w(a7, a7, Operand(1)); ++ __ Branch(&loop, ne, a7, Operand(zero_reg)); ++ } ++ } ++ } ++ __ Branch(&stack_done); ++ __ bind(&stack_overflow); ++ __ TailCallRuntime(Runtime::kThrowStackOverflow); ++ __ bind(&stack_done); ++ ++ // Tail-call to the {code} handler. ++ __ Jump(code, RelocInfo::CODE_TARGET); ++} ++ ++// static ++void Builtins::Generate_CallFunction(MacroAssembler* masm, ++ ConvertReceiverMode mode) { ++ // ----------- S t a t e ------------- ++ // -- a0 : the number of arguments (not including the receiver) ++ // -- a1 : the function to call (checked to be a JSFunction) ++ // ----------------------------------- ++ __ AssertFunction(a1); ++ ++ // See ES6 section 9.2.1 [[Call]] ( thisArgument, argumentsList) ++ // Check that function is not a "classConstructor".
++ Label class_constructor; ++ __ Ld_d(a2, FieldMemOperand(a1, JSFunction::kSharedFunctionInfoOffset)); ++ __ Ld_wu(a3, FieldMemOperand(a2, SharedFunctionInfo::kFlagsOffset)); ++ __ And(kScratchReg, a3, ++ Operand(SharedFunctionInfo::IsClassConstructorBit::kMask)); ++ __ Branch(&class_constructor, ne, kScratchReg, Operand(zero_reg)); ++ ++ // Enter the context of the function; ToObject has to run in the function ++ // context, and we also need to take the global proxy from the function ++ // context in case of conversion. ++ __ Ld_d(cp, FieldMemOperand(a1, JSFunction::kContextOffset)); ++ // We need to convert the receiver for non-native sloppy mode functions. ++ Label done_convert; ++ __ Ld_wu(a3, FieldMemOperand(a2, SharedFunctionInfo::kFlagsOffset)); ++ __ And(kScratchReg, a3, ++ Operand(SharedFunctionInfo::IsNativeBit::kMask | ++ SharedFunctionInfo::IsStrictBit::kMask)); ++ __ Branch(&done_convert, ne, kScratchReg, Operand(zero_reg)); ++ { ++ // ----------- S t a t e ------------- ++ // -- a0 : the number of arguments (not including the receiver) ++ // -- a1 : the function to call (checked to be a JSFunction) ++ // -- a2 : the shared function info. ++ // -- cp : the function context. ++ // ----------------------------------- ++ ++ if (mode == ConvertReceiverMode::kNullOrUndefined) { ++ // Patch receiver to global proxy. ++ __ LoadGlobalProxy(a3); ++ } else { ++ Label convert_to_object, convert_receiver; ++ __ Alsl_d(kScratchReg, a0, sp, kPointerSizeLog2, t7); ++ __ Ld_d(a3, MemOperand(kScratchReg, 0)); ++ __ JumpIfSmi(a3, &convert_to_object); ++ STATIC_ASSERT(LAST_JS_RECEIVER_TYPE == LAST_TYPE); ++ __ GetObjectType(a3, a4, a4); ++ __ Branch(&done_convert, hs, a4, Operand(FIRST_JS_RECEIVER_TYPE)); ++ if (mode != ConvertReceiverMode::kNotNullOrUndefined) { ++ Label convert_global_proxy; ++ __ JumpIfRoot(a3, RootIndex::kUndefinedValue, &convert_global_proxy); ++ __ JumpIfNotRoot(a3, RootIndex::kNullValue, &convert_to_object); ++ __ bind(&convert_global_proxy); ++ { ++ // Patch receiver to global proxy. ++ __ LoadGlobalProxy(a3); ++ } ++ __ Branch(&convert_receiver); ++ } ++ __ bind(&convert_to_object); ++ { ++ // Convert receiver using ToObject. ++ // TODO(bmeurer): Inline the allocation here to avoid building the frame ++ // in the fast case? (fall back to AllocateInNewSpace?) ++ FrameScope scope(masm, StackFrame::INTERNAL); ++ __ SmiTag(a0); ++ __ Push(a0, a1); ++ __ mov(a0, a3); ++ __ Push(cp); ++ __ Call(BUILTIN_CODE(masm->isolate(), ToObject), ++ RelocInfo::CODE_TARGET); ++ __ Pop(cp); ++ __ mov(a3, a0); ++ __ Pop(a0, a1); ++ __ SmiUntag(a0); ++ } ++ __ Ld_d(a2, FieldMemOperand(a1, JSFunction::kSharedFunctionInfoOffset)); ++ __ bind(&convert_receiver); ++ } ++ __ Alsl_d(kScratchReg, a0, sp, kPointerSizeLog2, t7); ++ __ St_d(a3, MemOperand(kScratchReg, 0)); ++ } ++ __ bind(&done_convert); ++ ++ // ----------- S t a t e ------------- ++ // -- a0 : the number of arguments (not including the receiver) ++ // -- a1 : the function to call (checked to be a JSFunction) ++ // -- a2 : the shared function info. ++ // -- cp : the function context. ++ // ----------------------------------- ++ ++ __ Ld_hu( ++ a2, FieldMemOperand(a2, SharedFunctionInfo::kFormalParameterCountOffset)); ++ __ InvokeFunctionCode(a1, no_reg, a2, a0, JUMP_FUNCTION); ++ ++ // The function is a "classConstructor", need to raise an exception. 
++ __ bind(&class_constructor); ++ { ++ FrameScope frame(masm, StackFrame::INTERNAL); ++ __ Push(a1); ++ __ CallRuntime(Runtime::kThrowConstructorNonCallableError); ++ } ++} ++ ++// static ++void Builtins::Generate_CallBoundFunctionImpl(MacroAssembler* masm) { ++ // ----------- S t a t e ------------- ++ // -- a0 : the number of arguments (not including the receiver) ++ // -- a1 : the function to call (checked to be a JSBoundFunction) ++ // ----------------------------------- ++ __ AssertBoundFunction(a1); ++ ++ // Patch the receiver to [[BoundThis]]. ++ { ++ __ Ld_d(kScratchReg, ++ FieldMemOperand(a1, JSBoundFunction::kBoundThisOffset)); ++ __ Alsl_d(a4, a0, sp, kPointerSizeLog2, t7); ++ __ St_d(kScratchReg, MemOperand(a4, 0)); ++ } ++ ++ // Load [[BoundArguments]] into a2 and length of that into a4. ++ __ Ld_d(a2, FieldMemOperand(a1, JSBoundFunction::kBoundArgumentsOffset)); ++ __ SmiUntag(a4, FieldMemOperand(a2, FixedArray::kLengthOffset)); ++ ++ // ----------- S t a t e ------------- ++ // -- a0 : the number of arguments (not including the receiver) ++ // -- a1 : the function to call (checked to be a JSBoundFunction) ++ // -- a2 : the [[BoundArguments]] (implemented as FixedArray) ++ // -- a4 : the number of [[BoundArguments]] ++ // ----------------------------------- ++ ++ // Reserve stack space for the [[BoundArguments]]. ++ { ++ Label done; ++ __ slli_d(a5, a4, kPointerSizeLog2); ++ __ Sub_d(sp, sp, Operand(a5)); ++ // Check the stack for overflow. We are not trying to catch interruptions ++ // (i.e. debug break and preemption) here, so check the "real stack limit". ++ LoadStackLimit(masm, kScratchReg, StackLimitKind::kRealStackLimit); ++ __ Branch(&done, hs, sp, Operand(kScratchReg)); ++ // Restore the stack pointer. ++ __ Add_d(sp, sp, Operand(a5)); ++ { ++ FrameScope scope(masm, StackFrame::MANUAL); ++ __ EnterFrame(StackFrame::INTERNAL); ++ __ CallRuntime(Runtime::kThrowStackOverflow); ++ } ++ __ bind(&done); ++ } ++ ++ // Relocate arguments down the stack. ++ { ++ Label loop, done_loop; ++ __ mov(a5, zero_reg); ++ __ bind(&loop); ++ __ Branch(&done_loop, gt, a5, Operand(a0)); ++ __ Alsl_d(a6, a4, sp, kPointerSizeLog2, t7); ++ __ Ld_d(kScratchReg, MemOperand(a6, 0)); ++ __ Alsl_d(a6, a5, sp, kPointerSizeLog2, t7); ++ __ St_d(kScratchReg, MemOperand(a6, 0)); ++ __ Add_d(a4, a4, Operand(1)); ++ __ Add_d(a5, a5, Operand(1)); ++ __ Branch(&loop); ++ __ bind(&done_loop); ++ } ++ ++ // Copy [[BoundArguments]] to the stack (below the arguments). ++ { ++ Label loop, done_loop; ++ __ SmiUntag(a4, FieldMemOperand(a2, FixedArray::kLengthOffset)); ++ __ Add_d(a2, a2, Operand(FixedArray::kHeaderSize - kHeapObjectTag)); ++ __ bind(&loop); ++ __ Sub_d(a4, a4, Operand(1)); ++ __ Branch(&done_loop, lt, a4, Operand(zero_reg)); ++ __ Alsl_d(a5, a4, a2, kPointerSizeLog2, t7); ++ __ Ld_d(kScratchReg, MemOperand(a5, 0)); ++ __ Alsl_d(a5, a0, sp, kPointerSizeLog2, t7); ++ __ St_d(kScratchReg, MemOperand(a5, 0)); ++ __ Add_d(a0, a0, Operand(1)); ++ __ Branch(&loop); ++ __ bind(&done_loop); ++ } ++ ++ // Call the [[BoundTargetFunction]] via the Call builtin. ++ __ Ld_d(a1, FieldMemOperand(a1, JSBoundFunction::kBoundTargetFunctionOffset)); ++ __ Jump(BUILTIN_CODE(masm->isolate(), Call_ReceiverIsAny), ++ RelocInfo::CODE_TARGET); ++} ++ ++// static ++void Builtins::Generate_Call(MacroAssembler* masm, ConvertReceiverMode mode) { ++ // ----------- S t a t e ------------- ++ // -- a0 : the number of arguments (not including the receiver) ++ // -- a1 : the target to call (can be any Object). 
++ // ----------------------------------- ++ ++ Label non_callable, non_smi; ++ __ JumpIfSmi(a1, &non_callable); ++ __ bind(&non_smi); ++ __ GetObjectType(a1, t1, t2); ++ __ Jump(masm->isolate()->builtins()->CallFunction(mode), ++ RelocInfo::CODE_TARGET, eq, t2, Operand(JS_FUNCTION_TYPE)); ++ __ Jump(BUILTIN_CODE(masm->isolate(), CallBoundFunction), ++ RelocInfo::CODE_TARGET, eq, t2, Operand(JS_BOUND_FUNCTION_TYPE)); ++ ++ // Check if target has a [[Call]] internal method. ++ __ Ld_bu(t1, FieldMemOperand(t1, Map::kBitFieldOffset)); ++ __ And(t1, t1, Operand(Map::Bits1::IsCallableBit::kMask)); ++ __ Branch(&non_callable, eq, t1, Operand(zero_reg)); ++ ++ __ Jump(BUILTIN_CODE(masm->isolate(), CallProxy), RelocInfo::CODE_TARGET, eq, ++ t2, Operand(JS_PROXY_TYPE)); ++ ++ // 2. Call to something else, which might have a [[Call]] internal method (if ++ // not we raise an exception). ++ // Overwrite the original receiver with the (original) target. ++ __ Alsl_d(kScratchReg, a0, sp, kPointerSizeLog2, t7); ++ __ St_d(a1, MemOperand(kScratchReg, 0)); ++ // Let the "call_as_function_delegate" take care of the rest. ++ __ LoadNativeContextSlot(Context::CALL_AS_FUNCTION_DELEGATE_INDEX, a1); ++ __ Jump(masm->isolate()->builtins()->CallFunction( ++ ConvertReceiverMode::kNotNullOrUndefined), ++ RelocInfo::CODE_TARGET); ++ ++ // 3. Call to something that is not callable. ++ __ bind(&non_callable); ++ { ++ FrameScope scope(masm, StackFrame::INTERNAL); ++ __ Push(a1); ++ __ CallRuntime(Runtime::kThrowCalledNonCallable); ++ } ++} ++ ++void Builtins::Generate_ConstructFunction(MacroAssembler* masm) { ++ // ----------- S t a t e ------------- ++ // -- a0 : the number of arguments (not including the receiver) ++ // -- a1 : the constructor to call (checked to be a JSFunction) ++ // -- a3 : the new target (checked to be a constructor) ++ // ----------------------------------- ++ __ AssertConstructor(a1); ++ __ AssertFunction(a1); ++ ++ // Calling convention for function specific ConstructStubs require ++ // a2 to contain either an AllocationSite or undefined. ++ __ LoadRoot(a2, RootIndex::kUndefinedValue); ++ ++ Label call_generic_stub; ++ ++ // Jump to JSBuiltinsConstructStub or JSConstructStubGeneric. ++ __ Ld_d(a4, FieldMemOperand(a1, JSFunction::kSharedFunctionInfoOffset)); ++ __ Ld_wu(a4, FieldMemOperand(a4, SharedFunctionInfo::kFlagsOffset)); ++ __ And(a4, a4, Operand(SharedFunctionInfo::ConstructAsBuiltinBit::kMask)); ++ __ Branch(&call_generic_stub, eq, a4, Operand(zero_reg)); ++ ++ __ Jump(BUILTIN_CODE(masm->isolate(), JSBuiltinsConstructStub), ++ RelocInfo::CODE_TARGET); ++ ++ __ bind(&call_generic_stub); ++ __ Jump(BUILTIN_CODE(masm->isolate(), JSConstructStubGeneric), ++ RelocInfo::CODE_TARGET); ++} ++ ++// static ++void Builtins::Generate_ConstructBoundFunction(MacroAssembler* masm) { ++ // ----------- S t a t e ------------- ++ // -- a0 : the number of arguments (not including the receiver) ++ // -- a1 : the function to call (checked to be a JSBoundFunction) ++ // -- a3 : the new target (checked to be a constructor) ++ // ----------------------------------- ++ __ AssertConstructor(a1); ++ __ AssertBoundFunction(a1); ++ ++ // Load [[BoundArguments]] into a2 and length of that into a4. 
++ __ Ld_d(a2, FieldMemOperand(a1, JSBoundFunction::kBoundArgumentsOffset)); ++ __ SmiUntag(a4, FieldMemOperand(a2, FixedArray::kLengthOffset)); ++ ++ // ----------- S t a t e ------------- ++ // -- a0 : the number of arguments (not including the receiver) ++ // -- a1 : the function to call (checked to be a JSBoundFunction) ++ // -- a2 : the [[BoundArguments]] (implemented as FixedArray) ++ // -- a3 : the new target (checked to be a constructor) ++ // -- a4 : the number of [[BoundArguments]] ++ // ----------------------------------- ++ ++ // Reserve stack space for the [[BoundArguments]]. ++ { ++ Label done; ++ __ slli_d(a5, a4, kPointerSizeLog2); ++ __ Sub_d(sp, sp, Operand(a5)); ++ // Check the stack for overflow. We are not trying to catch interruptions ++ // (i.e. debug break and preemption) here, so check the "real stack limit". ++ LoadStackLimit(masm, kScratchReg, StackLimitKind::kRealStackLimit); ++ __ Branch(&done, hs, sp, Operand(kScratchReg)); ++ // Restore the stack pointer. ++ __ Add_d(sp, sp, Operand(a5)); ++ { ++ FrameScope scope(masm, StackFrame::MANUAL); ++ __ EnterFrame(StackFrame::INTERNAL); ++ __ CallRuntime(Runtime::kThrowStackOverflow); ++ } ++ __ bind(&done); ++ } ++ ++ // Relocate arguments down the stack. ++ { ++ Label loop, done_loop; ++ __ mov(a5, zero_reg); ++ __ bind(&loop); ++ __ Branch(&done_loop, ge, a5, Operand(a0)); ++ __ Alsl_d(a6, a4, sp, kPointerSizeLog2, t7); ++ __ Ld_d(kScratchReg, MemOperand(a6, 0)); ++ __ Alsl_d(a6, a5, sp, kPointerSizeLog2, t7); ++ __ St_d(kScratchReg, MemOperand(a6, 0)); ++ __ Add_d(a4, a4, Operand(1)); ++ __ Add_d(a5, a5, Operand(1)); ++ __ Branch(&loop); ++ __ bind(&done_loop); ++ } ++ ++ // Copy [[BoundArguments]] to the stack (below the arguments). ++ { ++ Label loop, done_loop; ++ __ SmiUntag(a4, FieldMemOperand(a2, FixedArray::kLengthOffset)); ++ __ Add_d(a2, a2, Operand(FixedArray::kHeaderSize - kHeapObjectTag)); ++ __ bind(&loop); ++ __ Sub_d(a4, a4, Operand(1)); ++ __ Branch(&done_loop, lt, a4, Operand(zero_reg)); ++ __ Alsl_d(a5, a4, a2, kPointerSizeLog2, t7); ++ __ Ld_d(kScratchReg, MemOperand(a5, 0)); ++ __ Alsl_d(a5, a0, sp, kPointerSizeLog2, t7); ++ __ St_d(kScratchReg, MemOperand(a5, 0)); ++ __ Add_d(a0, a0, Operand(1)); ++ __ Branch(&loop); ++ __ bind(&done_loop); ++ } ++ ++ // Patch new.target to [[BoundTargetFunction]] if new.target equals target. ++ { ++ Label skip_load; ++ __ Branch(&skip_load, ne, a1, Operand(a3)); ++ __ Ld_d(a3, ++ FieldMemOperand(a1, JSBoundFunction::kBoundTargetFunctionOffset)); ++ __ bind(&skip_load); ++ } ++ ++ // Construct the [[BoundTargetFunction]] via the Construct builtin. ++ __ Ld_d(a1, FieldMemOperand(a1, JSBoundFunction::kBoundTargetFunctionOffset)); ++ __ Jump(BUILTIN_CODE(masm->isolate(), Construct), RelocInfo::CODE_TARGET); ++} ++ ++// static ++void Builtins::Generate_Construct(MacroAssembler* masm) { ++ // ----------- S t a t e ------------- ++ // -- a0 : the number of arguments (not including the receiver) ++ // -- a1 : the constructor to call (can be any Object) ++ // -- a3 : the new target (either the same as the constructor or ++ // the JSFunction on which new was invoked initially) ++ // ----------------------------------- ++ ++ // Check if target is a Smi. ++ Label non_constructor, non_proxy; ++ __ JumpIfSmi(a1, &non_constructor); ++ ++ // Check if target has a [[Construct]] internal method. 
++ __ Ld_d(t1, FieldMemOperand(a1, HeapObject::kMapOffset)); ++ __ Ld_bu(t3, FieldMemOperand(t1, Map::kBitFieldOffset)); ++ __ And(t3, t3, Operand(Map::Bits1::IsConstructorBit::kMask)); ++ __ Branch(&non_constructor, eq, t3, Operand(zero_reg)); ++ ++ // Dispatch based on instance type. ++ __ Ld_hu(t2, FieldMemOperand(t1, Map::kInstanceTypeOffset)); ++ __ Jump(BUILTIN_CODE(masm->isolate(), ConstructFunction), ++ RelocInfo::CODE_TARGET, eq, t2, Operand(JS_FUNCTION_TYPE)); ++ ++ // Only dispatch to bound functions after checking whether they are ++ // constructors. ++ __ Jump(BUILTIN_CODE(masm->isolate(), ConstructBoundFunction), ++ RelocInfo::CODE_TARGET, eq, t2, Operand(JS_BOUND_FUNCTION_TYPE)); ++ ++ // Only dispatch to proxies after checking whether they are constructors. ++ __ Branch(&non_proxy, ne, t2, Operand(JS_PROXY_TYPE)); ++ __ Jump(BUILTIN_CODE(masm->isolate(), ConstructProxy), ++ RelocInfo::CODE_TARGET); ++ ++ // Called Construct on an exotic Object with a [[Construct]] internal method. ++ __ bind(&non_proxy); ++ { ++ // Overwrite the original receiver with the (original) target. ++ __ Alsl_d(kScratchReg, a0, sp, kPointerSizeLog2, t7); ++ __ St_d(a1, MemOperand(kScratchReg, 0)); ++ // Let the "call_as_constructor_delegate" take care of the rest. ++ __ LoadNativeContextSlot(Context::CALL_AS_CONSTRUCTOR_DELEGATE_INDEX, a1); ++ __ Jump(masm->isolate()->builtins()->CallFunction(), ++ RelocInfo::CODE_TARGET); ++ } ++ ++ // Called Construct on an Object that doesn't have a [[Construct]] internal ++ // method. ++ __ bind(&non_constructor); ++ __ Jump(BUILTIN_CODE(masm->isolate(), ConstructedNonConstructable), ++ RelocInfo::CODE_TARGET); ++} ++ ++void Builtins::Generate_ArgumentsAdaptorTrampoline(MacroAssembler* masm) { ++ // State setup as expected by MacroAssembler::InvokePrologue. ++ // ----------- S t a t e ------------- ++ // -- a0: actual arguments count ++ // -- a1: function (passed through to callee) ++ // -- a2: expected arguments count ++ // -- a3: new target (passed through to callee) ++ // ----------------------------------- ++ ++ Label invoke, dont_adapt_arguments, stack_overflow; ++ ++ Label enough, too_few; ++ __ Branch(&dont_adapt_arguments, eq, a2, ++ Operand(kDontAdaptArgumentsSentinel)); ++ // We use Uless as the number of argument should always be greater than 0. ++ __ Branch(&too_few, Uless, a0, Operand(a2)); ++ ++ { // Enough parameters: actual >= expected. ++ // a0: actual number of arguments as a smi ++ // a1: function ++ // a2: expected number of arguments ++ // a3: new target (passed through to callee) ++ __ bind(&enough); ++ EnterArgumentsAdaptorFrame(masm); ++ Generate_StackOverflowCheck(masm, a2, a5, kScratchReg, &stack_overflow); ++ ++ // Calculate copy start address into a0 and copy end address into a4. ++ __ SmiScale(a0, a0, kPointerSizeLog2); ++ __ Add_d(a0, fp, a0); ++ // Adjust for return address and receiver. ++ __ Add_d(a0, a0, Operand(2 * kPointerSize)); ++ // Compute copy end address. ++ __ slli_d(a4, a2, kPointerSizeLog2); ++ __ sub_d(a4, a0, a4); ++ ++ // Copy the arguments (including the receiver) to the new stack frame. ++ // a0: copy start address ++ // a1: function ++ // a2: expected number of arguments ++ // a3: new target (passed through to callee) ++ // a4: copy end address ++ ++ Label copy; ++ __ bind(©); ++ __ Ld_d(a5, MemOperand(a0, 0)); ++ __ push(a5); ++ __ addi_d(a0, a0, -kPointerSize); ++ __ Branch(©, ge, a0, Operand(a4)); ++ ++ __ jmp(&invoke); ++ } ++ ++ { // Too few parameters: Actual < expected. 
++ __ bind(&too_few); ++ EnterArgumentsAdaptorFrame(masm); ++ Generate_StackOverflowCheck(masm, a2, a5, kScratchReg, &stack_overflow); ++ ++ // Calculate copy start address into a0 and copy end address into a7. ++ // a0: actual number of arguments as a smi ++ // a1: function ++ // a2: expected number of arguments ++ // a3: new target (passed through to callee) ++ __ SmiScale(a0, a0, kPointerSizeLog2); ++ __ Add_d(a0, fp, a0); ++ // Adjust for return address and receiver. ++ __ Add_d(a0, a0, Operand(2 * kPointerSize)); ++ // Compute copy end address. Also adjust for return address. ++ __ Add_d(a7, fp, kPointerSize); ++ ++ // Copy the arguments (including the receiver) to the new stack frame. ++ // a0: copy start address ++ // a1: function ++ // a2: expected number of arguments ++ // a3: new target (passed through to callee) ++ // a7: copy end address ++ Label copy; ++ __ bind(©); ++ __ Ld_d(a4, ++ MemOperand(a0, 0)); // Adjusted above for return addr and receiver. ++ __ Sub_d(sp, sp, kPointerSize); ++ __ Sub_d(a0, a0, kPointerSize); ++ __ St_d(a4, MemOperand(sp, 0)); ++ __ Branch(©, ne, a0, Operand(a7)); ++ ++ // Fill the remaining expected arguments with undefined. ++ // a1: function ++ // a2: expected number of arguments ++ // a3: new target (passed through to callee) ++ __ LoadRoot(a5, RootIndex::kUndefinedValue); ++ __ slli_d(a6, a2, kPointerSizeLog2); ++ __ Sub_d(a4, fp, Operand(a6)); ++ // Adjust for frame. ++ __ Sub_d(a4, a4, ++ Operand(ArgumentsAdaptorFrameConstants::kFixedFrameSizeFromFp + ++ kPointerSize)); ++ ++ Label fill; ++ __ bind(&fill); ++ __ Sub_d(sp, sp, kPointerSize); ++ __ St_d(a5, MemOperand(sp, 0)); ++ __ Branch(&fill, ne, sp, Operand(a4)); ++ } ++ ++ // Call the entry point. ++ __ bind(&invoke); ++ __ mov(a0, a2); ++ // a0 : expected number of arguments ++ // a1 : function (passed through to callee) ++ // a3: new target (passed through to callee) ++ static_assert(kJavaScriptCallCodeStartRegister == a2, "ABI mismatch"); ++ __ Ld_d(a2, FieldMemOperand(a1, JSFunction::kCodeOffset)); ++ __ Add_d(a2, a2, Operand(Code::kHeaderSize - kHeapObjectTag)); ++ __ Call(a2); ++ ++ // Store offset of return address for deoptimizer. ++ masm->isolate()->heap()->SetArgumentsAdaptorDeoptPCOffset(masm->pc_offset()); ++ ++ // Exit frame and return. ++ LeaveArgumentsAdaptorFrame(masm); ++ __ Ret(); ++ ++ // ------------------------------------------- ++ // Don't adapt arguments. ++ // ------------------------------------------- ++ __ bind(&dont_adapt_arguments); ++ static_assert(kJavaScriptCallCodeStartRegister == a2, "ABI mismatch"); ++ __ Ld_d(a2, FieldMemOperand(a1, JSFunction::kCodeOffset)); ++ __ Add_d(a2, a2, Operand(Code::kHeaderSize - kHeapObjectTag)); ++ __ Jump(a2); ++ ++ __ bind(&stack_overflow); ++ { ++ FrameScope frame(masm, StackFrame::MANUAL); ++ __ CallRuntime(Runtime::kThrowStackOverflow); ++ __ break_(0xCC); ++ } ++} ++ ++void Builtins::Generate_WasmCompileLazy(MacroAssembler* masm) { ++ // The function index was put in t0 by the jump table trampoline. ++ // Convert to Smi for the runtime call ++ __ SmiTag(kWasmCompileLazyFuncIndexRegister); ++ { ++ HardAbortScope hard_abort(masm); // Avoid calls to Abort. ++ FrameScope scope(masm, StackFrame::WASM_COMPILE_LAZY); ++ ++ // Save all parameter registers (see wasm-linkage.cc). They might be ++ // overwritten in the runtime call below. We don't have any callee-saved ++ // registers in wasm, so no need to store anything else. 
++ constexpr RegList gp_regs = Register::ListOf(a0, a2, a3, a4, a5, a6, a7); ++ constexpr RegList fp_regs = ++ DoubleRegister::ListOf(f2, f4, f6, f8, f10, f12, f14); ++ __ MultiPush(gp_regs); ++ __ MultiPushFPU(fp_regs); ++ ++ // Pass instance and function index as an explicit arguments to the runtime ++ // function. ++ __ Push(kWasmInstanceRegister, kWasmCompileLazyFuncIndexRegister); ++ // Initialize the JavaScript context with 0. CEntry will use it to ++ // set the current context on the isolate. ++ __ Move(kContextRegister, Smi::zero()); ++ __ CallRuntime(Runtime::kWasmCompileLazy, 2); ++ __ mov(t8, a0); ++ ++ // Restore registers. ++ __ MultiPopFPU(fp_regs); ++ __ MultiPop(gp_regs); ++ } ++ // Finally, jump to the entrypoint. ++ __ Jump(t8); ++} ++ ++void Builtins::Generate_WasmDebugBreak(MacroAssembler* masm) { ++ HardAbortScope hard_abort(masm); // Avoid calls to Abort. ++ { ++ FrameScope scope(masm, StackFrame::WASM_DEBUG_BREAK); ++ ++ // Save all parameter registers. They might hold live values, we restore ++ // them after the runtime call. ++ __ MultiPush(WasmDebugBreakFrameConstants::kPushedGpRegs); ++ __ MultiPushFPU(WasmDebugBreakFrameConstants::kPushedFpRegs); ++ ++ // Initialize the JavaScript context with 0. CEntry will use it to ++ // set the current context on the isolate. ++ __ Move(cp, Smi::zero()); ++ __ CallRuntime(Runtime::kWasmDebugBreak, 0); ++ ++ // Restore registers. ++ __ MultiPopFPU(WasmDebugBreakFrameConstants::kPushedFpRegs); ++ __ MultiPop(WasmDebugBreakFrameConstants::kPushedGpRegs); ++ } ++ __ Ret(); ++} ++ ++void Builtins::Generate_CEntry(MacroAssembler* masm, int result_size, ++ SaveFPRegsMode save_doubles, ArgvMode argv_mode, ++ bool builtin_exit_frame) { ++ // Called from JavaScript; parameters are on stack as if calling JS function ++ // a0: number of arguments including receiver ++ // a1: pointer to builtin function ++ // fp: frame pointer (restored after C call) ++ // sp: stack pointer (restored as callee's sp after C call) ++ // cp: current context (C callee-saved) ++ // ++ // If argv_mode == kArgvInRegister: ++ // a2: pointer to the first argument ++ ++ if (argv_mode == kArgvInRegister) { ++ // Move argv into the correct register. ++ __ mov(s1, a2); ++ } else { ++ // Compute the argv pointer in a callee-saved register. ++ __ Alsl_d(s1, a0, sp, kPointerSizeLog2, t7); ++ __ Sub_d(s1, s1, kPointerSize); ++ } ++ ++ // Enter the exit frame that transitions from JavaScript to C++. ++ FrameScope scope(masm, StackFrame::MANUAL); ++ __ EnterExitFrame( ++ save_doubles == kSaveFPRegs, 0, ++ builtin_exit_frame ? StackFrame::BUILTIN_EXIT : StackFrame::EXIT); ++ ++ // s0: number of arguments including receiver (C callee-saved) ++ // s1: pointer to first argument (C callee-saved) ++ // s2: pointer to builtin function (C callee-saved) ++ ++ // Prepare arguments for C routine. ++ // a0 = argc ++ __ mov(s0, a0); ++ __ mov(s2, a1); ++ ++ // We are calling compiled C/C++ code. a0 and a1 hold our two arguments. We ++ // also need to reserve the 4 argument slots on the stack. ++ ++ __ AssertStackIsAligned(); ++ ++ // a0 = argc, a1 = argv, a2 = isolate ++ __ li(a2, ExternalReference::isolate_address(masm->isolate())); ++ __ mov(a1, s1); ++ ++ __ StoreReturnAddressAndCall(s2); ++ ++ // Result returned in a0 or a1:a0 - do not destroy these registers! ++ ++ // Check result for exception sentinel. 
++ Label exception_returned; ++ __ LoadRoot(a4, RootIndex::kException); ++ __ Branch(&exception_returned, eq, a4, Operand(a0)); ++ ++ // Check that there is no pending exception, otherwise we ++ // should have returned the exception sentinel. ++ if (FLAG_debug_code) { ++ Label okay; ++ ExternalReference pending_exception_address = ExternalReference::Create( ++ IsolateAddressId::kPendingExceptionAddress, masm->isolate()); ++ __ li(a2, pending_exception_address); ++ __ Ld_d(a2, MemOperand(a2, 0)); ++ __ LoadRoot(a4, RootIndex::kTheHoleValue); ++ // Cannot use check here as it attempts to generate call into runtime. ++ __ Branch(&okay, eq, a4, Operand(a2)); ++ __ stop(); ++ __ bind(&okay); ++ } ++ ++ // Exit C frame and return. ++ // a0:a1: result ++ // sp: stack pointer ++ // fp: frame pointer ++ Register argc = argv_mode == kArgvInRegister ++ // We don't want to pop arguments so set argc to no_reg. ++ ? no_reg ++ // s0: still holds argc (callee-saved). ++ : s0; ++ __ LeaveExitFrame(save_doubles == kSaveFPRegs, argc, EMIT_RETURN); ++ ++ // Handling of exception. ++ __ bind(&exception_returned); ++ ++ ExternalReference pending_handler_context_address = ExternalReference::Create( ++ IsolateAddressId::kPendingHandlerContextAddress, masm->isolate()); ++ ExternalReference pending_handler_entrypoint_address = ++ ExternalReference::Create( ++ IsolateAddressId::kPendingHandlerEntrypointAddress, masm->isolate()); ++ ExternalReference pending_handler_fp_address = ExternalReference::Create( ++ IsolateAddressId::kPendingHandlerFPAddress, masm->isolate()); ++ ExternalReference pending_handler_sp_address = ExternalReference::Create( ++ IsolateAddressId::kPendingHandlerSPAddress, masm->isolate()); ++ ++ // Ask the runtime for help to determine the handler. This will set a0 to ++ // contain the current pending exception, don't clobber it. ++ ExternalReference find_handler = ++ ExternalReference::Create(Runtime::kUnwindAndFindExceptionHandler); ++ { ++ FrameScope scope(masm, StackFrame::MANUAL); ++ __ PrepareCallCFunction(3, 0, a0); ++ __ mov(a0, zero_reg); ++ __ mov(a1, zero_reg); ++ __ li(a2, ExternalReference::isolate_address(masm->isolate())); ++ __ CallCFunction(find_handler, 3); ++ } ++ ++ // Retrieve the handler context, SP and FP. ++ __ li(cp, pending_handler_context_address); ++ __ Ld_d(cp, MemOperand(cp, 0)); ++ __ li(sp, pending_handler_sp_address); ++ __ Ld_d(sp, MemOperand(sp, 0)); ++ __ li(fp, pending_handler_fp_address); ++ __ Ld_d(fp, MemOperand(fp, 0)); ++ ++ // If the handler is a JS frame, restore the context to the frame. Note that ++ // the context will be set to (cp == 0) for non-JS frames. ++ Label zero; ++ __ Branch(&zero, eq, cp, Operand(zero_reg)); ++ __ St_d(cp, MemOperand(fp, StandardFrameConstants::kContextOffset)); ++ __ bind(&zero); ++ ++ // Reset the masking register. This is done independent of the underlying ++ // feature flag {FLAG_untrusted_code_mitigations} to make the snapshot work ++ // with both configurations. It is safe to always do this, because the ++ // underlying register is caller-saved and can be arbitrarily clobbered. ++ __ ResetSpeculationPoisonRegister(); ++ ++ // Compute the handler entry address and jump to it. 
++ __ li(t7, pending_handler_entrypoint_address); ++ __ Ld_d(t7, MemOperand(t7, 0)); ++ __ Jump(t7); ++} ++ ++void Builtins::Generate_DoubleToI(MacroAssembler* masm) { ++ Label done; ++ Register result_reg = t0; ++ ++ Register scratch = GetRegisterThatIsNotOneOf(result_reg); ++ Register scratch2 = GetRegisterThatIsNotOneOf(result_reg, scratch); ++ Register scratch3 = GetRegisterThatIsNotOneOf(result_reg, scratch, scratch2); ++ DoubleRegister double_scratch = kScratchDoubleReg; ++ ++ // Account for saved regs. ++ const int kArgumentOffset = 4 * kPointerSize; ++ ++ __ Push(result_reg); ++ __ Push(scratch, scratch2, scratch3); ++ ++ // Load double input. ++ __ Fld_d(double_scratch, MemOperand(sp, kArgumentOffset)); ++ ++ // Clear cumulative exception flags and save the FCSR. ++ // __ movfcsr2gr(scratch2, FCSR); ++ // __ movgr2fcsr(FCSR, zero_reg); ++ ++ // Try a conversion to a signed integer. ++ __ ftintrz_w_d(double_scratch, double_scratch); ++ // Move the converted value into the result register. ++ __ movfr2gr_s(scratch3, double_scratch); ++ ++ // Retrieve and restore the FCSR. ++ __ movfcsr2gr(scratch); // __ cfc1(scratch, FCSR); ++ // __ ctc1(scratch2, FCSR); ++ ++ // Check for overflow and NaNs. ++ __ And( ++ scratch, scratch, ++ kFCSROverflowFlagMask | kFCSRUnderflowFlagMask | kFCSRInvalidOpFlagMask); ++ // If we had no exceptions then set result_reg and we are done. ++ Label error; ++ __ Branch(&error, ne, scratch, Operand(zero_reg)); ++ __ Move(result_reg, scratch3); ++ __ Branch(&done); ++ __ bind(&error); ++ ++ // Load the double value and perform a manual truncation. ++ Register input_high = scratch2; ++ Register input_low = scratch3; ++ ++ __ Ld_w(input_low, ++ MemOperand(sp, kArgumentOffset + Register::kMantissaOffset)); ++ __ Ld_w(input_high, ++ MemOperand(sp, kArgumentOffset + Register::kExponentOffset)); ++ ++ Label normal_exponent; ++ // Extract the biased exponent in result. ++ __ bstrpick_w(result_reg, input_high, ++ HeapNumber::kExponentShift + HeapNumber::kExponentBits - 1, ++ HeapNumber::kExponentShift); ++ ++ // Check for Infinity and NaNs, which should return 0. ++ __ Sub_w(scratch, result_reg, HeapNumber::kExponentMask); ++ __ Movz(result_reg, zero_reg, scratch); ++ __ Branch(&done, eq, scratch, Operand(zero_reg)); ++ ++ // Express exponent as delta to (number of mantissa bits + 31). ++ __ Sub_w(result_reg, result_reg, ++ Operand(HeapNumber::kExponentBias + HeapNumber::kMantissaBits + 31)); ++ ++ // If the delta is strictly positive, all bits would be shifted away, ++ // which means that we can return 0. ++ __ Branch(&normal_exponent, le, result_reg, Operand(zero_reg)); ++ __ mov(result_reg, zero_reg); ++ __ Branch(&done); ++ ++ __ bind(&normal_exponent); ++ const int kShiftBase = HeapNumber::kNonMantissaBitsInTopWord - 1; ++ // Calculate shift. ++ __ Add_w(scratch, result_reg, ++ Operand(kShiftBase + HeapNumber::kMantissaBits)); ++ ++ // Save the sign. ++ Register sign = result_reg; ++ result_reg = no_reg; ++ __ And(sign, input_high, Operand(HeapNumber::kSignMask)); ++ ++ // On ARM shifts > 31 bits are valid and will result in zero. On MIPS we need ++ // to check for this specific case. ++ Label high_shift_needed, high_shift_done; ++ __ Branch(&high_shift_needed, lt, scratch, Operand(32)); ++ __ mov(input_high, zero_reg); ++ __ Branch(&high_shift_done); ++ __ bind(&high_shift_needed); ++ ++ // Set the implicit 1 before the mantissa part in input_high. 
++ __ Or(input_high, input_high, ++ Operand(1 << HeapNumber::kMantissaBitsInTopWord)); ++ // Shift the mantissa bits to the correct position. ++ // We don't need to clear non-mantissa bits as they will be shifted away. ++ // If they weren't, it would mean that the answer is in the 32bit range. ++ __ sll_w(input_high, input_high, scratch); ++ ++ __ bind(&high_shift_done); ++ ++ // Replace the shifted bits with bits from the lower mantissa word. ++ Label pos_shift, shift_done; ++ __ li(kScratchReg, 32); ++ __ sub_w(scratch, kScratchReg, scratch); ++ __ Branch(&pos_shift, ge, scratch, Operand(zero_reg)); ++ ++ // Negate scratch. ++ __ Sub_w(scratch, zero_reg, scratch); ++ __ sll_w(input_low, input_low, scratch); ++ __ Branch(&shift_done); ++ ++ __ bind(&pos_shift); ++ __ srl_w(input_low, input_low, scratch); ++ ++ __ bind(&shift_done); ++ __ Or(input_high, input_high, Operand(input_low)); ++ // Restore sign if necessary. ++ __ mov(scratch, sign); ++ result_reg = sign; ++ sign = no_reg; ++ __ Sub_w(result_reg, zero_reg, input_high); ++ __ Movz(result_reg, input_high, scratch); ++ ++ __ bind(&done); ++ ++ __ St_d(result_reg, MemOperand(sp, kArgumentOffset)); ++ __ Pop(scratch, scratch2, scratch3); ++ __ Pop(result_reg); ++ __ Ret(); ++} ++ ++namespace { ++ ++int AddressOffset(ExternalReference ref0, ExternalReference ref1) { ++ int64_t offset = (ref0.address() - ref1.address()); ++ DCHECK(static_cast<int>(offset) == offset); ++ return static_cast<int>(offset); ++} ++ ++// Calls an API function. Allocates HandleScope, extracts returned value ++// from handle and propagates exceptions. Restores context. stack_space ++// - space to be unwound on exit (includes the call JS arguments space and ++// the additional space allocated for the fast call). ++void CallApiFunctionAndReturn(MacroAssembler* masm, Register function_address, ++ ExternalReference thunk_ref, int stack_space, ++ MemOperand* stack_space_operand, ++ MemOperand return_value_operand) { ++ Isolate* isolate = masm->isolate(); ++ ExternalReference next_address = ++ ExternalReference::handle_scope_next_address(isolate); ++ const int kNextOffset = 0; ++ const int kLimitOffset = AddressOffset( ++ ExternalReference::handle_scope_limit_address(isolate), next_address); ++ const int kLevelOffset = AddressOffset( ++ ExternalReference::handle_scope_level_address(isolate), next_address); ++ ++ DCHECK(function_address == a1 || function_address == a2); ++ ++ Label profiler_enabled, end_profiler_check; ++ __ li(t7, ExternalReference::is_profiling_address(isolate)); ++ __ Ld_b(t7, MemOperand(t7, 0)); ++ __ Branch(&profiler_enabled, ne, t7, Operand(zero_reg)); ++ __ li(t7, ExternalReference::address_of_runtime_stats_flag()); ++ __ Ld_w(t7, MemOperand(t7, 0)); ++ __ Branch(&profiler_enabled, ne, t7, Operand(zero_reg)); ++ { ++ // Call the api function directly. ++ __ mov(t7, function_address); ++ __ Branch(&end_profiler_check); ++ } ++ ++ __ bind(&profiler_enabled); ++ { ++ // Additional parameter is the address of the actual callback. ++ __ li(t7, thunk_ref); ++ } ++ __ bind(&end_profiler_check); ++ ++ // Allocate HandleScope in callee-save registers.
++ __ li(s5, next_address); ++ __ Ld_d(s0, MemOperand(s5, kNextOffset)); ++ __ Ld_d(s1, MemOperand(s5, kLimitOffset)); ++ __ Ld_w(s2, MemOperand(s5, kLevelOffset)); ++ __ Add_w(s2, s2, Operand(1)); ++ __ St_w(s2, MemOperand(s5, kLevelOffset)); ++ ++ __ StoreReturnAddressAndCall(t7); ++ ++ Label promote_scheduled_exception; ++ Label delete_allocated_handles; ++ Label leave_exit_frame; ++ Label return_value_loaded; ++ ++ // Load value from ReturnValue. ++ __ Ld_d(a0, return_value_operand); ++ __ bind(&return_value_loaded); ++ ++ // No more valid handles (the result handle was the last one). Restore ++ // previous handle scope. ++ __ St_d(s0, MemOperand(s5, kNextOffset)); ++ if (__ emit_debug_code()) { ++ __ Ld_w(a1, MemOperand(s5, kLevelOffset)); ++ __ Check(eq, AbortReason::kUnexpectedLevelAfterReturnFromApiCall, a1, ++ Operand(s2)); ++ } ++ __ Sub_w(s2, s2, Operand(1)); ++ __ St_w(s2, MemOperand(s5, kLevelOffset)); ++ __ Ld_d(kScratchReg, MemOperand(s5, kLimitOffset)); ++ __ Branch(&delete_allocated_handles, ne, s1, Operand(kScratchReg)); ++ ++ // Leave the API exit frame. ++ __ bind(&leave_exit_frame); ++ ++ if (stack_space_operand == nullptr) { ++ DCHECK_NE(stack_space, 0); ++ __ li(s0, Operand(stack_space)); ++ } else { ++ DCHECK_EQ(stack_space, 0); ++ STATIC_ASSERT(kCArgSlotCount == 0); ++ __ Ld_d(s0, *stack_space_operand); ++ } ++ ++ static constexpr bool kDontSaveDoubles = false; ++ static constexpr bool kRegisterContainsSlotCount = false; ++ __ LeaveExitFrame(kDontSaveDoubles, s0, NO_EMIT_RETURN, ++ kRegisterContainsSlotCount); ++ ++ // Check if the function scheduled an exception. ++ __ LoadRoot(a4, RootIndex::kTheHoleValue); ++ __ li(kScratchReg, ExternalReference::scheduled_exception_address(isolate)); ++ __ Ld_d(a5, MemOperand(kScratchReg, 0)); ++ __ Branch(&promote_scheduled_exception, ne, a4, Operand(a5)); ++ ++ __ Ret(); ++ ++ // Re-throw by promoting a scheduled exception. ++ __ bind(&promote_scheduled_exception); ++ __ TailCallRuntime(Runtime::kPromoteScheduledException); ++ ++ // HandleScope limit has changed. Delete allocated extensions. ++ __ bind(&delete_allocated_handles); ++ __ St_d(s1, MemOperand(s5, kLimitOffset)); ++ __ mov(s0, a0); ++ __ PrepareCallCFunction(1, s1); ++ __ li(a0, ExternalReference::isolate_address(isolate)); ++ __ CallCFunction(ExternalReference::delete_handle_scope_extensions(), 1); ++ __ mov(a0, s0); ++ __ jmp(&leave_exit_frame); ++} ++ ++} // namespace ++ ++void Builtins::Generate_CallApiCallback(MacroAssembler* masm) { ++ // ----------- S t a t e ------------- ++ // -- cp : context ++ // -- a1 : api function address ++ // -- a2 : arguments count (not including the receiver) ++ // -- a3 : call data ++ // -- a0 : holder ++ // -- ++ // -- sp[0] : last argument ++ // -- ... ++ // -- sp[(argc - 1) * 8] : first argument ++ // -- sp[(argc + 0) * 8] : receiver ++ // ----------------------------------- ++ ++ Register api_function_address = a1; ++ Register argc = a2; ++ Register call_data = a3; ++ Register holder = a0; ++ Register scratch = t0; ++ Register base = t1; // For addressing MemOperands on the stack. 
++ ++ DCHECK(!AreAliased(api_function_address, argc, call_data, holder, scratch, ++ base)); ++ ++ using FCA = FunctionCallbackArguments; ++ ++ STATIC_ASSERT(FCA::kArgsLength == 6); ++ STATIC_ASSERT(FCA::kNewTargetIndex == 5); ++ STATIC_ASSERT(FCA::kDataIndex == 4); ++ STATIC_ASSERT(FCA::kReturnValueOffset == 3); ++ STATIC_ASSERT(FCA::kReturnValueDefaultValueIndex == 2); ++ STATIC_ASSERT(FCA::kIsolateIndex == 1); ++ STATIC_ASSERT(FCA::kHolderIndex == 0); ++ ++ // Set up FunctionCallbackInfo's implicit_args on the stack as follows: ++ // ++ // Target state: ++ // sp[0 * kPointerSize]: kHolder ++ // sp[1 * kPointerSize]: kIsolate ++ // sp[2 * kPointerSize]: undefined (kReturnValueDefaultValue) ++ // sp[3 * kPointerSize]: undefined (kReturnValue) ++ // sp[4 * kPointerSize]: kData ++ // sp[5 * kPointerSize]: undefined (kNewTarget) ++ ++ // Set up the base register for addressing through MemOperands. It will point ++ // at the receiver (located at sp + argc * kPointerSize). ++ __ Alsl_d(base, argc, sp, kPointerSizeLog2, t7); ++ ++ // Reserve space on the stack. ++ __ Sub_d(sp, sp, Operand(FCA::kArgsLength * kPointerSize)); ++ ++ // kHolder. ++ __ St_d(holder, MemOperand(sp, 0 * kPointerSize)); ++ ++ // kIsolate. ++ __ li(scratch, ExternalReference::isolate_address(masm->isolate())); ++ __ St_d(scratch, MemOperand(sp, 1 * kPointerSize)); ++ ++ // kReturnValueDefaultValue and kReturnValue. ++ __ LoadRoot(scratch, RootIndex::kUndefinedValue); ++ __ St_d(scratch, MemOperand(sp, 2 * kPointerSize)); ++ __ St_d(scratch, MemOperand(sp, 3 * kPointerSize)); ++ ++ // kData. ++ __ St_d(call_data, MemOperand(sp, 4 * kPointerSize)); ++ ++ // kNewTarget. ++ __ St_d(scratch, MemOperand(sp, 5 * kPointerSize)); ++ ++ // Keep a pointer to kHolder (= implicit_args) in a scratch register. ++ // We use it below to set up the FunctionCallbackInfo object. ++ __ mov(scratch, sp); ++ ++ // Allocate the v8::Arguments structure in the arguments' space since ++ // it's not controlled by GC. ++ static constexpr int kApiStackSpace = 4; ++ static constexpr bool kDontSaveDoubles = false; ++ FrameScope frame_scope(masm, StackFrame::MANUAL); ++ __ EnterExitFrame(kDontSaveDoubles, kApiStackSpace); ++ ++ // EnterExitFrame may align the sp. ++ ++ // FunctionCallbackInfo::implicit_args_ (points at kHolder as set up above). ++ // Arguments are after the return address (pushed by EnterExitFrame()). ++ __ St_d(scratch, MemOperand(sp, 1 * kPointerSize)); ++ ++ // FunctionCallbackInfo::values_ (points at the first varargs argument passed ++ // on the stack). ++ __ Sub_d(scratch, base, Operand(1 * kPointerSize)); ++ __ St_d(scratch, MemOperand(sp, 2 * kPointerSize)); ++ ++ // FunctionCallbackInfo::length_. ++ // Stored as int field, 32-bit integers within struct on stack always left ++ // justified by n64 ABI. ++ __ St_w(argc, MemOperand(sp, 3 * kPointerSize)); ++ ++ // We also store the number of bytes to drop from the stack after returning ++ // from the API function here. ++ // Note: Unlike on other architectures, this stores the number of slots to ++ // drop, not the number of bytes. ++ __ Add_d(scratch, argc, Operand(FCA::kArgsLength + 1 /* receiver */)); ++ __ St_d(scratch, MemOperand(sp, 4 * kPointerSize)); ++ ++ // v8::InvocationCallback's argument. ++ DCHECK(!AreAliased(api_function_address, scratch, a0)); ++ __ Add_d(a0, sp, Operand(1 * kPointerSize)); ++ ++ ExternalReference thunk_ref = ExternalReference::invoke_function_callback(); ++ ++ // There are two stack slots above the arguments we constructed on the stack. 
++ // TODO(jgruber): Document what these arguments are. ++ static constexpr int kStackSlotsAboveFCA = 2; ++ MemOperand return_value_operand( ++ fp, (kStackSlotsAboveFCA + FCA::kReturnValueOffset) * kPointerSize); ++ ++ static constexpr int kUseStackSpaceOperand = 0; ++ MemOperand stack_space_operand(sp, 4 * kPointerSize); ++ ++ AllowExternalCallThatCantCauseGC scope(masm); ++ CallApiFunctionAndReturn(masm, api_function_address, thunk_ref, ++ kUseStackSpaceOperand, &stack_space_operand, ++ return_value_operand); ++} ++ ++void Builtins::Generate_CallApiGetter(MacroAssembler* masm) { ++ // Build v8::PropertyCallbackInfo::args_ array on the stack and push property ++ // name below the exit frame to make GC aware of them. ++ STATIC_ASSERT(PropertyCallbackArguments::kShouldThrowOnErrorIndex == 0); ++ STATIC_ASSERT(PropertyCallbackArguments::kHolderIndex == 1); ++ STATIC_ASSERT(PropertyCallbackArguments::kIsolateIndex == 2); ++ STATIC_ASSERT(PropertyCallbackArguments::kReturnValueDefaultValueIndex == 3); ++ STATIC_ASSERT(PropertyCallbackArguments::kReturnValueOffset == 4); ++ STATIC_ASSERT(PropertyCallbackArguments::kDataIndex == 5); ++ STATIC_ASSERT(PropertyCallbackArguments::kThisIndex == 6); ++ STATIC_ASSERT(PropertyCallbackArguments::kArgsLength == 7); ++ ++ Register receiver = ApiGetterDescriptor::ReceiverRegister(); ++ Register holder = ApiGetterDescriptor::HolderRegister(); ++ Register callback = ApiGetterDescriptor::CallbackRegister(); ++ Register scratch = a4; ++ DCHECK(!AreAliased(receiver, holder, callback, scratch)); ++ ++ Register api_function_address = a2; ++ ++ // Here and below +1 is for name() pushed after the args_ array. ++ using PCA = PropertyCallbackArguments; ++ __ Sub_d(sp, sp, (PCA::kArgsLength + 1) * kPointerSize); ++ __ St_d(receiver, MemOperand(sp, (PCA::kThisIndex + 1) * kPointerSize)); ++ __ Ld_d(scratch, FieldMemOperand(callback, AccessorInfo::kDataOffset)); ++ __ St_d(scratch, MemOperand(sp, (PCA::kDataIndex + 1) * kPointerSize)); ++ __ LoadRoot(scratch, RootIndex::kUndefinedValue); ++ __ St_d(scratch, ++ MemOperand(sp, (PCA::kReturnValueOffset + 1) * kPointerSize)); ++ __ St_d(scratch, MemOperand(sp, (PCA::kReturnValueDefaultValueIndex + 1) * ++ kPointerSize)); ++ __ li(scratch, ExternalReference::isolate_address(masm->isolate())); ++ __ St_d(scratch, MemOperand(sp, (PCA::kIsolateIndex + 1) * kPointerSize)); ++ __ St_d(holder, MemOperand(sp, (PCA::kHolderIndex + 1) * kPointerSize)); ++ // should_throw_on_error -> false ++ DCHECK_EQ(0, Smi::zero().ptr()); ++ __ St_d(zero_reg, ++ MemOperand(sp, (PCA::kShouldThrowOnErrorIndex + 1) * kPointerSize)); ++ __ Ld_d(scratch, FieldMemOperand(callback, AccessorInfo::kNameOffset)); ++ __ St_d(scratch, MemOperand(sp, 0 * kPointerSize)); ++ ++ // v8::PropertyCallbackInfo::args_ array and name handle. ++ const int kStackUnwindSpace = PropertyCallbackArguments::kArgsLength + 1; ++ ++ // Load address of v8::PropertyAccessorInfo::args_ array and name handle. ++ __ mov(a0, sp); // a0 = Handle ++ __ Add_d(a1, a0, Operand(1 * kPointerSize)); // a1 = v8::PCI::args_ ++ ++ const int kApiStackSpace = 1; ++ FrameScope frame_scope(masm, StackFrame::MANUAL); ++ __ EnterExitFrame(false, kApiStackSpace); ++ ++ // Create v8::PropertyCallbackInfo object on the stack and initialize ++ // it's args_ field. 
++ __ St_d(a1, MemOperand(sp, 1 * kPointerSize)); ++ __ Add_d(a1, sp, Operand(1 * kPointerSize)); ++ // a1 = v8::PropertyCallbackInfo& ++ ++ ExternalReference thunk_ref = ++ ExternalReference::invoke_accessor_getter_callback(); ++ ++ __ Ld_d(scratch, FieldMemOperand(callback, AccessorInfo::kJsGetterOffset)); ++ __ Ld_d(api_function_address, ++ FieldMemOperand(scratch, Foreign::kForeignAddressOffset)); ++ ++ // +3 is to skip prolog, return address and name handle. ++ MemOperand return_value_operand( ++ fp, (PropertyCallbackArguments::kReturnValueOffset + 3) * kPointerSize); ++ MemOperand* const kUseStackSpaceConstant = nullptr; ++ CallApiFunctionAndReturn(masm, api_function_address, thunk_ref, ++ kStackUnwindSpace, kUseStackSpaceConstant, ++ return_value_operand); ++} ++ ++void Builtins::Generate_DirectCEntry(MacroAssembler* masm) { ++ // The sole purpose of DirectCEntry is for movable callers (e.g. any general ++ // purpose Code object) to be able to call into C functions that may trigger ++ // GC and thus move the caller. ++ // ++ // DirectCEntry places the return address on the stack (updated by the GC), ++ // making the call GC safe. The irregexp backend relies on this. ++ ++ // Make place for arguments to fit C calling convention. Callers use ++ // EnterExitFrame/LeaveExitFrame so they handle stack restoring and we don't ++ // have to do that here. Any caller must drop kCArgsSlotsSize stack space ++ // after the call. ++ __ addi_d(sp, sp, -kCArgsSlotsSize); ++ ++ __ St_d(ra, MemOperand(sp, kCArgsSlotsSize)); // Store the return address. ++ __ Call(t7); // Call the C++ function. ++ __ Ld_d(t7, MemOperand(sp, kCArgsSlotsSize)); // Return to calling code. ++ ++ if (FLAG_debug_code && FLAG_enable_slow_asserts) { ++ // In case of an error the return address may point to a memory area ++ // filled with kZapValue by the GC. Dereference the address and check for ++ // this. 
++ __ Ld_d(a4, MemOperand(t7, 0)); ++ __ Assert(ne, AbortReason::kReceivedInvalidReturnAddress, a4, ++ Operand(reinterpret_cast(kZapValue))); ++ } ++ ++ __ Jump(t7); ++} ++ ++#undef __ ++ ++} // namespace internal ++} // namespace v8 ++ ++#endif // V8_TARGET_ARCH_LA64 +diff --git a/src/3rdparty/chromium/v8/src/codegen/assembler-arch.h b/src/3rdparty/chromium/v8/src/codegen/assembler-arch.h +index d56b3725046..6d5ad8bbf19 100644 +--- a/src/3rdparty/chromium/v8/src/codegen/assembler-arch.h ++++ b/src/3rdparty/chromium/v8/src/codegen/assembler-arch.h +@@ -21,6 +21,8 @@ + #include "src/codegen/mips/assembler-mips.h" + #elif V8_TARGET_ARCH_MIPS64 + #include "src/codegen/mips64/assembler-mips64.h" ++#elif V8_TARGET_ARCH_LA64 ++#include "src/codegen/la64/assembler-la64.h" + #elif V8_TARGET_ARCH_S390 + #include "src/codegen/s390/assembler-s390.h" + #else +diff --git a/src/3rdparty/chromium/v8/src/codegen/assembler-inl.h b/src/3rdparty/chromium/v8/src/codegen/assembler-inl.h +index 8c81315d50d..304eed44f06 100644 +--- a/src/3rdparty/chromium/v8/src/codegen/assembler-inl.h ++++ b/src/3rdparty/chromium/v8/src/codegen/assembler-inl.h +@@ -21,6 +21,8 @@ + #include "src/codegen/mips/assembler-mips-inl.h" + #elif V8_TARGET_ARCH_MIPS64 + #include "src/codegen/mips64/assembler-mips64-inl.h" ++#elif V8_TARGET_ARCH_LA64 ++#include "src/codegen/la64/assembler-la64-inl.h" + #elif V8_TARGET_ARCH_S390 + #include "src/codegen/s390/assembler-s390-inl.h" + #else +diff --git a/src/3rdparty/chromium/v8/src/codegen/constants-arch.h b/src/3rdparty/chromium/v8/src/codegen/constants-arch.h +index 7a222c960ff..701c3c08a92 100644 +--- a/src/3rdparty/chromium/v8/src/codegen/constants-arch.h ++++ b/src/3rdparty/chromium/v8/src/codegen/constants-arch.h +@@ -15,6 +15,8 @@ + #include "src/codegen/mips/constants-mips.h" // NOLINT + #elif V8_TARGET_ARCH_MIPS64 + #include "src/codegen/mips64/constants-mips64.h" // NOLINT ++#elif V8_TARGET_ARCH_LA64 ++#include "src/codegen/la64/constants-la64.h" // NOLINT + #elif V8_TARGET_ARCH_PPC || V8_TARGET_ARCH_PPC64 + #include "src/codegen/ppc/constants-ppc.h" // NOLINT + #elif V8_TARGET_ARCH_S390 +diff --git a/src/3rdparty/chromium/v8/src/codegen/cpu-features.h b/src/3rdparty/chromium/v8/src/codegen/cpu-features.h +index 14c94ebae9a..d0bb89367ef 100644 +--- a/src/3rdparty/chromium/v8/src/codegen/cpu-features.h ++++ b/src/3rdparty/chromium/v8/src/codegen/cpu-features.h +@@ -47,6 +47,9 @@ enum CpuFeature { + MIPSr6, + MIPS_SIMD, // MSA instructions + ++#elif V8_TARGET_ARCH_LA64 ++ FPU, // TODO ++ + #elif V8_TARGET_ARCH_PPC || V8_TARGET_ARCH_PPC64 + FPU, + FPR_GPR_MOV, +diff --git a/src/3rdparty/chromium/v8/src/codegen/external-reference.cc b/src/3rdparty/chromium/v8/src/codegen/external-reference.cc +index 7a42e40461c..3bf4edef3bd 100644 +--- a/src/3rdparty/chromium/v8/src/codegen/external-reference.cc ++++ b/src/3rdparty/chromium/v8/src/codegen/external-reference.cc +@@ -472,6 +472,8 @@ ExternalReference ExternalReference::invoke_accessor_getter_callback() { + #define re_stack_check_func RegExpMacroAssemblerMIPS::CheckStackGuardState + #elif V8_TARGET_ARCH_MIPS64 + #define re_stack_check_func RegExpMacroAssemblerMIPS::CheckStackGuardState ++#elif V8_TARGET_ARCH_LA64 ++#define re_stack_check_func RegExpMacroAssemblerLA64::CheckStackGuardState + #elif V8_TARGET_ARCH_S390 + #define re_stack_check_func RegExpMacroAssemblerS390::CheckStackGuardState + #else +diff --git a/src/3rdparty/chromium/v8/src/codegen/interface-descriptors.cc b/src/3rdparty/chromium/v8/src/codegen/interface-descriptors.cc 
+index 42b45c0f33a..d0b2bfe1e7e 100644 +--- a/src/3rdparty/chromium/v8/src/codegen/interface-descriptors.cc ++++ b/src/3rdparty/chromium/v8/src/codegen/interface-descriptors.cc +@@ -128,7 +128,8 @@ const char* CallInterfaceDescriptor::DebugName() const { + return ""; + } + +-#if !defined(V8_TARGET_ARCH_MIPS) && !defined(V8_TARGET_ARCH_MIPS64) ++#if !defined(V8_TARGET_ARCH_MIPS) && !defined(V8_TARGET_ARCH_MIPS64) && \ ++ !defined(V8_TARGET_ARCH_LA64) + bool CallInterfaceDescriptor::IsValidFloatParameterRegister(Register reg) { + return true; + } +@@ -412,7 +413,8 @@ void WasmAtomicNotifyDescriptor::InitializePlatformSpecific( + DefaultInitializePlatformSpecific(data, kParameterCount); + } + +-#if !defined(V8_TARGET_ARCH_MIPS) && !defined(V8_TARGET_ARCH_MIPS64) ++#if !defined(V8_TARGET_ARCH_MIPS) && !defined(V8_TARGET_ARCH_MIPS64) && \ ++ !defined(V8_TARGET_ARCH_LA64) + void WasmI32AtomicWait32Descriptor::InitializePlatformSpecific( + CallInterfaceDescriptorData* data) { + DefaultInitializePlatformSpecific(data, kParameterCount); +diff --git a/src/3rdparty/chromium/v8/src/codegen/la64/assembler-la64-inl.h b/src/3rdparty/chromium/v8/src/codegen/la64/assembler-la64-inl.h +new file mode 100644 +index 00000000000..e2ead3948c9 +--- /dev/null ++++ b/src/3rdparty/chromium/v8/src/codegen/la64/assembler-la64-inl.h +@@ -0,0 +1,268 @@ ++// Copyright (c) 1994-2006 Sun Microsystems Inc. ++// All Rights Reserved. ++// ++// Redistribution and use in source and binary forms, with or without ++// modification, are permitted provided that the following conditions are ++// met: ++// ++// - Redistributions of source code must retain the above copyright notice, ++// this list of conditions and the following disclaimer. ++// ++// - Redistribution in binary form must reproduce the above copyright ++// notice, this list of conditions and the following disclaimer in the ++// documentation and/or other materials provided with the distribution. ++// ++// - Neither the name of Sun Microsystems or the names of contributors may ++// be used to endorse or promote products derived from this software without ++// specific prior written permission. ++// ++// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS ++// IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, ++// THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR ++// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR ++// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, ++// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, ++// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR ++// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF ++// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING ++// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS ++// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. ++ ++// The original source code covered by the above license above has been ++// modified significantly by Google Inc. ++// Copyright 2012 the V8 project authors. All rights reserved. 
++ ++#ifndef V8_CODEGEN_LA64_ASSEMBLER_LA64_INL_H_ ++#define V8_CODEGEN_LA64_ASSEMBLER_LA64_INL_H_ ++ ++#include "src/codegen/la64/assembler-la64.h" ++ ++#include "src/codegen/assembler.h" ++#include "src/debug/debug.h" ++#include "src/objects/objects-inl.h" ++ ++namespace v8 { ++namespace internal { ++ ++bool CpuFeatures::SupportsOptimizer() { return IsSupported(FPU); } ++ ++bool CpuFeatures::SupportsWasmSimd128() { return false; } ++ ++// ----------------------------------------------------------------------------- ++// Operand and MemOperand. ++ ++bool Operand::is_reg() const { return rm_.is_valid(); } ++ ++int64_t Operand::immediate() const { ++ DCHECK(!is_reg()); ++ DCHECK(!IsHeapObjectRequest()); ++ return value_.immediate; ++} ++ ++// ----------------------------------------------------------------------------- ++// RelocInfo. ++ ++void RelocInfo::apply(intptr_t delta) { ++ if (IsInternalReference(rmode_) || IsInternalReferenceEncoded(rmode_)) { ++ // Absolute code pointer inside code object moves with the code object. ++ Assembler::RelocateInternalReference(rmode_, pc_, delta); ++ } ++} ++ ++Address RelocInfo::target_address() { ++ DCHECK(IsCodeTarget(rmode_) || IsRuntimeEntry(rmode_) || IsWasmCall(rmode_)); ++ return Assembler::target_address_at(pc_, constant_pool_); ++} ++ ++Address RelocInfo::target_address_address() { ++ DCHECK(HasTargetAddressAddress()); ++ // Read the address of the word containing the target_address in an ++ // instruction stream. ++ // The only architecture-independent user of this function is the serializer. ++ // The serializer uses it to find out how many raw bytes of instruction to ++ // output before the next target. ++ // For an instruction like LUI/ORI where the target bits are mixed into the ++ // instruction bits, the size of the target will be zero, indicating that the ++ // serializer should not step forward in memory after a target is resolved ++ // and written. In this case the target_address_address function should ++ // return the end of the instructions to be patched, allowing the ++ // deserializer to deserialize the instructions as raw bytes and put them in ++ // place, ready to be patched with the target. After jump optimization, ++ // that is the address of the instruction that follows J/JAL/JR/JALR ++ // instruction. ++ return pc_ + Assembler::kInstructionsFor64BitConstant * kInstrSize; ++} ++ ++Address RelocInfo::constant_pool_entry_address() { UNREACHABLE(); } ++ ++int RelocInfo::target_address_size() { return Assembler::kSpecialTargetSize; } ++ ++void Assembler::deserialization_set_special_target_at( ++ Address instruction_payload, Code code, Address target) { ++ set_target_address_at(instruction_payload, ++ !code.is_null() ? code.constant_pool() : kNullAddress, ++ target); ++} ++ ++int Assembler::deserialization_special_target_size( ++ Address instruction_payload) { ++ return kSpecialTargetSize; ++} ++ ++void Assembler::set_target_internal_reference_encoded_at(Address pc, ++ Address target) { ++ // TODO, see AssembleJumpTable, la64 does not generate internal reference? ++ abort(); ++} ++ ++void Assembler::deserialization_set_target_internal_reference_at( ++ Address pc, Address target, RelocInfo::Mode mode) { ++ if (mode == RelocInfo::INTERNAL_REFERENCE_ENCODED) { ++ DCHECK(IsJ(instr_at(pc))); ++ set_target_internal_reference_encoded_at(pc, target); ++ } else { ++ DCHECK(mode == RelocInfo::INTERNAL_REFERENCE); ++ Memory
<Address>(pc) = target;
++  }
++}
++
++HeapObject RelocInfo::target_object() {
++  DCHECK(IsCodeTarget(rmode_) || IsFullEmbeddedObject(rmode_));
++  return HeapObject::cast(
++      Object(Assembler::target_address_at(pc_, constant_pool_)));
++}
++
++HeapObject RelocInfo::target_object_no_host(Isolate* isolate) {
++  return target_object();
++}
++
++Handle<HeapObject> RelocInfo::target_object_handle(Assembler* origin) {
++  DCHECK(IsCodeTarget(rmode_) || IsFullEmbeddedObject(rmode_));
++  return Handle<HeapObject>(reinterpret_cast<Address*>(
++      Assembler::target_address_at(pc_, constant_pool_)));
++}
++
++void RelocInfo::set_target_object(Heap* heap, HeapObject target,
++                                  WriteBarrierMode write_barrier_mode,
++                                  ICacheFlushMode icache_flush_mode) {
++  DCHECK(IsCodeTarget(rmode_) || IsFullEmbeddedObject(rmode_));
++  Assembler::set_target_address_at(pc_, constant_pool_, target.ptr(),
++                                   icache_flush_mode);
++  if (write_barrier_mode == UPDATE_WRITE_BARRIER && !host().is_null() &&
++      !FLAG_disable_write_barriers) {
++    WriteBarrierForCode(host(), this, target);
++  }
++}
++
++Address RelocInfo::target_external_reference() {
++  DCHECK(rmode_ == EXTERNAL_REFERENCE);
++  return Assembler::target_address_at(pc_, constant_pool_);
++}
++
++void RelocInfo::set_target_external_reference(
++    Address target, ICacheFlushMode icache_flush_mode) {
++  DCHECK(rmode_ == RelocInfo::EXTERNAL_REFERENCE);
++  Assembler::set_target_address_at(pc_, constant_pool_, target,
++                                   icache_flush_mode);
++}
++
++Address RelocInfo::target_internal_reference() {
++  if (rmode_ == INTERNAL_REFERENCE) {
++    return Memory<Address>
(pc_); ++ } else { ++ UNREACHABLE(); ++ } ++} ++ ++Address RelocInfo::target_internal_reference_address() { ++ DCHECK(rmode_ == INTERNAL_REFERENCE || rmode_ == INTERNAL_REFERENCE_ENCODED); ++ return pc_; ++} ++ ++Address RelocInfo::target_runtime_entry(Assembler* origin) { ++ DCHECK(IsRuntimeEntry(rmode_)); ++ return target_address(); ++} ++ ++void RelocInfo::set_target_runtime_entry(Address target, ++ WriteBarrierMode write_barrier_mode, ++ ICacheFlushMode icache_flush_mode) { ++ DCHECK(IsRuntimeEntry(rmode_)); ++ if (target_address() != target) ++ set_target_address(target, write_barrier_mode, icache_flush_mode); ++} ++ ++Address RelocInfo::target_off_heap_target() { ++ DCHECK(IsOffHeapTarget(rmode_)); ++ return Assembler::target_address_at(pc_, constant_pool_); ++} ++ ++void RelocInfo::WipeOut() { ++ DCHECK(IsFullEmbeddedObject(rmode_) || IsCodeTarget(rmode_) || ++ IsRuntimeEntry(rmode_) || IsExternalReference(rmode_) || ++ IsInternalReference(rmode_) || IsInternalReferenceEncoded(rmode_) || ++ IsOffHeapTarget(rmode_)); ++ if (IsInternalReference(rmode_)) { ++ Memory
<Address>(pc_) = kNullAddress;
++  } else if (IsInternalReferenceEncoded(rmode_)) {
++    Assembler::set_target_internal_reference_encoded_at(pc_, kNullAddress);
++  } else {
++    Assembler::set_target_address_at(pc_, constant_pool_, kNullAddress);
++  }
++}
++
++// -----------------------------------------------------------------------------
++// Assembler.
++
++void Assembler::CheckBuffer() {
++  if (buffer_space() <= kGap) {
++    GrowBuffer();
++  }
++}
++
++void Assembler::EmitHelper(Instr x) {
++  *reinterpret_cast<Instr*>(pc_) = x;
++  pc_ += kInstrSize;
++  CheckTrampolinePoolQuick();
++}
++
++template <>
++inline void Assembler::EmitHelper(uint8_t x);
++
++template <typename T>
++void Assembler::EmitHelper(T x) {
++  *reinterpret_cast<T*>(pc_) = x;
++  pc_ += sizeof(x);
++  CheckTrampolinePoolQuick();
++}
++
++template <>
++void Assembler::EmitHelper(uint8_t x) {
++  *reinterpret_cast<uint8_t*>(pc_) = x;
++  pc_ += sizeof(x);
++  if (reinterpret_cast<intptr_t>(pc_) % kInstrSize == 0) {
++    CheckTrampolinePoolQuick();
++  }
++}
++
++void Assembler::emit(Instr x) {
++  if (!is_buffer_growth_blocked()) {
++    CheckBuffer();
++  }
++  EmitHelper(x);
++}
++
++void Assembler::emit(uint64_t data) {
++  // CheckForEmitInForbiddenSlot();
++  if (!is_buffer_growth_blocked()) {
++    CheckBuffer();
++  }
++  EmitHelper(data);
++}
++
++EnsureSpace::EnsureSpace(Assembler* assembler) { assembler->CheckBuffer(); }
++
++}  // namespace internal
++}  // namespace v8
++
++#endif  // V8_CODEGEN_LA64_ASSEMBLER_LA64_INL_H_
+diff --git a/src/3rdparty/chromium/v8/src/codegen/la64/assembler-la64.cc b/src/3rdparty/chromium/v8/src/codegen/la64/assembler-la64.cc
+new file mode 100644
+index 00000000000..0272caeaaf7
+--- /dev/null
++++ b/src/3rdparty/chromium/v8/src/codegen/la64/assembler-la64.cc
+@@ -0,0 +1,2856 @@
++// Copyright (c) 1994-2006 Sun Microsystems Inc.
++// All Rights Reserved.
++//
++// Redistribution and use in source and binary forms, with or without
++// modification, are permitted provided that the following conditions are
++// met:
++//
++// - Redistributions of source code must retain the above copyright notice,
++// this list of conditions and the following disclaimer.
++//
++// - Redistribution in binary form must reproduce the above copyright
++// notice, this list of conditions and the following disclaimer in the
++// documentation and/or other materials provided with the distribution.
++//
++// - Neither the name of Sun Microsystems or the names of contributors may
++// be used to endorse or promote products derived from this software without
++// specific prior written permission.
++//
++// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
++// IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
++// THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
++// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
++// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
++// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
++// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
++// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
++// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
++// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
++// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
++
++// The original source code covered by the above license above has been
++// modified significantly by Google Inc.
++// Copyright 2012 the V8 project authors. All rights reserved.
++ ++#include "src/codegen/la64/assembler-la64.h" ++ ++#if V8_TARGET_ARCH_LA64 ++ ++#include "src/base/cpu.h" ++#include "src/codegen/la64/assembler-la64-inl.h" ++#include "src/codegen/safepoint-table.h" ++#include "src/codegen/string-constants.h" ++#include "src/deoptimizer/deoptimizer.h" ++#include "src/objects/heap-number-inl.h" ++ ++namespace v8 { ++namespace internal { ++ ++void CpuFeatures::ProbeImpl(bool cross_compile) { ++ supported_ |= 1u << FPU; ++ ++ // Only use statically determined features for cross compile (snapshot). ++ if (cross_compile) return; ++ ++#if defined(_loongisa_vec) ++ supported_ |= 0u; ++#endif ++ // If the compiler is allowed to use fpu then we can use fpu too in our ++ // code generation. ++#ifdef __loongarch__ ++ // Probe for additional features at runtime. ++ base::CPU cpu; ++ supported_ |= 0u; ++#endif ++} ++ ++void CpuFeatures::PrintTarget() {} ++void CpuFeatures::PrintFeatures() {} ++ ++int ToNumber(Register reg) { ++ DCHECK(reg.is_valid()); ++ const int kNumbers[] = { ++ 0, // zero_reg ++ 1, // r1 ra ++ 2, // r2 gp ++ 3, // r3 sp ++ 4, // a0 v0 ++ 5, // a1 v1 ++ 6, // a2 ++ 7, // a3 ++ 8, // a4 ++ 9, // a5 ++ 10, // a6 ++ 11, // a7 ++ 12, // t0 ++ 13, // t1 ++ 14, // t2 ++ 15, // t3 ++ 16, // t4 ++ 17, // t5 ++ 18, // t6 ++ 19, // t7 ++ 20, // t8 ++ 21, // tp ++ 22, // fp ++ 23, // s0 ++ 24, // s1 ++ 25, // s2 ++ 26, // s3 ++ 27, // s4 ++ 28, // s5 ++ 29, // s6 ++ 30, // s7 ++ 31, // s8 ++ }; ++ return kNumbers[reg.code()]; ++} ++ ++Register ToRegister(int num) { ++ DCHECK(num >= 0 && num < kNumRegisters); ++ const Register kRegisters[] = { ++ zero_reg, ra, gp, sp, a0, a1, a2, a3, a4, a5, a6, a7, t0, t1, t2, t3, ++ t4, t5, t6, t7, t8, tp, fp, s0, s1, s2, s3, s4, s5, s6, s7, s8}; ++ return kRegisters[num]; ++} ++ ++// ----------------------------------------------------------------------------- ++// Implementation of RelocInfo. ++ ++const int RelocInfo::kApplyMask = ++ RelocInfo::ModeMask(RelocInfo::INTERNAL_REFERENCE) | ++ RelocInfo::ModeMask(RelocInfo::INTERNAL_REFERENCE_ENCODED); ++ ++bool RelocInfo::IsCodedSpecially() { ++ // The deserializer needs to know whether a pointer is specially coded. Being ++ // specially coded on loongisa means that it is a lui/ori instruction, and ++ // that is always the case inside code objects. ++ return true; ++} ++ ++bool RelocInfo::IsInConstantPool() { return false; } ++ ++uint32_t RelocInfo::wasm_call_tag() const { ++ DCHECK(rmode_ == WASM_CALL || rmode_ == WASM_STUB_CALL); ++ return static_cast( ++ Assembler::target_address_at(pc_, constant_pool_)); ++} ++ ++// ----------------------------------------------------------------------------- ++// Implementation of Operand and MemOperand. ++// See assembler-la64-inl.h for inlined constructors. 
++
++Operand::Operand(Handle<HeapObject> handle)
++    : rm_(no_reg), rmode_(RelocInfo::FULL_EMBEDDED_OBJECT) {
++  value_.immediate = static_cast<intptr_t>(handle.address());
++}
++
++Operand Operand::EmbeddedNumber(double value) {
++  int32_t smi;
++  if (DoubleToSmiInteger(value, &smi)) return Operand(Smi::FromInt(smi));
++  Operand result(0, RelocInfo::FULL_EMBEDDED_OBJECT);
++  result.is_heap_object_request_ = true;
++  result.value_.heap_object_request = HeapObjectRequest(value);
++  return result;
++}
++
++Operand Operand::EmbeddedStringConstant(const StringConstantBase* str) {
++  Operand result(0, RelocInfo::FULL_EMBEDDED_OBJECT);
++  result.is_heap_object_request_ = true;
++  result.value_.heap_object_request = HeapObjectRequest(str);
++  return result;
++}
++
++MemOperand::MemOperand(Register base, int32_t offset)
++    : base_(base), index_(no_reg), offset_(offset) {}
++
++MemOperand::MemOperand(Register base, Register index)
++    : base_(base), index_(index), offset_(0) {}
++
++void Assembler::AllocateAndInstallRequestedHeapObjects(Isolate* isolate) {
++  DCHECK_IMPLIES(isolate == nullptr, heap_object_requests_.empty());
++  for (auto& request : heap_object_requests_) {
++    Handle<HeapObject> object;
++    switch (request.kind()) {
++      case HeapObjectRequest::kHeapNumber:
++        object = isolate->factory()->NewHeapNumber<AllocationType::kOld>(
++            request.heap_number());
++        break;
++      case HeapObjectRequest::kStringConstant:
++        const StringConstantBase* str = request.string();
++        CHECK_NOT_NULL(str);
++        object = str->AllocateStringConstant(isolate);
++        break;
++    }
++    Address pc = reinterpret_cast<Address>(buffer_start_) + request.offset();
++    set_target_value_at(pc, reinterpret_cast<uint64_t>(object.location()));
++  }
++}
++
++// -----------------------------------------------------------------------------
++// Specific instructions, constants, and masks.
++
++// addi_d(sp, sp, 8) aka Pop() operation or part of Pop(r)
++// operations as post-increment of sp.
++const Instr kPopInstruction = ADDI_D | (kPointerSize & kImm12Mask) << kRkShift |
++                              (sp.code() << kRjShift) | sp.code();  // NOLINT
++// addi_d(sp, sp, -8) part of Push(r) operation as pre-decrement of sp.
++const Instr kPushInstruction = ADDI_D |
++                               (-kPointerSize & kImm12Mask) << kRkShift |
++                               (sp.code() << kRjShift) | sp.code();  // NOLINT
++// St_d(r, MemOperand(sp, 0))
++const Instr kPushRegPattern = ST_D | (sp.code() << kRjShift);  // NOLINT
++// Ld_d(r, MemOperand(sp, 0))
++const Instr kPopRegPattern = LD_D | (sp.code() << kRjShift);  // NOLINT
++
++Assembler::Assembler(const AssemblerOptions& options,
++                     std::unique_ptr<AssemblerBuffer> buffer)
++    : AssemblerBase(options, std::move(buffer)),
++      scratch_register_list_(t7.bit() | t6.bit()) {
++  reloc_info_writer.Reposition(buffer_start_ + buffer_->size(), pc_);
++
++  last_trampoline_pool_end_ = 0;
++  no_trampoline_pool_before_ = 0;
++  trampoline_pool_blocked_nesting_ = 0;
++  // We leave space (16 * kTrampolineSlotsSize)
++  // for BlockTrampolinePoolScope buffer.
++  next_buffer_check_ = FLAG_force_long_branches
++                           ? kMaxInt
++                           : kMax16BranchOffset - kTrampolineSlotsSize * 16;
++  internal_trampoline_exception_ = false;
++  last_bound_pos_ = 0;
++
++  trampoline_emitted_ = FLAG_force_long_branches;  // TODO remove this
++  unbound_labels_count_ = 0;
++  block_buffer_growth_ = false;
++}
++
++void Assembler::GetCode(Isolate* isolate, CodeDesc* desc,
++                        SafepointTableBuilder* safepoint_table_builder,
++                        int handler_table_offset) {
++  // EmitForbiddenSlotInstruction();  // TODO why?
++
++  int code_comments_size = WriteCodeComments();
++
++  DCHECK(pc_ <= reloc_info_writer.pos());  // No overlap.
++
++  AllocateAndInstallRequestedHeapObjects(isolate);
++
++  // Set up code descriptor.
++  // TODO(jgruber): Reconsider how these offsets and sizes are maintained up to
++  // this point to make CodeDesc initialization less fiddly.
++
++  static constexpr int kConstantPoolSize = 0;
++  const int instruction_size = pc_offset();
++  const int code_comments_offset = instruction_size - code_comments_size;
++  const int constant_pool_offset = code_comments_offset - kConstantPoolSize;
++  const int handler_table_offset2 = (handler_table_offset == kNoHandlerTable)
++                                        ? constant_pool_offset
++                                        : handler_table_offset;
++  const int safepoint_table_offset =
++      (safepoint_table_builder == kNoSafepointTable)
++          ? handler_table_offset2
++          : safepoint_table_builder->GetCodeOffset();
++  const int reloc_info_offset =
++      static_cast<int>(reloc_info_writer.pos() - buffer_->start());
++  CodeDesc::Initialize(desc, this, safepoint_table_offset,
++                       handler_table_offset2, constant_pool_offset,
++                       code_comments_offset, reloc_info_offset);
++}
++
++void Assembler::Align(int m) {
++  DCHECK(m >= 4 && base::bits::IsPowerOfTwo(m));
++  while ((pc_offset() & (m - 1)) != 0) {
++    nop();
++  }
++}
++
++void Assembler::CodeTargetAlign() {
++  // No advantage to aligning branch/call targets to more than
++  // single instruction, that I am aware of.
++ Align(4); ++} ++ ++Register Assembler::GetRkReg(Instr instr) { ++ return Register::from_code((instr & kRkFieldMask) >> kRkShift); ++} ++ ++Register Assembler::GetRjReg(Instr instr) { ++ return Register::from_code((instr & kRjFieldMask) >> kRjShift); ++} ++ ++Register Assembler::GetRdReg(Instr instr) { ++ return Register::from_code((instr & kRdFieldMask) >> kRdShift); ++} ++ ++uint32_t Assembler::GetRk(Instr instr) { ++ return (instr & kRkFieldMask) >> kRkShift; ++} ++ ++uint32_t Assembler::GetRkField(Instr instr) { return instr & kRkFieldMask; } ++ ++uint32_t Assembler::GetRj(Instr instr) { ++ return (instr & kRjFieldMask) >> kRjShift; ++} ++ ++uint32_t Assembler::GetRjField(Instr instr) { return instr & kRjFieldMask; } ++ ++uint32_t Assembler::GetRd(Instr instr) { ++ return (instr & kRdFieldMask) >> kRdShift; ++} ++ ++uint32_t Assembler::GetRdField(Instr instr) { return instr & kRdFieldMask; } ++ ++uint32_t Assembler::GetSa2(Instr instr) { ++ return (instr & kSa2FieldMask) >> kSaShift; ++} ++ ++uint32_t Assembler::GetSa2Field(Instr instr) { return instr & kSa2FieldMask; } ++ ++uint32_t Assembler::GetSa3(Instr instr) { ++ return (instr & kSa3FieldMask) >> kSaShift; ++} ++ ++uint32_t Assembler::GetSa3Field(Instr instr) { return instr & kSa3FieldMask; } ++ ++bool Assembler::IsPop(Instr instr) { ++ return (instr & 0xffc003e0) == kPopRegPattern; ++} ++ ++bool Assembler::IsPush(Instr instr) { ++ return (instr & 0xffc003e0) == kPushRegPattern; ++} ++ ++// Labels refer to positions in the (to be) generated code. ++// There are bound, linked, and unused labels. ++// ++// Bound labels refer to known positions in the already ++// generated code. pos() is the position the label refers to. ++// ++// Linked labels refer to unknown positions in the code ++// to be generated; pos() is the position of the last ++// instruction using the label. ++ ++// The link chain is terminated by a value in the instruction of -1, ++// which is an otherwise illegal value (branch -1 is inf loop). ++// The instruction 16-bit offset field addresses 32-bit words, but in ++// code is conv to an 18-bit value addressing bytes, hence the -4 value. ++ ++const int kEndOfChain = 0; ++// Determines the end of the Jump chain (a subset of the label link chain). ++const int kEndOfJumpChain = 0; ++ ++bool Assembler::IsBranch(Instr instr) { ++ uint32_t opcode = (instr >> 26) << 26; ++ // Checks if the instruction is a branch. ++ bool isBranch = opcode == BEQZ || opcode == BNEZ || opcode == BCZ || ++ opcode == B || opcode == BL || opcode == BEQ || ++ opcode == BNE || opcode == BLT || opcode == BGE || ++ opcode == BLTU || opcode == BGEU; ++ return isBranch; ++} ++ ++bool Assembler::IsB(Instr instr) { ++ uint32_t opcode = (instr >> 26) << 26; ++ // Checks if the instruction is a b. ++ bool isBranch = opcode == B || opcode == BL; ++ return isBranch; ++} ++ ++bool Assembler::IsBz(Instr instr) { ++ uint32_t opcode = (instr >> 26) << 26; ++ // Checks if the instruction is a branch. ++ bool isBranch = opcode == BEQZ || opcode == BNEZ || opcode == BCZ; ++ return isBranch; ++} ++ ++bool Assembler::IsEmittedConstant(Instr instr) { ++ // Add GetLabelConst function? ++ uint32_t label_constant = instr & ~kImm16Mask; ++ return label_constant == 0; // Emitted label const in reg-exp engine. ++} ++ ++bool Assembler::IsJ(Instr instr) { ++ uint32_t opcode = (instr >> 26) << 26; ++ // Checks if the instruction is a jump. 
++ return opcode == JIRL; ++} ++ ++bool Assembler::IsLu12i_w(Instr instr) { ++ uint32_t opcode = (instr >> 25) << 25; ++ return opcode == LU12I_W; ++} ++ ++bool Assembler::IsOri(Instr instr) { ++ uint32_t opcode = (instr >> 22) << 22; ++ return opcode == ORI; ++} ++ ++bool Assembler::IsLu32i_d(Instr instr) { ++ uint32_t opcode = (instr >> 25) << 25; ++ return opcode == LU32I_D; ++} ++ ++bool Assembler::IsLu52i_d(Instr instr) { ++ uint32_t opcode = (instr >> 22) << 22; ++ return opcode == LU52I_D; ++} ++ ++bool Assembler::IsMov(Instr instr, Register rd, Register rj) { ++ // Checks if the instruction is a OR with zero_reg argument (aka MOV). ++ Instr instr1 = ++ OR | zero_reg.code() << kRkShift | rj.code() << kRjShift | rd.code(); ++ return instr == instr1; ++} ++ ++bool Assembler::IsPcAddi(Instr instr, Register rd, int32_t si20) { ++ DCHECK(is_int20(si20)); ++ Instr instr1 = PCADDI | (si20 & 0xfffff) << kRjShift | rd.code(); ++ return instr == instr1; ++} ++ ++bool Assembler::IsNop(Instr instr, unsigned int type) { ++ // See Assembler::nop(type). ++ DCHECK_LT(type, 32); ++ // Traditional loongisa nop == andi(zero_reg, zero_reg, 0) ++ // When marking non-zero type, use andi(zero_reg, t7, type) ++ // to avoid use of ssnop and ehb special encodings of the ++ // andi instruction. ++ ++ Register nop_rt_reg = (type == 0) ? zero_reg : t7; ++ Instr instr1 = ANDI | ((type & kImm12Mask) << kRkShift) | ++ (nop_rt_reg.code() << kRjShift); ++ ++ return instr == instr1; ++} ++ ++static inline int32_t GetOffsetOfBranch(Instr instr, ++ Assembler::OffsetSize bits) { ++ int32_t result = 0; ++ if (bits == 16) { ++ result = (instr << 6) >> 16; ++ } else if (bits == 21) { ++ uint32_t low16 = instr << 6; ++ low16 = low16 >> 16; ++ low16 &= 0xffff; ++ int32_t hi5 = (instr << 27) >> 11; ++ result = hi5 | low16; ++ } else { ++ uint32_t low16 = instr << 6; ++ low16 = low16 >> 16; ++ low16 &= 0xffff; ++ int32_t hi10 = (instr << 22) >> 6; ++ result = hi10 | low16; ++ DCHECK_EQ(bits, 26); ++ } ++ return result << 2; ++} ++ ++static Assembler::OffsetSize OffsetSizeInBits(Instr instr) { ++ if (Assembler::IsB(instr)) { ++ return Assembler::OffsetSize::kOffset26; ++ } else if (Assembler::IsBz(instr)) { ++ return Assembler::OffsetSize::kOffset21; ++ } else { ++ DCHECK(Assembler::IsBranch(instr)); ++ return Assembler::OffsetSize::kOffset16; ++ } ++} ++ ++static inline int32_t AddBranchOffset(int pos, Instr instr) { ++ Assembler::OffsetSize bits = OffsetSizeInBits(instr); ++ ++ int32_t imm = GetOffsetOfBranch(instr, bits); ++ ++ if (imm == kEndOfChain) { ++ // EndOfChain sentinel is returned directly, not relative to pc or pos. ++ return kEndOfChain; ++ } else { ++ // Handle the case that next branch position is 0. ++ // TODO: Define -4 as a constant ++ int32_t offset = pos + Assembler::kBranchPCOffset + imm; ++ return offset == 0 ? -4 : offset; ++ } ++} ++ ++int Assembler::target_at(int pos, bool is_internal) { ++ if (is_internal) { ++ int64_t* p = reinterpret_cast(buffer_start_ + pos); ++ int64_t address = *p; ++ if (address == kEndOfJumpChain) { ++ return kEndOfChain; ++ } else { ++ int64_t instr_address = reinterpret_cast(p); ++ DCHECK(instr_address - address < INT_MAX); ++ int delta = static_cast(instr_address - address); ++ DCHECK(pos > delta); ++ return pos - delta; ++ } ++ } ++ Instr instr = instr_at(pos); ++ ++ // TODO remove after remove label_at_put? ++ if ((instr & ~kImm16Mask) == 0) { ++ // Emitted label constant, not part of a branch. 
++ if (instr == 0) { ++ return kEndOfChain; ++ } else { ++ int32_t imm18 = ((instr & static_cast(kImm16Mask)) << 16) >> 14; ++ return (imm18 + pos); ++ } ++ } ++ ++ // Check we have a branch or jump instruction. ++ DCHECK(IsBranch(instr) || IsJ(instr) || IsLu12i_w(instr) || ++ IsPcAddi(instr, t8, 16)); ++ // Do NOT change this to <<2. We rely on arithmetic shifts here, assuming ++ // the compiler uses arithmetic shifts for signed integers. ++ if (IsBranch(instr)) { ++ return AddBranchOffset(pos, instr); ++ } else if (IsPcAddi(instr, t8, 16)) { ++ // see BranchLong(Label* L) and BranchAndLinkLong ?? ++ int32_t imm32; ++ Instr instr_lu12i_w = instr_at(pos + 1 * kInstrSize); ++ Instr instr_ori = instr_at(pos + 2 * kInstrSize); ++ DCHECK(IsLu12i_w(instr_lu12i_w)); ++ // DCHECK(IsOri(instr_ori)); ++ imm32 = ((instr_lu12i_w >> 5) & 0xfffff) << 12; ++ imm32 |= ((instr_ori >> 10) & static_cast(kImm12Mask)); ++ if (imm32 == kEndOfJumpChain) { ++ // EndOfChain sentinel is returned directly, not relative to pc or pos. ++ return kEndOfChain; ++ } ++ return pos + imm32; ++ } else if (IsLu12i_w(instr)) { ++ abort(); ++ // TODO no used?? ++ /* Instr instr_lui = instr_at(pos + 0 * kInstrSize); ++ Instr instr_ori = instr_at(pos + 1 * kInstrSize); ++ Instr instr_ori2 = instr_at(pos + 3 * kInstrSize); ++ DCHECK(IsOri(instr_ori)); ++ DCHECK(IsOri(instr_ori2)); ++ ++ // TODO(plind) create named constants for shift values. ++ int64_t imm = static_cast(instr_lui & kImm16Mask) << 48; ++ imm |= static_cast(instr_ori & kImm16Mask) << 32; ++ imm |= static_cast(instr_ori2 & kImm16Mask) << 16; ++ // Sign extend address; ++ imm >>= 16; ++ ++ if (imm == kEndOfJumpChain) { ++ // EndOfChain sentinel is returned directly, not relative to pc or ++ pos. return kEndOfChain; } else { uint64_t instr_address = ++ reinterpret_cast(buffer_start_ + pos); DCHECK(instr_address - ++ imm < INT_MAX); int delta = static_cast(instr_address - imm); ++ DCHECK(pos > delta); ++ return pos - delta; ++ }*/ ++ } else { ++ DCHECK(IsJ(instr)); ++ // TODO not used??? ++ abort(); ++ } ++} ++ ++static inline Instr SetBranchOffset(int32_t pos, int32_t target_pos, ++ Instr instr) { ++ int32_t bits = OffsetSizeInBits(instr); ++ int32_t imm = target_pos - pos; ++ DCHECK_EQ(imm & 3, 0); ++ imm >>= 2; ++ ++ DCHECK(is_intn(imm, bits)); ++ ++ if (bits == 16) { ++ const int32_t mask = ((1 << 16) - 1) << 10; ++ instr &= ~mask; ++ return instr | ((imm << 10) & mask); ++ } else if (bits == 21) { ++ const int32_t mask = 0x3fffc1f; ++ instr &= ~mask; ++ uint32_t low16 = (imm & kImm16Mask) << 10; ++ int32_t hi5 = (imm >> 16) & 0x1f; ++ return instr | low16 | hi5; ++ } else { ++ DCHECK_EQ(bits, 26); ++ const int32_t mask = 0x3ffffff; ++ instr &= ~mask; ++ uint32_t low16 = (imm & kImm16Mask) << 10; ++ int32_t hi10 = (imm >> 16) & 0x3ff; ++ return instr | low16 | hi10; ++ } ++} ++ ++void Assembler::target_at_put(int pos, int target_pos, bool is_internal) { ++ if (is_internal) { ++ uint64_t imm = reinterpret_cast(buffer_start_) + target_pos; ++ *reinterpret_cast(buffer_start_ + pos) = imm; ++ return; ++ } ++ Instr instr = instr_at(pos); ++ if ((instr & ~kImm16Mask) == 0) { ++ DCHECK(target_pos == kEndOfChain || target_pos >= 0); ++ // Emitted label constant, not part of a branch. ++ // Make label relative to Code pointer of generated Code object. 
++ instr_at_put(pos, target_pos + (Code::kHeaderSize - kHeapObjectTag)); ++ return; ++ } ++ ++ if (IsBranch(instr)) { ++ instr = SetBranchOffset(pos, target_pos, instr); ++ instr_at_put(pos, instr); ++ } else if (0 == 1 /*IsLui(instr)*/) { ++ /* if (IsPcAddi(instr, t8, 16)) { ++ Instr instr_lui = instr_at(pos + 0 * kInstrSize); ++ Instr instr_ori = instr_at(pos + 2 * kInstrSize); ++ DCHECK(IsLui(instr_lui)); ++ DCHECK(IsOri(instr_ori)); ++ int32_t imm = target_pos - (pos + Assembler::kLongBranchPCOffset); ++ DCHECK_EQ(imm & 3, 0); ++ if (is_int16(imm + Assembler::kLongBranchPCOffset - ++ Assembler::kBranchPCOffset)) { ++ // Optimize by converting to regular branch and link with 16-bit ++ // offset. ++ Instr instr_b = REGIMM | BGEZAL; // Branch and link. ++ instr_b = SetBranchOffset(pos, target_pos, instr_b); ++ // Correct ra register to point to one instruction after jalr from ++ // TurboAssembler::BranchAndLinkLong. ++ Instr instr_a = DADDIU | ra.code() << kRsShift | ra.code() << kRtShift ++ | kOptimizedBranchAndLinkLongReturnOffset; ++ ++ instr_at_put(pos, instr_b); ++ instr_at_put(pos + 1 * kInstrSize, instr_a); ++ } else { ++ instr_lui &= ~kImm16Mask; ++ instr_ori &= ~kImm16Mask; ++ ++ instr_at_put(pos + 0 * kInstrSize, ++ instr_lui | ((imm >> kLuiShift) & kImm16Mask)); ++ instr_at_put(pos + 2 * kInstrSize, instr_ori | (imm & kImm16Mask)); ++ } ++ } else { ++ Instr instr_lui = instr_at(pos + 0 * kInstrSize); ++ Instr instr_ori = instr_at(pos + 1 * kInstrSize); ++ Instr instr_ori2 = instr_at(pos + 3 * kInstrSize); ++ DCHECK(IsOri(instr_ori)); ++ DCHECK(IsOri(instr_ori2)); ++ ++ uint64_t imm = reinterpret_cast(buffer_start_) + target_pos; ++ DCHECK_EQ(imm & 3, 0); ++ ++ instr_lui &= ~kImm16Mask; ++ instr_ori &= ~kImm16Mask; ++ instr_ori2 &= ~kImm16Mask; ++ ++ instr_at_put(pos + 0 * kInstrSize, ++ instr_lui | ((imm >> 32) & kImm16Mask)); ++ instr_at_put(pos + 1 * kInstrSize, ++ instr_ori | ((imm >> 16) & kImm16Mask)); ++ instr_at_put(pos + 3 * kInstrSize, instr_ori2 | (imm & kImm16Mask)); ++ }*/ ++ } else if (IsPcAddi(instr, t8, 16)) { ++ abort(); /* ++ Instr instr_lu12i_w = instr_at(pos + 1 * kInstrSize); ++ Instr instr_ori = instr_at(pos + 2 * kInstrSize); ++ DCHECK(IsLu12i_w(instr_lu12i_w)); ++ //DCHECK(IsOri(instr_ori)); ++ ++ int32_t imm_short = target_pos - (pos + Assembler::kBranchPCOffset); ++ ++ if (is_int21(imm_short)) { ++ // Optimize by converting to regular branch with 21-bit ++ // offset ++ Instr instr_b = B; ++ instr_b = SetBranchOffset(pos, target_pos, instr_b); ++ ++ instr_at_put(pos, instr_b); ++ } else { ++ int32_t imm = target_pos - (pos + Assembler::kLongBranchPCOffset); ++ DCHECK_EQ(imm & 3, 0); ++ ++ instr_lu12i_w &= 0xfe00001fu; // opcode:7 | bit20 | rd:5 ++ instr_ori &= 0xffc003ffu; // opcode:10 | bit12 | rj:5 | rd:5 ++ ++ instr_at_put(pos + 1 * kInstrSize, ++ instr_lu12i_w | (((imm >> 12) & 0xfffff) << 5)); ++ instr_at_put(pos + 2 * kInstrSize, instr_ori | ++ ((imm & 0xfff) << 10)); ++ }*/ ++ } else if (IsJ(instr)) { ++ /* ++ int32_t imm28 = target_pos - pos; ++ DCHECK_EQ(imm28 & 3, 0); ++ ++ uint32_t imm26 = static_cast(imm28 >> 2); ++ DCHECK(is_uint26(imm26)); ++ // Place 26-bit signed offset with markings. ++ // When code is committed it will be resolved to j/jal. ++ int32_t mark = IsJ(instr) ? 
kJRawMark : kJalRawMark; ++ instr_at_put(pos, mark | (imm26 & kImm26Mask));*/ ++ abort(); ++ } else { ++ /* int32_t imm28 = target_pos - pos; ++ DCHECK_EQ(imm28 & 3, 0); ++ ++ uint32_t imm26 = static_cast(imm28 >> 2); ++ DCHECK(is_uint26(imm26)); ++ // Place raw 26-bit signed offset. ++ // When code is committed it will be resolved to j/jal. ++ instr &= ~kImm26Mask; ++ instr_at_put(pos, instr | (imm26 & kImm26Mask));*/ ++ abort(); ++ } ++} ++ ++void Assembler::print(const Label* L) { ++ if (L->is_unused()) { ++ PrintF("unused label\n"); ++ } else if (L->is_bound()) { ++ PrintF("bound label to %d\n", L->pos()); ++ } else if (L->is_linked()) { ++ Label l; ++ l.link_to(L->pos()); ++ PrintF("unbound label"); ++ while (l.is_linked()) { ++ PrintF("@ %d ", l.pos()); ++ Instr instr = instr_at(l.pos()); ++ if ((instr & ~kImm16Mask) == 0) { ++ PrintF("value\n"); ++ } else { ++ PrintF("%d\n", instr); ++ } ++ next(&l, is_internal_reference(&l)); ++ } ++ } else { ++ PrintF("label in inconsistent state (pos = %d)\n", L->pos_); ++ } ++} ++ ++void Assembler::bind_to(Label* L, int pos) { ++ DCHECK(0 <= pos && pos <= pc_offset()); // Must have valid binding position. ++ int trampoline_pos = kInvalidSlotPos; ++ bool is_internal = false; ++ if (L->is_linked() && !trampoline_emitted_) { ++ unbound_labels_count_--; ++ if (!is_internal_reference(L)) { ++ next_buffer_check_ += kTrampolineSlotsSize; ++ } ++ } ++ ++ while (L->is_linked()) { ++ int fixup_pos = L->pos(); ++ int dist = pos - fixup_pos; ++ is_internal = is_internal_reference(L); ++ next(L, is_internal); // Call next before overwriting link with target at ++ // fixup_pos. ++ Instr instr = instr_at(fixup_pos); ++ if (is_internal) { ++ target_at_put(fixup_pos, pos, is_internal); ++ } else { ++ if (IsBranch(instr)) { ++ int branch_offset = BranchOffset(instr); ++ if (dist > branch_offset) { ++ if (trampoline_pos == kInvalidSlotPos) { ++ trampoline_pos = get_trampoline_entry(fixup_pos); ++ CHECK_NE(trampoline_pos, kInvalidSlotPos); ++ } ++ CHECK((trampoline_pos - fixup_pos) <= branch_offset); ++ target_at_put(fixup_pos, trampoline_pos, false); ++ fixup_pos = trampoline_pos; ++ } ++ target_at_put(fixup_pos, pos, false); ++ } else { ++ DCHECK(IsJ(instr) || IsLu12i_w(instr) || IsEmittedConstant(instr) || ++ IsPcAddi(instr, t8, 8)); ++ target_at_put(fixup_pos, pos, false); ++ } ++ } ++ } ++ L->bind_to(pos); ++ ++ // Keep track of the last bound label so we don't eliminate any instructions ++ // before a bound label. ++ if (pos > last_bound_pos_) last_bound_pos_ = pos; ++} ++ ++void Assembler::bind(Label* L) { ++ DCHECK(!L->is_bound()); // Label can only be bound once. 
++ bind_to(L, pc_offset()); ++} ++ ++void Assembler::next(Label* L, bool is_internal) { ++ DCHECK(L->is_linked()); ++ int link = target_at(L->pos(), is_internal); ++ if (link == kEndOfChain) { ++ L->Unuse(); ++ } else if (link == -4) { ++ // Next position is pc_offset == 0 ++ L->link_to(0); ++ } else { ++ DCHECK_GE(link, 0); ++ L->link_to(link); ++ } ++} ++ ++bool Assembler::is_near_c(Label* L) { ++ DCHECK(L->is_bound()); ++ return pc_offset() - L->pos() < kMax16BranchOffset - 4 * kInstrSize; ++} ++ ++bool Assembler::is_near(Label* L, OffsetSize bits) { ++ DCHECK(L->is_bound()); ++ return ((pc_offset() - L->pos()) < ++ (1 << (bits + 2 - 1)) - 1 - 5 * kInstrSize); ++} ++ ++bool Assembler::is_near_a(Label* L) { ++ DCHECK(L->is_bound()); ++ return pc_offset() - L->pos() <= kMax26BranchOffset - 4 * kInstrSize; ++} ++ ++int Assembler::BranchOffset(Instr instr) { ++ int bits = OffsetSize::kOffset16; ++ ++ uint32_t opcode = (instr >> 26) << 26; ++ switch (opcode) { ++ case B: ++ case BL: ++ bits = OffsetSize::kOffset26; ++ break; ++ case BNEZ: ++ case BEQZ: ++ case BCZ: ++ bits = OffsetSize::kOffset21; ++ break; ++ case BNE: ++ case BEQ: ++ case BLT: ++ case BGE: ++ case BLTU: ++ case BGEU: ++ case JIRL: ++ bits = OffsetSize::kOffset16; ++ break; ++ default: ++ break; ++ } ++ ++ return (1 << (bits + 2 - 1)) - 1; ++} ++ ++// We have to use a temporary register for things that can be relocated even ++// if they can be encoded in the LA's 16 bits of immediate-offset instruction ++// space. There is no guarantee that the relocated location can be similarly ++// encoded. ++bool Assembler::MustUseReg(RelocInfo::Mode rmode) { ++ return !RelocInfo::IsNone(rmode); ++} ++ ++void Assembler::GenB(Opcode opcode, Register rj, int32_t si21) { ++ BlockTrampolinePoolScope block_trampoline_pool(this); ++ DCHECK((BEQZ == opcode || BNEZ == opcode) && is_int21(si21) && rj.is_valid()); ++ Instr instr = opcode | (si21 & kImm16Mask) << kRkShift | ++ (rj.code() << kRjShift) | ((si21 & 0x1fffff) >> 16); ++ emit(instr); ++} ++ ++void Assembler::GenB(Opcode opcode, CFRegister cj, int32_t si21, bool isEq) { ++ BlockTrampolinePoolScope block_trampoline_pool(this); ++ DCHECK(BCZ == opcode && is_int21(si21)); ++ DCHECK(cj >= 0 && cj <= 7); ++ int32_t sc = (isEq ? 
cj : cj + 8); ++ Instr instr = opcode | (si21 & kImm16Mask) << kRkShift | (sc << kRjShift) | ++ ((si21 & 0x1fffff) >> 16); ++ emit(instr); ++} ++ ++void Assembler::GenB(Opcode opcode, int32_t si26) { ++ BlockTrampolinePoolScope block_trampoline_pool(this); ++ DCHECK((B == opcode || BL == opcode) && is_int26(si26)); ++ Instr instr = ++ opcode | ((si26 & kImm16Mask) << kRkShift) | ((si26 & kImm26Mask) >> 16); ++ emit(instr); ++} ++ ++void Assembler::GenBJ(Opcode opcode, Register rj, Register rd, int32_t si16) { ++ BlockTrampolinePoolScope block_trampoline_pool(this); ++ DCHECK(is_int16(si16)); ++ Instr instr = opcode | ((si16 & kImm16Mask) << kRkShift) | ++ (rj.code() << kRjShift) | rd.code(); ++ emit(instr); ++} ++ ++void Assembler::GenCmp(Opcode opcode, FPUCondition cond, FPURegister fk, ++ FPURegister fj, CFRegister cd) { ++ DCHECK(opcode == FCMP_COND_S || opcode == FCMP_COND_D); ++ Instr instr = opcode | cond << kCondShift | (fk.code() << kFkShift) | ++ (fj.code() << kFjShift) | cd; ++ emit(instr); ++} ++ ++void Assembler::GenSel(Opcode opcode, CFRegister ca, FPURegister fk, ++ FPURegister fj, FPURegister rd) { ++ DCHECK((opcode == FSEL)); ++ Instr instr = opcode | ca << kCondShift | (fk.code() << kFkShift) | ++ (fj.code() << kFjShift) | rd.code(); ++ emit(instr); ++} ++ ++void Assembler::GenRegister(Opcode opcode, Register rj, Register rd, ++ bool rjrd) { ++ Instr instr = 0; ++ if (rjrd) { ++ instr = opcode | (rj.code() << kRjShift) | rd.code(); ++ } else { ++ DCHECK(opcode == ASRTLE_D || opcode == ASRTGT_D); ++ instr = opcode | (rj.code() << kRkShift) | rd.code() << kRjShift; ++ } ++ emit(instr); ++} ++ ++void Assembler::GenRegister(Opcode opcode, FPURegister fj, FPURegister fd) { ++ Instr instr = opcode | (fj.code() << kFjShift) | fd.code(); ++ emit(instr); ++} ++ ++void Assembler::GenRegister(Opcode opcode, Register rj, FPURegister fd) { ++ DCHECK((opcode == MOVGR2FR_W) || (opcode == MOVGR2FR_D) || ++ (opcode == MOVGR2FRH_W)); ++ Instr instr = opcode | (rj.code() << kRjShift) | fd.code(); ++ emit(instr); ++} ++ ++void Assembler::GenRegister(Opcode opcode, FPURegister fj, Register rd) { ++ DCHECK((opcode == MOVFR2GR_S) || (opcode == MOVFR2GR_D) || ++ (opcode == MOVFRH2GR_S)); ++ Instr instr = opcode | (fj.code() << kFjShift) | rd.code(); ++ emit(instr); ++} ++ ++void Assembler::GenRegister(Opcode opcode, Register rj, FPUControlRegister fd) { ++ DCHECK((opcode == MOVGR2FCSR)); ++ Instr instr = opcode | (rj.code() << kRjShift) | fd.code(); ++ emit(instr); ++} ++ ++void Assembler::GenRegister(Opcode opcode, FPUControlRegister fj, Register rd) { ++ DCHECK((opcode == MOVFCSR2GR)); ++ Instr instr = opcode | (fj.code() << kFjShift) | rd.code(); ++ emit(instr); ++} ++ ++void Assembler::GenRegister(Opcode opcode, FPURegister fj, CFRegister cd) { ++ DCHECK((opcode == MOVFR2CF)); ++ Instr instr = opcode | (fj.code() << kFjShift) | cd; ++ emit(instr); ++} ++ ++void Assembler::GenRegister(Opcode opcode, CFRegister cj, FPURegister fd) { ++ DCHECK((opcode == MOVCF2FR)); ++ Instr instr = opcode | cj << kFjShift | fd.code(); ++ emit(instr); ++} ++ ++void Assembler::GenRegister(Opcode opcode, Register rj, CFRegister cd) { ++ DCHECK((opcode == MOVGR2CF)); ++ Instr instr = opcode | (rj.code() << kRjShift) | cd; ++ emit(instr); ++} ++ ++void Assembler::GenRegister(Opcode opcode, CFRegister cj, Register rd) { ++ DCHECK((opcode == MOVCF2GR)); ++ Instr instr = opcode | cj << kFjShift | rd.code(); ++ emit(instr); ++} ++ ++void Assembler::GenRegister(Opcode opcode, Register rk, Register rj, ++ Register rd) { 
++ Instr instr = ++ opcode | (rk.code() << kRkShift) | (rj.code() << kRjShift) | rd.code(); ++ emit(instr); ++} ++ ++void Assembler::GenRegister(Opcode opcode, FPURegister fk, FPURegister fj, ++ FPURegister fd) { ++ Instr instr = ++ opcode | (fk.code() << kFkShift) | (fj.code() << kFjShift) | fd.code(); ++ emit(instr); ++} ++ ++void Assembler::GenRegister(Opcode opcode, FPURegister fa, FPURegister fk, ++ FPURegister fj, FPURegister fd) { ++ Instr instr = opcode | (fa.code() << kFaShift) | (fk.code() << kFkShift) | ++ (fj.code() << kFjShift) | fd.code(); ++ emit(instr); ++} ++ ++void Assembler::GenRegister(Opcode opcode, Register rk, Register rj, ++ FPURegister fd) { ++ Instr instr = ++ opcode | (rk.code() << kRkShift) | (rj.code() << kRjShift) | fd.code(); ++ emit(instr); ++} ++ ++void Assembler::GenImm(Opcode opcode, int32_t bit3, Register rk, Register rj, ++ Register rd) { ++ DCHECK(is_uint3(bit3)); ++ Instr instr = opcode | (bit3 & 0x7) << kSaShift | (rk.code() << kRkShift) | ++ (rj.code() << kRjShift) | rd.code(); ++ emit(instr); ++} ++ ++void Assembler::GenImm(Opcode opcode, int32_t bit6m, int32_t bit6l, Register rj, ++ Register rd) { ++ DCHECK(is_uint6(bit6m) && is_uint6(bit6l)); ++ Instr instr = opcode | (bit6m & 0x3f) << 16 | (bit6l & 0x3f) << kRkShift | ++ (rj.code() << kRjShift) | rd.code(); ++ emit(instr); ++} ++ ++void Assembler::GenImm(Opcode opcode, int32_t bit20, Register rd) { ++ // DCHECK(is_uint20(bit20) || is_int20(bit20)); ++ Instr instr = opcode | (bit20 & 0xfffff) << kRjShift | rd.code(); ++ emit(instr); ++} ++ ++void Assembler::GenImm(Opcode opcode, int32_t bit15) { ++ DCHECK(is_uint15(bit15)); ++ Instr instr = opcode | (bit15 & 0x7fff); ++ emit(instr); ++} ++ ++void Assembler::GenImm(Opcode opcode, int32_t value, Register rj, Register rd, ++ int32_t value_bits) { ++ DCHECK(value_bits == 6 || value_bits == 12 || value_bits == 14 || ++ value_bits == 16); ++ uint32_t imm = value & 0x3f; ++ if (value_bits == 12) { ++ imm = value & kImm12Mask; ++ } else if (value_bits == 14) { ++ imm = value & 0x3fff; ++ } else if (value_bits == 16) { ++ imm = value & kImm16Mask; ++ } ++ Instr instr = opcode | imm << kRkShift | (rj.code() << kRjShift) | rd.code(); ++ emit(instr); ++} ++ ++void Assembler::GenImm(Opcode opcode, int32_t bit12, Register rj, ++ FPURegister fd) { ++ DCHECK(is_int12(bit12)); ++ Instr instr = opcode | ((bit12 & kImm12Mask) << kRkShift) | ++ (rj.code() << kRjShift) | fd.code(); ++ emit(instr); ++} ++ ++// Returns the next free trampoline entry. ++int32_t Assembler::get_trampoline_entry(int32_t pos) { ++ int32_t trampoline_entry = kInvalidSlotPos; ++ if (!internal_trampoline_exception_) { ++ if (trampoline_.start() > pos) { ++ trampoline_entry = trampoline_.take_slot(); ++ } ++ ++ if (kInvalidSlotPos == trampoline_entry) { ++ internal_trampoline_exception_ = true; ++ } ++ } ++ return trampoline_entry; ++} ++ ++uint64_t Assembler::jump_address(Label* L) { ++ int64_t target_pos; ++ if (L->is_bound()) { ++ target_pos = L->pos(); ++ } else { ++ if (L->is_linked()) { ++ target_pos = L->pos(); // L's link. ++ L->link_to(pc_offset()); ++ } else { ++ L->link_to(pc_offset()); ++ return kEndOfJumpChain; ++ } ++ } ++ uint64_t imm = reinterpret_cast(buffer_start_) + target_pos; ++ DCHECK_EQ(imm & 3, 0); ++ ++ return imm; ++} ++ ++uint64_t Assembler::branch_long_offset(Label* L) { ++ int64_t target_pos; ++ ++ if (L->is_bound()) { ++ target_pos = L->pos(); ++ } else { ++ if (L->is_linked()) { ++ target_pos = L->pos(); // L's link. 
++ L->link_to(pc_offset()); ++ } else { ++ L->link_to(pc_offset()); ++ return kEndOfJumpChain; ++ } ++ } ++ int64_t offset = target_pos - (pc_offset() + kLongBranchPCOffset); ++ DCHECK_EQ(offset & 3, 0); ++ ++ return static_cast(offset); ++} ++ ++int32_t Assembler::branch_offset_helper(Label* L, OffsetSize bits) { ++ int32_t target_pos; ++ ++ if (L->is_bound()) { ++ target_pos = L->pos(); ++ } else { ++ if (L->is_linked()) { ++ target_pos = L->pos(); ++ L->link_to(pc_offset()); ++ } else { ++ L->link_to(pc_offset()); ++ if (!trampoline_emitted_) { ++ unbound_labels_count_++; ++ next_buffer_check_ -= kTrampolineSlotsSize; ++ } ++ return kEndOfChain; ++ } ++ } ++ ++ int32_t offset = target_pos - (pc_offset() + kBranchPCOffset); ++ DCHECK(is_intn(offset, bits + 2)); ++ DCHECK_EQ(offset & 3, 0); ++ ++ return offset; ++} ++ ++void Assembler::label_at_put(Label* L, int at_offset) { ++ int target_pos; ++ if (L->is_bound()) { ++ target_pos = L->pos(); ++ instr_at_put(at_offset, target_pos + (Code::kHeaderSize - kHeapObjectTag)); ++ } else { ++ if (L->is_linked()) { ++ target_pos = L->pos(); // L's link. ++ int32_t imm18 = target_pos - at_offset; ++ DCHECK_EQ(imm18 & 3, 0); ++ int32_t imm16 = imm18 >> 2; ++ DCHECK(is_int16(imm16)); ++ instr_at_put(at_offset, (imm16 & kImm16Mask)); ++ } else { ++ target_pos = kEndOfChain; ++ instr_at_put(at_offset, 0); ++ if (!trampoline_emitted_) { ++ unbound_labels_count_++; ++ next_buffer_check_ -= kTrampolineSlotsSize; ++ } ++ } ++ L->link_to(at_offset); ++ } ++ // TODO PushBackTrack() ++} ++ ++//------- Branch and jump instructions -------- ++ ++void Assembler::b(int32_t offset) { GenB(B, offset); } ++ ++void Assembler::bl(int32_t offset) { GenB(BL, offset); } ++ ++void Assembler::beq(Register rj, Register rd, int32_t offset) { ++ GenBJ(BEQ, rj, rd, offset); ++} ++ ++void Assembler::bne(Register rj, Register rd, int32_t offset) { ++ GenBJ(BNE, rj, rd, offset); ++} ++ ++void Assembler::blt(Register rj, Register rd, int32_t offset) { ++ GenBJ(BLT, rj, rd, offset); ++} ++ ++void Assembler::bge(Register rj, Register rd, int32_t offset) { ++ GenBJ(BGE, rj, rd, offset); ++} ++ ++void Assembler::bltu(Register rj, Register rd, int32_t offset) { ++ GenBJ(BLTU, rj, rd, offset); ++} ++ ++void Assembler::bgeu(Register rj, Register rd, int32_t offset) { ++ GenBJ(BGEU, rj, rd, offset); ++} ++ ++void Assembler::beqz(Register rj, int32_t offset) { GenB(BEQZ, rj, offset); } ++void Assembler::bnez(Register rj, int32_t offset) { GenB(BNEZ, rj, offset); } ++ ++void Assembler::jirl(Register rd, Register rj, int32_t offset) { ++ GenBJ(JIRL, rj, rd, offset); ++} ++ ++void Assembler::bceqz(CFRegister cj, int32_t si21) { ++ GenB(BCZ, cj, si21, true); ++} ++ ++void Assembler::bcnez(CFRegister cj, int32_t si21) { ++ GenB(BCZ, cj, si21, false); ++} ++ ++// -------Data-processing-instructions--------- ++ ++// Arithmetic. 
++void Assembler::add_w(Register rd, Register rj, Register rk) { ++ GenRegister(ADD_W, rk, rj, rd); ++} ++ ++void Assembler::add_d(Register rd, Register rj, Register rk) { ++ GenRegister(ADD_D, rk, rj, rd); ++} ++ ++void Assembler::sub_w(Register rd, Register rj, Register rk) { ++ GenRegister(SUB_W, rk, rj, rd); ++} ++ ++void Assembler::sub_d(Register rd, Register rj, Register rk) { ++ GenRegister(SUB_D, rk, rj, rd); ++} ++ ++void Assembler::addi_w(Register rd, Register rj, int32_t si12) { ++ GenImm(ADDI_W, si12, rj, rd, 12); ++} ++ ++void Assembler::addi_d(Register rd, Register rj, int32_t si12) { ++ GenImm(ADDI_D, si12, rj, rd, 12); ++} ++ ++void Assembler::addu16i_d(Register rd, Register rj, int32_t si16) { ++ GenImm(ADDU16I_D, si16, rj, rd, 16); ++} ++ ++void Assembler::alsl_w(Register rd, Register rj, Register rk, int32_t sa2) { ++ DCHECK(is_uint2(sa2 - 1)); ++ GenImm(ALSL_W, sa2 - 1, rk, rj, rd); ++} ++ ++void Assembler::alsl_wu(Register rd, Register rj, Register rk, int32_t sa2) { ++ DCHECK(is_uint2(sa2 - 1)); ++ GenImm(ALSL_WU, sa2 + 3, rk, rj, rd); ++} ++ ++void Assembler::alsl_d(Register rd, Register rj, Register rk, int32_t sa2) { ++ DCHECK(is_uint2(sa2 - 1)); ++ GenImm(ALSL_D, sa2 - 1, rk, rj, rd); ++} ++ ++void Assembler::lu12i_w(Register rd, int32_t si20) { ++ GenImm(LU12I_W, si20, rd); ++} ++ ++void Assembler::lu32i_d(Register rd, int32_t si20) { ++ GenImm(LU32I_D, si20, rd); ++} ++ ++void Assembler::lu52i_d(Register rd, Register rj, int32_t si12) { ++ GenImm(LU52I_D, si12, rj, rd, 12); ++} ++ ++void Assembler::slt(Register rd, Register rj, Register rk) { ++ GenRegister(SLT, rk, rj, rd); ++} ++ ++void Assembler::sltu(Register rd, Register rj, Register rk) { ++ GenRegister(SLTU, rk, rj, rd); ++} ++ ++void Assembler::slti(Register rd, Register rj, int32_t si12) { ++ GenImm(SLTI, si12, rj, rd, 12); ++} ++ ++void Assembler::sltui(Register rd, Register rj, int32_t si12) { ++ GenImm(SLTUI, si12, rj, rd, 12); ++} ++ ++void Assembler::pcaddi(Register rd, int32_t si20) { GenImm(PCADDI, si20, rd); } ++ ++void Assembler::pcaddu12i(Register rd, int32_t si20) { ++ GenImm(PCADDU12I, si20, rd); ++} ++ ++void Assembler::pcaddu18i(Register rd, int32_t si20) { ++ GenImm(PCADDU18I, si20, rd); ++} ++ ++void Assembler::pcalau12i(Register rd, int32_t si20) { ++ GenImm(PCALAU12I, si20, rd); ++} ++ ++void Assembler::and_(Register rd, Register rj, Register rk) { ++ GenRegister(AND, rk, rj, rd); ++} ++ ++void Assembler::or_(Register rd, Register rj, Register rk) { ++ GenRegister(OR, rk, rj, rd); ++} ++ ++void Assembler::xor_(Register rd, Register rj, Register rk) { ++ GenRegister(XOR, rk, rj, rd); ++} ++ ++void Assembler::nor(Register rd, Register rj, Register rk) { ++ GenRegister(NOR, rk, rj, rd); ++} ++ ++void Assembler::andn(Register rd, Register rj, Register rk) { ++ GenRegister(ANDN, rk, rj, rd); ++} ++ ++void Assembler::orn(Register rd, Register rj, Register rk) { ++ GenRegister(ORN, rk, rj, rd); ++} ++ ++void Assembler::andi(Register rd, Register rj, int32_t ui12) { ++ GenImm(ANDI, ui12, rj, rd, 12); ++} ++ ++void Assembler::ori(Register rd, Register rj, int32_t ui12) { ++ GenImm(ORI, ui12, rj, rd, 12); ++} ++ ++void Assembler::xori(Register rd, Register rj, int32_t ui12) { ++ GenImm(XORI, ui12, rj, rd, 12); ++} ++ ++void Assembler::mul_w(Register rd, Register rj, Register rk) { ++ GenRegister(MUL_W, rk, rj, rd); ++} ++ ++void Assembler::mulh_w(Register rd, Register rj, Register rk) { ++ GenRegister(MULH_W, rk, rj, rd); ++} ++ ++void Assembler::mulh_wu(Register rd, Register rj, Register rk) { 
++ GenRegister(MULH_WU, rk, rj, rd); ++} ++ ++void Assembler::mul_d(Register rd, Register rj, Register rk) { ++ GenRegister(MUL_D, rk, rj, rd); ++} ++ ++void Assembler::mulh_d(Register rd, Register rj, Register rk) { ++ GenRegister(MULH_D, rk, rj, rd); ++} ++ ++void Assembler::mulh_du(Register rd, Register rj, Register rk) { ++ GenRegister(MULH_DU, rk, rj, rd); ++} ++ ++void Assembler::mulw_d_w(Register rd, Register rj, Register rk) { ++ GenRegister(MULW_D_W, rk, rj, rd); ++} ++ ++void Assembler::mulw_d_wu(Register rd, Register rj, Register rk) { ++ GenRegister(MULW_D_WU, rk, rj, rd); ++} ++ ++void Assembler::div_w(Register rd, Register rj, Register rk) { ++ GenRegister(DIV_W, rk, rj, rd); ++} ++ ++void Assembler::mod_w(Register rd, Register rj, Register rk) { ++ GenRegister(MOD_W, rk, rj, rd); ++} ++ ++void Assembler::div_wu(Register rd, Register rj, Register rk) { ++ GenRegister(DIV_WU, rk, rj, rd); ++} ++ ++void Assembler::mod_wu(Register rd, Register rj, Register rk) { ++ GenRegister(MOD_WU, rk, rj, rd); ++} ++ ++void Assembler::div_d(Register rd, Register rj, Register rk) { ++ GenRegister(DIV_D, rk, rj, rd); ++} ++ ++void Assembler::mod_d(Register rd, Register rj, Register rk) { ++ GenRegister(MOD_D, rk, rj, rd); ++} ++ ++void Assembler::div_du(Register rd, Register rj, Register rk) { ++ GenRegister(DIV_DU, rk, rj, rd); ++} ++ ++void Assembler::mod_du(Register rd, Register rj, Register rk) { ++ GenRegister(MOD_DU, rk, rj, rd); ++} ++ ++// Shifts. ++void Assembler::sll_w(Register rd, Register rj, Register rk) { ++ GenRegister(SLL_W, rk, rj, rd); ++} ++ ++void Assembler::srl_w(Register rd, Register rj, Register rk) { ++ GenRegister(SRL_W, rk, rj, rd); ++} ++ ++void Assembler::sra_w(Register rd, Register rj, Register rk) { ++ GenRegister(SRA_W, rk, rj, rd); ++} ++ ++void Assembler::rotr_w(Register rd, Register rj, Register rk) { ++ GenRegister(ROTR_W, rk, rj, rd); ++} ++ ++void Assembler::slli_w(Register rd, Register rj, int32_t ui5) { ++ DCHECK(is_uint5(ui5)); ++ GenImm(SLLI_W, ui5 + 0x20, rj, rd, 6); ++} ++ ++void Assembler::srli_w(Register rd, Register rj, int32_t ui5) { ++ DCHECK(is_uint5(ui5)); ++ GenImm(SRLI_W, ui5 + 0x20, rj, rd, 6); ++} ++ ++void Assembler::srai_w(Register rd, Register rj, int32_t ui5) { ++ DCHECK(is_uint5(ui5)); ++ GenImm(SRAI_W, ui5 + 0x20, rj, rd, 6); ++} ++ ++void Assembler::rotri_w(Register rd, Register rj, int32_t ui5) { ++ DCHECK(is_uint5(ui5)); ++ GenImm(ROTRI_W, ui5 + 0x20, rj, rd, 6); ++} ++ ++void Assembler::sll_d(Register rd, Register rj, Register rk) { ++ GenRegister(SLL_D, rk, rj, rd); ++} ++ ++void Assembler::srl_d(Register rd, Register rj, Register rk) { ++ GenRegister(SRL_D, rk, rj, rd); ++} ++ ++void Assembler::sra_d(Register rd, Register rj, Register rk) { ++ GenRegister(SRA_D, rk, rj, rd); ++} ++ ++void Assembler::rotr_d(Register rd, Register rj, Register rk) { ++ GenRegister(ROTR_D, rk, rj, rd); ++} ++ ++void Assembler::slli_d(Register rd, Register rj, int32_t ui6) { ++ GenImm(SLLI_D, ui6, rj, rd, 6); ++} ++ ++void Assembler::srli_d(Register rd, Register rj, int32_t ui6) { ++ GenImm(SRLI_D, ui6, rj, rd, 6); ++} ++ ++void Assembler::srai_d(Register rd, Register rj, int32_t ui6) { ++ GenImm(SRAI_D, ui6, rj, rd, 6); ++} ++ ++void Assembler::rotri_d(Register rd, Register rj, int32_t ui6) { ++ GenImm(ROTRI_D, ui6, rj, rd, 6); ++} ++ ++// Bit twiddling. 
++void Assembler::ext_w_b(Register rd, Register rj) { ++ GenRegister(EXT_W_B, rj, rd); ++} ++ ++void Assembler::ext_w_h(Register rd, Register rj) { ++ GenRegister(EXT_W_H, rj, rd); ++} ++ ++void Assembler::clo_w(Register rd, Register rj) { GenRegister(CLO_W, rj, rd); } ++ ++void Assembler::clz_w(Register rd, Register rj) { GenRegister(CLZ_W, rj, rd); } ++ ++void Assembler::cto_w(Register rd, Register rj) { GenRegister(CTO_W, rj, rd); } ++ ++void Assembler::ctz_w(Register rd, Register rj) { GenRegister(CTZ_W, rj, rd); } ++ ++void Assembler::clo_d(Register rd, Register rj) { GenRegister(CLO_D, rj, rd); } ++ ++void Assembler::clz_d(Register rd, Register rj) { GenRegister(CLZ_D, rj, rd); } ++ ++void Assembler::cto_d(Register rd, Register rj) { GenRegister(CTO_D, rj, rd); } ++ ++void Assembler::ctz_d(Register rd, Register rj) { GenRegister(CTZ_D, rj, rd); } ++ ++void Assembler::bytepick_w(Register rd, Register rj, Register rk, int32_t sa2) { ++ DCHECK(is_uint2(sa2)); ++ GenImm(BYTEPICK_W, sa2, rk, rj, rd); ++} ++ ++void Assembler::bytepick_d(Register rd, Register rj, Register rk, int32_t sa3) { ++ GenImm(BYTEPICK_D, sa3, rk, rj, rd); ++} ++ ++void Assembler::revb_2h(Register rd, Register rj) { ++ GenRegister(REVB_2H, rj, rd); ++} ++ ++void Assembler::revb_4h(Register rd, Register rj) { ++ GenRegister(REVB_4H, rj, rd); ++} ++ ++void Assembler::revb_2w(Register rd, Register rj) { ++ GenRegister(REVB_2W, rj, rd); ++} ++ ++void Assembler::revb_d(Register rd, Register rj) { ++ GenRegister(REVB_D, rj, rd); ++} ++ ++void Assembler::revh_2w(Register rd, Register rj) { ++ GenRegister(REVH_2W, rj, rd); ++} ++ ++void Assembler::revh_d(Register rd, Register rj) { ++ GenRegister(REVH_D, rj, rd); ++} ++ ++void Assembler::bitrev_4b(Register rd, Register rj) { ++ GenRegister(BITREV_4B, rj, rd); ++} ++ ++void Assembler::bitrev_8b(Register rd, Register rj) { ++ GenRegister(BITREV_8B, rj, rd); ++} ++ ++void Assembler::bitrev_w(Register rd, Register rj) { ++ GenRegister(BITREV_W, rj, rd); ++} ++ ++void Assembler::bitrev_d(Register rd, Register rj) { ++ GenRegister(BITREV_D, rj, rd); ++} ++ ++void Assembler::bstrins_w(Register rd, Register rj, int32_t msbw, ++ int32_t lsbw) { ++ DCHECK(is_uint5(msbw) && is_uint5(lsbw)); ++ GenImm(BSTR_W, msbw + 0x20, lsbw, rj, rd); ++} ++ ++void Assembler::bstrins_d(Register rd, Register rj, int32_t msbd, ++ int32_t lsbd) { ++ GenImm(BSTRINS_D, msbd, lsbd, rj, rd); ++} ++ ++void Assembler::bstrpick_w(Register rd, Register rj, int32_t msbw, ++ int32_t lsbw) { ++ DCHECK(is_uint5(msbw) && is_uint5(lsbw)); ++ GenImm(BSTR_W, msbw + 0x20, lsbw + 0x20, rj, rd); ++} ++ ++void Assembler::bstrpick_d(Register rd, Register rj, int32_t msbd, ++ int32_t lsbd) { ++ GenImm(BSTRPICK_D, msbd, lsbd, rj, rd); ++} ++ ++void Assembler::maskeqz(Register rd, Register rj, Register rk) { ++ GenRegister(MASKEQZ, rk, rj, rd); ++} ++ ++void Assembler::masknez(Register rd, Register rj, Register rk) { ++ GenRegister(MASKNEZ, rk, rj, rd); ++} ++ ++// Memory-instructions ++void Assembler::ld_b(Register rd, Register rj, int32_t si12) { ++ GenImm(LD_B, si12, rj, rd, 12); ++} ++ ++void Assembler::ld_h(Register rd, Register rj, int32_t si12) { ++ GenImm(LD_H, si12, rj, rd, 12); ++} ++ ++void Assembler::ld_w(Register rd, Register rj, int32_t si12) { ++ GenImm(LD_W, si12, rj, rd, 12); ++} ++ ++void Assembler::ld_d(Register rd, Register rj, int32_t si12) { ++ GenImm(LD_D, si12, rj, rd, 12); ++} ++ ++void Assembler::ld_bu(Register rd, Register rj, int32_t si12) { ++ GenImm(LD_BU, si12, rj, rd, 12); ++} ++ ++void 
Assembler::ld_hu(Register rd, Register rj, int32_t si12) { ++ GenImm(LD_HU, si12, rj, rd, 12); ++} ++ ++void Assembler::ld_wu(Register rd, Register rj, int32_t si12) { ++ GenImm(LD_WU, si12, rj, rd, 12); ++} ++ ++void Assembler::st_b(Register rd, Register rj, int32_t si12) { ++ GenImm(ST_B, si12, rj, rd, 12); ++} ++ ++void Assembler::st_h(Register rd, Register rj, int32_t si12) { ++ GenImm(ST_H, si12, rj, rd, 12); ++} ++ ++void Assembler::st_w(Register rd, Register rj, int32_t si12) { ++ GenImm(ST_W, si12, rj, rd, 12); ++} ++ ++void Assembler::st_d(Register rd, Register rj, int32_t si12) { ++ GenImm(ST_D, si12, rj, rd, 12); ++} ++ ++void Assembler::ldx_b(Register rd, Register rj, Register rk) { ++ GenRegister(LDX_B, rk, rj, rd); ++} ++ ++void Assembler::ldx_h(Register rd, Register rj, Register rk) { ++ GenRegister(LDX_H, rk, rj, rd); ++} ++ ++void Assembler::ldx_w(Register rd, Register rj, Register rk) { ++ GenRegister(LDX_W, rk, rj, rd); ++} ++ ++void Assembler::ldx_d(Register rd, Register rj, Register rk) { ++ GenRegister(LDX_D, rk, rj, rd); ++} ++ ++void Assembler::ldx_bu(Register rd, Register rj, Register rk) { ++ GenRegister(LDX_BU, rk, rj, rd); ++} ++ ++void Assembler::ldx_hu(Register rd, Register rj, Register rk) { ++ GenRegister(LDX_HU, rk, rj, rd); ++} ++ ++void Assembler::ldx_wu(Register rd, Register rj, Register rk) { ++ GenRegister(LDX_WU, rk, rj, rd); ++} ++ ++void Assembler::stx_b(Register rd, Register rj, Register rk) { ++ GenRegister(STX_B, rk, rj, rd); ++} ++ ++void Assembler::stx_h(Register rd, Register rj, Register rk) { ++ GenRegister(STX_H, rk, rj, rd); ++} ++ ++void Assembler::stx_w(Register rd, Register rj, Register rk) { ++ GenRegister(STX_W, rk, rj, rd); ++} ++ ++void Assembler::stx_d(Register rd, Register rj, Register rk) { ++ GenRegister(STX_D, rk, rj, rd); ++} ++ ++void Assembler::ldptr_w(Register rd, Register rj, int32_t si14) { ++ DCHECK(is_int16(si14) && ((si14 & 0x3) == 0)); ++ GenImm(LDPTR_W, si14 >> 2, rj, rd, 14); ++} ++ ++void Assembler::ldptr_d(Register rd, Register rj, int32_t si14) { ++ DCHECK(is_int16(si14) && ((si14 & 0x3) == 0)); ++ GenImm(LDPTR_D, si14 >> 2, rj, rd, 14); ++} ++ ++void Assembler::stptr_w(Register rd, Register rj, int32_t si14) { ++ DCHECK(is_int16(si14) && ((si14 & 0x3) == 0)); ++ GenImm(STPTR_W, si14 >> 2, rj, rd, 14); ++} ++ ++void Assembler::stptr_d(Register rd, Register rj, int32_t si14) { ++ DCHECK(is_int16(si14) && ((si14 & 0x3) == 0)); ++ GenImm(STPTR_D, si14 >> 2, rj, rd, 14); ++} ++ ++void Assembler::preld(int32_t hint, Register rj, int32_t si12) { ++ DCHECK(is_uint5(hint)); ++ GenImm(PRELD, si12, rj, Register::from_code(hint), 12); ++} ++ ++void Assembler::preldx(int32_t hint, Register rj, Register rk) { ++ DCHECK(is_uint5(hint)); ++ GenRegister(PRELDX, rk, rj, Register::from_code(hint)); ++} ++ ++void Assembler::ldgt_b(Register rd, Register rj, Register rk) { ++ GenRegister(LDGT_B, rk, rj, rd); ++} ++ ++void Assembler::ldgt_h(Register rd, Register rj, Register rk) { ++ GenRegister(LDGT_H, rk, rj, rd); ++} ++ ++void Assembler::ldgt_w(Register rd, Register rj, Register rk) { ++ GenRegister(LDGT_W, rk, rj, rd); ++} ++ ++void Assembler::ldgt_d(Register rd, Register rj, Register rk) { ++ GenRegister(LDGT_D, rk, rj, rd); ++} ++ ++void Assembler::ldle_b(Register rd, Register rj, Register rk) { ++ GenRegister(LDLE_B, rk, rj, rd); ++} ++ ++void Assembler::ldle_h(Register rd, Register rj, Register rk) { ++ GenRegister(LDLE_H, rk, rj, rd); ++} ++ ++void Assembler::ldle_w(Register rd, Register rj, Register rk) { ++ 
GenRegister(LDLE_W, rk, rj, rd); ++} ++ ++void Assembler::ldle_d(Register rd, Register rj, Register rk) { ++ GenRegister(LDLE_D, rk, rj, rd); ++} ++ ++void Assembler::stgt_b(Register rd, Register rj, Register rk) { ++ GenRegister(STGT_B, rk, rj, rd); ++} ++ ++void Assembler::stgt_h(Register rd, Register rj, Register rk) { ++ GenRegister(STGT_H, rk, rj, rd); ++} ++ ++void Assembler::stgt_w(Register rd, Register rj, Register rk) { ++ GenRegister(STGT_W, rk, rj, rd); ++} ++ ++void Assembler::stgt_d(Register rd, Register rj, Register rk) { ++ GenRegister(STGT_D, rk, rj, rd); ++} ++ ++void Assembler::stle_b(Register rd, Register rj, Register rk) { ++ GenRegister(STLE_B, rk, rj, rd); ++} ++ ++void Assembler::stle_h(Register rd, Register rj, Register rk) { ++ GenRegister(STLE_H, rk, rj, rd); ++} ++ ++void Assembler::stle_w(Register rd, Register rj, Register rk) { ++ GenRegister(STLE_W, rk, rj, rd); ++} ++ ++void Assembler::stle_d(Register rd, Register rj, Register rk) { ++ GenRegister(STLE_D, rk, rj, rd); ++} ++ ++void Assembler::amswap_w(Register rd, Register rk, Register rj) { ++ GenRegister(AMSWAP_W, rk, rj, rd); ++} ++ ++void Assembler::amswap_d(Register rd, Register rk, Register rj) { ++ GenRegister(AMSWAP_D, rk, rj, rd); ++} ++ ++void Assembler::amadd_w(Register rd, Register rk, Register rj) { ++ GenRegister(AMADD_W, rk, rj, rd); ++} ++ ++void Assembler::amadd_d(Register rd, Register rk, Register rj) { ++ GenRegister(AMADD_D, rk, rj, rd); ++} ++ ++void Assembler::amand_w(Register rd, Register rk, Register rj) { ++ GenRegister(AMAND_W, rk, rj, rd); ++} ++ ++void Assembler::amand_d(Register rd, Register rk, Register rj) { ++ GenRegister(AMAND_D, rk, rj, rd); ++} ++ ++void Assembler::amor_w(Register rd, Register rk, Register rj) { ++ GenRegister(AMOR_W, rk, rj, rd); ++} ++ ++void Assembler::amor_d(Register rd, Register rk, Register rj) { ++ GenRegister(AMOR_D, rk, rj, rd); ++} ++ ++void Assembler::amxor_w(Register rd, Register rk, Register rj) { ++ GenRegister(AMXOR_W, rk, rj, rd); ++} ++ ++void Assembler::amxor_d(Register rd, Register rk, Register rj) { ++ GenRegister(AMXOR_D, rk, rj, rd); ++} ++ ++void Assembler::ammax_w(Register rd, Register rk, Register rj) { ++ GenRegister(AMMAX_W, rk, rj, rd); ++} ++ ++void Assembler::ammax_d(Register rd, Register rk, Register rj) { ++ GenRegister(AMMAX_D, rk, rj, rd); ++} ++ ++void Assembler::ammin_w(Register rd, Register rk, Register rj) { ++ GenRegister(AMMIN_W, rk, rj, rd); ++} ++ ++void Assembler::ammin_d(Register rd, Register rk, Register rj) { ++ GenRegister(AMMIN_D, rk, rj, rd); ++} ++ ++void Assembler::ammax_wu(Register rd, Register rk, Register rj) { ++ GenRegister(AMMAX_WU, rk, rj, rd); ++} ++ ++void Assembler::ammax_du(Register rd, Register rk, Register rj) { ++ GenRegister(AMMAX_DU, rk, rj, rd); ++} ++ ++void Assembler::ammin_wu(Register rd, Register rk, Register rj) { ++ GenRegister(AMMIN_WU, rk, rj, rd); ++} ++ ++void Assembler::ammin_du(Register rd, Register rk, Register rj) { ++ GenRegister(AMMIN_DU, rk, rj, rd); ++} ++ ++void Assembler::amswap_db_w(Register rd, Register rk, Register rj) { ++ GenRegister(AMSWAP_DB_W, rk, rj, rd); ++} ++ ++void Assembler::amswap_db_d(Register rd, Register rk, Register rj) { ++ GenRegister(AMSWAP_DB_D, rk, rj, rd); ++} ++ ++void Assembler::amadd_db_w(Register rd, Register rk, Register rj) { ++ GenRegister(AMADD_DB_W, rk, rj, rd); ++} ++ ++void Assembler::amadd_db_d(Register rd, Register rk, Register rj) { ++ GenRegister(AMADD_DB_D, rk, rj, rd); ++} ++ ++void Assembler::amand_db_w(Register rd, Register rk, 
Register rj) { ++ GenRegister(AMAND_DB_W, rk, rj, rd); ++} ++ ++void Assembler::amand_db_d(Register rd, Register rk, Register rj) { ++ GenRegister(AMAND_DB_D, rk, rj, rd); ++} ++ ++void Assembler::amor_db_w(Register rd, Register rk, Register rj) { ++ GenRegister(AMOR_DB_W, rk, rj, rd); ++} ++ ++void Assembler::amor_db_d(Register rd, Register rk, Register rj) { ++ GenRegister(AMOR_DB_D, rk, rj, rd); ++} ++ ++void Assembler::amxor_db_w(Register rd, Register rk, Register rj) { ++ GenRegister(AMXOR_DB_W, rk, rj, rd); ++} ++ ++void Assembler::amxor_db_d(Register rd, Register rk, Register rj) { ++ GenRegister(AMXOR_DB_D, rk, rj, rd); ++} ++ ++void Assembler::ammax_db_w(Register rd, Register rk, Register rj) { ++ GenRegister(AMMAX_DB_W, rk, rj, rd); ++} ++ ++void Assembler::ammax_db_d(Register rd, Register rk, Register rj) { ++ GenRegister(AMMAX_DB_D, rk, rj, rd); ++} ++ ++void Assembler::ammin_db_w(Register rd, Register rk, Register rj) { ++ GenRegister(AMMIN_DB_W, rk, rj, rd); ++} ++ ++void Assembler::ammin_db_d(Register rd, Register rk, Register rj) { ++ GenRegister(AMMIN_DB_D, rk, rj, rd); ++} ++ ++void Assembler::ammax_db_wu(Register rd, Register rk, Register rj) { ++ GenRegister(AMMAX_DB_WU, rk, rj, rd); ++} ++ ++void Assembler::ammax_db_du(Register rd, Register rk, Register rj) { ++ GenRegister(AMMAX_DB_DU, rk, rj, rd); ++} ++ ++void Assembler::ammin_db_wu(Register rd, Register rk, Register rj) { ++ GenRegister(AMMIN_DB_WU, rk, rj, rd); ++} ++ ++void Assembler::ammin_db_du(Register rd, Register rk, Register rj) { ++ GenRegister(AMMIN_DB_DU, rk, rj, rd); ++} ++ ++void Assembler::ll_w(Register rd, Register rj, int32_t si14) { ++ DCHECK(is_int16(si14) && ((si14 & 0x3) == 0)); ++ GenImm(LL_W, si14 >> 2, rj, rd, 14); ++} ++ ++void Assembler::ll_d(Register rd, Register rj, int32_t si14) { ++ DCHECK(is_int16(si14) && ((si14 & 0x3) == 0)); ++ GenImm(LL_D, si14 >> 2, rj, rd, 14); ++} ++ ++void Assembler::sc_w(Register rd, Register rj, int32_t si14) { ++ DCHECK(is_int16(si14) && ((si14 & 0x3) == 0)); ++ GenImm(SC_W, si14 >> 2, rj, rd, 14); ++} ++ ++void Assembler::sc_d(Register rd, Register rj, int32_t si14) { ++ DCHECK(is_int16(si14) && ((si14 & 0x3) == 0)); ++ GenImm(SC_D, si14 >> 2, rj, rd, 14); ++} ++ ++void Assembler::dbar(int32_t hint) { GenImm(DBAR, hint); } ++ ++void Assembler::ibar(int32_t hint) { GenImm(IBAR, hint); } ++ ++void Assembler::crc_w_b_w(Register rd, Register rj, Register rk) { ++ GenRegister(CRC_W_B_W, rk, rj, rd); ++} ++ ++void Assembler::crc_w_h_w(Register rd, Register rj, Register rk) { ++ GenRegister(CRC_W_H_W, rk, rj, rd); ++} ++ ++void Assembler::crc_w_w_w(Register rd, Register rj, Register rk) { ++ GenRegister(CRC_W_W_W, rk, rj, rd); ++} ++ ++void Assembler::crc_w_d_w(Register rd, Register rj, Register rk) { ++ GenRegister(CRC_W_D_W, rk, rj, rd); ++} ++ ++void Assembler::crcc_w_b_w(Register rd, Register rj, Register rk) { ++ GenRegister(CRCC_W_B_W, rk, rj, rd); ++} ++ ++void Assembler::crcc_w_h_w(Register rd, Register rj, Register rk) { ++ GenRegister(CRCC_W_H_W, rk, rj, rd); ++} ++ ++void Assembler::crcc_w_w_w(Register rd, Register rj, Register rk) { ++ GenRegister(CRCC_W_W_W, rk, rj, rd); ++} ++ ++void Assembler::crcc_w_d_w(Register rd, Register rj, Register rk) { ++ GenRegister(CRCC_W_D_W, rk, rj, rd); ++} ++ ++void Assembler::syscall(int32_t code) { GenImm(SYSCALL, code); } ++ ++void Assembler::asrtle_d(Register rj, Register rk) { ++ GenRegister(ASRTLE_D, rk, rj, false); ++} ++ ++void Assembler::asrtgt_d(Register rj, Register rk) { ++ GenRegister(ASRTGT_D, rk, rj, 
false); ++} ++ ++void Assembler::rdtimel_w(Register rd, Register rj) { ++ GenRegister(RDTIMEL_W, rj, rd); ++} ++ ++void Assembler::rdtimeh_w(Register rd, Register rj) { ++ GenRegister(RDTIMEH_W, rj, rd); ++} ++ ++void Assembler::rdtime_d(Register rd, Register rj) { ++ GenRegister(RDTIME_D, rj, rd); ++} ++ ++void Assembler::cpucfg(Register rd, Register rj) { ++ GenRegister(CPUCFG_W, rj, rd); ++} ++ ++// Break / Trap instructions. ++void Assembler::break_(uint32_t code, bool break_as_stop) { ++ DCHECK( ++ (break_as_stop && code <= kMaxStopCode && code > kMaxWatchpointCode) || ++ (!break_as_stop && (code > kMaxStopCode || code <= kMaxWatchpointCode))); ++ GenImm(BREAK, code); ++} ++ ++void Assembler::stop(uint32_t code) { ++ DCHECK_GT(code, kMaxWatchpointCode); ++ DCHECK_LE(code, kMaxStopCode); ++#if defined(V8_HOST_ARCH_LA64) ++ break_(0x4321); ++#else // V8_HOST_ARCH_LA64 ++ break_(code, true); ++#endif ++} ++ ++void Assembler::fadd_s(FPURegister fd, FPURegister fj, FPURegister fk) { ++ GenRegister(FADD_S, fk, fj, fd); ++} ++ ++void Assembler::fadd_d(FPURegister fd, FPURegister fj, FPURegister fk) { ++ GenRegister(FADD_D, fk, fj, fd); ++} ++ ++void Assembler::fsub_s(FPURegister fd, FPURegister fj, FPURegister fk) { ++ GenRegister(FSUB_S, fk, fj, fd); ++} ++ ++void Assembler::fsub_d(FPURegister fd, FPURegister fj, FPURegister fk) { ++ GenRegister(FSUB_D, fk, fj, fd); ++} ++ ++void Assembler::fmul_s(FPURegister fd, FPURegister fj, FPURegister fk) { ++ GenRegister(FMUL_S, fk, fj, fd); ++} ++ ++void Assembler::fmul_d(FPURegister fd, FPURegister fj, FPURegister fk) { ++ GenRegister(FMUL_D, fk, fj, fd); ++} ++ ++void Assembler::fdiv_s(FPURegister fd, FPURegister fj, FPURegister fk) { ++ GenRegister(FDIV_S, fk, fj, fd); ++} ++ ++void Assembler::fdiv_d(FPURegister fd, FPURegister fj, FPURegister fk) { ++ GenRegister(FDIV_D, fk, fj, fd); ++} ++ ++void Assembler::fmadd_s(FPURegister fd, FPURegister fj, FPURegister fk, ++ FPURegister fa) { ++ GenRegister(FMADD_S, fa, fk, fj, fd); ++} ++ ++void Assembler::fmadd_d(FPURegister fd, FPURegister fj, FPURegister fk, ++ FPURegister fa) { ++ GenRegister(FMADD_D, fa, fk, fj, fd); ++} ++ ++void Assembler::fmsub_s(FPURegister fd, FPURegister fj, FPURegister fk, ++ FPURegister fa) { ++ GenRegister(FMSUB_S, fa, fk, fj, fd); ++} ++ ++void Assembler::fmsub_d(FPURegister fd, FPURegister fj, FPURegister fk, ++ FPURegister fa) { ++ GenRegister(FMSUB_D, fa, fk, fj, fd); ++} ++ ++void Assembler::fnmadd_s(FPURegister fd, FPURegister fj, FPURegister fk, ++ FPURegister fa) { ++ GenRegister(FNMADD_S, fa, fk, fj, fd); ++} ++ ++void Assembler::fnmadd_d(FPURegister fd, FPURegister fj, FPURegister fk, ++ FPURegister fa) { ++ GenRegister(FNMADD_D, fa, fk, fj, fd); ++} ++ ++void Assembler::fnmsub_s(FPURegister fd, FPURegister fj, FPURegister fk, ++ FPURegister fa) { ++ GenRegister(FNMSUB_S, fa, fk, fj, fd); ++} ++ ++void Assembler::fnmsub_d(FPURegister fd, FPURegister fj, FPURegister fk, ++ FPURegister fa) { ++ GenRegister(FNMSUB_D, fa, fk, fj, fd); ++} ++ ++void Assembler::fmax_s(FPURegister fd, FPURegister fj, FPURegister fk) { ++ GenRegister(FMAX_S, fk, fj, fd); ++} ++ ++void Assembler::fmax_d(FPURegister fd, FPURegister fj, FPURegister fk) { ++ GenRegister(FMAX_D, fk, fj, fd); ++} ++ ++void Assembler::fmin_s(FPURegister fd, FPURegister fj, FPURegister fk) { ++ GenRegister(FMIN_S, fk, fj, fd); ++} ++ ++void Assembler::fmin_d(FPURegister fd, FPURegister fj, FPURegister fk) { ++ GenRegister(FMIN_D, fk, fj, fd); ++} ++ ++void Assembler::fmaxa_s(FPURegister fd, FPURegister fj, 
FPURegister fk) { ++ GenRegister(FMAXA_S, fk, fj, fd); ++} ++ ++void Assembler::fmaxa_d(FPURegister fd, FPURegister fj, FPURegister fk) { ++ GenRegister(FMAXA_D, fk, fj, fd); ++} ++ ++void Assembler::fmina_s(FPURegister fd, FPURegister fj, FPURegister fk) { ++ GenRegister(FMINA_S, fk, fj, fd); ++} ++ ++void Assembler::fmina_d(FPURegister fd, FPURegister fj, FPURegister fk) { ++ GenRegister(FMINA_D, fk, fj, fd); ++} ++ ++void Assembler::fabs_s(FPURegister fd, FPURegister fj) { ++ GenRegister(FABS_S, fj, fd); ++} ++ ++void Assembler::fabs_d(FPURegister fd, FPURegister fj) { ++ GenRegister(FABS_D, fj, fd); ++} ++ ++void Assembler::fneg_s(FPURegister fd, FPURegister fj) { ++ GenRegister(FNEG_S, fj, fd); ++} ++ ++void Assembler::fneg_d(FPURegister fd, FPURegister fj) { ++ GenRegister(FNEG_D, fj, fd); ++} ++ ++void Assembler::fsqrt_s(FPURegister fd, FPURegister fj) { ++ GenRegister(FSQRT_S, fj, fd); ++} ++ ++void Assembler::fsqrt_d(FPURegister fd, FPURegister fj) { ++ GenRegister(FSQRT_D, fj, fd); ++} ++ ++void Assembler::frecip_s(FPURegister fd, FPURegister fj) { ++ GenRegister(FRECIP_S, fj, fd); ++} ++ ++void Assembler::frecip_d(FPURegister fd, FPURegister fj) { ++ GenRegister(FRECIP_D, fj, fd); ++} ++ ++void Assembler::frsqrt_s(FPURegister fd, FPURegister fj) { ++ GenRegister(FRSQRT_S, fj, fd); ++} ++ ++void Assembler::frsqrt_d(FPURegister fd, FPURegister fj) { ++ GenRegister(FRSQRT_D, fj, fd); ++} ++ ++void Assembler::fscaleb_s(FPURegister fd, FPURegister fj, FPURegister fk) { ++ GenRegister(FSCALEB_S, fk, fj, fd); ++} ++ ++void Assembler::fscaleb_d(FPURegister fd, FPURegister fj, FPURegister fk) { ++ GenRegister(FSCALEB_D, fk, fj, fd); ++} ++ ++void Assembler::flogb_s(FPURegister fd, FPURegister fj) { ++ GenRegister(FLOGB_S, fj, fd); ++} ++ ++void Assembler::flogb_d(FPURegister fd, FPURegister fj) { ++ GenRegister(FLOGB_D, fj, fd); ++} ++ ++void Assembler::fcopysign_s(FPURegister fd, FPURegister fj, FPURegister fk) { ++ GenRegister(FCOPYSIGN_S, fk, fj, fd); ++} ++ ++void Assembler::fcopysign_d(FPURegister fd, FPURegister fj, FPURegister fk) { ++ GenRegister(FCOPYSIGN_D, fk, fj, fd); ++} ++ ++void Assembler::fclass_s(FPURegister fd, FPURegister fj) { ++ GenRegister(FCLASS_S, fj, fd); ++} ++ ++void Assembler::fclass_d(FPURegister fd, FPURegister fj) { ++ GenRegister(FCLASS_D, fj, fd); ++} ++ ++void Assembler::fcmp_cond_s(FPUCondition cc, FPURegister fj, FPURegister fk, ++ CFRegister cd) { ++ GenCmp(FCMP_COND_S, cc, fk, fj, cd); ++} ++ ++void Assembler::fcmp_cond_d(FPUCondition cc, FPURegister fj, FPURegister fk, ++ CFRegister cd) { ++ GenCmp(FCMP_COND_D, cc, fk, fj, cd); ++} ++ ++void Assembler::fcvt_s_d(FPURegister fd, FPURegister fj) { ++ GenRegister(FCVT_S_D, fj, fd); ++} ++ ++void Assembler::fcvt_d_s(FPURegister fd, FPURegister fj) { ++ GenRegister(FCVT_D_S, fj, fd); ++} ++ ++void Assembler::ffint_s_w(FPURegister fd, FPURegister fj) { ++ GenRegister(FFINT_S_W, fj, fd); ++} ++ ++void Assembler::ffint_s_l(FPURegister fd, FPURegister fj) { ++ GenRegister(FFINT_S_L, fj, fd); ++} ++ ++void Assembler::ffint_d_w(FPURegister fd, FPURegister fj) { ++ GenRegister(FFINT_D_W, fj, fd); ++} ++ ++void Assembler::ffint_d_l(FPURegister fd, FPURegister fj) { ++ GenRegister(FFINT_D_L, fj, fd); ++} ++ ++void Assembler::ftint_w_s(FPURegister fd, FPURegister fj) { ++ GenRegister(FTINT_W_S, fj, fd); ++} ++ ++void Assembler::ftint_w_d(FPURegister fd, FPURegister fj) { ++ GenRegister(FTINT_W_D, fj, fd); ++} ++ ++void Assembler::ftint_l_s(FPURegister fd, FPURegister fj) { ++ GenRegister(FTINT_L_S, fj, fd); ++} ++ 
++void Assembler::ftint_l_d(FPURegister fd, FPURegister fj) { ++ GenRegister(FTINT_L_D, fj, fd); ++} ++ ++void Assembler::ftintrm_w_s(FPURegister fd, FPURegister fj) { ++ GenRegister(FTINTRM_W_S, fj, fd); ++} ++ ++void Assembler::ftintrm_w_d(FPURegister fd, FPURegister fj) { ++ GenRegister(FTINTRM_W_D, fj, fd); ++} ++ ++void Assembler::ftintrm_l_s(FPURegister fd, FPURegister fj) { ++ GenRegister(FTINTRM_L_S, fj, fd); ++} ++ ++void Assembler::ftintrm_l_d(FPURegister fd, FPURegister fj) { ++ GenRegister(FTINTRM_L_D, fj, fd); ++} ++ ++void Assembler::ftintrp_w_s(FPURegister fd, FPURegister fj) { ++ GenRegister(FTINTRP_W_S, fj, fd); ++} ++ ++void Assembler::ftintrp_w_d(FPURegister fd, FPURegister fj) { ++ GenRegister(FTINTRP_W_D, fj, fd); ++} ++ ++void Assembler::ftintrp_l_s(FPURegister fd, FPURegister fj) { ++ GenRegister(FTINTRP_L_S, fj, fd); ++} ++ ++void Assembler::ftintrp_l_d(FPURegister fd, FPURegister fj) { ++ GenRegister(FTINTRP_L_D, fj, fd); ++} ++ ++void Assembler::ftintrz_w_s(FPURegister fd, FPURegister fj) { ++ GenRegister(FTINTRZ_W_S, fj, fd); ++} ++ ++void Assembler::ftintrz_w_d(FPURegister fd, FPURegister fj) { ++ GenRegister(FTINTRZ_W_D, fj, fd); ++} ++ ++void Assembler::ftintrz_l_s(FPURegister fd, FPURegister fj) { ++ GenRegister(FTINTRZ_L_S, fj, fd); ++} ++ ++void Assembler::ftintrz_l_d(FPURegister fd, FPURegister fj) { ++ GenRegister(FTINTRZ_L_D, fj, fd); ++} ++ ++void Assembler::ftintrne_w_s(FPURegister fd, FPURegister fj) { ++ GenRegister(FTINTRNE_W_S, fj, fd); ++} ++ ++void Assembler::ftintrne_w_d(FPURegister fd, FPURegister fj) { ++ GenRegister(FTINTRNE_W_D, fj, fd); ++} ++ ++void Assembler::ftintrne_l_s(FPURegister fd, FPURegister fj) { ++ GenRegister(FTINTRNE_L_S, fj, fd); ++} ++ ++void Assembler::ftintrne_l_d(FPURegister fd, FPURegister fj) { ++ GenRegister(FTINTRNE_L_D, fj, fd); ++} ++ ++void Assembler::frint_s(FPURegister fd, FPURegister fj) { ++ GenRegister(FRINT_S, fj, fd); ++} ++ ++void Assembler::frint_d(FPURegister fd, FPURegister fj) { ++ GenRegister(FRINT_D, fj, fd); ++} ++ ++void Assembler::fmov_s(FPURegister fd, FPURegister fj) { ++ GenRegister(FMOV_S, fj, fd); ++} ++ ++void Assembler::fmov_d(FPURegister fd, FPURegister fj) { ++ GenRegister(FMOV_D, fj, fd); ++} ++ ++void Assembler::fsel(CFRegister ca, FPURegister fd, FPURegister fj, ++ FPURegister fk) { ++ GenSel(FSEL, ca, fk, fj, fd); ++} ++ ++void Assembler::movgr2fr_w(FPURegister fd, Register rj) { ++ GenRegister(MOVGR2FR_W, rj, fd); ++} ++ ++void Assembler::movgr2fr_d(FPURegister fd, Register rj) { ++ GenRegister(MOVGR2FR_D, rj, fd); ++} ++ ++void Assembler::movgr2frh_w(FPURegister fd, Register rj) { ++ GenRegister(MOVGR2FRH_W, rj, fd); ++} ++ ++void Assembler::movfr2gr_s(Register rd, FPURegister fj) { ++ GenRegister(MOVFR2GR_S, fj, rd); ++} ++ ++void Assembler::movfr2gr_d(Register rd, FPURegister fj) { ++ GenRegister(MOVFR2GR_D, fj, rd); ++} ++ ++void Assembler::movfrh2gr_s(Register rd, FPURegister fj) { ++ GenRegister(MOVFRH2GR_S, fj, rd); ++} ++ ++void Assembler::movgr2fcsr(Register rj) { GenRegister(MOVGR2FCSR, rj, FCSR); } ++ ++void Assembler::movfcsr2gr(Register rd) { GenRegister(MOVFCSR2GR, FCSR, rd); } ++ ++void Assembler::movfr2cf(CFRegister cd, FPURegister fj) { ++ GenRegister(MOVFR2CF, fj, cd); ++} ++ ++void Assembler::movcf2fr(FPURegister fd, CFRegister cj) { ++ GenRegister(MOVCF2FR, cj, fd); ++} ++ ++void Assembler::movgr2cf(CFRegister cd, Register rj) { ++ GenRegister(MOVGR2CF, rj, cd); ++} ++ ++void Assembler::movcf2gr(Register rd, CFRegister cj) { ++ GenRegister(MOVCF2GR, cj, rd); ++} ++ 
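
All of the emitters above funnel into the same fixed-field packing: the destination register lands in the low five bits, rj at the kRjShift position, and rk at the kRkShift position, with the opcode supplying the remaining bits. A minimal standalone sketch of that packing follows; the shift values (5 and 10) and the demo opcode are assumptions chosen to mirror the constants used in this patch, not values taken from it.

#include <cstdint>
#include <cstdio>

// Illustrative field positions, assumed to mirror the kRjShift/kRkShift
// constants referenced by the emitters above (rd sits in the low five bits).
constexpr uint32_t kRdShiftDemo = 0;
constexpr uint32_t kRjShiftDemo = 5;
constexpr uint32_t kRkShiftDemo = 10;

// Packs a three-register instruction word in the same shape as the
// GenRegister(opcode, rk, rj, rd) overload above (shift values assumed).
constexpr uint32_t Pack3R(uint32_t opcode, uint32_t rk, uint32_t rj,
                          uint32_t rd) {
  return opcode | (rk << kRkShiftDemo) | (rj << kRjShiftDemo) |
         (rd << kRdShiftDemo);
}

int main() {
  // Hypothetical opcode bits, used purely for the demonstration.
  constexpr uint32_t kDemoOpcode = 0x00108000u;
  std::printf("encoded word: 0x%08x\n", Pack3R(kDemoOpcode, 6, 5, 4));
  return 0;
}

The printed word has the same shape as the Instr value those helpers hand to emit().
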
++void Assembler::fld_s(FPURegister fd, Register rj, int32_t si12) { ++ GenImm(FLD_S, si12, rj, fd); ++} ++ ++void Assembler::fld_d(FPURegister fd, Register rj, int32_t si12) { ++ GenImm(FLD_D, si12, rj, fd); ++} ++ ++void Assembler::fst_s(FPURegister fd, Register rj, int32_t si12) { ++ GenImm(FST_S, si12, rj, fd); ++} ++ ++void Assembler::fst_d(FPURegister fd, Register rj, int32_t si12) { ++ GenImm(FST_D, si12, rj, fd); ++} ++ ++void Assembler::fldx_s(FPURegister fd, Register rj, Register rk) { ++ GenRegister(FLDX_S, rk, rj, fd); ++} ++ ++void Assembler::fldx_d(FPURegister fd, Register rj, Register rk) { ++ GenRegister(FLDX_D, rk, rj, fd); ++} ++ ++void Assembler::fstx_s(FPURegister fd, Register rj, Register rk) { ++ GenRegister(FSTX_S, rk, rj, fd); ++} ++ ++void Assembler::fstx_d(FPURegister fd, Register rj, Register rk) { ++ GenRegister(FSTX_D, rk, rj, fd); ++} ++ ++void Assembler::fldgt_s(FPURegister fd, Register rj, Register rk) { ++ GenRegister(FLDGT_S, rk, rj, fd); ++} ++ ++void Assembler::fldgt_d(FPURegister fd, Register rj, Register rk) { ++ GenRegister(FLDGT_D, rk, rj, fd); ++} ++ ++void Assembler::fldle_s(FPURegister fd, Register rj, Register rk) { ++ GenRegister(FLDLE_S, rk, rj, fd); ++} ++ ++void Assembler::fldle_d(FPURegister fd, Register rj, Register rk) { ++ GenRegister(FLDLE_D, rk, rj, fd); ++} ++ ++void Assembler::fstgt_s(FPURegister fd, Register rj, Register rk) { ++ GenRegister(FSTGT_S, rk, rj, fd); ++} ++ ++void Assembler::fstgt_d(FPURegister fd, Register rj, Register rk) { ++ GenRegister(FSTGT_D, rk, rj, fd); ++} ++ ++void Assembler::fstle_s(FPURegister fd, Register rj, Register rk) { ++ GenRegister(FSTLE_S, rk, rj, fd); ++} ++ ++void Assembler::fstle_d(FPURegister fd, Register rj, Register rk) { ++ GenRegister(FSTLE_D, rk, rj, fd); ++} ++ ++// ------------Memory-instructions------------- ++ ++/*void Assembler::AdjustBaseAndOffset(MemOperand* src, ++ OffsetAccessType access_type, ++ int second_access_add_to_offset) { ++ // TODO should be optimized. ++ // This method is used to adjust the base register and offset pair ++ // for a load/store when the offset doesn't fit into int12_t. ++ ++ bool doubleword_aligned = (src->offset() & (kDoubleSize - 1)) == 0; ++ bool two_accesses = static_cast(access_type) || !doubleword_aligned; ++ DCHECK_LE(second_access_add_to_offset, 7); // Must be <= 7. ++ ++ // is_int12 must be passed a signed value, hence the static cast below. ++ if (is_int12(src->offset()) && ++ (!two_accesses || is_int12(static_cast( ++ src->offset() + second_access_add_to_offset)))) { ++ // Nothing to do: 'offset' (and, if needed, 'offset + 4', or other specified ++ // value) fits into int16_t. ++ return; ++ } ++ ++ DCHECK(src->rm() != ++ at); // Must not overwrite the register 'base' while loading 'offset'. ++ ++#ifdef DEBUG ++ // Remember the "(mis)alignment" of 'offset', it will be checked at the end. ++ uint32_t misalignment = src->offset() & (kDoubleSize - 1); ++#endif ++ ++ // Do not load the whole 32-bit 'offset' if it can be represented as ++ // a sum of two 16-bit signed offsets. This can save an instruction or two. ++ // To simplify matters, only do this for a symmetric range of offsets from ++ // about -64KB to about +64KB, allowing further addition of 4 when accessing ++ // 64-bit variables with two 32-bit accesses. ++ constexpr int32_t kMinOffsetForSimpleAdjustment = ++ 0x7FF8; // Max int16_t that's a multiple of 8. 
++ constexpr int32_t kMaxOffsetForSimpleAdjustment = ++ 2 * kMinOffsetForSimpleAdjustment; ++ ++ UseScratchRegisterScope temps(this); ++ Register scratch = temps.Acquire(); ++ if (0 <= src->offset() && src->offset() <= kMaxOffsetForSimpleAdjustment) { ++ daddiu(scratch, src->rm(), kMinOffsetForSimpleAdjustment); ++ src->offset_ -= kMinOffsetForSimpleAdjustment; ++ } else if (-kMaxOffsetForSimpleAdjustment <= src->offset() && ++ src->offset() < 0) { ++ daddiu(scratch, src->rm(), -kMinOffsetForSimpleAdjustment); ++ src->offset_ += kMinOffsetForSimpleAdjustment; ++ } else if (kArchVariant == kMips64r6) { ++ // On r6 take advantage of the daui instruction, e.g.: ++ // daui at, base, offset_high ++ // [dahi at, 1] // When `offset` is close to +2GB. ++ // lw reg_lo, offset_low(at) ++ // [lw reg_hi, (offset_low+4)(at)] // If misaligned 64-bit load. ++ // or when offset_low+4 overflows int16_t: ++ // daui at, base, offset_high ++ // daddiu at, at, 8 ++ // lw reg_lo, (offset_low-8)(at) ++ // lw reg_hi, (offset_low-4)(at) ++ int16_t offset_low = static_cast(src->offset()); ++ int32_t offset_low32 = offset_low; ++ int16_t offset_high = static_cast(src->offset() >> 16); ++ bool increment_hi16 = offset_low < 0; ++ bool overflow_hi16 = false; ++ ++ if (increment_hi16) { ++ offset_high++; ++ overflow_hi16 = (offset_high == -32768); ++ } ++ daui(scratch, src->rm(), static_cast(offset_high)); ++ ++ if (overflow_hi16) { ++ dahi(scratch, 1); ++ } ++ ++ if (two_accesses && !is_int16(static_cast( ++ offset_low32 + second_access_add_to_offset))) { ++ // Avoid overflow in the 16-bit offset of the load/store instruction when ++ // adding 4. ++ daddiu(scratch, scratch, kDoubleSize); ++ offset_low32 -= kDoubleSize; ++ } ++ ++ src->offset_ = offset_low32; ++ } else { ++ // Do not load the whole 32-bit 'offset' if it can be represented as ++ // a sum of three 16-bit signed offsets. This can save an instruction. ++ // To simplify matters, only do this for a symmetric range of offsets from ++ // about -96KB to about +96KB, allowing further addition of 4 when accessing ++ // 64-bit variables with two 32-bit accesses. ++ constexpr int32_t kMinOffsetForMediumAdjustment = ++ 2 * kMinOffsetForSimpleAdjustment; ++ constexpr int32_t kMaxOffsetForMediumAdjustment = ++ 3 * kMinOffsetForSimpleAdjustment; ++ if (0 <= src->offset() && src->offset() <= kMaxOffsetForMediumAdjustment) { ++ daddiu(scratch, src->rm(), kMinOffsetForMediumAdjustment / 2); ++ daddiu(scratch, scratch, kMinOffsetForMediumAdjustment / 2); ++ src->offset_ -= kMinOffsetForMediumAdjustment; ++ } else if (-kMaxOffsetForMediumAdjustment <= src->offset() && ++ src->offset() < 0) { ++ daddiu(scratch, src->rm(), -kMinOffsetForMediumAdjustment / 2); ++ daddiu(scratch, scratch, -kMinOffsetForMediumAdjustment / 2); ++ src->offset_ += kMinOffsetForMediumAdjustment; ++ } else { ++ // Now that all shorter options have been exhausted, load the full 32-bit ++ // offset. ++ int32_t loaded_offset = RoundDown(src->offset(), kDoubleSize); ++ lui(scratch, (loaded_offset >> kLuiShift) & kImm16Mask); ++ ori(scratch, scratch, loaded_offset & kImm16Mask); // Load 32-bit offset. 
++ daddu(scratch, scratch, src->rm()); ++ src->offset_ -= loaded_offset; ++ } ++ } ++ src->rm_ = scratch; ++ ++ DCHECK(is_int16(src->offset())); ++ if (two_accesses) { ++ DCHECK(is_int16( ++ static_cast(src->offset() + second_access_add_to_offset))); ++ } ++ DCHECK(misalignment == (src->offset() & (kDoubleSize - 1))); ++}*/ ++ ++void Assembler::AdjustBaseAndOffset(MemOperand* src) { ++ // is_int12 must be passed a signed value, hence the static cast below. ++ if ((!src->hasIndexReg() && is_int12(src->offset())) || src->hasIndexReg()) { ++ return; ++ } ++ UseScratchRegisterScope temps(this); ++ Register scratch = temps.Acquire(); ++ if (is_uint12(static_cast(src->offset()))) { ++ ori(scratch, zero_reg, src->offset() & kImm12Mask); ++ } else { ++ lu12i_w(scratch, src->offset() >> 12 & 0xfffff); ++ if (src->offset() & kImm12Mask) { ++ ori(scratch, scratch, src->offset() & kImm12Mask); ++ } ++ } ++ src->index_ = scratch; ++ src->offset_ = 0; ++ // TODO can be optimized, for example 2 * [int12_min, int12_max] ++ // addi_d scratch base, offset/2 only on instr ++ // base = scratch ++ // offset = offset - offset / 2 ++} ++ ++int Assembler::RelocateInternalReference(RelocInfo::Mode rmode, Address pc, ++ intptr_t pc_delta) { ++ if (RelocInfo::IsInternalReference(rmode)) { ++ int64_t* p = reinterpret_cast(pc); ++ if (*p == kEndOfJumpChain) { ++ return 0; // Number of instructions patched. ++ } ++ *p += pc_delta; ++ return 2; // Number of instructions patched. ++ } ++ abort(); ++ /* Instr instr = instr_at(pc); ++ DCHECK(RelocInfo::IsInternalReferenceEncoded(rmode)); ++ if (IsLui(instr)) { ++ Instr instr_lui = instr_at(pc + 0 * kInstrSize); ++ Instr instr_ori = instr_at(pc + 1 * kInstrSize); ++ Instr instr_ori2 = instr_at(pc + 3 * kInstrSize); ++ DCHECK(IsOri(instr_ori)); ++ DCHECK(IsOri(instr_ori2)); ++ // TODO(plind): symbolic names for the shifts. ++ int64_t imm = (instr_lui & static_cast(kImm16Mask)) << 48; ++ imm |= (instr_ori & static_cast(kImm16Mask)) << 32; ++ imm |= (instr_ori2 & static_cast(kImm16Mask)) << 16; ++ // Sign extend address. ++ imm >>= 16; ++ ++ if (imm == kEndOfJumpChain) { ++ return 0; // Number of instructions patched. ++ } ++ imm += pc_delta; ++ DCHECK_EQ(imm & 3, 0); ++ ++ instr_lui &= ~kImm16Mask; ++ instr_ori &= ~kImm16Mask; ++ instr_ori2 &= ~kImm16Mask; ++ ++ instr_at_put(pc + 0 * kInstrSize, instr_lui | ((imm >> 32) & kImm16Mask)); ++ instr_at_put(pc + 1 * kInstrSize, instr_ori | (imm >> 16 & kImm16Mask)); ++ instr_at_put(pc + 3 * kInstrSize, instr_ori2 | (imm & kImm16Mask)); ++ return 4; // Number of instructions patched. ++ } else if (IsJ(instr) || IsJal(instr)) { ++ // Regular j/jal relocation. ++ uint32_t imm28 = (instr & static_cast(kImm26Mask)) << 2; ++ imm28 += pc_delta; ++ imm28 &= kImm28Mask; ++ instr &= ~kImm26Mask; ++ DCHECK_EQ(imm28 & 3, 0); ++ uint32_t imm26 = static_cast(imm28 >> 2); ++ instr_at_put(pc, instr | (imm26 & kImm26Mask)); ++ return 1; // Number of instructions patched. ++ } else { ++ DCHECK(((instr & kJumpRawMask) == kJRawMark) || ++ ((instr & kJumpRawMask) == kJalRawMark)); ++ // Unbox raw offset and emit j/jal. ++ int32_t imm28 = (instr & static_cast(kImm26Mask)) << 2; ++ // Sign extend 28-bit offset to 32-bit. ++ imm28 = (imm28 << 4) >> 4; ++ uint64_t target = ++ static_cast(imm28) + reinterpret_cast(pc); ++ target &= kImm28Mask; ++ DCHECK_EQ(imm28 & 3, 0); ++ uint32_t imm26 = static_cast(target >> 2); ++ // Check markings whether to emit j or jal. ++ uint32_t unbox = (instr & kJRawMark) ? 
J : JAL;
++      instr_at_put(pc, unbox | (imm26 & kImm26Mask));
++      return 1;  // Number of instructions patched.
++    }*/
++}
++
++void Assembler::GrowBuffer() {
++  // Compute new buffer size.
++  int old_size = buffer_->size();
++  int new_size = std::min(2 * old_size, old_size + 1 * MB);
++
++  // Some internal data structures overflow for very large buffers,
++  // they must ensure that kMaximalBufferSize is not too large.
++  if (new_size > kMaximalBufferSize) {
++    V8::FatalProcessOutOfMemory(nullptr, "Assembler::GrowBuffer");
++  }
++
++  // Set up new buffer.
++  std::unique_ptr<AssemblerBuffer> new_buffer = buffer_->Grow(new_size);
++  DCHECK_EQ(new_size, new_buffer->size());
++  byte* new_start = new_buffer->start();
++
++  // Copy the data.
++  intptr_t pc_delta = new_start - buffer_start_;
++  intptr_t rc_delta = (new_start + new_size) - (buffer_start_ + old_size);
++  size_t reloc_size = (buffer_start_ + old_size) - reloc_info_writer.pos();
++  MemMove(new_start, buffer_start_, pc_offset());
++  MemMove(reloc_info_writer.pos() + rc_delta, reloc_info_writer.pos(),
++          reloc_size);
++
++  // Switch buffers.
++  buffer_ = std::move(new_buffer);
++  buffer_start_ = new_start;
++  pc_ += pc_delta;
++  reloc_info_writer.Reposition(reloc_info_writer.pos() + rc_delta,
++                               reloc_info_writer.last_pc() + pc_delta);
++
++  // Relocate runtime entries.
++  Vector<byte> instructions{buffer_start_, pc_offset()};
++  Vector<const byte> reloc_info{reloc_info_writer.pos(), reloc_size};
++  for (RelocIterator it(instructions, reloc_info, 0); !it.done(); it.next()) {
++    RelocInfo::Mode rmode = it.rinfo()->rmode();
++    if (rmode == RelocInfo::INTERNAL_REFERENCE) {
++      RelocateInternalReference(rmode, it.rinfo()->pc(), pc_delta);
++    }
++  }
++  DCHECK(!overflow());
++}
++
++void Assembler::db(uint8_t data) {
++  if (!is_buffer_growth_blocked()) {
++    CheckBuffer();
++  }
++  *reinterpret_cast<uint8_t*>(pc_) = data;
++  pc_ += sizeof(uint8_t);
++}
++
++void Assembler::dd(uint32_t data) {
++  if (!is_buffer_growth_blocked()) {
++    CheckBuffer();
++  }
++  *reinterpret_cast<uint32_t*>(pc_) = data;
++  pc_ += sizeof(uint32_t);
++}
++
++void Assembler::dq(uint64_t data) {
++  if (!is_buffer_growth_blocked()) {
++    CheckBuffer();
++  }
++  *reinterpret_cast<uint64_t*>(pc_) = data;
++  pc_ += sizeof(uint64_t);
++}
++
++void Assembler::dd(Label* label) {
++  if (!is_buffer_growth_blocked()) {
++    CheckBuffer();
++  }
++  uint64_t data;
++  if (label->is_bound()) {
++    data = reinterpret_cast<uint64_t>(buffer_start_ + label->pos());
++  } else {
++    data = jump_address(label);
++    unbound_labels_count_++;
++    internal_reference_positions_.insert(label->pos());
++  }
++  RecordRelocInfo(RelocInfo::INTERNAL_REFERENCE);
++  EmitHelper(data);
++}
++
++void Assembler::RecordRelocInfo(RelocInfo::Mode rmode, intptr_t data) {
++  if (!ShouldRecordRelocInfo(rmode)) return;
++  // We do not try to reuse pool constants.
++  RelocInfo rinfo(reinterpret_cast<Address>
(pc_), rmode, data, Code()); ++ DCHECK_GE(buffer_space(), kMaxRelocSize); // Too late to grow buffer here. ++ reloc_info_writer.Write(&rinfo); ++} ++ ++void Assembler::BlockTrampolinePoolFor(int instructions) { ++ CheckTrampolinePoolQuick(instructions); ++ BlockTrampolinePoolBefore(pc_offset() + instructions * kInstrSize); ++} ++ ++void Assembler::CheckTrampolinePool() { ++ // Some small sequences of instructions must not be broken up by the ++ // insertion of a trampoline pool; such sequences are protected by setting ++ // either trampoline_pool_blocked_nesting_ or no_trampoline_pool_before_, ++ // which are both checked here. Also, recursive calls to CheckTrampolinePool ++ // are blocked by trampoline_pool_blocked_nesting_. ++ if ((trampoline_pool_blocked_nesting_ > 0) || ++ (pc_offset() < no_trampoline_pool_before_)) { ++ // Emission is currently blocked; make sure we try again as soon as ++ // possible. ++ if (trampoline_pool_blocked_nesting_ > 0) { ++ next_buffer_check_ = pc_offset() + kInstrSize; ++ } else { ++ next_buffer_check_ = no_trampoline_pool_before_; ++ } ++ return; ++ } ++ ++ DCHECK(!trampoline_emitted_); ++ DCHECK_GE(unbound_labels_count_, 0); ++ if (unbound_labels_count_ > 0) { ++ // First we emit jump (2 instructions), then we emit trampoline pool. ++ { ++ BlockTrampolinePoolScope block_trampoline_pool(this); ++ Label after_pool; ++ b(&after_pool); ++ nop(); // TODO remove this ++ ++ int pool_start = pc_offset(); ++ for (int i = 0; i < unbound_labels_count_; i++) { ++ { ++ b(&after_pool); ++ nop(); // TODO remove this ++ } ++ } ++ nop(); ++ bind(&after_pool); ++ trampoline_ = Trampoline(pool_start, unbound_labels_count_); ++ ++ trampoline_emitted_ = true; ++ // As we are only going to emit trampoline once, we need to prevent any ++ // further emission. ++ next_buffer_check_ = kMaxInt; ++ } ++ } else { ++ // Number of branches to unbound label at this point is zero, so we can ++ // move next buffer check to maximum. ++ next_buffer_check_ = ++ pc_offset() + kMax16BranchOffset - kTrampolineSlotsSize * 16; ++ } ++ return; ++} ++ ++Address Assembler::target_address_at(Address pc) { ++ Instr instr0 = instr_at(pc); ++ Instr instr1 = instr_at(pc + 1 * kInstrSize); ++ Instr instr2 = instr_at(pc + 2 * kInstrSize); ++ ++ // Interpret 4 instructions for address generated by li: See listing in ++ // Assembler::set_target_address_at() just below. ++ DCHECK((IsLu12i_w(instr0) && (IsOri(instr1)) && (IsLu32i_d(instr2)))); ++ ++ // Assemble the 48 bit value. ++ uint64_t hi20 = ((uint64_t)(instr2 >> 5) & 0xfffff) << 32; ++ uint64_t mid20 = ((uint64_t)(instr0 >> 5) & 0xfffff) << 12; ++ uint64_t low12 = ((uint64_t)(instr1 >> 10) & 0xfff); ++ int64_t addr = static_cast(hi20 | mid20 | low12); ++ ++ // Sign extend to get canonical address. ++ addr = (addr << 16) >> 16; ++ // printf("add : 0x%lx 0x%lx 0x%lx 0x%lx\n", addr, hi20, mid20, low12); ++ return static_cast
(addr); ++} ++ ++// On la64, a target address is stored in a 3-instruction sequence: ++// 0: lu12i_w(rd, (j.imm64_ >> 12) & kImm20Mask); ++// 1: ori(rd, rd, j.imm64_ & kImm12Mask); ++// 2: lu32i_d(rd, (j.imm64_ >> 32) & kImm20Mask); ++// ++// Patching the address must replace all the lui & ori instructions, ++// and flush the i-cache. ++// ++// There is an optimization below, which emits a nop when the address ++// fits in just 16 bits. This is unlikely to help, and should be benchmarked, ++// and possibly removed. ++void Assembler::set_target_value_at(Address pc, uint64_t target, ++ ICacheFlushMode icache_flush_mode) { ++ // There is an optimization where only 3 instructions are used to load address ++ // in code on LA64 because only 48-bits of address is effectively used. ++ // It relies on fact the upper [63:48] bits are not used for virtual address ++ // translation and they have to be set according to value of bit 47 in order ++ // get canonical address. ++#ifdef DEBUG ++ // Check we have the result from a li macro-instruction. ++ Instr instr0 = instr_at(pc); ++ Instr instr1 = instr_at(pc + kInstrSize); ++ Instr instr2 = instr_at(pc + kInstrSize * 2); ++ DCHECK(IsLu12i_w(instr0) && IsOri(instr1) && IsLu32i_d(instr2)); ++#endif ++ ++ Instr instr = instr_at(pc); ++ uint32_t rd_code = GetRd(instr); ++ uint32_t* p = reinterpret_cast(pc); ++ ++ // Must use 3 instructions to insure patchable code. ++ // lu12i_w rd, middle-20. ++ // ori rd, rd, low-12. ++ // li32i_d rd, high-20. ++ *p = LU12I_W | (((target >> 12) & 0xfffff) << kRjShift) | rd_code; ++ *(p + 1) = ++ ORI | (target & 0xfff) << kRkShift | (rd_code << kRjShift) | rd_code; ++ *(p + 2) = LU32I_D | (((target >> 32) & 0xfffff) << kRjShift) | rd_code; ++ ++ if (icache_flush_mode != SKIP_ICACHE_FLUSH) { ++ FlushInstructionCache(pc, 3 * kInstrSize); ++ } ++} ++ ++UseScratchRegisterScope::UseScratchRegisterScope(Assembler* assembler) ++ : available_(assembler->GetScratchRegisterList()), ++ old_available_(*available_) {} ++ ++UseScratchRegisterScope::~UseScratchRegisterScope() { ++ *available_ = old_available_; ++} ++ ++Register UseScratchRegisterScope::Acquire() { ++ DCHECK_NOT_NULL(available_); ++ DCHECK_NE(*available_, 0); ++ int index = static_cast(base::bits::CountTrailingZeros32(*available_)); ++ *available_ &= ~(1UL << index); ++ ++ return Register::from_code(index); ++} ++ ++bool UseScratchRegisterScope::hasAvailable() const { return *available_ != 0; } ++ ++} // namespace internal ++} // namespace v8 ++ ++#endif // V8_TARGET_ARCH_LA64 +diff --git a/src/3rdparty/chromium/v8/src/codegen/la64/assembler-la64.h b/src/3rdparty/chromium/v8/src/codegen/la64/assembler-la64.h +new file mode 100644 +index 00000000000..03a0103b1ca +--- /dev/null ++++ b/src/3rdparty/chromium/v8/src/codegen/la64/assembler-la64.h +@@ -0,0 +1,1171 @@ ++// Copyright (c) 1994-2006 Sun Microsystems Inc. ++// All Rights Reserved. ++// ++// Redistribution and use in source and binary forms, with or without ++// modification, are permitted provided that the following conditions are ++// met: ++// ++// - Redistributions of source code must retain the above copyright notice, ++// this list of conditions and the following disclaimer. ++// ++// - Redistribution in binary form must reproduce the above copyright ++// notice, this list of conditions and the following disclaimer in the ++// documentation and/or other materials provided with the distribution. 
++// ++// - Neither the name of Sun Microsystems or the names of contributors may ++// be used to endorse or promote products derived from this software without ++// specific prior written permission. ++// ++// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS ++// IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, ++// THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR ++// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR ++// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, ++// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, ++// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR ++// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF ++// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING ++// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS ++// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. ++ ++// The original source code covered by the above license above has been ++// modified significantly by Google Inc. ++// Copyright 2012 the V8 project authors. All rights reserved. ++ ++#ifndef V8_CODEGEN_LA64_ASSEMBLER_LA64_H_ ++#define V8_CODEGEN_LA64_ASSEMBLER_LA64_H_ ++ ++#include ++#include ++#include ++ ++#include "src/codegen/assembler.h" ++#include "src/codegen/external-reference.h" ++#include "src/codegen/la64/constants-la64.h" ++#include "src/codegen/la64/register-la64.h" ++#include "src/codegen/label.h" ++#include "src/objects/contexts.h" ++#include "src/objects/smi.h" ++ ++namespace v8 { ++namespace internal { ++ ++class SafepointTableBuilder; ++ ++// ----------------------------------------------------------------------------- ++// Machine instruction Operands. ++constexpr int kSmiShift = kSmiTagSize + kSmiShiftSize; ++constexpr uint64_t kSmiShiftMask = (1UL << kSmiShift) - 1; ++// Class Operand represents a shifter operand in data processing instructions. ++class Operand { ++ public: ++ // Immediate. ++ V8_INLINE explicit Operand(int64_t immediate, ++ RelocInfo::Mode rmode = RelocInfo::NONE) ++ : rm_(no_reg), rmode_(rmode) { ++ value_.immediate = immediate; ++ } ++ V8_INLINE explicit Operand(const ExternalReference& f) ++ : rm_(no_reg), rmode_(RelocInfo::EXTERNAL_REFERENCE) { ++ value_.immediate = static_cast(f.address()); ++ } ++ V8_INLINE explicit Operand(const char* s); ++ explicit Operand(Handle handle); ++ V8_INLINE explicit Operand(Smi value) : rm_(no_reg), rmode_(RelocInfo::NONE) { ++ value_.immediate = static_cast(value.ptr()); ++ } ++ ++ static Operand EmbeddedNumber(double number); // Smi or HeapNumber. ++ static Operand EmbeddedStringConstant(const StringConstantBase* str); ++ ++ // Register. ++ V8_INLINE explicit Operand(Register rm) : rm_(rm) {} ++ ++ // Return true if this is a register operand. 
++ V8_INLINE bool is_reg() const; ++ ++ inline int64_t immediate() const; ++ ++ bool IsImmediate() const { return !rm_.is_valid(); } ++ ++ HeapObjectRequest heap_object_request() const { ++ DCHECK(IsHeapObjectRequest()); ++ return value_.heap_object_request; ++ } ++ ++ bool IsHeapObjectRequest() const { ++ DCHECK_IMPLIES(is_heap_object_request_, IsImmediate()); ++ DCHECK_IMPLIES(is_heap_object_request_, ++ rmode_ == RelocInfo::FULL_EMBEDDED_OBJECT || ++ rmode_ == RelocInfo::CODE_TARGET); ++ return is_heap_object_request_; ++ } ++ ++ Register rm() const { return rm_; } ++ ++ RelocInfo::Mode rmode() const { return rmode_; } ++ ++ private: ++ Register rm_; ++ union Value { ++ Value() {} ++ HeapObjectRequest heap_object_request; // if is_heap_object_request_ ++ int64_t immediate; // otherwise ++ } value_; // valid if rm_ == no_reg ++ bool is_heap_object_request_ = false; ++ RelocInfo::Mode rmode_; ++ ++ friend class Assembler; ++ friend class MacroAssembler; ++}; ++ ++// Class MemOperand represents a memory operand in load and store instructions. ++// 1: base_reg + off_imm( si12 | si14<<2) ++// 2: base_reg + offset_reg ++class V8_EXPORT_PRIVATE MemOperand { ++ public: ++ explicit MemOperand(Register rj, int32_t offset = 0); ++ explicit MemOperand(Register rj, Register offset = no_reg); ++ Register base() const { return base_; } ++ Register index() const { return index_; } ++ int32_t offset() const { return offset_; } ++ ++ bool hasIndexReg() const { return index_ != no_reg; } ++ ++ private: ++ Register base_; // base ++ Register index_; // index ++ int32_t offset_; // offset ++ ++ friend class Assembler; ++}; ++ ++class V8_EXPORT_PRIVATE Assembler : public AssemblerBase { ++ public: ++ // Create an assembler. Instructions and relocation information are emitted ++ // into a buffer, with the instructions starting from the beginning and the ++ // relocation information starting from the end of the buffer. See CodeDesc ++ // for a detailed comment on the layout (globals.h). ++ // ++ // If the provided buffer is nullptr, the assembler allocates and grows its ++ // own buffer. Otherwise it takes ownership of the provided buffer. ++ explicit Assembler(const AssemblerOptions&, ++ std::unique_ptr = {}); ++ ++ virtual ~Assembler() {} ++ ++ // GetCode emits any pending (non-emitted) code and fills the descriptor desc. ++ static constexpr int kNoHandlerTable = 0; ++ static constexpr SafepointTableBuilder* kNoSafepointTable = nullptr; ++ void GetCode(Isolate* isolate, CodeDesc* desc, ++ SafepointTableBuilder* safepoint_table_builder, ++ int handler_table_offset); ++ ++ // Convenience wrapper for code without safepoint or handler tables. ++ void GetCode(Isolate* isolate, CodeDesc* desc) { ++ GetCode(isolate, desc, kNoSafepointTable, kNoHandlerTable); ++ } ++ ++ // Unused on this architecture. ++ void MaybeEmitOutOfLineConstantPool() {} ++ ++ // Label operations & relative jumps (PPUM Appendix D). ++ // ++ // Takes a branch opcode (cc) and a label (L) and generates ++ // either a backward branch or a forward branch and links it ++ // to the label fixup chain. Usage: ++ // ++ // Label L; // unbound label ++ // j(cc, &L); // forward branch to unbound label ++ // bind(&L); // bind label to the current pc ++ // j(cc, &L); // backward branch to bound label ++ // bind(&L); // illegal: a label may be bound only once ++ // ++ // Note: The same Label can be used for forward and backward branches ++ // but it may be bound only once. ++ void bind(Label* L); // Binds an unbound label L to current code position. 
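
The comment above describes the link-then-bind discipline: a branch to a still-unbound label records itself on the label's chain and receives its real offset only once bind() fixes the target position. The toy model below illustrates that flow; MiniLabel, MiniAssembler, and branch_to are invented names for illustration and are not part of the V8 API.

#include <cstdint>
#include <cstdio>
#include <vector>

// Toy model of the link-then-bind scheme: each unresolved branch remembers
// its own position; bind() walks the list and patches the final offsets.
struct MiniLabel {
  int bound_pos = -1;                 // -1 while unbound
  std::vector<int> pending_branches;  // positions of branches waiting on us
};

struct MiniAssembler {
  std::vector<int32_t> offsets;  // stands in for emitted branch offsets

  void branch_to(MiniLabel& l) {
    int pc = static_cast<int>(offsets.size());
    if (l.bound_pos >= 0) {
      offsets.push_back(l.bound_pos - pc);  // backward branch: offset known now
    } else {
      offsets.push_back(0);                 // forward branch: patched at bind()
      l.pending_branches.push_back(pc);
    }
  }

  void bind(MiniLabel& l) {
    l.bound_pos = static_cast<int>(offsets.size());
    for (int pc : l.pending_branches) offsets[pc] = l.bound_pos - pc;
    l.pending_branches.clear();
  }
};

int main() {
  MiniAssembler masm;
  MiniLabel l;
  masm.branch_to(l);          // forward branch to the still-unbound label
  masm.offsets.push_back(0);  // stand-in for an unrelated instruction
  masm.bind(l);               // label now points at the current position
  masm.offsets.push_back(0);  // another unrelated instruction
  masm.branch_to(l);          // backward branch: offset is known immediately
  for (int32_t off : masm.offsets) std::printf("%d\n", off);
  return 0;
}

The printed offsets show the forward branch patched to a positive displacement and the backward branch emitted with a negative one, which is the same distinction the real branch_offset_helper() and bind() take care of.
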
++ ++ enum OffsetSize : int { kOffset26 = 26, kOffset21 = 21, kOffset16 = 16 }; ++ ++ // Determines if Label is bound and near enough so that branch instruction ++ // can be used to reach it, instead of jump instruction. ++ // c means conditinal branch, a means always branch. ++ bool is_near_c(Label* L); ++ bool is_near(Label* L, OffsetSize bits); ++ bool is_near_a(Label* L); ++ ++ int BranchOffset(Instr instr); ++ ++ // Returns the branch offset to the given label from the current code ++ // position. Links the label to the current position if it is still unbound. ++ // Manages the jump elimination optimization if the second parameter is true. ++ int32_t branch_offset_helper(Label* L, OffsetSize bits); ++ inline int32_t branch_offset(Label* L) { ++ return branch_offset_helper(L, OffsetSize::kOffset16); ++ } ++ inline int32_t branch_offset21(Label* L) { ++ return branch_offset_helper(L, OffsetSize::kOffset21); ++ } ++ inline int32_t branch_offset26(Label* L) { ++ return branch_offset_helper(L, OffsetSize::kOffset26); ++ } ++ inline int32_t shifted_branch_offset(Label* L) { ++ return branch_offset(L) >> 2; ++ } ++ inline int32_t shifted_branch_offset21(Label* L) { ++ return branch_offset21(L) >> 2; ++ } ++ inline int32_t shifted_branch_offset26(Label* L) { ++ return branch_offset26(L) >> 2; ++ } ++ uint64_t jump_address(Label* L); ++ uint64_t jump_offset(Label* L); ++ uint64_t branch_long_offset(Label* L); ++ ++ // Puts a labels target address at the given position. ++ // The high 8 bits are set to zero. ++ void label_at_put(Label* L, int at_offset); ++ ++ // Read/Modify the code target address in the branch/call instruction at pc. ++ // The isolate argument is unused (and may be nullptr) when skipping flushing. ++ static Address target_address_at(Address pc); ++ V8_INLINE static void set_target_address_at( ++ Address pc, Address target, ++ ICacheFlushMode icache_flush_mode = FLUSH_ICACHE_IF_NEEDED) { ++ set_target_value_at(pc, target, icache_flush_mode); ++ } ++ // On MIPS there is no Constant Pool so we skip that parameter. ++ V8_INLINE static Address target_address_at(Address pc, ++ Address constant_pool) { ++ return target_address_at(pc); ++ } ++ V8_INLINE static void set_target_address_at( ++ Address pc, Address constant_pool, Address target, ++ ICacheFlushMode icache_flush_mode = FLUSH_ICACHE_IF_NEEDED) { ++ set_target_address_at(pc, target, icache_flush_mode); ++ } ++ ++ static void set_target_value_at( ++ Address pc, uint64_t target, ++ ICacheFlushMode icache_flush_mode = FLUSH_ICACHE_IF_NEEDED); ++ ++ static void JumpLabelToJumpRegister(Address pc); ++ ++ // This sets the branch destination (which gets loaded at the call address). ++ // This is for calls and branches within generated code. The serializer ++ // has already deserialized the lui/ori instructions etc. ++ inline static void deserialization_set_special_target_at( ++ Address instruction_payload, Code code, Address target); ++ ++ // Get the size of the special target encoded at 'instruction_payload'. ++ inline static int deserialization_special_target_size( ++ Address instruction_payload); ++ ++ // This sets the internal reference at the pc. ++ inline static void deserialization_set_target_internal_reference_at( ++ Address pc, Address target, ++ RelocInfo::Mode mode = RelocInfo::INTERNAL_REFERENCE); ++ ++ // Here we are patching the address in the LUI/ORI instruction pair. 
++ // These values are used in the serialization process and must be zero for ++ // LA platform, as Code, Embedded Object or External-reference pointers ++ // are split across two consecutive instructions and don't exist separately ++ // in the code, so the serializer should not step forwards in memory after ++ // a target is resolved and written. ++ static constexpr int kSpecialTargetSize = 0; ++ ++ // Number of consecutive instructions used to store 32bit/64bit constant. ++ // This constant was used in RelocInfo::target_address_address() function ++ // to tell serializer address of the instruction that follows ++ // LUI/ORI instruction pair. ++ // TODO check this ++ static constexpr int kInstructionsFor64BitConstant = 4; ++ ++ // Difference between address of current opcode and target address offset. ++ static constexpr int kBranchPCOffset = 0; ++ ++ // Difference between address of current opcode and target address offset, ++ // when we are generatinga sequence of instructions for long relative PC ++ // branches ++ static constexpr int kLongBranchPCOffset = 0; // 3 * kInstrSize; ++ ++ // Max offset for instructions with 16-bit offset field ++ static constexpr int kMax16BranchOffset = (1 << (18 - 1)) - 1; ++ ++ // Max offset for instructions with 21-bit offset field ++ static constexpr int kMax21BranchOffset = (1 << (23 - 1)) - 1; ++ ++ // Max offset for compact branch instructions with 26-bit offset field ++ static constexpr int kMax26BranchOffset = (1 << (28 - 1)) - 1; ++ ++ static constexpr int kTrampolineSlotsSize = 2 * kInstrSize; ++ ++ RegList* GetScratchRegisterList() { return &scratch_register_list_; } ++ ++ // --------------------------------------------------------------------------- ++ // Code generation. ++ ++ // Insert the smallest number of nop instructions ++ // possible to align the pc offset to a multiple ++ // of m. m must be a power of 2 (>= 4). ++ void Align(int m); ++ // Insert the smallest number of zero bytes possible to align the pc offset ++ // to a mulitple of m. m must be a power of 2 (>= 2). ++ void DataAlign(int m); ++ // Aligns code to something that's optimal for a jump target for the platform. ++ void CodeTargetAlign(); ++ ++ // Different nop operations are used by the code generator to detect certain ++ // states of the generated code. ++ enum NopMarkerTypes { ++ NON_MARKING_NOP = 0, ++ DEBUG_BREAK_NOP, ++ // IC markers. ++ PROPERTY_ACCESS_INLINED, ++ PROPERTY_ACCESS_INLINED_CONTEXT, ++ PROPERTY_ACCESS_INLINED_CONTEXT_DONT_DELETE, ++ // Helper values. ++ LAST_CODE_MARKER, ++ FIRST_IC_MARKER = PROPERTY_ACCESS_INLINED, ++ }; ++ ++ // Type == 0 is the default non-marking nop. For loongisa this is a ++ // andi(zero_reg, zero_reg, 0). We use rt_reg == r1 for non-zero ++ // marking, to avoid conflict with ssnop and ehb instructions. ++ void nop(unsigned int type = 0) { ++ DCHECK_LT(type, 32); ++ Register nop_rt_reg = (type == 0) ? zero_reg : t7; ++ andi(zero_reg, nop_rt_reg, type); ++ } ++ ++ // --------Branch-and-jump-instructions---------- ++ // We don't use likely variant of instructions. 
++ void b(int32_t offset); ++ inline void b(Label* L) { b(shifted_branch_offset26(L)); } ++ void bl(int32_t offset); ++ inline void bl(Label* L) { bl(shifted_branch_offset26(L)); } ++ ++ void beq(Register rj, Register rd, int32_t offset); ++ inline void beq(Register rj, Register rd, Label* L) { ++ beq(rj, rd, shifted_branch_offset(L)); ++ } ++ void bne(Register rj, Register rd, int32_t offset); ++ inline void bne(Register rj, Register rd, Label* L) { ++ bne(rj, rd, shifted_branch_offset(L)); ++ } ++ void blt(Register rj, Register rd, int32_t offset); ++ inline void blt(Register rj, Register rd, Label* L) { ++ blt(rj, rd, shifted_branch_offset(L)); ++ } ++ void bge(Register rj, Register rd, int32_t offset); ++ inline void bge(Register rj, Register rd, Label* L) { ++ bge(rj, rd, shifted_branch_offset(L)); ++ } ++ void bltu(Register rj, Register rd, int32_t offset); ++ inline void bltu(Register rj, Register rd, Label* L) { ++ bltu(rj, rd, shifted_branch_offset(L)); ++ } ++ void bgeu(Register rj, Register rd, int32_t offset); ++ inline void bgeu(Register rj, Register rd, Label* L) { ++ bgeu(rj, rd, shifted_branch_offset(L)); ++ } ++ void beqz(Register rj, int32_t offset); ++ inline void beqz(Register rj, Label* L) { ++ beqz(rj, shifted_branch_offset21(L)); ++ } ++ void bnez(Register rj, int32_t offset); ++ inline void bnez(Register rj, Label* L) { ++ bnez(rj, shifted_branch_offset21(L)); ++ } ++ ++ void jirl(Register rd, Register rj, int32_t offset); ++ ++ void bceqz(CFRegister cj, int32_t si21); ++ inline void bceqz(CFRegister cj, Label* L) { ++ bceqz(cj, shifted_branch_offset21(L)); ++ } ++ void bcnez(CFRegister cj, int32_t si21); ++ inline void bcnez(CFRegister cj, Label* L) { ++ bcnez(cj, shifted_branch_offset21(L)); ++ } ++ ++ // -------Data-processing-instructions--------- ++ ++ // Arithmetic. 
++ void add_w(Register rd, Register rj, Register rk); ++ void add_d(Register rd, Register rj, Register rk); ++ void sub_w(Register rd, Register rj, Register rk); ++ void sub_d(Register rd, Register rj, Register rk); ++ ++ void addi_w(Register rd, Register rj, int32_t si12); ++ void addi_d(Register rd, Register rj, int32_t si12); ++ ++ void addu16i_d(Register rd, Register rj, int32_t si16); ++ ++ void alsl_w(Register rd, Register rj, Register rk, int32_t sa2); ++ void alsl_wu(Register rd, Register rj, Register rk, int32_t sa2); ++ void alsl_d(Register rd, Register rj, Register rk, int32_t sa2); ++ ++ void lu12i_w(Register rd, int32_t si20); ++ void lu32i_d(Register rd, int32_t si20); ++ void lu52i_d(Register rd, Register rj, int32_t si12); ++ ++ void slt(Register rd, Register rj, Register rk); ++ void sltu(Register rd, Register rj, Register rk); ++ void slti(Register rd, Register rj, int32_t si12); ++ void sltui(Register rd, Register rj, int32_t si12); ++ ++ void pcaddi(Register rd, int32_t si20); ++ void pcaddu12i(Register rd, int32_t si20); ++ void pcaddu18i(Register rd, int32_t si20); ++ void pcalau12i(Register rd, int32_t si20); ++ ++ void and_(Register rd, Register rj, Register rk); ++ void or_(Register rd, Register rj, Register rk); ++ void xor_(Register rd, Register rj, Register rk); ++ void nor(Register rd, Register rj, Register rk); ++ void andn(Register rd, Register rj, Register rk); ++ void orn(Register rd, Register rj, Register rk); ++ ++ void andi(Register rd, Register rj, int32_t ui12); ++ void ori(Register rd, Register rj, int32_t ui12); ++ void xori(Register rd, Register rj, int32_t ui12); ++ ++ void mul_w(Register rd, Register rj, Register rk); ++ void mulh_w(Register rd, Register rj, Register rk); ++ void mulh_wu(Register rd, Register rj, Register rk); ++ void mul_d(Register rd, Register rj, Register rk); ++ void mulh_d(Register rd, Register rj, Register rk); ++ void mulh_du(Register rd, Register rj, Register rk); ++ ++ void mulw_d_w(Register rd, Register rj, Register rk); ++ void mulw_d_wu(Register rd, Register rj, Register rk); ++ ++ void div_w(Register rd, Register rj, Register rk); ++ void mod_w(Register rd, Register rj, Register rk); ++ void div_wu(Register rd, Register rj, Register rk); ++ void mod_wu(Register rd, Register rj, Register rk); ++ void div_d(Register rd, Register rj, Register rk); ++ void mod_d(Register rd, Register rj, Register rk); ++ void div_du(Register rd, Register rj, Register rk); ++ void mod_du(Register rd, Register rj, Register rk); ++ ++ // Shifts. ++ void sll_w(Register rd, Register rj, Register rk); ++ void srl_w(Register rd, Register rj, Register rk); ++ void sra_w(Register rd, Register rj, Register rk); ++ void rotr_w(Register rd, Register rj, Register rk); ++ ++ void slli_w(Register rd, Register rj, int32_t ui5); ++ void srli_w(Register rd, Register rj, int32_t ui5); ++ void srai_w(Register rd, Register rj, int32_t ui5); ++ void rotri_w(Register rd, Register rj, int32_t ui5); ++ ++ void sll_d(Register rd, Register rj, Register rk); ++ void srl_d(Register rd, Register rj, Register rk); ++ void sra_d(Register rd, Register rj, Register rk); ++ void rotr_d(Register rd, Register rj, Register rk); ++ ++ void slli_d(Register rd, Register rj, int32_t ui6); ++ void srli_d(Register rd, Register rj, int32_t ui6); ++ void srai_d(Register rd, Register rj, int32_t ui6); ++ void rotri_d(Register rd, Register rj, int32_t ui6); ++ ++ // Bit twiddling. 
++ void ext_w_b(Register rd, Register rj); ++ void ext_w_h(Register rd, Register rj); ++ ++ void clo_w(Register rd, Register rj); ++ void clz_w(Register rd, Register rj); ++ void cto_w(Register rd, Register rj); ++ void ctz_w(Register rd, Register rj); ++ void clo_d(Register rd, Register rj); ++ void clz_d(Register rd, Register rj); ++ void cto_d(Register rd, Register rj); ++ void ctz_d(Register rd, Register rj); ++ ++ void bytepick_w(Register rd, Register rj, Register rk, int32_t sa2); ++ void bytepick_d(Register rd, Register rj, Register rk, int32_t sa3); ++ ++ void revb_2h(Register rd, Register rj); ++ void revb_4h(Register rd, Register rj); ++ void revb_2w(Register rd, Register rj); ++ void revb_d(Register rd, Register rj); ++ ++ void revh_2w(Register rd, Register rj); ++ void revh_d(Register rd, Register rj); ++ ++ void bitrev_4b(Register rd, Register rj); ++ void bitrev_8b(Register rd, Register rj); ++ ++ void bitrev_w(Register rd, Register rj); ++ void bitrev_d(Register rd, Register rj); ++ ++ void bstrins_w(Register rd, Register rj, int32_t msbw, int32_t lsbw); ++ void bstrins_d(Register rd, Register rj, int32_t msbd, int32_t lsbd); ++ ++ void bstrpick_w(Register rd, Register rj, int32_t msbw, int32_t lsbw); ++ void bstrpick_d(Register rd, Register rj, int32_t msbd, int32_t lsbd); ++ ++ void maskeqz(Register rd, Register rj, Register rk); ++ void masknez(Register rd, Register rj, Register rk); ++ ++ // Memory-instructions ++ void ld_b(Register rd, Register rj, int32_t si12); ++ void ld_h(Register rd, Register rj, int32_t si12); ++ void ld_w(Register rd, Register rj, int32_t si12); ++ void ld_d(Register rd, Register rj, int32_t si12); ++ void ld_bu(Register rd, Register rj, int32_t si12); ++ void ld_hu(Register rd, Register rj, int32_t si12); ++ void ld_wu(Register rd, Register rj, int32_t si12); ++ void st_b(Register rd, Register rj, int32_t si12); ++ void st_h(Register rd, Register rj, int32_t si12); ++ void st_w(Register rd, Register rj, int32_t si12); ++ void st_d(Register rd, Register rj, int32_t si12); ++ ++ void ldx_b(Register rd, Register rj, Register rk); ++ void ldx_h(Register rd, Register rj, Register rk); ++ void ldx_w(Register rd, Register rj, Register rk); ++ void ldx_d(Register rd, Register rj, Register rk); ++ void ldx_bu(Register rd, Register rj, Register rk); ++ void ldx_hu(Register rd, Register rj, Register rk); ++ void ldx_wu(Register rd, Register rj, Register rk); ++ void stx_b(Register rd, Register rj, Register rk); ++ void stx_h(Register rd, Register rj, Register rk); ++ void stx_w(Register rd, Register rj, Register rk); ++ void stx_d(Register rd, Register rj, Register rk); ++ ++ void ldptr_w(Register rd, Register rj, int32_t si14); ++ void ldptr_d(Register rd, Register rj, int32_t si14); ++ void stptr_w(Register rd, Register rj, int32_t si14); ++ void stptr_d(Register rd, Register rj, int32_t si14); ++ ++ void preld(int32_t hint, Register rj, int32_t si12); ++ ++ void preldx(int32_t hint, Register rj, Register rk); ++ ++ void ldgt_b(Register rd, Register rj, Register rk); ++ void ldgt_h(Register rd, Register rj, Register rk); ++ void ldgt_w(Register rd, Register rj, Register rk); ++ void ldgt_d(Register rd, Register rj, Register rk); ++ ++ void ldle_b(Register rd, Register rj, Register rk); ++ void ldle_h(Register rd, Register rj, Register rk); ++ void ldle_w(Register rd, Register rj, Register rk); ++ void ldle_d(Register rd, Register rj, Register rk); ++ ++ void stgt_b(Register rd, Register rj, Register rk); ++ void stgt_h(Register rd, Register rj, Register 
rk); ++ void stgt_w(Register rd, Register rj, Register rk); ++ void stgt_d(Register rd, Register rj, Register rk); ++ ++ void stle_b(Register rd, Register rj, Register rk); ++ void stle_h(Register rd, Register rj, Register rk); ++ void stle_w(Register rd, Register rj, Register rk); ++ void stle_d(Register rd, Register rj, Register rk); ++ ++ void amswap_w(Register rd, Register rk, Register rj); ++ void amswap_d(Register rd, Register rk, Register rj); ++ void amadd_w(Register rd, Register rk, Register rj); ++ void amadd_d(Register rd, Register rk, Register rj); ++ void amand_w(Register rd, Register rk, Register rj); ++ void amand_d(Register rd, Register rk, Register rj); ++ void amor_w(Register rd, Register rk, Register rj); ++ void amor_d(Register rd, Register rk, Register rj); ++ void amxor_w(Register rd, Register rk, Register rj); ++ void amxor_d(Register rd, Register rk, Register rj); ++ void ammax_w(Register rd, Register rk, Register rj); ++ void ammax_d(Register rd, Register rk, Register rj); ++ void ammin_w(Register rd, Register rk, Register rj); ++ void ammin_d(Register rd, Register rk, Register rj); ++ void ammax_wu(Register rd, Register rk, Register rj); ++ void ammax_du(Register rd, Register rk, Register rj); ++ void ammin_wu(Register rd, Register rk, Register rj); ++ void ammin_du(Register rd, Register rk, Register rj); ++ ++ void amswap_db_w(Register rd, Register rk, Register rj); ++ void amswap_db_d(Register rd, Register rk, Register rj); ++ void amadd_db_w(Register rd, Register rk, Register rj); ++ void amadd_db_d(Register rd, Register rk, Register rj); ++ void amand_db_w(Register rd, Register rk, Register rj); ++ void amand_db_d(Register rd, Register rk, Register rj); ++ void amor_db_w(Register rd, Register rk, Register rj); ++ void amor_db_d(Register rd, Register rk, Register rj); ++ void amxor_db_w(Register rd, Register rk, Register rj); ++ void amxor_db_d(Register rd, Register rk, Register rj); ++ void ammax_db_w(Register rd, Register rk, Register rj); ++ void ammax_db_d(Register rd, Register rk, Register rj); ++ void ammin_db_w(Register rd, Register rk, Register rj); ++ void ammin_db_d(Register rd, Register rk, Register rj); ++ void ammax_db_wu(Register rd, Register rk, Register rj); ++ void ammax_db_du(Register rd, Register rk, Register rj); ++ void ammin_db_wu(Register rd, Register rk, Register rj); ++ void ammin_db_du(Register rd, Register rk, Register rj); ++ ++ void ll_w(Register rd, Register rj, int32_t si14); ++ void ll_d(Register rd, Register rj, int32_t si14); ++ void sc_w(Register rd, Register rj, int32_t si14); ++ void sc_d(Register rd, Register rj, int32_t si14); ++ ++ void dbar(int32_t hint); ++ void ibar(int32_t hint); ++ ++ void crc_w_b_w(Register rd, Register rj, Register rk); ++ void crc_w_h_w(Register rd, Register rj, Register rk); ++ void crc_w_w_w(Register rd, Register rj, Register rk); ++ void crc_w_d_w(Register rd, Register rj, Register rk); ++ void crcc_w_b_w(Register rd, Register rj, Register rk); ++ void crcc_w_h_w(Register rd, Register rj, Register rk); ++ void crcc_w_w_w(Register rd, Register rj, Register rk); ++ void crcc_w_d_w(Register rd, Register rj, Register rk); ++ ++ void syscall(int32_t code); ++ ++ void asrtle_d(Register rj, Register rk); ++ void asrtgt_d(Register rj, Register rk); ++ ++ void rdtimel_w(Register rd, Register rj); ++ void rdtimeh_w(Register rd, Register rj); ++ void rdtime_d(Register rd, Register rj); ++ ++ void cpucfg(Register rd, Register rj); ++ ++ // Break / Trap instructions. 
++ void break_(uint32_t code, bool break_as_stop = false); ++ void stop(uint32_t code = kMaxStopCode); ++ ++ // Arithmetic. ++ void fadd_s(FPURegister fd, FPURegister fj, FPURegister fk); ++ void fadd_d(FPURegister fd, FPURegister fj, FPURegister fk); ++ void fsub_s(FPURegister fd, FPURegister fj, FPURegister fk); ++ void fsub_d(FPURegister fd, FPURegister fj, FPURegister fk); ++ void fmul_s(FPURegister fd, FPURegister fj, FPURegister fk); ++ void fmul_d(FPURegister fd, FPURegister fj, FPURegister fk); ++ void fdiv_s(FPURegister fd, FPURegister fj, FPURegister fk); ++ void fdiv_d(FPURegister fd, FPURegister fj, FPURegister fk); ++ ++ void fmadd_s(FPURegister fd, FPURegister fj, FPURegister fk, FPURegister fa); ++ void fmadd_d(FPURegister fd, FPURegister fj, FPURegister fk, FPURegister fa); ++ void fmsub_s(FPURegister fd, FPURegister fj, FPURegister fk, FPURegister fa); ++ void fmsub_d(FPURegister fd, FPURegister fj, FPURegister fk, FPURegister fa); ++ void fnmadd_s(FPURegister fd, FPURegister fj, FPURegister fk, FPURegister fa); ++ void fnmadd_d(FPURegister fd, FPURegister fj, FPURegister fk, FPURegister fa); ++ void fnmsub_s(FPURegister fd, FPURegister fj, FPURegister fk, FPURegister fa); ++ void fnmsub_d(FPURegister fd, FPURegister fj, FPURegister fk, FPURegister fa); ++ ++ void fmax_s(FPURegister fd, FPURegister fj, FPURegister fk); ++ void fmax_d(FPURegister fd, FPURegister fj, FPURegister fk); ++ void fmin_s(FPURegister fd, FPURegister fj, FPURegister fk); ++ void fmin_d(FPURegister fd, FPURegister fj, FPURegister fk); ++ ++ void fmaxa_s(FPURegister fd, FPURegister fj, FPURegister fk); ++ void fmaxa_d(FPURegister fd, FPURegister fj, FPURegister fk); ++ void fmina_s(FPURegister fd, FPURegister fj, FPURegister fk); ++ void fmina_d(FPURegister fd, FPURegister fj, FPURegister fk); ++ ++ void fabs_s(FPURegister fd, FPURegister fj); ++ void fabs_d(FPURegister fd, FPURegister fj); ++ void fneg_s(FPURegister fd, FPURegister fj); ++ void fneg_d(FPURegister fd, FPURegister fj); ++ ++ void fsqrt_s(FPURegister fd, FPURegister fj); ++ void fsqrt_d(FPURegister fd, FPURegister fj); ++ void frecip_s(FPURegister fd, FPURegister fj); ++ void frecip_d(FPURegister fd, FPURegister fj); ++ void frsqrt_s(FPURegister fd, FPURegister fj); ++ void frsqrt_d(FPURegister fd, FPURegister fj); ++ ++ void fscaleb_s(FPURegister fd, FPURegister fj, FPURegister fk); ++ void fscaleb_d(FPURegister fd, FPURegister fj, FPURegister fk); ++ void flogb_s(FPURegister fd, FPURegister fj); ++ void flogb_d(FPURegister fd, FPURegister fj); ++ void fcopysign_s(FPURegister fd, FPURegister fj, FPURegister fk); ++ void fcopysign_d(FPURegister fd, FPURegister fj, FPURegister fk); ++ ++ void fclass_s(FPURegister fd, FPURegister fj); ++ void fclass_d(FPURegister fd, FPURegister fj); ++ ++ void fcmp_cond_s(FPUCondition cc, FPURegister fj, FPURegister fk, ++ CFRegister cd); ++ void fcmp_cond_d(FPUCondition cc, FPURegister fj, FPURegister fk, ++ CFRegister cd); ++ ++ void fcvt_s_d(FPURegister fd, FPURegister fj); ++ void fcvt_d_s(FPURegister fd, FPURegister fj); ++ ++ void ffint_s_w(FPURegister fd, FPURegister fj); ++ void ffint_s_l(FPURegister fd, FPURegister fj); ++ void ffint_d_w(FPURegister fd, FPURegister fj); ++ void ffint_d_l(FPURegister fd, FPURegister fj); ++ void ftint_w_s(FPURegister fd, FPURegister fj); ++ void ftint_w_d(FPURegister fd, FPURegister fj); ++ void ftint_l_s(FPURegister fd, FPURegister fj); ++ void ftint_l_d(FPURegister fd, FPURegister fj); ++ ++ void ftintrm_w_s(FPURegister fd, FPURegister fj); ++ void 
ftintrm_w_d(FPURegister fd, FPURegister fj); ++ void ftintrm_l_s(FPURegister fd, FPURegister fj); ++ void ftintrm_l_d(FPURegister fd, FPURegister fj); ++ void ftintrp_w_s(FPURegister fd, FPURegister fj); ++ void ftintrp_w_d(FPURegister fd, FPURegister fj); ++ void ftintrp_l_s(FPURegister fd, FPURegister fj); ++ void ftintrp_l_d(FPURegister fd, FPURegister fj); ++ void ftintrz_w_s(FPURegister fd, FPURegister fj); ++ void ftintrz_w_d(FPURegister fd, FPURegister fj); ++ void ftintrz_l_s(FPURegister fd, FPURegister fj); ++ void ftintrz_l_d(FPURegister fd, FPURegister fj); ++ void ftintrne_w_s(FPURegister fd, FPURegister fj); ++ void ftintrne_w_d(FPURegister fd, FPURegister fj); ++ void ftintrne_l_s(FPURegister fd, FPURegister fj); ++ void ftintrne_l_d(FPURegister fd, FPURegister fj); ++ ++ void frint_s(FPURegister fd, FPURegister fj); ++ void frint_d(FPURegister fd, FPURegister fj); ++ ++ void fmov_s(FPURegister fd, FPURegister fj); ++ void fmov_d(FPURegister fd, FPURegister fj); ++ ++ void fsel(CFRegister ca, FPURegister fd, FPURegister fj, FPURegister fk); ++ ++ void movgr2fr_w(FPURegister fd, Register rj); ++ void movgr2fr_d(FPURegister fd, Register rj); ++ void movgr2frh_w(FPURegister fd, Register rj); ++ ++ void movfr2gr_s(Register rd, FPURegister fj); ++ void movfr2gr_d(Register rd, FPURegister fj); ++ void movfrh2gr_s(Register rd, FPURegister fj); ++ ++ void movgr2fcsr(Register rj); ++ void movfcsr2gr(Register rd); ++ ++ void movfr2cf(CFRegister cd, FPURegister fj); ++ void movcf2fr(FPURegister fd, CFRegister cj); ++ ++ void movgr2cf(CFRegister cd, Register rj); ++ void movcf2gr(Register rd, CFRegister cj); ++ ++ void fld_s(FPURegister fd, Register rj, int32_t si12); ++ void fld_d(FPURegister fd, Register rj, int32_t si12); ++ void fst_s(FPURegister fd, Register rj, int32_t si12); ++ void fst_d(FPURegister fd, Register rj, int32_t si12); ++ ++ void fldx_s(FPURegister fd, Register rj, Register rk); ++ void fldx_d(FPURegister fd, Register rj, Register rk); ++ void fstx_s(FPURegister fd, Register rj, Register rk); ++ void fstx_d(FPURegister fd, Register rj, Register rk); ++ ++ void fldgt_s(FPURegister fd, Register rj, Register rk); ++ void fldgt_d(FPURegister fd, Register rj, Register rk); ++ void fldle_s(FPURegister fd, Register rj, Register rk); ++ void fldle_d(FPURegister fd, Register rj, Register rk); ++ void fstgt_s(FPURegister fd, Register rj, Register rk); ++ void fstgt_d(FPURegister fd, Register rj, Register rk); ++ void fstle_s(FPURegister fd, Register rj, Register rk); ++ void fstle_d(FPURegister fd, Register rj, Register rk); ++ ++ // Check the code size generated from label to here. ++ int SizeOfCodeGeneratedSince(Label* label) { ++ return pc_offset() - label->pos(); ++ } ++ ++ // Check the number of instructions generated from label to here. ++ int InstructionsGeneratedSince(Label* label) { ++ return SizeOfCodeGeneratedSince(label) / kInstrSize; ++ } ++ ++ // Class for scoping postponing the trampoline pool generation. ++ class BlockTrampolinePoolScope { ++ public: ++ explicit BlockTrampolinePoolScope(Assembler* assem) : assem_(assem) { ++ assem_->StartBlockTrampolinePool(); ++ } ++ ~BlockTrampolinePoolScope() { assem_->EndBlockTrampolinePool(); } ++ ++ private: ++ Assembler* assem_; ++ ++ DISALLOW_IMPLICIT_CONSTRUCTORS(BlockTrampolinePoolScope); ++ }; ++ ++ // Class for postponing the assembly buffer growth. Typically used for ++ // sequences of instructions that must be emitted as a unit, before ++ // buffer growth (and relocation) can occur. 
++  // This blocking scope is not nestable.
++  class BlockGrowBufferScope {
++   public:
++    explicit BlockGrowBufferScope(Assembler* assem) : assem_(assem) {
++      assem_->StartBlockGrowBuffer();
++    }
++    ~BlockGrowBufferScope() { assem_->EndBlockGrowBuffer(); }
++
++   private:
++    Assembler* assem_;
++
++    DISALLOW_IMPLICIT_CONSTRUCTORS(BlockGrowBufferScope);
++  };
++
++  // Record a deoptimization reason that can be used by a log or cpu profiler.
++  // Use --trace-deopt to enable.
++  void RecordDeoptReason(DeoptimizeReason reason, SourcePosition position,
++                         int id);
++
++  static int RelocateInternalReference(RelocInfo::Mode rmode, Address pc,
++                                       intptr_t pc_delta);
++
++  // Writes a single byte or word of data in the code stream. Used for
++  // inline tables, e.g., jump-tables.
++  void db(uint8_t data);
++  void dd(uint32_t data);
++  void dq(uint64_t data);
++  void dp(uintptr_t data) { dq(data); }
++  void dd(Label* label);
++
++  // Postpone the generation of the trampoline pool for the specified number of
++  // instructions.
++  void BlockTrampolinePoolFor(int instructions);
++
++  // Check if there is less than kGap bytes available in the buffer.
++  // If this is the case, we need to grow the buffer before emitting
++  // an instruction or relocation information.
++  inline bool overflow() const { return pc_ >= reloc_info_writer.pos() - kGap; }
++
++  // Get the number of bytes available in the buffer.
++  inline intptr_t available_space() const {
++    return reloc_info_writer.pos() - pc_;
++  }
++
++  // Read/patch instructions.
++  static Instr instr_at(Address pc) { return *reinterpret_cast<Instr*>(pc); }
++  static void instr_at_put(Address pc, Instr instr) {
++    *reinterpret_cast<Instr*>(pc) = instr;
++  }
++  Instr instr_at(int pos) {
++    return *reinterpret_cast<Instr*>(buffer_start_ + pos);
++  }
++  void instr_at_put(int pos, Instr instr) {
++    *reinterpret_cast<Instr*>(buffer_start_ + pos) = instr;
++  }
++
++  // Check if an instruction is a branch of some kind.
++ static bool IsBranch(Instr instr); ++ static bool IsB(Instr instr); ++ static bool IsBz(Instr instr); ++ static bool IsNal(Instr instr); ++ ++ static bool IsBeq(Instr instr); ++ static bool IsBne(Instr instr); ++ ++ static bool IsJump(Instr instr); ++ static bool IsMov(Instr instr, Register rd, Register rs); ++ static bool IsPcAddi(Instr instr, Register rd, int32_t si20); ++ ++ static bool IsJ(Instr instr); ++ static bool IsLu12i_w(Instr instr); ++ static bool IsOri(Instr instr); ++ static bool IsLu32i_d(Instr instr); ++ static bool IsLu52i_d(Instr instr); ++ ++ static bool IsNop(Instr instr, unsigned int type); ++ static bool IsPop(Instr instr); ++ static bool IsPush(Instr instr); ++ // static bool IsLwRegFpOffset(Instr instr); ++ // static bool IsSwRegFpOffset(Instr instr); ++ // static bool IsLwRegFpNegOffset(Instr instr); ++ // static bool IsSwRegFpNegOffset(Instr instr); ++ ++ static Register GetRjReg(Instr instr); ++ static Register GetRkReg(Instr instr); ++ static Register GetRdReg(Instr instr); ++ ++ static uint32_t GetRj(Instr instr); ++ static uint32_t GetRjField(Instr instr); ++ static uint32_t GetRk(Instr instr); ++ static uint32_t GetRkField(Instr instr); ++ static uint32_t GetRd(Instr instr); ++ static uint32_t GetRdField(Instr instr); ++ static uint32_t GetSa2(Instr instr); ++ static uint32_t GetSa3(Instr instr); ++ static uint32_t GetSa2Field(Instr instr); ++ static uint32_t GetSa3Field(Instr instr); ++ static uint32_t GetOpcodeField(Instr instr); ++ static uint32_t GetFunction(Instr instr); ++ static uint32_t GetFunctionField(Instr instr); ++ static uint32_t GetImmediate16(Instr instr); ++ static uint32_t GetLabelConst(Instr instr); ++ ++ static bool IsAddImmediate(Instr instr); ++ static Instr SetAddImmediateOffset(Instr instr, int16_t offset); ++ ++ static bool IsAndImmediate(Instr instr); ++ static bool IsEmittedConstant(Instr instr); ++ ++ void CheckTrampolinePool(); ++ ++ inline int UnboundLabelsCount() { return unbound_labels_count_; } ++ ++ protected: ++ // Helper function for memory load/store. ++ void AdjustBaseAndOffset(MemOperand* src); ++ ++ inline static void set_target_internal_reference_encoded_at(Address pc, ++ Address target); ++ ++ int64_t buffer_space() const { return reloc_info_writer.pos() - pc_; } ++ ++ // Decode branch instruction at pos and return branch target pos. ++ int target_at(int pos, bool is_internal); ++ ++ // Patch branch instruction at pos to branch to given branch target pos. ++ void target_at_put(int pos, int target_pos, bool is_internal); ++ ++ // Say if we need to relocate with this mode. ++ bool MustUseReg(RelocInfo::Mode rmode); ++ ++ // Record reloc info for current pc_. ++ void RecordRelocInfo(RelocInfo::Mode rmode, intptr_t data = 0); ++ ++ // Block the emission of the trampoline pool before pc_offset. ++ void BlockTrampolinePoolBefore(int pc_offset) { ++ if (no_trampoline_pool_before_ < pc_offset) ++ no_trampoline_pool_before_ = pc_offset; ++ } ++ ++ void StartBlockTrampolinePool() { trampoline_pool_blocked_nesting_++; } ++ ++ void EndBlockTrampolinePool() { ++ trampoline_pool_blocked_nesting_--; ++ if (trampoline_pool_blocked_nesting_ == 0) { ++ CheckTrampolinePoolQuick(1); ++ } ++ } ++ ++ bool is_trampoline_pool_blocked() const { ++ return trampoline_pool_blocked_nesting_ > 0; ++ } ++ ++ bool has_exception() const { return internal_trampoline_exception_; } ++ ++ bool is_trampoline_emitted() const { return trampoline_emitted_; } ++ ++ // Temporarily block automatic assembly buffer growth. 
++ void StartBlockGrowBuffer() { ++ DCHECK(!block_buffer_growth_); ++ block_buffer_growth_ = true; ++ } ++ ++ void EndBlockGrowBuffer() { ++ DCHECK(block_buffer_growth_); ++ block_buffer_growth_ = false; ++ } ++ ++ bool is_buffer_growth_blocked() const { return block_buffer_growth_; } ++ ++ void CheckTrampolinePoolQuick(int extra_instructions = 0) { ++ if (pc_offset() >= next_buffer_check_ - extra_instructions * kInstrSize) { ++ CheckTrampolinePool(); ++ } ++ } ++ ++ private: ++ // Avoid overflows for displacements etc. ++ static const int kMaximalBufferSize = 512 * MB; ++ ++ // Buffer size and constant pool distance are checked together at regular ++ // intervals of kBufferCheckInterval emitted bytes. ++ static constexpr int kBufferCheckInterval = 1 * KB / 2; ++ ++ // Code generation. ++ // The relocation writer's position is at least kGap bytes below the end of ++ // the generated instructions. This is so that multi-instruction sequences do ++ // not have to check for overflow. The same is true for writes of large ++ // relocation info entries. ++ static constexpr int kGap = 64; ++ STATIC_ASSERT(AssemblerBase::kMinimalBufferSize >= 2 * kGap); ++ ++ // Repeated checking whether the trampoline pool should be emitted is rather ++ // expensive. By default we only check again once a number of instructions ++ // has been generated. ++ static constexpr int kCheckConstIntervalInst = 32; ++ static constexpr int kCheckConstInterval = ++ kCheckConstIntervalInst * kInstrSize; ++ ++ int next_buffer_check_; // pc offset of next buffer check. ++ ++ // Emission of the trampoline pool may be blocked in some code sequences. ++ int trampoline_pool_blocked_nesting_; // Block emission if this is not zero. ++ int no_trampoline_pool_before_; // Block emission before this pc offset. ++ ++ // Keep track of the last emitted pool to guarantee a maximal distance. ++ int last_trampoline_pool_end_; // pc offset of the end of the last pool. ++ ++ // Automatic growth of the assembly buffer may be blocked for some sequences. ++ bool block_buffer_growth_; // Block growth when true. ++ ++ // Relocation information generation. ++ // Each relocation is encoded as a variable size value. ++ static constexpr int kMaxRelocSize = RelocInfoWriter::kMaxSize; ++ RelocInfoWriter reloc_info_writer; ++ ++ // The bound position, before this we cannot do instruction elimination. ++ int last_bound_pos_; ++ ++ // Code emission. 
++ inline void CheckBuffer(); ++ void GrowBuffer(); ++ inline void emit(Instr x); ++ inline void emit(uint64_t x); ++ // inline void CheckForEmitInForbiddenSlot(); ++ template ++ inline void EmitHelper(T x); ++ inline void EmitHelper(Instr x); ++ ++ void GenB(Opcode opcode, Register rj, int32_t si21); // opcode:6 ++ void GenB(Opcode opcode, CFRegister cj, int32_t si21, bool isEq); ++ void GenB(Opcode opcode, int32_t si26); ++ void GenBJ(Opcode opcode, Register rj, Register rd, int32_t si16); ++ void GenCmp(Opcode opcode, FPUCondition cond, FPURegister fk, FPURegister fj, ++ CFRegister cd); ++ void GenSel(Opcode opcode, CFRegister ca, FPURegister fk, FPURegister fj, ++ FPURegister rd); ++ ++ void GenRegister(Opcode opcode, Register rj, Register rd, bool rjrd = true); ++ void GenRegister(Opcode opcode, FPURegister fj, FPURegister fd); ++ void GenRegister(Opcode opcode, Register rj, FPURegister fd); ++ void GenRegister(Opcode opcode, FPURegister fj, Register rd); ++ void GenRegister(Opcode opcode, Register rj, FPUControlRegister fd); ++ void GenRegister(Opcode opcode, FPUControlRegister fj, Register rd); ++ void GenRegister(Opcode opcode, FPURegister fj, CFRegister cd); ++ void GenRegister(Opcode opcode, CFRegister cj, FPURegister fd); ++ void GenRegister(Opcode opcode, Register rj, CFRegister cd); ++ void GenRegister(Opcode opcode, CFRegister cj, Register rd); ++ ++ void GenRegister(Opcode opcode, Register rk, Register rj, Register rd); ++ void GenRegister(Opcode opcode, FPURegister fk, FPURegister fj, ++ FPURegister fd); ++ ++ void GenRegister(Opcode opcode, FPURegister fa, FPURegister fk, ++ FPURegister fj, FPURegister fd); ++ void GenRegister(Opcode opcode, Register rk, Register rj, FPURegister fd); ++ ++ void GenImm(Opcode opcode, int32_t bit3, Register rk, Register rj, ++ Register rd); ++ void GenImm(Opcode opcode, int32_t bit6m, int32_t bit6l, Register rj, ++ Register rd); ++ void GenImm(Opcode opcode, int32_t bit20, Register rd); ++ void GenImm(Opcode opcode, int32_t bit15); ++ void GenImm(Opcode opcode, int32_t value, Register rj, Register rd, ++ int32_t value_bits); // 6 | 12 | 14 | 16 ++ void GenImm(Opcode opcode, int32_t bit12, Register rj, FPURegister fd); ++ ++ // Labels. ++ void print(const Label* L); ++ void bind_to(Label* L, int pos); ++ void next(Label* L, bool is_internal); ++ ++ // One trampoline consists of: ++ // - space for trampoline slots, ++ // - space for labels. ++ // ++ // Space for trampoline slots is equal to slot_count * 2 * kInstrSize. ++ // Space for trampoline slots precedes space for labels. Each label is of one ++ // instruction size, so total amount for labels is equal to ++ // label_count * kInstrSize. ++ class Trampoline { ++ public: ++ Trampoline() { ++ start_ = 0; ++ next_slot_ = 0; ++ free_slot_count_ = 0; ++ end_ = 0; ++ } ++ Trampoline(int start, int slot_count) { ++ start_ = start; ++ next_slot_ = start; ++ free_slot_count_ = slot_count; ++ end_ = start + slot_count * kTrampolineSlotsSize; ++ } ++ int start() { return start_; } ++ int end() { return end_; } ++ int take_slot() { ++ int trampoline_slot = kInvalidSlotPos; ++ if (free_slot_count_ <= 0) { ++ // We have run out of space on trampolines. ++ // Make sure we fail in debug mode, so we become aware of each case ++ // when this happens. ++ DCHECK(0); ++ // Internal exception will be caught. 
++ } else { ++ trampoline_slot = next_slot_; ++ free_slot_count_--; ++ next_slot_ += kTrampolineSlotsSize; ++ } ++ return trampoline_slot; ++ } ++ ++ private: ++ int start_; ++ int end_; ++ int next_slot_; ++ int free_slot_count_; ++ }; ++ ++ int32_t get_trampoline_entry(int32_t pos); ++ int unbound_labels_count_; ++ // After trampoline is emitted, long branches are used in generated code for ++ // the forward branches whose target offsets could be beyond reach of branch ++ // instruction. We use this information to trigger different mode of ++ // branch instruction generation, where we use jump instructions rather ++ // than regular branch instructions. ++ bool trampoline_emitted_; ++ static constexpr int kInvalidSlotPos = -1; ++ ++ // Internal reference positions, required for unbounded internal reference ++ // labels. ++ std::set internal_reference_positions_; ++ bool is_internal_reference(Label* L) { ++ return internal_reference_positions_.find(L->pos()) != ++ internal_reference_positions_.end(); ++ } ++ ++ void EmittedCompactBranchInstruction() { prev_instr_compact_branch_ = true; } ++ void ClearCompactBranchState() { prev_instr_compact_branch_ = false; } ++ bool prev_instr_compact_branch_ = false; ++ ++ Trampoline trampoline_; ++ bool internal_trampoline_exception_; ++ ++ RegList scratch_register_list_; ++ ++ private: ++ void AllocateAndInstallRequestedHeapObjects(Isolate* isolate); ++ ++ int WriteCodeComments(); ++ ++ friend class RegExpMacroAssemblerMIPS; ++ friend class RelocInfo; ++ friend class BlockTrampolinePoolScope; ++ friend class EnsureSpace; ++}; ++ ++class EnsureSpace { ++ public: ++ explicit inline EnsureSpace(Assembler* assembler); ++}; ++ ++class V8_EXPORT_PRIVATE UseScratchRegisterScope { ++ public: ++ explicit UseScratchRegisterScope(Assembler* assembler); ++ ~UseScratchRegisterScope(); ++ ++ Register Acquire(); ++ bool hasAvailable() const; ++ ++ private: ++ RegList* available_; ++ RegList old_available_; ++}; ++ ++} // namespace internal ++} // namespace v8 ++ ++#endif // V8_CODEGEN_LA64_ASSEMBLER_LA64_H_ +diff --git a/src/3rdparty/chromium/v8/src/codegen/la64/constants-la64.cc b/src/3rdparty/chromium/v8/src/codegen/la64/constants-la64.cc +new file mode 100644 +index 00000000000..1a406a8c4d3 +--- /dev/null ++++ b/src/3rdparty/chromium/v8/src/codegen/la64/constants-la64.cc +@@ -0,0 +1,100 @@ ++// Copyright 2011 the V8 project authors. All rights reserved. ++// Use of this source code is governed by a BSD-style license that can be ++// found in the LICENSE file. ++ ++#if V8_TARGET_ARCH_LA64 ++ ++#include "src/codegen/la64/constants-la64.h" ++ ++namespace v8 { ++namespace internal { ++ ++// ----------------------------------------------------------------------------- ++// Registers. ++ ++// These register names are defined in a way to match the native disassembler ++// formatting. See for example the command "objdump -d ". ++const char* Registers::names_[kNumSimuRegisters] = { ++ "zero_reg", "ra", "gp", "sp", "a0", "a1", "a2", "a3", "a4", "a5", "a6", ++ "a7", "t0", "t1", "t2", "t3", "t4", "t5", "t6", "t7", "t8", "tp", ++ "fp", "s0", "s1", "s2", "s3", "s4", "s5", "s6", "s7", "s8", "pc"}; ++ ++// List of alias names which can be used when referring to MIPS registers. 
++const Registers::RegisterAlias Registers::aliases_[] = { ++ {0, "zero"}, {23, "cp"}, {kInvalidRegister, nullptr}}; ++ ++const char* Registers::Name(int reg) { ++ const char* result; ++ if ((0 <= reg) && (reg < kNumSimuRegisters)) { ++ result = names_[reg]; ++ } else { ++ result = "noreg"; ++ } ++ return result; ++} ++ ++int Registers::Number(const char* name) { ++ // Look through the canonical names. ++ for (int i = 0; i < kNumSimuRegisters; i++) { ++ if (strcmp(names_[i], name) == 0) { ++ return i; ++ } ++ } ++ ++ // Look through the alias names. ++ int i = 0; ++ while (aliases_[i].reg != kInvalidRegister) { ++ if (strcmp(aliases_[i].name, name) == 0) { ++ return aliases_[i].reg; ++ } ++ i++; ++ } ++ ++ // No register with the reguested name found. ++ return kInvalidRegister; ++} ++ ++const char* FPURegisters::names_[kNumFPURegisters] = { ++ "f0", "f1", "f2", "f3", "f4", "f5", "f6", "f7", "f8", "f9", "f10", ++ "f11", "f12", "f13", "f14", "f15", "f16", "f17", "f18", "f19", "f20", "f21", ++ "f22", "f23", "f24", "f25", "f26", "f27", "f28", "f29", "f30", "f31"}; ++ ++// List of alias names which can be used when referring to MIPS registers. ++const FPURegisters::RegisterAlias FPURegisters::aliases_[] = { ++ {kInvalidRegister, nullptr}}; ++ ++const char* FPURegisters::Name(int creg) { ++ const char* result; ++ if ((0 <= creg) && (creg < kNumFPURegisters)) { ++ result = names_[creg]; ++ } else { ++ result = "nocreg"; ++ } ++ return result; ++} ++ ++int FPURegisters::Number(const char* name) { ++ // Look through the canonical names. ++ for (int i = 0; i < kNumFPURegisters; i++) { ++ if (strcmp(names_[i], name) == 0) { ++ return i; ++ } ++ } ++ ++ // Look through the alias names. ++ int i = 0; ++ while (aliases_[i].creg != kInvalidRegister) { ++ if (strcmp(aliases_[i].name, name) == 0) { ++ return aliases_[i].creg; ++ } ++ i++; ++ } ++ ++ // No Cregister with the reguested name found. ++ return kInvalidFPURegister; ++} ++ ++} // namespace internal ++} // namespace v8 ++ ++#endif // V8_TARGET_ARCH_LA64 +diff --git a/src/3rdparty/chromium/v8/src/codegen/la64/constants-la64.h b/src/3rdparty/chromium/v8/src/codegen/la64/constants-la64.h +new file mode 100644 +index 00000000000..6cf2ec3b7ec +--- /dev/null ++++ b/src/3rdparty/chromium/v8/src/codegen/la64/constants-la64.h +@@ -0,0 +1,1479 @@ ++// Copyright 2012 the V8 project authors. All rights reserved. ++// Use of this source code is governed by a BSD-style license that can be ++// found in the LICENSE file. ++ ++#ifndef V8_CODEGEN_LA64_CONSTANTS_LA64_H_ ++#define V8_CODEGEN_LA64_CONSTANTS_LA64_H_ ++ ++#include "src/base/logging.h" ++#include "src/base/macros.h" ++#include "src/common/globals.h" ++ ++// UNIMPLEMENTED_ macro for LOONGISA. ++#ifdef DEBUG ++#define UNIMPLEMENTED_LOONGISA() \ ++ v8::internal::PrintF("%s, \tline %d: \tfunction %s not implemented. \n", \ ++ __FILE__, __LINE__, __func__) ++#else ++#define UNIMPLEMENTED_LOONGISA() ++#endif ++ ++#define UNSUPPORTED_LOONGISA() \ ++ v8::internal::PrintF("Unsupported instruction.\n") ++ ++const uint32_t kLeastSignificantByteInInt32Offset = 0; ++const uint32_t kLessSignificantWordInDoublewordOffset = 0; ++ ++#ifndef __STDC_FORMAT_MACROS ++#define __STDC_FORMAT_MACROS ++#endif ++#include ++ ++// Defines constants and accessor classes to assemble, disassemble and ++// simulate LA64 instructions. 
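A minimal sketch of the name-lookup behaviour of the Registers helpers defined in constants-la64.cc above; the expected values are read directly off the names_ and aliases_ tables shown there:

    // Values follow from the names_ and aliases_ tables in constants-la64.cc.
    DCHECK_EQ(3, Registers::Number("sp"));    // position in the canonical names_ table
    DCHECK_EQ(23, Registers::Number("cp"));   // resolved through the aliases_ table
    DCHECK_EQ(kInvalidRegister, Registers::Number("fake"));
    // Out-of-range indices fall back to the placeholder: Registers::Name(40) is "noreg".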
++ ++namespace v8 { ++namespace internal { ++ ++constexpr size_t kMaxPCRelativeCodeRangeInMB = 128; ++ ++// ----------------------------------------------------------------------------- ++// Registers and FPURegisters. ++ ++// Number of general purpose registers. ++const int kNumRegisters = 32; ++const int kInvalidRegister = -1; ++ ++// Number of registers with pc. ++const int kNumSimuRegisters = 33; ++ ++// In the simulator, the PC register is simulated as the 34th register. ++const int kPCRegister = 32; ++ ++// Number coprocessor registers. ++const int kNumFPURegisters = 32; ++const int kInvalidFPURegister = -1; ++ ++// FPU (coprocessor 1) control registers. Currently only FCSR is implemented. ++// TODO fcsr0 fcsr1 fcsr2 fcsr3 ++const int kFCSRRegister = 0; ++const int kInvalidFPUControlRegister = -1; ++const uint32_t kFPUInvalidResult = static_cast(1u << 31) - 1; ++const int32_t kFPUInvalidResultNegative = static_cast(1u << 31); ++const uint64_t kFPU64InvalidResult = ++ static_cast(static_cast(1) << 63) - 1; ++const int64_t kFPU64InvalidResultNegative = ++ static_cast(static_cast(1) << 63); ++ ++// FCSR constants. ++// TODO ++const uint32_t kFCSRInexactFlagBit = 16; ++const uint32_t kFCSRUnderflowFlagBit = 17; ++const uint32_t kFCSROverflowFlagBit = 18; ++const uint32_t kFCSRDivideByZeroFlagBit = 19; ++const uint32_t kFCSRInvalidOpFlagBit = 20; ++ ++const uint32_t kFCSRInexactFlagMask = 1 << kFCSRInexactFlagBit; ++const uint32_t kFCSRUnderflowFlagMask = 1 << kFCSRUnderflowFlagBit; ++const uint32_t kFCSROverflowFlagMask = 1 << kFCSROverflowFlagBit; ++const uint32_t kFCSRDivideByZeroFlagMask = 1 << kFCSRDivideByZeroFlagBit; ++const uint32_t kFCSRInvalidOpFlagMask = 1 << kFCSRInvalidOpFlagBit; ++ ++const uint32_t kFCSRFlagMask = ++ kFCSRInexactFlagMask | kFCSRUnderflowFlagMask | kFCSROverflowFlagMask | ++ kFCSRDivideByZeroFlagMask | kFCSRInvalidOpFlagMask; ++ ++const uint32_t kFCSRExceptionFlagMask = kFCSRFlagMask ^ kFCSRInexactFlagMask; ++ ++// 'preld' instruction hints ++const int32_t kPrefHintLoad = 0; ++const int32_t kPrefHintStore = 8; ++ ++// Actual value of root register is offset from the root array's start ++// to take advantage of negative displacement values. ++// TODO(sigurds): Choose best value. ++constexpr int kRootRegisterBias = 256; ++ ++// Helper functions for converting between register numbers and names. ++class Registers { ++ public: ++ // Return the name of the register. ++ static const char* Name(int reg); ++ ++ // Lookup the register number for the name provided. ++ static int Number(const char* name); ++ ++ struct RegisterAlias { ++ int reg; ++ const char* name; ++ }; ++ ++ static const int64_t kMaxValue = 0x7fffffffffffffffl; ++ static const int64_t kMinValue = 0x8000000000000000l; ++ ++ private: ++ static const char* names_[kNumSimuRegisters]; ++ static const RegisterAlias aliases_[]; ++}; ++ ++// Helper functions for converting between register numbers and names. ++class FPURegisters { ++ public: ++ // Return the name of the register. ++ static const char* Name(int reg); ++ ++ // Lookup the register number for the name provided. ++ static int Number(const char* name); ++ ++ struct RegisterAlias { ++ int creg; ++ const char* name; ++ }; ++ ++ private: ++ static const char* names_[kNumFPURegisters]; ++ static const RegisterAlias aliases_[]; ++}; ++ ++// ----------------------------------------------------------------------------- ++// Instructions encoding constants. ++ ++// On LoongISA all instructions are 32 bits. 
++using Instr = int32_t; ++ ++// Special Software Interrupt codes when used in the presence of the LA64 ++// simulator. ++enum SoftwareInterruptCodes { ++ // Transition to C code. ++ call_rt_redirected = 0x7fff ++}; ++ ++// On LA64 Simulator breakpoints can have different codes: ++// - Breaks between 0 and kMaxWatchpointCode are treated as simple watchpoints, ++// the simulator will run through them and print the registers. ++// - Breaks between kMaxWatchpointCode and kMaxStopCode are treated as stop() ++// instructions (see Assembler::stop()). ++// - Breaks larger than kMaxStopCode are simple breaks, dropping you into the ++// debugger. ++const uint32_t kMaxWatchpointCode = 31; ++const uint32_t kMaxStopCode = 127; ++STATIC_ASSERT(kMaxWatchpointCode < kMaxStopCode); ++ ++// ----- Fields offset and length. ++const int kRjShift = 5; ++const int kRjBits = 5; ++const int kRkShift = 10; ++const int kRkBits = 5; ++const int kRdShift = 0; ++const int kRdBits = 5; ++const int kSaShift = 15; ++const int kSa2Bits = 2; ++const int kSa3Bits = 3; ++const int kCdShift = 0; ++const int kCdBits = 3; ++const int kCjShift = 5; ++const int kCjBits = 3; ++const int kCodeShift = 0; ++const int kCodeBits = 15; ++const int kCondShift = 15; ++const int kCondBits = 5; ++const int kUi5Shift = 10; ++const int kUi5Bits = 5; ++const int kUi6Shift = 10; ++const int kUi6Bits = 6; ++const int kUi12Shift = 10; ++const int kUi12Bits = 12; ++const int kSi12Shift = 10; ++const int kSi12Bits = 12; ++const int kSi14Shift = 10; ++const int kSi14Bits = 14; ++const int kSi16Shift = 10; ++const int kSi16Bits = 16; ++const int kSi20Shift = 5; ++const int kSi20Bits = 20; ++const int kMsbwShift = 16; ++const int kMsbwBits = 5; ++const int kLsbwShift = 10; ++const int kLsbwBits = 5; ++const int kMsbdShift = 16; ++const int kMsbdBits = 6; ++const int kLsbdShift = 10; ++const int kLsbdBits = 6; ++const int kFdShift = 0; ++const int kFdBits = 5; ++const int kFjShift = 5; ++const int kFjBits = 5; ++const int kFkShift = 10; ++const int kFkBits = 5; ++const int kFaShift = 15; ++const int kFaBits = 5; ++const int kCaShift = 15; ++const int kCaBits = 3; ++const int kHint15Shift = 0; ++const int kHint15Bits = 15; ++const int kHint5Shift = 0; ++const int kHint5Bits = 5; ++const int kOffsLowShift = 10; ++const int kOffsLowBits = 16; ++const int kOffs26HighShift = 0; ++const int kOffs26HighBits = 10; ++const int kOffs21HighShift = 0; ++const int kOffs21HighBits = 5; ++const int kImm12Shift = 0; ++const int kImm12Bits = 12; ++const int kImm16Shift = 0; ++const int kImm16Bits = 16; ++const int kImm26Shift = 0; ++const int kImm26Bits = 26; ++const int kImm28Shift = 0; ++const int kImm28Bits = 28; ++const int kImm32Shift = 0; ++const int kImm32Bits = 32; ++ ++// ----- Miscellaneous useful masks. ++// Instruction bit masks. ++const int kRjFieldMask = ((1 << kRjBits) - 1) << kRjShift; ++const int kRkFieldMask = ((1 << kRkBits) - 1) << kRkShift; ++const int kRdFieldMask = ((1 << kRdBits) - 1) << kRdShift; ++const int kSa2FieldMask = ((1 << kSa2Bits) - 1) << kSaShift; ++const int kSa3FieldMask = ((1 << kSa3Bits) - 1) << kSaShift; ++// Misc masks. 
++const int kHiMaskOf32 = 0xffff << 16; // Only to be used with 32-bit values ++const int kLoMaskOf32 = 0xffff; ++const int kSignMaskOf32 = 0x80000000; // Only to be used with 32-bit values ++const int64_t kTop16MaskOf64 = (int64_t)0xffff << 48; ++const int64_t kHigher16MaskOf64 = (int64_t)0xffff << 32; ++const int64_t kUpper16MaskOf64 = (int64_t)0xffff << 16; ++ ++const int kImm12Mask = ((1 << kImm12Bits) - 1) << kImm12Shift; ++const int kImm16Mask = ((1 << kImm16Bits) - 1) << kImm16Shift; ++const int kImm26Mask = ((1 << kImm26Bits) - 1) << kImm26Shift; ++const int kImm28Mask = ((1 << kImm28Bits) - 1) << kImm28Shift; ++ ++// ----- LA64 Opcodes and Function Fields. ++enum Opcode : uint32_t { ++ BEQZ = 0x10U << 26, ++ BNEZ = 0x11U << 26, ++ BCZ = 0x12U << 26, // BCEQZ & BCNEZ ++ JIRL = 0x13U << 26, ++ B = 0x14U << 26, ++ BL = 0x15U << 26, ++ BEQ = 0x16U << 26, ++ BNE = 0x17U << 26, ++ BLT = 0x18U << 26, ++ BGE = 0x19U << 26, ++ BLTU = 0x1aU << 26, ++ BGEU = 0x1bU << 26, ++ ++ ADDU16I_D = 0x4U << 26, ++ ++ LU12I_W = 0xaU << 25, ++ LU32I_D = 0xbU << 25, ++ PCADDI = 0xcU << 25, ++ PCALAU12I = 0xdU << 25, ++ PCADDU12I = 0xeU << 25, ++ PCADDU18I = 0xfU << 25, ++ ++ CSR = 0x4U << 24, // CSRRD & CSRWR & CSRXCHG ++ ++ LL_W = 0x20U << 24, ++ SC_W = 0x21U << 24, ++ LL_D = 0x22U << 24, ++ SC_D = 0x23U << 24, ++ LDPTR_W = 0x24U << 24, ++ STPTR_W = 0x25U << 24, ++ LDPTR_D = 0x26U << 24, ++ STPTR_D = 0x27U << 24, ++ ++ BSTR_W = 0x1U << 22, // BSTRINS_W & BSTRPICK_W ++ BSTRINS_W = BSTR_W, ++ BSTRPICK_W = BSTR_W, ++ BSTRINS_D = 0x2U << 22, ++ BSTRPICK_D = 0x3U << 22, ++ ++ SLTI = 0x8U << 22, ++ SLTUI = 0x9U << 22, ++ ADDI_W = 0xaU << 22, ++ ADDI_D = 0xbU << 22, ++ LU52I_D = 0xcU << 22, ++ ANDI = 0xdU << 22, ++ ORI = 0xeU << 22, ++ XORI = 0xfU << 22, ++ ++ CACHE = 0x18U << 22, ++ ++ LD_B = 0xa0U << 22, ++ LD_H = 0xa1U << 22, ++ LD_W = 0xa2U << 22, ++ LD_D = 0xa3U << 22, ++ ST_B = 0xa4U << 22, ++ ST_H = 0xa5U << 22, ++ ST_W = 0xa6U << 22, ++ ST_D = 0xa7U << 22, ++ LD_BU = 0xa8U << 22, ++ LD_HU = 0xa9U << 22, ++ LD_WU = 0xaaU << 22, ++ PRELD = 0xabU << 22, ++ FLD_S = 0xacU << 22, ++ FST_S = 0xadU << 22, ++ FLD_D = 0xaeU << 22, ++ FST_D = 0xafU << 22, ++ ++ FMADD_S = 0x81U << 20, ++ FMADD_D = 0x82U << 20, ++ FMSUB_S = 0x85U << 20, ++ FMSUB_D = 0x86U << 20, ++ FNMADD_S = 0x89U << 20, ++ FNMADD_D = 0x8aU << 20, ++ FNMSUB_S = 0x8dU << 20, ++ FNMSUB_D = 0x8eU << 20, ++ FCMP_COND_S = 0xc1U << 20, ++ FCMP_COND_D = 0xc2U << 20, ++ ++ BYTEPICK_D = 0x3U << 18, ++ BYTEPICK_W = 0x2U << 18, ++ ++ LDDIR = 0x190U << 18, ++ LDPTE = 0x191U << 18, ++ ++ FSEL = 0x340U << 18, ++ ++ ALSL = 0x1U << 18, ++ ALSL_W = ALSL, ++ ALSL_WU = ALSL, ++ ++ ALSL_D = 0xbU << 18, ++ ++ SLLI_W = 0x40U << 16, ++ SRLI_W = 0x44U << 16, ++ SRAI_W = 0x48U << 16, ++ ROTRI_W = 0x4cU << 16, ++ ++ SLLI_D = 0x41U << 16, ++ SRLI_D = 0x45U << 16, ++ SRAI_D = 0x49U << 16, ++ ROTRI_D = 0x4dU << 16, ++ ++ SLLI = 0x10U << 18, ++ SRLI = 0x11U << 18, ++ SRAI = 0x12U << 18, ++ ROTRI = 0x13U << 18, ++ ++ ASRTLE_D = 0x2U << 15, ++ ASRTGT_D = 0x3U << 15, ++ ++ ADD_W = 0x20U << 15, ++ ADD_D = 0x21U << 15, ++ SUB_W = 0x22U << 15, ++ SUB_D = 0x23U << 15, ++ SLT = 0x24U << 15, ++ SLTU = 0x25U << 15, ++ MASKNEZ = 0x26U << 15, ++ MASKEQZ = 0x27U << 15, ++ NOR = 0x28U << 15, ++ AND = 0x29U << 15, ++ OR = 0x2aU << 15, ++ XOR = 0x2bU << 15, ++ ORN = 0x2cU << 15, ++ ANDN = 0x2dU << 15, ++ SLL_W = 0x2eU << 15, ++ SRL_W = 0x2fU << 15, ++ SRA_W = 0x30U << 15, ++ SLL_D = 0x31U << 15, ++ SRL_D = 0x32U << 15, ++ SRA_D = 0x33U << 15, ++ ROTR_W = 0x36U << 15, ++ ROTR_D = 0x37U << 15, 
++ MUL_W = 0x38U << 15, ++ MULH_W = 0x39U << 15, ++ MULH_WU = 0x3aU << 15, ++ MUL_D = 0x3bU << 15, ++ MULH_D = 0x3cU << 15, ++ MULH_DU = 0x3dU << 15, ++ MULW_D_W = 0x3eU << 15, ++ MULW_D_WU = 0x3fU << 15, ++ ++ DIV_W = 0x40U << 15, ++ MOD_W = 0x41U << 15, ++ DIV_WU = 0x42U << 15, ++ MOD_WU = 0x43U << 15, ++ DIV_D = 0x44U << 15, ++ MOD_D = 0x45U << 15, ++ DIV_DU = 0x46U << 15, ++ MOD_DU = 0x47U << 15, ++ ++ CRC_W_B_W = 0x48U << 15, ++ CRC_W_H_W = 0x49U << 15, ++ CRC_W_W_W = 0x4aU << 15, ++ CRC_W_D_W = 0x4bU << 15, ++ CRCC_W_B_W = 0x4cU << 15, ++ CRCC_W_H_W = 0x4dU << 15, ++ CRCC_W_W_W = 0x4eU << 15, ++ CRCC_W_D_W = 0x4fU << 15, ++ ++ BREAK = 0x54U << 15, ++ DBGCALL = 0x55U << 15, ++ SYSCALL = 0x56U << 15, ++ HYPCALL = 0x57U << 15, ++ ++ FADD_S = 0x201U << 15, ++ FADD_D = 0x202U << 15, ++ FSUB_S = 0x205U << 15, ++ FSUB_D = 0x206U << 15, ++ FMUL_S = 0x209U << 15, ++ FMUL_D = 0x20aU << 15, ++ FDIV_S = 0x20dU << 15, ++ FDIV_D = 0x20eU << 15, ++ FMAX_S = 0x211U << 15, ++ FMAX_D = 0x212U << 15, ++ FMIN_S = 0x215U << 15, ++ FMIN_D = 0x216U << 15, ++ FMAXA_S = 0x219U << 15, ++ FMAXA_D = 0x21aU << 15, ++ FMINA_S = 0x21dU << 15, ++ FMINA_D = 0x21eU << 15, ++ FSCALEB_S = 0x221U << 15, ++ FSCALEB_D = 0x222U << 15, ++ FCOPYSIGN_S = 0x225U << 15, ++ FCOPYSIGN_D = 0x226U << 15, ++ ++ WAIT_INVTLB = 0xc91U << 15, // wait & invtlb ++ ++ LDX_B = 0x7000U << 15, ++ LDX_H = 0x7008U << 15, ++ LDX_W = 0x7010U << 15, ++ LDX_D = 0x7018U << 15, ++ STX_B = 0x7020U << 15, ++ STX_H = 0x7028U << 15, ++ STX_W = 0x7030U << 15, ++ STX_D = 0x7038U << 15, ++ LDX_BU = 0x7040U << 15, ++ LDX_HU = 0x7048U << 15, ++ LDX_WU = 0x7050U << 15, ++ PRELDX = 0x7058U << 15, ++ FLDX_S = 0x7060U << 15, ++ FLDX_D = 0x7068U << 15, ++ FSTX_S = 0x7070U << 15, ++ FSTX_D = 0x7078U << 15, ++ ++ AMSWAP_W = 0x70c0U << 15, ++ AMSWAP_D = 0x70c1U << 15, ++ AMADD_W = 0x70c2U << 15, ++ AMADD_D = 0x70c3U << 15, ++ AMAND_W = 0x70c4U << 15, ++ AMAND_D = 0x70c5U << 15, ++ AMOR_W = 0x70c6U << 15, ++ AMOR_D = 0x70c7U << 15, ++ AMXOR_W = 0x70c8U << 15, ++ AMXOR_D = 0x70c9U << 15, ++ AMMAX_W = 0x70caU << 15, ++ AMMAX_D = 0x70cbU << 15, ++ AMMIN_W = 0x70ccU << 15, ++ AMMIN_D = 0x70cdU << 15, ++ AMMAX_WU = 0x70ceU << 15, ++ AMMAX_DU = 0x70cfU << 15, ++ AMMIN_WU = 0x70d0U << 15, ++ AMMIN_DU = 0x70d1U << 15, ++ AMSWAP_DB_W = 0x70d2U << 15, ++ AMSWAP_DB_D = 0x70d3U << 15, ++ AMADD_DB_W = 0x70d4U << 15, ++ AMADD_DB_D = 0x70d5U << 15, ++ AMAND_DB_W = 0x70d6U << 15, ++ AMAND_DB_D = 0x70d7U << 15, ++ AMOR_DB_W = 0x70d8U << 15, ++ AMOR_DB_D = 0x70d9U << 15, ++ AMXOR_DB_W = 0x70daU << 15, ++ AMXOR_DB_D = 0x70dbU << 15, ++ AMMAX_DB_W = 0x70dcU << 15, ++ AMMAX_DB_D = 0x70ddU << 15, ++ AMMIN_DB_W = 0x70deU << 15, ++ AMMIN_DB_D = 0x70dfU << 15, ++ AMMAX_DB_WU = 0x70e0U << 15, ++ AMMAX_DB_DU = 0x70e1U << 15, ++ AMMIN_DB_WU = 0x70e2U << 15, ++ AMMIN_DB_DU = 0x70e3U << 15, ++ ++ DBAR = 0x70e4U << 15, ++ IBAR = 0x70e5U << 15, ++ ++ FLDGT_S = 0x70e8U << 15, ++ FLDGT_D = 0x70e9U << 15, ++ FLDLE_S = 0x70eaU << 15, ++ FLDLE_D = 0x70ebU << 15, ++ FSTGT_S = 0x70ecU << 15, ++ FSTGT_D = 0x70edU << 15, ++ FSTLE_S = 0x70eeU << 15, ++ FSTLE_D = 0x70efU << 15, ++ LDGT_B = 0x70f0U << 15, ++ LDGT_H = 0x70f1U << 15, ++ LDGT_W = 0x70f2U << 15, ++ LDGT_D = 0x70f3U << 15, ++ LDLE_B = 0x70f4U << 15, ++ LDLE_H = 0x70f5U << 15, ++ LDLE_W = 0x70f6U << 15, ++ LDLE_D = 0x70f7U << 15, ++ STGT_B = 0x70f8U << 15, ++ STGT_H = 0x70f9U << 15, ++ STGT_W = 0x70faU << 15, ++ STGT_D = 0x70fbU << 15, ++ STLE_B = 0x70fcU << 15, ++ STLE_H = 0x70fdU << 15, ++ STLE_W = 0x70feU << 15, ++ STLE_D = 0x70ffU << 15, ++ ++ 
CLO_W = 0X4U << 10, ++ CLZ_W = 0X5U << 10, ++ CTO_W = 0X6U << 10, ++ CTZ_W = 0X7U << 10, ++ CLO_D = 0X8U << 10, ++ CLZ_D = 0X9U << 10, ++ CTO_D = 0XaU << 10, ++ CTZ_D = 0XbU << 10, ++ REVB_2H = 0XcU << 10, ++ REVB_4H = 0XdU << 10, ++ REVB_2W = 0XeU << 10, ++ REVB_D = 0XfU << 10, ++ REVH_2W = 0X10U << 10, ++ REVH_D = 0X11U << 10, ++ BITREV_4B = 0X12U << 10, ++ BITREV_8B = 0X13U << 10, ++ BITREV_W = 0X14U << 10, ++ BITREV_D = 0X15U << 10, ++ EXT_W_H = 0X16U << 10, ++ EXT_W_B = 0X17U << 10, ++ RDTIMEL_W = 0X18U << 10, ++ RDTIMEH_W = 0X19U << 10, ++ RDTIME_D = 0X1aU << 10, ++ CPUCFG_W = 0X1bU << 10, ++ ++ FABS_S = 0X4501U << 10, ++ FABS_D = 0X4502U << 10, ++ FNEG_S = 0X4505U << 10, ++ FNEG_D = 0X4506U << 10, ++ FLOGB_S = 0X4509U << 10, ++ FLOGB_D = 0X450aU << 10, ++ FCLASS_S = 0X450dU << 10, ++ FCLASS_D = 0X450eU << 10, ++ FSQRT_S = 0X4511U << 10, ++ FSQRT_D = 0X4512U << 10, ++ FRECIP_S = 0X4515U << 10, ++ FRECIP_D = 0X4516U << 10, ++ FRSQRT_S = 0X4519U << 10, ++ FRSQRT_D = 0X451aU << 10, ++ FMOV_S = 0X4525U << 10, ++ FMOV_D = 0X4526U << 10, ++ MOVGR2FR_W = 0X4529U << 10, ++ MOVGR2FR_D = 0X452aU << 10, ++ MOVGR2FRH_W = 0X452bU << 10, ++ MOVFR2GR_S = 0X452dU << 10, ++ MOVFR2GR_D = 0X452eU << 10, ++ MOVFRH2GR_S = 0X452fU << 10, ++ MOVGR2FCSR = 0X4530U << 10, ++ MOVFCSR2GR = 0X4532U << 10, ++ MOVFR2CF = 0X4534U << 10, ++ MOVGR2CF = 0X4536U << 10, ++ ++ FCVT_S_D = 0x4646U << 10, ++ FCVT_D_S = 0x4649U << 10, ++ FTINTRM_W_S = 0x4681U << 10, ++ FTINTRM_W_D = 0x4682U << 10, ++ FTINTRM_L_S = 0x4689U << 10, ++ FTINTRM_L_D = 0x468aU << 10, ++ FTINTRP_W_S = 0x4691U << 10, ++ FTINTRP_W_D = 0x4692U << 10, ++ FTINTRP_L_S = 0x4699U << 10, ++ FTINTRP_L_D = 0x469aU << 10, ++ FTINTRZ_W_S = 0x46a1U << 10, ++ FTINTRZ_W_D = 0x46a2U << 10, ++ FTINTRZ_L_S = 0x46a9U << 10, ++ FTINTRZ_L_D = 0x46aaU << 10, ++ FTINTRNE_W_S = 0x46b1U << 10, ++ FTINTRNE_W_D = 0x46b2U << 10, ++ FTINTRNE_L_S = 0x46b9U << 10, ++ FTINTRNE_L_D = 0x46baU << 10, ++ FTINT_W_S = 0x46c1U << 10, ++ FTINT_W_D = 0x46c2U << 10, ++ FTINT_L_S = 0x46c9U << 10, ++ FTINT_L_D = 0x46caU << 10, ++ FFINT_S_W = 0x4744U << 10, ++ FFINT_S_L = 0x4746U << 10, ++ FFINT_D_W = 0x4748U << 10, ++ FFINT_D_L = 0x474aU << 10, ++ FRINT_S = 0x4791U << 10, ++ FRINT_D = 0x4792U << 10, ++ ++ IOCSRRD_B = 0x19200U << 10, ++ IOCSRRD_H = 0x19201U << 10, ++ IOCSRRD_W = 0x19202U << 10, ++ IOCSRRD_D = 0x19203U << 10, ++ IOCSRWR_B = 0x19204U << 10, ++ IOCSRWR_H = 0x19205U << 10, ++ IOCSRWR_W = 0x19206U << 10, ++ IOCSRWR_D = 0x19207U << 10, ++ ++ MOVCF2FR = 0x4535U << 10, ++ MOVCF2GR = 0x4537U << 10, ++ ++ TLBINV = 0x06482000U, ++ TLBFLUSH = 0x06482400U, ++ TLBP = 0x06482800U, ++ TLBR = 0x06482c00U, ++ TLBWI = 0x06483000U, ++ TLBWR = 0x06483400U, ++ ERET = 0x06483800U ++}; ++ ++// ----- Emulated conditions. ++// On LA64 we use this enum to abstract from conditional branch instructions. ++// The 'U' prefix is used to specify unsigned comparisons. ++enum Condition { ++ // Any value < 0 is considered no_condition. ++ kNoCondition = -1, ++ overflow = 0, ++ no_overflow = 1, ++ Uless = 2, ++ Ugreater_equal = 3, ++ Uless_equal = 4, ++ Ugreater = 5, ++ equal = 6, ++ not_equal = 7, // Unordered or Not Equal. ++ negative = 8, ++ positive = 9, ++ parity_even = 10, ++ parity_odd = 11, ++ less = 12, ++ greater_equal = 13, ++ less_equal = 14, ++ greater = 15, ++ ueq = 16, // Unordered or Equal. ++ ogl = 17, // Ordered and Not Equal. ++ cc_always = 18, ++ ++ // Aliases. 
++ carry = Uless, ++ not_carry = Ugreater_equal, ++ zero = equal, ++ eq = equal, ++ not_zero = not_equal, ++ ne = not_equal, ++ nz = not_equal, ++ sign = negative, ++ not_sign = positive, ++ mi = negative, ++ pl = positive, ++ hi = Ugreater, ++ ls = Uless_equal, ++ ge = greater_equal, ++ lt = less, ++ gt = greater, ++ le = less_equal, ++ hs = Ugreater_equal, ++ lo = Uless, ++ al = cc_always, ++ ult = Uless, ++ uge = Ugreater_equal, ++ ule = Uless_equal, ++ ugt = Ugreater, ++ cc_default = kNoCondition ++}; ++ ++// Returns the equivalent of !cc. ++// Negation of the default kNoCondition (-1) results in a non-default ++// no_condition value (-2). As long as tests for no_condition check ++// for condition < 0, this will work as expected. ++inline Condition NegateCondition(Condition cc) { ++ DCHECK(cc != cc_always); ++ return static_cast(cc ^ 1); ++} ++ ++inline Condition NegateFpuCondition(Condition cc) { ++ DCHECK(cc != cc_always); ++ switch (cc) { ++ case ult: ++ return ge; ++ case ugt: ++ return le; ++ case uge: ++ return lt; ++ case ule: ++ return gt; ++ case lt: ++ return uge; ++ case gt: ++ return ule; ++ case ge: ++ return ult; ++ case le: ++ return ugt; ++ case eq: ++ return ne; ++ case ne: ++ return eq; ++ case ueq: ++ return ogl; ++ case ogl: ++ return ueq; ++ default: ++ return cc; ++ } ++} ++ ++// ----- Coprocessor conditions. ++enum FPUCondition { ++ kNoFPUCondition = -1, ++ ++ CAF = 0x00, // False. ++ SAF = 0x01, // False. ++ CLT = 0x02, // Less Than quiet ++ // SLT = 0x03, // Less Than signaling ++ CEQ = 0x04, ++ SEQ = 0x05, ++ CLE = 0x06, ++ SLE = 0x07, ++ CUN = 0x08, ++ SUN = 0x09, ++ CULT = 0x0a, ++ SULT = 0x0b, ++ CUEQ = 0x0c, ++ SUEQ = 0x0d, ++ CULE = 0x0e, ++ SULE = 0x0f, ++ CNE = 0x10, ++ SNE = 0x11, ++ COR = 0x14, ++ SOR = 0x15, ++ CUNE = 0x18, ++ SUNE = 0x19, ++}; ++ ++const uint32_t kFPURoundingModeShift = 8; ++const uint32_t kFPURoundingModeMask = 0b11 << kFPURoundingModeShift; ++ ++// FPU rounding modes. ++enum FPURoundingMode { ++ RN = 0b00 << kFPURoundingModeShift, // Round to Nearest. ++ RZ = 0b01 << kFPURoundingModeShift, // Round towards zero. ++ RP = 0b10 << kFPURoundingModeShift, // Round towards Plus Infinity. ++ RM = 0b11 << kFPURoundingModeShift, // Round towards Minus Infinity. ++ ++ // Aliases. ++ kRoundToNearest = RN, ++ kRoundToZero = RZ, ++ kRoundToPlusInf = RP, ++ kRoundToMinusInf = RM, ++ ++ mode_round = RN, ++ mode_ceil = RP, ++ mode_floor = RM, ++ mode_trunc = RZ ++}; ++ ++enum CheckForInexactConversion { ++ kCheckForInexactConversion, ++ kDontCheckForInexactConversion ++}; ++ ++enum class MaxMinKind : int { kMin = 0, kMax = 1 }; ++ ++// ----------------------------------------------------------------------------- ++// Hints. ++ ++// Branch hints are not used on the LA64. They are defined so that they can ++// appear in shared function signatures, but will be ignored in LA64 ++// implementations. ++enum Hint { no_hint = 0 }; ++ ++inline Hint NegateHint(Hint hint) { return no_hint; } ++ ++// ----------------------------------------------------------------------------- ++// Specific instructions, constants, and masks. ++// These constants are declared in assembler-mips.cc, as they use named ++// registers and other constants. ++ ++// addi_d(sp, sp, 8) aka Pop() operation or part of Pop(r) ++// operations as post-increment of sp. ++extern const Instr kPopInstruction; ++// addi_d(sp, sp, -8) part of Push(r) operation as pre-decrement of sp. 
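The Condition values above are laid out as adjacent even/odd pairs (overflow/no_overflow, Uless/Ugreater_equal, equal/not_equal, ...), which is what lets NegateCondition get away with flipping the lowest bit. A small standalone illustration of that invariant follows; it uses a trimmed copy of the enum values and is my own example, not code from the patch.

#include <cassert>

enum Cond { kEqual = 6, kNotEqual = 7, kLess = 12, kGreaterEqual = 13 };

constexpr Cond Negate(Cond cc) { return static_cast<Cond>(cc ^ 1); }

int main() {
  static_assert(Negate(kEqual) == kNotEqual, "equal pairs with not_equal");
  static_assert(Negate(kLess) == kGreaterEqual, "less pairs with greater_equal");
  assert(Negate(Negate(kLess)) == kLess);  // flipping twice restores the input
  return 0;
}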
++extern const Instr kPushInstruction;
++// St_d(r, MemOperand(sp, 0))
++extern const Instr kPushRegPattern;
++// Ld_d(r, MemOperand(sp, 0))
++extern const Instr kPopRegPattern;
++// extern const Instr kLwRegFpOffsetPattern;
++// extern const Instr kSwRegFpOffsetPattern;
++// extern const Instr kLwRegFpNegOffsetPattern;
++// extern const Instr kSwRegFpNegOffsetPattern;
++// A mask for the Rk register for push, pop, lw, sw instructions.
++extern const Instr kRtMask;
++// extern const Instr kLwSwInstrTypeMask;
++// extern const Instr kLwSwInstrArgumentMask;
++// extern const Instr kLwSwOffsetMask;
++
++// Break 0xfffff, reserved for redirected real time call.
++const Instr rtCallRedirInstr = BREAK | call_rt_redirected;
++// A nop instruction. (Encoding of addi_w 0 0 0).
++const Instr nopInstr = ADDI_W;
++
++constexpr uint8_t kInstrSize = 4;
++constexpr uint8_t kInstrSizeLog2 = 2;
++
++class InstructionBase {
++ public:
++  enum {
++    // On LoongArch, PC cannot actually be directly accessed. We behave as if
++    // PC was always the value of the current instruction being executed.
++    kPCReadOffset = 0
++  };
++
++  enum Type {
++    kOp6Type,
++    kOp7Type,
++    kOp8Type,
++    kOp10Type,
++    kOp12Type,
++    kOp14Type,
++    kOp17Type,
++    kOp22Type,
++    kUnsupported = -1
++  };
++
++  // Get the raw instruction bits.
++  inline Instr InstructionBits() const {
++    return *reinterpret_cast<const Instr*>(this);
++  }
++
++  // Set the raw instruction bits to value.
++  inline void SetInstructionBits(Instr value) {
++    *reinterpret_cast<Instr*>(this) = value;
++  }
++
++  // Read one particular bit out of the instruction bits.
++  inline int Bit(int nr) const { return (InstructionBits() >> nr) & 1; }
++
++  // Read a bit field out of the instruction bits.
++  inline int Bits(int hi, int lo) const {
++    return (InstructionBits() >> lo) & ((2U << (hi - lo)) - 1);
++  }
++
++  // Safe to call within InstructionType().
++  inline int RjFieldRawNoAssert() const {
++    return InstructionBits() & kRjFieldMask;
++  }
++
++  // Get the encoding type of the instruction.
++ inline Type InstructionType() const; ++ ++ protected: ++ InstructionBase() {} ++}; ++ ++template ++class InstructionGetters : public T { ++ public: ++ inline int RjValue() const { ++ return this->Bits(kRjShift + kRjBits - 1, kRjShift); ++ } ++ ++ inline int RkValue() const { ++ return this->Bits(kRkShift + kRkBits - 1, kRkShift); ++ } ++ ++ inline int RdValue() const { ++ return this->Bits(kRdShift + kRdBits - 1, kRdShift); ++ } ++ ++ inline int Sa2Value() const { ++ return this->Bits(kSaShift + kSa2Bits - 1, kSaShift); ++ } ++ ++ inline int Sa3Value() const { ++ return this->Bits(kSaShift + kSa3Bits - 1, kSaShift); ++ } ++ ++ inline int Ui5Value() const { ++ return this->Bits(kUi5Shift + kUi5Bits - 1, kUi5Shift); ++ } ++ ++ inline int Ui6Value() const { ++ return this->Bits(kUi6Shift + kUi6Bits - 1, kUi6Shift); ++ } ++ ++ inline int Ui12Value() const { ++ return this->Bits(kUi12Shift + kUi12Bits - 1, kUi12Shift); ++ } ++ ++ inline int LsbwValue() const { ++ return this->Bits(kLsbwShift + kLsbwBits - 1, kLsbwShift); ++ } ++ ++ inline int MsbwValue() const { ++ return this->Bits(kMsbwShift + kMsbwBits - 1, kMsbwShift); ++ } ++ ++ inline int LsbdValue() const { ++ return this->Bits(kLsbdShift + kLsbdBits - 1, kLsbdShift); ++ } ++ ++ inline int MsbdValue() const { ++ return this->Bits(kMsbdShift + kMsbdBits - 1, kMsbdShift); ++ } ++ ++ inline int CondValue() const { ++ return this->Bits(kCondShift + kCondBits - 1, kCondShift); ++ } ++ ++ inline int Si12Value() const { ++ return this->Bits(kSi12Shift + kSi12Bits - 1, kSi12Shift); ++ } ++ ++ inline int Si14Value() const { ++ return this->Bits(kSi14Shift + kSi14Bits - 1, kSi14Shift); ++ } ++ ++ inline int Si16Value() const { ++ return this->Bits(kSi16Shift + kSi16Bits - 1, kSi16Shift); ++ } ++ ++ inline int Si20Value() const { ++ return this->Bits(kSi20Shift + kSi20Bits - 1, kSi20Shift); ++ } ++ ++ inline int FdValue() const { ++ return this->Bits(kFdShift + kFdBits - 1, kFdShift); ++ } ++ ++ inline int FaValue() const { ++ return this->Bits(kFaShift + kFaBits - 1, kFaShift); ++ } ++ ++ inline int FjValue() const { ++ return this->Bits(kFjShift + kFjBits - 1, kFjShift); ++ } ++ ++ inline int FkValue() const { ++ return this->Bits(kFkShift + kFkBits - 1, kFkShift); ++ } ++ ++ inline int CjValue() const { ++ return this->Bits(kCjShift + kCjBits - 1, kCjShift); ++ } ++ ++ inline int CdValue() const { ++ return this->Bits(kCdShift + kCdBits - 1, kCdShift); ++ } ++ ++ inline int CaValue() const { ++ return this->Bits(kCaShift + kCaBits - 1, kCaShift); ++ } ++ ++ inline int CodeValue() const { ++ return this->Bits(kCodeShift + kCodeBits - 1, kCodeShift); ++ } ++ ++ inline int Hint5Value() const { ++ return this->Bits(kHint5Shift + kHint5Bits - 1, kHint5Shift); ++ } ++ ++ inline int Hint15Value() const { ++ return this->Bits(kHint15Shift + kHint15Bits - 1, kHint15Shift); ++ } ++ ++ inline int Offs16Value() const { ++ return this->Bits(kOffsLowShift + kOffsLowBits - 1, kOffsLowShift); ++ } ++ ++ inline int Offs21Value() const { ++ int low = this->Bits(kOffsLowShift + kOffsLowBits - 1, kOffsLowShift); ++ int high = ++ this->Bits(kOffs21HighShift + kOffs21HighBits - 1, kOffs21HighShift); ++ return ((high << kOffsLowBits) + low); ++ } ++ ++ inline int Offs26Value() const { ++ int low = this->Bits(kOffsLowShift + kOffsLowBits - 1, kOffsLowShift); ++ int high = ++ this->Bits(kOffs26HighShift + kOffs26HighBits - 1, kOffs26HighShift); ++ return ((high << kOffsLowBits) + low); ++ } ++ ++ inline int RjFieldRaw() const { ++ return this->InstructionBits() & 
kRjFieldMask; ++ } ++ ++ inline int RkFieldRaw() const { ++ return this->InstructionBits() & kRkFieldMask; ++ } ++ ++ inline int RdFieldRaw() const { ++ return this->InstructionBits() & kRdFieldMask; ++ } ++ ++ inline int32_t ImmValue(int bits) const { return this->Bits(bits - 1, 0); } ++ ++ /*TODO*/ ++ inline int32_t Imm12Value() const { abort(); } ++ ++ inline int32_t Imm14Value() const { abort(); } ++ ++ inline int32_t Imm16Value() const { abort(); } ++ ++ // Say if the instruction 'links'. e.g. jal, bal. ++ bool IsLinkingInstruction() const; ++ // Say if the instruction is a break or a trap. ++ bool IsTrap() const; ++}; ++ ++class Instruction : public InstructionGetters { ++ public: ++ // Instructions are read of out a code stream. The only way to get a ++ // reference to an instruction is to convert a pointer. There is no way ++ // to allocate or create instances of class Instruction. ++ // Use the At(pc) function to create references to Instruction. ++ static Instruction* At(byte* pc) { ++ return reinterpret_cast(pc); ++ } ++ ++ private: ++ // We need to prevent the creation of instances of class Instruction. ++ DISALLOW_IMPLICIT_CONSTRUCTORS(Instruction); ++}; ++ ++// ----------------------------------------------------------------------------- ++// LA64 assembly various constants. ++ ++// C/C++ argument slots size. ++const int kCArgSlotCount = 0; ++ ++const int kCArgsSlotsSize = kCArgSlotCount * kInstrSize * 2; ++ ++const int kInvalidStackOffset = -1; ++ ++static const int kNegOffset = 0x00008000; ++ ++InstructionBase::Type InstructionBase::InstructionType() const { ++ InstructionBase::Type kType = kUnsupported; ++ ++ // Check for kOp6Type ++ switch (Bits(31, 26) << 26) { ++ case ADDU16I_D: ++ case BEQZ: ++ case BNEZ: ++ case BCZ: ++ case JIRL: ++ case B: ++ case BL: ++ case BEQ: ++ case BNE: ++ case BLT: ++ case BGE: ++ case BLTU: ++ case BGEU: ++ kType = kOp6Type; ++ break; ++ default: ++ kType = kUnsupported; ++ } ++ ++ if (kType == kUnsupported) { ++ // Check for kOp7Type ++ switch (Bits(31, 25) << 25) { ++ case LU12I_W: ++ case LU32I_D: ++ case PCADDI: ++ case PCALAU12I: ++ case PCADDU12I: ++ case PCADDU18I: ++ kType = kOp7Type; ++ break; ++ default: ++ kType = kUnsupported; ++ } ++ } ++ ++ if (kType == kUnsupported) { ++ // Check for kOp8Type ++ switch (Bits(31, 24) << 24) { ++ case LDPTR_W: ++ case STPTR_W: ++ case LDPTR_D: ++ case STPTR_D: ++ case LL_W: ++ case SC_W: ++ case LL_D: ++ case SC_D: ++ case CSR: ++ kType = kOp8Type; ++ break; ++ default: ++ kType = kUnsupported; ++ } ++ } ++ ++ if (kType == kUnsupported) { ++ // Check for kOp10Type ++ switch (Bits(31, 22) << 22) { ++ case BSTR_W: { ++ // If Bit(21) = 0, then the Opcode is not BSTR_W. 
++ if (Bit(21) == 0) ++ kType = kUnsupported; ++ else ++ kType = kOp10Type; ++ break; ++ } ++ case BSTRINS_D: ++ case BSTRPICK_D: ++ case SLTI: ++ case SLTUI: ++ case ADDI_W: ++ case ADDI_D: ++ case LU52I_D: ++ case ANDI: ++ case ORI: ++ case XORI: ++ case LD_B: ++ case LD_H: ++ case LD_W: ++ case LD_D: ++ case ST_B: ++ case ST_H: ++ case ST_W: ++ case ST_D: ++ case LD_BU: ++ case LD_HU: ++ case LD_WU: ++ case PRELD: ++ case FLD_S: ++ case FST_S: ++ case FLD_D: ++ case FST_D: ++ case CACHE: ++ kType = kOp10Type; ++ break; ++ default: ++ kType = kUnsupported; ++ } ++ } ++ ++ if (kType == kUnsupported) { ++ // Check for kOp12Type ++ switch (Bits(31, 20) << 20) { ++ case FMADD_S: ++ case FMADD_D: ++ case FMSUB_S: ++ case FMSUB_D: ++ case FNMADD_S: ++ case FNMADD_D: ++ case FNMSUB_S: ++ case FNMSUB_D: ++ case FCMP_COND_S: ++ case FCMP_COND_D: ++ case FSEL: ++ kType = kOp12Type; ++ break; ++ default: ++ kType = kUnsupported; ++ } ++ } ++ ++ if (kType == kUnsupported) { ++ // Check for kOp14Type ++ switch (Bits(31, 18) << 18) { ++ case ALSL: ++ case BYTEPICK_W: ++ case BYTEPICK_D: ++ case ALSL_D: ++ case SLLI: ++ case SRLI: ++ case SRAI: ++ case ROTRI: ++ case LDDIR: ++ case LDPTE: ++ kType = kOp14Type; ++ break; ++ default: ++ kType = kUnsupported; ++ } ++ } ++ ++ if (kType == kUnsupported) { ++ // Check for kOp17Type ++ switch (Bits(31, 15) << 15) { ++ case ADD_W: ++ case ADD_D: ++ case SUB_W: ++ case SUB_D: ++ case SLT: ++ case SLTU: ++ case MASKEQZ: ++ case MASKNEZ: ++ case NOR: ++ case AND: ++ case OR: ++ case XOR: ++ case ORN: ++ case ANDN: ++ case SLL_W: ++ case SRL_W: ++ case SRA_W: ++ case SLL_D: ++ case SRL_D: ++ case SRA_D: ++ case ROTR_D: ++ case ROTR_W: ++ case MUL_W: ++ case MULH_W: ++ case MULH_WU: ++ case MUL_D: ++ case MULH_D: ++ case MULH_DU: ++ case MULW_D_W: ++ case MULW_D_WU: ++ case DIV_W: ++ case MOD_W: ++ case DIV_WU: ++ case MOD_WU: ++ case DIV_D: ++ case MOD_D: ++ case DIV_DU: ++ case MOD_DU: ++ case BREAK: ++ case FADD_S: ++ case FADD_D: ++ case FSUB_S: ++ case FSUB_D: ++ case FMUL_S: ++ case FMUL_D: ++ case FDIV_S: ++ case FDIV_D: ++ case FMAX_S: ++ case FMAX_D: ++ case FMIN_S: ++ case FMIN_D: ++ case FMAXA_S: ++ case FMAXA_D: ++ case FMINA_S: ++ case FMINA_D: ++ case LDX_B: ++ case LDX_H: ++ case LDX_W: ++ case LDX_D: ++ case STX_B: ++ case STX_H: ++ case STX_W: ++ case STX_D: ++ case LDX_BU: ++ case LDX_HU: ++ case LDX_WU: ++ case PRELDX: ++ case FLDX_S: ++ case FLDX_D: ++ case FSTX_S: ++ case FSTX_D: ++ case ASRTLE_D: ++ case ASRTGT_D: ++ case DBGCALL: ++ case SYSCALL: ++ case HYPCALL: ++ case AMSWAP_W: ++ case AMSWAP_D: ++ case AMADD_W: ++ case AMADD_D: ++ case AMAND_W: ++ case AMAND_D: ++ case AMOR_W: ++ case AMOR_D: ++ case AMXOR_W: ++ case AMXOR_D: ++ case AMMAX_W: ++ case AMMAX_D: ++ case AMMIN_W: ++ case AMMIN_D: ++ case AMMAX_WU: ++ case AMMAX_DU: ++ case AMMIN_WU: ++ case AMMIN_DU: ++ case AMSWAP_DB_W: ++ case AMSWAP_DB_D: ++ case AMADD_DB_W: ++ case AMADD_DB_D: ++ case AMAND_DB_W: ++ case AMAND_DB_D: ++ case AMOR_DB_W: ++ case AMOR_DB_D: ++ case AMXOR_DB_W: ++ case AMXOR_DB_D: ++ case AMMAX_DB_W: ++ case AMMAX_DB_D: ++ case AMMIN_DB_W: ++ case AMMIN_DB_D: ++ case AMMAX_DB_WU: ++ case AMMAX_DB_DU: ++ case AMMIN_DB_WU: ++ case AMMIN_DB_DU: ++ case DBAR: ++ case IBAR: ++ case FLDGT_S: ++ case FLDGT_D: ++ case FLDLE_S: ++ case FLDLE_D: ++ case FSTGT_S: ++ case FSTGT_D: ++ case FSTLE_S: ++ case FSTLE_D: ++ case LDGT_B: ++ case LDGT_H: ++ case LDGT_W: ++ case LDGT_D: ++ case LDLE_B: ++ case LDLE_H: ++ case LDLE_W: ++ case LDLE_D: ++ case STGT_B: ++ case STGT_H: 
++ case STGT_W: ++ case STGT_D: ++ case STLE_B: ++ case STLE_H: ++ case STLE_W: ++ case STLE_D: ++ case WAIT_INVTLB: ++ case FSCALEB_S: ++ case FSCALEB_D: ++ case FCOPYSIGN_S: ++ case FCOPYSIGN_D: ++ case CRC_W_B_W: ++ case CRC_W_H_W: ++ case CRC_W_W_W: ++ case CRC_W_D_W: ++ case CRCC_W_B_W: ++ case CRCC_W_H_W: ++ case CRCC_W_W_W: ++ case CRCC_W_D_W: ++ kType = kOp17Type; ++ break; ++ default: ++ kType = kUnsupported; ++ } ++ } ++ ++ if (kType == kUnsupported) { ++ // Check for kOp22Type ++ switch (Bits(31, 10) << 10) { ++ case CLZ_W: ++ case CTZ_W: ++ case CLZ_D: ++ case CTZ_D: ++ case REVB_2H: ++ case REVB_4H: ++ case REVB_2W: ++ case REVB_D: ++ case REVH_2W: ++ case REVH_D: ++ case BITREV_4B: ++ case BITREV_8B: ++ case BITREV_W: ++ case BITREV_D: ++ case EXT_W_B: ++ case EXT_W_H: ++ case FABS_S: ++ case FABS_D: ++ case FNEG_S: ++ case FNEG_D: ++ case FSQRT_S: ++ case FSQRT_D: ++ case FMOV_S: ++ case FMOV_D: ++ case MOVGR2FR_W: ++ case MOVGR2FR_D: ++ case MOVGR2FRH_W: ++ case MOVFR2GR_S: ++ case MOVFR2GR_D: ++ case MOVFRH2GR_S: ++ case MOVGR2FCSR: ++ case MOVFCSR2GR: ++ case FCVT_S_D: ++ case FCVT_D_S: ++ case FTINTRM_W_S: ++ case FTINTRM_W_D: ++ case FTINTRM_L_S: ++ case FTINTRM_L_D: ++ case FTINTRP_W_S: ++ case FTINTRP_W_D: ++ case FTINTRP_L_S: ++ case FTINTRP_L_D: ++ case FTINTRZ_W_S: ++ case FTINTRZ_W_D: ++ case FTINTRZ_L_S: ++ case FTINTRZ_L_D: ++ case FTINTRNE_W_S: ++ case FTINTRNE_W_D: ++ case FTINTRNE_L_S: ++ case FTINTRNE_L_D: ++ case FTINT_W_S: ++ case FTINT_W_D: ++ case FTINT_L_S: ++ case FTINT_L_D: ++ case FFINT_S_W: ++ case FFINT_S_L: ++ case FFINT_D_W: ++ case FFINT_D_L: ++ case FRINT_S: ++ case FRINT_D: ++ case MOVFR2CF: ++ case MOVCF2FR: ++ case MOVGR2CF: ++ case MOVCF2GR: ++ case FRECIP_S: ++ case FRECIP_D: ++ case FRSQRT_S: ++ case FRSQRT_D: ++ case FCLASS_S: ++ case FCLASS_D: ++ case FLOGB_S: ++ case FLOGB_D: ++ case CLO_W: ++ case CTO_W: ++ case CLO_D: ++ case CTO_D: ++ case IOCSRRD_B: ++ case IOCSRRD_H: ++ case IOCSRRD_W: ++ case IOCSRRD_D: ++ case IOCSRWR_B: ++ case IOCSRWR_H: ++ case IOCSRWR_W: ++ case IOCSRWR_D: ++ case TLBINV: ++ case TLBFLUSH: ++ case TLBP: ++ case TLBR: ++ case TLBWI: ++ case TLBWR: ++ case ERET: ++ case RDTIMEL_W: ++ case RDTIMEH_W: ++ case RDTIME_D: ++ // case CPUCFG: ++ kType = kOp22Type; ++ break; ++ default: ++ kType = kUnsupported; ++ } ++ } ++ ++ return kType; ++} ++ ++// ----------------------------------------------------------------------------- ++// Instructions. ++ ++template ++bool InstructionGetters
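InstructionType() above classifies a raw word by probing ever-narrower opcode fields: first the top 6 bits, then 7, 8, 10, 12, 14, 17 and finally 22, comparing each slice (shifted back into position) against the known opcode constants. A stripped-down sketch of that idea follows, using only two opcodes and made-up helper names; it is an illustration, not the patch's own code.

#include <cstdint>
#include <cstdio>

constexpr uint32_t kBeq  = 0x16u << 26;  // 6-bit opcode group
constexpr uint32_t kAddD = 0x21u << 15;  // 17-bit opcode group

const char* Classify(uint32_t instr) {
  if (((instr >> 26) << 26) == kBeq) return "kOp6Type";
  if (((instr >> 15) << 15) == kAddD) return "kOp17Type";
  return "kUnsupported";
}

int main() {
  std::printf("%s\n", Classify(0x58000085u));  // beq $a0, $a1, 0     -> kOp6Type
  std::printf("%s\n", Classify(0x001098a4u));  // add.d $a0, $a1, $a2 -> kOp17Type
  return 0;
}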

::IsTrap() const { ++ return true; ++} ++ ++} // namespace internal ++} // namespace v8 ++ ++#endif // V8_CODEGEN_LA64_CONSTANTS_LA64_H_ +diff --git a/src/3rdparty/chromium/v8/src/codegen/la64/cpu-la64.cc b/src/3rdparty/chromium/v8/src/codegen/la64/cpu-la64.cc +new file mode 100644 +index 00000000000..3e11a88313e +--- /dev/null ++++ b/src/3rdparty/chromium/v8/src/codegen/la64/cpu-la64.cc +@@ -0,0 +1,38 @@ ++// Copyright 2012 the V8 project authors. All rights reserved. ++// Use of this source code is governed by a BSD-style license that can be ++// found in the LICENSE file. ++ ++// CPU specific code for loongisa independent of OS goes here. ++ ++#include ++#include ++ ++#if V8_TARGET_ARCH_LA64 ++ ++#include "src/codegen/cpu-features.h" ++ ++namespace v8 { ++namespace internal { ++ ++void CpuFeatures::FlushICache(void* start, size_t size) { ++#if !defined(USE_SIMULATOR) ++ // Nothing to do, flushing no instructions. ++ if (size == 0) { ++ return; ++ } ++ ++#if defined(ANDROID) && !defined(__LP64__) ++ // Bionic cacheflush can typically run in userland, avoiding kernel call. ++ char* end = reinterpret_cast(start) + size; ++ cacheflush(reinterpret_cast(start), reinterpret_cast(end), ++ 0); ++#else // ANDROID ++ asm("ibar 0\n"); ++#endif // ANDROID ++#endif // !USE_SIMULATOR. ++} ++ ++} // namespace internal ++} // namespace v8 ++ ++#endif // V8_TARGET_ARCH_LA64 +diff --git a/src/3rdparty/chromium/v8/src/codegen/la64/interface-descriptors-la64.cc b/src/3rdparty/chromium/v8/src/codegen/la64/interface-descriptors-la64.cc +new file mode 100644 +index 00000000000..b72ee4f917f +--- /dev/null ++++ b/src/3rdparty/chromium/v8/src/codegen/la64/interface-descriptors-la64.cc +@@ -0,0 +1,332 @@ ++// Copyright 2012 the V8 project authors. All rights reserved. ++// Use of this source code is governed by a BSD-style license that can be ++// found in the LICENSE file. ++ ++#if V8_TARGET_ARCH_LA64 ++ ++#include "src/codegen/interface-descriptors.h" ++ ++#include "src/execution/frames.h" ++ ++namespace v8 { ++namespace internal { ++ ++const Register CallInterfaceDescriptor::ContextRegister() { return cp; } ++ ++void CallInterfaceDescriptor::DefaultInitializePlatformSpecific( ++ CallInterfaceDescriptorData* data, int register_parameter_count) { ++ const Register default_stub_registers[] = {a0, a1, a2, a3, a4}; ++ CHECK_LE(static_cast(register_parameter_count), ++ arraysize(default_stub_registers)); ++ data->InitializePlatformSpecific(register_parameter_count, ++ default_stub_registers); ++} ++ ++// On MIPS it is not allowed to use odd numbered floating point registers ++// (e.g. f1, f3, etc.) for parameters. This can happen if we use ++// DefaultInitializePlatformSpecific to assign float registers for parameters. ++// E.g if fourth parameter goes to float register, f7 would be assigned for ++// parameter (a3 casted to int is 7). 
++bool CallInterfaceDescriptor::IsValidFloatParameterRegister(Register reg) { ++ return reg.code() % 2 == 0; ++} ++ ++void WasmI32AtomicWait32Descriptor::InitializePlatformSpecific( ++ CallInterfaceDescriptorData* data) { ++ const Register default_stub_registers[] = {a0, a1, a2, a3}; ++ CHECK_EQ(static_cast(kParameterCount), ++ arraysize(default_stub_registers)); ++ data->InitializePlatformSpecific(kParameterCount, default_stub_registers); ++} ++ ++void WasmI32AtomicWait64Descriptor::InitializePlatformSpecific( ++ CallInterfaceDescriptorData* data) { ++ const Register default_stub_registers[] = {a0, a1, a2}; ++ CHECK_EQ(static_cast(kParameterCount), ++ arraysize(default_stub_registers)); ++ data->InitializePlatformSpecific(kParameterCount, default_stub_registers); ++} ++ ++void WasmI64AtomicWait32Descriptor::InitializePlatformSpecific( ++ CallInterfaceDescriptorData* data) { ++ const Register default_stub_registers[] = {a0, a1, a2, a3, a4}; ++ CHECK_EQ(static_cast(kParameterCount - kStackArgumentsCount), ++ arraysize(default_stub_registers)); ++ data->InitializePlatformSpecific(kParameterCount - kStackArgumentsCount, ++ default_stub_registers); ++} ++ ++void WasmI64AtomicWait64Descriptor::InitializePlatformSpecific( ++ CallInterfaceDescriptorData* data) { ++ const Register default_stub_registers[] = {a0, a1, a2}; ++ CHECK_EQ(static_cast(kParameterCount), ++ arraysize(default_stub_registers)); ++ data->InitializePlatformSpecific(kParameterCount, default_stub_registers); ++} ++ ++void RecordWriteDescriptor::InitializePlatformSpecific( ++ CallInterfaceDescriptorData* data) { ++ const Register default_stub_registers[] = {a0, a1, a2, a3, a4}; ++ ++ data->RestrictAllocatableRegisters(default_stub_registers, ++ arraysize(default_stub_registers)); ++ ++ CHECK_LE(static_cast(kParameterCount), ++ arraysize(default_stub_registers)); ++ data->InitializePlatformSpecific(kParameterCount, default_stub_registers); ++} ++ ++void EphemeronKeyBarrierDescriptor::InitializePlatformSpecific( ++ CallInterfaceDescriptorData* data) { ++ const Register default_stub_registers[] = {a0, a1, a2, a3, a4}; ++ ++ data->RestrictAllocatableRegisters(default_stub_registers, ++ arraysize(default_stub_registers)); ++ ++ CHECK_LE(static_cast(kParameterCount), ++ arraysize(default_stub_registers)); ++ data->InitializePlatformSpecific(kParameterCount, default_stub_registers); ++} ++ ++const Register FastNewFunctionContextDescriptor::ScopeInfoRegister() { ++ return a1; ++} ++const Register FastNewFunctionContextDescriptor::SlotsRegister() { return a0; } ++ ++const Register LoadDescriptor::ReceiverRegister() { return a1; } ++const Register LoadDescriptor::NameRegister() { return a2; } ++const Register LoadDescriptor::SlotRegister() { return a0; } ++ ++const Register LoadWithVectorDescriptor::VectorRegister() { return a3; } ++ ++const Register StoreDescriptor::ReceiverRegister() { return a1; } ++const Register StoreDescriptor::NameRegister() { return a2; } ++const Register StoreDescriptor::ValueRegister() { return a0; } ++const Register StoreDescriptor::SlotRegister() { return a4; } ++ ++const Register StoreWithVectorDescriptor::VectorRegister() { return a3; } ++ ++const Register StoreTransitionDescriptor::SlotRegister() { return a4; } ++const Register StoreTransitionDescriptor::VectorRegister() { return a3; } ++const Register StoreTransitionDescriptor::MapRegister() { return a5; } ++ ++const Register ApiGetterDescriptor::HolderRegister() { return a0; } ++const Register ApiGetterDescriptor::CallbackRegister() { return a3; } ++ 
++const Register GrowArrayElementsDescriptor::ObjectRegister() { return a0; } ++const Register GrowArrayElementsDescriptor::KeyRegister() { return a3; } ++ ++// static ++const Register TypeConversionDescriptor::ArgumentRegister() { return a0; } ++ ++void TypeofDescriptor::InitializePlatformSpecific( ++ CallInterfaceDescriptorData* data) { ++ Register registers[] = {a3}; ++ data->InitializePlatformSpecific(arraysize(registers), registers); ++} ++ ++void CallTrampolineDescriptor::InitializePlatformSpecific( ++ CallInterfaceDescriptorData* data) { ++ // a1: target ++ // a0: number of arguments ++ Register registers[] = {a1, a0}; ++ data->InitializePlatformSpecific(arraysize(registers), registers); ++} ++ ++void CallVarargsDescriptor::InitializePlatformSpecific( ++ CallInterfaceDescriptorData* data) { ++ // a0 : number of arguments (on the stack, not including receiver) ++ // a1 : the target to call ++ // a4 : arguments list length (untagged) ++ // a2 : arguments list (FixedArray) ++ Register registers[] = {a1, a0, a4, a2}; ++ data->InitializePlatformSpecific(arraysize(registers), registers); ++} ++ ++void CallForwardVarargsDescriptor::InitializePlatformSpecific( ++ CallInterfaceDescriptorData* data) { ++ // a1: the target to call ++ // a0: number of arguments ++ // a2: start index (to support rest parameters) ++ Register registers[] = {a1, a0, a2}; ++ data->InitializePlatformSpecific(arraysize(registers), registers); ++} ++ ++void CallFunctionTemplateDescriptor::InitializePlatformSpecific( ++ CallInterfaceDescriptorData* data) { ++ // a1 : function template info ++ // a0 : number of arguments (on the stack, not including receiver) ++ Register registers[] = {a1, a0}; ++ data->InitializePlatformSpecific(arraysize(registers), registers); ++} ++ ++void CallWithSpreadDescriptor::InitializePlatformSpecific( ++ CallInterfaceDescriptorData* data) { ++ // a0 : number of arguments (on the stack, not including receiver) ++ // a1 : the target to call ++ // a2 : the object to spread ++ Register registers[] = {a1, a0, a2}; ++ data->InitializePlatformSpecific(arraysize(registers), registers); ++} ++ ++void CallWithArrayLikeDescriptor::InitializePlatformSpecific( ++ CallInterfaceDescriptorData* data) { ++ // a1 : the target to call ++ // a2 : the arguments list ++ Register registers[] = {a1, a2}; ++ data->InitializePlatformSpecific(arraysize(registers), registers); ++} ++ ++void ConstructVarargsDescriptor::InitializePlatformSpecific( ++ CallInterfaceDescriptorData* data) { ++ // a0 : number of arguments (on the stack, not including receiver) ++ // a1 : the target to call ++ // a3 : the new target ++ // a4 : arguments list length (untagged) ++ // a2 : arguments list (FixedArray) ++ Register registers[] = {a1, a3, a0, a4, a2}; ++ data->InitializePlatformSpecific(arraysize(registers), registers); ++} ++ ++void ConstructForwardVarargsDescriptor::InitializePlatformSpecific( ++ CallInterfaceDescriptorData* data) { ++ // a1: the target to call ++ // a3: new target ++ // a0: number of arguments ++ // a2: start index (to support rest parameters) ++ Register registers[] = {a1, a3, a0, a2}; ++ data->InitializePlatformSpecific(arraysize(registers), registers); ++} ++ ++void ConstructWithSpreadDescriptor::InitializePlatformSpecific( ++ CallInterfaceDescriptorData* data) { ++ // a0 : number of arguments (on the stack, not including receiver) ++ // a1 : the target to call ++ // a3 : the new target ++ // a2 : the object to spread ++ Register registers[] = {a1, a3, a0, a2}; ++ 
data->InitializePlatformSpecific(arraysize(registers), registers); ++} ++ ++void ConstructWithArrayLikeDescriptor::InitializePlatformSpecific( ++ CallInterfaceDescriptorData* data) { ++ // a1 : the target to call ++ // a3 : the new target ++ // a2 : the arguments list ++ Register registers[] = {a1, a3, a2}; ++ data->InitializePlatformSpecific(arraysize(registers), registers); ++} ++ ++void ConstructStubDescriptor::InitializePlatformSpecific( ++ CallInterfaceDescriptorData* data) { ++ // a1: target ++ // a3: new target ++ // a0: number of arguments ++ // a2: allocation site or undefined ++ Register registers[] = {a1, a3, a0, a2}; ++ data->InitializePlatformSpecific(arraysize(registers), registers); ++} ++ ++void AbortDescriptor::InitializePlatformSpecific( ++ CallInterfaceDescriptorData* data) { ++ Register registers[] = {a0}; ++ data->InitializePlatformSpecific(arraysize(registers), registers); ++} ++ ++void AllocateHeapNumberDescriptor::InitializePlatformSpecific( ++ CallInterfaceDescriptorData* data) { ++ // register state ++ data->InitializePlatformSpecific(0, nullptr); ++} ++ ++void CompareDescriptor::InitializePlatformSpecific( ++ CallInterfaceDescriptorData* data) { ++ Register registers[] = {a1, a0}; ++ data->InitializePlatformSpecific(arraysize(registers), registers); ++} ++ ++void BinaryOpDescriptor::InitializePlatformSpecific( ++ CallInterfaceDescriptorData* data) { ++ Register registers[] = {a1, a0}; ++ data->InitializePlatformSpecific(arraysize(registers), registers); ++} ++ ++void ArgumentsAdaptorDescriptor::InitializePlatformSpecific( ++ CallInterfaceDescriptorData* data) { ++ Register registers[] = { ++ a1, // JSFunction ++ a3, // the new target ++ a0, // actual number of arguments ++ a2, // expected number of arguments ++ }; ++ data->InitializePlatformSpecific(arraysize(registers), registers); ++} ++ ++void ApiCallbackDescriptor::InitializePlatformSpecific( ++ CallInterfaceDescriptorData* data) { ++ Register registers[] = { ++ a1, // kApiFunctionAddress ++ a2, // kArgc ++ a3, // kCallData ++ a0, // kHolder ++ }; ++ data->InitializePlatformSpecific(arraysize(registers), registers); ++} ++ ++void InterpreterDispatchDescriptor::InitializePlatformSpecific( ++ CallInterfaceDescriptorData* data) { ++ Register registers[] = { ++ kInterpreterAccumulatorRegister, kInterpreterBytecodeOffsetRegister, ++ kInterpreterBytecodeArrayRegister, kInterpreterDispatchTableRegister}; ++ data->InitializePlatformSpecific(arraysize(registers), registers); ++} ++ ++void InterpreterPushArgsThenCallDescriptor::InitializePlatformSpecific( ++ CallInterfaceDescriptorData* data) { ++ Register registers[] = { ++ a0, // argument count (not including receiver) ++ a2, // address of first argument ++ a1 // the target callable to be call ++ }; ++ data->InitializePlatformSpecific(arraysize(registers), registers); ++} ++ ++void InterpreterPushArgsThenConstructDescriptor::InitializePlatformSpecific( ++ CallInterfaceDescriptorData* data) { ++ Register registers[] = { ++ a0, // argument count (not including receiver) ++ a4, // address of the first argument ++ a1, // constructor to call ++ a3, // new target ++ a2, // allocation site feedback if available, undefined otherwise ++ }; ++ data->InitializePlatformSpecific(arraysize(registers), registers); ++} ++ ++void ResumeGeneratorDescriptor::InitializePlatformSpecific( ++ CallInterfaceDescriptorData* data) { ++ Register registers[] = { ++ a0, // the value to pass to the generator ++ a1 // the JSGeneratorObject to resume ++ }; ++ 
data->InitializePlatformSpecific(arraysize(registers), registers);
++}
++
++void FrameDropperTrampolineDescriptor::InitializePlatformSpecific(
++    CallInterfaceDescriptorData* data) {
++  Register registers[] = {
++      a1,  // loaded new FP
++  };
++  data->InitializePlatformSpecific(arraysize(registers), registers);
++}
++
++void RunMicrotasksEntryDescriptor::InitializePlatformSpecific(
++    CallInterfaceDescriptorData* data) {
++  Register registers[] = {a0, a1};
++  data->InitializePlatformSpecific(arraysize(registers), registers);
++}
++
++}  // namespace internal
++}  // namespace v8
++
++#endif  // V8_TARGET_ARCH_LA64
+diff --git a/src/3rdparty/chromium/v8/src/codegen/la64/macro-assembler-la64.cc b/src/3rdparty/chromium/v8/src/codegen/la64/macro-assembler-la64.cc
+new file mode 100644
+index 00000000000..3fde2b9dfa7
+--- /dev/null
++++ b/src/3rdparty/chromium/v8/src/codegen/la64/macro-assembler-la64.cc
+@@ -0,0 +1,3992 @@
++// Copyright 2012 the V8 project authors. All rights reserved.
++// Use of this source code is governed by a BSD-style license that can be
++// found in the LICENSE file.
++
++#include <limits.h>  // For LONG_MIN, LONG_MAX.
++
++#if V8_TARGET_ARCH_LA64
++
++#include "src/base/bits.h"
++#include "src/base/division-by-constant.h"
++#include "src/codegen/assembler-inl.h"
++#include "src/codegen/callable.h"
++#include "src/codegen/code-factory.h"
++#include "src/codegen/external-reference-table.h"
++#include "src/codegen/macro-assembler.h"
++#include "src/codegen/register-configuration.h"
++#include "src/debug/debug.h"
++#include "src/execution/frames-inl.h"
++#include "src/heap/heap-inl.h"  // For MemoryChunk.
++#include "src/init/bootstrapper.h"
++#include "src/logging/counters.h"
++#include "src/objects/heap-number.h"
++#include "src/runtime/runtime.h"
++#include "src/snapshot/embedded/embedded-data.h"
++#include "src/snapshot/snapshot.h"
++#include "src/wasm/wasm-code-manager.h"
++
++// Satisfy cpplint check, but don't include platform-specific header. It is
++// included recursively via macro-assembler.h.
++#if 0 ++#include "src/codegen/la64/macro-assembler-la64.h" ++#endif ++ ++namespace v8 { ++namespace internal { ++ ++static inline bool IsZero(const Operand& rk) { ++ if (rk.is_reg()) { ++ return rk.rm() == zero_reg; ++ } else { ++ return rk.immediate() == 0; ++ } ++} ++ ++int TurboAssembler::RequiredStackSizeForCallerSaved(SaveFPRegsMode fp_mode, ++ Register exclusion1, ++ Register exclusion2, ++ Register exclusion3) const { ++ int bytes = 0; ++ RegList exclusions = 0; ++ if (exclusion1 != no_reg) { ++ exclusions |= exclusion1.bit(); ++ if (exclusion2 != no_reg) { ++ exclusions |= exclusion2.bit(); ++ if (exclusion3 != no_reg) { ++ exclusions |= exclusion3.bit(); ++ } ++ } ++ } ++ ++ RegList list = kJSCallerSaved & ~exclusions; ++ bytes += NumRegs(list) * kPointerSize; ++ ++ if (fp_mode == kSaveFPRegs) { ++ bytes += NumRegs(kCallerSavedFPU) * kDoubleSize; ++ } ++ ++ return bytes; ++} ++ ++int TurboAssembler::PushCallerSaved(SaveFPRegsMode fp_mode, Register exclusion1, ++ Register exclusion2, Register exclusion3) { ++ int bytes = 0; ++ RegList exclusions = 0; ++ if (exclusion1 != no_reg) { ++ exclusions |= exclusion1.bit(); ++ if (exclusion2 != no_reg) { ++ exclusions |= exclusion2.bit(); ++ if (exclusion3 != no_reg) { ++ exclusions |= exclusion3.bit(); ++ } ++ } ++ } ++ ++ RegList list = kJSCallerSaved & ~exclusions; ++ MultiPush(list); ++ bytes += NumRegs(list) * kPointerSize; ++ ++ if (fp_mode == kSaveFPRegs) { ++ MultiPushFPU(kCallerSavedFPU); ++ bytes += NumRegs(kCallerSavedFPU) * kDoubleSize; ++ } ++ ++ return bytes; ++} ++ ++int TurboAssembler::PopCallerSaved(SaveFPRegsMode fp_mode, Register exclusion1, ++ Register exclusion2, Register exclusion3) { ++ int bytes = 0; ++ if (fp_mode == kSaveFPRegs) { ++ MultiPopFPU(kCallerSavedFPU); ++ bytes += NumRegs(kCallerSavedFPU) * kDoubleSize; ++ } ++ ++ RegList exclusions = 0; ++ if (exclusion1 != no_reg) { ++ exclusions |= exclusion1.bit(); ++ if (exclusion2 != no_reg) { ++ exclusions |= exclusion2.bit(); ++ if (exclusion3 != no_reg) { ++ exclusions |= exclusion3.bit(); ++ } ++ } ++ } ++ ++ RegList list = kJSCallerSaved & ~exclusions; ++ MultiPop(list); ++ bytes += NumRegs(list) * kPointerSize; ++ ++ return bytes; ++} ++ ++void TurboAssembler::LoadRoot(Register destination, RootIndex index) { ++ Ld_d(destination, MemOperand(s6, RootRegisterOffsetForRootIndex(index))); ++} ++ ++void TurboAssembler::PushCommonFrame(Register marker_reg) { ++ if (marker_reg.is_valid()) { ++ Push(ra, fp, marker_reg); ++ Add_d(fp, sp, Operand(kPointerSize)); ++ } else { ++ Push(ra, fp); ++ mov(fp, sp); ++ } ++} ++ ++void TurboAssembler::PushStandardFrame(Register function_reg) { ++ int offset = -StandardFrameConstants::kContextOffset; ++ if (function_reg.is_valid()) { ++ Push(ra, fp, cp, function_reg); ++ offset += kPointerSize; ++ } else { ++ Push(ra, fp, cp); ++ } ++ Add_d(fp, sp, Operand(offset)); ++} ++ ++int MacroAssembler::SafepointRegisterStackIndex(int reg_code) { ++ // The registers are pushed starting with the highest encoding, ++ // which means that lowest encodings are closest to the stack pointer. ++ return kSafepointRegisterStackIndexMap[reg_code]; ++} ++ ++// Clobbers object, dst, value, and ra, if (ra_status == kRAHasBeenSaved) ++// The register 'object' contains a heap object pointer. The heap object ++// tag is shifted away. 
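PushCallerSaved/PopCallerSaved above treat RegList as a plain bitmask: excluded registers are cleared from kJSCallerSaved and the spill area is one pointer-sized slot per remaining bit. Here is a minimal model of that arithmetic, assuming a 64-bit mask and 8-byte pointers as on LA64; the names are illustrative, not V8's own.

#include <bitset>
#include <cstdint>

using RegList = uint64_t;        // one bit per general-purpose register
constexpr int kPointerSize = 8;  // LA64 pointers are 8 bytes

constexpr RegList Bit(int code) { return RegList{1} << code; }

inline int NumRegs(RegList list) {
  return static_cast<int>(std::bitset<64>(list).count());
}

// Stack bytes needed to spill the caller-saved set minus any exclusions,
// mirroring RequiredStackSizeForCallerSaved above (FP registers ignored here).
// e.g. CallerSavedBytes(Bit(4) | Bit(5) | Bit(6), Bit(5)) == 16.
inline int CallerSavedBytes(RegList caller_saved, RegList exclusions) {
  RegList list = caller_saved & ~exclusions;
  return NumRegs(list) * kPointerSize;
}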
++void MacroAssembler::RecordWriteField(Register object, int offset, ++ Register value, Register dst, ++ RAStatus ra_status, ++ SaveFPRegsMode save_fp, ++ RememberedSetAction remembered_set_action, ++ SmiCheck smi_check) { ++ DCHECK(!AreAliased(value, dst, t8, object)); ++ // First, check if a write barrier is even needed. The tests below ++ // catch stores of Smis. ++ Label done; ++ ++ // Skip barrier if writing a smi. ++ if (smi_check == INLINE_SMI_CHECK) { ++ JumpIfSmi(value, &done); ++ } ++ ++ // Although the object register is tagged, the offset is relative to the start ++ // of the object, so so offset must be a multiple of kPointerSize. ++ DCHECK(IsAligned(offset, kPointerSize)); ++ ++ Add_d(dst, object, Operand(offset - kHeapObjectTag)); ++ if (emit_debug_code()) { ++ BlockTrampolinePoolScope block_trampoline_pool(this); ++ Label ok; ++ And(t8, dst, Operand(kPointerSize - 1)); ++ Branch(&ok, eq, t8, Operand(zero_reg)); ++ stop(); ++ bind(&ok); ++ } ++ ++ RecordWrite(object, dst, value, ra_status, save_fp, remembered_set_action, ++ OMIT_SMI_CHECK); ++ ++ bind(&done); ++ ++ // Clobber clobbered input registers when running with the debug-code flag ++ // turned on to provoke errors. ++ if (emit_debug_code()) { ++ li(value, Operand(bit_cast(kZapValue + 4))); ++ li(dst, Operand(bit_cast(kZapValue + 8))); ++ } ++} ++ ++void TurboAssembler::SaveRegisters(RegList registers) { ++ DCHECK_GT(NumRegs(registers), 0); ++ RegList regs = 0; ++ for (int i = 0; i < Register::kNumRegisters; ++i) { ++ if ((registers >> i) & 1u) { ++ regs |= Register::from_code(i).bit(); ++ } ++ } ++ MultiPush(regs); ++} ++ ++void TurboAssembler::RestoreRegisters(RegList registers) { ++ DCHECK_GT(NumRegs(registers), 0); ++ RegList regs = 0; ++ for (int i = 0; i < Register::kNumRegisters; ++i) { ++ if ((registers >> i) & 1u) { ++ regs |= Register::from_code(i).bit(); ++ } ++ } ++ MultiPop(regs); ++} ++ ++void TurboAssembler::CallEphemeronKeyBarrier(Register object, Register address, ++ SaveFPRegsMode fp_mode) { ++ EphemeronKeyBarrierDescriptor descriptor; ++ RegList registers = descriptor.allocatable_registers(); ++ ++ SaveRegisters(registers); ++ ++ Register object_parameter( ++ descriptor.GetRegisterParameter(EphemeronKeyBarrierDescriptor::kObject)); ++ Register slot_parameter(descriptor.GetRegisterParameter( ++ EphemeronKeyBarrierDescriptor::kSlotAddress)); ++ Register fp_mode_parameter( ++ descriptor.GetRegisterParameter(EphemeronKeyBarrierDescriptor::kFPMode)); ++ ++ Push(object); ++ Push(address); ++ ++ Pop(slot_parameter); ++ Pop(object_parameter); ++ ++ Move(fp_mode_parameter, Smi::FromEnum(fp_mode)); ++ Call(isolate()->builtins()->builtin_handle(Builtins::kEphemeronKeyBarrier), ++ RelocInfo::CODE_TARGET); ++ RestoreRegisters(registers); ++} ++ ++void TurboAssembler::CallRecordWriteStub( ++ Register object, Register address, ++ RememberedSetAction remembered_set_action, SaveFPRegsMode fp_mode) { ++ CallRecordWriteStub( ++ object, address, remembered_set_action, fp_mode, ++ isolate()->builtins()->builtin_handle(Builtins::kRecordWrite), ++ kNullAddress); ++} ++ ++void TurboAssembler::CallRecordWriteStub( ++ Register object, Register address, ++ RememberedSetAction remembered_set_action, SaveFPRegsMode fp_mode, ++ Address wasm_target) { ++ CallRecordWriteStub(object, address, remembered_set_action, fp_mode, ++ Handle::null(), wasm_target); ++} ++ ++void TurboAssembler::CallRecordWriteStub( ++ Register object, Register address, ++ RememberedSetAction remembered_set_action, SaveFPRegsMode fp_mode, ++ Handle 
code_target, Address wasm_target) { ++ DCHECK_NE(code_target.is_null(), wasm_target == kNullAddress); ++ // TODO(albertnetymk): For now we ignore remembered_set_action and fp_mode, ++ // i.e. always emit remember set and save FP registers in RecordWriteStub. If ++ // large performance regression is observed, we should use these values to ++ // avoid unnecessary work. ++ ++ RecordWriteDescriptor descriptor; ++ RegList registers = descriptor.allocatable_registers(); ++ ++ SaveRegisters(registers); ++ Register object_parameter( ++ descriptor.GetRegisterParameter(RecordWriteDescriptor::kObject)); ++ Register slot_parameter( ++ descriptor.GetRegisterParameter(RecordWriteDescriptor::kSlot)); ++ Register remembered_set_parameter( ++ descriptor.GetRegisterParameter(RecordWriteDescriptor::kRememberedSet)); ++ Register fp_mode_parameter( ++ descriptor.GetRegisterParameter(RecordWriteDescriptor::kFPMode)); ++ ++ Push(object); ++ Push(address); ++ ++ Pop(slot_parameter); ++ Pop(object_parameter); ++ ++ Move(remembered_set_parameter, Smi::FromEnum(remembered_set_action)); ++ Move(fp_mode_parameter, Smi::FromEnum(fp_mode)); ++ if (code_target.is_null()) { ++ Call(wasm_target, RelocInfo::WASM_STUB_CALL); ++ } else { ++ Call(code_target, RelocInfo::CODE_TARGET); ++ } ++ ++ RestoreRegisters(registers); ++} ++ ++// Clobbers object, address, value, and ra, if (ra_status == kRAHasBeenSaved) ++// The register 'object' contains a heap object pointer. The heap object ++// tag is shifted away. ++void MacroAssembler::RecordWrite(Register object, Register address, ++ Register value, RAStatus ra_status, ++ SaveFPRegsMode fp_mode, ++ RememberedSetAction remembered_set_action, ++ SmiCheck smi_check) { ++ DCHECK(!AreAliased(object, address, value)); ++ ++ if (emit_debug_code()) { ++ UseScratchRegisterScope temps(this); ++ Register scratch = temps.Acquire(); ++ Ld_d(scratch, MemOperand(address, 0)); ++ Assert(eq, AbortReason::kWrongAddressOrValuePassedToRecordWrite, scratch, ++ Operand(value)); ++ } ++ ++ if ((remembered_set_action == OMIT_REMEMBERED_SET && ++ !FLAG_incremental_marking) || ++ FLAG_disable_write_barriers) { ++ return; ++ } ++ ++ // First, check if a write barrier is even needed. The tests below ++ // catch stores of smis and stores into the young generation. ++ Label done; ++ ++ if (smi_check == INLINE_SMI_CHECK) { ++ DCHECK_EQ(0, kSmiTag); ++ JumpIfSmi(value, &done); ++ } ++ ++ CheckPageFlag(value, ++ value, // Used as scratch. ++ MemoryChunk::kPointersToHereAreInterestingMask, eq, &done); ++ CheckPageFlag(object, ++ value, // Used as scratch. ++ MemoryChunk::kPointersFromHereAreInterestingMask, eq, &done); ++ ++ // Record the actual write. ++ if (ra_status == kRAHasNotBeenSaved) { ++ push(ra); ++ } ++ CallRecordWriteStub(object, address, remembered_set_action, fp_mode); ++ if (ra_status == kRAHasNotBeenSaved) { ++ pop(ra); ++ } ++ ++ bind(&done); ++ ++ // Clobber clobbered registers when running with the debug-code flag ++ // turned on to provoke errors. ++ if (emit_debug_code()) { ++ li(address, Operand(bit_cast(kZapValue + 12))); ++ li(value, Operand(bit_cast(kZapValue + 16))); ++ } ++} ++ ++// --------------------------------------------------------------------------- ++// Instruction macros. ++ ++void TurboAssembler::Add_w(Register rd, Register rj, const Operand& rk) { ++ if (rk.is_reg()) { ++ add_w(rd, rj, rk.rm()); ++ } else { ++ if (is_int12(rk.immediate()) && !MustUseReg(rk.rmode())) { ++ addi_w(rd, rj, static_cast(rk.immediate())); ++ } else { ++ // li handles the relocation. 
++ UseScratchRegisterScope temps(this); ++ Register scratch = temps.Acquire(); ++ DCHECK(rj != scratch); ++ li(scratch, rk); ++ add_w(rd, rj, scratch); ++ } ++ } ++} ++ ++void TurboAssembler::Add_d(Register rd, Register rj, const Operand& rk) { ++ if (rk.is_reg()) { ++ add_d(rd, rj, rk.rm()); ++ } else { ++ if (is_int12(rk.immediate()) && !MustUseReg(rk.rmode())) { ++ addi_d(rd, rj, static_cast(rk.immediate())); ++ } else { ++ // li handles the relocation. ++ UseScratchRegisterScope temps(this); ++ Register scratch = temps.Acquire(); ++ DCHECK(rj != scratch); ++ li(scratch, rk); ++ add_d(rd, rj, scratch); ++ } ++ } ++} ++ ++void TurboAssembler::Sub_w(Register rd, Register rj, const Operand& rk) { ++ if (rk.is_reg()) { ++ sub_w(rd, rj, rk.rm()); ++ } else { ++ DCHECK(is_int32(rk.immediate())); ++ if (is_int12(-rk.immediate()) && !MustUseReg(rk.rmode())) { ++ addi_w(rd, rj, ++ static_cast( ++ -rk.immediate())); // No subi_w instr, use addi_w(x, y, -imm). ++ } else { ++ UseScratchRegisterScope temps(this); ++ Register scratch = temps.Acquire(); ++ DCHECK(rj != scratch); ++ if (-rk.immediate() >> 12 == 0 && !MustUseReg(rk.rmode())) { ++ // Use load -imm and addu when loading -imm generates one instruction. ++ li(scratch, -rk.immediate()); ++ add_w(rd, rj, scratch); ++ } else { ++ // li handles the relocation. ++ li(scratch, rk); ++ sub_w(rd, rj, scratch); ++ } ++ } ++ } ++} ++ ++void TurboAssembler::Sub_d(Register rd, Register rj, const Operand& rk) { ++ if (rk.is_reg()) { ++ sub_d(rd, rj, rk.rm()); ++ } else if (is_int12(-rk.immediate()) && !MustUseReg(rk.rmode())) { ++ addi_d(rd, rj, ++ static_cast( ++ -rk.immediate())); // No subi_d instr, use addi_d(x, y, -imm). ++ } else { ++ DCHECK(rj != t7); ++ int li_count = InstrCountForLi64Bit(rk.immediate()); ++ int li_neg_count = InstrCountForLi64Bit(-rk.immediate()); ++ if (li_neg_count < li_count && !MustUseReg(rk.rmode())) { ++ // Use load -imm and add_d when loading -imm generates one instruction. ++ DCHECK(rk.immediate() != std::numeric_limits::min()); ++ UseScratchRegisterScope temps(this); ++ Register scratch = temps.Acquire(); ++ li(scratch, Operand(-rk.immediate())); ++ add_d(rd, rj, scratch); ++ } else { ++ // li handles the relocation. ++ UseScratchRegisterScope temps(this); ++ Register scratch = temps.Acquire(); ++ li(scratch, rk); ++ sub_d(rd, rj, scratch); ++ } ++ } ++} ++ ++void TurboAssembler::Mul_w(Register rd, Register rj, const Operand& rk) { ++ if (rk.is_reg()) { ++ mul_w(rd, rj, rk.rm()); ++ } else { ++ // li handles the relocation. ++ UseScratchRegisterScope temps(this); ++ Register scratch = temps.Acquire(); ++ DCHECK(rj != scratch); ++ li(scratch, rk); ++ mul_w(rd, rj, scratch); ++ } ++} ++ ++void TurboAssembler::Mulh_w(Register rd, Register rj, const Operand& rk) { ++ if (rk.is_reg()) { ++ mulh_w(rd, rj, rk.rm()); ++ } else { ++ // li handles the relocation. ++ UseScratchRegisterScope temps(this); ++ Register scratch = temps.Acquire(); ++ DCHECK(rj != scratch); ++ li(scratch, rk); ++ mulh_w(rd, rj, scratch); ++ } ++} ++ ++void TurboAssembler::Mulh_wu(Register rd, Register rj, const Operand& rk) { ++ if (rk.is_reg()) { ++ mulh_wu(rd, rj, rk.rm()); ++ } else { ++ // li handles the relocation. ++ UseScratchRegisterScope temps(this); ++ Register scratch = temps.Acquire(); ++ DCHECK(rj != scratch); ++ li(scratch, rk); ++ mulh_wu(rd, rj, scratch); ++ } ++} ++ ++void TurboAssembler::Mul_d(Register rd, Register rj, const Operand& rk) { ++ if (rk.is_reg()) { ++ mul_d(rd, rj, rk.rm()); ++ } else { ++ // li handles the relocation. 
++ UseScratchRegisterScope temps(this); ++ Register scratch = temps.Acquire(); ++ DCHECK(rj != scratch); ++ li(scratch, rk); ++ mul_d(rd, rj, scratch); ++ } ++} ++ ++void TurboAssembler::Mulh_d(Register rd, Register rj, const Operand& rk) { ++ if (rk.is_reg()) { ++ mulh_d(rd, rj, rk.rm()); ++ } else { ++ // li handles the relocation. ++ UseScratchRegisterScope temps(this); ++ Register scratch = temps.Acquire(); ++ DCHECK(rj != scratch); ++ li(scratch, rk); ++ mulh_d(rd, rj, scratch); ++ } ++} ++ ++void TurboAssembler::Div_w(Register rd, Register rj, const Operand& rk) { ++ if (rk.is_reg()) { ++ div_w(rd, rj, rk.rm()); ++ } else { ++ // li handles the relocation. ++ UseScratchRegisterScope temps(this); ++ Register scratch = temps.Acquire(); ++ DCHECK(rj != scratch); ++ li(scratch, rk); ++ div_w(rd, rj, scratch); ++ } ++} ++ ++void TurboAssembler::Mod_w(Register rd, Register rj, const Operand& rk) { ++ if (rk.is_reg()) { ++ mod_w(rd, rj, rk.rm()); ++ } else { ++ // li handles the relocation. ++ UseScratchRegisterScope temps(this); ++ Register scratch = temps.Acquire(); ++ DCHECK(rj != scratch); ++ li(scratch, rk); ++ mod_w(rd, rj, scratch); ++ } ++} ++ ++void TurboAssembler::Mod_wu(Register rd, Register rj, const Operand& rk) { ++ if (rk.is_reg()) { ++ mod_wu(rd, rj, rk.rm()); ++ } else { ++ // li handles the relocation. ++ UseScratchRegisterScope temps(this); ++ Register scratch = temps.Acquire(); ++ DCHECK(rj != scratch); ++ li(scratch, rk); ++ mod_wu(rd, rj, scratch); ++ } ++} ++ ++void TurboAssembler::Div_d(Register rd, Register rj, const Operand& rk) { ++ if (rk.is_reg()) { ++ div_d(rd, rj, rk.rm()); ++ } else { ++ // li handles the relocation. ++ UseScratchRegisterScope temps(this); ++ Register scratch = temps.Acquire(); ++ DCHECK(rj != scratch); ++ li(scratch, rk); ++ div_d(rd, rj, scratch); ++ } ++} ++ ++void TurboAssembler::Div_wu(Register rd, Register rj, const Operand& rk) { ++ if (rk.is_reg()) { ++ div_wu(rd, rj, rk.rm()); ++ } else { ++ // li handles the relocation. ++ UseScratchRegisterScope temps(this); ++ Register scratch = temps.Acquire(); ++ DCHECK(rj != scratch); ++ li(scratch, rk); ++ div_wu(rd, rj, scratch); ++ } ++} ++ ++void TurboAssembler::Div_du(Register rd, Register rj, const Operand& rk) { ++ if (rk.is_reg()) { ++ div_du(rd, rj, rk.rm()); ++ } else { ++ // li handles the relocation. ++ UseScratchRegisterScope temps(this); ++ Register scratch = temps.Acquire(); ++ DCHECK(rj != scratch); ++ li(scratch, rk); ++ div_du(rd, rj, scratch); ++ } ++} ++ ++void TurboAssembler::Mod_d(Register rd, Register rj, const Operand& rk) { ++ if (rk.is_reg()) { ++ mod_d(rd, rj, rk.rm()); ++ } else { ++ // li handles the relocation. ++ UseScratchRegisterScope temps(this); ++ Register scratch = temps.Acquire(); ++ DCHECK(rj != scratch); ++ li(scratch, rk); ++ mod_d(rd, rj, scratch); ++ } ++} ++ ++void TurboAssembler::Mod_du(Register rd, Register rj, const Operand& rk) { ++ if (rk.is_reg()) { ++ mod_du(rd, rj, rk.rm()); ++ } else { ++ // li handles the relocation. ++ UseScratchRegisterScope temps(this); ++ Register scratch = temps.Acquire(); ++ DCHECK(rj != scratch); ++ li(scratch, rk); ++ mod_du(rd, rj, scratch); ++ } ++} ++ ++void TurboAssembler::And(Register rd, Register rj, const Operand& rk) { ++ if (rk.is_reg()) { ++ and_(rd, rj, rk.rm()); ++ } else { ++ if (is_uint12(rk.immediate()) && !MustUseReg(rk.rmode())) { ++ andi(rd, rj, static_cast(rk.immediate())); ++ } else { ++ // li handles the relocation. 
++ UseScratchRegisterScope temps(this); ++ Register scratch = temps.Acquire(); ++ DCHECK(rj != scratch); ++ li(scratch, rk); ++ and_(rd, rj, scratch); ++ } ++ } ++} ++ ++void TurboAssembler::Or(Register rd, Register rj, const Operand& rk) { ++ if (rk.is_reg()) { ++ or_(rd, rj, rk.rm()); ++ } else { ++ if (is_uint12(rk.immediate()) && !MustUseReg(rk.rmode())) { ++ ori(rd, rj, static_cast(rk.immediate())); ++ } else { ++ // li handles the relocation. ++ UseScratchRegisterScope temps(this); ++ Register scratch = temps.Acquire(); ++ DCHECK(rj != scratch); ++ li(scratch, rk); ++ or_(rd, rj, scratch); ++ } ++ } ++} ++ ++void TurboAssembler::Xor(Register rd, Register rj, const Operand& rk) { ++ if (rk.is_reg()) { ++ xor_(rd, rj, rk.rm()); ++ } else { ++ if (is_uint12(rk.immediate()) && !MustUseReg(rk.rmode())) { ++ xori(rd, rj, static_cast(rk.immediate())); ++ } else { ++ // li handles the relocation. ++ UseScratchRegisterScope temps(this); ++ Register scratch = temps.Acquire(); ++ DCHECK(rj != scratch); ++ li(scratch, rk); ++ xor_(rd, rj, scratch); ++ } ++ } ++} ++ ++void TurboAssembler::Nor(Register rd, Register rj, const Operand& rk) { ++ if (rk.is_reg()) { ++ nor(rd, rj, rk.rm()); ++ } else { ++ // li handles the relocation. ++ UseScratchRegisterScope temps(this); ++ Register scratch = temps.Acquire(); ++ DCHECK(rj != scratch); ++ li(scratch, rk); ++ nor(rd, rj, scratch); ++ } ++} ++ ++void TurboAssembler::Andn(Register rd, Register rj, const Operand& rk) { ++ if (rk.is_reg()) { ++ andn(rd, rj, rk.rm()); ++ } else { ++ // li handles the relocation. ++ UseScratchRegisterScope temps(this); ++ Register scratch = temps.Acquire(); ++ DCHECK(rj != scratch); ++ li(scratch, rk); ++ andn(rd, rj, scratch); ++ } ++} ++ ++void TurboAssembler::Orn(Register rd, Register rj, const Operand& rk) { ++ if (rk.is_reg()) { ++ orn(rd, rj, rk.rm()); ++ } else { ++ // li handles the relocation. ++ UseScratchRegisterScope temps(this); ++ Register scratch = temps.Acquire(); ++ DCHECK(rj != scratch); ++ li(scratch, rk); ++ orn(rd, rj, scratch); ++ } ++} ++ ++void TurboAssembler::Neg(Register rj, const Operand& rk) { ++ DCHECK(rk.is_reg()); ++ sub_d(rj, zero_reg, rk.rm()); ++} ++ ++void TurboAssembler::Slt(Register rd, Register rj, const Operand& rk) { ++ if (rk.is_reg()) { ++ slt(rd, rj, rk.rm()); ++ } else { ++ if (is_int12(rk.immediate()) && !MustUseReg(rk.rmode())) { ++ slti(rd, rj, static_cast(rk.immediate())); ++ } else { ++ // li handles the relocation. ++ UseScratchRegisterScope temps(this); ++ // TODO why?? ++ BlockTrampolinePoolScope block_trampoline_pool(this); ++ Register scratch = temps.hasAvailable() ? temps.Acquire() : t8; ++ DCHECK(rj != scratch); ++ li(scratch, rk); ++ slt(rd, rj, scratch); ++ } ++ } ++} ++ ++void TurboAssembler::Sltu(Register rd, Register rj, const Operand& rk) { ++ if (rk.is_reg()) { ++ sltu(rd, rj, rk.rm()); ++ } else { ++ if (is_int12(rk.immediate()) && !MustUseReg(rk.rmode())) { ++ sltui(rd, rj, static_cast(rk.immediate())); ++ } else { ++ // li handles the relocation. ++ UseScratchRegisterScope temps(this); ++ BlockTrampolinePoolScope block_trampoline_pool(this); ++ Register scratch = temps.hasAvailable() ? temps.Acquire() : t8; ++ DCHECK(rj != scratch); ++ li(scratch, rk); ++ sltu(rd, rj, scratch); ++ } ++ } ++} ++ ++void TurboAssembler::Sle(Register rd, Register rj, const Operand& rk) { ++ if (rk.is_reg()) { ++ slt(rd, rk.rm(), rj); ++ } else { ++ // li handles the relocation. ++ UseScratchRegisterScope temps(this); ++ Register scratch = temps.hasAvailable() ? 
temps.Acquire() : t8; ++ BlockTrampolinePoolScope block_trampoline_pool(this); ++ DCHECK(rj != scratch); ++ li(scratch, rk); ++ slt(rd, scratch, rj); ++ } ++ xori(rd, rd, 1); ++} ++ ++void TurboAssembler::Sleu(Register rd, Register rj, const Operand& rk) { ++ if (rk.is_reg()) { ++ sltu(rd, rk.rm(), rj); ++ } else { ++ // li handles the relocation. ++ UseScratchRegisterScope temps(this); ++ Register scratch = temps.hasAvailable() ? temps.Acquire() : t8; ++ BlockTrampolinePoolScope block_trampoline_pool(this); ++ DCHECK(rj != scratch); ++ li(scratch, rk); ++ sltu(rd, scratch, rj); ++ } ++ xori(rd, rd, 1); ++} ++ ++void TurboAssembler::Sge(Register rd, Register rj, const Operand& rk) { ++ Slt(rd, rj, rk); ++ xori(rd, rd, 1); ++} ++ ++void TurboAssembler::Sgeu(Register rd, Register rj, const Operand& rk) { ++ Sltu(rd, rj, rk); ++ xori(rd, rd, 1); ++} ++ ++void TurboAssembler::Sgt(Register rd, Register rj, const Operand& rk) { ++ if (rk.is_reg()) { ++ slt(rd, rk.rm(), rj); ++ } else { ++ // li handles the relocation. ++ UseScratchRegisterScope temps(this); ++ Register scratch = temps.hasAvailable() ? temps.Acquire() : t8; ++ BlockTrampolinePoolScope block_trampoline_pool(this); ++ DCHECK(rj != scratch); ++ li(scratch, rk); ++ slt(rd, scratch, rj); ++ } ++} ++ ++void TurboAssembler::Sgtu(Register rd, Register rj, const Operand& rk) { ++ if (rk.is_reg()) { ++ sltu(rd, rk.rm(), rj); ++ } else { ++ // li handles the relocation. ++ UseScratchRegisterScope temps(this); ++ Register scratch = temps.hasAvailable() ? temps.Acquire() : t8; ++ BlockTrampolinePoolScope block_trampoline_pool(this); ++ DCHECK(rj != scratch); ++ li(scratch, rk); ++ sltu(rd, scratch, rj); ++ } ++} ++ ++void TurboAssembler::Rotr_w(Register rd, Register rj, const Operand& rk) { ++ if (rk.is_reg()) { ++ rotr_w(rd, rj, rk.rm()); ++ } else { ++ int64_t ror_value = rk.immediate() % 32; ++ if (ror_value < 0) { ++ ror_value += 32; ++ } ++ rotri_w(rd, rj, ror_value); ++ } ++} ++ ++void TurboAssembler::Rotr_d(Register rd, Register rj, const Operand& rk) { ++ if (rk.is_reg()) { ++ rotr_d(rd, rj, rk.rm()); ++ } else { ++ int64_t dror_value = rk.immediate() % 64; ++ if (dror_value < 0) dror_value += 64; ++ rotri_d(rd, rj, dror_value); ++ } ++} ++ ++void MacroAssembler::Pref(int32_t hint, const MemOperand& rj) { ++ // TODO ++ // pref(hint); ++} ++ ++void TurboAssembler::Alsl_w(Register rd, Register rj, Register rk, uint8_t sa, ++ Register scratch) { ++ DCHECK(sa >= 1 && sa <= 31); ++ if (sa <= 4) { ++ alsl_w(rd, rj, rk, sa); ++ } else { ++ Register tmp = rd == rk ? scratch : rd; ++ DCHECK(tmp != rk); ++ slli_w(tmp, rj, sa); ++ add_w(rd, rk, tmp); ++ } ++} ++ ++void TurboAssembler::Alsl_d(Register rd, Register rj, Register rk, uint8_t sa, ++ Register scratch) { ++ DCHECK(sa >= 1 && sa <= 31); ++ if (sa <= 4) { ++ alsl_d(rd, rj, rk, sa); ++ } else { ++ Register tmp = rd == rk ? 
scratch : rd; ++ DCHECK(tmp != rk); ++ slli_d(tmp, rj, sa); ++ add_d(rd, rk, tmp); ++ } ++} ++ ++// ------------Pseudo-instructions------------- ++ ++// Change endianness ++void TurboAssembler::ByteSwapSigned(Register dest, Register src, ++ int operand_size) { ++ DCHECK(operand_size == 2 || operand_size == 4 || operand_size == 8); ++ if (operand_size == 2) { ++ revb_2h(dest, src); ++ ext_w_h(dest, dest); ++ } else if (operand_size == 4) { ++ revb_2w(dest, src); ++ slli_w(dest, dest, 0); ++ } else { ++ revb_d(dest, dest); ++ } ++} ++ ++void TurboAssembler::ByteSwapUnsigned(Register dest, Register src, ++ int operand_size) { ++ DCHECK(operand_size == 2 || operand_size == 4); ++ if (operand_size == 2) { ++ revb_2h(dest, src); ++ bstrins_d(dest, zero_reg, 63, 16); ++ } else { ++ revb_2w(dest, src); ++ bstrins_d(dest, zero_reg, 63, 32); ++ } ++} ++ ++void TurboAssembler::Ld_b(Register rd, const MemOperand& rj) { ++ MemOperand source = rj; ++ AdjustBaseAndOffset(&source); ++ if (source.hasIndexReg()) { ++ ldx_b(rd, source.base(), source.index()); ++ } else { ++ ld_b(rd, source.base(), source.offset()); ++ } ++} ++ ++void TurboAssembler::Ld_bu(Register rd, const MemOperand& rj) { ++ MemOperand source = rj; ++ AdjustBaseAndOffset(&source); ++ if (source.hasIndexReg()) { ++ ldx_bu(rd, source.base(), source.index()); ++ } else { ++ ld_bu(rd, source.base(), source.offset()); ++ } ++} ++ ++void TurboAssembler::St_b(Register rd, const MemOperand& rj) { ++ MemOperand source = rj; ++ AdjustBaseAndOffset(&source); ++ if (source.hasIndexReg()) { ++ stx_b(rd, source.base(), source.index()); ++ } else { ++ st_b(rd, source.base(), source.offset()); ++ } ++} ++ ++void TurboAssembler::Ld_h(Register rd, const MemOperand& rj) { ++ MemOperand source = rj; ++ AdjustBaseAndOffset(&source); ++ if (source.hasIndexReg()) { ++ ldx_h(rd, source.base(), source.index()); ++ } else { ++ ld_h(rd, source.base(), source.offset()); ++ } ++} ++ ++void TurboAssembler::Ld_hu(Register rd, const MemOperand& rj) { ++ MemOperand source = rj; ++ AdjustBaseAndOffset(&source); ++ if (source.hasIndexReg()) { ++ ldx_hu(rd, source.base(), source.index()); ++ } else { ++ ld_hu(rd, source.base(), source.offset()); ++ } ++} ++ ++void TurboAssembler::St_h(Register rd, const MemOperand& rj) { ++ MemOperand source = rj; ++ AdjustBaseAndOffset(&source); ++ if (source.hasIndexReg()) { ++ stx_h(rd, source.base(), source.index()); ++ } else { ++ st_h(rd, source.base(), source.offset()); ++ } ++} ++ ++void TurboAssembler::Ld_w(Register rd, const MemOperand& rj) { ++ MemOperand source = rj; ++ AdjustBaseAndOffset(&source); // TODO ldptr_w ?? 
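++  // AdjustBaseAndOffset is expected either to leave an offset that fits the
++  // signed 12-bit field of ld_w or to rewrite the operand into base + index
++  // form, which is why both shapes are handled below.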
++ if (source.hasIndexReg()) { ++ ldx_w(rd, source.base(), source.index()); ++ } else { ++ ld_w(rd, source.base(), source.offset()); ++ } ++} ++ ++void TurboAssembler::Ld_wu(Register rd, const MemOperand& rj) { ++ MemOperand source = rj; ++ AdjustBaseAndOffset(&source); ++ if (source.hasIndexReg()) { ++ ldx_wu(rd, source.base(), source.index()); ++ } else { ++ ld_wu(rd, source.base(), source.offset()); ++ } ++} ++ ++void TurboAssembler::St_w(Register rd, const MemOperand& rj) { ++ MemOperand source = rj; ++ AdjustBaseAndOffset(&source); ++ if (source.hasIndexReg()) { ++ stx_w(rd, source.base(), source.index()); ++ } else { ++ st_w(rd, source.base(), source.offset()); ++ } ++} ++ ++void TurboAssembler::Ld_d(Register rd, const MemOperand& rj) { ++ MemOperand source = rj; ++ AdjustBaseAndOffset(&source); ++ if (source.hasIndexReg()) { ++ ldx_d(rd, source.base(), source.index()); ++ } else { ++ ld_d(rd, source.base(), source.offset()); ++ } ++} ++ ++void TurboAssembler::St_d(Register rd, const MemOperand& rj) { ++ MemOperand source = rj; ++ AdjustBaseAndOffset(&source); ++ if (source.hasIndexReg()) { ++ stx_d(rd, source.base(), source.index()); ++ } else { ++ st_d(rd, source.base(), source.offset()); ++ } ++} ++ ++void TurboAssembler::Fld_s(FPURegister fd, const MemOperand& src) { ++ MemOperand tmp = src; ++ AdjustBaseAndOffset(&tmp); ++ if (tmp.hasIndexReg()) { ++ fldx_s(fd, tmp.base(), tmp.index()); ++ } else { ++ fld_s(fd, tmp.base(), tmp.offset()); ++ } ++} ++ ++void TurboAssembler::Fst_s(FPURegister fs, const MemOperand& src) { ++ MemOperand tmp = src; ++ AdjustBaseAndOffset(&tmp); ++ if (tmp.hasIndexReg()) { ++ fstx_s(fs, tmp.base(), tmp.index()); ++ } else { ++ fst_s(fs, tmp.base(), tmp.offset()); ++ } ++} ++ ++void TurboAssembler::Fld_d(FPURegister fd, const MemOperand& src) { ++ MemOperand tmp = src; ++ AdjustBaseAndOffset(&tmp); ++ if (tmp.hasIndexReg()) { ++ fldx_d(fd, tmp.base(), tmp.index()); ++ } else { ++ fld_d(fd, tmp.base(), tmp.offset()); ++ } ++} ++ ++void TurboAssembler::Fst_d(FPURegister fs, const MemOperand& src) { ++ MemOperand tmp = src; ++ AdjustBaseAndOffset(&tmp); ++ if (tmp.hasIndexReg()) { ++ fstx_d(fs, tmp.base(), tmp.index()); ++ } else { ++ fst_d(fs, tmp.base(), tmp.offset()); ++ } ++} ++ ++void TurboAssembler::Ll_w(Register rd, const MemOperand& rj) { ++ DCHECK(!rj.hasIndexReg()); ++ bool is_one_instruction = is_int14(rj.offset()); ++ if (is_one_instruction) { ++ ll_w(rd, rj.base(), rj.offset()); ++ } else { ++ UseScratchRegisterScope temps(this); ++ Register scratch = temps.Acquire(); ++ li(scratch, rj.offset()); ++ add_d(scratch, scratch, rj.base()); ++ ll_w(rd, scratch, 0); ++ } ++} ++ ++void TurboAssembler::Ll_d(Register rd, const MemOperand& rj) { ++ DCHECK(!rj.hasIndexReg()); ++ bool is_one_instruction = is_int14(rj.offset()); ++ if (is_one_instruction) { ++ ll_d(rd, rj.base(), rj.offset()); ++ } else { ++ UseScratchRegisterScope temps(this); ++ Register scratch = temps.Acquire(); ++ li(scratch, rj.offset()); ++ add_d(scratch, scratch, rj.base()); ++ ll_d(rd, scratch, 0); ++ } ++} ++ ++void TurboAssembler::Sc_w(Register rd, const MemOperand& rj) { ++ DCHECK(!rj.hasIndexReg()); ++ bool is_one_instruction = is_int14(rj.offset()); ++ if (is_one_instruction) { ++ sc_w(rd, rj.base(), rj.offset()); ++ } else { ++ UseScratchRegisterScope temps(this); ++ Register scratch = temps.Acquire(); ++ li(scratch, rj.offset()); ++ add_d(scratch, scratch, rj.base()); ++ sc_w(rd, scratch, 0); ++ } ++} ++ ++void TurboAssembler::Sc_d(Register rd, const MemOperand& rj) { ++ 
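++  // Descriptive note: per LoongArch sc.d semantics, rd both supplies the value
++  // to store and receives the LL/SC success flag (1 = stored, 0 = failed);
++  // this wrapper only widens the reach of the 14-bit offset.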
DCHECK(!rj.hasIndexReg()); ++ bool is_one_instruction = is_int14(rj.offset()); ++ if (is_one_instruction) { ++ sc_d(rd, rj.base(), rj.offset()); ++ } else { ++ UseScratchRegisterScope temps(this); ++ Register scratch = temps.Acquire(); ++ li(scratch, rj.offset()); ++ add_d(scratch, scratch, rj.base()); ++ sc_d(rd, scratch, 0); ++ } ++} ++ ++void TurboAssembler::li(Register dst, Handle value, LiFlags mode) { ++ // TODO(jgruber,v8:8887): Also consider a root-relative load when generating ++ // non-isolate-independent code. In many cases it might be cheaper than ++ // embedding the relocatable value. ++ if (root_array_available_ && options().isolate_independent_code) { ++ IndirectLoadConstant(dst, value); ++ return; ++ } ++ li(dst, Operand(value), mode); ++} ++ ++void TurboAssembler::li(Register dst, ExternalReference value, LiFlags mode) { ++ // TODO(jgruber,v8:8887): Also consider a root-relative load when generating ++ // non-isolate-independent code. In many cases it might be cheaper than ++ // embedding the relocatable value. ++ if (root_array_available_ && options().isolate_independent_code) { ++ IndirectLoadExternalReference(dst, value); ++ return; ++ } ++ li(dst, Operand(value), mode); ++} ++ ++void TurboAssembler::li(Register dst, const StringConstantBase* string, ++ LiFlags mode) { ++ li(dst, Operand::EmbeddedStringConstant(string), mode); ++} ++ ++static inline int InstrCountForLiLower32Bit(int64_t value) { ++ if (is_int12(static_cast(value)) || ++ is_uint12(static_cast(value)) || !(value & kImm12Mask)) { ++ return 1; ++ } else { ++ return 2; ++ } ++} ++ ++void TurboAssembler::LiLower32BitHelper(Register rd, Operand j) { ++ if (is_int12(static_cast(j.immediate()))) { ++ addi_d(rd, zero_reg, j.immediate()); ++ } else if (is_uint12(static_cast(j.immediate()))) { ++ ori(rd, zero_reg, j.immediate() & kImm12Mask); ++ } else { ++ lu12i_w(rd, j.immediate() >> 12 & 0xfffff); ++ if (j.immediate() & kImm12Mask) { ++ ori(rd, rd, j.immediate() & kImm12Mask); ++ } ++ } ++} ++ ++int TurboAssembler::InstrCountForLi64Bit(int64_t value) { ++ if (is_int32(value)) { ++ return InstrCountForLiLower32Bit(value); ++ } else if (is_int52(value)) { ++ return InstrCountForLiLower32Bit(value) + 1; ++ } else if ((value & 0xffffffffL) == 0) { ++ // 32 LSBs (Least Significant Bits) all set to zero. ++ uint8_t tzc = base::bits::CountTrailingZeros32(value >> 32); ++ uint8_t lzc = base::bits::CountLeadingZeros32(value >> 32); ++ if (tzc >= 20) { ++ return 1; ++ } else if (tzc + lzc > 12) { ++ return 2; ++ } else { ++ return 3; ++ } ++ } else { ++ int64_t imm21 = (value >> 31) & 0x1fffffL; ++ if (imm21 != 0x1fffffL && imm21 != 0) { ++ return InstrCountForLiLower32Bit(value) + 2; ++ } else { ++ return InstrCountForLiLower32Bit(value) + 1; ++ } ++ } ++ UNREACHABLE(); ++ return INT_MAX; ++} ++ ++// All changes to if...else conditions here must be added to ++// InstrCountForLi64Bit as well. ++void TurboAssembler::li_optimized(Register rd, Operand j, LiFlags mode) { ++ DCHECK(!j.is_reg()); ++ DCHECK(!MustUseReg(j.rmode())); ++ DCHECK(mode == OPTIMIZE_SIZE); ++ int64_t imm = j.immediate(); ++ BlockTrampolinePoolScope block_trampoline_pool(this); ++ // Normal load of an immediate value which does not need Relocation Info. ++ if (is_int32(imm)) { ++ LiLower32BitHelper(rd, j); ++ } else if (is_int52(imm)) { ++ LiLower32BitHelper(rd, j); ++ lu32i_d(rd, imm >> 32 & 0xfffff); ++ } else if ((imm & 0xffffffffL) == 0) { ++ // 32 LSBs (Least Significant Bits) all set to zero. 
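++    // Illustrative example: for imm = 0x0040000000000000 the low 32 bits are
++    // zero and every set bit lies in [63:52], so tzc >= 20 below and a single
++    // lu52i_d(rd, zero_reg, 0x004) materializes the value.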
++ uint8_t tzc = base::bits::CountTrailingZeros32(imm >> 32); ++ uint8_t lzc = base::bits::CountLeadingZeros32(imm >> 32); ++ if (tzc >= 20) { ++ lu52i_d(rd, zero_reg, imm >> 52 & kImm12Mask); ++ } else if (tzc + lzc > 12) { ++ int32_t mask = (1 << (32 - tzc)) - 1; ++ lu12i_w(rd, imm >> (tzc + 32) & mask); ++ slli_d(rd, rd, tzc + 20); ++ } else { ++ xor_(rd, rd, rd); ++ lu32i_d(rd, imm >> 32 & 0xfffff); ++ lu52i_d(rd, rd, imm >> 52 & kImm12Mask); ++ } ++ } else { ++ int64_t imm21 = (imm >> 31) & 0x1fffffL; ++ LiLower32BitHelper(rd, j); ++ if (imm21 != 0x1fffffL && imm21 != 0) lu32i_d(rd, imm >> 32 & 0xfffff); ++ lu52i_d(rd, rd, imm >> 52 & kImm12Mask); ++ } ++} ++ ++void TurboAssembler::li(Register rd, Operand j, LiFlags mode) { ++ DCHECK(!j.is_reg()); ++ BlockTrampolinePoolScope block_trampoline_pool(this); ++ if (!MustUseReg(j.rmode()) && mode == OPTIMIZE_SIZE) { ++ li_optimized(rd, j, mode); ++ } else if (MustUseReg(j.rmode())) { ++ int64_t immediate; ++ if (j.IsHeapObjectRequest()) { ++ RequestHeapObject(j.heap_object_request()); ++ immediate = 0; ++ } else { ++ immediate = j.immediate(); ++ } ++ ++ RecordRelocInfo(j.rmode(), immediate); ++ lu12i_w(rd, immediate >> 12 & 0xfffff); ++ ori(rd, rd, immediate & kImm12Mask); ++ lu32i_d(rd, immediate >> 32 & 0xfffff); ++ } else if (mode == ADDRESS_LOAD) { ++ // We always need the same number of instructions as we may need to patch ++ // this code to load another value which may need all 3 instructions. ++ lu12i_w(rd, j.immediate() >> 12 & 0xfffff); ++ ori(rd, rd, j.immediate() & kImm12Mask); ++ lu32i_d(rd, j.immediate() >> 32 & 0xfffff); ++ } else { // mode == CONSTANT_SIZE - always emit the same instruction ++ // sequence. ++ lu12i_w(rd, j.immediate() >> 12 & 0xfffff); ++ ori(rd, rd, j.immediate() & kImm12Mask); ++ lu32i_d(rd, j.immediate() >> 32 & 0xfffff); ++ lu52i_d(rd, rd, j.immediate() >> 52 & kImm12Mask); ++ } ++} ++ ++void TurboAssembler::MultiPush(RegList regs) { ++ int16_t stack_offset = 0; ++ ++ for (int16_t i = kNumRegisters - 1; i >= 0; i--) { ++ if ((regs & (1 << i)) != 0) { ++ stack_offset -= kPointerSize; ++ St_d(ToRegister(i), MemOperand(sp, stack_offset)); ++ } ++ } ++ addi_d(sp, sp, stack_offset); ++} ++ ++void TurboAssembler::MultiPush(RegList regs1, RegList regs2) { ++ DCHECK_EQ(regs1 & regs2, 0); ++ int16_t stack_offset = 0; ++ ++ for (int16_t i = kNumRegisters - 1; i >= 0; i--) { ++ if ((regs1 & (1 << i)) != 0) { ++ stack_offset -= kPointerSize; ++ St_d(ToRegister(i), MemOperand(sp, stack_offset)); ++ } ++ } ++ for (int16_t i = kNumRegisters - 1; i >= 0; i--) { ++ if ((regs2 & (1 << i)) != 0) { ++ stack_offset -= kPointerSize; ++ St_d(ToRegister(i), MemOperand(sp, stack_offset)); ++ } ++ } ++ addi_d(sp, sp, stack_offset); ++} ++ ++void TurboAssembler::MultiPush(RegList regs1, RegList regs2, RegList regs3) { ++ DCHECK_EQ(regs1 & regs2, 0); ++ DCHECK_EQ(regs1 & regs3, 0); ++ DCHECK_EQ(regs2 & regs3, 0); ++ int16_t stack_offset = 0; ++ ++ for (int16_t i = kNumRegisters - 1; i >= 0; i--) { ++ if ((regs1 & (1 << i)) != 0) { ++ stack_offset -= kPointerSize; ++ St_d(ToRegister(i), MemOperand(sp, stack_offset)); ++ } ++ } ++ for (int16_t i = kNumRegisters - 1; i >= 0; i--) { ++ if ((regs2 & (1 << i)) != 0) { ++ stack_offset -= kPointerSize; ++ St_d(ToRegister(i), MemOperand(sp, stack_offset)); ++ } ++ } ++ for (int16_t i = kNumRegisters - 1; i >= 0; i--) { ++ if ((regs3 & (1 << i)) != 0) { ++ stack_offset -= kPointerSize; ++ St_d(ToRegister(i), MemOperand(sp, stack_offset)); ++ } ++ } ++ addi_d(sp, sp, stack_offset); ++} ++ 
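++// Illustrative layout: pushing a RegList holding, say, a0 and a1 stores the
++// higher-numbered a1 at [sp + 8] and a0 at [sp + 0] of the new sp, since the
++// loops above walk register codes downwards; the MultiPop variants below walk
++// them upwards, so the same RegList restores the registers and releases the
++// stack space again.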
++void TurboAssembler::MultiPop(RegList regs) { ++ int16_t stack_offset = 0; ++ ++ for (int16_t i = 0; i < kNumRegisters; i++) { ++ if ((regs & (1 << i)) != 0) { ++ Ld_d(ToRegister(i), MemOperand(sp, stack_offset)); ++ stack_offset += kPointerSize; ++ } ++ } ++ addi_d(sp, sp, stack_offset); ++} ++ ++void TurboAssembler::MultiPop(RegList regs1, RegList regs2) { ++ DCHECK_EQ(regs1 & regs2, 0); ++ int16_t stack_offset = 0; ++ ++ for (int16_t i = 0; i < kNumRegisters; i++) { ++ if ((regs2 & (1 << i)) != 0) { ++ Ld_d(ToRegister(i), MemOperand(sp, stack_offset)); ++ stack_offset += kPointerSize; ++ } ++ } ++ for (int16_t i = 0; i < kNumRegisters; i++) { ++ if ((regs1 & (1 << i)) != 0) { ++ Ld_d(ToRegister(i), MemOperand(sp, stack_offset)); ++ stack_offset += kPointerSize; ++ } ++ } ++ addi_d(sp, sp, stack_offset); ++} ++ ++void TurboAssembler::MultiPop(RegList regs1, RegList regs2, RegList regs3) { ++ DCHECK_EQ(regs1 & regs2, 0); ++ DCHECK_EQ(regs1 & regs3, 0); ++ DCHECK_EQ(regs2 & regs3, 0); ++ int16_t stack_offset = 0; ++ ++ for (int16_t i = 0; i < kNumRegisters; i++) { ++ if ((regs3 & (1 << i)) != 0) { ++ Ld_d(ToRegister(i), MemOperand(sp, stack_offset)); ++ stack_offset += kPointerSize; ++ } ++ } ++ for (int16_t i = 0; i < kNumRegisters; i++) { ++ if ((regs2 & (1 << i)) != 0) { ++ Ld_d(ToRegister(i), MemOperand(sp, stack_offset)); ++ stack_offset += kPointerSize; ++ } ++ } ++ for (int16_t i = 0; i < kNumRegisters; i++) { ++ if ((regs1 & (1 << i)) != 0) { ++ Ld_d(ToRegister(i), MemOperand(sp, stack_offset)); ++ stack_offset += kPointerSize; ++ } ++ } ++ addi_d(sp, sp, stack_offset); ++} ++ ++void TurboAssembler::MultiPushFPU(RegList regs) { ++ int16_t num_to_push = base::bits::CountPopulation(regs); ++ int16_t stack_offset = num_to_push * kDoubleSize; ++ ++ Sub_d(sp, sp, Operand(stack_offset)); ++ for (int16_t i = kNumRegisters - 1; i >= 0; i--) { ++ if ((regs & (1 << i)) != 0) { ++ stack_offset -= kDoubleSize; ++ Fst_d(FPURegister::from_code(i), MemOperand(sp, stack_offset)); ++ } ++ } ++} ++ ++void TurboAssembler::MultiPopFPU(RegList regs) { ++ int16_t stack_offset = 0; ++ ++ for (int16_t i = 0; i < kNumRegisters; i++) { ++ if ((regs & (1 << i)) != 0) { ++ Fld_d(FPURegister::from_code(i), MemOperand(sp, stack_offset)); ++ stack_offset += kDoubleSize; ++ } ++ } ++ addi_d(sp, sp, stack_offset); ++} ++ ++void TurboAssembler::Bstrpick_w(Register rk, Register rj, uint16_t msbw, ++ uint16_t lsbw) { ++ DCHECK_LT(lsbw, msbw); ++ DCHECK_LT(lsbw, 32); ++ DCHECK_LT(msbw, 32); ++ bstrpick_w(rk, rj, msbw, lsbw); ++} ++ ++void TurboAssembler::Bstrpick_d(Register rk, Register rj, uint16_t msbw, ++ uint16_t lsbw) { ++ DCHECK_LT(lsbw, msbw); ++ DCHECK_LT(lsbw, 64); ++ DCHECK_LT(msbw, 64); ++ bstrpick_d(rk, rj, msbw, lsbw); ++} ++ ++void TurboAssembler::Neg_s(FPURegister fd, FPURegister fj) { fneg_s(fd, fj); } ++ ++void TurboAssembler::Neg_d(FPURegister fd, FPURegister fj) { fneg_d(fd, fj); } ++ ++void TurboAssembler::Ffint_d_uw(FPURegister fd, FPURegister fj) { ++ // Move the data from fs to t8. ++ BlockTrampolinePoolScope block_trampoline_pool(this); ++ movfr2gr_s(t8, fj); ++ Ffint_d_uw(fd, t8); ++} ++ ++void TurboAssembler::Ffint_d_uw(FPURegister fd, Register rj) { ++ BlockTrampolinePoolScope block_trampoline_pool(this); ++ ++ // Convert rj to a FP value in fd. ++ DCHECK(rj != t7); ++ ++ // Zero extend int32 in rj. 
++ Bstrpick_d(t7, rj, 31, 0); ++ movgr2fr_d(fd, t7); ++ ffint_d_l(fd, fd); ++} ++ ++void TurboAssembler::Ffint_d_ul(FPURegister fd, FPURegister fj) { ++ BlockTrampolinePoolScope block_trampoline_pool(this); ++ // Move the data from fs to t8. ++ movfr2gr_d(t8, fj); ++ Ffint_d_ul(fd, t8); ++} ++ ++void TurboAssembler::Ffint_d_ul(FPURegister fd, Register rj) { ++ BlockTrampolinePoolScope block_trampoline_pool(this); ++ // Convert rj to a FP value in fd. ++ ++ DCHECK(rj != t7); ++ ++ Label msb_clear, conversion_done; ++ ++ Branch(&msb_clear, ge, rj, Operand(zero_reg)); ++ ++ // Rj >= 2^63 ++ andi(t7, rj, 1); ++ srli_d(rj, rj, 1); ++ or_(t7, t7, rj); ++ movgr2fr_d(fd, t7); ++ ffint_d_l(fd, fd); ++ fadd_d(fd, fd, fd); ++ Branch(&conversion_done); ++ ++ bind(&msb_clear); ++ // Rs < 2^63, we can do simple conversion. ++ movgr2fr_d(fd, rj); ++ ffint_d_l(fd, fd); ++ ++ bind(&conversion_done); ++} ++ ++void TurboAssembler::Ffint_s_uw(FPURegister fd, FPURegister fj) { ++ BlockTrampolinePoolScope block_trampoline_pool(this); ++ // Move the data from fs to t8. ++ movfr2gr_d(t8, fj); ++ Ffint_s_uw(fd, t8); ++} ++ ++void TurboAssembler::Ffint_s_uw(FPURegister fd, Register rj) { ++ BlockTrampolinePoolScope block_trampoline_pool(this); ++ // Convert rj to a FP value in fd. ++ DCHECK(rj != t7); ++ ++ // Zero extend int32 in rj. ++ bstrpick_d(t7, rj, 31, 0); ++ movgr2fr_d(fd, t7); ++ ffint_s_l(fd, fd); ++} ++ ++void TurboAssembler::Ffint_s_ul(FPURegister fd, FPURegister fj) { ++ BlockTrampolinePoolScope block_trampoline_pool(this); ++ // Move the data from fs to t8. ++ movfr2gr_d(t8, fj); ++ Ffint_s_ul(fd, t8); ++} ++ ++void TurboAssembler::Ffint_s_ul(FPURegister fd, Register rj) { ++ BlockTrampolinePoolScope block_trampoline_pool(this); ++ // Convert rj to a FP value in fd. ++ ++ DCHECK(rj != t7); ++ ++ Label positive, conversion_done; ++ ++ Branch(&positive, ge, rj, Operand(zero_reg)); ++ ++ // Rs >= 2^31. ++ andi(t7, rj, 1); ++ srli_d(rj, rj, 1); ++ or_(t7, t7, rj); ++ movgr2fr_d(fd, t7); ++ ffint_s_l(fd, fd); ++ fadd_s(fd, fd, fd); ++ Branch(&conversion_done); ++ ++ bind(&positive); ++ // Rs < 2^31, we can do simple conversion. ++ movgr2fr_d(fd, rj); ++ ffint_s_l(fd, fd); ++ ++ bind(&conversion_done); ++} ++ ++void MacroAssembler::Ftintrne_l_d(FPURegister fd, FPURegister fj) { ++ ftintrne_l_d(fd, fj); ++} ++ ++void MacroAssembler::Ftintrm_l_d(FPURegister fd, FPURegister fj) { ++ ftintrm_l_d(fd, fj); ++} ++ ++void MacroAssembler::Ftintrp_l_d(FPURegister fd, FPURegister fj) { ++ ftintrp_l_d(fd, fj); ++} ++ ++void MacroAssembler::Ftintrz_l_d(FPURegister fd, FPURegister fj) { ++ ftintrz_l_d(fd, fj); ++} ++ ++void MacroAssembler::Ftintrz_l_ud(FPURegister fd, FPURegister fj, ++ FPURegister scratch) { ++ BlockTrampolinePoolScope block_trampoline_pool(this); ++ // Load to GPR. ++ movfr2gr_d(t8, fj); ++ // Reset sign bit. 
++ { ++ UseScratchRegisterScope temps(this); ++ Register scratch1 = temps.Acquire(); ++ li(scratch1, 0x7FFFFFFFFFFFFFFFl); ++ and_(t8, t8, scratch1); ++ } ++ movgr2fr_d(scratch, t8); ++ Ftintrz_l_d(fd, scratch); ++} ++ ++void TurboAssembler::Ftintrz_uw_d(FPURegister fd, FPURegister fj, ++ FPURegister scratch) { ++ BlockTrampolinePoolScope block_trampoline_pool(this); ++ Ftintrz_uw_d(t8, fj, scratch); ++ movgr2fr_w(fd, t8); ++} ++ ++void TurboAssembler::Ftintrz_uw_s(FPURegister fd, FPURegister fj, ++ FPURegister scratch) { ++ BlockTrampolinePoolScope block_trampoline_pool(this); ++ Ftintrz_uw_s(t8, fj, scratch); ++ movgr2fr_w(fd, t8); ++} ++ ++void TurboAssembler::Ftintrz_ul_d(FPURegister fd, FPURegister fj, ++ FPURegister scratch, Register result) { ++ BlockTrampolinePoolScope block_trampoline_pool(this); ++ Ftintrz_ul_d(t8, fj, scratch, result); ++ movgr2fr_d(fd, t8); ++} ++ ++void TurboAssembler::Ftintrz_ul_s(FPURegister fd, FPURegister fj, ++ FPURegister scratch, Register result) { ++ BlockTrampolinePoolScope block_trampoline_pool(this); ++ Ftintrz_ul_s(t8, fj, scratch, result); ++ movgr2fr_d(fd, t8); ++} ++ ++void MacroAssembler::Ftintrz_w_d(FPURegister fd, FPURegister fj) { ++ ftintrz_w_d(fd, fj); ++} ++ ++void MacroAssembler::Ftintrne_w_d(FPURegister fd, FPURegister fj) { ++ ftintrne_w_d(fd, fj); ++} ++ ++void MacroAssembler::Ftintrm_w_d(FPURegister fd, FPURegister fj) { ++ ftintrm_w_d(fd, fj); ++} ++ ++void MacroAssembler::Ftintrp_w_d(FPURegister fd, FPURegister fj) { ++ ftintrp_w_d(fd, fj); ++} ++ ++void TurboAssembler::Ftintrz_uw_d(Register rd, FPURegister fj, ++ FPURegister scratch) { ++ DCHECK(fj != scratch); ++ DCHECK(rd != t7); ++ ++ { ++ // Load 2^31 into scratch as its float representation. ++ UseScratchRegisterScope temps(this); ++ Register scratch1 = temps.Acquire(); ++ li(scratch1, 0x41E00000); ++ movgr2fr_w(scratch, zero_reg); ++ movgr2frh_w(scratch, scratch1); ++ } ++ // Test if scratch > fd. ++ // If fd < 2^31 we can convert it normally. ++ Label simple_convert; ++ CompareF64(fj, scratch, CLT); ++ BranchTrueShortF(&simple_convert); ++ ++ // First we subtract 2^31 from fd, then trunc it to rs ++ // and add 2^31 to rj. ++ fsub_d(scratch, fj, scratch); ++ ftintrz_w_d(scratch, scratch); ++ movfr2gr_s(rd, scratch); ++ Or(rd, rd, 1 << 31); ++ ++ Label done; ++ Branch(&done); ++ // Simple conversion. ++ bind(&simple_convert); ++ ftintrz_w_d(scratch, fj); ++ movfr2gr_s(rd, scratch); ++ ++ bind(&done); ++} ++ ++void TurboAssembler::Ftintrz_uw_s(Register rd, FPURegister fj, ++ FPURegister scratch) { ++ DCHECK(fj != scratch); ++ DCHECK(rd != t7); ++ { ++ // Load 2^31 into scratch as its float representation. ++ UseScratchRegisterScope temps(this); ++ Register scratch1 = temps.Acquire(); ++ li(scratch1, 0x4F000000); ++ movgr2fr_w(scratch, scratch1); ++ } ++ // Test if scratch > fs. ++ // If fs < 2^31 we can convert it normally. ++ Label simple_convert; ++ CompareF32(fj, scratch, CLT); ++ BranchTrueShortF(&simple_convert); ++ ++ // First we subtract 2^31 from fs, then trunc it to rd ++ // and add 2^31 to rd. ++ fsub_s(scratch, fj, scratch); ++ ftintrz_w_s(scratch, scratch); ++ movfr2gr_s(rd, scratch); ++ Or(rd, rd, 1 << 31); ++ ++ Label done; ++ Branch(&done); ++ // Simple conversion. ++ bind(&simple_convert); ++ ftintrz_w_s(scratch, fj); ++ movfr2gr_s(rd, scratch); ++ ++ bind(&done); ++} ++ ++void TurboAssembler::Ftintrz_ul_d(Register rd, FPURegister fj, ++ FPURegister scratch, Register result) { ++ DCHECK(fj != scratch); ++ DCHECK(result.is_valid() ? 
!AreAliased(rd, result, t7) : !AreAliased(rd, t7)); ++ ++ Label simple_convert, done, fail; ++ if (result.is_valid()) { ++ mov(result, zero_reg); ++ Move(scratch, -1.0); ++ // If fd =< -1 or unordered, then the conversion fails. ++ CompareF64(fj, scratch, CLE); ++ BranchTrueShortF(&fail); ++ CompareIsNanF64(fj, scratch); ++ BranchTrueShortF(&fail); ++ } ++ ++ // Load 2^63 into scratch as its double representation. ++ li(t7, 0x43E0000000000000); ++ movgr2fr_d(scratch, t7); ++ ++ // Test if scratch > fs. ++ // If fs < 2^63 we can convert it normally. ++ CompareF64(fj, scratch, CLT); ++ BranchTrueShortF(&simple_convert); ++ ++ // First we subtract 2^63 from fs, then trunc it to rd ++ // and add 2^63 to rd. ++ fsub_d(scratch, fj, scratch); ++ ftintrz_l_d(scratch, scratch); ++ movfr2gr_d(rd, scratch); ++ Or(rd, rd, Operand(1UL << 63)); ++ Branch(&done); ++ ++ // Simple conversion. ++ bind(&simple_convert); ++ ftintrz_l_d(scratch, fj); ++ movfr2gr_d(rd, scratch); ++ ++ bind(&done); ++ if (result.is_valid()) { ++ // Conversion is failed if the result is negative. ++ { ++ UseScratchRegisterScope temps(this); ++ Register scratch1 = temps.Acquire(); ++ addi_d(scratch1, zero_reg, -1); ++ srli_d(scratch1, scratch1, 1); // Load 2^62. ++ movfr2gr_d(result, scratch); ++ xor_(result, result, scratch1); ++ } ++ Slt(result, zero_reg, result); ++ } ++ ++ bind(&fail); ++} ++ ++void TurboAssembler::Ftintrz_ul_s(Register rd, FPURegister fj, ++ FPURegister scratch, Register result) { ++ DCHECK(fj != scratch); ++ DCHECK(result.is_valid() ? !AreAliased(rd, result, t7) : !AreAliased(rd, t7)); ++ ++ Label simple_convert, done, fail; ++ if (result.is_valid()) { ++ mov(result, zero_reg); ++ Move(scratch, -1.0f); ++ // If fd =< -1 or unordered, then the conversion fails. ++ CompareF32(fj, scratch, CLE); ++ BranchTrueShortF(&fail); ++ CompareIsNanF32(fj, scratch); ++ BranchTrueShortF(&fail); ++ } ++ ++ { ++ // Load 2^63 into scratch as its float representation. ++ UseScratchRegisterScope temps(this); ++ Register scratch1 = temps.Acquire(); ++ li(scratch1, 0x5F000000); ++ movgr2fr_w(scratch, scratch1); ++ } ++ ++ // Test if scratch > fs. ++ // If fs < 2^63 we can convert it normally. ++ CompareF32(fj, scratch, CLT); ++ BranchTrueShortF(&simple_convert); ++ ++ // First we subtract 2^63 from fs, then trunc it to rd ++ // and add 2^63 to rd. ++ fsub_s(scratch, fj, scratch); ++ ftintrz_l_s(scratch, scratch); ++ movfr2gr_d(rd, scratch); ++ Or(rd, rd, Operand(1UL << 63)); ++ Branch(&done); ++ ++ // Simple conversion. ++ bind(&simple_convert); ++ ftintrz_l_s(scratch, fj); ++ movfr2gr_d(rd, scratch); ++ ++ bind(&done); ++ if (result.is_valid()) { ++ // Conversion is failed if the result is negative or unordered. ++ { ++ UseScratchRegisterScope temps(this); ++ Register scratch1 = temps.Acquire(); ++ addi_d(scratch1, zero_reg, -1); ++ srli_d(scratch1, scratch1, 1); // Load 2^62. 
++ movfr2gr_d(result, scratch); ++ xor_(result, result, scratch1); ++ } ++ Slt(result, zero_reg, result); ++ } ++ ++ bind(&fail); ++} ++ ++void TurboAssembler::RoundDouble(FPURegister dst, FPURegister src, ++ FPURoundingMode mode) { ++ BlockTrampolinePoolScope block_trampoline_pool(this); ++ Register scratch = t8; ++ movfcsr2gr(scratch); ++ li(t7, Operand(mode)); ++ movgr2fcsr(t7); ++ frint_d(dst, src); ++ movgr2fcsr(scratch); ++} ++ ++void TurboAssembler::Floor_d(FPURegister dst, FPURegister src) { ++ RoundDouble(dst, src, mode_floor); ++} ++ ++void TurboAssembler::Ceil_d(FPURegister dst, FPURegister src) { ++ RoundDouble(dst, src, mode_ceil); ++} ++ ++void TurboAssembler::Trunc_d(FPURegister dst, FPURegister src) { ++ RoundDouble(dst, src, mode_trunc); ++} ++ ++void TurboAssembler::Round_d(FPURegister dst, FPURegister src) { ++ RoundDouble(dst, src, mode_round); ++} ++ ++void TurboAssembler::RoundFloat(FPURegister dst, FPURegister src, ++ FPURoundingMode mode) { ++ BlockTrampolinePoolScope block_trampoline_pool(this); ++ Register scratch = t8; ++ movfcsr2gr(scratch); ++ li(t7, Operand(mode)); ++ movgr2fcsr(t7); ++ frint_s(dst, src); ++ movgr2fcsr(scratch); ++} ++ ++void TurboAssembler::Floor_s(FPURegister dst, FPURegister src) { ++ RoundFloat(dst, src, mode_floor); ++} ++ ++void TurboAssembler::Ceil_s(FPURegister dst, FPURegister src) { ++ RoundFloat(dst, src, mode_ceil); ++} ++ ++void TurboAssembler::Trunc_s(FPURegister dst, FPURegister src) { ++ RoundFloat(dst, src, mode_trunc); ++} ++ ++void TurboAssembler::Round_s(FPURegister dst, FPURegister src) { ++ RoundFloat(dst, src, mode_round); ++} ++ ++void TurboAssembler::CompareF(FPURegister cmp1, FPURegister cmp2, ++ FPUCondition cc, CFRegister cd, bool f32) { ++ if (f32) { ++ fcmp_cond_s(cc, cmp1, cmp2, cd); ++ } else { ++ fcmp_cond_d(cc, cmp1, cmp2, cd); ++ } ++} ++ ++void TurboAssembler::CompareIsNanF(FPURegister cmp1, FPURegister cmp2, ++ CFRegister cd, bool f32) { ++ CompareF(cmp1, cmp2, CUN, cd, f32); ++} ++ ++void TurboAssembler::BranchTrueShortF(Label* target, CFRegister cj) { ++ bcnez(cj, target); ++} ++ ++void TurboAssembler::BranchFalseShortF(Label* target, CFRegister cj) { ++ bceqz(cj, target); ++} ++ ++void TurboAssembler::BranchTrueF(Label* target, CFRegister cj) { ++ // TODO can be optimzed ++ bool long_branch = target->is_bound() ++ ? !is_near(target, OffsetSize::kOffset21) ++ : is_trampoline_emitted(); ++ if (long_branch) { ++ Label skip; ++ BranchFalseShortF(&skip, cj); ++ Branch(target); ++ bind(&skip); ++ } else { ++ BranchTrueShortF(target, cj); ++ } ++} ++ ++void TurboAssembler::BranchFalseF(Label* target, CFRegister cj) { ++ bool long_branch = target->is_bound() ++ ? !is_near(target, OffsetSize::kOffset21) ++ : is_trampoline_emitted(); ++ if (long_branch) { ++ Label skip; ++ BranchTrueShortF(&skip, cj); ++ Branch(target); ++ bind(&skip); ++ } else { ++ BranchFalseShortF(target, cj); ++ } ++} ++ ++void TurboAssembler::FmoveLow(FPURegister dst, Register src_low) { ++ UseScratchRegisterScope temps(this); ++ Register scratch = temps.Acquire(); ++ DCHECK(src_low != scratch); ++ movfrh2gr_s(scratch, dst); ++ movgr2fr_w(dst, src_low); ++ movgr2frh_w(dst, scratch); ++} ++ ++void TurboAssembler::Move(FPURegister dst, uint32_t src) { ++ UseScratchRegisterScope temps(this); ++ Register scratch = temps.Acquire(); ++ li(scratch, Operand(static_cast(src))); ++ movgr2fr_w(dst, scratch); ++} ++ ++void TurboAssembler::Move(FPURegister dst, uint64_t src) { ++ // Handle special values first. 
++ if (src == bit_cast(0.0) && has_double_zero_reg_set_) { ++ fmov_d(dst, kDoubleRegZero); ++ } else if (src == bit_cast(-0.0) && has_double_zero_reg_set_) { ++ Neg_d(dst, kDoubleRegZero); ++ } else { ++ UseScratchRegisterScope temps(this); ++ Register scratch = temps.Acquire(); ++ li(scratch, Operand(static_cast(src))); ++ movgr2fr_d(dst, scratch); ++ if (dst == kDoubleRegZero) has_double_zero_reg_set_ = true; ++ } ++} ++ ++void TurboAssembler::Movz(Register rd, Register rj, Register rk) { ++ UseScratchRegisterScope temps(this); ++ Register scratch = temps.Acquire(); ++ maskeqz(scratch, rj, rk); ++ masknez(rd, rd, rk); ++ or_(rd, rd, scratch); ++} ++ ++void TurboAssembler::Movn(Register rd, Register rj, Register rk) { ++ UseScratchRegisterScope temps(this); ++ Register scratch = temps.Acquire(); ++ masknez(scratch, rj, rk); ++ maskeqz(rd, rd, rk); ++ or_(rd, rd, scratch); ++} ++ ++void TurboAssembler::LoadZeroOnCondition(Register rd, Register rj, ++ const Operand& rk, Condition cond) { ++ BlockTrampolinePoolScope block_trampoline_pool(this); ++ switch (cond) { ++ case cc_always: ++ mov(rd, zero_reg); ++ break; ++ case eq: ++ if (rj == zero_reg) { ++ if (rk.is_reg()) { ++ LoadZeroIfConditionZero(rd, rk.rm()); ++ } else { ++ if (rk.immediate() == 0) { ++ mov(rd, zero_reg); ++ } else { ++ // nop(); ++ } ++ } ++ } else if (IsZero(rk)) { ++ LoadZeroIfConditionZero(rd, rj); ++ } else { ++ Sub_d(t7, rj, rk); ++ LoadZeroIfConditionZero(rd, t7); ++ } ++ break; ++ case ne: ++ if (rj == zero_reg) { ++ if (rk.is_reg()) { ++ LoadZeroIfConditionNotZero(rd, rk.rm()); ++ } else { ++ if (rk.immediate() != 0) { ++ mov(rd, zero_reg); ++ } else { ++ // nop(); ++ } ++ } ++ } else if (IsZero(rk)) { ++ LoadZeroIfConditionNotZero(rd, rj); ++ } else { ++ Sub_d(t7, rj, rk); ++ LoadZeroIfConditionNotZero(rd, t7); ++ } ++ break; ++ ++ // Signed comparison. ++ case greater: ++ Sgt(t7, rj, rk); ++ LoadZeroIfConditionNotZero(rd, t7); ++ break; ++ case greater_equal: ++ Sge(t7, rj, rk); ++ LoadZeroIfConditionNotZero(rd, t7); ++ // rj >= rk ++ break; ++ case less: ++ Slt(t7, rj, rk); ++ LoadZeroIfConditionNotZero(rd, t7); ++ // rj < rk ++ break; ++ case less_equal: ++ Sle(t7, rj, rk); ++ LoadZeroIfConditionNotZero(rd, t7); ++ // rj <= rk ++ break; ++ ++ // Unsigned comparison. 
++ case Ugreater: ++ Sgtu(t7, rj, rk); ++ LoadZeroIfConditionNotZero(rd, t7); ++ // rj > rk ++ break; ++ ++ case Ugreater_equal: ++ Sgeu(t7, rj, rk); ++ LoadZeroIfConditionNotZero(rd, t7); ++ // rj >= rk ++ break; ++ case Uless: ++ Sltu(t7, rj, rk); ++ LoadZeroIfConditionNotZero(rd, t7); ++ // rj < rk ++ break; ++ case Uless_equal: ++ Sleu(t7, rj, rk); ++ LoadZeroIfConditionNotZero(rd, t7); ++ // rj <= rk ++ break; ++ default: ++ UNREACHABLE(); ++ } ++} ++ ++void TurboAssembler::LoadZeroIfConditionNotZero(Register dest, ++ Register condition) { ++ maskeqz(dest, dest, condition); ++} ++ ++void TurboAssembler::LoadZeroIfConditionZero(Register dest, ++ Register condition) { ++ masknez(dest, dest, condition); ++} ++ ++void TurboAssembler::LoadZeroIfFPUCondition(Register dest, CFRegister cc) { ++ UseScratchRegisterScope temps(this); ++ Register scratch = temps.Acquire(); ++ movcf2gr(scratch, cc); ++ LoadZeroIfConditionNotZero(dest, scratch); ++} ++ ++void TurboAssembler::LoadZeroIfNotFPUCondition(Register dest, CFRegister cc) { ++ UseScratchRegisterScope temps(this); ++ Register scratch = temps.Acquire(); ++ movcf2gr(scratch, cc); ++ LoadZeroIfConditionZero(dest, scratch); ++} ++ ++void TurboAssembler::Clz_w(Register rd, Register rj) { clz_w(rd, rj); } ++ ++void TurboAssembler::Clz_d(Register rd, Register rj) { clz_d(rd, rj); } ++ ++void TurboAssembler::Ctz_w(Register rd, Register rj) { ctz_w(rd, rj); } ++ ++void TurboAssembler::Ctz_d(Register rd, Register rj) { ctz_d(rd, rj); } ++ ++// TODO: Optimize like arm64, use simd instruction ++void TurboAssembler::Popcnt_w(Register rd, Register rj) { ++ // https://graphics.stanford.edu/~seander/bithacks.html#CountBitsSetParallel ++ // ++ // A generalization of the best bit counting method to integers of ++ // bit-widths up to 128 (parameterized by type T) is this: ++ // ++ // v = v - ((v >> 1) & (T)~(T)0/3); // temp ++ // v = (v & (T)~(T)0/15*3) + ((v >> 2) & (T)~(T)0/15*3); // temp ++ // v = (v + (v >> 4)) & (T)~(T)0/255*15; // temp ++ // c = (T)(v * ((T)~(T)0/255)) >> (sizeof(T) - 1) * BITS_PER_BYTE; //count ++ // ++ // For comparison, for 32-bit quantities, this algorithm can be executed ++ // using 20 MIPS instructions (the calls to LoadConst32() generate two ++ // machine instructions each for the values being used in this algorithm). ++ // A(n unrolled) loop-based algorithm requires 25 instructions. ++ // ++ // For a 64-bit operand this can be performed in 24 instructions compared ++ // to a(n unrolled) loop based algorithm which requires 38 instructions. ++ // ++ // There are algorithms which are faster in the cases where very few ++ // bits are set but the algorithm here attempts to minimize the total ++ // number of instructions executed even when a large number of bits ++ // are set. 
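++  // Worked example (illustrative): for rj = 13 (0b1101) the steps below give
++  // 13 - (6 & 0x55555555) = 9 (each 2-bit field now holds its own popcount),
++  // then (9 & 0x33333333) + ((9 >> 2) & 0x33333333) = 3, then
++  // (3 + (3 >> 4)) & 0x0F0F0F0F = 3, and finally (3 * 0x01010101) >> 24 = 3,
++  // the number of bits set in 13.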
++ int32_t B0 = 0x55555555; // (T)~(T)0/3 ++ int32_t B1 = 0x33333333; // (T)~(T)0/15*3 ++ int32_t B2 = 0x0F0F0F0F; // (T)~(T)0/255*15 ++ int32_t value = 0x01010101; // (T)~(T)0/255 ++ uint32_t shift = 24; // (sizeof(T) - 1) * BITS_PER_BYTE ++ ++ UseScratchRegisterScope temps(this); ++ BlockTrampolinePoolScope block_trampoline_pool(this); ++ Register scratch = temps.Acquire(); ++ Register scratch2 = t8; ++ srli_w(scratch, rj, 1); ++ li(scratch2, B0); ++ And(scratch, scratch, scratch2); ++ Sub_w(scratch, rj, scratch); ++ li(scratch2, B1); ++ And(rd, scratch, scratch2); ++ srli_w(scratch, scratch, 2); ++ And(scratch, scratch, scratch2); ++ Add_w(scratch, rd, scratch); ++ srli_w(rd, scratch, 4); ++ Add_w(rd, rd, scratch); ++ li(scratch2, B2); ++ And(rd, rd, scratch2); ++ li(scratch, value); ++ Mul_w(rd, rd, scratch); ++ srli_w(rd, rd, shift); ++} ++ ++void TurboAssembler::Popcnt_d(Register rd, Register rj) { ++ int64_t B0 = 0x5555555555555555l; // (T)~(T)0/3 ++ int64_t B1 = 0x3333333333333333l; // (T)~(T)0/15*3 ++ int64_t B2 = 0x0F0F0F0F0F0F0F0Fl; // (T)~(T)0/255*15 ++ int64_t value = 0x0101010101010101l; // (T)~(T)0/255 ++ uint32_t shift = 56; // (sizeof(T) - 1) * BITS_PER_BYTE ++ ++ UseScratchRegisterScope temps(this); ++ BlockTrampolinePoolScope block_trampoline_pool(this); ++ Register scratch = temps.Acquire(); ++ Register scratch2 = t8; ++ srli_d(scratch, rj, 1); ++ li(scratch2, B0); ++ And(scratch, scratch, scratch2); ++ Sub_d(scratch, rj, scratch); ++ li(scratch2, B1); ++ And(rd, scratch, scratch2); ++ srli_d(scratch, scratch, 2); ++ And(scratch, scratch, scratch2); ++ Add_d(scratch, rd, scratch); ++ srli_d(rd, scratch, 4); ++ Add_d(rd, rd, scratch); ++ li(scratch2, B2); ++ And(rd, rd, scratch2); ++ li(scratch, value); ++ Mul_d(rd, rd, scratch); ++ srli_d(rd, rd, shift); ++} ++ ++void TurboAssembler::ExtractBits(Register dest, Register source, Register pos, ++ int size, bool sign_extend) { ++ sra_d(dest, source, pos); ++ bstrpick_d(dest, dest, size - 1, 0); ++ if (sign_extend) { ++ switch (size) { ++ case 8: ++ ext_w_b(dest, dest); ++ break; ++ case 16: ++ ext_w_h(dest, dest); ++ break; ++ case 32: ++ // sign-extend word ++ slli_w(dest, dest, 0); ++ break; ++ default: ++ UNREACHABLE(); ++ } ++ } ++} ++ ++void TurboAssembler::InsertBits(Register dest, Register source, Register pos, ++ int size) { ++ Rotr_d(dest, dest, pos); ++ bstrins_d(dest, source, size - 1, 0); ++ { ++ UseScratchRegisterScope temps(this); ++ Register scratch = temps.Acquire(); ++ Sub_d(scratch, zero_reg, pos); ++ Rotr_d(dest, dest, scratch); ++ } ++} ++ ++void MacroAssembler::EmitFPUTruncate( ++ FPURoundingMode rounding_mode, Register result, DoubleRegister double_input, ++ Register scratch, DoubleRegister double_scratch, Register except_flag, ++ CheckForInexactConversion check_inexact) { ++ break_(3); ++} ++ ++void TurboAssembler::TryInlineTruncateDoubleToI(Register result, ++ DoubleRegister double_input, ++ Label* done) { ++ DoubleRegister single_scratch = kScratchDoubleReg.low(); ++ UseScratchRegisterScope temps(this); ++ BlockTrampolinePoolScope block_trampoline_pool(this); ++ Register scratch = temps.Acquire(); ++ Register scratch2 = t7; ++ ++ // Clear cumulative exception flags and save the FCSR. ++ /* movfcsr2gr(scratch2, FCSR); ++ movgr2fcsr(FCSR, zero_reg); ++ // Try a conversion to a signed integer. ++ ftintrz_w_d(single_scratch, double_input); ++ movfr2gr_w(result, single_scratch); ++ // Retrieve and restore the FCSR. 
++ movfcsr2gr(scratch, FCSR); ++ movgr2fcsr(FCSR, scratch2); ++ // Check for overflow and NaNs. ++ And(scratch, scratch, ++ kFCSROverflowFlagMask | kFCSRUnderflowFlagMask | ++ kFCSRInvalidOpFlagMask); ++ // If we had no exceptions we are done. ++ Branch(done, eq, scratch, Operand(zero_reg));*/ ++ ++ CompareIsNanF64(double_input, double_input); ++ Move(result, zero_reg); ++ bcnez(FCC0, done); ++ ftintrz_l_d(single_scratch, double_input); ++ movfr2gr_d(scratch2, single_scratch); ++ li(scratch, 1L << 63); ++ Xor(scratch, scratch, scratch2); ++ rotri_d(scratch2, scratch, 1); ++ movfr2gr_s(result, single_scratch); ++ Branch(done, ne, scratch, Operand(scratch2)); ++} ++ ++void TurboAssembler::TruncateDoubleToI(Isolate* isolate, Zone* zone, ++ Register result, ++ DoubleRegister double_input, ++ StubCallMode stub_mode) { ++ Label done; ++ ++ TryInlineTruncateDoubleToI(result, double_input, &done); ++ ++ // If we fell through then inline version didn't succeed - call stub instead. ++ Sub_d(sp, sp, ++ Operand(kDoubleSize + kSystemPointerSize)); // Put input on stack. ++ St_d(ra, MemOperand(sp, kSystemPointerSize)); ++ Fst_d(double_input, MemOperand(sp, 0)); ++ ++ if (stub_mode == StubCallMode::kCallWasmRuntimeStub) { ++ Call(wasm::WasmCode::kDoubleToI, RelocInfo::WASM_STUB_CALL); ++ } else { ++ Call(BUILTIN_CODE(isolate, DoubleToI), RelocInfo::CODE_TARGET); ++ } ++ ++ Pop(ra, result); ++ bind(&done); ++} ++ ++// BRANCH_ARGS_CHECK checks that conditional jump arguments are correct. ++#define BRANCH_ARGS_CHECK(cond, rj, rk) \ ++ DCHECK((cond == cc_always && rj == zero_reg && rk.rm() == zero_reg) || \ ++ (cond != cc_always && (rj != zero_reg || rk.rm() != zero_reg))) ++ ++void TurboAssembler::Branch(Label* L, bool need_link) { ++ int offset = GetOffset(L, OffsetSize::kOffset26); ++ if (need_link) { ++ bl(offset); ++ } else { ++ b(offset); ++ } ++} ++ ++void TurboAssembler::Branch(Label* L, Condition cond, Register rj, ++ const Operand& rk, bool need_link) { ++ if (L->is_bound()) { ++ BRANCH_ARGS_CHECK(cond, rj, rk); ++ if (!BranchShortOrFallback(L, cond, rj, rk, need_link)) { ++ if (cond != cc_always) { ++ Label skip; ++ Condition neg_cond = NegateCondition(cond); ++ BranchShort(&skip, neg_cond, rj, rk, need_link); ++ Branch(L, need_link); ++ bind(&skip); ++ } else { ++ Branch(L); ++ } ++ } ++ } else { ++ if (is_trampoline_emitted()) { ++ if (cond != cc_always) { ++ Label skip; ++ Condition neg_cond = NegateCondition(cond); ++ BranchShort(&skip, neg_cond, rj, rk, need_link); ++ Branch(L, need_link); ++ bind(&skip); ++ } else { ++ Branch(L); ++ } ++ } else { ++ BranchShort(L, cond, rj, rk, need_link); ++ } ++ } ++} ++ ++void TurboAssembler::Branch(Label* L, Condition cond, Register rj, ++ RootIndex index) { ++ UseScratchRegisterScope temps(this); ++ Register scratch = temps.Acquire(); ++ LoadRoot(scratch, index); ++ Branch(L, cond, rj, Operand(scratch)); ++} ++ ++int32_t TurboAssembler::GetOffset(Label* L, OffsetSize bits) { ++ return branch_offset_helper(L, bits) >> 2; ++} ++ ++Register TurboAssembler::GetRkAsRegisterHelper(const Operand& rk, ++ Register scratch) { ++ Register r2 = no_reg; ++ if (rk.is_reg()) { ++ r2 = rk.rm(); ++ } else { ++ r2 = scratch; ++ li(r2, rk); ++ } ++ ++ return r2; ++} ++ ++bool TurboAssembler::BranchShortOrFallback(Label* L, Condition cond, ++ Register rj, const Operand& rk, ++ bool need_link) { ++ UseScratchRegisterScope temps(this); ++ BlockTrampolinePoolScope block_trampoline_pool(this); ++ Register scratch = temps.hasAvailable() ? 
temps.Acquire() : t8; ++ ++ // Be careful to always use shifted_branch_offset only just before the ++ // branch instruction, as the location will be remember for patching the ++ // target. ++ { ++ BlockTrampolinePoolScope block_trampoline_pool(this); ++ int offset = 0; ++ switch (cond) { ++ case cc_always: ++ if (L->is_bound() && !is_near(L, OffsetSize::kOffset26)) return false; ++ offset = GetOffset(L, OffsetSize::kOffset26); ++ if (need_link) { ++ bl(offset); ++ } else { ++ b(offset); ++ } ++ break; ++ case eq: ++ if (rk.is_reg() && rj.code() == rk.rm().code()) { ++ // beq is used here to make the code patchable. Otherwise b should ++ // be used which has no condition field so is not patchable. ++ if (L->is_bound() && !is_near(L, OffsetSize::kOffset16)) return false; ++ if (need_link) pcaddi(ra, 2); ++ offset = GetOffset(L, OffsetSize::kOffset16); ++ beq(rj, rj, offset); ++ } else if (IsZero(rk)) { ++ if (L->is_bound() && !is_near(L, OffsetSize::kOffset21)) return false; ++ if (need_link) pcaddi(ra, 2); ++ offset = GetOffset(L, OffsetSize::kOffset21); ++ beqz(rj, offset); ++ } else { ++ if (L->is_bound() && !is_near(L, OffsetSize::kOffset16)) return false; ++ if (need_link) pcaddi(ra, 2); ++ // We don't want any other register but scratch clobbered. ++ Register sc = GetRkAsRegisterHelper(rk, scratch); ++ offset = GetOffset(L, OffsetSize::kOffset16); ++ beq(rj, sc, offset); ++ } ++ break; ++ case ne: ++ if (rk.is_reg() && rj.code() == rk.rm().code()) { ++ if (L->is_bound() && !is_near(L, OffsetSize::kOffset16)) return false; ++ if (need_link) pcaddi(ra, 2); ++ // bne is used here to make the code patchable. Otherwise we ++ // should not generate any instruction. ++ offset = GetOffset(L, OffsetSize::kOffset16); ++ bne(rj, rj, offset); ++ } else if (IsZero(rk)) { ++ if (L->is_bound() && !is_near(L, OffsetSize::kOffset21)) return false; ++ if (need_link) pcaddi(ra, 2); ++ offset = GetOffset(L, OffsetSize::kOffset21); ++ bnez(rj, offset); ++ } else { ++ if (L->is_bound() && !is_near(L, OffsetSize::kOffset16)) return false; ++ if (need_link) pcaddi(ra, 2); ++ // We don't want any other register but scratch clobbered. ++ Register sc = GetRkAsRegisterHelper(rk, scratch); ++ offset = GetOffset(L, OffsetSize::kOffset16); ++ bne(rj, sc, offset); ++ } ++ break; ++ ++ // Signed comparison. ++ case greater: ++ // rj > rk ++ if (rk.is_reg() && rj.code() == rk.rm().code()) { ++ // No code needs to be emitted. 
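++        // (rj > rj can never hold, so the conditional branch is simply never
++        // taken.)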
++ } else if (IsZero(rk)) { ++ if (L->is_bound() && !is_near(L, OffsetSize::kOffset16)) return false; ++ if (need_link) pcaddi(ra, 2); ++ offset = GetOffset(L, OffsetSize::kOffset16); ++ blt(zero_reg, rj, offset); ++ } else { ++ if (L->is_bound() && !is_near(L, OffsetSize::kOffset16)) return false; ++ if (need_link) pcaddi(ra, 2); ++ Register sc = GetRkAsRegisterHelper(rk, scratch); ++ DCHECK(rj != sc); ++ offset = GetOffset(L, OffsetSize::kOffset16); ++ blt(sc, rj, offset); ++ } ++ break; ++ case greater_equal: ++ // rj >= rk ++ if (rk.is_reg() && rj.code() == rk.rm().code()) { ++ if (L->is_bound() && !is_near(L, OffsetSize::kOffset26)) return false; ++ if (need_link) pcaddi(ra, 2); ++ offset = GetOffset(L, OffsetSize::kOffset26); ++ b(offset); ++ } else if (IsZero(rk)) { ++ if (L->is_bound() && !is_near(L, OffsetSize::kOffset16)) return false; ++ if (need_link) pcaddi(ra, 2); ++ offset = GetOffset(L, OffsetSize::kOffset16); ++ bge(rj, zero_reg, offset); ++ } else { ++ if (L->is_bound() && !is_near(L, OffsetSize::kOffset16)) return false; ++ if (need_link) pcaddi(ra, 2); ++ Register sc = GetRkAsRegisterHelper(rk, scratch); ++ DCHECK(rj != sc); ++ offset = GetOffset(L, OffsetSize::kOffset16); ++ bge(rj, sc, offset); ++ } ++ break; ++ case less: ++ // rj < rk ++ if (rk.is_reg() && rj.code() == rk.rm().code()) { ++ // No code needs to be emitted. ++ } else if (IsZero(rk)) { ++ if (L->is_bound() && !is_near(L, OffsetSize::kOffset16)) return false; ++ if (need_link) pcaddi(ra, 2); ++ offset = GetOffset(L, OffsetSize::kOffset16); ++ blt(rj, zero_reg, offset); ++ } else { ++ if (L->is_bound() && !is_near(L, OffsetSize::kOffset16)) return false; ++ if (need_link) pcaddi(ra, 2); ++ Register sc = GetRkAsRegisterHelper(rk, scratch); ++ DCHECK(rj != sc); ++ offset = GetOffset(L, OffsetSize::kOffset16); ++ blt(rj, sc, offset); ++ } ++ break; ++ case less_equal: ++ // rj <= rk ++ if (rk.is_reg() && rj.code() == rk.rm().code()) { ++ if (L->is_bound() && !is_near(L, OffsetSize::kOffset26)) return false; ++ if (need_link) pcaddi(ra, 2); ++ offset = GetOffset(L, OffsetSize::kOffset26); ++ b(offset); ++ } else if (IsZero(rk)) { ++ if (L->is_bound() && !is_near(L, OffsetSize::kOffset16)) return false; ++ if (need_link) pcaddi(ra, 2); ++ offset = GetOffset(L, OffsetSize::kOffset16); ++ bge(zero_reg, rj, offset); ++ } else { ++ if (L->is_bound() && !is_near(L, OffsetSize::kOffset16)) return false; ++ if (need_link) pcaddi(ra, 2); ++ Register sc = GetRkAsRegisterHelper(rk, scratch); ++ DCHECK(rj != sc); ++ offset = GetOffset(L, OffsetSize::kOffset16); ++ bge(sc, rj, offset); ++ } ++ break; ++ ++ // Unsigned comparison. ++ case Ugreater: ++ // rj > rk ++ if (rk.is_reg() && rj.code() == rk.rm().code()) { ++ // No code needs to be emitted. ++ } else if (rj == zero_reg) { ++ // No code needs to be emitted. 
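++        // (zero_reg can never be unsigned-greater than anything, so again no
++        // branch is needed.)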
++ } else if (IsZero(rk)) { ++ if (L->is_bound() && !is_near(L, OffsetSize::kOffset26)) return false; ++ if (need_link) pcaddi(ra, 2); ++ offset = GetOffset(L, OffsetSize::kOffset26); ++ b(offset); ++ } else { ++ if (L->is_bound() && !is_near(L, OffsetSize::kOffset16)) return false; ++ if (need_link) pcaddi(ra, 2); ++ Register sc = GetRkAsRegisterHelper(rk, scratch); ++ DCHECK(rj != sc); ++ offset = GetOffset(L, OffsetSize::kOffset16); ++ bltu(sc, rj, offset); ++ } ++ break; ++ case Ugreater_equal: ++ // rj >= rk ++ if (rk.is_reg() && rj.code() == rk.rm().code()) { ++ if (L->is_bound() && !is_near(L, OffsetSize::kOffset26)) return false; ++ if (need_link) pcaddi(ra, 2); ++ offset = GetOffset(L, OffsetSize::kOffset26); ++ b(offset); ++ } else if (IsZero(rk)) { ++ if (L->is_bound() && !is_near(L, OffsetSize::kOffset26)) return false; ++ if (need_link) pcaddi(ra, 2); ++ offset = GetOffset(L, OffsetSize::kOffset26); ++ b(offset); ++ } else if (rj == zero_reg) { ++ // No code needs to be emitted. ++ } else { ++ if (L->is_bound() && !is_near(L, OffsetSize::kOffset16)) return false; ++ if (need_link) pcaddi(ra, 2); ++ Register sc = GetRkAsRegisterHelper(rk, scratch); ++ DCHECK(rj != sc); ++ offset = GetOffset(L, OffsetSize::kOffset16); ++ bgeu(rj, sc, offset); ++ } ++ break; ++ case Uless: ++ // rj < rk ++ if (rk.is_reg() && rj.code() == rk.rm().code()) { ++ // No code needs to be emitted. ++ } else if (IsZero(rk)) { ++ // No code needs to be emitted. ++ } else if (rj == zero_reg) { ++ if (L->is_bound() && !is_near(L, OffsetSize::kOffset26)) return false; ++ if (need_link) pcaddi(ra, 2); ++ offset = GetOffset(L, OffsetSize::kOffset26); ++ b(offset); ++ } else { ++ if (L->is_bound() && !is_near(L, OffsetSize::kOffset16)) return false; ++ if (need_link) pcaddi(ra, 2); ++ Register sc = GetRkAsRegisterHelper(rk, scratch); ++ DCHECK(rj != sc); ++ offset = GetOffset(L, OffsetSize::kOffset16); ++ bltu(rj, sc, offset); ++ } ++ break; ++ case Uless_equal: ++ // rj <= rk ++ if (rk.is_reg() && rj.code() == rk.rm().code()) { ++ if (L->is_bound() && !is_near(L, OffsetSize::kOffset26)) return false; ++ if (need_link) pcaddi(ra, 2); ++ offset = GetOffset(L, OffsetSize::kOffset26); ++ b(offset); ++ } else if (rj == zero_reg) { ++ if (L->is_bound() && !is_near(L, OffsetSize::kOffset26)) return false; ++ if (need_link) pcaddi(ra, 2); ++ offset = GetOffset(L, OffsetSize::kOffset26); ++ b(offset); ++ } else if (IsZero(rk)) { ++ // No code needs to be emitted. 
++ } else { ++ if (L->is_bound() && !is_near(L, OffsetSize::kOffset16)) return false; ++ if (need_link) pcaddi(ra, 2); ++ Register sc = GetRkAsRegisterHelper(rk, scratch); ++ DCHECK(rj != sc); ++ offset = GetOffset(L, OffsetSize::kOffset16); ++ bgeu(sc, rj, offset); ++ } ++ break; ++ default: ++ UNREACHABLE(); ++ } ++ } ++ return true; ++} ++ ++void TurboAssembler::BranchShort(Label* L, Condition cond, Register rj, ++ const Operand& rk, bool need_link) { ++ BRANCH_ARGS_CHECK(cond, rj, rk); ++ bool result = BranchShortOrFallback(L, cond, rj, rk, need_link); ++ DCHECK(result); ++ USE(result); ++} ++ ++void TurboAssembler::LoadFromConstantsTable(Register destination, ++ int constant_index) { ++ DCHECK(RootsTable::IsImmortalImmovable(RootIndex::kBuiltinsConstantsTable)); ++ LoadRoot(destination, RootIndex::kBuiltinsConstantsTable); ++ Ld_d(destination, ++ FieldMemOperand(destination, FixedArray::kHeaderSize + ++ constant_index * kPointerSize)); ++} ++ ++void TurboAssembler::LoadRootRelative(Register destination, int32_t offset) { ++ Ld_d(destination, MemOperand(kRootRegister, offset)); ++} ++ ++void TurboAssembler::LoadRootRegisterOffset(Register destination, ++ intptr_t offset) { ++ if (offset == 0) { ++ Move(destination, kRootRegister); ++ } else { ++ Add_d(destination, kRootRegister, Operand(offset)); ++ } ++} ++ ++void TurboAssembler::Jump(Register target, Condition cond, Register rj, ++ const Operand& rk) { ++ BlockTrampolinePoolScope block_trampoline_pool(this); ++ if (cond == cc_always) { ++ jirl(zero_reg, target, 0); ++ } else { ++ BRANCH_ARGS_CHECK(cond, rj, rk); ++ Label skip; ++ Branch(&skip, NegateCondition(cond), rj, rk); ++ jirl(zero_reg, target, 0); ++ bind(&skip); ++ } ++} ++ ++void TurboAssembler::Jump(intptr_t target, RelocInfo::Mode rmode, ++ Condition cond, Register rj, const Operand& rk) { ++ Label skip; ++ if (cond != cc_always) { ++ Branch(&skip, NegateCondition(cond), rj, rk); ++ } ++ { ++ BlockTrampolinePoolScope block_trampoline_pool(this); ++ li(t7, Operand(target, rmode)); ++ jirl(zero_reg, t7, 0); ++ bind(&skip); ++ } ++} ++ ++void TurboAssembler::Jump(Address target, RelocInfo::Mode rmode, Condition cond, ++ Register rj, const Operand& rk) { ++ DCHECK(!RelocInfo::IsCodeTarget(rmode)); ++ Jump(static_cast(target), rmode, cond, rj, rk); ++} ++ ++void TurboAssembler::Jump(Handle code, RelocInfo::Mode rmode, ++ Condition cond, Register rj, const Operand& rk) { ++ DCHECK(RelocInfo::IsCodeTarget(rmode)); ++ ++ BlockTrampolinePoolScope block_trampoline_pool(this); ++ if (root_array_available_ && options().isolate_independent_code) { ++ IndirectLoadConstant(t7, code); ++ Add_d(t7, t7, Operand(Code::kHeaderSize - kHeapObjectTag)); ++ Jump(t7, cond, rj, rk); ++ return; ++ } else if (options().inline_offheap_trampolines) { ++ int builtin_index = Builtins::kNoBuiltinId; ++ if (isolate()->builtins()->IsBuiltinHandle(code, &builtin_index) && ++ Builtins::IsIsolateIndependent(builtin_index)) { ++ // Inline the trampoline. 
++ RecordCommentForOffHeapTrampoline(builtin_index); ++ CHECK_NE(builtin_index, Builtins::kNoBuiltinId); ++ EmbeddedData d = EmbeddedData::FromBlob(); ++ Address entry = d.InstructionStartOfBuiltin(builtin_index); ++ li(t7, Operand(entry, RelocInfo::OFF_HEAP_TARGET)); ++ Jump(t7, cond, rj, rk); ++ return; ++ } ++ } ++ ++ Jump(static_cast(code.address()), rmode, cond, rj, rk); ++} ++ ++void TurboAssembler::Jump(const ExternalReference& reference) { ++ li(t7, reference); ++ Jump(t7); ++} ++ ++// Note: To call gcc-compiled C code on loonarch, you must call through t[0-8]. ++void TurboAssembler::Call(Register target, Condition cond, Register rj, ++ const Operand& rk) { ++ BlockTrampolinePoolScope block_trampoline_pool(this); ++ if (cond == cc_always) { ++ jirl(ra, target, 0); ++ } else { ++ BRANCH_ARGS_CHECK(cond, rj, rk); ++ Label skip; ++ Branch(&skip, NegateCondition(cond), rj, rk); ++ jirl(ra, target, 0); ++ bind(&skip); ++ } ++} ++ ++void MacroAssembler::JumpIfIsInRange(Register value, unsigned lower_limit, ++ unsigned higher_limit, ++ Label* on_in_range) { ++ if (lower_limit != 0) { ++ UseScratchRegisterScope temps(this); ++ Register scratch = temps.Acquire(); ++ Sub_d(scratch, value, Operand(lower_limit)); ++ Branch(on_in_range, ls, scratch, Operand(higher_limit - lower_limit)); ++ } else { ++ Branch(on_in_range, ls, value, Operand(higher_limit - lower_limit)); ++ } ++} ++ ++void TurboAssembler::Call(Address target, RelocInfo::Mode rmode, Condition cond, ++ Register rj, const Operand& rk) { ++ BlockTrampolinePoolScope block_trampoline_pool(this); ++ li(t7, Operand(static_cast(target), rmode), ADDRESS_LOAD); ++ Call(t7, cond, rj, rk); ++} ++ ++void TurboAssembler::Call(Handle code, RelocInfo::Mode rmode, ++ Condition cond, Register rj, const Operand& rk) { ++ BlockTrampolinePoolScope block_trampoline_pool(this); ++ ++ if (root_array_available_ && options().isolate_independent_code) { ++ IndirectLoadConstant(t7, code); ++ Add_d(t7, t7, Operand(Code::kHeaderSize - kHeapObjectTag)); ++ Call(t7, cond, rj, rk); ++ return; ++ } else if (options().inline_offheap_trampolines) { ++ int builtin_index = Builtins::kNoBuiltinId; ++ if (isolate()->builtins()->IsBuiltinHandle(code, &builtin_index) && ++ Builtins::IsIsolateIndependent(builtin_index)) { ++ // Inline the trampoline. ++ RecordCommentForOffHeapTrampoline(builtin_index); ++ CHECK_NE(builtin_index, Builtins::kNoBuiltinId); ++ EmbeddedData d = EmbeddedData::FromBlob(); ++ Address entry = d.InstructionStartOfBuiltin(builtin_index); ++ li(t7, Operand(entry, RelocInfo::OFF_HEAP_TARGET)); ++ Call(t7, cond, rj, rk); ++ return; ++ } ++ } ++ ++ DCHECK(RelocInfo::IsCodeTarget(rmode)); ++ DCHECK(code->IsExecutable()); ++ Call(code.address(), rmode, cond, rj, rk); ++} ++ ++void TurboAssembler::LoadEntryFromBuiltinIndex(Register builtin_index) { ++ STATIC_ASSERT(kSystemPointerSize == 8); ++ STATIC_ASSERT(kSmiTagSize == 1); ++ STATIC_ASSERT(kSmiTag == 0); ++ ++ // The builtin_index register contains the builtin index as a Smi. 
++ SmiUntag(builtin_index, builtin_index); ++ Alsl_d(builtin_index, builtin_index, kRootRegister, kSystemPointerSizeLog2, ++ t7); ++ Ld_d(builtin_index, ++ MemOperand(builtin_index, IsolateData::builtin_entry_table_offset())); ++} ++ ++void TurboAssembler::CallBuiltinByIndex(Register builtin_index) { ++ LoadEntryFromBuiltinIndex(builtin_index); ++ Call(builtin_index); ++} ++ ++void TurboAssembler::PatchAndJump(Address target) { ++ UseScratchRegisterScope temps(this); ++ Register scratch = temps.Acquire(); ++ pcaddi(scratch, 4); ++ Ld_d(t7, MemOperand(scratch, 0)); ++ jirl(zero_reg, t7, 0); ++ nop(); ++ DCHECK_EQ(reinterpret_cast(pc_) % 8, 0); ++ *reinterpret_cast(pc_) = target; // pc_ should be align. ++ pc_ += sizeof(uint64_t); ++} ++ ++void TurboAssembler::StoreReturnAddressAndCall(Register target) { ++ // This generates the final instruction sequence for calls to C functions ++ // once an exit frame has been constructed. ++ // ++ // Note that this assumes the caller code (i.e. the Code object currently ++ // being generated) is immovable or that the callee function cannot trigger ++ // GC, since the callee function will return to it. ++ ++ Assembler::BlockTrampolinePoolScope block_trampoline_pool(this); ++ static constexpr int kNumInstructionsToJump = 2; ++ Label find_ra; ++ // Adjust the value in ra to point to the correct return location, 2nd ++ // instruction past the real call into C code (the jirl)), and push it. ++ // This is the return address of the exit frame. ++ pcaddi(ra, kNumInstructionsToJump + 1); ++ bind(&find_ra); ++ ++ // This spot was reserved in EnterExitFrame. ++ St_d(ra, MemOperand(sp, 0)); ++ // Stack is still aligned. ++ ++ // TODO can be jirl target? a0 -- a7? ++ jirl(zero_reg, target, 0); ++ // Make sure the stored 'ra' points to this position. ++ DCHECK_EQ(kNumInstructionsToJump, InstructionsGeneratedSince(&find_ra)); ++} ++ ++void TurboAssembler::Ret(Condition cond, Register rj, const Operand& rk) { ++ Jump(ra, cond, rj, rk); ++} ++ ++void TurboAssembler::DropAndRet(int drop) { ++ DCHECK(is_int16(drop * kPointerSize)); ++ addi_d(sp, sp, drop * kPointerSize); ++ Ret(); ++} ++ ++void TurboAssembler::DropAndRet(int drop, Condition cond, Register r1, ++ const Operand& r2) { ++ // Both Drop and Ret need to be conditional. 
++ Label skip; ++ if (cond != cc_always) { ++ Branch(&skip, NegateCondition(cond), r1, r2); ++ } ++ ++ Drop(drop); ++ Ret(); ++ ++ if (cond != cc_always) { ++ bind(&skip); ++ } ++} ++ ++void TurboAssembler::Drop(int count, Condition cond, Register reg, ++ const Operand& op) { ++ if (count <= 0) { ++ return; ++ } ++ ++ Label skip; ++ ++ if (cond != al) { ++ Branch(&skip, NegateCondition(cond), reg, op); ++ } ++ ++ Add_d(sp, sp, Operand(count * kPointerSize)); ++ ++ if (cond != al) { ++ bind(&skip); ++ } ++} ++ ++void MacroAssembler::Swap(Register reg1, Register reg2, Register scratch) { ++ if (scratch == no_reg) { ++ Xor(reg1, reg1, Operand(reg2)); ++ Xor(reg2, reg2, Operand(reg1)); ++ Xor(reg1, reg1, Operand(reg2)); ++ } else { ++ mov(scratch, reg1); ++ mov(reg1, reg2); ++ mov(reg2, scratch); ++ } ++} ++ ++void TurboAssembler::Call(Label* target) { Branch(target, true); } ++ ++void TurboAssembler::Push(Smi smi) { ++ UseScratchRegisterScope temps(this); ++ Register scratch = temps.Acquire(); ++ li(scratch, Operand(smi)); ++ push(scratch); ++} ++ ++void TurboAssembler::Push(Handle handle) { ++ UseScratchRegisterScope temps(this); ++ Register scratch = temps.Acquire(); ++ li(scratch, Operand(handle)); ++ push(scratch); ++} ++ ++void MacroAssembler::MaybeDropFrames() { ++ // Check whether we need to drop frames to restart a function on the stack. ++ li(a1, ExternalReference::debug_restart_fp_address(isolate())); ++ Ld_d(a1, MemOperand(a1, 0)); ++ Jump(BUILTIN_CODE(isolate(), FrameDropperTrampoline), RelocInfo::CODE_TARGET, ++ ne, a1, Operand(zero_reg)); ++} ++ ++// --------------------------------------------------------------------------- ++// Exception handling. ++ ++void MacroAssembler::PushStackHandler() { ++ // Adjust this code if not the case. ++ STATIC_ASSERT(StackHandlerConstants::kSize == 2 * kPointerSize); ++ STATIC_ASSERT(StackHandlerConstants::kNextOffset == 0 * kPointerSize); ++ ++ Push(Smi::zero()); // Padding. ++ ++ // Link the current handler as the next handler. ++ li(t2, ++ ExternalReference::Create(IsolateAddressId::kHandlerAddress, isolate())); ++ Ld_d(t1, MemOperand(t2, 0)); ++ push(t1); ++ ++ // Set this new handler as the current one. ++ St_d(sp, MemOperand(t2, 0)); ++} ++ ++void MacroAssembler::PopStackHandler() { ++ STATIC_ASSERT(StackHandlerConstants::kNextOffset == 0); ++ pop(a1); ++ Add_d(sp, sp, ++ Operand( ++ static_cast(StackHandlerConstants::kSize - kPointerSize))); ++ UseScratchRegisterScope temps(this); ++ Register scratch = temps.Acquire(); ++ li(scratch, ++ ExternalReference::Create(IsolateAddressId::kHandlerAddress, isolate())); ++ St_d(a1, MemOperand(scratch, 0)); ++} ++ ++void TurboAssembler::FPUCanonicalizeNaN(const DoubleRegister dst, ++ const DoubleRegister src) { ++ fsub_d(dst, src, kDoubleRegZero); ++} ++ ++void TurboAssembler::MovFromFloatResult(const DoubleRegister dst) { ++ Move(dst, f0); // Reg f0 is loongarch return value ++} ++ ++void TurboAssembler::MovFromFloatParameter(const DoubleRegister dst) { ++ Move(dst, f0); // Reg f0 is loongarch first argument value. 
++} ++ ++void TurboAssembler::MovToFloatParameter(DoubleRegister src) { Move(f0, src); } ++ ++void TurboAssembler::MovToFloatResult(DoubleRegister src) { Move(f0, src); } ++ ++void TurboAssembler::MovToFloatParameters(DoubleRegister src1, ++ DoubleRegister src2) { ++ const DoubleRegister fparg2 = f1; ++ if (src2 == f0) { ++ DCHECK(src1 != fparg2); ++ Move(fparg2, src2); ++ Move(f0, src1); ++ } else { ++ Move(f0, src1); ++ Move(fparg2, src2); ++ } ++} ++ ++// ----------------------------------------------------------------------------- ++// JavaScript invokes. ++ ++void TurboAssembler::PrepareForTailCall(Register callee_args_count, ++ Register caller_args_count, ++ Register scratch0, Register scratch1) { ++ // Calculate the end of destination area where we will put the arguments ++ // after we drop current frame. We add kPointerSize to count the receiver ++ // argument which is not included into formal parameters count. ++ Register dst_reg = scratch0; ++ Alsl_d(dst_reg, caller_args_count, fp, kPointerSizeLog2, t7); ++ Add_d(dst_reg, dst_reg, ++ Operand(StandardFrameConstants::kCallerSPOffset + kPointerSize)); ++ ++ Register src_reg = caller_args_count; ++ // Calculate the end of source area. +kPointerSize is for the receiver. ++ Alsl_d(src_reg, callee_args_count, sp, kPointerSizeLog2, t7); ++ Add_d(src_reg, src_reg, Operand(kPointerSize)); ++ ++ if (FLAG_debug_code) { ++ Check(lo, AbortReason::kStackAccessBelowStackPointer, src_reg, ++ Operand(dst_reg)); ++ } ++ ++ // Restore caller's frame pointer and return address now as they will be ++ // overwritten by the copying loop. ++ Ld_d(ra, MemOperand(fp, StandardFrameConstants::kCallerPCOffset)); ++ Ld_d(fp, MemOperand(fp, StandardFrameConstants::kCallerFPOffset)); ++ ++ // Now copy callee arguments to the caller frame going backwards to avoid ++ // callee arguments corruption (source and destination areas could overlap). ++ ++ // Both src_reg and dst_reg are pointing to the word after the one to copy, ++ // so they must be pre-decremented in the loop. ++ Register tmp_reg = scratch1; ++ Label loop, entry; ++ Branch(&entry); ++ bind(&loop); ++ Sub_d(src_reg, src_reg, Operand(kPointerSize)); ++ Sub_d(dst_reg, dst_reg, Operand(kPointerSize)); ++ Ld_d(tmp_reg, MemOperand(src_reg, 0)); ++ St_d(tmp_reg, MemOperand(dst_reg, 0)); ++ bind(&entry); ++ Branch(&loop, ne, sp, Operand(src_reg)); ++ ++ // Leave current frame. ++ mov(sp, dst_reg); ++} ++ ++void MacroAssembler::InvokePrologue(Register expected_parameter_count, ++ Register actual_parameter_count, ++ Label* done, InvokeFlag flag) { ++ Label regular_invoke; ++ ++ // Check whether the expected and actual arguments count match. The registers ++ // are set up according to contract with ArgumentsAdaptorTrampoline: ++ // a0: actual arguments count ++ // a1: function (passed through to callee) ++ // a2: expected arguments count ++ ++ // The code below is made a lot easier because the calling code already sets ++ // up actual and expected registers according to the contract. 
++ ++ DCHECK_EQ(actual_parameter_count, a0); ++ DCHECK_EQ(expected_parameter_count, a2); ++ ++ Branch(®ular_invoke, eq, expected_parameter_count, ++ Operand(actual_parameter_count)); ++ ++ Handle adaptor = BUILTIN_CODE(isolate(), ArgumentsAdaptorTrampoline); ++ if (flag == CALL_FUNCTION) { ++ Call(adaptor); ++ Branch(done); ++ } else { ++ Jump(adaptor, RelocInfo::CODE_TARGET); ++ } ++ ++ bind(®ular_invoke); ++} ++ ++void MacroAssembler::CheckDebugHook(Register fun, Register new_target, ++ Register expected_parameter_count, ++ Register actual_parameter_count) { ++ Label skip_hook; ++ ++ li(t0, ExternalReference::debug_hook_on_function_call_address(isolate())); ++ Ld_b(t0, MemOperand(t0, 0)); ++ Branch(&skip_hook, eq, t0, Operand(zero_reg)); ++ ++ { ++ // Load receiver to pass it later to DebugOnFunctionCall hook. ++ Alsl_d(t0, actual_parameter_count, sp, kPointerSizeLog2, t7); ++ Ld_d(t0, MemOperand(t0, 0)); ++ FrameScope frame(this, ++ has_frame() ? StackFrame::NONE : StackFrame::INTERNAL); ++ SmiTag(expected_parameter_count); ++ Push(expected_parameter_count); ++ ++ SmiTag(actual_parameter_count); ++ Push(actual_parameter_count); ++ ++ if (new_target.is_valid()) { ++ Push(new_target); ++ } ++ // TODO: MultiPush/Pop ++ Push(fun); ++ Push(fun); ++ Push(t0); ++ CallRuntime(Runtime::kDebugOnFunctionCall); ++ Pop(fun); ++ if (new_target.is_valid()) { ++ Pop(new_target); ++ } ++ ++ Pop(actual_parameter_count); ++ SmiUntag(actual_parameter_count); ++ ++ Pop(expected_parameter_count); ++ SmiUntag(expected_parameter_count); ++ } ++ bind(&skip_hook); ++} ++ ++void MacroAssembler::InvokeFunctionCode(Register function, Register new_target, ++ Register expected_parameter_count, ++ Register actual_parameter_count, ++ InvokeFlag flag) { ++ // You can't call a function without a valid frame. ++ DCHECK_IMPLIES(flag == CALL_FUNCTION, has_frame()); ++ DCHECK_EQ(function, a1); ++ DCHECK_IMPLIES(new_target.is_valid(), new_target == a3); ++ ++ // On function call, call into the debugger if necessary. ++ CheckDebugHook(function, new_target, expected_parameter_count, ++ actual_parameter_count); ++ ++ // Clear the new.target register if not given. ++ if (!new_target.is_valid()) { ++ LoadRoot(a3, RootIndex::kUndefinedValue); ++ } ++ ++ Label done; ++ InvokePrologue(expected_parameter_count, actual_parameter_count, &done, flag); ++ // We call indirectly through the code field in the function to ++ // allow recompilation to take effect without changing any of the ++ // call sites. ++ Register code = kJavaScriptCallCodeStartRegister; ++ Ld_d(code, FieldMemOperand(function, JSFunction::kCodeOffset)); ++ if (flag == CALL_FUNCTION) { ++ Add_d(code, code, Operand(Code::kHeaderSize - kHeapObjectTag)); ++ Call(code); ++ } else { ++ DCHECK(flag == JUMP_FUNCTION); ++ Add_d(code, code, Operand(Code::kHeaderSize - kHeapObjectTag)); ++ Jump(code); ++ } ++ ++ // Continue here if InvokePrologue does handle the invocation due to ++ // mismatched parameter counts. ++ bind(&done); ++} ++ ++void MacroAssembler::InvokeFunctionWithNewTarget( ++ Register function, Register new_target, Register actual_parameter_count, ++ InvokeFlag flag) { ++ // You can't call a function without a valid frame. ++ DCHECK_IMPLIES(flag == CALL_FUNCTION, has_frame()); ++ ++ // Contract with called JS functions requires that function is passed in a1. 
++ DCHECK_EQ(function, a1); ++ Register expected_parameter_count = a2; ++ Register temp_reg = t0; ++ Ld_d(temp_reg, FieldMemOperand(a1, JSFunction::kSharedFunctionInfoOffset)); ++ Ld_d(cp, FieldMemOperand(a1, JSFunction::kContextOffset)); ++ // The argument count is stored as uint16_t ++ Ld_hu(expected_parameter_count, ++ FieldMemOperand(temp_reg, ++ SharedFunctionInfo::kFormalParameterCountOffset)); ++ ++ InvokeFunctionCode(a1, new_target, expected_parameter_count, ++ actual_parameter_count, flag); ++} ++ ++void MacroAssembler::InvokeFunction(Register function, ++ Register expected_parameter_count, ++ Register actual_parameter_count, ++ InvokeFlag flag) { ++ // You can't call a function without a valid frame. ++ DCHECK_IMPLIES(flag == CALL_FUNCTION, has_frame()); ++ ++ // Contract with called JS functions requires that function is passed in a1. ++ DCHECK_EQ(function, a1); ++ ++ // Get the function and setup the context. ++ Ld_d(cp, FieldMemOperand(a1, JSFunction::kContextOffset)); ++ ++ InvokeFunctionCode(a1, no_reg, expected_parameter_count, ++ actual_parameter_count, flag); ++} ++ ++// --------------------------------------------------------------------------- ++// Support functions. ++ ++void MacroAssembler::GetObjectType(Register object, Register map, ++ Register type_reg) { ++ LoadMap(map, object); ++ Ld_hu(type_reg, FieldMemOperand(map, Map::kInstanceTypeOffset)); ++} ++ ++// ----------------------------------------------------------------------------- ++// Runtime calls. ++ ++void TurboAssembler::AdddOverflow(Register dst, Register left, ++ const Operand& right, Register overflow) { ++ BlockTrampolinePoolScope block_trampoline_pool(this); ++ Register right_reg = no_reg; ++ Register scratch = t8; ++ if (!right.is_reg()) { ++ li(t7, Operand(right)); ++ right_reg = t7; ++ } else { ++ right_reg = right.rm(); ++ } ++ ++ DCHECK(left != scratch && right_reg != scratch && dst != scratch && ++ overflow != scratch); ++ DCHECK(overflow != left && overflow != right_reg); ++ ++ if (dst == left || dst == right_reg) { ++ add_d(scratch, left, right_reg); ++ xor_(overflow, scratch, left); ++ xor_(t7, scratch, right_reg); ++ and_(overflow, overflow, t7); ++ mov(dst, scratch); ++ } else { ++ add_d(dst, left, right_reg); ++ xor_(overflow, dst, left); ++ xor_(t7, dst, right_reg); ++ and_(overflow, overflow, t7); ++ } ++} ++ ++void TurboAssembler::SubdOverflow(Register dst, Register left, ++ const Operand& right, Register overflow) { ++ BlockTrampolinePoolScope block_trampoline_pool(this); ++ Register right_reg = no_reg; ++ Register scratch = t8; ++ if (!right.is_reg()) { ++ li(t7, Operand(right)); ++ right_reg = t7; ++ } else { ++ right_reg = right.rm(); ++ } ++ ++ DCHECK(left != scratch && right_reg != scratch && dst != scratch && ++ overflow != scratch); ++ DCHECK(overflow != left && overflow != right_reg); ++ ++ if (dst == left || dst == right_reg) { ++ Sub_d(scratch, left, right_reg); ++ xor_(overflow, left, scratch); ++ xor_(t7, left, right_reg); ++ and_(overflow, overflow, t7); ++ mov(dst, scratch); ++ } else { ++ sub_d(dst, left, right_reg); ++ xor_(overflow, left, dst); ++ xor_(t7, left, right_reg); ++ and_(overflow, overflow, t7); ++ } ++} ++ ++void TurboAssembler::MulOverflow(Register dst, Register left, ++ const Operand& right, Register overflow) { ++ BlockTrampolinePoolScope block_trampoline_pool(this); ++ Register right_reg = no_reg; ++ Register scratch = t8; ++ if (!right.is_reg()) { ++ li(t7, Operand(right)); ++ right_reg = t7; ++ } else { ++ right_reg = right.rm(); ++ } ++ ++ 
DCHECK(left != scratch && right_reg != scratch && dst != scratch && ++ overflow != scratch); ++ DCHECK(overflow != left && overflow != right_reg); ++ ++ if (dst == left || dst == right_reg) { ++ Mul_w(scratch, left, right_reg); ++ Mulh_w(overflow, left, right_reg); ++ mov(dst, scratch); ++ } else { ++ Mul_w(dst, left, right_reg); ++ Mulh_w(overflow, left, right_reg); ++ } ++ ++ srai_d(scratch, dst, 32); ++ xor_(overflow, overflow, scratch); ++} ++ ++void MacroAssembler::CallRuntime(const Runtime::Function* f, int num_arguments, ++ SaveFPRegsMode save_doubles) { ++ // All parameters are on the stack. v0 has the return value after call. ++ ++ // If the expected number of arguments of the runtime function is ++ // constant, we check that the actual number of arguments match the ++ // expectation. ++ CHECK(f->nargs < 0 || f->nargs == num_arguments); ++ ++ // TODO(1236192): Most runtime routines don't need the number of ++ // arguments passed in because it is constant. At some point we ++ // should remove this need and make the runtime routine entry code ++ // smarter. ++ PrepareCEntryArgs(num_arguments); ++ PrepareCEntryFunction(ExternalReference::Create(f)); ++ Handle code = ++ CodeFactory::CEntry(isolate(), f->result_size, save_doubles); ++ Call(code, RelocInfo::CODE_TARGET); ++} ++ ++void MacroAssembler::TailCallRuntime(Runtime::FunctionId fid) { ++ const Runtime::Function* function = Runtime::FunctionForId(fid); ++ DCHECK_EQ(1, function->result_size); ++ if (function->nargs >= 0) { ++ PrepareCEntryArgs(function->nargs); ++ } ++ JumpToExternalReference(ExternalReference::Create(fid)); ++} ++ ++void MacroAssembler::JumpToExternalReference(const ExternalReference& builtin, ++ bool builtin_exit_frame) { ++ PrepareCEntryFunction(builtin); ++ Handle code = CodeFactory::CEntry(isolate(), 1, kDontSaveFPRegs, ++ kArgvOnStack, builtin_exit_frame); ++ Jump(code, RelocInfo::CODE_TARGET, al, zero_reg, Operand(zero_reg)); ++} ++ ++void MacroAssembler::JumpToInstructionStream(Address entry) { ++ li(kOffHeapTrampolineRegister, Operand(entry, RelocInfo::OFF_HEAP_TARGET)); ++ Jump(kOffHeapTrampolineRegister); ++} ++ ++void MacroAssembler::LoadWeakValue(Register out, Register in, ++ Label* target_if_cleared) { ++ Branch(target_if_cleared, eq, in, Operand(kClearedWeakHeapObjectLower32)); ++ ++ And(out, in, Operand(~kWeakHeapObjectMask)); ++} ++ ++void MacroAssembler::IncrementCounter(StatsCounter* counter, int value, ++ Register scratch1, Register scratch2) { ++ DCHECK_GT(value, 0); ++ if (FLAG_native_code_counters && counter->Enabled()) { ++ // This operation has to be exactly 32-bit wide in case the external ++ // reference table redirects the counter to a uint32_t dummy_stats_counter_ ++ // field. ++ li(scratch2, ExternalReference::Create(counter)); ++ Ld_w(scratch1, MemOperand(scratch2, 0)); ++ Add_w(scratch1, scratch1, Operand(value)); ++ St_w(scratch1, MemOperand(scratch2, 0)); ++ } ++} ++ ++void MacroAssembler::DecrementCounter(StatsCounter* counter, int value, ++ Register scratch1, Register scratch2) { ++ DCHECK_GT(value, 0); ++ if (FLAG_native_code_counters && counter->Enabled()) { ++ // This operation has to be exactly 32-bit wide in case the external ++ // reference table redirects the counter to a uint32_t dummy_stats_counter_ ++ // field. 
++ li(scratch2, ExternalReference::Create(counter)); ++ Ld_w(scratch1, MemOperand(scratch2, 0)); ++ Sub_w(scratch1, scratch1, Operand(value)); ++ St_w(scratch1, MemOperand(scratch2, 0)); ++ } ++} ++ ++// ----------------------------------------------------------------------------- ++// Debugging. ++ ++void TurboAssembler::Trap() { stop(); } ++void TurboAssembler::DebugBreak() { stop(); } ++ ++void TurboAssembler::Assert(Condition cc, AbortReason reason, Register rs, ++ Operand rk) { ++ if (emit_debug_code()) Check(cc, reason, rs, rk); ++} ++ ++void TurboAssembler::Check(Condition cc, AbortReason reason, Register rj, ++ Operand rk) { ++ Label L; ++ Branch(&L, cc, rj, rk); ++ Abort(reason); ++ // Will not return here. ++ bind(&L); ++} ++ ++void TurboAssembler::Abort(AbortReason reason) { ++ Label abort_start; ++ bind(&abort_start); ++#ifdef DEBUG ++ const char* msg = GetAbortReason(reason); ++ RecordComment("Abort message: "); ++ RecordComment(msg); ++#endif ++ ++ // Avoid emitting call to builtin if requested. ++ if (trap_on_abort()) { ++ stop(); ++ return; ++ } ++ ++ if (should_abort_hard()) { ++ // We don't care if we constructed a frame. Just pretend we did. ++ FrameScope assume_frame(this, StackFrame::NONE); ++ PrepareCallCFunction(0, a0); ++ li(a0, Operand(static_cast(reason))); ++ CallCFunction(ExternalReference::abort_with_reason(), 1); ++ return; ++ } ++ ++ Move(a0, Smi::FromInt(static_cast(reason))); ++ ++ // Disable stub call restrictions to always allow calls to abort. ++ if (!has_frame()) { ++ // We don't actually want to generate a pile of code for this, so just ++ // claim there is a stack frame, without generating one. ++ FrameScope scope(this, StackFrame::NONE); ++ Call(BUILTIN_CODE(isolate(), Abort), RelocInfo::CODE_TARGET); ++ } else { ++ Call(BUILTIN_CODE(isolate(), Abort), RelocInfo::CODE_TARGET); ++ } ++ // Will not return here. ++ if (is_trampoline_pool_blocked()) { ++ // If the calling code cares about the exact number of ++ // instructions generated, we insert padding here to keep the size ++ // of the Abort macro constant. ++ // Currently in debug mode with debug_code enabled the number of ++ // generated instructions is 10, so we use this as a maximum value. 
++ static const int kExpectedAbortInstructions = 10; ++ int abort_instructions = InstructionsGeneratedSince(&abort_start); ++ DCHECK_LE(abort_instructions, kExpectedAbortInstructions); ++ while (abort_instructions++ < kExpectedAbortInstructions) { ++ nop(); ++ } ++ } ++} ++ ++void MacroAssembler::LoadMap(Register destination, Register object) { ++ Ld_d(destination, FieldMemOperand(object, HeapObject::kMapOffset)); ++} ++ ++void MacroAssembler::LoadNativeContextSlot(int index, Register dst) { ++ LoadMap(dst, cp); ++ Ld_d(dst, FieldMemOperand( ++ dst, Map::kConstructorOrBackPointerOrNativeContextOffset)); ++ Ld_d(dst, MemOperand(dst, Context::SlotOffset(index))); ++} ++ ++void TurboAssembler::StubPrologue(StackFrame::Type type) { ++ UseScratchRegisterScope temps(this); ++ Register scratch = temps.Acquire(); ++ li(scratch, Operand(StackFrame::TypeToMarker(type))); ++ PushCommonFrame(scratch); ++} ++ ++void TurboAssembler::Prologue() { PushStandardFrame(a1); } ++ ++void TurboAssembler::EnterFrame(StackFrame::Type type) { ++ BlockTrampolinePoolScope block_trampoline_pool(this); ++ int stack_offset = -3 * kPointerSize; ++ const int fp_offset = 1 * kPointerSize; ++ addi_d(sp, sp, stack_offset); ++ stack_offset = -stack_offset - kPointerSize; ++ St_d(ra, MemOperand(sp, stack_offset)); ++ stack_offset -= kPointerSize; ++ St_d(fp, MemOperand(sp, stack_offset)); ++ stack_offset -= kPointerSize; ++ li(t7, Operand(StackFrame::TypeToMarker(type))); ++ St_d(t7, MemOperand(sp, stack_offset)); ++ // Adjust FP to point to saved FP. ++ DCHECK_EQ(stack_offset, 0); ++ Add_d(fp, sp, Operand(fp_offset)); ++} ++ ++void TurboAssembler::LeaveFrame(StackFrame::Type type) { ++ addi_d(sp, fp, 2 * kPointerSize); ++ Ld_d(ra, MemOperand(fp, 1 * kPointerSize)); ++ Ld_d(fp, MemOperand(fp, 0 * kPointerSize)); ++} ++ ++void MacroAssembler::EnterExitFrame(bool save_doubles, int stack_space, ++ StackFrame::Type frame_type) { ++ DCHECK(frame_type == StackFrame::EXIT || ++ frame_type == StackFrame::BUILTIN_EXIT); ++ ++ // Set up the frame structure on the stack. ++ STATIC_ASSERT(2 * kPointerSize == ExitFrameConstants::kCallerSPDisplacement); ++ STATIC_ASSERT(1 * kPointerSize == ExitFrameConstants::kCallerPCOffset); ++ STATIC_ASSERT(0 * kPointerSize == ExitFrameConstants::kCallerFPOffset); ++ ++ // This is how the stack will look: ++ // fp + 2 (==kCallerSPDisplacement) - old stack's end ++ // [fp + 1 (==kCallerPCOffset)] - saved old ra ++ // [fp + 0 (==kCallerFPOffset)] - saved old fp ++ // [fp - 1 StackFrame::EXIT Smi ++ // [fp - 2 (==kSPOffset)] - sp of the called function ++ // fp - (2 + stack_space + alignment) == sp == [fp - kSPOffset] - top of the ++ // new stack (will contain saved ra) ++ ++ // Save registers and reserve room for saved entry sp. ++ addi_d(sp, sp, -2 * kPointerSize - ExitFrameConstants::kFixedFrameSizeFromFp); ++ St_d(ra, MemOperand(sp, 3 * kPointerSize)); ++ St_d(fp, MemOperand(sp, 2 * kPointerSize)); ++ { ++ UseScratchRegisterScope temps(this); ++ Register scratch = temps.Acquire(); ++ li(scratch, Operand(StackFrame::TypeToMarker(frame_type))); ++ St_d(scratch, MemOperand(sp, 1 * kPointerSize)); ++ } ++ // Set up new frame pointer. ++ addi_d(fp, sp, ExitFrameConstants::kFixedFrameSizeFromFp); ++ ++ if (emit_debug_code()) { ++ St_d(zero_reg, MemOperand(fp, ExitFrameConstants::kSPOffset)); ++ } ++ ++ { ++ BlockTrampolinePoolScope block_trampoline_pool(this); ++ // Save the frame pointer and the context in top. 
++ li(t8, ExternalReference::Create(IsolateAddressId::kCEntryFPAddress, ++ isolate())); ++ St_d(fp, MemOperand(t8, 0)); ++ li(t8, ++ ExternalReference::Create(IsolateAddressId::kContextAddress, isolate())); ++ St_d(cp, MemOperand(t8, 0)); ++ } ++ ++ const int frame_alignment = MacroAssembler::ActivationFrameAlignment(); ++ if (save_doubles) { ++ // The stack is already aligned to 0 modulo 8 for stores with sdc1. ++ int kNumOfSavedRegisters = FPURegister::kNumRegisters / 2; ++ int space = kNumOfSavedRegisters * kDoubleSize; ++ Sub_d(sp, sp, Operand(space)); ++ // Remember: we only need to save every 2nd double FPU value. ++ for (int i = 0; i < kNumOfSavedRegisters; i++) { ++ FPURegister reg = FPURegister::from_code(2 * i); ++ Fst_d(reg, MemOperand(sp, i * kDoubleSize)); ++ } ++ } ++ ++ // Reserve place for the return address, stack space and an optional slot ++ // (used by DirectCEntry to hold the return value if a struct is ++ // returned) and align the frame preparing for calling the runtime function. ++ DCHECK_GE(stack_space, 0); ++ Sub_d(sp, sp, Operand((stack_space + 2) * kPointerSize)); ++ if (frame_alignment > 0) { ++ DCHECK(base::bits::IsPowerOfTwo(frame_alignment)); ++ And(sp, sp, Operand(-frame_alignment)); // Align stack. ++ } ++ ++ // Set the exit frame sp value to point just before the return address ++ // location. ++ UseScratchRegisterScope temps(this); ++ Register scratch = temps.Acquire(); ++ addi_d(scratch, sp, kPointerSize); ++ St_d(scratch, MemOperand(fp, ExitFrameConstants::kSPOffset)); ++} ++ ++void MacroAssembler::LeaveExitFrame(bool save_doubles, Register argument_count, ++ bool do_return, ++ bool argument_count_is_length) { ++ BlockTrampolinePoolScope block_trampoline_pool(this); ++ // Optionally restore all double registers. ++ if (save_doubles) { ++ // Remember: we only need to restore every 2nd double FPU value. ++ int kNumOfSavedRegisters = FPURegister::kNumRegisters / 2; ++ Sub_d(t8, fp, ++ Operand(ExitFrameConstants::kFixedFrameSizeFromFp + ++ kNumOfSavedRegisters * kDoubleSize)); ++ for (int i = 0; i < kNumOfSavedRegisters; i++) { ++ FPURegister reg = FPURegister::from_code(2 * i); ++ Fld_d(reg, MemOperand(t8, i * kDoubleSize)); ++ } ++ } ++ ++ // Clear top frame. ++ li(t8, ++ ExternalReference::Create(IsolateAddressId::kCEntryFPAddress, isolate())); ++ St_d(zero_reg, MemOperand(t8, 0)); ++ ++ // Restore current context from top and clear it in debug mode. ++ li(t8, ++ ExternalReference::Create(IsolateAddressId::kContextAddress, isolate())); ++ Ld_d(cp, MemOperand(t8, 0)); ++ ++#ifdef DEBUG ++ li(t8, ++ ExternalReference::Create(IsolateAddressId::kContextAddress, isolate())); ++ St_d(a3, MemOperand(t8, 0)); ++#endif ++ ++ // Pop the arguments, restore registers, and return. ++ mov(sp, fp); // Respect ABI stack constraint. ++ Ld_d(fp, MemOperand(sp, ExitFrameConstants::kCallerFPOffset)); ++ Ld_d(ra, MemOperand(sp, ExitFrameConstants::kCallerPCOffset)); ++ ++ if (argument_count.is_valid()) { ++ if (argument_count_is_length) { ++ add_d(sp, sp, argument_count); ++ } else { ++ Alsl_d(sp, argument_count, sp, kPointerSizeLog2, t8); ++ } ++ } ++ ++ addi_d(sp, sp, 2 * kPointerSize); ++ if (do_return) { ++ Ret(); ++ } ++} ++ ++int TurboAssembler::ActivationFrameAlignment() { ++#if V8_HOST_ARCH_LA64 ++ // Running on the real platform. Use the alignment as mandated by the local ++ // environment. ++ // Note: This will break if we ever start generating snapshots on one Mips ++ // platform for another Mips platform with a different alignment. 
++ return base::OS::ActivationFrameAlignment(); ++#else // V8_HOST_ARCH_LA64 ++ // If we are using the simulator then we should always align to the expected ++ // alignment. As the simulator is used to generate snapshots we do not know ++ // if the target platform will need alignment, so this is controlled from a ++ // flag. ++ return FLAG_sim_stack_alignment; ++#endif // V8_HOST_ARCH_LA64 ++} ++ ++void MacroAssembler::AssertStackIsAligned() { ++ if (emit_debug_code()) { ++ const int frame_alignment = ActivationFrameAlignment(); ++ const int frame_alignment_mask = frame_alignment - 1; ++ ++ if (frame_alignment > kPointerSize) { ++ Label alignment_as_expected; ++ DCHECK(base::bits::IsPowerOfTwo(frame_alignment)); ++ { ++ UseScratchRegisterScope temps(this); ++ Register scratch = temps.Acquire(); ++ andi(scratch, sp, frame_alignment_mask); ++ Branch(&alignment_as_expected, eq, scratch, Operand(zero_reg)); ++ } ++ // Don't use Check here, as it will call Runtime_Abort re-entering here. ++ stop(); ++ bind(&alignment_as_expected); ++ } ++ } ++} ++ ++void TurboAssembler::SmiUntag(Register dst, const MemOperand& src) { ++ if (SmiValuesAre32Bits()) { ++ Ld_w(dst, MemOperand(src.base(), SmiWordOffset(src.offset()))); ++ } else { ++ DCHECK(SmiValuesAre31Bits()); ++ Ld_w(dst, src); ++ SmiUntag(dst); ++ } ++} ++ ++void TurboAssembler::JumpIfSmi(Register value, Label* smi_label, ++ Register scratch) { ++ DCHECK_EQ(0, kSmiTag); ++ andi(scratch, value, kSmiTagMask); ++ Branch(smi_label, eq, scratch, Operand(zero_reg)); ++} ++ ++void MacroAssembler::JumpIfNotSmi(Register value, Label* not_smi_label, ++ Register scratch) { ++ DCHECK_EQ(0, kSmiTag); ++ andi(scratch, value, kSmiTagMask); ++ Branch(not_smi_label, ne, scratch, Operand(zero_reg)); ++} ++ ++void MacroAssembler::AssertNotSmi(Register object) { ++ if (emit_debug_code()) { ++ STATIC_ASSERT(kSmiTag == 0); ++ UseScratchRegisterScope temps(this); ++ Register scratch = temps.Acquire(); ++ andi(scratch, object, kSmiTagMask); ++ Check(ne, AbortReason::kOperandIsASmi, scratch, Operand(zero_reg)); ++ } ++} ++ ++void MacroAssembler::AssertSmi(Register object) { ++ if (emit_debug_code()) { ++ STATIC_ASSERT(kSmiTag == 0); ++ UseScratchRegisterScope temps(this); ++ Register scratch = temps.Acquire(); ++ andi(scratch, object, kSmiTagMask); ++ Check(eq, AbortReason::kOperandIsASmi, scratch, Operand(zero_reg)); ++ } ++} ++ ++void MacroAssembler::AssertConstructor(Register object) { ++ if (emit_debug_code()) { ++ BlockTrampolinePoolScope block_trampoline_pool(this); ++ STATIC_ASSERT(kSmiTag == 0); ++ SmiTst(object, t8); ++ Check(ne, AbortReason::kOperandIsASmiAndNotAConstructor, t8, ++ Operand(zero_reg)); ++ ++ LoadMap(t8, object); ++ Ld_bu(t8, FieldMemOperand(t8, Map::kBitFieldOffset)); ++ And(t8, t8, Operand(Map::Bits1::IsConstructorBit::kMask)); ++ Check(ne, AbortReason::kOperandIsNotAConstructor, t8, Operand(zero_reg)); ++ } ++} ++ ++void MacroAssembler::AssertFunction(Register object) { ++ if (emit_debug_code()) { ++ BlockTrampolinePoolScope block_trampoline_pool(this); ++ STATIC_ASSERT(kSmiTag == 0); ++ SmiTst(object, t8); ++ Check(ne, AbortReason::kOperandIsASmiAndNotAFunction, t8, ++ Operand(zero_reg)); ++ GetObjectType(object, t8, t8); ++ Check(eq, AbortReason::kOperandIsNotAFunction, t8, ++ Operand(JS_FUNCTION_TYPE)); ++ } ++} ++ ++void MacroAssembler::AssertBoundFunction(Register object) { ++ if (emit_debug_code()) { ++ BlockTrampolinePoolScope block_trampoline_pool(this); ++ STATIC_ASSERT(kSmiTag == 0); ++ SmiTst(object, t8); ++ Check(ne, 
AbortReason::kOperandIsASmiAndNotABoundFunction, t8, ++ Operand(zero_reg)); ++ GetObjectType(object, t8, t8); ++ Check(eq, AbortReason::kOperandIsNotABoundFunction, t8, ++ Operand(JS_BOUND_FUNCTION_TYPE)); ++ } ++} ++ ++void MacroAssembler::AssertGeneratorObject(Register object) { ++ if (!emit_debug_code()) return; ++ BlockTrampolinePoolScope block_trampoline_pool(this); ++ STATIC_ASSERT(kSmiTag == 0); ++ SmiTst(object, t8); ++ Check(ne, AbortReason::kOperandIsASmiAndNotAGeneratorObject, t8, ++ Operand(zero_reg)); ++ ++ GetObjectType(object, t8, t8); ++ ++ Label done; ++ ++ // Check if JSGeneratorObject ++ Branch(&done, eq, t8, Operand(JS_GENERATOR_OBJECT_TYPE)); ++ ++ // Check if JSAsyncFunctionObject (See MacroAssembler::CompareInstanceType) ++ Branch(&done, eq, t8, Operand(JS_ASYNC_FUNCTION_OBJECT_TYPE)); ++ ++ // Check if JSAsyncGeneratorObject ++ Branch(&done, eq, t8, Operand(JS_ASYNC_GENERATOR_OBJECT_TYPE)); ++ ++ Abort(AbortReason::kOperandIsNotAGeneratorObject); ++ ++ bind(&done); ++} ++ ++void MacroAssembler::AssertUndefinedOrAllocationSite(Register object, ++ Register scratch) { ++ if (emit_debug_code()) { ++ Label done_checking; ++ AssertNotSmi(object); ++ LoadRoot(scratch, RootIndex::kUndefinedValue); ++ Branch(&done_checking, eq, object, Operand(scratch)); ++ GetObjectType(object, scratch, scratch); ++ Assert(eq, AbortReason::kExpectedUndefinedOrCell, scratch, ++ Operand(ALLOCATION_SITE_TYPE)); ++ bind(&done_checking); ++ } ++} ++ ++void TurboAssembler::Float32Max(FPURegister dst, FPURegister src1, ++ FPURegister src2, Label* out_of_line) { ++ if (src1 == src2) { ++ Move_s(dst, src1); ++ return; ++ } ++ ++ // Check if one of operands is NaN. ++ CompareIsNanF32(src1, src2); ++ BranchTrueF(out_of_line); ++ ++ fmax_s(dst, src1, src2); ++} ++ ++void TurboAssembler::Float32MaxOutOfLine(FPURegister dst, FPURegister src1, ++ FPURegister src2) { ++ fadd_s(dst, src1, src2); ++} ++ ++void TurboAssembler::Float32Min(FPURegister dst, FPURegister src1, ++ FPURegister src2, Label* out_of_line) { ++ if (src1 == src2) { ++ Move_s(dst, src1); ++ return; ++ } ++ ++ // Check if one of operands is NaN. ++ CompareIsNanF32(src1, src2); ++ BranchTrueF(out_of_line); ++ ++ fmin_s(dst, src1, src2); ++} ++ ++void TurboAssembler::Float32MinOutOfLine(FPURegister dst, FPURegister src1, ++ FPURegister src2) { ++ fadd_s(dst, src1, src2); ++} ++ ++void TurboAssembler::Float64Max(FPURegister dst, FPURegister src1, ++ FPURegister src2, Label* out_of_line) { ++ if (src1 == src2) { ++ Move_d(dst, src1); ++ return; ++ } ++ ++ // Check if one of operands is NaN. ++ CompareIsNanF64(src1, src2); ++ BranchTrueF(out_of_line); ++ ++ fmax_d(dst, src1, src2); ++} ++ ++void TurboAssembler::Float64MaxOutOfLine(FPURegister dst, FPURegister src1, ++ FPURegister src2) { ++ fadd_d(dst, src1, src2); ++} ++ ++void TurboAssembler::Float64Min(FPURegister dst, FPURegister src1, ++ FPURegister src2, Label* out_of_line) { ++ if (src1 == src2) { ++ Move_d(dst, src1); ++ return; ++ } ++ ++ // Check if one of operands is NaN. 
++  CompareIsNanF64(src1, src2);
++  BranchTrueF(out_of_line);
++
++  fmin_d(dst, src1, src2);
++}
++
++void TurboAssembler::Float64MinOutOfLine(FPURegister dst, FPURegister src1,
++                                         FPURegister src2) {
++  fadd_d(dst, src1, src2);
++}
++
++static const int kRegisterPassedArguments = 8;
++
++int TurboAssembler::CalculateStackPassedWords(int num_reg_arguments,
++                                              int num_double_arguments) {
++  int stack_passed_words = 0;
++  num_reg_arguments += 2 * num_double_arguments;
++
++  // Up to eight simple arguments are passed in registers a0..a7.
++  if (num_reg_arguments > kRegisterPassedArguments) {
++    stack_passed_words += num_reg_arguments - kRegisterPassedArguments;
++  }
++  stack_passed_words += kCArgSlotCount;
++  return stack_passed_words;
++}
++
++void TurboAssembler::PrepareCallCFunction(int num_reg_arguments,
++                                          int num_double_arguments,
++                                          Register scratch) {
++  int frame_alignment = ActivationFrameAlignment();
++
++  // Up to eight simple arguments are passed in registers a0..a7. Remaining
++  // arguments are pushed on the stack (the argument slot calculation is
++  // handled by CalculateStackPassedWords()).
++  int stack_passed_arguments =
++      CalculateStackPassedWords(num_reg_arguments, num_double_arguments);
++  if (frame_alignment > kPointerSize) {
++    // Make stack end at alignment and make room for the stack arguments
++    // and the original value of sp.
++    mov(scratch, sp);
++    Sub_d(sp, sp, Operand((stack_passed_arguments + 1) * kPointerSize));
++    DCHECK(base::bits::IsPowerOfTwo(frame_alignment));
++    bstrins_d(sp, zero_reg, std::log2(frame_alignment) - 1, 0);
++    St_d(scratch, MemOperand(sp, stack_passed_arguments * kPointerSize));
++  } else {
++    Sub_d(sp, sp, Operand(stack_passed_arguments * kPointerSize));
++  }
++}
++
++void TurboAssembler::PrepareCallCFunction(int num_reg_arguments,
++                                          Register scratch) {
++  PrepareCallCFunction(num_reg_arguments, 0, scratch);
++}
++
++void TurboAssembler::CallCFunction(ExternalReference function,
++                                   int num_reg_arguments,
++                                   int num_double_arguments) {
++  BlockTrampolinePoolScope block_trampoline_pool(this);
++  li(t7, function);
++  CallCFunctionHelper(t7, num_reg_arguments, num_double_arguments);
++}
++
++void TurboAssembler::CallCFunction(Register function, int num_reg_arguments,
++                                   int num_double_arguments) {
++  CallCFunctionHelper(function, num_reg_arguments, num_double_arguments);
++}
++
++void TurboAssembler::CallCFunction(ExternalReference function,
++                                   int num_arguments) {
++  CallCFunction(function, num_arguments, 0);
++}
++
++void TurboAssembler::CallCFunction(Register function, int num_arguments) {
++  CallCFunction(function, num_arguments, 0);
++}
++
++void TurboAssembler::CallCFunctionHelper(Register function,
++                                         int num_reg_arguments,
++                                         int num_double_arguments) {
++  DCHECK_LE(num_reg_arguments + num_double_arguments, kMaxCParameters);
++  DCHECK(has_frame());
++  // Make sure that the stack is aligned before calling a C function unless
++  // running in the simulator. The simulator has its own alignment check which
++  // provides more information.
++  // The argument slots are presumed to have been set up by
++  // PrepareCallCFunction. The C function must be called via t7.
++ ++#if V8_HOST_ARCH_LA64 ++ if (emit_debug_code()) { ++ int frame_alignment = base::OS::ActivationFrameAlignment(); ++ int frame_alignment_mask = frame_alignment - 1; ++ if (frame_alignment > kPointerSize) { ++ DCHECK(base::bits::IsPowerOfTwo(frame_alignment)); ++ Label alignment_as_expected; ++ { ++ UseScratchRegisterScope temps(this); ++ Register scratch = temps.Acquire(); ++ And(scratch, sp, Operand(frame_alignment_mask)); ++ Branch(&alignment_as_expected, eq, scratch, Operand(zero_reg)); ++ } ++ // Don't use Check here, as it will call Runtime_Abort possibly ++ // re-entering here. ++ stop(); ++ bind(&alignment_as_expected); ++ } ++ } ++#endif // V8_HOST_ARCH_LA64 ++ ++ // Just call directly. The function called cannot cause a GC, or ++ // allow preemption, so the return address in the link register ++ // stays correct. ++ { ++ BlockTrampolinePoolScope block_trampoline_pool(this); ++ if (function != t7) { ++ mov(t7, function); ++ function = t7; ++ } ++ ++ // Save the frame pointer and PC so that the stack layout remains iterable, ++ // even without an ExitFrame which normally exists between JS and C frames. ++ // 't' registers are caller-saved so this is safe as a scratch register. ++ Register pc_scratch = t1; ++ Register scratch = t2; ++ DCHECK(!AreAliased(pc_scratch, scratch, function)); ++ ++ pcaddi(pc_scratch, 1); ++ ++ // See x64 code for reasoning about how to address the isolate data fields. ++ if (root_array_available()) { ++ St_d(pc_scratch, MemOperand(kRootRegister, ++ IsolateData::fast_c_call_caller_pc_offset())); ++ St_d(fp, MemOperand(kRootRegister, ++ IsolateData::fast_c_call_caller_fp_offset())); ++ } else { ++ DCHECK_NOT_NULL(isolate()); ++ li(scratch, ExternalReference::fast_c_call_caller_pc_address(isolate())); ++ St_d(pc_scratch, MemOperand(scratch, 0)); ++ li(scratch, ExternalReference::fast_c_call_caller_fp_address(isolate())); ++ St_d(fp, MemOperand(scratch, 0)); ++ } ++ ++ Call(function); ++ ++ // We don't unset the PC; the FP is the source of truth. 
++ if (root_array_available()) { ++ St_d(zero_reg, MemOperand(kRootRegister, ++ IsolateData::fast_c_call_caller_fp_offset())); ++ } else { ++ DCHECK_NOT_NULL(isolate()); ++ li(scratch, ExternalReference::fast_c_call_caller_fp_address(isolate())); ++ St_d(zero_reg, MemOperand(scratch, 0)); ++ } ++ } ++ ++ int stack_passed_arguments = ++ CalculateStackPassedWords(num_reg_arguments, num_double_arguments); ++ ++ if (base::OS::ActivationFrameAlignment() > kPointerSize) { ++ Ld_d(sp, MemOperand(sp, stack_passed_arguments * kPointerSize)); ++ } else { ++ Add_d(sp, sp, Operand(stack_passed_arguments * kPointerSize)); ++ } ++} ++ ++#undef BRANCH_ARGS_CHECK ++ ++void TurboAssembler::CheckPageFlag(Register object, Register scratch, int mask, ++ Condition cc, Label* condition_met) { ++ And(scratch, object, Operand(~kPageAlignmentMask)); ++ Ld_d(scratch, MemOperand(scratch, MemoryChunk::kFlagsOffset)); ++ And(scratch, scratch, Operand(mask)); ++ Branch(condition_met, cc, scratch, Operand(zero_reg)); ++} ++ ++Register GetRegisterThatIsNotOneOf(Register reg1, Register reg2, Register reg3, ++ Register reg4, Register reg5, ++ Register reg6) { ++ RegList regs = 0; ++ if (reg1.is_valid()) regs |= reg1.bit(); ++ if (reg2.is_valid()) regs |= reg2.bit(); ++ if (reg3.is_valid()) regs |= reg3.bit(); ++ if (reg4.is_valid()) regs |= reg4.bit(); ++ if (reg5.is_valid()) regs |= reg5.bit(); ++ if (reg6.is_valid()) regs |= reg6.bit(); ++ ++ const RegisterConfiguration* config = RegisterConfiguration::Default(); ++ for (int i = 0; i < config->num_allocatable_general_registers(); ++i) { ++ int code = config->GetAllocatableGeneralCode(i); ++ Register candidate = Register::from_code(code); ++ if (regs & candidate.bit()) continue; ++ return candidate; ++ } ++ UNREACHABLE(); ++} ++ ++void TurboAssembler::ComputeCodeStartAddress(Register dst) { ++ // TODO: range check, add Pcadd macro function? ++ pcaddi(dst, -pc_offset() >> 2); ++} ++ ++void TurboAssembler::ResetSpeculationPoisonRegister() { ++ li(kSpeculationPoisonRegister, -1); ++} ++ ++void TurboAssembler::CallForDeoptimization(Address target, int deopt_id, ++ Label* exit, DeoptimizeKind kind) { ++ USE(exit, kind); ++ NoRootArrayScope no_root_array(this); ++ ++ // Save the deopt id in kRootRegister (we don't need the roots array from now ++ // on). ++ DCHECK_LE(deopt_id, 0xFFFF); ++ li(kRootRegister, deopt_id); ++ Call(target, RelocInfo::RUNTIME_ENTRY); ++} ++ ++} // namespace internal ++} // namespace v8 ++ ++#endif // V8_TARGET_ARCH_LA64 +diff --git a/src/3rdparty/chromium/v8/src/codegen/la64/macro-assembler-la64.h b/src/3rdparty/chromium/v8/src/codegen/la64/macro-assembler-la64.h +new file mode 100644 +index 00000000000..64116977c09 +--- /dev/null ++++ b/src/3rdparty/chromium/v8/src/codegen/la64/macro-assembler-la64.h +@@ -0,0 +1,1084 @@ ++// Copyright 2012 the V8 project authors. All rights reserved. ++// Use of this source code is governed by a BSD-style license that can be ++// found in the LICENSE file. ++ ++#ifndef INCLUDED_FROM_MACRO_ASSEMBLER_H ++#error This header must be included via macro-assembler.h ++#endif ++ ++#ifndef V8_CODEGEN_LA64_MACRO_ASSEMBLER_LA64_H_ ++#define V8_CODEGEN_LA64_MACRO_ASSEMBLER_LA64_H_ ++ ++#include "src/codegen/assembler.h" ++#include "src/codegen/la64/assembler-la64.h" ++#include "src/common/globals.h" ++ ++namespace v8 { ++namespace internal { ++ ++// Forward declarations. ++enum class AbortReason : uint8_t; ++ ++// Reserved Register Usage Summary. ++// ++// Registers t8 and t7 are reserved for use by the MacroAssembler. 
++// ++// The programmer should know that the MacroAssembler may clobber these two, ++// but won't touch other registers except in special cases. ++// ++// Per the MIPS ABI, register t0 -- t8 must be used for indirect function call ++// via 'jirl t[0-8]' instructions. gcc? ++ ++// Flags used for LeaveExitFrame function. ++enum LeaveExitFrameMode { EMIT_RETURN = true, NO_EMIT_RETURN = false }; ++ ++// Flags used for the li macro-assembler function. ++enum LiFlags { ++ // If the constant value can be represented in just 12 bits, then ++ // optimize the li to use a single instruction, rather than lu12i_w/lu32i_d/ ++ // lu52i_d/ori sequence. A number of other optimizations that emits less than ++ // maximum number of instructions exists. ++ OPTIMIZE_SIZE = 0, ++ // Always use 4 instructions (lu12i_w/ori/lu32i_d/lu52i_d sequence), ++ // even if the constant could be loaded with just one, so that this value is ++ // patchable later. ++ CONSTANT_SIZE = 1, ++ // For address loads only 3 instruction are required. Used to mark ++ // constant load that will be used as address without relocation ++ // information. It ensures predictable code size, so specific sites ++ // in code are patchable. ++ ADDRESS_LOAD = 2 ++}; ++ ++enum RememberedSetAction { EMIT_REMEMBERED_SET, OMIT_REMEMBERED_SET }; ++enum SmiCheck { INLINE_SMI_CHECK, OMIT_SMI_CHECK }; ++enum RAStatus { kRAHasNotBeenSaved, kRAHasBeenSaved }; ++ ++Register GetRegisterThatIsNotOneOf(Register reg1, Register reg2 = no_reg, ++ Register reg3 = no_reg, ++ Register reg4 = no_reg, ++ Register reg5 = no_reg, ++ Register reg6 = no_reg); ++ ++// ----------------------------------------------------------------------------- ++// Static helper functions. ++ ++#define SmiWordOffset(offset) (offset + kPointerSize / 2) ++ ++// Generate a MemOperand for loading a field from an object. ++inline MemOperand FieldMemOperand(Register object, int offset) { ++ return MemOperand(object, offset - kHeapObjectTag); ++} ++ ++class V8_EXPORT_PRIVATE TurboAssembler : public TurboAssemblerBase { ++ public: ++ using TurboAssemblerBase::TurboAssemblerBase; ++ ++ // Activation support. ++ void EnterFrame(StackFrame::Type type); ++ void EnterFrame(StackFrame::Type type, bool load_constant_pool_pointer_reg) { ++ // Out-of-line constant pool not implemented on la64. ++ UNREACHABLE(); ++ } ++ void LeaveFrame(StackFrame::Type type); ++ ++ // Generates function and stub prologue code. ++ void StubPrologue(StackFrame::Type type); ++ void Prologue(); ++ ++ void InitializeRootRegister() { ++ ExternalReference isolate_root = ExternalReference::isolate_root(isolate()); ++ li(kRootRegister, Operand(isolate_root)); ++ } ++ ++ // Jump unconditionally to given label. ++ // Use rather b(Label) for code generation. ++ void jmp(Label* L) { Branch(L); } ++ ++ // ------------------------------------------------------------------------- ++ // Debugging. ++ ++ void Trap() override; ++ void DebugBreak() override; ++ ++ // Calls Abort(msg) if the condition cc is not satisfied. ++ // Use --debug_code to enable. ++ void Assert(Condition cc, AbortReason reason, Register rj, Operand rk); ++ ++ // Like Assert(), but always enabled. ++ void Check(Condition cc, AbortReason reason, Register rj, Operand rk); ++ ++ // Print a message to stdout and abort execution. 
++  void Abort(AbortReason msg);
++
++  void Branch(Label* label, bool need_link = false);
++  void Branch(Label* label, Condition cond, Register r1, const Operand& r2,
++              bool need_link = false);
++  void BranchShort(Label* label, Condition cond, Register r1, const Operand& r2,
++                   bool need_link = false);
++  void Branch(Label* L, Condition cond, Register rj, RootIndex index);
++
++  // Floating point branches
++  void CompareF32(FPURegister cmp1, FPURegister cmp2, FPUCondition cc,
++                  CFRegister cd = FCC0) {
++    CompareF(cmp1, cmp2, cc, cd, true);
++  }
++
++  void CompareIsNanF32(FPURegister cmp1, FPURegister cmp2,
++                       CFRegister cd = FCC0) {
++    CompareIsNanF(cmp1, cmp2, cd, true);
++  }
++
++  void CompareF64(FPURegister cmp1, FPURegister cmp2, FPUCondition cc,
++                  CFRegister cd = FCC0) {
++    CompareF(cmp1, cmp2, cc, cd, false);
++  }
++
++  void CompareIsNanF64(FPURegister cmp1, FPURegister cmp2,
++                       CFRegister cd = FCC0) {
++    CompareIsNanF(cmp1, cmp2, cd, false);
++  }
++
++  void BranchTrueShortF(Label* target, CFRegister cc = FCC0);
++  void BranchFalseShortF(Label* target, CFRegister cc = FCC0);
++
++  void BranchTrueF(Label* target, CFRegister cc = FCC0);
++  void BranchFalseF(Label* target, CFRegister cc = FCC0);
++
++  static int InstrCountForLi64Bit(int64_t value);
++  inline void LiLower32BitHelper(Register rd, Operand j);
++  void li_optimized(Register rd, Operand j, LiFlags mode = OPTIMIZE_SIZE);
++  void li(Register rd, Operand j, LiFlags mode = OPTIMIZE_SIZE);
++  inline void li(Register rd, int64_t j, LiFlags mode = OPTIMIZE_SIZE) {
++    li(rd, Operand(j), mode);
++  }
++  inline void li(Register rd, int32_t j, LiFlags mode = OPTIMIZE_SIZE) {
++    li(rd, Operand(static_cast<int64_t>(j)), mode);
++  }
++  void li(Register dst, Handle<HeapObject> value, LiFlags mode = OPTIMIZE_SIZE);
++  void li(Register dst, ExternalReference value, LiFlags mode = OPTIMIZE_SIZE);
++  void li(Register dst, const StringConstantBase* string,
++          LiFlags mode = OPTIMIZE_SIZE);
++
++  void LoadFromConstantsTable(Register destination,
++                              int constant_index) override;
++  void LoadRootRegisterOffset(Register destination, intptr_t offset) override;
++  void LoadRootRelative(Register destination, int32_t offset) override;
++
++// Jump, Call, and Ret pseudo instructions implementing inter-working.
++#define COND_ARGS \
++  Condition cond = al, Register rj = zero_reg, \
++      const Operand &rk = Operand(zero_reg)
++
++  void Jump(Register target, COND_ARGS);
++  void Jump(intptr_t target, RelocInfo::Mode rmode, COND_ARGS);
++  void Jump(Address target, RelocInfo::Mode rmode, COND_ARGS);
++  // Unlike li, this method saves the target to memory and then loads it into
++  // a register with Ld_d; it can be used in the wasm jump table for
++  // concurrent patching.
++  void PatchAndJump(Address target);
++  void Jump(Handle<Code> code, RelocInfo::Mode rmode, COND_ARGS);
++  void Jump(const ExternalReference& reference) override;
++  void Call(Register target, COND_ARGS);
++  void Call(Address target, RelocInfo::Mode rmode, COND_ARGS);
++  void Call(Handle<Code> code, RelocInfo::Mode rmode = RelocInfo::CODE_TARGET,
++            COND_ARGS);
++  void Call(Label* target);
++  void LoadAddress(Register dst, Label* target);
++
++  // Load the builtin given by the Smi in |builtin_index| into the same
++  // register.
++  void LoadEntryFromBuiltinIndex(Register builtin_index);
++  void CallBuiltinByIndex(Register builtin_index) override;
++
++  void LoadCodeObjectEntry(Register destination,
++                           Register code_object) override {
++    // TODO(mips): Implement.
++ UNIMPLEMENTED(); ++ } ++ void CallCodeObject(Register code_object) override { ++ // TODO(mips): Implement. ++ UNIMPLEMENTED(); ++ } ++ void JumpCodeObject(Register code_object) override { ++ // TODO(mips): Implement. ++ UNIMPLEMENTED(); ++ } ++ ++ // Generates an instruction sequence s.t. the return address points to the ++ // instruction following the call. ++ // The return address on the stack is used by frame iteration. ++ void StoreReturnAddressAndCall(Register target); ++ ++ void CallForDeoptimization(Address target, int deopt_id, Label* exit, ++ DeoptimizeKind kind); ++ ++ void Ret(COND_ARGS); ++ ++ // Emit code to discard a non-negative number of pointer-sized elements ++ // from the stack, clobbering only the sp register. ++ void Drop(int count, Condition cond = cc_always, Register reg = no_reg, ++ const Operand& op = Operand(no_reg)); ++ ++ // Trivial case of DropAndRet that utilizes the delay slot and only emits ++ // 2 instructions. ++ void DropAndRet(int drop); ++ ++ void DropAndRet(int drop, Condition cond, Register reg, const Operand& op); ++ ++ void Ld_d(Register rd, const MemOperand& rj); ++ void St_d(Register rd, const MemOperand& rj); ++ ++ void push(Register src) { ++ Add_d(sp, sp, Operand(-kPointerSize)); ++ St_d(src, MemOperand(sp, 0)); ++ } ++ void Push(Register src) { push(src); } ++ void Push(Handle handle); ++ void Push(Smi smi); ++ ++ // Push two registers. Pushes leftmost register first (to highest address). ++ void Push(Register src1, Register src2) { ++ Sub_d(sp, sp, Operand(2 * kPointerSize)); ++ St_d(src1, MemOperand(sp, 1 * kPointerSize)); ++ St_d(src2, MemOperand(sp, 0 * kPointerSize)); ++ } ++ ++ // Push three registers. Pushes leftmost register first (to highest address). ++ void Push(Register src1, Register src2, Register src3) { ++ Sub_d(sp, sp, Operand(3 * kPointerSize)); ++ St_d(src1, MemOperand(sp, 2 * kPointerSize)); ++ St_d(src2, MemOperand(sp, 1 * kPointerSize)); ++ St_d(src3, MemOperand(sp, 0 * kPointerSize)); ++ } ++ ++ // Push four registers. Pushes leftmost register first (to highest address). ++ void Push(Register src1, Register src2, Register src3, Register src4) { ++ Sub_d(sp, sp, Operand(4 * kPointerSize)); ++ St_d(src1, MemOperand(sp, 3 * kPointerSize)); ++ St_d(src2, MemOperand(sp, 2 * kPointerSize)); ++ St_d(src3, MemOperand(sp, 1 * kPointerSize)); ++ St_d(src4, MemOperand(sp, 0 * kPointerSize)); ++ } ++ ++ // Push five registers. Pushes leftmost register first (to highest address). ++ void Push(Register src1, Register src2, Register src3, Register src4, ++ Register src5) { ++ Sub_d(sp, sp, Operand(5 * kPointerSize)); ++ St_d(src1, MemOperand(sp, 4 * kPointerSize)); ++ St_d(src2, MemOperand(sp, 3 * kPointerSize)); ++ St_d(src3, MemOperand(sp, 2 * kPointerSize)); ++ St_d(src4, MemOperand(sp, 1 * kPointerSize)); ++ St_d(src5, MemOperand(sp, 0 * kPointerSize)); ++ } ++ ++ void Push(Register src, Condition cond, Register tst1, Register tst2) { ++ // Since we don't have conditional execution we use a Branch. 
++ Label skip; ++ Branch(&skip, cond, tst1, Operand(tst2)); ++ addi_d(sp, sp, -kPointerSize); ++ st_d(src, sp, 0); ++ bind(&skip); ++ } ++ ++ void SaveRegisters(RegList registers); ++ void RestoreRegisters(RegList registers); ++ ++ void CallRecordWriteStub(Register object, Register address, ++ RememberedSetAction remembered_set_action, ++ SaveFPRegsMode fp_mode); ++ void CallRecordWriteStub(Register object, Register address, ++ RememberedSetAction remembered_set_action, ++ SaveFPRegsMode fp_mode, Address wasm_target); ++ void CallEphemeronKeyBarrier(Register object, Register address, ++ SaveFPRegsMode fp_mode); ++ ++ // Push multiple registers on the stack. ++ // Registers are saved in numerical order, with higher numbered registers ++ // saved in higher memory addresses. ++ void MultiPush(RegList regs); ++ void MultiPush(RegList regs1, RegList regs2); ++ void MultiPush(RegList regs1, RegList regs2, RegList regs3); ++ void MultiPushFPU(RegList regs); ++ ++ // Calculate how much stack space (in bytes) are required to store caller ++ // registers excluding those specified in the arguments. ++ int RequiredStackSizeForCallerSaved(SaveFPRegsMode fp_mode, ++ Register exclusion1 = no_reg, ++ Register exclusion2 = no_reg, ++ Register exclusion3 = no_reg) const; ++ ++ // Push caller saved registers on the stack, and return the number of bytes ++ // stack pointer is adjusted. ++ int PushCallerSaved(SaveFPRegsMode fp_mode, Register exclusion1 = no_reg, ++ Register exclusion2 = no_reg, ++ Register exclusion3 = no_reg); ++ // Restore caller saved registers from the stack, and return the number of ++ // bytes stack pointer is adjusted. ++ int PopCallerSaved(SaveFPRegsMode fp_mode, Register exclusion1 = no_reg, ++ Register exclusion2 = no_reg, ++ Register exclusion3 = no_reg); ++ ++ void pop(Register dst) { ++ Ld_d(dst, MemOperand(sp, 0)); ++ Add_d(sp, sp, Operand(kPointerSize)); ++ } ++ void Pop(Register dst) { pop(dst); } ++ ++ // Pop two registers. Pops rightmost register first (from lower address). ++ void Pop(Register src1, Register src2) { ++ DCHECK(src1 != src2); ++ Ld_d(src2, MemOperand(sp, 0 * kPointerSize)); ++ Ld_d(src1, MemOperand(sp, 1 * kPointerSize)); ++ Add_d(sp, sp, 2 * kPointerSize); ++ } ++ ++ // Pop three registers. Pops rightmost register first (from lower address). ++ void Pop(Register src1, Register src2, Register src3) { ++ Ld_d(src3, MemOperand(sp, 0 * kPointerSize)); ++ Ld_d(src2, MemOperand(sp, 1 * kPointerSize)); ++ Ld_d(src1, MemOperand(sp, 2 * kPointerSize)); ++ Add_d(sp, sp, 3 * kPointerSize); ++ } ++ ++ void Pop(uint32_t count = 1) { Add_d(sp, sp, Operand(count * kPointerSize)); } ++ ++ // Pops multiple values from the stack and load them in the ++ // registers specified in regs. Pop order is the opposite as in MultiPush. 
++ void MultiPop(RegList regs); ++ void MultiPop(RegList regs1, RegList regs2); ++ void MultiPop(RegList regs1, RegList regs2, RegList regs3); ++ ++ void MultiPopFPU(RegList regs); ++ ++#define DEFINE_INSTRUCTION(instr) \ ++ void instr(Register rd, Register rj, const Operand& rk); \ ++ void instr(Register rd, Register rj, Register rk) { \ ++ instr(rd, rj, Operand(rk)); \ ++ } \ ++ void instr(Register rj, Register rk, int32_t j) { instr(rj, rk, Operand(j)); } ++ ++#define DEFINE_INSTRUCTION2(instr) \ ++ void instr(Register rj, const Operand& rk); \ ++ void instr(Register rj, Register rk) { instr(rj, Operand(rk)); } \ ++ void instr(Register rj, int32_t j) { instr(rj, Operand(j)); } ++ ++ DEFINE_INSTRUCTION(Add_w) ++ DEFINE_INSTRUCTION(Add_d) ++ DEFINE_INSTRUCTION(Div_w) ++ DEFINE_INSTRUCTION(Div_wu) ++ DEFINE_INSTRUCTION(Div_du) ++ DEFINE_INSTRUCTION(Mod_w) ++ DEFINE_INSTRUCTION(Mod_wu) ++ DEFINE_INSTRUCTION(Div_d) ++ DEFINE_INSTRUCTION(Sub_w) ++ DEFINE_INSTRUCTION(Sub_d) ++ DEFINE_INSTRUCTION(Mod_d) ++ DEFINE_INSTRUCTION(Mod_du) ++ DEFINE_INSTRUCTION(Mul_w) ++ DEFINE_INSTRUCTION(Mulh_w) ++ DEFINE_INSTRUCTION(Mulh_wu) ++ DEFINE_INSTRUCTION(Mul_d) ++ DEFINE_INSTRUCTION(Mulh_d) ++ DEFINE_INSTRUCTION2(Div_w) ++ DEFINE_INSTRUCTION2(Div_d) ++ DEFINE_INSTRUCTION2(Div_wu) ++ DEFINE_INSTRUCTION2(Div_du) ++ ++ DEFINE_INSTRUCTION(And) ++ DEFINE_INSTRUCTION(Or) ++ DEFINE_INSTRUCTION(Xor) ++ DEFINE_INSTRUCTION(Nor) ++ DEFINE_INSTRUCTION2(Neg) ++ DEFINE_INSTRUCTION(Andn) ++ DEFINE_INSTRUCTION(Orn) ++ ++ DEFINE_INSTRUCTION(Slt) ++ DEFINE_INSTRUCTION(Sltu) ++ DEFINE_INSTRUCTION(Slti) ++ DEFINE_INSTRUCTION(Sltiu) ++ DEFINE_INSTRUCTION(Sle) ++ DEFINE_INSTRUCTION(Sleu) ++ DEFINE_INSTRUCTION(Sgt) ++ DEFINE_INSTRUCTION(Sgtu) ++ DEFINE_INSTRUCTION(Sge) ++ DEFINE_INSTRUCTION(Sgeu) ++ ++ DEFINE_INSTRUCTION(Rotr_w) ++ DEFINE_INSTRUCTION(Rotr_d) ++ ++#undef DEFINE_INSTRUCTION ++#undef DEFINE_INSTRUCTION2 ++#undef DEFINE_INSTRUCTION3 ++ ++ void SmiUntag(Register dst, const MemOperand& src); ++ void SmiUntag(Register dst, Register src) { ++ if (SmiValuesAre32Bits()) { ++ srai_d(dst, src, kSmiShift); ++ } else { ++ DCHECK(SmiValuesAre31Bits()); ++ srai_w(dst, src, kSmiShift); ++ } ++ } ++ ++ void SmiUntag(Register reg) { SmiUntag(reg, reg); } ++ ++ // Removes current frame and its arguments from the stack preserving ++ // the arguments and a return address pushed to the stack for the next call. ++ // Both |callee_args_count| and |caller_args_count| do not include ++ // receiver. |callee_args_count| is not modified. |caller_args_count| ++ // is trashed. ++ void PrepareForTailCall(Register callee_args_count, ++ Register caller_args_count, Register scratch0, ++ Register scratch1); ++ ++ int CalculateStackPassedWords(int num_reg_arguments, ++ int num_double_arguments); ++ ++ // Before calling a C-function from generated code, align arguments on stack ++ // and add space for the four mips argument slots. ++ // After aligning the frame, non-register arguments must be stored on the ++ // stack, after the argument-slots using helper: CFunctionArgumentOperand(). ++ // The argument count assumes all arguments are word sized. ++ // Some compilers/platforms require the stack to be aligned when calling ++ // C++ code. ++ // Needs a scratch register to do some arithmetic. This register will be ++ // trashed. 
++ void PrepareCallCFunction(int num_reg_arguments, int num_double_registers, ++ Register scratch); ++ void PrepareCallCFunction(int num_reg_arguments, Register scratch); ++ ++ // Calls a C function and cleans up the space for arguments allocated ++ // by PrepareCallCFunction. The called function is not allowed to trigger a ++ // garbage collection, since that might move the code and invalidate the ++ // return address (unless this is somehow accounted for by the called ++ // function). ++ void CallCFunction(ExternalReference function, int num_arguments); ++ void CallCFunction(Register function, int num_arguments); ++ void CallCFunction(ExternalReference function, int num_reg_arguments, ++ int num_double_arguments); ++ void CallCFunction(Register function, int num_reg_arguments, ++ int num_double_arguments); ++ void MovFromFloatResult(DoubleRegister dst); ++ void MovFromFloatParameter(DoubleRegister dst); ++ ++ // There are two ways of passing double arguments on MIPS, depending on ++ // whether soft or hard floating point ABI is used. These functions ++ // abstract parameter passing for the three different ways we call ++ // C functions from generated code. ++ void MovToFloatParameter(DoubleRegister src); ++ void MovToFloatParameters(DoubleRegister src1, DoubleRegister src2); ++ void MovToFloatResult(DoubleRegister src); ++ ++ // See comments at the beginning of Builtins::Generate_CEntry. ++ inline void PrepareCEntryArgs(int num_args) { li(a0, num_args); } ++ inline void PrepareCEntryFunction(const ExternalReference& ref) { ++ li(a1, ref); ++ } ++ ++ void CheckPageFlag(Register object, Register scratch, int mask, Condition cc, ++ Label* condition_met); ++#undef COND_ARGS ++ ++ // Performs a truncating conversion of a floating point number as used by ++ // the JS bitwise operations. See ECMA-262 9.5: ToInt32. ++ // Exits with 'result' holding the answer. ++ void TruncateDoubleToI(Isolate* isolate, Zone* zone, Register result, ++ DoubleRegister double_input, StubCallMode stub_mode); ++ ++ // Conditional move. ++ void Movz(Register rd, Register rj, Register rk); ++ void Movn(Register rd, Register rj, Register rk); ++ ++ void LoadZeroIfFPUCondition(Register dest, CFRegister = FCC0); ++ void LoadZeroIfNotFPUCondition(Register dest, CFRegister = FCC0); ++ ++ void LoadZeroIfConditionNotZero(Register dest, Register condition); ++ void LoadZeroIfConditionZero(Register dest, Register condition); ++ void LoadZeroOnCondition(Register rd, Register rj, const Operand& rk, ++ Condition cond); ++ ++ void Clz_w(Register rd, Register rj); ++ void Clz_d(Register rd, Register rj); ++ void Ctz_w(Register rd, Register rj); ++ void Ctz_d(Register rd, Register rj); ++ void Popcnt_w(Register rd, Register rj); ++ void Popcnt_d(Register rd, Register rj); ++ ++ void ExtractBits(Register dest, Register source, Register pos, int size, ++ bool sign_extend = false); ++ void InsertBits(Register dest, Register source, Register pos, int size); ++ ++ void Bstrins_w(Register rk, Register rj, uint16_t msbw, uint16_t lswb); ++ void Bstrins_d(Register rk, Register rj, uint16_t msbw, uint16_t lsbw); ++ void Bstrpick_w(Register rk, Register rj, uint16_t msbw, uint16_t lsbw); ++ void Bstrpick_d(Register rk, Register rj, uint16_t msbw, uint16_t lsbw); ++ void Neg_s(FPURegister fd, FPURegister fj); ++ void Neg_d(FPURegister fd, FPURegister fk); ++ ++ // Convert single to unsigned word. 
++ void Trunc_uw_s(FPURegister fd, FPURegister fj, FPURegister scratch); ++ void Trunc_uw_s(Register rd, FPURegister fj, FPURegister scratch); ++ ++ // Change endianness ++ void ByteSwapSigned(Register dest, Register src, int operand_size); ++ void ByteSwapUnsigned(Register dest, Register src, int operand_size); ++ ++ void Ld_b(Register rd, const MemOperand& rj); ++ void Ld_bu(Register rd, const MemOperand& rj); ++ void St_b(Register rd, const MemOperand& rj); ++ ++ void Ld_h(Register rd, const MemOperand& rj); ++ void Ld_hu(Register rd, const MemOperand& rj); ++ void St_h(Register rd, const MemOperand& rj); ++ ++ void Ld_w(Register rd, const MemOperand& rj); ++ void Ld_wu(Register rd, const MemOperand& rj); ++ void St_w(Register rd, const MemOperand& rj); ++ ++ void Fld_s(FPURegister fd, const MemOperand& src); ++ void Fst_s(FPURegister fj, const MemOperand& dst); ++ ++ void Fld_d(FPURegister fd, const MemOperand& src); ++ void Fst_d(FPURegister fj, const MemOperand& dst); ++ ++ void Ll_w(Register rd, const MemOperand& rj); ++ void Sc_w(Register rd, const MemOperand& rj); ++ ++ void Ll_d(Register rd, const MemOperand& rj); ++ void Sc_d(Register rd, const MemOperand& rj); ++ ++ // These functions assume (and assert) that src1!=src2. It is permitted ++ // for the result to alias either input register. ++ void Float32Max(FPURegister dst, FPURegister src1, FPURegister src2, ++ Label* out_of_line); ++ void Float32Min(FPURegister dst, FPURegister src1, FPURegister src2, ++ Label* out_of_line); ++ void Float64Max(FPURegister dst, FPURegister src1, FPURegister src2, ++ Label* out_of_line); ++ void Float64Min(FPURegister dst, FPURegister src1, FPURegister src2, ++ Label* out_of_line); ++ ++ // Generate out-of-line cases for the macros above. ++ void Float32MaxOutOfLine(FPURegister dst, FPURegister src1, FPURegister src2); ++ void Float32MinOutOfLine(FPURegister dst, FPURegister src1, FPURegister src2); ++ void Float64MaxOutOfLine(FPURegister dst, FPURegister src1, FPURegister src2); ++ void Float64MinOutOfLine(FPURegister dst, FPURegister src1, FPURegister src2); ++ ++ bool IsDoubleZeroRegSet() { return has_double_zero_reg_set_; } ++ ++ void mov(Register rd, Register rj) { or_(rd, rj, zero_reg); } ++ ++ inline void Move(Register dst, Handle handle) { li(dst, handle); } ++ inline void Move(Register dst, Smi smi) { li(dst, Operand(smi)); } ++ ++ inline void Move(Register dst, Register src) { ++ if (dst != src) { ++ mov(dst, src); ++ } ++ } ++ ++ inline void FmoveLow(Register dst_low, FPURegister src) { ++ movfr2gr_s(dst_low, src); ++ } ++ ++ void FmoveLow(FPURegister dst, Register src_low); ++ ++ inline void Move(FPURegister dst, FPURegister src) { Move_d(dst, src); } ++ ++ inline void Move_d(FPURegister dst, FPURegister src) { ++ if (dst != src) { ++ fmov_d(dst, src); ++ } ++ } ++ ++ inline void Move_s(FPURegister dst, FPURegister src) { ++ if (dst != src) { ++ fmov_s(dst, src); ++ } ++ } ++ ++ void Move(FPURegister dst, float imm) { Move(dst, bit_cast(imm)); } ++ void Move(FPURegister dst, double imm) { Move(dst, bit_cast(imm)); } ++ void Move(FPURegister dst, uint32_t src); ++ void Move(FPURegister dst, uint64_t src); ++ ++ // AdddOverflow sets overflow register to a negative value if ++ // overflow occured, otherwise it is zero or positive ++ void AdddOverflow(Register dst, Register left, const Operand& right, ++ Register overflow); ++ // SubdOverflow sets overflow register to a negative value if ++ // overflow occured, otherwise it is zero or positive ++ void SubdOverflow(Register dst, Register 
left, const Operand& right, ++ Register overflow); ++ // MulOverflow sets overflow register to zero if no overflow occured ++ void MulOverflow(Register dst, Register left, const Operand& right, ++ Register overflow); ++ ++ // Number of instructions needed for calculation of switch table entry address ++ static const int kSwitchTablePrologueSize = 5; ++ ++ // GetLabelFunction must be lambda '[](size_t index) -> Label*' or a ++ // functor/function with 'Label *func(size_t index)' declaration. ++ template ++ void GenerateSwitchTable(Register index, size_t case_count, ++ Func GetLabelFunction); ++ ++ // Load an object from the root table. ++ void LoadRoot(Register destination, RootIndex index) override; ++ void LoadRoot(Register destination, RootIndex index, Condition cond, ++ Register src1, const Operand& src2); ++ ++ // If the value is a NaN, canonicalize the value, src must be nan. ++ void FPUCanonicalizeNaN(const DoubleRegister dst, const DoubleRegister src); ++ ++ // --------------------------------------------------------------------------- ++ // FPU macros. These do not handle special cases like NaN or +- inf. ++ ++ // Convert unsigned word to double. ++ void Ffint_d_uw(FPURegister fd, FPURegister fj); ++ void Ffint_d_uw(FPURegister fd, Register rj); ++ ++ // Convert unsigned long to double. ++ void Ffint_d_ul(FPURegister fd, FPURegister fj); ++ void Ffint_d_ul(FPURegister fd, Register rj); ++ ++ // Convert unsigned word to float. ++ void Ffint_s_uw(FPURegister fd, FPURegister fj); ++ void Ffint_s_uw(FPURegister fd, Register rj); ++ ++ // Convert unsigned long to float. ++ void Ffint_s_ul(FPURegister fd, FPURegister fj); ++ void Ffint_s_ul(FPURegister fd, Register rj); ++ ++ // Convert double to unsigned word. ++ void Ftintrz_uw_d(FPURegister fd, FPURegister fj, FPURegister scratch); ++ void Ftintrz_uw_d(Register rd, FPURegister fj, FPURegister scratch); ++ ++ // Convert single to unsigned word. ++ void Ftintrz_uw_s(FPURegister fd, FPURegister fs, FPURegister scratch); ++ void Ftintrz_uw_s(Register rd, FPURegister fs, FPURegister scratch); ++ ++ // Convert double to unsigned long. ++ void Ftintrz_ul_d(FPURegister fd, FPURegister fj, FPURegister scratch, ++ Register result = no_reg); ++ void Ftintrz_ul_d(Register rd, FPURegister fj, FPURegister scratch, ++ Register result = no_reg); ++ ++ // Convert single to unsigned long. ++ void Ftintrz_ul_s(FPURegister fd, FPURegister fj, FPURegister scratch, ++ Register result = no_reg); ++ void Ftintrz_ul_s(Register rd, FPURegister fj, FPURegister scratch, ++ Register result = no_reg); ++ ++ // Round double functions ++ void Trunc_d(FPURegister fd, FPURegister fj); ++ void Round_d(FPURegister fd, FPURegister fj); ++ void Floor_d(FPURegister fd, FPURegister fj); ++ void Ceil_d(FPURegister fd, FPURegister fj); ++ ++ // Round float functions ++ void Trunc_s(FPURegister fd, FPURegister fj); ++ void Round_s(FPURegister fd, FPURegister fj); ++ void Floor_s(FPURegister fd, FPURegister fj); ++ void Ceil_s(FPURegister fd, FPURegister fj); ++ ++ // Jump the register contains a smi. ++ void JumpIfSmi(Register value, Label* smi_label, Register scratch = t7); ++ ++ void JumpIfEqual(Register a, int32_t b, Label* dest) { ++ li(kScratchReg, Operand(b)); ++ Branch(dest, eq, a, Operand(kScratchReg)); ++ } ++ ++ void JumpIfLessThan(Register a, int32_t b, Label* dest) { ++ li(kScratchReg, Operand(b)); ++ Branch(dest, lt, a, Operand(kScratchReg)); ++ } ++ ++ // Push a standard frame, consisting of ra, fp, context and JS function. 
++ void PushStandardFrame(Register function_reg); ++ ++ // Get the actual activation frame alignment for target environment. ++ static int ActivationFrameAlignment(); ++ ++ // Load Scaled Address instructions. Parameter sa (shift argument) must be ++ // between [1, 31] (inclusive). The scratch register may be clobbered. ++ void Alsl_w(Register rd, Register rj, Register rk, uint8_t sa, ++ Register scratch = t7); ++ void Alsl_d(Register rd, Register rj, Register rk, uint8_t sa, ++ Register scratch = t7); ++ ++ // Compute the start of the generated instruction stream from the current PC. ++ // This is an alternative to embedding the {CodeObject} handle as a reference. ++ void ComputeCodeStartAddress(Register dst); ++ ++ void ResetSpeculationPoisonRegister(); ++ ++ // Control-flow integrity: ++ ++ // Define a function entrypoint. This doesn't emit any code for this ++ // architecture, as control-flow integrity is not supported for it. ++ void CodeEntry() {} ++ // Define an exception handler. ++ void ExceptionHandler() {} ++ // Define an exception handler and bind a label. ++ void BindExceptionHandler(Label* label) { bind(label); } ++ ++ protected: ++ inline Register GetRkAsRegisterHelper(const Operand& rk, Register scratch); ++ inline int32_t GetOffset(Label* L, OffsetSize bits); ++ ++ private: ++ bool has_double_zero_reg_set_ = false; ++ ++ // Performs a truncating conversion of a floating point number as used by ++ // the JS bitwise operations. See ECMA-262 9.5: ToInt32. Goes to 'done' if it ++ // succeeds, otherwise falls through if result is saturated. On return ++ // 'result' either holds answer, or is clobbered on fall through. ++ void TryInlineTruncateDoubleToI(Register result, DoubleRegister input, ++ Label* done); ++ ++ bool BranchShortOrFallback(Label* L, Condition cond, Register rj, ++ const Operand& rk, bool need_link); ++ ++ // f32 or f64 ++ void CompareF(FPURegister cmp1, FPURegister cmp2, FPUCondition cc, ++ CFRegister cd, bool f32 = true); ++ ++ void CompareIsNanF(FPURegister cmp1, FPURegister cmp2, CFRegister cd, ++ bool f32 = true); ++ ++ void CallCFunctionHelper(Register function, int num_reg_arguments, ++ int num_double_arguments); ++ ++ void RoundDouble(FPURegister dst, FPURegister src, FPURoundingMode mode); ++ ++ void RoundFloat(FPURegister dst, FPURegister src, FPURoundingMode mode); ++ ++ // Push a fixed frame, consisting of ra, fp. ++ void PushCommonFrame(Register marker_reg = no_reg); ++ ++ void CallRecordWriteStub(Register object, Register address, ++ RememberedSetAction remembered_set_action, ++ SaveFPRegsMode fp_mode, Handle code_target, ++ Address wasm_target); ++}; ++ ++// MacroAssembler implements a collection of frequently used macros. ++class V8_EXPORT_PRIVATE MacroAssembler : public TurboAssembler { ++ public: ++ using TurboAssembler::TurboAssembler; ++ ++ bool IsNear(Label* L, Condition cond, int rs_reg); ++ ++ // Swap two registers. If the scratch register is omitted then a slightly ++ // less efficient form using xor instead of mov is emitted. ++ void Swap(Register reg1, Register reg2, Register scratch = no_reg); ++ ++ void PushRoot(RootIndex index) { ++ UseScratchRegisterScope temps(this); ++ Register scratch = temps.Acquire(); ++ LoadRoot(scratch, index); ++ Push(scratch); ++ } ++ ++ // Compare the object in a register to a value and jump if they are equal. 
++ void JumpIfRoot(Register with, RootIndex index, Label* if_equal) { ++ UseScratchRegisterScope temps(this); ++ Register scratch = temps.Acquire(); ++ LoadRoot(scratch, index); ++ Branch(if_equal, eq, with, Operand(scratch)); ++ } ++ ++ // Compare the object in a register to a value and jump if they are not equal. ++ void JumpIfNotRoot(Register with, RootIndex index, Label* if_not_equal) { ++ UseScratchRegisterScope temps(this); ++ Register scratch = temps.Acquire(); ++ LoadRoot(scratch, index); ++ Branch(if_not_equal, ne, with, Operand(scratch)); ++ } ++ ++ // Checks if value is in range [lower_limit, higher_limit] using a single ++ // comparison. ++ void JumpIfIsInRange(Register value, unsigned lower_limit, ++ unsigned higher_limit, Label* on_in_range); ++ ++ // --------------------------------------------------------------------------- ++ // GC Support ++ ++ // Notify the garbage collector that we wrote a pointer into an object. ++ // |object| is the object being stored into, |value| is the object being ++ // stored. value and scratch registers are clobbered by the operation. ++ // The offset is the offset from the start of the object, not the offset from ++ // the tagged HeapObject pointer. For use with FieldOperand(reg, off). ++ void RecordWriteField( ++ Register object, int offset, Register value, Register scratch, ++ RAStatus ra_status, SaveFPRegsMode save_fp, ++ RememberedSetAction remembered_set_action = EMIT_REMEMBERED_SET, ++ SmiCheck smi_check = INLINE_SMI_CHECK); ++ ++ // For a given |object| notify the garbage collector that the slot |address| ++ // has been written. |value| is the object being stored. The value and ++ // address registers are clobbered by the operation. ++ void RecordWrite( ++ Register object, Register address, Register value, RAStatus ra_status, ++ SaveFPRegsMode save_fp, ++ RememberedSetAction remembered_set_action = EMIT_REMEMBERED_SET, ++ SmiCheck smi_check = INLINE_SMI_CHECK); ++ ++ void Pref(int32_t hint, const MemOperand& rs); ++ ++ // --------------------------------------------------------------------------- ++ // Pseudo-instructions. ++ ++ void LoadWordPair(Register rd, const MemOperand& rj, Register scratch); ++ void StoreWordPair(Register rd, const MemOperand& rj, Register scratch); ++ ++ // Convert double to unsigned long. ++ void Ftintrz_l_ud(FPURegister fd, FPURegister fj, FPURegister scratch); ++ ++ void Ftintrz_l_d(FPURegister fd, FPURegister fj); ++ void Ftintrne_l_d(FPURegister fd, FPURegister fj); ++ void Ftintrm_l_d(FPURegister fd, FPURegister fj); ++ void Ftintrp_l_d(FPURegister fd, FPURegister fj); ++ ++ void Ftintrz_w_d(FPURegister fd, FPURegister fj); ++ void Ftintrne_w_d(FPURegister fd, FPURegister fj); ++ void Ftintrm_w_d(FPURegister fd, FPURegister fj); ++ void Ftintrp_w_d(FPURegister fd, FPURegister fj); ++ ++ void Madd_s(FPURegister fd, FPURegister fa, FPURegister fj, FPURegister fk); ++ void Madd_d(FPURegister fd, FPURegister fa, FPURegister fj, FPURegister fk); ++ void Msub_s(FPURegister fd, FPURegister fa, FPURegister fj, FPURegister fk); ++ void Msub_d(FPURegister fd, FPURegister fa, FPURegister fj, FPURegister fk); ++ ++ // Truncates a double using a specific rounding mode, and writes the value ++ // to the result register. ++ // The except_flag will contain any exceptions caused by the instruction. ++ // If check_inexact is kDontCheckForInexactConversion, then the inexact ++ // exception is masked. 
++ void EmitFPUTruncate( ++ FPURoundingMode rounding_mode, Register result, ++ DoubleRegister double_input, Register scratch, ++ DoubleRegister double_scratch, Register except_flag, ++ CheckForInexactConversion check_inexact = kDontCheckForInexactConversion); ++ ++ // Enter exit frame. ++ // argc - argument count to be dropped by LeaveExitFrame. ++ // save_doubles - saves FPU registers on stack, currently disabled. ++ // stack_space - extra stack space. ++ void EnterExitFrame(bool save_doubles, int stack_space = 0, ++ StackFrame::Type frame_type = StackFrame::EXIT); ++ ++ // Leave the current exit frame. ++ void LeaveExitFrame(bool save_doubles, Register arg_count, ++ bool do_return = NO_EMIT_RETURN, ++ bool argument_count_is_length = false); ++ ++ void LoadMap(Register destination, Register object); ++ ++ // Make sure the stack is aligned. Only emits code in debug mode. ++ void AssertStackIsAligned(); ++ ++ // Load the global proxy from the current context. ++ void LoadGlobalProxy(Register dst) { ++ LoadNativeContextSlot(Context::GLOBAL_PROXY_INDEX, dst); ++ } ++ ++ void LoadNativeContextSlot(int index, Register dst); ++ ++ // Load the initial map from the global function. The registers ++ // function and map can be the same, function is then overwritten. ++ void LoadGlobalFunctionInitialMap(Register function, Register map, ++ Register scratch); ++ ++ // ------------------------------------------------------------------------- ++ // JavaScript invokes. ++ ++ // Invoke the JavaScript function code by either calling or jumping. ++ void InvokeFunctionCode(Register function, Register new_target, ++ Register expected_parameter_count, ++ Register actual_parameter_count, InvokeFlag flag); ++ ++ // On function call, call into the debugger if necessary. ++ void CheckDebugHook(Register fun, Register new_target, ++ Register expected_parameter_count, ++ Register actual_parameter_count); ++ ++ // Invoke the JavaScript function in the given register. Changes the ++ // current context to the context in the function before invoking. ++ void InvokeFunctionWithNewTarget(Register function, Register new_target, ++ Register actual_parameter_count, ++ InvokeFlag flag); ++ void InvokeFunction(Register function, Register expected_parameter_count, ++ Register actual_parameter_count, InvokeFlag flag); ++ ++ // Frame restart support. ++ void MaybeDropFrames(); ++ ++ // Exception handling. ++ ++ // Push a new stack handler and link into stack handler chain. ++ void PushStackHandler(); ++ ++ // Unlink the stack handler on top of the stack from the stack handler chain. ++ // Must preserve the result register. ++ void PopStackHandler(); ++ ++ // ------------------------------------------------------------------------- ++ // Support functions. ++ ++ void GetObjectType(Register function, Register map, Register type_reg); ++ ++ // ------------------------------------------------------------------------- ++ // Runtime calls. ++ ++ // Call a runtime routine. ++ void CallRuntime(const Runtime::Function* f, int num_arguments, ++ SaveFPRegsMode save_doubles = kDontSaveFPRegs); ++ ++ // Convenience function: Same as above, but takes the fid instead. ++ void CallRuntime(Runtime::FunctionId fid, ++ SaveFPRegsMode save_doubles = kDontSaveFPRegs) { ++ const Runtime::Function* function = Runtime::FunctionForId(fid); ++ CallRuntime(function, function->nargs, save_doubles); ++ } ++ ++ // Convenience function: Same as above, but takes the fid instead. 
++ void CallRuntime(Runtime::FunctionId fid, int num_arguments, ++ SaveFPRegsMode save_doubles = kDontSaveFPRegs) { ++ CallRuntime(Runtime::FunctionForId(fid), num_arguments, save_doubles); ++ } ++ ++ // Convenience function: tail call a runtime routine (jump). ++ void TailCallRuntime(Runtime::FunctionId fid); ++ ++ // Jump to the builtin routine. ++ void JumpToExternalReference(const ExternalReference& builtin, ++ bool builtin_exit_frame = false); ++ ++ // Generates a trampoline to jump to the off-heap instruction stream. ++ void JumpToInstructionStream(Address entry); ++ ++ // --------------------------------------------------------------------------- ++ // In-place weak references. ++ void LoadWeakValue(Register out, Register in, Label* target_if_cleared); ++ ++ // ------------------------------------------------------------------------- ++ // StatsCounter support. ++ ++ void IncrementCounter(StatsCounter* counter, int value, Register scratch1, ++ Register scratch2); ++ void DecrementCounter(StatsCounter* counter, int value, Register scratch1, ++ Register scratch2); ++ ++ // ------------------------------------------------------------------------- ++ // Smi utilities. ++ ++ void SmiTag(Register dst, Register src) { ++ STATIC_ASSERT(kSmiTag == 0); ++ if (SmiValuesAre32Bits()) { ++ slli_d(dst, src, 32); ++ } else { ++ DCHECK(SmiValuesAre31Bits()); ++ add_w(dst, src, src); ++ } ++ } ++ ++ void SmiTag(Register reg) { SmiTag(reg, reg); } ++ ++ // Left-shifted from int32 equivalent of Smi. ++ void SmiScale(Register dst, Register src, int scale) { ++ if (SmiValuesAre32Bits()) { ++ // The int portion is upper 32-bits of 64-bit word. ++ srai_d(dst, src, kSmiShift - scale); ++ } else { ++ DCHECK(SmiValuesAre31Bits()); ++ DCHECK_GE(scale, kSmiTagSize); ++ slli_w(dst, src, scale - kSmiTagSize); ++ } ++ } ++ ++ // Test if the register contains a smi. ++ inline void SmiTst(Register value, Register scratch) { ++ And(scratch, value, Operand(kSmiTagMask)); ++ } ++ ++ // Jump if the register contains a non-smi. ++ void JumpIfNotSmi(Register value, Label* not_smi_label, Register scratch); ++ ++ // Abort execution if argument is a smi, enabled via --debug-code. ++ void AssertNotSmi(Register object); ++ void AssertSmi(Register object); ++ ++ // Abort execution if argument is not a Constructor, enabled via --debug-code. ++ void AssertConstructor(Register object); ++ ++ // Abort execution if argument is not a JSFunction, enabled via --debug-code. ++ void AssertFunction(Register object); ++ ++ // Abort execution if argument is not a JSBoundFunction, ++ // enabled via --debug-code. ++ void AssertBoundFunction(Register object); ++ ++ // Abort execution if argument is not a JSGeneratorObject (or subclass), ++ // enabled via --debug-code. ++ void AssertGeneratorObject(Register object); ++ ++ // Abort execution if argument is not undefined or an AllocationSite, enabled ++ // via --debug-code. ++ void AssertUndefinedOrAllocationSite(Register object, Register scratch); ++ ++ template ++ void DecodeField(Register dst, Register src) { ++ Bstrpick_d(dst, src, Field::kShift + Field::kSize - 1, Field::kShift); ++ } ++ ++ template ++ void DecodeField(Register reg) { ++ DecodeField(reg, reg); ++ } ++ ++ private: ++ // Helper functions for generating invokes. ++ void InvokePrologue(Register expected_parameter_count, ++ Register actual_parameter_count, Label* done, ++ InvokeFlag flag); ++ ++ // Compute memory operands for safepoint stack slots. 
++ static int SafepointRegisterStackIndex(int reg_code); ++ ++ // Needs access to SafepointRegisterStackIndex for compiled frame ++ // traversal. ++ friend class StandardFrame; ++ ++ DISALLOW_IMPLICIT_CONSTRUCTORS(MacroAssembler); ++}; ++ ++template ++void TurboAssembler::GenerateSwitchTable(Register index, size_t case_count, ++ Func GetLabelFunction) { ++ // Ensure that dd-ed labels following this instruction use 8 bytes aligned ++ // addresses. ++ BlockTrampolinePoolFor(static_cast(case_count) * 2 + ++ kSwitchTablePrologueSize); ++ UseScratchRegisterScope temps(this); ++ Register scratch = temps.Acquire(); ++ Align(8); // next is 4 instrs. ++ pcaddi(scratch, 4); ++ // alsl_d will do sa ++ alsl_d(scratch, index, scratch, kPointerSizeLog2); ++ Ld_d(scratch, MemOperand(scratch, 0)); ++ jirl(zero_reg, scratch, 0); ++ for (size_t index = 0; index < case_count; ++index) { ++ dd(GetLabelFunction(index)); ++ } ++} ++ ++#define ACCESS_MASM(masm) masm-> ++ ++} // namespace internal ++} // namespace v8 ++ ++#endif // V8_CODEGEN_LA64_MACRO_ASSEMBLER_LA64_H_ +diff --git a/src/3rdparty/chromium/v8/src/codegen/la64/register-la64.h b/src/3rdparty/chromium/v8/src/codegen/la64/register-la64.h +new file mode 100644 +index 00000000000..f2025e28e5f +--- /dev/null ++++ b/src/3rdparty/chromium/v8/src/codegen/la64/register-la64.h +@@ -0,0 +1,328 @@ ++// Copyright 2018 the V8 project authors. All rights reserved. ++// Use of this source code is governed by a BSD-style license that can be ++// found in the LICENSE file. ++ ++#ifndef V8_CODEGEN_LA64_REGISTER_LA64_H_ ++#define V8_CODEGEN_LA64_REGISTER_LA64_H_ ++ ++#include "src/codegen/la64/constants-la64.h" ++#include "src/codegen/register.h" ++#include "src/codegen/reglist.h" ++ ++namespace v8 { ++namespace internal { ++ ++// clang-format off ++#define GENERAL_REGISTERS(V) \ ++ V(zero_reg) V(ra) V(gp) V(sp) \ ++ V(a0) V(a1) V(a2) V(a3) V(a4) V(a5) V(a6) V(a7) \ ++ V(t0) V(t1) V(t2) V(t3) V(t4) V(t5) V(t6) V(t7) V(t8) \ ++ V(tp) V(fp) \ ++ V(s0) V(s1) V(s2) V(s3) V(s4) V(s5) V(s6) V(s7) V(s8) \ ++ ++#define ALLOCATABLE_GENERAL_REGISTERS(V) \ ++ V(a0) V(a1) V(a2) V(a3) V(a4) V(a5) V(a6) V(a7) \ ++ V(t0) V(t1) V(t2) V(t3) V(t4) V(t5) V(s7) ++ ++#define DOUBLE_REGISTERS(V) \ ++ V(f0) V(f1) V(f2) V(f3) V(f4) V(f5) V(f6) V(f7) \ ++ V(f8) V(f9) V(f10) V(f11) V(f12) V(f13) V(f14) V(f15) \ ++ V(f16) V(f17) V(f18) V(f19) V(f20) V(f21) V(f22) V(f23) \ ++ V(f24) V(f25) V(f26) V(f27) V(f28) V(f29) V(f30) V(f31) ++ ++#define FLOAT_REGISTERS DOUBLE_REGISTERS ++#define SIMD128_REGISTERS(V) \ ++ V(w0) V(w1) V(w2) V(w3) V(w4) V(w5) V(w6) V(w7) \ ++ V(w8) V(w9) V(w10) V(w11) V(w12) V(w13) V(w14) V(w15) \ ++ V(w16) V(w17) V(w18) V(w19) V(w20) V(w21) V(w22) V(w23) \ ++ V(w24) V(w25) V(w26) V(w27) V(w28) V(w29) V(w30) V(w31) ++ ++#define ALLOCATABLE_DOUBLE_REGISTERS(V) \ ++ V(f0) V(f1) V(f2) V(f3) V(f4) V(f5) V(f6) V(f7) \ ++ V(f8) V(f9) V(f10) V(f11) V(f12) V(f13) V(f14) V(f15) V(f16) \ ++ V(f17) V(f18) V(f19) V(f20) V(f21) V(f22) V(f23) ++// clang-format on ++ ++// Note that the bit values must match those used in actual instruction ++// encoding. 
++const int kNumRegs = 32; ++ ++const RegList kJSCallerSaved = 1 << 4 | // a0 ++ 1 << 5 | // a1 ++ 1 << 6 | // a2 ++ 1 << 7 | // a3 ++ 1 << 8 | // a4 ++ 1 << 9 | // a5 ++ 1 << 10 | // a6 ++ 1 << 11 | // a7 ++ 1 << 12 | // t0 ++ 1 << 13 | // t1 ++ 1 << 14 | // t2 ++ 1 << 15 | // t3 ++ 1 << 16 | // t4 ++ 1 << 17 | // t5 ++ 1 << 20; // t8 ++ ++const int kNumJSCallerSaved = 15; ++ ++// Callee-saved registers preserved when switching from C to JavaScript. ++const RegList kCalleeSaved = 1 << 22 | // fp ++ 1 << 23 | // s0 ++ 1 << 24 | // s1 ++ 1 << 25 | // s2 ++ 1 << 26 | // s3 ++ 1 << 27 | // s4 ++ 1 << 28 | // s5 ++ 1 << 29 | // s6 (roots in Javascript code) ++ 1 << 30 | // s7 (cp in Javascript code) ++ 1 << 31; // s8 ++ ++const int kNumCalleeSaved = 10; ++ ++const RegList kCalleeSavedFPU = 1 << 24 | // f24 ++ 1 << 25 | // f25 ++ 1 << 26 | // f26 ++ 1 << 27 | // f27 ++ 1 << 28 | // f28 ++ 1 << 29 | // f29 ++ 1 << 30 | // f30 ++ 1 << 31; // f31 ++ ++const int kNumCalleeSavedFPU = 8; ++ ++const RegList kCallerSavedFPU = 1 << 0 | // f0 ++ 1 << 1 | // f1 ++ 1 << 2 | // f2 ++ 1 << 3 | // f3 ++ 1 << 4 | // f4 ++ 1 << 5 | // f5 ++ 1 << 6 | // f6 ++ 1 << 7 | // f7 ++ 1 << 8 | // f8 ++ 1 << 9 | // f9 ++ 1 << 10 | // f10 ++ 1 << 11 | // f11 ++ 1 << 12 | // f12 ++ 1 << 13 | // f13 ++ 1 << 14 | // f14 ++ 1 << 15 | // f15 ++ 1 << 16 | // f16 ++ 1 << 17 | // f17 ++ 1 << 18 | // f18 ++ 1 << 19 | // f19 ++ 1 << 20 | // f20 ++ 1 << 21 | // f21 ++ 1 << 22 | // f22 ++ 1 << 23; // f23 ++ ++// Number of registers for which space is reserved in safepoints. Must be a ++// multiple of 8. ++const int kNumSafepointRegisters = 32; ++ ++// Define the list of registers actually saved at safepoints. ++// Note that the number of saved registers may be smaller than the reserved ++// space, i.e. kNumSafepointSavedRegisters <= kNumSafepointRegisters. ++const RegList kSafepointSavedRegisters = kJSCallerSaved | kCalleeSaved; ++const int kNumSafepointSavedRegisters = kNumJSCallerSaved + kNumCalleeSaved; ++ ++const int kUndefIndex = -1; ++// Map with indexes on stack that corresponds to codes of saved registers. ++const int kSafepointRegisterStackIndexMap[kNumRegs] = {kUndefIndex, // zero_reg ++ kUndefIndex, // ra ++ kUndefIndex, // gp ++ kUndefIndex, // sp ++ 0, // a0 ++ 1, // a1 ++ 2, // a2 ++ 3, // a3 ++ 4, // a4 ++ 5, // a5 ++ 6, // a6 ++ 7, // a7 ++ 8, // t0 ++ 9, // t1 ++ 10, // t2 ++ 11, // t3 ++ 12, // t4 ++ 13, // t5 ++ kUndefIndex, // t6 ++ kUndefIndex, // t7 ++ 14, // t8 ++ kUndefIndex, // tp ++ 15, // fp ++ 16, // s0 ++ 17, // s1 ++ 28, // s2 ++ 29, // s3 ++ 20, // s4 ++ 21, // s5 ++ 22, // s6 ++ 23, // s7 ++ 24}; // s8 ++ ++// CPU Registers. ++// ++// 1) We would prefer to use an enum, but enum values are assignment- ++// compatible with int, which has caused code-generation bugs. ++// ++// 2) We would prefer to use a class instead of a struct but we don't like ++// the register initialization to depend on the particular initialization ++// order (which appears to be different on OS X, Linux, and Windows for the ++// installed versions of C++ we tried). Using a struct permits C-style ++// "initialization". Also, the Register objects cannot be const as this ++// forces initialization stubs in MSVC, making us dependent on initialization ++// order. ++// ++// 3) By not using an enum, we are possibly preventing the compiler from ++// doing certain constant folds, which may significantly reduce the ++// code generated for some assembly instructions (because they boil down ++// to a few constants). 
If this is a problem, we could change the code ++// such that we use an enum in optimized mode, and the struct in debug ++// mode. This way we get the compile-time error checking in debug mode ++// and best performance in optimized code. ++ ++// ----------------------------------------------------------------------------- ++// Implementation of Register and FPURegister. ++ ++enum RegisterCode { ++#define REGISTER_CODE(R) kRegCode_##R, ++ GENERAL_REGISTERS(REGISTER_CODE) ++#undef REGISTER_CODE ++ kRegAfterLast ++}; ++ ++class Register : public RegisterBase { ++ public: ++ static constexpr int kMantissaOffset = 0; ++ static constexpr int kExponentOffset = 4; ++ ++ private: ++ friend class RegisterBase; ++ explicit constexpr Register(int code) : RegisterBase(code) {} ++}; ++ ++// s7: context register ++// s3: scratch register ++// s4: scratch register 2 ++#define DECLARE_REGISTER(R) \ ++ constexpr Register R = Register::from_code(kRegCode_##R); ++GENERAL_REGISTERS(DECLARE_REGISTER) ++#undef DECLARE_REGISTER ++ ++constexpr Register no_reg = Register::no_reg(); ++ ++int ToNumber(Register reg); ++ ++Register ToRegister(int num); ++ ++constexpr bool kPadArguments = false; ++constexpr bool kSimpleFPAliasing = true; ++constexpr bool kSimdMaskRegisters = false; ++ ++enum DoubleRegisterCode { ++#define REGISTER_CODE(R) kDoubleCode_##R, ++ DOUBLE_REGISTERS(REGISTER_CODE) ++#undef REGISTER_CODE ++ kDoubleAfterLast ++}; ++ ++// Coprocessor register. ++class FPURegister : public RegisterBase { ++ public: ++ FPURegister low() const { ++ // TODO(plind): Create DCHECK for FR=0 mode. This usage suspect for FR=1. ++ // Find low reg of a Double-reg pair, which is the reg itself. ++ DCHECK_EQ(code() % 2, 0); // Specified Double reg must be even. ++ return FPURegister::from_code(code()); ++ } ++ ++ private: ++ friend class RegisterBase; ++ explicit constexpr FPURegister(int code) : RegisterBase(code) {} ++}; ++ ++enum CFRegister { FCC0, FCC1, FCC2, FCC3, FCC4, FCC5, FCC6, FCC7 }; ++ ++using FloatRegister = FPURegister; ++ ++using DoubleRegister = FPURegister; ++ ++// TODO here only for build success ++using Simd128Register = FPURegister; ++ ++#define DECLARE_DOUBLE_REGISTER(R) \ ++ constexpr DoubleRegister R = DoubleRegister::from_code(kDoubleCode_##R); ++DOUBLE_REGISTERS(DECLARE_DOUBLE_REGISTER) ++#undef DECLARE_DOUBLE_REGISTER ++ ++constexpr DoubleRegister no_dreg = DoubleRegister::no_reg(); ++ ++// Register aliases. ++// cp is assumed to be a callee saved register. ++constexpr Register kRootRegister = s6; ++constexpr Register cp = s7; ++constexpr Register kScratchReg = s3; ++constexpr Register kScratchReg2 = s4; ++constexpr DoubleRegister kScratchDoubleReg = f30; ++// FPU zero reg is often used to hold 0.0, but it's not hardwired to 0.0. ++constexpr DoubleRegister kDoubleRegZero = f28; ++ ++// FPU (coprocessor 1) control registers. ++// Currently only FCSR0 is implemented. ++// TODO fscr0 fcsr1 fcsr2 fscsr3 ++struct FPUControlRegister { ++ bool is_valid() const { return reg_code == kFCSRRegister; } ++ bool is(FPUControlRegister creg) const { return reg_code == creg.reg_code; } ++ int code() const { ++ DCHECK(is_valid()); ++ return reg_code; ++ } ++ int bit() const { ++ DCHECK(is_valid()); ++ return 1 << reg_code; ++ } ++ void setcode(int f) { ++ reg_code = f; ++ DCHECK(is_valid()); ++ } ++ // Unfortunately we can't make this private in a struct. 
++ int reg_code; ++}; ++ ++constexpr FPUControlRegister no_fpucreg = {kInvalidFPUControlRegister}; ++constexpr FPUControlRegister FCSR = {kFCSRRegister}; ++ ++// Define {RegisterName} methods for the register types. ++DEFINE_REGISTER_NAMES(Register, GENERAL_REGISTERS) ++DEFINE_REGISTER_NAMES(FPURegister, DOUBLE_REGISTERS) ++ ++// Give alias names to registers for calling conventions. ++constexpr Register kReturnRegister0 = a0; ++constexpr Register kReturnRegister1 = a1; ++constexpr Register kReturnRegister2 = a2; ++constexpr Register kJSFunctionRegister = a1; ++constexpr Register kContextRegister = s7; ++constexpr Register kAllocateSizeRegister = a0; ++constexpr Register kSpeculationPoisonRegister = t3; ++constexpr Register kInterpreterAccumulatorRegister = a0; ++constexpr Register kInterpreterBytecodeOffsetRegister = t0; ++constexpr Register kInterpreterBytecodeArrayRegister = t1; ++constexpr Register kInterpreterDispatchTableRegister = t2; ++ ++constexpr Register kJavaScriptCallArgCountRegister = a0; ++constexpr Register kJavaScriptCallCodeStartRegister = a2; ++constexpr Register kJavaScriptCallTargetRegister = kJSFunctionRegister; ++constexpr Register kJavaScriptCallNewTargetRegister = a3; ++constexpr Register kJavaScriptCallExtraArg1Register = a2; ++ ++constexpr Register kOffHeapTrampolineRegister = t7; ++constexpr Register kRuntimeCallFunctionRegister = a1; ++constexpr Register kRuntimeCallArgCountRegister = a0; ++constexpr Register kRuntimeCallArgvRegister = a2; ++constexpr Register kWasmInstanceRegister = a0; ++constexpr Register kWasmCompileLazyFuncIndexRegister = t0; ++ ++} // namespace internal ++} // namespace v8 ++ ++#endif // V8_CODEGEN_LA64_REGISTER_LA64_H_ +diff --git a/src/3rdparty/chromium/v8/src/codegen/macro-assembler.h b/src/3rdparty/chromium/v8/src/codegen/macro-assembler.h +index 01175e585e9..9c2fa9e3108 100644 +--- a/src/3rdparty/chromium/v8/src/codegen/macro-assembler.h ++++ b/src/3rdparty/chromium/v8/src/codegen/macro-assembler.h +@@ -49,6 +49,9 @@ enum AllocationFlags { + #elif V8_TARGET_ARCH_MIPS64 + #include "src/codegen/mips64/constants-mips64.h" + #include "src/codegen/mips64/macro-assembler-mips64.h" ++#elif V8_TARGET_ARCH_LA64 ++#include "src/codegen/la64/constants-la64.h" ++#include "src/codegen/la64/macro-assembler-la64.h" + #elif V8_TARGET_ARCH_S390 + #include "src/codegen/s390/constants-s390.h" + #include "src/codegen/s390/macro-assembler-s390.h" +diff --git a/src/3rdparty/chromium/v8/src/codegen/mips64/assembler-mips64.cc b/src/3rdparty/chromium/v8/src/codegen/mips64/assembler-mips64.cc +index 37a05585c4b..cafcfef81d1 100644 +--- a/src/3rdparty/chromium/v8/src/codegen/mips64/assembler-mips64.cc ++++ b/src/3rdparty/chromium/v8/src/codegen/mips64/assembler-mips64.cc +@@ -996,7 +996,7 @@ void Assembler::next(Label* L, bool is_internal) { + } + + bool Assembler::is_near(Label* L) { +- DCHECK(L->is_bound()); ++ if (L == nullptr || !L->is_bound()) return true; + return pc_offset() - L->pos() < kMaxBranchOffset - 4 * kInstrSize; + } + +diff --git a/src/3rdparty/chromium/v8/src/codegen/mips64/assembler-mips64.h b/src/3rdparty/chromium/v8/src/codegen/mips64/assembler-mips64.h +index f70e46f81b3..c585840a7ad 100644 +--- a/src/3rdparty/chromium/v8/src/codegen/mips64/assembler-mips64.h ++++ b/src/3rdparty/chromium/v8/src/codegen/mips64/assembler-mips64.h +@@ -1864,6 +1864,7 @@ class V8_EXPORT_PRIVATE Assembler : public AssemblerBase { + // instruction. 
We use this information to trigger different mode of + // branch instruction generation, where we use jump instructions rather + // than regular branch instructions. ++ // TODO can this be optimied?????? + bool trampoline_emitted_; + static constexpr int kInvalidSlotPos = -1; + +diff --git a/src/3rdparty/chromium/v8/src/codegen/register-arch.h b/src/3rdparty/chromium/v8/src/codegen/register-arch.h +index 21a72330169..5ee6c4683d9 100644 +--- a/src/3rdparty/chromium/v8/src/codegen/register-arch.h ++++ b/src/3rdparty/chromium/v8/src/codegen/register-arch.h +@@ -22,6 +22,8 @@ + #include "src/codegen/mips/register-mips.h" + #elif V8_TARGET_ARCH_MIPS64 + #include "src/codegen/mips64/register-mips64.h" ++#elif V8_TARGET_ARCH_LA64 ++#include "src/codegen/la64/register-la64.h" + #elif V8_TARGET_ARCH_S390 + #include "src/codegen/s390/register-s390.h" + #else +diff --git a/src/3rdparty/chromium/v8/src/codegen/register-configuration.cc b/src/3rdparty/chromium/v8/src/codegen/register-configuration.cc +index 5752b463392..2c4bb1426a1 100644 +--- a/src/3rdparty/chromium/v8/src/codegen/register-configuration.cc ++++ b/src/3rdparty/chromium/v8/src/codegen/register-configuration.cc +@@ -58,6 +58,8 @@ static int get_num_allocatable_double_registers() { + kMaxAllocatableDoubleRegisterCount; + #elif V8_TARGET_ARCH_MIPS64 + kMaxAllocatableDoubleRegisterCount; ++#elif V8_TARGET_ARCH_LA64 ++ kMaxAllocatableDoubleRegisterCount; + #elif V8_TARGET_ARCH_PPC + kMaxAllocatableDoubleRegisterCount; + #elif V8_TARGET_ARCH_PPC64 +diff --git a/src/3rdparty/chromium/v8/src/codegen/reloc-info.cc b/src/3rdparty/chromium/v8/src/codegen/reloc-info.cc +index 9f079789326..ccbd7a355c2 100644 +--- a/src/3rdparty/chromium/v8/src/codegen/reloc-info.cc ++++ b/src/3rdparty/chromium/v8/src/codegen/reloc-info.cc +@@ -329,7 +329,8 @@ bool RelocInfo::OffHeapTargetIsCodedSpecially() { + return false; + #elif defined(V8_TARGET_ARCH_IA32) || defined(V8_TARGET_ARCH_MIPS) || \ + defined(V8_TARGET_ARCH_MIPS64) || defined(V8_TARGET_ARCH_PPC) || \ +- defined(V8_TARGET_ARCH_PPC64) || defined(V8_TARGET_ARCH_S390) ++ defined(V8_TARGET_ARCH_PPC64) || defined(V8_TARGET_ARCH_S390) || \ ++ defined(V8_TARGET_ARCH_MIPS64) || defined(V8_TARGET_ARCH_LA64) + return true; + #endif + } +diff --git a/src/3rdparty/chromium/v8/src/common/globals.h b/src/3rdparty/chromium/v8/src/common/globals.h +index c79b3b633cd..05078cb3f29 100644 +--- a/src/3rdparty/chromium/v8/src/common/globals.h ++++ b/src/3rdparty/chromium/v8/src/common/globals.h +@@ -58,6 +58,9 @@ constexpr int GB = MB * 1024; + #if (V8_TARGET_ARCH_S390 && !V8_HOST_ARCH_S390) + #define USE_SIMULATOR 1 + #endif ++#if (V8_TARGET_ARCH_LA64 && !V8_HOST_ARCH_LA64) ++#define USE_SIMULATOR 1 ++#endif + #endif + + // Determine whether the architecture uses an embedded constant pool +diff --git a/src/3rdparty/chromium/v8/src/compiler/backend/instruction-codes.h b/src/3rdparty/chromium/v8/src/compiler/backend/instruction-codes.h +index 84d5d249b83..353594436e4 100644 +--- a/src/3rdparty/chromium/v8/src/compiler/backend/instruction-codes.h ++++ b/src/3rdparty/chromium/v8/src/compiler/backend/instruction-codes.h +@@ -17,6 +17,8 @@ + #include "src/compiler/backend/mips/instruction-codes-mips.h" + #elif V8_TARGET_ARCH_MIPS64 + #include "src/compiler/backend/mips64/instruction-codes-mips64.h" ++#elif V8_TARGET_ARCH_LA64 ++#include "src/compiler/backend/la64/instruction-codes-la64.h" + #elif V8_TARGET_ARCH_X64 + #include "src/compiler/backend/x64/instruction-codes-x64.h" + #elif V8_TARGET_ARCH_PPC || V8_TARGET_ARCH_PPC64 
+diff --git a/src/3rdparty/chromium/v8/src/compiler/backend/instruction-selector.cc b/src/3rdparty/chromium/v8/src/compiler/backend/instruction-selector.cc +index 7d72dbbf2d0..628ba0e7c53 100644 +--- a/src/3rdparty/chromium/v8/src/compiler/backend/instruction-selector.cc ++++ b/src/3rdparty/chromium/v8/src/compiler/backend/instruction-selector.cc +@@ -2573,7 +2573,7 @@ void InstructionSelector::VisitWord32AtomicPairCompareExchange(Node* node) { + #endif // !V8_TARGET_ARCH_IA32 && !V8_TARGET_ARCH_ARM && !V8_TARGET_ARCH_MIPS + + #if !V8_TARGET_ARCH_X64 && !V8_TARGET_ARCH_ARM64 && !V8_TARGET_ARCH_MIPS64 && \ +- !V8_TARGET_ARCH_S390 && !V8_TARGET_ARCH_PPC64 ++ !V8_TARGET_ARCH_S390 && !V8_TARGET_ARCH_PPC64 && !V8_TARGET_ARCH_LA64 + void InstructionSelector::VisitWord64AtomicLoad(Node* node) { UNIMPLEMENTED(); } + + void InstructionSelector::VisitWord64AtomicStore(Node* node) { +@@ -2598,7 +2598,8 @@ void InstructionSelector::VisitWord64AtomicCompareExchange(Node* node) { + UNIMPLEMENTED(); + } + #endif // !V8_TARGET_ARCH_X64 && !V8_TARGET_ARCH_ARM64 && !V8_TARGET_ARCH_PPC64 +- // !V8_TARGET_ARCH_MIPS64 && !V8_TARGET_ARCH_S390 ++ // !V8_TARGET_ARCH_MIPS64 && !V8_TARGET_ARCH_S390 && ++ // !V8_TARGET_ARCH_LA64 + + #if !V8_TARGET_ARCH_IA32 && !V8_TARGET_ARCH_ARM + // This is only needed on 32-bit to split the 64-bit value into two operands. +diff --git a/src/3rdparty/chromium/v8/src/compiler/backend/la64/code-generator-la64.cc b/src/3rdparty/chromium/v8/src/compiler/backend/la64/code-generator-la64.cc +new file mode 100644 +index 00000000000..29bfffb5f63 +--- /dev/null ++++ b/src/3rdparty/chromium/v8/src/compiler/backend/la64/code-generator-la64.cc +@@ -0,0 +1,2847 @@ ++// Copyright 2014 the V8 project authors. All rights reserved. ++// Use of this source code is governed by a BSD-style license that can be ++// found in the LICENSE file. ++ ++#include "src/codegen/assembler-inl.h" ++#include "src/codegen/callable.h" ++#include "src/codegen/la64/constants-la64.h" ++#include "src/codegen/macro-assembler.h" ++#include "src/codegen/optimized-compilation-info.h" ++#include "src/compiler/backend/code-generator-impl.h" ++#include "src/compiler/backend/code-generator.h" ++#include "src/compiler/backend/gap-resolver.h" ++#include "src/compiler/node-matchers.h" ++#include "src/compiler/osr.h" ++#include "src/heap/heap-inl.h" // crbug.com/v8/8499 ++#include "src/wasm/wasm-code-manager.h" ++ ++namespace v8 { ++namespace internal { ++namespace compiler { ++ ++#define __ tasm()-> ++ ++// TODO(plind): consider renaming these macros. ++#define TRACE_MSG(msg) \ ++ PrintF("code_gen: \'%s\' in function %s at line %d\n", msg, __FUNCTION__, \ ++ __LINE__) ++ ++#define TRACE_UNIMPL() \ ++ PrintF("UNIMPLEMENTED code_generator_la64: %s at line %d\n", __FUNCTION__, \ ++ __LINE__) ++ ++// Adds La64-specific methods to convert InstructionOperands. ++class La64OperandConverter final : public InstructionOperandConverter { ++ public: ++ La64OperandConverter(CodeGenerator* gen, Instruction* instr) ++ : InstructionOperandConverter(gen, instr) {} ++ ++ FloatRegister OutputSingleRegister(size_t index = 0) { ++ return ToSingleRegister(instr_->OutputAt(index)); ++ } ++ ++ FloatRegister InputSingleRegister(size_t index) { ++ return ToSingleRegister(instr_->InputAt(index)); ++ } ++ ++ FloatRegister ToSingleRegister(InstructionOperand* op) { ++ // Single (Float) and Double register namespace is same on LA64, ++ // both are typedefs of FPURegister. 
++ return ToDoubleRegister(op); ++ } ++ ++ Register InputOrZeroRegister(size_t index) { ++ if (instr_->InputAt(index)->IsImmediate()) { ++ DCHECK_EQ(0, InputInt32(index)); ++ return zero_reg; ++ } ++ return InputRegister(index); ++ } ++ ++ DoubleRegister InputOrZeroDoubleRegister(size_t index) { ++ if (instr_->InputAt(index)->IsImmediate()) return kDoubleRegZero; ++ ++ return InputDoubleRegister(index); ++ } ++ ++ DoubleRegister InputOrZeroSingleRegister(size_t index) { ++ if (instr_->InputAt(index)->IsImmediate()) return kDoubleRegZero; ++ ++ return InputSingleRegister(index); ++ } ++ ++ Operand InputImmediate(size_t index) { ++ Constant constant = ToConstant(instr_->InputAt(index)); ++ switch (constant.type()) { ++ case Constant::kInt32: ++ return Operand(constant.ToInt32()); ++ case Constant::kInt64: ++ return Operand(constant.ToInt64()); ++ case Constant::kFloat32: ++ return Operand::EmbeddedNumber(constant.ToFloat32()); ++ case Constant::kFloat64: ++ return Operand::EmbeddedNumber(constant.ToFloat64().value()); ++ case Constant::kExternalReference: ++ case Constant::kCompressedHeapObject: ++ case Constant::kHeapObject: ++ // TODO(plind): Maybe we should handle ExtRef & HeapObj here? ++ // maybe not done on arm due to const pool ?? ++ break; ++ case Constant::kDelayedStringConstant: ++ return Operand::EmbeddedStringConstant( ++ constant.ToDelayedStringConstant()); ++ case Constant::kRpoNumber: ++ UNREACHABLE(); // TODO(titzer): RPO immediates on la64? ++ break; ++ } ++ UNREACHABLE(); ++ } ++ ++ Operand InputOperand(size_t index) { ++ InstructionOperand* op = instr_->InputAt(index); ++ if (op->IsRegister()) { ++ return Operand(ToRegister(op)); ++ } ++ return InputImmediate(index); ++ } ++ ++ MemOperand MemoryOperand(size_t* first_index) { ++ const size_t index = *first_index; ++ switch (AddressingModeField::decode(instr_->opcode())) { ++ case kMode_None: ++ break; ++ case kMode_MRI: ++ *first_index += 2; ++ return MemOperand(InputRegister(index + 0), InputInt32(index + 1)); ++ case kMode_MRR: ++ *first_index += 2; ++ return MemOperand(InputRegister(index + 0), InputRegister(index + 1)); ++ } ++ UNREACHABLE(); ++ } ++ ++ MemOperand MemoryOperand(size_t index = 0) { return MemoryOperand(&index); } ++ ++ MemOperand ToMemOperand(InstructionOperand* op) const { ++ DCHECK_NOT_NULL(op); ++ DCHECK(op->IsStackSlot() || op->IsFPStackSlot()); ++ return SlotToMemOperand(AllocatedOperand::cast(op)->index()); ++ } ++ ++ MemOperand SlotToMemOperand(int slot) const { ++ FrameOffset offset = frame_access_state()->GetFrameOffset(slot); ++ return MemOperand(offset.from_stack_pointer() ? 
sp : fp, offset.offset()); ++ } ++}; ++ ++static inline bool HasRegisterInput(Instruction* instr, size_t index) { ++ return instr->InputAt(index)->IsRegister(); ++} ++ ++namespace { ++ ++class OutOfLineRecordWrite final : public OutOfLineCode { ++ public: ++ OutOfLineRecordWrite(CodeGenerator* gen, Register object, Register index, ++ Register value, Register scratch0, Register scratch1, ++ RecordWriteMode mode, StubCallMode stub_mode) ++ : OutOfLineCode(gen), ++ object_(object), ++ index_(index), ++ value_(value), ++ scratch0_(scratch0), ++ scratch1_(scratch1), ++ mode_(mode), ++ stub_mode_(stub_mode), ++ must_save_lr_(!gen->frame_access_state()->has_frame()), ++ zone_(gen->zone()) {} ++ ++ void Generate() final { ++ if (mode_ > RecordWriteMode::kValueIsPointer) { ++ __ JumpIfSmi(value_, exit()); ++ } ++ __ CheckPageFlag(value_, scratch0_, ++ MemoryChunk::kPointersToHereAreInterestingMask, eq, ++ exit()); ++ __ Add_d(scratch1_, object_, index_); ++ RememberedSetAction const remembered_set_action = ++ mode_ > RecordWriteMode::kValueIsMap ? EMIT_REMEMBERED_SET ++ : OMIT_REMEMBERED_SET; ++ SaveFPRegsMode const save_fp_mode = ++ frame()->DidAllocateDoubleRegisters() ? kSaveFPRegs : kDontSaveFPRegs; ++ if (must_save_lr_) { ++ // We need to save and restore ra if the frame was elided. ++ __ Push(ra); ++ } ++ if (mode_ == RecordWriteMode::kValueIsEphemeronKey) { ++ __ CallEphemeronKeyBarrier(object_, scratch1_, save_fp_mode); ++ } else if (stub_mode_ == StubCallMode::kCallWasmRuntimeStub) { ++ // A direct call to a wasm runtime stub defined in this module. ++ // Just encode the stub index. This will be patched when the code ++ // is added to the native module and copied into wasm code space. ++ __ CallRecordWriteStub(object_, scratch1_, remembered_set_action, ++ save_fp_mode, wasm::WasmCode::kRecordWrite); ++ } else { ++ __ CallRecordWriteStub(object_, scratch1_, remembered_set_action, ++ save_fp_mode); ++ } ++ if (must_save_lr_) { ++ __ Pop(ra); ++ } ++ } ++ ++ private: ++ Register const object_; ++ Register const index_; ++ Register const value_; ++ Register const scratch0_; ++ Register const scratch1_; ++ RecordWriteMode const mode_; ++ StubCallMode const stub_mode_; ++ bool must_save_lr_; ++ Zone* zone_; ++}; ++ ++#define CREATE_OOL_CLASS(ool_name, tasm_ool_name, T) \ ++ class ool_name final : public OutOfLineCode { \ ++ public: \ ++ ool_name(CodeGenerator* gen, T dst, T src1, T src2) \ ++ : OutOfLineCode(gen), dst_(dst), src1_(src1), src2_(src2) {} \ ++ \ ++ void Generate() final { __ tasm_ool_name(dst_, src1_, src2_); } \ ++ \ ++ private: \ ++ T const dst_; \ ++ T const src1_; \ ++ T const src2_; \ ++ } ++ ++CREATE_OOL_CLASS(OutOfLineFloat32Max, Float32MaxOutOfLine, FPURegister); ++CREATE_OOL_CLASS(OutOfLineFloat32Min, Float32MinOutOfLine, FPURegister); ++CREATE_OOL_CLASS(OutOfLineFloat64Max, Float64MaxOutOfLine, FPURegister); ++CREATE_OOL_CLASS(OutOfLineFloat64Min, Float64MinOutOfLine, FPURegister); ++ ++#undef CREATE_OOL_CLASS ++ ++Condition FlagsConditionToConditionCmp(FlagsCondition condition) { ++ switch (condition) { ++ case kEqual: ++ return eq; ++ case kNotEqual: ++ return ne; ++ case kSignedLessThan: ++ return lt; ++ case kSignedGreaterThanOrEqual: ++ return ge; ++ case kSignedLessThanOrEqual: ++ return le; ++ case kSignedGreaterThan: ++ return gt; ++ case kUnsignedLessThan: ++ return lo; ++ case kUnsignedGreaterThanOrEqual: ++ return hs; ++ case kUnsignedLessThanOrEqual: ++ return ls; ++ case kUnsignedGreaterThan: ++ return hi; ++ case kUnorderedEqual: ++ case kUnorderedNotEqual: 
++ break; ++ default: ++ break; ++ } ++ UNREACHABLE(); ++} ++ ++Condition FlagsConditionToConditionTst(FlagsCondition condition) { ++ switch (condition) { ++ case kNotEqual: ++ return ne; ++ case kEqual: ++ return eq; ++ default: ++ break; ++ } ++ UNREACHABLE(); ++} ++ ++Condition FlagsConditionToConditionOvf(FlagsCondition condition) { ++ switch (condition) { ++ case kOverflow: ++ return ne; ++ case kNotOverflow: ++ return eq; ++ default: ++ break; ++ } ++ UNREACHABLE(); ++} ++ ++FPUCondition FlagsConditionToConditionCmpFPU(bool* predicate, ++ FlagsCondition condition) { ++ switch (condition) { ++ case kEqual: ++ *predicate = true; ++ return CEQ; ++ case kNotEqual: ++ *predicate = false; ++ return CEQ; ++ case kUnsignedLessThan: ++ *predicate = true; ++ return CLT; ++ case kUnsignedGreaterThanOrEqual: ++ *predicate = false; ++ return CLT; ++ case kUnsignedLessThanOrEqual: ++ *predicate = true; ++ return CLE; ++ case kUnsignedGreaterThan: ++ *predicate = false; ++ return CLE; ++ case kUnorderedEqual: ++ case kUnorderedNotEqual: ++ *predicate = true; ++ break; ++ default: ++ *predicate = true; ++ break; ++ } ++ UNREACHABLE(); ++} ++ ++void EmitWordLoadPoisoningIfNeeded(CodeGenerator* codegen, ++ InstructionCode opcode, Instruction* instr, ++ La64OperandConverter const& i) { ++ const MemoryAccessMode access_mode = ++ static_cast(MiscField::decode(opcode)); ++ if (access_mode == kMemoryAccessPoisoned) { ++ Register value = i.OutputRegister(); ++ codegen->tasm()->And(value, value, kSpeculationPoisonRegister); ++ } ++} ++ ++} // namespace ++ ++#define ASSEMBLE_ATOMIC_LOAD_INTEGER(asm_instr) \ ++ do { \ ++ __ asm_instr(i.OutputRegister(), i.MemoryOperand()); \ ++ __ dbar(0); \ ++ } while (0) ++ ++// TODO remove second dbar? ++#define ASSEMBLE_ATOMIC_STORE_INTEGER(asm_instr) \ ++ do { \ ++ __ dbar(0); \ ++ __ asm_instr(i.InputOrZeroRegister(2), i.MemoryOperand()); \ ++ __ dbar(0); \ ++ } while (0) ++ ++// only use for sub_w and sub_d ++#define ASSEMBLE_ATOMIC_BINOP(load_linked, store_conditional, bin_instr) \ ++ do { \ ++ Label binop; \ ++ __ Add_d(i.TempRegister(0), i.InputRegister(0), i.InputRegister(1)); \ ++ __ dbar(0); \ ++ __ bind(&binop); \ ++ __ load_linked(i.OutputRegister(0), MemOperand(i.TempRegister(0), 0)); \ ++ __ bin_instr(i.TempRegister(1), i.OutputRegister(0), \ ++ Operand(i.InputRegister(2))); \ ++ __ store_conditional(i.TempRegister(1), MemOperand(i.TempRegister(0), 0)); \ ++ __ BranchShort(&binop, eq, i.TempRegister(1), Operand(zero_reg)); \ ++ __ dbar(0); \ ++ } while (0) ++ ++// TODO remove second dbar? 
++#define ASSEMBLE_ATOMIC_BINOP_EXT(load_linked, store_conditional, sign_extend, \ ++ size, bin_instr, representation) \ ++ do { \ ++ Label binop; \ ++ __ add_d(i.TempRegister(0), i.InputRegister(0), i.InputRegister(1)); \ ++ if (representation == 32) { \ ++ __ andi(i.TempRegister(3), i.TempRegister(0), 0x3); \ ++ } else { \ ++ DCHECK_EQ(representation, 64); \ ++ __ andi(i.TempRegister(3), i.TempRegister(0), 0x7); \ ++ } \ ++ __ Sub_d(i.TempRegister(0), i.TempRegister(0), \ ++ Operand(i.TempRegister(3))); \ ++ __ slli_w(i.TempRegister(3), i.TempRegister(3), 3); \ ++ __ dbar(0); \ ++ __ bind(&binop); \ ++ __ load_linked(i.TempRegister(1), MemOperand(i.TempRegister(0), 0)); \ ++ __ ExtractBits(i.OutputRegister(0), i.TempRegister(1), i.TempRegister(3), \ ++ size, sign_extend); \ ++ __ bin_instr(i.TempRegister(2), i.OutputRegister(0), \ ++ Operand(i.InputRegister(2))); \ ++ __ InsertBits(i.TempRegister(1), i.TempRegister(2), i.TempRegister(3), \ ++ size); \ ++ __ store_conditional(i.TempRegister(1), MemOperand(i.TempRegister(0), 0)); \ ++ __ BranchShort(&binop, eq, i.TempRegister(1), Operand(zero_reg)); \ ++ __ dbar(0); \ ++ } while (0) ++ ++// TODO remove second dbar? ++#define ASSEMBLE_ATOMIC_EXCHANGE_INTEGER_EXT( \ ++ load_linked, store_conditional, sign_extend, size, representation) \ ++ do { \ ++ Label exchange; \ ++ __ add_d(i.TempRegister(0), i.InputRegister(0), i.InputRegister(1)); \ ++ if (representation == 32) { \ ++ __ andi(i.TempRegister(1), i.TempRegister(0), 0x3); \ ++ } else { \ ++ DCHECK_EQ(representation, 64); \ ++ __ andi(i.TempRegister(1), i.TempRegister(0), 0x7); \ ++ } \ ++ __ Sub_d(i.TempRegister(0), i.TempRegister(0), \ ++ Operand(i.TempRegister(1))); \ ++ __ slli_w(i.TempRegister(1), i.TempRegister(1), 3); \ ++ __ dbar(0); \ ++ __ bind(&exchange); \ ++ __ load_linked(i.TempRegister(2), MemOperand(i.TempRegister(0), 0)); \ ++ __ ExtractBits(i.OutputRegister(0), i.TempRegister(2), i.TempRegister(1), \ ++ size, sign_extend); \ ++ __ InsertBits(i.TempRegister(2), i.InputRegister(2), i.TempRegister(1), \ ++ size); \ ++ __ store_conditional(i.TempRegister(2), MemOperand(i.TempRegister(0), 0)); \ ++ __ BranchShort(&exchange, eq, i.TempRegister(2), Operand(zero_reg)); \ ++ __ dbar(0); \ ++ } while (0) ++ ++// TODO remove second dbar? ++#define ASSEMBLE_ATOMIC_COMPARE_EXCHANGE_INTEGER(load_linked, \ ++ store_conditional) \ ++ do { \ ++ Label compareExchange; \ ++ Label exit; \ ++ __ add_d(i.TempRegister(0), i.InputRegister(0), i.InputRegister(1)); \ ++ __ dbar(0); \ ++ __ bind(&compareExchange); \ ++ __ load_linked(i.OutputRegister(0), MemOperand(i.TempRegister(0), 0)); \ ++ __ BranchShort(&exit, ne, i.InputRegister(2), \ ++ Operand(i.OutputRegister(0))); \ ++ __ mov(i.TempRegister(2), i.InputRegister(3)); \ ++ __ store_conditional(i.TempRegister(2), MemOperand(i.TempRegister(0), 0)); \ ++ __ BranchShort(&compareExchange, eq, i.TempRegister(2), \ ++ Operand(zero_reg)); \ ++ __ bind(&exit); \ ++ __ dbar(0); \ ++ } while (0) ++ ++// TODO remove second dbar? 
++#define ASSEMBLE_ATOMIC_COMPARE_EXCHANGE_INTEGER_EXT( \ ++ load_linked, store_conditional, sign_extend, size, representation) \ ++ do { \ ++ Label compareExchange; \ ++ Label exit; \ ++ __ add_d(i.TempRegister(0), i.InputRegister(0), i.InputRegister(1)); \ ++ if (representation == 32) { \ ++ __ andi(i.TempRegister(1), i.TempRegister(0), 0x3); \ ++ } else { \ ++ DCHECK_EQ(representation, 64); \ ++ __ andi(i.TempRegister(1), i.TempRegister(0), 0x7); \ ++ } \ ++ __ Sub_d(i.TempRegister(0), i.TempRegister(0), \ ++ Operand(i.TempRegister(1))); \ ++ __ slli_w(i.TempRegister(1), i.TempRegister(1), 3); \ ++ __ dbar(0); \ ++ __ bind(&compareExchange); \ ++ __ load_linked(i.TempRegister(2), MemOperand(i.TempRegister(0), 0)); \ ++ __ ExtractBits(i.OutputRegister(0), i.TempRegister(2), i.TempRegister(1), \ ++ size, sign_extend); \ ++ __ ExtractBits(i.InputRegister(2), i.InputRegister(2), i.TempRegister(1), \ ++ size, sign_extend); \ ++ __ BranchShort(&exit, ne, i.InputRegister(2), \ ++ Operand(i.OutputRegister(0))); \ ++ __ InsertBits(i.TempRegister(2), i.InputRegister(3), i.TempRegister(1), \ ++ size); \ ++ __ store_conditional(i.TempRegister(2), MemOperand(i.TempRegister(0), 0)); \ ++ __ BranchShort(&compareExchange, eq, i.TempRegister(2), \ ++ Operand(zero_reg)); \ ++ __ bind(&exit); \ ++ __ dbar(0); \ ++ } while (0) ++ ++#define ASSEMBLE_IEEE754_BINOP(name) \ ++ do { \ ++ FrameScope scope(tasm(), StackFrame::MANUAL); \ ++ __ PrepareCallCFunction(0, 2, kScratchReg); \ ++ __ MovToFloatParameters(i.InputDoubleRegister(0), \ ++ i.InputDoubleRegister(1)); \ ++ __ CallCFunction(ExternalReference::ieee754_##name##_function(), 0, 2); \ ++ /* Move the result in the double result register. */ \ ++ __ MovFromFloatResult(i.OutputDoubleRegister()); \ ++ } while (0) ++ ++#define ASSEMBLE_IEEE754_UNOP(name) \ ++ do { \ ++ FrameScope scope(tasm(), StackFrame::MANUAL); \ ++ __ PrepareCallCFunction(0, 1, kScratchReg); \ ++ __ MovToFloatParameter(i.InputDoubleRegister(0)); \ ++ __ CallCFunction(ExternalReference::ieee754_##name##_function(), 0, 1); \ ++ /* Move the result in the double result register. */ \ ++ __ MovFromFloatResult(i.OutputDoubleRegister()); \ ++ } while (0) ++ ++#define ASSEMBLE_F64X2_ARITHMETIC_BINOP(op) \ ++ do { \ ++ __ op(i.OutputSimd128Register(), i.InputSimd128Register(0), \ ++ i.InputSimd128Register(1)); \ ++ } while (0) ++ ++void CodeGenerator::AssembleDeconstructFrame() { ++ __ mov(sp, fp); ++ __ Pop(ra, fp); ++} ++ ++void CodeGenerator::AssemblePrepareTailCall() { ++ if (frame_access_state()->has_frame()) { ++ __ Ld_d(ra, MemOperand(fp, StandardFrameConstants::kCallerPCOffset)); ++ __ Ld_d(fp, MemOperand(fp, StandardFrameConstants::kCallerFPOffset)); ++ } ++ frame_access_state()->SetFrameAccessToSP(); ++} ++ ++void CodeGenerator::AssemblePopArgumentsAdaptorFrame(Register args_reg, ++ Register scratch1, ++ Register scratch2, ++ Register scratch3) { ++ DCHECK(!AreAliased(args_reg, scratch1, scratch2, scratch3)); ++ Label done; ++ ++ // Check if current frame is an arguments adaptor frame. ++ __ Ld_d(scratch3, MemOperand(fp, StandardFrameConstants::kContextOffset)); ++ __ Branch(&done, ne, scratch3, ++ Operand(StackFrame::TypeToMarker(StackFrame::ARGUMENTS_ADAPTOR))); ++ ++ // Load arguments count from current arguments adaptor frame (note, it ++ // does not include receiver). 
++ Register caller_args_count_reg = scratch1; ++ __ Ld_d(caller_args_count_reg, ++ MemOperand(fp, ArgumentsAdaptorFrameConstants::kLengthOffset)); ++ __ SmiUntag(caller_args_count_reg); ++ ++ __ PrepareForTailCall(args_reg, caller_args_count_reg, scratch2, scratch3); ++ __ bind(&done); ++} ++ ++namespace { ++ ++void AdjustStackPointerForTailCall(TurboAssembler* tasm, ++ FrameAccessState* state, ++ int new_slot_above_sp, ++ bool allow_shrinkage = true) { ++ int current_sp_offset = state->GetSPToFPSlotCount() + ++ StandardFrameConstants::kFixedSlotCountAboveFp; ++ int stack_slot_delta = new_slot_above_sp - current_sp_offset; ++ if (stack_slot_delta > 0) { ++ tasm->Sub_d(sp, sp, stack_slot_delta * kSystemPointerSize); ++ state->IncreaseSPDelta(stack_slot_delta); ++ } else if (allow_shrinkage && stack_slot_delta < 0) { ++ tasm->Add_d(sp, sp, -stack_slot_delta * kSystemPointerSize); ++ state->IncreaseSPDelta(stack_slot_delta); ++ } ++} ++ ++} // namespace ++ ++void CodeGenerator::AssembleTailCallBeforeGap(Instruction* instr, ++ int first_unused_stack_slot) { ++ AdjustStackPointerForTailCall(tasm(), frame_access_state(), ++ first_unused_stack_slot, false); ++} ++ ++void CodeGenerator::AssembleTailCallAfterGap(Instruction* instr, ++ int first_unused_stack_slot) { ++ AdjustStackPointerForTailCall(tasm(), frame_access_state(), ++ first_unused_stack_slot); ++} ++ ++// Check that {kJavaScriptCallCodeStartRegister} is correct. ++void CodeGenerator::AssembleCodeStartRegisterCheck() { ++ __ ComputeCodeStartAddress(kScratchReg); ++ __ Assert(eq, AbortReason::kWrongFunctionCodeStart, ++ kJavaScriptCallCodeStartRegister, Operand(kScratchReg)); ++} ++ ++// Check if the code object is marked for deoptimization. If it is, then it ++// jumps to the CompileLazyDeoptimizedCode builtin. In order to do this we need ++// to: ++// 1. read from memory the word that contains that bit, which can be found in ++// the flags in the referenced {CodeDataContainer} object; ++// 2. test kMarkedForDeoptimizationBit in those flags; and ++// 3. if it is not zero then it jumps to the builtin. ++void CodeGenerator::BailoutIfDeoptimized() { ++ int offset = Code::kCodeDataContainerOffset - Code::kHeaderSize; ++ __ Ld_d(kScratchReg, MemOperand(kJavaScriptCallCodeStartRegister, offset)); ++ __ Ld_w(kScratchReg, ++ FieldMemOperand(kScratchReg, ++ CodeDataContainer::kKindSpecificFlagsOffset)); ++ __ And(kScratchReg, kScratchReg, ++ Operand(1 << Code::kMarkedForDeoptimizationBit)); ++ __ Jump(BUILTIN_CODE(isolate(), CompileLazyDeoptimizedCode), ++ RelocInfo::CODE_TARGET, ne, kScratchReg, Operand(zero_reg)); ++} ++ ++void CodeGenerator::GenerateSpeculationPoisonFromCodeStartRegister() { ++ // Calculate a mask which has all bits set in the normal case, but has all ++ // bits cleared if we are speculatively executing the wrong PC. ++ __ li(kSpeculationPoisonRegister, -1); ++ __ ComputeCodeStartAddress(kScratchReg); ++ __ sub_d(kScratchReg, kScratchReg, kJavaScriptCallCodeStartRegister); ++ __ maskeqz(kSpeculationPoisonRegister, kSpeculationPoisonRegister, ++ kScratchReg); ++} ++ ++void CodeGenerator::AssembleRegisterArgumentPoisoning() { ++ __ And(kJSFunctionRegister, kJSFunctionRegister, kSpeculationPoisonRegister); ++ __ And(kContextRegister, kContextRegister, kSpeculationPoisonRegister); ++ __ And(sp, sp, kSpeculationPoisonRegister); ++} ++ ++// Assembles an instruction after register allocation, producing machine code. 
++CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction( ++ Instruction* instr) { ++ La64OperandConverter i(this, instr); ++ InstructionCode opcode = instr->opcode(); ++ ArchOpcode arch_opcode = ArchOpcodeField::decode(opcode); ++ switch (arch_opcode) { ++ case kArchCallCodeObject: { ++ if (instr->InputAt(0)->IsImmediate()) { ++ __ Call(i.InputCode(0), RelocInfo::CODE_TARGET); ++ } else { ++ Register reg = i.InputRegister(0); ++ DCHECK_IMPLIES( ++ HasCallDescriptorFlag(instr, CallDescriptor::kFixedTargetRegister), ++ reg == kJavaScriptCallCodeStartRegister); ++ __ addi_d(reg, reg, Code::kHeaderSize - kHeapObjectTag); ++ __ Call(reg); ++ } ++ RecordCallPosition(instr); ++ frame_access_state()->ClearSPDelta(); ++ break; ++ } ++ case kArchCallBuiltinPointer: { ++ DCHECK(!instr->InputAt(0)->IsImmediate()); ++ Register builtin_index = i.InputRegister(0); ++ __ CallBuiltinByIndex(builtin_index); ++ RecordCallPosition(instr); ++ frame_access_state()->ClearSPDelta(); ++ break; ++ } ++ case kArchCallWasmFunction: { ++ if (arch_opcode == kArchTailCallCodeObjectFromJSFunction) { ++ AssemblePopArgumentsAdaptorFrame(kJavaScriptCallArgCountRegister, ++ i.TempRegister(0), i.TempRegister(1), ++ i.TempRegister(2)); ++ } ++ if (instr->InputAt(0)->IsImmediate()) { ++ Constant constant = i.ToConstant(instr->InputAt(0)); ++ Address wasm_code = static_cast
<Address>(constant.ToInt64()); ++ __ Call(wasm_code, constant.rmode()); ++ } else { ++ __ addi_d(kScratchReg, i.InputRegister(0), 0); ++ __ Call(kScratchReg); ++ } ++ RecordCallPosition(instr); ++ frame_access_state()->ClearSPDelta(); ++ break; ++ } ++ case kArchTailCallCodeObjectFromJSFunction: ++ case kArchTailCallCodeObject: { ++ if (arch_opcode == kArchTailCallCodeObjectFromJSFunction) { ++ AssemblePopArgumentsAdaptorFrame(kJavaScriptCallArgCountRegister, ++ i.TempRegister(0), i.TempRegister(1), ++ i.TempRegister(2)); ++ } ++ if (instr->InputAt(0)->IsImmediate()) { ++ __ Jump(i.InputCode(0), RelocInfo::CODE_TARGET); ++ } else { ++ Register reg = i.InputRegister(0); ++ DCHECK_IMPLIES( ++ HasCallDescriptorFlag(instr, CallDescriptor::kFixedTargetRegister), ++ reg == kJavaScriptCallCodeStartRegister); ++ __ addi_d(reg, reg, Code::kHeaderSize - kHeapObjectTag); ++ __ Jump(reg); ++ } ++ frame_access_state()->ClearSPDelta(); ++ frame_access_state()->SetFrameAccessToDefault(); ++ break; ++ } ++ case kArchTailCallWasm: { ++ if (instr->InputAt(0)->IsImmediate()) { ++ Constant constant = i.ToConstant(instr->InputAt(0)); ++ Address wasm_code = static_cast<Address>
(constant.ToInt64()); ++ __ Jump(wasm_code, constant.rmode()); ++ } else { ++ __ addi_d(kScratchReg, i.InputRegister(0), 0); ++ __ Jump(kScratchReg); ++ } ++ frame_access_state()->ClearSPDelta(); ++ frame_access_state()->SetFrameAccessToDefault(); ++ break; ++ } ++ case kArchTailCallAddress: { ++ CHECK(!instr->InputAt(0)->IsImmediate()); ++ Register reg = i.InputRegister(0); ++ DCHECK_IMPLIES( ++ HasCallDescriptorFlag(instr, CallDescriptor::kFixedTargetRegister), ++ reg == kJavaScriptCallCodeStartRegister); ++ __ Jump(reg); ++ frame_access_state()->ClearSPDelta(); ++ frame_access_state()->SetFrameAccessToDefault(); ++ break; ++ } ++ case kArchCallJSFunction: { ++ Register func = i.InputRegister(0); ++ if (FLAG_debug_code) { ++ // Check the function's context matches the context argument. ++ __ Ld_d(kScratchReg, FieldMemOperand(func, JSFunction::kContextOffset)); ++ __ Assert(eq, AbortReason::kWrongFunctionContext, cp, ++ Operand(kScratchReg)); ++ } ++ static_assert(kJavaScriptCallCodeStartRegister == a2, "ABI mismatch"); ++ __ Ld_d(a2, FieldMemOperand(func, JSFunction::kCodeOffset)); ++ __ Add_d(a2, a2, Operand(Code::kHeaderSize - kHeapObjectTag)); ++ __ Call(a2); ++ RecordCallPosition(instr); ++ frame_access_state()->ClearSPDelta(); ++ break; ++ } ++ case kArchPrepareCallCFunction: { ++ int const num_parameters = MiscField::decode(instr->opcode()); ++ __ PrepareCallCFunction(num_parameters, kScratchReg); ++ // Frame alignment requires using FP-relative frame addressing. ++ frame_access_state()->SetFrameAccessToFP(); ++ break; ++ } ++ case kArchSaveCallerRegisters: { ++ fp_mode_ = ++ static_cast(MiscField::decode(instr->opcode())); ++ DCHECK(fp_mode_ == kDontSaveFPRegs || fp_mode_ == kSaveFPRegs); ++ // kReturnRegister0 should have been saved before entering the stub. ++ int bytes = __ PushCallerSaved(fp_mode_, kReturnRegister0); ++ DCHECK(IsAligned(bytes, kSystemPointerSize)); ++ DCHECK_EQ(0, frame_access_state()->sp_delta()); ++ frame_access_state()->IncreaseSPDelta(bytes / kSystemPointerSize); ++ DCHECK(!caller_registers_saved_); ++ caller_registers_saved_ = true; ++ break; ++ } ++ case kArchRestoreCallerRegisters: { ++ DCHECK(fp_mode_ == ++ static_cast(MiscField::decode(instr->opcode()))); ++ DCHECK(fp_mode_ == kDontSaveFPRegs || fp_mode_ == kSaveFPRegs); ++ // Don't overwrite the returned value. ++ int bytes = __ PopCallerSaved(fp_mode_, kReturnRegister0); ++ frame_access_state()->IncreaseSPDelta(-(bytes / kSystemPointerSize)); ++ DCHECK_EQ(0, frame_access_state()->sp_delta()); ++ DCHECK(caller_registers_saved_); ++ caller_registers_saved_ = false; ++ break; ++ } ++ case kArchPrepareTailCall: ++ AssemblePrepareTailCall(); ++ break; ++ case kArchCallCFunction: { ++ int const num_parameters = MiscField::decode(instr->opcode()); ++ Label start_call; ++ bool isWasmCapiFunction = ++ linkage()->GetIncomingDescriptor()->IsWasmCapiFunction(); ++ // from start_call to return address. ++ int offset = __ root_array_available() ? 44 : 80; // 11 or 20 instrs ++#if V8_HOST_ARCH_LA64 ++ if (__ emit_debug_code()) { ++ offset += 12; // see CallCFunction ++ } ++#endif ++ if (isWasmCapiFunction) { ++ // Put the return address in a stack slot. 
++ // __ mov(kScratchReg, ra); ++ __ bind(&start_call); ++ __ pcaddi(t7, -4); // __ nal(); ++ //__ nop(); ++ //__ Daddu(ra, ra, offset - 8); // 8 = nop + nal ++ __ St_d(t7, MemOperand(fp, WasmExitFrameConstants::kCallingPCOffset)); ++ // __ mov(ra, kScratchReg); ++ } ++ if (instr->InputAt(0)->IsImmediate()) { ++ ExternalReference ref = i.InputExternalReference(0); ++ __ CallCFunction(ref, num_parameters); ++ } else { ++ Register func = i.InputRegister(0); ++ __ CallCFunction(func, num_parameters); ++ } ++ if (isWasmCapiFunction) { ++ CHECK_EQ(offset, __ SizeOfCodeGeneratedSince(&start_call)); ++ RecordSafepoint(instr->reference_map(), Safepoint::kNoLazyDeopt); ++ } ++ ++ frame_access_state()->SetFrameAccessToDefault(); ++ // Ideally, we should decrement SP delta to match the change of stack ++ // pointer in CallCFunction. However, for certain architectures (e.g. ++ // ARM), there may be more strict alignment requirement, causing old SP ++ // to be saved on the stack. In those cases, we can not calculate the SP ++ // delta statically. ++ frame_access_state()->ClearSPDelta(); ++ if (caller_registers_saved_) { ++ // Need to re-sync SP delta introduced in kArchSaveCallerRegisters. ++ // Here, we assume the sequence to be: ++ // kArchSaveCallerRegisters; ++ // kArchCallCFunction; ++ // kArchRestoreCallerRegisters; ++ int bytes = ++ __ RequiredStackSizeForCallerSaved(fp_mode_, kReturnRegister0); ++ frame_access_state()->IncreaseSPDelta(bytes / kSystemPointerSize); ++ } ++ break; ++ } ++ case kArchJmp: ++ AssembleArchJump(i.InputRpo(0)); ++ break; ++ case kArchBinarySearchSwitch: ++ AssembleArchBinarySearchSwitch(instr); ++ break; ++ break; ++ case kArchTableSwitch: ++ AssembleArchTableSwitch(instr); ++ break; ++ case kArchAbortCSAAssert: ++ DCHECK(i.InputRegister(0) == a0); ++ { ++ // We don't actually want to generate a pile of code for this, so just ++ // claim there is a stack frame, without generating one. ++ FrameScope scope(tasm(), StackFrame::NONE); ++ __ Call( ++ isolate()->builtins()->builtin_handle(Builtins::kAbortCSAAssert), ++ RelocInfo::CODE_TARGET); ++ } ++ __ stop(); ++ break; ++ case kArchDebugBreak: ++ __ DebugBreak(); ++ break; ++ case kArchComment: ++ __ RecordComment(reinterpret_cast(i.InputInt64(0))); ++ break; ++ case kArchNop: ++ case kArchThrowTerminator: ++ // don't emit code for nops. ++ break; ++ case kArchDeoptimize: { ++ DeoptimizationExit* exit = ++ BuildTranslation(instr, -1, 0, OutputFrameStateCombine::Ignore()); ++ CodeGenResult result = AssembleDeoptimizerCall(exit); ++ if (result != kSuccess) return result; ++ break; ++ } ++ case kArchRet: ++ AssembleReturn(instr->InputAt(0)); ++ break; ++ case kArchStackPointerGreaterThan: ++ // Pseudo-instruction used for cmp/branch. No opcode emitted here. 
++ break; ++ case kArchStackCheckOffset: ++ __ Move(i.OutputRegister(), Smi::FromInt(GetStackCheckOffset())); ++ break; ++ case kArchFramePointer: ++ __ mov(i.OutputRegister(), fp); ++ break; ++ case kArchParentFramePointer: ++ if (frame_access_state()->has_frame()) { ++ __ Ld_d(i.OutputRegister(), MemOperand(fp, 0)); ++ } else { ++ __ mov(i.OutputRegister(), fp); ++ } ++ break; ++ case kArchTruncateDoubleToI: ++ __ TruncateDoubleToI(isolate(), zone(), i.OutputRegister(), ++ i.InputDoubleRegister(0), DetermineStubCallMode()); ++ break; ++ case kArchStoreWithWriteBarrier: { ++ RecordWriteMode mode = ++ static_cast(MiscField::decode(instr->opcode())); ++ Register object = i.InputRegister(0); ++ Register index = i.InputRegister(1); ++ Register value = i.InputRegister(2); ++ Register scratch0 = i.TempRegister(0); ++ Register scratch1 = i.TempRegister(1); ++ auto ool = new (zone()) ++ OutOfLineRecordWrite(this, object, index, value, scratch0, scratch1, ++ mode, DetermineStubCallMode()); ++ __ Add_d(kScratchReg, object, index); ++ __ St_d(value, MemOperand(kScratchReg, 0)); ++ __ CheckPageFlag(object, scratch0, ++ MemoryChunk::kPointersFromHereAreInterestingMask, ne, ++ ool->entry()); ++ __ bind(ool->exit()); ++ break; ++ } ++ case kArchStackSlot: { ++ FrameOffset offset = ++ frame_access_state()->GetFrameOffset(i.InputInt32(0)); ++ Register base_reg = offset.from_stack_pointer() ? sp : fp; ++ __ Add_d(i.OutputRegister(), base_reg, Operand(offset.offset())); ++ int alignment = i.InputInt32(1); ++ DCHECK(alignment == 0 || alignment == 4 || alignment == 8 || ++ alignment == 16); ++ if (FLAG_debug_code && alignment > 0) { ++ // Verify that the output_register is properly aligned ++ __ And(kScratchReg, i.OutputRegister(), ++ Operand(kSystemPointerSize - 1)); ++ __ Assert(eq, AbortReason::kAllocationIsNotDoubleAligned, kScratchReg, ++ Operand(zero_reg)); ++ } ++ if (alignment == 2 * kSystemPointerSize) { ++ Label done; ++ __ Add_d(kScratchReg, base_reg, Operand(offset.offset())); ++ __ And(kScratchReg, kScratchReg, Operand(alignment - 1)); ++ __ BranchShort(&done, eq, kScratchReg, Operand(zero_reg)); ++ __ Add_d(i.OutputRegister(), i.OutputRegister(), kSystemPointerSize); ++ __ bind(&done); ++ } else if (alignment > 2 * kSystemPointerSize) { ++ Label done; ++ __ Add_d(kScratchReg, base_reg, Operand(offset.offset())); ++ __ And(kScratchReg, kScratchReg, Operand(alignment - 1)); ++ __ BranchShort(&done, eq, kScratchReg, Operand(zero_reg)); ++ __ li(kScratchReg2, alignment); ++ __ Sub_d(kScratchReg2, kScratchReg2, Operand(kScratchReg)); ++ __ Add_d(i.OutputRegister(), i.OutputRegister(), kScratchReg2); ++ __ bind(&done); ++ } ++ ++ break; ++ } ++ case kArchWordPoisonOnSpeculation: ++ __ And(i.OutputRegister(), i.InputRegister(0), ++ kSpeculationPoisonRegister); ++ break; ++ case kIeee754Float64Acos: ++ ASSEMBLE_IEEE754_UNOP(acos); ++ break; ++ case kIeee754Float64Acosh: ++ ASSEMBLE_IEEE754_UNOP(acosh); ++ break; ++ case kIeee754Float64Asin: ++ ASSEMBLE_IEEE754_UNOP(asin); ++ break; ++ case kIeee754Float64Asinh: ++ ASSEMBLE_IEEE754_UNOP(asinh); ++ break; ++ case kIeee754Float64Atan: ++ ASSEMBLE_IEEE754_UNOP(atan); ++ break; ++ case kIeee754Float64Atanh: ++ ASSEMBLE_IEEE754_UNOP(atanh); ++ break; ++ case kIeee754Float64Atan2: ++ ASSEMBLE_IEEE754_BINOP(atan2); ++ break; ++ case kIeee754Float64Cos: ++ ASSEMBLE_IEEE754_UNOP(cos); ++ break; ++ case kIeee754Float64Cosh: ++ ASSEMBLE_IEEE754_UNOP(cosh); ++ break; ++ case kIeee754Float64Cbrt: ++ ASSEMBLE_IEEE754_UNOP(cbrt); ++ break; ++ case kIeee754Float64Exp: 
++ ASSEMBLE_IEEE754_UNOP(exp); ++ break; ++ case kIeee754Float64Expm1: ++ ASSEMBLE_IEEE754_UNOP(expm1); ++ break; ++ case kIeee754Float64Log: ++ ASSEMBLE_IEEE754_UNOP(log); ++ break; ++ case kIeee754Float64Log1p: ++ ASSEMBLE_IEEE754_UNOP(log1p); ++ break; ++ case kIeee754Float64Log2: ++ ASSEMBLE_IEEE754_UNOP(log2); ++ break; ++ case kIeee754Float64Log10: ++ ASSEMBLE_IEEE754_UNOP(log10); ++ break; ++ case kIeee754Float64Pow: ++ ASSEMBLE_IEEE754_BINOP(pow); ++ break; ++ case kIeee754Float64Sin: ++ ASSEMBLE_IEEE754_UNOP(sin); ++ break; ++ case kIeee754Float64Sinh: ++ ASSEMBLE_IEEE754_UNOP(sinh); ++ break; ++ case kIeee754Float64Tan: ++ ASSEMBLE_IEEE754_UNOP(tan); ++ break; ++ case kIeee754Float64Tanh: ++ ASSEMBLE_IEEE754_UNOP(tanh); ++ break; ++ case kLa64Add: ++ __ Add_w(i.OutputRegister(), i.InputRegister(0), i.InputOperand(1)); ++ break; ++ case kLa64Dadd: ++ __ Add_d(i.OutputRegister(), i.InputRegister(0), i.InputOperand(1)); ++ break; ++ case kLa64DaddOvf: ++ __ AdddOverflow(i.OutputRegister(), i.InputRegister(0), i.InputOperand(1), ++ kScratchReg); ++ break; ++ case kLa64Sub: ++ __ Sub_w(i.OutputRegister(), i.InputRegister(0), i.InputOperand(1)); ++ break; ++ case kLa64Dsub: ++ __ Sub_d(i.OutputRegister(), i.InputRegister(0), i.InputOperand(1)); ++ break; ++ case kLa64DsubOvf: ++ __ SubdOverflow(i.OutputRegister(), i.InputRegister(0), i.InputOperand(1), ++ kScratchReg); ++ break; ++ case kLa64Mul: ++ __ Mul_w(i.OutputRegister(), i.InputRegister(0), i.InputOperand(1)); ++ break; ++ case kLa64MulOvf: ++ __ MulOverflow(i.OutputRegister(), i.InputRegister(0), i.InputOperand(1), ++ kScratchReg); ++ break; ++ case kLa64MulHigh: ++ __ Mulh_w(i.OutputRegister(), i.InputRegister(0), i.InputOperand(1)); ++ break; ++ case kLa64MulHighU: ++ __ Mulh_wu(i.OutputRegister(), i.InputRegister(0), i.InputOperand(1)); ++ break; ++ case kLa64DMulHigh: ++ __ Mulh_d(i.OutputRegister(), i.InputRegister(0), i.InputOperand(1)); ++ break; ++ case kLa64Div: ++ __ Div_w(i.OutputRegister(), i.InputRegister(0), i.InputOperand(1)); ++ __ masknez(i.OutputRegister(), i.InputRegister(0), i.InputRegister(1)); ++ break; ++ case kLa64DivU: ++ __ Div_wu(i.OutputRegister(), i.InputRegister(0), i.InputOperand(1)); ++ __ masknez(i.OutputRegister(), i.InputRegister(0), i.InputRegister(1)); ++ break; ++ case kLa64Mod: ++ __ Mod_w(i.OutputRegister(), i.InputRegister(0), i.InputOperand(1)); ++ break; ++ case kLa64ModU: ++ __ Mod_wu(i.OutputRegister(), i.InputRegister(0), i.InputOperand(1)); ++ break; ++ case kLa64Dmul: ++ __ Mul_d(i.OutputRegister(), i.InputRegister(0), i.InputOperand(1)); ++ break; ++ case kLa64Ddiv: ++ __ Div_d(i.OutputRegister(), i.InputRegister(0), i.InputOperand(1)); ++ __ masknez(i.OutputRegister(), i.InputRegister(0), i.InputRegister(1)); ++ break; ++ case kLa64DdivU: ++ __ Div_du(i.OutputRegister(), i.InputRegister(0), i.InputOperand(1)); ++ __ masknez(i.OutputRegister(), i.InputRegister(0), i.InputRegister(1)); ++ break; ++ case kLa64Dmod: ++ __ Mod_d(i.OutputRegister(), i.InputRegister(0), i.InputOperand(1)); ++ break; ++ case kLa64DmodU: ++ __ Mod_du(i.OutputRegister(), i.InputRegister(0), i.InputOperand(1)); ++ break; ++ case kLa64Dlsa: ++ DCHECK(instr->InputAt(2)->IsImmediate()); ++ __ Alsl_d(i.OutputRegister(), i.InputRegister(0), i.InputRegister(1), ++ i.InputInt8(2), t7); ++ break; ++ case kLa64Lsa: ++ DCHECK(instr->InputAt(2)->IsImmediate()); ++ __ Alsl_w(i.OutputRegister(), i.InputRegister(0), i.InputRegister(1), ++ i.InputInt8(2), t7); ++ break; ++ case kLa64And: ++ __ And(i.OutputRegister(), 
i.InputRegister(0), i.InputOperand(1)); ++ break; ++ case kLa64And32: ++ __ And(i.OutputRegister(), i.InputRegister(0), i.InputOperand(1)); ++ __ slli_w(i.OutputRegister(), i.OutputRegister(), 0x0); ++ break; ++ case kLa64Or: ++ __ Or(i.OutputRegister(), i.InputRegister(0), i.InputOperand(1)); ++ break; ++ case kLa64Or32: ++ __ Or(i.OutputRegister(), i.InputRegister(0), i.InputOperand(1)); ++ __ slli_w(i.OutputRegister(), i.OutputRegister(), 0x0); ++ break; ++ case kLa64Nor: ++ if (instr->InputAt(1)->IsRegister()) { ++ __ Nor(i.OutputRegister(), i.InputRegister(0), i.InputOperand(1)); ++ } else { ++ DCHECK_EQ(0, i.InputOperand(1).immediate()); ++ __ Nor(i.OutputRegister(), i.InputRegister(0), zero_reg); ++ } ++ break; ++ case kLa64Nor32: ++ if (instr->InputAt(1)->IsRegister()) { ++ __ Nor(i.OutputRegister(), i.InputRegister(0), i.InputOperand(1)); ++ __ slli_w(i.OutputRegister(), i.OutputRegister(), 0x0); ++ } else { ++ DCHECK_EQ(0, i.InputOperand(1).immediate()); ++ __ Nor(i.OutputRegister(), i.InputRegister(0), zero_reg); ++ __ slli_w(i.OutputRegister(), i.OutputRegister(), 0x0); ++ } ++ break; ++ case kLa64Xor: ++ __ Xor(i.OutputRegister(), i.InputRegister(0), i.InputOperand(1)); ++ break; ++ case kLa64Xor32: ++ __ Xor(i.OutputRegister(), i.InputRegister(0), i.InputOperand(1)); ++ __ slli_w(i.OutputRegister(), i.OutputRegister(), 0x0); ++ break; ++ case kLa64Clz: ++ __ Clz_w(i.OutputRegister(), i.InputRegister(0)); ++ break; ++ case kLa64Dclz: ++ __ clz_d(i.OutputRegister(), i.InputRegister(0)); ++ break; ++ case kLa64Ctz: { ++ Register src = i.InputRegister(0); ++ Register dst = i.OutputRegister(); ++ __ Ctz_w(dst, src); ++ } break; ++ case kLa64Dctz: { ++ Register src = i.InputRegister(0); ++ Register dst = i.OutputRegister(); ++ __ Ctz_d(dst, src); ++ } break; ++ case kLa64Popcnt: { ++ Register src = i.InputRegister(0); ++ Register dst = i.OutputRegister(); ++ __ Popcnt_w(dst, src); ++ } break; ++ case kLa64Dpopcnt: { ++ Register src = i.InputRegister(0); ++ Register dst = i.OutputRegister(); ++ __ Popcnt_d(dst, src); ++ } break; ++ case kLa64Shl: ++ if (instr->InputAt(1)->IsRegister()) { ++ __ sll_w(i.OutputRegister(), i.InputRegister(0), i.InputRegister(1)); ++ } else { ++ int64_t imm = i.InputOperand(1).immediate(); ++ __ slli_w(i.OutputRegister(), i.InputRegister(0), ++ static_cast(imm)); ++ } ++ break; ++ case kLa64Shr: ++ if (instr->InputAt(1)->IsRegister()) { ++ __ slli_w(i.InputRegister(0), i.InputRegister(0), 0x0); ++ __ srl_w(i.OutputRegister(), i.InputRegister(0), i.InputRegister(1)); ++ } else { ++ int64_t imm = i.InputOperand(1).immediate(); ++ __ slli_w(i.OutputRegister(), i.InputRegister(0), 0x0); ++ __ srli_w(i.OutputRegister(), i.OutputRegister(), ++ static_cast(imm)); ++ } ++ break; ++ case kLa64Sar: ++ if (instr->InputAt(1)->IsRegister()) { ++ __ slli_w(i.InputRegister(0), i.InputRegister(0), 0x0); ++ __ sra_w(i.OutputRegister(), i.InputRegister(0), i.InputRegister(1)); ++ } else { ++ int64_t imm = i.InputOperand(1).immediate(); ++ __ slli_w(i.OutputRegister(), i.InputRegister(0), 0x0); ++ __ srai_w(i.OutputRegister(), i.OutputRegister(), ++ static_cast(imm)); ++ } ++ break; ++ case kLa64Ext: ++ __ bstrpick_w(i.OutputRegister(), i.InputRegister(0), ++ i.InputInt8(1) + i.InputInt8(2) - 1, i.InputInt8(1)); ++ break; ++ case kLa64Ins: ++ if (instr->InputAt(1)->IsImmediate() && i.InputInt8(1) == 0) { ++ __ bstrins_w(i.OutputRegister(), zero_reg, ++ i.InputInt8(1) + i.InputInt8(2) - 1, i.InputInt8(1)); ++ } else { ++ __ bstrins_w(i.OutputRegister(), i.InputRegister(0), 
++ i.InputInt8(1) + i.InputInt8(2) - 1, i.InputInt8(1)); ++ } ++ break; ++ case kLa64Dext: { ++ __ bstrpick_d(i.OutputRegister(), i.InputRegister(0), ++ i.InputInt8(1) + i.InputInt8(2) - 1, i.InputInt8(1)); ++ break; ++ } ++ case kLa64Dins: ++ if (instr->InputAt(1)->IsImmediate() && i.InputInt8(1) == 0) { ++ __ bstrins_d(i.OutputRegister(), zero_reg, ++ i.InputInt8(1) + i.InputInt8(2) - 1, i.InputInt8(1)); ++ } else { ++ __ bstrins_d(i.OutputRegister(), i.InputRegister(0), ++ i.InputInt8(1) + i.InputInt8(2) - 1, i.InputInt8(1)); ++ } ++ break; ++ case kLa64Dshl: ++ if (instr->InputAt(1)->IsRegister()) { ++ __ sll_d(i.OutputRegister(), i.InputRegister(0), i.InputRegister(1)); ++ } else { ++ int64_t imm = i.InputOperand(1).immediate(); ++ __ slli_d(i.OutputRegister(), i.InputRegister(0), ++ static_cast(imm)); ++ } ++ break; ++ case kLa64Dshr: ++ if (instr->InputAt(1)->IsRegister()) { ++ __ srl_d(i.OutputRegister(), i.InputRegister(0), i.InputRegister(1)); ++ } else { ++ int64_t imm = i.InputOperand(1).immediate(); ++ __ srli_d(i.OutputRegister(), i.InputRegister(0), ++ static_cast(imm)); ++ } ++ break; ++ case kLa64Dsar: ++ if (instr->InputAt(1)->IsRegister()) { ++ __ sra_d(i.OutputRegister(), i.InputRegister(0), i.InputRegister(1)); ++ } else { ++ int64_t imm = i.InputOperand(1).immediate(); ++ __ srai_d(i.OutputRegister(), i.InputRegister(0), imm); ++ } ++ break; ++ case kLa64Ror: ++ __ Rotr_w(i.OutputRegister(), i.InputRegister(0), i.InputOperand(1)); ++ break; ++ case kLa64Dror: ++ __ Rotr_d(i.OutputRegister(), i.InputRegister(0), i.InputOperand(1)); ++ break; ++ case kLa64Tst: ++ __ And(kScratchReg, i.InputRegister(0), i.InputOperand(1)); ++ // Pseudo-instruction used for cmp/branch. No opcode emitted here. ++ break; ++ case kLa64Cmp: ++ // Pseudo-instruction used for cmp/branch. No opcode emitted here. ++ break; ++ case kLa64Mov: ++ // TODO(plind): Should we combine mov/li like this, or use separate instr? ++ // - Also see x64 ASSEMBLE_BINOP & RegisterOrOperandType ++ if (HasRegisterInput(instr, 0)) { ++ __ mov(i.OutputRegister(), i.InputRegister(0)); ++ } else { ++ __ li(i.OutputRegister(), i.InputOperand(0)); ++ } ++ break; ++ ++ case kLa64CmpS: { ++ FPURegister left = i.InputOrZeroSingleRegister(0); ++ FPURegister right = i.InputOrZeroSingleRegister(1); ++ bool predicate; ++ FPUCondition cc = ++ FlagsConditionToConditionCmpFPU(&predicate, instr->flags_condition()); ++ ++ if ((left == kDoubleRegZero || right == kDoubleRegZero) && ++ !__ IsDoubleZeroRegSet()) { ++ __ Move(kDoubleRegZero, 0.0); ++ } ++ ++ __ CompareF32(left, right, cc); ++ } break; ++ case kLa64AddS: ++ // TODO(plind): add special case: combine mult & add. ++ __ fadd_s(i.OutputDoubleRegister(), i.InputDoubleRegister(0), ++ i.InputDoubleRegister(1)); ++ break; ++ case kLa64SubS: ++ __ fsub_s(i.OutputDoubleRegister(), i.InputDoubleRegister(0), ++ i.InputDoubleRegister(1)); ++ break; ++ case kLa64MulS: ++ // TODO(plind): add special case: right op is -1.0, see arm port. ++ __ fmul_s(i.OutputDoubleRegister(), i.InputDoubleRegister(0), ++ i.InputDoubleRegister(1)); ++ break; ++ case kLa64DivS: ++ __ fdiv_s(i.OutputDoubleRegister(), i.InputDoubleRegister(0), ++ i.InputDoubleRegister(1)); ++ break; ++ case kLa64ModS: { ++ // TODO(bmeurer): We should really get rid of this special instruction, ++ // and generate a CallAddress instruction instead. 
++ FrameScope scope(tasm(), StackFrame::MANUAL); ++ __ PrepareCallCFunction(0, 2, kScratchReg); ++ __ MovToFloatParameters(i.InputDoubleRegister(0), ++ i.InputDoubleRegister(1)); ++ // TODO(balazs.kilvady): implement mod_two_floats_operation(isolate()) ++ __ CallCFunction(ExternalReference::mod_two_doubles_operation(), 0, 2); ++ // Move the result in the double result register. ++ __ MovFromFloatResult(i.OutputSingleRegister()); ++ break; ++ } ++ case kLa64AbsS: ++ __ fabs_s(i.OutputSingleRegister(), i.InputSingleRegister(0)); ++ break; ++ case kLa64NegS: ++ __ Neg_s(i.OutputSingleRegister(), i.InputSingleRegister(0)); ++ break; ++ case kLa64SqrtS: { ++ __ fsqrt_s(i.OutputDoubleRegister(), i.InputDoubleRegister(0)); ++ break; ++ } ++ case kLa64MaxS: ++ __ fmax_s(i.OutputDoubleRegister(), i.InputDoubleRegister(0), ++ i.InputDoubleRegister(1)); ++ break; ++ case kLa64MinS: ++ __ fmin_s(i.OutputDoubleRegister(), i.InputDoubleRegister(0), ++ i.InputDoubleRegister(1)); ++ break; ++ case kLa64CmpD: { ++ FPURegister left = i.InputOrZeroDoubleRegister(0); ++ FPURegister right = i.InputOrZeroDoubleRegister(1); ++ bool predicate; ++ FPUCondition cc = ++ FlagsConditionToConditionCmpFPU(&predicate, instr->flags_condition()); ++ if ((left == kDoubleRegZero || right == kDoubleRegZero) && ++ !__ IsDoubleZeroRegSet()) { ++ __ Move(kDoubleRegZero, 0.0); ++ } ++ ++ __ CompareF64(left, right, cc); ++ } break; ++ case kLa64AddD: ++ // TODO(plind): add special case: combine mult & add. ++ __ fadd_d(i.OutputDoubleRegister(), i.InputDoubleRegister(0), ++ i.InputDoubleRegister(1)); ++ break; ++ case kLa64SubD: ++ __ fsub_d(i.OutputDoubleRegister(), i.InputDoubleRegister(0), ++ i.InputDoubleRegister(1)); ++ break; ++ case kLa64MulD: ++ // TODO(plind): add special case: right op is -1.0, see arm port. ++ __ fmul_d(i.OutputDoubleRegister(), i.InputDoubleRegister(0), ++ i.InputDoubleRegister(1)); ++ break; ++ case kLa64DivD: ++ __ fdiv_d(i.OutputDoubleRegister(), i.InputDoubleRegister(0), ++ i.InputDoubleRegister(1)); ++ break; ++ case kLa64ModD: { ++ // TODO(bmeurer): We should really get rid of this special instruction, ++ // and generate a CallAddress instruction instead. ++ FrameScope scope(tasm(), StackFrame::MANUAL); ++ __ PrepareCallCFunction(0, 2, kScratchReg); ++ __ MovToFloatParameters(i.InputDoubleRegister(0), ++ i.InputDoubleRegister(1)); ++ __ CallCFunction(ExternalReference::mod_two_doubles_operation(), 0, 2); ++ // Move the result in the double result register. 
++ __ MovFromFloatResult(i.OutputDoubleRegister()); ++ break; ++ } ++ case kLa64AbsD: ++ __ fabs_d(i.OutputDoubleRegister(), i.InputDoubleRegister(0)); ++ break; ++ case kLa64NegD: ++ __ Neg_d(i.OutputDoubleRegister(), i.InputDoubleRegister(0)); ++ break; ++ case kLa64SqrtD: { ++ __ fsqrt_d(i.OutputDoubleRegister(), i.InputDoubleRegister(0)); ++ break; ++ } ++ case kLa64MaxD: ++ __ fmax_d(i.OutputDoubleRegister(), i.InputDoubleRegister(0), ++ i.InputDoubleRegister(1)); ++ break; ++ case kLa64MinD: ++ __ fmin_d(i.OutputDoubleRegister(), i.InputDoubleRegister(0), ++ i.InputDoubleRegister(1)); ++ break; ++ case kLa64Float64RoundDown: { ++ __ Floor_d(i.OutputDoubleRegister(), i.InputDoubleRegister(0)); ++ break; ++ } ++ case kLa64Float32RoundDown: { ++ __ Floor_s(i.OutputSingleRegister(), i.InputSingleRegister(0)); ++ break; ++ } ++ case kLa64Float64RoundTruncate: { ++ __ Trunc_d(i.OutputDoubleRegister(), i.InputDoubleRegister(0)); ++ break; ++ } ++ case kLa64Float32RoundTruncate: { ++ __ Trunc_s(i.OutputSingleRegister(), i.InputSingleRegister(0)); ++ break; ++ } ++ case kLa64Float64RoundUp: { ++ __ Ceil_d(i.OutputDoubleRegister(), i.InputDoubleRegister(0)); ++ break; ++ } ++ case kLa64Float32RoundUp: { ++ __ Ceil_s(i.OutputSingleRegister(), i.InputSingleRegister(0)); ++ break; ++ } ++ case kLa64Float64RoundTiesEven: { ++ __ Round_d(i.OutputDoubleRegister(), i.InputDoubleRegister(0)); ++ break; ++ } ++ case kLa64Float32RoundTiesEven: { ++ __ Round_s(i.OutputSingleRegister(), i.InputSingleRegister(0)); ++ break; ++ } ++ case kLa64Float32Max: { ++ FPURegister dst = i.OutputSingleRegister(); ++ FPURegister src1 = i.InputSingleRegister(0); ++ FPURegister src2 = i.InputSingleRegister(1); ++ auto ool = new (zone()) OutOfLineFloat32Max(this, dst, src1, src2); ++ __ Float32Max(dst, src1, src2, ool->entry()); ++ __ bind(ool->exit()); ++ break; ++ } ++ case kLa64Float64Max: { ++ FPURegister dst = i.OutputDoubleRegister(); ++ FPURegister src1 = i.InputDoubleRegister(0); ++ FPURegister src2 = i.InputDoubleRegister(1); ++ auto ool = new (zone()) OutOfLineFloat64Max(this, dst, src1, src2); ++ __ Float64Max(dst, src1, src2, ool->entry()); ++ __ bind(ool->exit()); ++ break; ++ } ++ case kLa64Float32Min: { ++ FPURegister dst = i.OutputSingleRegister(); ++ FPURegister src1 = i.InputSingleRegister(0); ++ FPURegister src2 = i.InputSingleRegister(1); ++ auto ool = new (zone()) OutOfLineFloat32Min(this, dst, src1, src2); ++ __ Float32Min(dst, src1, src2, ool->entry()); ++ __ bind(ool->exit()); ++ break; ++ } ++ case kLa64Float64Min: { ++ FPURegister dst = i.OutputDoubleRegister(); ++ FPURegister src1 = i.InputDoubleRegister(0); ++ FPURegister src2 = i.InputDoubleRegister(1); ++ auto ool = new (zone()) OutOfLineFloat64Min(this, dst, src1, src2); ++ __ Float64Min(dst, src1, src2, ool->entry()); ++ __ bind(ool->exit()); ++ break; ++ } ++ case kLa64Float64SilenceNaN: ++ __ FPUCanonicalizeNaN(i.OutputDoubleRegister(), i.InputDoubleRegister(0)); ++ break; ++ case kLa64CvtSD: ++ __ fcvt_s_d(i.OutputSingleRegister(), i.InputDoubleRegister(0)); ++ break; ++ case kLa64CvtDS: ++ __ fcvt_d_s(i.OutputDoubleRegister(), i.InputSingleRegister(0)); ++ break; ++ case kLa64CvtDW: { ++ FPURegister scratch = kScratchDoubleReg; ++ __ movgr2fr_w(scratch, i.InputRegister(0)); ++ __ ffint_d_w(i.OutputDoubleRegister(), scratch); ++ break; ++ } ++ case kLa64CvtSW: { ++ FPURegister scratch = kScratchDoubleReg; ++ __ movgr2fr_w(scratch, i.InputRegister(0)); ++ __ ffint_s_w(i.OutputDoubleRegister(), scratch); ++ break; ++ } ++ case kLa64CvtSUw: 
{ ++ __ Ffint_s_uw(i.OutputDoubleRegister(), i.InputRegister(0)); ++ break; ++ } ++ case kLa64CvtSL: { ++ FPURegister scratch = kScratchDoubleReg; ++ __ movgr2fr_d(scratch, i.InputRegister(0)); ++ __ ffint_s_l(i.OutputDoubleRegister(), scratch); ++ break; ++ } ++ case kLa64CvtDL: { ++ FPURegister scratch = kScratchDoubleReg; ++ __ movgr2fr_d(scratch, i.InputRegister(0)); ++ __ ffint_d_l(i.OutputDoubleRegister(), scratch); ++ break; ++ } ++ case kLa64CvtDUw: { ++ __ Ffint_d_uw(i.OutputDoubleRegister(), i.InputRegister(0)); ++ break; ++ } ++ case kLa64CvtDUl: { ++ __ Ffint_d_ul(i.OutputDoubleRegister(), i.InputRegister(0)); ++ break; ++ } ++ case kLa64CvtSUl: { ++ __ Ffint_s_ul(i.OutputDoubleRegister(), i.InputRegister(0)); ++ break; ++ } ++ case kLa64FloorWD: { ++ FPURegister scratch = kScratchDoubleReg; ++ __ ftintrm_w_d(scratch, i.InputDoubleRegister(0)); ++ __ movfr2gr_s(i.OutputRegister(), scratch); ++ break; ++ } ++ case kLa64CeilWD: { ++ FPURegister scratch = kScratchDoubleReg; ++ __ ftintrp_w_d(scratch, i.InputDoubleRegister(0)); ++ __ movfr2gr_s(i.OutputRegister(), scratch); ++ break; ++ } ++ case kLa64RoundWD: { ++ FPURegister scratch = kScratchDoubleReg; ++ __ ftintrne_w_d(scratch, i.InputDoubleRegister(0)); ++ __ movfr2gr_s(i.OutputRegister(), scratch); ++ break; ++ } ++ case kLa64TruncWD: { ++ FPURegister scratch = kScratchDoubleReg; ++ // Other arches use round to zero here, so we follow. ++ __ ftintrz_w_d(scratch, i.InputDoubleRegister(0)); ++ __ movfr2gr_s(i.OutputRegister(), scratch); ++ break; ++ } ++ case kLa64FloorWS: { ++ FPURegister scratch = kScratchDoubleReg; ++ __ ftintrm_w_s(scratch, i.InputDoubleRegister(0)); ++ __ movfr2gr_s(i.OutputRegister(), scratch); ++ break; ++ } ++ case kLa64CeilWS: { ++ FPURegister scratch = kScratchDoubleReg; ++ __ ftintrp_w_s(scratch, i.InputDoubleRegister(0)); ++ __ movfr2gr_s(i.OutputRegister(), scratch); ++ break; ++ } ++ case kLa64RoundWS: { ++ FPURegister scratch = kScratchDoubleReg; ++ __ ftintrne_w_s(scratch, i.InputDoubleRegister(0)); ++ __ movfr2gr_s(i.OutputRegister(), scratch); ++ break; ++ } ++ case kLa64TruncWS: { ++ FPURegister scratch = kScratchDoubleReg; ++ __ ftintrz_w_s(scratch, i.InputDoubleRegister(0)); ++ __ movfr2gr_s(i.OutputRegister(), scratch); ++ // Avoid INT32_MAX as an overflow indicator and use INT32_MIN instead, ++ // because INT32_MIN allows easier out-of-bounds detection. ++ __ addi_w(kScratchReg, i.OutputRegister(), 1); ++ __ slt(kScratchReg2, kScratchReg, i.OutputRegister()); ++ __ Movn(i.OutputRegister(), kScratchReg, kScratchReg2); ++ break; ++ } ++ case kLa64TruncLS: { ++ FPURegister scratch = kScratchDoubleReg; ++ Register tmp_fcsr = kScratchReg; ++ Register result = kScratchReg2; ++ ++ bool load_status = instr->OutputCount() > 1; ++ if (load_status) { ++ // Save FCSR. ++ __ movfcsr2gr(tmp_fcsr); // __ cfc1(tmp_fcsr, FCSR); ++ // Clear FPU flags. ++ __ movgr2fcsr(zero_reg); // __ ctc1(zero_reg, FCSR); ++ } ++ // Other arches use round to zero here, so we follow. ++ __ ftintrz_l_s(scratch, i.InputDoubleRegister(0)); ++ __ movfr2gr_d(i.OutputRegister(), scratch); ++ if (load_status) { ++ __ movfcsr2gr(result); // __ cfc1(result, FCSR); ++ // Check for overflow and NaNs. 
++ __ And(result, result, ++ (kFCSROverflowFlagMask | kFCSRInvalidOpFlagMask)); ++ __ Slt(result, zero_reg, result); ++ __ xori(result, result, 1); ++ __ mov(i.OutputRegister(1), result); ++ // Restore FCSR ++ __ movgr2fcsr(tmp_fcsr); // __ ctc1(tmp_fcsr, FCSR); ++ } ++ break; ++ } ++ case kLa64TruncLD: { ++ FPURegister scratch = kScratchDoubleReg; ++ Register tmp_fcsr = kScratchReg; ++ Register result = kScratchReg2; ++ ++ bool load_status = instr->OutputCount() > 1; ++ if (load_status) { ++ // Save FCSR. ++ __ movfcsr2gr(tmp_fcsr); // __ cfc1(tmp_fcsr, FCSR); ++ // Clear FPU flags. ++ __ movgr2fcsr(zero_reg); // __ ctc1(zero_reg, FCSR); ++ } ++ // Other arches use round to zero here, so we follow. ++ __ ftintrz_l_d(scratch, i.InputDoubleRegister(0)); ++ __ movfr2gr_d(i.OutputRegister(0), scratch); ++ if (load_status) { ++ __ movfcsr2gr(result); // __ cfc1(result, FCSR); ++ // Check for overflow and NaNs. ++ __ And(result, result, ++ (kFCSROverflowFlagMask | kFCSRInvalidOpFlagMask)); ++ __ Slt(result, zero_reg, result); ++ __ xori(result, result, 1); ++ __ mov(i.OutputRegister(1), result); ++ // Restore FCSR ++ __ movgr2fcsr(tmp_fcsr); // __ ctc1(tmp_fcsr, FCSR); ++ } ++ break; ++ } ++ case kLa64TruncUwD: { ++ FPURegister scratch = kScratchDoubleReg; ++ __ Ftintrz_uw_d(i.OutputRegister(), i.InputDoubleRegister(0), scratch); ++ break; ++ } ++ case kLa64TruncUwS: { ++ FPURegister scratch = kScratchDoubleReg; ++ __ Ftintrz_uw_s(i.OutputRegister(), i.InputDoubleRegister(0), scratch); ++ // Avoid UINT32_MAX as an overflow indicator and use 0 instead, ++ // because 0 allows easier out-of-bounds detection. ++ __ addi_w(kScratchReg, i.OutputRegister(), 1); ++ __ Movz(i.OutputRegister(), zero_reg, kScratchReg); ++ break; ++ } ++ case kLa64TruncUlS: { ++ FPURegister scratch = kScratchDoubleReg; ++ Register result = instr->OutputCount() > 1 ? i.OutputRegister(1) : no_reg; ++ __ Ftintrz_ul_s(i.OutputRegister(), i.InputDoubleRegister(0), scratch, ++ result); ++ break; ++ } ++ case kLa64TruncUlD: { ++ FPURegister scratch = kScratchDoubleReg; ++ Register result = instr->OutputCount() > 1 ? i.OutputRegister(1) : no_reg; ++ __ Ftintrz_ul_d(i.OutputRegister(0), i.InputDoubleRegister(0), scratch, ++ result); ++ break; ++ } ++ case kLa64BitcastDL: ++ __ movfr2gr_d(i.OutputRegister(), i.InputDoubleRegister(0)); ++ break; ++ case kLa64BitcastLD: ++ __ movgr2fr_d(i.OutputDoubleRegister(), i.InputRegister(0)); ++ break; ++ case kLa64Float64ExtractLowWord32: ++ __ FmoveLow(i.OutputRegister(), i.InputDoubleRegister(0)); ++ break; ++ case kLa64Float64ExtractHighWord32: ++ __ movfrh2gr_s(i.OutputRegister(), i.InputDoubleRegister(0)); ++ break; ++ case kLa64Float64InsertLowWord32: ++ __ FmoveLow(i.OutputDoubleRegister(), i.InputRegister(1)); ++ break; ++ case kLa64Float64InsertHighWord32: ++ __ movgr2frh_w(i.OutputDoubleRegister(), i.InputRegister(1)); ++ break; ++ // ... more basic instructions ... 
++ ++ case kLa64Seb: ++ __ ext_w_b(i.OutputRegister(), i.InputRegister(0)); ++ break; ++ case kLa64Seh: ++ __ ext_w_h(i.OutputRegister(), i.InputRegister(0)); ++ break; ++ case kLa64Lbu: ++ __ Ld_bu(i.OutputRegister(), i.MemoryOperand()); ++ EmitWordLoadPoisoningIfNeeded(this, opcode, instr, i); ++ break; ++ case kLa64Lb: ++ __ Ld_b(i.OutputRegister(), i.MemoryOperand()); ++ EmitWordLoadPoisoningIfNeeded(this, opcode, instr, i); ++ break; ++ case kLa64Sb: ++ __ St_b(i.InputOrZeroRegister(2), i.MemoryOperand()); ++ break; ++ case kLa64Lhu: ++ __ Ld_hu(i.OutputRegister(), i.MemoryOperand()); ++ EmitWordLoadPoisoningIfNeeded(this, opcode, instr, i); ++ break; ++ case kLa64Ulhu: ++ __ Ld_hu(i.OutputRegister(), i.MemoryOperand()); ++ EmitWordLoadPoisoningIfNeeded(this, opcode, instr, i); ++ break; ++ case kLa64Lh: ++ __ Ld_h(i.OutputRegister(), i.MemoryOperand()); ++ EmitWordLoadPoisoningIfNeeded(this, opcode, instr, i); ++ break; ++ case kLa64Ulh: ++ __ Ld_h(i.OutputRegister(), i.MemoryOperand()); ++ EmitWordLoadPoisoningIfNeeded(this, opcode, instr, i); ++ break; ++ case kLa64Sh: ++ __ St_h(i.InputOrZeroRegister(2), i.MemoryOperand()); ++ break; ++ case kLa64Ush: ++ __ St_h(i.InputOrZeroRegister(2), i.MemoryOperand()); ++ break; ++ case kLa64Lw: ++ __ Ld_w(i.OutputRegister(), i.MemoryOperand()); ++ EmitWordLoadPoisoningIfNeeded(this, opcode, instr, i); ++ break; ++ case kLa64Ulw: ++ __ Ld_w(i.OutputRegister(), i.MemoryOperand()); ++ EmitWordLoadPoisoningIfNeeded(this, opcode, instr, i); ++ break; ++ case kLa64Lwu: ++ __ Ld_wu(i.OutputRegister(), i.MemoryOperand()); ++ EmitWordLoadPoisoningIfNeeded(this, opcode, instr, i); ++ break; ++ case kLa64Ulwu: ++ __ Ld_wu(i.OutputRegister(), i.MemoryOperand()); ++ EmitWordLoadPoisoningIfNeeded(this, opcode, instr, i); ++ break; ++ case kLa64Ld: ++ __ Ld_d(i.OutputRegister(), i.MemoryOperand()); ++ EmitWordLoadPoisoningIfNeeded(this, opcode, instr, i); ++ break; ++ case kLa64Uld: ++ __ Ld_d(i.OutputRegister(), i.MemoryOperand()); ++ EmitWordLoadPoisoningIfNeeded(this, opcode, instr, i); ++ break; ++ case kLa64Sw: ++ __ St_w(i.InputOrZeroRegister(2), i.MemoryOperand()); ++ break; ++ case kLa64Usw: ++ __ St_w(i.InputOrZeroRegister(2), i.MemoryOperand()); ++ break; ++ case kLa64Sd: ++ __ St_d(i.InputOrZeroRegister(2), i.MemoryOperand()); ++ break; ++ case kLa64Usd: ++ __ St_d(i.InputOrZeroRegister(2), i.MemoryOperand()); ++ break; ++ case kLa64Lwc1: { ++ __ Fld_s(i.OutputSingleRegister(), i.MemoryOperand()); ++ break; ++ } ++ case kLa64Ulwc1: { ++ __ Fld_s(i.OutputSingleRegister(), i.MemoryOperand()); ++ break; ++ } ++ case kLa64Swc1: { ++ size_t index = 0; ++ MemOperand operand = i.MemoryOperand(&index); ++ FPURegister ft = i.InputOrZeroSingleRegister(index); ++ if (ft == kDoubleRegZero && !__ IsDoubleZeroRegSet()) { ++ __ Move(kDoubleRegZero, 0.0); ++ } ++ ++ __ Fst_s(ft, operand); ++ break; ++ } ++ case kLa64Uswc1: { ++ size_t index = 0; ++ MemOperand operand = i.MemoryOperand(&index); ++ FPURegister ft = i.InputOrZeroSingleRegister(index); ++ if (ft == kDoubleRegZero && !__ IsDoubleZeroRegSet()) { ++ __ Move(kDoubleRegZero, 0.0); ++ } ++ ++ __ Fst_s(ft, operand); ++ break; ++ } ++ case kLa64Ldc1: ++ __ Fld_d(i.OutputDoubleRegister(), i.MemoryOperand()); ++ break; ++ case kLa64Uldc1: ++ __ Fld_d(i.OutputDoubleRegister(), i.MemoryOperand()); ++ break; ++ case kLa64Sdc1: { ++ FPURegister ft = i.InputOrZeroDoubleRegister(2); ++ if (ft == kDoubleRegZero && !__ IsDoubleZeroRegSet()) { ++ __ Move(kDoubleRegZero, 0.0); ++ } ++ ++ __ Fst_d(ft, 
i.MemoryOperand()); ++ break; ++ } ++ case kLa64Usdc1: { ++ FPURegister ft = i.InputOrZeroDoubleRegister(2); ++ if (ft == kDoubleRegZero && !__ IsDoubleZeroRegSet()) { ++ __ Move(kDoubleRegZero, 0.0); ++ } ++ ++ __ Fst_d(ft, i.MemoryOperand()); ++ break; ++ } ++ case kLa64Sync: { ++ __ dbar(0); ++ break; ++ } ++ case kLa64Push: ++ if (instr->InputAt(0)->IsFPRegister()) { ++ __ Fst_d(i.InputDoubleRegister(0), MemOperand(sp, -kDoubleSize)); ++ __ Sub_d(sp, sp, Operand(kDoubleSize)); ++ frame_access_state()->IncreaseSPDelta(kDoubleSize / kSystemPointerSize); ++ } else { ++ __ Push(i.InputRegister(0)); ++ frame_access_state()->IncreaseSPDelta(1); ++ } ++ break; ++ case kLa64Peek: { ++ // The incoming value is 0-based, but we need a 1-based value. ++ int reverse_slot = i.InputInt32(0) + 1; ++ int offset = ++ FrameSlotToFPOffset(frame()->GetTotalFrameSlotCount() - reverse_slot); ++ if (instr->OutputAt(0)->IsFPRegister()) { ++ LocationOperand* op = LocationOperand::cast(instr->OutputAt(0)); ++ if (op->representation() == MachineRepresentation::kFloat64) { ++ __ Fld_d(i.OutputDoubleRegister(), MemOperand(fp, offset)); ++ } else { ++ DCHECK_EQ(op->representation(), MachineRepresentation::kFloat32); ++ __ Fld_s( ++ i.OutputSingleRegister(0), ++ MemOperand(fp, offset + kLessSignificantWordInDoublewordOffset)); ++ } ++ } else { ++ __ Ld_d(i.OutputRegister(0), MemOperand(fp, offset)); ++ } ++ break; ++ } ++ case kLa64StackClaim: { ++ __ Sub_d(sp, sp, Operand(i.InputInt32(0))); ++ frame_access_state()->IncreaseSPDelta(i.InputInt32(0) / ++ kSystemPointerSize); ++ break; ++ } ++ case kLa64StoreToStackSlot: { ++ if (instr->InputAt(0)->IsFPRegister()) { ++ __ Fst_d(i.InputDoubleRegister(0), MemOperand(sp, i.InputInt32(1))); ++ } else { ++ __ St_d(i.InputRegister(0), MemOperand(sp, i.InputInt32(1))); ++ } ++ break; ++ } ++ case kLa64ByteSwap64: { ++ __ ByteSwapSigned(i.OutputRegister(0), i.InputRegister(0), 8); ++ break; ++ } ++ case kLa64ByteSwap32: { ++ __ ByteSwapSigned(i.OutputRegister(0), i.InputRegister(0), 4); ++ break; ++ } ++ case kWord32AtomicLoadInt8: ++ ASSEMBLE_ATOMIC_LOAD_INTEGER(Ld_b); ++ break; ++ case kWord32AtomicLoadUint8: ++ ASSEMBLE_ATOMIC_LOAD_INTEGER(Ld_bu); ++ break; ++ case kWord32AtomicLoadInt16: ++ ASSEMBLE_ATOMIC_LOAD_INTEGER(Ld_h); ++ break; ++ case kWord32AtomicLoadUint16: ++ ASSEMBLE_ATOMIC_LOAD_INTEGER(Ld_hu); ++ break; ++ case kWord32AtomicLoadWord32: ++ ASSEMBLE_ATOMIC_LOAD_INTEGER(Ld_w); ++ break; ++ case kLa64Word64AtomicLoadUint8: ++ ASSEMBLE_ATOMIC_LOAD_INTEGER(Ld_bu); ++ break; ++ case kLa64Word64AtomicLoadUint16: ++ ASSEMBLE_ATOMIC_LOAD_INTEGER(Ld_hu); ++ break; ++ case kLa64Word64AtomicLoadUint32: ++ ASSEMBLE_ATOMIC_LOAD_INTEGER(Ld_wu); ++ break; ++ case kLa64Word64AtomicLoadUint64: ++ ASSEMBLE_ATOMIC_LOAD_INTEGER(Ld_d); ++ break; ++ case kWord32AtomicStoreWord8: ++ ASSEMBLE_ATOMIC_STORE_INTEGER(St_b); ++ break; ++ case kWord32AtomicStoreWord16: ++ ASSEMBLE_ATOMIC_STORE_INTEGER(St_h); ++ break; ++ case kWord32AtomicStoreWord32: ++ ASSEMBLE_ATOMIC_STORE_INTEGER(St_w); ++ break; ++ case kLa64Word64AtomicStoreWord8: ++ ASSEMBLE_ATOMIC_STORE_INTEGER(St_b); ++ break; ++ case kLa64Word64AtomicStoreWord16: ++ ASSEMBLE_ATOMIC_STORE_INTEGER(St_h); ++ break; ++ case kLa64Word64AtomicStoreWord32: ++ ASSEMBLE_ATOMIC_STORE_INTEGER(St_w); ++ break; ++ case kLa64Word64AtomicStoreWord64: ++ ASSEMBLE_ATOMIC_STORE_INTEGER(St_d); ++ break; ++ case kWord32AtomicExchangeInt8: ++ ASSEMBLE_ATOMIC_EXCHANGE_INTEGER_EXT(Ll_w, Sc_w, true, 8, 32); ++ break; ++ case kWord32AtomicExchangeUint8: ++ 
ASSEMBLE_ATOMIC_EXCHANGE_INTEGER_EXT(Ll_w, Sc_w, false, 8, 32); ++ break; ++ case kWord32AtomicExchangeInt16: ++ ASSEMBLE_ATOMIC_EXCHANGE_INTEGER_EXT(Ll_w, Sc_w, true, 16, 32); ++ break; ++ case kWord32AtomicExchangeUint16: ++ ASSEMBLE_ATOMIC_EXCHANGE_INTEGER_EXT(Ll_w, Sc_w, false, 16, 32); ++ break; ++ case kWord32AtomicExchangeWord32: ++ __ add_d(i.TempRegister(0), i.InputRegister(0), i.InputRegister(1)); ++ __ amswap_db_w(i.OutputRegister(0), i.InputRegister(2), ++ i.TempRegister(0)); ++ break; ++ case kLa64Word64AtomicExchangeUint8: ++ ASSEMBLE_ATOMIC_EXCHANGE_INTEGER_EXT(Ll_d, Sc_d, false, 8, 64); ++ break; ++ case kLa64Word64AtomicExchangeUint16: ++ ASSEMBLE_ATOMIC_EXCHANGE_INTEGER_EXT(Ll_d, Sc_d, false, 16, 64); ++ break; ++ case kLa64Word64AtomicExchangeUint32: ++ ASSEMBLE_ATOMIC_EXCHANGE_INTEGER_EXT(Ll_d, Sc_d, false, 32, 64); ++ break; ++ case kLa64Word64AtomicExchangeUint64: ++ __ add_d(i.TempRegister(0), i.InputRegister(0), i.InputRegister(1)); ++ __ amswap_db_d(i.OutputRegister(0), i.InputRegister(2), ++ i.TempRegister(0)); ++ break; ++ case kWord32AtomicCompareExchangeInt8: ++ ASSEMBLE_ATOMIC_COMPARE_EXCHANGE_INTEGER_EXT(Ll_w, Sc_w, true, 8, 32); ++ break; ++ case kWord32AtomicCompareExchangeUint8: ++ ASSEMBLE_ATOMIC_COMPARE_EXCHANGE_INTEGER_EXT(Ll_w, Sc_w, false, 8, 32); ++ break; ++ case kWord32AtomicCompareExchangeInt16: ++ ASSEMBLE_ATOMIC_COMPARE_EXCHANGE_INTEGER_EXT(Ll_w, Sc_w, true, 16, 32); ++ break; ++ case kWord32AtomicCompareExchangeUint16: ++ ASSEMBLE_ATOMIC_COMPARE_EXCHANGE_INTEGER_EXT(Ll_w, Sc_w, false, 16, 32); ++ break; ++ case kWord32AtomicCompareExchangeWord32: ++ __ slli_w(i.InputRegister(2), i.InputRegister(2), 0); ++ ASSEMBLE_ATOMIC_COMPARE_EXCHANGE_INTEGER(Ll_w, Sc_w); ++ break; ++ case kLa64Word64AtomicCompareExchangeUint8: ++ ASSEMBLE_ATOMIC_COMPARE_EXCHANGE_INTEGER_EXT(Ll_d, Sc_d, false, 8, 64); ++ break; ++ case kLa64Word64AtomicCompareExchangeUint16: ++ ASSEMBLE_ATOMIC_COMPARE_EXCHANGE_INTEGER_EXT(Ll_d, Sc_d, false, 16, 64); ++ break; ++ case kLa64Word64AtomicCompareExchangeUint32: ++ ASSEMBLE_ATOMIC_COMPARE_EXCHANGE_INTEGER_EXT(Ll_d, Sc_d, false, 32, 64); ++ break; ++ case kLa64Word64AtomicCompareExchangeUint64: ++ ASSEMBLE_ATOMIC_COMPARE_EXCHANGE_INTEGER(Ll_d, Sc_d); ++ break; ++ case kWord32AtomicAddWord32: ++ __ Add_d(i.TempRegister(0), i.InputRegister(0), i.InputRegister(1)); ++ __ amadd_db_w(i.OutputRegister(0), i.InputRegister(2), i.TempRegister(0)); ++ break; ++ case kWord32AtomicSubWord32: ++ ASSEMBLE_ATOMIC_BINOP(Ll_w, Sc_w, Sub_w); ++ break; ++ case kWord32AtomicAndWord32: ++ __ Add_d(i.TempRegister(0), i.InputRegister(0), i.InputRegister(1)); ++ __ amand_db_w(i.OutputRegister(0), i.InputRegister(2), i.TempRegister(0)); ++ break; ++ case kWord32AtomicOrWord32: ++ __ Add_d(i.TempRegister(0), i.InputRegister(0), i.InputRegister(1)); ++ __ amor_db_w(i.OutputRegister(0), i.InputRegister(2), i.TempRegister(0)); ++ break; ++ case kWord32AtomicXorWord32: ++ __ Add_d(i.TempRegister(0), i.InputRegister(0), i.InputRegister(1)); ++ __ amxor_db_w(i.OutputRegister(0), i.InputRegister(2), i.TempRegister(0)); ++ break; ++#define ATOMIC_BINOP_CASE(op, inst) \ ++ case kWord32Atomic##op##Int8: \ ++ ASSEMBLE_ATOMIC_BINOP_EXT(Ll_w, Sc_w, true, 8, inst, 32); \ ++ break; \ ++ case kWord32Atomic##op##Uint8: \ ++ ASSEMBLE_ATOMIC_BINOP_EXT(Ll_w, Sc_w, false, 8, inst, 32); \ ++ break; \ ++ case kWord32Atomic##op##Int16: \ ++ ASSEMBLE_ATOMIC_BINOP_EXT(Ll_w, Sc_w, true, 16, inst, 32); \ ++ break; \ ++ case kWord32Atomic##op##Uint16: \ ++ 
ASSEMBLE_ATOMIC_BINOP_EXT(Ll_w, Sc_w, false, 16, inst, 32); \ ++ break; ++ ATOMIC_BINOP_CASE(Add, Add_w) ++ ATOMIC_BINOP_CASE(Sub, Sub_w) ++ ATOMIC_BINOP_CASE(And, And) ++ ATOMIC_BINOP_CASE(Or, Or) ++ ATOMIC_BINOP_CASE(Xor, Xor) ++#undef ATOMIC_BINOP_CASE ++ ++ case kLa64Word64AtomicAddUint64: ++ __ Add_d(i.TempRegister(0), i.InputRegister(0), i.InputRegister(1)); ++ __ amadd_db_d(i.OutputRegister(0), i.InputRegister(2), i.TempRegister(0)); ++ break; ++ case kLa64Word64AtomicSubUint64: ++ ASSEMBLE_ATOMIC_BINOP(Ll_d, Sc_d, Sub_d); ++ break; ++ case kLa64Word64AtomicAndUint64: ++ __ Add_d(i.TempRegister(0), i.InputRegister(0), i.InputRegister(1)); ++ __ amand_db_d(i.OutputRegister(0), i.InputRegister(2), i.TempRegister(0)); ++ break; ++ case kLa64Word64AtomicOrUint64: ++ __ Add_d(i.TempRegister(0), i.InputRegister(0), i.InputRegister(1)); ++ __ amor_db_d(i.OutputRegister(0), i.InputRegister(2), i.TempRegister(0)); ++ break; ++ case kLa64Word64AtomicXorUint64: ++ __ Add_d(i.TempRegister(0), i.InputRegister(0), i.InputRegister(1)); ++ __ amxor_db_d(i.OutputRegister(0), i.InputRegister(2), i.TempRegister(0)); ++ break; ++#define ATOMIC_BINOP_CASE(op, inst) \ ++ case kLa64Word64Atomic##op##Uint8: \ ++ ASSEMBLE_ATOMIC_BINOP_EXT(Ll_d, Sc_d, false, 8, inst, 64); \ ++ break; \ ++ case kLa64Word64Atomic##op##Uint16: \ ++ ASSEMBLE_ATOMIC_BINOP_EXT(Ll_d, Sc_d, false, 16, inst, 64); \ ++ break; \ ++ case kLa64Word64Atomic##op##Uint32: \ ++ ASSEMBLE_ATOMIC_BINOP_EXT(Ll_d, Sc_d, false, 32, inst, 64); \ ++ break; ++ ATOMIC_BINOP_CASE(Add, Add_d) ++ ATOMIC_BINOP_CASE(Sub, Sub_d) ++ ATOMIC_BINOP_CASE(And, And) ++ ATOMIC_BINOP_CASE(Or, Or) ++ ATOMIC_BINOP_CASE(Xor, Xor) ++#undef ATOMIC_BINOP_CASE ++ case kLa64AssertEqual: ++ __ Assert(eq, static_cast(i.InputOperand(2).immediate()), ++ i.InputRegister(0), Operand(i.InputRegister(1))); ++ break; ++ case kLa64S128Zero: ++ case kLa64I32x4Splat: ++ case kLa64I32x4ExtractLane: ++ case kLa64I32x4AddHoriz: ++ case kLa64I32x4Add: ++ case kLa64I32x4ReplaceLane: ++ case kLa64I32x4Sub: ++ case kLa64F64x2Abs: ++ default: ++ break; ++ } ++ return kSuccess; ++} // NOLINT(readability/fn_size) ++ ++#define UNSUPPORTED_COND(opcode, condition) \ ++ StdoutStream{} << "Unsupported " << #opcode << " condition: \"" << condition \ ++ << "\""; \ ++ UNIMPLEMENTED(); ++ ++void AssembleBranchToLabels(CodeGenerator* gen, TurboAssembler* tasm, ++ Instruction* instr, FlagsCondition condition, ++ Label* tlabel, Label* flabel, bool fallthru) { ++#undef __ ++#define __ tasm-> ++ La64OperandConverter i(gen, instr); ++ ++ Condition cc = kNoCondition; ++ // LA64 does not have condition code flags, so compare and branch are ++ // implemented differently than on the other arch's. The compare operations ++ // emit la64 pseudo-instructions, which are handled here by branch ++ // instructions that do the actual comparison. Essential that the input ++ // registers to compare pseudo-op are not modified before this branch op, as ++ // they are tested here. 
++ ++ if (instr->arch_opcode() == kLa64Tst) { ++ cc = FlagsConditionToConditionTst(condition); ++ __ Branch(tlabel, cc, kScratchReg, Operand(zero_reg)); ++ } else if (instr->arch_opcode() == kLa64Dadd || ++ instr->arch_opcode() == kLa64Dsub) { ++ cc = FlagsConditionToConditionOvf(condition); ++ __ srai_d(kScratchReg, i.OutputRegister(), 32); ++ __ srai_w(kScratchReg2, i.OutputRegister(), 31); ++ __ Branch(tlabel, cc, kScratchReg2, Operand(kScratchReg)); ++ } else if (instr->arch_opcode() == kLa64DaddOvf || ++ instr->arch_opcode() == kLa64DsubOvf) { ++ switch (condition) { ++ // Overflow occurs if overflow register is negative ++ case kOverflow: ++ __ Branch(tlabel, lt, kScratchReg, Operand(zero_reg)); ++ break; ++ case kNotOverflow: ++ __ Branch(tlabel, ge, kScratchReg, Operand(zero_reg)); ++ break; ++ default: ++ UNSUPPORTED_COND(instr->arch_opcode(), condition); ++ break; ++ } ++ } else if (instr->arch_opcode() == kLa64MulOvf) { ++ // Overflow occurs if overflow register is not zero ++ switch (condition) { ++ case kOverflow: ++ __ Branch(tlabel, ne, kScratchReg, Operand(zero_reg)); ++ break; ++ case kNotOverflow: ++ __ Branch(tlabel, eq, kScratchReg, Operand(zero_reg)); ++ break; ++ default: ++ UNSUPPORTED_COND(kLa64MulOvf, condition); ++ break; ++ } ++ } else if (instr->arch_opcode() == kLa64Cmp) { ++ cc = FlagsConditionToConditionCmp(condition); ++ __ Branch(tlabel, cc, i.InputRegister(0), i.InputOperand(1)); ++ } else if (instr->arch_opcode() == kArchStackPointerGreaterThan) { ++ cc = FlagsConditionToConditionCmp(condition); ++ Register lhs_register = sp; ++ uint32_t offset; ++ if (gen->ShouldApplyOffsetToStackCheck(instr, &offset)) { ++ lhs_register = i.TempRegister(0); ++ __ Sub_d(lhs_register, sp, offset); ++ } ++ __ Branch(tlabel, cc, lhs_register, Operand(i.InputRegister(0))); ++ } else if (instr->arch_opcode() == kLa64CmpS || ++ instr->arch_opcode() == kLa64CmpD) { ++ bool predicate; ++ FlagsConditionToConditionCmpFPU(&predicate, condition); ++ if (predicate) { ++ __ BranchTrueF(tlabel); ++ } else { ++ __ BranchFalseF(tlabel); ++ } ++ } else { ++ PrintF("AssembleArchBranch Unimplemented arch_opcode: %d\n", ++ instr->arch_opcode()); ++ UNIMPLEMENTED(); ++ } ++ if (!fallthru) __ Branch(flabel); // no fallthru to flabel. ++#undef __ ++#define __ tasm()-> ++} ++ ++// Assembles branches after an instruction. ++void CodeGenerator::AssembleArchBranch(Instruction* instr, BranchInfo* branch) { ++ Label* tlabel = branch->true_label; ++ Label* flabel = branch->false_label; ++ ++ AssembleBranchToLabels(this, tasm(), instr, branch->condition, tlabel, flabel, ++ branch->fallthru); ++} ++ ++void CodeGenerator::AssembleBranchPoisoning(FlagsCondition condition, ++ Instruction* instr) { ++ // TODO(jarin) Handle float comparisons (kUnordered[Not]Equal). 
++ if (condition == kUnorderedEqual || condition == kUnorderedNotEqual) { ++ return; ++ } ++ ++ La64OperandConverter i(this, instr); ++ condition = NegateFlagsCondition(condition); ++ ++ switch (instr->arch_opcode()) { ++ case kLa64Cmp: { ++ __ LoadZeroOnCondition(kSpeculationPoisonRegister, i.InputRegister(0), ++ i.InputOperand(1), ++ FlagsConditionToConditionCmp(condition)); ++ } ++ return; ++ case kLa64Tst: { ++ switch (condition) { ++ case kEqual: ++ __ LoadZeroIfConditionZero(kSpeculationPoisonRegister, kScratchReg); ++ break; ++ case kNotEqual: ++ __ LoadZeroIfConditionNotZero(kSpeculationPoisonRegister, ++ kScratchReg); ++ break; ++ default: ++ UNREACHABLE(); ++ } ++ } ++ return; ++ case kLa64Dadd: ++ case kLa64Dsub: { ++ // Check for overflow creates 1 or 0 for result. ++ __ srli_d(kScratchReg, i.OutputRegister(), 63); ++ __ srli_w(kScratchReg2, i.OutputRegister(), 31); ++ __ xor_(kScratchReg2, kScratchReg, kScratchReg2); ++ switch (condition) { ++ case kOverflow: ++ __ LoadZeroIfConditionNotZero(kSpeculationPoisonRegister, ++ kScratchReg2); ++ break; ++ case kNotOverflow: ++ __ LoadZeroIfConditionZero(kSpeculationPoisonRegister, kScratchReg2); ++ break; ++ default: ++ UNSUPPORTED_COND(instr->arch_opcode(), condition); ++ } ++ } ++ return; ++ case kLa64DaddOvf: ++ case kLa64DsubOvf: { ++ // Overflow occurs if overflow register is negative ++ __ Slt(kScratchReg2, kScratchReg, zero_reg); ++ switch (condition) { ++ case kOverflow: ++ __ LoadZeroIfConditionNotZero(kSpeculationPoisonRegister, ++ kScratchReg2); ++ break; ++ case kNotOverflow: ++ __ LoadZeroIfConditionZero(kSpeculationPoisonRegister, kScratchReg2); ++ break; ++ default: ++ UNSUPPORTED_COND(instr->arch_opcode(), condition); ++ } ++ } ++ return; ++ case kLa64MulOvf: { ++ // Overflow occurs if overflow register is not zero ++ switch (condition) { ++ case kOverflow: ++ __ LoadZeroIfConditionNotZero(kSpeculationPoisonRegister, ++ kScratchReg); ++ break; ++ case kNotOverflow: ++ __ LoadZeroIfConditionZero(kSpeculationPoisonRegister, kScratchReg); ++ break; ++ default: ++ UNSUPPORTED_COND(instr->arch_opcode(), condition); ++ } ++ } ++ return; ++ case kLa64CmpS: ++ case kLa64CmpD: { ++ bool predicate; ++ FlagsConditionToConditionCmpFPU(&predicate, condition); ++ if (predicate) { ++ __ LoadZeroIfFPUCondition(kSpeculationPoisonRegister); ++ } else { ++ __ LoadZeroIfNotFPUCondition(kSpeculationPoisonRegister); ++ } ++ } ++ return; ++ default: ++ UNREACHABLE(); ++ } ++} ++ ++#undef UNSUPPORTED_COND ++ ++void CodeGenerator::AssembleArchDeoptBranch(Instruction* instr, ++ BranchInfo* branch) { ++ AssembleArchBranch(instr, branch); ++} ++ ++void CodeGenerator::AssembleArchJump(RpoNumber target) { ++ if (!IsNextInAssemblyOrder(target)) __ Branch(GetLabel(target)); ++} ++ ++void CodeGenerator::AssembleArchTrap(Instruction* instr, ++ FlagsCondition condition) { ++ class OutOfLineTrap final : public OutOfLineCode { ++ public: ++ OutOfLineTrap(CodeGenerator* gen, Instruction* instr) ++ : OutOfLineCode(gen), instr_(instr), gen_(gen) {} ++ void Generate() final { ++ La64OperandConverter i(gen_, instr_); ++ TrapId trap_id = ++ static_cast(i.InputInt32(instr_->InputCount() - 1)); ++ GenerateCallToTrap(trap_id); ++ } ++ ++ private: ++ void GenerateCallToTrap(TrapId trap_id) { ++ if (trap_id == TrapId::kInvalid) { ++ // We cannot test calls to the runtime in cctest/test-run-wasm. ++ // Therefore we emit a call to C here instead of a call to the runtime. 
++ // We use the context register as the scratch register, because we do ++ // not have a context here. ++ __ PrepareCallCFunction(0, 0, cp); ++ __ CallCFunction( ++ ExternalReference::wasm_call_trap_callback_for_testing(), 0); ++ __ LeaveFrame(StackFrame::WASM_COMPILED); ++ auto call_descriptor = gen_->linkage()->GetIncomingDescriptor(); ++ int pop_count = ++ static_cast(call_descriptor->StackParameterCount()); ++ pop_count += (pop_count & 1); // align ++ __ Drop(pop_count); ++ __ Ret(); ++ } else { ++ gen_->AssembleSourcePosition(instr_); ++ // A direct call to a wasm runtime stub defined in this module. ++ // Just encode the stub index. This will be patched when the code ++ // is added to the native module and copied into wasm code space. ++ __ Call(static_cast
(trap_id), RelocInfo::WASM_STUB_CALL); ++ ReferenceMap* reference_map = ++ new (gen_->zone()) ReferenceMap(gen_->zone()); ++ gen_->RecordSafepoint(reference_map, Safepoint::kNoLazyDeopt); ++ if (FLAG_debug_code) { ++ __ stop(); ++ } ++ } ++ } ++ Instruction* instr_; ++ CodeGenerator* gen_; ++ }; ++ auto ool = new (zone()) OutOfLineTrap(this, instr); ++ Label* tlabel = ool->entry(); ++ AssembleBranchToLabels(this, tasm(), instr, condition, tlabel, nullptr, true); ++} ++ ++// Assembles boolean materializations after an instruction. ++void CodeGenerator::AssembleArchBoolean(Instruction* instr, ++ FlagsCondition condition) { ++ La64OperandConverter i(this, instr); ++ ++ // Materialize a full 32-bit 1 or 0 value. The result register is always the ++ // last output of the instruction. ++ DCHECK_NE(0u, instr->OutputCount()); ++ Register result = i.OutputRegister(instr->OutputCount() - 1); ++ Condition cc = kNoCondition; ++ // La64 does not have condition code flags, so compare and branch are ++ // implemented differently than on the other arch's. The compare operations ++ // emit la64 pseudo-instructions, which are checked and handled here. ++ ++ if (instr->arch_opcode() == kLa64Tst) { ++ cc = FlagsConditionToConditionTst(condition); ++ if (cc == eq) { ++ __ Sltu(result, kScratchReg, 1); ++ } else { ++ __ Sltu(result, zero_reg, kScratchReg); ++ } ++ return; ++ } else if (instr->arch_opcode() == kLa64Dadd || ++ instr->arch_opcode() == kLa64Dsub) { ++ cc = FlagsConditionToConditionOvf(condition); ++ // Check for overflow creates 1 or 0 for result. ++ __ srli_d(kScratchReg, i.OutputRegister(), 63); ++ __ srli_w(kScratchReg2, i.OutputRegister(), 31); ++ __ xor_(result, kScratchReg, kScratchReg2); ++ if (cc == eq) // Toggle result for not overflow. ++ __ xori(result, result, 1); ++ return; ++ } else if (instr->arch_opcode() == kLa64DaddOvf || ++ instr->arch_opcode() == kLa64DsubOvf) { ++ // Overflow occurs if overflow register is negative ++ __ slt(result, kScratchReg, zero_reg); ++ } else if (instr->arch_opcode() == kLa64MulOvf) { ++ // Overflow occurs if overflow register is not zero ++ __ Sgtu(result, kScratchReg, zero_reg); ++ } else if (instr->arch_opcode() == kLa64Cmp) { ++ cc = FlagsConditionToConditionCmp(condition); ++ switch (cc) { ++ case eq: ++ case ne: { ++ Register left = i.InputRegister(0); ++ Operand right = i.InputOperand(1); ++ if (instr->InputAt(1)->IsImmediate()) { ++ if (is_int12(-right.immediate())) { ++ if (right.immediate() == 0) { ++ if (cc == eq) { ++ __ Sltu(result, left, 1); ++ } else { ++ __ Sltu(result, zero_reg, left); ++ } ++ } else { ++ __ Add_d(result, left, Operand(-right.immediate())); ++ if (cc == eq) { ++ __ Sltu(result, result, 1); ++ } else { ++ __ Sltu(result, zero_reg, result); ++ } ++ } ++ } else { ++ if (is_uint12(right.immediate())) { ++ __ Xor(result, left, right); ++ } else { ++ __ li(kScratchReg, right); ++ __ Xor(result, left, kScratchReg); ++ } ++ if (cc == eq) { ++ __ Sltu(result, result, 1); ++ } else { ++ __ Sltu(result, zero_reg, result); ++ } ++ } ++ } else { ++ __ Xor(result, left, right); ++ if (cc == eq) { ++ __ Sltu(result, result, 1); ++ } else { ++ __ Sltu(result, zero_reg, result); ++ } ++ } ++ } break; ++ case lt: ++ case ge: { ++ Register left = i.InputRegister(0); ++ Operand right = i.InputOperand(1); ++ __ Slt(result, left, right); ++ if (cc == ge) { ++ __ xori(result, result, 1); ++ } ++ } break; ++ case gt: ++ case le: { ++ Register left = i.InputRegister(1); ++ Operand right = i.InputOperand(0); ++ __ Slt(result, left, right); ++ if 
(cc == le) { ++ __ xori(result, result, 1); ++ } ++ } break; ++ case lo: ++ case hs: { ++ Register left = i.InputRegister(0); ++ Operand right = i.InputOperand(1); ++ __ Sltu(result, left, right); ++ if (cc == hs) { ++ __ xori(result, result, 1); ++ } ++ } break; ++ case hi: ++ case ls: { ++ Register left = i.InputRegister(1); ++ Operand right = i.InputOperand(0); ++ __ Sltu(result, left, right); ++ if (cc == ls) { ++ __ xori(result, result, 1); ++ } ++ } break; ++ default: ++ UNREACHABLE(); ++ } ++ return; ++ } else if (instr->arch_opcode() == kLa64CmpD || ++ instr->arch_opcode() == kLa64CmpS) { ++ FPURegister left = i.InputOrZeroDoubleRegister(0); ++ FPURegister right = i.InputOrZeroDoubleRegister(1); ++ if ((left == kDoubleRegZero || right == kDoubleRegZero) && ++ !__ IsDoubleZeroRegSet()) { ++ __ Move(kDoubleRegZero, 0.0); ++ } ++ bool predicate; ++ FlagsConditionToConditionCmpFPU(&predicate, condition); ++ { ++ __ movcf2gr(result, FCC0); ++ if (!predicate) { ++ __ xori(result, result, 1); ++ } ++ } ++ return; ++ } else { ++ PrintF("AssembleArchBranch Unimplemented arch_opcode is : %d\n", ++ instr->arch_opcode()); ++ TRACE_UNIMPL(); ++ UNIMPLEMENTED(); ++ } ++} ++ ++void CodeGenerator::AssembleArchBinarySearchSwitch(Instruction* instr) { ++ La64OperandConverter i(this, instr); ++ Register input = i.InputRegister(0); ++ std::vector> cases; ++ for (size_t index = 2; index < instr->InputCount(); index += 2) { ++ cases.push_back({i.InputInt32(index + 0), GetLabel(i.InputRpo(index + 1))}); ++ } ++ AssembleArchBinarySearchSwitchRange(input, i.InputRpo(1), cases.data(), ++ cases.data() + cases.size()); ++} ++ ++void CodeGenerator::AssembleArchTableSwitch(Instruction* instr) { ++ La64OperandConverter i(this, instr); ++ Register input = i.InputRegister(0); ++ size_t const case_count = instr->InputCount() - 2; ++ ++ __ Branch(GetLabel(i.InputRpo(1)), hs, input, Operand(case_count)); ++ __ GenerateSwitchTable(input, case_count, [&i, this](size_t index) { ++ return GetLabel(i.InputRpo(index + 2)); ++ }); ++} ++ ++void CodeGenerator::FinishFrame(Frame* frame) { ++ auto call_descriptor = linkage()->GetIncomingDescriptor(); ++ ++ const RegList saves_fpu = call_descriptor->CalleeSavedFPRegisters(); ++ if (saves_fpu != 0) { ++ int count = base::bits::CountPopulation(saves_fpu); ++ DCHECK_EQ(kNumCalleeSavedFPU, count); ++ frame->AllocateSavedCalleeRegisterSlots(count * ++ (kDoubleSize / kSystemPointerSize)); ++ } ++ ++ const RegList saves = call_descriptor->CalleeSavedRegisters(); ++ if (saves != 0) { ++ int count = base::bits::CountPopulation(saves); ++ DCHECK_EQ(kNumCalleeSaved, count + 1); ++ frame->AllocateSavedCalleeRegisterSlots(count); ++ } ++} ++ ++void CodeGenerator::AssembleConstructFrame() { ++ auto call_descriptor = linkage()->GetIncomingDescriptor(); ++ ++ if (frame_access_state()->has_frame()) { ++ if (call_descriptor->IsCFunctionCall()) { ++ if (info()->GetOutputStackFrameType() == StackFrame::C_WASM_ENTRY) { ++ __ StubPrologue(StackFrame::C_WASM_ENTRY); ++ // Reserve stack space for saving the c_entry_fp later. 
++ __ Sub_d(sp, sp, Operand(kSystemPointerSize)); ++ } else { ++ __ Push(ra, fp); ++ __ mov(fp, sp); ++ } ++ } else if (call_descriptor->IsJSFunctionCall()) { ++ __ Prologue(); ++ if (call_descriptor->PushArgumentCount()) { ++ __ Push(kJavaScriptCallArgCountRegister); ++ } ++ } else { ++ __ StubPrologue(info()->GetOutputStackFrameType()); ++ if (call_descriptor->IsWasmFunctionCall()) { ++ __ Push(kWasmInstanceRegister); ++ } else if (call_descriptor->IsWasmImportWrapper() || ++ call_descriptor->IsWasmCapiFunction()) { ++ // Wasm import wrappers are passed a tuple in the place of the instance. ++ // Unpack the tuple into the instance and the target callable. ++ // This must be done here in the codegen because it cannot be expressed ++ // properly in the graph. ++ __ Ld_d(kJSFunctionRegister, ++ FieldMemOperand(kWasmInstanceRegister, Tuple2::kValue2Offset)); ++ __ Ld_d(kWasmInstanceRegister, ++ FieldMemOperand(kWasmInstanceRegister, Tuple2::kValue1Offset)); ++ __ Push(kWasmInstanceRegister); ++ if (call_descriptor->IsWasmCapiFunction()) { ++ // Reserve space for saving the PC later. ++ __ Sub_d(sp, sp, Operand(kSystemPointerSize)); ++ } ++ } ++ } ++ } ++ ++ int required_slots = ++ frame()->GetTotalFrameSlotCount() - frame()->GetFixedSlotCount(); ++ ++ if (info()->is_osr()) { ++ // TurboFan OSR-compiled functions cannot be entered directly. ++ __ Abort(AbortReason::kShouldNotDirectlyEnterOsrFunction); ++ ++ // Unoptimized code jumps directly to this entrypoint while the unoptimized ++ // frame is still on the stack. Optimized code uses OSR values directly from ++ // the unoptimized frame. Thus, all that needs to be done is to allocate the ++ // remaining stack slots. ++ if (FLAG_code_comments) __ RecordComment("-- OSR entrypoint --"); ++ osr_pc_offset_ = __ pc_offset(); ++ required_slots -= osr_helper()->UnoptimizedFrameSlots(); ++ ResetSpeculationPoison(); ++ } ++ ++ const RegList saves = call_descriptor->CalleeSavedRegisters(); ++ const RegList saves_fpu = call_descriptor->CalleeSavedFPRegisters(); ++ ++ if (required_slots > 0) { ++ DCHECK(frame_access_state()->has_frame()); ++ if (info()->IsWasm() && required_slots > 128) { ++ // For WebAssembly functions with big frames we have to do the stack ++ // overflow check before we construct the frame. Otherwise we may not ++ // have enough space on the stack to call the runtime for the stack ++ // overflow. ++ Label done; ++ ++ // If the frame is bigger than the stack, we throw the stack overflow ++ // exception unconditionally. Thereby we can avoid the integer overflow ++ // check in the condition code. ++ if ((required_slots * kSystemPointerSize) < (FLAG_stack_size * 1024)) { ++ __ Ld_d( ++ kScratchReg, ++ FieldMemOperand(kWasmInstanceRegister, ++ WasmInstanceObject::kRealStackLimitAddressOffset)); ++ __ Ld_d(kScratchReg, MemOperand(kScratchReg, 0)); ++ __ Add_d(kScratchReg, kScratchReg, ++ Operand(required_slots * kSystemPointerSize)); ++ __ Branch(&done, uge, sp, Operand(kScratchReg)); ++ } ++ ++ __ Call(wasm::WasmCode::kWasmStackOverflow, RelocInfo::WASM_STUB_CALL); ++ // We come from WebAssembly, there are no references for the GC. ++ ReferenceMap* reference_map = new (zone()) ReferenceMap(zone()); ++ RecordSafepoint(reference_map, Safepoint::kNoLazyDeopt); ++ if (FLAG_debug_code) { ++ __ stop(); ++ } ++ ++ __ bind(&done); ++ } ++ } ++ ++ const int returns = frame()->GetReturnSlotCount(); ++ ++ // Skip callee-saved and return slots, which are pushed below. 
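++ // For example, with 10 total frame slots, two callee-saved GPRs, no saved
++ // FPU registers and one return slot, only 10 - 2 - 0 - 1 = 7 slots are
++ // allocated here; the saves and return slots are handled separately below.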
++ required_slots -= base::bits::CountPopulation(saves); ++ required_slots -= base::bits::CountPopulation(saves_fpu); ++ required_slots -= returns; ++ if (required_slots > 0) { ++ __ Sub_d(sp, sp, Operand(required_slots * kSystemPointerSize)); ++ } ++ ++ if (saves_fpu != 0) { ++ // Save callee-saved FPU registers. ++ __ MultiPushFPU(saves_fpu); ++ DCHECK_EQ(kNumCalleeSavedFPU, base::bits::CountPopulation(saves_fpu)); ++ } ++ ++ if (saves != 0) { ++ // Save callee-saved registers. ++ __ MultiPush(saves); ++ DCHECK_EQ(kNumCalleeSaved, base::bits::CountPopulation(saves) + 1); ++ } ++ ++ if (returns != 0) { ++ // Create space for returns. ++ __ Sub_d(sp, sp, Operand(returns * kSystemPointerSize)); ++ } ++} ++ ++void CodeGenerator::AssembleReturn(InstructionOperand* pop) { ++ auto call_descriptor = linkage()->GetIncomingDescriptor(); ++ ++ const int returns = frame()->GetReturnSlotCount(); ++ if (returns != 0) { ++ __ Add_d(sp, sp, Operand(returns * kSystemPointerSize)); ++ } ++ ++ // Restore GP registers. ++ const RegList saves = call_descriptor->CalleeSavedRegisters(); ++ if (saves != 0) { ++ __ MultiPop(saves); ++ } ++ ++ // Restore FPU registers. ++ const RegList saves_fpu = call_descriptor->CalleeSavedFPRegisters(); ++ if (saves_fpu != 0) { ++ __ MultiPopFPU(saves_fpu); ++ } ++ ++ La64OperandConverter g(this, nullptr); ++ if (call_descriptor->IsCFunctionCall()) { ++ AssembleDeconstructFrame(); ++ } else if (frame_access_state()->has_frame()) { ++ // Canonicalize JSFunction return sites for now unless they have an variable ++ // number of stack slot pops. ++ if (pop->IsImmediate() && g.ToConstant(pop).ToInt32() == 0) { ++ if (return_label_.is_bound()) { ++ __ Branch(&return_label_); ++ return; ++ } else { ++ __ bind(&return_label_); ++ AssembleDeconstructFrame(); ++ } ++ } else { ++ AssembleDeconstructFrame(); ++ } ++ } ++ int pop_count = static_cast(call_descriptor->StackParameterCount()); ++ if (pop->IsImmediate()) { ++ pop_count += g.ToConstant(pop).ToInt32(); ++ } else { ++ Register pop_reg = g.ToRegister(pop); ++ __ slli_d(pop_reg, pop_reg, kSystemPointerSizeLog2); ++ __ Add_d(sp, sp, pop_reg); ++ } ++ if (pop_count != 0) { ++ __ DropAndRet(pop_count); ++ } else { ++ __ Ret(); ++ } ++} ++ ++void CodeGenerator::FinishCode() {} ++ ++void CodeGenerator::PrepareForDeoptimizationExits(int deopt_count) {} ++ ++void CodeGenerator::AssembleMove(InstructionOperand* source, ++ InstructionOperand* destination) { ++ La64OperandConverter g(this, nullptr); ++ // Dispatch on the source and destination operand kinds. Not all ++ // combinations are possible. ++ if (source->IsRegister()) { ++ DCHECK(destination->IsRegister() || destination->IsStackSlot()); ++ Register src = g.ToRegister(source); ++ if (destination->IsRegister()) { ++ __ mov(g.ToRegister(destination), src); ++ } else { ++ __ St_d(src, g.ToMemOperand(destination)); ++ } ++ } else if (source->IsStackSlot()) { ++ DCHECK(destination->IsRegister() || destination->IsStackSlot()); ++ MemOperand src = g.ToMemOperand(source); ++ if (destination->IsRegister()) { ++ __ Ld_d(g.ToRegister(destination), src); ++ } else { ++ Register temp = kScratchReg; ++ __ Ld_d(temp, src); ++ __ St_d(temp, g.ToMemOperand(destination)); ++ } ++ } else if (source->IsConstant()) { ++ Constant src = g.ToConstant(source); ++ if (destination->IsRegister() || destination->IsStackSlot()) { ++ Register dst = ++ destination->IsRegister() ? 
g.ToRegister(destination) : kScratchReg; ++ switch (src.type()) { ++ case Constant::kInt32: ++ __ li(dst, Operand(src.ToInt32())); ++ break; ++ case Constant::kFloat32: ++ __ li(dst, Operand::EmbeddedNumber(src.ToFloat32())); ++ break; ++ case Constant::kInt64: ++ if (RelocInfo::IsWasmReference(src.rmode())) { ++ __ li(dst, Operand(src.ToInt64(), src.rmode())); ++ } else { ++ __ li(dst, Operand(src.ToInt64())); ++ } ++ break; ++ case Constant::kFloat64: ++ __ li(dst, Operand::EmbeddedNumber(src.ToFloat64().value())); ++ break; ++ case Constant::kExternalReference: ++ __ li(dst, src.ToExternalReference()); ++ break; ++ case Constant::kDelayedStringConstant: ++ __ li(dst, src.ToDelayedStringConstant()); ++ break; ++ case Constant::kHeapObject: { ++ Handle src_object = src.ToHeapObject(); ++ RootIndex index; ++ if (IsMaterializableFromRoot(src_object, &index)) { ++ __ LoadRoot(dst, index); ++ } else { ++ __ li(dst, src_object); ++ } ++ break; ++ } ++ case Constant::kCompressedHeapObject: ++ UNREACHABLE(); ++ case Constant::kRpoNumber: ++ UNREACHABLE(); // TODO(titzer): loading RPO numbers on LA64. ++ break; ++ } ++ if (destination->IsStackSlot()) __ St_d(dst, g.ToMemOperand(destination)); ++ } else if (src.type() == Constant::kFloat32) { ++ if (destination->IsFPStackSlot()) { ++ MemOperand dst = g.ToMemOperand(destination); ++ if (bit_cast(src.ToFloat32()) == 0) { ++ __ St_d(zero_reg, dst); ++ } else { ++ __ li(kScratchReg, Operand(bit_cast(src.ToFloat32()))); ++ __ St_d(kScratchReg, dst); ++ } ++ } else { ++ DCHECK(destination->IsFPRegister()); ++ FloatRegister dst = g.ToSingleRegister(destination); ++ __ Move(dst, src.ToFloat32()); ++ } ++ } else { ++ DCHECK_EQ(Constant::kFloat64, src.type()); ++ DoubleRegister dst = destination->IsFPRegister() ++ ? g.ToDoubleRegister(destination) ++ : kScratchDoubleReg; ++ __ Move(dst, src.ToFloat64().value()); ++ if (destination->IsFPStackSlot()) { ++ __ Fst_d(dst, g.ToMemOperand(destination)); ++ } ++ } ++ } else if (source->IsFPRegister()) { ++ FPURegister src = g.ToDoubleRegister(source); ++ if (destination->IsFPRegister()) { ++ FPURegister dst = g.ToDoubleRegister(destination); ++ __ Move(dst, src); ++ } else { ++ DCHECK(destination->IsFPStackSlot()); ++ __ Fst_d(src, g.ToMemOperand(destination)); ++ } ++ } else if (source->IsFPStackSlot()) { ++ DCHECK(destination->IsFPRegister() || destination->IsFPStackSlot()); ++ MemOperand src = g.ToMemOperand(source); ++ if (destination->IsFPRegister()) { ++ __ Fld_d(g.ToDoubleRegister(destination), src); ++ } else { ++ DCHECK(destination->IsFPStackSlot()); ++ FPURegister temp = kScratchDoubleReg; ++ __ Fld_d(temp, src); ++ __ Fst_d(temp, g.ToMemOperand(destination)); ++ } ++ } else { ++ UNREACHABLE(); ++ } ++} ++ ++void CodeGenerator::AssembleSwap(InstructionOperand* source, ++ InstructionOperand* destination) { ++ La64OperandConverter g(this, nullptr); ++ // Dispatch on the source and destination operand kinds. Not all ++ // combinations are possible. ++ if (source->IsRegister()) { ++ // Register-register. 
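++ // kScratchReg holds the old source value while the destination value
++ // (register or stack slot) is moved into the source, then the saved value
++ // is written to the destination.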
++ Register temp = kScratchReg; ++ Register src = g.ToRegister(source); ++ if (destination->IsRegister()) { ++ Register dst = g.ToRegister(destination); ++ __ Move(temp, src); ++ __ Move(src, dst); ++ __ Move(dst, temp); ++ } else { ++ DCHECK(destination->IsStackSlot()); ++ MemOperand dst = g.ToMemOperand(destination); ++ __ mov(temp, src); ++ __ Ld_d(src, dst); ++ __ St_d(temp, dst); ++ } ++ } else if (source->IsStackSlot()) { ++ DCHECK(destination->IsStackSlot()); ++ Register temp_0 = kScratchReg; ++ Register temp_1 = kScratchReg2; ++ MemOperand src = g.ToMemOperand(source); ++ MemOperand dst = g.ToMemOperand(destination); ++ __ Ld_d(temp_0, src); ++ __ Ld_d(temp_1, dst); ++ __ St_d(temp_0, dst); ++ __ St_d(temp_1, src); ++ } else if (source->IsFPRegister()) { ++ FPURegister temp = kScratchDoubleReg; ++ FPURegister src = g.ToDoubleRegister(source); ++ if (destination->IsFPRegister()) { ++ FPURegister dst = g.ToDoubleRegister(destination); ++ __ Move(temp, src); ++ __ Move(src, dst); ++ __ Move(dst, temp); ++ } else { ++ DCHECK(destination->IsFPStackSlot()); ++ MemOperand dst = g.ToMemOperand(destination); ++ __ Move(temp, src); ++ __ Fld_d(src, dst); ++ __ Fst_d(temp, dst); ++ } ++ } else if (source->IsFPStackSlot()) { ++ DCHECK(destination->IsFPStackSlot()); ++ Register temp_0 = kScratchReg; ++ MemOperand src0 = g.ToMemOperand(source); ++ MemOperand src1(src0.base(), src0.offset() + kIntSize); ++ MemOperand dst0 = g.ToMemOperand(destination); ++ MemOperand dst1(dst0.base(), dst0.offset() + kIntSize); ++ FPURegister temp_1 = kScratchDoubleReg; ++ __ Fld_d(temp_1, dst0); // Save destination in temp_1. ++ __ Ld_w(temp_0, src0); // Then use temp_0 to copy source to destination. ++ __ St_w(temp_0, dst0); ++ __ Ld_w(temp_0, src1); ++ __ St_w(temp_0, dst1); ++ __ Fst_d(temp_1, src0); ++ } else { ++ // No other combinations are possible. ++ UNREACHABLE(); ++ } ++} ++ ++void CodeGenerator::AssembleJumpTable(Label** targets, size_t target_count) { ++ // On 64-bit LA64 we emit the jump tables inline. ++ UNREACHABLE(); ++} ++ ++#undef ASSEMBLE_ATOMIC_LOAD_INTEGER ++#undef ASSEMBLE_ATOMIC_STORE_INTEGER ++#undef ASSEMBLE_ATOMIC_BINOP ++#undef ASSEMBLE_ATOMIC_BINOP_EXT ++#undef ASSEMBLE_ATOMIC_EXCHANGE_INTEGER ++#undef ASSEMBLE_ATOMIC_EXCHANGE_INTEGER_EXT ++#undef ASSEMBLE_ATOMIC_COMPARE_EXCHANGE_INTEGER ++#undef ASSEMBLE_ATOMIC_COMPARE_EXCHANGE_INTEGER_EXT ++#undef ASSEMBLE_IEEE754_BINOP ++#undef ASSEMBLE_IEEE754_UNOP ++ ++#undef TRACE_MSG ++#undef TRACE_UNIMPL ++#undef __ ++ ++} // namespace compiler ++} // namespace internal ++} // namespace v8 +diff --git a/src/3rdparty/chromium/v8/src/compiler/backend/la64/instruction-codes-la64.h b/src/3rdparty/chromium/v8/src/compiler/backend/la64/instruction-codes-la64.h +new file mode 100644 +index 00000000000..b8a2d97961a +--- /dev/null ++++ b/src/3rdparty/chromium/v8/src/compiler/backend/la64/instruction-codes-la64.h +@@ -0,0 +1,412 @@ ++// Copyright 2014 the V8 project authors. All rights reserved. ++// Use of this source code is governed by a BSD-style license that can be ++// found in the LICENSE file. ++ ++#ifndef V8_COMPILER_BACKEND_LA64_INSTRUCTION_CODES_LA64_H_ ++#define V8_COMPILER_BACKEND_LA64_INSTRUCTION_CODES_LA64_H_ ++ ++namespace v8 { ++namespace internal { ++namespace compiler { ++ ++// LA64-specific opcodes that specify which assembly sequence to emit. ++// Most opcodes specify a single instruction. 
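++ // The V(...) entries below form an X-macro list; the architecture-independent
++ // code expands it to generate the ArchOpcode enum values and their mnemonic
++ // strings.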
++#define TARGET_ARCH_OPCODE_LIST(V) \ ++ V(La64Add) \ ++ V(La64Dadd) \ ++ V(La64DaddOvf) \ ++ V(La64Sub) \ ++ V(La64Dsub) \ ++ V(La64DsubOvf) \ ++ V(La64Mul) \ ++ V(La64MulOvf) \ ++ V(La64MulHigh) \ ++ V(La64DMulHigh) \ ++ V(La64MulHighU) \ ++ V(La64Dmul) \ ++ V(La64Div) \ ++ V(La64Ddiv) \ ++ V(La64DivU) \ ++ V(La64DdivU) \ ++ V(La64Mod) \ ++ V(La64Dmod) \ ++ V(La64ModU) \ ++ V(La64DmodU) \ ++ V(La64And) \ ++ V(La64And32) \ ++ V(La64Or) \ ++ V(La64Or32) \ ++ V(La64Nor) \ ++ V(La64Nor32) \ ++ V(La64Xor) \ ++ V(La64Xor32) \ ++ V(La64Clz) \ ++ V(La64Lsa) \ ++ V(La64Dlsa) \ ++ V(La64Shl) \ ++ V(La64Shr) \ ++ V(La64Sar) \ ++ V(La64Ext) \ ++ V(La64Ins) \ ++ V(La64Dext) \ ++ V(La64Dins) \ ++ V(La64Dclz) \ ++ V(La64Ctz) \ ++ V(La64Dctz) \ ++ V(La64Popcnt) \ ++ V(La64Dpopcnt) \ ++ V(La64Dshl) \ ++ V(La64Dshr) \ ++ V(La64Dsar) \ ++ V(La64Ror) \ ++ V(La64Dror) \ ++ V(La64Mov) \ ++ V(La64Tst) \ ++ V(La64Cmp) \ ++ V(La64CmpS) \ ++ V(La64AddS) \ ++ V(La64SubS) \ ++ V(La64MulS) \ ++ V(La64DivS) \ ++ V(La64ModS) \ ++ V(La64AbsS) \ ++ V(La64NegS) \ ++ V(La64SqrtS) \ ++ V(La64MaxS) \ ++ V(La64MinS) \ ++ V(La64CmpD) \ ++ V(La64AddD) \ ++ V(La64SubD) \ ++ V(La64MulD) \ ++ V(La64DivD) \ ++ V(La64ModD) \ ++ V(La64AbsD) \ ++ V(La64NegD) \ ++ V(La64SqrtD) \ ++ V(La64MaxD) \ ++ V(La64MinD) \ ++ V(La64Float64RoundDown) \ ++ V(La64Float64RoundTruncate) \ ++ V(La64Float64RoundUp) \ ++ V(La64Float64RoundTiesEven) \ ++ V(La64Float32RoundDown) \ ++ V(La64Float32RoundTruncate) \ ++ V(La64Float32RoundUp) \ ++ V(La64Float32RoundTiesEven) \ ++ V(La64CvtSD) \ ++ V(La64CvtDS) \ ++ V(La64TruncWD) \ ++ V(La64RoundWD) \ ++ V(La64FloorWD) \ ++ V(La64CeilWD) \ ++ V(La64TruncWS) \ ++ V(La64RoundWS) \ ++ V(La64FloorWS) \ ++ V(La64CeilWS) \ ++ V(La64TruncLS) \ ++ V(La64TruncLD) \ ++ V(La64TruncUwD) \ ++ V(La64TruncUwS) \ ++ V(La64TruncUlS) \ ++ V(La64TruncUlD) \ ++ V(La64CvtDW) \ ++ V(La64CvtSL) \ ++ V(La64CvtSW) \ ++ V(La64CvtSUw) \ ++ V(La64CvtSUl) \ ++ V(La64CvtDL) \ ++ V(La64CvtDUw) \ ++ V(La64CvtDUl) \ ++ V(La64Lb) \ ++ V(La64Lbu) \ ++ V(La64Sb) \ ++ V(La64Lh) \ ++ V(La64Ulh) \ ++ V(La64Lhu) \ ++ V(La64Ulhu) \ ++ V(La64Sh) \ ++ V(La64Ush) \ ++ V(La64Ld) \ ++ V(La64Uld) \ ++ V(La64Lw) \ ++ V(La64Ulw) \ ++ V(La64Lwu) \ ++ V(La64Ulwu) \ ++ V(La64Sw) \ ++ V(La64Usw) \ ++ V(La64Sd) \ ++ V(La64Usd) \ ++ V(La64Lwc1) \ ++ V(La64Ulwc1) \ ++ V(La64Swc1) \ ++ V(La64Uswc1) \ ++ V(La64Ldc1) \ ++ V(La64Uldc1) \ ++ V(La64Sdc1) \ ++ V(La64Usdc1) \ ++ V(La64BitcastDL) \ ++ V(La64BitcastLD) \ ++ V(La64Float64ExtractLowWord32) \ ++ V(La64Float64ExtractHighWord32) \ ++ V(La64Float64InsertLowWord32) \ ++ V(La64Float64InsertHighWord32) \ ++ V(La64Float32Max) \ ++ V(La64Float64Max) \ ++ V(La64Float32Min) \ ++ V(La64Float64Min) \ ++ V(La64Float64SilenceNaN) \ ++ V(La64Push) \ ++ V(La64Peek) \ ++ V(La64StoreToStackSlot) \ ++ V(La64ByteSwap64) \ ++ V(La64ByteSwap32) \ ++ V(La64StackClaim) \ ++ V(La64Seb) \ ++ V(La64Seh) \ ++ V(La64Sync) \ ++ V(La64AssertEqual) \ ++ V(La64S128Zero) \ ++ V(La64I32x4Splat) \ ++ V(La64I32x4ExtractLane) \ ++ V(La64I32x4ReplaceLane) \ ++ V(La64I32x4Add) \ ++ V(La64I32x4AddHoriz) \ ++ V(La64I32x4Sub) \ ++ V(La64F64x2Abs) \ ++ V(La64F64x2Neg) \ ++ V(La64F32x4Splat) \ ++ V(La64F32x4ExtractLane) \ ++ V(La64F32x4ReplaceLane) \ ++ V(La64F32x4SConvertI32x4) \ ++ V(La64F32x4UConvertI32x4) \ ++ V(La64I32x4Mul) \ ++ V(La64I32x4MaxS) \ ++ V(La64I32x4MinS) \ ++ V(La64I32x4Eq) \ ++ V(La64I32x4Ne) \ ++ V(La64I32x4Shl) \ ++ V(La64I32x4ShrS) \ ++ V(La64I32x4ShrU) \ ++ V(La64I32x4MaxU) \ ++ V(La64I32x4MinU) \ ++ V(La64F64x2Sqrt) \ ++ 
V(La64F64x2Add) \ ++ V(La64F64x2Sub) \ ++ V(La64F64x2Mul) \ ++ V(La64F64x2Div) \ ++ V(La64F64x2Min) \ ++ V(La64F64x2Max) \ ++ V(La64F64x2Eq) \ ++ V(La64F64x2Ne) \ ++ V(La64F64x2Lt) \ ++ V(La64F64x2Le) \ ++ V(La64F64x2Splat) \ ++ V(La64F64x2ExtractLane) \ ++ V(La64F64x2ReplaceLane) \ ++ V(La64I64x2Add) \ ++ V(La64I64x2Sub) \ ++ V(La64I64x2Mul) \ ++ V(La64I64x2Neg) \ ++ V(La64I64x2Shl) \ ++ V(La64I64x2ShrS) \ ++ V(La64I64x2ShrU) \ ++ V(La64F32x4Abs) \ ++ V(La64F32x4Neg) \ ++ V(La64F32x4Sqrt) \ ++ V(La64F32x4RecipApprox) \ ++ V(La64F32x4RecipSqrtApprox) \ ++ V(La64F32x4Add) \ ++ V(La64F32x4AddHoriz) \ ++ V(La64F32x4Sub) \ ++ V(La64F32x4Mul) \ ++ V(La64F32x4Div) \ ++ V(La64F32x4Max) \ ++ V(La64F32x4Min) \ ++ V(La64F32x4Eq) \ ++ V(La64F32x4Ne) \ ++ V(La64F32x4Lt) \ ++ V(La64F32x4Le) \ ++ V(La64I32x4SConvertF32x4) \ ++ V(La64I32x4UConvertF32x4) \ ++ V(La64I32x4Neg) \ ++ V(La64I32x4GtS) \ ++ V(La64I32x4GeS) \ ++ V(La64I32x4GtU) \ ++ V(La64I32x4GeU) \ ++ V(La64I32x4Abs) \ ++ V(La64I16x8Splat) \ ++ V(La64I16x8ExtractLaneU) \ ++ V(La64I16x8ExtractLaneS) \ ++ V(La64I16x8ReplaceLane) \ ++ V(La64I16x8Neg) \ ++ V(La64I16x8Shl) \ ++ V(La64I16x8ShrS) \ ++ V(La64I16x8ShrU) \ ++ V(La64I16x8Add) \ ++ V(La64I16x8AddSaturateS) \ ++ V(La64I16x8AddHoriz) \ ++ V(La64I16x8Sub) \ ++ V(La64I16x8SubSaturateS) \ ++ V(La64I16x8Mul) \ ++ V(La64I16x8MaxS) \ ++ V(La64I16x8MinS) \ ++ V(La64I16x8Eq) \ ++ V(La64I16x8Ne) \ ++ V(La64I16x8GtS) \ ++ V(La64I16x8GeS) \ ++ V(La64I16x8AddSaturateU) \ ++ V(La64I16x8SubSaturateU) \ ++ V(La64I16x8MaxU) \ ++ V(La64I16x8MinU) \ ++ V(La64I16x8GtU) \ ++ V(La64I16x8GeU) \ ++ V(La64I16x8RoundingAverageU) \ ++ V(La64I16x8Abs) \ ++ V(La64I8x16Splat) \ ++ V(La64I8x16ExtractLaneU) \ ++ V(La64I8x16ExtractLaneS) \ ++ V(La64I8x16ReplaceLane) \ ++ V(La64I8x16Neg) \ ++ V(La64I8x16Shl) \ ++ V(La64I8x16ShrS) \ ++ V(La64I8x16Add) \ ++ V(La64I8x16AddSaturateS) \ ++ V(La64I8x16Sub) \ ++ V(La64I8x16SubSaturateS) \ ++ V(La64I8x16Mul) \ ++ V(La64I8x16MaxS) \ ++ V(La64I8x16MinS) \ ++ V(La64I8x16Eq) \ ++ V(La64I8x16Ne) \ ++ V(La64I8x16GtS) \ ++ V(La64I8x16GeS) \ ++ V(La64I8x16ShrU) \ ++ V(La64I8x16AddSaturateU) \ ++ V(La64I8x16SubSaturateU) \ ++ V(La64I8x16MaxU) \ ++ V(La64I8x16MinU) \ ++ V(La64I8x16GtU) \ ++ V(La64I8x16GeU) \ ++ V(La64I8x16RoundingAverageU) \ ++ V(La64I8x16Abs) \ ++ V(La64S128And) \ ++ V(La64S128Or) \ ++ V(La64S128Xor) \ ++ V(La64S128Not) \ ++ V(La64S128Select) \ ++ V(La64S128AndNot) \ ++ V(La64S1x4AnyTrue) \ ++ V(La64S1x4AllTrue) \ ++ V(La64S1x8AnyTrue) \ ++ V(La64S1x8AllTrue) \ ++ V(La64S1x16AnyTrue) \ ++ V(La64S1x16AllTrue) \ ++ V(La64S32x4InterleaveRight) \ ++ V(La64S32x4InterleaveLeft) \ ++ V(La64S32x4PackEven) \ ++ V(La64S32x4PackOdd) \ ++ V(La64S32x4InterleaveEven) \ ++ V(La64S32x4InterleaveOdd) \ ++ V(La64S32x4Shuffle) \ ++ V(La64S16x8InterleaveRight) \ ++ V(La64S16x8InterleaveLeft) \ ++ V(La64S16x8PackEven) \ ++ V(La64S16x8PackOdd) \ ++ V(La64S16x8InterleaveEven) \ ++ V(La64S16x8InterleaveOdd) \ ++ V(La64S16x4Reverse) \ ++ V(La64S16x2Reverse) \ ++ V(La64S8x16InterleaveRight) \ ++ V(La64S8x16InterleaveLeft) \ ++ V(La64S8x16PackEven) \ ++ V(La64S8x16PackOdd) \ ++ V(La64S8x16InterleaveEven) \ ++ V(La64S8x16InterleaveOdd) \ ++ V(La64S8x16Shuffle) \ ++ V(La64S8x16Swizzle) \ ++ V(La64S8x16Concat) \ ++ V(La64S8x8Reverse) \ ++ V(La64S8x4Reverse) \ ++ V(La64S8x2Reverse) \ ++ V(La64S8x16LoadSplat) \ ++ V(La64S16x8LoadSplat) \ ++ V(La64S32x4LoadSplat) \ ++ V(La64S64x2LoadSplat) \ ++ V(La64I16x8Load8x8S) \ ++ V(La64I16x8Load8x8U) \ ++ V(La64I32x4Load16x4S) \ ++ V(La64I32x4Load16x4U) \ ++ 
V(La64I64x2Load32x2S) \ ++ V(La64I64x2Load32x2U) \ ++ V(La64I32x4SConvertI16x8Low) \ ++ V(La64I32x4SConvertI16x8High) \ ++ V(La64I32x4UConvertI16x8Low) \ ++ V(La64I32x4UConvertI16x8High) \ ++ V(La64I16x8SConvertI8x16Low) \ ++ V(La64I16x8SConvertI8x16High) \ ++ V(La64I16x8SConvertI32x4) \ ++ V(La64I16x8UConvertI32x4) \ ++ V(La64I16x8UConvertI8x16Low) \ ++ V(La64I16x8UConvertI8x16High) \ ++ V(La64I8x16SConvertI16x8) \ ++ V(La64I8x16UConvertI16x8) \ ++ V(La64Word64AtomicLoadUint8) \ ++ V(La64Word64AtomicLoadUint16) \ ++ V(La64Word64AtomicLoadUint32) \ ++ V(La64Word64AtomicLoadUint64) \ ++ V(La64Word64AtomicStoreWord8) \ ++ V(La64Word64AtomicStoreWord16) \ ++ V(La64Word64AtomicStoreWord32) \ ++ V(La64Word64AtomicStoreWord64) \ ++ V(La64Word64AtomicAddUint8) \ ++ V(La64Word64AtomicAddUint16) \ ++ V(La64Word64AtomicAddUint32) \ ++ V(La64Word64AtomicAddUint64) \ ++ V(La64Word64AtomicSubUint8) \ ++ V(La64Word64AtomicSubUint16) \ ++ V(La64Word64AtomicSubUint32) \ ++ V(La64Word64AtomicSubUint64) \ ++ V(La64Word64AtomicAndUint8) \ ++ V(La64Word64AtomicAndUint16) \ ++ V(La64Word64AtomicAndUint32) \ ++ V(La64Word64AtomicAndUint64) \ ++ V(La64Word64AtomicOrUint8) \ ++ V(La64Word64AtomicOrUint16) \ ++ V(La64Word64AtomicOrUint32) \ ++ V(La64Word64AtomicOrUint64) \ ++ V(La64Word64AtomicXorUint8) \ ++ V(La64Word64AtomicXorUint16) \ ++ V(La64Word64AtomicXorUint32) \ ++ V(La64Word64AtomicXorUint64) \ ++ V(La64Word64AtomicExchangeUint8) \ ++ V(La64Word64AtomicExchangeUint16) \ ++ V(La64Word64AtomicExchangeUint32) \ ++ V(La64Word64AtomicExchangeUint64) \ ++ V(La64Word64AtomicCompareExchangeUint8) \ ++ V(La64Word64AtomicCompareExchangeUint16) \ ++ V(La64Word64AtomicCompareExchangeUint32) \ ++ V(La64Word64AtomicCompareExchangeUint64) ++ ++// Addressing modes represent the "shape" of inputs to an instruction. ++// Many instructions support multiple addressing modes. Addressing modes ++// are encoded into the InstructionCode of the instruction and tell the ++// code generator after register allocation which assembler method to call. ++// ++// We use the following local notation for addressing modes: ++// ++// R = register ++// O = register or stack slot ++// D = double register ++// I = immediate (handle, external, int32) ++// MRI = [register + immediate] ++// MRR = [register + register] ++// TODO(plind): Add the new r6 address modes. ++#define TARGET_ADDRESSING_MODE_LIST(V) \ ++ V(MRI) /* [%r0 + K] */ \ ++ V(MRR) /* [%r0 + %r1] */ ++ ++} // namespace compiler ++} // namespace internal ++} // namespace v8 ++ ++#endif // V8_COMPILER_BACKEND_LA64_INSTRUCTION_CODES_LA64_H_ +diff --git a/src/3rdparty/chromium/v8/src/compiler/backend/la64/instruction-scheduler-la64.cc b/src/3rdparty/chromium/v8/src/compiler/backend/la64/instruction-scheduler-la64.cc +new file mode 100644 +index 00000000000..a1a5a771d32 +--- /dev/null ++++ b/src/3rdparty/chromium/v8/src/compiler/backend/la64/instruction-scheduler-la64.cc +@@ -0,0 +1,1534 @@ ++// Copyright 2015 the V8 project authors. All rights reserved. ++// Use of this source code is governed by a BSD-style license that can be ++// found in the LICENSE file. 
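++ // Scheduling support for the LA64 backend: classifies each target opcode as
++ // a load, a side-effecting operation or freely reorderable, and provides
++ // rough latency estimates for the instruction scheduler.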
++ ++#include "src/codegen/macro-assembler.h" ++#include "src/compiler/backend/instruction-scheduler.h" ++ ++namespace v8 { ++namespace internal { ++namespace compiler { ++ ++bool InstructionScheduler::SchedulerSupported() { return true; } ++ ++int InstructionScheduler::GetTargetInstructionFlags( ++ const Instruction* instr) const { ++ switch (instr->arch_opcode()) { ++ case kLa64AbsD: ++ case kLa64AbsS: ++ case kLa64Add: ++ case kLa64AddD: ++ case kLa64AddS: ++ case kLa64And: ++ case kLa64And32: ++ case kLa64AssertEqual: ++ case kLa64BitcastDL: ++ case kLa64BitcastLD: ++ case kLa64ByteSwap32: ++ case kLa64ByteSwap64: ++ case kLa64CeilWD: ++ case kLa64CeilWS: ++ case kLa64Clz: ++ case kLa64Cmp: ++ case kLa64CmpD: ++ case kLa64CmpS: ++ case kLa64Ctz: ++ case kLa64CvtDL: ++ case kLa64CvtDS: ++ case kLa64CvtDUl: ++ case kLa64CvtDUw: ++ case kLa64CvtDW: ++ case kLa64CvtSD: ++ case kLa64CvtSL: ++ case kLa64CvtSUl: ++ case kLa64CvtSUw: ++ case kLa64CvtSW: ++ case kLa64DMulHigh: ++ case kLa64MulHighU: ++ case kLa64Dadd: ++ case kLa64DaddOvf: ++ case kLa64Dclz: ++ case kLa64Dctz: ++ case kLa64Ddiv: ++ case kLa64DdivU: ++ case kLa64Dext: ++ case kLa64Dins: ++ case kLa64Div: ++ case kLa64DivD: ++ case kLa64DivS: ++ case kLa64DivU: ++ case kLa64Dlsa: ++ case kLa64Dmod: ++ case kLa64DmodU: ++ case kLa64Dmul: ++ case kLa64Dpopcnt: ++ case kLa64Dror: ++ case kLa64Dsar: ++ case kLa64Dshl: ++ case kLa64Dshr: ++ case kLa64Dsub: ++ case kLa64DsubOvf: ++ case kLa64Ext: ++ case kLa64F64x2Abs: ++ case kLa64F64x2Neg: ++ case kLa64F64x2Sqrt: ++ case kLa64F64x2Add: ++ case kLa64F64x2Sub: ++ case kLa64F64x2Mul: ++ case kLa64F64x2Div: ++ case kLa64F64x2Min: ++ case kLa64F64x2Max: ++ case kLa64F64x2Eq: ++ case kLa64F64x2Ne: ++ case kLa64F64x2Lt: ++ case kLa64F64x2Le: ++ case kLa64I64x2Add: ++ case kLa64I64x2Sub: ++ case kLa64I64x2Mul: ++ case kLa64I64x2Neg: ++ case kLa64I64x2Shl: ++ case kLa64I64x2ShrS: ++ case kLa64I64x2ShrU: ++ case kLa64F32x4Abs: ++ case kLa64F32x4Add: ++ case kLa64F32x4AddHoriz: ++ case kLa64F32x4Eq: ++ case kLa64F32x4ExtractLane: ++ case kLa64F32x4Lt: ++ case kLa64F32x4Le: ++ case kLa64F32x4Max: ++ case kLa64F32x4Min: ++ case kLa64F32x4Mul: ++ case kLa64F32x4Div: ++ case kLa64F32x4Ne: ++ case kLa64F32x4Neg: ++ case kLa64F32x4Sqrt: ++ case kLa64F32x4RecipApprox: ++ case kLa64F32x4RecipSqrtApprox: ++ case kLa64F32x4ReplaceLane: ++ case kLa64F32x4SConvertI32x4: ++ case kLa64F32x4Splat: ++ case kLa64F32x4Sub: ++ case kLa64F32x4UConvertI32x4: ++ case kLa64F64x2Splat: ++ case kLa64F64x2ExtractLane: ++ case kLa64F64x2ReplaceLane: ++ case kLa64Float32Max: ++ case kLa64Float32Min: ++ case kLa64Float32RoundDown: ++ case kLa64Float32RoundTiesEven: ++ case kLa64Float32RoundTruncate: ++ case kLa64Float32RoundUp: ++ case kLa64Float64ExtractLowWord32: ++ case kLa64Float64ExtractHighWord32: ++ case kLa64Float64InsertLowWord32: ++ case kLa64Float64InsertHighWord32: ++ case kLa64Float64Max: ++ case kLa64Float64Min: ++ case kLa64Float64RoundDown: ++ case kLa64Float64RoundTiesEven: ++ case kLa64Float64RoundTruncate: ++ case kLa64Float64RoundUp: ++ case kLa64Float64SilenceNaN: ++ case kLa64FloorWD: ++ case kLa64FloorWS: ++ case kLa64I16x8Add: ++ case kLa64I16x8AddHoriz: ++ case kLa64I16x8AddSaturateS: ++ case kLa64I16x8AddSaturateU: ++ case kLa64I16x8Eq: ++ case kLa64I16x8ExtractLaneU: ++ case kLa64I16x8ExtractLaneS: ++ case kLa64I16x8GeS: ++ case kLa64I16x8GeU: ++ case kLa64I16x8GtS: ++ case kLa64I16x8GtU: ++ case kLa64I16x8MaxS: ++ case kLa64I16x8MaxU: ++ case kLa64I16x8MinS: ++ case kLa64I16x8MinU: ++ case 
kLa64I16x8Mul: ++ case kLa64I16x8Ne: ++ case kLa64I16x8Neg: ++ case kLa64I16x8ReplaceLane: ++ case kLa64I8x16SConvertI16x8: ++ case kLa64I16x8SConvertI32x4: ++ case kLa64I16x8SConvertI8x16High: ++ case kLa64I16x8SConvertI8x16Low: ++ case kLa64I16x8Shl: ++ case kLa64I16x8ShrS: ++ case kLa64I16x8ShrU: ++ case kLa64I16x8Splat: ++ case kLa64I16x8Sub: ++ case kLa64I16x8SubSaturateS: ++ case kLa64I16x8SubSaturateU: ++ case kLa64I8x16UConvertI16x8: ++ case kLa64I16x8UConvertI32x4: ++ case kLa64I16x8UConvertI8x16High: ++ case kLa64I16x8UConvertI8x16Low: ++ case kLa64I16x8RoundingAverageU: ++ case kLa64I16x8Abs: ++ case kLa64I32x4Add: ++ case kLa64I32x4AddHoriz: ++ case kLa64I32x4Eq: ++ case kLa64I32x4ExtractLane: ++ case kLa64I32x4GeS: ++ case kLa64I32x4GeU: ++ case kLa64I32x4GtS: ++ case kLa64I32x4GtU: ++ case kLa64I32x4MaxS: ++ case kLa64I32x4MaxU: ++ case kLa64I32x4MinS: ++ case kLa64I32x4MinU: ++ case kLa64I32x4Mul: ++ case kLa64I32x4Ne: ++ case kLa64I32x4Neg: ++ case kLa64I32x4ReplaceLane: ++ case kLa64I32x4SConvertF32x4: ++ case kLa64I32x4SConvertI16x8High: ++ case kLa64I32x4SConvertI16x8Low: ++ case kLa64I32x4Shl: ++ case kLa64I32x4ShrS: ++ case kLa64I32x4ShrU: ++ case kLa64I32x4Splat: ++ case kLa64I32x4Sub: ++ case kLa64I32x4UConvertF32x4: ++ case kLa64I32x4UConvertI16x8High: ++ case kLa64I32x4UConvertI16x8Low: ++ case kLa64I32x4Abs: ++ case kLa64I8x16Add: ++ case kLa64I8x16AddSaturateS: ++ case kLa64I8x16AddSaturateU: ++ case kLa64I8x16Eq: ++ case kLa64I8x16ExtractLaneU: ++ case kLa64I8x16ExtractLaneS: ++ case kLa64I8x16GeS: ++ case kLa64I8x16GeU: ++ case kLa64I8x16GtS: ++ case kLa64I8x16GtU: ++ case kLa64I8x16MaxS: ++ case kLa64I8x16MaxU: ++ case kLa64I8x16MinS: ++ case kLa64I8x16MinU: ++ case kLa64I8x16Mul: ++ case kLa64I8x16Ne: ++ case kLa64I8x16Neg: ++ case kLa64I8x16ReplaceLane: ++ case kLa64I8x16Shl: ++ case kLa64I8x16ShrS: ++ case kLa64I8x16ShrU: ++ case kLa64I8x16Splat: ++ case kLa64I8x16Sub: ++ case kLa64I8x16SubSaturateS: ++ case kLa64I8x16SubSaturateU: ++ case kLa64I8x16RoundingAverageU: ++ case kLa64I8x16Abs: ++ case kLa64Ins: ++ case kLa64Lsa: ++ case kLa64MaxD: ++ case kLa64MaxS: ++ case kLa64MinD: ++ case kLa64MinS: ++ case kLa64Mod: ++ case kLa64ModU: ++ case kLa64Mov: ++ case kLa64Mul: ++ case kLa64MulD: ++ case kLa64MulHigh: ++ case kLa64MulOvf: ++ case kLa64MulS: ++ case kLa64NegD: ++ case kLa64NegS: ++ case kLa64Nor: ++ case kLa64Nor32: ++ case kLa64Or: ++ case kLa64Or32: ++ case kLa64Popcnt: ++ case kLa64Ror: ++ case kLa64RoundWD: ++ case kLa64RoundWS: ++ case kLa64S128And: ++ case kLa64S128Or: ++ case kLa64S128Not: ++ case kLa64S128Select: ++ case kLa64S128AndNot: ++ case kLa64S128Xor: ++ case kLa64S128Zero: ++ case kLa64S16x8InterleaveEven: ++ case kLa64S16x8InterleaveOdd: ++ case kLa64S16x8InterleaveLeft: ++ case kLa64S16x8InterleaveRight: ++ case kLa64S16x8PackEven: ++ case kLa64S16x8PackOdd: ++ case kLa64S16x2Reverse: ++ case kLa64S16x4Reverse: ++ case kLa64S1x16AllTrue: ++ case kLa64S1x16AnyTrue: ++ case kLa64S1x4AllTrue: ++ case kLa64S1x4AnyTrue: ++ case kLa64S1x8AllTrue: ++ case kLa64S1x8AnyTrue: ++ case kLa64S32x4InterleaveEven: ++ case kLa64S32x4InterleaveOdd: ++ case kLa64S32x4InterleaveLeft: ++ case kLa64S32x4InterleaveRight: ++ case kLa64S32x4PackEven: ++ case kLa64S32x4PackOdd: ++ case kLa64S32x4Shuffle: ++ case kLa64S8x16Concat: ++ case kLa64S8x16InterleaveEven: ++ case kLa64S8x16InterleaveOdd: ++ case kLa64S8x16InterleaveLeft: ++ case kLa64S8x16InterleaveRight: ++ case kLa64S8x16PackEven: ++ case kLa64S8x16PackOdd: ++ case kLa64S8x2Reverse: ++ case 
kLa64S8x4Reverse: ++ case kLa64S8x8Reverse: ++ case kLa64S8x16Shuffle: ++ case kLa64S8x16Swizzle: ++ case kLa64Sar: ++ case kLa64Seb: ++ case kLa64Seh: ++ case kLa64Shl: ++ case kLa64Shr: ++ case kLa64SqrtD: ++ case kLa64SqrtS: ++ case kLa64Sub: ++ case kLa64SubD: ++ case kLa64SubS: ++ case kLa64TruncLD: ++ case kLa64TruncLS: ++ case kLa64TruncUlD: ++ case kLa64TruncUlS: ++ case kLa64TruncUwD: ++ case kLa64TruncUwS: ++ case kLa64TruncWD: ++ case kLa64TruncWS: ++ case kLa64Tst: ++ case kLa64Xor: ++ case kLa64Xor32: ++ return kNoOpcodeFlags; ++ ++ case kLa64Lb: ++ case kLa64Lbu: ++ case kLa64Ld: ++ case kLa64Ldc1: ++ case kLa64Lh: ++ case kLa64Lhu: ++ case kLa64Lw: ++ case kLa64Lwc1: ++ case kLa64Lwu: ++ case kLa64Peek: ++ case kLa64Uld: ++ case kLa64Uldc1: ++ case kLa64Ulh: ++ case kLa64Ulhu: ++ case kLa64Ulw: ++ case kLa64Ulwu: ++ case kLa64Ulwc1: ++ case kLa64S8x16LoadSplat: ++ case kLa64S16x8LoadSplat: ++ case kLa64S32x4LoadSplat: ++ case kLa64S64x2LoadSplat: ++ case kLa64I16x8Load8x8S: ++ case kLa64I16x8Load8x8U: ++ case kLa64I32x4Load16x4S: ++ case kLa64I32x4Load16x4U: ++ case kLa64I64x2Load32x2S: ++ case kLa64I64x2Load32x2U: ++ case kLa64Word64AtomicLoadUint8: ++ case kLa64Word64AtomicLoadUint16: ++ case kLa64Word64AtomicLoadUint32: ++ case kLa64Word64AtomicLoadUint64: ++ ++ return kIsLoadOperation; ++ ++ case kLa64ModD: ++ case kLa64ModS: ++ case kLa64Push: ++ case kLa64Sb: ++ case kLa64Sd: ++ case kLa64Sdc1: ++ case kLa64Sh: ++ case kLa64StackClaim: ++ case kLa64StoreToStackSlot: ++ case kLa64Sw: ++ case kLa64Swc1: ++ case kLa64Usd: ++ case kLa64Usdc1: ++ case kLa64Ush: ++ case kLa64Usw: ++ case kLa64Uswc1: ++ case kLa64Sync: ++ case kLa64Word64AtomicStoreWord8: ++ case kLa64Word64AtomicStoreWord16: ++ case kLa64Word64AtomicStoreWord32: ++ case kLa64Word64AtomicStoreWord64: ++ case kLa64Word64AtomicAddUint8: ++ case kLa64Word64AtomicAddUint16: ++ case kLa64Word64AtomicAddUint32: ++ case kLa64Word64AtomicAddUint64: ++ case kLa64Word64AtomicSubUint8: ++ case kLa64Word64AtomicSubUint16: ++ case kLa64Word64AtomicSubUint32: ++ case kLa64Word64AtomicSubUint64: ++ case kLa64Word64AtomicAndUint8: ++ case kLa64Word64AtomicAndUint16: ++ case kLa64Word64AtomicAndUint32: ++ case kLa64Word64AtomicAndUint64: ++ case kLa64Word64AtomicOrUint8: ++ case kLa64Word64AtomicOrUint16: ++ case kLa64Word64AtomicOrUint32: ++ case kLa64Word64AtomicOrUint64: ++ case kLa64Word64AtomicXorUint8: ++ case kLa64Word64AtomicXorUint16: ++ case kLa64Word64AtomicXorUint32: ++ case kLa64Word64AtomicXorUint64: ++ case kLa64Word64AtomicExchangeUint8: ++ case kLa64Word64AtomicExchangeUint16: ++ case kLa64Word64AtomicExchangeUint32: ++ case kLa64Word64AtomicExchangeUint64: ++ case kLa64Word64AtomicCompareExchangeUint8: ++ case kLa64Word64AtomicCompareExchangeUint16: ++ case kLa64Word64AtomicCompareExchangeUint32: ++ case kLa64Word64AtomicCompareExchangeUint64: ++ return kHasSideEffect; ++ ++#define CASE(Name) case k##Name: ++ COMMON_ARCH_OPCODE_LIST(CASE) ++#undef CASE ++ // Already covered in architecture independent code. ++ UNREACHABLE(); ++ } ++ ++ UNREACHABLE(); ++} ++ ++enum Latency { ++ BRANCH = 4, // Estimated max. ++ RINT_S = 4, // Estimated. ++ RINT_D = 4, // Estimated. 
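++ // Latencies are approximate cycle counts; they only steer the scheduler's
++ // heuristics and need not be exact.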
++ ++ MULT = 4, ++ MULTU = 4, ++ DMULT = 4, ++ DMULTU = 4, ++ ++ MUL = 7, ++ DMUL = 7, ++ MUH = 7, ++ MUHU = 7, ++ DMUH = 7, ++ DMUHU = 7, ++ ++ DIV = 50, // Min:11 Max:50 ++ DDIV = 50, ++ DIVU = 50, ++ DDIVU = 50, ++ ++ ABS_S = 4, ++ ABS_D = 4, ++ NEG_S = 4, ++ NEG_D = 4, ++ ADD_S = 4, ++ ADD_D = 4, ++ SUB_S = 4, ++ SUB_D = 4, ++ MAX_S = 4, // Estimated. ++ MIN_S = 4, ++ MAX_D = 4, // Estimated. ++ MIN_D = 4, ++ C_cond_S = 4, ++ C_cond_D = 4, ++ MUL_S = 4, ++ ++ MADD_S = 4, ++ MSUB_S = 4, ++ NMADD_S = 4, ++ NMSUB_S = 4, ++ ++ CABS_cond_S = 4, ++ CABS_cond_D = 4, ++ ++ CVT_D_S = 4, ++ CVT_PS_PW = 4, ++ ++ CVT_S_W = 4, ++ CVT_S_L = 4, ++ CVT_D_W = 4, ++ CVT_D_L = 4, ++ ++ CVT_S_D = 4, ++ ++ CVT_W_S = 4, ++ CVT_W_D = 4, ++ CVT_L_S = 4, ++ CVT_L_D = 4, ++ ++ CEIL_W_S = 4, ++ CEIL_W_D = 4, ++ CEIL_L_S = 4, ++ CEIL_L_D = 4, ++ ++ FLOOR_W_S = 4, ++ FLOOR_W_D = 4, ++ FLOOR_L_S = 4, ++ FLOOR_L_D = 4, ++ ++ ROUND_W_S = 4, ++ ROUND_W_D = 4, ++ ROUND_L_S = 4, ++ ROUND_L_D = 4, ++ ++ TRUNC_W_S = 4, ++ TRUNC_W_D = 4, ++ TRUNC_L_S = 4, ++ TRUNC_L_D = 4, ++ ++ MOV_S = 4, ++ MOV_D = 4, ++ ++ MOVF_S = 4, ++ MOVF_D = 4, ++ ++ MOVN_S = 4, ++ MOVN_D = 4, ++ ++ MOVT_S = 4, ++ MOVT_D = 4, ++ ++ MOVZ_S = 4, ++ MOVZ_D = 4, ++ ++ MUL_D = 5, ++ MADD_D = 5, ++ MSUB_D = 5, ++ NMADD_D = 5, ++ NMSUB_D = 5, ++ ++ RECIP_S = 13, ++ RECIP_D = 26, ++ ++ RSQRT_S = 17, ++ RSQRT_D = 36, ++ ++ DIV_S = 17, ++ SQRT_S = 17, ++ ++ DIV_D = 32, ++ SQRT_D = 32, ++ ++ MTC1 = 4, ++ MTHC1 = 4, ++ DMTC1 = 4, ++ LWC1 = 4, ++ LDC1 = 4, ++ ++ MFC1 = 1, ++ MFHC1 = 1, ++ DMFC1 = 1, ++ MFHI = 1, ++ MFLO = 1, ++ SWC1 = 1, ++ SDC1 = 1, ++}; ++ ++int DadduLatency(bool is_operand_register = true) { ++ if (is_operand_register) { ++ return 1; ++ } else { ++ return 2; // Estimated max. ++ } ++} ++ ++int DsubuLatency(bool is_operand_register = true) { ++ return DadduLatency(is_operand_register); ++} ++ ++int AndLatency(bool is_operand_register = true) { ++ return DadduLatency(is_operand_register); ++} ++ ++int OrLatency(bool is_operand_register = true) { ++ return DadduLatency(is_operand_register); ++} ++ ++int NorLatency(bool is_operand_register = true) { ++ if (is_operand_register) { ++ return 1; ++ } else { ++ return 2; // Estimated max. 
++ } ++} ++ ++int XorLatency(bool is_operand_register = true) { ++ return DadduLatency(is_operand_register); ++} ++ ++int MulLatency(bool is_operand_register = true) { ++ if (is_operand_register) { ++ return Latency::MUL; ++ } else { ++ return Latency::MUL + 1; ++ } ++} ++ ++int DmulLatency(bool is_operand_register = true) { ++ int latency = 0; ++ latency = Latency::DMUL; ++ if (!is_operand_register) { ++ latency += 1; ++ } ++ return latency; ++} ++ ++int MulhLatency(bool is_operand_register = true) { ++ int latency = 0; ++ latency = Latency::MUH; ++ if (!is_operand_register) { ++ latency += 1; ++ } ++ return latency; ++} ++ ++int MulhuLatency(bool is_operand_register = true) { ++ int latency = 0; ++ latency = Latency::MUH; ++ if (!is_operand_register) { ++ latency += 1; ++ } ++ return latency; ++} ++ ++int DMulhLatency(bool is_operand_register = true) { ++ int latency = 0; ++ latency = Latency::DMUH; ++ if (!is_operand_register) { ++ latency += 1; ++ } ++ return latency; ++} ++ ++int DivLatency(bool is_operand_register = true) { ++ if (is_operand_register) { ++ return Latency::DIV; ++ } else { ++ return Latency::DIV + 1; ++ } ++} ++ ++int DivuLatency(bool is_operand_register = true) { ++ if (is_operand_register) { ++ return Latency::DIVU; ++ } else { ++ return Latency::DIVU + 1; ++ } ++} ++ ++int DdivLatency(bool is_operand_register = true) { ++ int latency = 0; ++ latency = Latency::DDIV; ++ if (!is_operand_register) { ++ latency += 1; ++ } ++ return latency; ++} ++ ++int DdivuLatency(bool is_operand_register = true) { ++ int latency = 0; ++ latency = Latency::DDIVU; ++ if (!is_operand_register) { ++ latency += 1; ++ } ++ return latency; ++} ++ ++int ModLatency(bool is_operand_register = true) { ++ int latency = 0; ++ latency = 1; ++ if (!is_operand_register) { ++ latency += 1; ++ } ++ return latency; ++} ++ ++int ModuLatency(bool is_operand_register = true) { ++ int latency = 0; ++ latency = 1; ++ if (!is_operand_register) { ++ latency += 1; ++ } ++ return latency; ++} ++ ++int DmodLatency(bool is_operand_register = true) { ++ int latency = 0; ++ latency = 1; ++ if (!is_operand_register) { ++ latency += 1; ++ } ++ return latency; ++} ++ ++int DmoduLatency(bool is_operand_register = true) { ++ int latency = 0; ++ latency = 1; ++ if (!is_operand_register) { ++ latency += 1; ++ } ++ return latency; ++} ++ ++int MovzLatency() { return Latency::BRANCH + 1; } ++ ++int MovnLatency() { return Latency::BRANCH + 1; } ++ ++int DlsaLatency() { ++ // Estimated max. ++ return DadduLatency() + 1; ++} ++ ++int CallLatency() { ++ // Estimated. ++ return DadduLatency(false) + Latency::BRANCH + 5; ++} ++ ++int JumpLatency() { ++ // Estimated max. ++ return 1 + DadduLatency() + Latency::BRANCH + 2; ++} ++ ++int SmiUntagLatency() { return 1; } ++ ++int PrepareForTailCallLatency() { ++ // Estimated max. ++ return 2 * (DlsaLatency() + DadduLatency(false)) + 2 + Latency::BRANCH + ++ Latency::BRANCH + 2 * DsubuLatency(false) + 2 + Latency::BRANCH + 1; ++} ++ ++int AssemblePopArgumentsAdoptFrameLatency() { ++ return 1 + Latency::BRANCH + 1 + SmiUntagLatency() + ++ PrepareForTailCallLatency(); ++} ++ ++int AssertLatency() { return 1; } ++ ++int PrepareCallCFunctionLatency() { ++ int frame_alignment = TurboAssembler::ActivationFrameAlignment(); ++ if (frame_alignment > kSystemPointerSize) { ++ return 1 + DsubuLatency(false) + AndLatency(false) + 1; ++ } else { ++ return DsubuLatency(false); ++ } ++} ++ ++int AdjustBaseAndOffsetLatency() { ++ return 3; // Estimated max. 
++} ++ ++int AlignedMemoryLatency() { return AdjustBaseAndOffsetLatency() + 1; } ++ ++int UlhuLatency() { return AlignedMemoryLatency(); } ++ ++int UlwLatency() { return AlignedMemoryLatency(); } ++ ++int UlwuLatency() { return AlignedMemoryLatency(); } ++ ++int UldLatency() { return AlignedMemoryLatency(); } ++ ++int Ulwc1Latency() { return AlignedMemoryLatency(); } ++ ++int Uldc1Latency() { return AlignedMemoryLatency(); } ++ ++int UshLatency() { return AlignedMemoryLatency(); } ++ ++int UswLatency() { return AlignedMemoryLatency(); } ++ ++int UsdLatency() { return AlignedMemoryLatency(); } ++ ++int Uswc1Latency() { return AlignedMemoryLatency(); } ++ ++int Usdc1Latency() { return AlignedMemoryLatency(); } ++ ++int Lwc1Latency() { return AdjustBaseAndOffsetLatency() + Latency::LWC1; } ++ ++int Swc1Latency() { return AdjustBaseAndOffsetLatency() + Latency::SWC1; } ++ ++int Sdc1Latency() { return AdjustBaseAndOffsetLatency() + Latency::SDC1; } ++ ++int Ldc1Latency() { return AdjustBaseAndOffsetLatency() + Latency::LDC1; } ++ ++int MultiPushLatency() { ++ int latency = DsubuLatency(false); ++ for (int16_t i = kNumRegisters - 1; i >= 0; i--) { ++ latency++; ++ } ++ return latency; ++} ++ ++int MultiPushFPULatency() { ++ int latency = DsubuLatency(false); ++ for (int16_t i = kNumRegisters - 1; i >= 0; i--) { ++ latency += Sdc1Latency(); ++ } ++ return latency; ++} ++ ++int PushCallerSavedLatency(SaveFPRegsMode fp_mode) { ++ int latency = MultiPushLatency(); ++ if (fp_mode == kSaveFPRegs) { ++ latency += MultiPushFPULatency(); ++ } ++ return latency; ++} ++ ++int MultiPopLatency() { ++ int latency = DadduLatency(false); ++ for (int16_t i = 0; i < kNumRegisters; i++) { ++ latency++; ++ } ++ return latency; ++} ++ ++int MultiPopFPULatency() { ++ int latency = DadduLatency(false); ++ for (int16_t i = 0; i < kNumRegisters; i++) { ++ latency += Ldc1Latency(); ++ } ++ return latency; ++} ++ ++int PopCallerSavedLatency(SaveFPRegsMode fp_mode) { ++ int latency = MultiPopLatency(); ++ if (fp_mode == kSaveFPRegs) { ++ latency += MultiPopFPULatency(); ++ } ++ return latency; ++} ++ ++int CallCFunctionHelperLatency() { ++ // Estimated. ++ int latency = AndLatency(false) + Latency::BRANCH + 2 + CallLatency(); ++ if (base::OS::ActivationFrameAlignment() > kSystemPointerSize) { ++ latency++; ++ } else { ++ latency += DadduLatency(false); ++ } ++ return latency; ++} ++ ++int CallCFunctionLatency() { return 1 + CallCFunctionHelperLatency(); } ++ ++int AssembleArchJumpLatency() { ++ // Estimated max. ++ return Latency::BRANCH; ++} ++ ++int GenerateSwitchTableLatency() { ++ int latency = 0; ++ latency = DlsaLatency() + 2; ++ latency += 2; ++ return latency; ++} ++ ++int AssembleArchTableSwitchLatency() { ++ return Latency::BRANCH + GenerateSwitchTableLatency(); ++} ++ ++int DropAndRetLatency() { ++ // Estimated max. ++ return DadduLatency(false) + JumpLatency(); ++} ++ ++int AssemblerReturnLatency() { ++ // Estimated max. ++ return DadduLatency(false) + MultiPopLatency() + MultiPopFPULatency() + ++ Latency::BRANCH + DadduLatency() + 1 + DropAndRetLatency(); ++} ++ ++int TryInlineTruncateDoubleToILatency() { ++ return 2 + Latency::TRUNC_W_D + Latency::MFC1 + 2 + AndLatency(false) + ++ Latency::BRANCH; ++} ++ ++int CallStubDelayedLatency() { return 1 + CallLatency(); } ++ ++int TruncateDoubleToIDelayedLatency() { ++ // TODO(la64): This no longer reflects how TruncateDoubleToI is called. 
++ return TryInlineTruncateDoubleToILatency() + 1 + DsubuLatency(false) + ++ Sdc1Latency() + CallStubDelayedLatency() + DadduLatency(false) + 1; ++} ++ ++int CheckPageFlagLatency() { ++ return AndLatency(false) + AlignedMemoryLatency() + AndLatency(false) + ++ Latency::BRANCH; ++} ++ ++int SltuLatency(bool is_operand_register = true) { ++ if (is_operand_register) { ++ return 1; ++ } else { ++ return 2; // Estimated max. ++ } ++} ++ ++int BranchShortHelperLatency() { ++ return 2; // Estimated max. ++} ++ ++int BranchShortLatency() { return BranchShortHelperLatency(); } ++ ++int MoveLatency() { return 1; } ++ ++int MovToFloatParametersLatency() { return 2 * MoveLatency(); } ++ ++int MovFromFloatResultLatency() { return MoveLatency(); } ++ ++int DaddOverflowLatency() { ++ // Estimated max. ++ return 6; ++} ++ ++int DsubOverflowLatency() { ++ // Estimated max. ++ return 6; ++} ++ ++int MulOverflowLatency() { ++ // Estimated max. ++ return MulLatency() + MulhLatency() + 2; ++} ++ ++int DclzLatency() { return 1; } ++ ++int CtzLatency() { return 3 + DclzLatency(); } ++ ++int DctzLatency() { return 4; } ++ ++int PopcntLatency() { ++ return 2 + AndLatency() + DsubuLatency() + 1 + AndLatency() + 1 + ++ AndLatency() + DadduLatency() + 1 + DadduLatency() + 1 + AndLatency() + ++ 1 + MulLatency() + 1; ++} ++ ++int DpopcntLatency() { ++ return 2 + AndLatency() + DsubuLatency() + 1 + AndLatency() + 1 + ++ AndLatency() + DadduLatency() + 1 + DadduLatency() + 1 + AndLatency() + ++ 1 + DmulLatency() + 1; ++} ++ ++int CompareFLatency() { return Latency::C_cond_S; } ++ ++int CompareF32Latency() { return CompareFLatency(); } ++ ++int CompareF64Latency() { return CompareFLatency(); } ++ ++int CompareIsNanFLatency() { return CompareFLatency(); } ++ ++int CompareIsNanF32Latency() { return CompareIsNanFLatency(); } ++ ++int CompareIsNanF64Latency() { return CompareIsNanFLatency(); } ++ ++int NegsLatency() { return Latency::NEG_S; } ++ ++int NegdLatency() { return Latency::NEG_D; } ++ ++int Float64RoundLatency() { return Latency::RINT_D + 4; } ++ ++int Float32RoundLatency() { return Latency::RINT_S + 4; } ++ ++int Float32MaxLatency() { ++ // Estimated max. ++ int latency = CompareIsNanF32Latency() + Latency::BRANCH; ++ return latency + Latency::MAX_S; ++} ++ ++int Float64MaxLatency() { ++ // Estimated max. ++ int latency = CompareIsNanF64Latency() + Latency::BRANCH; ++ return latency + Latency::MAX_D; ++} ++ ++int Float32MinLatency() { ++ // Estimated max. ++ int latency = CompareIsNanF32Latency() + Latency::BRANCH; ++ return latency + Latency::MIN_S; ++} ++ ++int Float64MinLatency() { ++ // Estimated max. ++ int latency = CompareIsNanF64Latency() + Latency::BRANCH; ++ return latency + Latency::MIN_D; ++} ++ ++int TruncLSLatency(bool load_status) { ++ int latency = Latency::TRUNC_L_S + Latency::DMFC1; ++ if (load_status) { ++ latency += SltuLatency() + 7; ++ } ++ return latency; ++} ++ ++int TruncLDLatency(bool load_status) { ++ int latency = Latency::TRUNC_L_D + Latency::DMFC1; ++ if (load_status) { ++ latency += SltuLatency() + 7; ++ } ++ return latency; ++} ++ ++int TruncUlSLatency() { ++ // Estimated max. ++ return 2 * CompareF32Latency() + CompareIsNanF32Latency() + ++ 4 * Latency::BRANCH + Latency::SUB_S + 2 * Latency::TRUNC_L_S + ++ 3 * Latency::DMFC1 + OrLatency() + Latency::MTC1 + Latency::MOV_S + ++ SltuLatency() + 4; ++} ++ ++int TruncUlDLatency() { ++ // Estimated max. 
++ return 2 * CompareF64Latency() + CompareIsNanF64Latency() + ++ 4 * Latency::BRANCH + Latency::SUB_D + 2 * Latency::TRUNC_L_D + ++ 3 * Latency::DMFC1 + OrLatency() + Latency::DMTC1 + Latency::MOV_D + ++ SltuLatency() + 4; ++} ++ ++int PushLatency() { return DadduLatency() + AlignedMemoryLatency(); } ++ ++int ByteSwapSignedLatency() { return 2; } ++ ++int LlLatency(int offset) { ++ bool is_one_instruction = is_int14(offset); ++ if (is_one_instruction) { ++ return 1; ++ } else { ++ return 3; ++ } ++} ++ ++int ExtractBitsLatency(bool sign_extend, int size) { ++ int latency = 2; ++ if (sign_extend) { ++ switch (size) { ++ case 8: ++ case 16: ++ case 32: ++ latency += 1; ++ break; ++ default: ++ UNREACHABLE(); ++ } ++ } ++ return latency; ++} ++ ++int InsertBitsLatency() { return 2 + DsubuLatency(false) + 2; } ++ ++int ScLatency(int offset) { ++ bool is_one_instruction = is_int14(offset); ++ if (is_one_instruction) { ++ return 1; ++ } else { ++ return 3; ++ } ++} ++ ++int Word32AtomicExchangeLatency(bool sign_extend, int size) { ++ return DadduLatency(false) + 1 + DsubuLatency() + 2 + LlLatency(0) + ++ ExtractBitsLatency(sign_extend, size) + InsertBitsLatency() + ++ ScLatency(0) + BranchShortLatency() + 1; ++} ++ ++int Word32AtomicCompareExchangeLatency(bool sign_extend, int size) { ++ return 2 + DsubuLatency() + 2 + LlLatency(0) + ++ ExtractBitsLatency(sign_extend, size) + InsertBitsLatency() + ++ ScLatency(0) + BranchShortLatency() + 1; ++} ++ ++int InstructionScheduler::GetInstructionLatency(const Instruction* instr) { ++ // Basic latency modeling for LA64 instructions. They have been determined ++ // in empirical way. ++ switch (instr->arch_opcode()) { ++ case kArchCallCodeObject: ++ case kArchCallWasmFunction: ++ return CallLatency(); ++ case kArchTailCallCodeObjectFromJSFunction: ++ case kArchTailCallCodeObject: { ++ int latency = 0; ++ if (instr->arch_opcode() == kArchTailCallCodeObjectFromJSFunction) { ++ latency = AssemblePopArgumentsAdoptFrameLatency(); ++ } ++ return latency + JumpLatency(); ++ } ++ case kArchTailCallWasm: ++ case kArchTailCallAddress: ++ return JumpLatency(); ++ case kArchCallJSFunction: { ++ int latency = 0; ++ if (FLAG_debug_code) { ++ latency = 1 + AssertLatency(); ++ } ++ return latency + 1 + DadduLatency(false) + CallLatency(); ++ } ++ case kArchPrepareCallCFunction: ++ return PrepareCallCFunctionLatency(); ++ case kArchSaveCallerRegisters: { ++ auto fp_mode = ++ static_cast(MiscField::decode(instr->opcode())); ++ return PushCallerSavedLatency(fp_mode); ++ } ++ case kArchRestoreCallerRegisters: { ++ auto fp_mode = ++ static_cast(MiscField::decode(instr->opcode())); ++ return PopCallerSavedLatency(fp_mode); ++ } ++ case kArchPrepareTailCall: ++ return 2; ++ case kArchCallCFunction: ++ return CallCFunctionLatency(); ++ case kArchJmp: ++ return AssembleArchJumpLatency(); ++ case kArchTableSwitch: ++ return AssembleArchTableSwitchLatency(); ++ case kArchAbortCSAAssert: ++ return CallLatency() + 1; ++ case kArchDebugBreak: ++ return 1; ++ case kArchComment: ++ case kArchNop: ++ case kArchThrowTerminator: ++ case kArchDeoptimize: ++ return 0; ++ case kArchRet: ++ return AssemblerReturnLatency(); ++ case kArchFramePointer: ++ return 1; ++ case kArchParentFramePointer: ++ // Estimated max. ++ return AlignedMemoryLatency(); ++ case kArchTruncateDoubleToI: ++ return TruncateDoubleToIDelayedLatency(); ++ case kArchStoreWithWriteBarrier: ++ return DadduLatency() + 1 + CheckPageFlagLatency(); ++ case kArchStackSlot: ++ // Estimated max. 
++ return DadduLatency(false) + AndLatency(false) + AssertLatency() + ++ DadduLatency(false) + AndLatency(false) + BranchShortLatency() + ++ 1 + DsubuLatency() + DadduLatency(); ++ case kArchWordPoisonOnSpeculation: ++ return AndLatency(); ++ case kIeee754Float64Acos: ++ case kIeee754Float64Acosh: ++ case kIeee754Float64Asin: ++ case kIeee754Float64Asinh: ++ case kIeee754Float64Atan: ++ case kIeee754Float64Atanh: ++ case kIeee754Float64Atan2: ++ case kIeee754Float64Cos: ++ case kIeee754Float64Cosh: ++ case kIeee754Float64Cbrt: ++ case kIeee754Float64Exp: ++ case kIeee754Float64Expm1: ++ case kIeee754Float64Log: ++ case kIeee754Float64Log1p: ++ case kIeee754Float64Log10: ++ case kIeee754Float64Log2: ++ case kIeee754Float64Pow: ++ case kIeee754Float64Sin: ++ case kIeee754Float64Sinh: ++ case kIeee754Float64Tan: ++ case kIeee754Float64Tanh: ++ return PrepareCallCFunctionLatency() + MovToFloatParametersLatency() + ++ CallCFunctionLatency() + MovFromFloatResultLatency(); ++ case kLa64Add: ++ case kLa64Dadd: ++ return DadduLatency(instr->InputAt(1)->IsRegister()); ++ case kLa64DaddOvf: ++ return DaddOverflowLatency(); ++ case kLa64Sub: ++ case kLa64Dsub: ++ return DsubuLatency(instr->InputAt(1)->IsRegister()); ++ case kLa64DsubOvf: ++ return DsubOverflowLatency(); ++ case kLa64Mul: ++ return MulLatency(); ++ case kLa64MulOvf: ++ return MulOverflowLatency(); ++ case kLa64MulHigh: ++ return MulhLatency(); ++ case kLa64MulHighU: ++ return MulhuLatency(); ++ case kLa64DMulHigh: ++ return DMulhLatency(); ++ case kLa64Div: { ++ int latency = DivLatency(instr->InputAt(1)->IsRegister()); ++ return latency++; ++ } ++ case kLa64DivU: { ++ int latency = DivuLatency(instr->InputAt(1)->IsRegister()); ++ return latency++; ++ } ++ case kLa64Mod: ++ return ModLatency(); ++ case kLa64ModU: ++ return ModuLatency(); ++ case kLa64Dmul: ++ return DmulLatency(); ++ case kLa64Ddiv: { ++ int latency = DdivLatency(); ++ return latency++; ++ } ++ case kLa64DdivU: { ++ int latency = DdivuLatency(); ++ return latency++; ++ } ++ case kLa64Dmod: ++ return DmodLatency(); ++ case kLa64DmodU: ++ return DmoduLatency(); ++ case kLa64Dlsa: ++ case kLa64Lsa: ++ return DlsaLatency(); ++ case kLa64And: ++ return AndLatency(instr->InputAt(1)->IsRegister()); ++ case kLa64And32: { ++ bool is_operand_register = instr->InputAt(1)->IsRegister(); ++ int latency = AndLatency(is_operand_register); ++ if (is_operand_register) { ++ return latency + 2; ++ } else { ++ return latency + 1; ++ } ++ } ++ case kLa64Or: ++ return OrLatency(instr->InputAt(1)->IsRegister()); ++ case kLa64Or32: { ++ bool is_operand_register = instr->InputAt(1)->IsRegister(); ++ int latency = OrLatency(is_operand_register); ++ if (is_operand_register) { ++ return latency + 2; ++ } else { ++ return latency + 1; ++ } ++ } ++ case kLa64Nor: ++ return NorLatency(instr->InputAt(1)->IsRegister()); ++ case kLa64Nor32: { ++ bool is_operand_register = instr->InputAt(1)->IsRegister(); ++ int latency = NorLatency(is_operand_register); ++ if (is_operand_register) { ++ return latency + 2; ++ } else { ++ return latency + 1; ++ } ++ } ++ case kLa64Xor: ++ return XorLatency(instr->InputAt(1)->IsRegister()); ++ case kLa64Xor32: { ++ bool is_operand_register = instr->InputAt(1)->IsRegister(); ++ int latency = XorLatency(is_operand_register); ++ if (is_operand_register) { ++ return latency + 2; ++ } else { ++ return latency + 1; ++ } ++ } ++ case kLa64Clz: ++ case kLa64Dclz: ++ return DclzLatency(); ++ case kLa64Ctz: ++ return CtzLatency(); ++ case kLa64Dctz: ++ return DctzLatency(); ++ case 
kLa64Popcnt: ++ return PopcntLatency(); ++ case kLa64Dpopcnt: ++ return DpopcntLatency(); ++ case kLa64Shl: ++ return 1; ++ case kLa64Shr: ++ case kLa64Sar: ++ return 2; ++ case kLa64Ext: ++ case kLa64Ins: ++ case kLa64Dext: ++ case kLa64Dins: ++ case kLa64Dshl: ++ case kLa64Dshr: ++ case kLa64Dsar: ++ case kLa64Ror: ++ case kLa64Dror: ++ return 1; ++ case kLa64Tst: ++ return AndLatency(instr->InputAt(1)->IsRegister()); ++ case kLa64Mov: ++ return 1; ++ case kLa64CmpS: ++ return MoveLatency() + CompareF32Latency(); ++ case kLa64AddS: ++ return Latency::ADD_S; ++ case kLa64SubS: ++ return Latency::SUB_S; ++ case kLa64MulS: ++ return Latency::MUL_S; ++ case kLa64DivS: ++ return Latency::DIV_S; ++ case kLa64ModS: ++ return PrepareCallCFunctionLatency() + MovToFloatParametersLatency() + ++ CallCFunctionLatency() + MovFromFloatResultLatency(); ++ case kLa64AbsS: ++ return Latency::ABS_S; ++ case kLa64NegS: ++ return NegdLatency(); ++ case kLa64SqrtS: ++ return Latency::SQRT_S; ++ case kLa64MaxS: ++ return Latency::MAX_S; ++ case kLa64MinS: ++ return Latency::MIN_S; ++ case kLa64CmpD: ++ return MoveLatency() + CompareF64Latency(); ++ case kLa64AddD: ++ return Latency::ADD_D; ++ case kLa64SubD: ++ return Latency::SUB_D; ++ case kLa64MulD: ++ return Latency::MUL_D; ++ case kLa64DivD: ++ return Latency::DIV_D; ++ case kLa64ModD: ++ return PrepareCallCFunctionLatency() + MovToFloatParametersLatency() + ++ CallCFunctionLatency() + MovFromFloatResultLatency(); ++ case kLa64AbsD: ++ return Latency::ABS_D; ++ case kLa64NegD: ++ return NegdLatency(); ++ case kLa64SqrtD: ++ return Latency::SQRT_D; ++ case kLa64MaxD: ++ return Latency::MAX_D; ++ case kLa64MinD: ++ return Latency::MIN_D; ++ case kLa64Float64RoundDown: ++ case kLa64Float64RoundTruncate: ++ case kLa64Float64RoundUp: ++ case kLa64Float64RoundTiesEven: ++ return Float64RoundLatency(); ++ case kLa64Float32RoundDown: ++ case kLa64Float32RoundTruncate: ++ case kLa64Float32RoundUp: ++ case kLa64Float32RoundTiesEven: ++ return Float32RoundLatency(); ++ case kLa64Float32Max: ++ return Float32MaxLatency(); ++ case kLa64Float64Max: ++ return Float64MaxLatency(); ++ case kLa64Float32Min: ++ return Float32MinLatency(); ++ case kLa64Float64Min: ++ return Float64MinLatency(); ++ case kLa64Float64SilenceNaN: ++ return Latency::SUB_D; ++ case kLa64CvtSD: ++ return Latency::CVT_S_D; ++ case kLa64CvtDS: ++ return Latency::CVT_D_S; ++ case kLa64CvtDW: ++ return Latency::MTC1 + Latency::CVT_D_W; ++ case kLa64CvtSW: ++ return Latency::MTC1 + Latency::CVT_S_W; ++ case kLa64CvtSUw: ++ return 1 + Latency::DMTC1 + Latency::CVT_S_L; ++ case kLa64CvtSL: ++ return Latency::DMTC1 + Latency::CVT_S_L; ++ case kLa64CvtDL: ++ return Latency::DMTC1 + Latency::CVT_D_L; ++ case kLa64CvtDUw: ++ return 1 + Latency::DMTC1 + Latency::CVT_D_L; ++ case kLa64CvtDUl: ++ return 2 * Latency::BRANCH + 3 + 2 * Latency::DMTC1 + ++ 2 * Latency::CVT_D_L + Latency::ADD_D; ++ case kLa64CvtSUl: ++ return 2 * Latency::BRANCH + 3 + 2 * Latency::DMTC1 + ++ 2 * Latency::CVT_S_L + Latency::ADD_S; ++ case kLa64FloorWD: ++ return Latency::FLOOR_W_D + Latency::MFC1; ++ case kLa64CeilWD: ++ return Latency::CEIL_W_D + Latency::MFC1; ++ case kLa64RoundWD: ++ return Latency::ROUND_W_D + Latency::MFC1; ++ case kLa64TruncWD: ++ return Latency::TRUNC_W_D + Latency::MFC1; ++ case kLa64FloorWS: ++ return Latency::FLOOR_W_S + Latency::MFC1; ++ case kLa64CeilWS: ++ return Latency::CEIL_W_S + Latency::MFC1; ++ case kLa64RoundWS: ++ return Latency::ROUND_W_S + Latency::MFC1; ++ case kLa64TruncWS: ++ return 
Latency::TRUNC_W_S + Latency::MFC1 + 2 + MovnLatency(); ++ case kLa64TruncLS: ++ return TruncLSLatency(instr->OutputCount() > 1); ++ case kLa64TruncLD: ++ return TruncLDLatency(instr->OutputCount() > 1); ++ case kLa64TruncUwD: ++ // Estimated max. ++ return CompareF64Latency() + 2 * Latency::BRANCH + ++ 2 * Latency::TRUNC_W_D + Latency::SUB_D + OrLatency() + ++ Latency::MTC1 + Latency::MFC1 + Latency::MTHC1 + 1; ++ case kLa64TruncUwS: ++ // Estimated max. ++ return CompareF32Latency() + 2 * Latency::BRANCH + ++ 2 * Latency::TRUNC_W_S + Latency::SUB_S + OrLatency() + ++ Latency::MTC1 + 2 * Latency::MFC1 + 2 + MovzLatency(); ++ case kLa64TruncUlS: ++ return TruncUlSLatency(); ++ case kLa64TruncUlD: ++ return TruncUlDLatency(); ++ case kLa64BitcastDL: ++ return Latency::DMFC1; ++ case kLa64BitcastLD: ++ return Latency::DMTC1; ++ case kLa64Float64ExtractLowWord32: ++ return Latency::MFC1; ++ case kLa64Float64InsertLowWord32: ++ return Latency::MFHC1 + Latency::MTC1 + Latency::MTHC1; ++ case kLa64Float64ExtractHighWord32: ++ return Latency::MFHC1; ++ case kLa64Float64InsertHighWord32: ++ return Latency::MTHC1; ++ case kLa64Seb: ++ case kLa64Seh: ++ return 1; ++ case kLa64Lbu: ++ case kLa64Lb: ++ case kLa64Lhu: ++ case kLa64Lh: ++ case kLa64Lwu: ++ case kLa64Lw: ++ case kLa64Ld: ++ case kLa64Sb: ++ case kLa64Sh: ++ case kLa64Sw: ++ case kLa64Sd: ++ return AlignedMemoryLatency(); ++ case kLa64Lwc1: ++ return Lwc1Latency(); ++ case kLa64Ldc1: ++ return Ldc1Latency(); ++ case kLa64Swc1: ++ return Swc1Latency(); ++ case kLa64Sdc1: ++ return Sdc1Latency(); ++ case kLa64Ulhu: ++ case kLa64Ulh: ++ return UlhuLatency(); ++ case kLa64Ulwu: ++ return UlwuLatency(); ++ case kLa64Ulw: ++ return UlwLatency(); ++ case kLa64Uld: ++ return UldLatency(); ++ case kLa64Ulwc1: ++ return Ulwc1Latency(); ++ case kLa64Uldc1: ++ return Uldc1Latency(); ++ case kLa64Ush: ++ return UshLatency(); ++ case kLa64Usw: ++ return UswLatency(); ++ case kLa64Usd: ++ return UsdLatency(); ++ case kLa64Uswc1: ++ return Uswc1Latency(); ++ case kLa64Usdc1: ++ return Usdc1Latency(); ++ case kLa64Push: { ++ int latency = 0; ++ if (instr->InputAt(0)->IsFPRegister()) { ++ latency = Sdc1Latency() + DsubuLatency(false); ++ } else { ++ latency = PushLatency(); ++ } ++ return latency; ++ } ++ case kLa64Peek: { ++ int latency = 0; ++ if (instr->OutputAt(0)->IsFPRegister()) { ++ auto op = LocationOperand::cast(instr->OutputAt(0)); ++ switch (op->representation()) { ++ case MachineRepresentation::kFloat64: ++ latency = Ldc1Latency(); ++ break; ++ case MachineRepresentation::kFloat32: ++ latency = Latency::LWC1; ++ break; ++ default: ++ UNREACHABLE(); ++ } ++ } else { ++ latency = AlignedMemoryLatency(); ++ } ++ return latency; ++ } ++ case kLa64StackClaim: ++ return DsubuLatency(false); ++ case kLa64StoreToStackSlot: { ++ int latency = 0; ++ if (instr->InputAt(0)->IsFPRegister()) { ++ if (instr->InputAt(0)->IsSimd128Register()) { ++ latency = 1; // Estimated value. 
++ } else { ++ latency = Sdc1Latency(); ++ } ++ } else { ++ latency = AlignedMemoryLatency(); ++ } ++ return latency; ++ } ++ case kLa64ByteSwap64: ++ return ByteSwapSignedLatency(); ++ case kLa64ByteSwap32: ++ return ByteSwapSignedLatency(); ++ case kWord32AtomicLoadInt8: ++ case kWord32AtomicLoadUint8: ++ case kWord32AtomicLoadInt16: ++ case kWord32AtomicLoadUint16: ++ case kWord32AtomicLoadWord32: ++ return 2; ++ case kWord32AtomicStoreWord8: ++ case kWord32AtomicStoreWord16: ++ case kWord32AtomicStoreWord32: ++ return 3; ++ case kWord32AtomicExchangeInt8: ++ return Word32AtomicExchangeLatency(true, 8); ++ case kWord32AtomicExchangeUint8: ++ return Word32AtomicExchangeLatency(false, 8); ++ case kWord32AtomicExchangeInt16: ++ return Word32AtomicExchangeLatency(true, 16); ++ case kWord32AtomicExchangeUint16: ++ return Word32AtomicExchangeLatency(false, 16); ++ case kWord32AtomicExchangeWord32: ++ return 2 + LlLatency(0) + 1 + ScLatency(0) + BranchShortLatency() + 1; ++ case kWord32AtomicCompareExchangeInt8: ++ return Word32AtomicCompareExchangeLatency(true, 8); ++ case kWord32AtomicCompareExchangeUint8: ++ return Word32AtomicCompareExchangeLatency(false, 8); ++ case kWord32AtomicCompareExchangeInt16: ++ return Word32AtomicCompareExchangeLatency(true, 16); ++ case kWord32AtomicCompareExchangeUint16: ++ return Word32AtomicCompareExchangeLatency(false, 16); ++ case kWord32AtomicCompareExchangeWord32: ++ return 3 + LlLatency(0) + BranchShortLatency() + 1 + ScLatency(0) + ++ BranchShortLatency() + 1; ++ case kLa64AssertEqual: ++ return AssertLatency(); ++ default: ++ return 1; ++ } ++} ++ ++} // namespace compiler ++} // namespace internal ++} // namespace v8 +diff --git a/src/3rdparty/chromium/v8/src/compiler/backend/la64/instruction-selector-la64.cc b/src/3rdparty/chromium/v8/src/compiler/backend/la64/instruction-selector-la64.cc +new file mode 100644 +index 00000000000..67ea5efe39f +--- /dev/null ++++ b/src/3rdparty/chromium/v8/src/compiler/backend/la64/instruction-selector-la64.cc +@@ -0,0 +1,3096 @@ ++// Copyright 2014 the V8 project authors. All rights reserved. ++// Use of this source code is governed by a BSD-style license that can be ++// found in the LICENSE file. ++ ++#include "src/base/bits.h" ++#include "src/compiler/backend/instruction-selector-impl.h" ++#include "src/compiler/node-matchers.h" ++#include "src/compiler/node-properties.h" ++ ++namespace v8 { ++namespace internal { ++namespace compiler { ++ ++#define TRACE_UNIMPL() \ ++ PrintF("UNIMPLEMENTED instr_sel: %s at line %d\n", __FUNCTION__, __LINE__) ++ ++#define TRACE() PrintF("instr_sel: %s at line %d\n", __FUNCTION__, __LINE__) ++ ++// Adds la64-specific methods for generating InstructionOperands. ++class La64OperandGenerator final : public OperandGenerator { ++ public: ++ explicit La64OperandGenerator(InstructionSelector* selector) ++ : OperandGenerator(selector) {} ++ ++ InstructionOperand UseOperand(Node* node, InstructionCode opcode) { ++ if (CanBeImmediate(node, opcode)) { ++ return UseImmediate(node); ++ } ++ return UseRegister(node); ++ } ++ ++ // Use the zero register if the node has the immediate value zero, otherwise ++ // assign a register. 
++ InstructionOperand UseRegisterOrImmediateZero(Node* node) { ++ if ((IsIntegerConstant(node) && (GetIntegerConstantValue(node) == 0)) || ++ (IsFloatConstant(node) && ++ (bit_cast(GetFloatConstantValue(node)) == 0))) { ++ return UseImmediate(node); ++ } ++ return UseRegister(node); ++ } ++ ++ bool IsIntegerConstant(Node* node) { ++ return (node->opcode() == IrOpcode::kInt32Constant) || ++ (node->opcode() == IrOpcode::kInt64Constant); ++ } ++ ++ int64_t GetIntegerConstantValue(Node* node) { ++ if (node->opcode() == IrOpcode::kInt32Constant) { ++ return OpParameter(node->op()); ++ } ++ DCHECK_EQ(IrOpcode::kInt64Constant, node->opcode()); ++ return OpParameter(node->op()); ++ } ++ ++ bool IsFloatConstant(Node* node) { ++ return (node->opcode() == IrOpcode::kFloat32Constant) || ++ (node->opcode() == IrOpcode::kFloat64Constant); ++ } ++ ++ double GetFloatConstantValue(Node* node) { ++ if (node->opcode() == IrOpcode::kFloat32Constant) { ++ return OpParameter(node->op()); ++ } ++ DCHECK_EQ(IrOpcode::kFloat64Constant, node->opcode()); ++ return OpParameter(node->op()); ++ } ++ ++ bool CanBeImmediate(Node* node, InstructionCode mode) { ++ return IsIntegerConstant(node) && ++ CanBeImmediate(GetIntegerConstantValue(node), mode); ++ } ++ ++ bool CanBeImmediate(int64_t value, InstructionCode opcode) { ++ switch (ArchOpcodeField::decode(opcode)) { ++ case kLa64Shl: ++ case kLa64Sar: ++ case kLa64Shr: ++ return is_uint5(value); ++ case kLa64Dshl: ++ case kLa64Dsar: ++ case kLa64Dshr: ++ return is_uint6(value); ++ case kLa64Add: ++ case kLa64And32: ++ case kLa64And: ++ case kLa64Dadd: ++ case kLa64Or32: ++ case kLa64Or: ++ case kLa64Tst: ++ case kLa64Xor: ++ return is_uint12(value); ++ case kLa64Lb: ++ case kLa64Lbu: ++ case kLa64Sb: ++ case kLa64Lh: ++ case kLa64Lhu: ++ case kLa64Sh: ++ case kLa64Lw: ++ case kLa64Sw: ++ case kLa64Ld: ++ case kLa64Sd: ++ case kLa64Lwc1: ++ case kLa64Swc1: ++ case kLa64Ldc1: ++ case kLa64Sdc1: ++ return is_int12(value); ++ default: ++ return is_int12(value); ++ } ++ } ++ ++ private: ++ bool ImmediateFitsAddrMode1Instruction(int32_t imm) const { ++ TRACE_UNIMPL(); ++ return false; ++ } ++}; ++ ++static void VisitRR(InstructionSelector* selector, ArchOpcode opcode, ++ Node* node) { ++ La64OperandGenerator g(selector); ++ selector->Emit(opcode, g.DefineAsRegister(node), ++ g.UseRegister(node->InputAt(0))); ++} ++ ++static void VisitRRI(InstructionSelector* selector, ArchOpcode opcode, ++ Node* node) { ++ La64OperandGenerator g(selector); ++ int32_t imm = OpParameter(node->op()); ++ selector->Emit(opcode, g.DefineAsRegister(node), ++ g.UseRegister(node->InputAt(0)), g.UseImmediate(imm)); ++} ++ ++static void VisitSimdShift(InstructionSelector* selector, ArchOpcode opcode, ++ Node* node) { ++ La64OperandGenerator g(selector); ++ if (g.IsIntegerConstant(node->InputAt(1))) { ++ selector->Emit(opcode, g.DefineAsRegister(node), ++ g.UseRegister(node->InputAt(0)), ++ g.UseImmediate(node->InputAt(1))); ++ } else { ++ selector->Emit(opcode, g.DefineAsRegister(node), ++ g.UseRegister(node->InputAt(0)), ++ g.UseRegister(node->InputAt(1))); ++ } ++} ++ ++static void VisitRRIR(InstructionSelector* selector, ArchOpcode opcode, ++ Node* node) { ++ La64OperandGenerator g(selector); ++ int32_t imm = OpParameter(node->op()); ++ selector->Emit(opcode, g.DefineAsRegister(node), ++ g.UseRegister(node->InputAt(0)), g.UseImmediate(imm), ++ g.UseRegister(node->InputAt(1))); ++} ++ ++static void VisitRRR(InstructionSelector* selector, ArchOpcode opcode, ++ Node* node) { ++ La64OperandGenerator 
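CanBeImmediate() above gates immediate operands on LoongArch's encoding limits: shift amounts are checked with is_uint5/is_uint6, most ALU immediates with is_uint12, and memory offsets (and the default case) with is_int12. Assuming V8's is_intN/is_uintN helpers have their usual meaning, the 12-bit checks are equivalent to:

    #include <cstdint>

    // Signed 12-bit field: [-2048, 2047].
    constexpr bool FitsInt12(int64_t v) { return v >= -2048 && v <= 2047; }
    // Unsigned 12-bit field: [0, 4095].
    constexpr bool FitsUint12(int64_t v) { return v >= 0 && v <= 4095; }

    static_assert(FitsInt12(-2048) && !FitsInt12(2048), "signed 12-bit range");
    static_assert(FitsUint12(4095) && !FitsUint12(4096), "unsigned 12-bit range");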
g(selector); ++ selector->Emit(opcode, g.DefineAsRegister(node), ++ g.UseRegister(node->InputAt(0)), ++ g.UseRegister(node->InputAt(1))); ++} ++ ++void VisitRRRR(InstructionSelector* selector, ArchOpcode opcode, Node* node) { ++ La64OperandGenerator g(selector); ++ selector->Emit( ++ opcode, g.DefineSameAsFirst(node), g.UseRegister(node->InputAt(0)), ++ g.UseRegister(node->InputAt(1)), g.UseRegister(node->InputAt(2))); ++} ++ ++static void VisitRRO(InstructionSelector* selector, ArchOpcode opcode, ++ Node* node) { ++ La64OperandGenerator g(selector); ++ selector->Emit(opcode, g.DefineAsRegister(node), ++ g.UseRegister(node->InputAt(0)), ++ g.UseOperand(node->InputAt(1), opcode)); ++} ++ ++struct ExtendingLoadMatcher { ++ ExtendingLoadMatcher(Node* node, InstructionSelector* selector) ++ : matches_(false), selector_(selector), base_(nullptr), immediate_(0) { ++ Initialize(node); ++ } ++ ++ bool Matches() const { return matches_; } ++ ++ Node* base() const { ++ DCHECK(Matches()); ++ return base_; ++ } ++ int64_t immediate() const { ++ DCHECK(Matches()); ++ return immediate_; ++ } ++ ArchOpcode opcode() const { ++ DCHECK(Matches()); ++ return opcode_; ++ } ++ ++ private: ++ bool matches_; ++ InstructionSelector* selector_; ++ Node* base_; ++ int64_t immediate_; ++ ArchOpcode opcode_; ++ ++ void Initialize(Node* node) { ++ Int64BinopMatcher m(node); ++ // When loading a 64-bit value and shifting by 32, we should ++ // just load and sign-extend the interesting 4 bytes instead. ++ // This happens, for example, when we're loading and untagging SMIs. ++ DCHECK(m.IsWord64Sar()); ++ if (m.left().IsLoad() && m.right().Is(32) && ++ selector_->CanCover(m.node(), m.left().node())) { ++ DCHECK_EQ(selector_->GetEffectLevel(node), ++ selector_->GetEffectLevel(m.left().node())); ++ MachineRepresentation rep = ++ LoadRepresentationOf(m.left().node()->op()).representation(); ++ DCHECK_EQ(3, ElementSizeLog2Of(rep)); ++ if (rep != MachineRepresentation::kTaggedSigned && ++ rep != MachineRepresentation::kTaggedPointer && ++ rep != MachineRepresentation::kTagged && ++ rep != MachineRepresentation::kWord64) { ++ return; ++ } ++ ++ La64OperandGenerator g(selector_); ++ Node* load = m.left().node(); ++ Node* offset = load->InputAt(1); ++ base_ = load->InputAt(0); ++ opcode_ = kLa64Lw; ++ if (g.CanBeImmediate(offset, opcode_)) { ++ immediate_ = g.GetIntegerConstantValue(offset) + 4; ++ matches_ = g.CanBeImmediate(immediate_, kLa64Lw); ++ } ++ } ++ } ++}; ++ ++bool TryEmitExtendingLoad(InstructionSelector* selector, Node* node, ++ Node* output_node) { ++ ExtendingLoadMatcher m(node, selector); ++ La64OperandGenerator g(selector); ++ if (m.Matches()) { ++ InstructionOperand inputs[2]; ++ inputs[0] = g.UseRegister(m.base()); ++ InstructionCode opcode = ++ m.opcode() | AddressingModeField::encode(kMode_MRI); ++ DCHECK(is_int32(m.immediate())); ++ inputs[1] = g.TempImmediate(static_cast(m.immediate())); ++ InstructionOperand outputs[] = {g.DefineAsRegister(output_node)}; ++ selector->Emit(opcode, arraysize(outputs), outputs, arraysize(inputs), ++ inputs); ++ return true; ++ } ++ return false; ++} ++ ++bool TryMatchImmediate(InstructionSelector* selector, ++ InstructionCode* opcode_return, Node* node, ++ size_t* input_count_return, InstructionOperand* inputs) { ++ La64OperandGenerator g(selector); ++ if (g.CanBeImmediate(node, *opcode_return)) { ++ *opcode_return |= AddressingModeField::encode(kMode_MRI); ++ inputs[0] = g.UseImmediate(node); ++ *input_count_return = 1; ++ return true; ++ } ++ return false; ++} ++ ++static 
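ExtendingLoadMatcher/TryEmitExtendingLoad above rewrite Word64Sar(Load(p), 32), the pattern produced when loading and untagging Smis, into a single sign-extending 32-bit load of the upper half of the value, which on a little-endian target sits at offset +4. A plain C++ sketch of the equivalence being exploited (hypothetical helper names, little-endian assumed):

    #include <cstdint>
    #include <cstring>

    // (64-bit load at p) >> 32 ...
    int64_t UpperHalfViaShift(const void* p) {
      int64_t v;
      std::memcpy(&v, p, sizeof(v));
      return v >> 32;                    // arithmetic shift keeps the sign
    }

    // ... equals one sign-extending 32-bit load at p + 4 on little-endian.
    int64_t UpperHalfViaNarrowLoad(const void* p) {
      int32_t hi;
      std::memcpy(&hi, static_cast<const char*>(p) + 4, sizeof(hi));
      return static_cast<int64_t>(hi);
    }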
void VisitBinop(InstructionSelector* selector, Node* node, ++ InstructionCode opcode, bool has_reverse_opcode, ++ InstructionCode reverse_opcode, ++ FlagsContinuation* cont) { ++ La64OperandGenerator g(selector); ++ Int32BinopMatcher m(node); ++ InstructionOperand inputs[2]; ++ size_t input_count = 0; ++ InstructionOperand outputs[1]; ++ size_t output_count = 0; ++ ++ if (TryMatchImmediate(selector, &opcode, m.right().node(), &input_count, ++ &inputs[1])) { ++ inputs[0] = g.UseRegister(m.left().node()); ++ input_count++; ++ } else if (has_reverse_opcode && ++ TryMatchImmediate(selector, &reverse_opcode, m.left().node(), ++ &input_count, &inputs[1])) { ++ inputs[0] = g.UseRegister(m.right().node()); ++ opcode = reverse_opcode; ++ input_count++; ++ } else { ++ inputs[input_count++] = g.UseRegister(m.left().node()); ++ inputs[input_count++] = g.UseOperand(m.right().node(), opcode); ++ } ++ ++ if (cont->IsDeoptimize()) { ++ // If we can deoptimize as a result of the binop, we need to make sure that ++ // the deopt inputs are not overwritten by the binop result. One way ++ // to achieve that is to declare the output register as same-as-first. ++ outputs[output_count++] = g.DefineSameAsFirst(node); ++ } else { ++ outputs[output_count++] = g.DefineAsRegister(node); ++ } ++ ++ DCHECK_NE(0u, input_count); ++ DCHECK_EQ(1u, output_count); ++ DCHECK_GE(arraysize(inputs), input_count); ++ DCHECK_GE(arraysize(outputs), output_count); ++ ++ selector->EmitWithContinuation(opcode, output_count, outputs, input_count, ++ inputs, cont); ++} ++ ++static void VisitBinop(InstructionSelector* selector, Node* node, ++ InstructionCode opcode, bool has_reverse_opcode, ++ InstructionCode reverse_opcode) { ++ FlagsContinuation cont; ++ VisitBinop(selector, node, opcode, has_reverse_opcode, reverse_opcode, &cont); ++} ++ ++static void VisitBinop(InstructionSelector* selector, Node* node, ++ InstructionCode opcode, FlagsContinuation* cont) { ++ VisitBinop(selector, node, opcode, false, kArchNop, cont); ++} ++ ++static void VisitBinop(InstructionSelector* selector, Node* node, ++ InstructionCode opcode) { ++ VisitBinop(selector, node, opcode, false, kArchNop); ++} ++ ++void InstructionSelector::VisitStackSlot(Node* node) { ++ StackSlotRepresentation rep = StackSlotRepresentationOf(node->op()); ++ int alignment = rep.alignment(); ++ int slot = frame_->AllocateSpillSlot(rep.size(), alignment); ++ OperandGenerator g(this); ++ ++ Emit(kArchStackSlot, g.DefineAsRegister(node), ++ sequence()->AddImmediate(Constant(slot)), ++ sequence()->AddImmediate(Constant(alignment)), 0, nullptr); ++} ++ ++void InstructionSelector::VisitAbortCSAAssert(Node* node) { ++ La64OperandGenerator g(this); ++ Emit(kArchAbortCSAAssert, g.NoOutput(), g.UseFixed(node->InputAt(0), a0)); ++} ++ ++void EmitLoad(InstructionSelector* selector, Node* node, InstructionCode opcode, ++ Node* output = nullptr) { ++ La64OperandGenerator g(selector); ++ Node* base = node->InputAt(0); ++ Node* index = node->InputAt(1); ++ ++ if (g.CanBeImmediate(index, opcode)) { ++ selector->Emit(opcode | AddressingModeField::encode(kMode_MRI), ++ g.DefineAsRegister(output == nullptr ? node : output), ++ g.UseRegister(base), g.UseImmediate(index)); ++ } else { ++ selector->Emit(opcode | AddressingModeField::encode(kMode_MRR), ++ g.DefineAsRegister(output == nullptr ? 
node : output), ++ g.UseRegister(base), g.UseRegister(index)); ++ } ++} ++ ++void InstructionSelector::VisitLoadTransform(Node* node) { ++ LoadTransformParameters params = LoadTransformParametersOf(node->op()); ++ ++ InstructionCode opcode = kArchNop; ++ switch (params.transformation) { ++ case LoadTransformation::kS8x16LoadSplat: ++ opcode = kLa64S8x16LoadSplat; ++ break; ++ case LoadTransformation::kS16x8LoadSplat: ++ opcode = kLa64S16x8LoadSplat; ++ break; ++ case LoadTransformation::kS32x4LoadSplat: ++ opcode = kLa64S32x4LoadSplat; ++ break; ++ case LoadTransformation::kS64x2LoadSplat: ++ opcode = kLa64S64x2LoadSplat; ++ break; ++ case LoadTransformation::kI16x8Load8x8S: ++ opcode = kLa64I16x8Load8x8S; ++ break; ++ case LoadTransformation::kI16x8Load8x8U: ++ opcode = kLa64I16x8Load8x8U; ++ break; ++ case LoadTransformation::kI32x4Load16x4S: ++ opcode = kLa64I32x4Load16x4S; ++ break; ++ case LoadTransformation::kI32x4Load16x4U: ++ opcode = kLa64I32x4Load16x4U; ++ break; ++ case LoadTransformation::kI64x2Load32x2S: ++ opcode = kLa64I64x2Load32x2S; ++ break; ++ case LoadTransformation::kI64x2Load32x2U: ++ opcode = kLa64I64x2Load32x2U; ++ break; ++ default: ++ UNIMPLEMENTED(); ++ } ++ ++ EmitLoad(this, node, opcode); ++} ++ ++void InstructionSelector::VisitLoad(Node* node) { ++ LoadRepresentation load_rep = LoadRepresentationOf(node->op()); ++ ++ InstructionCode opcode = kArchNop; ++ switch (load_rep.representation()) { ++ case MachineRepresentation::kFloat32: ++ opcode = kLa64Lwc1; ++ break; ++ case MachineRepresentation::kFloat64: ++ opcode = kLa64Ldc1; ++ break; ++ case MachineRepresentation::kBit: // Fall through. ++ case MachineRepresentation::kWord8: ++ opcode = load_rep.IsUnsigned() ? kLa64Lbu : kLa64Lb; ++ break; ++ case MachineRepresentation::kWord16: ++ opcode = load_rep.IsUnsigned() ? kLa64Lhu : kLa64Lh; ++ break; ++ case MachineRepresentation::kWord32: ++ opcode = load_rep.IsUnsigned() ? kLa64Lwu : kLa64Lw; ++ break; ++ case MachineRepresentation::kTaggedSigned: // Fall through. ++ case MachineRepresentation::kTaggedPointer: // Fall through. ++ case MachineRepresentation::kTagged: // Fall through. ++ case MachineRepresentation::kWord64: ++ opcode = kLa64Ld; ++ break; ++ case MachineRepresentation::kCompressedPointer: // Fall through. ++ case MachineRepresentation::kCompressed: // Fall through. ++ case MachineRepresentation::kNone: ++ case MachineRepresentation::kSimd128: ++ UNREACHABLE(); ++ } ++ if (node->opcode() == IrOpcode::kPoisonedLoad) { ++ CHECK_NE(poisoning_level_, PoisoningMitigationLevel::kDontPoison); ++ opcode |= MiscField::encode(kMemoryAccessPoisoned); ++ } ++ ++ EmitLoad(this, node, opcode); ++} ++ ++void InstructionSelector::VisitPoisonedLoad(Node* node) { VisitLoad(node); } ++ ++void InstructionSelector::VisitProtectedLoad(Node* node) { ++ // TODO(eholk) ++ UNIMPLEMENTED(); ++} ++ ++void InstructionSelector::VisitStore(Node* node) { ++ La64OperandGenerator g(this); ++ Node* base = node->InputAt(0); ++ Node* index = node->InputAt(1); ++ Node* value = node->InputAt(2); ++ ++ StoreRepresentation store_rep = StoreRepresentationOf(node->op()); ++ WriteBarrierKind write_barrier_kind = store_rep.write_barrier_kind(); ++ MachineRepresentation rep = store_rep.representation(); ++ ++ // TODO(la64): I guess this could be done in a better way. 
++ if (write_barrier_kind != kNoWriteBarrier && ++ V8_LIKELY(!FLAG_disable_write_barriers)) { ++ DCHECK(CanBeTaggedPointer(rep)); ++ InstructionOperand inputs[3]; ++ size_t input_count = 0; ++ inputs[input_count++] = g.UseUniqueRegister(base); ++ inputs[input_count++] = g.UseUniqueRegister(index); ++ inputs[input_count++] = g.UseUniqueRegister(value); ++ RecordWriteMode record_write_mode = ++ WriteBarrierKindToRecordWriteMode(write_barrier_kind); ++ InstructionOperand temps[] = {g.TempRegister(), g.TempRegister()}; ++ size_t const temp_count = arraysize(temps); ++ InstructionCode code = kArchStoreWithWriteBarrier; ++ code |= MiscField::encode(static_cast(record_write_mode)); ++ Emit(code, 0, nullptr, input_count, inputs, temp_count, temps); ++ } else { ++ ArchOpcode opcode = kArchNop; ++ switch (rep) { ++ case MachineRepresentation::kFloat32: ++ opcode = kLa64Swc1; ++ break; ++ case MachineRepresentation::kFloat64: ++ opcode = kLa64Sdc1; ++ break; ++ case MachineRepresentation::kBit: // Fall through. ++ case MachineRepresentation::kWord8: ++ opcode = kLa64Sb; ++ break; ++ case MachineRepresentation::kWord16: ++ opcode = kLa64Sh; ++ break; ++ case MachineRepresentation::kWord32: ++ opcode = kLa64Sw; ++ break; ++ case MachineRepresentation::kTaggedSigned: // Fall through. ++ case MachineRepresentation::kTaggedPointer: // Fall through. ++ case MachineRepresentation::kTagged: // Fall through. ++ case MachineRepresentation::kWord64: ++ opcode = kLa64Sd; ++ break; ++ case MachineRepresentation::kCompressedPointer: // Fall through. ++ case MachineRepresentation::kCompressed: // Fall through. ++ case MachineRepresentation::kNone: ++ case MachineRepresentation::kSimd128: ++ UNREACHABLE(); ++ return; ++ } ++ ++ if (g.CanBeImmediate(index, opcode)) { ++ Emit(opcode | AddressingModeField::encode(kMode_MRI), g.NoOutput(), ++ g.UseRegister(base), g.UseImmediate(index), ++ g.UseRegisterOrImmediateZero(value)); ++ } else { ++ Emit(opcode | AddressingModeField::encode(kMode_MRR), g.NoOutput(), ++ g.UseRegister(base), g.UseRegister(index), ++ g.UseRegisterOrImmediateZero(value)); ++ } ++ } ++} ++ ++void InstructionSelector::VisitProtectedStore(Node* node) { ++ // TODO(eholk) ++ UNIMPLEMENTED(); ++} ++ ++void InstructionSelector::VisitWord32And(Node* node) { ++ La64OperandGenerator g(this); ++ Int32BinopMatcher m(node); ++ if (m.left().IsWord32Shr() && CanCover(node, m.left().node()) && ++ m.right().HasValue()) { ++ uint32_t mask = m.right().Value(); ++ uint32_t mask_width = base::bits::CountPopulation(mask); ++ uint32_t mask_msb = base::bits::CountLeadingZeros32(mask); ++ if ((mask_width != 0) && (mask_msb + mask_width == 32)) { ++ // The mask must be contiguous, and occupy the least-significant bits. ++ DCHECK_EQ(0u, base::bits::CountTrailingZeros32(mask)); ++ ++ // Select Ext for And(Shr(x, imm), mask) where the mask is in the least ++ // significant bits. ++ Int32BinopMatcher mleft(m.left().node()); ++ if (mleft.right().HasValue()) { ++ // Any shift value can match; int32 shifts use `value % 32`. ++ uint32_t lsb = mleft.right().Value() & 0x1F; ++ ++ // Ext cannot extract bits past the register size, however since ++ // shifting the original value would have introduced some zeros we can ++ // still use Ext with a smaller mask and the remaining bits will be ++ // zeros. 
++ if (lsb + mask_width > 32) mask_width = 32 - lsb; ++ ++ Emit(kLa64Ext, g.DefineAsRegister(node), ++ g.UseRegister(mleft.left().node()), g.TempImmediate(lsb), ++ g.TempImmediate(mask_width)); ++ return; ++ } ++ // Other cases fall through to the normal And operation. ++ } ++ } ++ if (m.right().HasValue()) { ++ uint32_t mask = m.right().Value(); ++ uint32_t shift = base::bits::CountPopulation(~mask); ++ uint32_t msb = base::bits::CountLeadingZeros32(~mask); ++ if (shift != 0 && shift != 32 && msb + shift == 32) { ++ // Insert zeros for (x >> K) << K => x & ~(2^K - 1) expression reduction ++ // and remove constant loading of inverted mask. ++ Emit(kLa64Ins, g.DefineSameAsFirst(node), g.UseRegister(m.left().node()), ++ g.TempImmediate(0), g.TempImmediate(shift)); ++ return; ++ } ++ } ++ VisitBinop(this, node, kLa64And32, true, kLa64And32); ++} ++ ++void InstructionSelector::VisitWord64And(Node* node) { ++ La64OperandGenerator g(this); ++ Int64BinopMatcher m(node); ++ if (m.left().IsWord64Shr() && CanCover(node, m.left().node()) && ++ m.right().HasValue()) { ++ uint64_t mask = m.right().Value(); ++ uint32_t mask_width = base::bits::CountPopulation(mask); ++ uint32_t mask_msb = base::bits::CountLeadingZeros64(mask); ++ if ((mask_width != 0) && (mask_msb + mask_width == 64)) { ++ // The mask must be contiguous, and occupy the least-significant bits. ++ DCHECK_EQ(0u, base::bits::CountTrailingZeros64(mask)); ++ ++ // Select Dext for And(Shr(x, imm), mask) where the mask is in the least ++ // significant bits. ++ Int64BinopMatcher mleft(m.left().node()); ++ if (mleft.right().HasValue()) { ++ // Any shift value can match; int64 shifts use `value % 64`. ++ uint32_t lsb = static_cast(mleft.right().Value() & 0x3F); ++ ++ // Dext cannot extract bits past the register size, however since ++ // shifting the original value would have introduced some zeros we can ++ // still use Dext with a smaller mask and the remaining bits will be ++ // zeros. ++ if (lsb + mask_width > 64) mask_width = 64 - lsb; ++ ++ if (lsb == 0 && mask_width == 64) { ++ Emit(kArchNop, g.DefineSameAsFirst(node), g.Use(mleft.left().node())); ++ } else { ++ Emit(kLa64Dext, g.DefineAsRegister(node), ++ g.UseRegister(mleft.left().node()), g.TempImmediate(lsb), ++ g.TempImmediate(static_cast(mask_width))); ++ } ++ return; ++ } ++ // Other cases fall through to the normal And operation. ++ } ++ } ++ if (m.right().HasValue()) { ++ uint64_t mask = m.right().Value(); ++ uint32_t shift = base::bits::CountPopulation(~mask); ++ uint32_t msb = base::bits::CountLeadingZeros64(~mask); ++ if (shift != 0 && shift < 32 && msb + shift == 64) { ++ // Insert zeros for (x >> K) << K => x & ~(2^K - 1) expression reduction ++ // and remove constant loading of inverted mask. Dins cannot insert bits ++ // past word size, so shifts smaller than 32 are covered. 
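VisitWord32And/VisitWord64And above turn And(Shr(x, imm), mask) with a contiguous low-order mask into a single bit-field extract (Ext/Dext); contiguity is checked via popcount(mask) + leading_zeros(mask) == word width. A small generic C++ sketch of the rewritten computation (not the V8 matchers):

    #include <cstdint>

    // Generic bit-field extract: `width` bits starting at bit `lsb`.
    uint32_t ExtractBits(uint32_t x, unsigned lsb, unsigned width) {
      return (x >> lsb) & (width == 32 ? ~0u : (1u << width) - 1u);
    }

    // (x >> 3) & 0x1F uses a contiguous 5-bit mask, so it is exactly
    // ExtractBits(x, 3, 5): one Ext instead of a shift plus an and.
    uint32_t Example(uint32_t x) { return ExtractBits(x, 3, 5); }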
++ Emit(kLa64Dins, g.DefineSameAsFirst(node), g.UseRegister(m.left().node()), ++ g.TempImmediate(0), g.TempImmediate(shift)); ++ return; ++ } ++ } ++ VisitBinop(this, node, kLa64And, true, kLa64And); ++} ++ ++void InstructionSelector::VisitWord32Or(Node* node) { ++ VisitBinop(this, node, kLa64Or32, true, kLa64Or32); ++} ++ ++void InstructionSelector::VisitWord64Or(Node* node) { ++ VisitBinop(this, node, kLa64Or, true, kLa64Or); ++} ++ ++void InstructionSelector::VisitWord32Xor(Node* node) { ++ Int32BinopMatcher m(node); ++ if (m.left().IsWord32Or() && CanCover(node, m.left().node()) && ++ m.right().Is(-1)) { ++ Int32BinopMatcher mleft(m.left().node()); ++ if (!mleft.right().HasValue()) { ++ La64OperandGenerator g(this); ++ Emit(kLa64Nor32, g.DefineAsRegister(node), ++ g.UseRegister(mleft.left().node()), ++ g.UseRegister(mleft.right().node())); ++ return; ++ } ++ } ++ if (m.right().Is(-1)) { ++ // Use Nor for bit negation and eliminate constant loading for xori. ++ La64OperandGenerator g(this); ++ Emit(kLa64Nor32, g.DefineAsRegister(node), g.UseRegister(m.left().node()), ++ g.TempImmediate(0)); ++ return; ++ } ++ VisitBinop(this, node, kLa64Xor32, true, kLa64Xor32); ++} ++ ++void InstructionSelector::VisitWord64Xor(Node* node) { ++ Int64BinopMatcher m(node); ++ if (m.left().IsWord64Or() && CanCover(node, m.left().node()) && ++ m.right().Is(-1)) { ++ Int64BinopMatcher mleft(m.left().node()); ++ if (!mleft.right().HasValue()) { ++ La64OperandGenerator g(this); ++ Emit(kLa64Nor, g.DefineAsRegister(node), ++ g.UseRegister(mleft.left().node()), ++ g.UseRegister(mleft.right().node())); ++ return; ++ } ++ } ++ if (m.right().Is(-1)) { ++ // Use Nor for bit negation and eliminate constant loading for xori. ++ La64OperandGenerator g(this); ++ Emit(kLa64Nor, g.DefineAsRegister(node), g.UseRegister(m.left().node()), ++ g.TempImmediate(0)); ++ return; ++ } ++ VisitBinop(this, node, kLa64Xor, true, kLa64Xor); ++} ++ ++void InstructionSelector::VisitWord32Shl(Node* node) { ++ Int32BinopMatcher m(node); ++ if (m.left().IsWord32And() && CanCover(node, m.left().node()) && ++ m.right().IsInRange(1, 31)) { ++ La64OperandGenerator g(this); ++ Int32BinopMatcher mleft(m.left().node()); ++ // Match Word32Shl(Word32And(x, mask), imm) to Shl where the mask is ++ // contiguous, and the shift immediate non-zero. ++ if (mleft.right().HasValue()) { ++ uint32_t mask = mleft.right().Value(); ++ uint32_t mask_width = base::bits::CountPopulation(mask); ++ uint32_t mask_msb = base::bits::CountLeadingZeros32(mask); ++ if ((mask_width != 0) && (mask_msb + mask_width == 32)) { ++ uint32_t shift = m.right().Value(); ++ DCHECK_EQ(0u, base::bits::CountTrailingZeros32(mask)); ++ DCHECK_NE(0u, shift); ++ if ((shift + mask_width) >= 32) { ++ // If the mask is contiguous and reaches or extends beyond the top ++ // bit, only the shift is needed. ++ Emit(kLa64Shl, g.DefineAsRegister(node), ++ g.UseRegister(mleft.left().node()), ++ g.UseImmediate(m.right().node())); ++ return; ++ } ++ } ++ } ++ } ++ VisitRRO(this, kLa64Shl, node); ++} ++ ++void InstructionSelector::VisitWord32Shr(Node* node) { ++ Int32BinopMatcher m(node); ++ if (m.left().IsWord32And() && m.right().HasValue()) { ++ uint32_t lsb = m.right().Value() & 0x1F; ++ Int32BinopMatcher mleft(m.left().node()); ++ if (mleft.right().HasValue() && mleft.right().Value() != 0) { ++ // Select Ext for Shr(And(x, mask), imm) where the result of the mask is ++ // shifted into the least-significant bits. 
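VisitWord32Xor/VisitWord64Xor above rewrite x ^ -1 as a Nor against a zero operand: bitwise negation is nor(x, 0), and emitting it directly avoids materializing the all-ones constant for xori. In plain C++ terms:

    #include <cstdint>

    uint32_t Nor(uint32_t a, uint32_t b) { return ~(a | b); }

    // x ^ 0xFFFFFFFF, ~x and Nor(x, 0) are the same value for every x.
    bool NegationForms(uint32_t x) {
      return (x ^ 0xFFFFFFFFu) == ~x && ~x == Nor(x, 0u);
    }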
++ uint32_t mask = (mleft.right().Value() >> lsb) << lsb; ++ unsigned mask_width = base::bits::CountPopulation(mask); ++ unsigned mask_msb = base::bits::CountLeadingZeros32(mask); ++ if ((mask_msb + mask_width + lsb) == 32) { ++ La64OperandGenerator g(this); ++ DCHECK_EQ(lsb, base::bits::CountTrailingZeros32(mask)); ++ Emit(kLa64Ext, g.DefineAsRegister(node), ++ g.UseRegister(mleft.left().node()), g.TempImmediate(lsb), ++ g.TempImmediate(mask_width)); ++ return; ++ } ++ } ++ } ++ VisitRRO(this, kLa64Shr, node); ++} ++ ++void InstructionSelector::VisitWord32Sar(Node* node) { ++ Int32BinopMatcher m(node); ++ if (m.left().IsWord32Shl() && CanCover(node, m.left().node())) { ++ Int32BinopMatcher mleft(m.left().node()); ++ if (m.right().HasValue() && mleft.right().HasValue()) { ++ La64OperandGenerator g(this); ++ uint32_t sar = m.right().Value(); ++ uint32_t shl = mleft.right().Value(); ++ if ((sar == shl) && (sar == 16)) { ++ Emit(kLa64Seh, g.DefineAsRegister(node), ++ g.UseRegister(mleft.left().node())); ++ return; ++ } else if ((sar == shl) && (sar == 24)) { ++ Emit(kLa64Seb, g.DefineAsRegister(node), ++ g.UseRegister(mleft.left().node())); ++ return; ++ } else if ((sar == shl) && (sar == 32)) { ++ Emit(kLa64Shl, g.DefineAsRegister(node), ++ g.UseRegister(mleft.left().node()), g.TempImmediate(0)); ++ return; ++ } ++ } ++ } ++ VisitRRO(this, kLa64Sar, node); ++} ++ ++void InstructionSelector::VisitWord64Shl(Node* node) { ++ La64OperandGenerator g(this); ++ Int64BinopMatcher m(node); ++ if ((m.left().IsChangeInt32ToInt64() || m.left().IsChangeUint32ToUint64()) && ++ m.right().IsInRange(32, 63) && CanCover(node, m.left().node())) { ++ // There's no need to sign/zero-extend to 64-bit if we shift out the upper ++ // 32 bits anyway. ++ Emit(kLa64Dshl, g.DefineSameAsFirst(node), ++ g.UseRegister(m.left().node()->InputAt(0)), ++ g.UseImmediate(m.right().node())); ++ return; ++ } ++ if (m.left().IsWord64And() && CanCover(node, m.left().node()) && ++ m.right().IsInRange(1, 63)) { ++ // Match Word64Shl(Word64And(x, mask), imm) to Dshl where the mask is ++ // contiguous, and the shift immediate non-zero. ++ Int64BinopMatcher mleft(m.left().node()); ++ if (mleft.right().HasValue()) { ++ uint64_t mask = mleft.right().Value(); ++ uint32_t mask_width = base::bits::CountPopulation(mask); ++ uint32_t mask_msb = base::bits::CountLeadingZeros64(mask); ++ if ((mask_width != 0) && (mask_msb + mask_width == 64)) { ++ uint64_t shift = m.right().Value(); ++ DCHECK_EQ(0u, base::bits::CountTrailingZeros64(mask)); ++ DCHECK_NE(0u, shift); ++ ++ if ((shift + mask_width) >= 64) { ++ // If the mask is contiguous and reaches or extends beyond the top ++ // bit, only the shift is needed. ++ Emit(kLa64Dshl, g.DefineAsRegister(node), ++ g.UseRegister(mleft.left().node()), ++ g.UseImmediate(m.right().node())); ++ return; ++ } ++ } ++ } ++ } ++ VisitRRO(this, kLa64Dshl, node); ++} ++ ++void InstructionSelector::VisitWord64Shr(Node* node) { ++ Int64BinopMatcher m(node); ++ if (m.left().IsWord64And() && m.right().HasValue()) { ++ uint32_t lsb = m.right().Value() & 0x3F; ++ Int64BinopMatcher mleft(m.left().node()); ++ if (mleft.right().HasValue() && mleft.right().Value() != 0) { ++ // Select Dext for Shr(And(x, mask), imm) where the result of the mask is ++ // shifted into the least-significant bits. 
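VisitWord32Sar above folds Sar(Shl(x, 16), 16) into a sign-extend-halfword (Seh) and Sar(Shl(x, 24), 24) into a sign-extend-byte (Seb). The shift pair is just the portable way of sign-extending the low bits, so one extension instruction is equivalent:

    #include <cstdint>

    // Shl by 16 then arithmetic Shr by 16 ...
    int32_t SignExtend16ViaShifts(int32_t x) {
      return static_cast<int32_t>(static_cast<uint32_t>(x) << 16) >> 16;
    }

    // ... is the same as sign-extending the low halfword directly (Seh).
    int32_t SignExtend16Direct(int32_t x) {
      return static_cast<int16_t>(x);
    }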
++ uint64_t mask = (mleft.right().Value() >> lsb) << lsb; ++ unsigned mask_width = base::bits::CountPopulation(mask); ++ unsigned mask_msb = base::bits::CountLeadingZeros64(mask); ++ if ((mask_msb + mask_width + lsb) == 64) { ++ La64OperandGenerator g(this); ++ DCHECK_EQ(lsb, base::bits::CountTrailingZeros64(mask)); ++ Emit(kLa64Dext, g.DefineAsRegister(node), ++ g.UseRegister(mleft.left().node()), g.TempImmediate(lsb), ++ g.TempImmediate(mask_width)); ++ return; ++ } ++ } ++ } ++ VisitRRO(this, kLa64Dshr, node); ++} ++ ++void InstructionSelector::VisitWord64Sar(Node* node) { ++ if (TryEmitExtendingLoad(this, node, node)) return; ++ VisitRRO(this, kLa64Dsar, node); ++} ++ ++void InstructionSelector::VisitWord32Ror(Node* node) { ++ VisitRRO(this, kLa64Ror, node); ++} ++ ++void InstructionSelector::VisitWord32Clz(Node* node) { ++ VisitRR(this, kLa64Clz, node); ++} ++ ++void InstructionSelector::VisitWord32ReverseBits(Node* node) { UNREACHABLE(); } ++ ++void InstructionSelector::VisitWord64ReverseBits(Node* node) { UNREACHABLE(); } ++ ++void InstructionSelector::VisitWord64ReverseBytes(Node* node) { ++ La64OperandGenerator g(this); ++ Emit(kLa64ByteSwap64, g.DefineAsRegister(node), ++ g.UseRegister(node->InputAt(0))); ++} ++ ++void InstructionSelector::VisitWord32ReverseBytes(Node* node) { ++ La64OperandGenerator g(this); ++ Emit(kLa64ByteSwap32, g.DefineAsRegister(node), ++ g.UseRegister(node->InputAt(0))); ++} ++ ++void InstructionSelector::VisitSimd128ReverseBytes(Node* node) { ++ UNREACHABLE(); ++} ++ ++void InstructionSelector::VisitWord32Ctz(Node* node) { ++ La64OperandGenerator g(this); ++ Emit(kLa64Ctz, g.DefineAsRegister(node), g.UseRegister(node->InputAt(0))); ++} ++ ++void InstructionSelector::VisitWord64Ctz(Node* node) { ++ La64OperandGenerator g(this); ++ Emit(kLa64Dctz, g.DefineAsRegister(node), g.UseRegister(node->InputAt(0))); ++} ++ ++void InstructionSelector::VisitWord32Popcnt(Node* node) { ++ La64OperandGenerator g(this); ++ Emit(kLa64Popcnt, g.DefineAsRegister(node), g.UseRegister(node->InputAt(0))); ++} ++ ++void InstructionSelector::VisitWord64Popcnt(Node* node) { ++ La64OperandGenerator g(this); ++ Emit(kLa64Dpopcnt, g.DefineAsRegister(node), g.UseRegister(node->InputAt(0))); ++} ++ ++void InstructionSelector::VisitWord64Ror(Node* node) { ++ VisitRRO(this, kLa64Dror, node); ++} ++ ++void InstructionSelector::VisitWord64Clz(Node* node) { ++ VisitRR(this, kLa64Dclz, node); ++} ++ ++void InstructionSelector::VisitInt32Add(Node* node) { ++ La64OperandGenerator g(this); ++ Int32BinopMatcher m(node); ++ ++ // Select Lsa for (left + (left_of_right << imm)). ++ if (m.right().opcode() == IrOpcode::kWord32Shl && ++ CanCover(node, m.left().node()) && CanCover(node, m.right().node())) { ++ Int32BinopMatcher mright(m.right().node()); ++ if (mright.right().HasValue() && !m.left().HasValue()) { ++ int32_t shift_value = static_cast(mright.right().Value()); ++ if (shift_value > 0 && shift_value <= 31) { ++ Emit(kLa64Lsa, g.DefineAsRegister(node), ++ g.UseRegister(mright.left().node()), ++ g.UseRegister(m.left().node()), g.TempImmediate(shift_value)); ++ return; ++ } ++ } ++ } ++ ++ // Select Lsa for ((left_of_left << imm) + right). 
++ if (m.left().opcode() == IrOpcode::kWord32Shl && ++ CanCover(node, m.right().node()) && CanCover(node, m.left().node())) { ++ Int32BinopMatcher mleft(m.left().node()); ++ if (mleft.right().HasValue() && !m.right().HasValue()) { ++ int32_t shift_value = static_cast(mleft.right().Value()); ++ if (shift_value > 0 && shift_value <= 31) { ++ Emit(kLa64Lsa, g.DefineAsRegister(node), ++ g.UseRegister(mleft.left().node()), ++ g.UseRegister(m.right().node()), g.TempImmediate(shift_value)); ++ return; ++ } ++ } ++ } ++ ++ VisitBinop(this, node, kLa64Add, true, kLa64Add); ++} ++ ++void InstructionSelector::VisitInt64Add(Node* node) { ++ La64OperandGenerator g(this); ++ Int64BinopMatcher m(node); ++ ++ // Select Dlsa for (left + (left_of_right << imm)). ++ if (m.right().opcode() == IrOpcode::kWord64Shl && ++ CanCover(node, m.left().node()) && CanCover(node, m.right().node())) { ++ Int64BinopMatcher mright(m.right().node()); ++ if (mright.right().HasValue() && !m.left().HasValue()) { ++ int32_t shift_value = static_cast(mright.right().Value()); ++ if (shift_value > 0 && shift_value <= 31) { ++ Emit(kLa64Dlsa, g.DefineAsRegister(node), ++ g.UseRegister(mright.left().node()), ++ g.UseRegister(m.left().node()), g.TempImmediate(shift_value)); ++ return; ++ } ++ } ++ } ++ ++ // Select Dlsa for ((left_of_left << imm) + right). ++ if (m.left().opcode() == IrOpcode::kWord64Shl && ++ CanCover(node, m.right().node()) && CanCover(node, m.left().node())) { ++ Int64BinopMatcher mleft(m.left().node()); ++ if (mleft.right().HasValue() && !m.right().HasValue()) { ++ int32_t shift_value = static_cast(mleft.right().Value()); ++ if (shift_value > 0 && shift_value <= 31) { ++ Emit(kLa64Dlsa, g.DefineAsRegister(node), ++ g.UseRegister(mleft.left().node()), ++ g.UseRegister(m.right().node()), g.TempImmediate(shift_value)); ++ return; ++ } ++ } ++ } ++ ++ VisitBinop(this, node, kLa64Dadd, true, kLa64Dadd); ++} ++ ++void InstructionSelector::VisitInt32Sub(Node* node) { ++ VisitBinop(this, node, kLa64Sub); ++} ++ ++void InstructionSelector::VisitInt64Sub(Node* node) { ++ VisitBinop(this, node, kLa64Dsub); ++} ++ ++void InstructionSelector::VisitInt32Mul(Node* node) { ++ La64OperandGenerator g(this); ++ Int32BinopMatcher m(node); ++ if (m.right().HasValue() && m.right().Value() > 0) { ++ uint32_t value = static_cast(m.right().Value()); ++ if (base::bits::IsPowerOfTwo(value)) { ++ Emit(kLa64Shl | AddressingModeField::encode(kMode_None), ++ g.DefineAsRegister(node), g.UseRegister(m.left().node()), ++ g.TempImmediate(base::bits::WhichPowerOfTwo(value))); ++ return; ++ } ++ if (base::bits::IsPowerOfTwo(value - 1) && /*kArchVariant == kLa64r6 &&*/ ++ value - 1 > 0 && value - 1 <= 31) { ++ Emit(kLa64Lsa, g.DefineAsRegister(node), g.UseRegister(m.left().node()), ++ g.UseRegister(m.left().node()), ++ g.TempImmediate(base::bits::WhichPowerOfTwo(value - 1))); ++ return; ++ } ++ if (base::bits::IsPowerOfTwo(value + 1)) { ++ InstructionOperand temp = g.TempRegister(); ++ Emit(kLa64Shl | AddressingModeField::encode(kMode_None), temp, ++ g.UseRegister(m.left().node()), ++ g.TempImmediate(base::bits::WhichPowerOfTwo(value + 1))); ++ Emit(kLa64Sub | AddressingModeField::encode(kMode_None), ++ g.DefineAsRegister(node), temp, g.UseRegister(m.left().node())); ++ return; ++ } ++ } ++ Node* left = node->InputAt(0); ++ Node* right = node->InputAt(1); ++ if (CanCover(node, left) && CanCover(node, right)) { ++ if (left->opcode() == IrOpcode::kWord64Sar && ++ right->opcode() == IrOpcode::kWord64Sar) { ++ Int64BinopMatcher leftInput(left), 
rightInput(right); ++ if (leftInput.right().Is(32) && rightInput.right().Is(32)) { ++ // Combine untagging shifts with Dmul high. ++ Emit(kLa64DMulHigh, g.DefineSameAsFirst(node), ++ g.UseRegister(leftInput.left().node()), ++ g.UseRegister(rightInput.left().node())); ++ return; ++ } ++ } ++ } ++ VisitRRR(this, kLa64Mul, node); ++} ++ ++void InstructionSelector::VisitInt32MulHigh(Node* node) { ++ VisitRRR(this, kLa64MulHigh, node); ++} ++ ++void InstructionSelector::VisitUint32MulHigh(Node* node) { ++ VisitRRR(this, kLa64MulHighU, node); ++} ++ ++void InstructionSelector::VisitInt64Mul(Node* node) { ++ La64OperandGenerator g(this); ++ Int64BinopMatcher m(node); ++ // TODO(dusmil): Add optimization for shifts larger than 32. ++ if (m.right().HasValue() && m.right().Value() > 0) { ++ uint32_t value = static_cast(m.right().Value()); ++ if (base::bits::IsPowerOfTwo(value)) { ++ Emit(kLa64Dshl | AddressingModeField::encode(kMode_None), ++ g.DefineAsRegister(node), g.UseRegister(m.left().node()), ++ g.TempImmediate(base::bits::WhichPowerOfTwo(value))); ++ return; ++ } ++ if (base::bits::IsPowerOfTwo(value - 1) && /*kArchVariant == kLa64r6 &&*/ ++ value - 1 > 0 && value - 1 <= 31) { ++ // Dlsa macro will handle the shifting value out of bound cases. ++ Emit(kLa64Dlsa, g.DefineAsRegister(node), g.UseRegister(m.left().node()), ++ g.UseRegister(m.left().node()), ++ g.TempImmediate(base::bits::WhichPowerOfTwo(value - 1))); ++ return; ++ } ++ if (base::bits::IsPowerOfTwo(value + 1)) { ++ InstructionOperand temp = g.TempRegister(); ++ Emit(kLa64Dshl | AddressingModeField::encode(kMode_None), temp, ++ g.UseRegister(m.left().node()), ++ g.TempImmediate(base::bits::WhichPowerOfTwo(value + 1))); ++ Emit(kLa64Dsub | AddressingModeField::encode(kMode_None), ++ g.DefineAsRegister(node), temp, g.UseRegister(m.left().node())); ++ return; ++ } ++ } ++ Emit(kLa64Dmul, g.DefineAsRegister(node), g.UseRegister(m.left().node()), ++ g.UseRegister(m.right().node())); ++} ++ ++void InstructionSelector::VisitInt32Div(Node* node) { ++ La64OperandGenerator g(this); ++ Int32BinopMatcher m(node); ++ Node* left = node->InputAt(0); ++ Node* right = node->InputAt(1); ++ if (CanCover(node, left) && CanCover(node, right)) { ++ if (left->opcode() == IrOpcode::kWord64Sar && ++ right->opcode() == IrOpcode::kWord64Sar) { ++ Int64BinopMatcher rightInput(right), leftInput(left); ++ if (rightInput.right().Is(32) && leftInput.right().Is(32)) { ++ // Combine both shifted operands with Ddiv. ++ Emit(kLa64Ddiv, g.DefineSameAsFirst(node), ++ g.UseRegister(leftInput.left().node()), ++ g.UseRegister(rightInput.left().node())); ++ return; ++ } ++ } ++ } ++ Emit(kLa64Div, g.DefineSameAsFirst(node), g.UseRegister(m.left().node()), ++ g.UseRegister(m.right().node())); ++} ++ ++void InstructionSelector::VisitUint32Div(Node* node) { ++ La64OperandGenerator g(this); ++ Int32BinopMatcher m(node); ++ Emit(kLa64DivU, g.DefineSameAsFirst(node), g.UseRegister(m.left().node()), ++ g.UseRegister(m.right().node())); ++} ++ ++void InstructionSelector::VisitInt32Mod(Node* node) { ++ La64OperandGenerator g(this); ++ Int32BinopMatcher m(node); ++ Node* left = node->InputAt(0); ++ Node* right = node->InputAt(1); ++ if (CanCover(node, left) && CanCover(node, right)) { ++ if (left->opcode() == IrOpcode::kWord64Sar && ++ right->opcode() == IrOpcode::kWord64Sar) { ++ Int64BinopMatcher rightInput(right), leftInput(left); ++ if (rightInput.right().Is(32) && leftInput.right().Is(32)) { ++ // Combine both shifted operands with Dmod. 
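VisitInt32Mul/VisitInt64Mul above replace multiplication by 2^k with a shift, by 2^k + 1 with a shift-and-add (Lsa/Dlsa), and by 2^k - 1 with a shift followed by a subtract. The rewrites rest on these identities (unsigned arithmetic, so any overflow wraps consistently on both sides):

    #include <cstdint>

    // x * 8 == x << 3, x * 9 == (x << 3) + x, x * 7 == (x << 3) - x
    bool MulStrengthReduction(uint32_t x) {
      return x * 8u == (x << 3) &&
             x * 9u == ((x << 3) + x) &&
             x * 7u == ((x << 3) - x);
    }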
++ Emit(kLa64Dmod, g.DefineSameAsFirst(node), ++ g.UseRegister(leftInput.left().node()), ++ g.UseRegister(rightInput.left().node())); ++ return; ++ } ++ } ++ } ++ Emit(kLa64Mod, g.DefineAsRegister(node), g.UseRegister(m.left().node()), ++ g.UseRegister(m.right().node())); ++} ++ ++void InstructionSelector::VisitUint32Mod(Node* node) { ++ La64OperandGenerator g(this); ++ Int32BinopMatcher m(node); ++ Emit(kLa64ModU, g.DefineAsRegister(node), g.UseRegister(m.left().node()), ++ g.UseRegister(m.right().node())); ++} ++ ++void InstructionSelector::VisitInt64Div(Node* node) { ++ La64OperandGenerator g(this); ++ Int64BinopMatcher m(node); ++ Emit(kLa64Ddiv, g.DefineSameAsFirst(node), g.UseRegister(m.left().node()), ++ g.UseRegister(m.right().node())); ++} ++ ++void InstructionSelector::VisitUint64Div(Node* node) { ++ La64OperandGenerator g(this); ++ Int64BinopMatcher m(node); ++ Emit(kLa64DdivU, g.DefineSameAsFirst(node), g.UseRegister(m.left().node()), ++ g.UseRegister(m.right().node())); ++} ++ ++void InstructionSelector::VisitInt64Mod(Node* node) { ++ La64OperandGenerator g(this); ++ Int64BinopMatcher m(node); ++ Emit(kLa64Dmod, g.DefineAsRegister(node), g.UseRegister(m.left().node()), ++ g.UseRegister(m.right().node())); ++} ++ ++void InstructionSelector::VisitUint64Mod(Node* node) { ++ La64OperandGenerator g(this); ++ Int64BinopMatcher m(node); ++ Emit(kLa64DmodU, g.DefineAsRegister(node), g.UseRegister(m.left().node()), ++ g.UseRegister(m.right().node())); ++} ++ ++void InstructionSelector::VisitChangeFloat32ToFloat64(Node* node) { ++ VisitRR(this, kLa64CvtDS, node); ++} ++ ++void InstructionSelector::VisitRoundInt32ToFloat32(Node* node) { ++ VisitRR(this, kLa64CvtSW, node); ++} ++ ++void InstructionSelector::VisitRoundUint32ToFloat32(Node* node) { ++ VisitRR(this, kLa64CvtSUw, node); ++} ++ ++void InstructionSelector::VisitChangeInt32ToFloat64(Node* node) { ++ VisitRR(this, kLa64CvtDW, node); ++} ++ ++void InstructionSelector::VisitChangeInt64ToFloat64(Node* node) { ++ VisitRR(this, kLa64CvtDL, node); ++} ++ ++void InstructionSelector::VisitChangeUint32ToFloat64(Node* node) { ++ VisitRR(this, kLa64CvtDUw, node); ++} ++ ++void InstructionSelector::VisitTruncateFloat32ToInt32(Node* node) { ++ VisitRR(this, kLa64TruncWS, node); ++} ++ ++void InstructionSelector::VisitTruncateFloat32ToUint32(Node* node) { ++ VisitRR(this, kLa64TruncUwS, node); ++} ++ ++void InstructionSelector::VisitChangeFloat64ToInt32(Node* node) { ++ La64OperandGenerator g(this); ++ Node* value = node->InputAt(0); ++ // Match ChangeFloat64ToInt32(Float64Round##OP) to corresponding instruction ++ // which does rounding and conversion to integer format. 
++ if (CanCover(node, value)) { ++ switch (value->opcode()) { ++ case IrOpcode::kFloat64RoundDown: ++ Emit(kLa64FloorWD, g.DefineAsRegister(node), ++ g.UseRegister(value->InputAt(0))); ++ return; ++ case IrOpcode::kFloat64RoundUp: ++ Emit(kLa64CeilWD, g.DefineAsRegister(node), ++ g.UseRegister(value->InputAt(0))); ++ return; ++ case IrOpcode::kFloat64RoundTiesEven: ++ Emit(kLa64RoundWD, g.DefineAsRegister(node), ++ g.UseRegister(value->InputAt(0))); ++ return; ++ case IrOpcode::kFloat64RoundTruncate: ++ Emit(kLa64TruncWD, g.DefineAsRegister(node), ++ g.UseRegister(value->InputAt(0))); ++ return; ++ default: ++ break; ++ } ++ if (value->opcode() == IrOpcode::kChangeFloat32ToFloat64) { ++ Node* next = value->InputAt(0); ++ if (CanCover(value, next)) { ++ // Match ChangeFloat64ToInt32(ChangeFloat32ToFloat64(Float64Round##OP)) ++ switch (next->opcode()) { ++ case IrOpcode::kFloat32RoundDown: ++ Emit(kLa64FloorWS, g.DefineAsRegister(node), ++ g.UseRegister(next->InputAt(0))); ++ return; ++ case IrOpcode::kFloat32RoundUp: ++ Emit(kLa64CeilWS, g.DefineAsRegister(node), ++ g.UseRegister(next->InputAt(0))); ++ return; ++ case IrOpcode::kFloat32RoundTiesEven: ++ Emit(kLa64RoundWS, g.DefineAsRegister(node), ++ g.UseRegister(next->InputAt(0))); ++ return; ++ case IrOpcode::kFloat32RoundTruncate: ++ Emit(kLa64TruncWS, g.DefineAsRegister(node), ++ g.UseRegister(next->InputAt(0))); ++ return; ++ default: ++ Emit(kLa64TruncWS, g.DefineAsRegister(node), ++ g.UseRegister(value->InputAt(0))); ++ return; ++ } ++ } else { ++ // Match float32 -> float64 -> int32 representation change path. ++ Emit(kLa64TruncWS, g.DefineAsRegister(node), ++ g.UseRegister(value->InputAt(0))); ++ return; ++ } ++ } ++ } ++ VisitRR(this, kLa64TruncWD, node); ++} ++ ++void InstructionSelector::VisitChangeFloat64ToInt64(Node* node) { ++ VisitRR(this, kLa64TruncLD, node); ++} ++ ++void InstructionSelector::VisitChangeFloat64ToUint32(Node* node) { ++ VisitRR(this, kLa64TruncUwD, node); ++} ++ ++void InstructionSelector::VisitChangeFloat64ToUint64(Node* node) { ++ VisitRR(this, kLa64TruncUlD, node); ++} ++ ++void InstructionSelector::VisitTruncateFloat64ToUint32(Node* node) { ++ VisitRR(this, kLa64TruncUwD, node); ++} ++ ++void InstructionSelector::VisitTruncateFloat64ToInt64(Node* node) { ++ VisitRR(this, kLa64TruncLD, node); ++} ++ ++void InstructionSelector::VisitTryTruncateFloat32ToInt64(Node* node) { ++ La64OperandGenerator g(this); ++ InstructionOperand inputs[] = {g.UseRegister(node->InputAt(0))}; ++ InstructionOperand outputs[2]; ++ size_t output_count = 0; ++ outputs[output_count++] = g.DefineAsRegister(node); ++ ++ Node* success_output = NodeProperties::FindProjection(node, 1); ++ if (success_output) { ++ outputs[output_count++] = g.DefineAsRegister(success_output); ++ } ++ ++ this->Emit(kLa64TruncLS, output_count, outputs, 1, inputs); ++} ++ ++void InstructionSelector::VisitTryTruncateFloat64ToInt64(Node* node) { ++ La64OperandGenerator g(this); ++ InstructionOperand inputs[] = {g.UseRegister(node->InputAt(0))}; ++ InstructionOperand outputs[2]; ++ size_t output_count = 0; ++ outputs[output_count++] = g.DefineAsRegister(node); ++ ++ Node* success_output = NodeProperties::FindProjection(node, 1); ++ if (success_output) { ++ outputs[output_count++] = g.DefineAsRegister(success_output); ++ } ++ ++ Emit(kLa64TruncLD, output_count, outputs, 1, inputs); ++} ++ ++void InstructionSelector::VisitTryTruncateFloat32ToUint64(Node* node) { ++ La64OperandGenerator g(this); ++ InstructionOperand inputs[] = {g.UseRegister(node->InputAt(0))}; 
++ InstructionOperand outputs[2]; ++ size_t output_count = 0; ++ outputs[output_count++] = g.DefineAsRegister(node); ++ ++ Node* success_output = NodeProperties::FindProjection(node, 1); ++ if (success_output) { ++ outputs[output_count++] = g.DefineAsRegister(success_output); ++ } ++ ++ Emit(kLa64TruncUlS, output_count, outputs, 1, inputs); ++} ++ ++void InstructionSelector::VisitTryTruncateFloat64ToUint64(Node* node) { ++ La64OperandGenerator g(this); ++ ++ InstructionOperand inputs[] = {g.UseRegister(node->InputAt(0))}; ++ InstructionOperand outputs[2]; ++ size_t output_count = 0; ++ outputs[output_count++] = g.DefineAsRegister(node); ++ ++ Node* success_output = NodeProperties::FindProjection(node, 1); ++ if (success_output) { ++ outputs[output_count++] = g.DefineAsRegister(success_output); ++ } ++ ++ Emit(kLa64TruncUlD, output_count, outputs, 1, inputs); ++} ++ ++void InstructionSelector::VisitBitcastWord32ToWord64(Node* node) { ++ UNIMPLEMENTED(); ++} ++ ++void InstructionSelector::VisitChangeInt32ToInt64(Node* node) { ++ Node* value = node->InputAt(0); ++ if (value->opcode() == IrOpcode::kLoad && CanCover(node, value)) { ++ // Generate sign-extending load. ++ LoadRepresentation load_rep = LoadRepresentationOf(value->op()); ++ InstructionCode opcode = kArchNop; ++ switch (load_rep.representation()) { ++ case MachineRepresentation::kBit: // Fall through. ++ case MachineRepresentation::kWord8: ++ opcode = load_rep.IsUnsigned() ? kLa64Lbu : kLa64Lb; ++ break; ++ case MachineRepresentation::kWord16: ++ opcode = load_rep.IsUnsigned() ? kLa64Lhu : kLa64Lh; ++ break; ++ case MachineRepresentation::kWord32: ++ opcode = kLa64Lw; ++ break; ++ default: ++ UNREACHABLE(); ++ return; ++ } ++ EmitLoad(this, value, opcode, node); ++ } else { ++ La64OperandGenerator g(this); ++ Emit(kLa64Shl, g.DefineAsRegister(node), g.UseRegister(node->InputAt(0)), ++ g.TempImmediate(0)); ++ } ++} ++ ++void InstructionSelector::VisitChangeUint32ToUint64(Node* node) { ++ La64OperandGenerator g(this); ++ Node* value = node->InputAt(0); ++ switch (value->opcode()) { ++ // 32-bit operations will write their result in a 64 bit register, ++ // clearing the top 32 bits of the destination register. ++ case IrOpcode::kUint32Div: ++ case IrOpcode::kUint32Mod: ++ case IrOpcode::kUint32MulHigh: { ++ Emit(kArchNop, g.DefineSameAsFirst(node), g.Use(value)); ++ return; ++ } ++ case IrOpcode::kLoad: { ++ LoadRepresentation load_rep = LoadRepresentationOf(value->op()); ++ if (load_rep.IsUnsigned()) { ++ switch (load_rep.representation()) { ++ case MachineRepresentation::kWord8: ++ case MachineRepresentation::kWord16: ++ case MachineRepresentation::kWord32: ++ Emit(kArchNop, g.DefineSameAsFirst(node), g.Use(value)); ++ return; ++ default: ++ break; ++ } ++ } ++ break; ++ } ++ default: ++ break; ++ } ++ Emit(kLa64Dext, g.DefineAsRegister(node), g.UseRegister(node->InputAt(0)), ++ g.TempImmediate(0), g.TempImmediate(32)); ++} ++ ++void InstructionSelector::VisitTruncateInt64ToInt32(Node* node) { ++ La64OperandGenerator g(this); ++ Node* value = node->InputAt(0); ++ if (CanCover(node, value)) { ++ switch (value->opcode()) { ++ case IrOpcode::kWord64Sar: { ++ if (CanCoverTransitively(node, value, value->InputAt(0)) && ++ TryEmitExtendingLoad(this, value, node)) { ++ return; ++ } else { ++ Int64BinopMatcher m(value); ++ if (m.right().IsInRange(32, 63)) { ++ // After smi untagging no need for truncate. Combine sequence. 
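VisitChangeUint32ToUint64 above zero-extends with a Dext of bits [0, 32), that is, it keeps only the low 32 bits, and skips even that when the producing node (an unsigned narrow load, or a 32-bit Div/Mod/MulHigh) already leaves the upper half clear. The rewrite amounts to:

    #include <cstdint>

    // Zero-extension is a 32-bit-wide extract starting at bit 0 ...
    uint64_t ZeroExtend32(uint64_t x) { return x & 0xFFFFFFFFull; }

    // ... and is a no-op when the value is already a widened unsigned 32-bit result.
    bool AlreadyZeroExtended(uint32_t narrow) {
      uint64_t widened = narrow;   // e.g. the result of an Lbu/Lhu/Lwu load
      return ZeroExtend32(widened) == widened;
    }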
++ Emit(kLa64Dsar, g.DefineSameAsFirst(node), ++ g.UseRegister(m.left().node()), ++ g.UseImmediate(m.right().node())); ++ return; ++ } ++ } ++ break; ++ } ++ default: ++ break; ++ } ++ } ++ Emit(kLa64Ext, g.DefineAsRegister(node), g.UseRegister(node->InputAt(0)), ++ g.TempImmediate(0), g.TempImmediate(32)); ++} ++ ++void InstructionSelector::VisitTruncateFloat64ToFloat32(Node* node) { ++ La64OperandGenerator g(this); ++ Node* value = node->InputAt(0); ++ // Match TruncateFloat64ToFloat32(ChangeInt32ToFloat64) to corresponding ++ // instruction. ++ if (CanCover(node, value) && ++ value->opcode() == IrOpcode::kChangeInt32ToFloat64) { ++ Emit(kLa64CvtSW, g.DefineAsRegister(node), ++ g.UseRegister(value->InputAt(0))); ++ return; ++ } ++ VisitRR(this, kLa64CvtSD, node); ++} ++ ++void InstructionSelector::VisitTruncateFloat64ToWord32(Node* node) { ++ VisitRR(this, kArchTruncateDoubleToI, node); ++} ++ ++void InstructionSelector::VisitRoundFloat64ToInt32(Node* node) { ++ VisitRR(this, kLa64TruncWD, node); ++} ++ ++void InstructionSelector::VisitRoundInt64ToFloat32(Node* node) { ++ VisitRR(this, kLa64CvtSL, node); ++} ++ ++void InstructionSelector::VisitRoundInt64ToFloat64(Node* node) { ++ VisitRR(this, kLa64CvtDL, node); ++} ++ ++void InstructionSelector::VisitRoundUint64ToFloat32(Node* node) { ++ VisitRR(this, kLa64CvtSUl, node); ++} ++ ++void InstructionSelector::VisitRoundUint64ToFloat64(Node* node) { ++ VisitRR(this, kLa64CvtDUl, node); ++} ++ ++void InstructionSelector::VisitBitcastFloat32ToInt32(Node* node) { ++ VisitRR(this, kLa64Float64ExtractLowWord32, node); ++} ++ ++void InstructionSelector::VisitBitcastFloat64ToInt64(Node* node) { ++ VisitRR(this, kLa64BitcastDL, node); ++} ++ ++void InstructionSelector::VisitBitcastInt32ToFloat32(Node* node) { ++ La64OperandGenerator g(this); ++ Emit(kLa64Float64InsertLowWord32, g.DefineAsRegister(node), ++ ImmediateOperand(ImmediateOperand::INLINE, 0), ++ g.UseRegister(node->InputAt(0))); ++} ++ ++void InstructionSelector::VisitBitcastInt64ToFloat64(Node* node) { ++ VisitRR(this, kLa64BitcastLD, node); ++} ++ ++void InstructionSelector::VisitFloat32Add(Node* node) { ++ // Optimization with Madd.S(z, x, y) is intentionally removed. ++ // See explanation for madd_s in assembler-la64.cc. ++ VisitRRR(this, kLa64AddS, node); ++} ++ ++void InstructionSelector::VisitFloat64Add(Node* node) { ++ // Optimization with Madd.D(z, x, y) is intentionally removed. ++ // See explanation for madd_d in assembler-la64.cc. ++ VisitRRR(this, kLa64AddD, node); ++} ++ ++void InstructionSelector::VisitFloat32Sub(Node* node) { ++ // Optimization with Msub.S(z, x, y) is intentionally removed. ++ // See explanation for madd_s in assembler-la64.cc. ++ VisitRRR(this, kLa64SubS, node); ++} ++ ++void InstructionSelector::VisitFloat64Sub(Node* node) { ++ // Optimization with Msub.D(z, x, y) is intentionally removed. ++ // See explanation for madd_d in assembler-la64.cc. 
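++  // (Background note, an assumption rather than upstream text: fused
++  // multiply-add/subtract rounds only once, so its result can differ from the
++  // separate IEEE-754 multiply followed by add/subtract that JavaScript
++  // semantics require; see the referenced comment in assembler-la64.cc for
++  // the authoritative explanation.)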
++ VisitRRR(this, kLa64SubD, node); ++} ++ ++void InstructionSelector::VisitFloat32Mul(Node* node) { ++ VisitRRR(this, kLa64MulS, node); ++} ++ ++void InstructionSelector::VisitFloat64Mul(Node* node) { ++ VisitRRR(this, kLa64MulD, node); ++} ++ ++void InstructionSelector::VisitFloat32Div(Node* node) { ++ VisitRRR(this, kLa64DivS, node); ++} ++ ++void InstructionSelector::VisitFloat64Div(Node* node) { ++ VisitRRR(this, kLa64DivD, node); ++} ++ ++void InstructionSelector::VisitFloat64Mod(Node* node) { ++ La64OperandGenerator g(this); ++ Emit(kLa64ModD, g.DefineAsFixed(node, f0), g.UseFixed(node->InputAt(0), f0), ++ g.UseFixed(node->InputAt(1), f1)) ++ ->MarkAsCall(); ++} ++ ++void InstructionSelector::VisitFloat32Max(Node* node) { ++ La64OperandGenerator g(this); ++ Emit(kLa64Float32Max, g.DefineAsRegister(node), ++ g.UseRegister(node->InputAt(0)), g.UseRegister(node->InputAt(1))); ++} ++ ++void InstructionSelector::VisitFloat64Max(Node* node) { ++ La64OperandGenerator g(this); ++ Emit(kLa64Float64Max, g.DefineAsRegister(node), ++ g.UseRegister(node->InputAt(0)), g.UseRegister(node->InputAt(1))); ++} ++ ++void InstructionSelector::VisitFloat32Min(Node* node) { ++ La64OperandGenerator g(this); ++ Emit(kLa64Float32Min, g.DefineAsRegister(node), ++ g.UseRegister(node->InputAt(0)), g.UseRegister(node->InputAt(1))); ++} ++ ++void InstructionSelector::VisitFloat64Min(Node* node) { ++ La64OperandGenerator g(this); ++ Emit(kLa64Float64Min, g.DefineAsRegister(node), ++ g.UseRegister(node->InputAt(0)), g.UseRegister(node->InputAt(1))); ++} ++ ++void InstructionSelector::VisitFloat32Abs(Node* node) { ++ VisitRR(this, kLa64AbsS, node); ++} ++ ++void InstructionSelector::VisitFloat64Abs(Node* node) { ++ VisitRR(this, kLa64AbsD, node); ++} ++ ++void InstructionSelector::VisitFloat32Sqrt(Node* node) { ++ VisitRR(this, kLa64SqrtS, node); ++} ++ ++void InstructionSelector::VisitFloat64Sqrt(Node* node) { ++ VisitRR(this, kLa64SqrtD, node); ++} ++ ++void InstructionSelector::VisitFloat32RoundDown(Node* node) { ++ VisitRR(this, kLa64Float32RoundDown, node); ++} ++ ++void InstructionSelector::VisitFloat64RoundDown(Node* node) { ++ VisitRR(this, kLa64Float64RoundDown, node); ++} ++ ++void InstructionSelector::VisitFloat32RoundUp(Node* node) { ++ VisitRR(this, kLa64Float32RoundUp, node); ++} ++ ++void InstructionSelector::VisitFloat64RoundUp(Node* node) { ++ VisitRR(this, kLa64Float64RoundUp, node); ++} ++ ++void InstructionSelector::VisitFloat32RoundTruncate(Node* node) { ++ VisitRR(this, kLa64Float32RoundTruncate, node); ++} ++ ++void InstructionSelector::VisitFloat64RoundTruncate(Node* node) { ++ VisitRR(this, kLa64Float64RoundTruncate, node); ++} ++ ++void InstructionSelector::VisitFloat64RoundTiesAway(Node* node) { ++ UNREACHABLE(); ++} ++ ++void InstructionSelector::VisitFloat32RoundTiesEven(Node* node) { ++ VisitRR(this, kLa64Float32RoundTiesEven, node); ++} ++ ++void InstructionSelector::VisitFloat64RoundTiesEven(Node* node) { ++ VisitRR(this, kLa64Float64RoundTiesEven, node); ++} ++ ++void InstructionSelector::VisitFloat32Neg(Node* node) { ++ VisitRR(this, kLa64NegS, node); ++} ++ ++void InstructionSelector::VisitFloat64Neg(Node* node) { ++ VisitRR(this, kLa64NegD, node); ++} ++ ++void InstructionSelector::VisitFloat64Ieee754Binop(Node* node, ++ InstructionCode opcode) { ++ La64OperandGenerator g(this); ++ Emit(opcode, g.DefineAsFixed(node, f0), g.UseFixed(node->InputAt(0), f2), ++ g.UseFixed(node->InputAt(1), f4)) ++ ->MarkAsCall(); ++} ++ ++void InstructionSelector::VisitFloat64Ieee754Unop(Node* node, 
++ InstructionCode opcode) { ++ La64OperandGenerator g(this); ++ Emit(opcode, g.DefineAsFixed(node, f0), g.UseFixed(node->InputAt(0), f0)) ++ ->MarkAsCall(); ++} ++ ++void InstructionSelector::EmitPrepareArguments( ++ ZoneVector* arguments, const CallDescriptor* call_descriptor, ++ Node* node) { ++ La64OperandGenerator g(this); ++ ++ // Prepare for C function call. ++ if (call_descriptor->IsCFunctionCall()) { ++ Emit(kArchPrepareCallCFunction | MiscField::encode(static_cast( ++ call_descriptor->ParameterCount())), ++ 0, nullptr, 0, nullptr); ++ ++ // Poke any stack arguments. ++ int slot = kCArgSlotCount; ++ for (PushParameter input : (*arguments)) { ++ Emit(kLa64StoreToStackSlot, g.NoOutput(), g.UseRegister(input.node), ++ g.TempImmediate(slot << kSystemPointerSizeLog2)); ++ ++slot; ++ } ++ } else { ++ int push_count = static_cast(call_descriptor->StackParameterCount()); ++ if (push_count > 0) { ++ // Calculate needed space ++ int stack_size = 0; ++ for (PushParameter input : (*arguments)) { ++ if (input.node) { ++ stack_size += input.location.GetSizeInPointers(); ++ } ++ } ++ Emit(kLa64StackClaim, g.NoOutput(), ++ g.TempImmediate(stack_size << kSystemPointerSizeLog2)); ++ } ++ for (size_t n = 0; n < arguments->size(); ++n) { ++ PushParameter input = (*arguments)[n]; ++ if (input.node) { ++ Emit(kLa64StoreToStackSlot, g.NoOutput(), g.UseRegister(input.node), ++ g.TempImmediate(static_cast(n << kSystemPointerSizeLog2))); ++ } ++ } ++ } ++} ++ ++void InstructionSelector::EmitPrepareResults( ++ ZoneVector* results, const CallDescriptor* call_descriptor, ++ Node* node) { ++ La64OperandGenerator g(this); ++ ++ int reverse_slot = 0; ++ for (PushParameter output : *results) { ++ if (!output.location.IsCallerFrameSlot()) continue; ++ // Skip any alignment holes in nodes. ++ if (output.node != nullptr) { ++ DCHECK(!call_descriptor->IsCFunctionCall()); ++ if (output.location.GetType() == MachineType::Float32()) { ++ MarkAsFloat32(output.node); ++ } else if (output.location.GetType() == MachineType::Float64()) { ++ MarkAsFloat64(output.node); ++ } ++ Emit(kLa64Peek, g.DefineAsRegister(output.node), ++ g.UseImmediate(reverse_slot)); ++ } ++ reverse_slot += output.location.GetSizeInPointers(); ++ } ++} ++ ++bool InstructionSelector::IsTailCallAddressImmediate() { return false; } ++ ++int InstructionSelector::GetTempsCountForTailCallFromJSFunction() { return 3; } ++ ++void InstructionSelector::VisitUnalignedLoad(Node* node) { ++ LoadRepresentation load_rep = LoadRepresentationOf(node->op()); ++ La64OperandGenerator g(this); ++ Node* base = node->InputAt(0); ++ Node* index = node->InputAt(1); ++ ++ ArchOpcode opcode = kArchNop; ++ switch (load_rep.representation()) { ++ case MachineRepresentation::kFloat32: ++ opcode = kLa64Ulwc1; ++ break; ++ case MachineRepresentation::kFloat64: ++ opcode = kLa64Uldc1; ++ break; ++ case MachineRepresentation::kBit: // Fall through. ++ case MachineRepresentation::kWord8: ++ UNREACHABLE(); ++ case MachineRepresentation::kWord16: ++ opcode = load_rep.IsUnsigned() ? kLa64Ulhu : kLa64Ulh; ++ break; ++ case MachineRepresentation::kWord32: ++ opcode = load_rep.IsUnsigned() ? kLa64Ulwu : kLa64Ulw; ++ break; ++ case MachineRepresentation::kTaggedSigned: // Fall through. ++ case MachineRepresentation::kTaggedPointer: // Fall through. ++ case MachineRepresentation::kTagged: // Fall through. ++ case MachineRepresentation::kWord64: ++ opcode = kLa64Uld; ++ break; ++ case MachineRepresentation::kCompressedPointer: // Fall through. 
++ case MachineRepresentation::kCompressed: // Fall through. ++ case MachineRepresentation::kNone: ++ case MachineRepresentation::kSimd128: ++ UNREACHABLE(); ++ } ++ ++ if (g.CanBeImmediate(index, opcode)) { ++ Emit(opcode | AddressingModeField::encode(kMode_MRI), ++ g.DefineAsRegister(node), g.UseRegister(base), g.UseImmediate(index)); ++ } else { ++ InstructionOperand addr_reg = g.TempRegister(); ++ Emit(kLa64Dadd | AddressingModeField::encode(kMode_None), addr_reg, ++ g.UseRegister(index), g.UseRegister(base)); ++ // Emit desired load opcode, using temp addr_reg. ++ Emit(opcode | AddressingModeField::encode(kMode_MRI), ++ g.DefineAsRegister(node), addr_reg, g.TempImmediate(0)); ++ } ++} ++ ++void InstructionSelector::VisitUnalignedStore(Node* node) { ++ La64OperandGenerator g(this); ++ Node* base = node->InputAt(0); ++ Node* index = node->InputAt(1); ++ Node* value = node->InputAt(2); ++ ++ UnalignedStoreRepresentation rep = UnalignedStoreRepresentationOf(node->op()); ++ ArchOpcode opcode = kArchNop; ++ switch (rep) { ++ case MachineRepresentation::kFloat32: ++ opcode = kLa64Uswc1; ++ break; ++ case MachineRepresentation::kFloat64: ++ opcode = kLa64Usdc1; ++ break; ++ case MachineRepresentation::kBit: // Fall through. ++ case MachineRepresentation::kWord8: ++ UNREACHABLE(); ++ case MachineRepresentation::kWord16: ++ opcode = kLa64Ush; ++ break; ++ case MachineRepresentation::kWord32: ++ opcode = kLa64Usw; ++ break; ++ case MachineRepresentation::kTaggedSigned: // Fall through. ++ case MachineRepresentation::kTaggedPointer: // Fall through. ++ case MachineRepresentation::kTagged: // Fall through. ++ case MachineRepresentation::kWord64: ++ opcode = kLa64Usd; ++ break; ++ case MachineRepresentation::kCompressedPointer: // Fall through. ++ case MachineRepresentation::kCompressed: // Fall through. ++ case MachineRepresentation::kNone: ++ case MachineRepresentation::kSimd128: ++ UNREACHABLE(); ++ } ++ ++ if (g.CanBeImmediate(index, opcode)) { ++ Emit(opcode | AddressingModeField::encode(kMode_MRI), g.NoOutput(), ++ g.UseRegister(base), g.UseImmediate(index), ++ g.UseRegisterOrImmediateZero(value)); ++ } else { ++ InstructionOperand addr_reg = g.TempRegister(); ++ Emit(kLa64Dadd | AddressingModeField::encode(kMode_None), addr_reg, ++ g.UseRegister(index), g.UseRegister(base)); ++ // Emit desired store opcode, using temp addr_reg. ++ Emit(opcode | AddressingModeField::encode(kMode_MRI), g.NoOutput(), ++ addr_reg, g.TempImmediate(0), g.UseRegisterOrImmediateZero(value)); ++ } ++} ++ ++namespace { ++ ++// Shared routine for multiple compare operations. ++static void VisitCompare(InstructionSelector* selector, InstructionCode opcode, ++ InstructionOperand left, InstructionOperand right, ++ FlagsContinuation* cont) { ++ selector->EmitWithContinuation(opcode, left, right, cont); ++} ++ ++// Shared routine for multiple float32 compare operations. ++void VisitFloat32Compare(InstructionSelector* selector, Node* node, ++ FlagsContinuation* cont) { ++ La64OperandGenerator g(selector); ++ Float32BinopMatcher m(node); ++ InstructionOperand lhs, rhs; ++ ++ lhs = m.left().IsZero() ? g.UseImmediate(m.left().node()) ++ : g.UseRegister(m.left().node()); ++ rhs = m.right().IsZero() ? g.UseImmediate(m.right().node()) ++ : g.UseRegister(m.right().node()); ++ VisitCompare(selector, kLa64CmpS, lhs, rhs, cont); ++} ++ ++// Shared routine for multiple float64 compare operations. 
++void VisitFloat64Compare(InstructionSelector* selector, Node* node, ++ FlagsContinuation* cont) { ++ La64OperandGenerator g(selector); ++ Float64BinopMatcher m(node); ++ InstructionOperand lhs, rhs; ++ ++ lhs = m.left().IsZero() ? g.UseImmediate(m.left().node()) ++ : g.UseRegister(m.left().node()); ++ rhs = m.right().IsZero() ? g.UseImmediate(m.right().node()) ++ : g.UseRegister(m.right().node()); ++ VisitCompare(selector, kLa64CmpD, lhs, rhs, cont); ++} ++ ++// Shared routine for multiple word compare operations. ++void VisitWordCompare(InstructionSelector* selector, Node* node, ++ InstructionCode opcode, FlagsContinuation* cont, ++ bool commutative) { ++ La64OperandGenerator g(selector); ++ Node* left = node->InputAt(0); ++ Node* right = node->InputAt(1); ++ ++ // Match immediates on left or right side of comparison. ++ if (g.CanBeImmediate(right, opcode)) { ++ if (opcode == kLa64Tst) { ++ VisitCompare(selector, opcode, g.UseRegister(left), g.UseImmediate(right), ++ cont); ++ } else { ++ switch (cont->condition()) { ++ case kEqual: ++ case kNotEqual: ++ if (cont->IsSet()) { ++ VisitCompare(selector, opcode, g.UseRegister(left), ++ g.UseImmediate(right), cont); ++ } else { ++ VisitCompare(selector, opcode, g.UseRegister(left), ++ g.UseRegister(right), cont); ++ } ++ break; ++ case kSignedLessThan: ++ case kSignedGreaterThanOrEqual: ++ case kUnsignedLessThan: ++ case kUnsignedGreaterThanOrEqual: ++ VisitCompare(selector, opcode, g.UseRegister(left), ++ g.UseImmediate(right), cont); ++ break; ++ default: ++ VisitCompare(selector, opcode, g.UseRegister(left), ++ g.UseRegister(right), cont); ++ } ++ } ++ } else if (g.CanBeImmediate(left, opcode)) { ++ if (!commutative) cont->Commute(); ++ if (opcode == kLa64Tst) { ++ VisitCompare(selector, opcode, g.UseRegister(right), g.UseImmediate(left), ++ cont); ++ } else { ++ switch (cont->condition()) { ++ case kEqual: ++ case kNotEqual: ++ if (cont->IsSet()) { ++ VisitCompare(selector, opcode, g.UseRegister(right), ++ g.UseImmediate(left), cont); ++ } else { ++ VisitCompare(selector, opcode, g.UseRegister(right), ++ g.UseRegister(left), cont); ++ } ++ break; ++ case kSignedLessThan: ++ case kSignedGreaterThanOrEqual: ++ case kUnsignedLessThan: ++ case kUnsignedGreaterThanOrEqual: ++ VisitCompare(selector, opcode, g.UseRegister(right), ++ g.UseImmediate(left), cont); ++ break; ++ default: ++ VisitCompare(selector, opcode, g.UseRegister(right), ++ g.UseRegister(left), cont); ++ } ++ } ++ } else { ++ VisitCompare(selector, opcode, g.UseRegister(left), g.UseRegister(right), ++ cont); ++ } ++} ++ ++bool IsNodeUnsigned(Node* n) { ++ NodeMatcher m(n); ++ ++ if (m.IsLoad() || m.IsUnalignedLoad() || m.IsPoisonedLoad() || ++ m.IsProtectedLoad() || m.IsWord32AtomicLoad() || m.IsWord64AtomicLoad()) { ++ LoadRepresentation load_rep = LoadRepresentationOf(n->op()); ++ return load_rep.IsUnsigned(); ++ } else { ++ return m.IsUint32Div() || m.IsUint32LessThan() || ++ m.IsUint32LessThanOrEqual() || m.IsUint32Mod() || ++ m.IsUint32MulHigh() || m.IsChangeFloat64ToUint32() || ++ m.IsTruncateFloat64ToUint32() || m.IsTruncateFloat32ToUint32(); ++ } ++} ++ ++// Shared routine for multiple word compare operations. 
++void VisitFullWord32Compare(InstructionSelector* selector, Node* node, ++ InstructionCode opcode, FlagsContinuation* cont) { ++ La64OperandGenerator g(selector); ++ InstructionOperand leftOp = g.TempRegister(); ++ InstructionOperand rightOp = g.TempRegister(); ++ ++ selector->Emit(kLa64Dshl, leftOp, g.UseRegister(node->InputAt(0)), ++ g.TempImmediate(32)); ++ selector->Emit(kLa64Dshl, rightOp, g.UseRegister(node->InputAt(1)), ++ g.TempImmediate(32)); ++ ++ VisitCompare(selector, opcode, leftOp, rightOp, cont); ++} ++ ++void VisitOptimizedWord32Compare(InstructionSelector* selector, Node* node, ++ InstructionCode opcode, ++ FlagsContinuation* cont) { ++ if (FLAG_debug_code) { ++ La64OperandGenerator g(selector); ++ InstructionOperand leftOp = g.TempRegister(); ++ InstructionOperand rightOp = g.TempRegister(); ++ InstructionOperand optimizedResult = g.TempRegister(); ++ InstructionOperand fullResult = g.TempRegister(); ++ FlagsCondition condition = cont->condition(); ++ InstructionCode testOpcode = opcode | ++ FlagsConditionField::encode(condition) | ++ FlagsModeField::encode(kFlags_set); ++ ++ selector->Emit(testOpcode, optimizedResult, g.UseRegister(node->InputAt(0)), ++ g.UseRegister(node->InputAt(1))); ++ ++ selector->Emit(kLa64Dshl, leftOp, g.UseRegister(node->InputAt(0)), ++ g.TempImmediate(32)); ++ selector->Emit(kLa64Dshl, rightOp, g.UseRegister(node->InputAt(1)), ++ g.TempImmediate(32)); ++ selector->Emit(testOpcode, fullResult, leftOp, rightOp); ++ ++ selector->Emit(kLa64AssertEqual, g.NoOutput(), optimizedResult, fullResult, ++ g.TempImmediate(static_cast( ++ AbortReason::kUnsupportedNonPrimitiveCompare))); ++ } ++ ++ VisitWordCompare(selector, node, opcode, cont, false); ++} ++ ++void VisitWord32Compare(InstructionSelector* selector, Node* node, ++ FlagsContinuation* cont) { ++ // LA64 doesn't support Word32 compare instructions. Instead it relies ++ // that the values in registers are correctly sign-extended and uses ++ // Word64 comparison instead. This behavior is correct in most cases, ++ // but doesn't work when comparing signed with unsigned operands. ++ // We could simulate full Word32 compare in all cases but this would ++ // create an unnecessary overhead since unsigned integers are rarely ++ // used in JavaScript. ++ // The solution proposed here tries to match a comparison of signed ++ // with unsigned operand, and perform full Word32Compare only ++ // in those cases. Unfortunately, the solution is not complete because ++ // it might skip cases where Word32 full compare is needed, so ++ // basically it is a hack. ++ // When call to a host function in simulator, if the function return a ++ // int32 value, the simulator do not sign-extended to int64 because in ++ // simulator we do not know the function whether return a int32 or int64. ++ // so we need do a full word32 compare in this case. 
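++  // Illustrative example (added for clarity, not upstream text): take int32
++  // -1 kept sign-extended as 0xFFFFFFFFFFFFFFFF and uint32 0xFFFFFFFF kept
++  // zero-extended as 0x00000000FFFFFFFF.  A Word64 compare reports them
++  // unequal, while a true Word32 compare of the low 32 bits sees the same
++  // bit pattern -- which is why the mixed signed/unsigned case described
++  // above must take the full Word32 compare path.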
++#ifndef USE_SIMULATOR ++ if (IsNodeUnsigned(node->InputAt(0)) != IsNodeUnsigned(node->InputAt(1))) { ++#else ++ if (IsNodeUnsigned(node->InputAt(0)) != IsNodeUnsigned(node->InputAt(1)) || ++ node->InputAt(0)->opcode() == IrOpcode::kCall || ++ node->InputAt(1)->opcode() == IrOpcode::kCall) { ++#endif ++ VisitFullWord32Compare(selector, node, kLa64Cmp, cont); ++ } else { ++ VisitOptimizedWord32Compare(selector, node, kLa64Cmp, cont); ++ } ++} ++ ++void VisitWord64Compare(InstructionSelector* selector, Node* node, ++ FlagsContinuation* cont) { ++ VisitWordCompare(selector, node, kLa64Cmp, cont, false); ++} ++ ++void EmitWordCompareZero(InstructionSelector* selector, Node* value, ++ FlagsContinuation* cont) { ++ La64OperandGenerator g(selector); ++ selector->EmitWithContinuation(kLa64Cmp, g.UseRegister(value), ++ g.TempImmediate(0), cont); ++} ++ ++void VisitAtomicLoad(InstructionSelector* selector, Node* node, ++ ArchOpcode opcode) { ++ La64OperandGenerator g(selector); ++ Node* base = node->InputAt(0); ++ Node* index = node->InputAt(1); ++ if (g.CanBeImmediate(index, opcode)) { ++ selector->Emit(opcode | AddressingModeField::encode(kMode_MRI), ++ g.DefineAsRegister(node), g.UseRegister(base), ++ g.UseImmediate(index)); ++ } else { ++ InstructionOperand addr_reg = g.TempRegister(); ++ selector->Emit(kLa64Dadd | AddressingModeField::encode(kMode_None), ++ addr_reg, g.UseRegister(index), g.UseRegister(base)); ++ // Emit desired load opcode, using temp addr_reg. ++ selector->Emit(opcode | AddressingModeField::encode(kMode_MRI), ++ g.DefineAsRegister(node), addr_reg, g.TempImmediate(0)); ++ } ++} ++ ++void VisitAtomicStore(InstructionSelector* selector, Node* node, ++ ArchOpcode opcode) { ++ La64OperandGenerator g(selector); ++ Node* base = node->InputAt(0); ++ Node* index = node->InputAt(1); ++ Node* value = node->InputAt(2); ++ ++ if (g.CanBeImmediate(index, opcode)) { ++ selector->Emit(opcode | AddressingModeField::encode(kMode_MRI), ++ g.NoOutput(), g.UseRegister(base), g.UseImmediate(index), ++ g.UseRegisterOrImmediateZero(value)); ++ } else { ++ InstructionOperand addr_reg = g.TempRegister(); ++ selector->Emit(kLa64Dadd | AddressingModeField::encode(kMode_None), ++ addr_reg, g.UseRegister(index), g.UseRegister(base)); ++ // Emit desired store opcode, using temp addr_reg. 
++ selector->Emit(opcode | AddressingModeField::encode(kMode_MRI), ++ g.NoOutput(), addr_reg, g.TempImmediate(0), ++ g.UseRegisterOrImmediateZero(value)); ++ } ++} ++ ++void VisitAtomicExchange(InstructionSelector* selector, Node* node, ++ ArchOpcode opcode) { ++ La64OperandGenerator g(selector); ++ Node* base = node->InputAt(0); ++ Node* index = node->InputAt(1); ++ Node* value = node->InputAt(2); ++ ++ AddressingMode addressing_mode = kMode_MRI; ++ InstructionOperand inputs[3]; ++ size_t input_count = 0; ++ inputs[input_count++] = g.UseUniqueRegister(base); ++ inputs[input_count++] = g.UseUniqueRegister(index); ++ inputs[input_count++] = g.UseUniqueRegister(value); ++ InstructionOperand outputs[1]; ++ outputs[0] = g.UseUniqueRegister(node); ++ InstructionOperand temp[3]; ++ temp[0] = g.TempRegister(); ++ temp[1] = g.TempRegister(); ++ temp[2] = g.TempRegister(); ++ InstructionCode code = opcode | AddressingModeField::encode(addressing_mode); ++ selector->Emit(code, 1, outputs, input_count, inputs, 3, temp); ++} ++ ++void VisitAtomicCompareExchange(InstructionSelector* selector, Node* node, ++ ArchOpcode opcode) { ++ La64OperandGenerator g(selector); ++ Node* base = node->InputAt(0); ++ Node* index = node->InputAt(1); ++ Node* old_value = node->InputAt(2); ++ Node* new_value = node->InputAt(3); ++ ++ AddressingMode addressing_mode = kMode_MRI; ++ InstructionOperand inputs[4]; ++ size_t input_count = 0; ++ inputs[input_count++] = g.UseUniqueRegister(base); ++ inputs[input_count++] = g.UseUniqueRegister(index); ++ inputs[input_count++] = g.UseUniqueRegister(old_value); ++ inputs[input_count++] = g.UseUniqueRegister(new_value); ++ InstructionOperand outputs[1]; ++ outputs[0] = g.UseUniqueRegister(node); ++ InstructionOperand temp[3]; ++ temp[0] = g.TempRegister(); ++ temp[1] = g.TempRegister(); ++ temp[2] = g.TempRegister(); ++ InstructionCode code = opcode | AddressingModeField::encode(addressing_mode); ++ selector->Emit(code, 1, outputs, input_count, inputs, 3, temp); ++} ++ ++void VisitAtomicBinop(InstructionSelector* selector, Node* node, ++ ArchOpcode opcode) { ++ La64OperandGenerator g(selector); ++ Node* base = node->InputAt(0); ++ Node* index = node->InputAt(1); ++ Node* value = node->InputAt(2); ++ ++ AddressingMode addressing_mode = kMode_MRI; ++ InstructionOperand inputs[3]; ++ size_t input_count = 0; ++ inputs[input_count++] = g.UseUniqueRegister(base); ++ inputs[input_count++] = g.UseUniqueRegister(index); ++ inputs[input_count++] = g.UseUniqueRegister(value); ++ InstructionOperand outputs[1]; ++ outputs[0] = g.UseUniqueRegister(node); ++ InstructionOperand temps[4]; ++ temps[0] = g.TempRegister(); ++ temps[1] = g.TempRegister(); ++ temps[2] = g.TempRegister(); ++ temps[3] = g.TempRegister(); ++ InstructionCode code = opcode | AddressingModeField::encode(addressing_mode); ++ selector->Emit(code, 1, outputs, input_count, inputs, 4, temps); ++} ++ ++} // namespace ++ ++void InstructionSelector::VisitStackPointerGreaterThan( ++ Node* node, FlagsContinuation* cont) { ++ StackCheckKind kind = StackCheckKindOf(node->op()); ++ InstructionCode opcode = ++ kArchStackPointerGreaterThan | MiscField::encode(static_cast(kind)); ++ ++ La64OperandGenerator g(this); ++ ++ // No outputs. ++ InstructionOperand* const outputs = nullptr; ++ const int output_count = 0; ++ ++ // Applying an offset to this stack check requires a temp register. Offsets ++ // are only applied to the first stack check. 
If applying an offset, we must ++ // ensure the input and temp registers do not alias, thus kUniqueRegister. ++ InstructionOperand temps[] = {g.TempRegister()}; ++ const int temp_count = (kind == StackCheckKind::kJSFunctionEntry ? 1 : 0); ++ const auto register_mode = (kind == StackCheckKind::kJSFunctionEntry) ++ ? OperandGenerator::kUniqueRegister ++ : OperandGenerator::kRegister; ++ ++ Node* const value = node->InputAt(0); ++ InstructionOperand inputs[] = {g.UseRegisterWithMode(value, register_mode)}; ++ static constexpr int input_count = arraysize(inputs); ++ ++ EmitWithContinuation(opcode, output_count, outputs, input_count, inputs, ++ temp_count, temps, cont); ++} ++ ++// Shared routine for word comparisons against zero. ++void InstructionSelector::VisitWordCompareZero(Node* user, Node* value, ++ FlagsContinuation* cont) { ++ // Try to combine with comparisons against 0 by simply inverting the branch. ++ while (CanCover(user, value)) { ++ if (value->opcode() == IrOpcode::kWord32Equal) { ++ Int32BinopMatcher m(value); ++ if (!m.right().Is(0)) break; ++ user = value; ++ value = m.left().node(); ++ } else if (value->opcode() == IrOpcode::kWord64Equal) { ++ Int64BinopMatcher m(value); ++ if (!m.right().Is(0)) break; ++ user = value; ++ value = m.left().node(); ++ } else { ++ break; ++ } ++ ++ cont->Negate(); ++ } ++ ++ if (CanCover(user, value)) { ++ switch (value->opcode()) { ++ case IrOpcode::kWord32Equal: ++ cont->OverwriteAndNegateIfEqual(kEqual); ++ return VisitWord32Compare(this, value, cont); ++ case IrOpcode::kInt32LessThan: ++ cont->OverwriteAndNegateIfEqual(kSignedLessThan); ++ return VisitWord32Compare(this, value, cont); ++ case IrOpcode::kInt32LessThanOrEqual: ++ cont->OverwriteAndNegateIfEqual(kSignedLessThanOrEqual); ++ return VisitWord32Compare(this, value, cont); ++ case IrOpcode::kUint32LessThan: ++ cont->OverwriteAndNegateIfEqual(kUnsignedLessThan); ++ return VisitWord32Compare(this, value, cont); ++ case IrOpcode::kUint32LessThanOrEqual: ++ cont->OverwriteAndNegateIfEqual(kUnsignedLessThanOrEqual); ++ return VisitWord32Compare(this, value, cont); ++ case IrOpcode::kWord64Equal: ++ cont->OverwriteAndNegateIfEqual(kEqual); ++ return VisitWord64Compare(this, value, cont); ++ case IrOpcode::kInt64LessThan: ++ cont->OverwriteAndNegateIfEqual(kSignedLessThan); ++ return VisitWord64Compare(this, value, cont); ++ case IrOpcode::kInt64LessThanOrEqual: ++ cont->OverwriteAndNegateIfEqual(kSignedLessThanOrEqual); ++ return VisitWord64Compare(this, value, cont); ++ case IrOpcode::kUint64LessThan: ++ cont->OverwriteAndNegateIfEqual(kUnsignedLessThan); ++ return VisitWord64Compare(this, value, cont); ++ case IrOpcode::kUint64LessThanOrEqual: ++ cont->OverwriteAndNegateIfEqual(kUnsignedLessThanOrEqual); ++ return VisitWord64Compare(this, value, cont); ++ case IrOpcode::kFloat32Equal: ++ cont->OverwriteAndNegateIfEqual(kEqual); ++ return VisitFloat32Compare(this, value, cont); ++ case IrOpcode::kFloat32LessThan: ++ cont->OverwriteAndNegateIfEqual(kUnsignedLessThan); ++ return VisitFloat32Compare(this, value, cont); ++ case IrOpcode::kFloat32LessThanOrEqual: ++ cont->OverwriteAndNegateIfEqual(kUnsignedLessThanOrEqual); ++ return VisitFloat32Compare(this, value, cont); ++ case IrOpcode::kFloat64Equal: ++ cont->OverwriteAndNegateIfEqual(kEqual); ++ return VisitFloat64Compare(this, value, cont); ++ case IrOpcode::kFloat64LessThan: ++ cont->OverwriteAndNegateIfEqual(kUnsignedLessThan); ++ return VisitFloat64Compare(this, value, cont); ++ case IrOpcode::kFloat64LessThanOrEqual: ++ 
cont->OverwriteAndNegateIfEqual(kUnsignedLessThanOrEqual); ++ return VisitFloat64Compare(this, value, cont); ++ case IrOpcode::kProjection: ++ // Check if this is the overflow output projection of an ++ // WithOverflow node. ++ if (ProjectionIndexOf(value->op()) == 1u) { ++ // We cannot combine the WithOverflow with this branch ++ // unless the 0th projection (the use of the actual value of the ++ // is either nullptr, which means there's no use of the ++ // actual value, or was already defined, which means it is scheduled ++ // *AFTER* this branch). ++ Node* const node = value->InputAt(0); ++ Node* const result = NodeProperties::FindProjection(node, 0); ++ if (result == nullptr || IsDefined(result)) { ++ switch (node->opcode()) { ++ case IrOpcode::kInt32AddWithOverflow: ++ cont->OverwriteAndNegateIfEqual(kOverflow); ++ return VisitBinop(this, node, kLa64Dadd, cont); ++ case IrOpcode::kInt32SubWithOverflow: ++ cont->OverwriteAndNegateIfEqual(kOverflow); ++ return VisitBinop(this, node, kLa64Dsub, cont); ++ case IrOpcode::kInt32MulWithOverflow: ++ cont->OverwriteAndNegateIfEqual(kOverflow); ++ return VisitBinop(this, node, kLa64MulOvf, cont); ++ case IrOpcode::kInt64AddWithOverflow: ++ cont->OverwriteAndNegateIfEqual(kOverflow); ++ return VisitBinop(this, node, kLa64DaddOvf, cont); ++ case IrOpcode::kInt64SubWithOverflow: ++ cont->OverwriteAndNegateIfEqual(kOverflow); ++ return VisitBinop(this, node, kLa64DsubOvf, cont); ++ default: ++ break; ++ } ++ } ++ } ++ break; ++ case IrOpcode::kWord32And: ++ case IrOpcode::kWord64And: ++ return VisitWordCompare(this, value, kLa64Tst, cont, true); ++ case IrOpcode::kStackPointerGreaterThan: ++ cont->OverwriteAndNegateIfEqual(kStackPointerGreaterThanCondition); ++ return VisitStackPointerGreaterThan(value, cont); ++ default: ++ break; ++ } ++ } ++ ++ // Continuation could not be combined with a compare, emit compare against 0. ++ EmitWordCompareZero(this, value, cont); ++} ++ ++void InstructionSelector::VisitSwitch(Node* node, const SwitchInfo& sw) { ++ La64OperandGenerator g(this); ++ InstructionOperand value_operand = g.UseRegister(node->InputAt(0)); ++ ++ // Emit either ArchTableSwitch or ArchBinarySearchSwitch. ++ if (enable_switch_jump_table_ == kEnableSwitchJumpTable) { ++ static const size_t kMaxTableSwitchValueRange = 2 << 16; ++ size_t table_space_cost = 10 + 2 * sw.value_range(); ++ size_t table_time_cost = 3; ++ size_t lookup_space_cost = 2 + 2 * sw.case_count(); ++ size_t lookup_time_cost = sw.case_count(); ++ if (sw.case_count() > 0 && ++ table_space_cost + 3 * table_time_cost <= ++ lookup_space_cost + 3 * lookup_time_cost && ++ sw.min_value() > std::numeric_limits::min() && ++ sw.value_range() <= kMaxTableSwitchValueRange) { ++ InstructionOperand index_operand = value_operand; ++ if (sw.min_value()) { ++ index_operand = g.TempRegister(); ++ Emit(kLa64Sub, index_operand, value_operand, ++ g.TempImmediate(sw.min_value())); ++ } ++ // Generate a table lookup. ++ return EmitTableSwitch(sw, index_operand); ++ } ++ } ++ ++ // Generate a tree of conditional jumps. 
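++  // Worked example (illustrative, not from upstream): 4 cases spread over the
++  // value range 0..100 give a table cost of 10 + 2*101 + 3*3 = 221 against a
++  // lookup cost of 2 + 2*4 + 3*4 = 22, so the jump table is rejected and the
++  // binary-search tree below is emitted instead.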
++ return EmitBinarySearchSwitch(sw, value_operand); ++} ++ ++void InstructionSelector::VisitWord32Equal(Node* const node) { ++ FlagsContinuation cont = FlagsContinuation::ForSet(kEqual, node); ++ Int32BinopMatcher m(node); ++ if (m.right().Is(0)) { ++ return VisitWordCompareZero(m.node(), m.left().node(), &cont); ++ } ++ ++ VisitWord32Compare(this, node, &cont); ++} ++ ++void InstructionSelector::VisitInt32LessThan(Node* node) { ++ FlagsContinuation cont = FlagsContinuation::ForSet(kSignedLessThan, node); ++ VisitWord32Compare(this, node, &cont); ++} ++ ++void InstructionSelector::VisitInt32LessThanOrEqual(Node* node) { ++ FlagsContinuation cont = ++ FlagsContinuation::ForSet(kSignedLessThanOrEqual, node); ++ VisitWord32Compare(this, node, &cont); ++} ++ ++void InstructionSelector::VisitUint32LessThan(Node* node) { ++ FlagsContinuation cont = FlagsContinuation::ForSet(kUnsignedLessThan, node); ++ VisitWord32Compare(this, node, &cont); ++} ++ ++void InstructionSelector::VisitUint32LessThanOrEqual(Node* node) { ++ FlagsContinuation cont = ++ FlagsContinuation::ForSet(kUnsignedLessThanOrEqual, node); ++ VisitWord32Compare(this, node, &cont); ++} ++ ++void InstructionSelector::VisitInt32AddWithOverflow(Node* node) { ++ if (Node* ovf = NodeProperties::FindProjection(node, 1)) { ++ FlagsContinuation cont = FlagsContinuation::ForSet(kOverflow, ovf); ++ return VisitBinop(this, node, kLa64Dadd, &cont); ++ } ++ FlagsContinuation cont; ++ VisitBinop(this, node, kLa64Dadd, &cont); ++} ++ ++void InstructionSelector::VisitInt32SubWithOverflow(Node* node) { ++ if (Node* ovf = NodeProperties::FindProjection(node, 1)) { ++ FlagsContinuation cont = FlagsContinuation::ForSet(kOverflow, ovf); ++ return VisitBinop(this, node, kLa64Dsub, &cont); ++ } ++ FlagsContinuation cont; ++ VisitBinop(this, node, kLa64Dsub, &cont); ++} ++ ++void InstructionSelector::VisitInt32MulWithOverflow(Node* node) { ++ if (Node* ovf = NodeProperties::FindProjection(node, 1)) { ++ FlagsContinuation cont = FlagsContinuation::ForSet(kOverflow, ovf); ++ return VisitBinop(this, node, kLa64MulOvf, &cont); ++ } ++ FlagsContinuation cont; ++ VisitBinop(this, node, kLa64MulOvf, &cont); ++} ++ ++void InstructionSelector::VisitInt64AddWithOverflow(Node* node) { ++ if (Node* ovf = NodeProperties::FindProjection(node, 1)) { ++ FlagsContinuation cont = FlagsContinuation::ForSet(kOverflow, ovf); ++ return VisitBinop(this, node, kLa64DaddOvf, &cont); ++ } ++ FlagsContinuation cont; ++ VisitBinop(this, node, kLa64DaddOvf, &cont); ++} ++ ++void InstructionSelector::VisitInt64SubWithOverflow(Node* node) { ++ if (Node* ovf = NodeProperties::FindProjection(node, 1)) { ++ FlagsContinuation cont = FlagsContinuation::ForSet(kOverflow, ovf); ++ return VisitBinop(this, node, kLa64DsubOvf, &cont); ++ } ++ FlagsContinuation cont; ++ VisitBinop(this, node, kLa64DsubOvf, &cont); ++} ++ ++void InstructionSelector::VisitWord64Equal(Node* const node) { ++ FlagsContinuation cont = FlagsContinuation::ForSet(kEqual, node); ++ Int64BinopMatcher m(node); ++ if (m.right().Is(0)) { ++ return VisitWordCompareZero(m.node(), m.left().node(), &cont); ++ } ++ ++ VisitWord64Compare(this, node, &cont); ++} ++ ++void InstructionSelector::VisitInt64LessThan(Node* node) { ++ FlagsContinuation cont = FlagsContinuation::ForSet(kSignedLessThan, node); ++ VisitWord64Compare(this, node, &cont); ++} ++ ++void InstructionSelector::VisitInt64LessThanOrEqual(Node* node) { ++ FlagsContinuation cont = ++ FlagsContinuation::ForSet(kSignedLessThanOrEqual, node); ++ VisitWord64Compare(this, 
node, &cont); ++} ++ ++void InstructionSelector::VisitUint64LessThan(Node* node) { ++ FlagsContinuation cont = FlagsContinuation::ForSet(kUnsignedLessThan, node); ++ VisitWord64Compare(this, node, &cont); ++} ++ ++void InstructionSelector::VisitUint64LessThanOrEqual(Node* node) { ++ FlagsContinuation cont = ++ FlagsContinuation::ForSet(kUnsignedLessThanOrEqual, node); ++ VisitWord64Compare(this, node, &cont); ++} ++ ++void InstructionSelector::VisitFloat32Equal(Node* node) { ++ FlagsContinuation cont = FlagsContinuation::ForSet(kEqual, node); ++ VisitFloat32Compare(this, node, &cont); ++} ++ ++void InstructionSelector::VisitFloat32LessThan(Node* node) { ++ FlagsContinuation cont = FlagsContinuation::ForSet(kUnsignedLessThan, node); ++ VisitFloat32Compare(this, node, &cont); ++} ++ ++void InstructionSelector::VisitFloat32LessThanOrEqual(Node* node) { ++ FlagsContinuation cont = ++ FlagsContinuation::ForSet(kUnsignedLessThanOrEqual, node); ++ VisitFloat32Compare(this, node, &cont); ++} ++ ++void InstructionSelector::VisitFloat64Equal(Node* node) { ++ FlagsContinuation cont = FlagsContinuation::ForSet(kEqual, node); ++ VisitFloat64Compare(this, node, &cont); ++} ++ ++void InstructionSelector::VisitFloat64LessThan(Node* node) { ++ FlagsContinuation cont = FlagsContinuation::ForSet(kUnsignedLessThan, node); ++ VisitFloat64Compare(this, node, &cont); ++} ++ ++void InstructionSelector::VisitFloat64LessThanOrEqual(Node* node) { ++ FlagsContinuation cont = ++ FlagsContinuation::ForSet(kUnsignedLessThanOrEqual, node); ++ VisitFloat64Compare(this, node, &cont); ++} ++ ++void InstructionSelector::VisitFloat64ExtractLowWord32(Node* node) { ++ VisitRR(this, kLa64Float64ExtractLowWord32, node); ++} ++ ++void InstructionSelector::VisitFloat64ExtractHighWord32(Node* node) { ++ VisitRR(this, kLa64Float64ExtractHighWord32, node); ++} ++ ++void InstructionSelector::VisitFloat64SilenceNaN(Node* node) { ++ VisitRR(this, kLa64Float64SilenceNaN, node); ++} ++ ++void InstructionSelector::VisitFloat64InsertLowWord32(Node* node) { ++ La64OperandGenerator g(this); ++ Node* left = node->InputAt(0); ++ Node* right = node->InputAt(1); ++ Emit(kLa64Float64InsertLowWord32, g.DefineSameAsFirst(node), ++ g.UseRegister(left), g.UseRegister(right)); ++} ++ ++void InstructionSelector::VisitFloat64InsertHighWord32(Node* node) { ++ La64OperandGenerator g(this); ++ Node* left = node->InputAt(0); ++ Node* right = node->InputAt(1); ++ Emit(kLa64Float64InsertHighWord32, g.DefineSameAsFirst(node), ++ g.UseRegister(left), g.UseRegister(right)); ++} ++ ++void InstructionSelector::VisitMemoryBarrier(Node* node) { ++ La64OperandGenerator g(this); ++ Emit(kLa64Sync, g.NoOutput()); ++} ++ ++void InstructionSelector::VisitWord32AtomicLoad(Node* node) { ++ LoadRepresentation load_rep = LoadRepresentationOf(node->op()); ++ ArchOpcode opcode = kArchNop; ++ switch (load_rep.representation()) { ++ case MachineRepresentation::kWord8: ++ opcode = ++ load_rep.IsSigned() ? kWord32AtomicLoadInt8 : kWord32AtomicLoadUint8; ++ break; ++ case MachineRepresentation::kWord16: ++ opcode = load_rep.IsSigned() ? 
kWord32AtomicLoadInt16 ++ : kWord32AtomicLoadUint16; ++ break; ++ case MachineRepresentation::kWord32: ++ opcode = kWord32AtomicLoadWord32; ++ break; ++ default: ++ UNREACHABLE(); ++ } ++ VisitAtomicLoad(this, node, opcode); ++} ++ ++void InstructionSelector::VisitWord32AtomicStore(Node* node) { ++ MachineRepresentation rep = AtomicStoreRepresentationOf(node->op()); ++ ArchOpcode opcode = kArchNop; ++ switch (rep) { ++ case MachineRepresentation::kWord8: ++ opcode = kWord32AtomicStoreWord8; ++ break; ++ case MachineRepresentation::kWord16: ++ opcode = kWord32AtomicStoreWord16; ++ break; ++ case MachineRepresentation::kWord32: ++ opcode = kWord32AtomicStoreWord32; ++ break; ++ default: ++ UNREACHABLE(); ++ } ++ ++ VisitAtomicStore(this, node, opcode); ++} ++ ++void InstructionSelector::VisitWord64AtomicLoad(Node* node) { ++ LoadRepresentation load_rep = LoadRepresentationOf(node->op()); ++ ArchOpcode opcode = kArchNop; ++ switch (load_rep.representation()) { ++ case MachineRepresentation::kWord8: ++ opcode = kLa64Word64AtomicLoadUint8; ++ break; ++ case MachineRepresentation::kWord16: ++ opcode = kLa64Word64AtomicLoadUint16; ++ break; ++ case MachineRepresentation::kWord32: ++ opcode = kLa64Word64AtomicLoadUint32; ++ break; ++ case MachineRepresentation::kWord64: ++ opcode = kLa64Word64AtomicLoadUint64; ++ break; ++ default: ++ UNREACHABLE(); ++ } ++ VisitAtomicLoad(this, node, opcode); ++} ++ ++void InstructionSelector::VisitWord64AtomicStore(Node* node) { ++ MachineRepresentation rep = AtomicStoreRepresentationOf(node->op()); ++ ArchOpcode opcode = kArchNop; ++ switch (rep) { ++ case MachineRepresentation::kWord8: ++ opcode = kLa64Word64AtomicStoreWord8; ++ break; ++ case MachineRepresentation::kWord16: ++ opcode = kLa64Word64AtomicStoreWord16; ++ break; ++ case MachineRepresentation::kWord32: ++ opcode = kLa64Word64AtomicStoreWord32; ++ break; ++ case MachineRepresentation::kWord64: ++ opcode = kLa64Word64AtomicStoreWord64; ++ break; ++ default: ++ UNREACHABLE(); ++ } ++ ++ VisitAtomicStore(this, node, opcode); ++} ++ ++void InstructionSelector::VisitWord32AtomicExchange(Node* node) { ++ ArchOpcode opcode = kArchNop; ++ MachineType type = AtomicOpType(node->op()); ++ if (type == MachineType::Int8()) { ++ opcode = kWord32AtomicExchangeInt8; ++ } else if (type == MachineType::Uint8()) { ++ opcode = kWord32AtomicExchangeUint8; ++ } else if (type == MachineType::Int16()) { ++ opcode = kWord32AtomicExchangeInt16; ++ } else if (type == MachineType::Uint16()) { ++ opcode = kWord32AtomicExchangeUint16; ++ } else if (type == MachineType::Int32() || type == MachineType::Uint32()) { ++ opcode = kWord32AtomicExchangeWord32; ++ } else { ++ UNREACHABLE(); ++ return; ++ } ++ ++ VisitAtomicExchange(this, node, opcode); ++} ++ ++void InstructionSelector::VisitWord64AtomicExchange(Node* node) { ++ ArchOpcode opcode = kArchNop; ++ MachineType type = AtomicOpType(node->op()); ++ if (type == MachineType::Uint8()) { ++ opcode = kLa64Word64AtomicExchangeUint8; ++ } else if (type == MachineType::Uint16()) { ++ opcode = kLa64Word64AtomicExchangeUint16; ++ } else if (type == MachineType::Uint32()) { ++ opcode = kLa64Word64AtomicExchangeUint32; ++ } else if (type == MachineType::Uint64()) { ++ opcode = kLa64Word64AtomicExchangeUint64; ++ } else { ++ UNREACHABLE(); ++ return; ++ } ++ VisitAtomicExchange(this, node, opcode); ++} ++ ++void InstructionSelector::VisitWord32AtomicCompareExchange(Node* node) { ++ ArchOpcode opcode = kArchNop; ++ MachineType type = AtomicOpType(node->op()); ++ if (type == 
MachineType::Int8()) { ++ opcode = kWord32AtomicCompareExchangeInt8; ++ } else if (type == MachineType::Uint8()) { ++ opcode = kWord32AtomicCompareExchangeUint8; ++ } else if (type == MachineType::Int16()) { ++ opcode = kWord32AtomicCompareExchangeInt16; ++ } else if (type == MachineType::Uint16()) { ++ opcode = kWord32AtomicCompareExchangeUint16; ++ } else if (type == MachineType::Int32() || type == MachineType::Uint32()) { ++ opcode = kWord32AtomicCompareExchangeWord32; ++ } else { ++ UNREACHABLE(); ++ return; ++ } ++ ++ VisitAtomicCompareExchange(this, node, opcode); ++} ++ ++void InstructionSelector::VisitWord64AtomicCompareExchange(Node* node) { ++ ArchOpcode opcode = kArchNop; ++ MachineType type = AtomicOpType(node->op()); ++ if (type == MachineType::Uint8()) { ++ opcode = kLa64Word64AtomicCompareExchangeUint8; ++ } else if (type == MachineType::Uint16()) { ++ opcode = kLa64Word64AtomicCompareExchangeUint16; ++ } else if (type == MachineType::Uint32()) { ++ opcode = kLa64Word64AtomicCompareExchangeUint32; ++ } else if (type == MachineType::Uint64()) { ++ opcode = kLa64Word64AtomicCompareExchangeUint64; ++ } else { ++ UNREACHABLE(); ++ return; ++ } ++ VisitAtomicCompareExchange(this, node, opcode); ++} ++void InstructionSelector::VisitWord32AtomicBinaryOperation( ++ Node* node, ArchOpcode int8_op, ArchOpcode uint8_op, ArchOpcode int16_op, ++ ArchOpcode uint16_op, ArchOpcode word32_op) { ++ ArchOpcode opcode = kArchNop; ++ MachineType type = AtomicOpType(node->op()); ++ if (type == MachineType::Int8()) { ++ opcode = int8_op; ++ } else if (type == MachineType::Uint8()) { ++ opcode = uint8_op; ++ } else if (type == MachineType::Int16()) { ++ opcode = int16_op; ++ } else if (type == MachineType::Uint16()) { ++ opcode = uint16_op; ++ } else if (type == MachineType::Int32() || type == MachineType::Uint32()) { ++ opcode = word32_op; ++ } else { ++ UNREACHABLE(); ++ return; ++ } ++ ++ VisitAtomicBinop(this, node, opcode); ++} ++ ++#define VISIT_ATOMIC_BINOP(op) \ ++ void InstructionSelector::VisitWord32Atomic##op(Node* node) { \ ++ VisitWord32AtomicBinaryOperation( \ ++ node, kWord32Atomic##op##Int8, kWord32Atomic##op##Uint8, \ ++ kWord32Atomic##op##Int16, kWord32Atomic##op##Uint16, \ ++ kWord32Atomic##op##Word32); \ ++ } ++VISIT_ATOMIC_BINOP(Add) ++VISIT_ATOMIC_BINOP(Sub) ++VISIT_ATOMIC_BINOP(And) ++VISIT_ATOMIC_BINOP(Or) ++VISIT_ATOMIC_BINOP(Xor) ++#undef VISIT_ATOMIC_BINOP ++ ++void InstructionSelector::VisitWord64AtomicBinaryOperation( ++ Node* node, ArchOpcode uint8_op, ArchOpcode uint16_op, ArchOpcode uint32_op, ++ ArchOpcode uint64_op) { ++ ArchOpcode opcode = kArchNop; ++ MachineType type = AtomicOpType(node->op()); ++ if (type == MachineType::Uint8()) { ++ opcode = uint8_op; ++ } else if (type == MachineType::Uint16()) { ++ opcode = uint16_op; ++ } else if (type == MachineType::Uint32()) { ++ opcode = uint32_op; ++ } else if (type == MachineType::Uint64()) { ++ opcode = uint64_op; ++ } else { ++ UNREACHABLE(); ++ return; ++ } ++ VisitAtomicBinop(this, node, opcode); ++} ++ ++#define VISIT_ATOMIC_BINOP(op) \ ++ void InstructionSelector::VisitWord64Atomic##op(Node* node) { \ ++ VisitWord64AtomicBinaryOperation( \ ++ node, kLa64Word64Atomic##op##Uint8, kLa64Word64Atomic##op##Uint16, \ ++ kLa64Word64Atomic##op##Uint32, kLa64Word64Atomic##op##Uint64); \ ++ } ++VISIT_ATOMIC_BINOP(Add) ++VISIT_ATOMIC_BINOP(Sub) ++VISIT_ATOMIC_BINOP(And) ++VISIT_ATOMIC_BINOP(Or) ++VISIT_ATOMIC_BINOP(Xor) ++#undef VISIT_ATOMIC_BINOP ++ ++void InstructionSelector::VisitInt32AbsWithOverflow(Node* node) { ++ 
UNREACHABLE(); ++} ++ ++void InstructionSelector::VisitInt64AbsWithOverflow(Node* node) { ++ UNREACHABLE(); ++} ++ ++#define SIMD_TYPE_LIST(V) \ ++ V(F32x4) \ ++ V(I32x4) \ ++ V(I16x8) \ ++ V(I8x16) ++ ++#define SIMD_UNOP_LIST(V) \ ++ V(F64x2Abs, kLa64F64x2Abs) \ ++ V(F64x2Neg, kLa64F64x2Neg) \ ++ V(F64x2Sqrt, kLa64F64x2Sqrt) \ ++ V(I64x2Neg, kLa64I64x2Neg) \ ++ V(F32x4SConvertI32x4, kLa64F32x4SConvertI32x4) \ ++ V(F32x4UConvertI32x4, kLa64F32x4UConvertI32x4) \ ++ V(F32x4Abs, kLa64F32x4Abs) \ ++ V(F32x4Neg, kLa64F32x4Neg) \ ++ V(F32x4Sqrt, kLa64F32x4Sqrt) \ ++ V(F32x4RecipApprox, kLa64F32x4RecipApprox) \ ++ V(F32x4RecipSqrtApprox, kLa64F32x4RecipSqrtApprox) \ ++ V(I32x4SConvertF32x4, kLa64I32x4SConvertF32x4) \ ++ V(I32x4UConvertF32x4, kLa64I32x4UConvertF32x4) \ ++ V(I32x4Neg, kLa64I32x4Neg) \ ++ V(I32x4SConvertI16x8Low, kLa64I32x4SConvertI16x8Low) \ ++ V(I32x4SConvertI16x8High, kLa64I32x4SConvertI16x8High) \ ++ V(I32x4UConvertI16x8Low, kLa64I32x4UConvertI16x8Low) \ ++ V(I32x4UConvertI16x8High, kLa64I32x4UConvertI16x8High) \ ++ V(I32x4Abs, kLa64I32x4Abs) \ ++ V(I16x8Neg, kLa64I16x8Neg) \ ++ V(I16x8SConvertI8x16Low, kLa64I16x8SConvertI8x16Low) \ ++ V(I16x8SConvertI8x16High, kLa64I16x8SConvertI8x16High) \ ++ V(I16x8UConvertI8x16Low, kLa64I16x8UConvertI8x16Low) \ ++ V(I16x8UConvertI8x16High, kLa64I16x8UConvertI8x16High) \ ++ V(I16x8Abs, kLa64I16x8Abs) \ ++ V(I8x16Neg, kLa64I8x16Neg) \ ++ V(I8x16Abs, kLa64I8x16Abs) \ ++ V(S128Not, kLa64S128Not) \ ++ V(S1x4AnyTrue, kLa64S1x4AnyTrue) \ ++ V(S1x4AllTrue, kLa64S1x4AllTrue) \ ++ V(S1x8AnyTrue, kLa64S1x8AnyTrue) \ ++ V(S1x8AllTrue, kLa64S1x8AllTrue) \ ++ V(S1x16AnyTrue, kLa64S1x16AnyTrue) \ ++ V(S1x16AllTrue, kLa64S1x16AllTrue) ++ ++#define SIMD_SHIFT_OP_LIST(V) \ ++ V(I64x2Shl) \ ++ V(I64x2ShrS) \ ++ V(I64x2ShrU) \ ++ V(I32x4Shl) \ ++ V(I32x4ShrS) \ ++ V(I32x4ShrU) \ ++ V(I16x8Shl) \ ++ V(I16x8ShrS) \ ++ V(I16x8ShrU) \ ++ V(I8x16Shl) \ ++ V(I8x16ShrS) \ ++ V(I8x16ShrU) ++ ++#define SIMD_BINOP_LIST(V) \ ++ V(F64x2Add, kLa64F64x2Add) \ ++ V(F64x2Sub, kLa64F64x2Sub) \ ++ V(F64x2Mul, kLa64F64x2Mul) \ ++ V(F64x2Div, kLa64F64x2Div) \ ++ V(F64x2Min, kLa64F64x2Min) \ ++ V(F64x2Max, kLa64F64x2Max) \ ++ V(F64x2Eq, kLa64F64x2Eq) \ ++ V(F64x2Ne, kLa64F64x2Ne) \ ++ V(F64x2Lt, kLa64F64x2Lt) \ ++ V(F64x2Le, kLa64F64x2Le) \ ++ V(I64x2Add, kLa64I64x2Add) \ ++ V(I64x2Sub, kLa64I64x2Sub) \ ++ V(I64x2Mul, kLa64I64x2Mul) \ ++ V(F32x4Add, kLa64F32x4Add) \ ++ V(F32x4AddHoriz, kLa64F32x4AddHoriz) \ ++ V(F32x4Sub, kLa64F32x4Sub) \ ++ V(F32x4Mul, kLa64F32x4Mul) \ ++ V(F32x4Div, kLa64F32x4Div) \ ++ V(F32x4Max, kLa64F32x4Max) \ ++ V(F32x4Min, kLa64F32x4Min) \ ++ V(F32x4Eq, kLa64F32x4Eq) \ ++ V(F32x4Ne, kLa64F32x4Ne) \ ++ V(F32x4Lt, kLa64F32x4Lt) \ ++ V(F32x4Le, kLa64F32x4Le) \ ++ V(I32x4Add, kLa64I32x4Add) \ ++ V(I32x4AddHoriz, kLa64I32x4AddHoriz) \ ++ V(I32x4Sub, kLa64I32x4Sub) \ ++ V(I32x4Mul, kLa64I32x4Mul) \ ++ V(I32x4MaxS, kLa64I32x4MaxS) \ ++ V(I32x4MinS, kLa64I32x4MinS) \ ++ V(I32x4MaxU, kLa64I32x4MaxU) \ ++ V(I32x4MinU, kLa64I32x4MinU) \ ++ V(I32x4Eq, kLa64I32x4Eq) \ ++ V(I32x4Ne, kLa64I32x4Ne) \ ++ V(I32x4GtS, kLa64I32x4GtS) \ ++ V(I32x4GeS, kLa64I32x4GeS) \ ++ V(I32x4GtU, kLa64I32x4GtU) \ ++ V(I32x4GeU, kLa64I32x4GeU) \ ++ V(I16x8Add, kLa64I16x8Add) \ ++ V(I16x8AddSaturateS, kLa64I16x8AddSaturateS) \ ++ V(I16x8AddSaturateU, kLa64I16x8AddSaturateU) \ ++ V(I16x8AddHoriz, kLa64I16x8AddHoriz) \ ++ V(I16x8Sub, kLa64I16x8Sub) \ ++ V(I16x8SubSaturateS, kLa64I16x8SubSaturateS) \ ++ V(I16x8SubSaturateU, kLa64I16x8SubSaturateU) \ ++ V(I16x8Mul, kLa64I16x8Mul) \ ++ V(I16x8MaxS, 
kLa64I16x8MaxS) \ ++ V(I16x8MinS, kLa64I16x8MinS) \ ++ V(I16x8MaxU, kLa64I16x8MaxU) \ ++ V(I16x8MinU, kLa64I16x8MinU) \ ++ V(I16x8Eq, kLa64I16x8Eq) \ ++ V(I16x8Ne, kLa64I16x8Ne) \ ++ V(I16x8GtS, kLa64I16x8GtS) \ ++ V(I16x8GeS, kLa64I16x8GeS) \ ++ V(I16x8GtU, kLa64I16x8GtU) \ ++ V(I16x8GeU, kLa64I16x8GeU) \ ++ V(I16x8RoundingAverageU, kLa64I16x8RoundingAverageU) \ ++ V(I16x8SConvertI32x4, kLa64I16x8SConvertI32x4) \ ++ V(I16x8UConvertI32x4, kLa64I16x8UConvertI32x4) \ ++ V(I8x16Add, kLa64I8x16Add) \ ++ V(I8x16AddSaturateS, kLa64I8x16AddSaturateS) \ ++ V(I8x16AddSaturateU, kLa64I8x16AddSaturateU) \ ++ V(I8x16Sub, kLa64I8x16Sub) \ ++ V(I8x16SubSaturateS, kLa64I8x16SubSaturateS) \ ++ V(I8x16SubSaturateU, kLa64I8x16SubSaturateU) \ ++ V(I8x16Mul, kLa64I8x16Mul) \ ++ V(I8x16MaxS, kLa64I8x16MaxS) \ ++ V(I8x16MinS, kLa64I8x16MinS) \ ++ V(I8x16MaxU, kLa64I8x16MaxU) \ ++ V(I8x16MinU, kLa64I8x16MinU) \ ++ V(I8x16Eq, kLa64I8x16Eq) \ ++ V(I8x16Ne, kLa64I8x16Ne) \ ++ V(I8x16GtS, kLa64I8x16GtS) \ ++ V(I8x16GeS, kLa64I8x16GeS) \ ++ V(I8x16GtU, kLa64I8x16GtU) \ ++ V(I8x16GeU, kLa64I8x16GeU) \ ++ V(I8x16RoundingAverageU, kLa64I8x16RoundingAverageU) \ ++ V(I8x16SConvertI16x8, kLa64I8x16SConvertI16x8) \ ++ V(I8x16UConvertI16x8, kLa64I8x16UConvertI16x8) \ ++ V(S128And, kLa64S128And) \ ++ V(S128Or, kLa64S128Or) \ ++ V(S128Xor, kLa64S128Xor) \ ++ V(S128AndNot, kLa64S128AndNot) ++ ++void InstructionSelector::VisitS128Zero(Node* node) { ++ La64OperandGenerator g(this); ++ Emit(kLa64S128Zero, g.DefineAsRegister(node)); ++} ++ ++#define SIMD_VISIT_SPLAT(Type) \ ++ void InstructionSelector::Visit##Type##Splat(Node* node) { \ ++ VisitRR(this, kLa64##Type##Splat, node); \ ++ } ++SIMD_TYPE_LIST(SIMD_VISIT_SPLAT) ++SIMD_VISIT_SPLAT(F64x2) ++#undef SIMD_VISIT_SPLAT ++ ++#define SIMD_VISIT_EXTRACT_LANE(Type, Sign) \ ++ void InstructionSelector::Visit##Type##ExtractLane##Sign(Node* node) { \ ++ VisitRRI(this, kLa64##Type##ExtractLane##Sign, node); \ ++ } ++SIMD_VISIT_EXTRACT_LANE(F64x2, ) ++SIMD_VISIT_EXTRACT_LANE(F32x4, ) ++SIMD_VISIT_EXTRACT_LANE(I32x4, ) ++SIMD_VISIT_EXTRACT_LANE(I16x8, U) ++SIMD_VISIT_EXTRACT_LANE(I16x8, S) ++SIMD_VISIT_EXTRACT_LANE(I8x16, U) ++SIMD_VISIT_EXTRACT_LANE(I8x16, S) ++#undef SIMD_VISIT_EXTRACT_LANE ++ ++#define SIMD_VISIT_REPLACE_LANE(Type) \ ++ void InstructionSelector::Visit##Type##ReplaceLane(Node* node) { \ ++ VisitRRIR(this, kLa64##Type##ReplaceLane, node); \ ++ } ++SIMD_TYPE_LIST(SIMD_VISIT_REPLACE_LANE) ++SIMD_VISIT_REPLACE_LANE(F64x2) ++#undef SIMD_VISIT_REPLACE_LANE ++ ++#define SIMD_VISIT_UNOP(Name, instruction) \ ++ void InstructionSelector::Visit##Name(Node* node) { \ ++ VisitRR(this, instruction, node); \ ++ } ++SIMD_UNOP_LIST(SIMD_VISIT_UNOP) ++#undef SIMD_VISIT_UNOP ++ ++#define SIMD_VISIT_SHIFT_OP(Name) \ ++ void InstructionSelector::Visit##Name(Node* node) { \ ++ VisitSimdShift(this, kLa64##Name, node); \ ++ } ++SIMD_SHIFT_OP_LIST(SIMD_VISIT_SHIFT_OP) ++#undef SIMD_VISIT_SHIFT_OP ++ ++#define SIMD_VISIT_BINOP(Name, instruction) \ ++ void InstructionSelector::Visit##Name(Node* node) { \ ++ VisitRRR(this, instruction, node); \ ++ } ++SIMD_BINOP_LIST(SIMD_VISIT_BINOP) ++#undef SIMD_VISIT_BINOP ++ ++void InstructionSelector::VisitS128Select(Node* node) { ++ VisitRRRR(this, kLa64S128Select, node); ++} ++ ++namespace { ++ ++struct ShuffleEntry { ++ uint8_t shuffle[kSimd128Size]; ++ ArchOpcode opcode; ++}; ++ ++static const ShuffleEntry arch_shuffles[] = { ++ {{0, 1, 2, 3, 16, 17, 18, 19, 4, 5, 6, 7, 20, 21, 22, 23}, ++ kLa64S32x4InterleaveRight}, ++ {{8, 9, 10, 11, 24, 25, 26, 27, 
12, 13, 14, 15, 28, 29, 30, 31}, ++ kLa64S32x4InterleaveLeft}, ++ {{0, 1, 2, 3, 8, 9, 10, 11, 16, 17, 18, 19, 24, 25, 26, 27}, ++ kLa64S32x4PackEven}, ++ {{4, 5, 6, 7, 12, 13, 14, 15, 20, 21, 22, 23, 28, 29, 30, 31}, ++ kLa64S32x4PackOdd}, ++ {{0, 1, 2, 3, 16, 17, 18, 19, 8, 9, 10, 11, 24, 25, 26, 27}, ++ kLa64S32x4InterleaveEven}, ++ {{4, 5, 6, 7, 20, 21, 22, 23, 12, 13, 14, 15, 28, 29, 30, 31}, ++ kLa64S32x4InterleaveOdd}, ++ ++ {{0, 1, 16, 17, 2, 3, 18, 19, 4, 5, 20, 21, 6, 7, 22, 23}, ++ kLa64S16x8InterleaveRight}, ++ {{8, 9, 24, 25, 10, 11, 26, 27, 12, 13, 28, 29, 14, 15, 30, 31}, ++ kLa64S16x8InterleaveLeft}, ++ {{0, 1, 4, 5, 8, 9, 12, 13, 16, 17, 20, 21, 24, 25, 28, 29}, ++ kLa64S16x8PackEven}, ++ {{2, 3, 6, 7, 10, 11, 14, 15, 18, 19, 22, 23, 26, 27, 30, 31}, ++ kLa64S16x8PackOdd}, ++ {{0, 1, 16, 17, 4, 5, 20, 21, 8, 9, 24, 25, 12, 13, 28, 29}, ++ kLa64S16x8InterleaveEven}, ++ {{2, 3, 18, 19, 6, 7, 22, 23, 10, 11, 26, 27, 14, 15, 30, 31}, ++ kLa64S16x8InterleaveOdd}, ++ {{6, 7, 4, 5, 2, 3, 0, 1, 14, 15, 12, 13, 10, 11, 8, 9}, kLa64S16x4Reverse}, ++ {{2, 3, 0, 1, 6, 7, 4, 5, 10, 11, 8, 9, 14, 15, 12, 13}, kLa64S16x2Reverse}, ++ ++ {{0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23}, ++ kLa64S8x16InterleaveRight}, ++ {{8, 24, 9, 25, 10, 26, 11, 27, 12, 28, 13, 29, 14, 30, 15, 31}, ++ kLa64S8x16InterleaveLeft}, ++ {{0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30}, ++ kLa64S8x16PackEven}, ++ {{1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31}, ++ kLa64S8x16PackOdd}, ++ {{0, 16, 2, 18, 4, 20, 6, 22, 8, 24, 10, 26, 12, 28, 14, 30}, ++ kLa64S8x16InterleaveEven}, ++ {{1, 17, 3, 19, 5, 21, 7, 23, 9, 25, 11, 27, 13, 29, 15, 31}, ++ kLa64S8x16InterleaveOdd}, ++ {{7, 6, 5, 4, 3, 2, 1, 0, 15, 14, 13, 12, 11, 10, 9, 8}, kLa64S8x8Reverse}, ++ {{3, 2, 1, 0, 7, 6, 5, 4, 11, 10, 9, 8, 15, 14, 13, 12}, kLa64S8x4Reverse}, ++ {{1, 0, 3, 2, 5, 4, 7, 6, 9, 8, 11, 10, 13, 12, 15, 14}, kLa64S8x2Reverse}}; ++ ++bool TryMatchArchShuffle(const uint8_t* shuffle, const ShuffleEntry* table, ++ size_t num_entries, bool is_swizzle, ++ ArchOpcode* opcode) { ++ uint8_t mask = is_swizzle ? 
kSimd128Size - 1 : 2 * kSimd128Size - 1; ++ for (size_t i = 0; i < num_entries; ++i) { ++ const ShuffleEntry& entry = table[i]; ++ int j = 0; ++ for (; j < kSimd128Size; ++j) { ++ if ((entry.shuffle[j] & mask) != (shuffle[j] & mask)) { ++ break; ++ } ++ } ++ if (j == kSimd128Size) { ++ *opcode = entry.opcode; ++ return true; ++ } ++ } ++ return false; ++} ++ ++} // namespace ++ ++void InstructionSelector::VisitS8x16Shuffle(Node* node) { ++ uint8_t shuffle[kSimd128Size]; ++ bool is_swizzle; ++ CanonicalizeShuffle(node, shuffle, &is_swizzle); ++ uint8_t shuffle32x4[4]; ++ ArchOpcode opcode; ++ if (TryMatchArchShuffle(shuffle, arch_shuffles, arraysize(arch_shuffles), ++ is_swizzle, &opcode)) { ++ VisitRRR(this, opcode, node); ++ return; ++ } ++ Node* input0 = node->InputAt(0); ++ Node* input1 = node->InputAt(1); ++ uint8_t offset; ++ La64OperandGenerator g(this); ++ if (TryMatchConcat(shuffle, &offset)) { ++ Emit(kLa64S8x16Concat, g.DefineSameAsFirst(node), g.UseRegister(input1), ++ g.UseRegister(input0), g.UseImmediate(offset)); ++ return; ++ } ++ if (TryMatch32x4Shuffle(shuffle, shuffle32x4)) { ++ Emit(kLa64S32x4Shuffle, g.DefineAsRegister(node), g.UseRegister(input0), ++ g.UseRegister(input1), g.UseImmediate(Pack4Lanes(shuffle32x4))); ++ return; ++ } ++ Emit(kLa64S8x16Shuffle, g.DefineAsRegister(node), g.UseRegister(input0), ++ g.UseRegister(input1), g.UseImmediate(Pack4Lanes(shuffle)), ++ g.UseImmediate(Pack4Lanes(shuffle + 4)), ++ g.UseImmediate(Pack4Lanes(shuffle + 8)), ++ g.UseImmediate(Pack4Lanes(shuffle + 12))); ++} ++ ++void InstructionSelector::VisitS8x16Swizzle(Node* node) { ++ La64OperandGenerator g(this); ++ InstructionOperand temps[] = {g.TempSimd128Register()}; ++ // We don't want input 0 or input 1 to be the same as output, since we will ++ // modify output before do the calculation. 
++ Emit(kLa64S8x16Swizzle, g.DefineAsRegister(node), ++ g.UseUniqueRegister(node->InputAt(0)), ++ g.UseUniqueRegister(node->InputAt(1)), arraysize(temps), temps); ++} ++ ++void InstructionSelector::VisitSignExtendWord8ToInt32(Node* node) { ++ La64OperandGenerator g(this); ++ Emit(kLa64Seb, g.DefineAsRegister(node), g.UseRegister(node->InputAt(0))); ++} ++ ++void InstructionSelector::VisitSignExtendWord16ToInt32(Node* node) { ++ La64OperandGenerator g(this); ++ Emit(kLa64Seh, g.DefineAsRegister(node), g.UseRegister(node->InputAt(0))); ++} ++ ++void InstructionSelector::VisitSignExtendWord8ToInt64(Node* node) { ++ La64OperandGenerator g(this); ++ Emit(kLa64Seb, g.DefineAsRegister(node), g.UseRegister(node->InputAt(0))); ++} ++ ++void InstructionSelector::VisitSignExtendWord16ToInt64(Node* node) { ++ La64OperandGenerator g(this); ++ Emit(kLa64Seh, g.DefineAsRegister(node), g.UseRegister(node->InputAt(0))); ++} ++ ++void InstructionSelector::VisitSignExtendWord32ToInt64(Node* node) { ++ La64OperandGenerator g(this); ++ Emit(kLa64Shl, g.DefineAsRegister(node), g.UseRegister(node->InputAt(0)), ++ g.TempImmediate(0)); ++} ++ ++// static ++MachineOperatorBuilder::Flags ++InstructionSelector::SupportedMachineOperatorFlags() { ++ MachineOperatorBuilder::Flags flags = MachineOperatorBuilder::kNoFlags; ++ return flags | MachineOperatorBuilder::kWord32Ctz | ++ MachineOperatorBuilder::kWord64Ctz | ++ MachineOperatorBuilder::kWord32Popcnt | ++ MachineOperatorBuilder::kWord64Popcnt | ++ MachineOperatorBuilder::kWord32ShiftIsSafe | ++ MachineOperatorBuilder::kInt32DivIsSafe | ++ MachineOperatorBuilder::kUint32DivIsSafe | ++ MachineOperatorBuilder::kFloat64RoundDown | ++ MachineOperatorBuilder::kFloat32RoundDown | ++ MachineOperatorBuilder::kFloat64RoundUp | ++ MachineOperatorBuilder::kFloat32RoundUp | ++ MachineOperatorBuilder::kFloat64RoundTruncate | ++ MachineOperatorBuilder::kFloat32RoundTruncate | ++ MachineOperatorBuilder::kFloat64RoundTiesEven | ++ MachineOperatorBuilder::kFloat32RoundTiesEven; ++} ++ ++// static ++MachineOperatorBuilder::AlignmentRequirements ++InstructionSelector::AlignmentRequirements() { ++ return MachineOperatorBuilder::AlignmentRequirements:: ++ FullUnalignedAccessSupport(); ++} ++ ++#undef SIMD_BINOP_LIST ++#undef SIMD_SHIFT_OP_LIST ++#undef SIMD_UNOP_LIST ++#undef SIMD_TYPE_LIST ++#undef TRACE_UNIMPL ++#undef TRACE ++ ++} // namespace compiler ++} // namespace internal ++} // namespace v8 +diff --git a/src/3rdparty/chromium/v8/src/compiler/c-linkage.cc b/src/3rdparty/chromium/v8/src/compiler/c-linkage.cc +index 4967f2bbfa1..c3701e3ef07 100644 +--- a/src/3rdparty/chromium/v8/src/compiler/c-linkage.cc ++++ b/src/3rdparty/chromium/v8/src/compiler/c-linkage.cc +@@ -94,9 +94,22 @@ namespace { + #define PARAM_REGISTERS a0, a1, a2, a3, a4, a5, a6, a7 + #define CALLEE_SAVE_REGISTERS \ + s0.bit() | s1.bit() | s2.bit() | s3.bit() | s4.bit() | s5.bit() | s6.bit() | \ +- s7.bit() +-#define CALLEE_SAVE_FP_REGISTERS \ +- f20.bit() | f22.bit() | f24.bit() | f26.bit() | f28.bit() | f30.bit() ++ s7.bit() | fp.bit() ++#define CALLEE_SAVE_FP_REGISTERS \ ++ f24.bit() | f25.bit() | f26.bit() | f27.bit() | f28.bit() | f29.bit() | \ ++ f30.bit() | f31.bit() ++ ++#elif V8_TARGET_ARCH_LA64 ++// =========================================================================== ++// == la64 ================================================================= ++// =========================================================================== ++#define PARAM_REGISTERS a0, a1, a2, a3, a4, a5, a6, a7 ++#define 
CALLEE_SAVE_REGISTERS \ ++ s0.bit() | s1.bit() | s2.bit() | s3.bit() | s4.bit() | s5.bit() | s6.bit() | \ ++ s7.bit() | fp.bit() ++#define CALLEE_SAVE_FP_REGISTERS \ ++ f24.bit() | f25.bit() | f26.bit() | f27.bit() | f28.bit() | f29.bit() | \ ++ f30.bit() | f31.bit() + + #elif V8_TARGET_ARCH_PPC64 + // =========================================================================== +diff --git a/src/3rdparty/chromium/v8/src/debug/debug-evaluate.cc b/src/3rdparty/chromium/v8/src/debug/debug-evaluate.cc +index fcf9b8448a8..f704f0b6d84 100644 +--- a/src/3rdparty/chromium/v8/src/debug/debug-evaluate.cc ++++ b/src/3rdparty/chromium/v8/src/debug/debug-evaluate.cc +@@ -1067,7 +1067,7 @@ void DebugEvaluate::VerifyTransitiveBuiltins(Isolate* isolate) { + } + CHECK(!failed); + #if defined(V8_TARGET_ARCH_PPC) || defined(V8_TARGET_ARCH_PPC64) || \ +- defined(V8_TARGET_ARCH_MIPS64) ++ defined(V8_TARGET_ARCH_MIPS64) || defined(V8_TARGET_ARCH_LA64) + // Isolate-independent builtin calls and jumps do not emit reloc infos + // on PPC. We try to avoid using PC relative code due to performance + // issue with especially older hardwares. +diff --git a/src/3rdparty/chromium/v8/src/debug/la64/debug-la64.cc b/src/3rdparty/chromium/v8/src/debug/la64/debug-la64.cc +new file mode 100644 +index 00000000000..081135d3bb4 +--- /dev/null ++++ b/src/3rdparty/chromium/v8/src/debug/la64/debug-la64.cc +@@ -0,0 +1,56 @@ ++// Copyright 2012 the V8 project authors. All rights reserved. ++// Use of this source code is governed by a BSD-style license that can be ++// found in the LICENSE file. ++ ++#if V8_TARGET_ARCH_LA64 ++ ++#include "src/debug/debug.h" ++ ++#include "src/codegen/macro-assembler.h" ++#include "src/debug/liveedit.h" ++#include "src/execution/frames-inl.h" ++ ++namespace v8 { ++namespace internal { ++ ++#define __ ACCESS_MASM(masm) ++ ++void DebugCodegen::GenerateHandleDebuggerStatement(MacroAssembler* masm) { ++ { ++ FrameScope scope(masm, StackFrame::INTERNAL); ++ __ CallRuntime(Runtime::kHandleDebuggerStatement, 0); ++ } ++ __ MaybeDropFrames(); ++ ++ // Return to caller. ++ __ Ret(); ++} ++ ++void DebugCodegen::GenerateFrameDropperTrampoline(MacroAssembler* masm) { ++ // Frame is being dropped: ++ // - Drop to the target frame specified by a1. ++ // - Look up current function on the frame. ++ // - Leave the frame. ++ // - Restart the frame by calling the function. ++ __ mov(fp, a1); ++ __ Ld_d(a1, MemOperand(fp, StandardFrameConstants::kFunctionOffset)); ++ ++ // Pop return address and frame. ++ __ LeaveFrame(StackFrame::INTERNAL); ++ ++ __ Ld_d(a0, FieldMemOperand(a1, JSFunction::kSharedFunctionInfoOffset)); ++ __ Ld_hu( ++ a0, FieldMemOperand(a0, SharedFunctionInfo::kFormalParameterCountOffset)); ++ __ mov(a2, a0); ++ ++ __ InvokeFunction(a1, a2, a0, JUMP_FUNCTION); ++} ++ ++const bool LiveEdit::kFrameDropperSupported = true; ++ ++#undef __ ++ ++} // namespace internal ++} // namespace v8 ++ ++#endif // V8_TARGET_ARCH_LA64 +diff --git a/src/3rdparty/chromium/v8/src/deoptimizer/la64/deoptimizer-la64.cc b/src/3rdparty/chromium/v8/src/deoptimizer/la64/deoptimizer-la64.cc +new file mode 100644 +index 00000000000..23a0051d93d +--- /dev/null ++++ b/src/3rdparty/chromium/v8/src/deoptimizer/la64/deoptimizer-la64.cc +@@ -0,0 +1,241 @@ ++// Copyright 2011 the V8 project authors. All rights reserved. ++// Use of this source code is governed by a BSD-style license that can be ++// found in the LICENSE file. 
++ ++#include "src/codegen/macro-assembler.h" ++#include "src/codegen/register-configuration.h" ++#include "src/codegen/safepoint-table.h" ++#include "src/deoptimizer/deoptimizer.h" ++ ++namespace v8 { ++namespace internal { ++ ++const bool Deoptimizer::kSupportsFixedDeoptExitSizes = false; ++const int Deoptimizer::kNonLazyDeoptExitSize = 0; ++const int Deoptimizer::kLazyDeoptExitSize = 0; ++ ++#define __ masm-> ++ ++// This code tries to be close to ia32 code so that any changes can be ++// easily ported. ++void Deoptimizer::GenerateDeoptimizationEntries(MacroAssembler* masm, ++ Isolate* isolate, ++ DeoptimizeKind deopt_kind) { ++ NoRootArrayScope no_root_array(masm); ++ ++ // Unlike on ARM we don't save all the registers, just the useful ones. ++ // For the rest, there are gaps on the stack, so the offsets remain the same. ++ const int kNumberOfRegisters = Register::kNumRegisters; ++ ++ RegList restored_regs = kJSCallerSaved | kCalleeSaved; ++ RegList saved_regs = restored_regs | sp.bit() | ra.bit(); ++ ++ const int kDoubleRegsSize = kDoubleSize * DoubleRegister::kNumRegisters; ++ ++ // Save all double FPU registers before messing with them. ++ __ Sub_d(sp, sp, Operand(kDoubleRegsSize)); ++ const RegisterConfiguration* config = RegisterConfiguration::Default(); ++ for (int i = 0; i < config->num_allocatable_double_registers(); ++i) { ++ int code = config->GetAllocatableDoubleCode(i); ++ const DoubleRegister fpu_reg = DoubleRegister::from_code(code); ++ int offset = code * kDoubleSize; ++ __ Fst_d(fpu_reg, MemOperand(sp, offset)); ++ } ++ ++ // Push saved_regs (needed to populate FrameDescription::registers_). ++ // Leave gaps for other registers. ++ __ Sub_d(sp, sp, kNumberOfRegisters * kPointerSize); ++ for (int16_t i = kNumberOfRegisters - 1; i >= 0; i--) { ++ if ((saved_regs & (1 << i)) != 0) { ++ __ St_d(ToRegister(i), MemOperand(sp, kPointerSize * i)); ++ } ++ } ++ ++ __ li(a2, Operand(ExternalReference::Create( ++ IsolateAddressId::kCEntryFPAddress, isolate))); ++ __ St_d(fp, MemOperand(a2, 0)); ++ ++ const int kSavedRegistersAreaSize = ++ (kNumberOfRegisters * kPointerSize) + kDoubleRegsSize; ++ ++ // Get the bailout is passed as kRootRegister by the caller. ++ __ mov(a2, kRootRegister); ++ ++ // Get the address of the location in the code object (a3) (return ++ // address for lazy deoptimization) and compute the fp-to-sp delta in ++ // register a4. ++ __ mov(a3, ra); ++ __ Add_d(a4, sp, Operand(kSavedRegistersAreaSize)); ++ ++ __ Sub_d(a4, fp, a4); ++ ++ // Allocate a new deoptimizer object. ++ __ PrepareCallCFunction(6, a5); ++ // Pass six arguments, according to n64 ABI. ++ __ mov(a0, zero_reg); ++ Label context_check; ++ __ Ld_d(a1, MemOperand(fp, CommonFrameConstants::kContextOrFrameTypeOffset)); ++ __ JumpIfSmi(a1, &context_check); ++ __ Ld_d(a0, MemOperand(fp, StandardFrameConstants::kFunctionOffset)); ++ __ bind(&context_check); ++ __ li(a1, Operand(static_cast(deopt_kind))); ++ // a2: bailout id already loaded. ++ // a3: code address or 0 already loaded. ++ // a4: already has fp-to-sp delta. ++ __ li(a5, Operand(ExternalReference::isolate_address(isolate))); ++ ++ // Call Deoptimizer::New(). ++ { ++ AllowExternalCallThatCantCauseGC scope(masm); ++ __ CallCFunction(ExternalReference::new_deoptimizer_function(), 6); ++ } ++ ++ // Preserve "deoptimizer" object in register v0 and get the input ++ // frame descriptor pointer to a1 (deoptimizer->input_); ++ // Move deopt-obj to a0 for call to Deoptimizer::ComputeOutputFrames() below. 
++ // TODO save a0 ++ //__ mov(a0, v0); ++ __ Ld_d(a1, MemOperand(a0, Deoptimizer::input_offset())); ++ ++ // Copy core registers into FrameDescription::registers_[kNumRegisters]. ++ DCHECK_EQ(Register::kNumRegisters, kNumberOfRegisters); ++ for (int i = 0; i < kNumberOfRegisters; i++) { ++ int offset = (i * kPointerSize) + FrameDescription::registers_offset(); ++ if ((saved_regs & (1 << i)) != 0) { ++ __ Ld_d(a2, MemOperand(sp, i * kPointerSize)); ++ __ St_d(a2, MemOperand(a1, offset)); ++ } else if (FLAG_debug_code) { ++ __ li(a2, Operand(kDebugZapValue)); ++ __ St_d(a2, MemOperand(a1, offset)); ++ } ++ } ++ ++ int double_regs_offset = FrameDescription::double_registers_offset(); ++ // Copy FPU registers to ++ // double_registers_[DoubleRegister::kNumAllocatableRegisters] ++ for (int i = 0; i < config->num_allocatable_double_registers(); ++i) { ++ int code = config->GetAllocatableDoubleCode(i); ++ int dst_offset = code * kDoubleSize + double_regs_offset; ++ int src_offset = code * kDoubleSize + kNumberOfRegisters * kPointerSize; ++ __ Fld_d(f0, MemOperand(sp, src_offset)); ++ __ Fst_d(f0, MemOperand(a1, dst_offset)); ++ } ++ ++ // Remove the saved registers from the stack. ++ __ Add_d(sp, sp, Operand(kSavedRegistersAreaSize)); ++ ++ // Compute a pointer to the unwinding limit in register a2; that is ++ // the first stack slot not part of the input frame. ++ __ Ld_d(a2, MemOperand(a1, FrameDescription::frame_size_offset())); ++ __ Add_d(a2, a2, sp); ++ ++ // Unwind the stack down to - but not including - the unwinding ++ // limit and copy the contents of the activation frame to the input ++ // frame description. ++ __ Add_d(a3, a1, Operand(FrameDescription::frame_content_offset())); ++ Label pop_loop; ++ Label pop_loop_header; ++ __ Branch(&pop_loop_header); ++ __ bind(&pop_loop); ++ __ pop(a4); ++ __ St_d(a4, MemOperand(a3, 0)); ++ __ addi_d(a3, a3, sizeof(uint64_t)); ++ __ bind(&pop_loop_header); ++ __ BranchShort(&pop_loop, ne, a2, Operand(sp)); ++ // Compute the output frame in the deoptimizer. ++ __ push(a0); // Preserve deoptimizer object across call. ++ // a0: deoptimizer object; a1: scratch. ++ __ PrepareCallCFunction(1, a1); ++ // Call Deoptimizer::ComputeOutputFrames(). ++ { ++ AllowExternalCallThatCantCauseGC scope(masm); ++ __ CallCFunction(ExternalReference::compute_output_frames_function(), 1); ++ } ++ __ pop(a0); // Restore deoptimizer object (class Deoptimizer). ++ ++ __ Ld_d(sp, MemOperand(a0, Deoptimizer::caller_frame_top_offset())); ++ ++ // Replace the current (input) frame with the output frames. ++ Label outer_push_loop, inner_push_loop, outer_loop_header, inner_loop_header; ++ // Outer loop state: a4 = current "FrameDescription** output_", ++ // a1 = one past the last FrameDescription**. ++ __ Ld_w(a1, MemOperand(a0, Deoptimizer::output_count_offset())); ++ __ Ld_d(a4, MemOperand(a0, Deoptimizer::output_offset())); // a4 is output_. ++ __ Alsl_d(a1, a1, a4, kPointerSizeLog2, t7); ++ __ Branch(&outer_loop_header); ++ __ bind(&outer_push_loop); ++ // Inner loop state: a2 = current FrameDescription*, a3 = loop index. 
++ __ Ld_d(a2, MemOperand(a4, 0)); // output_[ix] ++ __ Ld_d(a3, MemOperand(a2, FrameDescription::frame_size_offset())); ++ __ Branch(&inner_loop_header); ++ __ bind(&inner_push_loop); ++ __ Sub_d(a3, a3, Operand(sizeof(uint64_t))); ++ __ Add_d(a6, a2, Operand(a3)); ++ __ Ld_d(a7, MemOperand(a6, FrameDescription::frame_content_offset())); ++ __ push(a7); ++ __ bind(&inner_loop_header); ++ __ BranchShort(&inner_push_loop, ne, a3, Operand(zero_reg)); ++ ++ __ Add_d(a4, a4, Operand(kPointerSize)); ++ __ bind(&outer_loop_header); ++ __ BranchShort(&outer_push_loop, lt, a4, Operand(a1)); ++ ++ __ Ld_d(a1, MemOperand(a0, Deoptimizer::input_offset())); ++ for (int i = 0; i < config->num_allocatable_double_registers(); ++i) { ++ int code = config->GetAllocatableDoubleCode(i); ++ const DoubleRegister fpu_reg = DoubleRegister::from_code(code); ++ int src_offset = code * kDoubleSize + double_regs_offset; ++ __ Fld_d(fpu_reg, MemOperand(a1, src_offset)); ++ } ++ ++ // Push pc and continuation from the last output frame. ++ __ Ld_d(a6, MemOperand(a2, FrameDescription::pc_offset())); ++ __ push(a6); ++ __ Ld_d(a6, MemOperand(a2, FrameDescription::continuation_offset())); ++ __ push(a6); ++ ++ // Technically restoring 'at' should work unless zero_reg is also restored ++ // but it's safer to check for this. ++ DCHECK(!(t7.bit() & restored_regs)); ++ // Restore the registers from the last output frame. ++ __ mov(t7, a2); ++ for (int i = kNumberOfRegisters - 1; i >= 0; i--) { ++ int offset = (i * kPointerSize) + FrameDescription::registers_offset(); ++ if ((restored_regs & (1 << i)) != 0) { ++ __ Ld_d(ToRegister(i), MemOperand(t7, offset)); ++ } ++ } ++ ++ __ pop(t7); // Get continuation, leave pc on stack. ++ __ pop(ra); ++ __ Jump(t7); ++ __ stop(); ++} ++ ++// Maximum size of a table entry generated below. ++const int Deoptimizer::table_entry_size_ = 2 * kInstrSize; ++ ++Float32 RegisterValues::GetFloatRegister(unsigned n) const { ++ return Float32::FromBits( ++ static_cast(double_registers_[n].get_bits())); ++} ++ ++void FrameDescription::SetCallerPc(unsigned offset, intptr_t value) { ++ SetFrameSlot(offset, value); ++} ++ ++void FrameDescription::SetCallerFp(unsigned offset, intptr_t value) { ++ SetFrameSlot(offset, value); ++} ++ ++void FrameDescription::SetCallerConstantPool(unsigned offset, intptr_t value) { ++ // No embedded constant pool support. ++ UNREACHABLE(); ++} ++ ++void FrameDescription::SetPc(intptr_t pc) { pc_ = pc; } ++ ++#undef __ ++ ++} // namespace internal ++} // namespace v8 +diff --git a/src/3rdparty/chromium/v8/src/diagnostics/gdb-jit.cc b/src/3rdparty/chromium/v8/src/diagnostics/gdb-jit.cc +index 5f364373027..4ef6eba3273 100644 +--- a/src/3rdparty/chromium/v8/src/diagnostics/gdb-jit.cc ++++ b/src/3rdparty/chromium/v8/src/diagnostics/gdb-jit.cc +@@ -1077,6 +1077,8 @@ class DebugInfoSection : public DebugSection { + UNIMPLEMENTED(); + #elif V8_TARGET_ARCH_MIPS64 + UNIMPLEMENTED(); ++#elif V8_TARGET_ARCH_LA64 ++ UNIMPLEMENTED(); + #elif V8_TARGET_ARCH_PPC64 && V8_OS_LINUX + w->Write(DW_OP_reg31); // The frame pointer is here on PPC64. 
+ #elif V8_TARGET_ARCH_S390 +diff --git a/src/3rdparty/chromium/v8/src/diagnostics/la64/disasm-la64.cc b/src/3rdparty/chromium/v8/src/diagnostics/la64/disasm-la64.cc +new file mode 100644 +index 00000000000..0d3e8ee89f7 +--- /dev/null ++++ b/src/3rdparty/chromium/v8/src/diagnostics/la64/disasm-la64.cc +@@ -0,0 +1,1841 @@ ++#include ++#include ++#include ++#include ++ ++#if V8_TARGET_ARCH_LA64 ++ ++#include "src/base/platform/platform.h" ++#include "src/codegen/la64/constants-la64.h" ++#include "src/codegen/macro-assembler.h" ++#include "src/diagnostics/disasm.h" ++ ++namespace v8 { ++namespace internal { ++ ++//------------------------------------------------------------------------------ ++ ++// Decoder decodes and disassembles instructions into an output buffer. ++// It uses the converter to convert register names and call destinations into ++// more informative description. ++class Decoder { ++ public: ++ Decoder(const disasm::NameConverter& converter, ++ v8::internal::Vector out_buffer) ++ : converter_(converter), out_buffer_(out_buffer), out_buffer_pos_(0) { ++ out_buffer_[out_buffer_pos_] = '\0'; ++ } ++ ++ ~Decoder() {} ++ ++ // Writes one disassembled instruction into 'buffer' (0-terminated). ++ // Returns the length of the disassembled machine instruction in bytes. ++ int InstructionDecode(byte* instruction); ++ ++ private: ++ // Bottleneck functions to print into the out_buffer. ++ void PrintChar(const char ch); ++ void Print(const char* str); ++ ++ // Printing of common values. ++ void PrintRegister(int reg); ++ void PrintFPURegister(int freg); ++ void PrintFPUStatusRegister(int freg); ++ void PrintRj(Instruction* instr); ++ void PrintRk(Instruction* instr); ++ void PrintRd(Instruction* instr); ++ void PrintFj(Instruction* instr); ++ void PrintFk(Instruction* instr); ++ void PrintFd(Instruction* instr); ++ void PrintFa(Instruction* instr); ++ void PrintSa2(Instruction* instr); ++ void PrintSa3(Instruction* instr); ++ void PrintUi5(Instruction* instr); ++ void PrintUi6(Instruction* instr); ++ void PrintUi12(Instruction* instr); ++ void PrintXi12(Instruction* instr); ++ void PrintMsbw(Instruction* instr); ++ void PrintLsbw(Instruction* instr); ++ void PrintMsbd(Instruction* instr); ++ void PrintLsbd(Instruction* instr); ++ // void PrintCond(Instruction* instr); ++ void PrintSi12(Instruction* instr); ++ void PrintSi14(Instruction* instr); ++ void PrintSi16(Instruction* instr); ++ void PrintSi20(Instruction* instr); ++ void PrintCj(Instruction* instr); ++ void PrintCd(Instruction* instr); ++ void PrintCa(Instruction* instr); ++ void PrintCode(Instruction* instr); ++ void PrintHint5(Instruction* instr); ++ void PrintHint15(Instruction* instr); ++ void PrintPCOffs16(Instruction* instr); ++ void PrintPCOffs21(Instruction* instr); ++ void PrintPCOffs26(Instruction* instr); ++ void PrintOffs16(Instruction* instr); ++ void PrintOffs21(Instruction* instr); ++ void PrintOffs26(Instruction* instr); ++ ++ // Handle formatting of instructions and their options. ++ int FormatRegister(Instruction* instr, const char* option); ++ int FormatFPURegister(Instruction* instr, const char* option); ++ int FormatOption(Instruction* instr, const char* option); ++ void Format(Instruction* instr, const char* format); ++ void Unknown(Instruction* instr); ++ int DecodeBreakInstr(Instruction* instr); ++ ++ // Each of these functions decodes one particular instruction type. 
++ int InstructionDecode(Instruction* instr); ++ void DecodeTypekOp6(Instruction* instr); ++ void DecodeTypekOp7(Instruction* instr); ++ void DecodeTypekOp8(Instruction* instr); ++ void DecodeTypekOp10(Instruction* instr); ++ void DecodeTypekOp12(Instruction* instr); ++ void DecodeTypekOp14(Instruction* instr); ++ int DecodeTypekOp17(Instruction* instr); ++ void DecodeTypekOp22(Instruction* instr); ++ ++ const disasm::NameConverter& converter_; ++ v8::internal::Vector out_buffer_; ++ int out_buffer_pos_; ++ ++ DISALLOW_COPY_AND_ASSIGN(Decoder); ++}; ++ ++// Support for assertions in the Decoder formatting functions. ++#define STRING_STARTS_WITH(string, compare_string) \ ++ (strncmp(string, compare_string, strlen(compare_string)) == 0) ++ ++// Append the ch to the output buffer. ++void Decoder::PrintChar(const char ch) { out_buffer_[out_buffer_pos_++] = ch; } ++ ++// Append the str to the output buffer. ++void Decoder::Print(const char* str) { ++ char cur = *str++; ++ while (cur != '\0' && (out_buffer_pos_ < (out_buffer_.length() - 1))) { ++ PrintChar(cur); ++ cur = *str++; ++ } ++ out_buffer_[out_buffer_pos_] = 0; ++} ++ ++// Print the register name according to the active name converter. ++void Decoder::PrintRegister(int reg) { ++ Print(converter_.NameOfCPURegister(reg)); ++} ++ ++void Decoder::PrintRj(Instruction* instr) { ++ int reg = instr->RjValue(); ++ PrintRegister(reg); ++} ++ ++void Decoder::PrintRk(Instruction* instr) { ++ int reg = instr->RkValue(); ++ PrintRegister(reg); ++} ++ ++void Decoder::PrintRd(Instruction* instr) { ++ int reg = instr->RdValue(); ++ PrintRegister(reg); ++} ++ ++// Print the FPUregister name according to the active name converter. ++void Decoder::PrintFPURegister(int freg) { ++ Print(converter_.NameOfXMMRegister(freg)); ++} ++ ++void Decoder::PrintFj(Instruction* instr) { ++ int freg = instr->FjValue(); ++ PrintFPURegister(freg); ++} ++ ++void Decoder::PrintFk(Instruction* instr) { ++ int freg = instr->FkValue(); ++ PrintFPURegister(freg); ++} ++ ++void Decoder::PrintFd(Instruction* instr) { ++ int freg = instr->FdValue(); ++ PrintFPURegister(freg); ++} ++ ++void Decoder::PrintFa(Instruction* instr) { ++ int freg = instr->FaValue(); ++ PrintFPURegister(freg); ++} ++ ++// Print the integer value of the sa field. 
++void Decoder::PrintSa2(Instruction* instr) { ++ int sa = instr->Sa2Value(); ++ uint32_t opcode = (instr->InstructionBits() >> 18) << 18; ++ if (opcode == ALSL || opcode == ALSL_D) { ++ sa += 1; ++ } ++ out_buffer_pos_ += SNPrintF(out_buffer_ + out_buffer_pos_, "%d", sa); ++} ++ ++void Decoder::PrintSa3(Instruction* instr) { ++ int sa = instr->Sa3Value(); ++ out_buffer_pos_ += SNPrintF(out_buffer_ + out_buffer_pos_, "%d", sa); ++} ++ ++void Decoder::PrintUi5(Instruction* instr) { ++ int ui = instr->Ui5Value(); ++ out_buffer_pos_ += SNPrintF(out_buffer_ + out_buffer_pos_, "%u", ui); ++} ++ ++void Decoder::PrintUi6(Instruction* instr) { ++ int ui = instr->Ui6Value(); ++ out_buffer_pos_ += SNPrintF(out_buffer_ + out_buffer_pos_, "%u", ui); ++} ++ ++void Decoder::PrintUi12(Instruction* instr) { ++ int ui = instr->Ui12Value(); ++ out_buffer_pos_ += SNPrintF(out_buffer_ + out_buffer_pos_, "%u", ui); ++} ++ ++void Decoder::PrintXi12(Instruction* instr) { ++ int xi = instr->Ui12Value(); ++ out_buffer_pos_ += SNPrintF(out_buffer_ + out_buffer_pos_, "0x%x", xi); ++} ++ ++void Decoder::PrintMsbd(Instruction* instr) { ++ int msbd = instr->MsbdValue(); ++ out_buffer_pos_ += SNPrintF(out_buffer_ + out_buffer_pos_, "%u", msbd); ++} ++ ++void Decoder::PrintLsbd(Instruction* instr) { ++ int lsbd = instr->LsbdValue(); ++ out_buffer_pos_ += SNPrintF(out_buffer_ + out_buffer_pos_, "%u", lsbd); ++} ++ ++void Decoder::PrintMsbw(Instruction* instr) { ++ int msbw = instr->MsbwValue(); ++ out_buffer_pos_ += SNPrintF(out_buffer_ + out_buffer_pos_, "%u", msbw); ++} ++ ++void Decoder::PrintLsbw(Instruction* instr) { ++ int lsbw = instr->LsbwValue(); ++ out_buffer_pos_ += SNPrintF(out_buffer_ + out_buffer_pos_, "%u", lsbw); ++} ++ ++void Decoder::PrintSi12(Instruction* instr) { ++ int si = ((instr->Si12Value()) << (32 - kSi12Bits)) >> (32 - kSi12Bits); ++ out_buffer_pos_ += SNPrintF(out_buffer_ + out_buffer_pos_, "%d", si); ++} ++ ++void Decoder::PrintSi14(Instruction* instr) { ++ int si = ((instr->Si14Value()) << (32 - kSi14Bits)) >> (32 - kSi14Bits); ++ si <<= 2; ++ out_buffer_pos_ += SNPrintF(out_buffer_ + out_buffer_pos_, "%d", si); ++} ++ ++void Decoder::PrintSi16(Instruction* instr) { ++ int si = ((instr->Si16Value()) << (32 - kSi16Bits)) >> (32 - kSi16Bits); ++ out_buffer_pos_ += SNPrintF(out_buffer_ + out_buffer_pos_, "%d", si); ++} ++ ++void Decoder::PrintSi20(Instruction* instr) { ++ int si = ((instr->Si20Value()) << (32 - kSi20Bits)) >> (32 - kSi20Bits); ++ out_buffer_pos_ += SNPrintF(out_buffer_ + out_buffer_pos_, "%d", si); ++} ++ ++void Decoder::PrintCj(Instruction* instr) { ++ int cj = instr->CjValue(); ++ out_buffer_pos_ += SNPrintF(out_buffer_ + out_buffer_pos_, "%u", cj); ++} ++ ++void Decoder::PrintCd(Instruction* instr) { ++ int cd = instr->CdValue(); ++ out_buffer_pos_ += SNPrintF(out_buffer_ + out_buffer_pos_, "%u", cd); ++} ++ ++void Decoder::PrintCa(Instruction* instr) { ++ int ca = instr->CaValue(); ++ out_buffer_pos_ += SNPrintF(out_buffer_ + out_buffer_pos_, "%u", ca); ++} ++ ++void Decoder::PrintCode(Instruction* instr) { ++ int code = instr->CodeValue(); ++ out_buffer_pos_ += ++ SNPrintF(out_buffer_ + out_buffer_pos_, "0x%x(%u)", code, code); ++} ++ ++void Decoder::PrintHint5(Instruction* instr) { ++ int hint = instr->Hint5Value(); ++ out_buffer_pos_ += ++ SNPrintF(out_buffer_ + out_buffer_pos_, "0x%x(%u)", hint, hint); ++} ++ ++void Decoder::PrintHint15(Instruction* instr) { ++ int hint = instr->Hint15Value(); ++ out_buffer_pos_ += ++ SNPrintF(out_buffer_ + out_buffer_pos_, "0x%x(%u)", 
hint, hint); ++} ++ ++void Decoder::PrintPCOffs16(Instruction* instr) { ++ int n_bits = 2; ++ int offs = instr->Offs16Value(); ++ int target = ((offs << n_bits) << (32 - kOffsLowBits - n_bits)) >> ++ (32 - kOffsLowBits - n_bits); ++ out_buffer_pos_ += SNPrintF( ++ out_buffer_ + out_buffer_pos_, "%s", ++ converter_.NameOfAddress(reinterpret_cast(instr) + target)); ++} ++ ++void Decoder::PrintPCOffs21(Instruction* instr) { ++ int n_bits = 2; ++ int offs = instr->Offs21Value(); ++ int target = ++ ((offs << n_bits) << (32 - kOffsLowBits - kOffs21HighBits - n_bits)) >> ++ (32 - kOffsLowBits - kOffs21HighBits - n_bits); ++ out_buffer_pos_ += SNPrintF( ++ out_buffer_ + out_buffer_pos_, "%s", ++ converter_.NameOfAddress(reinterpret_cast(instr) + target)); ++} ++ ++void Decoder::PrintPCOffs26(Instruction* instr) { ++ int n_bits = 2; ++ int offs = instr->Offs26Value(); ++ int target = ++ ((offs << n_bits) << (32 - kOffsLowBits - kOffs26HighBits - n_bits)) >> ++ (32 - kOffsLowBits - kOffs26HighBits - n_bits); ++ out_buffer_pos_ += SNPrintF( ++ out_buffer_ + out_buffer_pos_, "%s", ++ converter_.NameOfAddress(reinterpret_cast(instr) + target)); ++} ++ ++void Decoder::PrintOffs16(Instruction* instr) { ++ int offs = instr->Offs16Value(); ++ offs <<= (32 - kOffsLowBits); ++ offs >>= (32 - kOffsLowBits); ++ out_buffer_pos_ += SNPrintF(out_buffer_ + out_buffer_pos_, "%d", offs); ++} ++ ++void Decoder::PrintOffs21(Instruction* instr) { ++ int offs = instr->Offs21Value(); ++ offs <<= (32 - kOffsLowBits - kOffs21HighBits); ++ offs >>= (32 - kOffsLowBits - kOffs21HighBits); ++ out_buffer_pos_ += SNPrintF(out_buffer_ + out_buffer_pos_, "%d", offs); ++} ++ ++void Decoder::PrintOffs26(Instruction* instr) { ++ int offs = instr->Offs26Value(); ++ offs <<= (32 - kOffsLowBits - kOffs26HighBits); ++ offs >>= (32 - kOffsLowBits - kOffs26HighBits); ++ out_buffer_pos_ += SNPrintF(out_buffer_ + out_buffer_pos_, "%d", offs); ++} ++ ++// Handle all register based formatting in this function to reduce the ++// complexity of FormatOption. ++int Decoder::FormatRegister(Instruction* instr, const char* format) { ++ DCHECK_EQ(format[0], 'r'); ++ if (format[1] == 'j') { // 'rj: Rj register. ++ int reg = instr->RjValue(); ++ PrintRegister(reg); ++ return 2; ++ } else if (format[1] == 'k') { // 'rk: rk register. ++ int reg = instr->RkValue(); ++ PrintRegister(reg); ++ return 2; ++ } else if (format[1] == 'd') { // 'rd: rd register. ++ int reg = instr->RdValue(); ++ PrintRegister(reg); ++ return 2; ++ } ++ UNREACHABLE(); ++ return 0; ++} ++ ++// Handle all FPUregister based formatting in this function to reduce the ++// complexity of FormatOption. ++int Decoder::FormatFPURegister(Instruction* instr, const char* format) { ++ DCHECK_EQ(format[0], 'f'); ++ if (format[1] == 'j') { // 'fj: fj register. ++ int reg = instr->FjValue(); ++ PrintFPURegister(reg); ++ return 2; ++ } else if (format[1] == 'k') { // 'fk: fk register. ++ int reg = instr->FkValue(); ++ PrintFPURegister(reg); ++ return 2; ++ } else if (format[1] == 'd') { // 'fd: fd register. ++ int reg = instr->FdValue(); ++ PrintFPURegister(reg); ++ return 2; ++ } else if (format[1] == 'a') { // 'fa: fa register. ++ int reg = instr->FaValue(); ++ PrintFPURegister(reg); ++ return 2; ++ } ++ UNREACHABLE(); ++ return 0; ++} ++ ++// FormatOption takes a formatting string and interprets it based on ++// the current instructions. The format string points to the first ++// character of the option string (the option escape has already been ++// consumed by the caller.) 
FormatOption returns the number of ++// characters that were consumed from the formatting string. ++int Decoder::FormatOption(Instruction* instr, const char* format) { ++ switch (format[0]) { ++ case 'c': { ++ switch (format[1]) { ++ case 'a': ++ DCHECK(STRING_STARTS_WITH(format, "ca")); ++ PrintCa(instr); ++ return 2; ++ case 'd': ++ DCHECK(STRING_STARTS_WITH(format, "cd")); ++ PrintCd(instr); ++ return 2; ++ case 'j': ++ DCHECK(STRING_STARTS_WITH(format, "cj")); ++ PrintCj(instr); ++ return 2; ++ case 'o': ++ DCHECK(STRING_STARTS_WITH(format, "code")); ++ PrintCode(instr); ++ return 4; ++ } ++ } ++ case 'f': { ++ return FormatFPURegister(instr, format); ++ } ++ case 'h': { ++ if (format[4] == '5') { ++ DCHECK(STRING_STARTS_WITH(format, "hint5")); ++ PrintHint5(instr); ++ return 5; ++ } else if (format[4] == '1') { ++ DCHECK(STRING_STARTS_WITH(format, "hint15")); ++ PrintHint15(instr); ++ return 6; ++ } ++ break; ++ } ++ case 'l': { ++ switch (format[3]) { ++ case 'w': ++ DCHECK(STRING_STARTS_WITH(format, "lsbw")); ++ PrintLsbw(instr); ++ return 4; ++ case 'd': ++ DCHECK(STRING_STARTS_WITH(format, "lsbd")); ++ PrintLsbd(instr); ++ return 4; ++ default: ++ return 0; ++ } ++ } ++ case 'm': { ++ if (format[3] == 'w') { ++ DCHECK(STRING_STARTS_WITH(format, "msbw")); ++ PrintMsbw(instr); ++ } else if (format[3] == 'd') { ++ DCHECK(STRING_STARTS_WITH(format, "msbd")); ++ PrintMsbd(instr); ++ } ++ return 4; ++ } ++ case 'o': { ++ if (format[1] == 'f') { ++ if (format[4] == '1') { ++ DCHECK(STRING_STARTS_WITH(format, "offs16")); ++ PrintOffs16(instr); ++ return 6; ++ } else if (format[4] == '2') { ++ if (format[5] == '1') { ++ DCHECK(STRING_STARTS_WITH(format, "offs21")); ++ PrintOffs21(instr); ++ return 6; ++ } else if (format[5] == '6') { ++ DCHECK(STRING_STARTS_WITH(format, "offs26")); ++ PrintOffs26(instr); ++ return 6; ++ } ++ } ++ } ++ break; ++ } ++ case 'p': { ++ if (format[6] == '1') { ++ DCHECK(STRING_STARTS_WITH(format, "pcoffs16")); ++ PrintPCOffs16(instr); ++ return 8; ++ } else if (format[6] == '2') { ++ if (format[7] == '1') { ++ DCHECK(STRING_STARTS_WITH(format, "pcoffs21")); ++ PrintPCOffs21(instr); ++ return 8; ++ } else if (format[7] == '6') { ++ DCHECK(STRING_STARTS_WITH(format, "pcoffs26")); ++ PrintPCOffs26(instr); ++ return 8; ++ } ++ } ++ break; ++ } ++ case 'r': { ++ return FormatRegister(instr, format); ++ break; ++ } ++ case 's': { ++ switch (format[1]) { ++ case 'a': ++ if (format[2] == '2') { ++ DCHECK(STRING_STARTS_WITH(format, "sa2")); ++ PrintSa2(instr); ++ } else if (format[2] == '3') { ++ DCHECK(STRING_STARTS_WITH(format, "sa3")); ++ PrintSa3(instr); ++ } ++ return 3; ++ case 'i': ++ if (format[2] == '2') { ++ DCHECK(STRING_STARTS_WITH(format, "si20")); ++ PrintSi20(instr); ++ return 4; ++ } else if (format[2] == '1') { ++ switch (format[3]) { ++ case '2': ++ DCHECK(STRING_STARTS_WITH(format, "si12")); ++ PrintSi12(instr); ++ return 4; ++ case '4': ++ DCHECK(STRING_STARTS_WITH(format, "si14")); ++ PrintSi14(instr); ++ return 4; ++ case '6': ++ DCHECK(STRING_STARTS_WITH(format, "si16")); ++ PrintSi16(instr); ++ return 4; ++ default: ++ break; ++ } ++ } ++ break; ++ default: ++ break; ++ } ++ break; ++ } ++ case 'u': { ++ if (format[2] == '5') { ++ DCHECK(STRING_STARTS_WITH(format, "ui5")); ++ PrintUi5(instr); ++ return 3; ++ } else if (format[2] == '6') { ++ DCHECK(STRING_STARTS_WITH(format, "ui6")); ++ PrintUi6(instr); ++ return 3; ++ } else if (format[2] == '1') { ++ DCHECK(STRING_STARTS_WITH(format, "ui12")); ++ PrintUi12(instr); ++ return 4; ++ } ++ break; ++ 
} ++ case 'x': { ++ DCHECK(STRING_STARTS_WITH(format, "xi12")); ++ PrintXi12(instr); ++ return 4; ++ } ++ default: ++ UNREACHABLE(); ++ } ++ return 0; ++} ++ ++// Format takes a formatting string for a whole instruction and prints it into ++// the output buffer. All escaped options are handed to FormatOption to be ++// parsed further. ++void Decoder::Format(Instruction* instr, const char* format) { ++ char cur = *format++; ++ while ((cur != 0) && (out_buffer_pos_ < (out_buffer_.length() - 1))) { ++ if (cur == '\'') { // Single quote is used as the formatting escape. ++ format += FormatOption(instr, format); ++ } else { ++ out_buffer_[out_buffer_pos_++] = cur; ++ } ++ cur = *format++; ++ } ++ out_buffer_[out_buffer_pos_] = '\0'; ++} ++ ++// For currently unimplemented decodings the disassembler calls Unknown(instr) ++// which will just print "unknown" of the instruction bits. ++void Decoder::Unknown(Instruction* instr) { Format(instr, "unknown"); } ++ ++int Decoder::DecodeBreakInstr(Instruction* instr) { ++ // This is already known to be BREAK instr, just extract the code. ++ /*if (instr->Bits(14, 0) == static_cast(kMaxStopCode)) { ++ // This is stop(msg). ++ Format(instr, "break, code: 'code"); ++ out_buffer_pos_ += SNPrintF( ++ out_buffer_ + out_buffer_pos_, "\n%p %08" PRIx64, ++ static_cast(reinterpret_cast(instr + kInstrSize)), ++ reinterpret_cast( ++ *reinterpret_cast(instr + kInstrSize))); ++ // Size 3: the break_ instr, plus embedded 64-bit char pointer. ++ return 3 * kInstrSize; ++ } else { ++ Format(instr, "break, code: 'code"); ++ return kInstrSize; ++ }*/ ++ Format(instr, "break code: 'code"); ++ return kInstrSize; ++} //=================================================== ++ ++void Decoder::DecodeTypekOp6(Instruction* instr) { ++ switch (instr->Bits(31, 26) << 26) { ++ case ADDU16I_D: ++ Format(instr, "addu16i.d 'rd, 'rj, 'si16"); ++ break; ++ case BEQZ: ++ Format(instr, "beqz 'rj, 'offs21 -> 'pcoffs21"); ++ break; ++ case BNEZ: ++ Format(instr, "bnez 'rj, 'offs21 -> 'pcoffs21"); ++ break; ++ case BCZ: ++ if (instr->Bit(8)) ++ Format(instr, "bcnez fcc'cj, 'offs21 -> 'pcoffs21"); ++ else ++ Format(instr, "bceqz fcc'cj, 'offs21 -> 'pcoffs21"); ++ break; ++ case JIRL: ++ Format(instr, "jirl 'rd, 'rj, 'offs16"); ++ break; ++ case B: ++ Format(instr, "b 'offs26 -> 'pcoffs26"); ++ break; ++ case BL: ++ Format(instr, "bl 'offs26 -> 'pcoffs26"); ++ break; ++ case BEQ: ++ Format(instr, "beq 'rj, 'rd, 'offs16 -> 'pcoffs16"); ++ break; ++ case BNE: ++ Format(instr, "bne 'rj, 'rd, 'offs16 -> 'pcoffs16"); ++ break; ++ case BLT: ++ Format(instr, "blt 'rj, 'rd, 'offs16 -> 'pcoffs16"); ++ break; ++ case BGE: ++ Format(instr, "bge 'rj, 'rd, 'offs16 -> 'pcoffs16"); ++ break; ++ case BLTU: ++ Format(instr, "bltu 'rj, 'rd, 'offs16 -> 'pcoffs16"); ++ break; ++ case BGEU: ++ Format(instr, "bgeu 'rj, 'rd, 'offs16 -> 'pcoffs16"); ++ break; ++ default: ++ UNREACHABLE(); ++ } ++} ++ ++void Decoder::DecodeTypekOp7(Instruction* instr) { ++ switch (instr->Bits(31, 25) << 25) { ++ case LU12I_W: ++ Format(instr, "lu12i.w 'rd, 'si20"); ++ break; ++ case LU32I_D: ++ Format(instr, "lu32i.d 'rd, 'si20"); ++ break; ++ case PCADDI: ++ Format(instr, "pcaddi 'rd, 'si20"); ++ break; ++ case PCALAU12I: ++ Format(instr, "pcalau12i 'rd, 'si20"); ++ break; ++ case PCADDU12I: ++ Format(instr, "pcaddu12i 'rd, 'si20"); ++ break; ++ case PCADDU18I: ++ Format(instr, "pcaddu18i 'rd, 'si20"); ++ break; ++ default: ++ UNREACHABLE(); ++ } ++} ++ ++void Decoder::DecodeTypekOp8(Instruction* instr) { ++ switch (instr->Bits(31, 24) << 
24) { ++ case LDPTR_W: ++ Format(instr, "ldptr.w 'rd, 'rj, 'si14"); ++ break; ++ case STPTR_W: ++ Format(instr, "stptr.w 'rd, 'rj, 'si14"); ++ break; ++ case LDPTR_D: ++ Format(instr, "ldptr.d 'rd, 'rj, 'si14"); ++ break; ++ case STPTR_D: ++ Format(instr, "stptr.d 'rd, 'rj, 'si14"); ++ break; ++ case LL_W: ++ Format(instr, "ll.w 'rd, 'rj, 'si14"); ++ break; ++ case SC_W: ++ Format(instr, "sc.w 'rd, 'rj, 'si14"); ++ break; ++ case LL_D: ++ Format(instr, "ll.d 'rd, 'rj, 'si14"); ++ break; ++ case SC_D: ++ Format(instr, "sc.d 'rd, 'rj, 'si14"); ++ break; ++ case CSR: ++ default: ++ UNREACHABLE(); ++ } ++} ++ ++void Decoder::DecodeTypekOp10(Instruction* instr) { ++ switch (instr->Bits(31, 22) << 22) { ++ case BSTR_W: { ++ if (instr->Bit(21) != 0) { ++ if (instr->Bit(15) == 0) { ++ Format(instr, "bstrins.w 'rd, 'rj, 'msbw, 'lsbw"); ++ } else { ++ Format(instr, "bstrpick.w 'rd, 'rj, 'msbw, 'lsbw"); ++ } ++ } ++ break; ++ } ++ case BSTRINS_D: ++ Format(instr, "bstrins.d 'rd, 'rj, 'msbd, 'lsbd"); ++ break; ++ case BSTRPICK_D: ++ Format(instr, "bstrpick.d 'rd, 'rj, 'msbd, 'lsbd"); ++ break; ++ case SLTI: ++ Format(instr, "slti 'rd, 'rj, 'si12"); ++ break; ++ case SLTUI: ++ Format(instr, "sltui 'rd, 'rj, 'si12"); ++ break; ++ case ADDI_W: ++ Format(instr, "addi.w 'rd, 'rj, 'si12"); ++ break; ++ case ADDI_D: ++ Format(instr, "addi.d 'rd, 'rj, 'si12"); ++ break; ++ case LU52I_D: ++ Format(instr, "lu52i.d 'rd, 'rj, 'si12"); ++ break; ++ case ANDI: ++ Format(instr, "andi 'rd, 'rj, 'xi12"); ++ break; ++ case ORI: ++ Format(instr, "ori 'rd, 'rj, 'xi12"); ++ break; ++ case XORI: ++ Format(instr, "xori 'rd, 'rj, 'xi12"); ++ break; ++ case LD_B: ++ Format(instr, "ld.b 'rd, 'rj, 'si12"); ++ break; ++ case LD_H: ++ Format(instr, "ld.h 'rd, 'rj, 'si12"); ++ break; ++ case LD_W: ++ Format(instr, "ld.w 'rd, 'rj, 'si12"); ++ break; ++ case LD_D: ++ Format(instr, "ld.d 'rd, 'rj, 'si12"); ++ break; ++ case ST_B: ++ Format(instr, "st.b 'rd, 'rj, 'si12"); ++ break; ++ case ST_H: ++ Format(instr, "st.h 'rd, 'rj, 'si12"); ++ break; ++ case ST_W: ++ Format(instr, "st.w 'rd, 'rj, 'si12"); ++ break; ++ case ST_D: ++ Format(instr, "st.d 'rd, 'rj, 'si12"); ++ break; ++ case LD_BU: ++ Format(instr, "ld.bu 'rd, 'rj, 'si12"); ++ break; ++ case LD_HU: ++ Format(instr, "ld.hu 'rd, 'rj, 'si12"); ++ break; ++ case LD_WU: ++ Format(instr, "ld.wu 'rd, 'rj, 'si12"); ++ break; ++ case PRELD: ++ Format(instr, "preld 'hint5, 'rj, 'si12"); ++ break; ++ case FLD_S: ++ Format(instr, "fld.s 'fd, 'rj, 'si12"); ++ break; ++ case FST_S: ++ Format(instr, "fst.s 'fd, 'rj, 'si12"); ++ break; ++ case FLD_D: ++ Format(instr, "fld.d 'fd, 'rj, 'si12"); ++ break; ++ case FST_D: ++ Format(instr, "fst.d 'fd, 'rj, 'si12"); ++ break; ++ case CACHE: ++ default: ++ UNREACHABLE(); ++ } ++} ++ ++void Decoder::DecodeTypekOp12(Instruction* instr) { ++ switch (instr->Bits(31, 20) << 20) { ++ case FMADD_S: ++ Format(instr, "fmadd.s 'fd, 'fj, 'fk, 'fa"); ++ break; ++ case FMADD_D: ++ Format(instr, "fmadd.d 'fd, 'fj, 'fk, 'fa"); ++ break; ++ case FMSUB_S: ++ Format(instr, "fmsub.s 'fd, 'fj, 'fk, 'fa"); ++ break; ++ case FMSUB_D: ++ Format(instr, "fmsub.d 'fd, 'fj, 'fk, 'fa"); ++ break; ++ case FNMADD_S: ++ Format(instr, "fnmadd.s 'fd, 'fj, 'fk, 'fa"); ++ break; ++ case FNMADD_D: ++ Format(instr, "fnmadd.d 'fd, 'fj, 'fk, 'fa"); ++ break; ++ case FNMSUB_S: ++ Format(instr, "fnmsub.s 'fd, 'fj, 'fk, 'fa"); ++ break; ++ case FNMSUB_D: ++ Format(instr, "fnmsub.d 'fd, 'fj, 'fk, 'fa"); ++ break; ++ case FCMP_COND_S: ++ switch (instr->Bits(19, 15)) { ++ case CAF: ++ 
Format(instr, "fcmp.caf.s fcc'cd, 'fj, 'fk"); ++ break; ++ case SAF: ++ Format(instr, "fcmp.saf.s fcc'cd, 'fj, 'fk"); ++ break; ++ case CLT: ++ Format(instr, "fcmp.clt.s fcc'cd, 'fj, 'fk"); ++ break; ++ case CEQ: ++ Format(instr, "fcmp.ceq.s fcc'cd, 'fj, 'fk"); ++ break; ++ case SEQ: ++ Format(instr, "fcmp.seq.s fcc'cd, 'fj, 'fk"); ++ break; ++ case CLE: ++ Format(instr, "fcmp.cle.s fcc'cd, 'fj, 'fk"); ++ break; ++ case SLE: ++ Format(instr, "fcmp.sle.s fcc'cd, 'fj, 'fk"); ++ break; ++ case CUN: ++ Format(instr, "fcmp.cun.s fcc'cd, 'fj, 'fk"); ++ break; ++ case SUN: ++ Format(instr, "fcmp.sun.s fcc'cd, 'fj, 'fk"); ++ break; ++ case CULT: ++ Format(instr, "fcmp.cult.s fcc'cd, 'fj, 'fk"); ++ break; ++ case SULT: ++ Format(instr, "fcmp.sult.s fcc'cd, 'fj, 'fk"); ++ break; ++ case CUEQ: ++ Format(instr, "fcmp.cueq.s fcc'cd, 'fj, 'fk"); ++ break; ++ case SUEQ: ++ Format(instr, "fcmp.sueq.s fcc'cd, 'fj, 'fk"); ++ break; ++ case CULE: ++ Format(instr, "fcmp.cule.s fcc'cd, 'fj, 'fk"); ++ break; ++ case SULE: ++ Format(instr, "fcmp.sule.s fcc'cd, 'fj, 'fk"); ++ break; ++ case CNE: ++ Format(instr, "fcmp.cne.s fcc'cd, 'fj, 'fk"); ++ break; ++ case SNE: ++ Format(instr, "fcmp.sne.s fcc'cd, 'fj, 'fk"); ++ break; ++ case COR: ++ Format(instr, "fcmp.cor.s fcc'cd, 'fj, 'fk"); ++ break; ++ case SOR: ++ Format(instr, "fcmp.sor.s fcc'cd, 'fj, 'fk"); ++ break; ++ case CUNE: ++ Format(instr, "fcmp.cune.s fcc'cd, 'fj, 'fk"); ++ break; ++ case SUNE: ++ Format(instr, "fcmp.sune.s fcc'cd, 'fj, 'fk"); ++ break; ++ default: ++ UNREACHABLE(); ++ } ++ break; ++ case FCMP_COND_D: ++ switch (instr->Bits(19, 15)) { ++ case CAF: ++ Format(instr, "fcmp.caf.d fcc'cd, 'fj, 'fk"); ++ break; ++ case SAF: ++ Format(instr, "fcmp.saf.d fcc'cd, 'fj, 'fk"); ++ break; ++ case CLT: ++ Format(instr, "fcmp.clt.d fcc'cd, 'fj, 'fk"); ++ break; ++ case CEQ: ++ Format(instr, "fcmp.ceq.d fcc'cd, 'fj, 'fk"); ++ break; ++ case SEQ: ++ Format(instr, "fcmp.seq.d fcc'cd, 'fj, 'fk"); ++ break; ++ case CLE: ++ Format(instr, "fcmp.cle.d fcc'cd, 'fj, 'fk"); ++ break; ++ case SLE: ++ Format(instr, "fcmp.sle.d fcc'cd, 'fj, 'fk"); ++ break; ++ case CUN: ++ Format(instr, "fcmp.cun.d fcc'cd, 'fj, 'fk"); ++ break; ++ case SUN: ++ Format(instr, "fcmp.sun.d fcc'cd, 'fj, 'fk"); ++ break; ++ case CULT: ++ Format(instr, "fcmp.cult.d fcc'cd, 'fj, 'fk"); ++ break; ++ case SULT: ++ Format(instr, "fcmp.sult.d fcc'cd, 'fj, 'fk"); ++ break; ++ case CUEQ: ++ Format(instr, "fcmp.cueq.d fcc'cd, 'fj, 'fk"); ++ break; ++ case SUEQ: ++ Format(instr, "fcmp.sueq.d fcc'cd, 'fj, 'fk"); ++ break; ++ case CULE: ++ Format(instr, "fcmp.cule.d fcc'cd, 'fj, 'fk"); ++ break; ++ case SULE: ++ Format(instr, "fcmp.sule.d fcc'cd, 'fj, 'fk"); ++ break; ++ case CNE: ++ Format(instr, "fcmp.cne.d fcc'cd, 'fj, 'fk"); ++ break; ++ case SNE: ++ Format(instr, "fcmp.sne.d fcc'cd, 'fj, 'fk"); ++ break; ++ case COR: ++ Format(instr, "fcmp.cor.d fcc'cd, 'fj, 'fk"); ++ break; ++ case SOR: ++ Format(instr, "fcmp.sor.d fcc'cd, 'fj, 'fk"); ++ break; ++ case CUNE: ++ Format(instr, "fcmp.cune.d fcc'cd, 'fj, 'fk"); ++ break; ++ case SUNE: ++ Format(instr, "fcmp.sune.d fcc'cd, 'fj, 'fk"); ++ break; ++ default: ++ UNREACHABLE(); ++ } ++ break; ++ case FSEL: ++ Format(instr, "fsel 'fd, 'fj, 'fk, fcc'ca"); ++ break; ++ default: ++ UNREACHABLE(); ++ } ++} ++ ++void Decoder::DecodeTypekOp14(Instruction* instr) { ++ switch (instr->Bits(31, 18) << 18) { ++ case ALSL: ++ if (instr->Bit(17)) ++ Format(instr, "alsl.wu 'rd, 'rj, 'rk, 'sa2"); ++ else ++ Format(instr, "alsl.w 'rd, 'rj, 'rk, 'sa2"); ++ break; ++ case 
BYTEPICK_W: ++ Format(instr, "bytepick.w 'rd, 'rj, 'rk, 'sa2"); ++ break; ++ case BYTEPICK_D: ++ Format(instr, "bytepick.d 'rd, 'rj, 'rk, 'sa3"); ++ break; ++ case ALSL_D: ++ Format(instr, "alsl.d 'rd, 'rj, 'rk, 'sa2"); ++ break; ++ case SLLI: ++ if (instr->Bit(16)) ++ Format(instr, "slli.d 'rd, 'rj, 'ui6"); ++ else ++ Format(instr, "slli.w 'rd, 'rj, 'ui5"); ++ break; ++ case SRLI: ++ if (instr->Bit(16)) ++ Format(instr, "srli.d 'rd, 'rj, 'ui6"); ++ else ++ Format(instr, "srli.w 'rd, 'rj, 'ui5"); ++ break; ++ case SRAI: ++ if (instr->Bit(16)) ++ Format(instr, "srai.d 'rd, 'rj, 'ui6"); ++ else ++ Format(instr, "srai.w 'rd, 'rj, 'ui5"); ++ break; ++ case ROTRI: ++ if (instr->Bit(16)) ++ Format(instr, "rotri.d 'rd, 'rj, 'ui6"); ++ else ++ Format(instr, "rotri.w 'rd, 'rj, 'ui5"); ++ break; ++ case LDDIR: ++ case LDPTE: ++ default: ++ UNREACHABLE(); ++ } ++} ++ ++int Decoder::DecodeTypekOp17(Instruction* instr) { ++ switch (instr->Bits(31, 15) << 15) { ++ case ADD_W: ++ Format(instr, "add.w 'rd, 'rj, 'rk"); ++ break; ++ case ADD_D: ++ Format(instr, "add.d 'rd, 'rj, 'rk"); ++ break; ++ case SUB_W: ++ Format(instr, "sub.w 'rd, 'rj, 'rk"); ++ break; ++ case SUB_D: ++ Format(instr, "sub.d 'rd, 'rj, 'rk"); ++ break; ++ case SLT: ++ Format(instr, "slt 'rd, 'rj, 'rk"); ++ break; ++ case SLTU: ++ Format(instr, "sltu 'rd, 'rj, 'rk"); ++ break; ++ case MASKEQZ: ++ Format(instr, "maskeqz 'rd, 'rj, 'rk"); ++ break; ++ case MASKNEZ: ++ Format(instr, "masknez 'rd, 'rj, 'rk"); ++ break; ++ case NOR: ++ Format(instr, "nor 'rd, 'rj, 'rk"); ++ break; ++ case AND: ++ Format(instr, "and 'rd, 'rj, 'rk"); ++ break; ++ case OR: ++ Format(instr, "or 'rd, 'rj, 'rk"); ++ break; ++ case XOR: ++ Format(instr, "xor 'rd, 'rj, 'rk"); ++ break; ++ case ORN: ++ Format(instr, "orn 'rd, 'rj, 'rk"); ++ break; ++ case ANDN: ++ Format(instr, "andn 'rd, 'rj, 'rk"); ++ break; ++ case SLL_W: ++ Format(instr, "sll.w 'rd, 'rj, 'rk"); ++ break; ++ case SRL_W: ++ Format(instr, "srl.w 'rd, 'rj, 'rk"); ++ break; ++ case SRA_W: ++ Format(instr, "sra.w 'rd, 'rj, 'rk"); ++ break; ++ case SLL_D: ++ Format(instr, "sll.d 'rd, 'rj, 'rk"); ++ break; ++ case SRL_D: ++ Format(instr, "srl.d 'rd, 'rj, 'rk"); ++ break; ++ case SRA_D: ++ Format(instr, "sra.d 'rd, 'rj, 'rk"); ++ break; ++ case ROTR_D: ++ Format(instr, "rotr.d 'rd, 'rj, 'rk"); ++ break; ++ case ROTR_W: ++ Format(instr, "rotr.w 'rd, 'rj, 'rk"); ++ break; ++ case MUL_W: ++ Format(instr, "mul.w 'rd, 'rj, 'rk"); ++ break; ++ case MULH_W: ++ Format(instr, "mulh.w 'rd, 'rj, 'rk"); ++ break; ++ case MULH_WU: ++ Format(instr, "mulh.wu 'rd, 'rj, 'rk"); ++ break; ++ case MUL_D: ++ Format(instr, "mul.d 'rd, 'rj, 'rk"); ++ break; ++ case MULH_D: ++ Format(instr, "mulh.d 'rd, 'rj, 'rk"); ++ break; ++ case MULH_DU: ++ Format(instr, "mulh.du 'rd, 'rj, 'rk"); ++ break; ++ case MULW_D_W: ++ Format(instr, "mulw.d.w 'rd, 'rj, 'rk"); ++ break; ++ case MULW_D_WU: ++ Format(instr, "mulw.d.wu 'rd, 'rj, 'rk"); ++ break; ++ case DIV_W: ++ Format(instr, "div.w 'rd, 'rj, 'rk"); ++ break; ++ case MOD_W: ++ Format(instr, "mod.w 'rd, 'rj, 'rk"); ++ break; ++ case DIV_WU: ++ Format(instr, "div.wu 'rd, 'rj, 'rk"); ++ break; ++ case MOD_WU: ++ Format(instr, "mod.wu 'rd, 'rj, 'rk"); ++ break; ++ case DIV_D: ++ Format(instr, "div.d 'rd, 'rj, 'rk"); ++ break; ++ case MOD_D: ++ Format(instr, "mod.d 'rd, 'rj, 'rk"); ++ break; ++ case DIV_DU: ++ Format(instr, "div.du 'rd, 'rj, 'rk"); ++ break; ++ case MOD_DU: ++ Format(instr, "mod.du 'rd, 'rj, 'rk"); ++ break; ++ case BREAK: ++ return DecodeBreakInstr(instr); ++ case FADD_S: ++ 
Format(instr, "fadd.s 'fd, 'fj, 'fk"); ++ break; ++ case FADD_D: ++ Format(instr, "fadd.d 'fd, 'fj, 'fk"); ++ break; ++ case FSUB_S: ++ Format(instr, "fsub.s 'fd, 'fj, 'fk"); ++ break; ++ case FSUB_D: ++ Format(instr, "fsub.d 'fd, 'fj, 'fk"); ++ break; ++ case FMUL_S: ++ Format(instr, "fmul.s 'fd, 'fj, 'fk"); ++ break; ++ case FMUL_D: ++ Format(instr, "fmul.d 'fd, 'fj, 'fk"); ++ break; ++ case FDIV_S: ++ Format(instr, "fdiv.s 'fd, 'fj, 'fk"); ++ break; ++ case FDIV_D: ++ Format(instr, "fdiv.d 'fd, 'fj, 'fk"); ++ break; ++ case FMAX_S: ++ Format(instr, "fmax.s 'fd, 'fj, 'fk"); ++ break; ++ case FMAX_D: ++ Format(instr, "fmax.d 'fd, 'fj, 'fk"); ++ break; ++ case FMIN_S: ++ Format(instr, "fmin.s 'fd, 'fj, 'fk"); ++ break; ++ case FMIN_D: ++ Format(instr, "fmin.d 'fd, 'fj, 'fk"); ++ break; ++ case FMAXA_S: ++ Format(instr, "fmaxa.s 'fd, 'fj, 'fk"); ++ break; ++ case FMAXA_D: ++ Format(instr, "fmaxa.d 'fd, 'fj, 'fk"); ++ break; ++ case FMINA_S: ++ Format(instr, "fmina.s 'fd, 'fj, 'fk"); ++ break; ++ case FMINA_D: ++ Format(instr, "fmina.d 'fd, 'fj, 'fk"); ++ break; ++ case LDX_B: ++ Format(instr, "ldx.b 'rd, 'rj, 'rk"); ++ break; ++ case LDX_H: ++ Format(instr, "ldx.h 'rd, 'rj, 'rk"); ++ break; ++ case LDX_W: ++ Format(instr, "ldx.w 'rd, 'rj, 'rk"); ++ break; ++ case LDX_D: ++ Format(instr, "ldx.d 'rd, 'rj, 'rk"); ++ break; ++ case STX_B: ++ Format(instr, "stx.b 'rd, 'rj, 'rk"); ++ break; ++ case STX_H: ++ Format(instr, "stx.h 'rd, 'rj, 'rk"); ++ break; ++ case STX_W: ++ Format(instr, "stx.w 'rd, 'rj, 'rk"); ++ break; ++ case STX_D: ++ Format(instr, "stx.d 'rd, 'rj, 'rk"); ++ break; ++ case LDX_BU: ++ Format(instr, "ldx.bu 'rd, 'rj, 'rk"); ++ break; ++ case LDX_HU: ++ Format(instr, "ldx.hu 'rd, 'rj, 'rk"); ++ break; ++ case LDX_WU: ++ Format(instr, "ldx.wu 'rd, 'rj, 'rk"); ++ break; ++ case PRELDX: ++ Format(instr, "preldx 'hint5, 'rj, 'rk"); ++ break; ++ case FLDX_S: ++ Format(instr, "fldx.s 'fd, 'rj, 'rk"); ++ break; ++ case FLDX_D: ++ Format(instr, "fldx.d 'fd, 'rj, 'rk"); ++ break; ++ case FSTX_S: ++ Format(instr, "fstx.s 'fd, 'rj, 'rk"); ++ break; ++ case FSTX_D: ++ Format(instr, "fstx.d 'fd, 'rj, 'rk"); ++ break; ++ case ASRTLE_D: ++ Format(instr, "asrtle.d 'rj, 'rk"); ++ break; ++ case ASRTGT_D: ++ Format(instr, "asrtgt.d 'rj, 'rk"); ++ break; ++ case SYSCALL: ++ Format(instr, "syscall code 'code"); ++ break; ++ case HYPCALL: ++ Format(instr, "hypcall code 'code"); ++ break; ++ case AMSWAP_W: ++ Format(instr, "amswap.w 'rd, 'rk, 'rj"); ++ break; ++ case AMSWAP_D: ++ Format(instr, "amswap.d 'rd, 'rk, 'rj"); ++ break; ++ case AMADD_W: ++ Format(instr, "amadd.w 'rd, 'rk, 'rj"); ++ break; ++ case AMADD_D: ++ Format(instr, "amadd.d 'rd, 'rk, 'rj"); ++ break; ++ case AMAND_W: ++ Format(instr, "amand.w 'rd, 'rk, 'rj"); ++ break; ++ case AMAND_D: ++ Format(instr, "amand.d 'rd, 'rk, 'rj"); ++ break; ++ case AMOR_W: ++ Format(instr, "amor.w 'rd, 'rk, 'rj"); ++ break; ++ case AMOR_D: ++ Format(instr, "amor.d 'rd, 'rk, 'rj"); ++ break; ++ case AMXOR_W: ++ Format(instr, "amxor.w 'rd, 'rk, 'rj"); ++ break; ++ case AMXOR_D: ++ Format(instr, "amxor.d 'rd, 'rk, 'rj"); ++ break; ++ case AMMAX_W: ++ Format(instr, "ammax.w 'rd, 'rk, 'rj"); ++ break; ++ case AMMAX_D: ++ Format(instr, "ammax.d 'rd, 'rk, 'rj"); ++ break; ++ case AMMIN_W: ++ Format(instr, "ammin.w 'rd, 'rk, 'rj"); ++ break; ++ case AMMIN_D: ++ Format(instr, "ammin.d 'rd, 'rk, 'rj"); ++ break; ++ case AMMAX_WU: ++ Format(instr, "ammax.wu 'rd, 'rk, 'rj"); ++ break; ++ case AMMAX_DU: ++ Format(instr, "ammax.du 'rd, 'rk, 'rj"); ++ break; ++ case 
AMMIN_WU: ++ Format(instr, "ammin.wu 'rd, 'rk, 'rj"); ++ break; ++ case AMMIN_DU: ++ Format(instr, "ammin.du 'rd, 'rk, 'rj"); ++ break; ++ case AMSWAP_DB_W: ++ Format(instr, "amswap_db.w 'rd, 'rk, 'rj"); ++ break; ++ case AMSWAP_DB_D: ++ Format(instr, "amswap_db.d 'rd, 'rk, 'rj"); ++ break; ++ case AMADD_DB_W: ++ Format(instr, "amadd_db.w 'rd, 'rk, 'rj"); ++ break; ++ case AMADD_DB_D: ++ Format(instr, "amadd_db.d 'rd, 'rk, 'rj"); ++ break; ++ case AMAND_DB_W: ++ Format(instr, "amand_db.w 'rd, 'rk, 'rj"); ++ break; ++ case AMAND_DB_D: ++ Format(instr, "amand_db.d 'rd, 'rk, 'rj"); ++ break; ++ case AMOR_DB_W: ++ Format(instr, "amor_db.w 'rd, 'rk, 'rj"); ++ break; ++ case AMOR_DB_D: ++ Format(instr, "amor_db.d 'rd, 'rk, 'rj"); ++ break; ++ case AMXOR_DB_W: ++ Format(instr, "amxor_db.w 'rd, 'rk, 'rj"); ++ break; ++ case AMXOR_DB_D: ++ Format(instr, "amxor_db.d 'rd, 'rk, 'rj"); ++ break; ++ case AMMAX_DB_W: ++ Format(instr, "ammax_db.w 'rd, 'rk, 'rj"); ++ break; ++ case AMMAX_DB_D: ++ Format(instr, "ammax_db.d 'rd, 'rk, 'rj"); ++ break; ++ case AMMIN_DB_W: ++ Format(instr, "ammin_db.w 'rd, 'rk, 'rj"); ++ break; ++ case AMMIN_DB_D: ++ Format(instr, "ammin_db.d 'rd, 'rk, 'rj"); ++ break; ++ case AMMAX_DB_WU: ++ Format(instr, "ammax_db.wu 'rd, 'rk, 'rj"); ++ break; ++ case AMMAX_DB_DU: ++ Format(instr, "ammax_db.du 'rd, 'rk, 'rj"); ++ break; ++ case AMMIN_DB_WU: ++ Format(instr, "ammin_db.wu 'rd, 'rk, 'rj"); ++ break; ++ case AMMIN_DB_DU: ++ Format(instr, "ammin_db.du 'rd, 'rk, 'rj"); ++ break; ++ case DBAR: ++ Format(instr, "dbar 'hint15"); ++ break; ++ case IBAR: ++ Format(instr, "ibar 'hint15"); ++ break; ++ case FLDGT_S: ++ Format(instr, "fldgt.s 'fd, 'rj, 'rk"); ++ break; ++ case FLDGT_D: ++ Format(instr, "fldgt.d 'fd, 'rj, 'rk"); ++ break; ++ case FLDLE_S: ++ Format(instr, "fldle.s 'fd, 'rj, 'rk"); ++ break; ++ case FLDLE_D: ++ Format(instr, "fldle.d 'fd, 'rj, 'rk"); ++ break; ++ case FSTGT_S: ++ Format(instr, "fstgt.s 'fd, 'rj, 'rk"); ++ break; ++ case FSTGT_D: ++ Format(instr, "fstgt.d 'fd, 'rj, 'rk"); ++ break; ++ case FSTLE_S: ++ Format(instr, "fstle.s 'fd, 'rj, 'rk"); ++ break; ++ case FSTLE_D: ++ Format(instr, "fstle.d 'fd, 'rj, 'rk"); ++ break; ++ case LDGT_B: ++ Format(instr, "ldgt.b 'rd, 'rj, 'rk"); ++ break; ++ case LDGT_H: ++ Format(instr, "ldgt.h 'rd, 'rj, 'rk"); ++ break; ++ case LDGT_W: ++ Format(instr, "ldgt.w 'rd, 'rj, 'rk"); ++ break; ++ case LDGT_D: ++ Format(instr, "ldgt.d 'rd, 'rj, 'rk"); ++ break; ++ case LDLE_B: ++ Format(instr, "ldle.b 'rd, 'rj, 'rk"); ++ break; ++ case LDLE_H: ++ Format(instr, "ldle.h 'rd, 'rj, 'rk"); ++ break; ++ case LDLE_W: ++ Format(instr, "ldle.w 'rd, 'rj, 'rk"); ++ break; ++ case LDLE_D: ++ Format(instr, "ldle.d 'rd, 'rj, 'rk"); ++ break; ++ case STGT_B: ++ Format(instr, "stgt.b 'rd, 'rj, 'rk"); ++ break; ++ case STGT_H: ++ Format(instr, "stgt.h 'rd, 'rj, 'rk"); ++ break; ++ case STGT_W: ++ Format(instr, "stgt.w 'rd, 'rj, 'rk"); ++ break; ++ case STGT_D: ++ Format(instr, "stgt.d 'rd, 'rj, 'rk"); ++ break; ++ case STLE_B: ++ Format(instr, "stle.b 'rd, 'rj, 'rk"); ++ break; ++ case STLE_H: ++ Format(instr, "stle.h 'rd, 'rj, 'rk"); ++ break; ++ case STLE_W: ++ Format(instr, "stle.w 'rd, 'rj, 'rk"); ++ break; ++ case STLE_D: ++ Format(instr, "stle.d 'rd, 'rj, 'rk"); ++ break; ++ case FSCALEB_S: ++ Format(instr, "fscaleb.s 'fd, 'fj, 'fk"); ++ break; ++ case FSCALEB_D: ++ Format(instr, "fscaleb.d 'fd, 'fj, 'fk"); ++ break; ++ case FCOPYSIGN_S: ++ Format(instr, "fcopysign.s 'fd, 'fj, 'fk"); ++ break; ++ case FCOPYSIGN_D: ++ Format(instr, "fcopysign.d 
'fd, 'fj, 'fk"); ++ break; ++ case CRC_W_B_W: ++ Format(instr, "crc.w.b.w 'rd, 'rj, 'rk"); ++ break; ++ case CRC_W_H_W: ++ Format(instr, "crc.w.h.w 'rd, 'rj, 'rk"); ++ break; ++ case CRC_W_W_W: ++ Format(instr, "crc.w.w.w 'rd, 'rj, 'rk"); ++ break; ++ case CRC_W_D_W: ++ Format(instr, "crc.w.d.w 'rd, 'rj, 'rk"); ++ break; ++ case CRCC_W_B_W: ++ Format(instr, "crcc.w.b.w 'rd, 'rj, 'rk"); ++ break; ++ case CRCC_W_H_W: ++ Format(instr, "crcc.w.h.w 'rd, 'rj, 'rk"); ++ break; ++ case CRCC_W_W_W: ++ Format(instr, "crcc.w.w.w 'rd, 'rj, 'rk"); ++ break; ++ case CRCC_W_D_W: ++ Format(instr, "crcc.w.d.w 'rd, 'rj, 'rk"); ++ break; ++ case WAIT_INVTLB: ++ case DBGCALL: ++ default: ++ UNREACHABLE(); ++ } ++ return kInstrSize; ++} ++ ++void Decoder::DecodeTypekOp22(Instruction* instr) { ++ switch (instr->Bits(31, 10) << 10) { ++ case CLZ_W: ++ Format(instr, "clz.w 'rd, 'rj"); ++ break; ++ case CTZ_W: ++ Format(instr, "ctz.w 'rd, 'rj"); ++ break; ++ case CLZ_D: ++ Format(instr, "clz.d 'rd, 'rj"); ++ break; ++ case CTZ_D: ++ Format(instr, "ctz.d 'rd, 'rj"); ++ break; ++ case REVB_2H: ++ Format(instr, "revb.2h 'rd, 'rj"); ++ break; ++ case REVB_4H: ++ Format(instr, "revb.4h 'rd, 'rj"); ++ break; ++ case REVB_2W: ++ Format(instr, "revb.2w 'rd, 'rj"); ++ break; ++ case REVB_D: ++ Format(instr, "revb.d 'rd, 'rj"); ++ break; ++ case REVH_2W: ++ Format(instr, "revh.2w 'rd, 'rj"); ++ break; ++ case REVH_D: ++ Format(instr, "revh.d 'rd, 'rj"); ++ break; ++ case BITREV_4B: ++ Format(instr, "bitrev.4b 'rd, 'rj"); ++ break; ++ case BITREV_8B: ++ Format(instr, "bitrev.8b 'rd, 'rj"); ++ break; ++ case BITREV_W: ++ Format(instr, "bitrev.w 'rd, 'rj"); ++ break; ++ case BITREV_D: ++ Format(instr, "bitrev.d 'rd, 'rj"); ++ break; ++ case EXT_W_B: ++ Format(instr, "ext.w.b 'rd, 'rj"); ++ break; ++ case EXT_W_H: ++ Format(instr, "ext.w.h 'rd, 'rj"); ++ break; ++ case FABS_S: ++ Format(instr, "fabs.s 'fd, 'fj"); ++ break; ++ case FABS_D: ++ Format(instr, "fabs.d 'fd, 'fj"); ++ break; ++ case FNEG_S: ++ Format(instr, "fneg.s 'fd, 'fj"); ++ break; ++ case FNEG_D: ++ Format(instr, "fneg.d 'fd, 'fj"); ++ break; ++ case FSQRT_S: ++ Format(instr, "fsqrt.s 'fd, 'fj"); ++ break; ++ case FSQRT_D: ++ Format(instr, "fsqrt.d 'fd, 'fj"); ++ break; ++ case FMOV_S: ++ Format(instr, "fmov.s 'fd, 'fj"); ++ break; ++ case FMOV_D: ++ Format(instr, "fmov.d 'fd, 'fj"); ++ break; ++ case MOVGR2FR_W: ++ Format(instr, "movgr2fr.w 'fd, 'rj"); ++ break; ++ case MOVGR2FR_D: ++ Format(instr, "movgr2fr.d 'fd, 'rj"); ++ break; ++ case MOVGR2FRH_W: ++ Format(instr, "movgr2frh.w 'fd, 'rj"); ++ break; ++ case MOVFR2GR_S: ++ Format(instr, "movfr2gr.s 'rd, 'fj"); ++ break; ++ case MOVFR2GR_D: ++ Format(instr, "movfr2gr.d 'rd, 'fj"); ++ break; ++ case MOVFRH2GR_S: ++ Format(instr, "movfrh2gr.s 'rd, 'fj"); ++ break; ++ case MOVGR2FCSR: ++ Format(instr, "movgr2fcsr fcsr, 'rj"); ++ break; ++ case MOVFCSR2GR: ++ Format(instr, "movfcsr2gr 'rd, fcsr"); ++ break; ++ case FCVT_S_D: ++ Format(instr, "fcvt.s.d 'fd, 'fj"); ++ break; ++ case FCVT_D_S: ++ Format(instr, "fcvt.d.s 'fd, 'fj"); ++ break; ++ case FTINTRM_W_S: ++ Format(instr, "ftintrm.w.s 'fd, 'fj"); ++ break; ++ case FTINTRM_W_D: ++ Format(instr, "ftintrm.w.d 'fd, 'fj"); ++ break; ++ case FTINTRM_L_S: ++ Format(instr, "ftintrm.l.s 'fd, 'fj"); ++ break; ++ case FTINTRM_L_D: ++ Format(instr, "ftintrm.l.d 'fd, 'fj"); ++ break; ++ case FTINTRP_W_S: ++ Format(instr, "ftintrp.w.s 'fd, 'fj"); ++ break; ++ case FTINTRP_W_D: ++ Format(instr, "ftintrp.w.d 'fd, 'fj"); ++ break; ++ case FTINTRP_L_S: ++ Format(instr, 
"ftintrp.l.s 'fd, 'fj"); ++ break; ++ case FTINTRP_L_D: ++ Format(instr, "ftintrp.l.d 'fd, 'fj"); ++ break; ++ case FTINTRZ_W_S: ++ Format(instr, "ftintrz.w.s 'fd, 'fj"); ++ break; ++ case FTINTRZ_W_D: ++ Format(instr, "ftintrz.w.d 'fd, 'fj"); ++ break; ++ case FTINTRZ_L_S: ++ Format(instr, "ftintrz.l.s 'fd, 'fj"); ++ break; ++ case FTINTRZ_L_D: ++ Format(instr, "ftintrz.l.d 'fd, 'fj"); ++ break; ++ case FTINTRNE_W_S: ++ Format(instr, "ftintrne.w.s 'fd, 'fj"); ++ break; ++ case FTINTRNE_W_D: ++ Format(instr, "ftintrne.w.d 'fd, 'fj"); ++ break; ++ case FTINTRNE_L_S: ++ Format(instr, "ftintrne.l.s 'fd, 'fj"); ++ break; ++ case FTINTRNE_L_D: ++ Format(instr, "ftintrne.l.d 'fd, 'fj"); ++ break; ++ case FTINT_W_S: ++ Format(instr, "ftint.w.s 'fd, 'fj"); ++ break; ++ case FTINT_W_D: ++ Format(instr, "ftint.w.d 'fd, 'fj"); ++ break; ++ case FTINT_L_S: ++ Format(instr, "ftint.l.s 'fd, 'fj"); ++ break; ++ case FTINT_L_D: ++ Format(instr, "ftint.l.d 'fd, 'fj"); ++ break; ++ case FFINT_S_W: ++ Format(instr, "ffint.s.w 'fd, 'fj"); ++ break; ++ case FFINT_S_L: ++ Format(instr, "ffint.s.l 'fd, 'fj"); ++ break; ++ case FFINT_D_W: ++ Format(instr, "ffint.d.w 'fd, 'fj"); ++ break; ++ case FFINT_D_L: ++ Format(instr, "ffint.d.l 'fd, 'fj"); ++ break; ++ case FRINT_S: ++ Format(instr, "frint.s 'fd, 'fj"); ++ break; ++ case FRINT_D: ++ Format(instr, "frint.d 'fd, 'fj"); ++ break; ++ case MOVFR2CF: ++ Format(instr, "movfr2cf fcc'cd, 'fj"); ++ break; ++ case MOVCF2FR: ++ Format(instr, "movcf2fr 'fd, fcc'cj"); ++ break; ++ case MOVGR2CF: ++ Format(instr, "movgr2cf fcc'cd, 'rj"); ++ break; ++ case MOVCF2GR: ++ Format(instr, "movcf2gr 'rd, fcc'cj"); ++ break; ++ case FRECIP_S: ++ Format(instr, "frecip.s 'fd, 'fj"); ++ break; ++ case FRECIP_D: ++ Format(instr, "frecip.d 'fd, 'fj"); ++ break; ++ case FRSQRT_S: ++ Format(instr, "frsqrt.s 'fd, 'fj"); ++ break; ++ case FRSQRT_D: ++ Format(instr, "frsqrt.d 'fd, 'fj"); ++ break; ++ case FCLASS_S: ++ Format(instr, "fclass.s 'fd, 'fj"); ++ break; ++ case FCLASS_D: ++ Format(instr, "fclass.d 'fd, 'fj"); ++ break; ++ case FLOGB_S: ++ Format(instr, "flogb.s 'fd, 'fj"); ++ break; ++ case FLOGB_D: ++ Format(instr, "flogb.d 'fd, 'fj"); ++ break; ++ case CLO_W: ++ Format(instr, "clo.w 'rd, 'rj"); ++ break; ++ case CTO_W: ++ Format(instr, "cto.w 'rd, 'rj"); ++ break; ++ case CLO_D: ++ Format(instr, "clo.d 'rd, 'rj"); ++ break; ++ case CTO_D: ++ Format(instr, "cto.d 'rd, 'rj"); ++ break; ++ case RDTIMEL_W: ++ Format(instr, "rdtimel.w 'rd, 'rj"); ++ break; ++ case RDTIMEH_W: ++ Format(instr, "rdtimeh.w 'rd, 'rj"); ++ break; ++ case RDTIME_D: ++ Format(instr, "rdtime.d 'rd, 'rj"); ++ break; ++ // case CPUCFG: ++ // Format(instr, "cpucfg 'rd, 'rj"); ++ // break; ++ case IOCSRRD_B: ++ case IOCSRRD_H: ++ case IOCSRRD_W: ++ case IOCSRRD_D: ++ case IOCSRWR_B: ++ case IOCSRWR_H: ++ case IOCSRWR_W: ++ case IOCSRWR_D: ++ case TLBINV: ++ case TLBFLUSH: ++ case TLBP: ++ case TLBR: ++ case TLBWI: ++ case TLBWR: ++ case ERET: ++ default: ++ UNREACHABLE(); ++ } ++} ++ ++int Decoder::InstructionDecode(byte* instr_ptr) { ++ Instruction* instr = Instruction::At(instr_ptr); ++ out_buffer_pos_ += SNPrintF(out_buffer_ + out_buffer_pos_, "%08x ", ++ instr->InstructionBits()); ++ switch (instr->InstructionType()) { ++ case Instruction::kOp6Type: { ++ DecodeTypekOp6(instr); ++ break; ++ } ++ case Instruction::kOp7Type: { ++ DecodeTypekOp7(instr); ++ break; ++ } ++ case Instruction::kOp8Type: { ++ DecodeTypekOp8(instr); ++ break; ++ } ++ case Instruction::kOp10Type: { ++ DecodeTypekOp10(instr); ++ break; ++ 
} ++ case Instruction::kOp12Type: { ++ DecodeTypekOp12(instr); ++ break; ++ } ++ case Instruction::kOp14Type: { ++ DecodeTypekOp14(instr); ++ break; ++ } ++ case Instruction::kOp17Type: { ++ return DecodeTypekOp17(instr); ++ } ++ case Instruction::kOp22Type: { ++ DecodeTypekOp22(instr); ++ break; ++ } ++ case Instruction::kUnsupported: { ++ Format(instr, "UNSUPPORTED"); ++ break; ++ } ++ default: { ++ Format(instr, "UNSUPPORTED"); ++ break; ++ } ++ } ++ return kInstrSize; ++} ++ ++} // namespace internal ++} // namespace v8 ++ ++//------------------------------------------------------------------------------ ++ ++namespace disasm { ++ ++const char* NameConverter::NameOfAddress(byte* addr) const { ++ v8::internal::SNPrintF(tmp_buffer_, "%p", static_cast(addr)); ++ return tmp_buffer_.begin(); ++} ++ ++const char* NameConverter::NameOfConstant(byte* addr) const { ++ return NameOfAddress(addr); ++} ++ ++const char* NameConverter::NameOfCPURegister(int reg) const { ++ return v8::internal::Registers::Name(reg); ++} ++ ++const char* NameConverter::NameOfXMMRegister(int reg) const { ++ return v8::internal::FPURegisters::Name(reg); ++} ++ ++const char* NameConverter::NameOfByteCPURegister(int reg) const { ++ UNREACHABLE(); ++ return "nobytereg"; ++} ++ ++const char* NameConverter::NameInCode(byte* addr) const { ++ // The default name converter is called for unknown code. So we will not try ++ // to access any memory. ++ return ""; ++} ++ ++//------------------------------------------------------------------------------ ++ ++int Disassembler::InstructionDecode(v8::internal::Vector buffer, ++ byte* instruction) { ++ v8::internal::Decoder d(converter_, buffer); ++ return d.InstructionDecode(instruction); ++} ++ ++int Disassembler::ConstantPoolSizeAt(byte* instruction) { return -1; } ++ ++void Disassembler::Disassemble(FILE* f, byte* begin, byte* end, ++ UnimplementedOpcodeAction unimplemented_action) { ++ NameConverter converter; ++ Disassembler d(converter, unimplemented_action); ++ for (byte* pc = begin; pc < end;) { ++ v8::internal::EmbeddedVector buffer; ++ buffer[0] = '\0'; ++ byte* prev_pc = pc; ++ pc += d.InstructionDecode(buffer, pc); ++ v8::internal::PrintF(f, "%p %08x %s\n", static_cast(prev_pc), ++ *reinterpret_cast(prev_pc), buffer.begin()); ++ } ++} ++ ++#undef STRING_STARTS_WITH ++ ++} // namespace disasm ++ ++#endif // V8_TARGET_ARCH_LA64 +diff --git a/src/3rdparty/chromium/v8/src/diagnostics/perf-jit.h b/src/3rdparty/chromium/v8/src/diagnostics/perf-jit.h +index dbe78ddf2d8..cb745ef8cc0 100644 +--- a/src/3rdparty/chromium/v8/src/diagnostics/perf-jit.h ++++ b/src/3rdparty/chromium/v8/src/diagnostics/perf-jit.h +@@ -83,6 +83,7 @@ class PerfJitLogger : public CodeEventLogger { + static const uint32_t kElfMachARM = 40; + static const uint32_t kElfMachMIPS = 8; + static const uint32_t kElfMachMIPS64 = 8; ++ static const uint32_t kElfMachLA64 = 258; + static const uint32_t kElfMachARM64 = 183; + static const uint32_t kElfMachS390x = 22; + static const uint32_t kElfMachPPC64 = 21; +@@ -98,6 +99,8 @@ class PerfJitLogger : public CodeEventLogger { + return kElfMachMIPS; + #elif V8_TARGET_ARCH_MIPS64 + return kElfMachMIPS64; ++#elif V8_TARGET_ARCH_LA64 ++ return kElfMachLA64; + #elif V8_TARGET_ARCH_ARM64 + return kElfMachARM64; + #elif V8_TARGET_ARCH_S390X +diff --git a/src/3rdparty/chromium/v8/src/execution/frame-constants.h b/src/3rdparty/chromium/v8/src/execution/frame-constants.h +index 8c3f7743192..f8508468ae4 100644 +--- a/src/3rdparty/chromium/v8/src/execution/frame-constants.h ++++ 
b/src/3rdparty/chromium/v8/src/execution/frame-constants.h +@@ -389,6 +389,8 @@ inline static int FrameSlotToFPOffset(int slot) { + #include "src/execution/mips/frame-constants-mips.h" // NOLINT + #elif V8_TARGET_ARCH_MIPS64 + #include "src/execution/mips64/frame-constants-mips64.h" // NOLINT ++#elif V8_TARGET_ARCH_LA64 ++#include "src/execution/la64/frame-constants-la64.h" // NOLINT + #elif V8_TARGET_ARCH_S390 + #include "src/execution/s390/frame-constants-s390.h" // NOLINT + #else +diff --git a/src/3rdparty/chromium/v8/src/execution/la64/frame-constants-la64.cc b/src/3rdparty/chromium/v8/src/execution/la64/frame-constants-la64.cc +new file mode 100644 +index 00000000000..185f0abe3db +--- /dev/null ++++ b/src/3rdparty/chromium/v8/src/execution/la64/frame-constants-la64.cc +@@ -0,0 +1,32 @@ ++// Copyright 2020 the V8 project authors. All rights reserved. ++// Use of this source code is governed by a BSD-style license that can be ++// found in the LICENSE file. ++ ++#if V8_TARGET_ARCH_LA64 ++ ++#include "src/codegen/la64/assembler-la64-inl.h" ++#include "src/execution/frame-constants.h" ++#include "src/execution/frames.h" ++ ++#include "src/execution/la64/frame-constants-la64.h" ++ ++namespace v8 { ++namespace internal { ++ ++Register JavaScriptFrame::fp_register() { return v8::internal::fp; } ++Register JavaScriptFrame::context_register() { return cp; } ++Register JavaScriptFrame::constant_pool_pointer_register() { UNREACHABLE(); } ++ ++int InterpreterFrameConstants::RegisterStackSlotCount(int register_count) { ++ return register_count; ++} ++ ++int BuiltinContinuationFrameConstants::PaddingSlotCount(int register_count) { ++ USE(register_count); ++ return 0; ++} ++ ++} // namespace internal ++} // namespace v8 ++ ++#endif // V8_TARGET_ARCH_LA64 +diff --git a/src/3rdparty/chromium/v8/src/execution/la64/frame-constants-la64.h b/src/3rdparty/chromium/v8/src/execution/la64/frame-constants-la64.h +new file mode 100644 +index 00000000000..e6069a60e97 +--- /dev/null ++++ b/src/3rdparty/chromium/v8/src/execution/la64/frame-constants-la64.h +@@ -0,0 +1,75 @@ ++// Copyright 2020 the V8 project authors. All rights reserved. ++// Use of this source code is governed by a BSD-style license that can be ++// found in the LICENSE file. ++ ++#ifndef V8_EXECUTION_LA64_FRAME_CONSTANTS_LA64_H_ ++#define V8_EXECUTION_LA64_FRAME_CONSTANTS_LA64_H_ ++ ++#include "src/base/bits.h" ++#include "src/base/macros.h" ++#include "src/execution/frame-constants.h" ++ ++namespace v8 { ++namespace internal { ++ ++class EntryFrameConstants : public AllStatic { ++ public: ++ // This is the offset to where JSEntry pushes the current value of ++ // Isolate::c_entry_fp onto the stack. ++ static constexpr int kCallerFPOffset = ++ -(StandardFrameConstants::kFixedFrameSizeFromFp + kPointerSize); ++}; ++ ++class WasmCompileLazyFrameConstants : public TypedFrameConstants { ++ public: ++ static constexpr int kNumberOfSavedGpParamRegs = 7; ++ static constexpr int kNumberOfSavedFpParamRegs = 7; ++ ++ // FP-relative. ++ static constexpr int kWasmInstanceOffset = TYPED_FRAME_PUSHED_VALUE_OFFSET(7); ++ static constexpr int kFixedFrameSizeFromFp = ++ TypedFrameConstants::kFixedFrameSizeFromFp + ++ kNumberOfSavedGpParamRegs * kPointerSize + ++ kNumberOfSavedFpParamRegs * kDoubleSize; ++}; ++ ++// Frame constructed by the {WasmDebugBreak} builtin. ++// After pushing the frame type marker, the builtin pushes all Liftoff cache ++// registers (see liftoff-assembler-defs.h). 
++class WasmDebugBreakFrameConstants : public TypedFrameConstants { ++ public: ++ // {a0, a1, a2, a3, a4, a5, a6, a7, t0, t1, t2, t3, t4, t5, t6, t7, t8} ++ static constexpr uint32_t kPushedGpRegs = 0b111111111111111110000; ++ // {f0, f2, f4, f6, f8, f10, f12, f14, f16, f18, f20, f22, f24, f26} ++ static constexpr uint32_t kPushedFpRegs = 0b101010101010101010101010101; ++ ++ static constexpr int kNumPushedGpRegisters = ++ base::bits::CountPopulation(kPushedGpRegs); ++ static constexpr int kNumPushedFpRegisters = ++ base::bits::CountPopulation(kPushedFpRegs); ++ ++ static constexpr int kLastPushedGpRegisterOffset = ++ -kFixedFrameSizeFromFp - kNumPushedGpRegisters * kSystemPointerSize; ++ static constexpr int kLastPushedFpRegisterOffset = ++ kLastPushedGpRegisterOffset - kNumPushedFpRegisters * kDoubleSize; ++ ++ // Offsets are fp-relative. ++ static int GetPushedGpRegisterOffset(int reg_code) { ++ DCHECK_NE(0, kPushedGpRegs & (1 << reg_code)); ++ uint32_t lower_regs = kPushedGpRegs & ((uint32_t{1} << reg_code) - 1); ++ return kLastPushedGpRegisterOffset + ++ base::bits::CountPopulation(lower_regs) * kSystemPointerSize; ++ } ++ ++ static int GetPushedFpRegisterOffset(int reg_code) { ++ DCHECK_NE(0, kPushedFpRegs & (1 << reg_code)); ++ uint32_t lower_regs = kPushedFpRegs & ((uint32_t{1} << reg_code) - 1); ++ return kLastPushedFpRegisterOffset + ++ base::bits::CountPopulation(lower_regs) * kDoubleSize; ++ } ++}; ++ ++} // namespace internal ++} // namespace v8 ++ ++#endif // V8_EXECUTION_LA64_FRAME_CONSTANTS_LA64_H_ +diff --git a/src/3rdparty/chromium/v8/src/execution/la64/simulator-la64.cc b/src/3rdparty/chromium/v8/src/execution/la64/simulator-la64.cc +new file mode 100644 +index 00000000000..f4bafa1d8dd +--- /dev/null ++++ b/src/3rdparty/chromium/v8/src/execution/la64/simulator-la64.cc +@@ -0,0 +1,5804 @@ ++// Copyright 2020 the V8 project authors. All rights reserved. ++// Use of this source code is governed by a BSD-style license that can be ++// found in the LICENSE file. ++ ++#include "src/execution/la64/simulator-la64.h" ++ ++// Only build the simulator if not compiling for real LA64 hardware. ++#if defined(USE_SIMULATOR) ++ ++#include ++#include ++#include ++#include ++ ++#include "src/base/bits.h" ++#include "src/codegen/assembler-inl.h" ++#include "src/codegen/la64/constants-la64.h" ++#include "src/codegen/macro-assembler.h" ++#include "src/diagnostics/disasm.h" ++#include "src/heap/combined-heap.h" ++#include "src/runtime/runtime-utils.h" ++#include "src/utils/ostreams.h" ++#include "src/utils/vector.h" ++ ++namespace v8 { ++namespace internal { ++ ++DEFINE_LAZY_LEAKY_OBJECT_GETTER(Simulator::GlobalMonitor, ++ Simulator::GlobalMonitor::Get) ++ ++// #define PRINT_SIM_LOG ++ ++// Util functions. 
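The pushed-register offset helpers in frame-constants-la64.h above map a register code to an fp-relative stack slot by counting how many pushed registers have a smaller code. A minimal standalone sketch of that scheme follows; the mask and offset values are made up for illustration, and std::bitset stands in for base::bits::CountPopulation, so this is not part of the patch itself.

#include <bitset>
#include <cassert>
#include <cstdint>
#include <cstdio>

// Hypothetical constants for illustration only; the real values live in
// frame-constants-la64.h above.
constexpr uint32_t kPushedGpRegs = 0b10110;  // registers 1, 2 and 4 are pushed
constexpr int kSystemPointerSize = 8;
constexpr int kLastPushedGpRegisterOffset = -3 * kSystemPointerSize;

// fp-relative offset of a pushed register: start at the lowest pushed slot and
// skip one slot for every pushed register with a smaller code.
int GetPushedGpRegisterOffset(int reg_code) {
  assert(kPushedGpRegs & (uint32_t{1} << reg_code));
  uint32_t lower_regs = kPushedGpRegs & ((uint32_t{1} << reg_code) - 1);
  int num_lower = static_cast<int>(std::bitset<32>(lower_regs).count());
  return kLastPushedGpRegisterOffset + num_lower * kSystemPointerSize;
}

int main() {
  // Register 1 is the lowest pushed register, so it sits at the last offset;
  // register 4 has two pushed registers (1 and 2) below it.
  std::printf("r1 -> %d, r2 -> %d, r4 -> %d\n", GetPushedGpRegisterOffset(1),
              GetPushedGpRegisterOffset(2), GetPushedGpRegisterOffset(4));
  return 0;
}

Compiled on its own, the sketch prints r1 -> -24, r2 -> -16, r4 -> -8, mirroring how WasmDebugBreakFrameConstants assigns one slot per set bit in kPushedGpRegs.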
++inline bool HaveSameSign(int64_t a, int64_t b) { return ((a ^ b) >= 0); } ++ ++uint32_t get_fcsr_condition_bit(uint32_t cc) { ++ if (cc == 0) { ++ return 23; ++ } else { ++ return 24 + cc; ++ } ++} ++ ++static int64_t MultiplyHighSigned(int64_t u, int64_t v) { ++ uint64_t u0, v0, w0; ++ int64_t u1, v1, w1, w2, t; ++ ++ u0 = u & 0xFFFFFFFFL; ++ u1 = u >> 32; ++ v0 = v & 0xFFFFFFFFL; ++ v1 = v >> 32; ++ ++ w0 = u0 * v0; ++ t = u1 * v0 + (w0 >> 32); ++ w1 = t & 0xFFFFFFFFL; ++ w2 = t >> 32; ++ w1 = u0 * v1 + w1; ++ ++ return u1 * v1 + w2 + (w1 >> 32); ++} ++ ++static uint64_t MultiplyHighUnsigned(uint64_t u, uint64_t v) { ++ uint64_t u0, v0, w0; ++ uint64_t u1, v1, w1, w2, t; ++ ++ u0 = u & 0xFFFFFFFFL; ++ u1 = u >> 32; ++ v0 = v & 0xFFFFFFFFL; ++ v1 = v >> 32; ++ ++ w0 = u0 * v0; ++ t = u1 * v0 + (w0 >> 32); ++ w1 = t & 0xFFFFFFFFL; ++ w2 = t >> 32; ++ w1 = u0 * v1 + w1; ++ ++ return u1 * v1 + w2 + (w1 >> 32); ++} ++ ++#ifdef PRINT_SIM_LOG ++inline void printf_instr(const char* _Format, ...) { ++ va_list varList; ++ va_start(varList, _Format); ++ vprintf(_Format, varList); ++ va_end(varList); ++} ++#else ++#define printf_instr(...) ++#endif ++ ++// This macro provides a platform independent use of sscanf. The reason for ++// SScanF not being implemented in a platform independent was through ++// ::v8::internal::OS in the same way as SNPrintF is that the Windows C Run-Time ++// Library does not provide vsscanf. ++#define SScanF sscanf // NOLINT ++ ++// The La64Debugger class is used by the simulator while debugging simulated ++// code. ++class La64Debugger { ++ public: ++ explicit La64Debugger(Simulator* sim) : sim_(sim) {} ++ ++ void Stop(Instruction* instr); ++ void Debug(); ++ // Print all registers with a nice formatting. ++ void PrintAllRegs(); ++ void PrintAllRegsIncludingFPU(); ++ ++ private: ++ // We set the breakpoint code to 0xFFFF to easily recognize it. ++ static const Instr kBreakpointInstr = BREAK | 0xFFFF; ++ static const Instr kNopInstr = 0x0; ++ ++ Simulator* sim_; ++ ++ int64_t GetRegisterValue(int regnum); ++ int64_t GetFPURegisterValue(int regnum); ++ float GetFPURegisterValueFloat(int regnum); ++ double GetFPURegisterValueDouble(int regnum); ++ bool GetValue(const char* desc, int64_t* value); ++ ++ // Set or delete a breakpoint. Returns true if successful. ++ bool SetBreakpoint(Instruction* breakpc); ++ bool DeleteBreakpoint(Instruction* breakpc); ++ ++ // Undo and redo all breakpoints. This is needed to bracket disassembly and ++ // execution to skip past breakpoints when run from the debugger. ++ void UndoBreakpoints(); ++ void RedoBreakpoints(); ++}; ++ ++inline void UNSUPPORTED() { printf("Sim: Unsupported instruction.\n"); } ++ ++void La64Debugger::Stop(Instruction* instr) { ++ // Get the stop code. 
++ uint32_t code = instr->Bits(25, 6); ++ PrintF("Simulator hit (%u)\n", code); ++ Debug(); ++} ++ ++int64_t La64Debugger::GetRegisterValue(int regnum) { ++ if (regnum == kNumSimuRegisters) { ++ return sim_->get_pc(); ++ } else { ++ return sim_->get_register(regnum); ++ } ++} ++ ++int64_t La64Debugger::GetFPURegisterValue(int regnum) { ++ if (regnum == kNumFPURegisters) { ++ return sim_->get_pc(); ++ } else { ++ return sim_->get_fpu_register(regnum); ++ } ++} ++ ++float La64Debugger::GetFPURegisterValueFloat(int regnum) { ++ if (regnum == kNumFPURegisters) { ++ return sim_->get_pc(); ++ } else { ++ return sim_->get_fpu_register_float(regnum); ++ } ++} ++ ++double La64Debugger::GetFPURegisterValueDouble(int regnum) { ++ if (regnum == kNumFPURegisters) { ++ return sim_->get_pc(); ++ } else { ++ return sim_->get_fpu_register_double(regnum); ++ } ++} ++ ++bool La64Debugger::GetValue(const char* desc, int64_t* value) { ++ int regnum = Registers::Number(desc); ++ int fpuregnum = FPURegisters::Number(desc); ++ ++ if (regnum != kInvalidRegister) { ++ *value = GetRegisterValue(regnum); ++ return true; ++ } else if (fpuregnum != kInvalidFPURegister) { ++ *value = GetFPURegisterValue(fpuregnum); ++ return true; ++ } else if (strncmp(desc, "0x", 2) == 0) { ++ return SScanF(desc + 2, "%" SCNx64, reinterpret_cast(value)) == ++ 1; ++ } else { ++ return SScanF(desc, "%" SCNu64, reinterpret_cast(value)) == 1; ++ } ++ return false; ++} ++ ++bool La64Debugger::SetBreakpoint(Instruction* breakpc) { ++ // Check if a breakpoint can be set. If not return without any side-effects. ++ if (sim_->break_pc_ != nullptr) { ++ return false; ++ } ++ ++ // Set the breakpoint. ++ sim_->break_pc_ = breakpc; ++ sim_->break_instr_ = breakpc->InstructionBits(); ++ // Not setting the breakpoint instruction in the code itself. It will be set ++ // when the debugger shell continues. ++ return true; ++} ++ ++bool La64Debugger::DeleteBreakpoint(Instruction* breakpc) { ++ if (sim_->break_pc_ != nullptr) { ++ sim_->break_pc_->SetInstructionBits(sim_->break_instr_); ++ } ++ ++ sim_->break_pc_ = nullptr; ++ sim_->break_instr_ = 0; ++ return true; ++} ++ ++void La64Debugger::UndoBreakpoints() { ++ if (sim_->break_pc_ != nullptr) { ++ sim_->break_pc_->SetInstructionBits(sim_->break_instr_); ++ } ++} ++ ++void La64Debugger::RedoBreakpoints() { ++ if (sim_->break_pc_ != nullptr) { ++ sim_->break_pc_->SetInstructionBits(kBreakpointInstr); ++ } ++} ++ ++void La64Debugger::PrintAllRegs() { ++#define REG_INFO(n) Registers::Name(n), GetRegisterValue(n), GetRegisterValue(n) ++ ++ PrintF("\n"); ++ // at, v0, a0. ++ PrintF("%3s: 0x%016" PRIx64 " %14" PRId64 "\t%3s: 0x%016" PRIx64 " %14" PRId64 ++ "\t%3s: 0x%016" PRIx64 " %14" PRId64 "\n", ++ REG_INFO(1), REG_INFO(2), REG_INFO(4)); ++ // v1, a1. ++ PrintF("%34s\t%3s: 0x%016" PRIx64 " %14" PRId64 " \t%3s: 0x%016" PRIx64 ++ " %14" PRId64 " \n", ++ "", REG_INFO(3), REG_INFO(5)); ++ // a2. ++ PrintF("%34s\t%34s\t%3s: 0x%016" PRIx64 " %14" PRId64 " \n", "", "", ++ REG_INFO(6)); ++ // a3. ++ PrintF("%34s\t%34s\t%3s: 0x%016" PRIx64 " %14" PRId64 " \n", "", "", ++ REG_INFO(7)); ++ PrintF("\n"); ++ // a4-t3, s0-s7 ++ for (int i = 0; i < 8; i++) { ++ PrintF("%3s: 0x%016" PRIx64 " %14" PRId64 " \t%3s: 0x%016" PRIx64 ++ " %14" PRId64 " \n", ++ REG_INFO(8 + i), REG_INFO(16 + i)); ++ } ++ PrintF("\n"); ++ // t8, k0, LO. ++ PrintF("%3s: 0x%016" PRIx64 " %14" PRId64 " \t%3s: 0x%016" PRIx64 ++ " %14" PRId64 " \t%3s: 0x%016" PRIx64 " %14" PRId64 " \n", ++ REG_INFO(24), REG_INFO(26), REG_INFO(32)); ++ // t9, k1, HI. 
++ PrintF("%3s: 0x%016" PRIx64 " %14" PRId64 " \t%3s: 0x%016" PRIx64 ++ " %14" PRId64 " \t%3s: 0x%016" PRIx64 " %14" PRId64 " \n", ++ REG_INFO(25), REG_INFO(27), REG_INFO(33)); ++ // sp, fp, gp. ++ PrintF("%3s: 0x%016" PRIx64 " %14" PRId64 " \t%3s: 0x%016" PRIx64 ++ " %14" PRId64 " \t%3s: 0x%016" PRIx64 " %14" PRId64 " \n", ++ REG_INFO(29), REG_INFO(30), REG_INFO(28)); ++ // pc. ++ PrintF("%3s: 0x%016" PRIx64 " %14" PRId64 " \t%3s: 0x%016" PRIx64 ++ " %14" PRId64 " \n", ++ REG_INFO(31), REG_INFO(34)); ++ ++#undef REG_INFO ++} ++ ++void La64Debugger::PrintAllRegsIncludingFPU() { ++#define FPU_REG_INFO(n) \ ++ FPURegisters::Name(n), GetFPURegisterValue(n), GetFPURegisterValueDouble(n) ++ ++ PrintAllRegs(); ++ ++ PrintF("\n\n"); ++ // f0, f1, f2, ... f31. ++ // TODO(plind): consider printing 2 columns for space efficiency. ++ PrintF("%3s: 0x%016" PRIx64 " %16.4e\n", FPU_REG_INFO(0)); ++ PrintF("%3s: 0x%016" PRIx64 " %16.4e\n", FPU_REG_INFO(1)); ++ PrintF("%3s: 0x%016" PRIx64 " %16.4e\n", FPU_REG_INFO(2)); ++ PrintF("%3s: 0x%016" PRIx64 " %16.4e\n", FPU_REG_INFO(3)); ++ PrintF("%3s: 0x%016" PRIx64 " %16.4e\n", FPU_REG_INFO(4)); ++ PrintF("%3s: 0x%016" PRIx64 " %16.4e\n", FPU_REG_INFO(5)); ++ PrintF("%3s: 0x%016" PRIx64 " %16.4e\n", FPU_REG_INFO(6)); ++ PrintF("%3s: 0x%016" PRIx64 " %16.4e\n", FPU_REG_INFO(7)); ++ PrintF("%3s: 0x%016" PRIx64 " %16.4e\n", FPU_REG_INFO(8)); ++ PrintF("%3s: 0x%016" PRIx64 " %16.4e\n", FPU_REG_INFO(9)); ++ PrintF("%3s: 0x%016" PRIx64 " %16.4e\n", FPU_REG_INFO(10)); ++ PrintF("%3s: 0x%016" PRIx64 " %16.4e\n", FPU_REG_INFO(11)); ++ PrintF("%3s: 0x%016" PRIx64 " %16.4e\n", FPU_REG_INFO(12)); ++ PrintF("%3s: 0x%016" PRIx64 " %16.4e\n", FPU_REG_INFO(13)); ++ PrintF("%3s: 0x%016" PRIx64 " %16.4e\n", FPU_REG_INFO(14)); ++ PrintF("%3s: 0x%016" PRIx64 " %16.4e\n", FPU_REG_INFO(15)); ++ PrintF("%3s: 0x%016" PRIx64 " %16.4e\n", FPU_REG_INFO(16)); ++ PrintF("%3s: 0x%016" PRIx64 " %16.4e\n", FPU_REG_INFO(17)); ++ PrintF("%3s: 0x%016" PRIx64 " %16.4e\n", FPU_REG_INFO(18)); ++ PrintF("%3s: 0x%016" PRIx64 " %16.4e\n", FPU_REG_INFO(19)); ++ PrintF("%3s: 0x%016" PRIx64 " %16.4e\n", FPU_REG_INFO(20)); ++ PrintF("%3s: 0x%016" PRIx64 " %16.4e\n", FPU_REG_INFO(21)); ++ PrintF("%3s: 0x%016" PRIx64 " %16.4e\n", FPU_REG_INFO(22)); ++ PrintF("%3s: 0x%016" PRIx64 " %16.4e\n", FPU_REG_INFO(23)); ++ PrintF("%3s: 0x%016" PRIx64 " %16.4e\n", FPU_REG_INFO(24)); ++ PrintF("%3s: 0x%016" PRIx64 " %16.4e\n", FPU_REG_INFO(25)); ++ PrintF("%3s: 0x%016" PRIx64 " %16.4e\n", FPU_REG_INFO(26)); ++ PrintF("%3s: 0x%016" PRIx64 " %16.4e\n", FPU_REG_INFO(27)); ++ PrintF("%3s: 0x%016" PRIx64 " %16.4e\n", FPU_REG_INFO(28)); ++ PrintF("%3s: 0x%016" PRIx64 " %16.4e\n", FPU_REG_INFO(29)); ++ PrintF("%3s: 0x%016" PRIx64 " %16.4e\n", FPU_REG_INFO(30)); ++ PrintF("%3s: 0x%016" PRIx64 " %16.4e\n", FPU_REG_INFO(31)); ++ ++#undef FPU_REG_INFO ++} ++ ++void La64Debugger::Debug() { ++ intptr_t last_pc = -1; ++ bool done = false; ++ ++#define COMMAND_SIZE 63 ++#define ARG_SIZE 255 ++ ++#define STR(a) #a ++#define XSTR(a) STR(a) ++ ++ char cmd[COMMAND_SIZE + 1]; ++ char arg1[ARG_SIZE + 1]; ++ char arg2[ARG_SIZE + 1]; ++ char* argv[3] = {cmd, arg1, arg2}; ++ ++ // Make sure to have a proper terminating character if reaching the limit. ++ cmd[COMMAND_SIZE] = 0; ++ arg1[ARG_SIZE] = 0; ++ arg2[ARG_SIZE] = 0; ++ ++ // Undo all set breakpoints while running in the debugger shell. This will ++ // make them invisible to all commands. 
++ UndoBreakpoints(); ++ ++ while (!done && (sim_->get_pc() != Simulator::end_sim_pc)) { ++ if (last_pc != sim_->get_pc()) { ++ disasm::NameConverter converter; ++ disasm::Disassembler dasm(converter); ++ // Use a reasonably large buffer. ++ v8::internal::EmbeddedVector buffer; ++ dasm.InstructionDecode(buffer, reinterpret_cast(sim_->get_pc())); ++ PrintF(" 0x%016" PRIx64 " %s\n", sim_->get_pc(), buffer.begin()); ++ last_pc = sim_->get_pc(); ++ } ++ char* line = ReadLine("sim> "); ++ if (line == nullptr) { ++ break; ++ } else { ++ char* last_input = sim_->last_debugger_input(); ++ if (strcmp(line, "\n") == 0 && last_input != nullptr) { ++ line = last_input; ++ } else { ++ // Ownership is transferred to sim_; ++ sim_->set_last_debugger_input(line); ++ } ++ // Use sscanf to parse the individual parts of the command line. At the ++ // moment no command expects more than two parameters. ++ int argc = SScanF(line, ++ "%" XSTR(COMMAND_SIZE) "s " ++ "%" XSTR(ARG_SIZE) "s " ++ "%" XSTR(ARG_SIZE) "s", ++ cmd, arg1, arg2); ++ if ((strcmp(cmd, "si") == 0) || (strcmp(cmd, "stepi") == 0)) { ++ Instruction* instr = reinterpret_cast(sim_->get_pc()); ++ if (!(instr->IsTrap()) || ++ instr->InstructionBits() == rtCallRedirInstr) { ++ sim_->InstructionDecode( ++ reinterpret_cast(sim_->get_pc())); ++ } else { ++ // Allow si to jump over generated breakpoints. ++ PrintF("/!\\ Jumping over generated breakpoint.\n"); ++ sim_->set_pc(sim_->get_pc() + kInstrSize); ++ } ++ } else if ((strcmp(cmd, "c") == 0) || (strcmp(cmd, "cont") == 0)) { ++ // Execute the one instruction we broke at with breakpoints disabled. ++ sim_->InstructionDecode(reinterpret_cast(sim_->get_pc())); ++ // Leave the debugger shell. ++ done = true; ++ } else if ((strcmp(cmd, "p") == 0) || (strcmp(cmd, "print") == 0)) { ++ if (argc == 2) { ++ int64_t value; ++ double dvalue; ++ if (strcmp(arg1, "all") == 0) { ++ PrintAllRegs(); ++ } else if (strcmp(arg1, "allf") == 0) { ++ PrintAllRegsIncludingFPU(); ++ } else { ++ int regnum = Registers::Number(arg1); ++ int fpuregnum = FPURegisters::Number(arg1); ++ ++ if (regnum != kInvalidRegister) { ++ value = GetRegisterValue(regnum); ++ PrintF("%s: 0x%08" PRIx64 " %" PRId64 " \n", arg1, value, ++ value); ++ } else if (fpuregnum != kInvalidFPURegister) { ++ value = GetFPURegisterValue(fpuregnum); ++ dvalue = GetFPURegisterValueDouble(fpuregnum); ++ PrintF("%3s: 0x%016" PRIx64 " %16.4e\n", ++ FPURegisters::Name(fpuregnum), value, dvalue); ++ } else { ++ PrintF("%s unrecognized\n", arg1); ++ } ++ } ++ } else { ++ if (argc == 3) { ++ if (strcmp(arg2, "single") == 0) { ++ int64_t value; ++ float fvalue; ++ int fpuregnum = FPURegisters::Number(arg1); ++ ++ if (fpuregnum != kInvalidFPURegister) { ++ value = GetFPURegisterValue(fpuregnum); ++ value &= 0xFFFFFFFFUL; ++ fvalue = GetFPURegisterValueFloat(fpuregnum); ++ PrintF("%s: 0x%08" PRIx64 " %11.4e\n", arg1, value, fvalue); ++ } else { ++ PrintF("%s unrecognized\n", arg1); ++ } ++ } else { ++ PrintF("print single\n"); ++ } ++ } else { ++ PrintF("print or print single\n"); ++ } ++ } ++ } else if ((strcmp(cmd, "po") == 0) || ++ (strcmp(cmd, "printobject") == 0)) { ++ if (argc == 2) { ++ int64_t value; ++ StdoutStream os; ++ if (GetValue(arg1, &value)) { ++ Object obj(value); ++ os << arg1 << ": \n"; ++#ifdef DEBUG ++ obj.Print(os); ++ os << "\n"; ++#else ++ os << Brief(obj) << "\n"; ++#endif ++ } else { ++ os << arg1 << " unrecognized\n"; ++ } ++ } else { ++ PrintF("printobject \n"); ++ } ++ } else if (strcmp(cmd, "stack") == 0 || strcmp(cmd, "mem") == 0 || ++ 
strcmp(cmd, "dump") == 0) { ++ int64_t* cur = nullptr; ++ int64_t* end = nullptr; ++ int next_arg = 1; ++ ++ if (strcmp(cmd, "stack") == 0) { ++ cur = reinterpret_cast(sim_->get_register(Simulator::sp)); ++ } else { // Command "mem". ++ int64_t value; ++ if (!GetValue(arg1, &value)) { ++ PrintF("%s unrecognized\n", arg1); ++ continue; ++ } ++ cur = reinterpret_cast(value); ++ next_arg++; ++ } ++ ++ int64_t words; ++ if (argc == next_arg) { ++ words = 10; ++ } else { ++ if (!GetValue(argv[next_arg], &words)) { ++ words = 10; ++ } ++ } ++ end = cur + words; ++ ++ bool skip_obj_print = (strcmp(cmd, "dump") == 0); ++ while (cur < end) { ++ PrintF(" 0x%012" PRIxPTR " : 0x%016" PRIx64 " %14" PRId64 " ", ++ reinterpret_cast(cur), *cur, *cur); ++ Object obj(*cur); ++ Heap* current_heap = sim_->isolate_->heap(); ++ if (!skip_obj_print) { ++ if (obj.IsSmi() || ++ IsValidHeapObject(current_heap, HeapObject::cast(obj))) { ++ PrintF(" ("); ++ if (obj.IsSmi()) { ++ PrintF("smi %d", Smi::ToInt(obj)); ++ } else { ++ obj.ShortPrint(); ++ } ++ PrintF(")"); ++ } ++ } ++ PrintF("\n"); ++ cur++; ++ } ++ ++ } else if ((strcmp(cmd, "disasm") == 0) || (strcmp(cmd, "dpc") == 0) || ++ (strcmp(cmd, "di") == 0)) { ++ disasm::NameConverter converter; ++ disasm::Disassembler dasm(converter); ++ // Use a reasonably large buffer. ++ v8::internal::EmbeddedVector buffer; ++ ++ byte* cur = nullptr; ++ byte* end = nullptr; ++ ++ if (argc == 1) { ++ cur = reinterpret_cast(sim_->get_pc()); ++ end = cur + (10 * kInstrSize); ++ } else if (argc == 2) { ++ int regnum = Registers::Number(arg1); ++ if (regnum != kInvalidRegister || strncmp(arg1, "0x", 2) == 0) { ++ // The argument is an address or a register name. ++ int64_t value; ++ if (GetValue(arg1, &value)) { ++ cur = reinterpret_cast(value); ++ // Disassemble 10 instructions at . ++ end = cur + (10 * kInstrSize); ++ } ++ } else { ++ // The argument is the number of instructions. ++ int64_t value; ++ if (GetValue(arg1, &value)) { ++ cur = reinterpret_cast(sim_->get_pc()); ++ // Disassemble instructions. ++ end = cur + (value * kInstrSize); ++ } ++ } ++ } else { ++ int64_t value1; ++ int64_t value2; ++ if (GetValue(arg1, &value1) && GetValue(arg2, &value2)) { ++ cur = reinterpret_cast(value1); ++ end = cur + (value2 * kInstrSize); ++ } ++ } ++ ++ while (cur < end) { ++ dasm.InstructionDecode(buffer, cur); ++ PrintF(" 0x%08" PRIxPTR " %s\n", reinterpret_cast(cur), ++ buffer.begin()); ++ cur += kInstrSize; ++ } ++ } else if (strcmp(cmd, "gdb") == 0) { ++ PrintF("relinquishing control to gdb\n"); ++ v8::base::OS::DebugBreak(); ++ PrintF("regaining control from gdb\n"); ++ } else if (strcmp(cmd, "break") == 0) { ++ if (argc == 2) { ++ int64_t value; ++ if (GetValue(arg1, &value)) { ++ if (!SetBreakpoint(reinterpret_cast(value))) { ++ PrintF("setting breakpoint failed\n"); ++ } ++ } else { ++ PrintF("%s unrecognized\n", arg1); ++ } ++ } else { ++ PrintF("break
\n"); ++ } ++ } else if (strcmp(cmd, "del") == 0) { ++ if (!DeleteBreakpoint(nullptr)) { ++ PrintF("deleting breakpoint failed\n"); ++ } ++ } else if (strcmp(cmd, "flags") == 0) { ++ PrintF("No flags on LA64 !\n"); ++ } else if (strcmp(cmd, "stop") == 0) { ++ int64_t value; ++ intptr_t stop_pc = sim_->get_pc() - 2 * kInstrSize; ++ Instruction* stop_instr = reinterpret_cast(stop_pc); ++ Instruction* msg_address = ++ reinterpret_cast(stop_pc + kInstrSize); ++ if ((argc == 2) && (strcmp(arg1, "unstop") == 0)) { ++ // Remove the current stop. ++ if (sim_->IsStopInstruction(stop_instr)) { ++ stop_instr->SetInstructionBits(kNopInstr); ++ msg_address->SetInstructionBits(kNopInstr); ++ } else { ++ PrintF("Not at debugger stop.\n"); ++ } ++ } else if (argc == 3) { ++ // Print information about all/the specified breakpoint(s). ++ if (strcmp(arg1, "info") == 0) { ++ if (strcmp(arg2, "all") == 0) { ++ PrintF("Stop information:\n"); ++ for (uint32_t i = kMaxWatchpointCode + 1; i <= kMaxStopCode; ++ i++) { ++ sim_->PrintStopInfo(i); ++ } ++ } else if (GetValue(arg2, &value)) { ++ sim_->PrintStopInfo(value); ++ } else { ++ PrintF("Unrecognized argument.\n"); ++ } ++ } else if (strcmp(arg1, "enable") == 0) { ++ // Enable all/the specified breakpoint(s). ++ if (strcmp(arg2, "all") == 0) { ++ for (uint32_t i = kMaxWatchpointCode + 1; i <= kMaxStopCode; ++ i++) { ++ sim_->EnableStop(i); ++ } ++ } else if (GetValue(arg2, &value)) { ++ sim_->EnableStop(value); ++ } else { ++ PrintF("Unrecognized argument.\n"); ++ } ++ } else if (strcmp(arg1, "disable") == 0) { ++ // Disable all/the specified breakpoint(s). ++ if (strcmp(arg2, "all") == 0) { ++ for (uint32_t i = kMaxWatchpointCode + 1; i <= kMaxStopCode; ++ i++) { ++ sim_->DisableStop(i); ++ } ++ } else if (GetValue(arg2, &value)) { ++ sim_->DisableStop(value); ++ } else { ++ PrintF("Unrecognized argument.\n"); ++ } ++ } ++ } else { ++ PrintF("Wrong usage. Use help command for more information.\n"); ++ } ++ } else if ((strcmp(cmd, "stat") == 0) || (strcmp(cmd, "st") == 0)) { ++ // Print registers and disassemble. ++ PrintAllRegs(); ++ PrintF("\n"); ++ ++ disasm::NameConverter converter; ++ disasm::Disassembler dasm(converter); ++ // Use a reasonably large buffer. ++ v8::internal::EmbeddedVector buffer; ++ ++ byte* cur = nullptr; ++ byte* end = nullptr; ++ ++ if (argc == 1) { ++ cur = reinterpret_cast(sim_->get_pc()); ++ end = cur + (10 * kInstrSize); ++ } else if (argc == 2) { ++ int64_t value; ++ if (GetValue(arg1, &value)) { ++ cur = reinterpret_cast(value); ++ // no length parameter passed, assume 10 instructions ++ end = cur + (10 * kInstrSize); ++ } ++ } else { ++ int64_t value1; ++ int64_t value2; ++ if (GetValue(arg1, &value1) && GetValue(arg2, &value2)) { ++ cur = reinterpret_cast(value1); ++ end = cur + (value2 * kInstrSize); ++ } ++ } ++ ++ while (cur < end) { ++ dasm.InstructionDecode(buffer, cur); ++ PrintF(" 0x%08" PRIxPTR " %s\n", reinterpret_cast(cur), ++ buffer.begin()); ++ cur += kInstrSize; ++ } ++ } else if ((strcmp(cmd, "h") == 0) || (strcmp(cmd, "help") == 0)) { ++ PrintF("cont\n"); ++ PrintF(" continue execution (alias 'c')\n"); ++ PrintF("stepi\n"); ++ PrintF(" step one instruction (alias 'si')\n"); ++ PrintF("print \n"); ++ PrintF(" print register content (alias 'p')\n"); ++ PrintF(" use register name 'all' to print all registers\n"); ++ PrintF("printobject \n"); ++ PrintF(" print an object from a register (alias 'po')\n"); ++ PrintF("stack []\n"); ++ PrintF(" dump stack content, default dump 10 words)\n"); ++ PrintF("mem
[]\n"); ++ PrintF(" dump memory content, default dump 10 words)\n"); ++ PrintF("dump []\n"); ++ PrintF( ++ " dump memory content without pretty printing JS objects, default " ++ "dump 10 words)\n"); ++ PrintF("flags\n"); ++ PrintF(" print flags\n"); ++ PrintF("disasm []\n"); ++ PrintF("disasm [
]\n"); ++ PrintF("disasm [[
] ]\n"); ++ PrintF(" disassemble code, default is 10 instructions\n"); ++ PrintF(" from pc (alias 'di')\n"); ++ PrintF("gdb\n"); ++ PrintF(" enter gdb\n"); ++ PrintF("break
\n"); ++ PrintF(" set a break point on the address\n"); ++ PrintF("del\n"); ++ PrintF(" delete the breakpoint\n"); ++ PrintF("stop feature:\n"); ++ PrintF(" Description:\n"); ++ PrintF(" Stops are debug instructions inserted by\n"); ++ PrintF(" the Assembler::stop() function.\n"); ++ PrintF(" When hitting a stop, the Simulator will\n"); ++ PrintF(" stop and give control to the Debugger.\n"); ++ PrintF(" All stop codes are watched:\n"); ++ PrintF(" - They can be enabled / disabled: the Simulator\n"); ++ PrintF(" will / won't stop when hitting them.\n"); ++ PrintF(" - The Simulator keeps track of how many times they \n"); ++ PrintF(" are met. (See the info command.) Going over a\n"); ++ PrintF(" disabled stop still increases its counter. \n"); ++ PrintF(" Commands:\n"); ++ PrintF(" stop info all/ : print infos about number \n"); ++ PrintF(" or all stop(s).\n"); ++ PrintF(" stop enable/disable all/ : enables / disables\n"); ++ PrintF(" all or number stop(s)\n"); ++ PrintF(" stop unstop\n"); ++ PrintF(" ignore the stop instruction at the current location\n"); ++ PrintF(" from now on\n"); ++ } else { ++ PrintF("Unknown command: %s\n", cmd); ++ } ++ } ++ } ++ ++ // Add all the breakpoints back to stop execution and enter the debugger ++ // shell when hit. ++ RedoBreakpoints(); ++ ++#undef COMMAND_SIZE ++#undef ARG_SIZE ++ ++#undef STR ++#undef XSTR ++} ++ ++bool Simulator::ICacheMatch(void* one, void* two) { ++ DCHECK_EQ(reinterpret_cast(one) & CachePage::kPageMask, 0); ++ DCHECK_EQ(reinterpret_cast(two) & CachePage::kPageMask, 0); ++ return one == two; ++} ++ ++static uint32_t ICacheHash(void* key) { ++ return static_cast(reinterpret_cast(key)) >> 2; ++} ++ ++static bool AllOnOnePage(uintptr_t start, size_t size) { ++ intptr_t start_page = (start & ~CachePage::kPageMask); ++ intptr_t end_page = ((start + size) & ~CachePage::kPageMask); ++ return start_page == end_page; ++} ++ ++void Simulator::set_last_debugger_input(char* input) { ++ DeleteArray(last_debugger_input_); ++ last_debugger_input_ = input; ++} ++ ++void Simulator::SetRedirectInstruction(Instruction* instruction) { ++ instruction->SetInstructionBits(rtCallRedirInstr); ++} ++ ++void Simulator::FlushICache(base::CustomMatcherHashMap* i_cache, ++ void* start_addr, size_t size) { ++ int64_t start = reinterpret_cast(start_addr); ++ int64_t intra_line = (start & CachePage::kLineMask); ++ start -= intra_line; ++ size += intra_line; ++ size = ((size - 1) | CachePage::kLineMask) + 1; ++ int offset = (start & CachePage::kPageMask); ++ while (!AllOnOnePage(start, size - 1)) { ++ int bytes_to_flush = CachePage::kPageSize - offset; ++ FlushOnePage(i_cache, start, bytes_to_flush); ++ start += bytes_to_flush; ++ size -= bytes_to_flush; ++ DCHECK_EQ((int64_t)0, start & CachePage::kPageMask); ++ offset = 0; ++ } ++ if (size != 0) { ++ FlushOnePage(i_cache, start, size); ++ } ++} ++ ++CachePage* Simulator::GetCachePage(base::CustomMatcherHashMap* i_cache, ++ void* page) { ++ base::HashMap::Entry* entry = i_cache->LookupOrInsert(page, ICacheHash(page)); ++ if (entry->value == nullptr) { ++ CachePage* new_page = new CachePage(); ++ entry->value = new_page; ++ } ++ return reinterpret_cast(entry->value); ++} ++ ++// Flush from start up to and not including start + size. 
++void Simulator::FlushOnePage(base::CustomMatcherHashMap* i_cache, ++ intptr_t start, size_t size) { ++ DCHECK_LE(size, CachePage::kPageSize); ++ DCHECK(AllOnOnePage(start, size - 1)); ++ DCHECK_EQ(start & CachePage::kLineMask, 0); ++ DCHECK_EQ(size & CachePage::kLineMask, 0); ++ void* page = reinterpret_cast(start & (~CachePage::kPageMask)); ++ int offset = (start & CachePage::kPageMask); ++ CachePage* cache_page = GetCachePage(i_cache, page); ++ char* valid_bytemap = cache_page->ValidityByte(offset); ++ memset(valid_bytemap, CachePage::LINE_INVALID, size >> CachePage::kLineShift); ++} ++ ++void Simulator::CheckICache(base::CustomMatcherHashMap* i_cache, ++ Instruction* instr) { ++ int64_t address = reinterpret_cast(instr); ++ void* page = reinterpret_cast(address & (~CachePage::kPageMask)); ++ void* line = reinterpret_cast(address & (~CachePage::kLineMask)); ++ int offset = (address & CachePage::kPageMask); ++ CachePage* cache_page = GetCachePage(i_cache, page); ++ char* cache_valid_byte = cache_page->ValidityByte(offset); ++ bool cache_hit = (*cache_valid_byte == CachePage::LINE_VALID); ++ char* cached_line = cache_page->CachedData(offset & ~CachePage::kLineMask); ++ if (cache_hit) { ++ // Check that the data in memory matches the contents of the I-cache. ++ CHECK_EQ(0, memcmp(reinterpret_cast(instr), ++ cache_page->CachedData(offset), kInstrSize)); ++ } else { ++ // Cache miss. Load memory into the cache. ++ memcpy(cached_line, line, CachePage::kLineLength); ++ *cache_valid_byte = CachePage::LINE_VALID; ++ } ++} ++ ++Simulator::Simulator(Isolate* isolate) : isolate_(isolate) { ++ // Set up simulator support first. Some of this information is needed to ++ // setup the architecture state. ++ stack_size_ = FLAG_sim_stack_size * KB; ++ stack_ = reinterpret_cast(malloc(stack_size_)); ++ pc_modified_ = false; ++ icount_ = 0; ++ break_count_ = 0; ++ break_pc_ = nullptr; ++ break_instr_ = 0; ++ ++ // Set up architecture state. ++ // All registers are initialized to zero to start with. ++ for (int i = 0; i < kNumSimuRegisters; i++) { ++ registers_[i] = 0; ++ } ++ for (int i = 0; i < kNumFPURegisters; i++) { ++ FPUregisters_[i] = 0; ++ } ++ for (int i = 0; i < kNumCFRegisters; i++) { ++ CFregisters_[i] = 0; ++ } ++ ++ FCSR_ = 0; ++ ++ // The sp is initialized to point to the bottom (high address) of the ++ // allocated stack area. To be safe in potential stack underflows we leave ++ // some buffer below. ++ registers_[sp] = reinterpret_cast(stack_) + stack_size_ - 64; ++ // The ra and pc are initialized to a known bad value that will cause an ++ // access violation if the simulator ever tries to execute it. ++ registers_[pc] = bad_ra; ++ registers_[ra] = bad_ra; ++ ++ last_debugger_input_ = nullptr; ++} ++ ++Simulator::~Simulator() { ++ GlobalMonitor::Get()->RemoveLinkedAddress(&global_monitor_thread_); ++ free(stack_); ++} ++ ++// Get the active Simulator for the current thread. ++Simulator* Simulator::current(Isolate* isolate) { ++ v8::internal::Isolate::PerIsolateThreadData* isolate_data = ++ isolate->FindOrAllocatePerThreadDataForThisThread(); ++ DCHECK_NOT_NULL(isolate_data); ++ ++ Simulator* sim = isolate_data->simulator(); ++ if (sim == nullptr) { ++ // TODO(146): delete the simulator object when a thread/isolate goes away. ++ sim = new Simulator(isolate); ++ isolate_data->set_simulator(sim); ++ } ++ return sim; ++} ++ ++// Sets the register in the architecture state. It will also deal with updating ++// Simulator internal state for special registers such as PC. 
++void Simulator::set_register(int reg, int64_t value) { ++ DCHECK((reg >= 0) && (reg < kNumSimuRegisters)); ++ if (reg == pc) { ++ pc_modified_ = true; ++ } ++ ++ // Zero register always holds 0. ++ registers_[reg] = (reg == 0) ? 0 : value; ++} ++ ++void Simulator::set_dw_register(int reg, const int* dbl) { ++ DCHECK((reg >= 0) && (reg < kNumSimuRegisters)); ++ registers_[reg] = dbl[1]; ++ registers_[reg] = registers_[reg] << 32; ++ registers_[reg] += dbl[0]; ++} ++ ++void Simulator::set_fpu_register(int fpureg, int64_t value) { ++ DCHECK((fpureg >= 0) && (fpureg < kNumFPURegisters)); ++ FPUregisters_[fpureg] = value; ++} ++ ++void Simulator::set_fpu_register_word(int fpureg, int32_t value) { ++ // Set ONLY lower 32-bits, leaving upper bits untouched. ++ DCHECK((fpureg >= 0) && (fpureg < kNumFPURegisters)); ++ int32_t* pword; ++ pword = reinterpret_cast(&FPUregisters_[fpureg]); ++ ++ *pword = value; ++} ++ ++void Simulator::set_fpu_register_hi_word(int fpureg, int32_t value) { ++ // Set ONLY upper 32-bits, leaving lower bits untouched. ++ DCHECK((fpureg >= 0) && (fpureg < kNumFPURegisters)); ++ int32_t* phiword; ++ phiword = (reinterpret_cast(&FPUregisters_[fpureg])) + 1; ++ ++ *phiword = value; ++} ++ ++void Simulator::set_fpu_register_float(int fpureg, float value) { ++ DCHECK((fpureg >= 0) && (fpureg < kNumFPURegisters)); ++ *bit_cast(&FPUregisters_[fpureg]) = value; ++} ++ ++void Simulator::set_fpu_register_double(int fpureg, double value) { ++ DCHECK((fpureg >= 0) && (fpureg < kNumFPURegisters)); ++ *bit_cast(&FPUregisters_[fpureg]) = value; ++} ++ ++void Simulator::set_cf_register(int cfreg, bool value) { ++ DCHECK((cfreg >= 0) && (cfreg < kNumCFRegisters)); ++ CFregisters_[cfreg] = value; ++} ++ ++// Get the register from the architecture state. This function does handle ++// the special case of accessing the PC register. ++int64_t Simulator::get_register(int reg) const { ++ DCHECK((reg >= 0) && (reg < kNumSimuRegisters)); ++ if (reg == 0) ++ return 0; ++ else ++ return registers_[reg] + ((reg == pc) ? Instruction::kPCReadOffset : 0); ++} ++ ++double Simulator::get_double_from_register_pair(int reg) { ++ // TODO(plind): bad ABI stuff, refactor or remove. ++ DCHECK((reg >= 0) && (reg < kNumSimuRegisters)); ++ ++ double dm_val = 0.0; ++ // Read the bits from the unsigned integer register_[] array ++ // into the double precision floating point value and return it. 
++ char buffer[sizeof(registers_[0])]; ++ memcpy(buffer, ®isters_[reg], sizeof(registers_[0])); ++ memcpy(&dm_val, buffer, sizeof(registers_[0])); ++ return (dm_val); ++} ++ ++int64_t Simulator::get_fpu_register(int fpureg) const { ++ DCHECK((fpureg >= 0) && (fpureg < kNumFPURegisters)); ++ return FPUregisters_[fpureg]; ++} ++ ++int32_t Simulator::get_fpu_register_word(int fpureg) const { ++ DCHECK((fpureg >= 0) && (fpureg < kNumFPURegisters)); ++ return static_cast(FPUregisters_[fpureg] & 0xFFFFFFFF); ++} ++ ++int32_t Simulator::get_fpu_register_signed_word(int fpureg) const { ++ DCHECK((fpureg >= 0) && (fpureg < kNumFPURegisters)); ++ return static_cast(FPUregisters_[fpureg] & 0xFFFFFFFF); ++} ++ ++int32_t Simulator::get_fpu_register_hi_word(int fpureg) const { ++ DCHECK((fpureg >= 0) && (fpureg < kNumFPURegisters)); ++ return static_cast((FPUregisters_[fpureg] >> 32) & 0xFFFFFFFF); ++} ++ ++float Simulator::get_fpu_register_float(int fpureg) const { ++ DCHECK((fpureg >= 0) && (fpureg < kNumFPURegisters)); ++ return *bit_cast(const_cast(&FPUregisters_[fpureg])); ++} ++ ++double Simulator::get_fpu_register_double(int fpureg) const { ++ DCHECK((fpureg >= 0) && (fpureg < kNumFPURegisters)); ++ return *bit_cast(&FPUregisters_[fpureg]); ++} ++ ++bool Simulator::get_cf_register(int cfreg) const { ++ DCHECK((cfreg >= 0) && (cfreg < kNumCFRegisters)); ++ return CFregisters_[cfreg]; ++} ++ ++// Runtime FP routines take up to two double arguments and zero ++// or one integer arguments. All are constructed here, ++// from a0-a3 or fa0 and fa1 (n64). ++void Simulator::GetFpArgs(double* x, double* y, int32_t* z) { ++ const int fparg2 = f1; ++ *x = get_fpu_register_double(f0); ++ *y = get_fpu_register_double(fparg2); ++ *z = static_cast(get_register(a2)); ++} ++ ++// The return value is either in v0/v1 or f0. ++void Simulator::SetFpResult(const double& result) { ++ set_fpu_register_double(0, result); ++} ++ ++// Helper functions for setting and testing the FCSR register's bits. ++void Simulator::set_fcsr_bit(uint32_t cc, bool value) { ++ if (value) { ++ FCSR_ |= (1 << cc); ++ } else { ++ FCSR_ &= ~(1 << cc); ++ } ++} ++ ++bool Simulator::test_fcsr_bit(uint32_t cc) { return FCSR_ & (1 << cc); } ++ ++void Simulator::set_fcsr_rounding_mode(FPURoundingMode mode) { ++ FCSR_ |= mode & kFPURoundingModeMask; ++} ++ ++unsigned int Simulator::get_fcsr_rounding_mode() { ++ return FCSR_ & kFPURoundingModeMask; ++} ++ ++// Sets the rounding error codes in FCSR based on the result of the rounding. ++// Returns true if the operation was invalid. ++bool Simulator::set_fcsr_round_error(double original, double rounded) { ++ bool ret = false; ++ double max_int32 = std::numeric_limits::max(); ++ double min_int32 = std::numeric_limits::min(); ++ ++ if (!std::isfinite(original) || !std::isfinite(rounded)) { ++ set_fcsr_bit(kFCSRInvalidOpFlagBit, true); ++ ret = true; ++ } ++ ++ if (original != rounded) { ++ set_fcsr_bit(kFCSRInexactFlagBit, true); ++ } ++ ++ if (rounded < DBL_MIN && rounded > -DBL_MIN && rounded != 0) { ++ set_fcsr_bit(kFCSRUnderflowFlagBit, true); ++ ret = true; ++ } ++ ++ if (rounded > max_int32 || rounded < min_int32) { ++ set_fcsr_bit(kFCSROverflowFlagBit, true); ++ // The reference is not really clear but it seems this is required: ++ set_fcsr_bit(kFCSRInvalidOpFlagBit, true); ++ ret = true; ++ } ++ ++ return ret; ++} ++ ++// Sets the rounding error codes in FCSR based on the result of the rounding. ++// Returns true if the operation was invalid. 
++bool Simulator::set_fcsr_round64_error(double original, double rounded) { ++ bool ret = false; ++ // The value of INT64_MAX (2^63-1) can't be represented as double exactly, ++ // loading the most accurate representation into max_int64, which is 2^63. ++ double max_int64 = std::numeric_limits::max(); ++ double min_int64 = std::numeric_limits::min(); ++ ++ if (!std::isfinite(original) || !std::isfinite(rounded)) { ++ set_fcsr_bit(kFCSRInvalidOpFlagBit, true); ++ ret = true; ++ } ++ ++ if (original != rounded) { ++ set_fcsr_bit(kFCSRInexactFlagBit, true); ++ } ++ ++ if (rounded < DBL_MIN && rounded > -DBL_MIN && rounded != 0) { ++ set_fcsr_bit(kFCSRUnderflowFlagBit, true); ++ ret = true; ++ } ++ ++ if (rounded >= max_int64 || rounded < min_int64) { ++ set_fcsr_bit(kFCSROverflowFlagBit, true); ++ // The reference is not really clear but it seems this is required: ++ set_fcsr_bit(kFCSRInvalidOpFlagBit, true); ++ ret = true; ++ } ++ ++ return ret; ++} ++ ++// Sets the rounding error codes in FCSR based on the result of the rounding. ++// Returns true if the operation was invalid. ++bool Simulator::set_fcsr_round_error(float original, float rounded) { ++ bool ret = false; ++ double max_int32 = std::numeric_limits::max(); ++ double min_int32 = std::numeric_limits::min(); ++ ++ if (!std::isfinite(original) || !std::isfinite(rounded)) { ++ set_fcsr_bit(kFCSRInvalidOpFlagBit, true); ++ ret = true; ++ } ++ ++ if (original != rounded) { ++ set_fcsr_bit(kFCSRInexactFlagBit, true); ++ } ++ ++ if (rounded < FLT_MIN && rounded > -FLT_MIN && rounded != 0) { ++ set_fcsr_bit(kFCSRUnderflowFlagBit, true); ++ ret = true; ++ } ++ ++ if (rounded > max_int32 || rounded < min_int32) { ++ set_fcsr_bit(kFCSROverflowFlagBit, true); ++ // The reference is not really clear but it seems this is required: ++ set_fcsr_bit(kFCSRInvalidOpFlagBit, true); ++ ret = true; ++ } ++ ++ return ret; ++} ++ ++void Simulator::set_fpu_register_word_invalid_result(float original, ++ float rounded) { ++ double max_int32 = std::numeric_limits::max(); ++ double min_int32 = std::numeric_limits::min(); ++ if (std::isnan(original)) { ++ set_fpu_register_word(fd_reg(), 0); ++ } else if (rounded > max_int32) { ++ set_fpu_register_word(fd_reg(), kFPUInvalidResult); ++ } else if (rounded < min_int32) { ++ set_fpu_register_word(fd_reg(), kFPUInvalidResultNegative); ++ } else { ++ UNREACHABLE(); ++ } ++} ++ ++void Simulator::set_fpu_register_invalid_result(float original, float rounded) { ++ double max_int32 = std::numeric_limits::max(); ++ double min_int32 = std::numeric_limits::min(); ++ if (std::isnan(original)) { ++ set_fpu_register(fd_reg(), 0); ++ } else if (rounded > max_int32) { ++ set_fpu_register(fd_reg(), kFPUInvalidResult); ++ } else if (rounded < min_int32) { ++ set_fpu_register(fd_reg(), kFPUInvalidResultNegative); ++ } else { ++ UNREACHABLE(); ++ } ++} ++ ++void Simulator::set_fpu_register_invalid_result64(float original, ++ float rounded) { ++ // The value of INT64_MAX (2^63-1) can't be represented as double exactly, ++ // loading the most accurate representation into max_int64, which is 2^63. 
++ double max_int64 = std::numeric_limits::max(); ++ double min_int64 = std::numeric_limits::min(); ++ if (std::isnan(original)) { ++ set_fpu_register(fd_reg(), 0); ++ } else if (rounded >= max_int64) { ++ set_fpu_register(fd_reg(), kFPU64InvalidResult); ++ } else if (rounded < min_int64) { ++ set_fpu_register(fd_reg(), kFPU64InvalidResultNegative); ++ } else { ++ UNREACHABLE(); ++ } ++} ++ ++void Simulator::set_fpu_register_word_invalid_result(double original, ++ double rounded) { ++ double max_int32 = std::numeric_limits::max(); ++ double min_int32 = std::numeric_limits::min(); ++ if (std::isnan(original)) { ++ set_fpu_register_word(fd_reg(), 0); ++ } else if (rounded > max_int32) { ++ set_fpu_register_word(fd_reg(), kFPUInvalidResult); ++ } else if (rounded < min_int32) { ++ set_fpu_register_word(fd_reg(), kFPUInvalidResultNegative); ++ } else { ++ UNREACHABLE(); ++ } ++} ++ ++void Simulator::set_fpu_register_invalid_result(double original, ++ double rounded) { ++ double max_int32 = std::numeric_limits::max(); ++ double min_int32 = std::numeric_limits::min(); ++ if (std::isnan(original)) { ++ set_fpu_register(fd_reg(), 0); ++ } else if (rounded > max_int32) { ++ set_fpu_register(fd_reg(), kFPUInvalidResult); ++ } else if (rounded < min_int32) { ++ set_fpu_register(fd_reg(), kFPUInvalidResultNegative); ++ } else { ++ UNREACHABLE(); ++ } ++} ++ ++void Simulator::set_fpu_register_invalid_result64(double original, ++ double rounded) { ++ // The value of INT64_MAX (2^63-1) can't be represented as double exactly, ++ // loading the most accurate representation into max_int64, which is 2^63. ++ double max_int64 = std::numeric_limits::max(); ++ double min_int64 = std::numeric_limits::min(); ++ if (std::isnan(original)) { ++ set_fpu_register(fd_reg(), 0); ++ } else if (rounded >= max_int64) { ++ set_fpu_register(fd_reg(), kFPU64InvalidResult); ++ } else if (rounded < min_int64) { ++ set_fpu_register(fd_reg(), kFPU64InvalidResultNegative); ++ } else { ++ UNREACHABLE(); ++ } ++} ++ ++// Sets the rounding error codes in FCSR based on the result of the rounding. ++// Returns true if the operation was invalid. ++bool Simulator::set_fcsr_round64_error(float original, float rounded) { ++ bool ret = false; ++ // The value of INT64_MAX (2^63-1) can't be represented as double exactly, ++ // loading the most accurate representation into max_int64, which is 2^63. ++ double max_int64 = std::numeric_limits::max(); ++ double min_int64 = std::numeric_limits::min(); ++ ++ if (!std::isfinite(original) || !std::isfinite(rounded)) { ++ set_fcsr_bit(kFCSRInvalidOpFlagBit, true); ++ ret = true; ++ } ++ ++ if (original != rounded) { ++ set_fcsr_bit(kFCSRInexactFlagBit, true); ++ } ++ ++ if (rounded < FLT_MIN && rounded > -FLT_MIN && rounded != 0) { ++ set_fcsr_bit(kFCSRUnderflowFlagBit, true); ++ ret = true; ++ } ++ ++ if (rounded >= max_int64 || rounded < min_int64) { ++ set_fcsr_bit(kFCSROverflowFlagBit, true); ++ // The reference is not really clear but it seems this is required: ++ set_fcsr_bit(kFCSRInvalidOpFlagBit, true); ++ ret = true; ++ } ++ ++ return ret; ++} ++ ++// For ftint instructions only ++void Simulator::round_according_to_fcsr(double toRound, double* rounded, ++ int32_t* rounded_int) { ++ // 0 RN (round to nearest): Round a result to the nearest ++ // representable value; if the result is exactly halfway between ++ // two representable values, round to zero. 
++ ++ // 1 RZ (round toward zero): Round a result to the closest ++ // representable value whose absolute value is less than or ++ // equal to the infinitely accurate result. ++ ++ // 2 RP (round up, or toward +infinity): Round a result to the ++ // next representable value up. ++ ++ // 3 RN (round down, or toward −infinity): Round a result to ++ // the next representable value down. ++ // switch ((FCSR_ >> 8) & 3) { ++ switch (FCSR_ & kFPURoundingModeMask) { ++ case kRoundToNearest: ++ *rounded = std::floor(toRound + 0.5); ++ *rounded_int = static_cast(*rounded); ++ if ((*rounded_int & 1) != 0 && *rounded_int - toRound == 0.5) { ++ // If the number is halfway between two integers, ++ // round to the even one. ++ *rounded_int -= 1; ++ *rounded -= 1.; ++ } ++ break; ++ case kRoundToZero: ++ *rounded = trunc(toRound); ++ *rounded_int = static_cast(*rounded); ++ break; ++ case kRoundToPlusInf: ++ *rounded = std::ceil(toRound); ++ *rounded_int = static_cast(*rounded); ++ break; ++ case kRoundToMinusInf: ++ *rounded = std::floor(toRound); ++ *rounded_int = static_cast(*rounded); ++ break; ++ } ++} ++ ++void Simulator::round64_according_to_fcsr(double toRound, double* rounded, ++ int64_t* rounded_int) { ++ // 0 RN (round to nearest): Round a result to the nearest ++ // representable value; if the result is exactly halfway between ++ // two representable values, round to zero. ++ ++ // 1 RZ (round toward zero): Round a result to the closest ++ // representable value whose absolute value is less than or. ++ // equal to the infinitely accurate result. ++ ++ // 2 RP (round up, or toward +infinity): Round a result to the ++ // next representable value up. ++ ++ // 3 RN (round down, or toward −infinity): Round a result to ++ // the next representable value down. ++ switch (FCSR_ & kFPURoundingModeMask) { ++ case kRoundToNearest: ++ *rounded = std::floor(toRound + 0.5); ++ *rounded_int = static_cast(*rounded); ++ if ((*rounded_int & 1) != 0 && *rounded_int - toRound == 0.5) { ++ // If the number is halfway between two integers, ++ // round to the even one. ++ *rounded_int -= 1; ++ *rounded -= 1.; ++ } ++ break; ++ case kRoundToZero: ++ *rounded = std::trunc(toRound); ++ *rounded_int = static_cast(*rounded); ++ break; ++ case kRoundToPlusInf: ++ *rounded = std::ceil(toRound); ++ *rounded_int = static_cast(*rounded); ++ break; ++ case kRoundToMinusInf: ++ *rounded = std::floor(toRound); ++ *rounded_int = static_cast(*rounded); ++ break; ++ } ++} ++ ++void Simulator::round_according_to_fcsr(float toRound, float* rounded, ++ int32_t* rounded_int) { ++ // 0 RN (round to nearest): Round a result to the nearest ++ // representable value; if the result is exactly halfway between ++ // two representable values, round to zero. ++ ++ // 1 RZ (round toward zero): Round a result to the closest ++ // representable value whose absolute value is less than or ++ // equal to the infinitely accurate result. ++ ++ // 2 RP (round up, or toward +infinity): Round a result to the ++ // next representable value up. ++ ++ // 3 RN (round down, or toward −infinity): Round a result to ++ // the next representable value down. ++ switch (FCSR_ & kFPURoundingModeMask) { ++ case kRoundToNearest: ++ *rounded = std::floor(toRound + 0.5); ++ *rounded_int = static_cast(*rounded); ++ if ((*rounded_int & 1) != 0 && *rounded_int - toRound == 0.5) { ++ // If the number is halfway between two integers, ++ // round to the even one. 
++ *rounded_int -= 1; ++ *rounded -= 1.f; ++ } ++ break; ++ case kRoundToZero: ++ *rounded = std::trunc(toRound); ++ *rounded_int = static_cast(*rounded); ++ break; ++ case kRoundToPlusInf: ++ *rounded = std::ceil(toRound); ++ *rounded_int = static_cast(*rounded); ++ break; ++ case kRoundToMinusInf: ++ *rounded = std::floor(toRound); ++ *rounded_int = static_cast(*rounded); ++ break; ++ } ++} ++ ++void Simulator::round64_according_to_fcsr(float toRound, float* rounded, ++ int64_t* rounded_int) { ++ // 0 RN (round to nearest): Round a result to the nearest ++ // representable value; if the result is exactly halfway between ++ // two representable values, round to zero. ++ ++ // 1 RZ (round toward zero): Round a result to the closest ++ // representable value whose absolute value is less than or. ++ // equal to the infinitely accurate result. ++ ++ // 2 RP (round up, or toward +infinity): Round a result to the ++ // next representable value up. ++ ++ // 3 RN (round down, or toward −infinity): Round a result to ++ // the next representable value down. ++ switch (FCSR_ & kFPURoundingModeMask) { ++ case kRoundToNearest: ++ *rounded = std::floor(toRound + 0.5); ++ *rounded_int = static_cast(*rounded); ++ if ((*rounded_int & 1) != 0 && *rounded_int - toRound == 0.5) { ++ // If the number is halfway between two integers, ++ // round to the even one. ++ *rounded_int -= 1; ++ *rounded -= 1.f; ++ } ++ break; ++ case kRoundToZero: ++ *rounded = trunc(toRound); ++ *rounded_int = static_cast(*rounded); ++ break; ++ case kRoundToPlusInf: ++ *rounded = std::ceil(toRound); ++ *rounded_int = static_cast(*rounded); ++ break; ++ case kRoundToMinusInf: ++ *rounded = std::floor(toRound); ++ *rounded_int = static_cast(*rounded); ++ break; ++ } ++} ++ ++// Raw access to the PC register. ++void Simulator::set_pc(int64_t value) { ++ pc_modified_ = true; ++ registers_[pc] = value; ++} ++ ++bool Simulator::has_bad_pc() const { ++ return ((registers_[pc] == bad_ra) || (registers_[pc] == end_sim_pc)); ++} ++ ++// Raw access to the PC register without the special adjustment when reading. ++int64_t Simulator::get_pc() const { return registers_[pc]; } ++ ++// TODO(plind): refactor this messy debug code when we do unaligned access. ++void Simulator::DieOrDebug() { ++ if ((1)) { // Flag for this was removed. 
++ La64Debugger dbg(this); ++ dbg.Debug(); ++ } else { ++ base::OS::Abort(); ++ } ++} ++ ++void Simulator::TraceRegWr(int64_t value, TraceType t) { ++ if (::v8::internal::FLAG_trace_sim) { ++ union { ++ int64_t fmt_int64; ++ int32_t fmt_int32[2]; ++ float fmt_float[2]; ++ double fmt_double; ++ } v; ++ v.fmt_int64 = value; ++ ++ switch (t) { ++ case WORD: ++ SNPrintF(trace_buf_, ++ "%016" PRIx64 " (%" PRId64 ") int32:%" PRId32 ++ " uint32:%" PRIu32, ++ v.fmt_int64, icount_, v.fmt_int32[0], v.fmt_int32[0]); ++ break; ++ case DWORD: ++ SNPrintF(trace_buf_, ++ "%016" PRIx64 " (%" PRId64 ") int64:%" PRId64 ++ " uint64:%" PRIu64, ++ value, icount_, value, value); ++ break; ++ case FLOAT: ++ SNPrintF(trace_buf_, "%016" PRIx64 " (%" PRId64 ") flt:%e", ++ v.fmt_int64, icount_, v.fmt_float[0]); ++ break; ++ case DOUBLE: ++ SNPrintF(trace_buf_, "%016" PRIx64 " (%" PRId64 ") dbl:%e", ++ v.fmt_int64, icount_, v.fmt_double); ++ break; ++ case FLOAT_DOUBLE: ++ SNPrintF(trace_buf_, "%016" PRIx64 " (%" PRId64 ") flt:%e dbl:%e", ++ v.fmt_int64, icount_, v.fmt_float[0], v.fmt_double); ++ break; ++ case WORD_DWORD: ++ SNPrintF(trace_buf_, ++ "%016" PRIx64 " (%" PRId64 ") int32:%" PRId32 ++ " uint32:%" PRIu32 " int64:%" PRId64 " uint64:%" PRIu64, ++ v.fmt_int64, icount_, v.fmt_int32[0], v.fmt_int32[0], ++ v.fmt_int64, v.fmt_int64); ++ break; ++ default: ++ UNREACHABLE(); ++ } ++ } ++} ++ ++// TODO(plind): consider making icount_ printing a flag option. ++void Simulator::TraceMemRd(int64_t addr, int64_t value, TraceType t) { ++ if (::v8::internal::FLAG_trace_sim) { ++ union { ++ int64_t fmt_int64; ++ int32_t fmt_int32[2]; ++ float fmt_float[2]; ++ double fmt_double; ++ } v; ++ v.fmt_int64 = value; ++ ++ switch (t) { ++ case WORD: ++ SNPrintF(trace_buf_, ++ "%016" PRIx64 " <-- [%016" PRIx64 "] (%" PRId64 ++ ") int32:%" PRId32 " uint32:%" PRIu32, ++ v.fmt_int64, addr, icount_, v.fmt_int32[0], v.fmt_int32[0]); ++ break; ++ case DWORD: ++ SNPrintF(trace_buf_, ++ "%016" PRIx64 " <-- [%016" PRIx64 "] (%" PRId64 ++ ") int64:%" PRId64 " uint64:%" PRIu64, ++ value, addr, icount_, value, value); ++ break; ++ case FLOAT: ++ SNPrintF(trace_buf_, ++ "%016" PRIx64 " <-- [%016" PRIx64 "] (%" PRId64 ++ ") flt:%e", ++ v.fmt_int64, addr, icount_, v.fmt_float[0]); ++ break; ++ case DOUBLE: ++ SNPrintF(trace_buf_, ++ "%016" PRIx64 " <-- [%016" PRIx64 "] (%" PRId64 ++ ") dbl:%e", ++ v.fmt_int64, addr, icount_, v.fmt_double); ++ break; ++ case FLOAT_DOUBLE: ++ SNPrintF(trace_buf_, ++ "%016" PRIx64 " <-- [%016" PRIx64 "] (%" PRId64 ++ ") flt:%e dbl:%e", ++ v.fmt_int64, addr, icount_, v.fmt_float[0], v.fmt_double); ++ break; ++ default: ++ UNREACHABLE(); ++ } ++ } ++} ++ ++void Simulator::TraceMemWr(int64_t addr, int64_t value, TraceType t) { ++ if (::v8::internal::FLAG_trace_sim) { ++ switch (t) { ++ case BYTE: ++ SNPrintF(trace_buf_, ++ " %02" PRIx8 " --> [%016" PRIx64 "] (%" PRId64 ++ ")", ++ static_cast(value), addr, icount_); ++ break; ++ case HALF: ++ SNPrintF(trace_buf_, ++ " %04" PRIx16 " --> [%016" PRIx64 "] (%" PRId64 ++ ")", ++ static_cast(value), addr, icount_); ++ break; ++ case WORD: ++ SNPrintF(trace_buf_, ++ " %08" PRIx32 " --> [%016" PRIx64 "] (%" PRId64 ")", ++ static_cast(value), addr, icount_); ++ break; ++ case DWORD: ++ SNPrintF(trace_buf_, ++ "%016" PRIx64 " --> [%016" PRIx64 "] (%" PRId64 " )", ++ value, addr, icount_); ++ break; ++ default: ++ UNREACHABLE(); ++ } ++ } ++} ++ ++template ++void Simulator::TraceMemRd(int64_t addr, T value) { ++ if (::v8::internal::FLAG_trace_sim) { ++ switch (sizeof(T)) { ++ case 
1: ++ SNPrintF(trace_buf_, ++ "%08" PRIx8 " <-- [%08" PRIx64 "] (%" PRIu64 ++ ") int8:%" PRId8 " uint8:%" PRIu8, ++ static_cast(value), addr, icount_, ++ static_cast(value), static_cast(value)); ++ break; ++ case 2: ++ SNPrintF(trace_buf_, ++ "%08" PRIx16 " <-- [%08" PRIx64 "] (%" PRIu64 ++ ") int16:%" PRId16 " uint16:%" PRIu16, ++ static_cast(value), addr, icount_, ++ static_cast(value), static_cast(value)); ++ break; ++ case 4: ++ SNPrintF(trace_buf_, ++ "%08" PRIx32 " <-- [%08" PRIx64 "] (%" PRIu64 ++ ") int32:%" PRId32 " uint32:%" PRIu32, ++ static_cast(value), addr, icount_, ++ static_cast(value), static_cast(value)); ++ break; ++ case 8: ++ SNPrintF(trace_buf_, ++ "%08" PRIx64 " <-- [%08" PRIx64 "] (%" PRIu64 ++ ") int64:%" PRId64 " uint64:%" PRIu64, ++ static_cast(value), addr, icount_, ++ static_cast(value), static_cast(value)); ++ break; ++ default: ++ UNREACHABLE(); ++ } ++ } ++} ++ ++template ++void Simulator::TraceMemWr(int64_t addr, T value) { ++ if (::v8::internal::FLAG_trace_sim) { ++ switch (sizeof(T)) { ++ case 1: ++ SNPrintF(trace_buf_, ++ " %02" PRIx8 " --> [%08" PRIx64 "] (%" PRIu64 ")", ++ static_cast(value), addr, icount_); ++ break; ++ case 2: ++ SNPrintF(trace_buf_, ++ " %04" PRIx16 " --> [%08" PRIx64 "] (%" PRIu64 ")", ++ static_cast(value), addr, icount_); ++ break; ++ case 4: ++ SNPrintF(trace_buf_, ++ "%08" PRIx32 " --> [%08" PRIx64 "] (%" PRIu64 ")", ++ static_cast(value), addr, icount_); ++ break; ++ case 8: ++ SNPrintF(trace_buf_, ++ "%16" PRIx64 " --> [%08" PRIx64 "] (%" PRIu64 ")", ++ static_cast(value), addr, icount_); ++ break; ++ default: ++ UNREACHABLE(); ++ } ++ } ++} ++ ++// TODO(plind): sign-extend and zero-extend not implmented properly ++// on all the ReadXX functions, I don't think re-interpret cast does it. ++int32_t Simulator::ReadW(int64_t addr, Instruction* instr, TraceType t) { ++ if (addr >= 0 && addr < 0x400) { ++ // This has to be a nullptr-dereference, drop into debugger. ++ PrintF("Memory read from bad address: 0x%08" PRIx64 " , pc=0x%08" PRIxPTR ++ " \n", ++ addr, reinterpret_cast(instr)); ++ DieOrDebug(); ++ } ++ /* if ((addr & 0x3) == 0)*/ { ++ local_monitor_.NotifyLoad(); ++ int32_t* ptr = reinterpret_cast(addr); ++ TraceMemRd(addr, static_cast(*ptr), t); ++ return *ptr; ++ } ++ // PrintF("Unaligned read at 0x%08" PRIx64 " , pc=0x%08" V8PRIxPTR "\n", ++ // addr, ++ // reinterpret_cast(instr)); ++ // DieOrDebug(); ++ // return 0; ++} ++ ++uint32_t Simulator::ReadWU(int64_t addr, Instruction* instr) { ++ if (addr >= 0 && addr < 0x400) { ++ // This has to be a nullptr-dereference, drop into debugger. ++ PrintF("Memory read from bad address: 0x%08" PRIx64 " , pc=0x%08" PRIxPTR ++ " \n", ++ addr, reinterpret_cast(instr)); ++ DieOrDebug(); ++ } ++ // if ((addr & 0x3) == 0) { ++ local_monitor_.NotifyLoad(); ++ uint32_t* ptr = reinterpret_cast(addr); ++ TraceMemRd(addr, static_cast(*ptr), WORD); ++ return *ptr; ++ // } ++ // PrintF("Unaligned read at 0x%08" PRIx64 " , pc=0x%08" V8PRIxPTR "\n", addr, ++ // reinterpret_cast(instr)); ++ // DieOrDebug(); ++ // return 0; ++} ++ ++void Simulator::WriteW(int64_t addr, int32_t value, Instruction* instr) { ++ if (addr >= 0 && addr < 0x400) { ++ // This has to be a nullptr-dereference, drop into debugger. 
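++ // A store this close to address zero is not expected from valid generated
++ // code: it almost certainly means a nullptr (plus a small field offset)
++ // was dereferenced, so report it and drop into the debugger.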
++ PrintF("Memory write to bad address: 0x%08" PRIx64 " , pc=0x%08" PRIxPTR ++ " \n", ++ addr, reinterpret_cast(instr)); ++ DieOrDebug(); ++ } ++ /*if ((addr & 0x3) == 0)*/ { ++ local_monitor_.NotifyStore(); ++ base::MutexGuard lock_guard(&GlobalMonitor::Get()->mutex); ++ GlobalMonitor::Get()->NotifyStore_Locked(&global_monitor_thread_); ++ TraceMemWr(addr, value, WORD); ++ int* ptr = reinterpret_cast(addr); ++ *ptr = value; ++ return; ++ } ++ // PrintF("Unaligned write at 0x%08" PRIx64 " , pc=0x%08" V8PRIxPTR "\n", ++ // addr, ++ // reinterpret_cast(instr)); ++ // DieOrDebug(); ++} ++ ++void Simulator::WriteConditionalW(int64_t addr, int32_t value, ++ Instruction* instr, int32_t rk_reg) { ++ if (addr >= 0 && addr < 0x400) { ++ // This has to be a nullptr-dereference, drop into debugger. ++ PrintF("Memory write to bad address: 0x%08" PRIx64 " , pc=0x%08" PRIxPTR ++ " \n", ++ addr, reinterpret_cast(instr)); ++ DieOrDebug(); ++ } ++ if ((addr & 0x3) == 0) { ++ base::MutexGuard lock_guard(&GlobalMonitor::Get()->mutex); ++ if (local_monitor_.NotifyStoreConditional(addr, TransactionSize::Word) && ++ GlobalMonitor::Get()->NotifyStoreConditional_Locked( ++ addr, &global_monitor_thread_)) { ++ local_monitor_.NotifyStore(); ++ GlobalMonitor::Get()->NotifyStore_Locked(&global_monitor_thread_); ++ TraceMemWr(addr, value, WORD); ++ int* ptr = reinterpret_cast(addr); ++ *ptr = value; ++ set_register(rk_reg, 1); ++ } else { ++ set_register(rk_reg, 0); ++ } ++ return; ++ } ++ PrintF("Unaligned write at 0x%08" PRIx64 " , pc=0x%08" V8PRIxPTR "\n", addr, ++ reinterpret_cast(instr)); ++ DieOrDebug(); ++} ++ ++int64_t Simulator::Read2W(int64_t addr, Instruction* instr) { ++ if (addr >= 0 && addr < 0x400) { ++ // This has to be a nullptr-dereference, drop into debugger. ++ PrintF("Memory read from bad address: 0x%08" PRIx64 " , pc=0x%08" PRIxPTR ++ " \n", ++ addr, reinterpret_cast(instr)); ++ DieOrDebug(); ++ } ++ /* if ((addr & kPointerAlignmentMask) == 0)*/ { ++ local_monitor_.NotifyLoad(); ++ int64_t* ptr = reinterpret_cast(addr); ++ TraceMemRd(addr, *ptr); ++ return *ptr; ++ } ++ // PrintF("Unaligned read at 0x%08" PRIx64 " , pc=0x%08" V8PRIxPTR "\n", ++ // addr, ++ // reinterpret_cast(instr)); ++ // DieOrDebug(); ++ // return 0; ++} ++ ++void Simulator::Write2W(int64_t addr, int64_t value, Instruction* instr) { ++ if (addr >= 0 && addr < 0x400) { ++ // This has to be a nullptr-dereference, drop into debugger. ++ PrintF("Memory write to bad address: 0x%08" PRIx64 " , pc=0x%08" PRIxPTR ++ "\n", ++ addr, reinterpret_cast(instr)); ++ DieOrDebug(); ++ } ++ /*if ((addr & kPointerAlignmentMask) == 0)*/ { ++ local_monitor_.NotifyStore(); ++ base::MutexGuard lock_guard(&GlobalMonitor::Get()->mutex); ++ GlobalMonitor::Get()->NotifyStore_Locked(&global_monitor_thread_); ++ TraceMemWr(addr, value, DWORD); ++ int64_t* ptr = reinterpret_cast(addr); ++ *ptr = value; ++ return; ++ } ++ // PrintF("Unaligned write at 0x%08" PRIx64 " , pc=0x%08" V8PRIxPTR "\n", ++ // addr, ++ // reinterpret_cast(instr)); ++ // DieOrDebug(); ++} ++ ++void Simulator::WriteConditional2W(int64_t addr, int64_t value, ++ Instruction* instr, int32_t rk_reg) { ++ if (addr >= 0 && addr < 0x400) { ++ // This has to be a nullptr-dereference, drop into debugger. 
++ PrintF("Memory write to bad address: 0x%08" PRIx64 " , pc=0x%08" PRIxPTR ++ "\n", ++ addr, reinterpret_cast(instr)); ++ DieOrDebug(); ++ } ++ if ((addr & kPointerAlignmentMask) == 0) { ++ base::MutexGuard lock_guard(&GlobalMonitor::Get()->mutex); ++ if (local_monitor_.NotifyStoreConditional(addr, ++ TransactionSize::DoubleWord) && ++ GlobalMonitor::Get()->NotifyStoreConditional_Locked( ++ addr, &global_monitor_thread_)) { ++ local_monitor_.NotifyStore(); ++ GlobalMonitor::Get()->NotifyStore_Locked(&global_monitor_thread_); ++ TraceMemWr(addr, value, DWORD); ++ int64_t* ptr = reinterpret_cast(addr); ++ *ptr = value; ++ set_register(rk_reg, 1); ++ } else { ++ set_register(rk_reg, 0); ++ } ++ return; ++ } ++ PrintF("Unaligned write at 0x%08" PRIx64 " , pc=0x%08" V8PRIxPTR "\n", addr, ++ reinterpret_cast(instr)); ++ DieOrDebug(); ++} ++ ++double Simulator::ReadD(int64_t addr, Instruction* instr) { ++ /*if ((addr & kDoubleAlignmentMask) == 0)*/ { ++ local_monitor_.NotifyLoad(); ++ double* ptr = reinterpret_cast(addr); ++ return *ptr; ++ } ++ // PrintF("Unaligned (double) read at 0x%08" PRIx64 " , pc=0x%08" V8PRIxPTR ++ // "\n", ++ // addr, reinterpret_cast(instr)); ++ // base::OS::Abort(); ++ // return 0; ++} ++ ++void Simulator::WriteD(int64_t addr, double value, Instruction* instr) { ++ /*if ((addr & kDoubleAlignmentMask) == 0)*/ { ++ local_monitor_.NotifyStore(); ++ base::MutexGuard lock_guard(&GlobalMonitor::Get()->mutex); ++ GlobalMonitor::Get()->NotifyStore_Locked(&global_monitor_thread_); ++ double* ptr = reinterpret_cast(addr); ++ *ptr = value; ++ return; ++ } ++ // PrintF("Unaligned (double) write at 0x%08" PRIx64 " , pc=0x%08" V8PRIxPTR ++ // "\n", ++ // addr, reinterpret_cast(instr)); ++ // DieOrDebug(); ++} ++ ++uint16_t Simulator::ReadHU(int64_t addr, Instruction* instr) { ++ // if ((addr & 1) == 0) { ++ local_monitor_.NotifyLoad(); ++ uint16_t* ptr = reinterpret_cast(addr); ++ TraceMemRd(addr, static_cast(*ptr)); ++ return *ptr; ++ // } ++ // PrintF("Unaligned unsigned halfword read at 0x%08" PRIx64 ++ // " , pc=0x%08" V8PRIxPTR "\n", ++ // addr, reinterpret_cast(instr)); ++ // DieOrDebug(); ++ // return 0; ++} ++ ++int16_t Simulator::ReadH(int64_t addr, Instruction* instr) { ++ // if ((addr & 1) == 0) { ++ local_monitor_.NotifyLoad(); ++ int16_t* ptr = reinterpret_cast(addr); ++ TraceMemRd(addr, static_cast(*ptr)); ++ return *ptr; ++ // } ++ // PrintF("Unaligned signed halfword read at 0x%08" PRIx64 ++ // " , pc=0x%08" V8PRIxPTR "\n", ++ // addr, reinterpret_cast(instr)); ++ // DieOrDebug(); ++ // return 0; ++} ++ ++void Simulator::WriteH(int64_t addr, uint16_t value, Instruction* instr) { ++ // if ((addr & 1) == 0) { ++ local_monitor_.NotifyStore(); ++ base::MutexGuard lock_guard(&GlobalMonitor::Get()->mutex); ++ GlobalMonitor::Get()->NotifyStore_Locked(&global_monitor_thread_); ++ TraceMemWr(addr, value, HALF); ++ uint16_t* ptr = reinterpret_cast(addr); ++ *ptr = value; ++ return; ++ // } ++ // PrintF("Unaligned unsigned halfword write at 0x%08" PRIx64 ++ // " , pc=0x%08" V8PRIxPTR "\n", ++ // addr, reinterpret_cast(instr)); ++ // DieOrDebug(); ++} ++ ++void Simulator::WriteH(int64_t addr, int16_t value, Instruction* instr) { ++ // if ((addr & 1) == 0) { ++ local_monitor_.NotifyStore(); ++ base::MutexGuard lock_guard(&GlobalMonitor::Get()->mutex); ++ GlobalMonitor::Get()->NotifyStore_Locked(&global_monitor_thread_); ++ TraceMemWr(addr, value, HALF); ++ int16_t* ptr = reinterpret_cast(addr); ++ *ptr = value; ++ return; ++ // } ++ // PrintF("Unaligned halfword write at 0x%08" 
PRIx64 " , pc=0x%08" V8PRIxPTR ++ // "\n", ++ // addr, reinterpret_cast(instr)); ++ // DieOrDebug(); ++} ++ ++uint32_t Simulator::ReadBU(int64_t addr) { ++ local_monitor_.NotifyLoad(); ++ uint8_t* ptr = reinterpret_cast(addr); ++ TraceMemRd(addr, static_cast(*ptr)); ++ return *ptr & 0xFF; ++} ++ ++int32_t Simulator::ReadB(int64_t addr) { ++ local_monitor_.NotifyLoad(); ++ int8_t* ptr = reinterpret_cast(addr); ++ TraceMemRd(addr, static_cast(*ptr)); ++ return *ptr; ++} ++ ++void Simulator::WriteB(int64_t addr, uint8_t value) { ++ local_monitor_.NotifyStore(); ++ base::MutexGuard lock_guard(&GlobalMonitor::Get()->mutex); ++ GlobalMonitor::Get()->NotifyStore_Locked(&global_monitor_thread_); ++ TraceMemWr(addr, value, BYTE); ++ uint8_t* ptr = reinterpret_cast(addr); ++ *ptr = value; ++} ++ ++void Simulator::WriteB(int64_t addr, int8_t value) { ++ local_monitor_.NotifyStore(); ++ base::MutexGuard lock_guard(&GlobalMonitor::Get()->mutex); ++ GlobalMonitor::Get()->NotifyStore_Locked(&global_monitor_thread_); ++ TraceMemWr(addr, value, BYTE); ++ int8_t* ptr = reinterpret_cast(addr); ++ *ptr = value; ++} ++ ++template ++T Simulator::ReadMem(int64_t addr, Instruction* instr) { ++ int alignment_mask = (1 << sizeof(T)) - 1; ++ if ((addr & alignment_mask) == 0) { ++ local_monitor_.NotifyLoad(); ++ T* ptr = reinterpret_cast(addr); ++ TraceMemRd(addr, *ptr); ++ return *ptr; ++ } ++ PrintF("Unaligned read of type sizeof(%ld) at 0x%08lx, pc=0x%08" V8PRIxPTR ++ "\n", ++ sizeof(T), addr, reinterpret_cast(instr)); ++ base::OS::Abort(); ++ return 0; ++} ++ ++template ++void Simulator::WriteMem(int64_t addr, T value, Instruction* instr) { ++ int alignment_mask = (1 << sizeof(T)) - 1; ++ if ((addr & alignment_mask) == 0) { ++ local_monitor_.NotifyStore(); ++ base::MutexGuard lock_guard(&GlobalMonitor::Get()->mutex); ++ GlobalMonitor::Get()->NotifyStore_Locked(&global_monitor_thread_); ++ T* ptr = reinterpret_cast(addr); ++ *ptr = value; ++ TraceMemWr(addr, value); ++ return; ++ } ++ PrintF("Unaligned write of type sizeof(%ld) at 0x%08lx, pc=0x%08" V8PRIxPTR ++ "\n", ++ sizeof(T), addr, reinterpret_cast(instr)); ++ base::OS::Abort(); ++} ++ ++// Returns the limit of the stack area to enable checking for stack overflows. ++uintptr_t Simulator::StackLimit(uintptr_t c_limit) const { ++ // The simulator uses a separate JS stack. If we have exhausted the C stack, ++ // we also drop down the JS limit to reflect the exhaustion on the JS stack. ++ if (GetCurrentStackPosition() < c_limit) { ++ return reinterpret_cast(get_sp()); ++ } ++ ++ // Otherwise the limit is the JS stack. Leave a safety margin of 1024 bytes ++ // to prevent overrunning the stack when pushing values. ++ return reinterpret_cast(stack_) + 1024; ++} ++ ++// Unsupported instructions use Format to print an error and stop execution. ++void Simulator::Format(Instruction* instr, const char* format) { ++ PrintF("Simulator found unsupported instruction:\n 0x%08" PRIxPTR " : %s\n", ++ reinterpret_cast(instr), format); ++ UNIMPLEMENTED(); ++} ++ ++// Calls into the V8 runtime are based on this very simple interface. ++// Note: To be able to return two values from some calls the code in runtime.cc ++// uses the ObjectPair which is essentially two 32-bit values stuffed into a ++// 64-bit value. With the code below we assume that all runtime calls return ++// 64 bits of result. If they don't, the v1 result register contains a bogus ++// value, which is fine because it is caller-saved. 
++ ++using SimulatorRuntimeCall = ObjectPair (*)(int64_t arg0, int64_t arg1, ++ int64_t arg2, int64_t arg3, ++ int64_t arg4, int64_t arg5, ++ int64_t arg6, int64_t arg7, ++ int64_t arg8, int64_t arg9); ++ ++// These prototypes handle the four types of FP calls. ++using SimulatorRuntimeCompareCall = int64_t (*)(double darg0, double darg1); ++using SimulatorRuntimeFPFPCall = double (*)(double darg0, double darg1); ++using SimulatorRuntimeFPCall = double (*)(double darg0); ++using SimulatorRuntimeFPIntCall = double (*)(double darg0, int32_t arg0); ++ ++// This signature supports direct call in to API function native callback ++// (refer to InvocationCallback in v8.h). ++using SimulatorRuntimeDirectApiCall = void (*)(int64_t arg0); ++using SimulatorRuntimeProfilingApiCall = void (*)(int64_t arg0, void* arg1); ++ ++// This signature supports direct call to accessor getter callback. ++using SimulatorRuntimeDirectGetterCall = void (*)(int64_t arg0, int64_t arg1); ++using SimulatorRuntimeProfilingGetterCall = void (*)(int64_t arg0, int64_t arg1, ++ void* arg2); ++ ++// Software interrupt instructions are used by the simulator to call into the ++// C-based V8 runtime. They are also used for debugging with simulator. ++void Simulator::SoftwareInterrupt() { ++ // There are several instructions that could get us here, ++ // the break_, dbgcall_, syscall_ and hypcall instructions. ++ int32_t opcode_hi15 = instr_.Bits(31, 17); ++ CHECK_EQ(opcode_hi15, 0x15); ++ uint32_t code = instr_.Bits(14, 0); ++ // We first check if we met a call_rt_redirected. ++ if (instr_.InstructionBits() == rtCallRedirInstr) { ++ Redirection* redirection = Redirection::FromInstruction(instr_.instr()); ++ ++ int64_t* stack_pointer = reinterpret_cast(get_register(sp)); ++ ++ int64_t arg0 = get_register(a0); ++ int64_t arg1 = get_register(a1); ++ int64_t arg2 = get_register(a2); ++ int64_t arg3 = get_register(a3); ++ int64_t arg4 = get_register(a4); ++ int64_t arg5 = get_register(a5); ++ int64_t arg6 = get_register(a6); ++ int64_t arg7 = get_register(a7); ++ int64_t arg8 = stack_pointer[0]; ++ int64_t arg9 = stack_pointer[1]; ++ STATIC_ASSERT(kMaxCParameters == 10); ++ ++ bool fp_call = ++ (redirection->type() == ExternalReference::BUILTIN_FP_FP_CALL) || ++ (redirection->type() == ExternalReference::BUILTIN_COMPARE_CALL) || ++ (redirection->type() == ExternalReference::BUILTIN_FP_CALL) || ++ (redirection->type() == ExternalReference::BUILTIN_FP_INT_CALL); ++ ++ { ++ // With the hard floating point calling convention, double ++ // arguments are passed in FPU registers. Fetch the arguments ++ // from there and call the builtin using soft floating point ++ // convention. ++ switch (redirection->type()) { ++ case ExternalReference::BUILTIN_FP_FP_CALL: ++ case ExternalReference::BUILTIN_COMPARE_CALL: ++ arg0 = get_fpu_register(f0); ++ arg1 = get_fpu_register(f1); ++ arg2 = get_fpu_register(f2); ++ arg3 = get_fpu_register(f3); ++ break; ++ case ExternalReference::BUILTIN_FP_CALL: ++ arg0 = get_fpu_register(f0); ++ arg1 = get_fpu_register(f1); ++ break; ++ case ExternalReference::BUILTIN_FP_INT_CALL: ++ arg0 = get_fpu_register(f0); ++ arg1 = get_fpu_register(f1); ++ arg2 = get_register(a2); ++ break; ++ default: ++ break; ++ } ++ } ++ ++ // This is dodgy but it works because the C entry stubs are never moved. ++ // See comment in codegen-arm.cc and bug 1242173. 
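++ // Save the simulated return address across the host call; it is restored
++ // below and used as the pc to resume at once the call completes.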
++ int64_t saved_ra = get_register(ra); ++ ++ intptr_t external = ++ reinterpret_cast(redirection->external_function()); ++ ++ // Based on CpuFeatures::IsSupported(FPU), La64 will use either hardware ++ // FPU, or gcc soft-float routines. Hardware FPU is simulated in this ++ // simulator. Soft-float has additional abstraction of ExternalReference, ++ // to support serialization. ++ if (fp_call) { ++ double dval0, dval1; // one or two double parameters ++ int32_t ival; // zero or one integer parameters ++ int64_t iresult = 0; // integer return value ++ double dresult = 0; // double return value ++ GetFpArgs(&dval0, &dval1, &ival); ++ SimulatorRuntimeCall generic_target = ++ reinterpret_cast(external); ++ if (::v8::internal::FLAG_trace_sim) { ++ switch (redirection->type()) { ++ case ExternalReference::BUILTIN_FP_FP_CALL: ++ case ExternalReference::BUILTIN_COMPARE_CALL: ++ PrintF("Call to host function at %p with args %f, %f", ++ reinterpret_cast(FUNCTION_ADDR(generic_target)), ++ dval0, dval1); ++ break; ++ case ExternalReference::BUILTIN_FP_CALL: ++ PrintF("Call to host function at %p with arg %f", ++ reinterpret_cast(FUNCTION_ADDR(generic_target)), ++ dval0); ++ break; ++ case ExternalReference::BUILTIN_FP_INT_CALL: ++ PrintF("Call to host function at %p with args %f, %d", ++ reinterpret_cast(FUNCTION_ADDR(generic_target)), ++ dval0, ival); ++ break; ++ default: ++ UNREACHABLE(); ++ break; ++ } ++ } ++ switch (redirection->type()) { ++ case ExternalReference::BUILTIN_COMPARE_CALL: { ++ SimulatorRuntimeCompareCall target = ++ reinterpret_cast(external); ++ iresult = target(dval0, dval1); ++ set_register(v0, static_cast(iresult)); ++ // set_register(v1, static_cast(iresult >> 32)); ++ break; ++ } ++ case ExternalReference::BUILTIN_FP_FP_CALL: { ++ SimulatorRuntimeFPFPCall target = ++ reinterpret_cast(external); ++ dresult = target(dval0, dval1); ++ SetFpResult(dresult); ++ break; ++ } ++ case ExternalReference::BUILTIN_FP_CALL: { ++ SimulatorRuntimeFPCall target = ++ reinterpret_cast(external); ++ dresult = target(dval0); ++ SetFpResult(dresult); ++ break; ++ } ++ case ExternalReference::BUILTIN_FP_INT_CALL: { ++ SimulatorRuntimeFPIntCall target = ++ reinterpret_cast(external); ++ dresult = target(dval0, ival); ++ SetFpResult(dresult); ++ break; ++ } ++ default: ++ UNREACHABLE(); ++ break; ++ } ++ if (::v8::internal::FLAG_trace_sim) { ++ switch (redirection->type()) { ++ case ExternalReference::BUILTIN_COMPARE_CALL: ++ PrintF("Returned %08x\n", static_cast(iresult)); ++ break; ++ case ExternalReference::BUILTIN_FP_FP_CALL: ++ case ExternalReference::BUILTIN_FP_CALL: ++ case ExternalReference::BUILTIN_FP_INT_CALL: ++ PrintF("Returned %f\n", dresult); ++ break; ++ default: ++ UNREACHABLE(); ++ break; ++ } ++ } ++ } else if (redirection->type() == ExternalReference::DIRECT_API_CALL) { ++ if (::v8::internal::FLAG_trace_sim) { ++ PrintF("Call to host function at %p args %08" PRIx64 " \n", ++ reinterpret_cast(external), arg0); ++ } ++ SimulatorRuntimeDirectApiCall target = ++ reinterpret_cast(external); ++ target(arg0); ++ } else if (redirection->type() == ExternalReference::PROFILING_API_CALL) { ++ if (::v8::internal::FLAG_trace_sim) { ++ PrintF("Call to host function at %p args %08" PRIx64 " %08" PRIx64 ++ " \n", ++ reinterpret_cast(external), arg0, arg1); ++ } ++ SimulatorRuntimeProfilingApiCall target = ++ reinterpret_cast(external); ++ target(arg0, Redirection::ReverseRedirection(arg1)); ++ } else if (redirection->type() == ExternalReference::DIRECT_GETTER_CALL) { ++ if 
(::v8::internal::FLAG_trace_sim) { ++ PrintF("Call to host function at %p args %08" PRIx64 " %08" PRIx64 ++ " \n", ++ reinterpret_cast(external), arg0, arg1); ++ } ++ SimulatorRuntimeDirectGetterCall target = ++ reinterpret_cast(external); ++ target(arg0, arg1); ++ } else if (redirection->type() == ++ ExternalReference::PROFILING_GETTER_CALL) { ++ if (::v8::internal::FLAG_trace_sim) { ++ PrintF("Call to host function at %p args %08" PRIx64 " %08" PRIx64 ++ " %08" PRIx64 " \n", ++ reinterpret_cast(external), arg0, arg1, arg2); ++ } ++ SimulatorRuntimeProfilingGetterCall target = ++ reinterpret_cast(external); ++ target(arg0, arg1, Redirection::ReverseRedirection(arg2)); ++ } else { ++ DCHECK(redirection->type() == ExternalReference::BUILTIN_CALL || ++ redirection->type() == ExternalReference::BUILTIN_CALL_PAIR); ++ SimulatorRuntimeCall target = ++ reinterpret_cast(external); ++ if (::v8::internal::FLAG_trace_sim) { ++ PrintF( ++ "Call to host function at %p " ++ "args %08" PRIx64 " , %08" PRIx64 " , %08" PRIx64 " , %08" PRIx64 ++ " , %08" PRIx64 " , %08" PRIx64 " , %08" PRIx64 " , %08" PRIx64 ++ " , %08" PRIx64 " , %08" PRIx64 " \n", ++ reinterpret_cast(FUNCTION_ADDR(target)), arg0, arg1, arg2, ++ arg3, arg4, arg5, arg6, arg7, arg8, arg9); ++ } ++ ObjectPair result = ++ target(arg0, arg1, arg2, arg3, arg4, arg5, arg6, arg7, arg8, arg9); ++ set_register(v0, (int64_t)(result.x)); ++ set_register(v1, (int64_t)(result.y)); ++ } ++ if (::v8::internal::FLAG_trace_sim) { ++ PrintF("Returned %08" PRIx64 " : %08" PRIx64 " \n", get_register(v1), ++ get_register(v0)); ++ } ++ set_register(ra, saved_ra); ++ set_pc(get_register(ra)); ++ ++ } else if (code <= kMaxStopCode) { ++ if (IsWatchpoint(code)) { ++ PrintWatchpoint(code); ++ } else { ++ IncreaseStopCounter(code); ++ HandleStop(code, instr_.instr()); ++ } ++ } else { ++ // All remaining break_ codes, and all traps are handled here. ++ La64Debugger dbg(this); ++ dbg.Debug(); ++ } ++} ++ ++// Stop helper functions. ++bool Simulator::IsWatchpoint(uint64_t code) { ++ return (code <= kMaxWatchpointCode); ++} ++ ++void Simulator::PrintWatchpoint(uint64_t code) { ++ La64Debugger dbg(this); ++ ++break_count_; ++ PrintF("\n---- break %" PRId64 " marker: %3d (instr count: %8" PRId64 ++ " ) ----------" ++ "----------------------------------", ++ code, break_count_, icount_); ++ dbg.PrintAllRegs(); // Print registers and continue running. ++} ++ ++void Simulator::HandleStop(uint64_t code, Instruction* instr) { ++ // Stop if it is enabled, otherwise go on jumping over the stop ++ // and the message address. 
++ if (IsEnabledStop(code)) { ++ La64Debugger dbg(this); ++ dbg.Stop(instr); ++ } ++} ++ ++bool Simulator::IsStopInstruction(Instruction* instr) { ++ int32_t opcode_hi15 = instr->Bits(31, 17); ++ uint32_t code = static_cast(instr->Bits(14, 0)); ++ return (opcode_hi15 == 0x15) && code > kMaxWatchpointCode && ++ code <= kMaxStopCode; ++} ++ ++bool Simulator::IsEnabledStop(uint64_t code) { ++ DCHECK_LE(code, kMaxStopCode); ++ DCHECK_GT(code, kMaxWatchpointCode); ++ return !(watched_stops_[code].count & kStopDisabledBit); ++} ++ ++void Simulator::EnableStop(uint64_t code) { ++ if (!IsEnabledStop(code)) { ++ watched_stops_[code].count &= ~kStopDisabledBit; ++ } ++} ++ ++void Simulator::DisableStop(uint64_t code) { ++ if (IsEnabledStop(code)) { ++ watched_stops_[code].count |= kStopDisabledBit; ++ } ++} ++ ++void Simulator::IncreaseStopCounter(uint64_t code) { ++ DCHECK_LE(code, kMaxStopCode); ++ if ((watched_stops_[code].count & ~(1 << 31)) == 0x7FFFFFFF) { ++ PrintF("Stop counter for code %" PRId64 ++ " has overflowed.\n" ++ "Enabling this code and reseting the counter to 0.\n", ++ code); ++ watched_stops_[code].count = 0; ++ EnableStop(code); ++ } else { ++ watched_stops_[code].count++; ++ } ++} ++ ++// Print a stop status. ++void Simulator::PrintStopInfo(uint64_t code) { ++ if (code <= kMaxWatchpointCode) { ++ PrintF("That is a watchpoint, not a stop.\n"); ++ return; ++ } else if (code > kMaxStopCode) { ++ PrintF("Code too large, only %u stops can be used\n", kMaxStopCode + 1); ++ return; ++ } ++ const char* state = IsEnabledStop(code) ? "Enabled" : "Disabled"; ++ int32_t count = watched_stops_[code].count & ~kStopDisabledBit; ++ // Don't print the state of unused breakpoints. ++ if (count != 0) { ++ if (watched_stops_[code].desc) { ++ PrintF("stop %" PRId64 " - 0x%" PRIx64 " : \t%s, \tcounter = %i, \t%s\n", ++ code, code, state, count, watched_stops_[code].desc); ++ } else { ++ PrintF("stop %" PRId64 " - 0x%" PRIx64 " : \t%s, \tcounter = %i\n", code, ++ code, state, count); ++ } ++ } ++} ++ ++void Simulator::SignalException(Exception e) { ++ FATAL("Error: Exception %i raised.", static_cast(e)); ++} ++ ++template ++static T FPAbs(T a); ++ ++template <> ++double FPAbs(double a) { ++ return fabs(a); ++} ++ ++template <> ++float FPAbs(float a) { ++ return fabsf(a); ++} ++ ++template ++static bool FPUProcessNaNsAndZeros(T a, T b, MaxMinKind kind, T* result) { ++ if (std::isnan(a) && std::isnan(b)) { ++ *result = a; ++ } else if (std::isnan(a)) { ++ *result = b; ++ } else if (std::isnan(b)) { ++ *result = a; ++ } else if (b == a) { ++ // Handle -0.0 == 0.0 case. ++ // std::signbit() returns int 0 or 1 so subtracting MaxMinKind::kMax ++ // negates the result. ++ *result = std::signbit(b) - static_cast(kind) ? b : a; ++ } else { ++ return false; ++ } ++ return true; ++} ++ ++template ++static T FPUMin(T a, T b) { ++ T result; ++ if (FPUProcessNaNsAndZeros(a, b, MaxMinKind::kMin, &result)) { ++ return result; ++ } else { ++ return b < a ? b : a; ++ } ++} ++ ++template ++static T FPUMax(T a, T b) { ++ T result; ++ if (FPUProcessNaNsAndZeros(a, b, MaxMinKind::kMax, &result)) { ++ return result; ++ } else { ++ return b > a ? b : a; ++ } ++} ++ ++template ++static T FPUMinA(T a, T b) { ++ T result; ++ if (!FPUProcessNaNsAndZeros(a, b, MaxMinKind::kMin, &result)) { ++ if (FPAbs(a) < FPAbs(b)) { ++ result = a; ++ } else if (FPAbs(b) < FPAbs(a)) { ++ result = b; ++ } else { ++ result = a < b ? 
a : b; ++ } ++ } ++ return result; ++} ++ ++template ++static T FPUMaxA(T a, T b) { ++ T result; ++ if (!FPUProcessNaNsAndZeros(a, b, MaxMinKind::kMin, &result)) { ++ if (FPAbs(a) > FPAbs(b)) { ++ result = a; ++ } else if (FPAbs(b) > FPAbs(a)) { ++ result = b; ++ } else { ++ result = a > b ? a : b; ++ } ++ } ++ return result; ++} ++ ++enum class KeepSign : bool { no = false, yes }; ++ ++template ::value, ++ int>::type = 0> ++T FPUCanonalizeNaNArg(T result, T arg, KeepSign keepSign = KeepSign::no) { ++ DCHECK(std::isnan(arg)); ++ T qNaN = std::numeric_limits::quiet_NaN(); ++ if (keepSign == KeepSign::yes) { ++ return std::copysign(qNaN, result); ++ } ++ return qNaN; ++} ++ ++template ++T FPUCanonalizeNaNArgs(T result, KeepSign keepSign, T first) { ++ if (std::isnan(first)) { ++ return FPUCanonalizeNaNArg(result, first, keepSign); ++ } ++ return result; ++} ++ ++template ++T FPUCanonalizeNaNArgs(T result, KeepSign keepSign, T first, Args... args) { ++ if (std::isnan(first)) { ++ return FPUCanonalizeNaNArg(result, first, keepSign); ++ } ++ return FPUCanonalizeNaNArgs(result, keepSign, args...); ++} ++ ++template ++T FPUCanonalizeOperation(Func f, T first, Args... args) { ++ return FPUCanonalizeOperation(f, KeepSign::no, first, args...); ++} ++ ++template ++T FPUCanonalizeOperation(Func f, KeepSign keepSign, T first, Args... args) { ++ T result = f(first, args...); ++ if (std::isnan(result)) { ++ result = FPUCanonalizeNaNArgs(result, keepSign, first, args...); ++ } ++ return result; ++} ++ ++// Handle execution based on instruction types. ++void Simulator::DecodeTypeOp6() { ++ int64_t alu_out; ++ // Next pc. ++ int64_t next_pc = bad_ra; ++ ++ // Branch instructions common part. ++ auto BranchAndLinkHelper = [this, &next_pc]() { ++ int64_t current_pc = get_pc(); ++ set_register(ra, current_pc + kInstrSize); ++ int32_t offs26_low16 = ++ static_cast(instr_.Bits(25, 10) << 16) >> 16; ++ int32_t offs26_high10 = static_cast(instr_.Bits(9, 0) << 22) >> 6; ++ int32_t offs26 = offs26_low16 | offs26_high10; ++ next_pc = current_pc + (offs26 << 2); ++ printf_instr("Offs26: %08x\n", offs26); ++ set_pc(next_pc); ++ }; ++ ++ auto BranchOff16Helper = [this, &next_pc](bool do_branch) { ++ int64_t current_pc = get_pc(); ++ int32_t offs16 = static_cast(instr_.Bits(25, 10) << 16) >> 16; ++ printf_instr("Offs16: %08x\n", offs16); ++ int32_t offs = do_branch ? (offs16 << 2) : kInstrSize; ++ next_pc = current_pc + offs; ++ set_pc(next_pc); ++ }; ++ ++ auto BranchOff21Helper = [this, &next_pc](bool do_branch) { ++ int64_t current_pc = get_pc(); ++ int32_t offs21_low16 = ++ static_cast(instr_.Bits(25, 10) << 16) >> 16; ++ int32_t offs21_high5 = static_cast(instr_.Bits(4, 0) << 27) >> 11; ++ int32_t offs = offs21_low16 | offs21_high5; ++ printf_instr("Offs21: %08x\n", offs); ++ offs = do_branch ? 
(offs << 2) : kInstrSize; ++ next_pc = current_pc + offs; ++ set_pc(next_pc); ++ }; ++ ++ auto BranchOff26Helper = [this, &next_pc]() { ++ int64_t current_pc = get_pc(); ++ int32_t offs26_low16 = ++ static_cast(instr_.Bits(25, 10) << 16) >> 16; ++ int32_t offs26_high10 = static_cast(instr_.Bits(9, 0) << 22) >> 6; ++ int32_t offs26 = offs26_low16 | offs26_high10; ++ next_pc = current_pc + (offs26 << 2); ++ printf_instr("Offs26: %08x\n", offs26); ++ set_pc(next_pc); ++ }; ++ ++ auto JumpOff16Helper = [this, &next_pc]() { ++ int32_t offs16 = static_cast(instr_.Bits(25, 10) << 16) >> 16; ++ printf_instr("JIRL\t %s: %016lx, %s: %016lx, offs16: %x\n", ++ Registers::Name(rd_reg()), rd(), Registers::Name(rj_reg()), ++ rj(), offs16); ++ set_register(rd_reg(), get_pc() + kInstrSize); ++ next_pc = rj() + (offs16 << 2); ++ set_pc(next_pc); ++ }; ++ ++ switch (instr_.Bits(31, 26) << 26) { ++ case ADDU16I_D: { ++ printf_instr("ADDU16I_D\t %s: %016lx, %s: %016lx, si16: %d\n", ++ Registers::Name(rd_reg()), rd(), Registers::Name(rj_reg()), ++ rj(), si16()); ++ int32_t si16_upper = static_cast(si16()) << 16; ++ alu_out = static_cast(si16_upper) + rj(); ++ SetResult(rd_reg(), alu_out); ++ break; ++ } ++ case BEQZ: ++ printf_instr("BEQZ\t %s: %016lx, ", Registers::Name(rj_reg()), rj()); ++ BranchOff21Helper(rj() == 0); ++ break; ++ case BNEZ: ++ printf_instr("BNEZ\t %s: %016lx, ", Registers::Name(rj_reg()), rj()); ++ BranchOff21Helper(rj() != 0); ++ break; ++ case BCZ: { ++ if (instr_.Bits(9, 8) == 0b00) { ++ // BCEQZ ++ printf_instr("BCEQZ\t fcc%d: %s, ", cj_reg(), cj() ? "True" : "False"); ++ BranchOff21Helper(cj() == false); ++ } else if (instr_.Bits(9, 8) == 0b01) { ++ // BCNEZ ++ printf_instr("BCNEZ\t fcc%d: %s, ", cj_reg(), cj() ? "True" : "False"); ++ BranchOff21Helper(cj() == true); ++ } else { ++ UNREACHABLE(); ++ } ++ break; ++ } ++ case JIRL: ++ JumpOff16Helper(); ++ break; ++ case B: ++ printf_instr("B\t "); ++ BranchOff26Helper(); ++ break; ++ case BL: ++ printf_instr("BL\t "); ++ BranchAndLinkHelper(); ++ break; ++ case BEQ: ++ printf_instr("BEQ\t %s: %016lx, %s, %016lx, ", Registers::Name(rj_reg()), ++ rj(), Registers::Name(rd_reg()), rd()); ++ BranchOff16Helper(rj() == rd()); ++ break; ++ case BNE: ++ printf_instr("BNE\t %s: %016lx, %s, %016lx, ", Registers::Name(rj_reg()), ++ rj(), Registers::Name(rd_reg()), rd()); ++ BranchOff16Helper(rj() != rd()); ++ break; ++ case BLT: ++ printf_instr("BLT\t %s: %016lx, %s, %016lx, ", Registers::Name(rj_reg()), ++ rj(), Registers::Name(rd_reg()), rd()); ++ BranchOff16Helper(rj() < rd()); ++ break; ++ case BGE: ++ printf_instr("BGE\t %s: %016lx, %s, %016lx, ", Registers::Name(rj_reg()), ++ rj(), Registers::Name(rd_reg()), rd()); ++ BranchOff16Helper(rj() >= rd()); ++ break; ++ case BLTU: ++ printf_instr("BLTU\t %s: %016lx, %s, %016lx, ", Registers::Name(rj_reg()), ++ rj(), Registers::Name(rd_reg()), rd()); ++ BranchOff16Helper(rj_u() < rd_u()); ++ break; ++ case BGEU: ++ printf_instr("BGEU\t %s: %016lx, %s, %016lx, ", Registers::Name(rj_reg()), ++ rj(), Registers::Name(rd_reg()), rd()); ++ BranchOff16Helper(rj_u() >= rd_u()); ++ break; ++ default: ++ UNREACHABLE(); ++ } ++} ++ ++void Simulator::DecodeTypeOp7() { ++ int64_t alu_out; ++ ++ switch (instr_.Bits(31, 25) << 25) { ++ case LU12I_W: { ++ printf_instr("LU12I_W\t %s: %016lx, si20: %d\n", ++ Registers::Name(rd_reg()), rd(), si20()); ++ int32_t si20_upper = static_cast(si20() << 12); ++ SetResult(rd_reg(), static_cast(si20_upper)); ++ break; ++ } ++ case LU32I_D: { ++ printf_instr("LU32I_D\t %s: 
%016lx, si20: %d\n", ++ Registers::Name(rd_reg()), rd(), si20()); ++ int32_t si20_signExtend = static_cast(si20() << 12) >> 12; ++ int64_t lower_32bit_mask = 0xFFFFFFFF; ++ alu_out = (static_cast(si20_signExtend) << 32) | ++ (rd() & lower_32bit_mask); ++ SetResult(rd_reg(), alu_out); ++ break; ++ } ++ case PCADDI: { ++ printf_instr("PCADDI\t %s: %016lx, si20: %d\n", Registers::Name(rd_reg()), ++ rd(), si20()); ++ int32_t si20_signExtend = static_cast(si20() << 12) >> 10; ++ int64_t current_pc = get_pc(); ++ alu_out = static_cast(si20_signExtend) + current_pc; ++ SetResult(rd_reg(), alu_out); ++ break; ++ } ++ case PCALAU12I: { ++ printf_instr("PCALAU12I\t %s: %016lx, si20: %d\n", ++ Registers::Name(rd_reg()), rd(), si20()); ++ int32_t si20_signExtend = static_cast(si20() << 12); ++ int64_t current_pc = get_pc(); ++ int64_t clear_lower12bit_mask = 0xFFFFFFFFFFFFF000; ++ alu_out = static_cast(si20_signExtend) + current_pc; ++ SetResult(rd_reg(), alu_out & clear_lower12bit_mask); ++ break; ++ } ++ case PCADDU12I: { ++ printf_instr("PCADDU12I\t %s: %016lx, si20: %d\n", ++ Registers::Name(rd_reg()), rd(), si20()); ++ int32_t si20_signExtend = static_cast(si20() << 12); ++ int64_t current_pc = get_pc(); ++ alu_out = static_cast(si20_signExtend) + current_pc; ++ SetResult(rd_reg(), alu_out); ++ break; ++ } ++ case PCADDU18I: { ++ printf_instr("PCADDU18I\t %s: %016lx, si20: %d\n", ++ Registers::Name(rd_reg()), rd(), si20()); ++ int64_t si20_signExtend = (static_cast(si20()) << 44) >> 26; ++ int64_t current_pc = get_pc(); ++ alu_out = si20_signExtend + current_pc; ++ SetResult(rd_reg(), alu_out); ++ break; ++ } ++ default: ++ UNREACHABLE(); ++ } ++} ++ ++void Simulator::DecodeTypeOp8() { ++ int64_t addr = 0x0; ++ int64_t si14_se = (static_cast(si14()) << 50) >> 48; ++ ++ switch (instr_.Bits(31, 24) << 24) { ++ case LDPTR_W: ++ printf_instr("LDPTR_W\t %s: %016lx, %s: %016lx, si14: %016lx\n", ++ Registers::Name(rd_reg()), rd(), Registers::Name(rj_reg()), ++ rj(), si14_se); ++ set_register(rd_reg(), ReadW(rj() + si14_se, instr_.instr())); ++ break; ++ case STPTR_W: ++ printf_instr("STPTR_W\t %s: %016lx, %s: %016lx, si14: %016lx\n", ++ Registers::Name(rd_reg()), rd(), Registers::Name(rj_reg()), ++ rj(), si14_se); ++ WriteW(rj() + si14_se, static_cast(rd()), instr_.instr()); ++ break; ++ case LDPTR_D: ++ printf_instr("LDPTR_D\t %s: %016lx, %s: %016lx, si14: %016lx\n", ++ Registers::Name(rd_reg()), rd(), Registers::Name(rj_reg()), ++ rj(), si14_se); ++ set_register(rd_reg(), Read2W(rj() + si14_se, instr_.instr())); ++ break; ++ case STPTR_D: ++ printf_instr("STPTR_D\t %s: %016lx, %s: %016lx, si14: %016lx\n", ++ Registers::Name(rd_reg()), rd(), Registers::Name(rj_reg()), ++ rj(), si14_se); ++ Write2W(rj() + si14_se, rd(), instr_.instr()); ++ break; ++ case LL_W: { ++ printf_instr("LL_W\t %s: %016lx, %s: %016lx, si14: %016lx\n", ++ Registers::Name(rd_reg()), rd(), Registers::Name(rj_reg()), ++ rj(), si14_se); ++ base::MutexGuard lock_guard(&GlobalMonitor::Get()->mutex); ++ addr = si14_se + rj(); ++ set_register(rd_reg(), ReadW(addr, instr_.instr())); ++ local_monitor_.NotifyLoadLinked(addr, TransactionSize::Word); ++ GlobalMonitor::Get()->NotifyLoadLinked_Locked(addr, ++ &global_monitor_thread_); ++ break; ++ } ++ case SC_W: { ++ printf_instr("SC_W\t %s: %016lx, %s: %016lx, si14: %016lx\n", ++ Registers::Name(rd_reg()), rd(), Registers::Name(rj_reg()), ++ rj(), si14_se); ++ addr = si14_se + rj(); ++ WriteConditionalW(addr, static_cast(rd()), instr_.instr(), ++ rd_reg()); ++ break; ++ } ++ case LL_D: { ++ 
printf_instr("LL_D\t %s: %016lx, %s: %016lx, si14: %016lx\n", ++ Registers::Name(rd_reg()), rd(), Registers::Name(rj_reg()), ++ rj(), si14_se); ++ base::MutexGuard lock_guard(&GlobalMonitor::Get()->mutex); ++ addr = si14_se + rj(); ++ set_register(rd_reg(), Read2W(addr, instr_.instr())); ++ local_monitor_.NotifyLoadLinked(addr, TransactionSize::DoubleWord); ++ GlobalMonitor::Get()->NotifyLoadLinked_Locked(addr, ++ &global_monitor_thread_); ++ break; ++ } ++ case SC_D: { ++ printf_instr("SC_D\t %s: %016lx, %s: %016lx, si14: %016lx\n", ++ Registers::Name(rd_reg()), rd(), Registers::Name(rj_reg()), ++ rj(), si14_se); ++ addr = si14_se + rj(); ++ WriteConditional2W(addr, rd(), instr_.instr(), rd_reg()); ++ break; ++ } ++ case CSR: ++ UNIMPLEMENTED(); ++ break; ++ default: ++ UNREACHABLE(); ++ } ++} ++ ++void Simulator::DecodeTypeOp10() { ++ int64_t alu_out = 0x0; ++ int64_t si12_se = (static_cast(si12()) << 52) >> 52; ++ uint64_t si12_ze = (static_cast(ui12()) << 52) >> 52; ++ ++ switch (instr_.Bits(31, 22) << 22) { ++ case BSTR_W: { ++ CHECK_EQ(instr_.Bit(21), 1); ++ uint8_t lsbw_ = lsbw(); ++ uint8_t msbw_ = msbw(); ++ CHECK_LE(lsbw_, msbw_); ++ uint8_t size = msbw_ - lsbw_ + 1; ++ uint64_t mask = (1ULL << size) - 1; ++ if (instr_.Bit(15) == 0) { ++ // BSTRINS_W ++ printf_instr( ++ "BSTRINS_W\t %s: %016lx, %s: %016lx, msbw: %02x, lsbw: %02x\n", ++ Registers::Name(rd_reg()), rd(), Registers::Name(rj_reg()), rj(), ++ msbw_, lsbw_); ++ alu_out = static_cast((rd_u() & ~(mask << lsbw_)) | ++ ((rj_u() & mask) << lsbw_)); ++ } else { ++ // BSTRPICK_W ++ printf_instr( ++ "BSTRPICK_W\t %s: %016lx, %s: %016lx, msbw: %02x, lsbw: %02x\n", ++ Registers::Name(rd_reg()), rd(), Registers::Name(rj_reg()), rj(), ++ msbw_, lsbw_); ++ alu_out = static_cast((rj_u() & (mask << lsbw_)) >> lsbw_); ++ } ++ SetResult(rd_reg(), alu_out); ++ break; ++ } ++ case BSTRINS_D: { ++ uint8_t lsbd_ = lsbd(); ++ uint8_t msbd_ = msbd(); ++ CHECK_LE(lsbd_, msbd_); ++ printf_instr( ++ "BSTRINS_D\t %s: %016lx, %s: %016lx, msbw: %02x, lsbw: %02x\n", ++ Registers::Name(rd_reg()), rd(), Registers::Name(rj_reg()), rj(), ++ msbd_, lsbd_); ++ uint8_t size = msbd_ - lsbd_ + 1; ++ if (size < 64) { ++ uint64_t mask = (1ULL << size) - 1; ++ alu_out = (rd_u() & ~(mask << lsbd_)) | ((rj_u() & mask) << lsbd_); ++ SetResult(rd_reg(), alu_out); ++ } else if (size == 64) { ++ SetResult(rd_reg(), rj()); ++ } ++ break; ++ } ++ case BSTRPICK_D: { ++ uint8_t lsbd_ = lsbd(); ++ uint8_t msbd_ = msbd(); ++ CHECK_LE(lsbd_, msbd_); ++ printf_instr( ++ "BSTRPICK_D\t %s: %016lx, %s: %016lx, msbw: %02x, lsbw: %02x\n", ++ Registers::Name(rd_reg()), rd(), Registers::Name(rj_reg()), rj(), ++ msbd_, lsbd_); ++ uint8_t size = msbd_ - lsbd_ + 1; ++ if (size < 64) { ++ uint64_t mask = (1ULL << size) - 1; ++ alu_out = (rj_u() & (mask << lsbd_)) >> lsbd_; ++ SetResult(rd_reg(), alu_out); ++ } else if (size == 64) { ++ SetResult(rd_reg(), rj()); ++ } ++ break; ++ } ++ case SLTI: ++ printf_instr("SLTI\t %s: %016lx, %s: %016lx, si12: %016lx\n", ++ Registers::Name(rd_reg()), rd(), Registers::Name(rj_reg()), ++ rj(), si12_se); ++ SetResult(rd_reg(), rj() < si12_se ? 1 : 0); ++ break; ++ case SLTUI: ++ printf_instr("SLTUI\t %s: %016lx, %s: %016lx, si12: %016lx\n", ++ Registers::Name(rd_reg()), rd(), Registers::Name(rj_reg()), ++ rj(), si12_se); ++ SetResult(rd_reg(), rj_u() < static_cast(si12_se) ? 
1 : 0); ++ break; ++ case ADDI_W: { ++ printf_instr("ADDI_W\t %s: %016lx, %s: %016lx, si12: %016lx\n", ++ Registers::Name(rd_reg()), rd(), Registers::Name(rj_reg()), ++ rj(), si12_se); ++ int32_t alu32_out = ++ static_cast(rj()) + static_cast(si12_se); ++ SetResult(rd_reg(), alu32_out); ++ break; ++ } ++ case ADDI_D: ++ printf_instr("ADDI_D\t %s: %016lx, %s: %016lx, si12: %016lx\n", ++ Registers::Name(rd_reg()), rd(), Registers::Name(rj_reg()), ++ rj(), si12_se); ++ SetResult(rd_reg(), rj() + si12_se); ++ break; ++ case LU52I_D: { ++ printf_instr("LU52I_D\t %s: %016lx, %s: %016lx, si12: %016lx\n", ++ Registers::Name(rd_reg()), rd(), Registers::Name(rj_reg()), ++ rj(), si12_se); ++ int64_t si12_se = static_cast(si12()) << 52; ++ uint64_t mask = (1ULL << 52) - 1; ++ alu_out = si12_se + (rj() & mask); ++ SetResult(rd_reg(), alu_out); ++ break; ++ } ++ case ANDI: ++ printf_instr("ANDI\t %s: %016lx, %s: %016lx, si12: %016lx\n", ++ Registers::Name(rd_reg()), rd(), Registers::Name(rj_reg()), ++ rj(), si12_ze); ++ SetResult(rd_reg(), rj() & si12_ze); ++ break; ++ case ORI: ++ printf_instr("ORI\t %s: %016lx, %s: %016lx, si12: %016lx\n", ++ Registers::Name(rd_reg()), rd(), Registers::Name(rj_reg()), ++ rj(), si12_ze); ++ SetResult(rd_reg(), rj_u() | si12_ze); ++ break; ++ case XORI: ++ printf_instr("XORI\t %s: %016lx, %s: %016lx, si12: %016lx\n", ++ Registers::Name(rd_reg()), rd(), Registers::Name(rj_reg()), ++ rj(), si12_ze); ++ SetResult(rd_reg(), rj_u() ^ si12_ze); ++ break; ++ case LD_B: ++ printf_instr("LD_B\t %s: %016lx, %s: %016lx, si12: %016lx\n", ++ Registers::Name(rd_reg()), rd(), Registers::Name(rj_reg()), ++ rj(), si12_ze); ++ set_register(rd_reg(), ReadB(rj() + si12_se)); ++ break; ++ case LD_H: ++ printf_instr("LD_H\t %s: %016lx, %s: %016lx, si12: %016lx\n", ++ Registers::Name(rd_reg()), rd(), Registers::Name(rj_reg()), ++ rj(), si12_ze); ++ set_register(rd_reg(), ReadH(rj() + si12_se, instr_.instr())); ++ break; ++ case LD_W: ++ printf_instr("LD_W\t %s: %016lx, %s: %016lx, si12: %016lx\n", ++ Registers::Name(rd_reg()), rd(), Registers::Name(rj_reg()), ++ rj(), si12_ze); ++ set_register(rd_reg(), ReadW(rj() + si12_se, instr_.instr())); ++ break; ++ case LD_D: ++ printf_instr("LD_D\t %s: %016lx, %s: %016lx, si12: %016lx\n", ++ Registers::Name(rd_reg()), rd(), Registers::Name(rj_reg()), ++ rj(), si12_ze); ++ set_register(rd_reg(), Read2W(rj() + si12_se, instr_.instr())); ++ break; ++ case ST_B: ++ printf_instr("ST_B\t %s: %016lx, %s: %016lx, si12: %016lx\n", ++ Registers::Name(rd_reg()), rd(), Registers::Name(rj_reg()), ++ rj(), si12_ze); ++ WriteB(rj() + si12_se, static_cast(rd())); ++ break; ++ case ST_H: ++ printf_instr("ST_H\t %s: %016lx, %s: %016lx, si12: %016lx\n", ++ Registers::Name(rd_reg()), rd(), Registers::Name(rj_reg()), ++ rj(), si12_ze); ++ WriteH(rj() + si12_se, static_cast(rd()), instr_.instr()); ++ break; ++ case ST_W: ++ printf_instr("ST_W\t %s: %016lx, %s: %016lx, si12: %016lx\n", ++ Registers::Name(rd_reg()), rd(), Registers::Name(rj_reg()), ++ rj(), si12_ze); ++ WriteW(rj() + si12_se, static_cast(rd()), instr_.instr()); ++ break; ++ case ST_D: ++ printf_instr("ST_D\t %s: %016lx, %s: %016lx, si12: %016lx\n", ++ Registers::Name(rd_reg()), rd(), Registers::Name(rj_reg()), ++ rj(), si12_ze); ++ Write2W(rj() + si12_se, rd(), instr_.instr()); ++ break; ++ case LD_BU: ++ printf_instr("LD_BU\t %s: %016lx, %s: %016lx, si12: %016lx\n", ++ Registers::Name(rd_reg()), rd(), Registers::Name(rj_reg()), ++ rj(), si12_ze); ++ set_register(rd_reg(), ReadBU(rj() + si12_se)); ++ break; 
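++ // LD_HU and LD_WU below zero-extend: ReadHU/ReadWU return unsigned values,
++ // so converting them into the 64-bit register leaves the upper bits clear.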
++ case LD_HU: ++ printf_instr("LD_HU\t %s: %016lx, %s: %016lx, si12: %016lx\n", ++ Registers::Name(rd_reg()), rd(), Registers::Name(rj_reg()), ++ rj(), si12_ze); ++ set_register(rd_reg(), ReadHU(rj() + si12_se, instr_.instr())); ++ break; ++ case LD_WU: ++ printf_instr("LD_WU\t %s: %016lx, %s: %016lx, si12: %016lx\n", ++ Registers::Name(rd_reg()), rd(), Registers::Name(rj_reg()), ++ rj(), si12_ze); ++ set_register(rd_reg(), ReadWU(rj() + si12_se, instr_.instr())); ++ break; ++ case FLD_S: { ++ printf_instr("FLD_S\t %s: %016f, %s: %016lx, si12: %016lx\n", ++ FPURegisters::Name(fd_reg()), fd_float(), ++ Registers::Name(rj_reg()), rj(), si12_ze); ++ set_fpu_register(fd_reg(), kFPUInvalidResult); // Trash upper 32 bits. ++ set_fpu_register_word( ++ fd_reg(), ReadW(rj() + si12_se, instr_.instr(), FLOAT_DOUBLE)); ++ break; ++ } ++ case FST_S: { ++ printf_instr("FST_S\t %s: %016f, %s: %016lx, si12: %016lx\n", ++ FPURegisters::Name(fd_reg()), fd_float(), ++ Registers::Name(rj_reg()), rj(), si12_ze); ++ int32_t alu_out_32 = static_cast(get_fpu_register(fd_reg())); ++ WriteW(rj() + si12_se, alu_out_32, instr_.instr()); ++ break; ++ } ++ case FLD_D: { ++ printf_instr("FLD_D\t %s: %016f, %s: %016lx, si12: %016lx\n", ++ FPURegisters::Name(fd_reg()), fd_double(), ++ Registers::Name(rj_reg()), rj(), si12_ze); ++ set_fpu_register_double(fd_reg(), ReadD(rj() + si12_se, instr_.instr())); ++ TraceMemRd(rj() + si12_se, get_fpu_register(fd_reg()), DOUBLE); ++ break; ++ } ++ case FST_D: { ++ printf_instr("FST_D\t %s: %016f, %s: %016lx, si12: %016lx\n", ++ FPURegisters::Name(fd_reg()), fd_double(), ++ Registers::Name(rj_reg()), rj(), si12_ze); ++ WriteD(rj() + si12_se, get_fpu_register_double(fd_reg()), instr_.instr()); ++ TraceMemWr(rj() + si12_se, get_fpu_register(fd_reg()), DWORD); ++ break; ++ } ++ case PRELD: ++ case CACHE: ++ UNIMPLEMENTED(); ++ default: ++ UNREACHABLE(); ++ } ++} ++ ++void Simulator::DecodeTypeOp12() { ++ switch (instr_.Bits(31, 20) << 20) { ++ case FMADD_S: ++ printf_instr("FMADD_S\t %s: %016f, %s: %016f, %s: %016f %s: %016f\n", ++ FPURegisters::Name(fd_reg()), fd_float(), ++ FPURegisters::Name(fk_reg()), fk_float(), ++ FPURegisters::Name(fa_reg()), fa_float(), ++ FPURegisters::Name(fj_reg()), fj_float()); ++ SetFPUFloatResult(fd_reg(), std::fma(fj_float(), fk_float(), fa_float())); ++ break; ++ case FMADD_D: ++ printf_instr("FMADD_D\t %s: %016f, %s: %016f, %s: %016f %s: %016f\n", ++ FPURegisters::Name(fd_reg()), fd_double(), ++ FPURegisters::Name(fk_reg()), fk_double(), ++ FPURegisters::Name(fa_reg()), fa_double(), ++ FPURegisters::Name(fj_reg()), fj_double()); ++ SetFPUDoubleResult(fd_reg(), ++ std::fma(fj_double(), fk_double(), fa_double())); ++ break; ++ case FMSUB_S: ++ printf_instr("FMSUB_S\t %s: %016f, %s: %016f, %s: %016f %s: %016f\n", ++ FPURegisters::Name(fd_reg()), fd_float(), ++ FPURegisters::Name(fk_reg()), fk_float(), ++ FPURegisters::Name(fa_reg()), fa_float(), ++ FPURegisters::Name(fj_reg()), fj_float()); ++ SetFPUFloatResult(fd_reg(), ++ std::fma(fj_float(), fk_float(), -fa_float())); ++ break; ++ case FMSUB_D: ++ printf_instr("FMSUB_D\t %s: %016f, %s: %016f, %s: %016f %s: %016f\n", ++ FPURegisters::Name(fd_reg()), fd_double(), ++ FPURegisters::Name(fk_reg()), fk_double(), ++ FPURegisters::Name(fa_reg()), fa_double(), ++ FPURegisters::Name(fj_reg()), fj_double()); ++ SetFPUDoubleResult(fd_reg(), ++ std::fma(fj_double(), fk_double(), -fa_double())); ++ break; ++ case FNMADD_S: ++ printf_instr("FNMADD_S\t %s: %016f, %s: %016f, %s: %016f %s: %016f\n", ++ 
FPURegisters::Name(fd_reg()), fd_float(), ++ FPURegisters::Name(fk_reg()), fk_float(), ++ FPURegisters::Name(fa_reg()), fa_float(), ++ FPURegisters::Name(fj_reg()), fj_float()); ++ SetFPUFloatResult(fd_reg(), ++ std::fma(-fj_float(), fk_float(), -fa_float())); ++ break; ++ case FNMADD_D: ++ printf_instr("FNMADD_D\t %s: %016f, %s: %016f, %s: %016f %s: %016f\n", ++ FPURegisters::Name(fd_reg()), fd_double(), ++ FPURegisters::Name(fk_reg()), fk_double(), ++ FPURegisters::Name(fa_reg()), fa_double(), ++ FPURegisters::Name(fj_reg()), fj_double()); ++ SetFPUDoubleResult(fd_reg(), ++ std::fma(-fj_double(), fk_double(), -fa_double())); ++ break; ++ case FNMSUB_S: ++ printf_instr("FNMSUB_S\t %s: %016f, %s: %016f, %s: %016f %s: %016f\n", ++ FPURegisters::Name(fd_reg()), fd_float(), ++ FPURegisters::Name(fk_reg()), fk_float(), ++ FPURegisters::Name(fa_reg()), fa_float(), ++ FPURegisters::Name(fj_reg()), fj_float()); ++ SetFPUFloatResult(fd_reg(), ++ std::fma(-fj_float(), fk_float(), fa_float())); ++ break; ++ case FNMSUB_D: ++ printf_instr("FNMSUB_D\t %s: %016f, %s: %016f, %s: %016f %s: %016f\n", ++ FPURegisters::Name(fd_reg()), fd_double(), ++ FPURegisters::Name(fk_reg()), fk_double(), ++ FPURegisters::Name(fa_reg()), fa_double(), ++ FPURegisters::Name(fj_reg()), fj_double()); ++ SetFPUDoubleResult(fd_reg(), ++ std::fma(-fj_double(), fk_double(), fa_double())); ++ break; ++ case FCMP_COND_S: { ++ CHECK_EQ(instr_.Bits(4, 3), 0); ++ float fj = fj_float(); ++ float fk = fk_float(); ++ switch (cond()) { ++ case CAF: { ++ printf_instr("FCMP_CAF_S fcc%d\n", cd_reg()); ++ set_cf_register(cd_reg(), false); ++ break; ++ } ++ case CUN: { ++ printf_instr("FCMP_CUN_S fcc%d, %s: %016f, %s: %016f\n", cd_reg(), ++ FPURegisters::Name(fj_reg()), fj, ++ FPURegisters::Name(fk_reg()), fk); ++ set_cf_register(cd_reg(), std::isnan(fj) || std::isnan(fk)); ++ break; ++ } ++ case CEQ: { ++ printf_instr("FCMP_CEQ_S fcc%d, %s: %016f, %s: %016f\n", cd_reg(), ++ FPURegisters::Name(fj_reg()), fj, ++ FPURegisters::Name(fk_reg()), fk); ++ set_cf_register(cd_reg(), fj == fk); ++ break; ++ } ++ case CUEQ: { ++ printf_instr("FCMP_CUEQ_S fcc%d, %s: %016f, %s: %016f\n", cd_reg(), ++ FPURegisters::Name(fj_reg()), fj, ++ FPURegisters::Name(fk_reg()), fk); ++ set_cf_register(cd_reg(), ++ (fj == fk) || std::isnan(fj) || std::isnan(fk)); ++ break; ++ } ++ case CLT: { ++ printf_instr("FCMP_CLT_S fcc%d, %s: %016f, %s: %016f\n", cd_reg(), ++ FPURegisters::Name(fj_reg()), fj, ++ FPURegisters::Name(fk_reg()), fk); ++ set_cf_register(cd_reg(), fj < fk); ++ break; ++ } ++ case CULT: { ++ printf_instr("FCMP_CULT_S fcc%d, %s: %016f, %s: %016f\n", cd_reg(), ++ FPURegisters::Name(fj_reg()), fj, ++ FPURegisters::Name(fk_reg()), fk); ++ set_cf_register(cd_reg(), ++ (fj < fk) || std::isnan(fj) || std::isnan(fk)); ++ break; ++ } ++ case CLE: { ++ printf_instr("FCMP_CLE_S fcc%d, %s: %016f, %s: %016f\n", cd_reg(), ++ FPURegisters::Name(fj_reg()), fj, ++ FPURegisters::Name(fk_reg()), fk); ++ set_cf_register(cd_reg(), fj <= fk); ++ break; ++ } ++ case CULE: { ++ printf_instr("FCMP_CULE_S fcc%d, %s: %016f, %s: %016f\n", cd_reg(), ++ FPURegisters::Name(fj_reg()), fj, ++ FPURegisters::Name(fk_reg()), fk); ++ set_cf_register(cd_reg(), ++ (fj <= fk) || std::isnan(fj) || std::isnan(fk)); ++ break; ++ } ++ case CNE: { ++ printf_instr("FCMP_CNE_S fcc%d, %s: %016f, %s: %016f\n", cd_reg(), ++ FPURegisters::Name(fj_reg()), fj, ++ FPURegisters::Name(fk_reg()), fk); ++ set_cf_register(cd_reg(), (fj < fk) || (fj > fk)); ++ break; ++ } ++ case COR: { ++ 
printf_instr("FCMP_COR_S fcc%d, %s: %016f, %s: %016f\n", cd_reg(), ++ FPURegisters::Name(fj_reg()), fj, ++ FPURegisters::Name(fk_reg()), fk); ++ set_cf_register(cd_reg(), !std::isnan(fj) && !std::isnan(fk)); ++ break; ++ } ++ case CUNE: { ++ printf_instr("FCMP_CUNE_S fcc%d, %s: %016f, %s: %016f\n", cd_reg(), ++ FPURegisters::Name(fj_reg()), fj, ++ FPURegisters::Name(fk_reg()), fk); ++ set_cf_register(cd_reg(), ++ (fj != fk) || std::isnan(fj) || std::isnan(fk)); ++ break; ++ } ++ case SAF: ++ case SUN: ++ case SEQ: ++ case SUEQ: ++ case SLT: ++ case SULT: ++ case SLE: ++ case SULE: ++ case SNE: ++ case SOR: ++ case SUNE: ++ UNIMPLEMENTED(); ++ break; ++ default: ++ UNREACHABLE(); ++ } ++ break; ++ } ++ case FCMP_COND_D: { ++ CHECK_EQ(instr_.Bits(4, 3), 0); ++ double fj = fj_double(); ++ double fk = fk_double(); ++ switch (cond()) { ++ case CAF: { ++ printf_instr("FCMP_CAF_D fcc%d\n", cd_reg()); ++ set_cf_register(cd_reg(), false); ++ break; ++ } ++ case CUN: { ++ printf_instr("FCMP_CUN_D fcc%d, %s: %016f, %s: %016f\n", cd_reg(), ++ FPURegisters::Name(fj_reg()), fj, ++ FPURegisters::Name(fk_reg()), fk); ++ set_cf_register(cd_reg(), std::isnan(fj) || std::isnan(fk)); ++ break; ++ } ++ case CEQ: { ++ printf_instr("FCMP_CEQ_D fcc%d, %s: %016f, %s: %016f\n", cd_reg(), ++ FPURegisters::Name(fj_reg()), fj, ++ FPURegisters::Name(fk_reg()), fk); ++ set_cf_register(cd_reg(), fj == fk); ++ break; ++ } ++ case CUEQ: { ++ printf_instr("FCMP_CUEQ_D fcc%d, %s: %016f, %s: %016f\n", cd_reg(), ++ FPURegisters::Name(fj_reg()), fj, ++ FPURegisters::Name(fk_reg()), fk); ++ set_cf_register(cd_reg(), ++ (fj == fk) || std::isnan(fj) || std::isnan(fk)); ++ break; ++ } ++ case CLT: { ++ printf_instr("FCMP_CLT_D fcc%d, %s: %016f, %s: %016f\n", cd_reg(), ++ FPURegisters::Name(fj_reg()), fj, ++ FPURegisters::Name(fk_reg()), fk); ++ set_cf_register(cd_reg(), fj < fk); ++ break; ++ } ++ case CULT: { ++ printf_instr("FCMP_CULT_D fcc%d, %s: %016f, %s: %016f\n", cd_reg(), ++ FPURegisters::Name(fj_reg()), fj, ++ FPURegisters::Name(fk_reg()), fk); ++ set_cf_register(cd_reg(), ++ (fj < fk) || std::isnan(fj) || std::isnan(fk)); ++ break; ++ } ++ case CLE: { ++ printf_instr("FCMP_CLE_D fcc%d, %s: %016f, %s: %016f\n", cd_reg(), ++ FPURegisters::Name(fj_reg()), fj, ++ FPURegisters::Name(fk_reg()), fk); ++ set_cf_register(cd_reg(), fj <= fk); ++ break; ++ } ++ case CULE: { ++ printf_instr("FCMP_CULE_D fcc%d, %s: %016f, %s: %016f\n", cd_reg(), ++ FPURegisters::Name(fj_reg()), fj, ++ FPURegisters::Name(fk_reg()), fk); ++ set_cf_register(cd_reg(), ++ (fj <= fk) || std::isnan(fj) || std::isnan(fk)); ++ break; ++ } ++ case CNE: { ++ printf_instr("FCMP_CNE_D fcc%d, %s: %016f, %s: %016f\n", cd_reg(), ++ FPURegisters::Name(fj_reg()), fj, ++ FPURegisters::Name(fk_reg()), fk); ++ set_cf_register(cd_reg(), (fj < fk) || (fj > fk)); ++ break; ++ } ++ case COR: { ++ printf_instr("FCMP_COR_D fcc%d, %s: %016f, %s: %016f\n", cd_reg(), ++ FPURegisters::Name(fj_reg()), fj, ++ FPURegisters::Name(fk_reg()), fk); ++ set_cf_register(cd_reg(), !std::isnan(fj) && !std::isnan(fk)); ++ break; ++ } ++ case CUNE: { ++ printf_instr("FCMP_CUNE_D fcc%d, %s: %016f, %s: %016f\n", cd_reg(), ++ FPURegisters::Name(fj_reg()), fj, ++ FPURegisters::Name(fk_reg()), fk); ++ set_cf_register(cd_reg(), ++ (fj != fk) || std::isnan(fj) || std::isnan(fk)); ++ break; ++ } ++ case SAF: ++ case SUN: ++ case SEQ: ++ case SUEQ: ++ case SLT: ++ case SULT: ++ case SLE: ++ case SULE: ++ case SNE: ++ case SOR: ++ case SUNE: ++ UNIMPLEMENTED(); ++ break; ++ default: ++ UNREACHABLE(); ++ } 
++ break; ++ } ++ case FSEL: { ++ CHECK_EQ(instr_.Bits(19, 18), 0); ++ printf_instr("FSEL fcc%d, %s: %016f, %s: %016f, %s: %016f\n", ca_reg(), ++ FPURegisters::Name(fd_reg()), fd_double(), ++ FPURegisters::Name(fj_reg()), fj_double(), ++ FPURegisters::Name(fk_reg()), fk_double()); ++ if (ca() == 0) { ++ SetFPUDoubleResult(fd_reg(), fj_double()); ++ } else { ++ SetFPUDoubleResult(fd_reg(), fk_double()); ++ } ++ break; ++ } ++ default: ++ UNREACHABLE(); ++ } ++} ++ ++void Simulator::DecodeTypeOp14() { ++ int64_t alu_out = 0x0; ++ int32_t alu32_out = 0x0; ++ ++ switch (instr_.Bits(31, 18) << 18) { ++ case ALSL: { ++ uint8_t sa = sa2() + 1; ++ alu32_out = ++ (static_cast(rj()) << sa) + static_cast(rk()); ++ if (instr_.Bit(17) == 0) { ++ // ALSL_W ++ printf_instr("ALSL_W\t %s: %016lx, %s: %016lx, %s: %016lx, sa2: %d\n", ++ Registers::Name(rd_reg()), rd(), Registers::Name(rj_reg()), ++ rj(), Registers::Name(rk_reg()), rk(), sa2()); ++ SetResult(rd_reg(), alu32_out); ++ } else { ++ // ALSL_WU ++ printf_instr("ALSL_WU\t %s: %016lx, %s: %016lx, %s: %016lx, sa2: %d\n", ++ Registers::Name(rd_reg()), rd(), Registers::Name(rj_reg()), ++ rj(), Registers::Name(rk_reg()), rk(), sa2()); ++ SetResult(rd_reg(), static_cast(alu32_out)); ++ } ++ break; ++ } ++ case BYTEPICK_W: { ++ CHECK_EQ(instr_.Bit(17), 0); ++ printf_instr("BYTEPICK_W\t %s: %016lx, %s: %016lx, %s: %016lx, sa2: %d\n", ++ Registers::Name(rd_reg()), rd(), Registers::Name(rj_reg()), ++ rj(), Registers::Name(rk_reg()), rk(), sa2()); ++ uint8_t sa = sa2() * 8; ++ if (sa == 0) { ++ alu32_out = static_cast(rk()); ++ } else { ++ int32_t mask = (1 << 31) >> (sa - 1); ++ int32_t rk_hi = (static_cast(rk()) & (~mask)) << sa; ++ int32_t rj_lo = (static_cast(rj()) & mask) >> (32 - sa); ++ alu32_out = rk_hi | rj_lo; ++ } ++ SetResult(rd_reg(), static_cast(alu32_out)); ++ break; ++ } ++ case BYTEPICK_D: { ++ printf_instr("BYTEPICK_D\t %s: %016lx, %s: %016lx, %s: %016lx, sa3: %d\n", ++ Registers::Name(rd_reg()), rd(), Registers::Name(rj_reg()), ++ rj(), Registers::Name(rk_reg()), rk(), sa3()); ++ uint8_t sa = sa3() * 8; ++ if (sa == 0) { ++ alu_out = rk(); ++ } else { ++ int64_t mask = (1ULL << 63) >> (sa - 1); ++ int64_t rk_hi = (rk() & (~mask)) << sa; ++ int64_t rj_lo = (rj() & mask) >> (64 - sa); ++ alu_out = rk_hi | rj_lo; ++ } ++ SetResult(rd_reg(), alu_out); ++ break; ++ } ++ case ALSL_D: { ++ printf_instr("ALSL_D\t %s: %016lx, %s: %016lx, %s: %016lx, sa2: %d\n", ++ Registers::Name(rd_reg()), rd(), Registers::Name(rj_reg()), ++ rj(), Registers::Name(rk_reg()), rk(), sa2()); ++ CHECK_EQ(instr_.Bit(17), 0); ++ uint8_t sa = sa2() + 1; ++ alu_out = (rj() << sa) + rk(); ++ SetResult(rd_reg(), alu_out); ++ break; ++ } ++ case SLLI: { ++ DCHECK_EQ(instr_.Bit(17), 0); ++ if (instr_.Bits(17, 15) == 0b001) { ++ // SLLI_W ++ printf_instr("SLLI_W\t %s: %016lx, %s: %016lx, ui5: %d\n", ++ Registers::Name(rd_reg()), rd(), Registers::Name(rj_reg()), ++ rj(), ui5()); ++ alu32_out = static_cast(rj()) << ui5(); ++ SetResult(rd_reg(), static_cast(alu32_out)); ++ } else if ((instr_.Bits(17, 16) == 0b01)) { ++ // SLLI_D ++ printf_instr("SLLI_D\t %s: %016lx, %s: %016lx, ui6: %d\n", ++ Registers::Name(rd_reg()), rd(), Registers::Name(rj_reg()), ++ rj(), ui6()); ++ SetResult(rd_reg(), rj() << ui6()); ++ } ++ break; ++ } ++ case SRLI: { ++ DCHECK_EQ(instr_.Bit(17), 0); ++ if (instr_.Bits(17, 15) == 0b001) { ++ // SRLI_W ++ printf_instr("SRLI_W\t %s: %016lx, %s: %016lx, ui5: %d\n", ++ Registers::Name(rd_reg()), rd(), Registers::Name(rj_reg()), ++ rj(), ui5()); ++ alu32_out = 
static_cast(rj()) >> ui5(); ++ SetResult(rd_reg(), static_cast(alu32_out)); ++ } else if (instr_.Bits(17, 16) == 0b01) { ++ // SRLI_D ++ printf_instr("SRLI_D\t %s: %016lx, %s: %016lx, ui6: %d\n", ++ Registers::Name(rd_reg()), rd(), Registers::Name(rj_reg()), ++ rj(), ui6()); ++ SetResult(rd_reg(), rj_u() >> ui6()); ++ } ++ break; ++ } ++ case SRAI: { ++ DCHECK_EQ(instr_.Bit(17), 0); ++ if (instr_.Bits(17, 15) == 0b001) { ++ // SRAI_W ++ printf_instr("SRAI_W\t %s: %016lx, %s: %016lx, ui5: %d\n", ++ Registers::Name(rd_reg()), rd(), Registers::Name(rj_reg()), ++ rj(), ui5()); ++ alu32_out = static_cast(rj()) >> ui5(); ++ SetResult(rd_reg(), static_cast(alu32_out)); ++ } else if (instr_.Bits(17, 16) == 0b01) { ++ // SRAI_D ++ printf_instr("SRAI_D\t %s: %016lx, %s: %016lx, ui6: %d\n", ++ Registers::Name(rd_reg()), rd(), Registers::Name(rj_reg()), ++ rj(), ui6()); ++ SetResult(rd_reg(), rj() >> ui6()); ++ } ++ break; ++ } ++ case ROTRI: { ++ DCHECK_EQ(instr_.Bit(17), 0); ++ if (instr_.Bits(17, 15) == 0b001) { ++ // ROTRI_W ++ printf_instr("ROTRI_W\t %s: %016lx, %s: %016lx, ui5: %d\n", ++ Registers::Name(rd_reg()), rd(), Registers::Name(rj_reg()), ++ rj(), ui5()); ++ alu32_out = static_cast( ++ base::bits::RotateRight32(static_cast(rj_u()), ++ static_cast(ui5()))); ++ SetResult(rd_reg(), static_cast(alu32_out)); ++ } else if (instr_.Bits(17, 16) == 0b01) { ++ // ROTRI_D ++ printf_instr("ROTRI_D\t %s: %016lx, %s: %016lx, ui6: %d\n", ++ Registers::Name(rd_reg()), rd(), Registers::Name(rj_reg()), ++ rj(), ui6()); ++ alu_out = ++ static_cast(base::bits::RotateRight64(rj_u(), ui6())); ++ SetResult(rd_reg(), alu_out); ++ printf_instr("ROTRI, %s, %s, %d\n", Registers::Name(rd_reg()), ++ Registers::Name(rj_reg()), ui6()); ++ } ++ break; ++ } ++ case LDDIR: ++ case LDPTE: ++ UNIMPLEMENTED(); ++ default: ++ UNREACHABLE(); ++ } ++} ++ ++void Simulator::DecodeTypeOp17() { ++ int64_t alu_out; ++ ++ switch (instr_.Bits(31, 15) << 15) { ++ case ADD_W: { ++ printf_instr("ADD_W\t %s: %016lx, %s, %016lx, %s, %016lx\n", ++ Registers::Name(rd_reg()), rd(), Registers::Name(rj_reg()), ++ rj(), Registers::Name(rk_reg()), rk()); ++ int32_t alu32_out = static_cast(rj() + rk()); ++ // Sign-extend result of 32bit operation into 64bit register. ++ SetResult(rd_reg(), static_cast(alu32_out)); ++ break; ++ } ++ case ADD_D: ++ printf_instr("ADD_D\t %s: %016lx, %s, %016lx, %s, %016lx\n", ++ Registers::Name(rd_reg()), rd(), Registers::Name(rj_reg()), ++ rj(), Registers::Name(rk_reg()), rk()); ++ SetResult(rd_reg(), rj() + rk()); ++ break; ++ case SUB_W: { ++ printf_instr("SUB_W\t %s: %016lx, %s, %016lx, %s, %016lx\n", ++ Registers::Name(rd_reg()), rd(), Registers::Name(rj_reg()), ++ rj(), Registers::Name(rk_reg()), rk()); ++ int32_t alu32_out = static_cast(rj() - rk()); ++ // Sign-extend result of 32bit operation into 64bit register. ++ SetResult(rd_reg(), static_cast(alu32_out)); ++ break; ++ } ++ case SUB_D: ++ printf_instr("SUB_D\t %s: %016lx, %s, %016lx, %s, %016lx\n", ++ Registers::Name(rd_reg()), rd(), Registers::Name(rj_reg()), ++ rj(), Registers::Name(rk_reg()), rk()); ++ SetResult(rd_reg(), rj() - rk()); ++ break; ++ case SLT: ++ printf_instr("SLT\t %s: %016lx, %s, %016lx, %s, %016lx\n", ++ Registers::Name(rd_reg()), rd(), Registers::Name(rj_reg()), ++ rj(), Registers::Name(rk_reg()), rk()); ++ SetResult(rd_reg(), rj() < rk() ? 
1 : 0); ++ break; ++ case SLTU: ++ printf_instr("SLTU\t %s: %016lx, %s, %016lx, %s, %016lx\n", ++ Registers::Name(rd_reg()), rd(), Registers::Name(rj_reg()), ++ rj(), Registers::Name(rk_reg()), rk()); ++ SetResult(rd_reg(), rj_u() < rk_u() ? 1 : 0); ++ break; ++ case MASKEQZ: ++ printf_instr("MASKEQZ\t %s: %016lx, %s, %016lx, %s, %016lx\n", ++ Registers::Name(rd_reg()), rd(), Registers::Name(rj_reg()), ++ rj(), Registers::Name(rk_reg()), rk()); ++ SetResult(rd_reg(), rk() == 0 ? rj() : 0); ++ break; ++ case MASKNEZ: ++ printf_instr("MASKNEZ\t %s: %016lx, %s, %016lx, %s, %016lx\n", ++ Registers::Name(rd_reg()), rd(), Registers::Name(rj_reg()), ++ rj(), Registers::Name(rk_reg()), rk()); ++ SetResult(rd_reg(), rk() != 0 ? rj() : 0); ++ break; ++ case NOR: ++ printf_instr("NOR\t %s: %016lx, %s, %016lx, %s, %016lx\n", ++ Registers::Name(rd_reg()), rd(), Registers::Name(rj_reg()), ++ rj(), Registers::Name(rk_reg()), rk()); ++ SetResult(rd_reg(), ~(rj() | rk())); ++ break; ++ case AND: ++ printf_instr("AND\t %s: %016lx, %s, %016lx, %s, %016lx\n", ++ Registers::Name(rd_reg()), rd(), Registers::Name(rj_reg()), ++ rj(), Registers::Name(rk_reg()), rk()); ++ SetResult(rd_reg(), rj() & rk()); ++ break; ++ case OR: ++ printf_instr("OR\t %s: %016lx, %s, %016lx, %s, %016lx\n", ++ Registers::Name(rd_reg()), rd(), Registers::Name(rj_reg()), ++ rj(), Registers::Name(rk_reg()), rk()); ++ SetResult(rd_reg(), rj() | rk()); ++ break; ++ case XOR: ++ printf_instr("XOR\t %s: %016lx, %s, %016lx, %s, %016lx\n", ++ Registers::Name(rd_reg()), rd(), Registers::Name(rj_reg()), ++ rj(), Registers::Name(rk_reg()), rk()); ++ SetResult(rd_reg(), rj() ^ rk()); ++ break; ++ case ORN: ++ printf_instr("ORN\t %s: %016lx, %s, %016lx, %s, %016lx\n", ++ Registers::Name(rd_reg()), rd(), Registers::Name(rj_reg()), ++ rj(), Registers::Name(rk_reg()), rk()); ++ SetResult(rd_reg(), rj() | (~rk())); ++ break; ++ case ANDN: ++ printf_instr("ANDN\t %s: %016lx, %s, %016lx, %s, %016lx\n", ++ Registers::Name(rd_reg()), rd(), Registers::Name(rj_reg()), ++ rj(), Registers::Name(rk_reg()), rk()); ++ SetResult(rd_reg(), rj() & (~rk())); ++ break; ++ case SLL_W: ++ printf_instr("SLL_W\t %s: %016lx, %s, %016lx, %s, %016lx\n", ++ Registers::Name(rd_reg()), rd(), Registers::Name(rj_reg()), ++ rj(), Registers::Name(rk_reg()), rk()); ++ SetResult(rd_reg(), (int32_t)rj() << (rk_u() % 32)); ++ break; ++ case SRL_W: { ++ printf_instr("SRL_W\t %s: %016lx, %s, %016lx, %s, %016lx\n", ++ Registers::Name(rd_reg()), rd(), Registers::Name(rj_reg()), ++ rj(), Registers::Name(rk_reg()), rk()); ++ alu_out = static_cast((uint32_t)rj_u() >> (rk_u() % 32)); ++ SetResult(rd_reg(), alu_out); ++ break; ++ } ++ case SRA_W: ++ printf_instr("SRA_W\t %s: %016lx, %s, %016lx, %s, %016lx\n", ++ Registers::Name(rd_reg()), rd(), Registers::Name(rj_reg()), ++ rj(), Registers::Name(rk_reg()), rk()); ++ SetResult(rd_reg(), (int32_t)rj() >> (rk_u() % 32)); ++ break; ++ case SLL_D: ++ printf_instr("SLL_D\t %s: %016lx, %s, %016lx, %s, %016lx\n", ++ Registers::Name(rd_reg()), rd(), Registers::Name(rj_reg()), ++ rj(), Registers::Name(rk_reg()), rk()); ++ SetResult(rd_reg(), rj() << (rk_u() % 64)); ++ break; ++ case SRL_D: { ++ printf_instr("SRL_D\t %s: %016lx, %s, %016lx, %s, %016lx\n", ++ Registers::Name(rd_reg()), rd(), Registers::Name(rj_reg()), ++ rj(), Registers::Name(rk_reg()), rk()); ++ alu_out = static_cast(rj_u() >> (rk_u() % 64)); ++ SetResult(rd_reg(), alu_out); ++ break; ++ } ++ case SRA_D: ++ printf_instr("SRA_D\t %s: %016lx, %s, %016lx, %s, %016lx\n", ++ 
Registers::Name(rd_reg()), rd(), Registers::Name(rj_reg()), ++ rj(), Registers::Name(rk_reg()), rk()); ++ SetResult(rd_reg(), rj() >> (rk_u() % 64)); ++ break; ++ case ROTR_W: { ++ printf_instr("ROTR_W\t %s: %016lx, %s, %016lx, %s, %016lx\n", ++ Registers::Name(rd_reg()), rd(), Registers::Name(rj_reg()), ++ rj(), Registers::Name(rk_reg()), rk()); ++ alu_out = static_cast( ++ base::bits::RotateRight32(static_cast(rj_u()), ++ static_cast(rk_u() % 32))); ++ SetResult(rd_reg(), alu_out); ++ break; ++ } ++ case ROTR_D: { ++ printf_instr("ROTR_D\t %s: %016lx, %s, %016lx, %s, %016lx\n", ++ Registers::Name(rd_reg()), rd(), Registers::Name(rj_reg()), ++ rj(), Registers::Name(rk_reg()), rk()); ++ alu_out = static_cast( ++ base::bits::RotateRight64((rj_u()), (rk_u() % 64))); ++ SetResult(rd_reg(), alu_out); ++ break; ++ } ++ case MUL_W: { ++ printf_instr("MUL_W\t %s: %016lx, %s, %016lx, %s, %016lx\n", ++ Registers::Name(rd_reg()), rd(), Registers::Name(rj_reg()), ++ rj(), Registers::Name(rk_reg()), rk()); ++ alu_out = static_cast(rj()) * static_cast(rk()); ++ SetResult(rd_reg(), alu_out); ++ break; ++ } ++ case MULH_W: { ++ printf_instr("MULH_W\t %s: %016lx, %s, %016lx, %s, %016lx\n", ++ Registers::Name(rd_reg()), rd(), Registers::Name(rj_reg()), ++ rj(), Registers::Name(rk_reg()), rk()); ++ int32_t rj_lo = static_cast(rj()); ++ int32_t rk_lo = static_cast(rk()); ++ alu_out = static_cast(rj_lo) * static_cast(rk_lo); ++ SetResult(rd_reg(), alu_out >> 32); ++ break; ++ } ++ case MULH_WU: { ++ printf_instr("MULH_WU\t %s: %016lx, %s, %016lx, %s, %016lx\n", ++ Registers::Name(rd_reg()), rd(), Registers::Name(rj_reg()), ++ rj(), Registers::Name(rk_reg()), rk()); ++ uint32_t rj_lo = static_cast(rj_u()); ++ uint32_t rk_lo = static_cast(rk_u()); ++ alu_out = static_cast(rj_lo) * static_cast(rk_lo); ++ SetResult(rd_reg(), alu_out >> 32); ++ break; ++ } ++ case MUL_D: ++ printf_instr("MUL_D\t %s: %016lx, %s, %016lx, %s, %016lx\n", ++ Registers::Name(rd_reg()), rd(), Registers::Name(rj_reg()), ++ rj(), Registers::Name(rk_reg()), rk()); ++ SetResult(rd_reg(), rj() * rk()); ++ break; ++ case MULH_D: ++ printf_instr("MULH_D\t %s: %016lx, %s, %016lx, %s, %016lx\n", ++ Registers::Name(rd_reg()), rd(), Registers::Name(rj_reg()), ++ rj(), Registers::Name(rk_reg()), rk()); ++ SetResult(rd_reg(), MultiplyHighSigned(rj(), rk())); ++ break; ++ case MULH_DU: ++ printf_instr("MULH_DU\t %s: %016lx, %s, %016lx, %s, %016lx\n", ++ Registers::Name(rd_reg()), rd(), Registers::Name(rj_reg()), ++ rj(), Registers::Name(rk_reg()), rk()); ++ SetResult(rd_reg(), MultiplyHighUnsigned(rj_u(), rk_u())); ++ break; ++ case MULW_D_W: { ++ printf_instr("MULW_D_W\t %s: %016lx, %s, %016lx, %s, %016lx\n", ++ Registers::Name(rd_reg()), rd(), Registers::Name(rj_reg()), ++ rj(), Registers::Name(rk_reg()), rk()); ++ int64_t rj_i32 = static_cast(rj()); ++ int64_t rk_i32 = static_cast(rk()); ++ SetResult(rd_reg(), rj_i32 * rk_i32); ++ break; ++ } ++ case MULW_D_WU: { ++ printf_instr("MULW_D_WU\t %s: %016lx, %s, %016lx, %s, %016lx\n", ++ Registers::Name(rd_reg()), rd(), Registers::Name(rj_reg()), ++ rj(), Registers::Name(rk_reg()), rk()); ++ uint64_t rj_u32 = static_cast(rj_u()); ++ uint64_t rk_u32 = static_cast(rk_u()); ++ SetResult(rd_reg(), rj_u32 * rk_u32); ++ break; ++ } ++ case DIV_W: { ++ printf_instr("DIV_W\t %s: %016lx, %s, %016lx, %s, %016lx\n", ++ Registers::Name(rd_reg()), rd(), Registers::Name(rj_reg()), ++ rj(), Registers::Name(rk_reg()), rk()); ++ int32_t rj_i32 = static_cast(rj()); ++ int32_t rk_i32 = static_cast(rk()); ++ if (rj_i32 == 
INT_MIN && rk_i32 == -1) { ++ SetResult(rd_reg(), INT_MIN); ++ } else if (rk_i32 != 0) { ++ SetResult(rd_reg(), rj_i32 / rk_i32); ++ } ++ break; ++ } ++ case MOD_W: { ++ printf_instr("MOD_W\t %s: %016lx, %s, %016lx, %s, %016lx\n", ++ Registers::Name(rd_reg()), rd(), Registers::Name(rj_reg()), ++ rj(), Registers::Name(rk_reg()), rk()); ++ int32_t rj_i32 = static_cast(rj()); ++ int32_t rk_i32 = static_cast(rk()); ++ if (rj_i32 == INT_MIN && rk_i32 == -1) { ++ SetResult(rd_reg(), 0); ++ } else if (rk_i32 != 0) { ++ SetResult(rd_reg(), rj_i32 % rk_i32); ++ } ++ break; ++ } ++ case DIV_WU: { ++ printf_instr("DIV_WU\t %s: %016lx, %s, %016lx, %s, %016lx\n", ++ Registers::Name(rd_reg()), rd(), Registers::Name(rj_reg()), ++ rj(), Registers::Name(rk_reg()), rk()); ++ uint32_t rj_u32 = static_cast(rj()); ++ uint32_t rk_u32 = static_cast(rk()); ++ if (rk_u32 != 0) { ++ SetResult(rd_reg(), static_cast(rj_u32 / rk_u32)); ++ } ++ break; ++ } ++ case MOD_WU: { ++ printf_instr("MOD_WU\t %s: %016lx, %s, %016lx, %s, %016lx\n", ++ Registers::Name(rd_reg()), rd(), Registers::Name(rj_reg()), ++ rj(), Registers::Name(rk_reg()), rk()); ++ uint32_t rj_u32 = static_cast(rj()); ++ uint32_t rk_u32 = static_cast(rk()); ++ if (rk_u32 != 0) { ++ SetResult(rd_reg(), static_cast(rj_u32 % rk_u32)); ++ } ++ break; ++ } ++ case DIV_D: { ++ printf_instr("DIV_D\t %s: %016lx, %s, %016lx, %s, %016lx\n", ++ Registers::Name(rd_reg()), rd(), Registers::Name(rj_reg()), ++ rj(), Registers::Name(rk_reg()), rk()); ++ if (rj() == LONG_MIN && rk() == -1) { ++ SetResult(rd_reg(), LONG_MIN); ++ } else if (rk() != 0) { ++ SetResult(rd_reg(), rj() / rk()); ++ } ++ break; ++ } ++ case MOD_D: { ++ printf_instr("MOD_D\t %s: %016lx, %s, %016lx, %s, %016lx\n", ++ Registers::Name(rd_reg()), rd(), Registers::Name(rj_reg()), ++ rj(), Registers::Name(rk_reg()), rk()); ++ if (rj() == LONG_MIN && rk() == -1) { ++ SetResult(rd_reg(), 0); ++ } else if (rk() != 0) { ++ SetResult(rd_reg(), rj() % rk()); ++ } ++ break; ++ } ++ case DIV_DU: { ++ printf_instr("DIV_DU\t %s: %016lx, %s, %016lx, %s, %016lx\n", ++ Registers::Name(rd_reg()), rd(), Registers::Name(rj_reg()), ++ rj(), Registers::Name(rk_reg()), rk()); ++ if (rk_u() != 0) { ++ SetResult(rd_reg(), static_cast(rj_u() / rk_u())); ++ } ++ break; ++ } ++ case MOD_DU: { ++ printf_instr("MOD_DU\t %s: %016lx, %s, %016lx, %s, %016lx\n", ++ Registers::Name(rd_reg()), rd(), Registers::Name(rj_reg()), ++ rj(), Registers::Name(rk_reg()), rk()); ++ if (rk_u() != 0) { ++ SetResult(rd_reg(), static_cast(rj_u() % rk_u())); ++ } ++ break; ++ } ++ case BREAK: ++ printf_instr("BREAK\t code: %x\n", instr_.Bits(14, 0)); ++ SoftwareInterrupt(); ++ break; ++ case FADD_S: { ++ printf_instr("FADD_S\t %s: %016f, %s, %016f, %s, %016f\n", ++ FPURegisters::Name(fd_reg()), fd_float(), ++ FPURegisters::Name(fj_reg()), fj_float(), ++ FPURegisters::Name(fk_reg()), fk_float()); ++ SetFPUFloatResult( ++ fd_reg(), ++ FPUCanonalizeOperation([](float lhs, float rhs) { return lhs + rhs; }, ++ fj_float(), fk_float())); ++ break; ++ } ++ case FADD_D: { ++ printf_instr("FADD_D\t %s: %016f, %s, %016f, %s, %016f\n", ++ FPURegisters::Name(fd_reg()), fd_double(), ++ FPURegisters::Name(fj_reg()), fj_double(), ++ FPURegisters::Name(fk_reg()), fk_double()); ++ SetFPUDoubleResult(fd_reg(), ++ FPUCanonalizeOperation( ++ [](double lhs, double rhs) { return lhs + rhs; }, ++ fj_double(), fk_double())); ++ break; ++ } ++ case FSUB_S: { ++ printf_instr("FSUB_S\t %s: %016f, %s, %016f, %s, %016f\n", ++ FPURegisters::Name(fd_reg()), fd_float(), ++ 
FPURegisters::Name(fj_reg()), fj_float(), ++ FPURegisters::Name(fk_reg()), fk_float()); ++ SetFPUFloatResult( ++ fd_reg(), ++ FPUCanonalizeOperation([](float lhs, float rhs) { return lhs - rhs; }, ++ fj_float(), fk_float())); ++ break; ++ } ++ case FSUB_D: { ++ printf_instr("FSUB_D\t %s: %016f, %s, %016f, %s, %016f\n", ++ FPURegisters::Name(fd_reg()), fd_double(), ++ FPURegisters::Name(fj_reg()), fj_double(), ++ FPURegisters::Name(fk_reg()), fk_double()); ++ SetFPUDoubleResult(fd_reg(), ++ FPUCanonalizeOperation( ++ [](double lhs, double rhs) { return lhs - rhs; }, ++ fj_double(), fk_double())); ++ break; ++ } ++ case FMUL_S: { ++ printf_instr("FMUL_S\t %s: %016f, %s, %016f, %s, %016f\n", ++ FPURegisters::Name(fd_reg()), fd_float(), ++ FPURegisters::Name(fj_reg()), fj_float(), ++ FPURegisters::Name(fk_reg()), fk_float()); ++ SetFPUFloatResult( ++ fd_reg(), ++ FPUCanonalizeOperation([](float lhs, float rhs) { return lhs * rhs; }, ++ fj_float(), fk_float())); ++ break; ++ } ++ case FMUL_D: { ++ printf_instr("FMUL_D\t %s: %016f, %s, %016f, %s, %016f\n", ++ FPURegisters::Name(fd_reg()), fd_double(), ++ FPURegisters::Name(fj_reg()), fj_double(), ++ FPURegisters::Name(fk_reg()), fk_double()); ++ SetFPUDoubleResult(fd_reg(), ++ FPUCanonalizeOperation( ++ [](double lhs, double rhs) { return lhs * rhs; }, ++ fj_double(), fk_double())); ++ break; ++ } ++ case FDIV_S: { ++ printf_instr("FDIV_S\t %s: %016f, %s, %016f, %s, %016f\n", ++ FPURegisters::Name(fd_reg()), fd_float(), ++ FPURegisters::Name(fj_reg()), fj_float(), ++ FPURegisters::Name(fk_reg()), fk_float()); ++ SetFPUFloatResult( ++ fd_reg(), ++ FPUCanonalizeOperation([](float lhs, float rhs) { return lhs / rhs; }, ++ fj_float(), fk_float())); ++ break; ++ } ++ case FDIV_D: { ++ printf_instr("FDIV_D\t %s: %016f, %s, %016f, %s, %016f\n", ++ FPURegisters::Name(fd_reg()), fd_double(), ++ FPURegisters::Name(fj_reg()), fj_double(), ++ FPURegisters::Name(fk_reg()), fk_double()); ++ SetFPUDoubleResult(fd_reg(), ++ FPUCanonalizeOperation( ++ [](double lhs, double rhs) { return lhs / rhs; }, ++ fj_double(), fk_double())); ++ break; ++ } ++ case FMAX_S: ++ printf_instr("FMAX_S\t %s: %016f, %s, %016f, %s, %016f\n", ++ FPURegisters::Name(fd_reg()), fd_float(), ++ FPURegisters::Name(fj_reg()), fj_float(), ++ FPURegisters::Name(fk_reg()), fk_float()); ++ SetFPUFloatResult(fd_reg(), FPUMax(fk_float(), fj_float())); ++ break; ++ case FMAX_D: ++ printf_instr("FMAX_D\t %s: %016f, %s, %016f, %s, %016f\n", ++ FPURegisters::Name(fd_reg()), fd_double(), ++ FPURegisters::Name(fj_reg()), fj_double(), ++ FPURegisters::Name(fk_reg()), fk_double()); ++ SetFPUDoubleResult(fd_reg(), FPUMax(fk_double(), fj_double())); ++ break; ++ case FMIN_S: ++ printf_instr("FMIN_S\t %s: %016f, %s, %016f, %s, %016f\n", ++ FPURegisters::Name(fd_reg()), fd_float(), ++ FPURegisters::Name(fj_reg()), fj_float(), ++ FPURegisters::Name(fk_reg()), fk_float()); ++ SetFPUFloatResult(fd_reg(), FPUMin(fk_float(), fj_float())); ++ break; ++ case FMIN_D: ++ printf_instr("FMIN_D\t %s: %016f, %s, %016f, %s, %016f\n", ++ FPURegisters::Name(fd_reg()), fd_double(), ++ FPURegisters::Name(fj_reg()), fj_double(), ++ FPURegisters::Name(fk_reg()), fk_double()); ++ SetFPUDoubleResult(fd_reg(), FPUMin(fk_double(), fj_double())); ++ break; ++ case FMAXA_S: ++ printf_instr("FMAXA_S\t %s: %016f, %s, %016f, %s, %016f\n", ++ FPURegisters::Name(fd_reg()), fd_float(), ++ FPURegisters::Name(fj_reg()), fj_float(), ++ FPURegisters::Name(fk_reg()), fk_float()); ++ SetFPUFloatResult(fd_reg(), FPUMaxA(fk_float(), fj_float())); 
++ break; ++ case FMAXA_D: ++ printf_instr("FMAXA_D\t %s: %016f, %s, %016f, %s, %016f\n", ++ FPURegisters::Name(fd_reg()), fd_double(), ++ FPURegisters::Name(fj_reg()), fj_double(), ++ FPURegisters::Name(fk_reg()), fk_double()); ++ SetFPUDoubleResult(fd_reg(), FPUMaxA(fk_double(), fj_double())); ++ break; ++ case FMINA_S: ++ printf_instr("FMINA_S\t %s: %016f, %s, %016f, %s, %016f\n", ++ FPURegisters::Name(fd_reg()), fd_float(), ++ FPURegisters::Name(fj_reg()), fj_float(), ++ FPURegisters::Name(fk_reg()), fk_float()); ++ SetFPUFloatResult(fd_reg(), FPUMinA(fk_float(), fj_float())); ++ break; ++ case FMINA_D: ++ printf_instr("FMINA_D\t %s: %016f, %s, %016f, %s, %016f\n", ++ FPURegisters::Name(fd_reg()), fd_double(), ++ FPURegisters::Name(fj_reg()), fj_double(), ++ FPURegisters::Name(fk_reg()), fk_double()); ++ SetFPUDoubleResult(fd_reg(), FPUMinA(fk_double(), fj_double())); ++ break; ++ case LDX_B: ++ printf_instr("LDX_B\t %s: %016lx, %s, %016lx, %s, %016lx\n", ++ Registers::Name(rd_reg()), rd(), Registers::Name(rj_reg()), ++ rj(), Registers::Name(rk_reg()), rk()); ++ set_register(rd_reg(), ReadB(rj() + rk())); ++ break; ++ case LDX_H: ++ printf_instr("LDX_H\t %s: %016lx, %s, %016lx, %s, %016lx\n", ++ Registers::Name(rd_reg()), rd(), Registers::Name(rj_reg()), ++ rj(), Registers::Name(rk_reg()), rk()); ++ set_register(rd_reg(), ReadH(rj() + rk(), instr_.instr())); ++ break; ++ case LDX_W: ++ printf_instr("LDX_W\t %s: %016lx, %s, %016lx, %s, %016lx\n", ++ Registers::Name(rd_reg()), rd(), Registers::Name(rj_reg()), ++ rj(), Registers::Name(rk_reg()), rk()); ++ set_register(rd_reg(), ReadW(rj() + rk(), instr_.instr())); ++ break; ++ case LDX_D: ++ printf_instr("LDX_D\t %s: %016lx, %s, %016lx, %s, %016lx\n", ++ Registers::Name(rd_reg()), rd(), Registers::Name(rj_reg()), ++ rj(), Registers::Name(rk_reg()), rk()); ++ set_register(rd_reg(), Read2W(rj() + rk(), instr_.instr())); ++ break; ++ case STX_B: ++ printf_instr("STX_B\t %s: %016lx, %s, %016lx, %s, %016lx\n", ++ Registers::Name(rd_reg()), rd(), Registers::Name(rj_reg()), ++ rj(), Registers::Name(rk_reg()), rk()); ++ WriteB(rj() + rk(), static_cast(rd())); ++ break; ++ case STX_H: ++ printf_instr("STX_H\t %s: %016lx, %s, %016lx, %s, %016lx\n", ++ Registers::Name(rd_reg()), rd(), Registers::Name(rj_reg()), ++ rj(), Registers::Name(rk_reg()), rk()); ++ WriteH(rj() + rk(), static_cast(rd()), instr_.instr()); ++ break; ++ case STX_W: ++ printf_instr("STX_W\t %s: %016lx, %s, %016lx, %s, %016lx\n", ++ Registers::Name(rd_reg()), rd(), Registers::Name(rj_reg()), ++ rj(), Registers::Name(rk_reg()), rk()); ++ WriteW(rj() + rk(), static_cast(rd()), instr_.instr()); ++ break; ++ case STX_D: ++ printf_instr("STX_D\t %s: %016lx, %s, %016lx, %s, %016lx\n", ++ Registers::Name(rd_reg()), rd(), Registers::Name(rj_reg()), ++ rj(), Registers::Name(rk_reg()), rk()); ++ Write2W(rj() + rk(), rd(), instr_.instr()); ++ break; ++ case LDX_BU: ++ printf_instr("LDX_BU\t %s: %016lx, %s, %016lx, %s, %016lx\n", ++ Registers::Name(rd_reg()), rd(), Registers::Name(rj_reg()), ++ rj(), Registers::Name(rk_reg()), rk()); ++ set_register(rd_reg(), ReadBU(rj() + rk())); ++ break; ++ case LDX_HU: ++ printf_instr("LDX_HU\t %s: %016lx, %s, %016lx, %s, %016lx\n", ++ Registers::Name(rd_reg()), rd(), Registers::Name(rj_reg()), ++ rj(), Registers::Name(rk_reg()), rk()); ++ set_register(rd_reg(), ReadHU(rj() + rk(), instr_.instr())); ++ break; ++ case LDX_WU: ++ printf_instr("LDX_WU\t %s: %016lx, %s, %016lx, %s, %016lx\n", ++ Registers::Name(rd_reg()), rd(), Registers::Name(rj_reg()), ++ 
rj(), Registers::Name(rk_reg()), rk()); ++ set_register(rd_reg(), ReadWU(rj() + rk(), instr_.instr())); ++ break; ++ case PRELDX: ++ printf("Sim UNIMPLEMENTED: PRELDX\n"); ++ UNIMPLEMENTED(); ++ break; ++ case FLDX_S: ++ printf_instr("FLDX_S\t %s: %016f, %s: %016lx, %s: %016lx\n", ++ FPURegisters::Name(fd_reg()), fd_float(), ++ Registers::Name(rj_reg()), rj(), Registers::Name(rk_reg()), ++ rk()); ++ set_fpu_register(fd_reg(), kFPUInvalidResult); // Trash upper 32 bits. ++ set_fpu_register_word(fd_reg(), ++ ReadW(rj() + rk(), instr_.instr(), FLOAT_DOUBLE)); ++ break; ++ case FLDX_D: ++ printf_instr("FLDX_D\t %s: %016f, %s: %016lx, %s: %016lx\n", ++ FPURegisters::Name(fd_reg()), fd_double(), ++ Registers::Name(rj_reg()), rj(), Registers::Name(rk_reg()), ++ rk()); ++ set_fpu_register_double(fd_reg(), ReadD(rj() + rk(), instr_.instr())); ++ break; ++ case FSTX_S: ++ printf_instr("FSTX_S\t %s: %016f, %s: %016lx, %s: %016lx\n", ++ FPURegisters::Name(fd_reg()), fd_float(), ++ Registers::Name(rj_reg()), rj(), Registers::Name(rk_reg()), ++ rk()); ++ WriteW(rj() + rk(), static_cast(get_fpu_register(fd_reg())), ++ instr_.instr()); ++ break; ++ case FSTX_D: ++ printf_instr("FSTX_D\t %s: %016f, %s: %016lx, %s: %016lx\n", ++ FPURegisters::Name(fd_reg()), fd_double(), ++ Registers::Name(rj_reg()), rj(), Registers::Name(rk_reg()), ++ rk()); ++ WriteD(rj() + rk(), get_fpu_register_double(fd_reg()), instr_.instr()); ++ break; ++ case ASRTLE_D: ++ printf("Sim UNIMPLEMENTED: ASRTLE_D\n"); ++ UNIMPLEMENTED(); ++ break; ++ case ASRTGT_D: ++ printf("Sim UNIMPLEMENTED: ASRTGT_D\n"); ++ UNIMPLEMENTED(); ++ break; ++ case DBGCALL: ++ printf("Sim UNIMPLEMENTED: DBGCALL\n"); ++ UNIMPLEMENTED(); ++ break; ++ case SYSCALL: ++ printf("Sim UNIMPLEMENTED: SYSCALL\n"); ++ UNIMPLEMENTED(); ++ break; ++ case HYPCALL: ++ printf("Sim UNIMPLEMENTED: HYPCALL\n"); ++ UNIMPLEMENTED(); ++ break; ++ case AMSWAP_W: ++ printf("Sim UNIMPLEMENTED: AMSWAP_W\n"); ++ UNIMPLEMENTED(); ++ break; ++ case AMSWAP_D: ++ printf("Sim UNIMPLEMENTED: AMSWAP_D\n"); ++ UNIMPLEMENTED(); ++ break; ++ case AMADD_W: ++ printf("Sim UNIMPLEMENTED: AMADD_W\n"); ++ UNIMPLEMENTED(); ++ break; ++ case AMADD_D: ++ printf("Sim UNIMPLEMENTED: AMADD_D\n"); ++ UNIMPLEMENTED(); ++ break; ++ case AMAND_W: ++ printf("Sim UNIMPLEMENTED: AMAND_W\n"); ++ UNIMPLEMENTED(); ++ break; ++ case AMAND_D: ++ printf("Sim UNIMPLEMENTED: AMAND_D\n"); ++ UNIMPLEMENTED(); ++ break; ++ case AMOR_W: ++ printf("Sim UNIMPLEMENTED: AMOR_W\n"); ++ UNIMPLEMENTED(); ++ break; ++ case AMOR_D: ++ printf("Sim UNIMPLEMENTED: AMOR_D\n"); ++ UNIMPLEMENTED(); ++ break; ++ case AMXOR_W: ++ printf("Sim UNIMPLEMENTED: AMXOR_W\n"); ++ UNIMPLEMENTED(); ++ break; ++ case AMXOR_D: ++ printf("Sim UNIMPLEMENTED: AMXOR_D\n"); ++ UNIMPLEMENTED(); ++ break; ++ case AMMAX_W: ++ printf("Sim UNIMPLEMENTED: AMMAX_W\n"); ++ UNIMPLEMENTED(); ++ break; ++ case AMMAX_D: ++ printf("Sim UNIMPLEMENTED: AMMAX_D\n"); ++ UNIMPLEMENTED(); ++ break; ++ case AMMIN_W: ++ printf("Sim UNIMPLEMENTED: AMMIN_W\n"); ++ UNIMPLEMENTED(); ++ break; ++ case AMMIN_D: ++ printf("Sim UNIMPLEMENTED: AMMIN_D\n"); ++ UNIMPLEMENTED(); ++ break; ++ case AMMAX_WU: ++ printf("Sim UNIMPLEMENTED: AMMAX_WU\n"); ++ UNIMPLEMENTED(); ++ break; ++ case AMMAX_DU: ++ printf("Sim UNIMPLEMENTED: AMMAX_DU\n"); ++ UNIMPLEMENTED(); ++ break; ++ case AMMIN_WU: ++ printf("Sim UNIMPLEMENTED: AMMIN_WU\n"); ++ UNIMPLEMENTED(); ++ break; ++ case AMMIN_DU: ++ printf("Sim UNIMPLEMENTED: AMMIN_DU\n"); ++ UNIMPLEMENTED(); ++ break; ++ case AMSWAP_DB_W: { ++ 
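++        // The AM*_DB_* cases below emulate the atomic read-modify-write
++        // instructions with a load-linked/store-conditional retry loop:
++        // read the old value under the global monitor, attempt a
++        // conditional write of the new value, and retry until it succeeds.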
printf_instr("AMSWAP_DB_W:\t %s: %016lx, %s, %016lx, %s, %016lx\n", ++ Registers::Name(rd_reg()), rd(), Registers::Name(rk_reg()), ++ rk(), Registers::Name(rj_reg()), rj()); ++ int32_t rdvalue; ++ do { ++ { ++ base::MutexGuard lock_guard(&GlobalMonitor::Get()->mutex); ++ set_register(rd_reg(), ReadW(rj(), instr_.instr())); ++ local_monitor_.NotifyLoadLinked(rj(), TransactionSize::Word); ++ GlobalMonitor::Get()->NotifyLoadLinked_Locked( ++ rj(), &global_monitor_thread_); ++ } ++ rdvalue = get_register(rd_reg()); ++ WriteConditionalW(rj(), static_cast(rk()), instr_.instr(), ++ rd_reg()); ++ } while (!get_register(rd_reg())); ++ set_register(rd_reg(), rdvalue); ++ } break; ++ case AMSWAP_DB_D: { ++ printf_instr("AMSWAP_DB_D:\t %s: %016lx, %s, %016lx, %s, %016lx\n", ++ Registers::Name(rd_reg()), rd(), Registers::Name(rk_reg()), ++ rk(), Registers::Name(rj_reg()), rj()); ++ int64_t rdvalue; ++ do { ++ { ++ base::MutexGuard lock_guard(&GlobalMonitor::Get()->mutex); ++ set_register(rd_reg(), Read2W(rj(), instr_.instr())); ++ local_monitor_.NotifyLoadLinked(rj(), TransactionSize::DoubleWord); ++ GlobalMonitor::Get()->NotifyLoadLinked_Locked( ++ rj(), &global_monitor_thread_); ++ } ++ rdvalue = get_register(rd_reg()); ++ WriteConditional2W(rj(), rk(), instr_.instr(), rd_reg()); ++ } while (!get_register(rd_reg())); ++ set_register(rd_reg(), rdvalue); ++ } break; ++ case AMADD_DB_W: { ++ printf_instr("AMADD_DB_W:\t %s: %016lx, %s, %016lx, %s, %016lx\n", ++ Registers::Name(rd_reg()), rd(), Registers::Name(rk_reg()), ++ rk(), Registers::Name(rj_reg()), rj()); ++ int32_t rdvalue; ++ do { ++ { ++ base::MutexGuard lock_guard(&GlobalMonitor::Get()->mutex); ++ set_register(rd_reg(), ReadW(rj(), instr_.instr())); ++ local_monitor_.NotifyLoadLinked(rj(), TransactionSize::Word); ++ GlobalMonitor::Get()->NotifyLoadLinked_Locked( ++ rj(), &global_monitor_thread_); ++ } ++ rdvalue = get_register(rd_reg()); ++ WriteConditionalW(rj(), ++ static_cast(static_cast(rk()) + ++ static_cast(rd())), ++ instr_.instr(), rd_reg()); ++ } while (!get_register(rd_reg())); ++ set_register(rd_reg(), rdvalue); ++ } break; ++ case AMADD_DB_D: { ++ printf_instr("AMADD_DB_D:\t %s: %016lx, %s, %016lx, %s, %016lx\n", ++ Registers::Name(rd_reg()), rd(), Registers::Name(rk_reg()), ++ rk(), Registers::Name(rj_reg()), rj()); ++ int64_t rdvalue; ++ do { ++ { ++ base::MutexGuard lock_guard(&GlobalMonitor::Get()->mutex); ++ set_register(rd_reg(), Read2W(rj(), instr_.instr())); ++ local_monitor_.NotifyLoadLinked(rj(), TransactionSize::DoubleWord); ++ GlobalMonitor::Get()->NotifyLoadLinked_Locked( ++ rj(), &global_monitor_thread_); ++ } ++ rdvalue = get_register(rd_reg()); ++ WriteConditional2W(rj(), rk() + rd(), instr_.instr(), rd_reg()); ++ } while (!get_register(rd_reg())); ++ set_register(rd_reg(), rdvalue); ++ } break; ++ case AMAND_DB_W: { ++ printf_instr("AMAND_DB_W:\t %s: %016lx, %s, %016lx, %s, %016lx\n", ++ Registers::Name(rd_reg()), rd(), Registers::Name(rk_reg()), ++ rk(), Registers::Name(rj_reg()), rj()); ++ int32_t rdvalue; ++ do { ++ { ++ base::MutexGuard lock_guard(&GlobalMonitor::Get()->mutex); ++ set_register(rd_reg(), ReadW(rj(), instr_.instr())); ++ local_monitor_.NotifyLoadLinked(rj(), TransactionSize::Word); ++ GlobalMonitor::Get()->NotifyLoadLinked_Locked( ++ rj(), &global_monitor_thread_); ++ } ++ rdvalue = get_register(rd_reg()); ++ WriteConditionalW(rj(), ++ static_cast(static_cast(rk()) & ++ static_cast(rd())), ++ instr_.instr(), rd_reg()); ++ } while (!get_register(rd_reg())); ++ set_register(rd_reg(), rdvalue); ++ } 
break; ++ case AMAND_DB_D: { ++ printf_instr("AMAND_DB_D:\t %s: %016lx, %s, %016lx, %s, %016lx\n", ++ Registers::Name(rd_reg()), rd(), Registers::Name(rk_reg()), ++ rk(), Registers::Name(rj_reg()), rj()); ++ int64_t rdvalue; ++ do { ++ { ++ base::MutexGuard lock_guard(&GlobalMonitor::Get()->mutex); ++ set_register(rd_reg(), Read2W(rj(), instr_.instr())); ++ local_monitor_.NotifyLoadLinked(rj(), TransactionSize::DoubleWord); ++ GlobalMonitor::Get()->NotifyLoadLinked_Locked( ++ rj(), &global_monitor_thread_); ++ } ++ rdvalue = get_register(rd_reg()); ++ WriteConditional2W(rj(), rk() & rd(), instr_.instr(), rd_reg()); ++ } while (!get_register(rd_reg())); ++ set_register(rd_reg(), rdvalue); ++ } break; ++ case AMOR_DB_W: { ++ printf_instr("AMOR_DB_W:\t %s: %016lx, %s, %016lx, %s, %016lx\n", ++ Registers::Name(rd_reg()), rd(), Registers::Name(rk_reg()), ++ rk(), Registers::Name(rj_reg()), rj()); ++ int32_t rdvalue; ++ do { ++ { ++ base::MutexGuard lock_guard(&GlobalMonitor::Get()->mutex); ++ set_register(rd_reg(), ReadW(rj(), instr_.instr())); ++ local_monitor_.NotifyLoadLinked(rj(), TransactionSize::Word); ++ GlobalMonitor::Get()->NotifyLoadLinked_Locked( ++ rj(), &global_monitor_thread_); ++ } ++ rdvalue = get_register(rd_reg()); ++ WriteConditionalW(rj(), ++ static_cast(static_cast(rk()) | ++ static_cast(rd())), ++ instr_.instr(), rd_reg()); ++ } while (!get_register(rd_reg())); ++ set_register(rd_reg(), rdvalue); ++ } break; ++ case AMOR_DB_D: { ++ printf_instr("AMOR_DB_D:\t %s: %016lx, %s, %016lx, %s, %016lx\n", ++ Registers::Name(rd_reg()), rd(), Registers::Name(rk_reg()), ++ rk(), Registers::Name(rj_reg()), rj()); ++ int64_t rdvalue; ++ do { ++ { ++ base::MutexGuard lock_guard(&GlobalMonitor::Get()->mutex); ++ set_register(rd_reg(), Read2W(rj(), instr_.instr())); ++ local_monitor_.NotifyLoadLinked(rj(), TransactionSize::DoubleWord); ++ GlobalMonitor::Get()->NotifyLoadLinked_Locked( ++ rj(), &global_monitor_thread_); ++ } ++ rdvalue = get_register(rd_reg()); ++ WriteConditional2W(rj(), rk() | rd(), instr_.instr(), rd_reg()); ++ } while (!get_register(rd_reg())); ++ set_register(rd_reg(), rdvalue); ++ } break; ++ case AMXOR_DB_W: { ++ printf_instr("AMXOR_DB_W:\t %s: %016lx, %s, %016lx, %s, %016lx\n", ++ Registers::Name(rd_reg()), rd(), Registers::Name(rk_reg()), ++ rk(), Registers::Name(rj_reg()), rj()); ++ int32_t rdvalue; ++ do { ++ { ++ base::MutexGuard lock_guard(&GlobalMonitor::Get()->mutex); ++ set_register(rd_reg(), ReadW(rj(), instr_.instr())); ++ local_monitor_.NotifyLoadLinked(rj(), TransactionSize::Word); ++ GlobalMonitor::Get()->NotifyLoadLinked_Locked( ++ rj(), &global_monitor_thread_); ++ } ++ rdvalue = get_register(rd_reg()); ++ WriteConditionalW(rj(), ++ static_cast(static_cast(rk()) ^ ++ static_cast(rd())), ++ instr_.instr(), rd_reg()); ++ } while (!get_register(rd_reg())); ++ set_register(rd_reg(), rdvalue); ++ } break; ++ case AMXOR_DB_D: { ++ printf_instr("AMXOR_DB_D:\t %s: %016lx, %s, %016lx, %s, %016lx\n", ++ Registers::Name(rd_reg()), rd(), Registers::Name(rk_reg()), ++ rk(), Registers::Name(rj_reg()), rj()); ++ int64_t rdvalue; ++ do { ++ { ++ base::MutexGuard lock_guard(&GlobalMonitor::Get()->mutex); ++ set_register(rd_reg(), Read2W(rj(), instr_.instr())); ++ local_monitor_.NotifyLoadLinked(rj(), TransactionSize::DoubleWord); ++ GlobalMonitor::Get()->NotifyLoadLinked_Locked( ++ rj(), &global_monitor_thread_); ++ } ++ rdvalue = get_register(rd_reg()); ++ WriteConditional2W(rj(), rk() ^ rd(), instr_.instr(), rd_reg()); ++ } while (!get_register(rd_reg())); ++ 
set_register(rd_reg(), rdvalue); ++ } break; ++ case AMMAX_DB_W: ++ printf("Sim UNIMPLEMENTED: AMMAX_DB_W\n"); ++ UNIMPLEMENTED(); ++ break; ++ case AMMAX_DB_D: ++ printf("Sim UNIMPLEMENTED: AMMAX_DB_D\n"); ++ UNIMPLEMENTED(); ++ break; ++ case AMMIN_DB_W: ++ printf("Sim UNIMPLEMENTED: AMMIN_DB_W\n"); ++ UNIMPLEMENTED(); ++ break; ++ case AMMIN_DB_D: ++ printf("Sim UNIMPLEMENTED: AMMIN_DB_D\n"); ++ UNIMPLEMENTED(); ++ break; ++ case AMMAX_DB_WU: ++ printf("Sim UNIMPLEMENTED: AMMAX_DB_WU\n"); ++ UNIMPLEMENTED(); ++ break; ++ case AMMAX_DB_DU: ++ printf("Sim UNIMPLEMENTED: AMMAX_DB_DU\n"); ++ UNIMPLEMENTED(); ++ break; ++ case AMMIN_DB_WU: ++ printf("Sim UNIMPLEMENTED: AMMIN_DB_WU\n"); ++ UNIMPLEMENTED(); ++ break; ++ case AMMIN_DB_DU: ++ printf("Sim UNIMPLEMENTED: AMMIN_DB_DU\n"); ++ UNIMPLEMENTED(); ++ break; ++ case DBAR: ++ printf_instr("DBAR\n"); ++ break; ++ case IBAR: ++ printf("Sim UNIMPLEMENTED: IBAR\n"); ++ UNIMPLEMENTED(); ++ break; ++ case FLDGT_S: ++ printf("Sim UNIMPLEMENTED: FLDGT_S\n"); ++ UNIMPLEMENTED(); ++ break; ++ case FLDGT_D: ++ printf("Sim UNIMPLEMENTED: FLDGT_D\n"); ++ UNIMPLEMENTED(); ++ break; ++ case FLDLE_S: ++ printf("Sim UNIMPLEMENTED: FLDLE_S\n"); ++ UNIMPLEMENTED(); ++ break; ++ case FLDLE_D: ++ printf("Sim UNIMPLEMENTED: FLDLE_D\n"); ++ UNIMPLEMENTED(); ++ break; ++ case FSTGT_S: ++ printf("Sim UNIMPLEMENTED: FSTGT_S\n"); ++ UNIMPLEMENTED(); ++ break; ++ case FSTGT_D: ++ printf("Sim UNIMPLEMENTED: FSTGT_D\n"); ++ UNIMPLEMENTED(); ++ break; ++ case FSTLE_S: ++ printf("Sim UNIMPLEMENTED: FSTLE_S\n"); ++ UNIMPLEMENTED(); ++ break; ++ case FSTLE_D: ++ printf("Sim UNIMPLEMENTED: FSTLE_D\n"); ++ UNIMPLEMENTED(); ++ break; ++ case LDGT_B: ++ printf("Sim UNIMPLEMENTED: LDGT_B\n"); ++ UNIMPLEMENTED(); ++ break; ++ case LDGT_H: ++ printf("Sim UNIMPLEMENTED: LDGT_H\n"); ++ UNIMPLEMENTED(); ++ break; ++ case LDGT_W: ++ printf("Sim UNIMPLEMENTED: LDGT_W\n"); ++ UNIMPLEMENTED(); ++ break; ++ case LDGT_D: ++ printf("Sim UNIMPLEMENTED: LDGT_D\n"); ++ UNIMPLEMENTED(); ++ break; ++ case LDLE_B: ++ printf("Sim UNIMPLEMENTED: LDLT_B\n"); ++ UNIMPLEMENTED(); ++ break; ++ case LDLE_H: ++ printf("Sim UNIMPLEMENTED: LDLE_H\n"); ++ UNIMPLEMENTED(); ++ break; ++ case LDLE_W: ++ printf("Sim UNIMPLEMENTED: LDLE_W\n"); ++ UNIMPLEMENTED(); ++ break; ++ case LDLE_D: ++ printf("Sim UNIMPLEMENTED: LDLE_D\n"); ++ UNIMPLEMENTED(); ++ break; ++ case STGT_B: ++ printf("Sim UNIMPLEMENTED: STGT_B\n"); ++ UNIMPLEMENTED(); ++ break; ++ case STGT_H: ++ printf("Sim UNIMPLEMENTED: STGT_H\n"); ++ UNIMPLEMENTED(); ++ break; ++ case STGT_W: ++ printf("Sim UNIMPLEMENTED: STGT_W\n"); ++ UNIMPLEMENTED(); ++ break; ++ case STGT_D: ++ printf("Sim UNIMPLEMENTED: STGT_D\n"); ++ UNIMPLEMENTED(); ++ break; ++ case STLE_B: ++ printf("Sim UNIMPLEMENTED: STLE_B\n"); ++ UNIMPLEMENTED(); ++ break; ++ case STLE_H: ++ printf("Sim UNIMPLEMENTED: STLE_H\n"); ++ UNIMPLEMENTED(); ++ break; ++ case STLE_W: ++ printf("Sim UNIMPLEMENTED: STLE_W\n"); ++ UNIMPLEMENTED(); ++ break; ++ case STLE_D: ++ printf("Sim UNIMPLEMENTED: STLE_D\n"); ++ UNIMPLEMENTED(); ++ break; ++ case WAIT_INVTLB: ++ printf("Sim UNIMPLEMENTED: WAIT_INVTLB\n"); ++ UNIMPLEMENTED(); ++ break; ++ case FSCALEB_S: ++ printf("Sim UNIMPLEMENTED: FSCALEB_S\n"); ++ UNIMPLEMENTED(); ++ break; ++ case FSCALEB_D: ++ printf("Sim UNIMPLEMENTED: FSCALEB_D\n"); ++ UNIMPLEMENTED(); ++ break; ++ case FCOPYSIGN_S: ++ printf("Sim UNIMPLEMENTED: FCOPYSIGN_S\n"); ++ UNIMPLEMENTED(); ++ break; ++ case FCOPYSIGN_D: ++ printf("Sim UNIMPLEMENTED: FCOPYSIGN_D\n"); ++ 
UNIMPLEMENTED(); ++ break; ++ case CRC_W_B_W: ++ printf("Sim UNIMPLEMENTED: CRC_W_B_W\n"); ++ UNIMPLEMENTED(); ++ break; ++ case CRC_W_H_W: ++ printf("Sim UNIMPLEMENTED: CRC_W_H_W\n"); ++ UNIMPLEMENTED(); ++ break; ++ case CRC_W_W_W: ++ printf("Sim UNIMPLEMENTED: CRC_W_W_W\n"); ++ UNIMPLEMENTED(); ++ break; ++ case CRC_W_D_W: ++ printf("Sim UNIMPLEMENTED: CRC_W_D_W\n"); ++ UNIMPLEMENTED(); ++ break; ++ case CRCC_W_B_W: ++ printf("Sim UNIMPLEMENTED: CRCC_W_B_W\n"); ++ UNIMPLEMENTED(); ++ break; ++ case CRCC_W_H_W: ++ printf("Sim UNIMPLEMENTED: CRCC_W_H_W\n"); ++ UNIMPLEMENTED(); ++ break; ++ case CRCC_W_W_W: ++ printf("Sim UNIMPLEMENTED: CRCC_W_W_W\n"); ++ UNIMPLEMENTED(); ++ break; ++ case CRCC_W_D_W: ++ printf("Sim UNIMPLEMENTED: CRCC_W_D_W\n"); ++ UNIMPLEMENTED(); ++ break; ++ default: ++ UNREACHABLE(); ++ } ++} ++ ++void Simulator::DecodeTypeOp22() { ++ int64_t alu_out; ++ ++ switch (instr_.Bits(31, 10) << 10) { ++ case CLZ_W: { ++ printf_instr("CLZ_W\t %s: %016lx, %s, %016lx\n", ++ Registers::Name(rd_reg()), rd(), Registers::Name(rj_reg()), ++ rj()); ++ alu_out = base::bits::CountLeadingZeros32(static_cast(rj_u())); ++ SetResult(rd_reg(), alu_out); ++ break; ++ } ++ case CTZ_W: { ++ printf_instr("CTZ_W\t %s: %016lx, %s, %016lx\n", ++ Registers::Name(rd_reg()), rd(), Registers::Name(rj_reg()), ++ rj()); ++ alu_out = base::bits::CountTrailingZeros32(static_cast(rj_u())); ++ SetResult(rd_reg(), alu_out); ++ break; ++ } ++ case CLZ_D: { ++ printf_instr("CLZ_D\t %s: %016lx, %s, %016lx\n", ++ Registers::Name(rd_reg()), rd(), Registers::Name(rj_reg()), ++ rj()); ++ alu_out = base::bits::CountLeadingZeros64(static_cast(rj_u())); ++ SetResult(rd_reg(), alu_out); ++ break; ++ } ++ case CTZ_D: { ++ printf_instr("CTZ_D\t %s: %016lx, %s, %016lx\n", ++ Registers::Name(rd_reg()), rd(), Registers::Name(rj_reg()), ++ rj()); ++ alu_out = base::bits::CountTrailingZeros64(static_cast(rj_u())); ++ SetResult(rd_reg(), alu_out); ++ break; ++ } ++ case REVB_2H: { ++ printf_instr("REVB_2H\t %s: %016lx, %s, %016lx\n", ++ Registers::Name(rd_reg()), rd(), Registers::Name(rj_reg()), ++ rj()); ++ uint32_t input = static_cast(rj()); ++ uint64_t output = 0; ++ ++ uint32_t mask = 0xFF000000; ++ for (int i = 0; i < 4; i++) { ++ uint32_t tmp = mask & input; ++ if (i % 2 == 0) { ++ tmp = tmp >> 8; ++ } else { ++ tmp = tmp << 8; ++ } ++ output = output | tmp; ++ mask = mask >> 8; ++ } ++ ++ alu_out = static_cast(static_cast(output)); ++ SetResult(rd_reg(), alu_out); ++ break; ++ } ++ case REVB_4H: { ++ printf_instr("REVB_4H\t %s: %016lx, %s, %016lx\n", ++ Registers::Name(rd_reg()), rd(), Registers::Name(rj_reg()), ++ rj()); ++ uint64_t input = rj_u(); ++ uint64_t output = 0; ++ ++ uint64_t mask = 0xFF00000000000000; ++ for (int i = 0; i < 8; i++) { ++ uint64_t tmp = mask & input; ++ if (i % 2 == 0) { ++ tmp = tmp >> 8; ++ } else { ++ tmp = tmp << 8; ++ } ++ output = output | tmp; ++ mask = mask >> 8; ++ } ++ ++ alu_out = static_cast(output); ++ SetResult(rd_reg(), alu_out); ++ break; ++ } ++ case REVB_2W: { ++ printf_instr("REVB_2W\t %s: %016lx, %s, %016lx\n", ++ Registers::Name(rd_reg()), rd(), Registers::Name(rj_reg()), ++ rj()); ++ uint64_t input = rj_u(); ++ uint64_t output = 0; ++ ++ uint64_t mask = 0xFF000000FF000000; ++ for (int i = 0; i < 4; i++) { ++ uint64_t tmp = mask & input; ++ if (i <= 1) { ++ tmp = tmp >> (24 - i * 16); ++ } else { ++ tmp = tmp << (i * 16 - 24); ++ } ++ output = output | tmp; ++ mask = mask >> 8; ++ } ++ ++ alu_out = static_cast(output); ++ SetResult(rd_reg(), alu_out); ++ break; ++ } ++ 
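++      // REVB_D reverses the order of all eight bytes of rj: byte i of the
++      // input ends up as byte (7 - i) of the output.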
case REVB_D: { ++ printf_instr("REVB_D\t %s: %016lx, %s, %016lx\n", ++ Registers::Name(rd_reg()), rd(), Registers::Name(rj_reg()), ++ rj()); ++ uint64_t input = rj_u(); ++ uint64_t output = 0; ++ ++ uint64_t mask = 0xFF00000000000000; ++ for (int i = 0; i < 8; i++) { ++ uint64_t tmp = mask & input; ++ if (i <= 3) { ++ tmp = tmp >> (56 - i * 16); ++ } else { ++ tmp = tmp << (i * 16 - 56); ++ } ++ output = output | tmp; ++ mask = mask >> 8; ++ } ++ ++ alu_out = static_cast(output); ++ SetResult(rd_reg(), alu_out); ++ break; ++ } ++ case REVH_2W: { ++ printf_instr("REVH_2W\t %s: %016lx, %s, %016lx\n", ++ Registers::Name(rd_reg()), rd(), Registers::Name(rj_reg()), ++ rj()); ++ uint64_t input = rj_u(); ++ uint64_t output = 0; ++ ++ uint64_t mask = 0xFFFF000000000000; ++ for (int i = 0; i < 4; i++) { ++ uint64_t tmp = mask & input; ++ if (i % 2 == 0) { ++ tmp = tmp >> 16; ++ } else { ++ tmp = tmp << 16; ++ } ++ output = output | tmp; ++ mask = mask >> 16; ++ } ++ ++ alu_out = static_cast(output); ++ SetResult(rd_reg(), alu_out); ++ break; ++ } ++ case REVH_D: { ++ printf_instr("REVH_D\t %s: %016lx, %s, %016lx\n", ++ Registers::Name(rd_reg()), rd(), Registers::Name(rj_reg()), ++ rj()); ++ uint64_t input = rj_u(); ++ uint64_t output = 0; ++ ++ uint64_t mask = 0xFFFF000000000000; ++ for (int i = 0; i < 4; i++) { ++ uint64_t tmp = mask & input; ++ if (i <= 1) { ++ tmp = tmp >> (48 - i * 32); ++ } else { ++ tmp = tmp << (i * 32 - 48); ++ } ++ output = output | tmp; ++ mask = mask >> 16; ++ } ++ ++ alu_out = static_cast(output); ++ SetResult(rd_reg(), alu_out); ++ break; ++ } ++ case BITREV_4B: { ++ printf_instr("BITREV_4B\t %s: %016lx, %s, %016lx\n", ++ Registers::Name(rd_reg()), rd(), Registers::Name(rj_reg()), ++ rj()); ++ uint32_t input = static_cast(rj()); ++ uint32_t output = 0; ++ uint8_t i_byte, o_byte; ++ ++ // Reverse the bit in byte for each individual byte ++ for (int i = 0; i < 4; i++) { ++ output = output >> 8; ++ i_byte = input & 0xFF; ++ ++ // Fast way to reverse bits in byte ++ // Devised by Sean Anderson, July 13, 2001 ++ o_byte = static_cast(((i_byte * 0x0802LU & 0x22110LU) | ++ (i_byte * 0x8020LU & 0x88440LU)) * ++ 0x10101LU >> ++ 16); ++ ++ output = output | (static_cast(o_byte << 24)); ++ input = input >> 8; ++ } ++ ++ alu_out = static_cast(static_cast(output)); ++ SetResult(rd_reg(), alu_out); ++ break; ++ } ++ case BITREV_8B: { ++ printf_instr("BITREV_8B\t %s: %016lx, %s, %016lx\n", ++ Registers::Name(rd_reg()), rd(), Registers::Name(rj_reg()), ++ rj()); ++ uint64_t input = rj_u(); ++ uint64_t output = 0; ++ uint8_t i_byte, o_byte; ++ ++ // Reverse the bit in byte for each individual byte ++ for (int i = 0; i < 8; i++) { ++ output = output >> 8; ++ i_byte = input & 0xFF; ++ ++ // Fast way to reverse bits in byte ++ // Devised by Sean Anderson, July 13, 2001 ++ o_byte = static_cast(((i_byte * 0x0802LU & 0x22110LU) | ++ (i_byte * 0x8020LU & 0x88440LU)) * ++ 0x10101LU >> ++ 16); ++ ++ output = output | (static_cast(o_byte) << 56); ++ input = input >> 8; ++ } ++ ++ alu_out = static_cast(output); ++ SetResult(rd_reg(), alu_out); ++ break; ++ } ++ case BITREV_W: { ++ printf_instr("BITREV_W\t %s: %016lx, %s, %016lx\n", ++ Registers::Name(rd_reg()), rd(), Registers::Name(rj_reg()), ++ rj()); ++ uint32_t input = static_cast(rj()); ++ uint32_t output = 0; ++ output = base::bits::ReverseBits(input); ++ alu_out = static_cast(static_cast(output)); ++ SetResult(rd_reg(), alu_out); ++ break; ++ } ++ case BITREV_D: { ++ printf_instr("BITREV_D\t %s: %016lx, %s, %016lx\n", ++ 
Registers::Name(rd_reg()), rd(), Registers::Name(rj_reg()), ++ rj()); ++ alu_out = static_cast(base::bits::ReverseBits(rj_u())); ++ SetResult(rd_reg(), alu_out); ++ break; ++ } ++ case EXT_W_B: { ++ printf_instr("EXT_W_B\t %s: %016lx, %s, %016lx\n", ++ Registers::Name(rd_reg()), rd(), Registers::Name(rj_reg()), ++ rj()); ++ uint8_t input = static_cast(rj()); ++ alu_out = static_cast(static_cast(input)); ++ SetResult(rd_reg(), alu_out); ++ break; ++ } ++ case EXT_W_H: { ++ printf_instr("EXT_W_H\t %s: %016lx, %s, %016lx\n", ++ Registers::Name(rd_reg()), rd(), Registers::Name(rj_reg()), ++ rj()); ++ uint16_t input = static_cast(rj()); ++ alu_out = static_cast(static_cast(input)); ++ SetResult(rd_reg(), alu_out); ++ break; ++ } ++ case FABS_S: ++ printf_instr("FABS_S\t %s: %016f, %s, %016f\n", ++ FPURegisters::Name(fd_reg()), fd_float(), ++ FPURegisters::Name(fj_reg()), fj_float()); ++ SetFPUFloatResult(fd_reg(), std::abs(fj_float())); ++ break; ++ case FABS_D: ++ printf_instr("FABS_D\t %s: %016f, %s, %016f\n", ++ FPURegisters::Name(fd_reg()), fd_double(), ++ FPURegisters::Name(fj_reg()), fj_double()); ++ SetFPUDoubleResult(fd_reg(), std::abs(fj_double())); ++ break; ++ case FNEG_S: ++ printf_instr("FNEG_S\t %s: %016f, %s, %016f\n", ++ FPURegisters::Name(fd_reg()), fd_float(), ++ FPURegisters::Name(fj_reg()), fj_float()); ++ SetFPUFloatResult(fd_reg(), -fj_float()); ++ break; ++ case FNEG_D: ++ printf_instr("FNEG_D\t %s: %016f, %s, %016f\n", ++ FPURegisters::Name(fd_reg()), fd_double(), ++ FPURegisters::Name(fj_reg()), fj_double()); ++ SetFPUDoubleResult(fd_reg(), -fj_double()); ++ break; ++ case FSQRT_S: { ++ printf_instr("FSQRT_S\t %s: %016f, %s, %016f\n", ++ FPURegisters::Name(fd_reg()), fd_float(), ++ FPURegisters::Name(fj_reg()), fj_float()); ++ if (fj_float() >= 0) { ++ SetFPUFloatResult(fd_reg(), std::sqrt(fj_float())); ++ } else { ++ SetFPUFloatResult(fd_reg(), std::sqrt(-1)); // qnan ++ set_fcsr_bit(kFCSRInvalidOpFlagBit, true); ++ } ++ break; ++ } ++ case FSQRT_D: { ++ printf_instr("FSQRT_D\t %s: %016f, %s, %016f\n", ++ FPURegisters::Name(fd_reg()), fd_double(), ++ FPURegisters::Name(fj_reg()), fj_double()); ++ if (fj_double() >= 0) { ++ SetFPUDoubleResult(fd_reg(), std::sqrt(fj_double())); ++ } else { ++ SetFPUDoubleResult(fd_reg(), std::sqrt(-1)); // qnan ++ set_fcsr_bit(kFCSRInvalidOpFlagBit, true); ++ } ++ break; ++ } ++ case FMOV_S: ++ printf_instr("FMOV_S\t %s: %016f, %s, %016f\n", ++ FPURegisters::Name(fd_reg()), fd_float(), ++ FPURegisters::Name(fj_reg()), fj_float()); ++ SetFPUFloatResult(fd_reg(), fj_float()); ++ break; ++ case FMOV_D: ++ printf_instr("FMOV_D\t %s: %016f, %s, %016f\n", ++ FPURegisters::Name(fd_reg()), fd_float(), ++ FPURegisters::Name(fj_reg()), fj_float()); ++ SetFPUDoubleResult(fd_reg(), fj_double()); ++ break; ++ case MOVGR2FR_W: { ++ printf_instr("MOVGR2FR_W\t %s: %016f, %s, %016lx\n", ++ FPURegisters::Name(fd_reg()), fd_double(), ++ Registers::Name(rj_reg()), rj()); ++ set_fpu_register_word(fd_reg(), static_cast(rj())); ++ TraceRegWr(get_fpu_register(fd_reg()), FLOAT_DOUBLE); ++ break; ++ } ++ case MOVGR2FR_D: ++ printf_instr("MOVGR2FR_D\t %s: %016f, %s, %016lx\n", ++ FPURegisters::Name(fd_reg()), fd_double(), ++ Registers::Name(rj_reg()), rj()); ++ SetFPUResult2(fd_reg(), rj()); ++ break; ++ case MOVGR2FRH_W: { ++ printf_instr("MOVGR2FRH_W\t %s: %016f, %s, %016lx\n", ++ FPURegisters::Name(fd_reg()), fd_double(), ++ Registers::Name(rj_reg()), rj()); ++ set_fpu_register_hi_word(fd_reg(), static_cast(rj())); ++ TraceRegWr(get_fpu_register(fd_reg()), 
DOUBLE); ++ break; ++ } ++ case MOVFR2GR_S: { ++ printf_instr("MOVFR2GR_S\t %s: %016lx, %s, %016f\n", ++ Registers::Name(rd_reg()), rd(), ++ FPURegisters::Name(fj_reg()), fj_float()); ++ set_register(rd_reg(), ++ static_cast(get_fpu_register_word(fj_reg()))); ++ TraceRegWr(get_register(rd_reg()), WORD_DWORD); ++ break; ++ } ++ case MOVFR2GR_D: ++ printf_instr("MOVFR2GR_D\t %s: %016lx, %s, %016f\n", ++ Registers::Name(rd_reg()), rd(), ++ FPURegisters::Name(fj_reg()), fj_double()); ++ SetResult(rd_reg(), get_fpu_register(fj_reg())); ++ break; ++ case MOVFRH2GR_S: ++ printf_instr("MOVFRH2GR_S\t %s: %016lx, %s, %016f\n", ++ Registers::Name(rd_reg()), rd(), ++ FPURegisters::Name(fj_reg()), fj_double()); ++ SetResult(rd_reg(), get_fpu_register_hi_word(fj_reg())); ++ break; ++ case MOVGR2FCSR: { ++ printf_instr("MOVGR2FCSR\t fcsr: %016x, %s, %016lx\n", FCSR_, ++ Registers::Name(rj_reg()), rj()); ++ // fcsr could be 0-3 ++ CHECK_LT(rd_reg(), 4); ++ FCSR_ = static_cast(rj()); ++ TraceRegWr(FCSR_); ++ break; ++ } ++ case MOVFCSR2GR: { ++ printf_instr("MOVFCSR2GR\t %s, %016lx, FCSR: %016x\n", ++ Registers::Name(rd_reg()), rd(), FCSR_); ++ // fcsr could be 0-3 ++ CHECK_LT(rj_reg(), 4); ++ SetResult(rd_reg(), FCSR_); ++ break; ++ } ++ case FCVT_S_D: ++ printf_instr("FCVT_S_D\t %s: %016f, %s, %016f\n", ++ FPURegisters::Name(fd_reg()), fd_double(), ++ FPURegisters::Name(fj_reg()), fj_double()); ++ SetFPUFloatResult(fd_reg(), static_cast(fj_double())); ++ break; ++ case FCVT_D_S: ++ printf_instr("FCVT_D_S\t %s: %016f, %s, %016f\n", ++ FPURegisters::Name(fd_reg()), fd_double(), ++ FPURegisters::Name(fj_reg()), fj_float()); ++ SetFPUDoubleResult(fd_reg(), static_cast(fj_float())); ++ break; ++ case FTINTRM_W_S: { ++ printf_instr("FTINTRM_W_S\t %s: %016f, %s, %016f\n", ++ FPURegisters::Name(fd_reg()), fd_double(), ++ FPURegisters::Name(fj_reg()), fj_float()); ++ float fj = fj_float(); ++ float rounded = std::floor(fj); ++ int32_t result = static_cast(rounded); ++ SetFPUWordResult(fd_reg(), result); ++ if (set_fcsr_round_error(fj, rounded)) { ++ set_fpu_register_word_invalid_result(fj, rounded); ++ } ++ break; ++ } ++ case FTINTRM_W_D: { ++ printf_instr("FTINTRM_W_D\t %s: %016f, %s, %016f\n", ++ FPURegisters::Name(fd_reg()), fd_double(), ++ FPURegisters::Name(fj_reg()), fj_double()); ++ double fj = fj_double(); ++ double rounded = std::floor(fj); ++ int32_t result = static_cast(rounded); ++ SetFPUWordResult(fd_reg(), result); ++ if (set_fcsr_round_error(fj, rounded)) { ++ set_fpu_register_invalid_result(fj, rounded); ++ } ++ break; ++ } ++ case FTINTRM_L_S: { ++ printf_instr("FTINTRM_L_S\t %s: %016f, %s, %016f\n", ++ FPURegisters::Name(fd_reg()), fd_double(), ++ FPURegisters::Name(fj_reg()), fj_float()); ++ float fj = fj_float(); ++ float rounded = std::floor(fj); ++ int64_t result = static_cast(rounded); ++ SetFPUResult(fd_reg(), result); ++ if (set_fcsr_round64_error(fj, rounded)) { ++ set_fpu_register_invalid_result64(fj, rounded); ++ } ++ break; ++ } ++ case FTINTRM_L_D: { ++ printf_instr("FTINTRM_L_D\t %s: %016f, %s, %016f\n", ++ FPURegisters::Name(fd_reg()), fd_double(), ++ FPURegisters::Name(fj_reg()), fj_double()); ++ double fj = fj_double(); ++ double rounded = std::floor(fj); ++ int64_t result = static_cast(rounded); ++ SetFPUResult(fd_reg(), result); ++ if (set_fcsr_round64_error(fj, rounded)) { ++ set_fpu_register_invalid_result64(fj, rounded); ++ } ++ break; ++ } ++ case FTINTRP_W_S: { ++ printf_instr("FTINTRP_W_S\t %s: %016f, %s, %016f\n", ++ FPURegisters::Name(fd_reg()), fd_double(), ++ 
FPURegisters::Name(fj_reg()), fj_float()); ++ float fj = fj_float(); ++ float rounded = std::ceil(fj); ++ int32_t result = static_cast(rounded); ++ SetFPUWordResult(fd_reg(), result); ++ if (set_fcsr_round_error(fj, rounded)) { ++ set_fpu_register_word_invalid_result(fj, rounded); ++ } ++ break; ++ } ++ case FTINTRP_W_D: { ++ printf_instr("FTINTRP_W_D\t %s: %016f, %s, %016f\n", ++ FPURegisters::Name(fd_reg()), fd_double(), ++ FPURegisters::Name(fj_reg()), fj_double()); ++ double fj = fj_double(); ++ double rounded = std::ceil(fj); ++ int32_t result = static_cast(rounded); ++ SetFPUWordResult(fd_reg(), result); ++ if (set_fcsr_round_error(fj, rounded)) { ++ set_fpu_register_invalid_result(fj, rounded); ++ } ++ break; ++ } ++ case FTINTRP_L_S: { ++ printf_instr("FTINTRP_L_S\t %s: %016f, %s, %016f\n", ++ FPURegisters::Name(fd_reg()), fd_double(), ++ FPURegisters::Name(fj_reg()), fj_float()); ++ float fj = fj_float(); ++ float rounded = std::ceil(fj); ++ int64_t result = static_cast(rounded); ++ SetFPUResult(fd_reg(), result); ++ if (set_fcsr_round64_error(fj, rounded)) { ++ set_fpu_register_invalid_result64(fj, rounded); ++ } ++ break; ++ } ++ case FTINTRP_L_D: { ++ printf_instr("FTINTRP_L_D\t %s: %016f, %s, %016f\n", ++ FPURegisters::Name(fd_reg()), fd_double(), ++ FPURegisters::Name(fj_reg()), fj_double()); ++ double fj = fj_double(); ++ double rounded = std::ceil(fj); ++ int64_t result = static_cast(rounded); ++ SetFPUResult(fd_reg(), result); ++ if (set_fcsr_round64_error(fj, rounded)) { ++ set_fpu_register_invalid_result64(fj, rounded); ++ } ++ break; ++ } ++ case FTINTRZ_W_S: { ++ printf_instr("FTINTRZ_W_S\t %s: %016f, %s, %016f\n", ++ FPURegisters::Name(fd_reg()), fd_double(), ++ FPURegisters::Name(fj_reg()), fj_float()); ++ float fj = fj_float(); ++ float rounded = std::trunc(fj); ++ int32_t result = static_cast(rounded); ++ SetFPUWordResult(fd_reg(), result); ++ if (set_fcsr_round_error(fj, rounded)) { ++ set_fpu_register_word_invalid_result(fj, rounded); ++ } ++ break; ++ } ++ case FTINTRZ_W_D: { ++ printf_instr("FTINTRZ_W_D\t %s: %016f, %s, %016f\n", ++ FPURegisters::Name(fd_reg()), fd_double(), ++ FPURegisters::Name(fj_reg()), fj_double()); ++ double fj = fj_double(); ++ double rounded = std::trunc(fj); ++ int32_t result = static_cast(rounded); ++ SetFPUWordResult(fd_reg(), result); ++ if (set_fcsr_round_error(fj, rounded)) { ++ set_fpu_register_invalid_result(fj, rounded); ++ } ++ break; ++ } ++ case FTINTRZ_L_S: { ++ printf_instr("FTINTRZ_L_S\t %s: %016f, %s, %016f\n", ++ FPURegisters::Name(fd_reg()), fd_double(), ++ FPURegisters::Name(fj_reg()), fj_float()); ++ float fj = fj_float(); ++ float rounded = std::trunc(fj); ++ int64_t result = static_cast(rounded); ++ SetFPUResult(fd_reg(), result); ++ if (set_fcsr_round64_error(fj, rounded)) { ++ set_fpu_register_invalid_result64(fj, rounded); ++ } ++ break; ++ } ++ case FTINTRZ_L_D: { ++ printf_instr("FTINTRZ_L_D\t %s: %016f, %s, %016f\n", ++ FPURegisters::Name(fd_reg()), fd_double(), ++ FPURegisters::Name(fj_reg()), fj_double()); ++ double fj = fj_double(); ++ double rounded = std::trunc(fj); ++ int64_t result = static_cast(rounded); ++ SetFPUResult(fd_reg(), result); ++ if (set_fcsr_round64_error(fj, rounded)) { ++ set_fpu_register_invalid_result64(fj, rounded); ++ } ++ break; ++ } ++ case FTINTRNE_W_S: { ++ printf_instr("FTINTRNE_W_S\t %s: %016f, %s, %016f\n", ++ FPURegisters::Name(fd_reg()), fd_double(), ++ FPURegisters::Name(fj_reg()), fj_float()); ++ float fj = fj_float(); ++ float rounded = std::floor(fj + 0.5); ++ int32_t 
result = static_cast(rounded); ++ if ((result & 1) != 0 && result - fj == 0.5) { ++ // If the number is halfway between two integers, ++ // round to the even one. ++ result--; ++ } ++ SetFPUWordResult(fd_reg(), result); ++ if (set_fcsr_round_error(fj, rounded)) { ++ set_fpu_register_word_invalid_result(fj, rounded); ++ } ++ break; ++ } ++ case FTINTRNE_W_D: { ++ printf_instr("FTINTRNE_W_D\t %s: %016f, %s, %016f\n", ++ FPURegisters::Name(fd_reg()), fd_double(), ++ FPURegisters::Name(fj_reg()), fj_double()); ++ double fj = fj_double(); ++ double rounded = std::floor(fj + 0.5); ++ int32_t result = static_cast(rounded); ++ if ((result & 1) != 0 && result - fj == 0.5) { ++ // If the number is halfway between two integers, ++ // round to the even one. ++ result--; ++ } ++ SetFPUWordResult(fd_reg(), result); ++ if (set_fcsr_round_error(fj, rounded)) { ++ set_fpu_register_invalid_result(fj, rounded); ++ } ++ break; ++ } ++ case FTINTRNE_L_S: { ++ printf_instr("FTINTRNE_L_S\t %s: %016f, %s, %016f\n", ++ FPURegisters::Name(fd_reg()), fd_double(), ++ FPURegisters::Name(fj_reg()), fj_float()); ++ float fj = fj_float(); ++ float rounded = std::floor(fj + 0.5); ++ int64_t result = static_cast(rounded); ++ if ((result & 1) != 0 && result - fj == 0.5) { ++ // If the number is halfway between two integers, ++ // round to the even one. ++ result--; ++ } ++ SetFPUResult(fd_reg(), result); ++ if (set_fcsr_round64_error(fj, rounded)) { ++ set_fpu_register_invalid_result64(fj, rounded); ++ } ++ break; ++ } ++ case FTINTRNE_L_D: { ++ printf_instr("FTINTRNE_L_D\t %s: %016f, %s, %016f\n", ++ FPURegisters::Name(fd_reg()), fd_double(), ++ FPURegisters::Name(fj_reg()), fj_double()); ++ double fj = fj_double(); ++ double rounded = std::floor(fj + 0.5); ++ int64_t result = static_cast(rounded); ++ if ((result & 1) != 0 && result - fj == 0.5) { ++ // If the number is halfway between two integers, ++ // round to the even one. 
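++          // (result currently holds the odd upper neighbour, so the
++          // decrement yields the even value.)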
++ result--; ++ } ++ SetFPUResult(fd_reg(), result); ++ if (set_fcsr_round64_error(fj, rounded)) { ++ set_fpu_register_invalid_result64(fj, rounded); ++ } ++ break; ++ } ++ case FTINT_W_S: { ++ printf_instr("FTINT_W_S\t %s: %016f, %s, %016f\n", ++ FPURegisters::Name(fd_reg()), fd_double(), ++ FPURegisters::Name(fj_reg()), fj_float()); ++ float fj = fj_float(); ++ float rounded; ++ int32_t result; ++ round_according_to_fcsr(fj, &rounded, &result); ++ SetFPUWordResult(fd_reg(), result); ++ if (set_fcsr_round_error(fj, rounded)) { ++ set_fpu_register_word_invalid_result(fj, rounded); ++ } ++ break; ++ } ++ case FTINT_W_D: { ++ printf_instr("FTINT_W_D\t %s: %016f, %s, %016f\n", ++ FPURegisters::Name(fd_reg()), fd_double(), ++ FPURegisters::Name(fj_reg()), fj_double()); ++ double fj = fj_double(); ++ double rounded; ++ int32_t result; ++ round_according_to_fcsr(fj, &rounded, &result); ++ SetFPUWordResult(fd_reg(), result); ++ if (set_fcsr_round_error(fj, rounded)) { ++ set_fpu_register_word_invalid_result(fj, rounded); ++ } ++ break; ++ } ++ case FTINT_L_S: { ++ printf_instr("FTINT_L_S\t %s: %016f, %s, %016f\n", ++ FPURegisters::Name(fd_reg()), fd_double(), ++ FPURegisters::Name(fj_reg()), fj_float()); ++ float fj = fj_float(); ++ float rounded; ++ int64_t result; ++ round64_according_to_fcsr(fj, &rounded, &result); ++ SetFPUResult(fd_reg(), result); ++ if (set_fcsr_round64_error(fj, rounded)) { ++ set_fpu_register_invalid_result64(fj, rounded); ++ } ++ break; ++ } ++ case FTINT_L_D: { ++ printf_instr("FTINT_L_D\t %s: %016f, %s, %016f\n", ++ FPURegisters::Name(fd_reg()), fd_double(), ++ FPURegisters::Name(fj_reg()), fj_double()); ++ double fj = fj_double(); ++ double rounded; ++ int64_t result; ++ round64_according_to_fcsr(fj, &rounded, &result); ++ SetFPUResult(fd_reg(), result); ++ if (set_fcsr_round64_error(fj, rounded)) { ++ set_fpu_register_invalid_result64(fj, rounded); ++ } ++ break; ++ } ++ case FFINT_S_W: { ++ alu_out = get_fpu_register_signed_word(fj_reg()); ++ printf_instr("FFINT_S_W\t %s: %016f, %s, %016x\n", ++ FPURegisters::Name(fd_reg()), fd_double(), ++ FPURegisters::Name(fj_reg()), (int)alu_out); ++ SetFPUFloatResult(fd_reg(), static_cast(alu_out)); ++ break; ++ } ++ case FFINT_S_L: { ++ alu_out = get_fpu_register(fj_reg()); ++ printf_instr("FFINT_S_L\t %s: %016f, %s, %016lx\n", ++ FPURegisters::Name(fd_reg()), fd_double(), ++ FPURegisters::Name(fj_reg()), alu_out); ++ SetFPUFloatResult(fd_reg(), static_cast(alu_out)); ++ break; ++ } ++ case FFINT_D_W: { ++ alu_out = get_fpu_register_signed_word(fj_reg()); ++ printf_instr("FFINT_D_W\t %s: %016f, %s, %016x\n", ++ FPURegisters::Name(fd_reg()), fd_double(), ++ FPURegisters::Name(fj_reg()), (int)alu_out); ++ SetFPUDoubleResult(fd_reg(), static_cast(alu_out)); ++ break; ++ } ++ case FFINT_D_L: { ++ alu_out = get_fpu_register(fj_reg()); ++ printf_instr("FFINT_D_L\t %s: %016f, %s, %016lx\n", ++ FPURegisters::Name(fd_reg()), fd_double(), ++ FPURegisters::Name(fj_reg()), alu_out); ++ SetFPUDoubleResult(fd_reg(), static_cast(alu_out)); ++ break; ++ } ++ case FRINT_S: { ++ printf_instr("FRINT_S\t %s: %016f, %s, %016f mode : ", ++ FPURegisters::Name(fd_reg()), fd_double(), ++ FPURegisters::Name(fj_reg()), fj_float()); ++ float fj = fj_float(); ++ float result, temp_result; ++ double temp; ++ float upper = std::ceil(fj); ++ float lower = std::floor(fj); ++ switch (get_fcsr_rounding_mode()) { ++ case kRoundToNearest: ++ printf_instr(" kRoundToNearest\n"); ++ if (upper - fj < fj - lower) { ++ result = upper; ++ } else if (upper - fj > fj - 
lower) { ++ result = lower; ++ } else { ++ temp_result = upper / 2; ++ float reminder = std::modf(temp_result, &temp); ++ if (reminder == 0) { ++ result = upper; ++ } else { ++ result = lower; ++ } ++ } ++ break; ++ case kRoundToZero: ++ printf_instr(" kRoundToZero\n"); ++ result = (fj > 0 ? lower : upper); ++ break; ++ case kRoundToPlusInf: ++ printf_instr(" kRoundToPlusInf\n"); ++ result = upper; ++ break; ++ case kRoundToMinusInf: ++ printf_instr(" kRoundToMinusInf\n"); ++ result = lower; ++ break; ++ } ++ SetFPUFloatResult(fd_reg(), result); ++ if (result != fj) { ++ set_fcsr_bit(kFCSRInexactFlagBit, true); ++ } ++ break; ++ } ++ case FRINT_D: { ++ printf_instr("FRINT_D\t %s: %016f, %s, %016f mode : ", ++ FPURegisters::Name(fd_reg()), fd_double(), ++ FPURegisters::Name(fj_reg()), fj_double()); ++ double fj = fj_double(); ++ double result, temp, temp_result; ++ double upper = std::ceil(fj); ++ double lower = std::floor(fj); ++ switch (get_fcsr_rounding_mode()) { ++ case kRoundToNearest: ++ printf_instr(" kRoundToNearest\n"); ++ if (upper - fj < fj - lower) { ++ result = upper; ++ } else if (upper - fj > fj - lower) { ++ result = lower; ++ } else { ++ temp_result = upper / 2; ++ double reminder = std::modf(temp_result, &temp); ++ if (reminder == 0) { ++ result = upper; ++ } else { ++ result = lower; ++ } ++ } ++ break; ++ case kRoundToZero: ++ printf_instr(" kRoundToZero\n"); ++ result = (fj > 0 ? lower : upper); ++ break; ++ case kRoundToPlusInf: ++ printf_instr(" kRoundToPlusInf\n"); ++ result = upper; ++ break; ++ case kRoundToMinusInf: ++ printf_instr(" kRoundToMinusInf\n"); ++ result = lower; ++ break; ++ } ++ SetFPUDoubleResult(fd_reg(), result); ++ if (result != fj) { ++ set_fcsr_bit(kFCSRInexactFlagBit, true); ++ } ++ break; ++ } ++ case MOVFR2CF: ++ printf("Sim UNIMPLEMENTED: MOVFR2CF\n"); ++ UNIMPLEMENTED(); ++ break; ++ case MOVCF2FR: ++ printf("Sim UNIMPLEMENTED: MOVCF2FR\n"); ++ UNIMPLEMENTED(); ++ break; ++ case MOVGR2CF: ++ printf_instr("MOVGR2CF\t FCC%d, %s: %016lx\n", cd_reg(), ++ Registers::Name(rj_reg()), rj()); ++ set_cf_register(cd_reg(), rj() & 1); ++ break; ++ case MOVCF2GR: ++ printf_instr("MOVCF2GR\t %s: %016lx, FCC%d\n", Registers::Name(rd_reg()), ++ rd(), cj_reg()); ++ SetResult(rd_reg(), cj()); ++ break; ++ case FRECIP_S: ++ printf("Sim UNIMPLEMENTED: FRECIP_S\n"); ++ UNIMPLEMENTED(); ++ break; ++ case FRECIP_D: ++ printf("Sim UNIMPLEMENTED: FRECIP_D\n"); ++ UNIMPLEMENTED(); ++ break; ++ case FRSQRT_S: ++ printf("Sim UNIMPLEMENTED: FRSQRT_S\n"); ++ UNIMPLEMENTED(); ++ break; ++ case FRSQRT_D: ++ printf("Sim UNIMPLEMENTED: FRSQRT_D\n"); ++ UNIMPLEMENTED(); ++ break; ++ case FCLASS_S: ++ printf("Sim UNIMPLEMENTED: FCLASS_S\n"); ++ UNIMPLEMENTED(); ++ break; ++ case FCLASS_D: ++ printf("Sim UNIMPLEMENTED: FCLASS_D\n"); ++ UNIMPLEMENTED(); ++ break; ++ case FLOGB_S: ++ printf("Sim UNIMPLEMENTED: FLOGB_S\n"); ++ UNIMPLEMENTED(); ++ break; ++ case FLOGB_D: ++ printf("Sim UNIMPLEMENTED: FLOGB_D\n"); ++ UNIMPLEMENTED(); ++ break; ++ case CLO_W: ++ printf("Sim UNIMPLEMENTED: CLO_W\n"); ++ UNIMPLEMENTED(); ++ break; ++ case CTO_W: ++ printf("Sim UNIMPLEMENTED: CTO_W\n"); ++ UNIMPLEMENTED(); ++ break; ++ case CLO_D: ++ printf("Sim UNIMPLEMENTED: CLO_D\n"); ++ UNIMPLEMENTED(); ++ break; ++ case CTO_D: ++ printf("Sim UNIMPLEMENTED: CTO_D\n"); ++ UNIMPLEMENTED(); ++ break; ++ case IOCSRRD_B: ++ printf("Sim UNIMPLEMENTED: IOCSRRD_B\n"); ++ UNIMPLEMENTED(); ++ break; ++ case IOCSRRD_H: ++ printf("Sim UNIMPLEMENTED: IOCSRRD_H\n"); ++ UNIMPLEMENTED(); ++ break; ++ case 
IOCSRRD_W:
++      printf("Sim UNIMPLEMENTED: IOCSRRD_W\n");
++      UNIMPLEMENTED();
++      break;
++    case IOCSRRD_D:
++      printf("Sim UNIMPLEMENTED: IOCSRRD_D\n");
++      UNIMPLEMENTED();
++      break;
++    case IOCSRWR_B:
++      printf("Sim UNIMPLEMENTED: IOCSRWR_B\n");
++      UNIMPLEMENTED();
++      break;
++    case IOCSRWR_H:
++      printf("Sim UNIMPLEMENTED: IOCSRWR_H\n");
++      UNIMPLEMENTED();
++      break;
++    case IOCSRWR_W:
++      printf("Sim UNIMPLEMENTED: IOCSRWR_W\n");
++      UNIMPLEMENTED();
++      break;
++    case IOCSRWR_D:
++      printf("Sim UNIMPLEMENTED: IOCSRWR_D\n");
++      UNIMPLEMENTED();
++      break;
++    case TLBINV:
++      printf("Sim UNIMPLEMENTED: TLBINV\n");
++      UNIMPLEMENTED();
++      break;
++    case TLBFLUSH:
++      printf("Sim UNIMPLEMENTED: TLBFLUSH\n");
++      UNIMPLEMENTED();
++      break;
++    case TLBP:
++      printf("Sim UNIMPLEMENTED: TLBP\n");
++      UNIMPLEMENTED();
++      break;
++    case TLBR:
++      printf("Sim UNIMPLEMENTED: TLBR\n");
++      UNIMPLEMENTED();
++      break;
++    case TLBWI:
++      printf("Sim UNIMPLEMENTED: TLBWI\n");
++      UNIMPLEMENTED();
++      break;
++    case TLBWR:
++      printf("Sim UNIMPLEMENTED: TLBWR\n");
++      UNIMPLEMENTED();
++      break;
++    case ERET:
++      printf("Sim UNIMPLEMENTED: ERET\n");
++      UNIMPLEMENTED();
++      break;
++    case RDTIMEL_W:
++      printf("Sim UNIMPLEMENTED: RDTIMEL_W\n");
++      UNIMPLEMENTED();
++      break;
++    case RDTIMEH_W:
++      printf("Sim UNIMPLEMENTED: RDTIMEH_W\n");
++      UNIMPLEMENTED();
++      break;
++    case RDTIME_D:
++      printf("Sim UNIMPLEMENTED: RDTIME_D\n");
++      // case CPUCFG:
++      // TODO
++      UNIMPLEMENTED();
++      break;
++    // Unimplemented opcodes raised an error in the configuration step before,
++    // so we can use the default here to set the destination register in common
++    // cases.
++    default:
++      UNREACHABLE();
++  }
++}
++
++// Executes the current instruction.
++void Simulator::InstructionDecode(Instruction* instr) {
++  if (v8::internal::FLAG_check_icache) {
++    CheckICache(i_cache(), instr);
++  }
++  pc_modified_ = false;
++
++  v8::internal::EmbeddedVector<char, 256> buffer;
++
++  if (::v8::internal::FLAG_trace_sim) {
++    SNPrintF(trace_buf_, " ");
++    disasm::NameConverter converter;
++    disasm::Disassembler dasm(converter);
++    // Use a reasonably large buffer.
++    dasm.InstructionDecode(buffer, reinterpret_cast<byte*>(instr));
++  }
++
++  static int instr_count = 0;
++  USE(instr_count);
++  instr_ = instr;
++  printf_instr("\nInstr%3d: %08x, PC: %016lx\t", instr_count++,
++               instr_.Bits(31, 0), get_pc());
++  switch (instr_.InstructionType()) {
++    case Instruction::kOp6Type:
++      DecodeTypeOp6();
++      break;
++    case Instruction::kOp7Type:
++      DecodeTypeOp7();
++      break;
++    case Instruction::kOp8Type:
++      DecodeTypeOp8();
++      break;
++    case Instruction::kOp10Type:
++      DecodeTypeOp10();
++      break;
++    case Instruction::kOp12Type:
++      DecodeTypeOp12();
++      break;
++    case Instruction::kOp14Type:
++      DecodeTypeOp14();
++      break;
++    case Instruction::kOp17Type:
++      DecodeTypeOp17();
++      break;
++    case Instruction::kOp22Type:
++      DecodeTypeOp22();
++      break;
++    default: {
++      printf("instr_: %x\n", instr_.Bits(31, 0));
++      UNREACHABLE();
++    }
++  }
++
++  if (::v8::internal::FLAG_trace_sim) {
++    PrintF(" 0x%08" PRIxPTR " %-44s %s\n",
++           reinterpret_cast<intptr_t>(instr), buffer.begin(),
++           trace_buf_.begin());
++  }
++
++  if (!pc_modified_) {
++    set_register(pc, reinterpret_cast<int64_t>(instr) + kInstrSize);
++  }
++}
++
++void Simulator::Execute() {
++  // Get the PC to simulate. Cannot use the accessor here as we need the
++  // raw PC value and not the one used as input to arithmetic instructions.
++ int64_t program_counter = get_pc(); ++ if (::v8::internal::FLAG_stop_sim_at == 0) { ++ // Fast version of the dispatch loop without checking whether the simulator ++ // should be stopping at a particular executed instruction. ++ while (program_counter != end_sim_pc) { ++ Instruction* instr = reinterpret_cast(program_counter); ++ icount_++; ++ InstructionDecode(instr); ++ program_counter = get_pc(); ++ } ++ } else { ++ // FLAG_stop_sim_at is at the non-default value. Stop in the debugger when ++ // we reach the particular instruction count. ++ while (program_counter != end_sim_pc) { ++ Instruction* instr = reinterpret_cast(program_counter); ++ icount_++; ++ if (icount_ == static_cast(::v8::internal::FLAG_stop_sim_at)) { ++ La64Debugger dbg(this); ++ dbg.Debug(); ++ } else { ++ InstructionDecode(instr); ++ } ++ program_counter = get_pc(); ++ } ++ } ++} ++ ++void Simulator::CallInternal(Address entry) { ++ // Adjust JS-based stack limit to C-based stack limit. ++ isolate_->stack_guard()->AdjustStackLimitForSimulator(); ++ ++ // Prepare to execute the code at entry. ++ set_register(pc, static_cast(entry)); ++ // Put down marker for end of simulation. The simulator will stop simulation ++ // when the PC reaches this value. By saving the "end simulation" value into ++ // the LR the simulation stops when returning to this call point. ++ set_register(ra, end_sim_pc); ++ ++ // Remember the values of callee-saved registers. ++ int64_t s0_val = get_register(s0); ++ int64_t s1_val = get_register(s1); ++ int64_t s2_val = get_register(s2); ++ int64_t s3_val = get_register(s3); ++ int64_t s4_val = get_register(s4); ++ int64_t s5_val = get_register(s5); ++ int64_t s6_val = get_register(s6); ++ int64_t s7_val = get_register(s7); ++ int64_t s8_val = get_register(s8); ++ int64_t gp_val = get_register(gp); ++ int64_t sp_val = get_register(sp); ++ int64_t tp_val = get_register(tp); ++ int64_t fp_val = get_register(fp); ++ ++ // Set up the callee-saved registers with a known value. To be able to check ++ // that they are preserved properly across JS execution. ++ int64_t callee_saved_value = icount_; ++ set_register(s0, callee_saved_value); ++ set_register(s1, callee_saved_value); ++ set_register(s2, callee_saved_value); ++ set_register(s3, callee_saved_value); ++ set_register(s4, callee_saved_value); ++ set_register(s5, callee_saved_value); ++ set_register(s6, callee_saved_value); ++ set_register(s7, callee_saved_value); ++ set_register(s8, callee_saved_value); ++ set_register(gp, callee_saved_value); ++ set_register(tp, callee_saved_value); ++ set_register(fp, callee_saved_value); ++ ++ // Start the simulation. ++ Execute(); ++ ++ // Check that the callee-saved registers have been preserved. ++ CHECK_EQ(callee_saved_value, get_register(s0)); ++ CHECK_EQ(callee_saved_value, get_register(s1)); ++ CHECK_EQ(callee_saved_value, get_register(s2)); ++ CHECK_EQ(callee_saved_value, get_register(s3)); ++ CHECK_EQ(callee_saved_value, get_register(s4)); ++ CHECK_EQ(callee_saved_value, get_register(s5)); ++ CHECK_EQ(callee_saved_value, get_register(s6)); ++ CHECK_EQ(callee_saved_value, get_register(s7)); ++ CHECK_EQ(callee_saved_value, get_register(s8)); ++ CHECK_EQ(callee_saved_value, get_register(gp)); ++ CHECK_EQ(callee_saved_value, get_register(tp)); ++ CHECK_EQ(callee_saved_value, get_register(fp)); ++ ++ // Restore callee-saved registers with the original value. 
++ set_register(s0, s0_val); ++ set_register(s1, s1_val); ++ set_register(s2, s2_val); ++ set_register(s3, s3_val); ++ set_register(s4, s4_val); ++ set_register(s5, s5_val); ++ set_register(s6, s6_val); ++ set_register(s7, s7_val); ++ set_register(s8, s8_val); ++ set_register(gp, gp_val); ++ set_register(sp, sp_val); ++ set_register(tp, tp_val); ++ set_register(fp, fp_val); ++} ++ ++intptr_t Simulator::CallImpl(Address entry, int argument_count, ++ const intptr_t* arguments) { ++ constexpr int kRegisterPassedArguments = 8; ++ // Set up arguments. ++ ++ int reg_arg_count = std::min(kRegisterPassedArguments, argument_count); ++ if (reg_arg_count > 0) set_register(a0, arguments[0]); ++ if (reg_arg_count > 1) set_register(a1, arguments[1]); ++ if (reg_arg_count > 2) set_register(a2, arguments[2]); ++ if (reg_arg_count > 3) set_register(a3, arguments[3]); ++ if (reg_arg_count > 4) set_register(a4, arguments[4]); ++ if (reg_arg_count > 5) set_register(a5, arguments[5]); ++ if (reg_arg_count > 6) set_register(a6, arguments[6]); ++ if (reg_arg_count > 7) set_register(a7, arguments[7]); ++ ++ // Remaining arguments passed on stack. ++ int64_t original_stack = get_register(sp); ++ // Compute position of stack on entry to generated code. ++ int stack_args_count = argument_count - reg_arg_count; ++ int stack_args_size = stack_args_count * sizeof(*arguments) + kCArgsSlotsSize; ++ int64_t entry_stack = original_stack - stack_args_size; ++ ++ if (base::OS::ActivationFrameAlignment() != 0) { ++ entry_stack &= -base::OS::ActivationFrameAlignment(); ++ } ++ // Store remaining arguments on stack, from low to high memory. ++ intptr_t* stack_argument = reinterpret_cast(entry_stack); ++ memcpy(stack_argument + kCArgSlotCount, arguments + reg_arg_count, ++ stack_args_count * sizeof(*arguments)); ++ set_register(sp, entry_stack); ++ ++ CallInternal(entry); ++ ++ // Pop stack passed arguments. ++ CHECK_EQ(entry_stack, get_register(sp)); ++ set_register(sp, original_stack); ++ ++ return get_register(v0); ++} ++ ++double Simulator::CallFP(Address entry, double d0, double d1) { ++ const FPURegister fparg2 = f1; ++ set_fpu_register_double(f0, d0); ++ set_fpu_register_double(fparg2, d1); ++ CallInternal(entry); ++ return get_fpu_register_double(f0); ++} ++ ++uintptr_t Simulator::PushAddress(uintptr_t address) { ++ int64_t new_sp = get_register(sp) - sizeof(uintptr_t); ++ uintptr_t* stack_slot = reinterpret_cast(new_sp); ++ *stack_slot = address; ++ set_register(sp, new_sp); ++ return new_sp; ++} ++ ++uintptr_t Simulator::PopAddress() { ++ int64_t current_sp = get_register(sp); ++ uintptr_t* stack_slot = reinterpret_cast(current_sp); ++ uintptr_t address = *stack_slot; ++ set_register(sp, current_sp + sizeof(uintptr_t)); ++ return address; ++} ++ ++Simulator::LocalMonitor::LocalMonitor() ++ : access_state_(MonitorAccess::Open), ++ tagged_addr_(0), ++ size_(TransactionSize::None) {} ++ ++void Simulator::LocalMonitor::Clear() { ++ access_state_ = MonitorAccess::Open; ++ tagged_addr_ = 0; ++ size_ = TransactionSize::None; ++} ++ ++void Simulator::LocalMonitor::NotifyLoad() { ++ if (access_state_ == MonitorAccess::RMW) { ++ // A non linked load could clear the local monitor. As a result, it's ++ // most strict to unconditionally clear the local monitor on load. 
++ Clear(); ++ } ++} ++ ++void Simulator::LocalMonitor::NotifyLoadLinked(uintptr_t addr, ++ TransactionSize size) { ++ access_state_ = MonitorAccess::RMW; ++ tagged_addr_ = addr; ++ size_ = size; ++} ++ ++void Simulator::LocalMonitor::NotifyStore() { ++ if (access_state_ == MonitorAccess::RMW) { ++ // A non exclusive store could clear the local monitor. As a result, it's ++ // most strict to unconditionally clear the local monitor on store. ++ Clear(); ++ } ++} ++ ++bool Simulator::LocalMonitor::NotifyStoreConditional(uintptr_t addr, ++ TransactionSize size) { ++ if (access_state_ == MonitorAccess::RMW) { ++ if (addr == tagged_addr_ && size_ == size) { ++ Clear(); ++ return true; ++ } else { ++ return false; ++ } ++ } else { ++ DCHECK(access_state_ == MonitorAccess::Open); ++ return false; ++ } ++} ++ ++Simulator::GlobalMonitor::LinkedAddress::LinkedAddress() ++ : access_state_(MonitorAccess::Open), ++ tagged_addr_(0), ++ next_(nullptr), ++ prev_(nullptr), ++ failure_counter_(0) {} ++ ++void Simulator::GlobalMonitor::LinkedAddress::Clear_Locked() { ++ access_state_ = MonitorAccess::Open; ++ tagged_addr_ = 0; ++} ++ ++void Simulator::GlobalMonitor::LinkedAddress::NotifyLoadLinked_Locked( ++ uintptr_t addr) { ++ access_state_ = MonitorAccess::RMW; ++ tagged_addr_ = addr; ++} ++ ++void Simulator::GlobalMonitor::LinkedAddress::NotifyStore_Locked() { ++ if (access_state_ == MonitorAccess::RMW) { ++ // A non exclusive store could clear the global monitor. As a result, it's ++ // most strict to unconditionally clear global monitors on store. ++ Clear_Locked(); ++ } ++} ++ ++bool Simulator::GlobalMonitor::LinkedAddress::NotifyStoreConditional_Locked( ++ uintptr_t addr, bool is_requesting_thread) { ++ if (access_state_ == MonitorAccess::RMW) { ++ if (is_requesting_thread) { ++ if (addr == tagged_addr_) { ++ Clear_Locked(); ++ // Introduce occasional sc/scd failures. This is to simulate the ++ // behavior of hardware, which can randomly fail due to background ++ // cache evictions. ++ if (failure_counter_++ >= kMaxFailureCounter) { ++ failure_counter_ = 0; ++ return false; ++ } else { ++ return true; ++ } ++ } ++ } else if ((addr & kExclusiveTaggedAddrMask) == ++ (tagged_addr_ & kExclusiveTaggedAddrMask)) { ++ // Check the masked addresses when responding to a successful lock by ++ // another thread so the implementation is more conservative (i.e. the ++ // granularity of locking is as large as possible.) ++ Clear_Locked(); ++ return false; ++ } ++ } ++ return false; ++} ++ ++void Simulator::GlobalMonitor::NotifyLoadLinked_Locked( ++ uintptr_t addr, LinkedAddress* linked_address) { ++ linked_address->NotifyLoadLinked_Locked(addr); ++ PrependProcessor_Locked(linked_address); ++} ++ ++void Simulator::GlobalMonitor::NotifyStore_Locked( ++ LinkedAddress* linked_address) { ++ // Notify each thread of the store operation. ++ for (LinkedAddress* iter = head_; iter; iter = iter->next_) { ++ iter->NotifyStore_Locked(); ++ } ++} ++ ++bool Simulator::GlobalMonitor::NotifyStoreConditional_Locked( ++ uintptr_t addr, LinkedAddress* linked_address) { ++ DCHECK(IsProcessorInLinkedList_Locked(linked_address)); ++ if (linked_address->NotifyStoreConditional_Locked(addr, true)) { ++ // Notify the other processors that this StoreConditional succeeded. 
++    for (LinkedAddress* iter = head_; iter; iter = iter->next_) {
++      if (iter != linked_address) {
++        iter->NotifyStoreConditional_Locked(addr, false);
++      }
++    }
++    return true;
++  } else {
++    return false;
++  }
++}
++
++bool Simulator::GlobalMonitor::IsProcessorInLinkedList_Locked(
++    LinkedAddress* linked_address) const {
++  return head_ == linked_address || linked_address->next_ ||
++         linked_address->prev_;
++}
++
++void Simulator::GlobalMonitor::PrependProcessor_Locked(
++    LinkedAddress* linked_address) {
++  if (IsProcessorInLinkedList_Locked(linked_address)) {
++    return;
++  }
++
++  if (head_) {
++    head_->prev_ = linked_address;
++  }
++  linked_address->prev_ = nullptr;
++  linked_address->next_ = head_;
++  head_ = linked_address;
++}
++
++void Simulator::GlobalMonitor::RemoveLinkedAddress(
++    LinkedAddress* linked_address) {
++  base::MutexGuard lock_guard(&mutex);
++  if (!IsProcessorInLinkedList_Locked(linked_address)) {
++    return;
++  }
++
++  if (linked_address->prev_) {
++    linked_address->prev_->next_ = linked_address->next_;
++  } else {
++    head_ = linked_address->next_;
++  }
++  if (linked_address->next_) {
++    linked_address->next_->prev_ = linked_address->prev_;
++  }
++  linked_address->prev_ = nullptr;
++  linked_address->next_ = nullptr;
++}
++
++#undef SScanF
++
++} // namespace internal
++} // namespace v8
++
++#endif // USE_SIMULATOR
+diff --git a/src/3rdparty/chromium/v8/src/execution/la64/simulator-la64.h b/src/3rdparty/chromium/v8/src/execution/la64/simulator-la64.h
+new file mode 100644
+index 00000000000..de2d1b0d89a
+--- /dev/null
++++ b/src/3rdparty/chromium/v8/src/execution/la64/simulator-la64.h
+@@ -0,0 +1,646 @@
++// Copyright 2020 the V8 project authors. All rights reserved.
++// Use of this source code is governed by a BSD-style license that can be
++// found in the LICENSE file.
++
++// Declares a Simulator for loongisa instructions if we are not generating a
++// native loongisa binary. This Simulator allows us to run and debug loongisa
++// code generation on regular desktop machines. V8 calls into generated code via
++// the GeneratedCode wrapper, which will start execution in the Simulator or
++// forwards to the real entry on a loongisa HW platform.
++
++#ifndef V8_EXECUTION_LA64_SIMULATOR_LA64_H_
++#define V8_EXECUTION_LA64_SIMULATOR_LA64_H_
++
++// globals.h defines USE_SIMULATOR.
++#include "src/common/globals.h"
++
++template <typename T>
++int Compare(const T& a, const T& b) {
++  if (a == b)
++    return 0;
++  else if (a < b)
++    return -1;
++  else
++    return 1;
++}
++
++// Returns the negative absolute value of its argument.
++template <typename T,
++          typename = typename std::enable_if<std::is_signed<T>::value>::type>
++T Nabs(T a) {
++  return a < 0 ? a : -a;
++}
++
++#if defined(USE_SIMULATOR)
++// Running with a simulator.
++
++#include "src/base/hashmap.h"
++#include "src/codegen/assembler.h"
++#include "src/codegen/la64/constants-la64.h"
++#include "src/execution/simulator-base.h"
++#include "src/utils/allocation.h"
++
++namespace v8 {
++namespace internal {
++
++// -----------------------------------------------------------------------------
++// Utility functions
++
++class CachePage {
++ public:
++  static const int LINE_VALID = 0;
++  static const int LINE_INVALID = 1;
++
++  static const int kPageShift = 12;
++  static const int kPageSize = 1 << kPageShift;
++  static const int kPageMask = kPageSize - 1;
++  static const int kLineShift = 2;  // The cache line is only 4 bytes right now.
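++  // The constants above are consumed by the static ICache helpers declared
++  // later in this header (CheckICache / FlushICache / FlushOnePage). A rough
++  // sketch of the intended flow, assuming the page has already been looked up
++  // (cache_page, offset and line_start are illustrative local names):
++  //   char* valid_byte = cache_page->ValidityByte(offset);
++  //   if (*valid_byte != CachePage::LINE_VALID) {
++  //     // Flushing marked this line invalid; re-copy the instructions and
++  //     // mark the line valid before executing from the cached copy.
++  //     memcpy(cache_page->CachedData(offset & ~CachePage::kLineMask),
++  //            line_start, CachePage::kLineLength);
++  //     *valid_byte = CachePage::LINE_VALID;
++  //   }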
++ static const int kLineLength = 1 << kLineShift; ++ static const int kLineMask = kLineLength - 1; ++ ++ CachePage() { memset(&validity_map_, LINE_INVALID, sizeof(validity_map_)); } ++ ++ char* ValidityByte(int offset) { ++ return &validity_map_[offset >> kLineShift]; ++ } ++ ++ char* CachedData(int offset) { return &data_[offset]; } ++ ++ private: ++ char data_[kPageSize]; // The cached data. ++ static const int kValidityMapSize = kPageSize >> kLineShift; ++ char validity_map_[kValidityMapSize]; // One byte per line. ++}; ++ ++class SimInstructionBase : public InstructionBase { ++ public: ++ Type InstructionType() const { return type_; } ++ inline Instruction* instr() const { return instr_; } ++ inline int32_t operand() const { return operand_; } ++ ++ protected: ++ SimInstructionBase() : operand_(-1), instr_(nullptr), type_(kUnsupported) {} ++ explicit SimInstructionBase(Instruction* instr) {} ++ ++ int32_t operand_; ++ Instruction* instr_; ++ Type type_; ++ ++ private: ++ DISALLOW_ASSIGN(SimInstructionBase); ++}; ++ ++class SimInstruction : public InstructionGetters { ++ public: ++ SimInstruction() {} ++ ++ explicit SimInstruction(Instruction* instr) { *this = instr; } ++ ++ SimInstruction& operator=(Instruction* instr) { ++ operand_ = *reinterpret_cast(instr); ++ instr_ = instr; ++ type_ = InstructionBase::InstructionType(); ++ DCHECK(reinterpret_cast(&operand_) == this); ++ return *this; ++ } ++}; ++ ++class Simulator : public SimulatorBase { ++ public: ++ friend class La64Debugger; ++ ++ // Registers are declared in order. ++ enum Register { ++ no_reg = -1, ++ zero_reg = 0, ++ ra, ++ gp, ++ sp, ++ a0, ++ a1, ++ a2, ++ a3, ++ a4, ++ a5, ++ a6, ++ a7, ++ t0, ++ t1, ++ t2, ++ t3, ++ t4, ++ t5, ++ t6, ++ t7, ++ t8, ++ tp, ++ fp, ++ s0, ++ s1, ++ s2, ++ s3, ++ s4, ++ s5, ++ s6, ++ s7, ++ s8, ++ pc, // pc must be the last register. ++ kNumSimuRegisters, ++ // aliases ++ v0 = a0, ++ v1 = a1 ++ }; ++ ++ // Condition flag registers. ++ enum CFRegister { ++ fcc0, ++ fcc1, ++ fcc2, ++ fcc3, ++ fcc4, ++ fcc5, ++ fcc6, ++ fcc7, ++ kNumCFRegisters ++ }; ++ ++ // Floating point registers. ++ enum FPURegister { ++ f0, ++ f1, ++ f2, ++ f3, ++ f4, ++ f5, ++ f6, ++ f7, ++ f8, ++ f9, ++ f10, ++ f11, ++ f12, ++ f13, ++ f14, ++ f15, ++ f16, ++ f17, ++ f18, ++ f19, ++ f20, ++ f21, ++ f22, ++ f23, ++ f24, ++ f25, ++ f26, ++ f27, ++ f28, ++ f29, ++ f30, ++ f31, ++ kNumFPURegisters ++ }; ++ ++ explicit Simulator(Isolate* isolate); ++ ~Simulator(); ++ ++ // The currently executing Simulator instance. Potentially there can be one ++ // for each native thread. ++ V8_EXPORT_PRIVATE static Simulator* current(v8::internal::Isolate* isolate); ++ ++ // Accessors for register state. Reading the pc value adheres to the LA64 ++ // architecture specification and is off by a 8 from the currently executing ++ // instruction. ++ void set_register(int reg, int64_t value); ++ void set_register_word(int reg, int32_t value); ++ void set_dw_register(int dreg, const int* dbl); ++ int64_t get_register(int reg) const; ++ double get_double_from_register_pair(int reg); ++ // Same for FPURegisters. 
++ void set_fpu_register(int fpureg, int64_t value); ++ void set_fpu_register_word(int fpureg, int32_t value); ++ void set_fpu_register_hi_word(int fpureg, int32_t value); ++ void set_fpu_register_float(int fpureg, float value); ++ void set_fpu_register_double(int fpureg, double value); ++ void set_fpu_register_invalid_result64(float original, float rounded); ++ void set_fpu_register_invalid_result(float original, float rounded); ++ void set_fpu_register_word_invalid_result(float original, float rounded); ++ void set_fpu_register_invalid_result64(double original, double rounded); ++ void set_fpu_register_invalid_result(double original, double rounded); ++ void set_fpu_register_word_invalid_result(double original, double rounded); ++ int64_t get_fpu_register(int fpureg) const; ++ int32_t get_fpu_register_word(int fpureg) const; ++ int32_t get_fpu_register_signed_word(int fpureg) const; ++ int32_t get_fpu_register_hi_word(int fpureg) const; ++ float get_fpu_register_float(int fpureg) const; ++ double get_fpu_register_double(int fpureg) const; ++ void set_cf_register(int cfreg, bool value); ++ bool get_cf_register(int cfreg) const; ++ void set_fcsr_rounding_mode(FPURoundingMode mode); ++ unsigned int get_fcsr_rounding_mode(); ++ void set_fcsr_bit(uint32_t cc, bool value); ++ bool test_fcsr_bit(uint32_t cc); ++ bool set_fcsr_round_error(double original, double rounded); ++ bool set_fcsr_round64_error(double original, double rounded); ++ bool set_fcsr_round_error(float original, float rounded); ++ bool set_fcsr_round64_error(float original, float rounded); ++ void round_according_to_fcsr(double toRound, double* rounded, ++ int32_t* rounded_int); ++ void round64_according_to_fcsr(double toRound, double* rounded, ++ int64_t* rounded_int); ++ void round_according_to_fcsr(float toRound, float* rounded, ++ int32_t* rounded_int); ++ void round64_according_to_fcsr(float toRound, float* rounded, ++ int64_t* rounded_int); ++ // Special case of set_register and get_register to access the raw PC value. ++ void set_pc(int64_t value); ++ int64_t get_pc() const; ++ ++ Address get_sp() const { return static_cast
(get_register(sp)); } ++ ++ // Accessor to the internal simulator stack area. ++ uintptr_t StackLimit(uintptr_t c_limit) const; ++ ++ // Executes LA64 instructions until the PC reaches end_sim_pc. ++ void Execute(); ++ ++ template ++ Return Call(Address entry, Args... args) { ++ return VariadicCall(this, &Simulator::CallImpl, entry, args...); ++ } ++ ++ // Alternative: call a 2-argument double function. ++ double CallFP(Address entry, double d0, double d1); ++ ++ // Push an address onto the JS stack. ++ uintptr_t PushAddress(uintptr_t address); ++ ++ // Pop an address from the JS stack. ++ uintptr_t PopAddress(); ++ ++ // Debugger input. ++ void set_last_debugger_input(char* input); ++ char* last_debugger_input() { return last_debugger_input_; } ++ ++ // Redirection support. ++ static void SetRedirectInstruction(Instruction* instruction); ++ ++ // ICache checking. ++ static bool ICacheMatch(void* one, void* two); ++ static void FlushICache(base::CustomMatcherHashMap* i_cache, void* start, ++ size_t size); ++ ++ // Returns true if pc register contains one of the 'special_values' defined ++ // below (bad_ra, end_sim_pc). ++ bool has_bad_pc() const; ++ ++ private: ++ enum special_values { ++ // Known bad pc value to ensure that the simulator does not execute ++ // without being properly setup. ++ bad_ra = -1, ++ // A pc value used to signal the simulator to stop execution. Generally ++ // the ra is set to this value on transition from native C code to ++ // simulated execution, so that the simulator can "return" to the native ++ // C code. ++ end_sim_pc = -2, ++ // Unpredictable value. ++ Unpredictable = 0xbadbeaf ++ }; ++ ++ V8_EXPORT_PRIVATE intptr_t CallImpl(Address entry, int argument_count, ++ const intptr_t* arguments); ++ ++ // Unsupported instructions use Format to print an error and stop execution. ++ void Format(Instruction* instr, const char* format); ++ ++ // Helpers for data value tracing. ++ enum TraceType { ++ BYTE, ++ HALF, ++ WORD, ++ DWORD, ++ FLOAT, ++ DOUBLE, ++ FLOAT_DOUBLE, ++ WORD_DWORD ++ }; ++ ++ // Read and write memory. ++ inline uint32_t ReadBU(int64_t addr); ++ inline int32_t ReadB(int64_t addr); ++ inline void WriteB(int64_t addr, uint8_t value); ++ inline void WriteB(int64_t addr, int8_t value); ++ ++ inline uint16_t ReadHU(int64_t addr, Instruction* instr); ++ inline int16_t ReadH(int64_t addr, Instruction* instr); ++ // Note: Overloaded on the sign of the value. ++ inline void WriteH(int64_t addr, uint16_t value, Instruction* instr); ++ inline void WriteH(int64_t addr, int16_t value, Instruction* instr); ++ ++ inline uint32_t ReadWU(int64_t addr, Instruction* instr); ++ inline int32_t ReadW(int64_t addr, Instruction* instr, TraceType t = WORD); ++ inline void WriteW(int64_t addr, int32_t value, Instruction* instr); ++ void WriteConditionalW(int64_t addr, int32_t value, Instruction* instr, ++ int32_t rt_reg); ++ inline int64_t Read2W(int64_t addr, Instruction* instr); ++ inline void Write2W(int64_t addr, int64_t value, Instruction* instr); ++ inline void WriteConditional2W(int64_t addr, int64_t value, ++ Instruction* instr, int32_t rt_reg); ++ ++ inline double ReadD(int64_t addr, Instruction* instr); ++ inline void WriteD(int64_t addr, double value, Instruction* instr); ++ ++ template ++ T ReadMem(int64_t addr, Instruction* instr); ++ template ++ void WriteMem(int64_t addr, T value, Instruction* instr); ++ ++ // Helper for debugging memory access. 
++  inline void DieOrDebug();
++
++  void TraceRegWr(int64_t value, TraceType t = DWORD);
++  void TraceMemWr(int64_t addr, int64_t value, TraceType t);
++  void TraceMemRd(int64_t addr, int64_t value, TraceType t = DWORD);
++  template <typename T>
++  void TraceMemRd(int64_t addr, T value);
++  template <typename T>
++  void TraceMemWr(int64_t addr, T value);
++
++  SimInstruction instr_;
++
++  // Executing is handled based on the instruction type.
++  void DecodeTypeOp6();
++  void DecodeTypeOp7();
++  void DecodeTypeOp8();
++  void DecodeTypeOp10();
++  void DecodeTypeOp12();
++  void DecodeTypeOp14();
++  void DecodeTypeOp17();
++  void DecodeTypeOp22();
++
++  inline int32_t rj_reg() const { return instr_.RjValue(); }
++  inline int64_t rj() const { return get_register(rj_reg()); }
++  inline uint64_t rj_u() const {
++    return static_cast<uint64_t>(get_register(rj_reg()));
++  }
++  inline int32_t rk_reg() const { return instr_.RkValue(); }
++  inline int64_t rk() const { return get_register(rk_reg()); }
++  inline uint64_t rk_u() const {
++    return static_cast<uint64_t>(get_register(rk_reg()));
++  }
++  inline int32_t rd_reg() const { return instr_.RdValue(); }
++  inline int64_t rd() const { return get_register(rd_reg()); }
++  inline uint64_t rd_u() const {
++    return static_cast<uint64_t>(get_register(rd_reg()));
++  }
++  inline int32_t fa_reg() const { return instr_.FaValue(); }
++  inline float fa_float() const { return get_fpu_register_float(fa_reg()); }
++  inline double fa_double() const { return get_fpu_register_double(fa_reg()); }
++  inline int32_t fj_reg() const { return instr_.FjValue(); }
++  inline float fj_float() const { return get_fpu_register_float(fj_reg()); }
++  inline double fj_double() const { return get_fpu_register_double(fj_reg()); }
++  inline int32_t fk_reg() const { return instr_.FkValue(); }
++  inline float fk_float() const { return get_fpu_register_float(fk_reg()); }
++  inline double fk_double() const { return get_fpu_register_double(fk_reg()); }
++  inline int32_t fd_reg() const { return instr_.FdValue(); }
++  inline float fd_float() const { return get_fpu_register_float(fd_reg()); }
++  inline double fd_double() const { return get_fpu_register_double(fd_reg()); }
++  inline int32_t cj_reg() const { return instr_.CjValue(); }
++  inline bool cj() const { return get_cf_register(cj_reg()); }
++  inline int32_t cd_reg() const { return instr_.CdValue(); }
++  inline bool cd() const { return get_cf_register(cd_reg()); }
++  inline int32_t ca_reg() const { return instr_.CaValue(); }
++  inline bool ca() const { return get_cf_register(ca_reg()); }
++  inline uint32_t sa2() const { return instr_.Sa2Value(); }
++  inline uint32_t sa3() const { return instr_.Sa3Value(); }
++  inline uint32_t ui5() const { return instr_.Ui5Value(); }
++  inline uint32_t ui6() const { return instr_.Ui6Value(); }
++  inline uint32_t lsbw() const { return instr_.LsbwValue(); }
++  inline uint32_t msbw() const { return instr_.MsbwValue(); }
++  inline uint32_t lsbd() const { return instr_.LsbdValue(); }
++  inline uint32_t msbd() const { return instr_.MsbdValue(); }
++  inline uint32_t cond() const { return instr_.CondValue(); }
++  inline int32_t si12() const { return (instr_.Si12Value() << 20) >> 20; }
++  inline uint32_t ui12() const { return instr_.Ui12Value(); }
++  inline int32_t si14() const { return (instr_.Si14Value() << 18) >> 18; }
++  inline int32_t si16() const { return (instr_.Si16Value() << 16) >> 16; }
++  inline int32_t si20() const { return (instr_.Si20Value() << 12) >> 12; }
++
++  inline void SetResult(const int32_t rd_reg, const int64_t alu_out) {
++    set_register(rd_reg,
alu_out); ++ TraceRegWr(alu_out); ++ } ++ ++ inline void SetFPUWordResult(int32_t fd_reg, int32_t alu_out) { ++ set_fpu_register_word(fd_reg, alu_out); ++ TraceRegWr(get_fpu_register(fd_reg), WORD); ++ } ++ ++ inline void SetFPUWordResult2(int32_t fd_reg, int32_t alu_out) { ++ set_fpu_register_word(fd_reg, alu_out); ++ TraceRegWr(get_fpu_register(fd_reg)); ++ } ++ ++ inline void SetFPUResult(int32_t fd_reg, int64_t alu_out) { ++ set_fpu_register(fd_reg, alu_out); ++ TraceRegWr(get_fpu_register(fd_reg)); ++ } ++ ++ inline void SetFPUResult2(int32_t fd_reg, int64_t alu_out) { ++ set_fpu_register(fd_reg, alu_out); ++ TraceRegWr(get_fpu_register(fd_reg), DOUBLE); ++ } ++ ++ inline void SetFPUFloatResult(int32_t fd_reg, float alu_out) { ++ set_fpu_register_float(fd_reg, alu_out); ++ TraceRegWr(get_fpu_register(fd_reg), FLOAT); ++ } ++ ++ inline void SetFPUDoubleResult(int32_t fd_reg, double alu_out) { ++ set_fpu_register_double(fd_reg, alu_out); ++ TraceRegWr(get_fpu_register(fd_reg), DOUBLE); ++ } ++ ++ // Used for breakpoints. ++ void SoftwareInterrupt(); ++ ++ // Stop helper functions. ++ bool IsWatchpoint(uint64_t code); ++ void PrintWatchpoint(uint64_t code); ++ void HandleStop(uint64_t code, Instruction* instr); ++ bool IsStopInstruction(Instruction* instr); ++ bool IsEnabledStop(uint64_t code); ++ void EnableStop(uint64_t code); ++ void DisableStop(uint64_t code); ++ void IncreaseStopCounter(uint64_t code); ++ void PrintStopInfo(uint64_t code); ++ ++ // Executes one instruction. ++ void InstructionDecode(Instruction* instr); ++ // Execute one instruction placed in a branch delay slot. ++ ++ // ICache. ++ static void CheckICache(base::CustomMatcherHashMap* i_cache, ++ Instruction* instr); ++ static void FlushOnePage(base::CustomMatcherHashMap* i_cache, intptr_t start, ++ size_t size); ++ static CachePage* GetCachePage(base::CustomMatcherHashMap* i_cache, ++ void* page); ++ ++ enum Exception { ++ none, ++ kIntegerOverflow, ++ kIntegerUnderflow, ++ kDivideByZero, ++ kNumExceptions ++ }; ++ ++ // Exceptions. ++ void SignalException(Exception e); ++ ++ // Handle arguments and return value for runtime FP functions. ++ void GetFpArgs(double* x, double* y, int32_t* z); ++ void SetFpResult(const double& result); ++ ++ void CallInternal(Address entry); ++ ++ // Architecture state. ++ // Registers. ++ int64_t registers_[kNumSimuRegisters]; ++ // Floating point Registers. ++ int64_t FPUregisters_[kNumFPURegisters]; ++ // Condition flags Registers. ++ bool CFregisters_[kNumCFRegisters]; ++ // FPU control register. ++ uint32_t FCSR_; ++ ++ // Simulator support. ++ // Allocate 1MB for stack. ++ size_t stack_size_; ++ char* stack_; ++ bool pc_modified_; ++ int64_t icount_; ++ int break_count_; ++ EmbeddedVector trace_buf_; ++ ++ // Debugger input. ++ char* last_debugger_input_; ++ ++ v8::internal::Isolate* isolate_; ++ ++ // Registered breakpoints. ++ Instruction* break_pc_; ++ Instr break_instr_; ++ ++ // Stop is disabled if bit 31 is set. ++ static const uint32_t kStopDisabledBit = 1 << 31; ++ ++ // A stop is enabled, meaning the simulator will stop when meeting the ++ // instruction, if bit 31 of watched_stops_[code].count is unset. ++ // The value watched_stops_[code].count & ~(1 << 31) indicates how many times ++ // the breakpoint was hit or gone through. ++ struct StopCountAndDesc { ++ uint32_t count; ++ char* desc; ++ }; ++ StopCountAndDesc watched_stops_[kMaxStopCode + 1]; ++ ++ // Synchronization primitives. 
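++  // Rough sketch of how the monitors declared below are driven by the
++  // LL/SC-style helpers above (WriteConditionalW / WriteConditional2W),
++  // simplified and without the mutex handling:
++  //   local_monitor_.NotifyLoadLinked(addr, TransactionSize::Word);
++  //   GlobalMonitor::Get()->NotifyLoadLinked_Locked(addr, &global_monitor_thread_);
++  //   ...
++  //   bool ok =
++  //       local_monitor_.NotifyStoreConditional(addr, TransactionSize::Word) &&
++  //       GlobalMonitor::Get()->NotifyStoreConditional_Locked(
++  //           addr, &global_monitor_thread_);
++  //   // The store (and an SC success result) only happens when ok is true.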
++ enum class MonitorAccess { ++ Open, ++ RMW, ++ }; ++ ++ enum class TransactionSize { ++ None = 0, ++ Word = 4, ++ DoubleWord = 8, ++ }; ++ ++ // The least-significant bits of the address are ignored. The number of bits ++ // is implementation-defined, between 3 and minimum page size. ++ static const uintptr_t kExclusiveTaggedAddrMask = ~((1 << 3) - 1); ++ ++ class LocalMonitor { ++ public: ++ LocalMonitor(); ++ ++ // These functions manage the state machine for the local monitor, but do ++ // not actually perform loads and stores. NotifyStoreConditional only ++ // returns true if the store conditional is allowed; the global monitor will ++ // still have to be checked to see whether the memory should be updated. ++ void NotifyLoad(); ++ void NotifyLoadLinked(uintptr_t addr, TransactionSize size); ++ void NotifyStore(); ++ bool NotifyStoreConditional(uintptr_t addr, TransactionSize size); ++ ++ private: ++ void Clear(); ++ ++ MonitorAccess access_state_; ++ uintptr_t tagged_addr_; ++ TransactionSize size_; ++ }; ++ ++ class GlobalMonitor { ++ public: ++ class LinkedAddress { ++ public: ++ LinkedAddress(); ++ ++ private: ++ friend class GlobalMonitor; ++ // These functions manage the state machine for the global monitor, but do ++ // not actually perform loads and stores. ++ void Clear_Locked(); ++ void NotifyLoadLinked_Locked(uintptr_t addr); ++ void NotifyStore_Locked(); ++ bool NotifyStoreConditional_Locked(uintptr_t addr, ++ bool is_requesting_thread); ++ ++ MonitorAccess access_state_; ++ uintptr_t tagged_addr_; ++ LinkedAddress* next_; ++ LinkedAddress* prev_; ++ // A scd can fail due to background cache evictions. Rather than ++ // simulating this, we'll just occasionally introduce cases where an ++ // store conditional fails. This will happen once after every ++ // kMaxFailureCounter exclusive stores. ++ static const int kMaxFailureCounter = 5; ++ int failure_counter_; ++ }; ++ ++ // Exposed so it can be accessed by Simulator::{Read,Write}Ex*. ++ base::Mutex mutex; ++ ++ void NotifyLoadLinked_Locked(uintptr_t addr, LinkedAddress* linked_address); ++ void NotifyStore_Locked(LinkedAddress* linked_address); ++ bool NotifyStoreConditional_Locked(uintptr_t addr, ++ LinkedAddress* linked_address); ++ ++ // Called when the simulator is destroyed. ++ void RemoveLinkedAddress(LinkedAddress* linked_address); ++ ++ static GlobalMonitor* Get(); ++ ++ private: ++ // Private constructor. Call {GlobalMonitor::Get()} to get the singleton. ++ GlobalMonitor() = default; ++ friend class base::LeakyObject; ++ ++ bool IsProcessorInLinkedList_Locked(LinkedAddress* linked_address) const; ++ void PrependProcessor_Locked(LinkedAddress* linked_address); ++ ++ LinkedAddress* head_ = nullptr; ++ }; ++ ++ LocalMonitor local_monitor_; ++ GlobalMonitor::LinkedAddress global_monitor_thread_; ++}; ++ ++} // namespace internal ++} // namespace v8 ++ ++#endif // defined(USE_SIMULATOR) ++#endif // V8_EXECUTION_LA64_SIMULATOR_LA64_H_ +diff --git a/src/3rdparty/chromium/v8/src/execution/mips64/simulator-mips64.cc b/src/3rdparty/chromium/v8/src/execution/mips64/simulator-mips64.cc +index 72f28363292..98c50263a02 100644 +--- a/src/3rdparty/chromium/v8/src/execution/mips64/simulator-mips64.cc ++++ b/src/3rdparty/chromium/v8/src/execution/mips64/simulator-mips64.cc +@@ -28,6 +28,8 @@ namespace internal { + DEFINE_LAZY_LEAKY_OBJECT_GETTER(Simulator::GlobalMonitor, + Simulator::GlobalMonitor::Get) + ++// #define PRINT_SIM_LOG ++ + // Util functions. 
+ inline bool HaveSameSign(int64_t a, int64_t b) { return ((a ^ b) >= 0); } + +@@ -57,6 +59,17 @@ static int64_t MultiplyHighSigned(int64_t u, int64_t v) { + return u1 * v1 + w2 + (w1 >> 32); + } + ++#ifdef PRINT_SIM_LOG ++inline void printf_instr(const char* _Format, ...) { ++ va_list varList; ++ va_start(varList, _Format); ++ vprintf(_Format, varList); ++ va_end(varList); ++} ++#else ++#define printf_instr(...) ++#endif ++ + // This macro provides a platform independent use of sscanf. The reason for + // SScanF not being implemented in a platform independent was through + // ::v8::internal::OS in the same way as SNPrintF is that the Windows C Run-Time +@@ -2195,6 +2208,7 @@ void Simulator::SoftwareInterrupt() { + uint32_t code = (func == BREAK) ? instr_.Bits(25, 6) : -1; + // We first check if we met a call_rt_redirected. + if (instr_.InstructionBits() == rtCallRedirInstr) { ++ printf_instr("Simulator::SoftwareInterrupt: BREAK 0xFFFFF\n"); + Redirection* redirection = Redirection::FromInstruction(instr_.instr()); + + int64_t* stack_pointer = reinterpret_cast(get_register(sp)); +@@ -2723,6 +2737,9 @@ void Simulator::DecodeTypeRegisterSRsType() { + KeepSign::yes, fs)); + break; + case SQRT_S: ++ printf_instr("sqrt_s\t %s: %016f, %s: %016f\n", ++ FPURegisters::Name(fd_reg()), fd, ++ FPURegisters::Name(fs_reg()), fs); + SetFPUFloatResult( + fd_reg(), + FPUCanonalizeOperation([](float src) { return std::sqrt(src); }, fs)); +@@ -3115,6 +3132,10 @@ void Simulator::DecodeTypeRegisterDRsType() { + [](double lhs, double rhs) { return lhs + rhs; }, fs, ft)); + break; + case SUB_D: ++ printf_instr("sub_d\t %s: %016f, %s: %016f, %s: %016f\n", ++ FPURegisters::Name(fd_reg()), fd, ++ FPURegisters::Name(fs_reg()), fs, ++ FPURegisters::Name(ft_reg()), ft); + SetFPUDoubleResult( + fd_reg(), + FPUCanonalizeOperation( +@@ -3381,6 +3402,10 @@ void Simulator::DecodeTypeRegisterWRsType() { + int64_t alu_out = 0x12345678; + switch (instr_.FunctionFieldRaw()) { + case CVT_S_W: // Convert word to float (single). 
++ printf_instr( ++ "CVT_S_W \t %s: %016f, %s: %016x\n", FPURegisters::Name(fd_reg()), ++ get_fpu_register_float(fd_reg()), FPURegisters::Name(fs_reg()), ++ get_fpu_register_signed_word(fs_reg())); + alu_out = get_fpu_register_signed_word(fs_reg()); + SetFPUFloatResult(fd_reg(), static_cast(alu_out)); + break; +@@ -3476,6 +3501,10 @@ void Simulator::DecodeTypeRegisterLRsType() { + SetFPUDoubleResult(fd_reg(), static_cast(i64)); + break; + case CVT_S_L: ++ printf_instr("CVT_S_L \t %s: %016f, %s: %016x\n", ++ FPURegisters::Name(fd_reg()), ++ get_fpu_register_float(fd_reg()), ++ FPURegisters::Name(fs_reg()), get_fpu_register(fs_reg())); + i64 = get_fpu_register(fs_reg()); + SetFPUFloatResult(fd_reg(), static_cast(i64)); + break; +@@ -3569,11 +3598,17 @@ void Simulator::DecodeTypeRegisterCOP1() { + SetResult(rt_reg(), FCSR_); + break; + case MFC1: ++ printf_instr("MFC1 \t %s: %016lx, %s: %016f\n", Registers::Name(rt_reg()), ++ rt(), FPURegisters::Name(fs_reg()), ++ get_fpu_register_float(fs_reg())); + set_register(rt_reg(), + static_cast(get_fpu_register_word(fs_reg()))); + TraceRegWr(get_register(rt_reg()), WORD_DWORD); + break; + case DMFC1: ++ printf_instr( ++ "DMFC1 \t %s: %016lx, %s: %016f\n", Registers::Name(rt_reg()), rt(), ++ FPURegisters::Name(fs_reg()), get_fpu_register_double(fs_reg())); + SetResult(rt_reg(), get_fpu_register(fs_reg())); + break; + case MFHC1: +@@ -3593,12 +3628,18 @@ void Simulator::DecodeTypeRegisterCOP1() { + break; + } + case MTC1: ++ printf_instr( ++ "MTC1 \t %s: %016f, %s: %016lx\n", FPURegisters::Name(fs_reg()), ++ get_fpu_register_float(fs_reg()), Registers::Name(rt_reg()), rt()); + // Hardware writes upper 32-bits to zero on mtc1. + set_fpu_register_hi_word(fs_reg(), 0); + set_fpu_register_word(fs_reg(), static_cast(rt())); + TraceRegWr(get_fpu_register(fs_reg()), FLOAT_DOUBLE); + break; + case DMTC1: ++ printf_instr( ++ "DMTC1 \t %s: %016f, %s: %016lx\n", FPURegisters::Name(fs_reg()), ++ get_fpu_register_float(fs_reg()), Registers::Name(rt_reg()), rt()); + SetFPUResult2(fs_reg(), rt()); + break; + case MTHC1: +@@ -3683,6 +3724,7 @@ void Simulator::DecodeTypeRegisterSPECIAL() { + case JR: { + int64_t next_pc = rs(); + int64_t current_pc = get_pc(); ++ printf_instr("JALR\t %s: %016lx\n", Registers::Name(rs_reg()), rs()); + Instruction* branch_delay_instr = + reinterpret_cast(current_pc + kInstrSize); + BranchDelayInstructionDecode(branch_delay_instr); +@@ -3694,6 +3736,8 @@ void Simulator::DecodeTypeRegisterSPECIAL() { + int64_t next_pc = rs(); + int64_t current_pc = get_pc(); + int32_t return_addr_reg = rd_reg(); ++ printf_instr("JALR\t %s: %016lx, %s: %016lx\n", Registers::Name(rd_reg()), ++ get_register(rd_reg()), Registers::Name(rs_reg()), rs()); + Instruction* branch_delay_instr = + reinterpret_cast(current_pc + kInstrSize); + BranchDelayInstructionDecode(branch_delay_instr); +@@ -3703,21 +3747,36 @@ void Simulator::DecodeTypeRegisterSPECIAL() { + break; + } + case SLL: ++ printf_instr("SLL\t %s: %016lx, %s: %016lx, sa: %02x\n", ++ Registers::Name(rd_reg()), get_register(rd_reg()), ++ Registers::Name(rt_reg()), rt(), sa()); + SetResult(rd_reg(), static_cast(rt()) << sa()); + break; + case DSLL: ++ printf_instr("DSLL\t %s: %016lx, %s: %016lx, sa: %02x\n", ++ Registers::Name(rd_reg()), get_register(rd_reg()), ++ Registers::Name(rt_reg()), rt(), sa()); + SetResult(rd_reg(), rt() << sa()); + break; + case DSLL32: ++ printf_instr("DSLL32\t %s: %016lx, %s: %016lx, sa: %02x\n", ++ Registers::Name(rd_reg()), get_register(rd_reg()), ++ Registers::Name(rt_reg()), 
rt(), sa()); + SetResult(rd_reg(), rt() << sa() << 32); + break; + case SRL: + if (rs_reg() == 0) { ++ printf_instr("SRL\t %s: %016lx, %s: %016lx, sa: %02x\n", ++ Registers::Name(rd_reg()), get_register(rd_reg()), ++ Registers::Name(rt_reg()), rt(), sa()); + // Regular logical right shift of a word by a fixed number of + // bits instruction. RS field is always equal to 0. + // Sign-extend the 32-bit result. + alu_out = static_cast(static_cast(rt_u()) >> sa()); + } else if (rs_reg() == 1) { ++ printf_instr("ROTR\t %s: %016lx, %s: %016lx, sa: %02x\n", ++ Registers::Name(rd_reg()), get_register(rd_reg()), ++ Registers::Name(rt_reg()), rt(), sa()); + // Logical right-rotate of a word by a fixed number of bits. This + // is special case of SRL instruction, added in MIPS32 Release 2. + // RS field is equal to 00001. +@@ -3731,11 +3790,17 @@ void Simulator::DecodeTypeRegisterSPECIAL() { + break; + case DSRL: + if (rs_reg() == 0) { ++ printf_instr("DSRL\t %s: %016lx, %s: %016lx, sa: %02x\n", ++ Registers::Name(rd_reg()), get_register(rd_reg()), ++ Registers::Name(rt_reg()), rt(), sa()); + // Regular logical right shift of a word by a fixed number of + // bits instruction. RS field is always equal to 0. + // Sign-extend the 64-bit result. + alu_out = static_cast(rt_u() >> sa()); + } else if (rs_reg() == 1) { ++ printf_instr("DROTR\t %s: %016lx, %s: %016lx, sa: %02x\n", ++ Registers::Name(rd_reg()), get_register(rd_reg()), ++ Registers::Name(rt_reg()), rt(), sa()); + // Logical right-rotate of a word by a fixed number of bits. This + // is special case of SRL instruction, added in MIPS32 Release 2. + // RS field is equal to 00001. +@@ -3747,11 +3812,17 @@ void Simulator::DecodeTypeRegisterSPECIAL() { + break; + case DSRL32: + if (rs_reg() == 0) { ++ printf_instr("DSRL32\t %s: %016lx, %s: %016lx, sa: %02x\n", ++ Registers::Name(rd_reg()), get_register(rd_reg()), ++ Registers::Name(rt_reg()), rt(), sa()); + // Regular logical right shift of a word by a fixed number of + // bits instruction. RS field is always equal to 0. + // Sign-extend the 64-bit result. + alu_out = static_cast(rt_u() >> sa() >> 32); + } else if (rs_reg() == 1) { ++ printf_instr("DROTR32\t %s: %016lx, %s: %016lx, sa: %02x\n", ++ Registers::Name(rd_reg()), get_register(rd_reg()), ++ Registers::Name(rt_reg()), rt(), sa()); + // Logical right-rotate of a word by a fixed number of bits. This + // is special case of SRL instruction, added in MIPS32 Release 2. + // RS field is equal to 00001. 
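The tracing added throughout DecodeTypeRegisterSPECIAL follows one pattern: the mnemonic, then each register operand as "name: value" (the destination register still holds its previous value at this point), then any shift amount or immediate field. With PRINT_SIM_LOG enabled, a single shift would be logged roughly as:

    SLL	 v0: 0000000000000000, a0: 0000000000001234, sa: 04

The register names and values above only illustrate the format strings used; the actual output depends on the simulated instruction stream.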
+@@ -3763,26 +3834,51 @@ void Simulator::DecodeTypeRegisterSPECIAL() { + SetResult(rd_reg(), alu_out); + break; + case SRA: ++ printf_instr("SRA\t %s: %016lx, %s: %016lx, sa: %02x\n", ++ Registers::Name(rd_reg()), get_register(rd_reg()), ++ Registers::Name(rt_reg()), rt(), sa()); + SetResult(rd_reg(), (int32_t)rt() >> sa()); + break; + case DSRA: ++ printf_instr("DSRA\t %s: %016lx, %s: %016lx, sa: %02x\n", ++ Registers::Name(rd_reg()), get_register(rd_reg()), ++ Registers::Name(rt_reg()), rt(), sa()); + SetResult(rd_reg(), rt() >> sa()); + break; + case DSRA32: ++ printf_instr("DSRA32\t %s: %016lx, %s: %016lx, sa: %02x\n", ++ Registers::Name(rd_reg()), get_register(rd_reg()), ++ Registers::Name(rt_reg()), rt(), sa()); + SetResult(rd_reg(), rt() >> sa() >> 32); + break; + case SLLV: ++ printf_instr("SLLV\t %s: %016lx, %s: %016lx, %s: %016lx\n", ++ Registers::Name(rd_reg()), get_register(rd_reg()), ++ Registers::Name(rt_reg()), rt(), Registers::Name(rs_reg()), ++ rs()); + SetResult(rd_reg(), (int32_t)rt() << rs()); + break; + case DSLLV: ++ printf_instr("DSLLV\t %s: %016lx, %s: %016lx, %s: %016lx\n", ++ Registers::Name(rd_reg()), get_register(rd_reg()), ++ Registers::Name(rt_reg()), rt(), Registers::Name(rs_reg()), ++ rs()); + SetResult(rd_reg(), rt() << rs()); + break; + case SRLV: + if (sa() == 0) { ++ printf_instr("SRLV\t %s: %016lx, %s: %016lx, %s: %016lx\n", ++ Registers::Name(rd_reg()), get_register(rd_reg()), ++ Registers::Name(rt_reg()), rt(), Registers::Name(rs_reg()), ++ rs()); + // Regular logical right-shift of a word by a variable number of + // bits instruction. SA field is always equal to 0. + alu_out = static_cast((uint32_t)rt_u() >> rs()); + } else { ++ printf_instr("ROTRV\t %s: %016lx, %s: %016lx, %s: %016lx\n", ++ Registers::Name(rd_reg()), get_register(rd_reg()), ++ Registers::Name(rt_reg()), rt(), Registers::Name(rs_reg()), ++ rs()); + // Logical right-rotate of a word by a variable number of bits. + // This is special case od SRLV instruction, added in MIPS32 + // Release 2. SA field is equal to 00001. +@@ -3794,10 +3890,18 @@ void Simulator::DecodeTypeRegisterSPECIAL() { + break; + case DSRLV: + if (sa() == 0) { ++ printf_instr("SRLV\t %s: %016lx, %s: %016lx, %s: %016lx\n", ++ Registers::Name(rd_reg()), get_register(rd_reg()), ++ Registers::Name(rt_reg()), rt(), Registers::Name(rs_reg()), ++ rs()); + // Regular logical right-shift of a word by a variable number of + // bits instruction. SA field is always equal to 0. + alu_out = static_cast(rt_u() >> rs()); + } else { ++ printf_instr("DROTRV\t %s: %016lx, %s: %016lx, %s: %016lx\n", ++ Registers::Name(rd_reg()), get_register(rd_reg()), ++ Registers::Name(rt_reg()), rt(), Registers::Name(rs_reg()), ++ rs()); + // Logical right-rotate of a word by a variable number of bits. + // This is special case od SRLV instruction, added in MIPS32 + // Release 2. SA field is equal to 00001. 
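Note that none of these printf_instr calls produce output by default: the hunk near the top of this file defines printf_instr as a real vprintf wrapper only when PRINT_SIM_LOG is defined, and as an empty macro otherwise, so the tracing compiles away. To see the trace in a local debug build, the commented-out define added by this patch would be flipped on:

    #define PRINT_SIM_LOG  // instead of the patch's default "// #define PRINT_SIM_LOG"

This is a debugging aid only and is not meant to be enabled in packaged builds.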
+@@ -3807,9 +3911,17 @@ void Simulator::DecodeTypeRegisterSPECIAL() { + SetResult(rd_reg(), alu_out); + break; + case SRAV: ++ printf_instr("SRAV\t %s: %016lx, %s: %016lx, %s: %016lx\n", ++ Registers::Name(rd_reg()), get_register(rd_reg()), ++ Registers::Name(rt_reg()), rt(), Registers::Name(rs_reg()), ++ rs()); + SetResult(rd_reg(), (int32_t)rt() >> rs()); + break; + case DSRAV: ++ printf_instr("DSRAV\t %s: %016lx, %s: %016lx, %s: %016lx\n", ++ Registers::Name(rd_reg()), get_register(rd_reg()), ++ Registers::Name(rt_reg()), rt(), Registers::Name(rs_reg()), ++ rs()); + SetResult(rd_reg(), rt() >> rs()); + break; + case LSA: { +@@ -4018,6 +4130,10 @@ void Simulator::DecodeTypeRegisterSPECIAL() { + break; + case ADD: + case DADD: ++ printf_instr("DADD\t %s: %016lx, %s: %016lx, %s: %016lx\n", ++ Registers::Name(rd_reg()), get_register(rd_reg()), ++ Registers::Name(rs_reg()), rs(), Registers::Name(rt_reg()), ++ rt()); + if (HaveSameSign(rs(), rt())) { + if (rs() > 0) { + if (rs() > (Registers::kMaxValue - rt())) { +@@ -4032,16 +4148,28 @@ void Simulator::DecodeTypeRegisterSPECIAL() { + SetResult(rd_reg(), rs() + rt()); + break; + case ADDU: { ++ printf_instr("ADDU\t %s: %016lx, %s: %016lx, %s: %016lx\n", ++ Registers::Name(rd_reg()), get_register(rd_reg()), ++ Registers::Name(rs_reg()), rs(), Registers::Name(rt_reg()), ++ rt()); + int32_t alu32_out = static_cast(rs() + rt()); + // Sign-extend result of 32bit operation into 64bit register. + SetResult(rd_reg(), static_cast(alu32_out)); + break; + } + case DADDU: ++ printf_instr("DADDU\t %s: %016lx, %s: %016lx, %s: %016lx\n", ++ Registers::Name(rd_reg()), get_register(rd_reg()), ++ Registers::Name(rs_reg()), rs(), Registers::Name(rt_reg()), ++ rt()); + SetResult(rd_reg(), rs() + rt()); + break; + case SUB: + case DSUB: ++ printf_instr("DSUB\t %s: %016lx, %s: %016lx, %s: %016lx\n", ++ Registers::Name(rd_reg()), get_register(rd_reg()), ++ Registers::Name(rs_reg()), rs(), Registers::Name(rt_reg()), ++ rt()); + if (!HaveSameSign(rs(), rt())) { + if (rs() > 0) { + if (rs() > (Registers::kMaxValue + rt())) { +@@ -4056,30 +4184,62 @@ void Simulator::DecodeTypeRegisterSPECIAL() { + SetResult(rd_reg(), rs() - rt()); + break; + case SUBU: { ++ printf_instr("SUBU\t %s: %016lx, %s: %016lx, %s: %016lx\n", ++ Registers::Name(rd_reg()), get_register(rd_reg()), ++ Registers::Name(rs_reg()), rs(), Registers::Name(rt_reg()), ++ rt()); + int32_t alu32_out = static_cast(rs() - rt()); + // Sign-extend result of 32bit operation into 64bit register. 
+ SetResult(rd_reg(), static_cast(alu32_out)); + break; + } + case DSUBU: ++ printf_instr("DSUBU\t %s: %016lx, %s: %016lx, %s: %016lx\n", ++ Registers::Name(rd_reg()), get_register(rd_reg()), ++ Registers::Name(rs_reg()), rs(), Registers::Name(rt_reg()), ++ rt()); + SetResult(rd_reg(), rs() - rt()); + break; + case AND: ++ printf_instr("AND\t %s: %016lx, %s: %016lx, %s: %016lx\n", ++ Registers::Name(rd_reg()), get_register(rd_reg()), ++ Registers::Name(rs_reg()), rs(), Registers::Name(rt_reg()), ++ rt()); + SetResult(rd_reg(), rs() & rt()); + break; + case OR: ++ printf_instr("OR\t %s: %016lx, %s: %016lx, %s: %016lx\n", ++ Registers::Name(rd_reg()), get_register(rd_reg()), ++ Registers::Name(rs_reg()), rs(), Registers::Name(rt_reg()), ++ rt()); + SetResult(rd_reg(), rs() | rt()); + break; + case XOR: ++ printf_instr("XOR\t %s: %016lx, %s: %016lx, %s: %016lx\n", ++ Registers::Name(rd_reg()), get_register(rd_reg()), ++ Registers::Name(rs_reg()), rs(), Registers::Name(rt_reg()), ++ rt()); + SetResult(rd_reg(), rs() ^ rt()); + break; + case NOR: ++ printf_instr("NOR\t %s: %016lx, %s: %016lx, %s: %016lx\n", ++ Registers::Name(rd_reg()), get_register(rd_reg()), ++ Registers::Name(rs_reg()), rs(), Registers::Name(rt_reg()), ++ rt()); + SetResult(rd_reg(), ~(rs() | rt())); + break; + case SLT: ++ printf_instr("SLT\t %s: %016lx, %s: %016lx, %s: %016lx\n", ++ Registers::Name(rd_reg()), get_register(rd_reg()), ++ Registers::Name(rs_reg()), rs(), Registers::Name(rt_reg()), ++ rt()); + SetResult(rd_reg(), rs() < rt() ? 1 : 0); + break; + case SLTU: ++ printf_instr("SLTU\t %s: %016lx, %s: %016lx, %s: %016lx\n", ++ Registers::Name(rd_reg()), get_register(rd_reg()), ++ Registers::Name(rs_reg()), rs(), Registers::Name(rt_reg()), ++ rt()); + SetResult(rd_reg(), rs_u() < rt_u() ? 1 : 0); + break; + // Break and trap instructions. +@@ -4106,9 +4266,14 @@ void Simulator::DecodeTypeRegisterSPECIAL() { + break; + case SYNC: + // TODO(palfia): Ignore sync instruction for now. ++ printf_instr("sync\n"); + break; + // Conditional moves. + case MOVN: ++ printf_instr("MOVN\t %s: %016lx, %s: %016lx, %s: %016lx\n", ++ Registers::Name(rd_reg()), get_register(rd_reg()), ++ Registers::Name(rs_reg()), rs(), Registers::Name(rt_reg()), ++ rt()); + if (rt()) { + SetResult(rd_reg(), rs()); + } +@@ -4173,6 +4338,9 @@ void Simulator::DecodeTypeRegisterSPECIAL3() { + // Interpret sa field as 5-bit lsb of extract. + uint16_t lsb = sa(); + uint16_t size = msbd + 1; ++ printf_instr("EXT\t %s: %016lx, %s: %016lx, pos: %d, size: %d\n", ++ Registers::Name(rt_reg()), get_register(rt_reg()), ++ Registers::Name(rs_reg()), rs(), lsb, size); + uint64_t mask = (1ULL << size) - 1; + alu_out = static_cast((rs_u() & (mask << lsb)) >> lsb); + SetResult(rt_reg(), alu_out); +@@ -4184,6 +4352,9 @@ void Simulator::DecodeTypeRegisterSPECIAL3() { + // Interpret sa field as 5-bit lsb of extract. + uint16_t lsb = sa(); + uint16_t size = msbd + 1; ++ printf_instr("DEXT\t %s: %016lx, %s: %016lx, pos: %d, size: %d\n", ++ Registers::Name(rt_reg()), get_register(rt_reg()), ++ Registers::Name(rs_reg()), rs(), lsb, size); + uint64_t mask = (size == 64) ? 
UINT64_MAX : (1ULL << size) - 1; + alu_out = static_cast((rs_u() & (mask << lsb)) >> lsb); + SetResult(rt_reg(), alu_out); +@@ -6553,6 +6724,7 @@ void Simulator::DecodeTypeImmediate() { + [this, &next_pc, &execute_branch_delay_instruction](bool do_branch) { + execute_branch_delay_instruction = true; + int64_t current_pc = get_pc(); ++ printf_instr("Offs16: %04x\n", instr_.Imm16Value()); + set_register(31, current_pc + 2 * kInstrSize); + if (do_branch) { + int16_t imm16 = instr_.Imm16Value(); +@@ -6565,6 +6737,7 @@ void Simulator::DecodeTypeImmediate() { + auto BranchHelper = [this, &next_pc, + &execute_branch_delay_instruction](bool do_branch) { + execute_branch_delay_instruction = true; ++ printf_instr("Offs16: %04x\n", instr_.Imm16Value()); + int64_t current_pc = get_pc(); + if (do_branch) { + int16_t imm16 = instr_.Imm16Value(); +@@ -6601,6 +6774,7 @@ void Simulator::DecodeTypeImmediate() { + auto BranchAndLinkCompactHelper = [this, &next_pc](bool do_branch, int bits) { + int64_t current_pc = get_pc(); + CheckForbiddenSlot(current_pc); ++ printf_instr("Offs: %08x\n", instr_.ImmValue(bits)); + if (do_branch) { + int32_t imm = instr_.ImmValue(bits); + imm <<= 32 - bits; +@@ -6613,6 +6787,7 @@ void Simulator::DecodeTypeImmediate() { + auto BranchCompactHelper = [this, &next_pc](bool do_branch, int bits) { + int64_t current_pc = get_pc(); + CheckForbiddenSlot(current_pc); ++ printf_instr("Offs: %08x\n", instr_.ImmValue(bits)); + if (do_branch) { + int32_t imm = instr_.ImmValue(bits); + imm <<= 32 - bits; +@@ -6707,15 +6882,19 @@ void Simulator::DecodeTypeImmediate() { + case REGIMM: + switch (instr_.RtFieldRaw()) { + case BLTZ: ++ printf_instr("BLTZ\t %s: %016lx, ", Registers::Name(rs_reg), rs); + BranchHelper(rs < 0); + break; + case BGEZ: ++ printf_instr("BGEZ\t %s: %016lx, ", Registers::Name(rs_reg), rs); + BranchHelper(rs >= 0); + break; + case BLTZAL: ++ printf_instr("BLTZAL\t %s: %016lx, ", Registers::Name(rs_reg), rs); + BranchAndLinkHelper(rs < 0); + break; + case BGEZAL: ++ printf_instr("BGEZAL\t %s: %016lx, ", Registers::Name(rs_reg), rs); + BranchAndLinkHelper(rs >= 0); + break; + case DAHI: +@@ -6732,9 +6911,13 @@ void Simulator::DecodeTypeImmediate() { + // When comparing to zero, the encoding of rt field is always 0, so we don't + // need to replace rt with zero. 
+ case BEQ: ++ printf_instr("BEQ\t %s: %016lx, %s: %016lx, ", Registers::Name(rs_reg), ++ rs, Registers::Name(rt_reg), rt); + BranchHelper(rs == rt); + break; + case BNE: ++ printf_instr("BNE\t %s: %016lx, %s: %016lx, ", Registers::Name(rs_reg), ++ rs, Registers::Name(rt_reg), rt); + BranchHelper(rs != rt); + break; + case POP06: // BLEZALC, BGEZALC, BGEUC, BLEZ (pre-r6) +@@ -6754,6 +6937,7 @@ void Simulator::DecodeTypeImmediate() { + BranchHelper(rs <= 0); + } + } else { // BLEZ ++ printf_instr("BLEZ\t %s: %016lx", Registers::Name(rs_reg), rs); + BranchHelper(rs <= 0); + } + break; +@@ -6774,6 +6958,7 @@ void Simulator::DecodeTypeImmediate() { + BranchHelper(rs > 0); + } + } else { // BGTZ ++ printf_instr("BGTZ\t %s: %016lx", Registers::Name(rs_reg), rs); + BranchHelper(rs > 0); + } + break; +@@ -6791,6 +6976,7 @@ void Simulator::DecodeTypeImmediate() { + } + } + } else { // BLEZL ++ printf_instr("BLEZL\t %s: %016lx", Registers::Name(rs_reg), rs); + BranchAndLinkHelper(rs <= 0); + } + break; +@@ -6808,6 +6994,7 @@ void Simulator::DecodeTypeImmediate() { + } + } + } else { // BGTZL ++ printf_instr("BGTZL\t %s: %016lx", Registers::Name(rs_reg), rs); + BranchAndLinkHelper(rs > 0); + } + break; +@@ -6846,6 +7033,9 @@ void Simulator::DecodeTypeImmediate() { + } + } + } else { // ADDI ++ printf_instr("ADDI\t %s: %016lx, %s: %016lx, imm16: %04lx\n", ++ Registers::Name(rt_reg), rt, Registers::Name(rs_reg), rs, ++ se_imm16); + if (HaveSameSign(rs, se_imm16)) { + if (rs > 0) { + if (rs <= Registers::kMaxValue - se_imm16) { +@@ -6876,27 +7066,48 @@ void Simulator::DecodeTypeImmediate() { + break; + // ------------- Arithmetic instructions. + case ADDIU: { ++ printf_instr("ADDIU\t %s: %016lx, %s: %016lx, imm16: %04lx\n", ++ Registers::Name(rt_reg), rt, Registers::Name(rs_reg), rs, ++ se_imm16); + int32_t alu32_out = static_cast(rs + se_imm16); + // Sign-extend result of 32bit operation into 64bit register. + SetResult(rt_reg, static_cast(alu32_out)); + break; + } + case DADDIU: ++ printf_instr("DADDIU\t %s: %016lx, %s: %016lx, imm16: %04lx\n", ++ Registers::Name(rt_reg), rt, Registers::Name(rs_reg), rs, ++ se_imm16); + SetResult(rt_reg, rs + se_imm16); + break; + case SLTI: ++ printf_instr("SLTI\t %s: %016lx, %s: %016lx, imm16: %04lx\n", ++ Registers::Name(rt_reg), rt, Registers::Name(rs_reg), rs, ++ se_imm16); + SetResult(rt_reg, rs < se_imm16 ? 1 : 0); + break; + case SLTIU: ++ printf_instr("SLTIU\t %s: %016lx, %s: %016lx, imm16: %04lx\n", ++ Registers::Name(rt_reg), rt, Registers::Name(rs_reg), rs, ++ se_imm16); + SetResult(rt_reg, rs_u < static_cast(se_imm16) ? 1 : 0); + break; + case ANDI: ++ printf_instr("ANDI\t %s: %016lx, %s: %016lx, imm16: %04lx\n", ++ Registers::Name(rt_reg), rt, Registers::Name(rs_reg), rs, ++ oe_imm16); + SetResult(rt_reg, rs & oe_imm16); + break; + case ORI: ++ printf_instr("ORI\t %s: %016lx, %s: %016lx, imm16: %04lx\n", ++ Registers::Name(rt_reg), rt, Registers::Name(rs_reg), rs, ++ oe_imm16); + SetResult(rt_reg, rs | oe_imm16); + break; + case XORI: ++ printf_instr("XORI\t %s: %016lx, %s: %016lx, imm16: %04lx\n", ++ Registers::Name(rt_reg), rt, Registers::Name(rs_reg), rs, ++ oe_imm16); + SetResult(rt_reg, rs ^ oe_imm16); + break; + case LUI: +@@ -6907,6 +7118,8 @@ void Simulator::DecodeTypeImmediate() { + SetResult(rt_reg, static_cast(alu32_out)); + } else { + // LUI instruction. 
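++            // LUI places the zero-extended immediate in bits 31..16 of a
++            // 32-bit value, which is then sign-extended into the 64-bit
++            // register below.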
++ printf_instr("LUI\t %s: %016lx, imm16: %04lx\n", ++ Registers::Name(rt_reg), rt, se_imm16); + int32_t alu32_out = static_cast(oe_imm16 << 16); + // Sign-extend result of 32bit operation into 64bit register. + SetResult(rt_reg, static_cast(alu32_out)); +@@ -6919,12 +7132,21 @@ void Simulator::DecodeTypeImmediate() { + break; + // ------------- Memory instructions. + case LB: ++ printf_instr("LB\t %s: %016lx, %s: %016lx, imm16: %04lx\n", ++ Registers::Name(rt_reg), rt, Registers::Name(rs_reg), rs, ++ se_imm16); + set_register(rt_reg, ReadB(rs + se_imm16)); + break; + case LH: ++ printf_instr("LH\t %s: %016lx, %s: %016lx, imm16: %04lx\n", ++ Registers::Name(rt_reg), rt, Registers::Name(rs_reg), rs, ++ se_imm16); + set_register(rt_reg, ReadH(rs + se_imm16, instr_.instr())); + break; + case LWL: { ++ printf_instr("LWL\t %s: %016lx, %s: %016lx, imm16: %04lx\n", ++ Registers::Name(rt_reg), rt, Registers::Name(rs_reg), rs, ++ se_imm16); + local_monitor_.NotifyLoad(); + // al_offset is offset of the effective address within an aligned word. + uint8_t al_offset = (rs + se_imm16) & kInt32AlignmentMask; +@@ -6938,21 +7160,39 @@ void Simulator::DecodeTypeImmediate() { + break; + } + case LW: ++ printf_instr("LW\t %s: %016lx, %s: %016lx, imm16: %04lx\n", ++ Registers::Name(rt_reg), rt, Registers::Name(rs_reg), rs, ++ se_imm16); + set_register(rt_reg, ReadW(rs + se_imm16, instr_.instr())); + break; + case LWU: ++ printf_instr("LWU\t %s: %016lx, %s: %016lx, imm16: %04lx\n", ++ Registers::Name(rt_reg), rt, Registers::Name(rs_reg), rs, ++ se_imm16); + set_register(rt_reg, ReadWU(rs + se_imm16, instr_.instr())); + break; + case LD: ++ printf_instr("LD\t %s: %016lx, %s: %016lx, imm16: %04lx\n", ++ Registers::Name(rt_reg), rt, Registers::Name(rs_reg), rs, ++ se_imm16); + set_register(rt_reg, Read2W(rs + se_imm16, instr_.instr())); + break; + case LBU: ++ printf_instr("LBU\t %s: %016lx, %s: %016lx, imm16: %04lx\n", ++ Registers::Name(rt_reg), rt, Registers::Name(rs_reg), rs, ++ se_imm16); + set_register(rt_reg, ReadBU(rs + se_imm16)); + break; + case LHU: ++ printf_instr("LHU\t %s: %016lx, %s: %016lx, imm16: %04lx\n", ++ Registers::Name(rt_reg), rt, Registers::Name(rs_reg), rs, ++ se_imm16); + set_register(rt_reg, ReadHU(rs + se_imm16, instr_.instr())); + break; + case LWR: { ++ printf_instr("LWR\t %s: %016lx, %s: %016lx, imm16: %04lx\n", ++ Registers::Name(rt_reg), rt, Registers::Name(rs_reg), rs, ++ se_imm16); + // al_offset is offset of the effective address within an aligned word. + uint8_t al_offset = (rs + se_imm16) & kInt32AlignmentMask; + uint8_t byte_shift = kInt32AlignmentMask - al_offset; +@@ -6965,6 +7205,9 @@ void Simulator::DecodeTypeImmediate() { + break; + } + case LDL: { ++ printf_instr("LDL\t %s: %016lx, %s: %016lx, imm16: %04lx\n", ++ Registers::Name(rt_reg), rt, Registers::Name(rs_reg), rs, ++ se_imm16); + // al_offset is offset of the effective address within an aligned word. + uint8_t al_offset = (rs + se_imm16) & kInt64AlignmentMask; + uint8_t byte_shift = kInt64AlignmentMask - al_offset; +@@ -6977,6 +7220,9 @@ void Simulator::DecodeTypeImmediate() { + break; + } + case LDR: { ++ printf_instr("LDR\t %s: %016lx, %s: %016lx, imm16: %04lx\n", ++ Registers::Name(rt_reg), rt, Registers::Name(rs_reg), rs, ++ se_imm16); + // al_offset is offset of the effective address within an aligned word. 
+ uint8_t al_offset = (rs + se_imm16) & kInt64AlignmentMask; + uint8_t byte_shift = kInt64AlignmentMask - al_offset; +@@ -6989,12 +7235,21 @@ void Simulator::DecodeTypeImmediate() { + break; + } + case SB: ++ printf_instr("SB\t %s: %016lx, (%s: %016lx), imm16: %04lx\n", ++ Registers::Name(rt_reg), rt, Registers::Name(rs_reg), rs, ++ se_imm16); + WriteB(rs + se_imm16, static_cast(rt)); + break; + case SH: ++ printf_instr("SH\t %s: %016lx, (%s: %016lx), imm16: %04lx\n", ++ Registers::Name(rt_reg), rt, Registers::Name(rs_reg), rs, ++ se_imm16); + WriteH(rs + se_imm16, static_cast(rt), instr_.instr()); + break; + case SWL: { ++ printf_instr("SWL\t %s: %016lx, (%s: %016lx), imm16: %04lx\n", ++ Registers::Name(rt_reg), rt, Registers::Name(rs_reg), rs, ++ se_imm16); + uint8_t al_offset = (rs + se_imm16) & kInt32AlignmentMask; + uint8_t byte_shift = kInt32AlignmentMask - al_offset; + uint32_t mask = byte_shift ? (~0 << (al_offset + 1) * 8) : 0; +@@ -7005,12 +7260,21 @@ void Simulator::DecodeTypeImmediate() { + break; + } + case SW: ++ printf_instr("SW\t %s: %016lx, (%s: %016lx), imm16: %04lx\n", ++ Registers::Name(rt_reg), rt, Registers::Name(rs_reg), rs, ++ se_imm16); + WriteW(rs + se_imm16, static_cast(rt), instr_.instr()); + break; + case SD: ++ printf_instr("SD\t %s: %016lx, (%s: %016lx), imm16: %04lx\n", ++ Registers::Name(rt_reg), rt, Registers::Name(rs_reg), rs, ++ se_imm16); + Write2W(rs + se_imm16, rt, instr_.instr()); + break; + case SWR: { ++ printf_instr("SWR\t %s: %016lx, (%s: %016lx), imm16: %04lx\n", ++ Registers::Name(rt_reg), rt, Registers::Name(rs_reg), rs, ++ se_imm16); + uint8_t al_offset = (rs + se_imm16) & kInt32AlignmentMask; + uint32_t mask = (1 << al_offset * 8) - 1; + addr = rs + se_imm16 - al_offset; +@@ -7020,6 +7284,9 @@ void Simulator::DecodeTypeImmediate() { + break; + } + case SDL: { ++ printf_instr("SDL\t %s: %016lx, (%s: %016lx), imm16: %04lx\n", ++ Registers::Name(rt_reg), rt, Registers::Name(rs_reg), rs, ++ se_imm16); + uint8_t al_offset = (rs + se_imm16) & kInt64AlignmentMask; + uint8_t byte_shift = kInt64AlignmentMask - al_offset; + uint64_t mask = byte_shift ? 
(~0UL << (al_offset + 1) * 8) : 0; +@@ -7030,6 +7297,9 @@ void Simulator::DecodeTypeImmediate() { + break; + } + case SDR: { ++ printf_instr("SDR\t %s: %016lx, (%s: %016lx), imm16: %04lx\n", ++ Registers::Name(rt_reg), rt, Registers::Name(rs_reg), rs, ++ se_imm16); + uint8_t al_offset = (rs + se_imm16) & kInt64AlignmentMask; + uint64_t mask = (1UL << al_offset * 8) - 1; + addr = rs + se_imm16 - al_offset; +@@ -7055,6 +7325,9 @@ void Simulator::DecodeTypeImmediate() { + break; + } + case LLD: { ++ printf_instr("LLD\t %s: %016lx, %s: %016lx, imm16: %04lx\n", ++ Registers::Name(rt_reg), rt, Registers::Name(rs_reg), rs, ++ se_imm16); + DCHECK(kArchVariant != kMips64r6); + base::MutexGuard lock_guard(&GlobalMonitor::Get()->mutex); + addr = rs + se_imm16; +@@ -7065,6 +7338,9 @@ void Simulator::DecodeTypeImmediate() { + break; + } + case SCD: { ++ printf_instr("SCD\t %s: %016lx, (%s: %016lx), imm16: %04lx\n", ++ Registers::Name(rt_reg), rt, Registers::Name(rs_reg), rs, ++ se_imm16); + DCHECK(kArchVariant != kMips64r6); + addr = rs + se_imm16; + WriteConditional2W(addr, rt, instr_.instr(), rt_reg); +@@ -7080,11 +7356,17 @@ void Simulator::DecodeTypeImmediate() { + TraceMemRd(addr, get_fpu_register(ft_reg), DOUBLE); + break; + case SWC1: { ++ printf_instr("SWC1\t %s: %016f, %s: %016lx, imm16: %04lx\n", ++ FPURegisters::Name(ft_reg), get_fpu_register_float(ft_reg), ++ Registers::Name(rs_reg), rs, se_imm16); + int32_t alu_out_32 = static_cast(get_fpu_register(ft_reg)); + WriteW(rs + se_imm16, alu_out_32, instr_.instr()); + break; + } + case SDC1: ++ printf_instr("SDC1\t %s: %016f, %s: %016lx, imm16: %04lx\n", ++ FPURegisters::Name(ft_reg), get_fpu_register_double(ft_reg), ++ Registers::Name(rs_reg), rs, se_imm16); + WriteD(rs + se_imm16, get_fpu_register_double(ft_reg), instr_.instr()); + TraceMemWr(rs + se_imm16, get_fpu_register(ft_reg), DWORD); + break; +@@ -7257,6 +7539,8 @@ void Simulator::DecodeTypeJump() { + int64_t pc_high_bits = current_pc & 0xFFFFFFFFF0000000; + // Next pc. + int64_t next_pc = pc_high_bits | (simInstr.Imm26Value() << 2); ++ printf_instr("%s\t", simInstr.IsLinkingInstruction() ? "JAL" : "J"); ++ printf_instr("offs26: %x\n", instr_.Bits(25, 0)); + + // Execute branch delay slot. + // We don't check for end_sim_pc. First it should not be met as the current pc +@@ -7291,7 +7575,11 @@ void Simulator::InstructionDecode(Instruction* instr) { + dasm.InstructionDecode(buffer, reinterpret_cast(instr)); + } + ++ static int instr_count = 0; ++ USE(instr_count); + instr_ = instr; ++ printf_instr("\nInstr%3d: %08x, PC: %lx\t", instr_count++, instr_.Bits(31, 0), ++ get_pc()); + switch (instr_.InstructionType()) { + case Instruction::kRegisterType: + DecodeTypeRegister(); +diff --git a/src/3rdparty/chromium/v8/src/execution/simulator-base.h b/src/3rdparty/chromium/v8/src/execution/simulator-base.h +index 0fa98cb4054..12e0cad3e3d 100644 +--- a/src/3rdparty/chromium/v8/src/execution/simulator-base.h ++++ b/src/3rdparty/chromium/v8/src/execution/simulator-base.h +@@ -88,7 +88,7 @@ class SimulatorBase { + static typename std::enable_if::value, intptr_t>::type + ConvertArg(T arg) { + static_assert(sizeof(T) <= sizeof(intptr_t), "type bigger than ptrsize"); +-#if V8_TARGET_ARCH_MIPS64 ++#if V8_TARGET_ARCH_MIPS64 || V8_TARGET_ARCH_LA64 + // The MIPS64 calling convention is to sign extend all values, even unsigned + // ones. 
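++  // The LoongArch64 LP64 convention behaves the same way, so LA64 goes
++  // through this sign-extending path as well.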
+ using signed_t = typename std::make_signed::type; +diff --git a/src/3rdparty/chromium/v8/src/execution/simulator.h b/src/3rdparty/chromium/v8/src/execution/simulator.h +index a4e07b235b4..1bc39ac7e70 100644 +--- a/src/3rdparty/chromium/v8/src/execution/simulator.h ++++ b/src/3rdparty/chromium/v8/src/execution/simulator.h +@@ -24,6 +24,8 @@ + #include "src/execution/mips/simulator-mips.h" + #elif V8_TARGET_ARCH_MIPS64 + #include "src/execution/mips64/simulator-mips64.h" ++#elif V8_TARGET_ARCH_LA64 ++#include "src/execution/la64/simulator-la64.h" + #elif V8_TARGET_ARCH_S390 + #include "src/execution/s390/simulator-s390.h" + #else +diff --git a/src/3rdparty/chromium/v8/src/flags/flag-definitions.h b/src/3rdparty/chromium/v8/src/flags/flag-definitions.h +index c3f360cdf0d..f14b6a1e5e6 100644 +--- a/src/3rdparty/chromium/v8/src/flags/flag-definitions.h ++++ b/src/3rdparty/chromium/v8/src/flags/flag-definitions.h +@@ -1246,7 +1246,7 @@ DEFINE_BOOL(check_icache, false, + "Check icache flushes in ARM and MIPS simulator") + DEFINE_INT(stop_sim_at, 0, "Simulator stop after x number of instructions") + #if defined(V8_TARGET_ARCH_ARM64) || defined(V8_TARGET_ARCH_MIPS64) || \ +- defined(V8_TARGET_ARCH_PPC64) ++ defined(V8_TARGET_ARCH_PPC64) || defined(V8_TARGET_ARCH_LA64) + DEFINE_INT(sim_stack_alignment, 16, + "Stack alignment in bytes in simulator. This must be a power of two " + "and it must be at least 16. 16 is default.") +diff --git a/src/3rdparty/chromium/v8/src/heap/base/asm/la64/push_registers_asm.cc b/src/3rdparty/chromium/v8/src/heap/base/asm/la64/push_registers_asm.cc +new file mode 100644 +index 00000000000..c9e6f5d2cc8 +--- /dev/null ++++ b/src/3rdparty/chromium/v8/src/heap/base/asm/la64/push_registers_asm.cc +@@ -0,0 +1,48 @@ ++// Copyright 2020 the V8 project authors. All rights reserved. ++// Use of this source code is governed by a BSD-style license that can be ++// found in the LICENSE file. ++ ++// Push all callee-saved registers to get them on the stack for conservative ++// stack scanning. ++// ++// See asm/x64/push_registers_clang.cc for why the function is not generated ++// using clang. ++// ++// Do not depend on V8_TARGET_OS_* defines as some embedders may override the ++// GN toolchain (e.g. ChromeOS) and not provide them. ++asm(".text \n" ++ ".global PushAllRegistersAndIterateStack \n" ++ ".type PushAllRegistersAndIterateStack, %function \n" ++ ".hidden PushAllRegistersAndIterateStack \n" ++ "PushAllRegistersAndIterateStack: \n" ++ // Push all callee-saved registers and save return address. ++ " addi.d $sp, $sp, -96 \n" ++ " st.d $ra, $sp, 88 \n" ++ " st.d $s8, $sp, 80 \n" ++ " st.d $sp, $sp, 72 \n" ++ " st.d $fp, $sp, 64 \n" ++ " st.d $s7, $sp, 56 \n" ++ " st.d $s6, $sp, 48 \n" ++ " st.d $s5, $sp, 40 \n" ++ " st.d $s4, $sp, 32 \n" ++ " st.d $s3, $sp, 24 \n" ++ " st.d $s2, $sp, 16 \n" ++ " st.d $s1, $sp, 8 \n" ++ " st.d $s0, $sp, 0 \n" ++ // Maintain frame pointer. ++ " addi.d $s8, $sp, 0 \n" ++ // Pass 1st parameter (a0) unchanged (Stack*). ++ // Pass 2nd parameter (a1) unchanged (StackVisitor*). ++ // Save 3rd parameter (a2; IterateStackCallback). ++ " addi.d $a3, $a2, 0 \n" ++ // Call the callback. ++ // Pass 3rd parameter as sp (stack pointer). ++ " addi.d $a2, $sp, 0 \n" ++ " jirl $ra, $a3, 0 \n" ++ // Load return address. ++ " ld.d $ra, $sp, 88 \n" ++ // Restore frame pointer. ++ " ld.d $s8, $sp, 80 \n" ++ // Discard all callee-saved registers. 
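++    // (The remaining callee-saved registers were only pushed so the stack
++    // scan can see them; the C++ callback preserves them, so only $ra and
++    // $s8 need to be reloaded.)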
++ " addi.d $sp, $sp, 96 \n" ++ " jirl $zero, $ra, 0 \n"); +diff --git a/src/3rdparty/chromium/v8/src/interpreter/interpreter-assembler.cc b/src/3rdparty/chromium/v8/src/interpreter/interpreter-assembler.cc +index eaea1c91dd8..66775d6dfee 100644 +--- a/src/3rdparty/chromium/v8/src/interpreter/interpreter-assembler.cc ++++ b/src/3rdparty/chromium/v8/src/interpreter/interpreter-assembler.cc +@@ -1484,7 +1484,7 @@ void InterpreterAssembler::TraceBytecodeDispatch(TNode target_bytecode) { + + // static + bool InterpreterAssembler::TargetSupportsUnalignedAccess() { +-#if V8_TARGET_ARCH_MIPS || V8_TARGET_ARCH_MIPS64 ++#if V8_TARGET_ARCH_MIPS || V8_TARGET_ARCH_MIPS64 || V8_TARGET_ARCH_LA64 + return false; + #elif V8_TARGET_ARCH_IA32 || V8_TARGET_ARCH_X64 || V8_TARGET_ARCH_S390 || \ + V8_TARGET_ARCH_ARM || V8_TARGET_ARCH_ARM64 || V8_TARGET_ARCH_PPC || \ +diff --git a/src/3rdparty/chromium/v8/src/libsampler/sampler.cc b/src/3rdparty/chromium/v8/src/libsampler/sampler.cc +index e2091ceb32a..6ebb1b8305b 100644 +--- a/src/3rdparty/chromium/v8/src/libsampler/sampler.cc ++++ b/src/3rdparty/chromium/v8/src/libsampler/sampler.cc +@@ -415,6 +415,10 @@ void SignalHandler::FillRegisterState(void* context, RegisterState* state) { + state->pc = reinterpret_cast(mcontext.pc); + state->sp = reinterpret_cast(mcontext.gregs[29]); + state->fp = reinterpret_cast(mcontext.gregs[30]); ++#elif V8_HOST_ARCH_LA64 ++ state->pc = reinterpret_cast(mcontext.__pc); ++ state->sp = reinterpret_cast(mcontext.__gregs[3]); ++ state->fp = reinterpret_cast(mcontext.__gregs[22]); + #elif V8_HOST_ARCH_PPC || V8_HOST_ARCH_PPC64 + #if V8_LIBC_GLIBC + state->pc = reinterpret_cast(ucontext->uc_mcontext.regs->nip); +diff --git a/src/3rdparty/chromium/v8/src/logging/log.cc b/src/3rdparty/chromium/v8/src/logging/log.cc +index dc79ffda5e5..6c745cea8c0 100644 +--- a/src/3rdparty/chromium/v8/src/logging/log.cc ++++ b/src/3rdparty/chromium/v8/src/logging/log.cc +@@ -588,6 +588,8 @@ void LowLevelLogger::LogCodeInfo() { + const char arch[] = "ppc64"; + #elif V8_TARGET_ARCH_MIPS + const char arch[] = "mips"; ++#elif V8_TARGET_ARCH_LA64 ++ const char arch[] = "la64"; + #elif V8_TARGET_ARCH_ARM64 + const char arch[] = "arm64"; + #elif V8_TARGET_ARCH_S390 +diff --git a/src/3rdparty/chromium/v8/src/objects/backing-store.cc b/src/3rdparty/chromium/v8/src/objects/backing-store.cc +index 52ab0085f7c..c96faf197bf 100644 +--- a/src/3rdparty/chromium/v8/src/objects/backing-store.cc ++++ b/src/3rdparty/chromium/v8/src/objects/backing-store.cc +@@ -29,7 +29,7 @@ constexpr bool kUseGuardRegions = true; + constexpr bool kUseGuardRegions = false; + #endif + +-#if V8_TARGET_ARCH_MIPS64 ++#if V8_TARGET_ARCH_MIPS64 || V8_TARGET_ARCH_LA64 + // MIPS64 has a user space of 2^40 bytes on most processors, + // address space limits needs to be smaller. + constexpr size_t kAddressSpaceLimit = 0x8000000000L; // 512 GiB +diff --git a/src/3rdparty/chromium/v8/src/objects/code.h b/src/3rdparty/chromium/v8/src/objects/code.h +index d80e72fa038..7da4c617461 100644 +--- a/src/3rdparty/chromium/v8/src/objects/code.h ++++ b/src/3rdparty/chromium/v8/src/objects/code.h +@@ -412,6 +412,8 @@ class Code : public HeapObject { + static constexpr int kHeaderPaddingSize = COMPRESS_POINTERS_BOOL ? 20 : 0; + #elif V8_TARGET_ARCH_MIPS64 + static constexpr int kHeaderPaddingSize = 0; ++#elif V8_TARGET_ARCH_LA64 ++ static constexpr int kHeaderPaddingSize = 0; + #elif V8_TARGET_ARCH_X64 + static constexpr int kHeaderPaddingSize = COMPRESS_POINTERS_BOOL ? 
20 : 0; + #elif V8_TARGET_ARCH_ARM +diff --git a/src/3rdparty/chromium/v8/src/profiler/tick-sample.cc b/src/3rdparty/chromium/v8/src/profiler/tick-sample.cc +index 00bff91cd0a..56654b6288d 100644 +--- a/src/3rdparty/chromium/v8/src/profiler/tick-sample.cc ++++ b/src/3rdparty/chromium/v8/src/profiler/tick-sample.cc +@@ -104,7 +104,7 @@ bool SimulatorHelper::FillRegisters(Isolate* isolate, + state->sp = reinterpret_cast(simulator->sp()); + state->fp = reinterpret_cast(simulator->fp()); + state->lr = reinterpret_cast(simulator->lr()); +-#elif V8_TARGET_ARCH_MIPS || V8_TARGET_ARCH_MIPS64 ++#elif V8_TARGET_ARCH_MIPS || V8_TARGET_ARCH_MIPS64 || V8_TARGET_ARCH_LA64 + if (!simulator->has_bad_pc()) { + state->pc = reinterpret_cast(simulator->get_pc()); + } +diff --git a/src/3rdparty/chromium/v8/src/regexp/la64/regexp-macro-assembler-la64.cc b/src/3rdparty/chromium/v8/src/regexp/la64/regexp-macro-assembler-la64.cc +new file mode 100644 +index 00000000000..8a5e9c30c6d +--- /dev/null ++++ b/src/3rdparty/chromium/v8/src/regexp/la64/regexp-macro-assembler-la64.cc +@@ -0,0 +1,1286 @@ ++// Copyright 2012 the V8 project authors. All rights reserved. ++// Use of this source code is governed by a BSD-style license that can be ++// found in the LICENSE file. ++ ++#if V8_TARGET_ARCH_LA64 ++ ++#include "src/regexp/la64/regexp-macro-assembler-la64.h" ++ ++#include "src/codegen/assembler-inl.h" ++#include "src/codegen/macro-assembler.h" ++#include "src/logging/log.h" ++#include "src/objects/objects-inl.h" ++#include "src/regexp/regexp-macro-assembler.h" ++#include "src/regexp/regexp-stack.h" ++#include "src/snapshot/embedded/embedded-data.h" ++#include "src/strings/unicode.h" ++ ++namespace v8 { ++namespace internal { ++ ++/* clang-format off ++ * ++ * This assembler uses the following register assignment convention ++ * - t3 : Temporarily stores the index of capture start after a matching pass ++ * for a global regexp. ++ * - a5 : Pointer to current Code object including heap object tag. ++ * - a6 : Current position in input, as negative offset from end of string. ++ * Please notice that this is the byte offset, not the character offset! ++ * - a7 : Currently loaded character. Must be loaded using ++ * LoadCurrentCharacter before using any of the dispatch methods. ++ * - t0 : Points to tip of backtrack stack ++ * - t1 : Unused. ++ * - t2 : End of input (points to byte after last character in input). ++ * - fp : Frame pointer. Used to access arguments, local variables and ++ * RegExp registers. ++ * - sp : Points to tip of C stack. ++ * ++ * The remaining registers are free for computations. ++ * Each call to a public method should retain this convention. ++ * ++ * TODO(plind): O32 documented here with intent of having single 32/64 codebase ++ * in the future. ++ * ++ * The O32 stack will have the following structure: ++ * ++ * - fp[72] Isolate* isolate (address of the current isolate) ++ * - fp[68] direct_call (if 1, direct call from JavaScript code, ++ * if 0, call through the runtime system). ++ * - fp[64] stack_area_base (High end of the memory area to use as ++ * backtracking stack). ++ * - fp[60] capture array size (may fit multiple sets of matches) ++ * - fp[44..59] MIPS O32 four argument slots ++ * - fp[40] int* capture_array (int[num_saved_registers_], for output). ++ * --- sp when called --- ++ * - fp[36] return address (lr). ++ * - fp[32] old frame pointer (r11). ++ * - fp[0..31] backup of registers s0..s7. ++ * --- frame pointer ---- ++ * - fp[-4] end of input (address of end of string). 
++ * - fp[-8] start of input (address of first character in string). ++ * - fp[-12] start index (character index of start). ++ * - fp[-16] void* input_string (location of a handle containing the string). ++ * - fp[-20] success counter (only for global regexps to count matches). ++ * - fp[-24] Offset of location before start of input (effectively character ++ * string start - 1). Used to initialize capture registers to a ++ * non-position. ++ * - fp[-28] At start (if 1, we are starting at the start of the ++ * string, otherwise 0) ++ * - fp[-32] register 0 (Only positions must be stored in the first ++ * - register 1 num_saved_registers_ registers) ++ * - ... ++ * - register num_registers-1 ++ * --- sp --- ++ * ++ * ++ * The N64 stack will have the following structure: ++ * ++ * - fp[80] Isolate* isolate (address of the current isolate) kIsolate ++ * kStackFrameHeader ++ * --- sp when called --- ++ * - fp[72] ra Return from RegExp code (ra). kReturnAddress ++ * - fp[64] s9, old-fp Old fp, callee saved(s9). ++ * - fp[0..63] s0..s7 Callee-saved registers s0..s7. ++ * --- frame pointer ---- ++ * - fp[-8] direct_call (1 = direct call from JS, 0 = from runtime) kDirectCall ++ * - fp[-16] stack_base (Top of backtracking stack). kStackHighEnd ++ * - fp[-24] capture array size (may fit multiple sets of matches) kNumOutputRegisters ++ * - fp[-32] int* capture_array (int[num_saved_registers_], for output). kRegisterOutput ++ * - fp[-40] end of input (address of end of string). kInputEnd ++ * - fp[-48] start of input (address of first character in string). kInputStart ++ * - fp[-56] start index (character index of start). kStartIndex ++ * - fp[-64] void* input_string (location of a handle containing the string). kInputString ++ * - fp[-72] success counter (only for global regexps to count matches). kSuccessfulCaptures ++ * - fp[-80] Offset of location before start of input (effectively character kStringStartMinusOne ++ * position -1). Used to initialize capture registers to a ++ * non-position. ++ * --------- The following output registers are 32-bit values. --------- ++ * - fp[-88] register 0 (Only positions must be stored in the first kRegisterZero ++ * - register 1 num_saved_registers_ registers) ++ * - ... ++ * - register num_registers-1 ++ * --- sp --- ++ * ++ * The first num_saved_registers_ registers are initialized to point to ++ * "character -1" in the string (i.e., char_size() bytes before the first ++ * character of the string). The remaining registers start out as garbage. ++ * ++ * The data up to the return address must be placed there by the calling ++ * code and the remaining arguments are passed in registers, e.g. by calling the ++ * code entry as cast to a function with the signature: ++ * int (*match)(String input_string, ++ * int start_index, ++ * Address start, ++ * Address end, ++ * int* capture_output_array, ++ * int num_capture_registers, ++ * byte* stack_area_base, ++ * bool direct_call = false, ++ * Isolate* isolate); ++ * The call is performed by NativeRegExpMacroAssembler::Execute() ++ * (in regexp-macro-assembler.cc) via the GeneratedCode wrapper. 
++ * ++ * clang-format on ++ */ ++ ++#define __ ACCESS_MASM(masm_) ++ ++const int RegExpMacroAssemblerLA64::kRegExpCodeSize; ++ ++RegExpMacroAssemblerLA64::RegExpMacroAssemblerLA64(Isolate* isolate, Zone* zone, ++ Mode mode, ++ int registers_to_save) ++ : NativeRegExpMacroAssembler(isolate, zone), ++ masm_(new MacroAssembler(isolate, CodeObjectRequired::kYes, ++ NewAssemblerBuffer(kRegExpCodeSize))), ++ mode_(mode), ++ num_registers_(registers_to_save), ++ num_saved_registers_(registers_to_save), ++ entry_label_(), ++ start_label_(), ++ success_label_(), ++ backtrack_label_(), ++ exit_label_(), ++ internal_failure_label_() { ++ masm_->set_root_array_available(false); ++ ++ DCHECK_EQ(0, registers_to_save % 2); ++ __ jmp(&entry_label_); // We'll write the entry code later. ++ // If the code gets too big or corrupted, an internal exception will be ++ // raised, and we will exit right away. ++ __ bind(&internal_failure_label_); ++ __ li(a0, Operand(FAILURE)); ++ __ Ret(); ++ __ bind(&start_label_); // And then continue from here. ++} ++ ++RegExpMacroAssemblerLA64::~RegExpMacroAssemblerLA64() { ++ delete masm_; ++ // Unuse labels in case we throw away the assembler without calling GetCode. ++ entry_label_.Unuse(); ++ start_label_.Unuse(); ++ success_label_.Unuse(); ++ backtrack_label_.Unuse(); ++ exit_label_.Unuse(); ++ check_preempt_label_.Unuse(); ++ stack_overflow_label_.Unuse(); ++ internal_failure_label_.Unuse(); ++} ++ ++int RegExpMacroAssemblerLA64::stack_limit_slack() { ++ return RegExpStack::kStackLimitSlack; ++} ++ ++void RegExpMacroAssemblerLA64::AdvanceCurrentPosition(int by) { ++ if (by != 0) { ++ __ Add_d(current_input_offset(), current_input_offset(), ++ Operand(by * char_size())); ++ } ++} ++ ++void RegExpMacroAssemblerLA64::AdvanceRegister(int reg, int by) { ++ DCHECK_LE(0, reg); ++ DCHECK_GT(num_registers_, reg); ++ if (by != 0) { ++ __ Ld_d(a0, register_location(reg)); ++ __ Add_d(a0, a0, Operand(by)); ++ __ St_d(a0, register_location(reg)); ++ } ++} ++ ++void RegExpMacroAssemblerLA64::Backtrack() { ++ CheckPreemption(); ++ if (has_backtrack_limit()) { ++ Label next; ++ __ Ld_d(a0, MemOperand(frame_pointer(), kBacktrackCount)); ++ __ Add_d(a0, a0, Operand(1)); ++ __ St_d(a0, MemOperand(frame_pointer(), kBacktrackCount)); ++ __ Branch(&next, ne, a0, Operand(backtrack_limit())); ++ ++ // Exceeded limits are treated as a failed match. ++ Fail(); ++ ++ __ bind(&next); ++ } ++ // Pop Code offset from backtrack stack, add Code and jump to location. 
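++  // Offsets into the Code object (rather than absolute addresses) are kept
++  // on the backtrack stack, so the entries stay valid if the Code object
++  // moves.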
++ Pop(a0); ++ __ Add_d(a0, a0, code_pointer()); ++ __ Jump(a0); ++} ++ ++void RegExpMacroAssemblerLA64::Bind(Label* label) { __ bind(label); } ++ ++void RegExpMacroAssemblerLA64::CheckCharacter(uint32_t c, Label* on_equal) { ++ BranchOrBacktrack(on_equal, eq, current_character(), Operand(c)); ++} ++ ++void RegExpMacroAssemblerLA64::CheckCharacterGT(uc16 limit, Label* on_greater) { ++ BranchOrBacktrack(on_greater, gt, current_character(), Operand(limit)); ++} ++ ++void RegExpMacroAssemblerLA64::CheckAtStart(int cp_offset, Label* on_at_start) { ++ __ Ld_d(a1, MemOperand(frame_pointer(), kStringStartMinusOne)); ++ __ Add_d(a0, current_input_offset(), ++ Operand(-char_size() + cp_offset * char_size())); ++ BranchOrBacktrack(on_at_start, eq, a0, Operand(a1)); ++} ++ ++void RegExpMacroAssemblerLA64::CheckNotAtStart(int cp_offset, ++ Label* on_not_at_start) { ++ __ Ld_d(a1, MemOperand(frame_pointer(), kStringStartMinusOne)); ++ __ Add_d(a0, current_input_offset(), ++ Operand(-char_size() + cp_offset * char_size())); ++ BranchOrBacktrack(on_not_at_start, ne, a0, Operand(a1)); ++} ++ ++void RegExpMacroAssemblerLA64::CheckCharacterLT(uc16 limit, Label* on_less) { ++ BranchOrBacktrack(on_less, lt, current_character(), Operand(limit)); ++} ++ ++void RegExpMacroAssemblerLA64::CheckGreedyLoop(Label* on_equal) { ++ Label backtrack_non_equal; ++ __ Ld_w(a0, MemOperand(backtrack_stackpointer(), 0)); ++ __ Branch(&backtrack_non_equal, ne, current_input_offset(), Operand(a0)); ++ __ Add_d(backtrack_stackpointer(), backtrack_stackpointer(), ++ Operand(kIntSize)); ++ __ bind(&backtrack_non_equal); ++ BranchOrBacktrack(on_equal, eq, current_input_offset(), Operand(a0)); ++} ++ ++void RegExpMacroAssemblerLA64::CheckNotBackReferenceIgnoreCase( ++ int start_reg, bool read_backward, Label* on_no_match) { ++ Label fallthrough; ++ __ Ld_d(a0, register_location(start_reg)); // Index of start of capture. ++ __ Ld_d(a1, register_location(start_reg + 1)); // Index of end of capture. ++ __ Sub_d(a1, a1, a0); // Length of capture. ++ ++ // At this point, the capture registers are either both set or both cleared. ++ // If the capture length is zero, then the capture is either empty or cleared. ++ // Fall through in both cases. ++ __ Branch(&fallthrough, eq, a1, Operand(zero_reg)); ++ ++ if (read_backward) { ++ __ Ld_d(t1, MemOperand(frame_pointer(), kStringStartMinusOne)); ++ __ Add_d(t1, t1, a1); ++ BranchOrBacktrack(on_no_match, le, current_input_offset(), Operand(t1)); ++ } else { ++ __ Add_d(t1, a1, current_input_offset()); ++ // Check that there are enough characters left in the input. ++ BranchOrBacktrack(on_no_match, gt, t1, Operand(zero_reg)); ++ } ++ ++ if (mode_ == LATIN1) { ++ Label success; ++ Label fail; ++ Label loop_check; ++ ++ // a0 - offset of start of capture. ++ // a1 - length of capture. ++ __ Add_d(a0, a0, Operand(end_of_input_address())); ++ __ Add_d(a2, end_of_input_address(), Operand(current_input_offset())); ++ if (read_backward) { ++ __ Sub_d(a2, a2, Operand(a1)); ++ } ++ __ Add_d(a1, a0, Operand(a1)); ++ ++ // a0 - Address of start of capture. ++ // a1 - Address of end of capture. ++ // a2 - Address of current input position. ++ ++ Label loop; ++ __ bind(&loop); ++ __ Ld_bu(a3, MemOperand(a0, 0)); ++ __ addi_d(a0, a0, char_size()); ++ __ Ld_bu(a4, MemOperand(a2, 0)); ++ __ addi_d(a2, a2, char_size()); ++ ++ __ Branch(&loop_check, eq, a4, Operand(a3)); ++ ++ // Mismatch, try case-insensitive match (converting letters to lower-case). 
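++    // An ASCII letter differs from its lower-case form only in bit 0x20
++    // (e.g. 'A' 0x41 | 0x20 == 'a' 0x61), so OR-ing both characters with
++    // 0x20 folds case; the checks below then verify the folded value really
++    // is a letter.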
++ __ Or(a3, a3, Operand(0x20)); // Convert capture character to lower-case. ++ __ Or(a4, a4, Operand(0x20)); // Also convert input character. ++ __ Branch(&fail, ne, a4, Operand(a3)); ++ __ Sub_d(a3, a3, Operand('a')); ++ __ Branch(&loop_check, ls, a3, Operand('z' - 'a')); ++ // Latin-1: Check for values in range [224,254] but not 247. ++ __ Sub_d(a3, a3, Operand(224 - 'a')); ++ // Weren't Latin-1 letters. ++ __ Branch(&fail, hi, a3, Operand(254 - 224)); ++ // Check for 247. ++ __ Branch(&fail, eq, a3, Operand(247 - 224)); ++ ++ __ bind(&loop_check); ++ __ Branch(&loop, lt, a0, Operand(a1)); ++ __ jmp(&success); ++ ++ __ bind(&fail); ++ GoTo(on_no_match); ++ ++ __ bind(&success); ++ // Compute new value of character position after the matched part. ++ __ Sub_d(current_input_offset(), a2, end_of_input_address()); ++ if (read_backward) { ++ __ Ld_d(t1, register_location(start_reg)); // Index of start of capture. ++ __ Ld_d(a2, ++ register_location(start_reg + 1)); // Index of end of capture. ++ __ Add_d(current_input_offset(), current_input_offset(), Operand(t1)); ++ __ Sub_d(current_input_offset(), current_input_offset(), Operand(a2)); ++ } ++ } else { ++ DCHECK(mode_ == UC16); ++ // Put regexp engine registers on stack. ++ RegList regexp_registers_to_retain = current_input_offset().bit() | ++ current_character().bit() | ++ backtrack_stackpointer().bit(); ++ __ MultiPush(regexp_registers_to_retain); ++ ++ int argument_count = 4; ++ __ PrepareCallCFunction(argument_count, a2); ++ ++ // a0 - offset of start of capture. ++ // a1 - length of capture. ++ ++ // Put arguments into arguments registers. ++ // Parameters are ++ // a0: Address byte_offset1 - Address captured substring's start. ++ // a1: Address byte_offset2 - Address of current character position. ++ // a2: size_t byte_length - length of capture in bytes(!). ++ // a3: Isolate* isolate. ++ ++ // Address of start of capture. ++ __ Add_d(a0, a0, Operand(end_of_input_address())); ++ // Length of capture. ++ __ mov(a2, a1); ++ // Save length in callee-save register for use on return. ++ __ mov(s3, a1); ++ // Address of current input position. ++ __ Add_d(a1, current_input_offset(), Operand(end_of_input_address())); ++ if (read_backward) { ++ __ Sub_d(a1, a1, Operand(s3)); ++ } ++ // Isolate. ++ __ li(a3, Operand(ExternalReference::isolate_address(masm_->isolate()))); ++ ++ { ++ AllowExternalCallThatCantCauseGC scope(masm_); ++ ExternalReference function = ++ ExternalReference::re_case_insensitive_compare_uc16(masm_->isolate()); ++ __ CallCFunction(function, argument_count); ++ } ++ ++ // Restore regexp engine registers. ++ __ MultiPop(regexp_registers_to_retain); ++ __ li(code_pointer(), Operand(masm_->CodeObject()), CONSTANT_SIZE); ++ __ Ld_d(end_of_input_address(), MemOperand(frame_pointer(), kInputEnd)); ++ ++ // Check if function returned non-zero for success or zero for failure. ++ BranchOrBacktrack(on_no_match, eq, a0, Operand(zero_reg)); ++ // On success, increment position by length of capture. ++ if (read_backward) { ++ __ Sub_d(current_input_offset(), current_input_offset(), Operand(s3)); ++ } else { ++ __ Add_d(current_input_offset(), current_input_offset(), Operand(s3)); ++ } ++ } ++ ++ __ bind(&fallthrough); ++} ++ ++void RegExpMacroAssemblerLA64::CheckNotBackReference(int start_reg, ++ bool read_backward, ++ Label* on_no_match) { ++ Label fallthrough; ++ ++ // Find length of back-referenced capture. 
++ __ Ld_d(a0, register_location(start_reg)); ++ __ Ld_d(a1, register_location(start_reg + 1)); ++ __ Sub_d(a1, a1, a0); // Length to check. ++ ++ // At this point, the capture registers are either both set or both cleared. ++ // If the capture length is zero, then the capture is either empty or cleared. ++ // Fall through in both cases. ++ __ Branch(&fallthrough, eq, a1, Operand(zero_reg)); ++ ++ if (read_backward) { ++ __ Ld_d(t1, MemOperand(frame_pointer(), kStringStartMinusOne)); ++ __ Add_d(t1, t1, a1); ++ BranchOrBacktrack(on_no_match, le, current_input_offset(), Operand(t1)); ++ } else { ++ __ Add_d(t1, a1, current_input_offset()); ++ // Check that there are enough characters left in the input. ++ BranchOrBacktrack(on_no_match, gt, t1, Operand(zero_reg)); ++ } ++ ++ // Compute pointers to match string and capture string. ++ __ Add_d(a0, a0, Operand(end_of_input_address())); ++ __ Add_d(a2, end_of_input_address(), Operand(current_input_offset())); ++ if (read_backward) { ++ __ Sub_d(a2, a2, Operand(a1)); ++ } ++ __ Add_d(a1, a1, Operand(a0)); ++ ++ Label loop; ++ __ bind(&loop); ++ if (mode_ == LATIN1) { ++ __ Ld_bu(a3, MemOperand(a0, 0)); ++ __ addi_d(a0, a0, char_size()); ++ __ Ld_bu(a4, MemOperand(a2, 0)); ++ __ addi_d(a2, a2, char_size()); ++ } else { ++ DCHECK(mode_ == UC16); ++ __ Ld_hu(a3, MemOperand(a0, 0)); ++ __ addi_d(a0, a0, char_size()); ++ __ Ld_hu(a4, MemOperand(a2, 0)); ++ __ addi_d(a2, a2, char_size()); ++ } ++ BranchOrBacktrack(on_no_match, ne, a3, Operand(a4)); ++ __ Branch(&loop, lt, a0, Operand(a1)); ++ ++ // Move current character position to position after match. ++ __ Sub_d(current_input_offset(), a2, end_of_input_address()); ++ if (read_backward) { ++ __ Ld_d(t1, register_location(start_reg)); // Index of start of capture. ++ __ Ld_d(a2, register_location(start_reg + 1)); // Index of end of capture. ++ __ Add_d(current_input_offset(), current_input_offset(), Operand(t1)); ++ __ Sub_d(current_input_offset(), current_input_offset(), Operand(a2)); ++ } ++ __ bind(&fallthrough); ++} ++ ++void RegExpMacroAssemblerLA64::CheckNotCharacter(uint32_t c, ++ Label* on_not_equal) { ++ BranchOrBacktrack(on_not_equal, ne, current_character(), Operand(c)); ++} ++ ++void RegExpMacroAssemblerLA64::CheckCharacterAfterAnd(uint32_t c, uint32_t mask, ++ Label* on_equal) { ++ __ And(a0, current_character(), Operand(mask)); ++ Operand rhs = (c == 0) ? Operand(zero_reg) : Operand(c); ++ BranchOrBacktrack(on_equal, eq, a0, rhs); ++} ++ ++void RegExpMacroAssemblerLA64::CheckNotCharacterAfterAnd(uint32_t c, ++ uint32_t mask, ++ Label* on_not_equal) { ++ __ And(a0, current_character(), Operand(mask)); ++ Operand rhs = (c == 0) ? Operand(zero_reg) : Operand(c); ++ BranchOrBacktrack(on_not_equal, ne, a0, rhs); ++} ++ ++void RegExpMacroAssemblerLA64::CheckNotCharacterAfterMinusAnd( ++ uc16 c, uc16 minus, uc16 mask, Label* on_not_equal) { ++ DCHECK_GT(String::kMaxUtf16CodeUnit, minus); ++ __ Sub_d(a0, current_character(), Operand(minus)); ++ __ And(a0, a0, Operand(mask)); ++ BranchOrBacktrack(on_not_equal, ne, a0, Operand(c)); ++} ++ ++void RegExpMacroAssemblerLA64::CheckCharacterInRange(uc16 from, uc16 to, ++ Label* on_in_range) { ++ __ Sub_d(a0, current_character(), Operand(from)); ++ // Unsigned lower-or-same condition. ++ BranchOrBacktrack(on_in_range, ls, a0, Operand(to - from)); ++} ++ ++void RegExpMacroAssemblerLA64::CheckCharacterNotInRange( ++ uc16 from, uc16 to, Label* on_not_in_range) { ++ __ Sub_d(a0, current_character(), Operand(from)); ++ // Unsigned higher condition. 
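++  // (current_character() - from) is compared as an unsigned value, so
++  // characters below 'from' wrap around to large values and also take the
++  // not-in-range branch (e.g. from='a', c='A': 0x41 - 0x61 wraps past 'z'-'a').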
++ BranchOrBacktrack(on_not_in_range, hi, a0, Operand(to - from)); ++} ++ ++void RegExpMacroAssemblerLA64::CheckBitInTable(Handle table, ++ Label* on_bit_set) { ++ __ li(a0, Operand(table)); ++ if (mode_ != LATIN1 || kTableMask != String::kMaxOneByteCharCode) { ++ __ And(a1, current_character(), Operand(kTableSize - 1)); ++ __ Add_d(a0, a0, a1); ++ } else { ++ __ Add_d(a0, a0, current_character()); ++ } ++ ++ __ Ld_bu(a0, FieldMemOperand(a0, ByteArray::kHeaderSize)); ++ BranchOrBacktrack(on_bit_set, ne, a0, Operand(zero_reg)); ++} ++ ++bool RegExpMacroAssemblerLA64::CheckSpecialCharacterClass(uc16 type, ++ Label* on_no_match) { ++ // Range checks (c in min..max) are generally implemented by an unsigned ++ // (c - min) <= (max - min) check. ++ switch (type) { ++ case 's': ++ // Match space-characters. ++ if (mode_ == LATIN1) { ++ // One byte space characters are '\t'..'\r', ' ' and \u00a0. ++ Label success; ++ __ Branch(&success, eq, current_character(), Operand(' ')); ++ // Check range 0x09..0x0D. ++ __ Sub_d(a0, current_character(), Operand('\t')); ++ __ Branch(&success, ls, a0, Operand('\r' - '\t')); ++ // \u00a0 (NBSP). ++ BranchOrBacktrack(on_no_match, ne, a0, Operand(0x00A0 - '\t')); ++ __ bind(&success); ++ return true; ++ } ++ return false; ++ case 'S': ++ // The emitted code for generic character classes is good enough. ++ return false; ++ case 'd': ++ // Match Latin1 digits ('0'..'9'). ++ __ Sub_d(a0, current_character(), Operand('0')); ++ BranchOrBacktrack(on_no_match, hi, a0, Operand('9' - '0')); ++ return true; ++ case 'D': ++ // Match non Latin1-digits. ++ __ Sub_d(a0, current_character(), Operand('0')); ++ BranchOrBacktrack(on_no_match, ls, a0, Operand('9' - '0')); ++ return true; ++ case '.': { ++ // Match non-newlines (not 0x0A('\n'), 0x0D('\r'), 0x2028 and 0x2029). ++ __ Xor(a0, current_character(), Operand(0x01)); ++ // See if current character is '\n'^1 or '\r'^1, i.e., 0x0B or 0x0C. ++ __ Sub_d(a0, a0, Operand(0x0B)); ++ BranchOrBacktrack(on_no_match, ls, a0, Operand(0x0C - 0x0B)); ++ if (mode_ == UC16) { ++ // Compare original value to 0x2028 and 0x2029, using the already ++ // computed (current_char ^ 0x01 - 0x0B). I.e., check for ++ // 0x201D (0x2028 - 0x0B) or 0x201E. ++ __ Sub_d(a0, a0, Operand(0x2028 - 0x0B)); ++ BranchOrBacktrack(on_no_match, ls, a0, Operand(1)); ++ } ++ return true; ++ } ++ case 'n': { ++ // Match newlines (0x0A('\n'), 0x0D('\r'), 0x2028 and 0x2029). ++ __ Xor(a0, current_character(), Operand(0x01)); ++ // See if current character is '\n'^1 or '\r'^1, i.e., 0x0B or 0x0C. ++ __ Sub_d(a0, a0, Operand(0x0B)); ++ if (mode_ == LATIN1) { ++ BranchOrBacktrack(on_no_match, hi, a0, Operand(0x0C - 0x0B)); ++ } else { ++ Label done; ++ BranchOrBacktrack(&done, ls, a0, Operand(0x0C - 0x0B)); ++ // Compare original value to 0x2028 and 0x2029, using the already ++ // computed (current_char ^ 0x01 - 0x0B). I.e., check for ++ // 0x201D (0x2028 - 0x0B) or 0x201E. ++ __ Sub_d(a0, a0, Operand(0x2028 - 0x0B)); ++ BranchOrBacktrack(on_no_match, hi, a0, Operand(1)); ++ __ bind(&done); ++ } ++ return true; ++ } ++ case 'w': { ++ if (mode_ != LATIN1) { ++ // Table is 256 entries, so all Latin1 characters can be tested. 
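++      // Word characters are all <= 'z', so anything above it can fail early;
++      // this also keeps the 256-entry map lookup below in range for two-byte
++      // strings.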
++ BranchOrBacktrack(on_no_match, hi, current_character(), Operand('z')); ++ } ++ ExternalReference map = ++ ExternalReference::re_word_character_map(isolate()); ++ __ li(a0, Operand(map)); ++ __ Add_d(a0, a0, current_character()); ++ __ Ld_bu(a0, MemOperand(a0, 0)); ++ BranchOrBacktrack(on_no_match, eq, a0, Operand(zero_reg)); ++ return true; ++ } ++ case 'W': { ++ Label done; ++ if (mode_ != LATIN1) { ++ // Table is 256 entries, so all Latin1 characters can be tested. ++ __ Branch(&done, hi, current_character(), Operand('z')); ++ } ++ ExternalReference map = ++ ExternalReference::re_word_character_map(isolate()); ++ __ li(a0, Operand(map)); ++ __ Add_d(a0, a0, current_character()); ++ __ Ld_bu(a0, MemOperand(a0, 0)); ++ BranchOrBacktrack(on_no_match, ne, a0, Operand(zero_reg)); ++ if (mode_ != LATIN1) { ++ __ bind(&done); ++ } ++ return true; ++ } ++ case '*': ++ // Match any character. ++ return true; ++ // No custom implementation (yet): s(UC16), S(UC16). ++ default: ++ return false; ++ } ++} ++ ++void RegExpMacroAssemblerLA64::Fail() { ++ __ li(a0, Operand(FAILURE)); ++ __ jmp(&exit_label_); ++} ++ ++Handle RegExpMacroAssemblerLA64::GetCode(Handle source) { ++ Label return_v0; ++ if (0 /* todo masm_->has_exception()*/) { ++ // If the code gets corrupted due to long regular expressions and lack of ++ // space on trampolines, an internal exception flag is set. If this case ++ // is detected, we will jump into exit sequence right away. ++ //__ bind_to(&entry_label_, internal_failure_label_.pos()); ++ } else { ++ // Finalize code - write the entry point code now we know how many ++ // registers we need. ++ ++ // Entry code: ++ __ bind(&entry_label_); ++ ++ // Tell the system that we have a stack frame. Because the type is MANUAL, ++ // no is generated. ++ FrameScope scope(masm_, StackFrame::MANUAL); ++ ++ // Actually emit code to start a new stack frame. ++ // Push arguments ++ // Save callee-save registers. ++ // Start new stack frame. ++ // Store link register in existing stack-cell. ++ // Order here should correspond to order of offset constants in header file. ++ // TODO(plind): we save s0..s7, but ONLY use s3 here - use the regs ++ // or dont save. ++ RegList registers_to_retain = s0.bit() | s1.bit() | s2.bit() | s3.bit() | ++ s4.bit() | s5.bit() | s6.bit() | s7.bit(); ++ RegList argument_registers = a0.bit() | a1.bit() | a2.bit() | a3.bit(); ++ ++ argument_registers |= a4.bit() | a5.bit() | a6.bit() | a7.bit(); ++ ++ __ MultiPush(ra.bit(), fp.bit(), argument_registers | registers_to_retain); ++ // Set frame pointer in space for it if this is not a direct call ++ // from generated code. ++ // TODO(plind): this 8 is the # of argument regs, should have definition. ++ __ Add_d(frame_pointer(), sp, Operand(8 * kPointerSize)); ++ STATIC_ASSERT(kSuccessfulCaptures == kInputString - kSystemPointerSize); ++ __ mov(a0, zero_reg); ++ __ push(a0); // Make room for success counter and initialize it to 0. ++ STATIC_ASSERT(kStringStartMinusOne == ++ kSuccessfulCaptures - kSystemPointerSize); ++ __ push(a0); // Make room for "string start - 1" constant. ++ STATIC_ASSERT(kBacktrackCount == kStringStartMinusOne - kSystemPointerSize); ++ __ push(a0); // The backtrack counter ++ ++ // Check if we have space on the stack for registers. 
++ Label stack_limit_hit; ++ Label stack_ok; ++ ++ ExternalReference stack_limit = ++ ExternalReference::address_of_jslimit(masm_->isolate()); ++ __ li(a0, Operand(stack_limit)); ++ __ Ld_d(a0, MemOperand(a0, 0)); ++ __ Sub_d(a0, sp, a0); ++ // Handle it if the stack pointer is already below the stack limit. ++ __ Branch(&stack_limit_hit, le, a0, Operand(zero_reg)); ++ // Check if there is room for the variable number of registers above ++ // the stack limit. ++ __ Branch(&stack_ok, hs, a0, Operand(num_registers_ * kPointerSize)); ++ // Exit with OutOfMemory exception. There is not enough space on the stack ++ // for our working registers. ++ __ li(a0, Operand(EXCEPTION)); ++ __ jmp(&return_v0); ++ ++ __ bind(&stack_limit_hit); ++ CallCheckStackGuardState(a0); ++ // If returned value is non-zero, we exit with the returned value as result. ++ __ Branch(&return_v0, ne, a0, Operand(zero_reg)); ++ ++ __ bind(&stack_ok); ++ // Allocate space on stack for registers. ++ __ Sub_d(sp, sp, Operand(num_registers_ * kPointerSize)); ++ // Load string end. ++ __ Ld_d(end_of_input_address(), MemOperand(frame_pointer(), kInputEnd)); ++ // Load input start. ++ __ Ld_d(a0, MemOperand(frame_pointer(), kInputStart)); ++ // Find negative length (offset of start relative to end). ++ __ Sub_d(current_input_offset(), a0, end_of_input_address()); ++ // Set a0 to address of char before start of the input string ++ // (effectively string position -1). ++ __ Ld_d(a1, MemOperand(frame_pointer(), kStartIndex)); ++ __ Sub_d(a0, current_input_offset(), Operand(char_size())); ++ __ slli_d(t1, a1, (mode_ == UC16) ? 1 : 0); ++ __ Sub_d(a0, a0, t1); ++ // Store this value in a local variable, for use when clearing ++ // position registers. ++ __ St_d(a0, MemOperand(frame_pointer(), kStringStartMinusOne)); ++ ++ // Initialize code pointer register ++ __ li(code_pointer(), Operand(masm_->CodeObject()), CONSTANT_SIZE); ++ ++ Label load_char_start_regexp, start_regexp; ++ // Load newline if index is at start, previous character otherwise. ++ __ Branch(&load_char_start_regexp, ne, a1, Operand(zero_reg)); ++ __ li(current_character(), Operand('\n')); ++ __ jmp(&start_regexp); ++ ++ // Global regexp restarts matching here. ++ __ bind(&load_char_start_regexp); ++ // Load previous char as initial value of current character register. ++ LoadCurrentCharacterUnchecked(-1, 1); ++ __ bind(&start_regexp); ++ ++ // Initialize on-stack registers. ++ if (num_saved_registers_ > 0) { // Always is, if generated from a regexp. ++ // Fill saved registers with initial value = start offset - 1. ++ if (num_saved_registers_ > 8) { ++ // Address of register 0. ++ __ Add_d(a1, frame_pointer(), Operand(kRegisterZero)); ++ __ li(a2, Operand(num_saved_registers_)); ++ Label init_loop; ++ __ bind(&init_loop); ++ __ St_d(a0, MemOperand(a1, 0)); ++ __ Add_d(a1, a1, Operand(-kPointerSize)); ++ __ Sub_d(a2, a2, Operand(1)); ++ __ Branch(&init_loop, ne, a2, Operand(zero_reg)); ++ } else { ++ for (int i = 0; i < num_saved_registers_; i++) { ++ __ St_d(a0, register_location(i)); ++ } ++ } ++ } ++ ++ // Initialize backtrack stack pointer. ++ __ Ld_d(backtrack_stackpointer(), ++ MemOperand(frame_pointer(), kStackHighEnd)); ++ ++ __ jmp(&start_label_); ++ ++ // Exit code: ++ if (success_label_.is_linked()) { ++ // Save captures when successful. ++ __ bind(&success_label_); ++ if (num_saved_registers_ > 0) { ++ // Copy captures to output. 
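++        // Capture registers hold byte offsets relative to the end of the
++        // input; a1 is computed below so that adding it (after halving the
++        // offsets for UC16) yields character indices from the string start.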
++ __ Ld_d(a1, MemOperand(frame_pointer(), kInputStart)); ++ __ Ld_d(a0, MemOperand(frame_pointer(), kRegisterOutput)); ++ __ Ld_d(a2, MemOperand(frame_pointer(), kStartIndex)); ++ __ Sub_d(a1, end_of_input_address(), a1); ++ // a1 is length of input in bytes. ++ if (mode_ == UC16) { ++ __ srli_d(a1, a1, 1); ++ } ++ // a1 is length of input in characters. ++ __ Add_d(a1, a1, Operand(a2)); ++ // a1 is length of string in characters. ++ ++ DCHECK_EQ(0, num_saved_registers_ % 2); ++ // Always an even number of capture registers. This allows us to ++ // unroll the loop once to add an operation between a load of a register ++ // and the following use of that register. ++ for (int i = 0; i < num_saved_registers_; i += 2) { ++ __ Ld_d(a2, register_location(i)); ++ __ Ld_d(a3, register_location(i + 1)); ++ if (i == 0 && global_with_zero_length_check()) { ++ // Keep capture start in a4 for the zero-length check later. ++ __ mov(t3, a2); ++ } ++ if (mode_ == UC16) { ++ __ srai_d(a2, a2, 1); ++ __ Add_d(a2, a2, a1); ++ __ srai_d(a3, a3, 1); ++ __ Add_d(a3, a3, a1); ++ } else { ++ __ Add_d(a2, a1, Operand(a2)); ++ __ Add_d(a3, a1, Operand(a3)); ++ } ++ // V8 expects the output to be an int32_t array. ++ __ St_w(a2, MemOperand(a0, 0)); ++ __ Add_d(a0, a0, kIntSize); ++ __ St_w(a3, MemOperand(a0, 0)); ++ __ Add_d(a0, a0, kIntSize); ++ } ++ } ++ ++ if (global()) { ++ // Restart matching if the regular expression is flagged as global. ++ __ Ld_d(a0, MemOperand(frame_pointer(), kSuccessfulCaptures)); ++ __ Ld_d(a1, MemOperand(frame_pointer(), kNumOutputRegisters)); ++ __ Ld_d(a2, MemOperand(frame_pointer(), kRegisterOutput)); ++ // Increment success counter. ++ __ Add_d(a0, a0, 1); ++ __ St_d(a0, MemOperand(frame_pointer(), kSuccessfulCaptures)); ++ // Capture results have been stored, so the number of remaining global ++ // output registers is reduced by the number of stored captures. ++ __ Sub_d(a1, a1, num_saved_registers_); ++ // Check whether we have enough room for another set of capture results. ++ //__ mov(v0, a0); ++ __ Branch(&return_v0, lt, a1, Operand(num_saved_registers_)); ++ ++ __ St_d(a1, MemOperand(frame_pointer(), kNumOutputRegisters)); ++ // Advance the location for output. ++ __ Add_d(a2, a2, num_saved_registers_ * kIntSize); ++ __ St_d(a2, MemOperand(frame_pointer(), kRegisterOutput)); ++ ++ // Prepare a0 to initialize registers with its value in the next run. ++ __ Ld_d(a0, MemOperand(frame_pointer(), kStringStartMinusOne)); ++ ++ if (global_with_zero_length_check()) { ++ // Special case for zero-length matches. ++ // t3: capture start index ++ // Not a zero-length match, restart. ++ __ Branch(&load_char_start_regexp, ne, current_input_offset(), ++ Operand(t3)); ++ // Offset from the end is zero if we already reached the end. ++ __ Branch(&exit_label_, eq, current_input_offset(), ++ Operand(zero_reg)); ++ // Advance current position after a zero-length match. ++ Label advance; ++ __ bind(&advance); ++ __ Add_d(current_input_offset(), current_input_offset(), ++ Operand((mode_ == UC16) ? 2 : 1)); ++ if (global_unicode()) CheckNotInSurrogatePair(0, &advance); ++ } ++ ++ __ Branch(&load_char_start_regexp); ++ } else { ++ __ li(a0, Operand(SUCCESS)); ++ } ++ } ++ // Exit and return v0. ++ __ bind(&exit_label_); ++ if (global()) { ++ __ Ld_d(a0, MemOperand(frame_pointer(), kSuccessfulCaptures)); ++ } ++ ++ __ bind(&return_v0); ++ // Skip sp past regexp registers and local variables.. ++ __ mov(sp, frame_pointer()); ++ // Restore registers s0..s7 and return (restoring ra to pc). 
++ __ MultiPop(ra.bit(), fp.bit(), registers_to_retain); ++ __ Ret(); ++ ++ // Backtrack code (branch target for conditional backtracks). ++ if (backtrack_label_.is_linked()) { ++ __ bind(&backtrack_label_); ++ Backtrack(); ++ } ++ ++ Label exit_with_exception; ++ ++ // Preempt-code. ++ if (check_preempt_label_.is_linked()) { ++ SafeCallTarget(&check_preempt_label_); ++ // Put regexp engine registers on stack. ++ RegList regexp_registers_to_retain = current_input_offset().bit() | ++ current_character().bit() | ++ backtrack_stackpointer().bit(); ++ __ MultiPush(regexp_registers_to_retain); ++ CallCheckStackGuardState(a0); ++ __ MultiPop(regexp_registers_to_retain); ++ // If returning non-zero, we should end execution with the given ++ // result as return value. ++ __ Branch(&return_v0, ne, a0, Operand(zero_reg)); ++ ++ // String might have moved: Reload end of string from frame. ++ __ Ld_d(end_of_input_address(), MemOperand(frame_pointer(), kInputEnd)); ++ __ li(code_pointer(), Operand(masm_->CodeObject()), CONSTANT_SIZE); ++ SafeReturn(); ++ } ++ ++ // Backtrack stack overflow code. ++ if (stack_overflow_label_.is_linked()) { ++ SafeCallTarget(&stack_overflow_label_); ++ // Reached if the backtrack-stack limit has been hit. ++ // Put regexp engine registers on stack first. ++ RegList regexp_registers = ++ current_input_offset().bit() | current_character().bit(); ++ __ MultiPush(regexp_registers); ++ ++ // Call GrowStack(backtrack_stackpointer(), &stack_base) ++ static const int num_arguments = 3; ++ __ PrepareCallCFunction(num_arguments, a0); ++ __ mov(a0, backtrack_stackpointer()); ++ __ Add_d(a1, frame_pointer(), Operand(kStackHighEnd)); ++ __ li(a2, Operand(ExternalReference::isolate_address(masm_->isolate()))); ++ ExternalReference grow_stack = ++ ExternalReference::re_grow_stack(masm_->isolate()); ++ __ CallCFunction(grow_stack, num_arguments); ++ // Restore regexp registers. ++ __ MultiPop(regexp_registers); ++ // If return nullptr, we have failed to grow the stack, and ++ // must exit with a stack-overflow exception. ++ __ Branch(&exit_with_exception, eq, a0, Operand(zero_reg)); ++ // Otherwise use return value as new stack pointer. ++ __ mov(backtrack_stackpointer(), a0); ++ // Restore saved registers and continue. ++ __ li(code_pointer(), Operand(masm_->CodeObject()), CONSTANT_SIZE); ++ __ Ld_d(end_of_input_address(), MemOperand(frame_pointer(), kInputEnd)); ++ SafeReturn(); ++ } ++ ++ if (exit_with_exception.is_linked()) { ++ // If any of the code above needed to exit with an exception. ++ __ bind(&exit_with_exception); ++ // Exit with Result EXCEPTION(-1) to signal thrown exception. 
++ __ li(a0, Operand(EXCEPTION)); ++ __ jmp(&return_v0); ++ } ++ } ++ ++ CodeDesc code_desc; ++ masm_->GetCode(isolate(), &code_desc); ++ Handle code = Factory::CodeBuilder(isolate(), code_desc, Code::REGEXP) ++ .set_self_reference(masm_->CodeObject()) ++ .Build(); ++ LOG(masm_->isolate(), ++ RegExpCodeCreateEvent(Handle::cast(code), source)); ++ return Handle::cast(code); ++} ++ ++void RegExpMacroAssemblerLA64::GoTo(Label* to) { ++ if (to == nullptr) { ++ Backtrack(); ++ return; ++ } ++ __ jmp(to); ++ return; ++} ++ ++void RegExpMacroAssemblerLA64::IfRegisterGE(int reg, int comparand, ++ Label* if_ge) { ++ __ Ld_d(a0, register_location(reg)); ++ BranchOrBacktrack(if_ge, ge, a0, Operand(comparand)); ++} ++ ++void RegExpMacroAssemblerLA64::IfRegisterLT(int reg, int comparand, ++ Label* if_lt) { ++ __ Ld_d(a0, register_location(reg)); ++ BranchOrBacktrack(if_lt, lt, a0, Operand(comparand)); ++} ++ ++void RegExpMacroAssemblerLA64::IfRegisterEqPos(int reg, Label* if_eq) { ++ __ Ld_d(a0, register_location(reg)); ++ BranchOrBacktrack(if_eq, eq, a0, Operand(current_input_offset())); ++} ++ ++RegExpMacroAssembler::IrregexpImplementation ++RegExpMacroAssemblerLA64::Implementation() { ++ return kLA64Implementation; ++} ++ ++void RegExpMacroAssemblerLA64::LoadCurrentCharacterImpl(int cp_offset, ++ Label* on_end_of_input, ++ bool check_bounds, ++ int characters, ++ int eats_at_least) { ++ // It's possible to preload a small number of characters when each success ++ // path requires a large number of characters, but not the reverse. ++ DCHECK_GE(eats_at_least, characters); ++ ++ DCHECK(cp_offset < (1 << 30)); // Be sane! (And ensure negation works). ++ if (check_bounds) { ++ if (cp_offset >= 0) { ++ CheckPosition(cp_offset + eats_at_least - 1, on_end_of_input); ++ } else { ++ CheckPosition(cp_offset, on_end_of_input); ++ } ++ } ++ LoadCurrentCharacterUnchecked(cp_offset, characters); ++} ++ ++void RegExpMacroAssemblerLA64::PopCurrentPosition() { ++ Pop(current_input_offset()); ++} ++ ++void RegExpMacroAssemblerLA64::PopRegister(int register_index) { ++ Pop(a0); ++ __ St_d(a0, register_location(register_index)); ++} ++ ++void RegExpMacroAssemblerLA64::PushBacktrack(Label* label) { ++ if (label->is_bound()) { ++ int target = label->pos(); ++ __ li(a0, Operand(target + Code::kHeaderSize - kHeapObjectTag)); ++ } else { ++ // TODO: Optimize like arm64 without ld_wu? 
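++    // The label is not bound yet: branch over a 4-byte slot (the nop below),
++    // let label_at_put() record the slot so the assembler can patch in the
++    // target offset once the label is bound, then load it at run time.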
++ Assembler::BlockTrampolinePoolScope block_trampoline_pool(masm_); ++ Label after_constant; ++ __ Branch(&after_constant); ++ int offset = masm_->pc_offset(); ++ int cp_offset = offset + Code::kHeaderSize - kHeapObjectTag; ++ //__ emit(0); ++ __ nop(); ++ masm_->label_at_put(label, offset); ++ __ bind(&after_constant); ++ if (is_int12(cp_offset)) { ++ __ Ld_wu(a0, MemOperand(code_pointer(), cp_offset)); ++ } else { ++ __ Add_d(a0, code_pointer(), cp_offset); ++ __ Ld_wu(a0, MemOperand(a0, 0)); ++ } ++ } ++ Push(a0); ++ CheckStackLimit(); ++} ++ ++void RegExpMacroAssemblerLA64::PushCurrentPosition() { ++ Push(current_input_offset()); ++} ++ ++void RegExpMacroAssemblerLA64::PushRegister(int register_index, ++ StackCheckFlag check_stack_limit) { ++ __ Ld_d(a0, register_location(register_index)); ++ Push(a0); ++ if (check_stack_limit) CheckStackLimit(); ++} ++ ++void RegExpMacroAssemblerLA64::ReadCurrentPositionFromRegister(int reg) { ++ __ Ld_d(current_input_offset(), register_location(reg)); ++} ++ ++void RegExpMacroAssemblerLA64::ReadStackPointerFromRegister(int reg) { ++ __ Ld_d(backtrack_stackpointer(), register_location(reg)); ++ __ Ld_d(a0, MemOperand(frame_pointer(), kStackHighEnd)); ++ __ Add_d(backtrack_stackpointer(), backtrack_stackpointer(), Operand(a0)); ++} ++ ++void RegExpMacroAssemblerLA64::SetCurrentPositionFromEnd(int by) { ++ Label after_position; ++ __ Branch(&after_position, ge, current_input_offset(), ++ Operand(-by * char_size())); ++ __ li(current_input_offset(), -by * char_size()); ++ // On RegExp code entry (where this operation is used), the character before ++ // the current position is expected to be already loaded. ++ // We have advanced the position, so it's safe to read backwards. ++ LoadCurrentCharacterUnchecked(-1, 1); ++ __ bind(&after_position); ++} ++ ++void RegExpMacroAssemblerLA64::SetRegister(int register_index, int to) { ++ DCHECK(register_index >= num_saved_registers_); // Reserved for positions! ++ __ li(a0, Operand(to)); ++ __ St_d(a0, register_location(register_index)); ++} ++ ++bool RegExpMacroAssemblerLA64::Succeed() { ++ __ jmp(&success_label_); ++ return global(); ++} ++ ++void RegExpMacroAssemblerLA64::WriteCurrentPositionToRegister(int reg, ++ int cp_offset) { ++ if (cp_offset == 0) { ++ __ St_d(current_input_offset(), register_location(reg)); ++ } else { ++ __ Add_d(a0, current_input_offset(), Operand(cp_offset * char_size())); ++ __ St_d(a0, register_location(reg)); ++ } ++} ++ ++void RegExpMacroAssemblerLA64::ClearRegisters(int reg_from, int reg_to) { ++ DCHECK(reg_from <= reg_to); ++ __ Ld_d(a0, MemOperand(frame_pointer(), kStringStartMinusOne)); ++ for (int reg = reg_from; reg <= reg_to; reg++) { ++ __ St_d(a0, register_location(reg)); ++ } ++} ++ ++void RegExpMacroAssemblerLA64::WriteStackPointerToRegister(int reg) { ++ __ Ld_d(a1, MemOperand(frame_pointer(), kStackHighEnd)); ++ __ Sub_d(a0, backtrack_stackpointer(), a1); ++ __ St_d(a0, register_location(reg)); ++} ++ ++bool RegExpMacroAssemblerLA64::CanReadUnaligned() { return false; } ++ ++// Private methods: ++ ++void RegExpMacroAssemblerLA64::CallCheckStackGuardState(Register scratch) { ++ DCHECK(!isolate()->IsGeneratingEmbeddedBuiltins()); ++ DCHECK(!masm_->options().isolate_independent_code); ++ ++ int stack_alignment = base::OS::ActivationFrameAlignment(); ++ ++ // Align the stack pointer and save the original sp value on the stack. 
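// Illustration only, not part of the patch: the next few instructions round sp
// down to the C activation-frame alignment and stash the original value so it
// can be reloaded after the call. A plain-C++ sketch of the same rounding,
// assuming the alignment is a power of two (as the DCHECK below asserts):
auto sketch_align_stack_down = [](unsigned long long sp, unsigned long long alignment) {
  return sp & ~(alignment - 1);  // equivalent to the And(sp, sp, -alignment) below
};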
++ __ mov(scratch, sp); ++ __ Sub_d(sp, sp, Operand(kPointerSize)); ++ DCHECK(base::bits::IsPowerOfTwo(stack_alignment)); ++ __ And(sp, sp, Operand(-stack_alignment)); ++ __ St_d(scratch, MemOperand(sp, 0)); ++ ++ __ mov(a2, frame_pointer()); ++ // Code of self. ++ __ li(a1, Operand(masm_->CodeObject()), CONSTANT_SIZE); ++ ++ // We need to make room for the return address on the stack. ++ DCHECK(IsAligned(stack_alignment, kPointerSize)); ++ __ Sub_d(sp, sp, Operand(stack_alignment)); ++ ++ // The stack pointer now points to cell where the return address will be ++ // written. Arguments are in registers, meaning we treat the return address as ++ // argument 5. Since DirectCEntry will handle allocating space for the C ++ // argument slots, we don't need to care about that here. This is how the ++ // stack will look (sp meaning the value of sp at this moment): ++ // [sp + 3] - empty slot if needed for alignment. ++ // [sp + 2] - saved sp. ++ // [sp + 1] - second word reserved for return value. ++ // [sp + 0] - first word reserved for return value. ++ ++ // a0 will point to the return address, placed by DirectCEntry. ++ __ mov(a0, sp); ++ ++ ExternalReference stack_guard_check = ++ ExternalReference::re_check_stack_guard_state(masm_->isolate()); ++ __ li(t7, Operand(stack_guard_check)); ++ ++ EmbeddedData d = EmbeddedData::FromBlob(); ++ CHECK(Builtins::IsIsolateIndependent(Builtins::kDirectCEntry)); ++ Address entry = d.InstructionStartOfBuiltin(Builtins::kDirectCEntry); ++ __ li(kScratchReg, Operand(entry, RelocInfo::OFF_HEAP_TARGET)); ++ __ Call(kScratchReg); ++ ++ // DirectCEntry allocated space for the C argument slots so we have to ++ // drop them with the return address from the stack with loading saved sp. ++ // At this point stack must look: ++ // [sp + 7] - empty slot if needed for alignment. ++ // [sp + 6] - saved sp. ++ // [sp + 5] - second word reserved for return value. ++ // [sp + 4] - first word reserved for return value. ++ // [sp + 3] - C argument slot. ++ // [sp + 2] - C argument slot. ++ // [sp + 1] - C argument slot. ++ // [sp + 0] - C argument slot. ++ __ Ld_d(sp, MemOperand(sp, stack_alignment + kCArgsSlotsSize)); ++ ++ __ li(code_pointer(), Operand(masm_->CodeObject())); ++} ++ ++// Helper function for reading a value out of a stack frame. ++template ++static T& frame_entry(Address re_frame, int frame_offset) { ++ return reinterpret_cast(Memory(re_frame + frame_offset)); ++} ++ ++template ++static T* frame_entry_address(Address re_frame, int frame_offset) { ++ return reinterpret_cast(re_frame + frame_offset); ++} ++ ++int64_t RegExpMacroAssemblerLA64::CheckStackGuardState(Address* return_address, ++ Address raw_code, ++ Address re_frame) { ++ Code re_code = Code::cast(Object(raw_code)); ++ return NativeRegExpMacroAssembler::CheckStackGuardState( ++ frame_entry(re_frame, kIsolate), ++ static_cast(frame_entry(re_frame, kStartIndex)), ++ static_cast( ++ frame_entry(re_frame, kDirectCall)), ++ return_address, re_code, ++ frame_entry_address
(re_frame, kInputString), ++ frame_entry_address(re_frame, kInputStart), ++ frame_entry_address(re_frame, kInputEnd)); ++} ++ ++MemOperand RegExpMacroAssemblerLA64::register_location(int register_index) { ++ DCHECK(register_index < (1 << 30)); ++ if (num_registers_ <= register_index) { ++ num_registers_ = register_index + 1; ++ } ++ return MemOperand(frame_pointer(), ++ kRegisterZero - register_index * kPointerSize); ++} ++ ++void RegExpMacroAssemblerLA64::CheckPosition(int cp_offset, ++ Label* on_outside_input) { ++ if (cp_offset >= 0) { ++ BranchOrBacktrack(on_outside_input, ge, current_input_offset(), ++ Operand(-cp_offset * char_size())); ++ } else { ++ __ Ld_d(a1, MemOperand(frame_pointer(), kStringStartMinusOne)); ++ __ Add_d(a0, current_input_offset(), Operand(cp_offset * char_size())); ++ BranchOrBacktrack(on_outside_input, le, a0, Operand(a1)); ++ } ++} ++ ++void RegExpMacroAssemblerLA64::BranchOrBacktrack(Label* to, Condition condition, ++ Register rs, ++ const Operand& rt) { ++ if (condition == al) { // Unconditional. ++ if (to == nullptr) { ++ Backtrack(); ++ return; ++ } ++ __ jmp(to); ++ return; ++ } ++ if (to == nullptr) { ++ __ Branch(&backtrack_label_, condition, rs, rt); ++ return; ++ } ++ __ Branch(to, condition, rs, rt); ++} ++ ++void RegExpMacroAssemblerLA64::SafeCall(Label* to, Condition cond, Register rs, ++ const Operand& rt) { ++ __ Branch(to, cond, rs, rt, true); ++} ++ ++void RegExpMacroAssemblerLA64::SafeReturn() { ++ __ pop(ra); ++ __ Add_d(t1, ra, Operand(masm_->CodeObject())); ++ __ Jump(t1); ++} ++ ++void RegExpMacroAssemblerLA64::SafeCallTarget(Label* name) { ++ __ bind(name); ++ __ Sub_d(ra, ra, Operand(masm_->CodeObject())); ++ __ push(ra); ++} ++ ++void RegExpMacroAssemblerLA64::Push(Register source) { ++ DCHECK(source != backtrack_stackpointer()); ++ __ Add_d(backtrack_stackpointer(), backtrack_stackpointer(), ++ Operand(-kIntSize)); ++ __ St_w(source, MemOperand(backtrack_stackpointer(), 0)); ++} ++ ++void RegExpMacroAssemblerLA64::Pop(Register target) { ++ DCHECK(target != backtrack_stackpointer()); ++ __ Ld_w(target, MemOperand(backtrack_stackpointer(), 0)); ++ __ Add_d(backtrack_stackpointer(), backtrack_stackpointer(), kIntSize); ++} ++ ++void RegExpMacroAssemblerLA64::CheckPreemption() { ++ // Check for preemption. ++ ExternalReference stack_limit = ++ ExternalReference::address_of_jslimit(masm_->isolate()); ++ __ li(a0, Operand(stack_limit)); ++ __ Ld_d(a0, MemOperand(a0, 0)); ++ SafeCall(&check_preempt_label_, ls, sp, Operand(a0)); ++} ++ ++void RegExpMacroAssemblerLA64::CheckStackLimit() { ++ ExternalReference stack_limit = ++ ExternalReference::address_of_regexp_stack_limit_address( ++ masm_->isolate()); ++ ++ __ li(a0, Operand(stack_limit)); ++ __ Ld_d(a0, MemOperand(a0, 0)); ++ SafeCall(&stack_overflow_label_, ls, backtrack_stackpointer(), Operand(a0)); ++} ++ ++void RegExpMacroAssemblerLA64::LoadCurrentCharacterUnchecked(int cp_offset, ++ int characters) { ++ Register offset = current_input_offset(); ++ if (cp_offset != 0) { ++ // t3 is not being used to store the capture start index at this point. ++ __ Add_d(t3, current_input_offset(), Operand(cp_offset * char_size())); ++ offset = t3; ++ } ++ // We assume that we cannot do unaligned loads on LA64, so this function ++ // must only be used to load a single character at a time. 
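// Illustration only, not part of the patch: the current position is kept as a
// negative byte offset from the end of the subject string, so the character
// address is simply end_of_input + offset. A minimal sketch for the LATIN1
// case (one byte per character; UC16 would scale cp_offset by two):
auto sketch_load_char_latin1 = [](const unsigned char* end_of_input,
                                  long long current_input_offset,  // <= 0
                                  int cp_offset) {
  return end_of_input[current_input_offset + cp_offset];
};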
++ DCHECK_EQ(1, characters); ++ __ Add_d(t1, end_of_input_address(), Operand(offset)); ++ if (mode_ == LATIN1) { ++ __ Ld_bu(current_character(), MemOperand(t1, 0)); ++ } else { ++ DCHECK(mode_ == UC16); ++ __ Ld_hu(current_character(), MemOperand(t1, 0)); ++ } ++} ++ ++#undef __ ++ ++} // namespace internal ++} // namespace v8 ++ ++#endif // V8_TARGET_ARCH_LA64 +diff --git a/src/3rdparty/chromium/v8/src/regexp/la64/regexp-macro-assembler-la64.h b/src/3rdparty/chromium/v8/src/regexp/la64/regexp-macro-assembler-la64.h +new file mode 100644 +index 00000000000..5ebf37807cb +--- /dev/null ++++ b/src/3rdparty/chromium/v8/src/regexp/la64/regexp-macro-assembler-la64.h +@@ -0,0 +1,216 @@ ++// Copyright 2011 the V8 project authors. All rights reserved. ++// Use of this source code is governed by a BSD-style license that can be ++// found in the LICENSE file. ++ ++#ifndef V8_REGEXP_LA64_REGEXP_MACRO_ASSEMBLER_LA64_H_ ++#define V8_REGEXP_LA64_REGEXP_MACRO_ASSEMBLER_LA64_H_ ++ ++#include "src/codegen/la64/assembler-la64.h" ++#include "src/codegen/macro-assembler.h" ++#include "src/regexp/regexp-macro-assembler.h" ++ ++namespace v8 { ++namespace internal { ++ ++class V8_EXPORT_PRIVATE RegExpMacroAssemblerLA64 ++ : public NativeRegExpMacroAssembler { ++ public: ++ RegExpMacroAssemblerLA64(Isolate* isolate, Zone* zone, Mode mode, ++ int registers_to_save); ++ virtual ~RegExpMacroAssemblerLA64(); ++ virtual int stack_limit_slack(); ++ virtual void AdvanceCurrentPosition(int by); ++ virtual void AdvanceRegister(int reg, int by); ++ virtual void Backtrack(); ++ virtual void Bind(Label* label); ++ virtual void CheckAtStart(int cp_offset, Label* on_at_start); ++ virtual void CheckCharacter(uint32_t c, Label* on_equal); ++ virtual void CheckCharacterAfterAnd(uint32_t c, uint32_t mask, ++ Label* on_equal); ++ virtual void CheckCharacterGT(uc16 limit, Label* on_greater); ++ virtual void CheckCharacterLT(uc16 limit, Label* on_less); ++ // A "greedy loop" is a loop that is both greedy and with a simple ++ // body. It has a particularly simple implementation. ++ virtual void CheckGreedyLoop(Label* on_tos_equals_current_position); ++ virtual void CheckNotAtStart(int cp_offset, Label* on_not_at_start); ++ virtual void CheckNotBackReference(int start_reg, bool read_backward, ++ Label* on_no_match); ++ virtual void CheckNotBackReferenceIgnoreCase(int start_reg, ++ bool read_backward, ++ Label* on_no_match); ++ virtual void CheckNotCharacter(uint32_t c, Label* on_not_equal); ++ virtual void CheckNotCharacterAfterAnd(uint32_t c, uint32_t mask, ++ Label* on_not_equal); ++ virtual void CheckNotCharacterAfterMinusAnd(uc16 c, uc16 minus, uc16 mask, ++ Label* on_not_equal); ++ virtual void CheckCharacterInRange(uc16 from, uc16 to, Label* on_in_range); ++ virtual void CheckCharacterNotInRange(uc16 from, uc16 to, ++ Label* on_not_in_range); ++ virtual void CheckBitInTable(Handle table, Label* on_bit_set); ++ ++ // Checks whether the given offset from the current position is before ++ // the end of the string. 
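// Illustration only, not part of the patch: because the position is a negative
// offset from the string end, a forward peek of cp_offset characters runs past
// the end exactly when the sum reaches zero. A minimal sketch of that test for
// the one-byte (LATIN1) case, mirroring the cp_offset >= 0 path of CheckPosition:
static constexpr bool SketchPositionOutsideInput(long long current_input_offset,
                                                 int cp_offset) {
  return current_input_offset + cp_offset >= 0;
}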
++ virtual void CheckPosition(int cp_offset, Label* on_outside_input); ++ virtual bool CheckSpecialCharacterClass(uc16 type, Label* on_no_match); ++ virtual void Fail(); ++ virtual Handle GetCode(Handle source); ++ virtual void GoTo(Label* label); ++ virtual void IfRegisterGE(int reg, int comparand, Label* if_ge); ++ virtual void IfRegisterLT(int reg, int comparand, Label* if_lt); ++ virtual void IfRegisterEqPos(int reg, Label* if_eq); ++ virtual IrregexpImplementation Implementation(); ++ virtual void LoadCurrentCharacterImpl(int cp_offset, Label* on_end_of_input, ++ bool check_bounds, int characters, ++ int eats_at_least); ++ virtual void PopCurrentPosition(); ++ virtual void PopRegister(int register_index); ++ virtual void PushBacktrack(Label* label); ++ virtual void PushCurrentPosition(); ++ virtual void PushRegister(int register_index, ++ StackCheckFlag check_stack_limit); ++ virtual void ReadCurrentPositionFromRegister(int reg); ++ virtual void ReadStackPointerFromRegister(int reg); ++ virtual void SetCurrentPositionFromEnd(int by); ++ virtual void SetRegister(int register_index, int to); ++ virtual bool Succeed(); ++ virtual void WriteCurrentPositionToRegister(int reg, int cp_offset); ++ virtual void ClearRegisters(int reg_from, int reg_to); ++ virtual void WriteStackPointerToRegister(int reg); ++ virtual bool CanReadUnaligned(); ++ ++ // Called from RegExp if the stack-guard is triggered. ++ // If the code object is relocated, the return address is fixed before ++ // returning. ++ // {raw_code} is an Address because this is called via ExternalReference. ++ static int64_t CheckStackGuardState(Address* return_address, Address raw_code, ++ Address re_frame); ++ ++ void print_regexp_frame_constants(); ++ ++ private: ++ // Offsets from frame_pointer() of function parameters and stored registers. ++ static const int kFramePointer = 0; ++ ++ // Above the frame pointer - Stored registers and stack passed parameters. ++ // Registers s0 to s7, fp, and ra. ++ static const int kStoredRegisters = kFramePointer; ++ // Return address (stored from link register, read into pc on return). ++ ++ // TODO(plind): This 9 - is 8 s-regs (s0..s7) plus fp. ++ ++ static const int kReturnAddress = kStoredRegisters + 9 * kPointerSize; ++ // Stack frame header. ++ static const int kStackFrameHeader = kReturnAddress; ++ // Stack parameters placed by caller. ++ static const int kIsolate = kStackFrameHeader + kPointerSize; ++ ++ // Below the frame pointer. ++ // Register parameters stored by setup code. ++ static const int kDirectCall = kFramePointer - kPointerSize; ++ static const int kStackHighEnd = kDirectCall - kPointerSize; ++ static const int kNumOutputRegisters = kStackHighEnd - kPointerSize; ++ static const int kRegisterOutput = kNumOutputRegisters - kPointerSize; ++ static const int kInputEnd = kRegisterOutput - kPointerSize; ++ static const int kInputStart = kInputEnd - kPointerSize; ++ static const int kStartIndex = kInputStart - kPointerSize; ++ static const int kInputString = kStartIndex - kPointerSize; ++ // When adding local variables remember to push space for them in ++ // the frame in GetCode. ++ static const int kSuccessfulCaptures = kInputString - kPointerSize; ++ static const int kStringStartMinusOne = kSuccessfulCaptures - kPointerSize; ++ static const int kBacktrackCount = kStringStartMinusOne - kSystemPointerSize; ++ // First register address. Following registers are below it on the stack. 
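// Illustration only, not part of the patch: regexp registers are laid out as
// pointer-sized slots growing downwards from kRegisterZero, so register i lives
// at fp + (kRegisterZero - i * kPointerSize). A minimal sketch of that offset
// computation, assuming 8-byte pointers as on LA64:
static constexpr int SketchRegisterSlotOffset(int register_zero_offset, int index) {
  return register_zero_offset - index * 8;
}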
++ static const int kRegisterZero = kBacktrackCount - kSystemPointerSize; ++ ++ // Initial size of code buffer. ++ static const int kRegExpCodeSize = 1024; ++ ++ // Load a number of characters at the given offset from the ++ // current position, into the current-character register. ++ void LoadCurrentCharacterUnchecked(int cp_offset, int character_count); ++ ++ // Check whether preemption has been requested. ++ void CheckPreemption(); ++ ++ // Check whether we are exceeding the stack limit on the backtrack stack. ++ void CheckStackLimit(); ++ ++ // Generate a call to CheckStackGuardState. ++ void CallCheckStackGuardState(Register scratch); ++ ++ // The ebp-relative location of a regexp register. ++ MemOperand register_location(int register_index); ++ ++ // Register holding the current input position as negative offset from ++ // the end of the string. ++ inline Register current_input_offset() { return a6; } ++ ++ // The register containing the current character after LoadCurrentCharacter. ++ inline Register current_character() { return a7; } ++ ++ // Register holding address of the end of the input string. ++ inline Register end_of_input_address() { return t2; } ++ ++ // Register holding the frame address. Local variables, parameters and ++ // regexp registers are addressed relative to this. ++ inline Register frame_pointer() { return fp; } ++ ++ // The register containing the backtrack stack top. Provides a meaningful ++ // name to the register. ++ inline Register backtrack_stackpointer() { return t0; } ++ ++ // Register holding pointer to the current code object. ++ inline Register code_pointer() { return a5; } ++ ++ // Byte size of chars in the string to match (decided by the Mode argument). ++ inline int char_size() { return static_cast(mode_); } ++ ++ // Equivalent to a conditional branch to the label, unless the label ++ // is nullptr, in which case it is a conditional Backtrack. ++ void BranchOrBacktrack(Label* to, Condition condition, Register rs, ++ const Operand& rt); ++ ++ // Call and return internally in the generated code in a way that ++ // is GC-safe (i.e., doesn't leave absolute code addresses on the stack) ++ inline void SafeCall(Label* to, Condition cond, Register rs, ++ const Operand& rt); ++ inline void SafeReturn(); ++ inline void SafeCallTarget(Label* name); ++ ++ // Pushes the value of a register on the backtrack stack. Decrements the ++ // stack pointer by a word size and stores the register's value there. ++ inline void Push(Register source); ++ ++ // Pops a value from the backtrack stack. Reads the word at the stack pointer ++ // and increments it by a word size. ++ inline void Pop(Register target); ++ ++ Isolate* isolate() const { return masm_->isolate(); } ++ ++ MacroAssembler* masm_; ++ ++ // Which mode to generate code for (Latin1 or UC16). ++ Mode mode_; ++ ++ // One greater than maximal register index actually used. ++ int num_registers_; ++ ++ // Number of registers to output at the end (the saved registers ++ // are always 0..num_saved_registers_-1). ++ int num_saved_registers_; ++ ++ // Labels used internally. 
++ Label entry_label_; ++ Label start_label_; ++ Label success_label_; ++ Label backtrack_label_; ++ Label exit_label_; ++ Label check_preempt_label_; ++ Label stack_overflow_label_; ++ Label internal_failure_label_; ++}; ++ ++} // namespace internal ++} // namespace v8 ++ ++#endif // V8_REGEXP_LA64_REGEXP_MACRO_ASSEMBLER_LA64_H_ +diff --git a/src/3rdparty/chromium/v8/src/regexp/regexp-macro-assembler-arch.h b/src/3rdparty/chromium/v8/src/regexp/regexp-macro-assembler-arch.h +index 8ec12a0ae62..cdc95655184 100644 +--- a/src/3rdparty/chromium/v8/src/regexp/regexp-macro-assembler-arch.h ++++ b/src/3rdparty/chromium/v8/src/regexp/regexp-macro-assembler-arch.h +@@ -21,6 +21,8 @@ + #include "src/regexp/mips/regexp-macro-assembler-mips.h" + #elif V8_TARGET_ARCH_MIPS64 + #include "src/regexp/mips64/regexp-macro-assembler-mips64.h" ++#elif V8_TARGET_ARCH_LA64 ++#include "src/regexp/la64/regexp-macro-assembler-la64.h" + #elif V8_TARGET_ARCH_S390 + #include "src/regexp/s390/regexp-macro-assembler-s390.h" + #else +diff --git a/src/3rdparty/chromium/v8/src/regexp/regexp-macro-assembler-tracer.cc b/src/3rdparty/chromium/v8/src/regexp/regexp-macro-assembler-tracer.cc +index 0a122017437..b357ec85e8d 100644 +--- a/src/3rdparty/chromium/v8/src/regexp/regexp-macro-assembler-tracer.cc ++++ b/src/3rdparty/chromium/v8/src/regexp/regexp-macro-assembler-tracer.cc +@@ -15,8 +15,8 @@ RegExpMacroAssemblerTracer::RegExpMacroAssemblerTracer( + : RegExpMacroAssembler(isolate, assembler->zone()), assembler_(assembler) { + IrregexpImplementation type = assembler->Implementation(); + DCHECK_LT(type, 9); +- const char* impl_names[] = {"IA32", "ARM", "ARM64", "MIPS", "S390", +- "PPC", "X64", "X87", "Bytecode"}; ++ const char* impl_names[] = {"IA32", "ARM", "ARM64", "MIPS", "LA64", ++ "S390", "PPC", "X64", "X87", "Bytecode"}; + PrintF("RegExpMacroAssembler%s();\n", impl_names[type]); + } + +diff --git a/src/3rdparty/chromium/v8/src/regexp/regexp-macro-assembler.h b/src/3rdparty/chromium/v8/src/regexp/regexp-macro-assembler.h +index e83446cdc9b..6047a71e6cc 100644 +--- a/src/3rdparty/chromium/v8/src/regexp/regexp-macro-assembler.h ++++ b/src/3rdparty/chromium/v8/src/regexp/regexp-macro-assembler.h +@@ -43,6 +43,7 @@ class RegExpMacroAssembler { + kARMImplementation, + kARM64Implementation, + kMIPSImplementation, ++ kLA64Implementation, + kS390Implementation, + kPPCImplementation, + kX64Implementation, +diff --git a/src/3rdparty/chromium/v8/src/regexp/regexp.cc b/src/3rdparty/chromium/v8/src/regexp/regexp.cc +index 4319990a398..641a2af9ccd 100644 +--- a/src/3rdparty/chromium/v8/src/regexp/regexp.cc ++++ b/src/3rdparty/chromium/v8/src/regexp/regexp.cc +@@ -854,6 +854,9 @@ bool RegExpImpl::Compile(Isolate* isolate, Zone* zone, RegExpCompileData* data, + #elif V8_TARGET_ARCH_MIPS64 + macro_assembler.reset(new RegExpMacroAssemblerMIPS( + isolate, zone, mode, (data->capture_count + 1) * 2)); ++#elif V8_TARGET_ARCH_LA64 ++ macro_assembler.reset(new RegExpMacroAssemblerLA64( ++ isolate, zone, mode, (data->capture_count + 1) * 2)); + #else + #error "Unsupported architecture" + #endif +diff --git a/src/3rdparty/chromium/v8/src/runtime/runtime-atomics.cc b/src/3rdparty/chromium/v8/src/runtime/runtime-atomics.cc +index 34259c6e67b..a0a5825f8f2 100644 +--- a/src/3rdparty/chromium/v8/src/runtime/runtime-atomics.cc ++++ b/src/3rdparty/chromium/v8/src/runtime/runtime-atomics.cc +@@ -20,7 +20,8 @@ namespace internal { + + // Other platforms have CSA support, see builtins-sharedarraybuffer-gen.h. 
+ #if V8_TARGET_ARCH_MIPS || V8_TARGET_ARCH_MIPS64 || V8_TARGET_ARCH_PPC64 || \ +- V8_TARGET_ARCH_PPC || V8_TARGET_ARCH_S390 || V8_TARGET_ARCH_S390X ++ V8_TARGET_ARCH_PPC || V8_TARGET_ARCH_S390 || V8_TARGET_ARCH_S390X || \ ++ V8_TARGET_ARCH_LA64 + + namespace { + +diff --git a/src/3rdparty/chromium/v8/src/snapshot/deserializer.h b/src/3rdparty/chromium/v8/src/snapshot/deserializer.h +index 62814a881ae..dfc04f19b14 100644 +--- a/src/3rdparty/chromium/v8/src/snapshot/deserializer.h ++++ b/src/3rdparty/chromium/v8/src/snapshot/deserializer.h +@@ -28,8 +28,9 @@ class Object; + // Used for platforms with embedded constant pools to trigger deserialization + // of objects found in code. + #if defined(V8_TARGET_ARCH_MIPS) || defined(V8_TARGET_ARCH_MIPS64) || \ +- defined(V8_TARGET_ARCH_PPC) || defined(V8_TARGET_ARCH_S390) || \ +- defined(V8_TARGET_ARCH_PPC64) || V8_EMBEDDED_CONSTANT_POOL ++ defined(V8_TARGET_ARCH_LA64) || defined(V8_TARGET_ARCH_PPC) || \ ++ defined(V8_TARGET_ARCH_S390) || defined(V8_TARGET_ARCH_PPC64) || \ ++ V8_EMBEDDED_CONSTANT_POOL + #define V8_CODE_EMBEDS_OBJECT_POINTER 1 + #else + #define V8_CODE_EMBEDS_OBJECT_POINTER 0 +diff --git a/src/3rdparty/chromium/v8/src/wasm/baseline/la64/liftoff-assembler-la64.h b/src/3rdparty/chromium/v8/src/wasm/baseline/la64/liftoff-assembler-la64.h +new file mode 100644 +index 00000000000..7c82427a4fe +--- /dev/null ++++ b/src/3rdparty/chromium/v8/src/wasm/baseline/la64/liftoff-assembler-la64.h +@@ -0,0 +1,1503 @@ ++// Copyright 2017 the V8 project authors. All rights reserved. ++// Use of this source code is governed by a BSD-style license that can be ++// found in the LICENSE file. ++ ++#ifndef V8_WASM_BASELINE_LA64_LIFTOFF_ASSEMBLER_LA64_H_ ++#define V8_WASM_BASELINE_LA64_LIFTOFF_ASSEMBLER_LA64_H_ ++ ++#include "src/wasm/baseline/liftoff-assembler.h" ++ ++namespace v8 { ++namespace internal { ++namespace wasm { ++ ++namespace liftoff { ++ ++// Liftoff Frames. ++// ++// slot Frame ++// +--------------------+--------------------------- ++// n+4 | optional padding slot to keep the stack 16 byte aligned. ++// n+3 | parameter n | ++// ... | ... | ++// 4 | parameter 1 | or parameter 2 ++// 3 | parameter 0 | or parameter 1 ++// 2 | (result address) | or parameter 0 ++// -----+--------------------+--------------------------- ++// 1 | return addr (ra) | ++// 0 | previous frame (fp)| ++// -----+--------------------+ <-- frame ptr (fp) ++// -1 | 0xa: WASM_COMPILED | ++// -2 | instance | ++// -----+--------------------+--------------------------- ++// -3 | slot 0 | ^ ++// -4 | slot 1 | | ++// | | Frame slots ++// | | | ++// | | v ++// | optional padding slot to keep the stack 16 byte aligned. ++// -----+--------------------+ <-- stack ptr (sp) ++// ++ ++// fp-8 holds the stack marker, fp-16 is the instance parameter. 
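// Illustration only, not part of the patch: Liftoff addresses the instance slot
// and all spill slots as negative offsets from fp, per the frame diagram above.
// A minimal sketch of that addressing with an assumed 16-byte instance offset
// (mirroring the kInstanceOffset constant defined just below):
constexpr long long kSketchInstanceOffset = 16;
constexpr long long SketchStackSlotAddress(long long fp, long long offset) {
  return fp - offset;  // what GetStackSlot(offset) expresses as MemOperand(fp, -offset)
}
constexpr long long SketchInstanceSlotAddress(long long fp) {
  return SketchStackSlotAddress(fp, kSketchInstanceOffset);
}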
++constexpr int kInstanceOffset = 16; ++ ++inline MemOperand GetStackSlot(int offset) { return MemOperand(fp, -offset); } ++ ++inline MemOperand GetInstanceOperand() { return GetStackSlot(kInstanceOffset); } ++ ++inline void Load(LiftoffAssembler* assm, LiftoffRegister dst, MemOperand src, ++ ValueType type) { ++ switch (type.kind()) { ++ case ValueType::kI32: ++ assm->Ld_w(dst.gp(), src); ++ break; ++ case ValueType::kI64: ++ assm->Ld_d(dst.gp(), src); ++ break; ++ case ValueType::kF32: ++ assm->Fld_s(dst.fp(), src); ++ break; ++ case ValueType::kF64: ++ assm->Fld_d(dst.fp(), src); ++ break; ++ default: ++ UNREACHABLE(); ++ } ++} ++ ++inline void Store(LiftoffAssembler* assm, Register base, int32_t offset, ++ LiftoffRegister src, ValueType type) { ++ MemOperand dst(base, offset); ++ switch (type.kind()) { ++ case ValueType::kI32: ++ assm->St_w(src.gp(), dst); ++ break; ++ case ValueType::kI64: ++ assm->St_d(src.gp(), dst); ++ break; ++ case ValueType::kF32: ++ assm->Fst_s(src.fp(), dst); ++ break; ++ case ValueType::kF64: ++ assm->Fst_d(src.fp(), dst); ++ break; ++ default: ++ UNREACHABLE(); ++ } ++} ++ ++inline void push(LiftoffAssembler* assm, LiftoffRegister reg, ValueType type) { ++ switch (type.kind()) { ++ case ValueType::kI32: ++ assm->addi_d(sp, sp, -kSystemPointerSize); ++ assm->St_w(reg.gp(), MemOperand(sp, 0)); ++ break; ++ case ValueType::kI64: ++ assm->push(reg.gp()); ++ break; ++ case ValueType::kF32: ++ assm->addi_d(sp, sp, -kSystemPointerSize); ++ assm->Fst_s(reg.fp(), MemOperand(sp, 0)); ++ break; ++ case ValueType::kF64: ++ assm->addi_d(sp, sp, -kSystemPointerSize); ++ assm->Fst_d(reg.fp(), MemOperand(sp, 0)); ++ break; ++ default: ++ UNREACHABLE(); ++ } ++} ++ ++} // namespace liftoff ++ ++int LiftoffAssembler::PrepareStackFrame() { ++ int offset = pc_offset(); ++ // When constant that represents size of stack frame can't be represented ++ // as 16bit we need three instructions to add it to sp, so we reserve space ++ // for this case. ++ addi_d(sp, sp, 0); ++ nop(); ++ nop(); ++ return offset; ++} ++ ++void LiftoffAssembler::PatchPrepareStackFrame(int offset, int frame_size) { ++ // We can't run out of space, just pass anything big enough to not cause the ++ // assembler to try to grow the buffer. ++ constexpr int kAvailableSpace = 256; ++ TurboAssembler patching_assembler( ++ nullptr, AssemblerOptions{}, CodeObjectRequired::kNo, ++ ExternalAssemblerBuffer(buffer_start_ + offset, kAvailableSpace)); ++ // If bytes can be represented as 16bit, daddiu will be generated and two ++ // nops will stay untouched. Otherwise, lui-ori sequence will load it to ++ // register and, as third instruction, daddu will be generated. ++ patching_assembler.Add_d(sp, sp, Operand(-frame_size)); ++} ++ ++void LiftoffAssembler::FinishCode() {} ++ ++void LiftoffAssembler::AbortCompilation() {} ++ ++// static ++constexpr int LiftoffAssembler::StaticStackFrameSize() { ++ return liftoff::kInstanceOffset; ++} ++ ++int LiftoffAssembler::SlotSizeForType(ValueType type) { ++ switch (type.kind()) { ++ case ValueType::kS128: ++ return type.element_size_bytes(); ++ default: ++ return kStackSlotSize; ++ } ++} ++ ++bool LiftoffAssembler::NeedsAlignment(ValueType type) { ++ switch (type.kind()) { ++ case ValueType::kS128: ++ return true; ++ default: ++ // No alignment because all other types are kStackSlotSize. 
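// Illustration only, not part of the patch: PrepareStackFrame above reserves an
// addi_d plus two nops because the later PatchPrepareStackFrame needs a single
// instruction when -frame_size fits the addi_d immediate and up to three when
// the constant has to be materialised first. A minimal sketch of that decision,
// assuming addi_d takes a signed 12-bit immediate:
auto sketch_frame_patch_instruction_count = [](int frame_size) {
  bool fits_si12 = frame_size <= 2048;  // i.e. -frame_size >= -2048
  return fits_si12 ? 1 : 3;
};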
++ return false; ++ } ++} ++ ++void LiftoffAssembler::LoadConstant(LiftoffRegister reg, WasmValue value, ++ RelocInfo::Mode rmode) { ++ switch (value.type().kind()) { ++ case ValueType::kI32: ++ TurboAssembler::li(reg.gp(), Operand(value.to_i32(), rmode)); ++ break; ++ case ValueType::kI64: ++ TurboAssembler::li(reg.gp(), Operand(value.to_i64(), rmode)); ++ break; ++ case ValueType::kF32: ++ TurboAssembler::Move(reg.fp(), value.to_f32_boxed().get_bits()); ++ break; ++ case ValueType::kF64: ++ TurboAssembler::Move(reg.fp(), value.to_f64_boxed().get_bits()); ++ break; ++ default: ++ UNREACHABLE(); ++ } ++} ++ ++void LiftoffAssembler::LoadFromInstance(Register dst, uint32_t offset, ++ int size) { ++ DCHECK_LE(offset, kMaxInt); ++ Ld_d(dst, liftoff::GetInstanceOperand()); ++ DCHECK(size == 4 || size == 8); ++ if (size == 4) { ++ Ld_w(dst, MemOperand(dst, offset)); ++ } else { ++ Ld_d(dst, MemOperand(dst, offset)); ++ } ++} ++ ++void LiftoffAssembler::LoadTaggedPointerFromInstance(Register dst, ++ uint32_t offset) { ++ LoadFromInstance(dst, offset, kTaggedSize); ++} ++ ++void LiftoffAssembler::SpillInstance(Register instance) { ++ St_d(instance, liftoff::GetInstanceOperand()); ++} ++ ++void LiftoffAssembler::FillInstanceInto(Register dst) { ++ Ld_d(dst, liftoff::GetInstanceOperand()); ++} ++ ++void LiftoffAssembler::LoadTaggedPointer(Register dst, Register src_addr, ++ Register offset_reg, ++ uint32_t offset_imm, ++ LiftoffRegList pinned) { ++ STATIC_ASSERT(kTaggedSize == kInt64Size); ++ Load(LiftoffRegister(dst), src_addr, offset_reg, offset_imm, ++ LoadType::kI64Load, pinned); ++} ++ ++void LiftoffAssembler::Load(LiftoffRegister dst, Register src_addr, ++ Register offset_reg, uint32_t offset_imm, ++ LoadType type, LiftoffRegList pinned, ++ uint32_t* protected_load_pc, bool is_load_mem) { ++ Register src = no_reg; ++ if (offset_reg != no_reg) { ++ src = GetUnusedRegister(kGpReg, pinned).gp(); ++ emit_ptrsize_add(src, src_addr, offset_reg); ++ } ++ MemOperand src_op = (offset_reg != no_reg) ? 
MemOperand(src, offset_imm) ++ : MemOperand(src_addr, offset_imm); ++ ++ if (protected_load_pc) *protected_load_pc = pc_offset(); ++ switch (type.value()) { ++ case LoadType::kI32Load8U: ++ case LoadType::kI64Load8U: ++ Ld_bu(dst.gp(), src_op); ++ break; ++ case LoadType::kI32Load8S: ++ case LoadType::kI64Load8S: ++ Ld_b(dst.gp(), src_op); ++ break; ++ case LoadType::kI32Load16U: ++ case LoadType::kI64Load16U: ++ TurboAssembler::Ld_hu(dst.gp(), src_op); ++ break; ++ case LoadType::kI32Load16S: ++ case LoadType::kI64Load16S: ++ TurboAssembler::Ld_h(dst.gp(), src_op); ++ break; ++ case LoadType::kI64Load32U: ++ TurboAssembler::Ld_wu(dst.gp(), src_op); ++ break; ++ case LoadType::kI32Load: ++ case LoadType::kI64Load32S: ++ TurboAssembler::Ld_w(dst.gp(), src_op); ++ break; ++ case LoadType::kI64Load: ++ TurboAssembler::Ld_d(dst.gp(), src_op); ++ break; ++ case LoadType::kF32Load: ++ TurboAssembler::Fld_s(dst.fp(), src_op); ++ break; ++ case LoadType::kF64Load: ++ TurboAssembler::Fld_d(dst.fp(), src_op); ++ break; ++ default: ++ UNREACHABLE(); ++ } ++} ++ ++void LiftoffAssembler::Store(Register dst_addr, Register offset_reg, ++ uint32_t offset_imm, LiftoffRegister src, ++ StoreType type, LiftoffRegList pinned, ++ uint32_t* protected_store_pc, bool is_store_mem) { ++ Register dst = no_reg; ++ MemOperand dst_op = MemOperand(dst_addr, offset_imm); ++ if (offset_reg != no_reg) { ++ if (is_store_mem) { ++ pinned.set(src); ++ } ++ dst = GetUnusedRegister(kGpReg, pinned).gp(); ++ emit_ptrsize_add(dst, dst_addr, offset_reg); ++ dst_op = MemOperand(dst, offset_imm); ++ } ++ ++ if (protected_store_pc) *protected_store_pc = pc_offset(); ++ switch (type.value()) { ++ case StoreType::kI32Store8: ++ case StoreType::kI64Store8: ++ St_b(src.gp(), dst_op); ++ break; ++ case StoreType::kI32Store16: ++ case StoreType::kI64Store16: ++ TurboAssembler::St_h(src.gp(), dst_op); ++ break; ++ case StoreType::kI32Store: ++ case StoreType::kI64Store32: ++ TurboAssembler::St_w(src.gp(), dst_op); ++ break; ++ case StoreType::kI64Store: ++ TurboAssembler::St_d(src.gp(), dst_op); ++ break; ++ case StoreType::kF32Store: ++ TurboAssembler::Fst_s(src.fp(), dst_op); ++ break; ++ case StoreType::kF64Store: ++ TurboAssembler::Fst_d(src.fp(), dst_op); ++ break; ++ default: ++ UNREACHABLE(); ++ } ++} ++ ++void LiftoffAssembler::AtomicLoad(LiftoffRegister dst, Register src_addr, ++ Register offset_reg, uint32_t offset_imm, ++ LoadType type, LiftoffRegList pinned) { ++ bailout(kAtomics, "AtomicLoad"); ++} ++ ++void LiftoffAssembler::AtomicStore(Register dst_addr, Register offset_reg, ++ uint32_t offset_imm, LiftoffRegister src, ++ StoreType type, LiftoffRegList pinned) { ++ bailout(kAtomics, "AtomicStore"); ++} ++ ++void LiftoffAssembler::AtomicAdd(Register dst_addr, Register offset_reg, ++ uint32_t offset_imm, LiftoffRegister value, ++ StoreType type) { ++ bailout(kAtomics, "AtomicAdd"); ++} ++ ++void LiftoffAssembler::AtomicSub(Register dst_addr, Register offset_reg, ++ uint32_t offset_imm, LiftoffRegister value, ++ StoreType type) { ++ bailout(kAtomics, "AtomicSub"); ++} ++ ++void LiftoffAssembler::AtomicAnd(Register dst_addr, Register offset_reg, ++ uint32_t offset_imm, LiftoffRegister value, ++ StoreType type) { ++ bailout(kAtomics, "AtomicAnd"); ++} ++ ++void LiftoffAssembler::AtomicOr(Register dst_addr, Register offset_reg, ++ uint32_t offset_imm, LiftoffRegister value, ++ StoreType type) { ++ bailout(kAtomics, "AtomicOr"); ++} ++ ++void LiftoffAssembler::AtomicXor(Register dst_addr, Register offset_reg, ++ uint32_t offset_imm, 
LiftoffRegister value, ++ StoreType type) { ++ bailout(kAtomics, "AtomicXor"); ++} ++ ++void LiftoffAssembler::AtomicExchange(Register dst_addr, Register offset_reg, ++ uint32_t offset_imm, ++ LiftoffRegister value, StoreType type) { ++ bailout(kAtomics, "AtomicExchange"); ++} ++ ++void LiftoffAssembler::AtomicCompareExchange( ++ Register dst_addr, Register offset_reg, uint32_t offset_imm, ++ LiftoffRegister expected, LiftoffRegister new_value, LiftoffRegister result, ++ StoreType type) { ++ bailout(kAtomics, "AtomicCompareExchange"); ++} ++ ++void LiftoffAssembler::AtomicFence() { dbar(0); } ++ ++void LiftoffAssembler::LoadCallerFrameSlot(LiftoffRegister dst, ++ uint32_t caller_slot_idx, ++ ValueType type) { ++ MemOperand src(fp, kSystemPointerSize * (caller_slot_idx + 1)); ++ liftoff::Load(this, dst, src, type); ++} ++ ++void LiftoffAssembler::MoveStackValue(uint32_t dst_offset, uint32_t src_offset, ++ ValueType type) { ++ DCHECK_NE(dst_offset, src_offset); ++ LiftoffRegister reg = GetUnusedRegister(reg_class_for(type)); ++ Fill(reg, src_offset, type); ++ Spill(dst_offset, reg, type); ++} ++ ++void LiftoffAssembler::Move(Register dst, Register src, ValueType type) { ++ DCHECK_NE(dst, src); ++ // TODO(ksreten): Handle different sizes here. ++ TurboAssembler::Move(dst, src); ++} ++ ++void LiftoffAssembler::Move(DoubleRegister dst, DoubleRegister src, ++ ValueType type) { ++ DCHECK_NE(dst, src); ++ TurboAssembler::Move(dst, src); ++} ++ ++void LiftoffAssembler::Spill(int offset, LiftoffRegister reg, ValueType type) { ++ RecordUsedSpillOffset(offset); ++ MemOperand dst = liftoff::GetStackSlot(offset); ++ switch (type.kind()) { ++ case ValueType::kI32: ++ St_w(reg.gp(), dst); ++ break; ++ case ValueType::kI64: ++ St_d(reg.gp(), dst); ++ break; ++ case ValueType::kF32: ++ Fst_s(reg.fp(), dst); ++ break; ++ case ValueType::kF64: ++ TurboAssembler::Fst_d(reg.fp(), dst); ++ break; ++ default: ++ UNREACHABLE(); ++ } ++} ++ ++void LiftoffAssembler::Spill(int offset, WasmValue value) { ++ RecordUsedSpillOffset(offset); ++ MemOperand dst = liftoff::GetStackSlot(offset); ++ switch (value.type().kind()) { ++ case ValueType::kI32: { ++ LiftoffRegister tmp = GetUnusedRegister(kGpReg); ++ TurboAssembler::li(tmp.gp(), Operand(value.to_i32())); ++ St_w(tmp.gp(), dst); ++ break; ++ } ++ case ValueType::kI64: { ++ LiftoffRegister tmp = GetUnusedRegister(kGpReg); ++ TurboAssembler::li(tmp.gp(), value.to_i64()); ++ St_d(tmp.gp(), dst); ++ break; ++ } ++ default: ++ // kWasmF32 and kWasmF64 are unreachable, since those ++ // constants are not tracked. ++ UNREACHABLE(); ++ } ++} ++ ++void LiftoffAssembler::Fill(LiftoffRegister reg, int offset, ValueType type) { ++ MemOperand src = liftoff::GetStackSlot(offset); ++ switch (type.kind()) { ++ case ValueType::kI32: ++ Ld_w(reg.gp(), src); ++ break; ++ case ValueType::kI64: ++ Ld_d(reg.gp(), src); ++ break; ++ case ValueType::kF32: ++ Fld_s(reg.fp(), src); ++ break; ++ case ValueType::kF64: ++ TurboAssembler::Fld_d(reg.fp(), src); ++ break; ++ default: ++ UNREACHABLE(); ++ } ++} ++ ++void LiftoffAssembler::FillI64Half(Register, int offset, RegPairHalf) { ++ UNREACHABLE(); ++} ++ ++void LiftoffAssembler::FillStackSlotsWithZero(int start, int size) { ++ DCHECK_LT(0, size); ++ RecordUsedSpillOffset(start + size); ++ ++ if (size <= 12 * kStackSlotSize) { ++ // Special straight-line code for up to 12 slots. Generates one ++ // instruction per slot (<= 12 instructions total). 
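// Illustration only, not part of the patch: the zero-fill picks straight-line
// stores for small spill areas and falls back to the two-register loop further
// down for anything larger. A minimal sketch of the cutoff, assuming 8-byte
// stack slots:
auto sketch_use_straight_line_fill = [](int size_in_bytes) {
  constexpr int kAssumedStackSlotSize = 8;  // stand-in for kStackSlotSize
  return size_in_bytes <= 12 * kAssumedStackSlotSize;
};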
++ uint32_t remainder = size; ++ for (; remainder >= kStackSlotSize; remainder -= kStackSlotSize) { ++ St_d(zero_reg, liftoff::GetStackSlot(start + remainder)); ++ } ++ DCHECK(remainder == 4 || remainder == 0); ++ if (remainder) { ++ St_w(zero_reg, liftoff::GetStackSlot(start + remainder)); ++ } ++ } else { ++ // General case for bigger counts (12 instructions). ++ // Use a0 for start address (inclusive), a1 for end address (exclusive). ++ Push(a1, a0); ++ Add_d(a0, fp, Operand(-start - size)); ++ Add_d(a1, fp, Operand(-start)); ++ ++ Label loop; ++ bind(&loop); ++ St_d(zero_reg, MemOperand(a0, kSystemPointerSize)); ++ addi_d(a0, a0, kSystemPointerSize); ++ BranchShort(&loop, ne, a0, Operand(a1)); ++ ++ Pop(a1, a0); ++ } ++} ++ ++void LiftoffAssembler::emit_i64_clz(LiftoffRegister dst, LiftoffRegister src) { ++ TurboAssembler::Clz_d(dst.gp(), src.gp()); ++} ++ ++void LiftoffAssembler::emit_i64_ctz(LiftoffRegister dst, LiftoffRegister src) { ++ TurboAssembler::Ctz_d(dst.gp(), src.gp()); ++} ++ ++bool LiftoffAssembler::emit_i64_popcnt(LiftoffRegister dst, ++ LiftoffRegister src) { ++ TurboAssembler::Popcnt_d(dst.gp(), src.gp()); ++ return true; ++} ++ ++void LiftoffAssembler::emit_i32_mul(Register dst, Register lhs, Register rhs) { ++ TurboAssembler::Mul_w(dst, lhs, rhs); ++} ++ ++void LiftoffAssembler::emit_i32_divs(Register dst, Register lhs, Register rhs, ++ Label* trap_div_by_zero, ++ Label* trap_div_unrepresentable) { ++ TurboAssembler::Branch(trap_div_by_zero, eq, rhs, Operand(zero_reg)); ++ ++ // Check if lhs == kMinInt and rhs == -1, since this case is unrepresentable. ++ TurboAssembler::li(kScratchReg, 1); ++ TurboAssembler::li(kScratchReg2, 1); ++ TurboAssembler::LoadZeroOnCondition(kScratchReg, lhs, Operand(kMinInt), eq); ++ TurboAssembler::LoadZeroOnCondition(kScratchReg2, rhs, Operand(-1), eq); ++ add_d(kScratchReg, kScratchReg, kScratchReg2); ++ TurboAssembler::Branch(trap_div_unrepresentable, eq, kScratchReg, ++ Operand(zero_reg)); ++ ++ TurboAssembler::Div_w(dst, lhs, rhs); ++} ++ ++void LiftoffAssembler::emit_i32_divu(Register dst, Register lhs, Register rhs, ++ Label* trap_div_by_zero) { ++ TurboAssembler::Branch(trap_div_by_zero, eq, rhs, Operand(zero_reg)); ++ TurboAssembler::Div_wu(dst, lhs, rhs); ++} ++ ++void LiftoffAssembler::emit_i32_rems(Register dst, Register lhs, Register rhs, ++ Label* trap_div_by_zero) { ++ TurboAssembler::Branch(trap_div_by_zero, eq, rhs, Operand(zero_reg)); ++ TurboAssembler::Mod_w(dst, lhs, rhs); ++} ++ ++void LiftoffAssembler::emit_i32_remu(Register dst, Register lhs, Register rhs, ++ Label* trap_div_by_zero) { ++ TurboAssembler::Branch(trap_div_by_zero, eq, rhs, Operand(zero_reg)); ++ TurboAssembler::Mod_wu(dst, lhs, rhs); ++} ++ ++#define I32_BINOP(name, instruction) \ ++ void LiftoffAssembler::emit_i32_##name(Register dst, Register lhs, \ ++ Register rhs) { \ ++ instruction(dst, lhs, rhs); \ ++ } ++ ++// clang-format off ++I32_BINOP(add, add_w) ++I32_BINOP(sub, sub_w) ++I32_BINOP(and, and_) ++I32_BINOP(or, or_) ++I32_BINOP(xor, xor_) ++// clang-format on ++ ++#undef I32_BINOP ++ ++#define I32_BINOP_I(name, instruction) \ ++ void LiftoffAssembler::emit_i32_##name(Register dst, Register lhs, \ ++ int32_t imm) { \ ++ instruction(dst, lhs, Operand(imm)); \ ++ } ++ ++// clang-format off ++I32_BINOP_I(add, Add_w) ++I32_BINOP_I(and, And) ++I32_BINOP_I(or, Or) ++I32_BINOP_I(xor, Xor) ++// clang-format on ++ ++#undef I32_BINOP_I ++ ++void LiftoffAssembler::emit_i32_clz(Register dst, Register src) { ++ TurboAssembler::Clz_w(dst, src); ++} ++ ++void 
LiftoffAssembler::emit_i32_ctz(Register dst, Register src) { ++ TurboAssembler::Ctz_w(dst, src); ++} ++ ++bool LiftoffAssembler::emit_i32_popcnt(Register dst, Register src) { ++ TurboAssembler::Popcnt_w(dst, src); ++ return true; ++} ++ ++#define I32_SHIFTOP(name, instruction) \ ++ void LiftoffAssembler::emit_i32_##name(Register dst, Register src, \ ++ Register amount) { \ ++ instruction(dst, src, amount); \ ++ } ++#define I32_SHIFTOP_I(name, instruction, instruction1) \ ++ I32_SHIFTOP(name, instruction) \ ++ void LiftoffAssembler::emit_i32_##name(Register dst, Register src, \ ++ int amount) { \ ++ instruction1(dst, src, amount & 0x1f); \ ++ } ++ ++I32_SHIFTOP_I(shl, sll_w, slli_w) ++I32_SHIFTOP_I(sar, sra_w, srai_w) ++I32_SHIFTOP_I(shr, srl_w, srli_w) ++ ++#undef I32_SHIFTOP ++#undef I32_SHIFTOP_I ++ ++void LiftoffAssembler::emit_i64_mul(LiftoffRegister dst, LiftoffRegister lhs, ++ LiftoffRegister rhs) { ++ TurboAssembler::Mul_d(dst.gp(), lhs.gp(), rhs.gp()); ++} ++ ++bool LiftoffAssembler::emit_i64_divs(LiftoffRegister dst, LiftoffRegister lhs, ++ LiftoffRegister rhs, ++ Label* trap_div_by_zero, ++ Label* trap_div_unrepresentable) { ++ TurboAssembler::Branch(trap_div_by_zero, eq, rhs.gp(), Operand(zero_reg)); ++ ++ // Check if lhs == MinInt64 and rhs == -1, since this case is unrepresentable. ++ TurboAssembler::li(kScratchReg, 1); ++ TurboAssembler::li(kScratchReg2, 1); ++ TurboAssembler::LoadZeroOnCondition( ++ kScratchReg, lhs.gp(), Operand(std::numeric_limits::min()), eq); ++ TurboAssembler::LoadZeroOnCondition(kScratchReg2, rhs.gp(), Operand(-1), eq); ++ add_d(kScratchReg, kScratchReg, kScratchReg2); ++ TurboAssembler::Branch(trap_div_unrepresentable, eq, kScratchReg, ++ Operand(zero_reg)); ++ ++ TurboAssembler::Div_d(dst.gp(), lhs.gp(), rhs.gp()); ++ return true; ++} ++ ++bool LiftoffAssembler::emit_i64_divu(LiftoffRegister dst, LiftoffRegister lhs, ++ LiftoffRegister rhs, ++ Label* trap_div_by_zero) { ++ TurboAssembler::Branch(trap_div_by_zero, eq, rhs.gp(), Operand(zero_reg)); ++ TurboAssembler::Div_du(dst.gp(), lhs.gp(), rhs.gp()); ++ return true; ++} ++ ++bool LiftoffAssembler::emit_i64_rems(LiftoffRegister dst, LiftoffRegister lhs, ++ LiftoffRegister rhs, ++ Label* trap_div_by_zero) { ++ TurboAssembler::Branch(trap_div_by_zero, eq, rhs.gp(), Operand(zero_reg)); ++ TurboAssembler::Mod_d(dst.gp(), lhs.gp(), rhs.gp()); ++ return true; ++} ++ ++bool LiftoffAssembler::emit_i64_remu(LiftoffRegister dst, LiftoffRegister lhs, ++ LiftoffRegister rhs, ++ Label* trap_div_by_zero) { ++ TurboAssembler::Branch(trap_div_by_zero, eq, rhs.gp(), Operand(zero_reg)); ++ TurboAssembler::Mod_du(dst.gp(), lhs.gp(), rhs.gp()); ++ return true; ++} ++ ++#define I64_BINOP(name, instruction) \ ++ void LiftoffAssembler::emit_i64_##name( \ ++ LiftoffRegister dst, LiftoffRegister lhs, LiftoffRegister rhs) { \ ++ instruction(dst.gp(), lhs.gp(), rhs.gp()); \ ++ } ++ ++// clang-format off ++I64_BINOP(add, Add_d) ++I64_BINOP(sub, Sub_d) ++I64_BINOP(and, and_) ++I64_BINOP(or, or_) ++I64_BINOP(xor, xor_) ++// clang-format on ++ ++#undef I64_BINOP ++ ++#define I64_BINOP_I(name, instruction) \ ++ void LiftoffAssembler::emit_i64_##name(LiftoffRegister dst, \ ++ LiftoffRegister lhs, int32_t imm) { \ ++ instruction(dst.gp(), lhs.gp(), Operand(imm)); \ ++ } ++ ++// clang-format off ++I64_BINOP_I(add, Add_d) ++I64_BINOP_I(and, And) ++I64_BINOP_I(or, Or) ++I64_BINOP_I(xor, Xor) ++// clang-format on ++ ++#undef I64_BINOP_I ++ ++#define I64_SHIFTOP(name, instruction) \ ++ void LiftoffAssembler::emit_i64_##name( \ ++ 
LiftoffRegister dst, LiftoffRegister src, Register amount) { \ ++ instruction(dst.gp(), src.gp(), amount); \ ++ } ++#define I64_SHIFTOP_I(name, instruction, instructioni) \ ++ I64_SHIFTOP(name, instruction) \ ++ void LiftoffAssembler::emit_i64_##name(LiftoffRegister dst, \ ++ LiftoffRegister src, int amount) { \ ++ DCHECK(is_uint6(amount)); \ ++ instructioni(dst.gp(), src.gp(), amount); \ ++ } ++ ++I64_SHIFTOP_I(shl, sll_d, slli_d) ++I64_SHIFTOP_I(sar, sra_d, srai_d) ++I64_SHIFTOP_I(shr, srl_d, srli_d) ++ ++#undef I64_SHIFTOP ++#undef I64_SHIFTOP_I ++ ++void LiftoffAssembler::emit_u32_to_intptr(Register dst, Register src) { ++ add_w(dst, src, zero_reg); ++} ++ ++void LiftoffAssembler::emit_f32_neg(DoubleRegister dst, DoubleRegister src) { ++ TurboAssembler::Neg_s(dst, src); ++} ++ ++void LiftoffAssembler::emit_f64_neg(DoubleRegister dst, DoubleRegister src) { ++ TurboAssembler::Neg_d(dst, src); ++} ++ ++void LiftoffAssembler::emit_f32_min(DoubleRegister dst, DoubleRegister lhs, ++ DoubleRegister rhs) { ++ Label ool, done; ++ TurboAssembler::Float32Min(dst, lhs, rhs, &ool); ++ Branch(&done); ++ ++ bind(&ool); ++ TurboAssembler::Float32MinOutOfLine(dst, lhs, rhs); ++ bind(&done); ++} ++ ++void LiftoffAssembler::emit_f32_max(DoubleRegister dst, DoubleRegister lhs, ++ DoubleRegister rhs) { ++ Label ool, done; ++ TurboAssembler::Float32Max(dst, lhs, rhs, &ool); ++ Branch(&done); ++ ++ bind(&ool); ++ TurboAssembler::Float32MaxOutOfLine(dst, lhs, rhs); ++ bind(&done); ++} ++ ++void LiftoffAssembler::emit_f32_copysign(DoubleRegister dst, DoubleRegister lhs, ++ DoubleRegister rhs) { ++ bailout(kComplexOperation, "f32_copysign"); ++} ++ ++void LiftoffAssembler::emit_f64_min(DoubleRegister dst, DoubleRegister lhs, ++ DoubleRegister rhs) { ++ Label ool, done; ++ TurboAssembler::Float64Min(dst, lhs, rhs, &ool); ++ Branch(&done); ++ ++ bind(&ool); ++ TurboAssembler::Float64MinOutOfLine(dst, lhs, rhs); ++ bind(&done); ++} ++ ++void LiftoffAssembler::emit_f64_max(DoubleRegister dst, DoubleRegister lhs, ++ DoubleRegister rhs) { ++ Label ool, done; ++ TurboAssembler::Float64Max(dst, lhs, rhs, &ool); ++ Branch(&done); ++ ++ bind(&ool); ++ TurboAssembler::Float64MaxOutOfLine(dst, lhs, rhs); ++ bind(&done); ++} ++ ++void LiftoffAssembler::emit_f64_copysign(DoubleRegister dst, DoubleRegister lhs, ++ DoubleRegister rhs) { ++ bailout(kComplexOperation, "f64_copysign"); ++} ++ ++#define FP_BINOP(name, instruction) \ ++ void LiftoffAssembler::emit_##name(DoubleRegister dst, DoubleRegister lhs, \ ++ DoubleRegister rhs) { \ ++ instruction(dst, lhs, rhs); \ ++ } ++#define FP_UNOP(name, instruction) \ ++ void LiftoffAssembler::emit_##name(DoubleRegister dst, DoubleRegister src) { \ ++ instruction(dst, src); \ ++ } ++#define FP_UNOP_RETURN_TRUE(name, instruction) \ ++ bool LiftoffAssembler::emit_##name(DoubleRegister dst, DoubleRegister src) { \ ++ instruction(dst, src); \ ++ return true; \ ++ } ++ ++FP_BINOP(f32_add, fadd_s) ++FP_BINOP(f32_sub, fsub_s) ++FP_BINOP(f32_mul, fmul_s) ++FP_BINOP(f32_div, fdiv_s) ++FP_UNOP(f32_abs, fabs_s) ++FP_UNOP_RETURN_TRUE(f32_ceil, Ceil_s) ++FP_UNOP_RETURN_TRUE(f32_floor, Floor_s) ++FP_UNOP_RETURN_TRUE(f32_trunc, Trunc_s) ++FP_UNOP_RETURN_TRUE(f32_nearest_int, Round_s) ++FP_UNOP(f32_sqrt, fsqrt_s) ++FP_BINOP(f64_add, fadd_d) ++FP_BINOP(f64_sub, fsub_d) ++FP_BINOP(f64_mul, fmul_d) ++FP_BINOP(f64_div, fdiv_d) ++FP_UNOP(f64_abs, fabs_d) ++FP_UNOP_RETURN_TRUE(f64_ceil, Ceil_d) ++FP_UNOP_RETURN_TRUE(f64_floor, Floor_d) ++FP_UNOP_RETURN_TRUE(f64_trunc, Trunc_d) 
++FP_UNOP_RETURN_TRUE(f64_nearest_int, Round_d) ++FP_UNOP(f64_sqrt, fsqrt_d) ++ ++#undef FP_BINOP ++#undef FP_UNOP ++#undef FP_UNOP_RETURN_TRUE ++ ++bool LiftoffAssembler::emit_type_conversion(WasmOpcode opcode, ++ LiftoffRegister dst, ++ LiftoffRegister src, Label* trap) { ++ switch (opcode) { ++ case kExprI32ConvertI64: ++ TurboAssembler::bstrpick_w(dst.gp(), src.gp(), 31, 0); ++ return true; ++ case kExprI32SConvertF32: { ++ LiftoffRegister rounded = ++ GetUnusedRegister(kFpReg, LiftoffRegList::ForRegs(src)); ++ LiftoffRegister converted_back = ++ GetUnusedRegister(kFpReg, LiftoffRegList::ForRegs(src, rounded)); ++ ++ // Real conversion. ++ TurboAssembler::Trunc_s(rounded.fp(), src.fp()); ++ ftintrz_w_s(kScratchDoubleReg, rounded.fp()); ++ movfr2gr_s(dst.gp(), kScratchDoubleReg); ++ // Avoid INT32_MAX as an overflow indicator and use INT32_MIN instead, ++ // because INT32_MIN allows easier out-of-bounds detection. ++ TurboAssembler::Add_w(kScratchReg, dst.gp(), 1); ++ TurboAssembler::Slt(kScratchReg2, kScratchReg, dst.gp()); ++ TurboAssembler::Movn(dst.gp(), kScratchReg, kScratchReg2); ++ ++ // Checking if trap. ++ movgr2fr_w(kScratchDoubleReg, dst.gp()); ++ ffint_s_w(converted_back.fp(), kScratchDoubleReg); ++ TurboAssembler::CompareF32(rounded.fp(), converted_back.fp(), CEQ); ++ TurboAssembler::BranchFalseF(trap); ++ return true; ++ } ++ case kExprI32UConvertF32: { ++ LiftoffRegister rounded = ++ GetUnusedRegister(kFpReg, LiftoffRegList::ForRegs(src)); ++ LiftoffRegister converted_back = ++ GetUnusedRegister(kFpReg, LiftoffRegList::ForRegs(src, rounded)); ++ ++ // Real conversion. ++ TurboAssembler::Trunc_s(rounded.fp(), src.fp()); ++ TurboAssembler::Ftintrz_uw_s(dst.gp(), rounded.fp(), kScratchDoubleReg); ++ // Avoid UINT32_MAX as an overflow indicator and use 0 instead, ++ // because 0 allows easier out-of-bounds detection. ++ TurboAssembler::Add_w(kScratchReg, dst.gp(), 1); ++ TurboAssembler::Movz(dst.gp(), zero_reg, kScratchReg); ++ ++ // Checking if trap. ++ TurboAssembler::Ffint_d_uw(converted_back.fp(), dst.gp()); ++ fcvt_s_d(converted_back.fp(), converted_back.fp()); ++ TurboAssembler::CompareF32(rounded.fp(), converted_back.fp(), CEQ); ++ TurboAssembler::BranchFalseF(trap); ++ return true; ++ } ++ case kExprI32SConvertF64: { ++ LiftoffRegister rounded = ++ GetUnusedRegister(kFpReg, LiftoffRegList::ForRegs(src)); ++ LiftoffRegister converted_back = ++ GetUnusedRegister(kFpReg, LiftoffRegList::ForRegs(src, rounded)); ++ ++ // Real conversion. ++ TurboAssembler::Trunc_d(rounded.fp(), src.fp()); ++ ftintrz_w_d(kScratchDoubleReg, rounded.fp()); ++ movfr2gr_s(dst.gp(), kScratchDoubleReg); ++ ++ // Checking if trap. ++ ffint_d_w(converted_back.fp(), kScratchDoubleReg); ++ TurboAssembler::CompareF64(rounded.fp(), converted_back.fp(), CEQ); ++ TurboAssembler::BranchFalseF(trap); ++ return true; ++ } ++ case kExprI32UConvertF64: { ++ LiftoffRegister rounded = ++ GetUnusedRegister(kFpReg, LiftoffRegList::ForRegs(src)); ++ LiftoffRegister converted_back = ++ GetUnusedRegister(kFpReg, LiftoffRegList::ForRegs(src, rounded)); ++ ++ // Real conversion. ++ TurboAssembler::Trunc_d(rounded.fp(), src.fp()); ++ TurboAssembler::Ftintrz_uw_d(dst.gp(), rounded.fp(), kScratchDoubleReg); ++ ++ // Checking if trap. 
++ TurboAssembler::Ffint_d_uw(converted_back.fp(), dst.gp()); ++ TurboAssembler::CompareF64(rounded.fp(), converted_back.fp(), CEQ); ++ TurboAssembler::BranchFalseF(trap); ++ return true; ++ } ++ case kExprI32ReinterpretF32: ++ TurboAssembler::FmoveLow(dst.gp(), src.fp()); ++ return true; ++ case kExprI64SConvertI32: ++ slli_w(dst.gp(), src.gp(), 0); ++ return true; ++ case kExprI64UConvertI32: ++ TurboAssembler::bstrpick_d(dst.gp(), src.gp(), 31, 0); ++ return true; ++ case kExprI64SConvertF32: { ++ LiftoffRegister rounded = ++ GetUnusedRegister(kFpReg, LiftoffRegList::ForRegs(src)); ++ LiftoffRegister converted_back = ++ GetUnusedRegister(kFpReg, LiftoffRegList::ForRegs(src, rounded)); ++ ++ // Real conversion. ++ TurboAssembler::Trunc_s(rounded.fp(), src.fp()); ++ ftintrz_l_s(kScratchDoubleReg, rounded.fp()); ++ movfr2gr_d(dst.gp(), kScratchDoubleReg); ++ // Avoid INT64_MAX as an overflow indicator and use INT64_MIN instead, ++ // because INT64_MIN allows easier out-of-bounds detection. ++ TurboAssembler::Add_d(kScratchReg, dst.gp(), 1); ++ TurboAssembler::Slt(kScratchReg2, kScratchReg, dst.gp()); ++ TurboAssembler::Movn(dst.gp(), kScratchReg, kScratchReg2); ++ ++ // Checking if trap. ++ movgr2fr_d(kScratchDoubleReg, dst.gp()); ++ ffint_s_l(converted_back.fp(), kScratchDoubleReg); ++ TurboAssembler::CompareF32(rounded.fp(), converted_back.fp(), CEQ); ++ TurboAssembler::BranchFalseF(trap); ++ return true; ++ } ++ case kExprI64UConvertF32: { ++ // Real conversion. ++ TurboAssembler::Ftintrz_ul_s(dst.gp(), src.fp(), kScratchDoubleReg, ++ kScratchReg); ++ ++ // Checking if trap. ++ TurboAssembler::Branch(trap, eq, kScratchReg, Operand(zero_reg)); ++ return true; ++ } ++ case kExprI64SConvertF64: { ++ LiftoffRegister rounded = ++ GetUnusedRegister(kFpReg, LiftoffRegList::ForRegs(src)); ++ LiftoffRegister converted_back = ++ GetUnusedRegister(kFpReg, LiftoffRegList::ForRegs(src, rounded)); ++ ++ // Real conversion. ++ TurboAssembler::Trunc_d(rounded.fp(), src.fp()); ++ ftintrz_l_d(kScratchDoubleReg, rounded.fp()); ++ movfr2gr_d(dst.gp(), kScratchDoubleReg); ++ // Avoid INT64_MAX as an overflow indicator and use INT64_MIN instead, ++ // because INT64_MIN allows easier out-of-bounds detection. ++ TurboAssembler::Add_d(kScratchReg, dst.gp(), 1); ++ TurboAssembler::Slt(kScratchReg2, kScratchReg, dst.gp()); ++ TurboAssembler::Movn(dst.gp(), kScratchReg, kScratchReg2); ++ ++ // Checking if trap. ++ movgr2fr_d(kScratchDoubleReg, dst.gp()); ++ ffint_d_l(converted_back.fp(), kScratchDoubleReg); ++ TurboAssembler::CompareF64(rounded.fp(), converted_back.fp(), CEQ); ++ TurboAssembler::BranchFalseF(trap); ++ return true; ++ } ++ case kExprI64UConvertF64: { ++ // Real conversion. ++ TurboAssembler::Ftintrz_ul_d(dst.gp(), src.fp(), kScratchDoubleReg, ++ kScratchReg); ++ ++ // Checking if trap. 
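// Illustration only, not part of the patch: the signed float-to-int cases above
// remap the hardware's saturation value INT32_MAX / INT64_MAX to INT32_MIN /
// INT64_MIN (the Add/Slt/Movn triple), because the MIN sentinel makes the
// out-of-bounds case easier to detect in the convert-back-and-compare trap
// check. A plain-C++ sketch of the 32-bit remap:
auto sketch_remap_overflow_indicator = [](int truncated) {
  bool hardware_overflow = truncated == 2147483647;          // INT32_MAX
  return hardware_overflow ? (-2147483647 - 1) : truncated;  // INT32_MIN
};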
++ TurboAssembler::Branch(trap, eq, kScratchReg, Operand(zero_reg)); ++ return true; ++ } ++ case kExprI64ReinterpretF64: ++ movfr2gr_d(dst.gp(), src.fp()); ++ return true; ++ case kExprF32SConvertI32: { ++ LiftoffRegister scratch = ++ GetUnusedRegister(kFpReg, LiftoffRegList::ForRegs(dst)); ++ movgr2fr_w(scratch.fp(), src.gp()); ++ ffint_s_w(dst.fp(), scratch.fp()); ++ return true; ++ } ++ case kExprF32UConvertI32: ++ TurboAssembler::Ffint_s_uw(dst.fp(), src.gp()); ++ return true; ++ case kExprF32ConvertF64: ++ fcvt_s_d(dst.fp(), src.fp()); ++ return true; ++ case kExprF32ReinterpretI32: ++ TurboAssembler::FmoveLow(dst.fp(), src.gp()); ++ return true; ++ case kExprF64SConvertI32: { ++ LiftoffRegister scratch = ++ GetUnusedRegister(kFpReg, LiftoffRegList::ForRegs(dst)); ++ movgr2fr_w(scratch.fp(), src.gp()); ++ ffint_d_w(dst.fp(), scratch.fp()); ++ return true; ++ } ++ case kExprF64UConvertI32: ++ TurboAssembler::Ffint_d_uw(dst.fp(), src.gp()); ++ return true; ++ case kExprF64ConvertF32: ++ fcvt_d_s(dst.fp(), src.fp()); ++ return true; ++ case kExprF64ReinterpretI64: ++ movgr2fr_d(dst.fp(), src.gp()); ++ return true; ++ default: ++ return false; ++ } ++} ++ ++void LiftoffAssembler::emit_i32_signextend_i8(Register dst, Register src) { ++ bailout(kComplexOperation, "i32_signextend_i8"); ++} ++ ++void LiftoffAssembler::emit_i32_signextend_i16(Register dst, Register src) { ++ bailout(kComplexOperation, "i32_signextend_i16"); ++} ++ ++void LiftoffAssembler::emit_i64_signextend_i8(LiftoffRegister dst, ++ LiftoffRegister src) { ++ bailout(kComplexOperation, "i64_signextend_i8"); ++} ++ ++void LiftoffAssembler::emit_i64_signextend_i16(LiftoffRegister dst, ++ LiftoffRegister src) { ++ bailout(kComplexOperation, "i64_signextend_i16"); ++} ++ ++void LiftoffAssembler::emit_i64_signextend_i32(LiftoffRegister dst, ++ LiftoffRegister src) { ++ bailout(kComplexOperation, "i64_signextend_i32"); ++} ++ ++void LiftoffAssembler::emit_jump(Label* label) { ++ TurboAssembler::Branch(label); ++} ++ ++void LiftoffAssembler::emit_jump(Register target) { ++ TurboAssembler::Jump(target); ++} ++ ++void LiftoffAssembler::emit_cond_jump(Condition cond, Label* label, ++ ValueType type, Register lhs, ++ Register rhs) { ++ if (rhs != no_reg) { ++ TurboAssembler::Branch(label, cond, lhs, Operand(rhs)); ++ } else { ++ TurboAssembler::Branch(label, cond, lhs, Operand(zero_reg)); ++ } ++} ++ ++void LiftoffAssembler::emit_i32_eqz(Register dst, Register src) { ++ sltui(dst, src, 1); ++} ++ ++void LiftoffAssembler::emit_i32_set_cond(Condition cond, Register dst, ++ Register lhs, Register rhs) { ++ Register tmp = dst; ++ if (dst == lhs || dst == rhs) { ++ tmp = GetUnusedRegister(kGpReg, LiftoffRegList::ForRegs(lhs, rhs)).gp(); ++ } ++ // Write 1 as result. ++ TurboAssembler::li(tmp, 1); ++ ++ // If negative condition is true, write 0 as result. ++ Condition neg_cond = NegateCondition(cond); ++ TurboAssembler::LoadZeroOnCondition(tmp, lhs, Operand(rhs), neg_cond); ++ ++ // If tmp != dst, result will be moved. ++ TurboAssembler::Move(dst, tmp); ++} ++ ++void LiftoffAssembler::emit_i64_eqz(Register dst, LiftoffRegister src) { ++ sltui(dst, src.gp(), 1); ++} ++ ++void LiftoffAssembler::emit_i64_set_cond(Condition cond, Register dst, ++ LiftoffRegister lhs, ++ LiftoffRegister rhs) { ++ Register tmp = dst; ++ if (dst == lhs.gp() || dst == rhs.gp()) { ++ tmp = GetUnusedRegister(kGpReg, LiftoffRegList::ForRegs(lhs, rhs)).gp(); ++ } ++ // Write 1 as result. 
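// Illustration only, not part of the patch: emit_i32_set_cond / emit_i64_set_cond
// materialise the boolean without a branch by writing 1 and then clearing it to
// 0 when the negated condition holds. A minimal sketch of the idea for a signed
// less-than comparison:
auto sketch_set_cond_slt = [](long long lhs, long long rhs) {
  int result = 1;                // "Write 1 as result."
  if (!(lhs < rhs)) result = 0;  // LoadZeroOnCondition with NegateCondition(cond)
  return result;
};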
++ TurboAssembler::li(tmp, 1);
++
++ // If negative condition is true, write 0 as result.
++ Condition neg_cond = NegateCondition(cond);
++ TurboAssembler::LoadZeroOnCondition(tmp, lhs.gp(), Operand(rhs.gp()),
++ neg_cond);
++
++ // If tmp != dst, result will be moved.
++ TurboAssembler::Move(dst, tmp);
++}
++
++namespace liftoff {
++
++inline FPUCondition ConditionToConditionCmpFPU(Condition condition,
++ bool* predicate) {
++ switch (condition) {
++ case kEqual:
++ *predicate = true;
++ return CEQ;
++ case kUnequal:
++ *predicate = false;
++ return CEQ;
++ case kUnsignedLessThan:
++ *predicate = true;
++ return CLT;
++ case kUnsignedGreaterEqual:
++ *predicate = false;
++ return CLT;
++ case kUnsignedLessEqual:
++ *predicate = true;
++ return CLE;
++ case kUnsignedGreaterThan:
++ *predicate = false;
++ return CLE;
++ default:
++ *predicate = true;
++ break;
++ }
++ UNREACHABLE();
++}
++
++} // namespace liftoff
++
++void LiftoffAssembler::emit_f32_set_cond(Condition cond, Register dst,
++ DoubleRegister lhs,
++ DoubleRegister rhs) {
++ Label not_nan, cont;
++ TurboAssembler::CompareIsNanF32(lhs, rhs);
++ TurboAssembler::BranchFalseF(&not_nan);
++ // If one of the operands is NaN, return 1 for f32.ne, else 0.
++ if (cond == ne) {
++ TurboAssembler::li(dst, 1);
++ } else {
++ TurboAssembler::Move(dst, zero_reg);
++ }
++ TurboAssembler::Branch(&cont);
++
++ bind(&not_nan);
++
++ TurboAssembler::li(dst, 1);
++ bool predicate;
++ FPUCondition fcond = liftoff::ConditionToConditionCmpFPU(cond, &predicate);
++ TurboAssembler::CompareF32(lhs, rhs, fcond);
++ if (predicate) {
++ TurboAssembler::LoadZeroIfNotFPUCondition(dst);
++ } else {
++ TurboAssembler::LoadZeroIfFPUCondition(dst);
++ }
++
++ bind(&cont);
++}
++
++void LiftoffAssembler::emit_f64_set_cond(Condition cond, Register dst,
++ DoubleRegister lhs,
++ DoubleRegister rhs) {
++ Label not_nan, cont;
++ TurboAssembler::CompareIsNanF64(lhs, rhs);
++ TurboAssembler::BranchFalseF(&not_nan);
++ // If one of the operands is NaN, return 1 for f64.ne, else 0.
++ if (cond == ne) {
++ TurboAssembler::li(dst, 1);
++ } else {
++ TurboAssembler::Move(dst, zero_reg);
++ }
++ TurboAssembler::Branch(&cont);
++
++ bind(&not_nan);
++
++ TurboAssembler::li(dst, 1);
++ bool predicate;
++ FPUCondition fcond = liftoff::ConditionToConditionCmpFPU(cond, &predicate);
++ TurboAssembler::CompareF64(lhs, rhs, fcond);
++ if (predicate) {
++ TurboAssembler::LoadZeroIfNotFPUCondition(dst);
++ } else {
++ TurboAssembler::LoadZeroIfFPUCondition(dst);
++ }
++
++ bind(&cont);
++}
++
++void LiftoffAssembler::emit_i8x16_splat(LiftoffRegister dst,
++ LiftoffRegister src) {
++ bailout(kSimd, "emit_i8x16_splat");
++}
++
++void LiftoffAssembler::emit_i16x8_splat(LiftoffRegister dst,
++ LiftoffRegister src) {
++ bailout(kSimd, "emit_i16x8_splat");
++}
++
++void LiftoffAssembler::emit_i32x4_splat(LiftoffRegister dst,
++ LiftoffRegister src) {
++ bailout(kSimd, "emit_i32x4_splat");
++}
++
++void LiftoffAssembler::emit_i64x2_splat(LiftoffRegister dst,
++ LiftoffRegister src) {
++ bailout(kSimd, "emit_i64x2_splat");
++}
++
++void LiftoffAssembler::emit_f32x4_splat(LiftoffRegister dst,
++ LiftoffRegister src) {
++ bailout(kSimd, "emit_f32x4_splat");
++}
++
++void LiftoffAssembler::emit_f64x2_splat(LiftoffRegister dst,
++ LiftoffRegister src) {}
++
++void LiftoffAssembler::emit_i8x16_add(LiftoffRegister dst, LiftoffRegister lhs,
++ LiftoffRegister rhs) {}
++
++void LiftoffAssembler::emit_i8x16_sub(LiftoffRegister dst, LiftoffRegister lhs,
++ LiftoffRegister rhs) {}
++
++void LiftoffAssembler::emit_i8x16_mul(LiftoffRegister dst, LiftoffRegister lhs,
++ LiftoffRegister rhs) {}
++
++void LiftoffAssembler::emit_i16x8_add(LiftoffRegister dst, LiftoffRegister lhs,
++ LiftoffRegister rhs) {}
++
++void LiftoffAssembler::emit_i16x8_sub(LiftoffRegister dst, LiftoffRegister lhs,
++ LiftoffRegister rhs) {}
++
++void LiftoffAssembler::emit_i16x8_mul(LiftoffRegister dst, LiftoffRegister lhs,
++ LiftoffRegister rhs) {}
++
++void LiftoffAssembler::emit_i32x4_add(LiftoffRegister dst, LiftoffRegister lhs,
++ LiftoffRegister rhs) {}
++
++void LiftoffAssembler::emit_i32x4_sub(LiftoffRegister dst, LiftoffRegister lhs,
++ LiftoffRegister rhs) {}
++
++void LiftoffAssembler::emit_i32x4_mul(LiftoffRegister dst, LiftoffRegister lhs,
++ LiftoffRegister rhs) {}
++
++void LiftoffAssembler::emit_i64x2_add(LiftoffRegister dst, LiftoffRegister lhs,
++ LiftoffRegister rhs) {}
++
++void LiftoffAssembler::emit_i64x2_sub(LiftoffRegister dst, LiftoffRegister lhs,
++ LiftoffRegister rhs) {}
++
++void LiftoffAssembler::emit_i64x2_mul(LiftoffRegister dst, LiftoffRegister lhs,
++ LiftoffRegister rhs) {}
++
++void LiftoffAssembler::emit_f32x4_add(LiftoffRegister dst, LiftoffRegister lhs,
++ LiftoffRegister rhs) {}
++
++void LiftoffAssembler::emit_f32x4_sub(LiftoffRegister dst, LiftoffRegister lhs,
++ LiftoffRegister rhs) {}
++
++void LiftoffAssembler::emit_f32x4_mul(LiftoffRegister dst, LiftoffRegister lhs,
++ LiftoffRegister rhs) {}
++
++void LiftoffAssembler::emit_f64x2_add(LiftoffRegister dst, LiftoffRegister lhs,
++ LiftoffRegister rhs) {}
++
++void LiftoffAssembler::emit_f64x2_sub(LiftoffRegister dst, LiftoffRegister lhs,
++ LiftoffRegister rhs) {}
++
++void LiftoffAssembler::emit_f64x2_mul(LiftoffRegister dst, LiftoffRegister lhs,
++ LiftoffRegister rhs) {}
++
++void LiftoffAssembler::emit_i8x16_extract_lane_s(LiftoffRegister dst,
++ LiftoffRegister lhs,
++ uint8_t imm_lane_idx) {}
++
++void LiftoffAssembler::emit_i8x16_extract_lane_u(LiftoffRegister dst,
++ LiftoffRegister lhs,
++ uint8_t imm_lane_idx) {}
++
++void
LiftoffAssembler::emit_i16x8_extract_lane_s(LiftoffRegister dst, ++ LiftoffRegister lhs, ++ uint8_t imm_lane_idx) {} ++ ++void LiftoffAssembler::emit_i16x8_extract_lane_u(LiftoffRegister dst, ++ LiftoffRegister lhs, ++ uint8_t imm_lane_idx) {} ++ ++void LiftoffAssembler::emit_i32x4_extract_lane(LiftoffRegister dst, ++ LiftoffRegister lhs, ++ uint8_t imm_lane_idx) {} ++ ++void LiftoffAssembler::emit_i64x2_extract_lane(LiftoffRegister dst, ++ LiftoffRegister lhs, ++ uint8_t imm_lane_idx) {} ++ ++void LiftoffAssembler::emit_f32x4_extract_lane(LiftoffRegister dst, ++ LiftoffRegister lhs, ++ uint8_t imm_lane_idx) {} ++ ++void LiftoffAssembler::emit_f64x2_extract_lane(LiftoffRegister dst, ++ LiftoffRegister lhs, ++ uint8_t imm_lane_idx) {} ++ ++void LiftoffAssembler::emit_i8x16_replace_lane(LiftoffRegister dst, ++ LiftoffRegister src1, ++ LiftoffRegister src2, ++ uint8_t imm_lane_idx) {} ++ ++void LiftoffAssembler::emit_i16x8_replace_lane(LiftoffRegister dst, ++ LiftoffRegister src1, ++ LiftoffRegister src2, ++ uint8_t imm_lane_idx) {} ++ ++void LiftoffAssembler::emit_i32x4_replace_lane(LiftoffRegister dst, ++ LiftoffRegister src1, ++ LiftoffRegister src2, ++ uint8_t imm_lane_idx) {} ++ ++void LiftoffAssembler::emit_i64x2_replace_lane(LiftoffRegister dst, ++ LiftoffRegister src1, ++ LiftoffRegister src2, ++ uint8_t imm_lane_idx) {} ++ ++void LiftoffAssembler::emit_f32x4_replace_lane(LiftoffRegister dst, ++ LiftoffRegister src1, ++ LiftoffRegister src2, ++ uint8_t imm_lane_idx) {} ++ ++void LiftoffAssembler::emit_f64x2_replace_lane(LiftoffRegister dst, ++ LiftoffRegister src1, ++ LiftoffRegister src2, ++ uint8_t imm_lane_idx) {} ++ ++void LiftoffAssembler::StackCheck(Label* ool_code, Register limit_address) { ++ TurboAssembler::Ld_d(limit_address, MemOperand(limit_address, 0)); ++ TurboAssembler::Branch(ool_code, ule, sp, Operand(limit_address)); ++} ++ ++void LiftoffAssembler::CallTrapCallbackForTesting() { ++ PrepareCallCFunction(0, GetUnusedRegister(kGpReg).gp()); ++ CallCFunction(ExternalReference::wasm_call_trap_callback_for_testing(), 0); ++} ++ ++void LiftoffAssembler::AssertUnreachable(AbortReason reason) { ++ if (emit_debug_code()) Abort(reason); ++} ++ ++void LiftoffAssembler::PushRegisters(LiftoffRegList regs) { ++ LiftoffRegList gp_regs = regs & kGpCacheRegList; ++ unsigned num_gp_regs = gp_regs.GetNumRegsSet(); ++ if (num_gp_regs) { ++ unsigned offset = num_gp_regs * kSystemPointerSize; ++ addi_d(sp, sp, -offset); ++ while (!gp_regs.is_empty()) { ++ LiftoffRegister reg = gp_regs.GetFirstRegSet(); ++ offset -= kSystemPointerSize; ++ St_d(reg.gp(), MemOperand(sp, offset)); ++ gp_regs.clear(reg); ++ } ++ DCHECK_EQ(offset, 0); ++ } ++ LiftoffRegList fp_regs = regs & kFpCacheRegList; ++ unsigned num_fp_regs = fp_regs.GetNumRegsSet(); ++ if (num_fp_regs) { ++ addi_d(sp, sp, -(num_fp_regs * kStackSlotSize)); ++ unsigned offset = 0; ++ while (!fp_regs.is_empty()) { ++ LiftoffRegister reg = fp_regs.GetFirstRegSet(); ++ TurboAssembler::Fst_d(reg.fp(), MemOperand(sp, offset)); ++ fp_regs.clear(reg); ++ offset += sizeof(double); ++ } ++ DCHECK_EQ(offset, num_fp_regs * sizeof(double)); ++ } ++} ++ ++void LiftoffAssembler::PopRegisters(LiftoffRegList regs) { ++ LiftoffRegList fp_regs = regs & kFpCacheRegList; ++ unsigned fp_offset = 0; ++ while (!fp_regs.is_empty()) { ++ LiftoffRegister reg = fp_regs.GetFirstRegSet(); ++ TurboAssembler::Fld_d(reg.fp(), MemOperand(sp, fp_offset)); ++ fp_regs.clear(reg); ++ fp_offset += sizeof(double); ++ } ++ if (fp_offset) addi_d(sp, sp, fp_offset); ++ 
LiftoffRegList gp_regs = regs & kGpCacheRegList; ++ unsigned gp_offset = 0; ++ while (!gp_regs.is_empty()) { ++ LiftoffRegister reg = gp_regs.GetLastRegSet(); ++ Ld_d(reg.gp(), MemOperand(sp, gp_offset)); ++ gp_regs.clear(reg); ++ gp_offset += kSystemPointerSize; ++ } ++ addi_d(sp, sp, gp_offset); ++} ++ ++void LiftoffAssembler::DropStackSlotsAndRet(uint32_t num_stack_slots) { ++ DCHECK_LT(num_stack_slots, ++ (1 << 16) / kSystemPointerSize); // 16 bit immediate ++ TurboAssembler::DropAndRet(static_cast(num_stack_slots)); ++} ++ ++void LiftoffAssembler::CallC(const wasm::FunctionSig* sig, ++ const LiftoffRegister* args, ++ const LiftoffRegister* rets, ++ ValueType out_argument_type, int stack_bytes, ++ ExternalReference ext_ref) { ++ addi_d(sp, sp, -stack_bytes); ++ ++ int arg_bytes = 0; ++ for (ValueType param_type : sig->parameters()) { ++ liftoff::Store(this, sp, arg_bytes, *args++, param_type); ++ arg_bytes += param_type.element_size_bytes(); ++ } ++ DCHECK_LE(arg_bytes, stack_bytes); ++ ++ // Pass a pointer to the buffer with the arguments to the C function. ++ // On mips, the first argument is passed in {a0}. ++ constexpr Register kFirstArgReg = a0; ++ mov(kFirstArgReg, sp); ++ ++ // Now call the C function. ++ constexpr int kNumCCallArgs = 1; ++ PrepareCallCFunction(kNumCCallArgs, kScratchReg); ++ CallCFunction(ext_ref, kNumCCallArgs); ++ ++ // Move return value to the right register. ++ const LiftoffRegister* next_result_reg = rets; ++ if (sig->return_count() > 0) { ++ DCHECK_EQ(1, sig->return_count()); ++ constexpr Register kReturnReg = a0; ++ if (kReturnReg != next_result_reg->gp()) { ++ Move(*next_result_reg, LiftoffRegister(kReturnReg), sig->GetReturn(0)); ++ } ++ ++next_result_reg; ++ } ++ ++ // Load potential output value from the buffer on the stack. ++ if (out_argument_type != kWasmStmt) { ++ liftoff::Load(this, *next_result_reg, MemOperand(sp, 0), out_argument_type); ++ } ++ ++ addi_d(sp, sp, stack_bytes); ++} ++ ++void LiftoffAssembler::CallNativeWasmCode(Address addr) { ++ Call(addr, RelocInfo::WASM_CALL); ++} ++ ++void LiftoffAssembler::CallIndirect(const wasm::FunctionSig* sig, ++ compiler::CallDescriptor* call_descriptor, ++ Register target) { ++ if (target == no_reg) { ++ pop(kScratchReg); ++ Call(kScratchReg); ++ } else { ++ Call(target); ++ } ++} ++ ++void LiftoffAssembler::CallRuntimeStub(WasmCode::RuntimeStubId sid) { ++ // A direct call to a wasm runtime stub defined in this module. ++ // Just encode the stub index. This will be patched at relocation. ++ Call(static_cast
(sid), RelocInfo::WASM_STUB_CALL); ++} ++ ++void LiftoffAssembler::AllocateStackSlot(Register addr, uint32_t size) { ++ addi_d(sp, sp, -size); ++ TurboAssembler::Move(addr, sp); ++} ++ ++void LiftoffAssembler::DeallocateStackSlot(uint32_t size) { ++ addi_d(sp, sp, size); ++} ++ ++void LiftoffStackSlots::Construct() { ++ for (auto& slot : slots_) { ++ const LiftoffAssembler::VarState& src = slot.src_; ++ switch (src.loc()) { ++ case LiftoffAssembler::VarState::kStack: ++ asm_->Ld_d(kScratchReg, liftoff::GetStackSlot(slot.src_offset_)); ++ asm_->push(kScratchReg); ++ break; ++ case LiftoffAssembler::VarState::kRegister: ++ liftoff::push(asm_, src.reg(), src.type()); ++ break; ++ case LiftoffAssembler::VarState::kIntConst: { ++ asm_->li(kScratchReg, Operand(src.i32_const())); ++ asm_->push(kScratchReg); ++ break; ++ } ++ } ++ } ++} ++ ++} // namespace wasm ++} // namespace internal ++} // namespace v8 ++ ++#endif // V8_WASM_BASELINE_LA64_LIFTOFF_ASSEMBLER_LA64_H_ +diff --git a/src/3rdparty/chromium/v8/src/wasm/baseline/liftoff-assembler-defs.h b/src/3rdparty/chromium/v8/src/wasm/baseline/liftoff-assembler-defs.h +index 781fb87dbcf..286fe8bdea2 100644 +--- a/src/3rdparty/chromium/v8/src/wasm/baseline/liftoff-assembler-defs.h ++++ b/src/3rdparty/chromium/v8/src/wasm/baseline/liftoff-assembler-defs.h +@@ -46,6 +46,14 @@ constexpr RegList kLiftoffAssemblerGpCacheRegs = + constexpr RegList kLiftoffAssemblerFpCacheRegs = DoubleRegister::ListOf( + f0, f2, f4, f6, f8, f10, f12, f14, f16, f18, f20, f22, f24, f26); + ++#elif V8_TARGET_ARCH_LA64 ++/*todo*/ ++constexpr RegList kLiftoffAssemblerGpCacheRegs = ++ Register::ListOf(a0, a1, a2, a3, a4, a5, a6, a7, t0, t1, t2, s7); ++ ++constexpr RegList kLiftoffAssemblerFpCacheRegs = DoubleRegister::ListOf( ++ f0, f2, f4, f6, f8, f10, f12, f14, f16, f18, f20, f22, f24, f26); ++ + #elif V8_TARGET_ARCH_ARM + + // r7: cp, r10: root, r11: fp, r12: ip, r13: sp, r14: lr, r15: pc. 
+@@ -90,7 +98,7 @@ constexpr Condition kUnsignedLessEqual = below_equal; + constexpr Condition kUnsignedGreaterThan = above; + constexpr Condition kUnsignedGreaterEqual = above_equal; + +-#elif V8_TARGET_ARCH_MIPS || V8_TARGET_ARCH_MIPS64 ++#elif V8_TARGET_ARCH_MIPS || V8_TARGET_ARCH_MIPS64 || V8_TARGET_ARCH_LA64 + + constexpr Condition kEqual = eq; + constexpr Condition kUnequal = ne; +diff --git a/src/3rdparty/chromium/v8/src/wasm/baseline/liftoff-assembler.h b/src/3rdparty/chromium/v8/src/wasm/baseline/liftoff-assembler.h +index 6573ff4aa4d..4e26ea95d21 100644 +--- a/src/3rdparty/chromium/v8/src/wasm/baseline/liftoff-assembler.h ++++ b/src/3rdparty/chromium/v8/src/wasm/baseline/liftoff-assembler.h +@@ -1045,6 +1045,8 @@ class LiftoffStackSlots { + #include "src/wasm/baseline/mips/liftoff-assembler-mips.h" + #elif V8_TARGET_ARCH_MIPS64 + #include "src/wasm/baseline/mips64/liftoff-assembler-mips64.h" ++#elif V8_TARGET_ARCH_LA64 ++#include "src/wasm/baseline/la64/liftoff-assembler-la64.h" + #elif V8_TARGET_ARCH_S390 + #include "src/wasm/baseline/s390/liftoff-assembler-s390.h" + #else +diff --git a/src/3rdparty/chromium/v8/src/wasm/jump-table-assembler.cc b/src/3rdparty/chromium/v8/src/wasm/jump-table-assembler.cc +index 90cdad4672b..33f8b9e6e99 100644 +--- a/src/3rdparty/chromium/v8/src/wasm/jump-table-assembler.cc ++++ b/src/3rdparty/chromium/v8/src/wasm/jump-table-assembler.cc +@@ -268,6 +268,37 @@ void JumpTableAssembler::NopBytes(int bytes) { + } + } + ++#elif V8_TARGET_ARCH_LA64 ++void JumpTableAssembler::EmitLazyCompileJumpSlot(uint32_t func_index, ++ Address lazy_compile_target) { ++ DCHECK(is_int32(func_index)); ++ int start = pc_offset(); ++ li(kWasmCompileLazyFuncIndexRegister, (int32_t)func_index); // max. 2 instr ++ // Jump produces max. 3 instructions for 32-bit platform ++ // and max. 4 instructions for 64-bit platform. 
++ Jump(lazy_compile_target, RelocInfo::NONE); ++ int nop_bytes = start + kLazyCompileTableSlotSize - pc_offset(); ++ DCHECK_EQ(nop_bytes % kInstrSize, 0); ++ for (int i = 0; i < nop_bytes; i += kInstrSize) nop(); ++} ++bool JumpTableAssembler::EmitJumpSlot(Address target) { ++ PatchAndJump(target); ++ return true; ++} ++void JumpTableAssembler::EmitFarJumpSlot(Address target) { ++ JumpToInstructionStream(target); ++} ++void JumpTableAssembler::PatchFarJumpSlot(Address slot, Address target) { ++ UNREACHABLE(); ++} ++void JumpTableAssembler::NopBytes(int bytes) { ++ DCHECK_LE(0, bytes); ++ DCHECK_EQ(0, bytes % kInstrSize); ++ for (; bytes > 0; bytes -= kInstrSize) { ++ nop(); ++ } ++} ++ + #elif V8_TARGET_ARCH_PPC64 + void JumpTableAssembler::EmitLazyCompileJumpSlot(uint32_t func_index, + Address lazy_compile_target) { +diff --git a/src/3rdparty/chromium/v8/src/wasm/jump-table-assembler.h b/src/3rdparty/chromium/v8/src/wasm/jump-table-assembler.h +index 253f0bc0182..71c1c7eeb3c 100644 +--- a/src/3rdparty/chromium/v8/src/wasm/jump-table-assembler.h ++++ b/src/3rdparty/chromium/v8/src/wasm/jump-table-assembler.h +@@ -215,6 +215,12 @@ class V8_EXPORT_PRIVATE JumpTableAssembler : public MacroAssembler { + static constexpr int kJumpTableSlotSize = 8 * kInstrSize; + static constexpr int kFarJumpTableSlotSize = 6 * kInstrSize; + static constexpr int kLazyCompileTableSlotSize = 8 * kInstrSize; ++#elif V8_TARGET_ARCH_LA64 ++ // TODO ++ static constexpr int kJumpTableLineSize = 8 * kInstrSize; ++ static constexpr int kJumpTableSlotSize = 8 * kInstrSize; ++ static constexpr int kFarJumpTableSlotSize = 4 * kInstrSize; ++ static constexpr int kLazyCompileTableSlotSize = 8 * kInstrSize; + #else + #error Unknown architecture. + #endif +diff --git a/src/3rdparty/chromium/v8/src/wasm/wasm-linkage.h b/src/3rdparty/chromium/v8/src/wasm/wasm-linkage.h +index 7e56ea6eae2..b8efe962a7c 100644 +--- a/src/3rdparty/chromium/v8/src/wasm/wasm-linkage.h ++++ b/src/3rdparty/chromium/v8/src/wasm/wasm-linkage.h +@@ -75,6 +75,15 @@ constexpr Register kGpReturnRegisters[] = {v0, v1}; + constexpr DoubleRegister kFpParamRegisters[] = {f2, f4, f6, f8, f10, f12, f14}; + constexpr DoubleRegister kFpReturnRegisters[] = {f2, f4}; + ++#elif V8_TARGET_ARCH_LA64 ++// =========================================================================== ++// == LA64 TODO ============================================================= ++// =========================================================================== ++constexpr Register kGpParamRegisters[] = {a0, a2, a3, a4, a5, a6, a7}; ++constexpr Register kGpReturnRegisters[] = {a0, a1}; ++constexpr DoubleRegister kFpParamRegisters[] = {f2, f4, f6, f8, f10, f12, f14}; ++constexpr DoubleRegister kFpReturnRegisters[] = {f2, f4}; ++ + #elif V8_TARGET_ARCH_PPC || V8_TARGET_ARCH_PPC64 + // =========================================================================== + // == ppc & ppc64 ============================================================ +diff --git a/src/3rdparty/chromium/v8/test/cctest/BUILD.gn b/src/3rdparty/chromium/v8/test/cctest/BUILD.gn +index fd9de1bacb5..eacb2f41862 100644 +--- a/src/3rdparty/chromium/v8/test/cctest/BUILD.gn ++++ b/src/3rdparty/chromium/v8/test/cctest/BUILD.gn +@@ -363,6 +363,12 @@ v8_source_set("cctest_sources") { + "test-disasm-mips64.cc", + "test-macro-assembler-mips64.cc", + ] ++ } else if (v8_current_cpu == "la64") { ++ sources += [ ### loongson(arch:la64) ### ++ "test-assembler-la64.cc", ++ "test-disasm-la64.cc", ++ "test-macro-assembler-la64.cc", ++ ] + } else if 
(v8_current_cpu == "x64") { + sources += [ ### gcmole(arch:x64) ### + "test-assembler-x64.cc", +@@ -417,7 +423,8 @@ v8_source_set("cctest_sources") { + v8_current_cpu == "arm" || v8_current_cpu == "arm64" || + v8_current_cpu == "s390" || v8_current_cpu == "s390x" || + v8_current_cpu == "mips" || v8_current_cpu == "mips64" || +- v8_current_cpu == "mipsel" || v8_current_cpu == "mipsel64") { ++ v8_current_cpu == "mipsel" || v8_current_cpu == "mipsel64" || ++ v8_current_cpu == "la64") { + # Disable fmadd/fmsub so that expected results match generated code in + # RunFloat64MulAndFloat64Add1 and friends. + if (!is_win) { +diff --git a/src/3rdparty/chromium/v8/test/cctest/test-assembler-la64.cc b/src/3rdparty/chromium/v8/test/cctest/test-assembler-la64.cc +new file mode 100644 +index 00000000000..366bcb7cd25 +--- /dev/null ++++ b/src/3rdparty/chromium/v8/test/cctest/test-assembler-la64.cc +@@ -0,0 +1,5127 @@ ++// Copyright 2012 the V8 project authors. All rights reserved. ++// Redistribution and use in source and binary forms, with or without ++// modification, are permitted provided that the following conditions are ++// met: ++// ++// * Redistributions of source code must retain the above copyright ++// notice, this list of conditions and the following disclaimer. ++// * Redistributions in binary form must reproduce the above ++// copyright notice, this list of conditions and the following ++// disclaimer in the documentation and/or other materials provided ++// with the distribution. ++// * Neither the name of Google Inc. nor the names of its ++// contributors may be used to endorse or promote products derived ++// from this software without specific prior written permission. ++// ++// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS ++// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT ++// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR ++// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT ++// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, ++// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT ++// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, ++// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY ++// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT ++// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE ++// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. ++ ++#include // NOLINT(readability/streams) ++ ++#include "src/base/utils/random-number-generator.h" ++#include "src/codegen/assembler-inl.h" ++#include "src/codegen/macro-assembler.h" ++#include "src/diagnostics/disassembler.h" ++#include "src/execution/simulator.h" ++#include "src/heap/factory.h" ++#include "src/init/v8.h" ++#include "test/cctest/cctest.h" ++ ++namespace v8 { ++namespace internal { ++ ++// Define these function prototypes to match JSEntryFunction in execution.cc. ++// TODO(mips64): Refine these signatures per test case. ++using F1 = void*(int x, int p1, int p2, int p3, int p4); ++using F2 = void*(int x, int y, int p2, int p3, int p4); ++using F3 = void*(void* p, int p1, int p2, int p3, int p4); ++using F4 = void*(int64_t x, int64_t y, int64_t p2, int64_t p3, int64_t p4); ++using F5 = void*(void* p0, void* p1, int p2, int p3, int p4); ++ ++#define __ assm. 
++// v0->a2, v1->a3 ++TEST(LA0) { ++ CcTest::InitializeVM(); ++ Isolate* isolate = CcTest::i_isolate(); ++ HandleScope scope(isolate); ++ ++ MacroAssembler assm(isolate, v8::internal::CodeObjectRequired::kYes); ++ ++ // Addition. ++ __ addi_d(a2, a0, 0xC); ++ ++ __ or_(a0, a2, zero_reg); ++ __ jirl(zero_reg, ra, 0); ++ ++ CodeDesc desc; ++ assm.GetCode(isolate, &desc); ++ Handle code = Factory::CodeBuilder(isolate, desc, Code::STUB).Build(); ++ auto f = GeneratedCode::FromCode(*code); ++ int64_t res = reinterpret_cast(f.Call(0xAB0, 0, 0, 0, 0)); ++ CHECK_EQ(0xABCL, res); ++} ++ ++TEST(LA1) { ++ CcTest::InitializeVM(); ++ Isolate* isolate = CcTest::i_isolate(); ++ HandleScope scope(isolate); ++ ++ MacroAssembler assm(isolate, v8::internal::CodeObjectRequired::kYes); ++ Label L, C; ++ ++ __ ori(a1, a0, 0); ++ __ ori(a2, zero_reg, 0); ++ __ b(&C); ++ ++ __ bind(&L); ++ __ add_d(a2, a2, a1); ++ __ addi_d(a1, a1, -1); ++ ++ __ bind(&C); ++ __ ori(a3, a1, 0); ++ ++ __ Branch(&L, ne, a3, Operand((int64_t)0)); ++ ++ __ or_(a0, a2, zero_reg); ++ __ or_(a1, a3, zero_reg); ++ __ jirl(zero_reg, ra, 0); ++ ++ CodeDesc desc; ++ assm.GetCode(isolate, &desc); ++ Handle code = Factory::CodeBuilder(isolate, desc, Code::STUB).Build(); ++ auto f = GeneratedCode::FromCode(*code); ++ int64_t res = reinterpret_cast(f.Call(50, 0, 0, 0, 0)); ++ CHECK_EQ(1275L, res); ++} ++ ++TEST(LA2) { ++ CcTest::InitializeVM(); ++ Isolate* isolate = CcTest::i_isolate(); ++ HandleScope scope(isolate); ++ ++ MacroAssembler assm(isolate, v8::internal::CodeObjectRequired::kYes); ++ ++ Label exit, error; ++ ++ __ ori(a4, zero_reg, 0); // 00000000 ++ __ lu12i_w(a4, 0x12345); // 12345000 ++ __ ori(a4, a4, 0); // 12345000 ++ __ ori(a2, a4, 0xF0F); // 12345F0F ++ __ Branch(&error, ne, a2, Operand(0x12345F0F)); ++ ++ __ ori(a4, zero_reg, 0); ++ __ lu32i_d(a4, 0x12345); // 1 2345 0000 0000 ++ __ ori(a4, a4, 0xFFF); // 1 2345 0000 0FFF ++ __ addi_d(a2, a4, 1); ++ __ Branch(&error, ne, a2, Operand(0x1234500001000)); ++ ++ __ ori(a4, zero_reg, 0); ++ __ lu52i_d(a4, zero_reg, 0x123); // 1230 0000 0000 0000 ++ __ ori(a4, a4, 0xFFF); // 123F 0000 0000 0FFF ++ __ addi_d(a2, a4, 1); // 1230 0000 0000 1000 ++ __ Branch(&error, ne, a2, Operand(0x1230000000001000)); ++ ++ __ li(a2, 0x31415926); ++ __ b(&exit); ++ ++ __ bind(&error); ++ __ li(a2, 0x666); ++ ++ __ bind(&exit); ++ __ or_(a0, a2, zero_reg); ++ __ jirl(zero_reg, ra, 0); ++ ++ CodeDesc desc; ++ assm.GetCode(isolate, &desc); ++ Handle code = Factory::CodeBuilder(isolate, desc, Code::STUB).Build(); ++ auto f = GeneratedCode::FromCode(*code); ++ int64_t res = reinterpret_cast(f.Call(0, 0, 0, 0, 0)); ++ ++ CHECK_EQ(0x31415926L, res); ++} ++ ++TEST(LA3) { ++ // Test 32bit calculate instructions. ++ CcTest::InitializeVM(); ++ Isolate* isolate = CcTest::i_isolate(); ++ HandleScope scope(isolate); ++ MacroAssembler assm(isolate, v8::internal::CodeObjectRequired::kYes); ++ ++ Label exit, error; ++ ++ __ li(a4, 0x00000004); ++ __ li(a5, 0x00001234); ++ __ li(a6, 0x12345678); ++ __ li(a7, 0x7FFFFFFF); ++ __ li(t0, static_cast(0xFFFFFFFC)); ++ __ li(t1, static_cast(0xFFFFEDCC)); ++ __ li(t2, static_cast(0xEDCBA988)); ++ __ li(t3, static_cast(0x80000000)); ++ ++ __ ori(a2, zero_reg, 0); // 0x00000000 ++ __ add_w(a2, a4, a5); // 0x00001238 ++ __ sub_w(a2, a2, a4); // 0x00001234 ++ __ Branch(&error, ne, a2, Operand(0x00001234)); ++ __ ori(a3, zero_reg, 0); // 0x00000000 ++ __ add_w(a3, a7, a4); // 32bit addu result is sign-extended into 64bit reg. 
++ __ Branch(&error, ne, a3, Operand(0xFFFFFFFF80000003)); ++ ++ __ sub_w(a3, t3, a4); // 0x7FFFFFFC ++ __ Branch(&error, ne, a3, Operand(0x7FFFFFFC)); ++ ++ __ ori(a2, zero_reg, 0); // 0x00000000 ++ __ ori(a3, zero_reg, 0); // 0x00000000 ++ __ addi_w(a2, zero_reg, 0x421); // 0x00007421 ++ __ addi_w(a2, a2, -0x1); // 0x00007420 ++ __ addi_w(a2, a2, -0x20); // 0x00007400 ++ __ Branch(&error, ne, a2, Operand(0x0000400)); ++ __ addi_w(a3, a7, 0x1); // 0x80000000 - result is sign-extended. ++ __ Branch(&error, ne, a3, Operand(0xFFFFFFFF80000000)); ++ ++ __ ori(a2, zero_reg, 0); // 0x00000000 ++ __ ori(a3, zero_reg, 0); // 0x00000000 ++ __ alsl_w(a2, a6, a4, 3); // 0xFFFFFFFF91A2B3C4 ++ __ alsl_w(a2, a2, a4, 2); // 0x468ACF14 ++ __ Branch(&error, ne, a2, Operand(0x468acf14)); ++ __ ori(a0, zero_reg, 31); ++ __ alsl_wu(a3, a6, a4, 3); // 0x91A2B3C4 ++ __ alsl_wu(a3, a3, a7, 1); // 0xFFFFFFFFA3456787 ++ __ Branch(&error, ne, a3, Operand(0xA3456787)); ++ ++ __ ori(a2, zero_reg, 0); ++ __ ori(a3, zero_reg, 0); ++ __ mul_w(a2, a5, a7); ++ __ div_w(a2, a2, a4); ++ __ Branch(&error, ne, a2, Operand(0xFFFFFFFFFFFFFB73)); ++ __ mul_w(a3, a4, t1); ++ __ Branch(&error, ne, a3, Operand(0xFFFFFFFFFFFFB730)); ++ __ div_w(a3, t3, a4); ++ __ Branch(&error, ne, a3, Operand(0xFFFFFFFFE0000000)); ++ ++ __ ori(a2, zero_reg, 0); ++ __ mulh_w(a2, a4, t1); ++ __ Branch(&error, ne, a2, Operand(0xFFFFFFFFFFFFFFFF)); ++ __ mulh_w(a2, a4, a6); ++ __ Branch(&error, ne, a2, Operand(static_cast(0))); ++ ++ __ ori(a2, zero_reg, 0); ++ __ mulh_wu(a2, a4, t1); ++ __ Branch(&error, ne, a2, Operand(0x3)); ++ __ mulh_wu(a2, a4, a6); ++ __ Branch(&error, ne, a2, Operand(static_cast(0))); ++ ++ __ ori(a2, zero_reg, 0); ++ __ mulw_d_w(a2, a4, t1); ++ __ Branch(&error, ne, a2, Operand(0xFFFFFFFFFFFFB730)); ++ __ mulw_d_w(a2, a4, a6); ++ __ Branch(&error, ne, a2, Operand(0x48D159E0)); ++ ++ __ ori(a2, zero_reg, 0); ++ __ mulw_d_wu(a2, a4, t1); ++ __ Branch(&error, ne, a2, Operand(0x3FFFFB730)); //========0xFFFFB730 ++ __ ori(a2, zero_reg, 81); ++ __ mulw_d_wu(a2, a4, a6); ++ __ Branch(&error, ne, a2, Operand(0x48D159E0)); ++ ++ __ ori(a2, zero_reg, 0); ++ __ div_wu(a2, a7, a5); ++ __ Branch(&error, ne, a2, Operand(0x70821)); ++ __ div_wu(a2, t0, a5); ++ __ Branch(&error, ne, a2, Operand(0xE1042)); ++ __ div_wu(a2, t0, t1); ++ __ Branch(&error, ne, a2, Operand(0x1)); ++ ++ __ ori(a2, zero_reg, 0); ++ __ mod_w(a2, a6, a5); ++ __ Branch(&error, ne, a2, Operand(0xDA8)); ++ __ ori(a2, zero_reg, 0); ++ __ mod_w(a2, t2, a5); ++ __ Branch(&error, ne, a2, Operand(0xFFFFFFFFFFFFF258)); ++ __ ori(a2, zero_reg, 0); ++ __ mod_w(a2, t2, t1); ++ __ Branch(&error, ne, a2, Operand(0xFFFFFFFFFFFFF258)); ++ ++ __ ori(a2, zero_reg, 0); ++ __ mod_wu(a2, a6, a5); ++ __ Branch(&error, ne, a2, Operand(0xDA8)); ++ __ mod_wu(a2, t2, a5); ++ __ Branch(&error, ne, a2, Operand(0xF0)); ++ __ mod_wu(a2, t2, t1); ++ __ Branch(&error, ne, a2, Operand(0xFFFFFFFFEDCBA988)); ++ ++ __ li(a2, 0x31415926); ++ __ b(&exit); ++ ++ __ bind(&error); ++ __ li(a2, 0x666); ++ ++ __ bind(&exit); ++ __ or_(a0, a2, zero_reg); ++ __ jirl(zero_reg, ra, 0); ++ ++ CodeDesc desc; ++ assm.GetCode(isolate, &desc); ++ Handle code = Factory::CodeBuilder(isolate, desc, Code::STUB).Build(); ++ auto f = GeneratedCode::FromCode(*code); ++ int64_t res = reinterpret_cast(f.Call(0, 0, 0, 0, 0)); ++ ++ CHECK_EQ(0x31415926L, res); ++} ++ ++TEST(LA4) { ++ // Test 64bit calculate instructions. 
++ CcTest::InitializeVM();
++ Isolate* isolate = CcTest::i_isolate();
++ HandleScope scope(isolate);
++ MacroAssembler assm(isolate, v8::internal::CodeObjectRequired::kYes);
++
++ Label exit, error;
++
++ __ li(a4, 0x17312);
++ __ li(a5, 0x1012131415161718);
++ __ li(a6, 0x51F4B764A26E7412);
++ __ li(a7, 0x7FFFFFFFFFFFFFFF);
++ __ li(t0, static_cast(0xFFFFFFFFFFFFF547));
++ __ li(t1, static_cast(0xDF6B8F35A10E205C));
++ __ li(t2, static_cast(0x81F25A87C4236841));
++ __ li(t3, static_cast(0x8000000000000000));
++
++ __ ori(a2, zero_reg, 0);
++ __ add_d(a2, a4, a5);
++ __ sub_d(a2, a2, a4);
++ __ Branch(&error, ne, a2, Operand(0x1012131415161718));
++ __ ori(a3, zero_reg, 0);
++ __ add_d(a3, a6, a7); // overflow
++ __ Branch(&error, ne, a3, Operand(0xd1f4b764a26e7411));
++ __ sub_d(a3, t3, a4); // overflow
++ __ Branch(&error, ne, a3, Operand(0x7ffffffffffe8cee));
++
++ __ ori(a2, zero_reg, 0);
++ __ addi_d(a2, a5, 0x412); // positive value
++ __ Branch(&error, ne, a2, Operand(0x1012131415161b2a));
++ __ addi_d(a2, a7, 0x547); // negative value
++ __ Branch(&error, ne, a2, Operand(0x8000000000000546));
++
++ __ ori(t4, zero_reg, 0);
++ __ addu16i_d(a2, t4, 0x1234);
++ __ Branch(&error, ne, a2, Operand(0x12340000));
++ __ addu16i_d(a2, a2, 0x9876);
++ __ Branch(&error, ne, a2, Operand(0xffffffffaaaa0000));
++
++ __ ori(a2, zero_reg, 0);
++ __ alsl_d(a2, t2, t0, 3);
++ __ Branch(&error, ne, a2, Operand(0xf92d43e211b374f));
++
++ __ ori(a2, zero_reg, 0);
++ __ mul_d(a2, a5, a6);
++ __ Branch(&error, ne, a2, Operand(0xdbe6a8729a547fb0));
++ __ mul_d(a2, t0, t1);
++ __ Branch(&error, ne, a2, Operand(0x57ad69f40f870584));
++ __ mul_d(a2, a4, t0);
++ __ Branch(&error, ne, a2, Operand(0xfffffffff07523fe));
++
++ __ ori(a2, zero_reg, 0);
++ __ mulh_d(a2, a5, a6);
++ __ Branch(&error, ne, a2, Operand(0x52514c6c6b54467));
++ __ mulh_d(a2, t0, t1);
++ __ Branch(&error, ne, a2, Operand(0x15d));
++
++ __ ori(a2, zero_reg, 0);
++ __ mulh_du(a2, a5, a6);
++ __ Branch(&error, ne, a2, Operand(0x52514c6c6b54467));
++ __ mulh_du(a2, t0, t1);
++ __ Branch(&error, ne, a2, Operand(0xdf6b8f35a10e1700));
++ __ mulh_du(a2, a4, t0);
++ __ Branch(&error, ne, a2, Operand(0x17311));
++
++ __ ori(a2, zero_reg, 0);
++ __ div_d(a2, a5, a6);
++ __ Branch(&error, ne, a2, Operand(static_cast(0)));
++ __ div_d(a2, t0, t1);
++ __ Branch(&error, ne, a2, Operand(static_cast(0)));
++ __ div_d(a2, t1, a4);
++ __ Branch(&error, ne, a2, Operand(0xffffe985f631e6d9));
++
++ __ ori(a2, zero_reg, 0);
++ __ div_du(a2, a5, a6);
++ __ Branch(&error, ne, a2, Operand(static_cast(0)));
++ __ div_du(a2, t0, t1);
++ __ Branch(&error, ne, a2, Operand(0x1));
++ __ div_du(a2, t1, a4);
++ __ Branch(&error, ne, a2, Operand(0x9a22ffd3973d));
++
++ __ ori(a2, zero_reg, 0);
++ __ mod_d(a2, a6, a4);
++ __ Branch(&error, ne, a2, Operand(0x13558));
++ __ mod_d(a2, t2, t0);
++ __ Branch(&error, ne, a2, Operand(0xfffffffffffffb0a));
++ __ mod_d(a2, t1, a4);
++ __ Branch(&error, ne, a2, Operand(0xffffffffffff6a1a));
++
++ __ ori(a2, zero_reg, 0);
++ __ mod_du(a2, a6, a4);
++ __ Branch(&error, ne, a2, Operand(0x13558));
++ __ mod_du(a2, t2, t0);
++ __ Branch(&error, ne, a2, Operand(0x81f25a87c4236841));
++ __ mod_du(a2, t1, a4);
++ __ Branch(&error, ne, a2, Operand(0x1712));
++
++ // Everything was correctly executed. Load the expected result.
++ __ li(a2, 0x31415926);
++ __ b(&exit);
++
++ __ bind(&error);
++ __ li(a2, 0x666);
++ // Got an error. Return a wrong result.
++ ++ __ bind(&exit); ++ __ or_(a0, a2, zero_reg); ++ __ jirl(zero_reg, ra, 0); ++ ++ CodeDesc desc; ++ assm.GetCode(isolate, &desc); ++ Handle code = Factory::CodeBuilder(isolate, desc, Code::STUB).Build(); ++ auto f = GeneratedCode::FromCode(*code); ++ int64_t res = reinterpret_cast(f.Call(0, 0, 0, 0, 0)); ++ ++ CHECK_EQ(0x31415926L, res); ++} ++ ++TEST(LA5) { ++ CcTest::InitializeVM(); ++ Isolate* isolate = CcTest::i_isolate(); ++ HandleScope scope(isolate); ++ MacroAssembler assm(isolate, v8::internal::CodeObjectRequired::kYes); ++ ++ Label exit, error; ++ ++ __ li(a4, 0x17312); ++ __ li(a5, 0x1012131415161718); ++ __ li(a6, 0x51F4B764A26E7412); ++ __ li(a7, 0x7FFFFFFFFFFFFFFF); ++ __ li(t0, static_cast(0xFFFFFFFFFFFFF547)); ++ __ li(t1, static_cast(0xDF6B8F35A10E205C)); ++ __ li(t2, static_cast(0x81F25A87C4236841)); ++ __ li(t3, static_cast(0x8000000000000000)); ++ ++ __ ori(a2, zero_reg, 0); ++ __ slt(a2, a5, a6); ++ __ Branch(&error, ne, a2, Operand(0x1)); ++ __ slt(a2, a7, t0); ++ __ Branch(&error, ne, a2, Operand(static_cast(0))); ++ __ slt(a2, t1, t1); ++ __ Branch(&error, ne, a2, Operand(static_cast(0))); ++ ++ __ ori(a2, zero_reg, 0); ++ __ sltu(a2, a5, a6); ++ __ Branch(&error, ne, a2, Operand(0x1)); ++ __ sltu(a2, a7, t0); ++ __ Branch(&error, ne, a2, Operand(0x1)); ++ __ sltu(a2, t1, t1); ++ __ Branch(&error, ne, a2, Operand(static_cast(0))); ++ ++ __ ori(a2, zero_reg, 0); ++ __ slti(a2, a5, 0x123); ++ __ Branch(&error, ne, a2, Operand(static_cast(0))); ++ __ slti(a2, t0, 0x123); ++ __ Branch(&error, ne, a2, Operand(0x1)); ++ ++ __ ori(a2, zero_reg, 0); ++ __ sltui(a2, a5, 0x123); ++ __ Branch(&error, ne, a2, Operand(static_cast(0))); ++ __ sltui(a2, t0, 0x123); ++ __ Branch(&error, ne, a2, Operand(static_cast(0))); ++ ++ __ ori(a2, zero_reg, 0); ++ __ and_(a2, a4, a5); ++ __ Branch(&error, ne, a2, Operand(0x1310)); ++ __ and_(a2, a6, a7); ++ __ Branch(&error, ne, a2, Operand(0x51F4B764A26E7412)); ++ ++ __ ori(a2, zero_reg, 0); ++ __ or_(a2, t0, t1); ++ __ Branch(&error, ne, a2, Operand(0xfffffffffffff55f)); ++ __ or_(a2, t2, t3); ++ __ Branch(&error, ne, a2, Operand(0x81f25a87c4236841)); ++ ++ __ ori(a2, zero_reg, 0); ++ __ nor(a2, a4, a5); ++ __ Branch(&error, ne, a2, Operand(0xefedecebeae888e5)); ++ __ nor(a2, a6, a7); ++ __ Branch(&error, ne, a2, Operand(0x8000000000000000)); ++ ++ __ ori(a2, zero_reg, 0); ++ __ xor_(a2, t0, t1); ++ __ Branch(&error, ne, a2, Operand(0x209470ca5ef1d51b)); ++ __ xor_(a2, t2, t3); ++ __ Branch(&error, ne, a2, Operand(0x1f25a87c4236841)); ++ ++ __ ori(a2, zero_reg, 0); ++ __ andn(a2, a4, a5); ++ __ Branch(&error, ne, a2, Operand(0x16002)); ++ __ andn(a2, a6, a7); ++ __ Branch(&error, ne, a2, Operand(static_cast(0))); ++ ++ __ ori(a2, zero_reg, 0); ++ __ orn(a2, t0, t1); ++ __ Branch(&error, ne, a2, Operand(0xffffffffffffffe7)); ++ __ orn(a2, t2, t3); ++ __ Branch(&error, ne, a2, Operand(0xffffffffffffffff)); ++ ++ __ ori(a2, zero_reg, 0); ++ __ andi(a2, a4, 0x123); ++ __ Branch(&error, ne, a2, Operand(0x102)); ++ __ andi(a2, a6, 0xDCB); ++ __ Branch(&error, ne, a2, Operand(0x402)); ++ ++ __ ori(a2, zero_reg, 0); ++ __ xori(a2, t0, 0x123); ++ __ Branch(&error, ne, a2, Operand(0xfffffffffffff464)); ++ __ xori(a2, t2, 0xDCB); ++ __ Branch(&error, ne, a2, Operand(0x81f25a87c423658a)); ++ ++ // Everything was correctly executed. Load the expected result. ++ __ li(a2, 0x31415926); ++ __ b(&exit); ++ ++ __ bind(&error); ++ // Got an error. Return a wrong result. 
++ __ li(a2, 0x666); ++ ++ __ bind(&exit); ++ __ or_(a0, a2, zero_reg); ++ __ jirl(zero_reg, ra, 0); ++ ++ CodeDesc desc; ++ assm.GetCode(isolate, &desc); ++ Handle code = Factory::CodeBuilder(isolate, desc, Code::STUB).Build(); ++ auto f = GeneratedCode::FromCode(*code); ++ int64_t res = reinterpret_cast(f.Call(0, 0, 0, 0, 0)); ++ ++ CHECK_EQ(0x31415926L, res); ++} ++ ++TEST(LA6) { ++ // Test loads and stores instruction. ++ CcTest::InitializeVM(); ++ Isolate* isolate = CcTest::i_isolate(); ++ HandleScope scope(isolate); ++ MacroAssembler assm(isolate, v8::internal::CodeObjectRequired::kYes); ++ ++ struct T { ++ int64_t si1; ++ int64_t si2; ++ int64_t si3; ++ int64_t result_ld_b_si1; ++ int64_t result_ld_b_si2; ++ int64_t result_ld_h_si1; ++ int64_t result_ld_h_si2; ++ int64_t result_ld_w_si1; ++ int64_t result_ld_w_si2; ++ int64_t result_ld_d_si1; ++ int64_t result_ld_d_si3; ++ int64_t result_ld_bu_si2; ++ int64_t result_ld_hu_si2; ++ int64_t result_ld_wu_si2; ++ int64_t result_st_b; ++ int64_t result_st_h; ++ int64_t result_st_w; ++ }; ++ T t; ++ ++ // Ld_b ++ __ Ld_b(a4, MemOperand(a0, offsetof(T, si1))); ++ __ St_d(a4, MemOperand(a0, offsetof(T, result_ld_b_si1))); ++ ++ __ Ld_b(a4, MemOperand(a0, offsetof(T, si2))); ++ __ St_d(a4, MemOperand(a0, offsetof(T, result_ld_b_si2))); ++ ++ // Ld_h ++ __ Ld_h(a5, MemOperand(a0, offsetof(T, si1))); ++ __ St_d(a5, MemOperand(a0, offsetof(T, result_ld_h_si1))); ++ ++ __ Ld_h(a5, MemOperand(a0, offsetof(T, si2))); ++ __ St_d(a5, MemOperand(a0, offsetof(T, result_ld_h_si2))); ++ ++ // Ld_w ++ __ Ld_w(a6, MemOperand(a0, offsetof(T, si1))); ++ __ St_d(a6, MemOperand(a0, offsetof(T, result_ld_w_si1))); ++ ++ __ Ld_w(a6, MemOperand(a0, offsetof(T, si2))); ++ __ St_d(a6, MemOperand(a0, offsetof(T, result_ld_w_si2))); ++ ++ // Ld_d ++ __ Ld_d(a7, MemOperand(a0, offsetof(T, si1))); ++ __ St_d(a7, MemOperand(a0, offsetof(T, result_ld_d_si1))); ++ ++ __ Ld_d(a7, MemOperand(a0, offsetof(T, si3))); ++ __ St_d(a7, MemOperand(a0, offsetof(T, result_ld_d_si3))); ++ ++ // Ld_bu ++ __ Ld_bu(t0, MemOperand(a0, offsetof(T, si2))); ++ __ St_d(t0, MemOperand(a0, offsetof(T, result_ld_bu_si2))); ++ ++ // Ld_hu ++ __ Ld_hu(t1, MemOperand(a0, offsetof(T, si2))); ++ __ St_d(t1, MemOperand(a0, offsetof(T, result_ld_hu_si2))); ++ ++ // Ld_wu ++ __ Ld_wu(t2, MemOperand(a0, offsetof(T, si2))); ++ __ St_d(t2, MemOperand(a0, offsetof(T, result_ld_wu_si2))); ++ ++ // St ++ __ li(t4, 0x11111111); ++ ++ // St_b ++ __ Ld_d(t5, MemOperand(a0, offsetof(T, si3))); ++ __ St_d(t5, MemOperand(a0, offsetof(T, result_st_b))); ++ __ St_b(t4, MemOperand(a0, offsetof(T, result_st_b))); ++ ++ // St_h ++ __ Ld_d(t6, MemOperand(a0, offsetof(T, si3))); ++ __ St_d(t6, MemOperand(a0, offsetof(T, result_st_h))); ++ __ St_h(t4, MemOperand(a0, offsetof(T, result_st_h))); ++ ++ // St_w ++ __ Ld_d(t7, MemOperand(a0, offsetof(T, si3))); ++ __ St_d(t7, MemOperand(a0, offsetof(T, result_st_w))); ++ __ St_w(t4, MemOperand(a0, offsetof(T, result_st_w))); ++ ++ __ jirl(zero_reg, ra, 0); ++ ++ CodeDesc desc; ++ assm.GetCode(isolate, &desc); ++ Handle code = Factory::CodeBuilder(isolate, desc, Code::STUB).Build(); ++ auto f = GeneratedCode::FromCode(*code); ++ t.si1 = 0x11223344; ++ t.si2 = 0x99AABBCC; ++ t.si3 = 0x1122334455667788; ++ f.Call(&t, 0, 0, 0, 0); ++ ++ CHECK_EQ(static_cast(0x44), t.result_ld_b_si1); ++ CHECK_EQ(static_cast(0xFFFFFFFFFFFFFFCC), t.result_ld_b_si2); ++ ++ CHECK_EQ(static_cast(0x3344), t.result_ld_h_si1); ++ CHECK_EQ(static_cast(0xFFFFFFFFFFFFBBCC), t.result_ld_h_si2); ++ ++ 
CHECK_EQ(static_cast(0x11223344), t.result_ld_w_si1); ++ CHECK_EQ(static_cast(0xFFFFFFFF99AABBCC), t.result_ld_w_si2); ++ ++ CHECK_EQ(static_cast(0x11223344), t.result_ld_d_si1); ++ CHECK_EQ(static_cast(0x1122334455667788), t.result_ld_d_si3); ++ ++ CHECK_EQ(static_cast(0xCC), t.result_ld_bu_si2); ++ CHECK_EQ(static_cast(0xBBCC), t.result_ld_hu_si2); ++ CHECK_EQ(static_cast(0x99AABBCC), t.result_ld_wu_si2); ++ ++ CHECK_EQ(static_cast(0x1122334455667711), t.result_st_b); ++ CHECK_EQ(static_cast(0x1122334455661111), t.result_st_h); ++ CHECK_EQ(static_cast(0x1122334411111111), t.result_st_w); ++} ++ ++TEST(LA7) { ++ CcTest::InitializeVM(); ++ Isolate* isolate = CcTest::i_isolate(); ++ HandleScope scope(isolate); ++ MacroAssembler assm(isolate, v8::internal::CodeObjectRequired::kYes); ++ ++ struct T { ++ int64_t si1; ++ int64_t si2; ++ int64_t si3; ++ int64_t result_ldx_b_si1; ++ int64_t result_ldx_b_si2; ++ int64_t result_ldx_h_si1; ++ int64_t result_ldx_h_si2; ++ int64_t result_ldx_w_si1; ++ int64_t result_ldx_w_si2; ++ int64_t result_ldx_d_si1; ++ int64_t result_ldx_d_si3; ++ int64_t result_ldx_bu_si2; ++ int64_t result_ldx_hu_si2; ++ int64_t result_ldx_wu_si2; ++ int64_t result_stx_b; ++ int64_t result_stx_h; ++ int64_t result_stx_w; ++ }; ++ T t; ++ ++ // ldx_b ++ __ li(a2, static_cast(offsetof(T, si1))); ++ __ Ld_b(a4, MemOperand(a0, a2)); ++ __ St_d(a4, MemOperand(a0, offsetof(T, result_ldx_b_si1))); ++ ++ __ li(a2, static_cast(offsetof(T, si2))); ++ __ Ld_b(a4, MemOperand(a0, a2)); ++ __ St_d(a4, MemOperand(a0, offsetof(T, result_ldx_b_si2))); ++ ++ // ldx_h ++ __ li(a2, static_cast(offsetof(T, si1))); ++ __ Ld_h(a5, MemOperand(a0, a2)); ++ __ St_d(a5, MemOperand(a0, offsetof(T, result_ldx_h_si1))); ++ ++ __ li(a2, static_cast(offsetof(T, si2))); ++ __ Ld_h(a5, MemOperand(a0, a2)); ++ __ St_d(a5, MemOperand(a0, offsetof(T, result_ldx_h_si2))); ++ ++ // ldx_w ++ __ li(a2, static_cast(offsetof(T, si1))); ++ __ Ld_w(a6, MemOperand(a0, a2)); ++ __ St_d(a6, MemOperand(a0, offsetof(T, result_ldx_w_si1))); ++ ++ __ li(a2, static_cast(offsetof(T, si2))); ++ __ Ld_w(a6, MemOperand(a0, a2)); ++ __ St_d(a6, MemOperand(a0, offsetof(T, result_ldx_w_si2))); ++ ++ // Ld_d ++ __ li(a2, static_cast(offsetof(T, si1))); ++ __ Ld_d(a7, MemOperand(a0, a2)); ++ __ St_d(a7, MemOperand(a0, offsetof(T, result_ldx_d_si1))); ++ ++ __ li(a2, static_cast(offsetof(T, si3))); ++ __ Ld_d(a7, MemOperand(a0, a2)); ++ __ St_d(a7, MemOperand(a0, offsetof(T, result_ldx_d_si3))); ++ ++ // Ld_bu ++ __ li(a2, static_cast(offsetof(T, si2))); ++ __ Ld_bu(t0, MemOperand(a0, a2)); ++ __ St_d(t0, MemOperand(a0, offsetof(T, result_ldx_bu_si2))); ++ ++ // Ld_hu ++ __ li(a2, static_cast(offsetof(T, si2))); ++ __ Ld_hu(t1, MemOperand(a0, a2)); ++ __ St_d(t1, MemOperand(a0, offsetof(T, result_ldx_hu_si2))); ++ ++ // Ld_wu ++ __ li(a2, static_cast(offsetof(T, si2))); ++ __ Ld_wu(t2, MemOperand(a0, a2)); ++ __ St_d(t2, MemOperand(a0, offsetof(T, result_ldx_wu_si2))); ++ ++ // St ++ __ li(t4, 0x11111111); ++ ++ // St_b ++ __ Ld_d(t5, MemOperand(a0, offsetof(T, si3))); ++ __ St_d(t5, MemOperand(a0, offsetof(T, result_stx_b))); ++ __ li(a2, static_cast(offsetof(T, result_stx_b))); ++ __ St_b(t4, MemOperand(a0, a2)); ++ ++ // St_h ++ __ Ld_d(t6, MemOperand(a0, offsetof(T, si3))); ++ __ St_d(t6, MemOperand(a0, offsetof(T, result_stx_h))); ++ __ li(a2, static_cast(offsetof(T, result_stx_h))); ++ __ St_h(t4, MemOperand(a0, a2)); ++ ++ // St_w ++ __ Ld_d(t7, MemOperand(a0, offsetof(T, si3))); ++ __ li(a2, static_cast(offsetof(T, 
result_stx_w))); ++ __ St_d(t7, MemOperand(a0, a2)); ++ __ li(a3, static_cast(offsetof(T, result_stx_w))); ++ __ St_w(t4, MemOperand(a0, a3)); ++ ++ __ jirl(zero_reg, ra, 0); ++ ++ CodeDesc desc; ++ assm.GetCode(isolate, &desc); ++ Handle code = Factory::CodeBuilder(isolate, desc, Code::STUB).Build(); ++ auto f = GeneratedCode::FromCode(*code); ++ t.si1 = 0x11223344; ++ t.si2 = 0x99AABBCC; ++ t.si3 = 0x1122334455667788; ++ f.Call(&t, 0, 0, 0, 0); ++ ++ CHECK_EQ(static_cast(0x44), t.result_ldx_b_si1); ++ CHECK_EQ(static_cast(0xFFFFFFFFFFFFFFCC), t.result_ldx_b_si2); ++ ++ CHECK_EQ(static_cast(0x3344), t.result_ldx_h_si1); ++ CHECK_EQ(static_cast(0xFFFFFFFFFFFFBBCC), t.result_ldx_h_si2); ++ ++ CHECK_EQ(static_cast(0x11223344), t.result_ldx_w_si1); ++ CHECK_EQ(static_cast(0xFFFFFFFF99AABBCC), t.result_ldx_w_si2); ++ ++ CHECK_EQ(static_cast(0x11223344), t.result_ldx_d_si1); ++ CHECK_EQ(static_cast(0x1122334455667788), t.result_ldx_d_si3); ++ ++ CHECK_EQ(static_cast(0xCC), t.result_ldx_bu_si2); ++ CHECK_EQ(static_cast(0xBBCC), t.result_ldx_hu_si2); ++ CHECK_EQ(static_cast(0x99AABBCC), t.result_ldx_wu_si2); ++ ++ CHECK_EQ(static_cast(0x1122334455667711), t.result_stx_b); ++ CHECK_EQ(static_cast(0x1122334455661111), t.result_stx_h); ++ CHECK_EQ(static_cast(0x1122334411111111), t.result_stx_w); ++} ++ ++TEST(LDPTR_STPTR) { ++ CcTest::InitializeVM(); ++ Isolate* isolate = CcTest::i_isolate(); ++ HandleScope scope(isolate); ++ MacroAssembler assm(isolate, v8::internal::CodeObjectRequired::kYes); ++ ++ int64_t test[10]; ++ ++ __ ldptr_w(a4, a0, 0); ++ __ stptr_d(a4, a0, 24); // test[3] ++ ++ __ ldptr_w(a5, a0, 8); // test[1] ++ __ stptr_d(a5, a0, 32); // test[4] ++ ++ __ ldptr_d(a6, a0, 16); // test[2] ++ __ stptr_d(a6, a0, 40); // test[5] ++ ++ __ li(t0, 0x11111111); ++ ++ __ stptr_d(a6, a0, 48); // test[6] ++ __ stptr_w(t0, a0, 48); // test[6] ++ ++ __ jirl(zero_reg, ra, 0); ++ ++ CodeDesc desc; ++ assm.GetCode(isolate, &desc); ++ Handle code = Factory::CodeBuilder(isolate, desc, Code::STUB).Build(); ++ auto f = GeneratedCode::FromCode(*code); ++ test[0] = 0x11223344; ++ test[1] = 0x99AABBCC; ++ test[2] = 0x1122334455667788; ++ f.Call(&test, 0, 0, 0, 0); ++ ++ CHECK_EQ(static_cast(0x11223344), test[3]); ++ CHECK_EQ(static_cast(0xFFFFFFFF99AABBCC), test[4]); ++ CHECK_EQ(static_cast(0x1122334455667788), test[5]); ++ CHECK_EQ(static_cast(0x1122334411111111), test[6]); ++} ++ ++TEST(LA8) { ++ // Test 32bit shift instructions. 
++ CcTest::InitializeVM(); ++ Isolate* isolate = CcTest::i_isolate(); ++ HandleScope scope(isolate); ++ ++ struct T { ++ int32_t input; ++ int32_t result_sll_w_0; ++ int32_t result_sll_w_8; ++ int32_t result_sll_w_10; ++ int32_t result_sll_w_31; ++ int32_t result_srl_w_0; ++ int32_t result_srl_w_8; ++ int32_t result_srl_w_10; ++ int32_t result_srl_w_31; ++ int32_t result_sra_w_0; ++ int32_t result_sra_w_8; ++ int32_t result_sra_w_10; ++ int32_t result_sra_w_31; ++ int32_t result_rotr_w_0; ++ int32_t result_rotr_w_8; ++ int32_t result_slli_w_0; ++ int32_t result_slli_w_8; ++ int32_t result_slli_w_10; ++ int32_t result_slli_w_31; ++ int32_t result_srli_w_0; ++ int32_t result_srli_w_8; ++ int32_t result_srli_w_10; ++ int32_t result_srli_w_31; ++ int32_t result_srai_w_0; ++ int32_t result_srai_w_8; ++ int32_t result_srai_w_10; ++ int32_t result_srai_w_31; ++ int32_t result_rotri_w_0; ++ int32_t result_rotri_w_8; ++ int32_t result_rotri_w_10; ++ int32_t result_rotri_w_31; ++ }; ++ T t; ++ MacroAssembler assm(isolate, v8::internal::CodeObjectRequired::kYes); ++ ++ __ Ld_w(a4, MemOperand(a0, offsetof(T, input))); ++ ++ // sll_w ++ __ li(a5, 0); ++ __ sll_w(t0, a4, a5); ++ __ li(a5, 0x8); ++ __ sll_w(t1, a4, a5); ++ __ li(a5, 0xA); ++ __ sll_w(t2, a4, a5); ++ __ li(a5, 0x1F); ++ __ sll_w(t3, a4, a5); ++ ++ __ St_w(t0, MemOperand(a0, offsetof(T, result_sll_w_0))); ++ __ St_w(t1, MemOperand(a0, offsetof(T, result_sll_w_8))); ++ __ St_w(t2, MemOperand(a0, offsetof(T, result_sll_w_10))); ++ __ St_w(t3, MemOperand(a0, offsetof(T, result_sll_w_31))); ++ ++ // srl_w ++ __ li(a5, 0x0); ++ __ srl_w(t0, a4, a5); ++ __ li(a5, 0x8); ++ __ srl_w(t1, a4, a5); ++ __ li(a5, 0xA); ++ __ srl_w(t2, a4, a5); ++ __ li(a5, 0x1F); ++ __ srl_w(t3, a4, a5); ++ ++ __ St_w(t0, MemOperand(a0, offsetof(T, result_srl_w_0))); ++ __ St_w(t1, MemOperand(a0, offsetof(T, result_srl_w_8))); ++ __ St_w(t2, MemOperand(a0, offsetof(T, result_srl_w_10))); ++ __ St_w(t3, MemOperand(a0, offsetof(T, result_srl_w_31))); ++ ++ // sra_w ++ __ li(a5, 0x0); ++ __ sra_w(t0, a4, a5); ++ __ li(a5, 0x8); ++ __ sra_w(t1, a4, a5); ++ ++ __ li(a6, static_cast(0x80000000)); ++ __ add_w(a6, a6, a4); ++ __ li(a5, 0xA); ++ __ sra_w(t2, a6, a5); ++ __ li(a5, 0x1F); ++ __ sra_w(t3, a6, a5); ++ ++ __ St_w(t0, MemOperand(a0, offsetof(T, result_sra_w_0))); ++ __ St_w(t1, MemOperand(a0, offsetof(T, result_sra_w_8))); ++ __ St_w(t2, MemOperand(a0, offsetof(T, result_sra_w_10))); ++ __ St_w(t3, MemOperand(a0, offsetof(T, result_sra_w_31))); ++ ++ // rotr ++ __ li(a5, 0x0); ++ __ rotr_w(t0, a4, a5); ++ __ li(a6, 0x8); ++ __ rotr_w(t1, a4, a6); ++ ++ __ St_w(t0, MemOperand(a0, offsetof(T, result_rotr_w_0))); ++ __ St_w(t1, MemOperand(a0, offsetof(T, result_rotr_w_8))); ++ ++ // slli_w ++ __ slli_w(t0, a4, 0); ++ __ slli_w(t1, a4, 0x8); ++ __ slli_w(t2, a4, 0xA); ++ __ slli_w(t3, a4, 0x1F); ++ ++ __ St_w(t0, MemOperand(a0, offsetof(T, result_slli_w_0))); ++ __ St_w(t1, MemOperand(a0, offsetof(T, result_slli_w_8))); ++ __ St_w(t2, MemOperand(a0, offsetof(T, result_slli_w_10))); ++ __ St_w(t3, MemOperand(a0, offsetof(T, result_slli_w_31))); ++ ++ // srli_w ++ __ srli_w(t0, a4, 0); ++ __ srli_w(t1, a4, 0x8); ++ __ srli_w(t2, a4, 0xA); ++ __ srli_w(t3, a4, 0x1F); ++ ++ __ St_w(t0, MemOperand(a0, offsetof(T, result_srli_w_0))); ++ __ St_w(t1, MemOperand(a0, offsetof(T, result_srli_w_8))); ++ __ St_w(t2, MemOperand(a0, offsetof(T, result_srli_w_10))); ++ __ St_w(t3, MemOperand(a0, offsetof(T, result_srli_w_31))); ++ ++ // srai_w ++ __ srai_w(t0, a4, 0); ++ __ srai_w(t1, 
a4, 0x8); ++ ++ __ li(a6, static_cast(0x80000000)); ++ __ add_w(a6, a6, a4); ++ __ srai_w(t2, a6, 0xA); ++ __ srai_w(t3, a6, 0x1F); ++ ++ __ St_w(t0, MemOperand(a0, offsetof(T, result_srai_w_0))); ++ __ St_w(t1, MemOperand(a0, offsetof(T, result_srai_w_8))); ++ __ St_w(t2, MemOperand(a0, offsetof(T, result_srai_w_10))); ++ __ St_w(t3, MemOperand(a0, offsetof(T, result_srai_w_31))); ++ ++ // rotri_w ++ __ rotri_w(t0, a4, 0); ++ __ rotri_w(t1, a4, 0x8); ++ __ rotri_w(t2, a4, 0xA); ++ __ rotri_w(t3, a4, 0x1F); ++ ++ __ St_w(t0, MemOperand(a0, offsetof(T, result_rotri_w_0))); ++ __ St_w(t1, MemOperand(a0, offsetof(T, result_rotri_w_8))); ++ __ St_w(t2, MemOperand(a0, offsetof(T, result_rotri_w_10))); ++ __ St_w(t3, MemOperand(a0, offsetof(T, result_rotri_w_31))); ++ ++ __ jirl(zero_reg, ra, 0); ++ ++ CodeDesc desc; ++ assm.GetCode(isolate, &desc); ++ Handle code = Factory::CodeBuilder(isolate, desc, Code::STUB).Build(); ++ auto f = GeneratedCode::FromCode(*code); ++ t.input = 0x12345678; ++ f.Call(&t, 0x0, 0, 0, 0); ++ ++ CHECK_EQ(static_cast(0x12345678), t.result_sll_w_0); ++ CHECK_EQ(static_cast(0x34567800), t.result_sll_w_8); ++ CHECK_EQ(static_cast(0xD159E000), t.result_sll_w_10); ++ CHECK_EQ(static_cast(0x0), t.result_sll_w_31); ++ ++ CHECK_EQ(static_cast(0x12345678), t.result_srl_w_0); ++ CHECK_EQ(static_cast(0x123456), t.result_srl_w_8); ++ CHECK_EQ(static_cast(0x48D15), t.result_srl_w_10); ++ CHECK_EQ(static_cast(0x0), t.result_srl_w_31); ++ ++ CHECK_EQ(static_cast(0x12345678), t.result_sra_w_0); ++ CHECK_EQ(static_cast(0x123456), t.result_sra_w_8); ++ CHECK_EQ(static_cast(0xFFE48D15), t.result_sra_w_10); ++ CHECK_EQ(static_cast(0xFFFFFFFF), t.result_sra_w_31); ++ ++ CHECK_EQ(static_cast(0x12345678), t.result_rotr_w_0); ++ CHECK_EQ(static_cast(0x78123456), t.result_rotr_w_8); ++ ++ CHECK_EQ(static_cast(0x12345678), t.result_slli_w_0); ++ CHECK_EQ(static_cast(0x34567800), t.result_slli_w_8); ++ CHECK_EQ(static_cast(0xD159E000), t.result_slli_w_10); ++ CHECK_EQ(static_cast(0x0), t.result_slli_w_31); ++ ++ CHECK_EQ(static_cast(0x12345678), t.result_srli_w_0); ++ CHECK_EQ(static_cast(0x123456), t.result_srli_w_8); ++ CHECK_EQ(static_cast(0x48D15), t.result_srli_w_10); ++ CHECK_EQ(static_cast(0x0), t.result_srli_w_31); ++ ++ CHECK_EQ(static_cast(0x12345678), t.result_srai_w_0); ++ CHECK_EQ(static_cast(0x123456), t.result_srai_w_8); ++ CHECK_EQ(static_cast(0xFFE48D15), t.result_srai_w_10); ++ CHECK_EQ(static_cast(0xFFFFFFFF), t.result_srai_w_31); ++ ++ CHECK_EQ(static_cast(0x12345678), t.result_rotri_w_0); ++ CHECK_EQ(static_cast(0x78123456), t.result_rotri_w_8); ++ CHECK_EQ(static_cast(0x9E048D15), t.result_rotri_w_10); ++ CHECK_EQ(static_cast(0x2468ACF0), t.result_rotri_w_31); ++} ++ ++TEST(LA9) { ++ // Test 64bit shift instructions. 
++ CcTest::InitializeVM(); ++ Isolate* isolate = CcTest::i_isolate(); ++ HandleScope scope(isolate); ++ ++ struct T { ++ int64_t input; ++ int64_t result_sll_d_0; ++ int64_t result_sll_d_13; ++ int64_t result_sll_d_30; ++ int64_t result_sll_d_63; ++ int64_t result_srl_d_0; ++ int64_t result_srl_d_13; ++ int64_t result_srl_d_30; ++ int64_t result_srl_d_63; ++ int64_t result_sra_d_0; ++ int64_t result_sra_d_13; ++ int64_t result_sra_d_30; ++ int64_t result_sra_d_63; ++ int64_t result_rotr_d_0; ++ int64_t result_rotr_d_13; ++ int64_t result_slli_d_0; ++ int64_t result_slli_d_13; ++ int64_t result_slli_d_30; ++ int64_t result_slli_d_63; ++ int64_t result_srli_d_0; ++ int64_t result_srli_d_13; ++ int64_t result_srli_d_30; ++ int64_t result_srli_d_63; ++ int64_t result_srai_d_0; ++ int64_t result_srai_d_13; ++ int64_t result_srai_d_30; ++ int64_t result_srai_d_63; ++ int64_t result_rotri_d_0; ++ int64_t result_rotri_d_13; ++ int64_t result_rotri_d_30; ++ int64_t result_rotri_d_63; ++ }; ++ ++ T t; ++ MacroAssembler assm(isolate, v8::internal::CodeObjectRequired::kYes); ++ ++ __ Ld_d(a4, MemOperand(a0, offsetof(T, input))); ++ ++ // sll_d ++ __ li(a5, 0); ++ __ sll_d(t0, a4, a5); ++ __ li(a5, 0xD); ++ __ sll_d(t1, a4, a5); ++ __ li(a5, 0x1E); ++ __ sll_d(t2, a4, a5); ++ __ li(a5, 0x3F); ++ __ sll_d(t3, a4, a5); ++ ++ __ St_d(t0, MemOperand(a0, offsetof(T, result_sll_d_0))); ++ __ St_d(t1, MemOperand(a0, offsetof(T, result_sll_d_13))); ++ __ St_d(t2, MemOperand(a0, offsetof(T, result_sll_d_30))); ++ __ St_d(t3, MemOperand(a0, offsetof(T, result_sll_d_63))); ++ ++ // srl_d ++ __ li(a5, 0x0); ++ __ srl_d(t0, a4, a5); ++ __ li(a5, 0xD); ++ __ srl_d(t1, a4, a5); ++ __ li(a5, 0x1E); ++ __ srl_d(t2, a4, a5); ++ __ li(a5, 0x3F); ++ __ srl_d(t3, a4, a5); ++ ++ __ St_d(t0, MemOperand(a0, offsetof(T, result_srl_d_0))); ++ __ St_d(t1, MemOperand(a0, offsetof(T, result_srl_d_13))); ++ __ St_d(t2, MemOperand(a0, offsetof(T, result_srl_d_30))); ++ __ St_d(t3, MemOperand(a0, offsetof(T, result_srl_d_63))); ++ ++ // sra_d ++ __ li(a5, 0x0); ++ __ sra_d(t0, a4, a5); ++ __ li(a5, 0xD); ++ __ sra_d(t1, a4, a5); ++ ++ __ li(a6, static_cast(0x8000000000000000)); ++ __ add_d(a6, a6, a4); ++ __ li(a5, 0x1E); ++ __ sra_d(t2, a6, a5); ++ __ li(a5, 0x3F); ++ __ sra_d(t3, a6, a5); ++ ++ __ St_d(t0, MemOperand(a0, offsetof(T, result_sra_d_0))); ++ __ St_d(t1, MemOperand(a0, offsetof(T, result_sra_d_13))); ++ __ St_d(t2, MemOperand(a0, offsetof(T, result_sra_d_30))); ++ __ St_d(t3, MemOperand(a0, offsetof(T, result_sra_d_63))); ++ ++ // rotr ++ __ li(a5, 0x0); ++ __ rotr_d(t0, a4, a5); ++ __ li(a6, 0xD); ++ __ rotr_d(t1, a4, a6); ++ ++ __ St_d(t0, MemOperand(a0, offsetof(T, result_rotr_d_0))); ++ __ St_d(t1, MemOperand(a0, offsetof(T, result_rotr_d_13))); ++ ++ // slli_d ++ __ slli_d(t0, a4, 0); ++ __ slli_d(t1, a4, 0xD); ++ __ slli_d(t2, a4, 0x1E); ++ __ slli_d(t3, a4, 0x3F); ++ ++ __ St_d(t0, MemOperand(a0, offsetof(T, result_slli_d_0))); ++ __ St_d(t1, MemOperand(a0, offsetof(T, result_slli_d_13))); ++ __ St_d(t2, MemOperand(a0, offsetof(T, result_slli_d_30))); ++ __ St_d(t3, MemOperand(a0, offsetof(T, result_slli_d_63))); ++ ++ // srli_d ++ __ srli_d(t0, a4, 0); ++ __ srli_d(t1, a4, 0xD); ++ __ srli_d(t2, a4, 0x1E); ++ __ srli_d(t3, a4, 0x3F); ++ ++ __ St_d(t0, MemOperand(a0, offsetof(T, result_srli_d_0))); ++ __ St_d(t1, MemOperand(a0, offsetof(T, result_srli_d_13))); ++ __ St_d(t2, MemOperand(a0, offsetof(T, result_srli_d_30))); ++ __ St_d(t3, MemOperand(a0, offsetof(T, result_srli_d_63))); ++ ++ // srai_d ++ __ 
srai_d(t0, a4, 0); ++ __ srai_d(t1, a4, 0xD); ++ ++ __ li(a6, static_cast(0x8000000000000000)); ++ __ add_d(a6, a6, a4); ++ __ srai_d(t2, a6, 0x1E); ++ __ srai_d(t3, a6, 0x3F); ++ ++ __ St_d(t0, MemOperand(a0, offsetof(T, result_srai_d_0))); ++ __ St_d(t1, MemOperand(a0, offsetof(T, result_srai_d_13))); ++ __ St_d(t2, MemOperand(a0, offsetof(T, result_srai_d_30))); ++ __ St_d(t3, MemOperand(a0, offsetof(T, result_srai_d_63))); ++ ++ // rotri_d ++ __ rotri_d(t0, a4, 0); ++ __ rotri_d(t1, a4, 0xD); ++ __ rotri_d(t2, a4, 0x1E); ++ __ rotri_d(t3, a4, 0x3F); ++ ++ __ St_d(t0, MemOperand(a0, offsetof(T, result_rotri_d_0))); ++ __ St_d(t1, MemOperand(a0, offsetof(T, result_rotri_d_13))); ++ __ St_d(t2, MemOperand(a0, offsetof(T, result_rotri_d_30))); ++ __ St_d(t3, MemOperand(a0, offsetof(T, result_rotri_d_63))); ++ ++ __ jirl(zero_reg, ra, 0); ++ ++ CodeDesc desc; ++ assm.GetCode(isolate, &desc); ++ Handle code = Factory::CodeBuilder(isolate, desc, Code::STUB).Build(); ++ auto f = GeneratedCode::FromCode(*code); ++ t.input = 0x51F4B764A26E7412; ++ f.Call(&t, 0, 0, 0, 0); ++ ++ CHECK_EQ(static_cast(0x51f4b764a26e7412), t.result_sll_d_0); ++ CHECK_EQ(static_cast(0x96ec944dce824000), t.result_sll_d_13); ++ CHECK_EQ(static_cast(0x289b9d0480000000), t.result_sll_d_30); ++ CHECK_EQ(static_cast(0x0), t.result_sll_d_63); ++ ++ CHECK_EQ(static_cast(0x51f4b764a26e7412), t.result_srl_d_0); ++ CHECK_EQ(static_cast(0x28fa5bb251373), t.result_srl_d_13); ++ CHECK_EQ(static_cast(0x147d2dd92), t.result_srl_d_30); ++ CHECK_EQ(static_cast(0x0), t.result_srl_d_63); ++ ++ CHECK_EQ(static_cast(0x51f4b764a26e7412), t.result_sra_d_0); ++ CHECK_EQ(static_cast(0x28fa5bb251373), t.result_sra_d_13); ++ CHECK_EQ(static_cast(0xffffffff47d2dd92), t.result_sra_d_30); ++ CHECK_EQ(static_cast(0xffffffffffffffff), t.result_sra_d_63); ++ ++ CHECK_EQ(static_cast(0x51f4b764a26e7412), t.result_rotr_d_0); ++ CHECK_EQ(static_cast(0xa0928fa5bb251373), t.result_rotr_d_13); ++ ++ CHECK_EQ(static_cast(0x51f4b764a26e7412), t.result_slli_d_0); ++ CHECK_EQ(static_cast(0x96ec944dce824000), t.result_slli_d_13); ++ CHECK_EQ(static_cast(0x289b9d0480000000), t.result_slli_d_30); ++ CHECK_EQ(static_cast(0x0), t.result_slli_d_63); ++ ++ CHECK_EQ(static_cast(0x51f4b764a26e7412), t.result_srli_d_0); ++ CHECK_EQ(static_cast(0x28fa5bb251373), t.result_srli_d_13); ++ CHECK_EQ(static_cast(0x147d2dd92), t.result_srli_d_30); ++ CHECK_EQ(static_cast(0x0), t.result_srli_d_63); ++ ++ CHECK_EQ(static_cast(0x51f4b764a26e7412), t.result_srai_d_0); ++ CHECK_EQ(static_cast(0x28fa5bb251373), t.result_srai_d_13); ++ CHECK_EQ(static_cast(0xffffffff47d2dd92), t.result_srai_d_30); ++ CHECK_EQ(static_cast(0xffffffffffffffff), t.result_srai_d_63); ++ ++ CHECK_EQ(static_cast(0x51f4b764a26e7412), t.result_rotri_d_0); ++ CHECK_EQ(static_cast(0xa0928fa5bb251373), t.result_rotri_d_13); ++ CHECK_EQ(static_cast(0x89b9d04947d2dd92), t.result_rotri_d_30); ++ CHECK_EQ(static_cast(0xa3e96ec944dce824), t.result_rotri_d_63); ++} ++ ++TEST(LA10) { ++ // Test 32bit bit operation instructions. 
++ CcTest::InitializeVM(); ++ Isolate* isolate = CcTest::i_isolate(); ++ HandleScope scope(isolate); ++ MacroAssembler assm(isolate, v8::internal::CodeObjectRequired::kYes); ++ ++ struct T { ++ int64_t si1; ++ int64_t si2; ++ int32_t result_ext_w_b_si1; ++ int32_t result_ext_w_b_si2; ++ int32_t result_ext_w_h_si1; ++ int32_t result_ext_w_h_si2; ++ int32_t result_clo_w_si1; ++ int32_t result_clo_w_si2; ++ int32_t result_clz_w_si1; ++ int32_t result_clz_w_si2; ++ int32_t result_cto_w_si1; ++ int32_t result_cto_w_si2; ++ int32_t result_ctz_w_si1; ++ int32_t result_ctz_w_si2; ++ int32_t result_bytepick_w_si1; ++ int32_t result_bytepick_w_si2; ++ int32_t result_revb_2h_si1; ++ int32_t result_revb_2h_si2; ++ int32_t result_bitrev_4b_si1; ++ int32_t result_bitrev_4b_si2; ++ int32_t result_bitrev_w_si1; ++ int32_t result_bitrev_w_si2; ++ int32_t result_bstrins_w_si1; ++ int32_t result_bstrins_w_si2; ++ int32_t result_bstrpick_w_si1; ++ int32_t result_bstrpick_w_si2; ++ }; ++ T t; ++ ++ __ Ld_d(a4, MemOperand(a0, offsetof(T, si1))); ++ __ Ld_d(a5, MemOperand(a0, offsetof(T, si2))); ++ ++ // ext_w_b ++ __ ext_w_b(t0, a4); ++ __ ext_w_b(t1, a5); ++ __ St_w(t0, MemOperand(a0, offsetof(T, result_ext_w_b_si1))); ++ __ St_w(t1, MemOperand(a0, offsetof(T, result_ext_w_b_si2))); ++ ++ // ext_w_h ++ __ ext_w_h(t0, a4); ++ __ ext_w_h(t1, a5); ++ __ St_w(t0, MemOperand(a0, offsetof(T, result_ext_w_h_si1))); ++ __ St_w(t1, MemOperand(a0, offsetof(T, result_ext_w_h_si2))); ++ ++ /* //clo_w ++ __ clo_w(t0, a4); ++ __ clo_w(t1, a5); ++ __ St_w(t0, MemOperand(a0, offsetof(T, result_clo_w_si1))); ++ __ St_w(t1, MemOperand(a0, offsetof(T, result_clo_w_si2)));*/ ++ ++ // clz_w ++ __ clz_w(t0, a4); ++ __ clz_w(t1, a5); ++ __ St_w(t0, MemOperand(a0, offsetof(T, result_clz_w_si1))); ++ __ St_w(t1, MemOperand(a0, offsetof(T, result_clz_w_si2))); ++ ++ /* //cto_w ++ __ cto_w(t0, a4); ++ __ cto_w(t1, a5); ++ __ St_w(t0, MemOperand(a0, offsetof(T, result_cto_w_si1))); ++ __ St_w(t1, MemOperand(a0, offsetof(T, result_cto_w_si2)));*/ ++ ++ // ctz_w ++ __ ctz_w(t0, a4); ++ __ ctz_w(t1, a5); ++ __ St_w(t0, MemOperand(a0, offsetof(T, result_ctz_w_si1))); ++ __ St_w(t1, MemOperand(a0, offsetof(T, result_ctz_w_si2))); ++ ++ // bytepick_w ++ __ bytepick_w(t0, a4, a5, 0); ++ __ bytepick_w(t1, a5, a4, 2); ++ __ St_w(t0, MemOperand(a0, offsetof(T, result_bytepick_w_si1))); ++ __ St_w(t1, MemOperand(a0, offsetof(T, result_bytepick_w_si2))); ++ ++ // revb_2h ++ __ revb_2h(t0, a4); ++ __ revb_2h(t1, a5); ++ __ St_w(t0, MemOperand(a0, offsetof(T, result_revb_2h_si1))); ++ __ St_w(t1, MemOperand(a0, offsetof(T, result_revb_2h_si2))); ++ ++ // bitrev ++ __ bitrev_4b(t0, a4); ++ __ bitrev_4b(t1, a5); ++ __ St_w(t0, MemOperand(a0, offsetof(T, result_bitrev_4b_si1))); ++ __ St_w(t1, MemOperand(a0, offsetof(T, result_bitrev_4b_si2))); ++ ++ // bitrev_w ++ __ bitrev_w(t0, a4); ++ __ bitrev_w(t1, a5); ++ __ St_w(t0, MemOperand(a0, offsetof(T, result_bitrev_w_si1))); ++ __ St_w(t1, MemOperand(a0, offsetof(T, result_bitrev_w_si2))); ++ ++ // bstrins ++ __ or_(t0, zero_reg, zero_reg); ++ __ or_(t1, zero_reg, zero_reg); ++ __ bstrins_w(t0, a4, 0xD, 0x4); ++ __ bstrins_w(t1, a5, 0x16, 0x5); ++ __ St_w(t0, MemOperand(a0, offsetof(T, result_bstrins_w_si1))); ++ __ St_w(t1, MemOperand(a0, offsetof(T, result_bstrins_w_si2))); ++ ++ // bstrpick ++ __ or_(t0, zero_reg, zero_reg); ++ __ or_(t1, zero_reg, zero_reg); ++ __ bstrpick_w(t0, a4, 0xD, 0x4); ++ __ bstrpick_w(t1, a5, 0x16, 0x5); ++ __ St_w(t0, MemOperand(a0, offsetof(T, result_bstrpick_w_si1))); ++ 
__ St_w(t1, MemOperand(a0, offsetof(T, result_bstrpick_w_si2))); ++ ++ __ jirl(zero_reg, ra, 0); ++ ++ CodeDesc desc; ++ assm.GetCode(isolate, &desc); ++ Handle<Code> code = Factory::CodeBuilder(isolate, desc, Code::STUB).Build(); ++ auto f = GeneratedCode::FromCode(*code); ++ t.si1 = 0x51F4B764A26E7412; ++ t.si2 = 0x81F25A87C423B891; ++ f.Call(&t, 0, 0, 0, 0); ++ ++ CHECK_EQ(static_cast<int32_t>(0x12), t.result_ext_w_b_si1); ++ CHECK_EQ(static_cast<int32_t>(0xffffff91), t.result_ext_w_b_si2); ++ CHECK_EQ(static_cast<int32_t>(0x7412), t.result_ext_w_h_si1); ++ CHECK_EQ(static_cast<int32_t>(0xffffb891), t.result_ext_w_h_si2); ++ // CHECK_EQ(static_cast<int32_t>(0x1), t.result_clo_w_si1); ++ // CHECK_EQ(static_cast<int32_t>(0x2), t.result_clo_w_si2); ++ CHECK_EQ(static_cast<int32_t>(0x0), t.result_clz_w_si1); ++ CHECK_EQ(static_cast<int32_t>(0x0), t.result_clz_w_si2); ++ // CHECK_EQ(static_cast<int32_t>(0x0), t.result_cto_w_si1); ++ // CHECK_EQ(static_cast<int32_t>(0x1), t.result_cto_w_si2); ++ CHECK_EQ(static_cast<int32_t>(0x1), t.result_ctz_w_si1); ++ CHECK_EQ(static_cast<int32_t>(0x0), t.result_ctz_w_si2); ++ CHECK_EQ(static_cast<int32_t>(0xc423b891), t.result_bytepick_w_si1); ++ CHECK_EQ(static_cast<int32_t>(0x7412c423), ++ t.result_bytepick_w_si2); // 0xffffc423 ++ CHECK_EQ(static_cast<int32_t>(0x6ea21274), t.result_revb_2h_si1); ++ CHECK_EQ(static_cast<int32_t>(0x23c491b8), t.result_revb_2h_si2); ++ CHECK_EQ(static_cast<int32_t>(0x45762e48), t.result_bitrev_4b_si1); ++ CHECK_EQ(static_cast<int32_t>(0x23c41d89), t.result_bitrev_4b_si2); ++ CHECK_EQ(static_cast<int32_t>(0x482e7645), t.result_bitrev_w_si1); ++ CHECK_EQ(static_cast<int32_t>(0x891dc423), t.result_bitrev_w_si2); ++ CHECK_EQ(static_cast<int32_t>(0x120), t.result_bstrins_w_si1); ++ CHECK_EQ(static_cast<int32_t>(0x771220), t.result_bstrins_w_si2); ++ CHECK_EQ(static_cast<int32_t>(0x341), t.result_bstrpick_w_si1); ++ CHECK_EQ(static_cast<int32_t>(0x11dc4), t.result_bstrpick_w_si2); ++} ++ ++TEST(LA11) { ++ // Test 64bit bit operation instructions.
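// Note: LA11 is the 64-bit counterpart of LA10: count-leading/trailing-zero
// (clz_d/ctz_d), bytepick_d, the byte/halfword reversals (revb_*/revh_*), full bit
// reversal (bitrev_8b/bitrev_d), bit-field insert/extract (bstrins_d/bstrpick_d),
// and the conditional masks maskeqz/masknez. For example, bstrpick_d(t1, a5, 63, 48)
// extracts bits [63:48] of si2 = 0xFB8017FF781A15C3, giving the expected 0xfb80.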
++ CcTest::InitializeVM(); ++ Isolate* isolate = CcTest::i_isolate(); ++ HandleScope scope(isolate); ++ MacroAssembler assm(isolate, v8::internal::CodeObjectRequired::kYes); ++ ++ struct T { ++ int64_t si1; ++ int64_t si2; ++ int64_t result_clo_d_si1; ++ int64_t result_clo_d_si2; ++ int64_t result_clz_d_si1; ++ int64_t result_clz_d_si2; ++ int64_t result_cto_d_si1; ++ int64_t result_cto_d_si2; ++ int64_t result_ctz_d_si1; ++ int64_t result_ctz_d_si2; ++ int64_t result_bytepick_d_si1; ++ int64_t result_bytepick_d_si2; ++ int64_t result_revb_4h_si1; ++ int64_t result_revb_4h_si2; ++ int64_t result_revb_2w_si1; ++ int64_t result_revb_2w_si2; ++ int64_t result_revb_d_si1; ++ int64_t result_revb_d_si2; ++ int64_t result_revh_2w_si1; ++ int64_t result_revh_2w_si2; ++ int64_t result_revh_d_si1; ++ int64_t result_revh_d_si2; ++ int64_t result_bitrev_8b_si1; ++ int64_t result_bitrev_8b_si2; ++ int64_t result_bitrev_d_si1; ++ int64_t result_bitrev_d_si2; ++ int64_t result_bstrins_d_si1; ++ int64_t result_bstrins_d_si2; ++ int64_t result_bstrpick_d_si1; ++ int64_t result_bstrpick_d_si2; ++ int64_t result_maskeqz_si1; ++ int64_t result_maskeqz_si2; ++ int64_t result_masknez_si1; ++ int64_t result_masknez_si2; ++ }; ++ ++ T t; ++ ++ __ Ld_d(a4, MemOperand(a0, offsetof(T, si1))); ++ __ Ld_d(a5, MemOperand(a0, offsetof(T, si2))); ++ ++ /* //clo_d ++ __ clo_d(t0, a4); ++ __ clo_d(t1, a5); ++ __ St_w(t0, MemOperand(a0, offsetof(T, result_clo_d_si1))); ++ __ St_w(t1, MemOperand(a0, offsetof(T, result_clo_d_si2)));*/ ++ ++ // clz_d ++ __ or_(t0, zero_reg, zero_reg); ++ __ clz_d(t0, a4); ++ __ clz_d(t1, a5); ++ __ St_d(t0, MemOperand(a0, offsetof(T, result_clz_d_si1))); ++ __ St_d(t1, MemOperand(a0, offsetof(T, result_clz_d_si2))); ++ ++ /* //cto_d ++ __ cto_d(t0, a4); ++ __ cto_d(t1, a5); ++ __ St_w(t0, MemOperand(a0, offsetof(T, result_cto_d_si1))); ++ __ St_w(t1, MemOperand(a0, offsetof(T, result_cto_d_si2)));*/ ++ ++ // ctz_d ++ __ ctz_d(t0, a4); ++ __ ctz_d(t1, a5); ++ __ St_d(t0, MemOperand(a0, offsetof(T, result_ctz_d_si1))); ++ __ St_d(t1, MemOperand(a0, offsetof(T, result_ctz_d_si2))); ++ ++ // bytepick_d ++ __ bytepick_d(t0, a4, a5, 0); ++ __ bytepick_d(t1, a5, a4, 5); ++ __ St_d(t0, MemOperand(a0, offsetof(T, result_bytepick_d_si1))); ++ __ St_d(t1, MemOperand(a0, offsetof(T, result_bytepick_d_si2))); ++ ++ // revb_4h ++ __ revb_4h(t0, a4); ++ __ revb_4h(t1, a5); ++ __ St_d(t0, MemOperand(a0, offsetof(T, result_revb_4h_si1))); ++ __ St_d(t1, MemOperand(a0, offsetof(T, result_revb_4h_si2))); ++ ++ // revb_2w ++ __ revb_2w(t0, a4); ++ __ revb_2w(t1, a5); ++ __ St_d(t0, MemOperand(a0, offsetof(T, result_revb_2w_si1))); ++ __ St_d(t1, MemOperand(a0, offsetof(T, result_revb_2w_si2))); ++ ++ // revb_d ++ __ revb_d(t0, a4); ++ __ revb_d(t1, a5); ++ __ St_d(t0, MemOperand(a0, offsetof(T, result_revb_d_si1))); ++ __ St_d(t1, MemOperand(a0, offsetof(T, result_revb_d_si2))); ++ ++ // revh_2w ++ __ revh_2w(t0, a4); ++ __ revh_2w(t1, a5); ++ __ St_d(t0, MemOperand(a0, offsetof(T, result_revh_2w_si1))); ++ __ St_d(t1, MemOperand(a0, offsetof(T, result_revh_2w_si2))); ++ ++ // revh_d ++ __ revh_d(t0, a4); ++ __ revh_d(t1, a5); ++ __ St_d(t0, MemOperand(a0, offsetof(T, result_revh_d_si1))); ++ __ St_d(t1, MemOperand(a0, offsetof(T, result_revh_d_si2))); ++ ++ // bitrev_8b ++ __ bitrev_8b(t0, a4); ++ __ bitrev_8b(t1, a5); ++ __ St_d(t0, MemOperand(a0, offsetof(T, result_bitrev_8b_si1))); ++ __ St_d(t1, MemOperand(a0, offsetof(T, result_bitrev_8b_si2))); ++ ++ // bitrev_d ++ __ bitrev_d(t0, a4); ++ __ bitrev_d(t1, 
a5); ++ __ St_d(t0, MemOperand(a0, offsetof(T, result_bitrev_d_si1))); ++ __ St_d(t1, MemOperand(a0, offsetof(T, result_bitrev_d_si2))); ++ ++ // bstrins_d ++ __ or_(t0, zero_reg, zero_reg); ++ __ or_(t1, zero_reg, zero_reg); ++ __ bstrins_d(t0, a4, 5, 0); ++ __ bstrins_d(t1, a5, 39, 12); ++ __ St_d(t0, MemOperand(a0, offsetof(T, result_bstrins_d_si1))); ++ __ St_d(t1, MemOperand(a0, offsetof(T, result_bstrins_d_si2))); ++ ++ // bstrpick_d ++ __ or_(t0, zero_reg, zero_reg); ++ __ or_(t1, zero_reg, zero_reg); ++ __ bstrpick_d(t0, a4, 5, 0); ++ __ bstrpick_d(t1, a5, 63, 48); ++ __ St_d(t0, MemOperand(a0, offsetof(T, result_bstrpick_d_si1))); ++ __ St_d(t1, MemOperand(a0, offsetof(T, result_bstrpick_d_si2))); ++ ++ // maskeqz ++ __ maskeqz(t0, a4, a4); ++ __ maskeqz(t1, a5, zero_reg); ++ __ St_d(t0, MemOperand(a0, offsetof(T, result_maskeqz_si1))); ++ __ St_d(t1, MemOperand(a0, offsetof(T, result_maskeqz_si2))); ++ ++ // masknez ++ __ masknez(t0, a4, a4); ++ __ masknez(t1, a5, zero_reg); ++ __ St_d(t0, MemOperand(a0, offsetof(T, result_masknez_si1))); ++ __ St_d(t1, MemOperand(a0, offsetof(T, result_masknez_si2))); ++ ++ __ jirl(zero_reg, ra, 0); ++ ++ CodeDesc desc; ++ assm.GetCode(isolate, &desc); ++ Handle code = Factory::CodeBuilder(isolate, desc, Code::STUB).Build(); ++ auto f = GeneratedCode::FromCode(*code); ++ t.si1 = 0x10C021098B710CDE; ++ t.si2 = 0xFB8017FF781A15C3; ++ f.Call(&t, 0, 0, 0, 0); ++ ++ // CHECK_EQ(static_cast(0x0), t.result_clo_d_si1); ++ // CHECK_EQ(static_cast(0x5), t.result_clo_d_si2); ++ CHECK_EQ(static_cast(0x3), t.result_clz_d_si1); ++ CHECK_EQ(static_cast(0x0), t.result_clz_d_si2); ++ // CHECK_EQ(static_cast(0x0), t.result_cto_d_si1); ++ // CHECK_EQ(static_cast(0x2), t.result_cto_d_si2); ++ CHECK_EQ(static_cast(0x1), t.result_ctz_d_si1); ++ CHECK_EQ(static_cast(0x0), t.result_ctz_d_si2); ++ CHECK_EQ(static_cast(0xfb8017ff781a15c3), t.result_bytepick_d_si1); ++ CHECK_EQ(static_cast(0x710cde0000000000), t.result_bytepick_d_si2); ++ CHECK_EQ(static_cast(0xc0100921718bde0c), t.result_revb_4h_si1); ++ CHECK_EQ(static_cast(0x80fbff171a78c315), t.result_revb_4h_si2); ++ CHECK_EQ(static_cast(0x921c010de0c718b), t.result_revb_2w_si1); ++ CHECK_EQ(static_cast(0xff1780fbc3151a78), t.result_revb_2w_si2); ++ CHECK_EQ(static_cast(0xde0c718b0921c010), t.result_revb_d_si1); ++ CHECK_EQ(static_cast(0xc3151a78ff1780fb), t.result_revb_d_si2); ++ CHECK_EQ(static_cast(0x210910c00cde8b71), t.result_revh_2w_si1); ++ CHECK_EQ(static_cast(0x17fffb8015c3781a), t.result_revh_2w_si2); ++ CHECK_EQ(static_cast(0xcde8b71210910c0), t.result_revh_d_si1); ++ CHECK_EQ(static_cast(0x15c3781a17fffb80), t.result_revh_d_si2); ++ CHECK_EQ(static_cast(0x8038490d18e307b), t.result_bitrev_8b_si1); ++ CHECK_EQ(static_cast(0xdf01e8ff1e58a8c3), t.result_bitrev_8b_si2); ++ CHECK_EQ(static_cast(0x7b308ed190840308), t.result_bitrev_d_si1); ++ CHECK_EQ(static_cast(0xc3a8581effe801df), t.result_bitrev_d_si2); ++ CHECK_EQ(static_cast(0x1e), t.result_bstrins_d_si1); ++ CHECK_EQ(static_cast(0x81a15c3000), t.result_bstrins_d_si2); ++ CHECK_EQ(static_cast(0x1e), t.result_bstrpick_d_si1); ++ CHECK_EQ(static_cast(0xfb80), t.result_bstrpick_d_si2); ++ CHECK_EQ(static_cast(0), t.result_maskeqz_si1); ++ CHECK_EQ(static_cast(0xFB8017FF781A15C3), t.result_maskeqz_si2); ++ CHECK_EQ(static_cast(0x10C021098B710CDE), t.result_masknez_si1); ++ CHECK_EQ(static_cast(0), t.result_masknez_si2); ++} ++ ++uint64_t run_beq(int64_t value1, int64_t value2, int16_t offset) { ++ Isolate* isolate = CcTest::i_isolate(); ++ HandleScope 
scope(isolate); ++ ++ MacroAssembler assm(isolate, v8::internal::CodeObjectRequired::kYes); ++ ++ Label main_block, L; ++ __ li(a2, 0l); ++ __ b(&main_block); ++ // Block 1 ++ __ addi_d(a2, a2, 0x1); ++ __ addi_d(a2, a2, 0x2); ++ __ b(&L); ++ ++ // Block 2 ++ __ addi_d(a2, a2, 0x10); ++ __ addi_d(a2, a2, 0x20); ++ __ b(&L); ++ ++ // Block 3 (Main) ++ __ bind(&main_block); ++ __ beq(a0, a1, offset); ++ __ bind(&L); ++ __ or_(a0, a2, zero_reg); ++ __ jirl(zero_reg, ra, 0); ++ ++ // Block 4 ++ __ addi_d(a2, a2, 0x100); ++ __ addi_d(a2, a2, 0x200); ++ __ b(&L); ++ ++ // Block 5 ++ __ addi_d(a2, a2, 0x300); ++ __ addi_d(a2, a2, 0x400); ++ __ b(&L); ++ ++ CodeDesc desc; ++ assm.GetCode(isolate, &desc); ++ Handle code = Factory::CodeBuilder(isolate, desc, Code::STUB).Build(); ++ ++ auto f = GeneratedCode::FromCode(*code); ++ uint64_t res = reinterpret_cast(f.Call(value1, value2, 0, 0, 0)); ++ ++ return res; ++} ++ ++TEST(BEQ) { ++ CcTest::InitializeVM(); ++ struct TestCaseBeq { ++ int64_t value1; ++ int64_t value2; ++ int16_t offset; ++ uint64_t expected_res; ++ }; ++ ++ // clang-format off ++ struct TestCaseBeq tc[] = { ++ // value1, value2, offset, expected_res ++ { 0, 0, -6, 0x3 }, ++ { 1, 1, -3, 0x30 }, ++ { -2, -2, 3, 0x300 }, ++ { 3, -3, 6, 0 }, ++ { 4, 4, 6, 0x700 }, ++ }; ++ // clang-format on ++ ++ size_t nr_test_cases = sizeof(tc) / sizeof(TestCaseBeq); ++ for (size_t i = 0; i < nr_test_cases; ++i) { ++ uint64_t res = run_beq(tc[i].value1, tc[i].value2, tc[i].offset); ++ CHECK_EQ(tc[i].expected_res, res); ++ } ++} ++ ++uint64_t run_bne(int64_t value1, int64_t value2, int16_t offset) { ++ Isolate* isolate = CcTest::i_isolate(); ++ HandleScope scope(isolate); ++ ++ MacroAssembler assm(isolate, v8::internal::CodeObjectRequired::kYes); ++ ++ Label main_block, L; ++ __ li(a2, 0l); ++ __ b(&main_block); ++ // Block 1 ++ __ addi_d(a2, a2, 0x1); ++ __ addi_d(a2, a2, 0x2); ++ __ b(&L); ++ ++ // Block 2 ++ __ addi_d(a2, a2, 0x10); ++ __ addi_d(a2, a2, 0x20); ++ __ b(&L); ++ ++ // Block 3 (Main) ++ __ bind(&main_block); ++ __ bne(a0, a1, offset); ++ __ bind(&L); ++ __ or_(a0, a2, zero_reg); ++ __ jirl(zero_reg, ra, 0); ++ ++ // Block 4 ++ __ addi_d(a2, a2, 0x100); ++ __ addi_d(a2, a2, 0x200); ++ __ b(&L); ++ ++ // Block 5 ++ __ addi_d(a2, a2, 0x300); ++ __ addi_d(a2, a2, 0x400); ++ __ b(&L); ++ ++ CodeDesc desc; ++ assm.GetCode(isolate, &desc); ++ Handle code = Factory::CodeBuilder(isolate, desc, Code::STUB).Build(); ++ ++ auto f = GeneratedCode::FromCode(*code); ++ uint64_t res = reinterpret_cast(f.Call(value1, value2, 0, 0, 0)); ++ ++ return res; ++} ++ ++TEST(BNE) { ++ CcTest::InitializeVM(); ++ struct TestCaseBne { ++ int64_t value1; ++ int64_t value2; ++ int16_t offset; ++ uint64_t expected_res; ++ }; ++ ++ // clang-format off ++ struct TestCaseBne tc[] = { ++ // value1, value2, offset, expected_res ++ { 1, -1, -6, 0x3 }, ++ { 2, -2, -3, 0x30 }, ++ { 3, -3, 3, 0x300 }, ++ { 4, -4, 6, 0x700 }, ++ { 0, 0, 6, 0 }, ++ }; ++ // clang-format on ++ ++ size_t nr_test_cases = sizeof(tc) / sizeof(TestCaseBne); ++ for (size_t i = 0; i < nr_test_cases; ++i) { ++ uint64_t res = run_bne(tc[i].value1, tc[i].value2, tc[i].offset); ++ CHECK_EQ(tc[i].expected_res, res); ++ } ++} ++ ++uint64_t run_blt(int64_t value1, int64_t value2, int16_t offset) { ++ Isolate* isolate = CcTest::i_isolate(); ++ HandleScope scope(isolate); ++ ++ MacroAssembler assm(isolate, v8::internal::CodeObjectRequired::kYes); ++ ++ Label main_block, L; ++ __ li(a2, 0l); ++ __ b(&main_block); ++ // Block 1 ++ __ addi_d(a2, a2, 0x1); ++ __ 
addi_d(a2, a2, 0x2); ++ __ b(&L); ++ ++ // Block 2 ++ __ addi_d(a2, a2, 0x10); ++ __ addi_d(a2, a2, 0x20); ++ __ b(&L); ++ ++ // Block 3 (Main) ++ __ bind(&main_block); ++ __ blt(a0, a1, offset); ++ __ bind(&L); ++ __ or_(a0, a2, zero_reg); ++ __ jirl(zero_reg, ra, 0); ++ ++ // Block 4 ++ __ addi_d(a2, a2, 0x100); ++ __ addi_d(a2, a2, 0x200); ++ __ b(&L); ++ ++ // Block 5 ++ __ addi_d(a2, a2, 0x300); ++ __ addi_d(a2, a2, 0x400); ++ __ b(&L); ++ ++ CodeDesc desc; ++ assm.GetCode(isolate, &desc); ++ Handle code = Factory::CodeBuilder(isolate, desc, Code::STUB).Build(); ++ ++ auto f = GeneratedCode::FromCode(*code); ++ uint64_t res = reinterpret_cast(f.Call(value1, value2, 0, 0, 0)); ++ ++ return res; ++} ++ ++TEST(BLT) { ++ CcTest::InitializeVM(); ++ struct TestCaseBlt { ++ int64_t value1; ++ int64_t value2; ++ int16_t offset; ++ uint64_t expected_res; ++ }; ++ ++ // clang-format off ++ struct TestCaseBlt tc[] = { ++ // value1, value2, offset, expected_res ++ { -1, 1, -6, 0x3 }, ++ { -2, 2, -3, 0x30 }, ++ { -3, 3, 3, 0x300 }, ++ { -4, 4, 6, 0x700 }, ++ { 5, -5, 6, 0 }, ++ { 0, 0, 6, 0 }, ++ }; ++ // clang-format on ++ ++ size_t nr_test_cases = sizeof(tc) / sizeof(TestCaseBlt); ++ for (size_t i = 0; i < nr_test_cases; ++i) { ++ uint64_t res = run_blt(tc[i].value1, tc[i].value2, tc[i].offset); ++ CHECK_EQ(tc[i].expected_res, res); ++ } ++} ++ ++uint64_t run_bge(uint64_t value1, uint64_t value2, int16_t offset) { ++ Isolate* isolate = CcTest::i_isolate(); ++ HandleScope scope(isolate); ++ ++ MacroAssembler assm(isolate, v8::internal::CodeObjectRequired::kYes); ++ ++ Label main_block, L; ++ __ li(a2, 0l); ++ __ b(&main_block); ++ // Block 1 ++ __ addi_d(a2, a2, 0x1); ++ __ addi_d(a2, a2, 0x2); ++ __ b(&L); ++ ++ // Block 2 ++ __ addi_d(a2, a2, 0x10); ++ __ addi_d(a2, a2, 0x20); ++ __ b(&L); ++ ++ // Block 3 (Main) ++ __ bind(&main_block); ++ __ bge(a0, a1, offset); ++ __ bind(&L); ++ __ or_(a0, a2, zero_reg); ++ __ jirl(zero_reg, ra, 0); ++ ++ // Block 4 ++ __ addi_d(a2, a2, 0x100); ++ __ addi_d(a2, a2, 0x200); ++ __ b(&L); ++ ++ // Block 5 ++ __ addi_d(a2, a2, 0x300); ++ __ addi_d(a2, a2, 0x400); ++ __ b(&L); ++ ++ CodeDesc desc; ++ assm.GetCode(isolate, &desc); ++ Handle code = Factory::CodeBuilder(isolate, desc, Code::STUB).Build(); ++ ++ auto f = GeneratedCode::FromCode(*code); ++ uint64_t res = reinterpret_cast(f.Call(value1, value2, 0, 0, 0)); ++ ++ return res; ++} ++ ++TEST(BGE) { ++ CcTest::InitializeVM(); ++ struct TestCaseBge { ++ int64_t value1; ++ int64_t value2; ++ int16_t offset; ++ uint64_t expected_res; ++ }; ++ ++ // clang-format off ++ struct TestCaseBge tc[] = { ++ // value1, value2, offset, expected_res ++ { 0, 0, -6, 0x3 }, ++ { 1, 1, -3, 0x30 }, ++ { 2, -2, 3, 0x300 }, ++ { 3, -3, 6, 0x700 }, ++ { -4, 4, 6, 0 }, ++ }; ++ // clang-format on ++ ++ size_t nr_test_cases = sizeof(tc) / sizeof(TestCaseBge); ++ for (size_t i = 0; i < nr_test_cases; ++i) { ++ uint64_t res = run_bge(tc[i].value1, tc[i].value2, tc[i].offset); ++ CHECK_EQ(tc[i].expected_res, res); ++ } ++} ++ ++uint64_t run_bltu(int64_t value1, int64_t value2, int16_t offset) { ++ Isolate* isolate = CcTest::i_isolate(); ++ HandleScope scope(isolate); ++ ++ MacroAssembler assm(isolate, v8::internal::CodeObjectRequired::kYes); ++ ++ Label main_block, L; ++ __ li(a2, 0l); ++ __ b(&main_block); ++ // Block 1 ++ __ addi_d(a2, a2, 0x1); ++ __ addi_d(a2, a2, 0x2); ++ __ b(&L); ++ ++ // Block 2 ++ __ addi_d(a2, a2, 0x10); ++ __ addi_d(a2, a2, 0x20); ++ __ b(&L); ++ ++ // Block 3 (Main) ++ __ bind(&main_block); ++ __ bltu(a0, 
a1, offset); ++ __ bind(&L); ++ __ or_(a0, a2, zero_reg); ++ __ jirl(zero_reg, ra, 0); ++ ++ // Block 4 ++ __ addi_d(a2, a2, 0x100); ++ __ addi_d(a2, a2, 0x200); ++ __ b(&L); ++ ++ // Block 5 ++ __ addi_d(a2, a2, 0x300); ++ __ addi_d(a2, a2, 0x400); ++ __ b(&L); ++ ++ CodeDesc desc; ++ assm.GetCode(isolate, &desc); ++ Handle code = Factory::CodeBuilder(isolate, desc, Code::STUB).Build(); ++ ++ auto f = GeneratedCode::FromCode(*code); ++ uint64_t res = reinterpret_cast(f.Call(value1, value2, 0, 0, 0)); ++ ++ return res; ++} ++ ++TEST(BLTU) { ++ CcTest::InitializeVM(); ++ struct TestCaseBltu { ++ int64_t value1; ++ int64_t value2; ++ int16_t offset; ++ uint64_t expected_res; ++ }; ++ ++ // clang-format off ++ struct TestCaseBltu tc[] = { ++ // value1, value2, offset, expected_res ++ { 0, 1, -6, 0x3 }, ++ { 1, -1, -3, 0x30 }, ++ { 2, -2, 3, 0x300 }, ++ { 3, -3, 6, 0x700 }, ++ { 4, 4, 6, 0 }, ++ }; ++ // clang-format on ++ ++ size_t nr_test_cases = sizeof(tc) / sizeof(TestCaseBltu); ++ for (size_t i = 0; i < nr_test_cases; ++i) { ++ uint64_t res = run_bltu(tc[i].value1, tc[i].value2, tc[i].offset); ++ CHECK_EQ(tc[i].expected_res, res); ++ } ++} ++ ++uint64_t run_bgeu(int64_t value1, int64_t value2, int16_t offset) { ++ Isolate* isolate = CcTest::i_isolate(); ++ HandleScope scope(isolate); ++ ++ MacroAssembler assm(isolate, v8::internal::CodeObjectRequired::kYes); ++ ++ Label main_block, L; ++ __ li(a2, 0l); ++ __ b(&main_block); ++ // Block 1 ++ __ addi_d(a2, a2, 0x1); ++ __ addi_d(a2, a2, 0x2); ++ __ b(&L); ++ ++ // Block 2 ++ __ addi_d(a2, a2, 0x10); ++ __ addi_d(a2, a2, 0x20); ++ __ b(&L); ++ ++ // Block 3 (Main) ++ __ bind(&main_block); ++ __ bgeu(a0, a1, offset); ++ __ bind(&L); ++ __ or_(a0, a2, zero_reg); ++ __ jirl(zero_reg, ra, 0); ++ ++ // Block 4 ++ __ addi_d(a2, a2, 0x100); ++ __ addi_d(a2, a2, 0x200); ++ __ b(&L); ++ ++ // Block 5 ++ __ addi_d(a2, a2, 0x300); ++ __ addi_d(a2, a2, 0x400); ++ __ b(&L); ++ ++ CodeDesc desc; ++ assm.GetCode(isolate, &desc); ++ Handle code = Factory::CodeBuilder(isolate, desc, Code::STUB).Build(); ++ ++ auto f = GeneratedCode::FromCode(*code); ++ uint64_t res = reinterpret_cast(f.Call(value1, value2, 0, 0, 0)); ++ ++ return res; ++} ++ ++TEST(BGEU) { ++ CcTest::InitializeVM(); ++ struct TestCaseBgeu { ++ int64_t value1; ++ int64_t value2; ++ int16_t offset; ++ uint64_t expected_res; ++ }; ++ ++ // clang-format off ++ struct TestCaseBgeu tc[] = { ++ // value1, value2, offset, expected_res ++ { 0, 0, -6, 0x3 }, ++ { -1, 1, -3, 0x30 }, ++ { -2, 2, 3, 0x300 }, ++ { -3, 3, 6, 0x700 }, ++ { 4, -4, 6, 0 }, ++ }; ++ // clang-format on ++ ++ size_t nr_test_cases = sizeof(tc) / sizeof(TestCaseBgeu); ++ for (size_t i = 0; i < nr_test_cases; ++i) { ++ uint64_t res = run_bgeu(tc[i].value1, tc[i].value2, tc[i].offset); ++ CHECK_EQ(tc[i].expected_res, res); ++ } ++} ++ ++uint64_t run_beqz(int64_t value, int32_t offset) { ++ Isolate* isolate = CcTest::i_isolate(); ++ HandleScope scope(isolate); ++ ++ MacroAssembler assm(isolate, v8::internal::CodeObjectRequired::kYes); ++ ++ Label main_block, L; ++ __ li(a2, 0l); ++ __ b(&main_block); ++ // Block 1 ++ __ addi_d(a2, a2, 0x1); ++ __ addi_d(a2, a2, 0x2); ++ __ b(&L); ++ ++ // Block 2 ++ __ addi_d(a2, a2, 0x10); ++ __ addi_d(a2, a2, 0x20); ++ __ b(&L); ++ ++ // Block 3 (Main) ++ __ bind(&main_block); ++ __ beqz(a0, offset); ++ __ bind(&L); ++ __ or_(a0, a2, zero_reg); ++ __ jirl(zero_reg, ra, 0); ++ ++ // Block 4 ++ __ addi_d(a2, a2, 0x100); ++ __ addi_d(a2, a2, 0x200); ++ __ b(&L); ++ ++ // Block 5 ++ __ addi_d(a2, a2, 
0x300); ++ __ addi_d(a2, a2, 0x400); ++ __ b(&L); ++ ++ CodeDesc desc; ++ assm.GetCode(isolate, &desc); ++ Handle code = Factory::CodeBuilder(isolate, desc, Code::STUB).Build(); ++ ++ auto f = GeneratedCode::FromCode(*code); ++ uint64_t res = reinterpret_cast(f.Call(value, 0, 0, 0, 0)); ++ ++ return res; ++} ++ ++TEST(BEQZ) { ++ CcTest::InitializeVM(); ++ struct TestCaseBeqz { ++ int64_t value; ++ int32_t offset; ++ uint64_t expected_res; ++ }; ++ ++ // clang-format off ++ struct TestCaseBeqz tc[] = { ++ // value, offset, expected_res ++ { 0, -6, 0x3 }, ++ { 0, -3, 0x30 }, ++ { 0, 3, 0x300 }, ++ { 0, 6, 0x700 }, ++ { 1, 6, 0 }, ++ }; ++ // clang-format on ++ ++ size_t nr_test_cases = sizeof(tc) / sizeof(TestCaseBeqz); ++ for (size_t i = 0; i < nr_test_cases; ++i) { ++ uint64_t res = run_beqz(tc[i].value, tc[i].offset); ++ CHECK_EQ(tc[i].expected_res, res); ++ } ++} ++ ++uint64_t run_bnez_b(int64_t value, int32_t offset) { ++ // bnez, b. ++ Isolate* isolate = CcTest::i_isolate(); ++ HandleScope scope(isolate); ++ ++ MacroAssembler assm(isolate, v8::internal::CodeObjectRequired::kYes); ++ ++ Label main_block, L; ++ __ li(a2, 0l); ++ __ b(&main_block); ++ // Block 1 ++ __ addi_d(a2, a2, 0x1); ++ __ addi_d(a2, a2, 0x2); ++ __ b(5); ++ ++ // Block 2 ++ __ addi_d(a2, a2, 0x10); ++ __ addi_d(a2, a2, 0x20); ++ __ b(2); ++ ++ // Block 3 (Main) ++ __ bind(&main_block); ++ __ bnez(a0, offset); ++ __ bind(&L); ++ __ or_(a0, a2, zero_reg); ++ __ jirl(zero_reg, ra, 0); ++ ++ // Block 4 ++ __ addi_d(a2, a2, 0x100); ++ __ addi_d(a2, a2, 0x200); ++ __ b(-4); ++ ++ // Block 5 ++ __ addi_d(a2, a2, 0x300); ++ __ addi_d(a2, a2, 0x400); ++ __ b(-7); ++ ++ CodeDesc desc; ++ assm.GetCode(isolate, &desc); ++ Handle code = Factory::CodeBuilder(isolate, desc, Code::STUB).Build(); ++ ++ auto f = GeneratedCode::FromCode(*code); ++ uint64_t res = reinterpret_cast(f.Call(value, 0, 0, 0, 0)); ++ ++ return res; ++} ++ ++TEST(BNEZ_B) { ++ CcTest::InitializeVM(); ++ struct TestCaseBnez { ++ int64_t value; ++ int32_t offset; ++ uint64_t expected_res; ++ }; ++ ++ // clang-format off ++ struct TestCaseBnez tc[] = { ++ // value, offset, expected_res ++ { 1, -6, 0x3 }, ++ { -2, -3, 0x30 }, ++ { 3, 3, 0x300 }, ++ { -4, 6, 0x700 }, ++ { 0, 6, 0 }, ++ }; ++ // clang-format on ++ ++ size_t nr_test_cases = sizeof(tc) / sizeof(TestCaseBnez); ++ for (size_t i = 0; i < nr_test_cases; ++i) { ++ uint64_t res = run_bnez_b(tc[i].value, tc[i].offset); ++ CHECK_EQ(tc[i].expected_res, res); ++ } ++} ++ ++uint64_t run_bl(int32_t offset) { ++ Isolate* isolate = CcTest::i_isolate(); ++ HandleScope scope(isolate); ++ ++ MacroAssembler assm(isolate, v8::internal::CodeObjectRequired::kYes); ++ ++ Label main_block; ++ __ li(a2, 0l); ++ __ push(ra); // push is implemented by two instructions, addi_d and st_d ++ __ b(&main_block); ++ ++ // Block 1 ++ __ addi_d(a2, a2, 0x1); ++ __ addi_d(a2, a2, 0x2); ++ __ jirl(zero_reg, ra, 0); ++ ++ // Block 2 ++ __ addi_d(a2, a2, 0x10); ++ __ addi_d(a2, a2, 0x20); ++ __ jirl(zero_reg, ra, 0); ++ ++ // Block 3 (Main) ++ __ bind(&main_block); ++ __ bl(offset); ++ __ or_(a0, a2, zero_reg); ++ __ pop(ra); // pop is implemented by two instructions, ld_d and addi_d. 
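// Note: as in the other run_* branch helpers above, the code around the branch is
// laid out as small two-addi blocks, each accumulating a distinct value into a2
// (0x3, 0x30, 0x300 or 0x700). The raw offset handed to bl() is an instruction
// count relative to the bl itself, so the value copied into a0 below identifies
// exactly which block the branch reached (0 means the branch was not taken).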
++ __ jirl(zero_reg, ra, 0); ++ ++ // Block 4 ++ __ addi_d(a2, a2, 0x100); ++ __ addi_d(a2, a2, 0x200); ++ __ jirl(zero_reg, ra, 0); ++ ++ // Block 5 ++ __ addi_d(a2, a2, 0x300); ++ __ addi_d(a2, a2, 0x400); ++ __ jirl(zero_reg, ra, 0); ++ ++ CodeDesc desc; ++ assm.GetCode(isolate, &desc); ++ Handle code = Factory::CodeBuilder(isolate, desc, Code::STUB).Build(); ++ ++ auto f = GeneratedCode::FromCode(*code); ++ uint64_t res = reinterpret_cast(f.Call(0, 0, 0, 0, 0)); ++ ++ return res; ++} ++ ++TEST(BL) { ++ CcTest::InitializeVM(); ++ struct TestCaseBl { ++ int32_t offset; ++ uint64_t expected_res; ++ }; ++ ++ // clang-format off ++ struct TestCaseBl tc[] = { ++ // offset, expected_res ++ { -6, 0x3 }, ++ { -3, 0x30 }, ++ { 5, 0x300 }, ++ { 8, 0x700 }, ++ }; ++ // clang-format on ++ ++ size_t nr_test_cases = sizeof(tc) / sizeof(TestCaseBl); ++ for (size_t i = 0; i < nr_test_cases; ++i) { ++ uint64_t res = run_bl(tc[i].offset); ++ CHECK_EQ(tc[i].expected_res, res); ++ } ++} ++ ++TEST(PCADD) { ++ CcTest::InitializeVM(); ++ Isolate* isolate = CcTest::i_isolate(); ++ HandleScope scope(isolate); ++ ++ MacroAssembler assm(isolate, v8::internal::CodeObjectRequired::kYes); ++ ++ Label exit, error; ++ __ push(ra); ++ ++ // pcaddi ++ __ li(a4, 0x1FFFFC); ++ __ li(a5, 0); ++ __ li(a6, static_cast(0xFFE00000)); ++ ++ __ bl(1); ++ __ pcaddi(a3, 0x7FFFF); ++ __ add_d(a2, ra, a4); ++ __ Branch(&error, ne, a2, Operand(a3)); ++ ++ __ bl(1); ++ __ pcaddi(a3, 0); ++ __ add_d(a2, ra, a5); ++ __ Branch(&error, ne, a2, Operand(a3)); ++ ++ __ bl(1); ++ __ pcaddi(a3, 0x80000); ++ __ add_d(a2, ra, a6); ++ __ Branch(&error, ne, a2, Operand(a3)); ++ ++ // pcaddu12i ++ __ li(a4, 0x7FFFF000); ++ __ li(a5, 0); ++ __ li(a6, static_cast(0x80000000)); ++ ++ __ bl(1); ++ __ pcaddu12i(a2, 0x7FFFF); ++ __ add_d(a3, ra, a4); ++ __ Branch(&error, ne, a2, Operand(a3)); ++ __ bl(1); ++ __ pcaddu12i(a2, 0); ++ __ add_d(a3, ra, a5); ++ __ Branch(&error, ne, a2, Operand(a3)); ++ __ bl(1); ++ __ pcaddu12i(a2, 0x80000); ++ __ add_d(a3, ra, a6); ++ __ Branch(&error, ne, a2, Operand(a3)); ++ ++ // pcaddu18i ++ __ li(a4, 0x1FFFFC0000); ++ __ li(a5, 0); ++ __ li(a6, static_cast(0xFFFFFFE000000000)); ++ ++ __ bl(1); ++ __ pcaddu18i(a2, 0x7FFFF); ++ __ add_d(a3, ra, a4); ++ __ Branch(&error, ne, a2, Operand(a3)); ++ ++ __ bl(1); ++ __ pcaddu18i(a2, 0); ++ __ add_d(a3, ra, a5); ++ __ Branch(&error, ne, a2, Operand(a3)); ++ ++ __ bl(1); ++ __ pcaddu18i(a2, 0x80000); ++ __ add_d(a3, ra, a6); ++ __ Branch(&error, ne, a2, Operand(a3)); ++ ++ // pcalau12i ++ __ li(a4, 0x7FFFF000); ++ __ li(a5, 0); ++ __ li(a6, static_cast(0x80000000)); ++ __ li(a7, static_cast(0xFFFFFFFFFFFFF000)); ++ ++ __ bl(1); ++ __ pcalau12i(a3, 0x7FFFF); ++ __ add_d(a2, ra, a4); ++ __ and_(t0, a2, a7); ++ __ and_(t1, a3, a7); ++ __ Branch(&error, ne, t0, Operand(t1)); ++ ++ __ bl(1); ++ __ pcalau12i(a3, 0); ++ __ add_d(a2, ra, a5); ++ __ and_(t0, a2, a7); ++ __ and_(t1, a3, a7); ++ __ Branch(&error, ne, t0, Operand(t1)); ++ ++ __ bl(1); ++ __ pcalau12i(a2, 0x80000); ++ __ add_d(a3, ra, a6); ++ __ and_(t0, a2, a7); ++ __ and_(t1, a3, a7); ++ __ Branch(&error, ne, t0, Operand(t1)); ++ ++ __ li(a0, 0x31415926); ++ __ b(&exit); ++ ++ __ bind(&error); ++ __ li(a0, 0x666); ++ ++ __ bind(&exit); ++ __ pop(ra); ++ __ jirl(zero_reg, ra, 0); ++ ++ CodeDesc desc; ++ assm.GetCode(isolate, &desc); ++ Handle code = Factory::CodeBuilder(isolate, desc, Code::STUB).Build(); ++ auto f = GeneratedCode::FromCode(*code); ++ int64_t res = reinterpret_cast(f.Call(0, 0, 0, 0, 0)); ++ ++ 
CHECK_EQ(0x31415926L, res); ++} ++ ++uint64_t run_jirl(int16_t offset) { ++ Isolate* isolate = CcTest::i_isolate(); ++ HandleScope scope(isolate); ++ ++ MacroAssembler assm(isolate, v8::internal::CodeObjectRequired::kYes); ++ ++ Label main_block; ++ __ li(a2, 0l); ++ __ push(ra); ++ __ b(&main_block); ++ ++ // Block 1 ++ __ addi_d(a2, a2, 0x1); ++ __ addi_d(a2, a2, 0x2); ++ __ jirl(zero_reg, ra, 0); ++ ++ // Block 2 ++ __ addi_d(a2, a2, 0x10); ++ __ addi_d(a2, a2, 0x20); ++ __ jirl(zero_reg, ra, 0); ++ ++ // Block 3 (Main) ++ __ bind(&main_block); ++ __ pcaddi(a3, 1); ++ __ jirl(ra, a3, offset); ++ __ or_(a0, a2, zero_reg); ++ __ pop(ra); // pop is implemented by two instructions, ld_d and addi_d. ++ __ jirl(zero_reg, ra, 0); ++ ++ // Block 4 ++ __ addi_d(a2, a2, 0x100); ++ __ addi_d(a2, a2, 0x200); ++ __ jirl(zero_reg, ra, 0); ++ ++ // Block 5 ++ __ addi_d(a2, a2, 0x300); ++ __ addi_d(a2, a2, 0x400); ++ __ jirl(zero_reg, ra, 0); ++ ++ CodeDesc desc; ++ assm.GetCode(isolate, &desc); ++ Handle code = Factory::CodeBuilder(isolate, desc, Code::STUB).Build(); ++ ++ auto f = GeneratedCode::FromCode(*code); ++ uint64_t res = reinterpret_cast(f.Call(0, 0, 0, 0, 0)); ++ ++ return res; ++} ++ ++TEST(JIRL) { ++ CcTest::InitializeVM(); ++ struct TestCaseJirl { ++ int16_t offset; ++ uint64_t expected_res; ++ }; ++ ++ // clang-format off ++ struct TestCaseJirl tc[] = { ++ // offset, expected_res ++ { -7, 0x3 }, ++ { -4, 0x30 }, ++ { 5, 0x300 }, ++ { 8, 0x700 }, ++ }; ++ // clang-format on ++ ++ size_t nr_test_cases = sizeof(tc) / sizeof(TestCaseJirl); ++ for (size_t i = 0; i < nr_test_cases; ++i) { ++ uint64_t res = run_jirl(tc[i].offset); ++ CHECK_EQ(tc[i].expected_res, res); ++ } ++} ++ ++TEST(LA12) { ++ // Test floating point calculate instructions. ++ CcTest::InitializeVM(); ++ Isolate* isolate = CcTest::i_isolate(); ++ HandleScope scope(isolate); ++ ++ struct T { ++ double a; ++ double b; ++ double c; ++ double d; ++ double e; ++ double f; ++ double result_fadd_d; ++ double result_fsub_d; ++ double result_fmul_d; ++ double result_fdiv_d; ++ double result_fmadd_d; ++ double result_fmsub_d; ++ double result_fnmadd_d; ++ double result_fnmsub_d; ++ double result_fsqrt_d; ++ double result_frecip_d; ++ double result_frsqrt_d; ++ double result_fscaleb_d; ++ double result_flogb_d; ++ double result_fcopysign_d; ++ double result_fclass_d; ++ }; ++ T t; ++ ++ MacroAssembler assm(isolate, v8::internal::CodeObjectRequired::kYes); ++ ++ // Double precision floating point instructions. 
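// Note: LA12 chains the double-precision arithmetic instructions (fneg_d, fadd_d,
// fsub_d, fmin_d/fmax_d/fmina_d/fmaxa_d, fmul_d, fdiv_d, the fused
// fmadd_d/fmsub_d/fnmadd_d/fnmsub_d, and fsqrt_d) and checks the results against
// precomputed constants; frecip_d/frsqrt_d and the fscaleb/flogb/fcopysign/fclass
// group are left commented out here and not checked.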
++ __ Fld_d(f8, MemOperand(a0, offsetof(T, a))); ++ __ Fld_d(f9, MemOperand(a0, offsetof(T, b))); ++ ++ __ fneg_d(f10, f8); ++ __ fadd_d(f11, f9, f10); ++ __ Fst_d(f11, MemOperand(a0, offsetof(T, result_fadd_d))); ++ __ fabs_d(f11, f11); ++ __ fsub_d(f12, f11, f9); ++ __ Fst_d(f12, MemOperand(a0, offsetof(T, result_fsub_d))); ++ ++ __ Fld_d(f13, MemOperand(a0, offsetof(T, c))); ++ __ Fld_d(f14, MemOperand(a0, offsetof(T, d))); ++ __ Fld_d(f15, MemOperand(a0, offsetof(T, e))); ++ ++ __ fmin_d(f16, f13, f14); ++ __ fmul_d(f17, f15, f16); ++ __ Fst_d(f17, MemOperand(a0, offsetof(T, result_fmul_d))); ++ __ fmax_d(f18, f13, f14); ++ __ fdiv_d(f19, f15, f18); ++ __ Fst_d(f19, MemOperand(a0, offsetof(T, result_fdiv_d))); ++ ++ __ fmina_d(f16, f13, f14); ++ __ fmadd_d(f18, f17, f15, f16); ++ __ Fst_d(f18, MemOperand(a0, offsetof(T, result_fmadd_d))); ++ __ fnmadd_d(f19, f17, f15, f16); ++ __ Fst_d(f19, MemOperand(a0, offsetof(T, result_fnmadd_d))); ++ __ fmaxa_d(f16, f13, f14); ++ __ fmsub_d(f20, f17, f15, f16); ++ __ Fst_d(f20, MemOperand(a0, offsetof(T, result_fmsub_d))); ++ __ fnmsub_d(f21, f17, f15, f16); ++ __ Fst_d(f21, MemOperand(a0, offsetof(T, result_fnmsub_d))); ++ ++ __ Fld_d(f8, MemOperand(a0, offsetof(T, f))); ++ __ fsqrt_d(f10, f8); ++ __ Fst_d(f10, MemOperand(a0, offsetof(T, result_fsqrt_d))); ++ //__ frecip_d(f11, f10); ++ //__ frsqrt_d(f12, f8); ++ //__ Fst_d(f11, MemOperand(a0, offsetof(T, result_frecip_d))); ++ //__ Fst_d(f12, MemOperand(a0, offsetof(T, result_frsqrt_d))); ++ ++ /*__ fscaleb_d(f16, f13, f15); ++ __ flogb_d(f17, f15); ++ __ fcopysign_d(f18, f8, f9); ++ __ fclass_d(f19, f9); ++ __ Fst_d(f16, MemOperand(a0, offsetof(T, result_fscaleb_d))); ++ __ Fst_d(f17, MemOperand(a0, offsetof(T, result_flogb_d))); ++ __ Fst_d(f18, MemOperand(a0, offsetof(T, result_fcopysign_d))); ++ __ Fst_d(f19, MemOperand(a0, offsetof(T, result_fclass_d)));*/ ++ ++ __ jirl(zero_reg, ra, 0); ++ ++ CodeDesc desc; ++ assm.GetCode(isolate, &desc); ++ Handle code = Factory::CodeBuilder(isolate, desc, Code::STUB).Build(); ++ auto f = GeneratedCode::FromCode(*code); ++ // Double test values. 
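// Note: with the inputs below, f10 = -a = -1.5e14, so result_fadd_d = b + f10 =
// -2.75e11 - 1.5e14 = -1.50275e14, and result_fsub_d = |result_fadd_d| - b =
// 1.50275e14 + 2.75e11 = 1.5055e14. Similarly, fmin_d(c, d) = -2.75 gives
// result_fmul_d = 120.0 * -2.75 = -330.0, and fmadd_d then computes
// f17 * f15 + fmina_d(c, d) = -330.0 * 120.0 + 1.5 = -39598.5, matching the
// expected constants checked after the call.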
++ t.a = 1.5e14; ++ t.b = -2.75e11; ++ t.c = 1.5; ++ t.d = -2.75; ++ t.e = 120.0; ++ t.f = 120.44; ++ f.Call(&t, 0, 0, 0, 0); ++ ++ CHECK_EQ(static_cast(-1.502750e14), t.result_fadd_d); ++ CHECK_EQ(static_cast(1.505500e14), t.result_fsub_d); ++ CHECK_EQ(static_cast(-3.300000e02), t.result_fmul_d); ++ CHECK_EQ(static_cast(8.000000e01), t.result_fdiv_d); ++ CHECK_EQ(static_cast(-3.959850e04), t.result_fmadd_d); ++ CHECK_EQ(static_cast(-3.959725e04), t.result_fmsub_d); ++ CHECK_EQ(static_cast(3.959850e04), t.result_fnmadd_d); ++ CHECK_EQ(static_cast(3.959725e04), t.result_fnmsub_d); ++ CHECK_EQ(static_cast(10.97451593465515908537), t.result_fsqrt_d); ++ // CHECK_EQ(static_cast( 8.164965e-08), t.result_frecip_d); ++ // CHECK_EQ(static_cast( 8.164966e-08), t.result_frsqrt_d); ++ // CHECK_EQ(static_cast(), t.result_fscaleb_d); ++ // CHECK_EQ(static_cast( 6.906891), t.result_flogb_d); ++ // CHECK_EQ(static_cast( 2.75e11), t.result_fcopysign_d); ++ // CHECK_EQ(static_cast(), t.result_fclass_d); ++} ++ ++TEST(LA13) { ++ CcTest::InitializeVM(); ++ Isolate* isolate = CcTest::i_isolate(); ++ HandleScope scope(isolate); ++ ++ struct T { ++ float a; ++ float b; ++ float c; ++ float d; ++ float e; ++ float result_fadd_s; ++ float result_fsub_s; ++ float result_fmul_s; ++ float result_fdiv_s; ++ float result_fmadd_s; ++ float result_fmsub_s; ++ float result_fnmadd_s; ++ float result_fnmsub_s; ++ float result_fsqrt_s; ++ float result_frecip_s; ++ float result_frsqrt_s; ++ float result_fscaleb_s; ++ float result_flogb_s; ++ float result_fcopysign_s; ++ float result_fclass_s; ++ }; ++ T t; ++ ++ MacroAssembler assm(isolate, v8::internal::CodeObjectRequired::kYes); ++ ++ // Float precision floating point instructions. ++ __ Fld_s(f8, MemOperand(a0, offsetof(T, a))); ++ __ Fld_s(f9, MemOperand(a0, offsetof(T, b))); ++ ++ __ fneg_s(f10, f8); ++ __ fadd_s(f11, f9, f10); ++ __ Fst_s(f11, MemOperand(a0, offsetof(T, result_fadd_s))); ++ __ fabs_s(f11, f11); ++ __ fsub_s(f12, f11, f9); ++ __ Fst_s(f12, MemOperand(a0, offsetof(T, result_fsub_s))); ++ ++ __ Fld_s(f13, MemOperand(a0, offsetof(T, c))); ++ __ Fld_s(f14, MemOperand(a0, offsetof(T, d))); ++ __ Fld_s(f15, MemOperand(a0, offsetof(T, e))); ++ ++ __ fmin_s(f16, f13, f14); ++ __ fmul_s(f17, f15, f16); ++ __ Fst_s(f17, MemOperand(a0, offsetof(T, result_fmul_s))); ++ __ fmax_s(f18, f13, f14); ++ __ fdiv_s(f19, f15, f18); ++ __ Fst_s(f19, MemOperand(a0, offsetof(T, result_fdiv_s))); ++ ++ __ fmina_s(f16, f13, f14); ++ __ fmadd_s(f18, f17, f15, f16); ++ __ Fst_s(f18, MemOperand(a0, offsetof(T, result_fmadd_s))); ++ __ fnmadd_s(f19, f17, f15, f16); ++ __ Fst_s(f19, MemOperand(a0, offsetof(T, result_fnmadd_s))); ++ __ fmaxa_s(f16, f13, f14); ++ __ fmsub_s(f20, f17, f15, f16); ++ __ Fst_s(f20, MemOperand(a0, offsetof(T, result_fmsub_s))); ++ __ fnmsub_s(f21, f17, f15, f16); ++ __ Fst_s(f21, MemOperand(a0, offsetof(T, result_fnmsub_s))); ++ ++ __ fsqrt_s(f10, f8); ++ //__ frecip_s(f11, f10); ++ //__ frsqrt_s(f12, f8); ++ __ Fst_s(f10, MemOperand(a0, offsetof(T, result_fsqrt_s))); ++ //__ Fst_s(f11, MemOperand(a0, offsetof(T, result_frecip_s))); ++ //__ Fst_s(f12, MemOperand(a0, offsetof(T, result_frsqrt_s))); ++ ++ /*__ fscaleb_s(f16, f13, f15); ++ __ flogb_s(f17, f15); ++ __ fcopysign_s(f18, f8, f9); ++ __ fclass_s(f19, f9); ++ __ Fst_s(f16, MemOperand(a0, offsetof(T, result_fscaleb_s))); ++ __ Fst_s(f17, MemOperand(a0, offsetof(T, result_flogb_s))); ++ __ Fst_s(f18, MemOperand(a0, offsetof(T, result_fcopysign_s))); ++ __ Fst_s(f19, MemOperand(a0, offsetof(T, 
result_fclass_s)));*/ ++ __ jirl(zero_reg, ra, 0); ++ ++ CodeDesc desc; ++ assm.GetCode(isolate, &desc); ++ Handle code = Factory::CodeBuilder(isolate, desc, Code::STUB).Build(); ++ auto f = GeneratedCode::FromCode(*code); ++ // Float test values. ++ t.a = 1.5e6; ++ t.b = -2.75e4; ++ t.c = 1.5; ++ t.d = -2.75; ++ t.e = 120.0; ++ f.Call(&t, 0, 0, 0, 0); ++ ++ CHECK_EQ(static_cast(-1.527500e06), t.result_fadd_s); ++ CHECK_EQ(static_cast(1.555000e06), t.result_fsub_s); ++ CHECK_EQ(static_cast(-3.300000e02), t.result_fmul_s); ++ CHECK_EQ(static_cast(8.000000e01), t.result_fdiv_s); ++ CHECK_EQ(static_cast(-3.959850e04), t.result_fmadd_s); ++ CHECK_EQ(static_cast(-3.959725e04), t.result_fmsub_s); ++ CHECK_EQ(static_cast(3.959850e04), t.result_fnmadd_s); ++ CHECK_EQ(static_cast(3.959725e04), t.result_fnmsub_s); ++ CHECK_EQ(static_cast(1224.744873), t.result_fsqrt_s); ++ // CHECK_EQ(static_cast( 8.164966e-04), t.result_frecip_s); ++ // CHECK_EQ(static_cast( 8.164966e-04), t.result_frsqrt_s); ++ // CHECK_EQ(static_cast(), t.result_fscaleb_s); ++ // CHECK_EQ(static_cast( 6.906890), t.result_flogb_s); ++ // CHECK_EQ(static_cast( 2.75e4), t.result_fcopysign_s); ++ // CHECK_EQ(static_cast(), t.result_fclass_s); ++} ++ ++TEST(FCMP_COND) { ++ CcTest::InitializeVM(); ++ Isolate* isolate = CcTest::i_isolate(); ++ HandleScope scope(isolate); ++ MacroAssembler assm(isolate, v8::internal::CodeObjectRequired::kYes); ++ ++ struct TestFloat { ++ double dTrue; ++ double dFalse; ++ double dOp1; ++ double dOp2; ++ double dCaf; ++ double dCun; ++ double dCeq; ++ double dCueq; ++ double dClt; ++ double dCult; ++ double dCle; ++ double dCule; ++ double dCne; ++ double dCor; ++ double dCune; ++ double dSaf; ++ double dSun; ++ double dSeq; ++ double dSueq; ++ double dSlt; ++ double dSult; ++ double dSle; ++ double dSule; ++ double dSne; ++ double dSor; ++ double dSune; ++ float fTrue; ++ float fFalse; ++ float fOp1; ++ float fOp2; ++ float fCaf; ++ float fCun; ++ float fCeq; ++ float fCueq; ++ float fClt; ++ float fCult; ++ float fCle; ++ float fCule; ++ float fCne; ++ float fCor; ++ float fCune; ++ float fSaf; ++ float fSun; ++ float fSeq; ++ float fSueq; ++ float fSlt; ++ float fSult; ++ float fSle; ++ float fSule; ++ float fSne; ++ float fSor; ++ float fSune; ++ }; ++ ++ TestFloat test; ++ ++ __ Fld_d(f8, MemOperand(a0, offsetof(TestFloat, dOp1))); ++ __ Fld_d(f9, MemOperand(a0, offsetof(TestFloat, dOp2))); ++ ++ __ Fld_s(f10, MemOperand(a0, offsetof(TestFloat, fOp1))); ++ __ Fld_s(f11, MemOperand(a0, offsetof(TestFloat, fOp2))); ++ ++ __ Fld_d(f12, MemOperand(a0, offsetof(TestFloat, dFalse))); ++ __ Fld_d(f13, MemOperand(a0, offsetof(TestFloat, dTrue))); ++ ++ __ Fld_s(f14, MemOperand(a0, offsetof(TestFloat, fFalse))); ++ __ Fld_s(f15, MemOperand(a0, offsetof(TestFloat, fTrue))); ++ ++ __ fcmp_cond_d(CAF, f8, f9, FCC0); ++ __ fcmp_cond_s(CAF, f10, f11, FCC1); ++ __ fsel(FCC0, f16, f12, f13); ++ __ fsel(FCC1, f17, f14, f15); ++ __ Fst_d(f16, MemOperand(a0, offsetof(TestFloat, dCaf))); ++ __ Fst_s(f17, MemOperand(a0, offsetof(TestFloat, fCaf))); ++ ++ __ fcmp_cond_d(CUN, f8, f9, FCC0); ++ __ fcmp_cond_s(CUN, f10, f11, FCC1); ++ __ fsel(FCC0, f16, f12, f13); ++ __ fsel(FCC1, f17, f14, f15); ++ __ Fst_d(f16, MemOperand(a0, offsetof(TestFloat, dCun))); ++ __ Fst_s(f17, MemOperand(a0, offsetof(TestFloat, fCun))); ++ ++ __ fcmp_cond_d(CEQ, f8, f9, FCC0); ++ __ fcmp_cond_s(CEQ, f10, f11, FCC1); ++ __ fsel(FCC0, f16, f12, f13); ++ __ fsel(FCC1, f17, f14, f15); ++ __ Fst_d(f16, MemOperand(a0, offsetof(TestFloat, dCeq))); ++ 
__ Fst_s(f17, MemOperand(a0, offsetof(TestFloat, fCeq))); ++ ++ __ fcmp_cond_d(CUEQ, f8, f9, FCC0); ++ __ fcmp_cond_s(CUEQ, f10, f11, FCC1); ++ __ fsel(FCC0, f16, f12, f13); ++ __ fsel(FCC1, f17, f14, f15); ++ __ Fst_d(f16, MemOperand(a0, offsetof(TestFloat, dCueq))); ++ __ Fst_s(f17, MemOperand(a0, offsetof(TestFloat, fCueq))); ++ ++ __ fcmp_cond_d(CLT, f8, f9, FCC0); ++ __ fcmp_cond_s(CLT, f10, f11, FCC1); ++ __ fsel(FCC0, f16, f12, f13); ++ __ fsel(FCC1, f17, f14, f15); ++ __ Fst_d(f16, MemOperand(a0, offsetof(TestFloat, dClt))); ++ __ Fst_s(f17, MemOperand(a0, offsetof(TestFloat, fClt))); ++ ++ __ fcmp_cond_d(CULT, f8, f9, FCC0); ++ __ fcmp_cond_s(CULT, f10, f11, FCC1); ++ __ fsel(FCC0, f16, f12, f13); ++ __ fsel(FCC1, f17, f14, f15); ++ __ Fst_d(f16, MemOperand(a0, offsetof(TestFloat, dCult))); ++ __ Fst_s(f17, MemOperand(a0, offsetof(TestFloat, fCult))); ++ ++ __ fcmp_cond_d(CLE, f8, f9, FCC0); ++ __ fcmp_cond_s(CLE, f10, f11, FCC1); ++ __ fsel(FCC0, f16, f12, f13); ++ __ fsel(FCC1, f17, f14, f15); ++ __ Fst_d(f16, MemOperand(a0, offsetof(TestFloat, dCle))); ++ __ Fst_s(f17, MemOperand(a0, offsetof(TestFloat, fCle))); ++ ++ __ fcmp_cond_d(CULE, f8, f9, FCC0); ++ __ fcmp_cond_s(CULE, f10, f11, FCC1); ++ __ fsel(FCC0, f16, f12, f13); ++ __ fsel(FCC1, f17, f14, f15); ++ __ Fst_d(f16, MemOperand(a0, offsetof(TestFloat, dCule))); ++ __ Fst_s(f17, MemOperand(a0, offsetof(TestFloat, fCule))); ++ ++ __ fcmp_cond_d(CNE, f8, f9, FCC0); ++ __ fcmp_cond_s(CNE, f10, f11, FCC1); ++ __ fsel(FCC0, f16, f12, f13); ++ __ fsel(FCC1, f17, f14, f15); ++ __ Fst_d(f16, MemOperand(a0, offsetof(TestFloat, dCne))); ++ __ Fst_s(f17, MemOperand(a0, offsetof(TestFloat, fCne))); ++ ++ __ fcmp_cond_d(COR, f8, f9, FCC0); ++ __ fcmp_cond_s(COR, f10, f11, FCC1); ++ __ fsel(FCC0, f16, f12, f13); ++ __ fsel(FCC1, f17, f14, f15); ++ __ Fst_d(f16, MemOperand(a0, offsetof(TestFloat, dCor))); ++ __ Fst_s(f17, MemOperand(a0, offsetof(TestFloat, fCor))); ++ ++ __ fcmp_cond_d(CUNE, f8, f9, FCC0); ++ __ fcmp_cond_s(CUNE, f10, f11, FCC1); ++ __ fsel(FCC0, f16, f12, f13); ++ __ fsel(FCC1, f17, f14, f15); ++ __ Fst_d(f16, MemOperand(a0, offsetof(TestFloat, dCune))); ++ __ Fst_s(f17, MemOperand(a0, offsetof(TestFloat, fCune))); ++ ++ /* __ fcmp_cond_d(SAF, f8, f9, FCC0); ++ __ fcmp_cond_s(SAF, f10, f11, FCC1); ++ __ fsel(FCC0, f16, f12, f13); ++ __ fsel(FCC1, f17, f14, f15); ++ __ Fst_d(f16, MemOperand(a0, offsetof(TestFloat, dSaf))); ++ __ Fst_s(f17, MemOperand(a0, offsetof(TestFloat, fSaf))); ++ ++ __ fcmp_cond_d(SUN, f8, f9, FCC0); ++ __ fcmp_cond_s(SUN, f10, f11, FCC1); ++ __ fsel(FCC0, f16, f12, f13); ++ __ fsel(FCC1, f17, f14, f15); ++ __ Fst_d(f16, MemOperand(a0, offsetof(TestFloat, dSun))); ++ __ Fst_s(f17, MemOperand(a0, offsetof(TestFloat, fSun))); ++ ++ __ fcmp_cond_d(SEQ, f8, f9, FCC0); ++ __ fcmp_cond_s(SEQ, f10, f11, FCC1); ++ __ fsel(FCC0, f16, f12, f13); ++ __ fsel(FCC1, f17, f14, f15); ++ __ Fst_d(f16, MemOperand(a0, offsetof(TestFloat, dSeq))); ++ __ Fst_f(f17, MemOperand(a0, offsetof(TestFloat, fSeq))); ++ ++ __ fcmp_cond_d(SUEQ, f8, f9, FCC0); ++ __ fcmp_cond_s(SUEQ, f10, f11, FCC1); ++ __ fsel(FCC0, f16, f12, f13); ++ __ fsel(FCC1, f17, f14, f15); ++ __ Fst_d(f16, MemOperand(a0, offsetof(TestFloat, dSueq))); ++ __ Fst_f(f17, MemOperand(a0, offsetof(TestFloat, fSueq))); ++ ++ __ fcmp_cond_d(SLT, f8, f9, FCC0); ++ __ fcmp_cond_s(SLT, f10, f11, FCC1); ++ __ fsel(f16, f12, f13, FCC0); ++ __ fsel(f17, f14, f15, FCC1); ++ __ Fld_d(f16, MemOperand(a0, offsetof(TestFloat, dSlt))); ++ __ Fst_d(f17, MemOperand(a0, 
offsetof(TestFloat, fSlt))); ++ ++ __ fcmp_cond_d(SULT, f8, f9, FCC0); ++ __ fcmp_cond_s(SULT, f10, f11, FCC1); ++ __ fsel(FCC0, f16, f12, f13); ++ __ fsel(FCC1, f17, f14, f15); ++ __ Fst_d(f16, MemOperand(a0, offsetof(TestFloat, dSult))); ++ __ Fst_s(f17, MemOperand(a0, offsetof(TestFloat, fSult))); ++ ++ __ fcmp_cond_d(SLE, f8, f9, FCC0); ++ __ fcmp_cond_s(SLE, f10, f11, FCC1); ++ __ fsel(FCC0, f16, f12, f13); ++ __ fsel(FCC1, f17, f14, f15); ++ __ Fst_d(f16, MemOperand(a0, offsetof(TestFloat, dSle))); ++ __ Fst_f(f17, MemOperand(a0, offsetof(TestFloat, fSle))); ++ ++ __ fcmp_cond_d(SULE, f8, f9, FCC0); ++ __ fcmp_cond_s(SULE, f10, f11, FCC1); ++ __ fsel(FCC0, f16, f12, f13); ++ __ fsel(FCC1, f17, f14, f15); ++ __ Fst_d(f16, MemOperand(a0, offsetof(TestFloat, dSule))); ++ __ Fst_f(f17, MemOperand(a0, offsetof(TestFloat, fSule))); ++ ++ __ fcmp_cond_d(SNE, f8, f9, FCC0); ++ __ fcmp_cond_s(SNE, f10, f11, FCC1); ++ __ fsel(FCC0, f16, f12, f13); ++ __ fsel(FCC1, f17, f14, f15); ++ __ Fst_d(f16, MemOperand(a0, offsetof(TestFloat, dSne))); ++ __ Fst_s(f17, MemOperand(a0, offsetof(TestFloat, fSne))); ++ ++ __ fcmp_cond_d(SOR, f8, f9, FCC0); ++ __ fcmp_cond_s(SOR, f10, f11, FCC1); ++ __ fsel(FCC0, f16, f12, f13); ++ __ fsel(FCC1, f17, f14, f15); ++ __ Fst_d(f16, MemOperand(a0, offsetof(TestFloat, dSor))); ++ __ Fst_s(f17, MemOperand(a0, offsetof(TestFloat, fSor))); ++ ++ __ fcmp_cond_d(SUNE, f8, f9, FCC0); ++ __ fcmp_cond_s(SUNE, f10, f11, FCC1); ++ __ fsel(FCC0, f16, f12, f13); ++ __ fsel(FCC1, f17, f14, f15); ++ __ Fst_d(f16, MemOperand(a0, offsetof(TestFloat, dSune))); ++ __ Fst_s(f17, MemOperand(a0, offsetof(TestFloat, fSune)));*/ ++ ++ __ jirl(zero_reg, ra, 0); ++ ++ CodeDesc desc; ++ assm.GetCode(isolate, &desc); ++ Handle code = Factory::CodeBuilder(isolate, desc, Code::STUB).Build(); ++ auto f = GeneratedCode::FromCode(*code); ++ test.dTrue = 1234.0; ++ test.dFalse = 0.0; ++ test.fTrue = 12.0; ++ test.fFalse = 0.0; ++ ++ test.dOp1 = 2.0; ++ test.dOp2 = 3.0; ++ test.fOp1 = 2.0; ++ test.fOp2 = 3.0; ++ f.Call(&test, 0, 0, 0, 0); ++ ++ CHECK_EQ(test.dCaf, test.dFalse); ++ CHECK_EQ(test.fCaf, test.fFalse); ++ CHECK_EQ(test.dCun, test.dFalse); ++ CHECK_EQ(test.fCun, test.fFalse); ++ CHECK_EQ(test.dCeq, test.dFalse); ++ CHECK_EQ(test.fCeq, test.fFalse); ++ CHECK_EQ(test.dCueq, test.dFalse); ++ CHECK_EQ(test.fCueq, test.fFalse); ++ CHECK_EQ(test.dClt, test.dTrue); ++ CHECK_EQ(test.fClt, test.fTrue); ++ CHECK_EQ(test.dCult, test.dTrue); ++ CHECK_EQ(test.fCult, test.fTrue); ++ CHECK_EQ(test.dCle, test.dTrue); ++ CHECK_EQ(test.fCle, test.fTrue); ++ CHECK_EQ(test.dCule, test.dTrue); ++ CHECK_EQ(test.fCule, test.fTrue); ++ CHECK_EQ(test.dCne, test.dTrue); ++ CHECK_EQ(test.fCne, test.fTrue); ++ CHECK_EQ(test.dCor, test.dTrue); ++ CHECK_EQ(test.fCor, test.fTrue); ++ CHECK_EQ(test.dCune, test.dTrue); ++ CHECK_EQ(test.fCune, test.fTrue); ++ /* CHECK_EQ(test.dSaf, test.dFalse); ++ CHECK_EQ(test.fSaf, test.fFalse); ++ CHECK_EQ(test.dSun, test.dFalse); ++ CHECK_EQ(test.fSun, test.fFalse); ++ CHECK_EQ(test.dSeq, test.dFalse); ++ CHECK_EQ(test.fSeq, test.fFalse); ++ CHECK_EQ(test.dSueq, test.dFalse); ++ CHECK_EQ(test.fSueq, test.fFalse); ++ CHECK_EQ(test.dClt, test.dTrue); ++ CHECK_EQ(test.fClt, test.fTrue); ++ CHECK_EQ(test.dCult, test.dTrue); ++ CHECK_EQ(test.fCult, test.fTrue); ++ CHECK_EQ(test.dSle, test.dTrue); ++ CHECK_EQ(test.fSle, test.fTrue); ++ CHECK_EQ(test.dSule, test.dTrue); ++ CHECK_EQ(test.fSule, test.fTrue); ++ CHECK_EQ(test.dSne, test.dTrue); ++ CHECK_EQ(test.fSne, test.fTrue); ++ 
CHECK_EQ(test.dSor, test.dTrue); ++ CHECK_EQ(test.fSor, test.fTrue); ++ CHECK_EQ(test.dSune, test.dTrue); ++ CHECK_EQ(test.fSune, test.fTrue);*/ ++ ++ test.dOp1 = std::numeric_limits::max(); ++ test.dOp2 = std::numeric_limits::min(); ++ test.fOp1 = std::numeric_limits::min(); ++ test.fOp2 = -std::numeric_limits::max(); ++ f.Call(&test, 0, 0, 0, 0); ++ ++ CHECK_EQ(test.dCaf, test.dFalse); ++ CHECK_EQ(test.fCaf, test.fFalse); ++ CHECK_EQ(test.dCun, test.dFalse); ++ CHECK_EQ(test.fCun, test.fFalse); ++ CHECK_EQ(test.dCeq, test.dFalse); ++ CHECK_EQ(test.fCeq, test.fFalse); ++ CHECK_EQ(test.dCueq, test.dFalse); ++ CHECK_EQ(test.fCueq, test.fFalse); ++ CHECK_EQ(test.dClt, test.dFalse); ++ CHECK_EQ(test.fClt, test.fFalse); ++ CHECK_EQ(test.dCult, test.dFalse); ++ CHECK_EQ(test.fCult, test.fFalse); ++ CHECK_EQ(test.dCle, test.dFalse); ++ CHECK_EQ(test.fCle, test.fFalse); ++ CHECK_EQ(test.dCule, test.dFalse); ++ CHECK_EQ(test.fCule, test.fFalse); ++ CHECK_EQ(test.dCne, test.dTrue); ++ CHECK_EQ(test.fCne, test.fTrue); ++ CHECK_EQ(test.dCor, test.dTrue); ++ CHECK_EQ(test.fCor, test.fTrue); ++ CHECK_EQ(test.dCune, test.dTrue); ++ CHECK_EQ(test.fCune, test.fTrue); ++ /* CHECK_EQ(test.dSaf, test.dFalse); ++ CHECK_EQ(test.fSaf, test.fFalse); ++ CHECK_EQ(test.dSun, test.dFalse); ++ CHECK_EQ(test.fSun, test.fFalse); ++ CHECK_EQ(test.dSeq, test.dFalse); ++ CHECK_EQ(test.fSeq, test.fFalse); ++ CHECK_EQ(test.dSueq, test.dFalse); ++ CHECK_EQ(test.fSueq, test.fFalse); ++ CHECK_EQ(test.dSlt, test.dFalse); ++ CHECK_EQ(test.fSlt, test.fFalse); ++ CHECK_EQ(test.dSult, test.dFalse); ++ CHECK_EQ(test.fSult, test.fFalse); ++ CHECK_EQ(test.dSle, test.dFalse); ++ CHECK_EQ(test.fSle, test.fFalse); ++ CHECK_EQ(test.dSule, test.dFalse); ++ CHECK_EQ(test.fSule, test.fFalse); ++ CHECK_EQ(test.dSne, test.dTrue); ++ CHECK_EQ(test.fSne, test.fTrue); ++ CHECK_EQ(test.dSor, test.dTrue); ++ CHECK_EQ(test.fSor, test.fTrue); ++ CHECK_EQ(test.dSune, test.dTrue); ++ CHECK_EQ(test.fSune, test.fTrue);*/ ++ ++ test.dOp1 = std::numeric_limits::quiet_NaN(); ++ test.dOp2 = 0.0; ++ test.fOp1 = std::numeric_limits::quiet_NaN(); ++ test.fOp2 = 0.0; ++ f.Call(&test, 0, 0, 0, 0); ++ ++ CHECK_EQ(test.dCaf, test.dFalse); ++ CHECK_EQ(test.fCaf, test.fFalse); ++ CHECK_EQ(test.dCun, test.dTrue); ++ CHECK_EQ(test.fCun, test.fTrue); ++ CHECK_EQ(test.dCeq, test.dFalse); ++ CHECK_EQ(test.fCeq, test.fFalse); ++ CHECK_EQ(test.dCueq, test.dTrue); ++ CHECK_EQ(test.fCueq, test.fTrue); ++ CHECK_EQ(test.dClt, test.dFalse); ++ CHECK_EQ(test.fClt, test.fFalse); ++ CHECK_EQ(test.dCult, test.dTrue); ++ CHECK_EQ(test.fCult, test.fTrue); ++ CHECK_EQ(test.dCle, test.dFalse); ++ CHECK_EQ(test.fCle, test.fFalse); ++ CHECK_EQ(test.dCule, test.dTrue); ++ CHECK_EQ(test.fCule, test.fTrue); ++ CHECK_EQ(test.dCne, test.dFalse); ++ CHECK_EQ(test.fCne, test.fFalse); ++ CHECK_EQ(test.dCor, test.dFalse); ++ CHECK_EQ(test.fCor, test.fFalse); ++ CHECK_EQ(test.dCune, test.dTrue); ++ CHECK_EQ(test.fCune, test.fTrue); ++ /* CHECK_EQ(test.dSaf, test.dTrue); ++ CHECK_EQ(test.fSaf, test.fTrue); ++ CHECK_EQ(test.dSun, test.dTrue); ++ CHECK_EQ(test.fSun, test.fTrue); ++ CHECK_EQ(test.dSeq, test.dFalse); ++ CHECK_EQ(test.fSeq, test.fFalse); ++ CHECK_EQ(test.dSueq, test.dTrue); ++ CHECK_EQ(test.fSueq, test.fTrue); ++ CHECK_EQ(test.dSlt, test.dFalse); ++ CHECK_EQ(test.fSlt, test.fFalse); ++ CHECK_EQ(test.dSult, test.dTrue); ++ CHECK_EQ(test.fSult, test.fTrue); ++ CHECK_EQ(test.dSle, test.dFalse); ++ CHECK_EQ(test.fSle, test.fFalse); ++ CHECK_EQ(test.dSule, test.dTrue); ++ CHECK_EQ(test.fSule, 
test.fTrue); ++ CHECK_EQ(test.dSne, test.dFalse); ++ CHECK_EQ(test.fSne, test.fFalse); ++ CHECK_EQ(test.dSor, test.dFalse); ++ CHECK_EQ(test.fSor, test.fFalse); ++ CHECK_EQ(test.dSune, test.dTrue); ++ CHECK_EQ(test.fSune, test.fTrue);*/ ++} ++ ++TEST(FCVT) { ++ CcTest::InitializeVM(); ++ Isolate* isolate = CcTest::i_isolate(); ++ HandleScope scope(isolate); ++ MacroAssembler assm(isolate, v8::internal::CodeObjectRequired::kYes); ++ ++ struct TestFloat { ++ float fcvt_d_s_in; ++ double fcvt_s_d_in; ++ double fcvt_d_s_out; ++ float fcvt_s_d_out; ++ int fcsr; ++ }; ++ TestFloat test; ++ __ xor_(a4, a4, a4); ++ __ xor_(a5, a5, a5); ++ __ Ld_w(a4, MemOperand(a0, offsetof(TestFloat, fcsr))); ++ __ movfcsr2gr(a5); ++ __ movgr2fcsr(a4); ++ __ Fld_s(f8, MemOperand(a0, offsetof(TestFloat, fcvt_d_s_in))); ++ __ Fld_d(f9, MemOperand(a0, offsetof(TestFloat, fcvt_s_d_in))); ++ __ fcvt_d_s(f10, f8); ++ __ fcvt_s_d(f11, f9); ++ __ Fst_d(f10, MemOperand(a0, offsetof(TestFloat, fcvt_d_s_out))); ++ __ Fst_s(f11, MemOperand(a0, offsetof(TestFloat, fcvt_s_d_out))); ++ __ movgr2fcsr(a5); ++ __ jirl(zero_reg, ra, 0); ++ ++ CodeDesc desc; ++ assm.GetCode(isolate, &desc); ++ Handle code = Factory::CodeBuilder(isolate, desc, Code::STUB).Build(); ++ auto f = GeneratedCode::FromCode(*code); ++ test.fcsr = kRoundToZero; ++ ++ test.fcvt_d_s_in = -0.51; ++ test.fcvt_s_d_in = -0.51; ++ f.Call(&test, 0, 0, 0, 0); ++ CHECK_EQ(test.fcvt_d_s_out, static_cast(test.fcvt_d_s_in)); ++ CHECK_EQ(test.fcvt_s_d_out, static_cast(test.fcvt_s_d_in)); ++ ++ test.fcvt_d_s_in = 0.49; ++ test.fcvt_s_d_in = 0.49; ++ f.Call(&test, 0, 0, 0, 0); ++ CHECK_EQ(test.fcvt_d_s_out, static_cast(test.fcvt_d_s_in)); ++ CHECK_EQ(test.fcvt_s_d_out, static_cast(test.fcvt_s_d_in)); ++ ++ test.fcvt_d_s_in = std::numeric_limits::max(); ++ test.fcvt_s_d_in = std::numeric_limits::max(); ++ f.Call(&test, 0, 0, 0, 0); ++ CHECK_EQ(test.fcvt_d_s_out, static_cast(test.fcvt_d_s_in)); ++ CHECK_EQ(test.fcvt_s_d_out, static_cast(test.fcvt_s_d_in)); ++ ++ test.fcvt_d_s_in = -std::numeric_limits::max(); ++ test.fcvt_s_d_in = -std::numeric_limits::max(); ++ f.Call(&test, 0, 0, 0, 0); ++ CHECK_EQ(test.fcvt_d_s_out, static_cast(test.fcvt_d_s_in)); ++ CHECK_EQ(test.fcvt_s_d_out, static_cast(test.fcvt_s_d_in)); ++ ++ test.fcvt_d_s_in = std::numeric_limits::min(); ++ test.fcvt_s_d_in = std::numeric_limits::min(); ++ f.Call(&test, 0, 0, 0, 0); ++ CHECK_EQ(test.fcvt_d_s_out, static_cast(test.fcvt_d_s_in)); ++ CHECK_EQ(test.fcvt_s_d_out, static_cast(test.fcvt_s_d_in)); ++} ++ ++TEST(FFINT) { ++ CcTest::InitializeVM(); ++ Isolate* isolate = CcTest::i_isolate(); ++ HandleScope scope(isolate); ++ MacroAssembler assm(isolate, v8::internal::CodeObjectRequired::kYes); ++ ++ struct TestFloat { ++ int32_t ffint_s_w_in; ++ int64_t ffint_s_l_in; ++ int32_t ffint_d_w_in; ++ int64_t ffint_d_l_in; ++ float ffint_s_w_out; ++ float ffint_s_l_out; ++ double ffint_d_w_out; ++ double ffint_d_l_out; ++ int fcsr; ++ }; ++ TestFloat test; ++ __ xor_(a4, a4, a4); ++ __ xor_(a5, a5, a5); ++ __ Ld_w(a4, MemOperand(a0, offsetof(TestFloat, fcsr))); ++ __ movfcsr2gr(a5); ++ __ movgr2fcsr(a4); ++ __ Fld_s(f8, MemOperand(a0, offsetof(TestFloat, ffint_s_w_in))); ++ __ Fld_d(f9, MemOperand(a0, offsetof(TestFloat, ffint_s_l_in))); ++ __ Fld_s(f10, MemOperand(a0, offsetof(TestFloat, ffint_d_w_in))); ++ __ Fld_d(f11, MemOperand(a0, offsetof(TestFloat, ffint_d_l_in))); ++ __ ffint_s_w(f12, f8); ++ __ ffint_s_l(f13, f9); ++ __ ffint_d_w(f14, f10); ++ __ ffint_d_l(f15, f11); ++ __ Fst_s(f12, MemOperand(a0, 
offsetof(TestFloat, ffint_s_w_out))); ++ __ Fst_s(f13, MemOperand(a0, offsetof(TestFloat, ffint_s_l_out))); ++ __ Fst_d(f14, MemOperand(a0, offsetof(TestFloat, ffint_d_w_out))); ++ __ Fst_d(f15, MemOperand(a0, offsetof(TestFloat, ffint_d_l_out))); ++ __ movgr2fcsr(a5); ++ __ jirl(zero_reg, ra, 0); ++ ++ CodeDesc desc; ++ assm.GetCode(isolate, &desc); ++ Handle code = Factory::CodeBuilder(isolate, desc, Code::STUB).Build(); ++ auto f = GeneratedCode::FromCode(*code); ++ test.fcsr = kRoundToZero; ++ ++ test.ffint_s_w_in = -1; ++ test.ffint_s_l_in = -1; ++ test.ffint_d_w_in = -1; ++ test.ffint_d_l_in = -1; ++ f.Call(&test, 0, 0, 0, 0); ++ CHECK_EQ(test.ffint_s_w_out, static_cast(test.ffint_s_w_in)); ++ CHECK_EQ(test.ffint_s_l_out, static_cast(test.ffint_s_l_in)); ++ CHECK_EQ(test.ffint_d_w_out, static_cast(test.ffint_d_w_in)); ++ CHECK_EQ(test.ffint_d_l_out, static_cast(test.ffint_d_l_in)); ++ ++ test.ffint_s_w_in = 1; ++ test.ffint_s_l_in = 1; ++ test.ffint_d_w_in = 1; ++ test.ffint_d_l_in = 1; ++ f.Call(&test, 0, 0, 0, 0); ++ CHECK_EQ(test.ffint_s_w_out, static_cast(test.ffint_s_w_in)); ++ CHECK_EQ(test.ffint_s_l_out, static_cast(test.ffint_s_l_in)); ++ CHECK_EQ(test.ffint_d_w_out, static_cast(test.ffint_d_w_in)); ++ CHECK_EQ(test.ffint_d_l_out, static_cast(test.ffint_d_l_in)); ++ ++ test.ffint_s_w_in = std::numeric_limits::max(); ++ test.ffint_s_l_in = std::numeric_limits::max(); ++ test.ffint_d_w_in = std::numeric_limits::max(); ++ test.ffint_d_l_in = std::numeric_limits::max(); ++ f.Call(&test, 0, 0, 0, 0); ++ CHECK_EQ(test.ffint_s_w_out, static_cast(test.ffint_s_w_in)); ++ CHECK_EQ(test.ffint_s_l_out, static_cast(test.ffint_s_l_in)); ++ CHECK_EQ(test.ffint_d_w_out, static_cast(test.ffint_d_w_in)); ++ CHECK_EQ(test.ffint_d_l_out, static_cast(test.ffint_d_l_in)); ++ ++ test.ffint_s_w_in = std::numeric_limits::min(); ++ test.ffint_s_l_in = std::numeric_limits::min(); ++ test.ffint_d_w_in = std::numeric_limits::min(); ++ test.ffint_d_l_in = std::numeric_limits::min(); ++ f.Call(&test, 0, 0, 0, 0); ++ CHECK_EQ(test.ffint_s_w_out, static_cast(test.ffint_s_w_in)); ++ CHECK_EQ(test.ffint_s_l_out, static_cast(test.ffint_s_l_in)); ++ CHECK_EQ(test.ffint_d_w_out, static_cast(test.ffint_d_w_in)); ++ CHECK_EQ(test.ffint_d_l_out, static_cast(test.ffint_d_l_in)); ++} ++ ++TEST(FTINT) { ++ CcTest::InitializeVM(); ++ Isolate* isolate = CcTest::i_isolate(); ++ HandleScope scope(isolate); ++ MacroAssembler assm(isolate, v8::internal::CodeObjectRequired::kYes); ++ ++ struct Test { ++ double a; ++ float b; ++ int32_t c; ++ int32_t d; ++ int64_t e; ++ int64_t f; ++ int fcsr; ++ }; ++ Test test; ++ ++ const int kTableLength = 9; ++ // clang-format off ++ double inputs_d[kTableLength] = { ++ 3.1, 3.6, 3.5, -3.1, -3.6, -3.5, ++ 2147483648.0, ++ std::numeric_limits::quiet_NaN(), ++ std::numeric_limits::infinity() ++ }; ++ float inputs_s[kTableLength] = { ++ 3.1, 3.6, 3.5, -3.1, -3.6, -3.5, ++ 2147483648.0, ++ std::numeric_limits::quiet_NaN(), ++ std::numeric_limits::infinity() ++ }; ++ double outputs_RN_W[kTableLength] = { ++ 3.0, 4.0, 4.0, -3.0, -4.0, -4.0, ++ kFPUInvalidResult, 0, ++ kFPUInvalidResult}; ++ double outputs_RN_L[kTableLength] = { ++ 3.0, 4.0, 4.0, -3.0, -4.0, -4.0, ++ 2147483648.0, 0, ++ kFPU64InvalidResult}; ++ double outputs_RZ_W[kTableLength] = { ++ 3.0, 3.0, 3.0, -3.0, -3.0, -3.0, ++ kFPUInvalidResult, 0, ++ kFPUInvalidResult}; ++ double outputs_RZ_L[kTableLength] = { ++ 3.0, 3.0, 3.0, -3.0, -3.0, -3.0, ++ 2147483648.0, 0, ++ kFPU64InvalidResult}; ++ double outputs_RP_W[kTableLength] = { ++ 
4.0, 4.0, 4.0, -3.0, -3.0, -3.0, ++ kFPUInvalidResult, 0, ++ kFPUInvalidResult}; ++ double outputs_RP_L[kTableLength] = { ++ 4.0, 4.0, 4.0, -3.0, -3.0, -3.0, ++ 2147483648.0, 0, ++ kFPU64InvalidResult}; ++ double outputs_RM_W[kTableLength] = { ++ 3.0, 3.0, 3.0, -4.0, -4.0, -4.0, ++ kFPUInvalidResult, 0, ++ kFPUInvalidResult}; ++ double outputs_RM_L[kTableLength] = { ++ 3.0, 3.0, 3.0, -4.0, -4.0, -4.0, ++ 2147483648.0, 0, ++ kFPU64InvalidResult}; ++ // clang-format on ++ ++ int fcsr_inputs[4] = {kRoundToNearest, kRoundToZero, kRoundToPlusInf, ++ kRoundToMinusInf}; ++ double* outputs[8] = { ++ outputs_RN_W, outputs_RN_L, outputs_RZ_W, outputs_RZ_L, ++ outputs_RP_W, outputs_RP_L, outputs_RM_W, outputs_RM_L, ++ }; ++ ++ __ Fld_d(f8, MemOperand(a0, offsetof(Test, a))); ++ __ Fld_s(f9, MemOperand(a0, offsetof(Test, b))); ++ __ xor_(a5, a5, a5); ++ __ Ld_w(a5, MemOperand(a0, offsetof(Test, fcsr))); ++ __ movfcsr2gr(a4); ++ __ movgr2fcsr(a5); ++ __ ftint_w_d(f10, f8); ++ __ ftint_w_s(f11, f9); ++ __ ftint_l_d(f12, f8); ++ __ ftint_l_s(f13, f9); ++ __ Fst_s(f10, MemOperand(a0, offsetof(Test, c))); ++ __ Fst_s(f11, MemOperand(a0, offsetof(Test, d))); ++ __ Fst_d(f12, MemOperand(a0, offsetof(Test, e))); ++ __ Fst_d(f13, MemOperand(a0, offsetof(Test, f))); ++ __ movgr2fcsr(a4); ++ __ jirl(zero_reg, ra, 0); ++ ++ CodeDesc desc; ++ assm.GetCode(isolate, &desc); ++ Handle code = Factory::CodeBuilder(isolate, desc, Code::STUB).Build(); ++ auto f = GeneratedCode::FromCode(*code); ++ for (int j = 0; j < 4; j++) { ++ test.fcsr = fcsr_inputs[j]; ++ for (int i = 0; i < kTableLength; i++) { ++ test.a = inputs_d[i]; ++ test.b = inputs_s[i]; ++ f.Call(&test, 0, 0, 0, 0); ++ CHECK_EQ(test.c, outputs[2 * j][i]); ++ CHECK_EQ(test.d, outputs[2 * j][i]); ++ CHECK_EQ(test.e, outputs[2 * j + 1][i]); ++ CHECK_EQ(test.f, outputs[2 * j + 1][i]); ++ } ++ } ++} ++ ++TEST(FTINTRM) { ++ CcTest::InitializeVM(); ++ Isolate* isolate = CcTest::i_isolate(); ++ HandleScope scope(isolate); ++ MacroAssembler assm(isolate, v8::internal::CodeObjectRequired::kYes); ++ ++ struct Test { ++ double a; ++ float b; ++ int32_t c; ++ int32_t d; ++ int64_t e; ++ int64_t f; ++ }; ++ Test test; ++ ++ const int kTableLength = 9; ++ ++ // clang-format off ++ double inputs_d[kTableLength] = { ++ 3.1, 3.6, 3.5, -3.1, -3.6, -3.5, ++ 2147483648.0, ++ std::numeric_limits::quiet_NaN(), ++ std::numeric_limits::infinity() ++ }; ++ float inputs_s[kTableLength] = { ++ 3.1, 3.6, 3.5, -3.1, -3.6, -3.5, ++ 2147483648.0, ++ std::numeric_limits::quiet_NaN(), ++ std::numeric_limits::infinity() ++ }; ++ double outputs_w[kTableLength] = { ++ 3.0, 3.0, 3.0, -4.0, -4.0, -4.0, ++ kFPUInvalidResult, 0, ++ kFPUInvalidResult}; ++ double outputs_l[kTableLength] = { ++ 3.0, 3.0, 3.0, -4.0, -4.0, -4.0, ++ 2147483648.0, 0, ++ kFPU64InvalidResult}; ++ // clang-format on ++ ++ __ Fld_d(f8, MemOperand(a0, offsetof(Test, a))); ++ __ Fld_s(f9, MemOperand(a0, offsetof(Test, b))); ++ __ ftintrm_w_d(f10, f8); ++ __ ftintrm_w_s(f11, f9); ++ __ ftintrm_l_d(f12, f8); ++ __ ftintrm_l_s(f13, f9); ++ __ Fst_s(f10, MemOperand(a0, offsetof(Test, c))); ++ __ Fst_s(f11, MemOperand(a0, offsetof(Test, d))); ++ __ Fst_d(f12, MemOperand(a0, offsetof(Test, e))); ++ __ Fst_d(f13, MemOperand(a0, offsetof(Test, f))); ++ __ jirl(zero_reg, ra, 0); ++ ++ CodeDesc desc; ++ assm.GetCode(isolate, &desc); ++ Handle code = Factory::CodeBuilder(isolate, desc, Code::STUB).Build(); ++ auto f = GeneratedCode::FromCode(*code); ++ for (int i = 0; i < kTableLength; i++) { ++ test.a = inputs_d[i]; ++ test.b = 
inputs_s[i]; ++ f.Call(&test, 0, 0, 0, 0); ++ CHECK_EQ(test.c, outputs_w[i]); ++ CHECK_EQ(test.d, outputs_w[i]); ++ CHECK_EQ(test.e, outputs_l[i]); ++ CHECK_EQ(test.f, outputs_l[i]); ++ } ++} ++ ++TEST(FTINTRP) { ++ CcTest::InitializeVM(); ++ Isolate* isolate = CcTest::i_isolate(); ++ HandleScope scope(isolate); ++ MacroAssembler assm(isolate, v8::internal::CodeObjectRequired::kYes); ++ ++ struct Test { ++ double a; ++ float b; ++ int32_t c; ++ int32_t d; ++ int64_t e; ++ int64_t f; ++ }; ++ Test test; ++ ++ const int kTableLength = 9; ++ ++ // clang-format off ++ double inputs_d[kTableLength] = { ++ 3.1, 3.6, 3.5, -3.1, -3.6, -3.5, ++ 2147483648.0, ++ std::numeric_limits::quiet_NaN(), ++ std::numeric_limits::infinity() ++ }; ++ float inputs_s[kTableLength] = { ++ 3.1, 3.6, 3.5, -3.1, -3.6, -3.5, ++ 2147483648.0, ++ std::numeric_limits::quiet_NaN(), ++ std::numeric_limits::infinity() ++ }; ++ double outputs_w[kTableLength] = { ++ 4.0, 4.0, 4.0, -3.0, -3.0, -3.0, ++ kFPUInvalidResult, 0, ++ kFPUInvalidResult}; ++ double outputs_l[kTableLength] = { ++ 4.0, 4.0, 4.0, -3.0, -3.0, -3.0, ++ 2147483648.0, 0, ++ kFPU64InvalidResult}; ++ // clang-format on ++ ++ __ Fld_d(f8, MemOperand(a0, offsetof(Test, a))); ++ __ Fld_s(f9, MemOperand(a0, offsetof(Test, b))); ++ __ ftintrp_w_d(f10, f8); ++ __ ftintrp_w_s(f11, f9); ++ __ ftintrp_l_d(f12, f8); ++ __ ftintrp_l_s(f13, f9); ++ __ Fst_s(f10, MemOperand(a0, offsetof(Test, c))); ++ __ Fst_s(f11, MemOperand(a0, offsetof(Test, d))); ++ __ Fst_d(f12, MemOperand(a0, offsetof(Test, e))); ++ __ Fst_d(f13, MemOperand(a0, offsetof(Test, f))); ++ __ jirl(zero_reg, ra, 0); ++ ++ CodeDesc desc; ++ assm.GetCode(isolate, &desc); ++ Handle code = Factory::CodeBuilder(isolate, desc, Code::STUB).Build(); ++ auto f = GeneratedCode::FromCode(*code); ++ for (int i = 0; i < kTableLength; i++) { ++ test.a = inputs_d[i]; ++ test.b = inputs_s[i]; ++ f.Call(&test, 0, 0, 0, 0); ++ CHECK_EQ(test.c, outputs_w[i]); ++ CHECK_EQ(test.d, outputs_w[i]); ++ CHECK_EQ(test.e, outputs_l[i]); ++ CHECK_EQ(test.f, outputs_l[i]); ++ } ++} ++ ++TEST(FTINTRZ) { ++ CcTest::InitializeVM(); ++ Isolate* isolate = CcTest::i_isolate(); ++ HandleScope scope(isolate); ++ MacroAssembler assm(isolate, v8::internal::CodeObjectRequired::kYes); ++ ++ struct Test { ++ double a; ++ float b; ++ int32_t c; ++ int32_t d; ++ int64_t e; ++ int64_t f; ++ }; ++ Test test; ++ ++ const int kTableLength = 9; ++ ++ // clang-format off ++ double inputs_d[kTableLength] = { ++ 3.1, 3.6, 3.5, -3.1, -3.6, -3.5, ++ 2147483648.0, ++ std::numeric_limits::quiet_NaN(), ++ std::numeric_limits::infinity() ++ }; ++ float inputs_s[kTableLength] = { ++ 3.1, 3.6, 3.5, -3.1, -3.6, -3.5, ++ 2147483648.0, ++ std::numeric_limits::quiet_NaN(), ++ std::numeric_limits::infinity() ++ }; ++ double outputs_w[kTableLength] = { ++ 3.0, 3.0, 3.0, -3.0, -3.0, -3.0, ++ kFPUInvalidResult, 0, ++ kFPUInvalidResult}; ++ double outputs_l[kTableLength] = { ++ 3.0, 3.0, 3.0, -3.0, -3.0, -3.0, ++ 2147483648.0, 0, ++ kFPU64InvalidResult}; ++ // clang-format on ++ ++ __ Fld_d(f8, MemOperand(a0, offsetof(Test, a))); ++ __ Fld_s(f9, MemOperand(a0, offsetof(Test, b))); ++ __ ftintrz_w_d(f10, f8); ++ __ ftintrz_w_s(f11, f9); ++ __ ftintrz_l_d(f12, f8); ++ __ ftintrz_l_s(f13, f9); ++ __ Fst_s(f10, MemOperand(a0, offsetof(Test, c))); ++ __ Fst_s(f11, MemOperand(a0, offsetof(Test, d))); ++ __ Fst_d(f12, MemOperand(a0, offsetof(Test, e))); ++ __ Fst_d(f13, MemOperand(a0, offsetof(Test, f))); ++ __ jirl(zero_reg, ra, 0); ++ ++ CodeDesc desc; ++ assm.GetCode(isolate, 
&desc); ++ Handle code = Factory::CodeBuilder(isolate, desc, Code::STUB).Build(); ++ auto f = GeneratedCode::FromCode(*code); ++ for (int i = 0; i < kTableLength; i++) { ++ test.a = inputs_d[i]; ++ test.b = inputs_s[i]; ++ f.Call(&test, 0, 0, 0, 0); ++ CHECK_EQ(test.c, outputs_w[i]); ++ CHECK_EQ(test.d, outputs_w[i]); ++ CHECK_EQ(test.e, outputs_l[i]); ++ CHECK_EQ(test.f, outputs_l[i]); ++ } ++} ++ ++TEST(FTINTRNE) { ++ CcTest::InitializeVM(); ++ Isolate* isolate = CcTest::i_isolate(); ++ HandleScope scope(isolate); ++ MacroAssembler assm(isolate, v8::internal::CodeObjectRequired::kYes); ++ ++ struct Test { ++ double a; ++ float b; ++ int32_t c; ++ int32_t d; ++ int64_t e; ++ int64_t f; ++ }; ++ Test test; ++ ++ const int kTableLength = 9; ++ ++ // clang-format off ++ double inputs_d[kTableLength] = { ++ 3.1, 3.6, 3.5, -3.1, -3.6, -3.5, ++ 2147483648.0, ++ std::numeric_limits::quiet_NaN(), ++ std::numeric_limits::infinity() ++ }; ++ float inputs_s[kTableLength] = { ++ 3.1, 3.6, 3.5, -3.1, -3.6, -3.5, ++ 2147483648.0, ++ std::numeric_limits::quiet_NaN(), ++ std::numeric_limits::infinity() ++ }; ++ double outputs_w[kTableLength] = { ++ 3.0, 4.0, 4.0, -3.0, -4.0, -4.0, ++ kFPUInvalidResult, 0, ++ kFPUInvalidResult}; ++ double outputs_l[kTableLength] = { ++ 3.0, 4.0, 4.0, -3.0, -4.0, -4.0, ++ 2147483648.0, 0, ++ kFPU64InvalidResult}; ++ // clang-format on ++ ++ __ Fld_d(f8, MemOperand(a0, offsetof(Test, a))); ++ __ Fld_s(f9, MemOperand(a0, offsetof(Test, b))); ++ __ ftintrne_w_d(f10, f8); ++ __ ftintrne_w_s(f11, f9); ++ __ ftintrne_l_d(f12, f8); ++ __ ftintrne_l_s(f13, f9); ++ __ Fst_s(f10, MemOperand(a0, offsetof(Test, c))); ++ __ Fst_s(f11, MemOperand(a0, offsetof(Test, d))); ++ __ Fst_d(f12, MemOperand(a0, offsetof(Test, e))); ++ __ Fst_d(f13, MemOperand(a0, offsetof(Test, f))); ++ __ jirl(zero_reg, ra, 0); ++ ++ CodeDesc desc; ++ assm.GetCode(isolate, &desc); ++ Handle code = Factory::CodeBuilder(isolate, desc, Code::STUB).Build(); ++ auto f = GeneratedCode::FromCode(*code); ++ for (int i = 0; i < kTableLength; i++) { ++ test.a = inputs_d[i]; ++ test.b = inputs_s[i]; ++ f.Call(&test, 0, 0, 0, 0); ++ CHECK_EQ(test.c, outputs_w[i]); ++ CHECK_EQ(test.d, outputs_w[i]); ++ CHECK_EQ(test.e, outputs_l[i]); ++ CHECK_EQ(test.f, outputs_l[i]); ++ } ++} ++ ++TEST(FRINT) { ++ CcTest::InitializeVM(); ++ Isolate* isolate = CcTest::i_isolate(); ++ HandleScope scope(isolate); ++ MacroAssembler assm(isolate, v8::internal::CodeObjectRequired::kYes); ++ ++ struct Test { ++ double a; ++ float b; ++ double c; ++ float d; ++ int fcsr; ++ }; ++ Test test; ++ ++ const int kTableLength = 32; ++ ++ // clang-format off ++ double inputs_d[kTableLength] = { ++ 18446744073709551617.0, 4503599627370496.0, -4503599627370496.0, ++ 1.26782468584154733584017312973E30, 1.44860108245951772690707170478E147, ++ 1.7976931348623157E+308, 6.27463370218383111104242366943E-307, ++ 309485009821345068724781056.89, ++ 2.1, 2.6, 2.5, 3.1, 3.6, 3.5, ++ -2.1, -2.6, -2.5, -3.1, -3.6, -3.5, ++ 37778931862957161709568.0, 37778931862957161709569.0, ++ 37778931862957161709580.0, 37778931862957161709581.0, ++ 37778931862957161709582.0, 37778931862957161709583.0, ++ 37778931862957161709584.0, 37778931862957161709585.0, ++ 37778931862957161709586.0, 37778931862957161709587.0, ++ std::numeric_limits::max() - 0.1, ++ std::numeric_limits::infinity() ++ }; ++ float inputs_s[kTableLength] = { ++ 18446744073709551617.0, 4503599627370496.0, -4503599627370496.0, ++ 1.26782468584154733584017312973E30, 1.44860108245951772690707170478E37, ++ 
1.7976931348623157E+38, 6.27463370218383111104242366943E-37, ++ 309485009821345068724781056.89, ++ 2.1, 2.6, 2.5, 3.1, 3.6, 3.5, ++ -2.1, -2.6, -2.5, -3.1, -3.6, -3.5, ++ 37778931862957161709568.0, 37778931862957161709569.0, ++ 37778931862957161709580.0, 37778931862957161709581.0, ++ 37778931862957161709582.0, 37778931862957161709583.0, ++ 37778931862957161709584.0, 37778931862957161709585.0, ++ 37778931862957161709586.0, 37778931862957161709587.0, ++ std::numeric_limits::lowest() + 0.6, ++ std::numeric_limits::infinity() ++ }; ++ float outputs_RN_S[kTableLength] = { ++ 18446744073709551617.0, 4503599627370496.0, -4503599627370496.0, ++ 1.26782468584154733584017312973E30, 1.44860108245951772690707170478E37, ++ 1.7976931348623157E38, 0, ++ 309485009821345068724781057.0, ++ 2.0, 3.0, 2.0, 3.0, 4.0, 4.0, ++ -2.0, -3.0, -2.0, -3.0, -4.0, -4.0, ++ 37778931862957161709568.0, 37778931862957161709569.0, ++ 37778931862957161709580.0, 37778931862957161709581.0, ++ 37778931862957161709582.0, 37778931862957161709583.0, ++ 37778931862957161709584.0, 37778931862957161709585.0, ++ 37778931862957161709586.0, 37778931862957161709587.0, ++ std::numeric_limits::lowest() + 1, ++ std::numeric_limits::infinity() ++ }; ++ double outputs_RN_D[kTableLength] = { ++ 18446744073709551617.0, 4503599627370496.0, -4503599627370496.0, ++ 1.26782468584154733584017312973E30, 1.44860108245951772690707170478E147, ++ 1.7976931348623157E308, 0, ++ 309485009821345068724781057.0, ++ 2.0, 3.0, 2.0, 3.0, 4.0, 4.0, ++ -2.0, -3.0, -2.0, -3.0, -4.0, -4.0, ++ 37778931862957161709568.0, 37778931862957161709569.0, ++ 37778931862957161709580.0, 37778931862957161709581.0, ++ 37778931862957161709582.0, 37778931862957161709583.0, ++ 37778931862957161709584.0, 37778931862957161709585.0, ++ 37778931862957161709586.0, 37778931862957161709587.0, ++ std::numeric_limits::max(), ++ std::numeric_limits::infinity() ++ }; ++ float outputs_RZ_S[kTableLength] = { ++ 18446744073709551617.0, 4503599627370496.0, -4503599627370496.0, ++ 1.26782468584154733584017312973E30, 1.44860108245951772690707170478E37, ++ 1.7976931348623157E38, 0, ++ 309485009821345068724781057.0, ++ 2.0, 2.0, 2.0, 3.0, 3.0, 3.0, ++ -2.0, -2.0, -2.0, -3.0, -3.0, -3.0, ++ 37778931862957161709568.0, 37778931862957161709569.0, ++ 37778931862957161709580.0, 37778931862957161709581.0, ++ 37778931862957161709582.0, 37778931862957161709583.0, ++ 37778931862957161709584.0, 37778931862957161709585.0, ++ 37778931862957161709586.0, 37778931862957161709587.0, ++ std::numeric_limits::lowest() + 1, ++ std::numeric_limits::infinity() ++ }; ++ double outputs_RZ_D[kTableLength] = { ++ 18446744073709551617.0, 4503599627370496.0, -4503599627370496.0, ++ 1.26782468584154733584017312973E30, 1.44860108245951772690707170478E147, ++ 1.7976931348623157E308, 0, ++ 309485009821345068724781057.0, ++ 2.0, 2.0, 2.0, 3.0, 3.0, 3.0, ++ -2.0, -2.0, -2.0, -3.0, -3.0, -3.0, ++ 37778931862957161709568.0, 37778931862957161709569.0, ++ 37778931862957161709580.0, 37778931862957161709581.0, ++ 37778931862957161709582.0, 37778931862957161709583.0, ++ 37778931862957161709584.0, 37778931862957161709585.0, ++ 37778931862957161709586.0, 37778931862957161709587.0, ++ std::numeric_limits::max() - 1, ++ std::numeric_limits::infinity() ++ }; ++ float outputs_RP_S[kTableLength] = { ++ 18446744073709551617.0, 4503599627370496.0, -4503599627370496.0, ++ 1.26782468584154733584017312973E30, 1.44860108245951772690707170478E37, ++ 1.7976931348623157E38, 1, ++ 309485009821345068724781057.0, ++ 3.0, 3.0, 3.0, 4.0, 4.0, 4.0, ++ -2.0, -2.0, 
-2.0, -3.0, -3.0, -3.0, ++ 37778931862957161709568.0, 37778931862957161709569.0, ++ 37778931862957161709580.0, 37778931862957161709581.0, ++ 37778931862957161709582.0, 37778931862957161709583.0, ++ 37778931862957161709584.0, 37778931862957161709585.0, ++ 37778931862957161709586.0, 37778931862957161709587.0, ++ std::numeric_limits::lowest() + 1, ++ std::numeric_limits::infinity() ++ }; ++ double outputs_RP_D[kTableLength] = { ++ 18446744073709551617.0, 4503599627370496.0, -4503599627370496.0, ++ 1.26782468584154733584017312973E30, 1.44860108245951772690707170478E147, ++ 1.7976931348623157E308, 1, ++ 309485009821345068724781057.0, ++ 3.0, 3.0, 3.0, 4.0, 4.0, 4.0, ++ -2.0, -2.0, -2.0, -3.0, -3.0, -3.0, ++ 37778931862957161709568.0, 37778931862957161709569.0, ++ 37778931862957161709580.0, 37778931862957161709581.0, ++ 37778931862957161709582.0, 37778931862957161709583.0, ++ 37778931862957161709584.0, 37778931862957161709585.0, ++ 37778931862957161709586.0, 37778931862957161709587.0, ++ std::numeric_limits::max(), ++ std::numeric_limits::infinity() ++ }; ++ float outputs_RM_S[kTableLength] = { ++ 18446744073709551617.0, 4503599627370496.0, -4503599627370496.0, ++ 1.26782468584154733584017312973E30, 1.44860108245951772690707170478E37, ++ 1.7976931348623157E38, 0, ++ 309485009821345068724781057.0, ++ 2.0, 2.0, 2.0, 3.0, 3.0, 3.0, ++ -3.0, -3.0, -3.0, -4.0, -4.0, -4.0, ++ 37778931862957161709568.0, 37778931862957161709569.0, ++ 37778931862957161709580.0, 37778931862957161709581.0, ++ 37778931862957161709582.0, 37778931862957161709583.0, ++ 37778931862957161709584.0, 37778931862957161709585.0, ++ 37778931862957161709586.0, 37778931862957161709587.0, ++ std::numeric_limits::lowest() + 1, ++ std::numeric_limits::infinity() ++ }; ++ double outputs_RM_D[kTableLength] = { ++ 18446744073709551617.0, 4503599627370496.0, -4503599627370496.0, ++ 1.26782468584154733584017312973E30, 1.44860108245951772690707170478E147, ++ 1.7976931348623157E308, 0, ++ 309485009821345068724781057.0, ++ 2.0, 2.0, 2.0, 3.0, 3.0, 3.0, ++ -3.0, -3.0, -3.0, -4.0, -4.0, -4.0, ++ 37778931862957161709568.0, 37778931862957161709569.0, ++ 37778931862957161709580.0, 37778931862957161709581.0, ++ 37778931862957161709582.0, 37778931862957161709583.0, ++ 37778931862957161709584.0, 37778931862957161709585.0, ++ 37778931862957161709586.0, 37778931862957161709587.0, ++ std::numeric_limits::max(), ++ std::numeric_limits::infinity() ++ }; ++ // clang-format on ++ ++ int fcsr_inputs[4] = {kRoundToNearest, kRoundToZero, kRoundToPlusInf, ++ kRoundToMinusInf}; ++ double* outputs_d[4] = {outputs_RN_D, outputs_RZ_D, outputs_RP_D, ++ outputs_RM_D}; ++ float* outputs_s[4] = {outputs_RN_S, outputs_RZ_S, outputs_RP_S, ++ outputs_RM_S}; ++ ++ __ Fld_d(f8, MemOperand(a0, offsetof(Test, a))); ++ __ Fld_s(f9, MemOperand(a0, offsetof(Test, b))); ++ __ xor_(a5, a5, a5); ++ __ Ld_w(a5, MemOperand(a0, offsetof(Test, fcsr))); ++ __ movfcsr2gr(a4); ++ __ movgr2fcsr(a5); ++ __ frint_d(f10, f8); ++ __ frint_s(f11, f9); ++ __ Fst_d(f10, MemOperand(a0, offsetof(Test, c))); ++ __ Fst_s(f11, MemOperand(a0, offsetof(Test, d))); ++ __ movgr2fcsr(a4); ++ __ jirl(zero_reg, ra, 0); ++ ++ CodeDesc desc; ++ assm.GetCode(isolate, &desc); ++ Handle code = Factory::CodeBuilder(isolate, desc, Code::STUB).Build(); ++ auto f = GeneratedCode::FromCode(*code); ++ for (int j = 0; j < 4; j++) { ++ test.fcsr = fcsr_inputs[j]; ++ for (int i = 0; i < kTableLength; i++) { ++ test.a = inputs_d[i]; ++ test.b = inputs_s[i]; ++ f.Call(&test, 0, 0, 0, 0); ++ CHECK_EQ(test.c, outputs_d[j][i]); ++ 
CHECK_EQ(test.d, outputs_s[j][i]); ++ } ++ } ++} ++ ++TEST(FMOV) { ++ const int kTableLength = 7; ++ CcTest::InitializeVM(); ++ Isolate* isolate = CcTest::i_isolate(); ++ HandleScope scope(isolate); ++ MacroAssembler assm(isolate, v8::internal::CodeObjectRequired::kYes); ++ ++ struct TestFloat { ++ double a; ++ float b; ++ double c; ++ float d; ++ }; ++ ++ TestFloat test; ++ ++ // clang-format off ++ double inputs_D[kTableLength] = { ++ 5.3, -5.3, 0.29, -0.29, 0, ++ std::numeric_limits::max(), ++ -std::numeric_limits::max() ++ }; ++ float inputs_S[kTableLength] = { ++ 4.8, -4.8, 0.29, -0.29, 0, ++ std::numeric_limits::max(), ++ -std::numeric_limits::max() ++ }; ++ ++ double outputs_D[kTableLength] = { ++ 5.3, -5.3, 0.29, -0.29, 0, ++ std::numeric_limits::max(), ++ -std::numeric_limits::max() ++ }; ++ ++ float outputs_S[kTableLength] = { ++ 4.8, -4.8, 0.29, -0.29, 0, ++ std::numeric_limits::max(), ++ -std::numeric_limits::max() ++ }; ++ // clang-format on ++ ++ __ Fld_d(f8, MemOperand(a0, offsetof(TestFloat, a))); ++ __ Fld_s(f9, MemOperand(a0, offsetof(TestFloat, b))); ++ __ fmov_d(f10, f8); ++ __ fmov_s(f11, f9); ++ __ Fst_d(f10, MemOperand(a0, offsetof(TestFloat, c))); ++ __ Fst_s(f11, MemOperand(a0, offsetof(TestFloat, d))); ++ __ jirl(zero_reg, ra, 0); ++ ++ CodeDesc desc; ++ assm.GetCode(isolate, &desc); ++ Handle code = Factory::CodeBuilder(isolate, desc, Code::STUB).Build(); ++ auto f = GeneratedCode::FromCode(*code); ++ for (int i = 0; i < kTableLength; i++) { ++ test.a = inputs_D[i]; ++ test.b = inputs_S[i]; ++ f.Call(&test, 0, 0, 0, 0); ++ CHECK_EQ(test.c, outputs_D[i]); ++ CHECK_EQ(test.d, outputs_S[i]); ++ } ++} ++ ++TEST(LA14) { ++ CcTest::InitializeVM(); ++ Isolate* isolate = CcTest::i_isolate(); ++ HandleScope scope(isolate); ++ ++ struct T { ++ double a; ++ double b; ++ double c; ++ double d; ++ int64_t high; ++ int64_t low; ++ }; ++ T t; ++ ++ MacroAssembler assm(isolate, v8::internal::CodeObjectRequired::kYes); ++ ++ __ Fld_d(f8, MemOperand(a0, offsetof(T, a))); ++ __ Fld_d(f9, MemOperand(a0, offsetof(T, b))); ++ ++ __ movfr2gr_s(a4, f8); ++ __ movfrh2gr_s(a5, f8); ++ __ movfr2gr_d(a6, f9); ++ ++ __ movgr2fr_w(f9, a4); ++ __ movgr2frh_w(f9, a5); ++ __ movgr2fr_d(f8, a6); ++ ++ __ Fst_d(f8, MemOperand(a0, offsetof(T, a))); ++ __ Fst_d(f9, MemOperand(a0, offsetof(T, c))); ++ ++ __ Fld_d(f8, MemOperand(a0, offsetof(T, d))); ++ __ movfrh2gr_s(a4, f8); ++ __ movfr2gr_s(a5, f8); ++ ++ __ St_d(a4, MemOperand(a0, offsetof(T, high))); ++ __ St_d(a5, MemOperand(a0, offsetof(T, low))); ++ ++ __ jirl(zero_reg, ra, 0); ++ ++ CodeDesc desc; ++ assm.GetCode(isolate, &desc); ++ Handle code = Factory::CodeBuilder(isolate, desc, Code::STUB).Build(); ++ auto f = GeneratedCode::FromCode(*code); ++ ++ t.a = 1.5e22; ++ t.b = 2.75e11; ++ t.c = 17.17; ++ t.d = -2.75e11; ++ f.Call(&t, 0, 0, 0, 0); ++ CHECK_EQ(2.75e11, t.a); ++ CHECK_EQ(2.75e11, t.b); ++ CHECK_EQ(1.5e22, t.c); ++ CHECK_EQ(static_cast(0xFFFFFFFFC25001D1L), t.high); ++ CHECK_EQ(static_cast(0xFFFFFFFFBF800000L), t.low); ++ ++ t.a = -1.5e22; ++ t.b = -2.75e11; ++ t.c = 17.17; ++ t.d = 274999868928.0; ++ f.Call(&t, 0, 0, 0, 0); ++ CHECK_EQ(-2.75e11, t.a); ++ CHECK_EQ(-2.75e11, t.b); ++ CHECK_EQ(-1.5e22, t.c); ++ CHECK_EQ(static_cast(0x425001D1L), t.high); ++ CHECK_EQ(static_cast(0x3F800000L), t.low); ++} ++ ++uint64_t run_bceqz(int fcc_value, int32_t offset) { ++ Isolate* isolate = CcTest::i_isolate(); ++ HandleScope scope(isolate); ++ ++ MacroAssembler assm(isolate, v8::internal::CodeObjectRequired::kYes); ++ ++ Label main_block, L; 
++ __ li(a2, 0); ++ __ li(t0, fcc_value); ++ __ b(&main_block); ++ // Block 1 ++ for (int32_t i = -104; i <= -55; ++i) { ++ __ addi_d(a2, a2, 0x1); ++ } ++ __ b(&L); ++ ++ // Block 2 ++ for (int32_t i = -53; i <= -4; ++i) { ++ __ addi_d(a2, a2, 0x10); ++ } ++ __ b(&L); ++ ++ // Block 3 (Main) ++ __ bind(&main_block); ++ __ movcf2gr(t1, FCC0); ++ __ movgr2cf(FCC0, t0); ++ __ bceqz(FCC0, offset); ++ __ bind(&L); ++ __ movgr2cf(FCC0, t1); ++ __ or_(a0, a2, zero_reg); ++ __ jirl(zero_reg, ra, 0); ++ ++ // Block 4 ++ for (int32_t i = 4; i <= 53; ++i) { ++ __ addi_d(a2, a2, 0x100); ++ } ++ __ b(&L); ++ ++ // Block 5 ++ for (int32_t i = 55; i <= 104; ++i) { ++ __ addi_d(a2, a2, 0x300); ++ } ++ __ b(&L); ++ ++ CodeDesc desc; ++ assm.GetCode(isolate, &desc); ++ Handle code = Factory::CodeBuilder(isolate, desc, Code::STUB).Build(); ++ auto f = GeneratedCode::FromCode(*code); ++ uint64_t res = reinterpret_cast(f.Call(0, 0, 0, 0, 0)); ++ ++ return res; ++} ++ ++TEST(BCEQZ) { ++ CcTest::InitializeVM(); ++ struct TestCaseBceqz { ++ int fcc; ++ int32_t offset; ++ uint64_t expected_res; ++ }; ++ ++ // clang-format off ++ struct TestCaseBceqz tc[] = { ++ // fcc, offset, expected_res ++ { 0, -90, 0x24 }, ++ { 0, -27, 0x180 }, ++ { 0, 47, 0x700 }, ++ { 0, 70, 0x6900 }, ++ { 1, -27, 0 }, ++ { 1, 47, 0 }, ++ }; ++ // clang-format on ++ ++ size_t nr_test_cases = sizeof(tc) / sizeof(TestCaseBceqz); ++ for (size_t i = 0; i < nr_test_cases; ++i) { ++ uint64_t res = run_bceqz(tc[i].fcc, tc[i].offset); ++ CHECK_EQ(tc[i].expected_res, res); ++ } ++} ++ ++uint64_t run_bcnez(int fcc_value, int32_t offset) { ++ Isolate* isolate = CcTest::i_isolate(); ++ HandleScope scope(isolate); ++ ++ MacroAssembler assm(isolate, v8::internal::CodeObjectRequired::kYes); ++ ++ Label main_block, L; ++ __ li(a2, 0); ++ __ li(t0, fcc_value); ++ __ b(&main_block); ++ // Block 1 ++ for (int32_t i = -104; i <= -55; ++i) { ++ __ addi_d(a2, a2, 0x1); ++ } ++ __ b(&L); ++ ++ // Block 2 ++ for (int32_t i = -53; i <= -4; ++i) { ++ __ addi_d(a2, a2, 0x10); ++ } ++ __ b(&L); ++ ++ // Block 3 (Main) ++ __ bind(&main_block); ++ __ movcf2gr(t1, FCC0); ++ __ movgr2cf(FCC0, t0); ++ __ bcnez(FCC0, offset); ++ __ bind(&L); ++ __ movgr2cf(FCC0, t1); ++ __ or_(a0, a2, zero_reg); ++ __ jirl(zero_reg, ra, 0); ++ ++ // Block 4 ++ for (int32_t i = 4; i <= 53; ++i) { ++ __ addi_d(a2, a2, 0x100); ++ } ++ __ b(&L); ++ ++ // Block 5 ++ for (int32_t i = 55; i <= 104; ++i) { ++ __ addi_d(a2, a2, 0x300); ++ } ++ __ b(&L); ++ ++ CodeDesc desc; ++ assm.GetCode(isolate, &desc); ++ Handle code = Factory::CodeBuilder(isolate, desc, Code::STUB).Build(); ++ auto f = GeneratedCode::FromCode(*code); ++ uint64_t res = reinterpret_cast(f.Call(0, 0, 0, 0, 0)); ++ ++ return res; ++} ++ ++TEST(BCNEZ) { ++ CcTest::InitializeVM(); ++ struct TestCaseBcnez { ++ int fcc; ++ int32_t offset; ++ uint64_t expected_res; ++ }; ++ ++ // clang-format off ++ struct TestCaseBcnez tc[] = { ++ // fcc, offset, expected_res ++ { 1, -90, 0x24 }, ++ { 1, -27, 0x180 }, ++ { 1, 47, 0x700 }, ++ { 1, 70, 0x6900 }, ++ { 0, -27, 0 }, ++ { 0, 47, 0 }, ++ }; ++ // clang-format on ++ ++ size_t nr_test_cases = sizeof(tc) / sizeof(TestCaseBcnez); ++ for (size_t i = 0; i < nr_test_cases; ++i) { ++ uint64_t res = run_bcnez(tc[i].fcc, tc[i].offset); ++ CHECK_EQ(tc[i].expected_res, res); ++ } ++} ++ ++TEST(jump_tables1) { ++ // Test jump tables with forward jumps. 
++ CcTest::InitializeVM(); ++ Isolate* isolate = CcTest::i_isolate(); ++ HandleScope scope(isolate); ++ MacroAssembler assm(isolate, v8::internal::CodeObjectRequired::kYes); ++ ++ const int kNumCases = 512; ++ int values[kNumCases]; ++ isolate->random_number_generator()->NextBytes(values, sizeof(values)); ++ Label labels[kNumCases]; ++ ++ __ addi_d(sp, sp, -8); ++ __ St_d(ra, MemOperand(sp, 0)); ++ __ Align(8); ++ ++ Label done; ++ { ++ __ BlockTrampolinePoolFor(kNumCases * 2 + 6); ++ __ pcaddi(ra, 2); ++ __ slli_d(t7, a0, 3); ++ __ add_d(t7, t7, ra); ++ __ Ld_d(t7, MemOperand(t7, 4 * kInstrSize)); ++ __ jirl(zero_reg, t7, 0); ++ __ nop(); ++ for (int i = 0; i < kNumCases; ++i) { ++ __ dd(&labels[i]); ++ } ++ } ++ ++ for (int i = 0; i < kNumCases; ++i) { ++ __ bind(&labels[i]); ++ __ lu12i_w(a2, (values[i] >> 12) & 0xFFFFF); ++ __ ori(a2, a2, values[i] & 0xFFF); ++ __ b(&done); ++ __ nop(); ++ } ++ ++ __ bind(&done); ++ __ Ld_d(ra, MemOperand(sp, 0)); ++ __ addi_d(sp, sp, 8); ++ __ or_(a0, a2, zero_reg); ++ __ jirl(zero_reg, ra, 0); ++ ++ CHECK_EQ(0, assm.UnboundLabelsCount()); ++ ++ CodeDesc desc; ++ assm.GetCode(isolate, &desc); ++ Handle code = Factory::CodeBuilder(isolate, desc, Code::STUB).Build(); ++#ifdef OBJECT_PRINT ++ code->Print(std::cout); ++#endif ++ auto f = GeneratedCode::FromCode(*code); ++ for (int i = 0; i < kNumCases; ++i) { ++ int64_t res = reinterpret_cast(f.Call(i, 0, 0, 0, 0)); ++ ::printf("f(%d) = %" PRId64 "\n", i, res); ++ CHECK_EQ((values[i]), static_cast(res)); ++ } ++} ++ ++TEST(jump_tables2) { ++ // Test jump tables with backward jumps. ++ CcTest::InitializeVM(); ++ Isolate* isolate = CcTest::i_isolate(); ++ HandleScope scope(isolate); ++ MacroAssembler assm(isolate, v8::internal::CodeObjectRequired::kYes); ++ ++ const int kNumCases = 512; ++ int values[kNumCases]; ++ isolate->random_number_generator()->NextBytes(values, sizeof(values)); ++ Label labels[kNumCases]; ++ ++ __ addi_d(sp, sp, -8); ++ __ St_d(ra, MemOperand(sp, 0)); ++ ++ Label done, dispatch; ++ __ b(&dispatch); ++ __ nop(); ++ ++ for (int i = 0; i < kNumCases; ++i) { ++ __ bind(&labels[i]); ++ __ lu12i_w(a2, (values[i] >> 12) & 0xFFFFF); ++ __ ori(a2, a2, values[i] & 0xFFF); ++ __ b(&done); ++ __ nop(); ++ } ++ ++ __ Align(8); ++ __ bind(&dispatch); ++ { ++ __ BlockTrampolinePoolFor(kNumCases * 2 + 6); ++ __ pcaddi(ra, 2); ++ __ slli_d(t7, a0, 3); ++ __ add_d(t7, t7, ra); ++ __ Ld_d(t7, MemOperand(t7, 4 * kInstrSize)); ++ __ jirl(zero_reg, t7, 0); ++ __ nop(); ++ for (int i = 0; i < kNumCases; ++i) { ++ __ dd(&labels[i]); ++ } ++ } ++ ++ __ bind(&done); ++ __ Ld_d(ra, MemOperand(sp, 0)); ++ __ addi_d(sp, sp, 8); ++ __ or_(a0, a2, zero_reg); ++ __ jirl(zero_reg, ra, 0); ++ ++ CodeDesc desc; ++ assm.GetCode(isolate, &desc); ++ Handle code = Factory::CodeBuilder(isolate, desc, Code::STUB).Build(); ++#ifdef OBJECT_PRINT ++ code->Print(std::cout); ++#endif ++ auto f = GeneratedCode::FromCode(*code); ++ for (int i = 0; i < kNumCases; ++i) { ++ int64_t res = reinterpret_cast(f.Call(i, 0, 0, 0, 0)); ++ ::printf("f(%d) = %" PRId64 "\n", i, res); ++ CHECK_EQ(values[i], res); ++ } ++} ++ ++TEST(jump_tables3) { ++ // Test jump tables with backward jumps and embedded heap objects. 
++ CcTest::InitializeVM(); ++ Isolate* isolate = CcTest::i_isolate(); ++ HandleScope scope(isolate); ++ MacroAssembler assm(isolate, v8::internal::CodeObjectRequired::kYes); ++ ++ const int kNumCases = 512; ++ Handle values[kNumCases]; ++ for (int i = 0; i < kNumCases; ++i) { ++ double value = isolate->random_number_generator()->NextDouble(); ++ values[i] = isolate->factory()->NewHeapNumber(value); ++ } ++ Label labels[kNumCases]; ++ Object obj; ++ int64_t imm64; ++ ++ __ addi_d(sp, sp, -8); ++ __ St_d(ra, MemOperand(sp, 0)); ++ ++ Label done, dispatch; ++ __ b(&dispatch); ++ __ nop(); ++ ++ for (int i = 0; i < kNumCases; ++i) { ++ __ bind(&labels[i]); ++ obj = *values[i]; ++ imm64 = obj.ptr(); ++ __ lu12i_w(a2, (imm64 >> 12) & 0xFFFFF); ++ __ ori(a2, a2, imm64 & 0xFFF); ++ __ lu32i_d(a2, (imm64 >> 32) & 0xFFFFF); ++ __ lu52i_d(a2, a2, (imm64 >> 52) & 0xFFF); ++ __ b(&done); ++ } ++ ++ __ Align(8); ++ __ bind(&dispatch); ++ { ++ __ BlockTrampolinePoolFor(kNumCases * 2 + 6); ++ __ pcaddi(ra, 2); ++ __ slli_d(t7, a0, 3); // In delay slot. ++ __ add_d(t7, t7, ra); ++ __ Ld_d(t7, MemOperand(t7, 4 * kInstrSize)); ++ __ jirl(zero_reg, t7, 0); ++ __ nop(); ++ for (int i = 0; i < kNumCases; ++i) { ++ __ dd(&labels[i]); ++ } ++ } ++ __ bind(&done); ++ __ Ld_d(ra, MemOperand(sp, 0)); ++ __ addi_d(sp, sp, 8); ++ __ or_(a0, a2, zero_reg); ++ __ jirl(zero_reg, ra, 0); ++ ++ CodeDesc desc; ++ assm.GetCode(isolate, &desc); ++ Handle code = Factory::CodeBuilder(isolate, desc, Code::STUB).Build(); ++#ifdef OBJECT_PRINT ++ code->Print(std::cout); ++#endif ++ auto f = GeneratedCode::FromCode(*code); ++ for (int i = 0; i < kNumCases; ++i) { ++ Handle result( ++ Object(reinterpret_cast
(f.Call(i, 0, 0, 0, 0))), isolate); ++#ifdef OBJECT_PRINT ++ ::printf("f(%d) = ", i); ++ result->Print(std::cout); ++ ::printf("\n"); ++#endif ++ CHECK(values[i].is_identical_to(result)); ++ } ++} ++ ++uint64_t run_li_macro(int64_t imm, LiFlags mode, int32_t num_instr = 0) { ++ Isolate* isolate = CcTest::i_isolate(); ++ HandleScope scope(isolate); ++ MacroAssembler assm(isolate, v8::internal::CodeObjectRequired::kYes); ++ ++ Label code_start; ++ __ bind(&code_start); ++ __ li(a2, imm, mode); ++ if (num_instr > 0) { ++ CHECK_EQ(assm.InstructionsGeneratedSince(&code_start), num_instr); ++ CHECK_EQ(__ InstrCountForLi64Bit(imm), num_instr); ++ } ++ __ or_(a0, a2, zero_reg); ++ __ jirl(zero_reg, ra, 0); ++ ++ CodeDesc desc; ++ assm.GetCode(isolate, &desc); ++ Handle code = Factory::CodeBuilder(isolate, desc, Code::STUB).Build(); ++#ifdef OBJECT_PRINT ++ code->Print(std::cout); ++#endif ++ auto f = GeneratedCode::FromCode(*code); ++ ++ uint64_t res = reinterpret_cast(f.Call(0, 0, 0, 0, 0)); ++ ++ return res; ++} ++ ++TEST(li_macro) { ++ CcTest::InitializeVM(); ++ ++ // Test li macro-instruction for border cases. ++ ++ struct TestCase_li { ++ uint64_t imm; ++ int32_t num_instr; ++ }; ++ // clang-format off ++ struct TestCase_li tc[] = { ++ // imm, num_instr ++ {0xFFFFFFFFFFFFF800, 1}, // min_int12 ++ // The test case above generates addi_d instruction. ++ // This is int12 value and we can load it using just addi_d. ++ { 0x800, 1}, // max_int12 + 1 ++ // Generates ori ++ // max_int12 + 1 is not int12 but is uint12, just use ori. ++ {0xFFFFFFFFFFFFF7FF, 2}, // min_int12 - 1 ++ // Generates lu12i + ori ++ // We load int32 value using lu12i_w + ori. ++ { 0x801, 1}, // max_int12 + 2 ++ // Generates ori ++ // Also an uint12 value, use ori. ++ { 0x00001000, 1}, // max_uint12 + 1 ++ // Generates lu12i_w ++ // Low 12 bits are 0, load value using lu12i_w. ++ { 0x00001001, 2}, // max_uint12 + 2 ++ // Generates lu12i_w + ori ++ // We have to generate two instructions in this case.
++ {0x00000000FFFFFFFF, 2}, // max_uint32 ++ // addi_w + lu32i_d ++ {0x00000000FFFFFFFE, 2}, // max_uint32 - 1 ++ // addi_w + lu32i_d ++ {0xFFFFFFFF80000000, 1}, // min_int32 ++ // lu12i_w ++ {0x0000000080000000, 2}, // max_int32 + 1 ++ // lu12i_w + lu32i_d ++ {0xFFFF0000FFFF8765, 3}, ++ // lu12i_w + ori + lu32i_d ++ {0x1234ABCD87654321, 4}, ++ // lu12i_w + ori + lu32i_d + lu52i_d ++ {0xFFFF789100000000, 2}, ++ // xor + lu32i_d ++ {0xF12F789100000000, 3}, ++ // xor + lu32i_d + lu52i_d ++ {0xF120000000000800, 2}, ++ // ori + lu52i_d ++ {0xFFF0000000000000, 1}, ++ // lu52i_d ++ {0xF100000000000000, 1}, ++ {0x0122000000000000, 2}, ++ {0x1234FFFF77654321, 4}, ++ {0x1230000077654321, 3}, ++ }; ++ // clang-format on ++ ++ size_t nr_test_cases = sizeof(tc) / sizeof(TestCase_li); ++ for (size_t i = 0; i < nr_test_cases; ++i) { ++ CHECK_EQ(tc[i].imm, ++ run_li_macro(tc[i].imm, OPTIMIZE_SIZE, tc[i].num_instr)); ++ CHECK_EQ(tc[i].imm, run_li_macro(tc[i].imm, CONSTANT_SIZE)); ++ if (is_int48(tc[i].imm)) { ++ CHECK_EQ(tc[i].imm, run_li_macro(tc[i].imm, ADDRESS_LOAD)); ++ } ++ } ++} ++ ++TEST(FMIN_FMAX) { ++ CcTest::InitializeVM(); ++ Isolate* isolate = CcTest::i_isolate(); ++ HandleScope scope(isolate); ++ MacroAssembler assm(isolate, v8::internal::CodeObjectRequired::kYes); ++ ++ struct TestFloat { ++ double a; ++ double b; ++ float c; ++ float d; ++ double e; ++ double f; ++ float g; ++ float h; ++ }; ++ ++ TestFloat test; ++ const double dnan = std::numeric_limits::quiet_NaN(); ++ const double dinf = std::numeric_limits::infinity(); ++ const double dminf = -std::numeric_limits::infinity(); ++ const float fnan = std::numeric_limits::quiet_NaN(); ++ const float finf = std::numeric_limits::infinity(); ++ const float fminf = -std::numeric_limits::infinity(); ++ const int kTableLength = 13; ++ ++ // clang-format off ++ double inputsa[kTableLength] = {2.0, 3.0, dnan, 3.0, -0.0, 0.0, dinf, ++ dnan, 42.0, dinf, dminf, dinf, dnan}; ++ double inputsb[kTableLength] = {3.0, 2.0, 3.0, dnan, 0.0, -0.0, dnan, ++ dinf, dinf, 42.0, dinf, dminf, dnan}; ++ double outputsdmin[kTableLength] = {2.0, 2.0, 3.0, 3.0, -0.0, ++ -0.0, dinf, dinf, 42.0, 42.0, ++ dminf, dminf, dnan}; ++ double outputsdmax[kTableLength] = {3.0, 3.0, 3.0, 3.0, 0.0, 0.0, dinf, ++ dinf, dinf, dinf, dinf, dinf, dnan}; ++ ++ float inputsc[kTableLength] = {2.0, 3.0, fnan, 3.0, -0.0, 0.0, finf, ++ fnan, 42.0, finf, fminf, finf, fnan}; ++ float inputsd[kTableLength] = {3.0, 2.0, 3.0, fnan, 0.0, -0.0, fnan, ++ finf, finf, 42.0, finf, fminf, fnan}; ++ float outputsfmin[kTableLength] = {2.0, 2.0, 3.0, 3.0, -0.0, ++ -0.0, finf, finf, 42.0, 42.0, ++ fminf, fminf, fnan}; ++ float outputsfmax[kTableLength] = {3.0, 3.0, 3.0, 3.0, 0.0, 0.0, finf, ++ finf, finf, finf, finf, finf, fnan}; ++ // clang-format on ++ ++ __ Fld_d(f8, MemOperand(a0, offsetof(TestFloat, a))); ++ __ Fld_d(f9, MemOperand(a0, offsetof(TestFloat, b))); ++ __ Fld_s(f10, MemOperand(a0, offsetof(TestFloat, c))); ++ __ Fld_s(f11, MemOperand(a0, offsetof(TestFloat, d))); ++ __ fmin_d(f12, f8, f9); ++ __ fmax_d(f13, f8, f9); ++ __ fmin_s(f14, f10, f11); ++ __ fmax_s(f15, f10, f11); ++ __ Fst_d(f12, MemOperand(a0, offsetof(TestFloat, e))); ++ __ Fst_d(f13, MemOperand(a0, offsetof(TestFloat, f))); ++ __ Fst_s(f14, MemOperand(a0, offsetof(TestFloat, g))); ++ __ Fst_s(f15, MemOperand(a0, offsetof(TestFloat, h))); ++ __ jirl(zero_reg, ra, 0); ++ ++ CodeDesc desc; ++ assm.GetCode(isolate, &desc); ++ Handle code = Factory::CodeBuilder(isolate, desc, Code::STUB).Build(); ++ auto f = 
GeneratedCode::FromCode(*code); ++ for (int i = 4; i < kTableLength; i++) { ++ test.a = inputsa[i]; ++ test.b = inputsb[i]; ++ test.c = inputsc[i]; ++ test.d = inputsd[i]; ++ ++ f.Call(&test, 0, 0, 0, 0); ++ ++ CHECK_EQ(0, memcmp(&test.e, &outputsdmin[i], sizeof(test.e))); ++ CHECK_EQ(0, memcmp(&test.f, &outputsdmax[i], sizeof(test.f))); ++ CHECK_EQ(0, memcmp(&test.g, &outputsfmin[i], sizeof(test.g))); ++ CHECK_EQ(0, memcmp(&test.h, &outputsfmax[i], sizeof(test.h))); ++ } ++} ++ ++TEST(FMINA_FMAXA) { ++ const int kTableLength = 23; ++ CcTest::InitializeVM(); ++ Isolate* isolate = CcTest::i_isolate(); ++ HandleScope scope(isolate); ++ MacroAssembler assm(isolate, v8::internal::CodeObjectRequired::kYes); ++ const double dnan = std::numeric_limits::quiet_NaN(); ++ const double dinf = std::numeric_limits::infinity(); ++ const double dminf = -std::numeric_limits::infinity(); ++ const float fnan = std::numeric_limits::quiet_NaN(); ++ const float finf = std::numeric_limits::infinity(); ++ const float fminf = -std::numeric_limits::infinity(); ++ ++ struct TestFloat { ++ double a; ++ double b; ++ double resd1; ++ double resd2; ++ float c; ++ float d; ++ float resf1; ++ float resf2; ++ }; ++ ++ TestFloat test; ++ // clang-format off ++ double inputsa[kTableLength] = { ++ 5.3, 4.8, 6.1, 9.8, 9.8, 9.8, -10.0, -8.9, -9.8, -10.0, -8.9, -9.8, ++ dnan, 3.0, -0.0, 0.0, dinf, dnan, 42.0, dinf, dminf, dinf, dnan}; ++ double inputsb[kTableLength] = { ++ 4.8, 5.3, 6.1, -10.0, -8.9, -9.8, 9.8, 9.8, 9.8, -9.8, -11.2, -9.8, ++ 3.0, dnan, 0.0, -0.0, dnan, dinf, dinf, 42.0, dinf, dminf, dnan}; ++ double resd1[kTableLength] = { ++ 4.8, 4.8, 6.1, 9.8, -8.9, -9.8, 9.8, -8.9, -9.8, -9.8, -8.9, -9.8, ++ 3.0, 3.0, -0.0, -0.0, dinf, dinf, 42.0, 42.0, dminf, dminf, dnan}; ++ double resd2[kTableLength] = { ++ 5.3, 5.3, 6.1, -10.0, 9.8, 9.8, -10.0, 9.8, 9.8, -10.0, -11.2, -9.8, ++ 3.0, 3.0, 0.0, 0.0, dinf, dinf, dinf, dinf, dinf, dinf, dnan}; ++ float inputsc[kTableLength] = { ++ 5.3, 4.8, 6.1, 9.8, 9.8, 9.8, -10.0, -8.9, -9.8, -10.0, -8.9, -9.8, ++ fnan, 3.0, -0.0, 0.0, finf, fnan, 42.0, finf, fminf, finf, fnan}; ++ float inputsd[kTableLength] = { ++ 4.8, 5.3, 6.1, -10.0, -8.9, -9.8, 9.8, 9.8, 9.8, -9.8, -11.2, -9.8, ++ 3.0, fnan, -0.0, 0.0, fnan, finf, finf, 42.0, finf, fminf, fnan}; ++ float resf1[kTableLength] = { ++ 4.8, 4.8, 6.1, 9.8, -8.9, -9.8, 9.8, -8.9, -9.8, -9.8, -8.9, -9.8, ++ 3.0, 3.0, -0.0, -0.0, finf, finf, 42.0, 42.0, fminf, fminf, fnan}; ++ float resf2[kTableLength] = { ++ 5.3, 5.3, 6.1, -10.0, 9.8, 9.8, -10.0, 9.8, 9.8, -10.0, -11.2, -9.8, ++ 3.0, 3.0, 0.0, 0.0, finf, finf, finf, finf, finf, finf, fnan}; ++ // clang-format on ++ ++ __ Fld_d(f8, MemOperand(a0, offsetof(TestFloat, a))); ++ __ Fld_d(f9, MemOperand(a0, offsetof(TestFloat, b))); ++ __ Fld_s(f10, MemOperand(a0, offsetof(TestFloat, c))); ++ __ Fld_s(f11, MemOperand(a0, offsetof(TestFloat, d))); ++ __ fmina_d(f12, f8, f9); ++ __ fmaxa_d(f13, f8, f9); ++ __ fmina_s(f14, f10, f11); ++ __ fmaxa_s(f15, f10, f11); ++ __ Fst_d(f12, MemOperand(a0, offsetof(TestFloat, resd1))); ++ __ Fst_d(f13, MemOperand(a0, offsetof(TestFloat, resd2))); ++ __ Fst_s(f14, MemOperand(a0, offsetof(TestFloat, resf1))); ++ __ Fst_s(f15, MemOperand(a0, offsetof(TestFloat, resf2))); ++ __ jirl(zero_reg, ra, 0); ++ ++ CodeDesc desc; ++ assm.GetCode(isolate, &desc); ++ Handle code = Factory::CodeBuilder(isolate, desc, Code::STUB).Build(); ++ auto f = GeneratedCode::FromCode(*code); ++ for (int i = 0; i < kTableLength; i++) { ++ test.a = inputsa[i]; ++ test.b = inputsb[i]; ++ 
test.c = inputsc[i]; ++ test.d = inputsd[i]; ++ f.Call(&test, 0, 0, 0, 0); ++ if (i < kTableLength - 1) { ++ CHECK_EQ(test.resd1, resd1[i]); ++ CHECK_EQ(test.resd2, resd2[i]); ++ CHECK_EQ(test.resf1, resf1[i]); ++ CHECK_EQ(test.resf2, resf2[i]); ++ } else { ++ CHECK(std::isnan(test.resd1)); ++ CHECK(std::isnan(test.resd2)); ++ CHECK(std::isnan(test.resf1)); ++ CHECK(std::isnan(test.resf2)); ++ } ++ } ++} ++ ++TEST(FADD) { ++ CcTest::InitializeVM(); ++ Isolate* isolate = CcTest::i_isolate(); ++ HandleScope scope(isolate); ++ MacroAssembler assm(isolate, v8::internal::CodeObjectRequired::kYes); ++ ++ struct TestFloat { ++ double a; ++ double b; ++ double c; ++ float d; ++ float e; ++ float f; ++ }; ++ ++ TestFloat test; ++ ++ __ Fld_d(f8, MemOperand(a0, offsetof(TestFloat, a))); ++ __ Fld_d(f9, MemOperand(a0, offsetof(TestFloat, b))); ++ __ fadd_d(f10, f8, f9); ++ __ Fst_d(f10, MemOperand(a0, offsetof(TestFloat, c))); ++ ++ __ Fld_s(f11, MemOperand(a0, offsetof(TestFloat, d))); ++ __ Fld_s(f12, MemOperand(a0, offsetof(TestFloat, e))); ++ __ fadd_s(f13, f11, f12); ++ __ Fst_s(f13, MemOperand(a0, offsetof(TestFloat, f))); ++ __ jirl(zero_reg, ra, 0); ++ ++ CodeDesc desc; ++ assm.GetCode(isolate, &desc); ++ Handle code = Factory::CodeBuilder(isolate, desc, Code::STUB).Build(); ++ auto f = GeneratedCode::FromCode(*code); ++ test.a = 2.0; ++ test.b = 3.0; ++ test.d = 2.0; ++ test.e = 3.0; ++ f.Call(&test, 0, 0, 0, 0); ++ CHECK_EQ(test.c, 5.0); ++ CHECK_EQ(test.f, 5.0); ++ ++ test.a = std::numeric_limits::max(); ++ test.b = -std::numeric_limits::max(); // lowest() ++ test.d = std::numeric_limits::max(); ++ test.e = -std::numeric_limits::max(); // lowest() ++ f.Call(&test, 0, 0, 0, 0); ++ CHECK_EQ(test.c, 0.0); ++ CHECK_EQ(test.f, 0.0); ++ ++ test.a = std::numeric_limits::max(); ++ test.b = std::numeric_limits::max(); ++ test.d = std::numeric_limits::max(); ++ test.e = std::numeric_limits::max(); ++ f.Call(&test, 0, 0, 0, 0); ++ CHECK(!std::isfinite(test.c)); ++ CHECK(!std::isfinite(test.f)); ++ ++ test.a = 5.0; ++ test.b = std::numeric_limits::signaling_NaN(); ++ test.d = 5.0; ++ test.e = std::numeric_limits::signaling_NaN(); ++ f.Call(&test, 0, 0, 0, 0); ++ CHECK(std::isnan(test.c)); ++ CHECK(std::isnan(test.f)); ++} ++ ++TEST(FSUB) { ++ const int kTableLength = 12; ++ CcTest::InitializeVM(); ++ Isolate* isolate = CcTest::i_isolate(); ++ HandleScope scope(isolate); ++ MacroAssembler assm(isolate, v8::internal::CodeObjectRequired::kYes); ++ ++ struct TestFloat { ++ float a; ++ float b; ++ float resultS; ++ double c; ++ double d; ++ double resultD; ++ }; ++ ++ TestFloat test; ++ ++ // clang-format off ++ double inputfs_D[kTableLength] = { ++ 5.3, 4.8, 2.9, -5.3, -4.8, -2.9, ++ 5.3, 4.8, 2.9, -5.3, -4.8, -2.9 ++ }; ++ double inputft_D[kTableLength] = { ++ 4.8, 5.3, 2.9, 4.8, 5.3, 2.9, ++ -4.8, -5.3, -2.9, -4.8, -5.3, -2.9 ++ }; ++ double outputs_D[kTableLength] = { ++ 0.5, -0.5, 0.0, -10.1, -10.1, -5.8, ++ 10.1, 10.1, 5.8, -0.5, 0.5, 0.0 ++ }; ++ float inputfs_S[kTableLength] = { ++ 5.3, 4.8, 2.9, -5.3, -4.8, -2.9, ++ 5.3, 4.8, 2.9, -5.3, -4.8, -2.9 ++ }; ++ float inputft_S[kTableLength] = { ++ 4.8, 5.3, 2.9, 4.8, 5.3, 2.9, ++ -4.8, -5.3, -2.9, -4.8, -5.3, -2.9 ++ }; ++ float outputs_S[kTableLength] = { ++ 0.5, -0.5, 0.0, -10.1, -10.1, -5.8, ++ 10.1, 10.1, 5.8, -0.5, 0.5, 0.0 ++ }; ++ // clang-format on ++ ++ __ Fld_s(f8, MemOperand(a0, offsetof(TestFloat, a))); ++ __ Fld_s(f9, MemOperand(a0, offsetof(TestFloat, b))); ++ __ Fld_d(f10, MemOperand(a0, offsetof(TestFloat, c))); ++ __ Fld_d(f11, 
MemOperand(a0, offsetof(TestFloat, d))); ++ __ fsub_s(f12, f8, f9); ++ __ fsub_d(f13, f10, f11); ++ __ Fst_s(f12, MemOperand(a0, offsetof(TestFloat, resultS))); ++ __ Fst_d(f13, MemOperand(a0, offsetof(TestFloat, resultD))); ++ __ jirl(zero_reg, ra, 0); ++ ++ CodeDesc desc; ++ assm.GetCode(isolate, &desc); ++ Handle code = Factory::CodeBuilder(isolate, desc, Code::STUB).Build(); ++ auto f = GeneratedCode::FromCode(*code); ++ for (int i = 0; i < kTableLength; i++) { ++ test.a = inputfs_S[i]; ++ test.b = inputft_S[i]; ++ test.c = inputfs_D[i]; ++ test.d = inputft_D[i]; ++ f.Call(&test, 0, 0, 0, 0); ++ CHECK_EQ(test.resultS, outputs_S[i]); ++ CHECK_EQ(test.resultD, outputs_D[i]); ++ } ++} ++ ++TEST(FMUL) { ++ const int kTableLength = 4; ++ CcTest::InitializeVM(); ++ Isolate* isolate = CcTest::i_isolate(); ++ HandleScope scope(isolate); ++ MacroAssembler assm(isolate, v8::internal::CodeObjectRequired::kYes); ++ ++ struct TestFloat { ++ float a; ++ float b; ++ float resultS; ++ double c; ++ double d; ++ double resultD; ++ }; ++ ++ TestFloat test; ++ // clang-format off ++ double inputfs_D[kTableLength] = { ++ 5.3, -5.3, 5.3, -2.9 ++ }; ++ double inputft_D[kTableLength] = { ++ 4.8, 4.8, -4.8, -0.29 ++ }; ++ ++ float inputfs_S[kTableLength] = { ++ 5.3, -5.3, 5.3, -2.9 ++ }; ++ float inputft_S[kTableLength] = { ++ 4.8, 4.8, -4.8, -0.29 ++ }; ++ // clang-format on ++ __ Fld_s(f8, MemOperand(a0, offsetof(TestFloat, a))); ++ __ Fld_s(f9, MemOperand(a0, offsetof(TestFloat, b))); ++ __ Fld_d(f10, MemOperand(a0, offsetof(TestFloat, c))); ++ __ Fld_d(f11, MemOperand(a0, offsetof(TestFloat, d))); ++ __ fmul_s(f12, f8, f9); ++ __ fmul_d(f13, f10, f11); ++ __ Fst_s(f12, MemOperand(a0, offsetof(TestFloat, resultS))); ++ __ Fst_d(f13, MemOperand(a0, offsetof(TestFloat, resultD))); ++ __ jirl(zero_reg, ra, 0); ++ ++ CodeDesc desc; ++ assm.GetCode(isolate, &desc); ++ Handle code = Factory::CodeBuilder(isolate, desc, Code::STUB).Build(); ++ auto f = GeneratedCode::FromCode(*code); ++ for (int i = 0; i < kTableLength; i++) { ++ test.a = inputfs_S[i]; ++ test.b = inputft_S[i]; ++ test.c = inputfs_D[i]; ++ test.d = inputft_D[i]; ++ f.Call(&test, 0, 0, 0, 0); ++ CHECK_EQ(test.resultS, inputfs_S[i] * inputft_S[i]); ++ CHECK_EQ(test.resultD, inputfs_D[i] * inputft_D[i]); ++ } ++} ++ ++TEST(FDIV) { ++ CcTest::InitializeVM(); ++ Isolate* isolate = CcTest::i_isolate(); ++ HandleScope scope(isolate); ++ MacroAssembler assm(isolate, v8::internal::CodeObjectRequired::kYes); ++ ++ struct Test { ++ double dOp1; ++ double dOp2; ++ double dRes; ++ float fOp1; ++ float fOp2; ++ float fRes; ++ }; ++ ++ Test test; ++ ++ __ movfcsr2gr(a4); ++ __ movgr2fcsr(zero_reg); ++ ++ __ Fld_d(f8, MemOperand(a0, offsetof(Test, dOp1))); ++ __ Fld_d(f9, MemOperand(a0, offsetof(Test, dOp2))); ++ __ Fld_s(f10, MemOperand(a0, offsetof(Test, fOp1))); ++ __ Fld_s(f11, MemOperand(a0, offsetof(Test, fOp2))); ++ __ fdiv_d(f12, f8, f9); ++ __ fdiv_s(f13, f10, f11); ++ __ Fst_d(f12, MemOperand(a0, offsetof(Test, dRes))); ++ __ Fst_s(f13, MemOperand(a0, offsetof(Test, fRes))); ++ ++ __ movgr2fcsr(a4); ++ __ jirl(zero_reg, ra, 0); ++ ++ CodeDesc desc; ++ assm.GetCode(isolate, &desc); ++ Handle code = Factory::CodeBuilder(isolate, desc, Code::STUB).Build(); ++ auto f = GeneratedCode::FromCode(*code); ++ f.Call(&test, 0, 0, 0, 0); ++ const int test_size = 3; ++ // clang-format off ++ double dOp1[test_size] = { ++ 5.0, DBL_MAX, DBL_MAX}; ++ ++ double dOp2[test_size] = { ++ 2.0, 2.0, -DBL_MAX}; ++ ++ double dRes[test_size] = { ++ 2.5, DBL_MAX / 2.0, -1.0}; ++ 
++ float fOp1[test_size] = { ++ 5.0, FLT_MAX, FLT_MAX}; ++ ++ float fOp2[test_size] = { ++ 2.0, 2.0, -FLT_MAX}; ++ ++ float fRes[test_size] = { ++ 2.5, FLT_MAX / 2.0, -1.0}; ++ // clang-format on ++ ++ for (int i = 0; i < test_size; i++) { ++ test.dOp1 = dOp1[i]; ++ test.dOp2 = dOp2[i]; ++ test.fOp1 = fOp1[i]; ++ test.fOp2 = fOp2[i]; ++ ++ f.Call(&test, 0, 0, 0, 0); ++ CHECK_EQ(test.dRes, dRes[i]); ++ CHECK_EQ(test.fRes, fRes[i]); ++ } ++ ++ test.dOp1 = DBL_MAX; ++ test.dOp2 = -0.0; ++ test.fOp1 = FLT_MAX; ++ test.fOp2 = -0.0; ++ ++ f.Call(&test, 0, 0, 0, 0); ++ CHECK(!std::isfinite(test.dRes)); ++ CHECK(!std::isfinite(test.fRes)); ++ ++ test.dOp1 = 0.0; ++ test.dOp2 = -0.0; ++ test.fOp1 = 0.0; ++ test.fOp2 = -0.0; ++ ++ f.Call(&test, 0, 0, 0, 0); ++ CHECK(std::isnan(test.dRes)); ++ CHECK(std::isnan(test.fRes)); ++ ++ test.dOp1 = std::numeric_limits::quiet_NaN(); ++ test.dOp2 = -5.0; ++ test.fOp1 = std::numeric_limits::quiet_NaN(); ++ test.fOp2 = -5.0; ++ ++ f.Call(&test, 0, 0, 0, 0); ++ CHECK(std::isnan(test.dRes)); ++ CHECK(std::isnan(test.fRes)); ++} ++ ++TEST(FABS) { ++ CcTest::InitializeVM(); ++ Isolate* isolate = CcTest::i_isolate(); ++ HandleScope scope(isolate); ++ MacroAssembler assm(isolate, v8::internal::CodeObjectRequired::kYes); ++ ++ struct TestFloat { ++ double a; ++ float b; ++ }; ++ ++ TestFloat test; ++ ++ __ movfcsr2gr(a4); ++ __ movgr2fcsr(zero_reg); ++ ++ __ Fld_d(f8, MemOperand(a0, offsetof(TestFloat, a))); ++ __ Fld_s(f9, MemOperand(a0, offsetof(TestFloat, b))); ++ __ fabs_d(f10, f8); ++ __ fabs_s(f11, f9); ++ __ Fst_d(f10, MemOperand(a0, offsetof(TestFloat, a))); ++ __ Fst_s(f11, MemOperand(a0, offsetof(TestFloat, b))); ++ ++ __ movgr2fcsr(a4); ++ __ jirl(zero_reg, ra, 0); ++ ++ CodeDesc desc; ++ assm.GetCode(isolate, &desc); ++ Handle code = Factory::CodeBuilder(isolate, desc, Code::STUB).Build(); ++ auto f = GeneratedCode::FromCode(*code); ++ test.a = -2.0; ++ test.b = -2.0; ++ f.Call(&test, 0, 0, 0, 0); ++ CHECK_EQ(test.a, 2.0); ++ CHECK_EQ(test.b, 2.0); ++ ++ test.a = 2.0; ++ test.b = 2.0; ++ f.Call(&test, 0, 0, 0, 0); ++ CHECK_EQ(test.a, 2.0); ++ CHECK_EQ(test.b, 2.0); ++ ++ // Testing biggest positive number ++ test.a = std::numeric_limits::max(); ++ test.b = std::numeric_limits::max(); ++ f.Call(&test, 0, 0, 0, 0); ++ CHECK_EQ(test.a, std::numeric_limits::max()); ++ CHECK_EQ(test.b, std::numeric_limits::max()); ++ ++ // Testing smallest negative number ++ test.a = -std::numeric_limits::max(); // lowest() ++ test.b = -std::numeric_limits::max(); // lowest() ++ f.Call(&test, 0, 0, 0, 0); ++ CHECK_EQ(test.a, std::numeric_limits::max()); ++ CHECK_EQ(test.b, std::numeric_limits::max()); ++ ++ // Testing smallest positive number ++ test.a = -std::numeric_limits::min(); ++ test.b = -std::numeric_limits::min(); ++ f.Call(&test, 0, 0, 0, 0); ++ CHECK_EQ(test.a, std::numeric_limits::min()); ++ CHECK_EQ(test.b, std::numeric_limits::min()); ++ ++ // Testing infinity ++ test.a = ++ -std::numeric_limits::max() / std::numeric_limits::min(); ++ test.b = ++ -std::numeric_limits::max() / std::numeric_limits::min(); ++ f.Call(&test, 0, 0, 0, 0); ++ CHECK_EQ(test.a, std::numeric_limits::max() / ++ std::numeric_limits::min()); ++ CHECK_EQ(test.b, std::numeric_limits::max() / ++ std::numeric_limits::min()); ++ ++ test.a = std::numeric_limits::quiet_NaN(); ++ test.b = std::numeric_limits::quiet_NaN(); ++ f.Call(&test, 0, 0, 0, 0); ++ CHECK(std::isnan(test.a)); ++ CHECK(std::isnan(test.b)); ++ ++ test.a = std::numeric_limits::signaling_NaN(); ++ test.b = 
std::numeric_limits::signaling_NaN(); ++ f.Call(&test, 0, 0, 0, 0); ++ CHECK(std::isnan(test.a)); ++ CHECK(std::isnan(test.b)); ++} ++ ++template ++struct TestCaseMaddMsub { ++ T fj, fk, fa, fd_fmadd, fd_fmsub, fd_fnmadd, fd_fnmsub; ++}; ++ ++template ++void helper_fmadd_fmsub_fnmadd_fnmsub(F func) { ++ CcTest::InitializeVM(); ++ Isolate* isolate = CcTest::i_isolate(); ++ HandleScope scope(isolate); ++ MacroAssembler assm(isolate, v8::internal::CodeObjectRequired::kYes); ++ ++ T x = std::sqrt(static_cast(2.0)); ++ T y = std::sqrt(static_cast(3.0)); ++ T z = std::sqrt(static_cast(5.0)); ++ T x2 = 11.11, y2 = 22.22, z2 = 33.33; ++ // clang-format off ++ TestCaseMaddMsub test_cases[] = { ++ {x, y, z, 0.0, 0.0, 0.0, 0.0}, ++ {x, y, -z, 0.0, 0.0, 0.0, 0.0}, ++ {x, -y, z, 0.0, 0.0, 0.0, 0.0}, ++ {x, -y, -z, 0.0, 0.0, 0.0, 0.0}, ++ {-x, y, z, 0.0, 0.0, 0.0, 0.0}, ++ {-x, y, -z, 0.0, 0.0, 0.0, 0.0}, ++ {-x, -y, z, 0.0, 0.0, 0.0, 0.0}, ++ {-x, -y, -z, 0.0, 0.0, 0.0, 0.0}, ++ {-3.14, 0.2345, -123.000056, 0.0, 0.0, 0.0, 0.0}, ++ {7.3, -23.257, -357.1357, 0.0, 0.0, 0.0, 0.0}, ++ {x2, y2, z2, 0.0, 0.0, 0.0, 0.0}, ++ {x2, y2, -z2, 0.0, 0.0, 0.0, 0.0}, ++ {x2, -y2, z2, 0.0, 0.0, 0.0, 0.0}, ++ {x2, -y2, -z2, 0.0, 0.0, 0.0, 0.0}, ++ {-x2, y2, z2, 0.0, 0.0, 0.0, 0.0}, ++ {-x2, y2, -z2, 0.0, 0.0, 0.0, 0.0}, ++ {-x2, -y2, z2, 0.0, 0.0, 0.0, 0.0}, ++ {-x2, -y2, -z2, 0.0, 0.0, 0.0, 0.0}, ++ }; ++ // clang-format on ++ if (std::is_same::value) { ++ __ Fld_s(f8, MemOperand(a0, offsetof(TestCaseMaddMsub, fj))); ++ __ Fld_s(f9, MemOperand(a0, offsetof(TestCaseMaddMsub, fk))); ++ __ Fld_s(f10, MemOperand(a0, offsetof(TestCaseMaddMsub, fa))); ++ } else if (std::is_same::value) { ++ __ Fld_d(f8, MemOperand(a0, offsetof(TestCaseMaddMsub, fj))); ++ __ Fld_d(f9, MemOperand(a0, offsetof(TestCaseMaddMsub, fk))); ++ __ Fld_d(f10, MemOperand(a0, offsetof(TestCaseMaddMsub, fa))); ++ } else { ++ UNREACHABLE(); ++ } ++ ++ func(assm); ++ __ jirl(zero_reg, ra, 0); ++ ++ CodeDesc desc; ++ assm.GetCode(isolate, &desc); ++ Handle code = Factory::CodeBuilder(isolate, desc, Code::STUB).Build(); ++ auto f = GeneratedCode::FromCode(*code); ++ ++ const size_t kTableLength = sizeof(test_cases) / sizeof(TestCaseMaddMsub); ++ TestCaseMaddMsub tc; ++ for (size_t i = 0; i < kTableLength; i++) { ++ tc.fj = test_cases[i].fj; ++ tc.fk = test_cases[i].fk; ++ tc.fa = test_cases[i].fa; ++ ++ f.Call(&tc, 0, 0, 0, 0); ++ ++ T res_fmadd; ++ T res_fmsub; ++ T res_fnmadd; ++ T res_fnmsub; ++ res_fmadd = std::fma(tc.fj, tc.fk, tc.fa); ++ res_fmsub = std::fma(tc.fj, tc.fk, -tc.fa); ++ res_fnmadd = -std::fma(tc.fj, tc.fk, tc.fa); ++ res_fnmsub = -std::fma(tc.fj, tc.fk, -tc.fa); ++ ++ CHECK_EQ(tc.fd_fmadd, res_fmadd); ++ CHECK_EQ(tc.fd_fmsub, res_fmsub); ++ CHECK_EQ(tc.fd_fnmadd, res_fnmadd); ++ CHECK_EQ(tc.fd_fnmsub, res_fnmsub); ++ } ++} ++ ++TEST(FMADD_FMSUB_FNMADD_FNMSUB_S) { ++ helper_fmadd_fmsub_fnmadd_fnmsub([](MacroAssembler& assm) { ++ __ fmadd_s(f11, f8, f9, f10); ++ __ Fst_s(f11, MemOperand(a0, offsetof(TestCaseMaddMsub, fd_fmadd))); ++ __ fmsub_s(f12, f8, f9, f10); ++ __ Fst_s(f12, MemOperand(a0, offsetof(TestCaseMaddMsub, fd_fmsub))); ++ __ fnmadd_s(f13, f8, f9, f10); ++ __ Fst_s(f13, MemOperand(a0, offsetof(TestCaseMaddMsub, fd_fnmadd))); ++ __ fnmsub_s(f14, f8, f9, f10); ++ __ Fst_s(f14, MemOperand(a0, offsetof(TestCaseMaddMsub, fd_fnmsub))); ++ }); ++} ++ ++TEST(FMADD_FMSUB_FNMADD_FNMSUB_D) { ++ helper_fmadd_fmsub_fnmadd_fnmsub([](MacroAssembler& assm) { ++ __ fmadd_d(f11, f8, f9, f10); ++ __ Fst_d(f11, MemOperand(a0, 
offsetof(TestCaseMaddMsub, fd_fmadd))); ++ __ fmsub_d(f12, f8, f9, f10); ++ __ Fst_d(f12, MemOperand(a0, offsetof(TestCaseMaddMsub, fd_fmsub))); ++ __ fnmadd_d(f13, f8, f9, f10); ++ __ Fst_d(f13, ++ MemOperand(a0, offsetof(TestCaseMaddMsub, fd_fnmadd))); ++ __ fnmsub_d(f14, f8, f9, f10); ++ __ Fst_d(f14, ++ MemOperand(a0, offsetof(TestCaseMaddMsub, fd_fnmsub))); ++ }); ++} ++ ++/* ++TEST(FSQRT_FRSQRT_FRECIP) { ++ const int kTableLength = 4; ++ const double deltaDouble = 2E-15; ++ const float deltaFloat = 2E-7; ++ const float sqrt2_s = sqrt(2); ++ const double sqrt2_d = sqrt(2); ++ CcTest::InitializeVM(); ++ Isolate* isolate = CcTest::i_isolate(); ++ HandleScope scope(isolate); ++ MacroAssembler assm(isolate, v8::internal::CodeObjectRequired::kYes); ++ ++ struct TestFloat { ++ float a; ++ float resultS1; ++ float resultS2; ++ float resultS3; ++ double b; ++ double resultD1; ++ double resultD2; ++ double resultD3; ++ }; ++ TestFloat test; ++ // clang-format off ++ double inputs_D[kTableLength] = { ++ 0.0L, 4.0L, 2.0L, 4e-28L ++ }; ++ ++ double outputs_D[kTableLength] = { ++ 0.0L, 2.0L, sqrt2_d, 2e-14L ++ }; ++ float inputs_S[kTableLength] = { ++ 0.0, 4.0, 2.0, 4e-28 ++ }; ++ ++ float outputs_S[kTableLength] = { ++ 0.0, 2.0, sqrt2_s, 2e-14 ++ }; ++ // clang-format on ++ __ Fld_s(f8, MemOperand(a0, offsetof(TestFloat, a))); ++ __ Fld_d(f9, MemOperand(a0, offsetof(TestFloat, b))); ++ __ fsqrt_s(f10, f8); ++ __ fsqrt_d(f11, f9); ++ __ frsqrt_s(f12, f8); ++ __ frsqrt_d(f13, f9); ++ __ frecip_s(f14, f8); ++ __ frecip_d(f15, f9); ++ __ Fst_s(f10, MemOperand(a0, offsetof(TestFloat, resultS1))); ++ __ Fst_d(f11, MemOperand(a0, offsetof(TestFloat, resultD1))); ++ __ Fst_s(f12, MemOperand(a0, offsetof(TestFloat, resultS2))); ++ __ Fst_d(f13, MemOperand(a0, offsetof(TestFloat, resultD2))); ++ __ Fst_s(f14, MemOperand(a0, offsetof(TestFloat, resultS3))); ++ __ Fst_d(f15, MemOperand(a0, offsetof(TestFloat, resultD3))); ++ __ jirl(zero_reg, ra, 0); ++ ++ CodeDesc desc; ++ assm.GetCode(isolate, &desc); ++ Handle code = Factory::CodeBuilder(isolate, desc, Code::STUB).Build(); ++ auto f = GeneratedCode::FromCode(*code); ++ ++ for (int i = 0; i < kTableLength; i++) { ++ float f1; ++ double d1; ++ test.a = inputs_S[i]; ++ test.b = inputs_D[i]; ++ ++ f.Call(&test, 0, 0, 0, 0); ++ ++ CHECK_EQ(test.resultS1, outputs_S[i]); ++ CHECK_EQ(test.resultD1, outputs_D[i]); ++ ++ if (i != 0) { ++ f1 = test.resultS2 - 1.0F/outputs_S[i]; ++ f1 = (f1 < 0) ? f1 : -f1; ++ CHECK(f1 <= deltaFloat); ++ d1 = test.resultD2 - 1.0L/outputs_D[i]; ++ d1 = (d1 < 0) ? d1 : -d1; ++ CHECK(d1 <= deltaDouble); ++ f1 = test.resultS3 - 1.0F/inputs_S[i]; ++ f1 = (f1 < 0) ? f1 : -f1; ++ CHECK(f1 <= deltaFloat); ++ d1 = test.resultD3 - 1.0L/inputs_D[i]; ++ d1 = (d1 < 0) ? d1 : -d1; ++ CHECK(d1 <= deltaDouble); ++ } else { ++ CHECK_EQ(test.resultS2, 1.0F/outputs_S[i]); ++ CHECK_EQ(test.resultD2, 1.0L/outputs_D[i]); ++ CHECK_EQ(test.resultS3, 1.0F/inputs_S[i]); ++ CHECK_EQ(test.resultD3, 1.0L/inputs_D[i]); ++ } ++ } ++}*/ ++ ++TEST(LA15) { ++ // Test chaining of label usages within instructions (issue 1644). 
++ CcTest::InitializeVM(); ++ Isolate* isolate = CcTest::i_isolate(); ++ HandleScope scope(isolate); ++ Assembler assm(AssemblerOptions{}); ++ ++ Label target; ++ __ beq(a0, a1, &target); ++ __ nop(); ++ __ bne(a0, a1, &target); ++ __ nop(); ++ __ bind(&target); ++ __ nop(); ++ __ jirl(zero_reg, ra, 0); ++ ++ CodeDesc desc; ++ assm.GetCode(isolate, &desc); ++ Handle code = Factory::CodeBuilder(isolate, desc, Code::STUB).Build(); ++ auto f = GeneratedCode::FromCode(*code); ++ f.Call(1, 1, 0, 0, 0); ++} ++ ++TEST(Trampoline) { ++ static const int kMaxBranchOffset = (1 << (18 - 1)) - 1; ++ ++ CcTest::InitializeVM(); ++ Isolate* isolate = CcTest::i_isolate(); ++ HandleScope scope(isolate); ++ ++ MacroAssembler assm(isolate, v8::internal::CodeObjectRequired::kYes); ++ Label done; ++ size_t nr_calls = kMaxBranchOffset / kInstrSize + 5; ++ ++ __ xor_(a2, a2, a2); ++ __ BranchShort(&done, eq, a0, Operand(a1)); ++ for (size_t i = 0; i < nr_calls; ++i) { ++ __ addi_d(a2, a2, 1); ++ } ++ __ bind(&done); ++ __ or_(a0, a2, zero_reg); ++ __ jirl(zero_reg, ra, 0); ++ ++ CodeDesc desc; ++ assm.GetCode(isolate, &desc); ++ Handle code = Factory::CodeBuilder(isolate, desc, Code::STUB).Build(); ++ auto f = GeneratedCode::FromCode(*code); ++ ++ int64_t res = reinterpret_cast(f.Call(42, 42, 0, 0, 0)); ++ CHECK_EQ(0, res); ++} ++ ++#undef __ ++ ++} // namespace internal ++} // namespace v8 +diff --git a/src/3rdparty/chromium/v8/test/cctest/test-disasm-la64.cc b/src/3rdparty/chromium/v8/test/cctest/test-disasm-la64.cc +new file mode 100644 +index 00000000000..36e46dc2131 +--- /dev/null ++++ b/src/3rdparty/chromium/v8/test/cctest/test-disasm-la64.cc +@@ -0,0 +1,966 @@ ++// Copyright 2012 the V8 project authors. All rights reserved. ++// Redistribution and use in source and binary forms, with or without ++// modification, are permitted provided that the following conditions are ++// met: ++// ++// * Redistributions of source code must retain the above copyright ++// notice, this list of conditions and the following disclaimer. ++// * Redistributions in binary form must reproduce the above ++// copyright notice, this list of conditions and the following ++// disclaimer in the documentation and/or other materials provided ++// with the distribution. ++// * Neither the name of Google Inc. nor the names of its ++// contributors may be used to endorse or promote products derived ++// from this software without specific prior written permission. ++// ++// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS ++// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT ++// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR ++// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT ++// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, ++// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT ++// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, ++// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY ++// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT ++// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE ++// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
++// ++ ++#include ++#include ++ ++#include "src/init/v8.h" ++ ++#include "src/codegen/macro-assembler.h" ++#include "src/debug/debug.h" ++#include "src/diagnostics/disasm.h" ++#include "src/diagnostics/disassembler.h" ++#include "src/execution/frames-inl.h" ++#include "test/cctest/cctest.h" ++ ++namespace v8 { ++namespace internal { ++ ++bool DisassembleAndCompare(byte* pc, const char* compare_string) { ++ disasm::NameConverter converter; ++ disasm::Disassembler disasm(converter); ++ EmbeddedVector<char, 128> disasm_buffer; ++ ++ /* if (prev_instr_compact_branch) { ++ disasm.InstructionDecode(disasm_buffer, pc); ++ pc += 4; ++ }*/ ++ ++ disasm.InstructionDecode(disasm_buffer, pc); ++ ++ if (strcmp(compare_string, disasm_buffer.begin()) != 0) { ++ fprintf(stderr, ++ "expected: \n" ++ "%s\n" ++ "disassembled: \n" ++ "%s\n\n", ++ compare_string, disasm_buffer.begin()); ++ return false; ++ } ++ return true; ++} ++ ++// Set up V8 to a state where we can at least run the assembler and ++// disassembler. Declare the variables and allocate the data structures used ++// in the rest of the macros. ++#define SET_UP() \ ++ CcTest::InitializeVM(); \ ++ Isolate* isolate = CcTest::i_isolate(); \ ++ HandleScope scope(isolate); \ ++ byte* buffer = reinterpret_cast<byte*>(malloc(4 * 1024)); \ ++ Assembler assm(AssemblerOptions{}, \ ++ ExternalAssemblerBuffer(buffer, 4 * 1024)); \ ++ bool failure = false; ++ ++// This macro assembles one instruction using the preallocated assembler and ++// disassembles the generated instruction, comparing the output to the expected ++// value. If the comparison fails an error message is printed, but the test ++// continues to run until the end. ++#define COMPARE(asm_, compare_string) \ ++ { \ ++ int pc_offset = assm.pc_offset(); \ ++ byte* progcounter = &buffer[pc_offset]; \ ++ assm.asm_; \ ++ if (!DisassembleAndCompare(progcounter, compare_string)) failure = true; \ ++ } ++ ++// Verify that all invocations of the COMPARE macro passed successfully. ++// Exit with a failure if at least one of the tests failed.
++#define VERIFY_RUN() \ ++ if (failure) { \ ++ FATAL("LA64 Disassembler tests failed.\n"); \ ++ } ++ ++#define COMPARE_PC_REL(asm_, compare_string, offset) \ ++ { \ ++ int pc_offset = assm.pc_offset(); \ ++ byte* progcounter = &buffer[pc_offset]; \ ++ char str_with_address[100]; \ ++ printf("%p\n", static_cast(progcounter)); \ ++ snprintf(str_with_address, sizeof(str_with_address), "%s -> %p", \ ++ compare_string, static_cast(progcounter + (offset * 4))); \ ++ assm.asm_; \ ++ if (!DisassembleAndCompare(progcounter, str_with_address)) failure = true; \ ++ } ++ ++TEST(TypeOp6) { ++ SET_UP(); ++ ++ COMPARE(jirl(ra, t7, 0), "4c000261 jirl ra, t7, 0"); ++ COMPARE(jirl(ra, t7, 32767), "4dfffe61 jirl ra, t7, 32767"); ++ COMPARE(jirl(ra, t7, -32768), "4e000261 jirl ra, t7, -32768"); ++ ++ VERIFY_RUN(); ++} ++ ++TEST(TypeOp6PC) { ++ SET_UP(); ++ ++ COMPARE_PC_REL(beqz(t7, 1048575), "43fffe6f beqz t7, 1048575", ++ 1048575); ++ COMPARE_PC_REL(beqz(t0, -1048576), "40000190 beqz t0, -1048576", ++ -1048576); ++ COMPARE_PC_REL(beqz(t1, 0), "400001a0 beqz t1, 0", 0); ++ ++ COMPARE_PC_REL(bnez(a2, 1048575), "47fffccf bnez a2, 1048575", ++ 1048575); ++ COMPARE_PC_REL(bnez(s3, -1048576), "44000350 bnez s3, -1048576", ++ -1048576); ++ COMPARE_PC_REL(bnez(t8, 0), "44000280 bnez t8, 0", 0); ++ ++ COMPARE_PC_REL(bceqz(FCC0, 1048575), "4bfffc0f bceqz fcc0, 1048575", ++ 1048575); ++ COMPARE_PC_REL(bceqz(FCC0, -1048576), ++ "48000010 bceqz fcc0, -1048576", -1048576); ++ COMPARE_PC_REL(bceqz(FCC0, 0), "48000000 bceqz fcc0, 0", 0); ++ ++ COMPARE_PC_REL(bcnez(FCC0, 1048575), "4bfffd0f bcnez fcc0, 1048575", ++ 1048575); ++ COMPARE_PC_REL(bcnez(FCC0, -1048576), ++ "48000110 bcnez fcc0, -1048576", -1048576); ++ COMPARE_PC_REL(bcnez(FCC0, 0), "48000100 bcnez fcc0, 0", 0); ++ ++ COMPARE_PC_REL(b(33554431), "53fffdff b 33554431", 33554431); ++ COMPARE_PC_REL(b(-33554432), "50000200 b -33554432", -33554432); ++ COMPARE_PC_REL(b(0), "50000000 b 0", 0); ++ ++ COMPARE_PC_REL(beq(t0, a6, 32767), "59fffd8a beq t0, a6, 32767", ++ 32767); ++ COMPARE_PC_REL(beq(t1, a0, -32768), "5a0001a4 beq t1, a0, -32768", ++ -32768); ++ COMPARE_PC_REL(beq(a4, t1, 0), "5800010d beq a4, t1, 0", 0); ++ ++ COMPARE_PC_REL(bne(a3, a4, 32767), "5dfffce8 bne a3, a4, 32767", ++ 32767); ++ COMPARE_PC_REL(bne(a6, a5, -32768), "5e000149 bne a6, a5, -32768", ++ -32768); ++ COMPARE_PC_REL(bne(a4, a5, 0), "5c000109 bne a4, a5, 0", 0); ++ ++ COMPARE_PC_REL(blt(a4, a6, 32767), "61fffd0a blt a4, a6, 32767", ++ 32767); ++ COMPARE_PC_REL(blt(a4, a5, -32768), "62000109 blt a4, a5, -32768", ++ -32768); ++ COMPARE_PC_REL(blt(a4, a6, 0), "6000010a blt a4, a6, 0", 0); ++ ++ COMPARE_PC_REL(bge(s7, a5, 32767), "65ffffc9 bge s7, a5, 32767", ++ 32767); ++ COMPARE_PC_REL(bge(a1, a3, -32768), "660000a7 bge a1, a3, -32768", ++ -32768); ++ COMPARE_PC_REL(bge(a5, s3, 0), "6400013a bge a5, s3, 0", 0); ++ ++ COMPARE_PC_REL(bltu(a5, s7, 32767), "69fffd3e bltu a5, s7, 32767", ++ 32767); ++ COMPARE_PC_REL(bltu(a4, a5, -32768), "6a000109 bltu a4, a5, -32768", ++ -32768); ++ COMPARE_PC_REL(bltu(a4, t6, 0), "68000112 bltu a4, t6, 0", 0); ++ ++ COMPARE_PC_REL(bgeu(a7, a6, 32767), "6dfffd6a bgeu a7, a6, 32767", ++ 32767); ++ COMPARE_PC_REL(bgeu(a5, a3, -32768), "6e000127 bgeu a5, a3, -32768", ++ -32768); ++ COMPARE_PC_REL(bgeu(t2, t1, 0), "6c0001cd bgeu t2, t1, 0", 0); ++ ++ VERIFY_RUN(); ++} ++ ++TEST(TypeOp7) { ++ SET_UP(); ++ ++ COMPARE(lu12i_w(a4, 524287), "14ffffe8 lu12i.w a4, 524287"); ++ COMPARE(lu12i_w(a5, -524288), "15000009 lu12i.w a5, -524288"); ++ COMPARE(lu12i_w(a6, 0), 
"1400000a lu12i.w a6, 0"); ++ ++ COMPARE(lu32i_d(a7, 524287), "16ffffeb lu32i.d a7, 524287"); ++ COMPARE(lu32i_d(t0, 524288), "1700000c lu32i.d t0, -524288"); ++ COMPARE(lu32i_d(t1, 0), "1600000d lu32i.d t1, 0"); ++ ++ COMPARE(pcaddi(t1, 1), "1800002d pcaddi t1, 1"); ++ COMPARE(pcaddi(t2, 524287), "18ffffee pcaddi t2, 524287"); ++ COMPARE(pcaddi(t3, -524288), "1900000f pcaddi t3, -524288"); ++ COMPARE(pcaddi(t4, 0), "18000010 pcaddi t4, 0"); ++ ++ COMPARE(pcalau12i(t5, 524287), "1afffff1 pcalau12i t5, 524287"); ++ COMPARE(pcalau12i(t6, -524288), "1b000012 pcalau12i t6, -524288"); ++ COMPARE(pcalau12i(a4, 0), "1a000008 pcalau12i a4, 0"); ++ ++ COMPARE(pcaddu12i(a5, 524287), "1cffffe9 pcaddu12i a5, 524287"); ++ COMPARE(pcaddu12i(a6, -524288), "1d00000a pcaddu12i a6, -524288"); ++ COMPARE(pcaddu12i(a7, 0), "1c00000b pcaddu12i a7, 0"); ++ ++ COMPARE(pcaddu18i(t0, 524287), "1effffec pcaddu18i t0, 524287"); ++ COMPARE(pcaddu18i(t1, -524288), "1f00000d pcaddu18i t1, -524288"); ++ COMPARE(pcaddu18i(t2, 0), "1e00000e pcaddu18i t2, 0"); ++ ++ VERIFY_RUN(); ++} ++ ++TEST(TypeOp8) { ++ SET_UP(); ++ ++ COMPARE(ll_w(t2, t3, 32764), "207ffdee ll.w t2, t3, 32764"); ++ COMPARE(ll_w(t3, t4, -32768), "2080020f ll.w t3, t4, -32768"); ++ COMPARE(ll_w(t5, t6, 0), "20000251 ll.w t5, t6, 0"); ++ ++ COMPARE(sc_w(a6, a7, 32764), "217ffd6a sc.w a6, a7, 32764"); ++ COMPARE(sc_w(t0, t1, -32768), "218001ac sc.w t0, t1, -32768"); ++ COMPARE(sc_w(t2, t3, 0), "210001ee sc.w t2, t3, 0"); ++ ++ COMPARE(ll_d(a0, a1, 32764), "227ffca4 ll.d a0, a1, 32764"); ++ COMPARE(ll_d(a2, a3, -32768), "228000e6 ll.d a2, a3, -32768"); ++ COMPARE(ll_d(a4, a5, 0), "22000128 ll.d a4, a5, 0"); ++ ++ COMPARE(sc_d(t4, t5, 32764), "237ffe30 sc.d t4, t5, 32764"); ++ COMPARE(sc_d(t6, a0, -32768), "23800092 sc.d t6, a0, -32768"); ++ COMPARE(sc_d(a1, a2, 0), "230000c5 sc.d a1, a2, 0"); ++ ++ COMPARE(ldptr_w(a4, a5, 32764), "247ffd28 ldptr.w a4, a5, 32764"); ++ COMPARE(ldptr_w(a6, a7, -32768), "2480016a ldptr.w a6, a7, -32768"); ++ COMPARE(ldptr_w(t0, t1, 0), "240001ac ldptr.w t0, t1, 0"); ++ ++ COMPARE(stptr_w(a4, a5, 32764), "257ffd28 stptr.w a4, a5, 32764"); ++ COMPARE(stptr_w(a6, a7, -32768), "2580016a stptr.w a6, a7, -32768"); ++ COMPARE(stptr_w(t0, t1, 0), "250001ac stptr.w t0, t1, 0"); ++ ++ COMPARE(ldptr_d(t2, t3, 32764), "267ffdee ldptr.d t2, t3, 32764"); ++ COMPARE(ldptr_d(t4, t5, -32768), "26800230 ldptr.d t4, t5, -32768"); ++ COMPARE(ldptr_d(t6, a4, 0), "26000112 ldptr.d t6, a4, 0"); ++ ++ COMPARE(stptr_d(a5, a6, 32764), "277ffd49 stptr.d a5, a6, 32764"); ++ COMPARE(stptr_d(a7, t0, -32768), "2780018b stptr.d a7, t0, -32768"); ++ COMPARE(stptr_d(t1, t2, 0), "270001cd stptr.d t1, t2, 0"); ++ ++ VERIFY_RUN(); ++} ++ ++TEST(TypeOp10) { ++ SET_UP(); ++ ++ COMPARE(bstrins_w(a4, a5, 31, 16), ++ "007f4128 bstrins.w a4, a5, 31, 16"); ++ COMPARE(bstrins_w(a6, a7, 5, 0), "0065016a bstrins.w a6, a7, 5, 0"); ++ ++ COMPARE(bstrins_d(a3, zero_reg, 17, 0), ++ "00910007 bstrins.d a3, zero_reg, 17, 0"); ++ COMPARE(bstrins_d(t1, zero_reg, 17, 0), ++ "0091000d bstrins.d t1, zero_reg, 17, 0"); ++ ++ COMPARE(bstrpick_w(t0, t1, 31, 29), ++ "007ff5ac bstrpick.w t0, t1, 31, 29"); ++ COMPARE(bstrpick_w(a4, a5, 16, 0), ++ "00708128 bstrpick.w a4, a5, 16, 0"); ++ ++ COMPARE(bstrpick_d(a5, a5, 31, 0), ++ "00df0129 bstrpick.d a5, a5, 31, 0"); ++ COMPARE(bstrpick_d(a4, a4, 25, 2), ++ "00d90908 bstrpick.d a4, a4, 25, 2"); ++ ++ COMPARE(slti(t2, a5, 2047), "021ffd2e slti t2, a5, 2047"); ++ COMPARE(slti(a7, a1, -2048), "022000ab slti a7, a1, -2048"); ++ ++ 
COMPARE(sltui(a7, a7, 2047), "025ffd6b sltui a7, a7, 2047"); ++ COMPARE(sltui(t1, t1, -2048), "026001ad sltui t1, t1, -2048"); ++ ++ COMPARE(addi_w(t0, t2, 2047), "029ffdcc addi.w t0, t2, 2047"); ++ COMPARE(addi_w(a0, a0, -2048), "02a00084 addi.w a0, a0, -2048"); ++ ++ COMPARE(addi_d(a0, zero_reg, 2047), ++ "02dffc04 addi.d a0, zero_reg, 2047"); ++ COMPARE(addi_d(t7, t7, -2048), "02e00273 addi.d t7, t7, -2048"); ++ ++ COMPARE(lu52i_d(a0, a0, 2047), "031ffc84 lu52i.d a0, a0, 2047"); ++ COMPARE(lu52i_d(a1, a1, -2048), "032000a5 lu52i.d a1, a1, -2048"); ++ ++ COMPARE(andi(s3, a3, 0xfff), "037ffcfa andi s3, a3, 0xfff"); ++ COMPARE(andi(a4, a4, 0), "03400108 andi a4, a4, 0x0"); ++ ++ COMPARE(ori(t6, t6, 0xfff), "03bffe52 ori t6, t6, 0xfff"); ++ COMPARE(ori(t6, t6, 0), "03800252 ori t6, t6, 0x0"); ++ ++ COMPARE(xori(t1, t1, 0xfff), "03fffdad xori t1, t1, 0xfff"); ++ COMPARE(xori(a3, a3, 0x0), "03c000e7 xori a3, a3, 0x0"); ++ ++ COMPARE(ld_b(a1, a1, 2047), "281ffca5 ld.b a1, a1, 2047"); ++ COMPARE(ld_b(a4, a4, -2048), "28200108 ld.b a4, a4, -2048"); ++ ++ COMPARE(ld_h(a4, a0, 2047), "285ffc88 ld.h a4, a0, 2047"); ++ COMPARE(ld_h(a4, a3, -2048), "286000e8 ld.h a4, a3, -2048"); ++ ++ COMPARE(ld_w(a6, a6, 2047), "289ffd4a ld.w a6, a6, 2047"); ++ COMPARE(ld_w(a5, a4, -2048), "28a00109 ld.w a5, a4, -2048"); ++ ++ COMPARE(ld_d(a0, a3, 2047), "28dffce4 ld.d a0, a3, 2047"); ++ COMPARE(ld_d(a6, fp, -2048), "28e002ca ld.d a6, fp, -2048"); ++ COMPARE(ld_d(a0, a6, 0), "28c00144 ld.d a0, a6, 0"); ++ ++ COMPARE(st_b(a4, a0, 2047), "291ffc88 st.b a4, a0, 2047"); ++ COMPARE(st_b(a6, a5, -2048), "2920012a st.b a6, a5, -2048"); ++ ++ COMPARE(st_h(a4, a0, 2047), "295ffc88 st.h a4, a0, 2047"); ++ COMPARE(st_h(t1, t2, -2048), "296001cd st.h t1, t2, -2048"); ++ ++ COMPARE(st_w(t3, a4, 2047), "299ffd0f st.w t3, a4, 2047"); ++ COMPARE(st_w(a3, t2, -2048), "29a001c7 st.w a3, t2, -2048"); ++ ++ COMPARE(st_d(s3, sp, 2047), "29dffc7a st.d s3, sp, 2047"); ++ COMPARE(st_d(fp, s6, -2048), "29e003b6 st.d fp, s6, -2048"); ++ ++ COMPARE(ld_bu(a6, a0, 2047), "2a1ffc8a ld.bu a6, a0, 2047"); ++ COMPARE(ld_bu(a7, a7, -2048), "2a20016b ld.bu a7, a7, -2048"); ++ ++ COMPARE(ld_hu(a7, a7, 2047), "2a5ffd6b ld.hu a7, a7, 2047"); ++ COMPARE(ld_hu(a3, a3, -2048), "2a6000e7 ld.hu a3, a3, -2048"); ++ ++ COMPARE(ld_wu(a3, a0, 2047), "2a9ffc87 ld.wu a3, a0, 2047"); ++ COMPARE(ld_wu(a3, a5, -2048), "2aa00127 ld.wu a3, a5, -2048"); ++ ++ COMPARE(preld(31, a7, 2047), "2adffd7f preld 0x1f(31), a7, 2047"); ++ COMPARE(preld(0, t0, -2048), "2ae00180 preld 0x0(0), t0, -2048"); ++ ++ COMPARE(fld_s(f0, a3, 2047), "2b1ffce0 fld.s f0, a3, 2047"); ++ COMPARE(fld_s(f0, a1, -2048), "2b2000a0 fld.s f0, a1, -2048"); ++ ++ COMPARE(fld_d(f0, a0, 2047), "2b9ffc80 fld.d f0, a0, 2047"); ++ COMPARE(fld_d(f0, fp, -2048), "2ba002c0 fld.d f0, fp, -2048"); ++ ++ COMPARE(fst_d(f0, fp, 2047), "2bdffec0 fst.d f0, fp, 2047"); ++ COMPARE(fst_d(f0, a0, -2048), "2be00080 fst.d f0, a0, -2048"); ++ ++ COMPARE(fst_s(f0, a5, 2047), "2b5ffd20 fst.s f0, a5, 2047"); ++ COMPARE(fst_s(f0, a3, -2048), "2b6000e0 fst.s f0, a3, -2048"); ++ ++ VERIFY_RUN(); ++} ++ ++TEST(TypeOp12) { ++ SET_UP(); ++ ++ COMPARE(fmadd_s(f0, f1, f2, f3), "08118820 fmadd.s f0, f1, f2, f3"); ++ COMPARE(fmadd_s(f4, f5, f6, f7), "081398a4 fmadd.s f4, f5, f6, f7"); ++ ++ COMPARE(fmadd_d(f8, f9, f10, f11), ++ "0825a928 fmadd.d f8, f9, f10, f11"); ++ COMPARE(fmadd_d(f12, f13, f14, f15), ++ "0827b9ac fmadd.d f12, f13, f14, f15"); ++ ++ COMPARE(fmsub_s(f0, f1, f2, f3), "08518820 fmsub.s f0, f1, f2, f3"); ++ 
COMPARE(fmsub_s(f4, f5, f6, f7), "085398a4 fmsub.s f4, f5, f6, f7"); ++ ++ COMPARE(fmsub_d(f8, f9, f10, f11), ++ "0865a928 fmsub.d f8, f9, f10, f11"); ++ COMPARE(fmsub_d(f12, f13, f14, f15), ++ "0867b9ac fmsub.d f12, f13, f14, f15"); ++ ++ COMPARE(fnmadd_s(f0, f1, f2, f3), ++ "08918820 fnmadd.s f0, f1, f2, f3"); ++ COMPARE(fnmadd_s(f4, f5, f6, f7), ++ "089398a4 fnmadd.s f4, f5, f6, f7"); ++ ++ COMPARE(fnmadd_d(f8, f9, f10, f11), ++ "08a5a928 fnmadd.d f8, f9, f10, f11"); ++ COMPARE(fnmadd_d(f12, f13, f14, f15), ++ "08a7b9ac fnmadd.d f12, f13, f14, f15"); ++ ++ COMPARE(fnmsub_s(f0, f1, f2, f3), ++ "08d18820 fnmsub.s f0, f1, f2, f3"); ++ COMPARE(fnmsub_s(f4, f5, f6, f7), ++ "08d398a4 fnmsub.s f4, f5, f6, f7"); ++ ++ COMPARE(fnmsub_d(f8, f9, f10, f11), ++ "08e5a928 fnmsub.d f8, f9, f10, f11"); ++ COMPARE(fnmsub_d(f12, f13, f14, f15), ++ "08e7b9ac fnmsub.d f12, f13, f14, f15"); ++ ++ COMPARE(fcmp_cond_s(CAF, f1, f2, FCC0), ++ "0c100820 fcmp.caf.s fcc0, f1, f2"); ++ COMPARE(fcmp_cond_s(CUN, f5, f6, FCC0), ++ "0c1418a0 fcmp.cun.s fcc0, f5, f6"); ++ COMPARE(fcmp_cond_s(CEQ, f9, f10, FCC0), ++ "0c122920 fcmp.ceq.s fcc0, f9, f10"); ++ COMPARE(fcmp_cond_s(CUEQ, f13, f14, FCC0), ++ "0c1639a0 fcmp.cueq.s fcc0, f13, f14"); ++ ++ COMPARE(fcmp_cond_s(CLT, f1, f2, FCC0), ++ "0c110820 fcmp.clt.s fcc0, f1, f2"); ++ COMPARE(fcmp_cond_s(CULT, f5, f6, FCC0), ++ "0c1518a0 fcmp.cult.s fcc0, f5, f6"); ++ COMPARE(fcmp_cond_s(CLE, f9, f10, FCC0), ++ "0c132920 fcmp.cle.s fcc0, f9, f10"); ++ COMPARE(fcmp_cond_s(CULE, f13, f14, FCC0), ++ "0c1739a0 fcmp.cule.s fcc0, f13, f14"); ++ ++ COMPARE(fcmp_cond_s(CNE, f1, f2, FCC0), ++ "0c180820 fcmp.cne.s fcc0, f1, f2"); ++ COMPARE(fcmp_cond_s(COR, f5, f6, FCC0), ++ "0c1a18a0 fcmp.cor.s fcc0, f5, f6"); ++ COMPARE(fcmp_cond_s(CUNE, f9, f10, FCC0), ++ "0c1c2920 fcmp.cune.s fcc0, f9, f10"); ++ COMPARE(fcmp_cond_s(SAF, f13, f14, FCC0), ++ "0c10b9a0 fcmp.saf.s fcc0, f13, f14"); ++ ++ COMPARE(fcmp_cond_s(SUN, f1, f2, FCC0), ++ "0c148820 fcmp.sun.s fcc0, f1, f2"); ++ COMPARE(fcmp_cond_s(SEQ, f5, f6, FCC0), ++ "0c1298a0 fcmp.seq.s fcc0, f5, f6"); ++ COMPARE(fcmp_cond_s(SUEQ, f9, f10, FCC0), ++ "0c16a920 fcmp.sueq.s fcc0, f9, f10"); ++ // COMPARE(fcmp_cond_s(SLT, f13, f14, FCC0), ++ // "0c11b9a0 fcmp.slt.s fcc0, f13, f14"); ++ ++ COMPARE(fcmp_cond_s(SULT, f1, f2, FCC0), ++ "0c158820 fcmp.sult.s fcc0, f1, f2"); ++ COMPARE(fcmp_cond_s(SLE, f5, f6, FCC0), ++ "0c1398a0 fcmp.sle.s fcc0, f5, f6"); ++ COMPARE(fcmp_cond_s(SULE, f9, f10, FCC0), ++ "0c17a920 fcmp.sule.s fcc0, f9, f10"); ++ COMPARE(fcmp_cond_s(SNE, f13, f14, FCC0), ++ "0c18b9a0 fcmp.sne.s fcc0, f13, f14"); ++ COMPARE(fcmp_cond_s(SOR, f13, f14, FCC0), ++ "0c1ab9a0 fcmp.sor.s fcc0, f13, f14"); ++ COMPARE(fcmp_cond_s(SUNE, f1, f2, FCC0), ++ "0c1c8820 fcmp.sune.s fcc0, f1, f2"); ++ ++ COMPARE(fcmp_cond_d(CAF, f1, f2, FCC0), ++ "0c200820 fcmp.caf.d fcc0, f1, f2"); ++ COMPARE(fcmp_cond_d(CUN, f5, f6, FCC0), ++ "0c2418a0 fcmp.cun.d fcc0, f5, f6"); ++ COMPARE(fcmp_cond_d(CEQ, f9, f10, FCC0), ++ "0c222920 fcmp.ceq.d fcc0, f9, f10"); ++ COMPARE(fcmp_cond_d(CUEQ, f13, f14, FCC0), ++ "0c2639a0 fcmp.cueq.d fcc0, f13, f14"); ++ ++ COMPARE(fcmp_cond_d(CLT, f1, f2, FCC0), ++ "0c210820 fcmp.clt.d fcc0, f1, f2"); ++ COMPARE(fcmp_cond_d(CULT, f5, f6, FCC0), ++ "0c2518a0 fcmp.cult.d fcc0, f5, f6"); ++ COMPARE(fcmp_cond_d(CLE, f9, f10, FCC0), ++ "0c232920 fcmp.cle.d fcc0, f9, f10"); ++ COMPARE(fcmp_cond_d(CULE, f13, f14, FCC0), ++ "0c2739a0 fcmp.cule.d fcc0, f13, f14"); ++ ++ COMPARE(fcmp_cond_d(CNE, f1, f2, FCC0), ++ "0c280820 fcmp.cne.d fcc0, f1, 
f2"); ++ COMPARE(fcmp_cond_d(COR, f5, f6, FCC0), ++ "0c2a18a0 fcmp.cor.d fcc0, f5, f6"); ++ COMPARE(fcmp_cond_d(CUNE, f9, f10, FCC0), ++ "0c2c2920 fcmp.cune.d fcc0, f9, f10"); ++ COMPARE(fcmp_cond_d(SAF, f13, f14, FCC0), ++ "0c20b9a0 fcmp.saf.d fcc0, f13, f14"); ++ ++ COMPARE(fcmp_cond_d(SUN, f1, f2, FCC0), ++ "0c248820 fcmp.sun.d fcc0, f1, f2"); ++ COMPARE(fcmp_cond_d(SEQ, f5, f6, FCC0), ++ "0c2298a0 fcmp.seq.d fcc0, f5, f6"); ++ COMPARE(fcmp_cond_d(SUEQ, f9, f10, FCC0), ++ "0c26a920 fcmp.sueq.d fcc0, f9, f10"); ++ // COMPARE(fcmp_cond_d(SLT, f13, f14, FCC0), ++ // "0c21b9a0 fcmp.slt.d fcc0, f13, f14"); ++ ++ COMPARE(fcmp_cond_d(SULT, f1, f2, FCC0), ++ "0c258820 fcmp.sult.d fcc0, f1, f2"); ++ COMPARE(fcmp_cond_d(SLE, f5, f6, FCC0), ++ "0c2398a0 fcmp.sle.d fcc0, f5, f6"); ++ COMPARE(fcmp_cond_d(SULE, f9, f10, FCC0), ++ "0c27a920 fcmp.sule.d fcc0, f9, f10"); ++ COMPARE(fcmp_cond_d(SNE, f13, f14, FCC0), ++ "0c28b9a0 fcmp.sne.d fcc0, f13, f14"); ++ COMPARE(fcmp_cond_d(SOR, f13, f14, FCC0), ++ "0c2ab9a0 fcmp.sor.d fcc0, f13, f14"); ++ COMPARE(fcmp_cond_d(SUNE, f1, f2, FCC0), ++ "0c2c8820 fcmp.sune.d fcc0, f1, f2"); ++ ++ VERIFY_RUN(); ++} ++ ++TEST(TypeOp14) { ++ SET_UP(); ++ ++ COMPARE(alsl_w(a0, a1, a2, 1), "000418a4 alsl.w a0, a1, a2, 1"); ++ COMPARE(alsl_w(a3, a4, a5, 3), "00052507 alsl.w a3, a4, a5, 3"); ++ COMPARE(alsl_w(a6, a7, t0, 4), "0005b16a alsl.w a6, a7, t0, 4"); ++ ++ COMPARE(alsl_wu(t1, t2, t3, 1), "00063dcd alsl.wu t1, t2, t3, 1"); ++ COMPARE(alsl_wu(t4, t5, t6, 3), "00074a30 alsl.wu t4, t5, t6, 3"); ++ COMPARE(alsl_wu(a0, a1, a2, 4), "000798a4 alsl.wu a0, a1, a2, 4"); ++ ++ COMPARE(alsl_d(a3, a4, a5, 1), "002c2507 alsl.d a3, a4, a5, 1"); ++ COMPARE(alsl_d(a6, a7, t0, 3), "002d316a alsl.d a6, a7, t0, 3"); ++ COMPARE(alsl_d(t1, t2, t3, 4), "002dbdcd alsl.d t1, t2, t3, 4"); ++ ++ COMPARE(bytepick_w(t4, t5, t6, 0), ++ "00084a30 bytepick.w t4, t5, t6, 0"); ++ COMPARE(bytepick_w(a0, a1, a2, 3), ++ "000998a4 bytepick.w a0, a1, a2, 3"); ++ ++ COMPARE(bytepick_d(a6, a7, t0, 0), ++ "000c316a bytepick.d a6, a7, t0, 0"); ++ COMPARE(bytepick_d(t4, t5, t6, 7), ++ "000fca30 bytepick.d t4, t5, t6, 7"); ++ ++ COMPARE(slli_w(a3, a3, 31), "0040fce7 slli.w a3, a3, 31"); ++ COMPARE(slli_w(a6, a6, 1), "0040854a slli.w a6, a6, 1"); ++ ++ COMPARE(slli_d(t3, t2, 63), "0041fdcf slli.d t3, t2, 63"); ++ COMPARE(slli_d(t4, a6, 1), "00410550 slli.d t4, a6, 1"); ++ ++ COMPARE(srli_w(a7, a7, 31), "0044fd6b srli.w a7, a7, 31"); ++ COMPARE(srli_w(a4, a4, 1), "00448508 srli.w a4, a4, 1"); ++ ++ COMPARE(srli_d(a4, a3, 63), "0045fce8 srli.d a4, a3, 63"); ++ COMPARE(srli_d(a4, a4, 1), "00450508 srli.d a4, a4, 1"); ++ ++ COMPARE(srai_d(a0, a0, 63), "0049fc84 srai.d a0, a0, 63"); ++ COMPARE(srai_d(a4, a1, 1), "004904a8 srai.d a4, a1, 1"); ++ ++ COMPARE(srai_w(s4, a3, 31), "0048fcfb srai.w s4, a3, 31"); ++ COMPARE(srai_w(s4, a5, 1), "0048853b srai.w s4, a5, 1"); ++ ++ COMPARE(rotri_d(t7, t6, 1), "004d0653 rotri.d t7, t6, 1"); ++ ++ VERIFY_RUN(); ++} ++ ++TEST(TypeOp17) { ++ SET_UP(); ++ ++ COMPARE(sltu(t5, t4, a4), "0012a211 sltu t5, t4, a4"); ++ COMPARE(sltu(t4, zero_reg, t4), "0012c010 sltu t4, zero_reg, t4"); ++ ++ COMPARE(add_w(a4, a4, a6), "00102908 add.w a4, a4, a6"); ++ COMPARE(add_w(a5, a6, t3), "00103d49 add.w a5, a6, t3"); ++ ++ COMPARE(add_d(a4, t0, t1), "0010b588 add.d a4, t0, t1"); ++ COMPARE(add_d(a6, a3, t1), "0010b4ea add.d a6, a3, t1"); ++ ++ COMPARE(sub_w(a7, a7, a2), "0011196b sub.w a7, a7, a2"); ++ COMPARE(sub_w(a2, a2, s3), "001168c6 sub.w a2, a2, s3"); ++ ++ COMPARE(sub_d(s3, ra, s3), "0011e83a 
sub.d s3, ra, s3"); ++ COMPARE(sub_d(a0, a1, a2), "001198a4 sub.d a0, a1, a2"); ++ ++ COMPARE(slt(a5, a5, a6), "00122929 slt a5, a5, a6"); ++ COMPARE(slt(a6, t3, t4), "001241ea slt a6, t3, t4"); ++ ++ COMPARE(masknez(a5, a5, a3), "00131d29 masknez a5, a5, a3"); ++ COMPARE(masknez(a3, a4, a5), "00132507 masknez a3, a4, a5"); ++ ++ COMPARE(maskeqz(a6, a7, t0), "0013b16a maskeqz a6, a7, t0"); ++ COMPARE(maskeqz(t1, t2, t3), "0013bdcd maskeqz t1, t2, t3"); ++ ++ COMPARE(or_(s3, sp, zero_reg), "0015007a or s3, sp, zero_reg"); ++ COMPARE(or_(a4, a0, zero_reg), "00150088 or a4, a0, zero_reg"); ++ ++ COMPARE(and_(sp, sp, t6), "0014c863 and sp, sp, t6"); ++ COMPARE(and_(a3, a3, a7), "0014ace7 and a3, a3, a7"); ++ ++ COMPARE(nor(a7, a7, a7), "00142d6b nor a7, a7, a7"); ++ COMPARE(nor(t4, t5, t6), "00144a30 nor t4, t5, t6"); ++ ++ COMPARE(xor_(a0, a1, a2), "001598a4 xor a0, a1, a2"); ++ COMPARE(xor_(a3, a4, a5), "0015a507 xor a3, a4, a5"); ++ ++ COMPARE(orn(a6, a7, t0), "0016316a orn a6, a7, t0"); ++ COMPARE(orn(t1, t2, t3), "00163dcd orn t1, t2, t3"); ++ ++ COMPARE(andn(t4, t5, t6), "0016ca30 andn t4, t5, t6"); ++ COMPARE(andn(a0, a1, a2), "001698a4 andn a0, a1, a2"); ++ ++ COMPARE(sll_w(a3, t0, a7), "00172d87 sll.w a3, t0, a7"); ++ COMPARE(sll_w(a3, a4, a3), "00171d07 sll.w a3, a4, a3"); ++ ++ COMPARE(srl_w(a3, a4, a3), "00179d07 srl.w a3, a4, a3"); ++ COMPARE(srl_w(a3, t1, t4), "0017c1a7 srl.w a3, t1, t4"); ++ ++ COMPARE(sra_w(a4, t4, a4), "00182208 sra.w a4, t4, a4"); ++ COMPARE(sra_w(a3, t1, a6), "001829a7 sra.w a3, t1, a6"); ++ ++ COMPARE(sll_d(a3, a1, a3), "00189ca7 sll.d a3, a1, a3"); ++ COMPARE(sll_d(a7, a4, t0), "0018b10b sll.d a7, a4, t0"); ++ ++ COMPARE(srl_d(a7, a7, t0), "0019316b srl.d a7, a7, t0"); ++ COMPARE(srl_d(t0, a6, t0), "0019314c srl.d t0, a6, t0"); ++ ++ COMPARE(sra_d(a3, a4, a5), "0019a507 sra.d a3, a4, a5"); ++ COMPARE(sra_d(a6, a7, t0), "0019b16a sra.d a6, a7, t0"); ++ ++ COMPARE(rotr_d(t1, t2, t3), "001bbdcd rotr.d t1, t2, t3"); ++ COMPARE(rotr_d(t4, t5, t6), "001bca30 rotr.d t4, t5, t6"); ++ ++ COMPARE(rotr_w(a0, a1, a2), "001b18a4 rotr.w a0, a1, a2"); ++ COMPARE(rotr_w(a3, a4, a5), "001b2507 rotr.w a3, a4, a5"); ++ ++ COMPARE(mul_w(t8, a5, t7), "001c4d34 mul.w t8, a5, t7"); ++ COMPARE(mul_w(t4, t5, t6), "001c4a30 mul.w t4, t5, t6"); ++ ++ COMPARE(mulh_w(s3, a3, t7), "001cccfa mulh.w s3, a3, t7"); ++ COMPARE(mulh_w(a0, a1, a2), "001c98a4 mulh.w a0, a1, a2"); ++ ++ COMPARE(mulh_wu(a6, a7, t0), "001d316a mulh.wu a6, a7, t0"); ++ COMPARE(mulh_wu(t1, t2, t3), "001d3dcd mulh.wu t1, t2, t3"); ++ ++ COMPARE(mul_d(t2, a5, t1), "001db52e mul.d t2, a5, t1"); ++ COMPARE(mul_d(a4, a4, a5), "001da508 mul.d a4, a4, a5"); ++ ++ COMPARE(mulh_d(a3, a4, a5), "001e2507 mulh.d a3, a4, a5"); ++ COMPARE(mulh_d(a6, a7, t0), "001e316a mulh.d a6, a7, t0"); ++ ++ COMPARE(mulh_du(t1, t2, t3), "001ebdcd mulh.du t1, t2, t3"); ++ COMPARE(mulh_du(t4, t5, t6), "001eca30 mulh.du t4, t5, t6"); ++ ++ COMPARE(mulw_d_w(a0, a1, a2), "001f18a4 mulw.d.w a0, a1, a2"); ++ COMPARE(mulw_d_w(a3, a4, a5), "001f2507 mulw.d.w a3, a4, a5"); ++ ++ COMPARE(mulw_d_wu(a6, a7, t0), "001fb16a mulw.d.wu a6, a7, t0"); ++ COMPARE(mulw_d_wu(t1, t2, t3), "001fbdcd mulw.d.wu t1, t2, t3"); ++ ++ COMPARE(div_w(a5, a5, a3), "00201d29 div.w a5, a5, a3"); ++ COMPARE(div_w(t4, t5, t6), "00204a30 div.w t4, t5, t6"); ++ ++ COMPARE(mod_w(a6, t3, a6), "0020a9ea mod.w a6, t3, a6"); ++ COMPARE(mod_w(a3, a4, a3), "00209d07 mod.w a3, a4, a3"); ++ ++ COMPARE(div_wu(t1, t2, t3), "00213dcd div.wu t1, t2, t3"); ++ COMPARE(div_wu(t4, t5, t6), 
"00214a30 div.wu t4, t5, t6"); ++ ++ COMPARE(mod_wu(a0, a1, a2), "002198a4 mod.wu a0, a1, a2"); ++ COMPARE(mod_wu(a3, a4, a5), "0021a507 mod.wu a3, a4, a5"); ++ ++ COMPARE(div_d(t0, t0, a6), "0022298c div.d t0, t0, a6"); ++ COMPARE(div_d(a7, a7, a5), "0022256b div.d a7, a7, a5"); ++ ++ COMPARE(mod_d(a6, a7, t0), "0022b16a mod.d a6, a7, t0"); ++ COMPARE(mod_d(t1, t2, t3), "0022bdcd mod.d t1, t2, t3"); ++ ++ COMPARE(div_du(t4, t5, t6), "00234a30 div.du t4, t5, t6"); ++ COMPARE(div_du(a0, a1, a2), "002318a4 div.du a0, a1, a2"); ++ ++ COMPARE(mod_du(a3, a4, a5), "0023a507 mod.du a3, a4, a5"); ++ COMPARE(mod_du(a6, a7, t0), "0023b16a mod.du a6, a7, t0"); ++ ++ COMPARE(fadd_s(f3, f4, f5), "01009483 fadd.s f3, f4, f5"); ++ COMPARE(fadd_s(f6, f7, f8), "0100a0e6 fadd.s f6, f7, f8"); ++ ++ COMPARE(fadd_d(f0, f1, f0), "01010020 fadd.d f0, f1, f0"); ++ COMPARE(fadd_d(f0, f1, f2), "01010820 fadd.d f0, f1, f2"); ++ ++ COMPARE(fsub_s(f9, f10, f11), "0102ad49 fsub.s f9, f10, f11"); ++ COMPARE(fsub_s(f12, f13, f14), "0102b9ac fsub.s f12, f13, f14"); ++ ++ COMPARE(fsub_d(f30, f0, f30), "0103781e fsub.d f30, f0, f30"); ++ COMPARE(fsub_d(f0, f0, f1), "01030400 fsub.d f0, f0, f1"); ++ ++ COMPARE(fmul_s(f15, f16, f17), "0104c60f fmul.s f15, f16, f17"); ++ COMPARE(fmul_s(f18, f19, f20), "0104d272 fmul.s f18, f19, f20"); ++ ++ COMPARE(fmul_d(f0, f0, f1), "01050400 fmul.d f0, f0, f1"); ++ COMPARE(fmul_d(f0, f0, f0), "01050000 fmul.d f0, f0, f0"); ++ ++ COMPARE(fdiv_s(f0, f1, f2), "01068820 fdiv.s f0, f1, f2"); ++ COMPARE(fdiv_s(f3, f4, f5), "01069483 fdiv.s f3, f4, f5"); ++ ++ COMPARE(fdiv_d(f0, f0, f1), "01070400 fdiv.d f0, f0, f1"); ++ COMPARE(fdiv_d(f0, f1, f0), "01070020 fdiv.d f0, f1, f0"); ++ ++ COMPARE(fmax_s(f9, f10, f11), "0108ad49 fmax.s f9, f10, f11"); ++ COMPARE(fmin_s(f6, f7, f8), "010aa0e6 fmin.s f6, f7, f8"); ++ ++ COMPARE(fmax_d(f0, f1, f0), "01090020 fmax.d f0, f1, f0"); ++ COMPARE(fmin_d(f0, f1, f0), "010b0020 fmin.d f0, f1, f0"); ++ ++ COMPARE(fmaxa_s(f12, f13, f14), "010cb9ac fmaxa.s f12, f13, f14"); ++ COMPARE(fmina_s(f15, f16, f17), "010ec60f fmina.s f15, f16, f17"); ++ ++ COMPARE(fmaxa_d(f18, f19, f20), "010d5272 fmaxa.d f18, f19, f20"); ++ COMPARE(fmina_d(f0, f1, f2), "010f0820 fmina.d f0, f1, f2"); ++ ++ COMPARE(ldx_b(a0, a1, a2), "380018a4 ldx.b a0, a1, a2"); ++ COMPARE(ldx_h(a3, a4, a5), "38042507 ldx.h a3, a4, a5"); ++ COMPARE(ldx_w(a6, a7, t0), "3808316a ldx.w a6, a7, t0"); ++ ++ COMPARE(stx_b(t1, t2, t3), "38103dcd stx.b t1, t2, t3"); ++ COMPARE(stx_h(t4, t5, t6), "38144a30 stx.h t4, t5, t6"); ++ COMPARE(stx_w(a0, a1, a2), "381818a4 stx.w a0, a1, a2"); ++ ++ COMPARE(ldx_bu(a3, a4, a5), "38202507 ldx.bu a3, a4, a5"); ++ COMPARE(ldx_hu(a6, a7, t0), "3824316a ldx.hu a6, a7, t0"); ++ COMPARE(ldx_wu(t1, t2, t3), "38283dcd ldx.wu t1, t2, t3"); ++ ++ COMPARE(ldx_d(a2, s6, t6), "380c4ba6 ldx.d a2, s6, t6"); ++ COMPARE(ldx_d(t7, s6, t6), "380c4bb3 ldx.d t7, s6, t6"); ++ ++ COMPARE(stx_d(a4, a3, t6), "381c48e8 stx.d a4, a3, t6"); ++ COMPARE(stx_d(a0, a3, t6), "381c48e4 stx.d a0, a3, t6"); ++ ++ COMPARE(preldx(0, t5, t6), "382c4a20 preldx 0x0(0), t5, t6"); ++ COMPARE(preldx(31, a1, a2), "382c18bf preldx 0x1f(31), a1, a2"); ++ ++ COMPARE(amswap_db_w(a0, a3, t6), "38691e44 amswap_db.w a0, a3, t6"); ++ COMPARE(amswap_db_d(a0, a3, t6), "38699e44 amswap_db.d a0, a3, t6"); ++ COMPARE(amadd_db_w(a0, a3, t6), "386a1e44 amadd_db.w a0, a3, t6"); ++ COMPARE(amadd_db_d(a0, a3, t6), "386a9e44 amadd_db.d a0, a3, t6"); ++ COMPARE(amand_db_w(a0, a3, t6), "386b1e44 amand_db.w a0, a3, t6"); ++ 
COMPARE(amand_db_d(a0, a3, t6), "386b9e44 amand_db.d a0, a3, t6"); ++ COMPARE(amor_db_w(a0, a3, t6), "386c1e44 amor_db.w a0, a3, t6"); ++ COMPARE(amor_db_d(a0, a3, t6), "386c9e44 amor_db.d a0, a3, t6"); ++ COMPARE(amxor_db_w(a0, a3, t6), "386d1e44 amxor_db.w a0, a3, t6"); ++ COMPARE(amxor_db_d(a0, a3, t6), "386d9e44 amxor_db.d a0, a3, t6"); ++ ++ COMPARE(dbar(0), "38720000 dbar 0x0(0)"); ++ COMPARE(ibar(5555), "387295b3 ibar 0x15b3(5555)"); ++ ++ COMPARE(break_(0), "002a0000 break code: 0x0(0)"); ++ COMPARE(break_(0x3fc0), "002a3fc0 break code: 0x3fc0(16320)"); ++ ++ COMPARE(fldx_s(f3, a4, a5), "38302503 fldx.s f3, a4, a5"); ++ COMPARE(fldx_d(f6, a7, t0), "38343166 fldx.d f6, a7, t0"); ++ ++ COMPARE(fstx_s(f1, t2, t3), "38383dc1 fstx.s f1, t2, t3"); ++ COMPARE(fstx_d(f4, t5, t6), "383c4a24 fstx.d f4, t5, t6"); ++ ++ COMPARE(asrtle_d(a0, a1), "00011480 asrtle.d a0, a1"); ++ COMPARE(asrtgt_d(a2, a3), "00019cc0 asrtgt.d a2, a3"); ++ ++ COMPARE(syscall(2), "002b0002 syscall code 0x2(2)"); ++ // COMPARE(hypcall(2), ++ // "002b8002 hypcall 0x2(2)"); ++ ++ COMPARE(amswap_w(a4, a5, a6), "38602548 amswap.w a4, a5, a6"); ++ COMPARE(amswap_d(a7, t0, t1), "3860b1ab amswap.d a7, t0, t1"); ++ ++ COMPARE(amadd_w(t2, t3, t4), "38613e0e amadd.w t2, t3, t4"); ++ COMPARE(amadd_d(t5, t6, a0), "3861c891 amadd.d t5, t6, a0"); ++ ++ COMPARE(amand_w(a1, a2, a3), "386218e5 amand.w a1, a2, a3"); ++ COMPARE(amand_d(a4, a5, a6), "3862a548 amand.d a4, a5, a6"); ++ ++ COMPARE(amor_w(a7, t0, t1), "386331ab amor.w a7, t0, t1"); ++ COMPARE(amor_d(t2, t3, t4), "3863be0e amor.d t2, t3, t4"); ++ ++ COMPARE(amxor_w(t5, t6, a0), "38644891 amxor.w t5, t6, a0"); ++ COMPARE(amxor_d(a1, a2, a3), "386498e5 amxor.d a1, a2, a3"); ++ ++ COMPARE(ammax_w(a4, a5, a6), "38652548 ammax.w a4, a5, a6"); ++ COMPARE(ammax_d(a7, t0, t1), "3865b1ab ammax.d a7, t0, t1"); ++ ++ COMPARE(ammin_w(t2, t3, t4), "38663e0e ammin.w t2, t3, t4"); ++ COMPARE(ammin_d(t5, t6, a0), "3866c891 ammin.d t5, t6, a0"); ++ ++ COMPARE(ammax_wu(a1, a2, a3), "386718e5 ammax.wu a1, a2, a3"); ++ COMPARE(ammax_du(a4, a5, a6), "3867a548 ammax.du a4, a5, a6"); ++ ++ COMPARE(ammin_wu(a7, t0, t1), "386831ab ammin.wu a7, t0, t1"); ++ COMPARE(ammin_du(t2, t3, t4), "3868be0e ammin.du t2, t3, t4"); ++ ++ COMPARE(ammax_db_d(a0, a1, a2), "386e94c4 ammax_db.d a0, a1, a2"); ++ COMPARE(ammax_db_du(a3, a4, a5), "3870a127 ammax_db.du a3, a4, a5"); ++ ++ COMPARE(ammax_db_w(a6, a7, t0), "386e2d8a ammax_db.w a6, a7, t0"); ++ COMPARE(ammax_db_wu(t1, t2, t3), "387039ed ammax_db.wu t1, t2, t3"); ++ ++ COMPARE(ammin_db_d(t4, t5, t6), "386fc650 ammin_db.d t4, t5, t6"); ++ COMPARE(ammin_db_du(a0, a1, a2), "387194c4 ammin_db.du a0, a1, a2"); ++ ++ COMPARE(ammin_db_wu(a3, a4, a5), "38712127 ammin_db.wu a3, a4, a5"); ++ COMPARE(ammin_db_w(a6, a7, t0), "386f2d8a ammin_db.w a6, a7, t0"); ++ ++ COMPARE(fldgt_s(f0, a1, a2), "387418a0 fldgt.s f0, a1, a2"); ++ COMPARE(fldgt_d(f2, a3, a4), "3874a0e2 fldgt.d f2, a3, a4"); ++ ++ COMPARE(fldle_s(f5, a6, a7), "38752d45 fldle.s f5, a6, a7"); ++ COMPARE(fldle_d(f8, t0, t1), "3875b588 fldle.d f8, t0, t1"); ++ ++ COMPARE(fstgt_s(f11, t2, t3), "38763dcb fstgt.s f11, t2, t3"); ++ COMPARE(fstgt_d(f14, t4, t5), "3876c60e fstgt.d f14, t4, t5"); ++ ++ COMPARE(fstle_s(f17, t6, a0), "38771251 fstle.s f17, t6, a0"); ++ COMPARE(fstle_d(f20, a1, a2), "387798b4 fstle.d f20, a1, a2"); ++ ++ COMPARE(ldgt_b(a1, a2, a3), "38781cc5 ldgt.b a1, a2, a3"); ++ COMPARE(ldgt_h(a4, a5, a6), "3878a928 ldgt.h a4, a5, a6"); ++ COMPARE(ldgt_w(a7, t0, t1), "3879358b ldgt.w a7, t0, t1"); ++ 
COMPARE(ldgt_d(t2, t3, t4), "3879c1ee ldgt.d t2, t3, t4"); ++ ++ COMPARE(ldle_b(t5, t6, a0), "387a1251 ldle.b t5, t6, a0"); ++ COMPARE(ldle_h(a1, a2, a3), "387a9cc5 ldle.h a1, a2, a3"); ++ COMPARE(ldle_w(a4, a5, a6), "387b2928 ldle.w a4, a5, a6"); ++ COMPARE(ldle_d(a7, t0, t1), "387bb58b ldle.d a7, t0, t1"); ++ ++ COMPARE(stgt_b(t2, t3, t4), "387c41ee stgt.b t2, t3, t4"); ++ COMPARE(stgt_h(t5, t6, a0), "387c9251 stgt.h t5, t6, a0"); ++ COMPARE(stgt_w(a1, a2, a3), "387d1cc5 stgt.w a1, a2, a3"); ++ COMPARE(stgt_d(a4, a5, a6), "387da928 stgt.d a4, a5, a6"); ++ ++ COMPARE(stle_b(a7, t0, t1), "387e358b stle.b a7, t0, t1"); ++ COMPARE(stle_h(t2, t3, t4), "387ec1ee stle.h t2, t3, t4"); ++ COMPARE(stle_w(t5, t6, a0), "387f1251 stle.w t5, t6, a0"); ++ COMPARE(stle_d(a1, a2, a3), "387f9cc5 stle.d a1, a2, a3"); ++ ++ COMPARE(fscaleb_s(f0, f1, f2), "01108820 fscaleb.s f0, f1, f2"); ++ COMPARE(fscaleb_d(f3, f4, f5), "01111483 fscaleb.d f3, f4, f5"); ++ ++ COMPARE(fcopysign_s(f6, f7, f8), "0112a0e6 fcopysign.s f6, f7, f8"); ++ COMPARE(fcopysign_d(f9, f10, f12), ++ "01133149 fcopysign.d f9, f10, f12"); ++ ++ COMPARE(crc_w_b_w(a4, a5, a6), "00242928 crc.w.b.w a4, a5, a6"); ++ COMPARE(crc_w_h_w(a7, t0, t1), "0024b58b crc.w.h.w a7, t0, t1"); ++ COMPARE(crc_w_w_w(t2, t3, t4), "002541ee crc.w.w.w t2, t3, t4"); ++ COMPARE(crc_w_d_w(t5, t6, a0), "00259251 crc.w.d.w t5, t6, a0"); ++ ++ COMPARE(crcc_w_b_w(a1, a2, a3), "00261cc5 crcc.w.b.w a1, a2, a3"); ++ COMPARE(crcc_w_h_w(a4, a5, a6), "0026a928 crcc.w.h.w a4, a5, a6"); ++ COMPARE(crcc_w_w_w(a7, t0, t1), "0027358b crcc.w.w.w a7, t0, t1"); ++ COMPARE(crcc_w_d_w(t2, t3, t4), "0027c1ee crcc.w.d.w t2, t3, t4"); ++ ++ VERIFY_RUN(); ++} ++ ++TEST(TypeOp22) { ++ SET_UP(); ++ ++ COMPARE(clz_w(a3, a0), "00001487 clz.w a3, a0"); ++ COMPARE(ctz_w(a0, a1), "00001ca4 ctz.w a0, a1"); ++ COMPARE(clz_d(a2, a3), "000024e6 clz.d a2, a3"); ++ COMPARE(ctz_d(a4, a5), "00002d28 ctz.d a4, a5"); ++ ++ COMPARE(clo_w(a0, a1), "000010a4 clo.w a0, a1"); ++ COMPARE(cto_w(a2, a3), "000018e6 cto.w a2, a3"); ++ COMPARE(clo_d(a4, a5), "00002128 clo.d a4, a5"); ++ COMPARE(cto_d(a6, a7), "0000296a cto.d a6, a7"); ++ ++ COMPARE(revb_2h(a6, a7), "0000316a revb.2h a6, a7"); ++ COMPARE(revb_4h(t0, t1), "000035ac revb.4h t0, t1"); ++ COMPARE(revb_2w(t2, t3), "000039ee revb.2w t2, t3"); ++ COMPARE(revb_d(t4, t5), "00003e30 revb.d t4, t5"); ++ ++ COMPARE(revh_2w(a0, a1), "000040a4 revh.2w a0, a1"); ++ COMPARE(revh_d(a2, a3), "000044e6 revh.d a2, a3"); ++ ++ COMPARE(bitrev_4b(a4, a5), "00004928 bitrev.4b a4, a5"); ++ COMPARE(bitrev_8b(a6, a7), "00004d6a bitrev.8b a6, a7"); ++ COMPARE(bitrev_w(t0, t1), "000051ac bitrev.w t0, t1"); ++ COMPARE(bitrev_d(t2, t3), "000055ee bitrev.d t2, t3"); ++ ++ COMPARE(ext_w_b(t4, t5), "00005e30 ext.w.b t4, t5"); ++ COMPARE(ext_w_h(a0, a1), "000058a4 ext.w.h a0, a1"); ++ ++ COMPARE(fabs_s(f2, f3), "01140462 fabs.s f2, f3"); ++ COMPARE(fabs_d(f0, f0), "01140800 fabs.d f0, f0"); ++ ++ COMPARE(fneg_s(f0, f1), "01141420 fneg.s f0, f1"); ++ COMPARE(fneg_d(f0, f0), "01141800 fneg.d f0, f0"); ++ ++ COMPARE(fsqrt_s(f4, f5), "011444a4 fsqrt.s f4, f5"); ++ COMPARE(fsqrt_d(f0, f0), "01144800 fsqrt.d f0, f0"); ++ ++ COMPARE(fmov_s(f6, f7), "011494e6 fmov.s f6, f7"); ++ COMPARE(fmov_d(f0, f1), "01149820 fmov.d f0, f1"); ++ COMPARE(fmov_d(f1, f0), "01149801 fmov.d f1, f0"); ++ ++ COMPARE(movgr2fr_d(f0, t6), "0114aa40 movgr2fr.d f0, t6"); ++ COMPARE(movgr2fr_d(f1, t6), "0114aa41 movgr2fr.d f1, t6"); ++ ++ COMPARE(movgr2fr_w(f30, a3), "0114a4fe movgr2fr.w f30, a3"); ++ 
COMPARE(movgr2fr_w(f30, a0), "0114a49e movgr2fr.w f30, a0"); ++ ++ COMPARE(movgr2frh_w(f30, t6), "0114ae5e movgr2frh.w f30, t6"); ++ COMPARE(movgr2frh_w(f0, a3), "0114ace0 movgr2frh.w f0, a3"); ++ ++ COMPARE(movfr2gr_s(a3, f30), "0114b7c7 movfr2gr.s a3, f30"); ++ ++ COMPARE(movfr2gr_d(a6, f30), "0114bbca movfr2gr.d a6, f30"); ++ COMPARE(movfr2gr_d(t7, f30), "0114bbd3 movfr2gr.d t7, f30"); ++ ++ COMPARE(movfrh2gr_s(a5, f0), "0114bc09 movfrh2gr.s a5, f0"); ++ COMPARE(movfrh2gr_s(a4, f0), "0114bc08 movfrh2gr.s a4, f0"); ++ ++ COMPARE(movgr2fcsr(a2), "0114c0c0 movgr2fcsr fcsr, a2"); ++ COMPARE(movfcsr2gr(a4), "0114c808 movfcsr2gr a4, fcsr"); ++ ++ COMPARE(movfr2cf(FCC0, f0), "0114d000 movfr2cf fcc0, f0"); ++ COMPARE(movcf2fr(f1, FCC1), "0114d421 movcf2fr f1, fcc1"); ++ ++ COMPARE(movgr2cf(FCC2, a0), "0114d882 movgr2cf fcc2, a0"); ++ COMPARE(movcf2gr(a1, FCC3), "0114dc65 movcf2gr a1, fcc3"); ++ ++ COMPARE(fcvt_s_d(f0, f0), "01191800 fcvt.s.d f0, f0"); ++ COMPARE(fcvt_d_s(f0, f0), "01192400 fcvt.d.s f0, f0"); ++ ++ COMPARE(ftintrm_w_s(f8, f9), "011a0528 ftintrm.w.s f8, f9"); ++ COMPARE(ftintrm_w_d(f10, f11), "011a096a ftintrm.w.d f10, f11"); ++ COMPARE(ftintrm_l_s(f12, f13), "011a25ac ftintrm.l.s f12, f13"); ++ COMPARE(ftintrm_l_d(f14, f15), "011a29ee ftintrm.l.d f14, f15"); ++ ++ COMPARE(ftintrp_w_s(f16, f17), "011a4630 ftintrp.w.s f16, f17"); ++ COMPARE(ftintrp_w_d(f18, f19), "011a4a72 ftintrp.w.d f18, f19"); ++ COMPARE(ftintrp_l_s(f20, f21), "011a66b4 ftintrp.l.s f20, f21"); ++ COMPARE(ftintrp_l_d(f0, f1), "011a6820 ftintrp.l.d f0, f1"); ++ ++ COMPARE(ftintrz_w_s(f30, f4), "011a849e ftintrz.w.s f30, f4"); ++ COMPARE(ftintrz_w_d(f30, f4), "011a889e ftintrz.w.d f30, f4"); ++ COMPARE(ftintrz_l_s(f30, f0), "011aa41e ftintrz.l.s f30, f0"); ++ COMPARE(ftintrz_l_d(f30, f30), "011aabde ftintrz.l.d f30, f30"); ++ ++ COMPARE(ftintrne_w_s(f2, f3), "011ac462 ftintrne.w.s f2, f3"); ++ COMPARE(ftintrne_w_d(f4, f5), "011ac8a4 ftintrne.w.d f4, f5"); ++ COMPARE(ftintrne_l_s(f6, f7), "011ae4e6 ftintrne.l.s f6, f7"); ++ COMPARE(ftintrne_l_d(f8, f9), "011ae928 ftintrne.l.d f8, f9"); ++ ++ COMPARE(ftint_w_s(f10, f11), "011b056a ftint.w.s f10, f11"); ++ COMPARE(ftint_w_d(f12, f13), "011b09ac ftint.w.d f12, f13"); ++ COMPARE(ftint_l_s(f14, f15), "011b25ee ftint.l.s f14, f15"); ++ COMPARE(ftint_l_d(f16, f17), "011b2a30 ftint.l.d f16, f17"); ++ ++ COMPARE(ffint_s_w(f18, f19), "011d1272 ffint.s.w f18, f19"); ++ COMPARE(ffint_s_l(f20, f21), "011d1ab4 ffint.s.l f20, f21"); ++ COMPARE(ffint_d_w(f0, f1), "011d2020 ffint.d.w f0, f1"); ++ COMPARE(ffint_d_l(f2, f3), "011d2862 ffint.d.l f2, f3"); ++ ++ COMPARE(frint_s(f4, f5), "011e44a4 frint.s f4, f5"); ++ COMPARE(frint_d(f6, f7), "011e48e6 frint.d f6, f7"); ++ ++ COMPARE(frecip_s(f8, f9), "01145528 frecip.s f8, f9"); ++ COMPARE(frecip_d(f10, f11), "0114596a frecip.d f10, f11"); ++ ++ COMPARE(frsqrt_s(f12, f13), "011465ac frsqrt.s f12, f13"); ++ COMPARE(frsqrt_d(f14, f15), "011469ee frsqrt.d f14, f15"); ++ ++ COMPARE(fclass_s(f16, f17), "01143630 fclass.s f16, f17"); ++ COMPARE(fclass_d(f18, f19), "01143a72 fclass.d f18, f19"); ++ ++ COMPARE(flogb_s(f20, f21), "011426b4 flogb.s f20, f21"); ++ COMPARE(flogb_d(f0, f1), "01142820 flogb.d f0, f1"); ++ ++ COMPARE(rdtimel_w(t0, t1), "000061ac rdtimel.w t0, t1"); ++ COMPARE(rdtimeh_w(t2, t3), "000065ee rdtimeh.w t2, t3"); ++ COMPARE(rdtime_d(t4, t5), "00006a30 rdtime.d t4, t5"); ++ ++ VERIFY_RUN(); ++} ++ ++} // namespace internal ++} // namespace v8 +diff --git a/src/3rdparty/chromium/v8/test/cctest/test-macro-assembler-la64.cc 
b/src/3rdparty/chromium/v8/test/cctest/test-macro-assembler-la64.cc +new file mode 100644 +index 00000000000..ef536b862ba +--- /dev/null ++++ b/src/3rdparty/chromium/v8/test/cctest/test-macro-assembler-la64.cc +@@ -0,0 +1,2894 @@ ++// Copyright 2013 the V8 project authors. All rights reserved. ++// Redistribution and use in source and binary forms, with or without ++// modification, are permitted provided that the following conditions are ++// met: ++// ++// * Redistributions of source code must retain the above copyright ++// notice, this list of conditions and the following disclaimer. ++// * Redistributions in binary form must reproduce the above ++// copyright notice, this list of conditions and the following ++// disclaimer in the documentation and/or other materials provided ++// with the distribution. ++// * Neither the name of Google Inc. nor the names of its ++// contributors may be used to endorse or promote products derived ++// from this software without specific prior written permission. ++// ++// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS ++// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT ++// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR ++// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT ++// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, ++// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT ++// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, ++// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY ++// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT ++// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE ++// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. ++ ++#include <stdlib.h> ++ ++#include <iostream> // NOLINT(readability/streams) ++ ++#include "src/base/utils/random-number-generator.h" ++#include "src/codegen/macro-assembler.h" ++#include "src/execution/simulator.h" ++#include "src/init/v8.h" ++#include "src/objects/heap-number.h" ++#include "src/objects/objects-inl.h" ++#include "src/utils/ostreams.h" ++#include "test/cctest/cctest.h" ++ ++namespace v8 { ++namespace internal { ++ ++// TODO(mips64): Refine these signatures per test case.
++using FV = void*(int64_t x, int64_t y, int p2, int p3, int p4); ++using F1 = void*(int x, int p1, int p2, int p3, int p4); ++using F2 = void*(int x, int y, int p2, int p3, int p4); ++using F3 = void*(void* p, int p1, int p2, int p3, int p4); ++using F4 = void*(void* p0, void* p1, int p2, int p3, int p4); ++ ++#define __ masm-> ++ ++TEST(BYTESWAP) { ++ CcTest::InitializeVM(); ++ Isolate* isolate = CcTest::i_isolate(); ++ HandleScope scope(isolate); ++ ++ struct T { ++ uint64_t s8; ++ uint64_t s4; ++ uint64_t s2; ++ uint64_t u4; ++ uint64_t u2; ++ }; ++ ++ T t; ++ // clang-format off ++ uint64_t test_values[] = {0x5612FFCD9D327ACC, ++ 0x781A15C3, ++ 0xFCDE, ++ 0x9F, ++ 0xC81A15C3, ++ 0x8000000000000000, ++ 0xFFFFFFFFFFFFFFFF, ++ 0x0000000080000000, ++ 0x0000000000008000}; ++ // clang-format on ++ MacroAssembler assembler(isolate, v8::internal::CodeObjectRequired::kYes); ++ ++ MacroAssembler* masm = &assembler; ++ ++ __ Ld_d(a4, MemOperand(a0, offsetof(T, s8))); ++ __ ByteSwapSigned(a4, a4, 8); ++ __ St_d(a4, MemOperand(a0, offsetof(T, s8))); ++ ++ __ Ld_d(a4, MemOperand(a0, offsetof(T, s4))); ++ __ ByteSwapSigned(a4, a4, 4); ++ __ St_d(a4, MemOperand(a0, offsetof(T, s4))); ++ ++ __ Ld_d(a4, MemOperand(a0, offsetof(T, s2))); ++ __ ByteSwapSigned(a4, a4, 2); ++ __ St_d(a4, MemOperand(a0, offsetof(T, s2))); ++ ++ __ Ld_d(a4, MemOperand(a0, offsetof(T, u4))); ++ __ ByteSwapSigned(a4, a4, 4); ++ __ St_d(a4, MemOperand(a0, offsetof(T, u4))); ++ ++ __ Ld_d(a4, MemOperand(a0, offsetof(T, u2))); ++ __ ByteSwapSigned(a4, a4, 2); ++ __ St_d(a4, MemOperand(a0, offsetof(T, u2))); ++ ++ __ jirl(zero_reg, ra, 0); ++ ++ CodeDesc desc; ++ masm->GetCode(isolate, &desc); ++ Handle code = Factory::CodeBuilder(isolate, desc, Code::STUB).Build(); ++ auto f = GeneratedCode::FromCode(*code); ++ ++ for (size_t i = 0; i < arraysize(test_values); i++) { ++ int32_t in_s4 = static_cast(test_values[i]); ++ int16_t in_s2 = static_cast(test_values[i]); ++ uint32_t in_u4 = static_cast(test_values[i]); ++ uint16_t in_u2 = static_cast(test_values[i]); ++ ++ t.s8 = test_values[i]; ++ t.s4 = static_cast(in_s4); ++ t.s2 = static_cast(in_s2); ++ t.u4 = static_cast(in_u4); ++ t.u2 = static_cast(in_u2); ++ ++ f.Call(&t, 0, 0, 0, 0); ++ ++ CHECK_EQ(ByteReverse(test_values[i]), t.s8); ++ CHECK_EQ(ByteReverse(in_s4), static_cast(t.s4)); ++ CHECK_EQ(ByteReverse(in_s2), static_cast(t.s2)); ++ CHECK_EQ(ByteReverse(in_u4), static_cast(t.u4)); ++ CHECK_EQ(ByteReverse(in_u2), static_cast(t.u2)); ++ } ++} ++ ++TEST(LoadConstants) { ++ CcTest::InitializeVM(); ++ Isolate* isolate = CcTest::i_isolate(); ++ HandleScope handles(isolate); ++ ++ int64_t refConstants[64]; ++ int64_t result[64]; ++ ++ int64_t mask = 1; ++ for (int i = 0; i < 64; i++) { ++ refConstants[i] = ~(mask << i); ++ } ++ ++ MacroAssembler assembler(isolate, v8::internal::CodeObjectRequired::kYes); ++ MacroAssembler* masm = &assembler; ++ ++ __ or_(a4, a0, zero_reg); ++ for (int i = 0; i < 64; i++) { ++ // Load constant. ++ __ li(a5, Operand(refConstants[i])); ++ __ St_d(a5, MemOperand(a4, zero_reg)); ++ __ Add_d(a4, a4, Operand(kPointerSize)); ++ } ++ ++ __ jirl(zero_reg, ra, 0); ++ ++ CodeDesc desc; ++ masm->GetCode(isolate, &desc); ++ Handle code = Factory::CodeBuilder(isolate, desc, Code::STUB).Build(); ++ ++ auto f = GeneratedCode::FromCode(*code); ++ (void)f.Call(reinterpret_cast(result), 0, 0, 0, 0); ++ // Check results. 
++ for (int i = 0; i < 64; i++) { ++ CHECK(refConstants[i] == result[i]); ++ } ++} ++ ++TEST(LoadAddress) { ++ CcTest::InitializeVM(); ++ Isolate* isolate = CcTest::i_isolate(); ++ HandleScope handles(isolate); ++ ++ MacroAssembler assembler(isolate, v8::internal::CodeObjectRequired::kYes); ++ MacroAssembler* masm = &assembler; ++ Label to_jump, skip; ++ __ mov(a4, a0); ++ ++ __ Branch(&skip); ++ __ bind(&to_jump); ++ __ nop(); ++ __ nop(); ++ __ jirl(zero_reg, ra, 0); ++ __ bind(&skip); ++ __ li(a4, Operand(masm->jump_address(&to_jump)), ADDRESS_LOAD); ++ int check_size = masm->InstructionsGeneratedSince(&skip); ++ CHECK_EQ(3, check_size); ++ __ jirl(zero_reg, a4, 0); ++ __ stop(); ++ __ stop(); ++ __ stop(); ++ __ stop(); ++ __ stop(); ++ ++ CodeDesc desc; ++ masm->GetCode(isolate, &desc); ++ Handle code = Factory::CodeBuilder(isolate, desc, Code::STUB).Build(); ++ ++ auto f = GeneratedCode::FromCode(*code); ++ (void)f.Call(0, 0, 0, 0, 0); ++ // Check results. ++} ++ ++TEST(jump_tables4) { ++ // Similar to test-assembler-mips jump_tables1, with extra test for branch ++ // trampoline required before emission of the dd table (where trampolines are ++ // blocked), and proper transition to long-branch mode. ++ // Regression test for v8:4294. ++ CcTest::InitializeVM(); ++ Isolate* isolate = CcTest::i_isolate(); ++ HandleScope scope(isolate); ++ MacroAssembler assembler(isolate, v8::internal::CodeObjectRequired::kYes); ++ MacroAssembler* masm = &assembler; ++ ++ const int kNumCases = 512; ++ int values[kNumCases]; ++ isolate->random_number_generator()->NextBytes(values, sizeof(values)); ++ Label labels[kNumCases]; ++ Label near_start, end, done; ++ ++ __ Push(ra); ++ __ xor_(a2, a2, a2); ++ ++ __ Branch(&end); ++ __ bind(&near_start); ++ ++ for (int i = 0; i < 32768 - 256; ++i) { ++ __ Add_d(a2, a2, 1); ++ } ++ ++ __ GenerateSwitchTable(a0, kNumCases, ++ [&labels](size_t i) { return labels + i; }); ++ ++ for (int i = 0; i < kNumCases; ++i) { ++ __ bind(&labels[i]); ++ __ li(a2, values[i]); ++ __ Branch(&done); ++ } ++ ++ __ bind(&done); ++ __ Pop(ra); ++ __ or_(a0, a2, zero_reg); ++ __ jirl(zero_reg, ra, 0); ++ ++ __ bind(&end); ++ __ Branch(&near_start); ++ ++ CodeDesc desc; ++ masm->GetCode(isolate, &desc); ++ Handle code = Factory::CodeBuilder(isolate, desc, Code::STUB).Build(); ++#ifdef OBJECT_PRINT ++ code->Print(std::cout); ++#endif ++ auto f = GeneratedCode::FromCode(*code); ++ for (int i = 0; i < kNumCases; ++i) { ++ int64_t res = reinterpret_cast(f.Call(i, 0, 0, 0, 0)); ++ ::printf("f(%d) = %" PRId64 "\n", i, res); ++ CHECK_EQ(values[i], res); ++ } ++} ++ ++TEST(jump_tables6) { ++ // Similar to test-assembler-mips jump_tables1, with extra test for branch ++ // trampoline required after emission of the dd table (where trampolines are ++ // blocked). 
This test checks if number of really generated instructions is ++ // greater than number of counted instructions from code, as we are expecting ++ // generation of trampoline in this case ++ CcTest::InitializeVM(); ++ Isolate* isolate = CcTest::i_isolate(); ++ HandleScope scope(isolate); ++ MacroAssembler assembler(isolate, v8::internal::CodeObjectRequired::kYes); ++ MacroAssembler* masm = &assembler; ++ ++ const int kSwitchTableCases = 40; ++ ++ const int kMaxBranchOffset = (1 << (18 - 1)) - 1; ++ const int kTrampolineSlotsSize = Assembler::kTrampolineSlotsSize; ++ const int kSwitchTablePrologueSize = MacroAssembler::kSwitchTablePrologueSize; ++ ++ const int kMaxOffsetForTrampolineStart = ++ kMaxBranchOffset - 16 * kTrampolineSlotsSize; ++ const int kFillInstr = (kMaxOffsetForTrampolineStart / kInstrSize) - ++ (kSwitchTablePrologueSize + 2 * kSwitchTableCases) - ++ 20; ++ ++ int values[kSwitchTableCases]; ++ isolate->random_number_generator()->NextBytes(values, sizeof(values)); ++ Label labels[kSwitchTableCases]; ++ Label near_start, end, done; ++ ++ __ Push(ra); ++ __ xor_(a2, a2, a2); ++ ++ int offs1 = masm->pc_offset(); ++ int gen_insn = 0; ++ ++ __ Branch(&end); ++ gen_insn += 1; ++ __ bind(&near_start); ++ ++ for (int i = 0; i < kFillInstr; ++i) { ++ __ Add_d(a2, a2, 1); ++ } ++ gen_insn += kFillInstr; ++ ++ __ GenerateSwitchTable(a0, kSwitchTableCases, ++ [&labels](size_t i) { return labels + i; }); ++ gen_insn += (kSwitchTablePrologueSize + 2 * kSwitchTableCases); ++ ++ for (int i = 0; i < kSwitchTableCases; ++i) { ++ __ bind(&labels[i]); ++ __ li(a2, values[i]); ++ __ Branch(&done); ++ } ++ gen_insn += 3 * kSwitchTableCases; ++ ++ // If offset from here to first branch instr is greater than max allowed ++ // offset for trampoline ... ++ CHECK_LT(kMaxOffsetForTrampolineStart, masm->pc_offset() - offs1); ++ // ... 
number of generated instructions must be greater then "gen_insn", ++ // as we are expecting trampoline generation ++ CHECK_LT(gen_insn, (masm->pc_offset() - offs1) / kInstrSize); ++ ++ __ bind(&done); ++ __ Pop(ra); ++ __ or_(a0, a2, zero_reg); ++ __ jirl(zero_reg, ra, 0); ++ ++ __ bind(&end); ++ __ Branch(&near_start); ++ ++ CodeDesc desc; ++ masm->GetCode(isolate, &desc); ++ Handle code = Factory::CodeBuilder(isolate, desc, Code::STUB).Build(); ++#ifdef OBJECT_PRINT ++ code->Print(std::cout); ++#endif ++ auto f = GeneratedCode::FromCode(*code); ++ for (int i = 0; i < kSwitchTableCases; ++i) { ++ int64_t res = reinterpret_cast(f.Call(i, 0, 0, 0, 0)); ++ ::printf("f(%d) = %" PRId64 "\n", i, res); ++ CHECK_EQ(values[i], res); ++ } ++} ++ ++static uint64_t run_alsl_w(uint32_t rj, uint32_t rk, int8_t sa) { ++ Isolate* isolate = CcTest::i_isolate(); ++ HandleScope scope(isolate); ++ MacroAssembler assembler(isolate, v8::internal::CodeObjectRequired::kYes); ++ MacroAssembler* masm = &assembler; ++ ++ __ Alsl_w(a2, a0, a1, sa); ++ __ or_(a0, a2, zero_reg); ++ __ jirl(zero_reg, ra, 0); ++ ++ CodeDesc desc; ++ assembler.GetCode(isolate, &desc); ++ Handle code = Factory::CodeBuilder(isolate, desc, Code::STUB).Build(); ++ ++ auto f = GeneratedCode::FromCode(*code); ++ ++ uint64_t res = reinterpret_cast(f.Call(rj, rk, 0, 0, 0)); ++ ++ return res; ++} ++ ++TEST(ALSL_W) { ++ CcTest::InitializeVM(); ++ struct TestCaseAlsl { ++ int32_t rj; ++ int32_t rk; ++ uint8_t sa; ++ uint64_t expected_res; ++ }; ++ // clang-format off ++ struct TestCaseAlsl tc[] = {// rj, rk, sa, expected_res ++ {0x1, 0x4, 1, 0x6}, ++ {0x1, 0x4, 2, 0x8}, ++ {0x1, 0x4, 3, 0xC}, ++ {0x1, 0x4, 4, 0x14}, ++ {0x1, 0x4, 5, 0x24}, ++ {0x1, 0x0, 1, 0x2}, ++ {0x1, 0x0, 2, 0x4}, ++ {0x1, 0x0, 3, 0x8}, ++ {0x1, 0x0, 4, 0x10}, ++ {0x1, 0x0, 5, 0x20}, ++ {0x0, 0x4, 1, 0x4}, ++ {0x0, 0x4, 2, 0x4}, ++ {0x0, 0x4, 3, 0x4}, ++ {0x0, 0x4, 4, 0x4}, ++ {0x0, 0x4, 5, 0x4}, ++ ++ // Shift overflow. ++ {INT32_MAX, 0x4, 1, 0x2}, ++ {INT32_MAX >> 1, 0x4, 2, 0x0}, ++ {INT32_MAX >> 2, 0x4, 3, 0xFFFFFFFFFFFFFFFC}, ++ {INT32_MAX >> 3, 0x4, 4, 0xFFFFFFFFFFFFFFF4}, ++ {INT32_MAX >> 4, 0x4, 5, 0xFFFFFFFFFFFFFFE4}, ++ ++ // Signed addition overflow. ++ {0x1, INT32_MAX - 1, 1, 0xFFFFFFFF80000000}, ++ {0x1, INT32_MAX - 3, 2, 0xFFFFFFFF80000000}, ++ {0x1, INT32_MAX - 7, 3, 0xFFFFFFFF80000000}, ++ {0x1, INT32_MAX - 15, 4, 0xFFFFFFFF80000000}, ++ {0x1, INT32_MAX - 31, 5, 0xFFFFFFFF80000000}, ++ ++ // Addition overflow. ++ {0x1, -2, 1, 0x0}, ++ {0x1, -4, 2, 0x0}, ++ {0x1, -8, 3, 0x0}, ++ {0x1, -16, 4, 0x0}, ++ {0x1, -32, 5, 0x0}}; ++ // clang-format on ++ size_t nr_test_cases = sizeof(tc) / sizeof(TestCaseAlsl); ++ for (size_t i = 0; i < nr_test_cases; ++i) { ++ uint64_t res = run_alsl_w(tc[i].rj, tc[i].rk, tc[i].sa); ++ PrintF("0x%" PRIx64 " =? 
0x%" PRIx64 " == Alsl_w(a0, %x, %x, %hhu)\n", ++ tc[i].expected_res, res, tc[i].rj, tc[i].rk, tc[i].sa); ++ CHECK_EQ(tc[i].expected_res, res); ++ } ++} ++ ++static uint64_t run_alsl_d(uint64_t rj, uint64_t rk, int8_t sa) { ++ Isolate* isolate = CcTest::i_isolate(); ++ HandleScope scope(isolate); ++ MacroAssembler assembler(isolate, v8::internal::CodeObjectRequired::kYes); ++ MacroAssembler* masm = &assembler; ++ ++ __ Alsl_d(a2, a0, a1, sa); ++ __ or_(a0, a2, zero_reg); ++ __ jirl(zero_reg, ra, 0); ++ ++ CodeDesc desc; ++ assembler.GetCode(isolate, &desc); ++ Handle code = Factory::CodeBuilder(isolate, desc, Code::STUB).Build(); ++ ++ auto f = GeneratedCode::FromCode(*code); ++ ++ uint64_t res = reinterpret_cast(f.Call(rj, rk, 0, 0, 0)); ++ ++ return res; ++} ++ ++TEST(ALSL_D) { ++ CcTest::InitializeVM(); ++ struct TestCaseAlsl { ++ int64_t rj; ++ int64_t rk; ++ uint8_t sa; ++ uint64_t expected_res; ++ }; ++ // clang-format off ++ struct TestCaseAlsl tc[] = {// rj, rk, sa, expected_res ++ {0x1, 0x4, 1, 0x6}, ++ {0x1, 0x4, 2, 0x8}, ++ {0x1, 0x4, 3, 0xC}, ++ {0x1, 0x4, 4, 0x14}, ++ {0x1, 0x4, 5, 0x24}, ++ {0x1, 0x0, 1, 0x2}, ++ {0x1, 0x0, 2, 0x4}, ++ {0x1, 0x0, 3, 0x8}, ++ {0x1, 0x0, 4, 0x10}, ++ {0x1, 0x0, 5, 0x20}, ++ {0x0, 0x4, 1, 0x4}, ++ {0x0, 0x4, 2, 0x4}, ++ {0x0, 0x4, 3, 0x4}, ++ {0x0, 0x4, 4, 0x4}, ++ {0x0, 0x4, 5, 0x4}, ++ ++ // Shift overflow. ++ {INT64_MAX, 0x4, 1, 0x2}, ++ {INT64_MAX >> 1, 0x4, 2, 0x0}, ++ {INT64_MAX >> 2, 0x4, 3, 0xFFFFFFFFFFFFFFFC}, ++ {INT64_MAX >> 3, 0x4, 4, 0xFFFFFFFFFFFFFFF4}, ++ {INT64_MAX >> 4, 0x4, 5, 0xFFFFFFFFFFFFFFE4}, ++ ++ // Signed addition overflow. ++ {0x1, INT64_MAX - 1, 1, 0x8000000000000000}, ++ {0x1, INT64_MAX - 3, 2, 0x8000000000000000}, ++ {0x1, INT64_MAX - 7, 3, 0x8000000000000000}, ++ {0x1, INT64_MAX - 15, 4, 0x8000000000000000}, ++ {0x1, INT64_MAX - 31, 5, 0x8000000000000000}, ++ ++ // Addition overflow. ++ {0x1, -2, 1, 0x0}, ++ {0x1, -4, 2, 0x0}, ++ {0x1, -8, 3, 0x0}, ++ {0x1, -16, 4, 0x0}, ++ {0x1, -32, 5, 0x0}}; ++ // clang-format on ++ ++ size_t nr_test_cases = sizeof(tc) / sizeof(TestCaseAlsl); ++ for (size_t i = 0; i < nr_test_cases; ++i) { ++ uint64_t res = run_alsl_d(tc[i].rj, tc[i].rk, tc[i].sa); ++ PrintF("0x%" PRIx64 " =? 
0x%" PRIx64 " == Dlsa(v0, %" PRIx64 ", %" PRIx64 ++ ", %hhu)\n", ++ tc[i].expected_res, res, tc[i].rj, tc[i].rk, tc[i].sa); ++ CHECK_EQ(tc[i].expected_res, res); ++ } ++} ++// clang-format off ++static const std::vector ffint_ftintrz_uint32_test_values() { ++ static const uint32_t kValues[] = {0x00000000, 0x00000001, 0x00FFFF00, ++ 0x7FFFFFFF, 0x80000000, 0x80000001, ++ 0x80FFFF00, 0x8FFFFFFF, 0xFFFFFFFF}; ++ return std::vector(&kValues[0], &kValues[arraysize(kValues)]); ++} ++ ++static const std::vector ffint_ftintrz_int32_test_values() { ++ static const int32_t kValues[] = { ++ static_cast(0x00000000), static_cast(0x00000001), ++ static_cast(0x00FFFF00), static_cast(0x7FFFFFFF), ++ static_cast(0x80000000), static_cast(0x80000001), ++ static_cast(0x80FFFF00), static_cast(0x8FFFFFFF), ++ static_cast(0xFFFFFFFF)}; ++ return std::vector(&kValues[0], &kValues[arraysize(kValues)]); ++} ++ ++static const std::vector ffint_ftintrz_uint64_test_values() { ++ static const uint64_t kValues[] = { ++ 0x0000000000000000, 0x0000000000000001, 0x0000FFFFFFFF0000, ++ 0x7FFFFFFFFFFFFFFF, 0x8000000000000000, 0x8000000000000001, ++ 0x8000FFFFFFFF0000, 0x8FFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF}; ++ return std::vector(&kValues[0], &kValues[arraysize(kValues)]); ++} ++ ++static const std::vector ffint_ftintrz_int64_test_values() { ++ static const int64_t kValues[] = {static_cast(0x0000000000000000), ++ static_cast(0x0000000000000001), ++ static_cast(0x0000FFFFFFFF0000), ++ static_cast(0x7FFFFFFFFFFFFFFF), ++ static_cast(0x8000000000000000), ++ static_cast(0x8000000000000001), ++ static_cast(0x8000FFFFFFFF0000), ++ static_cast(0x8FFFFFFFFFFFFFFF), ++ static_cast(0xFFFFFFFFFFFFFFFF)}; ++ return std::vector(&kValues[0], &kValues[arraysize(kValues)]); ++} ++ // clang-off on ++ ++// Helper macros that can be used in FOR_INT32_INPUTS(i) { ... *i ... 
} ++#define FOR_INPUTS(ctype, itype, var, test_vector) \ ++ std::vector var##_vec = test_vector(); \ ++ for (std::vector::iterator var = var##_vec.begin(); \ ++ var != var##_vec.end(); ++var) ++ ++#define FOR_INPUTS2(ctype, itype, var, var2, test_vector) \ ++ std::vector var##_vec = test_vector(); \ ++ std::vector::iterator var; \ ++ std::vector::reverse_iterator var2; \ ++ for (var = var##_vec.begin(), var2 = var##_vec.rbegin(); \ ++ var != var##_vec.end(); ++var, ++var2) ++ ++#define FOR_ENUM_INPUTS(var, type, test_vector) \ ++ FOR_INPUTS(enum type, type, var, test_vector) ++#define FOR_STRUCT_INPUTS(var, type, test_vector) \ ++ FOR_INPUTS(struct type, type, var, test_vector) ++#define FOR_INT32_INPUTS(var, test_vector) \ ++ FOR_INPUTS(int32_t, int32, var, test_vector) ++#define FOR_INT32_INPUTS2(var, var2, test_vector) \ ++ FOR_INPUTS2(int32_t, int32, var, var2, test_vector) ++#define FOR_INT64_INPUTS(var, test_vector) \ ++ FOR_INPUTS(int64_t, int64, var, test_vector) ++#define FOR_UINT32_INPUTS(var, test_vector) \ ++ FOR_INPUTS(uint32_t, uint32, var, test_vector) ++#define FOR_UINT64_INPUTS(var, test_vector) \ ++ FOR_INPUTS(uint64_t, uint64, var, test_vector) ++ ++template ++RET_TYPE run_CVT(IN_TYPE x, Func GenerateConvertInstructionFunc) { ++ using F_CVT = RET_TYPE(IN_TYPE x0, int x1, int x2, int x3, int x4); ++ ++ Isolate* isolate = CcTest::i_isolate(); ++ HandleScope scope(isolate); ++ MacroAssembler assm(isolate, v8::internal::CodeObjectRequired::kYes); ++ MacroAssembler* masm = &assm; ++ ++ GenerateConvertInstructionFunc(masm); ++ __ movfr2gr_d(a2, f9); ++ __ or_(a0, a2, zero_reg); ++ __ jirl(zero_reg, ra, 0); ++ ++ CodeDesc desc; ++ assm.GetCode(isolate, &desc); ++ Handle code = Factory::CodeBuilder(isolate, desc, Code::STUB).Build(); ++ ++ auto f = GeneratedCode::FromCode(*code); ++ ++ return reinterpret_cast(f.Call(x, 0, 0, 0, 0)); ++} ++ ++TEST(Ffint_s_uw_Ftintrz_uw_s) { ++ CcTest::InitializeVM(); ++ FOR_UINT32_INPUTS(i, ffint_ftintrz_uint32_test_values) { ++ ++ uint32_t input = *i; ++ auto fn = [](MacroAssembler* masm) { ++ __ Ffint_s_uw(f8, a0); ++ __ movgr2frh_w(f9, zero_reg); ++ __ Ftintrz_uw_s(f9, f8, f10); ++ }; ++ CHECK_EQ(static_cast(input), run_CVT(input, fn)); ++ } ++} ++ ++TEST(Ffint_s_ul_Ftintrz_ul_s) { ++ CcTest::InitializeVM(); ++ FOR_UINT64_INPUTS(i, ffint_ftintrz_uint64_test_values) { ++ uint64_t input = *i; ++ auto fn = [](MacroAssembler* masm) { ++ __ Ffint_s_ul(f8, a0); ++ __ Ftintrz_ul_s(f9, f8, f10, a2); ++ }; ++ CHECK_EQ(static_cast(input), run_CVT(input, fn)); ++ } ++} ++ ++TEST(Ffint_d_uw_Ftintrz_uw_d) { ++ CcTest::InitializeVM(); ++ FOR_UINT64_INPUTS(i, ffint_ftintrz_uint64_test_values) { ++ uint32_t input = *i; ++ auto fn = [](MacroAssembler* masm) { ++ __ Ffint_d_uw(f8, a0); ++ __ movgr2frh_w(f9, zero_reg); ++ __ Ftintrz_uw_d(f9, f8, f10); ++ }; ++ CHECK_EQ(static_cast(input), run_CVT(input, fn)); ++ } ++} ++ ++TEST(Ffint_d_ul_Ftintrz_ul_d) { ++ CcTest::InitializeVM(); ++ FOR_UINT64_INPUTS(i, ffint_ftintrz_uint64_test_values) { ++ uint64_t input = *i; ++ auto fn = [](MacroAssembler* masm) { ++ __ Ffint_d_ul(f8, a0); ++ __ Ftintrz_ul_d(f9, f8, f10, a2); ++ }; ++ CHECK_EQ(static_cast(input), run_CVT(input, fn)); ++ } ++} ++ ++TEST(Ffint_d_l_Ftintrz_l_ud) { ++ CcTest::InitializeVM(); ++ FOR_INT64_INPUTS(i, ffint_ftintrz_int64_test_values) { ++ int64_t input = *i; ++ uint64_t abs_input = (input < 0) ? 
-input : input; ++ auto fn = [](MacroAssembler* masm) { ++ __ movgr2fr_d(f8, a0); ++ __ ffint_d_l(f10, f8); ++ __ Ftintrz_l_ud(f9, f10, f11); ++ }; ++ CHECK_EQ(static_cast(abs_input), run_CVT(input, fn)); ++ } ++} ++ ++TEST(ffint_d_l_Ftint_l_d) { ++ CcTest::InitializeVM(); ++ FOR_INT64_INPUTS(i, ffint_ftintrz_int64_test_values) { ++ int64_t input = *i; ++ auto fn = [](MacroAssembler* masm) { ++ __ movgr2fr_d(f8, a0); ++ __ ffint_d_l(f10, f8); ++ __ Ftintrz_l_d(f9, f10); ++ }; ++ CHECK_EQ(static_cast(input), run_CVT(input, fn)); ++ } ++} ++ ++TEST(ffint_d_w_Ftint_w_d) { ++ CcTest::InitializeVM(); ++ FOR_INT32_INPUTS(i, ffint_ftintrz_int32_test_values) { ++ int32_t input = *i; ++ auto fn = [](MacroAssembler* masm) { ++ __ movgr2fr_w(f8, a0); ++ __ ffint_d_w(f10, f8); ++ __ Ftintrz_w_d(f9, f10); ++ __ movfr2gr_s(a4, f9); ++ __ movgr2fr_d(f9, a4); ++ }; ++ CHECK_EQ(static_cast(input), run_CVT(input, fn)); ++ } ++} ++ ++ ++static const std::vector overflow_int64_test_values() { ++ // clang-format off ++ static const int64_t kValues[] = {static_cast(0xF000000000000000), ++ static_cast(0x0000000000000001), ++ static_cast(0xFF00000000000000), ++ static_cast(0x0000F00111111110), ++ static_cast(0x0F00001000000000), ++ static_cast(0x991234AB12A96731), ++ static_cast(0xB0FFFF0F0F0F0F01), ++ static_cast(0x00006FFFFFFFFFFF), ++ static_cast(0xFFFFFFFFFFFFFFFF)}; ++ // clang-format on ++ return std::vector(&kValues[0], &kValues[arraysize(kValues)]); ++} ++ ++TEST(OverflowInstructions) { ++ CcTest::InitializeVM(); ++ Isolate* isolate = CcTest::i_isolate(); ++ HandleScope handles(isolate); ++ ++ struct T { ++ int64_t lhs; ++ int64_t rhs; ++ int64_t output_add1; ++ int64_t output_add2; ++ int64_t output_sub1; ++ int64_t output_sub2; ++ int64_t output_mul1; ++ int64_t output_mul2; ++ int64_t overflow_add1; ++ int64_t overflow_add2; ++ int64_t overflow_sub1; ++ int64_t overflow_sub2; ++ int64_t overflow_mul1; ++ int64_t overflow_mul2; ++ }; ++ T t; ++ ++ FOR_INT64_INPUTS(i, overflow_int64_test_values) { ++ FOR_INT64_INPUTS(j, overflow_int64_test_values) { ++ int64_t ii = *i; ++ int64_t jj = *j; ++ int64_t expected_add, expected_sub; ++ int32_t ii32 = static_cast(ii); ++ int32_t jj32 = static_cast(jj); ++ int32_t expected_mul; ++ int64_t expected_add_ovf, expected_sub_ovf, expected_mul_ovf; ++ MacroAssembler assembler(isolate, v8::internal::CodeObjectRequired::kYes); ++ MacroAssembler* masm = &assembler; ++ ++ __ ld_d(t0, a0, offsetof(T, lhs)); ++ __ ld_d(t1, a0, offsetof(T, rhs)); ++ ++ __ AdddOverflow(t2, t0, Operand(t1), t3); ++ __ st_d(t2, a0, offsetof(T, output_add1)); ++ __ st_d(t3, a0, offsetof(T, overflow_add1)); ++ __ or_(t3, zero_reg, zero_reg); ++ __ AdddOverflow(t0, t0, Operand(t1), t3); ++ __ st_d(t0, a0, offsetof(T, output_add2)); ++ __ st_d(t3, a0, offsetof(T, overflow_add2)); ++ ++ __ ld_d(t0, a0, offsetof(T, lhs)); ++ __ ld_d(t1, a0, offsetof(T, rhs)); ++ ++ __ SubdOverflow(t2, t0, Operand(t1), t3); ++ __ st_d(t2, a0, offsetof(T, output_sub1)); ++ __ st_d(t3, a0, offsetof(T, overflow_sub1)); ++ __ or_(t3, zero_reg, zero_reg); ++ __ SubdOverflow(t0, t0, Operand(t1), t3); ++ __ st_d(t0, a0, offsetof(T, output_sub2)); ++ __ st_d(t3, a0, offsetof(T, overflow_sub2)); ++ ++ __ ld_d(t0, a0, offsetof(T, lhs)); ++ __ ld_d(t1, a0, offsetof(T, rhs)); ++ __ slli_w(t0, t0, 0); ++ __ slli_w(t1, t1, 0); ++ ++ __ MulOverflow(t2, t0, Operand(t1), t3); ++ __ st_d(t2, a0, offsetof(T, output_mul1)); ++ __ st_d(t3, a0, offsetof(T, overflow_mul1)); ++ __ or_(t3, zero_reg, zero_reg); ++ __ MulOverflow(t0, t0, 
Operand(t1), t3); ++ __ st_d(t0, a0, offsetof(T, output_mul2)); ++ __ st_d(t3, a0, offsetof(T, overflow_mul2)); ++ ++ __ jirl(zero_reg, ra, 0); ++ ++ CodeDesc desc; ++ masm->GetCode(isolate, &desc); ++ Handle code = ++ Factory::CodeBuilder(isolate, desc, Code::STUB).Build(); ++ auto f = GeneratedCode::FromCode(*code); ++ t.lhs = ii; ++ t.rhs = jj; ++ f.Call(&t, 0, 0, 0, 0); ++ ++ expected_add_ovf = base::bits::SignedAddOverflow64(ii, jj, &expected_add); ++ expected_sub_ovf = base::bits::SignedSubOverflow64(ii, jj, &expected_sub); ++ expected_mul_ovf = ++ base::bits::SignedMulOverflow32(ii32, jj32, &expected_mul); ++ ++ CHECK_EQ(expected_add_ovf, t.overflow_add1 < 0); ++ CHECK_EQ(expected_sub_ovf, t.overflow_sub1 < 0); ++ CHECK_EQ(expected_mul_ovf, t.overflow_mul1 != 0); ++ ++ CHECK_EQ(t.overflow_add1, t.overflow_add2); ++ CHECK_EQ(t.overflow_sub1, t.overflow_sub2); ++ CHECK_EQ(t.overflow_mul1, t.overflow_mul2); ++ ++ CHECK_EQ(expected_add, t.output_add1); ++ CHECK_EQ(expected_add, t.output_add2); ++ CHECK_EQ(expected_sub, t.output_sub1); ++ CHECK_EQ(expected_sub, t.output_sub2); ++ if (!expected_mul_ovf) { ++ CHECK_EQ(expected_mul, t.output_mul1); ++ CHECK_EQ(expected_mul, t.output_mul2); ++ } ++ } ++ } ++} ++ ++TEST(min_max_nan) { ++ CcTest::InitializeVM(); ++ Isolate* isolate = CcTest::i_isolate(); ++ HandleScope scope(isolate); ++ MacroAssembler assembler(isolate, v8::internal::CodeObjectRequired::kYes); ++ MacroAssembler* masm = &assembler; ++ ++ struct TestFloat { ++ double a; ++ double b; ++ double c; ++ double d; ++ float e; ++ float f; ++ float g; ++ float h; ++ }; ++ ++ TestFloat test; ++ const double dnan = std::numeric_limits::quiet_NaN(); ++ const double dinf = std::numeric_limits::infinity(); ++ const double dminf = -std::numeric_limits::infinity(); ++ const float fnan = std::numeric_limits::quiet_NaN(); ++ const float finf = std::numeric_limits::infinity(); ++ const float fminf = -std::numeric_limits::infinity(); ++ const int kTableLength = 13; ++ ++ // clang-format off ++ double inputsa[kTableLength] = {dnan, 3.0, -0.0, 0.0, 42.0, dinf, dminf, ++ dinf, dnan, 3.0, dinf, dnan, dnan}; ++ double inputsb[kTableLength] = {dnan, 2.0, 0.0, -0.0, dinf, 42.0, dinf, ++ dminf, 3.0, dnan, dnan, dinf, dnan}; ++ double outputsdmin[kTableLength] = {dnan, 2.0, -0.0, -0.0, 42.0, ++ 42.0, dminf, dminf, dnan, dnan, ++ dnan, dnan, dnan}; ++ double outputsdmax[kTableLength] = {dnan, 3.0, 0.0, 0.0, dinf, dinf, dinf, ++ dinf, dnan, dnan, dnan, dnan, dnan}; ++ ++ float inputse[kTableLength] = {2.0, 3.0, -0.0, 0.0, 42.0, finf, fminf, ++ finf, fnan, 3.0, finf, fnan, fnan}; ++ float inputsf[kTableLength] = {3.0, 2.0, 0.0, -0.0, finf, 42.0, finf, ++ fminf, 3.0, fnan, fnan, finf, fnan}; ++ float outputsfmin[kTableLength] = {2.0, 2.0, -0.0, -0.0, 42.0, 42.0, fminf, ++ fminf, fnan, fnan, fnan, fnan, fnan}; ++ float outputsfmax[kTableLength] = {3.0, 3.0, 0.0, 0.0, finf, finf, finf, ++ finf, fnan, fnan, fnan, fnan, fnan}; ++ ++ // clang-format on ++ auto handle_dnan = [masm](FPURegister dst, Label* nan, Label* back) { ++ __ bind(nan); ++ __ LoadRoot(t8, RootIndex::kNanValue); ++ __ Fld_d(dst, FieldMemOperand(t8, HeapNumber::kValueOffset)); ++ __ Branch(back); ++ }; ++ ++ auto handle_snan = [masm, fnan](FPURegister dst, Label* nan, Label* back) { ++ __ bind(nan); ++ __ Move(dst, fnan); ++ __ Branch(back); ++ }; ++ ++ Label handle_mind_nan, handle_maxd_nan, handle_mins_nan, handle_maxs_nan; ++ Label back_mind_nan, back_maxd_nan, back_mins_nan, back_maxs_nan; ++ ++ __ push(s6); ++ __ InitializeRootRegister(); 
++ __ Fld_d(f8, MemOperand(a0, offsetof(TestFloat, a))); ++ __ Fld_d(f9, MemOperand(a0, offsetof(TestFloat, b))); ++ __ Fld_s(f10, MemOperand(a0, offsetof(TestFloat, e))); ++ __ Fld_s(f11, MemOperand(a0, offsetof(TestFloat, f))); ++ __ Float64Min(f12, f8, f9, &handle_mind_nan); ++ __ bind(&back_mind_nan); ++ __ Float64Max(f13, f8, f9, &handle_maxd_nan); ++ __ bind(&back_maxd_nan); ++ __ Float32Min(f14, f10, f11, &handle_mins_nan); ++ __ bind(&back_mins_nan); ++ __ Float32Max(f15, f10, f11, &handle_maxs_nan); ++ __ bind(&back_maxs_nan); ++ __ Fst_d(f12, MemOperand(a0, offsetof(TestFloat, c))); ++ __ Fst_d(f13, MemOperand(a0, offsetof(TestFloat, d))); ++ __ Fst_s(f14, MemOperand(a0, offsetof(TestFloat, g))); ++ __ Fst_s(f15, MemOperand(a0, offsetof(TestFloat, h))); ++ __ pop(s6); ++ __ jirl(zero_reg, ra, 0); ++ ++ handle_dnan(f12, &handle_mind_nan, &back_mind_nan); ++ handle_dnan(f13, &handle_maxd_nan, &back_maxd_nan); ++ handle_snan(f14, &handle_mins_nan, &back_mins_nan); ++ handle_snan(f15, &handle_maxs_nan, &back_maxs_nan); ++ ++ CodeDesc desc; ++ masm->GetCode(isolate, &desc); ++ Handle code = Factory::CodeBuilder(isolate, desc, Code::STUB).Build(); ++ auto f = GeneratedCode::FromCode(*code); ++ for (int i = 0; i < kTableLength; i++) { ++ test.a = inputsa[i]; ++ test.b = inputsb[i]; ++ test.e = inputse[i]; ++ test.f = inputsf[i]; ++ ++ f.Call(&test, 0, 0, 0, 0); ++ CHECK_EQ(0, memcmp(&test.c, &outputsdmin[i], sizeof(test.c))); ++ CHECK_EQ(0, memcmp(&test.d, &outputsdmax[i], sizeof(test.d))); ++ CHECK_EQ(0, memcmp(&test.g, &outputsfmin[i], sizeof(test.g))); ++ CHECK_EQ(0, memcmp(&test.h, &outputsfmax[i], sizeof(test.h))); ++ } ++} ++ ++template ++bool run_Unaligned(char* memory_buffer, int32_t in_offset, int32_t out_offset, ++ IN_TYPE value, Func GenerateUnalignedInstructionFunc) { ++ using F_CVT = int32_t(char* x0, int x1, int x2, int x3, int x4); ++ ++ Isolate* isolate = CcTest::i_isolate(); ++ HandleScope scope(isolate); ++ MacroAssembler assm(isolate, v8::internal::CodeObjectRequired::kYes); ++ MacroAssembler* masm = &assm; ++ IN_TYPE res; ++ ++ GenerateUnalignedInstructionFunc(masm, in_offset, out_offset); ++ __ jirl(zero_reg, ra, 0); ++ ++ CodeDesc desc; ++ assm.GetCode(isolate, &desc); ++ Handle code = Factory::CodeBuilder(isolate, desc, Code::STUB).Build(); ++ ++ auto f = GeneratedCode::FromCode(*code); ++ ++ MemCopy(memory_buffer + in_offset, &value, sizeof(IN_TYPE)); ++ f.Call(memory_buffer, 0, 0, 0, 0); ++ MemCopy(&res, memory_buffer + out_offset, sizeof(IN_TYPE)); ++ ++ return res == value; ++} ++ ++static const std::vector unsigned_test_values() { ++ // clang-format off ++ static const uint64_t kValues[] = { ++ 0x2180F18A06384414, 0x000A714532102277, 0xBC1ACCCF180649F0, ++ 0x8000000080008000, 0x0000000000000001, 0xFFFFFFFFFFFFFFFF, ++ }; ++ // clang-format on ++ return std::vector(&kValues[0], &kValues[arraysize(kValues)]); ++} ++ ++static const std::vector unsigned_test_offset() { ++ static const int32_t kValues[] = {// value, offset ++ -132 * KB, -21 * KB, 0, 19 * KB, 135 * KB}; ++ return std::vector(&kValues[0], &kValues[arraysize(kValues)]); ++} ++ ++static const std::vector unsigned_test_offset_increment() { ++ static const int32_t kValues[] = {-5, -4, -3, -2, -1, 0, 1, 2, 3, 4, 5}; ++ return std::vector(&kValues[0], &kValues[arraysize(kValues)]); ++} ++ ++TEST(Ld_b) { ++ CcTest::InitializeVM(); ++ ++ static const int kBufferSize = 300 * KB; ++ char memory_buffer[kBufferSize]; ++ char* buffer_middle = memory_buffer + (kBufferSize / 2); ++ ++ FOR_UINT64_INPUTS(i, 
unsigned_test_values) { ++ FOR_INT32_INPUTS2(j1, j2, unsigned_test_offset) { ++ FOR_INT32_INPUTS2(k1, k2, unsigned_test_offset_increment) { ++ uint16_t value = static_cast(*i & 0xFFFF); ++ int32_t in_offset = *j1 + *k1; ++ int32_t out_offset = *j2 + *k2; ++ ++ auto fn_1 = [](MacroAssembler* masm, int32_t in_offset, ++ int32_t out_offset) { ++ __ Ld_b(a2, MemOperand(a0, in_offset)); ++ __ St_b(a2, MemOperand(a0, out_offset)); ++ __ or_(a0, a2, zero_reg); ++ }; ++ CHECK_EQ(true, run_Unaligned(buffer_middle, in_offset, ++ out_offset, value, fn_1)); ++ ++ auto fn_2 = [](MacroAssembler* masm, int32_t in_offset, ++ int32_t out_offset) { ++ __ mov(t0, a0); ++ __ Ld_b(a0, MemOperand(a0, in_offset)); ++ __ St_b(a0, MemOperand(t0, out_offset)); ++ __ or_(a0, a2, zero_reg); ++ }; ++ CHECK_EQ(true, run_Unaligned(buffer_middle, in_offset, ++ out_offset, value, fn_2)); ++ ++ auto fn_3 = [](MacroAssembler* masm, int32_t in_offset, ++ int32_t out_offset) { ++ __ mov(t0, a0); ++ __ Ld_bu(a0, MemOperand(a0, in_offset)); ++ __ St_b(a0, MemOperand(t0, out_offset)); ++ __ or_(a0, a2, zero_reg); ++ }; ++ CHECK_EQ(true, run_Unaligned(buffer_middle, in_offset, ++ out_offset, value, fn_3)); ++ ++ auto fn_4 = [](MacroAssembler* masm, int32_t in_offset, ++ int32_t out_offset) { ++ __ Ld_bu(a2, MemOperand(a0, in_offset)); ++ __ St_b(a2, MemOperand(a0, out_offset)); ++ __ or_(a0, a2, zero_reg); ++ }; ++ CHECK_EQ(true, run_Unaligned(buffer_middle, in_offset, ++ out_offset, value, fn_4)); ++ } ++ } ++ } ++} ++ ++TEST(Ld_b_bitextension) { ++ CcTest::InitializeVM(); ++ ++ static const int kBufferSize = 300 * KB; ++ char memory_buffer[kBufferSize]; ++ char* buffer_middle = memory_buffer + (kBufferSize / 2); ++ ++ FOR_UINT64_INPUTS(i, unsigned_test_values) { ++ FOR_INT32_INPUTS2(j1, j2, unsigned_test_offset) { ++ FOR_INT32_INPUTS2(k1, k2, unsigned_test_offset_increment) { ++ uint16_t value = static_cast(*i & 0xFFFF); ++ int32_t in_offset = *j1 + *k1; ++ int32_t out_offset = *j2 + *k2; ++ ++ auto fn = [](MacroAssembler* masm, int32_t in_offset, ++ int32_t out_offset) { ++ Label success, fail, end, different; ++ __ Ld_b(t0, MemOperand(a0, in_offset)); ++ __ Ld_bu(t1, MemOperand(a0, in_offset)); ++ __ Branch(&different, ne, t0, Operand(t1)); ++ ++ // If signed and unsigned values are same, check ++ // the upper bits to see if they are zero ++ __ srai_w(t0, t0, 7); ++ __ Branch(&success, eq, t0, Operand(zero_reg)); ++ __ Branch(&fail); ++ ++ // If signed and unsigned values are different, ++ // check that the upper bits are complementary ++ __ bind(&different); ++ __ srai_w(t1, t1, 7); ++ __ Branch(&fail, ne, t1, Operand(1)); ++ __ srai_w(t0, t0, 7); ++ __ addi_d(t0, t0, 1); ++ __ Branch(&fail, ne, t0, Operand(zero_reg)); ++ // Fall through to success ++ ++ __ bind(&success); ++ __ Ld_b(t0, MemOperand(a0, in_offset)); ++ __ St_b(t0, MemOperand(a0, out_offset)); ++ __ Branch(&end); ++ __ bind(&fail); ++ __ St_b(zero_reg, MemOperand(a0, out_offset)); ++ __ bind(&end); ++ __ or_(a0, a2, zero_reg); ++ }; ++ CHECK_EQ(true, run_Unaligned(buffer_middle, in_offset, ++ out_offset, value, fn)); ++ } ++ } ++ } ++} ++ ++TEST(Ld_h) { ++ CcTest::InitializeVM(); ++ ++ static const int kBufferSize = 300 * KB; ++ char memory_buffer[kBufferSize]; ++ char* buffer_middle = memory_buffer + (kBufferSize / 2); ++ ++ FOR_UINT64_INPUTS(i, unsigned_test_values) { ++ FOR_INT32_INPUTS2(j1, j2, unsigned_test_offset) { ++ FOR_INT32_INPUTS2(k1, k2, unsigned_test_offset_increment) { ++ uint16_t value = static_cast(*i & 0xFFFF); ++ int32_t in_offset = *j1 + 
*k1; ++ int32_t out_offset = *j2 + *k2; ++ ++ auto fn_1 = [](MacroAssembler* masm, int32_t in_offset, ++ int32_t out_offset) { ++ __ Ld_h(a2, MemOperand(a0, in_offset)); ++ __ St_h(a2, MemOperand(a0, out_offset)); ++ __ or_(a0, a2, zero_reg); ++ }; ++ CHECK_EQ(true, run_Unaligned(buffer_middle, in_offset, ++ out_offset, value, fn_1)); ++ ++ auto fn_2 = [](MacroAssembler* masm, int32_t in_offset, ++ int32_t out_offset) { ++ __ mov(t0, a0); ++ __ Ld_h(a0, MemOperand(a0, in_offset)); ++ __ St_h(a0, MemOperand(t0, out_offset)); ++ __ or_(a0, a2, zero_reg); ++ }; ++ CHECK_EQ(true, run_Unaligned(buffer_middle, in_offset, ++ out_offset, value, fn_2)); ++ ++ auto fn_3 = [](MacroAssembler* masm, int32_t in_offset, ++ int32_t out_offset) { ++ __ mov(t0, a0); ++ __ Ld_hu(a0, MemOperand(a0, in_offset)); ++ __ St_h(a0, MemOperand(t0, out_offset)); ++ __ or_(a0, a2, zero_reg); ++ }; ++ CHECK_EQ(true, run_Unaligned(buffer_middle, in_offset, ++ out_offset, value, fn_3)); ++ ++ auto fn_4 = [](MacroAssembler* masm, int32_t in_offset, ++ int32_t out_offset) { ++ __ Ld_hu(a2, MemOperand(a0, in_offset)); ++ __ St_h(a2, MemOperand(a0, out_offset)); ++ __ or_(a0, a2, zero_reg); ++ }; ++ CHECK_EQ(true, run_Unaligned(buffer_middle, in_offset, ++ out_offset, value, fn_4)); ++ } ++ } ++ } ++} ++ ++TEST(Ld_h_bitextension) { ++ CcTest::InitializeVM(); ++ ++ static const int kBufferSize = 300 * KB; ++ char memory_buffer[kBufferSize]; ++ char* buffer_middle = memory_buffer + (kBufferSize / 2); ++ ++ FOR_UINT64_INPUTS(i, unsigned_test_values) { ++ FOR_INT32_INPUTS2(j1, j2, unsigned_test_offset) { ++ FOR_INT32_INPUTS2(k1, k2, unsigned_test_offset_increment) { ++ uint16_t value = static_cast(*i & 0xFFFF); ++ int32_t in_offset = *j1 + *k1; ++ int32_t out_offset = *j2 + *k2; ++ ++ auto fn = [](MacroAssembler* masm, int32_t in_offset, ++ int32_t out_offset) { ++ Label success, fail, end, different; ++ __ Ld_h(t0, MemOperand(a0, in_offset)); ++ __ Ld_hu(t1, MemOperand(a0, in_offset)); ++ __ Branch(&different, ne, t0, Operand(t1)); ++ ++ // If signed and unsigned values are same, check ++ // the upper bits to see if they are zero ++ __ srai_w(t0, t0, 15); ++ __ Branch(&success, eq, t0, Operand(zero_reg)); ++ __ Branch(&fail); ++ ++ // If signed and unsigned values are different, ++ // check that the upper bits are complementary ++ __ bind(&different); ++ __ srai_w(t1, t1, 15); ++ __ Branch(&fail, ne, t1, Operand(1)); ++ __ srai_w(t0, t0, 15); ++ __ addi_d(t0, t0, 1); ++ __ Branch(&fail, ne, t0, Operand(zero_reg)); ++ // Fall through to success ++ ++ __ bind(&success); ++ __ Ld_h(t0, MemOperand(a0, in_offset)); ++ __ St_h(t0, MemOperand(a0, out_offset)); ++ __ Branch(&end); ++ __ bind(&fail); ++ __ St_h(zero_reg, MemOperand(a0, out_offset)); ++ __ bind(&end); ++ __ or_(a0, a2, zero_reg); ++ }; ++ CHECK_EQ(true, run_Unaligned(buffer_middle, in_offset, ++ out_offset, value, fn)); ++ } ++ } ++ } ++} ++ ++TEST(Ld_w) { ++ CcTest::InitializeVM(); ++ ++ static const int kBufferSize = 300 * KB; ++ char memory_buffer[kBufferSize]; ++ char* buffer_middle = memory_buffer + (kBufferSize / 2); ++ ++ FOR_UINT64_INPUTS(i, unsigned_test_values) { ++ FOR_INT32_INPUTS2(j1, j2, unsigned_test_offset) { ++ FOR_INT32_INPUTS2(k1, k2, unsigned_test_offset_increment) { ++ uint32_t value = static_cast(*i & 0xFFFFFFFF); ++ int32_t in_offset = *j1 + *k1; ++ int32_t out_offset = *j2 + *k2; ++ ++ auto fn_1 = [](MacroAssembler* masm, int32_t in_offset, ++ int32_t out_offset) { ++ __ Ld_w(a2, MemOperand(a0, in_offset)); ++ __ St_w(a2, MemOperand(a0, 
out_offset)); ++ __ or_(a0, a2, zero_reg); ++ }; ++ CHECK_EQ(true, run_Unaligned(buffer_middle, in_offset, ++ out_offset, value, fn_1)); ++ ++ auto fn_2 = [](MacroAssembler* masm, int32_t in_offset, ++ int32_t out_offset) { ++ __ mov(t0, a0); ++ __ Ld_w(a0, MemOperand(a0, in_offset)); ++ __ St_w(a0, MemOperand(t0, out_offset)); ++ __ or_(a0, a2, zero_reg); ++ }; ++ CHECK_EQ(true, ++ run_Unaligned(buffer_middle, in_offset, out_offset, ++ (uint32_t)value, fn_2)); ++ ++ auto fn_3 = [](MacroAssembler* masm, int32_t in_offset, ++ int32_t out_offset) { ++ __ Ld_wu(a2, MemOperand(a0, in_offset)); ++ __ St_w(a2, MemOperand(a0, out_offset)); ++ __ or_(a0, a2, zero_reg); ++ }; ++ CHECK_EQ(true, run_Unaligned(buffer_middle, in_offset, ++ out_offset, value, fn_3)); ++ ++ auto fn_4 = [](MacroAssembler* masm, int32_t in_offset, ++ int32_t out_offset) { ++ __ mov(t0, a0); ++ __ Ld_wu(a0, MemOperand(a0, in_offset)); ++ __ St_w(a0, MemOperand(t0, out_offset)); ++ __ or_(a0, a2, zero_reg); ++ }; ++ CHECK_EQ(true, ++ run_Unaligned(buffer_middle, in_offset, out_offset, ++ (uint32_t)value, fn_4)); ++ } ++ } ++ } ++} ++ ++TEST(Ld_w_extension) { ++ CcTest::InitializeVM(); ++ ++ static const int kBufferSize = 300 * KB; ++ char memory_buffer[kBufferSize]; ++ char* buffer_middle = memory_buffer + (kBufferSize / 2); ++ ++ FOR_UINT64_INPUTS(i, unsigned_test_values) { ++ FOR_INT32_INPUTS2(j1, j2, unsigned_test_offset) { ++ FOR_INT32_INPUTS2(k1, k2, unsigned_test_offset_increment) { ++ uint32_t value = static_cast(*i & 0xFFFFFFFF); ++ int32_t in_offset = *j1 + *k1; ++ int32_t out_offset = *j2 + *k2; ++ ++ auto fn = [](MacroAssembler* masm, int32_t in_offset, ++ int32_t out_offset) { ++ Label success, fail, end, different; ++ __ Ld_w(t0, MemOperand(a0, in_offset)); ++ __ Ld_wu(t1, MemOperand(a0, in_offset)); ++ __ Branch(&different, ne, t0, Operand(t1)); ++ ++ // If signed and unsigned values are same, check ++ // the upper bits to see if they are zero ++ __ srai_d(t0, t0, 31); ++ __ Branch(&success, eq, t0, Operand(zero_reg)); ++ __ Branch(&fail); ++ ++ // If signed and unsigned values are different, ++ // check that the upper bits are complementary ++ __ bind(&different); ++ __ srai_d(t1, t1, 31); ++ __ Branch(&fail, ne, t1, Operand(1)); ++ __ srai_d(t0, t0, 31); ++ __ addi_d(t0, t0, 1); ++ __ Branch(&fail, ne, t0, Operand(zero_reg)); ++ // Fall through to success ++ ++ __ bind(&success); ++ __ Ld_w(t0, MemOperand(a0, in_offset)); ++ __ St_w(t0, MemOperand(a0, out_offset)); ++ __ Branch(&end); ++ __ bind(&fail); ++ __ St_w(zero_reg, MemOperand(a0, out_offset)); ++ __ bind(&end); ++ __ or_(a0, a2, zero_reg); ++ }; ++ CHECK_EQ(true, run_Unaligned(buffer_middle, in_offset, ++ out_offset, value, fn)); ++ } ++ } ++ } ++} ++ ++TEST(Ld_d) { ++ CcTest::InitializeVM(); ++ ++ static const int kBufferSize = 300 * KB; ++ char memory_buffer[kBufferSize]; ++ char* buffer_middle = memory_buffer + (kBufferSize / 2); ++ ++ FOR_UINT64_INPUTS(i, unsigned_test_values) { ++ FOR_INT32_INPUTS2(j1, j2, unsigned_test_offset) { ++ FOR_INT32_INPUTS2(k1, k2, unsigned_test_offset_increment) { ++ uint64_t value = *i; ++ int32_t in_offset = *j1 + *k1; ++ int32_t out_offset = *j2 + *k2; ++ ++ auto fn_1 = [](MacroAssembler* masm, int32_t in_offset, ++ int32_t out_offset) { ++ __ Ld_d(a2, MemOperand(a0, in_offset)); ++ __ St_d(a2, MemOperand(a0, out_offset)); ++ __ or_(a0, a2, zero_reg); ++ }; ++ CHECK_EQ(true, run_Unaligned(buffer_middle, in_offset, ++ out_offset, value, fn_1)); ++ ++ auto fn_2 = [](MacroAssembler* masm, int32_t in_offset, ++ int32_t 
out_offset) { ++ __ mov(t0, a0); ++ __ Ld_d(a0, MemOperand(a0, in_offset)); ++ __ St_d(a0, MemOperand(t0, out_offset)); ++ __ or_(a0, a2, zero_reg); ++ }; ++ CHECK_EQ(true, ++ run_Unaligned(buffer_middle, in_offset, out_offset, ++ (uint32_t)value, fn_2)); ++ } ++ } ++ } ++} ++ ++TEST(Fld_s) { ++ CcTest::InitializeVM(); ++ ++ static const int kBufferSize = 300 * KB; ++ char memory_buffer[kBufferSize]; ++ char* buffer_middle = memory_buffer + (kBufferSize / 2); ++ ++ FOR_UINT64_INPUTS(i, unsigned_test_values) { ++ FOR_INT32_INPUTS2(j1, j2, unsigned_test_offset) { ++ FOR_INT32_INPUTS2(k1, k2, unsigned_test_offset_increment) { ++ float value = static_cast(*i & 0xFFFFFFFF); ++ int32_t in_offset = *j1 + *k1; ++ int32_t out_offset = *j2 + *k2; ++ ++ auto fn = [](MacroAssembler* masm, int32_t in_offset, ++ int32_t out_offset) { ++ __ Fld_s(f0, MemOperand(a0, in_offset)); ++ __ Fst_s(f0, MemOperand(a0, out_offset)); ++ }; ++ CHECK_EQ(true, run_Unaligned(buffer_middle, in_offset, ++ out_offset, value, fn)); ++ } ++ } ++ } ++} ++ ++TEST(Fld_d) { ++ CcTest::InitializeVM(); ++ ++ static const int kBufferSize = 300 * KB; ++ char memory_buffer[kBufferSize]; ++ char* buffer_middle = memory_buffer + (kBufferSize / 2); ++ ++ FOR_UINT64_INPUTS(i, unsigned_test_values) { ++ FOR_INT32_INPUTS2(j1, j2, unsigned_test_offset) { ++ FOR_INT32_INPUTS2(k1, k2, unsigned_test_offset_increment) { ++ double value = static_cast(*i); ++ int32_t in_offset = *j1 + *k1; ++ int32_t out_offset = *j2 + *k2; ++ ++ auto fn = [](MacroAssembler* masm, int32_t in_offset, ++ int32_t out_offset) { ++ __ Fld_d(f0, MemOperand(a0, in_offset)); ++ __ Fst_d(f0, MemOperand(a0, out_offset)); ++ }; ++ CHECK_EQ(true, run_Unaligned(buffer_middle, in_offset, ++ out_offset, value, fn)); ++ } ++ } ++ } ++} ++ ++static const std::vector sltu_test_values() { ++ // clang-format off ++ static const uint64_t kValues[] = { ++ 0, ++ 1, ++ 0x7FE, ++ 0x7FF, ++ 0x800, ++ 0x801, ++ 0xFFE, ++ 0xFFF, ++ 0xFFFFFFFFFFFFF7FE, ++ 0xFFFFFFFFFFFFF7FF, ++ 0xFFFFFFFFFFFFF800, ++ 0xFFFFFFFFFFFFF801, ++ 0xFFFFFFFFFFFFFFFE, ++ 0xFFFFFFFFFFFFFFFF, ++ }; ++ // clang-format on ++ return std::vector(&kValues[0], &kValues[arraysize(kValues)]); ++} ++ ++template ++bool run_Sltu(uint64_t rj, uint64_t rk, Func GenerateSltuInstructionFunc) { ++ using F_CVT = int64_t(uint64_t x0, uint64_t x1, int x2, int x3, int x4); ++ ++ Isolate* isolate = CcTest::i_isolate(); ++ HandleScope scope(isolate); ++ MacroAssembler assm(isolate, v8::internal::CodeObjectRequired::kYes); ++ MacroAssembler* masm = &assm; ++ ++ GenerateSltuInstructionFunc(masm, rk); ++ __ or_(a0, a2, zero_reg); ++ __ jirl(zero_reg, ra, 0); ++ ++ CodeDesc desc; ++ assm.GetCode(isolate, &desc); ++ Handle code = Factory::CodeBuilder(isolate, desc, Code::STUB).Build(); ++ ++ auto f = GeneratedCode::FromCode(*code); ++ int64_t res = reinterpret_cast(f.Call(rj, rk, 0, 0, 0)); ++ return res == 1; ++} ++ ++TEST(Sltu) { ++ CcTest::InitializeVM(); ++ ++ FOR_UINT64_INPUTS(i, sltu_test_values) { ++ FOR_UINT64_INPUTS(j, sltu_test_values) { ++ uint64_t rj = *i; ++ uint64_t rk = *j; ++ ++ auto fn_1 = [](MacroAssembler* masm, uint64_t imm) { ++ __ Sltu(a2, a0, Operand(imm)); ++ }; ++ CHECK_EQ(rj < rk, run_Sltu(rj, rk, fn_1)); ++ ++ auto fn_2 = [](MacroAssembler* masm, uint64_t imm) { ++ __ Sltu(a2, a0, a1); ++ }; ++ CHECK_EQ(rj < rk, run_Sltu(rj, rk, fn_2)); ++ } ++ } ++} ++ ++template ++static GeneratedCode GenerateMacroFloat32MinMax(MacroAssembler* masm) { ++ T a = T::from_code(8); // f8 ++ T b = T::from_code(9); // f9 ++ T c = 
T::from_code(10); // f10 ++ ++ Label ool_min_abc, ool_min_aab, ool_min_aba; ++ Label ool_max_abc, ool_max_aab, ool_max_aba; ++ ++ Label done_min_abc, done_min_aab, done_min_aba; ++ Label done_max_abc, done_max_aab, done_max_aba; ++ ++#define FLOAT_MIN_MAX(fminmax, res, x, y, done, ool, res_field) \ ++ __ Fld_s(x, MemOperand(a0, offsetof(Inputs, src1_))); \ ++ __ Fld_s(y, MemOperand(a0, offsetof(Inputs, src2_))); \ ++ __ fminmax(res, x, y, &ool); \ ++ __ bind(&done); \ ++ __ Fst_s(a, MemOperand(a1, offsetof(Results, res_field))) ++ ++ // a = min(b, c); ++ FLOAT_MIN_MAX(Float32Min, a, b, c, done_min_abc, ool_min_abc, min_abc_); ++ // a = min(a, b); ++ FLOAT_MIN_MAX(Float32Min, a, a, b, done_min_aab, ool_min_aab, min_aab_); ++ // a = min(b, a); ++ FLOAT_MIN_MAX(Float32Min, a, b, a, done_min_aba, ool_min_aba, min_aba_); ++ ++ // a = max(b, c); ++ FLOAT_MIN_MAX(Float32Max, a, b, c, done_max_abc, ool_max_abc, max_abc_); ++ // a = max(a, b); ++ FLOAT_MIN_MAX(Float32Max, a, a, b, done_max_aab, ool_max_aab, max_aab_); ++ // a = max(b, a); ++ FLOAT_MIN_MAX(Float32Max, a, b, a, done_max_aba, ool_max_aba, max_aba_); ++ ++#undef FLOAT_MIN_MAX ++ ++ __ jirl(zero_reg, ra, 0); ++ ++ // Generate out-of-line cases. ++ __ bind(&ool_min_abc); ++ __ Float32MinOutOfLine(a, b, c); ++ __ Branch(&done_min_abc); ++ ++ __ bind(&ool_min_aab); ++ __ Float32MinOutOfLine(a, a, b); ++ __ Branch(&done_min_aab); ++ ++ __ bind(&ool_min_aba); ++ __ Float32MinOutOfLine(a, b, a); ++ __ Branch(&done_min_aba); ++ ++ __ bind(&ool_max_abc); ++ __ Float32MaxOutOfLine(a, b, c); ++ __ Branch(&done_max_abc); ++ ++ __ bind(&ool_max_aab); ++ __ Float32MaxOutOfLine(a, a, b); ++ __ Branch(&done_max_aab); ++ ++ __ bind(&ool_max_aba); ++ __ Float32MaxOutOfLine(a, b, a); ++ __ Branch(&done_max_aba); ++ ++ CodeDesc desc; ++ masm->GetCode(masm->isolate(), &desc); ++ Handle code = ++ Factory::CodeBuilder(masm->isolate(), desc, Code::STUB).Build(); ++#ifdef DEBUG ++ StdoutStream os; ++ code->Print(os); ++#endif ++ return GeneratedCode::FromCode(*code); ++} ++ ++TEST(macro_float_minmax_f32) { ++ // Test the Float32Min and Float32Max macros. ++ CcTest::InitializeVM(); ++ Isolate* isolate = CcTest::i_isolate(); ++ HandleScope scope(isolate); ++ ++ MacroAssembler assembler(isolate, v8::internal::CodeObjectRequired::kYes); ++ MacroAssembler* masm = &assembler; ++ ++ struct Inputs { ++ float src1_; ++ float src2_; ++ }; ++ ++ struct Results { ++ // Check all register aliasing possibilities in order to exercise all ++ // code-paths in the macro assembler. ++ float min_abc_; ++ float min_aab_; ++ float min_aba_; ++ float max_abc_; ++ float max_aab_; ++ float max_aba_; ++ }; ++ ++ GeneratedCode f = ++ GenerateMacroFloat32MinMax(masm); ++ ++#define CHECK_MINMAX(src1, src2, min, max) \ ++ do { \ ++ Inputs inputs = {src1, src2}; \ ++ Results results; \ ++ f.Call(&inputs, &results, 0, 0, 0); \ ++ CHECK_EQ(bit_cast(min), bit_cast(results.min_abc_)); \ ++ CHECK_EQ(bit_cast(min), bit_cast(results.min_aab_)); \ ++ CHECK_EQ(bit_cast(min), bit_cast(results.min_aba_)); \ ++ CHECK_EQ(bit_cast(max), bit_cast(results.max_abc_)); \ ++ CHECK_EQ(bit_cast(max), bit_cast(results.max_aab_)); \ ++ CHECK_EQ(bit_cast(max), bit_cast(results.max_aba_)); \ ++ /* Use a bit_cast to correctly identify -0.0 and NaNs. 
*/ \ ++ } while (0) ++ ++ float nan_a = std::numeric_limits::quiet_NaN(); ++ float nan_b = std::numeric_limits::quiet_NaN(); ++ ++ CHECK_MINMAX(1.0f, -1.0f, -1.0f, 1.0f); ++ CHECK_MINMAX(-1.0f, 1.0f, -1.0f, 1.0f); ++ CHECK_MINMAX(0.0f, -1.0f, -1.0f, 0.0f); ++ CHECK_MINMAX(-1.0f, 0.0f, -1.0f, 0.0f); ++ CHECK_MINMAX(-0.0f, -1.0f, -1.0f, -0.0f); ++ CHECK_MINMAX(-1.0f, -0.0f, -1.0f, -0.0f); ++ CHECK_MINMAX(0.0f, 1.0f, 0.0f, 1.0f); ++ CHECK_MINMAX(1.0f, 0.0f, 0.0f, 1.0f); ++ ++ CHECK_MINMAX(0.0f, 0.0f, 0.0f, 0.0f); ++ CHECK_MINMAX(-0.0f, -0.0f, -0.0f, -0.0f); ++ CHECK_MINMAX(-0.0f, 0.0f, -0.0f, 0.0f); ++ CHECK_MINMAX(0.0f, -0.0f, -0.0f, 0.0f); ++ ++ CHECK_MINMAX(0.0f, nan_a, nan_a, nan_a); ++ CHECK_MINMAX(nan_a, 0.0f, nan_a, nan_a); ++ CHECK_MINMAX(nan_a, nan_b, nan_a, nan_a); ++ CHECK_MINMAX(nan_b, nan_a, nan_b, nan_b); ++ ++#undef CHECK_MINMAX ++} ++ ++template ++static GeneratedCode GenerateMacroFloat64MinMax(MacroAssembler* masm) { ++ T a = T::from_code(8); // f8 ++ T b = T::from_code(9); // f9 ++ T c = T::from_code(10); // f10 ++ ++ Label ool_min_abc, ool_min_aab, ool_min_aba; ++ Label ool_max_abc, ool_max_aab, ool_max_aba; ++ ++ Label done_min_abc, done_min_aab, done_min_aba; ++ Label done_max_abc, done_max_aab, done_max_aba; ++ ++#define FLOAT_MIN_MAX(fminmax, res, x, y, done, ool, res_field) \ ++ __ Fld_d(x, MemOperand(a0, offsetof(Inputs, src1_))); \ ++ __ Fld_d(y, MemOperand(a0, offsetof(Inputs, src2_))); \ ++ __ fminmax(res, x, y, &ool); \ ++ __ bind(&done); \ ++ __ Fst_d(a, MemOperand(a1, offsetof(Results, res_field))) ++ ++ // a = min(b, c); ++ FLOAT_MIN_MAX(Float64Min, a, b, c, done_min_abc, ool_min_abc, min_abc_); ++ // a = min(a, b); ++ FLOAT_MIN_MAX(Float64Min, a, a, b, done_min_aab, ool_min_aab, min_aab_); ++ // a = min(b, a); ++ FLOAT_MIN_MAX(Float64Min, a, b, a, done_min_aba, ool_min_aba, min_aba_); ++ ++ // a = max(b, c); ++ FLOAT_MIN_MAX(Float64Max, a, b, c, done_max_abc, ool_max_abc, max_abc_); ++ // a = max(a, b); ++ FLOAT_MIN_MAX(Float64Max, a, a, b, done_max_aab, ool_max_aab, max_aab_); ++ // a = max(b, a); ++ FLOAT_MIN_MAX(Float64Max, a, b, a, done_max_aba, ool_max_aba, max_aba_); ++ ++#undef FLOAT_MIN_MAX ++ ++ __ jirl(zero_reg, ra, 0); ++ ++ // Generate out-of-line cases. ++ __ bind(&ool_min_abc); ++ __ Float64MinOutOfLine(a, b, c); ++ __ Branch(&done_min_abc); ++ ++ __ bind(&ool_min_aab); ++ __ Float64MinOutOfLine(a, a, b); ++ __ Branch(&done_min_aab); ++ ++ __ bind(&ool_min_aba); ++ __ Float64MinOutOfLine(a, b, a); ++ __ Branch(&done_min_aba); ++ ++ __ bind(&ool_max_abc); ++ __ Float64MaxOutOfLine(a, b, c); ++ __ Branch(&done_max_abc); ++ ++ __ bind(&ool_max_aab); ++ __ Float64MaxOutOfLine(a, a, b); ++ __ Branch(&done_max_aab); ++ ++ __ bind(&ool_max_aba); ++ __ Float64MaxOutOfLine(a, b, a); ++ __ Branch(&done_max_aba); ++ ++ CodeDesc desc; ++ masm->GetCode(masm->isolate(), &desc); ++ Handle code = ++ Factory::CodeBuilder(masm->isolate(), desc, Code::STUB).Build(); ++#ifdef DEBUG ++ StdoutStream os; ++ code->Print(os); ++#endif ++ return GeneratedCode::FromCode(*code); ++} ++ ++TEST(macro_float_minmax_f64) { ++ // Test the Float64Min and Float64Max macros. 
++ CcTest::InitializeVM(); ++ Isolate* isolate = CcTest::i_isolate(); ++ HandleScope scope(isolate); ++ ++ MacroAssembler assembler(isolate, v8::internal::CodeObjectRequired::kYes); ++ MacroAssembler* masm = &assembler; ++ ++ struct Inputs { ++ double src1_; ++ double src2_; ++ }; ++ ++ struct Results { ++ // Check all register aliasing possibilities in order to exercise all ++ // code-paths in the macro assembler. ++ double min_abc_; ++ double min_aab_; ++ double min_aba_; ++ double max_abc_; ++ double max_aab_; ++ double max_aba_; ++ }; ++ ++ GeneratedCode f = ++ GenerateMacroFloat64MinMax(masm); ++ ++#define CHECK_MINMAX(src1, src2, min, max) \ ++ do { \ ++ Inputs inputs = {src1, src2}; \ ++ Results results; \ ++ f.Call(&inputs, &results, 0, 0, 0); \ ++ CHECK_EQ(bit_cast(min), bit_cast(results.min_abc_)); \ ++ CHECK_EQ(bit_cast(min), bit_cast(results.min_aab_)); \ ++ CHECK_EQ(bit_cast(min), bit_cast(results.min_aba_)); \ ++ CHECK_EQ(bit_cast(max), bit_cast(results.max_abc_)); \ ++ CHECK_EQ(bit_cast(max), bit_cast(results.max_aab_)); \ ++ CHECK_EQ(bit_cast(max), bit_cast(results.max_aba_)); \ ++ /* Use a bit_cast to correctly identify -0.0 and NaNs. */ \ ++ } while (0) ++ ++ double nan_a = std::numeric_limits::quiet_NaN(); ++ double nan_b = std::numeric_limits::quiet_NaN(); ++ ++ CHECK_MINMAX(1.0, -1.0, -1.0, 1.0); ++ CHECK_MINMAX(-1.0, 1.0, -1.0, 1.0); ++ CHECK_MINMAX(0.0, -1.0, -1.0, 0.0); ++ CHECK_MINMAX(-1.0, 0.0, -1.0, 0.0); ++ CHECK_MINMAX(-0.0, -1.0, -1.0, -0.0); ++ CHECK_MINMAX(-1.0, -0.0, -1.0, -0.0); ++ CHECK_MINMAX(0.0, 1.0, 0.0, 1.0); ++ CHECK_MINMAX(1.0, 0.0, 0.0, 1.0); ++ ++ CHECK_MINMAX(0.0, 0.0, 0.0, 0.0); ++ CHECK_MINMAX(-0.0, -0.0, -0.0, -0.0); ++ CHECK_MINMAX(-0.0, 0.0, -0.0, 0.0); ++ CHECK_MINMAX(0.0, -0.0, -0.0, 0.0); ++ ++ CHECK_MINMAX(0.0, nan_a, nan_a, nan_a); ++ CHECK_MINMAX(nan_a, 0.0, nan_a, nan_a); ++ CHECK_MINMAX(nan_a, nan_b, nan_a, nan_a); ++ CHECK_MINMAX(nan_b, nan_a, nan_b, nan_b); ++ ++#undef CHECK_MINMAX ++} ++ ++uint64_t run_Sub_w(uint64_t imm, int32_t num_instr) { ++ Isolate* isolate = CcTest::i_isolate(); ++ HandleScope scope(isolate); ++ ++ MacroAssembler assembler(isolate, v8::internal::CodeObjectRequired::kYes); ++ MacroAssembler* masm = &assembler; ++ ++ Label code_start; ++ __ bind(&code_start); ++ __ Sub_w(a2, zero_reg, Operand(imm)); ++ CHECK_EQ(masm->InstructionsGeneratedSince(&code_start), num_instr); ++ __ or_(a0, a2, zero_reg); ++ __ jirl(zero_reg, ra, 0); ++ ++ CodeDesc desc; ++ masm->GetCode(isolate, &desc); ++ Handle code = Factory::CodeBuilder(isolate, desc, Code::STUB).Build(); ++#ifdef OBJECT_PRINT ++ code->Print(std::cout); ++#endif ++ auto f = GeneratedCode::FromCode(*code); ++ ++ uint64_t res = reinterpret_cast(f.Call(0, 0, 0, 0, 0)); ++ ++ return res; ++} ++ ++TEST(SUB_W) { ++ CcTest::InitializeVM(); ++ ++ // Test Subu macro-instruction for min_int12 and max_int12 border cases. ++ // For subtracting int16 immediate values we use addiu. ++ ++ struct TestCaseSub { ++ uint64_t imm; ++ uint64_t expected_res; ++ int32_t num_instr; ++ }; ++ ++ // We call Sub_w(v0, zero_reg, imm) to test cases listed below. ++ // 0 - imm = expected_res ++ // clang-format off ++ struct TestCaseSub tc[] = { ++ // imm, expected_res, num_instr ++ {0xFFFFFFFFFFFFF800, 0x800, 2}, // min_int12 ++ // The test case above generates ori + add_w instruction sequence. ++ // We can't have just addi_ because -min_int12 > max_int12 so use ++ // register. 
We can load min_int12 to at register with addi_w and then ++ subtract at with sub_w, but now we use ori + add_w because -min_int12 can ++ be loaded using ori. ++ {0x800, 0xFFFFFFFFFFFFF800, 1}, // max_int12 + 1 ++ // Generates addi_w ++ // max_int12 + 1 is not int12 but -(max_int12 + 1) is, just use addi_w. ++ {0xFFFFFFFFFFFFF7FF, 0x801, 2}, // min_int12 - 1 ++ // Generates ori + add_w ++ // To load this value to at we need two instructions and another one to ++ // subtract, lu12i + ori + sub_w. But we can load -value to at using just ++ // ori and then add at register with add_w. ++ {0x801, 0xFFFFFFFFFFFFF7FF, 2}, // max_int12 + 2 ++ // Generates ori + sub_w ++ // Not int12 but is uint12, load value to at with ori and subtract with ++ // sub_w. ++ {0x00010000, 0xFFFFFFFFFFFF0000, 2}, ++ // Generates lu12i_w + sub_w ++ // Load value using lu12i_w to at and subtract with sub_w. ++ {0x00010001, 0xFFFFFFFFFFFEFFFF, 3}, ++ // Generates lu12i + ori + sub_w ++ // We have to generate three instructions in this case. ++ {0x7FFFFFFF, 0xFFFFFFFF80000001, 3}, // max_int32 ++ // Generates lu12i_w + ori + sub_w ++ {0xFFFFFFFF80000000, 0xFFFFFFFF80000000, 2}, // min_int32 ++ // The test case above generates lu12i + sub_w instruction sequence. ++ // The result of 0 - min_int32 equals max_int32 + 1, which wraps around to ++ // min_int32 again. ++ }; ++ // clang-format on ++ ++ size_t nr_test_cases = sizeof(tc) / sizeof(TestCaseSub); ++ for (size_t i = 0; i < nr_test_cases; ++i) { ++ CHECK_EQ(tc[i].expected_res, run_Sub_w(tc[i].imm, tc[i].num_instr)); ++ } ++} ++ ++uint64_t run_Sub_d(uint64_t imm, int32_t num_instr) { ++ Isolate* isolate = CcTest::i_isolate(); ++ HandleScope scope(isolate); ++ ++ MacroAssembler assembler(isolate, v8::internal::CodeObjectRequired::kYes); ++ MacroAssembler* masm = &assembler; ++ ++ Label code_start; ++ __ bind(&code_start); ++ __ Sub_d(a2, zero_reg, Operand(imm)); ++ CHECK_EQ(masm->InstructionsGeneratedSince(&code_start), num_instr); ++ __ or_(a0, a2, zero_reg); ++ __ jirl(zero_reg, ra, 0); ++ ++ CodeDesc desc; ++ masm->GetCode(isolate, &desc); ++ Handle code = Factory::CodeBuilder(isolate, desc, Code::STUB).Build(); ++#ifdef OBJECT_PRINT ++ code->Print(std::cout); ++#endif ++ auto f = GeneratedCode::FromCode(*code); ++ ++ uint64_t res = reinterpret_cast(f.Call(0, 0, 0, 0, 0)); ++ ++ return res; ++} ++ ++TEST(SUB_D) { ++ CcTest::InitializeVM(); ++ ++ // Test Sub_d macro-instruction for min_int12 and max_int12 border cases. ++ // For subtracting int12 immediate values we use addi_d. ++ ++ struct TestCaseSub { ++ uint64_t imm; ++ uint64_t expected_res; ++ int32_t num_instr; ++ }; ++ // We call Sub(v0, zero_reg, imm) to test cases listed below. ++ // 0 - imm = expected_res ++ // clang-format off ++ struct TestCaseSub tc[] = { ++ // imm, expected_res, num_instr ++ {0xFFFFFFFFFFFFF800, 0x800, 2}, // min_int12 ++ // The test case above generates addi_d instruction. ++ // This is int12 value and we can load it using just addi_d. ++ { 0x800, 0xFFFFFFFFFFFFF800, 1}, // max_int12 + 1 ++ // Generates addi_d ++ // max_int12 + 1 is not int12 but is uint12, just use ori.
++ {0xFFFFFFFFFFFFF7FF, 0x801, 2}, // min_int12 - 1 ++ // Generates ori + add_d ++ { 0x801, 0xFFFFFFFFFFFFF7FF, 2}, // max_int12 + 2 ++ // Generates ori + add_d ++ { 0x00001000, 0xFFFFFFFFFFFFF000, 2}, // max_uint12 + 1 ++ // Generates lu12i_w + sub_d ++ { 0x00001001, 0xFFFFFFFFFFFFEFFF, 3}, // max_uint12 + 2 ++ // Generates lu12i_w + ori + sub_d ++ {0x00000000FFFFFFFF, 0xFFFFFFFF00000001, 3}, // max_uint32 ++ // Generates addi_w + li32i_d + sub_d ++ {0x00000000FFFFFFFE, 0xFFFFFFFF00000002, 3}, // max_uint32 - 1 ++ // Generates addi_w + li32i_d + sub_d ++ {0xFFFFFFFF80000000, 0x80000000, 2}, // min_int32 ++ // Generates lu12i_w + sub_d ++ {0x0000000080000000, 0xFFFFFFFF80000000, 2}, // max_int32 + 1 ++ // Generates lu12i_w + add_d ++ {0xFFFF0000FFFF8765, 0x0000FFFF0000789B, 4}, ++ // Generates lu12i_w + ori + lu32i_d + sub ++ {0x1234ABCD87654321, 0xEDCB5432789ABCDF, 5}, ++ // Generates lu12i_w + ori + lu32i_d + lu52i_d + sub ++ {0xFFFF789100000000, 0x876F00000000, 3}, ++ // Generates xor + lu32i_d + sub ++ {0xF12F789100000000, 0xED0876F00000000, 4}, ++ // Generates xor + lu32i_d + lu52i_d + sub ++ {0xF120000000000800, 0xEDFFFFFFFFFF800, 3}, ++ // Generates ori + lu52i_d + sub ++ {0xFFF0000000000000, 0x10000000000000, 2} ++ // Generates lu52i_d + sub ++ }; ++ // clang-format on ++ ++ size_t nr_test_cases = sizeof(tc) / sizeof(TestCaseSub); ++ for (size_t i = 0; i < nr_test_cases; ++i) { ++ CHECK_EQ(tc[i].expected_res, run_Sub_d(tc[i].imm, tc[i].num_instr)); ++ } ++} ++ ++TEST(Move) { ++ CcTest::InitializeVM(); ++ Isolate* isolate = CcTest::i_isolate(); ++ HandleScope scope(isolate); ++ MacroAssembler assembler(isolate, v8::internal::CodeObjectRequired::kYes); ++ MacroAssembler* masm = &assembler; ++ ++ struct T { ++ float a; ++ float b; ++ float result_a; ++ float result_b; ++ double c; ++ double d; ++ double e; ++ double result_c; ++ double result_d; ++ double result_e; ++ }; ++ T t; ++ __ li(a4, static_cast(0x80000000)); ++ __ St_w(a4, MemOperand(a0, offsetof(T, a))); ++ __ li(a5, static_cast(0x12345678)); ++ __ St_w(a5, MemOperand(a0, offsetof(T, b))); ++ __ li(a6, static_cast(0x8877665544332211)); ++ __ St_d(a6, MemOperand(a0, offsetof(T, c))); ++ __ li(a7, static_cast(0x1122334455667788)); ++ __ St_d(a7, MemOperand(a0, offsetof(T, d))); ++ __ li(t0, static_cast(0)); ++ __ St_d(t0, MemOperand(a0, offsetof(T, e))); ++ ++ __ Move(f8, static_cast(0x80000000)); ++ __ Move(f9, static_cast(0x12345678)); ++ __ Move(f10, static_cast(0x8877665544332211)); ++ __ Move(f11, static_cast(0x1122334455667788)); ++ __ Move(f12, static_cast(0)); ++ __ Fst_s(f8, MemOperand(a0, offsetof(T, result_a))); ++ __ Fst_s(f9, MemOperand(a0, offsetof(T, result_b))); ++ __ Fst_d(f10, MemOperand(a0, offsetof(T, result_c))); ++ __ Fst_d(f11, MemOperand(a0, offsetof(T, result_d))); ++ __ Fst_d(f12, MemOperand(a0, offsetof(T, result_e))); ++ __ jirl(zero_reg, ra, 0); ++ ++ CodeDesc desc; ++ masm->GetCode(isolate, &desc); ++ Handle code = Factory::CodeBuilder(isolate, desc, Code::STUB).Build(); ++ auto f = GeneratedCode::FromCode(*code); ++ f.Call(&t, 0, 0, 0, 0); ++ CHECK_EQ(t.a, t.result_a); ++ CHECK_EQ(t.b, t.result_b); ++ CHECK_EQ(t.c, t.result_c); ++ CHECK_EQ(t.d, t.result_d); ++ CHECK_EQ(t.e, t.result_e); ++} ++ ++TEST(Movz_Movn) { ++ const int kTableLength = 4; ++ CcTest::InitializeVM(); ++ Isolate* isolate = CcTest::i_isolate(); ++ HandleScope scope(isolate); ++ MacroAssembler assembler(isolate, v8::internal::CodeObjectRequired::kYes); ++ MacroAssembler* masm = &assembler; ++ ++ struct Test { ++ int64_t rt; ++ 
int64_t a; ++ int64_t b; ++ int64_t bold; ++ int64_t b1; ++ int64_t bold1; ++ int32_t c; ++ int32_t d; ++ int32_t dold; ++ int32_t d1; ++ int32_t dold1; ++ }; ++ ++ Test test; ++ // clang-format off ++ int64_t inputs_D[kTableLength] = { ++ 7, 8, -9, -10 ++ }; ++ int32_t inputs_W[kTableLength] = { ++ 3, 4, -5, -6 ++ }; ++ ++ int32_t outputs_W[kTableLength] = { ++ 3, 4, -5, -6 ++ }; ++ int64_t outputs_D[kTableLength] = { ++ 7, 8, -9, -10 ++ }; ++ // clang-format on ++ ++ __ Ld_d(a4, MemOperand(a0, offsetof(Test, a))); ++ __ Ld_w(a5, MemOperand(a0, offsetof(Test, c))); ++ __ Ld_d(a6, MemOperand(a0, offsetof(Test, rt))); ++ __ li(t0, 1); ++ __ li(t1, 1); ++ __ li(t2, 1); ++ __ li(t3, 1); ++ __ St_d(t0, MemOperand(a0, offsetof(Test, bold))); ++ __ St_d(t1, MemOperand(a0, offsetof(Test, bold1))); ++ __ St_w(t2, MemOperand(a0, offsetof(Test, dold))); ++ __ St_w(t3, MemOperand(a0, offsetof(Test, dold1))); ++ __ Movz(t0, a4, a6); ++ __ Movn(t1, a4, a6); ++ __ Movz(t2, a5, a6); ++ __ Movn(t3, a5, a6); ++ __ St_d(t0, MemOperand(a0, offsetof(Test, b))); ++ __ St_d(t1, MemOperand(a0, offsetof(Test, b1))); ++ __ St_w(t2, MemOperand(a0, offsetof(Test, d))); ++ __ St_w(t3, MemOperand(a0, offsetof(Test, d1))); ++ __ jirl(zero_reg, ra, 0); ++ ++ CodeDesc desc; ++ masm->GetCode(isolate, &desc); ++ Handle code = Factory::CodeBuilder(isolate, desc, Code::STUB).Build(); ++ auto f = GeneratedCode::FromCode(*code); ++ for (int i = 0; i < kTableLength; i++) { ++ test.a = inputs_D[i]; ++ test.c = inputs_W[i]; ++ ++ test.rt = 1; ++ f.Call(&test, 0, 0, 0, 0); ++ CHECK_EQ(test.b, test.bold); ++ CHECK_EQ(test.d, test.dold); ++ CHECK_EQ(test.b1, outputs_D[i]); ++ CHECK_EQ(test.d1, outputs_W[i]); ++ ++ test.rt = 0; ++ f.Call(&test, 0, 0, 0, 0); ++ CHECK_EQ(test.b, outputs_D[i]); ++ CHECK_EQ(test.d, outputs_W[i]); ++ CHECK_EQ(test.b1, test.bold1); ++ CHECK_EQ(test.d1, test.dold1); ++ } ++} ++ ++TEST(macro_instructions1) { ++ // Test 32bit calculate instructions macros. 
++ CcTest::InitializeVM(); ++ Isolate* isolate = CcTest::i_isolate(); ++ HandleScope scope(isolate); ++ MacroAssembler assembler(isolate, v8::internal::CodeObjectRequired::kYes); ++ MacroAssembler* masm = &assembler; ++ ++ Label exit, error; ++ ++ __ li(a4, 0x00000004); ++ __ li(a5, 0x00001234); ++ __ li(a6, 0x12345678); ++ __ li(a7, 0x7FFFFFFF); ++ __ li(t0, static_cast(0xFFFFFFFC)); ++ __ li(t1, static_cast(0xFFFFEDCC)); ++ __ li(t2, static_cast(0xEDCBA988)); ++ __ li(t3, static_cast(0x80000000)); ++ ++ __ or_(a2, zero_reg, zero_reg); ++ __ or_(a3, zero_reg, zero_reg); ++ __ add_w(a2, a7, t1); ++ __ Add_w(a3, t1, a7); ++ __ Branch(&error, ne, a2, Operand(a3)); ++ __ Add_w(t4, t1, static_cast(0x7FFFFFFF)); ++ __ Branch(&error, ne, a2, Operand(t4)); ++ __ addi_w(a2, a6, 0x800); ++ __ Add_w(a3, a6, 0xFFFFF800); ++ __ Branch(&error, ne, a2, Operand(a3)); ++ ++ __ or_(a2, zero_reg, zero_reg); ++ __ or_(a3, zero_reg, zero_reg); ++ __ mul_w(a2, t1, a7); ++ __ Mul_w(a3, t1, a7); ++ __ Branch(&error, ne, a2, Operand(a3)); ++ __ Mul_w(t4, t1, static_cast(0x7FFFFFFF)); ++ __ Branch(&error, ne, a2, Operand(t4)); ++ ++ __ or_(a2, zero_reg, zero_reg); ++ __ or_(a3, zero_reg, zero_reg); ++ __ mulh_w(a2, t1, a7); ++ __ Mulh_w(a3, t1, a7); ++ __ Branch(&error, ne, a2, Operand(a3)); ++ __ Mulh_w(t4, t1, static_cast(0x7FFFFFFF)); ++ __ Branch(&error, ne, a2, Operand(t4)); ++ ++ __ or_(a2, zero_reg, zero_reg); ++ __ or_(a3, zero_reg, zero_reg); ++ __ Mulh_wu(a2, a4, static_cast(0xFFFFEDCC)); ++ __ Branch(&error, ne, a2, Operand(0x3)); ++ __ Mulh_wu(a3, a4, t1); ++ __ Branch(&error, ne, a3, Operand(0x3)); ++ ++ __ or_(a2, zero_reg, zero_reg); ++ __ or_(a3, zero_reg, zero_reg); ++ __ div_w(a2, a7, t2); ++ __ Div_w(a3, a7, t2); ++ __ Branch(&error, ne, a2, Operand(a3)); ++ __ Div_w(t4, a7, static_cast(0xEDCBA988)); ++ __ Branch(&error, ne, a2, Operand(t4)); ++ ++ __ or_(a2, zero_reg, zero_reg); ++ __ or_(a3, zero_reg, zero_reg); ++ __ Div_wu(a2, a7, a5); ++ __ Branch(&error, ne, a2, Operand(0x70821)); ++ __ Div_wu(a3, t0, static_cast(0x00001234)); ++ __ Branch(&error, ne, a3, Operand(0xE1042)); ++ ++ __ or_(a2, zero_reg, zero_reg); ++ __ or_(a3, zero_reg, zero_reg); ++ __ Mod_w(a2, a6, a5); ++ __ Branch(&error, ne, a2, Operand(0xDA8)); ++ __ Mod_w(a3, t2, static_cast(0x00001234)); ++ __ Branch(&error, ne, a3, Operand(0xFFFFFFFFFFFFF258)); ++ ++ __ or_(a2, zero_reg, zero_reg); ++ __ or_(a3, zero_reg, zero_reg); ++ __ Mod_wu(a2, a6, a5); ++ __ Branch(&error, ne, a2, Operand(0xDA8)); ++ __ Mod_wu(a3, t2, static_cast(0x00001234)); ++ __ Branch(&error, ne, a3, Operand(0xF0)); ++ ++ __ li(a2, 0x31415926); ++ __ b(&exit); ++ ++ __ bind(&error); ++ __ li(a2, 0x666); ++ ++ __ bind(&exit); ++ __ or_(a0, a2, zero_reg); ++ __ jirl(zero_reg, ra, 0); ++ ++ CodeDesc desc; ++ masm->GetCode(isolate, &desc); ++ Handle code = Factory::CodeBuilder(isolate, desc, Code::STUB).Build(); ++ auto f = GeneratedCode::FromCode(*code); ++ int64_t res = reinterpret_cast(f.Call(0, 0, 0, 0, 0)); ++ ++ CHECK_EQ(0x31415926L, res); ++} ++ ++TEST(macro_instructions2) { ++ // Test 64bit calculate instructions macros. 
++ CcTest::InitializeVM(); ++ Isolate* isolate = CcTest::i_isolate(); ++ HandleScope scope(isolate); ++ MacroAssembler assembler(isolate, v8::internal::CodeObjectRequired::kYes); ++ MacroAssembler* masm = &assembler; ++ ++ Label exit, error; ++ ++ __ li(a4, 0x17312); ++ __ li(a5, 0x1012131415161718); ++ __ li(a6, 0x51F4B764A26E7412); ++ __ li(a7, 0x7FFFFFFFFFFFFFFF); ++ __ li(t0, static_cast(0xFFFFFFFFFFFFF547)); ++ __ li(t1, static_cast(0xDF6B8F35A10E205C)); ++ __ li(t2, static_cast(0x81F25A87C4236841)); ++ __ li(t3, static_cast(0x8000000000000000)); ++ ++ __ or_(a2, zero_reg, zero_reg); ++ __ or_(a3, zero_reg, zero_reg); ++ __ add_d(a2, a7, t1); ++ __ Add_d(a3, t1, a7); ++ __ Branch(&error, ne, a2, Operand(a3)); ++ __ Add_d(t4, t1, Operand(0x7FFFFFFFFFFFFFFF)); ++ __ Branch(&error, ne, a2, Operand(t4)); ++ __ addi_d(a2, a6, 0x800); ++ __ Add_d(a3, a6, Operand(0xFFFFFFFFFFFFF800)); ++ __ Branch(&error, ne, a2, Operand(a3)); ++ ++ __ or_(a2, zero_reg, zero_reg); ++ __ or_(a3, zero_reg, zero_reg); ++ __ Mul_d(a2, a5, a6); ++ __ Branch(&error, ne, a2, Operand(0xdbe6a8729a547fb0)); ++ __ Mul_d(a3, t0, Operand(0xDF6B8F35A10E205C)); ++ __ Branch(&error, ne, a3, Operand(0x57ad69f40f870584)); ++ ++ __ or_(a2, zero_reg, zero_reg); ++ __ or_(a3, zero_reg, zero_reg); ++ __ Mulh_d(a2, a5, a6); ++ __ Branch(&error, ne, a2, Operand(0x52514c6c6b54467)); ++ __ Mulh_d(a3, t0, Operand(0xDF6B8F35A10E205C)); ++ __ Branch(&error, ne, a3, Operand(0x15d)); ++ ++ __ or_(a2, zero_reg, zero_reg); ++ __ or_(a3, zero_reg, zero_reg); ++ __ Div_d(a2, t0, t1); ++ __ Branch(&error, ne, a2, Operand(static_cast(0))); ++ __ Div_d(a3, t1, Operand(0x17312)); ++ __ Branch(&error, ne, a3, Operand(0xffffe985f631e6d9)); ++ ++ __ or_(a2, zero_reg, zero_reg); ++ __ or_(a3, zero_reg, zero_reg); ++ __ Div_du(a2, t0, t1); ++ __ Branch(&error, ne, a2, Operand(0x1)); ++ __ Div_du(a3, t1, 0x17312); ++ __ Branch(&error, ne, a3, Operand(0x9a22ffd3973d)); ++ ++ __ or_(a2, zero_reg, zero_reg); ++ __ or_(a3, zero_reg, zero_reg); ++ __ Mod_d(a2, a6, a4); ++ __ Branch(&error, ne, a2, Operand(0x13558)); ++ __ Mod_d(a3, t2, Operand(0xFFFFFFFFFFFFF547)); ++ __ Branch(&error, ne, a3, Operand(0xfffffffffffffb0a)); ++ ++ __ or_(a2, zero_reg, zero_reg); ++ __ or_(a3, zero_reg, zero_reg); ++ __ Mod_du(a2, a6, a4); ++ __ Branch(&error, ne, a2, Operand(0x13558)); ++ __ Mod_du(a3, t2, Operand(0xFFFFFFFFFFFFF547)); ++ __ Branch(&error, ne, a3, Operand(0x81f25a87c4236841)); ++ ++ __ li(a2, 0x31415926); ++ __ b(&exit); ++ ++ __ bind(&error); ++ __ li(a2, 0x666); ++ ++ __ bind(&exit); ++ __ or_(a0, a2, zero_reg); ++ __ jirl(zero_reg, ra, 0); ++ ++ CodeDesc desc; ++ masm->GetCode(isolate, &desc); ++ Handle code = Factory::CodeBuilder(isolate, desc, Code::STUB).Build(); ++ auto f = GeneratedCode::FromCode(*code); ++ int64_t res = reinterpret_cast(f.Call(0, 0, 0, 0, 0)); ++ ++ CHECK_EQ(0x31415926L, res); ++} ++ ++TEST(macro_instructions3) { ++ // Test 64bit calculate instructions macros. 
++ CcTest::InitializeVM(); ++ Isolate* isolate = CcTest::i_isolate(); ++ HandleScope scope(isolate); ++ MacroAssembler assembler(isolate, v8::internal::CodeObjectRequired::kYes); ++ MacroAssembler* masm = &assembler; ++ ++ Label exit, error; ++ ++ __ li(a4, 0x17312); ++ __ li(a5, 0x1012131415161718); ++ __ li(a6, 0x51F4B764A26E7412); ++ __ li(a7, 0x7FFFFFFFFFFFFFFF); ++ __ li(t0, static_cast(0xFFFFFFFFFFFFF547)); ++ __ li(t1, static_cast(0xDF6B8F35A10E205C)); ++ __ li(t2, static_cast(0x81F25A87C4236841)); ++ __ li(t3, static_cast(0x8000000000000000)); ++ ++ __ or_(a2, zero_reg, zero_reg); ++ __ or_(a3, zero_reg, zero_reg); ++ __ And(a2, a4, a5); ++ __ Branch(&error, ne, a2, Operand(0x1310)); ++ __ And(a3, a6, Operand(0x7FFFFFFFFFFFFFFF)); ++ __ Branch(&error, ne, a3, Operand(0x51F4B764A26E7412)); ++ __ andi(a2, a6, 0xDCB); ++ __ And(a3, a6, Operand(0xDCB)); ++ __ Branch(&error, ne, a3, Operand(a2)); ++ ++ __ or_(a2, zero_reg, zero_reg); ++ __ or_(a3, zero_reg, zero_reg); ++ __ Or(a2, t0, t1); ++ __ Branch(&error, ne, a2, Operand(0xfffffffffffff55f)); ++ __ Or(a3, t2, Operand(0x8000000000000000)); ++ __ Branch(&error, ne, a3, Operand(0x81f25a87c4236841)); ++ __ ori(a2, a5, 0xDCB); ++ __ Or(a3, a5, Operand(0xDCB)); ++ __ Branch(&error, ne, a2, Operand(a3)); ++ ++ __ or_(a2, zero_reg, zero_reg); ++ __ or_(a3, zero_reg, zero_reg); ++ __ Orn(a2, t0, t1); ++ __ Branch(&error, ne, a2, Operand(0xffffffffffffffe7)); ++ __ Orn(a3, t2, Operand(0x81F25A87C4236841)); ++ __ Branch(&error, ne, a3, Operand(0xffffffffffffffff)); ++ ++ __ or_(a2, zero_reg, zero_reg); ++ __ or_(a3, zero_reg, zero_reg); ++ __ Xor(a2, t0, t1); ++ __ Branch(&error, ne, a2, Operand(0x209470ca5ef1d51b)); ++ __ Xor(a3, t2, Operand(0x8000000000000000)); ++ __ Branch(&error, ne, a3, Operand(0x1f25a87c4236841)); ++ __ Xor(a2, t2, Operand(0xDCB)); ++ __ Branch(&error, ne, a2, Operand(0x81f25a87c423658a)); ++ ++ __ or_(a2, zero_reg, zero_reg); ++ __ or_(a3, zero_reg, zero_reg); ++ __ Nor(a2, a4, a5); ++ __ Branch(&error, ne, a2, Operand(0xefedecebeae888e5)); ++ __ Nor(a3, a6, Operand(0x7FFFFFFFFFFFFFFF)); ++ __ Branch(&error, ne, a3, Operand(0x8000000000000000)); ++ ++ __ or_(a2, zero_reg, zero_reg); ++ __ or_(a3, zero_reg, zero_reg); ++ __ Andn(a2, a4, a5); ++ __ Branch(&error, ne, a2, Operand(0x16002)); ++ __ Andn(a3, a6, Operand(0x7FFFFFFFFFFFFFFF)); ++ __ Branch(&error, ne, a3, Operand(static_cast(0))); ++ ++ __ or_(a2, zero_reg, zero_reg); ++ __ or_(a3, zero_reg, zero_reg); ++ __ Orn(a2, t0, t1); ++ __ Branch(&error, ne, a2, Operand(0xffffffffffffffe7)); ++ __ Orn(a3, t2, Operand(0x8000000000000000)); ++ __ Branch(&error, ne, a3, Operand(0xffffffffffffffff)); ++ ++ __ or_(a2, zero_reg, zero_reg); ++ __ or_(a3, zero_reg, zero_reg); ++ __ Neg(a2, a7); ++ __ Branch(&error, ne, a2, Operand(0x8000000000000001)); ++ __ Neg(a3, t0); ++ __ Branch(&error, ne, a3, Operand(0xAB9)); ++ ++ __ or_(a2, zero_reg, zero_reg); ++ __ or_(a3, zero_reg, zero_reg); ++ __ Slt(a2, a5, a6); ++ __ Branch(&error, ne, a2, Operand(0x1)); ++ __ Slt(a3, a7, Operand(0xFFFFFFFFFFFFF547)); ++ __ Branch(&error, ne, a3, Operand(static_cast(0))); ++ __ Slt(a3, a4, 0x800); ++ __ Branch(&error, ne, a3, Operand(static_cast(0))); ++ ++ __ or_(a2, zero_reg, zero_reg); ++ __ or_(a3, zero_reg, zero_reg); ++ __ Sle(a2, a5, a6); ++ __ Branch(&error, ne, a2, Operand(0x1)); ++ __ Sle(a3, t0, Operand(0xFFFFFFFFFFFFF547)); ++ __ Branch(&error, ne, a3, Operand(static_cast(0x1))); ++ __ Sle(a2, a7, t0); ++ __ Branch(&error, ne, a2, Operand(static_cast(0))); ++ ++ __ or_(a2, 
zero_reg, zero_reg); ++ __ or_(a3, zero_reg, zero_reg); ++ __ Sleu(a2, a5, a6); ++ __ Branch(&error, ne, a2, Operand(0x1)); ++ __ Sleu(a3, t0, Operand(0xFFFFFFFFFFFFF547)); ++ __ Branch(&error, ne, a3, Operand(static_cast(0x1))); ++ __ Sleu(a2, a7, t0); ++ __ Branch(&error, ne, a2, Operand(static_cast(0x1))); ++ ++ __ or_(a2, zero_reg, zero_reg); ++ __ or_(a3, zero_reg, zero_reg); ++ __ Sge(a2, a5, a6); ++ __ Branch(&error, ne, a2, Operand(static_cast(0))); ++ __ Sge(a3, t0, Operand(0xFFFFFFFFFFFFF547)); ++ __ Branch(&error, ne, a3, Operand(static_cast(0x1))); ++ __ Sge(a2, a7, t0); ++ __ Branch(&error, ne, a2, Operand(static_cast(0x1))); ++ ++ __ or_(a2, zero_reg, zero_reg); ++ __ or_(a3, zero_reg, zero_reg); ++ __ Sgeu(a2, a5, a6); ++ __ Branch(&error, ne, a2, Operand(static_cast(0))); ++ __ Sgeu(a3, t0, Operand(0xFFFFFFFFFFFFF547)); ++ __ Branch(&error, ne, a3, Operand(static_cast(0x1))); ++ __ Sgeu(a2, a7, t0); ++ __ Branch(&error, ne, a2, Operand(static_cast(0))); ++ ++ __ or_(a2, zero_reg, zero_reg); ++ __ or_(a3, zero_reg, zero_reg); ++ __ Sgt(a2, a5, a6); ++ __ Branch(&error, ne, a2, Operand(static_cast(0))); ++ __ Sgt(a3, t0, Operand(0xFFFFFFFFFFFFF547)); ++ __ Branch(&error, ne, a3, Operand(static_cast(0))); ++ __ Sgt(a2, a7, t0); ++ __ Branch(&error, ne, a2, Operand(static_cast(0x1))); ++ ++ __ or_(a2, zero_reg, zero_reg); ++ __ or_(a3, zero_reg, zero_reg); ++ __ Sgtu(a2, a5, a6); ++ __ Branch(&error, ne, a2, Operand(static_cast(0))); ++ __ Sgtu(a3, t0, Operand(0xFFFFFFFFFFFFF547)); ++ __ Branch(&error, ne, a3, Operand(static_cast(0))); ++ __ Sgtu(a2, a7, t0); ++ __ Branch(&error, ne, a2, Operand(static_cast(0))); ++ ++ __ li(a2, 0x31415926); ++ __ b(&exit); ++ ++ __ bind(&error); ++ __ li(a2, 0x666); ++ ++ __ bind(&exit); ++ __ or_(a0, a2, zero_reg); ++ __ jirl(zero_reg, ra, 0); ++ ++ CodeDesc desc; ++ masm->GetCode(isolate, &desc); ++ Handle code = Factory::CodeBuilder(isolate, desc, Code::STUB).Build(); ++ auto f = GeneratedCode::FromCode(*code); ++ int64_t res = reinterpret_cast(f.Call(0, 0, 0, 0, 0)); ++ ++ CHECK_EQ(0x31415926L, res); ++} ++ ++TEST(Rotr_w) { ++ CcTest::InitializeVM(); ++ Isolate* isolate = CcTest::i_isolate(); ++ HandleScope scope(isolate); ++ MacroAssembler assembler(isolate, v8::internal::CodeObjectRequired::kYes); ++ MacroAssembler* masm = &assembler; ++ ++ struct T { ++ int32_t input; ++ int32_t result_rotr_0; ++ int32_t result_rotr_4; ++ int32_t result_rotr_8; ++ int32_t result_rotr_12; ++ int32_t result_rotr_16; ++ int32_t result_rotr_20; ++ int32_t result_rotr_24; ++ int32_t result_rotr_28; ++ int32_t result_rotr_32; ++ int32_t result_rotri_0; ++ int32_t result_rotri_4; ++ int32_t result_rotri_8; ++ int32_t result_rotri_12; ++ int32_t result_rotri_16; ++ int32_t result_rotri_20; ++ int32_t result_rotri_24; ++ int32_t result_rotri_28; ++ int32_t result_rotri_32; ++ }; ++ T t; ++ ++ __ Ld_w(a4, MemOperand(a0, offsetof(T, input))); ++ ++ __ Rotr_w(a5, a4, 0); ++ __ Rotr_w(a6, a4, 0x04); ++ __ Rotr_w(a7, a4, 0x08); ++ __ Rotr_w(t0, a4, 0x0C); ++ __ Rotr_w(t1, a4, 0x10); ++ __ Rotr_w(t2, a4, -0x0C); ++ __ Rotr_w(t3, a4, -0x08); ++ __ Rotr_w(t4, a4, -0x04); ++ __ Rotr_w(t5, a4, 0x20); ++ __ St_w(a5, MemOperand(a0, offsetof(T, result_rotr_0))); ++ __ St_w(a6, MemOperand(a0, offsetof(T, result_rotr_4))); ++ __ St_w(a7, MemOperand(a0, offsetof(T, result_rotr_8))); ++ __ St_w(t0, MemOperand(a0, offsetof(T, result_rotr_12))); ++ __ St_w(t1, MemOperand(a0, offsetof(T, result_rotr_16))); ++ __ St_w(t2, MemOperand(a0, offsetof(T, result_rotr_20))); ++ __ St_w(t3, 
MemOperand(a0, offsetof(T, result_rotr_24))); ++ __ St_w(t4, MemOperand(a0, offsetof(T, result_rotr_28))); ++ __ St_w(t5, MemOperand(a0, offsetof(T, result_rotr_32))); ++ ++ __ li(t5, 0); ++ __ Rotr_w(a5, a4, t5); ++ __ li(t5, 0x04); ++ __ Rotr_w(a6, a4, t5); ++ __ li(t5, 0x08); ++ __ Rotr_w(a7, a4, t5); ++ __ li(t5, 0x0C); ++ __ Rotr_w(t0, a4, t5); ++ __ li(t5, 0x10); ++ __ Rotr_w(t1, a4, t5); ++ __ li(t5, -0x0C); ++ __ Rotr_w(t2, a4, t5); ++ __ li(t5, -0x08); ++ __ Rotr_w(t3, a4, t5); ++ __ li(t5, -0x04); ++ __ Rotr_w(t4, a4, t5); ++ __ li(t5, 0x20); ++ __ Rotr_w(t5, a4, t5); ++ ++ __ St_w(a5, MemOperand(a0, offsetof(T, result_rotri_0))); ++ __ St_w(a6, MemOperand(a0, offsetof(T, result_rotri_4))); ++ __ St_w(a7, MemOperand(a0, offsetof(T, result_rotri_8))); ++ __ St_w(t0, MemOperand(a0, offsetof(T, result_rotri_12))); ++ __ St_w(t1, MemOperand(a0, offsetof(T, result_rotri_16))); ++ __ St_w(t2, MemOperand(a0, offsetof(T, result_rotri_20))); ++ __ St_w(t3, MemOperand(a0, offsetof(T, result_rotri_24))); ++ __ St_w(t4, MemOperand(a0, offsetof(T, result_rotri_28))); ++ __ St_w(t5, MemOperand(a0, offsetof(T, result_rotri_32))); ++ ++ __ jirl(zero_reg, ra, 0); ++ ++ CodeDesc desc; ++ masm->GetCode(isolate, &desc); ++ Handle code = Factory::CodeBuilder(isolate, desc, Code::STUB).Build(); ++ auto f = GeneratedCode::FromCode(*code); ++ t.input = 0x12345678; ++ f.Call(&t, 0, 0, 0, 0); ++ ++ CHECK_EQ(static_cast(0x12345678), t.result_rotr_0); ++ CHECK_EQ(static_cast(0x81234567), t.result_rotr_4); ++ CHECK_EQ(static_cast(0x78123456), t.result_rotr_8); ++ CHECK_EQ(static_cast(0x67812345), t.result_rotr_12); ++ CHECK_EQ(static_cast(0x56781234), t.result_rotr_16); ++ CHECK_EQ(static_cast(0x45678123), t.result_rotr_20); ++ CHECK_EQ(static_cast(0x34567812), t.result_rotr_24); ++ CHECK_EQ(static_cast(0x23456781), t.result_rotr_28); ++ CHECK_EQ(static_cast(0x12345678), t.result_rotr_32); ++ ++ CHECK_EQ(static_cast(0x12345678), t.result_rotri_0); ++ CHECK_EQ(static_cast(0x81234567), t.result_rotri_4); ++ CHECK_EQ(static_cast(0x78123456), t.result_rotri_8); ++ CHECK_EQ(static_cast(0x67812345), t.result_rotri_12); ++ CHECK_EQ(static_cast(0x56781234), t.result_rotri_16); ++ CHECK_EQ(static_cast(0x45678123), t.result_rotri_20); ++ CHECK_EQ(static_cast(0x34567812), t.result_rotri_24); ++ CHECK_EQ(static_cast(0x23456781), t.result_rotri_28); ++ CHECK_EQ(static_cast(0x12345678), t.result_rotri_32); ++} ++ ++TEST(Rotr_d) { ++ CcTest::InitializeVM(); ++ Isolate* isolate = CcTest::i_isolate(); ++ HandleScope scope(isolate); ++ MacroAssembler assembler(isolate, v8::internal::CodeObjectRequired::kYes); ++ MacroAssembler* masm = &assembler; ++ ++ struct T { ++ int64_t input; ++ int64_t result_rotr_0; ++ int64_t result_rotr_8; ++ int64_t result_rotr_16; ++ int64_t result_rotr_24; ++ int64_t result_rotr_32; ++ int64_t result_rotr_40; ++ int64_t result_rotr_48; ++ int64_t result_rotr_56; ++ int64_t result_rotr_64; ++ int64_t result_rotri_0; ++ int64_t result_rotri_8; ++ int64_t result_rotri_16; ++ int64_t result_rotri_24; ++ int64_t result_rotri_32; ++ int64_t result_rotri_40; ++ int64_t result_rotri_48; ++ int64_t result_rotri_56; ++ int64_t result_rotri_64; ++ }; ++ T t; ++ ++ __ Ld_d(a4, MemOperand(a0, offsetof(T, input))); ++ ++ __ Rotr_d(a5, a4, 0); ++ __ Rotr_d(a6, a4, 0x08); ++ __ Rotr_d(a7, a4, 0x10); ++ __ Rotr_d(t0, a4, 0x18); ++ __ Rotr_d(t1, a4, 0x20); ++ __ Rotr_d(t2, a4, -0x18); ++ __ Rotr_d(t3, a4, -0x10); ++ __ Rotr_d(t4, a4, -0x08); ++ __ Rotr_d(t5, a4, 0x40); ++ __ St_d(a5, MemOperand(a0, offsetof(T, 
result_rotr_0))); ++ __ St_d(a6, MemOperand(a0, offsetof(T, result_rotr_8))); ++ __ St_d(a7, MemOperand(a0, offsetof(T, result_rotr_16))); ++ __ St_d(t0, MemOperand(a0, offsetof(T, result_rotr_24))); ++ __ St_d(t1, MemOperand(a0, offsetof(T, result_rotr_32))); ++ __ St_d(t2, MemOperand(a0, offsetof(T, result_rotr_40))); ++ __ St_d(t3, MemOperand(a0, offsetof(T, result_rotr_48))); ++ __ St_d(t4, MemOperand(a0, offsetof(T, result_rotr_56))); ++ __ St_d(t5, MemOperand(a0, offsetof(T, result_rotr_64))); ++ ++ __ li(t5, 0); ++ __ Rotr_d(a5, a4, t5); ++ __ li(t5, 0x08); ++ __ Rotr_d(a6, a4, t5); ++ __ li(t5, 0x10); ++ __ Rotr_d(a7, a4, t5); ++ __ li(t5, 0x18); ++ __ Rotr_d(t0, a4, t5); ++ __ li(t5, 0x20); ++ __ Rotr_d(t1, a4, t5); ++ __ li(t5, -0x18); ++ __ Rotr_d(t2, a4, t5); ++ __ li(t5, -0x10); ++ __ Rotr_d(t3, a4, t5); ++ __ li(t5, -0x08); ++ __ Rotr_d(t4, a4, t5); ++ __ li(t5, 0x40); ++ __ Rotr_d(t5, a4, t5); ++ ++ __ St_d(a5, MemOperand(a0, offsetof(T, result_rotri_0))); ++ __ St_d(a6, MemOperand(a0, offsetof(T, result_rotri_8))); ++ __ St_d(a7, MemOperand(a0, offsetof(T, result_rotri_16))); ++ __ St_d(t0, MemOperand(a0, offsetof(T, result_rotri_24))); ++ __ St_d(t1, MemOperand(a0, offsetof(T, result_rotri_32))); ++ __ St_d(t2, MemOperand(a0, offsetof(T, result_rotri_40))); ++ __ St_d(t3, MemOperand(a0, offsetof(T, result_rotri_48))); ++ __ St_d(t4, MemOperand(a0, offsetof(T, result_rotri_56))); ++ __ St_d(t5, MemOperand(a0, offsetof(T, result_rotri_64))); ++ ++ __ jirl(zero_reg, ra, 0); ++ ++ CodeDesc desc; ++ masm->GetCode(isolate, &desc); ++ Handle code = Factory::CodeBuilder(isolate, desc, Code::STUB).Build(); ++ auto f = GeneratedCode::FromCode(*code); ++ t.input = 0x0123456789ABCDEF; ++ f.Call(&t, 0, 0, 0, 0); ++ ++ CHECK_EQ(static_cast(0x0123456789ABCDEF), t.result_rotr_0); ++ CHECK_EQ(static_cast(0xEF0123456789ABCD), t.result_rotr_8); ++ CHECK_EQ(static_cast(0xCDEF0123456789AB), t.result_rotr_16); ++ CHECK_EQ(static_cast(0xABCDEF0123456789), t.result_rotr_24); ++ CHECK_EQ(static_cast(0x89ABCDEF01234567), t.result_rotr_32); ++ CHECK_EQ(static_cast(0x6789ABCDEF012345), t.result_rotr_40); ++ CHECK_EQ(static_cast(0x456789ABCDEF0123), t.result_rotr_48); ++ CHECK_EQ(static_cast(0x23456789ABCDEF01), t.result_rotr_56); ++ CHECK_EQ(static_cast(0x0123456789ABCDEF), t.result_rotr_64); ++ ++ CHECK_EQ(static_cast(0x0123456789ABCDEF), t.result_rotri_0); ++ CHECK_EQ(static_cast(0xEF0123456789ABCD), t.result_rotri_8); ++ CHECK_EQ(static_cast(0xCDEF0123456789AB), t.result_rotri_16); ++ CHECK_EQ(static_cast(0xABCDEF0123456789), t.result_rotri_24); ++ CHECK_EQ(static_cast(0x89ABCDEF01234567), t.result_rotri_32); ++ CHECK_EQ(static_cast(0x6789ABCDEF012345), t.result_rotri_40); ++ CHECK_EQ(static_cast(0x456789ABCDEF0123), t.result_rotri_48); ++ CHECK_EQ(static_cast(0x23456789ABCDEF01), t.result_rotri_56); ++ CHECK_EQ(static_cast(0x0123456789ABCDEF), t.result_rotri_64); ++} ++ ++TEST(macro_instructions4) { ++ CcTest::InitializeVM(); ++ Isolate* isolate = CcTest::i_isolate(); ++ HandleScope scope(isolate); ++ MacroAssembler assembler(isolate, v8::internal::CodeObjectRequired::kYes); ++ MacroAssembler* masm = &assembler; ++ ++ struct T { ++ double a; ++ float b; ++ double result_floor_a; ++ float result_floor_b; ++ double result_ceil_a; ++ float result_ceil_b; ++ double result_trunc_a; ++ float result_trunc_b; ++ double result_round_a; ++ float result_round_b; ++ }; ++ T t; ++ ++ const int kTableLength = 16; ++ ++ // clang-format off ++ double inputs_d[kTableLength] = { ++ 2.1, 2.6, 2.5, 3.1, 3.6, 3.5, ++ 
-2.1, -2.6, -2.5, -3.1, -3.6, -3.5, ++ 1.7976931348623157E+308, 6.27463370218383111104242366943E-307, ++ std::numeric_limits::max() - 0.1, ++ std::numeric_limits::infinity() ++ }; ++ float inputs_s[kTableLength] = { ++ 2.1, 2.6, 2.5, 3.1, 3.6, 3.5, ++ -2.1, -2.6, -2.5, -3.1, -3.6, -3.5, ++ 1.7976931348623157E+38, 6.27463370218383111104242366943E-37, ++ std::numeric_limits::lowest() + 0.6, ++ std::numeric_limits::infinity() ++ }; ++ float outputs_round_s[kTableLength] = { ++ 2.0, 3.0, 2.0, 3.0, 4.0, 4.0, ++ -2.0, -3.0, -2.0, -3.0, -4.0, -4.0, ++ 1.7976931348623157E+38, 0, ++ std::numeric_limits::lowest() + 1, ++ std::numeric_limits::infinity() ++ }; ++ double outputs_round_d[kTableLength] = { ++ 2.0, 3.0, 2.0, 3.0, 4.0, 4.0, ++ -2.0, -3.0, -2.0, -3.0, -4.0, -4.0, ++ 1.7976931348623157E+308, 0, ++ std::numeric_limits::max(), ++ std::numeric_limits::infinity() ++ }; ++ float outputs_trunc_s[kTableLength] = { ++ 2.0, 2.0, 2.0, 3.0, 3.0, 3.0, ++ -2.0, -2.0, -2.0, -3.0, -3.0, -3.0, ++ 1.7976931348623157E+38, 0, ++ std::numeric_limits::lowest() + 1, ++ std::numeric_limits::infinity() ++ }; ++ double outputs_trunc_d[kTableLength] = { ++ 2.0, 2.0, 2.0, 3.0, 3.0, 3.0, ++ -2.0, -2.0, -2.0, -3.0, -3.0, -3.0, ++ 1.7976931348623157E+308, 0, ++ std::numeric_limits::max() - 1, ++ std::numeric_limits::infinity() ++ }; ++ float outputs_ceil_s[kTableLength] = { ++ 3.0, 3.0, 3.0, 4.0, 4.0, 4.0, ++ -2.0, -2.0, -2.0, -3.0, -3.0, -3.0, ++ 1.7976931348623157E38, 1, ++ std::numeric_limits::lowest() + 1, ++ std::numeric_limits::infinity() ++ }; ++ double outputs_ceil_d[kTableLength] = { ++ 3.0, 3.0, 3.0, 4.0, 4.0, 4.0, ++ -2.0, -2.0, -2.0, -3.0, -3.0, -3.0, ++ 1.7976931348623157E308, 1, ++ std::numeric_limits::max(), ++ std::numeric_limits::infinity() ++ }; ++ float outputs_floor_s[kTableLength] = { ++ 2.0, 2.0, 2.0, 3.0, 3.0, 3.0, ++ -3.0, -3.0, -3.0, -4.0, -4.0, -4.0, ++ 1.7976931348623157E38, 0, ++ std::numeric_limits::lowest() + 1, ++ std::numeric_limits::infinity() ++ }; ++ double outputs_floor_d[kTableLength] = { ++ 2.0, 2.0, 2.0, 3.0, 3.0, 3.0, ++ -3.0, -3.0, -3.0, -4.0, -4.0, -4.0, ++ 1.7976931348623157E308, 0, ++ std::numeric_limits::max(), ++ std::numeric_limits::infinity() ++ }; ++ // clang-format on ++ ++ __ Fld_d(f8, MemOperand(a0, offsetof(T, a))); ++ __ Fld_s(f9, MemOperand(a0, offsetof(T, b))); ++ __ Floor_d(f10, f8); ++ __ Floor_s(f11, f9); ++ __ Fst_d(f10, MemOperand(a0, offsetof(T, result_floor_a))); ++ __ Fst_s(f11, MemOperand(a0, offsetof(T, result_floor_b))); ++ __ Ceil_d(f10, f8); ++ __ Ceil_s(f11, f9); ++ __ Fst_d(f10, MemOperand(a0, offsetof(T, result_ceil_a))); ++ __ Fst_s(f11, MemOperand(a0, offsetof(T, result_ceil_b))); ++ __ Trunc_d(f10, f8); ++ __ Trunc_s(f11, f9); ++ __ Fst_d(f10, MemOperand(a0, offsetof(T, result_trunc_a))); ++ __ Fst_s(f11, MemOperand(a0, offsetof(T, result_trunc_b))); ++ __ Round_d(f10, f8); ++ __ Round_s(f11, f9); ++ __ Fst_d(f10, MemOperand(a0, offsetof(T, result_round_a))); ++ __ Fst_s(f11, MemOperand(a0, offsetof(T, result_round_b))); ++ __ jirl(zero_reg, ra, 0); ++ ++ CodeDesc desc; ++ masm->GetCode(isolate, &desc); ++ Handle code = Factory::CodeBuilder(isolate, desc, Code::STUB).Build(); ++ auto f = GeneratedCode::FromCode(*code); ++ for (int i = 0; i < kTableLength; i++) { ++ t.a = inputs_d[i]; ++ t.b = inputs_s[i]; ++ f.Call(&t, 0, 0, 0, 0); ++ CHECK_EQ(t.result_floor_a, outputs_floor_d[i]); ++ CHECK_EQ(t.result_floor_b, outputs_floor_s[i]); ++ CHECK_EQ(t.result_ceil_a, outputs_ceil_d[i]); ++ CHECK_EQ(t.result_ceil_b, outputs_ceil_s[i]); ++ 
CHECK_EQ(t.result_trunc_a, outputs_trunc_d[i]); ++ CHECK_EQ(t.result_trunc_b, outputs_trunc_s[i]); ++ CHECK_EQ(t.result_round_a, outputs_round_d[i]); ++ CHECK_EQ(t.result_round_b, outputs_round_s[i]); ++ } ++} ++ ++uint64_t run_ExtractBits(uint64_t source, int pos, int size, bool sign_extend) { ++ Isolate* isolate = CcTest::i_isolate(); ++ HandleScope scope(isolate); ++ MacroAssembler assembler(isolate, v8::internal::CodeObjectRequired::kYes); ++ MacroAssembler* masm = &assembler; ++ ++ if (sign_extend) { ++ __ ExtractBits(t0, a0, a1, size, true); ++ } else { ++ __ ExtractBits(t0, a0, a1, size); ++ } ++ __ or_(a0, t0, zero_reg); ++ __ jirl(zero_reg, ra, 0); ++ ++ CodeDesc desc; ++ masm->GetCode(isolate, &desc); ++ Handle code = Factory::CodeBuilder(isolate, desc, Code::STUB).Build(); ++ auto f = GeneratedCode::FromCode(*code); ++ uint64_t res = reinterpret_cast(f.Call(source, pos, 0, 0, 0)); ++ return res; ++} ++ ++TEST(ExtractBits) { ++ CcTest::InitializeVM(); ++ ++ struct TestCase { ++ uint64_t source; ++ int pos; ++ int size; ++ bool sign_extend; ++ uint64_t res; ++ }; ++ ++ // clang-format off ++ struct TestCase tc[] = { ++ //source, pos, size, sign_extend, res; ++ {0x800, 4, 8, false, 0x80}, ++ {0x800, 4, 8, true, 0xFFFFFFFFFFFFFF80}, ++ {0x800, 5, 8, true, 0x40}, ++ {0x40000, 3, 16, false, 0x8000}, ++ {0x40000, 3, 16, true, 0xFFFFFFFFFFFF8000}, ++ {0x40000, 4, 16, true, 0x4000}, ++ {0x200000000, 2, 32, false, 0x80000000}, ++ {0x200000000, 2, 32, true, 0xFFFFFFFF80000000}, ++ {0x200000000, 3, 32, true, 0x40000000}, ++ }; ++ // clang-format on ++ size_t nr_test_cases = sizeof(tc) / sizeof(TestCase); ++ for (size_t i = 0; i < nr_test_cases; ++i) { ++ uint64_t result = ++ run_ExtractBits(tc[i].source, tc[i].pos, tc[i].size, tc[i].sign_extend); ++ CHECK_EQ(tc[i].res, result); ++ } ++} ++ ++uint64_t run_InsertBits(uint64_t dest, uint64_t source, int pos, int size) { ++ Isolate* isolate = CcTest::i_isolate(); ++ HandleScope scope(isolate); ++ MacroAssembler assembler(isolate, v8::internal::CodeObjectRequired::kYes); ++ MacroAssembler* masm = &assembler; ++ ++ __ InsertBits(a0, a1, a2, size); ++ __ jirl(zero_reg, ra, 0); ++ ++ CodeDesc desc; ++ masm->GetCode(isolate, &desc); ++ Handle code = Factory::CodeBuilder(isolate, desc, Code::STUB).Build(); ++ auto f = GeneratedCode::FromCode(*code); ++ uint64_t res = reinterpret_cast(f.Call(dest, source, pos, 0, 0)); ++ return res; ++} ++ ++TEST(InsertBits) { ++ CcTest::InitializeVM(); ++ ++ struct TestCase { ++ uint64_t dest; ++ uint64_t source; ++ int pos; ++ int size; ++ uint64_t res; ++ }; ++ ++ // clang-format off ++ struct TestCase tc[] = { ++ //dest source, pos, size, res; ++ {0x11111111, 0x1234, 32, 16, 0x123411111111}, ++ {0x111111111111, 0xFFFFF, 24, 10, 0x1113FF111111}, ++ {0x1111111111111111, 0xFEDCBA, 16, 4, 0x11111111111A1111}, ++ }; ++ // clang-format on ++ size_t nr_test_cases = sizeof(tc) / sizeof(TestCase); ++ for (size_t i = 0; i < nr_test_cases; ++i) { ++ uint64_t result = ++ run_InsertBits(tc[i].dest, tc[i].source, tc[i].pos, tc[i].size); ++ CHECK_EQ(tc[i].res, result); ++ } ++} ++ ++TEST(Popcnt) { ++ CcTest::InitializeVM(); ++ Isolate* isolate = CcTest::i_isolate(); ++ HandleScope scope(isolate); ++ MacroAssembler assembler(isolate, v8::internal::CodeObjectRequired::kYes); ++ MacroAssembler* masm = &assembler; ++ ++ struct TestCase { ++ uint32_t a; ++ uint64_t b; ++ int expected_a; ++ int expected_b; ++ int result_a; ++ int result_b; ++ }; ++ // clang-format off ++ struct TestCase tc[] = { ++ { 0x12345678, 0x1122334455667788, 
13, 26, 0, 0}, ++ { 0x1234, 0x123456, 5, 9, 0, 0}, ++ { 0xFFF00000, 0xFFFF000000000000, 12, 16, 0, 0}, ++ { 0xFF000012, 0xFFFF000000001234, 10, 21, 0, 0} ++ }; ++ // clang-format on ++ ++ __ Ld_w(t0, MemOperand(a0, offsetof(TestCase, a))); ++ __ Ld_d(t1, MemOperand(a0, offsetof(TestCase, b))); ++ __ Popcnt_w(t2, t0); ++ __ Popcnt_d(t3, t1); ++ __ St_w(t2, MemOperand(a0, offsetof(TestCase, result_a))); ++ __ St_w(t3, MemOperand(a0, offsetof(TestCase, result_b))); ++ __ jirl(zero_reg, ra, 0); ++ ++ CodeDesc desc; ++ masm->GetCode(isolate, &desc); ++ Handle code = Factory::CodeBuilder(isolate, desc, Code::STUB).Build(); ++ auto f = GeneratedCode::FromCode(*code); ++ ++ size_t nr_test_cases = sizeof(tc) / sizeof(TestCase); ++ for (size_t i = 0; i < nr_test_cases; ++i) { ++ f.Call(&tc[i], 0, 0, 0, 0); ++ CHECK_EQ(tc[i].expected_a, tc[i].result_a); ++ CHECK_EQ(tc[i].expected_b, tc[i].result_b); ++ } ++} ++ ++#undef __ ++ ++} // namespace internal ++} // namespace v8 +diff --git a/src/3rdparty/chromium/v8/tools/dev/gm.py b/src/3rdparty/chromium/v8/tools/dev/gm.py +index 9d5cbf056a2..0363af7148e 100755 +--- a/src/3rdparty/chromium/v8/tools/dev/gm.py ++++ b/src/3rdparty/chromium/v8/tools/dev/gm.py +@@ -39,7 +39,7 @@ BUILD_TARGETS_ALL = ["all"] + + # All arches that this script understands. + ARCHES = ["ia32", "x64", "arm", "arm64", "mipsel", "mips64el", "ppc", "ppc64", +- "s390", "s390x", "android_arm", "android_arm64"] ++ "s390", "s390x", "android_arm", "android_arm64", "la64"] + # Arches that get built/run when you don't specify any. + DEFAULT_ARCHES = ["ia32", "x64", "arm", "arm64"] + # Modes that this script understands. +@@ -246,7 +246,7 @@ class Config(object): + if self.arch == "android_arm": return "\nv8_target_cpu = \"arm\"" + if self.arch == "android_arm64": return "\nv8_target_cpu = \"arm64\"" + if self.arch in ("arm", "arm64", "mipsel", "mips64el", "ppc", "ppc64", +- "s390", "s390x"): ++ "s390", "s390x", "la64"): + return "\nv8_target_cpu = \"%s\"" % self.arch + return "" + +diff --git a/src/3rdparty/gn/tools/gn/args.cc b/src/3rdparty/gn/tools/gn/args.cc +index 802c3731d5a..748f1ff3e29 100644 +--- a/src/3rdparty/gn/tools/gn/args.cc ++++ b/src/3rdparty/gn/tools/gn/args.cc +@@ -327,6 +327,7 @@ void Args::SetSystemVarsLocked(Scope* dest) const { + static const char kArm64[] = "arm64"; + static const char kMips[] = "mipsel"; + static const char kMips64[] = "mips64el"; ++ static const char kLa64[] = "la64"; + static const char kS390X[] = "s390x"; + static const char kPPC64[] = "ppc64"; + const char* arch = nullptr; +@@ -346,6 +347,8 @@ void Args::SetSystemVarsLocked(Scope* dest) const { + arch = kMips; + else if (os_arch == "mips64") + arch = kMips64; ++ else if (os_arch == "loongarch64") ++ arch = kLa64; + else if (os_arch == "s390x") + arch = kS390X; + else if (os_arch == "ppc64" || os_arch == "ppc64le") +diff --git a/src/3rdparty/gn/tools/gn/variables.cc b/src/3rdparty/gn/tools/gn/variables.cc +index ff6d45cb619..771d7b04cdb 100644 +--- a/src/3rdparty/gn/tools/gn/variables.cc ++++ b/src/3rdparty/gn/tools/gn/variables.cc +@@ -111,6 +111,7 @@ Possible values + - "arm" + - "arm64" + - "mipsel" ++ - "la64" + )"; + + const char kTargetName[] = "target_name"; +diff --git a/src/3rdparty/gn/util/build_config.h b/src/3rdparty/gn/util/build_config.h +index addd7cfb081..14c0dab426c 100644 +--- a/src/3rdparty/gn/util/build_config.h ++++ b/src/3rdparty/gn/util/build_config.h +@@ -172,6 +172,18 @@ + #define ARCH_CPU_32_BITS 1 + #define ARCH_CPU_BIG_ENDIAN 1 + #endif ++#elif defined(__loongarch__) ++#if 
defined(__LP64__) ++#define ARCH_CPU_LOONGARCH_FAMILY 1 ++#define ARCH_CPU_LA64 1 ++#define ARCH_CPU_64_BITS 1 ++#define ARCH_CPU_LITTLE_ENDIAN 1 ++#else ++#define ARCH_CPU_LOONGARCH_FAMILY 1 ++#define ARCH_CPU_LA 1 ++#define ARCH_CPU_32_BITS 1 ++#define ARCH_CPU_LITTLE_ENDIAN 1 ++#endif + #else + #error Please add support for your architecture in build_config.h + #endif +diff --git a/src/buildtools/config/linux.pri b/src/buildtools/config/linux.pri +index 56c18bdb5..78381e0d5 100644 +--- a/src/buildtools/config/linux.pri ++++ b/src/buildtools/config/linux.pri +@@ -116,6 +116,11 @@ contains(QT_ARCH, "mips") { + else: contains(QMAKE_CFLAGS, "-mdsp"): gn_args += mips_dsp_rev=1 + } + ++contains(QT_ARCH, "loongarch64") { ++ DEFINES += ARCH_CPU_LA64 ++ gn_args += debug_devtools=false ++} ++ + host_build { + gn_args += custom_toolchain=\"$$QTWEBENGINE_OUT_ROOT/src/toolchain:host\" + GN_HOST_CPU = $$gnArch($$QT_ARCH) diff --git a/qt6-quick3dphysics/PKGBUILD b/qt6-quick3dphysics/PKGBUILD index 8b0113ff74..973ed4f360 100644 --- a/qt6-quick3dphysics/PKGBUILD +++ b/qt6-quick3dphysics/PKGBUILD @@ -19,8 +19,14 @@ makedepends=(cmake qt6-shadertools) groups=(qt6) _pkgfn=${pkgname/6-/}-everywhere-src-$_qtver -source=(https://download.qt.io/official_releases/qt/${pkgver%.*}/$_qtver/submodules/$_pkgfn.tar.xz) -sha256sums=('2cc6b5f58d7b1de6de34279657ad2c73a0e82e29c7a56a12f2c00fb62725e15a') +source=(https://download.qt.io/official_releases/qt/${pkgver%.*}/$_qtver/submodules/$_pkgfn.tar.xz + qt3d-la64.patch) +sha256sums=('2cc6b5f58d7b1de6de34279657ad2c73a0e82e29c7a56a12f2c00fb62725e15a' + '3d0f784887aebda0498ad2778ed757069b334db8b8edf1b1e56a82e616972e55') + +prepare() { + patch -d $_pkgfn -p1 -i "$srcdir/qt3d-la64.patch" +} build() { cmake -B build -S $_pkgfn -G Ninja \ diff --git a/qt6-quick3dphysics/qt3d-la64.patch b/qt6-quick3dphysics/qt3d-la64.patch new file mode 100644 index 0000000000..6afbdc534f --- /dev/null +++ b/qt6-quick3dphysics/qt3d-la64.patch @@ -0,0 +1,31 @@ +Index: qtquick3dphysics-everywhere-src-6.5.1/src/3rdparty/PhysX/pxshared/include/foundation/PxPreprocessor.h +=================================================================== +--- qtquick3dphysics-everywhere-src-6.5.1.orig/src/3rdparty/PhysX/pxshared/include/foundation/PxPreprocessor.h ++++ qtquick3dphysics-everywhere-src-6.5.1/src/3rdparty/PhysX/pxshared/include/foundation/PxPreprocessor.h +@@ -127,6 +127,8 @@ Architecture defines, see http://sourcef + #define PX_PPC 1 + #elif defined(__mips__) + #define PX_X64 1 ++#elif defined(__loongarch_lp64) ++#define PX_LA64 1 + #else + #error "Unknown architecture" + #endif +@@ -147,7 +149,7 @@ SIMD defines + #endif + + /** Disable SIMD for webassembly, mips and arm64 */ +-#if defined(__EMSCRIPTEN__) || defined(__mips__) || defined(_M_ARM64) || defined(_M_ARM) ++#if defined(__EMSCRIPTEN__) || defined(__mips__) || defined(_M_ARM64) || defined(_M_ARM) || defined(__loongarch__) + #define PX_SIMD_DISABLED 1 + #endif + +@@ -436,7 +438,7 @@ General defines + + // static assert + #if(defined(__GNUC__) && (__GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 7))) || (PX_PS4) || (PX_APPLE_FAMILY) || (PX_SWITCH) || (PX_CLANG && PX_ARM) +-#define PX_COMPILE_TIME_ASSERT(exp) typedef char PX_CONCAT(PxCompileTimeAssert_Dummy, __COUNTER__)[(exp) ? 1 : -1] __attribute__((unused)) ++#define PX_COMPILE_TIME_ASSERT(exp) typedef char PX_CONCAT(PxCompileTimeAssert_Dummy, __COUNTER__)[(exp) ? 1 : 0] __attribute__((unused)) + #else + #define PX_COMPILE_TIME_ASSERT(exp) typedef char PxCompileTimeAssert_Dummy[(exp) ? 
1 : -1] + #endif diff --git a/qt6-tools/qt6-tools-fix-build.patch b/qt6-tools/qt6-tools-fix-build.patch new file mode 100644 index 0000000000..9facbaef20 --- /dev/null +++ b/qt6-tools/qt6-tools-fix-build.patch @@ -0,0 +1,11 @@ +--- qttools-everywhere-src-6.4.2/src/linguist/lupdate/lupdatepreprocessoraction.h 2023-03-06 23:30:16.999102388 +0800 ++++ qttools-everywhere-src-6.4.2/src/linguist/lupdate/lupdatepreprocessoraction.h 2023-03-06 23:32:56.223626722 +0800 +@@ -59,7 +59,7 @@ + #endif + clang::StringRef /*searchPath*/, clang::StringRef /*relativePath*/, + const clang::Module */*imported*/, +- clang::SrcMgr::CharacteristicKind /*fileType*/) override; ++ clang::SrcMgr::CharacteristicKind /*fileType*/); + + std::string m_inputFile; + clang::Preprocessor &m_preprocessor; diff --git a/qtcreator/qtcreator-la64.patch b/qtcreator/qtcreator-la64.patch new file mode 100644 index 0000000000..f8dff379de --- /dev/null +++ b/qtcreator/qtcreator-la64.patch @@ -0,0 +1,179 @@ +Index: qt-creator-opensource-src-6.0.2/src/plugins/projectexplorer/abi.cpp +=================================================================== +--- qt-creator-opensource-src-6.0.2.orig/src/plugins/projectexplorer/abi.cpp ++++ qt-creator-opensource-src-6.0.2/src/plugins/projectexplorer/abi.cpp +@@ -152,8 +152,10 @@ static Abi::Architecture architectureFro + return Abi::X86Architecture; + if (arch == "ia64") + return Abi::ItaniumArchitecture; + if (arch.startsWith("mips")) + return Abi::MipsArchitecture; ++ if (arch.startsWith("loongarch")) ++ return Abi::LoongArchitecture; + if (arch.startsWith("power")) + return Abi::PowerPCArchitecture; + if (arch.startsWith("sh")) // Not in Qt documentation! +@@ -363,6 +367,9 @@ static Abis abiOf(const QByteArray &data + } + + switch (machine) { ++ case 2: // EM_LOONGARCH64 ++ result.append(Abi(Abi::LoongArchitecture, os, flavor, Abi::ElfFormat, 64)); ++ break; + case 3: // EM_386 + result.append(Abi(Abi::X86Architecture, os, flavor, Abi::ElfFormat, 32)); + break; +@@ -390,6 +397,8 @@ static Abis abiOf(const QByteArray &data + case 50: // EM_IA_64 + result.append(Abi(Abi::ItaniumArchitecture, os, flavor, Abi::ElfFormat, 64)); + break; ++ case 258: // EM_AARCH64 ++ result.append(Abi(Abi::LoongArchitecture, os, flavor, Abi::ElfFormat, 64)); + default: + ; + } +@@ -575,6 +584,9 @@ Abi Abi::abiFromTargetTriplet(const QStr + } else if (p.startsWith("mips")) { + arch = MipsArchitecture; + width = p.contains("64") ? 64 : 32; ++ } else if (p.startsWith("loongarch")) { ++ arch = LoongArchitecture; ++ width = p.contains("64") ? 
64 : 32; + } else if (p == "x86_64" || p == "amd64") { + arch = X86Architecture; + width = 64; +@@ -772,6 +784,8 @@ QString Abi::toString(const Architecture + return QLatin1String("mcs251"); + case MipsArchitecture: + return QLatin1String("mips"); ++ case LoongArchitecture: ++ return QLatin1String("loongarch"); + case PowerPCArchitecture: + return QLatin1String("ppc"); + case ItaniumArchitecture: +@@ -944,6 +958,8 @@ Abi::Architecture Abi::architectureFromS + return Mcs251Architecture; + if (a == "mips") + return MipsArchitecture; ++ if (a == "loongarch") ++ return LoongArchitecture; + if (a == "ppc") + return PowerPCArchitecture; + if (a == "itanium") +@@ -1509,6 +1525,14 @@ void ProjectExplorer::ProjectExplorerPlu + << int(Abi::LinuxOS) << int(Abi::GenericFlavor) + << int(Abi::ElfFormat) << 64; + ++ QTest::newRow("loongarch-linux-gnu") << int(Abi::LoongArchitecture) ++ << int(Abi::LinuxOS) << int(Abi::GenericFlavor) ++ << int(Abi::ElfFormat) << 32; ++ ++ QTest::newRow("loongarch64-linux-gnu") << int(Abi::LoongArchitecture) ++ << int(Abi::LinuxOS) << int(Abi::GenericFlavor) ++ << int(Abi::ElfFormat) << 64; ++ + QTest::newRow("arm-wrs-vxworks") << int(Abi::ArmArchitecture) + << int(Abi::VxWorks) << int(Abi::VxWorksFlavor) + << int(Abi::ElfFormat) << 32; +Index: qt-creator-opensource-src-6.0.2/src/plugins/projectexplorer/abi.h +=================================================================== +--- qt-creator-opensource-src-6.0.2.orig/src/plugins/projectexplorer/abi.h ++++ qt-creator-opensource-src-6.0.2/src/plugins/projectexplorer/abi.h +@@ -76,6 +76,7 @@ public: + R32CArchitecture, + CR16Architecture, + RiscVArchitecture, ++ LoongArchitecture, + UnknownArchitecture + }; + +Index: qt-creator-opensource-src-6.0.2/src/plugins/projectexplorer/gcctoolchain.cpp +=================================================================== +--- qt-creator-opensource-src-6.0.2.orig/src/plugins/projectexplorer/gcctoolchain.cpp ++++ qt-creator-opensource-src-6.0.2/src/plugins/projectexplorer/gcctoolchain.cpp +@@ -2042,6 +2042,14 @@ void ProjectExplorerPlugin::testGccAbiGu + << QString::fromLatin1("mips64el-linux-uclibc") + << QByteArray("#define __SIZEOF_SIZE_T__ 8") + << QStringList({"mips-linux-generic-elf-64bit"}); ++ QTest::newRow("Linux 12 (loongarch)") ++ << QString::fromLatin1("loongarch32-linux-gnu") ++ << QByteArray("#define __SIZEOF_SIZE_T__ 4") ++ << QStringList({"loongarch-linux-generic-elf-32bit"}); ++ QTest::newRow("Linux 13 (64bit loongarch)") ++ << QString::fromLatin1("loongarch64-linux-gnu") ++ << QByteArray("#define __SIZEOF_SIZE_T__ 8") ++ << QStringList({"loongarch64-linux-generic-elf-64bit"}); + + QTest::newRow("Mingw 1 (32bit)") + << QString::fromLatin1("i686-w64-mingw32") +Index: qt-creator-opensource-src-6.0.2/src/shared/qbs/share/qbs/imports/qbs/ModUtils/utils.js +=================================================================== +--- qt-creator-opensource-src-6.0.2.orig/src/shared/qbs/share/qbs/imports/qbs/ModUtils/utils.js ++++ qt-creator-opensource-src-6.0.2/src/shared/qbs/share/qbs/imports/qbs/ModUtils/utils.js +@@ -550,6 +550,10 @@ function guessArchitecture(m) { + architecture = "mips"; + if (hasAnyOf(m, ["_MIPS_ARCH_MIPS64", "__mips64"])) + architecture += "64"; ++ } else if (hasAnyOf(m, ["__loongarch", "__loongarch__"])) { ++ architecture = "loongarch"; ++ if (hasAnyOf(m, ["__LP64__", "__loongarch64"])) ++ architecture += "64"; + } else if (hasAnyOf(m, ["__ppc__", "__ppc", "__powerpc__", + "_ARCH_COM", "_ARCH_PWR", "_ARCH_PPC", "_M_MPPC", "_M_PPC"])) { + architecture = 
"ppc"; +Index: qt-creator-opensource-src-6.0.2/src/shared/qbs/share/qbs/imports/qbs/Probes/LibraryProbe.qbs +=================================================================== +--- qt-creator-opensource-src-6.0.2.orig/src/shared/qbs/share/qbs/imports/qbs/Probes/LibraryProbe.qbs ++++ qt-creator-opensource-src-6.0.2/src/shared/qbs/share/qbs/imports/qbs/Probes/LibraryProbe.qbs +@@ -51,6 +51,10 @@ PathProbe { + result = ["/usr/lib/mipsel-linux-gnu"] + else if (qbs.architecture === "mips64") + result = ["/usr/lib/mips64el-linux-gnuabi64"] ++ else if (qbs.architecture === "loongarch") ++ result = ["/usr/lib/loongarch-linux-gnu"] ++ else if (qbs.architecture === "loongarch64") ++ result = ["/usr/lib/loongarch64-linux-gnu"] + else if (qbs.architecture === "ppc") + result = ["/usr/lib/powerpc-linux-gnu"] + else if (qbs.architecture === "ppc64") +Index: qt-creator-opensource-src-6.0.2/src/shared/qbs/src/lib/corelib/tools/architectures.cpp +=================================================================== +--- qt-creator-opensource-src-6.0.2.orig/src/shared/qbs/src/lib/corelib/tools/architectures.cpp ++++ qt-creator-opensource-src-6.0.2/src/shared/qbs/src/lib/corelib/tools/architectures.cpp +@@ -82,6 +82,10 @@ QString canonicalTargetArchitecture(cons + return StringConstants::i386Arch(); + } + ++ if (arch == StringConstants::loongArch() || arch == StringConstants::loongArch64()) { ++ return arch; ++ } ++ + if (arch == StringConstants::mipsArch() || arch == StringConstants::mips64Arch()) { + if (endianness == QStringLiteral("big")) + return arch + QStringLiteral("eb"); +@@ -145,6 +149,12 @@ QString canonicalArchitecture(const QStr + << QStringLiteral("mips64eb") + << QStringLiteral("mips64el")); + ++ archMap.insert(StringConstants::loongArch(), QStringList() ++ << QStringLiteral("loongarch")); ++ ++ archMap.insert(StringConstants::loongArch64(), QStringList() ++ << QStringLiteral("loongarch64")); ++ + QMapIterator i(archMap); + while (i.hasNext()) { + i.next(); +Index: qt-creator-opensource-src-6.0.2/src/shared/qbs/src/lib/corelib/tools/stringconstants.h +=================================================================== +--- qt-creator-opensource-src-6.0.2.orig/src/shared/qbs/src/lib/corelib/tools/stringconstants.h ++++ qt-creator-opensource-src-6.0.2/src/shared/qbs/src/lib/corelib/tools/stringconstants.h +@@ -233,6 +233,8 @@ public: + QBS_STRING_CONSTANT(i586Arch, "i586") + QBS_STRING_CONSTANT(mipsArch, "mips") + QBS_STRING_CONSTANT(mips64Arch, "mips64") ++ QBS_STRING_CONSTANT(loongArch, "loongarch") ++ QBS_STRING_CONSTANT(loongArch64, "loongarch64") + QBS_STRING_CONSTANT(powerPcArch, "powerpc") + QBS_STRING_CONSTANT(ppcArch, "ppc") + QBS_STRING_CONSTANT(ppc64Arch, "ppc64") diff --git a/quazip/PKGBUILD b/quazip/PKGBUILD index 4a4e63c82c..8979595e8a 100644 --- a/quazip/PKGBUILD +++ b/quazip/PKGBUILD @@ -7,7 +7,7 @@ pkgbase=quazip pkgname=(quazip-qt5 quazip-qt6) pkgver=1.4 -pkgrel=1 +pkgrel=2 pkgdesc='C++ wrapper for the ZIP/UNZIP C package' url='https://stachenov.github.io/quazip/' license=(LGPL) @@ -18,7 +18,8 @@ sha256sums=('79633fd3a18e2d11a7d5c40c4c79c1786ba0c74b59ad752e8429746fe1781dd6') build() { cmake -B build5 -S $pkgbase-$pkgver \ - -DCMAKE_INSTALL_PREFIX=/usr + -DCMAKE_INSTALL_PREFIX=/usr \ + -DQUAZIP_QT_MAJOR_VERSION=5 cmake --build build5 cmake -B build6 -S $pkgbase-$pkgver \ diff --git a/rathole/PKGBUILD b/rathole/PKGBUILD index 50912f83f3..3ccf4a7556 100644 --- a/rathole/PKGBUILD +++ b/rathole/PKGBUILD @@ -15,7 +15,7 @@ options=('!lto') prepare() { cd "$pkgname-$pkgver" - cargo 
fetch --locked --target "$CARCH-unknown-linux-gnu" + cargo fetch --locked --target "`uname -m`-unknown-linux-gnu" } build() { diff --git a/rbw/PKGBUILD b/rbw/PKGBUILD index aba528264f..d4f34b457a 100644 --- a/rbw/PKGBUILD +++ b/rbw/PKGBUILD @@ -17,7 +17,7 @@ b2sums=('2b4cd61193fe79e9a095ab4534fcb2982c5a611f54789a97f6fd8aea133a93575fb7977 prepare() { cd "$pkgname-$pkgver" - cargo fetch --locked --target "$CARCH-unknown-linux-gnu" + cargo fetch --locked --target "`uname -m`-unknown-linux-gnu" } build() { diff --git a/reapack/PKGBUILD b/reapack/PKGBUILD index c379afc3fb..c102d38b5d 100644 --- a/reapack/PKGBUILD +++ b/reapack/PKGBUILD @@ -83,7 +83,7 @@ package() { cd "$pkgname" # plugin - install -vDm755 -t "$pkgdir/usr/lib/$pkgname" "build/reaper_reapack-$CARCH.so" + install -vDm755 -t "$pkgdir/usr/lib/$pkgname" "build/reaper_reapack-`uname -m`.so" # documentation install -vDm644 -t "$pkgdir/usr/share/doc/$pkgname" README.md diff --git a/rebuilderd/PKGBUILD b/rebuilderd/PKGBUILD index e0e2a7f6fc..91b3e2dcf2 100644 --- a/rebuilderd/PKGBUILD +++ b/rebuilderd/PKGBUILD @@ -27,7 +27,7 @@ validpgpkeys=("64B13F7117D6E07D661BBCE0FE763A64F5E54FD6") prepare() { cd ${pkgbase}-${pkgver} - cargo fetch --locked --target "$CARCH-unknown-linux-gnu" + cargo fetch --locked --target "`uname -m`-unknown-linux-gnu" } build() { diff --git a/refind/PKGBUILD b/refind/PKGBUILD index 5de3cc369d..f59f07e06a 100644 --- a/refind/PKGBUILD +++ b/refind/PKGBUILD @@ -13,13 +13,17 @@ makedepends=( efibootmgr gnu-efi ) -source=(https://sourceforge.net/projects/refind/files/$pkgver/$pkgname-src-$pkgver.tar.gz) -sha512sums=('41c120c1afec37c508aa5c0ec09a6563c3047ef84932308c91701795b950431dfad17d25cf664039b490a302d475add98441b75f90ff71cadce41febedc68a9e') -b2sums=('02019ddb872ce44d2a2119902edebd633f925d49634e3bcc6bfb2c9dedb8ce213166909395a333d3a37e95c67720e31b1f5fcf25083801c17d645372aa54a06a') +source=(https://sourceforge.net/projects/refind/files/$pkgver/$pkgname-src-$pkgver.tar.gz + refind-la64-0.14.0.patch) +sha512sums=('41c120c1afec37c508aa5c0ec09a6563c3047ef84932308c91701795b950431dfad17d25cf664039b490a302d475add98441b75f90ff71cadce41febedc68a9e' + '413d4db728d0942036a8afbd7e0b68473e6175ab13834f79586a2597043803cb505d70081bfbd00f2f74f96ee54c85baecc2efab8c4a80b999193eaa9cfaffdd') +b2sums=('02019ddb872ce44d2a2119902edebd633f925d49634e3bcc6bfb2c9dedb8ce213166909395a333d3a37e95c67720e31b1f5fcf25083801c17d645372aa54a06a' + 'd54284e8da0292bddb79edb230ff36058053b6c5a3452c1b0d4b12a05d535169c83ff1dd9b31569de2d9e97a64eecff9c9f441369bbaccfc219f16aba383679f') _arch='x64' prepare() { cd $pkgbase-$pkgver + patch -p1 -i $srcdir/refind-la64-0.14.0.patch # remove the path prefix from the css reference, so that the css can live # in the same directory sed -e 's|../Styles/||g' -i docs/$pkgbase/*.html @@ -63,11 +67,17 @@ package_refind() { ) cd $pkgbase-$pkgver + if [ "$CARCH" == "loong64" ]; then + _arch='loongarch64' + else + _arch='x64' + fi # NOTE: the install target calls refind-install, therefore we install things # manually # efi binaries install -vDm 644 refind/*.efi -t "$pkgdir/usr/share/$pkgname/" install -vDm 644 drivers_*/*.efi -t "$pkgdir/usr/share/refind/drivers_$_arch/" + rm -f "$pkgdir/usr/share/refind/drivers_loongarch64/ext4_loongarch64.efi" install -vDm 644 gptsync/*.efi -t "$pkgdir/usr/share/$pkgname/tools_$_arch/" # sample config install -vDm 644 $pkgname.conf-sample -t "$pkgdir/usr/share/$pkgname/" diff --git a/refind/refind-la64-0.14.0.patch b/refind/refind-la64-0.14.0.patch new file mode 100644 index 0000000000..053c4b4a2d 
--- /dev/null +++ b/refind/refind-la64-0.14.0.patch @@ -0,0 +1,729 @@ +From 726f40b468f8cb3136d100d94620eb78f0bb27d9 Mon Sep 17 00:00:00 2001 +From: Xiaotian Wu +Date: Sat, 17 Apr 2021 22:09:40 +0800 +Subject: [PATCH] add loongarch64 support + +--- + Make.common | 21 +++++++++++++++++++++ + Makefile | 3 +++ + RefindPkg.dsc | 5 ++++- + filesystems/Make.gnuefi | 5 +++++ + filesystems/btrfs.inf | 9 +++++++++ + filesystems/ext2.inf | 9 +++++++++ + filesystems/ext4.inf | 9 +++++++++ + filesystems/hfs.inf | 9 +++++++++ + filesystems/iso9660.inf | 9 +++++++++ + filesystems/ntfs.inf | 9 +++++++++ + filesystems/reiserfs.inf | 9 +++++++++ + gptsync.inf | 9 +++++++++ + gptsync/Make.gnuefi | 4 ++++ + gptsync/Make.tiano | 4 ++++ + gptsync/gptsync.h | 2 +- + include/refit_call_wrapper.h | 2 +- + libeg/image.c | 2 +- + mvrefind | 3 +++ + refind-install | 36 +++++++++++++++++++++++++++++------- + refind.inf | 11 ++++++++++- + refind/Make.tiano | 4 ++++ + refind/Makefile | 5 +++++ + refind/config.h | 2 ++ + refind/driver_support.c | 2 ++ + refind/global.h | 4 ++++ + refind/install.h | 5 +++++ + refind/launch_efi.c | 4 +++- + refind/lib.c | 2 ++ + refind/main.c | 5 +++++ + refind/scan.c | 8 ++++++++ + 31 files changed, 211 insertions(+), 13 deletions(-) + +diff --git a/Make.common b/Make.common +index 10cde40..9c5fb11 100644 +--- a/Make.common ++++ b/Make.common +@@ -166,6 +166,27 @@ ifeq ($(ARCH), aarch64) + LD_CODE = aarch64elf + endif + ++ifeq ($(ARCH), loongarch64) ++ GNUEFI_CFLAGS += -DEFILOONGARCH64 ++ FORMAT = -O binary ++ FORMAT_DRIVER = -O binary ++ SUBSYSTEM_LDFLAG = -defsym=EFI_SUBSYSTEM=0xa ++ LDFLAGS += --warn-common --no-undefined --fatal-warnings ++ ++ ARCH_CFLAGS = -fno-merge-constants -ffreestanding -DEFILOONGARCH64 ++ ifeq ($(MAKEWITH),TIANO) ++ ARCH_CFLAGS += -mcmodel=large -Wno-address -Wno-missing-braces -Wno-array-bounds -ffunction-sections -fdata-sections ++ endif ++ ifeq ($(MAKEWITH),GNUEFI) ++ ARCH_CFLAGS += -fno-stack-check ++ endif ++ ARCHDIR = LoongArch64 ++ UC_ARCH = LOONGARCH64 ++ FILENAME_CODE = loongarch64 ++ LD_CODE = loongarch64elf ++endif ++ ++ + # GNU-EFI compilation path uses .o files for compiled object code + %.o: %.c + $(CC) $(CFLAGS) $(ARCH_CFLAGS) $(GNUEFI_CFLAGS) $(LOCAL_GNUEFI_CFLAGS) \ +diff --git a/Makefile b/Makefile +index 4d07160..8ebc622 100644 +--- a/Makefile ++++ b/Makefile +@@ -117,6 +117,9 @@ tiano: + ifneq ($(ARCH),aarch64) + +make MAKEWITH=TIANO -C $(GPTSYNC_DIR) -f Make.tiano + endif ++ifneq ($(ARCH),loongarch64) ++ +make MAKEWITH=TIANO -C $(GPTSYNC_DIR) -f Make.tiano ++endif + # +make MAKEWITH=TIANO -C $(FS_DIR) + + all_tiano: tiano fs_tiano +diff --git a/RefindPkg.dsc b/RefindPkg.dsc +index c267f7a..5a7f857 100644 +--- a/RefindPkg.dsc ++++ b/RefindPkg.dsc +@@ -3,7 +3,7 @@ + PLATFORM_GUID = d6365e1c-b895-426d-a012-46769b2d02a3 + PLATFORM_VERSION = 4.5.0 + DSC_SPECIFICATION = 0x00010006 +- SUPPORTED_ARCHITECTURES = IA32|IPF|X64|EBC|ARM|AARCH64 ++ SUPPORTED_ARCHITECTURES = IA32|IPF|X64|EBC|ARM|AARCH64|LOONGARCH64 + BUILD_TARGETS = DEBUG|RELEASE + SKUID_IDENTIFIER = DEFAULT + +@@ -75,6 +75,9 @@ + [LibraryClasses.AARCH64] + CompilerIntrinsicsLib|ArmPkg/Library/CompilerIntrinsicsLib/CompilerIntrinsicsLib.inf + ++[LibraryClasses.LOONGARCH64] ++ CompilerIntrinsicsLib|LoongArchPkg/Library/CompilerIntrinsicsLib/CompilerIntrinsicsLib.inf ++ + [Components] + RefindPkg/refind.inf + RefindPkg/gptsync.inf +diff --git a/filesystems/Make.gnuefi b/filesystems/Make.gnuefi +index be5b183..6638f9d 100644 +--- a/filesystems/Make.gnuefi ++++ b/filesystems/Make.gnuefi +@@ 
-30,6 +30,11 @@ ifeq ($(HOSTARCH),aarch64) + SUBSYSTEM_LDFLAG = -defsym=EFI_SUBSYSTEM=0xb + endif + ++ifeq ($(HOSTARCH),loongarch64) ++ # Set symbol for driver ++ SUBSYSTEM_LDFLAG = -defsym=EFI_SUBSYSTEM=0xb ++endif ++ + $(SHLIB_TARGET): $(OBJS) + $(LD) $(GNUEFI_LDFLAGS) $(SUBSYSTEM_LDFLAG) $(OBJS) -o $@ $(LOCAL_LIBS) $(GNUEFI_LIBS) + +diff --git a/filesystems/btrfs.inf b/filesystems/btrfs.inf +index 10f284a..00a2c54 100644 +--- a/filesystems/btrfs.inf ++++ b/filesystems/btrfs.inf +@@ -51,6 +51,11 @@ + # Comment out CompilerIntrinsicsLib when compiling for AARCH64 using UDK2014 + CompilerIntrinsicsLib + ++[LibraryClasses.LOONGARCH64] ++ BaseStackCheckLib ++# Comment out CompilerIntrinsicsLib when compiling for LOONGARCH64 using UDK2014 ++ CompilerIntrinsicsLib ++ + [Guids] + + [Ppis] +@@ -74,3 +79,7 @@ + [BuildOptions.AARCH64] + XCODE:*_*_*_CC_FLAGS = -Os -DEFIAARCH64 -D__MAKEWITH_TIANO -DFSTYPE=btrfs + GCC:*_*_*_CC_FLAGS = -Os -DEFIAARCH64 -D__MAKEWITH_TIANO -DFSTYPE=btrfs ++ ++[BuildOptions.LOONGARCH64] ++ XCODE:*_*_*_CC_FLAGS = -Os -DEFILOONGARCH64 -D__MAKEWITH_TIANO -DFSTYPE=btrfs ++ GCC:*_*_*_CC_FLAGS = -Os -DEFILOONGARCH64 -D__MAKEWITH_TIANO -DFSTYPE=btrfs +diff --git a/filesystems/ext2.inf b/filesystems/ext2.inf +index ee8eea0..2df2dbd 100644 +--- a/filesystems/ext2.inf ++++ b/filesystems/ext2.inf +@@ -50,6 +50,11 @@ + # Comment out CompilerIntrinsicsLib when compiling for AARCH64 using UDK2014 + CompilerIntrinsicsLib + ++[LibraryClasses.LOONGARCH64] ++ BaseStackCheckLib ++# Comment out CompilerIntrinsicsLib when compiling for LOONGARCH64 using UDK2014 ++ CompilerIntrinsicsLib ++ + [Guids] + + [Ppis] +@@ -71,3 +76,7 @@ + [BuildOptions.AARCH64] + XCODE:*_*_*_CC_FLAGS = -Os -DEFIAARCH64 -D__MAKEWITH_TIANO -DFSTYPE=ext2 + GCC:*_*_*_CC_FLAGS = -Os -DEFIAARCH64 -D__MAKEWITH_TIANO -DFSTYPE=ext2 ++ ++[BuildOptions.LOONGARCH64] ++ XCODE:*_*_*_CC_FLAGS = -Os -DEFILOONGARCH64 -D__MAKEWITH_TIANO -DFSTYPE=ext2 ++ GCC:*_*_*_CC_FLAGS = -Os -DEFILOONGARCH64 -D__MAKEWITH_TIANO -DFSTYPE=ext2 +diff --git a/filesystems/ext4.inf b/filesystems/ext4.inf +index 3586ee5..6e445dd 100644 +--- a/filesystems/ext4.inf ++++ b/filesystems/ext4.inf +@@ -51,6 +51,11 @@ + # Comment out CompilerIntrinsicsLib when compiling for AARCH64 using UDK2014 + CompilerIntrinsicsLib + ++[LibraryClasses.LOONGARCH64] ++ BaseStackCheckLib ++# Comment out CompilerIntrinsicsLib when compiling for LOONGARCH64 using UDK2014 ++ CompilerIntrinsicsLib ++ + [Guids] + + [Ppis] +@@ -72,3 +77,7 @@ + [BuildOptions.AARCH64] + XCODE:*_*_*_CC_FLAGS = -Os -DEFIAARCH64 -D__MAKEWITH_TIANO -DFSTYPE=ext4 + GCC:*_*_*_CC_FLAGS = -Os -DEFIAARCH64 -D__MAKEWITH_TIANO -DFSTYPE=ext4 ++ ++[BuildOptions.LOONGARCH64] ++ XCODE:*_*_*_CC_FLAGS = -Os -DEFILOONGARCH64 -D__MAKEWITH_TIANO -DFSTYPE=ext4 ++ GCC:*_*_*_CC_FLAGS = -Os -DEFILOONGARCH64 -D__MAKEWITH_TIANO -DFSTYPE=ext4 +diff --git a/filesystems/hfs.inf b/filesystems/hfs.inf +index 14a859c..31bc526 100644 +--- a/filesystems/hfs.inf ++++ b/filesystems/hfs.inf +@@ -51,6 +51,11 @@ + # Comment out CompilerIntrinsicsLib when compiling for AARCH64 using UDK2014 + CompilerIntrinsicsLib + ++[LibraryClasses.LOONGARCH64] ++ BaseStackCheckLib ++# Comment out CompilerIntrinsicsLib when compiling for LOONGARCH64 using UDK2014 ++ CompilerIntrinsicsLib ++ + [Guids] + + [Ppis] +@@ -72,3 +77,7 @@ + [BuildOptions.AARCH64] + XCODE:*_*_*_CC_FLAGS = -Os -DEFIAARCH64 -D__MAKEWITH_TIANO -DFSTYPE=hfs + GCC:*_*_*_CC_FLAGS = -Os -DEFIAARCH64 -D__MAKEWITH_TIANO -DFSTYPE=hfs ++ ++[BuildOptions.LOONGARCH64] ++ XCODE:*_*_*_CC_FLAGS = -Os 
-DEFILOONGARCH64 -D__MAKEWITH_TIANO -DFSTYPE=hfs ++ GCC:*_*_*_CC_FLAGS = -Os -DEFILOONGARCH64 -D__MAKEWITH_TIANO -DFSTYPE=hfs +diff --git a/filesystems/iso9660.inf b/filesystems/iso9660.inf +index 0e03032..9bcf98b 100644 +--- a/filesystems/iso9660.inf ++++ b/filesystems/iso9660.inf +@@ -51,6 +51,11 @@ + # Comment out CompilerIntrinsicsLib when compiling for AARCH64 using UDK2014 + CompilerIntrinsicsLib + ++[LibraryClasses.LOONGARCH64] ++ BaseStackCheckLib ++# Comment out CompilerIntrinsicsLib when compiling for LOONGARCH64 using UDK2014 ++ CompilerIntrinsicsLib ++ + [Guids] + + [Ppis] +@@ -72,3 +77,7 @@ + [BuildOptions.AARCH64] + XCODE:*_*_*_CC_FLAGS = -Os -DEFIAARCH64 -D__MAKEWITH_TIANO -DFSTYPE=iso9660 + GCC:*_*_*_CC_FLAGS = -Os -DEFIAARCH64 -D__MAKEWITH_TIANO -DFSTYPE=iso9660 ++ ++[BuildOptions.LOONGARCH64] ++ XCODE:*_*_*_CC_FLAGS = -Os -DEFILOONGARCH64 -D__MAKEWITH_TIANO -DFSTYPE=iso9660 ++ GCC:*_*_*_CC_FLAGS = -Os -DEFILOONGARCH64 -D__MAKEWITH_TIANO -DFSTYPE=iso9660 +diff --git a/filesystems/ntfs.inf b/filesystems/ntfs.inf +index 73262e1..b008e10 100644 +--- a/filesystems/ntfs.inf ++++ b/filesystems/ntfs.inf +@@ -51,6 +51,11 @@ + # Comment out CompilerIntrinsicsLib when compiling for AARCH64 using UDK2014 + CompilerIntrinsicsLib + ++[LibraryClasses.LOONGARCH64] ++ BaseStackCheckLib ++# Comment out CompilerIntrinsicsLib when compiling for LOONGARCH64 using UDK2014 ++ CompilerIntrinsicsLib ++ + [Guids] + + [Ppis] +@@ -72,3 +77,7 @@ + [BuildOptions.AARCH64] + XCODE:*_*_*_CC_FLAGS = -Os -DEFIAARCH64 -D__MAKEWITH_TIANO -DFSTYPE=ntfs + GCC:*_*_*_CC_FLAGS = -Os -DEFIAARCH64 -D__MAKEWITH_TIANO -DFSTYPE=ntfs ++ ++[BuildOptions.LOONGARCH64] ++ XCODE:*_*_*_CC_FLAGS = -Os -DEFILOONGARCH64 -D__MAKEWITH_TIANO -DFSTYPE=ntfs ++ GCC:*_*_*_CC_FLAGS = -Os -DEFILOONGARCH64 -D__MAKEWITH_TIANO -DFSTYPE=ntfs +diff --git a/filesystems/reiserfs.inf b/filesystems/reiserfs.inf +index 474d97c..b2c6dfa 100644 +--- a/filesystems/reiserfs.inf ++++ b/filesystems/reiserfs.inf +@@ -51,6 +51,11 @@ + # Comment out CompilerIntrinsicsLib when compiling for AARCH64 using UDK2014 + CompilerIntrinsicsLib + ++[LibraryClasses.LOONGARCH64] ++ BaseStackCheckLib ++# Comment out CompilerIntrinsicsLib when compiling for LOONGARCH64 using UDK2014 ++ CompilerIntrinsicsLib ++ + [Guids] + + [Ppis] +@@ -72,3 +77,7 @@ + [BuildOptions.AARCH64] + XCODE:*_*_*_CC_FLAGS = -Os -DEFIAARCH64 -D__MAKEWITH_TIANO -DFSTYPE=reiserfs + GCC:*_*_*_CC_FLAGS = -Os -DEFIAARCH64 -D__MAKEWITH_TIANO -DFSTYPE=reiserfs ++ ++[BuildOptions.LOONGARCH64] ++ XCODE:*_*_*_CC_FLAGS = -Os -DEFILOONGARCH64 -D__MAKEWITH_TIANO -DFSTYPE=reiserfs ++ GCC:*_*_*_CC_FLAGS = -Os -DEFILOONGARCH64 -D__MAKEWITH_TIANO -DFSTYPE=reiserfs +diff --git a/gptsync.inf b/gptsync.inf +index af3769c..382ac97 100644 +--- a/gptsync.inf ++++ b/gptsync.inf +@@ -50,6 +50,11 @@ + # Comment out CompilerIntrinsicsLib when compiling for AARCH64 using UDK2014 + CompilerIntrinsicsLib + ++[LibraryClasses.LOONGARCH64] ++ BaseStackCheckLib ++# Comment out CompilerIntrinsicsLib when compiling for LOONGARCH64 using UDK2014 ++ CompilerIntrinsicsLib ++ + [Guids] + gEfiAcpiTableGuid + gEfiAcpi10TableGuid +@@ -135,3 +140,7 @@ + [BuildOptions.AARCH64] + XCODE:*_*_*_CC_FLAGS = -Os -DEFIAARCH64 -D__MAKEWITH_TIANO + GCC:*_*_*_CC_FLAGS = -Os -DEFIAARCH64 -D__MAKEWITH_TIANO ++ ++[BuildOptions.LOONGARCH64] ++ XCODE:*_*_*_CC_FLAGS = -Os -DEFILOONGARCH64 -D__MAKEWITH_TIANO ++ GCC:*_*_*_CC_FLAGS = -Os -DEFILOONGARCH64 -D__MAKEWITH_TIANO +diff --git a/gptsync/Make.gnuefi b/gptsync/Make.gnuefi +index b74d0f3..8c6f530 100644 
+--- a/gptsync/Make.gnuefi ++++ b/gptsync/Make.gnuefi +@@ -23,6 +23,10 @@ ifeq ($(ARCH),aarch64) + TARGET = gptsync_aa64.efi + endif + ++ifeq ($(ARCH),loongarch64) ++ TARGET = gptsync_loongarch64.efi ++endif ++ + all: $(TARGET) + + SHLIB_TARGET = $(subst .efi,.so,$(TARGET)) +diff --git a/gptsync/Make.tiano b/gptsync/Make.tiano +index a0656cd..70a7db3 100644 +--- a/gptsync/Make.tiano ++++ b/gptsync/Make.tiano +@@ -26,6 +26,10 @@ ifeq ($(ARCH),aarch64) + ALL_EFILIBS += $(EFILIB)/BaseStackCheckLib/BaseStackCheckLib/OUTPUT/BaseStackCheckLib.lib + endif + ++ifeq ($(ARCH),loongarch64) ++ ALL_EFILIBS += $(EFILIB)/BaseStackCheckLib/BaseStackCheckLib/OUTPUT/BaseStackCheckLib.lib ++endif ++ + TIANO_INCLUDE_DIRS = -I $(TIANOBASE)/MdePkg \ + -I $(TIANOBASE)/MdePkg/Include \ + -I $(TIANOBASE)/MdePkg/Include/$(ARCHDIR) \ +diff --git a/gptsync/gptsync.h b/gptsync/gptsync.h +index f6cf2a5..584d1fb 100644 +--- a/gptsync/gptsync.h ++++ b/gptsync/gptsync.h +@@ -45,7 +45,7 @@ + // + + +-#if defined(EFI32) || defined(EFIX64) || defined(EFIAARCH64) ++#if defined(EFI32) || defined(EFIX64) || defined(EFIAARCH64) || defined(EFILOONGARCH64) + #define CONFIG_EFI + #endif + +diff --git a/include/refit_call_wrapper.h b/include/refit_call_wrapper.h +index 1bd4042..1fa386f 100644 +--- a/include/refit_call_wrapper.h ++++ b/include/refit_call_wrapper.h +@@ -3,7 +3,7 @@ + + #ifdef __MAKEWITH_GNUEFI + +-#if defined (EFIX64) | defined (AARCH64) ++#if defined (EFIX64) | defined (AARCH64) | defined (LOONGARCH64) + # define refit_call1_wrapper(f, a1) \ + uefi_call_wrapper(f, 1, (UINT64)(a1)) + # define refit_call2_wrapper(f, a1, a2) \ +diff --git a/libeg/image.c b/libeg/image.c +index c8ae198..47ddede 100644 +--- a/libeg/image.c ++++ b/libeg/image.c +@@ -71,7 +71,7 @@ + // A value of 4096 should keep us within limits on 32-bit systems, but I've + // seen some minor artifacts at this level, so give it a bit more precision + // on 64-bit systems.... +-#if defined(EFIX64) | defined(EFIAARCH64) ++#if defined(EFIX64) | defined(EFIAARCH64) | defined(EFILOONGARCH64) + #define FP_MULTIPLIER (UINTN) 65536 + #else + #define FP_MULTIPLIER (UINTN) 4096 +diff --git a/mvrefind b/mvrefind +index 6b840c2..154729e 100755 +--- a/mvrefind ++++ b/mvrefind +@@ -95,6 +95,9 @@ DeterminePlatform() { + i?86) + Platform="ia32" + ;; ++ loongarch64) ++ Platform="loongarch64" ++ ;; + *) + echo "Unsupported CPU type; aborting!" + exit 1 +diff --git a/refind-install b/refind-install +index 22dd8e6..3fc7cab 100755 +--- a/refind-install ++++ b/refind-install +@@ -266,7 +266,7 @@ ReadKeyPassphrase() { + + # Determine what CPU type and EFI bit depth we're using. + # Sets Platform global variable to lowercase EFI platform code (currently +-# "x64", "ia32", or "aa64") -- the same code used in filenames. ++# "x64", "ia32", "aa64" or "loongarch64") -- the same code used in filenames. + DeterminePlatform() { + local CpuType + case "$OSTYPE" in +@@ -291,6 +291,9 @@ DeterminePlatform() { + aarch64) + Platform="aa64" + ;; ++ loongarch64) ++ Platform="loongarch64" ++ ;; + x86_64) + Platform="x64" + ;; +@@ -386,6 +389,7 @@ CheckForFiles() { + if [[ $ShimType == "shimx64.efi" || $ShimType == "shim.efi" || $ShimType == "shimx64.efi.signed" ]] ; then + TargetX64="grubx64.efi" + TargetAARCH64="grubaa64.efi" ++ TargetLOONGARCH64="grubloongarch64.efi" + MokManagerSource=$(dirname "$ShimSource")/mm$Platform.efi.signed + if [[ ! 
-f "$MokManagerSource" ]] ; then + MokManagerSource=$(dirname "$ShimSource")/mm$Platform.efi +@@ -455,10 +459,12 @@ SetVarsForBoot() { + TargetX64="bootx64.efi" + TargetIA32="bootia32.efi" + TargetAARCH64="bootaa64.efi" ++ TargetLOONGARCH64="bootloongarch64.efi" + else +- if [[ $ShimType == "shim.efi" || $ShimType == "shimx64.efi" || $ShimType == "shimx64.efi.signed" || $ShimType = "shimaa64.efi" ]] ; then ++ if [[ $ShimType == "shim.efi" || $ShimType == "shimx64.efi" || $ShimType == "shimx64.efi.signed" || $ShimType = "shimaa64.efi" || $ShimType = "shimloongarch64.efi" ]] ; then + TargetX64="grubx64.efi" + TargetAARCH64="grubaa64.efi" ++ TargetLOONGARCH64="grubloongarch64.efi" + elif [[ $ShimType == "preloader.efi" || $ShimType == "PreLoader.efi" ]] ; then + TargetX64="loader.efi" + else +@@ -482,10 +488,12 @@ SetVarsForMsBoot() { + TargetX64="bootmgfw.efi" + TargetIA32="bootmgfw.efi" + TargetAARCH64="bootmgfw.efi" ++ TargetLOONGARCH64="bootmgfw.efi" + else +- if [[ $ShimType == "shim.efi" || $ShimType == "shimx64.efi" || $ShimType == "shimaa64.efi" ]] ; then ++ if [[ $ShimType == "shim.efi" || $ShimType == "shimx64.efi" || $ShimType == "shimaa64.efi" || $ShimType == "shimloongarch64.efi" ]] ; then + TargetX64="grubx64.efi" + TargetAARCH64="grubaa64.efi" ++ TargetLOONGARCH64="grubloongarch64.efi" + elif [[ $ShimType == "preloader.efi" || $ShimType == "PreLoader.efi" ]] ; then + TargetX64="loader.efi" + else +@@ -528,6 +536,7 @@ DetermineTargetDir() { + TargetX64="refind_x64.efi" + TargetIA32="refind_ia32.efi" + TargetAARCH64="refind_aa64.efi" ++ TargetLOONGARCH64="refind_loongarch64.efi" + fi + Upgrade=1 + fi +@@ -672,6 +681,10 @@ CopyRefindFiles() { + if [[ $? != 0 && $Platform == "aa64" ]] ; then + Problems=1 + fi ++ cp "$RefindDir/refind_loongarch64.efi" "$InstallDir/$TargetDir/$TargetLOONGARCH64" 2> /dev/null ++ if [[ $? != 0 && $Platform == "loongarch64" ]] ; then ++ Problems=1 ++ fi + if [[ "$ShimSource" != "none" ]] ; then + TargetShim="bootx64.efi" + CopyShimFiles +@@ -709,15 +722,19 @@ CopyRefindFiles() { + if [[ "$TargetDir" == '/System/Library/CoreServices' ]] ; then + SetupMacHfs $TargetX64 + fi +- elif [[ $Platform == 'ia32' || $Platform == 'aa64' ]] ; then ++ elif [[ $Platform == 'ia32' || $Platform == 'aa64' || $Platform == 'loongarch64' ]] ; then + if [[ $Platform == 'ia32' ]] ; then + if ! cp "$RefindDir/refind_ia32.efi" "$InstallDir/$TargetDir/$TargetIA32" ; then + Problems=1 + fi +- else ++ elif [[ $Platform == 'aa64' ]] ; then + if ! cp "$RefindDir/refind_aa64.efi" "$InstallDir/$TargetDir/$TargetAARCH64" ; then + Problems=1 + fi ++ else ++ if ! cp "$RefindDir/refind_loongarch64.efi" "$InstallDir/$TargetDir/$TargetLOONGARCH64" ; then ++ Problems=1 ++ fi + fi + CopyDrivers $Platform + CopyTools $Platform +@@ -816,6 +833,10 @@ CreateBootCsvFile() { + echo "$TargetAARCH64,rEFInd Boot Manager,,This is the boot entry for rEFInd" | \ + $IConv -t UCS-2 > "$InstallDir/$TargetDir/BOOT.CSV" + fi ++ if [[ "$Platform" == "loongarch64" && -d "$InstallDir/$TargetDir" ]] ; then ++ echo "$TargetLOONGARCH64,rEFInd Boot Manager,,This is the boot entry for rEFInd" | \ ++ $IConv -t UCS-2 > "$InstallDir/$TargetDir/BOOT.CSV" ++ fi + fi + } # CreateBootCsvFile() + +@@ -1362,8 +1383,9 @@ AddBootEntry() { + echo + echo "ALERT: There were problems running the efibootmgr program! 
You may need to" + echo "rename the $Refind binary to the default name (EFI/BOOT/bootx64.efi" +- echo "on x86-64 systems, EFI/BOOT/bootia32.efi on x86 systems, or" +- echo "EFI/BOOT/bootaa64.efi on ARM64 systems) to have it run!" ++ echo "on x86-64 systems, EFI/BOOT/bootia32.efi on x86 systems, " ++ echo "EFI/BOOT/bootaa64.efi on ARM64 systems or " ++ echo "EFI/BOOT/bootloongarch64.efi on LoongArch systems.) to have it run!" + echo + else + echo "rEFInd is set as the default boot manager." +diff --git a/refind.inf b/refind.inf +index 9340b1e..7bb6a36 100644 +--- a/refind.inf ++++ b/refind.inf +@@ -22,7 +22,7 @@ + # + # The following information is for reference only and not required by the build tools. + # +-# VALID_ARCHITECTURES = IA32 X64 IPF EBC AARCH64 ++# VALID_ARCHITECTURES = IA32 X64 IPF EBC AARCH64 LOONGARCH64 + # + + [Sources] +@@ -99,6 +99,11 @@ + # Comment out CompilerIntrinsicsLib when compiling for AARCH64 using UDK2014 + CompilerIntrinsicsLib + ++[LibraryClasses.LOONGARCH64] ++ BaseStackCheckLib ++# Comment out CompilerIntrinsicsLib when compiling for LOONGARCH64 using UDK2014 ++ CompilerIntrinsicsLib ++ + [Guids] + gEfiAcpiTableGuid + gEfiAcpi10TableGuid +@@ -184,3 +189,7 @@ + [BuildOptions.AARCH64] + XCODE:*_*_*_CC_FLAGS = -Os -DEFIAARCH64 -D__MAKEWITH_TIANO + GCC:*_*_*_CC_FLAGS = -Os -DEFIAARCH64 -D__MAKEWITH_TIANO ++ ++[BuildOptions.LOONGARCH64] ++ XCODE:*_*_*_CC_FLAGS = -Os -DEFILOONGARCH64 -D__MAKEWITH_TIANO ++ GCC:*_*_*_CC_FLAGS = -Os -DEFILOONGARCH64 -D__MAKEWITH_TIANO +diff --git a/refind/Make.tiano b/refind/Make.tiano +index 3e9036c..4aff620 100644 +--- a/refind/Make.tiano ++++ b/refind/Make.tiano +@@ -35,6 +35,10 @@ ifeq ($(ARCH),aarch64) + ALL_EFILIBS += $(EFILIB)/BaseStackCheckLib/BaseStackCheckLib/OUTPUT/BaseStackCheckLib.lib + endif + ++ifeq ($(ARCH),loongarch64) ++ ALL_EFILIBS += $(EFILIB)/BaseStackCheckLib/BaseStackCheckLib/OUTPUT/BaseStackCheckLib.lib ++endif ++ + SOURCE_NAMES = apple AutoGen config crc32 driver_support gpt icns \ + install launch_efi launch_legacy lib line_edit linux \ + log main menu mystrings pointer scan screen +diff --git a/refind/Makefile b/refind/Makefile +index 8e7048c..99ba59b 100644 +--- a/refind/Makefile ++++ b/refind/Makefile +@@ -30,6 +30,11 @@ ifeq ($(ARCH),aarch64) + TARGET = refind_aa64.efi + endif + ++ifeq ($(ARCH),loongarch64) ++ LIBEG = build ++ TARGET = refind_loongarch64.efi ++endif ++ + LOCAL_GNUEFI_CFLAGS = -I$(SRCDIR) -I$(SRCDIR)/../include \ + -I$(SRCDIR)/../libeg -I$(SRCDIR)/../mok + LOCAL_LDFLAGS = -L$(SRCDIR)/../libeg/ -L$(SRCDIR)/../mok/ \ +diff --git a/refind/config.h b/refind/config.h +index 223af72..1e1bb9b 100644 +--- a/refind/config.h ++++ b/refind/config.h +@@ -76,6 +76,8 @@ typedef struct { + #define DONT_SCAN_FILES L"shim.efi,shim-fedora.efi,shim-centos.efi,shimx64.efi,PreLoader.efi,TextMode.efi,ebounce.efi,GraphicsConsole.efi,bootmgr.efi,fbia32.efi" + #elif defined(EFIAARCH64) + #define DONT_SCAN_FILES L"shim.efi,shim-fedora.efi,shim-centos.efi,shimx64.efi,PreLoader.efi,TextMode.efi,ebounce.efi,GraphicsConsole.efi,bootmgr.efi,fbaa64.efi" ++#elif defined(EFILOONGARCH64) ++#define DONT_SCAN_FILES L"shim.efi,shim-fedora.efi,shim-centos.efi,shimx64.efi,PreLoader.efi,TextMode.efi,ebounce.efi,GraphicsConsole.efi,bootmgr.efi,fbloongarch64.efi" + #else + #define DONT_SCAN_FILES L"shim.efi,shim-fedora.efi,shim-centos.efi,shimx64.efi,PreLoader.efi,TextMode.efi,ebounce.efi,GraphicsConsole.efi,bootmgr.efi" + #endif +diff --git a/refind/driver_support.c b/refind/driver_support.c +index 563b5ad..a527b13 100644 +--- 
a/refind/driver_support.c ++++ b/refind/driver_support.c +@@ -83,6 +83,8 @@ + #define DRIVER_DIRS L"drivers,drivers_ia32" + #elif defined (EFIAARCH64) + #define DRIVER_DIRS L"drivers,drivers_aa64" ++#elif defined (EFILOONGARCH64) ++#define DRIVER_DIRS L"drivers,drivers_loongarch64" + #else + #define DRIVER_DIRS L"drivers" + #endif +diff --git a/refind/global.h b/refind/global.h +index ae958ed..7bbbedb 100644 +--- a/refind/global.h ++++ b/refind/global.h +@@ -172,6 +172,8 @@ + #define MOK_NAMES L"MokManager.efi,HashTool.efi,HashTool-signed.efi,KeyTool.efi,KeyTool-signed.efi,mmia32.efi" + #elif defined(EFIAARCH64) + #define MOK_NAMES L"MokManager.efi,HashTool.efi,HashTool-signed.efi,KeyTool.efi,KeyTool-signed.efi,mmaa64.efi" ++#elif defined(EFILOONGARCH64) ++#define MOK_NAMES L"MokManager.efi,HashTool.efi,HashTool-signed.efi,KeyTool.efi,KeyTool-signed.efi,mmloongarch64.efi" + #else + #define MOK_NAMES L"MokManager.efi,HashTool.efi,HashTool-signed.efi,KeyTool.efi,KeyTool-signed.efi" + #endif +@@ -182,6 +184,8 @@ + #define FWUPDATE_NAMES L"fwupia32.efi" + #elif defined(EFIAARCH64) + #define FWUPDATE_NAMES L"fwupaa64.efi" ++#elif defined(EFILOONGARCH64) ++#define FWUPDATE_NAMES L"fwuploongarch64.efi" + #else + #define FWUPDATE_NAMES L"fwup.efi" + #endif +diff --git a/refind/install.h b/refind/install.h +index 1976884..3ecf96d 100644 +--- a/refind/install.h ++++ b/refind/install.h +@@ -28,6 +28,11 @@ + #define INST_DRIVERS_SUBDIR L"drivers_aa64" + #define INST_REFIND_NAME L"refind_aa64.efi" + #define INST_PLATFORM_EXTENSION L"_aa64.efi" ++#elif defined(EFILOONGARCH64) ++#define INST_DIRECTORIES L"\\EFI,\\EFI\\refind,\\EFI\\refind\\icons,\\EFI\\refind\\drivers_loongarch64" ++#define INST_DRIVERS_SUBDIR L"drivers_loongarch64" ++#define INST_REFIND_NAME L"refind_loongarch64.efi" ++#define INST_PLATFORM_EXTENSION L"_loongarch64.efi" + #else + #define INST_DIRECTORIES L"\\EFI,\\EFI\\refind,\\EFI\\refind\\icons,\\EFI\\refind\\drivers" + #define INST_DRIVERS_SUBDIR L"drivers" +diff --git a/refind/launch_efi.c b/refind/launch_efi.c +index 320a207..03da111 100644 +--- a/refind/launch_efi.c ++++ b/refind/launch_efi.c +@@ -81,6 +81,8 @@ + #define EFI_STUB_ARCH 0x014c + #elif defined (EFIAARCH64) + #define EFI_STUB_ARCH 0xaa64 ++#elif defined (EFILOONGARCH64) ++#define EFI_STUB_ARCH 0x6264 + #else + #endif + +@@ -116,7 +118,7 @@ static VOID WarnSecureBootError(CHAR16 *Name, BOOLEAN Verbose) { + // gzip loaders. 
+ UINTN IsValidLoader(EFI_FILE_PROTOCOL *RootDir, CHAR16 *FileName) { + UINTN LoaderType = LOADER_TYPE_EFI; +-#if defined (EFIX64) | defined (EFI32) | defined (EFIAARCH64) ++#if defined (EFIX64) | defined (EFI32) | defined (EFIAARCH64) | defined (EFILOONGARCH64) + BOOLEAN IsValid = TRUE; + EFI_STATUS Status; + EFI_FILE_HANDLE FileHandle; +diff --git a/refind/lib.c b/refind/lib.c +index 0da1fa4..c27ae73 100644 +--- a/refind/lib.c ++++ b/refind/lib.c +@@ -103,6 +103,8 @@ EFI_GUID gFreedesktopRootGuid = { 0x4f68bce3, 0xe8cd, 0x4db1, { 0x96, 0xe7, 0xfb + EFI_GUID gFreedesktopRootGuid = { 0x44479540, 0xf297, 0x41b2, { 0x9a, 0xf7, 0xd1, 0x31, 0xd5, 0xf0, 0x45, 0x8a }}; + #elif defined (EFIAARCH64) + EFI_GUID gFreedesktopRootGuid = { 0xb921b045, 0x1df0, 0x41c3, { 0xaf, 0x44, 0x4c, 0x6f, 0x28, 0x0d, 0x3f, 0xae }}; ++#elif defined (EFILOONGARCH64) ++EFI_GUID gFreedesktopRootGuid = { 0x77055800, 0x792c, 0x4f94, { 0xb3, 0x9a, 0x98, 0xc9, 0x1b, 0x76, 0x2b, 0xb6 }}; + #else + // Below is GUID for ARM32 + EFI_GUID gFreedesktopRootGuid = { 0x69dad710, 0x2ce4, 0x4e3c, { 0xb1, 0x6c, 0x21, 0xa1, 0xd4, 0x9a, 0xbe, 0xd3 }}; +diff --git a/refind/main.c b/refind/main.c +index a6d0dc7..55f5b2f 100644 +--- a/refind/main.c ++++ b/refind/main.c +@@ -192,6 +192,9 @@ VOID AboutrEFInd(VOID) + #elif defined(EFIAARCH64) + AddMenuInfoLine(&AboutMenu, PoolPrint(L" Platform: ARM (64 bit); Secure Boot %s", + secure_mode() ? L"active" : L"inactive")); ++#elif defined(EFILOONGARCH64) ++ AddMenuInfoLine(&AboutMenu, PoolPrint(L" Platform: LoongArch (64 bit); Secure Boot %s", ++ secure_mode() ? L"active" : L"inactive")); + #else + AddMenuInfoLine(&AboutMenu, L" Platform: unknown"); + #endif +@@ -384,6 +387,8 @@ VOID LogBasicInfo(VOID) { + LOG(1, LOG_LINE_NORMAL, L"Platform: x86-64/X64/AMD64 (64-bit)"); + #elif defined(EFIAARCH64) + LOG(1, LOG_LINE_NORMAL, L"Platform: ARM64/AARCH64 (64-bit)"); ++#elif defined(EFILOONGARCH64) ++ LOG(1, LOG_LINE_NORMAL, L"Platform: LoongArch (64-bit)"); + #else + LOG(1, LOG_LINE_NORMAL, L"Platform: unknown"); + #endif +diff --git a/refind/scan.c b/refind/scan.c +index e270e83..535ef42 100644 +--- a/refind/scan.c ++++ b/refind/scan.c +@@ -104,6 +104,14 @@ + #define MEMTEST_NAMES L"memtest86.efi,memtest86_aa64.efi,memtest86aa64.efi,bootaa64.efi" + #define FALLBACK_FULLNAME L"EFI\\BOOT\\bootaa64.efi" + #define FALLBACK_BASENAME L"bootaa64.efi" ++#elif defined (EFILOONGARCH64) ++#define SHELL_NAMES L"\\EFI\\tools\\shell.efi,\\EFI\\tools\\shellloongarch64.efi,\\shell.efi,\\shellloongarch64.efi" ++#define GPTSYNC_NAMES L"\\EFI\\tools\\gptsync.efi,\\EFI\\tools\\gptsync_loongarch64.efi" ++#define GDISK_NAMES L"\\EFI\\tools\\gdisk.efi,\\EFI\\tools\\gdisk_loongarch64.efi" ++#define NETBOOT_NAMES L"\\EFI\\tools\\ipxe.efi" ++#define MEMTEST_NAMES L"memtest86.efi,memtest86_loongarch64.efi,memtest86loongarch64.efi,bootloongarch64.efi" ++#define FALLBACK_FULLNAME L"EFI\\BOOT\\bootloongarch64.efi" ++#define FALLBACK_BASENAME L"bootloongarch64.efi" + #else + #define SHELL_NAMES L"\\EFI\\tools\\shell.efi,\\shell.efi" + #define GPTSYNC_NAMES L"\\EFI\\tools\\gptsync.efi" +-- +2.39.2 + diff --git a/repod/0001-add-loong64-support.patch b/repod/0001-add-loong64-support.patch new file mode 100644 index 0000000000..143b7c1638 --- /dev/null +++ b/repod/0001-add-loong64-support.patch @@ -0,0 +1,33 @@ +From dda3c4bfdf9010e53c6e0259bc11c43922e51398 Mon Sep 17 00:00:00 2001 +From: Xiaotian Wu +Date: Tue, 28 Nov 2023 22:12:58 +0800 +Subject: [PATCH] add loong64 support + +--- + repod/common/enums.py | 3 +++ + 1 file changed, 3 
insertions(+) + +diff --git a/repod/common/enums.py b/repod/common/enums.py +index 64dc74b..773fd81 100644 +--- a/repod/common/enums.py ++++ b/repod/common/enums.py +@@ -23,6 +23,8 @@ class ArchitectureEnum(Enum): + The i486 CPU architecture + I686: "i686" + The i686 CPU architecture ++ LOONG64: "loong64" ++ The loong64 CPU architecture + PENTIUM4: "pentium4" + The pentium4 CPU architecture + RISCV32: "riscv32" +@@ -46,6 +48,7 @@ class ArchitectureEnum(Enum): + ARMV7H = "armv7h" + I486 = "i486" + I686 = "i686" ++ LOONG64 = "loong64" + PENTIUM4 = "pentium4" + RISCV32 = "riscv32" + RISCV64 = "riscv64" +-- +2.42.0 + diff --git a/repod/PKGBUILD b/repod/PKGBUILD index 074d6c0f7e..efea833dfb 100644 --- a/repod/PKGBUILD +++ b/repod/PKGBUILD @@ -2,7 +2,7 @@ pkgname=repod pkgver=0.3.0 -pkgrel=1 +pkgrel=2 pkgdesc="Tooling to maintain binary package repositories" arch=(any) url="https://repod.archlinux.page/" @@ -34,9 +34,17 @@ makedepends=( checkdepends=(python-pytest python-pytest-asyncio python-pytest-lazy-fixture) source=( https://gitlab.archlinux.org/archlinux/$pkgname/-/archive/$pkgver/$pkgname-$pkgver.tar.gz + 0001-add-loong64-support.patch ) -sha256sums=('1d7763d0234aeb64ba21772b9de33e4b05b575ff916c0bcf7055109f06168468') -b2sums=('ba17d42856ba032d564fe8bdc5083f6524bc2b2d47bb353c68fd10f36a5a55795eba479f10853937772f470abe44dd9c582b49c3d905feea349c363e9249baa0') +sha256sums=('1d7763d0234aeb64ba21772b9de33e4b05b575ff916c0bcf7055109f06168468' + 'c440e466ae1fb26a401d08b86611230574e478584af25a26420cce58f660b826') +b2sums=('ba17d42856ba032d564fe8bdc5083f6524bc2b2d47bb353c68fd10f36a5a55795eba479f10853937772f470abe44dd9c582b49c3d905feea349c363e9249baa0' + '509f2db206e22df793e5f7d8dd052799f30e3853a9d41c3d278065e049e9ac6dcaafde1d512077cf91b798338a52f754ef2538b773b5e5da3131aff535130492') + +prepare() { + cd $pkgname-$pkgver + patch -p1 -i $srcdir/0001-add-loong64-support.patch +} build() { export PDM_BUILD_SCM_VERSION=$pkgver diff --git a/repro-env/PKGBUILD b/repro-env/PKGBUILD index 0bf6822624..f3be14351e 100644 --- a/repro-env/PKGBUILD +++ b/repro-env/PKGBUILD @@ -20,7 +20,7 @@ b2sums=('3bfc865c6555f809767218706f8f578d22a3891333a1ddf00351e950010f868f74e6d75 prepare() { cd "${pkgname}-${pkgver}" - cargo fetch --locked --target "$CARCH-unknown-linux-gnu" + cargo fetch --locked --target "`uname -m`-unknown-linux-gnu" } build() { diff --git a/rhit/PKGBUILD b/rhit/PKGBUILD index 292c1b7f18..0c6122f2b1 100644 --- a/rhit/PKGBUILD +++ b/rhit/PKGBUILD @@ -14,7 +14,7 @@ sha512sums=('2bc59c7eb24e655eba71f4cc540823c00619eba0673dadd8133f84490642ad13f11 prepare() { cd "$pkgname-$pkgver" - cargo fetch --locked --target "$CARCH-unknown-linux-gnu" + cargo fetch --locked --target "`uname -m`-unknown-linux-gnu" } build() { diff --git a/rhythmbox/PKGBUILD b/rhythmbox/PKGBUILD index bff69d16ba..f253129bcc 100644 --- a/rhythmbox/PKGBUILD +++ b/rhythmbox/PKGBUILD @@ -38,7 +38,7 @@ makedepends=( yelp-tools zeitgeist ) -checkdepends=( +makedepends+=( check xorg-server-xvfb ) diff --git a/riff/PKGBUILD b/riff/PKGBUILD index a7ca1d1b81..bd00fd0796 100644 --- a/riff/PKGBUILD +++ b/riff/PKGBUILD @@ -16,7 +16,7 @@ sha256sums=('8e28c456a781f5e9d51d2e84f8823bf01ca85b69337a6d21d31fd435d1699e1b') prepare() { cd "$_archive" - cargo fetch --locked --target "$CARCH-unknown-linux-gnu" + cargo fetch --locked --target "`uname -m`-unknown-linux-gnu" } build() { diff --git a/ripgrep-all/PKGBUILD b/ripgrep-all/PKGBUILD index 41cf2bda03..13c851edf7 100644 --- a/ripgrep-all/PKGBUILD +++ b/ripgrep-all/PKGBUILD @@ -28,7 +28,7 @@ 
b2sums=('fc2618369c349fda5a78d3604b17b78788be73ce5925a5b6aa234627ccaa4b70dba8ded prepare() { cd ripgrep-all-${pkgver} - cargo fetch --locked --target "$CARCH-unknown-linux-gnu" + cargo fetch --locked --target "$(rustc -vV | sed -n 's/host: //p')" } build() { diff --git a/riscv64-linux-gnu-glibc/PKGBUILD b/riscv64-linux-gnu-glibc/PKGBUILD index 87e6029aed..893753c8db 100644 --- a/riscv64-linux-gnu-glibc/PKGBUILD +++ b/riscv64-linux-gnu-glibc/PKGBUILD @@ -5,7 +5,7 @@ _target=riscv64-linux-gnu pkgname=$_target-glibc pkgver=2.36 -pkgrel=1 +pkgrel=2 pkgdesc='GNU C Library RISCV target' arch=(any) url='https://www.gnu.org/software/libc/' diff --git a/roc-toolkit/PKGBUILD b/roc-toolkit/PKGBUILD index 3648be9e21..3308caf0e1 100644 --- a/roc-toolkit/PKGBUILD +++ b/roc-toolkit/PKGBUILD @@ -37,12 +37,14 @@ sha512sums=('447532862dc1714054ebd03ce7fd101525c213a87bc7198a55c8e6068c28db318d5 b2sums=('31775d330bdfb3c42278d1ecbbbb5e14f695ca82f22929e435b6c9bf10c2e25c3367d11cf2913a81e6469876c255193be11eb78b775fad3b17f57c5be9c4bb39') build() { + CFLAGS=${CFLAGS/-mno-relax/} + CXXFLAGS=${CXXFLAGS/-mno-relax/} + CFLAGS=${CFLAGS/-fstack-clash-protection/} + CXXFLAGS=${CXXFLAGS/-fstack-clash-protection/} local scons_options=( --prefix=/usr --libdir=/usr/lib --disable-openssl # disable as it is not yet used - --enable-tests - --enable-examples ) cd $pkgname-$pkgver @@ -63,6 +65,10 @@ check() { } package() { + CFLAGS=${CFLAGS/-mno-relax/} + CXXFLAGS=${CXXFLAGS/-mno-relax/} + CFLAGS=${CFLAGS/-fstack-clash-protection/} + CXXFLAGS=${CXXFLAGS/-fstack-clash-protection/} local scons_options=( --prefix=/usr --libdir=/usr/lib diff --git a/rosenpass/PKGBUILD b/rosenpass/PKGBUILD index ffaeb4256b..c9bc7662fa 100644 --- a/rosenpass/PKGBUILD +++ b/rosenpass/PKGBUILD @@ -19,7 +19,7 @@ _script=rp prepare() { cd "${pkgname}-${pkgver}" - cargo fetch --locked --target $CARCH-unknown-linux-gnu + cargo fetch --locked --target `uname -m`-unknown-linux-gnu } build() { diff --git a/rpg-cli/PKGBUILD b/rpg-cli/PKGBUILD index c1c5e0c004..408eb9721a 100644 --- a/rpg-cli/PKGBUILD +++ b/rpg-cli/PKGBUILD @@ -17,7 +17,7 @@ sha256sums=('763d5a5c9219f2084d5ec6273911f84213e5424f127117ab0f1c611609663a8b' prepare() { cd "$pkgname-$pkgver" - cargo fetch --locked --target "$CARCH-unknown-linux-gnu" + cargo fetch --locked --target "`uname -m`-unknown-linux-gnu" } build() { diff --git a/rpm-tools/PKGBUILD b/rpm-tools/PKGBUILD index f0c7916339..98beb4e7bc 100644 --- a/rpm-tools/PKGBUILD +++ b/rpm-tools/PKGBUILD @@ -26,14 +26,17 @@ _base_pkgver=$_pkgver_major.$_pkgver_minor.x source=(http://ftp.rpm.org/releases/rpm-$_base_pkgver/rpm-$pkgver.tar.bz2 rpmextract.sh - rpmlib-filesystem-check.patch) + rpmlib-filesystem-check.patch + rpm-add-loongarch.patch) sha256sums=('ba7eee1bc2c6f83be73c0a40d159c625cbaed976b3ac044233404fb25ae1b979' '3e5bf450d4628366ba35469ec0530a99cd09ab2616a3d261a3f68270f481f777' - 'bd0e6dbd458f990268c60324190c6825b234647ecdde08296d2b453dc4bce27a') + 'bd0e6dbd458f990268c60324190c6825b234647ecdde08296d2b453dc4bce27a' + 'a40afdd567b77480d2e315d3ca297f5238dd3f9f11cccc239a1d33b297b84e04') prepare() { cd rpm-${pkgver} patch -p1 < ../rpmlib-filesystem-check.patch + patch -p1 -i $srcdir/rpm-add-loongarch.patch } build() { diff --git a/rpm-tools/rpm-add-loongarch.patch b/rpm-tools/rpm-add-loongarch.patch new file mode 100644 index 0000000000..99bfb5a756 --- /dev/null +++ b/rpm-tools/rpm-add-loongarch.patch @@ -0,0 +1,63 @@ +Index: rpm-4.16.0/rpmrc.in +=================================================================== +--- rpm-4.16.0.orig/rpmrc.in 
++++ rpm-4.16.0/rpmrc.in +@@ -67,6 +67,8 @@ optflags: mipsr6el -O2 -g + optflags: mips64r6 -O2 -g + optflags: mips64r6el -O2 -g + ++optflags: loongarch64 -O2 -g ++ + optflags: armv3l -O2 -g -march=armv3 + optflags: armv4b -O2 -g -march=armv4 + optflags: armv4l -O2 -g -march=armv4 +@@ -137,6 +139,9 @@ archcolor: mipsr6el 1 + archcolor: mips64r6 2 + archcolor: mips64r6el 2 + ++archcolor: loongarch32 1 ++archcolor: loongarch64 2 ++ + archcolor: m68k 1 + + archcolor: m68kmint 1 +@@ -257,6 +262,9 @@ arch_canon: mips64r6el: mips64r6el 21 + arch_canon: riscv: riscv64 22 + arch_canon: riscv64: riscv64 22 + ++arch_canon: loongarch32: loongarch32 25 ++arch_canon: loongarch64: loongarch64 26 ++ + ############################################################# + # Canonical OS names and numbers + +@@ -360,6 +368,9 @@ buildarchtranslate: mipsr6el: mipsr6el + buildarchtranslate: mips64r6: mips64r6 + buildarchtranslate: mips64r6el: mips64r6el + ++buildarchtranslate: loongarch32: loongarch32 ++buildarchtranslate: loongarch64: loongarch64 ++ + buildarchtranslate: m68k: m68k + + buildarchtranslate: atarist: m68kmint +@@ -449,6 +460,9 @@ arch_compat: mipsr6el: noarch + arch_compat: mips64r6: mipsr6 + arch_compat: mips64r6el: mipsr6el + ++arch_compat: loongarch32: noarch ++arch_compat: loongarch64: loongarch32 ++ + arch_compat: hppa2.0: hppa1.2 + arch_compat: hppa1.2: hppa1.1 + arch_compat: hppa1.1: hppa1.0 +@@ -586,6 +600,9 @@ buildarch_compat: mipsr6el: noarch + buildarch_compat: mips64r6: noarch + buildarch_compat: mips64r6el: noarch + ++buildarch_compat: loongarch32: noarch ++buildarch_compat: loongarch64: noarch ++ + buildarch_compat: armv4b: noarch + buildarch_compat: armv8l: armv7l + buildarch_compat: armv7l: armv6l diff --git a/rq/PKGBUILD b/rq/PKGBUILD index be90634936..eac9d13119 100644 --- a/rq/PKGBUILD +++ b/rq/PKGBUILD @@ -17,7 +17,7 @@ sha256sums=('4c3fc4427d02271c93a2cf4a784887982e97f9aba4946900aad1a35b142f9a47') prepare() { cd "$_archive" - cargo fetch --locked --target "$CARCH-unknown-linux-gnu" + cargo fetch --locked --target "`uname -m`-unknown-linux-gnu" } build() { diff --git a/ruby-base64/PKGBUILD b/ruby-base64/PKGBUILD index d8c7fecc10..525f1b322b 100644 --- a/ruby-base64/PKGBUILD +++ b/ruby-base64/PKGBUILD @@ -3,7 +3,7 @@ _gemname='base64' pkgname="ruby-${_gemname}" pkgver=0.1.1 -pkgrel=4 +pkgrel=5 pkgdesc='Support for encoding and decoding binary data using a Base64 representation' arch=('any') url="https://github.com/ruby/${_gemname}" diff --git a/ruby-bigdecimal/PKGBUILD b/ruby-bigdecimal/PKGBUILD index 265e8566c8..30922ea8ef 100644 --- a/ruby-bigdecimal/PKGBUILD +++ b/ruby-bigdecimal/PKGBUILD @@ -3,7 +3,7 @@ _gemname='bigdecimal' pkgname="ruby-${_gemname}" pkgver=3.1.2 -pkgrel=4 +pkgrel=5 pkgdesc='This library provides arbitrary-precision decimal floating-point number class' arch=('loong64' 'x86_64') url="https://github.com/ruby/bigdecimal" diff --git a/ruby-cri/PKGBUILD b/ruby-cri/PKGBUILD index f9cb4a27d9..2151f00b75 100644 --- a/ruby-cri/PKGBUILD +++ b/ruby-cri/PKGBUILD @@ -48,7 +48,7 @@ package() { install -Dm 644 LICENSE -t "${pkgdir}/usr/share/licenses/${pkgname}/" install -Dm 644 README.md CODE_OF_CONDUCT.md NEWS.md -t "${pkgdir}/usr/share/doc/${pkgname}/" - mv doc/yardoc "${pkgdir}/usr/share/doc/${pkgname}/" +# mv doc/yardoc "${pkgdir}/usr/share/doc/${pkgname}/" rm -rf "${pkgdir}/${_gemdir}/gems/${_gemname}-${pkgver}/"{README.md,CODE_OF_CONDUCT.md,NEWS.md,.gitignore,.rubocop.yml,.travis.yml,test,LICENSE} rm -rf "${pkgdir}/${_gemdir}/cache" diff --git a/ruby-ffi/PKGBUILD 
b/ruby-ffi/PKGBUILD index 15bcc44fc8..b6aa722028 100644 --- a/ruby-ffi/PKGBUILD +++ b/ruby-ffi/PKGBUILD @@ -37,7 +37,7 @@ package() { rm -vrf cache cd "gems/$_gemname-$pkgver" rm -vrf Gemfile Rakefile "$_gemname.gemspec" ext rakelib - find lib/ffi/platform/* -prune -not -name "$CARCH-linux" -exec rm -rf {} + + find lib/ffi/platform/* -prune -not -name "`uname -m`-linux" -exec rm -rf {} + # move documentation install -vd "$pkgdir/usr/share/doc/$pkgname" diff --git a/ruby-iconv/PKGBUILD b/ruby-iconv/PKGBUILD index bfaef89c01..649819ac60 100755 --- a/ruby-iconv/PKGBUILD +++ b/ruby-iconv/PKGBUILD @@ -30,7 +30,7 @@ package() { # delete unnecessary files & folders cd "$pkgdir/$_gemdir" rm -vrf cache - rm -vrf "extensions/$CARCH-linux/$(basename $_gemdir)/$_gemname-$pkgver/"{gem_make.out,mkmf.log} + rm -vrf "extensions/`uname -m`-linux/$(basename $_gemdir)/$_gemname-$pkgver/"{gem_make.out,mkmf.log} cd "gems/$_gemname-$pkgver" find . -type f -name ".*" -delete rm -vrf Gemfile Rakefile "$_gemname.gemspec" ext test diff --git a/ruby-rake/PKGBUILD b/ruby-rake/PKGBUILD index 3a3370553b..541fe32fc1 100644 --- a/ruby-rake/PKGBUILD +++ b/ruby-rake/PKGBUILD @@ -16,6 +16,7 @@ noextract=($_gemname-$pkgver.gem) sha512sums=('9dbcd1ef4d93f4853b3da40b29890509bb260e13e5500f5a0502645ce762d6e50ee7dd6bd59d08d135868dab579e10344920ba246079cde7048e3510bd473ea2') package() { + set -x local _gemdir="$(ruby -e'puts Gem.default_dir')" gem install --ignore-dependencies --no-user-install --no-document -i "$pkgdir/$_gemdir" -n "$pkgdir/usr/bin" $_gemname-$pkgver.gem rm "$pkgdir/$_gemdir/cache/$_gemname-$pkgver.gem" diff --git a/ruby/PKGBUILD b/ruby/PKGBUILD index fbb939c637..9609bf44da 100644 --- a/ruby/PKGBUILD +++ b/ruby/PKGBUILD @@ -141,7 +141,7 @@ package_ruby() { rm --force --recursive --verbose \ "${pkgdir}"/usr/lib/ruby/${rubyver}/${stdlib_gem} \ "${pkgdir}"/usr/lib/ruby/${rubyver}/${stdlib_gem}.rb \ - "${pkgdir}"/usr/lib/ruby/${rubyver}/x86_64-linux/${stdlib_gem}.so \ + "${pkgdir}"/usr/lib/ruby/${rubyver}/`uname -m`-linux/${stdlib_gem}.so \ "${pkgdir}"/usr/lib/ruby/gems/${rubyver}/specifications/default/${stdlib_gem}-*.gemspec done @@ -151,14 +151,14 @@ package_ruby() { "${pkgdir}"/usr/lib/ruby/${rubyver}/net/http \ "${pkgdir}"/usr/lib/ruby/${rubyver}/net/http.rb \ "${pkgdir}"/usr/lib/ruby/${rubyver}/net/https.rb \ - "${pkgdir}"/usr/lib/ruby/${rubyver}/x86_64-linux/cgi \ - "${pkgdir}"/usr/lib/ruby/${rubyver}/x86_64-linux/date_core.so \ - "${pkgdir}"/usr/lib/ruby/${rubyver}/x86_64-linux/digest \ - "${pkgdir}"/usr/lib/ruby/${rubyver}/x86_64-linux/io/console.so \ - "${pkgdir}"/usr/lib/ruby/${rubyver}/x86_64-linux/io/nonblock.so \ - "${pkgdir}"/usr/lib/ruby/${rubyver}/x86_64-linux/io/wait.so \ - "${pkgdir}"/usr/lib/ruby/${rubyver}/x86_64-linux/json \ - "${pkgdir}"/usr/lib/ruby/${rubyver}/x86_64-linux/racc + "${pkgdir}"/usr/lib/ruby/${rubyver}/`uname -m`-linux/cgi \ + "${pkgdir}"/usr/lib/ruby/${rubyver}/`uname -m`-linux/date_core.so \ + "${pkgdir}"/usr/lib/ruby/${rubyver}/`uname -m`-linux/digest \ + "${pkgdir}"/usr/lib/ruby/${rubyver}/`uname -m`-linux/io/console.so \ + "${pkgdir}"/usr/lib/ruby/${rubyver}/`uname -m`-linux/io/nonblock.so \ + "${pkgdir}"/usr/lib/ruby/${rubyver}/`uname -m`-linux/io/wait.so \ + "${pkgdir}"/usr/lib/ruby/${rubyver}/`uname -m`-linux/json \ + "${pkgdir}"/usr/lib/ruby/${rubyver}/`uname -m`-linux/racc } package_ruby-docs() { diff --git a/ruff/PKGBUILD b/ruff/PKGBUILD index bccc76d4a4..243cdaa3e8 100644 --- a/ruff/PKGBUILD +++ b/ruff/PKGBUILD @@ -47,7 +47,7 @@ _package_common() { package_ruff() { cd 
"$_archive" _package_common - local _target="target/$CARCH-unknown-linux-gnu/release/ruff" + local _target="target/`uname -m`-unknown-linux-gnu/release/ruff" install -Dm0755 -t "$pkgdir/usr/bin/" "$_target" $_target --generate-shell-completion bash | install -Dm0644 /dev/stdin "$pkgdir/usr/share/bash-completion/completions/$pkgbase.bash" $_target --generate-shell-completion fish | install -Dm0644 /dev/stdin "$pkgdir/usr/share/fish/vendor_completions.d/$pkgbase.fish" diff --git a/runc/PKGBUILD b/runc/PKGBUILD index 33163d1d37..7d21252e20 100644 --- a/runc/PKGBUILD +++ b/runc/PKGBUILD @@ -16,15 +16,18 @@ optdepends=( 'criu: checkpoint support' ) source=("${pkgname}-${pkgver}.tar.xz::https://github.com/opencontainers/runc/releases/download/v${pkgver}/runc.tar.xz" - "${pkgname}-${pkgver}.tar.xz.sig::https://github.com/opencontainers/runc/releases/download/v${pkgver}/runc.tar.xz.asc") + "${pkgname}-${pkgver}.tar.xz.sig::https://github.com/opencontainers/runc/releases/download/v${pkgver}/runc.tar.xz.asc" + runc-la64.patch) validpgpkeys=("5F36C6C61B5460124A75F5A69E18AA267DDB8DB4" "C9C370B246B09F6DBCFC744C34401015D1D2D386") sha256sums=('47d9e34500e478d860512b3b646724ee4b9e638692122ddaa82af417668ca4d7' - 'SKIP') + 'SKIP' + '6027791a177bbc22751eecd0bca41ffc4287fbdbdd757c27dab6cd29e0d425f4') prepare() { mkdir -p src/github.com/opencontainers cp -r runc-${pkgver} src/github.com/opencontainers/runc + patch -d ${pkgname}-${pkgver} -p1 -i $srcdir/runc-la64.patch } build() { @@ -36,7 +39,15 @@ build() { export CGO_CXXFLAGS="${CXXFLAGS}" export CGO_LDFLAGS="${LDFLAGS}" export GOFLAGS="-trimpath -mod=readonly -modcacherw" - make runc man + export GOPROXY=https://goproxy.cn + go mod edit -replace=golang.org/x/sys=github.com/golang/sys@v0.0.0-20220622161953-175b2fd9d664 + go mod edit -replace=golang.org/x/net=github.com/golang/net@v0.0.0-20220622184535-263ec571b305 + go mod tidy + make GO_BUILDMODE="" runc man || true +# patch the depends code + cd $srcdir && patch -d src/github.com/opencontainers/runc/ -p1 -i $srcdir/runc-la64.patch + cd src/github.com/opencontainers/runc + make GO_BUILDMODE="" runc man } package() { diff --git a/runc/runc-la64.patch b/runc/runc-la64.patch new file mode 100644 index 0000000000..94734e2b73 --- /dev/null +++ b/runc/runc-la64.patch @@ -0,0 +1,11 @@ +--- runc-1.1.4.orig/libcontainer/system/syscall_linux_64.go 2022-08-24 08:45:13.000000000 +0800 ++++ runc-1.1.4/libcontainer/system/syscall_linux_64.go 2022-09-18 15:02:00.535357829 +0800 +@@ -1,6 +1,6 @@ +-//go:build linux && (arm64 || amd64 || mips || mipsle || mips64 || mips64le || ppc || ppc64 || ppc64le || riscv64 || s390x) ++//go:build linux && (arm64 || amd64 || loong64 || mips || mipsle || mips64 || mips64le || ppc || ppc64 || ppc64le || riscv64 || s390x) + // +build linux +-// +build arm64 amd64 mips mipsle mips64 mips64le ppc ppc64 ppc64le riscv64 s390x ++// +build arm64 amd64 loong64 mips mipsle mips64 mips64le ppc ppc64 ppc64le riscv64 s390x + + package system + diff --git a/runst/PKGBUILD b/runst/PKGBUILD index 687258beab..08b1cadbc3 100644 --- a/runst/PKGBUILD +++ b/runst/PKGBUILD @@ -14,7 +14,7 @@ sha512sums=('341a33c66d6b77dc660686283cdaf816fbbcf75c1a2cb661936d345d90b91e919ae prepare() { cd "$pkgname-$pkgver" - cargo fetch --locked --target "$CARCH-unknown-linux-gnu" + cargo fetch --locked --target "`uname -m`-unknown-linux-gnu" } build() { diff --git a/rust-bindgen/PKGBUILD b/rust-bindgen/PKGBUILD index 76c914757d..5f75c718fc 100644 --- a/rust-bindgen/PKGBUILD +++ b/rust-bindgen/PKGBUILD @@ -22,7 +22,7 @@ prepare() 
{ build() { cd $pkgname-$pkgver - cargo build --release --frozen + cargo build --release #--frozen local _completion="target/release/$_pkgname --generate-shell-completions" $_completion bash > "completions/$_pkgname" $_completion fish > "completions/$_pkgname.fish" diff --git a/rust-script/PKGBUILD b/rust-script/PKGBUILD index 86fb3b1298..ff3854b340 100644 --- a/rust-script/PKGBUILD +++ b/rust-script/PKGBUILD @@ -18,7 +18,7 @@ prepare() { cd "$pkgname-$pkgver" # download dependencies - cargo fetch --locked --target "$CARCH-unknown-linux-gnu" + cargo fetch --locked --target "`uname -m`-unknown-linux-gnu" } build() { diff --git a/rust/PKGBUILD b/rust/PKGBUILD index f1202cadef..7839db8ff6 100644 --- a/rust/PKGBUILD +++ b/rust/PKGBUILD @@ -7,9 +7,9 @@ pkgbase=rust pkgname=( rust - lib32-rust-libs - rust-musl - rust-wasm +# lib32-rust-libs +# rust-musl +# rust-wasm rust-src ) epoch=1 @@ -35,11 +35,11 @@ depends=( ) makedepends=( cmake - lib32-gcc-libs +# lib32-gcc-libs libffi lld llvm - musl +# musl ninja perl python @@ -50,6 +50,7 @@ checkdepends=( gdb procps-ng ) +SKIPCONFIG=1 source=( "https://static.rust-lang.org/dist/rustc-$pkgver-src.tar.gz"{,.asc} 0001-bootstrap-Change-libexec-dir.patch @@ -91,13 +92,7 @@ change-id = 116881 link-shared = true [build] -target = [ - "x86_64-unknown-linux-gnu", - "i686-unknown-linux-gnu", - "x86_64-unknown-linux-musl", - "wasm32-unknown-unknown", - "wasm32-wasi", -] +target = [ "loongarch64-unknown-linux-gnu" ] cargo = "/usr/bin/cargo" rustc = "/usr/bin/rustc" rustfmt = "/usr/bin/rustfmt" @@ -147,31 +142,31 @@ deny-warnings = false [dist] compression-formats = ["gz"] -[target.x86_64-unknown-linux-gnu] +[target.loongarch64-unknown-linux-gnu] cc = "/usr/bin/gcc" cxx = "/usr/bin/g++" ar = "/usr/bin/gcc-ar" ranlib = "/usr/bin/gcc-ranlib" llvm-config = "/usr/bin/llvm-config" -[target.i686-unknown-linux-gnu] -cc = "/usr/bin/gcc" -cxx = "/usr/bin/g++" -ar = "/usr/bin/gcc-ar" -ranlib = "/usr/bin/gcc-ranlib" - -[target.x86_64-unknown-linux-musl] -sanitizers = false -musl-root = "/usr/lib/musl" - -[target.wasm32-unknown-unknown] -sanitizers = false -profiler = false - -[target.wasm32-wasi] -sanitizers = false -profiler = false -wasi-root = "/usr/share/wasi-sysroot" +#[target.i686-unknown-linux-gnu] +#cc = "/usr/bin/gcc" +#cxx = "/usr/bin/g++" +#ar = "/usr/bin/gcc-ar" +#ranlib = "/usr/bin/gcc-ranlib" +# +#[target.x86_64-unknown-linux-musl] +#sanitizers = false +#musl-root = "/usr/lib/musl" +# +#[target.wasm32-unknown-unknown] +#sanitizers = false +#profiler = false +# +#[target.wasm32-wasi] +#sanitizers = false +#profiler = false +#wasi-root = "/usr/share/wasi-sysroot" END } @@ -201,16 +196,16 @@ build() { # rustbuild always installs copies of the shared libraries to /usr/lib, # overwrite them with symlinks to the per-architecture versions - mkdir -p usr/lib32 - ln -srft usr/lib usr/lib/rustlib/x86_64-unknown-linux-gnu/lib/*.so - ln -srft usr/lib32 usr/lib/rustlib/i686-unknown-linux-gnu/lib/*.so +#mkdir -p usr/lib32 + ln -srft usr/lib usr/lib/rustlib/loongarch64-unknown-linux-gnu/lib/*.so +#ln -srft usr/lib32 usr/lib/rustlib/i686-unknown-linux-gnu/lib/*.so mkdir -p usr/share/licenses/rust mv -t usr/share/licenses/rust usr/share/doc/rust/{COPYRIGHT,LICENSE*} - _pick dest-i686 usr/lib/rustlib/i686-unknown-linux-gnu usr/lib32 - _pick dest-musl usr/lib/rustlib/x86_64-unknown-linux-musl - _pick dest-wasm usr/lib/rustlib/wasm32-* +#_pick dest-i686 usr/lib/rustlib/i686-unknown-linux-gnu usr/lib32 +#_pick dest-musl usr/lib/rustlib/loongarch64-unknown-linux-musl +#_pick 
dest-wasm usr/lib/rustlib/wasm32-* _pick dest-src usr/lib/rustlib/src } diff --git a/rustscan/PKGBUILD b/rustscan/PKGBUILD index ebd0ba4b33..05d1408aa8 100644 --- a/rustscan/PKGBUILD +++ b/rustscan/PKGBUILD @@ -35,7 +35,7 @@ prepare() { patch -p1 -i ../update-lockfile.patch # download dependencies - cargo fetch --locked --target "$CARCH-unknown-linux-gnu" + cargo fetch --locked --target "`uname -m`-unknown-linux-gnu" } build() { diff --git a/rustypaste-cli/PKGBUILD b/rustypaste-cli/PKGBUILD index ffd560f3c6..92db719c48 100644 --- a/rustypaste-cli/PKGBUILD +++ b/rustypaste-cli/PKGBUILD @@ -16,7 +16,7 @@ sha256sums=('51f72cedfa315848cbfad2da98cf87febc5450a087996f5665311f71a83e6cbb') prepare() { cd "${pkgname}-${pkgver}" - cargo fetch --locked --target "${CARCH}-unknown-linux-gnu" + cargo fetch --locked --target "`uname -m`-unknown-linux-gnu" } build(){ diff --git a/rustypaste/PKGBUILD b/rustypaste/PKGBUILD index 530bea518f..4850171c65 100644 --- a/rustypaste/PKGBUILD +++ b/rustypaste/PKGBUILD @@ -16,7 +16,7 @@ b2sums=('b02acf82fd38597d62cf1706e99d1789845ef6ab8c7b1b64174836e1edbb0f3906db662 prepare() { cd ${pkgname}-${pkgver} - cargo fetch --locked --target "$CARCH-unknown-linux-gnu" + cargo fetch --locked --target "`uname -m`-unknown-linux-gnu" } build() { diff --git a/sad/PKGBUILD b/sad/PKGBUILD index 7a2e78f3f9..3b8d47e134 100644 --- a/sad/PKGBUILD +++ b/sad/PKGBUILD @@ -40,7 +40,7 @@ prepare() { cd "$pkgname" # download dependencies - cargo fetch --locked --target "$CARCH-unknown-linux-gnu" + cargo fetch --locked --target "`uname -m`-unknown-linux-gnu" } build() { diff --git a/sbsigntools/PKGBUILD b/sbsigntools/PKGBUILD index 206ca8efc7..23982879c0 100644 --- a/sbsigntools/PKGBUILD +++ b/sbsigntools/PKGBUILD @@ -24,9 +24,11 @@ makedepends=( source=( git+https://git.kernel.org/pub/scm/linux/kernel/git/jejb/sbsigntools.git#tag=$_commit?signed git+https://git.ozlabs.org/ccan + sbsigntools-la64.patch ) sha256sums=('SKIP' - 'SKIP') + 'SKIP' + '98442f63594d22fc463f5e6dc7d2d6e892e32406ee69a6e5c94e2738e5b3d4ae') validpgpkeys=('D5606E73C8B46271BEAD9ADF814AE47C214854D6') # James Bottomley prepare() { @@ -34,6 +36,7 @@ prepare() { git submodule init git config submodule."lib/ccan.git".url "$srcdir/ccan" git -c protocol.file.allow=always submodule update + patch -p1 -i $srcdir/sbsigntools-la64.patch ./autogen.sh } diff --git a/sbsigntools/sbsigntools-la64.patch b/sbsigntools/sbsigntools-la64.patch new file mode 100644 index 0000000000..77ca8707d2 --- /dev/null +++ b/sbsigntools/sbsigntools-la64.patch @@ -0,0 +1,24 @@ +Index: sbsigntools/src/coff/pe.h +=================================================================== +--- sbsigntools.orig/src/coff/pe.h ++++ sbsigntools/src/coff/pe.h +@@ -152,6 +152,7 @@ + #define IMAGE_FILE_MACHINE_TRICORE 0x0520 + #define IMAGE_FILE_MACHINE_WCEMIPSV2 0x0169 + #define IMAGE_FILE_MACHINE_AARCH64 0xaa64 ++#define IMAGE_FILE_MACHINE_LOONGARCH64 0x6264 + #define IMAGE_FILE_MACHINE_RISCV64 0x5064 + + #define IMAGE_SUBSYSTEM_UNKNOWN 0 +Index: sbsigntools/src/image.c +=================================================================== +--- sbsigntools.orig/src/image.c ++++ sbsigntools/src/image.c +@@ -239,6 +239,7 @@ static int image_pecoff_parse(struct ima + switch (magic) { + case IMAGE_FILE_MACHINE_AMD64: + case IMAGE_FILE_MACHINE_AARCH64: ++ case IMAGE_FILE_MACHINE_LOONGARCH64: + case IMAGE_FILE_MACHINE_RISCV64: + rc = image_pecoff_parse_64(image); + break; diff --git a/scaleway-cli/PKGBUILD b/scaleway-cli/PKGBUILD index 2aea586986..ea61515a89 100644 --- 
a/scaleway-cli/PKGBUILD +++ b/scaleway-cli/PKGBUILD @@ -39,8 +39,10 @@ build() { export CGO_CFLAGS="$CFLAGS" export CGO_CXXFLAGS="$CXXFLAGS" export CGO_LDFLAGS="$LDFLAGS" + export GOPROXY=https://goproxy.cn + go mod edit -replace=golang.org/x/sys=github.com/golang/sys@master + go mod tidy go build \ - -buildmode=pie \ -trimpath \ -mod=readonly \ -modcacherw \ diff --git a/sccache/PKGBUILD b/sccache/PKGBUILD index 2bd441eabb..ec892b0831 100644 --- a/sccache/PKGBUILD +++ b/sccache/PKGBUILD @@ -34,17 +34,27 @@ pkgver() { } prepare() { +# find -name Cargo.lock -exec rm -f {} \; +# mkdir -p .cargo +# cat > .cargo/config.toml < .cargo/config.toml <> $pkgbase-rel-$pkgver/.config - make oldnoconfig V=1 -C $pkgbase-rel-$pkgver + make oldnoconfig V=1 -C $pkgbase-rel-$pkgver \ + CROSS_PREFIX=x86_64-linux-gnu- - make -C $pkgbase-rel-$pkgver V=1 EXTRAVERSION=-$pkgrel PYTHON=python3 $build_target + make -C $pkgbase-rel-$pkgver V=1 EXTRAVERSION=-$pkgrel PYTHON=python3 $build_target CROSS_PREFIX=x86_64-linux-gnu- cp $pkgbase-rel-$pkgver/out/$output_name output/$binary_name } diff --git a/selene/PKGBUILD b/selene/PKGBUILD index a532d44d19..51e8c826fb 100644 --- a/selene/PKGBUILD +++ b/selene/PKGBUILD @@ -18,7 +18,7 @@ b2sums=('SKIP') prepare() { cd $pkgname - cargo fetch --locked --target "$CARCH-unknown-linux-gnu" + cargo fetch --locked --target "`uname -m`-unknown-linux-gnu" } build() { diff --git a/sentry-cli/PKGBUILD b/sentry-cli/PKGBUILD index c08b7503c3..c3fd02cd81 100644 --- a/sentry-cli/PKGBUILD +++ b/sentry-cli/PKGBUILD @@ -16,7 +16,13 @@ sha256sums=('bc60cc1a6015c337e7a3598123962dd24564426bd1a537f759fd19b00184643e') build() { cd $pkgname-$pkgver - cargo build --release --locked + find -name Cargo.lock -exec rm -f {} \; + mkdir -p .cargo + cat > .cargo/config.toml < .cargo/config.toml < + #include + #include +-#include + + using namespace SignOnUi; + +@@ -39,7 +40,6 @@ + int main(int argc, char **argv) + { + QApplication app(argc, argv); +- QtWebEngine::initialize(); + + app.setApplicationName("signon-ui"); + app.setQuitOnLastWindowClosed(false); +diff -ur signon-ui-4368bb77d9d1abc2978af514225ba4a42c29a646/src/signon-ui.pro signon-ui-4368bb77d9d1abc2978af514225ba4a42c29a646/src/signon-ui.pro +--- signon-ui-4368bb77d9d1abc2978af514225ba4a42c29a646/src/signon-ui.pro 2023-04-05 13:00:36.856326177 +0800 ++++ signon-ui-4368bb77d9d1abc2978af514225ba4a42c29a646/src/signon-ui.pro 2017-10-23 23:39:24.000000000 +0800 +@@ -17,7 +17,6 @@ + gui \ + network \ +- quick \ +- webengine ++ quick + + PKGCONFIG += \ + signon-plugins-common \ diff --git a/singularity/PKGBUILD b/singularity/PKGBUILD index 631da26eaa..fcdc096020 100644 --- a/singularity/PKGBUILD +++ b/singularity/PKGBUILD @@ -12,7 +12,7 @@ license=('GPL2' 'custom') depends=('python' 'python-pygame' 'python-numpy') makedepends=('python-setuptools') source=("https://github.com/singularity/singularity/releases/download/v${pkgver}/singularity-${pkgver}.tar.gz"{,.asc} - singularity-336.patch::https://github.com/singularity/singularity/pull/336 + singularity-336.patch::https://github.com/singularity/singularity/pull/336.patch https://github.com/singularity/singularity-music/archive/025e2696638bcc3bf7690679c3a17c0b46823bbe.tar.gz "singularity.desktop") validpgpkeys=('B3131A451DBFDF7CA05B4197054BBB9F7D806442') diff --git a/skim/PKGBUILD b/skim/PKGBUILD index 7773e09076..6e6dfa9623 100644 --- a/skim/PKGBUILD +++ b/skim/PKGBUILD @@ -32,7 +32,7 @@ prepare() { cd "$pkgname" # download dependencies - cargo fetch --locked --target "$CARCH-unknown-linux-gnu" + cargo 
fetch --locked --target "`uname -m`-unknown-linux-gnu" } build() { diff --git a/sn0int/PKGBUILD b/sn0int/PKGBUILD index 4594b8f358..a4facd3356 100644 --- a/sn0int/PKGBUILD +++ b/sn0int/PKGBUILD @@ -25,7 +25,7 @@ validpgpkeys=("64B13F7117D6E07D661BBCE0FE763A64F5E54FD6") prepare() { cd "${pkgname}-${pkgver}" - cargo fetch --locked --target "$CARCH-unknown-linux-gnu" + cargo fetch --locked --target "`uname -m`-unknown-linux-gnu" } build() { diff --git a/sniffglue/PKGBUILD b/sniffglue/PKGBUILD index a69ec9827a..66059ea9b1 100644 --- a/sniffglue/PKGBUILD +++ b/sniffglue/PKGBUILD @@ -28,7 +28,7 @@ validpgpkeys=("64B13F7117D6E07D661BBCE0FE763A64F5E54FD6") prepare() { cd ${pkgname}-${pkgver} - cargo fetch --locked --target "$CARCH-unknown-linux-gnu" + cargo fetch --locked --target "`uname -m`-unknown-linux-gnu" } build() { diff --git a/sniffnet/PKGBUILD b/sniffnet/PKGBUILD index 3b19213b15..08ad0a202e 100644 --- a/sniffnet/PKGBUILD +++ b/sniffnet/PKGBUILD @@ -18,7 +18,7 @@ options=('!lto') prepare() { cd "$pkgname-$pkgver" - cargo fetch --locked --target "$CARCH-unknown-linux-gnu" + cargo fetch --locked --target "`uname -m`-unknown-linux-gnu" } build() { diff --git a/sonic/PKGBUILD b/sonic/PKGBUILD index a81ffa4f21..8995aa2a39 100644 --- a/sonic/PKGBUILD +++ b/sonic/PKGBUILD @@ -38,7 +38,7 @@ prepare() { cd "$pkgname" # download dependencies - cargo fetch --locked --target "$CARCH-unknown-linux-gnu" + cargo fetch --locked --target "`uname -m`-unknown-linux-gnu" } build() { diff --git a/spicy-launcher/PKGBUILD b/spicy-launcher/PKGBUILD index 13ed13667f..7ce3b7c858 100644 --- a/spicy-launcher/PKGBUILD +++ b/spicy-launcher/PKGBUILD @@ -15,13 +15,13 @@ options=('!lto') prepare() { cd "SpicyLauncher-$pkgver" - cargo fetch --locked --target "$CARCH-unknown-linux-gnu" + cargo fetch --locked --target "`uname -m`-unknown-linux-gnu" } build() { cd "SpicyLauncher-$pkgver/gui" yarn install --ignore-engines - yarn tauri build --target "$CARCH-unknown-linux-gnu" --bundles none + yarn tauri build --target "`uname -m`-unknown-linux-gnu" --bundles none cd .. 
cargo build --release --frozen } diff --git a/spirv-tools/PKGBUILD b/spirv-tools/PKGBUILD index e3bdd67cbf..64a20f2b01 100644 --- a/spirv-tools/PKGBUILD +++ b/spirv-tools/PKGBUILD @@ -13,7 +13,7 @@ license=('custom') groups=(vulkan-devel) depends=('gcc-libs' 'sh') makedepends=('cmake' 'python' 'ninja' 'spirv-headers') -source=("${pkgname}-${pkgver}.tar.gz::https://github.com/KhronosGroup/SPIRV-Tools/archive/refs/tags/${_tag}.tar.gz") +source=("${pkgname}-${pkgver}.tar.gz::https://github.com/KhronosGroup/SPIRV-Tools/archive/refs/tags/v${_tag}.tar.gz") sha256sums=('aed90b51ce884ce3ac267acec75e785ee743a1e1fd294c25be33b49c5804d77c') build() { diff --git a/spotify-launcher/PKGBUILD b/spotify-launcher/PKGBUILD index 6e8e5d233a..4beb1a8f62 100644 --- a/spotify-launcher/PKGBUILD +++ b/spotify-launcher/PKGBUILD @@ -35,7 +35,7 @@ validpgpkeys=("64B13F7117D6E07D661BBCE0FE763A64F5E54FD6") prepare() { cd "${pkgname}-${pkgver}" - cargo fetch --locked --target "$CARCH-unknown-linux-gnu" + cargo fetch --locked --target "`uname -m`-unknown-linux-gnu" } build() { diff --git a/spotifyd/PKGBUILD b/spotifyd/PKGBUILD index 3599ccf89a..406cb39606 100644 --- a/spotifyd/PKGBUILD +++ b/spotifyd/PKGBUILD @@ -18,7 +18,7 @@ b2sums=('dd266f499f960b4f0e37e8c89d73286c0efc9f7c782007e6df901a9b432a88f4fb27666 prepare() { cd spotifyd-${pkgver} - cargo fetch --locked --target "$CARCH-unknown-linux-gnu" + cargo fetch --locked --target "`uname -m`-unknown-linux-gnu" } build() { diff --git a/spytrap-adb/PKGBUILD b/spytrap-adb/PKGBUILD index 217e3d3504..296f58b731 100644 --- a/spytrap-adb/PKGBUILD +++ b/spytrap-adb/PKGBUILD @@ -16,7 +16,7 @@ b2sums=('05f70e429530864b7f25e2e4443d70da2d2bd3b435d1c5c95ce2ae82d8d3a665d66f572 prepare() { cd "${pkgname}-${pkgver}" - cargo fetch --locked --target "$CARCH-unknown-linux-gnu" + cargo fetch --locked --target "`uname -m`-unknown-linux-gnu" } build() { diff --git a/sshx/PKGBUILD b/sshx/PKGBUILD index 33e0a7830b..b4f1e4a781 100644 --- a/sshx/PKGBUILD +++ b/sshx/PKGBUILD @@ -23,7 +23,7 @@ sha256sums=('5773c2c65dea72ec4b98d22b0c0534b745c2c536ea5b1267ced5c668b3a6736f') prepare() { cd "$_archive" - cargo fetch --locked --target "$CARCH-unknown-linux-gnu" + cargo fetch --locked --target "`uname -m`-unknown-linux-gnu" } build() { diff --git a/stalonetray/PKGBUILD b/stalonetray/PKGBUILD index e7c9491f33..40e76e596c 100644 --- a/stalonetray/PKGBUILD +++ b/stalonetray/PKGBUILD @@ -2,7 +2,7 @@ pkgname=stalonetray pkgver=0.8.5 -pkgrel=1 +pkgrel=2 pkgdesc="STAnd-aLONE sysTRAY. It has minimal build and run-time dependencies: the Xlib only." 
arch=('loong64' 'x86_64') url="https://github.com/kolbusa/stalonetray" diff --git a/stardict/PKGBUILD b/stardict/PKGBUILD index 0297f0523f..35dad8f017 100644 --- a/stardict/PKGBUILD +++ b/stardict/PKGBUILD @@ -39,6 +39,9 @@ prepare() { build() { cd "${srcdir}/${pkgname}-${pkgver}" + CFLAGS=${CFLAGS/-Werror=format-security/} + CXXFLAGS=${CXXFLAGS/-Werror=format-security/} + CXXFLAGS+=" -std=gnu++11" ./configure \ --prefix=/usr \ --sysconfdir=/etc \ diff --git a/starship/PKGBUILD b/starship/PKGBUILD index 4f246d30ad..6e895372a5 100644 --- a/starship/PKGBUILD +++ b/starship/PKGBUILD @@ -28,7 +28,7 @@ b2sums=('SKIP') prepare() { cargo fetch \ --locked \ - --target $CARCH-unknown-linux-gnu \ + --target `uname -m`-unknown-linux-gnu \ --manifest-path starship/Cargo.toml } diff --git a/stochas/PKGBUILD b/stochas/PKGBUILD index a1cb8e2287..65fa569cca 100644 --- a/stochas/PKGBUILD +++ b/stochas/PKGBUILD @@ -58,8 +58,8 @@ package() { cd "$pkgname-$pkgver" # project has no install target :( install -vDm 755 build/${pkgname}_artefacts/None/Standalone/${_name} -t "${pkgdir}/usr/bin/" - install -vDm 755 build/${pkgname}_artefacts/None/VST3/${_name}.vst3/Contents/${CARCH}-linux/${_name}.so \ - -t "${pkgdir}/usr/lib/vst3/Stochas.vst3/Contents/${CARCH}-linux/" + install -vDm 755 build/${pkgname}_artefacts/None/VST3/${_name}.vst3/Contents/`uname -m`-linux/${_name}.so \ + -t "${pkgdir}/usr/lib/vst3/Stochas.vst3/Contents/`uname -m`-linux/" install -vDm 644 *.desktop -t "${pkgdir}/usr/share/applications/" install -vDm 644 image/app_logo_512.png "${pkgdir}/usr/share/icons/hicolor/512x512/apps/org.surge-synth-team.${_name}.png" diff --git a/suitesparse/PKGBUILD b/suitesparse/PKGBUILD index a905ce8436..3f58613de0 100644 --- a/suitesparse/PKGBUILD +++ b/suitesparse/PKGBUILD @@ -22,11 +22,17 @@ sha256sums=('19cbeb9964ebe439413dd66d82ace1f904adc5f25d8a823c1b48c34bd0d29ea5') build() { cd SuiteSparse-$pkgver + CXXFLAGS=${CXXFLAGS/-mlsx /} + CXXFLAGS=${CXXFLAGS/-fstack-clash-protection/} + + CFLAGS+=" -ffat-lto-objects -mcmodel=extreme" \ + CXXFLAGS+=" -ffat-lto-objects -mcmodel=extreme" \ CMAKE_OPTIONS="-DBLA_VENDOR=Generic \ -DCMAKE_INSTALL_PREFIX=/usr \ -DCMAKE_BUILD_TYPE=None \ -DNSTATIC=ON" \ make +# -DALLOW_64BIT_BLAS=ON \ } package() { diff --git a/supermin/PKGBUILD b/supermin/PKGBUILD index 6b9e5f80b9..ad82a40ee7 100644 --- a/supermin/PKGBUILD +++ b/supermin/PKGBUILD @@ -12,14 +12,22 @@ license=('GPL') makedepends=('ocaml' 'ocaml-findlib') depends=('e2fsprogs' 'pacman' 'pacman-contrib' 'cpio') conflicts=('febootstrap<=3.21') -source=("https://download.libguestfs.org/${pkgname}/5.3-development/${pkgname}-${pkgver}.tar.gz"{,.sig}) +source=("https://download.libguestfs.org/${pkgname}/5.3-development/${pkgname}-${pkgver}.tar.gz"{,.sig} + 'supermin-disable-doc.patch') sha512sums=('501731e9cce8bf1f4743eeff4af620813d466da10b664df037575a546b3b8e8697ed9e881dde7d3ba737e6a78536717c1823e22cdc1c92409db78d976a6678b5' - 'SKIP') + 'SKIP' + 'da5cfe1cce9695b05e593a732e43ab066fd39d3f72183cf7d19f9ec1c40be40a2574fcf64904ef0a3f044e30830f3c3183c4d53b03c850f59758e87b8be6a572') validpgpkeys=('F7774FB1AD074A7E8C8767EA91738F73E1B768A0') # Richard W.M. 
Jones +prepare() { + cd "${pkgname}-${pkgver}" + patch -p1 -i $srcdir/supermin-disable-doc.patch +} + build() { cd "${pkgname}-${pkgver}" + autoreconf -ifv ./configure --prefix=/usr make diff --git a/sws/PKGBUILD b/sws/PKGBUILD index 466919dfc3..2687b0955f 100644 --- a/sws/PKGBUILD +++ b/sws/PKGBUILD @@ -61,7 +61,7 @@ build() { package() { # plugin - install -vDm755 -t "$pkgdir/usr/lib/sws" "build/reaper_sws-$CARCH.so" + install -vDm755 -t "$pkgdir/usr/lib/sws" "build/reaper_sws-`uname -m`.so" install -vDm644 -t "$pkgdir/usr/lib/sws" build/sws_python64.py cd "$pkgname" diff --git a/syslog-ng/PKGBUILD b/syslog-ng/PKGBUILD index 960f18210c..742a081cf8 100644 --- a/syslog-ng/PKGBUILD +++ b/syslog-ng/PKGBUILD @@ -25,8 +25,8 @@ depends=( 'systemd-libs' ) makedepends=('libxslt' 'mongo-c-driver' 'librabbitmq-c' 'python' 'libesmtp' 'hiredis' - 'libdbi' 'libmaxminddb' 'net-snmp' 'librdkafka' 'systemd') -checkdepends=('python-nose' 'python-ply') + 'libdbi' 'libmaxminddb' 'net-snmp' 'librdkafka' 'systemd') #'libcap' +makedepends+=('python-nose' 'python-ply') optdepends=('logrotate: for rotating log files' 'libdbi: for the SQL plugin' 'librabbitmq-c: for the AMQP plugin' diff --git a/systeroid/PKGBUILD b/systeroid/PKGBUILD index 13b9350658..57b8f81e60 100644 --- a/systeroid/PKGBUILD +++ b/systeroid/PKGBUILD @@ -14,7 +14,7 @@ sha512sums=('61d8f8bdd34404f57e237f0843f67c1aaf9d9e552fd7857bc770db1ebf6296ed6f1 prepare() { cd "$pkgname-$pkgver" - cargo fetch --locked --target "$CARCH-unknown-linux-gnu" + cargo fetch --locked --target "`uname -m`-unknown-linux-gnu" } build() { diff --git a/taplo-cli/PKGBUILD b/taplo-cli/PKGBUILD index 684481881d..d172db899e 100644 --- a/taplo-cli/PKGBUILD +++ b/taplo-cli/PKGBUILD @@ -21,12 +21,12 @@ b2sums=('f5b8a1b1f10b42ddb98c7ea400a062715e3ab9c2023adece88052126847ab992db52258 prepare() { cd $pkgname-$pkgver - cargo fetch --locked --target "$CARCH-unknown-linux-gnu" + cargo fetch --locked --target "`uname -m`-unknown-linux-gnu" } build() { cd $pkgname-$pkgver - cargo build --release --locked --offline --all-features + cargo build --release --offline --all-features } package() { diff --git a/taskwarrior-tui/PKGBUILD b/taskwarrior-tui/PKGBUILD index cef1667561..c55152a549 100644 --- a/taskwarrior-tui/PKGBUILD +++ b/taskwarrior-tui/PKGBUILD @@ -16,7 +16,7 @@ sha256sums=('86a00c0c33f825824ac432c50e57a9bac150c3ba9e3d06e6d86f65790a99a458' prepare() { cd "$pkgname-$pkgver" - cargo fetch --locked --target "$CARCH-unknown-linux-gnu" + cargo fetch --locked --target "`uname -m`-unknown-linux-gnu" } build() { diff --git a/tealdeer/PKGBUILD b/tealdeer/PKGBUILD index bde21d488a..f8456537e7 100644 --- a/tealdeer/PKGBUILD +++ b/tealdeer/PKGBUILD @@ -29,7 +29,7 @@ prepare() { cd "$pkgname" # download dependencies - cargo fetch --locked --target "$CARCH-unknown-linux-gnu" + cargo fetch --locked --target "`uname -m`-unknown-linux-gnu" } build() { diff --git a/tectonic/PKGBUILD b/tectonic/PKGBUILD index faf293a613..020e082578 100644 --- a/tectonic/PKGBUILD +++ b/tectonic/PKGBUILD @@ -18,7 +18,7 @@ b2sums=('94720d6beeef03aae68b1ac70b22995a05be2371176b844dcfbabbadc475e065bd8618c prepare() { cd ${pkgname}-${pkgname}-${pkgver} - cargo fetch --locked --target "$CARCH-unknown-linux-gnu" + cargo fetch --locked --target "`uname -m`-unknown-linux-gnu" } build() { diff --git a/tere/PKGBUILD b/tere/PKGBUILD index 377fedc871..be4b8aad85 100644 --- a/tere/PKGBUILD +++ b/tere/PKGBUILD @@ -17,7 +17,7 @@ sha256sums=('d7f657371ffbd469c4d8855c2a2734c20b53ae632fe3cbf9bb7cab94bd726326') prepare() { cd 
"$srcdir/$pkgname-$pkgver" - cargo fetch --locked --target "$CARCH-unknown-linux-gnu" + cargo fetch --locked --target "`uname -m`-unknown-linux-gnu" } build() { diff --git a/texlab/PKGBUILD b/texlab/PKGBUILD index 92a99dc7ab..587e9c69be 100644 --- a/texlab/PKGBUILD +++ b/texlab/PKGBUILD @@ -17,7 +17,7 @@ options=('!lto') prepare() { cd "${srcdir}/${pkgname}-${pkgver}" - cargo fetch --locked --target "$CARCH-unknown-linux-gnu" + cargo fetch --locked --target "`uname -m`-unknown-linux-gnu" } build() { diff --git a/texlive-bin/PKGBUILD b/texlive-bin/PKGBUILD index c40ed33bb0..c4515aa9e3 100644 --- a/texlive-bin/PKGBUILD +++ b/texlive-bin/PKGBUILD @@ -47,10 +47,12 @@ url='https://tug.org/texlive/' _commit=871c7a2856d70e1a9703d1f72f0587b9995dba5f # tags/texlive-2023.0 source=(git+https://github.com/Tex-Live/texlive-source.git#commit=$_commit ptex-debug-print.patch - context-luatex-1.17.patch) + context-luatex-1.17.patch + texlive-bin-la64.patch) sha256sums=('SKIP' 'aa838f09003c62c2efb5770a8de66f99b409df049fbd65098d80fd1957d06c50' - 'a56838d19c3bd820781693b5a2e058e1a22378b37ea199bac426d97fcc420920') + 'a56838d19c3bd820781693b5a2e058e1a22378b37ea199bac426d97fcc420920' + '0c09915ccb8bad1792f11a69cc7057d775cdf54a443427d9ca77f5602432f1aa') prepare() { cd texlive-source @@ -64,6 +66,7 @@ prepare() { patch -p1 -i ../ptex-debug-print.patch # update context to work with luatex 1.17 patch -p1 -i ../context-luatex-1.17.patch + patch -p1 -i $srcdir/texlive-bin-la64.patch } build() { diff --git a/texlive-bin/texlive-bin-la64.patch b/texlive-bin/texlive-bin-la64.patch new file mode 100644 index 0000000000..9ffb6e2ba3 --- /dev/null +++ b/texlive-bin/texlive-bin-la64.patch @@ -0,0 +1,11150 @@ +diff --git a/libs/luajit/LuaJIT-src/Makefile b/libs/luajit/LuaJIT-src/Makefile +index 0f93308..45b3b2d 100644 +--- a/libs/luajit/LuaJIT-src/Makefile ++++ b/libs/luajit/LuaJIT-src/Makefile +@@ -88,7 +88,7 @@ FILES_INC= lua.h lualib.h lauxlib.h luaconf.h lua.hpp luajit.h + FILES_JITLIB= bc.lua bcsave.lua dump.lua p.lua v.lua zone.lua \ + dis_x86.lua dis_x64.lua dis_arm.lua dis_arm64.lua \ + dis_arm64be.lua dis_ppc.lua dis_mips.lua dis_mipsel.lua \ +- dis_mips64.lua dis_mips64el.lua vmdef.lua ++ dis_mips64.lua dis_mips64el.lua dis_loongarch64.lua vmdef.lua + + ifeq (,$(findstring Windows,$(OS))) + HOST_SYS:= $(shell uname -s) +diff --git a/libs/luajit/LuaJIT-src/doc/ext_jit.html b/libs/luajit/LuaJIT-src/doc/ext_jit.html +index e4088bc..492f537 100644 +--- a/libs/luajit/LuaJIT-src/doc/ext_jit.html ++++ b/libs/luajit/LuaJIT-src/doc/ext_jit.html +@@ -153,7 +153,7 @@ Contains the target OS name: +
+ <h3 id="jit_arch"><tt>jit.arch</tt></h3>
+ <p>
+ Contains the target architecture name:
+-"x86", "x64", "arm", "arm64", "ppc", "mips" or "mips64".
++"x86", "x64", "arm", "arm64", "ppc", "loongarch64", "mips" or "mips64".
+ </p>
+
+ <h3 id="jit_opt"><tt>jit.opt.*</tt> &mdash; JIT compiler optimization control</h3>
+diff --git a/libs/luajit/LuaJIT-src/doc/install.html b/libs/luajit/LuaJIT-src/doc/install.html +index c491c60..fc8559d 100644 +--- a/libs/luajit/LuaJIT-src/doc/install.html ++++ b/libs/luajit/LuaJIT-src/doc/install.html +@@ -154,6 +154,13 @@ operating systems, CPUs and compilers: +   +   + ++ ++LoongArch64 ++GCC 4.3+ ++GCC 4.3+ ++  ++  ++ + + +

Configuring LuaJIT

+@@ -426,6 +433,9 @@ make HOST_CC="gcc -m32" CROSS=mipsel-linux- + make CROSS=mips-linux- TARGET_CFLAGS="-mips64r2 -mabi=64" + # MIPS64 little-endian + make CROSS=mipsel-linux- TARGET_CFLAGS="-mips64r2 -mabi=64" ++ ++# LOONGARCH64 ++make CROSS=loongarch64-linux- + +

+ You can cross-compile for Android using the Android NDK. +diff --git a/libs/luajit/LuaJIT-src/dynasm/dasm_loongarch64.h b/libs/luajit/LuaJIT-src/dynasm/dasm_loongarch64.h +new file mode 100644 +index 0000000..e6c9e3e +--- /dev/null ++++ b/libs/luajit/LuaJIT-src/dynasm/dasm_loongarch64.h +@@ -0,0 +1,451 @@ ++/* ++** DynASM LoongArch encoding engine. ++** Copyright (C) 2005-2021 Mike Pall. All rights reserved. ++** Copyright (C) 2021 Loongson Technology. All rights reserved. ++** Released under the MIT license. See dynasm.lua for full copyright notice. ++*/ ++ ++#include ++#include ++#include ++#include ++ ++#define DASM_ARCH "loongarch64" ++ ++#ifndef DASM_EXTERN ++#define DASM_EXTERN(a,b,c,d) 0 ++#endif ++ ++/* Action definitions. */ ++enum { ++ DASM_STOP, DASM_SECTION, DASM_ESC, DASM_REL_EXT, ++ /* The following actions need a buffer position. */ ++ DASM_ALIGN, DASM_REL_LG, DASM_LABEL_LG, ++ /* The following actions also have an argument. */ ++ DASM_REL_PC, DASM_LABEL_PC, DASM_IMM, DASM_IMM2, ++ DASM__MAX ++}; ++ ++/* Maximum number of section buffer positions for a single dasm_put() call. */ ++#define DASM_MAXSECPOS 25 ++ ++/* DynASM encoder status codes. Action list offset or number are or'ed in. */ ++#define DASM_S_OK 0x00000000 ++#define DASM_S_NOMEM 0x01000000 ++#define DASM_S_PHASE 0x02000000 ++#define DASM_S_MATCH_SEC 0x03000000 ++#define DASM_S_RANGE_I 0x11000000 ++#define DASM_S_RANGE_SEC 0x12000000 ++#define DASM_S_RANGE_LG 0x13000000 ++#define DASM_S_RANGE_PC 0x14000000 ++#define DASM_S_RANGE_REL 0x15000000 ++#define DASM_S_UNDEF_LG 0x21000000 ++#define DASM_S_UNDEF_PC 0x22000000 ++ ++/* Macros to convert positions (8 bit section + 24 bit index). */ ++#define DASM_POS2IDX(pos) ((pos)&0x00ffffff) ++#define DASM_POS2BIAS(pos) ((pos)&0xff000000) ++#define DASM_SEC2POS(sec) ((sec)<<24) ++#define DASM_POS2SEC(pos) ((pos)>>24) ++#define DASM_POS2PTR(D, pos) (D->sections[DASM_POS2SEC(pos)].rbuf + (pos)) ++ ++/* Action list type. */ ++typedef const unsigned int *dasm_ActList; ++ ++/* Per-section structure. */ ++typedef struct dasm_Section { ++ int *rbuf; /* Biased buffer pointer (negative section bias). */ ++ int *buf; /* True buffer pointer. */ ++ size_t bsize; /* Buffer size in bytes. */ ++ int pos; /* Biased buffer position. */ ++ int epos; /* End of biased buffer position - max single put. */ ++ int ofs; /* Byte offset into section. */ ++} dasm_Section; ++ ++/* Core structure holding the DynASM encoding state. */ ++struct dasm_State { ++ size_t psize; /* Allocated size of this structure. */ ++ dasm_ActList actionlist; /* Current actionlist pointer. */ ++ int *lglabels; /* Local/global chain/pos ptrs. */ ++ size_t lgsize; ++ int *pclabels; /* PC label chains/pos ptrs. */ ++ size_t pcsize; ++ void **globals; /* Array of globals (bias -10). */ ++ dasm_Section *section; /* Pointer to active section. */ ++ size_t codesize; /* Total size of all code sections. */ ++ int maxsection; /* 0 <= sectionidx < maxsection. */ ++ int status; /* Status code. */ ++ dasm_Section sections[1]; /* All sections. Alloc-extended. */ ++}; ++ ++/* The size of the core structure depends on the max. number of sections. */ ++#define DASM_PSZ(ms) (sizeof(dasm_State)+(ms-1)*sizeof(dasm_Section)) ++ ++ ++/* Initialize DynASM state. 
*/ ++void dasm_init(Dst_DECL, int maxsection) ++{ ++ dasm_State *D; ++ size_t psz = 0; ++ int i; ++ Dst_REF = NULL; ++ DASM_M_GROW(Dst, struct dasm_State, Dst_REF, psz, DASM_PSZ(maxsection)); ++ D = Dst_REF; ++ D->psize = psz; ++ D->lglabels = NULL; ++ D->lgsize = 0; ++ D->pclabels = NULL; ++ D->pcsize = 0; ++ D->globals = NULL; ++ D->maxsection = maxsection; ++ for (i = 0; i < maxsection; i++) { ++ D->sections[i].buf = NULL; /* Need this for pass3. */ ++ D->sections[i].rbuf = D->sections[i].buf - DASM_SEC2POS(i); ++ D->sections[i].bsize = 0; ++ D->sections[i].epos = 0; /* Wrong, but is recalculated after resize. */ ++ } ++} ++ ++/* Free DynASM state. */ ++void dasm_free(Dst_DECL) ++{ ++ dasm_State *D = Dst_REF; ++ int i; ++ for (i = 0; i < D->maxsection; i++) ++ if (D->sections[i].buf) ++ DASM_M_FREE(Dst, D->sections[i].buf, D->sections[i].bsize); ++ if (D->pclabels) DASM_M_FREE(Dst, D->pclabels, D->pcsize); ++ if (D->lglabels) DASM_M_FREE(Dst, D->lglabels, D->lgsize); ++ DASM_M_FREE(Dst, D, D->psize); ++} ++ ++/* Setup global label array. Must be called before dasm_setup(). */ ++void dasm_setupglobal(Dst_DECL, void **gl, unsigned int maxgl) ++{ ++ dasm_State *D = Dst_REF; ++ D->globals = gl - 10; /* Negative bias to compensate for locals. */ ++ DASM_M_GROW(Dst, int, D->lglabels, D->lgsize, (10+maxgl)*sizeof(int)); ++} ++ ++/* Grow PC label array. Can be called after dasm_setup(), too. */ ++void dasm_growpc(Dst_DECL, unsigned int maxpc) ++{ ++ dasm_State *D = Dst_REF; ++ size_t osz = D->pcsize; ++ DASM_M_GROW(Dst, int, D->pclabels, D->pcsize, maxpc*sizeof(int)); ++ memset((void *)(((unsigned char *)D->pclabels)+osz), 0, D->pcsize-osz); ++} ++ ++/* Setup encoder. */ ++void dasm_setup(Dst_DECL, const void *actionlist) ++{ ++ dasm_State *D = Dst_REF; ++ int i; ++ D->actionlist = (dasm_ActList)actionlist; ++ D->status = DASM_S_OK; ++ D->section = &D->sections[0]; ++ memset((void *)D->lglabels, 0, D->lgsize); ++ if (D->pclabels) memset((void *)D->pclabels, 0, D->pcsize); ++ for (i = 0; i < D->maxsection; i++) { ++ D->sections[i].pos = DASM_SEC2POS(i); ++ D->sections[i].ofs = 0; ++ } ++} ++ ++ ++#ifdef DASM_CHECKS ++#define CK(x, st) \ ++ do { if (!(x)) { \ ++ D->status = DASM_S_##st|(p-D->actionlist-1); return; } } while (0) ++#define CKPL(kind, st) \ ++ do { if ((size_t)((char *)pl-(char *)D->kind##labels) >= D->kind##size) { \ ++ D->status = DASM_S_RANGE_##st|(p-D->actionlist-1); return; } } while (0) ++#else ++#define CK(x, st) ((void)0) ++#define CKPL(kind, st) ((void)0) ++#endif ++ ++static int dasm_imm2(unsigned int n) ++{ ++ if ((n >> 21) == 0) ++ return n; ++ //return ((n>>16)&0x1f) | ((n&0xffff)>>10); ++ else if ((n >> 26) == 0) ++ return n; ++ //return ((n>>16)&0x3ff) | ((n&0xffff)>>10); ++ else ++ return -1; ++} ++ ++/* Pass 1: Store actions and args, link branches/labels, estimate offsets. */ ++void dasm_put(Dst_DECL, int start, ...) 
++{ ++ va_list ap; ++ dasm_State *D = Dst_REF; ++ dasm_ActList p = D->actionlist + start; ++ dasm_Section *sec = D->section; ++ int pos = sec->pos, ofs = sec->ofs; ++ int *b; ++ ++ if (pos >= sec->epos) { ++ DASM_M_GROW(Dst, int, sec->buf, sec->bsize, ++ sec->bsize + 2*DASM_MAXSECPOS*sizeof(int)); ++ sec->rbuf = sec->buf - DASM_POS2BIAS(pos); ++ sec->epos = (int)sec->bsize/sizeof(int) - DASM_MAXSECPOS+DASM_POS2BIAS(pos); ++ } ++ ++ b = sec->rbuf; ++ b[pos++] = start; ++ ++ va_start(ap, start); ++ while (1) { ++ unsigned int ins = *p++; ++ unsigned int action = (ins >> 16) - 0xff00; ++ if (action >= DASM__MAX) { ++ ofs += 4; ++ } else { ++ int *pl, n = action >= DASM_REL_PC ? va_arg(ap, int) : 0; ++ switch (action) { ++ case DASM_STOP: goto stop; ++ case DASM_SECTION: ++ n = (ins & 255); CK(n < D->maxsection, RANGE_SEC); ++ D->section = &D->sections[n]; goto stop; ++ case DASM_ESC: p++; ofs += 4; break; ++ case DASM_REL_EXT: break; ++ case DASM_ALIGN: ofs += (ins & 255); b[pos++] = ofs; break; ++ case DASM_REL_LG: ++ n = (ins & 2047) - 10; pl = D->lglabels + n; ++ /* Bkwd rel or global. */ ++ if (n >= 0) { CK(n>=10||*pl<0, RANGE_LG); CKPL(lg, LG); goto putrel; } ++ pl += 10; n = *pl; ++ if (n < 0) n = 0; /* Start new chain for fwd rel if label exists. */ ++ goto linkrel; ++ case DASM_REL_PC: ++ pl = D->pclabels + n; CKPL(pc, PC); ++ putrel: ++ n = *pl; ++ if (n < 0) { /* Label exists. Get label pos and store it. */ ++ b[pos] = -n; ++ } else { ++ linkrel: ++ b[pos] = n; /* Else link to rel chain, anchored at label. */ ++ *pl = pos; ++ } ++ pos++; ++ break; ++ case DASM_LABEL_LG: ++ pl = D->lglabels + (ins & 2047) - 10; CKPL(lg, LG); goto putlabel; ++ case DASM_LABEL_PC: ++ pl = D->pclabels + n; CKPL(pc, PC); ++ putlabel: ++ n = *pl; /* n > 0: Collapse rel chain and replace with label pos. */ ++ while (n > 0) { int *pb = DASM_POS2PTR(D, n); n = *pb; *pb = pos; ++ } ++ *pl = -pos; /* Label exists now. */ ++ b[pos++] = ofs; /* Store pass1 offset estimate. */ ++ break; ++ case DASM_IMM: ++#ifdef DASM_CHECKS ++ CK((n & ((1<<((ins>>10)&31))-1)) == 0, RANGE_I); ++#endif ++ n >>= ((ins>>10)&31); ++#ifdef DASM_CHECKS ++ if (ins & 0x8000) ++ CK(((n + (1<<(((ins>>5)&31)-1)))>>((ins>>5)&31)) == 0, RANGE_I); ++ else ++ CK((n>>((ins>>5)&31)) == 0, RANGE_I); ++#endif ++ b[pos++] = n; ++ break; ++ case DASM_IMM2: ++ CK(dasm_imm2((unsigned int)n) != -1, RANGE_I); ++ b[pos++] = n; ++ break; ++ } ++ } ++ } ++stop: ++ va_end(ap); ++ sec->pos = pos; ++ sec->ofs = ofs; ++} ++#undef CK ++ ++/* Pass 2: Link sections, shrink aligns, fix label offsets. */ ++int dasm_link(Dst_DECL, size_t *szp) ++{ ++ dasm_State *D = Dst_REF; ++ int secnum; ++ int ofs = 0; ++ ++#ifdef DASM_CHECKS ++ *szp = 0; ++ if (D->status != DASM_S_OK) return D->status; ++ { ++ int pc; ++ for (pc = 0; pc*sizeof(int) < D->pcsize; pc++) ++ if (D->pclabels[pc] > 0) return DASM_S_UNDEF_PC|pc; ++ } ++#endif ++ ++ { /* Handle globals not defined in this translation unit. */ ++ int idx; ++ for (idx = 10; idx*sizeof(int) < D->lgsize; idx++) { ++ int n = D->lglabels[idx]; ++ /* Undefined label: Collapse rel chain and replace with marker (< 0). */ ++ while (n > 0) { int *pb = DASM_POS2PTR(D, n); n = *pb; *pb = -idx; } ++ } ++ } ++ ++ /* Combine all code sections. No support for data sections (yet). 
*/ ++ for (secnum = 0; secnum < D->maxsection; secnum++) { ++ dasm_Section *sec = D->sections + secnum; ++ int *b = sec->rbuf; ++ int pos = DASM_SEC2POS(secnum); ++ int lastpos = sec->pos; ++ ++ while (pos != lastpos) { ++ dasm_ActList p = D->actionlist + b[pos++]; ++ while (1) { ++ unsigned int ins = *p++; ++ unsigned int action = (ins >> 16) - 0xff00; ++ switch (action) { ++ case DASM_STOP: case DASM_SECTION: goto stop; ++ case DASM_ESC: p++; break; ++ case DASM_REL_EXT: break; ++ case DASM_ALIGN: ofs -= (b[pos++] + ofs) & (ins & 255); break; ++ case DASM_REL_LG: case DASM_REL_PC: pos++; break; ++ case DASM_LABEL_LG: case DASM_LABEL_PC: b[pos++] += ofs; break; ++ case DASM_IMM: case DASM_IMM2: pos++; break; ++ } ++ } ++ stop: (void)0; ++ } ++ ofs += sec->ofs; /* Next section starts right after current section. */ ++ } ++ ++ D->codesize = ofs; /* Total size of all code sections */ ++ *szp = ofs; ++ return DASM_S_OK; ++} ++ ++#ifdef DASM_CHECKS ++#define CK(x, st) \ ++ do { if (!(x)) return DASM_S_##st|(p-D->actionlist-1); } while (0) ++#else ++#define CK(x, st) ((void)0) ++#endif ++ ++/* Pass 3: Encode sections. */ ++int dasm_encode(Dst_DECL, void *buffer) ++{ ++ dasm_State *D = Dst_REF; ++ char *base = (char *)buffer; ++ unsigned int *cp = (unsigned int *)buffer; ++ int secnum; ++ ++ /* Encode all code sections. No support for data sections (yet). */ ++ for (secnum = 0; secnum < D->maxsection; secnum++) { ++ dasm_Section *sec = D->sections + secnum; ++ int *b = sec->buf; ++ int *endb = sec->rbuf + sec->pos; ++ ++ while (b != endb) { ++ dasm_ActList p = D->actionlist + *b++; ++ while (1) { ++ unsigned int ins = *p++; ++ unsigned int action = (ins >> 16) - 0xff00; ++ int n = (action >= DASM_ALIGN && action < DASM__MAX) ? *b++ : 0; ++ switch (action) { ++ case DASM_STOP: case DASM_SECTION: goto stop; ++ case DASM_ESC: *cp++ = *p++; break; ++ case DASM_REL_EXT: ++ n = DASM_EXTERN(Dst, (unsigned char *)cp, (ins & 2047), 1); ++ goto patchrel; ++ case DASM_ALIGN: ++ ins &= 255; while ((((char *)cp - base) & ins)) *cp++ = 0x60000000; ++ break; ++ case DASM_REL_LG: ++ if (n < 0) { ++ n = (int)((ptrdiff_t)D->globals[-n] - (ptrdiff_t)cp + 4); ++ goto patchrel; ++ } ++ /* fallthrough */ ++ case DASM_REL_PC: ++ CK(n >= 0, UNDEF_PC); ++ n = *DASM_POS2PTR(D, n); ++ if (ins & 2048) ++ n = (n + (int)(size_t)base) & 0x0fffffff; ++ else ++ n = n - (int)((char *)cp - base) + 4; ++ patchrel: { ++ unsigned int e = 16 + ((ins >> 12) & 15); ++ CK((n & 3) == 0 && ++ ((n + ((ins & 2048) ? 0 : (1<<(e+1)))) >> (e+2)) == 0, RANGE_REL); ++ if (!(ins & 0xf800)) { /* BEQ, BNE, BLT, BGE, BLTU, BGEU */ ++ cp[-1] |= (((n >> 2) & 0xffff) << 10); ++ } else if ((ins & 0x5000)) { /* BEQZ, BNEZ, BCEQZ, BCNEZ */ ++ cp[-1] |= (((n >> 2) & 0xffff) << 10) | (((n >> 2) & 0x1f0000) >> 16); ++ } else if ((ins & 0xa000)) { /* B, BL */ ++ cp[-1] |= (((n >> 2) & 0xffff) << 10) | (((n >> 2) & 0x3ff0000) >> 16); ++ } ++ } ++ break; ++ case DASM_LABEL_LG: ++ ins &= 2047; if (ins >= 20) D->globals[ins-10] = (void *)(base + n); ++ break; ++ case DASM_LABEL_PC: break; ++ case DASM_IMM2: { ++ //cp[-1] |= ((n>>3) & 4); n &= 0x1f; ++ unsigned int imm2n = dasm_imm2((unsigned int)n); ++ cp[-1] |= ((imm2n&0x3ff0000) | ((imm2n&0xffff))>>10); ++ } ++ break; ++ /* fallthrough */ ++ case DASM_IMM: ++ cp[-1] |= (n & ((1<<((ins>>5)&31))-1)) << (ins&31); ++ break; ++ default: *cp++ = ins; break; ++ } ++ } ++ stop: (void)0; ++ } ++ } ++ ++ if (base + D->codesize != (char *)cp) /* Check for phase errors. 
*/ ++ return DASM_S_PHASE; ++ return DASM_S_OK; ++} ++#undef CK ++ ++/* Get PC label offset. */ ++int dasm_getpclabel(Dst_DECL, unsigned int pc) ++{ ++ dasm_State *D = Dst_REF; ++ if (pc*sizeof(int) < D->pcsize) { ++ int pos = D->pclabels[pc]; ++ if (pos < 0) return *DASM_POS2PTR(D, -pos); ++ if (pos > 0) return -1; /* Undefined. */ ++ } ++ return -2; /* Unused or out of range. */ ++} ++ ++#ifdef DASM_CHECKS ++/* Optional sanity checker to call between isolated encoding steps. */ ++int dasm_checkstep(Dst_DECL, int secmatch) ++{ ++ dasm_State *D = Dst_REF; ++ if (D->status == DASM_S_OK) { ++ int i; ++ for (i = 1; i <= 9; i++) { ++ if (D->lglabels[i] > 0) { D->status = DASM_S_UNDEF_LG|i; break; } ++ D->lglabels[i] = 0; ++ } ++ } ++ if (D->status == DASM_S_OK && secmatch >= 0 && ++ D->section != &D->sections[secmatch]) ++ D->status = DASM_S_MATCH_SEC|(D->section-D->sections); ++ return D->status; ++} ++#endif ++ +diff --git a/libs/luajit/LuaJIT-src/dynasm/dasm_loongarch64.lua b/libs/luajit/LuaJIT-src/dynasm/dasm_loongarch64.lua +new file mode 100644 +index 0000000..6542763 +--- /dev/null ++++ b/libs/luajit/LuaJIT-src/dynasm/dasm_loongarch64.lua +@@ -0,0 +1,977 @@ ++------------------------------------------------------------------------------ ++-- DynASM LoongArch module. ++-- ++-- Copyright (C) 2005-2021 Mike Pall. All rights reserved. ++-- Copyright (C) 2021 Loongson Technology. All rights reserved. ++-- See dynasm.lua for full copyright notice. ++------------------------------------------------------------------------------ ++ ++-- Module information: ++local _info = { ++ arch = "loongarch64", ++ description = "DynASM LoongArch64 module", ++ version = "1.4.0", ++ vernum = 10400, ++ release = "2021-05-20", ++ author = "Mike Pall", ++ license = "MIT", ++} ++ ++-- Exported glue functions for the arch-specific module. ++local _M = { _info = _info } ++ ++-- Cache library functions. ++local type, tonumber, pairs, ipairs = type, tonumber, pairs, ipairs ++local assert, setmetatable = assert, setmetatable ++local _s = string ++local sub, format, byte, char = _s.sub, _s.format, _s.byte, _s.char ++local match, gmatch = _s.match, _s.gmatch ++local concat, sort = table.concat, table.sort ++local bit = bit or require("bit") ++local band, shl, shr, sar = bit.band, bit.lshift, bit.rshift, bit.arshift ++local tohex = bit.tohex ++ ++-- Inherited tables and callbacks. ++local g_opt, g_arch ++local wline, werror, wfatal, wwarn ++ ++-- Action name list. ++-- CHECK: Keep this in sync with the C code! ++local action_names = { ++ "STOP", "SECTION", "ESC", "REL_EXT", ++ "ALIGN", "REL_LG", "LABEL_LG", ++ "REL_PC", "LABEL_PC", "IMM", "IMM2", ++} ++ ++-- Maximum number of section buffer positions for dasm_put(). ++-- CHECK: Keep this in sync with the C code! ++local maxsecpos = 25 -- Keep this low, to avoid excessively long C lines. ++ ++-- Action name -> action number. ++local map_action = {} ++for n,name in ipairs(action_names) do ++ map_action[name] = n-1 ++end ++ ++-- Action list buffer. ++local actlist = {} ++ ++-- Argument list for next dasm_put(). Start with offset 0 into action list. ++local actargs = { 0 } ++ ++-- Current number of section buffer positions for dasm_put(). ++local secpos = 1 ++ ++------------------------------------------------------------------------------ ++ ++-- Dump action names and numbers. 
++local function dumpactions(out) ++ out:write("DynASM encoding engine action codes:\n") ++ for n,name in ipairs(action_names) do ++ local num = map_action[name] ++ out:write(format(" %-10s %02X %d\n", name, num, num)) ++ end ++ out:write("\n") ++end ++ ++-- Write action list buffer as a huge static C array. ++local function writeactions(out, name) ++ local nn = #actlist ++ if nn == 0 then nn = 1; actlist[0] = map_action.STOP end ++ out:write("static const unsigned int ", name, "[", nn, "] = {\n") ++ for i = 1,nn-1 do ++ assert(out:write("0x", tohex(actlist[i]), ",\n")) ++ end ++ assert(out:write("0x", tohex(actlist[nn]), "\n};\n\n")) ++end ++ ++------------------------------------------------------------------------------ ++ ++-- Add word to action list. ++local function wputxw(n) ++ assert(n >= 0 and n <= 0xffffffff and n % 1 == 0, "word out of range") ++ actlist[#actlist+1] = n ++end ++ ++-- Add action to list with optional arg. Advance buffer pos, too. ++local function waction(action, val, a, num) ++ local w = assert(map_action[action], "bad action name `"..action.."'") ++ wputxw(0xff000000 + w * 0x10000 + (val or 0)) ++ if a then actargs[#actargs+1] = a end ++ if a or num then secpos = secpos + (num or 1) end ++end ++ ++-- Flush action list (intervening C code or buffer pos overflow). ++local function wflush(term) ++ if #actlist == actargs[1] then return end -- Nothing to flush. ++ if not term then waction("STOP") end -- Terminate action list. ++ wline(format("dasm_put(Dst, %s);", concat(actargs, ", ")), true) ++ actargs = { #actlist } -- Actionlist offset is 1st arg to next dasm_put(). ++ secpos = 1 -- The actionlist offset occupies a buffer position, too. ++end ++ ++-- Put escaped word. ++local function wputw(n) ++ if n >= 0xff000000 then waction("ESC") end ++ wputxw(n) ++end ++ ++-- Reserve position for word. ++local function wpos() ++ local pos = #actlist+1 ++ actlist[pos] = "" ++ return pos ++end ++ ++-- Store word to reserved position. ++local function wputpos(pos, n) ++ assert(n >= 0 and n <= 0xffffffff and n % 1 == 0, "word out of range") ++ actlist[pos] = n ++end ++ ++------------------------------------------------------------------------------ ++ ++-- Global label name -> global label number. With auto assignment on 1st use. ++local next_global = 20 ++local map_global = setmetatable({}, { __index = function(t, name) ++ if not match(name, "^[%a_][%w_]*$") then werror("bad global label") end ++ local n = next_global ++ if n > 2047 then werror("too many global labels") end ++ next_global = n + 1 ++ t[name] = n ++ return n ++end}) ++ ++-- Dump global labels. ++local function dumpglobals(out, lvl) ++ local t = {} ++ for name, n in pairs(map_global) do t[n] = name end ++ out:write("Global labels:\n") ++ for i=20,next_global-1 do ++ out:write(format(" %s\n", t[i])) ++ end ++ out:write("\n") ++end ++ ++-- Write global label enum. ++local function writeglobals(out, prefix) ++ local t = {} ++ for name, n in pairs(map_global) do t[n] = name end ++ out:write("enum {\n") ++ for i=20,next_global-1 do ++ out:write(" ", prefix, t[i], ",\n") ++ end ++ out:write(" ", prefix, "_MAX\n};\n") ++end ++ ++-- Write global label names. 
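Not part of the patch, just a minimal standalone C sketch of the action-word convention that waction() above relies on and that the dasm_link()/dasm_encode() loops in dasm_loongarch64.h decode: an action word is 0xff000000 + action*0x10000 + payload, so the C side recovers the action as (ins >> 16) - 0xff00 and the 16-bit payload as ins & 0xffff; ordinary data words that happen to be >= 0xff000000 are protected by a preceding ESC action (see wputw()). The enum below simply mirrors action_names.

/* Standalone sketch (not part of the patch): DynASM action-word packing. */
#include <stdio.h>

enum {
  DASM_STOP, DASM_SECTION, DASM_ESC, DASM_REL_EXT,
  DASM_ALIGN, DASM_REL_LG, DASM_LABEL_LG,
  DASM_REL_PC, DASM_LABEL_PC, DASM_IMM, DASM_IMM2
};

static unsigned int pack_action(unsigned int action, unsigned int payload)
{
  return 0xff000000u + action * 0x10000u + payload;  /* mirrors waction() */
}

int main(void)
{
  unsigned int w = pack_action(DASM_IMM, 0x818a);    /* an example payload */
  unsigned int action = (w >> 16) - 0xff00;          /* mirrors dasm_encode() */
  unsigned int payload = w & 0xffff;
  printf("word=0x%08x action=%u payload=0x%x\n", w, action, payload);
  return 0;                                          /* action=9, payload=0x818a */
}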
++local function writeglobalnames(out, name) ++ local t = {} ++ for name, n in pairs(map_global) do t[n] = name end ++ out:write("static const char *const ", name, "[] = {\n") ++ for i=20,next_global-1 do ++ out:write(" \"", t[i], "\",\n") ++ end ++ out:write(" (const char *)0\n};\n") ++end ++ ++------------------------------------------------------------------------------ ++ ++-- Extern label name -> extern label number. With auto assignment on 1st use. ++local next_extern = 0 ++local map_extern_ = {} ++local map_extern = setmetatable({}, { __index = function(t, name) ++ -- No restrictions on the name for now. ++ local n = next_extern ++ if n > 2047 then werror("too many extern labels") end ++ next_extern = n + 1 ++ t[name] = n ++ map_extern_[n] = name ++ return n ++end}) ++ ++-- Dump extern labels. ++local function dumpexterns(out, lvl) ++ out:write("Extern labels:\n") ++ for i=0,next_extern-1 do ++ out:write(format(" %s\n", map_extern_[i])) ++ end ++ out:write("\n") ++end ++ ++-- Write extern label names. ++local function writeexternnames(out, name) ++ out:write("static const char *const ", name, "[] = {\n") ++ for i=0,next_extern-1 do ++ out:write(" \"", map_extern_[i], "\",\n") ++ end ++ out:write(" (const char *)0\n};\n") ++end ++ ++------------------------------------------------------------------------------ ++ ++-- Arch-specific maps. ++local map_archdef = { sp="r3", ra="r1" } -- Ext. register name -> int. name. ++ ++local map_type = {} -- Type name -> { ctype, reg } ++local ctypenum = 0 -- Type number (for Dt... macros). ++ ++-- Reverse defines for registers. ++function _M.revdef(s) ++ if s == "r3" then return "sp" ++ elseif s == "r1" then return "ra" end ++ return s ++end ++ ++------------------------------------------------------------------------------ ++ ++-- Template strings for LoongArch instructions. 
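A hedged illustration (not in the patch) of how the template strings below are consumed by the .template__ handler further down: the first eight hex digits form the base opcode and each trailing letter names an operand field (D = rd at bit 0, J = rj at bit 5, K = rk at bit 10, and so on). Expanding the add.d_3 entry "00108000DJK" by hand, with register numbers 4/5/6 picked purely for illustration:

/* Sketch only: expanding the "00108000DJK" (add.d) template by hand. */
#include <stdio.h>

int main(void)
{
  unsigned int base = 0x00108000u;              /* add.d opcode bits */
  unsigned int rd = 4, rj = 5, rk = 6;          /* arbitrary example registers */
  unsigned int ins = base | (rd << 0) | (rj << 5) | (rk << 10);
  printf("add.d r4, r5, r6 -> 0x%08x\n", ins);  /* 0x001098a4 */
  return 0;
}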
++local map_op = { ++ ["clo.w_2"] = "00001000DJ", ++ ["clz.w_2"] = "00001400DJ", ++ ["cto.w_2"] = "00001800DJ", ++ ["ctz.w_2"] = "00001c00DJ", ++ ["clo.d_2"] = "00002000DJ", ++ ["clz.d_2"] = "00002400DJ", ++ ["cto.d_2"] = "00002800DJ", ++ ["ctz.d_2"] = "00002c00DJ", ++ ["revb.2h_2"] = "00003000DJ", ++ ["revb.4h_2"] = "00003400DJ", ++ ["revb.2w_2"] = "00003800DJ", ++ ["revb.d_2"] = "00003c00DJ", ++ ["revh.2w_2"] = "00004000DJ", ++ ["revh.d_2"] = "00004400DJ", ++ ["bitrev.4b_2"] = "00004800DJ", ++ ["bitrev.8b_2"] = "00004c00DJ", ++ ["bitrev.w_2"] = "00005000DJ", ++ ["bitrev.d_2"] = "00005400DJ", ++ ["ext.w.h_2"] = "00005800DJ", ++ ["ext.w.b_2"] = "00005c00DJ", ++ ++ ["add.w_3"] = "00100000DJK", ++ ["add.d_3"] = "00108000DJK", ++ ["sub.w_3"] = "00110000DJK", ++ ["sub.d_3"] = "00118000DJK", ++ slt_3 = "00120000DJK", ++ sltu_3 = "00128000DJK", ++ maskeqz_3 = "00130000DJK", ++ masknez_3 = "00138000DJK", ++ ++ nor_3 = "00140000DJK", ++ and_3 = "00148000DJK", ++ or_3 = "00150000DJK", ++ xor_3 = "00158000DJK", ++ orn_3 = "00160000DJK", ++ andn_3 = "00168000DJK", ++ ["sll.w_3"] = "00170000DJK", ++ ["srl.w_3"] = "00178000DJK", ++ ["sra.w_3"] = "00180000DJK", ++ ["sll.d_3"] = "00188000DJK", ++ ["srl.d_3"] = "00190000DJK", ++ ["sra.d_3"] = "00198000DJK", ++ ["rotr.w_3"] = "001b0000DJK", ++ ["rotr.d_3"] = "001b8000DJK", ++ ["mul.w_3"] = "001c0000DJK", ++ ["mulh.w_3"] = "001c8000DJK", ++ ["mulh.wu_3"] = "001d0000DJK", ++ ["mul.d_3"] = "001d8000DJK", ++ ["mulh.d_3"] = "001e0000DJK", ++ ["mulh.du_3"] = "001e8000DJK", ++ ["mulw.d.w_3"] = "001f0000DJK", ++ ["mulw.d.wu_3"] = "001f8000DJK", ++ ++ ["fabs.h_2"] = "01140000FG", ++ ["fabs.s_2"] = "01140400FG", ++ ["fabs.d_2"] = "01140800FG", ++ ["fneg.h_2"] = "01141000FG", ++ ["fneg.s_2"] = "01141400FG", ++ ["fneg.d_2"] = "01141800FG", ++ ["flogb.h_2"] = "01142000FG", ++ ["flogb.s_2"] = "01142400FG", ++ ["flogb.d_2"] = "01142800FG", ++ ["fclass.h_2"] = "01143000FG", ++ ["fclass.s_2"] = "01143400FG", ++ ["fclass.d_2"] = "01143800FG", ++ ["fsqrt.h_2"] = "01144000FG", ++ ["fsqrt.s_2"] = "01144400FG", ++ ["fsqrt.d_2"] = "01144800FG", ++ ["frecip.h_2"] = "01145000FG", ++ ["frecip.s_2"] = "01145400FG", ++ ["frecip.d_2"] = "01145800FG", ++ ["frsqrt.h_2"] = "01146000FG", ++ ["frsqrt.s_2"] = "01146400FG", ++ ["frsqrt.d_2"] = "01146800FG", ++ ["frecipe.h_2"] = "01147000FG", ++ ["frecipe.s_2"] = "01147400FG", ++ ["frecipe.d_2"] = "01147800FG", ++ ["frsqrte.h_2"] = "01148000FG", ++ ["frsqrte.s_2"] = "01148400FG", ++ ["frsqrte.d_2"] = "01148800FG", ++ ++ ["fmov.h_2"] = "01149000FG", ++ ["fmov.s_2"] = "01149400FG", ++ ["fmov.d_2"] = "01149800FG", ++ ["movgr2fr.h_2"] = "0114a000FJ", ++ ["movgr2fr.w_2"] = "0114a400FJ", ++ ["movgr2fr.d_2"] = "0114a800FJ", ++ ["movgr2frh.w_2"] = "0114ac00FJ", ++ ["movfr2gr.h_2"] = "0114b000DG", ++ ["movfr2gr.s_2"] = "0114b400DG", ++ ["movfr2gr.d_2"] = "0114b800DG", ++ ["movfrh2gr.s_2"] = "0114bc00DG", ++ movgr2fcsr_2 = "0114c000SG", ++ movfcsr2gr_2 = "0114c800FR", ++ movfr2cf_2 = "0114d000EG", ++ movcf2fr_2 = "0114d400FA", ++ movgr2cf_2 = "0114d800EG", ++ movcf2gr_2 = "0114dc00DA", ++ ["fcvt.ld.d_2"] = "0114e000FG", ++ ["fcvt.ud.d_2"] = "0114e400FG", ++ ["fcvt.s.d_2"] = "01191800FG", ++ ["fcvt.d.s_2"] = "01192400FG", ++ ["ftintrm.w.s_2"] = "011a0400FG", ++ ["ftintrm.w.d_2"] = "011a0800FG", ++ ["ftintrm.l.s_2"] = "011a2400FG", ++ ["ftintrm.l.d_2"] = "011a2800FG", ++ ["ftintrp.w.s_2"] = "011a4400FG", ++ ["ftintrp.w.d_2"] = "011a4800FG", ++ ["ftintrp.l.s_2"] = "011a6400FG", ++ ["ftintrp.l.d_2"] = "011a6800FG", ++ ["ftintrz.w.s_2"] = "011a8400FG", ++ 
["ftintrz.w.d_2"] = "011a8800FG", ++ ["ftintrz.l.s_2"] = "011aa400FG", ++ ["ftintrz.l.d_2"] = "011aa800FG", ++ ["ftintrne.w.s_2"] = "011ac400FG", ++ ["ftintrne.w.d_2"] = "011ac800FG", ++ ["ftintrne.l.s_2"] = "011ae400FG", ++ ["ftintrne.l.d_2"] = "011ae800FG", ++ ["ftint.w.s_2"] = "011b0400FG", ++ ["ftint.w.d_2"] = "011b0800FG", ++ ["ftint.l.s_2"] = "011b2400FG", ++ ["ftint.l.d_2"] = "011b2800FG", ++ ["ffint.s.w_2"] = "011d1000FG", ++ ["ffint.s.l_2"] = "011d1800FG", ++ ["ffint.d.w_2"] = "011d2000FG", ++ ["ffint.d.l_2"] = "011d2800FG", ++ ["frint.s_2"] = "011e4400FG", ++ ["frint.d_2"] = "011e4800FG", ++ ++ ["fadd.h_3"] = "01000000FGH", ++ ["fadd.s_3"] = "01008000FGH", ++ ["fadd.d_3"] = "01010000FGH", ++ ["fsub.h_3"] = "01020000FGH", ++ ["fsub.s_3"] = "01028000FGH", ++ ["fsub.d_3"] = "01030000FGH", ++ ["fmul.h_3"] = "01040000FGH", ++ ["fmul.s_3"] = "01048000FGH", ++ ["fmul.d_3"] = "01050000FGH", ++ ["fdiv.h_3"] = "01060000FGH", ++ ["fdiv.s_3"] = "01068000FGH", ++ ["fdiv.d_3"] = "01070000FGH", ++ ["fmax.h_3"] = "01080000FGH", ++ ["fmax.s_3"] = "01088000FGH", ++ ["fmax.d_3"] = "01090000FGH", ++ ["fmin.h_3"] = "010a0000FGH", ++ ["fmin.s_3"] = "010a8000FGH", ++ ["fmin.d_3"] = "010b0000FGH", ++ ["fmaxa.h_3"] = "010c0000FGH", ++ ["fmaxa.s_3"] = "010c8000FGH", ++ ["fmaxa.d_3"] = "010d0000FGH", ++ ["fmina.h_3"] = "010e0000FGH", ++ ["fmina.s_3"] = "010e8000FGH", ++ ["fmina.d_3"] = "010f0000FGH", ++ ["fscaleb.h_3"] = "01100000FGH", ++ ["fscaleb.s_3"] = "01108000FGH", ++ ["fscaleb.d_3"] = "01110000FGH", ++ ["fcopysign.h_3"] = "01120000FGH", ++ ["fcopysign.s_3"] = "01128000FGH", ++ ["fcopysign.d_3"] = "01130000FGH", ++ ++ ["alsl.w_4"] = "00040000DJKQ", ++ ["alsl.wu_4"] = "00060000DJKQ", ++ ["alsl.d_4"] = "002c0000DJKQ", ++ ["bytepick.w_4"] = "00080000DJKQ", ++ ["bytepick.d_4"] = "000c0000DJKB", ++ ++ ["div.w_3"] = "00200000DJK", ++ ["mod.w_3"] = "00208000DJK", ++ ["div.wu_3"] = "00210000DJK", ++ ["mod.wu_3"] = "00218000DJK", ++ ["div.d_3"] = "00220000DJK", ++ ["mod.d_3"] = "00228000DJK", ++ ["div.du_3"] = "00230000DJK", ++ ["mod.du_3"] = "00238000DJK", ++ ["crc.w.b.w_3"] = "00240000DJK", ++ ["crc.w.h.w_3"] = "00248000DJK", ++ ["crc.w.w.w_3"] = "00250000DJK", ++ ["crc.w.d.w_3"] = "00258000DJK", ++ ["crcc.w.b.w_3"] = "00260000DJK", ++ ["crcc.w.h.w_3"] = "00268000DJK", ++ ["crcc.w.w.w_3"] = "00270000DJK", ++ ["crcc.w.d.w_3"] = "00278000DJK", ++ ++ break_1 = "002a0000C", ++ syscall_1 = "002b0000C", ++ ++ ["slli.w_3"] = "00408000DJU", ++ ["slli.d_3"] = "00410000DJV", ++ ["srli.w_3"] = "00448000DJU", ++ ["srli.d_3"] = "00450000DJV", ++ ["srai.w_3"] = "00488000DJU", ++ ["srai.d_3"] = "00490000DJV", ++ ["rotri.w_3"] = "004c8000DJU", ++ ["rotri.d_3"] = "004d0000DJV", ++ ++ ["bstrins.w_4"] = "00600000DJMU", ++ ["bstrpick.w_4"] = "00608000DJMU", ++ ["bstrins.d_4"] = "00800000DJNV", ++ ["bstrpick.d_4"] = "00c00000DJNV", ++ slti_3 = "02000000DJX", ++ sltui_3 = "02400000DJX", ++ ["addi.w_3"] = "02800000DJX", ++ ["addi.d_3"] = "02c00000DJX", ++ ["lu52i.d_3"] = "03000000DJX", ++ andi_3 = "03400000DJT", ++ ori_3 = "03800000DJT", ++ xori_3 = "03c00000DJT", ++ ["lu12i.w_2"] = "14000000DZ", ++ ["lu32i.d_2"] = "16000000DZ", ++ pcaddi_2 = "18000000DZ", ++ pcalau12i_2 = "1a000000DZ", ++ pcaddu12i_2 = "1c000000DZ", ++ pcaddu18i_2 = "1e000000DZ", ++ ++ ["ldx.b_3"] = "38000000DJK", ++ ["ldx.h_3"] = "38040000DJK", ++ ["ldx.w_3"] = "38080000DJK", ++ ["ldx.d_3"] = "380c0000DJK", ++ ["stx.b_3"] = "38100000DJK", ++ ["stx.h_3"] = "38140000DJK", ++ ["stx.w_3"] = "38180000DJK", ++ ["stx.d_3"] = "381c0000DJK", ++ ["ldx.bu_3"] = 
"38200000DJK", ++ ["ldx.hu_3"] = "38240000DJK", ++ ["ldx.wu_3"] = "38280000DJK", ++ ["fldx.s_3"] = "38300000FJK", ++ ["fldx.d_3"] = "38340000FJK", ++ ["fstx.s_3"] = "38380000FJK", ++ ["fstx.d_3"] = "383c0000FJK", ++ ["fldgt.s_3"] = "38740000FJK", ++ ["fldgt.d_3"] = "38748000FJK", ++ ["fldle.s_3"] = "38750000FJK", ++ ["fldle.d_3"] = "38758000FJK", ++ ["fstgt.s_3"] = "38760000FJK", ++ ["fstgt.d_3"] = "38768000FJK", ++ ["fstle.s_3"] = "38770000FJK", ++ ["fstle.d_3"] = "38778000FJK", ++ ["ldgt.b_3"] = "38780000DJK", ++ ["ldgt.h_3"] = "38788000DJK", ++ ["ldgt.w_3"] = "38790000DJK", ++ ["ldgt.d_3"] = "38798000DJK", ++ ["ldle.b_3"] = "387a0000DJK", ++ ["ldle.h_3"] = "387a8000DJK", ++ ["ldle.w_3"] = "387b0000DJK", ++ ["ldle.d_3"] = "387b8000DJK", ++ ["stgt.b_3"] = "387c0000DJK", ++ ["stgt.h_3"] = "387c8000DJK", ++ ["stgt.w_3"] = "387d0000DJK", ++ ["stgt.d_3"] = "387d8000DJK", ++ ["stle.b_3"] = "387e0000DJK", ++ ["stle.h_3"] = "387e8000DJK", ++ ["stle.w_3"] = "387f0000DJK", ++ ["stle.d_3"] = "387f8000DJK", ++ ++ ["ll.w_3"] = "20000000DJW", ++ ["sc.w_3"] = "21000000DJW", ++ ["ll.d_3"] = "22000000DJW", ++ ["sc.d_3"] = "23000000DJW", ++ ["ldptr.w_3"] = "24000000DJW", ++ ["stptr.w_3"] = "25000000DJW", ++ ["ldptr.d_3"] = "26000000DJW", ++ ["stptr.d_3"] = "27000000DJW", ++ ++ ["ld.b_3"] = "28000000DJX", ++ ["ld.h_3"] = "28400000DJX", ++ ["ld.w_2"] = "28800000Do", ++ ["ld.d_2"] = "28c00000Do", ++ ["st.b_2"] = "29000000Do", ++ ["st.h_2"] = "29400000Do", ++ ["st.w_2"] = "29800000Do", ++ ["st.d_2"] = "29c00000Do", ++ ["ld.bu_2"] = "2a000000Do", ++ ["ld.hu_2"] = "2a400000Do", ++ ["ld.wu_3"] = "2a800000DJX", ++ ["ldx.d_3"] = "380c0000DJK", ++ ["stx.d_3"] = "381c0000DJK", ++ ["fld.s_2"] = "2b000000Fo", ++ ["fst.s_2"] = "2b400000Fo", ++ ["fld.d_2"] = "2b800000Fo", ++ ["fst.d_2"] = "2bc00000Fo", ++ ++ ["fcmp.caf.s_3"] = "0c100000EGH", ++ ["fcmp.saf.s_3"] = "0c108000EGH", ++ ["fcmp.clt.s_3"] = "0c110000EGH", ++ ["fcmp.slt.s_3"] = "0c118000EGH", ++ ["fcmp.ceq.s_3"] = "0c120000EGH", ++ ["fcmp.seq.s_3"] = "0c128000EGH", ++ ["fcmp.cle.s_3"] = "0c130000EGH", ++ ["fcmp.sle.s_3"] = "0c138000EGH", ++ ["fcmp.cun.s_3"] = "0c140000EGH", ++ ["fcmp.sun.s_3"] = "0c148000EGH", ++ ["fcmp.cult.s_3"] = "0c150000EGH", --TODO ++ ["fcmp.sult.s_3"] = "0c158000EGH", ++ ["fcmp.cueq.s_3"] = "0c160000EGH", ++ ["fcmp.sueq.s_3"] = "0c168000EGH", ++ ["fcmp.cule.s_3"] = "0c170000EGH", ++ ["fcmp.sule.s_3"] = "0c178000EGH", ++ ["fcmp.cne.s_3"] = "0c180000EGH", ++ ["fcmp.sne.s_3"] = "0c188000EGH", ++ ["fcmp.cor.s_3"] = "0c1a0000EGH", ++ ["fcmp.sor.s_3"] = "0c1a8000EGH", ++ ["fcmp.cune.s_3"] = "0c1c0000EGH", ++ ["fcmp.sune.s_3"] = "0c1c8000EGH", ++ ["fcmp.caf.d_3"] = "0c200000EGH", ++ ["fcmp.saf.d_3"] = "0c208000EGH", ++ ["fcmp.clt.d_3"] = "0c210000EGH", ++ ["fcmp.slt.d_3"] = "0c218000EGH", ++ ["fcmp.ceq.d_3"] = "0c220000EGH", ++ ["fcmp.seq.d_3"] = "0c228000EGH", ++ ["fcmp.cle.d_3"] = "0c230000EGH", ++ ["fcmp.sle.d_3"] = "0c238000EGH", ++ ["fcmp.cun.d_3"] = "0c240000EGH", ++ ["fcmp.sun.d_3"] = "0c248000EGH", ++ ["fcmp.cult.d_3"] = "0c250000EGH", --TODO ++ ["fcmp.sult.d_3"] = "0c258000EGH", ++ ["fcmp.cueq.d_3"] = "0c260000EGH", ++ ["fcmp.sueq.d_3"] = "0c268000EGH", ++ ["fcmp.cule.d_3"] = "0c270000EGH", ++ ["fcmp.sule.d_3"] = "0c278000EGH", ++ ["fcmp.cne.d_3"] = "0c280000EGH", ++ ["fcmp.sne.d_3"] = "0c288000EGH", ++ ["fcmp.cor.d_3"] = "0c2a0000EGH", ++ ["fcmp.sor.d_3"] = "0c2a8000EGH", ++ ["fcmp.cune.d_3"] = "0c2c0000EGH", ++ ["fcmp.sune.d_3"] = "0c2c8000EGH", ++ ++ fsel_4 = "0d000000FGHI", ++ ++ ["addu16i.d_3"] = "10000000DJY", ++ beqz_2 = 
"40000000JL", ++ bnez_2 = "44000000JL", ++ bceqz_2 = "48000000AL", ++ bcnez_2 = "48000100AL", ++ jirl_3 = "4c000000DJa", ++ b_1 = "50000000P", ++ bl_1 = "54000000P", ++ beq_3 = "58000000JDO", ++ bne_3 = "5c000000JDO", ++ blt_3 = "60000000JDO", ++ bge_3 = "64000000JDO", ++ bltu_3 = "68000000JDO", ++ bgeu_3 = "6c000000JDO", ++} ++ ++------------------------------------------------------------------------------ ++ ++local function parse_gpr(expr) ++ local tname, ovreg = match(expr, "^([%w_]+):(r[1-3]?[0-9])$") ++ local tp = map_type[tname or expr] ++ if tp then ++ local reg = ovreg or tp.reg ++ if not reg then ++ werror("type `"..(tname or expr).."' needs a register override") ++ end ++ expr = reg ++ end ++ local r = match(expr, "^r([1-3]?[0-9])$") ++ if r then ++ r = tonumber(r) ++ if r <= 31 then return r, tp end ++ end ++ werror("bad register name `"..expr.."'") ++end ++ ++local function parse_fpr(expr) ++ local r = match(expr, "^f([1-3]?[0-9])$") ++ if r then ++ r = tonumber(r) ++ if r <= 31 then return r end ++ end ++ werror("bad register name `"..expr.."'") ++end ++ ++local function parse_fcsr(expr) ++ local r = match(expr, "^fcsr([0-3])$") ++ if r then ++ r = tonumber(r) ++ return r ++ end ++ werror("bad register name `"..expr.."'") ++end ++ ++local function parse_fcc(expr) ++ local r = match(expr, "^fcc([0-7])$") ++ if r then ++ r = tonumber(r) ++ return r ++ end ++ werror("bad register name `"..expr.."'") ++end ++ ++local function parse_imm(imm, bits, shift, scale, signed, action) ++ local n = tonumber(imm) ++ if n then ++ local m = sar(n, scale) ++ if shl(m, scale) == n then ++ if signed then ++ local s = sar(m, bits-1) ++ if s == 0 then return shl(m, shift) ++ elseif s == -1 then return shl(m + shl(1, bits), shift) end ++ else ++ if sar(m, bits) == 0 then return shl(m, shift) end ++ end ++ end ++ werror("out of range immediate1 `"..imm.."'") ++ elseif match(imm, "^[rf]([1-3]?[0-9])$") or ++ match(imm, "^([%w_]+):([rf][1-3]?[0-9])$") then ++ werror("expected immediate operand, got register") ++ else ++ waction(action or "IMM", ++ (signed and 32768 or 0)+shl(scale, 10)+shl(bits, 5)+shift, imm) ++ return 0 ++ end ++end ++ ++local function parse_imm21or26(imm, i) ++ local n = tonumber(imm) ++ if n then ++ -- signed ++ local m = sar(n, 0) ++ if shl(m, 0) == n then ++ local s = sar(m, i-1) ++ if s == 0 then ++ return shl(sub(m, 1, 16), 10) + shl(sub(m, 17, i), 0) ++ elseif s == -1 then ++ return shl(sub(m, 1, 16), 10) + shl(sub(m, 17, i), 0) --TODO ++ end ++ end ++ werror("out of range immediate2 `"..imm.."'") ++ else ++ waction("IMM2", 0, imm) --TODO ++ return 0 ++ end ++end ++ ++local function parse_disp(disp) ++ local imm, reg = match(disp, "^(.*)%(([%w_:]+)%)$") ++ if imm then ++ local r = shl(parse_gpr(reg), 5) ++ local extname = match(imm, "^extern%s+(%S+)$") ++ if extname then ++ waction("REL_EXT", map_extern[extname], nil, 1) ++ return r ++ else ++ return r + parse_imm(imm, 12, 10, 0, true) ++ end ++ end ++ local reg, tailr = match(disp, "^([%w_:]+)%s*(.*)$") ++ if reg and tailr ~= "" then ++ local r, tp = parse_gpr(reg) ++ if tp then ++ waction("IMM", 32768+12*32+10, format(tp.ctypefmt, tailr)) ++ return shl(r, 5) ++ end ++ end ++ werror("bad displacement `"..disp.."'") ++end ++ ++local function parse_label(label, def) ++ local prefix = sub(label, 1, 2) ++ -- =>label (pc label reference) ++ if prefix == "=>" then ++ return "PC", 0, sub(label, 3) ++ end ++ -- ->name (global label reference) ++ if prefix == "->" then ++ return "LG", map_global[sub(label, 3)] ++ end ++ if def then 
++ -- [1-9] (local label definition) ++ if match(label, "^[1-9]$") then ++ return "LG", 10+tonumber(label) ++ end ++ else ++ -- [<>][1-9] (local label reference) ++ local dir, lnum = match(label, "^([<>])([1-9])$") ++ if dir then -- Fwd: 1-9, Bkwd: 11-19. ++ return "LG", lnum + (dir == ">" and 0 or 10) ++ end ++ -- extern label (extern label reference) ++ local extname = match(label, "^extern%s+(%S+)$") ++ if extname then ++ return "EXT", map_extern[extname] ++ end ++ end ++ werror("bad label `"..label.."'") ++end ++ ++local function branch_type(op) ++ if shr(op, 26) == 0x16 or shr(op, 26) == 0x17 or shr(op, 26) == 0x18 or ++ shr(op, 26) == 0x19 or shr(op, 26) == 0x1a or shr(op, 26) == 0x1b then ++ return 0 -- BEQ, BNE, BLT, BGE, BLTU, BGEU ++ elseif shr(op, 26) == 0x10 or shr(op, 26) == 0x11 or shr(op, 26) == 0x12 then ++ return 0x5000 -- BEQZ, BNEZ, BCEQZ, BCNEZ ++ elseif band(op, 0xf8000000) == 0x50000000 then return 0xa000 --B, BL ++ else ++ assert(false, "unknown branch type") ++ end ++end ++ ++------------------------------------------------------------------------------ ++ ++-- Handle opcodes defined with template strings. ++map_op[".template__"] = function(params, template, nparams) ++ if not params then return sub(template, 9) end ++ local op = tonumber(sub(template, 1, 8), 16) ++ local n = 1 ++ ++ -- Limit number of section buffer positions used by a single dasm_put(). ++ -- A single opcode needs a maximum of 2 positions (ins/ext). ++ if secpos+2 > maxsecpos then wflush() end ++ local pos = wpos() ++ ++ -- Process each character. ++ for p in gmatch(sub(template, 9), ".") do ++ if p == "D" then ++ op = op + shl(parse_gpr(params[n]), 0); n = n + 1 ++ elseif p == "J" then ++ op = op + shl(parse_gpr(params[n]), 5); n = n + 1 ++ elseif p == "K" then ++ op = op + shl(parse_gpr(params[n]), 10); n = n + 1 ++ elseif p == "F" then ++ op = op + shl(parse_fpr(params[n]), 0); n = n + 1 ++ elseif p == "G" then ++ op = op + shl(parse_fpr(params[n]), 5); n = n + 1 ++ elseif p == "H" then ++ op = op + shl(parse_fpr(params[n]), 10); n = n + 1 ++ elseif p == "I" then ++ op = op + shl(parse_fcc(params[n]), 15); n = n + 1 ++ elseif p == "A" then ++ op = op + shl(parse_fcc(params[n]), 5); n = n + 1 ++ elseif p == "E" then ++ op = op + shl(parse_fcc(params[n]), 0); n = n + 1 ++ elseif op == "S" then ++ op = op + shl(parse_fcsr(params[n]), 0); n = n + 1 ++ elseif op == "R" then ++ op = op + shl(parse_fcsr(params[n]), 5); n = n + 1 ++ elseif p == "U" then ++ op = op + parse_imm(params[n], 5, 10, 0, false); n = n + 1 ++ elseif p == "V" then ++ op = op + parse_imm(params[n], 6, 10, 0, false); n = n + 1 ++ elseif p == "W" then ++ op = op + parse_imm(params[n], 14, 10, 0, true); n = n + 1 ++ elseif p == "X" then ++ op = op + parse_imm(params[n], 12, 10, 0, true); n = n + 1 ++ elseif p == "o" then ++ op = op + parse_disp(params[n]); n = n + 1 ++ elseif p == "Y" then ++ op = op + parse_imm(params[n], 16, 10, 0, true); n = n + 1 ++ elseif p == "Z" then ++ op = op + parse_imm(params[n], 20, 5, 0, true); n = n + 1 ++ elseif p == "T" then ++ op = op + parse_imm(params[n], 12, 10, 0, false); n = n + 1 ++ elseif p == "C" then ++ op = op + parse_imm(params[n], 15, 0, 0, false); n = n + 1 ++ elseif p == "Q" then ++ op = op + parse_imm(params[n], 2, 15, 0, false); n = n + 1 ++ elseif p == "B" then ++ op = op + parse_imm(params[n], 3, 15, 0, false); n = n + 1 ++ elseif p == "M" then ++ op = op + parse_imm(params[n], 5, 16, 0, false); n = n + 1 ++ elseif p == "N" then ++ op = op + parse_imm(params[n], 6, 16, 0, false); n 
= n + 1 ++-- elseif p == "O" then ++-- op = op + parse_imm(params[n], 16, 10, 0, true); n = n + 1 ++-- elseif p == "L" then ++-- op = op + parse_imm21or26(params[n], 21); n = n + 1 ++-- elseif p == "P" then ++-- op = op + parse_imm21or26(params[n], 26); n = n + 1 ++ elseif p == "O" or p == "L" or p == "P" then ++ local mode, m, s = parse_label(params[n], false) ++ local v = branch_type(op) ++ waction("REL_"..mode, m+v, s, 1) ++ n = n + 1 ++ elseif p == "a" then ++ op = op + parse_imm(params[n], 16, 10, 0, true); n = n + 1 ++ else ++ assert(false) ++ end ++ end ++ wputpos(pos, op) ++end ++ ++------------------------------------------------------------------------------ ++ ++-- Pseudo-opcode to mark the position where the action list is to be emitted. ++map_op[".actionlist_1"] = function(params) ++ if not params then return "cvar" end ++ local name = params[1] -- No syntax check. You get to keep the pieces. ++ wline(function(out) writeactions(out, name) end) ++end ++ ++-- Pseudo-opcode to mark the position where the global enum is to be emitted. ++map_op[".globals_1"] = function(params) ++ if not params then return "prefix" end ++ local prefix = params[1] -- No syntax check. You get to keep the pieces. ++ wline(function(out) writeglobals(out, prefix) end) ++end ++ ++-- Pseudo-opcode to mark the position where the global names are to be emitted. ++map_op[".globalnames_1"] = function(params) ++ if not params then return "cvar" end ++ local name = params[1] -- No syntax check. You get to keep the pieces. ++ wline(function(out) writeglobalnames(out, name) end) ++end ++ ++-- Pseudo-opcode to mark the position where the extern names are to be emitted. ++map_op[".externnames_1"] = function(params) ++ if not params then return "cvar" end ++ local name = params[1] -- No syntax check. You get to keep the pieces. ++ wline(function(out) writeexternnames(out, name) end) ++end ++ ++------------------------------------------------------------------------------ ++ ++-- Label pseudo-opcode (converted from trailing colon form). ++map_op[".label_1"] = function(params) ++ if not params then return "[1-9] | ->global | =>pcexpr" end ++ if secpos+1 > maxsecpos then wflush() end ++ local mode, n, s = parse_label(params[1], true) ++ if mode == "EXT" then werror("bad label definition") end ++ waction("LABEL_"..mode, n, s, 1) ++end ++ ++------------------------------------------------------------------------------ ++ ++-- Pseudo-opcodes for data storage. ++map_op[".long_*"] = function(params) ++ if not params then return "imm..." end ++ for _,p in ipairs(params) do ++ local n = tonumber(p) ++ if not n then werror("bad immediate `"..p.."'") end ++ if n < 0 then n = n + 2^32 end ++ wputw(n) ++ if secpos+2 > maxsecpos then wflush() end ++ end ++end ++ ++-- Alignment pseudo-opcode. ++map_op[".align_1"] = function(params) ++ if not params then return "numpow2" end ++ if secpos+1 > maxsecpos then wflush() end ++ local align = tonumber(params[1]) ++ if align then ++ local x = align ++ -- Must be a power of 2 in the range (2 ... 256). ++ for i=1,8 do ++ x = x / 2 ++ if x == 1 then ++ waction("ALIGN", align-1, nil, 1) -- Action byte is 2**n-1. ++ return ++ end ++ end ++ end ++ werror("bad alignment") ++end ++ ++------------------------------------------------------------------------------ ++ ++-- Pseudo-opcode for (primitive) type definitions (map to C types). 
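A sketch, not part of the patch, of the 16-bit IMM action payload built by parse_imm() above (bit 15 signedness, bits 14-10 scale, bits 9-5 field width, bits 4-0 shift) and of how the DASM_IMM case in dasm_loongarch64.h later masks and shifts the run-time value into the previously emitted word. The signedness and scale bits are presumably consumed by the range check in the earlier dasm_put() pass, which is not shown in this hunk; the (12, 10) parameters correspond to the signed si12 "X" operand used by addi.d and friends.

/* Sketch: the IMM action payload for a signed si12 field at bit 10 ("X"). */
#include <stdio.h>

int main(void)
{
  /* (signed ? 32768 : 0) + (scale << 10) + (bits << 5) + shift, as in parse_imm() */
  unsigned int payload = 32768u + (0u << 10) + (12u << 5) + 10u;   /* 0x818a */

  /* DASM_IMM in dasm_encode(): insert run-time value n into the previous word. */
  int n = -4;                                   /* e.g. addi.d rd, rj, -4 */
  unsigned int bits  = (payload >> 5) & 31;     /* 12 */
  unsigned int shift = payload & 31;            /* 10 */
  unsigned int field = ((unsigned int)n & ((1u << bits) - 1u)) << shift;
  printf("payload=0x%04x field=0x%08x\n", payload, field);  /* field=0x003ff000 */
  return 0;
}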
++map_op[".type_3"] = function(params, nparams) ++ if not params then ++ return nparams == 2 and "name, ctype" or "name, ctype, reg" ++ end ++ local name, ctype, reg = params[1], params[2], params[3] ++ if not match(name, "^[%a_][%w_]*$") then ++ werror("bad type name `"..name.."'") ++ end ++ local tp = map_type[name] ++ if tp then ++ werror("duplicate type `"..name.."'") ++ end ++ -- Add #type to defines. A bit unclean to put it in map_archdef. ++ map_archdef["#"..name] = "sizeof("..ctype..")" ++ -- Add new type and emit shortcut define. ++ local num = ctypenum + 1 ++ map_type[name] = { ++ ctype = ctype, ++ ctypefmt = format("Dt%X(%%s)", num), ++ reg = reg, ++ } ++ wline(format("#define Dt%X(_V) (int)(ptrdiff_t)&(((%s *)0)_V)", num, ctype)) ++ ctypenum = num ++end ++map_op[".type_2"] = map_op[".type_3"] ++ ++-- Dump type definitions. ++local function dumptypes(out, lvl) ++ local t = {} ++ for name in pairs(map_type) do t[#t+1] = name end ++ sort(t) ++ out:write("Type definitions:\n") ++ for _,name in ipairs(t) do ++ local tp = map_type[name] ++ local reg = tp.reg or "" ++ out:write(format(" %-20s %-20s %s\n", name, tp.ctype, reg)) ++ end ++ out:write("\n") ++end ++ ++------------------------------------------------------------------------------ ++ ++-- Set the current section. ++function _M.section(num) ++ waction("SECTION", num) ++ wflush(true) -- SECTION is a terminal action. ++end ++ ++------------------------------------------------------------------------------ ++ ++-- Dump architecture description. ++function _M.dumparch(out) ++ out:write(format("DynASM %s version %s, released %s\n\n", ++ _info.arch, _info.version, _info.release)) ++ dumpactions(out) ++end ++ ++-- Dump all user defined elements. ++function _M.dumpdef(out, lvl) ++ dumptypes(out, lvl) ++ dumpglobals(out, lvl) ++ dumpexterns(out, lvl) ++end ++ ++------------------------------------------------------------------------------ ++ ++-- Pass callbacks from/to the DynASM core. ++function _M.passcb(wl, we, wf, ww) ++ wline, werror, wfatal, wwarn = wl, we, wf, ww ++ return wflush ++end ++ ++-- Setup the arch-specific module. ++function _M.setup(arch, opt) ++ g_arch, g_opt = arch, opt ++end ++ ++-- Merge the core maps and the arch-specific maps. ++function _M.mergemaps(map_coreop, map_def) ++ setmetatable(map_op, { __index = map_coreop }) ++ setmetatable(map_def, { __index = map_archdef }) ++ return map_op, map_def ++end ++ ++return _M ++ ++------------------------------------------------------------------------------ ++ +diff --git a/libs/luajit/LuaJIT-src/src/Makefile b/libs/luajit/LuaJIT-src/src/Makefile +index 34c5e97..cb3fc00 100644 +--- a/libs/luajit/LuaJIT-src/src/Makefile ++++ b/libs/luajit/LuaJIT-src/src/Makefile +@@ -36,7 +36,7 @@ CC= $(DEFAULT_CC) + # to slow down the C part by not omitting it. Debugging, tracebacks and + # unwinding are not affected -- the assembler part has frame unwind + # information and GCC emits it where needed (x64) or with -g (see CCDEBUG). +-CCOPT= -O2 -fomit-frame-pointer ++CCOPT= -O0 -fomit-frame-pointer + # Use this if you want to generate a smaller binary (but it's slower): + #CCOPT= -Os -fomit-frame-pointer + # Note: it's no longer recommended to use -O3 with GCC 4.x. 
+@@ -53,6 +53,7 @@ CCOPT_arm= + CCOPT_arm64= + CCOPT_ppc= + CCOPT_mips= ++CCOPT_loongarch64= + # + CCDEBUG= + # Uncomment the next line to generate debug information: +@@ -241,6 +242,10 @@ else + ifneq (,$(findstring LJ_TARGET_ARM ,$(TARGET_TESTARCH))) + TARGET_LJARCH= arm + else ++ifneq (,$(findstring LJ_TARGET_LOONGARCH64 ,$(TARGET_TESTARCH))) ++ TARGET_ARCH= -DLJ_ARCH_ENDIAN=LUAJIT_LE ++ TARGET_LJARCH= loongarch64 ++else + ifneq (,$(findstring LJ_TARGET_ARM64 ,$(TARGET_TESTARCH))) + ifneq (,$(findstring __AARCH64EB__ ,$(TARGET_TESTARCH))) + TARGET_ARCH= -D__AARCH64EB__=1 +@@ -272,6 +277,7 @@ endif + endif + endif + endif ++endif + + ifneq (,$(findstring LJ_TARGET_PS3 1,$(TARGET_TESTARCH))) + TARGET_SYS= PS3 +diff --git a/libs/luajit/LuaJIT-src/src/host/buildvm.c b/libs/luajit/LuaJIT-src/src/host/buildvm.c +index 98a7a57..8e96cb4 100644 +--- a/libs/luajit/LuaJIT-src/src/host/buildvm.c ++++ b/libs/luajit/LuaJIT-src/src/host/buildvm.c +@@ -65,6 +65,8 @@ static int collect_reloc(BuildCtx *ctx, uint8_t *addr, int idx, int type); + #include "../dynasm/dasm_ppc.h" + #elif LJ_TARGET_MIPS + #include "../dynasm/dasm_mips.h" ++#elif LJ_TARGET_LOONGARCH64 ++#include "../dynasm/dasm_loongarch64.h" + #else + #error "No support for this architecture (yet)" + #endif +diff --git a/libs/luajit/LuaJIT-src/src/host/buildvm_asm.c b/libs/luajit/LuaJIT-src/src/host/buildvm_asm.c +index ffd1490..bc9ab7f 100644 +--- a/libs/luajit/LuaJIT-src/src/host/buildvm_asm.c ++++ b/libs/luajit/LuaJIT-src/src/host/buildvm_asm.c +@@ -164,6 +164,15 @@ static void emit_asm_wordreloc(BuildCtx *ctx, uint8_t *p, int n, + "Error: unsupported opcode %08x for %s symbol relocation.\n", + ins, sym); + exit(1); ++#elif LJ_TARGET_LOONGARCH64 ++ if ((ins >> 26) == 21) { ++ fprintf(ctx->fp, "\tbl %s\n", sym); ++ } else { ++ fprintf(stderr, ++ "Error: unsupported opcode %08x for %s symbol relocation.\n", ++ ins, sym); ++ exit(1); ++ } + #else + #error "missing relocation support for this architecture" + #endif +diff --git a/libs/luajit/LuaJIT-src/src/jit/bcsave.lua b/libs/luajit/LuaJIT-src/src/jit/bcsave.lua +index c17c88e..79bae42 100644 +--- a/libs/luajit/LuaJIT-src/src/jit/bcsave.lua ++++ b/libs/luajit/LuaJIT-src/src/jit/bcsave.lua +@@ -64,7 +64,7 @@ local map_type = { + + local map_arch = { + x86 = true, x64 = true, arm = true, arm64 = true, arm64be = true, +- ppc = true, mips = true, mipsel = true, ++ ppc = true, mips = true, mipsel = true, loongarch64 = true, + } + + local map_os = { +diff --git a/libs/luajit/LuaJIT-src/src/jit/dis_loongarch64.lua b/libs/luajit/LuaJIT-src/src/jit/dis_loongarch64.lua +new file mode 100644 +index 0000000..3e67efc +--- /dev/null ++++ b/libs/luajit/LuaJIT-src/src/jit/dis_loongarch64.lua +@@ -0,0 +1,649 @@ ++---------------------------------------------------------------------------- ++-- LuaJIT LoongArch disassembler module. ++-- ++-- Copyright (C) 2005-2021 Mike Pall. All rights reserved. ++-- Copyright (C) 2021 Loongson Technology. All rights reserved. ++-- Released under the MIT/X license. See Copyright Notice in luajit.h ++---------------------------------------------------------------------------- ++-- This is a helper module used by the LuaJIT machine code dumper module. ++-- ++-- It disassembles most LoongArch instructions. ++-- NYI: SIMD instructions. 
++------------------------------------------------------------------------------ ++ ++local type = type ++local byte, format = string.byte, string.format ++local match, gmatch = string.match, string.gmatch ++local concat = table.concat ++local bit = require("bit") ++local band, bor, tohex = bit.band, bit.bor, bit.tohex ++local lshift, rshift, arshift = bit.lshift, bit.rshift, bit.arshift ++ ++------------------------------------------------------------------------------ ++-- Opcode maps ++------------------------------------------------------------------------------ ++ ++local map_18_0 = { -- 18-20:0, 10-17 ++ shift = 10, mask = 255, ++ [4] = "clo.wDJ", ++ [5] = "clz.wDJ", ++ [6] = "cto.wDJ", ++ [7] = "ctz.wDJ", ++ [8] = "clo.dDJ", ++ [9] = "clz.dDJ", ++ [10] = "cto.dDJ", ++ [11] = "ctz.dDJ", ++ [12] = "revb.2hDJ", ++ [13] = "revb.4hDJ", ++ [14] = "revb.2wDJ", ++ [15] = "revb.dDJ", ++ [16] = "revh.2wDJ", ++ [17] = "revh.dDJ", ++ [18] = "bitrev.4bDJ", ++ [19] = "bitrev.8bDJ", ++ [20] = "bitrev.wDJ", ++ [21] = "bitrev.dDJ", ++ [22] = "ext.w.hDJ", ++ [23] = "ext.w.bDJ", ++} ++ ++local map_18_4 = { -- 18-20:4, 15-17 ++ shift = 15, mask = 7, ++ [0] = "add.wDJK", ++ [1] = "add.dDJK", ++ [2] = "sub.wDJK", ++ [3] = "sub.dDJK", ++ [4] = "sltDJK", ++ [5] = "sltuDJK", ++ [6] = "maskeqzDJK", ++ [7] = "masknezDJK", ++} ++ ++local map_18_5 = { -- 18-20:5, 15-17 ++ shift = 15, mask = 7, ++ [0] = "norDJK", ++ [1] = "andDJK", ++ [2] = "orDJK", ++ [3] = "xorDJK", ++ [4] = "ornDJK", ++ [5] = "andnDJK", ++ [6] = "sll.wDJK", ++ [7] = "srl.wDJK", ++} ++ ++local map_18_6 = { -- 18-20:6, 15-17 ++ shift = 15, mask = 7, ++ [0] = "sra.wDJK", ++ [1] = "sll.dDJK", ++ [2] = "srl.dDJK", ++ [3] = "sra.dDJK", ++ [6] = "rotr.wDJK", ++ [7] = "rotr.dDJK", ++} ++ ++local map_18_7 = { -- 18-20:7, 15-17 ++ shift = 15, mask = 7, ++ [0] = "mul.wDJK", ++ [1] = "mulh.wDJK", ++ [2] = "mulh.wuDJK", ++ [3] = "mul.dDJK", ++ [4] = "mulh.dDJK", ++ [5] = "mulh.duDJK", ++ [6] = "mulw.d.wDJK", ++ [7] = "mulw.d.wuDJK", ++} ++ ++local map_farith2 = { ++ shift = 10, mask = 31, ++ [0] = "fabs.hFG", ++ [1] = "fabs.sFG", ++ [2] = "fabs.dFG", ++ [4] = "fneg.hFG", ++ [5] = "fneg.sFG", ++ [6] = "fneg.dFG", ++ [8] = "flogb.hFG", ++ [9] = "flogb.sFG", ++ [10] = "flogb.dFG", ++ [12] = "fclass.hFG", ++ [13] = "fclass.sFG", ++ [14] = "fclass.dFG", ++ [16] = "fsqrt.hFG", ++ [17] = "fsqrt.sFG", ++ [18] = "fsqrt.dFG", ++ [20] = "frecip.hFG", ++ [21] = "frecip.sFG", ++ [22] = "frecip.dFG", ++ [24] = "frsqrt.hFG", ++ [25] = "frsqrt.sFG", ++ [26] = "frsqrt.dFG", ++ [28] = "frecipe.hFG", ++ [29] = "frecipe.sFG", ++ [30] = "frecipe.dFG", ++ [32] = "frsqrte.hFG", ++ [33] = "frsqrte.sFG", ++ [34] = "frsqrte.dFG", ++} ++ ++local map_fmov = { ++ shift = 10, mask = 31, ++ [4] = "fmov.hFG", ++ [5] = "fmov.sFG", ++ [6] = "fmov.dFG", ++ [8] = "movgr2fr.hFJ", ++ [9] = "movgr2fr.wFJ", ++ [10] = "movgr2fr.dFJ", ++ [11] = "movgr2frh.wFJ", ++ [12] = "movfr2gr.hDG", ++ [13] = "movfr2gr.sDG", ++ [14] = "movfr2gr.dDG", ++ [15] = "movfrh2gr.sDG", ++ [16] = "movgr2fcsrSJ", ++ [18] = "movfcsr2grDR", ++ [20] = { shift = 3, mask = 3, [0] = "movfr2cfEG", }, ++ [21] = { shift = 8, mask = 3, [0] = "movcf2frFA", }, ++ [22] = { shift = 3, mask = 3, [0] = "movgr2cfEJ", }, ++ [23] = { shift = 8, mask = 3, [0] = "movcf2grDA", }, ++ [24] = "fcvt.ld.dFG", ++ [25] = "fcvt.ud.dFG", ++} ++ ++local map_fconvert = { -- 15-20: 110010 ++ shift = 10, mask = 31, ++ [6] = "fcvt.s.dFG", [9] = "fcvt.d.sFG", ++} ++ ++local map_fconvert1 = { -- 15-20: 110100 ++ shift = 10, mask = 31, ++ [1] = 
"ftintrm.w.sFG", ++ [2] = "ftintrm.w.dFG", ++ [9] = "ftintrm.l.sFG", ++ [10] = "ftintrm.l.dFG", ++ [17] = "ftintrp.w.sFG", ++ [18] = "ftintrp.w.dFG", ++ [25] = "ftintrp.l.sFG", ++ [26] = "ftintrp.l.dFG", ++} ++ ++local map_fconvert2 = { -- 15-20: 110101 ++ shift = 10, mask = 31, ++ [1] = "ftintrz.w.sFG", ++ [2] = "ftintrz.w.dFG", ++ [9] = "ftintrz.l.sFG", ++ [10] = "ftintrz.l.dFG", ++ [17] = "ftintrne.w.sFG", ++ [18] = "ftintrne.w.dFG", ++ [25] = "ftintrne.l.sFG", ++ [26] = "ftintrne.l.dFG", ++} ++ ++local map_fconvert3 = { -- 15-20: 110110 ++ shift = 10, mask = 31, ++ [1] = "ftint.w.sFG", ++ [2] = "ftint.w.dFG", ++ [9] = "ftint.l.sFG", ++ [10] = "ftint.l.dFG", ++} ++ ++local map_fconvert4 = { -- 15-20: 111010 ++ shift = 10, mask = 31, ++ [4] = "ffint.s.wFG", ++ [6] = "ffint.s.lFG", ++ [8] = "ffint.d.wFG", ++ [10] = "ffint.d.lFG", ++} ++ ++local map_fconvert5 = { -- 15-20: 111100 ++ shift = 10, mask = 31, ++ [17] = "frint.sFG", ++ [18] = "frint.dFG", ++} ++ ++local map_farith = { -- 22-25:4, 15-21 ++ shift = 15, mask = 127, ++ [0] = "fadd.hFGH", ++ [1] = "fadd.sFGH", ++ [2] = "fadd.dFGH", ++ [4] = "fsub.hFGH", ++ [5] = "fsub.sFGH", ++ [6] = "fsub.dFGH", ++ [8] = "fmul.hFGH", ++ [9] = "fmul.sFGH", ++ [10] = "fmul.dFGH", ++ [12] = "fdiv.hFGH", ++ [13] = "fdiv.sFGH", ++ [14] = "fdiv.dFGH", ++ [16] = "fmax.hFGH", ++ [17] = "fmax.sFGH", ++ [18] = "fmax.dFGH", ++ [20] = "fmin.hFGH", ++ [21] = "fmin.sFGH", ++ [22] = "fmin.dFGH", ++ [24] = "fmaxa.hFGH", ++ [25] = "fmaxa.sFGH", ++ [26] = "fmaxa.dFGH", ++ [28] = "fmina.hFGH", ++ [29] = "fmina.sFGH", ++ [30] = "fmina.dFGH", ++ [32] = "fscaleb.hFGH", ++ [33] = "fscaleb.sFGH", ++ [34] = "fscaleb.dFGH", ++ [36] = "fcopysign.hFGH", ++ [37] = "fcopysign.sFGH", ++ [38] = "fcopysign.dFGH", ++ [40] = map_farith2, [41] = map_fmov, ++ [50] = map_fconvert, [52] = map_fconvert1, ++ [53] = map_fconvert2, [54] = map_fconvert3, ++ [58] = map_fconvert4, [60] = map_fconvert5, ++} ++ ++local map_21_0 = { --21st:0, 18-20 ++ shift = 18, mask = 7, ++ [0] = map_18_0, ++ [1] = { shift = 17, mask = 1, [0] = "alsl.wDJKQ", "alsl.wuDJKQ", }, ++ [2] = {shift = 17, mask = 1, [0] = "bytepick.wDJKQ", }, ++ [3] = "bytepick.dDJKB", ++ [4] = map_18_4, ++ [5] = map_18_5, ++ [6] = map_18_6, ++ [7] = map_18_7, ++} ++ ++local map_21_1 = { --21st:1, 22nd:0, 15-20 ++ shift = 21, mask = 1, ++ [1] = { ++ shift = 18, mask = 7, ++ [0] = { ++ shift = 15, mask = 7, ++ [0] = "div.wDJK", ++ [1] = "mod.wDJK", ++ [2] = "div.wuDJK", ++ [3] = "mod.wuDJK", ++ [4] = "div.dDJK", ++ [5] = "mod.dDJK", ++ [6] = "div.duDJK", ++ [7] = "mod.duDJK", ++ }, ++ [1] = { ++ shift = 18, mask = 7, ++ [0] = "crc.w.b.wDJK", ++ [1] = "crc.w.h.wDJK", ++ [2] = "crc.w.w.wDJK", ++ [3] = "crc.w.d.wDJK", ++ [4] = "crcc.w.b.wDJK", ++ [5] = "crcc.w.h.wDJK", ++ [6] = "crcc.w.w.wDJK", ++ [7] = "crcc.w.d.wDJK", ++ }, ++ [2] = { ++ shift = 15, mask = 7, ++ [4] = breakC, [6] = syscallC, ++ }, ++ [3] = { shift = 17, mask = 1, [0] = "alsl.dDJKQ", }, ++ }, ++} ++ ++local map_22_0 = { ++ shift = 21, mask = 1, ++ [0] = map_21_0, ++ [1] = map_21_1, ++} ++ ++local map_shift = { -- 22nd:1, 21st:0 ++ shift = 16, mask = 31, ++ [0] = { shift = 15, mask = 1, [1] = "slli.wDJU", }, ++ [1] = "slli.dDJV", ++ [4] = { shift = 15, mask = 1, [1] = "srli.wDJU", }, ++ [5] = "srli.dDJV", ++ [8] = { shift = 15, mask = 1, [1] = "srai.wDJU", }, ++ [9] = "srai.dDJV", ++ [12] = { shift = 15, mask = 1, [1] = "rotri.wDJU", }, ++ [13] = "rotri.dDJV", ++} ++ ++local map_22_1 = { -- 22nd:1 ++ shift = 21, mask = 1, ++ [0] = map_shift, ++ [1] = { shift = 15, mask = 1, 
[0] = "bstrins.wDJMU", [1] = "bstrpick.wDJMU", }, ++} ++ ++local map_26_0 = { ++ shift = 22, mask = 15, ++ [0] = map_22_0, ++ [1] = map_22_1, ++ [2] = "bstrins.dDJNV", ++ [3] = "bstrpick.dDJNV", ++ [4] = map_farith, ++ [8] = "sltiDJX", ++ [9] = "sltuiDJX", ++ [10] = "addi.wDJX", ++ [11] = "addi.dDJX", ++ [12] = "lu52i.dDJX", ++ [13] = "andiDJT", ++ [14] = "oriDJT", ++ [15] = "xoriDJT", ++} ++ ++local map_long_i_5 = { -- Long immediate fixed-point arithmetic. ++ shift = 25, mask = 1, ++ [0] = "lu12i.wDZ", ++ [1] = "lu32i.dDZ", ++} ++ ++local map_long_i_6 = { ++ shift = 25, mask = 1, ++ [0] = "pcaddiDZ", ++ [1] = "pcalau12iDZ", ++} ++ ++local map_long_i_7 = { ++ shift = 25, mask = 1, ++ [0] = "pcaddu12iDZ", ++ [1] = "pcaddu18iDZ", ++} ++ ++local map_ldst0_14 = { ++ shift = 15, mask = 2047, ++ [0] = "ldx.bDJK", [8] = "ldx.hDJK", [16] = "ldx.wDJK", ++ [24] = "ldx.dDJK", [32] = "stx.bDJK", [40] = "stx.hDJK", ++ [48] = "stx.wDJK", [56] = "stx.dDJK", [64] = "ldx.buDJK", ++ [72] = "ldx.huDJK", [80] = "ldx.wuDJK", [96] = "fldx.sFJK", ++ [104] = "fldx.dFJK", [112] = "fstx.sFJK", [120] = "fstx.dFJK", ++ [232] = "fldgt.sFJK", [233] = "fldgt.dFJK", [234] = "fldle.sFJK", ++ [235] = "fldle.dFJK", [236] = "fstgt.sFJK", [237] = "fstgt.dFJK", ++ [238] = "fstle.sFJK", [239] = "fstle.dFJK", [240] = "ldgt.bDJK", ++ [241] = "ldgt.hDJK", [242] = "ldgt.wDJK", [243] = "ldgt.dDJK", ++ [244] = "ldle.bDJK", [245] = "ldle.hDJK", [246] = "ldle.wDJK", ++ [247] = "ldle.dDJK", [248] = "stgt.bDJK", [249] = "stgt.hDJK", ++ [250] = "stgt.wDJK", [251] = "stgt.dDJK", [252] = "stle.bDJK", ++ [253] = "stle.hDJK", [254] = "stle.wDJK", [255] = "stle.dDJK", ++} ++ ++local map_ldst1_8 = { ++ shift = 24, mask = 3, ++ [0] = "ll.wDJW", ++ [1] = "sc.wDJW", ++ [2] = "ll.dDJW", ++ [3] = "sc.dDJW", ++} ++ ++local map_ldst1_9 = { ++ shift = 24, mask = 3, ++ [0] = "ldptr.wDJW", ++ [1] = "stptr.wDJW", ++ [2] = "ldptr.dDJW", ++ [3] = "stptr.dDJW", ++} ++ ++local map_ldst1_10 = { ++ shift = 22, mask = 15, ++ [0] = "ld.bDJX", ++ [1] = "ld.hDJX", ++ [2] = "ld.wDo", ++ [3] = "ld.dDo", ++ [4] = "st.bDo", ++ [5] = "st.hDo", ++ [6] = "st.wDo", ++ [7] = "st.dDo", ++ [8] = "ld.buDo", ++ [9] = "ld.huDo", ++ [10] = "ld.wuDJX", ++ [12] = "fld.sFo", ++ [13] = "fst.sFo", ++ [14] = "fld.dFo", ++ [15] = "fst.dFo", ++} ++ ++local map_fcmp0 = { ++ shift = 15, mask = 31, ++ [0] = "fcmp.caf.sEGH", ++ [1] = "fcmp.saf.sEGH", ++ [2] = "fcmp.clt.sEGH", ++ [3] = "fcmp.slt.sEGH", ++ [4] = "fcmp.ceq.sEGH", ++ [5] = "fcmp.seq.sEGH", ++ [6] = "fcmp.cle.sEGH", ++ [7] = "fcmp.sle.sEGH", ++ [8] = "fcmp.cun.sEGH", ++ [9] = "fcmp.sun.sEGH", ++ [10] = "fcmp.cult.sEGH", ++ [11] ="fcmp.sult.sEGH", ++ [12] = "fcmp.cueq.sEGH", ++ [13] = "fcmp.sueq.sEGH", ++ [14] = "fcmp.cule.sEGH", ++ [15] = "fcmp.sule.sEGH", ++ [16] = "fcmp.cne.sEGH", ++ [17] = "fcmp.sne.sEGH", ++ [20] = "fcmp.cor.sEGH", ++ [21] = "fcmp.sor.sEGH", ++ [24] = "fcmp.cune.sEGH", ++ [25] = "fcmp.sune.sEGH", ++} ++ ++local map_fcmp1 = { ++ shift = 15, mask = 31, ++ [0] = "fcmp.caf.dEGH", ++ [1] = "fcmp.saf.dEGH", ++ [2] = "fcmp.clt.dEGH", ++ [3] = "fcmp.slt.dEGH", ++ [4] = "fcmp.ceq.dEGH", ++ [5] = "fcmp.seq.dEGH", ++ [6] = "fcmp.cle.dEGH", ++ [7] = "fcmp.sle.dEGH", ++ [8] = "fcmp.cun.dEGH", ++ [9] = "fcmp.sun.dEGH", ++ [10] = "fcmp.cult.dEGH", ++ [11] = "fcmp.sult.dEGH", ++ [12] = "fcmp.cueq.dEGH", ++ [13] = "fcmp.sueq.dEGH", ++ [14] = "fcmp.cule.dEGH", ++ [15] = "fcmp.sule.dEGH", ++ [16] = "fcmp.cne.dEGH", ++ [17] = "fcmp.sne.dEGH", ++ [20] = "fcmp.cor.dEGH", ++ [21] = "fcmp.sor.dEGH", ++ [24] = "fcmp.cune.dEGH", ++ 
[25] = "fcmp.sune.dEGH", ++} ++ ++local map_fcmp = { ++ shift = 20, mask = 63, ++ [1] = { shift = 3, mask = 3, [0] = map_fcmp0, }, ++ [2] = { shift = 3, mask = 3, [0] = map_fcmp1, }, ++ [16] = { shift = 18, mask = 3, [0] = "fselFGHI", }, ++} ++ ++local map_init = { ++ shift = 26, mask = 63, ++ [0] = map_26_0, ++ [3] = map_fcmp, ++ [4] = "addu16i.dDJY", ++ [5] = map_long_i_5, ++ [6] = map_long_i_6, ++ [7] = map_long_i_7, ++ [8] = map_ldst1_8, ++ [9] = map_ldst1_9, ++ [10] = map_ldst1_10, ++ [14] = map_ldst0_14, ++ [16] = "beqzJL", ++ [17] = "bnezJL", ++ [18] = { shift = 8, mask = 3, [0] = "bceqzAL", "bcnezAL", }, ++ [19] = "jirlDJa", ++ [20] = "bP", ++ [21] = "blP", ++ [22] = "beqJDO", ++ [23] = "bneJDO", ++ [24] = "bltJDO", ++ [25] = "bgeJDO", ++ [26] = "bltuJDO", ++ [27] = "bgeuJDO", ++} ++ ++------------------------------------------------------------------------------ ++ ++local map_gpr = { ++ [0] = "r0", "ra", "r2", "sp", "r4", "r5", "r6", "r7", ++ "r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15", ++ "r16", "r17", "r18", "r19", "r20", "r21", "r22", "r23", ++ "r24", "r25", "r26", "r27", "r28", "r29", "r30", "r31", ++} ++ ++------------------------------------------------------------------------------ ++ ++-- Output a nicely formatted line with an opcode and operands. ++local function putop(ctx, text, operands) ++ local pos = ctx.pos ++ local extra = "" ++ if ctx.rel then ++ local sym = ctx.symtab[ctx.rel] ++ if sym then extra = "\t->"..sym end ++ end ++ if ctx.hexdump > 0 then ++ ctx.out(format("%08x %s %-7s %s%s\n", ++ ctx.addr+pos, tohex(ctx.op), text, concat(operands, ", "), extra)) ++ else ++ ctx.out(format("%08x %-7s %s%s\n", ++ ctx.addr+pos, text, concat(operands, ", "), extra)) ++ end ++ ctx.pos = pos + 4 ++end ++ ++-- Fallback for unknown opcodes. ++local function unknown(ctx) ++ return putop(ctx, ".long", { "0x"..tohex(ctx.op) }) ++end ++ ++local function get_le(ctx) ++ local pos = ctx.pos ++ local b0, b1, b2, b3 = byte(ctx.code, pos+1, pos+4) ++ return bor(lshift(b3, 24), lshift(b2, 16), lshift(b1, 8), b0) ++end ++ ++-- Disassemble a single instruction. 
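A standalone sketch (not in the patch) of the table walk that disass_ins() below performs over the maps above, traced by hand for 0x001098a4, the add.d r4, r5, r6 word from the encoder-side example: each nested table selects on (op >> shift) & mask until a template string such as "add.dDJK" is reached, and the trailing letters then pick out the register fields.

/* Sketch: the field extraction disass_ins() ends up doing for 0x001098a4. */
#include <stdio.h>

int main(void)
{
  unsigned int op = 0x001098a4u;
  unsigned int pri = op >> 26;          /* 0 -> map_26_0     */
  unsigned int f22 = (op >> 22) & 15;   /* 0 -> map_22_0     */
  unsigned int f21 = (op >> 21) & 1;    /* 0 -> map_21_0     */
  unsigned int f18 = (op >> 18) & 7;    /* 4 -> map_18_4     */
  unsigned int f15 = (op >> 15) & 7;    /* 1 -> "add.dDJK"   */
  unsigned int rd  = op & 31, rj = (op >> 5) & 31, rk = (op >> 10) & 31;
  printf("%u %u %u %u %u -> add.d r%u, r%u, r%u\n",
         pri, f22, f21, f18, f15, rd, rj, rk);   /* add.d r4, r5, r6 */
  return 0;
}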
++local function disass_ins(ctx) ++ local op = ctx:get() ++ local operands = {} ++ local last = nil ++ ctx.op = op ++ ctx.rel = nil ++ ++ local opat = ctx.map_pri[rshift(op, 26)] ++ while type(opat) ~= "string" do ++ if not opat then return unknown(ctx) end ++ opat = opat[band(rshift(op, opat.shift), opat.mask)] ++ end ++ local name, pat = match(opat, "^([a-z0-9_.]*)(.*)") ++ local altname, pat2 = match(pat, "|([a-z0-9_.|]*)(.*)") ++ if altname then pat = pat2 end ++ ++ for p in gmatch(pat, ".") do ++ local x = nil ++ if p == "D" then ++ x = map_gpr[band(rshift(op, 0), 31)] ++ elseif p == "J" then ++ x = map_gpr[band(rshift(op, 5), 31)] ++ elseif p == "K" then ++ x = map_gpr[band(rshift(op, 10), 31)] ++ elseif p == "F" then ++ x = "f"..band(rshift(op, 0), 31) ++ elseif p == "G" then ++ x = "f"..band(rshift(op, 5), 31) ++ elseif p == "H" then ++ x = "f"..band(rshift(op, 10), 31) ++ elseif p == "S" then ++ x = "fcsr"..band(rshift(op, 0), 31) ++ elseif p == "R" then ++ x = "fcsr"..band(rshift(op, 5), 31) ++ elseif p == "E" then ++ x = "fcc"..band(rshift(op, 0), 7) ++ elseif p == "A" then ++ x = "fcc"..band(rshift(op, 5), 7) ++ elseif p == "I" then ++ x = "fcc"..band(rshift(op, 15), 7) ++ elseif p == "Q" then --TODO sa2 ++ x = band(rshift(op, 15), 3) ++ elseif p == "B" then --TODO sa3 ++ x = band(rshift(op, 15), 7) ++ elseif p == "M" then --TODO msbw ++ x = band(rshift(op, 16), 31) ++ elseif p == "N" then --TODO msbd ++ x = band(rshift(op, 16), 63) ++ elseif p == "U" then -- ui5 ++ x = band(rshift(op, 10), 31) ++ elseif p == "V" then -- ui6 ++ x = band(rshift(op, 10), 63) ++ elseif p == "T" then -- ui12 ++ x = band(rshift(op, 10), 4095) ++ elseif p == "W" then -- si14 ++ x = band(rshift(op, 10), 16383) ++ elseif p == "X" then -- si12 ++ x = band(rshift(op, 10), 4095) ++ elseif p == "o" then ++ local disp = band((rshift(op, 10)), 0xfff) ++ operands[#operands] = format("%s, %d", last, disp) ++ elseif p == "Y" then -- si16 ++ x = band(rshift(op, 10), 65535) ++ elseif p == "Z" then -- si20 ++ x = band(rshift(op, 10), 1048575) ++ elseif p == "C" then -- code ++ x = band(rshift(op, 0), 32767) ++ elseif p == "O" then -- offs[15:0] ++ x = band(rshift(op, 10), 65535) ++ elseif p == "L" then -- offs[15:0] + offs[20:16] ++ x = lshift(band(op, 31), 16) + band(rshift(op, 10), 65535) ++ elseif p == "P" then -- offs[15:0] + offs[25:16] ++ x = lshift(band(op, 1023), 16) + band(rshift(op, 10), 65535) ++ elseif p == "a" then ++ x = band(rshift(op, 10), 65535) ++ else ++ assert(false) ++ end ++ if x then operands[#operands+1] = x; last = x end ++ end ++ ++ return putop(ctx, name, operands) ++end ++ ++------------------------------------------------------------------------------ ++ ++-- Disassemble a block of code. ++local function disass_block(ctx, ofs, len) ++ if not ofs then ofs = 0 end ++ local stop = len and ofs+len or #ctx.code ++ stop = stop - stop % 4 ++ ctx.pos = ofs - ofs % 4 ++ ctx.rel = nil ++ while ctx.pos < stop do disass_ins(ctx) end ++end ++ ++-- Extended API: create a disassembler context. Then call ctx:disass(ofs, len). ++local function create(code, addr, out) ++ local ctx = {} ++ ctx.code = code ++ ctx.addr = addr or 0 ++ ctx.out = out or io.write ++ ctx.symtab = {} ++ ctx.disass = disass_block ++ ctx.hexdump = 8 ++ ctx.get = get_le ++ ctx.map_pri = map_init ++ return ctx ++end ++ ++-- Simple API: disassemble code (a string) at address and output via out. ++local function disass(code, addr, out) ++ create(code, addr, out):disass() ++end ++ ++-- Return register name for RID. 
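One more hedged sketch (not part of the patch) tying the L/P operand extraction in disass_ins() above to the DASM_REL_PC patch code in dasm_loongarch64.h: branch displacements are word offsets (the encoder shifts the byte offset right by 2), with offs[15:0] stored in instruction bits 25..10; B/BL keep offs[25:16] in bits 9..0, while BEQZ/BNEZ/BCEQZ/BCNEZ keep offs[20:16] in bits 4..0. The B base opcode 0x50000000 comes from the "50000000P" template.

/* Sketch: packing a byte displacement into a B/BL word, as DASM_REL_PC does. */
#include <stdio.h>

static unsigned int patch_b(unsigned int ins, int n)   /* n = byte offset */
{
  unsigned int off = (unsigned int)n >> 2;             /* word offset     */
  return ins | ((off & 0xffffu) << 10)                 /* offs[15:0]      */
             | ((off >> 16) & 0x3ffu);                 /* offs[25:16]     */
}

int main(void)
{
  unsigned int b = patch_b(0x50000000u, 0x44000);      /* b +0x44000 bytes */
  printf("0x%08x\n", b);                               /* 0x50400001 */
  return 0;
}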
++local function regname(r) ++ if r < 32 then return map_gpr[r] end ++ return "f"..(r-32) ++end ++ ++-- Public module functions. ++return { ++ create = create, ++ disass = disass, ++ regname = regname ++} ++ +diff --git a/libs/luajit/LuaJIT-src/src/lib_jit.c b/libs/luajit/LuaJIT-src/src/lib_jit.c +index 22ca0a1..09be9eb 100644 +--- a/libs/luajit/LuaJIT-src/src/lib_jit.c ++++ b/libs/luajit/LuaJIT-src/src/lib_jit.c +@@ -732,6 +732,10 @@ static uint32_t jit_cpudetect(lua_State *L) + } + #endif + #endif ++ ++#elif LJ_TARGET_LOONGARCH64 ++ flags |= JIT_F_GS464V; ++ + #else + #error "Missing CPU detection for this architecture" + #endif +diff --git a/libs/luajit/LuaJIT-src/src/lj_arch.h b/libs/luajit/LuaJIT-src/src/lj_arch.h +index e8ad844..6bdaaaf 100644 +--- a/libs/luajit/LuaJIT-src/src/lj_arch.h ++++ b/libs/luajit/LuaJIT-src/src/lj_arch.h +@@ -29,6 +29,8 @@ + #define LUAJIT_ARCH_mips32 6 + #define LUAJIT_ARCH_MIPS64 7 + #define LUAJIT_ARCH_mips64 7 ++#define LUAJIT_ARCH_LOONGARCH64 9 ++#define LUAJIT_ARCH_loongarch64 9 + + /* Target OS. */ + #define LUAJIT_OS_OTHER 0 +@@ -55,6 +57,8 @@ + #define LUAJIT_TARGET LUAJIT_ARCH_MIPS64 + #elif defined(__mips__) || defined(__mips) || defined(__MIPS__) || defined(__MIPS) + #define LUAJIT_TARGET LUAJIT_ARCH_MIPS32 ++#elif defined(__loongarch64__) || defined(__loongarch64) || defined(__LOONGARCH64__) || defined(__LOONGARCH64) ++#define LUAJIT_TARGET LUAJIT_ARCH_LOONGARCH64 + #else + #error "No support for this architecture (yet)" + #endif +@@ -358,6 +362,40 @@ + #define LJ_ARCH_VERSION 10 + #endif + ++#elif LUAJIT_TARGET == LUAJIT_ARCH_LOONGARCH64 ++#define LJ_ARCH_NAME "loongarch64" ++#define LJ_ARCH_BITS 64 ++#define LJ_ARCH_ENDIAN LUAJIT_LE ++#define LJ_TARGET_LOONGARCH64 1 ++#define LJ_TARGET_GC64 1 ++#define LJ_TARGET_EHRETREG 4 ++#define LJ_TARGET_EHRAREG 30 ++#define LJ_TARGET_JUMPRANGE 27 /* 2*2^27 = 256MB-aligned region */ ++#define LJ_TARGET_MASKSHIFT 1 ++#define LJ_TARGET_MASKROT 1 ++#define LJ_TARGET_UNIFYROT 2 /* Want only IR_BROR. */ ++#define LJ_ARCH_NUMMODE LJ_NUMMODE_DUAL ++ ++#if !defined(LJ_ARCH_HASFPU) ++#ifdef __loongarch_soft_float ++#define LJ_ARCH_HASFPU 0 ++#else ++#define LJ_ARCH_HASFPU 1 ++#endif ++#endif ++ ++#if !defined(LJ_ABI_SOFTFP) ++#ifdef __loongarch_soft_float ++#define LJ_ABI_SOFTFP 1 ++#else ++#define LJ_ABI_SOFTFP 0 ++#endif ++#endif ++ ++#if LJ_ABI_SOFTFP || !LJ_ARCH_HASFPU ++#define LJ_ARCH_NOJIT 1 ++#endif ++ + #else + #error "No target architecture defined" + #endif +diff --git a/libs/luajit/LuaJIT-src/src/lj_asm.c b/libs/luajit/LuaJIT-src/src/lj_asm.c +index c2cf5a9..72932f8 100644 +--- a/libs/luajit/LuaJIT-src/src/lj_asm.c ++++ b/libs/luajit/LuaJIT-src/src/lj_asm.c +@@ -177,6 +177,8 @@ IRFLDEF(FLOFS) + #include "lj_emit_ppc.h" + #elif LJ_TARGET_MIPS + #include "lj_emit_mips.h" ++#elif LJ_TARGET_LOONGARCH64 ++#include "lj_emit_loongarch64.h" + #else + #error "Missing instruction emitter for target CPU" + #endif +@@ -1597,6 +1599,8 @@ static void asm_loop(ASMState *as) + #include "lj_asm_ppc.h" + #elif LJ_TARGET_MIPS + #include "lj_asm_mips.h" ++#elif LJ_TARGET_LOONGARCH64 ++#include "lj_asm_loongarch64.h" + #else + #error "Missing assembler for target CPU" + #endif +diff --git a/libs/luajit/LuaJIT-src/src/lj_asm_loongarch64.h b/libs/luajit/LuaJIT-src/src/lj_asm_loongarch64.h +new file mode 100644 +index 0000000..28847cb +--- /dev/null ++++ b/libs/luajit/LuaJIT-src/src/lj_asm_loongarch64.h +@@ -0,0 +1,2272 @@ ++/* ++** LoongArch IR assembler (SSA IR -> machine code). ++** Copyright (C) 2005-2021 Mike Pall. 
See Copyright Notice in luajit.h ++** Copyright (C) 2021 Loongson Technology. All rights reserved. ++*/ ++ ++/* -- Register allocator extensions --------------------------------------- */ ++ ++/* Allocate a register with a hint. */ ++static Reg ra_hintalloc(ASMState *as, IRRef ref, Reg hint, RegSet allow) ++{ ++ Reg r = IR(ref)->r; ++ if (ra_noreg(r)) { ++ if (!ra_hashint(r) && !iscrossref(as, ref)) ++ ra_sethint(IR(ref)->r, hint); /* Propagate register hint. */ ++ r = ra_allocref(as, ref, allow); ++ } ++ ra_noweak(as, r); ++ return r; ++} ++ ++/* Allocate two source registers for three-operand instructions. */ ++static Reg ra_alloc2(ASMState *as, IRIns *ir, RegSet allow) ++{ ++ IRIns *irl = IR(ir->op1), *irr = IR(ir->op2); ++ Reg left = irl->r, right = irr->r; ++ if (ra_hasreg(left)) { ++ ra_noweak(as, left); ++ if (ra_noreg(right)) ++ right = ra_allocref(as, ir->op2, rset_exclude(allow, left)); ++ else ++ ra_noweak(as, right); ++ } else if (ra_hasreg(right)) { ++ ra_noweak(as, right); ++ left = ra_allocref(as, ir->op1, rset_exclude(allow, right)); ++ } else if (ra_hashint(right)) { ++ right = ra_allocref(as, ir->op2, allow); ++ left = ra_allocref(as, ir->op1, rset_exclude(allow, right)); ++ } else { ++ left = ra_allocref(as, ir->op1, allow); ++ right = ra_allocref(as, ir->op2, rset_exclude(allow, left)); ++ } ++ return left | (right << 8); ++} ++ ++/* -- Guard handling ------------------------------------------------------ */ ++ ++/* Setup exit stub after the end of each trace. */ ++static void asm_exitstub_setup(ASMState *as) ++{ ++ MCode *mxp = as->mctop; ++ if (as->mcp == mxp) ++ --as->mcp; ++ /* st.w TMP, sp, 0; li TMP, traceno; b ->vm_exit_handler;*/ ++ *--mxp = LAI_JIRL | RID_R0 | LAF_J(RID_R20) | 0<<10; ++// *--mxp = LAI_B | LAF_I((uintptr_t)(void *)lj_vm_exit_handler & 0xffff) | (((uintptr_t)(void *)lj_vm_exit_handler >> 16) & 0x3ff); ++ emit_dj32i(as, RID_TMP, RID_ZERO, as->T->traceno); ++ *--mxp = *as->mcp; ++ *--mxp = LAI_LU52I_D | RID_R20 | LAF_J(RID_R20) | ((((uintptr_t)(void *)lj_vm_exit_handler)>>52)&0xfff)<<10; ++ *--mxp = LAI_LU32I_D | RID_R20 | ((((uintptr_t)(void *)lj_vm_exit_handler)>>32)&0xfffff)<<5; ++ *--mxp = LAI_ORI | RID_R20| LAF_J(RID_R20) | (((uintptr_t)(void *)lj_vm_exit_handler)&0xfff) << 10; ++ *--mxp = LAI_LU12I_W | RID_R20 | ((((uintptr_t)(void *)lj_vm_exit_handler)&0xfffff000)>>12)<<5; ++ *--mxp = LAI_ST_W|LAF_D(RID_TMP)|LAF_J(RID_SP)|0; ++ as->mctop = mxp; ++} ++ ++/* Keep this in-sync with exitstub_trace_addr(). */ ++#define asm_exitstub_addr(as) ((as)->mctop) ++ ++/* Emit conditional branch to exit for guard. */ ++static void asm_guard(ASMState *as, LAIns lai, Reg rj, Reg rd) ++{ ++ MCode *target = asm_exitstub_addr(as); ++ MCode *p = as->mcp; ++ if (LJ_UNLIKELY(p == as->invmcp)) { ++ as->invmcp = NULL; ++ as->loopinv = 1; ++ as->mcp = p; ++ lai = lai ^ ((lai>>28) == 4 ? 0x00000100u : 0x04000000u); /* Invert cond. BEQ BNE BGE BLZ*/ ++ target = p; /* Patch target later in asm_loop_fixup. 
*/ ++ } ++ if (rj == RID_TMP) { ++ emit_branch(as, lai, RID_R20, rd, target); ++ emit_dj32i(as, RID_TMP, RID_ZERO, as->snapno); ++ /* move r18, r1*/ ++ emit_djk(as, LAI_OR, RID_R20, rj, RID_ZERO); ++ } else { ++ emit_branch(as, lai, rj, rd, target); ++ emit_dj32i(as, RID_TMP, RID_ZERO, as->snapno); ++ } ++} ++ ++static void asm_guard21(ASMState *as, LAIns lai, Reg rj, Reg rd) ++{ ++ MCode *target = asm_exitstub_addr(as); ++ MCode *p = as->mcp; ++ if (LJ_UNLIKELY(p == as->invmcp)) { ++ as->invmcp = NULL; ++ as->loopinv = 1; ++ as->mcp = p; ++ lai = lai ^ ((lai>>28) == 4 ? 0x00000100u : 0x04000000u); /* Invert cond. BCEQZ BCNEZ*/ ++ target = p; /* Patch target later in asm_loop_fixup. */ ++ } ++ if (rj == RID_TMP) { ++ emit_branch21(as, lai, RID_R20, target); ++ emit_dj32i(as, RID_TMP, RID_ZERO, as->snapno); ++ /* move r18, r1*/ ++ emit_djk(as, LAI_OR, RID_R20, rj, RID_ZERO); ++ } else { ++ emit_branch21(as, lai, rj, target); ++ emit_dj32i(as, RID_TMP, RID_ZERO, as->snapno); ++ } ++} ++ ++/* -- Operand fusion ------------------------------------------------------ */ ++ ++/* Limit linear search to this distance. Avoids O(n^2) behavior. */ ++#define CONFLICT_SEARCH_LIM 31 ++ ++/* Check if there's no conflicting instruction between curins and ref. */ ++static int noconflict(ASMState *as, IRRef ref, IROp conflict) ++{ ++ IRIns *ir = as->ir; ++ IRRef i = as->curins; ++ if (i > ref + CONFLICT_SEARCH_LIM) ++ return 0; /* Give up, ref is too far away. */ ++ while (--i > ref) ++ if (ir[i].o == conflict) ++ return 0; /* Conflict found. */ ++ return 1; /* Ok, no conflict. */ ++} ++ ++/* Fuse the array base of colocated arrays. */ ++static int32_t asm_fuseabase(ASMState *as, IRRef ref) ++{ ++ IRIns *ir = IR(ref); ++ if (ir->o == IR_TNEW && ir->op1 <= LJ_MAX_COLOSIZE && ++ !neverfuse(as) && noconflict(as, ref, IR_NEWREF)) ++ return (int32_t)sizeof(GCtab); ++ return 0; ++} ++ ++/* Fuse array/hash/upvalue reference into register+offset operand. */ ++static Reg asm_fuseahuref(ASMState *as, IRRef ref, int32_t *ofsp, RegSet allow) //TODO ++{ ++ IRIns *ir = IR(ref); ++ if (ra_noreg(ir->r)) { ++ if (ir->o == IR_AREF) { ++ if (mayfuse(as, ref)) { ++ if (irref_isk(ir->op2)) { ++ IRRef tab = IR(ir->op1)->op1; ++ int32_t ofs = asm_fuseabase(as, tab); ++ IRRef refa = ofs ? tab : ir->op1; ++ ofs += 8*IR(ir->op2)->i; ++ if (checki16(ofs)) { ++ *ofsp = ofs; ++ return ra_alloc1(as, refa, allow); ++ } ++ } ++ } ++ } else if (ir->o == IR_HREFK) { ++ if (mayfuse(as, ref)) { ++ int32_t ofs = (int32_t)(IR(ir->op2)->op2 * sizeof(Node)); ++ if (checki16(ofs)) { ++ *ofsp = ofs; ++ return ra_alloc1(as, ir->op1, allow); ++ } ++ } ++ } else if (ir->o == IR_UREFC) { ++ if (irref_isk(ir->op1)) { ++ GCfunc *fn = ir_kfunc(IR(ir->op1)); ++ intptr_t ofs = (intptr_t)&gcref(fn->l.uvptr[(ir->op2 >> 8)])->uv.tv; ++ intptr_t jgl = (intptr_t)J2G(as->J); ++ if ((uintptr_t)(ofs-jgl) < 65536) { ++ *ofsp = ofs-jgl-32768; ++ return RID_JGL; ++ } else { ++ *ofsp = (int16_t)ofs; ++ return ra_allock(as, ofs-(int16_t)ofs, allow); ++ } ++ } ++ } ++ } ++ *ofsp = 0; ++ return ra_alloc1(as, ref, allow); ++} ++ ++/* Fuse XLOAD/XSTORE reference into load/store operand. 
*/ ++static void asm_fusexref(ASMState *as, LAIns lai, Reg rd, IRRef ref, //TODO ++ RegSet allow, int32_t ofs) ++{ ++ IRIns *ir = IR(ref); ++ Reg base; ++ if (ra_noreg(ir->r) && canfuse(as, ir)) { ++ if (ir->o == IR_ADD) { ++ intptr_t ofs2; ++ if (irref_isk(ir->op2) && (ofs2 = ofs + get_kval(as, ir->op2), ++ checki16(ofs2))) { ++ ref = ir->op1; ++ ofs = (int32_t)ofs2; ++ } ++ } else if (ir->o == IR_STRREF) { ++ intptr_t ofs2 = 65536; ++ lua_assert(ofs == 0); ++ ofs = (int32_t)sizeof(GCstr); ++ if (irref_isk(ir->op2)) { ++ ofs2 = ofs + get_kval(as, ir->op2); ++ ref = ir->op1; ++ } else if (irref_isk(ir->op1)) { ++ ofs2 = ofs + get_kval(as, ir->op1); ++ ref = ir->op2; ++ } ++ if (!checki16(ofs2)) { ++ /* NYI: Fuse ADD with constant. */ ++ Reg right, left = ra_alloc2(as, ir, allow); ++ right = (left >> 8); left &= 255; ++ emit_dji(as, lai, rd, RID_TMP, ofs&0xfff); ++ emit_djk(as, LAI_ADD_D, RID_TMP, left, right); ++ return; ++ } ++ ofs = ofs2; ++ } ++ } ++ base = ra_alloc1(as, ref, allow); ++ emit_dji(as, lai, rd, base, ofs&0xfff); ++} ++ ++/* -- Calls --------------------------------------------------------------- */ ++ ++/* Generate a call to a C function. */ ++static void asm_gencall(ASMState *as, const CCallInfo *ci, IRRef *args) ++{ ++ uint32_t n, nargs = CCI_XNARGS(ci); ++ int32_t ofs = 0; ++#if LJ_SOFTFP ++ Reg gpr = REGARG_FIRSTGPR; ++#else ++ Reg gpr, fpr = REGARG_FIRSTFPR; ++#endif ++ if ((void *)ci->func) ++ emit_call(as, (void *)ci->func); //TODO ++#if !LJ_SOFTFP ++ for (gpr = REGARG_FIRSTGPR; gpr <= REGARG_LASTGPR; gpr++) ++ as->cost[gpr] = REGCOST(~0u, ASMREF_L); ++ gpr = REGARG_FIRSTGPR; ++#endif ++ for (n = 0; n < nargs; n++) { /* Setup args. */ ++ IRRef ref = args[n]; ++ if (ref) { ++ IRIns *ir = IR(ref); ++#if !LJ_SOFTFP ++ if (irt_isfp(ir->t) && fpr <= REGARG_LASTFPR && ++ !(ci->flags & CCI_VARARG)) { ++ lua_assert(rset_test(as->freeset, fpr)); /* Already evicted. */ ++ ra_leftov(as, fpr, ref); ++ fpr += 1; ++ } else ++#endif ++ { ++ if (gpr <= REGARG_LASTGPR) { ++ lua_assert(rset_test(as->freeset, gpr)); /* Already evicted. */ ++#if !LJ_SOFTFP ++ if (irt_isfp(ir->t)) { ++ RegSet of = as->freeset; ++ Reg r; ++ /* Workaround to protect argument GPRs from being used for remat. */ ++ as->freeset &= ~RSET_RANGE(REGARG_FIRSTGPR, REGARG_LASTGPR+1); ++ r = ra_alloc1(as, ref, RSET_FPR); ++ as->freeset |= (of & RSET_RANGE(REGARG_FIRSTGPR, REGARG_LASTGPR+1)); ++ if (irt_isnum(ir->t)) { ++ emit_dj(as, LAI_MOVFR2GR_D, gpr, r); ++ gpr++; ++ } else if (irt_isfloat(ir->t)) { ++ emit_dj(as, LAI_MOVFR2GR_S, gpr, r); ++ gpr++; ++ } ++ } else ++#endif ++ { ++ ra_leftov(as, gpr, ref); ++ gpr++; ++ } ++ } else { ++ Reg r = ra_alloc1(as, ref, !LJ_SOFTFP && irt_isfp(ir->t) ? RSET_FPR : RSET_GPR); ++ emit_spstore(as, ir, r, ofs); ++ ofs += 8; ++ } ++ } ++ } else { ++#if !LJ_SOFTFP ++ fpr = REGARG_LASTFPR+1; ++#endif ++ if (gpr <= REGARG_LASTGPR) { ++ gpr++; ++ } else { ++ ofs += 8; ++ } ++ } ++ checkmclim(as); ++ } ++} ++ ++/* Setup result reg/sp for call. Evict scratch regs. */ ++static void asm_setupresult(ASMState *as, IRIns *ir, const CCallInfo *ci) ++{ ++ RegSet drop = RSET_SCRATCH; ++#if !LJ_SOFTFP ++ if ((ci->flags & CCI_NOFPRCLOBBER)) ++ drop &= ~RSET_FPR; ++#endif ++ if (ra_hasreg(ir->r)) ++ rset_clear(drop, ir->r); /* Dest reg handled below. */ ++ ra_evictset(as, drop); /* Evictions must be performed first. 
*/ ++ if (ra_used(ir)) { ++ lua_assert(!irt_ispri(ir->t)); ++ if (!LJ_SOFTFP && irt_isfp(ir->t)) { ++ if ((ci->flags & CCI_CASTU64)) { ++ int32_t ofs = sps_scale(ir->s); ++ Reg dest = ir->r; ++ if (ra_hasreg(dest)) { ++ ra_free(as, dest); ++ ra_modified(as, dest); ++ emit_dj(as, LAI_MOVGR2FR_D, dest, RID_RET); ++ } ++ if (ofs) { ++ //emit_dji(as, LAI_ST_D, RID_RET, RID_SP, ofs); //TODO ofs&0xfff? ++ emit_djk(as, LAI_STX_D, RID_RET, RID_SP, RID_R19); ++ emit_d16i(as, RID_R19, ofs); ++ } ++ } else { ++ ra_destreg(as, ir, RID_FPRET); ++ } ++ } else { ++ ra_destreg(as, ir, RID_RET); ++ } ++ } ++} ++ ++static void asm_callx(ASMState *as, IRIns *ir) ++{ ++ IRRef args[CCI_NARGS_MAX*2]; ++ CCallInfo ci; ++ IRRef func; ++ IRIns *irf; ++ ci.flags = asm_callx_flags(as, ir); ++ asm_collectargs(as, ir, &ci, args); ++ asm_setupresult(as, ir, &ci); ++ func = ir->op2; irf = IR(func); ++ if (irf->o == IR_CARG) { func = irf->op1; irf = IR(func); } ++ if (irref_isk(func)) { /* Call to constant address. */ ++ ci.func = (ASMFunction)(void *)get_kval(as, func); ++ } else { /* Need specific register for indirect calls. */ ++ Reg r = ra_alloc1(as, func, RID2RSET(RID_CFUNCADDR)); ++ MCode *p = as->mcp; ++ *--p = LAI_JIRL | LAF_D(RID_RA) | LAF_J(r); ++ *--p = LAI_MOVE | LAF_D(RID_CFUNCADDR) | LAF_J(r); ++ //*--p = LAI_JIRL | LAF_D(RID_RA) | LAF_J(r); ++ as->mcp = p; ++ ci.func = (ASMFunction)(void *)0; ++ } ++ asm_gencall(as, &ci, args); ++} ++ ++#if !LJ_SOFTFP ++static void asm_callround(ASMState *as, IRIns *ir, IRCallID id) ++{ ++ /* The modified regs must match with the *.dasc implementation. */ ++ RegSet drop = RID2RSET(RID_R12)|RID2RSET(RID_FPRET)| ++ RID2RSET(RID_F23)|RID2RSET(RID_F10)|RID2RSET(REGARG_FIRSTFPR) ++ |RID2RSET(RID_F19); ++ if (ra_hasreg(ir->r)) rset_clear(drop, ir->r); ++ ra_evictset(as, drop); ++ ra_destreg(as, ir, RID_FPRET); ++ emit_call(as, (void *)lj_ir_callinfo[id].func); ++ ra_leftov(as, REGARG_FIRSTFPR, ir->op1); ++} ++#endif ++ ++/* -- Returns ------------------------------------------------------------- */ ++ ++/* Return to lower frame. Guard that it goes to the right spot. */ ++static void asm_retf(ASMState *as, IRIns *ir) ++{ ++ Reg base = ra_alloc1(as, REF_BASE, RSET_GPR); ++ void *pc = ir_kptr(IR(ir->op2)); ++ int32_t delta = 1+LJ_FR2+bc_a(*((const BCIns *)pc - 1)); ++ as->topslot -= (BCReg)delta; ++ if ((int32_t)as->topslot < 0) as->topslot = 0; ++ irt_setmark(IR(REF_BASE)->t); /* Children must not coalesce with BASE reg. */ ++ emit_setgl(as, base, jit_base); ++ emit_addptr(as, base, -8*delta); ++ asm_guard(as, LAI_BNE, RID_TMP, ++ ra_allock(as, igcptr(pc), rset_exclude(RSET_GPR, base))); ++ emit_dji(as, LAI_LD_D, RID_TMP, base, -8&0xfff); ++} ++ ++/* -- Buffer operations --------------------------------------------------- */ ++ ++#if LJ_HASBUFFER ++static void asm_bufhdr_write(ASMState *as, Reg sb) ++{ ++ Reg tmp = ra_scratch(as, rset_exclude(RSET_GPR, sb)); ++ IRIns irgc; ++ irgc.ot = IRT(0, IRT_PGC); /* GC type. */ ++ emit_storeofs(as, &irgc, RID_TMP, sb, offsetof(SBuf, L)); ++ if ((as->flags & JIT_F_GS464V)) { ++ emit_djml(as, LJ_64? 
LAI_BSTRINS_D : LAI_BSTRINS_W, RID_TMP, tmp, lj_fls(SBUF_MASK_FLAG), 0); ++ } else { ++ emit_djk(as, LAI_OR, RID_TMP, RID_TMP, tmp); ++ emit_dji(as, LAI_ANDI, tmp, tmp, SBUF_MASK_FLAG); ++ } ++ emit_getgl(as, RID_TMP, cur_L); ++ emit_loadofs(as, &irgc, tmp, sb, offsetof(SBuf, L)); ++} ++#endif ++ ++/* -- Type conversions ---------------------------------------------------- */ ++ ++#if !LJ_SOFTFP ++static void asm_tointg(ASMState *as, IRIns *ir, Reg left) ++{ ++ Reg tmp = ra_scratch(as, rset_exclude(RSET_FPR, left)); ++ Reg dest = ra_dest(as, ir, RSET_GPR); ++ //asm_guard21(as, LAI_BCEQZ, tmp&7, (tmp&7)); ++ asm_guard21(as, LAI_BCEQZ, 0, (tmp&7)); ++ //emit_djk(as, LAI_FCMP_CEQ_D, tmp&7, tmp, left); ++ emit_djk(as, LAI_FCMP_CEQ_D, 0, tmp, left); ++ emit_dj(as, LAI_FFINT_D_W, tmp, tmp); ++ emit_dj(as, LAI_MOVFR2GR_S, dest, tmp); ++ emit_dj(as, LAI_FTINT_W_D, tmp, left); ++} ++ ++static void asm_tobit(ASMState *as, IRIns *ir) ++{ ++ RegSet allow = RSET_FPR; ++ Reg dest = ra_dest(as, ir, RSET_GPR); ++ Reg left = ra_alloc1(as, ir->op1, allow); ++ Reg right = ra_alloc1(as, ir->op2, rset_clear(allow, left)); ++ Reg tmp = ra_scratch(as, rset_clear(allow, right)); ++ emit_dj(as, LAI_MOVFR2GR_S, dest, tmp); ++ emit_djk(as, LAI_FADD_D, tmp, left, right); ++} ++#elif LJ_64 /* && LJ_SOFTFP */ ++static void asm_tointg(ASMState *as, IRIns *ir, Reg r) ++{ ++ /* The modified regs must match with the *.dasc implementation. */ ++ RegSet drop = RID2RSET(REGARG_FIRSTGPR)|RID2RSET(RID_RET)|RID2RSET(RID_RET+1)| ++ RID2RSET(RID_R12); // r1 -> r19, r12 -> r12 ++ if (ra_hasreg(ir->r)) rset_clear(drop, ir->r); ++ ra_evictset(as, drop); ++ /* Return values are in RID_RET (converted value) and RID_RET+1 (status). */ ++ ra_destreg(as, ir, RID_RET); ++ asm_guard(as, LAI_BNE, RID_RET+1, RID_ZERO); ++ emit_call(as, (void *)lj_ir_callinfo[IRCALL_lj_vm_tointg].func); ++ if (r == RID_NONE) ++ ra_leftov(as, REGARG_FIRSTGPR, ir->op1); ++ else if (r != REGARG_FIRSTGPR) ++ emit_move(as, REGARG_FIRSTGPR, r); ++} ++ ++static void asm_tobit(ASMState *as, IRIns *ir) ++{ ++ Reg dest = ra_dest(as, ir, RSET_GPR); ++ emit_dju(as, LAI_SLLI_W, dest, dest, 0); ++ asm_callid(as, ir, IRCALL_lj_vm_tobit); ++} ++#endif ++ ++static void asm_conv(ASMState *as, IRIns *ir) ++{ ++ IRType st = (IRType)(ir->op2 & IRCONV_SRCMASK); ++ int stfp = (st == IRT_NUM || st == IRT_FLOAT); ++ int st64 = (st == IRT_I64 || st == IRT_U64 || st == IRT_P64); ++ IRRef lref = ir->op1; ++ lua_assert(irt_type(ir->t) != st); ++#if !LJ_SOFTFP ++ if (irt_isfp(ir->t)) { ++ Reg dest = ra_dest(as, ir, RSET_FPR); ++ if (stfp) { /* FP to FP conversion. */ ++ emit_dj(as, st == IRT_NUM ? LAI_FCVT_S_D : LAI_FCVT_D_S, ++ dest, ra_alloc1(as, lref, RSET_FPR)); ++ } else if (st == IRT_U32) { /* U32 to FP conversion. */ ++ /* y = (x ^ 0x8000000) + 2147483648.0 */ ++ Reg left = ra_alloc1(as, lref, RSET_GPR); ++ Reg tmp = ra_scratch(as, rset_exclude(RSET_FPR, dest)); ++ if (irt_isfloat(ir->t)) ++ emit_dj(as, LAI_FCVT_S_D, dest, dest); ++ /* Must perform arithmetic with doubles to keep the precision. */ ++ emit_djk(as, LAI_FADD_D, dest, dest, tmp); ++ emit_dj(as, LAI_FFINT_D_W, dest, dest); ++ emit_lsptr(as, LAI_FLD_D, (tmp & 31), //TODO emit_lsptr ++ (void *)&as->J->k64[LJ_K64_2P31], RSET_GPR); ++ emit_dj(as, LAI_MOVGR2FR_W, RID_TMP, dest); ++ emit_djk(as, LAI_XOR, RID_TMP, RID_TMP, left); ++ emit_dji(as, LAI_ADDU16I_D, RID_TMP, RID_R0, 0x8000); ++#if LJ_64 ++ } else if(st == IRT_U64) { /* U64 to FP conversion. 
*/ ++ /* if (x >= 1u<<63) y = (double)(int64_t)(x&(1u<<63)-1) + pow(2.0, 63) */ ++ Reg left = ra_alloc1(as, lref, RSET_GPR); ++ Reg tmp = ra_scratch(as, rset_exclude(RSET_FPR, dest)); ++ MCLabel l_end = emit_label(as); ++ if (irt_isfloat(ir->t)) { ++ emit_djk(as, LAI_FADD_S, dest, dest, tmp); ++ emit_lsptr(as, LAI_FLD_S, (tmp & 31), (void *)&as->J->k32[LJ_K32_2P63], ++ rset_exclude(RSET_GPR, left)); ++ emit_branch(as, LAI_BGE, left, RID_ZERO, l_end); ++ emit_dj(as, LAI_FFINT_S_L, dest, dest); ++ } else { ++ emit_djk(as, LAI_FADD_D, dest, dest, tmp); ++ emit_lsptr(as, LAI_FLD_D, (tmp & 31), (void *)&as->J->k64[LJ_K64_2P63], ++ rset_exclude(RSET_GPR, left)); ++ emit_branch(as, LAI_BGE, left, RID_ZERO, l_end); ++ emit_dj(as, LAI_FFINT_D_L, dest, dest); ++ } ++ //emit_branch(as, LAI_BGE, left, RID_ZERO, l_end); //TODO ++ emit_dj(as, LAI_MOVGR2FR_D, RID_TMP, dest); ++ emit_djml(as, LAI_BSTRPICK_D, RID_TMP, left, 62, 0); ++#endif ++ } else { /* Integer to FP conversion. */ ++ Reg left = ra_alloc1(as, lref, RSET_GPR); ++ LAIns lai = irt_isfloat(ir->t) ? ++ (st64 ? LAI_FFINT_S_L : LAI_FFINT_S_W) : ++ (st64 ? LAI_FFINT_D_L : LAI_FFINT_D_W); ++ emit_dj(as, lai, dest, dest); ++ emit_dj(as, st64 ? LAI_MOVGR2FR_D : LAI_MOVGR2FR_W, dest, left); ++ } ++ } else if (stfp) { /* FP to integer conversion. */ ++ if (irt_isguard(ir->t)) { ++ /* Checked conversions are only supported from number to int. */ ++ lua_assert(irt_isint(ir->t) && st == IRT_NUM); ++ asm_tointg(as, ir, ra_alloc1(as, lref, RSET_FPR)); ++ } else { ++ Reg dest = ra_dest(as, ir, RSET_GPR); ++ Reg left = ra_alloc1(as, lref, RSET_FPR); ++ Reg tmp = ra_scratch(as, rset_exclude(RSET_FPR, left)); ++ if (irt_isu32(ir->t)) { /* FP to U32 conversion. */ ++ /* y = (int)floor(x - 2147483648.0) ^ 0x80000000 */ ++ emit_djk(as, LAI_XOR, dest, dest, RID_TMP); ++ emit_dji(as, LAI_ADDU16I_D, RID_TMP, RID_R0, 0x8000); ++ emit_dj(as, LAI_MOVFR2GR_S, dest, tmp); ++ emit_dj(as, st == IRT_FLOAT ? LAI_FTINTRM_W_S : LAI_FTINTRM_W_D, ++ tmp, tmp); ++ emit_djk(as, st == IRT_FLOAT ? LAI_FSUB_S : LAI_FSUB_D, ++ tmp, left, tmp); ++ if (st == IRT_FLOAT) ++ emit_lsptr(as, LAI_FLD_S, (tmp & 31), ++ (void *)&as->J->k32[LJ_K32_2P31], RSET_GPR); ++ else ++ emit_lsptr(as, LAI_FLD_D, (tmp & 31), ++ (void *)&as->J->k64[LJ_K64_2P31], RSET_GPR); ++#if LJ_64 ++ } else if (irt_isu64(ir->t)) { /* FP to U64 conversion. */ ++ MCLabel l_end; ++ emit_dj(as, LAI_MOVFR2GR_D, dest, tmp); ++ l_end = emit_label(as); ++ /* For inputs >= 2^63 add -2^64 and convert again. */ ++ if (st == IRT_NUM) { ++ emit_dj(as, LAI_FTINTRZ_L_D, tmp, tmp); ++ emit_djk(as, LAI_FADD_D, tmp, left, tmp); ++ emit_lsptr(as, LAI_FLD_D, (tmp & 31), ++ (void *)&as->J->k64[LJ_K64_M2P64], ++ rset_exclude(RSET_GPR, dest)); ++ //emit_dj(as, LAI_FTINTRZ_L_D, tmp, left); /* Delay slot. */ //TODO ++ //emit_branch21(as, LAI_BCNEZ, (left&7), l_end); ++ emit_branch21(as, LAI_BCNEZ, 0, l_end); ++ emit_dj(as, LAI_FTINTRZ_L_D, tmp, left); ++ //emit_djk(as, LAI_FCMP_CLT_D, left&7, left, tmp); // TODO ++ emit_djk(as, LAI_FCMP_CLT_D, 0, left, tmp); ++ emit_lsptr(as, LAI_FLD_D, (tmp & 31), ++ (void *)&as->J->k64[LJ_K64_2P63], ++ rset_exclude(RSET_GPR, dest)); ++ } else { ++ emit_dj(as, LAI_FTINTRZ_L_S, tmp, tmp); ++ emit_djk(as, LAI_FADD_S, tmp, left, tmp); ++ emit_lsptr(as, LAI_FLD_S, (tmp & 31), ++ (void *)&as->J->k32[LJ_K32_M2P64], ++ rset_exclude(RSET_GPR, dest)); ++ //emit_dj(as, LAI_FTINTRZ_L_S, tmp, left); /* Delay slot. 
*/ //TODO ++ //emit_branch21(as, LAI_BCNEZ, (left&7), l_end); ++ emit_branch21(as, LAI_BCNEZ, 0, l_end); ++ emit_dj(as, LAI_FTINTRZ_L_S, tmp, left); ++ //emit_djk(as, LAI_FCMP_CLT_S, left&7, left, tmp); // TODO ++ emit_djk(as, LAI_FCMP_CLT_S, 0, left, tmp); ++ emit_lsptr(as, LAI_FLD_S, (tmp & 31), ++ (void *)&as->J->k32[LJ_K32_2P63], ++ rset_exclude(RSET_GPR, dest)); ++ } ++#endif ++ } else { ++ LAIns lai = irt_is64(ir->t) ? ++ (st == IRT_NUM ? LAI_FTINTRZ_L_D : LAI_FTINTRZ_L_S) : ++ (st == IRT_NUM ? LAI_FTINTRZ_W_D : LAI_FTINTRZ_W_S); ++ emit_dj(as, irt_is64(ir->t) ? LAI_MOVFR2GR_D : LAI_MOVFR2GR_S, dest, left); ++ emit_dj(as, lai, left, left); ++ } ++ } ++ } else ++#else ++ if (irt_isfp(ir->t)) { ++#if LJ_64 && LJ_HASFFI ++ if (stfp) { /* FP to FP conversion. */ ++ asm_callid(as, ir, irt_isnum(ir->t) ? IRCALL_softfp_f2d : ++ IRCALL_softfp_d2f); ++ } else { /* Integer to FP conversion. */ ++ IRCallID cid = ((IRT_IS64 >> st) & 1) ? ++ (irt_isnum(ir->t) ? ++ (st == IRT_I64 ? IRCALL_fp64_l2d : IRCALL_fp64_ul2d) : ++ (st == IRT_I64 ? IRCALL_fp64_l2f : IRCALL_fp64_ul2f)) : ++ (irt_isnum(ir->t) ? ++ (st == IRT_INT ? IRCALL_softfp_i2d : IRCALL_softfp_ui2d) : ++ (st == IRT_INT ? IRCALL_softfp_i2f : IRCALL_softfp_ui2f)); ++ asm_callid(as, ir, cid); ++ } ++#else ++ asm_callid(as, ir, IRCALL_softfp_i2d); ++#endif ++ } else if (stfp) { /* FP to integer conversion. */ ++ if (irt_isguard(ir->t)) { ++ /* Checked conversions are only supported from number to int. */ ++ lua_assert(irt_isint(ir->t) && st == IRT_NUM); ++ asm_tointg(as, ir, RID_NONE); ++ } else { ++ IRCallID cid = irt_is64(ir->t) ? ++ ((st == IRT_NUM) ? ++ (irt_isi64(ir->t) ? IRCALL_fp64_d2l : IRCALL_fp64_d2ul) : ++ (irt_isi64(ir->t) ? IRCALL_fp64_f2l : IRCALL_fp64_f2ul)) : ++ ((st == IRT_NUM) ? ++ (irt_isint(ir->t) ? IRCALL_softfp_d2i : IRCALL_softfp_d2ui) : ++ (irt_isint(ir->t) ? IRCALL_softfp_f2i : IRCALL_softfp_f2ui)); ++ asm_callid(as, ir, cid); ++ } ++ } else ++#endif ++ { ++ Reg dest = ra_dest(as, ir, RSET_GPR); ++ if (st >= IRT_I8 && st <= IRT_U16) { /* Extend to 32 bit integer. */ ++ Reg left = ra_alloc1(as, ir->op1, RSET_GPR); ++ lua_assert(irt_isint(ir->t) || irt_isu32(ir->t)); ++ if ((ir->op2 & IRCONV_SEXT)) { ++ emit_dj(as, st == IRT_I8 ? LAI_EXT_W_B : LAI_EXT_W_H, dest, left); ++ } else { ++ if (st == IRT_U8) { ++ emit_dju(as, LAI_ANDI, dest, left, (int32_t)0xff); ++ } else { ++ emit_djk(as, LAI_AND, dest, left, RID_R20); ++ //emit_dj32i(as, RID_R20, RID_R0, 0xffff); ++ emit_djml(as, LAI_BSTRPICK_D, RID_R20, RID_R20, 15, 0); // zero-extend ++ emit_d16i(as, RID_R20, 0xffff); ++ } ++ } ++ } else { /* 32/64 bit integer conversions. */ ++ if (irt_is64(ir->t)) { ++ if (st64) { ++ /* 64/64 bit no-op (cast)*/ ++ ra_leftov(as, dest, lref); ++ } else { ++ Reg left = ra_alloc1(as, lref, RSET_GPR); ++ if ((ir->op2 & IRCONV_SEXT)) { /* 32 to 64 bit sign extension. */ ++ emit_dju(as, LAI_SLLI_W, dest, left, 0); ++ } else { /* 32 to 64 bit zero extension. */ ++ emit_djml(as, LAI_BSTRPICK_D, dest, left, 31, 0); ++ } ++ } ++ } else { ++ if (st64) { ++ /* This is either a 32 bit reg/reg mov which zeroes the hiword ++ ** or a load of the loword from a 64 bit address. ++ */ ++ Reg left = ra_alloc1(as, lref, RSET_GPR); ++ emit_djml(as, LAI_BSTRPICK_D, dest, left, 31, 0); ++ } else { /* 32/32 bit no-op (cast). */ ++ /* Do nothing, but may need to move regs. 
*/ ++ ra_leftov(as, dest, lref); ++ } ++ } ++ } ++ } ++} ++ ++static void asm_strto(ASMState *as, IRIns *ir) ++{ ++ const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_strscan_num]; ++ IRRef args[2]; ++ int32_t ofs = 0; ++ RegSet drop = RSET_SCRATCH; ++ if (ra_hasreg(ir->r)) rset_set(drop, ir->r); /* Spill dest reg (if any). */ ++ ra_evictset(as, drop); ++ ofs = sps_scale(ir->s); ++ asm_guard(as, LAI_BEQ, RID_RET, RID_ZERO); /* Test return status. */ ++ args[0] = ir->op1; /* GCstr *str */ ++ args[1] = ASMREF_TMP1; /* TValue *n */ ++ asm_gencall(as, ci, args); ++ /* Store the result to the spill slot or temp slots. */ ++ //emit_dji(as, LAI_ADDI_D, ra_releasetmp(as, ASMREF_TMP1), ++ // RID_SP, ofs&0xfff); ++ emit_djk(as, LAI_ADD_D, ra_releasetmp(as, ASMREF_TMP1), RID_SP, RID_R19); ++ emit_d16i(as, RID_R19, ofs); ++} ++ ++/* -- Memory references --------------------------------------------------- */ ++ ++#if LJ_64 ++/* Store tagged value for ref at base+ofs. */ ++static void asm_tvstore64(ASMState *as, Reg base, int32_t ofs, IRRef ref) ++{ ++ RegSet allow = rset_exclude(RSET_GPR, base); ++ IRIns *ir = IR(ref); ++ lua_assert(irt_ispri(ir->t) || irt_isaddr(ir->t) || irt_isinteger(ir->t)); ++ if (irref_isk(ref)) { ++ TValue k; ++ lj_ir_kvalue(as->J->L, &k, ir); ++ //emit_dji(as, LAI_ST_D, ra_allock(as, (int64_t)k.u64, allow), base, ofs&0xfff); ++ emit_djk(as, LAI_STX_D, ra_allock(as, (int64_t)k.u64, allow), base, RID_R19); ++ emit_d16i(as, RID_R19, ofs); ++ } else { ++ Reg src = ra_alloc1(as, ref, allow); ++ Reg type = ra_allock(as, (int64_t)irt_toitype(ir->t) << 47, ++ rset_exclude(allow, src)); ++ //emit_dji(as, LAI_ST_D, RID_TMP, base, ofs&0xfff); ++ emit_djk(as, LAI_STX_D, RID_TMP, base, RID_R19); ++ emit_d16i(as, RID_R19, ofs); ++ if (irt_isinteger(ir->t)) { ++ emit_djk(as, LAI_ADD_D, RID_TMP, RID_TMP, type); ++ emit_djml(as, LAI_BSTRPICK_D, RID_TMP, src, 31, 0); ++ } else { ++ emit_djk(as, LAI_ADD_D, RID_TMP, src, type); ++ } ++ } ++} ++#endif ++ ++/* Get pointer to TValue. */ ++static void asm_tvptr(ASMState *as, Reg dest, IRRef ref) ++{ ++ IRIns *ir = IR(ref); ++ if (irt_isnum(ir->t)) { ++ if (irref_isk(ref)) /* Use the number constant itself as a TValue. */ ++ ra_allockreg(as, igcptr(ir_knum(ir)), dest); ++ else { /* Otherwise force a spill and use the spill slot. */ ++ emit_djk(as, LAI_ADD_D, dest, RID_SP, RID_R19); ++ emit_d16i(as, RID_R19, ra_spill(as, ir)); ++ } ++ } else { ++ /* Otherwise use g->tmptv to hold the TValue. */ ++ asm_tvstore64(as, dest, 0, ref); ++ emit_djk(as, LAI_ADD_D, dest, RID_JGL, RID_R19); ++ emit_d16i(as, RID_R19, (int32_t)(offsetof(global_State, tmptv)-32768)); ++ } ++} ++ ++static void asm_aref(ASMState *as, IRIns *ir) ++{ ++ Reg dest = ra_dest(as, ir, RSET_GPR); ++ Reg idx, base; ++ if (irref_isk(ir->op2)) { ++ IRRef tab = IR(ir->op1)->op1; ++ int32_t ofs = asm_fuseabase(as, tab); ++ IRRef refa = ofs ? tab : ir->op1; ++ ofs += 8*IR(ir->op2)->i; ++ if (checki16(ofs)) { ++ base = ra_alloc1(as, refa, RSET_GPR); ++ //emit_dj32i(as, dest, base, ofs); //TODO ++ emit_djk(as, LAI_ADD_D, dest, base, RID_R19); ++ emit_d16i(as, RID_R19, ofs); ++ return; ++ } ++ } ++ base = ra_alloc1(as, ir->op1, RSET_GPR); ++ idx = ra_alloc1(as, ir->op2, rset_exclude(RSET_GPR, base)); ++ // emit_djka(as, LAI_ALSL_D, dest, idx, base, 2); ++ emit_djk(as, LAI_ADD_D, dest, RID_TMP, base); ++ emit_dju(as, LAI_SLLI_D, RID_TMP, idx, 3); ++} ++ ++/* Inlined hash lookup. Specialized for key type and for const keys. 
++** The equivalent C code is: ++** Node *n = hashkey(t, key); ++** do { ++** if (lj_obj_equal(&n->key, key)) return &n->val; ++** } while ((n = nextnode(n))); ++** return niltv(L); ++*/ ++static void asm_href(ASMState *as, IRIns *ir, IROp merge) ++{ ++ RegSet allow = RSET_GPR; ++ int destused = ra_used(ir); ++ Reg dest = ra_dest(as, ir, allow); ++ Reg tab = ra_alloc1(as, ir->op1, rset_clear(allow, dest)); ++ Reg key = RID_NONE, type = RID_NONE, tmpnum = RID_NONE, tmp1 = RID_TMP, tmp2; ++ Reg cmp64 = RID_NONE; ++ IRRef refkey = ir->op2; ++ IRIns *irkey = IR(refkey); ++ int isk = irref_isk(refkey); ++ IRType1 kt = irkey->t; ++ uint32_t khash; ++ MCLabel l_end, l_loop, l_next; ++ ++ rset_clear(allow, tab); ++ if (!LJ_SOFTFP && irt_isnum(kt)) { ++ key = ra_alloc1(as, refkey, RSET_FPR); ++ tmpnum = ra_scratch(as, rset_exclude(RSET_FPR, key)); ++ } else if (!irt_ispri(kt)) { ++ key = ra_alloc1(as, refkey, allow); ++ rset_clear(allow, key); ++ } ++ tmp2 = ra_scratch(as, allow); ++ rset_clear(allow, tmp2); ++ if (LJ_SOFTFP || !irt_isnum(kt)) { ++ /* Allocate cmp64 register used for 64-bit comparisons */ ++ if (LJ_SOFTFP && irt_isnum(kt)) { ++ cmp64 = key; ++ } else if (!isk && irt_isaddr(kt)) { ++ cmp64 = tmp2; ++ } else { ++ int64_t k; ++ if (isk && irt_isaddr(kt)) { ++ k = ((int64_t)irt_toitype(irkey->t) << 47) | irkey[1].tv.u64; ++ } else { ++ lua_assert(irt_ispri(kt) && !irt_isnil(kt)); ++ k = ~((int64_t)~irt_toitype(ir->t) << 47); ++ } ++ cmp64 = ra_allock(as, k, allow); ++ rset_clear(allow, cmp64); ++ } ++ } ++ ++ /* Key not found in chain: jump to exit (if merged) or load niltv. */ ++ l_end = emit_label(as); ++ as->invmcp = NULL; ++ if (merge == IR_NE) ++ asm_guard(as, LAI_BEQ, RID_ZERO, RID_ZERO); ++ else if (destused) ++ emit_loada(as, dest, niltvg(J2G(as->J))); ++ /* Follow hash chain until the end. */ ++ l_loop = --as->mcp; ++ emit_move(as, dest, tmp1); ++ //emit_dji(as, LAI_LD_D, tmp1, dest, ((int32_t)offsetof(Node, next))&0xfff); //TODO si12 ++ emit_djk(as, LAI_LDX_D, tmp1, dest, RID_R19); ++ emit_d16i(as, RID_R19, (int32_t)offsetof(Node, next)); ++ l_next = emit_label(as); ++ ++ /* Type and value comparison. */ ++ if (merge == IR_EQ) { /* Must match asm_guard(). */ ++ //emit_dj32i(as, RID_TMP, RID_ZERO, as->snapno); //TODO ++ l_end = asm_exitstub_addr(as); ++ } ++ if (!LJ_SOFTFP && irt_isnum(kt)) { ++ //emit_branch21(as, LAI_BCNEZ, (tmpnum&7), l_end); ++ emit_branch21(as, LAI_BCNEZ, 0, l_end); ++ emit_dj32i(as, RID_TMP, RID_ZERO, as->snapno); ++ //emit_djk(as, LAI_FCMP_CEQ_D, tmpnum&7, tmpnum, key); // TODO ++ emit_djk(as, LAI_FCMP_CEQ_D, 0, tmpnum, key); ++ *--as->mcp = LAI_NOP; /* Avoid NaN comparison overhead. 
*/ ++ emit_branch(as, LAI_BEQ, tmp1, RID_ZERO, l_next); ++ //emit_dji(as, LAI_SLTUI, tmp1, tmp1, ((int32_t)LJ_TISNUM)&0xfff); ++ emit_djk(as, LAI_SLTU, tmp1, tmp1, RID_R19); ++ emit_d16i(as, RID_R19, (int32_t)LJ_TISNUM); ++ emit_dju(as, LAI_SRAI_D, tmp1, tmp1, 47); ++ emit_dj(as, LAI_MOVGR2FR_D, tmpnum, tmp1); ++ //emit_dji(as, LAI_LD_D, tmp1, dest, ((int32_t)offsetof(Node, key.u64))&0xfff); ++ emit_djk(as, LAI_LDX_D, tmp1, dest, RID_R19); ++ } else { ++ emit_branch(as, LAI_BEQ, RID_R20, cmp64, l_end); ++ emit_dj32i(as, RID_TMP, RID_ZERO, as->snapno); ++ emit_djk(as, LAI_OR, RID_R20, RID_R0, tmp1); ++ //emit_dji(as, LAI_LD_D, tmp1, dest, ((int32_t)offsetof(Node, key.u64))&0xfff); ++ emit_djk(as, LAI_LDX_D, tmp1, dest, RID_R19); ++ } ++ emit_d16i(as, RID_R19, (int32_t)offsetof(Node, key.u64)); ++ // *l_loop = MIPSI_BNE | MIPSF_S(tmp1) | ((as->mcp-l_loop-1) & 0xffffu); ++ *l_loop = LAI_BNE | LAF_J(tmp1) | LAF_D(RID_ZERO) | LAF_I(((as->mcp-l_loop) & 0xffffu)); ++ if (!isk && irt_isaddr(kt)) { ++ type = ra_allock(as, (int64_t)irt_toitype(kt) << 47, allow); ++ emit_djk(as, LAI_ADD_D, tmp2, key, type); ++ rset_clear(allow, type); ++ } ++ ++ /* Load main position relative to tab->node into dest. */ ++ khash = isk ? ir_khash(irkey) : 1; ++ if (khash == 0) { ++ //emit_dji(as, LAI_LD_D, dest, tab, ((int32_t)offsetof(GCtab, node))&0xfff); ++ emit_djk(as, LAI_LDX_D, dest, tab, RID_R19); ++ emit_d16i(as, RID_R19, (int32_t)offsetof(GCtab, node)); ++ } else { ++ Reg tmphash = tmp1; ++ if (isk) ++ tmphash = ra_allock(as, khash, allow); ++ emit_djk(as, LAI_ADD_D, dest, dest, tmp1); ++ lua_assert(sizeof(Node) == 24); ++ emit_djk(as, LAI_SUB_W, tmp1, tmp2, tmp1); ++ emit_dju(as, LAI_SLLI_W, tmp1, tmp1, 3); ++ emit_dju(as, LAI_SLLI_W, tmp2, tmp1, 5); ++ emit_djk(as, LAI_AND, tmp1, tmp2, tmphash); ++ emit_dji(as, LAI_LD_D, dest, tab, ((int32_t)offsetof(GCtab, node))&0xfff); ++ emit_dji(as, LAI_LD_W, tmp2, tab, ((int32_t)offsetof(GCtab, hmask))&0xfff); ++ if (isk) { ++ /* Nothing to do. */ ++ } else if (irt_isstr(kt)) { ++ emit_dji(as, LAI_LD_W, tmp1, key, ((int32_t)offsetof(GCstr, hash))&0xfff); ++ } else { /* Must match with hash*() in lj_tab.c. */ ++ emit_djk(as, LAI_SUB_W, tmp1, tmp1, tmp2); ++ emit_rotr(as, tmp2, tmp2, dest, (-HASH_ROT3)&31); //TODO ++ emit_djk(as, LAI_XOR, tmp1, tmp1, tmp2); ++ emit_rotr(as, tmp1, tmp1, dest, (-HASH_ROT2-HASH_ROT1)&31); //TODO ++ emit_djk(as, LAI_SUB_W, tmp2, tmp2, dest); ++ emit_djk(as, LAI_XOR, tmp2, tmp2, tmp1); ++ emit_dju(as, LAI_ROTRI_W, dest, tmp1, (-HASH_ROT1)&31); ++ if (irt_isnum(kt)) { ++ emit_djk(as, LAI_ADD_W, tmp1, tmp1, tmp1); ++ emit_dju(as, LAI_SRAI_D, tmp1, LJ_SOFTFP ? key : tmp1, 32); ++ emit_dju(as, LAI_SLLI_W, tmp2, LJ_SOFTFP ? key : tmp1, 0); ++#if !LJ_SOFTFP ++ emit_dj(as, LAI_MOVFR2GR_D, tmp1, key); ++#endif ++ } else { ++ checkmclim(as); ++ emit_dju(as, LAI_SRAI_D, tmp1, tmp1, 32); ++ emit_dju(as, LAI_SLLI_W, tmp2, key, 0); ++ emit_djk(as, LAI_ADD_D, tmp1, key, type); ++ } ++ } ++ } ++} ++ ++static void asm_hrefk(ASMState *as, IRIns *ir) ++{ ++ IRIns *kslot = IR(ir->op2); ++ IRIns *irkey = IR(kslot->op1); ++ int32_t ofs = (int32_t)(kslot->op2 * sizeof(Node)); ++ int32_t kofs = ofs + (int32_t)offsetof(Node, key); ++ Reg dest = (ra_used(ir)||ofs > 32736) ? ra_dest(as, ir, RSET_GPR) : RID_NONE; //TODO ++ Reg node = ra_alloc1(as, ir->op1, RSET_GPR); ++ RegSet allow = rset_exclude(RSET_GPR, node); ++ Reg idx = node; ++ Reg key = ra_scratch(as, allow); ++ int64_t k; ++ lua_assert(ofs % sizeof(Node) == 0); ++ if (ofs > 32736) { //TODO why 32736 ? 
++ idx = dest; ++ rset_clear(allow, dest); ++ kofs = (int32_t)offsetof(Node, key); ++ } else if (ra_hasreg(dest)) { ++ // emit_dj32i(as, dest, node, ofs); ++ //emit_add(as, dest, node, ofs); ++ emit_djk(as, LAI_ADD_D, dest, node, RID_R19); ++ emit_d16i(as, RID_R19, ofs); ++ } ++ if (irt_ispri(irkey->t)) { ++ lua_assert(!irt_isnil(irkey->t)); ++ k = ~((int64_t)~irt_toitype(irkey->t) << 47); ++ } else if (irt_isnum(irkey->t)) { ++ k = (int64_t)ir_knum(irkey)->u64; ++ } else { ++ k = ((int64_t)irt_toitype(irkey->t) << 47) | (int64_t)ir_kgc(irkey); ++ } ++ asm_guard(as, LAI_BNE, key, ra_allock(as, k, allow)); ++ //emit_dji(as, LAI_LD_D, key, idx, kofs&0xfff); //TODO si12 ++ emit_djk(as, LAI_LDX_D, key, idx, RID_R19); ++ emit_d16i(as, RID_R19, kofs); ++ if (ofs > 32736) ++ emit_djk(as, LAI_ADD_D, dest, node, ra_allock(as, ofs, allow)); ++} ++ ++static void asm_uref(ASMState *as, IRIns *ir) ++{ ++ Reg dest = ra_dest(as, ir, RSET_GPR); ++ if (irref_isk(ir->op1)) { ++ GCfunc *fn = ir_kfunc(IR(ir->op1)); ++ MRef *v = &gcref(fn->l.uvptr[(ir->op2 >> 8)])->uv.v; ++ emit_lsptr(as, LAI_LD_D, dest, v, RSET_GPR); ++ } else { ++ Reg uv = ra_scratch(as, RSET_GPR); ++ Reg func = ra_alloc1(as, ir->op1, RSET_GPR); ++ if (ir->o == IR_UREFC) { ++ asm_guard(as, LAI_BEQ, RID_TMP, RID_ZERO); ++ emit_dji(as, LAI_ADDI_D, dest, uv, ((int32_t)offsetof(GCupval, tv))&0xfff); //TODO si12 ++ emit_dji(as, LAI_LD_BU, RID_TMP, uv, ((int32_t)offsetof(GCupval, closed))&0xfff); ++ } else { ++ emit_dji(as, LAI_LD_D, dest, uv, ((int32_t)offsetof(GCupval, v))&0xfff); ++ } ++ //emit_dji(as, LAI_LD_D, uv, func, ((int32_t)offsetof(GCfuncL, uvptr) + ++ // (int32_t)sizeof(MRef) * (int32_t)(ir->op2 >> 8))&0xfff); ++ emit_djk(as, LAI_LDX_D, uv, func, RID_R19); ++ emit_d16i(as, RID_R19, (int32_t)offsetof(GCfuncL, uvptr) + ++ (int32_t)sizeof(MRef) * (int32_t)(ir->op2 >> 8)); ++ } ++} ++ ++static void asm_fref(ASMState *as, IRIns *ir) ++{ ++ UNUSED(as); UNUSED(ir); ++ lua_assert(!ra_used(ir)); ++} ++ ++static void asm_strref(ASMState *as, IRIns *ir) ++{ ++ RegSet allow = RSET_GPR; ++ Reg dest = ra_dest(as, ir, allow); ++ Reg base = ra_alloc1(as, ir->op1, allow); ++ IRIns *irr = IR(ir->op2); ++ int32_t ofs = sizeof(GCstr); ++ rset_clear(allow, base); ++ if (irref_isk(ir->op2) && checki16(ofs + irr->i)) { //TODO checki16 ++ // emit_tsi(as, MIPSI_DADDIU, dest, base, ofs + irr->i); ++ //emit_dj32i(as, dest, base, ofs + irr->i); ++ emit_djk(as, LAI_ADD_D, dest, base, RID_R19); ++ emit_d16i(as, RID_R19, (ofs + irr->i)); ++ } else { ++ // emit_tsi(as, MIPSI_DADDIU, dest, dest, ofs); ++ //emit_dj32i(as, dest, dest, ofs); //TODO ++ emit_djk(as, LAI_ADD_D, dest, dest, RID_R19); ++ emit_d16i(as, RID_R19, ofs); ++ emit_djk(as, LAI_ADD_D, dest, base, ra_alloc1(as, ir->op2, allow)); ++ } ++} ++ ++/* -- Loads and stores ---------------------------------------------------- */ ++ ++static LAIns asm_fxloadins(ASMState *as, IRIns *ir) ++{ ++ UNUSED(as); ++ switch (irt_type(ir->t)) { ++ case IRT_I8: return LAI_LD_B; ++ case IRT_U8: return LAI_LD_BU; ++ case IRT_I16: return LAI_LD_H; ++ case IRT_U16: return LAI_LD_HU; ++ case IRT_NUM: ++ lua_assert(!LJ_SOFTFP32); ++ if (!LJ_SOFTFP) return LAI_FLD_D; ++ /* fallthrough */ ++ case IRT_FLOAT: if (!LJ_SOFTFP) return LAI_FLD_S; ++ /* fallthrough */ ++ default: return (LJ_64 && irt_is64(ir->t)) ? 
LAI_LD_D : LAI_LD_W; ++ } ++} ++ ++static LAIns asm_fxstoreins(ASMState *as, IRIns *ir) ++{ ++ UNUSED(as); ++ switch (irt_type(ir->t)) { ++ case IRT_I8: case IRT_U8: return LAI_ST_B; ++ case IRT_I16: case IRT_U16: return LAI_ST_H; ++ case IRT_NUM: ++ lua_assert(!LJ_SOFTFP32); ++ if (!LJ_SOFTFP) return LAI_FST_D; ++ /* fallthrough */ ++ case IRT_FLOAT: if (!LJ_SOFTFP) return LAI_FST_S; ++ /* fallthrough */ ++ default: return (LJ_64 && irt_is64(ir->t)) ? LAI_ST_D : LAI_ST_W; ++ } ++} ++ ++static void asm_fload(ASMState *as, IRIns *ir) ++{ ++ Reg dest = ra_dest(as, ir, RSET_GPR); ++ LAIns lai = asm_fxloadins(as, ir); ++ Reg idx; ++ int32_t ofs; ++ if (ir->op1 == REF_NIL) { /* FLOAD from GG_State with offset. */ ++ idx = RID_JGL; ++ ofs = (ir->op2 << 2) - 32768 - GG_OFS(g); //TODO ++ } else { ++ idx = ra_alloc1(as, ir->op1, RSET_GPR); ++ if (ir->op2 == IRFL_TAB_ARRAY) { ++ ofs = asm_fuseabase(as, ir->op1); ++ if (ofs) { /* Turn the t->array load into an add for colocated arrays. */ ++ //emit_dji(as, LAI_ADDI_D, dest, idx, ofs&0xfff); ++ emit_djk(as, LAI_ADD_D, dest, idx, RID_R19); ++ emit_d16i(as, RID_R19, ofs); ++ return; ++ } ++ } ++ ofs = field_ofs[ir->op2]; ++ } ++ lua_assert(!irt_isfp(ir->t)); ++ // emit_dji(as, lai, dest, idx, ofs&0xfff); ++ /* li r17, ofs; ldx.d/w dest, idx, r17 */ ++ switch (lai) { ++ case LAI_LD_B: ++ lai = LAI_LDX_B; ++ break; ++ case LAI_LD_BU: ++ lai = LAI_LDX_BU; ++ break; ++ case LAI_LD_H: ++ lai = LAI_LDX_H; ++ break; ++ case LAI_LD_HU: ++ lai = LAI_LDX_HU; ++ break; ++ case LAI_LD_D: ++ lai = LAI_LDX_D; ++ break; ++ case LAI_LD_W: ++ lai = LAI_LDX_W; ++ break; ++ case LAI_FLD_D: ++ lai = LAI_FLDX_D; ++ break; ++ case LAI_FLD_S: ++ lai = LAI_FLDX_S; ++ break; ++ default: ++ break; ++ } ++ emit_djk(as, lai, dest, idx, RID_R19); ++ //emit_loadi(as, RID_R19, ofs); ++ emit_d16i(as, RID_R19, ofs); ++} ++ ++static void asm_fstore(ASMState *as, IRIns *ir) ++{ ++ if (ir->r != RID_SINK) { ++ Reg src = ra_alloc1(as, ir->op2, RSET_GPR); ++ IRIns *irf = IR(ir->op1); ++ Reg idx = ra_alloc1(as, irf->op1, rset_exclude(RSET_GPR, src)); ++ int32_t ofs = field_ofs[irf->op2]; ++ LAIns lai = asm_fxstoreins(as, ir); ++ lua_assert(!irt_isfp(ir->t)); ++ emit_dji(as, lai, src, idx, ofs&0xfff); ++ } ++} ++ ++static void asm_xload(ASMState *as, IRIns *ir) ++{ ++ Reg dest = ra_dest(as, ir, ++ (!LJ_SOFTFP && irt_isfp(ir->t)) ? RSET_FPR : RSET_GPR); ++ lua_assert(LJ_TARGET_UNALIGNED || !(ir->op2 & IRXLOAD_UNALIGNED)); ++ asm_fusexref(as, asm_fxloadins(as, ir), dest, ir->op1, RSET_GPR, 0); ++} ++ ++static void asm_xstore_(ASMState *as, IRIns *ir, int32_t ofs) ++{ ++ if (ir->r != RID_SINK) { ++ Reg src = ra_alloc1(as, ir->op2, ++ (!LJ_SOFTFP && irt_isfp(ir->t)) ? RSET_FPR : RSET_GPR); ++ asm_fusexref(as, asm_fxstoreins(as, ir), src, ir->op1, ++ rset_exclude(RSET_GPR, src), ofs); ++ } ++} ++ ++#define asm_xstore(as, ir) asm_xstore_(as, ir, 0) ++ ++static void asm_ahuvload(ASMState *as, IRIns *ir) ++{ ++ Reg dest = RID_NONE, type = RID_TMP, idx; ++ RegSet allow = RSET_GPR; ++ int32_t ofs = 0; ++ IRType1 t = ir->t; ++ ++ if (ra_used(ir)) { ++ lua_assert((irt_isnum(ir->t)) || ++ irt_isint(ir->t) || irt_isaddr(ir->t)); ++ dest = ra_dest(as, ir, (!LJ_SOFTFP && irt_isnum(t)) ? RSET_FPR : allow); ++ rset_clear(allow, dest); ++ if (irt_isaddr(t)) ++ emit_djml(as, LAI_BSTRPICK_D, dest, dest, 46, 0); //TODO 14+1+32? 
++ else if (irt_isint(t)) ++ emit_dju(as, LAI_SLLI_W, dest, dest, 0); ++ } ++ idx = asm_fuseahuref(as, ir->op1, &ofs, allow); ++ rset_clear(allow, idx); ++ if (irt_isnum(t)) { ++ asm_guard(as, LAI_BEQ, RID_TMP, RID_ZERO); ++ //emit_dji(as, LAI_SLTUI, RID_TMP, type, ((int32_t)LJ_TISNUM)&0xfff); //TODO ++ emit_djk(as, LAI_SLTU, RID_TMP, type, RID_R19); ++ emit_d16i(as, RID_R19, (int32_t)LJ_TISNUM); ++ } else { ++ asm_guard(as, LAI_BNE, type, ++ ra_allock(as, (int32_t)irt_toitype(t), allow)); ++ } ++ if (ra_hasreg(dest)) { ++ if (!LJ_SOFTFP && irt_isnum(t)) { ++ //emit_dji(as, LAI_FLD_D, dest, idx, ofs&0xfff); ++ emit_djk(as, LAI_FLDX_D, dest, idx, RID_R19); ++ emit_d16i(as, RID_R19, ofs); ++ dest = type; ++ } ++ } else { ++ dest = type; ++ } ++ emit_dju(as, LAI_SRAI_D, type, dest, (47 & 0x3f)); ++ //emit_dji(as, LAI_LD_D, dest, idx, ofs&0xfff); ++ emit_djk(as, LAI_LDX_D, dest, idx, RID_R19); ++ emit_d16i(as, RID_R19, ofs); ++} ++ ++static void asm_ahustore(ASMState *as, IRIns *ir) ++{ ++ RegSet allow = RSET_GPR; ++ Reg idx, src = RID_NONE, type = RID_NONE; ++ int32_t ofs = 0; ++ if (ir->r == RID_SINK) ++ return; ++ if (irt_isnum(ir->t)) { ++ src = ra_alloc1(as, ir->op2, LJ_SOFTFP ? RSET_GPR : RSET_FPR); ++ idx = asm_fuseahuref(as, ir->op1, &ofs, allow); ++ //emit_dji(as, LJ_SOFTFP ? LAI_ST_D : LAI_FST_D, src, idx, ofs&0xfff); ++ emit_djk(as, LJ_SOFTFP ? LAI_STX_D : LAI_FSTX_D, src, idx, RID_R19); ++ emit_d16i(as, RID_R19, ofs); ++ } else { ++ Reg tmp = RID_TMP; ++ if (irt_ispri(ir->t)) { ++ tmp = ra_allock(as, ~((int64_t)~irt_toitype(ir->t) << 47), allow); ++ rset_clear(allow, tmp); ++ } else { ++ src = ra_alloc1(as, ir->op2, allow); ++ rset_clear(allow, src); ++ type = ra_allock(as, (int64_t)irt_toitype(ir->t) << 47, allow); ++ rset_clear(allow, type); ++ } ++ idx = asm_fuseahuref(as, ir->op1, &ofs, allow); ++ //emit_dji(as, LAI_ST_D, tmp, idx, ofs&0xfff); ++ emit_djk(as, LAI_STX_D, tmp, idx, RID_R19); ++ emit_d16i(as, RID_R19, ofs); ++ if (ra_hasreg(src)) { ++ if (irt_isinteger(ir->t)) { ++ emit_djk(as, LAI_ADD_D, tmp, tmp, type); ++ emit_djml(as, LAI_BSTRPICK_D, tmp, src, 31, 0); //TODO ++ } else { ++ emit_djk(as, LAI_ADD_D, tmp, src, type); ++ } ++ } ++ } ++} ++ ++static void asm_sload(ASMState *as, IRIns *ir) ++{ ++ Reg dest = RID_NONE, type = RID_NONE, base; ++ RegSet allow = RSET_GPR; ++ IRType1 t = ir->t; ++ int32_t ofs = 8*((int32_t)ir->op1-2); ++ lua_assert(!(ir->op2 & IRSLOAD_PARENT)); ++ lua_assert(irt_isguard(ir->t) || !(ir->op2 & IRSLOAD_TYPECHECK)); ++ if ((ir->op2 & IRSLOAD_CONVERT) && irt_isguard(t) && irt_isint(t)) { ++ dest = ra_scratch(as, LJ_SOFTFP ? allow : RSET_FPR); ++ asm_tointg(as, ir, dest); ++ t.irt = IRT_NUM; /* Continue with a regular number type check. */ ++ } else ++ if (ra_used(ir)) { ++ lua_assert((irt_isnum(ir->t)) || ++ irt_isint(ir->t) || irt_isaddr(ir->t)); ++ dest = ra_dest(as, ir, (!LJ_SOFTFP && irt_isnum(t)) ? RSET_FPR : allow); ++ rset_clear(allow, dest); ++ base = ra_alloc1(as, REF_BASE, allow); ++ rset_clear(allow, base); ++ if (ir->op2 & IRSLOAD_CONVERT) { ++ if (irt_isint(t)) { ++ Reg tmp = ra_scratch(as, LJ_SOFTFP ? RSET_GPR : RSET_FPR); ++#if LJ_SOFTFP ++ ra_evictset(as, rset_exclude(RSET_SCRATCH, dest)); ++ ra_destreg(as, ir, RID_RET); ++ emit_call(as, (void *)lj_ir_callinfo[IRCALL_softfp_d2i].func); //TODO ++ if (tmp != REGARG_FIRSTGPR) ++ emit_move(as, REGARG_FIRSTGPR, tmp); ++#else ++ emit_dj(as, LAI_MOVFR2GR_S, dest, tmp); ++ emit_dj(as, LAI_FTINTRZ_W_D, tmp, tmp); ++#endif ++ dest = tmp; ++ t.irt = IRT_NUM; /* Check for original type. 
*/ ++ } else { ++ Reg tmp = ra_scratch(as, RSET_GPR); ++#if LJ_SOFTFP ++ ra_evictset(as, rset_exclude(RSET_SCRATCH, dest)); ++ ra_destreg(as, ir, RID_RET); ++ emit_call(as, (void *)lj_ir_callinfo[IRCALL_softfp_i2d].func); //TODO ++ emit_dju(as, LAI_SLLI_W, REGARG_FIRSTGPR, tmp, 0); ++#else ++ emit_dj(as, LAI_FFINT_D_W, dest, dest); ++ emit_dj(as, LAI_MOVGR2FR_W, tmp, dest); ++#endif ++ dest = tmp; ++ t.irt = IRT_INT; /* Check for original type. */ ++ } ++ } ++ else if (irt_isaddr(t)) { ++ /* Clear type from pointers. */ ++ emit_djml(as, LAI_BSTRPICK_D, dest, dest, 46, 0); ++ } else if (irt_isint(t) && (ir->op2 & IRSLOAD_TYPECHECK)) { ++ /* Sign-extend integers. */ ++ emit_dju(as, LAI_SLLI_W, dest, dest, 0); ++ } ++ goto dotypecheck; ++ } ++ base = ra_alloc1(as, REF_BASE, allow); ++ rset_clear(allow, base); ++dotypecheck: ++ if ((ir->op2 & IRSLOAD_TYPECHECK)) { ++ type = dest < RID_MAX_GPR ? dest : RID_TMP; ++ if (irt_ispri(t)) { ++ asm_guard(as, LAI_BNE, type, ++ ra_allock(as, ~((int64_t)~irt_toitype(t) << 47) , allow)); ++ } else { ++ if (irt_isnum(t)) { ++ asm_guard(as, LAI_BEQ, RID_TMP, RID_ZERO); ++ //emit_dji(as, LAI_SLTUI, RID_TMP, RID_TMP, ((int32_t)LJ_TISNUM)&0xfff); ++ emit_djk(as, LAI_SLTU, RID_TMP, RID_TMP, RID_R19); ++ emit_d16i(as, RID_R19, (int32_t)LJ_TISNUM); ++ if (!LJ_SOFTFP && ra_hasreg(dest)) { ++ //emit_dji(as, LAI_FLD_D, dest, base, ofs&0xfff); ++ emit_djk(as, LAI_FLDX_D, dest, base, RID_R19); ++ emit_d16i(as, RID_R19, ofs); ++ } ++ } else { ++ asm_guard(as, LAI_BNE, RID_TMP, ++ ra_allock(as, (int32_t)irt_toitype(t), allow)); ++ } ++ emit_dju(as, LAI_SRAI_D, RID_TMP, type, 47); ++ } ++ //emit_dji(as, LAI_LD_D, type, base, ofs&0xfff); ++ emit_djk(as, LAI_LDX_D, type, base, RID_R19); ++ emit_d16i(as, RID_R19, ofs); ++ } else if (ra_hasreg(dest)) { ++ if (!LJ_SOFTFP && irt_isnum(t)) { ++ emit_djk(as, LAI_FLDX_D, dest, base, RID_R19); ++ } else { ++ emit_djk(as, irt_isint(t) ? LAI_LDX_W : LAI_LDX_D, dest, base, RID_R19); ++ } ++ emit_d16i(as, RID_R19, ofs); ++ } ++} ++ ++/* -- Allocations --------------------------------------------------------- */ ++ ++#if LJ_HASFFI ++static void asm_cnew(ASMState *as, IRIns *ir) ++{ ++ CTState *cts = ctype_ctsG(J2G(as->J)); ++ CTypeID id = (CTypeID)IR(ir->op1)->i; ++ CTSize sz; ++ CTInfo info = lj_ctype_info(cts, id, &sz); ++ const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_mem_newgco]; ++ IRRef args[4]; ++ RegSet drop = RSET_SCRATCH; ++ lua_assert(sz != CTSIZE_INVALID || (ir->o == IR_CNEW && ir->op2 != REF_NIL)); ++ ++ as->gcsteps++; ++ if (ra_hasreg(ir->r)) ++ rset_clear(drop, ir->r); /* Dest reg handled below. */ ++ ra_evictset(as, drop); ++ if (ra_used(ir)) ++ ra_destreg(as, ir, RID_RET); /* GCcdata * */ ++ ++ /* Initialize immutable cdata object. */ ++ if (ir->o == IR_CNEWI) { ++ RegSet allow = (RSET_GPR & ~RSET_SCRATCH); ++ emit_dji(as, sz == 8 ? LAI_ST_D : LAI_ST_W, ra_alloc1(as, ir->op2, allow), ++ RID_RET, (sizeof(GCcdata))&0xfff); ++ lua_assert(sz == 4 || sz == 8); ++ } else if (ir->op2 != REF_NIL) { /* Create VLA/VLS/aligned cdata. */ ++ ci = &lj_ir_callinfo[IRCALL_lj_cdata_newv]; ++ args[0] = ASMREF_L; /* lua_State *L */ ++ args[1] = ir->op1; /* CTypeID id */ ++ args[2] = ir->op2; /* CTSize sz */ ++ args[3] = ASMREF_TMP1; /* CTSize align */ ++ asm_gencall(as, ci, args); ++ emit_loadi(as, ra_releasetmp(as, ASMREF_TMP1), (int32_t)ctype_align(info)); ++ return; ++ } ++ ++ /* Initialize gct and ctypeid. lj_mem_newgco() already sets marked. 
*/ ++ emit_dji(as, LAI_ST_B, RID_RET+1, RID_RET, (offsetof(GCcdata, gct))&0xfff); ++ emit_dji(as, LAI_ST_H, RID_TMP, RID_RET, (offsetof(GCcdata, ctypeid))&0xfff); ++ //emit_dj32i(as, RID_RET+1, 0, ~LJ_TCDATA); ++ emit_djk(as, LAI_ADD_D, RID_RET+1, 0, RID_R19); ++ emit_d16i(as, RID_R19, ~LJ_TCDATA); ++ //emit_dj32i(as, RID_TMP, 0, id); /* Lower 16 bit used. Sign-ext ok. */ ++ emit_djk(as, LAI_ADD_D, RID_TMP, 0, RID_R19); ++ emit_d16i(as, RID_R19, id); ++ args[0] = ASMREF_L; /* lua_State *L */ ++ args[1] = ASMREF_TMP1; /* MSize size */ ++ asm_gencall(as, ci, args); ++ ra_allockreg(as, (int32_t)(sz+sizeof(GCcdata)), ++ ra_releasetmp(as, ASMREF_TMP1)); ++} ++#endif ++ ++/* -- Write barriers ------------------------------------------------------ */ ++ ++static void asm_tbar(ASMState *as, IRIns *ir) ++{ ++ Reg tab = ra_alloc1(as, ir->op1, RSET_GPR); ++ Reg mark = ra_scratch(as, rset_exclude(RSET_GPR, tab)); ++ Reg link = RID_TMP; ++ MCLabel l_end = emit_label(as); ++ emit_dji(as, LAI_ST_D, link, tab, ((int32_t)offsetof(GCtab, gclist))&0xfff); ++ emit_dji(as, LAI_ST_B, mark, tab, ((int32_t)offsetof(GCtab, marked))&0xfff); ++ emit_setgl(as, tab, gc.grayagain); //TODO ++ emit_getgl(as, link, gc.grayagain); //TODO ++ //emit_djk(as, LAI_XOR, mark, mark, RID_TMP); /* Clear black bit. */ ++ emit_branch(as, LAI_BEQ, RID_TMP, RID_ZERO, l_end); ++ emit_djk(as, LAI_XOR, mark, mark, RID_TMP); ++ emit_dju(as, LAI_ANDI, RID_TMP, mark, LJ_GC_BLACK); ++ emit_dji(as, LAI_LD_BU, mark, tab, ((int32_t)offsetof(GCtab, marked))&0xfff); ++} ++ ++static void asm_obar(ASMState *as, IRIns *ir) ++{ ++ const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_gc_barrieruv]; ++ IRRef args[2]; ++ MCLabel l_end; ++ Reg obj, val, tmp; ++ /* No need for other object barriers (yet). */ ++ lua_assert(IR(ir->op1)->o == IR_UREFC); ++ ra_evictset(as, RSET_SCRATCH); ++ l_end = emit_label(as); ++ args[0] = ASMREF_TMP1; /* global_State *g */ ++ args[1] = ir->op1; /* TValue *tv */ ++ asm_gencall(as, ci, args); ++ //emit_dji(as, LAI_ADDI_D, ra_releasetmp(as, ASMREF_TMP1), RID_JGL, -32768); ++ //emit_dj32i(as, ra_releasetmp(as, ASMREF_TMP1), RID_JGL, -32768); //TODO daddiu ++ emit_djk(as, LAI_ADDI_D, ra_releasetmp(as, ASMREF_TMP1), RID_JGL, RID_R19); ++ emit_d16i(as, RID_R19, -32768); ++ obj = IR(ir->op1)->r; ++ tmp = ra_scratch(as, rset_exclude(RSET_GPR, obj)); ++ emit_branch(as, LAI_BEQ, RID_TMP, RID_ZERO, l_end); ++ emit_dju(as, LAI_ANDI, tmp, tmp, LJ_GC_BLACK); ++ emit_branch(as, LAI_BEQ, RID_TMP, RID_ZERO, l_end); ++ emit_dju(as, LAI_ANDI, RID_TMP, RID_TMP, LJ_GC_WHITES); ++ val = ra_alloc1(as, ir->op2, rset_exclude(RSET_GPR, obj)); ++ emit_dji(as, LAI_LD_BU, tmp, obj, ++ ((int32_t)offsetof(GCupval, marked)-(int32_t)offsetof(GCupval, tv))&0xfff); ++ emit_dji(as, LAI_LD_BU, RID_TMP, val, ((int32_t)offsetof(GChead, marked))&0xfff); ++} ++ ++/* -- Arithmetic and logic operations ------------------------------------- */ ++ ++#if !LJ_SOFTFP ++static void asm_fparith(ASMState *as, IRIns *ir, LAIns lai) ++{ ++ Reg dest = ra_dest(as, ir, RSET_FPR); ++ Reg right, left = ra_alloc2(as, ir, RSET_FPR); ++ right = (left >> 8); left &= 255; ++ emit_djk(as, lai, dest, left, right); ++} ++ ++static void asm_fpunary(ASMState *as, IRIns *ir, LAIns lai) ++{ ++ Reg dest = ra_dest(as, ir, RSET_FPR); ++ Reg left = ra_hintalloc(as, ir->op1, dest, RSET_FPR); ++ emit_dj(as, lai, dest, left); ++} ++#endif ++ ++static void asm_fpmath(ASMState *as, IRIns *ir) ++{ ++#if !LJ_SOFTFP ++ if (ir->op2 <= IRFPM_TRUNC) ++ asm_callround(as, ir, IRCALL_lj_vm_floor + ir->op2); ++ 
else if (ir->op2 == IRFPM_SQRT) ++ asm_fpunary(as, ir, LAI_FSQRT_D); ++ else ++#endif ++ asm_callid(as, ir, IRCALL_lj_vm_floor + ir->op2); ++} ++ ++#if !LJ_SOFTFP ++#define asm_fpadd(as, ir) asm_fparith(as, ir, LAI_FADD_D) ++#define asm_fpsub(as, ir) asm_fparith(as, ir, LAI_FSUB_D) ++#define asm_fpmul(as, ir) asm_fparith(as, ir, LAI_FMUL_D) ++#elif LJ_64 /* && LJ_SOFTFP */ ++#define asm_fpadd(as, ir) asm_callid(as, ir, IRCALL_softfp_add) ++#define asm_fpsub(as, ir) asm_callid(as, ir, IRCALL_softfp_sub) ++#define asm_fpmul(as, ir) asm_callid(as, ir, IRCALL_softfp_mul) ++#endif ++ ++static void asm_add(ASMState *as, IRIns *ir) ++{ ++ IRType1 t = ir->t; ++ if (irt_isnum(t)) { ++ asm_fpadd(as, ir); ++ } else ++ { ++ /* TODO fmadd.s/d */ ++ Reg dest = ra_dest(as, ir, RSET_GPR); ++ Reg right, left = ra_hintalloc(as, ir->op1, dest, RSET_GPR); ++ if (irref_isk(ir->op2)) { ++ intptr_t k = get_kval(as, ir->op2); ++ if (checki16(k)) { ++ if (LJ_64 && irt_is64(t)) { ++ emit_add(as, dest, left, k); ++ } else { ++ emit_addw(as, dest, left, k); ++ } ++ return; ++ } ++ } ++ right = ra_alloc1(as, ir->op2, rset_exclude(RSET_GPR, left)); ++ emit_djk(as, (LJ_64 && irt_is64(t)) ? LAI_ADD_D : LAI_ADD_W, dest, ++ left, right); ++ } ++} ++ ++static void asm_sub(ASMState *as, IRIns *ir) ++{ ++ if (irt_isnum(ir->t)) { ++ asm_fpsub(as, ir); ++ } else ++ { ++ Reg dest = ra_dest(as, ir, RSET_GPR); ++ Reg right, left = ra_alloc2(as, ir, RSET_GPR); ++ right = (left >> 8); left &= 255; ++ emit_djk(as, (LJ_64 && irt_is64(ir->t)) ? LAI_SUB_D : LAI_SUB_W, dest, ++ left, right); ++ } ++} ++ ++static void asm_mul(ASMState *as, IRIns *ir) ++{ ++ if (irt_isnum(ir->t)) { ++ asm_fpmul(as, ir); ++ } else ++ { ++ Reg dest = ra_dest(as, ir, RSET_GPR); ++ Reg right, left = ra_alloc2(as, ir, RSET_GPR); ++ right = (left >> 8); left &= 255; ++ if (LJ_64 && irt_is64(ir->t)) { ++ emit_djk(as, LAI_MUL_D, dest, left, right); ++ } else { ++ emit_djk(as, LAI_MUL_W, dest, left, right); ++ } ++ } ++} ++ ++static void asm_mod(ASMState *as, IRIns *ir) ++{ ++#if LJ_64 && LJ_HASFFI ++ if (!irt_isint(ir->t)) ++ asm_callid(as, ir, irt_isi64(ir->t) ? IRCALL_lj_carith_modi64 : ++ IRCALL_lj_carith_modu64); ++ else ++#endif ++ asm_callid(as, ir, IRCALL_lj_vm_modi); ++} ++ ++#if !LJ_SOFTFP ++static void asm_pow(ASMState *as, IRIns *ir) ++{ ++#if LJ_64 && LJ_HASFFI ++ if (!irt_isnum(ir->t)) ++ asm_callid(as, ir, irt_isi64(ir->t) ? IRCALL_lj_carith_powi64 : ++ IRCALL_lj_carith_powu64); ++ else ++#endif ++ asm_callid(as, ir, IRCALL_lj_vm_powi); ++} ++ ++static void asm_div(ASMState *as, IRIns *ir) ++{ ++#if LJ_64 && LJ_HASFFI ++ if (!irt_isnum(ir->t)) ++ asm_callid(as, ir, irt_isi64(ir->t) ? IRCALL_lj_carith_divi64 : ++ IRCALL_lj_carith_divu64); ++ else ++#endif ++ asm_fparith(as, ir, LAI_DIV_D); ++} ++#endif ++ ++static void asm_fpdiv(ASMState *as, IRIns *ir) ++{ ++#if !LJ_SOFTFP ++ asm_fparith(as, ir, LAI_FDIV_D); ++#else ++ asm_callid(as, ir, IRCALL_softfp_div); ++#endif ++} ++ ++static void asm_neg(ASMState *as, IRIns *ir) ++{ ++#if !LJ_SOFTFP ++ if (irt_isnum(ir->t)) { ++ asm_fpunary(as, ir, LAI_FNEG_D); ++ } else ++#elif LJ_64 /* && LJ_SOFTFP */ ++ if (irt_isnum(ir->t)) { ++ Reg dest = ra_dest(as, ir, RSET_GPR); ++ Reg left = ra_hintalloc(as, ir->op1, dest, RSET_GPR); ++ emit_djk(as, LAI_XOR, dest, left, ++ ra_allock(as, 0x8000000000000000ll, rset_exclude(RSET_GPR, dest))); ++ } else ++#endif ++ { ++ Reg dest = ra_dest(as, ir, RSET_GPR); ++ Reg left = ra_hintalloc(as, ir->op1, dest, RSET_GPR); ++ emit_djk(as, (LJ_64 && irt_is64(ir->t)) ? 
LAI_SUB_D : LAI_SUB_W, dest, ++ RID_ZERO, left); ++ } ++} ++ ++#if !LJ_SOFTFP ++#define asm_abs(as, ir) asm_fpunary(as, ir, LAI_FABS_D) ++#elif LJ_64 /* && LJ_SOFTFP */ ++static void asm_abs(ASMState *as, IRIns *ir) ++{ ++ Reg dest = ra_dest(as, ir, RSET_GPR); ++ Reg left = ra_alloc1(as, ir->op1, RSET_GPR); ++ emit_djml(as, LAI_BSTRPICK_D, dest, left, 62, 0); //TODO 30+1+32 ++} ++#endif ++#define asm_atan2(as, ir) asm_callid(as, ir, IRCALL_atan2) ++#define asm_ldexp(as, ir) asm_callid(as, ir, IRCALL_ldexp) ++ ++static void asm_arithov(ASMState *as, IRIns *ir) ++{ ++ /* TODO */ ++ Reg right, left, tmp, dest = ra_dest(as, ir, RSET_GPR); ++ lua_assert(!irt_is64(ir->t)); ++ if (irref_isk(ir->op2)) { ++ int k = IR(ir->op2)->i; ++ if (ir->o == IR_SUBOV) k = -k; ++ if (checki16(k)) { /* (dest < left) == (k >= 0 ? 1 : 0) */ ++ left = ra_alloc1(as, ir->op1, RSET_GPR); ++ asm_guard(as, k >= 0 ? LAI_BNE : LAI_BEQ, RID_TMP, RID_ZERO); ++ emit_djk(as, LAI_SLT, RID_TMP, dest, dest == left ? RID_TMP : left); ++ emit_dj32i(as, dest, left, k); // addiu ++ if (dest == left) emit_move(as, RID_TMP, left); ++ return; ++ } ++ } ++ left = ra_alloc2(as, ir, RSET_GPR); ++ right = (left >> 8); left &= 255; ++ tmp = ra_scratch(as, rset_exclude(rset_exclude(rset_exclude(RSET_GPR, left), ++ right), dest)); ++ asm_guard(as, LAI_BLT, RID_TMP, RID_ZERO); ++ emit_djk(as, LAI_AND, RID_TMP, RID_TMP, tmp); ++ if (ir->o == IR_ADDOV) { /* ((dest^left) & (dest^right)) < 0 */ ++ emit_djk(as, LAI_XOR, RID_TMP, dest, dest == right ? RID_TMP : right); ++ } else { /* ((dest^left) & (dest^~right)) < 0 */ ++ emit_djk(as, LAI_XOR, RID_TMP, RID_TMP, dest); ++ emit_djk(as, LAI_NOR, RID_TMP, dest == right ? RID_TMP : right, RID_ZERO); ++ } ++ emit_djk(as, LAI_XOR, tmp, dest, dest == left ? RID_TMP : left); ++ emit_djk(as, ir->o == IR_ADDOV ? LAI_ADD_W : LAI_SUB_W, dest, left, right); ++ if (dest == left || dest == right) ++ emit_move(as, RID_TMP, dest == left ? 
left : right); ++} ++ ++#define asm_addov(as, ir) asm_arithov(as, ir) ++#define asm_subov(as, ir) asm_arithov(as, ir) ++ ++static void asm_mulov(ASMState *as, IRIns *ir) ++{ ++ Reg dest = ra_dest(as, ir, RSET_GPR); ++ Reg tmp, right, left = ra_alloc2(as, ir, RSET_GPR); ++ right = (left >> 8); left &= 255; ++ tmp = ra_scratch(as, rset_exclude(rset_exclude(rset_exclude(RSET_GPR, left), ++ right), dest)); ++ asm_guard(as, LAI_BNE, RID_TMP, tmp); ++ emit_dju(as, LAI_SRAI_W, RID_TMP, dest, 31); ++ emit_djk(as, LAI_MUL_W, dest, left, right); ++ emit_djk(as, LAI_MULH_W, tmp, left, right); ++} ++ ++static void asm_bnot(ASMState *as, IRIns *ir) ++{ ++ Reg left, right, dest = ra_dest(as, ir, RSET_GPR); ++ IRIns *irl = IR(ir->op1); ++ if (mayfuse(as, ir->op1) && irl->o == IR_BOR) { ++ left = ra_alloc2(as, irl, RSET_GPR); ++ right = (left >> 8); left &= 255; ++ } else { ++ left = ra_hintalloc(as, ir->op1, dest, RSET_GPR); ++ right = RID_ZERO; ++ } ++ emit_djk(as, LAI_NOR, dest, left, right); ++} ++ ++static void asm_bswap(ASMState *as, IRIns *ir) ++{ ++ Reg dest = ra_dest(as, ir, RSET_GPR); ++ Reg left = ra_alloc1(as, ir->op1, RSET_GPR); ++ if (irt_is64(ir->t)) { ++ emit_dj(as, LAI_REVH_D, dest, RID_TMP); ++ emit_dj(as, LAI_REVB_4H, RID_TMP, left); ++ } else { ++ emit_dju(as, LAI_ROTRI_W, dest, RID_TMP, 16); ++ emit_dj(as, LAI_REVB_2H, RID_TMP, left); ++ } ++} ++ ++static void asm_bitop1(ASMState *as, LAIns lai, Reg rd, Reg rj, int32_t i) ++{ ++ emit_djk(as, LAI_ADD_W, rd, rd, RID_R20); ++ emit_dju(as, LAI_SLLI_W, RID_R20, RID_R20, 12); ++ emit_dju(as, lai, RID_R20, RID_R20, (i&0xf000)>>12); ++ emit_dju(as, LAI_SRLI_W, RID_R20, RID_R20, 12); ++ emit_dju(as, lai, rd, RID_R20, i&0xfff); ++ emit_djk(as, LAI_OR, RID_R20, RID_R0, rj); ++} ++ ++static void asm_bitop(ASMState *as, IRIns *ir, LAIns lai, LAIns laik) ++{ ++ Reg dest = ra_dest(as, ir, RSET_GPR); ++ Reg right, left = ra_hintalloc(as, ir->op1, dest, RSET_GPR); ++ if (irref_isk(ir->op2)) { ++ intptr_t k = get_kval(as, ir->op2); ++ if (checku16(k)) { ++ asm_bitop1(as, laik, dest, left, k); ++ return; ++ } ++ } ++ right = ra_alloc1(as, ir->op2, rset_exclude(RSET_GPR, left)); ++ emit_djk(as, lai, dest, left, right); ++} ++ ++#define asm_band(as, ir) asm_bitop(as, ir, LAI_AND, LAI_ANDI) ++#define asm_bor(as, ir) asm_bitop(as, ir, LAI_OR, LAI_ORI) ++#define asm_bxor(as, ir) asm_bitop(as, ir, LAI_XOR, LAI_XORI) ++ ++static void asm_bitshift(ASMState *as, IRIns *ir, LAIns lai, LAIns laik) ++{ ++ Reg dest = ra_dest(as, ir, RSET_GPR); ++ if (irref_isk(ir->op2)) { /* Constant shifts. */ ++ uint32_t shift = (uint32_t)IR(ir->op2)->i; ++ if (LJ_64 && irt_is64(ir->t)) laik = laik + 0x8000; ++ emit_dju(as, laik, dest, ra_hintalloc(as, ir->op1, dest, RSET_GPR), ++ shift); ++ } else { ++ Reg right, left = ra_alloc2(as, ir, RSET_GPR); ++ right = (left >> 8); left &= 255; ++ if (LJ_64 && irt_is64(ir->t)) { ++ if (lai == LAI_ROTR_W) { ++ lai = lai + 0x8000; ++ } else { ++ lai = lai + 0x18000; ++ } ++ } ++ emit_djk(as, lai, dest, left, right); /* Shift amount is in rs. */ ++ } ++} ++ ++#define asm_bshl(as, ir) asm_bitshift(as, ir, LAI_SLL_W, LAI_SLLI_W) ++#define asm_bshr(as, ir) asm_bitshift(as, ir, LAI_SRL_W, LAI_SRLI_W) ++#define asm_bsar(as, ir) asm_bitshift(as, ir, LAI_SRA_W, LAI_SRAI_W) ++#define asm_brol(as, ir) lua_assert(0) ++#define asm_bror(as, ir) asm_bitshift(as, ir, LAI_ROTR_W, LAI_ROTRI_W) ++ ++ ++#if LJ_SOFTFP ++static void asm_sfpmin_max(ASMState *as, IRIns *ir) ++{ ++ CCallInfo ci = lj_ir_callinfo[(IROp)ir->o == IR_MIN ? 
IRCALL_lj_vm_sfmin : IRCALL_lj_vm_sfmax]; ++ IRRef args[2]; ++ args[0] = ir->op1; ++ args[1] = ir->op2; ++ asm_setupresult(as, ir, &ci); ++ emit_call(as, (void *)ci.func); //TODO ++ ci.func = NULL; ++ asm_gencall(as, &ci, args); ++} ++#endif ++ ++static void asm_min_max(ASMState *as, IRIns *ir, int ismax) ++{ ++ if (irt_isnum(ir->t)) { ++#if LJ_SOFTFP ++ asm_sfpmin_max(as, ir); ++#else ++ Reg dest = ra_dest(as, ir, RSET_FPR); ++ Reg right, left = ra_alloc2(as, ir, RSET_FPR); ++ right = (left >> 8); left &= 255; ++ emit_djk(as, ismax ? LAI_FMAX_D : LAI_FMIN_D, dest, left, right); ++#endif ++ } else { ++ Reg dest = ra_dest(as, ir, RSET_GPR); ++ Reg right, left = ra_alloc2(as, ir, RSET_GPR); ++ right = (left >> 8); left &= 255; ++ if (left == right) { ++ if (dest != left) emit_move(as, dest, left); ++ } else { ++ emit_djk(as, LAI_OR, dest, dest, RID_TMP); ++ if (dest != right) { ++ emit_djk(as, LAI_MASKEQZ, RID_TMP, right, RID_TMP); ++ emit_djk(as, LAI_MASKNEZ, dest, left, RID_TMP); ++ } else { ++ emit_djk(as, LAI_MASKNEZ, RID_TMP, left, RID_TMP); ++ emit_djk(as, LAI_MASKEQZ, dest, right, RID_TMP); ++ } ++ emit_djk(as, LAI_SLT, RID_TMP, ++ ismax ? left : right, ismax ? right : left); ++ } ++ } ++} ++ ++#define asm_min(as, ir) asm_min_max(as, ir, 0) ++#define asm_max(as, ir) asm_min_max(as, ir, 1) ++ ++/* -- Comparisons --------------------------------------------------------- */ ++ ++#if LJ_SOFTFP ++/* SFP comparisons. */ ++static void asm_sfpcomp(ASMState *as, IRIns *ir) ++{ ++ const CCallInfo *ci = &lj_ir_callinfo[IRCALL_softfp_cmp]; ++ RegSet drop = RSET_SCRATCH; ++ Reg r; ++ IRRef args[2]; ++ args[0] = ir->op1; ++ args[1] = ir->op2; ++ ++ for (r = REGARG_FIRSTGPR; r <= REGARG_FIRSTGPR+1; r++) { ++ if (!rset_test(as->freeset, r) && ++ regcost_ref(as->cost[r]) == args[r-REGARG_FIRSTGPR]) ++ rset_clear(drop, r); ++ } ++ ra_evictset(as, drop); ++ ++ asm_setupresult(as, ir, ci); ++ ++ switch ((IROp)ir->o) { ++ case IR_LT: ++ asm_guard(as, LAI_BGE, RID_RET, RID_ZERO); ++ break; ++ case IR_ULT: ++ asm_guard(as, LAI_BEQ, RID_RET, RID_TMP); ++ emit_loadi(as, RID_TMP, 1); ++ asm_guard(as, LAI_BEQ, RID_RET, RID_ZERO); ++ break; ++ case IR_GE: ++ asm_guard(as, LAI_BEQ, RID_RET, RID_TMP); ++ emit_loadi(as, RID_TMP, 2); ++ asm_guard(as, LAI_BLT, RID_RET, RID_ZERO); ++ break; ++ case IR_LE: ++ asm_guard(as, LAI_BLT, RID_ZERO, RID_RET); ++ break; ++ case IR_GT: ++ asm_guard(as, LAI_BEQ, RID_RET, RID_TMP); ++ emit_loadi(as, RID_TMP, 2); ++ asm_guard(as, LAI_BGE, RID_ZERO, RID_RET); ++ break; ++ case IR_UGE: ++ asm_guard(as, LAI_BLT, RID_RET, RID_ZERO); ++ break; ++ case IR_ULE: ++ asm_guard(as, LAI_BEQ, RID_RET, RID_TMP); ++ emit_loadi(as, RID_TMP, 1); ++ break; ++ case IR_UGT: case IR_ABC: ++ asm_guard(as, LAI_BGE, RID_ZERO, RID_RET); ++ break; ++ case IR_EQ: case IR_NE: ++ asm_guard(as, (ir->o & 1) ? LAI_BEQ : LAI_BNE, RID_RET, RID_ZERO); ++ default: ++ break; ++ } ++ asm_gencall(as, ci, args); ++} ++#endif ++ ++static void asm_comp(ASMState *as, IRIns *ir) ++{ ++ /* ORDER IR: LT GE LE GT ULT UGE ULE UGT. */ ++ /* 00 01 10 11 100 101 110 111 */ ++ IROp op = ir->o; ++ if (irt_isnum(ir->t)) { ++#if LJ_SOFTFP ++ asm_sfpcomp(as, ir); ++#else ++ Reg tmp, right, left = ra_alloc2(as, ir, RSET_FPR); ++ right = (left >> 8); left &= 255; ++ tmp = ra_scratch(as, rset_exclude(rset_exclude(RSET_FPR, left), right)); ++ asm_guard21(as, (op&1) ? 
LAI_BCNEZ : LAI_BCEQZ, 0, (tmp&7)); ++ // emit_dst(as, MIPSI_CMP_LT_D + ((op&3) ^ ((op>>2)&1)), tmp, left, right); //TODO ++ // use case ++ switch (op) { ++ case IR_LT: case IR_UGE: ++ emit_djk(as, LAI_FCMP_CLT_D, 0, left, right); ++ break; ++ case IR_GE: case IR_ULT: ++ emit_djk(as, LAI_FCMP_CULT_D, 0, left, right); ++ break; ++ case IR_LE: case IR_UGT: case IR_ABC: ++ emit_djk(as, LAI_FCMP_CLE_D, 0, left, right); ++ break; ++ case IR_GT: case IR_ULE: ++ emit_djk(as, LAI_FCMP_CULE_D, 0, left, right); ++ break; ++ case IR_EQ: ++ emit_djk(as, LAI_FCMP_CEQ_D, 0, left, right); ++ break; ++ case IR_NE: ++ emit_djk(as, LAI_FCMP_CNE_D, 0, left, right); ++ break; ++ default: ++ break; ++ } ++#endif ++ } else { ++ Reg right, left = ra_alloc1(as, ir->op1, RSET_GPR); ++ if (op == IR_ABC) op = IR_UGT; ++ if ((op&4) == 0 && irref_isk(ir->op2) && get_kval(as, ir->op2) == 0) { ++ /* MIPSIns mi = (op&2) ? ((op&1) ? MIPSI_BLEZ : MIPSI_BGTZ) : ++ ((op&1) ? MIPSI_BLTZ : MIPSI_BGEZ); ++ asm_guard(as, mi, left, 0); ++ */ ++ if (op&2) { ++ if (op&1) { ++ asm_guard(as, LAI_BGE, RID_ZERO, left); ++ } else { ++ asm_guard(as, LAI_BLT, RID_ZERO, left); ++ } ++ } else { ++ if (op&1) { ++ asm_guard(as, LAI_BLT, left, RID_ZERO); ++ } else { ++ asm_guard(as, LAI_BGE, left, RID_ZERO); ++ } ++ } ++ } else { ++ if (irref_isk(ir->op2)) { ++ intptr_t k = get_kval(as, ir->op2); ++ if ((op&2)) k++; ++ if (checki16(k)) { ++ asm_guard(as, (op&1) ? LAI_BNE : LAI_BEQ, RID_TMP, RID_ZERO); ++ emit_djk(as, (op&4) ? LAI_SLTU : LAI_SLT, //TODO si12 ++ RID_TMP, left, RID_R20); ++ //emit_djk(as, LAI_ADD_D, RID_R20, RID_R19, RID_ZERO); ++ //emit_dju(as, LAI_ORI, RID_R19, RID_R19, k&0xfff); ++ //emit_di(as, LAI_LU12I_W, RID_R19, (k>>12)&0xfffff); ++ //emit_dj32i(as, RID_R20, RID_ZERO, k); ++ emit_d16i(as, RID_R20, k); ++ return; ++ } ++ } ++ right = ra_alloc1(as, ir->op2, rset_exclude(RSET_GPR, left)); ++ asm_guard(as, ((op^(op>>1))&1) ? LAI_BNE : LAI_BEQ, RID_TMP, RID_ZERO); ++ emit_djk(as, (op&4) ? LAI_SLTU : LAI_SLT, ++ RID_TMP, (op&2) ? right : left, (op&2) ? left : right); ++ } ++ } ++} ++ ++static void asm_equal(ASMState *as, IRIns *ir) ++{ ++ Reg right, left = ra_alloc2(as, ir, (!LJ_SOFTFP && irt_isnum(ir->t)) ? ++ RSET_FPR : RSET_GPR); ++ right = (left >> 8); left &= 255; ++ if (irt_isnum(ir->t)) { ++#if LJ_SOFTFP ++ asm_sfpcomp(as, ir); ++#else ++ Reg tmp = ra_scratch(as, rset_exclude(rset_exclude(RSET_FPR, left), right)); ++ asm_guard21(as, (ir->o & 1) ? LAI_BCNEZ : LAI_BCEQZ, 0, (tmp&7)); ++ //emit_djk(as, LAI_FCMP_CEQ_D, tmp&7, left, right); ++ emit_djk(as, LAI_FCMP_CEQ_D, 0, left, right); ++#endif ++ } else { ++ asm_guard(as, (ir->o & 1) ? LAI_BEQ : LAI_BNE, left, right); ++ } ++} ++ ++/* -- Support for 64 bit ops in 32 bit mode ------------------------------- */ ++ ++/* Hiword op of a split 64 bit op. Previous op must be the loword op. */ ++static void asm_hiop(ASMState *as, IRIns *ir) ++{ ++ UNUSED(as); UNUSED(ir); ++ lua_assert(0); /* Unused on 64 bit. */ ++} ++ ++/* -- Profiling ----------------------------------------------------------- */ ++ ++static void asm_prof(ASMState *as, IRIns *ir) ++{ ++ UNUSED(ir); ++ asm_guard(as, LAI_BNE, RID_TMP, RID_ZERO); ++ emit_dju(as, LAI_ANDI, RID_TMP, RID_TMP, HOOK_PROFILE); //HOOK_PROFILE=0x80 ++ emit_lsglptr2(as, LAI_LD_BU, RID_TMP, //TODO ++ (int32_t)offsetof(global_State, hookmask)); ++} ++ ++/* -- Stack handling ------------------------------------------------------ */ ++ ++/* Check Lua stack size for overflow. Use exit handler as fallback. 
*/ ++static void asm_stack_check(ASMState *as, BCReg topslot, ++ IRIns *irp, RegSet allow, ExitNo exitno) ++{ ++ /* Try to get an unused temp. register, otherwise spill/restore RID_RET*. */ ++ Reg tmp, pbase = irp ? (ra_hasreg(irp->r) ? irp->r : RID_TMP) : RID_BASE; ++ ExitNo oldsnap = as->snapno; ++ rset_clear(allow, pbase); ++ tmp = allow ? rset_pickbot(allow) : RID_RET; ++ as->snapno = exitno; ++ asm_guard(as, LAI_BNE, RID_TMP, RID_ZERO); ++ as->snapno = oldsnap; ++ if (allow == RSET_EMPTY) /* Restore temp. register. */ ++ emit_dji(as, LAI_LD_D, tmp, RID_SP, 0); ++ else ++ ra_modified(as, tmp); ++ //emit_dji(as, LAI_SLTUI, RID_TMP, RID_TMP, ((int32_t)(8*topslot))&0xfff); //TODO si12 ++ emit_djk(as, LAI_SLTU, RID_TMP, RID_TMP, RID_R19); ++ emit_d16i(as, RID_R19, (int32_t)(8*topslot)); ++ emit_djk(as, LAI_SUB_D, RID_TMP, tmp, pbase); ++ emit_djk(as, LAI_LDX_D, tmp, tmp, RID_R19); ++ emit_loadi(as, RID_R19, offsetof(lua_State, maxstack)); ++ if (pbase == RID_TMP) ++ emit_getgl(as, RID_TMP, jit_base); ++ emit_getgl(as, tmp, cur_L); ++ if (allow == RSET_EMPTY) /* Spill temp. register. */ ++ emit_dji(as, LAI_ST_D, tmp, RID_SP, 0); ++} ++ ++/* Restore Lua stack from on-trace state. */ ++static void asm_stack_restore(ASMState *as, SnapShot *snap) ++{ ++ SnapEntry *map = &as->T->snapmap[snap->mapofs]; ++#if defined(LUA_USE_ASSERT) ++ SnapEntry *flinks = &as->T->snapmap[snap_nextofs(as->T, snap)-1-LJ_FR2]; ++#endif ++ MSize n, nent = snap->nent; ++ /* Store the value of all modified slots to the Lua stack. */ ++ for (n = 0; n < nent; n++) { ++ SnapEntry sn = map[n]; ++ BCReg s = snap_slot(sn); ++ int32_t ofs = 8*((int32_t)s-1-LJ_FR2); ++ IRRef ref = snap_ref(sn); ++ IRIns *ir = IR(ref); ++ if ((sn & SNAP_NORESTORE)) ++ continue; ++ if (irt_isnum(ir->t)) { ++#if LJ_SOFTFP /* && LJ_64 */ ++ Reg src = ra_alloc1(as, ref, rset_exclude(RSET_GPR, RID_BASE)); ++ //emit_dji(as, LAI_ST_D, src, RID_BASE, ofs&0xfff); //TODO si12 ++ emit_djk(as, LAI_STX_D, src, RID_BASE, RID_R19); ++#else ++ Reg src = ra_alloc1(as, ref, RSET_FPR); ++ //emit_dji(as, LAI_FST_D, src, RID_BASE, ofs&0xfff); //TODO si12 ++ emit_djk(as, LAI_FSTX_D, src, RID_BASE, RID_R19); ++#endif ++ emit_d16i(as, RID_R19, ofs); ++ } else { ++ asm_tvstore64(as, RID_BASE, ofs, ref); ++ } ++ checkmclim(as); ++ } ++ lua_assert(map + nent == flinks); ++} ++ ++/* -- GC handling --------------------------------------------------------- */ ++ ++/* Marker to prevent patching the GC check exit. */ ++#define LA_NOPATCH_GC_CHECK LAI_OR ++ ++/* Check GC threshold and do one or more GC steps. */ ++static void asm_gc_check(ASMState *as) ++{ ++ const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_gc_step_jit]; ++ IRRef args[2]; ++ MCLabel l_end; ++ Reg tmp; ++ ra_evictset(as, RSET_SCRATCH); ++ l_end = emit_label(as); ++ /* Exit trace if in GCSatomic or GCSfinalize. Avoids syncing GC objects. */ ++ /* Assumes asm_snap_prep() already done. */ ++ asm_guard(as, LAI_BNE, RID_RET, RID_ZERO); ++ args[0] = ASMREF_TMP1; /* global_State *g */ ++ args[1] = ASMREF_TMP2; /* MSize steps */ ++ asm_gencall(as, ci, args); ++ l_end[-3] = LA_NOPATCH_GC_CHECK; /* Replace the nop after the call. */ //TODO ++ //emit_dji(as, LAI_ADDI_D, ra_releasetmp(as, ASMREF_TMP1), RID_JGL, -32768); //TODO ++ //emit_dj32i(as, ra_releasetmp(as, ASMREF_TMP1), RID_JGL, -32768); ++ emit_add(as, ra_releasetmp(as, ASMREF_TMP1), RID_JGL, -32768); ++ tmp = ra_releasetmp(as, ASMREF_TMP2); ++ /* Jump around GC step if GC total < GC threshold. 
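++** (Added note:) concretely, the code below loads gc.total and gc.threshold from the global
++** state, sets RID_TMP = (total < threshold) with SLTU, and the BNE skips the lj_gc_step_jit
++** call (which receives as->gcsteps as its second argument) while that condition holds.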
*/ ++ emit_branch(as, LAI_BNE, RID_TMP, RID_ZERO, l_end); ++ emit_loadi(as, tmp, as->gcsteps); ++ emit_djk(as, LAI_SLTU, RID_TMP, RID_TMP, tmp); ++ emit_getgl(as, tmp, gc.threshold); ++ emit_getgl(as, RID_TMP, gc.total); ++ as->gcsteps = 0; ++ checkmclim(as); ++} ++ ++/* -- Loop handling ------------------------------------------------------- */ ++ ++/* Fixup the loop branch. */ ++static void asm_loop_fixup(ASMState *as) ++{ ++ MCode *p = as->mctop; ++ MCode *target = as->mcp; ++ if (as->loopinv) { /* Inverted loop branch? */ ++ /* asm_guard already inverted the bceqz/bcnez/beq/bne/blt/bge, and patched the final b. */ ++ uint32_t mask = (p[-2] & 0xfc000000) == 0x48000000 ? 0x1fffffu : 0xffffu; ++ ptrdiff_t delta = target - p ; ++ if (mask == 0x1fffffu) { ++ p[-2] = p[-2] | LAF_I((uint32_t)delta & 0xffffu) | (((uint32_t)delta & 0x1f0000u) >> 16); ++ } else { ++ p[-2] |= LAF_I(delta & 0xffffu); //TODO ++ } ++ if (p[-1] == 0 || p[-1] == 0x109c21) //TODO ++ p[-1] = LAI_NOP; ++ } else { ++ /* b */ ++ ptrdiff_t delta = target - (p - 1); ++ p[-1] = LAI_B | LAF_I(delta & 0xffffu) | ((delta & 0x3ff0000) >> 16); //TODO ++ if ( (p[-2] & LAI_B) == LAI_B || (p[-2] & LAI_BL) == LAI_BL || (p[-2] & LAI_BEQ) == LAI_BEQ || (p[-2] & LAI_BNE) == LAI_BNE ||(p[-2] & LAI_BLT) == LAI_BLT || (p[-2] & LAI_BGE) == LAI_BGE) ++ p[-3] = LAI_NOP; ++ } ++} ++ ++/* Fixup the tail of the loop. */ ++static void asm_loop_tail_fixup(ASMState *as) ++{ ++ if (as->loopinv) as->mctop--; ++} ++ ++/* -- Head of trace ------------------------------------------------------- */ ++ ++/* Coalesce BASE register for a root trace. */ ++static void asm_head_root_base(ASMState *as) ++{ ++ IRIns *ir = IR(REF_BASE); ++ Reg r = ir->r; ++ if (as->loopinv) as->mctop--; ++ if (ra_hasreg(r)) { ++ ra_free(as, r); ++ if (rset_test(as->modset, r) || irt_ismarked(ir->t)) ++ ir->r = RID_INIT; /* No inheritance for modified BASE register. */ ++ if (r != RID_BASE) ++ emit_move(as, r, RID_BASE); ++ } ++} ++ ++/* Coalesce BASE register for a side trace. */ ++static RegSet asm_head_side_base(ASMState *as, IRIns *irp, RegSet allow) ++{ ++ IRIns *ir = IR(REF_BASE); ++ Reg r = ir->r; ++ if (as->loopinv) as->mctop--; ++ if (ra_hasreg(r)) { ++ ra_free(as, r); ++ if (rset_test(as->modset, r) || irt_ismarked(ir->t)) ++ ir->r = RID_INIT; /* No inheritance for modified BASE register. */ ++ if (irp->r == r) { ++ rset_clear(allow, r); /* Mark same BASE register as coalesced. */ ++ } else if (ra_hasreg(irp->r) && rset_test(as->freeset, irp->r)) { ++ rset_clear(allow, irp->r); ++ emit_move(as, r, irp->r); /* Move from coalesced parent reg. */ ++ } else { ++ emit_getgl(as, r, jit_base); /* Otherwise reload BASE. */ ++ } ++ } ++ return allow; ++} ++ ++/* -- Tail of trace ------------------------------------------------------- */ ++ ++/* Fixup the tail code. */ ++static void asm_tail_fixup(ASMState *as, TraceNo lnk) ++{ ++ MCode *target = lnk ? traceref(as->J,lnk)->mcode : (MCode *)lj_vm_exit_interp; ++ int32_t spadj = as->T->spadjust; ++ MCode *p = as->mctop - 1; ++ if (spadj == 0) { ++ p[-1] = LAI_NOP; ++ } else { ++ p[-1] = LAI_ADDI_D|LAF_D(RID_SP)|LAF_J(RID_SP)|LAF_I(spadj); ++ } ++ ++ MCode *tmp = p; ++ *p = LAI_B | LAF_I((uintptr_t)(target-tmp)&0xffffu) | (((uintptr_t)(target-tmp)&0x3ff0000u) >> 16); ++} ++ ++/* Prepare tail of code. */ ++static void asm_tail_prep(ASMState *as) ++{ ++ // as->mcp = as->mctop-2; /* Leave room for branch plus nop or stack adj. */ ++ // as->invmcp = as->loopref ? as->mcp : NULL; ++ MCode *p = as->mctop - 1; /* Leave room for exit branch. 
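++** (Added note:) for a looping trace this slot doubles as the inverted loop branch (invmcp);
++** otherwise one more word is reserved so asm_tail_fixup can place the stack-pointer
++** adjustment in front of the final B.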
*/ ++ if (as->loopref) { ++ as->invmcp = as->mcp = p; ++ } else { ++ as->mcp = p-1; /* Leave room for stack pointer adjustment. */ ++ as->invmcp = NULL; ++ } ++ *p = LAI_NOP; /* Prevent load/store merging. */ ++} ++ ++/* -- Trace setup --------------------------------------------------------- */ ++ ++/* Ensure there are enough stack slots for call arguments. */ ++static Reg asm_setup_call_slots(ASMState *as, IRIns *ir, const CCallInfo *ci) ++{ ++ IRRef args[CCI_NARGS_MAX*2]; ++ uint32_t i, nargs = CCI_XNARGS(ci); ++ int nslots = 0, ngpr = REGARG_NUMGPR, nfpr = REGARG_NUMFPR; ++ asm_collectargs(as, ir, ci, args); ++ for (i = 0; i < nargs; i++) { ++ if (args[i] && irt_isfp(IR(args[i])->t)) { ++ if (nfpr > 0) nfpr--; else nslots += 2; ++ } else { ++ if (ngpr > 0) ngpr--; else nslots += 2; ++ } ++ } ++ if (nslots > as->evenspill) /* Leave room for args in stack slots. */ ++ as->evenspill = nslots; ++ return REGSP_HINT(RID_RET); ++ // return irt_isfp(ir->t) ? REGSP_HINT(RID_FPRET) : REGSP_HINT(RID_RET); ++} ++ ++static void asm_sparejump_setup(ASMState *as) ++{ ++ MCode *mxp = as->mctop; ++ if ((char *)mxp == (char *)as->J->mcarea + as->J->szmcarea) { ++ mxp -= 4*1; ++ as->mctop = mxp; ++ } ++} ++ ++static void asm_setup_target(ASMState *as) ++{ ++ asm_sparejump_setup(as); ++ asm_exitstub_setup(as); ++} ++ ++/* -- Trace patching ------------------------------------------------------ */ ++ ++/* Patch exit jumps of existing machine code to a new target. */ ++void lj_asm_patchexit(jit_State *J, GCtrace *T, ExitNo exitno, MCode *target) ++{ ++ MCode *p = T->mcode; ++ MCode *pe = (MCode *)((char *)p + T->szmcode); ++ MCode *px = exitstub_trace_addr(T, exitno); ++ MCode *cstart = NULL; ++ MCode *mcarea = lj_mcode_patch(J, p, 0); ++ ++ MCode exitload = LAI_ADDI_W | LAF_D(RID_TMP) | LAF_J(RID_ZERO) | LAF_I(exitno&0xfff); ++ ++ for (p++; p < pe; p++) { ++ if (*p == exitload) { ++ /* Look for exitstub branch, replace with branch to target. */ ++ ptrdiff_t delta = target - p - 1; ++ MCode ins = p[1]; ++ if ((ins & 0xfc000000u) == LAI_BEQ || ++ (ins & 0xfc000000u) == LAI_BNE || ++ (ins & 0xfc000000u) == LAI_BLT || ++ (ins & 0xfc000000u) == LAI_BGE) { ++ /* Patch beq/bne/blt/bge, if within range. */ ++ if (LAF_S_OK(delta, 16)) { ++ p[1] = (ins & 0xfc0003ffu) | LAF_I(delta & 0xffff); ++ if (!cstart) cstart = p + 1; ++ } ++ } else if ((ins & 0xfc000000u) == LAI_BCEQZ || ++ (ins & 0xfc000100u) == LAI_BCNEZ) { ++ /* Patch bceqz/bcnez, if within range. */ ++ if (p[-1] == LA_NOPATCH_GC_CHECK) { ++ } else if (LAF_S_OK(delta, 21)) { ++ *p = (ins & 0xfc0003e0u) | LAF_I(delta & 0xffff) | ((delta & 0x1f0000) >> 16); ++ if (!cstart) cstart = p; ++ } ++ } else if ((ins & 0xfc000000u) == LAI_B) { ++ /* Patch b. 
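++** (Added note:) the 26-bit PC-relative offset is rewritten in place: the low 16 bits go
++** into the immediate field and the upper 10 bits into bits 0-9 of the word, the same split
++** used by asm_loop_fixup and asm_tail_fixup above.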
*/ ++ lua_assert(LAF_S_OK(delta, 26)); ++ *p = (ins & 0xfc000000u) | LAF_I(delta & 0xffff) | ((delta & 0x3ff0000) >> 16); ++ if (!cstart) cstart = p; ++ } else if (p+2 == pe){ ++ if (p[2] == LAI_NOP) { ++ ptrdiff_t delta = target - &p[2]; ++ lua_assert(LAF_S_OK(delta, 26)); ++ p[2] = LAI_B | LAF_I(delta & 0xffff) | ((delta & 0x3ff0000) >> 16); ++ *p = LAI_NOP; ++ if (!cstart) cstart = p + 2; ++ } ++ } ++ } ++ } ++ if (cstart) lj_mcode_sync(cstart, px+1); ++ lj_mcode_patch(J, mcarea, 1); ++} +diff --git a/libs/luajit/LuaJIT-src/src/lj_ccall.c b/libs/luajit/LuaJIT-src/src/lj_ccall.c +index 5c252e5..426e79a 100644 +--- a/libs/luajit/LuaJIT-src/src/lj_ccall.c ++++ b/libs/luajit/LuaJIT-src/src/lj_ccall.c +@@ -562,6 +562,81 @@ + goto done; \ + } + ++#elif LJ_TARGET_LOONGARCH64 ++/* -- LoongArch lp64 calling conventions ---------------------------------------- */ ++ ++#define CCALL_HANDLE_STRUCTRET \ ++ /* Return structs of size <= 16 in a GPR. */ \ ++ cc->retref = !(sz <= 16); \ ++ if (cc->retref) cc->gpr[ngpr++] = (GPRArg)dp; ++ ++#define CCALL_HANDLE_STRUCTRET2 \ ++ ccall_copy_struct(cc, ctr, dp, sp, ccall_classify_struct(cts, ctr, ct)); ++ ++#define CCALL_HANDLE_COMPLEXRET \ ++ /* Complex values are returned in 1 or 2 FPRs. */ \ ++ cc->retref = 0; ++ ++#if LJ_ABI_SOFTFP /* LoongArch64 soft-float */ ++ ++#define CCALL_HANDLE_COMPLEXRET2 \ ++ if (ctr->size == 2*sizeof(float)) { /* Copy complex float from GPRs. */ \ ++ ((intptr_t *)dp)[0] = cc->gpr[0]; \ ++ } else { /* Copy complex double from GPRs. */ \ ++ ((intptr_t *)dp)[0] = cc->gpr[0]; \ ++ ((intptr_t *)dp)[1] = cc->gpr[1]; \ ++ } ++ ++#define CCALL_HANDLE_COMPLEXARG \ ++ /* Pass complex by value in 2 or 4 GPRs. */ ++ ++/* Position of soft-float 'float' return value depends on endianess. */ ++#define CCALL_HANDLE_RET \ ++ if (ctype_isfp(ctr->info) && ctr->size == sizeof(float)) \ ++ sp = (uint8_t *)cc->gpr + LJ_ENDIAN_SELECT(0, 4); ++ ++#else /* LoongArch64 hard-float */ ++ ++#define CCALL_HANDLE_COMPLEXRET2 \ ++ if (ctr->size == 2*sizeof(float)) { /* Copy complex float from FPRs. */ \ ++ ((float *)dp)[0] = cc->fpr[0].f; \ ++ ((float *)dp)[1] = cc->fpr[1].f; \ ++ } else { /* Copy complex double from FPRs. */ \ ++ ((double *)dp)[0] = cc->fpr[0].d; \ ++ ((double *)dp)[1] = cc->fpr[1].d; \ ++ } ++ ++#define CCALL_HANDLE_COMPLEXARG \ ++ if (sz == 2*sizeof(float)) { \ ++ isfp = 2; \ ++ if (ngpr < maxgpr) \ ++ sz *= 2; \ ++ } ++#define CCALL_HANDLE_RET \ ++ if (ctype_isfp(ctr->info) && ctr->size == sizeof(float)) \ ++ sp = (uint8_t *)&cc->fpr[0].f; ++ ++#endif ++ ++#define CCALL_HANDLE_STRUCTARG \ ++ /* Pass all structs by value in registers and/or on the stack. */ ++ ++#define CCALL_HANDLE_REGARG \ ++ if (isfp) { /* Try to pass argument in FPRs. */ \ ++ int n2 = ctype_isvector(d->info) ? 1 : n; \ ++ if (nfpr + n2 <= CCALL_NARG_FPR) { \ ++ dp = &cc->fpr[nfpr]; \ ++ nfpr += n2; \ ++ goto done; \ ++ } \ ++ } else { /* Try to pass argument in GPRs. */ \ ++ if (ngpr + n <= maxgpr) { \ ++ dp = &cc->gpr[ngpr]; \ ++ ngpr += n; \ ++ goto done; \ ++ } \ ++ } ++ + #else + #error "Missing calling convention definitions for this architecture" + #endif +@@ -873,6 +948,79 @@ void ccall_copy_struct(CCallState *cc, CType *ctr, void *dp, void *sp, int ft) + + #endif + ++/* -- LoongArch64 ABI struct classification ---------------------------- */ ++ ++#if LJ_TARGET_LOONGARCH64 ++ ++#define FTYPE_FLOAT 1 ++#define FTYPE_DOUBLE 2 ++ ++/* Classify FP fields (max. 2) and their types. 
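++** (Added note:) the result packs one 2-bit code per field, FTYPE_FLOAT=1 and FTYPE_DOUBLE=2.
++** For example, struct { float x; double y; } classifies as FTYPE_FLOAT | (FTYPE_DOUBLE << 2),
++** while 0 means the aggregate is not a pure float/double pair and is passed in GPRs.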
*/ ++static unsigned int ccall_classify_struct(CTState *cts, CType *ct, CType *ctf) ++{ ++ int n = 0, ft = 0; ++ if ((ctf->info & CTF_VARARG) || (ct->info & CTF_UNION)) ++ goto noth; ++ while (ct->sib) { ++ CType *sct; ++ ct = ctype_get(cts, ct->sib); ++ if (n == 2) { ++ goto noth; ++ } else if (ctype_isfield(ct->info)) { ++ sct = ctype_rawchild(cts, ct); ++ if (ctype_isfp(sct->info)) { ++ ft |= (sct->size == 4 ? FTYPE_FLOAT : FTYPE_DOUBLE) << 2*n; ++ n++; ++ } else { ++ goto noth; ++ } ++ } else if (ctype_isbitfield(ct->info) || ++ ctype_isxattrib(ct->info, CTA_SUBTYPE)) { ++ goto noth; ++ } ++ } ++ if (n <= 2) ++ return ft; ++noth: /* Not a homogeneous float/double aggregate. */ ++ return 0; /* Struct is in GPRs. */ ++} ++ ++static void ccall_copy_struct(CCallState *cc, CType *ctr, void *dp, void *sp, ++ int ft) ++{ ++ if (LJ_ABI_SOFTFP ? ft : ++ ((ft & 3) == FTYPE_FLOAT || (ft >> 2) == FTYPE_FLOAT)) { ++ int i, ofs = 0; ++ for (i = 0; ft != 0; i++, ft >>= 2) { ++ if ((ft & 3) == FTYPE_FLOAT) { ++#if LJ_ABI_SOFTFP ++ /* The 2nd FP struct result is in CARG1 (gpr[2]) and not CRET2. */ ++ memcpy((uint8_t *)dp + ofs, (uint8_t *)&cc->gpr[2*i], 4); ++#else ++ *(float *)((uint8_t *)dp + ofs) = cc->fpr[i].f; ++#endif ++ ofs += 4; ++ } else { ++ ofs = (ofs + 7) & ~7; /* 64 bit alignment. */ ++#if LJ_ABI_SOFTFP ++ *(intptr_t *)((uint8_t *)dp + ofs) = cc->gpr[2*i]; ++#else ++ *(double *)((uint8_t *)dp + ofs) = cc->fpr[i].d; ++#endif ++ ofs += 8; ++ } ++ } ++ } else { ++#if !LJ_ABI_SOFTFP ++ if (ft) sp = (uint8_t *)&cc->fpr[0]; ++#endif ++ memcpy(dp, sp, ctr->size); ++ } ++} ++ ++#endif ++ ++ + /* -- Common C call handling ---------------------------------------------- */ + + /* Infer the destination CTypeID for a vararg argument. */ +@@ -1068,7 +1216,7 @@ static int ccall_set_args(lua_State *L, CTState *cts, CType *ct, + cc->fpr[nfpr-1].d[0] = cc->fpr[nfpr-2].d[1]; /* Split complex double. */ + cc->fpr[nfpr-2].d[1] = 0; + } +-#elif LJ_TARGET_ARM64 || (LJ_TARGET_MIPS64 && !LJ_ABI_SOFTFP) ++#elif LJ_TARGET_ARM64 || (LJ_TARGET_MIPS64 && !LJ_ABI_SOFTFP) || (LJ_TARGET_LOONGARCH64 && !LJ_ABI_SOFTFP) + if (isfp == 2 && (uint8_t *)dp < (uint8_t *)cc->stack) { + /* Split float HFA or complex float into separate registers. */ + CTSize i = (sz >> 2) - 1; +@@ -1080,7 +1228,7 @@ static int ccall_set_args(lua_State *L, CTState *cts, CType *ct, + } + if (fid) lj_err_caller(L, LJ_ERR_FFI_NUMARG); /* Too few arguments. */ + +-#if LJ_TARGET_X64 || LJ_TARGET_PPC ++#if LJ_TARGET_X64 || LJ_TARGET_PPC || LJ_TARGET_LOONGARCH64 + cc->nfpr = nfpr; /* Required for vararg functions. */ + #endif + cc->nsp = nsp; +diff --git a/libs/luajit/LuaJIT-src/src/lj_ccall.h b/libs/luajit/LuaJIT-src/src/lj_ccall.h +index 59f6648..1c150fa 100644 +--- a/libs/luajit/LuaJIT-src/src/lj_ccall.h ++++ b/libs/luajit/LuaJIT-src/src/lj_ccall.h +@@ -126,6 +126,21 @@ typedef union FPRArg { + struct { LJ_ENDIAN_LOHI(float f; , float g;) }; + } FPRArg; + ++#elif LJ_TARGET_LOONGARCH64 ++ ++#define CCALL_NARG_GPR 8 ++#define CCALL_NARG_FPR 8 ++#define CCALL_NRET_GPR 2 ++#define CCALL_NRET_FPR (LJ_ABI_SOFTFP ? 0 : 2) ++#define CCALL_SPS_EXTRA 3 ++#define CCALL_SPS_FREE 1 ++ ++typedef intptr_t GPRArg; ++typedef union FPRArg { ++ double d; ++ struct { LJ_ENDIAN_LOHI(float f; , float g;) }; ++} FPRArg; ++ + #else + #error "Missing calling convention definitions for this architecture" + #endif +@@ -168,7 +183,7 @@ typedef LJ_ALIGN(CCALL_ALIGN_CALLSTATE) struct CCallState { + uint8_t resx87; /* Result on x87 stack: 1:float, 2:double. 
*/ + #elif LJ_TARGET_ARM64 + void *retp; /* Aggregate return pointer in x8. */ +-#elif LJ_TARGET_PPC ++#elif LJ_TARGET_PPC || LJ_TARGET_LOONGARCH64 + uint8_t nfpr; /* Number of arguments in FPRs. */ + #endif + #if LJ_32 +diff --git a/libs/luajit/LuaJIT-src/src/lj_ccallback.c b/libs/luajit/LuaJIT-src/src/lj_ccallback.c +index 846827b..c7cbd73 100644 +--- a/libs/luajit/LuaJIT-src/src/lj_ccallback.c ++++ b/libs/luajit/LuaJIT-src/src/lj_ccallback.c +@@ -71,6 +71,10 @@ static MSize CALLBACK_OFS2SLOT(MSize ofs) + + #define CALLBACK_MCODE_HEAD 52 + ++#elif LJ_TARGET_LOONGARCH64 ++ ++#define CALLBACK_MCODE_HEAD 52 ++ + #else + + /* Missing support for this architecture. */ +@@ -238,6 +242,37 @@ static void callback_mcode_init(global_State *g, uint32_t *page) + } + lua_assert(p - page <= CALLBACK_MCODE_SIZE); + } ++#elif LJ_TARGET_LOONGARCH64 ++static void *callback_mcode_init(global_State *g, uint32_t *page) ++{ ++ uint32_t *p = page; ++ uintptr_t target = (uintptr_t)(void *)lj_vm_ffi_callback; ++ uintptr_t ug = (uintptr_t)(void *)g; ++ MSize slot; ++ *p++ = LAI_LU12I_W | LAF_D(RID_R18) | LAF_I20((target >> 12) & 0xfffff); ++ *p++ = LAI_LU12I_W | LAF_D(RID_R17) | LAF_I20((ug >> 12) & 0xfffff); ++ *p++ = LAI_ORI | LAF_D(RID_R18) | LAF_J(RID_R18) | LAF_I(target & 0xfff); ++ *p++ = LAI_ORI | LAF_D(RID_R17) | LAF_J(RID_R17) | LAF_I(ug & 0xfff); ++ *p++ = LAI_LU32I_D | LAF_D(RID_R18) | LAF_I20((target >> 32) & 0xfffff); ++ *p++ = LAI_LU32I_D | LAF_D(RID_R17) | LAF_I20((ug >> 32) & 0xfffff); ++ *p++ = LAI_LU52I_D | LAF_D(RID_R18) | LAF_J(RID_R18) | LAF_I((target >> 52) & 0xfff); ++ *p++ = LAI_LU52I_D | LAF_D(RID_R17) | LAF_J(RID_R17) | LAF_I((ug >> 52) & 0xfff); ++ *p++ = LAI_NOP; ++ *p++ = LAI_NOP; ++ *p++ = LAI_NOP; ++ *p++ = LAI_NOP; ++ *p++ = LAI_JIRL | LAF_D(RID_R0) | LAF_J(RID_R18) | LAF_I(0); ++ for (slot = 0; slot < CALLBACK_MAX_SLOT; slot++) { ++ //*p = LAI_BEQ | LAF_D(RID_R0) | LAF_J(RID_R0) | ((page-p-1) & 0x0000ffffu); //TODO ++ //p++; ++ //*p++ = LAI_LU12I_W | LAF_D(RID_R19) | LAF_I20((slot >> 12) & 0xfffff); //TODO ++ *p++ = LAI_ORI | LAF_D(RID_R19) | LAF_J(RID_R0) | LAF_I(slot & 0xfff); ++ //*p = LAI_BEQ | LAF_D(RID_ZERO) | LAF_J(RID_ZERO) | ((page-p-1) & 0x0000ffffu); ++ *p = LAI_B | LAF_I((page-p) & 0xffff) | (((page-p) >> 16) & 0x3ff); ++ p++; ++ } ++ return p; ++} + #else + /* Missing support for this architecture. 
*/ + #define callback_mcode_init(g, p) UNUSED(p) +@@ -491,6 +526,37 @@ void lj_ccallback_mcode_free(CTState *cts) + } + #endif + ++#define CALLBACK_HANDLE_RET \ ++ if (ctype_isfp(ctr->info) && ctr->size == sizeof(float)) \ ++ ((float *)dp)[1] = *(float *)dp; ++ ++#elif LJ_TARGET_LOONGARCH64 ++ ++#if !LJ_ABI_SOFTFP /* LoongArch64 hard-float */ ++#define CALLBACK_HANDLE_REGARG \ ++ if (isfp) { \ ++ if (nfpr + n <= CCALL_NARG_FPR) { \ ++ sp = &cts->cb.fpr[nfpr]; \ ++ nfpr += n; \ ++ goto done; \ ++ } \ ++ } else { \ ++ if (ngpr + n <= maxgpr) { \ ++ sp = &cts->cb.gpr[ngpr]; \ ++ ngpr += n; \ ++ goto done; \ ++ } \ ++ } ++#else /* LoongArch64 soft-float */ ++#define CALLBACK_HANDLE_REGARG \ ++ if (ngpr + n <= maxgpr) { \ ++ UNUSED(isfp); \ ++ sp = (void*) &cts->cb.gpr[ngpr]; \ ++ ngpr += n; \ ++ goto done; \ ++ } ++#endif ++ + #define CALLBACK_HANDLE_RET \ + if (ctype_isfp(ctr->info) && ctr->size == sizeof(float)) \ + ((float *)dp)[1] = *(float *)dp; +diff --git a/libs/luajit/LuaJIT-src/src/lj_crecord.c b/libs/luajit/LuaJIT-src/src/lj_crecord.c +index e32ae23..89a70fa 100644 +--- a/libs/luajit/LuaJIT-src/src/lj_crecord.c ++++ b/libs/luajit/LuaJIT-src/src/lj_crecord.c +@@ -132,7 +132,7 @@ static IRType crec_ct2irt(CTState *cts, CType *ct) + #define CREC_COPY_REGWIN 2 + #elif LJ_TARGET_PPC || LJ_TARGET_MIPS + #define CREC_COPY_REGWIN 8 +-#else ++#else //TODO + #define CREC_COPY_REGWIN 4 + #endif + +diff --git a/libs/luajit/LuaJIT-src/src/lj_emit_loongarch64.h b/libs/luajit/LuaJIT-src/src/lj_emit_loongarch64.h +new file mode 100644 +index 0000000..bf778ea +--- /dev/null ++++ b/libs/luajit/LuaJIT-src/src/lj_emit_loongarch64.h +@@ -0,0 +1,384 @@ ++/* ++** LoongArch instruction emitter. ++** Copyright (C) 2005-2021 Mike Pall. See Copyright Notice in luajit.h ++** Copyright (C) 2021 Loongson Technology. All rights reserved. ++*/ ++ ++static intptr_t get_k64val(ASMState *as, IRRef ref) ++{ ++ IRIns *ir = IR(ref); ++ if (ir->o == IR_KINT64) { ++ return (intptr_t)ir_kint64(ir)->u64; ++ } else if (ir->o == IR_KGC) { ++ return (intptr_t)ir_kgc(ir); ++ } else if (ir->o == IR_KPTR || ir->o == IR_KKPTR) { ++ return (intptr_t)ir_kptr(ir); ++ } else if (LJ_SOFTFP && ir->o == IR_KNUM) { ++ return (intptr_t)ir_knum(ir)->u64; ++ } else { ++ lua_assert(ir->o == IR_KINT || ir->o == IR_KNULL); ++ return ir->i; /* Sign-extended. 
*/ ++ } ++} ++ ++#define get_kval(as, ref) get_k64val(as, ref) ++ ++ ++/* -- Emit basic instructions --------------------------------------------- */ ++ ++static void emit_djk(ASMState *as, LAIns lai, Reg rd, Reg rj, Reg rk) ++{ ++ *--as->mcp = lai | LAF_D(rd & 0x1f) | LAF_J(rj & 0x1f) | LAF_K(rk & 0x1f); ++} ++ ++#define emit_dj(as, lai, rd, rj) emit_djk(as, (lai), (rd)&31, (rj)&31, 0) ++ ++static void emit_di(ASMState *as, LAIns lai, Reg rd, int32_t i) ++{ ++ *--as->mcp = lai | LAF_D(rd & 0x1f) | LAF_I20(i & 0xfffff); ++} ++ ++static void emit_dji(ASMState *as, LAIns lai, Reg rd, Reg rj, int32_t i) ++{ ++ *--as->mcp = lai | LAF_D(rd & 0x1f) | LAF_J(rj & 0x1f) | LAF_I(i); ++} ++ ++static void emit_dju(ASMState *as, LAIns lai, Reg rd, Reg rj, uint32_t u) ++{ ++ *--as->mcp = lai | LAF_D(rd & 0x1f) | LAF_J(rj & 0x1f) | LAF_I(u); ++} ++ ++static void emit_dj32i(ASMState *as, Reg rd, Reg rj, int32_t i) ++{ ++ if ((i>>12) == 0 || (i>>12) == 0xfffff) { ++ *--as->mcp = LAI_ADDI_D | LAF_D(rd) | LAF_J(rj) | LAF_I(i&0xfff); ++ } else { ++ emit_djk(as, LAI_ADD_D, rd, RID_R19, rj); ++ if ((i&0xfff) != 0) { ++ emit_dju(as, LAI_ORI, RID_R19, RID_R19, i&0xfff); ++ } ++ if (((i>>12)&0xfffff) != 0) { ++ emit_di(as, LAI_LU12I_W, RID_R19, (i>>12)&0xfffff); ++ } ++ } ++} ++ ++static void emit_d16i(ASMState *as, Reg rd, int32_t i) ++{ ++ emit_dji(as, LAI_SRAI_D, rd, rd, 16); ++ emit_dji(as, LAI_ADDU16I_D, rd, RID_ZERO, (i&0xffff)); ++} ++ ++static void emit_addw(ASMState *as, Reg rd, Reg rj, int32_t i) ++{ ++ emit_djk(as, LAI_ADD_W, rd, rj, RID_R20); ++ emit_djk(as, LAI_OR, RID_R20, RID_R19, RID_R20); ++ emit_dji(as, LAI_SLLI_W, RID_R19, RID_R19, 24); ++ emit_dji(as, LAI_ORI, RID_R19, RID_R0, (i&0xff000000)>>24); ++ emit_djk(as, LAI_OR, RID_R20, RID_R19, RID_R20); ++ emit_dji(as, LAI_SLLI_W, RID_R19, RID_R19, 12); ++ emit_dji(as, LAI_ORI, RID_R19, RID_R0, (i&0xfff000)>>12); ++ emit_dji(as, LAI_ORI, RID_R20, RID_R0, i&0xfff); ++} ++ ++static void emit_add(ASMState *as, Reg rd, Reg rj, int64_t i) ++{ ++ emit_djk(as, LAI_ADD_D, rd, rj, RID_R20); ++ emit_djk(as, LAI_OR, RID_R20, RID_R19, RID_R20); ++ emit_dji(as, LAI_SLLI_D, RID_R19, RID_R19, 60); ++ emit_dji(as, LAI_ORI, RID_R19, RID_R0, (i&0xf000000000000000)>>60); ++ emit_djk(as, LAI_OR, RID_R20, RID_R19, RID_R20); ++ emit_dji(as, LAI_SLLI_D, RID_R19, RID_R19, 48); ++ emit_dji(as, LAI_ORI, RID_R19, RID_R0, (i&0xfff000000000000)>>48); ++ emit_djk(as, LAI_OR, RID_R20, RID_R19, RID_R20); ++ emit_dji(as, LAI_SLLI_D, RID_R19, RID_R19, 36); ++ emit_dji(as, LAI_ORI, RID_R19, RID_R0, (i&0xfff000000000)>>36); ++ emit_djk(as, LAI_OR, RID_R20, RID_R19, RID_R20); ++ emit_dji(as, LAI_SLLI_D, RID_R19, RID_R19, 24); ++ emit_dji(as, LAI_ORI, RID_R19, RID_R0, (i&0xfff000000)>>24); ++ emit_djk(as, LAI_OR, RID_R20, RID_R19, RID_R20); ++ emit_dji(as, LAI_SLLI_D, RID_R19, RID_R19, 12); ++ emit_dji(as, LAI_ORI, RID_R19, RID_R0, (i&0xfff000)>>12); ++ emit_dji(as, LAI_ORI, RID_R20, RID_R0, i&0xfff); ++} ++ ++static void emit_rotr(ASMState *as, Reg dest, Reg src, Reg tmp, uint32_t shift) ++{ ++ emit_dju(as, LAI_ROTRI_W, dest, src, shift); ++} ++ ++static void emit_djml(ASMState *as, LAIns lai, Reg rd, Reg rj, uint32_t m, uint32_t l) ++{ ++ *--as->mcp = lai | LAF_D(rd & 0x1f) | LAF_J(rj & 0x1f) | LAF_I(l & 0x3f) | LAF_M(m & 0x3f); ++} ++ ++static void emit_b_bl(ASMState *as, LAIns lai, uint32_t i) ++{ ++ *--as->mcp = lai | LAF_I(i & 0xffff) | ((i >> 16) & 0x3ff); ++} ++ ++ ++/* -- Emit loads/stores --------------------------------------------------- */ ++ ++/* Prefer rematerialization 
of BASE/L from global_State over spills. */ ++#define emit_canremat(ref) ((ref) <= REF_BASE) ++ ++/* Try to find a one step delta relative to another constant. */ ++static int emit_kdelta1(ASMState *as, Reg t, intptr_t i) ++{ ++ RegSet work = ~as->freeset & RSET_GPR; ++ while (work) { ++ Reg r = rset_picktop(work); ++ IRRef ref = regcost_ref(as->cost[r]); ++ lua_assert(r != t); ++ if (ref < ASMREF_L) { ++ intptr_t delta = (intptr_t)((uintptr_t)i - ++ (uintptr_t)(ra_iskref(ref) ? ra_krefk(as, ref) : get_kval(as, ref))); ++ if (checki16(delta)) { ++ //emit_dj32i(as, t, r, delta); // daddiu ++ emit_djk(as, LAI_ADD_D, t, r, RID_R19); ++ emit_d16i(as, RID_R19, delta); ++ return 1; ++ } ++ } ++ rset_clear(work, r); ++ } ++ return 0; /* Failed. */ ++} ++ ++/* Load a 32/64 bit constant into a GPR. */ ++//#define emit_loadi(as, rd, i) emit_loadk(as, rd, i) ++//#define emit_loadu64(as, rd, i) emit_loadk(as, rd, i) ++ ++//#define emit_loada(as, r, addr) emit_loadu64(as, (r), u64ptr((addr))) ++ ++/* Load a 32 bit constant into a GPR. */ ++static void emit_loadi(ASMState *as, Reg r, int32_t i) ++{ ++ if (checki16(i)) { ++ //emit_ti(as, MIPSI_LI, r, i); // MIPSI_LI = MIPSI_ADDIU ++ emit_addw(as, r, RID_R0, i); ++ } else { ++ if ((i & 0xffff)) { ++ intptr_t jgl = (intptr_t)(void *)J2G(as->J); ++ if ((uintptr_t)(i-jgl) < 65536) { ++ //emit_tsi(as, MIPSI_ADDIU, r, RID_JGL, i-jgl-32768); ++ emit_addw(as, r, RID_JGL, i-jgl-32768); ++ return; ++ } else if (emit_kdelta1(as, r, i)) { ++ return; ++ } else if ((i >> 16) == 0) { ++ //emit_tsi(as, MIPSI_ORI, r, RID_ZERO, i); ++ emit_dji(as, LAI_ORI, r, RID_R20, i&0xfff); ++ emit_dji(as, LAI_SLLI_W, RID_R20, RID_R20, 12); ++ emit_dji(as, LAI_ORI, RID_R20, RID_R0, (i>>12)&0xf); ++ return; ++ } ++ //emit_tsi(as, MIPSI_ORI, r, r, i); ++ emit_djk(as, LAI_OR, r, r, RID_R19); ++ emit_dji(as, LAI_ORI, RID_R19, RID_R20, i&0xfff); ++ emit_dji(as, LAI_SLLI_W, RID_R20, RID_R20, 12); ++ emit_dji(as, LAI_ORI, RID_R20, RID_R0, (i>>12)&0xf); ++ } ++ //emit_ti(as, MIPSI_LUI, r, (i >> 16)); ++ emit_dji(as, LAI_ADDU16I_D, r, RID_R0, (i>>16)&0xffff); ++ } ++} ++ ++#if LJ_64 ++/* Load a 64 bit constant into a GPR. */ ++static void emit_loadu64(ASMState *as, Reg r, uint64_t u64) ++{ ++ if (checki32((int64_t)u64)) { ++ emit_loadi(as, r, (int32_t)u64); ++ } else { ++ uint64_t delta = u64 - (uint64_t)(void *)J2G(as->J); ++ if (delta < 65536) { ++ //emit_tsi(as, MIPSI_DADDIU, r, RID_JGL, (int32_t)(delta-32768)); ++ emit_add(as, r, RID_JGL, (int32_t)(delta-32768)); ++ } else if (emit_kdelta1(as, r, (intptr_t)u64)) { ++ return; ++ } else { ++ *--as->mcp = LAI_LU52I_D | LAF_D(r) | LAF_J(r) | LAF_I((u64>>52)&0xfff); ++ *--as->mcp = LAI_LU32I_D | LAF_D(r) | LAF_I20((u64>>32)&0xfffff); ++ *--as->mcp = LAI_ORI | LAF_D(r) | LAF_J(r) | LAF_I(u64&0xfff); ++ *--as->mcp = LAI_LU12I_W | LAF_D(r) | LAF_I20((u64>>12)&0xfffff); ++ } ++ /* TODO: There are probably more optimization opportunities. */ ++ } ++} ++ ++#define emit_loada(as, r, addr) emit_loadu64(as, (r), u64ptr((addr))) ++#else ++#define emit_loada(as, r, addr) emit_loadi(as, (r), i32ptr((addr))) ++#endif ++ ++static Reg ra_allock(ASMState *as, intptr_t k, RegSet allow); ++static void ra_allockreg(ASMState *as, intptr_t k, Reg r); ++ ++/* Get/set from constant pointer. 
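++** (Added note:) pointers within 64K of the global_State are addressed JGL-relative; otherwise
++** a base register is allocated with ra_allock. Offsets that do not fit in a 12-bit immediate
++** fall back to the indexed forms (LDX.D/FLDX.D/FLDX.S) with the offset materialized in R19
++** via emit_d16i.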
*/ ++static void emit_lsptr(ASMState *as, LAIns lai, Reg r, void *p, RegSet allow) ++{ ++ intptr_t jgl = (intptr_t)(J2G(as->J)); ++ intptr_t i = (intptr_t)(p); ++ Reg base; ++ if ((uint32_t)(i-jgl) < 65536) { //TODO ++ i = i-jgl-32768; ++ base = RID_JGL; ++ } else { ++ base = ra_allock(as, i-(int16_t)i, allow); ++ } ++ // emit_dji(as, lai, r, base, i&0xfff); /* ld.d rd, rj, si12 */ ++ if ((i>>12) == 0) { ++ emit_dji(as, lai, r, base, i&0xfff); ++ } ++ else { ++ /* ld.d->ldx.d, fld.d->fldx.d, ld.s->fldx.s */ ++ if (lai == LAI_LD_D) ++ lai = LAI_LDX_D; ++ else if (lai == LAI_FLD_D) ++ lai = LAI_FLDX_D; ++ else if (lai == LAI_FLD_S) ++ lai = LAI_FLDX_S; ++ emit_djk(as, lai, r, base, RID_R19); ++ ++ /* move i to a GPR */ ++ emit_d16i(as, RID_R19, i); ++ } ++} ++ ++/* Load 64 bit IR constant into register. */ ++static void emit_loadk64(ASMState *as, Reg r, IRIns *ir) ++{ ++ const uint64_t *k = &ir_k64(ir)->u64; ++ Reg r64 = r; ++ if (rset_test(RSET_FPR, r)) { ++ r64 = RID_TMP; ++ emit_dj(as, LAI_MOVGR2FR_D, r, r64); ++ } ++ if ((uint32_t)((intptr_t)k-(intptr_t)J2G(as->J)) < 65536) ++ emit_lsptr(as, LAI_LD_D, r64, (void *)k, 0); /*To copy a doubleword from a GPR to an FPR*/ ++ else ++ emit_loadu64(as, r64, *k); ++} ++ ++/* Get/set global_State fields. */ ++static void emit_lsglptr2(ASMState *as, LAIns lai, Reg r, int32_t ofs) ++{ ++ emit_djk(as, lai, r, RID_JGL, RID_R20); ++ emit_loadi(as, RID_R20, (ofs-32768)); ++} ++ ++#define emit_getgl(as, r, field) \ ++ emit_lsglptr2(as, LAI_LDX_D, (r), (int32_t)offsetof(global_State, field)) ++#define emit_setgl(as, r, field) \ ++ emit_lsglptr2(as, LAI_STX_D, (r), (int32_t)offsetof(global_State, field)) ++ ++/* Trace number is determined from per-trace exit stubs. */ ++#define emit_setvmstate(as, i) UNUSED(i) ++ ++/* -- Emit control-flow instructions -------------------------------------- */ ++ ++/* Label for internal jumps. */ ++typedef MCode *MCLabel; ++ ++/* Return label pointing to current PC. 
*/ ++#define emit_label(as) ((as)->mcp) ++ ++static void emit_branch(ASMState *as, LAIns lai, Reg rj, Reg rd, MCode *target) ++{ ++ MCode *p = as->mcp; ++ ptrdiff_t delta = target - (p - 1); ++ lua_assert(((delta + 0x8000) >> 16) == 0); ++ *--p = lai | LAF_D(rd) | LAF_J(rj) | LAF_I(((uint32_t)delta & 0xffffu)); /*BEQ BNE BGE BLZ*/ ++ as->mcp = p; ++} ++ ++static void emit_branch21(ASMState *as, LAIns lai, Reg rj, MCode *target) ++{ ++ MCode *p = as->mcp; ++ ptrdiff_t delta = target - (p - 1); ++ lua_assert(((delta + 0x100000) >> 21) == 0); ++ *--p = lai | LAF_J(rj) | LAF_I(((uint32_t)delta & 0xffffu)) | ((uint32_t)delta & 0x1f0000u); /*BEQZ BNEZ BCEQZ BCNEZ*/ ++ as->mcp = p; ++} ++ ++static void emit_jmp(ASMState *as, MCode *target) ++{ ++ MCode *p = as->mcp; ++ ptrdiff_t delta = target - (p - 1); ++ emit_b_bl(as, LAI_B, (delta&0x3ffffff)); /*offs 26*/ ++} ++ ++#define emit_move(as, dst, src) \ ++ emit_djk(as, LAI_OR, (dst), (src), RID_ZERO) ++ ++static void emit_call(ASMState *as, void *target) ++{ ++ RegSet pick = as->freeset & RID2RSET(RID_CFUNCADDR); ++ if (!pick) { ++ Reg r = rset_picktop(as->freeset & RSET_GPR); ++ rset_clear(as->freeset, r); ++ emit_move(as, RID_CFUNCADDR, r); ++ emit_dji(as, LAI_JIRL, RID_RA, RID_CFUNCADDR, 0); ++ //emit_dj32i(as, RID_CFUNCADDR, RID_ZERO, (intptr_t)target); ++ emit_add(as, RID_CFUNCADDR, RID_ZERO, (intptr_t)target); ++ emit_move(as, r, RID_CFUNCADDR); ++ rset_set(as->freeset, (r)); ++ } else { ++ emit_dji(as, LAI_JIRL, RID_RA, RID_CFUNCADDR, 0); ++ ra_allockreg(as, (intptr_t)target, RID_CFUNCADDR); ++ } ++} ++ ++/* -- Emit generic operations --------------------------------------------- */ ++ ++/* Generic move between two regs. */ ++static void emit_movrr(ASMState *as, IRIns *ir, Reg dst, Reg src) ++{ ++ if (dst < RID_MAX_GPR) ++ emit_move(as, dst, src); ++ else ++ emit_dj(as, irt_isnum(ir->t) ? LAI_FMOV_D : LAI_FMOV_S, dst, src); ++} ++ ++/* Generic load of register with base and (small) offset address. */ ++static void emit_loadofs(ASMState *as, IRIns *ir, Reg r, Reg base, int32_t ofs) ++{ ++ if (r < RID_MAX_GPR) { ++ //emit_dji(as, irt_is64(ir->t) ? LAI_LD_D : LAI_LD_W, r, base, ofs&0xfff); ++ emit_djk(as, irt_is64(ir->t) ? LAI_LDX_D : LAI_LDX_W, r, base, RID_R19); ++ } else { ++ //emit_dji(as, irt_isnum(ir->t) ? LAI_FLD_D : LAI_FLD_S, r, base, ofs&0xfff); ++ emit_djk(as, irt_isnum(ir->t) ? LAI_FLDX_D : LAI_FLDX_S, r, base, RID_R19); ++ } ++ emit_d16i(as, RID_R19, ofs); ++} ++ ++/* Generic store of register with base and (small) offset address. */ ++static void emit_storeofs(ASMState *as, IRIns *ir, Reg r, Reg base, int32_t ofs) ++{ ++ if (r < RID_MAX_GPR) { ++ //emit_dji(as, irt_is64(ir->t) ? LAI_ST_D : LAI_ST_W, r, base, ofs&0xfff); ++ emit_djk(as, irt_is64(ir->t) ? LAI_STX_D : LAI_STX_W, r, base, RID_R19); ++ } else { ++ //emit_dji(as, irt_isnum(ir->t) ? LAI_FST_D : LAI_FST_S, ++ // (r&31), base, ofs&0xfff); ++ emit_djk(as, irt_isnum(ir->t) ? LAI_FSTX_D : LAI_FSTX_S, (r&31), base, RID_R19); ++ } ++ emit_d16i(as, RID_R19, ofs); ++} ++ ++/* Add offset to pointer. 
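++** (Added note:) ADDI.D only takes a 12-bit immediate, so the (asserted 16-bit) offset is
++** built in R19 with emit_d16i and added with ADD.D.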
*/ ++static void emit_addptr(ASMState *as, Reg r, int32_t ofs) ++{ ++ if (ofs) { ++ lua_assert(checki16(ofs)); ++ //emit_dji(as, LAI_ADDI_D, r, r, ofs&0xfff); //TODO 12bit -> 16bit ++ emit_djk(as, LAI_ADD_D, r, r, RID_R19); ++ emit_d16i(as, RID_R19, ofs); ++ } ++} ++ ++ ++#define emit_spsub(as, ofs) emit_addptr(as, RID_SP, -(ofs)) +diff --git a/libs/luajit/LuaJIT-src/src/lj_frame.h b/libs/luajit/LuaJIT-src/src/lj_frame.h +index 19c49a4..d129530 100644 +--- a/libs/luajit/LuaJIT-src/src/lj_frame.h ++++ b/libs/luajit/LuaJIT-src/src/lj_frame.h +@@ -264,6 +264,24 @@ enum { LJ_CONT_TAILCALL, LJ_CONT_FFI_CALLBACK }; /* Special continuations. */ + #endif + #define CFRAME_OFS_MULTRES 0 + #define CFRAME_SHIFT_MULTRES 3 ++#elif LJ_TARGET_LOONGARCH64 //TODO ++#if LJ_ARCH_HASFPU ++#define CFRAME_OFS_ERRF 188 ++#define CFRAME_OFS_NRES 184 ++#define CFRAME_OFS_PREV 176 ++#define CFRAME_OFS_L 168 ++#define CFRAME_OFS_PC 160 ++#define CFRAME_SIZE 192 ++#else ++#define CFRAME_OFS_ERRF 124 ++#define CFRAME_OFS_NRES 120 ++#define CFRAME_OFS_PREV 112 ++#define CFRAME_OFS_L 104 ++#define CFRAME_OFS_PC 96 ++#define CFRAME_SIZE 128 ++#endif ++#define CFRAME_OFS_MULTRES 0 ++#define CFRAME_SHIFT_MULTRES 3 + #else + #error "Missing CFRAME_* definitions for this architecture" + #endif +diff --git a/libs/luajit/LuaJIT-src/src/lj_gdbjit.c b/libs/luajit/LuaJIT-src/src/lj_gdbjit.c +index c219ffa..5ac5db7 100644 +--- a/libs/luajit/LuaJIT-src/src/lj_gdbjit.c ++++ b/libs/luajit/LuaJIT-src/src/lj_gdbjit.c +@@ -306,6 +306,9 @@ enum { + #elif LJ_TARGET_MIPS + DW_REG_SP = 29, + DW_REG_RA = 31, ++#elif LJ_TARGET_LOONGARCH64 ++ DW_REG_SP = 3, ++ DW_REG_RA = 1, + #else + #error "Unsupported target architecture" + #endif +@@ -383,6 +386,8 @@ static const ELFheader elfhdr_template = { + .machine = 20, + #elif LJ_TARGET_MIPS + .machine = 8, ++#elif LJ_TARGET_LOONGARCH64 ++ .machine = 258, + #else + #error "Unsupported target architecture" + #endif +@@ -591,6 +596,13 @@ static void LJ_FASTCALL gdbjit_ehframe(GDBJITctx *ctx) + for (i = 23; i >= 16; i--) { DB(DW_CFA_offset|i); DUV(26-i); } + for (i = 30; i >= 20; i -= 2) { DB(DW_CFA_offset|32|i); DUV(42-i); } + } ++#elif LJ_TARGET_LOONGARCH64 ++ { ++ int i; ++ DB(DW_CFA_offset|30); DUV(2); //TODO ++ for (i = 30; i >= 23; i--) { DB(DW_CFA_offset|i); DUV(3+(30-i)); } ++ for (i = 31; i >= 24; i--) { DB(DW_CFA_offset|32|i); DUV(42-i); } //TODO ++ } + #else + #error "Unsupported target architecture" + #endif +diff --git a/libs/luajit/LuaJIT-src/src/lj_ircall.h b/libs/luajit/LuaJIT-src/src/lj_ircall.h +index 973c36e..e136526 100644 +--- a/libs/luajit/LuaJIT-src/src/lj_ircall.h ++++ b/libs/luajit/LuaJIT-src/src/lj_ircall.h +@@ -84,6 +84,12 @@ typedef struct CCallInfo { + #define IRCALLCOND_SOFTFP_MIPS(x) NULL + #endif + ++#if LJ_SOFTFP && LJ_TARGET_LOONGARCH64 ++#define IRCALLCOND_SOFTFP_LOONGARCH64(x) x ++#else ++#define IRCALLCOND_SOFTFP_LOONGARCH64(x) NULL ++#endif ++ + #define LJ_NEED_FP64 (LJ_TARGET_ARM || LJ_TARGET_PPC || LJ_TARGET_MIPS32) + + #if LJ_HASFFI && (LJ_SOFTFP || LJ_NEED_FP64) +@@ -272,7 +278,7 @@ LJ_DATA const CCallInfo lj_ir_callinfo[IRCALL__MAX+1]; + #define fp64_f2l __aeabi_f2lz + #define fp64_f2ul __aeabi_f2ulz + #endif +-#elif LJ_TARGET_MIPS ++#elif LJ_TARGET_MIPS || LJ_TARGET_LOONGARCH64 + #define softfp_add __adddf3 + #define softfp_sub __subdf3 + #define softfp_mul __muldf3 +@@ -308,7 +314,7 @@ extern float softfp_ui2f(uint32_t a); + extern int32_t softfp_f2i(float a); + extern uint32_t softfp_f2ui(float a); + #endif +-#if LJ_TARGET_MIPS ++#if LJ_TARGET_MIPS || 
LJ_TARGET_LOONGARCH64 + extern double lj_vm_sfmin(double a, double b); + extern double lj_vm_sfmax(double a, double b); + #endif +diff --git a/libs/luajit/LuaJIT-src/src/lj_jit.h b/libs/luajit/LuaJIT-src/src/lj_jit.h +index 92054e3..db4b7f4 100644 +--- a/libs/luajit/LuaJIT-src/src/lj_jit.h ++++ b/libs/luajit/LuaJIT-src/src/lj_jit.h +@@ -55,6 +55,13 @@ + #else + #define JIT_F_CPUSTRING "\010MIPS64R2" + #endif ++ ++#elif LJ_TARGET_LOONGARCH64 ++#define JIT_F_CPU 0x00000010 ++#define JIT_F_GS464V (JIT_F_CPU << 0) ++#define JIT_F_CPU_FIRST JIT_F_GS464V ++#define JIT_F_CPUSTRING "\6GS464V" ++ + #else + #define JIT_F_CPU_FIRST 0 + #define JIT_F_CPUSTRING "" +@@ -335,7 +342,7 @@ enum { + LJ_K64_M2P64_31 = LJ_K64_M2P64, + #endif + #endif +-#if LJ_TARGET_MIPS ++#if LJ_TARGET_MIPS || LJ_TARGET_LOONGARCH64 + LJ_K64_2P31, /* 2^31 */ + #if LJ_64 + LJ_K64_2P63, /* 2^63 */ +@@ -353,10 +360,10 @@ enum { + LJ_K32_2P52_2P31, /* 2^52 + 2^31 */ + LJ_K32_2P52, /* 2^52 */ + #endif +-#if LJ_TARGET_PPC || LJ_TARGET_MIPS ++#if LJ_TARGET_PPC || LJ_TARGET_MIPS || LJ_TARGET_LOONGARCH64 + LJ_K32_2P31, /* 2^31 */ + #endif +-#if LJ_TARGET_MIPS64 ++#if LJ_TARGET_MIPS64 || LJ_TARGET_LOONGARCH64 + LJ_K32_2P63, /* 2^63 */ + LJ_K32_M2P64, /* -2^64 */ + #endif +diff --git a/libs/luajit/LuaJIT-src/src/lj_target.h b/libs/luajit/LuaJIT-src/src/lj_target.h +index 8dcae95..52a0a7e 100644 +--- a/libs/luajit/LuaJIT-src/src/lj_target.h ++++ b/libs/luajit/LuaJIT-src/src/lj_target.h +@@ -55,7 +55,7 @@ typedef uint32_t RegSP; + /* Bitset for registers. 32 registers suffice for most architectures. + ** Note that one set holds bits for both GPRs and FPRs. + */ +-#if LJ_TARGET_PPC || LJ_TARGET_MIPS || LJ_TARGET_ARM64 ++#if LJ_TARGET_PPC || LJ_TARGET_MIPS || LJ_TARGET_ARM64 || LJ_TARGET_LOONGARCH64 + typedef uint64_t RegSet; + #else + typedef uint32_t RegSet; +@@ -69,7 +69,7 @@ typedef uint32_t RegSet; + #define rset_set(rs, r) (rs |= RID2RSET(r)) + #define rset_clear(rs, r) (rs &= ~RID2RSET(r)) + #define rset_exclude(rs, r) (rs & ~RID2RSET(r)) +-#if LJ_TARGET_PPC || LJ_TARGET_MIPS || LJ_TARGET_ARM64 ++#if LJ_TARGET_PPC || LJ_TARGET_MIPS || LJ_TARGET_ARM64 || LJ_TARGET_LOONGARCH64 + #define rset_picktop(rs) ((Reg)(__builtin_clzll(rs)^63)) + #define rset_pickbot(rs) ((Reg)__builtin_ctzll(rs)) + #else +@@ -144,6 +144,8 @@ typedef uint32_t RegCost; + #include "lj_target_ppc.h" + #elif LJ_TARGET_MIPS + #include "lj_target_mips.h" ++#elif LJ_TARGET_LOONGARCH64 ++#include "lj_target_loongarch64.h" + #else + #error "Missing include for target CPU" + #endif +diff --git a/libs/luajit/LuaJIT-src/src/lj_target_loongarch64.h b/libs/luajit/LuaJIT-src/src/lj_target_loongarch64.h +new file mode 100644 +index 0000000..6d96b45 +--- /dev/null ++++ b/libs/luajit/LuaJIT-src/src/lj_target_loongarch64.h +@@ -0,0 +1,339 @@ ++/* ++** Definitions for LoongArch CPUs. ++** Copyright (C) 2005-2021 Mike Pall. See Copyright Notice in luajit.h ++** Copyright (C) 2021 Loongson Technology. All rights reserved. 
++*/ ++ ++#ifndef _LJ_TARGET_LOONGARCH_H ++#define _LJ_TARGET_LOONGARCH_H ++ ++/* -- Registers IDs ------------------------------------------------------- */ ++ ++#define GPRDEF(_) \ ++ _(R0) _(RA) _(R2) _(SP) _(R4) _(R5) _(R6) _(R7) \ ++ _(R8) _(R9) _(R10) _(R11) _(R12) _(R13) _(R14) _(R15) \ ++ _(R16) _(R17) _(R18) _(R19) _(R20) _(R21) _(R22) _(R23) \ ++ _(R24) _(R25) _(R26) _(R27) _(R28) _(R29) _(R30) _(R31) ++#if LJ_SOFTFP ++#define FPRDEF(_) ++#else ++#define FPRDEF(_) \ ++ _(F0) _(F1) _(F2) _(F3) _(F4) _(F5) _(F6) _(F7) \ ++ _(F8) _(F9) _(F10) _(F11) _(F12) _(F13) _(F14) _(F15) \ ++ _(F16) _(F17) _(F18) _(F19) _(F20) _(F21) _(F22) _(F23) \ ++ _(F24) _(F25) _(F26) _(F27) _(F28) _(F29) _(F30) _(F31) ++#endif ++#define VRIDDEF(_) ++ ++#define RIDENUM(name) RID_##name, ++ ++enum { ++ GPRDEF(RIDENUM) /* General-purpose registers (GPRs). */ ++ FPRDEF(RIDENUM) /* Floating-point registers (FPRs). */ ++ RID_MAX, ++ RID_ZERO = RID_R0, ++ RID_TMP = RID_RA, //TODO ++ RID_GP = RID_R31, ++ ++ /* Calling conventions. */ ++ RID_RET = RID_R4, ++ ++ RID_RETHI = RID_R18, ++ RID_RETLO = RID_R17, ++ ++#if LJ_SOFTFP ++ RID_FPRET = RID_R17, ++#else ++ RID_FPRET = RID_F0, ++#endif ++ RID_CFUNCADDR = RID_R16, ++ ++ /* These definitions must match with the *.dasc file(s): */ ++ RID_BASE = RID_R23, /* Interpreter BASE. */ ++ RID_LPC = RID_R25, /* Interpreter PC. */ ++ RID_DISPATCH = RID_R26, /* Interpreter DISPATCH table. */ ++ RID_LREG = RID_R27, /* Interpreter L. */ ++ RID_JGL = RID_R22, /* On-trace: global_State + 32768. */ ++ ++ /* Register ranges [min, max) and number of registers. */ ++ RID_MIN_GPR = RID_R0, ++ RID_MAX_GPR = RID_R31+1, ++ RID_MIN_FPR = RID_MAX_GPR, ++#if LJ_SOFTFP ++ RID_MAX_FPR = RID_MIN_FPR, ++#else ++ RID_MAX_FPR = RID_F31+1, ++#endif ++ RID_NUM_GPR = RID_MAX_GPR - RID_MIN_GPR, ++ RID_NUM_FPR = RID_MAX_FPR - RID_MIN_FPR ++}; ++ ++#define RID_NUM_KREF RID_NUM_GPR ++#define RID_MIN_KREF RID_R0 ++ ++/* -- Register sets ------------------------------------------------------- */ ++ ++/* Make use of all registers, except ZERO, TMP, SP, JGL. */ ++#define RSET_FIXED \ ++ (RID2RSET(RID_ZERO)|RID2RSET(RID_TMP)|RID2RSET(RID_R2)|\ ++ RID2RSET(RID_SP)|RID2RSET(RID_JGL)|RID2RSET(RID_R31)|\ ++ RID2RSET(RID_R19)|RID2RSET(RID_R20)) ++#define RSET_GPR (RSET_RANGE(RID_MIN_GPR, RID_MAX_GPR) - RSET_FIXED) ++#if LJ_SOFTFP ++#define RSET_FPR 0 ++#else ++#define RSET_FPR RSET_RANGE(RID_MIN_FPR, RID_MAX_FPR) ++#endif ++#define RSET_ALL (RSET_GPR|RSET_FPR) ++#define RSET_INIT RSET_ALL ++ ++/* scratch register. */ ++#define RSET_SCRATCH_GPR \ ++ (RSET_RANGE(RID_R4, RID_R19)) ++#if LJ_SOFTFP ++#define RSET_SCRATCH_FPR 0 ++#else ++#define RSET_SCRATCH_FPR RSET_RANGE(RID_F0, RID_F23+1) ++#endif ++#define RSET_SCRATCH (RSET_SCRATCH_GPR|RSET_SCRATCH_FPR) ++#define REGARG_FIRSTGPR RID_R4 ++#define REGARG_LASTGPR RID_R11 ++#define REGARG_NUMGPR 8 ++#if LJ_ABI_SOFTFP ++#define REGARG_FIRSTFPR 0 ++#define REGARG_LASTFPR 0 ++#define REGARG_NUMFPR 0 ++#else ++#define REGARG_FIRSTFPR RID_F0 ++#define REGARG_LASTFPR RID_F7 ++#define REGARG_NUMFPR 8 ++#endif ++ ++/* -- Spill slots --------------------------------------------------------- */ ++ ++/* Spill slots are 32 bit wide. An even/odd pair is used for FPRs. ++** ++** SPS_FIXED: Available fixed spill slots in interpreter frame. ++** This definition must match with the *.dasc file(s). ++** ++** SPS_FIRST: First spill slot for general use. 
++*/ ++#define SPS_FIXED 4 ++#define SPS_FIRST 4 //TODO ++ ++#define SPOFS_TMP 0 ++ ++#define sps_scale(slot) (4 * (int32_t)(slot)) ++#define sps_align(slot) (((slot) - SPS_FIXED + 1) & ~1) //TODO ++ ++/* -- Exit state ---------------------------------------------------------- */ ++ ++/* This definition must match with the *.dasc file(s). */ ++typedef struct { ++#if !LJ_SOFTFP ++ lua_Number fpr[RID_NUM_FPR]; /* Floating-point registers. */ ++#endif ++ intptr_t gpr[RID_NUM_GPR]; /* General-purpose registers. */ ++ int32_t spill[256]; /* Spill slots. */ ++} ExitState; ++ ++/* Highest exit + 1 indicates stack check. */ ++#define EXITSTATE_CHECKEXIT 1 ++ ++/* Return the address of a per-trace exit stub. */ ++static LJ_AINLINE uint32_t *exitstub_trace_addr_(uint32_t *p) ++{ ++ while (*p == 0x03400000) p++; /* Skip LAI_NOP. */ ++ return p; ++} ++/* Avoid dependence on lj_jit.h if only including lj_target.h. */ ++#define exitstub_trace_addr(T, exitno) \ ++ exitstub_trace_addr_((MCode *)((char *)(T)->mcode + (T)->szmcode)) ++ ++/* -- Instructions -------------------------------------------------------- */ ++ ++/* Instruction fields. */ ++#define LAF_D(r) (r) ++#define LAF_J(r) ((r) << 5) ++#define LAF_K(r) ((r) << 10) ++#define LAF_A(r) ((r) << 15) ++#define LAF_FC(r) ((r) << 5) ++#define LAF_I(n) ((n) << 10) ++#define LAF_I20(n) ((n) << 5) ++#define LAF_M(n) ((n) << 16) ++ ++/* Check for valid field range. */ ++#define LAF_S_OK(x, b) ((((x) + (1 << (b-1))) >> (b)) == 0) ++ ++typedef enum LAIns { ++/* Integer instructions. */ ++ LAI_MOVE = 0x00150000, ++ LAI_NOP = 0x03400000, ++ ++ LAI_LU = 0x03800000, ++ ++ LAI_AND = 0x00148000, ++ LAI_ANDI = 0x03400000, ++ LAI_OR = 0x00150000, ++ LAI_ORI = 0x03800000, ++ LAI_XOR = 0x00158000, ++ LAI_XORI = 0x03c00000, ++ LAI_NOR = 0x00140000, ++ ++ LAI_SLT = 0x00120000, ++ LAI_SLTU = 0x00128000, ++ LAI_SLTI = 0x02000000, ++ LAI_SLTUI = 0x02400000, ++ ++ LAI_ADD_W = 0x00100000, ++ LAI_ADDI_W = 0x02800000, ++ LAI_SUB_W = 0x00110000, ++ LAI_MUL_W = 0x001c0000, ++ LAI_MULH_W = 0x001c8000, ++ LAI_DIV_W = 0x00200000, ++ LAI_DIV_WU = 0x00210000, ++ ++ LAI_SLLI_W = 0x00408000, ++ LAI_SRLI_W = 0x00448000, ++ LAI_SRAI_W = 0x00488000, ++ LAI_ROTRI_W = 0x004c8000, ++ LAI_ROTRI_D = 0x004d0000, ++ LAI_SLL_W = 0x00170000, ++ LAI_SRL_W = 0x00178000, ++ LAI_SRA_W = 0x00180000, ++ LAI_ROTR_W = 0x001b0000, ++ LAI_ROTR_D = 0x001b8000, ++ ++ LAI_EXT_W_B = 0x00005c00, ++ LAI_EXT_W_H = 0x00005800, ++ LAI_REVB_2H = 0x00003000, ++ LAI_REVB_4H = 0x00003400, ++ ++ LAI_ALSL_W = 0x00040000, ++ LAI_ALSL_D = 0x002c0000, ++ ++ LAI_B = 0x50000000, ++ LAI_BL = 0x54000000, ++ LAI_JIRL = 0x4c000000, ++ ++ LAI_BEQ = 0x58000000, ++ LAI_BNE = 0x5c000000, ++ LAI_BLT = 0x60000000, ++ LAI_BGE = 0x64000000, ++ LAI_BCEQZ = 0x48000000, ++ LAI_BCNEZ = 0x48000100, ++ ++ /* Load/store instructions. 
*/ ++ LAI_LD_W = 0x28800000, ++ LAI_LD_D = 0x28c00000, ++ LAI_ST_W = 0x29800000, ++ LAI_ST_D = 0x29c00000, ++ LAI_LD_B = 0x28000000, ++ LAI_ST_B = 0x29000000, ++ LAI_LD_H = 0x28400000, ++ LAI_ST_H = 0x29400000, ++ LAI_LD_BU = 0x2a000000, ++ LAI_LD_HU = 0x2a400000, ++ LAI_LDX_B = 0x38000000, ++ LAI_LDX_BU = 0x38200000, ++ LAI_LDX_H = 0x38040000, ++ LAI_LDX_HU = 0x38240000, ++ LAI_LDX_D = 0x380c0000, ++ LAI_STX_D = 0x381c0000, ++ LAI_LDX_W = 0x38080000, ++ LAI_STX_W = 0x38180000, ++ LAI_FLD_S = 0x2b000000, ++ LAI_FST_S = 0x2b400000, ++ LAI_FLD_D = 0x2b800000, ++ LAI_FST_D = 0x2bc00000, ++ LAI_FLDX_D = 0x38340000, ++ LAI_FLDX_S = 0x38300000, ++ LAI_FSTX_D = 0x383c0000, ++ LAI_FSTX_S = 0x38380000, ++ ++ /* LA64 instructions. */ ++ LAI_ADD_D = 0x00108000, ++ LAI_ADDI_D = 0x02c00000, ++ LAI_ADDU16I_D = 0x10000000, ++ LAI_LU12I_W = 0x14000000, ++ LAI_LU32I_D = 0x16000000, ++ LAI_LU52I_D = 0x3000000, ++ LAI_SUB_D = 0x00118000, ++ LAI_DIV_D = 0x00220000, ++ LAI_DIV_DU = 0x00230000, ++ LAI_MUL_D = 0x001d8000, ++ ++ LAI_SLLI_D = 0x00410000, ++ LAI_SRLI_D = 0x00450000, ++ LAI_SLL_D = 0x00188000, ++ LAI_SRL_D = 0x00190000, ++ LAI_SRAI_D = 0x00490000, ++ LAI_SRA_D = 0x00198000, ++ LAI_REVH_D = 0x00004400, ++ ++ /* Extract/insert instructions. */ ++ LAI_BSTRPICK_D = 0x00c00000, ++ LAI_BSTRINS_W = 0x00600000, ++ LAI_BSTRINS_D = 0x00800000, ++ ++ LAI_MASKEQZ = 0x00130000, ++ LAI_MASKNEZ = 0x00138000, ++ ++ LAI_FRINT_S = 0x011e4400, ++ LAI_FRINT_D = 0x011e4800, ++ LAI_FTINTRM_L_D = 0x011a2800, ++ LAI_FTINTRP_L_D = 0x011a6800, ++ LAI_FTINTRNE_L_D = 0x011ae800, ++ ++ /* FP instructions. */ ++ LAI_FMOV_S = 0x01149400, ++ LAI_FMOV_D = 0x01149800, ++ ++ LAI_FABS_D = 0x01140800, ++ LAI_FNEG_D = 0x01141800, ++ ++ LAI_FADD_D = 0x01010000, ++ LAI_FSUB_D = 0x01030000, ++ LAI_FMUL_D = 0x01050000, ++ LAI_FDIV_D = 0x01070000, ++ LAI_FSQRT_D = 0x01144800, ++ ++ LAI_FMIN_D = 0x010b0000, ++ LAI_FMAX_D = 0x01090000, ++ ++ LAI_FADD_S = 0x01008000, ++ LAI_FSUB_S = 0x01028000, ++ ++ LAI_FCVT_D_S = 0x01192400, ++ LAI_FTINT_W_S = 0x011b0400, ++ LAI_FCVT_S_D = 0x01191800, ++ LAI_FTINT_W_D = 0x011b0800, ++ LAI_FFINT_S_W = 0x011d1000, ++ LAI_FFINT_D_W = 0x011d2000, ++ LAI_FFINT_S_L = 0x011d1800, ++ LAI_FFINT_D_L = 0x011d2800, ++ ++ LAI_FTINTRZ_W_S = 0x011a8400, ++ LAI_FTINTRZ_W_D = 0x011a8800, ++ LAI_FTINTRZ_L_S = 0x011aa400, ++ LAI_FTINTRZ_L_D = 0x011aa800, ++ LAI_FTINTRM_W_S = 0x011a0400, ++ LAI_FTINTRM_W_D = 0x011a0800, ++ ++ LAI_MOVFR2GR_S = 0x0114b400, ++ LAI_MOVGR2FR_W = 0x0114a400, ++ LAI_MOVGR2FR_D = 0x0114a800, ++ LAI_MOVFR2GR_D = 0x0114b800, ++ ++ LAI_FCMP_CEQ_D = 0x0c220000, ++ LAI_FCMP_CLT_S = 0x0c110000, ++ LAI_FCMP_CLT_D = 0x0c210000, ++ LAI_FCMP_CLE_D = 0x0c230000, ++ LAI_FCMP_CULE_D = 0x0c270000, ++ LAI_FCMP_CULT_D = 0x0c250000, ++ LAI_FCMP_CNE_D = 0x0c280000, ++ LAI_FSEL = 0x0d000000, ++} LAIns; ++ ++#endif ++ +diff --git a/libs/luajit/LuaJIT-src/src/lj_trace.c b/libs/luajit/LuaJIT-src/src/lj_trace.c +index d85b47f..021fd49 100644 +--- a/libs/luajit/LuaJIT-src/src/lj_trace.c ++++ b/libs/luajit/LuaJIT-src/src/lj_trace.c +@@ -325,17 +325,17 @@ void lj_trace_initstate(global_State *g) + J->k64[LJ_K64_2P64].u64 = U64x(43f00000,00000000); + J->k32[LJ_K32_M2P64_31] = LJ_64 ? 
0xdf800000 : 0xcf000000; + #endif +-#if LJ_TARGET_X86ORX64 || LJ_TARGET_MIPS64 ++#if LJ_TARGET_X86ORX64 || LJ_TARGET_MIPS64 || LJ_TARGET_LOONGARCH64 + J->k64[LJ_K64_M2P64].u64 = U64x(c3f00000,00000000); + #endif + #if LJ_TARGET_PPC + J->k32[LJ_K32_2P52_2P31] = 0x59800004; + J->k32[LJ_K32_2P52] = 0x59800000; + #endif +-#if LJ_TARGET_PPC || LJ_TARGET_MIPS ++#if LJ_TARGET_PPC || LJ_TARGET_MIPS || LJ_TARGET_LOONGARCH64 + J->k32[LJ_K32_2P31] = 0x4f000000; + #endif +-#if LJ_TARGET_MIPS ++#if LJ_TARGET_MIPS || LJ_TARGET_LOONGARCH64 + J->k64[LJ_K64_2P31].u64 = U64x(41e00000,00000000); + #if LJ_64 + J->k64[LJ_K64_2P63].u64 = U64x(43e00000,00000000); +diff --git a/libs/luajit/LuaJIT-src/src/lj_vm.h b/libs/luajit/LuaJIT-src/src/lj_vm.h +index 1cc7eed..8bad4e6 100644 +--- a/libs/luajit/LuaJIT-src/src/lj_vm.h ++++ b/libs/luajit/LuaJIT-src/src/lj_vm.h +@@ -54,7 +54,8 @@ LJ_ASMF void lj_vm_exit_handler(void); + LJ_ASMF void lj_vm_exit_interp(void); + + /* Internal math helper functions. */ +-#if LJ_TARGET_PPC || LJ_TARGET_ARM64 || (LJ_TARGET_MIPS && LJ_ABI_SOFTFP) ++#if LJ_TARGET_PPC || LJ_TARGET_ARM64 || (LJ_TARGET_MIPS && LJ_ABI_SOFTFP)\ ++|| (LJ_TARGET_LOONGARCH64 && LJ_ABI_SOFTFP) + #define lj_vm_floor floor + #define lj_vm_ceil ceil + #else +diff --git a/libs/luajit/LuaJIT-src/src/lj_vmmath.c b/libs/luajit/LuaJIT-src/src/lj_vmmath.c +index b231d3e..8484220 100644 +--- a/libs/luajit/LuaJIT-src/src/lj_vmmath.c ++++ b/libs/luajit/LuaJIT-src/src/lj_vmmath.c +@@ -57,7 +57,7 @@ double lj_vm_foldarith(double x, double y, int op) + } + } + +-#if (LJ_HASJIT && !(LJ_TARGET_ARM || LJ_TARGET_ARM64 || LJ_TARGET_PPC)) || LJ_TARGET_MIPS ++#if (LJ_HASJIT && !(LJ_TARGET_ARM || LJ_TARGET_ARM64 || LJ_TARGET_PPC)) || LJ_TARGET_MIPS || LJ_TARGET_LOONGARCH64 + int32_t LJ_FASTCALL lj_vm_modi(int32_t a, int32_t b) + { + uint32_t y, ua, ub; +diff --git a/libs/luajit/LuaJIT-src/src/vm_loongarch64.dasc b/libs/luajit/LuaJIT-src/src/vm_loongarch64.dasc +new file mode 100644 +index 0000000..b91092a +--- /dev/null ++++ b/libs/luajit/LuaJIT-src/src/vm_loongarch64.dasc +@@ -0,0 +1,5219 @@ ++|// Low-level VM code for LoongArch CPUs. ++|// Bytecode interpreter, fast functions and helper functions. ++|// Copyright (C) 2005-2021 Mike Pall. See Copyright Notice in luajit.h ++|// Copyright (C) 2021 Loongson Technology. All rights reserved. ++| ++|.arch loongarch64 ++|.section code_op, code_sub ++| ++|.actionlist build_actionlist ++|.globals GLOB_ ++|.globalnames globnames ++|.externnames extnames ++| ++|// Note: The ragged indentation of the instructions is intentional. ++|// The starting columns indicate data dependencies. ++| ++|//----------------------------------------------------------------------- ++| ++|// Fixed register assignments for the interpreter. 
++|// Don't use: r0 = 0, r1 = ra, r2 = tp, r3 = sp, r21 = reserved ++| ++|.macro .FPU, a, b, c ++|.if FPU ++| a, b, c ++|.endif ++|.endmacro ++| ++|.macro .FPU2, a, b ++|.if FPU ++| a, b ++|.endif ++|.endmacro ++| ++|.macro .LI, a, b ++| addu16i.d r20, r0, b ++| srai.d r20, r20, 16 ++| or a, r0, r20 ++|.endmacro ++| ++|.macro .LUI, a, b ++| addi.w a, r0, b>>5 ++| slli.w a, a, 5 ++| ori a, a, b&0x1f ++| slli.w a, a, 16 ++|.endmacro ++| ++|.macro .STXW, a, b, c ++| addu16i.d r20, r0, c ++| srai.d r20, r20, 16 ++| stx.w a, b, r20 ++|.endmacro ++| ++|.macro .STXD, a, b, c ++| addu16i.d r20, r0, c ++| srai.d r20, r20, 16 ++| stx.d a, b, r20 ++|.endmacro ++| ++|.macro .LDXW, a, b, c ++| addu16i.d r20, r0, c ++| srai.d r20, r20, 16 ++| ldx.w a, b, r20 ++|.endmacro ++| ++|.macro .LDXD, a, b, c ++| addu16i.d r20, r0, c ++| srai.d r20, r20, 16 ++| ldx.d a, b, r20 ++|.endmacro ++| ++|.macro .LDXBU, a, b, c ++| addu16i.d r20, r0, c ++| srai.d r20, r20, 16 ++| ldx.bu a, b, r20 ++|.endmacro ++| ++|.macro .DADDIU, a, b, c ++| addu16i.d r20, r0, c ++| srai.d r20, r20, 16 ++| add.d a, b, r20 ++|.endmacro ++| ++|// The following must be C callee-save (but BASE is often refetched). ++|.define BASE, r23 // Base of current Lua stack frame. ++|.define KBASE, r24 // Constants of current Lua function. ++|.define PC, r25 // Next PC. ++|.define DISPATCH, r26 // Opcode dispatch table. ++|.define LREG, r27 // Register holding lua_State (also in SAVE_L). ++|.define MULTRES, r28 // Size of multi-result: (nresults+1)*8. ++| ++|.define JGL, r22 // On-trace: global_State + 32768. ++| ++|// Constants for type-comparisons, stores and conversions. C callee-save. ++|.define TISNIL, r22 ++|.define TISNUM, r29 ++|.if FPU ++|.define TOBIT, f30 // 2^52 + 2^51. ++|.endif ++| ++|// The following temporaries are not saved across C calls, except for RA. ++|.define RA, r30 // Callee-save. ++|.define RB, r8 ++|.define RC, r9 ++|.define RD, r10 ++|.define INS, r11 ++| ++|.define AT, r19 ++|.define TMP0, r12 ++|.define TMP1, r13 ++|.define TMP2, r14 ++|.define TMP3, r15 ++| ++|// Loongarch lp64 calling convention. ++|.define CFUNCADDR, r16 ++|.define CARG1, r4 ++|.define CARG2, r5 ++|.define CARG3, r6 ++|.define CARG4, r7 ++|.define CARG5, r8 ++|.define CARG6, r9 ++|.define CARG7, r10 ++|.define CARG8, r11 ++| ++|.define CRET1, r4 ++|.define CRET2, r5 ++| ++|.if FPU ++|.define FARG1, f0 ++|.define FARG2, f1 ++|.define FARG3, f2 ++|.define FARG4, f3 ++|.define FARG5, f4 ++|.define FARG6, f5 ++|.define FARG7, f6 ++|.define FARG8, f7 ++| ++|.define FRET1, f22 ++|.define FRET2, f23 ++| ++|.define FTMP0, f18 ++|.define FTMP1, f19 ++|.define FTMP2, f20 ++| ++|.define FCC0, fcc0 ++|.define FCC1, fcc1 ++|.endif ++| ++|// Stack layout while in interpreter. Must match with lj_frame.h. ++|.if FPU // LoongArch64 hard-float. ++| ++|.define CFRAME_SPACE, 192 // Delta for sp. ++| ++|//----- 16 byte aligned, <-- sp entering interpreter ++|.define SAVE_ERRF, 188 // 32 bit values. ++|.define SAVE_NRES, 184 ++|.define SAVE_CFRAME, 176 // 64 bit values. ++|.define SAVE_L, 168 ++|.define SAVE_PC, 160 ++|//----- 16 byte aligned ++|.define SAVE_GPR_, 80 // .. 80+10*8: 64 bit GPR saves. ++|.define SAVE_FPR_, 16 // .. 16+8*8: 64 bit FPR saves. ++| ++|.else // LoongArch64 soft-float ++| ++|.define CFRAME_SPACE, 128 // Delta for sp. ++| ++|//----- 16 byte aligned, <-- sp entering interpreter ++|.define SAVE_ERRF, 124 // 32 bit values. ++|.define SAVE_NRES, 120 ++|.define SAVE_CFRAME, 112 // 64 bit values. 
++|.define SAVE_L, 104 ++|.define SAVE_PC, 96 ++|//----- 16 byte aligned ++|.define SAVE_GPR_, 16 // .. 16+10*8: 64 bit GPR saves. ++| ++|.endif ++| ++|.define TMPX, 8 // Unused by interpreter, temp for JIT code. ++|.define TMPD, 0 ++|//----- 16 byte aligned ++| ++|.define TMPD_OFS, 0 ++| ++|//.define SAVE_MULTRES, sp, TMPD ++| ++|//----------------------------------------------------------------------- ++| ++|.macro saveregs ++| addi.d sp, sp, -CFRAME_SPACE ++| st.d ra, SAVE_GPR_+9*8(sp) ++| st.d r22, SAVE_GPR_+8*8(sp) ++| .FPU2 fst.d f31, SAVE_FPR_+7*8(sp) ++| st.d r30, SAVE_GPR_+7*8(sp) ++| .FPU2 fst.d f30, SAVE_FPR_+6*8(sp) ++| st.d r29, SAVE_GPR_+6*8(sp) ++| .FPU2 fst.d f29, SAVE_FPR_+5*8(sp) ++| st.d r28, SAVE_GPR_+5*8(sp) ++| .FPU2 fst.d f28, SAVE_FPR_+4*8(sp) ++| st.d r27, SAVE_GPR_+4*8(sp) ++| .FPU2 fst.d f27, SAVE_FPR_+3*8(sp) ++| st.d r26, SAVE_GPR_+3*8(sp) ++| .FPU2 fst.d f26, SAVE_FPR_+2*8(sp) ++| st.d r25, SAVE_GPR_+2*8(sp) ++| .FPU2 fst.d f25, SAVE_FPR_+1*8(sp) ++| st.d r24, SAVE_GPR_+1*8(sp) ++| .FPU2 fst.d f24, SAVE_FPR_+0*8(sp) ++| st.d r23, SAVE_GPR_+0*8(sp) ++|.endmacro ++| ++|.macro restoreregs_ret ++| ld.d ra, SAVE_GPR_+9*8(sp) ++| ld.d r22, SAVE_GPR_+8*8(sp) ++| ld.d r30, SAVE_GPR_+7*8(sp) ++| .FPU2 fld.d f31, SAVE_FPR_+7*8(sp) ++| ld.d r29, SAVE_GPR_+6*8(sp) ++| .FPU2 fld.d f30, SAVE_FPR_+6*8(sp) ++| ld.d r28, SAVE_GPR_+5*8(sp) ++| .FPU2 fld.d f29, SAVE_FPR_+5*8(sp) ++| ld.d r27, SAVE_GPR_+4*8(sp) ++| .FPU2 fld.d f28, SAVE_FPR_+4*8(sp) ++| ld.d r26, SAVE_GPR_+3*8(sp) ++| .FPU2 fld.d f27, SAVE_FPR_+3*8(sp) ++| ld.d r25, SAVE_GPR_+2*8(sp) ++| .FPU2 fld.d f26, SAVE_FPR_+2*8(sp) ++| ld.d r24, SAVE_GPR_+1*8(sp) ++| .FPU2 fld.d f25, SAVE_FPR_+1*8(sp) ++| ld.d r23, SAVE_GPR_+0*8(sp) ++| .FPU2 fld.d f24, SAVE_FPR_+0*8(sp) ++| addi.d sp, sp, CFRAME_SPACE ++| jirl r0, ra, 0 ++|.endmacro ++| ++|// Type definitions. Some of these are only used for documentation. ++|.type L, lua_State, LREG ++|.type GL, global_State ++|.type TVALUE, TValue ++|.type GCOBJ, GCobj ++|.type STR, GCstr ++|.type TAB, GCtab ++|.type LFUNC, GCfuncL ++|.type CFUNC, GCfuncC ++|.type PROTO, GCproto ++|.type UPVAL, GCupval ++|.type NODE, Node ++|.type NARGS8, int ++|.type TRACE, GCtrace ++|.type SBUF, SBuf ++| ++|//----------------------------------------------------------------------- ++| ++|// Trap for not-yet-implemented parts. TODO ++|.macro NYI; .long 0xf0f0f0f0; .endmacro ++| ++|//----------------------------------------------------------------------- ++| ++|// Access to frame relative to BASE. ++|.define FRAME_PC, -8 ++|.define FRAME_FUNC, -16 ++| ++|//----------------------------------------------------------------------- ++| ++|// Endian-specific defines. LoongArch is little endian. TODO ++|.define HI, 4 ++|.define LO, 0 ++|.define OFS_RD, 2 ++|.define OFS_RA, 1 ++|.define OFS_OP, 0 ++| ++|// Instruction decode. 
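++|// (Added note:) one bytecode instruction is 32 bits: OP in bits 0-7, RA in bits 8-15, and
++|// either RD in bits 16-31 or RC in bits 16-23 with RB in bits 24-31. The *8 decoders below
++|// also scale the operand by 8, since stack slots and dispatch-table entries are 8 bytes each.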
++|.macro decode_OP1, dst, ins; andi dst, ins, 0xff; .endmacro ++|.macro decode_OP8a, dst, ins; andi dst, ins, 0xff; .endmacro ++|.macro decode_OP8b, dst; slli.w dst, dst, 3; .endmacro ++|.macro decode_RC8a, dst, ins; srli.w dst, ins, 13; .endmacro ++|.macro decode_RC8b, dst; andi dst, dst, 0x7f8; .endmacro ++|.macro decode_RD4b, dst; slli.w dst, dst, 2; .endmacro ++|.macro decode_RA8a, dst, ins; srli.w dst, ins, 5; .endmacro ++|.macro decode_RA8b, dst; andi dst, dst, 0x7f8; .endmacro ++|.macro decode_RB8a, dst, ins; srli.w dst, ins, 21; .endmacro ++|.macro decode_RB8b, dst; andi dst, dst, 0x7f8; .endmacro ++|.macro decode_RD8a, dst, ins; srli.w dst, ins, 16; .endmacro ++|.macro decode_RD8b, dst; slli.w dst, dst, 3; .endmacro ++|.macro decode_RDtoRC8, dst, src; andi dst, src, 0x7f8; .endmacro ++| ++|// Instruction fetch. ++|.macro ins_NEXT1 ++| ld.w INS, 0(PC) ++| addi.d PC, PC, 4 ++|.endmacro ++|// Instruction decode+dispatch. ++|.macro ins_NEXT2 ++| decode_OP8a TMP1, INS ++| decode_OP8b TMP1 ++| add.d TMP0, DISPATCH, TMP1 ++| decode_RD8a RD, INS ++| ld.d AT, 0(TMP0) ++| decode_RA8a RA, INS ++| decode_RD8b RD ++| decode_RA8b RA ++| jirl r0, AT, 0 ++|.endmacro ++|.macro ins_NEXT ++| ins_NEXT1 ++| ins_NEXT2 ++|.endmacro ++| ++|// Instruction footer. ++|.if 1 ++| // Replicated dispatch. Less unpredictable branches, but higher I-Cache use. ++| .define ins_next, ins_NEXT ++| .define ins_next_, ins_NEXT ++| .define ins_next1, ins_NEXT1 ++| .define ins_next2, ins_NEXT2 ++|.else ++| // Common dispatch. Lower I-Cache use, only one (very) unpredictable branch. ++| // Affects only certain kinds of benchmarks (and only with -j off). ++| .macro ins_next ++| b ->ins_next ++| .endmacro ++| .macro ins_next1 ++| .endmacro ++| .macro ins_next2 ++| b ->ins_next ++| .endmacro ++| .macro ins_next_ ++| ->ins_next: ++| ins_NEXT ++| .endmacro ++|.endif ++| ++|// Call decode and dispatch. ++|.macro ins_callt ++| // BASE = new base, RB = LFUNC/CFUNC, RC = nargs*8, FRAME_PC(BASE) = PC ++| ld.d PC, LFUNC:RB->pc ++| ld.w INS, 0(PC) ++| addi.d PC, PC, 4 ++| decode_OP8a TMP1, INS ++| decode_RA8a RA, INS ++| decode_OP8b TMP1 ++| decode_RA8b RA ++| add.d TMP0, DISPATCH, TMP1 ++| ld.d TMP0, 0(TMP0) ++| add.d RA, RA, BASE ++| jirl r0, TMP0, 0 ++|.endmacro ++| ++|.macro ins_call ++| // BASE = new base, RB = LFUNC/CFUNC, RC = nargs*8, PC = caller PC ++| st.d PC, FRAME_PC(BASE) ++| ins_callt ++|.endmacro ++| ++|//----------------------------------------------------------------------- ++| ++|.macro branch_RD ++| srli.w TMP0, RD, 1 ++| .LUI AT, (-(BCBIAS_J*4 >> 16) & 65535) ++| add.w TMP0, TMP0, AT ++| add.d PC, PC, TMP0 ++|.endmacro ++| ++|// Assumes DISPATCH is relative to GL. ++#define DISPATCH_GL(field) (GG_DISP2G + (int)offsetof(global_State, field)) ++#define DISPATCH_J(field) (GG_DISP2J + (int)offsetof(jit_State, field)) ++| ++#define PC2PROTO(field) ((int)offsetof(GCproto, field)-(int)sizeof(GCproto)) ++| ++|.macro hotcheck, delta, target ++| srli.d TMP1, PC, 1 ++| andi TMP1, TMP1, 126 ++| add.d TMP1, TMP1, DISPATCH ++| ld.hu TMP2, GG_DISP2HOT(TMP1) ++| addi.w TMP2, TMP2, -delta ++| st.h TMP2, GG_DISP2HOT(TMP1) ++| blt TMP2, r0, target ++|.endmacro ++| ++|.macro hotloop ++| hotcheck HOTCOUNT_LOOP, ->vm_hotloop ++|.endmacro ++| ++|.macro hotcall ++| hotcheck HOTCOUNT_CALL, ->vm_hotcall ++|.endmacro ++| ++|// Set current VM state. Uses TMP0. 
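++|// Note: the state is stored one's-complemented (~LJ_VMST_*), so a negative
++|// vmstate means interpreter/C/GC, while non-negative values denote the
++|// number of the currently executing trace.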
++|.macro li_vmstate, st; addi.w TMP0, r0, ~LJ_VMST_..st; .endmacro ++|.macro st_vmstate; .STXW TMP0, DISPATCH, DISPATCH_GL(vmstate); .endmacro ++| ++|// Move table write barrier back. Overwrites mark and tmp. ++|.macro barrierback, tab, mark, tmp, target ++| .LDXD tmp, DISPATCH, DISPATCH_GL(gc.grayagain) ++| andi mark, mark, ~LJ_GC_BLACK & 255 // black2gray(tab) ++| .STXD tab, DISPATCH, DISPATCH_GL(gc.grayagain) ++| st.b mark, tab->marked ++| st.d tmp, tab->gclist ++| beq r0, r0, target ++|.endmacro ++| ++|// Clear type tag. Isolate lowest 47 bits of reg. ++|.macro cleartp, reg; bstrpick.d reg, reg, 46, 0; .endmacro ++|.macro cleartp, dst, reg; bstrpick.d dst, reg, 46, 0; .endmacro ++| ++|// Set type tag: Merge 17 type bits into bits [47, 63] of dst. ++|.macro settp, dst, tp; bstrins.d dst, tp, 63, 47; .endmacro ++| ++|// Extract (negative) type tag. ++|.macro gettp, dst, src; srai.d dst, src, 47; .endmacro ++| ++|// Macros to check the TValue type and extract the GCobj. Branch on failure. ++|.macro checktp, reg, tp, target ++| gettp AT, reg ++| addi.d AT, AT, tp ++| cleartp reg ++| bnez AT, target ++|.endmacro ++|.macro checktp, dst, reg, tp, target ++| gettp AT, reg ++| addi.d AT, AT, tp ++| cleartp dst, reg ++| bnez AT, target ++|.endmacro ++|.macro checkstr, reg, target; checktp reg, -LJ_TSTR, target; .endmacro ++|.macro checktab, reg, target; checktp reg, -LJ_TTAB, target; .endmacro ++|.macro checkfunc, reg, target; checktp reg, -LJ_TFUNC, target; .endmacro ++|.macro checkint, reg, target ++| gettp AT, reg ++| bne AT, TISNUM, target ++|.endmacro ++|.macro checknum, reg, target ++| gettp AT, reg ++| sltui AT, AT, LJ_TISNUM ++|// or TMP0, r0, LJ_TISNUM ++|// sltu AT, AT, TMP0 ++| beqz AT, target ++|.endmacro ++| ++|.macro mov_false, reg ++| addi.d reg, r0, 0x0001 ++| slli.d reg, reg, 47 ++| nor reg, reg, r0 ++|.endmacro ++|.macro mov_true, reg ++| addi.d reg, r0, 0x0001 ++| slli.d reg, reg, 48 ++| nor reg, reg, r0 ++|.endmacro ++| ++|//----------------------------------------------------------------------- ++ ++/* Generate subroutines used by opcodes and other parts of the VM. */ ++/* The .code_sub section should be last to help static branch prediction. */ ++static void build_subroutines(BuildCtx *ctx) ++{ ++ |.code_sub ++ | ++ |//----------------------------------------------------------------------- ++ |//-- Return handling ---------------------------------------------------- ++ |//----------------------------------------------------------------------- ++ | ++ |->vm_returnp: ++ | // See vm_return. Also: TMP2 = previous base. ++ | andi AT, PC, FRAME_P ++ |// beqz AT, ->cont_dispatch ++ | ++ | // Return from pcall or xpcall fast func. ++ | mov_true TMP1 ++ | beqz AT, ->cont_dispatch ++ | ld.d PC, FRAME_PC(TMP2) // Fetch PC of previous frame. ++ | or BASE, TMP2, r0 // Restore caller base. ++ | // Prepending may overwrite the pcall frame, so do it at the end. ++ | st.d TMP1, -8(RA) // Prepend true to results. ++ | addi.d RA, RA, -8 ++ | ++ |->vm_returnc: ++ | addi.w RD, RD, 8 // RD = (nresults+1)*8. ++ | andi TMP0, PC, FRAME_TYPE ++ | addi.w CRET1, r0, LUA_YIELD ++ | beqz RD, ->vm_unwind_c_eh ++ | or MULTRES, RD, r0 ++ | beqz TMP0, ->BC_RET_Z // Handle regular return to Lua. ++ | ++ |->vm_return: ++ | // BASE = base, RA = resultptr, RD/MULTRES = (nresults+1)*8, PC = return ++ | // TMP0 = PC & FRAME_TYPE ++ | addi.w TMP2, r0, -8 ++ | xori AT, TMP0, FRAME_C ++ | and TMP2, PC, TMP2 ++ | sub.d TMP2, BASE, TMP2 // TMP2 = previous base. 
++ | bnez AT, ->vm_returnp ++ | ++ | addi.w TMP1, RD, -8 ++ | st.d TMP2, L->base ++ | li_vmstate C ++ | ld.w TMP2, SAVE_NRES(sp) ++ | addi.d BASE, BASE, -16 ++ | st_vmstate ++ | slli.w TMP2, TMP2, 3 ++ | beqz TMP1, >2 ++ |1: ++ | addi.w TMP1, TMP1, -8 ++ | ld.d CRET1, 0(RA) ++ | addi.d RA, RA, 8 ++ | st.d CRET1, 0(BASE) ++ | addi.d BASE, BASE, 8 ++ | bnez TMP1, <1 ++ | ++ |2: ++ | bne TMP2, RD, >6 ++ |3: ++ | st.d BASE, L->top // Store new top. ++ | ++ |->vm_leave_cp: ++ | ld.d TMP0, SAVE_CFRAME(sp) // Restore previous C frame. ++ | or CRET1, r0, r0 // Ok return status for vm_pcall. ++ | st.d TMP0, L->cframe ++ | ++ |->vm_leave_unw: ++ | restoreregs_ret ++ | ++ |6: ++ | ld.d TMP1, L->maxstack ++ | slt AT, TMP2, RD ++ | or r17, AT, r0 ++ |// bnez AT, >7 // Less results wanted? ++ | // More results wanted. Check stack size and fill up results with nil. ++ | slt AT, BASE, TMP1 ++ | bnez r17, >7 ++ | beqz AT, >8 ++ | st.d TISNIL, 0(BASE) ++ | addi.w RD, RD, 8 ++ | addi.d BASE, BASE, 8 ++ | beq r0, r0, <2 ++ | ++ |7: // Less results wanted. ++ | sub.w TMP0, RD, TMP2 ++ | sub.d TMP0, BASE, TMP0 // Either keep top or shrink it. ++ | maskeqz TMP0, TMP0, TMP2 // LUA_MULTRET+1 case? ++ | masknez BASE, BASE, TMP2 ++ | or BASE, BASE, TMP0 ++ | b <3 ++ | ++ |8: // Corner case: need to grow stack for filling up results. ++ | // This can happen if: ++ | // - A C function grows the stack (a lot). ++ | // - The GC shrinks the stack in between. ++ | // - A return back from a lua_call() with (high) nresults adjustment. ++ | ++ | st.d BASE, L->top // Save current top held in BASE (yes). ++ | or MULTRES, RD, r0 ++ | srli.w CARG2, TMP2, 3 ++ | or CARG1, L, r0 ++ | bl extern lj_state_growstack // (lua_State *L, int n) ++ | ld.w TMP2, SAVE_NRES(sp) ++ | ld.d BASE, L->top // Need the (realloced) L->top in BASE. ++ | or RD, MULTRES, r0 ++ | slli.w TMP2, TMP2, 3 ++ | beq r0, r0, <2 ++ ++ |->vm_unwind_c: // Unwind C stack, return from vm_pcall. ++ | // (void *cframe, int errcode) ++ | or sp, CARG1, r0 ++ | or CRET1, CARG2, r0 ++ |->vm_unwind_c_eh: // Landing pad for external unwinder. ++ | ld.d L, SAVE_L(sp) ++ | addi.w TMP0, r0, ~LJ_VMST_C ++ | ld.d GL:TMP1, L->glref ++ | st.w TMP0, GL:TMP1->vmstate ++ | beq r0, r0, ->vm_leave_unw ++ | ++ |->vm_unwind_ff: // Unwind C stack, return from ff pcall. ++ | // (void *cframe) ++ | .LI AT, -4 ++ | and sp, CARG1, AT ++ |->vm_unwind_ff_eh: // Landing pad for external unwinder. ++ | ld.d L, SAVE_L(sp) ++ | .FPU addu16i.d TMP3, r0, 0x59c0 // TOBIT = 2^52 + 2^51 (float). ++ | .LI TISNIL, LJ_TNIL ++ | .LI TISNUM, LJ_TISNUM ++ | ld.d BASE, L->base ++ | ld.d DISPATCH, L->glref // Setup pointer to dispatch table. ++ | .FPU2 movgr2fr.w TOBIT, TMP3 ++ | mov_false TMP1 ++ | li_vmstate INTERP ++ | ld.d PC, FRAME_PC(BASE) // Fetch PC of previous frame. ++ | .FPU2 fcvt.d.s TOBIT, TOBIT ++ | addi.d RA, BASE, -8 // Results start at BASE-8. ++ | .DADDIU DISPATCH, DISPATCH, GG_G2DISP ++ | st.d TMP1, 0(RA) // Prepend false to error message. ++ | st_vmstate ++ | .LI RD, 16 // 2 results: false + error message. ++ | beq r0, r0, ->vm_returnc ++ | ++ | ++ |//----------------------------------------------------------------------- ++ |//-- Grow stack for calls ----------------------------------------------- ++ |//----------------------------------------------------------------------- ++ | ++ |->vm_growstack_c: // Grow stack for C function. ++ | .LI CARG2, LUA_MINSTACK ++ | beq r0, r0, >2 ++ | ++ |->vm_growstack_l: // Grow stack for Lua function. 
++ | // BASE = new base, RA = BASE+framesize*8, RC = nargs*8, PC = first PC ++ | add.d RC, BASE, RC ++ | sub.d RA, RA, BASE ++ | st.d BASE, L->base ++ | addi.d PC, PC, 4 // Must point after first instruction. ++ | st.d RC, L->top ++ | srli.w CARG2, RA, 3 ++ |2: ++ | // L->base = new base, L->top = top ++ | st.d PC, SAVE_PC(sp) ++ | or CARG1, L, r0 ++ | bl extern lj_state_growstack // (lua_State *L, int n) ++ | ld.d BASE, L->base ++ | ld.d RC, L->top ++ | ld.d LFUNC:RB, FRAME_FUNC(BASE) ++ | sub.d RC, RC, BASE ++ | cleartp LFUNC:RB ++ | // BASE = new base, RB = LFUNC/CFUNC, RC = nargs*8, FRAME_PC(BASE) = PC ++ | ins_callt // Just retry the call. ++ | ++ |//----------------------------------------------------------------------- ++ |//-- Entry points into the assembler VM --------------------------------- ++ |//----------------------------------------------------------------------- ++ | ++ |->vm_resume: // Setup C frame and resume thread. ++ | // (lua_State *L, TValue *base, int nres1 = 0, ptrdiff_t ef = 0) ++ | saveregs ++ | or L, CARG1, r0 ++ | ld.d DISPATCH, L->glref // Setup pointer to dispatch table. ++ | or BASE, CARG2, r0 ++ | ld.bu TMP1, L->status ++ | st.d L, SAVE_L(sp) ++ | .LI PC, FRAME_CP ++ | addi.d TMP0, sp, CFRAME_RESUME ++ | .DADDIU DISPATCH, DISPATCH, GG_G2DISP ++ | st.w r0, SAVE_NRES(sp) ++ | st.w r0, SAVE_ERRF(sp) ++ | st.d CARG1, SAVE_PC(sp) // Any value outside of bytecode is ok. ++ | st.d r0, SAVE_CFRAME(sp) ++ | st.d TMP0, L->cframe ++ | beqz TMP1, >3 ++ | ++ | // Resume after yield (like a return). ++ | .STXD L, DISPATCH, DISPATCH_GL(cur_L) ++ | or RA, BASE, r0 ++ | ld.d BASE, L->base ++ | ld.d TMP1, L->top ++ | ld.d PC, FRAME_PC(BASE) ++ | .FPU addu16i.d TMP3, r0, 0x59c0 // TOBIT = 2^52 + 2^51 (float). ++ | sub.d RD, TMP1, BASE ++ | .FPU2 movgr2fr.w TOBIT, TMP3 ++ | st.b r0, L->status ++ | .FPU2 fcvt.d.s TOBIT, TOBIT ++ | li_vmstate INTERP ++ | addi.d RD, RD, 8 ++ | st_vmstate ++ | or MULTRES, RD, r0 ++ | andi TMP0, PC, FRAME_TYPE ++ | .LI TISNIL, LJ_TNIL ++ | .LI TISNUM, LJ_TISNUM ++ | beqz TMP0, ->BC_RET_Z ++ | beq r0, r0, ->vm_return ++ | ++ |->vm_pcall: // Setup protected C frame and enter VM. ++ | // (lua_State *L, TValue *base, int nres1, ptrdiff_t ef) ++ | saveregs ++ | st.w CARG4, SAVE_ERRF(sp) ++ | .LI PC, FRAME_CP ++ | beq r0, r0, >1 ++ | ++ |->vm_call: // Setup C frame and enter VM. ++ | // (lua_State *L, TValue *base, int nres1) ++ | saveregs ++ | .LI PC, FRAME_C ++ | ++ |1: // Entry point for vm_pcall above (PC = ftype). ++ | ld.d TMP1, L:CARG1->cframe ++ | or L, CARG1, r0 ++ | st.w CARG3, SAVE_NRES(sp) ++ | ld.d DISPATCH, L->glref // Setup pointer to dispatch table. ++ | st.d CARG1, SAVE_L(sp) ++ | or BASE, CARG2, r0 ++ | .DADDIU DISPATCH, DISPATCH, GG_G2DISP ++ | st.d CARG1, SAVE_PC(sp) // Any value outside of bytecode is ok. ++ | st.d TMP1, SAVE_CFRAME(sp) ++ | st.d sp, L->cframe // Add our C frame to cframe chain. ++ | ++ |3: // Entry point for vm_cpcall/vm_resume (BASE = base, PC = ftype). ++ | .STXD L, DISPATCH, DISPATCH_GL(cur_L) ++ | ld.d TMP2, L->base // TMP2 = old base (used in vmeta_call). ++ | .FPU2 .LUI TMP3, 0x59c0 // TOBIT = 2^52 + 2^51 (float). 
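++ | // 0x59c00000 is the single-precision bit pattern of 2^52+2^51; it is
++ | // converted to a double below. Adding TOBIT to a double holding a 32-bit
++ | // integer value leaves that integer in the low word of the mantissa, which
++ | // is how the FPU path of vm_tobit_fb converts numbers to bit patterns.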
++ | ld.d TMP1, L->top ++ | .FPU2 movgr2fr.w TOBIT, TMP3 ++ | add.d PC, PC, BASE ++ | sub.d NARGS8:RC, TMP1, BASE ++ | .LI TISNUM, LJ_TISNUM ++ | sub.d PC, PC, TMP2 // PC = frame delta + frame type ++ | .FPU2 fcvt.d.s TOBIT, TOBIT ++ | li_vmstate INTERP ++ | .LI TISNIL, LJ_TNIL ++ | st_vmstate ++ | ++ |->vm_call_dispatch: ++ | // TMP2 = old base, BASE = new base, RC = nargs*8, PC = caller PC ++ | ld.d LFUNC:RB, FRAME_FUNC(BASE) ++ | checkfunc LFUNC:RB, ->vmeta_call ++ | ++ |->vm_call_dispatch_f: ++ | ins_call ++ | // BASE = new base, RB = func, RC = nargs*8, PC = caller PC ++ | ++ |->vm_cpcall: // Setup protected C frame, call C. ++ | // (lua_State *L, lua_CFunction func, void *ud, lua_CPFunction cp) ++ | saveregs ++ | or L, CARG1, r0 ++ | ld.d TMP0, L:CARG1->stack ++ | st.d CARG1, SAVE_L(sp) ++ | ld.d TMP1, L->top ++ | ld.d DISPATCH, L->glref // Setup pointer to dispatch table. ++ | st.d CARG1, SAVE_PC(sp) // Any value outside of bytecode is ok. ++ | sub.d TMP0, TMP0, TMP1 // Compute -savestack(L, L->top). ++ | ld.d TMP1, L->cframe ++ | .DADDIU DISPATCH, DISPATCH, GG_G2DISP ++ | st.w TMP0, SAVE_NRES(sp) // Neg. delta means cframe w/o frame. ++ | st.w r0, SAVE_ERRF(sp) // No error function. ++ | st.d TMP1, SAVE_CFRAME(sp) ++ | st.d sp, L->cframe // Add our C frame to cframe chain. ++ | .STXD L, DISPATCH, DISPATCH_GL(cur_L) ++ | or CFUNCADDR, CARG4, r0 ++ | jirl r1, CARG4, 0 // (lua_State *L, lua_CFunction func, void *ud) ++ | or BASE, CRET1, r0 ++ | .LI PC, FRAME_CP ++ | bnez CRET1, <3 // Else continue with the call. ++ | beq r0, r0, ->vm_leave_cp // No base? Just remove C frame. ++ | ++ |//----------------------------------------------------------------------- ++ |//-- Metamethod handling ------------------------------------------------ ++ |//----------------------------------------------------------------------- ++ | ++ |// The lj_meta_* functions (except for lj_meta_cat) don't reallocate the ++ |// stack, so BASE doesn't need to be reloaded across these calls. ++ | ++ |//-- Continuation dispatch ---------------------------------------------- ++ | ++ |->cont_dispatch: ++ | // BASE = meta base, RA = resultptr, RD = (nresults+1)*8 ++ | ld.d TMP0, -32(BASE) // Continuation. ++ | or RB, BASE, r0 ++ | or BASE, TMP2, r0 // Restore caller BASE. ++ | ld.d LFUNC:TMP1, FRAME_FUNC(TMP2) ++ |.if FFI ++ | sltui AT, TMP0, 2 ++ |.endif ++ | ld.d PC, -24(RB) // Restore PC from [cont|PC]. ++ | cleartp LFUNC:TMP1 ++ | add.d TMP2, RA, RD ++ | ld.d TMP1, LFUNC:TMP1->pc ++ | st.d TISNIL, -8(TMP2) // Ensure one valid arg. ++ |.if FFI ++ | bnez AT, >1 ++ |.endif ++ | // BASE = base, RA = resultptr, RB = meta base ++ | ld.d KBASE, PC2PROTO(k)(TMP1) ++ | jirl r0, TMP0, 0 // Jump to continuation. ++ | ++ |.if FFI ++ |1: ++ | addi.d TMP1, RB, -32 ++ | bnez TMP0, ->cont_ffi_callback // cont = 1: return from FFI callback. ++ | // cont = 0: tailcall from C function. 
++ | sub.d RC, TMP1, BASE ++ | beq r0, r0, ->vm_call_tail ++ |.endif ++ | ++ |->cont_cat: // RA = resultptr, RB = meta base ++ | ld.w INS, -4(PC) ++ | addi.d CARG2, RB, -32 ++ | ld.d CRET1, 0(RA) ++ | decode_RB8a MULTRES, INS ++ | decode_RA8a RA, INS ++ | decode_RB8b MULTRES ++ | decode_RA8b RA ++ | add.d TMP1, BASE, MULTRES ++ | st.d BASE, L->base ++ | sub.d CARG3, CARG2, TMP1 ++ | st.d CRET1, 0(CARG2) ++ | bne TMP1, CARG2, ->BC_CAT_Z ++ | add.d RA, BASE, RA ++ | st.d CRET1, 0(RA) ++ | beq r0, r0, ->cont_nop ++ | ++ |//-- Table indexing metamethods ----------------------------------------- ++ | ++ |->vmeta_tgets1: ++ | .DADDIU CARG3, DISPATCH, DISPATCH_GL(tmptv) ++ | .LI TMP0, LJ_TSTR ++ | settp STR:RC, TMP0 ++ | st.d STR:RC, 0(CARG3) ++ | beq r0, r0, >1 ++ | ++ |->vmeta_tgets: ++ | .DADDIU CARG2, DISPATCH, DISPATCH_GL(tmptv) ++ | .LI TMP0, LJ_TTAB ++ | .LI TMP1, LJ_TSTR ++ | settp TAB:RB, TMP0 ++ | .DADDIU CARG3, DISPATCH, DISPATCH_GL(tmptv2) ++ | st.d TAB:RB, 0(CARG2) ++ | settp STR:RC, TMP1 ++ | st.d STR:RC, 0(CARG3) ++ | beq r0, r0, >1 ++ | ++ |->vmeta_tgetb: // TMP0 = index ++ | .DADDIU CARG3, DISPATCH, DISPATCH_GL(tmptv) ++ | settp TMP0, TISNUM ++ | st.d TMP0, 0(CARG3) ++ | ++ |->vmeta_tgetv: ++ |1: ++ | st.d BASE, L->base ++ | st.d PC, SAVE_PC(sp) ++ | or CARG1, L, r0 ++ | bl extern lj_meta_tget // (lua_State *L, TValue *o, TValue *k) ++ | // Returns TValue * (finished) or NULL (metamethod). ++ | addi.d TMP1, BASE, -FRAME_CONT ++ | beqz CRET1, >3 ++ | ld.d CARG1, 0(CRET1) ++ | ins_next1 ++ | st.d CARG1, 0(RA) ++ | ins_next2 ++ | ++ |3: // Call __index metamethod. ++ | // BASE = base, L->top = new base, stack = cont/func/t/k ++ | ld.d BASE, L->top ++ | st.d PC, -24(BASE) // [cont|PC] ++ | sub.d PC, BASE, TMP1 ++ | ld.d LFUNC:RB, FRAME_FUNC(BASE) // Guaranteed to be a function here. ++ | cleartp LFUNC:RB ++ | .LI NARGS8:RC, 16 // 2 args for func(t, k). ++ | beq r0, r0, ->vm_call_dispatch_f ++ | ++ |->vmeta_tgetr: ++ | bl extern lj_tab_getinth // (GCtab *t, int32_t key) ++ | // Returns cTValue * or NULL. ++ | or CARG2, TISNIL, r0 ++ | beqz CRET1, ->BC_TGETR_Z ++ | ld.d CARG2, 0(CRET1) ++ | beq r0, r0, ->BC_TGETR_Z ++ | ++ |//----------------------------------------------------------------------- ++ | ++ |->vmeta_tsets1: ++ | .DADDIU CARG3, DISPATCH, DISPATCH_GL(tmptv) ++ | .LI TMP0, LJ_TSTR ++ | settp STR:RC, TMP0 ++ | st.d STR:RC, 0(CARG3) ++ | beq r0, r0, >1 ++ | ++ |->vmeta_tsets: ++ | .DADDIU CARG2, DISPATCH, DISPATCH_GL(tmptv) ++ | .LI TMP0, LJ_TTAB ++ | .LI TMP1, LJ_TSTR ++ | settp TAB:RB, TMP0 ++ | .DADDIU CARG3, DISPATCH, DISPATCH_GL(tmptv2) ++ | st.d TAB:RB, 0(CARG2) ++ | settp STR:RC, TMP1 ++ | st.d STR:RC, 0(CARG3) ++ | beq r0, r0, >1 ++ | ++ |->vmeta_tsetb: // TMP0 = index ++ | .DADDIU CARG3, DISPATCH, DISPATCH_GL(tmptv) ++ | settp TMP0, TISNUM ++ | st.d TMP0, 0(CARG3) ++ | ++ |->vmeta_tsetv: ++ |1: ++ | st.d BASE, L->base ++ | st.d PC, SAVE_PC(sp) ++ | or CARG1, L, r0 ++ | bl extern lj_meta_tset // (lua_State *L, TValue *o, TValue *k) ++ | // Returns TValue * (finished) or NULL (metamethod). ++ | ld.d r17, 0(RA) ++ | beqz CRET1, >3 ++ | // NOBARRIER: lj_meta_tset ensures the table is not black. ++ | ins_next1 ++ | st.d r17, 0(CRET1) ++ | ins_next2 ++ | ++ |3: // Call __newindex metamethod. ++ | // BASE = base, L->top = new base, stack = cont/func/t/k/(v) ++ | addi.d TMP1, BASE, -FRAME_CONT ++ | ld.d BASE, L->top ++ | st.d PC, -24(BASE) // [cont|PC] ++ | sub.d PC, BASE, TMP1 ++ | ld.d LFUNC:RB, FRAME_FUNC(BASE) // Guaranteed to be a function here. 
++ | cleartp LFUNC:RB ++ | st.d r17, 16(BASE) // Copy value to third argument. ++ | .LI NARGS8:RC, 24 // 3 args for func(t, k, v) ++ | beq r0, r0, ->vm_call_dispatch_f ++ | ++ |->vmeta_tsetr: ++ | st.d BASE, L->base ++ | st.d PC, SAVE_PC(sp) ++ | or CARG1, L, r0 ++ | bl extern lj_tab_setinth // (lua_State *L, GCtab *t, int32_t key) ++ | // Returns TValue *. ++ | beq r0, r0, ->BC_TSETR_Z ++ | ++ |//-- Comparison metamethods --------------------------------------------- ++ | ++ |->vmeta_comp: ++ | // RA/RD point to o1/o2. ++ | or CARG2, RA, r0 ++ | or CARG3, RD, r0 ++ | addi.d PC, PC, -4 ++ | st.d BASE, L->base ++ | st.d PC, SAVE_PC(sp) ++ | decode_OP1 CARG4, INS ++ | or CARG1, L, r0 ++ | bl extern lj_meta_comp // (lua_State *L, TValue *o1, *o2, int op) ++ | // Returns 0/1 or TValue * (metamethod). ++ |3: ++ | sltui AT, CRET1, 2 ++ | beqz AT, ->vmeta_binop ++ | sub.w TMP2, r0, CRET1 ++ |4: ++ | ld.hu RD, OFS_RD(PC) ++ | addi.d PC, PC, 4 ++ | .LUI TMP1, (-(BCBIAS_J*4 >> 16) & 65535) ++ | slli.w RD, RD, 2 ++ | add.w RD, RD, TMP1 ++ | and RD, RD, TMP2 ++ | add.d PC, PC, RD ++ |->cont_nop: ++ | ins_next ++ | ++ |->cont_ra: // RA = resultptr ++ | ld.bu TMP1, -4+OFS_RA(PC) ++ | ld.d CRET1, 0(RA) ++ | slli.w TMP1, TMP1, 3 ++ | add.d TMP1, BASE, TMP1 ++ | st.d CRET1, 0(TMP1) ++ | beq r0, r0, ->cont_nop ++ | ++ |->cont_condt: // RA = resultptr ++ | ld.d TMP0, 0(RA) ++ | gettp TMP0, TMP0 ++ | sltui AT, TMP0, LJ_TISTRUECOND ++ | sub.w TMP2, r0, AT // Branch if result is true. ++ | beq r0, r0, <4 ++ | ++ |->cont_condf: // RA = resultptr ++ | ld.d TMP0, 0(RA) ++ | gettp TMP0, TMP0 ++ | sltui AT, TMP0, LJ_TISTRUECOND ++ | addi.w TMP2, AT, -1 // Branch if result is false. ++ | beq r0, r0, <4 ++ | ++ |->vmeta_equal: ++ | // CARG1/CARG2 point to o1/o2. TMP0 is set to 0/1. ++ | cleartp LFUNC:CARG3, CARG2 ++ | cleartp LFUNC:CARG2, CARG1 ++ | or CARG4, TMP0, r0 ++ | addi.d PC, PC, -4 ++ | st.d BASE, L->base ++ | st.d PC, SAVE_PC(sp) ++ | or CARG1, L, r0 ++ | bl extern lj_meta_equal // (lua_State *L, GCobj *o1, *o2, int ne) ++ | // Returns 0/1 or TValue * (metamethod). ++ | beq r0, r0, <3 ++ | ++ |->vmeta_equal_cd: ++ |.if FFI ++ | or CARG2, INS, r0 ++ | addi.d PC, PC, -4 ++ | st.d BASE, L->base ++ | st.d PC, SAVE_PC(sp) ++ | or CARG1, L, r0 ++ | bl extern lj_meta_equal_cd // (lua_State *L, BCIns op) ++ | // Returns 0/1 or TValue * (metamethod). ++ | beq r0, r0, <3 ++ |.endif ++ | ++ |->vmeta_istype: ++ | addi.d PC, PC, -4 ++ | st.d BASE, L->base ++ | srli.w CARG2, RA, 3 ++ | srli.w CARG3, RD, 3 ++ | st.d PC, SAVE_PC(sp) ++ | or CARG1, L, r0 ++ | bl extern lj_meta_istype // (lua_State *L, BCReg ra, BCReg tp) ++ | beq r0, r0, ->cont_nop ++ | ++ |//-- Arithmetic metamethods --------------------------------------------- ++ | ++ |->vmeta_unm: ++ | or RC, RB, r0 ++ | ++ |->vmeta_arith: ++ | st.d BASE, L->base ++ | or CARG2, RA, r0 ++ | st.d PC, SAVE_PC(sp) ++ | or CARG3, RB, r0 ++ | or CARG4, RC, r0 ++ | decode_OP1 CARG5, INS // CARG5 == RB. ++ | or CARG1, L, r0 ++ | bl extern lj_meta_arith // (lua_State *L, TValue *ra,*rb,*rc, BCReg op) ++ | // Returns NULL (finished) or TValue * (metamethod). ++ | beqz CRET1, ->cont_nop ++ | ++ | // Call metamethod for binary op. ++ |->vmeta_binop: ++ | // BASE = old base, CRET1 = new base, stack = cont/func/o1/o2 ++ | sub.d TMP1, CRET1, BASE ++ | st.d PC, -24(CRET1) // [cont|PC] ++ | or TMP2, BASE, r0 ++ | addi.d PC, TMP1, FRAME_CONT ++ | or BASE, CRET1, r0 ++ | .LI NARGS8:RC, 16 // 2 args for func(o1, o2). 
++ | beq r0, r0, ->vm_call_dispatch ++ | ++ |->vmeta_len: ++ | // CARG2 already set by BC_LEN. ++#if LJ_52 ++ | or MULTRES, CARG1, r0 ++#endif ++ | st.d BASE, L->base ++ | st.d PC, SAVE_PC(sp) ++ | or CARG1, L, r0 ++ | bl extern lj_meta_len // (lua_State *L, TValue *o) ++ | // Returns NULL (retry) or TValue * (metamethod base). ++#if LJ_52 ++ | bnez CRET1, ->vmeta_binop // Binop call for compatibility. ++ | or CARG1, MULTRES, r0 ++ | beq r0, r0, ->BC_LEN_Z ++#else ++ | beq r0, r0, ->vmeta_binop // Binop call for compatibility. ++#endif ++ | ++ |//-- Call metamethod ---------------------------------------------------- ++ | ++ |->vmeta_call: // Resolve and call __call metamethod. ++ | // TMP2 = old base, BASE = new base, RC = nargs*8 ++ | st.d TMP2, L->base // This is the callers base! ++ | addi.d CARG2, BASE, -16 ++ | st.d PC, SAVE_PC(sp) ++ | add.d CARG3, BASE, RC ++ | or MULTRES, NARGS8:RC, r0 ++ | or CARG1, L, r0 ++ | bl extern lj_meta_call // (lua_State *L, TValue *func, TValue *top) ++ | ld.d LFUNC:RB, FRAME_FUNC(BASE) // Guaranteed to be a function here. ++ | addi.d NARGS8:RC, MULTRES, 8 // Got one more argument now. ++ | cleartp LFUNC:RB ++ | ins_call ++ | ++ |->vmeta_callt: // Resolve __call for BC_CALLT. ++ | // BASE = old base, RA = new base, RC = nargs*8 ++ | st.d BASE, L->base ++ | addi.d CARG2, RA, -16 ++ | st.d PC, SAVE_PC(sp) ++ | add.d CARG3, RA, RC ++ | or MULTRES, NARGS8:RC, r0 ++ | or CARG1, L, r0 ++ | bl extern lj_meta_call // (lua_State *L, TValue *func, TValue *top) ++ | ld.d RB, FRAME_FUNC(RA) // Guaranteed to be a function here. ++ | ld.d TMP1, FRAME_PC(BASE) ++ | addi.d NARGS8:RC, MULTRES, 8 // Got one more argument now. ++ | cleartp LFUNC:CARG3, RB ++ | beq r0, r0, ->BC_CALLT_Z ++ | ++ |//-- Argument coercion for 'for' statement ------------------------------ ++ | ++ |->vmeta_for: ++ | st.d BASE, L->base ++ | or CARG2, RA, r0 ++ | st.d PC, SAVE_PC(sp) ++ | or MULTRES, INS, r0 ++ | or CARG1, L, r0 ++ | bl extern lj_meta_for // (lua_State *L, TValue *base) ++ |.if JIT ++ | decode_OP1 TMP0, MULTRES ++ | .LI AT, BC_JFORI ++ |.endif ++ | decode_RA8a RA, MULTRES ++ | decode_RD8a RD, MULTRES ++ | decode_RA8b RA ++ |.if JIT ++ | decode_RD8b RD ++ | beq TMP0, AT, =>BC_JFORI ++ | beq r0, r0, =>BC_FORI ++ |.else ++ | decode_RD8b RD ++ | beq r0, r0, =>BC_FORI ++ |.endif ++ | ++ |//----------------------------------------------------------------------- ++ |//-- Fast functions ----------------------------------------------------- ++ |//----------------------------------------------------------------------- ++ | ++ |.macro .ffunc, name ++ |->ff_ .. name: ++ |.endmacro ++ | ++ |.macro .ffunc_1, name ++ |->ff_ .. name: ++ | ld.d CARG1, 0(BASE) ++ | beqz NARGS8:RC, ->fff_fallback ++ |.endmacro ++ | ++ |.macro .ffunc_2, name ++ |->ff_ .. name: ++ | sltui AT, NARGS8:RC, 16 ++ | ld.d CARG1, 0(BASE) ++ | ld.d CARG2, 8(BASE) ++ | bnez AT, ->fff_fallback ++ |.endmacro ++ | ++ |.macro .ffunc_n, name ++ |->ff_ .. name: ++ | ld.d CARG1, 0(BASE) ++ | .FPU2 fld.d FARG1, 0(BASE) ++ | beqz NARGS8:RC, ->fff_fallback ++#if name == math_sqrt ++ | fsqrt.d FRET1, FARG1 ++#endif ++ | checknum CARG1, ->fff_fallback ++ |.endmacro ++ | ++ |.macro .ffunc_nn, name // Caveat: has delay slot! ++ |->ff_ .. 
name: ++ | ld.d CARG1, 0(BASE) ++ | sltui AT, NARGS8:RC, 16 ++ | ld.d CARG2, 8(BASE) ++ | gettp TMP0, CARG1 ++ | bnez AT, ->fff_fallback ++ | gettp TMP1, CARG2 ++ | sltui TMP0, TMP0, LJ_TISNUM ++ | sltui TMP1, TMP1, LJ_TISNUM ++ | .FPU2 fld.d FARG1, 0(BASE) ++ | and TMP0, TMP0, TMP1 ++ | .FPU2 fld.d FARG2, 8(BASE) ++ | beqz TMP0, ->fff_fallback ++ |.endmacro ++ | ++ |// Inlined GC threshold check. ++ |.macro ffgccheck ++ | .LDXD TMP0, DISPATCH, DISPATCH_GL(gc.total) ++ | .LDXD TMP1, DISPATCH, DISPATCH_GL(gc.threshold) ++ | blt TMP0, TMP1, >1 ++ | bl ->fff_gcstep ++ |1: ++ |.endmacro ++ | ++ |//-- Base library: checks ----------------------------------------------- ++ |.ffunc_1 assert ++ | gettp AT, CARG1 ++ | sltui AT, AT, LJ_TISTRUECOND ++ | addi.d RA, BASE, -16 ++ | beqz AT, ->fff_fallback ++ | ld.d PC, FRAME_PC(BASE) ++ | addi.w RD, NARGS8:RC, 8 // Compute (nresults+1)*8. ++ | add.d TMP2, RA, RD ++ | addi.d TMP1, BASE, 8 ++ | st.d CARG1, 0(RA) ++ | beq BASE, TMP2, ->fff_res // Done if exactly 1 argument. ++ |1: ++ | ld.d r17, 0(TMP1) ++ | st.d r17, -16(TMP1) ++ | or r18, TMP1, r0 ++ | addi.d TMP1, TMP1, 8 ++ | bne r18, TMP2, <1 ++ | beq r0, r0, ->fff_res ++ | ++ |.ffunc_1 type ++ | gettp TMP0, CARG1 ++ | sltu TMP1, TISNUM, TMP0 ++ | nor TMP2, TMP0, r0 ++ | .LI TMP3, ~LJ_TISNUM ++ | maskeqz TMP2, TMP2, TMP1 ++ | masknez TMP3, TMP3, TMP1 ++ | or TMP2, TMP2, TMP3 ++ | slli.d TMP2, TMP2, 3 ++ | add.d TMP2, CFUNC:RB, TMP2 ++ | ld.d CARG1, CFUNC:TMP2->upvalue ++ | beq r0, r0, ->fff_restv ++ | ++ |//-- Base library: getters and setters --------------------------------- ++ | ++ |.ffunc_1 getmetatable ++ | gettp TMP2, CARG1 ++ | addi.d TMP0, TMP2, -LJ_TTAB ++ | addi.d TMP1, TMP2, -LJ_TUDATA ++ | maskeqz TMP0, TMP1, TMP0 ++ | cleartp TAB:CARG1 ++ | bnez TMP0, >6 ++ |1: // Field metatable must be at same offset for GCtab and GCudata! ++ | ld.d TAB:RB, TAB:CARG1->metatable ++ |2: ++ | .LDXD STR:RC, DISPATCH, DISPATCH_GL(gcroot[GCROOT_MMNAME+MM_metatable]) ++ | .LI CARG1, LJ_TNIL ++ | beqz TAB:RB, ->fff_restv ++ | ld.w TMP0, TAB:RB->hmask ++ | ld.w TMP1, STR:RC->hash ++ | ld.d NODE:TMP2, TAB:RB->node ++ | and TMP1, TMP1, TMP0 // idx = str->hash & tab->hmask ++ | slli.d TMP0, TMP1, 5 ++ | slli.d TMP1, TMP1, 3 ++ | sub.d TMP1, TMP0, TMP1 ++ | add.d NODE:TMP2, NODE:TMP2, TMP1 // node = tab->node + (idx*32-idx*8) ++ | .LI CARG4, LJ_TSTR ++ | settp STR:RC, CARG4 // Tagged key to look for. ++ |3: // Rearranged logic, because we expect _not_ to find the key. ++ | ld.d TMP0, NODE:TMP2->key ++ | ld.d CARG1, NODE:TMP2->val ++ | ld.d NODE:TMP2, NODE:TMP2->next ++ | .LI AT, LJ_TTAB ++ | beq RC, TMP0, >5 ++ | bnez NODE:TMP2, <3 ++ |4: ++ | or CARG1, RB, r0 ++ | settp CARG1, AT ++ | beq r0, r0, ->fff_restv // Not found, keep default result. ++ |5: ++ | bne CARG1, TISNIL, ->fff_restv ++ | beq r0, r0, <4 // Ditto for nil value. ++ | ++ |6: ++ | sltui AT, TMP2, LJ_TISNUM ++ | maskeqz TMP0, TISNUM, AT ++ | masknez AT, TMP2, AT ++ | or TMP2, TMP0, AT ++ | slli.d TMP2, TMP2, 3 ++ | sub.d TMP0, DISPATCH, TMP2 ++ | .LDXD TAB:RB, TMP0, DISPATCH_GL(gcroot[GCROOT_BASEMT])-8 ++ | beq r0, r0, <2 ++ | ++ |.ffunc_2 setmetatable ++ | // Fast path: no mt for table yet and not clearing the mt. 
++ | checktp TMP1, CARG1, -LJ_TTAB, ->fff_fallback ++ | gettp TMP3, CARG2 ++ | ld.d TAB:TMP0, TAB:TMP1->metatable ++ | ld.bu TMP2, TAB:TMP1->marked ++ | addi.d AT, TMP3, -LJ_TTAB ++ | cleartp TAB:CARG2 ++ | or AT, AT, TAB:TMP0 ++ | or r18, AT, r0 ++ | andi AT, TMP2, LJ_GC_BLACK // isblack(table) ++ | bnez r18, ->fff_fallback ++ | st.d TAB:CARG2, TAB:TMP1->metatable ++ | beqz AT, ->fff_restv ++ | barrierback TAB:TMP1, TMP2, TMP0, ->fff_restv ++ | ++ |.ffunc rawget ++ | ld.d CARG2, 0(BASE) ++ | sltui AT, NARGS8:RC, 16 ++ | gettp TMP0, CARG2 ++ | cleartp CARG2 ++ | addi.d TMP0, TMP0, -LJ_TTAB ++ | or AT, AT, TMP0 ++ | addi.d CARG3, BASE, 8 ++ | bnez AT, ->fff_fallback ++ | or CARG1, L, r0 ++ | bl extern lj_tab_get // (lua_State *L, GCtab *t, cTValue *key) ++ | ld.d CARG1, 0(CRET1) ++ | beq r0, r0, ->fff_restv ++ | ++ |//-- Base library: conversions ------------------------------------------ ++ | ++ |.ffunc tonumber ++ | // Only handles the number case inline (without a base argument). ++ | ld.d CARG1, 0(BASE) ++ | xori AT, NARGS8:RC, 8 // Exactly one number argument. ++ | gettp TMP1, CARG1 ++ | sltu TMP0, TISNUM, TMP1 ++ | or AT, AT, TMP0 ++ | bnez AT, ->fff_fallback ++ | beq r0, r0, ->fff_restv ++ | ++ |.ffunc_1 tostring ++ | // Only handles the string or number case inline. ++ | gettp TMP0, CARG1 ++ | addi.d AT, TMP0, -LJ_TSTR ++ | .LDXD TMP1, DISPATCH, DISPATCH_GL(gcroot[GCROOT_BASEMT_NUM]) ++ | // A __tostring method in the string base metatable is ignored. ++ | beqz AT, ->fff_restv // String key? ++ | // Handle numbers inline, unless a number base metatable is present. ++ | sltu TMP0, TISNUM, TMP0 ++ | or TMP0, TMP0, TMP1 ++ | st.d BASE, L->base // Add frame since C call can throw. ++ | bnez TMP0, ->fff_fallback ++ | st.d PC, SAVE_PC(sp) // Redundant (but a defined value). ++ | ffgccheck ++ | or CARG1, L, r0 ++ | or CARG2, BASE, r0 ++ | bl extern lj_strfmt_number // (lua_State *L, cTValue *o) ++ | // Returns GCstr *. ++ | .LI AT, LJ_TSTR ++ | settp CRET1, AT ++ | or CARG1, CRET1, r0 ++ | beq r0, r0, ->fff_restv ++ | ++ |//-- Base library: iterators ------------------------------------------- ++ | ++ |.ffunc_1 next ++ | checktp CARG2, CARG1, -LJ_TTAB, ->fff_fallback ++ | add.d TMP2, BASE, NARGS8:RC ++ | st.d TISNIL, 0(TMP2) // Set missing 2nd arg to nil. ++ | ld.d PC, FRAME_PC(BASE) ++ | st.d BASE, L->base // Add frame since C call can throw. ++ | st.d BASE, L->top // Dummy frame length is ok. ++ | addi.d CARG3, BASE, 8 ++ | st.d PC, SAVE_PC(sp) ++ | or CARG1, L, r0 ++ | bl extern lj_tab_next // (lua_State *L, GCtab *t, TValue *key) ++ | // Returns 0 at end of traversal. ++ | or r17, CRET1, r0 ++ | or CARG1, TISNIL, r0 ++ | beqz r17, ->fff_restv // End of traversal: return nil. 
++ | ld.d TMP0, 8(BASE) ++ | addi.d RA, BASE, -16 ++ | ld.d TMP2, 16(BASE) ++ | st.d TMP0, 0(RA) ++ | st.d TMP2, 8(RA) ++ | .LI RD, (2+1)*8 ++ | beq r0, r0, ->fff_res ++ | ++ |.ffunc_1 pairs ++ | checktp TAB:TMP1, CARG1, -LJ_TTAB, ->fff_fallback ++ | ld.d PC, FRAME_PC(BASE) ++#if LJ_52 ++ | ld.d TAB:TMP2, TAB:TMP1->metatable ++ | ld.d TMP0, CFUNC:RB->upvalue[0] ++ | addi.d RA, BASE, -16 ++ | bnez TAB:TMP2, ->fff_fallback ++#else ++ | ld.d TMP0, CFUNC:RB->upvalue[0] ++ | addi.d RA, BASE, -16 ++#endif ++ | st.d TISNIL, 0(BASE) ++ | st.d CARG1, -8(BASE) ++ | st.d TMP0, 0(RA) ++ | .LI RD, (3+1)*8 ++ | beq r0, r0, ->fff_res ++ | ++ |.ffunc_2 ipairs_aux ++ | checktab CARG1, ->fff_fallback ++ | ld.w TMP0, TAB:CARG1->asize ++ | checkint CARG2, ->fff_fallback ++ | ld.d TMP1, TAB:CARG1->array ++ | ld.d PC, FRAME_PC(BASE) ++ | slli.w TMP2, CARG2, 0 // sextw -> slli.w ++ | addi.w TMP2, TMP2, 1 ++ | sltu AT, TMP2, TMP0 ++ | addi.d RA, BASE, -16 ++ | bstrpick.d TMP0, TMP2, 31, 0 // zextw -> bstrpick.d ++ | settp TMP0, TISNUM ++ | st.d TMP0, 0(RA) ++ | beqz AT, >2 // Not in array part? ++ | slli.d TMP3, TMP2, 3 ++ | add.d TMP3, TMP1, TMP3 ++ | ld.d TMP1, 0(TMP3) ++ |1: ++ | .LI RD, (0+1)*8 ++ | beq TMP1, TISNIL, ->fff_res // End of iteration, return 0 results. ++ | st.d TMP1, -8(BASE) ++ | .LI RD, (2+1)*8 ++ | beq r0, r0, ->fff_res ++ |2: // Check for empty hash part first. Otherwise call C function. ++ | ld.w TMP0, TAB:CARG1->hmask ++ | .LI RD, (0+1)*8 ++ | beqz TMP0, ->fff_res ++ | or CARG2, TMP2, r0 ++ | bl extern lj_tab_getinth // (GCtab *t, int32_t key) ++ | // Returns cTValue * or NULL. ++ | .LI RD, (0+1)*8 ++ | beqz CRET1, ->fff_res ++ | ld.d TMP1, 0(CRET1) ++ | beq r0, r0, <1 ++ | ++ |.ffunc_1 ipairs ++ | checktp TAB:TMP1, CARG1, -LJ_TTAB, ->fff_fallback ++ | ld.d PC, FRAME_PC(BASE) ++#if LJ_52 ++ | ld.d TAB:TMP2, TAB:TMP1->metatable ++ | ld.d CFUNC:TMP0, CFUNC:RB->upvalue[0] ++ | addi.d RA, BASE, -16 ++ | bnez TAB:TMP2, ->fff_fallback ++#else ++ | ld.d TMP0, CFUNC:RB->upvalue[0] ++ | addi.d RA, BASE, -16 ++#endif ++ | slli.d AT, TISNUM, 47 ++ | st.d CARG1, -8(BASE) ++ | st.d AT, 0(BASE) ++ | st.d CFUNC:TMP0, 0(RA) ++ | .LI RD, (3+1)*8 ++ | beq r0, r0, ->fff_res ++ | ++ |//-- Base library: catch errors ---------------------------------------- ++ | ++ |.ffunc pcall ++ | addi.d NARGS8:RC, NARGS8:RC, -8 ++ | .LDXBU TMP3, DISPATCH, DISPATCH_GL(hookmask) ++ | or TMP2, BASE, r0 ++ | blt NARGS8:RC, r0, ->fff_fallback ++ | addi.d BASE, BASE, 16 ++ | // Remember active hook before pcall. ++ | srli.w TMP3, TMP3, HOOK_ACTIVE_SHIFT ++ | andi TMP3, TMP3, 1 ++ | addi.d PC, TMP3, 16+FRAME_PCALL ++ | beqz NARGS8:RC, ->vm_call_dispatch ++ |1: ++ | add.d TMP0, BASE, NARGS8:RC ++ |// beqz NARGS8:RC, ->vm_call_dispatch ++ |2: ++ | ld.d TMP1, -16(TMP0) ++ | st.d TMP1, -8(TMP0) ++ | addi.d TMP0, TMP0, -8 ++ | bne TMP0, BASE, <2 ++ | beq r0, r0, ->vm_call_dispatch ++ | ++ |.ffunc xpcall ++ | addi.d NARGS8:TMP0, NARGS8:RC, -16 ++ | ld.d CARG1, 0(BASE) ++ | ld.d CARG2, 8(BASE) ++ | .LDXBU TMP1, DISPATCH, DISPATCH_GL(hookmask) ++ | blt NARGS8:TMP0, r0, ->fff_fallback ++ | gettp AT, CARG2 ++ | addi.d AT, AT, -LJ_TFUNC ++ | or TMP2, BASE, r0 ++ | bnez AT, ->fff_fallback // Traceback must be a function. ++ | or NARGS8:RC, NARGS8:TMP0, r0 ++ | addi.d BASE, BASE, 24 ++ | // Remember active hook before pcall. ++ | srli.w TMP3, TMP3, HOOK_ACTIVE_SHIFT ++ | st.d CARG2, 0(TMP2) // Swap function and traceback. 
++ | andi TMP3, TMP3, 1 ++ | st.d CARG1, 8(TMP2) ++ | addi.d PC, TMP3, 24+FRAME_PCALL ++ | beqz NARGS8:RC, ->vm_call_dispatch ++ | beq r0, r0, <1 ++ | ++ |//-- Coroutine library -------------------------------------------------- ++ | ++ |.macro coroutine_resume_wrap, resume ++ |.if resume ++ |.ffunc_1 coroutine_resume ++ | checktp CARG1, CARG1, -LJ_TTHREAD, ->fff_fallback ++ |.else ++ |.ffunc coroutine_wrap_aux ++ | ld.d L:CARG1, CFUNC:RB->upvalue[0].gcr ++ | cleartp L:CARG1 ++ |.endif ++ | ld.bu TMP0, L:CARG1->status ++ | ld.d TMP1, L:CARG1->cframe ++ | ld.d CARG2, L:CARG1->top ++ | ld.d TMP2, L:CARG1->base ++ | addi.w AT, TMP0, -LUA_YIELD ++ | add.d CARG3, CARG2, TMP0 ++ | addi.d TMP3, CARG2, 8 ++ | masknez CARG2, CARG2, AT ++ | maskeqz TMP3, TMP3, AT ++ | or CARG2, TMP3, CARG2 ++ | blt r0, AT, ->fff_fallback // st > LUA_YIELD? ++ | xor TMP2, TMP2, CARG3 ++ | or AT, TMP2, TMP0 ++ | bnez TMP1, ->fff_fallback // cframe != 0? ++ | ld.d TMP0, L:CARG1->maxstack ++ | ld.d PC, FRAME_PC(BASE) ++ | beqz AT, ->fff_fallback // base == top && st == 0? ++ | add.d TMP2, CARG2, NARGS8:RC ++ | sltu AT, TMP0, TMP2 ++ | st.d PC, SAVE_PC(sp) ++ | bnez AT, ->fff_fallback // Stack overflow? ++ | st.d BASE, L->base ++ |1: ++ |.if resume ++ | addi.d BASE, BASE, 8 // Keep resumed thread in stack for GC. ++ | addi.d NARGS8:RC, NARGS8:RC, -8 ++ | addi.d TMP2, TMP2, -8 ++ |.endif ++ | st.d TMP2, L:CARG1->top ++ | add.d TMP1, BASE, NARGS8:RC ++ | or CARG3, CARG2, r0 ++ | st.d BASE, L->top ++ |2: // Move args to coroutine. ++ | ld.d r17, 0(BASE) ++ | sltu AT, BASE, TMP1 ++ | addi.d BASE, BASE, 8 ++ | beqz AT, >3 ++ | st.d r17, 0(CARG3) ++ | addi.d CARG3, CARG3, 8 ++ | beq r0, r0, <2 ++ |3: ++ | or L:RA, L:CARG1, r0 ++ | bl ->vm_resume // (lua_State *L, TValue *base, 0, 0) ++ | // Returns thread status. ++ |4: ++ | ld.d TMP2, L:RA->base ++ | sltui AT, CRET1, LUA_YIELD+1 ++ | ld.d TMP3, L:RA->top ++ | li_vmstate INTERP ++ | ld.d BASE, L->base ++ | .STXD L, DISPATCH, DISPATCH_GL(cur_L) ++ | st_vmstate ++ | sub.d RD, TMP3, TMP2 ++ | beqz AT, >8 ++ | ld.d TMP0, L->maxstack ++ | add.d TMP1, BASE, RD ++ | beqz RD, >6 // No results? ++ | sltu AT, TMP0, TMP1 ++ | add.d TMP3, TMP2, RD ++ | bnez AT, >9 // Need to grow stack? ++ | st.d TMP2, L:RA->top // Clear coroutine stack. ++ | or TMP1, BASE, r0 ++ |5: // Move results from coroutine. ++ | ld.d r17, 0(TMP2) ++ | addi.d TMP2, TMP2, 8 ++ | sltu AT, TMP2, TMP3 ++ | st.d r17, 0(TMP1) ++ | addi.d TMP1, TMP1, 8 ++ | bnez AT, <5 ++ |6: ++ | andi TMP0, PC, FRAME_TYPE ++ |.if resume ++ | mov_true TMP1 ++ | addi.d RA, BASE, -8 ++ | st.d TMP1, -8(BASE) // Prepend true to results. ++ | addi.d RD, RD, 16 ++ |.else ++ | or RA, BASE, r0 ++ | addi.d RD, RD, 8 ++ |.endif ++ |7: ++ | st.d PC, SAVE_PC(sp) ++ | or MULTRES, RD, r0 ++ | beqz TMP0, ->BC_RET_Z ++ | beq r0, r0, ->vm_return ++ | ++ |8: // Coroutine returned with error (at co->top-1). ++ |.if resume ++ | addi.d TMP3, TMP3, -8 ++ | mov_false TMP1 ++ | ld.d r17, 0(TMP3) ++ | st.d TMP3, L:RA->top // Remove error from coroutine stack. ++ | .LI RD, (2+1)*8 ++ | st.d TMP1, -8(BASE) // Prepend false to results. ++ | addi.d RA, BASE, -8 ++ | st.d r17, 0(BASE) // Copy error message. ++ | andi TMP0, PC, FRAME_TYPE ++ | beq r0, r0, <7 ++ |.else ++ | or CARG2, L:RA, r0 ++ | or CARG1, L, r0 ++ | bl extern lj_ffh_coroutine_wrap_err // (lua_State *L, lua_State *co) ++ |.endif ++ | ++ |9: // Handle stack expansion on return from yield. 
++ | srli.w CARG2, RD, 3 ++ | or CARG1, L, r0 ++ | bl extern lj_state_growstack // (lua_State *L, int n) ++ | .LI CRET1, 0 ++ | beq r0, r0, <4 ++ |.endmacro ++ | ++ | coroutine_resume_wrap 1 // coroutine.resume ++ | coroutine_resume_wrap 0 // coroutine.wrap ++ | ++ |.ffunc coroutine_yield ++ | ld.d TMP0, L->cframe ++ | add.d TMP1, BASE, NARGS8:RC ++ | st.d BASE, L->base ++ | andi TMP0, TMP0, CFRAME_RESUME ++ | st.d TMP1, L->top ++ | .LI CRET1, LUA_YIELD ++ | beqz TMP0, ->fff_fallback ++ | st.d r0, L->cframe ++ | st.b CRET1, L->status ++ | beq r0, r0, ->vm_leave_unw ++ | ++ |//-- Math library ------------------------------------------------------- ++ | ++ |.ffunc_1 math_abs ++ | gettp CARG2, CARG1 ++ | addi.d AT, CARG2, -LJ_TISNUM ++ | slli.w TMP1, CARG1, 0 // sextw -> slli.w ++ | bnez AT, >1 ++ | srai.w TMP0, TMP1, 31 // Extract sign. ++ | xor TMP1, TMP1, TMP0 ++ | sub.d CARG1, TMP1, TMP0 ++ | slli.d TMP3, CARG1, 32 ++ | settp CARG1, TISNUM ++ | bge TMP3, r0, ->fff_restv ++ | .LI CARG1, 0x41e0 // 2^31 as a double. ++ | slli.d CARG1, CARG1, 48 ++ | beq r0, r0, ->fff_restv ++ |1: ++ | sltui AT, CARG2, LJ_TISNUM ++ | bstrpick.d CARG1, CARG1, 62, 0 ++ | beqz AT, ->fff_fallback ++ |// fallthrough ++ | ++ |->fff_restv: ++ | // CARG1 = TValue result. ++ | ld.d PC, FRAME_PC(BASE) ++ | addi.d RA, BASE, -16 ++ | st.d CARG1, -16(BASE) ++ |->fff_res1: ++ | // RA = results, PC = return. ++ | .LI RD, (1+1)*8 ++ |->fff_res: ++ | // RA = results, RD = (nresults+1)*8, PC = return. ++ | andi TMP0, PC, FRAME_TYPE ++ | or MULTRES, RD, r0 ++ | bnez TMP0, ->vm_return ++ | ld.w INS, -4(PC) ++ | decode_RB8a RB, INS ++ | decode_RB8b RB ++ |5: ++ | sltu AT, RD, RB ++ | decode_RA8a TMP0, INS ++ | bnez AT, >6 // More results expected? ++ | decode_RA8b TMP0 ++ | ins_next1 ++ | // Adjust BASE. KBASE is assumed to be set for the calling frame. ++ | sub.d BASE, RA, TMP0 ++ | ins_next2 ++ | ++ |6: // Fill up results with nil. ++ | add.d TMP1, RA, RD ++ | addi.d RD, RD, 8 ++ | st.d TISNIL, -8(TMP1) ++ | beq r0, r0, <5 ++ | ++ |.macro math_extern, func ++ | .ffunc_n math_ .. func ++ | bl extern func ++ | fmov.d FRET1, FARG1 ++ | beq r0, r0, ->fff_resn ++ |.endmacro ++ | ++ |.macro math_extern2, func ++ | .ffunc_nn math_ .. func ++ | bl extern func ++ | fmov.d FRET1, FARG1 ++ | beq r0, r0, ->fff_resn ++ |.endmacro ++ | ++ |// TODO: Return integer type if result is integer (own sf implementation). ++ |.macro math_round, func ++ |->ff_math_ .. func: ++ | ld.d CARG1, 0(BASE) ++ | gettp TMP0, CARG1 ++ | beqz NARGS8:RC, ->fff_fallback ++ | sltu AT, TMP0, TISNUM ++ | beq TMP0, TISNUM, ->fff_restv ++ |// beqz AT, ->fff_fallback ++ |.if FPU ++ | fld.d FARG1, 0(BASE) ++ | beqz AT, ->fff_fallback ++ | bl ->vm_ .. func ++ |.else ++ | beqz AT, ->fff_fallback ++ | bl extern func ++ |.endif ++ | beq r0, r0, ->fff_resn ++ |.endmacro ++ | ++ | math_round floor ++ | math_round ceil ++ | ++ |.ffunc math_log ++ | .LI AT, 8 ++ | ld.d CARG1, 0(BASE) ++ | bne NARGS8:RC, AT, ->fff_fallback // Exactly 1 argument. 
++ | checknum CARG1, ->fff_fallback ++ |.if FPU ++ | fld.d FARG1, 0(BASE) ++ | bl extern log ++ |.else ++ | bl extern log ++ |.endif ++ | fmov.d FRET1, FARG1 ++ | beq r0, r0, ->fff_resn ++ | ++ | math_extern log10 ++ | math_extern exp ++ | math_extern sin ++ | math_extern cos ++ | math_extern tan ++ | math_extern asin ++ | math_extern acos ++ | math_extern atan ++ | math_extern sinh ++ | math_extern cosh ++ | math_extern tanh ++ | math_extern2 pow ++ | math_extern2 atan2 ++ | math_extern2 fmod ++ | ++ |.if FPU ++ |// fsqrt.d FRET1, FARG1 ++ |.ffunc_n math_sqrt ++ |// fsqrt.d FRET1, FARG1 ++ |// fallthrough to ->fff_resn ++ |.else ++ | math_extern sqrt ++ |.endif ++ | ++ |->fff_resn: ++ | ld.d PC, FRAME_PC(BASE) ++ | addi.d RA, BASE, -16 ++ |.if FPU ++ | fst.d FRET1, 0(RA) ++ | beq r0, r0, ->fff_res1 ++ |.else ++ | st.d CRET1, 0(RA) ++ | beq r0, r0, ->fff_res1 ++ |.endif ++ | ++ | ++ |.ffunc_2 math_ldexp ++ | checknum CARG1, ->fff_fallback ++ | checkint CARG2, ->fff_fallback ++ | .FPU2 fld.d FARG1, 0(BASE) ++ | ld.w CARG1, 8+LO(BASE) ++ | bl extern ldexp ++ | fmov.d FRET1, FARG1 ++ | beq r0, r0, ->fff_resn ++ | ++ |.ffunc_n math_frexp ++ | ld.d PC, FRAME_PC(BASE) ++ | .DADDIU CARG1, DISPATCH, DISPATCH_GL(tmptv) ++ | bl extern frexp ++ | .LDXW TMP1, DISPATCH, DISPATCH_GL(tmptv) ++ | addi.d RA, BASE, -16 ++ |.if FPU ++ | movgr2fr.w FARG2, TMP1 ++ | fst.d FRET1, 0(RA) ++ | ffint.d.w FARG2, FARG2 ++ | fst.d FARG2, 8(RA) ++ |.else ++ | st.d CRET1, 0(RA) ++ | bstrpick.d TMP1, TMP1, 31, 0 // zextw -> bstrpick.d ++ | settp TMP1, TISNUM ++ | st.d TMP1, 8(RA) ++ |.endif ++ | .LI RD, (2+1)*8 ++ | beq r0, r0, ->fff_res ++ | ++ |.ffunc_n math_modf ++ | ld.d PC, FRAME_PC(BASE) ++ | addi.d CARG1, BASE, -16 ++ | bl extern modf ++ | addi.d RA, BASE, -16 ++ |.if FPU ++ | fst.d FRET1, -8(BASE) ++ |.else ++ | st.d CRET1, -8(BASE) ++ |.endif ++ | .LI RD, (2+1)*8 ++ | beq r0, r0, ->fff_res ++ | ++ |.macro math_minmax, name, intins, intinsc, fpins ++ | .ffunc_1 name ++ | add.d TMP3, BASE, NARGS8:RC ++ | addi.d TMP2, BASE, 8 ++ | checkint CARG1, >5 ++ |1: // Handle integers. ++ | ld.d CARG2, 0(TMP2) ++ | beq TMP2, TMP3, ->fff_restv ++ | slli.w CARG1, CARG1, 0 // sextw -> slli.w ++ | checkint CARG2, >3 ++ | ld.w CARG2, LO(TMP2) ++ | slt AT, CARG1, CARG2 ++ | intins TMP1, CARG2, AT ++ | intinsc CARG1, CARG1, AT ++ | or CARG1, CARG1, TMP1 ++ | addi.d TMP2, TMP2, 8 ++ | bstrpick.d CARG1, CARG1, 31, 0 // zextw -> bstrpick.d ++ | settp CARG1, TISNUM ++ | beq r0, r0, <1 ++ | ++ |3: // Convert intermediate result to number and continue with number loop. ++ |// checknum CARG2, ->fff_fallback ++ |.if FPU ++ | movgr2fr.w FRET1, CARG1 //TODO checknum slot ins ++ | checknum CARG2, ->fff_fallback ++ | ffint.d.w FRET1, FRET1 ++ | fld.d FARG1, 0(TMP2) ++ | beq r0, r0, >7 ++ |.else ++ | checknum CARG2, ->fff_fallback ++ | bl ->vm_sfi2d_1 ++ | beq r0, r0, >7 ++ |.endif ++ | ++ |5: ++ | .FPU2 fld.d FRET1, 0(BASE) ++ |// checknum CARG1, ->fff_fallback ++ |6: // Handle numbers. 
++ | ld.d CARG2, 0(TMP2) //TODO mips slot ins ++ | checknum CARG1, ->fff_fallback ++ |// beq TMP2, TMP3, ->fff_resn ++ |.if FPU ++ | fld.d FARG1, 0(TMP2) ++ |.else ++ | or CRET1, CARG1, r0 ++ |.endif ++ | beq TMP2, TMP3, ->fff_resn ++ | checknum CARG2, >8 ++ |7: ++ |.if FPU ++ | fpins FRET1, FRET1, FARG1 ++ |.else ++ |.if fpins // ismax ++ | bl ->vm_sfcmpogt ++ |.else ++ | bl ->vm_sfcmpolt ++ |.endif ++ | masknez AT, CARG2, CRET1 ++ | maskeqz CARG1, CARG1, CRET1 ++ | or CARG1, CARG1, AT ++ |.endif ++ | addi.d TMP2, TMP2, 8 ++ | beq r0, r0, <6 ++ | ++ |8: // Convert integer to number and continue with number loop. ++ |// checkint CARG2, ->fff_fallback //TODO doesnot process the mips slot ins ++ |.if FPU ++ | fld.s FARG1, LO(TMP2) ++ | checkint CARG2, ->fff_fallback ++ | ffint.d.w FARG1, FARG1 ++ | beq r0, r0, <7 ++ |.else ++ | ld.w CARG2, LO(TMP2) ++ | checkint CARG2, ->fff_fallback ++ | bl ->vm_sfi2d_2 ++ | beq r0, r0, <7 ++ |.endif ++ | ++ |.endmacro ++ | ++ | math_minmax math_min, masknez, maskeqz, fmin.d ++ | math_minmax math_max, maskeqz, masknez, fmax.d ++ | ++ |//-- String library ----------------------------------------------------- ++ | ++ |.ffunc string_byte // Only handle the 1-arg case here. ++ | ld.d CARG1, 0(BASE) ++ | gettp TMP0, CARG1 ++ | xori AT, NARGS8:RC, 8 ++ | addi.d TMP0, TMP0, -LJ_TSTR ++ | or AT, AT, TMP0 ++ | cleartp STR:CARG1 ++ | bnez AT, ->fff_fallback // Need exactly 1 string argument. ++ | ld.w TMP0, STR:CARG1->len ++ | addi.d RA, BASE, -16 ++ | ld.d PC, FRAME_PC(BASE) ++ | sltu RD, r0, TMP0 ++ | ld.bu TMP1, STR:CARG1[1] // Access is always ok (NUL at end). ++ | addi.w RD, RD, 1 ++ | slli.w RD, RD, 3 // RD = ((str->len != 0)+1)*8 ++ | settp TMP1, TISNUM ++ | st.d TMP1, 0(RA) ++ | beq r0, r0, ->fff_res ++ | ++ |.ffunc string_char // Only handle the 1-arg case here. ++ | ffgccheck ++ | ld.d CARG1, 0(BASE) ++ | gettp TMP0, CARG1 ++ | xori AT, NARGS8:RC, 8 // Exactly 1 argument. ++ | addi.d TMP0, TMP0, -LJ_TISNUM // Integer. ++ | .LI TMP1, 255 ++ | slli.w CARG1, CARG1, 0 // sextw -> slli.w ++ | or AT, AT, TMP0 ++ | sltu TMP1, TMP1, CARG1 // !(255 < n). ++ | or AT, AT, TMP1 ++ | .LI CARG3, 1 ++ | bnez AT, ->fff_fallback ++ | addi.d CARG2, sp, TMPD_OFS ++ | st.b CARG1, TMPD(sp) ++ |->fff_newstr: ++ | st.d BASE, L->base ++ | st.d PC, SAVE_PC(sp) ++ | or CARG1, L, r0 ++ | bl extern lj_str_new // (lua_State *L, char *str, size_t l) ++ | // Returns GCstr *. 
++ | ld.d BASE, L->base ++ |->fff_resstr: ++ | .LI AT, LJ_TSTR ++ | settp CRET1, AT ++ | or CARG1, CRET1, r0 ++ | beq r0, r0, ->fff_restv ++ | ++ |.ffunc string_sub ++ | ffgccheck ++ | addi.d AT, NARGS8:RC, -16 ++ | ld.d TMP0, 0(BASE) ++ | gettp TMP3, TMP0 ++ | blt AT, r0, ->fff_fallback ++ | cleartp STR:CARG1, TMP0 ++ | ld.d CARG2, 8(BASE) ++ | .LI CARG4, -1 ++ | beqz AT, >1 ++ | ld.d CARG3, 16(BASE) ++ |// checkint CARG3, ->fff_fallback ++ | slli.w CARG4, CARG3, 0 //TODO it`s also a mips slot ins, sextw -> slli.w ++ | checkint CARG3, ->fff_fallback ++ |1: ++ | checkint CARG2, ->fff_fallback ++ | .LI AT, LJ_TSTR //TODO mips slot ins ++ |// checkint CARG2, ->fff_fallback ++ | slli.w CARG3, CARG2, 0 // sextw -> slli.w ++ | bne TMP3, AT, ->fff_fallback ++ | ld.w CARG2, STR:CARG1->len ++ | // STR:CARG1 = str, CARG2 = str->len, CARG3 = start, CARG4 = end ++ | slt AT, CARG4, r0 ++ | addi.w TMP0, CARG2, 1 ++ | add.w TMP1, CARG4, TMP0 ++ | slt TMP3, CARG3, r0 ++ | masknez CARG4, CARG4, AT ++ | maskeqz TMP1, TMP1, AT ++ | or CARG4, TMP1, CARG4 // if (end < 0) end += len+1 ++ | add.w TMP1, CARG3, TMP0 ++ | maskeqz TMP1, TMP1, TMP3 ++ | masknez CARG3, CARG3, TMP3 ++ | or CARG3, TMP1, CARG3 // if (start < 0) start += len+1 ++ | .LI TMP2, 1 ++ | slt AT, CARG4, r0 ++ | slt TMP3, r0, CARG3 ++ | masknez CARG4, CARG4, AT // if (end < 0) end = 0 ++ | maskeqz CARG3, CARG3, TMP3 ++ | masknez TMP2, TMP2, TMP3 ++ | or CARG3, TMP2, CARG3 // if (start < 1) start = 1 ++ | slt AT, CARG2, CARG4 ++ | masknez CARG4, CARG4, AT ++ | maskeqz CARG2, CARG2, AT ++ | or CARG4, CARG2, CARG4 // if (end > len) end = len ++ | add.d CARG2, STR:CARG1, CARG3 ++ | sub.d CARG3, CARG4, CARG3 // len = end - start ++ | addi.d CARG2, CARG2, sizeof(GCstr)-1 ++ | or r17, CARG3, r0 ++ | addi.w CARG3, CARG3, 1 // len++ ++ | bge r17, r0, ->fff_newstr ++ |->fff_emptystr: // Return empty string. ++ | .LI AT, LJ_TSTR ++ | .DADDIU STR:CARG1, DISPATCH, DISPATCH_GL(strempty) ++ | settp CARG1, AT ++ | beq r0, r0, ->fff_restv ++ | ++ |.macro ffstring_op, name ++ | .ffunc string_ .. name ++ | ffgccheck ++ | ld.d CARG2, 0(BASE) ++ | beqz NARGS8:RC, ->fff_fallback ++ | checkstr STR:CARG2, ->fff_fallback ++ | .DADDIU SBUF:CARG1, DISPATCH, DISPATCH_GL(tmpbuf) ++ | ld.d TMP0, SBUF:CARG1->b ++ | st.d L, SBUF:CARG1->L ++ | st.d BASE, L->base ++ | st.d TMP0, SBUF:CARG1->p ++ | st.d PC, SAVE_PC(sp) ++ | bl extern lj_buf_putstr_ .. name ++ | or SBUF:CARG1, SBUF:CRET1, r0 ++ | bl extern lj_buf_tostr ++ | ld.d BASE, L->base ++ | beq r0, r0, ->fff_resstr ++ |.endmacro ++ | ++ |ffstring_op reverse ++ |ffstring_op lower ++ |ffstring_op upper ++ | ++ |//-- Bit library -------------------------------------------------------- ++ | ++ |->vm_tobit_fb: ++ |// beqz TMP1, ->fff_fallback //TODO doesnot process the following mips slot ins ++ |.if FPU ++ | fld.d FARG1, 0(BASE) ++ | beqz TMP1, ->fff_fallback ++ | fadd.d FARG1, FARG1, TOBIT ++ | movfr2gr.s CRET1, FARG1 ++ | bstrpick.d CRET1, CRET1, 31, 0 // zextw -> bstrpick.d ++ | jirl r0, ra, 0 ++ |.else ++ | beqz TMP1, ->fff_fallback ++ |// FP number to bit conversion for soft-float. 
++ |->vm_tobit:
++ | slli.d TMP0, CARG1, 1
++ | .LI CARG3, 1076
++ | srli.d AT, TMP0, 53
++ | sub.d CARG3, CARG3, AT
++ | sltui AT, CARG3, 54
++ | bstrpick.d TMP0, TMP0, 52, 0
++ | beqz AT, >1
++ | bstrins.d TMP0, AT, 21, 21
++ | slt AT, CARG1, r0
++ | srl.d CRET1, TMP0, CARG3
++ | sub.d TMP0, r0, CRET1
++ | maskeqz TMP0, TMP0, AT
++ | masknez CRET1, CRET1, AT
++ | or CRET1, CRET1, TMP0
++ | bstrpick.d CRET1, CRET1, 31, 0 // zextw -> bstrpick.d
++ | jirl r0, ra, 0
++ |1:
++ | or CRET1, r0, r0
++ | jirl r0, ra, 0
++ |
++ |// FP number to int conversion with a check for soft-float.
++ |// Modifies CARG1, CRET1, CRET2, TMP0, AT.
++ |->vm_tointg:
++ |.if JIT
++ | slli.d CRET2, CARG1, 1
++ | .LI TMP0, 1076
++ | beqz CRET2, >2
++ | srli.d AT, CRET2, 53
++ | sub.d TMP0, TMP0, AT
++ | sltui AT, TMP0, 54
++ | bstrpick.d CRET2, CRET2, 52, 0
++ | beqz AT, >1
++ | bstrins.d CRET2, AT, 21, 21
++ | slt AT, CARG1, r0
++ | srl.d CRET1, CRET2, TMP0
++ | sub.d CARG1, r0, CRET1
++ | masknez CRET1, CRET1, AT
++ | maskeqz CARG1, CARG1, AT
++ | or CRET1, CRET1, CARG1
++ | .LI CARG1, 64
++ | sub.w TMP0, CARG1, TMP0
++ | sll.d CRET2, CRET2, TMP0 // Integer check.
++ | slli.w AT, CRET1, 0 // sextw -> slli.w
++ | xor AT, CRET1, AT // Range check.
++ | masknez AT, AT, CRET2
++ | maskeqz CRET2, CRET2, CRET2
++ | or CRET2, AT, CRET2
++ | jirl r0, ra, 0
++ |1:
++ | .LI CRET2, 1
++ | jirl r0, ra, 0
++ |2:
++ | or CRET1, r0, r0
++ | jirl r0, ra, 0
++ |.endif
++ |.endif
++ |
++ |.macro .ffunc_bit, name
++ | .ffunc_1 bit_..name
++ | gettp TMP0, CARG1
++ | bstrpick.d CRET1, CARG1, 31, 0 // zextw -> bstrpick.d
++ | beq TMP0, TISNUM, >6
++ | sltui TMP1, TMP0, LJ_TISNUM
++ | bl ->vm_tobit_fb
++ |6:
++ |.endmacro
++ |
++ |.macro .ffunc_bit_op, name, bins
++ | .ffunc_bit name
++ | addi.d TMP2, BASE, 8
++ | add.d TMP3, BASE, NARGS8:RC
++ |1:
++ | ld.d r17, 0(TMP2)
++ | beq TMP2, TMP3, ->fff_resi
++ | gettp TMP0, r17
++ |.if FPU
++ | addi.d TMP2, TMP2, 8
++ | bne TMP0, TISNUM, >2
++ | bstrpick.d r17, r17, 31, 0 // zextw -> bstrpick.d
++ | bins CRET1, CRET1, r17
++ | beq r0, r0, <1
++ |2:
++ | fld.d FARG1, -8(TMP2)
++ | sltui AT, TMP0, LJ_TISNUM
++ | fadd.d FARG1, FARG1, TOBIT
++ | beqz AT, ->fff_fallback
++ | movfr2gr.s r17, FARG1
++ | bstrpick.d r17, r17, 31, 0 // zextw -> bstrpick.d
++ | bins CRET1, CRET1, r17
++ | beq r0, r0, <1
++ |.else
++ | or CRET2, CRET1, r0
++ | beq TMP0, TISNUM, >2
++ | sltui TMP1, TMP0, LJ_TISNUM
++ | bl ->vm_tobit_fb
++ | or CARG1, CRET2, r0
++ |2:
++ | bstrpick.d r17, r17, 31, 0 // zextw -> bstrpick.d
++ | bins CRET1, CRET1, r17
++ | addi.d TMP2, TMP2, 8
++ | beq r0, r0, <1
++ |.endif
++ |.endmacro
++ |
++ |.ffunc_bit_op band, and
++ |.ffunc_bit_op bor, or
++ |.ffunc_bit_op bxor, xor
++ |
++ |.ffunc_bit bswap
++ | srli.d TMP0, CRET1, 8
++ | srli.d TMP1, CRET1, 24
++ |// andi TMP2, TMP0, 0xff00
++ | srli.d TMP3, TMP0, 8
++ | andi TMP2, TMP3, 0xff
++ | slli.d TMP2, TMP2, 8
++ | bstrins.d TMP1, CRET1, 31, 24
++ | bstrins.d TMP2, TMP0, 23, 16
++ | or CRET1, TMP1, TMP2
++ | beq r0, r0, ->fff_resi
++ |
++ |.ffunc_bit bnot
++ | nor CRET1, CRET1, r0
++ | bstrpick.d CRET1, CRET1, 31, 0 // zextw -> bstrpick.d
++ | beq r0, r0, ->fff_resi
++ |
++ |.macro .ffunc_bit_sh, name, shins, shmod
++ | .ffunc_2 bit_..name
++ | gettp TMP0, CARG1
++ | beq TMP0, TISNUM, >1
++ | sltui TMP1, TMP0, LJ_TISNUM
++ | bl ->vm_tobit_fb
++ | or CARG1, CRET1, r0
++ |1:
++ | gettp TMP0, CARG2
++ | bstrpick.d CARG2, CARG2, 31, 0 // zextw -> bstrpick.d
++ | bne TMP0, TISNUM, ->fff_fallback
++ | slli.w CARG1, CARG1, 0 // sextw -> slli.w
++ 
|.if shmod == 1 ++ | sub.w CARG2, r0, CARG2 ++ |.endif ++ | shins CRET1, CARG1, CARG2 ++ | bstrpick.d CRET1, CRET1, 31, 0 // zextw -> bstrpick.d ++ | beq r0, r0, ->fff_resi ++ |.endmacro ++ | ++ |.ffunc_bit_sh lshift, sll.w, 0 ++ |.ffunc_bit_sh rshift, srl.w, 0 ++ |.ffunc_bit_sh arshift, sra.w, 0 ++ |.ffunc_bit_sh rol, rotr.w, 1 ++ |.ffunc_bit_sh ror, rotr.w, 0 ++ | ++ |.ffunc_bit tobit ++ |->fff_resi: ++ | ld.d PC, FRAME_PC(BASE) ++ | addi.d RA, BASE, -16 ++ | settp CRET1, TISNUM ++ | st.d CRET1, -16(BASE) ++ | beq r0, r0, ->fff_res1 ++ | ++ |//----------------------------------------------------------------------- ++ |->fff_fallback: // Call fast function fallback handler. ++ | // BASE = new base, RB = CFUNC, RC = nargs*8 ++ | ld.d TMP3, CFUNC:RB->f ++ | add.d TMP1, BASE, NARGS8:RC ++ | ld.d PC, FRAME_PC(BASE) // Fallback may overwrite PC. ++ | addi.d TMP0, TMP1, 8*LUA_MINSTACK ++ | ld.d TMP2, L->maxstack ++ | st.d PC, SAVE_PC(sp) // Redundant (but a defined value). ++ | sltu AT, TMP2, TMP0 ++ | st.d BASE, L->base ++ | st.d TMP1, L->top ++ | or CFUNCADDR, TMP3, r0 ++ | bnez AT, >5 // Need to grow stack. ++ | or CARG1, L, r0 ++ | jirl r1, TMP3, 0 // (lua_State *L) ++ | // Either throws an error, or recovers and returns -1, 0 or nresults+1. ++ | ld.d BASE, L->base ++ | slli.w RD, CRET1, 3 ++ | addi.d RA, BASE, -16 ++ | blt r0, CRET1, ->fff_res // Returned nresults+1? ++ |1: // Returned 0 or -1: retry fast path. ++ | ld.d LFUNC:RB, FRAME_FUNC(BASE) ++ | ld.d TMP0, L->top ++ | cleartp LFUNC:RB ++ | sub.d NARGS8:RC, TMP0, BASE ++ | bnez CRET1, ->vm_call_tail // Returned -1? ++ | ins_callt // Returned 0: retry fast path. ++ | ++ |// Reconstruct previous base for vmeta_call during tailcall. ++ |->vm_call_tail: ++ | andi TMP0, PC, FRAME_TYPE ++ | .LI AT, -4 ++ | and TMP1, PC, AT ++ | bnez TMP0, >3 ++ | ld.bu TMP1, OFS_RA(PC) ++ | slli.w TMP1, TMP1, 3 ++ | addi.w TMP1, TMP1, 16 ++ |3: ++ | sub.d TMP2, BASE, TMP1 ++ | beq r0, r0, ->vm_call_dispatch // Resolve again for tailcall. ++ | ++ |5: // Grow stack for fallback handler. ++ | .LI CARG2, LUA_MINSTACK ++ | or CARG1, L, r0 ++ | bl extern lj_state_growstack // (lua_State *L, int n) ++ | ld.d BASE, L->base ++ | .LI CRET1, 0 // Force retry. ++ | beq r0, r0, <1 ++ | ++ |->fff_gcstep: // Call GC step function. ++ | // BASE = new base, RC = nargs*8 ++ | or MULTRES, ra, r0 ++ | st.d BASE, L->base ++ | add.d TMP0, BASE, NARGS8:RC ++ | st.d PC, SAVE_PC(sp) // Redundant (but a defined value). ++ | st.d TMP0, L->top ++ | or CARG1, L, r0 ++ | bl extern lj_gc_step // (lua_State *L) ++ | ld.d BASE, L->base ++ | or ra, MULTRES, r0 ++ | ld.d TMP0, L->top ++ | ld.d CFUNC:RB, FRAME_FUNC(BASE) ++ | cleartp CFUNC:RB ++ | sub.d NARGS8:RC, TMP0, BASE ++ | jirl r0, ra, 0 ++ | ++ |//----------------------------------------------------------------------- ++ |//-- Special dispatch targets ------------------------------------------- ++ |//----------------------------------------------------------------------- ++ | ++ |->vm_record: // Dispatch target for recording phase. ++ |.if JIT ++ | .LDXBU TMP3, DISPATCH, DISPATCH_GL(hookmask) ++ | andi AT, TMP3, HOOK_VMEVENT // No recording while in vmevent. ++ | .LDXW TMP2, DISPATCH, DISPATCH_GL(hookcount) ++ | bnez AT, >5 ++ | // Decrement the hookcount for consistency, but always do the call. 
++ | andi AT, TMP3, HOOK_ACTIVE ++ | addi.w TMP2, TMP2, -1 ++ | bnez AT, >1 ++ | andi AT, TMP3, LUA_MASKLINE|LUA_MASKCOUNT ++ | beqz AT, >1 ++ | .STXW TMP2, DISPATCH, DISPATCH_GL(hookcount) ++ | beq r0, r0, >1 ++ |.endif ++ | ++ |->vm_rethook: // Dispatch target for return hooks. ++ | .LDXBU TMP3, DISPATCH, DISPATCH_GL(hookmask) ++ | andi AT, TMP3, HOOK_ACTIVE // Hook already active? ++ |// beqz AT, >1 //TODO dose not process the following mips slot ins ++ |5: // Re-dispatch to static ins. ++ | ld.d AT, GG_DISP2STATIC(TMP0) // Assumes TMP0 holds DISPATCH+OP*4. ++ | beqz AT, >1 ++ | jirl r0, AT, 0 ++ | ++ |->vm_inshook: // Dispatch target for instr/line hooks. ++ | .LDXBU TMP3, DISPATCH, DISPATCH_GL(hookmask) ++ | .LDXW TMP2, DISPATCH, DISPATCH_GL(hookcount) ++ | andi AT, TMP3, HOOK_ACTIVE // Hook already active? ++ | or r17, AT, r0 ++ | andi AT, TMP3, LUA_MASKLINE|LUA_MASKCOUNT ++ | bnez r17, <5 ++ | addi.w TMP2, TMP2, -1 ++ | beqz AT, <5 ++ | .STXW TMP2, DISPATCH, DISPATCH_GL(hookcount) ++ | beqz TMP2, >1 ++ | andi AT, TMP3, LUA_MASKLINE ++ | beqz AT, <5 //TODO dose not process the following mips slot ins ++ |1: ++ |//. load_got lj_dispatch_ins ++ |// st.w MULTRES, SAVE_MULTRES ++ | st.w MULTRES, TMPD(sp) ++ | or CARG2, PC, r0 ++ | st.d BASE, L->base ++ | // SAVE_PC must hold the _previous_ PC. The callee updates it with PC. ++ | or CARG1, L, r0 ++ | bl extern lj_dispatch_ins // (lua_State *L, const BCIns *pc) ++ |3: ++ | ld.d BASE, L->base ++ |4: // Re-dispatch to static ins. ++ | ld.w INS, -4(PC) ++ | decode_OP8a TMP1, INS ++ | decode_OP8b TMP1 ++ | add.d TMP0, DISPATCH, TMP1 ++ | decode_RD8a RD, INS ++ | ld.d AT, GG_DISP2STATIC(TMP0) ++ | decode_RA8a RA, INS ++ | decode_RD8b RD ++ | decode_RA8b RA ++ | jirl r0, AT, 0 ++ | ++ |->cont_hook: // Continue from hook yield. ++ | addi.d PC, PC, 4 ++ | ld.w MULTRES, -24+LO(RB) // Restore MULTRES for *M ins. ++ | beq r0, r0, <4 ++ | ++ |->vm_hotloop: // Hot loop counter underflow. ++ |.if JIT ++ | ld.d LFUNC:TMP1, FRAME_FUNC(BASE) ++ | .DADDIU CARG1, DISPATCH, GG_DISP2J ++ | cleartp LFUNC:TMP1 ++ | st.d PC, SAVE_PC(sp) ++ | ld.d TMP1, LFUNC:TMP1->pc ++ | or CARG2, PC, r0 ++ | .STXD L, DISPATCH, DISPATCH_J(L) ++ | ld.bu TMP1, PC2PROTO(framesize)(TMP1) ++ | st.d BASE, L->base ++ | slli.d TMP1, TMP1, 3 ++ | add.d TMP1, BASE, TMP1 ++ | st.d TMP1, L->top ++ | bl extern lj_trace_hot // (jit_State *J, const BCIns *pc) ++ | beq r0, r0, <3 ++ |.endif ++ | ++ | ++ |->vm_callhook: // Dispatch target for call hooks. ++ | or CARG2, PC, r0 ++ |.if JIT ++ | beq r0, r0, >1 //TODO which is the mips slot ins ++ |.endif ++ |// or CARG2, PC, r0 ++ | ++ |->vm_hotcall: // Hot call counter underflow. ++ |.if JIT ++ | ori CARG2, PC, 1 ++ |1: ++ |.endif ++ | add.d TMP0, BASE, RC ++ | st.d PC, SAVE_PC(sp) ++ | st.d BASE, L->base ++ | sub.d RA, RA, BASE ++ | st.d TMP0, L->top ++ | or CARG1, L, r0 ++ | bl extern lj_dispatch_call // (lua_State *L, const BCIns *pc) ++ | // Returns ASMFunction. ++ | ld.d BASE, L->base ++ | ld.d TMP0, L->top ++ | st.d r0, SAVE_PC(sp) // Invalidate for subsequent line hook. ++ | sub.d NARGS8:RC, TMP0, BASE ++ | add.d RA, BASE, RA ++ | ld.d LFUNC:RB, FRAME_FUNC(BASE) ++ | cleartp LFUNC:RB ++ | ld.w INS, -4(PC) ++ | jirl r0, CRET1, 0 ++ | ++ |->cont_stitch: // Trace stitching. ++ |.if JIT ++ | // RA = resultptr, RB = meta base ++ | ld.w INS, -4(PC) ++ | ld.d TRACE:TMP2, -40(RB) // Save previous trace. ++ | decode_RA8a RC, INS ++ | addi.d AT, MULTRES, -8 ++ | cleartp TRACE:TMP2 ++ | decode_RA8b RC ++ | add.d RC, BASE, RC // Call base. 
++ | beqz AT, >2 ++ |1: // Move results down. ++ | ld.d CARG1, 0(RA) ++ | addi.d AT, AT, -8 ++ | addi.d RA, RA, 8 ++ | st.d CARG1, 0(RC) ++ | addi.d RC, RC, 8 ++ | bnez AT, <1 ++ |2: ++ | decode_RA8a RA, INS ++ | decode_RB8a RB, INS ++ | decode_RA8b RA ++ | decode_RB8b RB ++ | add.d RA, RA, RB ++ | add.d RA, BASE, RA ++ |3: ++ | sltu AT, RC, RA ++ | bnez AT, >9 // More results wanted? ++ | ++ | ld.hu TMP3, TRACE:TMP2->traceno ++ | ld.hu RD, TRACE:TMP2->link ++ | beq RD, TMP3, ->cont_nop // Blacklisted. ++ | slli.w RD, RD, 3 ++ | bnez RD, =>BC_JLOOP // Jump to stitched trace. ++ | ++ | // Stitch a new trace to the previous trace. ++ | st.w TMP3, DISPATCH_J(exitno)(DISPATCH) ++ | .STXD L, DISPATCH, DISPATCH_J(L) ++ | st.d BASE, L->base ++ | .DADDIU CARG1, DISPATCH, GG_DISP2J ++ | or CARG2, PC, r0 ++ | bl extern lj_dispatch_stitch // (jit_State *J, const BCIns *pc) ++ | ld.d BASE, L->base ++ | beq r0, r0, ->cont_nop ++ | ++ |9: ++ | st.d TISNIL, 0(RC) ++ | addi.d RC, RC, 8 ++ | beq r0, r0, <3 ++ |.endif ++ | ++ |->vm_profhook: // Dispatch target for profiler hook. ++#if LJ_HASPROFILE ++ |// st.w MULTRES, SAVE_MULTRES ++ | st.w MULTRES, TMPD(sp) ++ | or CARG2, PC, r0 ++ | st.d BASE, L->base ++ | or CARG1, L, r0 ++ | bl extern lj_dispatch_profile // (lua_State *L, const BCIns *pc) ++ | // HOOK_PROFILE is off again, so re-dispatch to dynamic instruction. ++ | addi.d PC, PC, -4 ++ | ld.d BASE, L->base ++ | beq r0, r0, ->cont_nop ++#endif ++ | ++ |//----------------------------------------------------------------------- ++ |//-- Trace exit handler ------------------------------------------------- ++ |//----------------------------------------------------------------------- ++ | ++ |.macro savex_, a, b ++ |.if FPU ++ | fst.d f..a, a*8(sp) ++ | fst.d f..b, b*8(sp) ++ | st.d r..a, 32*8+a*8(sp) ++ | st.d r..b, 32*8+b*8(sp) ++ |.else ++ | st.d r..a, a*8(sp) ++ | st.d r..b, b*8(sp) ++ |.endif ++ |.endmacro ++ | ++ |->vm_exit_handler: ++ |.if JIT ++ |.if FPU ++ | addi.d sp, sp, -(32*8+32*8) ++ |.else ++ | addi.d sp, sp, -(32*8) ++ |.endif ++ | savex_ 0, 2 ++ | savex_ 4, 5 ++ | savex_ 6, 7 ++ | savex_ 8, 9 ++ | savex_ 10, 11 ++ | savex_ 12, 13 ++ | savex_ 14, 15 ++ | savex_ 16, 17 ++ | savex_ 18, 19 ++ | savex_ 20, 21 ++ | savex_ 22, 23 ++ | savex_ 24, 25 ++ | savex_ 26, 27 ++ | savex_ 28, 29 ++ | savex_ 30, 31 ++ |.if FPU ++ | fst.d f1, 1*8(sp) ++ | fst.d f3, 3*8(sp) ++ | st.d r0, 32*8+1*8(sp) // Clear RID_TMP. ++ | addi.d TMP2, sp, 32*8+32*8 // Recompute original value of sp. ++ | st.d TMP2, 32*8+3*8(sp) // Store sp in RID_SP ++ |.else ++ | st.d r0, 31*8(sp) // Clear RID_TMP. ++ | addi.d TMP2, sp, 32*8 // Recompute original value of sp. ++ | st.d TMP2, 3*8(sp) // Store sp in RID_SP ++ |.endif ++ | li_vmstate EXIT ++ | .DADDIU DISPATCH, JGL, -GG_DISP2G-32768 ++ | ld.w TMP1, 0(TMP2) // Load exit number. ++ | st_vmstate ++ | .LDXD L, DISPATCH, DISPATCH_GL(cur_L) ++ | .LDXD BASE, DISPATCH, DISPATCH_GL(jit_base) ++ | .STXD L, DISPATCH, DISPATCH_J(L) ++ | st.w ra, DISPATCH_J(parent)(DISPATCH) // Store trace number. ++ | st.d BASE, L->base ++ | st.w TMP1, DISPATCH_J(exitno)(DISPATCH) // Store exit number. ++ | .DADDIU CARG1, DISPATCH, GG_DISP2J ++ | .STXD r0, DISPATCH, DISPATCH_GL(jit_base) ++ | or CARG2, sp, r0 ++ | bl extern lj_trace_exit // (jit_State *J, ExitState *ex) ++ | // Returns MULTRES (unscaled) or negated error code. ++ | ld.d TMP1, L->cframe ++ | .LI AT, -4 ++ | ld.d BASE, L->base ++ | and sp, TMP1, AT ++ | ld.d PC, SAVE_PC(sp) // Get SAVE_PC. 
++ | st.d L, SAVE_L(sp) // Set SAVE_L (on-trace resume/yield). ++ | beq r0, r0, >1 ++ |.endif ++ |->vm_exit_interp: ++ |.if JIT ++ | // CRET1 = MULTRES or negated error code, BASE, PC and JGL set. ++ | ld.d L, SAVE_L(sp) ++ | .DADDIU DISPATCH, JGL, -GG_DISP2G-32768 ++ | st.d BASE, L->base ++ |1: ++ | ld.d LFUNC:RB, FRAME_FUNC(BASE) ++ | blt CRET1, r0, >9 // Check for error from exit. ++ | .FPU addu16i.d TMP3, r0, 0x59c0 // TOBIT = 2^52 + 2^51 (float). ++ | slli.d MULTRES, CRET1, 3 ++ | cleartp LFUNC:RB ++ |//st.w MULTRES, SAVE_MULTRES ++ | st.w MULTRES, TMPD(sp) ++ | .LI TISNIL, LJ_TNIL ++ | .LI TISNUM, LJ_TISNUM // Setup type comparison constants. ++ | .FPU2 movgr2fr.w TOBIT, TMP3 ++ | ld.d TMP1, LFUNC:RB->pc ++ | .STXD r0, DISPATCH, DISPATCH_GL(jit_base) ++ | ld.d KBASE, PC2PROTO(k)(TMP1) ++ | .FPU2 fcvt.d.s TOBIT, TOBIT ++ | // Modified copy of ins_next which handles function header dispatch, too. ++ | ld.w INS, 0(PC) ++ | addi.d PC, PC, 4 ++ | // Assumes TISNIL == ~LJ_VMST_INTERP == -1 ++ | .STXW TISNIL, DISPATCH, DISPATCH_GL(vmstate) ++ | decode_OP8a TMP1, INS ++ | decode_OP8b TMP1 ++ | sltui TMP2, TMP1, BC_FUNCF*8 ++ | add.d TMP0, DISPATCH, TMP1 ++ | decode_RD8a RD, INS ++ | ld.d AT, 0(TMP0) ++ | decode_RA8a RA, INS ++ | decode_RA8b RA ++ | beqz TMP2, >2 ++ | decode_RD8b RD ++ | jirl r0, AT, 0 ++ |2: ++ | sltui TMP2, TMP1, (BC_FUNCC+2)*8 // Fast function? ++ | ld.d TMP1, FRAME_PC(BASE) ++ | bnez TMP2, >3 ++ | // Check frame below fast function. ++ | andi TMP0, TMP1, FRAME_TYPE ++ | bnez TMP0, >3 // Trace stitching continuation? ++ | // Otherwise set KBASE for Lua function below fast function. ++ | ld.w TMP2, -4(TMP1) ++ | decode_RA8a TMP0, TMP2 ++ | decode_RA8b TMP0 ++ | sub.d TMP1, BASE, TMP0 ++ | ld.d LFUNC:TMP2, -32(TMP1) ++ | cleartp LFUNC:TMP2 ++ | ld.d TMP1, LFUNC:TMP2->pc ++ | ld.d KBASE, PC2PROTO(k)(TMP1) ++ |3: ++ | addi.d RC, MULTRES, -8 ++ | add.d RA, RA, BASE ++ | jirl r0, AT, 0 ++ | ++ |9: // Rethrow error from the right C frame. ++ | sub.w CARG2, r0, CRET1 //TODO LA: sub.w no trap ++ | or CARG1, L, r0 ++ | bl extern lj_err_throw // (lua_State *L, int errcode) ++ |.endif ++ | ++ |//----------------------------------------------------------------------- ++ |//-- Math helper functions ---------------------------------------------- ++ |//----------------------------------------------------------------------- ++ | ++ |// Hard-float round to integer. ++ |.macro vm_round_hf, func ++ | addu16i.d TMP0, r0, 0x4330 // Hiword of 2^52 (double). ++ | slli.d TMP0, TMP0, 32 ++ | movgr2fr.d f4, TMP0 ++ | fabs.d FRET2, FARG1 // |x| ++ | movfr2gr.d AT, FARG1 ++ | fcmp.clt.d FCC0, FRET2, f4 ++ | fadd.d FRET1, FRET2, f4 // (|x| + 2^52) - 2^52 ++ | fsub.d FRET1, FRET1, f4 ++ | bceqz FCC0, >1 // Truncate only if |x| < 2^52. ++ | slt AT, AT, r0 ++ |.if "func" == "ceil" ++ |// addu16i.d TMP0, r0, 0xbff0 // Hiword of -1 (double). Preserves -0. ++ | .LUI TMP0, 0xbff0 ++ |.else ++ | addu16i.d TMP0, r0, 0x3ff0 // Hiword of +1 (double). ++ |.endif ++ |.if "func" == "trunc" ++ | slli.d TMP0, TMP0, 32 ++ | movgr2fr.d f4, TMP0 ++ | fcmp.clt.d FCC0, FRET2, FRET1 // |x| < result? ++ | fsub.d FRET2, FRET1, f4 ++ |// sel.d FTMP1, FRET1, FRET2 // If yes, subtract +1. ++ | fsel FTMP1, FRET1, FRET2, FCC0 ++ | movgr2fr.d FRET1, AT ++ | fneg.d FRET2, FTMP1 ++ |//. 
sel.d FRET1, FTMP1, FRET2 ++ | movfr2cf FCC0, FRET1 ++ | fsel FRET1, FTMP1, FRET2, FCC0 ++ | jirl r0, ra, 0 ++ |.else ++ | fneg.d FRET2, FRET1 ++ | slli.d TMP0, TMP0, 32 ++ | movgr2fr.d f4, TMP0 ++ | movgr2fr.d FTMP1, AT ++ | movfr2cf FCC0, FTMP1 ++ | fsel FTMP1, FRET1, FRET2, FCC0 ++ |.if "func" == "ceil" ++ | fcmp.clt.d FCC0, FTMP1, FARG1 // x > result? ++ |.else ++ | fcmp.clt.d FCC0, FARG1, FTMP1 // x < result? ++ |.endif ++ | fsub.d FRET2, FTMP1, f4 // If yes, subtract +-1. ++ | fsel FRET1, FTMP1, FRET2, FCC0 ++ | fmov.d FARG1, FRET1 ++ | jirl r0, ra, 0 ++ |.endif ++ |1: ++ | fmov.d FRET1, FARG1 ++ | jirl r0, ra, 0 ++ |.endmacro ++ | ++ |.macro vm_round, func ++ |.if FPU ++ | vm_round_hf, func ++ |.endif ++ |.endmacro ++ | ++ |->vm_floor: ++ | vm_round floor ++ |->vm_ceil: ++ | vm_round ceil ++ |->vm_trunc: ++ |.if JIT ++ | vm_round trunc ++ |.endif ++ | ++ |// Soft-float integer to number conversion. ++ |.macro sfi2d, ARG ++ |.if not FPU ++ | srai.w TMP0, ARG, 31 ++ | beqz ARG, >9 // Handle zero first. ++ | xor TMP1, ARG, TMP0 ++ | sub.d TMP1, TMP1, TMP0 // Absolute value in TMP1. ++ | clz.d ARG, TMP1 ++ | addi.w ARG, ARG, -11 ++ | .LI AT, 0x3ff+63-11-1 ++ | sll.d TMP1, TMP1, ARG // Align mantissa left with leading 1. ++ | sub.w ARG, AT, ARG // Exponent - 1. ++ | bstrins.w ARG, TMP0, 11, 11 // Sign | Exponent. ++ | slli.d ARG, ARG, 52 // Align left. ++ | add.d ARG, ARG, TMP1 // Add mantissa, increment exponent. ++ | jirl r0, ra, 0 ++ |9: ++ | jirl r0, ra, 0 ++ |.endif ++ |.endmacro ++ | ++ |// Input CARG1. Output: CARG1. Temporaries: AT, TMP0, TMP1. ++ |->vm_sfi2d_1: ++ | sfi2d CARG1 ++ | ++ |// Input CARG2. Output: CARG2. Temporaries: AT, TMP0, TMP1. ++ |->vm_sfi2d_2: ++ | sfi2d CARG2 ++ | ++ |// Soft-float comparison. Equivalent to c.eq.d. ++ |// Input: CARG*. Output: CRET1. Temporaries: AT, TMP0, TMP1. ++ |->vm_sfcmpeq: ++ |.if not FPU ++ | slli.d AT, CARG1, 1 ++ | slli.d TMP0, CARG2, 1 ++ | or TMP1, AT, TMP0 ++ | addu16i.d TMP1, r0, 0xffe0 ++ | beqz TMP1, >8 // Both args +-0: return 1. ++ | slli.d TMP1, TMP1, 32 ++ | sltu AT, TMP1, AT ++ | sltu TMP0, TMP1, TMP0 ++ | or TMP1, AT, TMP0 ++ | xor AT, CARG1, CARG2 ++ | bnez TMP1, >9 // Either arg is NaN: return 0; ++ | sltui CRET1, AT, 1 // Same values: return 1. ++ | jirl r0, ra, 0 ++ |8: ++ | .LI CRET1, 1 ++ | jirl r0, ra, 0 ++ |9: ++ | .LI CRET1, 0 ++ | jirl r0, ra, 0 ++ |.endif ++ | ++ |// Soft-float comparison. Equivalent to c.ult.d and c.olt.d. ++ |// Input: CARG1, CARG2. Output: CRET1. Temporaries: AT, TMP0, TMP1, CRET2. ++ |->vm_sfcmpult: ++ |.if not FPU ++ | .LI CRET2, 1 ++ | beq r0, r0, >1 ++ |.endif ++ | ++ |->vm_sfcmpolt: ++ |.if not FPU ++ | .LI CRET2, 0 ++ |1: ++ | slli.d AT, CARG1, 1 ++ | slli.d TMP0, CARG2, 1 ++ | or TMP1, AT, TMP0 ++ | addu16i.d TMP1, r0, 0xffe0 ++ | beqz TMP1, >8 // Both args +-0: return 0. ++ | slli.d TMP1, TMP1, 32 ++ | sltu AT, TMP1, AT ++ | sltu TMP0, TMP1, TMP0 ++ | or TMP1, AT, TMP0 ++ | and AT, CARG1, CARG2 ++ | bnez TMP1, >9 // Either arg is NaN: return 0 or 1; ++ | blt AT, r0, >5 // Both args negative? ++ | slt CRET1, CARG1, CARG2 ++ | jirl r0, ra, 0 ++ |5: // Swap conditions if both operands are negative. ++ | slt CRET1, CARG2, CARG1 ++ | jirl r0, ra, 0 ++ |8: ++ | .LI CRET1, 0 ++ | jirl r0, ra, 0 ++ |9: ++ | or CRET1, CRET2, r0 ++ | jirl r0, ra, 0 ++ |.endif ++ | ++ |->vm_sfcmpogt: ++ |.if not FPU ++ | slli.d AT, CARG2, 1 ++ | slli.d TMP0, CARG1, 1 ++ | or TMP1, AT, TMP0 ++ | addu16i.d TMP1, r0, 0xffe0 ++ | beqz TMP1, >8 // Both args +-0: return 0. 
++ | slli.d TMP1, TMP1, 32 ++ | sltu AT, TMP1, AT ++ | sltu TMP0, TMP1, TMP0 ++ | or TMP1, AT, TMP0 ++ | and AT, CARG2, CARG1 ++ | bnez TMP1, >9 // Either arg is NaN: return 0 or 1; ++ | blt AT, r0, >5 // Both args negative? ++ | slt CRET1, CARG2, CARG1 ++ | jirl r0, ra, 0 ++ |5: // Swap conditions if both operands are negative. ++ | slt CRET1, CARG1, CARG2 ++ | jirl r0, ra, 0 ++ |8: ++ | .LI CRET1, 0 ++ | jirl r0, ra, 0 ++ |9: ++ | .LI CRET1, 0 ++ | jirl r0, ra, 0 ++ |.endif ++ | ++ |// Soft-float comparison. Equivalent to c.ole.d a, b or c.ole.d b, a. ++ |// Input: CARG1, CARG2, TMP3. Output: CRET1. Temporaries: AT, TMP0, TMP1. ++ |->vm_sfcmpolex: ++ |.if not FPU ++ | slli.d AT, CARG1, 1 ++ | slli.d TMP0, CARG2, 1 ++ | or TMP1, AT, TMP0 ++ | addu16i.d TMP1, r0, 0xffe0 ++ | beqz TMP1, >8 // Both args +-0: return 1. ++ | slli.d TMP1, TMP1, 32 ++ | sltu AT, TMP1, AT ++ | sltu TMP0, TMP1, TMP0 ++ | or TMP1, AT, TMP0 ++ | and AT, CARG1, CARG2 ++ | bnez TMP1, >9 // Either arg is NaN: return 0; ++ | xor AT, AT, TMP3 ++ | bltz AT, r0, >5 // Both args negative? ++ | slt CRET1, CARG2, CARG1 ++ | jirl r0, ra, 0 ++ |5: // Swap conditions if both operands are negative. ++ | slt CRET1, CARG1, CARG2 ++ | jirl r0, ra, 0 ++ |8: ++ | .LI CRET1, 1 ++ | jirl r0, ra, 0 ++ |9: ++ | .LI CRET1, 0 ++ | jirl r0, ra, 0 ++ |.endif ++ | ++ |.macro sfmin_max, name, fpcall ++ |->vm_sf .. name: ++ |.if JIT and not FPU ++ | or TMP2, ra, r0 ++ | bl ->fpcall ++ | or ra, TMP2, r0 ++ | or TMP0, CRET1, r0 ++ | or CRET1, CARG1, r0 ++ | maskeqz CRET1, CRET1, TMP0 ++ | masknez TMP0, CARG2, TMP0 ++ | or CRET1, CRET1, TMP0 ++ | jirl r0, ra, 0 ++ |.endif ++ |.endmacro ++ | ++ | sfmin_max min, vm_sfcmpolt ++ | sfmin_max max, vm_sfcmpogt ++ | ++ |//----------------------------------------------------------------------- ++ |//-- Miscellaneous functions -------------------------------------------- ++ |//----------------------------------------------------------------------- ++ | ++ |.define NEXT_TAB, TAB:CARG1 ++ |.define NEXT_IDX, CARG2 ++ |.define NEXT_ASIZE, CARG3 ++ |.define NEXT_NIL, CARG4 ++ |.define NEXT_TMP0, r12 ++ |.define NEXT_TMP1, r13 ++ |.define NEXT_TMP2, r14 ++ |.define NEXT_RES_VK, CRET1 ++ |.define NEXT_RES_IDX, CRET2 ++ |.define NEXT_RES_PTR, sp ++ |.define NEXT_RES_VAL, 0(sp) ++ |.define NEXT_RES_KEY, 8(sp) ++ | ++ |// TValue *lj_vm_next(GCtab *t, uint32_t idx) ++ |// Next idx returned in CRET2. ++ |->vm_next: ++ |.if JIT and ENDIAN_LE ++ | ld.d NEXT_ASIZE, NEXT_TAB->asize ++ | ld.d NEXT_TMP0, NEXT_TAB->array ++ | .LI NEXT_NIL, LJ_TNIL ++ |1: // Traverse array part. ++ | sltu AT, NEXT_IDX, NEXT_ASIZE ++ | slli.w NEXT_TMP1, NEXT_IDX, 3 ++ | add.d NEXT_TMP1, NEXT_TMP0, NEXT_TMP1 ++ | beqz AT, >5 ++ | .LI AT, LJ_TISNUM ++ | ld.d NEXT_TMP2, 4(NEXT_TMP1) ++ | slli.d AT, AT, 47 ++ | or NEXT_TMP1, NEXT_IDX, AT ++ | addi.d NEXT_IDX, NEXT_IDX, 1 ++ | beq NEXT_TMP2, NEXT_NIL, <1 ++ | st.d NEXT_TMP2, NEXT_RES_VAL ++ | st.d NEXT_TMP1, NEXT_RES_KEY ++ | addi.d NEXT_RES_VK, NEXT_RES_PTR, 0 ++ | addi.d NEXT_RES_IDX, NEXT_IDX, 0 ++ | jirl r0, ra, 0 ++ | ++ |5: // Traverse hash part. 
++ | sub.d NEXT_RES_IDX, NEXT_IDX, NEXT_ASIZE ++ | ld.d NODE:NEXT_RES_VK, NEXT_TAB->node ++ | slli.w NEXT_TMP2, NEXT_RES_IDX, 5 ++ | ld.d NEXT_TMP0, NEXT_TAB->hmask ++ | slli.w AT, NEXT_RES_IDX, 3 ++ | sub.d AT, NEXT_TMP2, AT ++ | add.d NODE:NEXT_RES_VK, NODE:NEXT_RES_VK, AT ++ |6: ++ | sltu AT, NEXT_TMP0, NEXT_RES_IDX ++ | bnez AT, >8 ++ | ld.d NEXT_TMP2, NODE:NEXT_RES_VK->val ++ | addi.d NEXT_RES_IDX, NEXT_RES_IDX, 1 ++ | bne NEXT_TMP2, NEXT_NIL, >9 ++ | // Skip holes in hash part. ++ | addi.d NODE:NEXT_RES_VK, NODE:NEXT_RES_VK, sizeof(Node) ++ | b <6 ++ | ++ |8: // End of iteration. Set the key to nil (not the value). ++ | st.d NEXT_NIL, NEXT_RES_KEY ++ | addi.d NEXT_RES_VK, NEXT_RES_PTR, 0 ++ |9: ++ | add.d NEXT_RES_IDX, NEXT_RES_IDX, NEXT_ASIZE ++ | jirl r0, ra, 0 ++ |.endif ++ | ++ |//----------------------------------------------------------------------- ++ |//-- FFI helper functions ----------------------------------------------- ++ |//----------------------------------------------------------------------- ++ | ++ |// Handler for callback functions. Callback slot number in r19, g in r17. ++ |->vm_ffi_callback: ++ |.if FFI ++ |.type CTSTATE, CTState, PC ++ | saveregs ++ | ld.d CTSTATE, GL:r17->ctype_state ++ | .DADDIU DISPATCH, r17, GG_G2DISP ++ | st.w r19, CTSTATE->cb.slot ++ | st.d CARG1, CTSTATE->cb.gpr[0] ++ | .FPU2 fst.d FARG1, CTSTATE->cb.fpr[0] ++ | st.d CARG2, CTSTATE->cb.gpr[1] ++ | .FPU2 fst.d FARG2, CTSTATE->cb.fpr[1] ++ | st.d CARG3, CTSTATE->cb.gpr[2] ++ | .FPU2 fst.d FARG3, CTSTATE->cb.fpr[2] ++ | st.d CARG4, CTSTATE->cb.gpr[3] ++ | .FPU2 fst.d FARG4, CTSTATE->cb.fpr[3] ++ | st.d CARG5, CTSTATE->cb.gpr[4] ++ | .FPU2 fst.d FARG5, CTSTATE->cb.fpr[4] ++ | st.d CARG6, CTSTATE->cb.gpr[5] ++ | .FPU2 fst.d FARG6, CTSTATE->cb.fpr[5] ++ | st.d CARG7, CTSTATE->cb.gpr[6] ++ | .FPU2 fst.d FARG7, CTSTATE->cb.fpr[6] ++ | st.d CARG8, CTSTATE->cb.gpr[7] ++ | .FPU2 fst.d FARG8, CTSTATE->cb.fpr[7] ++ | addi.d TMP0, sp, CFRAME_SPACE ++ | st.d TMP0, CTSTATE->cb.stack ++ | st.d r0, SAVE_PC(sp) // Any value outside of bytecode is ok. ++ | or CARG2, sp, r0 ++ | or CARG1, CTSTATE, r0 ++ | bl extern lj_ccallback_enter // (CTState *cts, void *cf) ++ | // Returns lua_State *. ++ | ld.d BASE, L:CRET1->base ++ | ld.d RC, L:CRET1->top ++ | or L, CRET1, r0 ++ | .FPU addu16i.d TMP3, r0, 0x59c0 // TOBIT = 2^52 + 2^51 (float). ++ | ld.d LFUNC:RB, FRAME_FUNC(BASE) ++ | .FPU2 movgr2fr.w TOBIT, TMP3 ++ | .LI TISNIL, LJ_TNIL ++ | .LI TISNUM, LJ_TISNUM ++ | li_vmstate INTERP ++ | sub.w RC, RC, BASE ++ | cleartp LFUNC:RB ++ | st_vmstate ++ | .FPU2 fcvt.d.s TOBIT, TOBIT ++ | ins_callt ++ |.endif ++ | ++ |->cont_ffi_callback: // Return from FFI callback. ++ |.if FFI ++ | .LDXD CTSTATE, DISPATCH, DISPATCH_GL(ctype_state) ++ | st.d BASE, L->base ++ | st.d RB, L->top ++ | st.d L, CTSTATE->L ++ | or CARG2, RA, r0 ++ | or CARG1, CTSTATE, r0 ++ | bl extern lj_ccallback_leave // (CTState *cts, TValue *o) ++ | .FPU2 fld.d FRET1, CTSTATE->cb.fpr[0] ++ | ld.d CRET1, CTSTATE->cb.gpr[0] ++ | .FPU2 fld.d FRET2, CTSTATE->cb.fpr[1] ++ | ld.d CRET2, CTSTATE->cb.gpr[1] ++ | beq r0, r0, ->vm_leave_unw ++ |.endif ++ | ++ |->vm_ffi_call: // Call C function via FFI. ++ | // Caveat: needs special frame unwinding, see below. 
++ |.if FFI ++ | .type CCSTATE, CCallState, CARG1 ++ | ld.w TMP1, CCSTATE->spadj ++ | ld.bu CARG2, CCSTATE->nsp ++ | ld.bu CARG3, CCSTATE->nfpr ++ | or TMP2, sp, r0 ++ | sub.d sp, sp, TMP1 ++ | st.d ra, -8(TMP2) ++ | slli.w CARG2, CARG2, 3 ++ | st.d r23, -16(TMP2) ++ | st.d CCSTATE, -24(TMP2) ++ | or r23, TMP2, r0 ++ | addi.d TMP1, CCSTATE, offsetof(CCallState, stack) ++ | or TMP2, sp, r0 ++ | add.d TMP3, TMP1, CARG2 ++ | beqz CARG2, >2 ++ |1: ++ | ld.d TMP0, 0(TMP1) ++ | addi.d TMP1, TMP1, 8 ++ | sltu AT, TMP1, TMP3 ++ | st.d TMP0, 0(TMP2) ++ | addi.d TMP2, TMP2, 8 ++ | bnez AT, <1 ++ |2: ++ | beqz CARG3, >3 ++ | .FPU2 fld.d FARG1, CCSTATE->fpr[0] ++ | .FPU2 fld.d FARG2, CCSTATE->fpr[1] ++ | .FPU2 fld.d FARG3, CCSTATE->fpr[2] ++ | .FPU2 fld.d FARG4, CCSTATE->fpr[3] ++ | .FPU2 fld.d FARG5, CCSTATE->fpr[4] ++ | .FPU2 fld.d FARG6, CCSTATE->fpr[5] ++ | .FPU2 fld.d FARG7, CCSTATE->fpr[6] ++ | .FPU2 fld.d FARG8, CCSTATE->fpr[7] ++ |3: ++ | ld.d CFUNCADDR, CCSTATE->func ++ | ld.d CARG2, CCSTATE->gpr[1] ++ | ld.d CARG3, CCSTATE->gpr[2] ++ | ld.d CARG4, CCSTATE->gpr[3] ++ | ld.d CARG5, CCSTATE->gpr[4] ++ | ld.d CARG6, CCSTATE->gpr[5] ++ | ld.d CARG7, CCSTATE->gpr[6] ++ | ld.d CARG8, CCSTATE->gpr[7] ++ | ld.d CARG1, CCSTATE->gpr[0] // Do this last, since CCSTATE is CARG1. ++ | jirl r1, CFUNCADDR, 0 ++ | ld.d CCSTATE:TMP1, -24(r23) ++ | ld.d TMP2, -16(r23) ++ | ld.d ra, -8(r23) ++ | st.d CRET1, CCSTATE:TMP1->gpr[0] ++ | st.d CRET2, CCSTATE:TMP1->gpr[1] ++ |.if FPU ++ | fmov.d FRET1, FARG1 ++ | fmov.d FRET2, FARG2 ++ | fst.d FRET1, CCSTATE:TMP1->fpr[0] ++ | fst.d FRET2, CCSTATE:TMP1->fpr[1] ++ |.else ++ | st.d CARG1, CCSTATE:TMP1->gpr[2] // 2nd FP struct field for soft-float. ++ |.endif ++ | or sp, r23, r0 ++ | or r23, TMP2, r0 ++ | jirl r0, ra, 0 ++ |.endif ++ |// Note: vm_ffi_call must be the last function in this object file! ++ | ++ |//----------------------------------------------------------------------- ++} ++ ++/* Generate the code for a single instruction. */ ++static void build_ins(BuildCtx *ctx, BCOp op, int defop) ++{ ++ int vk = 0; ++ |=>defop: ++ ++ switch (op) { ++ ++ /* -- Comparison ops ---------------------------------------------------- */ ++ ++ /* Remember: all ops branch for a true comparison, fall through otherwise. */ ++ ++ case BC_ISLT: case BC_ISGE: case BC_ISLE: case BC_ISGT: ++ | // RA = src1*8, RD = src2*8, JMP with RD = target ++ |.macro bc_comp, FRA, FRD, ARGRA, ARGRD, movop, fmovop, fcomp, sfcomp ++ | add.d RA, BASE, RA ++ | add.d RD, BASE, RD ++ | ld.d ARGRA, 0(RA) ++ | ld.d ARGRD, 0(RD) ++ | ld.hu TMP2, OFS_RD(PC) ++ | gettp CARG3, ARGRA ++ | gettp CARG4, ARGRD ++ | addi.d PC, PC, 4 ++ | bne CARG3, TISNUM, >2 ++ | decode_RD4b TMP2 ++ | bne CARG4, TISNUM, >5 ++ | slli.w ARGRA, ARGRA, 0 // sextw -> slli.w ++ | slli.w ARGRD, ARGRD, 0 // sextw -> slli.w ++ | .LUI TMP3, (-(BCBIAS_J*4 >> 16) & 65535) ++ | slt AT, CARG1, CARG2 ++ | add.w TMP2, TMP2, TMP3 ++ | movop TMP2, TMP2, AT ++ |1: ++ | add.d PC, PC, TMP2 ++ | ins_next ++ | ++ |2: // RA is not an integer. ++ | sltui AT, CARG3, LJ_TISNUM ++ | .LUI TMP3, (-(BCBIAS_J*4 >> 16) & 65535) ++ | beqz AT, ->vmeta_comp ++ | sltui AT, CARG4, LJ_TISNUM ++ | decode_RD4b TMP2 //TODO ++ | beqz AT, >4 ++ |.if FPU ++ | fld.d FRA, 0(RA) ++ | fld.d FRD, 0(RD) ++ |.endif ++ |3: // RA and RD are both numbers. 
++ |.if FPU ++ | fcomp FCC0, FTMP0, FTMP2 ++ | add.w TMP2, TMP2, TMP3 ++ | movcf2gr TMP3, FCC0 ++ | fmovop TMP2, TMP2, TMP3 ++ | beq r0, r0, <1 ++ |.else ++ | add.w TMP2, TMP2, TMP ++ | bl sfcomp ++ | movop TMP2, TMP2, CRET1 ++ | beq r0, r0, <1 ++ |.endif ++ | ++ |4: // RA is a number, RD is not a number. ++ |// bne CARG4, TISNUM, ->vmeta_comp ++ | // RA is a number, RD is an integer. Convert RD to a number. ++ |.if FPU ++ | fld.s FRD, LO(RD) ++ | bne CARG4, TISNUM, ->vmeta_comp ++ | fld.d FRA, 0(RA) ++ | ffint.d.w FRD, FRD ++ | beq r0, r0, <3 ++ |.else ++ |.if "ARGRD" == "CARG1" ++ | slli.w CARG1, CARG1, 0 // sextw -> slli.w ++ | bne CARG4, TISNUM, ->vmeta_comp ++ | bl ->vm_sfi2d_1 ++ |.else ++ | slli.w CARG2, CARG2, 0 // sextw -> slli.w ++ | bne CARG4, TISNUM, ->vmeta_comp ++ | bl ->vm_sfi2d_2 ++ |.endif ++ | beq r0, r0, <3 ++ |.endif ++ | ++ |5: // RA is an integer, RD is not an integer ++ | sltui AT, CARG4, LJ_TISNUM ++ | .LUI TMP3, (-(BCBIAS_J*4 >> 16) & 65535) ++ | beqz AT, ->vmeta_comp ++ | // RA is an integer, RD is a number. Convert RA to a number. ++ |.if FPU ++ | fld.s FRA, LO(RA) ++ | fld.d FRD, 0(RD) ++ | ffint.d.w FRA, FRA ++ | beq r0, r0, <3 ++ |.else ++ |.if "ARGRA" == "CARG1" ++ | slli.w CARG1, CARG1, 0 // sextw -> slli.w ++ | bl ->vm_sfi2d_1 ++ |.else ++ | slli.w CARG2, CARG2, 0 // sextw -> slli.w ++ | bl ->vm_sfi2d_2 ++ |.endif ++ | beq r0, r0, <3 ++ |.endif ++ |.endmacro ++ | ++ if (op == BC_ISLT) { ++ | bc_comp FTMP0, FTMP2, CARG1, CARG2, maskeqz, maskeqz, fcmp.clt.d, ->vm_sfcmpolt ++ } else if (op == BC_ISGE) { ++ | bc_comp FTMP0, FTMP2, CARG1, CARG2, masknez, masknez, fcmp.clt.d, ->vm_sfcmpolt ++ } else if (op == BC_ISLE) { ++ | bc_comp FTMP2, FTMP0, CARG2, CARG1, masknez, masknez, fcmp.cult.d, ->vm_sfcmpult ++ } else { ++ | bc_comp FTMP2, FTMP0, CARG2, CARG1, maskeqz, maskeqz, fcmp.cult.d, ->vm_sfcmpult ++ } ++ break; ++ ++ case BC_ISEQV: case BC_ISNEV: ++ vk = op == BC_ISEQV; ++ | // RA = src1*8, RD = src2*8, JMP with RD = target ++ | add.d RA, BASE, RA ++ | addi.d PC, PC, 4 ++ | add.d RD, BASE, RD ++ | ld.d CARG1, 0(RA) ++ | ld.hu TMP2, -4+OFS_RD(PC) ++ | ld.d CARG2, 0(RD) ++ | gettp CARG3, CARG1 ++ | gettp CARG4, CARG2 ++ | sltu AT, TISNUM, CARG3 ++ | sltu TMP1, TISNUM, CARG4 ++ | or AT, AT, TMP1 ++ | .LUI TMP3, (-(BCBIAS_J*4 >> 16) & 65535) ++ if (vk) { ++ | beqz AT, ->BC_ISEQN_Z //TODO which is the following slot ins ++ } else { ++ | beqz AT, ->BC_ISNEN_Z ++ } ++ | // Either or both types are not numbers. ++ |.if FFI ++ | .LI AT, LJ_TCDATA ++ | beq CARG3, AT, ->vmeta_equal_cd ++ |.endif ++ | decode_RD4b TMP2 ++ |.if FFI ++ | beq CARG4, AT, ->vmeta_equal_cd ++ |.endif ++ | add.w TMP2, TMP2, TMP3 ++ | bne CARG1, CARG2, >2 ++ | // Tag and value are equal. ++ if (vk) { ++ |->BC_ISEQV_Z: ++ | add.d PC, PC, TMP2 ++ } ++ |1: ++ | ins_next ++ | ++ |2: // Check if the tags are the same and it's a table or userdata. ++ | xor AT, CARG3, CARG4 // Same type? ++ | sltui TMP0, CARG3, LJ_TISTABUD+1 // Table or userdata? ++ | masknez TMP0, TMP0, AT ++ | cleartp TAB:TMP1, CARG1 ++ if (vk) { ++ | beqz TMP0, <1 ++ } else { ++ | beqz TMP0, ->BC_ISEQV_Z // Reuse code from opposite instruction. ++ } ++ | // Different tables or userdatas. Need to check __eq metamethod. ++ | // Field metatable must be at same offset for GCtab and GCudata! ++ | ld.d TAB:TMP3, TAB:TMP1->metatable ++ if (vk) { ++ | beqz TAB:TMP3, <1 // No metatable? ++ | ld.bu TMP3, TAB:TMP3->nomm ++ | andi TMP3, TMP3, 1<1 // Or 'no __eq' flag set? ++ } else { ++ | beqz TAB:TMP3,->BC_ISEQV_Z // No metatable? 
++ | ld.bu TMP3, TAB:TMP3->nomm ++ | andi TMP3, TMP3, 1<BC_ISEQV_Z // Or 'no __eq' flag set? ++ } ++ | .LI TMP0, 1-vk // ne = 0 or 1. ++ | beq r0, r0, ->vmeta_equal // Handle __eq metamethod. ++ break; ++ ++ case BC_ISEQS: case BC_ISNES: ++ vk = op == BC_ISEQS; ++ | // RA = src*8, RD = str_const*8 (~), JMP with RD = target ++ | add.d RA, BASE, RA ++ | addi.d PC, PC, 4 ++ | ld.d CARG1, 0(RA) ++ | sub.d RD, KBASE, RD ++ | ld.hu TMP2, -4+OFS_RD(PC) ++ | ld.d CARG2, -8(RD) // KBASE-8-str_const*8 ++ |.if FFI ++ | gettp TMP0, CARG1 ++ | .LI AT, LJ_TCDATA ++ |.endif ++ | .LI TMP1, LJ_TSTR ++ | decode_RD4b TMP2 ++ | settp CARG2, TMP1 ++ |.if FFI ++ | beq TMP0, AT, ->vmeta_equal_cd ++ |.endif ++ | .LUI TMP3, (-(BCBIAS_J*4 >> 16) & 65535) ++ | xor TMP1, CARG1, CARG2 ++ | add.w TMP2, TMP2, TMP3 ++ if (vk) { ++ | masknez TMP2, TMP2, TMP1 ++ } else { ++ | maskeqz TMP2, TMP2, TMP1 ++ } ++ | add.d PC, PC, TMP2 ++ | ins_next ++ break; ++ ++ case BC_ISEQN: case BC_ISNEN: ++ vk = op == BC_ISEQN; ++ | // RA = src*8, RD = num_const*8, JMP with RD = target ++ | add.d RA, BASE, RA ++ | add.d RD, KBASE, RD ++ | ld.d CARG1, 0(RA) ++ | ld.d CARG2, 0(RD) ++ | ld.hu TMP2, OFS_RD(PC) ++ | gettp CARG3, CARG1 ++ | gettp CARG4, CARG2 ++ | addi.d PC, PC, 4 ++ | .LUI TMP3, (-(BCBIAS_J*4 >> 16) & 65535) ++ if (vk) { ++ |->BC_ISEQN_Z: ++ } else { ++ |->BC_ISNEN_Z: ++ } ++ | decode_RD4b TMP2 ++ | bne CARG3, TISNUM, >3 ++ | add.w TMP2, TMP2, TMP3 ++ | bne CARG4, TISNUM, >6 ++ | xor AT, CARG1, CARG2 ++ if (vk) { ++ | masknez TMP2, TMP2, AT ++ |1: ++ | add.d PC, PC, TMP2 ++ |2: ++ } else { ++ | maskeqz TMP2, TMP2, AT ++ |1: ++ |2: ++ | add.d PC, PC, TMP2 ++ } ++ | ins_next ++ | ++ |3: // RA is not an integer. ++ | sltu AT, CARG3, TISNUM ++ | add.w TMP2, TMP2, TMP3 ++ |.if FFI ++ | beqz AT, >8 ++ |.else ++ | beqz AT, <2 ++ |.endif ++ | sltu AT, CARG4, TISNUM ++ |.if FPU ++ | fld.d FTMP0, 0(RA) ++ | fld.d FTMP2, 0(RD) ++ |.endif ++ | beqz AT, >5 ++ |4: // RA and RD are both numbers. ++ |.if FPU ++ | fcmp.ceq.d FCC0, FTMP0, FTMP2 //TODO fcmp.cond.d cc, fj, fk ++ | movcf2gr TMP1, FCC0 ++ if (vk) { ++ | maskeqz TMP2, TMP2, TMP1 ++ } else { ++ | masknez TMP2, TMP2, TMP1 ++ } ++ | beq r0, r0, <1 ++ |.else ++ | bl ->vm_sfcmpeq ++ if (vk) { ++ | maskeqz TMP2, TMP2, CRET1 ++ } else { ++ | masknez TMP2, TMP2, CRET1 ++ } ++ | beq r0, r0, <1 ++ |.endif ++ | ++ |5: // RA is a number, RD is not a number. ++ |//.if FFI ++ |// bne CARG4, TISNUM, >9 //TODO does not process the following flot ins ++ |//.else ++ |// bne CARG4, TISNUM, <2 ++ |//.endif ++ | // RA is a number, RD is an integer. Convert RD to a number. ++ |.if FPU ++ | fld.s FTMP2, LO(RD) ++ |.if FFI ++ | bne CARG4, TISNUM, >9 ++ |.else ++ | bne CARG4, TISNUM, <2 ++ |.endif ++ | ffint.d.w FTMP2, FTMP2 ++ | beq r0, r0, <4 ++ |.else ++ | slli.w CARG2, CARG2, 0 // sextw -> slli.w ++ |.if FFI ++ | bne CARG4, TISNUM, >9 ++ |.else ++ | bne CARG4, TISNUM, <2 ++ |.endif ++ | bl ->vm_sfi2d_2 ++ | beq r0, r0, <4 ++ |.endif ++ | ++ |6: // RA is an integer, RD is not an integer ++ | sltu AT, CARG4, TISNUM ++ |//.if FFI ++ |// beqz AT, >9 //TODO does not process the following flot ins ++ |//.else ++ |// beqz AT, <2 ++ |//.endif ++ | // RA is an integer, RD is a number. Convert RA to a number. 
++ |.if FPU ++ | fld.s FTMP0, LO(RA) ++ |.if FFI ++ | beqz AT, >9 ++ |.else ++ | beqz AT, <2 ++ |.endif ++ | fld.d FTMP2, 0(RD) ++ | ffint.d.w FTMP0, FTMP0 ++ | b <4 ++ |.else ++ | slli.w CARG1, CARG1, 0 // sextw -> slli.w ++ |.if FFI ++ | beqz AT, >9 ++ |.else ++ | beqz AT, <2 ++ |.endif ++ | bl ->vm_sfi2d_1 ++ | beq r0, r0, <4 ++ |.endif ++ | ++ |.if FFI ++ |8: ++ | .LI AT, LJ_TCDATA ++ | bne CARG3, AT, <2 ++ | beq r0, r0, ->vmeta_equal_cd ++ |9: ++ | .LI AT, LJ_TCDATA ++ | bne CARG4, AT, <2 ++ | beq r0, r0, ->vmeta_equal_cd ++ |.endif ++ break; ++ ++ case BC_ISEQP: case BC_ISNEP: ++ vk = op == BC_ISEQP; ++ | // RA = src*8, RD = primitive_type*8 (~), JMP with RD = target ++ | add.d RA, BASE, RA ++ | srli.w TMP1, RD, 3 ++ | ld.d TMP0, 0(RA) ++ | ld.hu TMP2, OFS_RD(PC) ++ | nor TMP1, TMP1, r0 ++ | gettp TMP0, TMP0 ++ | addi.d PC, PC, 4 ++ | or r17, TMP0, r0 ++ | xor TMP0, TMP0, TMP1 ++ |.if FFI ++ | .LI AT, LJ_TCDATA ++ | beq r17, AT, ->vmeta_equal_cd ++ |.endif ++ | decode_RD4b TMP2 ++ | .LUI TMP3, (-(BCBIAS_J*4 >> 16) & 65535) ++ | add.w TMP2, TMP2, TMP3 ++ if (vk) { ++ | masknez TMP2, TMP2, TMP0 ++ } else { ++ | maskeqz TMP2, TMP2, TMP0 ++ } ++ | add.d PC, PC, TMP2 ++ | ins_next ++ break; ++ ++ /* -- Unary test and copy ops ------------------------------------------- */ ++ ++ case BC_ISTC: case BC_ISFC: case BC_IST: case BC_ISF: ++ | // RA = dst*8 or unused, RD = src*8, JMP with RD = target ++ | add.d RD, BASE, RD ++ | ld.hu TMP2, OFS_RD(PC) ++ | ld.d TMP0, 0(RD) ++ | addi.d PC, PC, 4 ++ | gettp TMP0, TMP0 ++ | sltui TMP0, TMP0, LJ_TISTRUECOND ++ if (op == BC_IST || op == BC_ISF) { ++ | decode_RD4b TMP2 ++ | .LUI TMP3, (-(BCBIAS_J*4 >> 16) & 65535) ++ | add.w TMP2, TMP2, TMP3 ++ if (op == BC_IST) { ++ | maskeqz TMP2, TMP2, TMP0; ++ } else { ++ | masknez TMP2, TMP2, TMP0; ++ } ++ | add.d PC, PC, TMP2 ++ } else { ++ | ld.d CRET1, 0(RD) ++ | add.d RA, BASE, RA ++ if (op == BC_ISTC) { ++ | beqz TMP0, >1 ++ } else { ++ | bnez TMP0, >1 ++ } ++ | decode_RD4b TMP2 ++ | .LUI TMP3, (-(BCBIAS_J*4 >> 16) & 65535) ++ | add.w TMP2, TMP2, TMP3 ++ | st.d CRET1, 0(RA) ++ | add.d PC, PC, TMP2 ++ |1: ++ } ++ | ins_next ++ break; ++ ++ case BC_ISTYPE: ++ | // RA = src*8, RD = -type*8 ++ | add.d TMP2, BASE, RA ++ | srli.w TMP1, RD, 3 ++ | ld.d TMP0, 0(TMP2) ++ | ins_next1 ++ | gettp TMP0, TMP0 ++ | add.d AT, TMP0, TMP1 ++ | bnez AT, ->vmeta_istype ++ | ins_next2 ++ break; ++ case BC_ISNUM: ++ | // RA = src*8, RD = -(TISNUM-1)*8 ++ | add.d TMP2, BASE, RA ++ | ld.d TMP0, 0(TMP2) ++ | ins_next1 ++ | ins_next2 ++ | checknum TMP0, ->vmeta_istype ++ |// ins_next2 ++ break; ++ ++ /* -- Unary ops --------------------------------------------------------- */ ++ ++ case BC_MOV: ++ | // RA = dst*8, RD = src*8 ++ | add.d RD, BASE, RD ++ | add.d RA, BASE, RA ++ | ld.d CRET1, 0(RD) ++ | ins_next1 ++ | st.d CRET1, 0(RA) ++ | ins_next2 ++ break; ++ case BC_NOT: ++ | // RA = dst*8, RD = src*8 ++ | add.d RD, BASE, RD ++ | add.d RA, BASE, RA ++ | ld.d TMP0, 0(RD) ++ | .LI AT, LJ_TTRUE ++ | gettp TMP0, TMP0 ++ | sltu TMP0, AT, TMP0 ++ | addi.w TMP0, TMP0, 1 ++ | slli.d TMP0, TMP0, 47 ++ | nor TMP0, TMP0, r0 ++ | ins_next1 ++ | st.d TMP0, 0(RA) ++ | ins_next2 ++ break; ++ case BC_UNM: ++ | // RA = dst*8, RD = src*8 ++ | add.d RB, BASE, RD ++ | ld.d CARG1, 0(RB) ++ | add.d RA, BASE, RA ++ | gettp CARG3, CARG1 ++ |// addu16i.d TMP1, r0, 0x8000 ++ | .LUI TMP1, 0x8000 ++ | bne CARG3, TISNUM, >2 ++ | slli.w CARG1, CARG1, 0 // sextw -> slli.w ++ | sub.w CARG1, r0, CARG1 ++ | beq CARG1, TMP1, ->vmeta_unm // Meta handler deals with 
-2^31. ++ | bstrpick.d CARG1, CARG1, 31, 0 // zextw -> bstrpick.d ++ | settp CARG1, TISNUM ++ |1: ++ | ins_next1 ++ | st.d CARG1, 0(RA) ++ | ins_next2 ++ |2: ++ | sltui AT, CARG3, LJ_TISNUM ++ | slli.d TMP1, TMP1, 32 ++ | beqz AT, ->vmeta_unm ++ | xor CARG1, CARG1, TMP1 ++ | beq r0, r0, <1 ++ break; ++ case BC_LEN: ++ | // RA = dst*8, RD = src*8 ++ | add.d CARG2, BASE, RD ++ | add.d RA, BASE, RA ++ | ld.d TMP0, 0(CARG2) ++ | gettp TMP1, TMP0 ++ | addi.d AT, TMP1, -LJ_TSTR ++ | cleartp STR:CARG1, TMP0 ++ | bnez AT, >2 ++ | ld.w CRET1, STR:CARG1->len ++ |1: ++ | settp CRET1, TISNUM ++ | ins_next1 ++ | st.d CRET1, 0(RA) ++ | ins_next2 ++ |2: ++ | addi.d AT, TMP1, -LJ_TTAB ++ | bnez AT, ->vmeta_len ++#if LJ_52 ++ | ld.d TAB:TMP2, TAB:CARG1->metatable ++ | bnez TAB:TMP2, >9 ++ |3: ++#endif ++ |->BC_LEN_Z: ++ | bl extern lj_tab_len // (GCtab *t) ++ | // Returns uint32_t (but less than 2^31). ++ | beq r0, r0, <1 ++#if LJ_52 ++ |9: ++ | ld.bu TMP0, TAB:TMP2->nomm ++ | andi TMP0, TMP0, 1<vmeta_len ++#endif ++ break; ++ ++ /* -- Binary ops -------------------------------------------------------- */ ++ ++ |.macro fpmod, a, b, c ++ | fdiv.d FARG1, b, c ++ | bl ->vm_floor // floor(b/c) ++ | fmul.d a, FRET1, c ++ | fsub.d a, b, a // b - floor(b/c)*c ++ |.endmacro ++ ++ |.macro sfpmod ++ | addi.d sp, sp, -16 ++ | ++ | st.d CARG1, 0(sp) ++ | st.d CARG2, 8(sp) ++ | bl extern __divdf3 ++ | ++ | or CARG1, CRET1, r0 ++ | bl extern floor ++ | ++ | or CARG1, CRET1, r0 ++ | ld.d CARG2, 8(sp) ++ | bl extern __muldf3 ++ | ++ | ld.d CARG1, 0(sp) ++ | or CARG2, CRET1, r0 ++ | bl extern __subdf3 ++ | ++ | addi.d sp, sp, 16 ++ |.endmacro ++ ++ |.macro ins_arithpre, label ++ ||vk = ((int)op - BC_ADDVN) / (BC_ADDNV-BC_ADDVN); ++ | // RA = dst*8, RB = src1*8, RC = src2*8 | num_const*8 ++ ||switch (vk) { ++ ||case 0: ++ | decode_RB8a RB, INS ++ | decode_RB8b RB ++ | decode_RDtoRC8 RC, RD ++ | // RA = dst*8, RB = src1*8, RC = num_const*8 ++ | add.d RB, BASE, RB ++ | add.d RC, KBASE, RC ++ |.if "label" ~= "none" ++ | beq r0, r0, label ++ |.endif ++ || break; ++ ||case 1: ++ | decode_RB8a RC, INS ++ | decode_RB8b RC ++ | decode_RDtoRC8 RB, RD ++ | // RA = dst*8, RB = num_const*8, RC = src1*8 ++ | add.d RC, BASE, RC ++ | add.d RB, KBASE, RB ++ |.if "label" ~= "none" ++ | beq r0, r0, label ++ |.endif ++ || break; ++ ||default: ++ | decode_RB8a RB, INS ++ | decode_RB8b RB ++ | decode_RDtoRC8 RC, RD ++ | // RA = dst*8, RB = src1*8, RC = src2*8 ++ | add.d RB, BASE, RB ++ | add.d RC, BASE, RC ++ |.if "label" ~= "none" ++ | beq r0, r0, label ++ |.endif ++ || break; ++ ||} ++ |.endmacro ++ | ++ |.macro ins_arith, intins, fpins, fpcall, label ++ | ins_arithpre none ++ | ++ |.if "label" ~= "none" ++ |label: ++ |.endif ++ | ++ |// Used in 5. ++ | ld.d CARG1, 0(RB) ++ | ld.d CARG2, 0(RC) ++ | gettp TMP0, CARG1 ++ | gettp TMP1, CARG2 ++ | ++ |.if "intins" ~= "div.w" ++ | ++ | // Check for two integers. ++ | slli.w CARG3, CARG1, 0 // sextw -> slli.w ++ | slli.w CARG4, CARG2, 0 // sextw -> slli.w ++ | bne TMP0, TISNUM, >5 ++ |// bne TMP1, TISNUM, >5 //TODO not process the following slot ins ++ | ++ |.if "intins" == "add.w" ++ | intins CRET1, CARG3, CARG4 ++ | bne TMP1, TISNUM, >5 ++ | xor TMP1, CRET1, CARG3 // ((y^a) & (y^b)) < 0: overflow. ++ | xor TMP2, CRET1, CARG4 ++ | and TMP1, TMP1, TMP2 ++ | add.d RA, BASE, RA ++ | blt TMP1, r0, ->vmeta_arith ++ |.elif "intins" == "sub.w" ++ | intins CRET1, CARG3, CARG4 ++ | bne TMP1, TISNUM, >5 ++ | xor TMP1, CRET1, CARG3 // ((y^a) & (a^b)) < 0: overflow. 
++ | xor TMP2, CARG3, CARG4 ++ | and TMP1, TMP1, TMP2 ++ | add.d RA, BASE, RA ++ | blt TMP1, r0, ->vmeta_arith ++ |.elif "intins" == "mulw.d.w" //TODO mips: mult -> la: mulw.d.w ++ |//. nop ++ | bne TMP1, TISNUM, >5 ++ | mul.w CRET1, CARG3, CARG4 ++ | mulh.w TMP2, CARG3, CARG4 ++ | srai.w TMP1, CRET1, 31 ++ | add.d RA, BASE, RA ++ | bne TMP1, TMP2, ->vmeta_arith ++ |.else ++ |//. load_got lj_vm_modi ++ | bne TMP1, TISNUM, >5 ++ | add.d RA, BASE, RA ++ | beqz CARG4, ->vmeta_arith ++ | or CARG1, CARG3, r0 ++ | or CARG2, CARG4, r0 ++ | bl extern lj_vm_modi //TODO implement func lj_vm_modi/vm_modi ++ |.endif ++ | ++ | bstrpick.d CRET1, CRET1, 31, 0 // zextw -> bstrpick.d ++ | settp CRET1, TISNUM ++ | ins_next1 ++ | st.d CRET1, 0(RA) ++ |3: ++ | ins_next2 ++ | ++ |.endif ++ | ++ |5: // Check for two numbers. ++ | .FPU2 fld.d FTMP0, 0(RB) ++ | sltu AT, TMP0, TISNUM ++ | sltu TMP0, TMP1, TISNUM ++ | .FPU2 fld.d FTMP2, 0(RC) ++ | and AT, AT, TMP0 ++ | add.d RA, BASE, RA ++ | beqz AT, ->vmeta_arith ++ | ++ |.if FPU ++ | fpins FRET1, FTMP0, FTMP2 ++ |.elif "fpcall" == "sfpmod" ++ | sfpmod ++ |.else ++ | bl fpcall ++ |.endif ++ | ++ | ins_next1 ++ |.if FPU ++ | fst.d FRET1, 0(RA) ++ |.else ++ | st.d CRET1, 0(RA) ++ |.endif ++ |.if "intins" ~= "div.w" ++ | beq r0, r0, <3 ++ |.endif ++ |.if "intins" == "div.w" ++ | ins_next2 ++ |.endif ++ | ++ |.endmacro ++ ++ case BC_ADDVN: case BC_ADDNV: case BC_ADDVV: ++ | ins_arith add.w, fadd.d, __adddf3, none ++ break; ++ case BC_SUBVN: case BC_SUBNV: case BC_SUBVV: ++ | ins_arith sub.w, fsub.d, __subdf3, none ++ break; ++ case BC_MULVN: case BC_MULNV: case BC_MULVV: ++ | ins_arith mulw.d.w, fmul.d, __muldf3, none ++ break; ++ case BC_DIVVN: ++ | ins_arith div.w, fdiv.d, __divdf3, ->BC_DIVVN_Z ++ break; ++ case BC_DIVNV: case BC_DIVVV: ++ | ins_arithpre ->BC_DIVVN_Z ++ break; ++ case BC_MODVN: ++ | ins_arith modi, fpmod, sfpmod, ->BC_MODVN_Z //TODO modi -> ? ++ break; ++ case BC_MODNV: case BC_MODVV: ++ | ins_arithpre ->BC_MODVN_Z ++ break; ++ case BC_POW: ++ | ins_arithpre none ++ | ld.d CARG1, 0(RB) ++ | ld.d CARG2, 0(RC) ++ | gettp TMP0, CARG1 ++ | gettp TMP1, CARG2 ++ | sltui TMP0, TMP0, LJ_TISNUM ++ | sltui TMP1, TMP1, LJ_TISNUM ++ | and AT, TMP0, TMP1 ++ | add.d RA, BASE, RA ++ | beqz AT, ->vmeta_arith ++ |.if FPU ++ | fld.d FARG1, 0(RB) ++ | fld.d FARG2, 0(RC) ++ |.endif ++ | bl extern pow ++ | ins_next1 ++ |.if FPU ++ | fst.d FRET1, 0(RA) ++ |.else ++ | st.d CRET1, 0(RA) ++ |.endif ++ | ins_next2 ++ break; ++ ++ case BC_CAT: ++ | // RA = dst*8, RB = src_start*8, RC = src_end*8 ++ | decode_RB8a RB, INS ++ | decode_RB8b RB ++ | decode_RDtoRC8 RC, RD ++ | sub.d CARG3, RC, RB ++ | st.d BASE, L->base ++ | add.d CARG2, BASE, RC ++ | or MULTRES, RB, r0 ++ |->BC_CAT_Z: ++ | srli.w CARG3, CARG3, 3 ++ | st.d PC, SAVE_PC(sp) ++ | or CARG1, L, r0 ++ | bl extern lj_meta_cat // (lua_State *L, TValue *top, int left) ++ | // Returns NULL (finished) or TValue * (metamethod). 
++ | ld.d BASE, L->base ++ | bnez CRET1, ->vmeta_binop ++ | add.d RB, BASE, MULTRES ++ | ld.d r17, 0(RB) ++ | add.d RA, BASE, RA ++ | ins_next1 ++ | st.d r17, 0(RA) ++ | ins_next2 ++ break; ++ ++ /* -- Constant ops ------------------------------------------------------ */ ++ ++ case BC_KSTR: ++ | // RA = dst*8, RD = str_const*8 (~) ++ | sub.d TMP1, KBASE, RD ++ | ins_next1 ++ | .LI TMP2, LJ_TSTR ++ | ld.d TMP0, -8(TMP1) // KBASE-8-str_const*8 ++ | add.d RA, BASE, RA ++ | settp TMP0, TMP2 ++ | st.d TMP0, 0(RA) ++ | ins_next2 ++ break; ++ case BC_KCDATA: ++ |.if FFI ++ | // RA = dst*8, RD = cdata_const*8 (~) ++ | sub.d TMP1, KBASE, RD ++ | ins_next1 ++ | ld.d TMP0, -8(TMP1) // KBASE-8-cdata_const*8 ++ | .LI TMP2, LJ_TCDATA ++ | add.d RA, BASE, RA ++ | settp TMP0, TMP2 ++ | st.d TMP0, 0(RA) ++ | ins_next2 ++ |.endif ++ break; ++ case BC_KSHORT: ++ | // RA = dst*8, RD = int16_literal*8 ++ | srai.w RD, INS, 16 ++ | add.d RA, BASE, RA ++ | bstrpick.d RD, RD, 31, 0 // zextw -> bstrpick.d ++ | ins_next1 ++ | settp RD, TISNUM ++ | st.d RD, 0(RA) ++ | ins_next2 ++ break; ++ case BC_KNUM: ++ | // RA = dst*8, RD = num_const*8 ++ | add.d RD, KBASE, RD ++ | add.d RA, BASE, RA ++ | ld.d CRET1, 0(RD) ++ | ins_next1 ++ | st.d CRET1, 0(RA) ++ | ins_next2 ++ break; ++ case BC_KPRI: ++ | // RA = dst*8, RD = primitive_type*8 (~) ++ | add.d RA, BASE, RA ++ | slli.d TMP0, RD, 44 ++ | nor TMP0, TMP0, r0 ++ | ins_next1 ++ | st.d TMP0, 0(RA) ++ | ins_next2 ++ break; ++ case BC_KNIL: ++ | // RA = base*8, RD = end*8 ++ | add.d RA, BASE, RA ++ | st.d TISNIL, 0(RA) ++ | addi.d RA, RA, 8 ++ | add.d RD, BASE, RD ++ |1: ++ | st.d TISNIL, 0(RA) ++ | slt AT, RA, RD ++ | addi.d RA, RA, 8 ++ | bnez AT, <1 ++ | ins_next_ ++ break; ++ ++ /* -- Upvalue and function ops ------------------------------------------ */ ++ ++ case BC_UGET: ++ | // RA = dst*8, RD = uvnum*8 ++ | ld.d LFUNC:RB, FRAME_FUNC(BASE) ++ | add.d RA, BASE, RA ++ | cleartp LFUNC:RB ++ | add.d RD, RD, LFUNC:RB ++ | ld.d UPVAL:RB, LFUNC:RD->uvptr ++ | ins_next1 ++ | ld.d TMP1, UPVAL:RB->v ++ | ld.d CRET1, 0(TMP1) ++ | st.d CRET1, 0(RA) ++ | ins_next2 ++ break; ++ case BC_USETV: ++ | // RA = uvnum*8, RD = src*8 ++ | ld.d LFUNC:RB, FRAME_FUNC(BASE) ++ | add.d RD, BASE, RD ++ | cleartp LFUNC:RB ++ | add.d RA, RA, LFUNC:RB ++ | ld.d UPVAL:RB, LFUNC:RA->uvptr ++ | ld.d CRET1, 0(RD) ++ | ld.bu TMP3, UPVAL:RB->marked ++ | ld.d CARG2, UPVAL:RB->v ++ | andi TMP3, TMP3, LJ_GC_BLACK // isblack(uv) ++ | ld.bu TMP0, UPVAL:RB->closed ++ | gettp TMP2, CRET1 ++ | st.d CRET1, 0(CARG2) ++ | .LI AT, LJ_GC_BLACK|1 ++ | or TMP3, TMP3, TMP0 ++ | addi.d TMP2, TMP2, -(LJ_TNUMX+1) ++ | beq TMP3, AT, >2 // Upvalue is closed and black? ++ |1: ++ | ins_next ++ | ++ |2: // Check if new value is collectable. ++ | sltui AT, TMP2, LJ_TISGCV - (LJ_TNUMX+1) ++ | cleartp GCOBJ:CRET1, CRET1 ++ | beqz AT, <1 // tvisgcv(v) ++ | ld.bu TMP3, GCOBJ:CRET1->gch.marked ++ | andi TMP3, TMP3, LJ_GC_WHITES // iswhite(v) ++ | beqz TMP3, <1 ++ | // Crossed a write barrier. Move the barrier forward. 
++ | .DADDIU CARG1, DISPATCH, GG_DISP2G ++ | bl extern lj_gc_barrieruv // (global_State *g, TValue *tv) ++ | beq r0, r0, <1 ++ break; ++ case BC_USETS: ++ | // RA = uvnum*8, RD = str_const*8 (~) ++ | ld.d LFUNC:RB, FRAME_FUNC(BASE) ++ | sub.d TMP1, KBASE, RD ++ | cleartp LFUNC:RB ++ | add.d RA, RA, LFUNC:RB ++ | ld.d UPVAL:RB, LFUNC:RA->uvptr ++ | ld.d STR:TMP1, -8(TMP1) // KBASE-8-str_const*8 ++ | ld.bu TMP2, UPVAL:RB->marked ++ | ld.d CARG2, UPVAL:RB->v ++ | ld.bu TMP3, STR:TMP1->marked ++ | andi AT, TMP2, LJ_GC_BLACK // isblack(uv) ++ | ld.bu TMP2, UPVAL:RB->closed ++ | .LI TMP0, LJ_TSTR ++ | settp TMP1, TMP0 ++ | st.d TMP1, 0(CARG2) ++ | bnez AT, >2 ++ |1: ++ | ins_next ++ | ++ |2: // Check if string is white and ensure upvalue is closed. ++ | andi AT, TMP3, LJ_GC_WHITES // iswhite(str) ++ | beqz TMP2, <1 ++ | beqz AT, <1 ++ | // Crossed a write barrier. Move the barrier forward. ++ | .DADDIU CARG1, DISPATCH, GG_DISP2G ++ | bl extern lj_gc_barrieruv // (global_State *g, TValue *tv) ++ | beq r0, r0, <1 ++ break; ++ case BC_USETN: ++ | // RA = uvnum*8, RD = num_const*8 ++ | ld.d LFUNC:RB, FRAME_FUNC(BASE) ++ | add.d RD, KBASE, RD ++ | cleartp LFUNC:RB ++ | add.d RA, RA, LFUNC:RB ++ | ld.d UPVAL:RB, LFUNC:RA->uvptr ++ | ld.d CRET1, 0(RD) ++ | ld.d TMP1, UPVAL:RB->v ++ | ins_next1 ++ | st.d CRET1, 0(TMP1) ++ | ins_next2 ++ break; ++ case BC_USETP: ++ | // RA = uvnum*8, RD = primitive_type*8 (~) ++ | ld.d LFUNC:RB, FRAME_FUNC(BASE) ++ | slli.d TMP0, RD, 44 ++ | cleartp LFUNC:RB ++ | add.d RA, RA, LFUNC:RB ++ | nor TMP0, TMP0, r0 ++ | ld.d UPVAL:RB, LFUNC:RA->uvptr ++ | ins_next1 ++ | ld.d TMP1, UPVAL:RB->v ++ | st.d TMP0, 0(TMP1) ++ | ins_next2 ++ break; ++ ++ case BC_UCLO: ++ | // RA = level*8, RD = target ++ | ld.d TMP2, L->openupval ++ | branch_RD // Do this first since RD is not saved. ++ | st.d BASE, L->base ++ | or CARG1, L, r0 ++ | beqz TMP2, >1 ++ | add.d CARG2, BASE, RA ++ | bl extern lj_func_closeuv // (lua_State *L, TValue *level) ++ | ld.d BASE, L->base ++ |1: ++ | ins_next ++ break; ++ ++ case BC_FNEW: ++ | // RA = dst*8, RD = proto_const*8 (~) (holding function prototype) ++ | sub.d TMP1, KBASE, RD ++ | ld.d CARG3, FRAME_FUNC(BASE) ++ | ld.d CARG2, -8(TMP1) // KBASE-8-tab_const*8 ++ | st.d BASE, L->base ++ | st.d PC, SAVE_PC(sp) ++ | cleartp CARG3 ++ | // (lua_State *L, GCproto *pt, GCfuncL *parent) ++ | or CARG1, L, r0 ++ | bl extern lj_func_newL_gc ++ | // Returns GCfuncL *. ++ | .LI TMP0, LJ_TFUNC ++ | ld.d BASE, L->base ++ | ins_next1 ++ | settp CRET1, TMP0 ++ | add.d RA, BASE, RA ++ | st.d CRET1, 0(RA) ++ | ins_next2 ++ break; ++ ++ /* -- Table ops --------------------------------------------------------- */ ++ ++ case BC_TNEW: ++ case BC_TDUP: ++ | // RA = dst*8, RD = (hbits|asize)*8 | tab_const*8 (~) ++ | .LDXD TMP0, DISPATCH, DISPATCH_GL(gc.total) ++ | .LDXD TMP1, DISPATCH, DISPATCH_GL(gc.threshold) ++ | st.d BASE, L->base ++ | st.d PC, SAVE_PC(sp) ++ | sltu AT, TMP0, TMP1 ++ | beqz AT, >5 //TODO why no slot ins ? ++ |1: ++ if (op == BC_TNEW) { ++ | srli.w CARG2, RD, 3 ++ | andi CARG2, CARG2, 0x7ff ++ | .LI TMP0, 0x801 ++ | addi.w AT, CARG2, -0x7ff ++ | srli.w CARG3, RD, 14 ++ | masknez TMP0, TMP0, AT ++ | maskeqz CARG2, CARG2, AT ++ | or CARG2, CARG2, TMP0 ++ | // (lua_State *L, int32_t asize, uint32_t hbits) ++ | or CARG1, L, r0 ++ | bl extern lj_tab_new ++ | // Returns Table *. ++ } else { ++ | sub.d TMP1, KBASE, RD ++ | or CARG1, L, r0 ++ | ld.d CARG2, -8(TMP1) // KBASE-8-str_const*8 ++ | bl extern lj_tab_dup // (lua_State *L, Table *kt) ++ | // Returns Table *. 
++ } ++ | .LI TMP0, LJ_TTAB ++ | ld.d BASE, L->base ++ | ins_next1 ++ | add.d RA, BASE, RA ++ | settp CRET1, TMP0 ++ | st.d CRET1, 0(RA) ++ | ins_next2 ++ |5: ++ | or MULTRES, RD, r0 ++ | or CARG1, L, r0 ++ | bl extern lj_gc_step_fixtop // (lua_State *L) ++ | or RD, MULTRES, r0 ++ | beq r0, r0, <1 ++ break; ++ ++ case BC_GGET: ++ | // RA = dst*8, RD = str_const*8 (~) ++ case BC_GSET: ++ | // RA = src*8, RD = str_const*8 (~) ++ | ld.d LFUNC:TMP2, FRAME_FUNC(BASE) ++ | sub.d TMP1, KBASE, RD ++ | ld.d STR:RC, -8(TMP1) // KBASE-8-str_const*8 ++ | cleartp LFUNC:TMP2 ++ | ld.d TAB:RB, LFUNC:TMP2->env ++ | add.d RA, BASE, RA ++ if (op == BC_GGET) { ++ | beq r0, r0, ->BC_TGETS_Z ++ } else { ++ | beq r0, r0, ->BC_TSETS_Z ++ } ++ break; ++ ++ case BC_TGETV: ++ | // RA = dst*8, RB = table*8, RC = key*8 ++ | decode_RB8a RB, INS ++ | decode_RB8b RB ++ | decode_RDtoRC8 RC, RD ++ | add.d CARG2, BASE, RB ++ | add.d CARG3, BASE, RC ++ | ld.d TAB:RB, 0(CARG2) ++ | ld.d TMP2, 0(CARG3) ++ | add.d RA, BASE, RA ++ | checktab TAB:RB, ->vmeta_tgetv ++ | gettp TMP3, TMP2 ++ | ld.w TMP0, TAB:RB->asize ++ | bne TMP3, TISNUM, >5 // Integer key? ++ | slli.w TMP2, TMP2, 0 // sextw -> slli.w ++ | ld.d TMP1, TAB:RB->array ++ | sltu AT, TMP2, TMP0 ++ | slli.w TMP2, TMP2, 3 ++ | add.d TMP2, TMP1, TMP2 ++ | beqz AT, ->vmeta_tgetv // Integer key and in array part? ++ | ld.d AT, 0(TMP2) ++ | ld.d CRET1, 0(TMP2) ++ | beq AT, TISNIL, >2 ++ |1: ++ | ins_next1 ++ | st.d CRET1, 0(RA) ++ | ins_next2 ++ | ++ |2: // Check for __index if table value is nil. ++ | ld.d TAB:TMP2, TAB:RB->metatable ++ | beqz TAB:TMP2, <1 // No metatable: done. ++ | ld.bu TMP0, TAB:TMP2->nomm ++ | andi TMP0, TMP0, 1<vmeta_tgetv ++ | ++ |5: ++ | .LI AT, LJ_TSTR ++ | cleartp RC, TMP2 ++ | bne TMP3, AT, ->vmeta_tgetv ++ | beq r0, r0, ->BC_TGETS_Z // String key? ++ break; ++ case BC_TGETS: ++ | // RA = dst*8, RB = table*8, RC = str_const*8 (~) ++ | decode_RB8a RB, INS ++ | decode_RB8b RB ++ | decode_RC8a RC, INS ++ | add.d CARG2, BASE, RB ++ | decode_RC8b RC ++ | ld.d TAB:RB, 0(CARG2) ++ | sub.d CARG3, KBASE, RC ++ | add.d RA, BASE, RA ++ | ld.d STR:RC, -8(CARG3) // KBASE-8-str_const*8 ++ | checktab TAB:RB, ->vmeta_tgets1 ++ |->BC_TGETS_Z: ++ | // TAB:RB = GCtab *, STR:RC = GCstr *, RA = dst*8 ++ | ld.w TMP0, TAB:RB->hmask ++ | ld.w TMP1, STR:RC->hash ++ | ld.d NODE:TMP2, TAB:RB->node ++ | and TMP1, TMP1, TMP0 // idx = str->hash & tab->hmask ++ | slli.w TMP0, TMP1, 5 ++ | slli.w TMP1, TMP1, 3 ++ | sub.w TMP1, TMP0, TMP1 ++ | .LI TMP3, LJ_TSTR ++ | add.d NODE:TMP2, NODE:TMP2, TMP1 // node = tab->node + (idx*32-idx*8) ++ | settp STR:RC, TMP3 // Tagged key to look for. ++ |1: ++ | ld.d CARG1, NODE:TMP2->key ++ | ld.d r17, NODE:TMP2->val ++ | ld.d NODE:TMP1, NODE:TMP2->next ++ | ld.d TAB:TMP3, TAB:RB->metatable ++ | bne CARG1, RC, >4 ++ | beq r17, TISNIL, >5 // Key found, but nil value? ++ |3: ++ | ins_next1 ++ | st.d r17, 0(RA) ++ | ins_next2 ++ | ++ |4: // Follow hash chain. ++ | or NODE:TMP2, NODE:TMP1, r0 ++ | bnez NODE:TMP1, <1 ++ | // End of hash chain: key not found, nil result. ++ | ++ |5: // Check for __index if table value is nil. ++ | or r17, TISNIL, r0 ++ | beqz TAB:TMP3, <3 // No metatable: done. 
++ | ld.bu TMP0, TAB:TMP3->nomm ++ | andi TMP0, TMP0, 1<vmeta_tgets ++ break; ++ case BC_TGETB: ++ | // RA = dst*8, RB = table*8, RC = index*8 ++ | decode_RB8a RB, INS ++ | decode_RB8b RB ++ | add.d CARG2, BASE, RB ++ | decode_RDtoRC8 RC, RD ++ | ld.d TAB:RB, 0(CARG2) ++ | add.d RA, BASE, RA ++ | srli.w TMP0, RC, 3 ++ | checktab TAB:RB, ->vmeta_tgetb ++ | ld.w TMP1, TAB:RB->asize ++ | ld.d TMP2, TAB:RB->array ++ | sltu AT, TMP0, TMP1 ++ | add.d RC, TMP2, RC ++ | beqz AT, ->vmeta_tgetb ++ | ld.d AT, 0(RC) ++ | ld.d CRET1, 0(RC) ++ | beq AT, TISNIL, >5 ++ |1: ++ | ins_next1 ++ | st.d CRET1, 0(RA) ++ | ins_next2 ++ | ++ |5: // Check for __index if table value is nil. ++ | ld.d TAB:TMP2, TAB:RB->metatable ++ | beqz TAB:TMP2, <1 // No metatable: done. ++ | ld.bu TMP1, TAB:TMP2->nomm ++ | andi TMP1, TMP1, 1<vmeta_tgetb // Caveat: preserve TMP0 and CARG2! ++ break; ++ case BC_TGETR: ++ | // RA = dst*8, RB = table*8, RC = key*8 ++ | decode_RB8a RB, INS ++ | decode_RB8b RB ++ | decode_RDtoRC8 RC, RD ++ | add.d RB, BASE, RB ++ | add.d RC, BASE, RC ++ | ld.d TAB:CARG1, 0(RB) ++ | ld.w CARG2, LO(RC) ++ | add.d RA, BASE, RA ++ | cleartp TAB:CARG1 ++ | ld.w TMP0, TAB:CARG1->asize ++ | ld.d TMP1, TAB:CARG1->array ++ | sltu AT, CARG2, TMP0 ++ | slli.w TMP2, CARG2, 3 ++ | add.d r17, TMP1, TMP2 ++ | beqz AT, ->vmeta_tgetr // In array part? ++ | ld.d CARG2, 0(r17) ++ |->BC_TGETR_Z: ++ | ins_next1 ++ | st.d CARG2, 0(RA) ++ | ins_next2 ++ break; ++ ++ case BC_TSETV: ++ | // RA = src*8, RB = table*8, RC = key*8 ++ | decode_RB8a RB, INS ++ | decode_RB8b RB ++ | decode_RDtoRC8 RC, RD ++ | add.d CARG2, BASE, RB ++ | add.d CARG3, BASE, RC ++ | ld.d RB, 0(CARG2) ++ | ld.d TMP2, 0(CARG3) ++ | add.d RA, BASE, RA ++ | checktab RB, ->vmeta_tsetv ++ | slli.w RC, TMP2, 0 // sextw -> slli.w ++ | checkint TMP2, >5 ++ | ld.w TMP0, TAB:RB->asize ++ | ld.d TMP1, TAB:RB->array ++ | sltu AT, RC, TMP0 ++ | slli.w TMP2, RC, 3 ++ | add.d TMP1, TMP1, TMP2 ++ | beqz AT, ->vmeta_tsetv // Integer key and in array part? ++ | ld.d TMP0, 0(TMP1) ++ | ld.bu TMP3, TAB:RB->marked ++ | ld.d CRET1, 0(RA) ++ | beq TMP0, TISNIL, >3 ++ |1: ++ | andi AT, TMP3, LJ_GC_BLACK // isblack(table) ++ | st.d CRET1, 0(TMP1) ++ | bnez AT, >7 ++ |2: ++ | ins_next ++ | ++ |3: // Check for __newindex if previous value is nil. ++ | ld.d TAB:TMP2, TAB:RB->metatable ++ | beqz TAB:TMP2, <1 // No metatable: done. ++ | ld.bu TMP2, TAB:TMP2->nomm ++ | andi TMP2, TMP2, 1<vmeta_tsetv ++ | ++ |5: ++ | gettp AT, TMP2 ++ | addi.d AT, AT, -LJ_TSTR ++ | bnez AT, ->vmeta_tsetv ++ | cleartp STR:RC, TMP2 ++ | beq r0, r0, ->BC_TSETS_Z // String key? ++ | ++ |7: // Possible table write barrier for the value. Skip valiswhite check. ++ | barrierback TAB:RB, TMP3, TMP0, <2 ++ break; ++ case BC_TSETS: ++ | // RA = src*8, RB = table*8, RC = str_const*8 (~) ++ | decode_RB8a RB, INS ++ | decode_RB8b RB ++ | add.d CARG2, BASE, RB ++ | decode_RC8a RC, INS ++ | ld.d TAB:RB, 0(CARG2) ++ | decode_RC8b RC ++ | sub.d CARG3, KBASE, RC ++ | ld.d RC, -8(CARG3) // KBASE-8-str_const*8 ++ | add.d RA, BASE, RA ++ | cleartp STR:RC ++ | checktab TAB:RB, ->vmeta_tsets1 ++ |->BC_TSETS_Z: ++ | // TAB:RB = GCtab *, STR:RC = GCstr *, RA = BASE+src*8 ++ | ld.w TMP0, TAB:RB->hmask ++ | ld.w TMP1, STR:RC->hash ++ | ld.d NODE:TMP2, TAB:RB->node ++ | st.b r0, TAB:RB->nomm // Clear metamethod cache. 
++ | and TMP1, TMP1, TMP0 // idx = str->hash & tab->hmask ++ | slli.w TMP0, TMP1, 5 ++ | slli.w TMP1, TMP1, 3 ++ | sub.w TMP1, TMP0, TMP1 ++ | .LI TMP3, LJ_TSTR ++ | add.d NODE:TMP2, NODE:TMP2, TMP1 // node = tab->node + (idx*32-idx*8) ++ | settp STR:RC, TMP3 // Tagged key to look for. ++ |.if FPU ++ | fld.d FTMP0, 0(RA) ++ |.else ++ | ld.d CRET1, 0(RA) ++ |.endif ++ |1: ++ | ld.d TMP0, NODE:TMP2->key ++ | ld.d CARG2, NODE:TMP2->val ++ | ld.d NODE:TMP1, NODE:TMP2->next ++ | ld.bu TMP3, TAB:RB->marked ++ | bne TMP0, RC, >5 ++ | ld.d TAB:TMP0, TAB:RB->metatable ++ | beq CARG2, TISNIL, >4 // Key found, but nil value? ++ |2: ++ | andi AT, TMP3, LJ_GC_BLACK // isblack(table) ++ |.if FPU ++ | fst.d FTMP0, NODE:TMP2->val ++ |.else ++ | st.d CRET1, NODE:TMP2->val ++ |.endif ++ | bnez AT, >7 ++ |3: ++ | ins_next ++ | ++ |4: // Check for __newindex if previous value is nil. ++ | beqz TAB:TMP0, <2 // No metatable: done. ++ | ld.bu TMP0, TAB:TMP0->nomm ++ | andi TMP0, TMP0, 1<vmeta_tsets ++ | ++ |5: // Follow hash chain. ++ | or NODE:TMP2, NODE:TMP1, r0 ++ | bnez NODE:TMP1, <1 ++ | // End of hash chain: key not found, add a new one ++ | ++ | // But check for __newindex first. ++ | ld.d TAB:TMP2, TAB:RB->metatable ++ | .DADDIU CARG3, DISPATCH, DISPATCH_GL(tmptv) ++ | beqz TAB:TMP2, >6 // No metatable: continue. ++ | ld.bu TMP0, TAB:TMP2->nomm ++ | andi TMP0, TMP0, 1<vmeta_tsets // 'no __newindex' flag NOT set: check. TODO why no slot ins ? ++ |6: ++ | st.d RC, 0(CARG3) ++ | st.d BASE, L->base ++ | or CARG2, TAB:RB, r0 ++ | st.d PC, SAVE_PC(sp) ++ | or CARG1, L, r0 ++ | bl extern lj_tab_newkey // (lua_State *L, GCtab *t, TValue *k ++ | // Returns TValue *. ++ | ld.d BASE, L->base ++ |.if FPU ++ | fst.d FTMP0, 0(CRET1) ++ | beq r0, r0, <3 // No 2nd write barrier needed. ++ |.else ++ | ld.d r17, 0(RA) ++ | st.d r17, 0(CRET1) ++ | beq r0, r0, <3 // No 2nd write barrier needed. ++ |.endif ++ | ++ |7: // Possible table write barrier for the value. Skip valiswhite check. ++ | barrierback TAB:RB, TMP3, TMP0, <3 ++ break; ++ case BC_TSETB: ++ | // RA = src*8, RB = table*8, RC = index*8 ++ | decode_RB8a RB, INS ++ | decode_RB8b RB ++ | add.d CARG2, BASE, RB ++ | decode_RDtoRC8 RC, RD ++ | ld.d TAB:RB, 0(CARG2) ++ | add.d RA, BASE, RA ++ | srli.w TMP0, RC, 3 ++ | checktab RB, ->vmeta_tsetb ++ | ld.w TMP1, TAB:RB->asize ++ | ld.d TMP2, TAB:RB->array ++ | sltu AT, TMP0, TMP1 ++ | add.d RC, TMP2, RC ++ | beqz AT, ->vmeta_tsetb ++ | ld.d TMP1, 0(RC) ++ | ld.bu TMP3, TAB:RB->marked ++ | beq TMP1, TISNIL, >5 //TODO not process the following slot ins ++ |1: ++ | ld.d CRET1, 0(RA) ++ |// beq TMP1, TISNIL, >5 ++ | andi AT, TMP3, LJ_GC_BLACK // isblack(table) ++ | st.d CRET1, 0(RC) ++ | bnez AT, >7 ++ |2: ++ | ins_next ++ | ++ |5: // Check for __newindex if previous value is nil. ++ | ld.d TAB:TMP2, TAB:RB->metatable ++ | beqz TAB:TMP2, <1 // No metatable: done. ++ | ld.bu TMP1, TAB:TMP2->nomm ++ | andi TMP1, TMP1, 1<vmeta_tsetb // Caveat: preserve TMP0 and CARG2! ++ | ++ |7: // Possible table write barrier for the value. Skip valiswhite check. 
++ | barrierback TAB:RB, TMP3, TMP0, <2 ++ break; ++ case BC_TSETR: ++ | // RA = dst*8, RB = table*8, RC = key*8 ++ | decode_RB8a RB, INS ++ | decode_RB8b RB ++ | decode_RDtoRC8 RC, RD ++ | add.d CARG1, BASE, RB ++ | add.d CARG3, BASE, RC ++ | ld.d TAB:CARG2, 0(CARG1) ++ | ld.w CARG3, LO(CARG3) ++ | cleartp TAB:CARG2 ++ | ld.bu TMP3, TAB:CARG2->marked ++ | ld.w TMP0, TAB:CARG2->asize ++ | ld.d TMP1, TAB:CARG2->array ++ | andi AT, TMP3, LJ_GC_BLACK // isblack(table) ++ | add.d RA, BASE, RA ++ | bnez AT, >7 ++ |2: ++ | sltu AT, CARG3, TMP0 ++ | slli.w TMP2, CARG3, 3 ++ | add.d r17, TMP1, TMP2 ++ | beqz AT, ->vmeta_tsetr // In array part? ++ |->BC_TSETR_Z: ++ | bnez AT, >3 ++ | add.d r17, CRET1, r0 ++ |3: ++ | ld.d CARG1, 0(RA) ++ | ins_next1 ++ | st.d CARG1, 0(r17) ++ | ins_next2 ++ | ++ |7: // Possible table write barrier for the value. Skip valiswhite check. ++ | barrierback TAB:CARG2, TMP3, CRET1, <2 ++ break; ++ ++ case BC_TSETM: ++ | // RA = base*8 (table at base-1), RD = num_const*8 (start index) ++ | add.d RA, BASE, RA ++ |1: ++ | add.d TMP3, KBASE, RD ++ | ld.d TAB:CARG2, -8(RA) // Guaranteed to be a table. ++ | addi.w TMP0, MULTRES, -8 ++ | ld.w TMP3, LO(TMP3) // Integer constant is in lo-word. ++ | srli.w CARG3, TMP0, 3 ++ | beqz TMP0, >4 // Nothing to copy? ++ | cleartp CARG2 ++ | add.w CARG3, CARG3, TMP3 ++ | ld.w TMP2, TAB:CARG2->asize ++ | slli.w TMP1, TMP3, 3 ++ | ld.bu TMP3, TAB:CARG2->marked ++ | ld.d CARG1, TAB:CARG2->array ++ | sltu AT, TMP2, CARG3 ++ | add.d TMP2, RA, TMP0 ++ | bnez AT, >5 ++ | add.d TMP1, TMP1, CARG1 ++ | andi TMP0, TMP3, LJ_GC_BLACK // isblack(table) ++ |3: // Copy result slots to table. ++ | ld.d CRET1, 0(RA) ++ | addi.d RA, RA, 8 ++ | sltu AT, RA, TMP2 ++ | st.d CRET1, 0(TMP1) ++ | addi.d TMP1, TMP1, 8 ++ | bnez AT, <3 ++ | bnez TMP0, >7 ++ |4: ++ | ins_next ++ | ++ |5: // Need to resize array part. ++ | st.d BASE, L->base ++ | st.d PC, SAVE_PC(sp) ++ | or BASE, RD, r0 ++ | or CARG1, L, r0 ++ | bl extern lj_tab_reasize // (lua_State *L, GCtab *t, int nasize) ++ | // Must not reallocate the stack. ++ | or RD, BASE, r0 ++ | ld.d BASE, L->base // Reload BASE for lack of a saved register. ++ | beq r0, r0, <1 ++ | ++ |7: // Possible table write barrier for any value. Skip valiswhite check. ++ | barrierback TAB:CARG2, TMP3, TMP0, <4 ++ break; ++ ++ /* -- Calls and vararg handling ----------------------------------------- */ ++ ++ case BC_CALLM: ++ | // RA = base*8, (RB = (nresults+1)*8,) RC = extra_nargs*8 ++ | decode_RDtoRC8 NARGS8:RC, RD ++ | add.w NARGS8:RC, NARGS8:RC, MULTRES ++ | beq r0, r0, ->BC_CALL_Z ++ break; ++ case BC_CALL: ++ | // RA = base*8, (RB = (nresults+1)*8,) RC = (nargs+1)*8 ++ | decode_RDtoRC8 NARGS8:RC, RD ++ |->BC_CALL_Z: ++ | or TMP2, BASE, r0 ++ | add.d BASE, BASE, RA ++ | ld.d LFUNC:RB, 0(BASE) ++ | addi.d BASE, BASE, 16 ++ | addi.w NARGS8:RC, NARGS8:RC, -8 ++ | checkfunc RB, ->vmeta_call ++ | ins_call ++ break; ++ ++ case BC_CALLMT: ++ | // RA = base*8, (RB = 0,) RC = extra_nargs*8 ++ | add.w NARGS8:RD, NARGS8:RD, MULTRES // BC_CALLT gets RC from RD. ++ | // Fall through. Assumes BC_CALLT follows. ++ break; ++ case BC_CALLT: ++ | // RA = base*8, (RB = 0,) RC = (nargs+1)*8 ++ | add.d RA, BASE, RA ++ | ld.d RB, 0(RA) ++ | or NARGS8:RC, RD, r0 ++ | ld.d TMP1, FRAME_PC(BASE) ++ | addi.d RA, RA, 16 ++ | addi.w NARGS8:RC, NARGS8:RC, -8 ++ | checktp CARG3, RB, -LJ_TFUNC, ->vmeta_callt ++ |->BC_CALLT_Z: ++ | andi TMP0, TMP1, FRAME_TYPE // Caveat: preserve TMP0 until the 'or'. 
++ | ld.bu TMP3, LFUNC:CARG3->ffid ++ | xori TMP2, TMP1, FRAME_VARG ++ | bnez TMP0, >7 ++ |1: ++ | st.d RB, FRAME_FUNC(BASE) // Copy function down, but keep PC. ++ | sltui AT, TMP3, 2 // (> FF_C) Calling a fast function? ++ | or TMP2, BASE, r0 ++ | or RB, CARG3, r0 ++ | or TMP3, NARGS8:RC, r0 ++ | beqz NARGS8:RC, >3 ++ |2: ++ | ld.d CRET1, 0(RA) ++ | addi.d RA, RA, 8 ++ | addi.w TMP3, TMP3, -8 ++ | st.d CRET1, 0(TMP2) ++ | addi.d TMP2, TMP2, 8 ++ | bnez TMP3, <2 ++ |3: ++ | or TMP0, TMP0, AT ++ | beqz TMP0, >5 ++ |4: ++ | ins_callt ++ | ++ |5: // Tailcall to a fast function with a Lua frame below. ++ | ld.w INS, -4(TMP1) ++ | decode_RA8a RA, INS ++ | decode_RA8b RA ++ | sub.d TMP1, BASE, RA ++ | ld.d TMP1, -32(TMP1) ++ | cleartp LFUNC:TMP1 ++ | ld.d TMP1, LFUNC:TMP1->pc ++ | ld.d KBASE, PC2PROTO(k)(TMP1) // Need to prepare KBASE. ++ | beq r0, r0, <4 ++ | ++ |7: // Tailcall from a vararg function. ++ | andi AT, TMP2, FRAME_TYPEP ++ | sub.d TMP2, BASE, TMP2 // Relocate BASE down. ++ | bnez AT, <1 // Vararg frame below? ++ | or BASE, TMP2, r0 ++ | ld.d TMP1, FRAME_PC(TMP2) ++ | andi TMP0, TMP1, FRAME_TYPE ++ | beq r0, r0, <1 ++ break; ++ ++ case BC_ITERC: ++ | // RA = base*8, (RB = (nresults+1)*8, RC = (nargs+1)*8 ((2+1)*8)) ++ | or TMP2, BASE, r0 // Save old BASE fir vmeta_call. ++ | add.d BASE, BASE, RA ++ | ld.d RB, -24(BASE) ++ | ld.d CARG1, -16(BASE) ++ | ld.d CARG2, -8(BASE) ++ | .LI NARGS8:RC, 16 // Iterators get 2 arguments. ++ | st.d RB, 0(BASE) // Copy callable. ++ | st.d CARG1, 16(BASE) // Copy state. ++ | st.d CARG2, 24(BASE) // Copy control var. ++ | addi.d BASE, BASE, 16 ++ | checkfunc RB, ->vmeta_call ++ | ins_call ++ break; ++ ++ case BC_ITERN: ++ | // RA = base*8, (RB = (nresults+1)*8, RC = (nargs+1)*8 (2+1)*8) ++ |.if JIT ++ | // NYI: add hotloop, record BC_ITERN. ++ |.endif ++ |->vm_IITERN: ++ | add.d RA, BASE, RA ++ | ld.d TAB:RB, -16(RA) ++ | ld.w RC, -8+LO(RA) // Get index from control var. ++ | cleartp TAB:RB ++ | addi.d PC, PC, 4 ++ | ld.w TMP0, TAB:RB->asize ++ | ld.d TMP1, TAB:RB->array ++ | slli.d CARG3, TISNUM, 47 ++ |1: // Traverse array part. ++ | sltu AT, RC, TMP0 ++ | slli.w TMP3, RC, 3 ++ | beqz AT, >5 // Index points after array part? ++ | add.d TMP3, TMP1, TMP3 ++ | ld.d CARG1, 0(TMP3) ++ | ld.hu RD, -4+OFS_RD(PC) ++ | or TMP2, RC, CARG3 ++ | addi.w RC, RC, 1 ++ | beq CARG1, TISNIL, <1 // Skip holes in array part. ++ | st.d TMP2, 0(RA) ++ | st.d CARG1, 8(RA) ++ | or TMP0, RC, CARG3 ++ | .LUI TMP3, (-(BCBIAS_J*4 >> 16) & 65535) ++ | decode_RD4b RD ++ | add.d RD, RD, TMP3 ++ | st.w TMP0, -8+LO(RA) // Update control var. ++ | add.d PC, PC, RD ++ |3: ++ | ins_next ++ | ++ |5: // Traverse hash part. ++ | ld.w TMP1, TAB:RB->hmask ++ | sub.w RC, RC, TMP0 ++ | ld.d TMP2, TAB:RB->node ++ |6: ++ | sltu AT, TMP1, RC // End of iteration? Branch to ITERL+1. ++ | slli.w TMP3, RC, 5 ++ | bnez AT, <3 ++ | slli.w RB, RC, 3 ++ | sub.w TMP3, TMP3, RB ++ | add.d NODE:TMP3, TMP3, TMP2 ++ | ld.d CARG1, 0(NODE:TMP3) ++ | ld.hu RD, -4+OFS_RD(PC) ++ | addi.w RC, RC, 1 ++ | beq CARG1, TISNIL, <6 // Skip holes in hash part. ++ | ld.d CARG2, NODE:TMP3->key ++ | .LUI TMP3, (-(BCBIAS_J*4 >> 16) & 65535) ++ | st.d CARG1, 8(RA) ++ | add.w RC, RC, TMP0 ++ | decode_RD4b RD ++ | add.w RD, RD, TMP3 ++ | st.d CARG2, 0(RA) ++ | add.d PC, PC, RD ++ | st.w RC, -8+LO(RA) // Update control var. 
++ | beq r0, r0, <3 ++ break; ++ ++ case BC_ISNEXT: ++ | // RA = base*8, RD = target (points to ITERN) ++ | add.d RA, BASE, RA ++ | srli.w TMP0, RD, 1 ++ | ld.d CFUNC:CARG1, -24(RA) ++ | add.d TMP0, PC, TMP0 ++ | ld.d CARG2, -16(RA) ++ | ld.d CARG3, -8(RA) ++ | .LUI TMP2, (-(BCBIAS_J*4 >> 16) & 65535) ++ | checkfunc CFUNC:CARG1, >5 ++ | gettp CARG2, CARG2 ++ | addi.d CARG2, CARG2, -LJ_TTAB ++ | ld.bu TMP1, CFUNC:CARG1->ffid ++ | addi.d CARG3, CARG3, -LJ_TNIL ++ | or AT, CARG2, CARG3 ++ | addi.d TMP1, TMP1, -FF_next_N ++ | or AT, AT, TMP1 ++ |// addu16i.d TMP1, r0, 0xfffe ++ | .LUI TMP1, 0xfffe ++ | bnez AT, >5 ++ | add.d PC, TMP0, TMP2 ++ |// ori TMP1, TMP1, 0x7fff ++ | srli.d TMP1, TMP1, 12 ++ | ori TMP1, TMP1, 0x7 ++ | slli.d TMP1, TMP1, 12 ++ | ori TMP1, TMP1, 0xfff ++ | slli.d TMP1, TMP1, 32 ++ | st.d TMP1, -8(RA) ++ |1: ++ | ins_next ++ |5: // Despecialize bytecode if any of the checks fail. ++ | .LI TMP3, BC_JMP ++ | .LI TMP1, BC_ITERC ++ | st.b TMP3, -4+OFS_OP(PC) ++ | add.d PC, TMP0, TMP2 ++ | st.b TMP1, OFS_OP(PC) ++ | beq r0, r0, <1 ++ break; ++ ++ case BC_VARG: ++ | // RA = base*8, RB = (nresults+1)*8, RC = numparams*8 ++ | ld.d TMP0, FRAME_PC(BASE) ++ | decode_RDtoRC8 RC, RD ++ | decode_RB8a RB, INS ++ | add.d RC, BASE, RC ++ | decode_RB8b RB ++ | add.d RA, BASE, RA ++ | addi.d RC, RC, FRAME_VARG ++ | add.d TMP2, RA, RB ++ | addi.d TMP3, BASE, -16 // TMP3 = vtop ++ | sub.d RC, RC, TMP0 // RC = vbase ++ | // Note: RC may now be even _above_ BASE if nargs was < numparams. ++ | sub.d TMP1, TMP3, RC ++ | beqz RB, >5 // Copy all varargs? ++ | addi.d TMP2, TMP2, -16 ++ |1: // Copy vararg slots to destination slots. ++ | ld.d CARG1, 0(RC) ++ | sltu AT, RC, TMP3 ++ | addi.d RC, RC, 8 ++ | maskeqz CARG1, CARG1, AT ++ | masknez AT, TISNIL, AT ++ | or CARG1, CARG1, AT ++ | st.d CARG1, 0(RA) ++ | sltu AT, RA, TMP2 ++ | addi.d RA, RA, 8 ++ | bnez AT, <1 ++ |3: ++ | ins_next ++ | ++ |5: // Copy all varargs. ++ | ld.d TMP0, L->maxstack ++ | .LI MULTRES, 8 // MULTRES = (0+1)*8 ++ | bge r0, TMP1, <3 // No vararg slots? ++ | add.d TMP2, RA, TMP1 ++ | sltu AT, TMP0, TMP2 ++ | addi.d MULTRES, TMP1, 8 ++ | bnez AT, >7 ++ |6: ++ | ld.d CRET1, 0(RC) ++ | addi.d RC, RC, 8 ++ | st.d CRET1, 0(RA) ++ | sltu AT, RC, TMP3 ++ | addi.d RA, RA, 8 ++ | bnez AT, <6 // More vararg slots? ++ | beq r0, r0, <3 ++ | ++ |7: // Grow stack for varargs. ++ | st.d RA, L->top ++ | sub.d RA, RA, BASE ++ | st.d BASE, L->base ++ | sub.d BASE, RC, BASE // Need delta, because BASE may change. ++ | st.d PC, SAVE_PC(sp) ++ | srli.w CARG2, TMP1, 3 ++ | or CARG1, L, r0 ++ | bl extern lj_state_growstack // (lua_State *L, int n) ++ | or RC, BASE, r0 ++ | ld.d BASE, L->base ++ | add.d RA, BASE, RA ++ | add.d RC, BASE, RC ++ | addi.d TMP3, BASE, -16 ++ | beq r0, r0, <6 ++ break; ++ ++ /* -- Returns ----------------------------------------------------------- */ ++ ++ case BC_RETM: ++ | // RA = results*8, RD = extra_nresults*8 ++ | add.w RD, RD, MULTRES // MULTRES >= 8, so RD >= 8. ++ | // Fall through. Assumes BC_RET follows. 
++ break; ++ ++ case BC_RET: ++ | // RA = results*8, RD = (nresults+1)*8 ++ | ld.d PC, FRAME_PC(BASE) ++ | add.d RA, BASE, RA ++ | or MULTRES, RD, r0 ++ |1: ++ | andi TMP0, PC, FRAME_TYPE ++ | xori TMP1, PC, FRAME_VARG ++ | bnez TMP0, ->BC_RETV_Z ++ | ++ |->BC_RET_Z: ++ | // BASE = base, RA = resultptr, RD = (nresults+1)*8, PC = return ++ | ld.w INS, -4(PC) ++ | addi.d TMP2, BASE, -16 ++ | addi.d RC, RD, -8 ++ | decode_RA8a TMP0, INS ++ | decode_RB8a RB, INS ++ | decode_RA8b TMP0 ++ | decode_RB8b RB ++ | add.d TMP3, TMP2, RB ++ | sub.d BASE, TMP2, TMP0 ++ | beqz RC, >3 ++ |2: ++ | ld.d CRET1, 0(RA) ++ | addi.d RA, RA, 8 ++ | addi.d RC, RC, -8 ++ | st.d CRET1, 0(TMP2) ++ | addi.d TMP2, TMP2, 8 ++ | bnez RC, <2 ++ |3: ++ | addi.d TMP3, TMP3, -8 ++ |5: ++ | sltu AT, TMP2, TMP3 ++ | ld.d LFUNC:TMP1, FRAME_FUNC(BASE) ++ | bnez AT, >6 ++ | ins_next1 ++ | cleartp LFUNC:TMP1 ++ | ld.d TMP1, LFUNC:TMP1->pc ++ | ld.d KBASE, PC2PROTO(k)(TMP1) ++ | ins_next2 ++ | ++ |6: // Fill up results with nil. ++ | st.d TISNIL, 0(TMP2) ++ | addi.d TMP2, TMP2, 8 ++ | beq r0, r0, <5 ++ | ++ |->BC_RETV_Z: // Non-standard return case. ++ | andi TMP2, TMP1, FRAME_TYPEP ++ | bnez TMP2, ->vm_return ++ | // Return from vararg function: relocate BASE down. ++ | sub.d BASE, BASE, TMP1 ++ | ld.d PC, FRAME_PC(BASE) ++ | beq r0, r0, <1 ++ break; ++ ++ case BC_RET0: case BC_RET1: ++ | // RA = results*8, RD = (nresults+1)*8 ++ | ld.d PC, FRAME_PC(BASE) ++ | add.d RA, BASE, RA ++ | or MULTRES, RD, r0 ++ | andi TMP0, PC, FRAME_TYPE ++ | xori TMP1, PC, FRAME_VARG ++ | bnez TMP0, ->BC_RETV_Z ++ | ld.w INS, -4(PC) ++ | addi.d TMP2, BASE, -16 ++ if (op == BC_RET1) { ++ | ld.d CRET1, 0(RA) ++ } ++ | decode_RB8a RB, INS ++ | decode_RA8a RA, INS ++ | decode_RB8b RB ++ | decode_RA8b RA ++ | sub.d BASE, TMP2, RA ++ if (op == BC_RET1) { ++ | st.d CRET1, 0(TMP2) ++ } ++ |5: ++ | sltu AT, RD, RB ++ | ld.d TMP1, FRAME_FUNC(BASE) ++ | bnez AT, >6 ++ | ins_next1 ++ | cleartp LFUNC:TMP1 ++ | ld.d TMP1, LFUNC:TMP1->pc ++ | ld.d KBASE, PC2PROTO(k)(TMP1) ++ | ins_next2 ++ | ++ |6: // Fill up results with nil. ++ | addi.d TMP2, TMP2, 8 ++ | addi.d RD, RD, 8 ++ if (op == BC_RET1) { ++ | st.d TISNIL, 0(TMP2) ++ } else { ++ | st.d TISNIL, -8(TMP2) ++ } ++ | beq r0, r0, <5 ++ break; ++ ++ /* -- Loops and branches ------------------------------------------------ */ ++ ++ case BC_FORL: ++ |.if JIT ++ | hotloop ++ |.endif ++ | // Fall through. Assumes BC_IFORL follows. 
++ break; ++ ++ case BC_JFORI: ++ case BC_JFORL: ++#if !LJ_HASJIT ++ break; ++#endif ++ case BC_FORI: ++ case BC_IFORL: ++ | // RA = base*8, RD = target (after end of loop or start of loop) ++ vk = (op == BC_IFORL || op == BC_JFORL); ++ | add.d RA, BASE, RA ++ | ld.d r17, FORL_IDX*8(RA) // IDX CARG1 - CARG3 type ++ | gettp CARG3, r17 ++ if (op != BC_JFORL) { ++ | srli.w RD, RD, 1 ++ | .LUI TMP2, (-(BCBIAS_J*4 >> 16) & 65535) ++ | add.d TMP2, RD, TMP2 ++ } ++ if (!vk) { ++ | ld.d r18, FORL_STOP*8(RA) // STOP CARG2 - CARG4 type ++ | ld.d CRET1, FORL_STEP*8(RA) // STEP CRET1 - CRET2 type ++ | gettp CARG4, r18 ++ | gettp CRET2, CRET1 ++ | bne CARG3, TISNUM, >5 ++ | slli.w CARG3, r17, 0 // sextw -> slli.w ++ | bne CARG4, TISNUM, ->vmeta_for ++ | slli.w r18, r18, 0 // sextw -> slli.w ++ | bne CRET2, TISNUM, ->vmeta_for ++ | bstrpick.d AT, CRET1, 31, 31 ++ | slt CRET1, r18, CARG3 ++ | slt TMP1, CARG3, r18 ++ | maskeqz TMP1, TMP1, AT ++ | masknez CRET1, CRET1, AT ++ | or CRET1, CRET1, TMP1 ++ } else { ++ | ld.d CARG2, FORL_STEP*8(RA) // STEP CARG2 - CARG4 type ++ | bne CARG3, TISNUM, >5 ++ | ld.d CRET1, FORL_STOP*8(RA) // STOP CRET1 - CRET2 type ++ | slli.w TMP3, r17, 0 // sextw -> slli.w ++ | slli.w CARG2, CARG2, 0 // sextw -> slli.w ++ | slli.w CRET1, CRET1, 0 // sextw -> slli.w ++ | add.w r17, TMP3, CARG2 ++ | xor TMP0, r17, TMP3 ++ | xor TMP1, r17, CARG2 ++ | and TMP0, TMP0, TMP1 ++ | slt TMP1, r17, CRET1 ++ | slt CRET1, CRET1, r17 ++ | slt AT, CARG2, r0 ++ | slt TMP0, TMP0, r0 // ((y^a) & (y^b)) < 0: overflow. ++ | maskeqz TMP1, TMP1, AT ++ | masknez CRET1, CRET1, AT ++ | or CRET1, CRET1, TMP1 ++ | or CRET1, CRET1, TMP0 ++ | bstrpick.d r17, r17, 31, 0 // zextw -> bstrpick.d ++ | settp r17, TISNUM ++ } ++ |1: ++ if (op == BC_FORI) { ++ | maskeqz TMP2, TMP2, CRET1 ++ | add.d PC, PC, TMP2 ++ } else if (op == BC_JFORI) { ++ | add.d PC, PC, TMP2 ++ | ld.hu RD, -4+OFS_RD(PC) ++ } else if (op == BC_IFORL) { ++ | masknez TMP2, TMP2, CRET1 ++ | add.d PC, PC, TMP2 ++ } ++ if (vk) { ++ | st.d r17, FORL_IDX*8(RA) ++ } ++ | ins_next1 ++ | st.d r17, FORL_EXT*8(RA) ++ |2: ++ if (op == BC_JFORI) { ++ | decode_RD8b RD ++ | beqz CRET1, =>BC_JLOOP ++ } else if (op == BC_JFORL) { ++ | beqz CRET1, =>BC_JLOOP //TODO no slot ins ? ++ } ++ | ins_next2 ++ | ++ |5: // FP loop. 
++ |.if FPU ++ if (!vk) { ++ | fld.d f22, FORL_IDX*8(RA) ++ | fld.d f23, FORL_STOP*8(RA) ++ | sltui TMP0, CARG3, LJ_TISNUM ++ | sltui TMP1, CARG4, LJ_TISNUM ++ | sltui AT, CRET2, LJ_TISNUM ++ | ld.d TMP3, FORL_STEP*8(RA) ++ | and TMP0, TMP0, TMP1 ++ | and AT, AT, TMP0 ++ | slt TMP3, TMP3, r0 ++ | beqz AT, ->vmeta_for ++ | movgr2fr.d FTMP2, TMP3 ++ | fcmp.clt.d FCC0, f22, f23 ++ | fcmp.clt.d FCC1, f23, f22 ++ | movcf2fr FTMP0, FCC0 ++ | movcf2fr FTMP1, FCC1 ++ | movfr2cf FCC0, FTMP2 ++ | fsel FTMP2, FTMP1, FTMP0, FCC0 ++ | movfr2gr.d CRET1, FTMP2 ++ | beq r0, r0, <1 ++ } else { ++ | fld.d f22, FORL_IDX*8(RA) ++ | fld.d f10, FORL_STEP*8(RA) ++ | fld.d f23, FORL_STOP*8(RA) ++ | ld.d TMP3, FORL_STEP*8(RA) ++ | fadd.d f22, f22, f10 ++ | slt TMP3, TMP3, r0 ++ | movgr2fr.d FTMP2, TMP3 ++ | fcmp.clt.d FCC0, f22, f23 ++ | fcmp.clt.d FCC1, f23, f22 ++ | movcf2fr FTMP0, FCC0 ++ | movcf2fr FTMP1, FCC1 ++ | movfr2cf FCC0, FTMP2 ++ | fsel FTMP2, FTMP1, FTMP0, FCC0 ++ | movfr2gr.d CRET1, FTMP2 ++ if (op == BC_IFORL) { ++ | masknez TMP2, TMP2, CRET1 ++ | add.d PC, PC, TMP2 ++ } ++ | fst.d f22, FORL_IDX*8(RA) ++ | ins_next1 ++ | fst.d f22, FORL_EXT*8(RA) ++ | beq r0, r0, <2 ++ } ++ |.else ++ if (!vk) { ++ | sltui TMP0, CARG3, LJ_TISNUM ++ | sltui TMP1, CARG4, LJ_TISNUM ++ | sltui AT, CRET2, LJ_TISNUM ++ | and TMP0, TMP0, TMP1 ++ | and AT, AT, TMP0 ++ | beqz AT, ->vmeta_for ++ | ld.w TMP3, FORL_STEP*8+HI(RA) ++ | bl ->vm_sfcmpolex ++ | beq r0, r0, <1 ++ } else { ++ | st.w TMP2, TMPD(sp) ++ | bl extern __adddf3 ++ | ld.d CARG2, FORL_STOP*8(RA) ++ | or r17, CRET1, r0 ++ if ( op == BC_JFORL ) { ++ | ld.hu RD, -4+OFS_RD(PC) ++ | decode_RD8b RD ++ } ++ | ld.w TMP3, FORL_STEP*8+HI(RA) ++ | bl ->vm_sfcmpolex ++ | ld.w TMP2, TMPD(sp) ++ | beq r0, r0, <1 ++ } ++ |.endif ++ break; ++ ++ case BC_ITERL: ++ |.if JIT ++ | hotloop ++ |.endif ++ | // Fall through. Assumes BC_IITERL follows. ++ break; ++ ++ case BC_JITERL: ++#if !LJ_HASJIT ++ break; ++#endif ++ case BC_IITERL: ++ | // RA = base*8, RD = target ++ | add.d RA, BASE, RA ++ | ld.d TMP1, 0(RA) ++ | beq TMP1, TISNIL, >1 // Stop if iterator returned nil. ++ if (op == BC_JITERL) { ++ | st.d TMP1,-8(RA) ++ | beq r0, r0, =>BC_JLOOP ++ } else { ++ | branch_RD // Otherwise save control var + branch. ++ | st.d TMP1, -8(RA) ++ } ++ |1: ++ | ins_next ++ break; ++ ++ case BC_LOOP: ++ | // RA = base*8, RD = target (loop extent) ++ | // Note: RA/RD is only used by trace recorder to determine scope/extent ++ | // This opcode does NOT jump, it's only purpose is to detect a hot loop. ++ |.if JIT ++ | hotloop ++ |.endif ++ | // Fall through. Assumes BC_ILOOP follows. ++ break; ++ ++ case BC_ILOOP: ++ | // RA = base*8, RD = target (loop extent) ++ | ins_next ++ break; ++ ++ case BC_JLOOP: ++ |.if JIT ++ | // RA = base*8 (ignored), RD = traceno*8 ++ | .LDXD TMP1, DISPATCH, DISPATCH_J(trace) ++ | .LI AT, 0 ++ | add.d TMP1, TMP1, RD ++ | // Traces on MIPS don't store the trace number, so use 0. 
++ | .STXD AT, DISPATCH, DISPATCH_GL(vmstate) ++ | ld.d TRACE:TMP2, 0(TMP1) ++ | .STXD BASE, DISPATCH, DISPATCH_GL(jit_base) ++ | ld.d TMP2, TRACE:TMP2->mcode ++ | .STXD L, DISPATCH, DISPATCH_GL(tmpbuf.L) ++ | .DADDIU JGL, DISPATCH, GG_DISP2G+32768 ++ | jirl r0, TMP2, 0 ++ |.endif ++ break; ++ ++ case BC_JMP: ++ | // RA = base*8 (only used by trace recorder), RD = target ++ | branch_RD ++ | ins_next ++ break; ++ ++ /* -- Function headers -------------------------------------------------- */ ++ ++ case BC_FUNCF: ++ |.if JIT ++ | hotcall ++ |.endif ++ case BC_FUNCV: /* NYI: compiled vararg functions. */ ++ | // Fall through. Assumes BC_IFUNCF/BC_IFUNCV follow. ++ break; ++ ++ case BC_JFUNCF: ++#if !LJ_HASJIT ++ break; ++#endif ++ case BC_IFUNCF: ++ | // BASE = new base, RA = BASE+framesize*8, RB = LFUNC, RC = nargs*8 ++ | ld.d TMP2, L->maxstack ++ | ld.bu TMP1, -4+PC2PROTO(numparams)(PC) ++ | ld.d KBASE, -4+PC2PROTO(k)(PC) ++ | sltu AT, TMP2, RA ++ | slli.w TMP1, TMP1, 3 ++ | bnez AT, ->vm_growstack_l ++ if (op != BC_JFUNCF) { ++ | ins_next1 ++ } ++ |2: ++ | sltu AT, NARGS8:RC, TMP1 // Check for missing parameters. ++ | or r17, AT, r0 ++ | add.d AT, BASE, NARGS8:RC ++ | bnez r17, >3 ++ if (op == BC_JFUNCF) { ++ | decode_RD8a RD, INS ++ | decode_RD8b RD ++ | beq r0, r0, =>BC_JLOOP ++ } else { ++ | ins_next2 ++ } ++ | ++ |3: // Clear missing parameters. ++ | st.d TISNIL, 0(AT) ++ | addi.w NARGS8:RC, NARGS8:RC, 8 ++ | beq r0, r0, <2 ++ break; ++ ++ case BC_JFUNCV: ++#if !LJ_HASJIT ++ break; ++#endif ++ | NYI // NYI: compiled vararg functions ++ break; /* NYI: compiled vararg functions. */ ++ ++ case BC_IFUNCV: ++ | // BASE = new base, RA = BASE+framesize*8, RB = LFUNC, RC = nargs*8 ++ | .LI TMP0, LJ_TFUNC ++ | add.d TMP1, BASE, RC ++ | ld.d TMP2, L->maxstack ++ | settp LFUNC:RB, TMP0 ++ | add.d TMP0, RA, RC ++ | st.d LFUNC:RB, 0(TMP1) // Store (tagged) copy of LFUNC. ++ | addi.d TMP3, RC, 16+FRAME_VARG ++ | sltu AT, TMP0, TMP2 ++ | ld.d KBASE, -4+PC2PROTO(k)(PC) ++ | st.d TMP3, 8(TMP1) // Store delta + FRAME_VARG. ++ | beqz AT, ->vm_growstack_l ++ | ld.bu TMP2, -4+PC2PROTO(numparams)(PC) ++ | or RA, BASE, r0 ++ | or RC, TMP1, r0 ++ | ins_next1 ++ | addi.d BASE, TMP1, 16 ++ | beqz TMP2, >3 ++ |1: ++ | ld.d TMP0, 0(RA) ++ | sltu AT, RA, RC // Less args than parameters? ++ | or CARG1, TMP0, r0 ++ | maskeqz TMP0, TMP0, AT ++ | masknez TMP3, TISNIL, AT ++ | or TMP0, TMP0, TMP3 ++ | masknez TMP3, CARG1, AT ++ | maskeqz CARG1, TISNIL, AT ++ | or CARG1, CARG1, TMP3 ++ | addi.w TMP2, TMP2, -1 ++ | st.d TMP0, 16(TMP1) ++ | addi.d TMP1, TMP1, 8 ++ | st.d CARG1, 0(RA) ++ | addi.d RA, RA, 8 ++ | bnez TMP2, <1 ++ |3: ++ | ins_next2 ++ break; ++ ++ case BC_FUNCC: ++ case BC_FUNCCW: ++ | // BASE = new base, RA = BASE+framesize*8, RB = CFUNC, RC = nargs*8 ++ if (op == BC_FUNCC) { ++ | ld.d CFUNCADDR, CFUNC:RB->f ++ } else { ++ | .LDXD CFUNCADDR, DISPATCH, DISPATCH_GL(wrapf) ++ } ++ | add.d TMP1, RA, NARGS8:RC ++ | ld.d TMP2, L->maxstack ++ | add.d RC, BASE, NARGS8:RC ++ | st.d BASE, L->base ++ | sltu AT, TMP2, TMP1 ++ | st.d RC, L->top ++ | li_vmstate C ++ if (op == BC_FUNCCW) { ++ | ld.d CARG2, CFUNC:RB->f ++ } ++ | or CARG1, L, r0 ++ | bnez AT, ->vm_growstack_c // Need to grow stack. ++ | st_vmstate ++ | jirl r1, CFUNCADDR, 0 // (lua_State *L [, lua_CFunction f]) ++ | // Returns nresults. ++ | ld.d BASE, L->base ++ | slli.w RD, CRET1, 3 ++ | ld.d TMP1, L->top ++ | li_vmstate INTERP ++ | ld.d PC, FRAME_PC(BASE) // Fetch PC of caller. 
++ | sub.d RA, TMP1, RD // RA = L->top - nresults*8 ++ | .STXD L, DISPATCH, DISPATCH_GL(cur_L) ++ | st_vmstate ++ | beq r0, r0, ->vm_returnc ++ break; ++ ++ /* ---------------------------------------------------------------------- */ ++ ++ default: ++ fprintf(stderr, "Error: undefined opcode BC_%s\n", bc_names[op]); ++ exit(2); ++ break; ++ } ++} ++ ++static int build_backend(BuildCtx *ctx) ++{ ++ int op; ++ ++ dasm_growpc(Dst, BC__MAX); ++ ++ build_subroutines(ctx); ++ ++ |.code_op ++ for (op = 0; op < BC__MAX; op++) ++ build_ins(ctx, (BCOp)op, op); ++ ++ return BC__MAX; ++} ++ ++/* Emit pseudo frame-info for all assembler functions. */ ++static void emit_asm_debug(BuildCtx *ctx) ++{ ++ int fcofs = (int)((uint8_t *)ctx->glob[GLOB_vm_ffi_call] - ctx->code); ++ int i; ++ switch (ctx->mode) { ++ case BUILD_elfasm: ++ fprintf(ctx->fp, "\t.section .debug_frame,\"\",@progbits\n"); ++ fprintf(ctx->fp, ++ ".Lframe0:\n" ++ "\t.4byte .LECIE0-.LSCIE0\n" ++ ".LSCIE0:\n" ++ "\t.4byte 0xffffffff\n" ++ "\t.byte 0x1\n" ++ "\t.string \"\"\n" ++ "\t.uleb128 0x1\n" ++ "\t.sleb128 -4\n" ++ "\t.byte 31\n" ++ "\t.byte 0xc\n\t.uleb128 29\n\t.uleb128 0\n" ++ "\t.align 2\n" ++ ".LECIE0:\n\n"); ++ fprintf(ctx->fp, ++ ".LSFDE0:\n" ++ "\t.4byte .LEFDE0-.LASFDE0\n" ++ ".LASFDE0:\n" ++ "\t.4byte .Lframe0\n" ++ "\t.8byte .Lbegin\n" ++ "\t.8byte %d\n" ++ "\t.byte 0xe\n\t.uleb128 %d\n" ++ "\t.byte 0x9f\n\t.sleb128 2*5\n" ++ "\t.byte 0x9e\n\t.sleb128 2*6\n", ++ fcofs, CFRAME_SIZE); ++ for (i = 23; i >= 16; i--) ++ fprintf(ctx->fp, "\t.byte %d\n\t.uleb128 %d\n", 0x80+i, 2*(30-i)); ++#if !LJ_SOFTFP ++ for (i = 31; i >= 24; i--) ++ fprintf(ctx->fp, "\t.byte %d\n\t.uleb128 %d\n", 0x80+32+i, 2*(46-i)); ++#endif ++ fprintf(ctx->fp, ++ "\t.align 2\n" ++ ".LEFDE0:\n\n"); ++#if LJ_HASFFI ++ fprintf(ctx->fp, ++ ".LSFDE1:\n" ++ "\t.4byte .LEFDE1-.LASFDE1\n" ++ ".LASFDE1:\n" ++ "\t.4byte .Lframe0\n" ++ "\t.4byte lj_vm_ffi_call\n" ++ "\t.4byte %d\n" ++ "\t.byte 0x9f\n\t.uleb128 2*1\n" ++ "\t.byte 0x90\n\t.uleb128 2*2\n" ++ "\t.byte 0xd\n\t.uleb128 0x10\n" ++ "\t.align 2\n" ++ ".LEFDE1:\n\n", (int)ctx->codesz - fcofs); ++#endif ++#if !LJ_NO_UNWIND ++ /* NYI */ ++#endif ++ break; ++ default: ++ break; ++ } ++} ++ +diff --git a/libs/luajit/configure b/libs/luajit/configure +index 7d798f8..ea7d3c1 100755 +--- a/libs/luajit/configure ++++ b/libs/luajit/configure +@@ -15094,6 +15094,11 @@ then : + echo '-D__AARCH64EB__=1' >>native_flags + fi + ++elif grep 'LJ_TARGET_LOONGARCH64 ' conftest.i >/dev/null 2>&1 ++then : ++ LJARCH=loongarch64 ++ echo '-D__loongarch__=1' >>native_flags ++ + elif grep 'LJ_TARGET_PPC ' conftest.i >/dev/null 2>&1 + then : + LJARCH=ppc +diff --git a/libs/luajit/m4/lj-system.m4 b/libs/luajit/m4/lj-system.m4 +index 73ba282..7005664 100644 +--- a/libs/luajit/m4/lj-system.m4 ++++ b/libs/luajit/m4/lj-system.m4 +@@ -29,6 +29,9 @@ AS_IF([grep 'LJ_TARGET_X64 ' conftest.i >/dev/null 2>&1], + AS_IF([grep '__AARCH64EB__' conftest.i >/dev/null 2>&1], + [echo '-D__AARCH64EB__=1' >>native_flags]) + ], ++ [grep 'LJ_TARGET_LOONGARCH64 ' conftest.i >/dev/null 2>&1], ++ [LJARCH=loongarch64 ++ ], + [grep 'LJ_TARGET_PPC ' conftest.i >/dev/null 2>&1], + [LJARCH=ppc + AS_IF([grep 'LJ_LE 1' conftest.i >/dev/null 2>&1], diff --git a/thunderbird/PKGBUILD b/thunderbird/PKGBUILD index 59e53e379b..8e6ad4a43a 100644 --- a/thunderbird/PKGBUILD +++ b/thunderbird/PKGBUILD @@ -55,13 +55,14 @@ depends=( ) makedepends=( unzip zip diffutils python nasm mesa libpulse libice libsm - rust clang llvm cbindgen nodejs lld + rust clang llvm cbindgen nodejs 
#lld gawk perl findutils libotr wasi-compiler-rt wasi-libc wasi-libc++ wasi-libc++abi ) options=(!emptydirs !makeflags !lto) source=(https://archive.mozilla.org/pub/thunderbird/releases/$pkgver/source/thunderbird-$pkgver.source.tar.xz{,.asc} vendor-prefs.js distribution.ini + firefox-115-loong.patch mozconfig.cfg metainfo.patch org.mozilla.Thunderbird.desktop @@ -119,6 +120,10 @@ build() { # malloc_usable_size is used in various parts of the codebase CFLAGS="${CFLAGS/_FORTIFY_SOURCE=3/_FORTIFY_SOURCE=2}" CXXFLAGS="${CXXFLAGS/_FORTIFY_SOURCE=3/_FORTIFY_SOURCE=2}" + CFLAGS=${CFLAGS/-mlsx /} + CXXFLAGS=${CXXFLAGS/-mlsx /} + CFLAGS=${CFLAGS/-fstack-clash-protection/} + CXXFLAGS=${CXXFLAGS/-fstack-clash-protection/} ./mach configure ./mach build @@ -261,7 +266,8 @@ sha512sums=('de9edb81cf5da494101bf927a5b963ccdec0cc9bff87ebd72d896c6e25102c1113b 'SKIP' '6918c0de63deeddc6f53b9ba331390556c12e0d649cf54587dfaabb98b32d6a597b63cf02809c7c58b15501720455a724d527375a8fb9d757ccca57460320734' '5cd3ac4c94ef6dcce72fba02bc18b771a2f67906ff795e0e3d71ce7db6d8a41165bd5443908470915bdbdb98dddd9cf3f837c4ba3a36413f55ec570e6efdbb9f' - 'a34dd97954f415a5ffe956ca1f10718bd164950566ceba328805c2ccbb54ed9081df07f2e063479bf932c4a443bb5b7443cca2f82eea3914465ed6e4863e0c0e' + '0c1c085147db0569ec0365b9250e6b8181fe3ffbe6c22e1b5d752370eaa8d18425dfab612a906d10d5211394d232d9ee1a563b9d8d89d8f2105a4303f953eb94' + '702f1e889ec75e454245a46e485a554c51af1be94bdc0eeb42c466c5baee48106f41d5769f1f09888ad5bbe2db113bfbefbbea34111d6e2566126bfdb34d50b0' '7e43b1f25827ddae615ad43fc1e11c6ba439d6c2049477dfe60e00188a70c0a76160c59a97cc01d1fd99c476f261c7cecb57628b5be48874be7cf991c22db290' 'fffeb73e2055408c5598439b0214b3cb3bb4e53dac3090b880a55f64afcbc56ba5d32d1187829a08ef06d592513d158ced1fde2f20e2f01e967b5fbd3b2fafd4' '9897cb0ababc8e1a0001c4e1f70e0b39f5cdb9c08c69e3afd42088dfd001aa1fc6996cd83df0db1fb57ee0a80686c35c8df783108408dbe9191602cddd1e3c65' diff --git a/thunderbird/firefox-115-loong.patch b/thunderbird/firefox-115-loong.patch new file mode 100644 index 0000000000..ad71fd595f --- /dev/null +++ b/thunderbird/firefox-115-loong.patch @@ -0,0 +1,619 @@ +From 3751c1f6f1a0781eb35d65595773f7a251c5f319 Mon Sep 17 00:00:00 2001 +From: Kay Lin +Date: Wed, 20 Sep 2023 03:03:37 -0700 +Subject: [PATCH] Add support for LoongArch + +Adapted from LoongArchLinux. 
+ +Co-Authored-By: loongson +Co-Authored-By: WANG Xuerui # rebased for 115esr branch +--- + ipc/chromium/src/build/build_config.h | 3 + + third_party/libwebrtc/build/build_config.h | 4 + + .../rust/authenticator/.cargo-checksum.json | 2 +- + third_party/rust/authenticator/build.rs | 2 + + .../src/transport/linux/hidwrapper.rs | 3 + + .../src/transport/linux/ioctl_loongarch64.rs | 5 + + third_party/rust/cty/.cargo-checksum.json | 2 +- + third_party/rust/cty/src/lib.rs | 1 + + third_party/rust/nix/.cargo-checksum.json | 2 +- + third_party/rust/nix/src/sys/ioctl/linux.rs | 1 + + .../telemetry/pingsender/pingsender.cpp | 1 + + toolkit/moz.configure | 2 +- + xpcom/reflect/xptcall/md/unix/moz.build | 8 + + .../md/unix/xptcinvoke_asm_loongarch64.S | 91 ++++++++++ + .../md/unix/xptcinvoke_loongarch64.cpp | 100 +++++++++++ + .../md/unix/xptcstubs_asm_loongarch64.S | 52 ++++++ + .../xptcall/md/unix/xptcstubs_loongarch64.cpp | 159 ++++++++++++++++++ + 17 files changed, 434 insertions(+), 4 deletions(-) + create mode 100644 third_party/rust/authenticator/src/transport/linux/ioctl_loongarch64.rs + create mode 100644 xpcom/reflect/xptcall/md/unix/xptcinvoke_asm_loongarch64.S + create mode 100644 xpcom/reflect/xptcall/md/unix/xptcinvoke_loongarch64.cpp + create mode 100644 xpcom/reflect/xptcall/md/unix/xptcstubs_asm_loongarch64.S + create mode 100644 xpcom/reflect/xptcall/md/unix/xptcstubs_loongarch64.cpp + +diff --git a/ipc/chromium/src/build/build_config.h b/ipc/chromium/src/build/build_config.h +index 511f36858c034..1104c4f4665ae 100644 +--- a/ipc/chromium/src/build/build_config.h ++++ b/ipc/chromium/src/build/build_config.h +@@ -126,6 +126,9 @@ + # define ARCH_CPU_ARM_FAMILY 1 + # define ARCH_CPU_ARM64 1 + # define ARCH_CPU_64_BITS 1 ++#elif defined(__loongarch_lp64) ++# define ARCH_CPU_LOONGARCH64 1 ++# define ARCH_CPU_64_BITS 1 + #elif defined(__riscv) && __riscv_xlen == 64 + # define ARCH_CPU_RISCV64 1 + # define ARCH_CPU_64_BITS 1 +diff --git a/third_party/libwebrtc/build/build_config.h b/third_party/libwebrtc/build/build_config.h +index c39ae9da50f99..28191de02654b 100644 +--- a/third_party/libwebrtc/build/build_config.h ++++ b/third_party/libwebrtc/build/build_config.h +@@ -210,6 +210,10 @@ + #define ARCH_CPU_SPARC 1 + #define ARCH_CPU_32_BITS 1 + #define ARCH_CPU_BIG_ENDIAN 1 ++#elif defined(__loongarch_lp64) ++#define ARCH_CPU_LOONGARCH64 1 ++#define ARCH_CPU_64_BITS 1 ++#define ARCH_CPU_LITTLE_ENDIAN 1 + #else + #error Please add support for your architecture in build/build_config.h + #endif +diff --git a/third_party/rust/authenticator/.cargo-checksum.json b/third_party/rust/authenticator/.cargo-checksum.json +index 080c46c4c00c0..3f93dda5cb1f3 100644 +--- a/third_party/rust/authenticator/.cargo-checksum.json ++++ b/third_party/rust/authenticator/.cargo-checksum.json +@@ -1 +1 @@ 
+-{"files":{"Cargo.lock":"803a1ca7735f93e1d952a07291a6976db787b6530bc67f9e3d2ae2dcaf8a90cc","Cargo.toml":"e8f07adde7f2c71a96cbe3809ab605a9082b8ccaf8d2a69aacb6d5db90fddcdc","Cross.toml":"8d132da818d48492aa9f4b78a348f0df3adfae45d988d42ebd6be8a5adadb6c3","LICENSE":"e866c8f5864d4cacfe403820e722e9dc03fe3c7565efa5e4dad9051d827bb92a","README.md":"c87d9c7cc44f1dd4ef861a3a9f8cd2eb68aedd3814768871f5fb63c2070806cd","build.rs":"01092254718e4cd5d6bffcd64d55cc3240dc00e79f3d7344a5dc4abf6c27bca6","examples/ctap2.rs":"51709e50dd23477f6f91225c09fca08824a00abdc851727b2f3bd9dcd746378e","examples/ctap2_discoverable_creds.rs":"952207c39bad1995998c686f99fbca39268e930099b0086a09adeb5d12931df6","examples/interactive_management.rs":"27d2578fca7672477584bb3a74db182295c85e4aa6ae2d8edfd849fc0018c413","examples/reset.rs":"b13d3a2ed3544018ede8660ec0cc79732139e792d4e55c2c6fb517ad376b36ad","examples/set_pin.rs":"991d9bd66fd6bdd9dd8627ed710fe100a3dfb65b968031f768ee9a28e1e995d7","examples/test_exclude_list.rs":"20577d6887b00c99d2ae404e1b1f64c746ecc774bd2f9f0f8d1c5bb6a6f30292","rustfmt.toml":"ceb6615363d6fff16426eb56f5727f98a7f7ed459ba9af735b1d8b672e2c3b9b","src/authenticatorservice.rs":"dc756ae9d420dac187b04afbb4831527c12fa307ef072f1c1cb4480df9cbda5f","src/consts.rs":"44fb7c396dc87d1657d1feed08e956fc70608c0b06a034716b626419b442bcfe","src/crypto/dummy.rs":"9cc6be0dc1e28c7328121e7a4bf435211ae8b1455784472b24993571c4009579","src/crypto/mod.rs":"e4342dd93fd41bf48fa26386188ed92db5f908ad4d69f32f080a65228c6d5390","src/crypto/nss.rs":"2bf33898728760f194f204876450d0906b47907d259270f6e3d43c62a709c99a","src/crypto/openssl.rs":"ef6e4dbcc7230137e505e3fc4ad37e102e6b26b37470afd0f4709a297b3aa546","src/ctap2/attestation.rs":"e3c581154fb6bd4e4d8bd2326515864849b21766f5344e2d955d607b360fc930","src/ctap2/client_data.rs":"04ee84b34e91c988183871b4975fc08e12234965187c793ad26d0d82ed44642f","src/ctap2/commands/client_pin.rs":"7f3a49b23592e985b8f32d43688593ff7411a05cb594444e24851c13f093cdef","src/ctap2/commands/get_assertion.rs":"e9cd68cff2ee54156af6e3e424691a06354aafffcc374a40ccc9622f030c4999","src/ctap2/commands/get_info.rs":"79117c39d280445fb17be057af2f45ec1d80651ea1c8b478e07118ade808291b","src/ctap2/commands/get_next_assertion.rs":"8a8fa69cb4079a21ff4734067e74784b2bfee3c20ddcc0b35675ce77a3d83ae9","src/ctap2/commands/get_version.rs":"958c273c6156af102bba515de42e4a5ae43f36b4d2d1814d922c269c500f6ce2","src/ctap2/commands/make_credentials.rs":"524cb3378fcc2b08696ab25bf5473e149af307d18ef503a4ee971b4b7e087ff3","src/ctap2/commands/mod.rs":"916eb63b3e46968a9e79d088dd217c2b80dc1c4d14beaf12803e91b7987b6c32","src/ctap2/commands/reset.rs":"45500500c900124f96269679862ceeb18e87111096d322c87c766f2694e576fc","src/ctap2/commands/selection.rs":"7832d62bf37ddbbaf996d84f905c2cdca7dceb529c8f9f1fe82eb288da886068","src/ctap2/mod.rs":"5953ee33ee5930437f9d91299f8a6fdbc21bc62297ae4194901893ef0a5ac82a","src/ctap2/preflight.rs":"1cd41e948955a8bcb22a2e55e254dad1be74590b6016437914e93a2639222aef","src/ctap2/server.rs":"61e2afa1bc3ce1d61743073f14c1a385d064e5deed2b8a194e32e0ccbd4243ad","src/ctap2/utils.rs":"ad0aa36a0dbeb510b7f37789329f1957eab206eb529dc083e6176b142984e26e","src/errors.rs":"a99e5fbdad315ba1589b116fc227310996ef900498b595545228be35744b2038","src/lib.rs":"d42fc78ab81b6fdd66ebe35951a4395a3656f557795cff4c8bfcc54199cabfcd","src/manager.rs":"d72f8523d0a549487504ef6d370aee9132ad7436aaae777e6d65a0a03f3c0c27","src/statecallback.rs":"6b16f97176db1ae3fc3851fe8394e4ffc324bc6fe59313845ac3a88132fd52f1","src/statemachine.rs":"3b1b08efda156bc8c00bad27096a95177217ad77cb041530a03b8903b
a51d7e0","src/status_update.rs":"d032524f2c36c5a32db9dd424decf4577cea65adceca91bb1dfcdc07c58289cb","src/transport/device_selector.rs":"c703aa8e59b0b7ac9d11be0aac434dffda8b0c91e1a84298c48e598978e1576e","src/transport/errors.rs":"5af7cb8d22ffa63bf4264d182a0f54b9b3a2cc9d19d832b3495857229f9a2875","src/transport/freebsd/device.rs":"f41c7cf29c48bf2b403cf460e6387864372a134d6daeefc5c3afc3f40d0d4575","src/transport/freebsd/mod.rs":"42dcb57fbeb00140003a8ad39acac9b547062b8f281a3fa5deb5f92a6169dde6","src/transport/freebsd/monitor.rs":"a6b34af4dd2e357a5775b1f3a723766107c11ef98dba859b1188ed08e0e450a2","src/transport/freebsd/transaction.rs":"ec28475a70dded260f9a7908c7f88dd3771f5d64b9a5dda835411d13b713c39a","src/transport/freebsd/uhid.rs":"a194416a8bc5d428c337f8d96a2248769ca190810852bbe5ee686ab595d8eb4c","src/transport/hid.rs":"033e0f1bf6428a1d4077e5abb53dbfa193ef72dd8a98b7666d7b5fb45a6570f0","src/transport/hidproto.rs":"9d490f161807b75f4d7d5096355006627c1f47c0d90fca53bade3692efc92a2d","src/transport/linux/device.rs":"e79bd06d98723a0d7e4f25b7cf2ac3e0260b10e52d2b0695909d2932288e10a4","src/transport/linux/hidraw.rs":"c7a0df9b4e51cb2736218ffffa02b2b2547b7c515d69f9bae2c9a8c8f1cb547b","src/transport/linux/hidwrapper.h":"72785db3a9b27ea72b6cf13a958fee032af54304522d002f56322473978a20f9","src/transport/linux/hidwrapper.rs":"753c7459dbb73befdd186b6269ac33f7a4537b4c935928f50f2b2131756e787d","src/transport/linux/ioctl_aarch64le.rs":"2d8b265cd39a9f46816f83d5a5df0701c13eb842bc609325bad42ce50add3bf0","src/transport/linux/ioctl_armle.rs":"2d8b265cd39a9f46816f83d5a5df0701c13eb842bc609325bad42ce50add3bf0","src/transport/linux/ioctl_mips64le.rs":"fbda309934ad8bda689cd4fb5c0ca696fe26dedb493fe9d5a5322c3047d474fd","src/transport/linux/ioctl_mipsbe.rs":"fbda309934ad8bda689cd4fb5c0ca696fe26dedb493fe9d5a5322c3047d474fd","src/transport/linux/ioctl_mipsle.rs":"fbda309934ad8bda689cd4fb5c0ca696fe26dedb493fe9d5a5322c3047d474fd","src/transport/linux/ioctl_powerpc64be.rs":"fbda309934ad8bda689cd4fb5c0ca696fe26dedb493fe9d5a5322c3047d474fd","src/transport/linux/ioctl_powerpc64le.rs":"fbda309934ad8bda689cd4fb5c0ca696fe26dedb493fe9d5a5322c3047d474fd","src/transport/linux/ioctl_powerpcbe.rs":"fbda309934ad8bda689cd4fb5c0ca696fe26dedb493fe9d5a5322c3047d474fd","src/transport/linux/ioctl_riscv64.rs":"2d8b265cd39a9f46816f83d5a5df0701c13eb842bc609325bad42ce50add3bf0","src/transport/linux/ioctl_s390xbe.rs":"2d8b265cd39a9f46816f83d5a5df0701c13eb842bc609325bad42ce50add3bf0","src/transport/linux/ioctl_x86.rs":"2d8b265cd39a9f46816f83d5a5df0701c13eb842bc609325bad42ce50add3bf0","src/transport/linux/ioctl_x86_64.rs":"2d8b265cd39a9f46816f83d5a5df0701c13eb842bc609325bad42ce50add3bf0","src/transport/linux/mod.rs":"446e435126d2a58f167f648dd95cba28e8ac9c17f1f799e1eaeab80ea800fc57","src/transport/linux/monitor.rs":"5e3ec2618dd74027ae6ca1527991254e3271cce59106d4920ce0414094e22f64","src/transport/linux/transaction.rs":"ec28475a70dded260f9a7908c7f88dd3771f5d64b9a5dda835411d13b713c39a","src/transport/macos/device.rs":"f508d0585079ecf87a73d6135c52e8b5a887fbf16e241676d51a8099a8001a81","src/transport/macos/iokit.rs":"7dc4e7bbf8e42e2fcde0cee8e48d14d6234a5a910bd5d3c4e966d8ba6b73992f","src/transport/macos/mod.rs":"333e561554fc901d4f6092f6e4c85823e2b0c4ff31c9188d0e6d542b71a0a07c","src/transport/macos/monitor.rs":"e02288454bb4010e06b705d82646abddb3799f0cd655f574aa19f9d91485a4a2","src/transport/macos/transaction.rs":"9dcdebd13d5fd5a185b5ad777a80c825a6ba5e76b141c238aa115b451b9a72fa","src/transport/mock/device.rs":"582b2b55f13d95dd9f1127e3dde49d2137a5ca020f9c1fa1ffa5c4083d05c0
e7","src/transport/mock/mod.rs":"9c4c87efd19adddc1a91c699a6c328063cfbac5531b76346a5ff92e986aded8f","src/transport/mock/transaction.rs":"be3ed8c389dfa04122364b82515edd76fad6f5d5f72d15cacd45a84fb8397292","src/transport/mod.rs":"e28d72b6f3fdaff21f940c4db213067cd94f5832f864ecaad1c9901d5aea9b79","src/transport/netbsd/device.rs":"a7dec83b5040faf1a8ddb37e9fc2b45b9b12814be4802b3b351eff081d1b80c3","src/transport/netbsd/fd.rs":"5464019025d03ea2a39c82f76b238bbbdb0ea63f5a5fc7c9d974e235139cd53b","src/transport/netbsd/mod.rs":"b1c52aa29537330cebe67427062d6c94871cab2a9b0c04b2305d686f07e88fd5","src/transport/netbsd/monitor.rs":"fb2917e4ba53cc9867987a539061f82d011f4c6e478df1157d965d32df2eb922","src/transport/netbsd/transaction.rs":"ec28475a70dded260f9a7908c7f88dd3771f5d64b9a5dda835411d13b713c39a","src/transport/netbsd/uhid.rs":"d15be35e2413240066a8f086bb8846b08a6a92bf6a1941c3eec1329dd3a4f9ce","src/transport/openbsd/device.rs":"47d8dfeb12c33e6cada2b2cd76476827059c797d8a16f2c4aea6e78d32ebab46","src/transport/openbsd/mod.rs":"514274d414042ff84b3667a41a736e78581e22fda87ccc97c2bc05617e381a30","src/transport/openbsd/monitor.rs":"2e0ba6ecc69b450be9cbfd21a7c65036ed2ce593b12363596d3eae0b5bfb79e8","src/transport/openbsd/transaction.rs":"ec28475a70dded260f9a7908c7f88dd3771f5d64b9a5dda835411d13b713c39a","src/transport/stub/device.rs":"aa21711d6690ed68bd878b28463172ba69c6324be7afabeccb1f07b4831cb020","src/transport/stub/mod.rs":"6a7fec504a52d403b0241b18cd8b95088a31807571f4c0a67e4055afc74f4453","src/transport/stub/transaction.rs":"c9a3ade9562468163f28fd51e7ff3e0bf5854b7edade9e987000d11c5d0e62d2","src/transport/windows/device.rs":"148b1572ed5fa8d476efbdb2a3a35608ec23012d6a805129f3c25c453bab4b7a","src/transport/windows/mod.rs":"218e7f2fe91ecb390c12bba5a5ffdad2c1f0b22861c937f4d386262e5b3dd617","src/transport/windows/monitor.rs":"95913d49e7d83482e420493d89b53ffceb6a49e646a87de934dff507b3092b4c","src/transport/windows/transaction.rs":"ec28475a70dded260f9a7908c7f88dd3771f5d64b9a5dda835411d13b713c39a","src/transport/windows/winapi.rs":"b2a4cc85f14e39cadfbf068ee001c9d776f028d3cf09cb926d4364c5b437c112","src/u2fprotocol.rs":"e61ac223aab79ae82383cd32a23213d18461e229c448373bf2483357a9eae69e","src/u2ftypes.rs":"8511c6f04f69670ddd403178a46060644a27128ca4077a9a3e00bc6671e3864b","src/util.rs":"cf37c4c3caf6dde4fc3cf6f5f297ed3c0f13bcb50fb0e8955899fc837483ef31","src/virtualdevices/mod.rs":"2c7df7691d5c150757304241351612aed4260d65b70ab0f483edbc1a5cfb5674","src/virtualdevices/software_u2f.rs":"83e63c0c4a597e71d87b5cd1f33a49646d00b3062edbdd05c51623b80fb60168","src/virtualdevices/webdriver/mod.rs":"4a36e6dfa9f45f941d863b4039bfbcfa8eaca660bd6ed78aeb1a2962db64be5a","src/virtualdevices/webdriver/testtoken.rs":"7146e02f1a5dad2c8827dd11c12ee408c0e42a0706ac65f139998feffd42570f","src/virtualdevices/webdriver/virtualmanager.rs":"7205a0397833628fc0847aa942a6a314dc1e23306858b546053e0de6a360ebe1","src/virtualdevices/webdriver/web_api.rs":"9032525af458b6fe9a3274c36b6ef8c791ecc4ec46d38ae36583fc9a4535b59d","testing/cross/powerpc64le-unknown-linux-gnu.Dockerfile":"d7463ff4376e3e0ca3fed879fab4aa975c4c0a3e7924c5b88aef9381a5d013de","testing/cross/x86_64-unknown-linux-gnu.Dockerfile":"11c79c04b07a171b0c9b63ef75fa75f33263ce76e3c1eda0879a3e723ebd0c24","testing/run_cross.sh":"cc2a7e0359f210eba2e7121f81eb8ab0125cea6e0d0f2698177b0fe2ad0c33d8","webdriver-tools/requirements.txt":"8236aa3dedad886f213c9b778fec80b037212d30e640b458984110211d546005","webdriver-tools/webdriver-driver.py":"82327c26ba271d1689acc87b612ab8436cb5475f0a3c0dba7baa06e7f6f5e19c"},"package":"aa0e182b7
7b6b19eaf9c7b69fddf3be970169ec6d34eca3f5d682ab948727e57"} +\ No newline at end of file ++{"files":{"Cargo.lock":"803a1ca7735f93e1d952a07291a6976db787b6530bc67f9e3d2ae2dcaf8a90cc","Cargo.toml":"e8f07adde7f2c71a96cbe3809ab605a9082b8ccaf8d2a69aacb6d5db90fddcdc","Cross.toml":"8d132da818d48492aa9f4b78a348f0df3adfae45d988d42ebd6be8a5adadb6c3","LICENSE":"e866c8f5864d4cacfe403820e722e9dc03fe3c7565efa5e4dad9051d827bb92a","README.md":"c87d9c7cc44f1dd4ef861a3a9f8cd2eb68aedd3814768871f5fb63c2070806cd","build.rs":"5b909f42e52ed2056afa3693544ef1c1dc5e90d00e7d8730175a228bd0233b43","examples/ctap2.rs":"51709e50dd23477f6f91225c09fca08824a00abdc851727b2f3bd9dcd746378e","examples/ctap2_discoverable_creds.rs":"952207c39bad1995998c686f99fbca39268e930099b0086a09adeb5d12931df6","examples/interactive_management.rs":"27d2578fca7672477584bb3a74db182295c85e4aa6ae2d8edfd849fc0018c413","examples/reset.rs":"b13d3a2ed3544018ede8660ec0cc79732139e792d4e55c2c6fb517ad376b36ad","examples/set_pin.rs":"991d9bd66fd6bdd9dd8627ed710fe100a3dfb65b968031f768ee9a28e1e995d7","examples/test_exclude_list.rs":"20577d6887b00c99d2ae404e1b1f64c746ecc774bd2f9f0f8d1c5bb6a6f30292","rustfmt.toml":"ceb6615363d6fff16426eb56f5727f98a7f7ed459ba9af735b1d8b672e2c3b9b","src/authenticatorservice.rs":"dc756ae9d420dac187b04afbb4831527c12fa307ef072f1c1cb4480df9cbda5f","src/consts.rs":"44fb7c396dc87d1657d1feed08e956fc70608c0b06a034716b626419b442bcfe","src/crypto/dummy.rs":"9cc6be0dc1e28c7328121e7a4bf435211ae8b1455784472b24993571c4009579","src/crypto/mod.rs":"e4342dd93fd41bf48fa26386188ed92db5f908ad4d69f32f080a65228c6d5390","src/crypto/nss.rs":"2bf33898728760f194f204876450d0906b47907d259270f6e3d43c62a709c99a","src/crypto/openssl.rs":"ef6e4dbcc7230137e505e3fc4ad37e102e6b26b37470afd0f4709a297b3aa546","src/ctap2/attestation.rs":"e3c581154fb6bd4e4d8bd2326515864849b21766f5344e2d955d607b360fc930","src/ctap2/client_data.rs":"04ee84b34e91c988183871b4975fc08e12234965187c793ad26d0d82ed44642f","src/ctap2/commands/client_pin.rs":"7f3a49b23592e985b8f32d43688593ff7411a05cb594444e24851c13f093cdef","src/ctap2/commands/get_assertion.rs":"e9cd68cff2ee54156af6e3e424691a06354aafffcc374a40ccc9622f030c4999","src/ctap2/commands/get_info.rs":"79117c39d280445fb17be057af2f45ec1d80651ea1c8b478e07118ade808291b","src/ctap2/commands/get_next_assertion.rs":"8a8fa69cb4079a21ff4734067e74784b2bfee3c20ddcc0b35675ce77a3d83ae9","src/ctap2/commands/get_version.rs":"958c273c6156af102bba515de42e4a5ae43f36b4d2d1814d922c269c500f6ce2","src/ctap2/commands/make_credentials.rs":"524cb3378fcc2b08696ab25bf5473e149af307d18ef503a4ee971b4b7e087ff3","src/ctap2/commands/mod.rs":"916eb63b3e46968a9e79d088dd217c2b80dc1c4d14beaf12803e91b7987b6c32","src/ctap2/commands/reset.rs":"45500500c900124f96269679862ceeb18e87111096d322c87c766f2694e576fc","src/ctap2/commands/selection.rs":"7832d62bf37ddbbaf996d84f905c2cdca7dceb529c8f9f1fe82eb288da886068","src/ctap2/mod.rs":"5953ee33ee5930437f9d91299f8a6fdbc21bc62297ae4194901893ef0a5ac82a","src/ctap2/preflight.rs":"1cd41e948955a8bcb22a2e55e254dad1be74590b6016437914e93a2639222aef","src/ctap2/server.rs":"61e2afa1bc3ce1d61743073f14c1a385d064e5deed2b8a194e32e0ccbd4243ad","src/ctap2/utils.rs":"ad0aa36a0dbeb510b7f37789329f1957eab206eb529dc083e6176b142984e26e","src/errors.rs":"a99e5fbdad315ba1589b116fc227310996ef900498b595545228be35744b2038","src/lib.rs":"d42fc78ab81b6fdd66ebe35951a4395a3656f557795cff4c8bfcc54199cabfcd","src/manager.rs":"d72f8523d0a549487504ef6d370aee9132ad7436aaae777e6d65a0a03f3c0c27","src/statecallback.rs":"6b16f97176db1ae3fc3851fe8394e4ffc324bc6fe59313845ac3a88132f
d52f1","src/statemachine.rs":"3b1b08efda156bc8c00bad27096a95177217ad77cb041530a03b8903ba51d7e0","src/status_update.rs":"d032524f2c36c5a32db9dd424decf4577cea65adceca91bb1dfcdc07c58289cb","src/transport/device_selector.rs":"c703aa8e59b0b7ac9d11be0aac434dffda8b0c91e1a84298c48e598978e1576e","src/transport/errors.rs":"5af7cb8d22ffa63bf4264d182a0f54b9b3a2cc9d19d832b3495857229f9a2875","src/transport/freebsd/device.rs":"f41c7cf29c48bf2b403cf460e6387864372a134d6daeefc5c3afc3f40d0d4575","src/transport/freebsd/mod.rs":"42dcb57fbeb00140003a8ad39acac9b547062b8f281a3fa5deb5f92a6169dde6","src/transport/freebsd/monitor.rs":"a6b34af4dd2e357a5775b1f3a723766107c11ef98dba859b1188ed08e0e450a2","src/transport/freebsd/transaction.rs":"ec28475a70dded260f9a7908c7f88dd3771f5d64b9a5dda835411d13b713c39a","src/transport/freebsd/uhid.rs":"a194416a8bc5d428c337f8d96a2248769ca190810852bbe5ee686ab595d8eb4c","src/transport/hid.rs":"033e0f1bf6428a1d4077e5abb53dbfa193ef72dd8a98b7666d7b5fb45a6570f0","src/transport/hidproto.rs":"9d490f161807b75f4d7d5096355006627c1f47c0d90fca53bade3692efc92a2d","src/transport/linux/device.rs":"e79bd06d98723a0d7e4f25b7cf2ac3e0260b10e52d2b0695909d2932288e10a4","src/transport/linux/hidraw.rs":"c7a0df9b4e51cb2736218ffffa02b2b2547b7c515d69f9bae2c9a8c8f1cb547b","src/transport/linux/hidwrapper.h":"72785db3a9b27ea72b6cf13a958fee032af54304522d002f56322473978a20f9","src/transport/linux/hidwrapper.rs":"d203e8804e7632b8d47a224c186d1f431800f04ddc43360d5c086f71e9b0f674","src/transport/linux/ioctl_aarch64le.rs":"2d8b265cd39a9f46816f83d5a5df0701c13eb842bc609325bad42ce50add3bf0","src/transport/linux/ioctl_armle.rs":"2d8b265cd39a9f46816f83d5a5df0701c13eb842bc609325bad42ce50add3bf0","src/transport/linux/ioctl_mips64le.rs":"fbda309934ad8bda689cd4fb5c0ca696fe26dedb493fe9d5a5322c3047d474fd","src/transport/linux/ioctl_mipsbe.rs":"fbda309934ad8bda689cd4fb5c0ca696fe26dedb493fe9d5a5322c3047d474fd","src/transport/linux/ioctl_mipsle.rs":"fbda309934ad8bda689cd4fb5c0ca696fe26dedb493fe9d5a5322c3047d474fd","src/transport/linux/ioctl_powerpc64be.rs":"fbda309934ad8bda689cd4fb5c0ca696fe26dedb493fe9d5a5322c3047d474fd","src/transport/linux/ioctl_powerpc64le.rs":"fbda309934ad8bda689cd4fb5c0ca696fe26dedb493fe9d5a5322c3047d474fd","src/transport/linux/ioctl_powerpcbe.rs":"fbda309934ad8bda689cd4fb5c0ca696fe26dedb493fe9d5a5322c3047d474fd","src/transport/linux/ioctl_riscv64.rs":"2d8b265cd39a9f46816f83d5a5df0701c13eb842bc609325bad42ce50add3bf0","src/transport/linux/ioctl_s390xbe.rs":"2d8b265cd39a9f46816f83d5a5df0701c13eb842bc609325bad42ce50add3bf0","src/transport/linux/ioctl_x86.rs":"2d8b265cd39a9f46816f83d5a5df0701c13eb842bc609325bad42ce50add3bf0","src/transport/linux/ioctl_x86_64.rs":"2d8b265cd39a9f46816f83d5a5df0701c13eb842bc609325bad42ce50add3bf0","src/transport/linux/ioctl_loongarch64.rs":"2d8b265cd39a9f46816f83d5a5df0701c13eb842bc609325bad42ce50add3bf0","src/transport/linux/mod.rs":"446e435126d2a58f167f648dd95cba28e8ac9c17f1f799e1eaeab80ea800fc57","src/transport/linux/monitor.rs":"5e3ec2618dd74027ae6ca1527991254e3271cce59106d4920ce0414094e22f64","src/transport/linux/transaction.rs":"ec28475a70dded260f9a7908c7f88dd3771f5d64b9a5dda835411d13b713c39a","src/transport/macos/device.rs":"f508d0585079ecf87a73d6135c52e8b5a887fbf16e241676d51a8099a8001a81","src/transport/macos/iokit.rs":"7dc4e7bbf8e42e2fcde0cee8e48d14d6234a5a910bd5d3c4e966d8ba6b73992f","src/transport/macos/mod.rs":"333e561554fc901d4f6092f6e4c85823e2b0c4ff31c9188d0e6d542b71a0a07c","src/transport/macos/monitor.rs":"e02288454bb4010e06b705d82646abddb3799f0cd655f574aa19f9d91485a4a2","
src/transport/macos/transaction.rs":"9dcdebd13d5fd5a185b5ad777a80c825a6ba5e76b141c238aa115b451b9a72fa","src/transport/mock/device.rs":"582b2b55f13d95dd9f1127e3dde49d2137a5ca020f9c1fa1ffa5c4083d05c0e7","src/transport/mock/mod.rs":"9c4c87efd19adddc1a91c699a6c328063cfbac5531b76346a5ff92e986aded8f","src/transport/mock/transaction.rs":"be3ed8c389dfa04122364b82515edd76fad6f5d5f72d15cacd45a84fb8397292","src/transport/mod.rs":"e28d72b6f3fdaff21f940c4db213067cd94f5832f864ecaad1c9901d5aea9b79","src/transport/netbsd/device.rs":"a7dec83b5040faf1a8ddb37e9fc2b45b9b12814be4802b3b351eff081d1b80c3","src/transport/netbsd/fd.rs":"5464019025d03ea2a39c82f76b238bbbdb0ea63f5a5fc7c9d974e235139cd53b","src/transport/netbsd/mod.rs":"b1c52aa29537330cebe67427062d6c94871cab2a9b0c04b2305d686f07e88fd5","src/transport/netbsd/monitor.rs":"fb2917e4ba53cc9867987a539061f82d011f4c6e478df1157d965d32df2eb922","src/transport/netbsd/transaction.rs":"ec28475a70dded260f9a7908c7f88dd3771f5d64b9a5dda835411d13b713c39a","src/transport/netbsd/uhid.rs":"d15be35e2413240066a8f086bb8846b08a6a92bf6a1941c3eec1329dd3a4f9ce","src/transport/openbsd/device.rs":"47d8dfeb12c33e6cada2b2cd76476827059c797d8a16f2c4aea6e78d32ebab46","src/transport/openbsd/mod.rs":"514274d414042ff84b3667a41a736e78581e22fda87ccc97c2bc05617e381a30","src/transport/openbsd/monitor.rs":"2e0ba6ecc69b450be9cbfd21a7c65036ed2ce593b12363596d3eae0b5bfb79e8","src/transport/openbsd/transaction.rs":"ec28475a70dded260f9a7908c7f88dd3771f5d64b9a5dda835411d13b713c39a","src/transport/stub/device.rs":"aa21711d6690ed68bd878b28463172ba69c6324be7afabeccb1f07b4831cb020","src/transport/stub/mod.rs":"6a7fec504a52d403b0241b18cd8b95088a31807571f4c0a67e4055afc74f4453","src/transport/stub/transaction.rs":"c9a3ade9562468163f28fd51e7ff3e0bf5854b7edade9e987000d11c5d0e62d2","src/transport/windows/device.rs":"148b1572ed5fa8d476efbdb2a3a35608ec23012d6a805129f3c25c453bab4b7a","src/transport/windows/mod.rs":"218e7f2fe91ecb390c12bba5a5ffdad2c1f0b22861c937f4d386262e5b3dd617","src/transport/windows/monitor.rs":"95913d49e7d83482e420493d89b53ffceb6a49e646a87de934dff507b3092b4c","src/transport/windows/transaction.rs":"ec28475a70dded260f9a7908c7f88dd3771f5d64b9a5dda835411d13b713c39a","src/transport/windows/winapi.rs":"b2a4cc85f14e39cadfbf068ee001c9d776f028d3cf09cb926d4364c5b437c112","src/u2fprotocol.rs":"e61ac223aab79ae82383cd32a23213d18461e229c448373bf2483357a9eae69e","src/u2ftypes.rs":"8511c6f04f69670ddd403178a46060644a27128ca4077a9a3e00bc6671e3864b","src/util.rs":"cf37c4c3caf6dde4fc3cf6f5f297ed3c0f13bcb50fb0e8955899fc837483ef31","src/virtualdevices/mod.rs":"2c7df7691d5c150757304241351612aed4260d65b70ab0f483edbc1a5cfb5674","src/virtualdevices/software_u2f.rs":"83e63c0c4a597e71d87b5cd1f33a49646d00b3062edbdd05c51623b80fb60168","src/virtualdevices/webdriver/mod.rs":"4a36e6dfa9f45f941d863b4039bfbcfa8eaca660bd6ed78aeb1a2962db64be5a","src/virtualdevices/webdriver/testtoken.rs":"7146e02f1a5dad2c8827dd11c12ee408c0e42a0706ac65f139998feffd42570f","src/virtualdevices/webdriver/virtualmanager.rs":"7205a0397833628fc0847aa942a6a314dc1e23306858b546053e0de6a360ebe1","src/virtualdevices/webdriver/web_api.rs":"9032525af458b6fe9a3274c36b6ef8c791ecc4ec46d38ae36583fc9a4535b59d","testing/cross/powerpc64le-unknown-linux-gnu.Dockerfile":"d7463ff4376e3e0ca3fed879fab4aa975c4c0a3e7924c5b88aef9381a5d013de","testing/cross/x86_64-unknown-linux-gnu.Dockerfile":"11c79c04b07a171b0c9b63ef75fa75f33263ce76e3c1eda0879a3e723ebd0c24","testing/run_cross.sh":"cc2a7e0359f210eba2e7121f81eb8ab0125cea6e0d0f2698177b0fe2ad0c33d8","webdriver-tools/requirements.t
xt":"8236aa3dedad886f213c9b778fec80b037212d30e640b458984110211d546005","webdriver-tools/webdriver-driver.py":"82327c26ba271d1689acc87b612ab8436cb5475f0a3c0dba7baa06e7f6f5e19c"},"package":"aa0e182b77b6b19eaf9c7b69fddf3be970169ec6d34eca3f5d682ab948727e57"} +diff --git a/third_party/rust/authenticator/build.rs b/third_party/rust/authenticator/build.rs +index 58f6cfa393aaa..acc4f09466f7d 100644 +--- a/third_party/rust/authenticator/build.rs ++++ b/third_party/rust/authenticator/build.rs +@@ -47,6 +47,8 @@ fn main() { + "ioctl_s390xbe.rs" + } else if cfg!(all(target_arch = "riscv64", target_endian = "little")) { + "ioctl_riscv64.rs" ++ } else if cfg!(all(target_arch = "loongarch64", target_endian = "little")) { ++ "ioctl_loongarch64.rs" + } else { + panic!("architecture not supported"); + }; +diff --git a/third_party/rust/authenticator/src/transport/linux/hidwrapper.rs b/third_party/rust/authenticator/src/transport/linux/hidwrapper.rs +index 82aabc6301017..bc8582c5b1491 100644 +--- a/third_party/rust/authenticator/src/transport/linux/hidwrapper.rs ++++ b/third_party/rust/authenticator/src/transport/linux/hidwrapper.rs +@@ -49,3 +49,6 @@ include!("ioctl_s390xbe.rs"); + + #[cfg(all(target_arch = "riscv64", target_endian = "little"))] + include!("ioctl_riscv64.rs"); ++ ++#[cfg(all(target_arch = "loongarch64", target_endian = "little"))] ++include!("ioctl_loongarch64.rs"); +diff --git a/third_party/rust/authenticator/src/transport/linux/ioctl_loongarch64.rs b/third_party/rust/authenticator/src/transport/linux/ioctl_loongarch64.rs +new file mode 100644 +index 0000000000000..a784e9bf4600b +--- /dev/null ++++ b/third_party/rust/authenticator/src/transport/linux/ioctl_loongarch64.rs +@@ -0,0 +1,5 @@ ++/* automatically generated by rust-bindgen */ ++ ++pub type __u32 = ::std::os::raw::c_uint; ++pub const _HIDIOCGRDESCSIZE: __u32 = 2147764225; ++pub const _HIDIOCGRDESC: __u32 = 2416199682; +diff --git a/third_party/rust/cty/.cargo-checksum.json b/third_party/rust/cty/.cargo-checksum.json +index 902714f58a741..3e65dbf70a232 100644 +--- a/third_party/rust/cty/.cargo-checksum.json ++++ b/third_party/rust/cty/.cargo-checksum.json +@@ -1 +1 @@ +-{"files":{"CHANGELOG.md":"077c738b5f2c05d66a12209edaabca887091db727d61164a7a414da23d8bf08f","Cargo.toml":"94a517ea6c7dad4634a9a2bd356f3a8035927e7ff8367bd5a975b4db4ccf8e6e","LICENSE-APACHE":"a60eea817514531668d7e00765731449fe14d059d3249e0bc93b36de45f759f2","LICENSE-MIT":"035e70219855119df4273b3c5b97543ae82e0dd60c520416e759107c602f651b","README.md":"19687c50697e6abc991e4c35e6d674db56bd5e5ae0d5b833440718f8f154a74d","ci/install.sh":"9b34273d9e79ec59f63d5e3e7aea27b0db66194667f9730a21158740fa1b99f1","ci/script.sh":"54962430ca4d3528e5c0d44ff590b1504be13147db5cbe8bb82f1358528ef5f3","src/lib.rs":"33a38ce6df718fb0191f34ff2cefdcbd3ad9d93f0c4073b78eaf937c07fb7614"},"package":"b365fabc795046672053e29c954733ec3b05e4be654ab130fe8f1f94d7051f35"} +\ No newline at end of file 
++{"files":{"CHANGELOG.md":"077c738b5f2c05d66a12209edaabca887091db727d61164a7a414da23d8bf08f","Cargo.toml":"94a517ea6c7dad4634a9a2bd356f3a8035927e7ff8367bd5a975b4db4ccf8e6e","LICENSE-APACHE":"a60eea817514531668d7e00765731449fe14d059d3249e0bc93b36de45f759f2","LICENSE-MIT":"035e70219855119df4273b3c5b97543ae82e0dd60c520416e759107c602f651b","README.md":"19687c50697e6abc991e4c35e6d674db56bd5e5ae0d5b833440718f8f154a74d","ci/install.sh":"9b34273d9e79ec59f63d5e3e7aea27b0db66194667f9730a21158740fa1b99f1","ci/script.sh":"54962430ca4d3528e5c0d44ff590b1504be13147db5cbe8bb82f1358528ef5f3","src/lib.rs":"3e9ec28a0d13cfb47546e044b8fc3a32007f7c76994704c4164c4430a7167e39"},"package":"b365fabc795046672053e29c954733ec3b05e4be654ab130fe8f1f94d7051f35"} +diff --git a/third_party/rust/cty/src/lib.rs b/third_party/rust/cty/src/lib.rs +index 971c9cb3a9e04..80b8f3f291716 100644 +--- a/third_party/rust/cty/src/lib.rs ++++ b/third_party/rust/cty/src/lib.rs +@@ -24,6 +24,7 @@ pub use pwd::*; + target_arch = "powerpc", + target_arch = "powerpc64", + target_arch = "s390x", ++ target_arch = "loongarch64", + target_arch = "riscv32", + target_arch = "riscv64"))] + mod ad { +diff --git a/third_party/rust/nix/.cargo-checksum.json b/third_party/rust/nix/.cargo-checksum.json +index f4c932b88926b..b7b9c9f3c9a89 100644 +--- a/third_party/rust/nix/.cargo-checksum.json ++++ b/third_party/rust/nix/.cargo-checksum.json +@@ -1 +1 @@ +-{"files":{"CHANGELOG.md":"8ee4e556e53d1b39400a48675d3ecff0bf27e419accab7ca3be76ab934289548","Cargo.toml":"2e6eff9170182f107188b8bc9802efd044ef47178afc7f138950ecff1c1ceb96","LICENSE":"66e3ee1fa7f909ad3c612d556f2a0cdabcd809ad6e66f3b0605015ac64841b70","README.md":"1ed9a0e26ae6e575b3262ae734dd02889455593b761ee62403ea5a64104f3c9c","src/dir.rs":"0280a2dc480bd913f24ed84fbe26569fa2e8eefa660e5ad7c21e05fc34c14d16","src/env.rs":"028bc5e20139ebba418a655a2978a53335dc7680bf1de43d2c8333dd72cfa5c4","src/errno.rs":"e55d075858e349d9afea9ce0480f7fb7ba4dccccf0694fd7b3280b918836203c","src/fcntl.rs":"ea8f43d8fec0b6c3b7d903333e4c1ce85611684a4afd561c55cfe4b61a979e94","src/features.rs":"5b4a0831e5f4b79a6f0e42ed052fd66c875da18959750be51e41fb59ac19feed","src/ifaddrs.rs":"377865eb48040d28c392a1aec0221320108e3392ea285d23405ae2cfa5c54b20","src/kmod.rs":"c818ced08d55ae36fdf82fa914ba856b688e37234d574d3faa37128211d512fb","src/lib.rs":"a62fac2ba7111157c5b64251f67f8a189f04bd587d5c80703454a596ea7ae5d9","src/macros.rs":"e23d7d8be22ef0bf9febaaf2739585453103607c0139bd3995a324e4a16d011e","src/mount/bsd.rs":"4cf35606a63d7ca41caac3b38f01e2b70c63e71978c0529f19fc79182629dbe0","src/mount/linux.rs":"6e5d61788dedf1ca4416c6c6a3a9c6c747f9352c26d863f4a1d4142e288584d6","src/mount/mod.rs":"ba9f60eb831224ab73bdd87e00e15d13b9ce9efb70b18bf8f3fe60406d522b3e","src/mqueue.rs":"ed0a189036b2437b5f7f7f1312fa545540b06ca72171b451d8bce42cc3627534","src/net/if_.rs":"b32a8a1f952de60d95e549779a5c673fd72aa665e86bfdfc8ec6badf3016b9b1","src/net/mod.rs":"577f70170e53d4a6de1abb70bf8f1031ec3e65c0e63ef5fcf05c907125e7ac17","src/poll.rs":"2fc1d144fb40db51811c6357b520ab7993529702d8f0d8060c903118ff4f7259","src/pty.rs":"27b4f76c23acf02542674017067fee74cdcac907338458700a1aa4d6f6a62e27","src/sched.rs":"403aa5ebed81910263d42a94717612b737550bf053227b7d90f1c8949188d919","src/sys/aio.rs":"ae091de8540c97da374a39e7d154c1b3ce50f41e6fc20a45c6b06eb838e74366","src/sys/epoll.rs":"28e22debf474d1b047e8044a00b354c25dab2fa125960f9f2f14cc34289fd5c9","src/sys/event.rs":"dbd8e84bccb813839295b0a336485783ef19548d2317931f0ceb5ee62f839a40","src/sys/eventfd.rs":"c8db8f5874726fdad289ad5e2603a7d71a1ae5a899dc
de3a35d3edff8b498b7e","src/sys/inotify.rs":"5b4da774313afa9c28c3f92f9d07dce9bf4c8d044fd6a16f19480e79a19e808b","src/sys/ioctl/bsd.rs":"bbd02e30b0a78c1cb22777d9b00cfcbba9c68505cffc06118ac68474cf6fea39","src/sys/ioctl/linux.rs":"028181834d119b834bf399f2b8a6176cc57e75144693f28f32059d087d8c8018","src/sys/ioctl/mod.rs":"89b20579476b2e0254e0ecb1b41830cccd7027a22cbdb816a9d4ec3924842ac1","src/sys/memfd.rs":"f58d7fbe67c4b994832d72f5fbd59c136c8f1ae88ea8b0bc1c099db2d847ee6c","src/sys/mman.rs":"17df1bc34ba92bdd6bad1e11e4ef139998117f6c468c8f560421858f3cc899a5","src/sys/mod.rs":"baabf649f758ad4acce849ec1795dd4e4f9c6539e677bad5fa777300a4871dcb","src/sys/personality.rs":"aa89760c023bfec3fca5d8636f9eac9d337f5547933793ce6df7a0de97ae6ee1","src/sys/pthread.rs":"258cdf7ff0b61a4afa6d228109e4cb4fb88d859bb8dfe6c959d95130fb010906","src/sys/ptrace/bsd.rs":"4c590d8f023ff52f396f8b6f2150c08e5c9486d3088d9c173db33a70d616b800","src/sys/ptrace/linux.rs":"c82db3fb18aa97755f9ccb440a957cd46d664968a94045830c5d74d2d53bc19f","src/sys/ptrace/mod.rs":"e9e5d970097f5eafffba900959d4fdbf233bff9ed7f599fc9896bb44d86a57a4","src/sys/quota.rs":"02e698a25f0986fb43aa88689f3d3d8b9edc6ae48496ad02f7214fccaa493e00","src/sys/reboot.rs":"eacdf57694a6629fb05787e16450446102a62818274495f2ad4e445807d09221","src/sys/resource.rs":"d498d0c00fd30e35e1269a8902cb812014d813f63ec95364f8f59f1912ba5657","src/sys/select.rs":"65c39b129d3cc85b8ca026ff26dcf80c5639824f43715881c3c1bbb6bf0c8a60","src/sys/sendfile.rs":"7a62099f9771fecff49b9c11210341e3c1a4acf22f8dfb96d395e29421648676","src/sys/signal.rs":"c3e13a2edea54d190a4b051f62efc97953c00b5051a9fda0e39e3bc732a31939","src/sys/signalfd.rs":"583524434fd37143be3db37fa6f6cbd339f7946416f05b58a95e246947e5cc9d","src/sys/socket/addr.rs":"84df895052f59ec84774b189ffb285d2a37a9703af6c8310ae5040cca1a2583e","src/sys/socket/mod.rs":"6deb55438cad3606385303f036b0efd842dfd759fba93611911f5a4f2613c9dc","src/sys/socket/sockopt.rs":"ed1f920364bfe88bbe6eaeeefb27a63bfcdd7d67604aca2f03e22f2b502df55a","src/sys/stat.rs":"337dea8d55d6177dc85b3235b40b8a3e81af7f4a6e2806a0b2f730bec5424350","src/sys/statfs.rs":"17103659a85279bac046c69cb3b22bf2c11c2492cffb0edfa4c3b233d161a2f2","src/sys/statvfs.rs":"f81e3900ef90d62e7eceaf1b6ff8dcfd965466714c033eb4717687f692171f48","src/sys/sysinfo.rs":"b4519b1ca091c9dbe94d2a6fd6304944bf3df5626973d2c6884022559706f0d9","src/sys/termios.rs":"7923f9846a8122096b6b1cd240d3618b876ce500a751ac434954d172e2e85745","src/sys/time.rs":"9026033b60a5ccc95b70424aef043c8c748722e2ea8c7c86366ecd4585b651a0","src/sys/timer.rs":"8c10f0e7cfac857ad00460be30bc68b957909cc9296e70718d3b5d4a0babafde","src/sys/timerfd.rs":"ef7c48aefdcfac13316eeddbef5da04cf12e9f574b8d9f43402c02b6b8db86b3","src/sys/uio.rs":"e1d59ccbee9d46c65d3aa8c36aa3a3222539beea0d20163a8b707d08fca14e09","src/sys/utsname.rs":"0cdda0cc111caaa0e4ebe2d4588bdc825d878e5bcb7a9136073b15f87a20e11f","src/sys/wait.rs":"cc70d2d9b880ff6c48577a479c209af6127067bc013a90ee22538e4dfad7d2b4","src/time.rs":"d4e0872361a57810837f5bd790cbca3a2b9db1ac4694a3c52d1564ad3532d3be","src/ucontext.rs":"b8f2e04757a9c2bc38c3b1e259d3a013da8a730fe9bfbe5487637395681b43d3","src/unistd.rs":"e19be456124731c5b93aef92ed72a7c4c9092e28db0649814ba3fcc1f0d620fa","test/common/mod.rs":"1d7e28e3635754664cd056f3a1079232ff5c118df619e1d0551a9972eb0b3cd6","test/sys/mod.rs":"87b2891d83067ff21f72b8ff7fde3019dc45b6877282ac278b6da151de45c7a7","test/sys/test_aio.rs":"4dac9f716f852f1f438f78d6e64bf041e6fd316bf15dcb27afffaf0894bdefa6","test/sys/test_aio_drop.rs":"614070155fa16a979b7341d001639c5ce24a1d6f632c3abce45a5a6d49c4039b","test/sys/te
st_epoll.rs":"ffe95e36c79e37426ef8e8ca3b137b7f35ea0333ce666a20a4b7878db17680e9","test/sys/test_inotify.rs":"a141b9a995892547b51ceeb6761a70a6b86d37e8f38d13ea2c497b81b4b0f49f","test/sys/test_ioctl.rs":"00ccc5afb665e533a0a4b6d6a6be438bcaea19fce335390feef4e91d17b3036c","test/sys/test_mman.rs":"2b4161964c9204b74659028b0f89a88f4e3bcc9886137a3039737cd91d2698cb","test/sys/test_pthread.rs":"ace36a2f5587f1874854281b4fd84e4e4d892a1e3c5cc38ced57975739522ad6","test/sys/test_ptrace.rs":"0385eebc8b1b8c72f655b745769decd9143ad83018198375982da0896310456b","test/sys/test_select.rs":"54cea1c34ad28d5770a613c1c3cbc3b1064b22037ec2b9d3fcd422d3be9e60a7","test/sys/test_signal.rs":"acc9941227bd3e2afad323613c2b8c83902ed0486d3745fd72704f395924f1e4","test/sys/test_signalfd.rs":"0e1060143e2612c490bc3d0168d0bbb042ef55e3f1d91d2578b9e42e4310a14d","test/sys/test_socket.rs":"d2df1001f9a0b2dac0b88051a67c3868bb216e72e4da4eecd11c4448b9fa4b40","test/sys/test_sockopt.rs":"4465f22f718442f3f7b502e052dad02b93cebfa3b71fa55ff4f25fb02534acab","test/sys/test_stat.rs":"6630a28217fd708bb84cd4f7e7101836b74f2420f9888923fdab664ccc331c1d","test/sys/test_sysinfo.rs":"ffd49bc96375914a2c4a4a59730cae8072f85771e2c4a80d3403df38d967e272","test/sys/test_termios.rs":"e5bcef10c84bd7583d600d5601835bcb3cfc88781cb283ab0185bbef5faf4327","test/sys/test_timerfd.rs":"cfed3abf58118611d08f6985251a7739cff67108e11214222a1d2394a3a026ce","test/sys/test_uio.rs":"32656bd0a5699e4d019aa928edf104637937179782914a82d50d37226e84c421","test/sys/test_wait.rs":"6fd59fffeeb09ff620c359baefd062ba777598982b6cb001ccc07b6bc7605493","test/test.rs":"11f40b0718ddd1a150cb9e703d56d0b2a9462306505a2245ddf273a2011f48b5","test/test_clearenv.rs":"45ca548035b3c20ec87314715feaba2be973709a635d85b8cde46fd1d9f1ecd4","test/test_dir.rs":"ae3c11c58cb06da6557aa2a839c6653c54cd7724283fffe9df5a5d3feabdd89a","test/test_fcntl.rs":"71dcb87f7b04d78fc62937ba46cb7f0f1f2dbb330b63a996ea2e8ec9056b98a9","test/test_kmod/hello_mod/Makefile":"0219f7bce0603f97d997fb377ca071966c90333ecc665e78a54dfeb97a9c811b","test/test_kmod/hello_mod/hello.c":"bcac6b19c5bd807e1f3878c15e426acc85785a8ade9840c3bb4d068635c9188c","test/test_kmod/mod.rs":"b4ae25841c2f06f32de9f1acd8230eeccd7095721302ebe78ad454e4e4f9c783","test/test_mount.rs":"6dd242b6e23c9c39e1a75612bbea62573898818ab374c3c032c2cdb97033554d","test/test_mq.rs":"136071f24131aac0e65d5f29ac18e3806641dfae1164813f5570c0e3a6f70553","test/test_net.rs":"f2912327ebb2a3d37e6cff02a5ac3106cf889cc5c74404db4ef0034059ba26f1","test/test_nix_path.rs":"01ba4719c80b6fe911b091a7c05124b64eeece964e09c058ef8f9805daca546b","test/test_nmount.rs":"d6c112547bb80968170b5497cda4b6cbf69dabec6f51d494bd52298995ceff18","test/test_poll.rs":"3e0b8f0397ba080785c61a3bfc3d637bc87f324bc4e52b5f1bf3ca0d32dbc9fe","test/test_pty.rs":"b26238a0783746cb31880e11eebc1913149be999ce75fbc2d6677bdd1e2731b2","test/test_ptymaster_drop.rs":"ae63c815f5028ddc67d194e86559483018ab1816316bdb917f40cee9364fd8a5","test/test_resource.rs":"40aef790ab745cec31a4b333d2ca406b462aa9bdf4a6d3756371e498b8d51e9a","test/test_sched.rs":"c4579bd376fab8816e63b07fa9ace31dc08e63ebb7c855a2c450698090d1d1e8","test/test_sendfile.rs":"bb41b4f3621b518e397d3a5b5ad3c5dcef3fe506afe516eab7572fbab92b77e3","test/test_stat.rs":"c407ca47a5258750076d041afad2f6add4c3563be36628bde1c5b314f5d0765d","test/test_time.rs":"f7a21b1e279e60e84909d5dadda97ded66d3326b131fe317badf9af0a1b50335","test/test_timer.rs":"3ae20d364f075d2811f3ff94eda9886682cc21d8807656007d2464fe36d1e361","test/test_unistd.rs":"20a00be4fbe26302ea5fe50ce25b99265dc763db138663d6aa1d7ac729a1d292"},"package":"bfdd
a3d196821d6af13126e40375cdf7da646a96114af134d5f417a9a1dc8e1a"} +\ No newline at end of file ++{"files":{"CHANGELOG.md":"8ee4e556e53d1b39400a48675d3ecff0bf27e419accab7ca3be76ab934289548","Cargo.toml":"2e6eff9170182f107188b8bc9802efd044ef47178afc7f138950ecff1c1ceb96","LICENSE":"66e3ee1fa7f909ad3c612d556f2a0cdabcd809ad6e66f3b0605015ac64841b70","README.md":"1ed9a0e26ae6e575b3262ae734dd02889455593b761ee62403ea5a64104f3c9c","src/dir.rs":"0280a2dc480bd913f24ed84fbe26569fa2e8eefa660e5ad7c21e05fc34c14d16","src/env.rs":"028bc5e20139ebba418a655a2978a53335dc7680bf1de43d2c8333dd72cfa5c4","src/errno.rs":"e55d075858e349d9afea9ce0480f7fb7ba4dccccf0694fd7b3280b918836203c","src/fcntl.rs":"ea8f43d8fec0b6c3b7d903333e4c1ce85611684a4afd561c55cfe4b61a979e94","src/features.rs":"5b4a0831e5f4b79a6f0e42ed052fd66c875da18959750be51e41fb59ac19feed","src/ifaddrs.rs":"377865eb48040d28c392a1aec0221320108e3392ea285d23405ae2cfa5c54b20","src/kmod.rs":"c818ced08d55ae36fdf82fa914ba856b688e37234d574d3faa37128211d512fb","src/lib.rs":"a62fac2ba7111157c5b64251f67f8a189f04bd587d5c80703454a596ea7ae5d9","src/macros.rs":"e23d7d8be22ef0bf9febaaf2739585453103607c0139bd3995a324e4a16d011e","src/mount/bsd.rs":"4cf35606a63d7ca41caac3b38f01e2b70c63e71978c0529f19fc79182629dbe0","src/mount/linux.rs":"6e5d61788dedf1ca4416c6c6a3a9c6c747f9352c26d863f4a1d4142e288584d6","src/mount/mod.rs":"ba9f60eb831224ab73bdd87e00e15d13b9ce9efb70b18bf8f3fe60406d522b3e","src/mqueue.rs":"ed0a189036b2437b5f7f7f1312fa545540b06ca72171b451d8bce42cc3627534","src/net/if_.rs":"b32a8a1f952de60d95e549779a5c673fd72aa665e86bfdfc8ec6badf3016b9b1","src/net/mod.rs":"577f70170e53d4a6de1abb70bf8f1031ec3e65c0e63ef5fcf05c907125e7ac17","src/poll.rs":"2fc1d144fb40db51811c6357b520ab7993529702d8f0d8060c903118ff4f7259","src/pty.rs":"27b4f76c23acf02542674017067fee74cdcac907338458700a1aa4d6f6a62e27","src/sched.rs":"403aa5ebed81910263d42a94717612b737550bf053227b7d90f1c8949188d919","src/sys/aio.rs":"ae091de8540c97da374a39e7d154c1b3ce50f41e6fc20a45c6b06eb838e74366","src/sys/epoll.rs":"28e22debf474d1b047e8044a00b354c25dab2fa125960f9f2f14cc34289fd5c9","src/sys/event.rs":"dbd8e84bccb813839295b0a336485783ef19548d2317931f0ceb5ee62f839a40","src/sys/eventfd.rs":"c8db8f5874726fdad289ad5e2603a7d71a1ae5a899dcde3a35d3edff8b498b7e","src/sys/inotify.rs":"5b4da774313afa9c28c3f92f9d07dce9bf4c8d044fd6a16f19480e79a19e808b","src/sys/ioctl/bsd.rs":"bbd02e30b0a78c1cb22777d9b00cfcbba9c68505cffc06118ac68474cf6fea39","src/sys/ioctl/linux.rs":"54bad026ee637b73b95dad8135b6db61cae855670fd9323e7bf21acaff0827f4","src/sys/ioctl/mod.rs":"89b20579476b2e0254e0ecb1b41830cccd7027a22cbdb816a9d4ec3924842ac1","src/sys/memfd.rs":"f58d7fbe67c4b994832d72f5fbd59c136c8f1ae88ea8b0bc1c099db2d847ee6c","src/sys/mman.rs":"17df1bc34ba92bdd6bad1e11e4ef139998117f6c468c8f560421858f3cc899a5","src/sys/mod.rs":"baabf649f758ad4acce849ec1795dd4e4f9c6539e677bad5fa777300a4871dcb","src/sys/personality.rs":"aa89760c023bfec3fca5d8636f9eac9d337f5547933793ce6df7a0de97ae6ee1","src/sys/pthread.rs":"258cdf7ff0b61a4afa6d228109e4cb4fb88d859bb8dfe6c959d95130fb010906","src/sys/ptrace/bsd.rs":"4c590d8f023ff52f396f8b6f2150c08e5c9486d3088d9c173db33a70d616b800","src/sys/ptrace/linux.rs":"c82db3fb18aa97755f9ccb440a957cd46d664968a94045830c5d74d2d53bc19f","src/sys/ptrace/mod.rs":"e9e5d970097f5eafffba900959d4fdbf233bff9ed7f599fc9896bb44d86a57a4","src/sys/quota.rs":"02e698a25f0986fb43aa88689f3d3d8b9edc6ae48496ad02f7214fccaa493e00","src/sys/reboot.rs":"eacdf57694a6629fb05787e16450446102a62818274495f2ad4e445807d09221","src/sys/resource.rs":"d498d0c00fd30e35e1269a8902cb8120
14d813f63ec95364f8f59f1912ba5657","src/sys/select.rs":"65c39b129d3cc85b8ca026ff26dcf80c5639824f43715881c3c1bbb6bf0c8a60","src/sys/sendfile.rs":"7a62099f9771fecff49b9c11210341e3c1a4acf22f8dfb96d395e29421648676","src/sys/signal.rs":"c3e13a2edea54d190a4b051f62efc97953c00b5051a9fda0e39e3bc732a31939","src/sys/signalfd.rs":"583524434fd37143be3db37fa6f6cbd339f7946416f05b58a95e246947e5cc9d","src/sys/socket/addr.rs":"84df895052f59ec84774b189ffb285d2a37a9703af6c8310ae5040cca1a2583e","src/sys/socket/mod.rs":"6deb55438cad3606385303f036b0efd842dfd759fba93611911f5a4f2613c9dc","src/sys/socket/sockopt.rs":"ed1f920364bfe88bbe6eaeeefb27a63bfcdd7d67604aca2f03e22f2b502df55a","src/sys/stat.rs":"337dea8d55d6177dc85b3235b40b8a3e81af7f4a6e2806a0b2f730bec5424350","src/sys/statfs.rs":"17103659a85279bac046c69cb3b22bf2c11c2492cffb0edfa4c3b233d161a2f2","src/sys/statvfs.rs":"f81e3900ef90d62e7eceaf1b6ff8dcfd965466714c033eb4717687f692171f48","src/sys/sysinfo.rs":"b4519b1ca091c9dbe94d2a6fd6304944bf3df5626973d2c6884022559706f0d9","src/sys/termios.rs":"7923f9846a8122096b6b1cd240d3618b876ce500a751ac434954d172e2e85745","src/sys/time.rs":"9026033b60a5ccc95b70424aef043c8c748722e2ea8c7c86366ecd4585b651a0","src/sys/timer.rs":"8c10f0e7cfac857ad00460be30bc68b957909cc9296e70718d3b5d4a0babafde","src/sys/timerfd.rs":"ef7c48aefdcfac13316eeddbef5da04cf12e9f574b8d9f43402c02b6b8db86b3","src/sys/uio.rs":"e1d59ccbee9d46c65d3aa8c36aa3a3222539beea0d20163a8b707d08fca14e09","src/sys/utsname.rs":"0cdda0cc111caaa0e4ebe2d4588bdc825d878e5bcb7a9136073b15f87a20e11f","src/sys/wait.rs":"cc70d2d9b880ff6c48577a479c209af6127067bc013a90ee22538e4dfad7d2b4","src/time.rs":"d4e0872361a57810837f5bd790cbca3a2b9db1ac4694a3c52d1564ad3532d3be","src/ucontext.rs":"b8f2e04757a9c2bc38c3b1e259d3a013da8a730fe9bfbe5487637395681b43d3","src/unistd.rs":"e19be456124731c5b93aef92ed72a7c4c9092e28db0649814ba3fcc1f0d620fa","test/common/mod.rs":"1d7e28e3635754664cd056f3a1079232ff5c118df619e1d0551a9972eb0b3cd6","test/sys/mod.rs":"87b2891d83067ff21f72b8ff7fde3019dc45b6877282ac278b6da151de45c7a7","test/sys/test_aio.rs":"4dac9f716f852f1f438f78d6e64bf041e6fd316bf15dcb27afffaf0894bdefa6","test/sys/test_aio_drop.rs":"614070155fa16a979b7341d001639c5ce24a1d6f632c3abce45a5a6d49c4039b","test/sys/test_epoll.rs":"ffe95e36c79e37426ef8e8ca3b137b7f35ea0333ce666a20a4b7878db17680e9","test/sys/test_inotify.rs":"a141b9a995892547b51ceeb6761a70a6b86d37e8f38d13ea2c497b81b4b0f49f","test/sys/test_ioctl.rs":"00ccc5afb665e533a0a4b6d6a6be438bcaea19fce335390feef4e91d17b3036c","test/sys/test_mman.rs":"2b4161964c9204b74659028b0f89a88f4e3bcc9886137a3039737cd91d2698cb","test/sys/test_pthread.rs":"ace36a2f5587f1874854281b4fd84e4e4d892a1e3c5cc38ced57975739522ad6","test/sys/test_ptrace.rs":"0385eebc8b1b8c72f655b745769decd9143ad83018198375982da0896310456b","test/sys/test_select.rs":"54cea1c34ad28d5770a613c1c3cbc3b1064b22037ec2b9d3fcd422d3be9e60a7","test/sys/test_signal.rs":"acc9941227bd3e2afad323613c2b8c83902ed0486d3745fd72704f395924f1e4","test/sys/test_signalfd.rs":"0e1060143e2612c490bc3d0168d0bbb042ef55e3f1d91d2578b9e42e4310a14d","test/sys/test_socket.rs":"d2df1001f9a0b2dac0b88051a67c3868bb216e72e4da4eecd11c4448b9fa4b40","test/sys/test_sockopt.rs":"4465f22f718442f3f7b502e052dad02b93cebfa3b71fa55ff4f25fb02534acab","test/sys/test_stat.rs":"6630a28217fd708bb84cd4f7e7101836b74f2420f9888923fdab664ccc331c1d","test/sys/test_sysinfo.rs":"ffd49bc96375914a2c4a4a59730cae8072f85771e2c4a80d3403df38d967e272","test/sys/test_termios.rs":"e5bcef10c84bd7583d600d5601835bcb3cfc88781cb283ab0185bbef5faf4327","test/sys/test_timerfd.rs":"c
fed3abf58118611d08f6985251a7739cff67108e11214222a1d2394a3a026ce","test/sys/test_uio.rs":"32656bd0a5699e4d019aa928edf104637937179782914a82d50d37226e84c421","test/sys/test_wait.rs":"6fd59fffeeb09ff620c359baefd062ba777598982b6cb001ccc07b6bc7605493","test/test.rs":"11f40b0718ddd1a150cb9e703d56d0b2a9462306505a2245ddf273a2011f48b5","test/test_clearenv.rs":"45ca548035b3c20ec87314715feaba2be973709a635d85b8cde46fd1d9f1ecd4","test/test_dir.rs":"ae3c11c58cb06da6557aa2a839c6653c54cd7724283fffe9df5a5d3feabdd89a","test/test_fcntl.rs":"71dcb87f7b04d78fc62937ba46cb7f0f1f2dbb330b63a996ea2e8ec9056b98a9","test/test_kmod/hello_mod/Makefile":"0219f7bce0603f97d997fb377ca071966c90333ecc665e78a54dfeb97a9c811b","test/test_kmod/hello_mod/hello.c":"bcac6b19c5bd807e1f3878c15e426acc85785a8ade9840c3bb4d068635c9188c","test/test_kmod/mod.rs":"b4ae25841c2f06f32de9f1acd8230eeccd7095721302ebe78ad454e4e4f9c783","test/test_mount.rs":"6dd242b6e23c9c39e1a75612bbea62573898818ab374c3c032c2cdb97033554d","test/test_mq.rs":"136071f24131aac0e65d5f29ac18e3806641dfae1164813f5570c0e3a6f70553","test/test_net.rs":"f2912327ebb2a3d37e6cff02a5ac3106cf889cc5c74404db4ef0034059ba26f1","test/test_nix_path.rs":"01ba4719c80b6fe911b091a7c05124b64eeece964e09c058ef8f9805daca546b","test/test_nmount.rs":"d6c112547bb80968170b5497cda4b6cbf69dabec6f51d494bd52298995ceff18","test/test_poll.rs":"3e0b8f0397ba080785c61a3bfc3d637bc87f324bc4e52b5f1bf3ca0d32dbc9fe","test/test_pty.rs":"b26238a0783746cb31880e11eebc1913149be999ce75fbc2d6677bdd1e2731b2","test/test_ptymaster_drop.rs":"ae63c815f5028ddc67d194e86559483018ab1816316bdb917f40cee9364fd8a5","test/test_resource.rs":"40aef790ab745cec31a4b333d2ca406b462aa9bdf4a6d3756371e498b8d51e9a","test/test_sched.rs":"c4579bd376fab8816e63b07fa9ace31dc08e63ebb7c855a2c450698090d1d1e8","test/test_sendfile.rs":"bb41b4f3621b518e397d3a5b5ad3c5dcef3fe506afe516eab7572fbab92b77e3","test/test_stat.rs":"c407ca47a5258750076d041afad2f6add4c3563be36628bde1c5b314f5d0765d","test/test_time.rs":"f7a21b1e279e60e84909d5dadda97ded66d3326b131fe317badf9af0a1b50335","test/test_timer.rs":"3ae20d364f075d2811f3ff94eda9886682cc21d8807656007d2464fe36d1e361","test/test_unistd.rs":"20a00be4fbe26302ea5fe50ce25b99265dc763db138663d6aa1d7ac729a1d292"},"package":"bfdda3d196821d6af13126e40375cdf7da646a96114af134d5f417a9a1dc8e1a"} +diff --git a/third_party/rust/nix/src/sys/ioctl/linux.rs b/third_party/rust/nix/src/sys/ioctl/linux.rs +index 0c0a2090538f8..214d9e8c60281 100644 +--- a/third_party/rust/nix/src/sys/ioctl/linux.rs ++++ b/third_party/rust/nix/src/sys/ioctl/linux.rs +@@ -41,6 +41,7 @@ mod consts { + target_arch = "s390x", + target_arch = "x86_64", + target_arch = "aarch64", ++ target_arch = "loongarch64", + target_arch = "riscv32", + target_arch = "riscv64" + ))] +diff --git a/toolkit/components/telemetry/pingsender/pingsender.cpp b/toolkit/components/telemetry/pingsender/pingsender.cpp +index 30f2907c720e1..e6645227a2949 100644 +--- a/toolkit/components/telemetry/pingsender/pingsender.cpp ++++ b/toolkit/components/telemetry/pingsender/pingsender.cpp +@@ -10,6 +10,7 @@ + #include + #include + #include ++#include + #include + + #include +diff --git a/toolkit/moz.configure b/toolkit/moz.configure +index c518e02d00534..975835ffee9d6 100644 +--- a/toolkit/moz.configure ++++ b/toolkit/moz.configure +@@ -2240,7 +2240,7 @@ with only_when(compile_environment | artifact_builds): + use_nasm = False + elif target.cpu == "x86_64": + flags = ["-D__x86_64__", "-DPIC", "-DELF", "-Pconfig_unix64.asm"] +- elif target.cpu in ("x86", "arm", "aarch64"): ++ elif target.cpu 
in ("x86", "arm", "aarch64", "loongarch64"): + flac_only = True + else: + enable = False +diff --git a/xpcom/reflect/xptcall/md/unix/moz.build b/xpcom/reflect/xptcall/md/unix/moz.build +index 1779c148cb20a..e74c936c38525 100644 +--- a/xpcom/reflect/xptcall/md/unix/moz.build ++++ b/xpcom/reflect/xptcall/md/unix/moz.build +@@ -271,6 +271,14 @@ if CONFIG["OS_ARCH"] == "Linux" and CONFIG["CPU_ARCH"] == "riscv64": + "xptcstubs_riscv64.cpp", + ] + ++if CONFIG["OS_ARCH"] == "Linux" and CONFIG["CPU_ARCH"] == "loongarch64": ++ SOURCES += [ ++ "xptcinvoke_asm_loongarch64.S", ++ "xptcinvoke_loongarch64.cpp", ++ "xptcstubs_asm_loongarch64.S", ++ "xptcstubs_loongarch64.cpp", ++ ] ++ + FINAL_LIBRARY = "xul" + + LOCAL_INCLUDES += [ +diff --git a/xpcom/reflect/xptcall/md/unix/xptcinvoke_asm_loongarch64.S b/xpcom/reflect/xptcall/md/unix/xptcinvoke_asm_loongarch64.S +new file mode 100644 +index 0000000000000..7ac5a9a52e171 +--- /dev/null ++++ b/xpcom/reflect/xptcall/md/unix/xptcinvoke_asm_loongarch64.S +@@ -0,0 +1,91 @@ ++/* This Source Code Form subject to the terms of Mozilla Public ++ * License, v. 2.0 If a copy of the MPL was not distributed with ++ * this file, You can obtain one at http://mozilla.org/MPL/2.0/. ++ */ ++ ++ .set NGPREGS, 8 ++ .set NFPREGS, 8 ++ ++ .text ++ .globl _NS_InvokeByIndex ++ .type _NS_InvokeByIndex, @function ++/* ++ * _NS_InvokeByIndex(nsISupports* that, uint32_t methodIndex, ++ * uint32_t paramCount, nsXPTCVariant* params) ++ */ ++_NS_InvokeByIndex: ++ .cfi_startproc ++ addi.d $sp, $sp, -32 ++ .cfi_def_cfa_offset 32 ++ st.d $s0, $sp, 16 ++ .cfi_offset 23, -16 ++ st.d $s1, $sp, 8 ++ .cfi_offset 24, -24 ++ st.d $s2, $sp, 0 ++ .cfi_offset 25, -32 ++ st.d $ra, $sp, 24 ++ .cfi_offset 1, -8 ++ ++ move $s2, $a0 ++ move $s1, $a1 ++ move $s0, $sp ++ .cfi_def_cfa_register 23 ++ ++ /* 16-bytes alignment */ ++ addi.d $a0, $a2, 1 ++ li.d $t4, 0xfffffffffffffffe ++ and $a0, $a0, $t4 ++ slli.d $a0, $a0, 3 ++ sub.d $sp, $sp, $a0 ++ move $a4, $sp ++ ++ addi.d $sp, $sp, -8*(NFPREGS+NGPREGS) ++ move $a0, $sp ++ addi.d $a1, $sp, 8*NGPREGS ++ ++ bl invoke_copy_to_stack ++ ++ /* 1st argument is this */ ++ move $a0, $s2 ++ ++ ld.d $a1, $sp, 8 ++ ld.d $a2, $sp, 16 ++ ld.d $a3, $sp, 24 ++ ld.d $a4, $sp, 32 ++ ld.d $a5, $sp, 40 ++ ld.d $a6, $sp, 48 ++ ld.d $a7, $sp, 56 ++ ++ fld.d $fa0, $sp, 64 ++ fld.d $fa1, $sp, 72 ++ fld.d $fa2, $sp, 80 ++ fld.d $fa3, $sp, 88 ++ fld.d $fa4, $sp, 96 ++ fld.d $fa5, $sp, 104 ++ fld.d $fa6, $sp, 112 ++ fld.d $fa7, $sp, 120 ++ ++ addi.d $sp, $sp, 8*(NGPREGS+NFPREGS) ++ ++ ld.d $s2, $s2, 0 ++ slli.w $s1, $s1, 3 ++ add.d $s2, $s2, $s1 ++ ld.d $t3, $s2, 0 ++ jirl $ra, $t3, 0 ++ ++ move $sp, $s0 ++ .cfi_def_cfa_register 3 ++ ld.d $s0, $sp, 16 ++ .cfi_restore 23 ++ ld.d $s1, $sp, 8 ++ .cfi_restore 24 ++ ld.d $s2, $sp, 0 ++ .cfi_restore 25 ++ ld.d $ra, $sp, 24 ++ .cfi_restore 1 ++ addi.d $sp, $sp, 32 ++ .cfi_def_cfa_offset -32 ++ jirl $zero, $ra, 0 ++ .cfi_endproc ++ .size _NS_InvokeByIndex, .-_NS_InvokeByIndex ++ .section .note.GNU-stack, "", @progbits +diff --git a/xpcom/reflect/xptcall/md/unix/xptcinvoke_loongarch64.cpp b/xpcom/reflect/xptcall/md/unix/xptcinvoke_loongarch64.cpp +new file mode 100644 +index 0000000000000..61bb7b2efdeb1 +--- /dev/null ++++ b/xpcom/reflect/xptcall/md/unix/xptcinvoke_loongarch64.cpp +@@ -0,0 +1,100 @@ ++/* This Source Code Form is subject to the terms of the Mozilla Public ++ * License, v. 2.0. If a copy of the MPL was not distributed with this ++ * file, You can obtain one at http://mozilla.org/MPL/2.0/. 
*/ ++ ++// Platform specific code to invoke XPCOM methods on native objects ++ ++#include "xptcprivate.h" ++ ++extern "C" void invoke_copy_to_stack(uint64_t* gpregs, double* fpregs, ++ uint32_t paramCount, nsXPTCVariant* s, ++ uint64_t* d) { ++ static const uint32_t GPR_COUNT = 8; ++ static const uint32_t FPR_COUNT = 8; ++ ++ uint32_t nr_gpr = 1; // skip one GPR register for "this" ++ uint32_t nr_fpr = 0; ++ uint64_t value = 0; ++ ++ for (uint32_t i = 0; i < paramCount; i++, s++) { ++ if (s->IsIndirect()) { ++ value = (uint64_t)&s->val; ++ } else { ++ switch (s->type) { ++ case nsXPTType::T_FLOAT: ++ break; ++ case nsXPTType::T_DOUBLE: ++ break; ++ case nsXPTType::T_I8: ++ value = s->val.i8; ++ break; ++ case nsXPTType::T_I16: ++ value = s->val.i16; ++ break; ++ case nsXPTType::T_I32: ++ value = s->val.i32; ++ break; ++ case nsXPTType::T_I64: ++ value = s->val.i64; ++ break; ++ case nsXPTType::T_U8: ++ value = s->val.u8; ++ break; ++ case nsXPTType::T_U16: ++ value = s->val.u16; ++ break; ++ case nsXPTType::T_U32: ++ value = s->val.u32; ++ break; ++ case nsXPTType::T_U64: ++ value = s->val.u64; ++ break; ++ case nsXPTType::T_BOOL: ++ value = s->val.b; ++ break; ++ case nsXPTType::T_CHAR: ++ value = s->val.c; ++ break; ++ case nsXPTType::T_WCHAR: ++ value = s->val.wc; ++ break; ++ default: ++ value = (uint64_t)s->val.p; ++ break; ++ } ++ } ++ ++ if (!s->IsIndirect() && s->type == nsXPTType::T_DOUBLE) { ++ if (nr_fpr < FPR_COUNT) { ++ fpregs[nr_fpr++] = s->val.d; ++ } else if (nr_gpr < GPR_COUNT) { ++ memcpy(&gpregs[nr_gpr++], &(s->val.d), sizeof(s->val.d)); ++ } else { ++ memcpy(d++, &(s->val.d), sizeof(s->val.d)); ++ } ++ } else if (!s->IsIndirect() && s->type == nsXPTType::T_FLOAT) { ++ if (nr_fpr < FPR_COUNT) { ++ memcpy(&fpregs[nr_fpr++], &(s->val.f), sizeof(s->val.f)); ++ } else if (nr_gpr < GPR_COUNT) { ++ memcpy(&gpregs[nr_gpr++], &(s->val.f), sizeof(s->val.f)); ++ } else { ++ memcpy(d++, &(s->val.f), sizeof(s->val.f)); ++ } ++ } else { ++ if (nr_gpr < GPR_COUNT) { ++ gpregs[nr_gpr++] = value; ++ } else { ++ *d++ = value; ++ } ++ } ++ } ++} ++ ++extern "C" nsresult _NS_InvokeByIndex(nsISupports* that, uint32_t methodIndex, ++ uint32_t paramCount, ++ nsXPTCVariant* params); ++EXPORT_XPCOM_API(nsresult) ++NS_InvokeByIndex(nsISupports* that, uint32_t methodIndex, uint32_t paramCount, ++ nsXPTCVariant* params) { ++ return _NS_InvokeByIndex(that, methodIndex, paramCount, params); ++} +diff --git a/xpcom/reflect/xptcall/md/unix/xptcstubs_asm_loongarch64.S b/xpcom/reflect/xptcall/md/unix/xptcstubs_asm_loongarch64.S +new file mode 100644 +index 0000000000000..ae4e0cf73fd36 +--- /dev/null ++++ b/xpcom/reflect/xptcall/md/unix/xptcstubs_asm_loongarch64.S +@@ -0,0 +1,52 @@ ++# License, v. 2.0. If a copy of the MPL was not distributed with this ++# file, You can obtain one at http://mozilla.org/MPL/2.0/. 
++ ++ .set NGPRGES, 8 ++ .set NFPREGS, 8 ++ ++ .text ++ .globl SharedStub ++ .hidden SharedStub ++ .type SharedStub,@function ++ ++SharedStub: ++ .cfi_startproc ++ move $t0, $sp ++ addi.d $sp, $sp, -8*(NGPRGES+NFPREGS)-16 ++ .cfi_def_cfa_offset 8*(NGPRGES+NFPREGS)+16 ++ st.d $a0, $sp, 0 ++ st.d $a1, $sp, 8 ++ st.d $a2, $sp, 16 ++ st.d $a3, $sp, 24 ++ st.d $a4, $sp, 32 ++ st.d $a5, $sp, 40 ++ st.d $a6, $sp, 48 ++ st.d $a7, $sp, 56 ++ fst.d $fa0, $sp, 64 ++ fst.d $fa1, $sp, 72 ++ fst.d $fa2, $sp, 80 ++ fst.d $fa3, $sp, 88 ++ fst.d $fa4, $sp, 96 ++ fst.d $fa5, $sp, 104 ++ fst.d $fa6, $sp, 112 ++ fst.d $fa7, $sp, 120 ++ st.d $ra, $sp, 136 ++ .cfi_offset 1, 136 ++ ++ /* methodIndex is passed from stub */ ++ move $a1, $t6 ++ move $a2, $t0 ++ move $a3, $sp ++ addi.d $a4, $sp, 8*NGPRGES ++ ++ bl PrepareAndDispatch ++ ++ ld.d $ra, $sp, 136 ++ .cfi_restore 1 ++ addi.d $sp, $sp, 8*(NGPRGES+NFPREGS)+16 ++ .cfi_def_cfa_offset -8*(NGPRGES+NFPREGS)-16 ++ jirl $zero, $ra, 0 ++ .cfi_endproc ++ ++ .size SharedStub, .-SharedStub ++ .section .note.GNU-stack, "", @progbits +diff --git a/xpcom/reflect/xptcall/md/unix/xptcstubs_loongarch64.cpp b/xpcom/reflect/xptcall/md/unix/xptcstubs_loongarch64.cpp +new file mode 100644 +index 0000000000000..5c4cd6d95e7f6 +--- /dev/null ++++ b/xpcom/reflect/xptcall/md/unix/xptcstubs_loongarch64.cpp +@@ -0,0 +1,159 @@ ++/* -*- Mode: C; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ ++/* This Source Code Form is subject to the terms of the Mozilla Public ++ * License, V. 2.0. If a copy of the MPL was not distributed with this ++ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ ++ ++#include "xptcprivate.h" ++ ++extern "C" nsresult ATTRIBUTE_USED PrepareAndDispatch(nsXPTCStubBase* self, ++ uint32_t methodIndex, ++ uint64_t* args, ++ uint64_t* gpregs, ++ double* fpregs) { ++ static const uint32_t GPR_COUNT = 8; ++ static const uint32_t FPR_COUNT = 8; ++ nsXPTCMiniVariant paramBuffer[PARAM_BUFFER_COUNT]; ++ const nsXPTMethodInfo* info; ++ ++ self->mEntry->GetMethodInfo(uint16_t(methodIndex), &info); ++ ++ uint32_t paramCount = info->GetParamCount(); ++ const uint8_t indexOfJSContext = info->IndexOfJSContext(); ++ ++ uint64_t* ap = args; ++ uint32_t nr_gpr = 1; // skip the arg which is 'self' ++ uint32_t nr_fpr = 0; ++ uint64_t value; ++ ++ for (uint32_t i = 0; i < paramCount; i++) { ++ const nsXPTParamInfo& param = info->GetParam(i); ++ const nsXPTType& type = param.GetType(); ++ nsXPTCMiniVariant* dp = ¶mBuffer[i]; ++ ++ if (i == indexOfJSContext) { ++ if (nr_gpr < GPR_COUNT) ++ nr_gpr++; ++ else ++ ap++; ++ } ++ ++ if (!param.IsOut() && type == nsXPTType::T_DOUBLE) { ++ if (nr_fpr < FPR_COUNT) { ++ dp->val.d = fpregs[nr_fpr++]; ++ } else if (nr_gpr < GPR_COUNT) { ++ memcpy(&dp->val.d, &gpregs[nr_gpr++], sizeof(dp->val.d)); ++ } else { ++ memcpy(&dp->val.d, ap++, sizeof(dp->val.d)); ++ } ++ continue; ++ } ++ ++ if (!param.IsOut() && type == nsXPTType::T_FLOAT) { ++ if (nr_fpr < FPR_COUNT) { ++ memcpy(&dp->val.f, &fpregs[nr_fpr++], sizeof(dp->val.f)); ++ } else if (nr_gpr < GPR_COUNT) { ++ memcpy(&dp->val.f, &gpregs[nr_gpr++], sizeof(dp->val.f)); ++ } else { ++ memcpy(&dp->val.f, ap++, sizeof(dp->val.f)); ++ } ++ continue; ++ } ++ ++ if (nr_gpr < GPR_COUNT) { ++ value = gpregs[nr_gpr++]; ++ } else { ++ value = *ap++; ++ } ++ ++ if (param.IsOut() || !type.IsArithmetic()) { ++ dp->val.p = (void*)value; ++ continue; ++ } ++ ++ switch (type) { ++ case nsXPTType::T_I8: ++ dp->val.i8 = (int8_t)value; ++ break; ++ case nsXPTType::T_I16: ++ dp->val.i16 = 
(int16_t)value; ++ break; ++ case nsXPTType::T_I32: ++ dp->val.i32 = (int32_t)value; ++ break; ++ case nsXPTType::T_I64: ++ dp->val.i64 = (int64_t)value; ++ break; ++ case nsXPTType::T_U8: ++ dp->val.u8 = (uint8_t)value; ++ break; ++ case nsXPTType::T_U16: ++ dp->val.u16 = (uint16_t)value; ++ break; ++ case nsXPTType::T_U32: ++ dp->val.u32 = (uint32_t)value; ++ break; ++ case nsXPTType::T_U64: ++ dp->val.u64 = (uint64_t)value; ++ break; ++ case nsXPTType::T_BOOL: ++ dp->val.b = (bool)(uint8_t)value; ++ break; ++ case nsXPTType::T_CHAR: ++ dp->val.c = (char)value; ++ break; ++ case nsXPTType::T_WCHAR: ++ dp->val.wc = (wchar_t)value; ++ break; ++ default: ++ NS_ERROR("bad type"); ++ break; ++ } ++ } ++ ++ nsresult result = self->mOuter->CallMethod((uint16_t)methodIndex, info, ++ paramBuffer); ++ return result; ++} ++ ++// Load $t6 with the constant 'n' and branch to SharedStub(). ++// clang-format off ++#define STUB_ENTRY(n) \ ++ __asm__( \ ++ ".text\n\t" \ ++ ".if "#n" < 10 \n\t" \ ++ ".globl _ZN14nsXPTCStubBase5Stub"#n"Ev \n\t" \ ++ ".hidden _ZN14nsXPTCStubBase5Stub"#n"Ev \n\t" \ ++ ".type _ZN14nsXPTCStubBase5Stub"#n"Ev,@function \n\n" \ ++ "_ZN14nsXPTCStubBase5Stub"#n"Ev: \n\t" \ ++ ".elseif "#n" < 100 \n\t" \ ++ ".globl _ZN14nsXPTCStubBase6Stub"#n"Ev \n\t" \ ++ ".hidden _ZN14nsXPTCStubBase6Stub"#n"Ev \n\t" \ ++ ".type _ZN14nsXPTCStubBase6Stub"#n"Ev,@function \n\n" \ ++ "_ZN14nsXPTCStubBase6Stub"#n"Ev: \n\t" \ ++ ".elseif "#n" < 1000 \n\t" \ ++ ".globl _ZN14nsXPTCStubBase7Stub"#n"Ev \n\t" \ ++ ".hidden _ZN14nsXPTCStubBase7Stub"#n"Ev \n\t" \ ++ ".type _ZN14nsXPTCStubBase7Stub"#n"Ev,@function \n\n" \ ++ "_ZN14nsXPTCStubBase7Stub"#n"Ev: \n\t" \ ++ ".else \n\t" \ ++ ".err \"stub number "#n" >= 1000 not yet supported\"\n" \ ++ ".endif \n\t" \ ++ "li.d $t6, "#n" \n\t" \ ++ "b SharedStub \n" \ ++ ".if "#n" < 10 \n\t" \ ++ ".size _ZN14nsXPTCStubBase5Stub"#n"Ev,.-_ZN14nsXPTCStubBase5Stub"#n"Ev\n\t" \ ++ ".elseif "#n" < 100 \n\t" \ ++ ".size _ZN14nsXPTCStubBase6Stub"#n"Ev,.-_ZN14nsXPTCStubBase6Stub"#n"Ev\n\t" \ ++ ".else \n\t" \ ++ ".size _ZN14nsXPTCStubBase7Stub"#n"Ev,.-_ZN14nsXPTCStubBase7Stub"#n"Ev\n\t" \ ++ ".endif" \ ++); ++// clang-format on ++ ++#define SENTINEL_ENTRY(n) \ ++ nsresult nsXPTCStubBase::Sentinel##n() { \ ++ NS_ERROR("nsXPTCStubBase::Sentinel called"); \ ++ return NS_ERROR_NOT_IMPLEMENTED; \ ++ } ++ ++#include "xptcstubsdef.inc" +-- +2.42.0 + diff --git a/thunderbird/mozconfig.cfg b/thunderbird/mozconfig.cfg index 3d8da9a551..8036c9bf8d 100644 --- a/thunderbird/mozconfig.cfg +++ b/thunderbird/mozconfig.cfg @@ -2,12 +2,13 @@ ac_add_options --enable-application=comm/mail ac_add_options --prefix=/usr ac_add_options --enable-release -ac_add_options --enable-linker=lld +#ac_add_options --enable-linker=lld ac_add_options --enable-hardening ac_add_options --enable-optimize -ac_add_options --enable-rust-simd +#ac_add_options --enable-rust-simd # https://bugzilla.mozilla.org/show_bug.cgi?id=1423822 -ac_add_options --with-wasi-sysroot=/usr/share/wasi-sysroot +#ac_add_options --with-wasi-sysroot=/usr/share/wasi-sysroot +ac_add_options --without-wasm-sandboxed-libraries # Branding ac_add_options --enable-official-branding diff --git a/tickrs/PKGBUILD b/tickrs/PKGBUILD index 0edc5a453b..9d708c5ee4 100644 --- a/tickrs/PKGBUILD +++ b/tickrs/PKGBUILD @@ -16,7 +16,7 @@ options=('!lto') prepare() { cd "$pkgname-$pkgver" - cargo fetch --locked --target "$CARCH-unknown-linux-gnu" + cargo fetch --locked --target "`uname -m`-unknown-linux-gnu" } build() { diff --git a/toastify/PKGBUILD 
b/toastify/PKGBUILD index 59c179af62..e89e20b306 100644 --- a/toastify/PKGBUILD +++ b/toastify/PKGBUILD @@ -14,7 +14,7 @@ b2sums=('SKIP') prepare() { cd $pkgname - cargo fetch --locked --target "$CARCH-unknown-linux-gnu" + cargo fetch --locked --target "`uname -m`-unknown-linux-gnu" } build() { diff --git a/tokei/PKGBUILD b/tokei/PKGBUILD index 6ed632e9cf..ccbcdf0396 100644 --- a/tokei/PKGBUILD +++ b/tokei/PKGBUILD @@ -15,7 +15,13 @@ sha512sums=('b8474cb3cad8cab8cb9c24b44a9b7bdaa436fde4e56ca25a8c6d9cbe342b27acf80 build() { cd "${srcdir}/${pkgname}-${pkgver}" - cargo build --release --locked --features all + find -name Cargo.lock -exec rm -f {} \; + mkdir -p .cargo + cat > .cargo/config.toml < +Date: Sun, 18 Apr 2021 22:37:24 +0800 +Subject: [PATCH] just for la64 build + +--- + src/miniacc.h | 5 +++++ + 1 file changed, 5 insertions(+) + +diff --git a/src/miniacc.h b/src/miniacc.h +index 3c949280..e52cda58 100644 +--- a/src/miniacc.h ++++ b/src/miniacc.h +@@ -979,6 +979,9 @@ + #elif (UINT_MAX == ACC_0xffffL) && defined(__m32c__) + # define ACC_ARCH_M16C 1 + # define ACC_INFO_ARCH "m16c" ++#elif defined(__loongarch64) ++# define ACC_ARCH_LA64 1 ++# define ACC_INFO_ARCH "la64" + #elif defined(__IAR_SYSTEMS_ICC__) && defined(__ICCM16C__) + # define ACC_ARCH_M16C 1 + # define ACC_INFO_ARCH "m16c" +@@ -2586,6 +2589,8 @@ ACC_COMPILE_TIME_ASSERT_HEADER(ACC_SIZEOF_PTRDIFF_T == sizeof(ptrdiff_t)) + # define ACC_ABI_BIG_ENDIAN 1 + #elif 1 && (ACC_ARCH_ARM64) && defined(__AARCH64EL__) && !defined(__AARCH64EB__) + # define ACC_ABI_LITTLE_ENDIAN 1 ++#elif 1 && (ACC_ARCH_LA64) && defined(__loongarch64) ++# define ACC_ABI_LITTLE_ENDIAN 1 + #elif 1 && (ACC_ARCH_ARM64) && defined(_MSC_VER) && defined(_WIN32) + # define ACC_ABI_LITTLE_ENDIAN 1 + #elif 1 && (ACC_ARCH_MIPS) && defined(__MIPSEB__) && !defined(__MIPSEL__) +-- +2.28.0 + diff --git a/upx/PKGBUILD b/upx/PKGBUILD index 4e31c3dfa5..e0e01c1717 100644 --- a/upx/PKGBUILD +++ b/upx/PKGBUILD @@ -22,7 +22,9 @@ source=( git+$url-vendor-valgrind#commit=a196a50056be5e6ef0c5f7456e95b4234b799a33 git+$url-vendor-zlib#commit=93538c2b9403ec5b8ed673a3a5f3874430569b70 "git+$url#commit=099c3d829e80488af7395a4242b318877e980da4") # tag: v4.2.2 +source+=(0001-just-for-la64-build.patch) b2sums=(SKIP SKIP SKIP SKIP SKIP SKIP) +b2sums+=('0ea1b7a64155d135bfd8872969e1003b72d4b098512f1bc89d31244e02ffc901cc8c5bd87eb63a529aadf161386cd857cc6271a8ea018ba3915616c3c2fe5c72') prepare() { cd $pkgname @@ -30,6 +32,7 @@ prepare() { for x in doctest lzma-sdk ucl valgrind zlib; do rm -frv vendor/$x && ln -s "$srcdir/upx-vendor-$x" vendor/$x done + patch -p1 -i $srcdir/0001-just-for-la64-build.patch } build() { diff --git a/uucp/PKGBUILD b/uucp/PKGBUILD index 7a7a2a69c9..fd66c3edaa 100644 --- a/uucp/PKGBUILD +++ b/uucp/PKGBUILD @@ -21,6 +21,8 @@ prepare() { } build() { + CFLAGS=${CFLAGS/-Werror=format-security/} + CXXFLAGS=${CXXFLAGS/-Werror=format-security/} cd "${srcdir}/${pkgname}-${pkgver}" ./configure --prefix=/usr --mandir=/usr/share/man --with-newconfigdir=/etc/uucp --sbindir=/usr/bin make diff --git a/v2ray-domain-list-community/PKGBUILD b/v2ray-domain-list-community/PKGBUILD index 4903c25f40..2b3dfcfc90 100644 --- a/v2ray-domain-list-community/PKGBUILD +++ b/v2ray-domain-list-community/PKGBUILD @@ -14,6 +14,9 @@ sha512sums=('SKIP') build() { cd domain-list-community + go mod edit -replace=golang.org/x/sys=github.com/golang/sys@v0.0.0-20220622161953-175b2fd9d664 + go mod edit -replace=golang.org/x/net=github.com/golang/net@v0.0.0-20220622184535-263ec571b305 + go mod tidy 
ASSUME_NO_MOVING_GC_UNSAFE_RISK_IT_WITH=go1.18 go run main.go } diff --git a/v2ray/PKGBUILD b/v2ray/PKGBUILD index 08fa9cc6c1..87f2774636 100644 --- a/v2ray/PKGBUILD +++ b/v2ray/PKGBUILD @@ -26,6 +26,10 @@ build() { export CGO_LDFLAGS="${LDFLAGS}" export CGO_CFLAGS="${CFLAGS}" export CGO_CPPFLAGS="${CPPFLAGS}" + export GOPROXY=https://goproxy.cn + go mod edit -replace=golang.org/x/sys=github.com/golang/sys@v0.0.0-20220622161953-175b2fd9d664 + go mod edit -replace=golang.org/x/net=github.com/golang/net@v0.0.0-20220622184535-263ec571b305 + go mod tidy go build -o v2ray ./main } diff --git a/valgrind/PKGBUILD b/valgrind/PKGBUILD index 4d53417803..edb54aa4fa 100644 --- a/valgrind/PKGBUILD +++ b/valgrind/PKGBUILD @@ -21,30 +21,36 @@ arch=('loong64' 'x86_64') license=('GPL') url='https://valgrind.org/' depends=('glibc' 'perl' 'debuginfod') -makedepends=('gdb' 'lib32-glibc' 'lib32-gcc-libs' 'docbook-xml' +makedepends=('gdb' 'docbook-xml' 'docbook-xsl' 'docbook-sgml') checkdepends=('procps-ng') -optdepends=('lib32-glibc: 32-bit ABI support') provides=('valgrind-multilib') replaces=('valgrind-multilib') options=('!emptydirs' '!strip') source=(https://sourceware.org/pub/valgrind/valgrind-${pkgver}.tar.bz2{,.asc} - valgrind-3.7.0-respect-flags.patch) + valgrind-3.7.0-respect-flags.patch + fix-perl-errors.patch + valgrind-3.21-la64.patch) validpgpkeys=( 0E9FFD0C16A1856CF9C7C690BA0166E698FA6035 # Julian Seward EC3CFE88F6CA0788774F5C1D1AA44BE649DE760A # Mark Wielaard ) sha512sums=('2904c13f68245bbafcea70998c6bd20725271300a7e94b6751ca00916943595fc3fac8557da7ea8db31b54a43f092823a0a947bc142829da811d074e1fe49777' 'SKIP' - 'e0cec39381cefeca09ae4794cca309dfac7c8693e6315e137e64f5c33684598726d41cfbb4edf764fe985503b13ff596184ca5fc32b159d500ec092e4cf8838c') + 'e0cec39381cefeca09ae4794cca309dfac7c8693e6315e137e64f5c33684598726d41cfbb4edf764fe985503b13ff596184ca5fc32b159d500ec092e4cf8838c' + '20b251bfc7bef8dfd232f9b679e907114c575299916164a608e2fe7fab5f30bf7241f25e37ab4194c56b0a21e682b3cea2fd892aab30fa2ce3863ef744f27f18' + '62015578845d7efe55ba1c04ccaaee1beffb597524bc17ec90494d32e53ef792947391bbc2fe2eb8557bc84a5aa470c1f0b1542b1a10aac58719efbaaeba2f87') b2sums=('80024371b3e70521996077fba24e233097a6190477ced1b311cd41fead687dcc2511ac0ef723792488f4af08867dff3e1f474816fda09c1604b89059e31c2514' 'SKIP' - 'af556fdf3c02e37892bfe9afebc954cf2f1b2fa9b75c1caacfa9f3b456ebc02bf078475f9ee30079b3af5d150d41415a947c3d04235c1ea8412cf92b959c484a') + 'af556fdf3c02e37892bfe9afebc954cf2f1b2fa9b75c1caacfa9f3b456ebc02bf078475f9ee30079b3af5d150d41415a947c3d04235c1ea8412cf92b959c484a' + '78e5ebeda69302ad380923fe0e76ef8fc3443ffa29cc3104fe629335c8ceda1b4198cb5c72bdefb0e47c77ea02d2ca7bfb478cbf8731f8ded0e0c7c5d83981ee' + 'd9bea235f6b8d07cff5db4851b7085cde9d90e85929167206c74c641fe2db50dde2ffc6cf4524109275321e4c1be8e5a4bcca8a0dda1570877d2cfbfd847b994') options=(!lto) # https://bugs.kde.org/show_bug.cgi?id=338252 prepare() { cd valgrind-${pkgver} patch -Np1 < ../valgrind-3.7.0-respect-flags.patch + patch -p1 -i "${srcdir}"/valgrind-3.21-la64.patch sed -i 's|sgml/docbook/xsl-stylesheets|xml/docbook/xsl-stylesheets-1.79.2-nons|' docs/Makefile.am autoreconf -ifv @@ -54,6 +60,8 @@ build() { # valgrind does not like some of our flags CPPFLAGS=${CPPFLAGS/-D_FORTIFY_SOURCE=2/} CFLAGS=${CFLAGS/-fno-plt/} + CFLAGS=${CFLAGS/ -O2/} + CXXFLAGS=${CFLAGS/ -O2/} CXXFLAGS=${CXXFLAGS/-fno-plt/} cd valgrind-${pkgver} diff --git a/valgrind/valgrind-3.21-la64.patch b/valgrind/valgrind-3.21-la64.patch new file mode 100644 index 0000000000..aca79dccdf --- /dev/null +++ 
b/valgrind/valgrind-3.21-la64.patch @@ -0,0 +1,93230 @@ +diff --git a/Makefile.all.am b/Makefile.all.am +index 1de1f13a7..d72410a9d 100755 +--- a/Makefile.all.am ++++ b/Makefile.all.am +@@ -290,6 +290,12 @@ AM_CFLAGS_PSO_MIPS64_LINUX = @FLAG_M64@ $(AM_CFLAGS_BASE) \ + $(AM_CFLAGS_PSO_BASE) + AM_CCASFLAGS_MIPS64_LINUX = @FLAG_M64@ -g + ++AM_FLAG_M3264_LOONGARCH64_LINUX = @FLAG_M64@ ++AM_CFLAGS_LOONGARCH64_LINUX = @FLAG_M64@ $(AM_CFLAGS_BASE) ++AM_CFLAGS_PSO_LOONGARCH64_LINUX = @FLAG_M64@ $(AM_CFLAGS_BASE) \ ++ $(AM_CFLAGS_PSO_BASE) ++AM_CCASFLAGS_LOONGARCH64_LINUX = @FLAG_M64@ -g ++ + AM_FLAG_M3264_X86_SOLARIS = @FLAG_M32@ + AM_CFLAGS_X86_SOLARIS = @FLAG_M32@ @PREFERRED_STACK_BOUNDARY_2@ \ + $(AM_CFLAGS_BASE) -fomit-frame-pointer @SOLARIS_UNDEF_LARGESOURCE@ +@@ -350,6 +356,7 @@ PRELOAD_LDFLAGS_S390X_LINUX = $(PRELOAD_LDFLAGS_COMMON_LINUX) @FLAG_M64@ + PRELOAD_LDFLAGS_MIPS32_LINUX = $(PRELOAD_LDFLAGS_COMMON_LINUX) @FLAG_M32@ + PRELOAD_LDFLAGS_NANOMIPS_LINUX = $(PRELOAD_LDFLAGS_COMMON_LINUX) @FLAG_M32@ + PRELOAD_LDFLAGS_MIPS64_LINUX = $(PRELOAD_LDFLAGS_COMMON_LINUX) @FLAG_M64@ ++PRELOAD_LDFLAGS_LOONGARCH64_LINUX = $(PRELOAD_LDFLAGS_COMMON_LINUX) @FLAG_M64@ + PRELOAD_LDFLAGS_X86_SOLARIS = $(PRELOAD_LDFLAGS_COMMON_SOLARIS) @FLAG_M32@ + PRELOAD_LDFLAGS_AMD64_SOLARIS = $(PRELOAD_LDFLAGS_COMMON_SOLARIS) @FLAG_M64@ + +diff --git a/Makefile.tool.am b/Makefile.tool.am +index df9502913..58a928e95 100644 +--- a/Makefile.tool.am ++++ b/Makefile.tool.am +@@ -99,6 +99,10 @@ TOOL_LDFLAGS_MIPS64_LINUX = \ + -static -nodefaultlibs -nostartfiles -u __start @FLAG_NO_BUILD_ID@ \ + @FLAG_M64@ + ++TOOL_LDFLAGS_LOONGARCH64_LINUX = \ ++ -static -nodefaultlibs -nostartfiles -u __start @FLAG_NO_BUILD_ID@ \ ++ @FLAG_M64@ ++ + TOOL_LDFLAGS_X86_SOLARIS = \ + $(TOOL_LDFLAGS_COMMON_SOLARIS) @FLAG_M32@ + +@@ -167,6 +171,9 @@ LIBREPLACEMALLOC_MIPS32_LINUX = \ + LIBREPLACEMALLOC_MIPS64_LINUX = \ + $(top_builddir)/coregrind/libreplacemalloc_toolpreload-mips64-linux.a + ++LIBREPLACEMALLOC_LOONGARCH64_LINUX = \ ++ $(top_builddir)/coregrind/libreplacemalloc_toolpreload-loongarch64-linux.a ++ + LIBREPLACEMALLOC_X86_SOLARIS = \ + $(top_builddir)/coregrind/libreplacemalloc_toolpreload-x86-solaris.a + +@@ -239,6 +246,11 @@ LIBREPLACEMALLOC_LDFLAGS_MIPS64_LINUX = \ + $(LIBREPLACEMALLOC_MIPS64_LINUX) \ + -Wl,--no-whole-archive + ++LIBREPLACEMALLOC_LDFLAGS_LOONGARCH64_LINUX = \ ++ -Wl,--whole-archive \ ++ $(LIBREPLACEMALLOC_LOONGARCH64_LINUX) \ ++ -Wl,--no-whole-archive ++ + LIBREPLACEMALLOC_LDFLAGS_X86_SOLARIS = \ + -Wl,--whole-archive \ + $(LIBREPLACEMALLOC_X86_SOLARIS) \ +diff --git a/Makefile.vex.am b/Makefile.vex.am +index 98d848359..009d93b45 100644 +--- a/Makefile.vex.am ++++ b/Makefile.vex.am +@@ -26,6 +26,7 @@ pkginclude_HEADERS = \ + pub/libvex_guest_s390x.h \ + pub/libvex_guest_mips32.h \ + pub/libvex_guest_mips64.h \ ++ pub/libvex_guest_loongarch64.h \ + pub/libvex_s390x_common.h \ + pub/libvex_ir.h \ + pub/libvex_trc_values.h \ +@@ -49,6 +50,7 @@ noinst_HEADERS = \ + priv/guest_mips_defs.h \ + priv/mips_defs.h \ + priv/guest_nanomips_defs.h \ ++ priv/guest_loongarch64_defs.h \ + priv/host_generic_regs.h \ + priv/host_generic_simd64.h \ + priv/host_generic_simd128.h \ +@@ -64,7 +66,8 @@ noinst_HEADERS = \ + priv/s390_defs.h \ + priv/host_mips_defs.h \ + priv/host_nanomips_defs.h \ +- priv/common_nanomips_defs.h ++ priv/common_nanomips_defs.h \ ++ priv/host_loongarch64_defs.h + + BUILT_SOURCES = pub/libvex_guest_offsets.h + CLEANFILES = pub/libvex_guest_offsets.h +@@ -93,7 +96,8 @@ pub/libvex_guest_offsets.h: 
auxprogs/genoffsets.c \ + pub/libvex_guest_arm64.h \ + pub/libvex_guest_s390x.h \ + pub/libvex_guest_mips32.h \ +- pub/libvex_guest_mips64.h ++ pub/libvex_guest_mips64.h \ ++ pub/libvex_guest_loongarch64.h + rm -f auxprogs/genoffsets.s + $(mkdir_p) auxprogs pub + $(CC) $(CFLAGS_FOR_GENOFFSETS) \ +@@ -151,6 +155,8 @@ LIBVEX_SOURCES_COMMON = \ + priv/guest_mips_toIR.c \ + priv/guest_nanomips_helpers.c \ + priv/guest_nanomips_toIR.c \ ++ priv/guest_loongarch64_helpers.c \ ++ priv/guest_loongarch64_toIR.c \ + priv/host_generic_regs.c \ + priv/host_generic_simd64.c \ + priv/host_generic_simd128.c \ +@@ -174,7 +180,9 @@ LIBVEX_SOURCES_COMMON = \ + priv/host_mips_defs.c \ + priv/host_nanomips_defs.c \ + priv/host_mips_isel.c \ +- priv/host_nanomips_isel.c ++ priv/host_nanomips_isel.c \ ++ priv/host_loongarch64_defs.c \ ++ priv/host_loongarch64_isel.c + + LIBVEXMULTIARCH_SOURCES = priv/multiarch_main_main.c + +diff --git a/README b/README +index 842388036..1a88d1d47 100644 +--- a/README ++++ b/README +@@ -41,6 +41,7 @@ platforms: + - MIPS32/Linux + - MIPS64/Linux + - nanoMIPS/Linux ++- LOONGARCH64/Linux + - X86/Solaris + - AMD64/Solaris + - X86/FreeBSD +diff --git a/README.loongarch64 b/README.loongarch64 +new file mode 100644 +index 000000000..2f71484bf +--- /dev/null ++++ b/README.loongarch64 +@@ -0,0 +1,46 @@ ++Status ++~~~~~~ ++ ++A port to LoongArch64 Linux platform. ++ ++Some new IROps: ++ ++* Iop_ScaleBF64 ++* Iop_ScaleBF32 ++* Iop_RSqrtF64 ++* Iop_RSqrtF32 ++* Iop_LogBF64 ++* Iop_LogBF32 ++* Iop_MaxNumAbsF64 ++* Iop_MinNumAbsF64 ++* Iop_MaxNumF32 ++* Iop_MinNumF32 ++* Iop_MaxNumAbsF32 ++* Iop_MinNumAbsF32 ++ ++A new IRMBusEvent: ++ ++* Imbe_InsnFence ++ ++A new IRJumpKind: ++ ++* Ijk_SigSYS ++ ++ ++Limitations ++~~~~~~~~~~~ ++ ++* Only support basic integer instructions and floating-point instructions. ++* Only support fallback LLSC implementation. ++ ++ ++Reading Material ++~~~~~~~~~~~~~~~~ ++ ++* LoongArch Reference Manual ++ https://loongson.github.io/LoongArch-Documentation/LoongArch-Vol1-EN.html ++* LoongArch ELF ABI specification: ++ https://loongson.github.io/LoongArch-Documentation/LoongArch-ELF-ABI-EN.html ++* LoongArch Toolchain Conventions: ++ https://loongson.github.io/LoongArch-Documentation/LoongArch-toolchain-conventions-EN.html ++ +diff --git a/README.md b/README.md +new file mode 100644 +index 000000000..e2f412092 +--- /dev/null ++++ b/README.md +@@ -0,0 +1,8 @@ ++# Valgrind – LOONGARCH64/Linux with vector support ++ ++This branch (`loongarch64-linux-vector`) contains Valgrind with support for the LOONGARCH64/Linux platform, specifically focusing on vector support. ++ ++The `loongarch64-linux` branch has been tested with Glibc version 2.37. ++It's important to be aware that starting from Glibc version 2.38, vector support is required. 
++ ++**Note: This branch is under development and is not stable.** +diff --git a/VEX/auxprogs/genoffsets.c b/VEX/auxprogs/genoffsets.c +index 54376dc90..89edf524c 100644 +--- a/VEX/auxprogs/genoffsets.c ++++ b/VEX/auxprogs/genoffsets.c +@@ -53,6 +53,7 @@ + #include "../pub/libvex_guest_s390x.h" + #include "../pub/libvex_guest_mips32.h" + #include "../pub/libvex_guest_mips64.h" ++#include "../pub/libvex_guest_loongarch64.h" + + #define VG_STRINGIFZ(__str) #__str + #define VG_STRINGIFY(__str) VG_STRINGIFZ(__str) +@@ -262,6 +263,41 @@ void foo ( void ) + GENOFFSET(MIPS64,mips64,PC); + GENOFFSET(MIPS64,mips64,HI); + GENOFFSET(MIPS64,mips64,LO); ++ ++ // LOONGARCH64 ++ GENOFFSET(LOONGARCH64,loongarch64,R0); ++ GENOFFSET(LOONGARCH64,loongarch64,R1); ++ GENOFFSET(LOONGARCH64,loongarch64,R2); ++ GENOFFSET(LOONGARCH64,loongarch64,R3); ++ GENOFFSET(LOONGARCH64,loongarch64,R4); ++ GENOFFSET(LOONGARCH64,loongarch64,R5); ++ GENOFFSET(LOONGARCH64,loongarch64,R6); ++ GENOFFSET(LOONGARCH64,loongarch64,R7); ++ GENOFFSET(LOONGARCH64,loongarch64,R8); ++ GENOFFSET(LOONGARCH64,loongarch64,R9); ++ GENOFFSET(LOONGARCH64,loongarch64,R10); ++ GENOFFSET(LOONGARCH64,loongarch64,R11); ++ GENOFFSET(LOONGARCH64,loongarch64,R12); ++ GENOFFSET(LOONGARCH64,loongarch64,R13); ++ GENOFFSET(LOONGARCH64,loongarch64,R14); ++ GENOFFSET(LOONGARCH64,loongarch64,R15); ++ GENOFFSET(LOONGARCH64,loongarch64,R16); ++ GENOFFSET(LOONGARCH64,loongarch64,R17); ++ GENOFFSET(LOONGARCH64,loongarch64,R18); ++ GENOFFSET(LOONGARCH64,loongarch64,R19); ++ GENOFFSET(LOONGARCH64,loongarch64,R20); ++ GENOFFSET(LOONGARCH64,loongarch64,R21); ++ GENOFFSET(LOONGARCH64,loongarch64,R22); ++ GENOFFSET(LOONGARCH64,loongarch64,R23); ++ GENOFFSET(LOONGARCH64,loongarch64,R24); ++ GENOFFSET(LOONGARCH64,loongarch64,R25); ++ GENOFFSET(LOONGARCH64,loongarch64,R26); ++ GENOFFSET(LOONGARCH64,loongarch64,R27); ++ GENOFFSET(LOONGARCH64,loongarch64,R28); ++ GENOFFSET(LOONGARCH64,loongarch64,R29); ++ GENOFFSET(LOONGARCH64,loongarch64,R30); ++ GENOFFSET(LOONGARCH64,loongarch64,R31); ++ GENOFFSET(LOONGARCH64,loongarch64,PC); + } + + /*--------------------------------------------------------------------*/ +diff --git a/VEX/priv/guest_loongarch64_defs.h b/VEX/priv/guest_loongarch64_defs.h +new file mode 100644 +index 000000000..867d85981 +--- /dev/null ++++ b/VEX/priv/guest_loongarch64_defs.h +@@ -0,0 +1,130 @@ ++ ++/*---------------------------------------------------------------*/ ++/*--- begin guest_loongarch64_defs.h ---*/ ++/*---------------------------------------------------------------*/ ++ ++/* ++ This file is part of Valgrind, a dynamic binary instrumentation ++ framework. ++ ++ Copyright (C) 2021-2022 Loongson Technology Corporation Limited ++ ++ This program is free software; you can redistribute it and/or ++ modify it under the terms of the GNU General Public License as ++ published by the Free Software Foundation; either version 2 of the ++ License, or (at your option) any later version. ++ ++ This program is distributed in the hope that it will be useful, but ++ WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ General Public License for more details. ++ ++ You should have received a copy of the GNU General Public License ++ along with this program; if not, see . ++ ++ The GNU General Public License is contained in the file COPYING. ++*/ ++ ++/* Only to be used within the guest-loongarch64 directory. 
*/ ++ ++#ifndef __VEX_GUEST_LOONGARCH64_DEFS_H ++#define __VEX_GUEST_LOONGARCH64_DEFS_H ++ ++#include "libvex_basictypes.h" ++#include "guest_generic_bb_to_IR.h" /* DisResult */ ++ ++ ++/*---------------------------------------------------------*/ ++/*--- loongarch64 to IR conversion ---*/ ++/*---------------------------------------------------------*/ ++ ++/* Convert one LOONGARCH64 insn to IR. See the type DisOneInstrFn in ++ guest_generic_bb_to_IR.h. */ ++extern DisResult disInstr_LOONGARCH64 ( IRSB* irsb_IN, ++ const UChar* guest_code_IN, ++ Long delta, ++ Addr guest_IP, ++ VexArch guest_arch, ++ const VexArchInfo* archinfo, ++ const VexAbiInfo* abiinfo, ++ VexEndness host_endness_IN, ++ Bool sigill_diag_IN ); ++ ++/* Used by the optimiser to specialise calls to helpers. */ ++extern IRExpr* guest_loongarch64_spechelper ( const HChar* function_name, ++ IRExpr** args, ++ IRStmt** precedingStmts, ++ Int n_precedingStmts ); ++ ++/* Describes to the optimser which part of the guest state require ++ precise memory exceptions. This is logically part of the guest ++ state description. */ ++extern Bool guest_loongarch64_state_requires_precise_mem_exns ( Int minoff, ++ Int maxoff, ++ VexRegisterUpdates pxControl ); ++ ++extern VexGuestLayout loongarch64Guest_layout; ++ ++ ++/*---------------------------------------------------------*/ ++/*--- loongarch64 guest helpers ---*/ ++/*---------------------------------------------------------*/ ++ ++enum fpop { ++ FADD_S, FADD_D, FSUB_S, FSUB_D, ++ FMUL_S, FMUL_D, FDIV_S, FDIV_D, ++ FMADD_S, FMADD_D, FMSUB_S, FMSUB_D, ++ FNMADD_S, FNMADD_D, FNMSUB_S, FNMSUB_D, ++ FMAX_S, FMAX_D, FMIN_S, FMIN_D, ++ FMAXA_S, FMAXA_D, FMINA_S, FMINA_D, ++ FABS_S, FABS_D, FNEG_S, FNEG_D, ++ FSQRT_S, FSQRT_D, ++ FRECIP_S, FRECIP_D, ++ FRSQRT_S, FRSQRT_D, ++ FSCALEB_S, FSCALEB_D, ++ FLOGB_S, FLOGB_D, ++ FCMP_CAF_S, FCMP_CAF_D, FCMP_SAF_S, FCMP_SAF_D, ++ FCMP_CLT_S, FCMP_CLT_D, FCMP_SLT_S, FCMP_SLT_D, ++ FCMP_CEQ_S, FCMP_CEQ_D, FCMP_SEQ_S, FCMP_SEQ_D, ++ FCMP_CLE_S, FCMP_CLE_D, FCMP_SLE_S, FCMP_SLE_D, ++ FCMP_CUN_S, FCMP_CUN_D, FCMP_SUN_S, FCMP_SUN_D, ++ FCMP_CULT_S, FCMP_CULT_D, FCMP_SULT_S, FCMP_SULT_D, ++ FCMP_CUEQ_S, FCMP_CUEQ_D, FCMP_SUEQ_S, FCMP_SUEQ_D, ++ FCMP_CULE_S, FCMP_CULE_D, FCMP_SULE_S, FCMP_SULE_D, ++ FCMP_CNE_S, FCMP_CNE_D, FCMP_SNE_S, FCMP_SNE_D, ++ FCMP_COR_S, FCMP_COR_D, FCMP_SOR_S, FCMP_SOR_D, ++ FCMP_CUNE_S, FCMP_CUNE_D, FCMP_SUNE_S, FCMP_SUNE_D, ++ FCVT_S_D, FCVT_D_S, ++ FTINTRM_W_S, FTINTRM_W_D, FTINTRM_L_S, FTINTRM_L_D, ++ FTINTRP_W_S, FTINTRP_W_D, FTINTRP_L_S, FTINTRP_L_D, ++ FTINTRZ_W_S, FTINTRZ_W_D, FTINTRZ_L_S, FTINTRZ_L_D, ++ FTINTRNE_W_S, FTINTRNE_W_D, FTINTRNE_L_S, FTINTRNE_L_D, ++ FTINT_W_S, FTINT_W_D, FTINT_L_S, FTINT_L_D, ++ FFINT_S_W, FFINT_D_W, FFINT_S_L, FFINT_D_L, ++ FRINT_S, FRINT_D ++}; ++ ++extern ULong loongarch64_calculate_cpucfg ( ULong src ); ++extern ULong loongarch64_calculate_revb_2h ( ULong src ); ++extern ULong loongarch64_calculate_revb_4h ( ULong src ); ++extern ULong loongarch64_calculate_revb_2w ( ULong src ); ++extern ULong loongarch64_calculate_revb_d ( ULong src ); ++extern ULong loongarch64_calculate_revh_2w ( ULong src ); ++extern ULong loongarch64_calculate_revh_d ( ULong src ); ++extern ULong loongarch64_calculate_bitrev_4b ( ULong src ); ++extern ULong loongarch64_calculate_bitrev_8b ( ULong src ); ++extern ULong loongarch64_calculate_bitrev_w ( ULong src ); ++extern ULong loongarch64_calculate_bitrev_d ( ULong src ); ++extern ULong loongarch64_calculate_crc ( ULong old, ULong msg, ULong len ); ++extern ULong 
loongarch64_calculate_crcc ( ULong old, ULong msg, ULong len ); ++extern ULong loongarch64_calculate_fclass_s ( ULong src ); ++extern ULong loongarch64_calculate_fclass_d ( ULong src ); ++extern ULong loongarch64_calculate_FCSR ( enum fpop op, ULong src1, ++ ULong src2, ULong src3 ); ++ ++#endif /* ndef __VEX_GUEST_LOONGARCH64_DEFS_H */ ++ ++ ++/*---------------------------------------------------------------*/ ++/*--- end guest_loongarch64_defs.h ---*/ ++/*---------------------------------------------------------------*/ +diff --git a/VEX/priv/guest_loongarch64_helpers.c b/VEX/priv/guest_loongarch64_helpers.c +new file mode 100644 +index 000000000..737ef7c1a +--- /dev/null ++++ b/VEX/priv/guest_loongarch64_helpers.c +@@ -0,0 +1,874 @@ ++ ++/*---------------------------------------------------------------*/ ++/*--- begin guest_loongarch64_helpers.c ---*/ ++/*---------------------------------------------------------------*/ ++ ++/* ++ This file is part of Valgrind, a dynamic binary instrumentation ++ framework. ++ ++ Copyright (C) 2021-2022 Loongson Technology Corporation Limited ++ ++ This program is free software; you can redistribute it and/or ++ modify it under the terms of the GNU General Public License as ++ published by the Free Software Foundation; either version 2 of the ++ License, or (at your option) any later version. ++ ++ This program is distributed in the hope that it will be useful, but ++ WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ General Public License for more details. ++ ++ You should have received a copy of the GNU General Public License ++ along with this program; if not, see . ++ ++ The GNU General Public License is contained in the file COPYING. ++*/ ++ ++#include "libvex_basictypes.h" ++#include "libvex_emnote.h" ++#include "libvex_guest_loongarch64.h" ++#include "libvex_ir.h" ++#include "libvex.h" ++ ++#include "main_util.h" ++#include "main_globals.h" ++#include "guest_generic_bb_to_IR.h" ++#include "guest_loongarch64_defs.h" ++ ++ ++/* This file contains helper functions for loongarch64 guest code. ++ Calls to these functions are generated by the back end. */ ++ ++IRExpr* guest_loongarch64_spechelper ( const HChar * function_name, ++ IRExpr ** args, ++ IRStmt ** precedingStmts, ++ Int n_precedingStmts ) ++{ ++ return NULL; ++} ++ ++/* VISIBLE TO LIBVEX CLIENT */ ++void LibVEX_GuestLOONGARCH64_initialise ( /*OUT*/ ++ VexGuestLOONGARCH64State* vex_state ) ++{ ++ Int i; ++ ++ /* Event check fail addr and counter. 
*/ ++ vex_state->host_EvC_FAILADDR = 0; ++ vex_state->host_EvC_COUNTER = 0; ++ ++ /* CPU Registers */ ++ vex_state->guest_R0 = 0; /* Constant zero */ ++ vex_state->guest_R1 = 0; /* Return address */ ++ vex_state->guest_R2 = 0; /* Thread pointer */ ++ vex_state->guest_R3 = 0; /* Stack pointer */ ++ vex_state->guest_R4 = 0; /* Argument registers / Return value */ ++ vex_state->guest_R5 = 0; ++ vex_state->guest_R6 = 0; /* Argument registers */ ++ vex_state->guest_R7 = 0; ++ vex_state->guest_R8 = 0; ++ vex_state->guest_R9 = 0; ++ vex_state->guest_R10 = 0; ++ vex_state->guest_R11 = 0; ++ vex_state->guest_R12 = 0; /* Temporary registers */ ++ vex_state->guest_R13 = 0; ++ vex_state->guest_R14 = 0; ++ vex_state->guest_R15 = 0; ++ vex_state->guest_R16 = 0; ++ vex_state->guest_R17 = 0; ++ vex_state->guest_R18 = 0; ++ vex_state->guest_R19 = 0; ++ vex_state->guest_R20 = 0; ++ vex_state->guest_R21 = 0; /* Reserved */ ++ vex_state->guest_R22 = 0; /* Frame pointer / Static register */ ++ vex_state->guest_R23 = 0; /* Static registers */ ++ vex_state->guest_R24 = 0; ++ vex_state->guest_R25 = 0; ++ vex_state->guest_R26 = 0; ++ vex_state->guest_R27 = 0; ++ vex_state->guest_R28 = 0; ++ vex_state->guest_R29 = 0; ++ vex_state->guest_R30 = 0; ++ vex_state->guest_R31 = 0; ++ ++ vex_state->guest_PC = 0; /* Program counter */ ++ ++ /* FPU/SIMD Registers */ ++ for (i = 0; i < 8; i++) { ++ vex_state->guest_X0[i] = 0xff; ++ vex_state->guest_X1[i] = 0xff; ++ vex_state->guest_X2[i] = 0xff; ++ vex_state->guest_X3[i] = 0xff; ++ vex_state->guest_X4[i] = 0xff; ++ vex_state->guest_X5[i] = 0xff; ++ vex_state->guest_X6[i] = 0xff; ++ vex_state->guest_X7[i] = 0xff; ++ vex_state->guest_X8[i] = 0xff; ++ vex_state->guest_X9[i] = 0xff; ++ vex_state->guest_X10[i] = 0xff; ++ vex_state->guest_X11[i] = 0xff; ++ vex_state->guest_X12[i] = 0xff; ++ vex_state->guest_X13[i] = 0xff; ++ vex_state->guest_X14[i] = 0xff; ++ vex_state->guest_X15[i] = 0xff; ++ vex_state->guest_X16[i] = 0xff; ++ vex_state->guest_X17[i] = 0xff; ++ vex_state->guest_X18[i] = 0xff; ++ vex_state->guest_X19[i] = 0xff; ++ vex_state->guest_X20[i] = 0xff; ++ vex_state->guest_X21[i] = 0xff; ++ vex_state->guest_X22[i] = 0xff; ++ vex_state->guest_X23[i] = 0xff; ++ vex_state->guest_X24[i] = 0xff; ++ vex_state->guest_X25[i] = 0xff; ++ vex_state->guest_X26[i] = 0xff; ++ vex_state->guest_X27[i] = 0xff; ++ vex_state->guest_X28[i] = 0xff; ++ vex_state->guest_X29[i] = 0xff; ++ vex_state->guest_X30[i] = 0xff; ++ vex_state->guest_X31[i] = 0xff; ++ } ++ ++ vex_state->guest_FCC0 = 0; /* Condition Flag Registers */ ++ vex_state->guest_FCC1 = 0; ++ vex_state->guest_FCC2 = 0; ++ vex_state->guest_FCC3 = 0; ++ vex_state->guest_FCC4 = 0; ++ vex_state->guest_FCC5 = 0; ++ vex_state->guest_FCC6 = 0; ++ vex_state->guest_FCC7 = 0; ++ vex_state->guest_FCSR = 0; /* FP Control and Status Register */ ++ ++ /* Various pseudo-regs mandated by Vex or Valgrind. */ ++ /* Emulation notes */ ++ vex_state->guest_EMNOTE = 0; ++ ++ /* For clflush: record start and length of area to invalidate */ ++ vex_state->guest_CMSTART = 0; ++ vex_state->guest_CMLEN = 0; ++ ++ /* Used to record the unredirected guest address at the start of ++ a translation whose start has been redirected. By reading ++ this pseudo-register shortly afterwards, the translation can ++ find out what the corresponding no-redirection address was. ++ Note, this is only set for wrap-style redirects, not for ++ replace-style ones. 
*/ ++ vex_state->guest_NRADDR = 0; ++} ++ ++ ++/*-----------------------------------------------------------*/ ++/*--- Describing the loongarch64 guest state, for the ---*/ ++/*--- benefit of iropt and instrumenters ---*/ ++/*-----------------------------------------------------------*/ ++ ++/* Figure out if any part of the guest state contained in minoff ++ .. maxoff requires precise memory exceptions. If in doubt return ++ True (but this generates significantly slower code). ++ ++ We enforce precise exns for guest SP, PC and FP. ++ ++ Only SP is needed in mode VexRegUpdSpAtMemAccess. ++*/ ++ ++Bool guest_loongarch64_state_requires_precise_mem_exns ( Int minoff, ++ Int maxoff, ++ VexRegisterUpdates pxControl ) ++{ ++ Int sp_min = offsetof(VexGuestLOONGARCH64State, guest_R3); ++ Int sp_max = sp_min + 8 - 1; ++ if ( maxoff < sp_min || minoff > sp_max ) { ++ /* no overlap with sp */ ++ if (pxControl == VexRegUpdSpAtMemAccess) ++ return False; /* We only need to check stack pointer. */ ++ } else { ++ return True; ++ } ++ ++ Int pc_min = offsetof(VexGuestLOONGARCH64State, guest_PC); ++ Int pc_max = pc_min + 8 - 1; ++ if ( maxoff < pc_min || minoff > pc_max ) { ++ /* no overlap with pc */ ++ } else { ++ return True; ++ } ++ ++ Int fp_min = offsetof(VexGuestLOONGARCH64State, guest_R22); ++ Int fp_max = fp_min + 8 - 1; ++ if ( maxoff < fp_min || minoff > fp_max ) { ++ /* no overlap with fp */ ++ } else { ++ return True; ++ } ++ ++ return False; ++} ++ ++#define ALWAYSDEFD64(field) \ ++ { offsetof(VexGuestLOONGARCH64State, field), \ ++ (sizeof ((VexGuestLOONGARCH64State*)0)->field) } ++ ++VexGuestLayout loongarch64Guest_layout = { ++ /* Total size of the guest state, in bytes. */ ++ .total_sizeB = sizeof(VexGuestLOONGARCH64State), ++ /* Describe the stack pointer. */ ++ .offset_SP = offsetof(VexGuestLOONGARCH64State, guest_R3), ++ .sizeof_SP = 8, ++ /* Describe the frame pointer. */ ++ .offset_FP = offsetof(VexGuestLOONGARCH64State, guest_R22), ++ .sizeof_FP = 8, ++ /* Describe the instruction pointer. */ ++ .offset_IP = offsetof(VexGuestLOONGARCH64State, guest_PC), ++ .sizeof_IP = 8, ++ /* Describe any sections to be regarded by Memcheck as ++ 'always-defined'. */ ++ .n_alwaysDefd = 6, ++ /* ? :( */ ++ .alwaysDefd = { ++ /* 0 */ ALWAYSDEFD64(guest_R0), ++ /* 1 */ ALWAYSDEFD64(guest_PC), ++ /* 2 */ ALWAYSDEFD64(guest_EMNOTE), ++ /* 3 */ ALWAYSDEFD64(guest_CMSTART), ++ /* 4 */ ALWAYSDEFD64(guest_CMLEN), ++ /* 5 */ ALWAYSDEFD64(guest_NRADDR), ++ } ++}; ++ ++ ++/*-----------------------------------------------------------*/ ++/*--- loongarch64 guest helpers ---*/ ++/*-----------------------------------------------------------*/ ++ ++/* Claim to be the following CPU, which is probably representative of ++ the earliest loongarch64 offerings. 
++ ++ CPU Family : Loongson-64bit ++ Model Name : Loongson-3A5000LL ++ CPU Revision : 0x10 ++ FPU Revision : 0x00 ++ CPU MHz : 2300.00 ++ BogoMIPS : 4600.00 ++ TLB Entries : 2112 ++ Address Sizes : 48 bits physical, 48 bits virtual ++ ISA : loongarch32 loongarch64 ++ Features : cpucfg lam ual fpu lsx lasx complex crypto lvz ++ Hardware Watchpoint : yes, iwatch count: 8, dwatch count: 8 ++*/ ++ULong loongarch64_calculate_cpucfg ( ULong src ) ++{ ++ ULong res; ++ switch (src) { ++ case 0x0: ++ res = 0x0014c010; ++ break; ++ case 0x1: ++ res = 0x03f2f2fe; ++ break; ++ case 0x2: ++ res = 0x007ccfc7; ++ break; ++ case 0x3: ++ res = 0x0000fcff; ++ break; ++ case 0x4: ++ res = 0x05f5e100; ++ break; ++ case 0x5: ++ res = 0x00010001; ++ break; ++ case 0x6: ++ res = 0x00007f33; ++ break; ++ case 0x10: ++ res = 0x00002c3d; ++ break; ++ case 0x11: ++ res = 0x06080003; ++ break; ++ case 0x12: ++ res = 0x06080003; ++ break; ++ case 0x13: ++ res = 0x0608000f; ++ break; ++ case 0x14: ++ res = 0x060e000f; ++ break; ++ default: ++ res = 0x00000000; ++ break; ++ } ++ return (ULong)(Long)(Int)res; ++} ++ ++static void swap_UChar ( UChar* a, UChar* b ) ++{ ++ UChar t = *a; ++ *a = *b; ++ *b = t; ++} ++ ++ULong loongarch64_calculate_revb_2h ( ULong src ) ++{ ++ UChar* s = (UChar*)&src; ++ swap_UChar(&s[0], &s[1]); ++ swap_UChar(&s[2], &s[3]); ++ return (ULong)(Long)(Int)src; ++} ++ ++ULong loongarch64_calculate_revb_4h ( ULong src ) ++{ ++ UChar* s = (UChar*)&src; ++ swap_UChar(&s[0], &s[1]); ++ swap_UChar(&s[2], &s[3]); ++ swap_UChar(&s[4], &s[5]); ++ swap_UChar(&s[6], &s[7]); ++ return src; ++} ++ ++ULong loongarch64_calculate_revb_2w ( ULong src ) ++{ ++ UChar* s = (UChar*)&src; ++ swap_UChar(&s[0], &s[3]); ++ swap_UChar(&s[1], &s[2]); ++ swap_UChar(&s[4], &s[7]); ++ swap_UChar(&s[5], &s[6]); ++ return src; ++} ++ ++ULong loongarch64_calculate_revb_d ( ULong src ) ++{ ++ UChar* s = (UChar*)&src; ++ swap_UChar(&s[0], &s[7]); ++ swap_UChar(&s[1], &s[6]); ++ swap_UChar(&s[2], &s[5]); ++ swap_UChar(&s[3], &s[4]); ++ return src; ++} ++ ++static void swap_UShort ( UShort* a, UShort* b ) ++{ ++ UShort t = *a; ++ *a = *b; ++ *b = t; ++} ++ ++ULong loongarch64_calculate_revh_2w ( ULong src ) ++{ ++ UShort* s = (UShort*)&src; ++ swap_UShort(&s[0], &s[1]); ++ swap_UShort(&s[2], &s[3]); ++ return src; ++} ++ ++ULong loongarch64_calculate_revh_d ( ULong src ) ++{ ++ UShort* s = (UShort*)&src; ++ swap_UShort(&s[0], &s[3]); ++ swap_UShort(&s[1], &s[2]); ++ return src; ++} ++ ++static ULong bitrev ( ULong src, ULong start, ULong end ) ++{ ++ int i, j; ++ ULong res = 0; ++ for (i = start, j = 1; i < end; i++, j++) ++ res |= ((src >> i) & 1) << (end - j); ++ return res; ++} ++ ++ULong loongarch64_calculate_bitrev_4b ( ULong src ) ++{ ++ ULong res = bitrev(src, 0, 8); ++ res |= bitrev(src, 8, 16); ++ res |= bitrev(src, 16, 24); ++ res |= bitrev(src, 24, 32); ++ return (ULong)(Long)(Int)res; ++} ++ ++ULong loongarch64_calculate_bitrev_8b ( ULong src ) ++{ ++ ULong res = bitrev(src, 0, 8); ++ res |= bitrev(src, 8, 16); ++ res |= bitrev(src, 16, 24); ++ res |= bitrev(src, 24, 32); ++ res |= bitrev(src, 32, 40); ++ res |= bitrev(src, 40, 48); ++ res |= bitrev(src, 48, 56); ++ res |= bitrev(src, 56, 64); ++ return res; ++} ++ ++ULong loongarch64_calculate_bitrev_w ( ULong src ) ++{ ++ ULong res = bitrev(src, 0, 32); ++ return (ULong)(Long)(Int)res; ++} ++ ++ULong loongarch64_calculate_bitrev_d ( ULong src ) ++{ ++ return bitrev(src, 0, 64); ++} ++ ++static ULong crc32 ( ULong old, ULong msg, ULong width, ULong poly ) ++{ ++ int i; 
++ ULong new; ++ if (width == 8) ++ msg &= 0xff; ++ else if (width == 16) ++ msg &= 0xffff; ++ else if (width == 32) ++ msg &= 0xffffffff; ++ new = (old & 0xffffffff) ^ msg; ++ for (i = 0; i < width; i++) { ++ if (new & 1) ++ new = (new >> 1) ^ poly; ++ else ++ new >>= 1; ++ } ++ return new; ++} ++ ++ULong loongarch64_calculate_crc ( ULong old, ULong msg, ULong len ) ++{ ++ ULong res = crc32(old, msg, len, 0xedb88320); ++ return (ULong)(Long)(Int)res; ++} ++ ++ULong loongarch64_calculate_crcc ( ULong old, ULong msg, ULong len ) ++{ ++ ULong res = crc32(old, msg, len, 0x82f63b78); ++ return (ULong)(Long)(Int)res; ++} ++ ++ULong loongarch64_calculate_fclass_s ( ULong src ) ++{ ++ UInt f = src; ++ Bool sign = toBool(f >> 31); ++ if ((f & 0x7fffffff) == 0x7f800000) { ++ return sign ? 1 << 2 : 1 << 6; ++ } else if ((f & 0x7fffffff) == 0) { ++ return sign ? 1 << 5 : 1 << 9; ++ } else if ((f & 0x7f800000) == 0) { ++ return sign ? 1 << 4 : 1 << 8; ++ } else if ((f & ~(1 << 31)) > 0x7f800000) { ++ return ((UInt)(f << 1) >= 0xff800000) ? 1 << 1 : 1 << 0; ++ } else { ++ return sign ? 1 << 3 : 1 << 7; ++ } ++} ++ ++ULong loongarch64_calculate_fclass_d ( ULong src ) ++{ ++ ULong f = src; ++ Bool sign = toBool(f >> 63); ++ if ((f & 0x7fffffffffffffffULL) == 0x7ff0000000000000ULL) { ++ return sign ? 1 << 2 : 1 << 6; ++ } else if ((f & 0x7fffffffffffffffULL) == 0) { ++ return sign ? 1 << 5 : 1 << 9; ++ } else if ((f & 0x7ff0000000000000ULL) == 0) { ++ return sign ? 1 << 4 : 1 << 8; ++ } else if ((f & ~(1ULL << 63)) > 0x7ff0000000000000ULL) { ++ return ((f << 1) >= 0xfff0000000000000ULL) ? 1 << 1 : 1 << 0; ++ } else { ++ return sign ? 1 << 3 : 1 << 7; ++ } ++} ++ ++#if defined(__loongarch__) ++#define ASM_VOLATILE_UNARY(inst) \ ++ __asm__ volatile("movfcsr2gr $s0, $r0 \n\t" \ ++ "movgr2fcsr $r2, $zero \n\t" \ ++ #inst" $f24, %1 \n\t" \ ++ "movfcsr2gr %0, $r2 \n\t" \ ++ "movgr2fcsr $r0, $s0 \n\t" \ ++ : "=r" (fcsr2) \ ++ : "f" (src1) \ ++ : "$s0", "$f24" \ ++ ) ++ ++#define ASM_VOLATILE_BINARY(inst) \ ++ __asm__ volatile("movfcsr2gr $s0, $r0 \n\t" \ ++ "movgr2fcsr $r2, $zero \n\t" \ ++ #inst" $f24, %1, %2 \n\t" \ ++ "movfcsr2gr %0, $r2 \n\t" \ ++ "movgr2fcsr $r0, $s0 \n\t" \ ++ : "=r" (fcsr2) \ ++ : "f" (src1), "f" (src2) \ ++ : "$s0", "$f24" \ ++ ) ++ ++#define ASM_VOLATILE_TRINARY(inst) \ ++ __asm__ volatile("movfcsr2gr $s0, $r0 \n\t" \ ++ "movgr2fcsr $r2, $zero \n\t" \ ++ #inst" $f24, %1, %2, %3 \n\t" \ ++ "movfcsr2gr %0, $r2 \n\t" \ ++ "movgr2fcsr $r0, $s0 \n\t" \ ++ : "=r" (fcsr2) \ ++ : "f" (src1), "f" (src2), "f" (src3) \ ++ : "$s0", "$f24" \ ++ ) ++ ++#define ASM_VOLATILE_FCMP(inst) \ ++ __asm__ volatile("movfcsr2gr $s0, $r0 \n\t" \ ++ "movgr2fcsr $r2, $zero \n\t" \ ++ #inst" $fcc0, %1, %2 \n\t" \ ++ "movfcsr2gr %0, $r0 \n\t" \ ++ "movgr2fcsr $r0, $s0 \n\t" \ ++ : "=r" (fcsr2) \ ++ : "f" (src1), "f" (src2) \ ++ : "$s0", "$fcc0" \ ++ ) ++#endif ++ ++/* Calculate FCSR and return whether an exception needs to be thrown */ ++ULong loongarch64_calculate_FCSR ( enum fpop op, ULong src1, ++ ULong src2, ULong src3 ) ++{ ++ UInt fcsr2 = 0; ++#if defined(__loongarch__) ++ switch (op) { ++ case FADD_S: ++ ASM_VOLATILE_BINARY(fadd.s); ++ break; ++ case FADD_D: ++ ASM_VOLATILE_BINARY(fadd.d); ++ break; ++ case FSUB_S: ++ ASM_VOLATILE_BINARY(fsub.s); ++ break; ++ case FSUB_D: ++ ASM_VOLATILE_BINARY(fsub.d); ++ break; ++ case FMUL_S: ++ ASM_VOLATILE_BINARY(fmul.s); ++ break; ++ case FMUL_D: ++ ASM_VOLATILE_BINARY(fmul.d); ++ break; ++ case FDIV_S: ++ ASM_VOLATILE_BINARY(fdiv.s); ++ break; ++ case FDIV_D: ++ 
ASM_VOLATILE_BINARY(fdiv.d); ++ break; ++ case FMADD_S: ++ ASM_VOLATILE_TRINARY(fmadd.s); ++ break; ++ case FMADD_D: ++ ASM_VOLATILE_TRINARY(fmadd.d); ++ break; ++ case FMSUB_S: ++ ASM_VOLATILE_TRINARY(fmsub.s); ++ break; ++ case FMSUB_D: ++ ASM_VOLATILE_TRINARY(fmsub.d); ++ break; ++ case FNMADD_S: ++ ASM_VOLATILE_TRINARY(fnmadd.s); ++ break; ++ case FNMADD_D: ++ ASM_VOLATILE_TRINARY(fnmadd.d); ++ break; ++ case FNMSUB_S: ++ ASM_VOLATILE_TRINARY(fnmsub.s); ++ break; ++ case FNMSUB_D: ++ ASM_VOLATILE_TRINARY(fnmsub.d); ++ break; ++ case FMAX_S: ++ ASM_VOLATILE_BINARY(fmax.s); ++ break; ++ case FMAX_D: ++ ASM_VOLATILE_BINARY(fmax.d); ++ break; ++ case FMIN_S: ++ ASM_VOLATILE_BINARY(fmin.s); ++ break; ++ case FMIN_D: ++ ASM_VOLATILE_BINARY(fmin.d); ++ break; ++ case FMAXA_S: ++ ASM_VOLATILE_BINARY(fmaxa.s); ++ break; ++ case FMAXA_D: ++ ASM_VOLATILE_BINARY(fmaxa.d); ++ break; ++ case FMINA_S: ++ ASM_VOLATILE_BINARY(fmina.s); ++ break; ++ case FMINA_D: ++ ASM_VOLATILE_BINARY(fmina.d); ++ break; ++ case FABS_S: ++ ASM_VOLATILE_UNARY(fabs.s); ++ break; ++ case FABS_D: ++ ASM_VOLATILE_UNARY(fabs.d); ++ break; ++ case FNEG_S: ++ ASM_VOLATILE_UNARY(fneg.s); ++ break; ++ case FNEG_D: ++ ASM_VOLATILE_UNARY(fneg.d); ++ break; ++ case FSQRT_S: ++ ASM_VOLATILE_UNARY(fsqrt.s); ++ break; ++ case FSQRT_D: ++ ASM_VOLATILE_UNARY(fsqrt.d); ++ break; ++ case FRECIP_S: ++ ASM_VOLATILE_UNARY(frecip.s); ++ break; ++ case FRECIP_D: ++ ASM_VOLATILE_UNARY(frecip.d); ++ break; ++ case FRSQRT_S: ++ ASM_VOLATILE_UNARY(frsqrt.s); ++ break; ++ case FRSQRT_D: ++ ASM_VOLATILE_UNARY(frsqrt.d); ++ break; ++ case FSCALEB_S: ++ ASM_VOLATILE_BINARY(fscaleb.s); ++ break; ++ case FSCALEB_D: ++ ASM_VOLATILE_BINARY(fscaleb.d); ++ break; ++ case FLOGB_S: ++ ASM_VOLATILE_UNARY(flogb.s); ++ break; ++ case FLOGB_D: ++ ASM_VOLATILE_UNARY(flogb.d); ++ break; ++ case FCMP_CAF_S: ++ ASM_VOLATILE_FCMP(fcmp.caf.s); ++ break; ++ case FCMP_CAF_D: ++ ASM_VOLATILE_FCMP(fcmp.caf.d); ++ break; ++ case FCMP_SAF_S: ++ ASM_VOLATILE_FCMP(fcmp.saf.s); ++ break; ++ case FCMP_SAF_D: ++ ASM_VOLATILE_FCMP(fcmp.saf.d); ++ break; ++ case FCMP_CLT_S: ++ ASM_VOLATILE_FCMP(fcmp.clt.s); ++ break; ++ case FCMP_CLT_D: ++ ASM_VOLATILE_FCMP(fcmp.clt.d); ++ break; ++ case FCMP_SLT_S: ++ ASM_VOLATILE_FCMP(fcmp.slt.s); ++ break; ++ case FCMP_SLT_D: ++ ASM_VOLATILE_FCMP(fcmp.slt.d); ++ break; ++ case FCMP_CEQ_S: ++ ASM_VOLATILE_FCMP(fcmp.ceq.s); ++ break; ++ case FCMP_CEQ_D: ++ ASM_VOLATILE_FCMP(fcmp.ceq.d); ++ break; ++ case FCMP_SEQ_S: ++ ASM_VOLATILE_FCMP(fcmp.seq.s); ++ break; ++ case FCMP_SEQ_D: ++ ASM_VOLATILE_FCMP(fcmp.seq.d); ++ break; ++ case FCMP_CLE_S: ++ ASM_VOLATILE_FCMP(fcmp.cle.s); ++ break; ++ case FCMP_CLE_D: ++ ASM_VOLATILE_FCMP(fcmp.cle.d); ++ break; ++ case FCMP_SLE_S: ++ ASM_VOLATILE_FCMP(fcmp.sle.s); ++ break; ++ case FCMP_SLE_D: ++ ASM_VOLATILE_FCMP(fcmp.sle.d); ++ break; ++ case FCMP_CUN_S: ++ ASM_VOLATILE_FCMP(fcmp.cun.s); ++ break; ++ case FCMP_CUN_D: ++ ASM_VOLATILE_FCMP(fcmp.cun.d); ++ break; ++ case FCMP_SUN_S: ++ ASM_VOLATILE_FCMP(fcmp.sun.s); ++ break; ++ case FCMP_SUN_D: ++ ASM_VOLATILE_FCMP(fcmp.sun.d); ++ break; ++ case FCMP_CULT_S: ++ ASM_VOLATILE_FCMP(fcmp.cult.s); ++ break; ++ case FCMP_CULT_D: ++ ASM_VOLATILE_FCMP(fcmp.cult.d); ++ break; ++ case FCMP_SULT_S: ++ ASM_VOLATILE_FCMP(fcmp.sult.s); ++ break; ++ case FCMP_SULT_D: ++ ASM_VOLATILE_FCMP(fcmp.sult.d); ++ break; ++ case FCMP_CUEQ_S: ++ ASM_VOLATILE_FCMP(fcmp.cueq.s); ++ break; ++ case FCMP_CUEQ_D: ++ ASM_VOLATILE_FCMP(fcmp.cueq.d); ++ break; ++ case FCMP_SUEQ_S: ++ 
ASM_VOLATILE_FCMP(fcmp.sueq.s); ++ break; ++ case FCMP_SUEQ_D: ++ ASM_VOLATILE_FCMP(fcmp.sueq.d); ++ break; ++ case FCMP_CULE_S: ++ ASM_VOLATILE_FCMP(fcmp.cule.s); ++ break; ++ case FCMP_CULE_D: ++ ASM_VOLATILE_FCMP(fcmp.cule.d); ++ break; ++ case FCMP_SULE_S: ++ ASM_VOLATILE_FCMP(fcmp.sule.s); ++ break; ++ case FCMP_SULE_D: ++ ASM_VOLATILE_FCMP(fcmp.sule.d); ++ break; ++ case FCMP_CNE_S: ++ ASM_VOLATILE_FCMP(fcmp.cne.s); ++ break; ++ case FCMP_CNE_D: ++ ASM_VOLATILE_FCMP(fcmp.cne.d); ++ break; ++ case FCMP_SNE_S: ++ ASM_VOLATILE_FCMP(fcmp.sne.s); ++ break; ++ case FCMP_SNE_D: ++ ASM_VOLATILE_FCMP(fcmp.sne.d); ++ break; ++ case FCMP_COR_S: ++ ASM_VOLATILE_FCMP(fcmp.cor.s); ++ break; ++ case FCMP_COR_D: ++ ASM_VOLATILE_FCMP(fcmp.cor.d); ++ break; ++ case FCMP_SOR_S: ++ ASM_VOLATILE_FCMP(fcmp.sor.s); ++ break; ++ case FCMP_SOR_D: ++ ASM_VOLATILE_FCMP(fcmp.sor.d); ++ break; ++ case FCMP_CUNE_S: ++ ASM_VOLATILE_FCMP(fcmp.cune.s); ++ break; ++ case FCMP_CUNE_D: ++ ASM_VOLATILE_FCMP(fcmp.cune.d); ++ break; ++ case FCMP_SUNE_S: ++ ASM_VOLATILE_FCMP(fcmp.sune.s); ++ break; ++ case FCMP_SUNE_D: ++ ASM_VOLATILE_FCMP(fcmp.sune.d); ++ break; ++ case FCVT_S_D: ++ ASM_VOLATILE_UNARY(fcvt.s.d); ++ break; ++ case FCVT_D_S: ++ ASM_VOLATILE_UNARY(fcvt.d.s); ++ break; ++ case FTINTRM_W_S: ++ ASM_VOLATILE_UNARY(ftintrm.w.s); ++ break; ++ case FTINTRM_W_D: ++ ASM_VOLATILE_UNARY(ftintrm.w.d); ++ break; ++ case FTINTRM_L_S: ++ ASM_VOLATILE_UNARY(ftintrm.l.s); ++ break; ++ case FTINTRM_L_D: ++ ASM_VOLATILE_UNARY(ftintrm.l.d); ++ break; ++ case FTINTRP_W_S: ++ ASM_VOLATILE_UNARY(ftintrp.w.s); ++ break; ++ case FTINTRP_W_D: ++ ASM_VOLATILE_UNARY(ftintrp.w.d); ++ break; ++ case FTINTRP_L_S: ++ ASM_VOLATILE_UNARY(ftintrp.l.s); ++ break; ++ case FTINTRP_L_D: ++ ASM_VOLATILE_UNARY(ftintrp.l.d); ++ break; ++ case FTINTRZ_W_S: ++ ASM_VOLATILE_UNARY(ftintrz.w.s); ++ break; ++ case FTINTRZ_W_D: ++ ASM_VOLATILE_UNARY(ftintrz.w.d); ++ break; ++ case FTINTRZ_L_S: ++ ASM_VOLATILE_UNARY(ftintrz.l.s); ++ break; ++ case FTINTRZ_L_D: ++ ASM_VOLATILE_UNARY(ftintrz.l.d); ++ break; ++ case FTINTRNE_W_S: ++ ASM_VOLATILE_UNARY(ftintrne.w.s); ++ break; ++ case FTINTRNE_W_D: ++ ASM_VOLATILE_UNARY(ftintrne.w.d); ++ break; ++ case FTINTRNE_L_S: ++ ASM_VOLATILE_UNARY(ftintrne.l.s); ++ break; ++ case FTINTRNE_L_D: ++ ASM_VOLATILE_UNARY(ftintrne.l.d); ++ break; ++ case FTINT_W_S: ++ ASM_VOLATILE_UNARY(ftint.w.s); ++ break; ++ case FTINT_W_D: ++ ASM_VOLATILE_UNARY(ftint.w.d); ++ break; ++ case FTINT_L_S: ++ ASM_VOLATILE_UNARY(ftint.l.s); ++ break; ++ case FTINT_L_D: ++ ASM_VOLATILE_UNARY(ftint.l.d); ++ break; ++ case FFINT_S_W: ++ ASM_VOLATILE_UNARY(ffint.s.w); ++ break; ++ case FFINT_D_W: ++ ASM_VOLATILE_UNARY(ffint.d.w); ++ break; ++ case FFINT_S_L: ++ ASM_VOLATILE_UNARY(ffint.s.l); ++ break; ++ case FFINT_D_L: ++ ASM_VOLATILE_UNARY(ffint.d.l); ++ break; ++ case FRINT_S: ++ ASM_VOLATILE_UNARY(frint.s); ++ break; ++ case FRINT_D: ++ ASM_VOLATILE_UNARY(frint.d); ++ break; ++ default: ++ break; ++ } ++#endif ++ return (ULong)fcsr2; ++} ++ ++ ++/*---------------------------------------------------------------*/ ++/*--- end guest_loongarch64_helpers.c ---*/ ++/*---------------------------------------------------------------*/ +diff --git a/VEX/priv/guest_loongarch64_toIR.c b/VEX/priv/guest_loongarch64_toIR.c +new file mode 100644 +index 000000000..e7c344f3f +--- /dev/null ++++ b/VEX/priv/guest_loongarch64_toIR.c +@@ -0,0 +1,9753 @@ ++ ++/*--------------------------------------------------------------------*/ ++/*--- begin guest_loongarch64_toIR.c 
---*/ ++/*--------------------------------------------------------------------*/ ++ ++/* ++ This file is part of Valgrind, a dynamic binary instrumentation ++ framework. ++ ++ Copyright (C) 2021-2022 Loongson Technology Corporation Limited ++ ++ This program is free software; you can redistribute it and/or ++ modify it under the terms of the GNU General Public License as ++ published by the Free Software Foundation; either version 2 of the ++ License, or (at your option) any later version. ++ ++ This program is distributed in the hope that it will be useful, but ++ WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ General Public License for more details. ++ ++ You should have received a copy of the GNU General Public License ++ along with this program; if not, see . ++ ++ The GNU General Public License is contained in the file COPYING. ++*/ ++ ++/* "Special" instructions. ++ ++ This instruction decoder can decode four special instructions ++ which mean nothing natively (are no-ops as far as regs/mem are ++ concerned) but have meaning for supporting Valgrind. A special ++ instruction is flagged by a 16-byte preamble: ++ ++ 00450c00 (srli.d $zero, $zero, 3 ++ 00453400 srli.d $zero, $zero, 13 ++ 00457400 srli.d $zero, $zero, 29 ++ 00454c00 srli.d $zero, $zero, 19) ++ ++ Following that, one of the following 3 are allowed ++ (standard interpretation in parentheses): ++ ++ 001535ad (or $t1, $t1, $t1) $a7 = client_request ( $t0 ) ++ 001539ce (or $t2, $t2, $t2) $a7 = guest_NRADDR ++ 00153def (or $t3, $t3, $t3) call-noredir $t8 ++ 00154210 (or $t4, $t4, $t4) IR injection ++ ++ Any other bytes following the 16-byte preamble are illegal and ++ constitute a failure in instruction decoding. This all assumes ++ that the preamble will never occur except in specific code ++ fragments designed for Valgrind to catch. ++*/ ++ ++/* Translates LOONGARCH64 code to IR. */ ++ ++#include "libvex_basictypes.h" ++#include "libvex_ir.h" ++#include "libvex.h" ++#include "libvex_guest_loongarch64.h" ++ ++#include "main_util.h" ++#include "main_globals.h" ++#include "guest_generic_bb_to_IR.h" ++#include "guest_loongarch64_defs.h" ++ ++ ++/*------------------------------------------------------------*/ ++/*--- Globals ---*/ ++/*------------------------------------------------------------*/ ++ ++/* These are set at the start of the translation of a instruction, so ++ that we don't have to pass them around endlessly. CONST means does ++ not change during translation of the instruction. */ ++ ++/* CONST: what is the host's endianness? We need to know this in ++ order to do sub-register accesses to the SIMD/FP registers ++ correctly. */ ++static VexEndness host_endness; ++ ++/* CONST: The guest address for the instruction currently being ++ translated. */ ++static Addr64 guest_PC_curr_instr; ++ ++/* MOD: The IRSB* into which we're generating code. */ ++static IRSB* irsb; ++ ++ ++/*------------------------------------------------------------*/ ++/*--- Debugging output ---*/ ++/*------------------------------------------------------------*/ ++ ++#define DIP(format, args...) 
\ ++ if (vex_traceflags & VEX_TRACE_FE) \ ++ vex_printf(format, ## args) ++ ++static const HChar* nameIReg( UInt reg ) ++{ ++ vassert(reg < 32); ++ static const HChar* reg_names[32] = { ++ "$zero", ++ "$ra", ++ "$tp", ++ "$sp", ++ "$a0", "$a1", "$a2", "$a3", "$a4", "$a5", "$a6", "$a7", ++ "$t0", "$t1", "$t2", "$t3", "$t4", "$t5", "$t6", "$t7", "$t8", ++ "$r21", /* Reserved */ ++ "$fp", ++ "$s0", "$s1", "$s2", "$s3", "$s4", "$s5", "$s6", "$s7", "$s8" ++ }; ++ return reg_names[reg]; ++} ++ ++static const HChar* nameFReg( UInt reg ) ++{ ++ vassert(reg < 32); ++ static const HChar* reg_names[32] = { ++ "$fa0", "$fa1", "$fa2", "$fa3", "$fa4", "$fa5", "$fa6", "$fa7", ++ "$ft0", "$ft1", "$ft2", "$ft3", "$ft4", "$ft5", "$ft6", "$ft7", ++ "$ft8", "$ft9", "$ft10", "$ft11", "$ft12", "$ft13", "$ft14", "$ft15", ++ "$fs0", "$fs1", "$fs2", "$fs3", "$fs4", "$fs5", "$fs6", "$fs7" ++ }; ++ return reg_names[reg]; ++} ++ ++static const HChar* nameVReg( UInt reg ) ++{ ++ vassert(reg < 32); ++ static const HChar* reg_names[32] = { ++ "$v0", "$v1", "$v2", "$v3", "$v4", "$v5", "$v6", "$v7", ++ "$v8", "$v9", "$v10", "$v11", "$v12", "$v13", "$v14", "$v15", ++ "$v16", "$v17", "$v18", "$v19", "$v20", "$v21", "$v22", "$v23", ++ "$v24", "$v25", "$v26", "$v27", "$v28", "$v29", "$v30", "$v31" ++ }; ++ return reg_names[reg]; ++} ++ ++static const HChar* nameXReg( UInt reg ) ++{ ++ vassert(reg < 32); ++ static const HChar* reg_names[32] = { ++ "$x0", "$x1", "$x2", "$x3", "$x4", "$x5", "$x6", "$x7", ++ "$x8", "$x9", "$x10", "$x11", "$x12", "$x13", "$x14", "$x15", ++ "$x16", "$x17", "$x18", "$x19", "$x20", "$x21", "$x22", "$x23", ++ "$x24", "$x25", "$x26", "$x27", "$x28", "$x29", "$x30", "$x31" ++ }; ++ return reg_names[reg]; ++} ++ ++static const HChar* nameFCC( UInt reg ) ++{ ++ vassert(reg < 8); ++ static const HChar* reg_names[8] = { ++ "$fcc0", "$fcc1", "$fcc2", "$fcc3", "$fcc4", "$fcc5", "$fcc6", "$fcc7" ++ }; ++ return reg_names[reg]; ++} ++ ++static const HChar* nameFCSR( UInt reg ) ++{ ++ vassert(reg < 4); ++ static const HChar* reg_names[4] = { ++ "$fcsr0", "$fcsr1", "$fcsr2", "$fcsr3" ++ }; ++ return reg_names[reg]; ++} ++ ++ ++/*------------------------------------------------------------*/ ++/*--- Helper bits and pieces for deconstructing the ---*/ ++/*--- loongarch64 insn stream. ---*/ ++/*------------------------------------------------------------*/ ++ ++/* Get insn[max:min] */ ++#define SLICE(insn, max, min) \ ++ ((((UInt)(insn)) >> (min)) & (UInt)((1ULL << ((max) - (min) + 1)) - 1ULL)) ++ ++/* Do a little-endian load of a 32-bit word, regardless of the ++ endianness of the underlying host. 
*/ ++static inline UInt getUInt ( const UChar* p ) ++{ ++ UInt w = 0; ++ w = (w << 8) | p[3]; ++ w = (w << 8) | p[2]; ++ w = (w << 8) | p[1]; ++ w = (w << 8) | p[0]; ++ return w; ++} ++ ++/* Sign extend to 32-bit */ ++static inline UInt extend32 ( UInt imm, UInt size ) ++{ ++ UInt shift = 32 - size; ++ return (UInt)(((Int)imm << shift) >> shift); ++} ++ ++/* Sign extend to 64-bit */ ++static inline ULong extend64 ( ULong imm, UInt size ) ++{ ++ UInt shift = 64 - size; ++ return (ULong)(((Long)imm << shift) >> shift); ++} ++ ++static inline UInt get_rd ( UInt insn ) ++{ ++ return SLICE(insn, 4, 0); ++} ++ ++static inline UInt get_rj ( UInt insn ) ++{ ++ return SLICE(insn, 9, 5); ++} ++ ++static inline UInt get_rk ( UInt insn ) ++{ ++ return SLICE(insn, 14, 10); ++} ++ ++static inline UInt get_code ( UInt insn ) ++{ ++ return SLICE(insn, 14, 0); ++} ++ ++static inline UInt get_ui5 ( UInt insn ) ++{ ++ return SLICE(insn, 14, 10); ++} ++ ++static inline UInt get_ui6 ( UInt insn ) ++{ ++ return SLICE(insn, 15, 10); ++} ++ ++static inline UInt get_sa2 ( UInt insn ) ++{ ++ return SLICE(insn, 16, 15); ++} ++ ++static inline UInt get_sa3 ( UInt insn ) ++{ ++ return SLICE(insn, 17, 15); ++} ++ ++static inline UInt get_lsbw ( UInt insn ) ++{ ++ return SLICE(insn, 14, 10); ++} ++ ++static inline UInt get_msbw ( UInt insn ) ++{ ++ return SLICE(insn, 20, 16); ++} ++ ++static inline UInt get_lsbd ( UInt insn ) ++{ ++ return SLICE(insn, 15, 10); ++} ++ ++static inline UInt get_msbd ( UInt insn ) ++{ ++ return SLICE(insn, 21, 16); ++} ++ ++static inline UInt get_si12 ( UInt insn ) ++{ ++ return SLICE(insn, 21, 10); ++} ++ ++static inline UInt get_ui12 ( UInt insn ) ++{ ++ return SLICE(insn, 21, 10); ++} ++ ++static inline UInt get_si14 ( UInt insn ) ++{ ++ return SLICE(insn, 23, 10); ++} ++ ++static inline UInt get_si16 ( UInt insn ) ++{ ++ return SLICE(insn, 25, 10); ++} ++ ++static inline UInt get_si20 ( UInt insn ) ++{ ++ return SLICE(insn, 24, 5); ++} ++ ++static inline UInt get_hint5 ( UInt insn ) ++{ ++ return SLICE(insn, 4, 0); ++} ++ ++static inline UInt get_hint15 ( UInt insn ) ++{ ++ return SLICE(insn, 14, 0); ++} ++ ++static inline UInt get_offs16 ( UInt insn ) ++{ ++ return SLICE(insn, 25, 10); ++} ++ ++static inline UInt get_offs21 ( UInt insn ) ++{ ++ return (SLICE(insn, 4, 0) << 16) | SLICE(insn, 25, 10); ++} ++ ++static inline UInt get_offs26 ( UInt insn ) ++{ ++ return (SLICE(insn, 9, 0) << 16) | SLICE(insn, 25, 10); ++} ++ ++static inline UInt get_fd ( UInt insn ) ++{ ++ return SLICE(insn, 4, 0); ++} ++ ++static inline UInt get_fj ( UInt insn ) ++{ ++ return SLICE(insn, 9, 5); ++} ++ ++static inline UInt get_fk ( UInt insn ) ++{ ++ return SLICE(insn, 14, 10); ++} ++ ++static inline UInt get_fa ( UInt insn ) ++{ ++ return SLICE(insn, 19, 15); ++} ++ ++static inline UInt get_cond ( UInt insn ) ++{ ++ return SLICE(insn, 19, 15); ++} ++ ++static inline UInt get_fcsrl ( UInt insn ) ++{ ++ return SLICE(insn, 4, 0); ++} ++ ++static inline UInt get_fcsrh ( UInt insn ) ++{ ++ return SLICE(insn, 9, 5); ++} ++ ++static inline UInt get_cd ( UInt insn ) ++{ ++ return SLICE(insn, 2, 0); ++} ++ ++static inline UInt get_cj ( UInt insn ) ++{ ++ return SLICE(insn, 7, 5); ++} ++ ++static inline UInt get_ca ( UInt insn ) ++{ ++ return SLICE(insn, 17, 15); ++} ++ ++ ++/*------------------------------------------------------------*/ ++/*--- Helper bits and pieces for creating IR fragments. 
---*/ ++/*------------------------------------------------------------*/ ++ ++static inline IRExpr* mkU64 ( ULong i ) ++{ ++ return IRExpr_Const(IRConst_U64(i)); ++} ++ ++static inline IRExpr* mkU32 ( UInt i ) ++{ ++ return IRExpr_Const(IRConst_U32(i)); ++} ++ ++static inline IRExpr* mkU8 ( UInt i ) ++{ ++ vassert(i < 256); ++ return IRExpr_Const(IRConst_U8((UChar)i)); ++} ++ ++static inline IRExpr* mkU1 ( UInt i ) ++{ ++ vassert(i == 0 || i == 1); ++ return IRExpr_Const(IRConst_U1((Bool)i)); ++} ++ ++static inline IRExpr* mkF64i ( ULong i ) ++{ ++ return IRExpr_Const(IRConst_F64i(i)); ++} ++ ++static inline IRExpr* mkF32i ( UInt i ) ++{ ++ return IRExpr_Const(IRConst_F32i(i)); ++} ++ ++static inline IRExpr* mkexpr ( IRTemp tmp ) ++{ ++ return IRExpr_RdTmp(tmp); ++} ++ ++static inline IRExpr* unop ( IROp op, IRExpr* a ) ++{ ++ return IRExpr_Unop(op, a); ++} ++ ++static inline IRExpr* binop ( IROp op, IRExpr* a1, IRExpr* a2 ) ++{ ++ return IRExpr_Binop(op, a1, a2); ++} ++ ++static inline IRExpr* triop ( IROp op, IRExpr* a1, IRExpr* a2, IRExpr* a3 ) ++{ ++ return IRExpr_Triop(op, a1, a2, a3); ++} ++ ++static inline IRExpr* qop ( IROp op, IRExpr* a1, IRExpr* a2, ++ IRExpr* a3, IRExpr* a4 ) ++{ ++ return IRExpr_Qop(op, a1, a2, a3, a4); ++} ++ ++static inline IRExpr* load ( IRType ty, IRExpr* addr ) ++{ ++ return IRExpr_Load(Iend_LE, ty, addr); ++} ++ ++/* Add a statement to the list held by "irbb". */ ++static inline void stmt ( IRStmt* st ) ++{ ++ addStmtToIRSB(irsb, st); ++} ++ ++static inline void store ( IRExpr* addr, IRExpr* data ) ++{ ++ stmt(IRStmt_Store(Iend_LE, addr, data)); ++} ++ ++static inline void assign ( IRTemp dst, IRExpr* e ) ++{ ++ stmt(IRStmt_WrTmp(dst, e)); ++} ++ ++static inline void exit ( IRExpr* e, IRJumpKind jk, ULong offs ) ++{ ++ stmt(IRStmt_Exit(e, jk, IRConst_U64(guest_PC_curr_instr + offs), ++ offsetof(VexGuestLOONGARCH64State, guest_PC))); ++} ++ ++/* Generate an expression to check if addr is aligned. */ ++static inline IRExpr* check_align ( IRExpr* addr, IRExpr* align ) ++{ ++ return binop(Iop_CmpNE64, binop(Iop_And64, addr, align), ++ IRExpr_Get(offsetof(VexGuestLOONGARCH64State, guest_R0), ++ Ity_I64)); ++} ++ ++/* Generate a SIGSYS if the expression evaluates to true. */ ++static inline void gen_SIGSYS ( IRExpr* cond ) ++{ ++ exit(cond, Ijk_SigSYS, 4); ++} ++ ++/* Generate a SIGBUS if the expression evaluates to true. */ ++static inline void gen_SIGBUS ( IRExpr* cond ) ++{ ++ exit(cond, Ijk_SigBUS, 4); ++} ++ ++static inline void cas ( IRTemp old, IRExpr* addr, IRExpr* expd, IRExpr* new ) ++{ ++ IRCAS* c = mkIRCAS(IRTemp_INVALID, old, Iend_LE, addr, ++ NULL, expd, NULL, new); ++ stmt(IRStmt_CAS(c)); ++} ++ ++/* Generate a new temporary of the given type. */ ++static inline IRTemp newTemp ( IRType ty ) ++{ ++ vassert(isPlausibleIRType(ty)); ++ return newIRTemp(irsb->tyenv, ty); ++} ++ ++/* S-extend 8/16/32 bit int expr to 64. */ ++static IRExpr* extendS ( IRType ty, IRExpr* e ) ++{ ++ switch (ty) { ++ case Ity_I1: return unop(Iop_1Sto64, e); ++ case Ity_I8: return unop(Iop_8Sto64, e); ++ case Ity_I16: return unop(Iop_16Sto64, e); ++ case Ity_I32: return unop(Iop_32Sto64, e); ++ default: vassert(0); ++ } ++} ++ ++/* Z-extend 8/16/32 bit int expr to 64. 
*/ ++static IRExpr* extendU ( IRType ty, IRExpr* e ) ++{ ++ switch (ty) { ++ case Ity_I1: return unop(Iop_1Uto64, e); ++ case Ity_I8: return unop(Iop_8Uto64, e); ++ case Ity_I16: return unop(Iop_16Uto64, e); ++ case Ity_I32: return unop(Iop_32Uto64, e); ++ default: vassert(0); ++ } ++} ++ ++ ++/*------------------------------------------------------------*/ ++/*--- Helpers for accessing guest registers. ---*/ ++/*------------------------------------------------------------*/ ++ ++/* ---------------- Integer registers ---------------- */ ++ ++static Int offsetIReg ( UInt iregNo ) ++{ ++ switch (iregNo) { ++ case 0: return offsetof(VexGuestLOONGARCH64State, guest_R0); ++ case 1: return offsetof(VexGuestLOONGARCH64State, guest_R1); ++ case 2: return offsetof(VexGuestLOONGARCH64State, guest_R2); ++ case 3: return offsetof(VexGuestLOONGARCH64State, guest_R3); ++ case 4: return offsetof(VexGuestLOONGARCH64State, guest_R4); ++ case 5: return offsetof(VexGuestLOONGARCH64State, guest_R5); ++ case 6: return offsetof(VexGuestLOONGARCH64State, guest_R6); ++ case 7: return offsetof(VexGuestLOONGARCH64State, guest_R7); ++ case 8: return offsetof(VexGuestLOONGARCH64State, guest_R8); ++ case 9: return offsetof(VexGuestLOONGARCH64State, guest_R9); ++ case 10: return offsetof(VexGuestLOONGARCH64State, guest_R10); ++ case 11: return offsetof(VexGuestLOONGARCH64State, guest_R11); ++ case 12: return offsetof(VexGuestLOONGARCH64State, guest_R12); ++ case 13: return offsetof(VexGuestLOONGARCH64State, guest_R13); ++ case 14: return offsetof(VexGuestLOONGARCH64State, guest_R14); ++ case 15: return offsetof(VexGuestLOONGARCH64State, guest_R15); ++ case 16: return offsetof(VexGuestLOONGARCH64State, guest_R16); ++ case 17: return offsetof(VexGuestLOONGARCH64State, guest_R17); ++ case 18: return offsetof(VexGuestLOONGARCH64State, guest_R18); ++ case 19: return offsetof(VexGuestLOONGARCH64State, guest_R19); ++ case 20: return offsetof(VexGuestLOONGARCH64State, guest_R20); ++ case 21: return offsetof(VexGuestLOONGARCH64State, guest_R21); ++ case 22: return offsetof(VexGuestLOONGARCH64State, guest_R22); ++ case 23: return offsetof(VexGuestLOONGARCH64State, guest_R23); ++ case 24: return offsetof(VexGuestLOONGARCH64State, guest_R24); ++ case 25: return offsetof(VexGuestLOONGARCH64State, guest_R25); ++ case 26: return offsetof(VexGuestLOONGARCH64State, guest_R26); ++ case 27: return offsetof(VexGuestLOONGARCH64State, guest_R27); ++ case 28: return offsetof(VexGuestLOONGARCH64State, guest_R28); ++ case 29: return offsetof(VexGuestLOONGARCH64State, guest_R29); ++ case 30: return offsetof(VexGuestLOONGARCH64State, guest_R30); ++ case 31: return offsetof(VexGuestLOONGARCH64State, guest_R31); ++ default: vassert(0); ++ } ++} ++ ++static IRExpr* getIReg8 ( UInt iregNo ) ++{ ++ return IRExpr_Get(offsetIReg(iregNo), Ity_I8); ++} ++ ++static IRExpr* getIReg16 ( UInt iregNo ) ++{ ++ return IRExpr_Get(offsetIReg(iregNo), Ity_I16); ++} ++ ++static IRExpr* getIReg32 ( UInt iregNo ) ++{ ++ return IRExpr_Get(offsetIReg(iregNo), Ity_I32); ++} ++ ++static IRExpr* getIReg64 ( UInt iregNo ) ++{ ++ return IRExpr_Get(offsetIReg(iregNo), Ity_I64); ++} ++ ++static void putIReg ( UInt iregNo, IRExpr* e ) ++{ ++ vassert(typeOfIRExpr(irsb->tyenv, e) == Ity_I64); ++ if (iregNo != 0) /* $r0 - constant zero */ ++ stmt(IRStmt_Put(offsetIReg(iregNo), e)); ++} ++ ++static void putPC ( IRExpr* e ) ++{ ++ vassert(typeOfIRExpr(irsb->tyenv, e) == Ity_I64); ++ stmt(IRStmt_Put(offsetof(VexGuestLOONGARCH64State, guest_PC), e)); ++} ++ ++/* ---------------- 
Floating point / vector registers ---------------- */ ++ ++static Int offsetXReg ( UInt iregNo ) ++{ ++ switch (iregNo) { ++ case 0: return offsetof(VexGuestLOONGARCH64State, guest_X0); ++ case 1: return offsetof(VexGuestLOONGARCH64State, guest_X1); ++ case 2: return offsetof(VexGuestLOONGARCH64State, guest_X2); ++ case 3: return offsetof(VexGuestLOONGARCH64State, guest_X3); ++ case 4: return offsetof(VexGuestLOONGARCH64State, guest_X4); ++ case 5: return offsetof(VexGuestLOONGARCH64State, guest_X5); ++ case 6: return offsetof(VexGuestLOONGARCH64State, guest_X6); ++ case 7: return offsetof(VexGuestLOONGARCH64State, guest_X7); ++ case 8: return offsetof(VexGuestLOONGARCH64State, guest_X8); ++ case 9: return offsetof(VexGuestLOONGARCH64State, guest_X9); ++ case 10: return offsetof(VexGuestLOONGARCH64State, guest_X10); ++ case 11: return offsetof(VexGuestLOONGARCH64State, guest_X11); ++ case 12: return offsetof(VexGuestLOONGARCH64State, guest_X12); ++ case 13: return offsetof(VexGuestLOONGARCH64State, guest_X13); ++ case 14: return offsetof(VexGuestLOONGARCH64State, guest_X14); ++ case 15: return offsetof(VexGuestLOONGARCH64State, guest_X15); ++ case 16: return offsetof(VexGuestLOONGARCH64State, guest_X16); ++ case 17: return offsetof(VexGuestLOONGARCH64State, guest_X17); ++ case 18: return offsetof(VexGuestLOONGARCH64State, guest_X18); ++ case 19: return offsetof(VexGuestLOONGARCH64State, guest_X19); ++ case 20: return offsetof(VexGuestLOONGARCH64State, guest_X20); ++ case 21: return offsetof(VexGuestLOONGARCH64State, guest_X21); ++ case 22: return offsetof(VexGuestLOONGARCH64State, guest_X22); ++ case 23: return offsetof(VexGuestLOONGARCH64State, guest_X23); ++ case 24: return offsetof(VexGuestLOONGARCH64State, guest_X24); ++ case 25: return offsetof(VexGuestLOONGARCH64State, guest_X25); ++ case 26: return offsetof(VexGuestLOONGARCH64State, guest_X26); ++ case 27: return offsetof(VexGuestLOONGARCH64State, guest_X27); ++ case 28: return offsetof(VexGuestLOONGARCH64State, guest_X28); ++ case 29: return offsetof(VexGuestLOONGARCH64State, guest_X29); ++ case 30: return offsetof(VexGuestLOONGARCH64State, guest_X30); ++ case 31: return offsetof(VexGuestLOONGARCH64State, guest_X31); ++ default: vassert(0); ++ } ++} ++ ++static Int offsetFCC ( UInt iregNo ) ++{ ++ switch (iregNo) { ++ case 0: return offsetof(VexGuestLOONGARCH64State, guest_FCC0); ++ case 1: return offsetof(VexGuestLOONGARCH64State, guest_FCC1); ++ case 2: return offsetof(VexGuestLOONGARCH64State, guest_FCC2); ++ case 3: return offsetof(VexGuestLOONGARCH64State, guest_FCC3); ++ case 4: return offsetof(VexGuestLOONGARCH64State, guest_FCC4); ++ case 5: return offsetof(VexGuestLOONGARCH64State, guest_FCC5); ++ case 6: return offsetof(VexGuestLOONGARCH64State, guest_FCC6); ++ case 7: return offsetof(VexGuestLOONGARCH64State, guest_FCC7); ++ default: vassert(0); ++ } ++} ++ ++/* Find the offset of the laneNo'th lane of type laneTy in the given ++ Xreg. Since the host is little-endian, the least significant lane ++ has the lowest offset. */ ++static Int offsetXRegLane ( UInt xregNo, IRType laneTy, UInt laneNo ) ++{ ++ vassert(host_endness == VexEndnessLE); ++ Int laneSzB; ++ /* Since the host is little-endian, the least significant lane ++ will be at the lowest address. 
*/ ++ switch (laneTy) { ++ case Ity_F32: laneSzB = 4; break; ++ case Ity_F64: laneSzB = 8; break; ++ case Ity_V128: laneSzB = 16; break; ++ case Ity_V256: laneSzB = 32; break; ++ default: vassert(0); break; ++ } ++ return offsetXReg(xregNo) + laneNo * laneSzB; ++} ++ ++static IRExpr* getXReg ( UInt xregNo ) ++{ ++ return IRExpr_Get(offsetXRegLane(xregNo, Ity_V256, 0), Ity_V256); ++} ++ ++static IRExpr* getVReg ( UInt vregNo ) ++{ ++ return IRExpr_Get(offsetXRegLane(vregNo, Ity_V128, 0), Ity_V128); ++} ++ ++static IRExpr* getFReg64 ( UInt fregNo ) ++{ ++ return IRExpr_Get(offsetXRegLane(fregNo, Ity_F64, 0), Ity_F64); ++} ++ ++static IRExpr* getFReg32 ( UInt fregNo ) ++{ ++ /* Get FReg32 from FReg64. ++ We could probably use IRExpr_Get(offsetXRegLane(fregNo, Ity_F32, 0), Ity_F32), ++ but that would cause Memcheck to report some errors. ++ */ ++ IRExpr* i = unop(Iop_ReinterpF64asI64, getFReg64(fregNo)); ++ return unop(Iop_ReinterpI32asF32, unop(Iop_64to32, i)); ++} ++ ++static IRExpr* getFCC ( UInt iregNo ) ++{ ++ return IRExpr_Get(offsetFCC(iregNo), Ity_I8); ++} ++ ++static IRExpr* getFCSR ( UInt iregNo ) ++{ ++ /* ++ bits | name ++ --------------- ++ 4:0 | Enables ++ 7:5 | 0 ++ 9:8 | RM ++ 15:10 | 0 ++ 20:16 | Flags ++ 23:21 | 0 ++ 28:24 | Cause ++ 31:29 | 0 ++ */ ++ Int offs = offsetof(VexGuestLOONGARCH64State, guest_FCSR); ++ IRExpr* fcsr0 = IRExpr_Get(offs, Ity_I32); ++ switch (iregNo) { ++ case 0: ++ return fcsr0; ++ case 1: ++ /* FCSR1 is Enables of FCSR0. It seems that the hardware ++ implementation is that the 7th bit belongs to FCSR1. */ ++ return binop(Iop_And32, fcsr0, mkU32(0x0000009f)); ++ case 2: ++ /* FCSR2 is Cause and Flags of FCSR0. */ ++ return binop(Iop_And32, fcsr0, mkU32(0x1f1f0000)); ++ case 3: ++ /* FCSR3 is RM of FCSR0. */ ++ return binop(Iop_And32, fcsr0, mkU32(0x00000300)); ++ default: ++ vassert(0); ++ } ++} ++ ++static void putFReg32 ( UInt iregNo, IRExpr* e ) ++{ ++ vassert(typeOfIRExpr(irsb->tyenv, e) == Ity_F32); ++ stmt(IRStmt_Put(offsetXReg(iregNo), e)); ++} ++ ++static void putFReg64 ( UInt iregNo, IRExpr* e ) ++{ ++ vassert(typeOfIRExpr(irsb->tyenv, e) == Ity_F64); ++ stmt(IRStmt_Put(offsetXReg(iregNo), e)); ++} ++ ++static void putVReg ( UInt iregNo, IRExpr* e ) ++{ ++ vassert(typeOfIRExpr(irsb->tyenv, e) == Ity_V128); ++ stmt(IRStmt_Put(offsetXReg(iregNo), e)); ++} ++ ++static void putXReg ( UInt iregNo, IRExpr* e ) ++{ ++ vassert(typeOfIRExpr(irsb->tyenv, e) == Ity_V256); ++ stmt(IRStmt_Put(offsetXReg(iregNo), e)); ++} ++ ++static void putFCC ( UInt iregNo, IRExpr* e ) ++{ ++ vassert(typeOfIRExpr(irsb->tyenv, e) == Ity_I8); ++ stmt(IRStmt_Put(offsetFCC(iregNo), e)); ++} ++ ++static void putFCSR ( UInt iregNo, IRExpr* e ) ++{ ++ vassert(typeOfIRExpr(irsb->tyenv, e) == Ity_I32); ++ IRExpr* fcsr0 = getFCSR(0); ++ IRExpr* and1; ++ IRExpr* and2; ++ switch (iregNo) { ++ case 0: ++ /* It seems that the hardware implementation allows the 6th ++ bit and the 7th bit to be non-zero. */ ++ and1 = getIReg32(0); ++ and2 = binop(Iop_And32, e, mkU32(0x1f1f03df)); ++ break; ++ case 1: ++ /* FCSR1 is Enables of FCSR0. It seems that the hardware ++ implementation is that the 7th bit belongs to FCSR1. */ ++ and1 = binop(Iop_And32, fcsr0, mkU32(0xffffff60)); ++ and2 = binop(Iop_And32, e, mkU32(0x0000009f)); ++ break; ++ case 2: ++ /* FCSR2 is Cause and Flags of FCSR0. */ ++ and1 = binop(Iop_And32, fcsr0, mkU32(0xe0e0ffff)); ++ and2 = binop(Iop_And32, e, mkU32(0x1f1f0000)); ++ break; ++ case 3: ++ /* FCSR3 is RM of FCSR0. 
*/ ++ and1 = binop(Iop_And32, fcsr0, mkU32(0xfffffcff)); ++ and2 = binop(Iop_And32, e, mkU32(0x00000300)); ++ break; ++ default: ++ vassert(0); ++ } ++ Int offs = offsetof(VexGuestLOONGARCH64State, guest_FCSR); ++ stmt(IRStmt_Put(offs, binop(Iop_Or32, and1, and2))); ++} ++ ++static IRExpr* get_rounding_mode ( void ) ++{ ++ /* ++ rounding mode | LOONGARCH | IR ++ ------------------------------ ++ to nearest | 00 | 00 ++ to zero | 01 | 11 ++ to +infinity | 10 | 10 ++ to -infinity | 11 | 01 ++ */ ++ ++ /* Bits 8 to 9 in FCSR are rounding mode. */ ++ IRExpr* fcsr = getFCSR(0); ++ IRExpr* shr = binop(Iop_Shr32, fcsr, mkU8(8)); ++ IRTemp rm = newTemp(Ity_I32); ++ assign(rm, binop(Iop_And32, shr, mkU32(0x3))); ++ ++ /* rm = XOR(rm, (rm << 1) & 2) */ ++ IRExpr* shl = binop(Iop_Shl32, mkexpr(rm), mkU8(1)); ++ IRExpr* and = binop(Iop_And32, shl, mkU32(2)); ++ return binop(Iop_Xor32, mkexpr(rm), and); ++} ++ ++static void calculateFCSR ( enum fpop op, UInt nargs, ++ UInt src1, UInt src2, UInt src3 ) ++{ ++ IRExpr* s1 = NULL; ++ IRExpr* s2 = NULL; ++ IRExpr* s3 = NULL; ++ switch (nargs) { ++ case 3: s3 = unop(Iop_ReinterpF64asI64, getFReg64(src3)); /* fallthrough */ ++ case 2: s2 = unop(Iop_ReinterpF64asI64, getFReg64(src2)); /* fallthrough */ ++ case 1: s1 = unop(Iop_ReinterpF64asI64, getFReg64(src1)); break; ++ default: vassert(0); ++ } ++ IRExpr** arg = mkIRExprVec_4(mkU64(op), s1, s2, s3); ++ IRExpr* call = mkIRExprCCall(Ity_I64, 0/*regparms*/, ++ "loongarch64_calculate_FCSR", ++ &loongarch64_calculate_FCSR, ++ arg); ++ IRTemp fcsr2 = newTemp(Ity_I32); ++ assign(fcsr2, unop(Iop_64to32, call)); ++ putFCSR(2, mkexpr(fcsr2)); ++} ++ ++static IRExpr* gen_round_to_nearest ( void ) ++{ ++ return mkU32(0x0); ++} ++ ++static IRExpr* gen_round_down ( void ) ++{ ++ return mkU32(0x1); ++} ++ ++static IRExpr* gen_round_up ( void ) ++{ ++ return mkU32(0x2); ++} ++ ++static IRExpr* gen_round_to_zero ( void ) ++{ ++ return mkU32(0x3); ++} ++ ++ ++/*------------------------------------------------------------*/ ++/*--- Helpers for fixed point arithmetic insns ---*/ ++/*------------------------------------------------------------*/ ++ ++static Bool gen_add_w ( DisResult* dres, UInt insn, ++ const VexArchInfo* archinfo, ++ const VexAbiInfo* abiinfo ) ++{ ++ UInt rk = get_rk(insn); ++ UInt rj = get_rj(insn); ++ UInt rd = get_rd(insn); ++ ++ DIP("add.w %s, %s, %s\n", nameIReg(rd), nameIReg(rj), nameIReg(rk)); ++ ++ IRExpr* add = binop(Iop_Add32, getIReg32(rj), getIReg32(rk)); ++ putIReg(rd, extendS(Ity_I32, add)); ++ ++ return True; ++} ++ ++static Bool gen_add_d ( DisResult* dres, UInt insn, ++ const VexArchInfo* archinfo, ++ const VexAbiInfo* abiinfo ) ++{ ++ UInt rk = get_rk(insn); ++ UInt rj = get_rj(insn); ++ UInt rd = get_rd(insn); ++ ++ DIP("add.d %s, %s, %s\n", nameIReg(rd), nameIReg(rj), nameIReg(rk)); ++ ++ putIReg(rd, binop(Iop_Add64, getIReg64(rj), getIReg64(rk))); ++ ++ return True; ++} ++ ++static Bool gen_sub_w ( DisResult* dres, UInt insn, ++ const VexArchInfo* archinfo, ++ const VexAbiInfo* abiinfo ) ++{ ++ UInt rk = get_rk(insn); ++ UInt rj = get_rj(insn); ++ UInt rd = get_rd(insn); ++ ++ DIP("sub.w %s, %s, %s\n", nameIReg(rd), nameIReg(rj), nameIReg(rk)); ++ ++ IRExpr* sub = binop(Iop_Sub32, getIReg32(rj), getIReg32(rk)); ++ putIReg(rd, extendS(Ity_I32, sub)); ++ ++ return True; ++} ++ ++static Bool gen_sub_d ( DisResult* dres, UInt insn, ++ const VexArchInfo* archinfo, ++ const VexAbiInfo* abiinfo ) ++{ ++ UInt rk = get_rk(insn); ++ UInt rj = get_rj(insn); ++ UInt rd = get_rd(insn); ++ ++ 
DIP("sub.d %s, %s, %s\n", nameIReg(rd), nameIReg(rj), nameIReg(rk)); ++ ++ putIReg(rd, binop(Iop_Sub64, getIReg64(rj), getIReg64(rk))); ++ ++ return True; ++} ++ ++static Bool gen_slt ( DisResult* dres, UInt insn, ++ const VexArchInfo* archinfo, ++ const VexAbiInfo* abiinfo ) ++{ ++ UInt rk = get_rk(insn); ++ UInt rj = get_rj(insn); ++ UInt rd = get_rd(insn); ++ ++ DIP("slt %s, %s, %s\n", nameIReg(rd), nameIReg(rj), nameIReg(rk)); ++ ++ IRExpr* cond = binop(Iop_CmpLT64S, getIReg64(rj), getIReg64(rk)); ++ putIReg(rd, extendU(Ity_I1, cond)); ++ ++ return True; ++} ++ ++static Bool gen_sltu ( DisResult* dres, UInt insn, ++ const VexArchInfo* archinfo, ++ const VexAbiInfo* abiinfo ) ++{ ++ UInt rk = get_rk(insn); ++ UInt rj = get_rj(insn); ++ UInt rd = get_rd(insn); ++ ++ DIP("sltu %s, %s, %s\n", nameIReg(rd), nameIReg(rj), nameIReg(rk)); ++ ++ IRExpr* cond = binop(Iop_CmpLT64U, getIReg64(rj), getIReg64(rk)); ++ putIReg(rd, extendU(Ity_I1, cond)); ++ ++ return True; ++} ++ ++static Bool gen_slti ( DisResult* dres, UInt insn, ++ const VexArchInfo* archinfo, ++ const VexAbiInfo* abiinfo ) ++{ ++ UInt si12 = get_si12(insn); ++ UInt rj = get_rj(insn); ++ UInt rd = get_rd(insn); ++ ++ DIP("slti %s, %s, %d\n", nameIReg(rd), nameIReg(rj), ++ (Int)extend32(si12, 12)); ++ ++ IRExpr* cond = binop(Iop_CmpLT64S, getIReg64(rj), ++ mkU64(extend64(si12, 12))); ++ putIReg(rd, extendU(Ity_I1, cond)); ++ ++ return True; ++} ++ ++static Bool gen_sltui ( DisResult* dres, UInt insn, ++ const VexArchInfo* archinfo, ++ const VexAbiInfo* abiinfo ) ++{ ++ UInt si12 = get_si12(insn); ++ UInt rj = get_rj(insn); ++ UInt rd = get_rd(insn); ++ ++ DIP("sltui %s, %s, %d\n", nameIReg(rd), nameIReg(rj), ++ (Int)extend32(si12, 12)); ++ ++ IRExpr* cond = binop(Iop_CmpLT64U, getIReg64(rj), ++ mkU64(extend64(si12, 12))); ++ putIReg(rd, extendU(Ity_I1, cond)); ++ ++ return True; ++} ++ ++static Bool gen_nor ( DisResult* dres, UInt insn, ++ const VexArchInfo* archinfo, ++ const VexAbiInfo* abiinfo ) ++{ ++ UInt rk = get_rk(insn); ++ UInt rj = get_rj(insn); ++ UInt rd = get_rd(insn); ++ ++ DIP("nor %s, %s, %s\n", nameIReg(rd), nameIReg(rj), nameIReg(rk)); ++ ++ IRExpr* or = binop(Iop_Or64, getIReg64(rj), getIReg64(rk)); ++ putIReg(rd, unop(Iop_Not64, or)); ++ ++ return True; ++} ++ ++static Bool gen_and ( DisResult* dres, UInt insn, ++ const VexArchInfo* archinfo, ++ const VexAbiInfo* abiinfo ) ++{ ++ UInt rk = get_rk(insn); ++ UInt rj = get_rj(insn); ++ UInt rd = get_rd(insn); ++ ++ DIP("and %s, %s, %s\n", nameIReg(rd), nameIReg(rj), nameIReg(rk)); ++ ++ putIReg(rd, binop(Iop_And64, getIReg64(rj), getIReg64(rk))); ++ ++ return True; ++} ++ ++static Bool gen_or ( DisResult* dres, UInt insn, ++ const VexArchInfo* archinfo, ++ const VexAbiInfo* abiinfo ) ++{ ++ UInt rk = get_rk(insn); ++ UInt rj = get_rj(insn); ++ UInt rd = get_rd(insn); ++ ++ DIP("or %s, %s, %s\n", nameIReg(rd), nameIReg(rj), nameIReg(rk)); ++ ++ putIReg(rd, binop(Iop_Or64, getIReg64(rj), getIReg64(rk))); ++ ++ return True; ++} ++ ++static Bool gen_xor ( DisResult* dres, UInt insn, ++ const VexArchInfo* archinfo, ++ const VexAbiInfo* abiinfo ) ++{ ++ UInt rk = get_rk(insn); ++ UInt rj = get_rj(insn); ++ UInt rd = get_rd(insn); ++ ++ DIP("xor %s, %s, %s\n", nameIReg(rd), nameIReg(rj), nameIReg(rk)); ++ ++ putIReg(rd, binop(Iop_Xor64, getIReg64(rj), getIReg64(rk))); ++ ++ return True; ++} ++ ++static Bool gen_orn ( DisResult* dres, UInt insn, ++ const VexArchInfo* archinfo, ++ const VexAbiInfo* abiinfo ) ++{ ++ UInt rk = get_rk(insn); ++ UInt rj = get_rj(insn); ++ UInt 
rd = get_rd(insn); ++ ++ DIP("orn %s, %s, %s\n", nameIReg(rd), nameIReg(rj), nameIReg(rk)); ++ ++ IRExpr* not = unop(Iop_Not64, getIReg64(rk)); ++ putIReg(rd, binop(Iop_Or64, getIReg64(rj), not)); ++ ++ return True; ++} ++ ++static Bool gen_andn ( DisResult* dres, UInt insn, ++ const VexArchInfo* archinfo, ++ const VexAbiInfo* abiinfo ) ++{ ++ UInt rk = get_rk(insn); ++ UInt rj = get_rj(insn); ++ UInt rd = get_rd(insn); ++ ++ DIP("andn %s, %s, %s\n", nameIReg(rd), nameIReg(rj), nameIReg(rk)); ++ ++ IRExpr* not = unop(Iop_Not64, getIReg64(rk)); ++ putIReg(rd, binop(Iop_And64, getIReg64(rj), not)); ++ ++ return True; ++} ++ ++static Bool gen_mul_w ( DisResult* dres, UInt insn, ++ const VexArchInfo* archinfo, ++ const VexAbiInfo* abiinfo ) ++{ ++ UInt rk = get_rk(insn); ++ UInt rj = get_rj(insn); ++ UInt rd = get_rd(insn); ++ ++ DIP("mul.w %s, %s, %s\n", nameIReg(rd), nameIReg(rj), nameIReg(rk)); ++ ++ IRExpr* mul = binop(Iop_MullS32, getIReg32(rj), getIReg32(rk)); ++ putIReg(rd, extendS(Ity_I32, unop(Iop_64to32, mul))); ++ ++ return True; ++} ++ ++static Bool gen_mulh_w ( DisResult* dres, UInt insn, ++ const VexArchInfo* archinfo, ++ const VexAbiInfo* abiinfo ) ++{ ++ UInt rk = get_rk(insn); ++ UInt rj = get_rj(insn); ++ UInt rd = get_rd(insn); ++ ++ DIP("mulh.w %s, %s, %s\n", nameIReg(rd), nameIReg(rj), nameIReg(rk)); ++ ++ IRExpr* mul = binop(Iop_MullS32, getIReg32(rj), getIReg32(rk)); ++ putIReg(rd, extendS(Ity_I32, unop(Iop_64HIto32, mul))); ++ ++ return True; ++} ++ ++static Bool gen_mulh_wu ( DisResult* dres, UInt insn, ++ const VexArchInfo* archinfo, ++ const VexAbiInfo* abiinfo ) ++{ ++ UInt rk = get_rk(insn); ++ UInt rj = get_rj(insn); ++ UInt rd = get_rd(insn); ++ ++ DIP("mulh.wu %s, %s, %s\n", nameIReg(rd), nameIReg(rj), nameIReg(rk)); ++ ++ IRExpr* mul = binop(Iop_MullU32, getIReg32(rj), getIReg32(rk)); ++ putIReg(rd, extendS(Ity_I32, unop(Iop_64HIto32, mul))); ++ ++ return True; ++} ++ ++static Bool gen_mul_d ( DisResult* dres, UInt insn, ++ const VexArchInfo* archinfo, ++ const VexAbiInfo* abiinfo ) ++{ ++ UInt rk = get_rk(insn); ++ UInt rj = get_rj(insn); ++ UInt rd = get_rd(insn); ++ ++ DIP("mul.d %s, %s, %s\n", nameIReg(rd), nameIReg(rj), nameIReg(rk)); ++ ++ IRExpr* mul = binop(Iop_MullS64, getIReg64(rj), getIReg64(rk)); ++ putIReg(rd, unop(Iop_128to64, mul)); ++ ++ return True; ++} ++ ++static Bool gen_mulh_d ( DisResult* dres, UInt insn, ++ const VexArchInfo* archinfo, ++ const VexAbiInfo* abiinfo ) ++{ ++ UInt rk = get_rk(insn); ++ UInt rj = get_rj(insn); ++ UInt rd = get_rd(insn); ++ ++ DIP("mulh.d %s, %s, %s\n", nameIReg(rd), nameIReg(rj), nameIReg(rk)); ++ ++ IRExpr* mul = binop(Iop_MullS64, getIReg64(rj), getIReg64(rk)); ++ putIReg(rd, unop(Iop_128HIto64, mul)); ++ ++ return True; ++} ++ ++static Bool gen_mulh_du ( DisResult* dres, UInt insn, ++ const VexArchInfo* archinfo, ++ const VexAbiInfo* abiinfo ) ++{ ++ UInt rk = get_rk(insn); ++ UInt rj = get_rj(insn); ++ UInt rd = get_rd(insn); ++ ++ DIP("mulh.du %s, %s, %s\n", nameIReg(rd), nameIReg(rj), nameIReg(rk)); ++ ++ IRExpr* mul = binop(Iop_MullU64, getIReg64(rj), getIReg64(rk)); ++ putIReg(rd, unop(Iop_128HIto64, mul)); ++ ++ return True; ++} ++ ++static Bool gen_mulw_d_w ( DisResult* dres, UInt insn, ++ const VexArchInfo* archinfo, ++ const VexAbiInfo* abiinfo ) ++{ ++ UInt rk = get_rk(insn); ++ UInt rj = get_rj(insn); ++ UInt rd = get_rd(insn); ++ ++ DIP("mulw.d.w %s, %s, %s\n", nameIReg(rd), nameIReg(rj), nameIReg(rk)); ++ ++ putIReg(rd, binop(Iop_MullS32, getIReg32(rj), getIReg32(rk))); ++ ++ return True; ++} 
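The widening multiply helpers above all rely on VEX's Iop_MullS32/Iop_MullU32, which produce a full 64-bit product from 32-bit inputs: mulh.w keeps the upper half via Iop_64HIto32 and sign-extends it, while mulw.d.w keeps the whole 64-bit product in rd. A minimal, self-contained reference sketch of the intended semantics, in plain C and independent of the VEX IR (the ref_* names are illustrative only, not part of the patch):

#include <stdint.h>
#include <assert.h>

/* Illustrative reference model, not part of the Valgrind patch. */
static int64_t ref_mulw_d_w(int64_t rj, int64_t rk)
{
    /* mulw.d.w: full 64-bit product of the sign-extended low 32 bits,
       mirroring Iop_MullS32 on the 32-bit register views. */
    return (int64_t)(int32_t)rj * (int64_t)(int32_t)rk;
}

static int64_t ref_mulh_w(int64_t rj, int64_t rk)
{
    /* mulh.w: upper 32 bits of that product, sign-extended to 64 bits,
       i.e. Iop_MullS32 followed by Iop_64HIto32 and extendS(Ity_I32, ...). */
    int64_t prod = (int64_t)(int32_t)rj * (int64_t)(int32_t)rk;
    return (int64_t)(int32_t)(uint32_t)((uint64_t)prod >> 32);
}

int main(void)
{
    /* -2 * 2^30 = -2^31; high half is all ones, i.e. -1 after sign extension. */
    assert(ref_mulw_d_w(-2, 0x40000000) == -2147483648LL);
    assert(ref_mulh_w(-2, 0x40000000) == -1);
    return 0;
}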
++ ++static Bool gen_mulw_d_wu ( DisResult* dres, UInt insn, ++ const VexArchInfo* archinfo, ++ const VexAbiInfo* abiinfo ) ++{ ++ UInt rk = get_rk(insn); ++ UInt rj = get_rj(insn); ++ UInt rd = get_rd(insn); ++ ++ DIP("mulw.d.wu %s, %s, %s\n", nameIReg(rd), nameIReg(rj), nameIReg(rk)); ++ ++ putIReg(rd, binop(Iop_MullU32, getIReg32(rj), getIReg32(rk))); ++ ++ return True; ++} ++ ++static Bool gen_div_w ( DisResult* dres, UInt insn, ++ const VexArchInfo* archinfo, ++ const VexAbiInfo* abiinfo ) ++{ ++ UInt rk = get_rk(insn); ++ UInt rj = get_rj(insn); ++ UInt rd = get_rd(insn); ++ ++ DIP("div.w %s, %s, %s\n", nameIReg(rd), nameIReg(rj), nameIReg(rk)); ++ ++ IRExpr* div = binop(Iop_DivS32, getIReg32(rj), getIReg32(rk)); ++ putIReg(rd, extendS(Ity_I32, div)); ++ ++ return True; ++} ++ ++static Bool gen_mod_w ( DisResult* dres, UInt insn, ++ const VexArchInfo* archinfo, ++ const VexAbiInfo* abiinfo ) ++{ ++ UInt rk = get_rk(insn); ++ UInt rj = get_rj(insn); ++ UInt rd = get_rd(insn); ++ ++ DIP("mod.w %s, %s, %s\n", nameIReg(rd), nameIReg(rj), nameIReg(rk)); ++ ++ IRExpr* mod = binop(Iop_DivModS32to32, getIReg32(rj), getIReg32(rk)); ++ putIReg(rd, extendS(Ity_I32, unop(Iop_64HIto32, mod))); ++ ++ return True; ++} ++ ++static Bool gen_div_wu ( DisResult* dres, UInt insn, ++ const VexArchInfo* archinfo, ++ const VexAbiInfo* abiinfo ) ++{ ++ UInt rk = get_rk(insn); ++ UInt rj = get_rj(insn); ++ UInt rd = get_rd(insn); ++ ++ DIP("div.wu %s, %s, %s\n", nameIReg(rd), nameIReg(rj), nameIReg(rk)); ++ ++ IRExpr* div = binop(Iop_DivU32, getIReg32(rj), getIReg32(rk)); ++ putIReg(rd, extendS(Ity_I32, div)); ++ ++ return True; ++} ++ ++static Bool gen_mod_wu ( DisResult* dres, UInt insn, ++ const VexArchInfo* archinfo, ++ const VexAbiInfo* abiinfo ) ++{ ++ UInt rk = get_rk(insn); ++ UInt rj = get_rj(insn); ++ UInt rd = get_rd(insn); ++ ++ DIP("mod.wu %s, %s, %s\n", nameIReg(rd), nameIReg(rj), nameIReg(rk)); ++ ++ IRExpr* mod = binop(Iop_DivModU32to32, getIReg32(rj), getIReg32(rk)); ++ putIReg(rd, extendS(Ity_I32, unop(Iop_64HIto32, mod))); ++ ++ return True; ++} ++ ++static Bool gen_div_d ( DisResult* dres, UInt insn, ++ const VexArchInfo* archinfo, ++ const VexAbiInfo* abiinfo ) ++{ ++ UInt rk = get_rk(insn); ++ UInt rj = get_rj(insn); ++ UInt rd = get_rd(insn); ++ ++ DIP("div.d %s, %s, %s\n", nameIReg(rd), nameIReg(rj), nameIReg(rk)); ++ ++ putIReg(rd, binop(Iop_DivS64, getIReg64(rj), getIReg64(rk))); ++ ++ return True; ++} ++ ++static Bool gen_mod_d ( DisResult* dres, UInt insn, ++ const VexArchInfo* archinfo, ++ const VexAbiInfo* abiinfo ) ++{ ++ UInt rk = get_rk(insn); ++ UInt rj = get_rj(insn); ++ UInt rd = get_rd(insn); ++ ++ DIP("mod.d %s, %s, %s\n", nameIReg(rd), nameIReg(rj), nameIReg(rk)); ++ ++ IRExpr* mod = binop(Iop_DivModS64to64, getIReg64(rj), getIReg64(rk)); ++ putIReg(rd, unop(Iop_128HIto64, mod)); ++ ++ return True; ++} ++ ++static Bool gen_div_du ( DisResult* dres, UInt insn, ++ const VexArchInfo* archinfo, ++ const VexAbiInfo* abiinfo ) ++{ ++ UInt rk = get_rk(insn); ++ UInt rj = get_rj(insn); ++ UInt rd = get_rd(insn); ++ ++ DIP("div.du %s, %s, %s\n", nameIReg(rd), nameIReg(rj), nameIReg(rk)); ++ ++ putIReg(rd, binop(Iop_DivU64, getIReg64(rj), getIReg64(rk))); ++ ++ return True; ++} ++ ++static Bool gen_mod_du ( DisResult* dres, UInt insn, ++ const VexArchInfo* archinfo, ++ const VexAbiInfo* abiinfo ) ++{ ++ UInt rk = get_rk(insn); ++ UInt rj = get_rj(insn); ++ UInt rd = get_rd(insn); ++ ++ DIP("mod.du %s, %s, %s\n", nameIReg(rd), nameIReg(rj), nameIReg(rk)); ++ ++ IRExpr* mod = 
binop(Iop_DivModU64to64, getIReg64(rj), getIReg64(rk)); ++ putIReg(rd, unop(Iop_128HIto64, mod)); ++ ++ return True; ++} ++ ++static Bool gen_alsl_w ( DisResult* dres, UInt insn, ++ const VexArchInfo* archinfo, ++ const VexAbiInfo* abiinfo ) ++{ ++ UInt sa2 = get_sa2(insn); ++ UInt rk = get_rk(insn); ++ UInt rj = get_rj(insn); ++ UInt rd = get_rd(insn); ++ ++ DIP("alsl.w %s, %s, %s, %u\n", nameIReg(rd), nameIReg(rj), ++ nameIReg(rk), sa2); ++ ++ IRExpr* shl = binop(Iop_Shl32, getIReg32(rj), mkU8(sa2 + 1)); ++ IRExpr* add = binop(Iop_Add32, shl, getIReg32(rk)); ++ putIReg(rd, extendS(Ity_I32, add)); ++ ++ return True; ++} ++ ++static Bool gen_alsl_wu ( DisResult* dres, UInt insn, ++ const VexArchInfo* archinfo, ++ const VexAbiInfo* abiinfo ) ++{ ++ UInt sa2 = get_sa2(insn); ++ UInt rk = get_rk(insn); ++ UInt rj = get_rj(insn); ++ UInt rd = get_rd(insn); ++ ++ DIP("alsl.wu %s, %s, %s, %u\n", nameIReg(rd), nameIReg(rj), ++ nameIReg(rk), sa2); ++ ++ IRExpr* shl = binop(Iop_Shl32, getIReg32(rj), mkU8(sa2 + 1)); ++ IRExpr* add = binop(Iop_Add32, shl, getIReg32(rk)); ++ putIReg(rd, extendU(Ity_I32, add)); ++ ++ return True; ++} ++ ++static Bool gen_alsl_d ( DisResult* dres, UInt insn, ++ const VexArchInfo* archinfo, ++ const VexAbiInfo* abiinfo ) ++{ ++ UInt sa2 = get_sa2(insn); ++ UInt rk = get_rk(insn); ++ UInt rj = get_rj(insn); ++ UInt rd = get_rd(insn); ++ ++ DIP("alsl.d %s, %s, %s, %u\n", nameIReg(rd), nameIReg(rj), ++ nameIReg(rk), sa2); ++ ++ IRExpr* shl = binop(Iop_Shl64, getIReg64(rj), mkU8(sa2 + 1)); ++ putIReg(rd, binop(Iop_Add64, shl, getIReg64(rk))); ++ ++ return True; ++} ++ ++static Bool gen_lu12i_w ( DisResult* dres, UInt insn, ++ const VexArchInfo* archinfo, ++ const VexAbiInfo* abiinfo ) ++{ ++ UInt si20 = get_si20(insn); ++ UInt rd = get_rd(insn); ++ ++ DIP("lu12i.w %s, %d\n", nameIReg(rd), (Int)extend32(si20, 20)); ++ ++ IRExpr* imm = mkU32(si20 << 12); ++ putIReg(rd, extendS(Ity_I32, imm)); ++ ++ return True; ++} ++ ++static Bool gen_lu32i_d ( DisResult* dres, UInt insn, ++ const VexArchInfo* archinfo, ++ const VexAbiInfo* abiinfo ) ++{ ++ UInt si20 = get_si20(insn); ++ UInt rd = get_rd(insn); ++ ++ DIP("lu32i.d %s, %d\n", nameIReg(rd), (Int)extend32(si20, 20)); ++ ++ IRExpr* imm = mkU64((ULong)extend32(si20, 20) << 32); ++ IRExpr* shl = binop(Iop_Shl64, getIReg64(rd), mkU8(32)); ++ IRExpr* shr = binop(Iop_Shr64, shl, mkU8(32)); ++ putIReg(rd, binop(Iop_Or64, imm, shr)); ++ ++ return True; ++} ++ ++static Bool gen_lu52i_d ( DisResult* dres, UInt insn, ++ const VexArchInfo* archinfo, ++ const VexAbiInfo* abiinfo ) ++{ ++ UInt si12 = get_si12(insn); ++ UInt rj = get_rj(insn); ++ UInt rd = get_rd(insn); ++ ++ DIP("lu52i.d %s, %s, %d\n", nameIReg(rd), nameIReg(rj), ++ (Int)extend32(si12, 12)); ++ ++ IRExpr* imm = mkU64((ULong)si12 << 52); ++ IRExpr* shl = binop(Iop_Shl64, getIReg64(rj), mkU8(12)); ++ IRExpr* shr = binop(Iop_Shr64, shl, mkU8(12)); ++ putIReg(rd, binop(Iop_Or64, imm, shr)); ++ ++ return True; ++} ++ ++static Bool gen_pcaddi ( DisResult* dres, UInt insn, ++ const VexArchInfo* archinfo, ++ const VexAbiInfo* abiinfo ) ++{ ++ UInt si20 = get_si20(insn); ++ UInt rd = get_rd(insn); ++ ++ DIP("pcaddi %s, %d\n", nameIReg(rd), (Int)extend32(si20, 20)); ++ ++ putIReg(rd, mkU64(guest_PC_curr_instr + extend64(si20 << 2, 22))); ++ ++ return True; ++} ++ ++static Bool gen_pcalau12i ( DisResult* dres, UInt insn, ++ const VexArchInfo* archinfo, ++ const VexAbiInfo* abiinfo ) ++{ ++ UInt si20 = get_si20(insn); ++ UInt rd = get_rd(insn); ++ ++ DIP("pcalau12i %s, %d\n", 
nameIReg(rd), (Int)extend32(si20, 20)); ++ ++ IRExpr* imm = mkU64(guest_PC_curr_instr + extend64(si20 << 12, 32)); ++ IRExpr* shr = binop(Iop_Shr64, imm, mkU8(12)); ++ putIReg(rd, binop(Iop_Shl64, shr, mkU8(12))); ++ ++ return True; ++} ++ ++static Bool gen_pcaddu12i ( DisResult* dres, UInt insn, ++ const VexArchInfo* archinfo, ++ const VexAbiInfo* abiinfo ) ++{ ++ UInt si20 = get_si20(insn); ++ UInt rd = get_rd(insn); ++ ++ DIP("pcaddu12i %s, %d\n", nameIReg(rd), (Int)extend32(si20, 20)); ++ ++ putIReg(rd, mkU64(guest_PC_curr_instr + extend64(si20 << 12, 32))); ++ ++ return True; ++} ++ ++static Bool gen_pcaddu18i ( DisResult* dres, UInt insn, ++ const VexArchInfo* archinfo, ++ const VexAbiInfo* abiinfo ) ++{ ++ UInt si20 = get_si20(insn); ++ UInt rd = get_rd(insn); ++ ++ DIP("pcaddu18i %s, %d\n", nameIReg(rd), (Int)extend32(si20, 20)); ++ ++ putIReg(rd, mkU64(guest_PC_curr_instr + extend64((ULong)si20 << 18, 38))); ++ ++ return True; ++} ++ ++static Bool gen_addi_w ( DisResult* dres, UInt insn, ++ const VexArchInfo* archinfo, ++ const VexAbiInfo* abiinfo ) ++{ ++ UInt si12 = get_si12(insn); ++ UInt rj = get_rj(insn); ++ UInt rd = get_rd(insn); ++ ++ DIP("addi.w %s, %s, %d\n", nameIReg(rd), nameIReg(rj), ++ (Int)extend32(si12, 12)); ++ ++ IRExpr* imm = mkU32(extend32(si12, 12)); ++ IRExpr* add = binop(Iop_Add32, getIReg32(rj), imm); ++ putIReg(rd, extendS(Ity_I32, add)); ++ ++ return True; ++} ++ ++static Bool gen_addi_d ( DisResult* dres, UInt insn, ++ const VexArchInfo* archinfo, ++ const VexAbiInfo* abiinfo ) ++{ ++ UInt si12 = get_si12(insn); ++ UInt rj = get_rj(insn); ++ UInt rd = get_rd(insn); ++ ++ DIP("addi.d %s, %s, %d\n", nameIReg(rd), nameIReg(rj), ++ (Int)extend32(si12, 12)); ++ ++ IRExpr* imm = mkU64(extend64(si12, 12)); ++ putIReg(rd, binop(Iop_Add64, getIReg64(rj), imm)); ++ ++ return True; ++} ++ ++static Bool gen_addu16i_d ( DisResult* dres, UInt insn, ++ const VexArchInfo* archinfo, ++ const VexAbiInfo* abiinfo ) ++{ ++ UInt si16 = get_si16(insn); ++ UInt rj = get_rj(insn); ++ UInt rd = get_rd(insn); ++ ++ DIP("addu16i.d %s, %s, %d\n", nameIReg(rd), nameIReg(rj), ++ (Int)extend32(si16, 16)); ++ ++ IRExpr* imm = mkU64(extend64(si16 << 16, 32)); ++ putIReg(rd, binop(Iop_Add64, getIReg64(rj), imm)); ++ ++ return True; ++} ++ ++static Bool gen_andi ( DisResult* dres, UInt insn, ++ const VexArchInfo* archinfo, ++ const VexAbiInfo* abiinfo ) ++{ ++ UInt ui12 = get_ui12(insn); ++ UInt rj = get_rj(insn); ++ UInt rd = get_rd(insn); ++ ++ DIP("andi %s, %s, %u\n", nameIReg(rd), nameIReg(rj), ui12); ++ ++ IRExpr* imm = mkU64((ULong)ui12); ++ putIReg(rd, binop(Iop_And64, getIReg64(rj), imm)); ++ ++ return True; ++} ++ ++static Bool gen_ori ( DisResult* dres, UInt insn, ++ const VexArchInfo* archinfo, ++ const VexAbiInfo* abiinfo ) ++{ ++ UInt ui12 = get_ui12(insn); ++ UInt rj = get_rj(insn); ++ UInt rd = get_rd(insn); ++ ++ DIP("ori %s, %s, %u\n", nameIReg(rd), nameIReg(rj), ui12); ++ ++ IRExpr* imm = mkU64((ULong)ui12); ++ putIReg(rd, binop(Iop_Or64, getIReg64(rj), imm)); ++ ++ return True; ++} ++ ++static Bool gen_xori ( DisResult* dres, UInt insn, ++ const VexArchInfo* archinfo, ++ const VexAbiInfo* abiinfo ) ++{ ++ UInt ui12 = get_ui12(insn); ++ UInt rj = get_rj(insn); ++ UInt rd = get_rd(insn); ++ ++ DIP("xori %s, %s, %u\n", nameIReg(rd), nameIReg(rj), ui12); ++ ++ IRExpr* imm = mkU64((ULong)ui12); ++ putIReg(rd, binop(Iop_Xor64, getIReg64(rj), imm)); ++ ++ return True; ++} ++ ++ ++/*------------------------------------------------------------*/ ++/*--- Helpers for fixed point 
shift insns ---*/ ++/*------------------------------------------------------------*/ ++ ++static Bool gen_sll_w ( DisResult* dres, UInt insn, ++ const VexArchInfo* archinfo, ++ const VexAbiInfo* abiinfo ) ++{ ++ UInt rk = get_rk(insn); ++ UInt rj = get_rj(insn); ++ UInt rd = get_rd(insn); ++ ++ DIP("sll.w %s, %s, %s\n", nameIReg(rd), nameIReg(rj), nameIReg(rk)); ++ ++ IRExpr* shl = binop(Iop_Shl32, getIReg32(rj), getIReg8(rk)); ++ putIReg(rd, extendS(Ity_I32, shl)); ++ ++ return True; ++} ++ ++static Bool gen_srl_w ( DisResult* dres, UInt insn, ++ const VexArchInfo* archinfo, ++ const VexAbiInfo* abiinfo ) ++{ ++ UInt rk = get_rk(insn); ++ UInt rj = get_rj(insn); ++ UInt rd = get_rd(insn); ++ ++ DIP("srl.w %s, %s, %s\n", nameIReg(rd), nameIReg(rj), nameIReg(rk)); ++ ++ IRExpr* shr = binop(Iop_Shr32, getIReg32(rj), getIReg8(rk)); ++ putIReg(rd, extendS(Ity_I32, shr)); ++ ++ return True; ++} ++ ++static Bool gen_sra_w ( DisResult* dres, UInt insn, ++ const VexArchInfo* archinfo, ++ const VexAbiInfo* abiinfo ) ++{ ++ UInt rk = get_rk(insn); ++ UInt rj = get_rj(insn); ++ UInt rd = get_rd(insn); ++ ++ DIP("sra.w %s, %s, %s\n", nameIReg(rd), nameIReg(rj), nameIReg(rk)); ++ ++ IRExpr* sar = binop(Iop_Sar32, getIReg32(rj), getIReg8(rk)); ++ putIReg(rd, extendS(Ity_I32, sar)); ++ ++ return True; ++} ++ ++static Bool gen_sll_d ( DisResult* dres, UInt insn, ++ const VexArchInfo* archinfo, ++ const VexAbiInfo* abiinfo ) ++{ ++ UInt rk = get_rk(insn); ++ UInt rj = get_rj(insn); ++ UInt rd = get_rd(insn); ++ ++ DIP("sll.d %s, %s, %s\n", nameIReg(rd), nameIReg(rj), nameIReg(rk)); ++ ++ putIReg(rd, binop(Iop_Shl64, getIReg64(rj), getIReg8(rk))); ++ ++ return True; ++} ++ ++static Bool gen_srl_d ( DisResult* dres, UInt insn, ++ const VexArchInfo* archinfo, ++ const VexAbiInfo* abiinfo ) ++{ ++ UInt rk = get_rk(insn); ++ UInt rj = get_rj(insn); ++ UInt rd = get_rd(insn); ++ ++ DIP("srl.d %s, %s, %s\n", nameIReg(rd), nameIReg(rj), nameIReg(rk)); ++ ++ putIReg(rd, binop(Iop_Shr64, getIReg64(rj), getIReg8(rk))); ++ ++ return True; ++} ++ ++static Bool gen_sra_d ( DisResult* dres, UInt insn, ++ const VexArchInfo* archinfo, ++ const VexAbiInfo* abiinfo ) ++{ ++ UInt rk = get_rk(insn); ++ UInt rj = get_rj(insn); ++ UInt rd = get_rd(insn); ++ ++ DIP("sra.d %s, %s, %s\n", nameIReg(rd), nameIReg(rj), nameIReg(rk)); ++ ++ putIReg(rd, binop(Iop_Sar64, getIReg64(rj), getIReg8(rk))); ++ ++ return True; ++} ++ ++static Bool gen_rotr_w ( DisResult* dres, UInt insn, ++ const VexArchInfo* archinfo, ++ const VexAbiInfo* abiinfo ) ++{ ++ UInt rk = get_rk(insn); ++ UInt rj = get_rj(insn); ++ UInt rd = get_rd(insn); ++ ++ DIP("rotr.w %s, %s, %s\n", nameIReg(rd), nameIReg(rj), nameIReg(rk)); ++ ++ IRTemp tmp1 = newTemp(Ity_I32); ++ assign(tmp1, getIReg32(rj)); ++ IRTemp tmp2 = newTemp(Ity_I8); ++ assign(tmp2, getIReg8(rk)); ++ IRExpr* shr = binop(Iop_Shr32, mkexpr(tmp1), mkexpr(tmp2)); ++ IRExpr* imm = unop(Iop_8Uto32, mkexpr(tmp2)); ++ IRExpr* sub = binop(Iop_Sub32, mkU32(32), imm); ++ IRExpr* imm2 = unop(Iop_32to8, sub); ++ IRExpr* shl = binop(Iop_Shl32, mkexpr(tmp1), imm2); ++ IRExpr* or = binop(Iop_Or32, shr, shl); ++ putIReg(rd, extendS(Ity_I32, or)); ++ ++ return True; ++} ++ ++static Bool gen_rotr_d ( DisResult* dres, UInt insn, ++ const VexArchInfo* archinfo, ++ const VexAbiInfo* abiinfo ) ++{ ++ UInt rk = get_rk(insn); ++ UInt rj = get_rj(insn); ++ UInt rd = get_rd(insn); ++ ++ DIP("rotr.d %s, %s, %s\n", nameIReg(rd), nameIReg(rj), nameIReg(rk)); ++ ++ IRTemp tmp1 = newTemp(Ity_I64); ++ assign(tmp1, getIReg64(rj)); ++ 
IRTemp tmp2 = newTemp(Ity_I8); ++ assign(tmp2, getIReg8(rk)); ++ IRExpr* shr = binop(Iop_Shr64, mkexpr(tmp1), mkexpr(tmp2)); ++ IRExpr* imm = unop(Iop_8Uto64, mkexpr(tmp2)); ++ IRExpr* sub = binop(Iop_Sub64, mkU64(64), imm); ++ IRExpr* imm2 = unop(Iop_64to8, sub); ++ IRExpr* shl = binop(Iop_Shl64, mkexpr(tmp1), imm2); ++ putIReg(rd, binop(Iop_Or64, shr, shl)); ++ ++ return True; ++} ++ ++static Bool gen_slli_w ( DisResult* dres, UInt insn, ++ const VexArchInfo* archinfo, ++ const VexAbiInfo* abiinfo ) ++{ ++ UInt ui5 = get_ui5(insn); ++ UInt rj = get_rj(insn); ++ UInt rd = get_rd(insn); ++ ++ DIP("slli.w %s, %s, %u\n", nameIReg(rd), nameIReg(rj), ui5); ++ ++ IRExpr* shl = binop(Iop_Shl32, getIReg32(rj), mkU8(ui5)); ++ putIReg(rd, extendS(Ity_I32, shl)); ++ ++ return True; ++} ++ ++static Bool gen_slli_d ( DisResult* dres, UInt insn, ++ const VexArchInfo* archinfo, ++ const VexAbiInfo* abiinfo ) ++{ ++ UInt ui6 = get_ui6(insn); ++ UInt rj = get_rj(insn); ++ UInt rd = get_rd(insn); ++ ++ DIP("slli.d %s, %s, %u\n", nameIReg(rd), nameIReg(rj), ui6); ++ ++ putIReg(rd, binop(Iop_Shl64, getIReg64(rj), mkU8(ui6))); ++ ++ return True; ++} ++ ++static Bool gen_srli_w ( DisResult* dres, UInt insn, ++ const VexArchInfo* archinfo, ++ const VexAbiInfo* abiinfo ) ++{ ++ UInt ui5 = get_ui5(insn); ++ UInt rj = get_rj(insn); ++ UInt rd = get_rd(insn); ++ ++ DIP("srli.w %s, %s, %u\n", nameIReg(rd), nameIReg(rj), ui5); ++ ++ IRExpr* shr = binop(Iop_Shr32, getIReg32(rj), mkU8(ui5)); ++ putIReg(rd, extendS(Ity_I32, shr)); ++ ++ return True; ++} ++ ++static Bool gen_srli_d ( DisResult* dres, UInt insn, ++ const VexArchInfo* archinfo, ++ const VexAbiInfo* abiinfo ) ++{ ++ UInt ui6 = get_ui6(insn); ++ UInt rj = get_rj(insn); ++ UInt rd = get_rd(insn); ++ ++ DIP("srli.d %s, %s, %u\n", nameIReg(rd), nameIReg(rj), ui6); ++ ++ putIReg(rd, binop(Iop_Shr64, getIReg64(rj), mkU8(ui6))); ++ ++ return True; ++} ++ ++static Bool gen_srai_w ( DisResult* dres, UInt insn, ++ const VexArchInfo* archinfo, ++ const VexAbiInfo* abiinfo ) ++{ ++ UInt ui5 = get_ui5(insn); ++ UInt rj = get_rj(insn); ++ UInt rd = get_rd(insn); ++ ++ DIP("srai.w %s, %s, %u\n", nameIReg(rd), nameIReg(rj), ui5); ++ ++ IRExpr* sar = binop(Iop_Sar32, getIReg32(rj), mkU8(ui5)); ++ putIReg(rd, extendS(Ity_I32, sar)); ++ ++ return True; ++} ++ ++static Bool gen_srai_d ( DisResult* dres, UInt insn, ++ const VexArchInfo* archinfo, ++ const VexAbiInfo* abiinfo ) ++{ ++ UInt ui6 = get_ui6(insn); ++ UInt rj = get_rj(insn); ++ UInt rd = get_rd(insn); ++ ++ DIP("srai.d %s, %s, %u\n", nameIReg(rd), nameIReg(rj), ui6); ++ ++ putIReg(rd, binop(Iop_Sar64, getIReg64(rj), mkU8(ui6))); ++ ++ return True; ++} ++ ++static Bool gen_rotri_w ( DisResult* dres, UInt insn, ++ const VexArchInfo* archinfo, ++ const VexAbiInfo* abiinfo ) ++{ ++ UInt ui5 = get_ui5(insn); ++ UInt rj = get_rj(insn); ++ UInt rd = get_rd(insn); ++ ++ DIP("rotri.w %s, %s, %u\n", nameIReg(rd), nameIReg(rj), ui5); ++ ++ IRTemp tmp = newTemp(Ity_I32); ++ assign(tmp, getIReg32(rj)); ++ IRExpr* shr = binop(Iop_Shr32, mkexpr(tmp), mkU8(ui5)); ++ IRExpr* shl = binop(Iop_Shl32, mkexpr(tmp), mkU8(32 - ui5)); ++ if (32 - ui5 == 32) ++ shl = mkU32(0); ++ IRExpr* or = binop(Iop_Or32, shr, shl); ++ putIReg(rd, extendS(Ity_I32, or)); ++ ++ return True; ++} ++ ++static Bool gen_rotri_d ( DisResult* dres, UInt insn, ++ const VexArchInfo* archinfo, ++ const VexAbiInfo* abiinfo ) ++{ ++ UInt ui6 = get_ui6(insn); ++ UInt rj = get_rj(insn); ++ UInt rd = get_rd(insn); ++ ++ DIP("rotri.d %s, %s, %u\n", nameIReg(rd), 
nameIReg(rj), ui6); ++ ++ IRTemp tmp = newTemp(Ity_I64); ++ assign(tmp, getIReg64(rj)); ++ IRExpr* shr = binop(Iop_Shr64, mkexpr(tmp), mkU8(ui6)); ++ IRExpr* shl = binop(Iop_Shl64, mkexpr(tmp), mkU8(64 - ui6)); ++ if (64 - ui6 == 64) ++ shl = mkU64(0); ++ putIReg(rd, binop(Iop_Or64, shr, shl)); ++ ++ return True; ++} ++ ++ ++/*------------------------------------------------------------*/ ++/*--- Helpers for fixed point bit insns ---*/ ++/*------------------------------------------------------------*/ ++ ++static Bool gen_ext_w_h ( DisResult* dres, UInt insn, ++ const VexArchInfo* archinfo, ++ const VexAbiInfo* abiinfo ) ++{ ++ UInt rj = get_rj(insn); ++ UInt rd = get_rd(insn); ++ ++ DIP("ext.w.h %s, %s\n", nameIReg(rd), nameIReg(rj)); ++ ++ putIReg(rd, extendS(Ity_I16, getIReg16(rj))); ++ ++ return True; ++} ++ ++static Bool gen_ext_w_b ( DisResult* dres, UInt insn, ++ const VexArchInfo* archinfo, ++ const VexAbiInfo* abiinfo ) ++{ ++ UInt rj = get_rj(insn); ++ UInt rd = get_rd(insn); ++ ++ DIP("ext.w.b %s, %s\n", nameIReg(rd), nameIReg(rj)); ++ ++ putIReg(rd, extendS(Ity_I8, getIReg8(rj))); ++ ++ return True; ++} ++ ++static Bool gen_clo_w ( DisResult* dres, UInt insn, ++ const VexArchInfo* archinfo, ++ const VexAbiInfo* abiinfo ) ++{ ++ UInt rj = get_rj(insn); ++ UInt rd = get_rd(insn); ++ ++ DIP("clo.w %s, %s\n", nameIReg(rd), nameIReg(rj)); ++ ++ IRExpr* not = unop(Iop_Not32, getIReg32(rj)); ++ IRExpr* clz = unop(Iop_Clz32, not); ++ putIReg(rd, extendU(Ity_I32, clz)); ++ ++ return True; ++} ++ ++static Bool gen_clz_w ( DisResult* dres, UInt insn, ++ const VexArchInfo* archinfo, ++ const VexAbiInfo* abiinfo ) ++{ ++ UInt rj = get_rj(insn); ++ UInt rd = get_rd(insn); ++ ++ DIP("clz.w %s, %s\n", nameIReg(rd), nameIReg(rj)); ++ ++ IRExpr* clz = unop(Iop_Clz32, getIReg32(rj)); ++ putIReg(rd, extendU(Ity_I32, clz)); ++ ++ return True; ++} ++ ++static Bool gen_cto_w ( DisResult* dres, UInt insn, ++ const VexArchInfo* archinfo, ++ const VexAbiInfo* abiinfo ) ++{ ++ UInt rj = get_rj(insn); ++ UInt rd = get_rd(insn); ++ ++ DIP("cto.w %s, %s\n", nameIReg(rd), nameIReg(rj)); ++ ++ IRExpr* not = unop(Iop_Not32, getIReg32(rj)); ++ IRExpr* clz = unop(Iop_Ctz32, not); ++ putIReg(rd, extendU(Ity_I32, clz)); ++ ++ return True; ++} ++ ++static Bool gen_ctz_w ( DisResult* dres, UInt insn, ++ const VexArchInfo* archinfo, ++ const VexAbiInfo* abiinfo ) ++{ ++ UInt rj = get_rj(insn); ++ UInt rd = get_rd(insn); ++ ++ DIP("ctz.w %s, %s\n", nameIReg(rd), nameIReg(rj)); ++ ++ IRExpr* clz = unop(Iop_Ctz32, getIReg32(rj)); ++ putIReg(rd, extendU(Ity_I32, clz)); ++ ++ return True; ++} ++ ++static Bool gen_clo_d ( DisResult* dres, UInt insn, ++ const VexArchInfo* archinfo, ++ const VexAbiInfo* abiinfo ) ++{ ++ UInt rj = get_rj(insn); ++ UInt rd = get_rd(insn); ++ ++ DIP("clo.d %s, %s\n", nameIReg(rd), nameIReg(rj)); ++ ++ IRExpr* not = unop(Iop_Not64, getIReg64(rj)); ++ putIReg(rd, unop(Iop_Clz64, not)); ++ ++ return True; ++} ++ ++static Bool gen_clz_d ( DisResult* dres, UInt insn, ++ const VexArchInfo* archinfo, ++ const VexAbiInfo* abiinfo ) ++{ ++ UInt rj = get_rj(insn); ++ UInt rd = get_rd(insn); ++ ++ DIP("clz.d %s, %s\n", nameIReg(rd), nameIReg(rj)); ++ ++ putIReg(rd, unop(Iop_Clz64, getIReg64(rj))); ++ ++ return True; ++} ++ ++static Bool gen_cto_d ( DisResult* dres, UInt insn, ++ const VexArchInfo* archinfo, ++ const VexAbiInfo* abiinfo ) ++{ ++ UInt rj = get_rj(insn); ++ UInt rd = get_rd(insn); ++ ++ DIP("cto.d %s, %s\n", nameIReg(rd), nameIReg(rj)); ++ ++ IRExpr* not = unop(Iop_Not64, getIReg64(rj)); ++ 
putIReg(rd, unop(Iop_Ctz64, not)); ++ ++ return True; ++} ++ ++static Bool gen_ctz_d ( DisResult* dres, UInt insn, ++ const VexArchInfo* archinfo, ++ const VexAbiInfo* abiinfo ) ++{ ++ UInt rj = get_rj(insn); ++ UInt rd = get_rd(insn); ++ ++ DIP("ctz.d %s, %s\n", nameIReg(rd), nameIReg(rj)); ++ ++ putIReg(rd, unop(Iop_Ctz64, getIReg64(rj))); ++ ++ return True; ++} ++ ++static Bool gen_revb_2h ( DisResult* dres, UInt insn, ++ const VexArchInfo* archinfo, ++ const VexAbiInfo* abiinfo ) ++{ ++ UInt rj = get_rj(insn); ++ UInt rd = get_rd(insn); ++ ++ DIP("revb.2h %s, %s\n", nameIReg(rd), nameIReg(rj)); ++ ++ IRExpr** arg = mkIRExprVec_1(getIReg64(rj)); ++ IRExpr* call = mkIRExprCCall(Ity_I64, 0/*regparms*/, ++ "loongarch64_calculate_revb_2h", ++ &loongarch64_calculate_revb_2h, ++ arg); ++ putIReg(rd, call); ++ ++ return True; ++} ++ ++static Bool gen_revb_4h ( DisResult* dres, UInt insn, ++ const VexArchInfo* archinfo, ++ const VexAbiInfo* abiinfo ) ++{ ++ UInt rj = get_rj(insn); ++ UInt rd = get_rd(insn); ++ ++ DIP("revb.4h %s, %s\n", nameIReg(rd), nameIReg(rj)); ++ ++ IRExpr** arg = mkIRExprVec_1(getIReg64(rj)); ++ IRExpr* call = mkIRExprCCall(Ity_I64, 0/*regparms*/, ++ "loongarch64_calculate_revb_4h", ++ &loongarch64_calculate_revb_4h, ++ arg); ++ putIReg(rd, call); ++ ++ return True; ++} ++ ++static Bool gen_revb_2w ( DisResult* dres, UInt insn, ++ const VexArchInfo* archinfo, ++ const VexAbiInfo* abiinfo ) ++{ ++ UInt rj = get_rj(insn); ++ UInt rd = get_rd(insn); ++ ++ DIP("revb.2w %s, %s\n", nameIReg(rd), nameIReg(rj)); ++ ++ IRExpr** arg = mkIRExprVec_1(getIReg64(rj)); ++ IRExpr* call = mkIRExprCCall(Ity_I64, 0/*regparms*/, ++ "loongarch64_calculate_revb_2w", ++ &loongarch64_calculate_revb_2w, ++ arg); ++ putIReg(rd, call); ++ ++ return True; ++} ++ ++static Bool gen_revb_d ( DisResult* dres, UInt insn, ++ const VexArchInfo* archinfo, ++ const VexAbiInfo* abiinfo ) ++{ ++ UInt rj = get_rj(insn); ++ UInt rd = get_rd(insn); ++ ++ DIP("revb.d %s, %s\n", nameIReg(rd), nameIReg(rj)); ++ ++ IRExpr** arg = mkIRExprVec_1(getIReg64(rj)); ++ IRExpr* call = mkIRExprCCall(Ity_I64, 0/*regparms*/, ++ "loongarch64_calculate_revb_d", ++ &loongarch64_calculate_revb_d, ++ arg); ++ putIReg(rd, call); ++ ++ return True; ++} ++ ++static Bool gen_revh_2w ( DisResult* dres, UInt insn, ++ const VexArchInfo* archinfo, ++ const VexAbiInfo* abiinfo ) ++{ ++ UInt rj = get_rj(insn); ++ UInt rd = get_rd(insn); ++ ++ DIP("revh.2w %s, %s\n", nameIReg(rd), nameIReg(rj)); ++ ++ IRExpr** arg = mkIRExprVec_1(getIReg64(rj)); ++ IRExpr* call = mkIRExprCCall(Ity_I64, 0/*regparms*/, ++ "loongarch64_calculate_revh_2w", ++ &loongarch64_calculate_revh_2w, ++ arg); ++ putIReg(rd, call); ++ ++ return True; ++} ++ ++static Bool gen_revh_d ( DisResult* dres, UInt insn, ++ const VexArchInfo* archinfo, ++ const VexAbiInfo* abiinfo ) ++{ ++ UInt rj = get_rj(insn); ++ UInt rd = get_rd(insn); ++ ++ DIP("revh.d %s, %s\n", nameIReg(rd), nameIReg(rj)); ++ ++ IRExpr** arg = mkIRExprVec_1(getIReg64(rj)); ++ IRExpr* call = mkIRExprCCall(Ity_I64, 0/*regparms*/, ++ "loongarch64_calculate_revh_d", ++ &loongarch64_calculate_revh_d, ++ arg); ++ putIReg(rd, call); ++ ++ return True; ++} ++ ++static Bool gen_bitrev_4b ( DisResult* dres, UInt insn, ++ const VexArchInfo* archinfo, ++ const VexAbiInfo* abiinfo ) ++{ ++ UInt rj = get_rj(insn); ++ UInt rd = get_rd(insn); ++ ++ DIP("bitrev.4b %s, %s\n", nameIReg(rd), nameIReg(rj)); ++ ++ IRExpr** arg = mkIRExprVec_1(getIReg64(rj)); ++ IRExpr* call = mkIRExprCCall(Ity_I64, 0/*regparms*/, ++ 
"loongarch64_calculate_bitrev_4b", ++ &loongarch64_calculate_bitrev_4b, ++ arg); ++ putIReg(rd, call); ++ ++ return True; ++} ++ ++static Bool gen_bitrev_8b ( DisResult* dres, UInt insn, ++ const VexArchInfo* archinfo, ++ const VexAbiInfo* abiinfo ) ++{ ++ UInt rj = get_rj(insn); ++ UInt rd = get_rd(insn); ++ ++ DIP("bitrev.8b %s, %s\n", nameIReg(rd), nameIReg(rj)); ++ ++ IRExpr** arg = mkIRExprVec_1(getIReg64(rj)); ++ IRExpr* call = mkIRExprCCall(Ity_I64, 0/*regparms*/, ++ "loongarch64_calculate_bitrev_8b", ++ &loongarch64_calculate_bitrev_8b, ++ arg); ++ putIReg(rd, call); ++ ++ return True; ++} ++ ++static Bool gen_bitrev_w ( DisResult* dres, UInt insn, ++ const VexArchInfo* archinfo, ++ const VexAbiInfo* abiinfo ) ++{ ++ UInt rj = get_rj(insn); ++ UInt rd = get_rd(insn); ++ ++ DIP("bitrev.w %s, %s\n", nameIReg(rd), nameIReg(rj)); ++ ++ IRExpr** arg = mkIRExprVec_1(getIReg64(rj)); ++ IRExpr* call = mkIRExprCCall(Ity_I64, 0/*regparms*/, ++ "loongarch64_calculate_bitrev_w", ++ &loongarch64_calculate_bitrev_w, ++ arg); ++ putIReg(rd, call); ++ ++ return True; ++} ++ ++static Bool gen_bitrev_d ( DisResult* dres, UInt insn, ++ const VexArchInfo* archinfo, ++ const VexAbiInfo* abiinfo ) ++{ ++ UInt rj = get_rj(insn); ++ UInt rd = get_rd(insn); ++ ++ DIP("bitrev.d %s, %s\n", nameIReg(rd), nameIReg(rj)); ++ ++ IRExpr** arg = mkIRExprVec_1(getIReg64(rj)); ++ IRExpr* call = mkIRExprCCall(Ity_I64, 0/*regparms*/, ++ "loongarch64_calculate_bitrev_d", ++ &loongarch64_calculate_bitrev_d, ++ arg); ++ putIReg(rd, call); ++ ++ return True; ++} ++ ++static Bool gen_bytepick_w ( DisResult* dres, UInt insn, ++ const VexArchInfo* archinfo, ++ const VexAbiInfo* abiinfo ) ++{ ++ UInt sa2 = get_sa2(insn); ++ UInt rk = get_rk(insn); ++ UInt rj = get_rj(insn); ++ UInt rd = get_rd(insn); ++ ++ DIP("bytepick.w %s, %s, %s, %u\n", nameIReg(rd), nameIReg(rj), ++ nameIReg(rk), sa2); ++ ++ UInt shift = 8 * (4 - sa2); ++ IRExpr* shl = binop(Iop_Shl32, getIReg32(rk), mkU8(32 - shift)); ++ if (32 - shift == 32) ++ shl = mkU32(0); ++ IRExpr* shr = binop(Iop_Shr32, getIReg32(rj), mkU8(shift)); ++ if (shift == 32) ++ shr = mkU32(0); ++ IRExpr* or = binop(Iop_Or32, shl, shr); ++ putIReg(rd, extendS(Ity_I32, or)); ++ ++ return True; ++} ++ ++static Bool gen_bytepick_d ( DisResult* dres, UInt insn, ++ const VexArchInfo* archinfo, ++ const VexAbiInfo* abiinfo ) ++{ ++ UInt sa3 = get_sa3(insn); ++ UInt rk = get_rk(insn); ++ UInt rj = get_rj(insn); ++ UInt rd = get_rd(insn); ++ ++ DIP("bytepick.d %s, %s, %s, %u\n", nameIReg(rd), nameIReg(rj), ++ nameIReg(rk), sa3); ++ ++ UInt shift = 8 * (8 - sa3); ++ IRExpr* shl = binop(Iop_Shl64, getIReg64(rk), mkU8(64 - shift)); ++ if (64 - shift == 64) ++ shl = mkU64(0); ++ IRExpr* shr = binop(Iop_Shr64, getIReg64(rj), mkU8(shift)); ++ if (shift == 64) ++ shr = mkU64(0); ++ putIReg(rd, binop(Iop_Or64, shl, shr)); ++ ++ return True; ++} ++ ++static Bool gen_maskeqz ( DisResult* dres, UInt insn, ++ const VexArchInfo* archinfo, ++ const VexAbiInfo* abiinfo ) ++{ ++ UInt rk = get_rk(insn); ++ UInt rj = get_rj(insn); ++ UInt rd = get_rd(insn); ++ ++ DIP("maskeqz %s, %s, %s\n", nameIReg(rd), nameIReg(rj), nameIReg(rk)); ++ ++ IRExpr* cond = binop(Iop_CmpNE64, getIReg64(rk), mkU64(0)); ++ putIReg(rd, binop(Iop_And64, extendS(Ity_I1, cond), getIReg64(rj))); ++ ++ return True; ++} ++ ++static Bool gen_masknez ( DisResult* dres, UInt insn, ++ const VexArchInfo* archinfo, ++ const VexAbiInfo* abiinfo ) ++{ ++ UInt rk = get_rk(insn); ++ UInt rj = get_rj(insn); ++ UInt rd = get_rd(insn); ++ ++ DIP("masknez 
%s, %s, %s\n", nameIReg(rd), nameIReg(rj), nameIReg(rk)); ++ ++ IRExpr* cond = binop(Iop_CmpEQ64, getIReg64(rk), mkU64(0)); ++ putIReg(rd, binop(Iop_And64, extendS(Ity_I1, cond), getIReg64(rj))); ++ ++ return True; ++} ++ ++static Bool gen_bstrins_w ( DisResult* dres, UInt insn, ++ const VexArchInfo* archinfo, ++ const VexAbiInfo* abiinfo ) ++{ ++ UInt msb = get_msbw(insn); ++ UInt lsb = get_lsbw(insn); ++ UInt rj = get_rj(insn); ++ UInt rd = get_rd(insn); ++ ++ DIP("bstrins.w %s, %s, %u, %u\n", nameIReg(rd), nameIReg(rj), msb, lsb); ++ ++ IRTemp tmp = newTemp(Ity_I32); ++ assign(tmp, getIReg32(rd)); ++ IRExpr* shl1; ++ if (msb == 31) { ++ shl1 = mkU32(0); ++ } else { ++ IRExpr* shr1 = binop(Iop_Shr32, mkexpr(tmp), mkU8(msb + 1)); ++ shl1 = binop(Iop_Shl32, shr1, mkU8(msb + 1)); ++ } ++ IRExpr* shl2 = binop(Iop_Shl32, getIReg32(rj), mkU8(31 - msb + lsb)); ++ IRExpr* shr2 = binop(Iop_Shr32, shl2, mkU8(31 - msb)); ++ IRExpr* shr3; ++ if (lsb == 0) { ++ shr3 = mkU32(0); ++ } else { ++ IRExpr* shl3 = binop(Iop_Shl32, mkexpr(tmp), mkU8(32 - lsb)); ++ shr3 = binop(Iop_Shr32, shl3, mkU8(32 - lsb)); ++ } ++ IRExpr* or1 = binop(Iop_Or32, shl1, shr2); ++ IRExpr* or2 = binop(Iop_Or32, or1, shr3); ++ putIReg(rd, extendS(Ity_I32, or2)); ++ ++ return True; ++} ++ ++static Bool gen_bstrpick_w ( DisResult* dres, UInt insn, ++ const VexArchInfo* archinfo, ++ const VexAbiInfo* abiinfo ) ++{ ++ UInt msb = get_msbw(insn); ++ UInt lsb = get_lsbw(insn); ++ UInt rj = get_rj(insn); ++ UInt rd = get_rd(insn); ++ ++ DIP("bstrpick.w %s, %s, %u, %u\n", nameIReg(rd), nameIReg(rj), msb, lsb); ++ ++ IRExpr* shl = binop(Iop_Shl32, getIReg32(rj), mkU8(31 - msb)); ++ IRExpr* shr = binop(Iop_Shr32, shl, mkU8(31 - msb + lsb)); ++ putIReg(rd, extendS(Ity_I32, shr)); ++ ++ return True; ++} ++ ++static Bool gen_bstrins_d ( DisResult* dres, UInt insn, ++ const VexArchInfo* archinfo, ++ const VexAbiInfo* abiinfo ) ++{ ++ UInt msb = get_msbd(insn); ++ UInt lsb = get_lsbd(insn); ++ UInt rj = get_rj(insn); ++ UInt rd = get_rd(insn); ++ ++ DIP("bstrins.d %s, %s, %u, %u\n", nameIReg(rd), nameIReg(rj), msb, lsb); ++ ++ IRTemp tmp = newTemp(Ity_I64); ++ assign(tmp, getIReg64(rd)); ++ IRExpr* shl1; ++ if (msb == 63) { ++ shl1 = mkU64(0); ++ } else { ++ IRExpr* shr1 = binop(Iop_Shr64, mkexpr(tmp), mkU8(msb + 1)); ++ shl1 = binop(Iop_Shl64, shr1, mkU8(msb + 1)); ++ } ++ IRExpr* shl2 = binop(Iop_Shl64, getIReg64(rj), mkU8(63 - msb + lsb)); ++ IRExpr* shr2 = binop(Iop_Shr64, shl2, mkU8(63 - msb)); ++ IRExpr* shr3; ++ if (lsb == 0) { ++ shr3 = mkU64(0); ++ } else { ++ IRExpr* shl3 = binop(Iop_Shl64, mkexpr(tmp), mkU8(64 - lsb)); ++ shr3 = binop(Iop_Shr64, shl3, mkU8(64 - lsb)); ++ } ++ IRExpr* or = binop(Iop_Or64, shl1, shr2); ++ putIReg(rd, binop(Iop_Or64, or, shr3)); ++ ++ return True; ++} ++ ++static Bool gen_bstrpick_d ( DisResult* dres, UInt insn, ++ const VexArchInfo* archinfo, ++ const VexAbiInfo* abiinfo ) ++{ ++ UInt msb = get_msbd(insn); ++ UInt lsb = get_lsbd(insn); ++ UInt rj = get_rj(insn); ++ UInt rd = get_rd(insn); ++ ++ DIP("bstrpick.d %s, %s, %u, %u\n", nameIReg(rd), nameIReg(rj), msb, lsb); ++ ++ IRExpr* shl = binop(Iop_Shl64, getIReg64(rj), mkU8(63 - msb)); ++ putIReg(rd, binop(Iop_Shr64, shl, mkU8(63 - msb + lsb))); ++ ++ return True; ++} ++ ++ ++/*------------------------------------------------------------*/ ++/*--- Helpers for fixed point load/store insns ---*/ ++/*------------------------------------------------------------*/ ++ ++static Bool gen_ld_b ( DisResult* dres, UInt insn, ++ const VexArchInfo* archinfo, ++ 
const VexAbiInfo* abiinfo ) ++{ ++ UInt si12 = get_si12(insn); ++ UInt rj = get_rj(insn); ++ UInt rd = get_rd(insn); ++ ++ DIP("ld.b %s, %s, %d\n", nameIReg(rd), nameIReg(rj), ++ (Int)extend32(si12, 12)); ++ ++ IRExpr* addr = binop(Iop_Add64, getIReg64(rj), mkU64(extend64(si12, 12))); ++ putIReg(rd, extendS(Ity_I8, load(Ity_I8, addr))); ++ ++ return True; ++} ++ ++static Bool gen_ld_h ( DisResult* dres, UInt insn, ++ const VexArchInfo* archinfo, ++ const VexAbiInfo* abiinfo ) ++{ ++ UInt si12 = get_si12(insn); ++ UInt rj = get_rj(insn); ++ UInt rd = get_rd(insn); ++ ++ DIP("ld.h %s, %s, %d\n", nameIReg(rd), nameIReg(rj), ++ (Int)extend32(si12, 12)); ++ ++ IRExpr* addr = binop(Iop_Add64, getIReg64(rj), mkU64(extend64(si12, 12))); ++ if (!(archinfo->hwcaps & VEX_HWCAPS_LOONGARCH_UAL)) ++ gen_SIGBUS(check_align(addr, mkU64(0x1))); ++ putIReg(rd, extendS(Ity_I16, load(Ity_I16, addr))); ++ ++ return True; ++} ++ ++static Bool gen_ld_w ( DisResult* dres, UInt insn, ++ const VexArchInfo* archinfo, ++ const VexAbiInfo* abiinfo ) ++{ ++ UInt si12 = get_si12(insn); ++ UInt rj = get_rj(insn); ++ UInt rd = get_rd(insn); ++ ++ DIP("ld.w %s, %s, %d\n", nameIReg(rd), nameIReg(rj), ++ (Int)extend32(si12, 12)); ++ ++ IRExpr* addr = binop(Iop_Add64, getIReg64(rj), mkU64(extend64(si12, 12))); ++ if (!(archinfo->hwcaps & VEX_HWCAPS_LOONGARCH_UAL)) ++ gen_SIGBUS(check_align(addr, mkU64(0x3))); ++ putIReg(rd, extendS(Ity_I32, load(Ity_I32, addr))); ++ ++ return True; ++} ++ ++static Bool gen_ld_d ( DisResult* dres, UInt insn, ++ const VexArchInfo* archinfo, ++ const VexAbiInfo* abiinfo ) ++{ ++ UInt si12 = get_si12(insn); ++ UInt rj = get_rj(insn); ++ UInt rd = get_rd(insn); ++ ++ DIP("ld.d %s, %s, %d\n", nameIReg(rd), nameIReg(rj), ++ (Int)extend32(si12, 12)); ++ ++ IRExpr* addr = binop(Iop_Add64, getIReg64(rj), mkU64(extend64(si12, 12))); ++ if (!(archinfo->hwcaps & VEX_HWCAPS_LOONGARCH_UAL)) ++ gen_SIGBUS(check_align(addr, mkU64(0x7))); ++ putIReg(rd, load(Ity_I64, addr)); ++ ++ return True; ++} ++ ++static Bool gen_st_b ( DisResult* dres, UInt insn, ++ const VexArchInfo* archinfo, ++ const VexAbiInfo* abiinfo ) ++{ ++ UInt si12 = get_si12(insn); ++ UInt rj = get_rj(insn); ++ UInt rd = get_rd(insn); ++ ++ DIP("st.b %s, %s, %d\n", nameIReg(rd), nameIReg(rj), ++ (Int)extend32(si12, 12)); ++ ++ IRExpr* addr = binop(Iop_Add64, getIReg64(rj), mkU64(extend64(si12, 12))); ++ store(addr, getIReg8(rd)); ++ ++ return True; ++} ++ ++static Bool gen_st_h ( DisResult* dres, UInt insn, ++ const VexArchInfo* archinfo, ++ const VexAbiInfo* abiinfo ) ++{ ++ UInt si12 = get_si12(insn); ++ UInt rj = get_rj(insn); ++ UInt rd = get_rd(insn); ++ ++ DIP("st.h %s, %s, %d\n", nameIReg(rd), nameIReg(rj), ++ (Int)extend32(si12, 12)); ++ ++ IRExpr* addr = binop(Iop_Add64, getIReg64(rj), mkU64(extend64(si12, 12))); ++ if (!(archinfo->hwcaps & VEX_HWCAPS_LOONGARCH_UAL)) ++ gen_SIGBUS(check_align(addr, mkU64(0x1))); ++ store(addr, getIReg16(rd)); ++ ++ return True; ++} ++ ++static Bool gen_st_w ( DisResult* dres, UInt insn, ++ const VexArchInfo* archinfo, ++ const VexAbiInfo* abiinfo ) ++{ ++ UInt si12 = get_si12(insn); ++ UInt rj = get_rj(insn); ++ UInt rd = get_rd(insn); ++ ++ DIP("st.w %s, %s, %d\n", nameIReg(rd), nameIReg(rj), ++ (Int)extend32(si12, 12)); ++ ++ IRExpr* addr = binop(Iop_Add64, getIReg64(rj), mkU64(extend64(si12, 12))); ++ if (!(archinfo->hwcaps & VEX_HWCAPS_LOONGARCH_UAL)) ++ gen_SIGBUS(check_align(addr, mkU64(0x3))); ++ store(addr, getIReg32(rd)); ++ ++ return True; ++} ++ ++static Bool gen_st_d ( DisResult* dres, 
UInt insn, ++ const VexArchInfo* archinfo, ++ const VexAbiInfo* abiinfo ) ++{ ++ UInt si12 = get_si12(insn); ++ UInt rj = get_rj(insn); ++ UInt rd = get_rd(insn); ++ ++ DIP("st.d %s, %s, %d\n", nameIReg(rd), nameIReg(rj), ++ (Int)extend32(si12, 12)); ++ ++ IRExpr* addr = binop(Iop_Add64, getIReg64(rj), mkU64(extend64(si12, 12))); ++ if (!(archinfo->hwcaps & VEX_HWCAPS_LOONGARCH_UAL)) ++ gen_SIGBUS(check_align(addr, mkU64(0x7))); ++ store(addr, getIReg64(rd)); ++ ++ return True; ++} ++ ++static Bool gen_ld_bu ( DisResult* dres, UInt insn, ++ const VexArchInfo* archinfo, ++ const VexAbiInfo* abiinfo ) ++{ ++ UInt si12 = get_si12(insn); ++ UInt rj = get_rj(insn); ++ UInt rd = get_rd(insn); ++ ++ DIP("ld.bu %s, %s, %d\n", nameIReg(rd), nameIReg(rj), ++ (Int)extend32(si12, 12)); ++ ++ IRExpr* addr = binop(Iop_Add64, getIReg64(rj), mkU64(extend64(si12, 12))); ++ putIReg(rd, extendU(Ity_I8, load(Ity_I8, addr))); ++ ++ return True; ++} ++ ++static Bool gen_ld_hu ( DisResult* dres, UInt insn, ++ const VexArchInfo* archinfo, ++ const VexAbiInfo* abiinfo ) ++{ ++ UInt si12 = get_si12(insn); ++ UInt rj = get_rj(insn); ++ UInt rd = get_rd(insn); ++ ++ DIP("ld.hu %s, %s, %d\n", nameIReg(rd), nameIReg(rj), ++ (Int)extend32(si12, 12)); ++ ++ IRExpr* addr = binop(Iop_Add64, getIReg64(rj), mkU64(extend64(si12, 12))); ++ if (!(archinfo->hwcaps & VEX_HWCAPS_LOONGARCH_UAL)) ++ gen_SIGBUS(check_align(addr, mkU64(0x1))); ++ putIReg(rd, extendU(Ity_I16, load(Ity_I16, addr))); ++ ++ return True; ++} ++ ++static Bool gen_ld_wu ( DisResult* dres, UInt insn, ++ const VexArchInfo* archinfo, ++ const VexAbiInfo* abiinfo ) ++{ ++ UInt si12 = get_si12(insn); ++ UInt rj = get_rj(insn); ++ UInt rd = get_rd(insn); ++ ++ DIP("ld.wu %s, %s, %d\n", nameIReg(rd), nameIReg(rj), ++ (Int)extend32(si12, 12)); ++ ++ IRExpr* addr = binop(Iop_Add64, getIReg64(rj), mkU64(extend64(si12, 12))); ++ if (!(archinfo->hwcaps & VEX_HWCAPS_LOONGARCH_UAL)) ++ gen_SIGBUS(check_align(addr, mkU64(0x3))); ++ putIReg(rd, extendU(Ity_I32, load(Ity_I32, addr))); ++ ++ return True; ++} ++ ++static Bool gen_ldx_b ( DisResult* dres, UInt insn, ++ const VexArchInfo* archinfo, ++ const VexAbiInfo* abiinfo ) ++{ ++ UInt rk = get_rk(insn); ++ UInt rj = get_rj(insn); ++ UInt rd = get_rd(insn); ++ ++ DIP("ldx.b %s, %s, %s\n", nameIReg(rd), nameIReg(rj), nameIReg(rk)); ++ ++ IRExpr* addr = binop(Iop_Add64, getIReg64(rj), getIReg64(rk)); ++ putIReg(rd, extendS(Ity_I8, load(Ity_I8, addr))); ++ ++ return True; ++} ++ ++static Bool gen_ldx_h ( DisResult* dres, UInt insn, ++ const VexArchInfo* archinfo, ++ const VexAbiInfo* abiinfo ) ++{ ++ UInt rk = get_rk(insn); ++ UInt rj = get_rj(insn); ++ UInt rd = get_rd(insn); ++ ++ DIP("ldx.h %s, %s, %s\n", nameIReg(rd), nameIReg(rj), nameIReg(rk)); ++ ++ IRExpr* addr = binop(Iop_Add64, getIReg64(rj), getIReg64(rk)); ++ if (!(archinfo->hwcaps & VEX_HWCAPS_LOONGARCH_UAL)) ++ gen_SIGBUS(check_align(addr, mkU64(0x1))); ++ putIReg(rd, extendS(Ity_I16, load(Ity_I16, addr))); ++ ++ return True; ++} ++ ++static Bool gen_ldx_w ( DisResult* dres, UInt insn, ++ const VexArchInfo* archinfo, ++ const VexAbiInfo* abiinfo ) ++{ ++ UInt rk = get_rk(insn); ++ UInt rj = get_rj(insn); ++ UInt rd = get_rd(insn); ++ ++ DIP("ldx.w %s, %s, %s\n", nameIReg(rd), nameIReg(rj), nameIReg(rk)); ++ ++ IRExpr* addr = binop(Iop_Add64, getIReg64(rj), getIReg64(rk)); ++ if (!(archinfo->hwcaps & VEX_HWCAPS_LOONGARCH_UAL)) ++ gen_SIGBUS(check_align(addr, mkU64(0x3))); ++ putIReg(rd, extendS(Ity_I32, load(Ity_I32, addr))); ++ ++ return True; ++} ++ ++static 
Bool gen_ldx_d ( DisResult* dres, UInt insn, ++ const VexArchInfo* archinfo, ++ const VexAbiInfo* abiinfo ) ++{ ++ UInt rk = get_rk(insn); ++ UInt rj = get_rj(insn); ++ UInt rd = get_rd(insn); ++ ++ DIP("ldx.d %s, %s, %s\n", nameIReg(rd), nameIReg(rj), nameIReg(rk)); ++ ++ IRExpr* addr = binop(Iop_Add64, getIReg64(rj), getIReg64(rk)); ++ if (!(archinfo->hwcaps & VEX_HWCAPS_LOONGARCH_UAL)) ++ gen_SIGBUS(check_align(addr, mkU64(0x7))); ++ putIReg(rd, load(Ity_I64, addr)); ++ ++ return True; ++} ++ ++static Bool gen_stx_b ( DisResult* dres, UInt insn, ++ const VexArchInfo* archinfo, ++ const VexAbiInfo* abiinfo ) ++{ ++ UInt rk = get_rk(insn); ++ UInt rj = get_rj(insn); ++ UInt rd = get_rd(insn); ++ ++ DIP("stx.b %s, %s, %s\n", nameIReg(rd), nameIReg(rj), nameIReg(rk)); ++ ++ IRExpr* addr = binop(Iop_Add64, getIReg64(rj), getIReg64(rk)); ++ store(addr, getIReg8(rd)); ++ ++ return True; ++} ++ ++static Bool gen_stx_h ( DisResult* dres, UInt insn, ++ const VexArchInfo* archinfo, ++ const VexAbiInfo* abiinfo ) ++{ ++ UInt rk = get_rk(insn); ++ UInt rj = get_rj(insn); ++ UInt rd = get_rd(insn); ++ ++ DIP("stx.h %s, %s, %s\n", nameIReg(rd), nameIReg(rj), nameIReg(rk)); ++ ++ IRExpr* addr = binop(Iop_Add64, getIReg64(rj), getIReg64(rk)); ++ if (!(archinfo->hwcaps & VEX_HWCAPS_LOONGARCH_UAL)) ++ gen_SIGBUS(check_align(addr, mkU64(0x1))); ++ store(addr, getIReg16(rd)); ++ ++ return True; ++} ++ ++static Bool gen_stx_w ( DisResult* dres, UInt insn, ++ const VexArchInfo* archinfo, ++ const VexAbiInfo* abiinfo ) ++{ ++ UInt rk = get_rk(insn); ++ UInt rj = get_rj(insn); ++ UInt rd = get_rd(insn); ++ ++ DIP("stx.w %s, %s, %s\n", nameIReg(rd), nameIReg(rj), nameIReg(rk)); ++ ++ IRExpr* addr = binop(Iop_Add64, getIReg64(rj), getIReg64(rk)); ++ if (!(archinfo->hwcaps & VEX_HWCAPS_LOONGARCH_UAL)) ++ gen_SIGBUS(check_align(addr, mkU64(0x3))); ++ store(addr, getIReg32(rd)); ++ ++ return True; ++} ++ ++static Bool gen_stx_d ( DisResult* dres, UInt insn, ++ const VexArchInfo* archinfo, ++ const VexAbiInfo* abiinfo ) ++{ ++ UInt rk = get_rk(insn); ++ UInt rj = get_rj(insn); ++ UInt rd = get_rd(insn); ++ ++ DIP("stx.d %s, %s, %s\n", nameIReg(rd), nameIReg(rj), nameIReg(rk)); ++ ++ IRExpr* addr = binop(Iop_Add64, getIReg64(rj), getIReg64(rk)); ++ if (!(archinfo->hwcaps & VEX_HWCAPS_LOONGARCH_UAL)) ++ gen_SIGBUS(check_align(addr, mkU64(0x7))); ++ store(addr, getIReg64(rd)); ++ ++ return True; ++} ++ ++static Bool gen_ldx_bu ( DisResult* dres, UInt insn, ++ const VexArchInfo* archinfo, ++ const VexAbiInfo* abiinfo ) ++{ ++ UInt rk = get_rk(insn); ++ UInt rj = get_rj(insn); ++ UInt rd = get_rd(insn); ++ ++ DIP("ldx.bu %s, %s, %s\n", nameIReg(rd), nameIReg(rj), nameIReg(rk)); ++ ++ IRExpr* addr = binop(Iop_Add64, getIReg64(rj), getIReg64(rk)); ++ putIReg(rd, extendU(Ity_I8, load(Ity_I8, addr))); ++ ++ return True; ++} ++ ++static Bool gen_ldx_hu ( DisResult* dres, UInt insn, ++ const VexArchInfo* archinfo, ++ const VexAbiInfo* abiinfo ) ++{ ++ UInt rk = get_rk(insn); ++ UInt rj = get_rj(insn); ++ UInt rd = get_rd(insn); ++ ++ DIP("ldx.hu %s, %s, %s\n", nameIReg(rd), nameIReg(rj), nameIReg(rk)); ++ ++ IRExpr* addr = binop(Iop_Add64, getIReg64(rj), getIReg64(rk)); ++ if (!(archinfo->hwcaps & VEX_HWCAPS_LOONGARCH_UAL)) ++ gen_SIGBUS(check_align(addr, mkU64(0x1))); ++ putIReg(rd, extendU(Ity_I16, load(Ity_I16, addr))); ++ ++ return True; ++} ++ ++static Bool gen_ldx_wu ( DisResult* dres, UInt insn, ++ const VexArchInfo* archinfo, ++ const VexAbiInfo* abiinfo ) ++{ ++ UInt rk = get_rk(insn); ++ UInt rj = get_rj(insn); ++ 
UInt rd = get_rd(insn); ++ ++ DIP("ldx.wu %s, %s, %s\n", nameIReg(rd), nameIReg(rj), nameIReg(rk)); ++ ++ IRExpr* addr = binop(Iop_Add64, getIReg64(rj), getIReg64(rk)); ++ if (!(archinfo->hwcaps & VEX_HWCAPS_LOONGARCH_UAL)) ++ gen_SIGBUS(check_align(addr, mkU64(0x3))); ++ putIReg(rd, extendU(Ity_I32, load(Ity_I32, addr))); ++ ++ return True; ++} ++ ++static Bool gen_preld ( DisResult* dres, UInt insn, ++ const VexArchInfo* archinfo, ++ const VexAbiInfo* abiinfo ) ++{ ++ UInt si12 = get_si12(insn); ++ UInt rj = get_rj(insn); ++ UInt hint = get_hint5(insn); ++ ++ DIP("preld %u, %s, %d\n", hint, nameIReg(rj), (Int)extend32(si12, 12)); ++ ++ return True; ++} ++ ++static Bool gen_preldx ( DisResult* dres, UInt insn, ++ const VexArchInfo* archinfo, ++ const VexAbiInfo* abiinfo ) ++{ ++ UInt si12 = get_si12(insn); ++ UInt rj = get_rj(insn); ++ UInt hint = get_hint5(insn); ++ ++ DIP("preldx %u, %s, %d\n", hint, nameIReg(rj), (Int)extend32(si12, 12)); ++ ++ return True; ++} ++ ++static Bool gen_dbar ( DisResult* dres, UInt insn, ++ const VexArchInfo* archinfo, ++ const VexAbiInfo* abiinfo ) ++{ ++ UInt hint = get_hint15(insn); ++ ++ DIP("dbar %u\n", hint); ++ ++ stmt(IRStmt_MBE(Imbe_Fence)); ++ ++ return True; ++} ++ ++static Bool gen_ibar ( DisResult* dres, UInt insn, ++ const VexArchInfo* archinfo, ++ const VexAbiInfo* abiinfo ) ++{ ++ UInt hint = get_hint15(insn); ++ ++ DIP("ibar %u\n", hint); ++ ++ stmt(IRStmt_MBE(Imbe_InsnFence)); ++ ++ return True; ++} ++ ++static Bool gen_ldptr_w ( DisResult* dres, UInt insn, ++ const VexArchInfo* archinfo, ++ const VexAbiInfo* abiinfo ) ++{ ++ UInt si14 = get_si14(insn); ++ UInt rj = get_rj(insn); ++ UInt rd = get_rd(insn); ++ ++ DIP("ldptr.w %s, %s, %d\n", nameIReg(rd), nameIReg(rj), ++ (Int)extend32(si14, 14)); ++ ++ IRExpr* addr = binop(Iop_Add64, getIReg64(rj), ++ mkU64(extend64(si14 << 2, 16))); ++ if (!(archinfo->hwcaps & VEX_HWCAPS_LOONGARCH_UAL)) ++ gen_SIGBUS(check_align(addr, mkU64(0x3))); ++ putIReg(rd, extendS(Ity_I32, load(Ity_I32, addr))); ++ ++ return True; ++} ++ ++static Bool gen_stptr_w ( DisResult* dres, UInt insn, ++ const VexArchInfo* archinfo, ++ const VexAbiInfo* abiinfo ) ++{ ++ UInt si14 = get_si14(insn); ++ UInt rj = get_rj(insn); ++ UInt rd = get_rd(insn); ++ ++ DIP("stptr.w %s, %s, %d\n", nameIReg(rd), nameIReg(rj), ++ (Int)extend32(si14, 14)); ++ ++ IRExpr* addr = binop(Iop_Add64, getIReg64(rj), ++ mkU64(extend64(si14 << 2, 16))); ++ if (!(archinfo->hwcaps & VEX_HWCAPS_LOONGARCH_UAL)) ++ gen_SIGBUS(check_align(addr, mkU64(0x3))); ++ store(addr, getIReg32(rd)); ++ ++ return True; ++} ++ ++static Bool gen_ldptr_d ( DisResult* dres, UInt insn, ++ const VexArchInfo* archinfo, ++ const VexAbiInfo* abiinfo ) ++{ ++ UInt si14 = get_si14(insn); ++ UInt rj = get_rj(insn); ++ UInt rd = get_rd(insn); ++ ++ DIP("ldptr.d %s, %s, %d\n", nameIReg(rd), nameIReg(rj), ++ (Int)extend32(si14, 14)); ++ ++ IRExpr* addr = binop(Iop_Add64, getIReg64(rj), ++ mkU64(extend64(si14 << 2, 16))); ++ if (!(archinfo->hwcaps & VEX_HWCAPS_LOONGARCH_UAL)) ++ gen_SIGBUS(check_align(addr, mkU64(0x7))); ++ putIReg(rd, load(Ity_I64, addr)); ++ ++ return True; ++} ++ ++static Bool gen_stptr_d ( DisResult* dres, UInt insn, ++ const VexArchInfo* archinfo, ++ const VexAbiInfo* abiinfo ) ++{ ++ UInt si14 = get_si14(insn); ++ UInt rj = get_rj(insn); ++ UInt rd = get_rd(insn); ++ ++ DIP("stptr.d %s, %s, %d\n", nameIReg(rd), nameIReg(rj), ++ (Int)extend32(si14, 14)); ++ ++ IRExpr* addr = binop(Iop_Add64, getIReg64(rj), ++ mkU64(extend64(si14 << 2, 16))); ++ if 
(!(archinfo->hwcaps & VEX_HWCAPS_LOONGARCH_UAL)) ++ gen_SIGBUS(check_align(addr, mkU64(0x7))); ++ store(addr, getIReg64(rd)); ++ ++ return True; ++} ++ ++static Bool gen_ldgt_b ( DisResult* dres, UInt insn, ++ const VexArchInfo* archinfo, ++ const VexAbiInfo* abiinfo ) ++{ ++ UInt rk = get_rk(insn); ++ UInt rj = get_rj(insn); ++ UInt rd = get_rd(insn); ++ ++ DIP("ldgt.b %s, %s, %s\n", nameIReg(rd), nameIReg(rj), nameIReg(rk)); ++ ++ IRTemp addr = newTemp(Ity_I64); ++ assign(addr, getIReg64(rj)); ++ gen_SIGSYS(binop(Iop_CmpLE64U, mkexpr(addr), getIReg64(rk))); ++ putIReg(rd, extendS(Ity_I8, load(Ity_I8, mkexpr(addr)))); ++ ++ return True; ++} ++ ++static Bool gen_ldgt_h ( DisResult* dres, UInt insn, ++ const VexArchInfo* archinfo, ++ const VexAbiInfo* abiinfo ) ++{ ++ UInt rk = get_rk(insn); ++ UInt rj = get_rj(insn); ++ UInt rd = get_rd(insn); ++ ++ DIP("ldgt.h %s, %s, %s\n", nameIReg(rd), nameIReg(rj), nameIReg(rk)); ++ ++ IRTemp addr = newTemp(Ity_I64); ++ assign(addr, getIReg64(rj)); ++ gen_SIGBUS(check_align(mkexpr(addr), mkU64(0x1))); ++ gen_SIGSYS(binop(Iop_CmpLE64U, mkexpr(addr), getIReg64(rk))); ++ putIReg(rd, extendS(Ity_I16, load(Ity_I16, mkexpr(addr)))); ++ ++ return True; ++} ++ ++static Bool gen_ldgt_w ( DisResult* dres, UInt insn, ++ const VexArchInfo* archinfo, ++ const VexAbiInfo* abiinfo ) ++{ ++ UInt rk = get_rk(insn); ++ UInt rj = get_rj(insn); ++ UInt rd = get_rd(insn); ++ ++ DIP("ldgt.w %s, %s, %s\n", nameIReg(rd), nameIReg(rj), nameIReg(rk)); ++ ++ IRTemp addr = newTemp(Ity_I64); ++ assign(addr, getIReg64(rj)); ++ gen_SIGBUS(check_align(mkexpr(addr), mkU64(0x3))); ++ gen_SIGSYS(binop(Iop_CmpLE64U, mkexpr(addr), getIReg64(rk))); ++ putIReg(rd, extendS(Ity_I32, load(Ity_I32, mkexpr(addr)))); ++ ++ return True; ++} ++ ++static Bool gen_ldgt_d ( DisResult* dres, UInt insn, ++ const VexArchInfo* archinfo, ++ const VexAbiInfo* abiinfo ) ++{ ++ UInt rk = get_rk(insn); ++ UInt rj = get_rj(insn); ++ UInt rd = get_rd(insn); ++ ++ DIP("ldgt.d %s, %s, %s\n", nameIReg(rd), nameIReg(rj), nameIReg(rk)); ++ ++ IRTemp addr = newTemp(Ity_I64); ++ assign(addr, getIReg64(rj)); ++ gen_SIGBUS(check_align(mkexpr(addr), mkU64(0x7))); ++ gen_SIGSYS(binop(Iop_CmpLE64U, mkexpr(addr), getIReg64(rk))); ++ putIReg(rd, load(Ity_I64, mkexpr(addr))); ++ ++ return True; ++} ++ ++static Bool gen_ldle_b ( DisResult* dres, UInt insn, ++ const VexArchInfo* archinfo, ++ const VexAbiInfo* abiinfo ) ++{ ++ UInt rk = get_rk(insn); ++ UInt rj = get_rj(insn); ++ UInt rd = get_rd(insn); ++ ++ DIP("ldle.b %s, %s, %s\n", nameIReg(rd), nameIReg(rj), nameIReg(rk)); ++ ++ IRTemp addr = newTemp(Ity_I64); ++ assign(addr, getIReg64(rj)); ++ gen_SIGSYS(binop(Iop_CmpLT64U, getIReg64(rk), mkexpr(addr))); ++ putIReg(rd, extendS(Ity_I8, load(Ity_I8, mkexpr(addr)))); ++ ++ return True; ++} ++ ++static Bool gen_ldle_h ( DisResult* dres, UInt insn, ++ const VexArchInfo* archinfo, ++ const VexAbiInfo* abiinfo ) ++{ ++ UInt rk = get_rk(insn); ++ UInt rj = get_rj(insn); ++ UInt rd = get_rd(insn); ++ ++ DIP("ldle.h %s, %s, %s\n", nameIReg(rd), nameIReg(rj), nameIReg(rk)); ++ ++ IRTemp addr = newTemp(Ity_I64); ++ assign(addr, getIReg64(rj)); ++ gen_SIGBUS(check_align(mkexpr(addr), mkU64(0x1))); ++ gen_SIGSYS(binop(Iop_CmpLT64U, getIReg64(rk), mkexpr(addr))); ++ putIReg(rd, extendS(Ity_I16, load(Ity_I16, mkexpr(addr)))); ++ ++ return True; ++} ++ ++static Bool gen_ldle_w ( DisResult* dres, UInt insn, ++ const VexArchInfo* archinfo, ++ const VexAbiInfo* abiinfo ) ++{ ++ UInt rk = get_rk(insn); ++ UInt rj = get_rj(insn); ++ UInt rd = 
get_rd(insn); ++ ++ DIP("ldle.w %s, %s, %s\n", nameIReg(rd), nameIReg(rj), nameIReg(rk)); ++ ++ IRTemp addr = newTemp(Ity_I64); ++ assign(addr, getIReg64(rj)); ++ gen_SIGBUS(check_align(mkexpr(addr), mkU64(0x3))); ++ gen_SIGSYS(binop(Iop_CmpLT64U, getIReg64(rk), mkexpr(addr))); ++ putIReg(rd, extendS(Ity_I32, load(Ity_I32, mkexpr(addr)))); ++ ++ return True; ++} ++ ++static Bool gen_ldle_d ( DisResult* dres, UInt insn, ++ const VexArchInfo* archinfo, ++ const VexAbiInfo* abiinfo ) ++{ ++ UInt rk = get_rk(insn); ++ UInt rj = get_rj(insn); ++ UInt rd = get_rd(insn); ++ ++ DIP("ldle.d %s, %s, %s\n", nameIReg(rd), nameIReg(rj), nameIReg(rk)); ++ ++ IRTemp addr = newTemp(Ity_I64); ++ assign(addr, getIReg64(rj)); ++ gen_SIGBUS(check_align(mkexpr(addr), mkU64(0x7))); ++ gen_SIGSYS(binop(Iop_CmpLT64U, getIReg64(rk), mkexpr(addr))); ++ putIReg(rd, load(Ity_I64, mkexpr(addr))); ++ ++ return True; ++} ++ ++static Bool gen_stgt_b ( DisResult* dres, UInt insn, ++ const VexArchInfo* archinfo, ++ const VexAbiInfo* abiinfo ) ++{ ++ UInt rk = get_rk(insn); ++ UInt rj = get_rj(insn); ++ UInt rd = get_rd(insn); ++ ++ DIP("stgt.b %s, %s, %s\n", nameIReg(rd), nameIReg(rj), nameIReg(rk)); ++ ++ IRTemp addr = newTemp(Ity_I64); ++ assign(addr, getIReg64(rj)); ++ gen_SIGSYS(binop(Iop_CmpLE64U, mkexpr(addr), getIReg64(rk))); ++ store(mkexpr(addr), getIReg8(rd)); ++ ++ return True; ++} ++ ++static Bool gen_stgt_h ( DisResult* dres, UInt insn, ++ const VexArchInfo* archinfo, ++ const VexAbiInfo* abiinfo ) ++{ ++ UInt rk = get_rk(insn); ++ UInt rj = get_rj(insn); ++ UInt rd = get_rd(insn); ++ ++ DIP("stgt.h %s, %s, %s\n", nameIReg(rd), nameIReg(rj), nameIReg(rk)); ++ ++ IRTemp addr = newTemp(Ity_I64); ++ assign(addr, getIReg64(rj)); ++ gen_SIGBUS(check_align(mkexpr(addr), mkU64(0x1))); ++ gen_SIGSYS(binop(Iop_CmpLE64U, mkexpr(addr), getIReg64(rk))); ++ store(mkexpr(addr), getIReg16(rd)); ++ ++ return True; ++} ++ ++static Bool gen_stgt_w ( DisResult* dres, UInt insn, ++ const VexArchInfo* archinfo, ++ const VexAbiInfo* abiinfo ) ++{ ++ UInt rk = get_rk(insn); ++ UInt rj = get_rj(insn); ++ UInt rd = get_rd(insn); ++ ++ DIP("stgt.w %s, %s, %s\n", nameIReg(rd), nameIReg(rj), nameIReg(rk)); ++ ++ IRTemp addr = newTemp(Ity_I64); ++ assign(addr, getIReg64(rj)); ++ gen_SIGBUS(check_align(mkexpr(addr), mkU64(0x3))); ++ gen_SIGSYS(binop(Iop_CmpLE64U, mkexpr(addr), getIReg64(rk))); ++ store(mkexpr(addr), getIReg32(rd)); ++ ++ return True; ++} ++ ++static Bool gen_stgt_d ( DisResult* dres, UInt insn, ++ const VexArchInfo* archinfo, ++ const VexAbiInfo* abiinfo ) ++{ ++ UInt rk = get_rk(insn); ++ UInt rj = get_rj(insn); ++ UInt rd = get_rd(insn); ++ ++ DIP("stgt.d %s, %s, %s\n", nameIReg(rd), nameIReg(rj), nameIReg(rk)); ++ ++ IRTemp addr = newTemp(Ity_I64); ++ assign(addr, getIReg64(rj)); ++ gen_SIGBUS(check_align(mkexpr(addr), mkU64(0x7))); ++ gen_SIGSYS(binop(Iop_CmpLE64U, mkexpr(addr), getIReg64(rk))); ++ store(mkexpr(addr), getIReg64(rd)); ++ ++ return True; ++} ++ ++static Bool gen_stle_b ( DisResult* dres, UInt insn, ++ const VexArchInfo* archinfo, ++ const VexAbiInfo* abiinfo ) ++{ ++ UInt rk = get_rk(insn); ++ UInt rj = get_rj(insn); ++ UInt rd = get_rd(insn); ++ ++ DIP("stle.b %s, %s, %s\n", nameIReg(rd), nameIReg(rj), nameIReg(rk)); ++ ++ IRTemp addr = newTemp(Ity_I64); ++ assign(addr, getIReg64(rj)); ++ gen_SIGSYS(binop(Iop_CmpLT64U, getIReg64(rk), mkexpr(addr))); ++ store(mkexpr(addr), getIReg8(rd)); ++ ++ return True; ++} ++ ++static Bool gen_stle_h ( DisResult* dres, UInt insn, ++ const VexArchInfo* archinfo, ++ 
const VexAbiInfo* abiinfo ) ++{ ++ UInt rk = get_rk(insn); ++ UInt rj = get_rj(insn); ++ UInt rd = get_rd(insn); ++ ++ DIP("stle.h %s, %s, %s\n", nameIReg(rd), nameIReg(rj), nameIReg(rk)); ++ ++ IRTemp addr = newTemp(Ity_I64); ++ assign(addr, getIReg64(rj)); ++ gen_SIGBUS(check_align(mkexpr(addr), mkU64(0x1))); ++ gen_SIGSYS(binop(Iop_CmpLT64U, getIReg64(rk), mkexpr(addr))); ++ store(mkexpr(addr), getIReg16(rd)); ++ ++ return True; ++} ++ ++static Bool gen_stle_w ( DisResult* dres, UInt insn, ++ const VexArchInfo* archinfo, ++ const VexAbiInfo* abiinfo ) ++{ ++ UInt rk = get_rk(insn); ++ UInt rj = get_rj(insn); ++ UInt rd = get_rd(insn); ++ ++ DIP("stle.w %s, %s, %s\n", nameIReg(rd), nameIReg(rj), nameIReg(rk)); ++ ++ IRTemp addr = newTemp(Ity_I64); ++ assign(addr, getIReg64(rj)); ++ gen_SIGBUS(check_align(mkexpr(addr), mkU64(0x3))); ++ gen_SIGSYS(binop(Iop_CmpLT64U, getIReg64(rk), mkexpr(addr))); ++ store(mkexpr(addr), getIReg32(rd)); ++ ++ return True; ++} ++ ++static Bool gen_stle_d ( DisResult* dres, UInt insn, ++ const VexArchInfo* archinfo, ++ const VexAbiInfo* abiinfo ) ++{ ++ UInt rk = get_rk(insn); ++ UInt rj = get_rj(insn); ++ UInt rd = get_rd(insn); ++ ++ DIP("stle.d %s, %s, %s\n", nameIReg(rd), nameIReg(rj), nameIReg(rk)); ++ ++ IRTemp addr = newTemp(Ity_I64); ++ assign(addr, getIReg64(rj)); ++ gen_SIGBUS(check_align(mkexpr(addr), mkU64(0x7))); ++ gen_SIGSYS(binop(Iop_CmpLT64U, getIReg64(rk), mkexpr(addr))); ++ store(mkexpr(addr), getIReg64(rd)); ++ ++ return True; ++} ++ ++ ++/*------------------------------------------------------------*/ ++/*--- Helpers for fixed point atomic insns ---*/ ++/*------------------------------------------------------------*/ ++ ++static Bool gen_ll_helper ( UInt rd, UInt rj, UInt si14, Bool size64 ) ++{ ++ Int offs_size = offsetof(VexGuestLOONGARCH64State, guest_LLSC_SIZE); ++ Int offs_addr = offsetof(VexGuestLOONGARCH64State, guest_LLSC_ADDR); ++ Int offs_data = offsetof(VexGuestLOONGARCH64State, guest_LLSC_DATA); ++ ++ /* Get address of the load. */ ++ IRTemp addr = newTemp(Ity_I64); ++ assign(addr, binop(Iop_Add64, getIReg64(rj), ++ mkU64(extend64(si14 << 2, 16)))); ++ if (size64) ++ gen_SIGBUS(check_align(mkexpr(addr), mkU64(0x7))); ++ else ++ gen_SIGBUS(check_align(mkexpr(addr), mkU64(0x3))); ++ ++ /* Load the value. */ ++ IRTemp res = newTemp(Ity_I64); ++ if (size64) ++ assign(res, load(Ity_I64, mkexpr(addr))); ++ else ++ assign(res, extendS(Ity_I32, load(Ity_I32, mkexpr(addr)))); ++ ++ /* Set up the LLSC fallback data. */ ++ if (size64) ++ stmt(IRStmt_Put(offs_size, mkU64(8))); ++ else ++ stmt(IRStmt_Put(offs_size, mkU64(4))); ++ stmt(IRStmt_Put(offs_addr, mkexpr(addr))); ++ stmt(IRStmt_Put(offs_data, mkexpr(res))); ++ ++ /* Write the result to the destination register. */ ++ putIReg(rd, mkexpr(res)); ++ ++ return True; ++} ++ ++static Bool gen_sc_helper ( UInt rd, UInt rj, UInt si14, Bool size64 ) ++{ ++ Int offs_size = offsetof(VexGuestLOONGARCH64State, guest_LLSC_SIZE); ++ Int offs_addr = offsetof(VexGuestLOONGARCH64State, guest_LLSC_ADDR); ++ Int offs_data = offsetof(VexGuestLOONGARCH64State, guest_LLSC_DATA); ++ ++ /* Get address of the load. */ ++ IRTemp addr = newTemp(Ity_I64); ++ assign(addr, binop(Iop_Add64, getIReg64(rj), ++ mkU64(extend64(si14 << 2, 16)))); ++ if (size64) ++ gen_SIGBUS(check_align(mkexpr(addr), mkU64(0x7))); ++ else ++ gen_SIGBUS(check_align(mkexpr(addr), mkU64(0x3))); ++ ++ /* Get new value. 
*/ ++ IRTemp new; ++ if (size64) { ++ new = newTemp(Ity_I64); ++ assign(new, getIReg64(rd)); ++ } else { ++ new = newTemp(Ity_I32); ++ assign(new, getIReg32(rd)); ++ } ++ ++ /* Mark the SC initially as failed. */ ++ putIReg(rd, mkU64(0)); ++ ++ /* Set that no transaction is in progress. */ ++ IRTemp size = newTemp(Ity_I64); ++ assign(size, IRExpr_Get(offs_size, Ity_I64)); ++ stmt(IRStmt_Put(offs_size, mkU64(0) /* "no transaction" */)); ++ ++ /* Fail if no or wrong-size transaction. */ ++ if (size64) ++ exit(binop(Iop_CmpNE64, mkexpr(size), mkU64(8)), Ijk_Boring, 4); ++ else ++ exit(binop(Iop_CmpNE64, mkexpr(size), mkU64(4)), Ijk_Boring, 4); ++ ++ /* Fail if the address doesn't match the LL address. */ ++ exit(binop(Iop_CmpNE64, mkexpr(addr), IRExpr_Get(offs_addr, Ity_I64)), ++ Ijk_Boring, 4); ++ ++ /* Fail if the data doesn't match the LL data. */ ++ IRTemp data; ++ if (size64) { ++ data = newTemp(Ity_I64); ++ assign(data, IRExpr_Get(offs_data, Ity_I64)); ++ IRExpr* d = load(Ity_I64, mkexpr(addr)); ++ exit(binop(Iop_CmpNE64, d, mkexpr(data)), Ijk_Boring, 4); ++ } else { ++ data = newTemp(Ity_I32); ++ IRTemp tmp = newTemp(Ity_I64); ++ assign(tmp, IRExpr_Get(offs_data, Ity_I64)); ++ assign(data, unop(Iop_64to32, mkexpr(tmp))); ++ IRExpr* d = extendS(Ity_I32, load(Ity_I32, mkexpr(addr))); ++ exit(binop(Iop_CmpNE64, d, mkexpr(tmp)), Ijk_Boring, 4); ++ } ++ ++ /* Try to CAS the new value in. */ ++ IRTemp old; ++ if (size64) { ++ old = newTemp(Ity_I64); ++ cas(old, mkexpr(addr), mkexpr(data), mkexpr(new)); ++ } else { ++ old = newTemp(Ity_I32); ++ cas(old, mkexpr(addr), mkexpr(data), mkexpr(new)); ++ } ++ ++ /* Fail if the CAS failed (old != expd). */ ++ if (size64) ++ exit(binop(Iop_CasCmpNE64, mkexpr(old), mkexpr(data)), Ijk_Boring, 4); ++ else ++ exit(binop(Iop_CasCmpNE32, mkexpr(old), mkexpr(data)), Ijk_Boring, 4); ++ ++ /* Otherwise mark the operation as successful. */ ++ putIReg(rd, mkU64(1)); ++ ++ return True; ++} ++ ++static Bool gen_ll_w ( DisResult* dres, UInt insn, ++ const VexArchInfo* archinfo, ++ const VexAbiInfo* abiinfo ) ++{ ++ UInt si14 = get_si14(insn); ++ UInt rj = get_rj(insn); ++ UInt rd = get_rd(insn); ++ ++ DIP("ll.w %s, %s, %d%s\n", nameIReg(rd), nameIReg(rj), ++ (Int)extend32(si14, 14), ++ abiinfo->guest__use_fallback_LLSC ? ++ " (fallback implementation)" : ""); ++ ++ if (abiinfo->guest__use_fallback_LLSC) { ++ return gen_ll_helper(rd, rj, si14, False); ++ } else { ++ IRTemp res = newTemp(Ity_I32); ++ IRTemp addr = newTemp(Ity_I64); ++ assign(addr, binop(Iop_Add64, getIReg64(rj), ++ mkU64(extend64(si14 << 2, 16)))); ++ gen_SIGBUS(check_align(mkexpr(addr), mkU64(0x3))); ++ stmt(IRStmt_LLSC(Iend_LE, res, mkexpr(addr), NULL/*LL*/)); ++ putIReg(rd, extendS(Ity_I32, mkexpr(res))); ++ return True; ++ } ++} ++ ++static Bool gen_sc_w ( DisResult* dres, UInt insn, ++ const VexArchInfo* archinfo, ++ const VexAbiInfo* abiinfo ) ++{ ++ UInt si14 = get_si14(insn); ++ UInt rj = get_rj(insn); ++ UInt rd = get_rd(insn); ++ ++ DIP("sc.w %s, %s, %d%s\n", nameIReg(rd), nameIReg(rj), ++ (Int)extend32(si14, 14), ++ abiinfo->guest__use_fallback_LLSC ? 
++ " (fallback implementation)" : ""); ++ ++ if (abiinfo->guest__use_fallback_LLSC) { ++ return gen_sc_helper(rd, rj, si14, False); ++ } else { ++ IRTemp res = newTemp(Ity_I1); ++ IRTemp addr = newTemp(Ity_I64); ++ assign(addr, binop(Iop_Add64, getIReg64(rj), ++ mkU64(extend64(si14 << 2, 16)))); ++ gen_SIGBUS(check_align(mkexpr(addr), mkU64(0x3))); ++ stmt(IRStmt_LLSC(Iend_LE, res, mkexpr(addr), getIReg32(rd))); ++ return True; ++ } ++} ++ ++static Bool gen_ll_d ( DisResult* dres, UInt insn, ++ const VexArchInfo* archinfo, ++ const VexAbiInfo* abiinfo ) ++{ ++ UInt si14 = get_si14(insn); ++ UInt rj = get_rj(insn); ++ UInt rd = get_rd(insn); ++ ++ DIP("ll.d %s, %s, %d%s\n", nameIReg(rd), nameIReg(rj), ++ (Int)extend32(si14, 14), ++ abiinfo->guest__use_fallback_LLSC ? ++ " (fallback implementation)" : ""); ++ ++ if (abiinfo->guest__use_fallback_LLSC) { ++ return gen_ll_helper(rd, rj, si14, True); ++ } else { ++ IRTemp res = newTemp(Ity_I64); ++ IRTemp addr = newTemp(Ity_I64); ++ assign(addr, binop(Iop_Add64, getIReg64(rj), ++ mkU64(extend64(si14 << 2, 16)))); ++ gen_SIGBUS(check_align(mkexpr(addr), mkU64(0x7))); ++ stmt(IRStmt_LLSC(Iend_LE, res, mkexpr(addr), NULL/*LL*/)); ++ putIReg(rd, mkexpr(res)); ++ return True; ++ } ++} ++ ++static Bool gen_sc_d ( DisResult* dres, UInt insn, ++ const VexArchInfo* archinfo, ++ const VexAbiInfo* abiinfo ) ++{ ++ UInt si14 = get_si14(insn); ++ UInt rj = get_rj(insn); ++ UInt rd = get_rd(insn); ++ ++ DIP("sc.d %s, %s, %d%s\n", nameIReg(rd), nameIReg(rj), ++ (Int)extend32(si14, 14), ++ abiinfo->guest__use_fallback_LLSC ? ++ " (fallback implementation)" : ""); ++ ++ if (abiinfo->guest__use_fallback_LLSC) { ++ return gen_sc_helper(rd, rj, si14, True); ++ } else { ++ IRTemp res = newTemp(Ity_I1); ++ IRTemp addr = newTemp(Ity_I64); ++ assign(addr, binop(Iop_Add64, getIReg64(rj), ++ mkU64(extend64(si14 << 2, 16)))); ++ gen_SIGBUS(check_align(mkexpr(addr), mkU64(0x7))); ++ stmt(IRStmt_LLSC(Iend_LE, res, mkexpr(addr), getIReg64(rd))); ++ return True; ++ } ++} ++ ++enum amop { ++ AMSWAP, AMADD, AMAND, AMOR, AMXOR, AMMAX, AMMIN, AMMAX_U, AMMIN_U ++}; ++ ++static Bool gen_am_w_helper ( enum amop op, Bool fence, ++ UInt rd, UInt rj, UInt rk ) ++{ ++ if (fence) ++ stmt(IRStmt_MBE(Imbe_Fence)); ++ ++ IRTemp addr = newTemp(Ity_I64); ++ assign(addr, getIReg64(rj)); ++ gen_SIGBUS(check_align(mkexpr(addr), mkU64(0x3))); ++ ++ IRTemp o = newTemp(Ity_I32); ++ assign(o, load(Ity_I32, mkexpr(addr))); ++ IRTemp n = newTemp(Ity_I32); ++ assign(n, getIReg32(rk)); ++ IRExpr* e; ++ switch (op) { ++ case AMSWAP: ++ e = mkexpr(n); ++ break; ++ case AMADD: ++ e = binop(Iop_Add32, mkexpr(o), mkexpr(n)); ++ break; ++ case AMAND: ++ e = binop(Iop_And32, mkexpr(o), mkexpr(n)); ++ break; ++ case AMOR: ++ e = binop(Iop_Or32, mkexpr(o), mkexpr(n)); ++ break; ++ case AMXOR: ++ e = binop(Iop_Xor32, mkexpr(o), mkexpr(n)); ++ break; ++ case AMMAX: { ++ IRExpr* cond = binop(Iop_CmpLT32S, mkexpr(n), mkexpr(o)); ++ e = IRExpr_ITE(cond, mkexpr(o), mkexpr(n)); ++ break; ++ } ++ case AMMIN: { ++ IRExpr* cond = binop(Iop_CmpLT32S, mkexpr(o), mkexpr(n)); ++ e = IRExpr_ITE(cond, mkexpr(o), mkexpr(n)); ++ break; ++ } ++ case AMMAX_U: { ++ IRExpr* cond = binop(Iop_CmpLT32U, mkexpr(n), mkexpr(o)); ++ e = IRExpr_ITE(cond, mkexpr(o), mkexpr(n)); ++ break; ++ } ++ case AMMIN_U: { ++ IRExpr* cond = binop(Iop_CmpLT32U, mkexpr(o), mkexpr(n)); ++ e = IRExpr_ITE(cond, mkexpr(o), mkexpr(n)); ++ break; ++ } ++ default: ++ return False; ++ } ++ ++ IRTemp old = newTemp(Ity_I32); ++ cas(old, mkexpr(addr), mkexpr(o), e); 
++ IRExpr* cond = binop(Iop_CasCmpNE32, mkexpr(old), mkexpr(o)); ++ exit(cond, Ijk_Boring, 0); /* Loop if failed */ ++ putIReg(rd, extendS(Ity_I32, mkexpr(o))); ++ ++ if (fence) ++ stmt(IRStmt_MBE(Imbe_Fence)); ++ ++ return True; ++} ++ ++static Bool gen_am_d_helper ( enum amop op, Bool fence, ++ UInt rd, UInt rj, UInt rk ) ++{ ++ if (fence) ++ stmt(IRStmt_MBE(Imbe_Fence)); ++ ++ IRTemp addr = newTemp(Ity_I64); ++ assign(addr, getIReg64(rj)); ++ gen_SIGBUS(check_align(mkexpr(addr), mkU64(0x7))); ++ ++ IRTemp o = newTemp(Ity_I64); ++ assign(o, load(Ity_I64, mkexpr(addr))); ++ IRTemp n = newTemp(Ity_I64); ++ assign(n, getIReg64(rk)); ++ IRExpr* e; ++ switch (op) { ++ case AMSWAP: ++ e = mkexpr(n); ++ break; ++ case AMADD: ++ e = binop(Iop_Add64, mkexpr(o), mkexpr(n)); ++ break; ++ case AMAND: ++ e = binop(Iop_And64, mkexpr(o), mkexpr(n)); ++ break; ++ case AMOR: ++ e = binop(Iop_Or64, mkexpr(o), mkexpr(n)); ++ break; ++ case AMXOR: ++ e = binop(Iop_Xor64, mkexpr(o), mkexpr(n)); ++ break; ++ case AMMAX: { ++ IRExpr* cond = binop(Iop_CmpLT64S, mkexpr(n), mkexpr(o)); ++ e = IRExpr_ITE(cond, mkexpr(o), mkexpr(n)); ++ break; ++ } ++ case AMMIN: { ++ IRExpr* cond = binop(Iop_CmpLT64S, mkexpr(o), mkexpr(n)); ++ e = IRExpr_ITE(cond, mkexpr(o), mkexpr(n)); ++ break; ++ } ++ case AMMAX_U: { ++ IRExpr* cond = binop(Iop_CmpLT64U, mkexpr(n), mkexpr(o)); ++ e = IRExpr_ITE(cond, mkexpr(o), mkexpr(n)); ++ break; ++ } ++ case AMMIN_U: { ++ IRExpr* cond = binop(Iop_CmpLT64U, mkexpr(o), mkexpr(n)); ++ e = IRExpr_ITE(cond, mkexpr(o), mkexpr(n)); ++ break; ++ } ++ default: ++ return False; ++ } ++ ++ IRTemp old = newTemp(Ity_I64); ++ cas(old, mkexpr(addr), mkexpr(o), e); ++ IRExpr* cond = binop(Iop_CasCmpNE64, mkexpr(old), mkexpr(o)); ++ exit(cond, Ijk_Boring, 0); /* Loop if failed */ ++ putIReg(rd, mkexpr(o)); ++ ++ if (fence) ++ stmt(IRStmt_MBE(Imbe_Fence)); ++ ++ return True; ++} ++ ++static Bool gen_amswap_w ( DisResult* dres, UInt insn, ++ const VexArchInfo* archinfo, ++ const VexAbiInfo* abiinfo ) ++{ ++ UInt rk = get_rk(insn); ++ UInt rj = get_rj(insn); ++ UInt rd = get_rd(insn); ++ ++ DIP("amswap.w %s, %s, %s\n", nameIReg(rd), nameIReg(rk), nameIReg(rj)); ++ ++ if (!(archinfo->hwcaps & VEX_HWCAPS_LOONGARCH_LAM)) { ++ dres->jk_StopHere = Ijk_SigILL; ++ dres->whatNext = Dis_StopHere; ++ return True; ++ } ++ ++ return gen_am_w_helper(AMSWAP, False, rd, rj, rk); ++} ++ ++static Bool gen_amswap_d ( DisResult* dres, UInt insn, ++ const VexArchInfo* archinfo, ++ const VexAbiInfo* abiinfo ) ++{ ++ UInt rk = get_rk(insn); ++ UInt rj = get_rj(insn); ++ UInt rd = get_rd(insn); ++ ++ DIP("amswap.d %s, %s, %s\n", nameIReg(rd), nameIReg(rk), nameIReg(rj)); ++ ++ if (!(archinfo->hwcaps & VEX_HWCAPS_LOONGARCH_LAM)) { ++ dres->jk_StopHere = Ijk_SigILL; ++ dres->whatNext = Dis_StopHere; ++ return True; ++ } ++ ++ return gen_am_d_helper(AMSWAP, False, rd, rj, rk); ++} ++ ++static Bool gen_amadd_w ( DisResult* dres, UInt insn, ++ const VexArchInfo* archinfo, ++ const VexAbiInfo* abiinfo ) ++{ ++ UInt rk = get_rk(insn); ++ UInt rj = get_rj(insn); ++ UInt rd = get_rd(insn); ++ ++ DIP("amadd.w %s, %s, %s\n", nameIReg(rd), nameIReg(rk), nameIReg(rj)); ++ ++ if (!(archinfo->hwcaps & VEX_HWCAPS_LOONGARCH_LAM)) { ++ dres->jk_StopHere = Ijk_SigILL; ++ dres->whatNext = Dis_StopHere; ++ return True; ++ } ++ ++ return gen_am_w_helper(AMADD, False, rd, rj, rk); ++} ++ ++static Bool gen_amadd_d ( DisResult* dres, UInt insn, ++ const VexArchInfo* archinfo, ++ const VexAbiInfo* abiinfo ) ++{ ++ UInt rk = get_rk(insn); ++ UInt rj = 
get_rj(insn); ++ UInt rd = get_rd(insn); ++ ++ DIP("amadd.d %s, %s, %s\n", nameIReg(rd), nameIReg(rk), nameIReg(rj)); ++ ++ if (!(archinfo->hwcaps & VEX_HWCAPS_LOONGARCH_LAM)) { ++ dres->jk_StopHere = Ijk_SigILL; ++ dres->whatNext = Dis_StopHere; ++ return True; ++ } ++ ++ return gen_am_d_helper(AMADD, False, rd, rj, rk); ++} ++ ++static Bool gen_amand_w ( DisResult* dres, UInt insn, ++ const VexArchInfo* archinfo, ++ const VexAbiInfo* abiinfo ) ++{ ++ UInt rk = get_rk(insn); ++ UInt rj = get_rj(insn); ++ UInt rd = get_rd(insn); ++ ++ DIP("amand.w %s, %s, %s\n", nameIReg(rd), nameIReg(rk), nameIReg(rj)); ++ ++ if (!(archinfo->hwcaps & VEX_HWCAPS_LOONGARCH_LAM)) { ++ dres->jk_StopHere = Ijk_SigILL; ++ dres->whatNext = Dis_StopHere; ++ return True; ++ } ++ ++ return gen_am_w_helper(AMAND, False, rd, rj, rk); ++} ++ ++static Bool gen_amand_d ( DisResult* dres, UInt insn, ++ const VexArchInfo* archinfo, ++ const VexAbiInfo* abiinfo ) ++{ ++ UInt rk = get_rk(insn); ++ UInt rj = get_rj(insn); ++ UInt rd = get_rd(insn); ++ ++ DIP("amand.d %s, %s, %s\n", nameIReg(rd), nameIReg(rk), nameIReg(rj)); ++ ++ if (!(archinfo->hwcaps & VEX_HWCAPS_LOONGARCH_LAM)) { ++ dres->jk_StopHere = Ijk_SigILL; ++ dres->whatNext = Dis_StopHere; ++ return True; ++ } ++ ++ return gen_am_d_helper(AMAND, False, rd, rj, rk); ++} ++ ++static Bool gen_amor_w ( DisResult* dres, UInt insn, ++ const VexArchInfo* archinfo, ++ const VexAbiInfo* abiinfo ) ++{ ++ UInt rk = get_rk(insn); ++ UInt rj = get_rj(insn); ++ UInt rd = get_rd(insn); ++ ++ DIP("amor.w %s, %s, %s\n", nameIReg(rd), nameIReg(rk), nameIReg(rj)); ++ ++ if (!(archinfo->hwcaps & VEX_HWCAPS_LOONGARCH_LAM)) { ++ dres->jk_StopHere = Ijk_SigILL; ++ dres->whatNext = Dis_StopHere; ++ return True; ++ } ++ ++ return gen_am_w_helper(AMOR, False, rd, rj, rk); ++} ++ ++static Bool gen_amor_d ( DisResult* dres, UInt insn, ++ const VexArchInfo* archinfo, ++ const VexAbiInfo* abiinfo ) ++{ ++ UInt rk = get_rk(insn); ++ UInt rj = get_rj(insn); ++ UInt rd = get_rd(insn); ++ ++ DIP("amor.d %s, %s, %s\n", nameIReg(rd), nameIReg(rk), nameIReg(rj)); ++ ++ if (!(archinfo->hwcaps & VEX_HWCAPS_LOONGARCH_LAM)) { ++ dres->jk_StopHere = Ijk_SigILL; ++ dres->whatNext = Dis_StopHere; ++ return True; ++ } ++ ++ return gen_am_d_helper(AMOR, False, rd, rj, rk); ++} ++ ++static Bool gen_amxor_w ( DisResult* dres, UInt insn, ++ const VexArchInfo* archinfo, ++ const VexAbiInfo* abiinfo ) ++{ ++ UInt rk = get_rk(insn); ++ UInt rj = get_rj(insn); ++ UInt rd = get_rd(insn); ++ ++ DIP("amxor.w %s, %s, %s\n", nameIReg(rd), nameIReg(rk), nameIReg(rj)); ++ ++ if (!(archinfo->hwcaps & VEX_HWCAPS_LOONGARCH_LAM)) { ++ dres->jk_StopHere = Ijk_SigILL; ++ dres->whatNext = Dis_StopHere; ++ return True; ++ } ++ ++ return gen_am_w_helper(AMXOR, False, rd, rj, rk); ++} ++ ++static Bool gen_amxor_d ( DisResult* dres, UInt insn, ++ const VexArchInfo* archinfo, ++ const VexAbiInfo* abiinfo ) ++{ ++ UInt rk = get_rk(insn); ++ UInt rj = get_rj(insn); ++ UInt rd = get_rd(insn); ++ ++ DIP("amxor.d %s, %s, %s\n", nameIReg(rd), nameIReg(rk), nameIReg(rj)); ++ ++ if (!(archinfo->hwcaps & VEX_HWCAPS_LOONGARCH_LAM)) { ++ dres->jk_StopHere = Ijk_SigILL; ++ dres->whatNext = Dis_StopHere; ++ return True; ++ } ++ ++ return gen_am_d_helper(AMXOR, False, rd, rj, rk); ++} ++ ++static Bool gen_ammax_w ( DisResult* dres, UInt insn, ++ const VexArchInfo* archinfo, ++ const VexAbiInfo* abiinfo ) ++{ ++ UInt rk = get_rk(insn); ++ UInt rj = get_rj(insn); ++ UInt rd = get_rd(insn); ++ ++ DIP("ammax.w %s, %s, %s\n", nameIReg(rd), nameIReg(rk), 
nameIReg(rj)); ++ ++ if (!(archinfo->hwcaps & VEX_HWCAPS_LOONGARCH_LAM)) { ++ dres->jk_StopHere = Ijk_SigILL; ++ dres->whatNext = Dis_StopHere; ++ return True; ++ } ++ ++ return gen_am_w_helper(AMMAX, False, rd, rj, rk); ++} ++ ++static Bool gen_ammax_d ( DisResult* dres, UInt insn, ++ const VexArchInfo* archinfo, ++ const VexAbiInfo* abiinfo ) ++{ ++ UInt rk = get_rk(insn); ++ UInt rj = get_rj(insn); ++ UInt rd = get_rd(insn); ++ ++ DIP("ammax.d %s, %s, %s\n", nameIReg(rd), nameIReg(rk), nameIReg(rj)); ++ ++ if (!(archinfo->hwcaps & VEX_HWCAPS_LOONGARCH_LAM)) { ++ dres->jk_StopHere = Ijk_SigILL; ++ dres->whatNext = Dis_StopHere; ++ return True; ++ } ++ ++ return gen_am_d_helper(AMMAX, False, rd, rj, rk); ++} ++ ++static Bool gen_ammin_w ( DisResult* dres, UInt insn, ++ const VexArchInfo* archinfo, ++ const VexAbiInfo* abiinfo ) ++{ ++ UInt rk = get_rk(insn); ++ UInt rj = get_rj(insn); ++ UInt rd = get_rd(insn); ++ ++ DIP("ammin.w %s, %s, %s\n", nameIReg(rd), nameIReg(rk), nameIReg(rj)); ++ ++ if (!(archinfo->hwcaps & VEX_HWCAPS_LOONGARCH_LAM)) { ++ dres->jk_StopHere = Ijk_SigILL; ++ dres->whatNext = Dis_StopHere; ++ return True; ++ } ++ ++ return gen_am_w_helper(AMMIN, False, rd, rj, rk); ++} ++ ++static Bool gen_ammin_d ( DisResult* dres, UInt insn, ++ const VexArchInfo* archinfo, ++ const VexAbiInfo* abiinfo ) ++{ ++ UInt rk = get_rk(insn); ++ UInt rj = get_rj(insn); ++ UInt rd = get_rd(insn); ++ ++ DIP("ammin.d %s, %s, %s\n", nameIReg(rd), nameIReg(rk), nameIReg(rj)); ++ ++ if (!(archinfo->hwcaps & VEX_HWCAPS_LOONGARCH_LAM)) { ++ dres->jk_StopHere = Ijk_SigILL; ++ dres->whatNext = Dis_StopHere; ++ return True; ++ } ++ ++ return gen_am_d_helper(AMMIN, False, rd, rj, rk); ++} ++ ++static Bool gen_ammax_wu ( DisResult* dres, UInt insn, ++ const VexArchInfo* archinfo, ++ const VexAbiInfo* abiinfo ) ++{ ++ UInt rk = get_rk(insn); ++ UInt rj = get_rj(insn); ++ UInt rd = get_rd(insn); ++ ++ DIP("ammax.wu %s, %s, %s\n", nameIReg(rd), nameIReg(rk), nameIReg(rj)); ++ ++ if (!(archinfo->hwcaps & VEX_HWCAPS_LOONGARCH_LAM)) { ++ dres->jk_StopHere = Ijk_SigILL; ++ dres->whatNext = Dis_StopHere; ++ return True; ++ } ++ ++ return gen_am_w_helper(AMMAX_U, False, rd, rj, rk); ++} ++ ++static Bool gen_ammax_du ( DisResult* dres, UInt insn, ++ const VexArchInfo* archinfo, ++ const VexAbiInfo* abiinfo ) ++{ ++ UInt rk = get_rk(insn); ++ UInt rj = get_rj(insn); ++ UInt rd = get_rd(insn); ++ ++ DIP("ammax.du %s, %s, %s\n", nameIReg(rd), nameIReg(rk), nameIReg(rj)); ++ ++ if (!(archinfo->hwcaps & VEX_HWCAPS_LOONGARCH_LAM)) { ++ dres->jk_StopHere = Ijk_SigILL; ++ dres->whatNext = Dis_StopHere; ++ return True; ++ } ++ ++ return gen_am_d_helper(AMMAX_U, False, rd, rj, rk); ++} ++ ++static Bool gen_ammin_wu ( DisResult* dres, UInt insn, ++ const VexArchInfo* archinfo, ++ const VexAbiInfo* abiinfo ) ++{ ++ UInt rk = get_rk(insn); ++ UInt rj = get_rj(insn); ++ UInt rd = get_rd(insn); ++ ++ DIP("ammin.wu %s, %s, %s\n", nameIReg(rd), nameIReg(rk), nameIReg(rj)); ++ ++ if (!(archinfo->hwcaps & VEX_HWCAPS_LOONGARCH_LAM)) { ++ dres->jk_StopHere = Ijk_SigILL; ++ dres->whatNext = Dis_StopHere; ++ return True; ++ } ++ ++ return gen_am_w_helper(AMMIN_U, False, rd, rj, rk); ++} ++ ++static Bool gen_ammin_du ( DisResult* dres, UInt insn, ++ const VexArchInfo* archinfo, ++ const VexAbiInfo* abiinfo ) ++{ ++ UInt rk = get_rk(insn); ++ UInt rj = get_rj(insn); ++ UInt rd = get_rd(insn); ++ ++ DIP("ammin.du %s, %s, %s\n", nameIReg(rd), nameIReg(rk), nameIReg(rj)); ++ ++ if (!(archinfo->hwcaps & VEX_HWCAPS_LOONGARCH_LAM)) { ++ 
dres->jk_StopHere = Ijk_SigILL; ++ dres->whatNext = Dis_StopHere; ++ return True; ++ } ++ ++ return gen_am_d_helper(AMMIN_U, False, rd, rj, rk); ++} ++ ++static Bool gen_amswap_db_w ( DisResult* dres, UInt insn, ++ const VexArchInfo* archinfo, ++ const VexAbiInfo* abiinfo ) ++{ ++ UInt rk = get_rk(insn); ++ UInt rj = get_rj(insn); ++ UInt rd = get_rd(insn); ++ ++ DIP("amswap_db.w %s, %s, %s\n", nameIReg(rd), nameIReg(rk), nameIReg(rj)); ++ ++ if (!(archinfo->hwcaps & VEX_HWCAPS_LOONGARCH_LAM)) { ++ dres->jk_StopHere = Ijk_SigILL; ++ dres->whatNext = Dis_StopHere; ++ return True; ++ } ++ ++ return gen_am_w_helper(AMSWAP, True, rd, rj, rk); ++} ++ ++static Bool gen_amswap_db_d ( DisResult* dres, UInt insn, ++ const VexArchInfo* archinfo, ++ const VexAbiInfo* abiinfo ) ++{ ++ UInt rk = get_rk(insn); ++ UInt rj = get_rj(insn); ++ UInt rd = get_rd(insn); ++ ++ DIP("amswap_db.d %s, %s, %s\n", nameIReg(rd), nameIReg(rk), nameIReg(rj)); ++ ++ if (!(archinfo->hwcaps & VEX_HWCAPS_LOONGARCH_LAM)) { ++ dres->jk_StopHere = Ijk_SigILL; ++ dres->whatNext = Dis_StopHere; ++ return True; ++ } ++ ++ return gen_am_d_helper(AMSWAP, True, rd, rj, rk); ++} ++ ++static Bool gen_amadd_db_w ( DisResult* dres, UInt insn, ++ const VexArchInfo* archinfo, ++ const VexAbiInfo* abiinfo ) ++{ ++ UInt rk = get_rk(insn); ++ UInt rj = get_rj(insn); ++ UInt rd = get_rd(insn); ++ ++ DIP("amadd_db.w %s, %s, %s\n", nameIReg(rd), nameIReg(rk), nameIReg(rj)); ++ ++ if (!(archinfo->hwcaps & VEX_HWCAPS_LOONGARCH_LAM)) { ++ dres->jk_StopHere = Ijk_SigILL; ++ dres->whatNext = Dis_StopHere; ++ return True; ++ } ++ ++ return gen_am_w_helper(AMADD, True, rd, rj, rk); ++} ++ ++static Bool gen_amadd_db_d ( DisResult* dres, UInt insn, ++ const VexArchInfo* archinfo, ++ const VexAbiInfo* abiinfo ) ++{ ++ UInt rk = get_rk(insn); ++ UInt rj = get_rj(insn); ++ UInt rd = get_rd(insn); ++ ++ DIP("amadd_db.d %s, %s, %s\n", nameIReg(rd), nameIReg(rk), nameIReg(rj)); ++ ++ if (!(archinfo->hwcaps & VEX_HWCAPS_LOONGARCH_LAM)) { ++ dres->jk_StopHere = Ijk_SigILL; ++ dres->whatNext = Dis_StopHere; ++ return True; ++ } ++ ++ return gen_am_d_helper(AMADD, True, rd, rj, rk); ++} ++ ++static Bool gen_amand_db_w ( DisResult* dres, UInt insn, ++ const VexArchInfo* archinfo, ++ const VexAbiInfo* abiinfo ) ++{ ++ UInt rk = get_rk(insn); ++ UInt rj = get_rj(insn); ++ UInt rd = get_rd(insn); ++ ++ DIP("amand_db.w %s, %s, %s\n", nameIReg(rd), nameIReg(rk), nameIReg(rj)); ++ ++ if (!(archinfo->hwcaps & VEX_HWCAPS_LOONGARCH_LAM)) { ++ dres->jk_StopHere = Ijk_SigILL; ++ dres->whatNext = Dis_StopHere; ++ return True; ++ } ++ ++ return gen_am_w_helper(AMAND, True, rd, rj, rk); ++} ++ ++static Bool gen_amand_db_d ( DisResult* dres, UInt insn, ++ const VexArchInfo* archinfo, ++ const VexAbiInfo* abiinfo ) ++{ ++ UInt rk = get_rk(insn); ++ UInt rj = get_rj(insn); ++ UInt rd = get_rd(insn); ++ ++ DIP("amand_db.d %s, %s, %s\n", nameIReg(rd), nameIReg(rk), nameIReg(rj)); ++ ++ if (!(archinfo->hwcaps & VEX_HWCAPS_LOONGARCH_LAM)) { ++ dres->jk_StopHere = Ijk_SigILL; ++ dres->whatNext = Dis_StopHere; ++ return True; ++ } ++ ++ return gen_am_d_helper(AMAND, True, rd, rj, rk); ++} ++ ++static Bool gen_amor_db_w ( DisResult* dres, UInt insn, ++ const VexArchInfo* archinfo, ++ const VexAbiInfo* abiinfo ) ++{ ++ UInt rk = get_rk(insn); ++ UInt rj = get_rj(insn); ++ UInt rd = get_rd(insn); ++ ++ DIP("amor_db.w %s, %s, %s\n", nameIReg(rd), nameIReg(rk), nameIReg(rj)); ++ ++ if (!(archinfo->hwcaps & VEX_HWCAPS_LOONGARCH_LAM)) { ++ dres->jk_StopHere = Ijk_SigILL; ++ dres->whatNext = 
Dis_StopHere; ++ return True; ++ } ++ ++ return gen_am_w_helper(AMOR, True, rd, rj, rk); ++} ++ ++static Bool gen_amor_db_d ( DisResult* dres, UInt insn, ++ const VexArchInfo* archinfo, ++ const VexAbiInfo* abiinfo ) ++{ ++ UInt rk = get_rk(insn); ++ UInt rj = get_rj(insn); ++ UInt rd = get_rd(insn); ++ ++ DIP("amor_db.d %s, %s, %s\n", nameIReg(rd), nameIReg(rk), nameIReg(rj)); ++ ++ if (!(archinfo->hwcaps & VEX_HWCAPS_LOONGARCH_LAM)) { ++ dres->jk_StopHere = Ijk_SigILL; ++ dres->whatNext = Dis_StopHere; ++ return True; ++ } ++ ++ return gen_am_d_helper(AMOR, True, rd, rj, rk); ++} ++ ++static Bool gen_amxor_db_w ( DisResult* dres, UInt insn, ++ const VexArchInfo* archinfo, ++ const VexAbiInfo* abiinfo ) ++{ ++ UInt rk = get_rk(insn); ++ UInt rj = get_rj(insn); ++ UInt rd = get_rd(insn); ++ ++ DIP("amxor_db.w %s, %s, %s\n", nameIReg(rd), nameIReg(rk), nameIReg(rj)); ++ ++ if (!(archinfo->hwcaps & VEX_HWCAPS_LOONGARCH_LAM)) { ++ dres->jk_StopHere = Ijk_SigILL; ++ dres->whatNext = Dis_StopHere; ++ return True; ++ } ++ ++ return gen_am_w_helper(AMXOR, True, rd, rj, rk); ++} ++ ++static Bool gen_amxor_db_d ( DisResult* dres, UInt insn, ++ const VexArchInfo* archinfo, ++ const VexAbiInfo* abiinfo ) ++{ ++ UInt rk = get_rk(insn); ++ UInt rj = get_rj(insn); ++ UInt rd = get_rd(insn); ++ ++ DIP("amxor_db.d %s, %s, %s\n", nameIReg(rd), nameIReg(rk), nameIReg(rj)); ++ ++ if (!(archinfo->hwcaps & VEX_HWCAPS_LOONGARCH_LAM)) { ++ dres->jk_StopHere = Ijk_SigILL; ++ dres->whatNext = Dis_StopHere; ++ return True; ++ } ++ ++ return gen_am_d_helper(AMXOR, True, rd, rj, rk); ++} ++ ++static Bool gen_ammax_db_w ( DisResult* dres, UInt insn, ++ const VexArchInfo* archinfo, ++ const VexAbiInfo* abiinfo ) ++{ ++ UInt rk = get_rk(insn); ++ UInt rj = get_rj(insn); ++ UInt rd = get_rd(insn); ++ ++ DIP("ammax_db.w %s, %s, %s\n", nameIReg(rd), nameIReg(rk), nameIReg(rj)); ++ ++ if (!(archinfo->hwcaps & VEX_HWCAPS_LOONGARCH_LAM)) { ++ dres->jk_StopHere = Ijk_SigILL; ++ dres->whatNext = Dis_StopHere; ++ return True; ++ } ++ ++ return gen_am_w_helper(AMMAX, True, rd, rj, rk); ++} ++ ++static Bool gen_ammax_db_d ( DisResult* dres, UInt insn, ++ const VexArchInfo* archinfo, ++ const VexAbiInfo* abiinfo ) ++{ ++ UInt rk = get_rk(insn); ++ UInt rj = get_rj(insn); ++ UInt rd = get_rd(insn); ++ ++ DIP("ammax_db.d %s, %s, %s\n", nameIReg(rd), nameIReg(rk), nameIReg(rj)); ++ ++ if (!(archinfo->hwcaps & VEX_HWCAPS_LOONGARCH_LAM)) { ++ dres->jk_StopHere = Ijk_SigILL; ++ dres->whatNext = Dis_StopHere; ++ return True; ++ } ++ ++ return gen_am_d_helper(AMMAX, True, rd, rj, rk); ++} ++ ++static Bool gen_ammin_db_w ( DisResult* dres, UInt insn, ++ const VexArchInfo* archinfo, ++ const VexAbiInfo* abiinfo ) ++{ ++ UInt rk = get_rk(insn); ++ UInt rj = get_rj(insn); ++ UInt rd = get_rd(insn); ++ ++ DIP("ammin_db.w %s, %s, %s\n", nameIReg(rd), nameIReg(rk), nameIReg(rj)); ++ ++ if (!(archinfo->hwcaps & VEX_HWCAPS_LOONGARCH_LAM)) { ++ dres->jk_StopHere = Ijk_SigILL; ++ dres->whatNext = Dis_StopHere; ++ return True; ++ } ++ ++ return gen_am_w_helper(AMMIN, True, rd, rj, rk); ++} ++ ++static Bool gen_ammin_db_d ( DisResult* dres, UInt insn, ++ const VexArchInfo* archinfo, ++ const VexAbiInfo* abiinfo ) ++{ ++ UInt rk = get_rk(insn); ++ UInt rj = get_rj(insn); ++ UInt rd = get_rd(insn); ++ ++ DIP("ammin_db.d %s, %s, %s\n", nameIReg(rd), nameIReg(rk), nameIReg(rj)); ++ ++ if (!(archinfo->hwcaps & VEX_HWCAPS_LOONGARCH_LAM)) { ++ dres->jk_StopHere = Ijk_SigILL; ++ dres->whatNext = Dis_StopHere; ++ return True; ++ } ++ ++ return 
gen_am_d_helper(AMMIN, True, rd, rj, rk); ++} ++ ++static Bool gen_ammax_db_wu ( DisResult* dres, UInt insn, ++ const VexArchInfo* archinfo, ++ const VexAbiInfo* abiinfo ) ++{ ++ UInt rk = get_rk(insn); ++ UInt rj = get_rj(insn); ++ UInt rd = get_rd(insn); ++ ++ DIP("ammax_db.wu %s, %s, %s\n", nameIReg(rd), nameIReg(rk), nameIReg(rj)); ++ ++ if (!(archinfo->hwcaps & VEX_HWCAPS_LOONGARCH_LAM)) { ++ dres->jk_StopHere = Ijk_SigILL; ++ dres->whatNext = Dis_StopHere; ++ return True; ++ } ++ ++ return gen_am_w_helper(AMMAX_U, True, rd, rj, rk); ++} ++ ++static Bool gen_ammax_db_du ( DisResult* dres, UInt insn, ++ const VexArchInfo* archinfo, ++ const VexAbiInfo* abiinfo ) ++{ ++ UInt rk = get_rk(insn); ++ UInt rj = get_rj(insn); ++ UInt rd = get_rd(insn); ++ ++ DIP("ammax_db.du %s, %s, %s\n", nameIReg(rd), nameIReg(rk), nameIReg(rj)); ++ ++ if (!(archinfo->hwcaps & VEX_HWCAPS_LOONGARCH_LAM)) { ++ dres->jk_StopHere = Ijk_SigILL; ++ dres->whatNext = Dis_StopHere; ++ return True; ++ } ++ ++ return gen_am_d_helper(AMMAX_U, True, rd, rj, rk); ++} ++ ++static Bool gen_ammin_db_wu ( DisResult* dres, UInt insn, ++ const VexArchInfo* archinfo, ++ const VexAbiInfo* abiinfo ) ++{ ++ UInt rk = get_rk(insn); ++ UInt rj = get_rj(insn); ++ UInt rd = get_rd(insn); ++ ++ DIP("ammin_db.wu %s, %s, %s\n", nameIReg(rd), nameIReg(rk), nameIReg(rj)); ++ ++ if (!(archinfo->hwcaps & VEX_HWCAPS_LOONGARCH_LAM)) { ++ dres->jk_StopHere = Ijk_SigILL; ++ dres->whatNext = Dis_StopHere; ++ return True; ++ } ++ ++ return gen_am_w_helper(AMMIN_U, True, rd, rj, rk); ++} ++ ++static Bool gen_ammin_db_du ( DisResult* dres, UInt insn, ++ const VexArchInfo* archinfo, ++ const VexAbiInfo* abiinfo ) ++{ ++ UInt rk = get_rk(insn); ++ UInt rj = get_rj(insn); ++ UInt rd = get_rd(insn); ++ ++ DIP("ammin_db.du %s, %s, %s\n", nameIReg(rd), nameIReg(rk), nameIReg(rj)); ++ ++ if (!(archinfo->hwcaps & VEX_HWCAPS_LOONGARCH_LAM)) { ++ dres->jk_StopHere = Ijk_SigILL; ++ dres->whatNext = Dis_StopHere; ++ return True; ++ } ++ ++ return gen_am_d_helper(AMMIN_U, True, rd, rj, rk); ++} ++ ++ ++/*------------------------------------------------------------*/ ++/*--- Helpers for fixed point extra insns ---*/ ++/*------------------------------------------------------------*/ ++ ++static Bool gen_crc_w_b_w ( DisResult* dres, UInt insn, ++ const VexArchInfo* archinfo, ++ const VexAbiInfo* abiinfo ) ++{ ++ UInt rk = get_rk(insn); ++ UInt rj = get_rj(insn); ++ UInt rd = get_rd(insn); ++ ++ DIP("crc.w.b.w %s, %s, %s\n", nameIReg(rd), nameIReg(rj), nameIReg(rk)); ++ ++ IRExpr** arg = mkIRExprVec_3(getIReg64(rk), getIReg64(rj), mkU64(8)); ++ IRExpr* call = mkIRExprCCall(Ity_I64, 0/*regparms*/, ++ "loongarch64_calculate_crc", ++ &loongarch64_calculate_crc, ++ arg); ++ putIReg(rd, call); ++ ++ return True; ++} ++ ++static Bool gen_crc_w_h_w ( DisResult* dres, UInt insn, ++ const VexArchInfo* archinfo, ++ const VexAbiInfo* abiinfo ) ++{ ++ UInt rk = get_rk(insn); ++ UInt rj = get_rj(insn); ++ UInt rd = get_rd(insn); ++ ++ DIP("crc.w.h.w %s, %s, %s\n", nameIReg(rd), nameIReg(rj), nameIReg(rk)); ++ ++ IRExpr** arg = mkIRExprVec_3(getIReg64(rk), getIReg64(rj), mkU64(16)); ++ IRExpr* call = mkIRExprCCall(Ity_I64, 0/*regparms*/, ++ "loongarch64_calculate_crc", ++ &loongarch64_calculate_crc, ++ arg); ++ putIReg(rd, call); ++ ++ return True; ++} ++ ++static Bool gen_crc_w_w_w ( DisResult* dres, UInt insn, ++ const VexArchInfo* archinfo, ++ const VexAbiInfo* abiinfo ) ++{ ++ UInt rk = get_rk(insn); ++ UInt rj = get_rj(insn); ++ UInt rd = get_rd(insn); ++ ++ DIP("crc.w.w.w 
%s, %s, %s\n", nameIReg(rd), nameIReg(rj), nameIReg(rk)); ++ ++ IRExpr** arg = mkIRExprVec_3(getIReg64(rk), getIReg64(rj), mkU64(32)); ++ IRExpr* call = mkIRExprCCall(Ity_I64, 0/*regparms*/, ++ "loongarch64_calculate_crc", ++ &loongarch64_calculate_crc, ++ arg); ++ putIReg(rd, call); ++ ++ return True; ++} ++ ++static Bool gen_crc_w_d_w ( DisResult* dres, UInt insn, ++ const VexArchInfo* archinfo, ++ const VexAbiInfo* abiinfo ) ++{ ++ UInt rk = get_rk(insn); ++ UInt rj = get_rj(insn); ++ UInt rd = get_rd(insn); ++ ++ DIP("crc.w.d.w %s, %s, %s\n", nameIReg(rd), nameIReg(rj), nameIReg(rk)); ++ ++ IRExpr** arg = mkIRExprVec_3(getIReg64(rk), getIReg64(rj), mkU64(64)); ++ IRExpr* call = mkIRExprCCall(Ity_I64, 0/*regparms*/, ++ "loongarch64_calculate_crc", ++ &loongarch64_calculate_crc, ++ arg); ++ putIReg(rd, call); ++ ++ return True; ++} ++ ++static Bool gen_crcc_w_b_w ( DisResult* dres, UInt insn, ++ const VexArchInfo* archinfo, ++ const VexAbiInfo* abiinfo ) ++{ ++ UInt rk = get_rk(insn); ++ UInt rj = get_rj(insn); ++ UInt rd = get_rd(insn); ++ ++ DIP("crcc.w.b.w %s, %s, %s\n", nameIReg(rd), nameIReg(rj), nameIReg(rk)); ++ ++ IRExpr** arg = mkIRExprVec_3(getIReg64(rk), getIReg64(rj), mkU64(8)); ++ IRExpr* call = mkIRExprCCall(Ity_I64, 0/*regparms*/, ++ "loongarch64_calculate_crcc", ++ &loongarch64_calculate_crcc, ++ arg); ++ putIReg(rd, call); ++ ++ return True; ++} ++ ++static Bool gen_crcc_w_h_w ( DisResult* dres, UInt insn, ++ const VexArchInfo* archinfo, ++ const VexAbiInfo* abiinfo ) ++{ ++ UInt rk = get_rk(insn); ++ UInt rj = get_rj(insn); ++ UInt rd = get_rd(insn); ++ ++ DIP("crcc.w.h.w %s, %s, %s\n", nameIReg(rd), nameIReg(rj), nameIReg(rk)); ++ ++ IRExpr** arg = mkIRExprVec_3(getIReg64(rk), getIReg64(rj), mkU64(16)); ++ IRExpr* call = mkIRExprCCall(Ity_I64, 0/*regparms*/, ++ "loongarch64_calculate_crcc", ++ &loongarch64_calculate_crcc, ++ arg); ++ putIReg(rd, call); ++ ++ return True; ++} ++ ++static Bool gen_crcc_w_w_w ( DisResult* dres, UInt insn, ++ const VexArchInfo* archinfo, ++ const VexAbiInfo* abiinfo ) ++{ ++ UInt rk = get_rk(insn); ++ UInt rj = get_rj(insn); ++ UInt rd = get_rd(insn); ++ ++ DIP("crcc.w.w.w %s, %s, %s\n", nameIReg(rd), nameIReg(rj), nameIReg(rk)); ++ ++ IRExpr** arg = mkIRExprVec_3(getIReg64(rk), getIReg64(rj), mkU64(32)); ++ IRExpr* call = mkIRExprCCall(Ity_I64, 0/*regparms*/, ++ "loongarch64_calculate_crcc", ++ &loongarch64_calculate_crcc, ++ arg); ++ putIReg(rd, call); ++ ++ return True; ++} ++ ++static Bool gen_crcc_w_d_w ( DisResult* dres, UInt insn, ++ const VexArchInfo* archinfo, ++ const VexAbiInfo* abiinfo ) ++{ ++ UInt rk = get_rk(insn); ++ UInt rj = get_rj(insn); ++ UInt rd = get_rd(insn); ++ ++ DIP("crcc.w.d.w %s, %s, %s\n", nameIReg(rd), nameIReg(rj), nameIReg(rk)); ++ ++ IRExpr** arg = mkIRExprVec_3(getIReg64(rk), getIReg64(rj), mkU64(64)); ++ IRExpr* call = mkIRExprCCall(Ity_I64, 0/*regparms*/, ++ "loongarch64_calculate_crcc", ++ &loongarch64_calculate_crcc, ++ arg); ++ putIReg(rd, call); ++ ++ return True; ++} ++ ++static Bool gen_break ( DisResult* dres, UInt insn, ++ const VexArchInfo* archinfo, ++ const VexAbiInfo* abiinfo ) ++{ ++ UInt code = get_code(insn); ++ ++ DIP("break %u\n", code); ++ ++ putPC(mkU64(guest_PC_curr_instr + 4)); ++ ++ /* On LoongArch, most instructions do not raise exceptions; ++ instead, gcc notifies the kernel with a trap instruction. ++ We simulate the behavior of the linux kernel here. ++ See arch/loongarch/kernel/traps.c. 
++ */ ++ switch (code) { ++ case 6: /* BRK_OVERFLOW */ ++ dres->jk_StopHere = Ijk_SigFPE_IntOvf; ++ break; ++ case 7: /* BRK_DIVZERO */ ++ dres->jk_StopHere = Ijk_SigFPE_IntDiv; ++ break; ++ default: ++ dres->jk_StopHere = Ijk_SigTRAP; ++ break; ++ } ++ dres->whatNext = Dis_StopHere; ++ ++ return True; ++} ++ ++static Bool gen_syscall ( DisResult* dres, UInt insn, ++ const VexArchInfo* archinfo, ++ const VexAbiInfo* abiinfo ) ++{ ++ UInt hint = get_hint15(insn); ++ ++ DIP("syscall %u\n", hint); ++ ++ putPC(mkU64(guest_PC_curr_instr + 4)); ++ ++ dres->jk_StopHere = Ijk_Sys_syscall; ++ dres->whatNext = Dis_StopHere; ++ ++ return True; ++} ++ ++static Bool gen_asrtle_d ( DisResult* dres, UInt insn, ++ const VexArchInfo* archinfo, ++ const VexAbiInfo* abiinfo ) ++{ ++ UInt rk = get_rk(insn); ++ UInt rj = get_rj(insn); ++ ++ DIP("asrtle.d %s, %s\n", nameIReg(rj), nameIReg(rk)); ++ ++ gen_SIGSYS(binop(Iop_CmpLT64U, getIReg64(rk), getIReg64(rj))); ++ ++ return True; ++} ++ ++static Bool gen_asrtgt_d ( DisResult* dres, UInt insn, ++ const VexArchInfo* archinfo, ++ const VexAbiInfo* abiinfo ) ++{ ++ UInt rk = get_rk(insn); ++ UInt rj = get_rj(insn); ++ ++ DIP("asrtgt.d %s, %s\n", nameIReg(rj), nameIReg(rk)); ++ ++ gen_SIGSYS(binop(Iop_CmpLE64U, getIReg64(rj), getIReg64(rk))); ++ ++ return True; ++} ++ ++static Bool gen_rdtimel_w ( DisResult* dres, UInt insn, ++ const VexArchInfo* archinfo, ++ const VexAbiInfo* abiinfo ) ++{ ++ UInt rj = get_rj(insn); ++ UInt rd = get_rd(insn); ++ ++ DIP("rdtimel.w %s, %s\n", nameIReg(rd), nameIReg(rj)); ++ ++ putIReg(rd, mkU64(0)); ++ ++ return True; ++} ++ ++static Bool gen_rdtimeh_w ( DisResult* dres, UInt insn, ++ const VexArchInfo* archinfo, ++ const VexAbiInfo* abiinfo ) ++{ ++ UInt rj = get_rj(insn); ++ UInt rd = get_rd(insn); ++ ++ DIP("rdtimeh.w %s, %s\n", nameIReg(rd), nameIReg(rj)); ++ ++ putIReg(rd, mkU64(0)); ++ ++ return True; ++} ++ ++static Bool gen_rdtime_d ( DisResult* dres, UInt insn, ++ const VexArchInfo* archinfo, ++ const VexAbiInfo* abiinfo ) ++{ ++ UInt rj = get_rj(insn); ++ UInt rd = get_rd(insn); ++ ++ DIP("rdtime.d %s, %s\n", nameIReg(rd), nameIReg(rj)); ++ ++ putIReg(rd, mkU64(0)); ++ ++ return True; ++} ++ ++static Bool gen_cpucfg ( DisResult* dres, UInt insn, ++ const VexArchInfo* archinfo, ++ const VexAbiInfo* abiinfo ) ++{ ++ UInt rj = get_rj(insn); ++ UInt rd = get_rd(insn); ++ ++ DIP("cpucfg %s, %s\n", nameIReg(rd), nameIReg(rj)); ++ ++ if (!(archinfo->hwcaps & VEX_HWCAPS_LOONGARCH_CPUCFG)) { ++ dres->jk_StopHere = Ijk_SigILL; ++ dres->whatNext = Dis_StopHere; ++ return True; ++ } ++ ++ IRExpr** arg = mkIRExprVec_1(getIReg64(rj)); ++ IRExpr* call = mkIRExprCCall(Ity_I64, 0/*regparms*/, ++ "loongarch64_calculate_cpucfg", ++ &loongarch64_calculate_cpucfg, ++ arg); ++ putIReg(rd, call); ++ ++ return True; ++} ++ ++ ++/*------------------------------------------------------------*/ ++/*--- Helpers for floating point arithmetic insns ---*/ ++/*------------------------------------------------------------*/ ++ ++static Bool gen_fadd_s ( DisResult* dres, UInt insn, ++ const VexArchInfo* archinfo, ++ const VexAbiInfo* abiinfo ) ++{ ++ UInt fk = get_fk(insn); ++ UInt fj = get_fj(insn); ++ UInt fd = get_fd(insn); ++ ++ DIP("fadd.s %s, %s, %s\n", nameFReg(fd), nameFReg(fj), nameFReg(fk)); ++ ++ if (!(archinfo->hwcaps & VEX_HWCAPS_LOONGARCH_FP)) { ++ dres->jk_StopHere = Ijk_SigILL; ++ dres->whatNext = Dis_StopHere; ++ return True; ++ } ++ ++ calculateFCSR(FADD_S, 2, fj, fk, 0); ++ IRExpr* rm = get_rounding_mode(); ++ putFReg32(fd, 
triop(Iop_AddF32, rm, getFReg32(fj), getFReg32(fk))); ++ ++ return True; ++} ++ ++static Bool gen_fadd_d ( DisResult* dres, UInt insn, ++ const VexArchInfo* archinfo, ++ const VexAbiInfo* abiinfo ) ++{ ++ UInt fk = get_fk(insn); ++ UInt fj = get_fj(insn); ++ UInt fd = get_fd(insn); ++ ++ DIP("fadd.d %s, %s, %s\n", nameFReg(fd), nameFReg(fj), nameFReg(fk)); ++ ++ if (!(archinfo->hwcaps & VEX_HWCAPS_LOONGARCH_FP)) { ++ dres->jk_StopHere = Ijk_SigILL; ++ dres->whatNext = Dis_StopHere; ++ return True; ++ } ++ ++ calculateFCSR(FADD_D, 2, fj, fk, 0); ++ IRExpr* rm = get_rounding_mode(); ++ putFReg64(fd, triop(Iop_AddF64, rm, getFReg64(fj), getFReg64(fk))); ++ ++ return True; ++} ++ ++static Bool gen_fsub_s ( DisResult* dres, UInt insn, ++ const VexArchInfo* archinfo, ++ const VexAbiInfo* abiinfo ) ++{ ++ UInt fk = get_fk(insn); ++ UInt fj = get_fj(insn); ++ UInt fd = get_fd(insn); ++ ++ DIP("fsub.s %s, %s, %s\n", nameFReg(fd), nameFReg(fj), nameFReg(fk)); ++ ++ if (!(archinfo->hwcaps & VEX_HWCAPS_LOONGARCH_FP)) { ++ dres->jk_StopHere = Ijk_SigILL; ++ dres->whatNext = Dis_StopHere; ++ return True; ++ } ++ ++ calculateFCSR(FSUB_S, 2, fj, fk, 0); ++ IRExpr* rm = get_rounding_mode(); ++ putFReg32(fd, triop(Iop_SubF32, rm, getFReg32(fj), getFReg32(fk))); ++ ++ return True; ++} ++ ++static Bool gen_fsub_d ( DisResult* dres, UInt insn, ++ const VexArchInfo* archinfo, ++ const VexAbiInfo* abiinfo ) ++{ ++ UInt fk = get_fk(insn); ++ UInt fj = get_fj(insn); ++ UInt fd = get_fd(insn); ++ ++ DIP("fsub.d %s, %s, %s\n", nameFReg(fd), nameFReg(fj), nameFReg(fk)); ++ ++ if (!(archinfo->hwcaps & VEX_HWCAPS_LOONGARCH_FP)) { ++ dres->jk_StopHere = Ijk_SigILL; ++ dres->whatNext = Dis_StopHere; ++ return True; ++ } ++ ++ calculateFCSR(FSUB_D, 2, fj, fk, 0); ++ IRExpr* rm = get_rounding_mode(); ++ putFReg64(fd, triop(Iop_SubF64, rm, getFReg64(fj), getFReg64(fk))); ++ ++ return True; ++} ++ ++static Bool gen_fmul_s ( DisResult* dres, UInt insn, ++ const VexArchInfo* archinfo, ++ const VexAbiInfo* abiinfo ) ++{ ++ UInt fk = get_fk(insn); ++ UInt fj = get_fj(insn); ++ UInt fd = get_fd(insn); ++ ++ DIP("fmul.s %s, %s, %s\n", nameFReg(fd), nameFReg(fj), nameFReg(fk)); ++ ++ if (!(archinfo->hwcaps & VEX_HWCAPS_LOONGARCH_FP)) { ++ dres->jk_StopHere = Ijk_SigILL; ++ dres->whatNext = Dis_StopHere; ++ return True; ++ } ++ ++ calculateFCSR(FMUL_S, 2, fj, fk, 0); ++ IRExpr* rm = get_rounding_mode(); ++ putFReg32(fd, triop(Iop_MulF32, rm, getFReg32(fj), getFReg32(fk))); ++ ++ return True; ++} ++ ++static Bool gen_fmul_d ( DisResult* dres, UInt insn, ++ const VexArchInfo* archinfo, ++ const VexAbiInfo* abiinfo ) ++{ ++ UInt fk = get_fk(insn); ++ UInt fj = get_fj(insn); ++ UInt fd = get_fd(insn); ++ ++ DIP("fmul.d %s, %s, %s\n", nameFReg(fd), nameFReg(fj), nameFReg(fk)); ++ ++ if (!(archinfo->hwcaps & VEX_HWCAPS_LOONGARCH_FP)) { ++ dres->jk_StopHere = Ijk_SigILL; ++ dres->whatNext = Dis_StopHere; ++ return True; ++ } ++ ++ calculateFCSR(FMUL_D, 2, fj, fk, 0); ++ IRExpr* rm = get_rounding_mode(); ++ putFReg64(fd, triop(Iop_MulF64, rm, getFReg64(fj), getFReg64(fk))); ++ ++ return True; ++} ++ ++static Bool gen_fdiv_s ( DisResult* dres, UInt insn, ++ const VexArchInfo* archinfo, ++ const VexAbiInfo* abiinfo ) ++{ ++ UInt fk = get_fk(insn); ++ UInt fj = get_fj(insn); ++ UInt fd = get_fd(insn); ++ ++ DIP("fdiv.s %s, %s, %s\n", nameFReg(fd), nameFReg(fj), nameFReg(fk)); ++ ++ if (!(archinfo->hwcaps & VEX_HWCAPS_LOONGARCH_FP)) { ++ dres->jk_StopHere = Ijk_SigILL; ++ dres->whatNext = Dis_StopHere; ++ return True; ++ } ++ ++ 
calculateFCSR(FDIV_S, 2, fj, fk, 0); ++ IRExpr* rm = get_rounding_mode(); ++ putFReg32(fd, triop(Iop_DivF32, rm, getFReg32(fj), getFReg32(fk))); ++ ++ return True; ++} ++ ++static Bool gen_fdiv_d ( DisResult* dres, UInt insn, ++ const VexArchInfo* archinfo, ++ const VexAbiInfo* abiinfo ) ++{ ++ UInt fk = get_fk(insn); ++ UInt fj = get_fj(insn); ++ UInt fd = get_fd(insn); ++ ++ DIP("fdiv.d %s, %s, %s\n", nameFReg(fd), nameFReg(fj), nameFReg(fk)); ++ ++ if (!(archinfo->hwcaps & VEX_HWCAPS_LOONGARCH_FP)) { ++ dres->jk_StopHere = Ijk_SigILL; ++ dres->whatNext = Dis_StopHere; ++ return True; ++ } ++ ++ calculateFCSR(FDIV_D, 2, fj, fk, 0); ++ IRExpr* rm = get_rounding_mode(); ++ putFReg64(fd, triop(Iop_DivF64, rm, getFReg64(fj), getFReg64(fk))); ++ ++ return True; ++} ++ ++static Bool gen_fmadd_s ( DisResult* dres, UInt insn, ++ const VexArchInfo* archinfo, ++ const VexAbiInfo* abiinfo ) ++{ ++ UInt fa = get_fa(insn); ++ UInt fk = get_fk(insn); ++ UInt fj = get_fj(insn); ++ UInt fd = get_fd(insn); ++ ++ DIP("fmadd.s %s, %s, %s, %s\n", nameFReg(fd), nameFReg(fj), ++ nameFReg(fk), nameFReg(fa)); ++ ++ if (!(archinfo->hwcaps & VEX_HWCAPS_LOONGARCH_FP)) { ++ dres->jk_StopHere = Ijk_SigILL; ++ dres->whatNext = Dis_StopHere; ++ return True; ++ } ++ ++ calculateFCSR(FMADD_S, 3, fj, fk, fa); ++ IRExpr* rm = get_rounding_mode(); ++ putFReg32(fd, qop(Iop_MAddF32, rm, getFReg32(fj), ++ getFReg32(fk), getFReg32(fa))); ++ ++ return True; ++} ++ ++static Bool gen_fmadd_d ( DisResult* dres, UInt insn, ++ const VexArchInfo* archinfo, ++ const VexAbiInfo* abiinfo ) ++{ ++ UInt fa = get_fa(insn); ++ UInt fk = get_fk(insn); ++ UInt fj = get_fj(insn); ++ UInt fd = get_fd(insn); ++ ++ DIP("fmadd.d %s, %s, %s, %s\n", nameFReg(fd), nameFReg(fj), ++ nameFReg(fk), nameFReg(fa)); ++ ++ if (!(archinfo->hwcaps & VEX_HWCAPS_LOONGARCH_FP)) { ++ dres->jk_StopHere = Ijk_SigILL; ++ dres->whatNext = Dis_StopHere; ++ return True; ++ } ++ ++ calculateFCSR(FMADD_D, 3, fj, fk, fa); ++ IRExpr* rm = get_rounding_mode(); ++ putFReg64(fd, qop(Iop_MAddF64, rm, getFReg64(fj), ++ getFReg64(fk), getFReg64(fa))); ++ ++ return True; ++} ++ ++static Bool gen_fmsub_s ( DisResult* dres, UInt insn, ++ const VexArchInfo* archinfo, ++ const VexAbiInfo* abiinfo ) ++{ ++ UInt fa = get_fa(insn); ++ UInt fk = get_fk(insn); ++ UInt fj = get_fj(insn); ++ UInt fd = get_fd(insn); ++ ++ DIP("fmsub.s %s, %s, %s, %s\n", nameFReg(fd), nameFReg(fj), ++ nameFReg(fk), nameFReg(fa)); ++ ++ if (!(archinfo->hwcaps & VEX_HWCAPS_LOONGARCH_FP)) { ++ dres->jk_StopHere = Ijk_SigILL; ++ dres->whatNext = Dis_StopHere; ++ return True; ++ } ++ ++ calculateFCSR(FMSUB_S, 3, fj, fk, fa); ++ IRExpr* rm = get_rounding_mode(); ++ putFReg32(fd, qop(Iop_MSubF32, rm, getFReg32(fj), ++ getFReg32(fk), getFReg32(fa))); ++ ++ return True; ++} ++ ++static Bool gen_fmsub_d ( DisResult* dres, UInt insn, ++ const VexArchInfo* archinfo, ++ const VexAbiInfo* abiinfo ) ++{ ++ UInt fa = get_fa(insn); ++ UInt fk = get_fk(insn); ++ UInt fj = get_fj(insn); ++ UInt fd = get_fd(insn); ++ ++ DIP("fmsub.d %s, %s, %s, %s\n", nameFReg(fd), nameFReg(fj), ++ nameFReg(fk), nameFReg(fa)); ++ ++ if (!(archinfo->hwcaps & VEX_HWCAPS_LOONGARCH_FP)) { ++ dres->jk_StopHere = Ijk_SigILL; ++ dres->whatNext = Dis_StopHere; ++ return True; ++ } ++ ++ calculateFCSR(FMSUB_D, 3, fj, fk, fa); ++ IRExpr* rm = get_rounding_mode(); ++ putFReg64(fd, qop(Iop_MSubF64, rm, getFReg64(fj), ++ getFReg64(fk), getFReg64(fa))); ++ ++ return True; ++} ++ ++static Bool gen_fnmadd_s ( DisResult* dres, UInt insn, ++ const VexArchInfo* 
archinfo, ++ const VexAbiInfo* abiinfo ) ++{ ++ UInt fa = get_fa(insn); ++ UInt fk = get_fk(insn); ++ UInt fj = get_fj(insn); ++ UInt fd = get_fd(insn); ++ ++ DIP("fnmadd.s %s, %s, %s, %s\n", nameFReg(fd), nameFReg(fj), ++ nameFReg(fk), nameFReg(fa)); ++ ++ if (!(archinfo->hwcaps & VEX_HWCAPS_LOONGARCH_FP)) { ++ dres->jk_StopHere = Ijk_SigILL; ++ dres->whatNext = Dis_StopHere; ++ return True; ++ } ++ ++ calculateFCSR(FNMADD_S, 3, fj, fk, fa); ++ IRExpr* rm = get_rounding_mode(); ++ IRExpr* madd = qop(Iop_MAddF32, rm, getFReg32(fj), ++ getFReg32(fk), getFReg32(fa)); ++ putFReg32(fd, unop(Iop_NegF32, madd)); ++ ++ return True; ++} ++ ++static Bool gen_fnmadd_d ( DisResult* dres, UInt insn, ++ const VexArchInfo* archinfo, ++ const VexAbiInfo* abiinfo ) ++{ ++ UInt fa = get_fa(insn); ++ UInt fk = get_fk(insn); ++ UInt fj = get_fj(insn); ++ UInt fd = get_fd(insn); ++ ++ DIP("fnmadd.d %s, %s, %s, %s\n", nameFReg(fd), nameFReg(fj), ++ nameFReg(fk), nameFReg(fa)); ++ ++ if (!(archinfo->hwcaps & VEX_HWCAPS_LOONGARCH_FP)) { ++ dres->jk_StopHere = Ijk_SigILL; ++ dres->whatNext = Dis_StopHere; ++ return True; ++ } ++ ++ calculateFCSR(FNMADD_D, 3, fj, fk, fa); ++ IRExpr* rm = get_rounding_mode(); ++ IRExpr* madd = qop(Iop_MAddF64, rm, getFReg64(fj), ++ getFReg64(fk), getFReg64(fa)); ++ putFReg64(fd, unop(Iop_NegF64, madd)); ++ ++ return True; ++} ++ ++static Bool gen_fnmsub_s ( DisResult* dres, UInt insn, ++ const VexArchInfo* archinfo, ++ const VexAbiInfo* abiinfo ) ++{ ++ UInt fa = get_fa(insn); ++ UInt fk = get_fk(insn); ++ UInt fj = get_fj(insn); ++ UInt fd = get_fd(insn); ++ ++ DIP("fnmsub.s %s, %s, %s, %s\n", nameFReg(fd), nameFReg(fj), ++ nameFReg(fk), nameFReg(fa)); ++ ++ if (!(archinfo->hwcaps & VEX_HWCAPS_LOONGARCH_FP)) { ++ dres->jk_StopHere = Ijk_SigILL; ++ dres->whatNext = Dis_StopHere; ++ return True; ++ } ++ ++ calculateFCSR(FNMSUB_S, 3, fj, fk, fa); ++ IRExpr* rm = get_rounding_mode(); ++ IRExpr* msub = qop(Iop_MSubF32, rm, getFReg32(fj), ++ getFReg32(fk), getFReg32(fa)); ++ putFReg32(fd, unop(Iop_NegF32, msub)); ++ ++ return True; ++} ++ ++static Bool gen_fnmsub_d ( DisResult* dres, UInt insn, ++ const VexArchInfo* archinfo, ++ const VexAbiInfo* abiinfo ) ++{ ++ UInt fa = get_fa(insn); ++ UInt fk = get_fk(insn); ++ UInt fj = get_fj(insn); ++ UInt fd = get_fd(insn); ++ ++ DIP("fnmsub.d %s, %s, %s, %s\n", nameFReg(fd), nameFReg(fj), ++ nameFReg(fk), nameFReg(fa)); ++ ++ if (!(archinfo->hwcaps & VEX_HWCAPS_LOONGARCH_FP)) { ++ dres->jk_StopHere = Ijk_SigILL; ++ dres->whatNext = Dis_StopHere; ++ return True; ++ } ++ ++ calculateFCSR(FNMSUB_D, 3, fj, fk, fa); ++ IRExpr* rm = get_rounding_mode(); ++ IRExpr* msub = qop(Iop_MSubF64, rm, getFReg64(fj), ++ getFReg64(fk), getFReg64(fa)); ++ putFReg64(fd, unop(Iop_NegF64, msub)); ++ ++ return True; ++} ++ ++static Bool gen_fmax_s ( DisResult* dres, UInt insn, ++ const VexArchInfo* archinfo, ++ const VexAbiInfo* abiinfo ) ++{ ++ UInt fk = get_fk(insn); ++ UInt fj = get_fj(insn); ++ UInt fd = get_fd(insn); ++ ++ DIP("fmax.s %s, %s, %s\n", nameFReg(fd), nameFReg(fj), nameFReg(fk)); ++ ++ if (!(archinfo->hwcaps & VEX_HWCAPS_LOONGARCH_FP)) { ++ dres->jk_StopHere = Ijk_SigILL; ++ dres->whatNext = Dis_StopHere; ++ return True; ++ } ++ ++ calculateFCSR(FMAX_S, 2, fj, fk, 0); ++ putFReg32(fd, binop(Iop_MaxNumF32, getFReg32(fj), getFReg32(fk))); ++ ++ return True; ++} ++ ++static Bool gen_fmax_d ( DisResult* dres, UInt insn, ++ const VexArchInfo* archinfo, ++ const VexAbiInfo* abiinfo ) ++{ ++ UInt fk = get_fk(insn); ++ UInt fj = get_fj(insn); ++ UInt fd = 
get_fd(insn); ++ ++ DIP("fmax.d %s, %s, %s\n", nameFReg(fd), nameFReg(fj), nameFReg(fk)); ++ ++ if (!(archinfo->hwcaps & VEX_HWCAPS_LOONGARCH_FP)) { ++ dres->jk_StopHere = Ijk_SigILL; ++ dres->whatNext = Dis_StopHere; ++ return True; ++ } ++ ++ calculateFCSR(FMAX_D, 2, fj, fk, 0); ++ putFReg64(fd, binop(Iop_MaxNumF64, getFReg64(fj), getFReg64(fk))); ++ ++ return True; ++} ++ ++static Bool gen_fmin_s ( DisResult* dres, UInt insn, ++ const VexArchInfo* archinfo, ++ const VexAbiInfo* abiinfo ) ++{ ++ UInt fk = get_fk(insn); ++ UInt fj = get_fj(insn); ++ UInt fd = get_fd(insn); ++ ++ DIP("fmin.s %s, %s, %s\n", nameFReg(fd), nameFReg(fj), nameFReg(fk)); ++ ++ if (!(archinfo->hwcaps & VEX_HWCAPS_LOONGARCH_FP)) { ++ dres->jk_StopHere = Ijk_SigILL; ++ dres->whatNext = Dis_StopHere; ++ return True; ++ } ++ ++ calculateFCSR(FMIN_S, 2, fj, fk, 0); ++ putFReg32(fd, binop(Iop_MinNumF32, getFReg32(fj), getFReg32(fk))); ++ ++ return True; ++} ++ ++static Bool gen_fmin_d ( DisResult* dres, UInt insn, ++ const VexArchInfo* archinfo, ++ const VexAbiInfo* abiinfo ) ++{ ++ UInt fk = get_fk(insn); ++ UInt fj = get_fj(insn); ++ UInt fd = get_fd(insn); ++ ++ DIP("fmin.d %s, %s, %s\n", nameFReg(fd), nameFReg(fj), nameFReg(fk)); ++ ++ if (!(archinfo->hwcaps & VEX_HWCAPS_LOONGARCH_FP)) { ++ dres->jk_StopHere = Ijk_SigILL; ++ dres->whatNext = Dis_StopHere; ++ return True; ++ } ++ ++ calculateFCSR(FMIN_D, 2, fj, fk, 0); ++ putFReg64(fd, binop(Iop_MinNumF64, getFReg64(fj), getFReg64(fk))); ++ ++ return True; ++} ++ ++static Bool gen_fmaxa_s ( DisResult* dres, UInt insn, ++ const VexArchInfo* archinfo, ++ const VexAbiInfo* abiinfo ) ++{ ++ UInt fk = get_fk(insn); ++ UInt fj = get_fj(insn); ++ UInt fd = get_fd(insn); ++ ++ DIP("fmaxa.s %s, %s, %s\n", nameFReg(fd), nameFReg(fj), nameFReg(fk)); ++ ++ if (!(archinfo->hwcaps & VEX_HWCAPS_LOONGARCH_FP)) { ++ dres->jk_StopHere = Ijk_SigILL; ++ dres->whatNext = Dis_StopHere; ++ return True; ++ } ++ ++ calculateFCSR(FMAXA_S, 2, fj, fk, 0); ++ putFReg32(fd, binop(Iop_MaxNumAbsF32, getFReg32(fj), getFReg32(fk))); ++ ++ return True; ++} ++ ++static Bool gen_fmaxa_d ( DisResult* dres, UInt insn, ++ const VexArchInfo* archinfo, ++ const VexAbiInfo* abiinfo ) ++{ ++ UInt fk = get_fk(insn); ++ UInt fj = get_fj(insn); ++ UInt fd = get_fd(insn); ++ ++ DIP("fmaxa.d %s, %s, %s\n", nameFReg(fd), nameFReg(fj), nameFReg(fk)); ++ ++ if (!(archinfo->hwcaps & VEX_HWCAPS_LOONGARCH_FP)) { ++ dres->jk_StopHere = Ijk_SigILL; ++ dres->whatNext = Dis_StopHere; ++ return True; ++ } ++ ++ calculateFCSR(FMAXA_D, 2, fj, fk, 0); ++ putFReg64(fd, binop(Iop_MaxNumAbsF64, getFReg64(fj), getFReg64(fk))); ++ ++ return True; ++} ++ ++static Bool gen_fmina_s ( DisResult* dres, UInt insn, ++ const VexArchInfo* archinfo, ++ const VexAbiInfo* abiinfo ) ++{ ++ UInt fk = get_fk(insn); ++ UInt fj = get_fj(insn); ++ UInt fd = get_fd(insn); ++ ++ DIP("fmina.s %s, %s, %s\n", nameFReg(fd), nameFReg(fj), nameFReg(fk)); ++ ++ if (!(archinfo->hwcaps & VEX_HWCAPS_LOONGARCH_FP)) { ++ dres->jk_StopHere = Ijk_SigILL; ++ dres->whatNext = Dis_StopHere; ++ return True; ++ } ++ ++ calculateFCSR(FMINA_S, 2, fj, fk, 0); ++ putFReg32(fd, binop(Iop_MinNumAbsF32, getFReg32(fj), getFReg32(fk))); ++ ++ return True; ++} ++ ++static Bool gen_fmina_d ( DisResult* dres, UInt insn, ++ const VexArchInfo* archinfo, ++ const VexAbiInfo* abiinfo ) ++{ ++ UInt fk = get_fk(insn); ++ UInt fj = get_fj(insn); ++ UInt fd = get_fd(insn); ++ ++ DIP("fmina.d %s, %s, %s\n", nameFReg(fd), nameFReg(fj), nameFReg(fk)); ++ ++ if (!(archinfo->hwcaps & 
VEX_HWCAPS_LOONGARCH_FP)) { ++ dres->jk_StopHere = Ijk_SigILL; ++ dres->whatNext = Dis_StopHere; ++ return True; ++ } ++ ++ calculateFCSR(FMINA_D, 2, fj, fk, 0); ++ putFReg64(fd, binop(Iop_MinNumAbsF64, getFReg64(fj), getFReg64(fk))); ++ ++ return True; ++} ++ ++static Bool gen_fabs_s ( DisResult* dres, UInt insn, ++ const VexArchInfo* archinfo, ++ const VexAbiInfo* abiinfo ) ++{ ++ UInt fj = get_fj(insn); ++ UInt fd = get_fd(insn); ++ ++ DIP("fabs.s %s, %s\n", nameFReg(fd), nameFReg(fj)); ++ ++ if (!(archinfo->hwcaps & VEX_HWCAPS_LOONGARCH_FP)) { ++ dres->jk_StopHere = Ijk_SigILL; ++ dres->whatNext = Dis_StopHere; ++ return True; ++ } ++ ++ calculateFCSR(FABS_S, 1, fj, 0, 0); ++ putFReg32(fd, unop(Iop_AbsF32, getFReg32(fj))); ++ ++ return True; ++} ++ ++static Bool gen_fabs_d ( DisResult* dres, UInt insn, ++ const VexArchInfo* archinfo, ++ const VexAbiInfo* abiinfo ) ++{ ++ UInt fj = get_fj(insn); ++ UInt fd = get_fd(insn); ++ ++ DIP("fabs.d %s, %s\n", nameFReg(fd), nameFReg(fj)); ++ ++ if (!(archinfo->hwcaps & VEX_HWCAPS_LOONGARCH_FP)) { ++ dres->jk_StopHere = Ijk_SigILL; ++ dres->whatNext = Dis_StopHere; ++ return True; ++ } ++ ++ calculateFCSR(FABS_D, 1, fj, 0, 0); ++ putFReg64(fd, unop(Iop_AbsF64, getFReg64(fj))); ++ ++ return True; ++} ++ ++static Bool gen_fneg_s ( DisResult* dres, UInt insn, ++ const VexArchInfo* archinfo, ++ const VexAbiInfo* abiinfo ) ++{ ++ UInt fj = get_fj(insn); ++ UInt fd = get_fd(insn); ++ ++ DIP("fneg.s %s, %s\n", nameFReg(fd), nameFReg(fj)); ++ ++ if (!(archinfo->hwcaps & VEX_HWCAPS_LOONGARCH_FP)) { ++ dres->jk_StopHere = Ijk_SigILL; ++ dres->whatNext = Dis_StopHere; ++ return True; ++ } ++ ++ calculateFCSR(FNEG_S, 1, fj, 0, 0); ++ putFReg32(fd, unop(Iop_NegF32, getFReg32(fj))); ++ ++ return True; ++} ++ ++static Bool gen_fneg_d ( DisResult* dres, UInt insn, ++ const VexArchInfo* archinfo, ++ const VexAbiInfo* abiinfo ) ++{ ++ UInt fj = get_fj(insn); ++ UInt fd = get_fd(insn); ++ ++ DIP("fneg.d %s, %s\n", nameFReg(fd), nameFReg(fj)); ++ ++ if (!(archinfo->hwcaps & VEX_HWCAPS_LOONGARCH_FP)) { ++ dres->jk_StopHere = Ijk_SigILL; ++ dres->whatNext = Dis_StopHere; ++ return True; ++ } ++ ++ calculateFCSR(FNEG_D, 1, fj, 0, 0); ++ putFReg64(fd, unop(Iop_NegF64, getFReg64(fj))); ++ ++ return True; ++} ++ ++static Bool gen_fsqrt_s ( DisResult* dres, UInt insn, ++ const VexArchInfo* archinfo, ++ const VexAbiInfo* abiinfo ) ++{ ++ UInt fj = get_fj(insn); ++ UInt fd = get_fd(insn); ++ ++ DIP("fsqrt.s %s, %s\n", nameFReg(fd), nameFReg(fj)); ++ ++ if (!(archinfo->hwcaps & VEX_HWCAPS_LOONGARCH_FP)) { ++ dres->jk_StopHere = Ijk_SigILL; ++ dres->whatNext = Dis_StopHere; ++ return True; ++ } ++ ++ calculateFCSR(FSQRT_S, 1, fj, 0, 0); ++ IRExpr* rm = get_rounding_mode(); ++ putFReg32(fd, binop(Iop_SqrtF32, rm, getFReg32(fj))); ++ ++ return True; ++} ++ ++static Bool gen_fsqrt_d ( DisResult* dres, UInt insn, ++ const VexArchInfo* archinfo, ++ const VexAbiInfo* abiinfo ) ++{ ++ UInt fj = get_fj(insn); ++ UInt fd = get_fd(insn); ++ ++ DIP("fsqrt.d %s, %s\n", nameFReg(fd), nameFReg(fj)); ++ ++ if (!(archinfo->hwcaps & VEX_HWCAPS_LOONGARCH_FP)) { ++ dres->jk_StopHere = Ijk_SigILL; ++ dres->whatNext = Dis_StopHere; ++ return True; ++ } ++ ++ calculateFCSR(FSQRT_D, 1, fj, 0, 0); ++ IRExpr* rm = get_rounding_mode(); ++ putFReg64(fd, binop(Iop_SqrtF64, rm, getFReg64(fj))); ++ ++ return True; ++} ++ ++static Bool gen_frecip_s ( DisResult* dres, UInt insn, ++ const VexArchInfo* archinfo, ++ const VexAbiInfo* abiinfo ) ++{ ++ UInt fj = get_fj(insn); ++ UInt fd = get_fd(insn); ++ ++ 
DIP("frecip.s %s, %s\n", nameFReg(fd), nameFReg(fj)); ++ ++ if (!(archinfo->hwcaps & VEX_HWCAPS_LOONGARCH_FP)) { ++ dres->jk_StopHere = Ijk_SigILL; ++ dres->whatNext = Dis_StopHere; ++ return True; ++ } ++ ++ calculateFCSR(FRECIP_S, 1, fj, 0, 0); ++ IRExpr* rm = get_rounding_mode(); ++ putFReg32(fd, triop(Iop_DivF32, rm, mkF32i(1), getFReg32(fj))); ++ ++ return True; ++} ++ ++static Bool gen_frecip_d ( DisResult* dres, UInt insn, ++ const VexArchInfo* archinfo, ++ const VexAbiInfo* abiinfo ) ++{ ++ UInt fj = get_fj(insn); ++ UInt fd = get_fd(insn); ++ ++ DIP("frecip.d %s, %s\n", nameFReg(fd), nameFReg(fj)); ++ ++ if (!(archinfo->hwcaps & VEX_HWCAPS_LOONGARCH_FP)) { ++ dres->jk_StopHere = Ijk_SigILL; ++ dres->whatNext = Dis_StopHere; ++ return True; ++ } ++ ++ calculateFCSR(FRECIP_D, 1, fj, 0, 0); ++ IRExpr* rm = get_rounding_mode(); ++ putFReg64(fd, triop(Iop_DivF64, rm, mkF64i(1), getFReg64(fj))); ++ ++ return True; ++} ++ ++static Bool gen_frsqrt_s ( DisResult* dres, UInt insn, ++ const VexArchInfo* archinfo, ++ const VexAbiInfo* abiinfo ) ++{ ++ UInt fj = get_fj(insn); ++ UInt fd = get_fd(insn); ++ ++ DIP("frsqrt.s %s, %s\n", nameFReg(fd), nameFReg(fj)); ++ ++ if (!(archinfo->hwcaps & VEX_HWCAPS_LOONGARCH_FP)) { ++ dres->jk_StopHere = Ijk_SigILL; ++ dres->whatNext = Dis_StopHere; ++ return True; ++ } ++ ++ calculateFCSR(FRSQRT_S, 1, fj, 0, 0); ++ IRExpr* rm = get_rounding_mode(); ++ putFReg32(fd, binop(Iop_RSqrtF32, rm, getFReg32(fj))); ++ ++ return True; ++} ++ ++static Bool gen_frsqrt_d ( DisResult* dres, UInt insn, ++ const VexArchInfo* archinfo, ++ const VexAbiInfo* abiinfo ) ++{ ++ UInt fj = get_fj(insn); ++ UInt fd = get_fd(insn); ++ ++ DIP("frsqrt.d %s, %s\n", nameFReg(fd), nameFReg(fj)); ++ ++ if (!(archinfo->hwcaps & VEX_HWCAPS_LOONGARCH_FP)) { ++ dres->jk_StopHere = Ijk_SigILL; ++ dres->whatNext = Dis_StopHere; ++ return True; ++ } ++ ++ calculateFCSR(FRSQRT_D, 1, fj, 0, 0); ++ IRExpr* rm = get_rounding_mode(); ++ putFReg64(fd, binop(Iop_RSqrtF64, rm, getFReg64(fj))); ++ ++ return True; ++} ++ ++static Bool gen_fscaleb_s ( DisResult* dres, UInt insn, ++ const VexArchInfo* archinfo, ++ const VexAbiInfo* abiinfo ) ++{ ++ UInt fk = get_fk(insn); ++ UInt fj = get_fj(insn); ++ UInt fd = get_fd(insn); ++ ++ DIP("fscaleb.s %s, %s, %s\n", nameFReg(fd), nameFReg(fj), nameFReg(fk)); ++ ++ if (!(archinfo->hwcaps & VEX_HWCAPS_LOONGARCH_FP)) { ++ dres->jk_StopHere = Ijk_SigILL; ++ dres->whatNext = Dis_StopHere; ++ return True; ++ } ++ ++ calculateFCSR(FSCALEB_S, 2, fj, fk, 0); ++ IRExpr* rm = get_rounding_mode(); ++ putFReg32(fd, triop(Iop_ScaleBF32, rm, getFReg32(fj), getFReg32(fk))); ++ ++ return True; ++} ++ ++static Bool gen_fscaleb_d ( DisResult* dres, UInt insn, ++ const VexArchInfo* archinfo, ++ const VexAbiInfo* abiinfo ) ++{ ++ UInt fk = get_fk(insn); ++ UInt fj = get_fj(insn); ++ UInt fd = get_fd(insn); ++ ++ DIP("fscaleb.d %s, %s, %s\n", nameFReg(fd), nameFReg(fj), nameFReg(fk)); ++ ++ if (!(archinfo->hwcaps & VEX_HWCAPS_LOONGARCH_FP)) { ++ dres->jk_StopHere = Ijk_SigILL; ++ dres->whatNext = Dis_StopHere; ++ return True; ++ } ++ ++ calculateFCSR(FSCALEB_D, 2, fj, fk, 0); ++ IRExpr* rm = get_rounding_mode(); ++ putFReg64(fd, triop(Iop_ScaleBF64, rm, getFReg64(fj), getFReg64(fk))); ++ ++ return True; ++} ++ ++static Bool gen_flogb_s ( DisResult* dres, UInt insn, ++ const VexArchInfo* archinfo, ++ const VexAbiInfo* abiinfo ) ++{ ++ UInt fj = get_fj(insn); ++ UInt fd = get_fd(insn); ++ ++ DIP("flogb.s %s, %s\n", nameFReg(fd), nameFReg(fj)); ++ ++ if (!(archinfo->hwcaps & 
VEX_HWCAPS_LOONGARCH_FP)) { ++ dres->jk_StopHere = Ijk_SigILL; ++ dres->whatNext = Dis_StopHere; ++ return True; ++ } ++ ++ calculateFCSR(FLOGB_S, 1, fj, 0, 0); ++ IRExpr* rm = get_rounding_mode(); ++ putFReg32(fd, binop(Iop_LogBF32, rm, getFReg32(fj))); ++ ++ return True; ++} ++ ++static Bool gen_flogb_d ( DisResult* dres, UInt insn, ++ const VexArchInfo* archinfo, ++ const VexAbiInfo* abiinfo ) ++{ ++ UInt fj = get_fj(insn); ++ UInt fd = get_fd(insn); ++ ++ DIP("flogb.d %s, %s\n", nameFReg(fd), nameFReg(fj)); ++ ++ if (!(archinfo->hwcaps & VEX_HWCAPS_LOONGARCH_FP)) { ++ dres->jk_StopHere = Ijk_SigILL; ++ dres->whatNext = Dis_StopHere; ++ return True; ++ } ++ ++ calculateFCSR(FLOGB_D, 1, fj, 0, 0); ++ IRExpr* rm = get_rounding_mode(); ++ putFReg64(fd, binop(Iop_LogBF64, rm, getFReg64(fj))); ++ ++ return True; ++} ++ ++static Bool gen_fcopysign_s ( DisResult* dres, UInt insn, ++ const VexArchInfo* archinfo, ++ const VexAbiInfo* abiinfo ) ++{ ++ UInt fk = get_fk(insn); ++ UInt fj = get_fj(insn); ++ UInt fd = get_fd(insn); ++ ++ DIP("fcopysign.s %s, %s, %s\n", nameFReg(fd), nameFReg(fj), nameFReg(fk)); ++ ++ if (!(archinfo->hwcaps & VEX_HWCAPS_LOONGARCH_FP)) { ++ dres->jk_StopHere = Ijk_SigILL; ++ dres->whatNext = Dis_StopHere; ++ return True; ++ } ++ ++ IRExpr* i1 = unop(Iop_ReinterpF32asI32, getFReg32(fj)); ++ IRExpr* shl1 = binop(Iop_Shl32, i1, mkU8(1)); ++ IRExpr* shr1 = binop(Iop_Shr32, shl1, mkU8(1)); ++ IRExpr* i2 = unop(Iop_ReinterpF32asI32, getFReg32(fk)); ++ IRExpr* shr2 = binop(Iop_Shr32, i2, mkU8(31)); ++ IRExpr* shl2 = binop(Iop_Shl32, shr2, mkU8(31)); ++ IRExpr* or = binop(Iop_Or32, shr1, shl2); ++ putFReg32(fd, unop(Iop_ReinterpI32asF32, or)); ++ ++ return True; ++} ++ ++static Bool gen_fcopysign_d ( DisResult* dres, UInt insn, ++ const VexArchInfo* archinfo, ++ const VexAbiInfo* abiinfo ) ++{ ++ UInt fk = get_fk(insn); ++ UInt fj = get_fj(insn); ++ UInt fd = get_fd(insn); ++ ++ DIP("fcopysign.d %s, %s, %s\n", nameFReg(fd), nameFReg(fj), nameFReg(fk)); ++ ++ if (!(archinfo->hwcaps & VEX_HWCAPS_LOONGARCH_FP)) { ++ dres->jk_StopHere = Ijk_SigILL; ++ dres->whatNext = Dis_StopHere; ++ return True; ++ } ++ ++ IRExpr* i1 = unop(Iop_ReinterpF64asI64, getFReg64(fj)); ++ IRExpr* shl1 = binop(Iop_Shl64, i1, mkU8(1)); ++ IRExpr* shr1 = binop(Iop_Shr64, shl1, mkU8(1)); ++ IRExpr* i2 = unop(Iop_ReinterpF64asI64, getFReg64(fk)); ++ IRExpr* shr2 = binop(Iop_Shr64, i2, mkU8(63)); ++ IRExpr* shl2 = binop(Iop_Shl64, shr2, mkU8(63)); ++ IRExpr* or = binop(Iop_Or64, shr1, shl2); ++ putFReg64(fd, unop(Iop_ReinterpI64asF64, or)); ++ ++ return True; ++} ++ ++static Bool gen_fclass_s ( DisResult* dres, UInt insn, ++ const VexArchInfo* archinfo, ++ const VexAbiInfo* abiinfo ) ++{ ++ UInt fj = get_fj(insn); ++ UInt fd = get_fd(insn); ++ ++ DIP("fclass.s %s, %s\n", nameFReg(fd), nameFReg(fj)); ++ ++ if (!(archinfo->hwcaps & VEX_HWCAPS_LOONGARCH_FP)) { ++ dres->jk_StopHere = Ijk_SigILL; ++ dres->whatNext = Dis_StopHere; ++ return True; ++ } ++ ++ IRExpr** arg = mkIRExprVec_1(unop(Iop_ReinterpF64asI64, getFReg64(fj))); ++ IRExpr* call = mkIRExprCCall(Ity_I64, 0/*regparms*/, ++ "loongarch64_calculate_fclass_s", ++ &loongarch64_calculate_fclass_s, ++ arg); ++ putFReg32(fd, unop(Iop_ReinterpI32asF32, unop(Iop_64to32, call))); ++ ++ return True; ++} ++ ++static Bool gen_fclass_d ( DisResult* dres, UInt insn, ++ const VexArchInfo* archinfo, ++ const VexAbiInfo* abiinfo ) ++{ ++ UInt fj = get_fj(insn); ++ UInt fd = get_fd(insn); ++ ++ DIP("fclass.d %s, %s\n", nameFReg(fd), nameFReg(fj)); ++ ++ if 
(!(archinfo->hwcaps & VEX_HWCAPS_LOONGARCH_FP)) { ++ dres->jk_StopHere = Ijk_SigILL; ++ dres->whatNext = Dis_StopHere; ++ return True; ++ } ++ ++ IRExpr** arg = mkIRExprVec_1(unop(Iop_ReinterpF64asI64, getFReg64(fj))); ++ IRExpr* call = mkIRExprCCall(Ity_I64, 0/*regparms*/, ++ "loongarch64_calculate_fclass_d", ++ &loongarch64_calculate_fclass_d, ++ arg); ++ putFReg64(fd, unop(Iop_ReinterpI64asF64, call)); ++ ++ return True; ++} ++ ++ ++/*------------------------------------------------------------*/ ++/*--- Helpers for floating point comparison insns ---*/ ++/*------------------------------------------------------------*/ ++ ++static inline IRExpr* is_UN ( IRExpr* e ) ++{ ++ return binop(Iop_CmpEQ32, e, mkU32(0x45)); ++} ++ ++static inline IRExpr* is_LT ( IRExpr* e ) ++{ ++ return binop(Iop_CmpEQ32, e, mkU32(0x1)); ++} ++ ++static inline IRExpr* is_GT ( IRExpr* e ) ++{ ++ return binop(Iop_CmpEQ32, e, mkU32(0x0)); ++} ++ ++static inline IRExpr* is_EQ ( IRExpr* e ) ++{ ++ return binop(Iop_CmpEQ32, e, mkU32(0x40)); ++} ++ ++static Bool gen_fcmp_cond_helper ( enum fpop op, UInt cc, ++ UInt fj, UInt fk, Bool size64 ) ++{ ++ /* We have to convert 'irRes' from an IR-convention return result ++ (IRCmpF32Result / IRCmpF64Result) to a LOONGARCH-encoded group. ++ ++ FP cmp result | IR ++ -------------------- ++ UN | 0x45 ++ LT | 0x01 ++ GT | 0x00 ++ EQ | 0x40 ++ */ ++ IRTemp result = newTemp(Ity_I32); ++ if (size64) ++ assign(result, binop(Iop_CmpF64, getFReg64(fj), getFReg64(fk))); ++ else ++ assign(result, binop(Iop_CmpF32, getFReg32(fj), getFReg32(fk))); ++ ++ IRExpr* e; ++ switch (op) { ++ case FCMP_CAF_S: case FCMP_CAF_D: case FCMP_SAF_S: case FCMP_SAF_D: ++ e = mkU1(0); ++ break; ++ case FCMP_CLT_S: case FCMP_CLT_D: case FCMP_SLT_S: case FCMP_SLT_D: ++ e = is_LT(mkexpr(result)); ++ break; ++ case FCMP_CEQ_S: case FCMP_CEQ_D: case FCMP_SEQ_S: case FCMP_SEQ_D: ++ e = is_EQ(mkexpr(result)); ++ break; ++ case FCMP_CLE_S: case FCMP_CLE_D: case FCMP_SLE_S: case FCMP_SLE_D: ++ e = binop(Iop_Or1, is_LT(mkexpr(result)), is_EQ(mkexpr(result))); ++ break; ++ case FCMP_CUN_S: case FCMP_CUN_D: case FCMP_SUN_S: case FCMP_SUN_D: ++ e = is_UN(mkexpr(result)); ++ break; ++ case FCMP_CULT_S: case FCMP_CULT_D: case FCMP_SULT_S: case FCMP_SULT_D: ++ e = binop(Iop_Or1, is_UN(mkexpr(result)), is_LT(mkexpr(result))); ++ break; ++ case FCMP_CUEQ_S: case FCMP_CUEQ_D: case FCMP_SUEQ_S: case FCMP_SUEQ_D: ++ e = binop(Iop_Or1, is_UN(mkexpr(result)), is_EQ(mkexpr(result))); ++ break; ++ case FCMP_CULE_S: case FCMP_CULE_D: case FCMP_SULE_S: case FCMP_SULE_D: ++ e = binop(Iop_Or1, is_UN(mkexpr(result)), ++ binop(Iop_Or1, is_LT(mkexpr(result)), ++ is_EQ(mkexpr(result)))); ++ break; ++ case FCMP_CNE_S: case FCMP_CNE_D: case FCMP_SNE_S: case FCMP_SNE_D: ++ e = binop(Iop_Or1, is_GT(mkexpr(result)), is_LT(mkexpr(result))); ++ break; ++ case FCMP_COR_S: case FCMP_COR_D: case FCMP_SOR_S: case FCMP_SOR_D: ++ e = binop(Iop_Or1, is_GT(mkexpr(result)), ++ binop(Iop_Or1, is_LT(mkexpr(result)), ++ is_EQ(mkexpr(result)))); ++ break; ++ case FCMP_CUNE_S: case FCMP_CUNE_D: case FCMP_SUNE_S: case FCMP_SUNE_D: ++ e = binop(Iop_Or1, is_UN(mkexpr(result)), ++ binop(Iop_Or1, is_GT(mkexpr(result)), ++ is_LT(mkexpr(result)))); ++ break; ++ default: ++ return False; ++ } ++ ++ calculateFCSR(op, 2, fj, fk, 0); ++ putFCC(cc, unop(Iop_1Uto8, e)); ++ ++ return True; ++} ++ ++static Bool gen_fcmp_caf_s ( DisResult* dres, UInt insn, ++ const VexArchInfo* archinfo, ++ const VexAbiInfo* abiinfo ) ++{ ++ UInt fk = get_fk(insn); ++ UInt fj = get_fj(insn); ++ 
UInt cd = get_cd(insn); ++ ++ DIP("fcmp.caf.s %s, %s, %s\n", nameFCC(cd), nameFReg(fj), nameFReg(fk)); ++ ++ if (!(archinfo->hwcaps & VEX_HWCAPS_LOONGARCH_FP)) { ++ dres->jk_StopHere = Ijk_SigILL; ++ dres->whatNext = Dis_StopHere; ++ return True; ++ } ++ ++ return gen_fcmp_cond_helper(FCMP_CAF_S, cd, fj, fk, False); ++} ++ ++static Bool gen_fcmp_caf_d ( DisResult* dres, UInt insn, ++ const VexArchInfo* archinfo, ++ const VexAbiInfo* abiinfo ) ++{ ++ UInt fk = get_fk(insn); ++ UInt fj = get_fj(insn); ++ UInt cd = get_cd(insn); ++ ++ DIP("fcmp.caf.d %s, %s, %s\n", nameFCC(cd), nameFReg(fj), nameFReg(fk)); ++ ++ if (!(archinfo->hwcaps & VEX_HWCAPS_LOONGARCH_FP)) { ++ dres->jk_StopHere = Ijk_SigILL; ++ dres->whatNext = Dis_StopHere; ++ return True; ++ } ++ ++ return gen_fcmp_cond_helper(FCMP_CAF_D, cd, fj, fk, True); ++} ++ ++static Bool gen_fcmp_saf_s ( DisResult* dres, UInt insn, ++ const VexArchInfo* archinfo, ++ const VexAbiInfo* abiinfo ) ++{ ++ UInt fk = get_fk(insn); ++ UInt fj = get_fj(insn); ++ UInt cd = get_cd(insn); ++ ++ DIP("fcmp.saf.s %s, %s, %s\n", nameFCC(cd), nameFReg(fj), nameFReg(fk)); ++ ++ if (!(archinfo->hwcaps & VEX_HWCAPS_LOONGARCH_FP)) { ++ dres->jk_StopHere = Ijk_SigILL; ++ dres->whatNext = Dis_StopHere; ++ return True; ++ } ++ ++ return gen_fcmp_cond_helper(FCMP_SAF_S, cd, fj, fk, False); ++} ++ ++static Bool gen_fcmp_saf_d ( DisResult* dres, UInt insn, ++ const VexArchInfo* archinfo, ++ const VexAbiInfo* abiinfo ) ++{ ++ UInt fk = get_fk(insn); ++ UInt fj = get_fj(insn); ++ UInt cd = get_cd(insn); ++ ++ DIP("fcmp.saf.d %s, %s, %s\n", nameFCC(cd), nameFReg(fj), nameFReg(fk)); ++ ++ if (!(archinfo->hwcaps & VEX_HWCAPS_LOONGARCH_FP)) { ++ dres->jk_StopHere = Ijk_SigILL; ++ dres->whatNext = Dis_StopHere; ++ return True; ++ } ++ ++ return gen_fcmp_cond_helper(FCMP_SAF_D, cd, fj, fk, True); ++} ++ ++static Bool gen_fcmp_clt_s ( DisResult* dres, UInt insn, ++ const VexArchInfo* archinfo, ++ const VexAbiInfo* abiinfo ) ++{ ++ UInt fk = get_fk(insn); ++ UInt fj = get_fj(insn); ++ UInt cd = get_cd(insn); ++ ++ DIP("fcmp.clt.s %s, %s, %s\n", nameFCC(cd), nameFReg(fj), nameFReg(fk)); ++ ++ if (!(archinfo->hwcaps & VEX_HWCAPS_LOONGARCH_FP)) { ++ dres->jk_StopHere = Ijk_SigILL; ++ dres->whatNext = Dis_StopHere; ++ return True; ++ } ++ ++ return gen_fcmp_cond_helper(FCMP_CLT_S, cd, fj, fk, False); ++} ++ ++static Bool gen_fcmp_clt_d ( DisResult* dres, UInt insn, ++ const VexArchInfo* archinfo, ++ const VexAbiInfo* abiinfo ) ++{ ++ UInt fk = get_fk(insn); ++ UInt fj = get_fj(insn); ++ UInt cd = get_cd(insn); ++ ++ DIP("fcmp.clt.d %s, %s, %s\n", nameFCC(cd), nameFReg(fj), nameFReg(fk)); ++ ++ if (!(archinfo->hwcaps & VEX_HWCAPS_LOONGARCH_FP)) { ++ dres->jk_StopHere = Ijk_SigILL; ++ dres->whatNext = Dis_StopHere; ++ return True; ++ } ++ ++ return gen_fcmp_cond_helper(FCMP_CLT_D, cd, fj, fk, True); ++} ++ ++static Bool gen_fcmp_slt_s ( DisResult* dres, UInt insn, ++ const VexArchInfo* archinfo, ++ const VexAbiInfo* abiinfo ) ++{ ++ UInt fk = get_fk(insn); ++ UInt fj = get_fj(insn); ++ UInt cd = get_cd(insn); ++ ++ DIP("fcmp.slt.s %s, %s, %s\n", nameFCC(cd), nameFReg(fj), nameFReg(fk)); ++ ++ if (!(archinfo->hwcaps & VEX_HWCAPS_LOONGARCH_FP)) { ++ dres->jk_StopHere = Ijk_SigILL; ++ dres->whatNext = Dis_StopHere; ++ return True; ++ } ++ ++ return gen_fcmp_cond_helper(FCMP_SLT_S, cd, fj, fk, False); ++} ++ ++static Bool gen_fcmp_slt_d ( DisResult* dres, UInt insn, ++ const VexArchInfo* archinfo, ++ const VexAbiInfo* abiinfo ) ++{ ++ UInt fk = get_fk(insn); ++ UInt fj = get_fj(insn); ++ 
UInt cd = get_cd(insn); ++ ++ DIP("fcmp.slt.d %s, %s, %s\n", nameFCC(cd), nameFReg(fj), nameFReg(fk)); ++ ++ if (!(archinfo->hwcaps & VEX_HWCAPS_LOONGARCH_FP)) { ++ dres->jk_StopHere = Ijk_SigILL; ++ dres->whatNext = Dis_StopHere; ++ return True; ++ } ++ ++ return gen_fcmp_cond_helper(FCMP_SLT_D, cd, fj, fk, True); ++} ++ ++static Bool gen_fcmp_ceq_s ( DisResult* dres, UInt insn, ++ const VexArchInfo* archinfo, ++ const VexAbiInfo* abiinfo ) ++{ ++ UInt fk = get_fk(insn); ++ UInt fj = get_fj(insn); ++ UInt cd = get_cd(insn); ++ ++ DIP("fcmp.ceq.s %s, %s, %s\n", nameFCC(cd), nameFReg(fj), nameFReg(fk)); ++ ++ if (!(archinfo->hwcaps & VEX_HWCAPS_LOONGARCH_FP)) { ++ dres->jk_StopHere = Ijk_SigILL; ++ dres->whatNext = Dis_StopHere; ++ return True; ++ } ++ ++ return gen_fcmp_cond_helper(FCMP_CEQ_S, cd, fj, fk, False); ++} ++ ++static Bool gen_fcmp_ceq_d ( DisResult* dres, UInt insn, ++ const VexArchInfo* archinfo, ++ const VexAbiInfo* abiinfo ) ++{ ++ UInt fk = get_fk(insn); ++ UInt fj = get_fj(insn); ++ UInt cd = get_cd(insn); ++ ++ DIP("fcmp.ceq.d %s, %s, %s\n", nameFCC(cd), nameFReg(fj), nameFReg(fk)); ++ ++ if (!(archinfo->hwcaps & VEX_HWCAPS_LOONGARCH_FP)) { ++ dres->jk_StopHere = Ijk_SigILL; ++ dres->whatNext = Dis_StopHere; ++ return True; ++ } ++ ++ return gen_fcmp_cond_helper(FCMP_CEQ_D, cd, fj, fk, True); ++} ++ ++static Bool gen_fcmp_seq_s ( DisResult* dres, UInt insn, ++ const VexArchInfo* archinfo, ++ const VexAbiInfo* abiinfo ) ++{ ++ UInt fk = get_fk(insn); ++ UInt fj = get_fj(insn); ++ UInt cd = get_cd(insn); ++ ++ DIP("fcmp.seq.s %s, %s, %s\n", nameFCC(cd), nameFReg(fj), nameFReg(fk)); ++ ++ if (!(archinfo->hwcaps & VEX_HWCAPS_LOONGARCH_FP)) { ++ dres->jk_StopHere = Ijk_SigILL; ++ dres->whatNext = Dis_StopHere; ++ return True; ++ } ++ ++ return gen_fcmp_cond_helper(FCMP_SEQ_S, cd, fj, fk, False); ++} ++ ++static Bool gen_fcmp_seq_d ( DisResult* dres, UInt insn, ++ const VexArchInfo* archinfo, ++ const VexAbiInfo* abiinfo ) ++{ ++ UInt fk = get_fk(insn); ++ UInt fj = get_fj(insn); ++ UInt cd = get_cd(insn); ++ ++ DIP("fcmp.seq.d %s, %s, %s\n", nameFCC(cd), nameFReg(fj), nameFReg(fk)); ++ ++ if (!(archinfo->hwcaps & VEX_HWCAPS_LOONGARCH_FP)) { ++ dres->jk_StopHere = Ijk_SigILL; ++ dres->whatNext = Dis_StopHere; ++ return True; ++ } ++ ++ return gen_fcmp_cond_helper(FCMP_SEQ_D, cd, fj, fk, True); ++} ++ ++static Bool gen_fcmp_cle_s ( DisResult* dres, UInt insn, ++ const VexArchInfo* archinfo, ++ const VexAbiInfo* abiinfo ) ++{ ++ UInt fk = get_fk(insn); ++ UInt fj = get_fj(insn); ++ UInt cd = get_cd(insn); ++ ++ DIP("fcmp.cle.s %s, %s, %s\n", nameFCC(cd), nameFReg(fj), nameFReg(fk)); ++ ++ if (!(archinfo->hwcaps & VEX_HWCAPS_LOONGARCH_FP)) { ++ dres->jk_StopHere = Ijk_SigILL; ++ dres->whatNext = Dis_StopHere; ++ return True; ++ } ++ ++ return gen_fcmp_cond_helper(FCMP_CLE_S, cd, fj, fk, False); ++} ++ ++static Bool gen_fcmp_cle_d ( DisResult* dres, UInt insn, ++ const VexArchInfo* archinfo, ++ const VexAbiInfo* abiinfo ) ++{ ++ UInt fk = get_fk(insn); ++ UInt fj = get_fj(insn); ++ UInt cd = get_cd(insn); ++ ++ DIP("fcmp.cle.d %s, %s, %s\n", nameFCC(cd), nameFReg(fj), nameFReg(fk)); ++ ++ if (!(archinfo->hwcaps & VEX_HWCAPS_LOONGARCH_FP)) { ++ dres->jk_StopHere = Ijk_SigILL; ++ dres->whatNext = Dis_StopHere; ++ return True; ++ } ++ ++ return gen_fcmp_cond_helper(FCMP_CLE_D, cd, fj, fk, True); ++} ++ ++static Bool gen_fcmp_sle_s ( DisResult* dres, UInt insn, ++ const VexArchInfo* archinfo, ++ const VexAbiInfo* abiinfo ) ++{ ++ UInt fk = get_fk(insn); ++ UInt fj = get_fj(insn); ++ 
UInt cd = get_cd(insn); ++ ++ DIP("fcmp.sle.s %s, %s, %s\n", nameFCC(cd), nameFReg(fj), nameFReg(fk)); ++ ++ if (!(archinfo->hwcaps & VEX_HWCAPS_LOONGARCH_FP)) { ++ dres->jk_StopHere = Ijk_SigILL; ++ dres->whatNext = Dis_StopHere; ++ return True; ++ } ++ ++ return gen_fcmp_cond_helper(FCMP_SLE_S, cd, fj, fk, False); ++} ++ ++static Bool gen_fcmp_sle_d ( DisResult* dres, UInt insn, ++ const VexArchInfo* archinfo, ++ const VexAbiInfo* abiinfo ) ++{ ++ UInt fk = get_fk(insn); ++ UInt fj = get_fj(insn); ++ UInt cd = get_cd(insn); ++ ++ DIP("fcmp.sle.d %s, %s, %s\n", nameFCC(cd), nameFReg(fj), nameFReg(fk)); ++ ++ if (!(archinfo->hwcaps & VEX_HWCAPS_LOONGARCH_FP)) { ++ dres->jk_StopHere = Ijk_SigILL; ++ dres->whatNext = Dis_StopHere; ++ return True; ++ } ++ ++ return gen_fcmp_cond_helper(FCMP_SLE_D, cd, fj, fk, True); ++} ++ ++static Bool gen_fcmp_cun_s ( DisResult* dres, UInt insn, ++ const VexArchInfo* archinfo, ++ const VexAbiInfo* abiinfo ) ++{ ++ UInt fk = get_fk(insn); ++ UInt fj = get_fj(insn); ++ UInt cd = get_cd(insn); ++ ++ DIP("fcmp.cun.s %s, %s, %s\n", nameFCC(cd), nameFReg(fj), nameFReg(fk)); ++ ++ if (!(archinfo->hwcaps & VEX_HWCAPS_LOONGARCH_FP)) { ++ dres->jk_StopHere = Ijk_SigILL; ++ dres->whatNext = Dis_StopHere; ++ return True; ++ } ++ ++ return gen_fcmp_cond_helper(FCMP_CUN_S, cd, fj, fk, False); ++} ++ ++static Bool gen_fcmp_cun_d ( DisResult* dres, UInt insn, ++ const VexArchInfo* archinfo, ++ const VexAbiInfo* abiinfo ) ++{ ++ UInt fk = get_fk(insn); ++ UInt fj = get_fj(insn); ++ UInt cd = get_cd(insn); ++ ++ DIP("fcmp.cun.d %s, %s, %s\n", nameFCC(cd), nameFReg(fj), nameFReg(fk)); ++ ++ if (!(archinfo->hwcaps & VEX_HWCAPS_LOONGARCH_FP)) { ++ dres->jk_StopHere = Ijk_SigILL; ++ dres->whatNext = Dis_StopHere; ++ return True; ++ } ++ ++ return gen_fcmp_cond_helper(FCMP_CUN_D, cd, fj, fk, True); ++} ++ ++static Bool gen_fcmp_sun_s ( DisResult* dres, UInt insn, ++ const VexArchInfo* archinfo, ++ const VexAbiInfo* abiinfo ) ++{ ++ UInt fk = get_fk(insn); ++ UInt fj = get_fj(insn); ++ UInt cd = get_cd(insn); ++ ++ DIP("fcmp.sun.s %s, %s, %s\n", nameFCC(cd), nameFReg(fj), nameFReg(fk)); ++ ++ if (!(archinfo->hwcaps & VEX_HWCAPS_LOONGARCH_FP)) { ++ dres->jk_StopHere = Ijk_SigILL; ++ dres->whatNext = Dis_StopHere; ++ return True; ++ } ++ ++ return gen_fcmp_cond_helper(FCMP_SUN_S, cd, fj, fk, False); ++} ++ ++static Bool gen_fcmp_sun_d ( DisResult* dres, UInt insn, ++ const VexArchInfo* archinfo, ++ const VexAbiInfo* abiinfo ) ++{ ++ UInt fk = get_fk(insn); ++ UInt fj = get_fj(insn); ++ UInt cd = get_cd(insn); ++ ++ DIP("fcmp.sun.d %s, %s, %s\n", nameFCC(cd), nameFReg(fj), nameFReg(fk)); ++ ++ if (!(archinfo->hwcaps & VEX_HWCAPS_LOONGARCH_FP)) { ++ dres->jk_StopHere = Ijk_SigILL; ++ dres->whatNext = Dis_StopHere; ++ return True; ++ } ++ ++ return gen_fcmp_cond_helper(FCMP_SUN_D, cd, fj, fk, True); ++} ++ ++static Bool gen_fcmp_cult_s ( DisResult* dres, UInt insn, ++ const VexArchInfo* archinfo, ++ const VexAbiInfo* abiinfo ) ++{ ++ UInt fk = get_fk(insn); ++ UInt fj = get_fj(insn); ++ UInt cd = get_cd(insn); ++ ++ DIP("fcmp.cult.s %s, %s, %s\n", nameFCC(cd), nameFReg(fj), nameFReg(fk)); ++ ++ if (!(archinfo->hwcaps & VEX_HWCAPS_LOONGARCH_FP)) { ++ dres->jk_StopHere = Ijk_SigILL; ++ dres->whatNext = Dis_StopHere; ++ return True; ++ } ++ ++ return gen_fcmp_cond_helper(FCMP_CULT_S, cd, fj, fk, False); ++} ++ ++static Bool gen_fcmp_cult_d ( DisResult* dres, UInt insn, ++ const VexArchInfo* archinfo, ++ const VexAbiInfo* abiinfo ) ++{ ++ UInt fk = get_fk(insn); ++ UInt fj = get_fj(insn); 
++ UInt cd = get_cd(insn); ++ ++ DIP("fcmp.cult.d %s, %s, %s\n", nameFCC(cd), nameFReg(fj), nameFReg(fk)); ++ ++ if (!(archinfo->hwcaps & VEX_HWCAPS_LOONGARCH_FP)) { ++ dres->jk_StopHere = Ijk_SigILL; ++ dres->whatNext = Dis_StopHere; ++ return True; ++ } ++ ++ return gen_fcmp_cond_helper(FCMP_CULT_D, cd, fj, fk, True); ++} ++ ++static Bool gen_fcmp_sult_s ( DisResult* dres, UInt insn, ++ const VexArchInfo* archinfo, ++ const VexAbiInfo* abiinfo ) ++{ ++ UInt fk = get_fk(insn); ++ UInt fj = get_fj(insn); ++ UInt cd = get_cd(insn); ++ ++ DIP("fcmp.sult.s %s, %s, %s\n", nameFCC(cd), nameFReg(fj), nameFReg(fk)); ++ ++ if (!(archinfo->hwcaps & VEX_HWCAPS_LOONGARCH_FP)) { ++ dres->jk_StopHere = Ijk_SigILL; ++ dres->whatNext = Dis_StopHere; ++ return True; ++ } ++ ++ return gen_fcmp_cond_helper(FCMP_SULT_S, cd, fj, fk, False); ++} ++ ++static Bool gen_fcmp_sult_d ( DisResult* dres, UInt insn, ++ const VexArchInfo* archinfo, ++ const VexAbiInfo* abiinfo ) ++{ ++ UInt fk = get_fk(insn); ++ UInt fj = get_fj(insn); ++ UInt cd = get_cd(insn); ++ ++ DIP("fcmp.sult.d %s, %s, %s\n", nameFCC(cd), nameFReg(fj), nameFReg(fk)); ++ ++ if (!(archinfo->hwcaps & VEX_HWCAPS_LOONGARCH_FP)) { ++ dres->jk_StopHere = Ijk_SigILL; ++ dres->whatNext = Dis_StopHere; ++ return True; ++ } ++ ++ return gen_fcmp_cond_helper(FCMP_SULT_D, cd, fj, fk, True); ++} ++ ++static Bool gen_fcmp_cueq_s ( DisResult* dres, UInt insn, ++ const VexArchInfo* archinfo, ++ const VexAbiInfo* abiinfo ) ++{ ++ UInt fk = get_fk(insn); ++ UInt fj = get_fj(insn); ++ UInt cd = get_cd(insn); ++ ++ DIP("fcmp.cueq.s %s, %s, %s\n", nameFCC(cd), nameFReg(fj), nameFReg(fk)); ++ ++ if (!(archinfo->hwcaps & VEX_HWCAPS_LOONGARCH_FP)) { ++ dres->jk_StopHere = Ijk_SigILL; ++ dres->whatNext = Dis_StopHere; ++ return True; ++ } ++ ++ return gen_fcmp_cond_helper(FCMP_CUEQ_S, cd, fj, fk, False); ++} ++ ++static Bool gen_fcmp_cueq_d ( DisResult* dres, UInt insn, ++ const VexArchInfo* archinfo, ++ const VexAbiInfo* abiinfo ) ++{ ++ UInt fk = get_fk(insn); ++ UInt fj = get_fj(insn); ++ UInt cd = get_cd(insn); ++ ++ DIP("fcmp.cueq.d %s, %s, %s\n", nameFCC(cd), nameFReg(fj), nameFReg(fk)); ++ ++ if (!(archinfo->hwcaps & VEX_HWCAPS_LOONGARCH_FP)) { ++ dres->jk_StopHere = Ijk_SigILL; ++ dres->whatNext = Dis_StopHere; ++ return True; ++ } ++ ++ return gen_fcmp_cond_helper(FCMP_CUEQ_D, cd, fj, fk, True); ++} ++ ++static Bool gen_fcmp_sueq_s ( DisResult* dres, UInt insn, ++ const VexArchInfo* archinfo, ++ const VexAbiInfo* abiinfo ) ++{ ++ UInt fk = get_fk(insn); ++ UInt fj = get_fj(insn); ++ UInt cd = get_cd(insn); ++ ++ DIP("fcmp.sueq.s %s, %s, %s\n", nameFCC(cd), nameFReg(fj), nameFReg(fk)); ++ ++ if (!(archinfo->hwcaps & VEX_HWCAPS_LOONGARCH_FP)) { ++ dres->jk_StopHere = Ijk_SigILL; ++ dres->whatNext = Dis_StopHere; ++ return True; ++ } ++ ++ return gen_fcmp_cond_helper(FCMP_SUEQ_S, cd, fj, fk, False); ++} ++ ++static Bool gen_fcmp_sueq_d ( DisResult* dres, UInt insn, ++ const VexArchInfo* archinfo, ++ const VexAbiInfo* abiinfo ) ++{ ++ UInt fk = get_fk(insn); ++ UInt fj = get_fj(insn); ++ UInt cd = get_cd(insn); ++ ++ DIP("fcmp.sueq.d %s, %s, %s\n", nameFCC(cd), nameFReg(fj), nameFReg(fk)); ++ ++ if (!(archinfo->hwcaps & VEX_HWCAPS_LOONGARCH_FP)) { ++ dres->jk_StopHere = Ijk_SigILL; ++ dres->whatNext = Dis_StopHere; ++ return True; ++ } ++ ++ return gen_fcmp_cond_helper(FCMP_SUEQ_D, cd, fj, fk, True); ++} ++ ++static Bool gen_fcmp_cule_s ( DisResult* dres, UInt insn, ++ const VexArchInfo* archinfo, ++ const VexAbiInfo* abiinfo ) ++{ ++ UInt fk = get_fk(insn); ++ UInt 
fj = get_fj(insn); ++ UInt cd = get_cd(insn); ++ ++ DIP("fcmp.cule.s %s, %s, %s\n", nameFCC(cd), nameFReg(fj), nameFReg(fk)); ++ ++ if (!(archinfo->hwcaps & VEX_HWCAPS_LOONGARCH_FP)) { ++ dres->jk_StopHere = Ijk_SigILL; ++ dres->whatNext = Dis_StopHere; ++ return True; ++ } ++ ++ return gen_fcmp_cond_helper(FCMP_CULE_S, cd, fj, fk, False); ++} ++ ++static Bool gen_fcmp_cule_d ( DisResult* dres, UInt insn, ++ const VexArchInfo* archinfo, ++ const VexAbiInfo* abiinfo ) ++{ ++ UInt fk = get_fk(insn); ++ UInt fj = get_fj(insn); ++ UInt cd = get_cd(insn); ++ ++ DIP("fcmp.cule.d %s, %s, %s\n", nameFCC(cd), nameFReg(fj), nameFReg(fk)); ++ ++ if (!(archinfo->hwcaps & VEX_HWCAPS_LOONGARCH_FP)) { ++ dres->jk_StopHere = Ijk_SigILL; ++ dres->whatNext = Dis_StopHere; ++ return True; ++ } ++ ++ return gen_fcmp_cond_helper(FCMP_CULE_D, cd, fj, fk, True); ++} ++ ++static Bool gen_fcmp_sule_s ( DisResult* dres, UInt insn, ++ const VexArchInfo* archinfo, ++ const VexAbiInfo* abiinfo ) ++{ ++ UInt fk = get_fk(insn); ++ UInt fj = get_fj(insn); ++ UInt cd = get_cd(insn); ++ ++ DIP("fcmp.sule.s %s, %s, %s\n", nameFCC(cd), nameFReg(fj), nameFReg(fk)); ++ ++ if (!(archinfo->hwcaps & VEX_HWCAPS_LOONGARCH_FP)) { ++ dres->jk_StopHere = Ijk_SigILL; ++ dres->whatNext = Dis_StopHere; ++ return True; ++ } ++ ++ return gen_fcmp_cond_helper(FCMP_SULE_S, cd, fj, fk, False); ++} ++ ++static Bool gen_fcmp_sule_d ( DisResult* dres, UInt insn, ++ const VexArchInfo* archinfo, ++ const VexAbiInfo* abiinfo ) ++{ ++ UInt fk = get_fk(insn); ++ UInt fj = get_fj(insn); ++ UInt cd = get_cd(insn); ++ ++ DIP("fcmp.sule.d %s, %s, %s\n", nameFCC(cd), nameFReg(fj), nameFReg(fk)); ++ ++ if (!(archinfo->hwcaps & VEX_HWCAPS_LOONGARCH_FP)) { ++ dres->jk_StopHere = Ijk_SigILL; ++ dres->whatNext = Dis_StopHere; ++ return True; ++ } ++ ++ return gen_fcmp_cond_helper(FCMP_SULE_D, cd, fj, fk, True); ++} ++ ++static Bool gen_fcmp_cne_s ( DisResult* dres, UInt insn, ++ const VexArchInfo* archinfo, ++ const VexAbiInfo* abiinfo ) ++{ ++ UInt fk = get_fk(insn); ++ UInt fj = get_fj(insn); ++ UInt cd = get_cd(insn); ++ ++ DIP("fcmp.cne.s %s, %s, %s\n", nameFCC(cd), nameFReg(fj), nameFReg(fk)); ++ ++ if (!(archinfo->hwcaps & VEX_HWCAPS_LOONGARCH_FP)) { ++ dres->jk_StopHere = Ijk_SigILL; ++ dres->whatNext = Dis_StopHere; ++ return True; ++ } ++ ++ return gen_fcmp_cond_helper(FCMP_CNE_S, cd, fj, fk, False); ++} ++ ++static Bool gen_fcmp_cne_d ( DisResult* dres, UInt insn, ++ const VexArchInfo* archinfo, ++ const VexAbiInfo* abiinfo ) ++{ ++ UInt fk = get_fk(insn); ++ UInt fj = get_fj(insn); ++ UInt cd = get_cd(insn); ++ ++ DIP("fcmp.cne.d %s, %s, %s\n", nameFCC(cd), nameFReg(fj), nameFReg(fk)); ++ ++ if (!(archinfo->hwcaps & VEX_HWCAPS_LOONGARCH_FP)) { ++ dres->jk_StopHere = Ijk_SigILL; ++ dres->whatNext = Dis_StopHere; ++ return True; ++ } ++ ++ return gen_fcmp_cond_helper(FCMP_CNE_D, cd, fj, fk, True); ++} ++ ++static Bool gen_fcmp_sne_s ( DisResult* dres, UInt insn, ++ const VexArchInfo* archinfo, ++ const VexAbiInfo* abiinfo ) ++{ ++ UInt fk = get_fk(insn); ++ UInt fj = get_fj(insn); ++ UInt cd = get_cd(insn); ++ ++ DIP("fcmp.sne.s %s, %s, %s\n", nameFCC(cd), nameFReg(fj), nameFReg(fk)); ++ ++ if (!(archinfo->hwcaps & VEX_HWCAPS_LOONGARCH_FP)) { ++ dres->jk_StopHere = Ijk_SigILL; ++ dres->whatNext = Dis_StopHere; ++ return True; ++ } ++ ++ return gen_fcmp_cond_helper(FCMP_SNE_S, cd, fj, fk, False); ++} ++ ++static Bool gen_fcmp_sne_d ( DisResult* dres, UInt insn, ++ const VexArchInfo* archinfo, ++ const VexAbiInfo* abiinfo ) ++{ ++ UInt fk = 
get_fk(insn); ++ UInt fj = get_fj(insn); ++ UInt cd = get_cd(insn); ++ ++ DIP("fcmp.sne.d %s, %s, %s\n", nameFCC(cd), nameFReg(fj), nameFReg(fk)); ++ ++ if (!(archinfo->hwcaps & VEX_HWCAPS_LOONGARCH_FP)) { ++ dres->jk_StopHere = Ijk_SigILL; ++ dres->whatNext = Dis_StopHere; ++ return True; ++ } ++ ++ return gen_fcmp_cond_helper(FCMP_SNE_D, cd, fj, fk, True); ++} ++ ++static Bool gen_fcmp_cor_s ( DisResult* dres, UInt insn, ++ const VexArchInfo* archinfo, ++ const VexAbiInfo* abiinfo ) ++{ ++ UInt fk = get_fk(insn); ++ UInt fj = get_fj(insn); ++ UInt cd = get_cd(insn); ++ ++ DIP("fcmp.cor.s %s, %s, %s\n", nameFCC(cd), nameFReg(fj), nameFReg(fk)); ++ ++ if (!(archinfo->hwcaps & VEX_HWCAPS_LOONGARCH_FP)) { ++ dres->jk_StopHere = Ijk_SigILL; ++ dres->whatNext = Dis_StopHere; ++ return True; ++ } ++ ++ return gen_fcmp_cond_helper(FCMP_COR_S, cd, fj, fk, False); ++} ++ ++static Bool gen_fcmp_cor_d ( DisResult* dres, UInt insn, ++ const VexArchInfo* archinfo, ++ const VexAbiInfo* abiinfo ) ++{ ++ UInt fk = get_fk(insn); ++ UInt fj = get_fj(insn); ++ UInt cd = get_cd(insn); ++ ++ DIP("fcmp.cor.d %s, %s, %s\n", nameFCC(cd), nameFReg(fj), nameFReg(fk)); ++ ++ if (!(archinfo->hwcaps & VEX_HWCAPS_LOONGARCH_FP)) { ++ dres->jk_StopHere = Ijk_SigILL; ++ dres->whatNext = Dis_StopHere; ++ return True; ++ } ++ ++ return gen_fcmp_cond_helper(FCMP_COR_D, cd, fj, fk, True); ++} ++ ++static Bool gen_fcmp_sor_s ( DisResult* dres, UInt insn, ++ const VexArchInfo* archinfo, ++ const VexAbiInfo* abiinfo ) ++{ ++ UInt fk = get_fk(insn); ++ UInt fj = get_fj(insn); ++ UInt cd = get_cd(insn); ++ ++ DIP("fcmp.sor.s %s, %s, %s\n", nameFCC(cd), nameFReg(fj), nameFReg(fk)); ++ ++ if (!(archinfo->hwcaps & VEX_HWCAPS_LOONGARCH_FP)) { ++ dres->jk_StopHere = Ijk_SigILL; ++ dres->whatNext = Dis_StopHere; ++ return True; ++ } ++ ++ return gen_fcmp_cond_helper(FCMP_SOR_S, cd, fj, fk, False); ++} ++ ++static Bool gen_fcmp_sor_d ( DisResult* dres, UInt insn, ++ const VexArchInfo* archinfo, ++ const VexAbiInfo* abiinfo ) ++{ ++ UInt fk = get_fk(insn); ++ UInt fj = get_fj(insn); ++ UInt cd = get_cd(insn); ++ ++ DIP("fcmp.sor.d %s, %s, %s\n", nameFCC(cd), nameFReg(fj), nameFReg(fk)); ++ ++ if (!(archinfo->hwcaps & VEX_HWCAPS_LOONGARCH_FP)) { ++ dres->jk_StopHere = Ijk_SigILL; ++ dres->whatNext = Dis_StopHere; ++ return True; ++ } ++ ++ return gen_fcmp_cond_helper(FCMP_SOR_D, cd, fj, fk, True); ++} ++ ++static Bool gen_fcmp_cune_s ( DisResult* dres, UInt insn, ++ const VexArchInfo* archinfo, ++ const VexAbiInfo* abiinfo ) ++{ ++ UInt fk = get_fk(insn); ++ UInt fj = get_fj(insn); ++ UInt cd = get_cd(insn); ++ ++ DIP("fcmp.cune.s %s, %s, %s\n", nameFCC(cd), nameFReg(fj), nameFReg(fk)); ++ ++ if (!(archinfo->hwcaps & VEX_HWCAPS_LOONGARCH_FP)) { ++ dres->jk_StopHere = Ijk_SigILL; ++ dres->whatNext = Dis_StopHere; ++ return True; ++ } ++ ++ return gen_fcmp_cond_helper(FCMP_CUNE_S, cd, fj, fk, False); ++} ++ ++static Bool gen_fcmp_cune_d ( DisResult* dres, UInt insn, ++ const VexArchInfo* archinfo, ++ const VexAbiInfo* abiinfo ) ++{ ++ UInt fk = get_fk(insn); ++ UInt fj = get_fj(insn); ++ UInt cd = get_cd(insn); ++ ++ DIP("fcmp.cune.d %s, %s, %s\n", nameFCC(cd), nameFReg(fj), nameFReg(fk)); ++ ++ if (!(archinfo->hwcaps & VEX_HWCAPS_LOONGARCH_FP)) { ++ dres->jk_StopHere = Ijk_SigILL; ++ dres->whatNext = Dis_StopHere; ++ return True; ++ } ++ ++ return gen_fcmp_cond_helper(FCMP_CUNE_D, cd, fj, fk, True); ++} ++ ++static Bool gen_fcmp_sune_s ( DisResult* dres, UInt insn, ++ const VexArchInfo* archinfo, ++ const VexAbiInfo* abiinfo ) ++{ ++ UInt 
fk = get_fk(insn); ++ UInt fj = get_fj(insn); ++ UInt cd = get_cd(insn); ++ ++ DIP("fcmp.sune.s %s, %s, %s\n", nameFCC(cd), nameFReg(fj), nameFReg(fk)); ++ ++ if (!(archinfo->hwcaps & VEX_HWCAPS_LOONGARCH_FP)) { ++ dres->jk_StopHere = Ijk_SigILL; ++ dres->whatNext = Dis_StopHere; ++ return True; ++ } ++ ++ return gen_fcmp_cond_helper(FCMP_SUNE_S, cd, fj, fk, False); ++} ++ ++static Bool gen_fcmp_sune_d ( DisResult* dres, UInt insn, ++ const VexArchInfo* archinfo, ++ const VexAbiInfo* abiinfo ) ++{ ++ UInt fk = get_fk(insn); ++ UInt fj = get_fj(insn); ++ UInt cd = get_cd(insn); ++ ++ DIP("fcmp.sune.d %s, %s, %s\n", nameFCC(cd), nameFReg(fj), nameFReg(fk)); ++ ++ if (!(archinfo->hwcaps & VEX_HWCAPS_LOONGARCH_FP)) { ++ dres->jk_StopHere = Ijk_SigILL; ++ dres->whatNext = Dis_StopHere; ++ return True; ++ } ++ ++ return gen_fcmp_cond_helper(FCMP_SUNE_D, cd, fj, fk, True); ++} ++ ++ ++/*------------------------------------------------------------*/ ++/*--- Helpers for floating point conversion insns ---*/ ++/*------------------------------------------------------------*/ ++ ++static IRExpr* is_Invalid_Overflow ( void ) ++{ ++ /* Bits 16 to 20 in FCSR are flags. ++ Bit 18 - overflow ++ Bit 20 - invalid ++ */ ++ IRExpr* fcsr = getFCSR(0); ++ IRExpr* shr = binop(Iop_Shr32, fcsr, mkU8(16)); ++ IRExpr* and = binop(Iop_And32, shr, mkU32(0x14)); ++ return binop(Iop_CmpNE32, and, getIReg32(0)); ++} ++ ++static Bool gen_convert_s_helper ( enum fpop op, UInt fd, UInt fj ) ++{ ++ IRExpr* e; ++ IRExpr* rm; ++ switch (op) { ++ case FTINTRM_W_S: ++ rm = gen_round_down(); ++ e = binop(Iop_F32toI32S, rm, getFReg32(fj)); ++ break; ++ case FTINTRM_W_D: ++ rm = gen_round_down(); ++ e = binop(Iop_F64toI32S, rm, getFReg64(fj)); ++ break; ++ case FTINTRP_W_S: ++ rm = gen_round_up(); ++ e = binop(Iop_F32toI32S, rm, getFReg32(fj)); ++ break; ++ case FTINTRP_W_D: ++ rm = gen_round_up(); ++ e = binop(Iop_F64toI32S, rm, getFReg64(fj)); ++ break; ++ case FTINTRZ_W_S: ++ rm = gen_round_to_zero(); ++ e = binop(Iop_F32toI32S, rm, getFReg32(fj)); ++ break; ++ case FTINTRZ_W_D: ++ rm = gen_round_to_zero(); ++ e = binop(Iop_F64toI32S, rm, getFReg64(fj)); ++ break; ++ case FTINTRNE_W_S: ++ rm = gen_round_to_nearest(); ++ e = binop(Iop_F32toI32S, rm, getFReg32(fj)); ++ break; ++ case FTINTRNE_W_D: ++ rm = gen_round_to_nearest(); ++ e = binop(Iop_F64toI32S, rm, getFReg64(fj)); ++ break; ++ case FTINT_W_S: ++ rm = get_rounding_mode(); ++ e = binop(Iop_F32toI32S, rm, getFReg32(fj)); ++ break; ++ case FTINT_W_D: ++ rm = get_rounding_mode(); ++ e = binop(Iop_F64toI32S, rm, getFReg64(fj)); ++ break; ++ default: ++ return False; ++ } ++ ++ calculateFCSR(op, 1, fj, 0, 0); ++ IRExpr* ite = IRExpr_ITE(is_Invalid_Overflow(), mkU32(0x7fffffff), e); ++ putFReg32(fd, unop(Iop_ReinterpI32asF32, ite)); ++ ++ return True; ++} ++ ++static Bool gen_convert_d_helper ( enum fpop op, UInt fd, UInt fj ) ++{ ++ IRExpr* e; ++ IRExpr* rm; ++ switch (op) { ++ case FTINTRM_L_S: ++ rm = gen_round_down(); ++ e = binop(Iop_F32toI64S, rm, getFReg32(fj)); ++ break; ++ case FTINTRM_L_D: ++ rm = gen_round_down(); ++ e = binop(Iop_F64toI64S, rm, getFReg64(fj)); ++ break; ++ case FTINTRP_L_S: ++ rm = gen_round_up(); ++ e = binop(Iop_F32toI64S, rm, getFReg32(fj)); ++ break; ++ case FTINTRP_L_D: ++ rm = gen_round_up(); ++ e = binop(Iop_F64toI64S, rm, getFReg64(fj)); ++ break; ++ case FTINTRZ_L_S: ++ rm = gen_round_to_zero(); ++ e = binop(Iop_F32toI64S, rm, getFReg32(fj)); ++ break; ++ case FTINTRZ_L_D: ++ rm = gen_round_to_zero(); ++ e = binop(Iop_F64toI64S, rm, 
getFReg64(fj)); ++ break; ++ case FTINTRNE_L_S: ++ rm = gen_round_to_nearest(); ++ e = binop(Iop_F32toI64S, rm, getFReg32(fj)); ++ break; ++ case FTINTRNE_L_D: ++ rm = gen_round_to_nearest(); ++ e = binop(Iop_F64toI64S, rm, getFReg64(fj)); ++ break; ++ case FTINT_L_S: ++ rm = get_rounding_mode(); ++ e = binop(Iop_F32toI64S, rm, getFReg32(fj)); ++ break; ++ case FTINT_L_D: ++ rm = get_rounding_mode(); ++ e = binop(Iop_F64toI64S, rm, getFReg64(fj)); ++ break; ++ default: ++ return False; ++ } ++ ++ calculateFCSR(op, 1, fj, 0, 0); ++ IRExpr* ite = IRExpr_ITE(is_Invalid_Overflow(), ++ mkU64(0x7fffffffffffffffULL), e); ++ putFReg64(fd, unop(Iop_ReinterpI64asF64, ite)); ++ ++ return True; ++} ++ ++static Bool gen_fcvt_s_d ( DisResult* dres, UInt insn, ++ const VexArchInfo* archinfo, ++ const VexAbiInfo* abiinfo ) ++{ ++ UInt fj = get_fj(insn); ++ UInt fd = get_fd(insn); ++ ++ DIP("fcvt.s.d %s, %s\n", nameFReg(fd), nameFReg(fj)); ++ ++ if (!(archinfo->hwcaps & VEX_HWCAPS_LOONGARCH_FP)) { ++ dres->jk_StopHere = Ijk_SigILL; ++ dres->whatNext = Dis_StopHere; ++ return True; ++ } ++ ++ calculateFCSR(FCVT_S_D, 1, fj, 0, 0); ++ IRExpr* rm = get_rounding_mode(); ++ putFReg32(fd, binop(Iop_F64toF32, rm, getFReg64(fj))); ++ ++ return True; ++} ++ ++static Bool gen_fcvt_d_s ( DisResult* dres, UInt insn, ++ const VexArchInfo* archinfo, ++ const VexAbiInfo* abiinfo ) ++{ ++ UInt fj = get_fj(insn); ++ UInt fd = get_fd(insn); ++ ++ DIP("fcvt.d.s %s, %s\n", nameFReg(fd), nameFReg(fj)); ++ ++ if (!(archinfo->hwcaps & VEX_HWCAPS_LOONGARCH_FP)) { ++ dres->jk_StopHere = Ijk_SigILL; ++ dres->whatNext = Dis_StopHere; ++ return True; ++ } ++ ++ calculateFCSR(FCVT_D_S, 1, fj, 0, 0); ++ putFReg64(fd, unop(Iop_F32toF64, getFReg32(fj))); ++ ++ return True; ++} ++ ++static Bool gen_ftintrm_w_s ( DisResult* dres, UInt insn, ++ const VexArchInfo* archinfo, ++ const VexAbiInfo* abiinfo ) ++{ ++ UInt fj = get_fj(insn); ++ UInt fd = get_fd(insn); ++ ++ DIP("ftintrm.w.s %s, %s\n", nameFReg(fd), nameFReg(fj)); ++ ++ if (!(archinfo->hwcaps & VEX_HWCAPS_LOONGARCH_FP)) { ++ dres->jk_StopHere = Ijk_SigILL; ++ dres->whatNext = Dis_StopHere; ++ return True; ++ } ++ ++ return gen_convert_s_helper(FTINTRM_W_S, fd, fj); ++} ++ ++static Bool gen_ftintrm_w_d ( DisResult* dres, UInt insn, ++ const VexArchInfo* archinfo, ++ const VexAbiInfo* abiinfo ) ++{ ++ UInt fj = get_fj(insn); ++ UInt fd = get_fd(insn); ++ ++ DIP("ftintrm.w.d %s, %s\n", nameFReg(fd), nameFReg(fj)); ++ ++ if (!(archinfo->hwcaps & VEX_HWCAPS_LOONGARCH_FP)) { ++ dres->jk_StopHere = Ijk_SigILL; ++ dres->whatNext = Dis_StopHere; ++ return True; ++ } ++ ++ return gen_convert_s_helper(FTINTRM_W_D, fd, fj); ++} ++ ++static Bool gen_ftintrm_l_s ( DisResult* dres, UInt insn, ++ const VexArchInfo* archinfo, ++ const VexAbiInfo* abiinfo ) ++{ ++ UInt fj = get_fj(insn); ++ UInt fd = get_fd(insn); ++ ++ DIP("ftintrm.l.s %s, %s\n", nameFReg(fd), nameFReg(fj)); ++ ++ if (!(archinfo->hwcaps & VEX_HWCAPS_LOONGARCH_FP)) { ++ dres->jk_StopHere = Ijk_SigILL; ++ dres->whatNext = Dis_StopHere; ++ return True; ++ } ++ ++ return gen_convert_d_helper(FTINTRM_L_S, fd, fj); ++} ++ ++static Bool gen_ftintrm_l_d ( DisResult* dres, UInt insn, ++ const VexArchInfo* archinfo, ++ const VexAbiInfo* abiinfo ) ++{ ++ UInt fj = get_fj(insn); ++ UInt fd = get_fd(insn); ++ ++ DIP("ftintrm.l.d %s, %s\n", nameFReg(fd), nameFReg(fj)); ++ ++ if (!(archinfo->hwcaps & VEX_HWCAPS_LOONGARCH_FP)) { ++ dres->jk_StopHere = Ijk_SigILL; ++ dres->whatNext = Dis_StopHere; ++ return True; ++ } ++ ++ return 
gen_convert_d_helper(FTINTRM_L_D, fd, fj); ++} ++ ++static Bool gen_ftintrp_w_s ( DisResult* dres, UInt insn, ++ const VexArchInfo* archinfo, ++ const VexAbiInfo* abiinfo ) ++{ ++ UInt fj = get_fj(insn); ++ UInt fd = get_fd(insn); ++ ++ DIP("ftintrp.w.s %s, %s\n", nameFReg(fd), nameFReg(fj)); ++ ++ if (!(archinfo->hwcaps & VEX_HWCAPS_LOONGARCH_FP)) { ++ dres->jk_StopHere = Ijk_SigILL; ++ dres->whatNext = Dis_StopHere; ++ return True; ++ } ++ ++ return gen_convert_s_helper(FTINTRP_W_S, fd, fj); ++} ++ ++static Bool gen_ftintrp_w_d ( DisResult* dres, UInt insn, ++ const VexArchInfo* archinfo, ++ const VexAbiInfo* abiinfo ) ++{ ++ UInt fj = get_fj(insn); ++ UInt fd = get_fd(insn); ++ ++ DIP("ftintrp.w.d %s, %s\n", nameFReg(fd), nameFReg(fj)); ++ ++ if (!(archinfo->hwcaps & VEX_HWCAPS_LOONGARCH_FP)) { ++ dres->jk_StopHere = Ijk_SigILL; ++ dres->whatNext = Dis_StopHere; ++ return True; ++ } ++ ++ return gen_convert_s_helper(FTINTRP_W_D, fd, fj); ++} ++ ++static Bool gen_ftintrp_l_s ( DisResult* dres, UInt insn, ++ const VexArchInfo* archinfo, ++ const VexAbiInfo* abiinfo ) ++{ ++ UInt fj = get_fj(insn); ++ UInt fd = get_fd(insn); ++ ++ DIP("ftintrp.l.s %s, %s\n", nameFReg(fd), nameFReg(fj)); ++ ++ if (!(archinfo->hwcaps & VEX_HWCAPS_LOONGARCH_FP)) { ++ dres->jk_StopHere = Ijk_SigILL; ++ dres->whatNext = Dis_StopHere; ++ return True; ++ } ++ ++ return gen_convert_d_helper(FTINTRP_L_S, fd, fj); ++} ++ ++static Bool gen_ftintrp_l_d ( DisResult* dres, UInt insn, ++ const VexArchInfo* archinfo, ++ const VexAbiInfo* abiinfo ) ++{ ++ UInt fj = get_fj(insn); ++ UInt fd = get_fd(insn); ++ ++ DIP("ftintrp.l.d %s, %s\n", nameFReg(fd), nameFReg(fj)); ++ ++ if (!(archinfo->hwcaps & VEX_HWCAPS_LOONGARCH_FP)) { ++ dres->jk_StopHere = Ijk_SigILL; ++ dres->whatNext = Dis_StopHere; ++ return True; ++ } ++ ++ return gen_convert_d_helper(FTINTRP_L_D, fd, fj); ++} ++ ++static Bool gen_ftintrz_w_s ( DisResult* dres, UInt insn, ++ const VexArchInfo* archinfo, ++ const VexAbiInfo* abiinfo ) ++{ ++ UInt fj = get_fj(insn); ++ UInt fd = get_fd(insn); ++ ++ DIP("ftintrz.w.s %s, %s\n", nameFReg(fd), nameFReg(fj)); ++ ++ if (!(archinfo->hwcaps & VEX_HWCAPS_LOONGARCH_FP)) { ++ dres->jk_StopHere = Ijk_SigILL; ++ dres->whatNext = Dis_StopHere; ++ return True; ++ } ++ ++ return gen_convert_s_helper(FTINTRZ_W_S, fd, fj); ++} ++ ++static Bool gen_ftintrz_w_d ( DisResult* dres, UInt insn, ++ const VexArchInfo* archinfo, ++ const VexAbiInfo* abiinfo ) ++{ ++ UInt fj = get_fj(insn); ++ UInt fd = get_fd(insn); ++ ++ DIP("ftintrz.w.d %s, %s\n", nameFReg(fd), nameFReg(fj)); ++ ++ if (!(archinfo->hwcaps & VEX_HWCAPS_LOONGARCH_FP)) { ++ dres->jk_StopHere = Ijk_SigILL; ++ dres->whatNext = Dis_StopHere; ++ return True; ++ } ++ ++ return gen_convert_s_helper(FTINTRZ_W_D, fd, fj); ++} ++ ++static Bool gen_ftintrz_l_s ( DisResult* dres, UInt insn, ++ const VexArchInfo* archinfo, ++ const VexAbiInfo* abiinfo ) ++{ ++ UInt fj = get_fj(insn); ++ UInt fd = get_fd(insn); ++ ++ DIP("ftintrz.l.s %s, %s\n", nameFReg(fd), nameFReg(fj)); ++ ++ if (!(archinfo->hwcaps & VEX_HWCAPS_LOONGARCH_FP)) { ++ dres->jk_StopHere = Ijk_SigILL; ++ dres->whatNext = Dis_StopHere; ++ return True; ++ } ++ ++ return gen_convert_d_helper(FTINTRZ_L_S, fd, fj); ++} ++ ++static Bool gen_ftintrz_l_d ( DisResult* dres, UInt insn, ++ const VexArchInfo* archinfo, ++ const VexAbiInfo* abiinfo ) ++{ ++ UInt fj = get_fj(insn); ++ UInt fd = get_fd(insn); ++ ++ DIP("ftintrz.l.d %s, %s\n", nameFReg(fd), nameFReg(fj)); ++ ++ if (!(archinfo->hwcaps & VEX_HWCAPS_LOONGARCH_FP)) { ++ 
dres->jk_StopHere = Ijk_SigILL; ++ dres->whatNext = Dis_StopHere; ++ return True; ++ } ++ ++ return gen_convert_d_helper(FTINTRZ_L_D, fd, fj); ++} ++ ++static Bool gen_ftintrne_w_s ( DisResult* dres, UInt insn, ++ const VexArchInfo* archinfo, ++ const VexAbiInfo* abiinfo ) ++{ ++ UInt fj = get_fj(insn); ++ UInt fd = get_fd(insn); ++ ++ DIP("ftintrne.w.s %s, %s\n", nameFReg(fd), nameFReg(fj)); ++ ++ if (!(archinfo->hwcaps & VEX_HWCAPS_LOONGARCH_FP)) { ++ dres->jk_StopHere = Ijk_SigILL; ++ dres->whatNext = Dis_StopHere; ++ return True; ++ } ++ ++ return gen_convert_s_helper(FTINTRNE_W_S, fd, fj); ++} ++ ++static Bool gen_ftintrne_w_d ( DisResult* dres, UInt insn, ++ const VexArchInfo* archinfo, ++ const VexAbiInfo* abiinfo ) ++{ ++ UInt fj = get_fj(insn); ++ UInt fd = get_fd(insn); ++ ++ DIP("ftintrne.w.d %s, %s\n", nameFReg(fd), nameFReg(fj)); ++ ++ if (!(archinfo->hwcaps & VEX_HWCAPS_LOONGARCH_FP)) { ++ dres->jk_StopHere = Ijk_SigILL; ++ dres->whatNext = Dis_StopHere; ++ return True; ++ } ++ ++ return gen_convert_s_helper(FTINTRNE_W_D, fd, fj); ++} ++ ++static Bool gen_ftintrne_l_s ( DisResult* dres, UInt insn, ++ const VexArchInfo* archinfo, ++ const VexAbiInfo* abiinfo ) ++{ ++ UInt fj = get_fj(insn); ++ UInt fd = get_fd(insn); ++ ++ DIP("ftintrne.l.s %s, %s\n", nameFReg(fd), nameFReg(fj)); ++ ++ if (!(archinfo->hwcaps & VEX_HWCAPS_LOONGARCH_FP)) { ++ dres->jk_StopHere = Ijk_SigILL; ++ dres->whatNext = Dis_StopHere; ++ return True; ++ } ++ ++ return gen_convert_d_helper(FTINTRNE_L_S, fd, fj); ++} ++ ++static Bool gen_ftintrne_l_d ( DisResult* dres, UInt insn, ++ const VexArchInfo* archinfo, ++ const VexAbiInfo* abiinfo ) ++{ ++ UInt fj = get_fj(insn); ++ UInt fd = get_fd(insn); ++ ++ DIP("ftintrne.l.d %s, %s\n", nameFReg(fd), nameFReg(fj)); ++ ++ if (!(archinfo->hwcaps & VEX_HWCAPS_LOONGARCH_FP)) { ++ dres->jk_StopHere = Ijk_SigILL; ++ dres->whatNext = Dis_StopHere; ++ return True; ++ } ++ ++ return gen_convert_d_helper(FTINTRNE_L_D, fd, fj); ++} ++ ++static Bool gen_ftint_w_s ( DisResult* dres, UInt insn, ++ const VexArchInfo* archinfo, ++ const VexAbiInfo* abiinfo ) ++{ ++ UInt fj = get_fj(insn); ++ UInt fd = get_fd(insn); ++ ++ DIP("ftint.w.s %s, %s\n", nameFReg(fd), nameFReg(fj)); ++ ++ if (!(archinfo->hwcaps & VEX_HWCAPS_LOONGARCH_FP)) { ++ dres->jk_StopHere = Ijk_SigILL; ++ dres->whatNext = Dis_StopHere; ++ return True; ++ } ++ ++ return gen_convert_s_helper(FTINT_W_S, fd, fj); ++} ++ ++static Bool gen_ftint_w_d ( DisResult* dres, UInt insn, ++ const VexArchInfo* archinfo, ++ const VexAbiInfo* abiinfo ) ++{ ++ UInt fj = get_fj(insn); ++ UInt fd = get_fd(insn); ++ ++ DIP("ftint.w.d %s, %s\n", nameFReg(fd), nameFReg(fj)); ++ ++ if (!(archinfo->hwcaps & VEX_HWCAPS_LOONGARCH_FP)) { ++ dres->jk_StopHere = Ijk_SigILL; ++ dres->whatNext = Dis_StopHere; ++ return True; ++ } ++ ++ return gen_convert_s_helper(FTINT_W_D, fd, fj); ++} ++ ++static Bool gen_ftint_l_s ( DisResult* dres, UInt insn, ++ const VexArchInfo* archinfo, ++ const VexAbiInfo* abiinfo ) ++{ ++ UInt fj = get_fj(insn); ++ UInt fd = get_fd(insn); ++ ++ DIP("ftint.l.s %s, %s\n", nameFReg(fd), nameFReg(fj)); ++ ++ if (!(archinfo->hwcaps & VEX_HWCAPS_LOONGARCH_FP)) { ++ dres->jk_StopHere = Ijk_SigILL; ++ dres->whatNext = Dis_StopHere; ++ return True; ++ } ++ ++ return gen_convert_d_helper(FTINT_L_S, fd, fj); ++} ++ ++static Bool gen_ftint_l_d ( DisResult* dres, UInt insn, ++ const VexArchInfo* archinfo, ++ const VexAbiInfo* abiinfo ) ++{ ++ UInt fj = get_fj(insn); ++ UInt fd = get_fd(insn); ++ ++ DIP("ftint.l.d %s, %s\n", 
nameFReg(fd), nameFReg(fj)); ++ ++ if (!(archinfo->hwcaps & VEX_HWCAPS_LOONGARCH_FP)) { ++ dres->jk_StopHere = Ijk_SigILL; ++ dres->whatNext = Dis_StopHere; ++ return True; ++ } ++ ++ return gen_convert_d_helper(FTINT_L_D, fd, fj); ++} ++ ++static Bool gen_ffint_s_w ( DisResult* dres, UInt insn, ++ const VexArchInfo* archinfo, ++ const VexAbiInfo* abiinfo ) ++{ ++ UInt fj = get_fj(insn); ++ UInt fd = get_fd(insn); ++ ++ DIP("ffint.s.w %s, %s\n", nameFReg(fd), nameFReg(fj)); ++ ++ if (!(archinfo->hwcaps & VEX_HWCAPS_LOONGARCH_FP)) { ++ dres->jk_StopHere = Ijk_SigILL; ++ dres->whatNext = Dis_StopHere; ++ return True; ++ } ++ ++ calculateFCSR(FFINT_S_W, 1, fj, 0, 0); ++ IRExpr* rm = get_rounding_mode(); ++ IRExpr* f = unop(Iop_ReinterpF32asI32, getFReg32(fj)); ++ putFReg32(fd, binop(Iop_I32StoF32, rm, f)); ++ ++ return True; ++} ++ ++static Bool gen_ffint_s_l ( DisResult* dres, UInt insn, ++ const VexArchInfo* archinfo, ++ const VexAbiInfo* abiinfo ) ++{ ++ UInt fj = get_fj(insn); ++ UInt fd = get_fd(insn); ++ ++ DIP("ffint.s.l %s, %s\n", nameFReg(fd), nameFReg(fj)); ++ ++ if (!(archinfo->hwcaps & VEX_HWCAPS_LOONGARCH_FP)) { ++ dres->jk_StopHere = Ijk_SigILL; ++ dres->whatNext = Dis_StopHere; ++ return True; ++ } ++ ++ calculateFCSR(FFINT_S_L, 1, fj, 0, 0); ++ IRExpr* rm = get_rounding_mode(); ++ IRExpr* f = unop(Iop_ReinterpF64asI64, getFReg64(fj)); ++ putFReg32(fd, binop(Iop_I64StoF32, rm, f)); ++ ++ return True; ++} ++ ++static Bool gen_ffint_d_w ( DisResult* dres, UInt insn, ++ const VexArchInfo* archinfo, ++ const VexAbiInfo* abiinfo ) ++{ ++ UInt fj = get_fj(insn); ++ UInt fd = get_fd(insn); ++ ++ DIP("ffint.d.w %s, %s\n", nameFReg(fd), nameFReg(fj)); ++ ++ if (!(archinfo->hwcaps & VEX_HWCAPS_LOONGARCH_FP)) { ++ dres->jk_StopHere = Ijk_SigILL; ++ dres->whatNext = Dis_StopHere; ++ return True; ++ } ++ ++ calculateFCSR(FFINT_D_W, 1, fj, 0, 0); ++ IRExpr* f = unop(Iop_ReinterpF32asI32, getFReg32(fj)); ++ putFReg64(fd, unop(Iop_I32StoF64, f)); ++ ++ return True; ++} ++ ++static Bool gen_ffint_d_l ( DisResult* dres, UInt insn, ++ const VexArchInfo* archinfo, ++ const VexAbiInfo* abiinfo ) ++{ ++ UInt fj = get_fj(insn); ++ UInt fd = get_fd(insn); ++ ++ DIP("ffint.d.l %s, %s\n", nameFReg(fd), nameFReg(fj)); ++ ++ if (!(archinfo->hwcaps & VEX_HWCAPS_LOONGARCH_FP)) { ++ dres->jk_StopHere = Ijk_SigILL; ++ dres->whatNext = Dis_StopHere; ++ return True; ++ } ++ ++ calculateFCSR(FFINT_D_L, 1, fj, 0, 0); ++ IRExpr* rm = get_rounding_mode(); ++ IRExpr* f = unop(Iop_ReinterpF64asI64, getFReg64(fj)); ++ putFReg64(fd, binop(Iop_I64StoF64, rm, f)); ++ ++ return True; ++} ++ ++static Bool gen_frint_s ( DisResult* dres, UInt insn, ++ const VexArchInfo* archinfo, ++ const VexAbiInfo* abiinfo ) ++{ ++ UInt fj = get_fj(insn); ++ UInt fd = get_fd(insn); ++ ++ DIP("frint.s %s, %s\n", nameFReg(fd), nameFReg(fj)); ++ ++ if (!(archinfo->hwcaps & VEX_HWCAPS_LOONGARCH_FP)) { ++ dres->jk_StopHere = Ijk_SigILL; ++ dres->whatNext = Dis_StopHere; ++ return True; ++ } ++ ++ calculateFCSR(FRINT_S, 1, fj, 0, 0); ++ IRExpr* rm = get_rounding_mode(); ++ putFReg32(fd, binop(Iop_RoundF32toInt, rm, getFReg32(fj))); ++ ++ return True; ++} ++ ++static Bool gen_frint_d ( DisResult* dres, UInt insn, ++ const VexArchInfo* archinfo, ++ const VexAbiInfo* abiinfo ) ++{ ++ UInt fj = get_fj(insn); ++ UInt fd = get_fd(insn); ++ ++ DIP("frint.d %s, %s\n", nameFReg(fd), nameFReg(fj)); ++ ++ if (!(archinfo->hwcaps & VEX_HWCAPS_LOONGARCH_FP)) { ++ dres->jk_StopHere = Ijk_SigILL; ++ dres->whatNext = Dis_StopHere; ++ return True; ++ } ++ ++ 
calculateFCSR(FRINT_D, 1, fj, 0, 0); ++ IRExpr* rm = get_rounding_mode(); ++ putFReg64(fd, binop(Iop_RoundF64toInt, rm, getFReg64(fj))); ++ ++ return True; ++} ++ ++ ++/*------------------------------------------------------------*/ ++/*--- Helpers for floating point move insns ---*/ ++/*------------------------------------------------------------*/ ++ ++static Bool gen_fmov_s ( DisResult* dres, UInt insn, ++ const VexArchInfo* archinfo, ++ const VexAbiInfo* abiinfo ) ++{ ++ UInt fj = get_fj(insn); ++ UInt fd = get_fd(insn); ++ ++ DIP("fmov.s %s, %s\n", nameFReg(fd), nameFReg(fj)); ++ ++ if (!(archinfo->hwcaps & VEX_HWCAPS_LOONGARCH_FP)) { ++ dres->jk_StopHere = Ijk_SigILL; ++ dres->whatNext = Dis_StopHere; ++ return True; ++ } ++ ++ putFReg32(fd, getFReg32(fj)); ++ ++ return True; ++} ++ ++static Bool gen_fmov_d ( DisResult* dres, UInt insn, ++ const VexArchInfo* archinfo, ++ const VexAbiInfo* abiinfo ) ++{ ++ UInt fj = get_fj(insn); ++ UInt fd = get_fd(insn); ++ ++ DIP("fmov.d %s, %s\n", nameFReg(fd), nameFReg(fj)); ++ ++ if (!(archinfo->hwcaps & VEX_HWCAPS_LOONGARCH_FP)) { ++ dres->jk_StopHere = Ijk_SigILL; ++ dres->whatNext = Dis_StopHere; ++ return True; ++ } ++ ++ putFReg64(fd, getFReg64(fj)); ++ ++ return True; ++} ++ ++static Bool gen_fsel ( DisResult* dres, UInt insn, ++ const VexArchInfo* archinfo, ++ const VexAbiInfo* abiinfo ) ++{ ++ UInt ca = get_ca(insn); ++ UInt fk = get_fk(insn); ++ UInt fj = get_fj(insn); ++ UInt fd = get_fd(insn); ++ ++ DIP("fsel %s, %s, %s, %s\n", nameFReg(fd), nameFReg(fj), ++ nameFReg(fk), nameFCC(ca)); ++ ++ if (!(archinfo->hwcaps & VEX_HWCAPS_LOONGARCH_FP)) { ++ dres->jk_StopHere = Ijk_SigILL; ++ dres->whatNext = Dis_StopHere; ++ return True; ++ } ++ ++ IRExpr* cc = unop(Iop_8Uto64, getFCC(ca)); ++ IRExpr* cond = binop(Iop_CmpEQ64, cc, mkU64(0)); ++ putFReg64(fd, IRExpr_ITE(cond, getFReg64(fj), getFReg64(fk))); ++ ++ return True; ++} ++ ++static Bool gen_movgr2fr_w ( DisResult* dres, UInt insn, ++ const VexArchInfo* archinfo, ++ const VexAbiInfo* abiinfo ) ++{ ++ UInt rj = get_rj(insn); ++ UInt fd = get_fd(insn); ++ ++ DIP("movgr2fr.w %s, %s\n", nameFReg(fd), nameIReg(rj)); ++ ++ if (!(archinfo->hwcaps & VEX_HWCAPS_LOONGARCH_FP)) { ++ dres->jk_StopHere = Ijk_SigILL; ++ dres->whatNext = Dis_StopHere; ++ return True; ++ } ++ ++ /* The high bits might be undefined, now the hardware implementation ++ of this instruction is that it is equivalent to movgr2fr.d. 
*/ ++ putFReg64(fd, unop(Iop_ReinterpI64asF64, getIReg64(rj))); ++ ++ return True; ++} ++ ++static Bool gen_movgr2fr_d ( DisResult* dres, UInt insn, ++ const VexArchInfo* archinfo, ++ const VexAbiInfo* abiinfo ) ++{ ++ UInt rj = get_rj(insn); ++ UInt fd = get_fd(insn); ++ ++ DIP("movgr2fr.d %s, %s\n", nameFReg(fd), nameIReg(rj)); ++ ++ if (!(archinfo->hwcaps & VEX_HWCAPS_LOONGARCH_FP)) { ++ dres->jk_StopHere = Ijk_SigILL; ++ dres->whatNext = Dis_StopHere; ++ return True; ++ } ++ ++ putFReg64(fd, unop(Iop_ReinterpI64asF64, getIReg64(rj))); ++ ++ return True; ++} ++ ++static Bool gen_movgr2frh_w ( DisResult* dres, UInt insn, ++ const VexArchInfo* archinfo, ++ const VexAbiInfo* abiinfo ) ++{ ++ UInt rj = get_rj(insn); ++ UInt fd = get_fd(insn); ++ ++ DIP("movgr2frh.w %s, %s\n", nameFReg(fd), nameIReg(rj)); ++ ++ if (!(archinfo->hwcaps & VEX_HWCAPS_LOONGARCH_FP)) { ++ dres->jk_StopHere = Ijk_SigILL; ++ dres->whatNext = Dis_StopHere; ++ return True; ++ } ++ ++ IRExpr* shl1 = binop(Iop_Shl64, getIReg64(rj), mkU8(32)); ++ IRExpr* i = unop(Iop_ReinterpF64asI64, getFReg64(fd)); ++ IRExpr* shl2 = binop(Iop_Shl64, i, mkU8(32)); ++ IRExpr* shr = binop(Iop_Shr64, shl2, mkU8(32)); ++ IRExpr* or = binop(Iop_Or64, shl1, shr); ++ putFReg64(fd, unop(Iop_ReinterpI64asF64, or)); ++ ++ return True; ++} ++ ++static Bool gen_movfr2gr_s ( DisResult* dres, UInt insn, ++ const VexArchInfo* archinfo, ++ const VexAbiInfo* abiinfo ) ++{ ++ UInt fj = get_fj(insn); ++ UInt rd = get_rd(insn); ++ ++ DIP("movfr2gr.s %s, %s\n", nameIReg(rd), nameFReg(fj)); ++ ++ if (!(archinfo->hwcaps & VEX_HWCAPS_LOONGARCH_FP)) { ++ dres->jk_StopHere = Ijk_SigILL; ++ dres->whatNext = Dis_StopHere; ++ return True; ++ } ++ ++ IRExpr* i = unop(Iop_ReinterpF32asI32, getFReg32(fj)); ++ putIReg(rd, extendS(Ity_I32, i)); ++ ++ return True; ++} ++ ++static Bool gen_movfr2gr_d ( DisResult* dres, UInt insn, ++ const VexArchInfo* archinfo, ++ const VexAbiInfo* abiinfo ) ++{ ++ UInt fj = get_fj(insn); ++ UInt rd = get_rd(insn); ++ ++ DIP("movfr2gr.d %s, %s\n", nameIReg(rd), nameFReg(fj)); ++ ++ if (!(archinfo->hwcaps & VEX_HWCAPS_LOONGARCH_FP)) { ++ dres->jk_StopHere = Ijk_SigILL; ++ dres->whatNext = Dis_StopHere; ++ return True; ++ } ++ ++ putIReg(rd, unop(Iop_ReinterpF64asI64, getFReg64(fj))); ++ ++ return True; ++} ++ ++static Bool gen_movfrh2gr_s ( DisResult* dres, UInt insn, ++ const VexArchInfo* archinfo, ++ const VexAbiInfo* abiinfo ) ++{ ++ UInt fj = get_fj(insn); ++ UInt rd = get_rd(insn); ++ ++ DIP("movfrh2gr.s %s, %s\n", nameIReg(rd), nameFReg(fj)); ++ ++ if (!(archinfo->hwcaps & VEX_HWCAPS_LOONGARCH_FP)) { ++ dres->jk_StopHere = Ijk_SigILL; ++ dres->whatNext = Dis_StopHere; ++ return True; ++ } ++ ++ IRExpr* i = unop(Iop_ReinterpF64asI64, getFReg64(fj)); ++ IRExpr* shr = binop(Iop_Shr64, i, mkU8(32)); ++ putIReg(rd, extendS(Ity_I32, unop(Iop_64to32, shr))); ++ ++ return True; ++} ++ ++static Bool gen_movgr2fcsr ( DisResult* dres, UInt insn, ++ const VexArchInfo* archinfo, ++ const VexAbiInfo* abiinfo ) ++{ ++ UInt rj = get_rj(insn); ++ UInt fcsr = get_fcsrl(insn); ++ ++ DIP("movgr2fcsr %s, %s\n", nameFCSR(fcsr), nameIReg(rj)); ++ ++ if (!(archinfo->hwcaps & VEX_HWCAPS_LOONGARCH_FP)) { ++ dres->jk_StopHere = Ijk_SigILL; ++ dres->whatNext = Dis_StopHere; ++ return True; ++ } ++ ++ putFCSR(fcsr, getIReg32(rj)); ++ ++ return True; ++} ++ ++static Bool gen_movfcsr2gr ( DisResult* dres, UInt insn, ++ const VexArchInfo* archinfo, ++ const VexAbiInfo* abiinfo ) ++{ ++ UInt fcsr = get_fcsrh(insn); ++ UInt rd = get_rd(insn); ++ ++ DIP("movfcsr2gr 
%s, %s\n", nameIReg(rd), nameFCSR(fcsr)); ++ ++ if (!(archinfo->hwcaps & VEX_HWCAPS_LOONGARCH_FP)) { ++ dres->jk_StopHere = Ijk_SigILL; ++ dres->whatNext = Dis_StopHere; ++ return True; ++ } ++ ++ putIReg(rd, extendS(Ity_I32, getFCSR(fcsr))); ++ ++ return True; ++} ++ ++static Bool gen_movfr2cf ( DisResult* dres, UInt insn, ++ const VexArchInfo* archinfo, ++ const VexAbiInfo* abiinfo ) ++{ ++ UInt fj = get_fj(insn); ++ UInt cd = get_cd(insn); ++ ++ DIP("movfr2cf %s, %s\n", nameFCC(cd), nameFReg(fj)); ++ ++ if (!(archinfo->hwcaps & VEX_HWCAPS_LOONGARCH_FP)) { ++ dres->jk_StopHere = Ijk_SigILL; ++ dres->whatNext = Dis_StopHere; ++ return True; ++ } ++ ++ IRExpr* i = unop(Iop_ReinterpF64asI64, getFReg64(fj)); ++ IRExpr* and = binop(Iop_And64, i, mkU64(0x1)); ++ putFCC(cd, unop(Iop_64to8, and)); ++ ++ return True; ++} ++ ++static Bool gen_movcf2fr ( DisResult* dres, UInt insn, ++ const VexArchInfo* archinfo, ++ const VexAbiInfo* abiinfo ) ++{ ++ UInt cj = get_cj(insn); ++ UInt fd = get_fd(insn); ++ ++ DIP("movcf2fr %s, %s\n", nameFReg(fd), nameFCC(cj)); ++ ++ if (!(archinfo->hwcaps & VEX_HWCAPS_LOONGARCH_FP)) { ++ dres->jk_StopHere = Ijk_SigILL; ++ dres->whatNext = Dis_StopHere; ++ return True; ++ } ++ ++ /* The hardware implementation of this instruction ++ does clear the high bits. */ ++ IRExpr* cc = unop(Iop_8Uto64, getFCC(cj)); ++ putFReg64(fd, unop(Iop_ReinterpI64asF64, cc)); ++ ++ return True; ++} ++ ++static Bool gen_movgr2cf ( DisResult* dres, UInt insn, ++ const VexArchInfo* archinfo, ++ const VexAbiInfo* abiinfo ) ++{ ++ UInt rj = get_rj(insn); ++ UInt cd = get_cd(insn); ++ ++ DIP("movgr2cf %s, %s\n", nameFCC(cd), nameIReg(rj)); ++ ++ if (!(archinfo->hwcaps & VEX_HWCAPS_LOONGARCH_FP)) { ++ dres->jk_StopHere = Ijk_SigILL; ++ dres->whatNext = Dis_StopHere; ++ return True; ++ } ++ ++ IRExpr* and = binop(Iop_And64, getIReg64(rj), mkU64(0x1)); ++ putFCC(cd, unop(Iop_64to8, and)); ++ ++ return True; ++} ++ ++static Bool gen_movcf2gr ( DisResult* dres, UInt insn, ++ const VexArchInfo* archinfo, ++ const VexAbiInfo* abiinfo ) ++{ ++ UInt cj = get_cj(insn); ++ UInt rd = get_rd(insn); ++ ++ DIP("movcf2gr %s, %s\n", nameIReg(rd), nameFCC(cj)); ++ ++ if (!(archinfo->hwcaps & VEX_HWCAPS_LOONGARCH_FP)) { ++ dres->jk_StopHere = Ijk_SigILL; ++ dres->whatNext = Dis_StopHere; ++ return True; ++ } ++ ++ /* The hardware implementation of this instruction ++ does clear the high bits. 
*/ ++ putIReg(rd, unop(Iop_8Uto64, getFCC(cj))); ++ ++ return True; ++} ++ ++ ++/*------------------------------------------------------------*/ ++/*--- Helpers for floating point load/store insns ---*/ ++/*------------------------------------------------------------*/ ++ ++static Bool gen_fld_s ( DisResult* dres, UInt insn, ++ const VexArchInfo* archinfo, ++ const VexAbiInfo* abiinfo ) ++{ ++ UInt si12 = get_si12(insn); ++ UInt rj = get_rj(insn); ++ UInt fd = get_fd(insn); ++ ++ DIP("fld.s %s, %s, %d\n", nameFReg(fd), nameIReg(rj), ++ (Int)extend32(si12, 12)); ++ ++ if (!(archinfo->hwcaps & VEX_HWCAPS_LOONGARCH_FP)) { ++ dres->jk_StopHere = Ijk_SigILL; ++ dres->whatNext = Dis_StopHere; ++ return True; ++ } ++ ++ IRExpr* addr = binop(Iop_Add64, getIReg64(rj), mkU64(extend64(si12, 12))); ++ if (!(archinfo->hwcaps & VEX_HWCAPS_LOONGARCH_UAL)) ++ gen_SIGBUS(check_align(addr, mkU64(0x3))); ++ putFReg32(fd, load(Ity_F32, addr)); ++ ++ return True; ++} ++ ++static Bool gen_fst_s ( DisResult* dres, UInt insn, ++ const VexArchInfo* archinfo, ++ const VexAbiInfo* abiinfo ) ++{ ++ UInt si12 = get_si12(insn); ++ UInt rj = get_rj(insn); ++ UInt fd = get_fd(insn); ++ ++ DIP("fst.s %s, %s, %d\n", nameFReg(fd), nameIReg(rj), ++ (Int)extend32(si12, 12)); ++ ++ if (!(archinfo->hwcaps & VEX_HWCAPS_LOONGARCH_FP)) { ++ dres->jk_StopHere = Ijk_SigILL; ++ dres->whatNext = Dis_StopHere; ++ return True; ++ } ++ ++ IRExpr* addr = binop(Iop_Add64, getIReg64(rj), mkU64(extend64(si12, 12))); ++ if (!(archinfo->hwcaps & VEX_HWCAPS_LOONGARCH_UAL)) ++ gen_SIGBUS(check_align(addr, mkU64(0x3))); ++ store(addr, getFReg32(fd)); ++ ++ return True; ++} ++ ++static Bool gen_fld_d ( DisResult* dres, UInt insn, ++ const VexArchInfo* archinfo, ++ const VexAbiInfo* abiinfo ) ++{ ++ UInt si12 = get_si12(insn); ++ UInt rj = get_rj(insn); ++ UInt fd = get_fd(insn); ++ ++ DIP("fld.d %s, %s, %d\n", nameFReg(fd), nameIReg(rj), ++ (Int)extend32(si12, 12)); ++ ++ if (!(archinfo->hwcaps & VEX_HWCAPS_LOONGARCH_FP)) { ++ dres->jk_StopHere = Ijk_SigILL; ++ dres->whatNext = Dis_StopHere; ++ return True; ++ } ++ ++ IRExpr* addr = binop(Iop_Add64, getIReg64(rj), mkU64(extend64(si12, 12))); ++ if (!(archinfo->hwcaps & VEX_HWCAPS_LOONGARCH_UAL)) ++ gen_SIGBUS(check_align(addr, mkU64(0x7))); ++ putFReg64(fd, load(Ity_F64, addr)); ++ ++ return True; ++} ++ ++static Bool gen_fst_d ( DisResult* dres, UInt insn, ++ const VexArchInfo* archinfo, ++ const VexAbiInfo* abiinfo ) ++{ ++ UInt si12 = get_si12(insn); ++ UInt rj = get_rj(insn); ++ UInt fd = get_fd(insn); ++ ++ DIP("fst.d %s, %s, %d\n", nameFReg(fd), nameIReg(rj), ++ (Int)extend32(si12, 12)); ++ ++ if (!(archinfo->hwcaps & VEX_HWCAPS_LOONGARCH_FP)) { ++ dres->jk_StopHere = Ijk_SigILL; ++ dres->whatNext = Dis_StopHere; ++ return True; ++ } ++ ++ IRExpr* addr = binop(Iop_Add64, getIReg64(rj), mkU64(extend64(si12, 12))); ++ if (!(archinfo->hwcaps & VEX_HWCAPS_LOONGARCH_UAL)) ++ gen_SIGBUS(check_align(addr, mkU64(0x7))); ++ store(addr, getFReg64(fd)); ++ ++ return True; ++} ++ ++static Bool gen_fldx_s ( DisResult* dres, UInt insn, ++ const VexArchInfo* archinfo, ++ const VexAbiInfo* abiinfo ) ++{ ++ UInt rk = get_rk(insn); ++ UInt rj = get_rj(insn); ++ UInt fd = get_fd(insn); ++ ++ DIP("fldx.s %s, %s, %s\n", nameFReg(fd), nameIReg(rj), nameIReg(rk)); ++ ++ if (!(archinfo->hwcaps & VEX_HWCAPS_LOONGARCH_FP)) { ++ dres->jk_StopHere = Ijk_SigILL; ++ dres->whatNext = Dis_StopHere; ++ return True; ++ } ++ ++ IRExpr* addr = binop(Iop_Add64, getIReg64(rj), getIReg64(rk)); ++ if (!(archinfo->hwcaps & 
VEX_HWCAPS_LOONGARCH_UAL)) ++ gen_SIGBUS(check_align(addr, mkU64(0x3))); ++ putFReg32(fd, load(Ity_F32, addr)); ++ ++ return True; ++} ++ ++static Bool gen_fldx_d ( DisResult* dres, UInt insn, ++ const VexArchInfo* archinfo, ++ const VexAbiInfo* abiinfo ) ++{ ++ UInt rk = get_rk(insn); ++ UInt rj = get_rj(insn); ++ UInt fd = get_fd(insn); ++ ++ DIP("fldx.d %s, %s, %s\n", nameFReg(fd), nameIReg(rj), nameIReg(rk)); ++ ++ if (!(archinfo->hwcaps & VEX_HWCAPS_LOONGARCH_FP)) { ++ dres->jk_StopHere = Ijk_SigILL; ++ dres->whatNext = Dis_StopHere; ++ return True; ++ } ++ ++ IRExpr* addr = binop(Iop_Add64, getIReg64(rj), getIReg64(rk)); ++ if (!(archinfo->hwcaps & VEX_HWCAPS_LOONGARCH_UAL)) ++ gen_SIGBUS(check_align(addr, mkU64(0x7))); ++ putFReg64(fd, load(Ity_F64, addr)); ++ ++ return True; ++} ++ ++static Bool gen_fstx_s ( DisResult* dres, UInt insn, ++ const VexArchInfo* archinfo, ++ const VexAbiInfo* abiinfo ) ++{ ++ UInt rk = get_rk(insn); ++ UInt rj = get_rj(insn); ++ UInt fd = get_fd(insn); ++ ++ DIP("fstx.s %s, %s, %s\n", nameFReg(fd), nameIReg(rj), nameIReg(rk)); ++ ++ if (!(archinfo->hwcaps & VEX_HWCAPS_LOONGARCH_FP)) { ++ dres->jk_StopHere = Ijk_SigILL; ++ dres->whatNext = Dis_StopHere; ++ return True; ++ } ++ ++ IRExpr* addr = binop(Iop_Add64, getIReg64(rj), getIReg64(rk)); ++ if (!(archinfo->hwcaps & VEX_HWCAPS_LOONGARCH_UAL)) ++ gen_SIGBUS(check_align(addr, mkU64(0x3))); ++ store(addr, getFReg32(fd)); ++ ++ return True; ++} ++ ++static Bool gen_fstx_d ( DisResult* dres, UInt insn, ++ const VexArchInfo* archinfo, ++ const VexAbiInfo* abiinfo ) ++{ ++ UInt rk = get_rk(insn); ++ UInt rj = get_rj(insn); ++ UInt fd = get_fd(insn); ++ ++ DIP("fstx.d %s, %s, %s\n", nameFReg(fd), nameIReg(rj), nameIReg(rk)); ++ ++ if (!(archinfo->hwcaps & VEX_HWCAPS_LOONGARCH_FP)) { ++ dres->jk_StopHere = Ijk_SigILL; ++ dres->whatNext = Dis_StopHere; ++ return True; ++ } ++ ++ IRExpr* addr = binop(Iop_Add64, getIReg64(rj), getIReg64(rk)); ++ if (!(archinfo->hwcaps & VEX_HWCAPS_LOONGARCH_UAL)) ++ gen_SIGBUS(check_align(addr, mkU64(0x7))); ++ store(addr, getFReg64(fd)); ++ ++ return True; ++} ++ ++static Bool gen_fldgt_s ( DisResult* dres, UInt insn, ++ const VexArchInfo* archinfo, ++ const VexAbiInfo* abiinfo ) ++{ ++ UInt rk = get_rk(insn); ++ UInt rj = get_rj(insn); ++ UInt fd = get_fd(insn); ++ ++ DIP("fldgt.s %s, %s, %s\n", nameFReg(fd), nameIReg(rj), nameIReg(rk)); ++ ++ if (!(archinfo->hwcaps & VEX_HWCAPS_LOONGARCH_FP)) { ++ dres->jk_StopHere = Ijk_SigILL; ++ dres->whatNext = Dis_StopHere; ++ return True; ++ } ++ ++ IRTemp addr = newTemp(Ity_I64); ++ assign(addr, getIReg64(rj)); ++ gen_SIGBUS(check_align(mkexpr(addr), mkU64(0x3))); ++ gen_SIGSYS(binop(Iop_CmpLE64U, mkexpr(addr), getIReg64(rk))); ++ putFReg32(fd, load(Ity_F32, mkexpr(addr))); ++ ++ return True; ++} ++ ++static Bool gen_fldgt_d ( DisResult* dres, UInt insn, ++ const VexArchInfo* archinfo, ++ const VexAbiInfo* abiinfo ) ++{ ++ UInt rk = get_rk(insn); ++ UInt rj = get_rj(insn); ++ UInt fd = get_fd(insn); ++ ++ DIP("fldgt.d %s, %s, %s\n", nameFReg(fd), nameIReg(rj), nameIReg(rk)); ++ ++ if (!(archinfo->hwcaps & VEX_HWCAPS_LOONGARCH_FP)) { ++ dres->jk_StopHere = Ijk_SigILL; ++ dres->whatNext = Dis_StopHere; ++ return True; ++ } ++ ++ IRTemp addr = newTemp(Ity_I64); ++ assign(addr, getIReg64(rj)); ++ gen_SIGBUS(check_align(mkexpr(addr), mkU64(0x7))); ++ gen_SIGSYS(binop(Iop_CmpLE64U, mkexpr(addr), getIReg64(rk))); ++ putFReg64(fd, load(Ity_F64, mkexpr(addr))); ++ ++ return True; ++} ++ ++static Bool gen_fldle_s ( DisResult* dres, UInt insn, ++ 
const VexArchInfo* archinfo, ++ const VexAbiInfo* abiinfo ) ++{ ++ UInt rk = get_rk(insn); ++ UInt rj = get_rj(insn); ++ UInt fd = get_fd(insn); ++ ++ DIP("fldle.s %s, %s, %s\n", nameFReg(fd), nameIReg(rj), nameIReg(rk)); ++ ++ if (!(archinfo->hwcaps & VEX_HWCAPS_LOONGARCH_FP)) { ++ dres->jk_StopHere = Ijk_SigILL; ++ dres->whatNext = Dis_StopHere; ++ return True; ++ } ++ ++ IRTemp addr = newTemp(Ity_I64); ++ assign(addr, getIReg64(rj)); ++ gen_SIGBUS(check_align(mkexpr(addr), mkU64(0x3))); ++ gen_SIGSYS(binop(Iop_CmpLT64U, getIReg64(rk), mkexpr(addr))); ++ putFReg32(fd, load(Ity_F32, mkexpr(addr))); ++ ++ return True; ++} ++ ++static Bool gen_fldle_d ( DisResult* dres, UInt insn, ++ const VexArchInfo* archinfo, ++ const VexAbiInfo* abiinfo ) ++{ ++ UInt rk = get_rk(insn); ++ UInt rj = get_rj(insn); ++ UInt fd = get_fd(insn); ++ ++ DIP("fldle.d %s, %s, %s\n", nameFReg(fd), nameIReg(rj), nameIReg(rk)); ++ ++ if (!(archinfo->hwcaps & VEX_HWCAPS_LOONGARCH_FP)) { ++ dres->jk_StopHere = Ijk_SigILL; ++ dres->whatNext = Dis_StopHere; ++ return True; ++ } ++ ++ IRTemp addr = newTemp(Ity_I64); ++ assign(addr, getIReg64(rj)); ++ gen_SIGBUS(check_align(mkexpr(addr), mkU64(0x7))); ++ gen_SIGSYS(binop(Iop_CmpLT64U, getIReg64(rk), mkexpr(addr))); ++ putFReg64(fd, load(Ity_F64, mkexpr(addr))); ++ ++ return True; ++} ++ ++static Bool gen_fstgt_s ( DisResult* dres, UInt insn, ++ const VexArchInfo* archinfo, ++ const VexAbiInfo* abiinfo ) ++{ ++ UInt rk = get_rk(insn); ++ UInt rj = get_rj(insn); ++ UInt fd = get_fd(insn); ++ ++ DIP("fstgt.s %s, %s, %s\n", nameFReg(fd), nameIReg(rj), nameIReg(rk)); ++ ++ if (!(archinfo->hwcaps & VEX_HWCAPS_LOONGARCH_FP)) { ++ dres->jk_StopHere = Ijk_SigILL; ++ dres->whatNext = Dis_StopHere; ++ return True; ++ } ++ ++ IRTemp addr = newTemp(Ity_I64); ++ assign(addr, getIReg64(rj)); ++ gen_SIGBUS(check_align(mkexpr(addr), mkU64(0x3))); ++ gen_SIGSYS(binop(Iop_CmpLE64U, mkexpr(addr), getIReg64(rk))); ++ store(mkexpr(addr), getFReg32(fd)); ++ ++ return True; ++} ++ ++static Bool gen_fstgt_d ( DisResult* dres, UInt insn, ++ const VexArchInfo* archinfo, ++ const VexAbiInfo* abiinfo ) ++{ ++ UInt rk = get_rk(insn); ++ UInt rj = get_rj(insn); ++ UInt fd = get_fd(insn); ++ ++ DIP("fstgt.d %s, %s, %s\n", nameFReg(fd), nameIReg(rj), nameIReg(rk)); ++ ++ if (!(archinfo->hwcaps & VEX_HWCAPS_LOONGARCH_FP)) { ++ dres->jk_StopHere = Ijk_SigILL; ++ dres->whatNext = Dis_StopHere; ++ return True; ++ } ++ ++ IRTemp addr = newTemp(Ity_I64); ++ assign(addr, getIReg64(rj)); ++ gen_SIGBUS(check_align(mkexpr(addr), mkU64(0x7))); ++ gen_SIGSYS(binop(Iop_CmpLE64U, mkexpr(addr), getIReg64(rk))); ++ store(mkexpr(addr), getFReg64(fd)); ++ ++ return True; ++} ++ ++static Bool gen_fstle_s ( DisResult* dres, UInt insn, ++ const VexArchInfo* archinfo, ++ const VexAbiInfo* abiinfo ) ++{ ++ UInt rk = get_rk(insn); ++ UInt rj = get_rj(insn); ++ UInt fd = get_fd(insn); ++ ++ DIP("fstle.s %s, %s, %s\n", nameFReg(fd), nameIReg(rj), nameIReg(rk)); ++ ++ if (!(archinfo->hwcaps & VEX_HWCAPS_LOONGARCH_FP)) { ++ dres->jk_StopHere = Ijk_SigILL; ++ dres->whatNext = Dis_StopHere; ++ return True; ++ } ++ ++ IRTemp addr = newTemp(Ity_I64); ++ assign(addr, getIReg64(rj)); ++ gen_SIGBUS(check_align(mkexpr(addr), mkU64(0x3))); ++ gen_SIGSYS(binop(Iop_CmpLT64U, getIReg64(rk), mkexpr(addr))); ++ store(mkexpr(addr), getFReg32(fd)); ++ ++ return True; ++} ++ ++static Bool gen_fstle_d ( DisResult* dres, UInt insn, ++ const VexArchInfo* archinfo, ++ const VexAbiInfo* abiinfo ) ++{ ++ UInt rk = get_rk(insn); ++ UInt rj = get_rj(insn); 
++ UInt fd = get_fd(insn); ++ ++ DIP("fstle.d %s, %s, %s\n", nameFReg(fd), nameIReg(rj), nameIReg(rk)); ++ ++ if (!(archinfo->hwcaps & VEX_HWCAPS_LOONGARCH_FP)) { ++ dres->jk_StopHere = Ijk_SigILL; ++ dres->whatNext = Dis_StopHere; ++ return True; ++ } ++ ++ IRTemp addr = newTemp(Ity_I64); ++ assign(addr, getIReg64(rj)); ++ gen_SIGBUS(check_align(mkexpr(addr), mkU64(0x7))); ++ gen_SIGSYS(binop(Iop_CmpLT64U, getIReg64(rk), mkexpr(addr))); ++ store(mkexpr(addr), getFReg64(fd)); ++ ++ return True; ++} ++ ++ ++/*------------------------------------------------------------*/ ++/*--- Helpers for branch insns ---*/ ++/*------------------------------------------------------------*/ ++ ++static Bool gen_beqz ( DisResult* dres, UInt insn, ++ const VexArchInfo* archinfo, ++ const VexAbiInfo* abiinfo ) ++{ ++ UInt offs21 = get_offs21(insn); ++ UInt rj = get_rj(insn); ++ ++ DIP("beqz %s, %d\n", nameIReg(rj), (Int)extend32(offs21, 21)); ++ ++ IRExpr* cond = binop(Iop_CmpEQ64, getIReg64(rj), mkU64(0)); ++ exit(cond, Ijk_Boring, extend64(offs21 << 2, 23)); ++ ++ return True; ++} ++ ++static Bool gen_bnez ( DisResult* dres, UInt insn, ++ const VexArchInfo* archinfo, ++ const VexAbiInfo* abiinfo ) ++{ ++ UInt offs21 = get_offs21(insn); ++ UInt rj = get_rj(insn); ++ ++ DIP("bnez %s, %d\n", nameIReg(rj), (Int)extend32(offs21, 21)); ++ ++ IRExpr* cond = binop(Iop_CmpNE64, getIReg64(rj), mkU64(0)); ++ exit(cond, Ijk_Boring, extend64(offs21 << 2, 23)); ++ ++ return True; ++} ++ ++static Bool gen_bceqz ( DisResult* dres, UInt insn, ++ const VexArchInfo* archinfo, ++ const VexAbiInfo* abiinfo ) ++{ ++ UInt offs21 = get_offs21(insn); ++ UInt cj = get_cj(insn); ++ ++ DIP("bceqz %s, %d\n", nameFCC(cj), (Int)extend32(offs21, 21)); ++ ++ if (!(archinfo->hwcaps & VEX_HWCAPS_LOONGARCH_FP)) { ++ dres->jk_StopHere = Ijk_SigILL; ++ dres->whatNext = Dis_StopHere; ++ return True; ++ } ++ ++ IRExpr* cc = unop(Iop_8Uto64, getFCC(cj)); ++ IRExpr* cond = binop(Iop_CmpEQ64, cc, mkU64(0)); ++ exit(cond, Ijk_Boring, extend64(offs21 << 2, 23)); ++ ++ return True; ++} ++ ++static Bool gen_bcnez ( DisResult* dres, UInt insn, ++ const VexArchInfo* archinfo, ++ const VexAbiInfo* abiinfo ) ++{ ++ UInt offs21 = get_offs21(insn); ++ UInt cj = get_cj(insn); ++ ++ DIP("bcnez %s, %d\n", nameFCC(cj), (Int)extend32(offs21, 21)); ++ ++ if (!(archinfo->hwcaps & VEX_HWCAPS_LOONGARCH_FP)) { ++ dres->jk_StopHere = Ijk_SigILL; ++ dres->whatNext = Dis_StopHere; ++ return True; ++ } ++ ++ IRExpr* cc = unop(Iop_8Uto64, getFCC(cj)); ++ IRExpr* cond = binop(Iop_CmpNE64, cc, mkU64(0)); ++ exit(cond, Ijk_Boring, extend64(offs21 << 2, 23)); ++ ++ return True; ++} ++ ++static Bool gen_jirl ( DisResult* dres, UInt insn, ++ const VexArchInfo* archinfo, ++ const VexAbiInfo* abiinfo ) ++{ ++ UInt offs16 = get_offs16(insn); ++ UInt rj = get_rj(insn); ++ UInt rd = get_rd(insn); ++ ++ DIP("jirl %s, %s, %d\n", nameIReg(rd), nameIReg(rj), ++ (Int)extend32(offs16, 16)); ++ ++ IRTemp tmp = newTemp(Ity_I64); ++ assign(tmp, getIReg64(rj)); /* This is necessary when rd == rj */ ++ putIReg(rd, mkU64(guest_PC_curr_instr + 4)); ++ IRExpr* imm = mkU64(extend64(offs16 << 2, 18)); ++ putPC(binop(Iop_Add64, mkexpr(tmp), imm)); ++ ++ dres->whatNext = Dis_StopHere; ++ dres->jk_StopHere = Ijk_Boring; ++ ++ return True; ++} ++ ++static Bool gen_b ( DisResult* dres, UInt insn, ++ const VexArchInfo* archinfo, ++ const VexAbiInfo* abiinfo ) ++{ ++ UInt offs26 = get_offs26(insn); ++ ++ DIP("b %d\n", (Int)extend32(offs26, 26)); ++ ++ putPC(mkU64(guest_PC_curr_instr + extend64(offs26 << 2, 
28))); ++ ++ dres->whatNext = Dis_StopHere; ++ dres->jk_StopHere = Ijk_Boring; ++ ++ return True; ++} ++ ++static Bool gen_bl ( DisResult* dres, UInt insn, ++ const VexArchInfo* archinfo, ++ const VexAbiInfo* abiinfo ) ++{ ++ UInt offs26 = get_offs26(insn); ++ ++ DIP("bl %d\n", (Int)extend32(offs26, 26)); ++ ++ putIReg(1, mkU64(guest_PC_curr_instr + 4)); ++ putPC(mkU64(guest_PC_curr_instr + extend64(offs26 << 2, 28))); ++ ++ dres->whatNext = Dis_StopHere; ++ dres->jk_StopHere = Ijk_Boring; ++ ++ return True; ++} ++ ++static Bool gen_beq ( DisResult* dres, UInt insn, ++ const VexArchInfo* archinfo, ++ const VexAbiInfo* abiinfo ) ++{ ++ UInt offs16 = get_offs16(insn); ++ UInt rj = get_rj(insn); ++ UInt rd = get_rd(insn); ++ ++ DIP("beq %s, %s, %d\n", nameIReg(rj), nameIReg(rd), ++ (Int)extend32(offs16, 16)); ++ ++ IRExpr* cond = binop(Iop_CmpEQ64, getIReg64(rj), getIReg64(rd)); ++ exit(cond, Ijk_Boring, extend64(offs16 << 2, 18)); ++ ++ return True; ++} ++ ++static Bool gen_bne ( DisResult* dres, UInt insn, ++ const VexArchInfo* archinfo, ++ const VexAbiInfo* abiinfo ) ++{ ++ UInt offs16 = get_offs16(insn); ++ UInt rj = get_rj(insn); ++ UInt rd = get_rd(insn); ++ ++ DIP("bne %s, %s, %d\n", nameIReg(rj), nameIReg(rd), ++ (Int)extend32(offs16, 16)); ++ ++ IRExpr* cond = binop(Iop_CmpNE64, getIReg64(rj), getIReg64(rd)); ++ exit(cond, Ijk_Boring, extend64(offs16 << 2, 18)); ++ ++ return True; ++} ++ ++static Bool gen_blt ( DisResult* dres, UInt insn, ++ const VexArchInfo* archinfo, ++ const VexAbiInfo* abiinfo ) ++{ ++ UInt offs16 = get_offs16(insn); ++ UInt rj = get_rj(insn); ++ UInt rd = get_rd(insn); ++ ++ DIP("blt %s, %s, %d\n", nameIReg(rj), nameIReg(rd), ++ (Int)extend32(offs16, 16)); ++ ++ IRExpr* cond = binop(Iop_CmpLT64S, getIReg64(rj), getIReg64(rd)); ++ exit(cond, Ijk_Boring, extend64(offs16 << 2, 18)); ++ ++ return True; ++} ++ ++static Bool gen_bge ( DisResult* dres, UInt insn, ++ const VexArchInfo* archinfo, ++ const VexAbiInfo* abiinfo ) ++{ ++ UInt offs16 = get_offs16(insn); ++ UInt rj = get_rj(insn); ++ UInt rd = get_rd(insn); ++ ++ DIP("bge %s, %s, %d\n", nameIReg(rj), nameIReg(rd), ++ (Int)extend32(offs16, 16)); ++ ++ IRExpr* cond = binop(Iop_CmpLE64S, getIReg64(rd), getIReg64(rj)); ++ exit(cond, Ijk_Boring, extend64(offs16 << 2, 18)); ++ ++ return True; ++} ++ ++static Bool gen_bltu ( DisResult* dres, UInt insn, ++ const VexArchInfo* archinfo, ++ const VexAbiInfo* abiinfo ) ++{ ++ UInt offs16 = get_offs16(insn); ++ UInt rj = get_rj(insn); ++ UInt rd = get_rd(insn); ++ ++ DIP("bltu %s, %s, %d\n", nameIReg(rj), nameIReg(rd), ++ (Int)extend32(offs16, 16)); ++ ++ IRExpr* cond = binop(Iop_CmpLT64U, getIReg64(rj), getIReg64(rd)); ++ exit(cond, Ijk_Boring, extend64(offs16 << 2, 18)); ++ ++ return True; ++} ++ ++static Bool gen_bgeu ( DisResult* dres, UInt insn, ++ const VexArchInfo* archinfo, ++ const VexAbiInfo* abiinfo ) ++{ ++ UInt offs16 = get_offs16(insn); ++ UInt rj = get_rj(insn); ++ UInt rd = get_rd(insn); ++ ++ DIP("bgeu %s, %s, %d\n", nameIReg(rj), nameIReg(rd), ++ (Int)extend32(offs16, 16)); ++ ++ IRExpr* cond = binop(Iop_CmpLE64U, getIReg64(rd), getIReg64(rj)); ++ exit(cond, Ijk_Boring, extend64(offs16 << 2, 18)); ++ ++ return True; ++} ++ ++ ++/*------------------------------------------------------------*/ ++/*--- Disassemble a single LOONGARCH64 instruction ---*/ ++/*------------------------------------------------------------*/ ++ ++/* Disassemble a single LOONGARCH64 instruction into IR. 
The instruction ++ is located at |guest_instr| and has guest IP of |guest_PC_curr_instr|, ++ which will have been set before the call here. Returns True iff the ++ instruction was decoded, in which case *dres will be set accordingly, ++ or False, in which case *dres should be ignored by the caller. */ ++ ++static Bool disInstr_LOONGARCH64_WRK_special ( DisResult* dres, ++ const UChar* guest_instr ) ++{ ++ const UChar* code = guest_instr; ++ /* Spot the 16-byte preamble: ++ 00450c00 srli.d $zero, $zero, 3 ++ 00453400 srli.d $zero, $zero, 13 ++ 00457400 srli.d $zero, $zero, 29 ++ 00454c00 srli.d $zero, $zero, 19 ++ */ ++ if (getUInt(code + 0) == 0x00450c00 && ++ getUInt(code + 4) == 0x00453400 && ++ getUInt(code + 8) == 0x00457400 && ++ getUInt(code + 12) == 0x00454c00) { ++ /* Got a "Special" instruction preamble. Which one is it? */ ++ if (getUInt(code + 16) == 0x001535ad) { /* or $t1, $t1, $t1 */ ++ DIP("$a7 = client_request ( $t0 )\n"); ++ putPC(mkU64(guest_PC_curr_instr + 20)); ++ dres->whatNext = Dis_StopHere; ++ dres->len = 20; ++ dres->jk_StopHere = Ijk_ClientReq; ++ return True; ++ } else if (getUInt(code + 16) == 0x001539ce) { /* or $t2, $t2, $t2 */ ++ DIP("$a7 = guest_NRADDR\n"); ++ putIReg(11, IRExpr_Get(offsetof(VexGuestLOONGARCH64State, guest_NRADDR), ++ Ity_I64)); ++ dres->len = 20; ++ return True; ++ } else if (getUInt(code + 16) == 0x00153def) { /* or $t3, $t3, $t3 */ ++ DIP("branch-and-link-to-noredir $t8\n"); ++ putIReg(1, mkU64(guest_PC_curr_instr + 20)); ++ putPC(getIReg64(20)); ++ dres->whatNext = Dis_StopHere; ++ dres->len = 20; ++ dres->jk_StopHere = Ijk_NoRedir; ++ return True; ++ } else if (getUInt(code + 16) == 0x00154210) { /* or $t4, $t4, $t4 */ ++ DIP("IR injection\n"); ++ vex_inject_ir(irsb, Iend_LE); ++ /* Invalidate the current insn. The reason is that the IRop we're ++ injecting here can change, in which case the translation has to ++ be redone. For ease of handling, we simply invalidate all the ++ time. ++ */ ++ stmt(IRStmt_Put(offsetof(VexGuestLOONGARCH64State, guest_CMSTART), ++ mkU64(guest_PC_curr_instr))); ++ stmt(IRStmt_Put(offsetof(VexGuestLOONGARCH64State, guest_CMLEN), ++ mkU64(20))); ++ putPC(mkU64(guest_PC_curr_instr + 20)); ++ dres->whatNext = Dis_StopHere; ++ dres->len = 20; ++ dres->jk_StopHere = Ijk_InvalICache; ++ return True; ++ } ++ /* We don't know what it is.
*/ ++ vassert(0); ++ /*NOTREACHED*/ ++ } ++ return False; ++} ++ ++static Bool disInstr_LOONGARCH64_WRK_00_0000_0000 ( DisResult* dres, UInt insn, ++ const VexArchInfo* archinfo, ++ const VexAbiInfo* abiinfo ) ++{ ++ Bool ok; ++ switch (SLICE(insn, 21, 15)) { ++ case 0b0000000: ++ switch (SLICE(insn, 14, 10)) { ++ case 0b00100: ++ ok = gen_clo_w(dres, insn, archinfo, abiinfo); ++ break; ++ case 0b00101: ++ ok = gen_clz_w(dres, insn, archinfo, abiinfo); ++ break; ++ case 0b00110: ++ ok = gen_cto_w(dres, insn, archinfo, abiinfo); ++ break; ++ case 0b00111: ++ ok = gen_ctz_w(dres, insn, archinfo, abiinfo); ++ break; ++ case 0b01000: ++ ok = gen_clo_d(dres, insn, archinfo, abiinfo); ++ break; ++ case 0b01001: ++ ok = gen_clz_d(dres, insn, archinfo, abiinfo); ++ break; ++ case 0b01010: ++ ok = gen_cto_d(dres, insn, archinfo, abiinfo); ++ break; ++ case 0b01011: ++ ok = gen_ctz_d(dres, insn, archinfo, abiinfo); ++ break; ++ case 0b01100: ++ ok = gen_revb_2h(dres, insn, archinfo, abiinfo); ++ break; ++ case 0b01101: ++ ok = gen_revb_4h(dres, insn, archinfo, abiinfo); ++ break; ++ case 0b01110: ++ ok = gen_revb_2w(dres, insn, archinfo, abiinfo); ++ break; ++ case 0b01111: ++ ok = gen_revb_d(dres, insn, archinfo, abiinfo); ++ break; ++ case 0b10000: ++ ok = gen_revh_2w(dres, insn, archinfo, abiinfo); ++ break; ++ case 0b10001: ++ ok = gen_revh_d(dres, insn, archinfo, abiinfo); ++ break; ++ case 0b10010: ++ ok = gen_bitrev_4b(dres, insn, archinfo, abiinfo); ++ break; ++ case 0b10011: ++ ok = gen_bitrev_8b(dres, insn, archinfo, abiinfo); ++ break; ++ case 0b10100: ++ ok = gen_bitrev_w(dres, insn, archinfo, abiinfo); ++ break; ++ case 0b10101: ++ ok = gen_bitrev_d(dres, insn, archinfo, abiinfo); ++ break; ++ case 0b10110: ++ ok = gen_ext_w_h(dres, insn, archinfo, abiinfo); ++ break; ++ case 0b10111: ++ ok = gen_ext_w_b(dres, insn, archinfo, abiinfo); ++ break; ++ case 0b11000: ++ ok = gen_rdtimel_w(dres, insn, archinfo, abiinfo); ++ break; ++ case 0b11001: ++ ok = gen_rdtimeh_w(dres, insn, archinfo, abiinfo); ++ break; ++ case 0b11010: ++ ok = gen_rdtime_d(dres, insn, archinfo, abiinfo); ++ break; ++ case 0b11011: ++ ok = gen_cpucfg(dres, insn, archinfo, abiinfo); ++ break; ++ default: ++ ok = False; ++ break; ++ } ++ break; ++ case 0b0000010: ++ ok = gen_asrtle_d(dres, insn, archinfo, abiinfo); ++ break; ++ case 0b0000011: ++ ok = gen_asrtgt_d(dres, insn, archinfo, abiinfo); ++ break; ++ case 0b0100000: ++ ok = gen_add_w(dres, insn, archinfo, abiinfo); ++ break; ++ case 0b0100001: ++ ok = gen_add_d(dres, insn, archinfo, abiinfo); ++ break; ++ case 0b0100010: ++ ok = gen_sub_w(dres, insn, archinfo, abiinfo); ++ break; ++ case 0b0100011: ++ ok = gen_sub_d(dres, insn, archinfo, abiinfo); ++ break; ++ case 0b0100100: ++ ok = gen_slt(dres, insn, archinfo, abiinfo); ++ break; ++ case 0b0100101: ++ ok = gen_sltu(dres, insn, archinfo, abiinfo); ++ break; ++ case 0b0100110: ++ ok = gen_maskeqz(dres, insn, archinfo, abiinfo); ++ break; ++ case 0b0100111: ++ ok = gen_masknez(dres, insn, archinfo, abiinfo); ++ break; ++ case 0b0101000: ++ ok = gen_nor(dres, insn, archinfo, abiinfo); ++ break; ++ case 0b0101001: ++ ok = gen_and(dres, insn, archinfo, abiinfo); ++ break; ++ case 0b0101010: ++ ok = gen_or(dres, insn, archinfo, abiinfo); ++ break; ++ case 0b0101011: ++ ok = gen_xor(dres, insn, archinfo, abiinfo); ++ break; ++ case 0b0101100: ++ ok = gen_orn(dres, insn, archinfo, abiinfo); ++ break; ++ case 0b0101101: ++ ok = gen_andn(dres, insn, archinfo, abiinfo); ++ break; ++ case 0b0101110: ++ ok = gen_sll_w(dres, 
insn, archinfo, abiinfo); ++ break; ++ case 0b0101111: ++ ok = gen_srl_w(dres, insn, archinfo, abiinfo); ++ break; ++ case 0b0110000: ++ ok = gen_sra_w(dres, insn, archinfo, abiinfo); ++ break; ++ case 0b0110001: ++ ok = gen_sll_d(dres, insn, archinfo, abiinfo); ++ break; ++ case 0b0110010: ++ ok = gen_srl_d(dres, insn, archinfo, abiinfo); ++ break; ++ case 0b0110011: ++ ok = gen_sra_d(dres, insn, archinfo, abiinfo); ++ break; ++ case 0b0110110: ++ ok = gen_rotr_w(dres, insn, archinfo, abiinfo); ++ break; ++ case 0b0110111: ++ ok = gen_rotr_d(dres, insn, archinfo, abiinfo); ++ break; ++ case 0b0111000: ++ ok = gen_mul_w(dres, insn, archinfo, abiinfo); ++ break; ++ case 0b0111001: ++ ok = gen_mulh_w(dres, insn, archinfo, abiinfo); ++ break; ++ case 0b0111010: ++ ok = gen_mulh_wu(dres, insn, archinfo, abiinfo); ++ break; ++ case 0b0111011: ++ ok = gen_mul_d(dres, insn, archinfo, abiinfo); ++ break; ++ case 0b0111100: ++ ok = gen_mulh_d(dres, insn, archinfo, abiinfo); ++ break; ++ case 0b0111101: ++ ok = gen_mulh_du(dres, insn, archinfo, abiinfo); ++ break; ++ case 0b0111110: ++ ok = gen_mulw_d_w(dres, insn, archinfo, abiinfo); ++ break; ++ case 0b0111111: ++ ok = gen_mulw_d_wu(dres, insn, archinfo, abiinfo); ++ break; ++ case 0b1000000: ++ ok = gen_div_w(dres, insn, archinfo, abiinfo); ++ break; ++ case 0b1000001: ++ ok = gen_mod_w(dres, insn, archinfo, abiinfo); ++ break; ++ case 0b1000010: ++ ok = gen_div_wu(dres, insn, archinfo, abiinfo); ++ break; ++ case 0b1000011: ++ ok = gen_mod_wu(dres, insn, archinfo, abiinfo); ++ break; ++ case 0b1000100: ++ ok = gen_div_d(dres, insn, archinfo, abiinfo); ++ break; ++ case 0b1000101: ++ ok = gen_mod_d(dres, insn, archinfo, abiinfo); ++ break; ++ case 0b1000110: ++ ok = gen_div_du(dres, insn, archinfo, abiinfo); ++ break; ++ case 0b1000111: ++ ok = gen_mod_du(dres, insn, archinfo, abiinfo); ++ break; ++ case 0b1001000: ++ ok = gen_crc_w_b_w(dres, insn, archinfo, abiinfo); ++ break; ++ case 0b1001001: ++ ok = gen_crc_w_h_w(dres, insn, archinfo, abiinfo); ++ break; ++ case 0b1001010: ++ ok = gen_crc_w_w_w(dres, insn, archinfo, abiinfo); ++ break; ++ case 0b1001011: ++ ok = gen_crc_w_d_w(dres, insn, archinfo, abiinfo); ++ break; ++ case 0b1001100: ++ ok = gen_crcc_w_b_w(dres, insn, archinfo, abiinfo); ++ break; ++ case 0b1001101: ++ ok = gen_crcc_w_h_w(dres, insn, archinfo, abiinfo); ++ break; ++ case 0b1001110: ++ ok = gen_crcc_w_w_w(dres, insn, archinfo, abiinfo); ++ break; ++ case 0b1001111: ++ ok = gen_crcc_w_d_w(dres, insn, archinfo, abiinfo); ++ break; ++ case 0b1010100: ++ ok = gen_break(dres, insn, archinfo, abiinfo); ++ break; ++ case 0b1010110: ++ ok = gen_syscall(dres, insn, archinfo, abiinfo); ++ break; ++ default: ++ ok = False; ++ break; ++ } ++ ++ if (ok) { ++ return ok; ++ } ++ ++ switch (SLICE(insn, 21, 18)) { ++ case 0b0001: ++ if (SLICE(insn, 17, 17) == 0) { ++ ok = gen_alsl_w(dres, insn, archinfo, abiinfo); ++ } else { ++ ok = gen_alsl_wu(dres, insn, archinfo, abiinfo); ++ } ++ break; ++ case 0b0010: ++ if (SLICE(insn, 17, 17) == 0) { ++ ok = gen_bytepick_w(dres, insn, archinfo, abiinfo); ++ } else { ++ ok = False; ++ } ++ break; ++ case 0b0011: ++ ok = gen_bytepick_d(dres, insn, archinfo, abiinfo); ++ break; ++ case 0b1011: ++ if (SLICE(insn, 17, 17) == 0) { ++ ok = gen_alsl_d(dres, insn, archinfo, abiinfo); ++ } else { ++ ok = False; ++ } ++ break; ++ default: ++ ok = False; ++ break; ++ } ++ return ok; ++} ++ ++static Bool disInstr_LOONGARCH64_WRK_00_0000_0001 ( DisResult* dres, UInt insn, ++ const VexArchInfo* archinfo, ++ const 
VexAbiInfo* abiinfo ) ++{ ++ Bool ok; ++ if (SLICE(insn, 21, 21) == 0) { ++ switch (SLICE(insn, 20, 16)) { ++ case 0b00000: ++ if (SLICE(insn, 15, 15) == 1) { ++ ok = gen_slli_w(dres, insn, archinfo, abiinfo); ++ } else { ++ ok = False; ++ } ++ break; ++ case 0b00001: ++ ok = gen_slli_d(dres, insn, archinfo, abiinfo); ++ break; ++ case 0b00100: ++ if (SLICE(insn, 15, 15) == 1) { ++ ok = gen_srli_w(dres, insn, archinfo, abiinfo); ++ } else { ++ ok = False; ++ } ++ break; ++ case 0b00101: ++ ok = gen_srli_d(dres, insn, archinfo, abiinfo); ++ break; ++ case 0b01000: ++ if (SLICE(insn, 15, 15) == 1) { ++ ok = gen_srai_w(dres, insn, archinfo, abiinfo); ++ } else { ++ ok = False; ++ } ++ break; ++ case 0b01001: ++ ok = gen_srai_d(dres, insn, archinfo, abiinfo); ++ break; ++ case 0b01100: ++ if (SLICE(insn, 15, 15) == 1) { ++ ok = gen_rotri_w(dres, insn, archinfo, abiinfo); ++ } else { ++ ok = False; ++ } ++ break; ++ case 0b01101: ++ ok = gen_rotri_d(dres, insn, archinfo, abiinfo); ++ break; ++ default: ++ ok = False; ++ break; ++ } ++ } else { ++ if (SLICE(insn, 15, 15) == 0) { ++ ok = gen_bstrins_w(dres, insn, archinfo, abiinfo); ++ } else { ++ ok = gen_bstrpick_w(dres, insn, archinfo, abiinfo); ++ } ++ } ++ return ok; ++} ++ ++static Bool disInstr_LOONGARCH64_WRK_00_0000_0100 ( DisResult* dres, UInt insn, ++ const VexArchInfo* archinfo, ++ const VexAbiInfo* abiinfo ) ++{ ++ Bool ok; ++ switch (SLICE(insn, 21, 15)) { ++ case 0b0000001: ++ ok = gen_fadd_s(dres, insn, archinfo, abiinfo); ++ break; ++ case 0b0000010: ++ ok = gen_fadd_d(dres, insn, archinfo, abiinfo); ++ break; ++ case 0b0000101: ++ ok = gen_fsub_s(dres, insn, archinfo, abiinfo); ++ break; ++ case 0b0000110: ++ ok = gen_fsub_d(dres, insn, archinfo, abiinfo); ++ break; ++ case 0b0001001: ++ ok = gen_fmul_s(dres, insn, archinfo, abiinfo); ++ break; ++ case 0b0001010: ++ ok = gen_fmul_d(dres, insn, archinfo, abiinfo); ++ break; ++ case 0b0001101: ++ ok = gen_fdiv_s(dres, insn, archinfo, abiinfo); ++ break; ++ case 0b0001110: ++ ok = gen_fdiv_d(dres, insn, archinfo, abiinfo); ++ break; ++ case 0b0010001: ++ ok = gen_fmax_s(dres, insn, archinfo, abiinfo); ++ break; ++ case 0b0010010: ++ ok = gen_fmax_d(dres, insn, archinfo, abiinfo); ++ break; ++ case 0b0010101: ++ ok = gen_fmin_s(dres, insn, archinfo, abiinfo); ++ break; ++ case 0b0010110: ++ ok = gen_fmin_d(dres, insn, archinfo, abiinfo); ++ break; ++ case 0b0011001: ++ ok = gen_fmaxa_s(dres, insn, archinfo, abiinfo); ++ break; ++ case 0b0011010: ++ ok = gen_fmaxa_d(dres, insn, archinfo, abiinfo); ++ break; ++ case 0b0011101: ++ ok = gen_fmina_s(dres, insn, archinfo, abiinfo); ++ break; ++ case 0b0011110: ++ ok = gen_fmina_d(dres, insn, archinfo, abiinfo); ++ break; ++ case 0b0100001: ++ ok = gen_fscaleb_s(dres, insn, archinfo, abiinfo); ++ break; ++ case 0b0100010: ++ ok = gen_fscaleb_d(dres, insn, archinfo, abiinfo); ++ break; ++ case 0b0100101: ++ ok = gen_fcopysign_s(dres, insn, archinfo, abiinfo); ++ break; ++ case 0b0100110: ++ ok = gen_fcopysign_d(dres, insn, archinfo, abiinfo); ++ break; ++ case 0b0101000: ++ switch (SLICE(insn, 14, 10)) { ++ case 0b00001: ++ ok = gen_fabs_s(dres, insn, archinfo, abiinfo); ++ break; ++ case 0b00010: ++ ok = gen_fabs_d(dres, insn, archinfo, abiinfo); ++ break; ++ case 0b00101: ++ ok = gen_fneg_s(dres, insn, archinfo, abiinfo); ++ break; ++ case 0b00110: ++ ok = gen_fneg_d(dres, insn, archinfo, abiinfo); ++ break; ++ case 0b01001: ++ ok = gen_flogb_s(dres, insn, archinfo, abiinfo); ++ break; ++ case 0b01010: ++ ok = gen_flogb_d(dres, insn, 
archinfo, abiinfo); ++ break; ++ case 0b01101: ++ ok = gen_fclass_s(dres, insn, archinfo, abiinfo); ++ break; ++ case 0b01110: ++ ok = gen_fclass_d(dres, insn, archinfo, abiinfo); ++ break; ++ case 0b10001: ++ ok = gen_fsqrt_s(dres, insn, archinfo, abiinfo); ++ break; ++ case 0b10010: ++ ok = gen_fsqrt_d(dres, insn, archinfo, abiinfo); ++ break; ++ case 0b10101: ++ ok = gen_frecip_s(dres, insn, archinfo, abiinfo); ++ break; ++ case 0b10110: ++ ok = gen_frecip_d(dres, insn, archinfo, abiinfo); ++ break; ++ case 0b11001: ++ ok = gen_frsqrt_s(dres, insn, archinfo, abiinfo); ++ break; ++ case 0b11010: ++ ok = gen_frsqrt_d(dres, insn, archinfo, abiinfo); ++ break; ++ default: ++ ok = False; ++ break; ++ } ++ break; ++ case 0b0101001: ++ switch (SLICE(insn, 14, 10)) { ++ case 0b00101: ++ ok = gen_fmov_s(dres, insn, archinfo, abiinfo); ++ break; ++ case 0b00110: ++ ok = gen_fmov_d(dres, insn, archinfo, abiinfo); ++ break; ++ case 0b01001: ++ ok = gen_movgr2fr_w(dres, insn, archinfo, abiinfo); ++ break; ++ case 0b01010: ++ ok = gen_movgr2fr_d(dres, insn, archinfo, abiinfo); ++ break; ++ case 0b01011: ++ ok = gen_movgr2frh_w(dres, insn, archinfo, abiinfo); ++ break; ++ case 0b01101: ++ ok = gen_movfr2gr_s(dres, insn, archinfo, abiinfo); ++ break; ++ case 0b01110: ++ ok = gen_movfr2gr_d(dres, insn, archinfo, abiinfo); ++ break; ++ case 0b01111: ++ ok = gen_movfrh2gr_s(dres, insn, archinfo, abiinfo); ++ break; ++ case 0b10000: ++ ok = gen_movgr2fcsr(dres, insn, archinfo, abiinfo); ++ break; ++ case 0b10010: ++ ok = gen_movfcsr2gr(dres, insn, archinfo, abiinfo); ++ break; ++ case 0b10100: ++ if (SLICE(insn, 4, 3) == 0b00) { ++ ok = gen_movfr2cf(dres, insn, archinfo, abiinfo); ++ } else { ++ ok = False; ++ } ++ break; ++ case 0b10101: ++ if (SLICE(insn, 9, 8) == 0b00) { ++ ok = gen_movcf2fr(dres, insn, archinfo, abiinfo); ++ } else { ++ ok = False; ++ } ++ break; ++ case 0b10110: ++ if (SLICE(insn, 4, 3) == 0b00) { ++ ok = gen_movgr2cf(dres, insn, archinfo, abiinfo); ++ } else { ++ ok = False; ++ } ++ break; ++ case 0b10111: ++ if (SLICE(insn, 9, 8) == 0b00) { ++ ok = gen_movcf2gr(dres, insn, archinfo, abiinfo); ++ } else { ++ ok = False; ++ } ++ break; ++ default: ++ ok = False; ++ break; ++ } ++ break; ++ case 0b0110010: ++ switch (SLICE(insn, 14, 10)) { ++ case 0b00110: ++ ok = gen_fcvt_s_d(dres, insn, archinfo, abiinfo); ++ break; ++ case 0b01001: ++ ok = gen_fcvt_d_s(dres, insn, archinfo, abiinfo); ++ break; ++ default: ++ ok = False; ++ break; ++ } ++ break; ++ case 0b0110100: ++ switch (SLICE(insn, 14, 10)) { ++ case 0b00001: ++ ok = gen_ftintrm_w_s(dres, insn, archinfo, abiinfo); ++ break; ++ case 0b00010: ++ ok = gen_ftintrm_w_d(dres, insn, archinfo, abiinfo); ++ break; ++ case 0b01001: ++ ok = gen_ftintrm_l_s(dres, insn, archinfo, abiinfo); ++ break; ++ case 0b01010: ++ ok = gen_ftintrm_l_d(dres, insn, archinfo, abiinfo); ++ break; ++ case 0b10001: ++ ok = gen_ftintrp_w_s(dres, insn, archinfo, abiinfo); ++ break; ++ case 0b10010: ++ ok = gen_ftintrp_w_d(dres, insn, archinfo, abiinfo); ++ break; ++ case 0b11001: ++ ok = gen_ftintrp_l_s(dres, insn, archinfo, abiinfo); ++ break; ++ case 0b11010: ++ ok = gen_ftintrp_l_d(dres, insn, archinfo, abiinfo); ++ break; ++ default: ++ ok = False; ++ break; ++ } ++ break; ++ case 0b0110101: ++ switch (SLICE(insn, 14, 10)) { ++ case 0b00001: ++ ok = gen_ftintrz_w_s(dres, insn, archinfo, abiinfo); ++ break; ++ case 0b00010: ++ ok = gen_ftintrz_w_d(dres, insn, archinfo, abiinfo); ++ break; ++ case 0b01001: ++ ok = gen_ftintrz_l_s(dres, insn, archinfo, 
abiinfo); ++ break; ++ case 0b01010: ++ ok = gen_ftintrz_l_d(dres, insn, archinfo, abiinfo); ++ break; ++ case 0b10001: ++ ok = gen_ftintrne_w_s(dres, insn, archinfo, abiinfo); ++ break; ++ case 0b10010: ++ ok = gen_ftintrne_w_d(dres, insn, archinfo, abiinfo); ++ break; ++ case 0b11001: ++ ok = gen_ftintrne_l_s(dres, insn, archinfo, abiinfo); ++ break; ++ case 0b11010: ++ ok = gen_ftintrne_l_d(dres, insn, archinfo, abiinfo); ++ break; ++ default: ++ ok = False; ++ break; ++ } ++ break; ++ case 0b0110110: ++ switch (SLICE(insn, 14, 10)) { ++ case 0b00001: ++ ok = gen_ftint_w_s(dres, insn, archinfo, abiinfo); ++ break; ++ case 0b00010: ++ ok = gen_ftint_w_d(dres, insn, archinfo, abiinfo); ++ break; ++ case 0b01001: ++ ok = gen_ftint_l_s(dres, insn, archinfo, abiinfo); ++ break; ++ case 0b01010: ++ ok = gen_ftint_l_d(dres, insn, archinfo, abiinfo); ++ break; ++ default: ++ ok = False; ++ break; ++ } ++ break; ++ case 0b0111010: ++ switch (SLICE(insn, 14, 10)) { ++ case 0b00100: ++ ok = gen_ffint_s_w(dres, insn, archinfo, abiinfo); ++ break; ++ case 0b00110: ++ ok = gen_ffint_s_l(dres, insn, archinfo, abiinfo); ++ break; ++ case 0b01000: ++ ok = gen_ffint_d_w(dres, insn, archinfo, abiinfo); ++ break; ++ case 0b01010: ++ ok = gen_ffint_d_l(dres, insn, archinfo, abiinfo); ++ break; ++ default: ++ ok = False; ++ break; ++ } ++ break; ++ case 0b0111100: ++ switch (SLICE(insn, 14, 10)) { ++ case 0b10001: ++ ok = gen_frint_s(dres, insn, archinfo, abiinfo); ++ break; ++ case 0b10010: ++ ok = gen_frint_d(dres, insn, archinfo, abiinfo); ++ break; ++ default: ++ ok = False; ++ break; ++ } ++ break; ++ default: ++ ok = False; ++ break; ++ } ++ return ok; ++} ++ ++static Bool disInstr_LOONGARCH64_WRK_00_0000 ( DisResult* dres, UInt insn, ++ const VexArchInfo* archinfo, ++ const VexAbiInfo* abiinfo ) ++{ ++ Bool ok; ++ switch (SLICE(insn, 25, 22)) { ++ case 0b0000: ++ ok = disInstr_LOONGARCH64_WRK_00_0000_0000(dres, insn, archinfo, abiinfo); ++ break; ++ case 0b0001: ++ ok = disInstr_LOONGARCH64_WRK_00_0000_0001(dres, insn, archinfo, abiinfo); ++ break; ++ case 0b0010: ++ ok = gen_bstrins_d(dres, insn, archinfo, abiinfo); ++ break; ++ case 0b0011: ++ ok = gen_bstrpick_d(dres, insn, archinfo, abiinfo); ++ break; ++ case 0b0100: ++ ok = disInstr_LOONGARCH64_WRK_00_0000_0100(dres, insn, archinfo, abiinfo); ++ break; ++ case 0b1000: ++ ok = gen_slti(dres, insn, archinfo, abiinfo); ++ break; ++ case 0b1001: ++ ok = gen_sltui(dres, insn, archinfo, abiinfo); ++ break; ++ case 0b1010: ++ ok = gen_addi_w(dres, insn, archinfo, abiinfo); ++ break; ++ case 0b1011: ++ ok = gen_addi_d(dres, insn, archinfo, abiinfo); ++ break; ++ case 0b1100: ++ ok = gen_lu52i_d(dres, insn, archinfo, abiinfo); ++ break; ++ case 0b1101: ++ ok = gen_andi(dres, insn, archinfo, abiinfo); ++ break; ++ case 0b1110: ++ ok = gen_ori(dres, insn, archinfo, abiinfo); ++ break; ++ case 0b1111: ++ ok = gen_xori(dres, insn, archinfo, abiinfo); ++ break; ++ default: ++ ok = False; ++ break; ++ } ++ return ok; ++} ++ ++static Bool disInstr_LOONGARCH64_WRK_00_1010 ( DisResult* dres, UInt insn, ++ const VexArchInfo* archinfo, ++ const VexAbiInfo* abiinfo ) ++{ ++ Bool ok; ++ switch (SLICE(insn, 25, 22)) { ++ case 0b0000: ++ ok = gen_ld_b(dres, insn, archinfo, abiinfo); ++ break; ++ case 0b0001: ++ ok = gen_ld_h(dres, insn, archinfo, abiinfo); ++ break; ++ case 0b0010: ++ ok = gen_ld_w(dres, insn, archinfo, abiinfo); ++ break; ++ case 0b0011: ++ ok = gen_ld_d(dres, insn, archinfo, abiinfo); ++ break; ++ case 0b0100: ++ ok = gen_st_b(dres, insn, archinfo, 
abiinfo); ++ break; ++ case 0b0101: ++ ok = gen_st_h(dres, insn, archinfo, abiinfo); ++ break; ++ case 0b0110: ++ ok = gen_st_w(dres, insn, archinfo, abiinfo); ++ break; ++ case 0b0111: ++ ok = gen_st_d(dres, insn, archinfo, abiinfo); ++ break; ++ case 0b1000: ++ ok = gen_ld_bu(dres, insn, archinfo, abiinfo); ++ break; ++ case 0b1001: ++ ok = gen_ld_hu(dres, insn, archinfo, abiinfo); ++ break; ++ case 0b1010: ++ ok = gen_ld_wu(dres, insn, archinfo, abiinfo); ++ break; ++ case 0b1011: ++ ok = gen_preld(dres, insn, archinfo, abiinfo); ++ break; ++ case 0b1100: ++ ok = gen_fld_s(dres, insn, archinfo, abiinfo); ++ break; ++ case 0b1101: ++ ok = gen_fst_s(dres, insn, archinfo, abiinfo); ++ break; ++ case 0b1110: ++ ok = gen_fld_d(dres, insn, archinfo, abiinfo); ++ break; ++ case 0b1111: ++ ok = gen_fst_d(dres, insn, archinfo, abiinfo); ++ break; ++ default: ++ ok = False; ++ break; ++ } ++ return ok; ++} ++ ++static Bool disInstr_LOONGARCH64_WRK_00_1110_0000 ( DisResult* dres, UInt insn, ++ const VexArchInfo* archinfo, ++ const VexAbiInfo* abiinfo ) ++{ ++ Bool ok; ++ switch (SLICE(insn, 21, 15)) { ++ case 0b0000000: ++ ok = gen_ldx_b(dres, insn, archinfo, abiinfo); ++ break; ++ case 0b0001000: ++ ok = gen_ldx_h(dres, insn, archinfo, abiinfo); ++ break; ++ case 0b0010000: ++ ok = gen_ldx_w(dres, insn, archinfo, abiinfo); ++ break; ++ case 0b0011000: ++ ok = gen_ldx_d(dres, insn, archinfo, abiinfo); ++ break; ++ case 0b0100000: ++ ok = gen_stx_b(dres, insn, archinfo, abiinfo); ++ break; ++ case 0b0101000: ++ ok = gen_stx_h(dres, insn, archinfo, abiinfo); ++ break; ++ case 0b0110000: ++ ok = gen_stx_w(dres, insn, archinfo, abiinfo); ++ break; ++ case 0b0111000: ++ ok = gen_stx_d(dres, insn, archinfo, abiinfo); ++ break; ++ case 0b1000000: ++ ok = gen_ldx_bu(dres, insn, archinfo, abiinfo); ++ break; ++ case 0b1001000: ++ ok = gen_ldx_hu(dres, insn, archinfo, abiinfo); ++ break; ++ case 0b1010000: ++ ok = gen_ldx_wu(dres, insn, archinfo, abiinfo); ++ break; ++ case 0b1011000: ++ ok = gen_preldx(dres, insn, archinfo, abiinfo); ++ break; ++ case 0b1100000: ++ ok = gen_fldx_s(dres, insn, archinfo, abiinfo); ++ break; ++ case 0b1101000: ++ ok = gen_fldx_d(dres, insn, archinfo, abiinfo); ++ break; ++ case 0b1110000: ++ ok = gen_fstx_s(dres, insn, archinfo, abiinfo); ++ break; ++ case 0b1111000: ++ ok = gen_fstx_d(dres, insn, archinfo, abiinfo); ++ break; ++ default: ++ ok = False; ++ break; ++ } ++ return ok; ++} ++ ++static Bool disInstr_LOONGARCH64_WRK_00_1110_0001 ( DisResult* dres, UInt insn, ++ const VexArchInfo* archinfo, ++ const VexAbiInfo* abiinfo ) ++{ ++ Bool ok; ++ switch (SLICE(insn, 21, 15)) { ++ case 0b1000000: ++ ok = gen_amswap_w(dres, insn, archinfo, abiinfo); ++ break; ++ case 0b1000001: ++ ok = gen_amswap_d(dres, insn, archinfo, abiinfo); ++ break; ++ case 0b1000010: ++ ok = gen_amadd_w(dres, insn, archinfo, abiinfo); ++ break; ++ case 0b1000011: ++ ok = gen_amadd_d(dres, insn, archinfo, abiinfo); ++ break; ++ case 0b1000100: ++ ok = gen_amand_w(dres, insn, archinfo, abiinfo); ++ break; ++ case 0b1000101: ++ ok = gen_amand_d(dres, insn, archinfo, abiinfo); ++ break; ++ case 0b1000110: ++ ok = gen_amor_w(dres, insn, archinfo, abiinfo); ++ break; ++ case 0b1000111: ++ ok = gen_amor_d(dres, insn, archinfo, abiinfo); ++ break; ++ case 0b1001000: ++ ok = gen_amxor_w(dres, insn, archinfo, abiinfo); ++ break; ++ case 0b1001001: ++ ok = gen_amxor_d(dres, insn, archinfo, abiinfo); ++ break; ++ case 0b1001010: ++ ok = gen_ammax_w(dres, insn, archinfo, abiinfo); ++ break; ++ case 0b1001011: ++ 
ok = gen_ammax_d(dres, insn, archinfo, abiinfo); ++ break; ++ case 0b1001100: ++ ok = gen_ammin_w(dres, insn, archinfo, abiinfo); ++ break; ++ case 0b1001101: ++ ok = gen_ammin_d(dres, insn, archinfo, abiinfo); ++ break; ++ case 0b1001110: ++ ok = gen_ammax_wu(dres, insn, archinfo, abiinfo); ++ break; ++ case 0b1001111: ++ ok = gen_ammax_du(dres, insn, archinfo, abiinfo); ++ break; ++ case 0b1010000: ++ ok = gen_ammin_wu(dres, insn, archinfo, abiinfo); ++ break; ++ case 0b1010001: ++ ok = gen_ammin_du(dres, insn, archinfo, abiinfo); ++ break; ++ case 0b1010010: ++ ok = gen_amswap_db_w(dres, insn, archinfo, abiinfo); ++ break; ++ case 0b1010011: ++ ok = gen_amswap_db_d(dres, insn, archinfo, abiinfo); ++ break; ++ case 0b1010100: ++ ok = gen_amadd_db_w(dres, insn, archinfo, abiinfo); ++ break; ++ case 0b1010101: ++ ok = gen_amadd_db_d(dres, insn, archinfo, abiinfo); ++ break; ++ case 0b1010110: ++ ok = gen_amand_db_w(dres, insn, archinfo, abiinfo); ++ break; ++ case 0b1010111: ++ ok = gen_amand_db_d(dres, insn, archinfo, abiinfo); ++ break; ++ case 0b1011000: ++ ok = gen_amor_db_w(dres, insn, archinfo, abiinfo); ++ break; ++ case 0b1011001: ++ ok = gen_amor_db_d(dres, insn, archinfo, abiinfo); ++ break; ++ case 0b1011010: ++ ok = gen_amxor_db_w(dres, insn, archinfo, abiinfo); ++ break; ++ case 0b1011011: ++ ok = gen_amxor_db_d(dres, insn, archinfo, abiinfo); ++ break; ++ case 0b1011100: ++ ok = gen_ammax_db_w(dres, insn, archinfo, abiinfo); ++ break; ++ case 0b1011101: ++ ok = gen_ammax_db_d(dres, insn, archinfo, abiinfo); ++ break; ++ case 0b1011110: ++ ok = gen_ammin_db_w(dres, insn, archinfo, abiinfo); ++ break; ++ case 0b1011111: ++ ok = gen_ammin_db_d(dres, insn, archinfo, abiinfo); ++ break; ++ case 0b1100000: ++ ok = gen_ammax_db_wu(dres, insn, archinfo, abiinfo); ++ break; ++ case 0b1100001: ++ ok = gen_ammax_db_du(dres, insn, archinfo, abiinfo); ++ break; ++ case 0b1100010: ++ ok = gen_ammin_db_wu(dres, insn, archinfo, abiinfo); ++ break; ++ case 0b1100011: ++ ok = gen_ammin_db_du(dres, insn, archinfo, abiinfo); ++ break; ++ case 0b1100100: ++ ok = gen_dbar(dres, insn, archinfo, abiinfo); ++ break; ++ case 0b1100101: ++ ok = gen_ibar(dres, insn, archinfo, abiinfo); ++ break; ++ case 0b1101000: ++ ok = gen_fldgt_s(dres, insn, archinfo, abiinfo); ++ break; ++ case 0b1101001: ++ ok = gen_fldgt_d(dres, insn, archinfo, abiinfo); ++ break; ++ case 0b1101010: ++ ok = gen_fldle_s(dres, insn, archinfo, abiinfo); ++ break; ++ case 0b1101011: ++ ok = gen_fldle_d(dres, insn, archinfo, abiinfo); ++ break; ++ case 0b1101100: ++ ok = gen_fstgt_s(dres, insn, archinfo, abiinfo); ++ break; ++ case 0b1101101: ++ ok = gen_fstgt_d(dres, insn, archinfo, abiinfo); ++ break; ++ case 0b1101110: ++ ok = gen_fstle_s(dres, insn, archinfo, abiinfo); ++ break; ++ case 0b1101111: ++ ok = gen_fstle_d(dres, insn, archinfo, abiinfo); ++ break; ++ case 0b1110000: ++ ok = gen_ldgt_b(dres, insn, archinfo, abiinfo); ++ break; ++ case 0b1110001: ++ ok = gen_ldgt_h(dres, insn, archinfo, abiinfo); ++ break; ++ case 0b1110010: ++ ok = gen_ldgt_w(dres, insn, archinfo, abiinfo); ++ break; ++ case 0b1110011: ++ ok = gen_ldgt_d(dres, insn, archinfo, abiinfo); ++ break; ++ case 0b1110100: ++ ok = gen_ldle_b(dres, insn, archinfo, abiinfo); ++ break; ++ case 0b1110101: ++ ok = gen_ldle_h(dres, insn, archinfo, abiinfo); ++ break; ++ case 0b1110110: ++ ok = gen_ldle_w(dres, insn, archinfo, abiinfo); ++ break; ++ case 0b1110111: ++ ok = gen_ldle_d(dres, insn, archinfo, abiinfo); ++ break; ++ case 0b1111000: ++ ok = gen_stgt_b(dres, 
insn, archinfo, abiinfo); ++ break; ++ case 0b1111001: ++ ok = gen_stgt_h(dres, insn, archinfo, abiinfo); ++ break; ++ case 0b1111010: ++ ok = gen_stgt_w(dres, insn, archinfo, abiinfo); ++ break; ++ case 0b1111011: ++ ok = gen_stgt_d(dres, insn, archinfo, abiinfo); ++ break; ++ case 0b1111100: ++ ok = gen_stle_b(dres, insn, archinfo, abiinfo); ++ break; ++ case 0b1111101: ++ ok = gen_stle_h(dres, insn, archinfo, abiinfo); ++ break; ++ case 0b1111110: ++ ok = gen_stle_w(dres, insn, archinfo, abiinfo); ++ break; ++ case 0b1111111: ++ ok = gen_stle_d(dres, insn, archinfo, abiinfo); ++ break; ++ default: ++ ok = False; ++ break; ++ } ++ return ok; ++} ++ ++static Bool disInstr_LOONGARCH64_WRK_FCMP_S ( DisResult* dres, UInt insn, ++ const VexArchInfo* archinfo, ++ const VexAbiInfo* abiinfo ) ++{ ++ Bool ok; ++ switch (get_cond(insn)) { ++ case 0x0: ++ ok = gen_fcmp_caf_s(dres, insn, archinfo, abiinfo); ++ break; ++ case 0x1: ++ ok = gen_fcmp_saf_s(dres, insn, archinfo, abiinfo); ++ break; ++ case 0x2: ++ ok = gen_fcmp_clt_s(dres, insn, archinfo, abiinfo); ++ break; ++ case 0x3: ++ ok = gen_fcmp_slt_s(dres, insn, archinfo, abiinfo); ++ break; ++ case 0x4: ++ ok = gen_fcmp_ceq_s(dres, insn, archinfo, abiinfo); ++ break; ++ case 0x5: ++ ok = gen_fcmp_seq_s(dres, insn, archinfo, abiinfo); ++ break; ++ case 0x6: ++ ok = gen_fcmp_cle_s(dres, insn, archinfo, abiinfo); ++ break; ++ case 0x7: ++ ok = gen_fcmp_sle_s(dres, insn, archinfo, abiinfo); ++ break; ++ case 0x8: ++ ok = gen_fcmp_cun_s(dres, insn, archinfo, abiinfo); ++ break; ++ case 0x9: ++ ok = gen_fcmp_sun_s(dres, insn, archinfo, abiinfo); ++ break; ++ case 0xa: ++ ok = gen_fcmp_cult_s(dres, insn, archinfo, abiinfo); ++ break; ++ case 0xb: ++ ok = gen_fcmp_sult_s(dres, insn, archinfo, abiinfo); ++ break; ++ case 0xc: ++ ok = gen_fcmp_cueq_s(dres, insn, archinfo, abiinfo); ++ break; ++ case 0xd: ++ ok = gen_fcmp_sueq_s(dres, insn, archinfo, abiinfo); ++ break; ++ case 0xe: ++ ok = gen_fcmp_cule_s(dres, insn, archinfo, abiinfo); ++ break; ++ case 0xf: ++ ok = gen_fcmp_sule_s(dres, insn, archinfo, abiinfo); ++ break; ++ case 0x10: ++ ok = gen_fcmp_cne_s(dres, insn, archinfo, abiinfo); ++ break; ++ case 0x11: ++ ok = gen_fcmp_sne_s(dres, insn, archinfo, abiinfo); ++ break; ++ case 0x14: ++ ok = gen_fcmp_cor_s(dres, insn, archinfo, abiinfo); ++ break; ++ case 0x15: ++ ok = gen_fcmp_sor_s(dres, insn, archinfo, abiinfo); ++ break; ++ case 0x18: ++ ok = gen_fcmp_cune_s(dres, insn, archinfo, abiinfo); ++ break; ++ case 0x19: ++ ok = gen_fcmp_sune_s(dres, insn, archinfo, abiinfo); ++ break; ++ default: ++ ok = False; ++ break; ++ } ++ return ok; ++} ++ ++static Bool disInstr_LOONGARCH64_WRK_FCMP_D ( DisResult* dres, UInt insn, ++ const VexArchInfo* archinfo, ++ const VexAbiInfo* abiinfo ) ++{ ++ Bool ok; ++ switch (get_cond(insn)) { ++ case 0x0: ++ ok = gen_fcmp_caf_d(dres, insn, archinfo, abiinfo); ++ break; ++ case 0x1: ++ ok = gen_fcmp_saf_d(dres, insn, archinfo, abiinfo); ++ break; ++ case 0x2: ++ ok = gen_fcmp_clt_d(dres, insn, archinfo, abiinfo); ++ break; ++ case 0x3: ++ ok = gen_fcmp_slt_d(dres, insn, archinfo, abiinfo); ++ break; ++ case 0x4: ++ ok = gen_fcmp_ceq_d(dres, insn, archinfo, abiinfo); ++ break; ++ case 0x5: ++ ok = gen_fcmp_seq_d(dres, insn, archinfo, abiinfo); ++ break; ++ case 0x6: ++ ok = gen_fcmp_cle_d(dres, insn, archinfo, abiinfo); ++ break; ++ case 0x7: ++ ok = gen_fcmp_sle_d(dres, insn, archinfo, abiinfo); ++ break; ++ case 0x8: ++ ok = gen_fcmp_cun_d(dres, insn, archinfo, abiinfo); ++ break; ++ case 0x9: ++ ok = 
gen_fcmp_sun_d(dres, insn, archinfo, abiinfo); ++ break; ++ case 0xa: ++ ok = gen_fcmp_cult_d(dres, insn, archinfo, abiinfo); ++ break; ++ case 0xb: ++ ok = gen_fcmp_sult_d(dres, insn, archinfo, abiinfo); ++ break; ++ case 0xc: ++ ok = gen_fcmp_cueq_d(dres, insn, archinfo, abiinfo); ++ break; ++ case 0xd: ++ ok = gen_fcmp_sueq_d(dres, insn, archinfo, abiinfo); ++ break; ++ case 0xe: ++ ok = gen_fcmp_cule_d(dres, insn, archinfo, abiinfo); ++ break; ++ case 0xf: ++ ok = gen_fcmp_sule_d(dres, insn, archinfo, abiinfo); ++ break; ++ case 0x10: ++ ok = gen_fcmp_cne_d(dres, insn, archinfo, abiinfo); ++ break; ++ case 0x11: ++ ok = gen_fcmp_sne_d(dres, insn, archinfo, abiinfo); ++ break; ++ case 0x14: ++ ok = gen_fcmp_cor_d(dres, insn, archinfo, abiinfo); ++ break; ++ case 0x15: ++ ok = gen_fcmp_sor_d(dres, insn, archinfo, abiinfo); ++ break; ++ case 0x18: ++ ok = gen_fcmp_cune_d(dres, insn, archinfo, abiinfo); ++ break; ++ case 0x19: ++ ok = gen_fcmp_sune_d(dres, insn, archinfo, abiinfo); ++ break; ++ default: ++ ok = False; ++ break; ++ } ++ return ok; ++} ++ ++static Bool disInstr_LOONGARCH64_WRK_00 ( DisResult* dres, UInt insn, ++ const VexArchInfo* archinfo, ++ const VexAbiInfo* abiinfo ) ++{ ++ Bool ok; ++ switch (SLICE(insn, 29, 26)) { ++ case 0b0000: ++ ok = disInstr_LOONGARCH64_WRK_00_0000(dres, insn, archinfo, abiinfo); ++ break; ++ case 0b0010: ++ switch (SLICE(insn, 25, 20)) { ++ case 0b000001: ++ ok = gen_fmadd_s(dres, insn, archinfo, abiinfo); ++ break; ++ case 0b000010: ++ ok = gen_fmadd_d(dres, insn, archinfo, abiinfo); ++ break; ++ case 0b000101: ++ ok = gen_fmsub_s(dres, insn, archinfo, abiinfo); ++ break; ++ case 0b000110: ++ ok = gen_fmsub_d(dres, insn, archinfo, abiinfo); ++ break; ++ case 0b001001: ++ ok = gen_fnmadd_s(dres, insn, archinfo, abiinfo); ++ break; ++ case 0b001010: ++ ok = gen_fnmadd_d(dres, insn, archinfo, abiinfo); ++ break; ++ case 0b001101: ++ ok = gen_fnmsub_s(dres, insn, archinfo, abiinfo); ++ break; ++ case 0b001110: ++ ok = gen_fnmsub_d(dres, insn, archinfo, abiinfo); ++ break; ++ default: ++ ok = False; ++ break; ++ } ++ break; ++ case 0b0011: ++ switch (SLICE(insn, 25, 20)) { ++ case 0b000001: ++ if (SLICE(insn, 4, 3) == 0b00) { ++ ok = disInstr_LOONGARCH64_WRK_FCMP_S(dres, insn, archinfo, abiinfo); ++ } else { ++ ok = False; ++ } ++ break; ++ case 0b000010: ++ if (SLICE(insn, 4, 3) == 0b00) { ++ ok = disInstr_LOONGARCH64_WRK_FCMP_D(dres, insn, archinfo, abiinfo); ++ } else { ++ ok = False; ++ } ++ break; ++ case 0b010000: ++ if (SLICE(insn, 19, 18) == 0b00) { ++ ok = gen_fsel(dres, insn, archinfo, abiinfo); ++ } else { ++ ok = False; ++ } ++ break; ++ default: ++ ok = False; ++ break; ++ } ++ break; ++ case 0b0100: ++ ok = gen_addu16i_d(dres, insn, archinfo, abiinfo); ++ break; ++ case 0b0101: ++ if (SLICE(insn, 25, 25) == 0) { ++ ok = gen_lu12i_w(dres, insn, archinfo, abiinfo); ++ } else { ++ ok = gen_lu32i_d(dres, insn, archinfo, abiinfo); ++ } ++ break; ++ case 0b0110: ++ if (SLICE(insn, 25, 25) == 0) { ++ ok = gen_pcaddi(dres, insn, archinfo, abiinfo); ++ } else { ++ ok = gen_pcalau12i(dres, insn, archinfo, abiinfo); ++ } ++ break; ++ case 0b0111: ++ if (SLICE(insn, 25, 25) == 0) { ++ ok = gen_pcaddu12i(dres, insn, archinfo, abiinfo); ++ } else { ++ ok = gen_pcaddu18i(dres, insn, archinfo, abiinfo); ++ } ++ break; ++ case 0b1000: ++ switch (SLICE(insn, 25, 24)) { ++ case 0b00: ++ ok = gen_ll_w(dres, insn, archinfo, abiinfo); ++ break; ++ case 0b01: ++ ok = gen_sc_w(dres, insn, archinfo, abiinfo); ++ break; ++ case 0b10: ++ ok = gen_ll_d(dres, insn, 
archinfo, abiinfo); ++ break; ++ case 0b11: ++ ok = gen_sc_d(dres, insn, archinfo, abiinfo); ++ break; ++ default: ++ ok = False; ++ break; ++ } ++ break; ++ case 0b1001: ++ switch (SLICE(insn, 25, 24)) { ++ case 0b00: ++ ok = gen_ldptr_w(dres, insn, archinfo, abiinfo); ++ break; ++ case 0b01: ++ ok = gen_stptr_w(dres, insn, archinfo, abiinfo); ++ break; ++ case 0b10: ++ ok = gen_ldptr_d(dres, insn, archinfo, abiinfo); ++ break; ++ case 0b11: ++ ok = gen_stptr_d(dres, insn, archinfo, abiinfo); ++ break; ++ default: ++ ok = False; ++ break; ++ } ++ break; ++ case 0b1010: ++ ok = disInstr_LOONGARCH64_WRK_00_1010(dres, insn, archinfo, abiinfo); ++ break; ++ case 0b1110: ++ switch (SLICE(insn, 25, 22)) { ++ case 0b0000: ++ ok = disInstr_LOONGARCH64_WRK_00_1110_0000(dres, insn, archinfo, abiinfo); ++ break; ++ case 0b0001: ++ ok = disInstr_LOONGARCH64_WRK_00_1110_0001(dres, insn, archinfo, abiinfo); ++ break; ++ default: ++ ok = False; ++ break; ++ } ++ break; ++ default: ++ ok = False; ++ break; ++ } ++ return ok; ++} ++ ++static Bool disInstr_LOONGARCH64_WRK_01 ( DisResult* dres, UInt insn, ++ const VexArchInfo* archinfo, ++ const VexAbiInfo* abiinfo ) ++{ ++ Bool ok; ++ switch (SLICE(insn, 29, 26)) { ++ case 0b0000: ++ ok = gen_beqz(dres, insn, archinfo, abiinfo); ++ break; ++ case 0b0001: ++ ok = gen_bnez(dres, insn, archinfo, abiinfo); ++ break; ++ case 0b0010: ++ switch (SLICE(insn, 9, 8)) { ++ case 0b00: ++ ok = gen_bceqz(dres, insn, archinfo, abiinfo); ++ break; ++ case 0b01: ++ ok = gen_bcnez(dres, insn, archinfo, abiinfo); ++ break; ++ default: ++ ok = False; ++ break; ++ } ++ break; ++ case 0b0011: ++ ok = gen_jirl(dres, insn, archinfo, abiinfo); ++ break; ++ case 0b0100: ++ ok = gen_b(dres, insn, archinfo, abiinfo); ++ break; ++ case 0b0101: ++ ok = gen_bl(dres, insn, archinfo, abiinfo); ++ break; ++ case 0b0110: ++ ok = gen_beq(dres, insn, archinfo, abiinfo); ++ break; ++ case 0b0111: ++ ok = gen_bne(dres, insn, archinfo, abiinfo); ++ break; ++ case 0b1000: ++ ok = gen_blt(dres, insn, archinfo, abiinfo); ++ break; ++ case 0b1001: ++ ok = gen_bge(dres, insn, archinfo, abiinfo); ++ break; ++ case 0b1010: ++ ok = gen_bltu(dres, insn, archinfo, abiinfo); ++ break; ++ case 0b1011: ++ ok = gen_bgeu(dres, insn, archinfo, abiinfo); ++ break; ++ default: ++ ok = False; ++ break; ++ } ++ return ok; ++} ++ ++static Bool disInstr_LOONGARCH64_WRK ( /*MB_OUT*/DisResult* dres, ++ const UChar* guest_instr, ++ const VexArchInfo* archinfo, ++ const VexAbiInfo* abiinfo, ++ Bool sigill_diag ) ++{ ++ /* Set result defaults. */ ++ dres->whatNext = Dis_Continue; ++ dres->len = 4; ++ dres->jk_StopHere = Ijk_INVALID; ++ dres->hint = Dis_HintNone; ++ ++ /* At least this is simple on LOONGARCH64: insns are all 4 bytes long, ++ and 4-aligned. So just fish the whole thing out of memory right now ++ and have done. */ ++ UInt insn = getUInt(guest_instr); ++ DIP("\t0x%llx:\t0x%08x\t", (Addr64)guest_PC_curr_instr, insn); ++ vassert((guest_PC_curr_instr & 3ULL) == 0); ++ ++ /* Spot "Special" instructions (see comment at top of file). */ ++ Bool ok = disInstr_LOONGARCH64_WRK_special(dres, guest_instr); ++ if (ok) ++ return ok; ++ ++ /* Main LOONGARCH64 instruction decoder starts here. 
*/ ++ switch (SLICE(insn, 31, 30)) { ++ case 0b00: ++ ok = disInstr_LOONGARCH64_WRK_00(dres, insn, archinfo, abiinfo); ++ break; ++ case 0b01: ++ ok = disInstr_LOONGARCH64_WRK_01(dres, insn, archinfo, abiinfo); ++ break; ++ default: ++ ok = False; ++ break; ++ } ++ ++ /* If the next-level down decoders failed, make sure |dres| didn't ++ get changed. */ ++ if (!ok) { ++ vassert(dres->whatNext == Dis_Continue); ++ vassert(dres->len == 4); ++ vassert(dres->jk_StopHere == Ijk_INVALID); ++ } ++ return ok; ++} ++ ++ ++/*------------------------------------------------------------*/ ++/*--- Top-level fn ---*/ ++/*------------------------------------------------------------*/ ++ ++/* Disassemble a single instruction into IR. The instruction ++ is located in host memory at &guest_code[delta]. */ ++ ++DisResult disInstr_LOONGARCH64 ( IRSB* irsb_IN, ++ const UChar* guest_code_IN, ++ Long delta_IN, ++ Addr guest_IP, ++ VexArch guest_arch, ++ const VexArchInfo* archinfo, ++ const VexAbiInfo* abiinfo, ++ VexEndness host_endness_IN, ++ Bool sigill_diag_IN ) ++{ ++ DisResult dres; ++ vex_bzero(&dres, sizeof(dres)); ++ ++ /* Set globals (see top of this file) */ ++ vassert(guest_arch == VexArchLOONGARCH64); ++ ++ irsb = irsb_IN; ++ host_endness = host_endness_IN; ++ guest_PC_curr_instr = (Addr64)guest_IP; ++ ++ /* Try to decode */ ++ Bool ok = disInstr_LOONGARCH64_WRK(&dres, ++ &guest_code_IN[delta_IN], ++ archinfo, abiinfo, sigill_diag_IN); ++ ++ if (ok) { ++ /* All decode successes end up here. */ ++ vassert(dres.len == 4 || dres.len == 20); ++ switch (dres.whatNext) { ++ case Dis_Continue: ++ putPC(mkU64(dres.len + guest_PC_curr_instr)); ++ break; ++ case Dis_StopHere: ++ break; ++ default: ++ vassert(0); ++ break; ++ } ++ DIP("\n"); ++ } else { ++ /* All decode failures end up here. */ ++ if (sigill_diag_IN) { ++ Int i, j; ++ UChar buf[64]; ++ UInt insn = getUInt(&guest_code_IN[delta_IN]); ++ vex_bzero(buf, sizeof(buf)); ++ for (i = j = 0; i < 32; i++) { ++ if (i > 0 && (i & 3) == 0) ++ buf[j++] = ' '; ++ buf[j++] = (insn & (1 << (31 - i))) ? '1' : '0'; ++ } ++ vex_printf("disInstr(loongarch64): unhandled instruction 0x%08x\n", insn); ++ vex_printf("disInstr(loongarch64): %s\n", buf); ++ } ++ ++ /* Tell the dispatcher that this insn cannot be decoded, and so ++ has not been executed, and (is currently) the next to be ++ executed. PC should be up-to-date since it is made so at the ++ start of each insn, but nevertheless be paranoid and update ++ it again right now. */ ++ putPC(mkU64(guest_PC_curr_instr)); ++ dres.len = 0; ++ dres.whatNext = Dis_StopHere; ++ dres.jk_StopHere = Ijk_NoDecode; ++ } ++ ++ return dres; ++} ++ ++ ++/*--------------------------------------------------------------------*/ ++/*--- end guest_loongarch64_toIR.c ---*/ ++/*--------------------------------------------------------------------*/ +diff --git a/VEX/priv/host_loongarch64_defs.c b/VEX/priv/host_loongarch64_defs.c +new file mode 100644 +index 000000000..9825a5e16 +--- /dev/null ++++ b/VEX/priv/host_loongarch64_defs.c +@@ -0,0 +1,3041 @@ ++ ++/*---------------------------------------------------------------*/ ++/*--- begin host_loongarch64_defs.c ---*/ ++/*---------------------------------------------------------------*/ ++ ++/* ++ This file is part of Valgrind, a dynamic binary instrumentation ++ framework. 
++ ++ Copyright (C) 2021-2022 Loongson Technology Corporation Limited ++ ++ This program is free software; you can redistribute it and/or ++ modify it under the terms of the GNU General Public License as ++ published by the Free Software Foundation; either version 2 of the ++ License, or (at your option) any later version. ++ ++ This program is distributed in the hope that it will be useful, but ++ WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ General Public License for more details. ++ ++ You should have received a copy of the GNU General Public License ++ along with this program; if not, see . ++ ++ The GNU General Public License is contained in the file COPYING. ++*/ ++ ++#include "libvex_basictypes.h" ++#include "libvex.h" ++#include "libvex_trc_values.h" ++ ++#include "main_util.h" ++#include "host_generic_regs.h" ++#include "host_loongarch64_defs.h" ++ ++ ++/* --------- Local helpers. --------- */ ++ ++static inline void mapReg ( HRegRemap* m, HReg* r ) ++{ ++ *r = lookupHRegRemap(m, *r); ++} ++ ++static inline Int extend ( UInt imm, UInt size ) ++{ ++ UInt shift = 32 - size; ++ return (((Int)imm << shift) >> shift); ++} ++ ++ ++/* --------- Registers. --------- */ ++ ++const RRegUniverse* getRRegUniverse_LOONGARCH64 ( void ) ++{ ++ /* The real-register universe is a big constant, so we just want to ++ initialise it once. */ ++ static RRegUniverse rRegUniverse_LOONGARCH64; ++ static Bool rRegUniverse_LOONGARCH64_initted = False; ++ ++ /* Handy shorthand, nothing more */ ++ RRegUniverse* ru = &rRegUniverse_LOONGARCH64; ++ ++ /* This isn't thread-safe. Sigh. */ ++ if (LIKELY(rRegUniverse_LOONGARCH64_initted == True)) ++ return ru; ++ ++ RRegUniverse__init(ru); ++ ++ /* Add the registers. The initial segment of this array must be ++ those available for allocation by reg-alloc, and those that ++ follow are not available for allocation. */ ++ ru->allocable_start[HRcInt64] = ru->size; ++ ru->regs[ru->size++] = hregLOONGARCH64_R23(); ++ ru->regs[ru->size++] = hregLOONGARCH64_R24(); ++ ru->regs[ru->size++] = hregLOONGARCH64_R25(); ++ ru->regs[ru->size++] = hregLOONGARCH64_R26(); ++ ru->regs[ru->size++] = hregLOONGARCH64_R27(); ++ ru->regs[ru->size++] = hregLOONGARCH64_R28(); ++ ru->regs[ru->size++] = hregLOONGARCH64_R29(); ++ ru->regs[ru->size++] = hregLOONGARCH64_R30(); ++ // $r31 is used as guest stack pointer, not available to regalloc. ++ ++ // $r12 is used as a chaining/ProfInc/Cmove/genSpill/genReload temporary ++ // $r13 is used as a ProfInc temporary ++ ru->regs[ru->size++] = hregLOONGARCH64_R14(); ++ ru->regs[ru->size++] = hregLOONGARCH64_R15(); ++ ru->regs[ru->size++] = hregLOONGARCH64_R16(); ++ ru->regs[ru->size++] = hregLOONGARCH64_R17(); ++ ru->regs[ru->size++] = hregLOONGARCH64_R18(); ++ ru->regs[ru->size++] = hregLOONGARCH64_R19(); ++ ru->regs[ru->size++] = hregLOONGARCH64_R20(); ++ ru->allocable_end[HRcInt64] = ru->size - 1; ++ ++ ru->allocable_start[HRcFlt64] = ru->size; ++ ru->regs[ru->size++] = hregLOONGARCH64_F24(); ++ ru->regs[ru->size++] = hregLOONGARCH64_F25(); ++ ru->regs[ru->size++] = hregLOONGARCH64_F26(); ++ ru->regs[ru->size++] = hregLOONGARCH64_F27(); ++ ru->regs[ru->size++] = hregLOONGARCH64_F28(); ++ ru->regs[ru->size++] = hregLOONGARCH64_F29(); ++ ru->regs[ru->size++] = hregLOONGARCH64_F30(); ++ ru->regs[ru->size++] = hregLOONGARCH64_F31(); ++ ru->allocable_end[HRcFlt64] = ru->size - 1; ++ ++ ru->allocable = ru->size; ++ ++ /* And other regs, not available to the allocator. 
*/ ++ ru->regs[ru->size++] = hregLOONGARCH64_R0(); ++ ru->regs[ru->size++] = hregLOONGARCH64_R1(); ++ ru->regs[ru->size++] = hregLOONGARCH64_R2(); ++ ru->regs[ru->size++] = hregLOONGARCH64_R3(); ++ ru->regs[ru->size++] = hregLOONGARCH64_R4(); ++ ru->regs[ru->size++] = hregLOONGARCH64_R5(); ++ ru->regs[ru->size++] = hregLOONGARCH64_R6(); ++ ru->regs[ru->size++] = hregLOONGARCH64_R7(); ++ ru->regs[ru->size++] = hregLOONGARCH64_R8(); ++ ru->regs[ru->size++] = hregLOONGARCH64_R9(); ++ ru->regs[ru->size++] = hregLOONGARCH64_R10(); ++ ru->regs[ru->size++] = hregLOONGARCH64_R11(); ++ ru->regs[ru->size++] = hregLOONGARCH64_R12(); ++ ru->regs[ru->size++] = hregLOONGARCH64_R13(); ++ ru->regs[ru->size++] = hregLOONGARCH64_R21(); ++ ru->regs[ru->size++] = hregLOONGARCH64_R22(); ++ ru->regs[ru->size++] = hregLOONGARCH64_R31(); ++ ru->regs[ru->size++] = hregLOONGARCH64_FCSR3(); ++ ++ rRegUniverse_LOONGARCH64_initted = True; ++ ++ RRegUniverse__check_is_sane(ru); ++ return ru; ++} ++ ++UInt ppHRegLOONGARCH64 ( HReg reg ) ++{ ++ Int r; ++ Int ret = 0; ++ static const HChar* ireg_names[32] = { ++ "$zero", ++ "$ra", ++ "$tp", ++ "$sp", ++ "$a0", "$a1", "$a2", "$a3", "$a4", "$a5", "$a6", "$a7", ++ "$t0", "$t1", "$t2", "$t3", "$t4", "$t5", "$t6", "$t7", "$t8", ++ "$r21", /* Reserved */ ++ "$fp", ++ "$s0", "$s1", "$s2", "$s3", "$s4", "$s5", "$s6", "$s7", "$s8" ++ }; ++ static const HChar* freg_names[32] = { ++ "$fa0", "$fa1", "$fa2", "$fa3", "$fa4", "$fa5", "$fa6", "$fa7", ++ "$ft0", "$ft1", "$ft2", "$ft3", "$ft4", "$ft5", "$ft6", "$ft7", ++ "$ft8", "$ft9", "$ft10", "$ft11", "$ft12", "$ft13", "$ft14", "$ft15", ++ "$fs0", "$fs1", "$fs2", "$fs3", "$fs4", "$fs5", "$fs6", "$fs7" ++ }; ++ ++ /* Be generic for all virtual regs. */ ++ if (hregIsVirtual(reg)) { ++ return ppHReg(reg); ++ } ++ ++ /* But specific for real regs. */ ++ switch (hregClass(reg)) { ++ case HRcInt32: ++ r = hregEncoding(reg); ++ vassert(r < 4); ++ ret = vex_printf("$fcsr%d", r); ++ break; ++ case HRcInt64: ++ r = hregEncoding(reg); ++ vassert(r < 32); ++ ret = vex_printf("%s", ireg_names[r]); ++ break; ++ case HRcFlt64: ++ r = hregEncoding(reg); ++ vassert(r < 32); ++ ret = vex_printf("%s", freg_names[r]); ++ break; ++ default: ++ vpanic("ppHRegLOONGARCH64"); ++ break; ++ } ++ ++ return ret; ++} ++ ++ ++/* --------- Condition codes, LOONGARCH64 encoding. --------- */ ++ ++static inline const HChar* showLOONGARCH64CondCode ( LOONGARCH64CondCode cond ) ++{ ++ const HChar* ret; ++ switch (cond) { ++ case LAcc_EQ: ++ ret = "eq"; /* equal */ ++ break; ++ case LAcc_NE: ++ ret = "ne"; /* not equal */ ++ break; ++ case LAcc_LT: ++ ret = "lt"; /* less than (signed) */ ++ break; ++ case LAcc_GE: ++ ret = "ge"; /* great equal (signed) */ ++ break; ++ case LAcc_LTU: ++ ret = "ltu"; /* less than (unsigned) */ ++ break; ++ case LAcc_GEU: ++ ret = "geu"; /* great equal (unsigned) */ ++ break; ++ case LAcc_AL: ++ ret = "al"; /* always (unconditional) */ ++ break; ++ default: ++ vpanic("showLOONGARCH64CondCode"); ++ break; ++ } ++ return ret; ++} ++ ++ ++/* --------- Memory address expressions (amodes). 
--------- */ ++ ++LOONGARCH64AMode* LOONGARCH64AMode_RI ( HReg reg, UShort imm ) ++{ ++ LOONGARCH64AMode* am = LibVEX_Alloc_inline(sizeof(LOONGARCH64AMode)); ++ am->tag = LAam_RI; ++ am->LAam.RI.base = reg; ++ am->LAam.RI.index = imm; ++ return am; ++} ++ ++LOONGARCH64AMode* LOONGARCH64AMode_RR ( HReg base, HReg index ) ++{ ++ LOONGARCH64AMode* am = LibVEX_Alloc_inline(sizeof(LOONGARCH64AMode)); ++ am->tag = LAam_RR; ++ am->LAam.RR.base = base; ++ am->LAam.RR.index = index; ++ return am; ++} ++ ++static inline void ppLOONGARCH64AMode ( LOONGARCH64AMode* am ) ++{ ++ switch (am->tag) { ++ case LAam_RI: ++ ppHRegLOONGARCH64(am->LAam.RI.base); ++ vex_printf(", "); ++ vex_printf("%d", extend((UInt)am->LAam.RI.index, 12)); ++ break; ++ case LAam_RR: ++ ppHRegLOONGARCH64(am->LAam.RR.base); ++ vex_printf(", "); ++ ppHRegLOONGARCH64(am->LAam.RR.index); ++ break; ++ default: ++ vpanic("ppLOONGARCH64AMode"); ++ break; ++ } ++} ++ ++static inline void addRegUsage_LOONGARCH64AMode( HRegUsage* u, ++ LOONGARCH64AMode* am ) ++{ ++ switch (am->tag) { ++ case LAam_RI: ++ addHRegUse(u, HRmRead, am->LAam.RI.base); ++ break; ++ case LAam_RR: ++ addHRegUse(u, HRmRead, am->LAam.RR.base); ++ addHRegUse(u, HRmRead, am->LAam.RR.index); ++ break; ++ default: ++ vpanic("addRegUsage_LOONGARCH64AMode"); ++ break; ++ } ++} ++ ++static inline void mapRegs_LOONGARCH64AMode( HRegRemap* m, ++ LOONGARCH64AMode* am ) ++{ ++ switch (am->tag) { ++ case LAam_RI: ++ mapReg(m, &am->LAam.RI.base); ++ break; ++ case LAam_RR: ++ mapReg(m, &am->LAam.RR.base); ++ mapReg(m, &am->LAam.RR.index); ++ break; ++ default: ++ vpanic("mapRegs_LOONGARCH64AMode"); ++ break; ++ } ++} ++ ++ ++/* --------- Operand, which can be reg or imm. --------- */ ++ ++LOONGARCH64RI* LOONGARCH64RI_R ( HReg reg ) ++{ ++ LOONGARCH64RI* op = LibVEX_Alloc_inline(sizeof(LOONGARCH64RI)); ++ op->tag = LAri_Reg; ++ op->LAri.R.reg = reg; ++ return op; ++} ++ ++LOONGARCH64RI* LOONGARCH64RI_I ( UShort imm, UChar size, Bool isSigned ) ++{ ++ LOONGARCH64RI* op = LibVEX_Alloc_inline(sizeof(LOONGARCH64RI)); ++ op->tag = LAri_Imm; ++ op->LAri.I.imm = imm; ++ op->LAri.I.size = size; ++ op->LAri.I.isSigned = isSigned; ++ vassert(imm < (1 << size)); ++ vassert(size == 5 || size == 6 || size == 12); ++ return op; ++} ++ ++static inline void ppLOONGARCH64RI ( LOONGARCH64RI* ri ) ++{ ++ switch (ri->tag) { ++ case LAri_Reg: ++ ppHRegLOONGARCH64(ri->LAri.R.reg); ++ break; ++ case LAri_Imm: ++ if (ri->LAri.I.isSigned) { ++ vex_printf("%d", extend((UInt)ri->LAri.I.imm, ri->LAri.I.size)); ++ } else { ++ vex_printf("%u", (UInt)ri->LAri.I.imm); ++ } ++ break; ++ default: ++ vpanic("ppLOONGARCH64RI"); ++ break; ++ } ++} ++ ++static inline void addRegUsage_LOONGARCH64RI( HRegUsage* u, LOONGARCH64RI* ri ) ++{ ++ switch (ri->tag) { ++ case LAri_Reg: ++ addHRegUse(u, HRmRead, ri->LAri.R.reg); ++ break; ++ case LAri_Imm: ++ break; ++ default: ++ vpanic("addRegUsage_LOONGARCH64RI"); ++ break; ++ } ++} ++ ++static inline void mapRegs_LOONGARCH64RI( HRegRemap* m, LOONGARCH64RI* ri ) ++{ ++ switch (ri->tag) { ++ case LAri_Reg: ++ mapReg(m, &ri->LAri.R.reg); ++ break; ++ case LAri_Imm: ++ break; ++ default: ++ vpanic("mapRegs_LOONGARCH64RI"); ++ break; ++ } ++} ++ ++ ++/* --------- Instructions. 
--------- */ ++ ++static inline const HChar* showLOONGARCH64UnOp ( LOONGARCH64UnOp op ) ++{ ++ switch (op) { ++ case LAun_CLZ_W: ++ return "clz.w"; ++ case LAun_CTZ_W: ++ return "ctz.w"; ++ case LAun_CLZ_D: ++ return "clz.d"; ++ case LAun_CTZ_D: ++ return "ctz.w"; ++ case LAun_EXT_W_H: ++ return "ext.w.h"; ++ case LAun_EXT_W_B: ++ return "ext.w.b"; ++ default: ++ vpanic("showLOONGARCH64UnOp"); ++ break; ++ } ++} ++ ++static inline const HChar* showLOONGARCH64BinOp ( LOONGARCH64BinOp op ) ++{ ++ switch (op) { ++ case LAbin_ADD_W: ++ return "add.w"; ++ case LAbin_ADD_D: ++ return "add.d"; ++ case LAbin_SUB_W: ++ return "sub.w"; ++ case LAbin_SUB_D: ++ return "sub.d"; ++ case LAbin_NOR: ++ return "nor"; ++ case LAbin_AND: ++ return "and"; ++ case LAbin_OR: ++ return "or"; ++ case LAbin_XOR: ++ return "xor"; ++ case LAbin_SLL_W: ++ return "sll.w"; ++ case LAbin_SRL_W: ++ return "srl.w"; ++ case LAbin_SRA_W: ++ return "sra.w"; ++ case LAbin_SLL_D: ++ return "sll.d"; ++ case LAbin_SRL_D: ++ return "srl.d"; ++ case LAbin_SRA_D: ++ return "sra.d"; ++ case LAbin_MUL_W: ++ return "mul.w"; ++ case LAbin_MUL_D: ++ return "mul.d"; ++ case LAbin_MULH_W: ++ return "mulh.w"; ++ case LAbin_MULH_WU: ++ return "mulh.wu"; ++ case LAbin_MULH_D: ++ return "mulh.d"; ++ case LAbin_MULH_DU: ++ return "mulh.du"; ++ case LAbin_MULW_D_W: ++ return "mulw.d.w"; ++ case LAbin_MULW_D_WU: ++ return "mulw.d.wu"; ++ case LAbin_DIV_W: ++ return "div.w"; ++ case LAbin_MOD_W: ++ return "mod.w"; ++ case LAbin_DIV_WU: ++ return "div.wu"; ++ case LAbin_MOD_WU: ++ return "mod.wu"; ++ case LAbin_DIV_D: ++ return "div.d"; ++ case LAbin_MOD_D: ++ return "mod.d"; ++ case LAbin_DIV_DU: ++ return "div.du"; ++ case LAbin_MOD_DU: ++ return "mod.du"; ++ case LAbin_SLLI_W: ++ return "slli.w"; ++ case LAbin_SLLI_D: ++ return "slli.d"; ++ case LAbin_SRLI_W: ++ return "srli.w"; ++ case LAbin_SRLI_D: ++ return "srli.d"; ++ case LAbin_SRAI_W: ++ return "srai.w"; ++ case LAbin_SRAI_D: ++ return "srai.d"; ++ case LAbin_ADDI_W: ++ return "addi.w"; ++ case LAbin_ADDI_D: ++ return "addi.d"; ++ case LAbin_ANDI: ++ return "andi"; ++ case LAbin_ORI: ++ return "ori"; ++ case LAbin_XORI: ++ return "xori"; ++ default: ++ vpanic("showLOONGARCH64BinOp"); ++ break; ++ } ++} ++ ++static inline const HChar* showLOONGARCH64LoadOp ( LOONGARCH64LoadOp op ) ++{ ++ switch (op) { ++ case LAload_LD_D: ++ return "ld.d"; ++ case LAload_LD_BU: ++ return "ld.bu"; ++ case LAload_LD_HU: ++ return "ld.hu"; ++ case LAload_LD_WU: ++ return "ld.wu"; ++ case LAload_LDX_D: ++ return "ldx.d"; ++ case LAload_LDX_BU: ++ return "ldx.bu"; ++ case LAload_LDX_HU: ++ return "ldx.hu"; ++ case LAload_LDX_WU: ++ return "ldx.wu"; ++ default: ++ vpanic("LOONGARCH64LoadOp"); ++ break; ++ } ++} ++ ++static inline const HChar* showLOONGARCH64StoreOp ( LOONGARCH64StoreOp op ) ++{ ++ switch (op) { ++ case LAstore_ST_B: ++ return "st.b"; ++ case LAstore_ST_H: ++ return "st.h"; ++ case LAstore_ST_W: ++ return "st.w"; ++ case LAstore_ST_D: ++ return "st.d"; ++ case LAstore_STX_B: ++ return "stx.b"; ++ case LAstore_STX_H: ++ return "stx.h"; ++ case LAstore_STX_W: ++ return "stx.w"; ++ case LAstore_STX_D: ++ return "stx.d"; ++ default: ++ vpanic("LOONGARCH64StoreOp"); ++ break; ++ } ++} ++ ++static inline const HChar* showLOONGARCH64LLSCOp ( LOONGARCH64LLSCOp op ) ++{ ++ switch (op) { ++ case LAllsc_LL_W: ++ return "ll.w"; ++ case LAllsc_SC_W: ++ return "sc.w"; ++ case LAllsc_LL_D: ++ return "ll.d"; ++ case LAllsc_SC_D: ++ return "sc.d"; ++ default: ++ vpanic("LOONGARCH64LLSCOp"); ++ break; ++ } ++} ++ 
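The decoder hunks above resolve an instruction purely by fixed bit fields: disInstr_LOONGARCH64_WRK switches on SLICE(insn, 31, 30), and each lower-level disInstr_LOONGARCH64_WRK_* function peels a further field off the 32-bit word until a gen_* handler is chosen, with every unmatched case falling through to ok = False. The definition of SLICE lies outside this excerpt, so what follows is only a minimal standalone sketch: it assumes SLICE(insn, hi, lo) yields the inclusive bit field [hi:lo], uses plain stdint types rather than VEX's UInt, and the helper name decode_subset and the hand-assembled word 0x001098a4 are hypothetical illustrations, not code from the patch. The opcode field values are copied from the switch arms above.

#include <stdio.h>
#include <stdint.h>

/* Assumed semantics of the patch's SLICE: extract bits [hi:lo], inclusive. */
#define SLICE(insn, hi, lo) \
   (((uint32_t)(insn) >> (lo)) & ((1u << ((hi) - (lo) + 1)) - 1u))

/* Recognise just ADD.W / ADD.D the same way the nested switches above do,
   narrowing the opcode space one bit field at a time. */
static const char* decode_subset(uint32_t insn)
{
   if (SLICE(insn, 31, 30) != 0)      /* level of disInstr_LOONGARCH64_WRK     */
      return "not handled here";
   if (SLICE(insn, 29, 26) != 0)      /* level of disInstr_LOONGARCH64_WRK_00  */
      return "not handled here";
   if (SLICE(insn, 25, 22) != 0)      /* level of ..._WRK_00_0000              */
      return "not handled here";
   switch (SLICE(insn, 21, 15)) {     /* 7-bit opcode field                    */
      case 0x20: return "add.w";      /* 0b0100000 in the patch                */
      case 0x21: return "add.d";      /* 0b0100001 in the patch                */
      default:   return "unknown";
   }
}

int main(void)
{
   /* Hand-assembled add.d $a0, $a1, $a2 (rd=4, rj=5, rk=6); verify against
      the LoongArch reference manual before relying on this value. */
   printf("%s\n", decode_subset(0x001098a4u));
   return 0;
}

This field-by-field dispatch mirrors the LoongArch opcode layout, so each switch stays small, and a decode failure simply propagates False back up to the caller, which turns it into the Ijk_NoDecode path shown later in the hunk.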
++static inline const HChar* showLOONGARCH64BarOp ( LOONGARCH64BarOp op ) ++{ ++ const HChar* ret; ++ switch (op) { ++ case LAbar_DBAR: ++ return "dbar"; ++ case LAbar_IBAR: ++ return "ibar"; ++ default: ++ vpanic("showLOONGARCH64BarOp"); ++ break; ++ } ++ return ret; ++} ++ ++static inline const HChar* showLOONGARCH64FpUnOp ( LOONGARCH64FpUnOp op ) ++{ ++ const HChar* ret; ++ switch (op) { ++ case LAfpun_FABS_S: ++ return "fabs.s"; ++ case LAfpun_FABS_D: ++ return "fabs.d"; ++ case LAfpun_FNEG_S: ++ return "fneg.s"; ++ case LAfpun_FNEG_D: ++ return "fneg.d"; ++ case LAfpun_FLOGB_S: ++ return "flogb.s"; ++ case LAfpun_FLOGB_D: ++ return "flogb.d"; ++ case LAfpun_FSQRT_S: ++ return "fsqrt.s"; ++ case LAfpun_FSQRT_D: ++ return "fsqrt.d"; ++ case LAfpun_FRSQRT_S: ++ return "frsqrt.s"; ++ case LAfpun_FRSQRT_D: ++ return "frsqrt.d"; ++ case LAfpun_FCVT_S_D: ++ return "fcvt.s.d"; ++ case LAfpun_FCVT_D_S: ++ return "fcvt.d.s"; ++ case LAfpun_FTINT_W_S: ++ return "ftint.w.s"; ++ case LAfpun_FTINT_W_D: ++ return "ftint.w.d"; ++ case LAfpun_FTINT_L_S: ++ return "ftint.l.s"; ++ case LAfpun_FTINT_L_D: ++ return "ftint.l.d"; ++ case LAfpun_FFINT_S_W: ++ return "ffint.s.w"; ++ case LAfpun_FFINT_S_L: ++ return "ffint.s.l"; ++ case LAfpun_FFINT_D_W: ++ return "ffint.d.w"; ++ case LAfpun_FFINT_D_L: ++ return "ffint.d.l"; ++ case LAfpun_FRINT_S: ++ return "frint.s"; ++ case LAfpun_FRINT_D: ++ return "frint.d"; ++ default: ++ vpanic("showLOONGARCH64FpUnOp"); ++ break; ++ } ++ return ret; ++} ++ ++static inline const HChar* showLOONGARCH64FpBinOp ( LOONGARCH64FpBinOp op ) ++{ ++ const HChar* ret; ++ switch (op) { ++ case LAfpbin_FADD_S: ++ return "fadd.s"; ++ case LAfpbin_FADD_D: ++ return "fadd.d"; ++ case LAfpbin_FSUB_S: ++ return "fsub.s"; ++ case LAfpbin_FSUB_D: ++ return "fsub.d"; ++ case LAfpbin_FMUL_S: ++ return "fmul.s"; ++ case LAfpbin_FMUL_D: ++ return "fmul.d"; ++ case LAfpbin_FDIV_S: ++ return "fdiv.s"; ++ case LAfpbin_FDIV_D: ++ return "fdiv.d"; ++ case LAfpbin_FMAX_S: ++ return "fmax.s"; ++ case LAfpbin_FMAX_D: ++ return "fmax.d"; ++ case LAfpbin_FMIN_S: ++ return "fmin.s"; ++ case LAfpbin_FMIN_D: ++ return "fmin.d"; ++ case LAfpbin_FMAXA_S: ++ return "fmaxa.s"; ++ case LAfpbin_FMAXA_D: ++ return "fmaxa.d"; ++ case LAfpbin_FMINA_S: ++ return "fmina.s"; ++ case LAfpbin_FMINA_D: ++ return "fmina.d"; ++ case LAfpbin_FSCALEB_S: ++ return "fscaleb.s"; ++ case LAfpbin_FSCALEB_D: ++ return "fscaleb.d"; ++ default: ++ vpanic("showLOONGARCH64FpBinOp"); ++ break; ++ } ++ return ret; ++} ++ ++static inline const HChar* showLOONGARCH64FpTriOp ( LOONGARCH64FpTriOp op ) ++{ ++ const HChar* ret; ++ switch (op) { ++ case LAfpbin_FMADD_S: ++ return "fmadd.s"; ++ case LAfpbin_FMADD_D: ++ return "fmadd.d"; ++ case LAfpbin_FMSUB_S: ++ return "fmsub.s"; ++ case LAfpbin_FMSUB_D: ++ return "fmsub.d"; ++ default: ++ vpanic("showLOONGARCH64FpTriOp"); ++ break; ++ } ++ return ret; ++} ++ ++static inline const HChar* showLOONGARCH64FpLoadOp ( LOONGARCH64FpLoadOp op ) ++{ ++ switch (op) { ++ case LAfpload_FLD_S: ++ return "fld.s"; ++ case LAfpload_FLD_D: ++ return "fld.d"; ++ case LAfpload_FLDX_S: ++ return "fldx.s"; ++ case LAfpload_FLDX_D: ++ return "fldx.d"; ++ default: ++ vpanic("LOONGARCH64FpLoadOp"); ++ break; ++ } ++} ++ ++static inline const HChar* showLOONGARCH64FpStoreOp ( LOONGARCH64FpStoreOp op ) ++{ ++ switch (op) { ++ case LAfpstore_FST_S: ++ return "fst.s"; ++ case LAfpstore_FST_D: ++ return "fst.d"; ++ case LAfpstore_FSTX_S: ++ return "fstx.s"; ++ case LAfpstore_FSTX_D: ++ return "fstx.d"; ++ default: ++ 
vpanic("LOONGARCH64FpStoreOp"); ++ break; ++ } ++} ++ ++static inline const HChar* showLOONGARCH64FpMoveOp ( LOONGARCH64FpMoveOp op ) ++{ ++ switch (op) { ++ case LAfpmove_FMOV_S: ++ return "fmov.s"; ++ case LAfpmove_FMOV_D: ++ return "fmov.d"; ++ case LAfpmove_MOVGR2FR_W: ++ return "movgr2fr.w"; ++ case LAfpmove_MOVGR2FR_D: ++ return "movgr2fr.d"; ++ case LAfpmove_MOVFR2GR_S: ++ return "movfr2gr.s"; ++ case LAfpmove_MOVFR2GR_D: ++ return "movfr2gr.d"; ++ case LAfpmove_MOVGR2FCSR: ++ return "movgr2fcsr"; ++ case LAfpmove_MOVFCSR2GR: ++ return "movfcsr2gr"; ++ default: ++ vpanic("showLOONGARCH64FpMoveOp"); ++ break; ++ } ++} ++ ++static inline const HChar* showLOONGARCH64FpCmpOp ( LOONGARCH64FpCmpOp op ) ++{ ++ const HChar* ret; ++ switch (op) { ++ case LAfpcmp_FCMP_CLT_S: ++ return "fcmp.clt.s"; ++ case LAfpcmp_FCMP_CLT_D: ++ return "fcmp.clt.d"; ++ case LAfpcmp_FCMP_CEQ_S: ++ return "fcmp.ceq.s"; ++ case LAfpcmp_FCMP_CEQ_D: ++ return "fcmp.ceq.d"; ++ case LAfpcmp_FCMP_CUN_S: ++ return "fcmp.cun.s"; ++ case LAfpcmp_FCMP_CUN_D: ++ return "fcmp.cun.d"; ++ default: ++ vpanic("showLOONGARCH64FpCmpOp"); ++ break; ++ } ++ return ret; ++} ++ ++LOONGARCH64Instr* LOONGARCH64Instr_LI ( ULong imm, HReg dst ) ++{ ++ LOONGARCH64Instr* i = LibVEX_Alloc_inline(sizeof(LOONGARCH64Instr)); ++ i->tag = LAin_LI; ++ i->LAin.LI.imm = imm; ++ i->LAin.LI.dst = dst; ++ return i; ++} ++ ++LOONGARCH64Instr* LOONGARCH64Instr_Unary ( LOONGARCH64UnOp op, ++ HReg src, HReg dst ) ++{ ++ LOONGARCH64Instr* i = LibVEX_Alloc_inline(sizeof(LOONGARCH64Instr)); ++ i->tag = LAin_Un; ++ i->LAin.Unary.op = op; ++ i->LAin.Unary.src = src; ++ i->LAin.Unary.dst = dst; ++ return i; ++} ++ ++LOONGARCH64Instr* LOONGARCH64Instr_Binary ( LOONGARCH64BinOp op, ++ LOONGARCH64RI* src2, ++ HReg src1, HReg dst ) ++{ ++ LOONGARCH64Instr* i = LibVEX_Alloc_inline(sizeof(LOONGARCH64Instr)); ++ i->tag = LAin_Bin; ++ i->LAin.Binary.op = op; ++ i->LAin.Binary.src2 = src2; ++ i->LAin.Binary.src1 = src1; ++ i->LAin.Binary.dst = dst; ++ return i; ++} ++ ++LOONGARCH64Instr* LOONGARCH64Instr_Load ( LOONGARCH64LoadOp op, ++ LOONGARCH64AMode* src, HReg dst ) ++{ ++ LOONGARCH64Instr* i = LibVEX_Alloc_inline(sizeof(LOONGARCH64Instr)); ++ i->tag = LAin_Load; ++ i->LAin.Load.op = op; ++ i->LAin.Load.src = src; ++ i->LAin.Load.dst = dst; ++ return i; ++} ++ ++LOONGARCH64Instr* LOONGARCH64Instr_Store ( LOONGARCH64StoreOp op, ++ LOONGARCH64AMode* dst, HReg src ) ++{ ++ LOONGARCH64Instr* i = LibVEX_Alloc_inline(sizeof(LOONGARCH64Instr)); ++ i->tag = LAin_Store; ++ i->LAin.Store.op = op; ++ i->LAin.Store.dst = dst; ++ i->LAin.Store.src = src; ++ return i; ++} ++ ++LOONGARCH64Instr* LOONGARCH64Instr_LLSC ( LOONGARCH64LLSCOp op, Bool isLoad, ++ LOONGARCH64AMode* addr, HReg val ) ++{ ++ LOONGARCH64Instr* i = LibVEX_Alloc_inline(sizeof(LOONGARCH64Instr)); ++ i->tag = LAin_LLSC; ++ i->LAin.LLSC.op = op; ++ i->LAin.LLSC.isLoad = isLoad; ++ i->LAin.LLSC.addr = addr; ++ i->LAin.LLSC.val = val; ++ return i; ++} ++ ++LOONGARCH64Instr* LOONGARCH64Instr_Bar ( LOONGARCH64BarOp op, UShort hint ) ++{ ++ LOONGARCH64Instr* i = LibVEX_Alloc_inline(sizeof(LOONGARCH64Instr)); ++ i->tag = LAin_Bar; ++ i->LAin.Bar.op = op; ++ i->LAin.Bar.hint = hint; ++ return i; ++} ++ ++LOONGARCH64Instr* LOONGARCH64Instr_FpUnary ( LOONGARCH64FpUnOp op, ++ HReg src, HReg dst ) ++{ ++ LOONGARCH64Instr* i = LibVEX_Alloc_inline(sizeof(LOONGARCH64Instr)); ++ i->tag = LAin_FpUn; ++ i->LAin.FpUnary.op = op; ++ i->LAin.FpUnary.src = src; ++ i->LAin.FpUnary.dst = dst; ++ return i; ++} ++ ++LOONGARCH64Instr* 
LOONGARCH64Instr_FpBinary ( LOONGARCH64FpBinOp op, HReg src2, ++ HReg src1, HReg dst ) ++{ ++ LOONGARCH64Instr* i = LibVEX_Alloc_inline(sizeof(LOONGARCH64Instr)); ++ i->tag = LAin_FpBin; ++ i->LAin.FpBinary.op = op; ++ i->LAin.FpBinary.src2 = src2; ++ i->LAin.FpBinary.src1 = src1; ++ i->LAin.FpBinary.dst = dst; ++ return i; ++} ++ ++LOONGARCH64Instr* LOONGARCH64Instr_FpTrinary ( LOONGARCH64FpTriOp op, ++ HReg src3, HReg src2, ++ HReg src1, HReg dst ) ++{ ++ LOONGARCH64Instr* i = LibVEX_Alloc_inline(sizeof(LOONGARCH64Instr)); ++ i->tag = LAin_FpTri; ++ i->LAin.FpTrinary.op = op; ++ i->LAin.FpTrinary.src3 = src3; ++ i->LAin.FpTrinary.src2 = src2; ++ i->LAin.FpTrinary.src1 = src1; ++ i->LAin.FpTrinary.dst = dst; ++ return i; ++} ++ ++LOONGARCH64Instr* LOONGARCH64Instr_FpLoad ( LOONGARCH64FpLoadOp op, ++ LOONGARCH64AMode* src, HReg dst ) ++{ ++ LOONGARCH64Instr* i = LibVEX_Alloc_inline(sizeof(LOONGARCH64Instr)); ++ i->tag = LAin_FpLoad; ++ i->LAin.FpLoad.op = op; ++ i->LAin.FpLoad.src = src; ++ i->LAin.FpLoad.dst = dst; ++ return i; ++} ++ ++LOONGARCH64Instr* LOONGARCH64Instr_FpStore ( LOONGARCH64FpStoreOp op, ++ LOONGARCH64AMode* dst, HReg src ) ++{ ++ LOONGARCH64Instr* i = LibVEX_Alloc_inline(sizeof(LOONGARCH64Instr)); ++ i->tag = LAin_FpStore; ++ i->LAin.FpStore.op = op; ++ i->LAin.FpStore.dst = dst; ++ i->LAin.FpStore.src = src; ++ return i; ++} ++ ++LOONGARCH64Instr* LOONGARCH64Instr_FpMove ( LOONGARCH64FpMoveOp op, ++ HReg src, HReg dst ) ++{ ++ LOONGARCH64Instr* i = LibVEX_Alloc_inline(sizeof(LOONGARCH64Instr)); ++ i->tag = LAin_FpMove; ++ i->LAin.FpMove.op = op; ++ i->LAin.FpMove.src = src; ++ i->LAin.FpMove.dst = dst; ++ return i; ++} ++ ++LOONGARCH64Instr* LOONGARCH64Instr_FpCmp ( LOONGARCH64FpCmpOp op, HReg src2, ++ HReg src1, HReg dst ) ++{ ++ LOONGARCH64Instr* i = LibVEX_Alloc_inline(sizeof(LOONGARCH64Instr)); ++ i->tag = LAin_FpCmp; ++ i->LAin.FpCmp.op = op; ++ i->LAin.FpCmp.src2 = src2; ++ i->LAin.FpCmp.src1 = src1; ++ i->LAin.FpCmp.dst = dst; ++ return i; ++} ++ ++LOONGARCH64Instr* LOONGARCH64Instr_Cas ( HReg old, HReg addr, HReg expd, ++ HReg data, Bool size64 ) ++{ ++ LOONGARCH64Instr* i = LibVEX_Alloc_inline(sizeof(LOONGARCH64Instr)); ++ i->tag = LAin_Cas; ++ i->LAin.Cas.old = old; ++ i->LAin.Cas.addr = addr; ++ i->LAin.Cas.expd = expd; ++ i->LAin.Cas.data = data; ++ i->LAin.Cas.size64 = size64; ++ return i; ++} ++ ++LOONGARCH64Instr* LOONGARCH64Instr_Cmp ( LOONGARCH64CondCode cond, ++ HReg src2, HReg src1, HReg dst ) ++{ ++ LOONGARCH64Instr* i = LibVEX_Alloc_inline(sizeof(LOONGARCH64Instr)); ++ i->tag = LAin_Cmp; ++ i->LAin.Cmp.cond = cond; ++ i->LAin.Cmp.src2 = src2; ++ i->LAin.Cmp.src1 = src1; ++ i->LAin.Cmp.dst = dst; ++ return i; ++} ++ ++LOONGARCH64Instr* LOONGARCH64Instr_CMove ( HReg cond, HReg r0, HReg r1, ++ HReg dst, Bool isInt ) ++{ ++ LOONGARCH64Instr* i = LibVEX_Alloc_inline(sizeof(LOONGARCH64Instr)); ++ i->tag = LAin_CMove; ++ i->LAin.CMove.cond = cond; ++ i->LAin.CMove.r0 = r0; ++ i->LAin.CMove.r1 = r1; ++ i->LAin.CMove.dst = dst; ++ i->LAin.CMove.isInt = isInt; ++ return i; ++} ++ ++LOONGARCH64Instr* LOONGARCH64Instr_Call ( HReg cond, Addr64 target, ++ UInt nArgRegs, RetLoc rloc ) ++{ ++ LOONGARCH64Instr* i = LibVEX_Alloc_inline(sizeof(LOONGARCH64Instr)); ++ i->tag = LAin_Call; ++ i->LAin.Call.cond = cond; ++ i->LAin.Call.target = target; ++ i->LAin.Call.nArgRegs = nArgRegs; ++ i->LAin.Call.rloc = rloc; ++ return i; ++} ++ ++LOONGARCH64Instr* LOONGARCH64Instr_XDirect ( Addr64 dstGA, ++ LOONGARCH64AMode* amPC, ++ HReg cond, Bool toFastEP ) ++{ ++ 
LOONGARCH64Instr* i = LibVEX_Alloc_inline(sizeof(LOONGARCH64Instr)); ++ i->tag = LAin_XDirect; ++ i->LAin.XDirect.dstGA = dstGA; ++ i->LAin.XDirect.amPC = amPC; ++ i->LAin.XDirect.cond = cond; ++ i->LAin.XDirect.toFastEP = toFastEP; ++ return i; ++} ++ ++LOONGARCH64Instr* LOONGARCH64Instr_XIndir ( HReg dstGA, LOONGARCH64AMode* amPC, ++ HReg cond ) ++{ ++ LOONGARCH64Instr* i = LibVEX_Alloc_inline(sizeof(LOONGARCH64Instr)); ++ i->tag = LAin_XIndir; ++ i->LAin.XIndir.dstGA = dstGA; ++ i->LAin.XIndir.amPC = amPC; ++ i->LAin.XIndir.cond = cond; ++ return i; ++} ++ ++LOONGARCH64Instr* LOONGARCH64Instr_XAssisted ( HReg dstGA, ++ LOONGARCH64AMode* amPC, ++ HReg cond, IRJumpKind jk ) ++{ ++ LOONGARCH64Instr* i = LibVEX_Alloc_inline(sizeof(LOONGARCH64Instr)); ++ i->tag = LAin_XAssisted; ++ i->LAin.XAssisted.dstGA = dstGA; ++ i->LAin.XAssisted.amPC = amPC; ++ i->LAin.XAssisted.cond = cond; ++ i->LAin.XAssisted.jk = jk; ++ return i; ++} ++ ++LOONGARCH64Instr* LOONGARCH64Instr_EvCheck ( LOONGARCH64AMode* amCounter, ++ LOONGARCH64AMode* amFailAddr ) ++{ ++ LOONGARCH64Instr* i = LibVEX_Alloc_inline(sizeof(LOONGARCH64Instr)); ++ i->tag = LAin_EvCheck; ++ i->LAin.EvCheck.amCounter = amCounter; ++ i->LAin.EvCheck.amFailAddr = amFailAddr; ++ return i; ++} ++ ++LOONGARCH64Instr* LOONGARCH64Instr_ProfInc ( void ) ++{ ++ LOONGARCH64Instr* i = LibVEX_Alloc_inline(sizeof(LOONGARCH64Instr)); ++ i->tag = LAin_ProfInc; ++ return i; ++} ++ ++ ++/* -------- Pretty Print instructions ------------- */ ++ ++static inline void ppLI ( ULong imm, HReg dst ) ++{ ++ vex_printf("li "); ++ ppHRegLOONGARCH64(dst); ++ vex_printf(", 0x%llx", imm); ++} ++ ++static inline void ppUnary ( LOONGARCH64UnOp op, HReg src, HReg dst ) ++{ ++ vex_printf("%s ", showLOONGARCH64UnOp(op)); ++ ppHRegLOONGARCH64(dst); ++ vex_printf(", "); ++ ppHRegLOONGARCH64(src); ++} ++ ++static inline void ppBinary ( LOONGARCH64BinOp op, LOONGARCH64RI* src2, ++ HReg src1, HReg dst ) ++{ ++ vex_printf("%s ", showLOONGARCH64BinOp(op)); ++ ppHRegLOONGARCH64(dst); ++ vex_printf(", "); ++ ppHRegLOONGARCH64(src1); ++ vex_printf(", "); ++ ppLOONGARCH64RI(src2); ++} ++ ++static inline void ppLoad ( LOONGARCH64LoadOp op, LOONGARCH64AMode* src, ++ HReg dst ) ++{ ++ vex_printf("%s ", showLOONGARCH64LoadOp(op)); ++ ppHRegLOONGARCH64(dst); ++ vex_printf(", "); ++ ppLOONGARCH64AMode(src); ++} ++ ++static inline void ppStore ( LOONGARCH64StoreOp op, LOONGARCH64AMode* dst, ++ HReg src ) ++{ ++ vex_printf("%s ", showLOONGARCH64StoreOp(op)); ++ ppHRegLOONGARCH64(src); ++ vex_printf(", "); ++ ppLOONGARCH64AMode(dst); ++} ++ ++static inline void ppLLSC ( LOONGARCH64LLSCOp op, LOONGARCH64AMode* addr, ++ HReg val ) ++{ ++ vex_printf("%s ", showLOONGARCH64LLSCOp(op)); ++ ppHRegLOONGARCH64(val); ++ vex_printf(", "); ++ ppLOONGARCH64AMode(addr); ++} ++ ++static inline void ppBar ( LOONGARCH64BarOp op, UShort hint ) ++{ ++ vex_printf("%s %u", showLOONGARCH64BarOp(op), (UInt)hint); ++} ++ ++static inline void ppFpUnary ( LOONGARCH64FpUnOp op, HReg src, HReg dst ) ++{ ++ vex_printf("%s ", showLOONGARCH64FpUnOp(op)); ++ ppHRegLOONGARCH64(dst); ++ vex_printf(", "); ++ ppHRegLOONGARCH64(src); ++} ++ ++static inline void ppFpBinary ( LOONGARCH64FpBinOp op, HReg src2, ++ HReg src1, HReg dst ) ++{ ++ vex_printf("%s ", showLOONGARCH64FpBinOp(op)); ++ ppHRegLOONGARCH64(dst); ++ vex_printf(", "); ++ ppHRegLOONGARCH64(src1); ++ vex_printf(", "); ++ ppHRegLOONGARCH64(src2); ++} ++ ++static inline void ppFpTrinary ( LOONGARCH64FpTriOp op, HReg src3, ++ HReg src2, HReg src1, HReg dst ) ++{ ++ 
vex_printf("%s ", showLOONGARCH64FpTriOp(op)); ++ ppHRegLOONGARCH64(dst); ++ vex_printf(", "); ++ ppHRegLOONGARCH64(src1); ++ vex_printf(", "); ++ ppHRegLOONGARCH64(src2); ++ vex_printf(", "); ++ ppHRegLOONGARCH64(src3); ++} ++ ++static inline void ppFpLoad ( LOONGARCH64FpLoadOp op, LOONGARCH64AMode* src, ++ HReg dst ) ++{ ++ vex_printf("%s ", showLOONGARCH64FpLoadOp(op)); ++ ppHRegLOONGARCH64(dst); ++ vex_printf(", "); ++ ppLOONGARCH64AMode(src); ++} ++ ++static inline void ppFpStore ( LOONGARCH64FpStoreOp op, LOONGARCH64AMode* dst, ++ HReg src ) ++{ ++ vex_printf("%s ", showLOONGARCH64FpStoreOp(op)); ++ ppHRegLOONGARCH64(src); ++ vex_printf(", "); ++ ppLOONGARCH64AMode(dst); ++} ++ ++static inline void ppFpMove ( LOONGARCH64FpMoveOp op, HReg src, HReg dst ) ++{ ++ vex_printf("%s ", showLOONGARCH64FpMoveOp(op)); ++ ppHRegLOONGARCH64(dst); ++ vex_printf(", "); ++ ppHRegLOONGARCH64(src); ++} ++ ++static inline void ppFpCmp ( LOONGARCH64FpCmpOp op, HReg src2, ++ HReg src1, HReg dst ) ++{ ++ vex_printf("%s ", showLOONGARCH64FpCmpOp(op)); ++ vex_printf("$fcc0, "); ++ ppHRegLOONGARCH64(src1); ++ vex_printf(", "); ++ ppHRegLOONGARCH64(src2); ++ vex_printf("; movcf2gr "); ++ ppHRegLOONGARCH64(dst); ++ vex_printf(", $fcc0"); ++} ++ ++static inline void ppCas ( HReg old, HReg addr, HReg expd, ++ HReg data, Bool size64) ++{ ++ ppHRegLOONGARCH64(old); ++ vex_printf(" = cas(%dbit)(", size64 ? 64 : 32); ++ ppHRegLOONGARCH64(expd); ++ vex_printf(", "); ++ ppHRegLOONGARCH64(data); ++ vex_printf(" -> "); ++ ppHRegLOONGARCH64(addr); ++ vex_printf(")"); ++} ++ ++static inline void ppCmp ( LOONGARCH64CondCode cond, HReg src2, ++ HReg src1, HReg dst ) ++{ ++ ppHRegLOONGARCH64(dst); ++ vex_printf(" = cmp%s(", showLOONGARCH64CondCode(cond)); ++ ppHRegLOONGARCH64(src1); ++ vex_printf(", "); ++ ppHRegLOONGARCH64(src2); ++ vex_printf(")"); ++} ++ ++static inline void ppCMove ( HReg cond, HReg r0, HReg r1, ++ HReg dst, Bool isInt ) ++{ ++ if (isInt) { ++ vex_printf("masknez $t0, "); ++ ppHRegLOONGARCH64(r0); ++ vex_printf(", "); ++ ppHRegLOONGARCH64(cond); ++ vex_printf("; maskeqz "); ++ ppHRegLOONGARCH64(dst); ++ vex_printf(", "); ++ ppHRegLOONGARCH64(r1); ++ vex_printf(", "); ++ ppHRegLOONGARCH64(cond); ++ vex_printf("; or "); ++ ppHRegLOONGARCH64(dst); ++ vex_printf(", $t0, "); ++ ppHRegLOONGARCH64(dst); ++ } else { ++ vex_printf("movgr2cf "); ++ ppHRegLOONGARCH64(cond); ++ vex_printf(", $fcc0; fsel "); ++ ppHRegLOONGARCH64(dst); ++ vex_printf(", "); ++ ppHRegLOONGARCH64(r0); ++ vex_printf(", "); ++ ppHRegLOONGARCH64(r1); ++ vex_printf(", $fcc0"); ++ } ++} ++ ++static inline void ppCall ( HReg cond, Addr64 target, ++ UInt nArgRegs, RetLoc rloc ) ++{ ++ if (!hregIsInvalid(cond)) { ++ vex_printf("if ("); ++ ppHRegLOONGARCH64(cond); ++ vex_printf(") { "); ++ } ++ vex_printf("call 0x%llx [nArgRegs=%u, ", target, nArgRegs); ++ ppRetLoc(rloc); ++ vex_printf("]"); ++ if (!hregIsInvalid(cond)) ++ vex_printf(" }"); ++} ++ ++static inline void ppXDirect ( Addr64 dstGA, LOONGARCH64AMode* amPC, ++ HReg cond, Bool toFastEP ) ++{ ++ vex_printf("(xDirect) "); ++ if (!hregIsInvalid(cond)) { ++ vex_printf("if ("); ++ ppHRegLOONGARCH64(cond); ++ vex_printf(") { "); ++ } ++ vex_printf("li $t0, 0x%llx; ", (ULong)dstGA); ++ vex_printf("st.w $t0, "); ++ ppLOONGARCH64AMode(amPC); ++ vex_printf("; li $t0, $disp_cp_chain_me_to_%sEP; ", ++ toFastEP ? 
"fast" : "slow"); ++ vex_printf("jirl $ra, $t0, 0"); ++ if (!hregIsInvalid(cond)) ++ vex_printf(" }"); ++} ++ ++static inline void ppXIndir ( HReg dstGA, LOONGARCH64AMode* amPC, ++ HReg cond ) ++{ ++ vex_printf("(xIndir) "); ++ if (!hregIsInvalid(cond)) { ++ vex_printf("if ("); ++ ppHRegLOONGARCH64(cond); ++ vex_printf(") { "); ++ } ++ vex_printf("st.w "); ++ ppHRegLOONGARCH64(dstGA); ++ vex_printf(", "); ++ ppLOONGARCH64AMode(amPC); ++ vex_printf("; la $t0, disp_indir; "); ++ vex_printf("jirl $ra, $t0, 0"); ++ if (!hregIsInvalid(cond)) ++ vex_printf(" }"); ++} ++ ++static inline void ppXAssisted ( HReg dstGA, LOONGARCH64AMode* amPC, ++ HReg cond, IRJumpKind jk) ++{ ++ vex_printf("(xAssisted) "); ++ if (!hregIsInvalid(cond)) { ++ vex_printf("if ("); ++ ppHRegLOONGARCH64(cond); ++ vex_printf(") { "); ++ } ++ vex_printf("st.w "); ++ ppHRegLOONGARCH64(dstGA); ++ vex_printf(", "); ++ ppLOONGARCH64AMode(amPC); ++ vex_printf("; li.w $s8, IRJumpKind_to_TRCVAL(%d); ", (Int)jk); ++ vex_printf("la $t0, disp_assisted; "); ++ vex_printf("jirl $ra, $t0, 0"); ++ if (!hregIsInvalid(cond)) ++ vex_printf(" }"); ++} ++ ++static inline void ppEvCheck ( LOONGARCH64AMode* amCounter, ++ LOONGARCH64AMode* amFailAddr ) ++{ ++ vex_printf("(evCheck) "); ++ vex_printf("ld.w $t0, "); ++ ppLOONGARCH64AMode(amCounter); ++ vex_printf("; addi.d $t0, $t0, -1; "); ++ vex_printf("st.w $t0, "); ++ ppLOONGARCH64AMode(amCounter); ++ vex_printf("; bge $t0, $zero, nofail; "); ++ vex_printf("ld.d $t0, "); ++ ppLOONGARCH64AMode(amFailAddr); ++ vex_printf("; jirl $ra, $t0, 0"); ++ vex_printf("; nofail:"); ++} ++ ++static inline void ppProfInc ( void ) ++{ ++ vex_printf("(profInc) "); ++ vex_printf("li $t0, NotKnownYet; "); ++ vex_printf("ld.d $t1, $t0, 0; "); ++ vex_printf("addi.d $t1, $t1, 1; "); ++ vex_printf("st.d $t1, $t0, 0;"); ++} ++ ++void ppLOONGARCH64Instr ( const LOONGARCH64Instr* i, Bool mode64 ) ++{ ++ vassert(mode64 == True); ++ switch (i->tag) { ++ case LAin_LI: ++ ppLI(i->LAin.LI.imm, i->LAin.LI.dst); ++ break; ++ case LAin_Un: ++ ppUnary(i->LAin.Unary.op, i->LAin.Unary.src, i->LAin.Unary.dst); ++ break; ++ case LAin_Bin: ++ ppBinary(i->LAin.Binary.op, i->LAin.Binary.src2, ++ i->LAin.Binary.src1, i->LAin.Binary.dst); ++ break; ++ case LAin_Load: ++ ppLoad(i->LAin.Load.op, i->LAin.Load.src, i->LAin.Load.dst); ++ break; ++ case LAin_Store: ++ ppStore(i->LAin.Store.op, i->LAin.Store.dst, i->LAin.Store.src); ++ break; ++ case LAin_LLSC: ++ ppLLSC(i->LAin.LLSC.op, i->LAin.LLSC.addr, i->LAin.LLSC.val); ++ break; ++ case LAin_Bar: ++ ppBar(i->LAin.Bar.op, i->LAin.Bar.hint); ++ break; ++ case LAin_FpUn: ++ ppFpUnary(i->LAin.FpUnary.op, i->LAin.FpUnary.src, ++ i->LAin.FpUnary.dst); ++ break; ++ case LAin_FpBin: ++ ppFpBinary(i->LAin.FpBinary.op, i->LAin.FpBinary.src2, ++ i->LAin.FpBinary.src1, i->LAin.FpBinary.dst); ++ break; ++ case LAin_FpTri: ++ ppFpTrinary(i->LAin.FpTrinary.op, i->LAin.FpTrinary.src3, ++ i->LAin.FpTrinary.src2, i->LAin.FpTrinary.src1, ++ i->LAin.FpTrinary.dst); ++ break; ++ case LAin_FpLoad: ++ ppFpLoad(i->LAin.FpLoad.op, i->LAin.FpLoad.src, i->LAin.FpLoad.dst); ++ break; ++ case LAin_FpStore: ++ ppFpStore(i->LAin.FpStore.op, i->LAin.FpStore.dst, ++ i->LAin.FpStore.src); ++ break; ++ case LAin_FpMove: ++ ppFpMove(i->LAin.FpMove.op, i->LAin.FpMove.src, ++ i->LAin.FpMove.dst); ++ break; ++ case LAin_FpCmp: ++ ppFpCmp(i->LAin.FpCmp.op, i->LAin.FpCmp.src2, ++ i->LAin.FpCmp.src1, i->LAin.FpCmp.dst); ++ break; ++ case LAin_Cas: ++ ppCas(i->LAin.Cas.old, i->LAin.Cas.addr, i->LAin.Cas.expd, ++ i->LAin.Cas.data, 
i->LAin.Cas.size64); ++ break; ++ case LAin_Cmp: ++ ppCmp(i->LAin.Cmp.cond, i->LAin.Cmp.src2, ++ i->LAin.Cmp.src1, i->LAin.Cmp.dst); ++ break; ++ case LAin_CMove: ++ ppCMove(i->LAin.CMove.cond, i->LAin.CMove.r0, ++ i->LAin.CMove.r1, i->LAin.CMove.dst, ++ i->LAin.CMove.isInt); ++ break; ++ case LAin_Call: ++ ppCall(i->LAin.Call.cond, i->LAin.Call.target, ++ i->LAin.Call.nArgRegs, i->LAin.Call.rloc); ++ break; ++ case LAin_XDirect: ++ ppXDirect(i->LAin.XDirect.dstGA, i->LAin.XDirect.amPC, ++ i->LAin.XDirect.cond, i->LAin.XDirect.toFastEP); ++ break; ++ case LAin_XIndir: ++ ppXIndir(i->LAin.XIndir.dstGA, i->LAin.XIndir.amPC, ++ i->LAin.XIndir.cond); ++ break; ++ case LAin_XAssisted: ++ ppXAssisted(i->LAin.XAssisted.dstGA, i->LAin.XAssisted.amPC, ++ i->LAin.XAssisted.cond, i->LAin.XAssisted.jk); ++ break; ++ case LAin_EvCheck: ++ ppEvCheck(i->LAin.EvCheck.amCounter, i->LAin.EvCheck.amFailAddr); ++ break; ++ case LAin_ProfInc: ++ ppProfInc(); ++ break; ++ default: ++ vpanic("ppLOONGARCH64Instr"); ++ break; ++ } ++} ++ ++ ++/* --------- Helpers for register allocation. --------- */ ++ ++void getRegUsage_LOONGARCH64Instr ( HRegUsage* u, const LOONGARCH64Instr* i, ++ Bool mode64 ) ++{ ++ vassert(mode64 == True); ++ initHRegUsage(u); ++ switch (i->tag) { ++ case LAin_LI: ++ addHRegUse(u, HRmWrite, i->LAin.LI.dst); ++ break; ++ case LAin_Un: ++ addHRegUse(u, HRmRead, i->LAin.Unary.src); ++ addHRegUse(u, HRmWrite, i->LAin.Unary.dst); ++ break; ++ case LAin_Bin: ++ addRegUsage_LOONGARCH64RI(u, i->LAin.Binary.src2); ++ addHRegUse(u, HRmRead, i->LAin.Binary.src1); ++ addHRegUse(u, HRmWrite, i->LAin.Binary.dst); ++ break; ++ case LAin_Load: ++ addRegUsage_LOONGARCH64AMode(u, i->LAin.Load.src); ++ addHRegUse(u, HRmWrite, i->LAin.Load.dst); ++ break; ++ case LAin_Store: ++ addRegUsage_LOONGARCH64AMode(u, i->LAin.Store.dst); ++ addHRegUse(u, HRmRead, i->LAin.Store.src); ++ break; ++ case LAin_LLSC: ++ addRegUsage_LOONGARCH64AMode(u, i->LAin.LLSC.addr); ++ if (i->LAin.LLSC.isLoad) ++ addHRegUse(u, HRmWrite, i->LAin.LLSC.val); ++ else ++ addHRegUse(u, HRmRead, i->LAin.LLSC.val); ++ break; ++ case LAin_Bar: ++ /* No regs. 
*/ ++ break; ++ case LAin_FpUn: ++ addHRegUse(u, HRmRead, i->LAin.FpUnary.src); ++ addHRegUse(u, HRmWrite, i->LAin.FpUnary.dst); ++ break; ++ case LAin_FpBin: ++ addHRegUse(u, HRmRead, i->LAin.FpBinary.src2); ++ addHRegUse(u, HRmRead, i->LAin.FpBinary.src1); ++ addHRegUse(u, HRmWrite, i->LAin.FpBinary.dst); ++ break; ++ case LAin_FpTri: ++ addHRegUse(u, HRmRead, i->LAin.FpTrinary.src3); ++ addHRegUse(u, HRmRead, i->LAin.FpTrinary.src2); ++ addHRegUse(u, HRmRead, i->LAin.FpTrinary.src1); ++ addHRegUse(u, HRmWrite, i->LAin.FpTrinary.dst); ++ break; ++ case LAin_FpLoad: ++ addRegUsage_LOONGARCH64AMode(u, i->LAin.FpLoad.src); ++ addHRegUse(u, HRmWrite, i->LAin.FpLoad.dst); ++ break; ++ case LAin_FpStore: ++ addRegUsage_LOONGARCH64AMode(u, i->LAin.FpStore.dst); ++ addHRegUse(u, HRmRead, i->LAin.FpStore.src); ++ break; ++ case LAin_FpMove: ++ addHRegUse(u, HRmRead, i->LAin.FpMove.src); ++ addHRegUse(u, HRmWrite, i->LAin.FpMove.dst); ++ break; ++ case LAin_FpCmp: ++ addHRegUse(u, HRmRead, i->LAin.FpCmp.src2); ++ addHRegUse(u, HRmRead, i->LAin.FpCmp.src1); ++ addHRegUse(u, HRmWrite, i->LAin.FpCmp.dst); ++ break; ++ case LAin_Cas: ++ addHRegUse(u, HRmWrite, i->LAin.Cas.old); ++ addHRegUse(u, HRmRead, i->LAin.Cas.addr); ++ addHRegUse(u, HRmRead, i->LAin.Cas.expd); ++ addHRegUse(u, HRmModify, i->LAin.Cas.data); ++ break; ++ case LAin_Cmp: ++ addHRegUse(u, HRmRead, i->LAin.Cmp.src2); ++ addHRegUse(u, HRmRead, i->LAin.Cmp.src1); ++ addHRegUse(u, HRmWrite, i->LAin.Cmp.dst); ++ break; ++ case LAin_CMove: ++ addHRegUse(u, HRmRead, i->LAin.CMove.cond); ++ addHRegUse(u, HRmRead, i->LAin.CMove.r0); ++ addHRegUse(u, HRmRead, i->LAin.CMove.r1); ++ addHRegUse(u, HRmWrite, i->LAin.CMove.dst); ++ break; ++ case LAin_Call: ++ /* logic and comments copied/modified from mips and arm64 back end */ ++ /* This is a bit subtle. */ ++ /* First off, we need to consider the cond register. */ ++ if (!hregIsInvalid(i->LAin.Call.cond)) ++ addHRegUse(u, HRmRead, i->LAin.Call.cond); ++ /* Then, claim it trashes all the caller-saved regs ++ which fall within the register allocator's jurisdiction. */ ++ addHRegUse(u, HRmWrite, hregLOONGARCH64_R14()); ++ addHRegUse(u, HRmWrite, hregLOONGARCH64_R15()); ++ addHRegUse(u, HRmWrite, hregLOONGARCH64_R16()); ++ addHRegUse(u, HRmWrite, hregLOONGARCH64_R17()); ++ addHRegUse(u, HRmWrite, hregLOONGARCH64_R18()); ++ addHRegUse(u, HRmWrite, hregLOONGARCH64_R19()); ++ addHRegUse(u, HRmWrite, hregLOONGARCH64_R20()); ++ /* Now we have to state any parameter-carrying registers ++ which might be read. This depends on nArgRegs. */ ++ switch (i->LAin.Call.nArgRegs) { ++ case 8: addHRegUse(u, HRmRead, hregLOONGARCH64_R11()); /* fallthrough */ ++ case 7: addHRegUse(u, HRmRead, hregLOONGARCH64_R10()); /* fallthrough */ ++ case 6: addHRegUse(u, HRmRead, hregLOONGARCH64_R9()); /* fallthrough */ ++ case 5: addHRegUse(u, HRmRead, hregLOONGARCH64_R8()); /* fallthrough */ ++ case 4: addHRegUse(u, HRmRead, hregLOONGARCH64_R7()); /* fallthrough */ ++ case 3: addHRegUse(u, HRmRead, hregLOONGARCH64_R6()); /* fallthrough */ ++ case 2: addHRegUse(u, HRmRead, hregLOONGARCH64_R5()); /* fallthrough */ ++ case 1: addHRegUse(u, HRmRead, hregLOONGARCH64_R4()); /* fallthrough */ ++ case 0: break; ++ default: vpanic("getRegUsage_LOONGARCH64:Call:regparms"); break; ++ } ++ /* Finally, there is the issue that the insn trashes a ++ register because the literal target address has to be ++ loaded into a register. However, we reserve $t0 for that ++ purpose so there's no further complexity here. 
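++         (For reference: the argument registers $a0..$a7 are $r4..$r11,
++         which is what the fall-through switch above reads one register at
++         a time, and $t0 is $r12, one of the caller-saved temporaries.)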
Stating $t0 ++ as trashed is pointless since it's not under the control ++ of the allocator, but what the hell. */ ++ addHRegUse(u, HRmWrite, hregT0()); ++ break; ++ /* XDirect/XIndir/XAssisted are also a bit subtle. They ++ conditionally exit the block. Hence we only need to list (1) ++ the registers that they read, and (2) the registers that they ++ write in the case where the block is not exited. (2) is ++ empty, hence only (1) is relevant here. */ ++ case LAin_XDirect: ++ addRegUsage_LOONGARCH64AMode(u, i->LAin.XDirect.amPC); ++ if (!hregIsInvalid(i->LAin.XDirect.cond)) ++ addHRegUse(u, HRmRead, i->LAin.XDirect.cond); ++ addHRegUse(u, HRmWrite, hregT0()); /* unavail to RA */ ++ break; ++ case LAin_XIndir: ++ addHRegUse(u, HRmRead, i->LAin.XIndir.dstGA); ++ addRegUsage_LOONGARCH64AMode(u, i->LAin.XIndir.amPC); ++ if (!hregIsInvalid(i->LAin.XIndir.cond)) ++ addHRegUse(u, HRmRead, i->LAin.XIndir.cond); ++ addHRegUse(u, HRmWrite, hregT0()); /* unavail to RA */ ++ break; ++ case LAin_XAssisted: ++ addHRegUse(u, HRmRead, i->LAin.XAssisted.dstGA); ++ addRegUsage_LOONGARCH64AMode(u, i->LAin.XAssisted.amPC); ++ if (!hregIsInvalid(i->LAin.XAssisted.cond)) ++ addHRegUse(u, HRmRead, i->LAin.XAssisted.cond); ++ addHRegUse(u, HRmWrite, hregT0()); /* unavail to RA */ ++ break; ++ case LAin_EvCheck: ++ /* We expect both amodes only to mention $r31, so this is in ++ fact pointless, since $r31 isn't allocatable, but anyway.. */ ++ addRegUsage_LOONGARCH64AMode(u, i->LAin.EvCheck.amCounter); ++ addRegUsage_LOONGARCH64AMode(u, i->LAin.EvCheck.amFailAddr); ++ addHRegUse(u, HRmWrite, hregT0()); /* unavail to RA */ ++ break; ++ case LAin_ProfInc: ++ /* Again, pointless to actually state these since neither ++ is available to RA. */ ++ addHRegUse(u, HRmWrite, hregT0()); /* unavail to RA */ ++ addHRegUse(u, HRmWrite, hregT1()); /* unavail to RA */ ++ break; ++ default: ++ ppLOONGARCH64Instr(i, mode64); ++ vpanic("getRegUsage_LOONGARCH64Instr"); ++ break; ++ } ++} ++ ++void mapRegs_LOONGARCH64Instr ( HRegRemap* m, LOONGARCH64Instr* i, ++ Bool mode64 ) ++{ ++ vassert(mode64 == True); ++ switch (i->tag) { ++ case LAin_LI: ++ mapReg(m, &i->LAin.LI.dst); ++ break; ++ case LAin_Un: ++ mapReg(m, &i->LAin.Unary.src); ++ mapReg(m, &i->LAin.Unary.dst); ++ break; ++ case LAin_Bin: ++ mapRegs_LOONGARCH64RI(m, i->LAin.Binary.src2); ++ mapReg(m, &i->LAin.Binary.src1); ++ mapReg(m, &i->LAin.Binary.dst); ++ break; ++ case LAin_Load: ++ mapRegs_LOONGARCH64AMode(m, i->LAin.Load.src); ++ mapReg(m, &i->LAin.Load.dst); ++ break; ++ case LAin_Store: ++ mapRegs_LOONGARCH64AMode(m, i->LAin.Store.dst); ++ mapReg(m, &i->LAin.Store.src); ++ break; ++ case LAin_LLSC: ++ mapRegs_LOONGARCH64AMode(m, i->LAin.LLSC.addr); ++ mapReg(m, &i->LAin.LLSC.val); ++ break; ++ case LAin_Bar: ++ /* No regs. 
*/ ++ break; ++ case LAin_FpUn: ++ mapReg(m, &i->LAin.FpUnary.src); ++ mapReg(m, &i->LAin.FpUnary.dst); ++ break; ++ case LAin_FpBin: ++ mapReg(m, &i->LAin.FpBinary.src2); ++ mapReg(m, &i->LAin.FpBinary.src1); ++ mapReg(m, &i->LAin.FpBinary.dst); ++ break; ++ case LAin_FpTri: ++ mapReg(m, &i->LAin.FpTrinary.src3); ++ mapReg(m, &i->LAin.FpTrinary.src2); ++ mapReg(m, &i->LAin.FpTrinary.src1); ++ mapReg(m, &i->LAin.FpTrinary.dst); ++ break; ++ case LAin_FpLoad: ++ mapRegs_LOONGARCH64AMode(m, i->LAin.FpLoad.src); ++ mapReg(m, &i->LAin.FpLoad.dst); ++ break; ++ case LAin_FpStore: ++ mapRegs_LOONGARCH64AMode(m, i->LAin.FpStore.dst); ++ mapReg(m, &i->LAin.FpStore.src); ++ break; ++ case LAin_FpMove: ++ mapReg(m, &i->LAin.FpMove.src); ++ mapReg(m, &i->LAin.FpMove.dst); ++ break; ++ case LAin_FpCmp: ++ mapReg(m, &i->LAin.FpCmp.src2); ++ mapReg(m, &i->LAin.FpCmp.src1); ++ mapReg(m, &i->LAin.FpCmp.dst); ++ break; ++ case LAin_Cas: ++ mapReg(m, &i->LAin.Cas.old); ++ mapReg(m, &i->LAin.Cas.addr); ++ mapReg(m, &i->LAin.Cas.expd); ++ mapReg(m, &i->LAin.Cas.data); ++ break; ++ case LAin_Cmp: ++ mapReg(m, &i->LAin.Cmp.src2); ++ mapReg(m, &i->LAin.Cmp.src1); ++ mapReg(m, &i->LAin.Cmp.dst); ++ break; ++ case LAin_CMove: ++ mapReg(m, &i->LAin.CMove.cond); ++ mapReg(m, &i->LAin.CMove.r0); ++ mapReg(m, &i->LAin.CMove.r1); ++ mapReg(m, &i->LAin.CMove.dst); ++ break; ++ case LAin_Call: ++ if (!hregIsInvalid(i->LAin.Call.cond)) ++ mapReg(m, &i->LAin.Call.cond); ++ /* Hardwires $r12. */ ++ break; ++ /* XDirect/XIndir/XAssisted are also a bit subtle. They ++ conditionally exit the block. Hence we only need to list (1) ++ the registers that they read, and (2) the registers that they ++ write in the case where the block is not exited. (2) is ++ empty, hence only (1) is relevant here. */ ++ case LAin_XDirect: ++ mapRegs_LOONGARCH64AMode(m, i->LAin.XDirect.amPC); ++ if (!hregIsInvalid(i->LAin.XDirect.cond)) ++ mapReg(m, &i->LAin.XDirect.cond); ++ break; ++ case LAin_XIndir: ++ mapReg(m, &i->LAin.XIndir.dstGA); ++ mapRegs_LOONGARCH64AMode(m, i->LAin.XIndir.amPC); ++ if (!hregIsInvalid(i->LAin.XIndir.cond)) ++ mapReg(m, &i->LAin.XIndir.cond); ++ break; ++ case LAin_XAssisted: ++ mapReg(m, &i->LAin.XAssisted.dstGA); ++ mapRegs_LOONGARCH64AMode(m, i->LAin.XAssisted.amPC); ++ if (!hregIsInvalid(i->LAin.XAssisted.cond)) ++ mapReg(m, &i->LAin.XAssisted.cond); ++ break; ++ case LAin_EvCheck: ++ /* We expect both amodes only to mention $r31, so this is in ++ fact pointless, since $r31 isn't allocatable, but anyway.. */ ++ mapRegs_LOONGARCH64AMode(m, i->LAin.EvCheck.amCounter); ++ mapRegs_LOONGARCH64AMode(m, i->LAin.EvCheck.amFailAddr); ++ break; ++ case LAin_ProfInc: ++ /* Hardwires $r12 and $r13 -- nothing to modify. */ ++ break; ++ default: ++ ppLOONGARCH64Instr(i, mode64); ++ vpanic("mapRegs_LOONGARCH64Instr"); ++ break; ++ } ++} ++ ++/* Generate loongarch64 spill instructions under the direction of the ++ register allocator. 
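++   Roughly, an integer rreg whose slot is close to the guest state pointer
++   spills with a single register+immediate store,
++
++      st.d rreg, GSP, offsetB
++
++   while a distant slot goes through $t0 and the indexed form,
++
++      li $t0, offsetB
++      stx.d rreg, GSP, $t0
++
++   and similarly fst.d / fstx.d for a floating-point rreg.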
*/ ++void genSpill_LOONGARCH64 ( /*OUT*/ HInstr** i1, /*OUT*/ HInstr** i2, ++ HReg rreg, Int offsetB, Bool mode64 ) ++{ ++ vassert(mode64 == True); ++ vassert(offsetB >= 0); ++ vassert(!hregIsVirtual(rreg)); ++ ++ LOONGARCH64AMode* am; ++ *i1 = *i2 = NULL; ++ ++ switch (hregClass(rreg)) { ++ case HRcInt64: ++ if (offsetB < 1024) { ++ am = LOONGARCH64AMode_RI(hregGSP(), offsetB); ++ *i1 = LOONGARCH64Instr_Store(LAstore_ST_D, am, rreg); ++ } else { ++ am = LOONGARCH64AMode_RR(hregGSP(), hregT0()); ++ *i1 = LOONGARCH64Instr_LI(offsetB, hregT0()); ++ *i2 = LOONGARCH64Instr_Store(LAstore_STX_D, am, rreg); ++ } ++ break; ++ case HRcFlt64: ++ if (offsetB < 1024) { ++ am = LOONGARCH64AMode_RI(hregGSP(), offsetB); ++ *i1 = LOONGARCH64Instr_FpStore(LAfpstore_FST_D, am, rreg); ++ } else { ++ am = LOONGARCH64AMode_RR(hregGSP(), hregT0()); ++ *i1 = LOONGARCH64Instr_LI(offsetB, hregT0()); ++ *i2 = LOONGARCH64Instr_FpStore(LAfpstore_FSTX_D, am, rreg); ++ } ++ break; ++ default: ++ ppHRegClass(hregClass(rreg)); ++ vpanic("genSpill_LOONGARCH64: unimplemented regclass"); ++ break; ++ } ++} ++ ++/* Generate loongarch64 reload instructions under the direction of the ++ register allocator. */ ++void genReload_LOONGARCH64 ( /*OUT*/ HInstr** i1, /*OUT*/ HInstr** i2, ++ HReg rreg, Int offsetB, Bool mode64 ) ++{ ++ vassert(mode64 == True); ++ vassert(offsetB >= 0); ++ vassert(!hregIsVirtual(rreg)); ++ ++ LOONGARCH64AMode* am; ++ *i1 = *i2 = NULL; ++ ++ switch (hregClass(rreg)) { ++ case HRcInt64: ++ if (offsetB < 1024) { ++ am = LOONGARCH64AMode_RI(hregGSP(), offsetB); ++ *i1 = LOONGARCH64Instr_Load(LAload_LD_D, am, rreg); ++ } else { ++ am = LOONGARCH64AMode_RR(hregGSP(), hregT0()); ++ *i1 = LOONGARCH64Instr_LI(offsetB, hregT0()); ++ *i2 = LOONGARCH64Instr_Load(LAload_LDX_D, am, rreg); ++ } ++ break; ++ case HRcFlt64: ++ if (offsetB < 1024) { ++ am = LOONGARCH64AMode_RI(hregGSP(), offsetB); ++ *i1 = LOONGARCH64Instr_FpLoad(LAfpload_FLD_D, am, rreg); ++ } else { ++ am = LOONGARCH64AMode_RR(hregGSP(), hregT0()); ++ *i1 = LOONGARCH64Instr_LI(offsetB, hregT0()); ++ *i2 = LOONGARCH64Instr_FpLoad(LAfpload_FLDX_D, am, rreg); ++ } ++ break; ++ default: ++ ppHRegClass(hregClass(rreg)); ++ vpanic("genReload_LOONGARCH64: unimplemented regclass"); ++ break; ++ } ++} ++ ++/* Generate loongarch64 move instructions under the direction of the ++ register allocator. 
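++   Since LoongArch has no dedicated register-to-register move instruction,
++   an integer move is synthesised as "or dst, src, $zero" (the usual move
++   idiom), and a floating-point move as "fmov.d dst, src".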
*/ ++LOONGARCH64Instr* genMove_LOONGARCH64 ( HReg from, HReg to, Bool mode64 ) ++{ ++ vassert(mode64 == True); ++ switch (hregClass(from)) { ++ case HRcInt64: ++ return LOONGARCH64Instr_Binary(LAbin_OR, ++ LOONGARCH64RI_R(hregZERO()), ++ from, to); ++ case HRcFlt64: ++ return LOONGARCH64Instr_FpMove(LAfpmove_FMOV_D, from, to); ++ default: ++ ppHRegClass(hregClass(from)); ++ vpanic("genMove_LOONGARCH64: unimplemented regclass"); ++ } ++} ++ ++ ++/* --------- The loongarch64 assembler --------- */ ++ ++static inline UInt iregEnc ( HReg r ) ++{ ++ vassert(hregClass(r) == HRcInt64); ++ vassert(!hregIsVirtual(r)); ++ UInt n = hregEncoding(r); ++ vassert(n < 32); ++ return n; ++} ++ ++static inline UInt fregEnc ( HReg r ) ++{ ++ vassert(hregClass(r) == HRcFlt64); ++ vassert(!hregIsVirtual(r)); ++ UInt n = hregEncoding(r); ++ vassert(n < 32); ++ return n; ++} ++ ++static inline UInt fcsrEnc ( HReg r ) ++{ ++ vassert(hregClass(r) == HRcInt32); ++ vassert(!hregIsVirtual(r)); ++ UInt n = hregEncoding(r); ++ vassert(n < 32); ++ return n; ++} ++ ++static inline UInt emit_op_rj_rd ( UInt op, UInt rj, UInt rd ) ++{ ++ vassert(rj < (1 << 5)); ++ vassert(rd < (1 << 5)); ++ return op | (rj << 5) | rd; ++} ++ ++static inline UInt emit_op_rk_rj_rd ( UInt op, UInt rk, UInt rj, UInt rd ) ++{ ++ vassert(rk < (1 << 5)); ++ vassert(rj < (1 << 5)); ++ vassert(rd < (1 << 5)); ++ return op | (rk << 10) | (rj << 5) | rd; ++} ++ ++static inline UInt emit_op_fj_fd ( UInt op, UInt fj, UInt fd ) ++{ ++ vassert(fj < (1 << 5)); ++ vassert(fd < (1 << 5)); ++ return op | (fj << 5) | fd; ++} ++ ++static inline UInt emit_op_fa_fk_fj_fd ( UInt op, UInt fa, UInt fk, UInt fj, UInt fd ) ++{ ++ vassert(fa < (1 << 5)); ++ vassert(fk < (1 << 5)); ++ vassert(fj < (1 << 5)); ++ vassert(fd < (1 << 5)); ++ return op | (fa << 15) | (fk << 10) | (fj << 5) | fd; ++} ++ ++static inline UInt emit_op_fk_fj_fd ( UInt op, UInt fk, UInt fj, UInt fd ) ++{ ++ vassert(fk < (1 << 5)); ++ vassert(fj < (1 << 5)); ++ vassert(fd < (1 << 5)); ++ return op | (fk << 10) | (fj << 5) | fd; ++} ++ ++static inline UInt emit_op_ca_fk_fj_fd ( UInt op, UInt ca, UInt fk, UInt fj, UInt fd ) ++{ ++ vassert(ca < (1 << 3)); ++ vassert(fk < (1 << 5)); ++ vassert(fj < (1 << 5)); ++ vassert(fd < (1 << 5)); ++ return op | (ca << 15) | (fk << 10) | (fj << 5) | fd; ++} ++ ++static inline UInt emit_op_fk_fj_cd ( UInt op, UInt fk, UInt fj, UInt cd ) ++{ ++ vassert(fk < (1 << 5)); ++ vassert(fj < (1 << 5)); ++ vassert(cd < (1 << 3)); ++ return op | (fk << 10) | (fj << 5) | cd; ++} ++ ++static inline UInt emit_op_cj_rd ( UInt op, UInt cj, UInt rd ) ++{ ++ vassert(cj < (1 << 3)); ++ vassert(rd < (1 << 5)); ++ return op | (cj << 5) | rd; ++} ++ ++static inline UInt emit_op_rj_cd ( UInt op, UInt rj, UInt cd ) ++{ ++ vassert(rj < (1 << 5)); ++ vassert(cd < (1 << 3)); ++ return op | (rj << 5) | cd; ++} ++ ++static inline UInt emit_op_rj_fd ( UInt op, UInt rj, UInt fd ) ++{ ++ vassert(rj < (1 << 5)); ++ vassert(fd < (1 << 5)); ++ return op | (rj << 5) | fd; ++} ++ ++static inline UInt emit_op_fj_rd ( UInt op, UInt fj, UInt rd ) ++{ ++ vassert(fj < (1 << 5)); ++ vassert(rd < (1 << 5)); ++ return op | (fj << 5) | rd; ++} ++ ++static inline UInt emit_op_rj_fcsr ( UInt op, UInt rj, UInt fcsr ) ++{ ++ vassert(rj < (1 << 5)); ++ vassert(fcsr < (1 << 5)); ++ return op | (rj << 5) | fcsr; ++} ++ ++static inline UInt emit_op_fcsr_rd ( UInt op, UInt fcsr, UInt rd ) ++{ ++ vassert(fcsr < (1 << 5)); ++ vassert(rd < (1 << 5)); ++ return op | (fcsr << 5) | rd; ++} ++ ++static inline UInt 
emit_op_ui5_rj_rd ( UInt op, UInt ui5, UInt rj, UInt rd ) ++{ ++ vassert(ui5 < (1 << 5)); ++ vassert(rj < (1 << 5)); ++ vassert(rd < (1 << 5)); ++ return op | (ui5 << 10) | (rj << 5) | rd; ++} ++ ++static inline UInt emit_op_ui6_rj_rd ( UInt op, UInt ui6, UInt rj, UInt rd ) ++{ ++ vassert(ui6 < (1 << 6)); ++ vassert(rj < (1 << 5)); ++ vassert(rd < (1 << 5)); ++ return op | (ui6 << 10) | (rj << 5) | rd; ++} ++ ++static inline UInt emit_op_ui12_rj_rd ( UInt op, UInt ui12, UInt rj, UInt rd ) ++{ ++ vassert(ui12 < (1 << 12)); ++ vassert(rj < (1 << 5)); ++ vassert(rd < (1 << 5)); ++ return op | (ui12 << 10) | (rj << 5) | rd; ++} ++ ++static inline UInt emit_op_si12_rj_rd ( UInt op, UInt si12, UInt rj, UInt rd ) ++{ ++ vassert(si12 < (1 << 12)); ++ vassert(rj < (1 << 5)); ++ vassert(rd < (1 << 5)); ++ return op | (si12 << 10) | (rj << 5) | rd; ++} ++ ++static inline UInt emit_op_si14_rj_rd ( UInt op, UInt si14, UInt rj, UInt rd ) ++{ ++ vassert(si14 < (1 << 14)); ++ vassert(rj < (1 << 5)); ++ vassert(rd < (1 << 5)); ++ return op | (si14 << 10) | (rj << 5) | rd; ++} ++ ++static inline UInt emit_op_si20_rd ( UInt op, UInt si20, UInt rd ) ++{ ++ vassert(si20 < (1 << 20)); ++ vassert(rd < (1 << 5)); ++ return op | (si20 << 5) | rd; ++} ++ ++static inline UInt emit_op_offs16_rj_rd ( UInt op, UInt offs16, UInt rj, UInt rd ) ++{ ++ vassert(offs16 < (1 << 16)); ++ vassert(rj < (1 << 5)); ++ vassert(rd < (1 << 5)); ++ return op | (offs16 << 10) | (rj << 5) | rd; ++} ++ ++static inline UInt emit_op_offs26 ( UInt op, UInt offs26 ) ++{ ++ vassert(offs26 < (1 << 26)); ++ return op | ((offs26 & 0xffff) << 10) | (offs26 >> 16); ++} ++ ++static inline UInt emit_op_hint15 ( UInt op, UInt hint ) ++{ ++ vassert(hint < (1 << 15)); ++ return op | hint; ++} ++ ++static UInt* mkLoadImm_EXACTLY4 ( UInt* p, HReg dst, ULong imm ) ++{ ++ /* ++ lu12i.w dst, imm[31:12] ++ ori dst, dst, imm[11:0] ++ lu32i.d dst, imm[51:32] ++ lu52i.d dst, dst, imm[63:52] ++ */ ++ UInt d = iregEnc(dst); ++ *p++ = emit_op_si20_rd(LAextra_LU12I_W, (imm >> 12) & 0xfffff, d); ++ *p++ = emit_op_si12_rj_rd(LAbin_ORI, imm & 0xfff, d, d); ++ *p++ = emit_op_si20_rd(LAextra_LU32I_D, (imm >> 32) & 0xfffff, d); ++ *p++ = emit_op_si12_rj_rd(LAextra_LU52I_D, (imm >> 52) & 0xfff, d, d); ++ return p; ++} ++ ++static inline UInt* mkLoadImm_EXACTLY2 ( UInt* p, HReg dst, ULong imm ) ++{ ++ /* ++ lu12i.w dst, imm[31:12] ++ ori dst, dst, imm[11:0] ++ */ ++ UInt d = iregEnc(dst); ++ *p++ = emit_op_si20_rd(LAextra_LU12I_W, (imm >> 12) & 0xfffff, d); ++ *p++ = emit_op_si12_rj_rd(LAbin_ORI, imm & 0xfff, d, d); ++ return p; ++} ++ ++static inline UInt* mkLoadImm_EXACTLY1 ( UInt* p, HReg dst, ULong imm ) ++{ ++ /* ori dst, $zero, imm[11:0] */ ++ *p++ = emit_op_si12_rj_rd(LAbin_ORI, imm, 0, iregEnc(dst)); ++ return p; ++} ++ ++static UInt* mkLoadImm ( UInt* p, HReg dst, ULong imm ) ++{ ++ if ((imm >> 12) == 0) ++ p = mkLoadImm_EXACTLY1(p, dst, imm); ++ else if (imm < 0x80000000 || (imm >> 31) == 0x1ffffffffUL) ++ p = mkLoadImm_EXACTLY2(p, dst, imm); ++ else ++ p = mkLoadImm_EXACTLY4(p, dst, imm); ++ return p; ++} ++ ++static Bool is_LoadImm_EXACTLY4 ( UInt* p, HReg dst, ULong imm ) ++{ ++ UInt expect[4]; ++ mkLoadImm_EXACTLY4(expect, dst, imm); ++ return toBool(p[0] == expect[0] && p[1] == expect[1] && ++ p[2] == expect[2] && p[3] == expect[3]); ++} ++ ++static inline UInt* mkUnary ( UInt* p, LOONGARCH64UnOp op, HReg src, HReg dst ) ++{ ++ switch (op) { ++ case LAun_CLZ_W: ++ case LAun_CTZ_W: ++ case LAun_CLZ_D: ++ case LAun_CTZ_D: ++ case LAun_EXT_W_H: ++ case 
LAun_EXT_W_B: ++ *p++ = emit_op_rj_rd(op, iregEnc(src), iregEnc(dst)); ++ return p; ++ default: ++ return NULL; ++ } ++} ++ ++static inline UInt* mkBinary ( UInt* p, LOONGARCH64BinOp op, ++ LOONGARCH64RI* src2, HReg src1, HReg dst ) ++{ ++ switch (op) { ++ case LAbin_ADD_W: ++ case LAbin_ADD_D: ++ case LAbin_SUB_W: ++ case LAbin_SUB_D: ++ case LAbin_NOR: ++ case LAbin_AND: ++ case LAbin_OR: ++ case LAbin_XOR: ++ case LAbin_SLL_W: ++ case LAbin_SRL_W: ++ case LAbin_SRA_W: ++ case LAbin_SLL_D: ++ case LAbin_SRL_D: ++ case LAbin_SRA_D: ++ case LAbin_MUL_W: ++ case LAbin_MUL_D: ++ case LAbin_MULH_W: ++ case LAbin_MULH_WU: ++ case LAbin_MULH_D: ++ case LAbin_MULH_DU: ++ case LAbin_MULW_D_W: ++ case LAbin_MULW_D_WU: ++ case LAbin_DIV_W: ++ case LAbin_MOD_W: ++ case LAbin_DIV_WU: ++ case LAbin_MOD_WU: ++ case LAbin_DIV_D: ++ case LAbin_MOD_D: ++ case LAbin_DIV_DU: ++ case LAbin_MOD_DU: ++ vassert(src2->tag == LAri_Reg); ++ *p++ = emit_op_rk_rj_rd(op, iregEnc(src2->LAri.R.reg), ++ iregEnc(src1), iregEnc(dst)); ++ return p; ++ case LAbin_SLLI_W: ++ case LAbin_SRLI_W: ++ case LAbin_SRAI_W: ++ vassert(src2->tag == LAri_Imm); ++ *p++ = emit_op_ui5_rj_rd(op, src2->LAri.I.imm, ++ iregEnc(src1), iregEnc(dst)); ++ return p; ++ case LAbin_SLLI_D: ++ case LAbin_SRLI_D: ++ case LAbin_SRAI_D: ++ vassert(src2->tag == LAri_Imm); ++ *p++ = emit_op_ui6_rj_rd(op, src2->LAri.I.imm, ++ iregEnc(src1), iregEnc(dst)); ++ return p; ++ case LAbin_ADDI_W: ++ case LAbin_ADDI_D: ++ vassert(src2->tag == LAri_Imm); ++ *p++ = emit_op_si12_rj_rd(op, src2->LAri.I.imm, ++ iregEnc(src1), iregEnc(dst)); ++ return p; ++ case LAbin_ANDI: ++ case LAbin_ORI: ++ case LAbin_XORI: ++ vassert(src2->tag == LAri_Imm); ++ *p++ = emit_op_ui12_rj_rd(op, src2->LAri.I.imm, ++ iregEnc(src1), iregEnc(dst)); ++ return p; ++ default: ++ return NULL; ++ } ++} ++ ++static UInt* mkLoad ( UInt* p, LOONGARCH64LoadOp op, ++ LOONGARCH64AMode* src, HReg dst ) ++{ ++ switch (op) { ++ case LAload_LD_W: ++ case LAload_LD_D: ++ case LAload_LD_BU: ++ case LAload_LD_HU: ++ case LAload_LD_WU: ++ vassert(src->tag == LAam_RI); ++ *p++ = emit_op_si12_rj_rd(op, src->LAam.RI.index, ++ iregEnc(src->LAam.RI.base), iregEnc(dst)); ++ return p; ++ case LAload_LDX_D: ++ case LAload_LDX_BU: ++ case LAload_LDX_HU: ++ case LAload_LDX_WU: ++ vassert(src->tag == LAam_RR); ++ *p++ = emit_op_rk_rj_rd(op, iregEnc(src->LAam.RR.index), ++ iregEnc(src->LAam.RR.base), iregEnc(dst)); ++ return p; ++ default: ++ return NULL; ++ } ++} ++ ++static UInt* mkStore ( UInt* p, LOONGARCH64StoreOp op, ++ LOONGARCH64AMode* dst, HReg src ) ++{ ++ switch (op) { ++ case LAstore_ST_B: ++ case LAstore_ST_H: ++ case LAstore_ST_W: ++ case LAstore_ST_D: ++ vassert(dst->tag == LAam_RI); ++ *p++ = emit_op_si12_rj_rd(op, dst->LAam.RI.index, ++ iregEnc(dst->LAam.RI.base), iregEnc(src)); ++ return p; ++ case LAstore_STX_B: ++ case LAstore_STX_H: ++ case LAstore_STX_W: ++ case LAstore_STX_D: ++ vassert(dst->tag == LAam_RR); ++ *p++ = emit_op_rk_rj_rd(op, iregEnc(dst->LAam.RR.index), ++ iregEnc(dst->LAam.RR.base), iregEnc(src)); ++ return p; ++ default: ++ return NULL; ++ } ++} ++ ++static inline UInt* mkLLSC ( UInt* p, LOONGARCH64LLSCOp op, ++ LOONGARCH64AMode* addr, HReg val ) ++{ ++ switch (op) { ++ case LAllsc_LL_W: ++ case LAllsc_SC_W: ++ case LAllsc_LL_D: ++ case LAllsc_SC_D: ++ vassert(addr->tag == LAam_RI); ++ *p++ = emit_op_si14_rj_rd(op, addr->LAam.RI.index, ++ iregEnc(addr->LAam.RI.base), iregEnc(val)); ++ return p; ++ default: ++ return NULL; ++ } ++} ++ ++static inline UInt* mkBar ( UInt* p, 
LOONGARCH64BarOp op, UShort hint ) ++{ ++ switch (op) { ++ case LAbar_DBAR: ++ case LAbar_IBAR: ++ *p++ = emit_op_hint15(op, hint); ++ return p; ++ default: ++ return NULL; ++ } ++} ++ ++static inline UInt* mkFpUnary ( UInt* p, LOONGARCH64FpUnOp op, HReg src, HReg dst ) ++{ ++ switch (op) { ++ case LAfpun_FABS_S: ++ case LAfpun_FABS_D: ++ case LAfpun_FNEG_S: ++ case LAfpun_FNEG_D: ++ case LAfpun_FLOGB_S: ++ case LAfpun_FLOGB_D: ++ case LAfpun_FSQRT_S: ++ case LAfpun_FSQRT_D: ++ case LAfpun_FRSQRT_S: ++ case LAfpun_FRSQRT_D: ++ case LAfpun_FCVT_S_D: ++ case LAfpun_FCVT_D_S: ++ case LAfpun_FTINT_W_S: ++ case LAfpun_FTINT_W_D: ++ case LAfpun_FTINT_L_S: ++ case LAfpun_FTINT_L_D: ++ case LAfpun_FFINT_S_W: ++ case LAfpun_FFINT_S_L: ++ case LAfpun_FFINT_D_W: ++ case LAfpun_FFINT_D_L: ++ case LAfpun_FRINT_S: ++ case LAfpun_FRINT_D: ++ *p++ = emit_op_fj_fd(op, fregEnc(src), fregEnc(dst)); ++ return p; ++ default: ++ return NULL; ++ } ++} ++ ++static inline UInt* mkFpBinary ( UInt* p, LOONGARCH64FpBinOp op, HReg src2, ++ HReg src1, HReg dst ) ++{ ++ switch (op) { ++ case LAfpbin_FADD_S: ++ case LAfpbin_FADD_D: ++ case LAfpbin_FSUB_S: ++ case LAfpbin_FSUB_D: ++ case LAfpbin_FMUL_S: ++ case LAfpbin_FMUL_D: ++ case LAfpbin_FDIV_S: ++ case LAfpbin_FDIV_D: ++ case LAfpbin_FMAX_S: ++ case LAfpbin_FMAX_D: ++ case LAfpbin_FMIN_S: ++ case LAfpbin_FMIN_D: ++ case LAfpbin_FMAXA_S: ++ case LAfpbin_FMAXA_D: ++ case LAfpbin_FMINA_S: ++ case LAfpbin_FMINA_D: ++ case LAfpbin_FSCALEB_S: ++ case LAfpbin_FSCALEB_D: ++ *p++ = emit_op_fk_fj_fd(op, fregEnc(src2), fregEnc(src1), fregEnc(dst)); ++ return p; ++ default: ++ return NULL; ++ } ++} ++ ++static inline UInt* mkFpTrinary ( UInt* p, LOONGARCH64FpTriOp op, HReg src3, ++ HReg src2, HReg src1, HReg dst ) ++{ ++ switch (op) { ++ case LAfpbin_FMADD_S: ++ case LAfpbin_FMADD_D: ++ case LAfpbin_FMSUB_S: ++ case LAfpbin_FMSUB_D: ++ *p++ = emit_op_fa_fk_fj_fd(op, fregEnc(src3), fregEnc(src2), ++ fregEnc(src1), fregEnc(dst)); ++ return p; ++ default: ++ return NULL; ++ } ++} ++ ++static inline UInt* mkFpLoad ( UInt* p, LOONGARCH64FpLoadOp op, ++ LOONGARCH64AMode* src, HReg dst ) ++{ ++ switch (op) { ++ case LAfpload_FLD_S: ++ case LAfpload_FLD_D: ++ vassert(src->tag == LAam_RI); ++ *p++ = emit_op_si12_rj_rd(op, src->LAam.RI.index, ++ iregEnc(src->LAam.RI.base), fregEnc(dst)); ++ return p; ++ case LAfpload_FLDX_S: ++ case LAfpload_FLDX_D: ++ vassert(src->tag == LAam_RR); ++ *p++ = emit_op_rk_rj_rd(op, iregEnc(src->LAam.RR.index), ++ iregEnc(src->LAam.RR.base), fregEnc(dst)); ++ return p; ++ default: ++ return NULL; ++ } ++} ++ ++static inline UInt* mkFpStore ( UInt* p, LOONGARCH64FpStoreOp op, ++ LOONGARCH64AMode* dst, HReg src ) ++{ ++ switch (op) { ++ case LAfpstore_FST_S: ++ case LAfpstore_FST_D: ++ vassert(dst->tag == LAam_RI); ++ *p++ = emit_op_si12_rj_rd(op, dst->LAam.RI.index, ++ iregEnc(dst->LAam.RI.base), fregEnc(src)); ++ return p; ++ case LAfpstore_FSTX_S: ++ case LAfpstore_FSTX_D: ++ vassert(dst->tag == LAam_RR); ++ *p++ = emit_op_rk_rj_rd(op, iregEnc(dst->LAam.RR.index), ++ iregEnc(dst->LAam.RR.base), fregEnc(src)); ++ return p; ++ default: ++ return NULL; ++ } ++} ++ ++static inline UInt* mkFpMove ( UInt* p, LOONGARCH64FpMoveOp op, HReg src, HReg dst ) ++{ ++ switch (op) { ++ case LAfpmove_FMOV_S: ++ case LAfpmove_FMOV_D: ++ *p++ = emit_op_fj_fd(op, fregEnc(src), fregEnc(dst)); ++ return p; ++ case LAfpmove_MOVGR2FR_W: ++ case LAfpmove_MOVGR2FR_D: ++ *p++ = emit_op_rj_fd(op, iregEnc(src), fregEnc(dst)); ++ return p; ++ case LAfpmove_MOVFR2GR_S: ++ case 
LAfpmove_MOVFR2GR_D: ++ *p++ = emit_op_fj_rd(op, fregEnc(src), iregEnc(dst)); ++ return p; ++ case LAfpmove_MOVGR2FCSR: ++ *p++ = emit_op_rj_fcsr(op, iregEnc(src), fcsrEnc(dst)); ++ return p; ++ case LAfpmove_MOVFCSR2GR: ++ *p++ = emit_op_fcsr_rd(op, fcsrEnc(src), iregEnc(dst)); ++ return p; ++ default: ++ return NULL; ++ } ++} ++ ++static inline UInt* mkFpCmp ( UInt* p, LOONGARCH64FpCmpOp op, HReg src2, ++ HReg src1, HReg dst ) ++{ ++ /* ++ fcmp.cond.[sd] $fcc0, src1, src2 ++ movcf2gr dst, $fcc0 ++ */ ++ switch (op) { ++ case LAfpcmp_FCMP_CLT_S: ++ case LAfpcmp_FCMP_CLT_D: ++ case LAfpcmp_FCMP_CEQ_S: ++ case LAfpcmp_FCMP_CEQ_D: ++ case LAfpcmp_FCMP_CUN_S: ++ case LAfpcmp_FCMP_CUN_D: ++ *p++ = emit_op_fk_fj_cd(op, fregEnc(src2), fregEnc(src1), 0); ++ *p++ = emit_op_cj_rd(LAextra_MOVCF2GR, 0, iregEnc(dst)); ++ return p; ++ default: ++ return NULL; ++ } ++} ++ ++static inline UInt* mkCas ( UInt* p, HReg old, HReg addr, HReg expd, ++ HReg data, Bool size64 ) ++{ ++ /* ++ ll.[wd] old, addr, 0 ++ bne old, expd, barrier ++ or $t0, data, $zero ++ sc.[wd] $t0, addr, 0 ++ beq $t0, zero, fail ++ or old, expd, $zero ++ b end ++ barrier: ++ dbar 0 ++ fail: ++ or old, data, $zero ++ end: ++ */ ++ UInt o = iregEnc(old); ++ UInt a = iregEnc(addr); ++ UInt e = iregEnc(expd); ++ UInt d = iregEnc(data); ++ UInt t = 12; ++ UInt z = 0; ++ ++ if (size64) { ++ *p++ = emit_op_si14_rj_rd(LAllsc_LL_D, 0, a, o); ++ } else { ++ *p++ = emit_op_ui6_rj_rd(LAbin_SLLI_W, 0, e, e); // Sign-extend expd ++ *p++ = emit_op_si14_rj_rd(LAllsc_LL_W, 0, a, o); ++ } ++ *p++ = emit_op_offs16_rj_rd(LAextra_BNE, 6, o, e); ++ *p++ = emit_op_rk_rj_rd(LAbin_OR, z, d, t); ++ if (size64) { ++ *p++ = emit_op_si14_rj_rd(LAllsc_SC_D, 0, a, t); ++ } else { ++ *p++ = emit_op_si14_rj_rd(LAllsc_SC_W, 0, a, t); ++ } ++ *p++ = emit_op_offs16_rj_rd(LAextra_BEQ, 4, t, z); ++ *p++ = emit_op_rk_rj_rd(LAbin_OR, z, e, o); ++ *p++ = emit_op_offs26(LAextra_B, 3); ++ *p++ = emit_op_hint15(LAbar_DBAR, 0); ++ *p++ = emit_op_rk_rj_rd(LAbin_OR, z, d, o); ++ return p; ++} ++ ++static inline UInt* mkCmp ( UInt* p, LOONGARCH64CondCode cond, ++ HReg src2, HReg src1, HReg dst ) ++{ ++ UInt d = iregEnc(dst); ++ UInt s1 = iregEnc(src1); ++ UInt s2 = iregEnc(src2); ++ ++ switch (cond) { ++ case LAcc_EQ: ++ /* ++ xor dst, src1, src2 ++ sltui dst, dst, 1 ++ */ ++ *p++ = emit_op_rk_rj_rd(LAbin_XOR, s2, s1, d); ++ *p++ = emit_op_si12_rj_rd(LAextra_SLTUI, 1, d, d); ++ return p; ++ case LAcc_NE: ++ /* ++ xor dst, src1, src2 ++ sltu dst, $zero, dst ++ */ ++ *p++ = emit_op_rk_rj_rd(LAbin_XOR, s2, s1, d); ++ *p++ = emit_op_rk_rj_rd(LAextra_SLTU, d, 0, d); ++ return p; ++ case LAcc_LT: ++ /* slt dst, src1, src2 */ ++ *p++ = emit_op_rk_rj_rd(LAextra_SLT, s2, s1, d); ++ return p; ++ case LAcc_GE: ++ /* ++ slt dst, src1, src2 ++ sltui dst, dst, 1 ++ */ ++ *p++ = emit_op_rk_rj_rd(LAextra_SLT, s2, s1, d); ++ *p++ = emit_op_si12_rj_rd(LAextra_SLTUI, 1, d, d); ++ return p; ++ case LAcc_LTU: ++ /* sltu dst, src1, src2 */ ++ *p++ = emit_op_rk_rj_rd(LAextra_SLTU, s2, s1, d); ++ return p; ++ case LAcc_GEU: ++ /* ++ sltu dst, src1, src2 ++ sltui dst, dst, 1 ++ */ ++ *p++ = emit_op_rk_rj_rd(LAextra_SLTU, s2, s1, d); ++ *p++ = emit_op_si12_rj_rd(LAextra_SLTUI, 1, d, d); ++ return p; ++ /* No LAcc_AL here. 
++ case LAcc_AL: ++ break; ++ */ ++ default: ++ return NULL; ++ } ++} ++ ++static inline UInt* mkCMove ( UInt* p, HReg cond, HReg r0, ++ HReg r1, HReg dst, Bool isInt ) ++{ ++ if (isInt) { ++ /* ++ masknez $t0, r0, cond ++ maskeqz dst, r1, cond ++ or dst, $t0, dst ++ */ ++ UInt c = iregEnc(cond); ++ UInt d = iregEnc(dst); ++ *p++ = emit_op_rk_rj_rd(LAextra_MASKNEZ, c, iregEnc(r0), 12); ++ *p++ = emit_op_rk_rj_rd(LAextra_MASKEQZ, c, iregEnc(r1), d); ++ *p++ = emit_op_rk_rj_rd(LAbin_OR, d, 12, d); ++ } else { ++ /* ++ movgr2cf $fcc0, cond ++ fsel dst, r0, r1, $fcc0 ++ */ ++ *p++ = emit_op_rj_cd(LAextra_MOVGR2CF, iregEnc(cond), 0); ++ *p++ = emit_op_ca_fk_fj_fd(LAextra_FSEL, 0, fregEnc(r1), ++ fregEnc(r0), fregEnc(dst)); ++ } ++ return p; ++} ++ ++static inline UInt* mkCall ( UInt* p, HReg cond, Addr64 target, RetLoc rloc ) ++{ ++ if (!hregIsInvalid(cond) && rloc.pri != RLPri_None) { ++ /* The call might not happen (it isn't unconditional) and ++ it returns a result. In this case we will need to ++ generate a control flow diamond to put 0x555..555 in ++ the return register(s) in the case where the call ++ doesn't happen. If this ever becomes necessary, maybe ++ copy code from the 32-bit ARM equivalent. Until that ++ day, just give up. */ ++ return NULL; ++ } ++ ++ UInt* ptmp = NULL; ++ if (!hregIsInvalid(cond)) { ++ /* Create a hole to put a conditional branch in. We'll ++ patch it once we know the branch length. */ ++ ptmp = p; ++ p++; ++ } ++ ++ /* ++ $t0 = target ++ jirl $ra, $t0, 0 ++ */ ++ p = mkLoadImm(p, hregT0(), target); ++ *p++ = emit_op_offs16_rj_rd(LAextra_JIRL, 0, 12, 1); ++ ++ /* Patch the hole if necessary */ ++ if (!hregIsInvalid(cond)) { ++ vassert(ptmp != NULL); ++ UInt offs = (UInt)(p - ptmp); ++ vassert(offs >= 3 && offs <= 6); ++ /* beq cond, $zero, offs */ ++ *ptmp++ = emit_op_offs16_rj_rd(LAextra_BEQ, offs, iregEnc(cond), 0); ++ } ++ ++ return p; ++} ++ ++static inline UInt* mkXDirect ( UInt* p, Addr64 dstGA, ++ LOONGARCH64AMode* amPC, ++ HReg cond, Bool toFastEP, ++ const void* disp_cp_chain_me_to_slowEP, ++ const void* disp_cp_chain_me_to_fastEP ) ++{ ++ /* NB: what goes on here has to be very closely coordinated ++ with chainXDirect_LOONGARCH64 and unchainXDirect_LOONGARCH64 below. */ ++ /* We're generating chain-me requests here, so we need to be ++ sure this is actually allowed -- no-redir translations ++ can't use chain-me's. Hence: */ ++ vassert(disp_cp_chain_me_to_slowEP != NULL); ++ vassert(disp_cp_chain_me_to_fastEP != NULL); ++ ++ /* Use ptmp for backpatching conditional jumps. */ ++ UInt* ptmp = NULL; ++ ++ /* First off, if this is conditional, create a conditional ++ jump over the rest of it. Or at least, leave a space for ++ it that we will shortly fill in. */ ++ if (!hregIsInvalid(cond)) { ++ ptmp = p; ++ p++; ++ } ++ ++ /* Update the guest PC. ++ $t0 = dstGA ++ st.d $t0, amPC ++ */ ++ p = mkLoadImm(p, hregT0(), (ULong)dstGA); ++ p = mkStore(p, LAstore_ST_D, amPC, hregT0()); ++ ++ /* --- FIRST PATCHABLE BYTE follows --- */ ++ /* VG_(disp_cp_chain_me_to_{slowEP,fastEP}) (where we're ++ calling to) backs up the return address, so as to find the ++ address of the first patchable byte. So: don't change the ++ number of instructions (5) below. */ ++ /* ++ la $t0, VG_(disp_cp_chain_me_to_{slowEP,fastEP}) ++ jirl $ra, $t0, 0 ++ */ ++ const void* disp_cp_chain_me = toFastEP ? 
disp_cp_chain_me_to_fastEP ++ : disp_cp_chain_me_to_slowEP; ++ p = mkLoadImm_EXACTLY4(p, hregT0(), (ULong)(Addr)disp_cp_chain_me); ++ *p++ = emit_op_offs16_rj_rd(LAextra_JIRL, 0, 12, 1); ++ /* --- END of PATCHABLE BYTES --- */ ++ ++ /* Fix up the conditional jump, if there was one. */ ++ if (!hregIsInvalid(cond)) { ++ vassert(ptmp != NULL); ++ UInt offs = (UInt)(p - ptmp); ++ vassert(offs >= 8 && offs <= 11); ++ /* beq cond, $zero, offs */ ++ *ptmp++ = emit_op_offs16_rj_rd(LAextra_BEQ, offs, iregEnc(cond), 0); ++ } ++ ++ return p; ++} ++ ++static inline UInt* mkXIndir ( UInt* p, HReg dstGA, LOONGARCH64AMode* amPC, ++ HReg cond, const void* disp_cp_xindir ) ++{ ++ /* We're generating transfers that could lead indirectly to a ++ chain-me, so we need to be sure this is actually allowed -- ++ no-redir translations are not allowed to reach normal ++ translations without going through the scheduler. That means ++ no XDirects or XIndirs out from no-redir translations. ++ Hence: */ ++ vassert(disp_cp_xindir != NULL); ++ ++ /* Use ptmp for backpatching conditional jumps. */ ++ UInt* ptmp = NULL; ++ ++ /* First off, if this is conditional, create a conditional ++ jump over the rest of it. */ ++ if (!hregIsInvalid(cond)) { ++ ptmp = p; ++ p++; ++ } ++ ++ /* Update the guest PC. ++ or $t0, dstGA, $zero ++ st.d $t0, amPC ++ */ ++ *p++ = emit_op_rk_rj_rd(LAbin_OR, 0, iregEnc(dstGA), 12); ++ p = mkStore(p, LAstore_ST_D, amPC, hregT0()); ++ ++ /* ++ la $t0, VG_(disp_cp_xindir) ++ jirl $ra, $t0, 0 ++ */ ++ p = mkLoadImm(p, hregT0(), (ULong)(Addr)disp_cp_xindir); ++ *p++ = emit_op_offs16_rj_rd(LAextra_JIRL, 0, 12, 1); ++ ++ /* Fix up the conditional jump, if there was one. */ ++ if (!hregIsInvalid(cond)) { ++ vassert(ptmp != NULL); ++ UInt offs = (UInt)(p - ptmp); ++ vassert(offs >= 5 && offs <= 8); ++ /* beq cond, $zero, offs */ ++ *ptmp++ = emit_op_offs16_rj_rd(LAextra_BEQ, offs, iregEnc(cond), 0); ++ } ++ ++ return p; ++} ++ ++static inline UInt* mkXAssisted ( UInt* p, HReg dstGA, LOONGARCH64AMode* amPC, ++ HReg cond, IRJumpKind jk, ++ const void* disp_cp_xassisted ) ++{ ++ /* First off, if this is conditional, create a conditional jump ++ over the rest of it. Or at least, leave a space for it that ++ we will shortly fill in. */ ++ UInt* ptmp = NULL; ++ if (!hregIsInvalid(cond)) { ++ ptmp = p; ++ p++; ++ } ++ ++ /* Update the guest PC. ++ or $t0, dstGA, $zero ++ st.d $t0, amPC ++ */ ++ *p++ = emit_op_rk_rj_rd(LAbin_OR, 0, iregEnc(dstGA), 12); ++ p = mkStore(p, LAstore_ST_D, amPC, hregT0()); ++ ++ /* li.w $s8, magic_number */ ++ UInt trcval = 0; ++ switch (jk) { ++ case Ijk_Boring: ++ trcval = VEX_TRC_JMP_BORING; ++ break; ++ case Ijk_ClientReq: ++ trcval = VEX_TRC_JMP_CLIENTREQ; ++ break; ++ case Ijk_NoDecode: ++ trcval = VEX_TRC_JMP_NODECODE; ++ break; ++ case Ijk_InvalICache: ++ trcval = VEX_TRC_JMP_INVALICACHE; ++ break; ++ case Ijk_NoRedir: ++ trcval = VEX_TRC_JMP_NOREDIR; ++ break; ++ case Ijk_SigTRAP: ++ trcval = VEX_TRC_JMP_SIGTRAP; ++ break; ++ case Ijk_SigSEGV: ++ trcval = VEX_TRC_JMP_SIGSEGV; ++ break; ++ case Ijk_SigBUS: ++ trcval = VEX_TRC_JMP_SIGBUS; ++ break; ++ case Ijk_SigFPE_IntDiv: ++ trcval = VEX_TRC_JMP_SIGFPE_INTDIV; ++ break; ++ case Ijk_SigFPE_IntOvf: ++ trcval = VEX_TRC_JMP_SIGFPE_INTOVF; ++ break; ++ case Ijk_SigSYS: ++ trcval = VEX_TRC_JMP_SIGSYS; ++ break; ++ case Ijk_Sys_syscall: ++ trcval = VEX_TRC_JMP_SYS_SYSCALL; ++ break; ++ /* We don't expect to see the following being assisted. 
++ case Ijk_Call: ++ case Ijk_Ret: ++ case Ijk_Yield: ++ case Ijk_EmWarn: ++ case Ijk_EmFail: ++ case Ijk_MapFail: ++ case Ijk_FlushDCache: ++ case Ijk_SigILL: ++ case Ijk_SigFPE: ++ case Ijk_Sys_int32: ++ case Ijk_Sys_int128: ++ case Ijk_Sys_int129: ++ case Ijk_Sys_int130: ++ case Ijk_Sys_int145: ++ case Ijk_Sys_int210: ++ case Ijk_Sys_sysenter: ++ */ ++ default: ++ ppIRJumpKind(jk); ++ vpanic("emit_LOONGARCH64Instr.LAin_XAssisted: unexpected jump kind"); ++ } ++ vassert(trcval != 0); ++ p = mkLoadImm(p, hregGSP(), trcval); ++ ++ /* ++ la $t0, VG_(disp_cp_xassisted) ++ jirl $ra, $t0, 0 ++ */ ++ p = mkLoadImm(p, hregT0(), (ULong)(Addr)disp_cp_xassisted); ++ *p++ = emit_op_offs16_rj_rd(LAextra_JIRL, 0, 12, 1); ++ ++ /* Fix up the conditional jump, if there was one. */ ++ if (!hregIsInvalid(cond)) { ++ vassert(ptmp != NULL); ++ UInt offs = (UInt)(p - ptmp); ++ vassert(offs >= 6 && offs <= 12); ++ /* beq cond, $zero, offs */ ++ *ptmp++ = emit_op_offs16_rj_rd(LAextra_BEQ, offs, iregEnc(cond), 0); ++ } ++ ++ return p; ++} ++ ++static inline UInt* mkEvCheck ( UInt* p, LOONGARCH64AMode* amCounter, ++ LOONGARCH64AMode* amFailAddr ) ++{ ++ UInt* p0 = p; ++ ++ /* ++ ld.w $t0, amCounter ++ addi.d $t0, $t0, -1 ++ st.w $t0, amCounter ++ bge $t0, $zero, nofail ++ ld.d $t0, amFailAddr ++ jirl $ra, $t0, 0 ++ nofail: ++ */ ++ p = mkLoad(p, LAload_LD_W, amCounter, hregT0()); ++ *p++ = emit_op_si12_rj_rd(LAbin_ADDI_D, -1 & 0xfff, 12, 12); ++ p = mkStore(p, LAstore_ST_W, amCounter, hregT0()); ++ *p++ = emit_op_offs16_rj_rd(LAextra_BGE, 3, 12, 0); ++ p = mkLoad(p, LAload_LD_W, amFailAddr, hregT0()); ++ *p++ = emit_op_offs16_rj_rd(LAextra_JIRL, 0, 12, 1); ++ ++ /* Crosscheck */ ++ vassert(evCheckSzB_LOONGARCH64() == (UChar*)p - (UChar*)p0); ++ return p; ++} ++ ++static inline UInt* mkProfInc ( UInt* p ) ++{ ++ /* ++ li $t0, 0x6555755585559555UL ++ ld.d $t1, $t0, 0 ++ addi.d $t1, $t1, 1 ++ st.d $t1, $t0, 0 ++ */ ++ p = mkLoadImm_EXACTLY4(p, hregT0(), 0x6555755585559555UL); ++ *p++ = emit_op_si12_rj_rd(LAload_LD_D, 0, 12, 13); ++ *p++ = emit_op_si12_rj_rd(LAbin_ADDI_D, 1, 13, 13); ++ *p++ = emit_op_si12_rj_rd(LAstore_ST_D, 0, 12, 13); ++ return p; ++} ++ ++/* Emit an instruction into buf and return the number of bytes used. ++ Note that buf is not the insn's final place, and therefore it is ++ imperative to emit position-independent code. If the emitted ++ instruction was a profiler inc, set *is_profInc to True, else ++ leave it unchanged. 
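++   All LoongArch64 instructions are 4 bytes wide, so the value returned is
++   always a multiple of 4; the longest expansions produced here (for
++   example the Cas and XDirect cases) stay within the 48-byte bound
++   asserted just before the return.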
*/ ++Int emit_LOONGARCH64Instr ( /*MB_MOD*/Bool* is_profInc, ++ UChar* buf, ++ Int nbuf, ++ const LOONGARCH64Instr* i, ++ Bool mode64, ++ VexEndness endness_host, ++ const void* disp_cp_chain_me_to_slowEP, ++ const void* disp_cp_chain_me_to_fastEP, ++ const void* disp_cp_xindir, ++ const void* disp_cp_xassisted ) ++{ ++ vassert(mode64 == True); ++ ++ UInt* p = (UInt*)buf; ++ vassert(nbuf >= 32); ++ vassert((((HWord)buf) & 3) == 0); ++ ++ switch (i->tag) { ++ case LAin_LI: ++ p = mkLoadImm(p, i->LAin.LI.dst, i->LAin.LI.imm); ++ break; ++ case LAin_Un: ++ p = mkUnary(p, i->LAin.Unary.op, i->LAin.Unary.src, ++ i->LAin.Unary.dst); ++ break; ++ case LAin_Bin: ++ p = mkBinary(p, i->LAin.Binary.op, i->LAin.Binary.src2, ++ i->LAin.Binary.src1, i->LAin.Binary.dst); ++ break; ++ case LAin_Load: ++ p = mkLoad(p, i->LAin.Load.op, i->LAin.Load.src, ++ i->LAin.Load.dst); ++ break; ++ case LAin_Store: ++ p = mkStore(p, i->LAin.Store.op, i->LAin.Store.dst, ++ i->LAin.Store.src); ++ break; ++ case LAin_LLSC: ++ p = mkLLSC(p, i->LAin.LLSC.op, i->LAin.LLSC.addr, i->LAin.LLSC.val); ++ break; ++ case LAin_Bar: ++ p = mkBar(p, i->LAin.Bar.op, i->LAin.Bar.hint); ++ break; ++ case LAin_FpUn: ++ p = mkFpUnary(p, i->LAin.FpUnary.op, i->LAin.FpUnary.src, ++ i->LAin.FpUnary.dst); ++ break; ++ case LAin_FpBin: ++ p = mkFpBinary(p, i->LAin.FpBinary.op, i->LAin.FpBinary.src2, ++ i->LAin.FpBinary.src1, i->LAin.FpBinary.dst); ++ break; ++ case LAin_FpTri: ++ p = mkFpTrinary(p, i->LAin.FpTrinary.op, i->LAin.FpTrinary.src3, ++ i->LAin.FpTrinary.src2, i->LAin.FpTrinary.src1, ++ i->LAin.FpTrinary.dst); ++ break; ++ case LAin_FpLoad: ++ p = mkFpLoad(p, i->LAin.FpLoad.op, i->LAin.FpLoad.src, ++ i->LAin.FpLoad.dst); ++ break; ++ case LAin_FpStore: ++ p = mkFpStore(p, i->LAin.FpStore.op, i->LAin.FpStore.dst, ++ i->LAin.FpStore.src); ++ break; ++ case LAin_FpMove: ++ p = mkFpMove(p, i->LAin.FpMove.op, i->LAin.FpMove.src, ++ i->LAin.FpMove.dst); ++ break; ++ case LAin_FpCmp: ++ p = mkFpCmp(p, i->LAin.FpCmp.op, i->LAin.FpCmp.src2, ++ i->LAin.FpCmp.src1, i->LAin.FpCmp.dst); ++ break; ++ case LAin_Cas: ++ p = mkCas(p, i->LAin.Cas.old, i->LAin.Cas.addr, i->LAin.Cas.expd, ++ i->LAin.Cas.data, i->LAin.Cas.size64); ++ break; ++ case LAin_Cmp: ++ p = mkCmp(p, i->LAin.Cmp.cond, i->LAin.Cmp.src2, ++ i->LAin.Cmp.src1, i->LAin.Cmp.dst); ++ break; ++ case LAin_CMove: ++ p = mkCMove(p, i->LAin.CMove.cond, i->LAin.CMove.r0, ++ i->LAin.CMove.r1, i->LAin.CMove.dst, ++ i->LAin.CMove.isInt); ++ break; ++ case LAin_Call: ++ p = mkCall(p, i->LAin.Call.cond, i->LAin.Call.target, ++ i->LAin.Call.rloc); ++ break; ++ case LAin_XDirect: ++ p = mkXDirect(p, i->LAin.XDirect.dstGA, i->LAin.XDirect.amPC, ++ i->LAin.XDirect.cond, i->LAin.XDirect.toFastEP, ++ disp_cp_chain_me_to_slowEP, ++ disp_cp_chain_me_to_fastEP); ++ break; ++ case LAin_XIndir: ++ p = mkXIndir(p, i->LAin.XIndir.dstGA, i->LAin.XIndir.amPC, ++ i->LAin.XIndir.cond, disp_cp_xindir); ++ break; ++ case LAin_XAssisted: ++ p = mkXAssisted(p, i->LAin.XAssisted.dstGA, i->LAin.XAssisted.amPC, ++ i->LAin.XAssisted.cond, i->LAin.XAssisted.jk, ++ disp_cp_xassisted); ++ break; ++ case LAin_EvCheck: ++ p = mkEvCheck(p, i->LAin.EvCheck.amCounter, ++ i->LAin.EvCheck.amFailAddr); ++ break; ++ case LAin_ProfInc: ++ p = mkProfInc(p); ++ break; ++ default: ++ p = NULL; ++ break; ++ } ++ ++ if (p == NULL) { ++ ppLOONGARCH64Instr(i, True); ++ vpanic("emit_LOONGARCH64Instr"); ++ /*NOTREACHED*/ ++ } ++ ++ vassert(((UChar*)p) - &buf[0] <= 48); ++ return ((UChar*)p) - &buf[0]; ++} ++ ++/* How big is an event check? 
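++   It is a fixed sequence of six 4-byte instructions (load the counter,
++   decrement it, store it back, branch past the failure path while it is
++   still non-negative, load the fail address, jirl), i.e. 24 bytes.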
See case for mkEvCheck just above. That ++ crosschecks what this returns, so we can tell if we're inconsistent. */ ++Int evCheckSzB_LOONGARCH64 ( void ) ++{ ++ return 6 * 4; // 6 insns ++} ++ ++/* NB: what goes on here has to be very closely coordinated with the ++ emitInstr case for XDirect, above. */ ++VexInvalRange chainXDirect_LOONGARCH64 ( VexEndness endness_host, ++ void* place_to_chain, ++ const void* disp_cp_chain_me_EXPECTED, ++ const void* place_to_jump_to ) ++{ ++ vassert(endness_host == VexEndnessLE); ++ ++ /* What we're expecting to see is: ++ * la $t0, disp_cp_chain_me_to_EXPECTED ++ * jirl $ra, $t0, 0 ++ * viz ++ * <16 bytes generated by mkLoadImm_EXACTLY4> ++ * jirl $ra, $t0, 0 ++ */ ++ UInt* p = (UInt*)place_to_chain; ++ vassert(((HWord)p & 3) == 0); ++ vassert(is_LoadImm_EXACTLY4(p, hregT0(), (ULong)(Addr)disp_cp_chain_me_EXPECTED)); ++ vassert(p[4] == emit_op_offs16_rj_rd(LAextra_JIRL, 0, 12, 1)); ++ ++ /* And what we want to change it to is: ++ * la $t0, place_to_jump_to ++ * jirl $ra, $t0, 0 ++ * viz ++ * <16 bytes generated by mkLoadImm_EXACTLY4> ++ * jirl $ra, $t0, 0 ++ * ++ * The replacement has the same length as the original. ++ */ ++ p = mkLoadImm_EXACTLY4(p, hregT0(), (ULong)(Addr)place_to_jump_to); ++ *p++ = emit_op_offs16_rj_rd(LAextra_JIRL, 0, 12, 1); ++ ++ VexInvalRange vir = { (HWord)place_to_chain, 4 * 4 + 4 }; ++ return vir; ++} ++ ++/* NB: what goes on here has to be very closely coordinated with the ++ emitInstr case for XDirect, above. */ ++VexInvalRange unchainXDirect_LOONGARCH64 ( VexEndness endness_host, ++ void* place_to_unchain, ++ const void* place_to_jump_to_EXPECTED, ++ const void* disp_cp_chain_me ) ++{ ++ vassert(endness_host == VexEndnessLE); ++ ++ /* What we're expecting to see is: ++ * la $t0, place_to_jump_to_EXPECTED ++ * jirl $ra, $t0, 0 ++ * viz ++ * <16 bytes generated by mkLoadImm_EXACTLY4> ++ * jirl $ra, $t0, 0 ++ */ ++ UInt* p = (UInt*)place_to_unchain; ++ vassert(((HWord)p & 3) == 0); ++ vassert(is_LoadImm_EXACTLY4(p, hregT0(), (ULong)(Addr)place_to_jump_to_EXPECTED)); ++ vassert(p[4] == emit_op_offs16_rj_rd(LAextra_JIRL, 0, 12, 1)); ++ ++ /* And what we want to change it to is: ++ * la $t0, disp_cp_chain_me ++ * jirl $ra, $t0, 0 ++ * viz ++ * <16 bytes generated by mkLoadImm_EXACTLY4> ++ * jirl $ra, $t0, 0 ++ * ++ * The replacement has the same length as the original. ++ */ ++ p = mkLoadImm_EXACTLY4(p, hregT0(), (ULong)(Addr)disp_cp_chain_me); ++ *p++ = emit_op_offs16_rj_rd(LAextra_JIRL, 0, 12, 1); ++ ++ VexInvalRange vir = { (HWord)place_to_unchain, 4 * 4 + 4 }; ++ return vir; ++} ++ ++/* Patch the counter address into a profile inc point, as previously ++ created by the mkProfInc. 
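++   mkProfInc plants the recognisable placeholder 0x6555755585559555 using
++   mkLoadImm_EXACTLY4; patching simply re-emits those four immediate-building
++   instructions with the real counter address and leaves the following
++   ld.d / addi.d / st.d increment untouched, which is why the invalidated
++   range below is only 16 bytes.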
*/ ++VexInvalRange patchProfInc_LOONGARCH64 ( VexEndness endness_host, ++ void* place_to_patch, ++ const ULong* location_of_counter ) ++{ ++ vassert(endness_host == VexEndnessLE); ++ vassert(sizeof(ULong*) == 8); ++ ++ /* ++ $t0 = NotKnownYet ++ ld.d $t1, $t0, 0 ++ addi.d $t1, $t1, 1 ++ st.d $t1, $t0, 0 ++ */ ++ UInt* p = (UInt*)place_to_patch; ++ vassert(((HWord)p & 3) == 0); ++ vassert(is_LoadImm_EXACTLY4(p, hregT0(), 0x6555755585559555UL)); ++ vassert(p[4] == emit_op_si12_rj_rd(LAload_LD_D, 0, 12, 13)); ++ vassert(p[5] == emit_op_si12_rj_rd(LAbin_ADDI_D, 1, 13, 13)); ++ vassert(p[6] == emit_op_si12_rj_rd(LAstore_ST_D, 0, 12, 13)); ++ ++ p = mkLoadImm_EXACTLY4(p, hregT0(), (ULong)(Addr)location_of_counter); ++ ++ VexInvalRange vir = { (HWord)place_to_patch, 4 * 4 }; ++ return vir; ++} ++ ++ ++/*---------------------------------------------------------------*/ ++/*--- end host_loongarch64_defs.c ---*/ ++/*---------------------------------------------------------------*/ +diff --git a/VEX/priv/host_loongarch64_defs.h b/VEX/priv/host_loongarch64_defs.h +new file mode 100644 +index 000000000..89365d6d1 +--- /dev/null ++++ b/VEX/priv/host_loongarch64_defs.h +@@ -0,0 +1,685 @@ ++ ++/*---------------------------------------------------------------*/ ++/*--- begin host_loongarch64_defs.h ---*/ ++/*---------------------------------------------------------------*/ ++ ++/* ++ This file is part of Valgrind, a dynamic binary instrumentation ++ framework. ++ ++ Copyright (C) 2021-2022 Loongson Technology Corporation Limited ++ ++ This program is free software; you can redistribute it and/or ++ modify it under the terms of the GNU General Public License as ++ published by the Free Software Foundation; either version 2 of the ++ License, or (at your option) any later version. ++ ++ This program is distributed in the hope that it will be useful, but ++ WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ General Public License for more details. ++ ++ You should have received a copy of the GNU General Public License ++ along with this program; if not, see . ++ ++ The GNU General Public License is contained in the file COPYING. ++*/ ++ ++#ifndef __VEX_HOST_LOONGARCH64_DEFS_H ++#define __VEX_HOST_LOONGARCH64_DEFS_H ++ ++#include "libvex_basictypes.h" ++#include "libvex.h" /* VexArch */ ++#include "host_generic_regs.h" /* HReg */ ++ ++ ++/* --------- Registers. 
--------- */ ++ ++#define ST_IN static inline ++ ++/* Integer static registers */ ++ST_IN HReg hregLOONGARCH64_R23 ( void ) { return mkHReg(False, HRcInt64, 23, 0); } ++ST_IN HReg hregLOONGARCH64_R24 ( void ) { return mkHReg(False, HRcInt64, 24, 1); } ++ST_IN HReg hregLOONGARCH64_R25 ( void ) { return mkHReg(False, HRcInt64, 25, 2); } ++ST_IN HReg hregLOONGARCH64_R26 ( void ) { return mkHReg(False, HRcInt64, 26, 3); } ++ST_IN HReg hregLOONGARCH64_R27 ( void ) { return mkHReg(False, HRcInt64, 27, 4); } ++ST_IN HReg hregLOONGARCH64_R28 ( void ) { return mkHReg(False, HRcInt64, 28, 5); } ++ST_IN HReg hregLOONGARCH64_R29 ( void ) { return mkHReg(False, HRcInt64, 29, 6); } ++ST_IN HReg hregLOONGARCH64_R30 ( void ) { return mkHReg(False, HRcInt64, 30, 7); } ++/* $r31 is used as guest stack pointer */ ++ ++/* Integer temporary registers */ ++/* $r12 is used as a chaining/ProfInc/Cmove/genSpill/genReload temporary */ ++/* $r13 is used as a ProfInc temporary */ ++ST_IN HReg hregLOONGARCH64_R14 ( void ) { return mkHReg(False, HRcInt64, 14, 8); } ++ST_IN HReg hregLOONGARCH64_R15 ( void ) { return mkHReg(False, HRcInt64, 15, 9); } ++ST_IN HReg hregLOONGARCH64_R16 ( void ) { return mkHReg(False, HRcInt64, 16, 10); } ++ST_IN HReg hregLOONGARCH64_R17 ( void ) { return mkHReg(False, HRcInt64, 17, 11); } ++ST_IN HReg hregLOONGARCH64_R18 ( void ) { return mkHReg(False, HRcInt64, 18, 12); } ++ST_IN HReg hregLOONGARCH64_R19 ( void ) { return mkHReg(False, HRcInt64, 19, 13); } ++ST_IN HReg hregLOONGARCH64_R20 ( void ) { return mkHReg(False, HRcInt64, 20, 14); } ++ ++/* Floating point static registers */ ++ST_IN HReg hregLOONGARCH64_F24 ( void ) { return mkHReg(False, HRcFlt64, 24, 15); } ++ST_IN HReg hregLOONGARCH64_F25 ( void ) { return mkHReg(False, HRcFlt64, 25, 16); } ++ST_IN HReg hregLOONGARCH64_F26 ( void ) { return mkHReg(False, HRcFlt64, 26, 17); } ++ST_IN HReg hregLOONGARCH64_F27 ( void ) { return mkHReg(False, HRcFlt64, 27, 18); } ++ST_IN HReg hregLOONGARCH64_F28 ( void ) { return mkHReg(False, HRcFlt64, 28, 19); } ++ST_IN HReg hregLOONGARCH64_F29 ( void ) { return mkHReg(False, HRcFlt64, 29, 20); } ++ST_IN HReg hregLOONGARCH64_F30 ( void ) { return mkHReg(False, HRcFlt64, 30, 21); } ++ST_IN HReg hregLOONGARCH64_F31 ( void ) { return mkHReg(False, HRcFlt64, 31, 22); } ++ ++/* Other Integer registers */ ++ST_IN HReg hregLOONGARCH64_R0 ( void ) { return mkHReg(False, HRcInt64, 0, 23); } ++ST_IN HReg hregLOONGARCH64_R1 ( void ) { return mkHReg(False, HRcInt64, 1, 24); } ++ST_IN HReg hregLOONGARCH64_R2 ( void ) { return mkHReg(False, HRcInt64, 2, 25); } ++ST_IN HReg hregLOONGARCH64_R3 ( void ) { return mkHReg(False, HRcInt64, 3, 26); } ++ST_IN HReg hregLOONGARCH64_R4 ( void ) { return mkHReg(False, HRcInt64, 4, 27); } ++ST_IN HReg hregLOONGARCH64_R5 ( void ) { return mkHReg(False, HRcInt64, 5, 28); } ++ST_IN HReg hregLOONGARCH64_R6 ( void ) { return mkHReg(False, HRcInt64, 6, 29); } ++ST_IN HReg hregLOONGARCH64_R7 ( void ) { return mkHReg(False, HRcInt64, 7, 30); } ++ST_IN HReg hregLOONGARCH64_R8 ( void ) { return mkHReg(False, HRcInt64, 8, 31); } ++ST_IN HReg hregLOONGARCH64_R9 ( void ) { return mkHReg(False, HRcInt64, 9, 32); } ++ST_IN HReg hregLOONGARCH64_R10 ( void ) { return mkHReg(False, HRcInt64, 10, 33); } ++ST_IN HReg hregLOONGARCH64_R11 ( void ) { return mkHReg(False, HRcInt64, 11, 34); } ++ST_IN HReg hregLOONGARCH64_R12 ( void ) { return mkHReg(False, HRcInt64, 12, 35); } ++ST_IN HReg hregLOONGARCH64_R13 ( void ) { return mkHReg(False, HRcInt64, 13, 36); } ++ST_IN HReg hregLOONGARCH64_R21 ( 
void ) { return mkHReg(False, HRcInt64, 21, 37); } ++ST_IN HReg hregLOONGARCH64_R22 ( void ) { return mkHReg(False, HRcInt64, 22, 38); } ++ST_IN HReg hregLOONGARCH64_R31 ( void ) { return mkHReg(False, HRcInt64, 31, 39); } ++ ++/* Special registers */ ++ST_IN HReg hregLOONGARCH64_FCSR3 ( void ) { return mkHReg(False, HRcInt32, 3, 40); } ++ ++#undef ST_IN ++ ++#define hregZERO() hregLOONGARCH64_R0() ++#define hregSP() hregLOONGARCH64_R3() ++#define hregT0() hregLOONGARCH64_R12() ++#define hregT1() hregLOONGARCH64_R13() ++#define hregGSP() hregLOONGARCH64_R31() ++ ++extern UInt ppHRegLOONGARCH64 ( HReg reg ); ++ ++/* Number of registers used arg passing in function calls */ ++#define LOONGARCH64_N_ARGREGS 8 /* a0 ... a7 */ ++ ++ ++/* --------- Condition codes, LOONGARCH64 encoding. --------- */ ++typedef enum { ++ LAcc_EQ = 0, /* equal */ ++ LAcc_NE = 1, /* not equal */ ++ ++ LAcc_LT = 2, /* less than (signed) */ ++ LAcc_GE = 3, /* great equal (signed) */ ++ ++ LAcc_LTU = 4, /* less than (unsigned) */ ++ LAcc_GEU = 5, /* great equal (unsigned) */ ++ ++ LAcc_AL = 6 /* always (unconditional) */ ++} LOONGARCH64CondCode; ++ ++ ++/* --------- Memory address expressions (amodes). --------- */ ++ ++typedef enum { ++ LAam_RI, /* Reg + Imm (signed 12-bit or signed 14-bit) */ ++ LAam_RR /* Reg1 + Reg2 */ ++} LOONGARCH64AModeTag; ++ ++typedef struct { ++ LOONGARCH64AModeTag tag; ++ union { ++ struct { ++ HReg base; ++ UShort index; ++ } RI; ++ struct { ++ HReg base; ++ HReg index; ++ } RR; ++ } LAam; ++} LOONGARCH64AMode; ++ ++extern LOONGARCH64AMode* LOONGARCH64AMode_RI ( HReg reg, UShort imm ); ++extern LOONGARCH64AMode* LOONGARCH64AMode_RR ( HReg base, HReg index ); ++ ++ ++/* --------- Operand, which can be reg or imm. --------- */ ++ ++typedef enum { ++ LAri_Reg, ++ LAri_Imm ++} LOONGARCH64RITag; ++ ++typedef struct { ++ LOONGARCH64RITag tag; ++ union { ++ struct { ++ HReg reg; ++ } R; ++ struct { ++ UShort imm; ++ UChar size; // size == 5 || size == 6 || size == 12 ++ Bool isSigned; ++ } I; ++ } LAri; ++} LOONGARCH64RI; ++ ++extern LOONGARCH64RI* LOONGARCH64RI_R ( HReg reg ); ++extern LOONGARCH64RI* LOONGARCH64RI_I ( UShort imm, UChar size, Bool isSigned ); ++ ++ ++/* --------- Instructions. 
--------- */ ++ ++/* Tags for unary operations */ ++typedef enum { ++ LAun_CLZ_W = 0x00001400, ++ LAun_CTZ_W = 0x00001c00, ++ LAun_CLZ_D = 0x00002400, ++ LAun_CTZ_D = 0x00002c00, ++ LAun_EXT_W_H = 0x00005800, ++ LAun_EXT_W_B = 0x00005c00 ++} LOONGARCH64UnOp; ++ ++/* Tags for binary operations */ ++typedef enum { ++ LAbin_ADD_W = 0x00100000, ++ LAbin_ADD_D = 0x00108000, ++ LAbin_SUB_W = 0x00110000, ++ LAbin_SUB_D = 0x00118000, ++ LAbin_NOR = 0x00140000, ++ LAbin_AND = 0x00148000, ++ LAbin_OR = 0x00150000, ++ LAbin_XOR = 0x00158000, ++ LAbin_SLL_W = 0x00170000, ++ LAbin_SRL_W = 0x00178000, ++ LAbin_SRA_W = 0x00180000, ++ LAbin_SLL_D = 0x00188000, ++ LAbin_SRL_D = 0x00190000, ++ LAbin_SRA_D = 0x00198000, ++ LAbin_MUL_W = 0x001c0000, ++ LAbin_MUL_D = 0x001d8000, ++ LAbin_MULH_W = 0x001c8000, ++ LAbin_MULH_WU = 0x001d0000, ++ LAbin_MULH_D = 0x001e0000, ++ LAbin_MULH_DU = 0x001e8000, ++ LAbin_MULW_D_W = 0x001f0000, ++ LAbin_MULW_D_WU = 0x001f8000, ++ LAbin_DIV_W = 0x00200000, ++ LAbin_MOD_W = 0x00208000, ++ LAbin_DIV_WU = 0x00210000, ++ LAbin_MOD_WU = 0x00218000, ++ LAbin_DIV_D = 0x00220000, ++ LAbin_MOD_D = 0x00228000, ++ LAbin_DIV_DU = 0x00230000, ++ LAbin_MOD_DU = 0x00238000, ++ LAbin_SLLI_W = 0x00408000, ++ LAbin_SLLI_D = 0x00410000, ++ LAbin_SRLI_W = 0x00448000, ++ LAbin_SRLI_D = 0x00450000, ++ LAbin_SRAI_W = 0x00488000, ++ LAbin_SRAI_D = 0x00490000, ++ LAbin_ADDI_W = 0x02800000, ++ LAbin_ADDI_D = 0x02c00000, ++ LAbin_ANDI = 0x03400000, ++ LAbin_ORI = 0x03800000, ++ LAbin_XORI = 0x03c00000 ++} LOONGARCH64BinOp; ++ ++/* Tags for load operations */ ++typedef enum { ++ LAload_LD_W = 0x28800000, ++ LAload_LD_D = 0x28c00000, ++ LAload_LD_BU = 0x2a000000, ++ LAload_LD_HU = 0x2a400000, ++ LAload_LD_WU = 0x2a800000, ++ LAload_LDX_D = 0x380c0000, ++ LAload_LDX_BU = 0x38200000, ++ LAload_LDX_HU = 0x38240000, ++ LAload_LDX_WU = 0x38280000 ++} LOONGARCH64LoadOp; ++ ++/* Tags for store operations */ ++typedef enum { ++ LAstore_ST_B = 0x29000000, ++ LAstore_ST_H = 0x29400000, ++ LAstore_ST_W = 0x29800000, ++ LAstore_ST_D = 0x29c00000, ++ LAstore_STX_B = 0x38100000, ++ LAstore_STX_H = 0x38140000, ++ LAstore_STX_W = 0x38180000, ++ LAstore_STX_D = 0x381c0000 ++} LOONGARCH64StoreOp; ++ ++/* Tags for ll/sc operations */ ++typedef enum { ++ LAllsc_LL_W = 0x20000000, ++ LAllsc_SC_W = 0x21000000, ++ LAllsc_LL_D = 0x22000000, ++ LAllsc_SC_D = 0x23000000 ++} LOONGARCH64LLSCOp; ++ ++/* Tags for barrier operations */ ++typedef enum { ++ LAbar_DBAR = 0x38720000, ++ LAbar_IBAR = 0x38728000 ++} LOONGARCH64BarOp; ++ ++/* Tags for floating point unary operations */ ++typedef enum { ++ LAfpun_FABS_S = 0x01140400, ++ LAfpun_FABS_D = 0x01140800, ++ LAfpun_FNEG_S = 0x01141400, ++ LAfpun_FNEG_D = 0x01141800, ++ LAfpun_FLOGB_S = 0x01142400, ++ LAfpun_FLOGB_D = 0x01142800, ++ LAfpun_FSQRT_S = 0x01144400, ++ LAfpun_FSQRT_D = 0x01144800, ++ LAfpun_FRSQRT_S = 0x01146400, ++ LAfpun_FRSQRT_D = 0x01146800, ++ LAfpun_FCVT_S_D = 0x01191800, ++ LAfpun_FCVT_D_S = 0x01192400, ++ LAfpun_FTINT_W_S = 0x011b0400, ++ LAfpun_FTINT_W_D = 0x011b0800, ++ LAfpun_FTINT_L_S = 0x011b2400, ++ LAfpun_FTINT_L_D = 0x011b2800, ++ LAfpun_FFINT_S_W = 0x011d1000, ++ LAfpun_FFINT_S_L = 0x011d1800, ++ LAfpun_FFINT_D_W = 0x011d2000, ++ LAfpun_FFINT_D_L = 0x011d2800, ++ LAfpun_FRINT_S = 0x011e4400, ++ LAfpun_FRINT_D = 0x011e4800 ++} LOONGARCH64FpUnOp; ++ ++/* Tags for floating point binary operations */ ++typedef enum { ++ LAfpbin_FADD_S = 0x01008000, ++ LAfpbin_FADD_D = 0x01010000, ++ LAfpbin_FSUB_S = 0x01028000, ++ LAfpbin_FSUB_D = 0x01030000, ++ LAfpbin_FMUL_S 
= 0x01048000, ++ LAfpbin_FMUL_D = 0x01050000, ++ LAfpbin_FDIV_S = 0x01068000, ++ LAfpbin_FDIV_D = 0x01070000, ++ LAfpbin_FMAX_S = 0x01088000, ++ LAfpbin_FMAX_D = 0x01090000, ++ LAfpbin_FMIN_S = 0x010a8000, ++ LAfpbin_FMIN_D = 0x010b0000, ++ LAfpbin_FMAXA_S = 0x010c8000, ++ LAfpbin_FMAXA_D = 0x010d0000, ++ LAfpbin_FMINA_S = 0x010e8000, ++ LAfpbin_FMINA_D = 0x010f0000, ++ LAfpbin_FSCALEB_S = 0x01108000, ++ LAfpbin_FSCALEB_D = 0x01110000 ++} LOONGARCH64FpBinOp; ++ ++/* Tags for floating point trinary operations */ ++typedef enum { ++ LAfpbin_FMADD_S = 0x08100000, ++ LAfpbin_FMADD_D = 0x08200000, ++ LAfpbin_FMSUB_S = 0x08500000, ++ LAfpbin_FMSUB_D = 0x08600000 ++} LOONGARCH64FpTriOp; ++ ++/* Tags for floating point load operations */ ++typedef enum { ++ LAfpload_FLD_S = 0x2b000000, ++ LAfpload_FLD_D = 0x2b800000, ++ LAfpload_FLDX_S = 0x38300000, ++ LAfpload_FLDX_D = 0x38340000 ++} LOONGARCH64FpLoadOp; ++ ++/* Tags for floating point store operations */ ++typedef enum { ++ LAfpstore_FST_S = 0x2b400000, ++ LAfpstore_FST_D = 0x2bc00000, ++ LAfpstore_FSTX_S = 0x38380000, ++ LAfpstore_FSTX_D = 0x383c0000 ++} LOONGARCH64FpStoreOp; ++ ++/* Tags for floating point move operations */ ++typedef enum { ++ LAfpmove_FMOV_S = 0x01149400, ++ LAfpmove_FMOV_D = 0x01149800, ++ LAfpmove_MOVGR2FR_W = 0x0114a400, ++ LAfpmove_MOVGR2FR_D = 0x0114a800, ++ LAfpmove_MOVFR2GR_S = 0x0114b400, ++ LAfpmove_MOVFR2GR_D = 0x0114b800, ++ LAfpmove_MOVGR2FCSR = 0x0114c000, ++ LAfpmove_MOVFCSR2GR = 0x0114c800 ++} LOONGARCH64FpMoveOp; ++ ++/* Tags for floating point compare operations */ ++typedef enum { ++ LAfpcmp_FCMP_CLT_S = 0x0c110000, ++ LAfpcmp_FCMP_CLT_D = 0x0c210000, ++ LAfpcmp_FCMP_CEQ_S = 0x0c120000, ++ LAfpcmp_FCMP_CEQ_D = 0x0c220000, ++ LAfpcmp_FCMP_CUN_S = 0x0c140000, ++ LAfpcmp_FCMP_CUN_D = 0x0c240000 ++} LOONGARCH64FpCmpOp; ++ ++/* Tags for extra operations, we only use them when emiting code directly */ ++typedef enum { ++ LAextra_MOVGR2CF = 0x0114d800, ++ LAextra_MOVCF2GR = 0x0114dc00, ++ LAextra_SLT = 0x00120000, ++ LAextra_SLTU = 0x00128000, ++ LAextra_MASKEQZ = 0x00130000, ++ LAextra_MASKNEZ = 0x00138000, ++ LAextra_SLTI = 0x02000000, ++ LAextra_SLTUI = 0x02400000, ++ LAextra_LU52I_D = 0x03000000, ++ LAextra_FSEL = 0x0d000000, ++ LAextra_LU12I_W = 0x14000000, ++ LAextra_LU32I_D = 0x16000000, ++ LAextra_JIRL = 0x4c000000, ++ LAextra_B = 0x50000000, ++ LAextra_BEQ = 0x58000000, ++ LAextra_BNE = 0x5c000000, ++ LAextra_BGE = 0x64000000 ++} LOONGARCH64ExtraOp; ++ ++/* Tags for instructions */ ++typedef enum { ++ /* Pseudo-insn, used for generating a 64-bit ++ literal to register */ ++ LAin_LI, /* load imm */ ++ ++ /* Integer insns */ ++ LAin_Un, /* unary */ ++ LAin_Bin, /* binary */ ++ LAin_Load, /* load */ ++ LAin_Store, /* store */ ++ LAin_LLSC, /* ll/sc */ ++ LAin_Bar, /* barrier */ ++ ++ /* Floating point insns */ ++ LAin_FpUn, /* floating point unary */ ++ LAin_FpBin, /* floating point binary */ ++ LAin_FpTri, /* floating point trinary */ ++ LAin_FpLoad, /* floating point load */ ++ LAin_FpStore, /* floating point store */ ++ LAin_FpMove, /* floating point move */ ++ LAin_FpCmp, /* floating point compare */ ++ ++ /* Pseudo-insn */ ++ LAin_Cas, /* compare and swap */ ++ LAin_Cmp, /* word compare */ ++ LAin_CMove, /* condition move */ ++ ++ /* Call target (an absolute address), on given ++ condition (which could be LAcc_AL). 
*/ ++ LAin_Call, /* call */ ++ ++ /* The following 5 insns are mandated by translation chaining */ ++ LAin_XDirect, /* direct transfer to GA */ ++ LAin_XIndir, /* indirect transfer to GA */ ++ LAin_XAssisted, /* assisted transfer to GA */ ++ LAin_EvCheck, /* Event check */ ++ LAin_ProfInc /* 64-bit profile counter increment */ ++} LOONGARCH64InstrTag; ++ ++typedef struct { ++ LOONGARCH64InstrTag tag; ++ union { ++ struct { ++ ULong imm; ++ HReg dst; ++ } LI; ++ struct { ++ LOONGARCH64UnOp op; ++ HReg src; ++ HReg dst; ++ } Unary; ++ struct { ++ LOONGARCH64BinOp op; ++ LOONGARCH64RI* src2; ++ HReg src1; ++ HReg dst; ++ } Binary; ++ struct { ++ LOONGARCH64LoadOp op; ++ LOONGARCH64AMode* src; ++ HReg dst; ++ } Load; ++ struct { ++ LOONGARCH64StoreOp op; ++ LOONGARCH64AMode* dst; ++ HReg src; ++ } Store; ++ struct { ++ LOONGARCH64LLSCOp op; ++ Bool isLoad; ++ LOONGARCH64AMode* addr; ++ HReg val; ++ } LLSC; ++ struct { ++ LOONGARCH64BarOp op; ++ UShort hint; ++ } Bar; ++ struct { ++ LOONGARCH64FpUnOp op; ++ HReg src; ++ HReg dst; ++ } FpUnary; ++ struct { ++ LOONGARCH64FpBinOp op; ++ HReg src2; ++ HReg src1; ++ HReg dst; ++ } FpBinary; ++ struct { ++ LOONGARCH64FpTriOp op; ++ HReg src3; ++ HReg src2; ++ HReg src1; ++ HReg dst; ++ } FpTrinary; ++ struct { ++ LOONGARCH64FpLoadOp op; ++ LOONGARCH64AMode* src; ++ HReg dst; ++ } FpLoad; ++ struct { ++ LOONGARCH64FpStoreOp op; ++ LOONGARCH64AMode* dst; ++ HReg src; ++ } FpStore; ++ struct { ++ LOONGARCH64FpMoveOp op; ++ HReg src; ++ HReg dst; ++ } FpMove; ++ struct { ++ LOONGARCH64FpCmpOp op; ++ HReg src2; ++ HReg src1; ++ HReg dst; ++ } FpCmp; ++ struct { ++ HReg old; ++ HReg addr; ++ HReg expd; ++ HReg data; ++ Bool size64; ++ } Cas; ++ struct { ++ LOONGARCH64CondCode cond; ++ HReg dst; ++ HReg src1; ++ HReg src2; ++ } Cmp; ++ struct { ++ HReg cond; ++ HReg r0; ++ HReg r1; ++ HReg dst; ++ Bool isInt; ++ } CMove; ++ struct { ++ HReg cond; ++ Addr64 target; ++ UInt nArgRegs; ++ RetLoc rloc; ++ } Call; ++ struct { ++ Addr64 dstGA; ++ LOONGARCH64AMode* amPC; ++ HReg cond; ++ Bool toFastEP; ++ } XDirect; ++ struct { ++ HReg dstGA; ++ LOONGARCH64AMode* amPC; ++ HReg cond; ++ } XIndir; ++ struct { ++ HReg dstGA; ++ LOONGARCH64AMode* amPC; ++ HReg cond; ++ IRJumpKind jk; ++ } XAssisted; ++ struct { ++ LOONGARCH64AMode* amCounter; ++ LOONGARCH64AMode* amFailAddr; ++ } EvCheck; ++ struct { ++ /* No fields. The address of the counter to inc is ++ installed later, post-translation, by patching it in, ++ as it is not known at translation time. 
*/ ++ } ProfInc; ++ } LAin; ++} LOONGARCH64Instr; ++ ++extern LOONGARCH64Instr* LOONGARCH64Instr_LI ( ULong imm, HReg dst ); ++extern LOONGARCH64Instr* LOONGARCH64Instr_Unary ( LOONGARCH64UnOp op, ++ HReg src, HReg dst ); ++extern LOONGARCH64Instr* LOONGARCH64Instr_Binary ( LOONGARCH64BinOp op, ++ LOONGARCH64RI* src2, ++ HReg src1, HReg dst ); ++extern LOONGARCH64Instr* LOONGARCH64Instr_Load ( LOONGARCH64LoadOp op, ++ LOONGARCH64AMode* src, ++ HReg dst ); ++extern LOONGARCH64Instr* LOONGARCH64Instr_Store ( LOONGARCH64StoreOp op, ++ LOONGARCH64AMode* dst, ++ HReg src ); ++extern LOONGARCH64Instr* LOONGARCH64Instr_LLSC ( LOONGARCH64LLSCOp op, ++ Bool isLoad, ++ LOONGARCH64AMode* addr, ++ HReg val ); ++extern LOONGARCH64Instr* LOONGARCH64Instr_Bar ( LOONGARCH64BarOp op, ++ UShort hint ); ++extern LOONGARCH64Instr* LOONGARCH64Instr_FpUnary ( LOONGARCH64FpUnOp op, ++ HReg src, HReg dst ); ++extern LOONGARCH64Instr* LOONGARCH64Instr_FpBinary ( LOONGARCH64FpBinOp op, ++ HReg src2, HReg src1, ++ HReg dst ); ++extern LOONGARCH64Instr* LOONGARCH64Instr_FpTrinary ( LOONGARCH64FpTriOp op, ++ HReg src3, HReg src2, ++ HReg src1, HReg dst ); ++extern LOONGARCH64Instr* LOONGARCH64Instr_FpLoad ( LOONGARCH64FpLoadOp op, ++ LOONGARCH64AMode* src, ++ HReg dst ); ++extern LOONGARCH64Instr* LOONGARCH64Instr_FpStore ( LOONGARCH64FpStoreOp op, ++ LOONGARCH64AMode* dst, ++ HReg src ); ++extern LOONGARCH64Instr* LOONGARCH64Instr_FpMove ( LOONGARCH64FpMoveOp op, ++ HReg src, HReg dst ); ++extern LOONGARCH64Instr* LOONGARCH64Instr_FpCmp ( LOONGARCH64FpCmpOp op, ++ HReg src2, HReg src1, ++ HReg dst ); ++extern LOONGARCH64Instr* LOONGARCH64Instr_Cas ( HReg old, HReg addr, ++ HReg expd, HReg data, ++ Bool size64 ); ++extern LOONGARCH64Instr* LOONGARCH64Instr_Cmp ( LOONGARCH64CondCode cond, ++ HReg src2, HReg src1, ++ HReg dst ); ++extern LOONGARCH64Instr* LOONGARCH64Instr_CMove ( HReg cond, HReg r0, HReg r1, ++ HReg dst, Bool isInt ); ++extern LOONGARCH64Instr* LOONGARCH64Instr_Call ( HReg cond, Addr64 target, ++ UInt nArgRegs, RetLoc rloc ); ++extern LOONGARCH64Instr* LOONGARCH64Instr_XDirect ( Addr64 dstGA, ++ LOONGARCH64AMode* amPC, ++ HReg cond, Bool toFastEP ); ++extern LOONGARCH64Instr* LOONGARCH64Instr_XIndir ( HReg dstGA, ++ LOONGARCH64AMode* amPC, ++ HReg cond ); ++extern LOONGARCH64Instr* LOONGARCH64Instr_XAssisted ( HReg dstGA, ++ LOONGARCH64AMode* amPC, ++ HReg cond, IRJumpKind jk ); ++extern LOONGARCH64Instr* LOONGARCH64Instr_EvCheck ( LOONGARCH64AMode* amCounter, ++ LOONGARCH64AMode* amFailAddr ); ++extern LOONGARCH64Instr* LOONGARCH64Instr_ProfInc ( void ); ++ ++extern void ppLOONGARCH64Instr ( const LOONGARCH64Instr* i, Bool mode64 ); ++ ++/* Some functions that insulate the register allocator from details ++ of the underlying instruction set. 
*/ ++extern void getRegUsage_LOONGARCH64Instr ( HRegUsage* u, ++ const LOONGARCH64Instr* i, ++ Bool mode64 ); ++extern void mapRegs_LOONGARCH64Instr ( HRegRemap* m, LOONGARCH64Instr* i, ++ Bool mode64 ); ++extern Int emit_LOONGARCH64Instr (/*MB_MOD*/Bool* is_profInc, ++ UChar* buf, ++ Int nbuf, ++ const LOONGARCH64Instr* i, ++ Bool mode64, ++ VexEndness endness_host, ++ const void* disp_cp_chain_me_to_slowEP, ++ const void* disp_cp_chain_me_to_fastEP, ++ const void* disp_cp_xindir, ++ const void* disp_cp_xassisted ); ++ ++extern void genSpill_LOONGARCH64 ( /*OUT*/ HInstr** i1, /*OUT*/ HInstr** i2, ++ HReg rreg, Int offsetB, Bool mode64); ++extern void genReload_LOONGARCH64 ( /*OUT*/ HInstr** i1, /*OUT*/ HInstr** i2, ++ HReg rreg, Int offsetB, Bool mode64); ++extern LOONGARCH64Instr* genMove_LOONGARCH64 ( HReg from, HReg to, ++ Bool mode64 ); ++ ++extern const RRegUniverse* getRRegUniverse_LOONGARCH64 ( void ); ++ ++extern HInstrArray* iselSB_LOONGARCH64 ( const IRSB*, ++ VexArch, ++ const VexArchInfo*, ++ const VexAbiInfo*, ++ Int offs_Host_EvC_Counter, ++ Int offs_Host_EvC_FailAddr, ++ Bool chainingAllowed, ++ Bool addProfInc, ++ Addr max_ga ); ++ ++/* How big is an event check? See case for Min_EvCheck in ++ emit_LOONGARCH64Instr just above. That crosschecks what this returns, ++ so we can tell if we're inconsistent. */ ++extern Int evCheckSzB_LOONGARCH64 ( void ); ++ ++/* NB: what goes on here has to be very closely coordinated with the ++ emitInstr case for XDirect, above. */ ++extern VexInvalRange chainXDirect_LOONGARCH64 ( VexEndness endness_host, ++ void* place_to_chain, ++ const void* disp_cp_chain_me_EXPECTED, ++ const void* place_to_jump_to ); ++ ++/* NB: what goes on here has to be very closely coordinated with the ++ emitInstr case for XDirect, above. */ ++extern VexInvalRange unchainXDirect_LOONGARCH64 ( VexEndness endness_host, ++ void* place_to_unchain, ++ const void* place_to_jump_to_EXPECTED, ++ const void* disp_cp_chain_me ); ++ ++/* Patch the counter address into a profile inc point, as previously ++ created by the Min_ProfInc case for emit_LOONGARCH64Instr. */ ++extern VexInvalRange patchProfInc_LOONGARCH64 ( VexEndness endness_host, ++ void* place_to_patch, ++ const ULong* location_of_counter ); ++ ++#endif /* ndef __VEX_HOST_LOONGARCH64_DEFS_H */ ++ ++ ++/*---------------------------------------------------------------*/ ++/*--- end host-loongarch64_defs.h ---*/ ++/*---------------------------------------------------------------*/ +diff --git a/VEX/priv/host_loongarch64_isel.c b/VEX/priv/host_loongarch64_isel.c +new file mode 100644 +index 000000000..c3c4ac8da +--- /dev/null ++++ b/VEX/priv/host_loongarch64_isel.c +@@ -0,0 +1,2867 @@ ++ ++/*---------------------------------------------------------------*/ ++/*--- begin host_loongarch64_isel.c ---*/ ++/*---------------------------------------------------------------*/ ++ ++/* ++ This file is part of Valgrind, a dynamic binary instrumentation ++ framework. ++ ++ Copyright (C) 2021-2022 Loongson Technology Corporation Limited ++ ++ This program is free software; you can redistribute it and/or ++ modify it under the terms of the GNU General Public License as ++ published by the Free Software Foundation; either version 2 of the ++ License, or (at your option) any later version. ++ ++ This program is distributed in the hope that it will be useful, but ++ WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU ++ General Public License for more details-> ++ ++ You should have received a copy of the GNU General Public License ++ along with this program; if not, see . ++ ++ The GNU General Public License is contained in the file COPYING. ++*/ ++ ++#include "libvex_basictypes.h" ++#include "libvex_ir.h" ++#include "libvex.h" ++ ++#include "main_util.h" ++#include "main_globals.h" ++#include "host_generic_regs.h" ++#include "host_loongarch64_defs.h" ++ ++ ++/*---------------------------------------------------------*/ ++/*--- ISelEnv ---*/ ++/*---------------------------------------------------------*/ ++ ++/* This carries around: ++ ++ - A mapping from IRTemp to IRType, giving the type of any IRTemp we ++ might encounter. This is computed before insn selection starts, ++ and does not change. ++ ++ - A mapping from IRTemp to HReg. This tells the insn selector ++ which virtual register is associated with each IRTemp temporary. ++ This is computed before insn selection starts, and does not ++ change. We expect this mapping to map precisely the same set of ++ IRTemps as the type mapping does. ++ ++ |vregmap| holds the primary register for the IRTemp. ++ |vregmapHI| is only used for 128-bit integer-typed ++ IRTemps. It holds the identity of a second ++ 64-bit virtual HReg, which holds the high half ++ of the value. ++ ++ - The code array, that is, the insns selected so far. ++ ++ - A counter, for generating new virtual registers. ++ ++ - The host hardware capabilities word. This is set at the start ++ and does not change. ++ ++ - A Bool for indicating whether we may generate chain-me ++ instructions for control flow transfers, or whether we must use ++ XAssisted. ++ ++ - The maximum guest address of any guest insn in this block. ++ Actually, the address of the highest-addressed byte from any insn ++ in this block. Is set at the start and does not change. This is ++ used for detecting jumps which are definitely forward-edges from ++ this block, and therefore can be made (chained) to the fast entry ++ point of the destination, thereby avoiding the destination's ++ event check. ++ ++ - An IRExpr*, which may be NULL, holding the IR expression (an ++ IRRoundingMode-encoded value) to which the FPU's rounding mode ++ was most recently set. Setting to NULL is always safe. Used to ++ avoid redundant settings of the FPU's rounding mode, as ++ described in set_FPCR_rounding_mode below. ++ ++ Note, this is all (well, mostly) host-independent. ++*/ ++ ++typedef ++ struct { ++ /* Constant -- are set at the start and do not change. */ ++ IRTypeEnv* type_env; ++ ++ HReg* vregmap; ++ HReg* vregmapHI; ++ Int n_vregmap; ++ ++ UInt hwcaps; ++ ++ Bool chainingAllowed; ++ Addr64 max_ga; ++ ++ /* These are modified as we go along. 
*/ ++ HInstrArray* code; ++ Int vreg_ctr; ++ } ++ ISelEnv; ++ ++ ++static HReg lookupIRTemp ( ISelEnv* env, IRTemp tmp ) ++{ ++ vassert(tmp < env->n_vregmap); ++ return env->vregmap[tmp]; ++} ++ ++static void lookupIRTempPair ( HReg* vrHI, HReg* vrLO, ++ ISelEnv* env, IRTemp tmp ) ++{ ++ vassert(tmp < env->n_vregmap); ++ vassert(!hregIsInvalid(env->vregmapHI[tmp])); ++ *vrLO = env->vregmap[tmp]; ++ *vrHI = env->vregmapHI[tmp]; ++} ++ ++static void addInstr ( ISelEnv* env, LOONGARCH64Instr* instr ) ++{ ++ addHInstr(env->code, instr); ++ if (vex_traceflags & VEX_TRACE_VCODE) { ++ ppLOONGARCH64Instr(instr, True); ++ vex_printf("\n"); ++ } ++} ++ ++static HReg newVRegI ( ISelEnv* env ) ++{ ++ HReg reg = mkHReg(True/*virtual reg*/, HRcInt64, 0, env->vreg_ctr); ++ env->vreg_ctr++; ++ return reg; ++} ++ ++static HReg newVRegF ( ISelEnv* env ) ++{ ++ HReg reg = mkHReg(True/*virtual reg*/, HRcFlt64, 0, env->vreg_ctr); ++ env->vreg_ctr++; ++ return reg; ++} ++ ++ ++/*---------------------------------------------------------*/ ++/*--- ISEL: Forward declarations ---*/ ++/*---------------------------------------------------------*/ ++ ++/* These are organised as iselXXX and iselXXX_wrk pairs. The ++ iselXXX_wrk do the real work, but are not to be called directly. ++ For each XXX, iselXXX calls its iselXXX_wrk counterpart, then ++ checks that all returned registers are virtual. You should not ++ call the _wrk version directly. ++*/ ++ ++static LOONGARCH64AMode* iselIntExpr_AMode_wrk ( ISelEnv* env, ++ IRExpr* e, IRType dty ); ++static LOONGARCH64AMode* iselIntExpr_AMode ( ISelEnv* env, ++ IRExpr* e, IRType dty ); ++ ++static LOONGARCH64RI* iselIntExpr_RI_wrk ( ISelEnv* env, IRExpr* e, ++ UChar size, Bool isSigned ); ++static LOONGARCH64RI* iselIntExpr_RI ( ISelEnv* env, IRExpr* e, ++ UChar size, Bool isSigned ); ++ ++static HReg iselIntExpr_R_wrk ( ISelEnv* env, IRExpr* e ); ++static HReg iselIntExpr_R ( ISelEnv* env, IRExpr* e ); ++ ++static HReg iselCondCode_R_wrk ( ISelEnv* env, IRExpr* e ); ++static HReg iselCondCode_R ( ISelEnv* env, IRExpr* e ); ++ ++static void iselInt128Expr_wrk ( HReg* hi, HReg* lo, ++ ISelEnv* env, IRExpr* e ); ++static void iselInt128Expr ( HReg* hi, HReg* lo, ++ ISelEnv* env, IRExpr* e ); ++ ++static HReg iselFltExpr_wrk ( ISelEnv* env, IRExpr* e ); ++static HReg iselFltExpr ( ISelEnv* env, IRExpr* e ); ++ ++ ++/*---------------------------------------------------------*/ ++/*--- ISEL: Misc helpers ---*/ ++/*---------------------------------------------------------*/ ++ ++/* Generate move insn */ ++static LOONGARCH64Instr* LOONGARCH64Instr_Move ( HReg to, HReg from ) ++{ ++ LOONGARCH64RI *ri = LOONGARCH64RI_R(hregZERO()); ++ return LOONGARCH64Instr_Binary(LAbin_OR, ri, from, to); ++} ++ ++/* Generate LOONGARCH64AMode from HReg and UInt */ ++static LOONGARCH64AMode* mkLOONGARCH64AMode_RI ( HReg reg, UInt imm ) ++{ ++ vassert(imm < (1 << 12)); ++ return LOONGARCH64AMode_RI(reg, (UShort)imm); ++} ++ ++/* Set floating point rounding mode */ ++static void set_rounding_mode ( ISelEnv* env, IRExpr* mode ) ++{ ++ /* ++ rounding mode | LOONGARCH | IR ++ ------------------------------ ++ to nearest | 00 | 00 ++ to zero | 01 | 11 ++ to +infinity | 10 | 10 ++ to -infinity | 11 | 01 ++ */ ++ ++ /* rm = XOR(rm, (rm << 1)) & 3 */ ++ HReg rm = iselIntExpr_R(env, mode); ++ HReg tmp = newVRegI(env); ++ LOONGARCH64RI* ri = LOONGARCH64RI_I(1, 5, False); ++ LOONGARCH64RI* ri2 = LOONGARCH64RI_R(rm); ++ LOONGARCH64RI* ri3 = LOONGARCH64RI_I(3, 12, False); ++ addInstr(env, 
LOONGARCH64Instr_Binary(LAbin_SLLI_W, ri, rm, tmp)); ++ addInstr(env, LOONGARCH64Instr_Binary(LAbin_XOR, ri2, tmp, rm)); ++ addInstr(env, LOONGARCH64Instr_Binary(LAbin_ANDI, ri3, rm, rm)); ++ ++ /* Save old value of FCSR3 */ ++ HReg fcsr = newVRegI(env); ++ addInstr(env, LOONGARCH64Instr_FpMove(LAfpmove_MOVFCSR2GR, ++ hregLOONGARCH64_FCSR3(), fcsr)); ++ ++ /* Store old FCSR3 to stack */ ++ LOONGARCH64RI* ri4 = LOONGARCH64RI_I(-4 & 0xfff, 12, True); ++ addInstr(env, LOONGARCH64Instr_Binary(LAbin_ADDI_D, ri4, hregSP(), hregSP())); ++ LOONGARCH64AMode* am = LOONGARCH64AMode_RI(hregSP(), 0); ++ addInstr(env, LOONGARCH64Instr_Store(LAstore_ST_W, am, fcsr)); ++ ++ /* Set new value of FCSR3 */ ++ LOONGARCH64RI* ri5 = LOONGARCH64RI_I(8, 5, False); ++ addInstr(env, LOONGARCH64Instr_Binary(LAbin_SLLI_W, ri5, rm, rm)); ++ addInstr(env, LOONGARCH64Instr_FpMove(LAfpmove_MOVGR2FCSR, ++ rm, hregLOONGARCH64_FCSR3())); ++} ++ ++static void set_rounding_mode_default ( ISelEnv* env ) ++{ ++ /* Load old FCSR3 from stack */ ++ HReg fcsr = newVRegI(env); ++ LOONGARCH64AMode* am = LOONGARCH64AMode_RI(hregSP(), 0); ++ addInstr(env, LOONGARCH64Instr_Load(LAload_LD_WU, am, fcsr)); ++ ++ /* Restore SP */ ++ LOONGARCH64RI* ri = LOONGARCH64RI_I(4, 12, True); ++ addInstr(env, LOONGARCH64Instr_Binary(LAbin_ADDI_D, ri, hregSP(), hregSP())); ++ ++ /* Set new value of FCSR3 */ ++ addInstr(env, LOONGARCH64Instr_FpMove(LAfpmove_MOVGR2FCSR, ++ fcsr, hregLOONGARCH64_FCSR3())); ++} ++ ++/* Convert LOONGARCH FCMP cond to IR result */ ++static HReg convert_cond_to_IR ( ISelEnv* env, HReg src2, HReg src1, Bool size64 ) ++{ ++ HReg tmp = newVRegI(env); ++ HReg dst = newVRegI(env); ++ ++ LOONGARCH64RI* ri1 = LOONGARCH64RI_I(63, 6, False); ++ LOONGARCH64RI* ri2 = LOONGARCH64RI_I(0x45, 12, False); ++ if (size64) ++ addInstr(env, LOONGARCH64Instr_FpCmp(LAfpcmp_FCMP_CUN_D, src2, src1, tmp)); ++ else ++ addInstr(env, LOONGARCH64Instr_FpCmp(LAfpcmp_FCMP_CUN_S, src2, src1, tmp)); ++ addInstr(env, LOONGARCH64Instr_Binary(LAbin_SLLI_D, ri1, tmp, tmp)); ++ addInstr(env, LOONGARCH64Instr_Binary(LAbin_SRAI_D, ri1, tmp, tmp)); ++ addInstr(env, LOONGARCH64Instr_Binary(LAbin_ANDI, ri2, tmp, dst)); ++ ++ LOONGARCH64RI* ri3 = LOONGARCH64RI_I(0x1, 12, False); ++ LOONGARCH64RI* ri4 = LOONGARCH64RI_R(tmp); ++ if (size64) ++ addInstr(env, LOONGARCH64Instr_FpCmp(LAfpcmp_FCMP_CLT_D, src2, src1, tmp)); ++ else ++ addInstr(env, LOONGARCH64Instr_FpCmp(LAfpcmp_FCMP_CLT_S, src2, src1, tmp)); ++ addInstr(env, LOONGARCH64Instr_Binary(LAbin_SLLI_D, ri1, tmp, tmp)); ++ addInstr(env, LOONGARCH64Instr_Binary(LAbin_SRAI_D, ri1, tmp, tmp)); ++ addInstr(env, LOONGARCH64Instr_Binary(LAbin_ANDI, ri3, tmp, tmp)); ++ addInstr(env, LOONGARCH64Instr_Binary(LAbin_OR, ri4, dst, dst)); ++ ++ LOONGARCH64RI* ri5 = LOONGARCH64RI_I(0x40, 12, False); ++ if (size64) ++ addInstr(env, LOONGARCH64Instr_FpCmp(LAfpcmp_FCMP_CEQ_D, src2, src1, tmp)); ++ else ++ addInstr(env, LOONGARCH64Instr_FpCmp(LAfpcmp_FCMP_CEQ_S, src2, src1, tmp)); ++ addInstr(env, LOONGARCH64Instr_Binary(LAbin_SLLI_D, ri1, tmp, tmp)); ++ addInstr(env, LOONGARCH64Instr_Binary(LAbin_SRAI_D, ri1, tmp, tmp)); ++ addInstr(env, LOONGARCH64Instr_Binary(LAbin_ANDI, ri5, tmp, tmp)); ++ addInstr(env, LOONGARCH64Instr_Binary(LAbin_OR, ri4, dst, dst)); ++ ++ return dst; ++} ++ ++ ++/*---------------------------------------------------------*/ ++/*--- ISEL: Function call helpers ---*/ ++/*---------------------------------------------------------*/ ++ ++/* Used only in doHelperCall. 
See big comment in doHelperCall re ++ handling of register-parameter args. This function figures out ++ whether evaluation of an expression might require use of a fixed ++ register. If in doubt return True (safe but suboptimal). ++*/ ++static Bool mightRequireFixedRegs ( IRExpr* e ) ++{ ++ if (UNLIKELY(is_IRExpr_VECRET_or_GSPTR(e))) { ++ // These are always "safe" -- either a copy of SP in some ++ // arbitrary vreg, or a copy of $r31, respectively. ++ return False; ++ } ++ /* Else it's a "normal" expression. */ ++ switch (e->tag) { ++ case Iex_RdTmp: case Iex_Const: case Iex_Get: ++ return False; ++ default: ++ return True; ++ } ++} ++ ++/* Do a complete function call. |guard| is a Ity_Bit expression ++ indicating whether or not the call happens. If guard==NULL, the ++ call is unconditional. |retloc| is set to indicate where the ++ return value is after the call. The caller (of this fn) must ++ generate code to add |stackAdjustAfterCall| to the stack pointer ++ after the call is done. Returns True iff it managed to handle this ++ combination of arg/return types, else returns False. */ ++static Bool doHelperCall( /*OUT*/UInt* stackAdjustAfterCall, ++ /*OUT*/RetLoc* retloc, ++ ISelEnv* env, ++ IRExpr* guard, ++ IRCallee* cee, IRType retTy, IRExpr** args ) ++{ ++ HReg cond; ++ HReg argregs[LOONGARCH64_N_ARGREGS]; ++ HReg tmpregs[LOONGARCH64_N_ARGREGS]; ++ Bool go_fast; ++ Int n_args, i, nextArgReg; ++ Addr64 target; ++ ++ vassert(LOONGARCH64_N_ARGREGS == 8); ++ ++ /* Set default returns. We'll update them later if needed. */ ++ *stackAdjustAfterCall = 0; ++ *retloc = mk_RetLoc_INVALID(); ++ ++ /* These are used for cross-checking that IR-level constraints on ++ the use of IRExpr_VECRET() and IRExpr_GSPTR() are observed. */ ++ UInt nVECRETs = 0; ++ UInt nGSPTRs = 0; ++ ++ /* Marshal args for a call and do the call. ++ ++ This function only deals with a tiny set of possibilities, which ++ cover all helpers in practice. The restrictions are that only ++ arguments in registers are supported, hence only ++ LOONGARCH64_N_ARGREGS x 64 integer bits in total can be passed. ++ In fact the only supported arg type is I64. ++ ++ The return type can be I{64,32}. We currently do not add vector ++ support. ++ ++ |args| may also contain IRExpr_GSPTR(), in which case the ++ value in $r31 is passed as the corresponding argument. ++ ++ Generating code which is both efficient and correct when ++ parameters are to be passed in registers is difficult, for the ++ reasons elaborated in detail in comments attached to ++ doHelperCall() in priv/host_x86_isel.c. Here, we use a variant ++ of the method described in those comments. ++ ++ The problem is split into two cases: the fast scheme and the ++ slow scheme. In the fast scheme, arguments are computed ++ directly into the target (real) registers. This is only safe ++ when we can be sure that computation of each argument will not ++ trash any real registers set by computation of any other ++ argument. ++ ++ In the slow scheme, all args are first computed into vregs, and ++ once they are all done, they are moved to the relevant real ++ regs. This always gives correct code, but it also gives a bunch ++ of vreg-to-rreg moves which are usually redundant but are hard ++ for the register allocator to get rid of. ++ ++ To decide which scheme to use, all argument expressions are ++ first examined. If they are all so simple that it is clear they ++ will be evaluated without use of any fixed registers, use the ++ fast scheme, else use the slow scheme. 
Note also that only ++ unconditional calls may use the fast scheme, since having to ++ compute a condition expression could itself trash real ++ registers. ++ ++ Note this requires being able to examine an expression and ++ determine whether or not evaluation of it might use a fixed ++ register. That requires knowledge of how the rest of this insn ++ selector works. Currently just the following 3 are regarded as ++ safe -- hopefully they cover the majority of arguments in ++ practice: IRExpr_Tmp IRExpr_Const IRExpr_Get. ++ */ ++ ++ /* LOONGARCH64 calling convention: up to eight registers ($a0 ... $a7) ++ are allowed to be used for passing integer arguments. They correspond ++ to regs $r4 ... $r11. Note that the cee->regparms field is meaningless ++ on LOONGARCH64 host (since we only implement one calling convention) ++ and so we always ignore it. */ ++ ++ n_args = 0; ++ for (i = 0; args[i]; i++) { ++ IRExpr* arg = args[i]; ++ if (UNLIKELY(arg->tag == Iex_VECRET)) { ++ nVECRETs++; ++ } else if (UNLIKELY(arg->tag == Iex_GSPTR)) { ++ nGSPTRs++; ++ } ++ n_args++; ++ } ++ ++ if (n_args > LOONGARCH64_N_ARGREGS) { ++ vpanic("doHelperCall(loongarch64): cannot currently handle > 8 args"); ++ } ++ ++ argregs[0] = hregLOONGARCH64_R4(); ++ argregs[1] = hregLOONGARCH64_R5(); ++ argregs[2] = hregLOONGARCH64_R6(); ++ argregs[3] = hregLOONGARCH64_R7(); ++ argregs[4] = hregLOONGARCH64_R8(); ++ argregs[5] = hregLOONGARCH64_R9(); ++ argregs[6] = hregLOONGARCH64_R10(); ++ argregs[7] = hregLOONGARCH64_R11(); ++ ++ tmpregs[0] = tmpregs[1] = tmpregs[2] = tmpregs[3] = INVALID_HREG; ++ tmpregs[4] = tmpregs[5] = tmpregs[6] = tmpregs[7] = INVALID_HREG; ++ ++ /* First decide which scheme (slow or fast) is to be used. First assume the ++ fast scheme, and select slow if any contraindications (wow) appear. */ ++ ++ go_fast = True; ++ ++ if (guard) { ++ if (guard->tag == Iex_Const ++ && guard->Iex.Const.con->tag == Ico_U1 ++ && guard->Iex.Const.con->Ico.U1 == True) { ++ /* unconditional */ ++ } else { ++ /* Not manifestly unconditional -- be conservative. */ ++ go_fast = False; ++ } ++ } ++ ++ if (go_fast) { ++ for (i = 0; i < n_args; i++) { ++ if (mightRequireFixedRegs(args[i])) { ++ go_fast = False; ++ break; ++ } ++ } ++ } ++ ++ if (go_fast) { ++ if (retTy == Ity_V128 || retTy == Ity_V256) { ++ go_fast = False; ++ vpanic("doHelperCall(loongarch64): currently do not support vector"); ++ } ++ } ++ ++ /* At this point the scheme to use has been established. Generate ++ code to get the arg values into the argument rregs. If we run ++ out of arg regs, give up. */ ++ ++ if (go_fast) { ++ /* FAST SCHEME */ ++ nextArgReg = 0; ++ ++ for (i = 0; i < n_args; i++) { ++ IRExpr* arg = args[i]; ++ ++ IRType aTy = Ity_INVALID; ++ if (LIKELY(!is_IRExpr_VECRET_or_GSPTR(arg))) ++ aTy = typeOfIRExpr(env->type_env, args[i]); ++ ++ if (nextArgReg >= LOONGARCH64_N_ARGREGS) ++ return False; /* out of argregs */ ++ ++ if (aTy == Ity_I64) { ++ addInstr(env, LOONGARCH64Instr_Move(argregs[nextArgReg], ++ iselIntExpr_R(env, args[i]))); ++ nextArgReg++; ++ } else if (arg->tag == Iex_GSPTR) { ++ addInstr(env, LOONGARCH64Instr_Move(argregs[nextArgReg], hregGSP())); ++ nextArgReg++; ++ } else if (arg->tag == Iex_VECRET) { ++ // because of the go_fast logic above, we can't get here, ++ // since vector return values makes us use the slow path ++ // instead. ++ vassert(0); ++ } else ++ return False; /* unhandled arg type */ ++ } ++ ++ /* Fast scheme only applies for unconditional calls. 
Hence: */ ++ cond = INVALID_HREG; ++ } else { ++ /* SLOW SCHEME; move via temporaries */ ++ nextArgReg = 0; ++ ++ for (i = 0; i < n_args; i++) { ++ IRExpr* arg = args[i]; ++ ++ IRType aTy = Ity_INVALID; ++ if (LIKELY(!is_IRExpr_VECRET_or_GSPTR(arg))) ++ aTy = typeOfIRExpr(env->type_env, args[i]); ++ ++ if (nextArgReg >= LOONGARCH64_N_ARGREGS) ++ return False; /* out of argregs */ ++ ++ if (aTy == Ity_I64) { ++ tmpregs[nextArgReg] = iselIntExpr_R(env, args[i]); ++ nextArgReg++; ++ } else if (arg->tag == Iex_GSPTR) { ++ tmpregs[nextArgReg] = hregGSP(); ++ nextArgReg++; ++ } else if (arg->tag == Iex_VECRET) { ++ vpanic("doHelperCall(loongarch64): currently do not support vector"); ++ nextArgReg++; ++ } else ++ return False; /* unhandled arg type */ ++ } ++ ++ /* Now we can compute the condition. We can't do it earlier ++ because the argument computations could trash the condition ++ codes. Be a bit clever to handle the common case where the ++ guard is 1:Bit. */ ++ cond = INVALID_HREG; ++ if (guard) { ++ if (guard->tag == Iex_Const ++ && guard->Iex.Const.con->tag == Ico_U1 ++ && guard->Iex.Const.con->Ico.U1 == True) { ++ /* unconditional -- do nothing */ ++ } else { ++ cond = iselCondCode_R(env, guard); ++ } ++ } ++ ++ /* Move the args to their final destinations. */ ++ for (i = 0; i < nextArgReg; i++) { ++ vassert(!(hregIsInvalid(tmpregs[i]))); ++ /* None of these insns, including any spill code that might ++ be generated, may alter the condition codes. */ ++ addInstr(env, LOONGARCH64Instr_Move(argregs[i], tmpregs[i])); ++ } ++ } ++ ++ /* Should be assured by checks above */ ++ vassert(nextArgReg <= LOONGARCH64_N_ARGREGS); ++ ++ /* Do final checks, set the return values, and generate the call ++ instruction proper. */ ++ vassert(nGSPTRs == 0 || nGSPTRs == 1); ++ vassert(nVECRETs == ((retTy == Ity_V128 || retTy == Ity_V256) ? 1 : 0)); ++ vassert(*stackAdjustAfterCall == 0); ++ vassert(is_RetLoc_INVALID(*retloc)); ++ switch (retTy) { ++ case Ity_INVALID: ++ /* Function doesn't return a value. */ ++ *retloc = mk_RetLoc_simple(RLPri_None); ++ break; ++ case Ity_I8: case Ity_I16: case Ity_I32: case Ity_I64: ++ *retloc = mk_RetLoc_simple(RLPri_Int); ++ break; ++ case Ity_V128: ++ case Ity_V256: ++ vpanic("doHelperCall(loongarch64): currently do not support vector"); ++ break; ++ default: ++ /* IR can denote other possible return types, but we don't ++ handle those here. */ ++ vassert(0); ++ } ++ ++ /* Finally, generate the call itself. This needs the *retloc value ++ set in the switch above, which is why it's at the end. */ ++ ++ /* nextArgReg doles out argument registers. Since these are ++ assigned in the order $a0 .. $a7, its numeric value at this point, ++ which must be between 0 and 8 inclusive, is going to be equal to ++ the number of arg regs in use for the call. Hence bake that ++ number into the call (we'll need to know it when doing register ++ allocation, to know what regs the call reads.) */ ++ ++ target = (Addr)cee->addr; ++ addInstr(env, LOONGARCH64Instr_Call(cond, target, nextArgReg, *retloc)); ++ ++ return True; /* success */ ++} ++ ++ ++/*---------------------------------------------------------*/ ++/*--- ISEL: Integer expressions (64/32/16/8 bit) ---*/ ++/*---------------------------------------------------------*/ ++ ++/* Select insns for an integer-typed expression, and add them to the ++ code list. Return a reg holding the result. This reg will be a ++ virtual register. THE RETURNED REG MUST NOT BE MODIFIED. 
If you ++ want to modify it, ask for a new vreg, copy it in there, and modify ++ the copy. The register allocator will do its best to map both ++ vregs to the same real register, so the copies will often disappear ++ later in the game. ++ ++ This should handle expressions of 64, 32, 16 and 8-bit type. ++ All results are returned in a (mode64 ? 64bit : 32bit) register. ++ For 16- and 8-bit expressions, the upper (32/48/56 : 16/24) bits ++ are arbitrary, so you should mask or sign extend partial values ++ if necessary. ++*/ ++ ++/* --------------------- AMode --------------------- */ ++ ++static LOONGARCH64AMode* iselIntExpr_AMode ( ISelEnv* env, ++ IRExpr* e, IRType dty ) ++{ ++ LOONGARCH64AMode* am = iselIntExpr_AMode_wrk(env, e, dty); ++ ++ /* sanity checks ... */ ++ switch (am->tag) { ++ case LAam_RI: ++ vassert(am->LAam.RI.index < (1 << 11)); /* The sign bit (bit 12) must be 0. */ ++ vassert(hregClass(am->LAam.RI.base) == HRcInt64); ++ vassert(hregIsVirtual(am->LAam.RI.base)); ++ break; ++ case LAam_RR: ++ vassert(hregClass(am->LAam.RR.base) == HRcInt64); ++ vassert(hregIsVirtual(am->LAam.RR.base)); ++ vassert(hregClass(am->LAam.RR.index) == HRcInt64); ++ vassert(hregIsVirtual(am->LAam.RR.index)); ++ break; ++ default: ++ vpanic("iselIntExpr_AMode: unknown LOONGARCH64 AMode tag"); ++ break; ++ } ++ ++ return am; ++} ++ ++/* DO NOT CALL THIS DIRECTLY ! */ ++static LOONGARCH64AMode* iselIntExpr_AMode_wrk ( ISelEnv* env, ++ IRExpr* e, IRType dty ) ++{ ++ IRType ty = typeOfIRExpr(env->type_env, e); ++ vassert(e); ++ vassert(ty == Ity_I64); ++ ++ /* Add64(expr, i), where i <= 0x7ff */ ++ if (e->tag == Iex_Binop && e->Iex.Binop.op == Iop_Add64 ++ && e->Iex.Binop.arg2->tag == Iex_Const ++ && e->Iex.Binop.arg2->Iex.Const.con->tag == Ico_U64 ++ && e->Iex.Binop.arg2->Iex.Const.con->Ico.U64 <= 0x7ff) { ++ return LOONGARCH64AMode_RI(iselIntExpr_R(env, e->Iex.Binop.arg1), ++ (UShort)e->Iex.Binop.arg2->Iex.Const.con->Ico.U64); ++ } ++ ++ /* Add64(expr, expr) */ ++ if (e->tag == Iex_Binop && e->Iex.Binop.op == Iop_Add64) { ++ HReg base = iselIntExpr_R(env, e->Iex.Binop.arg1); ++ HReg index = iselIntExpr_R(env, e->Iex.Binop.arg2); ++ return LOONGARCH64AMode_RR(base, index); ++ } ++ ++ /* Doesn't match anything in particular. Generate it into ++ a register and use that. */ ++ return LOONGARCH64AMode_RI(iselIntExpr_R(env, e), 0); ++} ++ ++/* --------------------- RI --------------------- */ ++ ++static LOONGARCH64RI* iselIntExpr_RI ( ISelEnv* env, IRExpr* e, ++ UChar size, Bool isSigned ) ++{ ++ LOONGARCH64RI* ri = iselIntExpr_RI_wrk(env, e, size, isSigned); ++ ++ /* sanity checks ... */ ++ switch (ri->tag) { ++ case LAri_Imm: ++ vassert(ri->LAri.I.size == 5 || ri->LAri.I.size == 6 ++ || ri->LAri.I.size == 12); ++ if (ri->LAri.I.size == 5) { ++ vassert(ri->LAri.I.isSigned == False); ++ vassert(ri->LAri.I.imm < (1 << 5)); ++ } else if (ri->LAri.I.size == 6) { ++ vassert(ri->LAri.I.isSigned == False); ++ vassert(ri->LAri.I.imm < (1 << 6)); ++ } else { ++ vassert(ri->LAri.I.imm < (1 << 12)); ++ } ++ break; ++ case LAri_Reg: ++ vassert(hregClass(ri->LAri.R.reg) == HRcInt64); ++ vassert(hregIsVirtual(ri->LAri.R.reg)); ++ break; ++ default: ++ vpanic("iselIntExpr_RI: unknown LOONGARCH64 RI tag"); ++ break; ++ } ++ ++ return ri; ++} ++ ++/* DO NOT CALL THIS DIRECTLY ! 
*/ ++static LOONGARCH64RI* iselIntExpr_RI_wrk ( ISelEnv* env, IRExpr* e, ++ UChar size, Bool isSigned ) ++{ ++ IRType ty = typeOfIRExpr(env->type_env, e); ++ vassert(e); ++ vassert(ty == Ity_I8 || ty == Ity_I16 || ty == Ity_I32 || ty == Ity_I64); ++ ++ LOONGARCH64RI *ri = NULL; ++ ++ /* special case: immediate */ ++ if (e->tag == Iex_Const) { ++ switch (e->Iex.Const.con->tag) { ++ case Ico_U32: ++ if (!isSigned && e->Iex.Const.con->Ico.U32 < (1 << size)) { ++ UShort imm = e->Iex.Const.con->Ico.U32; ++ ri = LOONGARCH64RI_I(imm, size, isSigned); ++ } ++ break; ++ case Ico_U64: ++ if (!isSigned && e->Iex.Const.con->Ico.U64 < (1 << size)) { ++ UShort imm = e->Iex.Const.con->Ico.U64; ++ ri = LOONGARCH64RI_I(imm, size, isSigned); ++ } ++ break; ++ default: ++ break; ++ } ++ /* else fail, fall through to default case */ ++ } ++ ++ if (ri == NULL) { ++ /* default case: calculate into a register and return that */ ++ HReg reg = iselIntExpr_R(env, e); ++ ri = LOONGARCH64RI_R(reg); ++ } ++ ++ return ri; ++} ++ ++/* --------------------- Reg --------------------- */ ++ ++static HReg iselIntExpr_R ( ISelEnv* env, IRExpr* e ) ++{ ++ HReg r = iselIntExpr_R_wrk(env, e); ++ ++ /* sanity checks ... */ ++ vassert(hregClass(r) == HRcInt64); ++ vassert(hregIsVirtual(r)); ++ ++ return r; ++} ++ ++/* DO NOT CALL THIS DIRECTLY ! */ ++static HReg iselIntExpr_R_wrk ( ISelEnv* env, IRExpr* e ) ++{ ++ IRType ty = typeOfIRExpr(env->type_env, e); ++ vassert(e); ++ vassert(ty == Ity_I8 || ty == Ity_I16 || ty == Ity_I32 || ty == Ity_I64); ++ ++ switch (e->tag) { ++ /* --------- TEMP --------- */ ++ case Iex_RdTmp: ++ return lookupIRTemp(env, e->Iex.RdTmp.tmp); ++ ++ /* --------- LOAD --------- */ ++ case Iex_Load: { ++ if (e->Iex.Load.end != Iend_LE) ++ goto irreducible; ++ ++ LOONGARCH64AMode* am = iselIntExpr_AMode(env, e->Iex.Load.addr, ty); ++ HReg dst = newVRegI(env); ++ LOONGARCH64LoadOp op; ++ switch(ty) { ++ case Ity_I8: ++ op = (am->tag == LAam_RI) ? LAload_LD_BU : LAload_LDX_BU; ++ break; ++ case Ity_I16: ++ op = (am->tag == LAam_RI) ? LAload_LD_HU : LAload_LDX_HU; ++ break; ++ case Ity_I32: ++ op = (am->tag == LAam_RI) ? LAload_LD_WU : LAload_LDX_WU; ++ break; ++ case Ity_I64: ++ op = (am->tag == LAam_RI) ? LAload_LD_D : LAload_LDX_D; ++ break; ++ default: ++ goto irreducible; ++ } ++ addInstr(env, LOONGARCH64Instr_Load(op, am, dst)); ++ return dst; ++ } ++ ++ /* --------- BINARY OP --------- */ ++ case Iex_Binop: { ++ switch (e->Iex.Binop.op) { ++ case Iop_32HLto64: { ++ HReg dst = newVRegI(env); ++ HReg hi = iselIntExpr_R(env, e->Iex.Binop.arg1); ++ LOONGARCH64RI* lo = iselIntExpr_RI(env, e->Iex.Binop.arg2, 0, False); ++ LOONGARCH64RI* ri = LOONGARCH64RI_I(32, 6, False); ++ addInstr(env, LOONGARCH64Instr_Binary(LAbin_SLLI_D, ri, hi, dst)); ++ addInstr(env, LOONGARCH64Instr_Binary(LAbin_OR, lo, dst, dst)); ++ return dst; ++ } ++ case Iop_Add32: { ++ HReg dst = newVRegI(env); ++ HReg src1 = iselIntExpr_R(env, e->Iex.Binop.arg1); ++ LOONGARCH64RI* src2 = iselIntExpr_RI(env, e->Iex.Binop.arg2, 12, True); ++ LOONGARCH64BinOp op = (src2->tag == LAri_Reg) ? LAbin_ADD_W : LAbin_ADDI_W; ++ addInstr(env, LOONGARCH64Instr_Binary(op, src2, src1, dst)); ++ return dst; ++ } ++ case Iop_Add64: { ++ HReg dst = newVRegI(env); ++ HReg src1 = iselIntExpr_R(env, e->Iex.Binop.arg1); ++ LOONGARCH64RI* src2 = iselIntExpr_RI(env, e->Iex.Binop.arg2, 12, True); ++ LOONGARCH64BinOp op = (src2->tag == LAri_Reg) ? 
LAbin_ADD_D : LAbin_ADDI_D; ++ addInstr(env, LOONGARCH64Instr_Binary(op, src2, src1, dst)); ++ return dst; ++ } ++ case Iop_And32: { ++ HReg dst = newVRegI(env); ++ HReg src1 = iselIntExpr_R(env, e->Iex.Binop.arg1); ++ LOONGARCH64RI* src2 = iselIntExpr_RI(env, e->Iex.Binop.arg2, 12, False); ++ LOONGARCH64BinOp op = (src2->tag == LAri_Reg) ? LAbin_AND : LAbin_ANDI; ++ addInstr(env, LOONGARCH64Instr_Binary(op, src2, src1, dst)); ++ return dst; ++ } ++ case Iop_And64: { ++ HReg dst = newVRegI(env); ++ HReg src1 = iselIntExpr_R(env, e->Iex.Binop.arg1); ++ LOONGARCH64RI* src2 = iselIntExpr_RI(env, e->Iex.Binop.arg2, 12, False); ++ LOONGARCH64BinOp op = (src2->tag == LAri_Reg) ? LAbin_AND : LAbin_ANDI; ++ addInstr(env, LOONGARCH64Instr_Binary(op, src2, src1, dst)); ++ return dst; ++ } ++ case Iop_DivModS32to32: { ++ HReg dst = newVRegI(env); ++ HReg tmp = newVRegI(env); ++ HReg src1 = iselIntExpr_R(env, e->Iex.Binop.arg1); ++ HReg src2 = iselIntExpr_R(env, e->Iex.Binop.arg2); ++ LOONGARCH64RI* ri1 = LOONGARCH64RI_I(0, 5, False); ++ LOONGARCH64RI* ri2 = LOONGARCH64RI_R(src2); ++ LOONGARCH64RI* ri3 = LOONGARCH64RI_I(32, 6, False); ++ LOONGARCH64RI* ri4 = LOONGARCH64RI_R(tmp); ++ addInstr(env, LOONGARCH64Instr_Binary(LAbin_SLLI_W, ri1, src1, src1)); ++ addInstr(env, LOONGARCH64Instr_Binary(LAbin_SLLI_W, ri1, src2, src2)); ++ addInstr(env, LOONGARCH64Instr_Binary(LAbin_DIV_W, ri2, src1, dst)); ++ addInstr(env, LOONGARCH64Instr_Binary(LAbin_MOD_W, ri2, src1, tmp)); ++ addInstr(env, LOONGARCH64Instr_Binary(LAbin_SLLI_D, ri3, tmp, tmp)); ++ addInstr(env, LOONGARCH64Instr_Binary(LAbin_SLLI_D, ri3, dst, dst)); ++ addInstr(env, LOONGARCH64Instr_Binary(LAbin_SRLI_D, ri3, dst, dst)); ++ addInstr(env, LOONGARCH64Instr_Binary(LAbin_OR, ri4, dst, dst)); ++ return dst; ++ } ++ case Iop_DivModU32to32: { ++ HReg dst = newVRegI(env); ++ HReg tmp = newVRegI(env); ++ HReg src1 = iselIntExpr_R(env, e->Iex.Binop.arg1); ++ HReg src2 = iselIntExpr_R(env, e->Iex.Binop.arg2); ++ LOONGARCH64RI* ri1 = LOONGARCH64RI_I(0, 5, False); ++ LOONGARCH64RI* ri2 = LOONGARCH64RI_R(src2); ++ LOONGARCH64RI* ri3 = LOONGARCH64RI_I(32, 6, False); ++ LOONGARCH64RI* ri4 = LOONGARCH64RI_R(tmp); ++ addInstr(env, LOONGARCH64Instr_Binary(LAbin_SLLI_W, ri1, src1, src1)); ++ addInstr(env, LOONGARCH64Instr_Binary(LAbin_SLLI_W, ri1, src2, src2)); ++ addInstr(env, LOONGARCH64Instr_Binary(LAbin_DIV_WU, ri2, src1, dst)); ++ addInstr(env, LOONGARCH64Instr_Binary(LAbin_MOD_WU, ri2, src1, tmp)); ++ addInstr(env, LOONGARCH64Instr_Binary(LAbin_SLLI_D, ri3, tmp, tmp)); ++ addInstr(env, LOONGARCH64Instr_Binary(LAbin_SLLI_D, ri3, dst, dst)); ++ addInstr(env, LOONGARCH64Instr_Binary(LAbin_SRLI_D, ri3, dst, dst)); ++ addInstr(env, LOONGARCH64Instr_Binary(LAbin_OR, ri4, dst, dst)); ++ return dst; ++ } ++ case Iop_DivS32: { ++ HReg dst = newVRegI(env); ++ HReg src1 = iselIntExpr_R(env, e->Iex.Binop.arg1); ++ HReg src2 = iselIntExpr_R(env, e->Iex.Binop.arg2); ++ LOONGARCH64RI* ri1 = LOONGARCH64RI_I(0, 5, False); ++ LOONGARCH64RI* ri2 = LOONGARCH64RI_R(src2); ++ addInstr(env, LOONGARCH64Instr_Binary(LAbin_SLLI_W, ri1, src1, src1)); ++ addInstr(env, LOONGARCH64Instr_Binary(LAbin_SLLI_W, ri1, src2, src2)); ++ addInstr(env, LOONGARCH64Instr_Binary(LAbin_DIV_W, ri2, src1, dst)); ++ return dst; ++ } ++ case Iop_DivS64: { ++ HReg dst = newVRegI(env); ++ HReg src1 = iselIntExpr_R(env, e->Iex.Binop.arg1); ++ LOONGARCH64RI* src2 = iselIntExpr_RI(env, e->Iex.Binop.arg2, 0, False); ++ addInstr(env, LOONGARCH64Instr_Binary(LAbin_DIV_D, src2, src1, dst)); ++ return dst; ++ } ++ 
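(Illustrative aside, not part of the patch being added: the Iop_DivModS32to32 / Iop_DivModU32to32 cases above first pass each operand through SLLI.W by 0 — a 32-bit sign/zero-normalising idiom on LoongArch64 — then divide with DIV.W[U]/MOD.W[U], and finally use the SLLI.D/SRLI.D/OR sequence to return the quotient in the low 32 bits and the remainder in the high 32 bits of a single 64-bit register. A minimal C sketch of that packing, assuming hypothetical inputs a and b with b != 0; the helper name divmod_s32 is invented for illustration only.)

    #include <stdint.h>

    /* Sketch of the value produced by the generated DIV.W/MOD.W plus
       shift/OR sequence: quotient in bits 31..0, remainder in bits 63..32. */
    static uint64_t divmod_s32(int32_t a, int32_t b)
    {
        int32_t q = a / b;   /* DIV.W */
        int32_t r = a % b;   /* MOD.W */
        return ((uint64_t)(uint32_t)r << 32) | (uint32_t)q;
    }

(This matches VEX's convention for Iop_DivModS32to32, which expects the remainder in the high half and the quotient in the low half of the 64-bit result.)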
case Iop_DivU32: { ++ HReg dst = newVRegI(env); ++ HReg src1 = iselIntExpr_R(env, e->Iex.Binop.arg1); ++ HReg src2 = iselIntExpr_R(env, e->Iex.Binop.arg2); ++ LOONGARCH64RI* ri1 = LOONGARCH64RI_I(0, 5, False); ++ LOONGARCH64RI* ri2 = LOONGARCH64RI_R(src2); ++ addInstr(env, LOONGARCH64Instr_Binary(LAbin_SLLI_W, ri1, src1, src1)); ++ addInstr(env, LOONGARCH64Instr_Binary(LAbin_SLLI_W, ri1, src2, src2)); ++ addInstr(env, LOONGARCH64Instr_Binary(LAbin_DIV_WU, ri2, src1, dst)); ++ return dst; ++ } ++ case Iop_DivU64: { ++ HReg dst = newVRegI(env); ++ HReg src1 = iselIntExpr_R(env, e->Iex.Binop.arg1); ++ LOONGARCH64RI* src2 = iselIntExpr_RI(env, e->Iex.Binop.arg2, 0, False); ++ addInstr(env, LOONGARCH64Instr_Binary(LAbin_DIV_DU, src2, src1, dst)); ++ return dst; ++ } ++ case Iop_CmpF32: { ++ HReg src1 = iselFltExpr(env, e->Iex.Binop.arg1); ++ HReg src2 = iselFltExpr(env, e->Iex.Binop.arg2); ++ return convert_cond_to_IR(env, src2, src1, False); ++ } ++ case Iop_CmpF64: { ++ HReg src1 = iselFltExpr(env, e->Iex.Binop.arg1); ++ HReg src2 = iselFltExpr(env, e->Iex.Binop.arg2); ++ return convert_cond_to_IR(env, src2, src1, True); ++ } ++ case Iop_F32toI32S: { ++ HReg tmp = newVRegF(env); ++ HReg dst = newVRegI(env); ++ HReg src = iselFltExpr(env, e->Iex.Binop.arg2); ++ set_rounding_mode(env, e->Iex.Binop.arg1); ++ addInstr(env, LOONGARCH64Instr_FpUnary(LAfpun_FTINT_W_S, src, tmp)); ++ set_rounding_mode_default(env); ++ addInstr(env, LOONGARCH64Instr_FpMove(LAfpmove_MOVFR2GR_S, tmp, dst)); ++ return dst; ++ } ++ case Iop_F32toI64S: { ++ HReg tmp = newVRegF(env); ++ HReg dst = newVRegI(env); ++ HReg src = iselFltExpr(env, e->Iex.Binop.arg2); ++ set_rounding_mode(env, e->Iex.Binop.arg1); ++ addInstr(env, LOONGARCH64Instr_FpUnary(LAfpun_FTINT_L_S, src, tmp)); ++ set_rounding_mode_default(env); ++ addInstr(env, LOONGARCH64Instr_FpMove(LAfpmove_MOVFR2GR_D, tmp, dst)); ++ return dst; ++ } ++ case Iop_F64toI32S: { ++ HReg tmp = newVRegF(env); ++ HReg dst = newVRegI(env); ++ HReg src = iselFltExpr(env, e->Iex.Binop.arg2); ++ set_rounding_mode(env, e->Iex.Binop.arg1); ++ addInstr(env, LOONGARCH64Instr_FpUnary(LAfpun_FTINT_W_D, src, tmp)); ++ set_rounding_mode_default(env); ++ addInstr(env, LOONGARCH64Instr_FpMove(LAfpmove_MOVFR2GR_S, tmp, dst)); ++ return dst; ++ } ++ case Iop_F64toI64S: { ++ HReg tmp = newVRegF(env); ++ HReg dst = newVRegI(env); ++ HReg src = iselFltExpr(env, e->Iex.Binop.arg2); ++ set_rounding_mode(env, e->Iex.Binop.arg1); ++ addInstr(env, LOONGARCH64Instr_FpUnary(LAfpun_FTINT_L_D, src, tmp)); ++ set_rounding_mode_default(env); ++ addInstr(env, LOONGARCH64Instr_FpMove(LAfpmove_MOVFR2GR_D, tmp, dst)); ++ return dst; ++ } ++ case Iop_Max32U: { ++ HReg cond = newVRegI(env); ++ HReg dst = newVRegI(env); ++ HReg src1 = iselIntExpr_R(env, e->Iex.Binop.arg1); ++ HReg src2 = iselIntExpr_R(env, e->Iex.Binop.arg2); ++ addInstr(env, LOONGARCH64Instr_Cmp(LAcc_LTU, src2, src1, cond)); ++ addInstr(env, LOONGARCH64Instr_CMove(cond, src1, src2, dst, True)); ++ return dst; ++ } ++ case Iop_MullS32: { ++ HReg dst = newVRegI(env); ++ HReg src1 = iselIntExpr_R(env, e->Iex.Binop.arg1); ++ LOONGARCH64RI* src2 = iselIntExpr_RI(env, e->Iex.Binop.arg2, 0, False); ++ addInstr(env, LOONGARCH64Instr_Binary(LAbin_MULW_D_W, src2, src1, dst)); ++ return dst; ++ } ++ case Iop_MullU32: { ++ HReg dst = newVRegI(env); ++ HReg src1 = iselIntExpr_R(env, e->Iex.Binop.arg1); ++ LOONGARCH64RI* src2 = iselIntExpr_RI(env, e->Iex.Binop.arg2, 0, False); ++ addInstr(env, LOONGARCH64Instr_Binary(LAbin_MULW_D_WU, src2, src1, dst)); ++ 
return dst; ++ } ++ case Iop_Or32: { ++ HReg dst = newVRegI(env); ++ HReg src1 = iselIntExpr_R(env, e->Iex.Binop.arg1); ++ LOONGARCH64RI* src2 = iselIntExpr_RI(env, e->Iex.Binop.arg2, 12, False); ++ LOONGARCH64BinOp op = (src2->tag == LAri_Reg) ? LAbin_OR : LAbin_ORI; ++ addInstr(env, LOONGARCH64Instr_Binary(op, src2, src1, dst)); ++ return dst; ++ } ++ case Iop_Or64: { ++ HReg dst = newVRegI(env); ++ HReg src1 = iselIntExpr_R(env, e->Iex.Binop.arg1); ++ LOONGARCH64RI* src2 = iselIntExpr_RI(env, e->Iex.Binop.arg2, 12, False); ++ LOONGARCH64BinOp op = (src2->tag == LAri_Reg) ? LAbin_OR : LAbin_ORI; ++ addInstr(env, LOONGARCH64Instr_Binary(op, src2, src1, dst)); ++ return dst; ++ } ++ case Iop_Sar32: { ++ HReg dst = newVRegI(env); ++ HReg src1 = iselIntExpr_R(env, e->Iex.Binop.arg1); ++ LOONGARCH64RI* src2 = iselIntExpr_RI(env, e->Iex.Binop.arg2, 5, False); ++ LOONGARCH64BinOp op = (src2->tag == LAri_Reg) ? LAbin_SRA_W : LAbin_SRAI_W; ++ addInstr(env, LOONGARCH64Instr_Binary(op, src2, src1, dst)); ++ return dst; ++ } ++ case Iop_Sar64: { ++ HReg dst = newVRegI(env); ++ HReg src1 = iselIntExpr_R(env, e->Iex.Binop.arg1); ++ LOONGARCH64RI* src2 = iselIntExpr_RI(env, e->Iex.Binop.arg2, 6, False); ++ LOONGARCH64BinOp op = (src2->tag == LAri_Reg) ? LAbin_SRA_D : LAbin_SRAI_D; ++ addInstr(env, LOONGARCH64Instr_Binary(op, src2, src1, dst)); ++ return dst; ++ } ++ case Iop_Shl32: { ++ HReg dst = newVRegI(env); ++ HReg src1 = iselIntExpr_R(env, e->Iex.Binop.arg1); ++ LOONGARCH64RI* src2 = iselIntExpr_RI(env, e->Iex.Binop.arg2, 5, False); ++ LOONGARCH64BinOp op = (src2->tag == LAri_Reg) ? LAbin_SLL_W : LAbin_SLLI_W; ++ addInstr(env, LOONGARCH64Instr_Binary(op, src2, src1, dst)); ++ return dst; ++ } ++ case Iop_Shl64: { ++ HReg dst = newVRegI(env); ++ HReg src1 = iselIntExpr_R(env, e->Iex.Binop.arg1); ++ LOONGARCH64RI* src2 = iselIntExpr_RI(env, e->Iex.Binop.arg2, 6, False); ++ LOONGARCH64BinOp op = (src2->tag == LAri_Reg) ? LAbin_SLL_D : LAbin_SLLI_D; ++ addInstr(env, LOONGARCH64Instr_Binary(op, src2, src1, dst)); ++ return dst; ++ } ++ case Iop_Shr32: { ++ HReg dst = newVRegI(env); ++ HReg src1 = iselIntExpr_R(env, e->Iex.Binop.arg1); ++ LOONGARCH64RI* src2 = iselIntExpr_RI(env, e->Iex.Binop.arg2, 5, False); ++ LOONGARCH64BinOp op = (src2->tag == LAri_Reg) ? LAbin_SRL_W : LAbin_SRLI_W; ++ addInstr(env, LOONGARCH64Instr_Binary(op, src2, src1, dst)); ++ return dst; ++ } ++ case Iop_Shr64: { ++ HReg dst = newVRegI(env); ++ HReg src1 = iselIntExpr_R(env, e->Iex.Binop.arg1); ++ LOONGARCH64RI* src2 = iselIntExpr_RI(env, e->Iex.Binop.arg2, 6, False); ++ LOONGARCH64BinOp op = (src2->tag == LAri_Reg) ? LAbin_SRL_D : LAbin_SRLI_D; ++ addInstr(env, LOONGARCH64Instr_Binary(op, src2, src1, dst)); ++ return dst; ++ } ++ case Iop_Sub32: { ++ HReg dst = newVRegI(env); ++ HReg src1 = iselIntExpr_R(env, e->Iex.Binop.arg1); ++ LOONGARCH64RI* src2 = iselIntExpr_RI(env, e->Iex.Binop.arg2, 0, False); ++ addInstr(env, LOONGARCH64Instr_Binary(LAbin_SUB_W, src2, src1, dst)); ++ return dst; ++ } ++ case Iop_Sub64: { ++ HReg dst = newVRegI(env); ++ HReg src1 = iselIntExpr_R(env, e->Iex.Binop.arg1); ++ LOONGARCH64RI* src2 = iselIntExpr_RI(env, e->Iex.Binop.arg2, 0, False); ++ addInstr(env, LOONGARCH64Instr_Binary(LAbin_SUB_D, src2, src1, dst)); ++ return dst; ++ } ++ case Iop_Xor32: { ++ HReg dst = newVRegI(env); ++ HReg src1 = iselIntExpr_R(env, e->Iex.Binop.arg1); ++ LOONGARCH64RI* src2 = iselIntExpr_RI(env, e->Iex.Binop.arg2, 12, False); ++ LOONGARCH64BinOp op = (src2->tag == LAri_Reg) ? 
LAbin_XOR : LAbin_XORI; ++ addInstr(env, LOONGARCH64Instr_Binary(op, src2, src1, dst)); ++ return dst; ++ } ++ case Iop_Xor64: { ++ HReg dst = newVRegI(env); ++ HReg src1 = iselIntExpr_R(env, e->Iex.Binop.arg1); ++ LOONGARCH64RI* src2 = iselIntExpr_RI(env, e->Iex.Binop.arg2, 12, False); ++ LOONGARCH64BinOp op = (src2->tag == LAri_Reg) ? LAbin_XOR : LAbin_XORI; ++ addInstr(env, LOONGARCH64Instr_Binary(op, src2, src1, dst)); ++ return dst; ++ } ++ default: ++ goto irreducible; ++ } ++ } ++ ++ /* --------- UNARY OP --------- */ ++ case Iex_Unop: { ++ switch (e->Iex.Unop.op) { ++ case Iop_128HIto64: { ++ HReg hi, lo; ++ iselInt128Expr(&hi, &lo, env, e->Iex.Unop.arg); ++ return hi; ++ } ++ case Iop_128to64: { ++ HReg hi, lo; ++ iselInt128Expr(&hi, &lo, env, e->Iex.Unop.arg); ++ return lo; ++ } ++ case Iop_16Sto64: { ++ HReg dst = newVRegI(env); ++ HReg src = iselIntExpr_R(env, e->Iex.Unop.arg); ++ addInstr(env, LOONGARCH64Instr_Unary(LAun_EXT_W_H, src, dst)); ++ return dst; ++ } ++ case Iop_16Uto64: { ++ HReg dst = newVRegI(env); ++ HReg src = iselIntExpr_R(env, e->Iex.Unop.arg); ++ LOONGARCH64RI* ri = LOONGARCH64RI_I(48, 6, False); ++ addInstr(env, LOONGARCH64Instr_Binary(LAbin_SLLI_D, ri, src, dst)); ++ addInstr(env, LOONGARCH64Instr_Binary(LAbin_SRLI_D, ri, dst, dst)); ++ return dst; ++ } ++ case Iop_1Sto32: { ++ HReg dst = newVRegI(env); ++ HReg src = iselCondCode_R(env, e->Iex.Unop.arg); ++ LOONGARCH64RI* ri = LOONGARCH64RI_I(63, 6, False); ++ addInstr(env, LOONGARCH64Instr_Binary(LAbin_SLLI_D, ri, src, dst)); ++ addInstr(env, LOONGARCH64Instr_Binary(LAbin_SRAI_D, ri, dst, dst)); ++ return dst; ++ } ++ case Iop_1Sto64: { ++ HReg dst = newVRegI(env); ++ HReg src = iselCondCode_R(env, e->Iex.Unop.arg); ++ LOONGARCH64RI* ri = LOONGARCH64RI_I(63, 6, False); ++ addInstr(env, LOONGARCH64Instr_Binary(LAbin_SLLI_D, ri, src, dst)); ++ addInstr(env, LOONGARCH64Instr_Binary(LAbin_SRAI_D, ri, dst, dst)); ++ return dst; ++ } ++ case Iop_1Uto64: { ++ HReg dst = newVRegI(env); ++ HReg src = iselCondCode_R(env, e->Iex.Unop.arg); ++ LOONGARCH64RI* ri = LOONGARCH64RI_I(0x1, 12, False); ++ addInstr(env, LOONGARCH64Instr_Binary(LAbin_ANDI, ri, src, dst)); ++ return dst; ++ } ++ case Iop_1Uto8: { ++ HReg dst = newVRegI(env); ++ HReg src = iselCondCode_R(env, e->Iex.Unop.arg); ++ LOONGARCH64RI* ri = LOONGARCH64RI_I(0x1, 12, False); ++ addInstr(env, LOONGARCH64Instr_Binary(LAbin_ANDI, ri, src, dst)); ++ return dst; ++ } ++ case Iop_32Sto64: { ++ HReg dst = newVRegI(env); ++ HReg src = iselIntExpr_R(env, e->Iex.Unop.arg); ++ LOONGARCH64RI* ri = LOONGARCH64RI_I(0, 5, False); ++ addInstr(env, LOONGARCH64Instr_Binary(LAbin_SLLI_W, ri, src, dst)); ++ return dst; ++ } ++ case Iop_32Uto64: { ++ HReg dst = newVRegI(env); ++ HReg src = iselIntExpr_R(env, e->Iex.Unop.arg); ++ LOONGARCH64RI* ri = LOONGARCH64RI_I(32, 6, False); ++ addInstr(env, LOONGARCH64Instr_Binary(LAbin_SLLI_D, ri, src, dst)); ++ addInstr(env, LOONGARCH64Instr_Binary(LAbin_SRLI_D, ri, dst, dst)); ++ return dst; ++ } ++ case Iop_32to8: { ++ HReg dst = newVRegI(env); ++ HReg src = iselIntExpr_R(env, e->Iex.Unop.arg); ++ LOONGARCH64RI* ri = LOONGARCH64RI_I(0xff, 12, False); ++ addInstr(env, LOONGARCH64Instr_Binary(LAbin_ANDI, ri, src, dst)); ++ return dst; ++ } ++ case Iop_64HIto32: { ++ HReg dst = newVRegI(env); ++ HReg src = iselIntExpr_R(env, e->Iex.Unop.arg); ++ LOONGARCH64RI* ri = LOONGARCH64RI_I(32, 6, False); ++ addInstr(env, LOONGARCH64Instr_Binary(LAbin_SRLI_D, ri, src, dst)); ++ return dst; ++ } ++ case Iop_64to32: { ++ HReg dst = newVRegI(env); 
++ HReg src = iselIntExpr_R(env, e->Iex.Unop.arg); ++ LOONGARCH64RI* ri = LOONGARCH64RI_I(32, 6, False); ++ addInstr(env, LOONGARCH64Instr_Binary(LAbin_SLLI_D, ri, src, dst)); ++ addInstr(env, LOONGARCH64Instr_Binary(LAbin_SRLI_D, ri, dst, dst)); ++ return dst; ++ } ++ case Iop_64to8: { ++ HReg dst = newVRegI(env); ++ HReg src = iselIntExpr_R(env, e->Iex.Unop.arg); ++ LOONGARCH64RI* ri = LOONGARCH64RI_I(0xff, 12, False); ++ addInstr(env, LOONGARCH64Instr_Binary(LAbin_ANDI, ri, src, dst)); ++ return dst; ++ } ++ case Iop_8Sto64: { ++ HReg dst = newVRegI(env); ++ HReg src = iselIntExpr_R(env, e->Iex.Unop.arg); ++ addInstr(env, LOONGARCH64Instr_Unary(LAun_EXT_W_B, src, dst)); ++ return dst; ++ } ++ case Iop_8Uto32: { ++ HReg dst = newVRegI(env); ++ HReg src = iselIntExpr_R(env, e->Iex.Unop.arg); ++ LOONGARCH64RI* ri = LOONGARCH64RI_I(0xff, 12, False); ++ addInstr(env, LOONGARCH64Instr_Binary(LAbin_ANDI, ri, src, dst)); ++ return dst; ++ } ++ case Iop_8Uto64: { ++ HReg dst = newVRegI(env); ++ HReg src = iselIntExpr_R(env, e->Iex.Unop.arg); ++ LOONGARCH64RI* ri = LOONGARCH64RI_I(0xff, 12, False); ++ addInstr(env, LOONGARCH64Instr_Binary(LAbin_ANDI, ri, src, dst)); ++ return dst; ++ } ++ case Iop_CmpwNEZ32: { ++ HReg dst = newVRegI(env); ++ HReg src = iselIntExpr_R(env, e->Iex.Unop.arg); ++ LOONGARCH64RI* ri = LOONGARCH64RI_I(63, 6, False); ++ addInstr(env, LOONGARCH64Instr_Cmp(LAcc_NE, hregZERO(), src, dst)); ++ addInstr(env, LOONGARCH64Instr_Binary(LAbin_SLLI_D, ri, dst, dst)); ++ addInstr(env, LOONGARCH64Instr_Binary(LAbin_SRAI_D, ri, dst, dst)); ++ return dst; ++ } ++ case Iop_CmpwNEZ64: { ++ HReg dst = newVRegI(env); ++ HReg src = iselIntExpr_R(env, e->Iex.Unop.arg); ++ LOONGARCH64RI* ri = LOONGARCH64RI_I(63, 6, False); ++ addInstr(env, LOONGARCH64Instr_Cmp(LAcc_NE, hregZERO(), src, dst)); ++ addInstr(env, LOONGARCH64Instr_Binary(LAbin_SLLI_D, ri, dst, dst)); ++ addInstr(env, LOONGARCH64Instr_Binary(LAbin_SRAI_D, ri, dst, dst)); ++ return dst; ++ } ++ case Iop_Clz32: { ++ HReg dst = newVRegI(env); ++ HReg src = iselIntExpr_R(env, e->Iex.Unop.arg); ++ addInstr(env, LOONGARCH64Instr_Unary(LAun_CLZ_W, src, dst)); ++ return dst; ++ } ++ case Iop_Clz64: { ++ HReg dst = newVRegI(env); ++ HReg src = iselIntExpr_R(env, e->Iex.Unop.arg); ++ addInstr(env, LOONGARCH64Instr_Unary(LAun_CLZ_D, src, dst)); ++ return dst; ++ } ++ case Iop_Ctz32: { ++ HReg dst = newVRegI(env); ++ HReg src = iselIntExpr_R(env, e->Iex.Unop.arg); ++ addInstr(env, LOONGARCH64Instr_Unary(LAun_CTZ_W, src, dst)); ++ return dst; ++ } ++ case Iop_Ctz64: { ++ HReg dst = newVRegI(env); ++ HReg src = iselIntExpr_R(env, e->Iex.Unop.arg); ++ addInstr(env, LOONGARCH64Instr_Unary(LAun_CTZ_D, src, dst)); ++ return dst; ++ } ++ case Iop_Left16: { ++ HReg tmp = newVRegI(env); ++ HReg dst = newVRegI(env); ++ HReg src = iselIntExpr_R(env, e->Iex.Unop.arg); ++ LOONGARCH64RI* ri = LOONGARCH64RI_R(src); ++ addInstr(env, LOONGARCH64Instr_Binary(LAbin_SUB_D, ri, hregZERO(), tmp)); ++ addInstr(env, LOONGARCH64Instr_Binary(LAbin_OR, ri, tmp, dst)); ++ return dst; ++ } ++ case Iop_Left32: { ++ HReg tmp = newVRegI(env); ++ HReg dst = newVRegI(env); ++ HReg src = iselIntExpr_R(env, e->Iex.Unop.arg); ++ LOONGARCH64RI* ri = LOONGARCH64RI_R(src); ++ addInstr(env, LOONGARCH64Instr_Binary(LAbin_SUB_D, ri, hregZERO(), tmp)); ++ addInstr(env, LOONGARCH64Instr_Binary(LAbin_OR, ri, tmp, dst)); ++ return dst; ++ } ++ case Iop_Left64: { ++ HReg tmp = newVRegI(env); ++ HReg dst = newVRegI(env); ++ HReg src = iselIntExpr_R(env, e->Iex.Unop.arg); ++ LOONGARCH64RI* ri 
= LOONGARCH64RI_R(src); ++ addInstr(env, LOONGARCH64Instr_Binary(LAbin_SUB_D, ri, hregZERO(), tmp)); ++ addInstr(env, LOONGARCH64Instr_Binary(LAbin_OR, ri, tmp, dst)); ++ return dst; ++ } ++ case Iop_Left8: { ++ HReg tmp = newVRegI(env); ++ HReg dst = newVRegI(env); ++ HReg src = iselIntExpr_R(env, e->Iex.Unop.arg); ++ LOONGARCH64RI* ri = LOONGARCH64RI_R(src); ++ addInstr(env, LOONGARCH64Instr_Binary(LAbin_SUB_D, ri, hregZERO(), tmp)); ++ addInstr(env, LOONGARCH64Instr_Binary(LAbin_OR, ri, tmp, dst)); ++ return dst; ++ } ++ case Iop_ReinterpF32asI32: { ++ HReg dst = newVRegI(env); ++ HReg src = iselFltExpr(env, e->Iex.Unop.arg); ++ addInstr(env, LOONGARCH64Instr_FpMove(LAfpmove_MOVFR2GR_S, src, dst)); ++ return dst; ++ } ++ case Iop_ReinterpF64asI64: { ++ HReg dst = newVRegI(env); ++ HReg src = iselFltExpr(env, e->Iex.Unop.arg); ++ addInstr(env, LOONGARCH64Instr_FpMove(LAfpmove_MOVFR2GR_D, src, dst)); ++ return dst; ++ } ++ case Iop_Not32: { ++ HReg dst = newVRegI(env); ++ HReg src = iselIntExpr_R(env, e->Iex.Unop.arg); ++ LOONGARCH64RI* ri = LOONGARCH64RI_R(hregZERO()); ++ addInstr(env, LOONGARCH64Instr_Binary(LAbin_NOR, ri, src, dst)); ++ return dst; ++ } ++ case Iop_Not64: { ++ HReg dst = newVRegI(env); ++ HReg src = iselIntExpr_R(env, e->Iex.Unop.arg); ++ LOONGARCH64RI* ri = LOONGARCH64RI_R(hregZERO()); ++ addInstr(env, LOONGARCH64Instr_Binary(LAbin_NOR, ri, src, dst)); ++ return dst; ++ } ++ default: ++ goto irreducible; ++ } ++ } ++ ++ /* --------- GET --------- */ ++ case Iex_Get: { ++ HReg dst = newVRegI(env); ++ if (e->Iex.Get.offset < 1024) { ++ LOONGARCH64AMode* am = LOONGARCH64AMode_RI(hregGSP(), e->Iex.Get.offset); ++ LOONGARCH64LoadOp op; ++ switch(ty) { ++ case Ity_I8: ++ op = LAload_LD_BU; ++ break; ++ case Ity_I16: ++ op = LAload_LD_HU; ++ break; ++ case Ity_I32: ++ op = LAload_LD_WU; ++ break; ++ case Ity_I64: ++ op = LAload_LD_D; ++ break; ++ default: ++ goto irreducible; ++ } ++ addInstr(env, LOONGARCH64Instr_Load(op, am, dst)); ++ } else { ++ HReg tmp = newVRegI(env); ++ LOONGARCH64AMode* am = LOONGARCH64AMode_RR(hregGSP(), tmp); ++ LOONGARCH64LoadOp op; ++ switch(ty) { ++ case Ity_I8: ++ op = LAload_LDX_BU; ++ break; ++ case Ity_I16: ++ op = LAload_LDX_HU; ++ break; ++ case Ity_I32: ++ op = LAload_LDX_WU; ++ break; ++ case Ity_I64: ++ op = LAload_LDX_D; ++ break; ++ default: ++ goto irreducible; ++ } ++ addInstr(env, LOONGARCH64Instr_LI(e->Iex.Get.offset, tmp)); ++ addInstr(env, LOONGARCH64Instr_Load(op, am, dst)); ++ } ++ return dst; ++ } ++ ++ /* --------- CCALL --------- */ ++ case Iex_CCall: { ++ HReg dst = newVRegI(env); ++ vassert(ty == e->Iex.CCall.retty); ++ ++ /* be very restrictive for now. Only 64-bit ints allowed for ++ args, and 64 bits for return type. Don't forget to change ++ the RetLoc if more types are allowed in future. */ ++ if (e->Iex.CCall.retty != Ity_I64) ++ goto irreducible; ++ ++ /* Marshal args, do the call, clear stack. 
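++         doHelperCall may refuse the call (it returns False); in that
++         case we give up and fall back to 'irreducible' below.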
*/ ++ UInt addToSp = 0; ++ RetLoc rloc = mk_RetLoc_INVALID(); ++ Bool ok = doHelperCall(&addToSp, &rloc, env, NULL, ++ e->Iex.CCall.cee, e->Iex.CCall.retty, ++ e->Iex.CCall.args); ++ ++ if (ok) { ++ vassert(is_sane_RetLoc(rloc)); ++ vassert(rloc.pri == RLPri_Int); ++ vassert(addToSp == 0); ++ addInstr(env, LOONGARCH64Instr_Move(dst, hregLOONGARCH64_R4())); ++ return dst; ++ } ++ goto irreducible; ++ } ++ ++ /* --------- LITERAL --------- */ ++ /* 64-bit literals */ ++ case Iex_Const: { ++ ULong imm = 0; ++ HReg dst = newVRegI(env); ++ switch (e->Iex.Const.con->tag) { ++ case Ico_U64: ++ imm = e->Iex.Const.con->Ico.U64; ++ break; ++ case Ico_U32: ++ imm = e->Iex.Const.con->Ico.U32; ++ break; ++ case Ico_U16: ++ imm = e->Iex.Const.con->Ico.U16; ++ break; ++ case Ico_U8: ++ imm = e->Iex.Const.con->Ico.U8; ++ break; ++ default: ++ ppIRExpr(e); ++ vpanic("iselIntExpr_R.Iex_Const(loongarch64)"); ++ } ++ addInstr(env, LOONGARCH64Instr_LI(imm, dst)); ++ return dst; ++ } ++ ++ case Iex_ITE: { ++ HReg r0 = iselIntExpr_R(env, e->Iex.ITE.iffalse); ++ HReg r1 = iselIntExpr_R(env, e->Iex.ITE.iftrue); ++ HReg cond = iselCondCode_R(env, e->Iex.ITE.cond); ++ HReg dst = newVRegI(env); ++ addInstr(env, LOONGARCH64Instr_CMove(cond, r0, r1, dst, True)); ++ return dst; ++ } ++ ++ default: ++ break; ++ } ++ ++ /* We get here if no pattern matched. */ ++irreducible: ++ ppIRExpr(e); ++ vpanic("iselIntExpr_R(loongarch64): cannot reduce tree"); ++} ++ ++/* ------------------- CondCode ------------------- */ ++ ++/* Generate code to evaluate a bit-typed expression, returning the ++ condition code which would correspond when the expression would ++ notionally have returned 1. */ ++ ++static HReg iselCondCode_R ( ISelEnv* env, IRExpr* e ) ++{ ++ HReg r = iselCondCode_R_wrk(env, e); ++ ++ /* sanity checks ... */ ++ vassert(hregClass(r) == HRcInt64); ++ vassert(hregIsVirtual(r)); ++ ++ return r; ++} ++ ++/* DO NOT CALL THIS DIRECTLY !
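++   Go through iselCondCode_R above instead: it wraps this function and
++   checks that the result really is a virtual 64-bit integer register.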
*/ ++static HReg iselCondCode_R_wrk ( ISelEnv* env, IRExpr* e ) ++{ ++ vassert(e); ++ vassert(typeOfIRExpr(env->type_env, e) == Ity_I1); ++ ++ HReg dst = newVRegI(env); ++ ++ /* var */ ++ if (e->tag == Iex_RdTmp) { ++ HReg tmp = newVRegI(env); ++ dst = lookupIRTemp(env, e->Iex.RdTmp.tmp); ++ addInstr(env, LOONGARCH64Instr_LI(1, tmp)); ++ addInstr(env, LOONGARCH64Instr_Cmp(LAcc_EQ, dst, tmp, dst)); ++ return dst; ++ } ++ ++ /* const */ ++ if (e->tag == Iex_Const && e->Iex.Const.con->tag == Ico_U1) { ++ UInt imm = e->Iex.Const.con->Ico.U1; ++ addInstr(env, LOONGARCH64Instr_LI(imm, dst)); ++ return dst; ++ } ++ ++ if (e->tag == Iex_Unop) { ++ if (e->Iex.Unop.op == Iop_Not1) { ++ HReg src = iselCondCode_R(env, e->Iex.Unop.arg); ++ LOONGARCH64RI* ri = LOONGARCH64RI_R(hregZERO()); ++ addInstr(env, LOONGARCH64Instr_Binary(LAbin_NOR, ri, src, dst)); ++ return dst; ++ } ++ ++ LOONGARCH64CondCode cc; ++ switch (e->Iex.Unop.op) { ++ case Iop_CmpNEZ16: ++ cc = LAcc_NE; ++ break; ++ case Iop_CmpNEZ32: ++ cc = LAcc_NE; ++ break; ++ case Iop_CmpNEZ64: ++ cc = LAcc_NE; ++ break; ++ case Iop_CmpNEZ8: ++ cc = LAcc_NE; ++ break; ++ default: ++ goto irreducible; ++ } ++ HReg src = iselIntExpr_R(env, e->Iex.Unop.arg); ++ addInstr(env, LOONGARCH64Instr_Cmp(cc, hregZERO(), src, dst)); ++ return dst; ++ } ++ ++ if (e->tag == Iex_Binop) { ++ if (e->Iex.Binop.op == Iop_And1) { ++ HReg src1 = iselCondCode_R(env, e->Iex.Binop.arg1); ++ HReg src2 = iselCondCode_R(env, e->Iex.Binop.arg2); ++ LOONGARCH64RI* ri = LOONGARCH64RI_R(src2); ++ addInstr(env, LOONGARCH64Instr_Binary(LAbin_AND, ri, src1, dst)); ++ return dst; ++ } else if (e->Iex.Binop.op == Iop_Or1) { ++ HReg src1 = iselCondCode_R(env, e->Iex.Binop.arg1); ++ HReg src2 = iselCondCode_R(env, e->Iex.Binop.arg2); ++ LOONGARCH64RI* ri = LOONGARCH64RI_R(src2); ++ addInstr(env, LOONGARCH64Instr_Binary(LAbin_OR, ri, src1, dst)); ++ return dst; ++ } ++ ++ Bool extend = False; ++ Bool reverse = False; ++ LOONGARCH64CondCode cc; ++ switch (e->Iex.Binop.op) { ++ case Iop_CasCmpEQ32: ++ cc = LAcc_EQ; ++ break; ++ case Iop_CasCmpEQ64: ++ cc = LAcc_EQ; ++ break; ++ case Iop_CasCmpNE32: ++ cc = LAcc_NE; ++ break; ++ case Iop_CasCmpNE64: ++ cc = LAcc_NE; ++ break; ++ case Iop_CmpEQ32: ++ cc = LAcc_EQ; ++ break; ++ case Iop_CmpEQ64: ++ cc = LAcc_EQ; ++ break; ++ case Iop_CmpLE32S: ++ cc = LAcc_GE; ++ reverse = True; ++ break; ++ case Iop_CmpLE32U: ++ cc = LAcc_GEU; ++ reverse = True; ++ break; ++ case Iop_CmpLE64S: ++ cc = LAcc_GE; ++ reverse = True; ++ break; ++ case Iop_CmpLE64U: ++ cc = LAcc_GEU; ++ reverse = True; ++ break; ++ case Iop_CmpLT32S: ++ cc = LAcc_LT; ++ extend = True; ++ break; ++ case Iop_CmpLT32U: ++ cc = LAcc_LTU; ++ extend = True; ++ break; ++ case Iop_CmpLT64S: ++ cc = LAcc_LT; ++ break; ++ case Iop_CmpLT64U: ++ cc = LAcc_LTU; ++ break; ++ case Iop_CmpNE32: ++ cc = LAcc_NE; ++ break; ++ case Iop_CmpNE64: ++ cc = LAcc_NE; ++ break; ++ default: ++ goto irreducible; ++ } ++ HReg src1 = iselIntExpr_R(env, e->Iex.Binop.arg1); ++ HReg src2 = iselIntExpr_R(env, e->Iex.Binop.arg2); ++ if (extend) { ++ /* Sign-extend */ ++ LOONGARCH64RI* ri = LOONGARCH64RI_I(0, 5, False); ++ addInstr(env, LOONGARCH64Instr_Binary(LAbin_SLLI_W, ri, src1, src1)); ++ addInstr(env, LOONGARCH64Instr_Binary(LAbin_SLLI_W, ri, src2, src2)); ++ } ++ if (reverse) { ++ addInstr(env, LOONGARCH64Instr_Cmp(cc, src1, src2, dst)); ++ } else { ++ addInstr(env, LOONGARCH64Instr_Cmp(cc, src2, src1, dst)); ++ } ++ return dst; ++ } ++ ++ /* We get here if no pattern matched. 
*/ ++irreducible: ++ ppIRExpr(e); ++ vpanic("iselCondCode(loongarch64): cannot reduce tree"); ++} ++ ++ ++/*---------------------------------------------------------*/ ++/*--- ISEL: Integer expressions (128 bit) ---*/ ++/*---------------------------------------------------------*/ ++ ++/* Compute a 128-bit value into a register pair, which is returned as ++ the first two parameters. As with iselIntExpr_R, these may be ++ either real or virtual regs; in any case they must not be changed ++ by subsequent code emitted by the caller. */ ++ ++static void iselInt128Expr (HReg* hi, HReg* lo, ISelEnv* env, IRExpr* e) ++{ ++ iselInt128Expr_wrk(hi, lo, env, e); ++ ++ /* sanity checks ... */ ++ vassert(hregClass(*hi) == HRcInt64); ++ vassert(hregIsVirtual(*hi)); ++ vassert(hregClass(*lo) == HRcInt64); ++ vassert(hregIsVirtual(*lo)); ++} ++ ++/* DO NOT CALL THIS DIRECTLY ! */ ++static void iselInt128Expr_wrk (HReg* hi, HReg* lo, ISelEnv* env, IRExpr* e) ++{ ++ vassert(e); ++ vassert(typeOfIRExpr(env->type_env, e) == Ity_I128); ++ ++ /* --------- TEMP --------- */ ++ if (e->tag == Iex_RdTmp) { ++ lookupIRTempPair(hi, lo, env, e->Iex.RdTmp.tmp); ++ return; ++ } ++ ++ /* --------- BINARY OP --------- */ ++ if (e->tag == Iex_Binop) { ++ switch (e->Iex.Binop.op) { ++ case Iop_64HLto128: { ++ *hi = iselIntExpr_R(env, e->Iex.Binop.arg1); ++ *lo = iselIntExpr_R(env, e->Iex.Binop.arg2); ++ return; ++ } ++ case Iop_DivModS64to64: { ++ HReg src1 = iselIntExpr_R(env, e->Iex.Binop.arg1); ++ LOONGARCH64RI* src2 = iselIntExpr_RI(env, e->Iex.Binop.arg2, 0, False); ++ HReg dstLo = newVRegI(env); ++ HReg dstHi = newVRegI(env); ++ addInstr(env, LOONGARCH64Instr_Binary(LAbin_DIV_D, src2, src1, dstLo)); ++ addInstr(env, LOONGARCH64Instr_Binary(LAbin_MOD_D, src2, src1, dstHi)); ++ *hi = dstHi; ++ *lo = dstLo; ++ return; ++ } ++ case Iop_DivModU64to64: { ++ HReg src1 = iselIntExpr_R(env, e->Iex.Binop.arg1); ++ LOONGARCH64RI* src2 = iselIntExpr_RI(env, e->Iex.Binop.arg2, 0, False); ++ HReg dstLo = newVRegI(env); ++ HReg dstHi = newVRegI(env); ++ addInstr(env, LOONGARCH64Instr_Binary(LAbin_DIV_DU, src2, src1, dstLo)); ++ addInstr(env, LOONGARCH64Instr_Binary(LAbin_MOD_DU, src2, src1, dstHi)); ++ *hi = dstHi; ++ *lo = dstLo; ++ return; ++ } ++ case Iop_MullS64: { ++ HReg src1 = iselIntExpr_R(env, e->Iex.Binop.arg1); ++ LOONGARCH64RI* src2 = iselIntExpr_RI(env, e->Iex.Binop.arg2, 0, False); ++ HReg dstLo = newVRegI(env); ++ HReg dstHi = newVRegI(env); ++ addInstr(env, LOONGARCH64Instr_Binary(LAbin_MUL_D, src2, src1, dstLo)); ++ addInstr(env, LOONGARCH64Instr_Binary(LAbin_MULH_D, src2, src1, dstHi)); ++ *hi = dstHi; ++ *lo = dstLo; ++ return; ++ } ++ case Iop_MullU64: { ++ HReg src1 = iselIntExpr_R(env, e->Iex.Binop.arg1); ++ LOONGARCH64RI* src2 = iselIntExpr_RI(env, e->Iex.Binop.arg2, 0, False); ++ HReg dstLo = newVRegI(env); ++ HReg dstHi = newVRegI(env); ++ addInstr(env, LOONGARCH64Instr_Binary(LAbin_MUL_D, src2, src1, dstLo)); ++ addInstr(env, LOONGARCH64Instr_Binary(LAbin_MULH_DU, src2, src1, dstHi)); ++ *hi = dstHi; ++ *lo = dstLo; ++ return; ++ } ++ default: ++ goto irreducible; ++ } ++ } ++ ++ /* We get here if no pattern matched. */ ++irreducible: ++ ppIRExpr(e); ++ vpanic("iselInt128Expr(loongarch64): cannot reduce tree"); ++} ++ ++ ++/*---------------------------------------------------------*/ ++/*--- ISEL: Floating point expressions (64/32 bit) ---*/ ++/*---------------------------------------------------------*/ ++ ++/* Compute a floating point value into a register, the identity of ++ which is returned. 
As with iselIntExpr_R, the reg may be either ++ real or virtual; in any case it must not be changed by subsequent ++ code emitted by the caller. */ ++ ++static HReg iselFltExpr ( ISelEnv* env, IRExpr* e ) ++{ ++ HReg r = iselFltExpr_wrk(env, e); ++ ++ /* sanity checks ... */ ++ vassert(hregClass(r) == HRcFlt64); ++ vassert(hregIsVirtual(r)); ++ ++ return r; ++} ++ ++/* DO NOT CALL THIS DIRECTLY */ ++static HReg iselFltExpr_wrk ( ISelEnv* env, IRExpr* e ) ++{ ++ IRType ty = typeOfIRExpr(env->type_env, e); ++ vassert(e); ++ vassert(ty == Ity_F32 || ty == Ity_F64); ++ ++ switch (e->tag) { ++ /* --------- TEMP --------- */ ++ case Iex_RdTmp: ++ return lookupIRTemp(env, e->Iex.RdTmp.tmp); ++ ++ /* --------- LOAD --------- */ ++ case Iex_Load: { ++ if (e->Iex.Load.end != Iend_LE) ++ goto irreducible; ++ ++ LOONGARCH64AMode* am = iselIntExpr_AMode(env, e->Iex.Load.addr, ty); ++ HReg dst = newVRegF(env); ++ LOONGARCH64FpLoadOp op; ++ switch(ty) { ++ case Ity_F32: ++ op = (am->tag == LAam_RI) ? LAfpload_FLD_S : LAfpload_FLDX_S; ++ break; ++ case Ity_F64: ++ op = (am->tag == LAam_RI) ? LAfpload_FLD_D : LAfpload_FLDX_D; ++ break; ++ default: ++ goto irreducible; ++ } ++ addInstr(env, LOONGARCH64Instr_FpLoad(op, am, dst)); ++ return dst; ++ } ++ ++ /* --------- GET --------- */ ++ case Iex_Get: { ++ HReg dst = newVRegF(env); ++ if (e->Iex.Get.offset < 1024) { ++ LOONGARCH64AMode* am = LOONGARCH64AMode_RI(hregGSP(), e->Iex.Get.offset); ++ LOONGARCH64FpLoadOp op; ++ switch(ty) { ++ case Ity_F32: ++ op = LAfpload_FLD_S; ++ break; ++ case Ity_F64: ++ op = LAfpload_FLD_D; ++ break; ++ default: ++ goto irreducible; ++ } ++ addInstr(env, LOONGARCH64Instr_FpLoad(op, am, dst)); ++ } else { ++ HReg tmp = newVRegI(env); ++ LOONGARCH64AMode* am = LOONGARCH64AMode_RR(hregGSP(), tmp); ++ LOONGARCH64FpLoadOp op; ++ switch(ty) { ++ case Ity_F32: ++ op = LAfpload_FLDX_S; ++ break; ++ case Ity_F64: ++ op = LAfpload_FLDX_D; ++ break; ++ default: ++ goto irreducible; ++ } ++ addInstr(env, LOONGARCH64Instr_LI(e->Iex.Get.offset, tmp)); ++ addInstr(env, LOONGARCH64Instr_FpLoad(op, am, dst)); ++ } ++ return dst; ++ } ++ ++ /* --------- QUATERNARY OP --------- */ ++ case Iex_Qop: { ++ switch (e->Iex.Qop.details->op) { ++ case Iop_MAddF32: { ++ HReg dst = newVRegF(env); ++ HReg src1 = iselFltExpr(env, e->Iex.Qop.details->arg2); ++ HReg src2 = iselFltExpr(env, e->Iex.Qop.details->arg3); ++ HReg src3 = iselFltExpr(env, e->Iex.Qop.details->arg4); ++ set_rounding_mode(env, e->Iex.Qop.details->arg1); ++ addInstr(env, LOONGARCH64Instr_FpTrinary(LAfpbin_FMADD_S, src3, src2, src1, dst)); ++ set_rounding_mode_default(env); ++ return dst; ++ } ++ case Iop_MAddF64: { ++ HReg dst = newVRegF(env); ++ HReg src1 = iselFltExpr(env, e->Iex.Qop.details->arg2); ++ HReg src2 = iselFltExpr(env, e->Iex.Qop.details->arg3); ++ HReg src3 = iselFltExpr(env, e->Iex.Qop.details->arg4); ++ set_rounding_mode(env, e->Iex.Qop.details->arg1); ++ addInstr(env, LOONGARCH64Instr_FpTrinary(LAfpbin_FMADD_D, src3, src2, src1, dst)); ++ set_rounding_mode_default(env); ++ return dst; ++ } ++ case Iop_MSubF32: { ++ HReg dst = newVRegF(env); ++ HReg src1 = iselFltExpr(env, e->Iex.Qop.details->arg2); ++ HReg src2 = iselFltExpr(env, e->Iex.Qop.details->arg3); ++ HReg src3 = iselFltExpr(env, e->Iex.Qop.details->arg4); ++ set_rounding_mode(env, e->Iex.Qop.details->arg1); ++ addInstr(env, LOONGARCH64Instr_FpTrinary(LAfpbin_FMSUB_S, src3, src2, src1, dst)); ++ set_rounding_mode_default(env); ++ return dst; ++ } ++ case Iop_MSubF64: { ++ HReg dst = newVRegF(env); ++ HReg src1 
= iselFltExpr(env, e->Iex.Qop.details->arg2); ++ HReg src2 = iselFltExpr(env, e->Iex.Qop.details->arg3); ++ HReg src3 = iselFltExpr(env, e->Iex.Qop.details->arg4); ++ set_rounding_mode(env, e->Iex.Qop.details->arg1); ++ addInstr(env, LOONGARCH64Instr_FpTrinary(LAfpbin_FMSUB_D, src3, src2, src1, dst)); ++ set_rounding_mode_default(env); ++ return dst; ++ } ++ default: ++ goto irreducible; ++ } ++ } ++ ++ /* --------- TERNARY OP --------- */ ++ case Iex_Triop: { ++ switch (e->Iex.Triop.details->op) { ++ case Iop_AddF32: { ++ HReg dst = newVRegF(env); ++ HReg src1 = iselFltExpr(env, e->Iex.Triop.details->arg2); ++ HReg src2 = iselFltExpr(env, e->Iex.Triop.details->arg3); ++ set_rounding_mode(env, e->Iex.Triop.details->arg1); ++ addInstr(env, LOONGARCH64Instr_FpBinary(LAfpbin_FADD_S, src2, src1, dst)); ++ set_rounding_mode_default(env); ++ return dst; ++ } ++ case Iop_AddF64: { ++ HReg dst = newVRegF(env); ++ HReg src1 = iselFltExpr(env, e->Iex.Triop.details->arg2); ++ HReg src2 = iselFltExpr(env, e->Iex.Triop.details->arg3); ++ set_rounding_mode(env, e->Iex.Triop.details->arg1); ++ addInstr(env, LOONGARCH64Instr_FpBinary(LAfpbin_FADD_D, src2, src1, dst)); ++ set_rounding_mode_default(env); ++ return dst; ++ } ++ case Iop_DivF32: { ++ HReg dst = newVRegF(env); ++ HReg src1 = iselFltExpr(env, e->Iex.Triop.details->arg2); ++ HReg src2 = iselFltExpr(env, e->Iex.Triop.details->arg3); ++ set_rounding_mode(env, e->Iex.Triop.details->arg1); ++ addInstr(env, LOONGARCH64Instr_FpBinary(LAfpbin_FDIV_S, src2, src1, dst)); ++ set_rounding_mode_default(env); ++ return dst; ++ } ++ case Iop_DivF64: { ++ HReg dst = newVRegF(env); ++ HReg src1 = iselFltExpr(env, e->Iex.Triop.details->arg2); ++ HReg src2 = iselFltExpr(env, e->Iex.Triop.details->arg3); ++ set_rounding_mode(env, e->Iex.Triop.details->arg1); ++ addInstr(env, LOONGARCH64Instr_FpBinary(LAfpbin_FDIV_D, src2, src1, dst)); ++ set_rounding_mode_default(env); ++ return dst; ++ } ++ case Iop_MulF32: { ++ HReg dst = newVRegF(env); ++ HReg src1 = iselFltExpr(env, e->Iex.Triop.details->arg2); ++ HReg src2 = iselFltExpr(env, e->Iex.Triop.details->arg3); ++ set_rounding_mode(env, e->Iex.Triop.details->arg1); ++ addInstr(env, LOONGARCH64Instr_FpBinary(LAfpbin_FMUL_S, src2, src1, dst)); ++ set_rounding_mode_default(env); ++ return dst; ++ } ++ case Iop_MulF64: { ++ HReg dst = newVRegF(env); ++ HReg src1 = iselFltExpr(env, e->Iex.Triop.details->arg2); ++ HReg src2 = iselFltExpr(env, e->Iex.Triop.details->arg3); ++ set_rounding_mode(env, e->Iex.Triop.details->arg1); ++ addInstr(env, LOONGARCH64Instr_FpBinary(LAfpbin_FMUL_D, src2, src1, dst)); ++ set_rounding_mode_default(env); ++ return dst; ++ } ++ case Iop_ScaleBF32: { ++ HReg dst = newVRegF(env); ++ HReg src1 = iselFltExpr(env, e->Iex.Triop.details->arg2); ++ HReg src2 = iselFltExpr(env, e->Iex.Triop.details->arg3); ++ set_rounding_mode(env, e->Iex.Triop.details->arg1); ++ addInstr(env, LOONGARCH64Instr_FpBinary(LAfpbin_FSCALEB_S, src2, src1, dst)); ++ set_rounding_mode_default(env); ++ return dst; ++ } ++ case Iop_ScaleBF64: { ++ HReg dst = newVRegF(env); ++ HReg src1 = iselFltExpr(env, e->Iex.Triop.details->arg2); ++ HReg src2 = iselFltExpr(env, e->Iex.Triop.details->arg3); ++ set_rounding_mode(env, e->Iex.Triop.details->arg1); ++ addInstr(env, LOONGARCH64Instr_FpBinary(LAfpbin_FSCALEB_D, src2, src1, dst)); ++ set_rounding_mode_default(env); ++ return dst; ++ } ++ case Iop_SubF32: { ++ HReg dst = newVRegF(env); ++ HReg src1 = iselFltExpr(env, e->Iex.Triop.details->arg2); ++ HReg src2 = iselFltExpr(env, 
e->Iex.Triop.details->arg3); ++ set_rounding_mode(env, e->Iex.Triop.details->arg1); ++ addInstr(env, LOONGARCH64Instr_FpBinary(LAfpbin_FSUB_S, src2, src1, dst)); ++ set_rounding_mode_default(env); ++ return dst; ++ } ++ case Iop_SubF64: { ++ HReg dst = newVRegF(env); ++ HReg src1 = iselFltExpr(env, e->Iex.Triop.details->arg2); ++ HReg src2 = iselFltExpr(env, e->Iex.Triop.details->arg3); ++ set_rounding_mode(env, e->Iex.Triop.details->arg1); ++ addInstr(env, LOONGARCH64Instr_FpBinary(LAfpbin_FSUB_D, src2, src1, dst)); ++ set_rounding_mode_default(env); ++ return dst; ++ } ++ default: ++ goto irreducible; ++ } ++ } ++ ++ /* --------- BINARY OP --------- */ ++ case Iex_Binop: { ++ switch (e->Iex.Binop.op) { ++ case Iop_F64toF32: { ++ HReg dst = newVRegF(env); ++ HReg src = iselFltExpr(env, e->Iex.Binop.arg2); ++ set_rounding_mode(env, e->Iex.Binop.arg1); ++ addInstr(env, LOONGARCH64Instr_FpUnary(LAfpun_FCVT_S_D, src, dst)); ++ set_rounding_mode_default(env); ++ return dst; ++ } ++ case Iop_I32StoF32: { ++ HReg tmp = newVRegF(env); ++ HReg dst = newVRegF(env); ++ HReg src = iselIntExpr_R(env, e->Iex.Binop.arg2); ++ addInstr(env, LOONGARCH64Instr_FpMove(LAfpmove_MOVGR2FR_D, src, tmp)); ++ set_rounding_mode(env, e->Iex.Binop.arg1); ++ addInstr(env, LOONGARCH64Instr_FpUnary(LAfpun_FFINT_S_W, tmp, dst)); ++ set_rounding_mode_default(env); ++ return dst; ++ } ++ case Iop_I64StoF32: { ++ HReg tmp = newVRegF(env); ++ HReg dst = newVRegF(env); ++ HReg src = iselIntExpr_R(env, e->Iex.Binop.arg2); ++ addInstr(env, LOONGARCH64Instr_FpMove(LAfpmove_MOVGR2FR_D, src, tmp)); ++ set_rounding_mode(env, e->Iex.Binop.arg1); ++ addInstr(env, LOONGARCH64Instr_FpUnary(LAfpun_FFINT_S_L, tmp, dst)); ++ set_rounding_mode_default(env); ++ return dst; ++ } ++ case Iop_I64StoF64: { ++ HReg tmp = newVRegF(env); ++ HReg dst = newVRegF(env); ++ HReg src = iselIntExpr_R(env, e->Iex.Binop.arg2); ++ addInstr(env, LOONGARCH64Instr_FpMove(LAfpmove_MOVGR2FR_D, src, tmp)); ++ set_rounding_mode(env, e->Iex.Binop.arg1); ++ addInstr(env, LOONGARCH64Instr_FpUnary(LAfpun_FFINT_D_L, tmp, dst)); ++ set_rounding_mode_default(env); ++ return dst; ++ } ++ case Iop_LogBF32: { ++ HReg dst = newVRegF(env); ++ HReg src = iselFltExpr(env, e->Iex.Binop.arg2); ++ set_rounding_mode(env, e->Iex.Binop.arg1); ++ addInstr(env, LOONGARCH64Instr_FpUnary(LAfpun_FLOGB_S, src, dst)); ++ set_rounding_mode_default(env); ++ return dst; ++ } ++ case Iop_LogBF64: { ++ HReg dst = newVRegF(env); ++ HReg src = iselFltExpr(env, e->Iex.Binop.arg2); ++ set_rounding_mode(env, e->Iex.Binop.arg1); ++ addInstr(env, LOONGARCH64Instr_FpUnary(LAfpun_FLOGB_D, src, dst)); ++ set_rounding_mode_default(env); ++ return dst; ++ } ++ case Iop_MaxNumAbsF32: { ++ HReg dst = newVRegF(env); ++ HReg src2 = iselFltExpr(env, e->Iex.Binop.arg2); ++ HReg src1 = iselFltExpr(env, e->Iex.Binop.arg1); ++ addInstr(env, LOONGARCH64Instr_FpBinary(LAfpbin_FMAXA_S, src2, src1, dst)); ++ return dst; ++ } ++ case Iop_MaxNumF32: { ++ HReg dst = newVRegF(env); ++ HReg src2 = iselFltExpr(env, e->Iex.Binop.arg2); ++ HReg src1 = iselFltExpr(env, e->Iex.Binop.arg1); ++ addInstr(env, LOONGARCH64Instr_FpBinary(LAfpbin_FMAX_S, src2, src1, dst)); ++ return dst; ++ } ++ case Iop_MaxNumAbsF64: { ++ HReg dst = newVRegF(env); ++ HReg src2 = iselFltExpr(env, e->Iex.Binop.arg2); ++ HReg src1 = iselFltExpr(env, e->Iex.Binop.arg1); ++ addInstr(env, LOONGARCH64Instr_FpBinary(LAfpbin_FMAXA_D, src2, src1, dst)); ++ return dst; ++ } ++ case Iop_MaxNumF64: { ++ HReg dst = newVRegF(env); ++ HReg src2 = iselFltExpr(env, 
e->Iex.Binop.arg2); ++ HReg src1 = iselFltExpr(env, e->Iex.Binop.arg1); ++ addInstr(env, LOONGARCH64Instr_FpBinary(LAfpbin_FMAX_D, src2, src1, dst)); ++ return dst; ++ } ++ case Iop_MinNumAbsF32: { ++ HReg dst = newVRegF(env); ++ HReg src2 = iselFltExpr(env, e->Iex.Binop.arg2); ++ HReg src1 = iselFltExpr(env, e->Iex.Binop.arg1); ++ addInstr(env, LOONGARCH64Instr_FpBinary(LAfpbin_FMINA_S, src2, src1, dst)); ++ return dst; ++ } ++ case Iop_MinNumF32: { ++ HReg dst = newVRegF(env); ++ HReg src2 = iselFltExpr(env, e->Iex.Binop.arg2); ++ HReg src1 = iselFltExpr(env, e->Iex.Binop.arg1); ++ addInstr(env, LOONGARCH64Instr_FpBinary(LAfpbin_FMIN_S, src2, src1, dst)); ++ return dst; ++ } ++ case Iop_MinNumAbsF64: { ++ HReg dst = newVRegF(env); ++ HReg src2 = iselFltExpr(env, e->Iex.Binop.arg2); ++ HReg src1 = iselFltExpr(env, e->Iex.Binop.arg1); ++ addInstr(env, LOONGARCH64Instr_FpBinary(LAfpbin_FMINA_D, src2, src1, dst)); ++ return dst; ++ } ++ case Iop_MinNumF64: { ++ HReg dst = newVRegF(env); ++ HReg src2 = iselFltExpr(env, e->Iex.Binop.arg2); ++ HReg src1 = iselFltExpr(env, e->Iex.Binop.arg1); ++ addInstr(env, LOONGARCH64Instr_FpBinary(LAfpbin_FMIN_D, src2, src1, dst)); ++ return dst; ++ } ++ case Iop_RoundF32toInt: { ++ HReg dst = newVRegF(env); ++ HReg src = iselFltExpr(env, e->Iex.Binop.arg2); ++ set_rounding_mode(env, e->Iex.Binop.arg1); ++ addInstr(env, LOONGARCH64Instr_FpUnary(LAfpun_FRINT_S, src, dst)); ++ set_rounding_mode_default(env); ++ return dst; ++ } ++ case Iop_RoundF64toInt: { ++ HReg dst = newVRegF(env); ++ HReg src = iselFltExpr(env, e->Iex.Binop.arg2); ++ set_rounding_mode(env, e->Iex.Binop.arg1); ++ addInstr(env, LOONGARCH64Instr_FpUnary(LAfpun_FRINT_D, src, dst)); ++ set_rounding_mode_default(env); ++ return dst; ++ } ++ case Iop_RSqrtF32: { ++ HReg dst = newVRegF(env); ++ HReg src = iselFltExpr(env, e->Iex.Binop.arg2); ++ set_rounding_mode(env, e->Iex.Binop.arg1); ++ addInstr(env, LOONGARCH64Instr_FpUnary(LAfpun_FRSQRT_S, src, dst)); ++ set_rounding_mode_default(env); ++ return dst; ++ } ++ case Iop_RSqrtF64: { ++ HReg dst = newVRegF(env); ++ HReg src = iselFltExpr(env, e->Iex.Binop.arg2); ++ set_rounding_mode(env, e->Iex.Binop.arg1); ++ addInstr(env, LOONGARCH64Instr_FpUnary(LAfpun_FRSQRT_D, src, dst)); ++ set_rounding_mode_default(env); ++ return dst; ++ } ++ case Iop_SqrtF32: { ++ HReg dst = newVRegF(env); ++ HReg src = iselFltExpr(env, e->Iex.Binop.arg2); ++ set_rounding_mode(env, e->Iex.Binop.arg1); ++ addInstr(env, LOONGARCH64Instr_FpUnary(LAfpun_FSQRT_S, src, dst)); ++ set_rounding_mode_default(env); ++ return dst; ++ } ++ case Iop_SqrtF64: { ++ HReg dst = newVRegF(env); ++ HReg src = iselFltExpr(env, e->Iex.Binop.arg2); ++ set_rounding_mode(env, e->Iex.Binop.arg1); ++ addInstr(env, LOONGARCH64Instr_FpUnary(LAfpun_FSQRT_D, src, dst)); ++ set_rounding_mode_default(env); ++ return dst; ++ } ++ default: ++ goto irreducible; ++ } ++ } ++ ++ /* --------- UNARY OP --------- */ ++ case Iex_Unop: { ++ switch (e->Iex.Unop.op) { ++ case Iop_AbsF32: { ++ HReg dst = newVRegF(env); ++ HReg src = iselFltExpr(env, e->Iex.Unop.arg); ++ addInstr(env, LOONGARCH64Instr_FpUnary(LAfpun_FABS_S, src, dst)); ++ return dst; ++ } ++ case Iop_AbsF64: { ++ HReg dst = newVRegF(env); ++ HReg src = iselFltExpr(env, e->Iex.Unop.arg); ++ addInstr(env, LOONGARCH64Instr_FpUnary(LAfpun_FABS_D, src, dst)); ++ return dst; ++ } ++ case Iop_F32toF64: { ++ HReg dst = newVRegF(env); ++ HReg src = iselFltExpr(env, e->Iex.Unop.arg); ++ addInstr(env, LOONGARCH64Instr_FpUnary(LAfpun_FCVT_D_S, src, dst)); ++ 
return dst; ++ } ++ case Iop_I32StoF64: { ++ HReg tmp = newVRegF(env); ++ HReg dst = newVRegF(env); ++ HReg src = iselIntExpr_R(env, e->Iex.Unop.arg); ++ addInstr(env, LOONGARCH64Instr_FpMove(LAfpmove_MOVGR2FR_D, src, tmp)); ++ addInstr(env, LOONGARCH64Instr_FpUnary(LAfpun_FFINT_D_W, tmp, dst)); ++ return dst; ++ } ++ case Iop_NegF32: { ++ HReg dst = newVRegF(env); ++ HReg src = iselFltExpr(env, e->Iex.Unop.arg); ++ addInstr(env, LOONGARCH64Instr_FpUnary(LAfpun_FNEG_S, src, dst)); ++ return dst; ++ } ++ case Iop_NegF64: { ++ HReg dst = newVRegF(env); ++ HReg src = iselFltExpr(env, e->Iex.Unop.arg); ++ addInstr(env, LOONGARCH64Instr_FpUnary(LAfpun_FNEG_D, src, dst)); ++ return dst; ++ } ++ case Iop_ReinterpI32asF32: { ++ HReg dst = newVRegF(env); ++ HReg src = iselIntExpr_R(env, e->Iex.Unop.arg); ++ addInstr(env, LOONGARCH64Instr_FpMove(LAfpmove_MOVGR2FR_W, src, dst)); ++ return dst; ++ } ++ case Iop_ReinterpI64asF64: { ++ HReg dst = newVRegF(env); ++ HReg src = iselIntExpr_R(env, e->Iex.Unop.arg); ++ addInstr(env, LOONGARCH64Instr_FpMove(LAfpmove_MOVGR2FR_D, src, dst)); ++ return dst; ++ } ++ default: ++ goto irreducible; ++ } ++ } ++ ++ /* --------- LITERAL --------- */ ++ case Iex_Const: { ++ /* Just handle the one case. */ ++ IRConst* con = e->Iex.Const.con; ++ if (con->tag == Ico_F32i && con->Ico.F32i == 1) { ++ HReg tmp = newVRegI(env); ++ HReg dst = newVRegF(env); ++ LOONGARCH64RI* ri = LOONGARCH64RI_I(1, 12, True); ++ addInstr(env, LOONGARCH64Instr_Binary(LAbin_ADDI_W, ri, hregZERO(), tmp)); ++ addInstr(env, LOONGARCH64Instr_FpMove(LAfpmove_MOVGR2FR_W, tmp, dst)); ++ addInstr(env, LOONGARCH64Instr_FpUnary(LAfpun_FFINT_S_W, dst, dst)); ++ return dst; ++ } else if (con->tag == Ico_F64i && con->Ico.F64i == 1) { ++ HReg tmp = newVRegI(env); ++ HReg dst = newVRegF(env); ++ LOONGARCH64RI* ri = LOONGARCH64RI_I(1, 12, True); ++ addInstr(env, LOONGARCH64Instr_Binary(LAbin_ADDI_D, ri, hregZERO(), tmp)); ++ addInstr(env, LOONGARCH64Instr_FpMove(LAfpmove_MOVGR2FR_D, tmp, dst)); ++ addInstr(env, LOONGARCH64Instr_FpUnary(LAfpun_FFINT_D_L, dst, dst)); ++ return dst; ++ } else { ++ goto irreducible; ++ } ++ } ++ ++ case Iex_ITE: { ++ HReg r0 = iselFltExpr(env, e->Iex.ITE.iffalse); ++ HReg r1 = iselFltExpr(env, e->Iex.ITE.iftrue); ++ HReg cond = iselCondCode_R(env, e->Iex.ITE.cond); ++ HReg dst = newVRegF(env); ++ addInstr(env, LOONGARCH64Instr_CMove(cond, r0, r1, dst, False)); ++ return dst; ++ } ++ ++ default: ++ break; ++ } ++ ++ /* We get here if no pattern matched. */ ++irreducible: ++ ppIRExpr(e); ++ vpanic("iselFltExpr(loongarch64): cannot reduce tree"); ++} ++ ++ ++/*---------------------------------------------------------*/ ++/*--- ISEL: Statements ---*/ ++/*---------------------------------------------------------*/ ++ ++static void iselStmtStore ( ISelEnv* env, IRStmt* stmt ) ++{ ++ IRType tya = typeOfIRExpr(env->type_env, stmt->Ist.Store.addr); ++ IRType tyd = typeOfIRExpr(env->type_env, stmt->Ist.Store.data); ++ ++ if (tya != Ity_I64 || stmt->Ist.Store.end != Iend_LE) ++ vpanic("iselStmt(loongarch64): Ist_Store"); ++ ++ Bool fp = False; ++ LOONGARCH64AMode* am = iselIntExpr_AMode(env, stmt->Ist.Store.addr, tyd); ++ LOONGARCH64StoreOp op; ++ LOONGARCH64FpStoreOp fop; ++ switch (tyd) { ++ case Ity_I8: ++ op = (am->tag == LAam_RI) ? LAstore_ST_B : LAstore_STX_B; ++ break; ++ case Ity_I16: ++ op = (am->tag == LAam_RI) ? LAstore_ST_H : LAstore_STX_H; ++ break; ++ case Ity_I32: ++ op = (am->tag == LAam_RI) ? 
LAstore_ST_W : LAstore_STX_W; ++ break; ++ case Ity_I64: ++ op = (am->tag == LAam_RI) ? LAstore_ST_D : LAstore_STX_D; ++ break; ++ case Ity_F32: ++ fop = (am->tag == LAam_RI) ? LAfpstore_FST_S : LAfpstore_FSTX_S; ++ fp = True; ++ break; ++ case Ity_F64: ++ fop = (am->tag == LAam_RI) ? LAfpstore_FST_D : LAfpstore_FSTX_D; ++ fp = True; ++ break; ++ default: ++ vpanic("iselStmt(loongarch64): Ist_Store"); ++ break; ++ } ++ ++ if (fp) { ++ HReg src = iselFltExpr(env, stmt->Ist.Store.data); ++ addInstr(env, LOONGARCH64Instr_FpStore(fop, am, src)); ++ } else { ++ HReg src = iselIntExpr_R(env, stmt->Ist.Store.data); ++ addInstr(env, LOONGARCH64Instr_Store(op, am, src)); ++ } ++} ++ ++static void iselStmtPut ( ISelEnv* env, IRStmt* stmt ) ++{ ++ IRType ty = typeOfIRExpr(env->type_env, stmt->Ist.Put.data); ++ ++ Bool fp = False; ++ LOONGARCH64AMode* am; ++ LOONGARCH64StoreOp op; ++ LOONGARCH64FpStoreOp fop; ++ if (stmt->Ist.Put.offset < 1024) { ++ switch (ty) { ++ case Ity_I8: ++ op = LAstore_ST_B; ++ break; ++ case Ity_I16: ++ op = LAstore_ST_H; ++ break; ++ case Ity_I32: ++ op = LAstore_ST_W; ++ break; ++ case Ity_I64: ++ op = LAstore_ST_D; ++ break; ++ case Ity_F32: ++ fop = LAfpstore_FST_S; ++ fp = True; ++ break; ++ case Ity_F64: ++ fop = LAfpstore_FST_D; ++ fp = True; ++ break; ++ default: ++ vpanic("iselStmt(loongarch64): Ist_Put"); ++ break; ++ } ++ ++ am = LOONGARCH64AMode_RI(hregGSP(), stmt->Ist.Put.offset); ++ if (fp) { ++ HReg src = iselFltExpr(env, stmt->Ist.Put.data); ++ addInstr(env, LOONGARCH64Instr_FpStore(fop, am, src)); ++ } else { ++ HReg src = iselIntExpr_R(env, stmt->Ist.Put.data); ++ addInstr(env, LOONGARCH64Instr_Store(op, am, src)); ++ } ++ } else { ++ switch (ty) { ++ case Ity_I8: ++ op = LAstore_STX_B; ++ break; ++ case Ity_I16: ++ op = LAstore_STX_H; ++ break; ++ case Ity_I32: ++ op = LAstore_STX_W; ++ break; ++ case Ity_I64: ++ op = LAstore_STX_D; ++ break; ++ case Ity_F32: ++ fop = LAfpstore_FSTX_S; ++ fp = True; ++ break; ++ case Ity_F64: ++ fop = LAfpstore_FSTX_D; ++ fp = True; ++ break; ++ default: ++ vpanic("iselStmt(loongarch64): Ist_Put"); ++ break; ++ } ++ ++ HReg tmp = newVRegI(env); ++ addInstr(env, LOONGARCH64Instr_LI(stmt->Ist.Put.offset, tmp)); ++ am = LOONGARCH64AMode_RR(hregGSP(), tmp); ++ if (fp) { ++ HReg src = iselFltExpr(env, stmt->Ist.Put.data); ++ addInstr(env, LOONGARCH64Instr_FpStore(fop, am, src)); ++ } else { ++ HReg src = iselIntExpr_R(env, stmt->Ist.Put.data); ++ addInstr(env, LOONGARCH64Instr_Store(op, am, src)); ++ } ++ } ++} ++ ++static void iselStmtTmp ( ISelEnv* env, IRStmt* stmt ) ++{ ++ IRTemp tmp = stmt->Ist.WrTmp.tmp; ++ IRType ty = typeOfIRTemp(env->type_env, tmp); ++ ++ if (ty == Ity_I8 || ty == Ity_I16 || ty == Ity_I32 || ty == Ity_I64) { ++ HReg dst = lookupIRTemp(env, tmp); ++ HReg src = iselIntExpr_R(env, stmt->Ist.WrTmp.data); ++ addInstr(env, LOONGARCH64Instr_Move(dst, src)); ++ } else if (ty == Ity_I1) { ++ HReg dst = lookupIRTemp(env, tmp); ++ HReg src = iselCondCode_R(env, stmt->Ist.WrTmp.data); ++ addInstr(env, LOONGARCH64Instr_Move(dst, src)); ++ } else if (ty == Ity_F32) { ++ HReg dst = lookupIRTemp(env, tmp); ++ HReg src = iselFltExpr(env, stmt->Ist.WrTmp.data); ++ addInstr(env, LOONGARCH64Instr_FpMove(LAfpmove_FMOV_S, src, dst)); ++ } else if (ty == Ity_F64) { ++ HReg dst = lookupIRTemp(env, tmp); ++ HReg src = iselFltExpr(env, stmt->Ist.WrTmp.data); ++ addInstr(env, LOONGARCH64Instr_FpMove(LAfpmove_FMOV_D, src, dst)); ++ } else { ++ vpanic("iselStmt(loongarch64): Ist_WrTmp"); ++ } ++} ++ ++static void iselStmtDirty ( 
ISelEnv* env, IRStmt* stmt ) ++{ ++ IRDirty* d = stmt->Ist.Dirty.details; ++ ++ /* Figure out the return type, if any. */ ++ IRType retty = Ity_INVALID; ++ if (d->tmp != IRTemp_INVALID) ++ retty = typeOfIRTemp(env->type_env, d->tmp); ++ ++ Bool retty_ok = False; ++ switch (retty) { ++ case Ity_INVALID: /* function doesn't return anything */ ++ case Ity_I8: case Ity_I16: case Ity_I32: case Ity_I64: ++ retty_ok = True; ++ break; ++ default: ++ break; ++ } ++ if (!retty_ok) ++ vpanic("iselStmt(loongarch64): Ist_Dirty"); ++ ++ /* Marshal args, do the call, and set the return value to 0x555..555 ++ if this is a conditional call that returns a value and the ++ call is skipped. */ ++ UInt addToSp = 0; ++ RetLoc rloc = mk_RetLoc_INVALID(); ++ doHelperCall(&addToSp, &rloc, env, d->guard, d->cee, retty, d->args); ++ vassert(is_sane_RetLoc(rloc)); ++ ++ /* Now figure out what to do with the returned value, if any. */ ++ switch (retty) { ++ case Ity_INVALID: { ++ /* No return value. Nothing to do. */ ++ vassert(d->tmp == IRTemp_INVALID); ++ vassert(rloc.pri == RLPri_None); ++ vassert(addToSp == 0); ++ break; ++ } ++ case Ity_I8: case Ity_I16: case Ity_I32: case Ity_I64: { ++ vassert(rloc.pri == RLPri_Int); ++ vassert(addToSp == 0); ++ /* The returned value is in $a0. Park it in the register ++ associated with tmp. */ ++ HReg dst = lookupIRTemp(env, d->tmp); ++ addInstr(env, LOONGARCH64Instr_Move(dst, hregLOONGARCH64_R4())); ++ break; ++ } ++ default: ++ /*NOTREACHED*/ ++ vassert(0); ++ break; ++ } ++} ++ ++static void iselStmtLLSC ( ISelEnv* env, IRStmt* stmt ) ++{ ++ IRTemp res = stmt->Ist.LLSC.result; ++ IRType tya = typeOfIRExpr(env->type_env, stmt->Ist.LLSC.addr); ++ ++ /* Temporary solution; this needs to be rewritten again for LOONGARCH64. ++ On LOONGARCH64 you cannot read from an address that is locked with LL ++ before the SC. If you read from an address that is locked, then the SC will fail.
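++      Note also that sc.w/sc.d overwrite their data register with the
++      success flag (1 on success, 0 on failure), which is why the store
++      data is first copied into a scratch register in the SC case below.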
++ */ ++ if (stmt->Ist.LLSC.storedata == NULL) { ++ /* LL */ ++ IRType ty = typeOfIRTemp(env->type_env, res); ++ LOONGARCH64LLSCOp op; ++ switch (ty) { ++ case Ity_I32: ++ op = LAllsc_LL_W; ++ break; ++ case Ity_I64: ++ op = LAllsc_LL_D; ++ break; ++ default: ++ vpanic("iselStmt(loongarch64): Ist_LLSC"); ++ break; ++ } ++ LOONGARCH64AMode* addr = iselIntExpr_AMode(env, stmt->Ist.LLSC.addr, tya); ++ HReg val = lookupIRTemp(env, res); ++ addInstr(env, LOONGARCH64Instr_LLSC(op, True, addr, val)); ++ } else { ++ /* SC */ ++ IRType tyd = typeOfIRExpr(env->type_env, stmt->Ist.LLSC.storedata); ++ LOONGARCH64LLSCOp op; ++ switch (tyd) { ++ case Ity_I32: ++ op = LAllsc_SC_W; ++ break; ++ case Ity_I64: ++ op = LAllsc_SC_D; ++ break; ++ default: ++ vpanic("iselStmt(loongarch64): Ist_LLSC"); ++ break; ++ } ++ LOONGARCH64AMode* addr = iselIntExpr_AMode(env, stmt->Ist.LLSC.addr, tya); ++ HReg val = iselIntExpr_R(env, stmt->Ist.LLSC.storedata); ++ HReg dst = lookupIRTemp(env, res); ++ HReg tmp = newVRegI(env); ++ addInstr(env, LOONGARCH64Instr_Move(tmp, val)); ++ addInstr(env, LOONGARCH64Instr_LLSC(op, False, addr, tmp)); ++ addInstr(env, LOONGARCH64Instr_Move(dst, tmp)); ++ } ++} ++ ++static void iselStmtCas ( ISelEnv* env, IRStmt* stmt ) ++{ ++ IRCAS* cas = stmt->Ist.CAS.details; ++ if (cas->oldHi == IRTemp_INVALID && cas->end == Iend_LE) { ++ /* "normal" singleton CAS */ ++ HReg old = lookupIRTemp(env, cas->oldLo); ++ HReg addr = iselIntExpr_R(env, cas->addr); ++ HReg expd = iselIntExpr_R(env, cas->expdLo); ++ HReg data = iselIntExpr_R(env, cas->dataLo); ++ IRType ty = typeOfIRTemp(env->type_env, cas->oldLo); ++ Bool size64; ++ switch (ty) { ++ case Ity_I32: ++ size64 = False; ++ break; ++ case Ity_I64: ++ size64 = True; ++ break; ++ default: ++ vpanic("iselStmt(loongarch64): Ist_CAS"); ++ break; ++ } ++ addInstr(env, LOONGARCH64Instr_Cas(old, addr, expd, data, size64)); ++ } else { ++ vpanic("iselStmt(loongarch64): Ist_CAS"); ++ } ++} ++ ++static void iselStmtMBE ( ISelEnv* env, IRStmt* stmt ) ++{ ++ switch (stmt->Ist.MBE.event) { ++ case Imbe_Fence: ++ case Imbe_CancelReservation: ++ addInstr(env, LOONGARCH64Instr_Bar(LAbar_DBAR, 0)); ++ break; ++ case Imbe_InsnFence: ++ addInstr(env, LOONGARCH64Instr_Bar(LAbar_IBAR, 0)); ++ break; ++ default: ++ vpanic("iselStmt(loongarch64): Ist_MBE"); ++ break; ++ } ++} ++ ++static void iselStmtExit ( ISelEnv* env, IRStmt* stmt ) ++{ ++ if (stmt->Ist.Exit.dst->tag != Ico_U64) ++ vpanic("iselStmt(loongarch64): Ist_Exit: dst is not a 64-bit value"); ++ ++ HReg cond = iselCondCode_R(env, stmt->Ist.Exit.guard); ++ LOONGARCH64AMode* am = mkLOONGARCH64AMode_RI(hregGSP(), stmt->Ist.Exit.offsIP); ++ ++ /* Case: boring transfer to known address */ ++ if (stmt->Ist.Exit.jk == Ijk_Boring || stmt->Ist.Exit.jk == Ijk_Call) { ++ if (env->chainingAllowed) { ++ /* .. almost always true .. */ ++ /* Skip the event check at the dst if this is a forwards edge. */ ++ Bool toFastEP = ((Addr64)stmt->Ist.Exit.dst->Ico.U64) > env->max_ga; ++ addInstr(env, LOONGARCH64Instr_XDirect(stmt->Ist.Exit.dst->Ico.U64, ++ am, cond, toFastEP)); ++ } else { ++ /* .. very occasionally .. */ ++ /* We can't use chaining, so ask for an assisted transfer, ++ as that's the only alternative that is allowable. 
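++         An assisted transfer hands control back to the dispatcher
++         together with the jump kind, instead of using a patchable
++         direct jump.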
*/ ++ HReg dst = iselIntExpr_R(env, IRExpr_Const(stmt->Ist.Exit.dst)); ++ addInstr(env, LOONGARCH64Instr_XAssisted(dst, am, cond, Ijk_Boring)); ++ } ++ return; ++ } ++ ++ /* Case: assisted transfer to arbitrary address */ ++ switch (stmt->Ist.Exit.jk) { ++ /* Keep this list in sync with that for iselNext below */ ++ case Ijk_ClientReq: ++ case Ijk_Yield: ++ case Ijk_NoDecode: ++ case Ijk_InvalICache: ++ case Ijk_NoRedir: ++ case Ijk_SigILL: ++ case Ijk_SigTRAP: ++ case Ijk_SigSEGV: ++ case Ijk_SigBUS: ++ case Ijk_SigFPE_IntDiv: ++ case Ijk_SigFPE_IntOvf: ++ case Ijk_SigSYS: ++ case Ijk_Sys_syscall: { ++ HReg dst = iselIntExpr_R(env, IRExpr_Const(stmt->Ist.Exit.dst)); ++ addInstr(env, LOONGARCH64Instr_XAssisted(dst, am, cond, stmt->Ist.Exit.jk)); ++ break; ++ } ++ default: ++ /* Do we ever expect to see any other kind? */ ++ ppIRJumpKind(stmt->Ist.Exit.jk); ++ vpanic("iselStmt(loongarch64): Ist_Exit: unexpected jump kind"); ++ break; ++ } ++} ++ ++static void iselStmt(ISelEnv* env, IRStmt* stmt) ++{ ++ if (vex_traceflags & VEX_TRACE_VCODE) { ++ vex_printf("\n-- "); ++ ppIRStmt(stmt); ++ vex_printf("\n"); ++ } ++ ++ switch (stmt->tag) { ++ /* --------- STORE --------- */ ++ /* little-endian write to memory */ ++ case Ist_Store: ++ iselStmtStore(env, stmt); ++ break; ++ ++ /* --------- PUT --------- */ ++ /* write guest state, fixed offset */ ++ case Ist_Put: ++ iselStmtPut(env, stmt); ++ break; ++ ++ /* --------- TMP --------- */ ++ /* assign value to temporary */ ++ case Ist_WrTmp: ++ iselStmtTmp(env, stmt); ++ break; ++ ++ /* --------- Call to DIRTY helper --------- */ ++ /* call complex ("dirty") helper function */ ++ case Ist_Dirty: ++ iselStmtDirty(env, stmt); ++ break; ++ ++ /* --------- Load Linked and Store Conditional --------- */ ++ case Ist_LLSC: ++ iselStmtLLSC(env, stmt); ++ break; ++ ++ /* --------- CAS --------- */ ++ case Ist_CAS: ++ iselStmtCas(env, stmt); ++ break; ++ ++ /* --------- MEM FENCE --------- */ ++ case Ist_MBE: ++ iselStmtMBE(env, stmt); ++ break; ++ ++ /* --------- INSTR MARK --------- */ ++ /* Doesn't generate any executable code ... */ ++ case Ist_IMark: ++ break; ++ ++ /* --------- ABI HINT --------- */ ++ /* These have no meaning (denotation in the IR) and so we ignore ++ them ... if any actually made it this far. */ ++ case Ist_AbiHint: ++ break; ++ ++ /* --------- NO-OP --------- */ ++ case Ist_NoOp: ++ break; ++ ++ /* --------- EXIT --------- */ ++ case Ist_Exit: ++ iselStmtExit(env, stmt); ++ break; ++ ++ default: ++ ppIRStmt(stmt); ++ vpanic("iselStmt(loongarch64)"); ++ break; ++ } ++} ++ ++ ++/*---------------------------------------------------------*/ ++/*--- ISEL: Basic block terminators (Nexts) ---*/ ++/*---------------------------------------------------------*/ ++ ++static void iselNext ( ISelEnv* env, IRExpr* next, IRJumpKind jk, Int offsIP ) ++{ ++ if (vex_traceflags & VEX_TRACE_VCODE) { ++ vex_printf("\n-- PUT(%d) = ", offsIP); ++ ppIRExpr(next); ++ vex_printf("; exit-"); ++ ppIRJumpKind(jk); ++ vex_printf("\n"); ++ } ++ ++ /* Case: boring transfer to known address */ ++ if (next->tag == Iex_Const) { ++ IRConst* cdst = next->Iex.Const.con; ++ vassert(cdst->tag == Ico_U64); ++ if (jk == Ijk_Boring || jk == Ijk_Call) { ++ /* Boring transfer to known address */ ++ LOONGARCH64AMode* am = mkLOONGARCH64AMode_RI(hregGSP(), offsIP); ++ if (env->chainingAllowed) { ++ /* .. almost always true .. */ ++ /* Skip the event check at the dst if this is a forwards edge. 
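++            ('Forwards edge' means the destination lies beyond max_ga,
++            the last guest address covered by this superblock; toFastEP
++            then makes the XDirect target the destination's fast entry
++            point, which omits the event check.)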
*/ ++ Bool toFastEP = ((Addr64)cdst->Ico.U64) > env->max_ga; ++ addInstr(env, LOONGARCH64Instr_XDirect(cdst->Ico.U64, am, ++ INVALID_HREG, toFastEP)); ++ } else { ++ /* .. very occasionally .. */ ++ /* We can't use chaining, so ask for an assisted transfer, ++ as that's the only alternative that is allowable. */ ++ HReg dst = iselIntExpr_R(env, next); ++ addInstr(env, LOONGARCH64Instr_XAssisted(dst, am, INVALID_HREG, Ijk_Boring)); ++ } ++ return; ++ } ++ } ++ ++ /* Case: call/return (==boring) transfer to any address */ ++ switch (jk) { ++ case Ijk_Boring: ++ case Ijk_Ret: ++ case Ijk_Call: { ++ HReg dst = iselIntExpr_R(env, next); ++ LOONGARCH64AMode* am = mkLOONGARCH64AMode_RI(hregGSP(), offsIP); ++ if (env->chainingAllowed) { ++ addInstr(env, LOONGARCH64Instr_XIndir(dst, am, INVALID_HREG)); ++ } else { ++ addInstr(env, LOONGARCH64Instr_XAssisted(dst, am, ++ INVALID_HREG, Ijk_Boring)); ++ } ++ return; ++ } ++ default: ++ break; ++ } ++ ++ /* Case: assisted transfer to arbitrary address */ ++ switch (jk) { ++ /* Keep this list in sync with that for Ist_Exit above */ ++ case Ijk_ClientReq: ++ case Ijk_Yield: ++ case Ijk_NoDecode: ++ case Ijk_InvalICache: ++ case Ijk_NoRedir: ++ case Ijk_SigILL: ++ case Ijk_SigTRAP: ++ case Ijk_SigSEGV: ++ case Ijk_SigBUS: ++ case Ijk_SigFPE_IntDiv: ++ case Ijk_SigFPE_IntOvf: ++ case Ijk_SigSYS: ++ case Ijk_Sys_syscall: { ++ HReg dst = iselIntExpr_R(env, next); ++ LOONGARCH64AMode* am = mkLOONGARCH64AMode_RI(hregGSP(), offsIP); ++ addInstr(env, LOONGARCH64Instr_XAssisted(dst, am, INVALID_HREG, jk)); ++ return; ++ } ++ default: ++ break; ++ } ++ ++ vex_printf("\n-- PUT(%d) = ", offsIP); ++ ppIRExpr(next); ++ vex_printf("; exit-"); ++ ppIRJumpKind(jk); ++ vex_printf("\n"); ++ vassert(0); // are we expecting any other kind? ++} ++ ++ ++/*---------------------------------------------------------*/ ++/*--- Insn selector top-level ---*/ ++/*---------------------------------------------------------*/ ++ ++/* Translate an entire BB to LOONGARCH64 code. */ ++HInstrArray* iselSB_LOONGARCH64 ( const IRSB* bb, ++ VexArch arch_host, ++ const VexArchInfo* archinfo_host, ++ const VexAbiInfo* vbi, ++ Int offs_Host_EvC_Counter, ++ Int offs_Host_EvC_FailAddr, ++ Bool chainingAllowed, ++ Bool addProfInc, ++ Addr max_ga ) ++{ ++ Int i, j; ++ HReg hreg, hregHI; ++ ISelEnv* env; ++ UInt hwcaps_host = archinfo_host->hwcaps; ++ LOONGARCH64AMode *amCounter, *amFailAddr; ++ ++ /* sanity ... */ ++ vassert(arch_host == VexArchLOONGARCH64); ++ vassert((hwcaps_host & ~(VEX_HWCAPS_LOONGARCH_CPUCFG ++ | VEX_HWCAPS_LOONGARCH_LAM ++ | VEX_HWCAPS_LOONGARCH_UAL ++ | VEX_HWCAPS_LOONGARCH_FP ++ | VEX_HWCAPS_LOONGARCH_LSX ++ | VEX_HWCAPS_LOONGARCH_LASX ++ | VEX_HWCAPS_LOONGARCH_COMPLEX ++ | VEX_HWCAPS_LOONGARCH_CRYPTO ++ | VEX_HWCAPS_LOONGARCH_LVZP ++ | VEX_HWCAPS_LOONGARCH_X86BT ++ | VEX_HWCAPS_LOONGARCH_ARMBT ++ | VEX_HWCAPS_LOONGARCH_MIPSBT ++ | VEX_HWCAPS_LOONGARCH_ISA_32BIT ++ | VEX_HWCAPS_LOONGARCH_ISA_64BIT)) == 0); ++ ++ /* Check that the host's endianness is as expected. */ ++ vassert(archinfo_host->endness == VexEndnessLE); ++ ++ /* Make up an initial environment to use. */ ++ env = LibVEX_Alloc_inline(sizeof(ISelEnv)); ++ env->vreg_ctr = 0; ++ ++ /* Set up output code array. */ ++ env->code = newHInstrArray(); ++ ++ /* Copy BB's type env. */ ++ env->type_env = bb->tyenv; ++ ++ /* Make up an IRTemp -> virtual HReg mapping. This doesn't ++ change as we go along. 
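++      vregmap holds one virtual register per IRTemp; vregmapHI is only
++      used for the upper half of Ity_I128 temps, which need a register
++      pair.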
*/ ++ env->n_vregmap = bb->tyenv->types_used; ++ env->vregmap = LibVEX_Alloc_inline(env->n_vregmap * sizeof(HReg)); ++ env->vregmapHI = LibVEX_Alloc_inline(env->n_vregmap * sizeof(HReg)); ++ ++ /* and finally ... */ ++ env->chainingAllowed = chainingAllowed; ++ env->hwcaps = hwcaps_host; ++ env->max_ga = max_ga; ++ ++ /* For each IR temporary, allocate a suitably-kinded virtual register. */ ++ j = 0; ++ for (i = 0; i < env->n_vregmap; i++) { ++ hregHI = hreg = INVALID_HREG; ++ switch (bb->tyenv->types[i]) { ++ case Ity_I1: ++ case Ity_I8: ++ case Ity_I16: ++ case Ity_I32: ++ case Ity_I64: ++ hreg = mkHReg(True, HRcInt64, 0, j++); ++ break; ++ case Ity_I128: ++ hreg = mkHReg(True, HRcInt64, 0, j++); ++ hregHI = mkHReg(True, HRcInt64, 0, j++); ++ break; ++ case Ity_F16: // we'll use HRcFlt64 regs for F16 too ++ case Ity_F32: // we'll use HRcFlt64 regs for F32 too ++ case Ity_F64: ++ hreg = mkHReg(True, HRcFlt64, 0, j++); ++ break; ++ default: ++ ppIRType(bb->tyenv->types[i]); ++ vpanic("iselBB(loongarch64): IRTemp type"); ++ } ++ env->vregmap[i] = hreg; ++ env->vregmapHI[i] = hregHI; ++ } ++ env->vreg_ctr = j; ++ ++ /* The very first instruction must be an event check. */ ++ amCounter = mkLOONGARCH64AMode_RI(hregGSP(), offs_Host_EvC_Counter); ++ amFailAddr = mkLOONGARCH64AMode_RI(hregGSP(), offs_Host_EvC_FailAddr); ++ addInstr(env, LOONGARCH64Instr_EvCheck(amCounter, amFailAddr)); ++ ++ /* Possibly a block counter increment (for profiling). At this ++ point we don't know the address of the counter, so just pretend ++ it is zero. It will have to be patched later, but before this ++ translation is used, by a call to LibVEX_patchProfCtr. */ ++ if (addProfInc) { ++ addInstr(env, LOONGARCH64Instr_ProfInc()); ++ } ++ ++ /* Ok, finally we can iterate over the statements. */ ++ for (i = 0; i < bb->stmts_used; i++) ++ iselStmt(env, bb->stmts[i]); ++ ++ iselNext(env, bb->next, bb->jumpkind, bb->offsIP); ++ ++ /* record the number of vregs we used. 
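++      (The register allocator presumably uses this count to size its
++      per-vreg state.)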
*/ ++ env->code->n_vregs = env->vreg_ctr; ++ return env->code; ++} ++ ++ ++/*---------------------------------------------------------------*/ ++/*--- end host_loongarch64_isel.c ---*/ ++/*---------------------------------------------------------------*/ +diff --git a/VEX/priv/ir_defs.c b/VEX/priv/ir_defs.c +index 2d82c41a1..382c283f2 100644 +--- a/VEX/priv/ir_defs.c ++++ b/VEX/priv/ir_defs.c +@@ -280,6 +280,8 @@ void ppIROp ( IROp op ) + case Iop_SubF64: vex_printf("SubF64"); return; + case Iop_MulF64: vex_printf("MulF64"); return; + case Iop_DivF64: vex_printf("DivF64"); return; ++ case Iop_ScaleBF64: vex_printf("ScaleBF64"); return; ++ case Iop_ScaleBF32: vex_printf("ScaleBF32"); return; + case Iop_AddF64r32: vex_printf("AddF64r32"); return; + case Iop_SubF64r32: vex_printf("SubF64r32"); return; + case Iop_MulF64r32: vex_printf("MulF64r32"); return; +@@ -356,6 +358,10 @@ void ppIROp ( IROp op ) + case Iop_SqrtF64: vex_printf("SqrtF64"); return; + case Iop_SqrtF32: vex_printf("SqrtF32"); return; + case Iop_SqrtF16: vex_printf("SqrtF16"); return; ++ case Iop_RSqrtF32: vex_printf("RSqrtF32"); return; ++ case Iop_RSqrtF64: vex_printf("RSqrtF64"); return; ++ case Iop_LogBF32: vex_printf("LogBF32"); return; ++ case Iop_LogBF64: vex_printf("LogBF64"); return; + case Iop_SinF64: vex_printf("SinF64"); return; + case Iop_CosF64: vex_printf("CosF64"); return; + case Iop_TanF64: vex_printf("TanF64"); return; +@@ -379,8 +385,12 @@ void ppIROp ( IROp op ) + + case Iop_MaxNumF64: vex_printf("MaxNumF64"); return; + case Iop_MinNumF64: vex_printf("MinNumF64"); return; ++ case Iop_MaxNumAbsF64: vex_printf("MaxNumAbsF64"); return; ++ case Iop_MinNumAbsF64: vex_printf("MinNumAbsF64"); return; + case Iop_MaxNumF32: vex_printf("MaxNumF32"); return; + case Iop_MinNumF32: vex_printf("MinNumF32"); return; ++ case Iop_MaxNumAbsF32: vex_printf("MaxNumAbsF32"); return; ++ case Iop_MinNumAbsF32: vex_printf("MinNumAbsF32"); return; + + case Iop_F16toF64: vex_printf("F16toF64"); return; + case Iop_F64toF16: vex_printf("F64toF16"); return; +@@ -1434,10 +1444,13 @@ Bool primopMightTrap ( IROp op ) + case Iop_1Uto8: case Iop_1Uto32: case Iop_1Uto64: case Iop_1Sto8: + case Iop_1Sto16: case Iop_1Sto32: case Iop_1Sto64: + case Iop_AddF64: case Iop_SubF64: case Iop_MulF64: case Iop_DivF64: ++ case Iop_ScaleBF64: case Iop_ScaleBF32: + case Iop_AddF32: case Iop_SubF32: case Iop_MulF32: case Iop_DivF32: + case Iop_AddF64r32: case Iop_SubF64r32: case Iop_MulF64r32: + case Iop_DivF64r32: case Iop_NegF64: case Iop_AbsF64: + case Iop_NegF32: case Iop_AbsF32: case Iop_SqrtF64: case Iop_SqrtF32: ++ case Iop_RSqrtF64: case Iop_RSqrtF32: ++ case Iop_LogBF64: case Iop_LogBF32: + case Iop_NegF16: case Iop_AbsF16: case Iop_SqrtF16: case Iop_SubF16: + case Iop_AddF16: + case Iop_CmpF64: case Iop_CmpF32: case Iop_CmpF16: case Iop_CmpF128: +@@ -1477,8 +1490,11 @@ Bool primopMightTrap ( IROp op ) + case Iop_RSqrtEst5GoodF64: case Iop_RoundF64toF64_NEAREST: + case Iop_RoundF64toF64_NegINF: case Iop_RoundF64toF64_PosINF: + case Iop_RoundF64toF64_ZERO: case Iop_TruncF64asF32: case Iop_RoundF64toF32: +- case Iop_RecpExpF64: case Iop_RecpExpF32: case Iop_MaxNumF64: +- case Iop_MinNumF64: case Iop_MaxNumF32: case Iop_MinNumF32: ++ case Iop_RecpExpF64: case Iop_RecpExpF32: ++ case Iop_MaxNumF64: case Iop_MinNumF64: ++ case Iop_MaxNumAbsF64: case Iop_MinNumAbsF64: ++ case Iop_MaxNumF32: case Iop_MinNumF32: ++ case Iop_MaxNumAbsF32: case Iop_MinNumAbsF32: + case Iop_F16toF64: case Iop_F64toF16: case Iop_F16toF32: + case Iop_F32toF16: case 
Iop_QAdd32S: case Iop_QSub32S: + case Iop_Add16x2: case Iop_Sub16x2: +@@ -2075,6 +2091,7 @@ void ppIRJumpKind ( IRJumpKind kind ) + case Ijk_SigFPE: vex_printf("SigFPE"); break; + case Ijk_SigFPE_IntDiv: vex_printf("SigFPE_IntDiv"); break; + case Ijk_SigFPE_IntOvf: vex_printf("SigFPE_IntOvf"); break; ++ case Ijk_SigSYS: vex_printf("SigSYS"); break; + case Ijk_Sys_syscall: vex_printf("Sys_syscall"); break; + case Ijk_Sys_int32: vex_printf("Sys_int32"); break; + case Ijk_Sys_int128: vex_printf("Sys_int128"); break; +@@ -2094,6 +2111,8 @@ void ppIRMBusEvent ( IRMBusEvent event ) + vex_printf("Fence"); break; + case Imbe_CancelReservation: + vex_printf("CancelReservation"); break; ++ case Imbe_InsnFence: ++ vex_printf("InsnFence"); break; + default: + vpanic("ppIRMBusEvent"); + } +@@ -3372,12 +3391,14 @@ void typeOfPrimop ( IROp op, + + case Iop_AddF64: case Iop_SubF64: + case Iop_MulF64: case Iop_DivF64: ++ case Iop_ScaleBF64: + case Iop_AddF64r32: case Iop_SubF64r32: + case Iop_MulF64r32: case Iop_DivF64r32: + TERNARY(ity_RMode,Ity_F64,Ity_F64, Ity_F64); + + case Iop_AddF32: case Iop_SubF32: + case Iop_MulF32: case Iop_DivF32: ++ case Iop_ScaleBF32: + TERNARY(ity_RMode,Ity_F32,Ity_F32, Ity_F32); + + case Iop_AddF16: +@@ -3394,10 +3415,14 @@ void typeOfPrimop ( IROp op, + UNARY(Ity_F16, Ity_F16); + + case Iop_SqrtF64: ++ case Iop_RSqrtF64: ++ case Iop_LogBF64: + case Iop_RecpExpF64: + BINARY(ity_RMode,Ity_F64, Ity_F64); + + case Iop_SqrtF32: ++ case Iop_RSqrtF32: ++ case Iop_LogBF32: + case Iop_RoundF32toInt: + case Iop_RecpExpF32: + BINARY(ity_RMode,Ity_F32, Ity_F32); +@@ -3406,9 +3431,11 @@ void typeOfPrimop ( IROp op, + BINARY(ity_RMode, Ity_F16, Ity_F16); + + case Iop_MaxNumF64: case Iop_MinNumF64: ++ case Iop_MaxNumAbsF64: case Iop_MinNumAbsF64: + BINARY(Ity_F64,Ity_F64, Ity_F64); + + case Iop_MaxNumF32: case Iop_MinNumF32: ++ case Iop_MaxNumAbsF32: case Iop_MinNumAbsF32: + BINARY(Ity_F32,Ity_F32, Ity_F32); + + case Iop_CmpF16: +@@ -5246,7 +5273,9 @@ void tcStmt ( const IRSB* bb, const IRStmt* stmt, IRType gWordTy ) + break; + case Ist_MBE: + switch (stmt->Ist.MBE.event) { +- case Imbe_Fence: case Imbe_CancelReservation: ++ case Imbe_Fence: ++ case Imbe_CancelReservation: ++ case Imbe_InsnFence: + break; + default: sanityCheckFail(bb,stmt,"IRStmt.MBE.event: unknown"); + break; +diff --git a/VEX/priv/main_main.c b/VEX/priv/main_main.c +index 482047c7a..98d4a81c6 100644 +--- a/VEX/priv/main_main.c ++++ b/VEX/priv/main_main.c +@@ -43,6 +43,7 @@ + #include "libvex_guest_s390x.h" + #include "libvex_guest_mips32.h" + #include "libvex_guest_mips64.h" ++#include "libvex_guest_loongarch64.h" + + #include "main_globals.h" + #include "main_util.h" +@@ -57,6 +58,7 @@ + #include "host_s390_defs.h" + #include "host_mips_defs.h" + #include "host_nanomips_defs.h" ++#include "host_loongarch64_defs.h" + + #include "guest_generic_bb_to_IR.h" + #include "guest_x86_defs.h" +@@ -67,6 +69,7 @@ + #include "guest_s390_defs.h" + #include "guest_mips_defs.h" + #include "guest_nanomips_defs.h" ++#include "guest_loongarch64_defs.h" + + #include "host_generic_simd128.h" + +@@ -163,6 +166,14 @@ + #define NANOMIPSST(f) vassert(0) + #endif + ++#if defined(VGA_loongarch64) || defined(VEXMULTIARCH) ++#define LOONGARCH64FN(f) f ++#define LOONGARCH64ST(f) f ++#else ++#define LOONGARCH64FN(f) NULL ++#define LOONGARCH64ST(f) vassert(0) ++#endif ++ + /* This file contains the top level interface to the library. */ + + /* --------- fwds ... 
--------- */ +@@ -541,6 +552,23 @@ IRSB* LibVEX_FrontEnd ( /*MOD*/ VexTranslateArgs* vta, + vassert(sizeof( ((VexGuestMIPS32State*)0)->guest_NRADDR ) == 4); + break; + ++ case VexArchLOONGARCH64: ++ preciseMemExnsFn ++ = LOONGARCH64FN(guest_loongarch64_state_requires_precise_mem_exns); ++ disInstrFn = LOONGARCH64FN(disInstr_LOONGARCH64); ++ specHelper = LOONGARCH64FN(guest_loongarch64_spechelper); ++ guest_layout = LOONGARCH64FN(&loongarch64Guest_layout); ++ offB_CMSTART = offsetof(VexGuestLOONGARCH64State, guest_CMSTART); ++ offB_CMLEN = offsetof(VexGuestLOONGARCH64State, guest_CMLEN); ++ offB_GUEST_IP = offsetof(VexGuestLOONGARCH64State, guest_PC); ++ szB_GUEST_IP = sizeof( ((VexGuestLOONGARCH64State*)0)->guest_PC ); ++ vassert(vta->archinfo_guest.endness == VexEndnessLE); ++ vassert(sizeof(VexGuestLOONGARCH64State) % LibVEX_GUEST_STATE_ALIGN == 0); ++ vassert(sizeof( ((VexGuestLOONGARCH64State*)0)->guest_CMSTART) == 8); ++ vassert(sizeof( ((VexGuestLOONGARCH64State*)0)->guest_CMLEN ) == 8); ++ vassert(sizeof( ((VexGuestLOONGARCH64State*)0)->guest_NRADDR ) == 8); ++ break; ++ + default: + vpanic("LibVEX_Translate: unsupported guest insn set"); + } +@@ -878,6 +906,14 @@ static void libvex_BackEnd ( const VexTranslateArgs *vta, + offB_HOST_EvC_FAILADDR = offsetof(VexGuestMIPS32State,host_EvC_FAILADDR); + break; + ++ case VexArchLOONGARCH64: ++ preciseMemExnsFn ++ = LOONGARCH64FN(guest_loongarch64_state_requires_precise_mem_exns); ++ guest_sizeB = sizeof(VexGuestLOONGARCH64State); ++ offB_HOST_EvC_COUNTER = offsetof(VexGuestLOONGARCH64State, host_EvC_COUNTER); ++ offB_HOST_EvC_FAILADDR = offsetof(VexGuestLOONGARCH64State, host_EvC_FAILADDR); ++ break; ++ + default: + vpanic("LibVEX_Codegen: unsupported guest insn set"); + } +@@ -1052,6 +1088,23 @@ static void libvex_BackEnd ( const VexTranslateArgs *vta, + || vta->archinfo_host.endness == VexEndnessBE); + break; + ++ case VexArchLOONGARCH64: ++ mode64 = True; ++ rRegUniv = LOONGARCH64FN(getRRegUniverse_LOONGARCH64()); ++ getRegUsage ++ = CAST_TO_TYPEOF(getRegUsage) LOONGARCH64FN(getRegUsage_LOONGARCH64Instr); ++ mapRegs = CAST_TO_TYPEOF(mapRegs) LOONGARCH64FN(mapRegs_LOONGARCH64Instr); ++ genSpill = CAST_TO_TYPEOF(genSpill) LOONGARCH64FN(genSpill_LOONGARCH64); ++ genReload = CAST_TO_TYPEOF(genReload) LOONGARCH64FN(genReload_LOONGARCH64); ++ genMove = CAST_TO_TYPEOF(genMove) LOONGARCH64FN(genMove_LOONGARCH64); ++ ppInstr = CAST_TO_TYPEOF(ppInstr) LOONGARCH64FN(ppLOONGARCH64Instr); ++ ppReg = CAST_TO_TYPEOF(ppReg) LOONGARCH64FN(ppHRegLOONGARCH64); ++ iselSB = LOONGARCH64FN(iselSB_LOONGARCH64); ++ emit = CAST_TO_TYPEOF(emit) LOONGARCH64FN(emit_LOONGARCH64Instr); ++ vassert(vta->archinfo_host.endness == VexEndnessLE ++ || vta->archinfo_host.endness == VexEndnessBE); ++ break; ++ + default: + vpanic("LibVEX_Translate: unsupported host insn set"); + } +@@ -1297,6 +1350,11 @@ VexInvalRange LibVEX_Chain ( VexArch arch_host, + place_to_chain, + disp_cp_chain_me_EXPECTED, + place_to_jump_to)); ++ case VexArchLOONGARCH64: ++ LOONGARCH64ST(return chainXDirect_LOONGARCH64(endness_host, ++ place_to_chain, ++ disp_cp_chain_me_EXPECTED, ++ place_to_jump_to)); + default: + vassert(0); + } +@@ -1359,6 +1417,11 @@ VexInvalRange LibVEX_UnChain ( VexArch arch_host, + place_to_unchain, + place_to_jump_to_EXPECTED, + disp_cp_chain_me)); ++ case VexArchLOONGARCH64: ++ LOONGARCH64ST(return unchainXDirect_LOONGARCH64(endness_host, ++ place_to_unchain, ++ place_to_jump_to_EXPECTED, ++ disp_cp_chain_me)); + default: + vassert(0); + } +@@ -1389,6 +1452,8 @@ Int 
LibVEX_evCheckSzB ( VexArch arch_host ) + MIPS64ST(cached = evCheckSzB_MIPS()); break; + case VexArchNANOMIPS: + NANOMIPSST(cached = evCheckSzB_NANOMIPS()); break; ++ case VexArchLOONGARCH64: ++ LOONGARCH64ST(cached = evCheckSzB_LOONGARCH64()); break; + default: + vassert(0); + } +@@ -1432,6 +1497,10 @@ VexInvalRange LibVEX_PatchProfInc ( VexArch arch_host, + case VexArchNANOMIPS: + NANOMIPSST(return patchProfInc_NANOMIPS(endness_host, place_to_patch, + location_of_counter)); ++ case VexArchLOONGARCH64: ++ LOONGARCH64ST(return patchProfInc_LOONGARCH64(endness_host, ++ place_to_patch, ++ location_of_counter)); + default: + vassert(0); + } +@@ -1515,6 +1584,7 @@ const HChar* LibVEX_ppVexArch ( VexArch arch ) + case VexArchMIPS32: return "MIPS32"; + case VexArchMIPS64: return "MIPS64"; + case VexArchNANOMIPS: return "NANOMIPS"; ++ case VexArchLOONGARCH64: return "LOONGARCH64"; + default: return "VexArch???"; + } + } +@@ -1585,6 +1655,7 @@ static IRType arch_word_size (VexArch arch) { + case VexArchMIPS64: + case VexArchPPC64: + case VexArchS390X: ++ case VexArchLOONGARCH64: + return Ity_I64; + + default: +@@ -1925,6 +1996,38 @@ static const HChar* show_hwcaps_mips64 ( UInt hwcaps ) + return "Unsupported baseline"; + } + ++static const HChar* show_hwcaps_loongarch64 ( UInt hwcaps ) ++{ ++ static const HChar prefix[] = "loongarch64"; ++ static const struct { ++ UInt hwcaps_bit; ++ HChar name[16]; ++ } hwcaps_list[] = { ++ { VEX_HWCAPS_LOONGARCH_CPUCFG, "cpucfg" }, ++ { VEX_HWCAPS_LOONGARCH_LAM, "lam" }, ++ { VEX_HWCAPS_LOONGARCH_UAL, "ual" }, ++ { VEX_HWCAPS_LOONGARCH_FP, "fpu" }, ++ { VEX_HWCAPS_LOONGARCH_LSX, "lsx" }, ++ { VEX_HWCAPS_LOONGARCH_LASX, "lasx" }, ++ { VEX_HWCAPS_LOONGARCH_COMPLEX, "complex" }, ++ { VEX_HWCAPS_LOONGARCH_CRYPTO, "crypto" }, ++ { VEX_HWCAPS_LOONGARCH_LVZP, "lvz" }, ++ { VEX_HWCAPS_LOONGARCH_X86BT, "lbt_x86" }, ++ { VEX_HWCAPS_LOONGARCH_ARMBT, "lbt_arm" }, ++ { VEX_HWCAPS_LOONGARCH_MIPSBT, "lbt_mips" } ++ }; ++ static HChar buf[sizeof(prefix) + ++ NUM_HWCAPS * (sizeof hwcaps_list[0].name + 1) + 1]; // '\0' ++ ++ HChar *p = buf + vex_sprintf(buf, "%s", prefix); ++ UInt i; ++ for (i = 0 ; i < NUM_HWCAPS; ++i) { ++ if (hwcaps & hwcaps_list[i].hwcaps_bit) ++ p = p + vex_sprintf(p, "-%s", hwcaps_list[i].name); ++ } ++ return buf; ++} ++ + #undef NUM_HWCAPS + + /* Thie function must not return NULL. 
*/ +@@ -1941,6 +2044,7 @@ static const HChar* show_hwcaps ( VexArch arch, UInt hwcaps ) + case VexArchS390X: return show_hwcaps_s390x(hwcaps); + case VexArchMIPS32: return show_hwcaps_mips32(hwcaps); + case VexArchMIPS64: return show_hwcaps_mips64(hwcaps); ++ case VexArchLOONGARCH64: return show_hwcaps_loongarch64(hwcaps); + default: return NULL; + } + } +@@ -2203,6 +2307,11 @@ static void check_hwcaps ( VexArch arch, UInt hwcaps ) + return; + invalid_hwcaps(arch, hwcaps, "Unsupported baseline\n"); + ++ case VexArchLOONGARCH64: ++ if (!(hwcaps & VEX_HWCAPS_LOONGARCH_ISA_64BIT)) ++ invalid_hwcaps(arch, hwcaps, "Unsupported baseline\n"); ++ return; ++ + default: + vpanic("unknown architecture"); + } +diff --git a/VEX/pub/libvex.h b/VEX/pub/libvex.h +index ec50d52ca..88cb9e732 100644 +--- a/VEX/pub/libvex.h ++++ b/VEX/pub/libvex.h +@@ -60,6 +60,7 @@ typedef + VexArchMIPS32, + VexArchMIPS64, + VexArchNANOMIPS, ++ VexArchLOONGARCH64, + } + VexArch; + +@@ -299,6 +300,22 @@ typedef + (VEX_MIPS_PROC_ID(x) == VEX_PRID_IMP_P5600) && \ + (VEX_MIPS_HOST_FP_MODE(x))) + ++/* LoongArch baseline capability */ ++#define VEX_HWCAPS_LOONGARCH_CPUCFG (1 << 0) /* CPU has CPUCFG */ ++#define VEX_HWCAPS_LOONGARCH_LAM (1 << 1) /* CPU has Atomic instructions */ ++#define VEX_HWCAPS_LOONGARCH_UAL (1 << 2) /* CPU has Unaligned Access support */ ++#define VEX_HWCAPS_LOONGARCH_FP (1 << 3) /* CPU has FPU */ ++#define VEX_HWCAPS_LOONGARCH_LSX (1 << 4) /* CPU has 128-bit SIMD instructions */ ++#define VEX_HWCAPS_LOONGARCH_LASX (1 << 5) /* CPU has 256-bit SIMD instructions */ ++#define VEX_HWCAPS_LOONGARCH_COMPLEX (1 << 6) /* CPU has Complex instructions */ ++#define VEX_HWCAPS_LOONGARCH_CRYPTO (1 << 7) /* CPU has Crypto instructions */ ++#define VEX_HWCAPS_LOONGARCH_LVZP (1 << 8) /* CPU has Virtualization extension */ ++#define VEX_HWCAPS_LOONGARCH_X86BT (1 << 9) /* CPU has X86 Binary Translation */ ++#define VEX_HWCAPS_LOONGARCH_ARMBT (1 << 10) /* CPU has ARM Binary Translation */ ++#define VEX_HWCAPS_LOONGARCH_MIPSBT (1 << 11) /* CPU has MIPS Binary Translation */ ++#define VEX_HWCAPS_LOONGARCH_ISA_32BIT (1 << 30) /* 32-bit ISA */ ++#define VEX_HWCAPS_LOONGARCH_ISA_64BIT (1 << 31) /* 64-bit ISA */ ++ + /* These return statically allocated strings. */ + + extern const HChar* LibVEX_ppVexArch ( VexArch ); +@@ -419,6 +436,7 @@ void LibVEX_default_VexArchInfo ( /*OUT*/VexArchInfo* vai ); + guest is mips32 ==> applicable, default True + guest is mips64 ==> applicable, default True + guest is arm64 ==> applicable, default False ++ guest is loongarch64 ==> const True + + host_ppc_calls_use_fndescrs: + host is ppc32-linux ==> False +@@ -1025,6 +1043,10 @@ extern void LibVEX_InitIRI ( const IRICB * ); + ~~~~~ + r21 is GSP. + ++ loongarch64 ++ ~~~~~ ++ r31 is GSP. 
++ + ALL GUEST ARCHITECTURES + ~~~~~~~~~~~~~~~~~~~~~~~ + The guest state must contain two pseudo-registers, guest_CMSTART +diff --git a/VEX/pub/libvex_basictypes.h b/VEX/pub/libvex_basictypes.h +index e3f1485d5..b4c81bf54 100644 +--- a/VEX/pub/libvex_basictypes.h ++++ b/VEX/pub/libvex_basictypes.h +@@ -198,6 +198,10 @@ typedef unsigned long HWord; + # define VEX_HOST_WORDSIZE 4 + # define VEX_REGPARM(_n) /* */ + ++#elif defined(__loongarch__) && (__loongarch_grlen == 64) ++# define VEX_HOST_WORDSIZE 8 ++# define VEX_REGPARM(_n) /* */ ++ + #else + # error "Vex: Fatal: Can't establish the host architecture" + #endif +diff --git a/VEX/pub/libvex_guest_loongarch64.h b/VEX/pub/libvex_guest_loongarch64.h +new file mode 100644 +index 000000000..36a6cb3ca +--- /dev/null ++++ b/VEX/pub/libvex_guest_loongarch64.h +@@ -0,0 +1,172 @@ ++ ++/*---------------------------------------------------------------*/ ++/*--- begin libvex_guest_loongarch64.h ---*/ ++/*---------------------------------------------------------------*/ ++ ++/* ++ This file is part of Valgrind, a dynamic binary instrumentation ++ framework. ++ ++ Copyright (C) 2021-2022 Loongson Technology Corporation Limited ++ ++ This program is free software; you can redistribute it and/or ++ modify it under the terms of the GNU General Public License as ++ published by the Free Software Foundation; either version 2 of the ++ License, or (at your option) any later version. ++ ++ This program is distributed in the hope that it will be useful, but ++ WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ General Public License for more details. ++ ++ You should have received a copy of the GNU General Public License ++ along with this program; if not, see . ++ ++ The GNU General Public License is contained in the file COPYING. ++ ++ Neither the names of the U.S. Department of Energy nor the ++ University of California nor the names of its contributors may be ++ used to endorse or promote products derived from this software ++ without prior written permission. ++*/ ++ ++#ifndef __LIBVEX_PUB_GUEST_LOONGARCH64_H ++#define __LIBVEX_PUB_GUEST_LOONGARCH64_H ++ ++#include "libvex_basictypes.h" ++ ++ ++/*---------------------------------------------------------------*/ ++/*--- Vex's representation of the LOONGARCH64 CPU state. ---*/ ++/*---------------------------------------------------------------*/ ++ ++typedef ++ struct { ++ /* Event check fail addr and counter. 
*/ ++ ULong host_EvC_FAILADDR; ++ UInt host_EvC_COUNTER; ++ UInt _padding0; ++ ++ /* CPU Registers */ ++ ULong guest_R0; /* Constant zero */ ++ ULong guest_R1; /* Return address */ ++ ULong guest_R2; /* Thread pointer */ ++ ULong guest_R3; /* Stack pointer */ ++ ULong guest_R4; /* Argument registers / Return value */ ++ ULong guest_R5; ++ ULong guest_R6; /* Argument registers */ ++ ULong guest_R7; ++ ULong guest_R8; ++ ULong guest_R9; ++ ULong guest_R10; ++ ULong guest_R11; ++ ULong guest_R12; /* Temporary registers */ ++ ULong guest_R13; ++ ULong guest_R14; ++ ULong guest_R15; ++ ULong guest_R16; ++ ULong guest_R17; ++ ULong guest_R18; ++ ULong guest_R19; ++ ULong guest_R20; ++ ULong guest_R21; /* Reserved */ ++ ULong guest_R22; /* Frame pointer / Static register */ ++ ULong guest_R23; /* Static registers */ ++ ULong guest_R24; ++ ULong guest_R25; ++ ULong guest_R26; ++ ULong guest_R27; ++ ULong guest_R28; ++ ULong guest_R29; ++ ULong guest_R30; ++ ULong guest_R31; ++ ++ ULong guest_PC; /* Program counter */ ++ ++ UChar guest_FCC0; /* Condition Flag Registers */ ++ UChar guest_FCC1; ++ UChar guest_FCC2; ++ UChar guest_FCC3; ++ UChar guest_FCC4; ++ UChar guest_FCC5; ++ UChar guest_FCC6; ++ UChar guest_FCC7; ++ UInt guest_FCSR; /* FP/SIMD Control and Status Register */ ++ ++ /* Various pseudo-regs mandated by Vex or Valgrind. */ ++ /* Emulation notes */ ++ UInt guest_EMNOTE; ++ ++ /* For clflush: record start and length of area to invalidate */ ++ ULong guest_CMSTART; ++ ULong guest_CMLEN; ++ ++ /* Used to record the unredirected guest address at the start of ++ a translation whose start has been redirected. By reading ++ this pseudo-register shortly afterwards, the translation can ++ find out what the corresponding no-redirection address was. ++ Note, this is only set for wrap-style redirects, not for ++ replace-style ones. */ ++ ULong guest_NRADDR; ++ ++ /* Fallback LL/SC support. */ ++ ULong guest_LLSC_SIZE; /* 0==no transaction, else 4 or 8. */ ++ ULong guest_LLSC_ADDR; /* Address of the transaction. */ ++ ULong guest_LLSC_DATA; /* Original value at ADDR. */ ++ ++ ULong _padding1; ++ ++ /* FPU/SIMD Registers */ ++ U256 guest_X0; ++ U256 guest_X1; ++ U256 guest_X2; ++ U256 guest_X3; ++ U256 guest_X4; ++ U256 guest_X5; ++ U256 guest_X6; ++ U256 guest_X7; ++ U256 guest_X8; ++ U256 guest_X9; ++ U256 guest_X10; ++ U256 guest_X11; ++ U256 guest_X12; ++ U256 guest_X13; ++ U256 guest_X14; ++ U256 guest_X15; ++ U256 guest_X16; ++ U256 guest_X17; ++ U256 guest_X18; ++ U256 guest_X19; ++ U256 guest_X20; ++ U256 guest_X21; ++ U256 guest_X22; ++ U256 guest_X23; ++ U256 guest_X24; ++ U256 guest_X25; ++ U256 guest_X26; ++ U256 guest_X27; ++ U256 guest_X28; ++ U256 guest_X29; ++ U256 guest_X30; ++ U256 guest_X31; ++ ++ /* VexGuestLOONGARCH64State should have a 16-aligned size */ ++} VexGuestLOONGARCH64State; ++ ++/*---------------------------------------------------------------*/ ++/*--- Utility functions for LOONGARCH64 guest stuff. ---*/ ++/*---------------------------------------------------------------*/ ++ ++/* ALL THE FOLLOWING ARE VISIBLE TO LIBRARY CLIENT. */ ++ ++/* Initialise all guest LOONGARCH64 state. 
*/ ++ ++extern ++void LibVEX_GuestLOONGARCH64_initialise ( /*OUT*/ ++ VexGuestLOONGARCH64State* vex_state ); ++ ++#endif /* ndef __LIBVEX_PUB_GUEST_LOONGARCH64_H */ ++ ++/*---------------------------------------------------------------*/ ++/*--- libvex_guest_loongarch64.h ---*/ ++/*---------------------------------------------------------------*/ +diff --git a/VEX/pub/libvex_ir.h b/VEX/pub/libvex_ir.h +index 85805bb69..afe8dfd29 100644 +--- a/VEX/pub/libvex_ir.h ++++ b/VEX/pub/libvex_ir.h +@@ -588,10 +588,10 @@ typedef + + /* Binary operations, with rounding. */ + /* :: IRRoundingMode(I32) x F64 x F64 -> F64 */ +- Iop_AddF64, Iop_SubF64, Iop_MulF64, Iop_DivF64, ++ Iop_AddF64, Iop_SubF64, Iop_MulF64, Iop_DivF64, Iop_ScaleBF64, + + /* :: IRRoundingMode(I32) x F32 x F32 -> F32 */ +- Iop_AddF32, Iop_SubF32, Iop_MulF32, Iop_DivF32, ++ Iop_AddF32, Iop_SubF32, Iop_MulF32, Iop_DivF32, Iop_ScaleBF32, + + /* Variants of the above which produce a 64-bit result but which + round their result to a IEEE float range first. */ +@@ -610,10 +610,10 @@ typedef + + /* Unary operations, with rounding. */ + /* :: IRRoundingMode(I32) x F64 -> F64 */ +- Iop_SqrtF64, ++ Iop_SqrtF64, Iop_RSqrtF64, Iop_LogBF64, + + /* :: IRRoundingMode(I32) x F32 -> F32 */ +- Iop_SqrtF32, ++ Iop_SqrtF32, Iop_RSqrtF32, Iop_LogBF32, + + /* :: IRRoundingMode(I32) x F16 -> F16 */ + Iop_SqrtF16, +@@ -829,10 +829,14 @@ typedef + + /* --------- Possibly required by IEEE 754-2008. --------- */ + +- Iop_MaxNumF64, /* max, F64, numerical operand if other is a qNaN */ +- Iop_MinNumF64, /* min, F64, ditto */ +- Iop_MaxNumF32, /* max, F32, ditto */ +- Iop_MinNumF32, /* min, F32, ditto */ ++ Iop_MaxNumF64, /* max, F64, numerical operand if other is a qNaN */ ++ Iop_MinNumF64, /* min, F64, ditto */ ++ Iop_MaxNumAbsF64, /* max abs, F64, ditto */ ++ Iop_MinNumAbsF64, /* min abs, F64, ditto */ ++ Iop_MaxNumF32, /* max, F32, ditto */ ++ Iop_MinNumF32, /* min, F32, ditto */ ++ Iop_MaxNumAbsF32, /* max abs, F32, ditto */ ++ Iop_MinNumAbsF32, /* min abs, F32, ditto */ + + /* ------------------ 16-bit scalar FP ------------------ */ + +@@ -2503,6 +2507,7 @@ typedef + Ijk_SigFPE, /* current instruction synths generic SIGFPE */ + Ijk_SigFPE_IntDiv, /* current instruction synths SIGFPE - IntDiv */ + Ijk_SigFPE_IntOvf, /* current instruction synths SIGFPE - IntOvf */ ++ Ijk_SigSYS, /* current instruction synths SIGSYS */ + /* Unfortunately, various guest-dependent syscall kinds. They + all mean: do a syscall before continuing. */ + Ijk_Sys_syscall, /* amd64/x86 'syscall', ppc 'sc', arm 'svc #0' */ +@@ -2662,7 +2667,12 @@ typedef + /* Needed only on ARM. It cancels a reservation made by a + preceding Linked-Load, and needs to be handed through to the + back end, just as LL and SC themselves are. */ +- Imbe_CancelReservation ++ Imbe_CancelReservation, ++ /* Needed only on LOONGARCH64. It completes the synchronization ++ between the store operation and the instruction fetch operation ++ within a single processor core, and needs to be handed through ++ to the back end. 
*/ ++ Imbe_InsnFence + } + IRMBusEvent; + +diff --git a/VEX/pub/libvex_trc_values.h b/VEX/pub/libvex_trc_values.h +index cfd54ded3..90e2b60af 100644 +--- a/VEX/pub/libvex_trc_values.h ++++ b/VEX/pub/libvex_trc_values.h +@@ -57,6 +57,7 @@ + continuing */ + #define VEX_TRC_JMP_SIGBUS 93 /* deliver SIGBUS before continuing */ + #define VEX_TRC_JMP_SIGFPE 105 /* deliver SIGFPE before continuing */ ++#define VEX_TRC_JMP_SIGSYS 115 /* deliver SIGSYS before continuing */ + + #define VEX_TRC_JMP_SIGFPE_INTDIV 97 /* deliver SIGFPE (integer divide + by zero) before continuing */ +diff --git a/cachegrind/cg_arch.c b/cachegrind/cg_arch.c +index 52e898218..1d906464e 100644 +--- a/cachegrind/cg_arch.c ++++ b/cachegrind/cg_arch.c +@@ -475,6 +475,13 @@ configure_caches(cache_t *I1c, cache_t *D1c, cache_t *LLc, + *D1c = (cache_t) { 65536, 2, 64 }; + *LLc = (cache_t) { 262144, 8, 64 }; + ++#elif defined(VGA_loongarch64) ++ ++ // Set caches to default (for LOONGARCH64 - 3A5000) ++ *I1c = (cache_t) { 65536, 4, 64 }; ++ *D1c = (cache_t) { 65536, 4, 64 }; ++ *LLc = (cache_t) { 262144, 16, 64 }; ++ + #else + + #error "Unknown arch" +diff --git a/cachegrind/cg_branchpred.c b/cachegrind/cg_branchpred.c +index ba433ec2c..0d91b29cd 100644 +--- a/cachegrind/cg_branchpred.c ++++ b/cachegrind/cg_branchpred.c +@@ -44,7 +44,7 @@ + guaranteed to be zero? */ + #if defined(VGA_ppc32) || defined(VGA_ppc64be) || defined(VGA_ppc64le) \ + || defined(VGA_mips32) || defined(VGA_mips64) || defined(VGA_nanomips) \ +- || defined(VGA_arm64) ++ || defined(VGA_arm64) || defined(VGA_loongarch64) + # define N_IADDR_LO_ZERO_BITS 2 + #elif defined(VGA_x86) || defined(VGA_amd64) + # define N_IADDR_LO_ZERO_BITS 0 +diff --git a/configure.ac b/configure.ac +index 79b17f394..7cb42252a 100755 +--- a/configure.ac ++++ b/configure.ac +@@ -310,6 +310,11 @@ case "${host_cpu}" in + ARCH_MAX="nanomips" + ;; + ++ loongarch64*) ++ AC_MSG_RESULT([ok (${host_cpu})]) ++ ARCH_MAX="loongarch64" ++ ;; ++ + *) + AC_MSG_RESULT([no (${host_cpu})]) + AC_MSG_ERROR([Unsupported host architecture. Sorry]) +@@ -941,6 +946,17 @@ case "$ARCH_MAX-$VGCONF_OS" in + valt_load_address_sec_inner="0xUNSET" + AC_MSG_RESULT([ok (${ARCH_MAX}-${VGCONF_OS})]) + ;; ++ loongarch64-linux) ++ VGCONF_ARCH_PRI="loongarch64" ++ VGCONF_ARCH_SEC="" ++ VGCONF_PLATFORM_PRI_CAPS="LOONGARCH64_LINUX" ++ VGCONF_PLATFORM_SEC_CAPS="" ++ valt_load_address_pri_norml="0x58000000" ++ valt_load_address_pri_inner="0x38000000" ++ valt_load_address_sec_norml="0xUNSET" ++ valt_load_address_sec_inner="0xUNSET" ++ AC_MSG_RESULT([ok (${ARCH_MAX}-${VGCONF_OS})]) ++ ;; + x86-solaris) + VGCONF_ARCH_PRI="x86" + VGCONF_ARCH_SEC="" +@@ -1033,6 +1049,8 @@ AM_CONDITIONAL(VGCONF_ARCHS_INCLUDE_MIPS64, + test x$VGCONF_PLATFORM_PRI_CAPS = xMIPS64_LINUX ) + AM_CONDITIONAL(VGCONF_ARCHS_INCLUDE_NANOMIPS, + test x$VGCONF_PLATFORM_PRI_CAPS = xNANOMIPS_LINUX ) ++AM_CONDITIONAL(VGCONF_ARCHS_INCLUDE_LOONGARCH64, ++ test x$VGCONF_PLATFORM_PRI_CAPS = xLOONGARCH64_LINUX ) + + # Set up VGCONF_PLATFORMS_INCLUDE_. Either one or two of these + # become defined. 
+@@ -1063,6 +1081,8 @@ AM_CONDITIONAL(VGCONF_PLATFORMS_INCLUDE_MIPS64_LINUX, + test x$VGCONF_PLATFORM_PRI_CAPS = xMIPS64_LINUX) + AM_CONDITIONAL(VGCONF_PLATFORMS_INCLUDE_NANOMIPS_LINUX, + test x$VGCONF_PLATFORM_PRI_CAPS = xNANOMIPS_LINUX) ++AM_CONDITIONAL(VGCONF_PLATFORMS_INCLUDE_LOONGARCH64_LINUX, ++ test x$VGCONF_PLATFORM_PRI_CAPS = xLOONGARCH64_LINUX) + AM_CONDITIONAL(VGCONF_PLATFORMS_INCLUDE_X86_FREEBSD, + test x$VGCONF_PLATFORM_PRI_CAPS = xX86_FREEBSD \ + -o x$VGCONF_PLATFORM_SEC_CAPS = xX86_FREEBSD) +@@ -1094,7 +1114,8 @@ AM_CONDITIONAL(VGCONF_OS_IS_LINUX, + -o x$VGCONF_PLATFORM_PRI_CAPS = xS390X_LINUX \ + -o x$VGCONF_PLATFORM_PRI_CAPS = xMIPS32_LINUX \ + -o x$VGCONF_PLATFORM_PRI_CAPS = xMIPS64_LINUX \ +- -o x$VGCONF_PLATFORM_PRI_CAPS = xNANOMIPS_LINUX) ++ -o x$VGCONF_PLATFORM_PRI_CAPS = xNANOMIPS_LINUX \ ++ -o x$VGCONF_PLATFORM_PRI_CAPS = xLOONGARCH64_LINUX) + AM_CONDITIONAL(VGCONF_OS_IS_FREEBSD, + test x$VGCONF_PLATFORM_PRI_CAPS = xX86_FREEBSD \ + -o x$VGCONF_PLATFORM_PRI_CAPS = xAMD64_FREEBSD) +@@ -4908,7 +4929,8 @@ elif test x$VGCONF_PLATFORM_PRI_CAPS = xAMD64_LINUX \ + -o x$VGCONF_PLATFORM_PRI_CAPS = xPPC64_LINUX \ + -o x$VGCONF_PLATFORM_PRI_CAPS = xARM64_LINUX \ + -o x$VGCONF_PLATFORM_PRI_CAPS = xMIPS64_LINUX \ +- -o x$VGCONF_PLATFORM_PRI_CAPS = xS390X_LINUX ; then ++ -o x$VGCONF_PLATFORM_PRI_CAPS = xS390X_LINUX \ ++ -o x$VGCONF_PLATFORM_PRI_CAPS = xLOONGARCH64_LINUX; then + mflag_primary=$FLAG_M64 + elif test x$VGCONF_PLATFORM_PRI_CAPS = xX86_DARWIN ; then + mflag_primary="$FLAG_M32 -arch i386" +@@ -5398,6 +5420,7 @@ AC_CONFIG_FILES([ + memcheck/tests/amd64-linux/Makefile + memcheck/tests/arm64-linux/Makefile + memcheck/tests/x86-linux/Makefile ++ memcheck/tests/loongarch64-linux/Makefile + memcheck/tests/amd64-solaris/Makefile + memcheck/tests/x86-solaris/Makefile + memcheck/tests/amd64-freebsd/Makefile +@@ -5443,6 +5466,7 @@ AC_CONFIG_FILES([ + none/tests/mips32/Makefile + none/tests/mips64/Makefile + none/tests/nanomips/Makefile ++ none/tests/loongarch64/Makefile + none/tests/linux/Makefile + none/tests/darwin/Makefile + none/tests/solaris/Makefile +diff --git a/coregrind/Makefile.am b/coregrind/Makefile.am +index 80115f21f..8703f9fa0 100644 +--- a/coregrind/Makefile.am ++++ b/coregrind/Makefile.am +@@ -387,6 +387,7 @@ COREGRIND_SOURCES_COMMON = \ + m_dispatch/dispatch-mips32-linux.S \ + m_dispatch/dispatch-mips64-linux.S \ + m_dispatch/dispatch-nanomips-linux.S \ ++ m_dispatch/dispatch-loongarch64-linux.S \ + m_dispatch/dispatch-x86-freebsd.S \ + m_dispatch/dispatch-amd64-freebsd.S \ + m_dispatch/dispatch-x86-darwin.S \ +@@ -411,6 +412,7 @@ COREGRIND_SOURCES_COMMON = \ + m_gdbserver/valgrind-low-mips32.c \ + m_gdbserver/valgrind-low-mips64.c \ + m_gdbserver/valgrind-low-nanomips.c \ ++ m_gdbserver/valgrind-low-loongarch64.c \ + m_gdbserver/version.c \ + m_initimg/initimg-linux.c \ + m_initimg/initimg-freebsd.c \ +@@ -438,6 +440,7 @@ COREGRIND_SOURCES_COMMON = \ + m_sigframe/sigframe-mips32-linux.c \ + m_sigframe/sigframe-mips64-linux.c \ + m_sigframe/sigframe-nanomips-linux.c \ ++ m_sigframe/sigframe-loongarch64-linux.c \ + m_sigframe/sigframe-x86-darwin.c \ + m_sigframe/sigframe-amd64-darwin.c \ + m_sigframe/sigframe-solaris.c \ +@@ -452,6 +455,7 @@ COREGRIND_SOURCES_COMMON = \ + m_syswrap/syscall-mips32-linux.S \ + m_syswrap/syscall-mips64-linux.S \ + m_syswrap/syscall-nanomips-linux.S \ ++ m_syswrap/syscall-loongarch64-linux.S \ + m_syswrap/syscall-x86-freebsd.S \ + m_syswrap/syscall-amd64-freebsd.S \ + m_syswrap/syscall-x86-darwin.S \ +@@ -477,6 +481,7 @@ 
COREGRIND_SOURCES_COMMON = \ + m_syswrap/syswrap-mips32-linux.c \ + m_syswrap/syswrap-mips64-linux.c \ + m_syswrap/syswrap-nanomips-linux.c \ ++ m_syswrap/syswrap-loongarch64-linux.c \ + m_syswrap/syswrap-x86-darwin.c \ + m_syswrap/syswrap-amd64-darwin.c \ + m_syswrap/syswrap-xen.c \ +@@ -761,7 +766,15 @@ GDBSERVER_XML_FILES = \ + m_gdbserver/mips64-linux-valgrind.xml \ + m_gdbserver/mips64-fpu-valgrind-s1.xml \ + m_gdbserver/mips64-fpu-valgrind-s2.xml \ +- m_gdbserver/mips64-fpu.xml ++ m_gdbserver/mips64-fpu.xml \ ++ m_gdbserver/loongarch-base64.xml \ ++ m_gdbserver/loongarch-fpu64.xml \ ++ m_gdbserver/loongarch64-linux.xml \ ++ m_gdbserver/loongarch-base64-valgrind-s1.xml \ ++ m_gdbserver/loongarch-base64-valgrind-s2.xml \ ++ m_gdbserver/loongarch-fpu64-valgrind-s1.xml \ ++ m_gdbserver/loongarch-fpu64-valgrind-s2.xml \ ++ m_gdbserver/loongarch64-linux-valgrind.xml + + # so as to make sure these get copied into the install tree + vglibdir = $(pkglibexecdir) +diff --git a/coregrind/launcher-linux.c b/coregrind/launcher-linux.c +index bc95e3c11..5307fd13d 100644 +--- a/coregrind/launcher-linux.c ++++ b/coregrind/launcher-linux.c +@@ -67,6 +67,10 @@ + #define EM_NANOMIPS 249 + #endif + ++#ifndef EM_LOONGARCH ++#define EM_LOONGARCH 258 ++#endif ++ + #ifndef E_MIPS_ABI_O32 + #define E_MIPS_ABI_O32 0x00001000 + #endif +@@ -314,6 +318,10 @@ static const char *select_platform(const char *clientname) + (header.ehdr64.e_ident[EI_OSABI] == ELFOSABI_SYSV || + header.ehdr64.e_ident[EI_OSABI] == ELFOSABI_LINUX)) { + platform = "ppc64le-linux"; ++ } else if (header.ehdr64.e_machine == EM_LOONGARCH && ++ (header.ehdr64.e_ident[EI_OSABI] == ELFOSABI_SYSV || ++ header.ehdr64.e_ident[EI_OSABI] == ELFOSABI_LINUX)) { ++ platform = "loongarch64-linux"; + } + } else if (header.c[EI_DATA] == ELFDATA2MSB) { + # if !defined(VGPV_arm_linux_android) \ +@@ -415,7 +423,8 @@ int main(int argc, char** argv, char** envp) + (0==strcmp(VG_PLATFORM,"s390x-linux")) || + (0==strcmp(VG_PLATFORM,"mips32-linux")) || + (0==strcmp(VG_PLATFORM,"mips64-linux")) || +- (0==strcmp(VG_PLATFORM,"nanomips-linux"))) ++ (0==strcmp(VG_PLATFORM,"nanomips-linux")) || ++ (0==strcmp(VG_PLATFORM,"loongarch64-linux"))) + default_platform = VG_PLATFORM; + # elif defined(VGO_solaris) + if ((0==strcmp(VG_PLATFORM,"x86-solaris")) || +diff --git a/coregrind/m_aspacemgr/aspacemgr-common.c b/coregrind/m_aspacemgr/aspacemgr-common.c +index 816d2274f..0e79d3f3c 100644 +--- a/coregrind/m_aspacemgr/aspacemgr-common.c ++++ b/coregrind/m_aspacemgr/aspacemgr-common.c +@@ -157,7 +157,8 @@ SysRes VG_(am_do_mmap_NO_NOTIFY)( Addr start, SizeT length, UInt prot, + # elif defined(VGP_amd64_linux) \ + || defined(VGP_ppc64be_linux) || defined(VGP_ppc64le_linux) \ + || defined(VGP_s390x_linux) || defined(VGP_mips32_linux) \ +- || defined(VGP_mips64_linux) || defined(VGP_arm64_linux) ++ || defined(VGP_mips64_linux) || defined(VGP_arm64_linux) \ ++ || defined(VGP_loongarch64_linux) + res = VG_(do_syscall6)(__NR_mmap, (UWord)start, length, + prot, flags, fd, offset); + # elif defined(VGP_x86_darwin) +@@ -262,7 +263,8 @@ SysRes ML_(am_do_relocate_nooverlap_mapping_NO_NOTIFY)( + + SysRes ML_(am_open) ( const HChar* pathname, Int flags, Int mode ) + { +-# if defined(VGP_arm64_linux) || defined(VGP_nanomips_linux) ++# if defined(VGP_arm64_linux) || defined(VGP_nanomips_linux) \ ++ || defined(VGP_loongarch64_linux) + /* ARM64 wants to use __NR_openat rather than __NR_open. 
*/ + SysRes res = VG_(do_syscall4)(__NR_openat, + VKI_AT_FDCWD, (UWord)pathname, flags, mode); +@@ -291,7 +293,8 @@ void ML_(am_close) ( Int fd ) + Int ML_(am_readlink)(const HChar* path, HChar* buf, UInt bufsiz) + { + SysRes res; +-# if defined(VGP_arm64_linux) || defined(VGP_nanomips_linux) ++# if defined(VGP_arm64_linux) || defined(VGP_nanomips_linux) \ ++ || defined(VGP_loongarch64_linux) + res = VG_(do_syscall4)(__NR_readlinkat, VKI_AT_FDCWD, + (UWord)path, (UWord)buf, bufsiz); + # elif defined(VGO_linux) || defined(VGO_darwin) || defined(VGO_freebsd) +diff --git a/coregrind/m_aspacemgr/aspacemgr-linux.c b/coregrind/m_aspacemgr/aspacemgr-linux.c +index ae38d8bd0..021c5a267 100644 +--- a/coregrind/m_aspacemgr/aspacemgr-linux.c ++++ b/coregrind/m_aspacemgr/aspacemgr-linux.c +@@ -2773,7 +2773,8 @@ static SysRes VG_(am_mmap_file_float_valgrind_flags) ( SizeT length, UInt prot, + req.rkind = MAny; + req.start = 0; + #if defined(VGA_arm) || defined(VGA_arm64) \ +- || defined(VGA_mips32) || defined(VGA_mips64) || defined(VGA_nanomips) ++ || defined(VGA_mips32) || defined(VGA_mips64) || defined(VGA_nanomips) \ ++ || defined(VGA_loongarch64) + aspacem_assert(VKI_SHMLBA >= VKI_PAGE_SIZE); + #else + aspacem_assert(VKI_SHMLBA == VKI_PAGE_SIZE); +diff --git a/coregrind/m_cache.c b/coregrind/m_cache.c +index 428a4df43..38fa44ea1 100644 +--- a/coregrind/m_cache.c ++++ b/coregrind/m_cache.c +@@ -660,6 +660,239 @@ get_cache_info(VexArchInfo *vai) + return True; + } + ++#elif defined(VGA_loongarch64) ++ ++/* ++ * LoongArch method is straightforward, just extract appropriate bits via ++ * cpucfg instruction (__builtin_loongarch_cpucfg). ++ * ++ * 1. Get the properties of the cache from cpucfg16. ++ * 2. For each level of cache, get the properties from cpucfg17/18/19/20. ++ * ++ * It's a bit nasty since we have to get the total number of caches first. ++ * To avoid duplicating reads, I use "struct cache_status" to store some ++ * necessary information. 
++ */ ++ ++#define BIT(x) (1UL << (x)) ++#define GENMASK(h, l) (((~0UL) - (1UL << (l)) + 1) & (~0UL >> (64 - 1 - (h)))) ++ ++#define LOONGARCH_CPUCFG16 0x10 ++#define CPUCFG16_L1_IUPRE BIT(0) ++#define CPUCFG16_L1_IUUNIFY BIT(1) ++#define CPUCFG16_L1_DPRE BIT(2) ++#define CPUCFG16_L2_IUPRE BIT(3) ++#define CPUCFG16_L2_IUUNIFY BIT(4) ++#define CPUCFG16_L2_DPRE BIT(7) ++#define CPUCFG16_L3_IUPRE BIT(10) ++#define CPUCFG16_L3_IUUNIFY BIT(11) ++#define CPUCFG16_L3_DPRE BIT(14) ++ ++#define LOONGARCH_CPUCFG17 0x11 ++#define CPUCFG17_L1I_WAYS_M GENMASK(15, 0) ++#define CPUCFG17_L1I_SETS_M GENMASK(23, 16) ++#define CPUCFG17_L1I_SIZE_M GENMASK(30, 24) ++#define CPUCFG17_L1I_WAYS 0 ++#define CPUCFG17_L1I_SETS 16 ++#define CPUCFG17_L1I_SIZE 24 ++ ++#define LOONGARCH_CPUCFG18 0x12 ++#define CPUCFG18_L1D_WAYS_M GENMASK(15, 0) ++#define CPUCFG18_L1D_SETS_M GENMASK(23, 16) ++#define CPUCFG18_L1D_SIZE_M GENMASK(30, 24) ++#define CPUCFG18_L1D_WAYS 0 ++#define CPUCFG18_L1D_SETS 16 ++#define CPUCFG18_L1D_SIZE 24 ++ ++#define LOONGARCH_CPUCFG19 0x13 ++#define CPUCFG19_L2_WAYS_M GENMASK(15, 0) ++#define CPUCFG19_L2_SETS_M GENMASK(23, 16) ++#define CPUCFG19_L2_SIZE_M GENMASK(30, 24) ++#define CPUCFG19_L2_WAYS 0 ++#define CPUCFG19_L2_SETS 16 ++#define CPUCFG19_L2_SIZE 24 ++ ++#define LOONGARCH_CPUCFG20 0x14 ++#define CPUCFG20_L3_WAYS_M GENMASK(15, 0) ++#define CPUCFG20_L3_SETS_M GENMASK(23, 16) ++#define CPUCFG20_L3_SIZE_M GENMASK(30, 24) ++#define CPUCFG20_L3_WAYS 0 ++#define CPUCFG20_L3_SETS 16 ++#define CPUCFG20_L3_SIZE 24 ++ ++struct cache_status { ++ Bool has_iu; ++ Bool is_u; ++ Bool has_d; ++ Bool exist; ++ UInt num; ++}; ++ ++static inline UInt ++cpucfg(UInt reg) ++{ ++ return (UInt)__builtin_loongarch_cpucfg(reg); ++} ++ ++static void ++get_status(struct cache_status status[], UInt n) ++{ ++ Bool has_iu = status[n].has_iu; ++ Bool is_u = status[n].is_u; ++ Bool has_d = status[n].has_d; ++ ++ /* has_d only works with no ucache */ ++ status[n].has_d = has_d = toBool(!(has_iu && is_u) && has_d); ++ ++ status[n].exist = toBool(has_iu || has_d); ++ status[n].num = has_iu + has_d; ++} ++ ++static void ++get_cache(VexCacheInfo *ci, VexCacheKind kind, UInt level, ++ UInt line_size, UInt sets, UInt ways, UInt index) ++{ ++ UInt assoc = ways; ++ UInt size = sets * ways * line_size; ++ ci->caches[index] = VEX_CACHE_INIT(kind, level, size, line_size, assoc); ++} ++ ++static void ++get_cache_info_for_l1(VexCacheInfo *ci, struct cache_status status[]) ++{ ++ UInt config; ++ UInt line_size, sets, ways; ++ UInt index = 0; ++ ++ if (!status[0].exist) ++ return; ++ ++ if (status[0].has_iu) { ++ config = cpucfg(LOONGARCH_CPUCFG17); ++ line_size = 1 << ((config & CPUCFG17_L1I_SIZE_M) >> CPUCFG17_L1I_SIZE); ++ sets = 1 << ((config & CPUCFG17_L1I_SETS_M) >> CPUCFG17_L1I_SETS); ++ ways = ((config & CPUCFG17_L1I_WAYS_M) >> CPUCFG17_L1I_WAYS) + 1; ++ get_cache(ci, status[0].is_u ? 
UNIFIED_CACHE : INSN_CACHE, ++ 1, line_size, sets, ways, index++); ++ } ++ ++ if (status[0].has_d) { ++ config = cpucfg(LOONGARCH_CPUCFG18); ++ line_size = 1 << ((config & CPUCFG18_L1D_SIZE_M) >> CPUCFG18_L1D_SIZE); ++ sets = 1 << ((config & CPUCFG18_L1D_SETS_M) >> CPUCFG18_L1D_SETS); ++ ways = ((config & CPUCFG18_L1D_WAYS_M) >> CPUCFG18_L1D_WAYS) + 1; ++ get_cache(ci, DATA_CACHE, 1, line_size, sets, ways, index++); ++ } ++ ++ /* Sanity check */ ++ vg_assert(index == status[0].num); ++} ++ ++static void ++get_cache_info_for_l2(VexCacheInfo *ci, struct cache_status status[]) ++{ ++ UInt config; ++ UInt line_size, sets, ways; ++ UInt index = status[0].num; ++ ++ if (!status[1].exist) ++ return; ++ ++ config = cpucfg(LOONGARCH_CPUCFG19); ++ line_size = 1 << ((config & CPUCFG19_L2_SIZE_M) >> CPUCFG19_L2_SIZE); ++ sets = 1 << ((config & CPUCFG19_L2_SETS_M) >> CPUCFG19_L2_SETS); ++ ways = ((config & CPUCFG19_L2_WAYS_M) >> CPUCFG19_L2_WAYS) + 1; ++ ++ if (status[1].has_iu) ++ get_cache(ci, status[1].is_u ? UNIFIED_CACHE : INSN_CACHE, ++ 2, line_size, sets, ways, index++); ++ ++ if (status[1].has_d) ++ get_cache(ci, DATA_CACHE, 2, line_size, sets, ways, index++); ++ ++ /* Sanity check */ ++ vg_assert(index == status[0].num + status[1].num); ++} ++ ++static void ++get_cache_info_for_l3(VexCacheInfo *ci, struct cache_status status[]) ++{ ++ UInt config; ++ UInt line_size, sets, ways; ++ UInt index = status[0].num + status[1].num; ++ ++ if (!status[2].exist) ++ return; ++ ++ config = cpucfg(LOONGARCH_CPUCFG20); ++ line_size = 1 << ((config & CPUCFG20_L3_SIZE_M) >> CPUCFG20_L3_SIZE); ++ sets = 1 << ((config & CPUCFG20_L3_SETS_M) >> CPUCFG20_L3_SETS); ++ ways = ((config & CPUCFG20_L3_WAYS_M) >> CPUCFG20_L3_WAYS) + 1; ++ ++ if (status[2].has_iu) ++ get_cache(ci, status[2].is_u ? UNIFIED_CACHE : INSN_CACHE, ++ 3, line_size, sets, ways, index++); ++ ++ if (status[2].has_d) ++ get_cache(ci, DATA_CACHE, 3, line_size, sets, ways, index++); ++ ++ /* Sanity check */ ++ vg_assert(index == status[0].num + status[1].num + status[2].num); ++} ++ ++static Bool ++get_cache_info_from_cpucfg(VexCacheInfo *ci) ++{ ++ Int i; ++ struct cache_status status[3]; ++ UInt config = cpucfg(LOONGARCH_CPUCFG16); ++ ++ /* NB: Bool is unsigned char! 
*/ ++ /* For l1 */ ++ status[0].has_iu = toBool(config & CPUCFG16_L1_IUPRE); ++ status[0].is_u = toBool(config & CPUCFG16_L1_IUUNIFY); ++ status[0].has_d = toBool(config & CPUCFG16_L1_DPRE); ++ get_status(status, 0); ++ ++ /* For l2 */ ++ status[1].has_iu = toBool(config & CPUCFG16_L2_IUPRE); ++ status[1].is_u = toBool(config & CPUCFG16_L2_IUUNIFY); ++ status[1].has_d = toBool(config & CPUCFG16_L2_DPRE); ++ get_status(status, 1); ++ ++ /* For l3 */ ++ status[2].has_iu = toBool(config & CPUCFG16_L3_IUPRE); ++ status[2].is_u = toBool(config & CPUCFG16_L3_IUUNIFY); ++ status[2].has_d = toBool(config & CPUCFG16_L3_DPRE); ++ get_status(status, 2); ++ ++ ci->num_levels = 0; ++ ci->num_caches = 0; ++ for (i = 0; i < 3; i++) { ++ ci->num_levels += status[i].exist; ++ ci->num_caches += status[i].num; ++ } ++ ++ if (ci->num_caches == 0) { ++ VG_(debugLog)(1, "cache", "Autodetect failed\n"); ++ return False; ++ } ++ ++ ci->caches = VG_(malloc)("m_cache", ci->num_caches * sizeof(VexCache)); ++ get_cache_info_for_l1(ci, status); ++ get_cache_info_for_l2(ci, status); ++ get_cache_info_for_l3(ci, status); ++ return True; ++} ++ ++static Bool ++get_cache_info(VexArchInfo *vai) ++{ ++ VexCacheInfo *ci = &vai->hwcache_info; ++ ci->icaches_maintain_coherence = True; ++ return get_cache_info_from_cpucfg(ci); ++} ++ + #else + + #error "Unknown arch" +diff --git a/coregrind/m_coredump/coredump-elf.c b/coregrind/m_coredump/coredump-elf.c +index 4a8c29c52..82b1b436a 100644 +--- a/coregrind/m_coredump/coredump-elf.c ++++ b/coregrind/m_coredump/coredump-elf.c +@@ -489,6 +489,40 @@ static void fill_prstatus(const ThreadState *tst, + regs[VKI_MIPS32_EF_CP0_STATUS] = arch->vex.guest_CP0_status; + regs[VKI_MIPS32_EF_CP0_EPC] = arch->vex.guest_PC; + # undef DO ++#elif defined(VGP_loongarch64_linux) ++ regs->regs[0] = arch->vex.guest_R0; ++ regs->regs[1] = arch->vex.guest_R1; ++ regs->regs[2] = arch->vex.guest_R2; ++ regs->regs[3] = arch->vex.guest_R3; ++ regs->regs[4] = arch->vex.guest_R4; ++ regs->regs[5] = arch->vex.guest_R5; ++ regs->regs[6] = arch->vex.guest_R6; ++ regs->regs[7] = arch->vex.guest_R7; ++ regs->regs[8] = arch->vex.guest_R8; ++ regs->regs[9] = arch->vex.guest_R9; ++ regs->regs[10] = arch->vex.guest_R10; ++ regs->regs[11] = arch->vex.guest_R11; ++ regs->regs[12] = arch->vex.guest_R12; ++ regs->regs[13] = arch->vex.guest_R13; ++ regs->regs[14] = arch->vex.guest_R14; ++ regs->regs[15] = arch->vex.guest_R15; ++ regs->regs[16] = arch->vex.guest_R16; ++ regs->regs[17] = arch->vex.guest_R17; ++ regs->regs[18] = arch->vex.guest_R18; ++ regs->regs[19] = arch->vex.guest_R19; ++ regs->regs[20] = arch->vex.guest_R20; ++ regs->regs[21] = arch->vex.guest_R21; ++ regs->regs[22] = arch->vex.guest_R22; ++ regs->regs[23] = arch->vex.guest_R23; ++ regs->regs[24] = arch->vex.guest_R24; ++ regs->regs[25] = arch->vex.guest_R25; ++ regs->regs[26] = arch->vex.guest_R26; ++ regs->regs[27] = arch->vex.guest_R27; ++ regs->regs[28] = arch->vex.guest_R28; ++ regs->regs[29] = arch->vex.guest_R29; ++ regs->regs[30] = arch->vex.guest_R30; ++ regs->regs[31] = arch->vex.guest_R31; ++ regs->csr_era = arch->vex.guest_PC; + #elif defined(VGP_amd64_freebsd) + regs->rflags = LibVEX_GuestAMD64_get_rflags( &arch->vex ); + regs->rsp = arch->vex.guest_RSP; +@@ -654,6 +688,14 @@ static void fill_fpu(const ThreadState *tst, vki_elf_fpregset_t *fpu) + # undef DO + #elif defined(VGP_nanomips_linux) + ++#elif defined(VGP_loongarch64_linux) ++# define DO(n) (*fpu)[n] = *(const double*)(&arch->vex.guest_X##n) ++ DO(0); DO(1); DO(2); DO(3); DO(4); 
DO(5); DO(6); DO(7); ++ DO(8); DO(9); DO(10); DO(11); DO(12); DO(13); DO(14); DO(15); ++ DO(16); DO(17); DO(18); DO(19); DO(20); DO(21); DO(22); DO(23); ++ DO(24); DO(25); DO(26); DO(27); DO(28); DO(29); DO(30); DO(31); ++# undef DO ++ + #elif defined(VGP_x86_freebsd) + + #elif defined(VGP_amd64_freebsd) +diff --git a/coregrind/m_debuginfo/d3basics.c b/coregrind/m_debuginfo/d3basics.c +index e9e8944af..4cd99cb0c 100644 +--- a/coregrind/m_debuginfo/d3basics.c ++++ b/coregrind/m_debuginfo/d3basics.c +@@ -555,6 +555,9 @@ static Bool get_Dwarf_Reg( /*OUT*/Addr* a, Word regno, const RegSummary* regs ) + # elif defined(VGP_arm64_linux) + if (regno == 31) { *a = regs->sp; return True; } + if (regno == 29) { *a = regs->fp; return True; } ++# elif defined(VGP_loongarch64_linux) ++ if (regno == 3) { *a = regs->sp; return True; } ++ if (regno == 22) { *a = regs->fp; return True; } + # else + # error "Unknown platform" + # endif +diff --git a/coregrind/m_debuginfo/debuginfo.c b/coregrind/m_debuginfo/debuginfo.c +index 2d2accc99..a7ff4a57e 100644 +--- a/coregrind/m_debuginfo/debuginfo.c ++++ b/coregrind/m_debuginfo/debuginfo.c +@@ -1262,7 +1262,7 @@ ULong VG_(di_notify_mmap)( Addr a, Bool allow_SkFileV, Int use_fd ) + is_ro_map = False; + + # if defined(VGA_x86) || defined(VGA_ppc32) || defined(VGA_mips32) \ +- || defined(VGA_mips64) || defined(VGA_nanomips) ++ || defined(VGA_mips64) || defined(VGA_nanomips) || defined(VGA_loongarch64) + is_rx_map = seg->hasR && seg->hasX; + is_rw_map = seg->hasR && seg->hasW; + # elif defined(VGA_amd64) || defined(VGA_ppc64be) || defined(VGA_ppc64le) \ +@@ -2998,6 +2998,11 @@ UWord evalCfiExpr ( const XArray* exprs, Int ix, + case Creg_ARM64_SP: return eec->uregs->sp; + case Creg_ARM64_X30: return eec->uregs->x30; + case Creg_ARM64_X29: return eec->uregs->x29; ++# elif defined(VGA_loongarch64) ++ case Creg_LOONGARCH64_PC: return eec->uregs->pc; ++ case Creg_LOONGARCH64_RA: return eec->uregs->ra; ++ case Creg_LOONGARCH64_SP: return eec->uregs->sp; ++ case Creg_LOONGARCH64_FP: return eec->uregs->fp; + # else + # error "Unsupported arch" + # endif +@@ -3269,6 +3274,13 @@ static Addr compute_cfa ( const D3UnwindRegs* uregs, + case CFIC_ARM64_X29REL: + cfa = cfsi_m->cfa_off + uregs->x29; + break; ++# elif defined(VGA_loongarch64) ++ case CFIC_IA_SPREL: ++ cfa = cfsi_m->cfa_off + uregs->sp; ++ break; ++ case CFIC_IA_BPREL: ++ cfa = cfsi_m->cfa_off + uregs->fp; ++ break; + # else + # error "Unsupported arch" + # endif +@@ -3340,6 +3352,14 @@ Addr ML_(get_CFA) ( Addr ip, Addr sp, Addr fp, + return compute_cfa(&uregs, + min_accessible, max_accessible, ce->di, ce->cfsi_m); + } ++#elif defined(VGA_loongarch64) ++ { D3UnwindRegs uregs; ++ uregs.pc = ip; ++ uregs.sp = sp; ++ uregs.fp = fp; ++ return compute_cfa(&uregs, ++ min_accessible, max_accessible, ce->di, ce->cfsi_m); ++ } + + # else + return 0; /* indicates failure */ +@@ -3391,6 +3411,8 @@ void VG_(ppUnwindInfo) (Addr from, Addr to) + For arm64, the unwound registers are: X29(FP) X30(LR) SP PC. + + For s390, the unwound registers are: R11(FP) R14(LR) R15(SP) F0..F7 PC. 
++ ++ For loongarch64, the unwound registers are: FP SP PC + */ + Bool VG_(use_CF_info) ( /*MOD*/D3UnwindRegs* uregsHere, + Addr min_accessible, +@@ -3414,6 +3436,8 @@ Bool VG_(use_CF_info) ( /*MOD*/D3UnwindRegs* uregsHere, + # elif defined(VGA_ppc32) || defined(VGA_ppc64be) || defined(VGA_ppc64le) + # elif defined(VGP_arm64_linux) + ipHere = uregsHere->pc; ++# elif defined(VGA_loongarch64) ++ ipHere = uregsHere->pc; + # else + # error "Unknown arch" + # endif +@@ -3559,6 +3583,10 @@ Bool VG_(use_CF_info) ( /*MOD*/D3UnwindRegs* uregsHere, + COMPUTE(uregsPrev.sp, uregsHere->sp, cfsi_m->sp_how, cfsi_m->sp_off); + COMPUTE(uregsPrev.x30, uregsHere->x30, cfsi_m->x30_how, cfsi_m->x30_off); + COMPUTE(uregsPrev.x29, uregsHere->x29, cfsi_m->x29_how, cfsi_m->x29_off); ++# elif defined(VGA_loongarch64) ++ COMPUTE(uregsPrev.pc, uregsHere->ra, cfsi_m->ra_how, cfsi_m->ra_off); ++ COMPUTE(uregsPrev.sp, uregsHere->sp, cfsi_m->sp_how, cfsi_m->sp_off); ++ COMPUTE(uregsPrev.fp, uregsHere->fp, cfsi_m->fp_how, cfsi_m->fp_off); + # else + # error "Unknown arch" + # endif +diff --git a/coregrind/m_debuginfo/priv_storage.h b/coregrind/m_debuginfo/priv_storage.h +index a4b90d36b..8188064d4 100644 +--- a/coregrind/m_debuginfo/priv_storage.h ++++ b/coregrind/m_debuginfo/priv_storage.h +@@ -367,6 +367,19 @@ typedef + Int fp_off; + } + DiCfSI_m; ++#elif defined(VGA_loongarch64) ++typedef ++ struct { ++ UChar cfa_how; /* a CFIC_ value */ ++ UChar ra_how; /* a CFIR_ value */ ++ UChar sp_how; /* a CFIR_ value */ ++ UChar fp_how; /* a CFIR_ value */ ++ Int cfa_off; ++ Int ra_off; ++ Int sp_off; ++ Int fp_off; ++ } ++ DiCfSI_m; + #else + # error "Unknown arch" + #endif +@@ -422,7 +435,11 @@ typedef + Creg_S390_SP, + Creg_S390_FP, + Creg_S390_LR, +- Creg_MIPS_RA ++ Creg_MIPS_RA, ++ Creg_LOONGARCH64_PC, ++ Creg_LOONGARCH64_RA, ++ Creg_LOONGARCH64_SP, ++ Creg_LOONGARCH64_FP + } + CfiReg; + +diff --git a/coregrind/m_debuginfo/readdwarf.c b/coregrind/m_debuginfo/readdwarf.c +index 79d6764ea..2636cc2cc 100644 +--- a/coregrind/m_debuginfo/readdwarf.c ++++ b/coregrind/m_debuginfo/readdwarf.c +@@ -2066,6 +2066,10 @@ void ML_(read_debuginfo_dwarf1) ( + # define FP_REG 30 + # define SP_REG 29 + # define RA_REG_DEFAULT 31 ++#elif defined(VGP_loongarch64_linux) ++# define FP_REG 22 ++# define SP_REG 3 ++# define RA_REG_DEFAULT 1 + #else + # error "Unknown platform" + #endif +@@ -2084,6 +2088,8 @@ void ML_(read_debuginfo_dwarf1) ( + # define N_CFI_REGS 128 + #elif defined(VGP_s390x_linux) + # define N_CFI_REGS 66 ++#elif defined(VGP_loongarch64_linux) ++# define N_CFI_REGS 32 + #else + # define N_CFI_REGS 20 + #endif +@@ -2310,6 +2316,10 @@ static void initUnwindContext ( /*OUT*/UnwindContext* ctx ) + start out as RR_Same. */ + ctx->state[j].reg[29/*FP*/].tag = RR_Same; + ctx->state[j].reg[30/*LR*/].tag = RR_Same; ++# elif defined(VGA_loongarch64) ++ /* Registers fp and ra start out implicitly as RR_Same. 
*/ ++ ctx->state[j].reg[FP_REG].tag = RR_Same; ++ ctx->state[j].reg[RA_REG_DEFAULT].tag = RR_Same; + # endif + } + } +@@ -2392,7 +2402,8 @@ static Bool summarise_context(/*OUT*/Addr* base, + if (ctxs->cfa_is_regoff && ctxs->cfa_reg == SP_REG) { + si_m->cfa_off = ctxs->cfa_off; + # if defined(VGA_x86) || defined(VGA_amd64) || defined(VGA_s390x) \ +- || defined(VGA_mips32) || defined(VGA_nanomips) || defined(VGA_mips64) ++ || defined(VGA_mips32) || defined(VGA_nanomips) || defined(VGA_mips64) \ ++ || defined(VGA_loongarch64) + si_m->cfa_how = CFIC_IA_SPREL; + # elif defined(VGA_arm) + si_m->cfa_how = CFIC_ARM_R13REL; +@@ -2406,7 +2417,8 @@ static Bool summarise_context(/*OUT*/Addr* base, + if (ctxs->cfa_is_regoff && ctxs->cfa_reg == FP_REG) { + si_m->cfa_off = ctxs->cfa_off; + # if defined(VGA_x86) || defined(VGA_amd64) || defined(VGA_s390x) \ +- || defined(VGA_mips32) || defined(VGA_nanomips) || defined(VGA_mips64) ++ || defined(VGA_mips32) || defined(VGA_nanomips) || defined(VGA_mips64) \ ++ || defined(VGA_loongarch64) + si_m->cfa_how = CFIC_IA_BPREL; + # elif defined(VGA_arm) + si_m->cfa_how = CFIC_ARM_R12REL; +@@ -2786,6 +2798,30 @@ static Bool summarise_context(/*OUT*/Addr* base, + # elif defined(VGA_ppc32) || defined(VGA_ppc64be) || defined(VGA_ppc64le) + /* These don't use CFI based unwinding (is that really true?) */ + ++# elif defined(VGA_loongarch64) ++ ++ /* --- entire tail of this fn specialised for loongarch64 --- */ ++ ++ SUMMARISE_HOW(si_m->ra_how, si_m->ra_off, ctxs->reg[ctx->ra_reg]); ++ SUMMARISE_HOW(si_m->fp_how, si_m->fp_off, ctxs->reg[FP_REG]); ++ ++ /* on loongarch64, it seems the old sp value before the call is always ++ the same as the CFA. Therefore ... */ ++ si_m->sp_how = CFIR_CFAREL; ++ si_m->sp_off = 0; ++ ++ /* bogus looking range? Note, we require that the difference is ++ representable in 32 bits. 
*/ ++ if (loc_start >= ctx->loc) ++ { why = 4; goto failed; } ++ if (ctx->loc - loc_start > 10000000 /* let's say */) ++ { why = 5; goto failed; } ++ ++ *base = loc_start + ctx->initloc; ++ *len = (UInt)(ctx->loc - loc_start); ++ ++ return True; ++ + # else + # error "Unknown arch" + # endif +@@ -2885,6 +2921,13 @@ static Int copy_convert_CfiExpr_tree ( XArray* dstxa, + return ML_(CfiExpr_CfiReg)( dstxa, Creg_ARM64_X30 ); + # elif defined(VGA_ppc32) || defined(VGA_ppc64be) \ + || defined(VGA_ppc64le) ++# elif defined(VGA_loongarch64) ++ if (dwreg == SP_REG) ++ return ML_(CfiExpr_CfiReg)( dstxa, Creg_LOONGARCH64_SP ); ++ if (dwreg == FP_REG) ++ return ML_(CfiExpr_CfiReg)( dstxa, Creg_LOONGARCH64_FP ); ++ if (dwreg == srcuc->ra_reg) ++ return ML_(CfiExpr_CfiReg)( dstxa, Creg_LOONGARCH64_RA ); + # else + # error "Unknown arch" + # endif +diff --git a/coregrind/m_debuginfo/readelf.c b/coregrind/m_debuginfo/readelf.c +index ce7b7998d..bf7feffb5 100644 +--- a/coregrind/m_debuginfo/readelf.c ++++ b/coregrind/m_debuginfo/readelf.c +@@ -1780,7 +1780,8 @@ static HChar* readlink_path (const HChar *path) + + while (tries > 0) { + SysRes res; +-#if defined(VGP_arm64_linux) || defined(VGP_nanomips_linux) ++#if defined(VGP_arm64_linux) || defined(VGP_nanomips_linux) \ ++ || defined(VGP_loongarch64_linux) + res = VG_(do_syscall4)(__NR_readlinkat, VKI_AT_FDCWD, + (UWord)path, (UWord)buf, bufsiz); + #elif defined(VGO_linux) || defined(VGO_darwin) || defined(VGO_freebsd) +@@ -2653,6 +2654,7 @@ Bool ML_(read_elf_debug_info) ( struct _DebugInfo* di ) + || defined(VGP_arm_linux) || defined (VGP_s390x_linux) \ + || defined(VGP_mips32_linux) || defined(VGP_mips64_linux) \ + || defined(VGP_arm64_linux) || defined(VGP_nanomips_linux) \ ++ || defined(VGP_loongarch64_linux) \ + || defined(VGP_x86_solaris) || defined(VGP_amd64_solaris) \ + || defined(VGP_x86_freebsd) || defined(VGP_amd64_freebsd) + /* Accept .plt where mapped as rx (code) */ +diff --git a/coregrind/m_debuginfo/storage.c b/coregrind/m_debuginfo/storage.c +index c3fa62e96..bef564808 100644 +--- a/coregrind/m_debuginfo/storage.c ++++ b/coregrind/m_debuginfo/storage.c +@@ -260,6 +260,11 @@ void ML_(ppDiCfSI) ( const XArray* /* of CfiExpr */ exprs, + SHOW_HOW(si_m->x30_how, si_m->x30_off); + VG_(printf)(" X29="); + SHOW_HOW(si_m->x29_how, si_m->x29_off); ++# elif defined(VGP_loongarch64_linux) ++ VG_(printf)(" SP="); ++ SHOW_HOW(si_m->sp_how, si_m->sp_off); ++ VG_(printf)(" FP="); ++ SHOW_HOW(si_m->fp_how, si_m->fp_off); + # else + # error "Unknown arch" + # endif +@@ -1010,6 +1015,10 @@ static void ppCfiReg ( CfiReg reg ) + case Creg_S390_SP: VG_(printf)("SP"); break; + case Creg_S390_FP: VG_(printf)("FP"); break; + case Creg_S390_LR: VG_(printf)("LR"); break; ++ case Creg_LOONGARCH64_PC: VG_(printf)("PC"); break; ++ case Creg_LOONGARCH64_RA: VG_(printf)("RA"); break; ++ case Creg_LOONGARCH64_SP: VG_(printf)("SP"); break; ++ case Creg_LOONGARCH64_FP: VG_(printf)("FP"); break; + default: vg_assert(0); + } + } +diff --git a/coregrind/m_debuglog.c b/coregrind/m_debuglog.c +index 355c3caf5..1d4f08984 100644 +--- a/coregrind/m_debuglog.c ++++ b/coregrind/m_debuglog.c +@@ -601,6 +601,41 @@ static UInt local_sys_getpid ( void ) + return a0; + } + ++#elif defined(VGP_loongarch64_linux) ++ ++static UInt local_sys_write_stderr ( const HChar* buf, Int n ) ++{ ++ ULong ret; ++ __asm__ volatile ( ++ "li.w $a0, 2 \n\t" // stderr ++ "move $a1, %1 \n\t" ++ "move $a2, %2 \n\t" ++ "li.w $a7, " VG_STRINGIFY(__NR_write) " \n\t" ++ "syscall 0 \n\t" ++ "move %0, $a0 \n\t" ++ : 
"=r" (ret) ++ : "r" (buf), "r" (n) ++ : "memory", "$a0", "$a1", "$a2", "$a3", "$a4", "$a5", "$a6", "$a7", ++ "$t0", "$t1", "$t2", "$t3", "$t4", "$t5", "$t6", "$t7", "$t8" ++ ); ++ return ret >= 0 ? (UInt)ret : -1; ++} ++ ++static UInt local_sys_getpid ( void ) ++{ ++ ULong ret; ++ __asm__ volatile ( ++ "li.w $a7, " VG_STRINGIFY(__NR_getpid) " \n\t" ++ "syscall 0 \n\t" ++ "move %0, $a0 \n\t" ++ : "=r" (ret) ++ : ++ : "memory", "$a0", "$a1", "$a2", "$a3", "$a4", "$a5", "$a6", "$a7", ++ "$t0", "$t1", "$t2", "$t3", "$t4", "$t5", "$t6", "$t7", "$t8" ++ ); ++ return (UInt)ret; ++} ++ + #elif defined(VGP_x86_solaris) + static UInt local_sys_write_stderr ( const HChar* buf, Int n ) + { +diff --git a/coregrind/m_dispatch/dispatch-loongarch64-linux.S b/coregrind/m_dispatch/dispatch-loongarch64-linux.S +new file mode 100644 +index 000000000..dec165294 +--- /dev/null ++++ b/coregrind/m_dispatch/dispatch-loongarch64-linux.S +@@ -0,0 +1,314 @@ ++ ++/*--------------------------------------------------------------------*/ ++/*--- The core dispatch loop, for jumping to a code address. ---*/ ++/*--- dispatch-loongarch64-linux.S ---*/ ++/*--------------------------------------------------------------------*/ ++ ++/* ++ This file is part of Valgrind, a dynamic binary instrumentation ++ framework. ++ ++ Copyright (C) 2021-2022 Loongson Technology Corporation Limited ++ ++ This program is free software; you can redistribute it and/or ++ modify it under the terms of the GNU General Public License as ++ published by the Free Software Foundation; either version 2 of the ++ License, or (at your option) any later version. ++ ++ This program is distributed in the hope that it will be useful, but ++ WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ General Public License for more details. ++ ++ You should have received a copy of the GNU General Public License ++ along with this program; if not, see . ++ ++ The GNU General Public License is contained in the file COPYING. ++*/ ++ ++#include "pub_core_basics_asm.h" ++ ++#if defined(VGP_loongarch64_linux) ++ ++#include "pub_core_dispatch_asm.h" ++#include "pub_core_transtab_asm.h" ++#include "libvex_guest_offsets.h" /* for OFFSET_loongarch64_* */ ++ ++ ++/*------------------------------------------------------------*/ ++/*--- ---*/ ++/*--- The dispatch loop. VG_(disp_run_translations) is ---*/ ++/*--- used to run all translations, ---*/ ++/*--- including no-redir ones. ---*/ ++/*--- ---*/ ++/*------------------------------------------------------------*/ ++ ++/*----------------------------------------------------*/ ++/*--- Entry and preamble (set everything up) ---*/ ++/*----------------------------------------------------*/ ++ ++/* signature: ++void VG_(disp_run_translations)( UWord* two_words, ++ void* guest_state, ++ Addr host_addr ); ++*/ ++ ++.text ++.globl VG_(disp_run_translations) ++VG_(disp_run_translations): ++ /* a0 holds two_words */ ++ /* a1 holds guest_state */ ++ /* a2 holds host_addr */ ++ ++ /* New stack frame. Stack must remain 16 aligned (at least) */ ++ addi.d $sp, $sp, -96 ++ ++ /* Save ra */ ++ st.d $ra, $sp, 0 ++ ++ /* .. and s0 - s8 */ ++ st.d $s0, $sp, 8 ++ st.d $s1, $sp, 16 ++ st.d $s2, $sp, 24 ++ st.d $s3, $sp, 32 ++ st.d $s4, $sp, 40 ++ st.d $s5, $sp, 48 ++ st.d $s6, $sp, 56 ++ st.d $s7, $sp, 64 ++ st.d $s8, $sp, 72 ++ ++ /* ... and fp */ ++ st.d $fp, $sp, 80 ++ ++ /* and a0. 
In postamble it will be restored such that the ++ return values can be written */ ++ st.d $a0, $sp, 88 ++ ++ /* Load address of guest state into s8 */ ++ move $s8, $a1 ++ ++ /* and jump into the code cache. Chained translations in ++ the code cache run, until for whatever reason, they can't ++ continue. When that happens, the translation in question ++ will jump (or call) to one of the continuation points ++ VG_(cp_...) below. */ ++ ibar 0 /* Insn sync barrier */ ++ jr $a2 ++ /*NOTREACHED*/ ++ ++/*----------------------------------------------------*/ ++/*--- Postamble and exit. ---*/ ++/*----------------------------------------------------*/ ++ ++postamble: ++ /* At this point, a0 and a1 contain two ++ words to be returned to the caller. a0 ++ holds a TRC value, and a1 optionally may ++ hold another word (for CHAIN_ME exits, the ++ address of the place to patch.) */ ++ ++ /* Restore a0 from stack to t0; holds address of two_words */ ++ ld.d $t0, $sp, 88 ++ st.d $a0, $t0, 0 /* Store a0 to two_words[0] */ ++ st.d $a1, $t0, 8 /* Store a1 to two_words[1] */ ++ ++ /* Restore ra */ ++ ld.d $ra, $sp, 0 ++ ++ /* ... and s0 - s8 */ ++ ld.d $s0, $sp, 8 ++ ld.d $s1, $sp, 16 ++ ld.d $s2, $sp, 24 ++ ld.d $s3, $sp, 32 ++ ld.d $s4, $sp, 40 ++ ld.d $s5, $sp, 48 ++ ld.d $s6, $sp, 56 ++ ld.d $s7, $sp, 64 ++ ld.d $s8, $sp, 72 ++ ++ /* ... and fp */ ++ ld.d $fp, $sp, 80 ++ ++ addi.d $sp, $sp, 96 /* Restore sp */ ++ jr $ra ++ /*NOTREACHED*/ ++ ++/*----------------------------------------------------*/ ++/*--- Continuation points ---*/ ++/*----------------------------------------------------*/ ++ ++/* ------ Chain me to slow entry point ------ */ ++.globl VG_(disp_cp_chain_me_to_slowEP) ++VG_(disp_cp_chain_me_to_slowEP): ++ /* We got called. The return address indicates ++ where the patching needs to happen. Collect ++ the return address and, exit back to C land, ++ handing the caller the pair (Chain_me_S, RA) */ ++ li.w $a0, VG_TRC_CHAIN_ME_TO_SLOW_EP ++ move $a1, $ra ++ /* 4 * 4 = mkLoadImm_EXACTLY4 ++ 4 = jirl $ra, $t0, 0 */ ++ addi.d $a1, $a1, -20 ++ b postamble ++ /*NOTREACHED*/ ++ ++/* ------ Chain me to fast entry point ------ */ ++.globl VG_(disp_cp_chain_me_to_fastEP) ++VG_(disp_cp_chain_me_to_fastEP): ++ /* We got called. The return address indicates ++ where the patching needs to happen. Collect ++ the return address and, exit back to C land, ++ handing the caller the pair (Chain_me_S, RA) */ ++ li.w $a0, VG_TRC_CHAIN_ME_TO_FAST_EP ++ move $a1, $ra ++ /* 4 * 4 = mkLoadImm_EXACTLY4 ++ 4 = jirl $ra, $t0, 0 */ ++ addi.d $a1, $a1, -20 ++ b postamble ++ /*NOTREACHED*/ ++ ++/* ------ Indirect but boring jump ------ */ ++.globl VG_(disp_cp_xindir) ++VG_(disp_cp_xindir): ++ /* Where are we going? */ ++ ld.d $t0, $s8, OFFSET_loongarch64_PC ++ ++ /* Stats only */ ++ la.local $t4, VG_(stats__n_xIndirs_32) ++ ld.d $t1, $t4, 0 ++ addi.d $t1, $t1, 1 ++ st.w $t1, $t4, 0 ++ ++ /* LIVE: s8 (guest state ptr), t0 (guest address to go to). ++ We use 6 temporaries: ++ t6 (to point at the relevant FastCacheSet), ++ t1, t2, t3 (scratch, for swapping entries within a set) ++ t4, t5 (other scratch) ++ */ ++ ++ /* Try a fast lookup in the translation cache. This is pretty much ++ a handcoded version of VG_(lookupInFastCache). 
*/ ++ ++ // Compute t6 = VG_TT_FAST_HASH(guest) ++ srli.d $t6, $t0, 2 // g2 = guest >> 2 ++ srli.d $t5, $t0, (VG_TT_FAST_BITS + 2) // (g2 >> VG_TT_FAST_BITS) ++ xor $t6, $t6, $t5 // (g2 >> VG_TT_FAST_BITS) ^ g2 ++ li.w $t5, VG_TT_FAST_MASK ++ and $t6, $t6, $t5 // setNo ++ ++ // Compute t6 = &VG_(tt_fast)[t6] ++ la.local $t5, VG_(tt_fast) ++ slli.d $t6, $t6, VG_FAST_CACHE_SET_BITS ++ add.d $t6, $t6, $t5 ++ ++ /* LIVE: s8 (guest state ptr), t0 (guest addr), t6 (cache set) */ ++0: // try way 0 ++ ld.d $t4, $t6, FCS_g0 // .guest0 ++ ld.d $t5, $t6, FCS_h0 // .host0 ++ bne $t4, $t0, 1f // cmp against .guest0 ++ // hit at way 0 ++ // goto .host0 ++ jr $t5 ++ /*NOTREACHED*/ ++ ++1: // try way 1 ++ ld.d $t4, $t6, FCS_g1 ++ bne $t4, $t0, 2f // cmp against .guest1 ++ // hit at way 1; swap upwards ++ ld.d $t1, $t6, FCS_g0 // $t1 = old .guest0 ++ ld.d $t2, $t6, FCS_h0 // $t2 = old .host0 ++ ld.d $t3, $t6, FCS_h1 // $t3 = old .host1 ++ st.d $t0, $t6, FCS_g0 // new .guest0 = guest ++ st.d $t3, $t6, FCS_h0 // new .host0 = old .host1 ++ st.d $t1, $t6, FCS_g1 // new .guest1 = old .guest0 ++ st.d $t2, $t6, FCS_h1 // new .host1 = old .host0 ++ ++ // stats only ++ la.local $t4, VG_(stats__n_xIndir_hits1_32) ++ ld.d $t5, $t4, 0 ++ addi.d $t5, $t5, 1 ++ st.w $t5, $t4, 0 ++ // goto old .host1 a.k.a. new .host0 ++ jr $t3 ++ /*NOTREACHED*/ ++ ++2: // try way 2 ++ ld.d $t4, $t6, FCS_g2 ++ bne $t4, $t0, 3f // cmp against .guest2 ++ // hit at way 2; swap upwards ++ ld.d $t1, $t6, FCS_g1 ++ ld.d $t2, $t6, FCS_h1 ++ ld.d $t3, $t6, FCS_h2 ++ st.d $t0, $t6, FCS_g1 ++ st.d $t3, $t6, FCS_h1 ++ st.d $t1, $t6, FCS_g2 ++ st.d $t2, $t6, FCS_h2 ++ ++ // stats only ++ la.local $t4, VG_(stats__n_xIndir_hits2_32) ++ ld.d $t5, $t4, 0 ++ addi.d $t5, $t5, 1 ++ st.w $t5, $t4, 0 ++ // goto old .host2 a.k.a. new .host1 ++ jr $t3 ++ /*NOTREACHED*/ ++ ++3: // try way 3 ++ ld.d $t4, $t6, FCS_g3 ++ bne $t4, $t0, 4f // cmp against .guest3 ++ // hit at way 3; swap upwards ++ ld.d $t1, $t6, FCS_g2 ++ ld.d $t2, $t6, FCS_h2 ++ ld.d $t3, $t6, FCS_h3 ++ st.d $t0, $t6, FCS_g2 ++ st.d $t3, $t6, FCS_h2 ++ st.d $t1, $t6, FCS_g3 ++ st.d $t2, $t6, FCS_h3 ++ ++ // stats only ++ la.local $t4, VG_(stats__n_xIndir_hits3_32) ++ ld.d $t5, $t4, 0 ++ addi.d $t5, $t5, 1 ++ st.w $t5, $t4, 0 ++ // goto old .host3 a.k.a. new .host2 ++ jr $t3 ++ /*NOTREACHED*/ ++ ++4: // fast lookup failed: ++ /* stats only */ ++ la.local $t4, VG_(stats__n_xIndir_misses_32) ++ ld.d $t5, $t4, 0 ++ addi.d $t5, $t5, 1 ++ st.w $t5, $t4, 0 ++ ++ li.w $a0, VG_TRC_INNER_FASTMISS ++ move $a1, $zero ++ b postamble ++ /*NOTREACHED*/ ++ ++/* ------ Assisted jump ------ */ ++.globl VG_(disp_cp_xassisted) ++VG_(disp_cp_xassisted): ++ /* guest-state-pointer contains the TRC. 
Put the value into the ++ return register */ ++ move $a0, $s8 ++ move $a1, $zero ++ b postamble ++ ++/* ------ Event check failed ------ */ ++.globl VG_(disp_cp_evcheck_fail) ++VG_(disp_cp_evcheck_fail): ++ li.w $a0, VG_TRC_INNER_COUNTERZERO ++ move $a1, $zero ++ b postamble ++ ++.size VG_(disp_run_translations), .-VG_(disp_run_translations) ++ ++#endif // defined(VGP_loongarch64_linux) ++ ++/* Let the linker know we don't need an executable stack */ ++MARK_STACK_NO_EXEC ++ ++/*--------------------------------------------------------------------*/ ++/*--- end dispatch-loongarch64-linux.S ---*/ ++/*--------------------------------------------------------------------*/ +diff --git a/coregrind/m_gdbserver/loongarch-base64-valgrind-s1.xml b/coregrind/m_gdbserver/loongarch-base64-valgrind-s1.xml +new file mode 100644 +index 000000000..cab700cca +--- /dev/null ++++ b/coregrind/m_gdbserver/loongarch-base64-valgrind-s1.xml +@@ -0,0 +1,45 @@ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ +diff --git a/coregrind/m_gdbserver/loongarch-base64-valgrind-s2.xml b/coregrind/m_gdbserver/loongarch-base64-valgrind-s2.xml +new file mode 100644 +index 000000000..cbacbbbbe +--- /dev/null ++++ b/coregrind/m_gdbserver/loongarch-base64-valgrind-s2.xml +@@ -0,0 +1,45 @@ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ +diff --git a/coregrind/m_gdbserver/loongarch-base64.xml b/coregrind/m_gdbserver/loongarch-base64.xml +new file mode 100644 +index 000000000..fadca8b9e +--- /dev/null ++++ b/coregrind/m_gdbserver/loongarch-base64.xml +@@ -0,0 +1,45 @@ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ +diff --git a/coregrind/m_gdbserver/loongarch-fpu64-valgrind-s1.xml b/coregrind/m_gdbserver/loongarch-fpu64-valgrind-s1.xml +new file mode 100644 +index 000000000..b5c7cab50 +--- /dev/null ++++ b/coregrind/m_gdbserver/loongarch-fpu64-valgrind-s1.xml +@@ -0,0 +1,57 @@ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ +diff --git a/coregrind/m_gdbserver/loongarch-fpu64-valgrind-s2.xml b/coregrind/m_gdbserver/loongarch-fpu64-valgrind-s2.xml +new file mode 100644 +index 000000000..501660ebb +--- /dev/null ++++ b/coregrind/m_gdbserver/loongarch-fpu64-valgrind-s2.xml +@@ -0,0 +1,57 @@ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ +diff --git a/coregrind/m_gdbserver/loongarch-fpu64.xml b/coregrind/m_gdbserver/loongarch-fpu64.xml +new file mode 100644 +index 000000000..74ab55a01 +--- /dev/null ++++ b/coregrind/m_gdbserver/loongarch-fpu64.xml +@@ -0,0 +1,57 @@ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ +diff --git a/coregrind/m_gdbserver/loongarch64-linux-valgrind.xml b/coregrind/m_gdbserver/loongarch64-linux-valgrind.xml +new file mode 100644 +index 000000000..8915a72a9 +--- /dev/null ++++ b/coregrind/m_gdbserver/loongarch64-linux-valgrind.xml +@@ -0,0 +1,18 @@ ++ ++ ++ ++ ++ ++ loongarch ++ GNU/Linux ++ ++ ++ ++ ++ ++ ++ +diff --git a/coregrind/m_gdbserver/loongarch64-linux.xml b/coregrind/m_gdbserver/loongarch64-linux.xml +new file mode 100644 +index 000000000..f1eed8338 +--- 
/dev/null ++++ b/coregrind/m_gdbserver/loongarch64-linux.xml +@@ -0,0 +1,14 @@ ++ ++ ++ ++ ++ ++ loongarch ++ GNU/Linux ++ ++ ++ +diff --git a/coregrind/m_gdbserver/target.c b/coregrind/m_gdbserver/target.c +index 490276b6c..5e0a8ad24 100644 +--- a/coregrind/m_gdbserver/target.c ++++ b/coregrind/m_gdbserver/target.c +@@ -867,6 +867,8 @@ void valgrind_initialize_target(void) + mips64_init_architecture(&the_low_target); + #elif defined(VGA_nanomips) + nanomips_init_architecture(&the_low_target); ++#elif defined(VGA_loongarch64) ++ loongarch64_init_architecture(&the_low_target); + #else + #error "architecture missing in target.c valgrind_initialize_target" + #endif +diff --git a/coregrind/m_gdbserver/valgrind-low-loongarch64.c b/coregrind/m_gdbserver/valgrind-low-loongarch64.c +new file mode 100644 +index 000000000..a606baf63 +--- /dev/null ++++ b/coregrind/m_gdbserver/valgrind-low-loongarch64.c +@@ -0,0 +1,272 @@ ++/* Low level interface to valgrind, for the remote server for GDB integrated ++ in valgrind. ++ Copyright (C) 2021 ++ Free Software Foundation, Inc. ++ ++ This file is part of VALGRIND. ++ It has been inspired from files from gdbserver in gdb 13. ++ ++ This program is free software; you can redistribute it and/or modify ++ it under the terms of the GNU General Public License as published by ++ the Free Software Foundation; either version 2 of the License, or ++ (at your option) any later version. ++ ++ This program is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ GNU General Public License for more details. ++ ++ You should have received a copy of the GNU General Public License ++ along with this program; if not, write to the Free Software ++ Foundation, Inc., 51 Franklin Street, Fifth Floor, ++ Boston, MA 02110-1301, USA. 
*/ ++ ++#include "server.h" ++#include "target.h" ++#include "regdef.h" ++#include "regcache.h" ++ ++#include "pub_core_machine.h" ++#include "pub_core_debuginfo.h" ++#include "pub_core_threadstate.h" ++#include "pub_core_transtab.h" ++#include "pub_core_gdbserver.h" ++ ++#include "valgrind_low.h" ++ ++#include "libvex_guest_loongarch64.h" ++ ++static struct reg regs[] = { ++ { "r0", 0, 64 }, ++ { "r1", 64, 64 }, ++ { "r2", 128, 64 }, ++ { "r3", 192, 64 }, ++ { "r4", 256, 64 }, ++ { "r5", 320, 64 }, ++ { "r6", 384, 64 }, ++ { "r7", 448, 64 }, ++ { "r8", 512, 64 }, ++ { "r9", 576, 64 }, ++ { "r10", 640, 64 }, ++ { "r11", 704, 64 }, ++ { "r12", 768, 64 }, ++ { "r13", 832, 64 }, ++ { "r14", 896, 64 }, ++ { "r15", 960, 64 }, ++ { "r16", 1024, 64 }, ++ { "r17", 1088, 64 }, ++ { "r18", 1152, 64 }, ++ { "r19", 1216, 64 }, ++ { "r20", 1280, 64 }, ++ { "r21", 1344, 64 }, ++ { "r22", 1408, 64 }, ++ { "r23", 1472, 64 }, ++ { "r24", 1536, 64 }, ++ { "r25", 1600, 64 }, ++ { "r26", 1664, 64 }, ++ { "r27", 1728, 64 }, ++ { "r28", 1792, 64 }, ++ { "r29", 1856, 64 }, ++ { "r30", 1920, 64 }, ++ { "r31", 1984, 64 }, ++ { "orig_a0", 2048, 64 }, ++ { "pc", 2112, 64 }, ++ { "badv", 2176, 64 }, ++ { "f0", 2240, 64 }, ++ { "f1", 2304, 64 }, ++ { "f2", 2368, 64 }, ++ { "f3", 2432, 64 }, ++ { "f4", 2496, 64 }, ++ { "f5", 2560, 64 }, ++ { "f6", 2624, 64 }, ++ { "f7", 2688, 64 }, ++ { "f8", 2752, 64 }, ++ { "f9", 2816, 64 }, ++ { "f10", 2880, 64 }, ++ { "f11", 2944, 64 }, ++ { "f12", 3008, 64 }, ++ { "f13", 3072, 64 }, ++ { "f14", 3136, 64 }, ++ { "f15", 3200, 64 }, ++ { "f16", 3264, 64 }, ++ { "f17", 3328, 64 }, ++ { "f18", 3392, 64 }, ++ { "f19", 3456, 64 }, ++ { "f20", 3520, 64 }, ++ { "f21", 3584, 64 }, ++ { "f22", 3648, 64 }, ++ { "f23", 3712, 64 }, ++ { "f24", 3776, 64 }, ++ { "f25", 3840, 64 }, ++ { "f26", 3904, 64 }, ++ { "f27", 3968, 64 }, ++ { "f28", 4032, 64 }, ++ { "f29", 4096, 64 }, ++ { "f30", 4160, 64 }, ++ { "f31", 4224, 64 }, ++ { "fcc0", 4288, 8 }, ++ { "fcc1", 4296, 8 }, ++ { "fcc2", 4304, 8 }, ++ { "fcc3", 4312, 8 }, ++ { "fcc4", 4320, 8 }, ++ { "fcc5", 4328, 8 }, ++ { "fcc6", 4336, 8 }, ++ { "fcc7", 4344, 8 }, ++ { "fcsr", 4352, 32 } ++}; ++ ++#define num_regs (sizeof (regs) / sizeof (regs[0])) ++ ++static const char* expedite_regs[] = { "r3", "pc", NULL }; ++ ++static ++CORE_ADDR get_pc (void) ++{ ++ unsigned long pc; ++ ++ collect_register_by_name ("pc", &pc); ++ ++ dlog(1, "stop pc is %p\n", (void*) pc); ++ return pc; ++} ++ ++static ++void set_pc (CORE_ADDR newpc) ++{ ++ supply_register_by_name ("pc", &newpc); ++} ++ ++/* store registers in the guest state (gdbserver_to_valgrind) ++ or fetch register from the guest state (valgrind_to_gdbserver). */ ++static ++void transfer_register (ThreadId tid, int abs_regno, void* buf, ++ transfer_direction dir, int size, Bool* mod) ++{ ++ ThreadState* tst = VG_(get_ThreadState)(tid); ++ int set = abs_regno / num_regs; ++ int regno = abs_regno % num_regs; ++ *mod = False; ++ ++ VexGuestLOONGARCH64State* loongarch64 = (VexGuestLOONGARCH64State*) get_arch (set, tst); ++ ++ switch (regno) { ++ // numbers here have to match the order of regs above ++ // Attention: gdb order does not match valgrind order. 
++ case 0: VG_(transfer) (&loongarch64->guest_R0, buf, dir, size, mod); break; ++ case 1: VG_(transfer) (&loongarch64->guest_R1, buf, dir, size, mod); break; ++ case 2: VG_(transfer) (&loongarch64->guest_R2, buf, dir, size, mod); break; ++ case 3: VG_(transfer) (&loongarch64->guest_R3, buf, dir, size, mod); break; ++ case 4: VG_(transfer) (&loongarch64->guest_R4, buf, dir, size, mod); break; ++ case 5: VG_(transfer) (&loongarch64->guest_R5, buf, dir, size, mod); break; ++ case 6: VG_(transfer) (&loongarch64->guest_R6, buf, dir, size, mod); break; ++ case 7: VG_(transfer) (&loongarch64->guest_R7, buf, dir, size, mod); break; ++ case 8: VG_(transfer) (&loongarch64->guest_R8, buf, dir, size, mod); break; ++ case 9: VG_(transfer) (&loongarch64->guest_R9, buf, dir, size, mod); break; ++ case 10: VG_(transfer) (&loongarch64->guest_R10, buf, dir, size, mod); break; ++ case 11: VG_(transfer) (&loongarch64->guest_R11, buf, dir, size, mod); break; ++ case 12: VG_(transfer) (&loongarch64->guest_R12, buf, dir, size, mod); break; ++ case 13: VG_(transfer) (&loongarch64->guest_R13, buf, dir, size, mod); break; ++ case 14: VG_(transfer) (&loongarch64->guest_R14, buf, dir, size, mod); break; ++ case 15: VG_(transfer) (&loongarch64->guest_R15, buf, dir, size, mod); break; ++ case 16: VG_(transfer) (&loongarch64->guest_R16, buf, dir, size, mod); break; ++ case 17: VG_(transfer) (&loongarch64->guest_R17, buf, dir, size, mod); break; ++ case 18: VG_(transfer) (&loongarch64->guest_R18, buf, dir, size, mod); break; ++ case 19: VG_(transfer) (&loongarch64->guest_R19, buf, dir, size, mod); break; ++ case 20: VG_(transfer) (&loongarch64->guest_R20, buf, dir, size, mod); break; ++ case 21: VG_(transfer) (&loongarch64->guest_R21, buf, dir, size, mod); break; ++ case 22: VG_(transfer) (&loongarch64->guest_R22, buf, dir, size, mod); break; ++ case 23: VG_(transfer) (&loongarch64->guest_R23, buf, dir, size, mod); break; ++ case 24: VG_(transfer) (&loongarch64->guest_R24, buf, dir, size, mod); break; ++ case 25: VG_(transfer) (&loongarch64->guest_R25, buf, dir, size, mod); break; ++ case 26: VG_(transfer) (&loongarch64->guest_R26, buf, dir, size, mod); break; ++ case 27: VG_(transfer) (&loongarch64->guest_R27, buf, dir, size, mod); break; ++ case 28: VG_(transfer) (&loongarch64->guest_R28, buf, dir, size, mod); break; ++ case 29: VG_(transfer) (&loongarch64->guest_R29, buf, dir, size, mod); break; ++ case 30: VG_(transfer) (&loongarch64->guest_R30, buf, dir, size, mod); break; ++ case 31: VG_(transfer) (&loongarch64->guest_R31, buf, dir, size, mod); break; ++ case 32: *mod = False; break; // GDBTD?? arg0 ++ case 33: VG_(transfer) (&loongarch64->guest_PC, buf, dir, size, mod); break; ++ case 34: *mod = False; break; // GDBTD?? 
badvaddr ++ case 35: VG_(transfer) (&loongarch64->guest_X0, buf, dir, size, mod); break; ++ case 36: VG_(transfer) (&loongarch64->guest_X1, buf, dir, size, mod); break; ++ case 37: VG_(transfer) (&loongarch64->guest_X2, buf, dir, size, mod); break; ++ case 38: VG_(transfer) (&loongarch64->guest_X3, buf, dir, size, mod); break; ++ case 39: VG_(transfer) (&loongarch64->guest_X4, buf, dir, size, mod); break; ++ case 40: VG_(transfer) (&loongarch64->guest_X5, buf, dir, size, mod); break; ++ case 41: VG_(transfer) (&loongarch64->guest_X6, buf, dir, size, mod); break; ++ case 42: VG_(transfer) (&loongarch64->guest_X7, buf, dir, size, mod); break; ++ case 43: VG_(transfer) (&loongarch64->guest_X8, buf, dir, size, mod); break; ++ case 44: VG_(transfer) (&loongarch64->guest_X9, buf, dir, size, mod); break; ++ case 45: VG_(transfer) (&loongarch64->guest_X10, buf, dir, size, mod); break; ++ case 46: VG_(transfer) (&loongarch64->guest_X11, buf, dir, size, mod); break; ++ case 47: VG_(transfer) (&loongarch64->guest_X12, buf, dir, size, mod); break; ++ case 48: VG_(transfer) (&loongarch64->guest_X13, buf, dir, size, mod); break; ++ case 49: VG_(transfer) (&loongarch64->guest_X14, buf, dir, size, mod); break; ++ case 50: VG_(transfer) (&loongarch64->guest_X15, buf, dir, size, mod); break; ++ case 51: VG_(transfer) (&loongarch64->guest_X16, buf, dir, size, mod); break; ++ case 52: VG_(transfer) (&loongarch64->guest_X17, buf, dir, size, mod); break; ++ case 53: VG_(transfer) (&loongarch64->guest_X18, buf, dir, size, mod); break; ++ case 54: VG_(transfer) (&loongarch64->guest_X19, buf, dir, size, mod); break; ++ case 55: VG_(transfer) (&loongarch64->guest_X20, buf, dir, size, mod); break; ++ case 56: VG_(transfer) (&loongarch64->guest_X21, buf, dir, size, mod); break; ++ case 57: VG_(transfer) (&loongarch64->guest_X22, buf, dir, size, mod); break; ++ case 58: VG_(transfer) (&loongarch64->guest_X23, buf, dir, size, mod); break; ++ case 59: VG_(transfer) (&loongarch64->guest_X24, buf, dir, size, mod); break; ++ case 60: VG_(transfer) (&loongarch64->guest_X25, buf, dir, size, mod); break; ++ case 61: VG_(transfer) (&loongarch64->guest_X26, buf, dir, size, mod); break; ++ case 62: VG_(transfer) (&loongarch64->guest_X27, buf, dir, size, mod); break; ++ case 63: VG_(transfer) (&loongarch64->guest_X28, buf, dir, size, mod); break; ++ case 64: VG_(transfer) (&loongarch64->guest_X29, buf, dir, size, mod); break; ++ case 65: VG_(transfer) (&loongarch64->guest_X30, buf, dir, size, mod); break; ++ case 66: VG_(transfer) (&loongarch64->guest_X31, buf, dir, size, mod); break; ++ case 67: VG_(transfer) (&loongarch64->guest_FCC0, buf, dir, size, mod); break; ++ case 68: VG_(transfer) (&loongarch64->guest_FCC1, buf, dir, size, mod); break; ++ case 69: VG_(transfer) (&loongarch64->guest_FCC2, buf, dir, size, mod); break; ++ case 70: VG_(transfer) (&loongarch64->guest_FCC3, buf, dir, size, mod); break; ++ case 71: VG_(transfer) (&loongarch64->guest_FCC4, buf, dir, size, mod); break; ++ case 72: VG_(transfer) (&loongarch64->guest_FCC5, buf, dir, size, mod); break; ++ case 73: VG_(transfer) (&loongarch64->guest_FCC6, buf, dir, size, mod); break; ++ case 74: VG_(transfer) (&loongarch64->guest_FCC7, buf, dir, size, mod); break; ++ case 75: VG_(transfer) (&loongarch64->guest_FCSR, buf, dir, size, mod); break; ++ default: vg_assert(0); ++ } ++} ++ ++static ++const char* target_xml (Bool shadow_mode) ++{ ++ if (shadow_mode) { ++ return "loongarch64-linux-valgrind.xml"; ++ } else { ++ return "loongarch64-linux.xml"; ++ } ++} ++ 
++static CORE_ADDR** target_get_dtv (ThreadState* tst) ++{ ++ VexGuestLOONGARCH64State* loongarch64 = (VexGuestLOONGARCH64State*)&tst->arch.vex; ++ // Top of LoongArch tcbhead structure is located 0x0 bytes before the value ++ // of $r2. Dtv is the first of two pointers in tcbhead structure. ++ // More details can be found in GLIBC/sysdeps/nptl/tls.h. ++ return (CORE_ADDR**)((CORE_ADDR)loongarch64->guest_R2 ++ - 0x0 - 2 * sizeof(CORE_ADDR)); ++} ++ ++static struct valgrind_target_ops low_target = { ++ num_regs, ++ regs, ++ 3, // SP ++ transfer_register, ++ get_pc, ++ set_pc, ++ "loongarch64", ++ target_xml, ++ target_get_dtv ++}; ++ ++void loongarch64_init_architecture (struct valgrind_target_ops* target) ++{ ++ *target = low_target; ++ set_register_cache (regs, num_regs); ++ gdbserver_expedite_regs = expedite_regs; ++} +diff --git a/coregrind/m_gdbserver/valgrind_low.h b/coregrind/m_gdbserver/valgrind_low.h +index c6c0bb63b..833f3612e 100644 +--- a/coregrind/m_gdbserver/valgrind_low.h ++++ b/coregrind/m_gdbserver/valgrind_low.h +@@ -108,5 +108,6 @@ extern void s390x_init_architecture (struct valgrind_target_ops *target); + extern void mips32_init_architecture (struct valgrind_target_ops *target); + extern void mips64_init_architecture (struct valgrind_target_ops *target); + extern void nanomips_init_architecture (struct valgrind_target_ops *target); ++extern void loongarch64_init_architecture (struct valgrind_target_ops *target); + + #endif +diff --git a/coregrind/m_initimg/initimg-linux.c b/coregrind/m_initimg/initimg-linux.c +index 7a7d45335..237a11f09 100644 +--- a/coregrind/m_initimg/initimg-linux.c ++++ b/coregrind/m_initimg/initimg-linux.c +@@ -913,7 +913,8 @@ Addr setup_client_stack( void* init_sp, + && !defined(VGP_ppc64le_linux) \ + && !defined(VGP_mips32_linux) && !defined(VGP_mips64_linux) \ + && !defined(VGP_nanomips_linux) \ +- && !defined(VGP_s390x_linux) ++ && !defined(VGP_s390x_linux) \ ++ && !defined(VGP_loongarch64_linux) + case AT_SYSINFO_EHDR: { + /* Trash this, because we don't reproduce it */ + const NSegment* ehdrseg = VG_(am_find_nsegment)((Addr)auxv->u.a_ptr); +@@ -1344,6 +1345,20 @@ void VG_(ii_finalise_image)( IIFinaliseImageInfo iifii ) + arch->vex.guest_PC = iifii.initial_client_IP; + arch->vex.guest_r31 = iifii.initial_client_SP; + ++# elif defined(VGP_loongarch64_linux) ++ vg_assert(0 == sizeof(VexGuestLOONGARCH64State) % LibVEX_GUEST_STATE_ALIGN); ++ ++ /* Zero out the initial state, and set up the simulated FPU in a ++ sane way. */ ++ LibVEX_GuestLOONGARCH64_initialise(&arch->vex); ++ ++ /* Zero out the shadow areas. 
*/ ++ VG_(memset)(&arch->vex_shadow1, 0, sizeof(VexGuestLOONGARCH64State)); ++ VG_(memset)(&arch->vex_shadow2, 0, sizeof(VexGuestLOONGARCH64State)); ++ ++ arch->vex.guest_R3 = iifii.initial_client_SP; ++ arch->vex.guest_PC = iifii.initial_client_IP; ++ + # else + # error Unknown platform + # endif +diff --git a/coregrind/m_libcassert.c b/coregrind/m_libcassert.c +index 35f37f88d..0ad514129 100644 +--- a/coregrind/m_libcassert.c ++++ b/coregrind/m_libcassert.c +@@ -264,6 +264,26 @@ + (srP)->misc.MIPS32.r31 = (UInt)ra; \ + (srP)->misc.MIPS32.r28 = (UInt)gp; \ + } ++#elif defined(VGP_loongarch64_linux) ++# define GET_STARTREGS(srP) \ ++ { \ ++ ULong pc, sp, fp, ra; \ ++ __asm__ __volatile__( \ ++ "pcaddi %0, 0 \n\t" \ ++ "move %1, $sp \n\t" \ ++ "move %2, $fp \n\t" \ ++ "move %3, $ra \n\t" \ ++ : "=r" (pc), \ ++ "=r" (sp), \ ++ "=r" (fp), \ ++ "=r" (ra) \ ++ : /* reads none */ \ ++ : /* no trashed */ ); \ ++ (srP)->r_pc = (ULong)pc; \ ++ (srP)->r_sp = (ULong)sp; \ ++ (srP)->misc.LOONGARCH64.r_fp = (ULong)fp; \ ++ (srP)->misc.LOONGARCH64.r_ra = (ULong)ra; \ ++ } + #else + # error Unknown platform + #endif +diff --git a/coregrind/m_libcfile.c b/coregrind/m_libcfile.c +index 5d3a349f2..bbbd4e7fb 100644 +--- a/coregrind/m_libcfile.c ++++ b/coregrind/m_libcfile.c +@@ -264,7 +264,8 @@ Bool VG_(resolve_filemode) ( Int fd, Int * result ) + + SysRes VG_(mknod) ( const HChar* pathname, Int mode, UWord dev ) + { +-# if defined(VGP_arm64_linux) || defined(VGP_nanomips_linux) ++# if defined(VGP_arm64_linux) || defined(VGP_nanomips_linux) \ ++ || defined(VGP_loongarch64_linux) + /* ARM64 wants to use __NR_mknodat rather than __NR_mknod. */ + SysRes res = VG_(do_syscall4)(__NR_mknodat, + VKI_AT_FDCWD, (UWord)pathname, mode, dev); +@@ -290,7 +291,8 @@ SysRes VG_(mknod) ( const HChar* pathname, Int mode, UWord dev ) + + SysRes VG_(open) ( const HChar* pathname, Int flags, Int mode ) + { +-# if defined(VGP_arm64_linux) || defined(VGP_nanomips_linux) ++# if defined(VGP_arm64_linux) || defined(VGP_nanomips_linux) \ ++ || defined(VGP_loongarch64_linux) + /* ARM64 wants to use __NR_openat rather than __NR_open. */ + SysRes res = VG_(do_syscall4)(__NR_openat, + VKI_AT_FDCWD, (UWord)pathname, flags, mode); +@@ -384,7 +386,8 @@ Int VG_(pipe) ( Int fd[2] ) + } else { + return -1; + } +-# elif defined(VGP_arm64_linux) || defined(VGP_nanomips_linux) ++# elif defined(VGP_arm64_linux) || defined(VGP_nanomips_linux) \ ++ || defined(VGP_loongarch64_linux) + SysRes res = VG_(do_syscall2)(__NR_pipe2, (UWord)fd, 0); + return sr_isError(res) ? -1 : 0; + # elif defined(VGO_linux) +@@ -517,12 +520,19 @@ SysRes VG_(stat) ( const HChar* file_name, struct vg_stat* vgbuf ) + { struct vki_statx buf; + res = VG_(do_syscall5)(__NR_statx, VKI_AT_FDCWD, (UWord)file_name, 0, + VKI_STATX_ALL, (UWord)&buf); ++# if defined(VGP_loongarch64_linux) ++ /* On LoongArch64 Linux platform, only statx is available. 
*/ ++ if (!sr_isError(res)) ++ TRANSLATE_statx_TO_vg_stat(vgbuf, &buf); ++ return res; ++# else + if (!(sr_isError(res) && sr_Err(res) == VKI_ENOSYS)) { + /* Success, or any failure except ENOSYS */ + if (!sr_isError(res)) + TRANSLATE_statx_TO_vg_stat(vgbuf, &buf); + return res; + } ++# endif + } + # endif + # if defined(VGO_linux) || defined(VGO_darwin) +@@ -602,12 +612,19 @@ Int VG_(fstat) ( Int fd, struct vg_stat* vgbuf ) + const char* file_name = ""; + res = VG_(do_syscall5)(__NR_statx, fd, (RegWord)file_name, + VKI_AT_EMPTY_PATH, VKI_STATX_ALL, (RegWord)&buf); ++# if defined(VGP_loongarch64_linux) ++ /* On LoongArch64 Linux platform, only statx is available. */ ++ if (!sr_isError(res)) ++ TRANSLATE_statx_TO_vg_stat(vgbuf, &buf); ++ return sr_isError(res) ? (-1) : 0; ++# else + if (!(sr_isError(res) && sr_Err(res) == VKI_ENOSYS)) { + /* Success, or any failure except ENOSYS */ + if (!sr_isError(res)) + TRANSLATE_statx_TO_vg_stat(vgbuf, &buf); + return sr_isError(res) ? (-1) : 0; + } ++# endif + } + #endif + # if defined(VGO_linux) || defined(VGO_darwin) +@@ -731,7 +748,8 @@ SysRes VG_(dup) ( Int oldfd ) + + SysRes VG_(dup2) ( Int oldfd, Int newfd ) + { +-# if defined(VGP_arm64_linux) || defined(VGP_nanomips_linux) ++# if defined(VGP_arm64_linux) || defined(VGP_nanomips_linux) \ ++ || defined(VGP_loongarch64_linux) + /* We only have dup3, that means we have to mimic dup2. + The only real difference is when oldfd == newfd. + dup3 always returns an error, but dup2 returns only an +@@ -777,7 +795,7 @@ Int VG_(rename) ( const HChar* old_name, const HChar* new_name ) + # if defined(VGO_solaris) || defined(VGP_arm64_linux) + SysRes res = VG_(do_syscall4)(__NR_renameat, VKI_AT_FDCWD, (UWord)old_name, + VKI_AT_FDCWD, (UWord)new_name); +-# elif defined(VGP_nanomips_linux) ++# elif defined(VGP_nanomips_linux) || defined(VGP_loongarch64_linux) + SysRes res = VG_(do_syscall5)(__NR_renameat2, VKI_AT_FDCWD, (UWord)old_name, + VKI_AT_FDCWD, (UWord)new_name, 0); + +@@ -791,7 +809,8 @@ Int VG_(rename) ( const HChar* old_name, const HChar* new_name ) + + Int VG_(unlink) ( const HChar* file_name ) + { +-# if defined(VGP_arm64_linux) || defined(VGP_nanomips_linux) ++# if defined(VGP_arm64_linux) || defined(VGP_nanomips_linux) \ ++ || defined(VGP_loongarch64_linux) + SysRes res = VG_(do_syscall2)(__NR_unlinkat, VKI_AT_FDCWD, + (UWord)file_name); + # elif defined(VGO_linux) || defined(VGO_darwin) || defined(VGO_freebsd) +@@ -870,7 +889,8 @@ const HChar *VG_(get_startup_wd) ( void ) + SysRes VG_(poll) (struct vki_pollfd *fds, Int nfds, Int timeout) + { + SysRes res; +-# if defined(VGP_arm64_linux) || defined(VGP_nanomips_linux) ++# if defined(VGP_arm64_linux) || defined(VGP_nanomips_linux) \ ++ || defined(VGP_loongarch64_linux) + /* ARM64 wants to use __NR_ppoll rather than __NR_poll. */ + struct vki_timespec timeout_ts; + if (timeout >= 0) { +@@ -915,7 +935,8 @@ SSizeT VG_(readlink) (const HChar* path, HChar* buf, SizeT bufsiz) + { + SysRes res; + /* res = readlink( path, buf, bufsiz ); */ +-# if defined(VGP_arm64_linux) || defined(VGP_nanomips_linux) ++# if defined(VGP_arm64_linux) || defined(VGP_nanomips_linux) \ ++ || defined(VGP_loongarch64_linux) + res = VG_(do_syscall4)(__NR_readlinkat, VKI_AT_FDCWD, + (UWord)path, (UWord)buf, bufsiz); + # elif defined(VGO_linux) || defined(VGO_darwin) || defined(VGO_freebsd) +@@ -994,7 +1015,8 @@ Int VG_(access) ( const HChar* path, Bool irusr, Bool iwusr, Bool ixusr ) + UWord w = (irusr ? VKI_R_OK : 0) + | (iwusr ? VKI_W_OK : 0) + | (ixusr ? 
VKI_X_OK : 0); +-# if defined(VGP_arm64_linux) || defined(VGP_nanomips_linux) ++# if defined(VGP_arm64_linux) || defined(VGP_nanomips_linux) \ ++ || defined(VGP_loongarch64_linux) + SysRes res = VG_(do_syscall3)(__NR_faccessat, VKI_AT_FDCWD, (UWord)path, w); + # elif defined(VGO_linux) || defined(VGO_darwin) || defined(VGO_freebsd) + SysRes res = VG_(do_syscall2)(__NR_access, (UWord)path, w); +@@ -1140,7 +1162,8 @@ SysRes VG_(pread) ( Int fd, void* buf, Int count, OffT offset ) + return res; + # elif defined(VGP_amd64_linux) || defined(VGP_s390x_linux) \ + || defined(VGP_ppc64be_linux) || defined(VGP_ppc64le_linux) \ +- || defined(VGP_mips64_linux) || defined(VGP_arm64_linux) ++ || defined(VGP_mips64_linux) || defined(VGP_arm64_linux) \ ++ || defined(VGP_loongarch64_linux) + res = VG_(do_syscall4)(__NR_pread64, fd, (UWord)buf, count, offset); + return res; + # elif defined(VGP_amd64_freebsd) +@@ -1404,7 +1427,8 @@ Int VG_(socket) ( Int domain, Int type, Int protocol ) + + # elif defined(VGP_amd64_linux) || defined(VGP_arm_linux) \ + || defined(VGP_mips32_linux) || defined(VGP_mips64_linux) \ +- || defined(VGP_arm64_linux) || defined(VGP_nanomips_linux) || defined(VGO_freebsd) ++ || defined(VGP_arm64_linux) || defined(VGP_nanomips_linux) \ ++ || defined(VGO_freebsd) || defined(VGP_loongarch64_linux) + SysRes res; + res = VG_(do_syscall3)(__NR_socket, domain, type, protocol ); + return sr_isError(res) ? -1 : sr_Res(res); +@@ -1459,7 +1483,8 @@ Int my_connect ( Int sockfd, struct vki_sockaddr_in* serv_addr, Int addrlen ) + + # elif defined(VGP_amd64_linux) || defined(VGP_arm_linux) \ + || defined(VGP_mips32_linux) || defined(VGP_mips64_linux) \ +- || defined(VGP_arm64_linux) || defined(VGP_nanomips_linux) || defined(VGO_freebsd) ++ || defined(VGP_arm64_linux) || defined(VGP_nanomips_linux) \ ++ || defined(VGO_freebsd) || defined(VGP_loongarch64_linux) + SysRes res; + res = VG_(do_syscall3)(__NR_connect, sockfd, (UWord)serv_addr, addrlen); + return sr_isError(res) ? 
-1 : sr_Res(res); +@@ -1506,7 +1531,8 @@ Int VG_(write_socket)( Int sd, const void *msg, Int count ) + + # elif defined(VGP_amd64_linux) || defined(VGP_arm_linux) \ + || defined(VGP_mips32_linux) || defined(VGP_mips64_linux) \ +- || defined(VGP_arm64_linux) || defined(VGP_nanomips_linux) || defined(VGO_freebsd) ++ || defined(VGP_arm64_linux) || defined(VGP_nanomips_linux) \ ++ || defined(VGO_freebsd) || defined(VGP_loongarch64_linux) + SysRes res; + res = VG_(do_syscall6)(__NR_sendto, sd, (UWord)msg, + count, VKI_MSG_NOSIGNAL, 0,0); +@@ -1544,7 +1570,8 @@ Int VG_(getsockname) ( Int sd, struct vki_sockaddr *name, Int *namelen) + # elif defined(VGP_amd64_linux) || defined(VGP_arm_linux) \ + || defined(VGP_mips64_linux) || defined(VGP_arm64_linux) \ + || defined(VGP_nanomips_linux) || defined(VGO_freebsd) \ +- || defined(VGP_mips64_linux) || defined(VGP_arm64_linux) ++ || defined(VGP_mips64_linux) || defined(VGP_arm64_linux) \ ++ || defined(VGP_loongarch64_linux) + SysRes res; + res = VG_(do_syscall3)( __NR_getsockname, + (UWord)sd, (UWord)name, (UWord)namelen ); +@@ -1583,7 +1610,8 @@ Int VG_(getpeername) ( Int sd, struct vki_sockaddr *name, Int *namelen) + + # elif defined(VGP_amd64_linux) || defined(VGP_arm_linux) \ + || defined(VGP_mips64_linux) || defined(VGP_arm64_linux) \ +- || defined(VGP_nanomips_linux) || defined(VGO_freebsd) ++ || defined(VGP_nanomips_linux) || defined(VGO_freebsd) \ ++ || defined(VGP_loongarch64_linux) + SysRes res; + res = VG_(do_syscall3)( __NR_getpeername, + (UWord)sd, (UWord)name, (UWord)namelen ); +@@ -1625,7 +1653,7 @@ Int VG_(getsockopt) ( Int sd, Int level, Int optname, void *optval, + # elif defined(VGP_amd64_linux) || defined(VGP_arm_linux) \ + || defined(VGP_mips32_linux) || defined(VGP_mips64_linux) \ + || defined(VGP_arm64_linux) || defined(VGP_nanomips_linux) \ +- || defined(VGO_freebsd) ++ || defined(VGO_freebsd) || defined(VGP_loongarch64_linux) + SysRes res; + res = VG_(do_syscall5)( __NR_getsockopt, + (UWord)sd, (UWord)level, (UWord)optname, +@@ -1669,7 +1697,8 @@ Int VG_(setsockopt) ( Int sd, Int level, Int optname, void *optval, + + # elif defined(VGP_amd64_linux) || defined(VGP_arm_linux) \ + || defined(VGP_mips32_linux) || defined(VGP_mips64_linux) \ +- || defined(VGP_arm64_linux) || defined(VGP_nanomips_linux) ++ || defined(VGP_arm64_linux) || defined(VGP_nanomips_linux) \ ++ || defined(VGP_loongarch64_linux) + SysRes res; + res = VG_(do_syscall5)( __NR_setsockopt, + (UWord)sd, (UWord)level, (UWord)optname, +diff --git a/coregrind/m_libcproc.c b/coregrind/m_libcproc.c +index 592d69bf1..61827cb7d 100644 +--- a/coregrind/m_libcproc.c ++++ b/coregrind/m_libcproc.c +@@ -698,7 +698,8 @@ Int VG_(gettid)(void) + * the /proc/self link is pointing... 
+ */
+
+-# if defined(VGP_arm64_linux) || defined(VGP_nanomips_linux)
++# if defined(VGP_arm64_linux) || defined(VGP_nanomips_linux) \
++ || defined(VGP_loongarch64_linux)
+ res = VG_(do_syscall4)(__NR_readlinkat, VKI_AT_FDCWD,
+ (UWord)"/proc/self",
+ (UWord)pid, sizeof(pid));
+@@ -753,7 +754,8 @@ Int VG_(getpid) ( void )
+ Int VG_(getpgrp) ( void )
+ {
+ /* ASSUMES SYSCALL ALWAYS SUCCEEDS */
+-# if defined(VGP_arm64_linux) || defined(VGP_nanomips_linux)
++# if defined(VGP_arm64_linux) || defined(VGP_nanomips_linux) \
++ || defined(VGP_loongarch64_linux)
+ return sr_Res( VG_(do_syscall1)(__NR_getpgid, 0) );
+ # elif defined(VGO_linux) || defined(VGO_darwin) || defined(VGO_freebsd)
+ return sr_Res( VG_(do_syscall0)(__NR_getpgrp) );
+@@ -850,7 +852,7 @@ Int VG_(getgroups)( Int size, UInt* list )
+ || defined(VGO_darwin) || defined(VGP_s390x_linux) \
+ || defined(VGP_mips32_linux) || defined(VGP_arm64_linux) \
+ || defined(VGO_solaris) || defined(VGP_nanomips_linux) \
+- || defined(VGO_freebsd)
++ || defined(VGO_freebsd) || defined(VGP_loongarch64_linux)
+ SysRes sres;
+ sres = VG_(do_syscall2)(__NR_getgroups, size, (Addr)list);
+ if (sr_isError(sres))
+@@ -944,7 +946,8 @@ Int VG_(fork) ( void )
+ fds[0] = fds[1] = -1;
+ }
+
+-# if defined(VGP_arm64_linux) || defined(VGP_nanomips_linux)
++# if defined(VGP_arm64_linux) || defined(VGP_nanomips_linux) \
++ || defined(VGP_loongarch64_linux)
+ SysRes res;
+ res = VG_(do_syscall5)(__NR_clone, VKI_SIGCHLD,
+ (UWord)NULL, (UWord)NULL, (UWord)NULL, (UWord)NULL);
+diff --git a/coregrind/m_libcsetjmp.c b/coregrind/m_libcsetjmp.c
+index 4f1ecb150..a563f9393 100644
+--- a/coregrind/m_libcsetjmp.c
++++ b/coregrind/m_libcsetjmp.c
+@@ -741,6 +741,72 @@ __asm__(
+ );
+ #endif /* VGP_nanomips_linux */
+
++#if defined(VGP_loongarch64_linux)
++
++__asm__(
++".text \n\t"
++".globl VG_MINIMAL_SETJMP; \n\t"
++"VG_MINIMAL_SETJMP: \n\t"
++" st.d $ra, $a0, 0 \n\t"
++" st.d $sp, $a0, 8 \n\t"
++" st.d $r21, $a0, 16 \n\t"
++" st.d $fp, $a0, 24 \n\t"
++" st.d $s0, $a0, 32 \n\t"
++" st.d $s1, $a0, 40 \n\t"
++" st.d $s2, $a0, 48 \n\t"
++" st.d $s3, $a0, 56 \n\t"
++" st.d $s4, $a0, 64 \n\t"
++" st.d $s5, $a0, 72 \n\t"
++" st.d $s6, $a0, 80 \n\t"
++" st.d $s7, $a0, 88 \n\t"
++" st.d $s8, $a0, 96 \n\t"
++#if !defined(__loongarch_soft_float)
++" fst.d $f24, $a0, 104 \n\t"
++" fst.d $f25, $a0, 112 \n\t"
++" fst.d $f26, $a0, 120 \n\t"
++" fst.d $f27, $a0, 128 \n\t"
++" fst.d $f28, $a0, 136 \n\t"
++" fst.d $f29, $a0, 144 \n\t"
++" fst.d $f30, $a0, 152 \n\t"
++" fst.d $f31, $a0, 160 \n\t"
++#endif
++" move $a0, $zero \n\t"
++" jr $ra \n\t"
++" \n\t"
++".text \n\t"
++".globl VG_MINIMAL_LONGJMP; \n\t"
++"VG_MINIMAL_LONGJMP: \n\t"
++" ld.d $ra, $a0, 0 \n\t"
++" ld.d $sp, $a0, 8 \n\t"
++" ld.d $r21, $a0, 16 \n\t"
++" ld.d $fp, $a0, 24 \n\t"
++" ld.d $s0, $a0, 32 \n\t"
++" ld.d $s1, $a0, 40 \n\t"
++" ld.d $s2, $a0, 48 \n\t"
++" ld.d $s3, $a0, 56 \n\t"
++" ld.d $s4, $a0, 64 \n\t"
++" ld.d $s5, $a0, 72 \n\t"
++" ld.d $s6, $a0, 80 \n\t"
++" ld.d $s7, $a0, 88 \n\t"
++" ld.d $s8, $a0, 96 \n\t"
++#if !defined(__loongarch_soft_float)
++" fld.d $f24, $a0, 104 \n\t"
++" fld.d $f25, $a0, 112 \n\t"
++" fld.d $f26, $a0, 120 \n\t"
++" fld.d $f27, $a0, 128 \n\t"
++" fld.d $f28, $a0, 136 \n\t"
++" fld.d $f29, $a0, 144 \n\t"
++" fld.d $f30, $a0, 152 \n\t"
++" fld.d $f31, $a0, 160 \n\t"
++#endif
++" bnez $a1, 1f \n\t"
++" addi.d $a1, $a1, 1 \n\t"
++"1: \n\t"
++" move $a0, $a1 \n\t"
++" jr $ra \n\t"
++);
++#endif /* VGP_loongarch64_linux */
++
+ 
/*--------------------------------------------------------------------*/ + /*--- end ---*/ + /*--------------------------------------------------------------------*/ +diff --git a/coregrind/m_machine.c b/coregrind/m_machine.c +index 052b5d186..48e4b3f22 100644 +--- a/coregrind/m_machine.c ++++ b/coregrind/m_machine.c +@@ -152,6 +152,13 @@ void VG_(get_UnwindStartRegs) ( /*OUT*/UnwindStartRegs* regs, + = VG_(threads)[tid].arch.vex.guest_r31; + regs->misc.MIPS64.r28 + = VG_(threads)[tid].arch.vex.guest_r28; ++# elif defined(VGA_loongarch64) ++ regs->r_pc = VG_(threads)[tid].arch.vex.guest_PC; ++ regs->r_sp = VG_(threads)[tid].arch.vex.guest_R3; ++ regs->misc.LOONGARCH64.r_fp ++ = VG_(threads)[tid].arch.vex.guest_R22; ++ regs->misc.LOONGARCH64.r_ra ++ = VG_(threads)[tid].arch.vex.guest_R1; + # else + # error "Unknown arch" + # endif +@@ -369,6 +376,39 @@ static void apply_to_GPs_of_tid(ThreadId tid, void (*f)(ThreadId, + (*f)(tid, "x28", vex->guest_X28); + (*f)(tid, "x29", vex->guest_X29); + (*f)(tid, "x30", vex->guest_X30); ++#elif defined(VGA_loongarch64) ++ (*f)(tid, "r0" , vex->guest_R0 ); ++ (*f)(tid, "r1" , vex->guest_R1 ); ++ (*f)(tid, "r2" , vex->guest_R2 ); ++ (*f)(tid, "r3" , vex->guest_R3 ); ++ (*f)(tid, "r4" , vex->guest_R4 ); ++ (*f)(tid, "r5" , vex->guest_R5 ); ++ (*f)(tid, "r6" , vex->guest_R6 ); ++ (*f)(tid, "r7" , vex->guest_R7 ); ++ (*f)(tid, "r8" , vex->guest_R8 ); ++ (*f)(tid, "r9" , vex->guest_R9 ); ++ (*f)(tid, "r10", vex->guest_R10); ++ (*f)(tid, "r11", vex->guest_R11); ++ (*f)(tid, "r12", vex->guest_R12); ++ (*f)(tid, "r13", vex->guest_R13); ++ (*f)(tid, "r14", vex->guest_R14); ++ (*f)(tid, "r15", vex->guest_R15); ++ (*f)(tid, "r16", vex->guest_R16); ++ (*f)(tid, "r17", vex->guest_R17); ++ (*f)(tid, "r18", vex->guest_R18); ++ (*f)(tid, "r19", vex->guest_R19); ++ (*f)(tid, "r20", vex->guest_R20); ++ (*f)(tid, "r21", vex->guest_R21); ++ (*f)(tid, "r22", vex->guest_R22); ++ (*f)(tid, "r23", vex->guest_R23); ++ (*f)(tid, "r24", vex->guest_R24); ++ (*f)(tid, "r25", vex->guest_R25); ++ (*f)(tid, "r26", vex->guest_R26); ++ (*f)(tid, "r27", vex->guest_R27); ++ (*f)(tid, "r28", vex->guest_R28); ++ (*f)(tid, "r29", vex->guest_R29); ++ (*f)(tid, "r30", vex->guest_R30); ++ (*f)(tid, "r31", vex->guest_R31); + #else + # error Unknown arch + #endif +@@ -479,7 +519,7 @@ Int VG_(machine_arm_archlevel) = 4; + testing, so we need a VG_MINIMAL_JMP_BUF. */ + #if defined(VGA_ppc32) || defined(VGA_ppc64be) || defined(VGA_ppc64le) \ + || defined(VGA_arm) || defined(VGA_s390x) || defined(VGA_mips32) \ +- || defined(VGA_mips64) || defined(VGA_arm64) ++ || defined(VGA_mips64) || defined(VGA_arm64) || defined(VGA_loongarch64) + #include "pub_core_libcsetjmp.h" + static VG_MINIMAL_JMP_BUF(env_unsup_insn); + static void handler_unsup_insn ( Int x ) { +@@ -859,6 +899,105 @@ static Bool VG_(parse_cpuinfo)(void) + + #endif /* defined(VGP_arm64_linux) */ + ++#if defined(VGA_loongarch64) ++ ++/* ++ * Initialize hwcaps by parsing /proc/cpuinfo. Returns False if it can not ++ * determine what CPU it is (it searches only for the models that are or may be ++ * supported by Valgrind). ++ */ ++static Bool VG_(parse_cpuinfo)(void) ++{ ++ Int n, fh; ++ SysRes fd; ++ SizeT num_bytes, file_buf_size; ++ HChar *file_buf; ++ ++ const char *search_Loongson_str = "Model Name\t\t: Loongson"; ++ ++ /* Slurp contents of /proc/cpuinfo into FILE_BUF */ ++ fd = VG_(open)("/proc/cpuinfo", 0, VKI_S_IRUSR); ++ if (sr_isError(fd)) ++ return False; ++ ++ fh = sr_Res(fd); ++ ++ /* Determine the size of /proc/cpuinfo. 
++ Work around broken-ness in /proc file system implementation. ++ fstat returns a zero size for /proc/cpuinfo although it is ++ claimed to be a regular file. */ ++ num_bytes = 0; ++ file_buf_size = 1000; ++ file_buf = VG_(malloc)("cpuinfo", file_buf_size + 1); ++ while (True) { ++ n = VG_(read)(fh, file_buf, file_buf_size); ++ if (n < 0) ++ break; ++ ++ num_bytes += n; ++ if (n < file_buf_size) ++ break; /* reached EOF */ ++ } ++ ++ if (n < 0) ++ num_bytes = 0; /* read error; ignore contents */ ++ ++ if (num_bytes > file_buf_size) { ++ VG_(free)(file_buf); ++ VG_(lseek)(fh, 0, VKI_SEEK_SET); ++ file_buf = VG_(malloc)("cpuinfo", num_bytes + 1); ++ n = VG_(read)(fh, file_buf, num_bytes); ++ if (n < 0) ++ num_bytes = 0; ++ } ++ ++ file_buf[num_bytes] = '\0'; ++ VG_(close)(fh); ++ ++ /* Parse file */ ++ vai.hwcaps = 0; ++ if (VG_(strstr)(file_buf, search_Loongson_str) == NULL) { ++ /* Did not find string in the proc file. */ ++ VG_(free)(file_buf); ++ return False; ++ } ++ ++ if (VG_(strstr)(file_buf, "loongarch32") != NULL) ++ vai.hwcaps |= VEX_HWCAPS_LOONGARCH_ISA_32BIT; ++ if (VG_(strstr)(file_buf, "loongarch64") != NULL) ++ vai.hwcaps |= VEX_HWCAPS_LOONGARCH_ISA_64BIT; ++ ++ if (VG_(strstr)(file_buf, "cpucfg") != NULL) ++ vai.hwcaps |= VEX_HWCAPS_LOONGARCH_CPUCFG; ++ if (VG_(strstr)(file_buf, "lam") != NULL) ++ vai.hwcaps |= VEX_HWCAPS_LOONGARCH_LAM; ++ if (VG_(strstr)(file_buf, "ual") != NULL) ++ vai.hwcaps |= VEX_HWCAPS_LOONGARCH_UAL; ++ if (VG_(strstr)(file_buf, "fpu") != NULL) ++ vai.hwcaps |= VEX_HWCAPS_LOONGARCH_FP; ++ if (VG_(strstr)(file_buf, "lsx") != NULL) ++ vai.hwcaps |= VEX_HWCAPS_LOONGARCH_LSX; ++ if (VG_(strstr)(file_buf, "lasx") != NULL) ++ vai.hwcaps |= VEX_HWCAPS_LOONGARCH_LASX; ++ if (VG_(strstr)(file_buf, "complex") != NULL) ++ vai.hwcaps |= VEX_HWCAPS_LOONGARCH_COMPLEX; ++ if (VG_(strstr)(file_buf, "crypto") != NULL) ++ vai.hwcaps |= VEX_HWCAPS_LOONGARCH_CRYPTO; ++ if (VG_(strstr)(file_buf, "lvz") != NULL) ++ vai.hwcaps |= VEX_HWCAPS_LOONGARCH_LVZP; ++ if (VG_(strstr)(file_buf, "lbt_x86") != NULL) ++ vai.hwcaps |= VEX_HWCAPS_LOONGARCH_X86BT; ++ if (VG_(strstr)(file_buf, "lbt_arm") != NULL) ++ vai.hwcaps |= VEX_HWCAPS_LOONGARCH_ARMBT; ++ if (VG_(strstr)(file_buf, "lbt_mips") != NULL) ++ vai.hwcaps |= VEX_HWCAPS_LOONGARCH_MIPSBT; ++ ++ VG_(free)(file_buf); ++ return True; ++} ++ ++#endif /* defined(VGP_loongarch64) */ ++ + Bool VG_(machine_get_hwcaps)( void ) + { + vg_assert(hwcaps_done == False); +@@ -2227,6 +2366,54 @@ Bool VG_(machine_get_hwcaps)( void ) + + return True; + } ++ ++#elif defined(VGA_loongarch64) ++ { ++ va = VexArchLOONGARCH64; ++ vai.endness = VexEndnessLE; ++ vai.hwcaps = 0; ++ ++ if (!VG_(parse_cpuinfo)()) ++ return False; ++ ++ /* Same instruction set detection algorithm as for ppc32/arm... */ ++ vki_sigset_t saved_set, tmp_set; ++ vki_sigaction_fromK_t saved_sigill_act; ++ vki_sigaction_toK_t tmp_sigill_act; ++ ++ vg_assert(sizeof(vki_sigaction_fromK_t) == sizeof(vki_sigaction_toK_t)); ++ ++ VG_(sigemptyset)(&tmp_set); ++ VG_(sigaddset)(&tmp_set, VKI_SIGILL); ++ ++ Int r; ++ r = VG_(sigprocmask)(VKI_SIG_UNBLOCK, &tmp_set, &saved_set); ++ vg_assert(r == 0); ++ ++ r = VG_(sigaction)(VKI_SIGILL, NULL, &saved_sigill_act); ++ vg_assert(r == 0); ++ tmp_sigill_act = saved_sigill_act; ++ ++ /* NODEFER: signal handler does not return (from the kernel's point of ++ view), hence if it is to successfully catch a signal more than once, ++ we need the NODEFER flag. 
*/ ++ tmp_sigill_act.sa_flags &= ~VKI_SA_RESETHAND; ++ tmp_sigill_act.sa_flags &= ~VKI_SA_SIGINFO; ++ tmp_sigill_act.sa_flags |= VKI_SA_NODEFER; ++ tmp_sigill_act.ksa_handler = handler_unsup_insn; ++ VG_(sigaction)(VKI_SIGILL, &tmp_sigill_act, NULL); ++ ++ VG_(convert_sigaction_fromK_to_toK)(&saved_sigill_act, &tmp_sigill_act); ++ VG_(sigaction)(VKI_SIGILL, &tmp_sigill_act, NULL); ++ VG_(sigprocmask)(VKI_SIG_SETMASK, &saved_set, NULL); ++ ++ VG_(debugLog)(1, "machine", "hwcaps = 0x%x\n", vai.hwcaps); ++ ++ VG_(machine_get_cache_info)(&vai); ++ ++ return True; ++ } ++ + #else + # error "Unknown arch" + #endif +@@ -2367,6 +2554,9 @@ Int VG_(machine_get_size_of_largest_guest_register) ( void ) + # elif defined(VGA_mips64) + return 8; + ++# elif defined(VGA_loongarch64) ++ return 8; ++ + # else + # error "Unknown arch" + # endif +@@ -2383,7 +2573,7 @@ void* VG_(fnptr_to_fnentry)( void* f ) + || defined(VGP_s390x_linux) || defined(VGP_mips32_linux) \ + || defined(VGP_mips64_linux) || defined(VGP_arm64_linux) \ + || defined(VGP_x86_solaris) || defined(VGP_amd64_solaris) \ +- || defined(VGP_nanomips_linux) ++ || defined(VGP_nanomips_linux) || defined(VGP_loongarch64_linux) + return f; + # elif defined(VGP_ppc64be_linux) + /* ppc64-linux uses the AIX scheme, in which f is a pointer to a +diff --git a/coregrind/m_main.c b/coregrind/m_main.c +index a857e5afe..f9c0c6fb6 100644 +--- a/coregrind/m_main.c ++++ b/coregrind/m_main.c +@@ -1481,6 +1481,7 @@ Int valgrind_main ( Int argc, HChar **argv, HChar **envp ) + "AMD Athlon or above)\n"); + VG_(printf)(" * AMD Athlon64/Opteron\n"); + VG_(printf)(" * ARM (armv7)\n"); ++ VG_(printf)(" * LoongArch (3A5000 and above)\n"); + VG_(printf)(" * MIPS (mips32 and above; mips64 and above)\n"); + VG_(printf)(" * PowerPC (most; ppc405 and above)\n"); + VG_(printf)(" * System z (64bit only - s390x; z990 and above)\n"); +@@ -2534,6 +2535,11 @@ static void final_tidyup(ThreadId tid) + VG_TRACK(post_reg_write, Vg_CoreClientReq, tid, + offsetof(VexGuestS390XState, guest_r2), + sizeof(VG_(threads)[tid].arch.vex.guest_r2)); ++# elif defined(VGA_loongarch64) ++ VG_(threads)[tid].arch.vex.guest_R4 = to_run; ++ VG_TRACK(post_reg_write, Vg_CoreClientReq, tid, ++ offsetof(VexGuestLOONGARCH64State, guest_R4), ++ sizeof(VG_(threads)[tid].arch.vex.guest_R4)); + #else + I_die_here : architecture missing in m_main.c + #endif +@@ -3062,6 +3068,29 @@ asm( + ".set pop \n\t" + ".previous \n\t" + ); ++#elif defined(VGP_loongarch64_linux) ++asm(" \n\t" ++ ".text \n\t" ++ ".globl _start \n\t" ++ ".type _start,@function \n\t" ++ "_start: \n\t" ++ /* t0 = &vgPlain_interim_stack + VG_STACK_GUARD_SZB + ++ VG_DEFAULT_STACK_ACTIVE_SZB */ ++ "la.local $t0, vgPlain_interim_stack \n\t" ++ "li.w $t1, "VG_STRINGIFY(VG_STACK_GUARD_SZB)" \n\t" ++ "add.d $t0, $t0, $t1 \n\t" ++ "li.w $t2, "VG_STRINGIFY(VG_DEFAULT_STACK_ACTIVE_SZB)" \n\t" ++ "add.d $t0, $t0, $t2 \n\t" ++ /* allocate 16 bytes on the new stack in t0, and aligned */ ++ "addi.d $t0, $t0, -16 \n\t" ++ "bstrins.d $t0, $zero, 3, 0 \n\t" ++ /* a0 = sp, sp = t0, and then call _start_in_C_linux */ ++ "move $a0, $sp \n\t" ++ "move $sp, $t0 \n\t" ++ "la.local $t0, _start_in_C_linux \n\t" ++ "jr $t0 \n\t" ++ ".previous \n\t" ++); + #else + # error "Unknown platform" + #endif +@@ -3107,11 +3136,11 @@ void _start_in_C_linux ( UWord* pArgc ) + # if defined(VGP_ppc32_linux) || defined(VGP_ppc64be_linux) \ + || defined(VGP_ppc64le_linux) || defined(VGP_arm64_linux) \ + || defined(VGP_mips32_linux) || defined(VGP_mips64_linux) \ +- || 
defined(VGP_nanomips_linux) ++ || defined(VGP_nanomips_linux) || defined(VGP_loongarch64_linux) + { +- /* ppc32/ppc64, arm64, mips32/64 can be configured with different +- page sizes. Determine this early. This is an ugly hack and really +- should be moved into valgrind_main. */ ++ /* ppc32/ppc64, arm64, mips32/64, loongarch64 can be configured with ++ different page sizes. Determine this early. This is an ugly hack ++ and really should be moved into valgrind_main. */ + UWord *sp = &pArgc[1+argc+1]; + while (*sp++ != 0) + ; +diff --git a/coregrind/m_options.c b/coregrind/m_options.c +index 1483af2d9..640af7121 100644 +--- a/coregrind/m_options.c ++++ b/coregrind/m_options.c +@@ -203,7 +203,8 @@ UInt VG_(clo_unw_stack_scan_frames) = 5; + VgSmc VG_(clo_smc_check) = Vg_SmcAllNonFile; + #elif defined(VGA_ppc32) || defined(VGA_ppc64be) || defined(VGA_ppc64le) \ + || defined(VGA_arm) || defined(VGA_arm64) \ +- || defined(VGA_mips32) || defined(VGA_mips64) || defined(VGA_nanomips) ++ || defined(VGA_mips32) || defined(VGA_mips64) || defined(VGA_nanomips) \ ++ || defined(VGA_loongarch64) + VgSmc VG_(clo_smc_check) = Vg_SmcStack; + #else + # error "Unknown arch" +diff --git a/coregrind/m_redir.c b/coregrind/m_redir.c +index 37c67f4c1..a0bc86481 100644 +--- a/coregrind/m_redir.c ++++ b/coregrind/m_redir.c +@@ -1229,6 +1229,7 @@ Bool VG_(is_soname_ld_so) (const HChar *soname) + if (VG_STREQ(soname, VG_U_LD_LINUX_AARCH64_SO_1)) return True; + if (VG_STREQ(soname, VG_U_LD_LINUX_ARMHF_SO_3)) return True; + if (VG_STREQ(soname, VG_U_LD_LINUX_MIPSN8_S0_1)) return True; ++ if (VG_STREQ(soname, VG_U_LD_LINUX_LOONGARCH_LP64D_SO_1)) return True; + # elif defined(VGO_freebsd) + if (VG_STREQ(soname, VG_U_LD_ELF_SO_1)) return True; + if (VG_STREQ(soname, VG_U_LD_ELF32_SO_1)) return True; +@@ -1668,6 +1669,22 @@ void VG_(redir_initialise) ( void ) + ); + } + ++#elif defined(VGP_loongarch64_linux) ++ /* If we're using memcheck, use these intercepts right from ++ the start, otherwise ld.so makes a lot of noise. */ ++ if (0==VG_(strcmp)("Memcheck", VG_(details).name)) { ++ add_hardwired_spec( ++ "ld-linux-loongarch-lp64d.so.1", "strlen", ++ (Addr)&VG_(loongarch64_linux_REDIR_FOR_strlen), ++ complain_about_stripped_glibc_ldso ++ ); ++ add_hardwired_spec( ++ "ld-linux-loongarch-lp64d.so.1", "strchr", ++ (Addr)&VG_(loongarch64_linux_REDIR_FOR_strchr), ++ complain_about_stripped_glibc_ldso ++ ); ++ } ++ + # elif defined(VGP_x86_solaris) + /* If we're using memcheck, use these intercepts right from + the start, otherwise ld.so makes a lot of noise. 
*/ +diff --git a/coregrind/m_scheduler/scheduler.c b/coregrind/m_scheduler/scheduler.c +index 3062c1afc..27dc24c8a 100644 +--- a/coregrind/m_scheduler/scheduler.c ++++ b/coregrind/m_scheduler/scheduler.c +@@ -271,6 +271,7 @@ const HChar* name_of_sched_event ( UInt event ) + case VEX_TRC_JMP_SIGBUS: return "SIGBUS"; + case VEX_TRC_JMP_SIGFPE_INTOVF: + case VEX_TRC_JMP_SIGFPE_INTDIV: return "SIGFPE"; ++ case VEX_TRC_JMP_SIGSYS: return "SIGSYS"; + case VEX_TRC_JMP_EMWARN: return "EMWARN"; + case VEX_TRC_JMP_EMFAIL: return "EMFAIL"; + case VEX_TRC_JMP_CLIENTREQ: return "CLIENTREQ"; +@@ -1657,6 +1658,10 @@ VgSchedReturnCode VG_(scheduler) ( ThreadId tid ) + VG_(synth_sigfpe)(tid, VKI_FPE_INTOVF); + break; + ++ case VEX_TRC_JMP_SIGSYS: ++ VG_(synth_sigsys)(tid); ++ break; ++ + case VEX_TRC_JMP_NODECODE: { + Addr addr = VG_(get_IP)(tid); + +@@ -1821,6 +1826,9 @@ void VG_(nuke_all_threads_except) ( ThreadId me, VgSchedReturnCode src ) + #elif defined(VGA_mips32) || defined(VGA_mips64) || defined(VGA_nanomips) + # define VG_CLREQ_ARGS guest_r12 + # define VG_CLREQ_RET guest_r11 ++#elif defined(VGA_loongarch64) ++# define VG_CLREQ_ARGS guest_R12 ++# define VG_CLREQ_RET guest_R11 + #else + # error Unknown arch + #endif +diff --git a/coregrind/m_sigframe/sigframe-loongarch64-linux.c b/coregrind/m_sigframe/sigframe-loongarch64-linux.c +new file mode 100644 +index 000000000..eda6c885c +--- /dev/null ++++ b/coregrind/m_sigframe/sigframe-loongarch64-linux.c +@@ -0,0 +1,285 @@ ++ ++/*--------------------------------------------------------------------*/ ++/*--- Create/destroy signal delivery frames. ---*/ ++/*--- sigframe-loongarch64-linux.c ---*/ ++/*--------------------------------------------------------------------*/ ++ ++/* ++ This file is part of Valgrind, a dynamic binary instrumentation ++ framework. ++ ++ Copyright (C) 2021-2022 Loongson Technology Corporation Limited ++ ++ This program is free software; you can redistribute it and/or ++ modify it under the terms of the GNU General Public License as ++ published by the Free Software Foundation; either version 2 of the ++ License, or (at your option) any later version. ++ ++ This program is distributed in the hope that it will be useful, but ++ WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ General Public License for more details. ++ ++ You should have received a copy of the GNU General Public License ++ along with this program; if not, see . ++ ++ The GNU General Public License is contained in the file COPYING. 
++*/ ++ ++#if defined(VGP_loongarch64_linux) ++ ++#include "pub_core_basics.h" ++#include "pub_core_vki.h" ++#include "pub_core_vkiscnums.h" ++#include "pub_core_threadstate.h" ++#include "pub_core_aspacemgr.h" ++#include "pub_core_libcbase.h" ++#include "pub_core_libcassert.h" ++#include "pub_core_libcprint.h" ++#include "pub_core_machine.h" ++#include "pub_core_options.h" ++#include "pub_core_sigframe.h" ++#include "pub_core_signals.h" ++#include "pub_core_tooliface.h" ++#include "pub_core_trampoline.h" ++#include "priv_sigframe.h" ++ ++ ++/*------------------------------------------------------------*/ ++/*--- Signal frame layouts ---*/ ++/*------------------------------------------------------------*/ ++ ++struct vg_sig_private { ++ UInt magicPI; ++ UInt sigNo_private; ++ VexGuestLOONGARCH64State vex_shadow1; ++ VexGuestLOONGARCH64State vex_shadow2; ++}; ++ ++struct rt_sigframe { ++ struct vki_siginfo rs_info; ++ struct vki_ucontext rs_uctx; ++ struct vg_sig_private priv; ++}; ++ ++ ++/*------------------------------------------------------------*/ ++/*--- Creating signal frames ---*/ ++/*------------------------------------------------------------*/ ++ ++static void create_siginfo ( ThreadId tid, ++ struct rt_sigframe *frame, ++ const vki_siginfo_t *si) ++{ ++ VG_TRACK(pre_mem_write, Vg_CoreSignal, tid, "signal frame siginfo", ++ (Addr)&frame->rs_info, sizeof(frame->rs_info)); ++ ++ VG_(memcpy)(&frame->rs_info, si, sizeof(vki_siginfo_t)); ++ ++ VG_TRACK(post_mem_write, Vg_CoreSignal, tid, ++ (Addr)&frame->rs_info, sizeof(frame->rs_info)); ++} ++ ++static void create_sigcontext ( ThreadState *tst, ++ struct vki_sigcontext **sc) ++{ ++ struct vki_sigcontext *sctx = *sc; ++ ++ VG_TRACK(pre_mem_write, Vg_CoreSignal, tst->tid, "signal frame mcontext", ++ (Addr)sctx, sizeof(ULong) * 32); ++ ++ sctx->sc_regs[1] = tst->arch.vex.guest_R1; ++ sctx->sc_regs[2] = tst->arch.vex.guest_R2; ++ sctx->sc_regs[3] = tst->arch.vex.guest_R3; ++ sctx->sc_regs[4] = tst->arch.vex.guest_R4; ++ sctx->sc_regs[5] = tst->arch.vex.guest_R5; ++ sctx->sc_regs[6] = tst->arch.vex.guest_R6; ++ sctx->sc_regs[7] = tst->arch.vex.guest_R7; ++ sctx->sc_regs[8] = tst->arch.vex.guest_R8; ++ sctx->sc_regs[9] = tst->arch.vex.guest_R9; ++ sctx->sc_regs[10] = tst->arch.vex.guest_R10; ++ sctx->sc_regs[11] = tst->arch.vex.guest_R11; ++ sctx->sc_regs[12] = tst->arch.vex.guest_R12; ++ sctx->sc_regs[13] = tst->arch.vex.guest_R13; ++ sctx->sc_regs[14] = tst->arch.vex.guest_R14; ++ sctx->sc_regs[15] = tst->arch.vex.guest_R15; ++ sctx->sc_regs[16] = tst->arch.vex.guest_R16; ++ sctx->sc_regs[17] = tst->arch.vex.guest_R17; ++ sctx->sc_regs[18] = tst->arch.vex.guest_R18; ++ sctx->sc_regs[19] = tst->arch.vex.guest_R19; ++ sctx->sc_regs[20] = tst->arch.vex.guest_R20; ++ sctx->sc_regs[21] = tst->arch.vex.guest_R21; ++ sctx->sc_regs[22] = tst->arch.vex.guest_R22; ++ sctx->sc_regs[23] = tst->arch.vex.guest_R23; ++ sctx->sc_regs[24] = tst->arch.vex.guest_R24; ++ sctx->sc_regs[25] = tst->arch.vex.guest_R25; ++ sctx->sc_regs[26] = tst->arch.vex.guest_R26; ++ sctx->sc_regs[27] = tst->arch.vex.guest_R27; ++ sctx->sc_regs[28] = tst->arch.vex.guest_R28; ++ sctx->sc_regs[29] = tst->arch.vex.guest_R29; ++ sctx->sc_regs[30] = tst->arch.vex.guest_R30; ++ sctx->sc_regs[31] = tst->arch.vex.guest_R31; ++ sctx->sc_pc = tst->arch.vex.guest_PC; ++} ++ ++static void create_ucontext ( ThreadState *tst, ++ ThreadId tid, ++ struct vki_ucontext *uc, ++ const vki_sigset_t *mask, ++ struct vki_sigcontext **sc, ++ const vki_siginfo_t *siginfo) ++{ ++ 
VG_TRACK(pre_mem_write, Vg_CoreSignal, tid, "signal frame ucontext", ++ (Addr)uc, offsetof(struct vki_ucontext, uc_mcontext)); ++ ++ uc->uc_flags = 0; ++ uc->uc_link = 0; ++ uc->uc_stack = tst->altstack; ++ uc->uc_sigmask = *mask; ++ ++ VG_TRACK(post_mem_write, Vg_CoreSignal, tid, (Addr)uc, ++ offsetof(struct vki_ucontext, uc_mcontext)); ++ ++ create_sigcontext(tst, sc); ++} ++ ++/* EXPORTED */ ++void VG_(sigframe_create) ( ThreadId tid, ++ Bool on_altstack, ++ Addr sp_top_of_frame, ++ const vki_siginfo_t *siginfo, ++ const struct vki_ucontext *siguc, ++ void *handler, ++ UInt flags, ++ const vki_sigset_t *mask, ++ void *restorer ) ++{ ++ UInt size = sizeof(struct rt_sigframe); ++ Addr sp = VG_ROUNDDN(sp_top_of_frame - size, 16); ++ ++ ThreadState *tst = VG_(get_ThreadState)(tid); ++ if (! ML_(sf_maybe_extend_stack)(tst, sp, size, flags)) ++ return; ++ ++ struct rt_sigframe *frame = (struct rt_sigframe *)sp; ++ create_siginfo(tid, frame, siginfo); ++ ++ struct vki_ucontext *uctx = &frame->rs_uctx; ++ struct vki_sigcontext *sctx = &(frame->rs_uctx.uc_mcontext); ++ create_ucontext(tst, tid, uctx, mask, &sctx, siginfo); ++ ++ /* ++ Arguments to signal handler: ++ ++ a0 = signal number ++ a1 = pointer to siginfo ++ a2 = pointer to ucontext ++ ++ csr_era point to the signal handler, $r3 (sp) points to ++ the struct rt_sigframe. ++ */ ++ ++ Int sigNo = siginfo->si_signo; ++ tst->arch.vex.guest_R4 = sigNo; ++ tst->arch.vex.guest_R5 = (Addr) &frame->rs_info; ++ tst->arch.vex.guest_R6 = (Addr) &frame->rs_uctx; ++ tst->arch.vex.guest_R3 = (Addr) frame; ++ tst->arch.vex.guest_R1 = (Addr) &VG_(loongarch64_linux_SUBST_FOR_rt_sigreturn); ++ ++ struct vg_sig_private *priv = &frame->priv; ++ priv->magicPI = 0x31415927; ++ priv->sigNo_private = sigNo; ++ priv->vex_shadow1 = tst->arch.vex_shadow1; ++ priv->vex_shadow2 = tst->arch.vex_shadow2; ++ ++ /* Set the thread so it will next run the handler. */ ++ VG_TRACK(post_reg_write, Vg_CoreSignal, tid, VG_O_STACK_PTR, sizeof(Addr)); ++ ++ if (VG_(clo_trace_signals)) ++ VG_(printf)("handler = %p\n", handler); ++ ++ tst->arch.vex.guest_PC = (Addr) handler; ++ /* This thread needs to be marked runnable, but we leave that ++ the caller to do. 
*/ ++} ++ ++ ++/*------------------------------------------------------------*/ ++/*--- Destroying signal frames ---*/ ++/*------------------------------------------------------------*/ ++ ++static void restore_regs ( ThreadState *tst, ++ struct vki_sigcontext *mc) ++{ ++ tst->arch.vex.guest_R1 = mc->sc_regs[1]; ++ tst->arch.vex.guest_R2 = mc->sc_regs[2]; ++ tst->arch.vex.guest_R3 = mc->sc_regs[3]; ++ tst->arch.vex.guest_R4 = mc->sc_regs[4]; ++ tst->arch.vex.guest_R5 = mc->sc_regs[5]; ++ tst->arch.vex.guest_R6 = mc->sc_regs[6]; ++ tst->arch.vex.guest_R7 = mc->sc_regs[7]; ++ tst->arch.vex.guest_R8 = mc->sc_regs[8]; ++ tst->arch.vex.guest_R9 = mc->sc_regs[9]; ++ tst->arch.vex.guest_R10 = mc->sc_regs[10]; ++ tst->arch.vex.guest_R11 = mc->sc_regs[11]; ++ tst->arch.vex.guest_R12 = mc->sc_regs[12]; ++ tst->arch.vex.guest_R13 = mc->sc_regs[13]; ++ tst->arch.vex.guest_R14 = mc->sc_regs[14]; ++ tst->arch.vex.guest_R15 = mc->sc_regs[15]; ++ tst->arch.vex.guest_R16 = mc->sc_regs[16]; ++ tst->arch.vex.guest_R17 = mc->sc_regs[17]; ++ tst->arch.vex.guest_R18 = mc->sc_regs[18]; ++ tst->arch.vex.guest_R19 = mc->sc_regs[19]; ++ tst->arch.vex.guest_R20 = mc->sc_regs[20]; ++ tst->arch.vex.guest_R21 = mc->sc_regs[21]; ++ tst->arch.vex.guest_R22 = mc->sc_regs[22]; ++ tst->arch.vex.guest_R23 = mc->sc_regs[23]; ++ tst->arch.vex.guest_R24 = mc->sc_regs[24]; ++ tst->arch.vex.guest_R25 = mc->sc_regs[25]; ++ tst->arch.vex.guest_R26 = mc->sc_regs[26]; ++ tst->arch.vex.guest_R27 = mc->sc_regs[27]; ++ tst->arch.vex.guest_R28 = mc->sc_regs[28]; ++ tst->arch.vex.guest_R29 = mc->sc_regs[29]; ++ tst->arch.vex.guest_R30 = mc->sc_regs[30]; ++ tst->arch.vex.guest_R31 = mc->sc_regs[31]; ++ tst->arch.vex.guest_PC = mc->sc_pc; ++} ++ ++/* EXPORTED */ ++void VG_(sigframe_destroy)( ThreadId tid, Bool isRT ) ++{ ++ vg_assert(VG_(is_valid_tid)(tid)); ++ ++ ThreadState *tst = VG_(get_ThreadState)(tid); ++ Addr sp = tst->arch.vex.guest_R3; ++ struct rt_sigframe *frame = (struct rt_sigframe *)sp; ++ struct vki_ucontext *uc = &frame->rs_uctx; ++ ++ tst->sig_mask = uc->uc_sigmask; ++ tst->tmp_sig_mask = uc->uc_sigmask; ++ ++ struct vki_sigcontext *mc = &uc->uc_mcontext; ++ restore_regs(tst, mc); ++ ++ struct vg_sig_private *priv = &frame->priv; ++ vg_assert(priv->magicPI == 0x31415927); ++ tst->arch.vex_shadow1 = priv->vex_shadow1; ++ tst->arch.vex_shadow2 = priv->vex_shadow2; ++ ++ UInt frame_size = sizeof(*frame); ++ VG_TRACK(die_mem_stack_signal, sp, frame_size); ++ ++ if (VG_(clo_trace_signals)) ++ VG_(message)(Vg_DebugMsg, ++ "VG_(signal_return) (thread %u): isRT=%d valid magic; PC=%#llx\n", ++ tid, isRT, tst->arch.vex.guest_PC); ++ ++ Int sigNo = priv->sigNo_private; ++ VG_TRACK( post_deliver_signal, tid, sigNo ); ++} ++ ++#endif /* defined(VGP_loongarch64_linux) */ ++ ++/*--------------------------------------------------------------------*/ ++/*--- end sigframe-loongarch64-linux.c ---*/ ++/*--------------------------------------------------------------------*/ +diff --git a/coregrind/m_signals.c b/coregrind/m_signals.c +index b3c94fcc9..62c689bed 100644 +--- a/coregrind/m_signals.c ++++ b/coregrind/m_signals.c +@@ -628,6 +628,22 @@ VgHashTable *ht_sigchld_ignore = NULL; + (srP)->misc.MIPS32.r28 = (uc)->uc_mcontext.sc_regs[28]; \ + } + ++#elif defined(VGP_loongarch64_linux) ++# define VG_UCONTEXT_INSTR_PTR(uc) (((uc)->uc_mcontext.sc_pc)) ++# define VG_UCONTEXT_STACK_PTR(uc) ((uc)->uc_mcontext.sc_regs[3]) ++# define VG_UCONTEXT_FRAME_PTR(uc) ((uc)->uc_mcontext.sc_regs[22]) ++# define VG_UCONTEXT_SYSCALL_NUM(uc) 
((uc)->uc_mcontext.sc_regs[11]) ++# define VG_UCONTEXT_SYSCALL_SYSRES(uc) \ ++ /* Convert the value in uc_mcontext.regs[4] into a SysRes. */ \ ++ VG_(mk_SysRes_loongarch64_linux)((uc)->uc_mcontext.sc_regs[4]) ++ ++# define VG_UCONTEXT_TO_UnwindStartRegs(srP, uc) \ ++ { (srP)->r_pc = (uc)->uc_mcontext.sc_pc; \ ++ (srP)->r_sp = (uc)->uc_mcontext.sc_regs[3]; \ ++ (srP)->misc.LOONGARCH64.r_fp = (uc)->uc_mcontext.sc_regs[22]; \ ++ (srP)->misc.LOONGARCH64.r_ra = (uc)->uc_mcontext.sc_regs[1]; \ ++ } ++ + #elif defined(VGP_x86_solaris) + # define VG_UCONTEXT_INSTR_PTR(uc) ((Addr)(uc)->uc_mcontext.gregs[VKI_EIP]) + # define VG_UCONTEXT_STACK_PTR(uc) ((Addr)(uc)->uc_mcontext.gregs[VKI_UESP]) +@@ -899,8 +915,10 @@ void calculate_SKSS_from_SCSS ( SKSS* dst ) + if (skss_handler != VKI_SIG_IGN && skss_handler != VKI_SIG_DFL) + skss_flags |= VKI_SA_SIGINFO; + ++# if !defined(VGP_loongarch64_linux) + /* use our own restorer */ + skss_flags |= VKI_SA_RESTORER; ++# endif + + /* Create SKSS entry for this signal. */ + if (sig != VKI_SIGKILL && sig != VKI_SIGSTOP) +@@ -1052,6 +1070,15 @@ extern void my_sigreturn(void); + " li $t4, " #name "\n" \ + " syscall[32]\n" \ + ".previous\n" ++ ++#elif defined(VGP_loongarch64_linux) ++# define _MY_SIGRETURN(name) \ ++ ".text\n" \ ++ "my_sigreturn:\n" \ ++ " li.w $a7, " #name "\n" \ ++ " syscall 0\n" \ ++ ".previous\n" ++ + #elif defined(VGP_x86_solaris) || defined(VGP_amd64_solaris) + /* Not used on Solaris. */ + # define _MY_SIGRETURN(name) \ +@@ -1111,7 +1138,8 @@ static void handle_SCSS_change ( Bool force_update ) + ksa.sa_flags = skss.skss_per_sig[sig].skss_flags; + # if !defined(VGP_ppc32_linux) && \ + !defined(VGP_x86_darwin) && !defined(VGP_amd64_darwin) && \ +- !defined(VGP_mips32_linux) && !defined(VGO_solaris) && !defined(VGO_freebsd) ++ !defined(VGP_mips32_linux) && !defined(VGO_solaris) && \ ++ !defined(VGO_freebsd) && !defined(VGP_loongarch64_linux) + ksa.sa_restorer = my_sigreturn; + # endif + /* Re above ifdef (also the assertion below), PaulM says: +@@ -1159,7 +1187,7 @@ static void handle_SCSS_change ( Bool force_update ) + !defined(VGP_x86_darwin) && !defined(VGP_amd64_darwin) && \ + !defined(VGP_mips32_linux) && !defined(VGP_mips64_linux) && \ + !defined(VGP_nanomips_linux) && !defined(VGO_solaris) && \ +- !defined(VGO_freebsd) ++ !defined(VGO_freebsd) && !defined(VGP_loongarch64_linux) + vg_assert(ksa_old.sa_restorer == my_sigreturn); + # endif + VG_(sigaddset)( &ksa_old.sa_mask, VKI_SIGKILL ); +@@ -1280,7 +1308,7 @@ SysRes VG_(do_sys_sigaction) ( Int signo, + old_act->sa_flags = scss.scss_per_sig[signo].scss_flags; + old_act->sa_mask = scss.scss_per_sig[signo].scss_mask; + # if !defined(VGO_darwin) && !defined(VGO_freebsd) && \ +- !defined(VGO_solaris) ++ !defined(VGO_solaris) && !defined(VGP_loongarch64_linux) + old_act->sa_restorer = scss.scss_per_sig[signo].scss_restorer; + # endif + } +@@ -1293,7 +1321,7 @@ SysRes VG_(do_sys_sigaction) ( Int signo, + + scss.scss_per_sig[signo].scss_restorer = NULL; + # if !defined(VGO_darwin) && !defined(VGO_freebsd) && \ +- !defined(VGO_solaris) ++ !defined(VGO_solaris) && !defined(VGP_loongarch64_linux) + scss.scss_per_sig[signo].scss_restorer = new_act->sa_restorer; + # endif + +@@ -1653,7 +1681,7 @@ void VG_(kill_self)(Int sigNo) + sa.ksa_handler = VKI_SIG_DFL; + sa.sa_flags = 0; + # if !defined(VGO_darwin) && !defined(VGO_freebsd) && \ +- !defined(VGO_solaris) ++ !defined(VGO_solaris) && !defined(VGP_loongarch64_linux) + sa.sa_restorer = 0; + # endif + VG_(sigemptyset)(&sa.sa_mask); +@@ -2296,8 +2324,9 @@ 
void VG_(synth_sigtrap)(ThreadId tid) + // Synthesise a SIGFPE. + void VG_(synth_sigfpe)(ThreadId tid, UInt code) + { +-// Only tested on mips32, mips64, s390x and nanomips. +-#if !defined(VGA_mips32) && !defined(VGA_mips64) && !defined(VGA_s390x) && !defined(VGA_nanomips) ++// Only tested on mips32, mips64, s390x, nanomips and loongarch64. ++#if !defined(VGA_mips32) && !defined(VGA_mips64) && !defined(VGA_s390x) \ ++ && !defined(VGA_nanomips) && !defined(VGA_loongarch64) + vg_assert(0); + #else + vki_siginfo_t info; +@@ -2319,6 +2348,30 @@ void VG_(synth_sigfpe)(ThreadId tid, UInt code) + #endif + } + ++// Synthesise a SIGSYS. ++void VG_(synth_sigsys)(ThreadId tid) ++{ ++// Only tested on loongarch64-linux. ++#if !defined(VGP_loongarch64_linux) ++ vg_assert(0); ++#else ++ vki_siginfo_t info; ++ ++ vg_assert(VG_(threads)[tid].status == VgTs_Runnable); ++ ++ VG_(memset)(&info, 0, sizeof(info)); ++ info.si_signo = VKI_SIGSYS; ++ info.si_code = VKI_SI_KERNEL; ++ ++ if (VG_(gdbserver_report_signal) (&info, tid)) { ++ resume_scheduler(tid); ++ deliver_signal(tid, &info, NULL); ++ } ++ else ++ resume_scheduler(tid); ++#endif ++} ++ + /* Make a signal pending for a thread, for later delivery. + VG_(poll_signals) will arrange for it to be delivered at the right + time. +@@ -3043,7 +3096,7 @@ void pp_ksigaction ( vki_sigaction_toK_t* sa ) + sa->ksa_handler, + (UInt)sa->sa_flags, + # if !defined(VGO_darwin) && !defined(VGO_freebsd) && \ +- !defined(VGO_solaris) ++ !defined(VGO_solaris) && !defined(VGP_loongarch64_linux) + sa->sa_restorer + # else + (void*)0 +@@ -3066,7 +3119,7 @@ void VG_(set_default_handler)(Int signo) + sa.ksa_handler = VKI_SIG_DFL; + sa.sa_flags = 0; + # if !defined(VGO_darwin) && !defined(VGO_freebsd) && \ +- !defined(VGO_solaris) ++ !defined(VGO_solaris) && !defined(VGP_loongarch64_linux) + sa.sa_restorer = 0; + # endif + VG_(sigemptyset)(&sa.sa_mask); +@@ -3188,7 +3241,7 @@ void VG_(sigstartup_actions) ( void ) + tsa.ksa_handler = (void *)sync_signalhandler; + tsa.sa_flags = VKI_SA_SIGINFO; + # if !defined(VGO_darwin) && !defined(VGO_freebsd) && \ +- !defined(VGO_solaris) ++ !defined(VGO_solaris) && !defined(VGP_loongarch64_linux) + tsa.sa_restorer = 0; + # endif + VG_(sigfillset)(&tsa.sa_mask); +@@ -3216,7 +3269,7 @@ void VG_(sigstartup_actions) ( void ) + + scss.scss_per_sig[i].scss_restorer = NULL; + # if !defined(VGO_darwin) && !defined(VGO_freebsd) && \ +- !defined(VGO_solaris) ++ !defined(VGO_solaris) && !defined(VGP_loongarch64_linux) + scss.scss_per_sig[i].scss_restorer = sa.sa_restorer; + # endif + +diff --git a/coregrind/m_stacktrace.c b/coregrind/m_stacktrace.c +index 308bebdd8..df13de1fe 100644 +--- a/coregrind/m_stacktrace.c ++++ b/coregrind/m_stacktrace.c +@@ -1502,6 +1502,100 @@ UInt VG_(get_StackTrace_wrk) ( ThreadId tid_if_known, + + #endif + ++/* ---------------------- loongarch64 ----------------------- */ ++ ++#if defined(VGP_loongarch64_linux) ++UInt VG_(get_StackTrace_wrk) ( ThreadId tid_if_known, ++ /*OUT*/Addr* ips, UInt max_n_ips, ++ /*OUT*/Addr* sps, /*OUT*/Addr* fps, ++ const UnwindStartRegs* startRegs, ++ Addr fp_max_orig ) ++{ ++ Bool debug = False; ++ Int i; ++ Addr fp_max; ++ UInt n_found = 0; ++ const Int cmrf = VG_(clo_merge_recursive_frames); ++ ++ vg_assert(sizeof(Addr) == sizeof(UWord)); ++ vg_assert(sizeof(Addr) == sizeof(void*)); ++ ++ D3UnwindRegs uregs; ++ uregs.pc = startRegs->r_pc; ++ uregs.sp = startRegs->r_sp; ++ uregs.fp = startRegs->misc.LOONGARCH64.r_fp; ++ uregs.ra = startRegs->misc.LOONGARCH64.r_ra; ++ Addr fp_min = uregs.sp - 
VG_STACK_REDZONE_SZB; ++ ++ /* Snaffle IPs from the client's stack into ips[0 .. max_n_ips-1], ++ stopping when the trail goes cold, which we guess to be ++ when FP is not a reasonable stack location. */ ++ ++ fp_max = VG_PGROUNDUP(fp_max_orig); ++ if (fp_max >= sizeof(Addr)) ++ fp_max -= sizeof(Addr); ++ ++ if (debug) ++ VG_(printf)("\nmax_n_ips=%u fp_min=0x%lx fp_max_orig=0x%lx, " ++ "fp_max=0x%lx pc=0x%lx sp=0x%lx fp=0x%lx ra=0x%lx\n", ++ max_n_ips, fp_min, fp_max_orig, fp_max, ++ uregs.pc, uregs.sp, uregs.fp, uregs.ra); ++ ++ if (sps) sps[0] = uregs.sp; ++ if (fps) fps[0] = uregs.fp; ++ ips[0] = uregs.pc; ++ i = 1; ++ ++ /* Loop unwinding the stack, using CFI. */ ++ while (True) { ++ if (debug) ++ VG_(printf)("i: %d, pc: 0x%lx, sp: 0x%lx, fp: 0x%lx, ra: 0x%lx\n", ++ i, uregs.pc, uregs.sp, uregs.fp, uregs.ra); ++ if (i >= max_n_ips) ++ break; ++ ++ if (VG_(use_CF_info)( &uregs, fp_min, fp_max )) { ++ if (sps) sps[i] = uregs.sp; ++ if (fps) fps[i] = uregs.fp; ++ ips[i++] = uregs.pc - 1; ++ if (debug) ++ VG_(printf)( ++ "USING CFI: pc: 0x%lx, sp: 0x%lx, fp: 0x%lx, ra: 0x%lx\n", ++ uregs.pc, uregs.sp, uregs.fp, uregs.ra); ++ uregs.pc = uregs.pc - 1; ++ RECURSIVE_MERGE(cmrf,ips,i); ++ continue; ++ } ++ ++ /* A problem on the first frame? Lets assume it was a bad jump. ++ We will use the link register and the current stack and frame ++ pointers and see if we can use the CFI in the next round. */ ++ if (i == 1) { ++ uregs.pc = uregs.ra; ++ uregs.ra = 0; ++ ++ if (sps) sps[i] = uregs.sp; ++ if (fps) fps[i] = uregs.fp; ++ ips[i++] = uregs.pc - 1; ++ if (debug) ++ VG_(printf)( ++ "USING bad-jump: pc: 0x%lx, sp: 0x%lx, fp: 0x%lx, ra: 0x%lx\n", ++ uregs.pc, uregs.sp, uregs.fp, uregs.ra); ++ uregs.pc = uregs.pc - 1; ++ RECURSIVE_MERGE(cmrf,ips,i); ++ continue; ++ } ++ ++ /* No luck. We have to give up. */ ++ break; ++ } ++ ++ n_found = i; ++ return n_found; ++} ++ ++#endif ++ + /*------------------------------------------------------------*/ + /*--- ---*/ + /*--- END platform-dependent unwinder worker functions ---*/ +diff --git a/coregrind/m_syscall.c b/coregrind/m_syscall.c +index 1e49ed412..84d30b921 100644 +--- a/coregrind/m_syscall.c ++++ b/coregrind/m_syscall.c +@@ -204,6 +204,17 @@ SysRes VG_(mk_SysRes_arm64_linux) ( Long val ) { + return res; + } + ++SysRes VG_(mk_SysRes_loongarch64_linux) ( UWord val ) { ++ SysRes res; ++ res._isError = val >= -4095 && val <= -1; ++ if (res._isError) { ++ res._val = (UWord)(-val); ++ } else { ++ res._val = (UWord)val; ++ } ++ return res; ++} ++ + /* Generic constructors. 
*/ + SysRes VG_(mk_SysRes_Success) ( UWord res ) { + SysRes r; +@@ -1034,6 +1045,22 @@ asm ( + ".previous \n\t" + ); + ++#elif defined(VGP_loongarch64_linux) ++extern UWord do_syscall_WRK (UWord a1, UWord a2, UWord a3, /* $a0, $a1, $a2 */ ++ UWord a4, UWord a5, UWord a6, /* $a3, $a4, $a5 */ ++ UWord syscall_no); /* $a6 */ ++asm ( ++ ".text \n\t" ++ ".globl do_syscall_WRK \n\t" ++ ".type do_syscall_WRK, @function \n\t" ++ "do_syscall_WRK: \n\t" ++ " move $a7, $a6 \n\t" /* a7 = syscall_no */ ++ " syscall 0 \n\t" ++ " jr $ra \n\t" ++ ".size do_syscall_WRK, .-do_syscall_WRK \n\t" ++ ".previous \n\t" ++); ++ + #elif defined(VGP_x86_solaris) + + extern ULong +@@ -1274,6 +1301,11 @@ SysRes VG_(do_syscall) ( UWord sysno, RegWord a1, RegWord a2, RegWord a3, + do_syscall_WRK(a1, a2, a3, a4, a5, a6, sysno, ®_a0); + return VG_(mk_SysRes_nanomips_linux)(reg_a0); + ++#elif defined(VGP_loongarch64_linux) ++ UWord val = 0; ++ val = do_syscall_WRK(a1, a2, a3, a4, a5, a6, sysno); ++ return VG_(mk_SysRes_loongarch64_linux)(val); ++ + # elif defined(VGP_x86_solaris) + UInt val, val2, err = False; + Bool restart; +diff --git a/coregrind/m_syswrap/priv_syswrap-linux.h b/coregrind/m_syswrap/priv_syswrap-linux.h +index a73b6247e..1b75d586a 100644 +--- a/coregrind/m_syswrap/priv_syswrap-linux.h ++++ b/coregrind/m_syswrap/priv_syswrap-linux.h +@@ -105,6 +105,7 @@ DECL_TEMPLATE(linux, sys_epoll_create1); + DECL_TEMPLATE(linux, sys_epoll_ctl); + DECL_TEMPLATE(linux, sys_epoll_wait); + DECL_TEMPLATE(linux, sys_epoll_pwait); ++DECL_TEMPLATE(linux, sys_epoll_pwait2); + DECL_TEMPLATE(linux, sys_eventfd); + DECL_TEMPLATE(linux, sys_eventfd2); + +@@ -330,6 +331,12 @@ DECL_TEMPLATE(linux, sys_openat2); + // Linux-specific (new in Linux 5.14) + DECL_TEMPLATE(linux, sys_memfd_secret); + ++// Linux-specific (since Linux 5.6) ++DECL_TEMPLATE(linux, sys_pidfd_getfd); ++ ++// Since Linux 6.6 ++DECL_TEMPLATE(linux, sys_fchmodat2); ++ + /* --------------------------------------------------------------------- + Wrappers for sockets and ipc-ery. 
These are split into standalone + procedures because x86-linux hides them inside multiplexors +@@ -508,6 +515,13 @@ extern UInt do_syscall_clone_nanomips_linux ( Word (*fn) (void *), /* a0 - 4 */ + Int* child_tid, /* a4 - 8 */ + Int* parent_tid, /* a5 - 9 */ + void* tls_ptr); /* a6 - 10 */ ++extern UInt do_syscall_clone_loongarch64_linux ( Word (*fn) (void *), /* a0 */ ++ void* stack, /* a1 */ ++ Int flags, /* a2 */ ++ void* arg, /* a3 */ ++ Int* child_tid, /* a4 */ ++ Int* parent_tid, /* a5 */ ++ void* tls_ptr); /* a6 */ + #endif // __PRIV_SYSWRAP_LINUX_H + + /*--------------------------------------------------------------------*/ +diff --git a/coregrind/m_syswrap/priv_types_n_macros.h b/coregrind/m_syswrap/priv_types_n_macros.h +index dd241839a..11a9d5e1b 100644 +--- a/coregrind/m_syswrap/priv_types_n_macros.h ++++ b/coregrind/m_syswrap/priv_types_n_macros.h +@@ -94,7 +94,8 @@ typedef + || defined(VGP_ppc32_linux) \ + || defined(VGP_arm_linux) || defined(VGP_s390x_linux) \ + || defined(VGP_arm64_linux) \ +- || defined(VGP_nanomips_linux) ++ || defined(VGP_nanomips_linux) \ ++ || defined(VGP_loongarch64_linux) + Int o_arg1; + Int o_arg2; + Int o_arg3; +diff --git a/coregrind/m_syswrap/syscall-loongarch64-linux.S b/coregrind/m_syswrap/syscall-loongarch64-linux.S +new file mode 100644 +index 000000000..5c18041ac +--- /dev/null ++++ b/coregrind/m_syswrap/syscall-loongarch64-linux.S +@@ -0,0 +1,143 @@ ++ ++/*--------------------------------------------------------------------*/ ++/*--- Support for doing system calls. syscall-loongarch64-linux.S ---*/ ++/*--------------------------------------------------------------------*/ ++ ++/* ++ This file is part of Valgrind, a dynamic binary instrumentation ++ framework. ++ ++ Copyright (C) 2021-2022 Loongson Technology Corporation Limited ++ ++ This program is free software; you can redistribute it and/or ++ modify it under the terms of the GNU General Public License as ++ published by the Free Software Foundation; either version 2 of the ++ License, or (at your option) any later version. ++ ++ This program is distributed in the hope that it will be useful, but ++ WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ General Public License for more details. ++ ++ You should have received a copy of the GNU General Public License ++ along with this program; if not, see . ++ ++ The GNU General Public License is contained in the file COPYING. ++*/ ++ ++#include "pub_core_basics_asm.h" ++ ++#if defined(VGP_loongarch64_linux) ++ ++#include "pub_core_vkiscnums_asm.h" ++#include "libvex_guest_offsets.h" ++ ++/*----------------------------------------------------------------*/ ++/* ++ Perform a syscall for the client. This will run a syscall ++ with the client's specific per-thread signal mask. ++ ++ The structure of this function is such that, if the syscall is ++ interrupted by a signal, we can determine exactly what ++ execution state we were in with respect to the execution of ++ the syscall by examining the value of PC in the signal ++ handler. This means that we can always do the appropriate ++ thing to precisely emulate the kernel's signal/syscall ++ interactions. ++ ++ The syscall number is taken from the argument, even though it ++ should also be in guest_state->guest_R11. The syscall result ++ is written back to guest_state->guest_R4 on completion. 
++ ++ VG_(fixup_guest_state_after_syscall_interrupted) does the ++ thread state fixup in the case where we were interrupted by a ++ signal. ++ ++ Prototype: ++ ++ UWord ML_(do_syscall_for_client_WRK)( ++ Int syscallno, // $r4 - a0 ++ void* guest_state, // $r5 - a1 ++ const vki_sigset_t *sysmask, // $r6 - a2 ++ const vki_sigset_t *postmask, // $r7 - a3 ++ Int nsigwords) // $r8 - a4 ++*/ ++ ++/* from vki-loongarch64-linux.h */ ++#define VKI_SIG_SETMASK 2 ++ ++.globl ML_(do_syscall_for_client_WRK) ++ML_(do_syscall_for_client_WRK): ++ ++ /* Save regs on stack */ ++ addi.d $sp, $sp, -24 ++ st.d $a1, $sp, 0 /* guest_state */ ++ st.d $a3, $sp, 8 /* postmask */ ++ st.d $a4, $sp, 16 /* sigsetSzB */ ++ ++1: li.w $a7, __NR_rt_sigprocmask ++ li.w $a0, VKI_SIG_SETMASK ++ move $a1, $a2 /* syscall_mask */ ++ move $a2, $a3 /* postmask */ ++ move $a3, $a4 /* sigsetSzB */ ++ syscall 0 ++ ++ bnez $a0, 5f ++ ++ /* Actually do the client syscall */ ++ ld.d $a6, $sp, 0 /* guest_state */ ++ ++ ld.d $a0, $a6, OFFSET_loongarch64_R4 /* a0 */ ++ ld.d $a1, $a6, OFFSET_loongarch64_R5 /* a1 */ ++ ld.d $a2, $a6, OFFSET_loongarch64_R6 /* a2 */ ++ ld.d $a3, $a6, OFFSET_loongarch64_R7 /* a3 */ ++ ld.d $a4, $a6, OFFSET_loongarch64_R8 /* a4 */ ++ ld.d $a5, $a6, OFFSET_loongarch64_R9 /* a5 */ ++ ++ ld.d $a7, $a6, OFFSET_loongarch64_R11 /* syscallno */ ++ ++2: syscall 0 ++ ++ /* Saving return values into guest state */ ++3: st.d $a0, $a6, OFFSET_loongarch64_R4 /* a0 */ ++ ++4: li.w $a7, __NR_rt_sigprocmask ++ li.w $a0, VKI_SIG_SETMASK ++ ld.d $a1, $sp, 8 /* postmask */ ++ move $a2, $zero /* 0 (zero) */ ++ ld.d $a3, $sp, 16 /* sigsetSzB */ ++ syscall 0 ++ ++ beqz $a0, 6f ++ ++5: /* error */ ++ li.w $a0, 0x8000 ++ ++6: /* Restore sp and return */ ++ addi.d $sp, $sp, 24 ++ jr $ra ++ ++.section .rodata ++/* export the ranges so that ++ VG_(fixup_guest_state_after_syscall_interrupted) can do the ++ right thing */ ++ ++.globl ML_(blksys_setup) ++.globl ML_(blksys_restart) ++.globl ML_(blksys_complete) ++.globl ML_(blksys_committed) ++.globl ML_(blksys_finished) ++ML_(blksys_setup): .quad 1b ++ML_(blksys_restart): .quad 2b ++ML_(blksys_complete): .quad 3b ++ML_(blksys_committed): .quad 4b ++ML_(blksys_finished): .quad 5b ++ ++#endif // defined(VGP_loongarch64_linux) ++ ++/* Let the linker know we don't need an executable stack */ ++MARK_STACK_NO_EXEC ++ ++/*--------------------------------------------------------------------*/ ++/*--- end syscall-loongarch64-linux.S ---*/ ++/*--------------------------------------------------------------------*/ +diff --git a/coregrind/m_syswrap/syswrap-amd64-linux.c b/coregrind/m_syswrap/syswrap-amd64-linux.c +index 1aeebd274..d93d93721 100644 +--- a/coregrind/m_syswrap/syswrap-amd64-linux.c ++++ b/coregrind/m_syswrap/syswrap-amd64-linux.c +@@ -880,10 +880,14 @@ static SyscallTableEntry syscall_table[] = { + GENX_(__NR_clone3, sys_ni_syscall), // 435 + LINXY(__NR_close_range, sys_close_range), // 436 + LINXY(__NR_openat2, sys_openat2), // 437 +- ++ LINXY(__NR_pidfd_getfd, sys_pidfd_getfd), // 438 + LINX_(__NR_faccessat2, sys_faccessat2), // 439 + ++ LINXY(__NR_epoll_pwait2, sys_epoll_pwait2), // 441 ++ + LINXY(__NR_memfd_secret, sys_memfd_secret), // 447 ++ ++ LINX_(__NR_fchmodat2, sys_fchmodat2), // 452 + }; + + SyscallTableEntry* ML_(get_linux_syscall_entry) ( UInt sysno ) +diff --git a/coregrind/m_syswrap/syswrap-arm-linux.c b/coregrind/m_syswrap/syswrap-arm-linux.c +index bca509589..492abdb82 100644 +--- a/coregrind/m_syswrap/syswrap-arm-linux.c ++++ b/coregrind/m_syswrap/syswrap-arm-linux.c +@@ 
-1056,7 +1056,11 @@ static SyscallTableEntry syscall_main_table[] = { + GENX_(__NR_clone3, sys_ni_syscall), // 435 + LINXY(__NR_close_range, sys_close_range), // 436 + ++ LINXY(__NR_pidfd_getfd, sys_pidfd_getfd), // 438 + LINX_(__NR_faccessat2, sys_faccessat2), // 439 ++ LINXY(__NR_epoll_pwait2, sys_epoll_pwait2), // 441 ++ ++ LINX_(__NR_fchmodat2, sys_fchmodat2), // 452 + }; + + +diff --git a/coregrind/m_syswrap/syswrap-arm64-linux.c b/coregrind/m_syswrap/syswrap-arm64-linux.c +index 953236000..d94228504 100644 +--- a/coregrind/m_syswrap/syswrap-arm64-linux.c ++++ b/coregrind/m_syswrap/syswrap-arm64-linux.c +@@ -835,9 +835,14 @@ static SyscallTableEntry syscall_main_table[] = { + GENX_(__NR_clone3, sys_ni_syscall), // 435 + LINXY(__NR_close_range, sys_close_range), // 436 + ++ LINXY(__NR_pidfd_getfd, sys_pidfd_getfd), // 438 + LINX_(__NR_faccessat2, sys_faccessat2), // 439 + ++ LINXY(__NR_epoll_pwait2, sys_epoll_pwait2), // 441 ++ + LINXY(__NR_memfd_secret, sys_memfd_secret), // 447 ++ ++ LINX_(__NR_fchmodat2, sys_fchmodat2), // 452 + }; + + +diff --git a/coregrind/m_syswrap/syswrap-generic.c b/coregrind/m_syswrap/syswrap-generic.c +index efdae60e1..88b0593cd 100644 +--- a/coregrind/m_syswrap/syswrap-generic.c ++++ b/coregrind/m_syswrap/syswrap-generic.c +@@ -3439,7 +3439,7 @@ POST(sys_newfstat) + #endif + + #if !defined(VGO_solaris) && !defined(VGP_arm64_linux) && \ +- !defined(VGP_nanomips_linux) ++ !defined(VGP_nanomips_linux) && !defined(VGP_loongarch64_linux) + static vki_sigset_t fork_saved_mask; + + // In Linux, the sys_fork() function varies across architectures, but we +diff --git a/coregrind/m_syswrap/syswrap-linux.c b/coregrind/m_syswrap/syswrap-linux.c +index 26f1fbee3..f84200cf4 100644 +--- a/coregrind/m_syswrap/syswrap-linux.c ++++ b/coregrind/m_syswrap/syswrap-linux.c +@@ -310,6 +310,16 @@ static void run_a_thread_NORETURN ( Word tidW ) + : "r" (VgTs_Empty), "n" (__NR_exit), "m" (tst->os_state.exitcode) + : "memory" , "$t4", "$a0" + ); ++#elif defined(VGP_loongarch64_linux) ++ asm volatile ( ++ "st.w %1, %0 \n\t" /* set tst->status = VgTs_Empty */ ++ "li.w $a7, %2 \n\t" /* set a7 = __NR_exit */ ++ "ld.w $a0, %3 \n\t" /* set a0 = tst->os_state.exitcode */ ++ "syscall 0 \n\t" /* exit(tst->os_state.exitcode) */ ++ : "=m" (tst->status) ++ : "r" (VgTs_Empty), "n" (__NR_exit), "m" (tst->os_state.exitcode) ++ : "memory", "a0", "a7" ++ ); + #else + # error Unknown platform + #endif +@@ -535,6 +545,13 @@ static SysRes clone_new_thread ( Word (*fn)(void *), + (ML_(start_thread_NORETURN), stack, flags, ctst, + child_tidptr, parent_tidptr, NULL); + res = VG_ (mk_SysRes_nanomips_linux) (ret); ++#elif defined(VGP_loongarch64_linux) ++ UInt ret = 0; ++ ctst->arch.vex.guest_R4 = 0; ++ ret = do_syscall_clone_loongarch64_linux ++ (ML_(start_thread_NORETURN), stack, flags, ctst, ++ child_tidptr, parent_tidptr, NULL); ++ res = VG_(mk_SysRes_loongarch64_linux)(ret); + #else + # error Unknown platform + #endif +@@ -597,6 +614,8 @@ static SysRes setup_child_tls (ThreadId ctid, Addr tlsaddr) + #elif defined(VGP_mips32_linux) || defined(VGP_nanomips_linux) + ctst->arch.vex.guest_ULR = tlsaddr; + ctst->arch.vex.guest_r27 = tlsaddr; ++#elif defined(VGP_loongarch64_linux) ++ ctst->arch.vex.guest_R2 = tlsaddr; + #else + # error Unknown platform + #endif +@@ -755,7 +774,7 @@ static SysRes ML_(do_fork_clone) ( ThreadId tid, UInt flags, + || defined(VGP_ppc64be_linux) || defined(VGP_ppc64le_linux) \ + || defined(VGP_arm_linux) || defined(VGP_mips32_linux) \ + || defined(VGP_mips64_linux) || 
defined(VGP_arm64_linux) \ +- || defined(VGP_nanomips_linux) ++ || defined(VGP_nanomips_linux) || defined(VGP_loongarch64_linux) + res = VG_(do_syscall5)( __NR_clone, flags, + (UWord)NULL, (UWord)parent_tidptr, + (UWord)NULL, (UWord)child_tidptr ); +@@ -828,7 +847,8 @@ PRE(sys_clone) + #define PRA_CHILD_TIDPTR PRA5 + #define ARG_TLS ARG4 + #define PRA_TLS PRA4 +-#elif defined(VGP_amd64_linux) || defined(VGP_s390x_linux) ++#elif defined(VGP_amd64_linux) || defined(VGP_s390x_linux) \ ++ || defined(VGP_loongarch64_linux) + #define ARG_CHILD_TIDPTR ARG4 + #define PRA_CHILD_TIDPTR PRA4 + #define ARG_TLS ARG5 +@@ -2165,6 +2185,29 @@ POST(sys_epoll_pwait) + epoll_post_helper (tid, arrghs, status); + } + ++PRE(sys_epoll_pwait2) ++{ ++ *flags |= SfMayBlock; ++ PRINT("sys_epoll_pwait2 ( %ld, %#" FMT_REGWORD "x, %ld, %#" ++ FMT_REGWORD "x, %#" FMT_REGWORD "x, %" FMT_REGWORD "u )", ++ SARG1, ARG2, SARG3, ARG4, ARG5, ARG6); ++ PRE_REG_READ6(long, "epoll_pwait2", ++ int, epfd, struct vki_epoll_event *, events, ++ int, maxevents, const struct timespec64 *, timeout, ++ vki_sigset_t *, sigmask, vki_size_t, sigsetsize); ++ /* Assume all (maxevents) events records should be (fully) writable. */ ++ PRE_MEM_WRITE( "epoll_pwait2(events)", ARG2, sizeof(struct vki_epoll_event)*ARG3); ++ /* epoll_pwait2 only supports 64bit timespec. */ ++ if (ARG4) ++ pre_read_timespec64(tid, "epoll_pwait2(timeout)", ARG4); ++ if (ARG5) ++ PRE_MEM_READ( "epoll_pwait2(sigmask)", ARG5, sizeof(vki_sigset_t) ); ++} ++POST(sys_epoll_pwait2) ++{ ++ epoll_post_helper (tid, arrghs, status); ++} ++ + PRE(sys_eventfd) + { + PRINT("sys_eventfd ( %" FMT_REGWORD "u )", ARG1); +@@ -4317,9 +4360,11 @@ PRE(sys_sigaction) + PRE_MEM_READ( "sigaction(act->sa_handler)", (Addr)&sa->ksa_handler, sizeof(sa->ksa_handler)); + PRE_MEM_READ( "sigaction(act->sa_mask)", (Addr)&sa->sa_mask, sizeof(sa->sa_mask)); + PRE_MEM_READ( "sigaction(act->sa_flags)", (Addr)&sa->sa_flags, sizeof(sa->sa_flags)); ++#if !defined(VGP_loongarch64_linux) + if (ML_(safe_to_deref)(sa,sizeof(struct vki_old_sigaction)) + && (sa->sa_flags & VKI_SA_RESTORER)) + PRE_MEM_READ( "sigaction(act->sa_restorer)", (Addr)&sa->sa_restorer, sizeof(sa->sa_restorer)); ++#endif + } + + if (ARG3 != 0) { +@@ -4349,7 +4394,9 @@ PRE(sys_sigaction) + + new.ksa_handler = oldnew->ksa_handler; + new.sa_flags = oldnew->sa_flags; ++#if !defined(VGP_loongarch64_linux) + new.sa_restorer = oldnew->sa_restorer; ++#endif + convert_sigset_to_rt(&oldnew->sa_mask, &new.sa_mask); + newp = &new; + } +@@ -4362,7 +4409,9 @@ PRE(sys_sigaction) + + oldold->ksa_handler = oldp->ksa_handler; + oldold->sa_flags = oldp->sa_flags; ++#if !defined(VGP_loongarch64_linux) + oldold->sa_restorer = oldp->sa_restorer; ++#endif + oldold->sa_mask = oldp->sa_mask.sig[0]; + } + } +@@ -4435,10 +4484,13 @@ PRE(sys_rt_sigaction) + PRE_MEM_READ( "rt_sigaction(act->sa_handler)", (Addr)&sa->ksa_handler, sizeof(sa->ksa_handler)); + PRE_MEM_READ( "rt_sigaction(act->sa_mask)", (Addr)&sa->sa_mask, sizeof(sa->sa_mask)); + PRE_MEM_READ( "rt_sigaction(act->sa_flags)", (Addr)&sa->sa_flags, sizeof(sa->sa_flags)); ++#if !defined(VGP_loongarch64_linux) + if (ML_(safe_to_deref)(sa,sizeof(vki_sigaction_toK_t)) + && (sa->sa_flags & VKI_SA_RESTORER)) + PRE_MEM_READ( "rt_sigaction(act->sa_restorer)", (Addr)&sa->sa_restorer, sizeof(sa->sa_restorer)); ++#endif + } ++ + if (ARG3 != 0) + PRE_MEM_WRITE( "rt_sigaction(oldact)", ARG3, sizeof(vki_sigaction_fromK_t)); + +@@ -6034,6 +6086,17 @@ PRE(sys_fchmodat) + PRE_MEM_RASCIIZ( "fchmodat(path)", ARG2 ); + } + 
++PRE(sys_fchmodat2) ++{ ++ PRINT("sys_fchmodat2 ( %ld, %#" FMT_REGWORD "x(%s), %" FMT_REGWORD "u, %" ++ FMT_REGWORD "u )", ++ SARG1, ARG2, (HChar*)(Addr)ARG2, ARG3, ARG4); ++ PRE_REG_READ4(long, "fchmodat2", ++ int, dfd, const char *, path, vki_mode_t, mode, ++ unsigned int, flags); ++ PRE_MEM_RASCIIZ( "fchmodat2(pathname)", ARG2 ); ++} ++ + PRE(sys_faccessat) + { + PRINT("sys_faccessat ( %ld, %#" FMT_REGWORD "x(%s), %ld )", +@@ -6790,7 +6853,8 @@ POST(sys_lookup_dcookie) + #endif + + #if defined(VGP_amd64_linux) || defined(VGP_s390x_linux) \ +- || defined(VGP_arm64_linux) || defined(VGP_nanomips_linux) ++ || defined(VGP_arm64_linux) || defined(VGP_nanomips_linux) \ ++ || defined(VGP_loongarch64_linux) + PRE(sys_lookup_dcookie) + { + *flags |= SfMayBlock; +@@ -13621,6 +13685,24 @@ POST(sys_pidfd_open) + } + } + ++PRE(sys_pidfd_getfd) ++{ ++ PRINT("sys_pidfd_getfd ( %ld, %ld, %ld )", SARG1, SARG2, SARG3); ++ PRE_REG_READ3(long, "pidfd_getfd", int, pidfd, int, targetfd, unsigned int, flags); ++} ++ ++POST(sys_pidfd_getfd) ++{ ++ vg_assert(SUCCESS); ++ if (!ML_(fd_allowed)(RES, "pidfd_getfd", tid, True)) { ++ VG_(close)(RES); ++ SET_STATUS_Failure( VKI_EMFILE ); ++ } else { ++ if (VG_(clo_track_fds)) ++ ML_(record_fd_open_nameless) (tid, RES); ++ } ++} ++ + #undef PRE + #undef POST + +diff --git a/coregrind/m_syswrap/syswrap-loongarch64-linux.c b/coregrind/m_syswrap/syswrap-loongarch64-linux.c +new file mode 100644 +index 000000000..108ddc465 +--- /dev/null ++++ b/coregrind/m_syswrap/syswrap-loongarch64-linux.c +@@ -0,0 +1,648 @@ ++ ++/*---------------------------------------------------------------------*/ ++/*--- Platform-specific syscalls stuff. syswrap-loongarch64-linux.c ---*/ ++/*---------------------------------------------------------------------*/ ++ ++/* ++ This file is part of Valgrind, a dynamic binary instrumentation ++ framework. ++ ++ Copyright (C) 2021-2022 Loongson Technology Corporation Limited ++ ++ This program is free software; you can redistribute it and/or ++ modify it under the terms of the GNU General Public License as ++ published by the Free Software Foundation; either version 2 of the ++ License, or (at your option) any later version. ++ ++ This program is distributed in the hope that it will be useful, but ++ WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ General Public License for more details. ++ ++ You should have received a copy of the GNU General Public License ++ along with this program; if not, see . ++ ++ The GNU General Public License is contained in the file COPYING. 
++*/ ++ ++#if defined(VGP_loongarch64_linux) ++ ++#include "pub_core_basics.h" ++#include "pub_core_vki.h" ++#include "pub_core_vkiscnums.h" ++#include "pub_core_threadstate.h" ++#include "pub_core_aspacemgr.h" ++#include "pub_core_libcbase.h" ++#include "pub_core_libcassert.h" ++#include "pub_core_libcprint.h" ++#include "pub_core_libcsignal.h" ++#include "pub_core_options.h" ++#include "pub_core_scheduler.h" ++#include "pub_core_sigframe.h" // For VG_(sigframe_destroy)() ++#include "pub_core_syscall.h" ++#include "pub_core_syswrap.h" ++#include "pub_core_tooliface.h" ++ ++#include "priv_types_n_macros.h" ++#include "priv_syswrap-generic.h" /* for decls of generic wrappers */ ++#include "priv_syswrap-linux.h" /* for decls of linux-ish wrappers */ ++#include "priv_syswrap-main.h" ++ ++ ++/* --------------------------------------------------------------------- ++ clone() handling ++ ------------------------------------------------------------------ */ ++ ++/* Call f(arg1), but first switch stacks, using 'stack' as the new ++ stack, and use 'retaddr' as f's return-to address. Also, clear all ++ the integer registers before entering f. */ ++__attribute__((noreturn)) ++void ML_(call_on_new_stack_0_1) ( Addr stack, ++ Addr retaddr, ++ void (*f) (Word), ++ Word arg1 ); ++asm ( ++".text\n" ++".globl vgModuleLocal_call_on_new_stack_0_1 \n\t" ++"vgModuleLocal_call_on_new_stack_0_1: \n\t" ++" move $sp, $a0 \n\t" /* sp = stack */ ++" move $ra, $a1 \n\t" /* ra = retaddr */ ++" move $t0, $a2 \n\t" /* t0 = f */ ++" move $a0, $a3 \n\t" /* a0 = arg1 */ ++" move $a1, $zero \n\t" /* zero all GP regs */ ++" move $a2, $zero \n\t" ++" move $a3, $zero \n\t" ++" move $a4, $zero \n\t" ++" move $a5, $zero \n\t" ++" move $a6, $zero \n\t" ++" move $a7, $zero \n\t" ++/* don't zero out t0 */ ++" move $t1, $zero \n\t" ++" move $t2, $zero \n\t" ++" move $t3, $zero \n\t" ++" move $t4, $zero \n\t" ++" move $t5, $zero \n\t" ++" move $t6, $zero \n\t" ++" move $t7, $zero \n\t" ++" move $t8, $zero \n\t" ++" jr $t0 \n\t" /* jump to f */ ++".previous \n\t" ++); ++ ++/* ++ Perform a clone system call. clone is strange because it has ++ fork()-like return-twice semantics, so it needs special ++ handling here. ++ ++ Upon entry, we have: ++ ++ Word (*fn)(void*) in a0 ++ void* child_stack in a1 ++ int flags in a2 ++ void* arg in a3 ++ pid_t* child_tid in a4 ++ pid_t* parent_tid in a5 ++ void* tls_ptr in a6 ++ ++ System call requires: ++ ++ unsigned long clone_flags in a0 ++ unsigned long newsp in a1 ++ int* parent_tidptr in a2 ++ int* child_tidptr in a3 ++ unsigned long tls in a4 ++ int __NR_clone in a7 ++*/ ++ ++#define __NR_CLONE VG_STRINGIFY(__NR_clone) ++#define __NR_EXIT VG_STRINGIFY(__NR_exit) ++ ++// See priv_syswrap-linux.h for arg profile. ++asm( ++".text \n\t" ++".globl do_syscall_clone_loongarch64_linux \n\t" ++"do_syscall_clone_loongarch64_linux: \n\t" ++/* Save ra */ ++" addi.d $sp, $sp, -16 \n\t" ++" st.d $ra, $sp, 0 \n\t" ++ ++/* Save fn and arg */ ++" addi.d $a1, $a1, -16 \n\t" ++" st.d $a0, $a1, 0 \n\t" /* fn */ ++" st.d $a3, $a1, 8 \n\t" /* arg */ ++ ++/* Call sys_clone */ ++" move $a0, $a2 \n\t" /* flags */ ++" move $a2, $a5 \n\t" /* parent */ ++" move $a3, $a4 \n\t" /* child */ ++" move $a4, $a6 \n\t" /* tls */ ++" li.w $a7, " __NR_CLONE " \n\t" ++" syscall 0 \n\t" ++ ++/* If we are a child? 
*/ ++" bnez $a0, 1f \n\t" ++ ++/* Restore fn and arg */ ++" ld.d $a1, $sp, 0 \n\t" /* fn */ ++" ld.d $a0, $sp, 8 \n\t" /* arg */ ++ ++/* Call fn(arg) */ ++" jr $a1 \n\t" ++ ++/* Call exit(a0) */ ++" li.w $a7, " __NR_EXIT" \n\t" ++" syscall 0 \n\t" ++ ++/* If we are parent or error, just return to caller */ ++"1: \n\t" ++" ld.d $ra, $sp, 0 \n\t" ++" addi.d $sp, $sp, 16 \n\t" ++" jr $ra \n\t" ++".previous \n\t" ++); ++ ++#undef __NR_CLONE ++#undef __NR_EXIT ++ ++/* --------------------------------------------------------------------- ++ More thread stuff ++ ------------------------------------------------------------------ */ ++ ++// loongarch64 doesn't have any architecture specific thread stuff that ++// needs to be cleaned up ++void VG_(cleanup_thread) ( ThreadArchState* arch ) ++{ ++} ++ ++/* --------------------------------------------------------------------- ++ PRE/POST wrappers for loongarch64/Linux-specific syscalls ++ ------------------------------------------------------------------ */ ++ ++#define PRE(name) DEFN_PRE_TEMPLATE(loongarch64_linux, name) ++#define POST(name) DEFN_POST_TEMPLATE(loongarch64_linux, name) ++ ++/* Add prototypes for the wrappers declared here, so that gcc doesn't ++ harass us for not having prototypes. Really this is a kludge -- ++ the right thing to do is to make these wrappers 'static' since they ++ aren't visible outside this file, but that requires even more macro ++ magic. */ ++DECL_TEMPLATE(loongarch64_linux, sys_ptrace); ++DECL_TEMPLATE(loongarch64_linux, sys_mmap); ++DECL_TEMPLATE(loongarch64_linux, sys_rt_sigreturn); ++ ++PRE(sys_ptrace) ++{ ++ PRINT("sys_ptrace ( %ld, %ld, %lx, %lx )", ++ SARG1, SARG2, ARG3, ARG4); ++ PRE_REG_READ4(int, "ptrace", ++ long, request, ++ long, pid, ++ unsigned long, addr, ++ unsigned long, data); ++ switch (ARG1) { ++ case VKI_PTRACE_PEEKTEXT: ++ case VKI_PTRACE_PEEKDATA: ++ case VKI_PTRACE_PEEKUSR: ++ PRE_MEM_WRITE("ptrace(peek)", ARG4, sizeof(long)); ++ break; ++ case VKI_PTRACE_GETEVENTMSG: ++ PRE_MEM_WRITE("ptrace(geteventmsg)", ARG4, sizeof(unsigned long)); ++ break; ++ case VKI_PTRACE_GETSIGINFO: ++ PRE_MEM_WRITE("ptrace(getsiginfo)", ARG4, sizeof(vki_siginfo_t)); ++ break; ++ case VKI_PTRACE_SETSIGINFO: ++ PRE_MEM_READ("ptrace(setsiginfo)", ARG4, sizeof(vki_siginfo_t)); ++ break; ++ case VKI_PTRACE_GETREGSET: ++ ML_(linux_PRE_getregset)(tid, ARG3, ARG4); ++ break; ++ default: ++ break; ++ } ++} ++ ++POST(sys_ptrace) ++{ ++ switch (ARG1) { ++ case VKI_PTRACE_TRACEME: ++ ML_(linux_POST_traceme)(tid); ++ break; ++ case VKI_PTRACE_PEEKTEXT: ++ case VKI_PTRACE_PEEKDATA: ++ case VKI_PTRACE_PEEKUSR: ++ POST_MEM_WRITE (ARG4, sizeof(long)); ++ break; ++ case VKI_PTRACE_GETEVENTMSG: ++ POST_MEM_WRITE (ARG4, sizeof(unsigned long)); ++ break; ++ case VKI_PTRACE_GETSIGINFO: ++ POST_MEM_WRITE (ARG4, sizeof(vki_siginfo_t)); ++ break; ++ case VKI_PTRACE_GETREGSET: ++ ML_(linux_POST_getregset)(tid, ARG3, ARG4); ++ break; ++ default: ++ break; ++ } ++} ++ ++PRE(sys_mmap) ++{ ++ SysRes r; ++ ++ PRINT("sys_mmap ( %#lx, %lu, %lu, %#lx, %lu, %lu )", ++ ARG1, ARG2, ARG3, ARG4, ARG5, ARG6 ); ++ PRE_REG_READ6(long, "mmap", ++ unsigned long, addr, unsigned long, len, ++ unsigned long, prot, unsigned long, flags, ++ unsigned long, fd, vki_off_t, offset); ++ ++ r = ML_(generic_PRE_sys_mmap)( tid, ARG1, ARG2, ARG3, ARG4, ARG5, ARG6 ); ++ SET_STATUS_from_SysRes(r); ++} ++ ++PRE(sys_rt_sigreturn) ++{ ++ /* See comments on PRE(sys_rt_sigreturn) in syswrap-loongarch64-linux.c for ++ an explanation of what follows. 
*/ ++ ThreadState* tst; ++ PRINT("rt_sigreturn ( )"); ++ ++ vg_assert(VG_(is_valid_tid)(tid)); ++ vg_assert(tid >= 1 && tid < VG_N_THREADS); ++ vg_assert(VG_(is_running_thread)(tid)); ++ ++ tst = VG_(get_ThreadState)(tid); ++ ++ /* This is only so that the PC is (might be) useful to report if ++ something goes wrong in the sigreturn */ ++ ML_(fixup_guest_state_to_restart_syscall)(&tst->arch); ++ ++ /* Restore register state from frame and remove it */ ++ VG_(sigframe_destroy)(tid, True); ++ ++ /* Tell the driver not to update the guest state with the "result", ++ and set a bogus result to keep it happy. */ ++ *flags |= SfNoWriteResult; ++ SET_STATUS_Success(0); ++ ++ /* Check to see if any signals arose as a result of this. */ ++ *flags |= SfPollAfter; ++} ++ ++#undef PRE ++#undef POST ++ ++/* --------------------------------------------------------------------- ++ The loongarch64/Linux syscall table ++ ------------------------------------------------------------------ */ ++ ++#define PLAX_(sysno, name) WRAPPER_ENTRY_X_(loongarch64_linux, sysno, name) ++#define PLAXY(sysno, name) WRAPPER_ENTRY_XY(loongarch64_linux, sysno, name) ++ ++// This table maps from __NR_xxx syscall numbers (from ++// linux/include/uapi/asm-generic/unistd.h) to the appropriate PRE/POST ++// sys_foo() wrappers on loongarch64 (as per sys_call_table in ++// linux/arch/loongarch/kernel/syscall.c). ++// ++// For those syscalls not handled by Valgrind, the annotation indicate its ++// arch/OS combination, eg. */* (generic), */Linux (Linux only), ?/? ++// (unknown). ++ ++static SyscallTableEntry syscall_main_table[] = { ++ LINXY(__NR_io_setup, sys_io_setup), // 0 ++ LINX_(__NR_io_destroy, sys_io_destroy), // 1 ++ LINX_(__NR_io_submit, sys_io_submit), // 2 ++ LINXY(__NR_io_cancel, sys_io_cancel), // 3 ++ LINXY(__NR_io_getevents, sys_io_getevents), // 4 ++ LINX_(__NR_setxattr, sys_setxattr), // 5 ++ LINX_(__NR_lsetxattr, sys_lsetxattr), // 6 ++ LINX_(__NR_fsetxattr, sys_fsetxattr), // 7 ++ LINXY(__NR_getxattr, sys_getxattr), // 8 ++ LINXY(__NR_lgetxattr, sys_lgetxattr), // 9 ++ LINXY(__NR_fgetxattr, sys_fgetxattr), // 10 ++ LINXY(__NR_listxattr, sys_listxattr), // 11 ++ LINXY(__NR_llistxattr, sys_llistxattr), // 12 ++ LINXY(__NR_flistxattr, sys_flistxattr), // 13 ++ LINX_(__NR_removexattr, sys_removexattr), // 14 ++ LINX_(__NR_lremovexattr, sys_lremovexattr), // 15 ++ LINX_(__NR_fremovexattr, sys_fremovexattr), // 16 ++ GENXY(__NR_getcwd, sys_getcwd), // 17 ++ LINXY(__NR_lookup_dcookie, sys_lookup_dcookie), // 18 ++ LINXY(__NR_eventfd2, sys_eventfd2), // 19 ++ LINXY(__NR_epoll_create1, sys_epoll_create1), // 20 ++ LINX_(__NR_epoll_ctl, sys_epoll_ctl), // 21 ++ LINXY(__NR_epoll_pwait, sys_epoll_pwait), // 22 ++ GENXY(__NR_dup, sys_dup), // 23 ++ LINXY(__NR_dup3, sys_dup3), // 24 ++ LINXY(__NR3264_fcntl, sys_fcntl), // 25 ++ LINXY(__NR_inotify_init1, sys_inotify_init1), // 26 ++ LINX_(__NR_inotify_add_watch, sys_inotify_add_watch), // 27 ++ LINX_(__NR_inotify_rm_watch, sys_inotify_rm_watch), // 28 ++ LINXY(__NR_ioctl, sys_ioctl), // 29 ++ LINX_(__NR_ioprio_set, sys_ioprio_set), // 30 ++ LINX_(__NR_ioprio_get, sys_ioprio_get), // 31 ++ GENX_(__NR_flock, sys_flock), // 32 ++ LINX_(__NR_mknodat, sys_mknodat), // 33 ++ LINX_(__NR_mkdirat, sys_mkdirat), // 34 ++ LINX_(__NR_unlinkat, sys_unlinkat), // 35 ++ LINX_(__NR_symlinkat, sys_symlinkat), // 36 ++ LINX_(__NR_linkat, sys_linkat), // 37 ++ // (__NR_renameat, sys_renameat), // 38 ++ LINX_(__NR_umount2, sys_umount), // 39 ++ LINX_(__NR_mount, sys_mount), // 40 ++ 
LINX_(__NR_pivot_root, sys_pivot_root), // 41 ++ // (__NR_nfsservctl, sys_ni_syscall), // 42 ++ GENXY(__NR3264_statfs, sys_statfs), // 43 ++ GENXY(__NR3264_fstatfs, sys_fstatfs), // 44 ++ GENX_(__NR3264_truncate, sys_truncate), // 45 ++ GENX_(__NR3264_ftruncate, sys_ftruncate), // 46 ++ LINX_(__NR_fallocate, sys_fallocate), // 47 ++ LINX_(__NR_faccessat, sys_faccessat), // 48 ++ GENX_(__NR_chdir, sys_chdir), // 49 ++ GENX_(__NR_fchdir, sys_fchdir), // 50 ++ GENX_(__NR_chroot, sys_chroot), // 51 ++ GENX_(__NR_fchmod, sys_fchmod), // 52 ++ LINX_(__NR_fchmodat, sys_fchmodat), // 53 ++ LINX_(__NR_fchownat, sys_fchownat), // 54 ++ GENX_(__NR_fchown, sys_fchown), // 55 ++ LINXY(__NR_openat, sys_openat), // 56 ++ GENXY(__NR_close, sys_close), // 57 ++ LINX_(__NR_vhangup, sys_vhangup), // 58 ++ LINXY(__NR_pipe2, sys_pipe2), // 59 ++ LINX_(__NR_quotactl, sys_quotactl), // 60 ++ GENXY(__NR_getdents64, sys_getdents64), // 61 ++ LINX_(__NR3264_lseek, sys_lseek), // 62 ++ GENXY(__NR_read, sys_read), // 63 ++ GENX_(__NR_write, sys_write), // 64 ++ GENXY(__NR_readv, sys_readv), // 65 ++ GENX_(__NR_writev, sys_writev), // 66 ++ GENXY(__NR_pread64, sys_pread64), // 67 ++ GENX_(__NR_pwrite64, sys_pwrite64), // 68 ++ LINXY(__NR_preadv, sys_preadv), // 69 ++ LINX_(__NR_pwritev, sys_pwritev), // 70 ++ LINXY(__NR3264_sendfile, sys_sendfile), // 71 ++ LINXY(__NR_pselect6, sys_pselect6), // 72 ++ LINXY(__NR_ppoll, sys_ppoll), // 73 ++ LINXY(__NR_signalfd4, sys_signalfd4), // 74 ++ LINX_(__NR_vmsplice, sys_vmsplice), // 75 ++ LINX_(__NR_splice, sys_splice), // 76 ++ LINX_(__NR_tee, sys_tee), // 77 ++ LINX_(__NR_readlinkat, sys_readlinkat), // 78 ++ // (__NR3264_fstatat, sys_newfstatat), // 79 ++ // (__NR3264_fstat, sys_newfstat), // 80 ++ GENX_(__NR_sync, sys_sync), // 81 ++ GENX_(__NR_fsync, sys_fsync), // 82 ++ GENX_(__NR_fdatasync, sys_fdatasync), // 83 ++ LINX_(__NR_sync_file_range, sys_sync_file_range), // 84 ++ LINXY(__NR_timerfd_create, sys_timerfd_create), // 85 ++ LINXY(__NR_timerfd_settime, sys_timerfd_settime), // 86 ++ LINXY(__NR_timerfd_gettime, sys_timerfd_gettime), // 87 ++ LINX_(__NR_utimensat, sys_utimensat), // 88 ++ GENX_(__NR_acct, sys_acct), // 89 ++ LINXY(__NR_capget, sys_capget), // 90 ++ LINX_(__NR_capset, sys_capset), // 91 ++ LINX_(__NR_personality, sys_personality), // 92 ++ GENX_(__NR_exit, sys_exit), // 93 ++ LINX_(__NR_exit_group, sys_exit_group), // 94 ++ LINXY(__NR_waitid, sys_waitid), // 95 ++ LINX_(__NR_set_tid_address, sys_set_tid_address), // 96 ++ LINX_(__NR_unshare, sys_unshare), // 97 ++ LINXY(__NR_futex, sys_futex), // 98 ++ LINX_(__NR_set_robust_list, sys_set_robust_list), // 99 ++ LINXY(__NR_get_robust_list, sys_get_robust_list), // 100 ++ GENXY(__NR_nanosleep, sys_nanosleep), // 101 ++ GENXY(__NR_getitimer, sys_getitimer), // 102 ++ GENXY(__NR_setitimer, sys_setitimer), // 103 ++ // (__NR_kexec_load, sys_kexec_load), // 104 ++ LINX_(__NR_init_module, sys_init_module), // 105 ++ LINX_(__NR_delete_module, sys_delete_module), // 106 ++ LINXY(__NR_timer_create, sys_timer_create), // 107 ++ LINXY(__NR_timer_gettime, sys_timer_gettime), // 108 ++ LINX_(__NR_timer_getoverrun, sys_timer_getoverrun), // 109 ++ LINXY(__NR_timer_settime, sys_timer_settime), // 110 ++ LINX_(__NR_timer_delete, sys_timer_delete), // 111 ++ LINX_(__NR_clock_settime, sys_clock_settime), // 112 ++ LINXY(__NR_clock_gettime, sys_clock_gettime), // 113 ++ LINXY(__NR_clock_getres, sys_clock_getres), // 114 ++ LINXY(__NR_clock_nanosleep, sys_clock_nanosleep), // 115 ++ LINXY(__NR_syslog, sys_syslog), // 116 ++ 
PLAXY(__NR_ptrace, sys_ptrace), // 117 ++ LINXY(__NR_sched_setparam, sys_sched_setparam), // 118 ++ LINX_(__NR_sched_setscheduler, sys_sched_setscheduler), // 119 ++ LINX_(__NR_sched_getscheduler, sys_sched_getscheduler), // 120 ++ LINXY(__NR_sched_getparam, sys_sched_getparam), // 121 ++ LINX_(__NR_sched_setaffinity, sys_sched_setaffinity), // 122 ++ LINXY(__NR_sched_getaffinity, sys_sched_getaffinity), // 123 ++ LINX_(__NR_sched_yield, sys_sched_yield), // 124 ++ LINX_(__NR_sched_get_priority_max, sys_sched_get_priority_max), // 125 ++ LINX_(__NR_sched_get_priority_min, sys_sched_get_priority_min), // 126 ++ LINXY(__NR_sched_rr_get_interval, sys_sched_rr_get_interval), // 127 ++ // (__NR_restart_syscall, sys_restart_syscall), // 128 ++ GENX_(__NR_kill, sys_kill), // 129 ++ LINXY(__NR_tkill, sys_tkill), // 130 ++ LINX_(__NR_tgkill, sys_tgkill), // 131 ++ GENXY(__NR_sigaltstack, sys_sigaltstack), // 132 ++ LINX_(__NR_rt_sigsuspend, sys_rt_sigsuspend), // 133 ++ LINXY(__NR_rt_sigaction, sys_rt_sigaction), // 134 ++ LINXY(__NR_rt_sigprocmask, sys_rt_sigprocmask), // 135 ++ LINXY(__NR_rt_sigpending, sys_rt_sigpending), // 136 ++ LINXY(__NR_rt_sigtimedwait, sys_rt_sigtimedwait), // 137 ++ LINXY(__NR_rt_sigqueueinfo, sys_rt_sigqueueinfo), // 138 ++ PLAX_(__NR_rt_sigreturn, sys_rt_sigreturn), // 139 ++ GENX_(__NR_setpriority, sys_setpriority), // 140 ++ GENX_(__NR_getpriority, sys_getpriority), // 141 ++ // (__NR_reboot, sys_reboot), // 142 ++ GENX_(__NR_setregid, sys_setregid), // 143 ++ GENX_(__NR_setgid, sys_setgid), // 144 ++ GENX_(__NR_setreuid, sys_setreuid), // 145 ++ GENX_(__NR_setuid, sys_setuid), // 146 ++ LINX_(__NR_setresuid, sys_setresuid), // 147 ++ LINXY(__NR_getresuid, sys_getresuid), // 148 ++ LINX_(__NR_setresgid, sys_setresgid), // 149 ++ LINXY(__NR_getresgid, sys_getresgid), // 150 ++ LINX_(__NR_setfsuid, sys_setfsuid), // 151 ++ LINX_(__NR_setfsgid, sys_setfsgid), // 152 ++ GENXY(__NR_times, sys_times), // 153 ++ GENX_(__NR_setpgid, sys_setpgid), // 154 ++ GENX_(__NR_getpgid, sys_getpgid), // 155 ++ GENX_(__NR_getsid, sys_getsid), // 156 ++ GENX_(__NR_setsid, sys_setsid), // 157 ++ GENXY(__NR_getgroups, sys_getgroups), // 158 ++ GENX_(__NR_setgroups, sys_setgroups), // 159 ++ GENXY(__NR_uname, sys_newuname), // 160 ++ GENX_(__NR_sethostname, sys_sethostname), // 161 ++ // (__NR_setdomainname, sys_setdomainname), // 162 ++ // (__NR_getrlimit, sys_old_getrlimit), // 163 ++ // (__NR_setrlimit, sys_setrlimit), // 164 ++ GENXY(__NR_getrusage, sys_getrusage), // 165 ++ GENX_(__NR_umask, sys_umask), // 166 ++ LINXY(__NR_prctl, sys_prctl), // 167 ++ LINXY(__NR_getcpu, sys_getcpu), // 168 ++ GENXY(__NR_gettimeofday, sys_gettimeofday), // 169 ++ GENX_(__NR_settimeofday, sys_settimeofday), // 170 ++ LINXY(__NR_adjtimex, sys_adjtimex), // 171 ++ GENX_(__NR_getpid, sys_getpid), // 172 ++ GENX_(__NR_getppid, sys_getppid), // 173 ++ GENX_(__NR_getuid, sys_getuid), // 174 ++ GENX_(__NR_geteuid, sys_geteuid), // 175 ++ GENX_(__NR_getgid, sys_getgid), // 176 ++ GENX_(__NR_getegid, sys_getegid), // 177 ++ LINX_(__NR_gettid, sys_gettid), // 178 ++ LINXY(__NR_sysinfo, sys_sysinfo), // 179 ++ LINXY(__NR_mq_open, sys_mq_open), // 180 ++ LINX_(__NR_mq_unlink, sys_mq_unlink), // 181 ++ LINX_(__NR_mq_timedsend, sys_mq_timedsend), // 182 ++ LINXY(__NR_mq_timedreceive, sys_mq_timedreceive), // 183 ++ LINX_(__NR_mq_notify, sys_mq_notify), // 184 ++ LINXY(__NR_mq_getsetattr, sys_mq_getsetattr), // 185 ++ LINX_(__NR_msgget, sys_msgget), // 186 ++ LINXY(__NR_msgctl, sys_msgctl), // 187 ++ 
LINXY(__NR_msgrcv, sys_msgrcv), // 188 ++ LINX_(__NR_msgsnd, sys_msgsnd), // 189 ++ LINX_(__NR_semget, sys_semget), // 190 ++ LINXY(__NR_semctl, sys_semctl), // 191 ++ LINX_(__NR_semtimedop, sys_semtimedop), // 192 ++ LINX_(__NR_semop, sys_semop), // 193 ++ LINX_(__NR_shmget, sys_shmget), // 194 ++ LINXY(__NR_shmctl, sys_shmctl), // 195 ++ LINXY(__NR_shmat, sys_shmat), // 196 ++ LINXY(__NR_shmdt, sys_shmdt), // 197 ++ LINXY(__NR_socket, sys_socket), // 198 ++ LINXY(__NR_socketpair, sys_socketpair), // 199 ++ LINX_(__NR_bind, sys_bind), // 200 ++ LINX_(__NR_listen, sys_listen), // 201 ++ LINXY(__NR_accept, sys_accept), // 202 ++ LINX_(__NR_connect, sys_connect), // 203 ++ LINXY(__NR_getsockname, sys_getsockname), // 204 ++ LINXY(__NR_getpeername, sys_getpeername), // 205 ++ LINX_(__NR_sendto, sys_sendto), // 206 ++ LINXY(__NR_recvfrom, sys_recvfrom), // 207 ++ LINX_(__NR_setsockopt, sys_setsockopt), // 208 ++ LINXY(__NR_getsockopt, sys_getsockopt), // 209 ++ LINX_(__NR_shutdown, sys_shutdown), // 210 ++ LINX_(__NR_sendmsg, sys_sendmsg), // 211 ++ LINXY(__NR_recvmsg, sys_recvmsg), // 212 ++ LINX_(__NR_readahead, sys_readahead), // 213 ++ GENX_(__NR_brk, sys_brk), // 214 ++ GENXY(__NR_munmap, sys_munmap), // 215 ++ GENX_(__NR_mremap, sys_mremap), // 216 ++ LINX_(__NR_add_key, sys_add_key), // 217 ++ LINX_(__NR_request_key, sys_request_key), // 218 ++ LINXY(__NR_keyctl, sys_keyctl), // 219 ++ LINX_(__NR_clone, sys_clone), // 220 ++ GENX_(__NR_execve, sys_execve), // 221 ++ PLAX_(__NR3264_mmap, sys_mmap), // 222 ++ LINX_(__NR3264_fadvise64, sys_fadvise64), // 223 ++ // (__NR_swapon, sys_swapon), // 224 ++ // (__NR_swapoff, sys_swapoff), // 225 ++ GENXY(__NR_mprotect, sys_mprotect), // 226 ++ GENX_(__NR_msync, sys_msync), // 227 ++ GENX_(__NR_mlock, sys_mlock), // 228 ++ GENX_(__NR_munlock, sys_munlock), // 229 ++ GENX_(__NR_mlockall, sys_mlockall), // 230 ++ LINX_(__NR_munlockall, sys_munlockall), // 231 ++ GENXY(__NR_mincore, sys_mincore), // 232 ++ GENX_(__NR_madvise, sys_madvise), // 233 ++ // (__NR_remap_file_pages, sys_remap_file_pages), // 234 ++ LINX_(__NR_mbind, sys_mbind), // 235 ++ LINXY(__NR_get_mempolicy, sys_get_mempolicy), // 236 ++ LINX_(__NR_set_mempolicy, sys_set_mempolicy), // 237 ++ // (__NR_migrate_pages, sys_migrate_pages), // 238 ++ LINXY(__NR_move_pages, sys_move_pages), // 239 ++ LINXY(__NR_rt_tgsigqueueinfo, sys_rt_tgsigqueueinfo), // 240 ++ LINXY(__NR_perf_event_open, sys_perf_event_open), // 241 ++ LINXY(__NR_accept4, sys_accept4), // 242 ++ LINXY(__NR_recvmmsg, sys_recvmmsg), // 243 ++ ++ GENXY(__NR_wait4, sys_wait4), // 260 ++ LINXY(__NR_prlimit64, sys_prlimit64), // 261 ++ LINXY(__NR_fanotify_init, sys_fanotify_init), // 262 ++ LINX_(__NR_fanotify_mark, sys_fanotify_mark), // 263 ++ LINXY(__NR_name_to_handle_at, sys_name_to_handle_at), // 264 ++ LINXY(__NR_open_by_handle_at, sys_open_by_handle_at), // 265 ++ LINXY(__NR_clock_adjtime, sys_clock_adjtime), // 266 ++ LINX_(__NR_syncfs, sys_syncfs), // 267 ++ LINX_(__NR_setns, sys_setns), // 268 ++ LINXY(__NR_sendmmsg, sys_sendmmsg), // 269 ++ LINXY(__NR_process_vm_readv, sys_process_vm_readv), // 270 ++ LINX_(__NR_process_vm_writev, sys_process_vm_writev), // 271 ++ LINX_(__NR_kcmp, sys_kcmp), // 272 ++ LINX_(__NR_finit_module, sys_finit_module), // 273 ++ LINX_(__NR_sched_setattr, sys_sched_setattr), // 274 ++ LINXY(__NR_sched_getattr, sys_sched_getattr), // 275 ++ LINX_(__NR_renameat2, sys_renameat2), // 276 ++ // (__NR_seccomp, sys_seccomp), // 277 ++ LINXY(__NR_getrandom, sys_getrandom), // 278 ++ 
LINXY(__NR_memfd_create, sys_memfd_create), // 279 ++ LINXY(__NR_bpf, sys_bpf), // 280 ++ LINX_(__NR_execveat, sys_execveat), // 281 ++ // (__NR_userfaultfd, sys_userfaultfd), // 282 ++ LINX_(__NR_membarrier, sys_membarrier), // 283 ++ // (__NR_mlock2, sys_mlock2), // 284 ++ LINX_(__NR_copy_file_range, sys_copy_file_range), // 285 ++ LINXY(__NR_preadv2, sys_preadv2), // 286 ++ LINX_(__NR_pwritev2, sys_pwritev2), // 287 ++ // (__NR_pkey_mprotect, sys_pkey_mprotect), // 288 ++ // (__NR_pkey_alloc, sys_pkey_alloc), // 289 ++ // (__NR_pkey_free, sys_pkey_free), // 290 ++ LINXY(__NR_statx, sys_statx), // 291 ++ // (__NR_io_pgetevents, sys_io_pgetevents), // 292 ++ // (__NR_rseq, sys_rseq), // 293 ++ // (__NR_kexec_file_load, sys_kexec_file_load), // 294 ++ ++ // (__NR_pidfd_send_signal, sys_pidfd_send_signal), // 424 ++ LINXY(__NR_io_uring_setup, sys_io_uring_setup), // 425 ++ LINXY(__NR_io_uring_enter, sys_io_uring_enter), // 426 ++ LINXY(__NR_io_uring_register, sys_io_uring_register), // 427 ++ // (__NR_open_tree, sys_open_tree), // 428 ++ // (__NR_move_mount, sys_move_mount), // 429 ++ // (__NR_fsopen, sys_fsopen), // 430 ++ // (__NR_fsconfig, sys_fsconfig), // 431 ++ // (__NR_fsmount, sys_fsmount), // 432 ++ // (__NR_fspick, sys_fspick), // 433 ++ // (__NR_pidfd_open, sys_pidfd_open), // 434 ++ GENX_(__NR_clone3, sys_ni_syscall), // 435 ++ LINXY(__NR_close_range, sys_close_range), // 436 ++ LINXY(__NR_openat2, sys_openat2), // 437 ++ LINXY(__NR_pidfd_getfd, sys_pidfd_getfd), // 438 ++ LINX_(__NR_faccessat2, sys_faccessat2), // 439 ++ // (__NR_process_madvise, sys_process_madvise), // 440 ++ LINXY(__NR_epoll_pwait2, sys_epoll_pwait2), // 441 ++ // (__NR_mount_setattr, sys_mount_setattr), // 442 ++ // (__NR_quotactl_fd, sys_quotactl_fd), // 443 ++ // (__NR_landlock_create_ruleset, sys_landlock_create_ruleset), // 444 ++ // (__NR_landlock_add_rule, sys_landlock_add_rule), // 445 ++ // (__NR_landlock_restrict_self, sys_landlock_restrict_self), // 446 ++ // (__NR_memfd_secret, sys_memfd_secret), // 447 ++ // (__NR_process_mrelease, sys_process_mrelease), // 448 ++ // (__NR_futex_waitv, sys_futex_waitv) // 449 ++ // (__NR_set_mempolicy_home_node, sys_set_mempolicy_home_node) // 450 ++ // (__NR_cachestat, sys_cachestat) // 451 ++ LINX_(__NR_fchmodat2, sys_fchmodat2) // 452 ++}; ++ ++SyscallTableEntry* ML_(get_linux_syscall_entry) ( UInt sysno ) ++{ ++ const UInt syscall_main_table_size ++ = sizeof(syscall_main_table) / sizeof(syscall_main_table[0]); ++ ++ /* Is it in the contiguous initial section of the table? 
*/ ++ if (sysno < syscall_main_table_size) { ++ SyscallTableEntry* sys = &syscall_main_table[sysno]; ++ if (sys->before == NULL) ++ return NULL; /* no entry */ ++ else ++ return sys; ++ } ++ ++ /* Can't find a wrapper */ ++ return NULL; ++} ++ ++#endif /* defined(VGP_loongarch64_linux) */ ++ ++/*--------------------------------------------------------------------*/ ++/*--- end syswrap-loongarch64-linux.c ---*/ ++/*--------------------------------------------------------------------*/ +diff --git a/coregrind/m_syswrap/syswrap-main.c b/coregrind/m_syswrap/syswrap-main.c +index 4f8c0fe1c..b4f95e570 100644 +--- a/coregrind/m_syswrap/syswrap-main.c ++++ b/coregrind/m_syswrap/syswrap-main.c +@@ -60,20 +60,21 @@ + /* Useful info which needs to be recorded somewhere: + Use of registers in syscalls is: + +- NUM ARG1 ARG2 ARG3 ARG4 ARG5 ARG6 ARG7 ARG8 RESULT ++ NUM ARG1 ARG2 ARG3 ARG4 ARG5 ARG6 ARG7 ARG8 RESULT + LINUX: +- x86 eax ebx ecx edx esi edi ebp n/a n/a eax (== NUM) +- amd64 rax rdi rsi rdx r10 r8 r9 n/a n/a rax (== NUM) +- ppc32 r0 r3 r4 r5 r6 r7 r8 n/a n/a r3+CR0.SO (== ARG1) +- ppc64 r0 r3 r4 r5 r6 r7 r8 n/a n/a r3+CR0.SO (== ARG1) +- arm r7 r0 r1 r2 r3 r4 r5 n/a n/a r0 (== ARG1) +- mips32 v0 a0 a1 a2 a3 stack stack n/a n/a v0 (== NUM) +- mips64 v0 a0 a1 a2 a3 a4 a5 a6 a7 v0 (== NUM) +- arm64 x8 x0 x1 x2 x3 x4 x5 n/a n/a x0 ?? (== ARG1??) ++ x86 eax ebx ecx edx esi edi ebp n/a n/a eax (== NUM) ++ amd64 rax rdi rsi rdx r10 r8 r9 n/a n/a rax (== NUM) ++ ppc32 r0 r3 r4 r5 r6 r7 r8 n/a n/a r3+CR0.SO (== ARG1) ++ ppc64 r0 r3 r4 r5 r6 r7 r8 n/a n/a r3+CR0.SO (== ARG1) ++ arm r7 r0 r1 r2 r3 r4 r5 n/a n/a r0 (== ARG1) ++ mips32 v0 a0 a1 a2 a3 stack stack n/a n/a v0 (== NUM) ++ mips64 v0 a0 a1 a2 a3 a4 a5 a6 a7 v0 (== NUM) ++ arm64 x8 x0 x1 x2 x3 x4 x5 n/a n/a x0 ?? (== ARG1??) ++ loongarch64 r11 r4 r5 r6 r7 r8 r9 n/a n/a r4 (== ARG1) + + FreeBSD: +- x86 eax +4 +8 +12 +16 +20 +24 +28 +32 edx:eax, eflags.c +- amd64 rax rdi rsi rdx rcx r8 r9 +8 +16 rdx:rax, rflags.c ++ x86 eax +4 +8 +12 +16 +20 +24 +28 +32 edx:eax, eflags.c ++ amd64 rax rdi rsi rdx rcx r8 r9 +8 +16 rdx:rax, rflags.c + + On s390x the svc instruction is used for system calls. The system call + number is encoded in the instruction (8 bit immediate field). 
Since Linux +@@ -703,6 +704,17 @@ void getSyscallArgsFromGuestState ( /*OUT*/SyscallArgs* canonical, + canonical->arg6 = gst->guest_r9; // a5 + canonical->arg7 = gst->guest_r10; // a6 + canonical->arg8 = gst->guest_r11; // a7 ++ ++#elif defined(VGP_loongarch64_linux) ++ VexGuestLOONGARCH64State* gst = (VexGuestLOONGARCH64State*)gst_vanilla; ++ canonical->sysno = gst->guest_R11; // a7 ++ canonical->arg1 = gst->guest_R4; // a0 ++ canonical->arg2 = gst->guest_R5; // a1 ++ canonical->arg3 = gst->guest_R6; // a2 ++ canonical->arg4 = gst->guest_R7; // a3 ++ canonical->arg5 = gst->guest_R8; // a4 ++ canonical->arg6 = gst->guest_R9; // a5 ++ + #elif defined(VGP_x86_darwin) + VexGuestX86State* gst = (VexGuestX86State*)gst_vanilla; + UWord *stack = (UWord *)gst->guest_ESP; +@@ -1126,6 +1138,16 @@ void putSyscallArgsIntoGuestState ( /*IN*/ SyscallArgs* canonical, + gst->guest_r10 = canonical->arg7; + gst->guest_r11 = canonical->arg8; + ++#elif defined(VGP_loongarch64_linux) ++ VexGuestLOONGARCH64State* gst = (VexGuestLOONGARCH64State*)gst_vanilla; ++ gst->guest_R11 = canonical->sysno; ++ gst->guest_R4 = canonical->arg1; ++ gst->guest_R5 = canonical->arg2; ++ gst->guest_R6 = canonical->arg3; ++ gst->guest_R7 = canonical->arg4; ++ gst->guest_R8 = canonical->arg5; ++ gst->guest_R9 = canonical->arg6; ++ + #elif defined(VGP_x86_solaris) + VexGuestX86State* gst = (VexGuestX86State*)gst_vanilla; + UWord *stack = (UWord *)gst->guest_ESP; +@@ -1240,6 +1262,13 @@ void getSyscallStatusFromGuestState ( /*OUT*/SyscallStatus* canonical, + RegWord a0 = gst->guest_r4; // a0 + canonical->sres = VG_(mk_SysRes_nanomips_linux)(a0); + canonical->what = SsComplete; ++ ++# elif defined(VGP_loongarch64_linux) ++ VexGuestLOONGARCH64State* gst = (VexGuestLOONGARCH64State*)gst_vanilla; ++ ULong a0 = gst->guest_R4; // a0 ++ canonical->sres = VG_(mk_SysRes_loongarch64_linux)(a0); ++ canonical->what = SsComplete; ++ + # elif defined(VGP_amd64_freebsd) + /* duplicates logic in m_signals.VG_UCONTEXT_SYSCALL_SYSRES */ + VexGuestAMD64State* gst = (VexGuestAMD64State*)gst_vanilla; +@@ -1606,6 +1635,20 @@ void putSyscallStatusIntoGuestState ( /*IN*/ ThreadId tid, + VG_TRACK( post_reg_write, Vg_CoreSysCall, tid, + OFFSET_mips32_r4, sizeof(UWord) ); + ++# elif defined(VGP_loongarch64_linux) ++ VexGuestLOONGARCH64State* gst = (VexGuestLOONGARCH64State*)gst_vanilla; ++ vg_assert(canonical->what == SsComplete); ++ if (sr_isError(canonical->sres)) { ++ /* This isn't exactly right, in that really a Failure with res ++ not in the range 1 .. 4095 is unrepresentable in the ++ Linux-loongarch64 scheme. Oh well. 
*/ ++ gst->guest_R4 = - (Long)sr_Err(canonical->sres); ++ } else { ++ gst->guest_R4 = sr_Res(canonical->sres); ++ } ++ VG_TRACK( post_reg_write, Vg_CoreSysCall, tid, ++ OFFSET_loongarch64_R4, sizeof(UWord) ); ++ + # elif defined(VGP_x86_solaris) + VexGuestX86State* gst = (VexGuestX86State*)gst_vanilla; + SysRes sres = canonical->sres; +@@ -1855,6 +1898,15 @@ void getSyscallArgLayout ( /*OUT*/SyscallArgLayout* layout ) + layout->s_arg7 = sizeof(UWord) * 1; + layout->s_arg8 = sizeof(UWord) * 2; + ++#elif defined(VGP_loongarch64_linux) ++ layout->o_sysno = OFFSET_loongarch64_R11; ++ layout->o_arg1 = OFFSET_loongarch64_R4; ++ layout->o_arg2 = OFFSET_loongarch64_R5; ++ layout->o_arg3 = OFFSET_loongarch64_R6; ++ layout->o_arg4 = OFFSET_loongarch64_R7; ++ layout->o_arg5 = OFFSET_loongarch64_R8; ++ layout->o_arg6 = OFFSET_loongarch64_R9; ++ + #else + # error "getSyscallLayout: unknown arch" + #endif +@@ -2899,6 +2951,25 @@ void ML_(fixup_guest_state_to_restart_syscall) ( ThreadArchState* arch ) + arch->vex.guest_PC -= 2; + } + } ++ ++#elif defined(VGP_loongarch64_linux) ++ arch->vex.guest_PC -= 4; // sizeof(loongarch instr) ++ ++ /* Make sure our caller is actually sane, and we're really backing ++ back over a syscall. ++ ++ syscall 0 == 00 2B 00 00 ++ */ ++ { ++ UChar *p = (UChar *)(Addr)(arch->vex.guest_PC); ++ if (p[0] != 0x00 || p[1] != 0x00 || p[2] != 0x2B || p[3] != 0x00) ++ VG_(message)(Vg_DebugMsg, ++ "?! restarting over syscall at %#llx %02x %02x %02x %02x\n", ++ (ULong)arch->vex.guest_PC, p[0], p[1], p[2], p[3]); ++ ++ vg_assert(p[0] == 0x00 && p[1] == 0x00 && p[2] == 0x2B && p[3] == 0x00); ++ } ++ + #elif defined(VGP_x86_solaris) + arch->vex.guest_EIP -= 2; // sizeof(int $0x91) or sizeof(syscall) + +diff --git a/coregrind/m_syswrap/syswrap-mips32-linux.c b/coregrind/m_syswrap/syswrap-mips32-linux.c +index de27998b3..47d0a2fa3 100644 +--- a/coregrind/m_syswrap/syswrap-mips32-linux.c ++++ b/coregrind/m_syswrap/syswrap-mips32-linux.c +@@ -1140,7 +1140,11 @@ static SyscallTableEntry syscall_main_table[] = { + GENX_(__NR_clone3, sys_ni_syscall), // 435 + LINXY(__NR_close_range, sys_close_range), // 436 + ++ LINXY(__NR_pidfd_getfd, sys_pidfd_getfd), // 438 + LINX_ (__NR_faccessat2, sys_faccessat2), // 439 ++ LINXY(__NR_epoll_pwait2, sys_epoll_pwait2), // 441 ++ ++ LINX_(__NR_fchmodat2, sys_fchmodat2), // 452 + }; + + SyscallTableEntry* ML_(get_linux_syscall_entry) (UInt sysno) +diff --git a/coregrind/m_syswrap/syswrap-mips64-linux.c b/coregrind/m_syswrap/syswrap-mips64-linux.c +index 67e7c2c2f..037e34a79 100644 +--- a/coregrind/m_syswrap/syswrap-mips64-linux.c ++++ b/coregrind/m_syswrap/syswrap-mips64-linux.c +@@ -818,7 +818,10 @@ static SyscallTableEntry syscall_main_table[] = { + LINXY (__NR_pidfd_open, sys_pidfd_open), + GENX_ (__NR_clone3, sys_ni_syscall), + LINXY (__NR_close_range, sys_close_range), ++ LINXY (__NR_pidfd_getfd, sys_pidfd_getfd), + LINX_ (__NR_faccessat2, sys_faccessat2), ++ LINXY(__NR_epoll_pwait2, sys_epoll_pwait2), ++ LINX_ (__NR_fchmodat2, sys_fchmodat2), + }; + + SyscallTableEntry * ML_(get_linux_syscall_entry) ( UInt sysno ) +diff --git a/coregrind/m_syswrap/syswrap-nanomips-linux.c b/coregrind/m_syswrap/syswrap-nanomips-linux.c +index 9c535c68e..043932716 100644 +--- a/coregrind/m_syswrap/syswrap-nanomips-linux.c ++++ b/coregrind/m_syswrap/syswrap-nanomips-linux.c +@@ -827,7 +827,10 @@ static SyscallTableEntry syscall_main_table[] = { + LINXY (__NR_pidfd_open, sys_pidfd_open), + GENX_ (__NR_clone3, sys_ni_syscall), + LINXY (__NR_close_range, sys_close_range), ++ 
LINXY(__NR_pidfd_getfd, sys_pidfd_getfd), + LINX_ (__NR_faccessat2, sys_faccessat2), ++ LINXY (__NR_epoll_pwait2, sys_epoll_pwait2), ++ LINX_ (__NR_fchmodat2, sys_fchmodat2), + }; + + SyscallTableEntry* ML_(get_linux_syscall_entry) (UInt sysno) +diff --git a/coregrind/m_syswrap/syswrap-ppc32-linux.c b/coregrind/m_syswrap/syswrap-ppc32-linux.c +index 12c073027..81a518fe0 100644 +--- a/coregrind/m_syswrap/syswrap-ppc32-linux.c ++++ b/coregrind/m_syswrap/syswrap-ppc32-linux.c +@@ -1060,7 +1060,11 @@ static SyscallTableEntry syscall_table[] = { + GENX_(__NR_clone3, sys_ni_syscall), // 435 + LINXY(__NR_close_range, sys_close_range), // 436 + ++ LINXY(__NR_pidfd_getfd, sys_pidfd_getfd), // 438 + LINX_(__NR_faccessat2, sys_faccessat2), // 439 ++ LINXY (__NR_epoll_pwait2, sys_epoll_pwait2), // 441 ++ ++ LINX_ (__NR_fchmodat2, sys_fchmodat2), // 452 + }; + + SyscallTableEntry* ML_(get_linux_syscall_entry) ( UInt sysno ) +diff --git a/coregrind/m_syswrap/syswrap-ppc64-linux.c b/coregrind/m_syswrap/syswrap-ppc64-linux.c +index 3c33d1267..f72e4246b 100644 +--- a/coregrind/m_syswrap/syswrap-ppc64-linux.c ++++ b/coregrind/m_syswrap/syswrap-ppc64-linux.c +@@ -1029,7 +1029,11 @@ static SyscallTableEntry syscall_table[] = { + GENX_(__NR_clone3, sys_ni_syscall), // 435 + LINXY(__NR_close_range, sys_close_range), // 436 + ++ LINXY(__NR_pidfd_getfd, sys_pidfd_getfd), // 438 + LINX_(__NR_faccessat2, sys_faccessat2), // 439 ++ LINXY (__NR_epoll_pwait2, sys_epoll_pwait2), // 441 ++ ++ LINX_ (__NR_fchmodat2, sys_fchmodat2), // 452 + }; + + SyscallTableEntry* ML_(get_linux_syscall_entry) ( UInt sysno ) +diff --git a/coregrind/m_syswrap/syswrap-s390x-linux.c b/coregrind/m_syswrap/syswrap-s390x-linux.c +index a377cb731..2c2a438fb 100644 +--- a/coregrind/m_syswrap/syswrap-s390x-linux.c ++++ b/coregrind/m_syswrap/syswrap-s390x-linux.c +@@ -870,7 +870,14 @@ static SyscallTableEntry syscall_table[] = { + GENX_(__NR_clone3, sys_ni_syscall), // 435 + LINXY(__NR_close_range, sys_close_range), // 436 + ++ LINXY(__NR_pidfd_getfd, sys_pidfd_getfd), // 438 + LINX_(__NR_faccessat2, sys_faccessat2), // 439 ++ ++ LINXY(__NR_epoll_pwait2, sys_epoll_pwait2), // 441 ++ ++ LINXY(__NR_memfd_secret, sys_memfd_secret), // 447 ++ ++ LINX_ (__NR_fchmodat2, sys_fchmodat2), // 452 + }; + + SyscallTableEntry* ML_(get_linux_syscall_entry) ( UInt sysno ) +diff --git a/coregrind/m_syswrap/syswrap-x86-linux.c b/coregrind/m_syswrap/syswrap-x86-linux.c +index a9ba15dfe..f57b5395c 100644 +--- a/coregrind/m_syswrap/syswrap-x86-linux.c ++++ b/coregrind/m_syswrap/syswrap-x86-linux.c +@@ -1651,11 +1651,14 @@ static SyscallTableEntry syscall_table[] = { + GENX_(__NR_clone3, sys_ni_syscall), // 435 + LINXY(__NR_close_range, sys_close_range), // 436 + LINXY(__NR_openat2, sys_openat2), // 437 +- +- ++ LINXY(__NR_pidfd_getfd, sys_pidfd_getfd), // 438 + LINX_(__NR_faccessat2, sys_faccessat2), // 439 + ++ LINXY(__NR_epoll_pwait2, sys_epoll_pwait2), // 441 ++ + LINXY(__NR_memfd_secret, sys_memfd_secret), // 447 ++ ++ LINX_(__NR_fchmodat2, sys_fchmodat2), // 452 + }; + + SyscallTableEntry* ML_(get_linux_syscall_entry) ( UInt sysno ) +diff --git a/coregrind/m_trampoline.S b/coregrind/m_trampoline.S +index da9697232..f02d53f08 100644 +--- a/coregrind/m_trampoline.S ++++ b/coregrind/m_trampoline.S +@@ -1520,6 +1520,53 @@ VG_(trampoline_stuff_end): + # undef UD2_1024 + # undef UD2_PAGE + ++/*------------------- loongarch64-linux -------------------*/ ++#else ++#if defined(VGP_loongarch64_linux) ++ ++.global VG_(trampoline_stuff_start) ++VG_(trampoline_stuff_start): 
++ ++.global VG_(loongarch64_linux_SUBST_FOR_rt_sigreturn) ++VG_(loongarch64_linux_SUBST_FOR_rt_sigreturn): ++ li.w $a7, __NR_rt_sigreturn ++ syscall 0 ++ .long 0 /*illegal insn*/ ++ ++/* There's no particular reason that this needs to be handwritten ++ assembly, but since that's what this file contains, here's a ++ simple strlen() and strchr() implementations. ++*/ ++ ++.global VG_(loongarch64_linux_REDIR_FOR_strlen) ++.type VG_(loongarch64_linux_REDIR_FOR_strlen), @function ++VG_(loongarch64_linux_REDIR_FOR_strlen): ++ move $t0, $a0 ++ strlen_loop: ++ ld.bu $t1, $a0, 0 ++ addi.d $a0, $a0, 1 ++ bne $t1, $zero, strlen_loop ++ sub.d $a0, $a0, $t0 ++ addi.d $a0, $a0, -1 ++ jr $ra ++.size VG_(loongarch64_linux_REDIR_FOR_strlen), .-VG_(loongarch64_linux_REDIR_FOR_strlen) ++ ++.global VG_(loongarch64_linux_REDIR_FOR_strchr) ++.type VG_(loongarch64_linux_REDIR_FOR_strchr), @function ++VG_(loongarch64_linux_REDIR_FOR_strchr): ++ strchr_loop: ++ ld.bu $t0, $a0, 0 ++ beq $t0, $a1, strchr_end ++ addi.d $a0, $a0, 1 ++ bne $t0, $zero, strchr_loop ++ move $a0, $zero ++ strchr_end: ++ jr $ra ++.size VG_(loongarch64_linux_REDIR_FOR_strchr), .-VG_(loongarch64_linux_REDIR_FOR_strchr) ++ ++.global VG_(trampoline_stuff_end) ++VG_(trampoline_stuff_end): ++ + /*---------------- x86-solaris ----------------*/ + #else + #if defined(VGP_x86_solaris) +@@ -1719,6 +1766,7 @@ VG_(trampoline_stuff_end): + #endif + #endif + #endif ++#endif + + /* Let the linker know we don't need an executable stack */ + MARK_STACK_NO_EXEC +diff --git a/coregrind/m_translate.c b/coregrind/m_translate.c +index 8ae06d2a6..8afaf8e0f 100644 +--- a/coregrind/m_translate.c ++++ b/coregrind/m_translate.c +@@ -1750,6 +1750,11 @@ Bool VG_(translate) ( ThreadId tid, + vex_archinfo.arm64_requires_fallback_LLSC; + # endif + ++# if defined(VGP_loongarch64_linux) ++ /* For now, we only use fallback LLSC */ ++ vex_abiinfo.guest__use_fallback_LLSC = True; ++# endif ++ + /* Set up closure args. */ + closure.tid = tid; + closure.nraddr = nraddr; +diff --git a/coregrind/m_vki.c b/coregrind/m_vki.c +index 0cc1882a1..11c5fe316 100644 +--- a/coregrind/m_vki.c ++++ b/coregrind/m_vki.c +@@ -37,13 +37,13 @@ + describing the kernel interface, so this file is nearly empty. */ + + +-/* ppc32/64, arm64 and mips32/64 (linux) determine page size at startup, +- hence m_vki is the logical place to store that info. */ ++/* ppc32/64, arm64, mips32/64 and loongarch64 (linux) determine page size ++ at startup, hence m_vki is the logical place to store that info. 
*/ + + #if defined(VGP_ppc32_linux) || defined(VGP_ppc64be_linux) \ + || defined(VGP_ppc64le_linux) || defined(VGP_arm64_linux) \ + || defined(VGP_mips32_linux) || defined(VGP_mips64_linux) \ +- || defined(VGP_nanomips_linux) ++ || defined(VGP_nanomips_linux) || defined(VGP_loongarch64_linux) + unsigned long VKI_PAGE_SHIFT = 12; + unsigned long VKI_PAGE_SIZE = 1UL << 12; + #endif +diff --git a/coregrind/pub_core_aspacemgr.h b/coregrind/pub_core_aspacemgr.h +index b867108a2..a2b41f374 100644 +--- a/coregrind/pub_core_aspacemgr.h ++++ b/coregrind/pub_core_aspacemgr.h +@@ -335,7 +335,8 @@ extern Bool VG_(am_relocate_nooverlap_client)( /*OUT*/Bool* need_discard, + #if defined(VGP_ppc32_linux) \ + || defined(VGP_ppc64be_linux) || defined(VGP_ppc64le_linux) \ + || defined(VGP_mips32_linux) || defined(VGP_mips64_linux) \ +- || defined(VGP_arm64_linux) || defined(VGP_nanomips_linux) ++ || defined(VGP_arm64_linux) || defined(VGP_nanomips_linux) \ ++ || defined(VGP_loongarch64_linux) + # define VG_STACK_GUARD_SZB 65536 // 1 or 16 pages + #else + # define VG_STACK_GUARD_SZB 8192 // 2 pages +diff --git a/coregrind/pub_core_basics.h b/coregrind/pub_core_basics.h +index abc5a066a..132545166 100644 +--- a/coregrind/pub_core_basics.h ++++ b/coregrind/pub_core_basics.h +@@ -55,8 +55,8 @@ + + typedef + struct { +- ULong r_pc; /* x86:EIP, amd64:RIP, ppc:CIA, arm:R15, mips:pc */ +- ULong r_sp; /* x86:ESP, amd64:RSP, ppc:R1, arm:R13, mips:sp */ ++ ULong r_pc; /* x86:EIP, amd64:RIP, ppc:CIA, arm:R15, mips:pc, loongarch64:pc */ ++ ULong r_sp; /* x86:ESP, amd64:RSP, ppc:R1, arm:R13, mips:sp, loongarch64:sp */ + union { + struct { + UInt r_ebp; +@@ -102,6 +102,10 @@ typedef + ULong r31; /* Return address of the last subroutine call */ + ULong r28; + } MIPS64; ++ struct { ++ ULong r_fp; /* Stack frame pointer or static variable */ ++ ULong r_ra; /* Return address of the last subroutine call */ ++ } LOONGARCH64; + } misc; + } + UnwindStartRegs; +diff --git a/coregrind/pub_core_debuginfo.h b/coregrind/pub_core_debuginfo.h +index 938ed00cc..d8c0db545 100644 +--- a/coregrind/pub_core_debuginfo.h ++++ b/coregrind/pub_core_debuginfo.h +@@ -131,6 +131,10 @@ typedef + typedef + struct { Addr pc; Addr sp; Addr fp; Addr ra; } + D3UnwindRegs; ++#elif defined(VGA_loongarch64) ++typedef ++ struct { Addr pc; Addr ra; Addr sp; Addr fp; } ++ D3UnwindRegs; + #else + # error "Unsupported arch" + #endif +diff --git a/coregrind/pub_core_machine.h b/coregrind/pub_core_machine.h +index a9b7dd8b1..4793d599c 100644 +--- a/coregrind/pub_core_machine.h ++++ b/coregrind/pub_core_machine.h +@@ -126,6 +126,11 @@ + # define VG_ELF_MACHINE EM_NANOMIPS + # define VG_ELF_CLASS ELFCLASS32 + # undef VG_PLAT_USES_PPCTOC ++#elif defined(VGP_loongarch64_linux) ++# define VG_ELF_DATA2XXX ELFDATA2LSB ++# define VG_ELF_MACHINE EM_LOONGARCH ++# define VG_ELF_CLASS ELFCLASS64 ++# undef VG_PLAT_USES_PPCTOC + #else + # error Unknown platform + #endif +@@ -163,6 +168,10 @@ + # define VG_INSTR_PTR guest_PC + # define VG_STACK_PTR guest_r29 + # define VG_FRAME_PTR guest_r30 ++#elif defined(VGA_loongarch64) ++# define VG_INSTR_PTR guest_PC ++# define VG_STACK_PTR guest_R3 ++# define VG_FRAME_PTR guest_R22 + #else + # error Unknown arch + #endif +@@ -234,6 +243,10 @@ void VG_(get_UnwindStartRegs) ( /*OUT*/UnwindStartRegs* regs, + s390x: initially: call VG_(machine_get_hwcaps) + + then safe to use VG_(machine_get_VexArchInfo) ++ ------------- ++ loongarch64: initially: call VG_(machine_get_hwcaps) ++ ++ then safe to use VG_(machine_get_VexArchInfo) + + 
VG_(machine_get_hwcaps) may use signals (although it attempts to + leave signal state unchanged) and therefore should only be +diff --git a/coregrind/pub_core_mallocfree.h b/coregrind/pub_core_mallocfree.h +index b5922ca50..d285caa1a 100644 +--- a/coregrind/pub_core_mallocfree.h ++++ b/coregrind/pub_core_mallocfree.h +@@ -78,6 +78,7 @@ typedef Int ArenaId; + defined(VGP_ppc64le_linux) || \ + defined(VGP_s390x_linux) || \ + (defined(VGP_mips64_linux) && !defined(VGABI_N32)) || \ ++ defined(VGP_loongarch64_linux) || \ + defined(VGP_x86_freebsd) || \ + defined(VGP_amd64_freebsd) || \ + defined(VGP_x86_darwin) || \ +diff --git a/coregrind/pub_core_signals.h b/coregrind/pub_core_signals.h +index ae8555ba8..c53323fbe 100644 +--- a/coregrind/pub_core_signals.h ++++ b/coregrind/pub_core_signals.h +@@ -77,6 +77,7 @@ extern void VG_(synth_sigill) (ThreadId tid, Addr addr); + extern void VG_(synth_sigtrap) (ThreadId tid); + extern void VG_(synth_sigbus) (ThreadId tid); + extern void VG_(synth_sigfpe) (ThreadId tid, UInt code); ++extern void VG_(synth_sigsys) (ThreadId tid); + + /* Extend the stack to cover addr, if possible */ + extern Bool VG_(extend_stack)(ThreadId tid, Addr addr); +diff --git a/coregrind/pub_core_syscall.h b/coregrind/pub_core_syscall.h +index 6c4f82591..5d7ff4435 100644 +--- a/coregrind/pub_core_syscall.h ++++ b/coregrind/pub_core_syscall.h +@@ -105,6 +105,7 @@ extern SysRes VG_(mk_SysRes_mips32_linux)( UWord v0, UWord v1, + extern SysRes VG_(mk_SysRes_mips64_linux)( ULong v0, ULong v1, + ULong a3 ); + extern SysRes VG_(mk_SysRes_nanomips_linux)( UWord a0); ++extern SysRes VG_(mk_SysRes_loongarch64_linux)( UWord a0 ); + extern SysRes VG_(mk_SysRes_x86_solaris) ( Bool isErr, UInt val, UInt val2 ); + extern SysRes VG_(mk_SysRes_amd64_solaris) ( Bool isErr, ULong val, ULong val2 ); + extern SysRes VG_(mk_SysRes_Error) ( UWord val ); +diff --git a/coregrind/pub_core_trampoline.h b/coregrind/pub_core_trampoline.h +index 54c575a72..3700acb1d 100644 +--- a/coregrind/pub_core_trampoline.h ++++ b/coregrind/pub_core_trampoline.h +@@ -171,6 +171,12 @@ extern Char* VG_(nanomips_linux_REDIR_FOR_index)( const Char*, Int ); + extern UInt VG_(nanomips_linux_REDIR_FOR_strlen)( void* ); + #endif + ++#if defined(VGP_loongarch64_linux) ++extern Addr VG_(loongarch64_linux_SUBST_FOR_rt_sigreturn); ++extern UInt VG_(loongarch64_linux_REDIR_FOR_strlen)( void* ); ++extern Char* VG_(loongarch64_linux_REDIR_FOR_strchr)( const Char*, Int ); ++#endif ++ + #if defined(VGP_x86_solaris) + extern SizeT VG_(x86_solaris_REDIR_FOR_strcmp)(const HChar *, const HChar *); + extern SizeT VG_(x86_solaris_REDIR_FOR_strlen)(const HChar *); +diff --git a/coregrind/pub_core_transtab.h b/coregrind/pub_core_transtab.h +index 6cc11f658..fe9392626 100644 +--- a/coregrind/pub_core_transtab.h ++++ b/coregrind/pub_core_transtab.h +@@ -81,7 +81,8 @@ static inline UWord VG_TT_FAST_HASH ( Addr guest ) { + } + + #elif defined(VGA_ppc32) || defined(VGA_ppc64be) || defined(VGA_ppc64le) \ +- || defined(VGA_mips32) || defined(VGA_mips64) || defined(VGA_arm64) ++ || defined(VGA_mips32) || defined(VGA_mips64) || defined(VGA_arm64) \ ++ || defined(VGA_loongarch64) + static inline UWord VG_TT_FAST_HASH ( Addr guest ) { + // Instructions are 4-byte aligned. 
+ UWord merged = ((UWord)guest) >> 2; +diff --git a/coregrind/pub_core_transtab_asm.h b/coregrind/pub_core_transtab_asm.h +index 8b585f17d..e73c89ae3 100644 +--- a/coregrind/pub_core_transtab_asm.h ++++ b/coregrind/pub_core_transtab_asm.h +@@ -83,7 +83,7 @@ + #if defined(VGA_amd64) || defined(VGA_arm64) \ + || defined(VGA_ppc64be) || defined(VGA_ppc64le) \ + || (defined(VGA_mips64) && defined(VGABI_64)) \ +- || defined(VGA_s390x) ++ || defined(VGA_s390x) || defined(VGA_loongarch64) + // And all other 64-bit hosts + # define VG_FAST_CACHE_SET_BITS 6 + // These FCS_{g,h}{0,1,2,3} are the values of +diff --git a/coregrind/vgdb-invoker-ptrace.c b/coregrind/vgdb-invoker-ptrace.c +index 78a6a168c..798fe5f8e 100644 +--- a/coregrind/vgdb-invoker-ptrace.c ++++ b/coregrind/vgdb-invoker-ptrace.c +@@ -50,9 +50,10 @@ + // Rather we use PTRACE_GETREGS or PTRACE_PEEKUSER. + + // The only platform on which we must use PTRACE_GETREGSET is arm64. ++// We use PTRACE_GETREGSET on loongarch64 as well. + // The resulting vgdb cannot work in a bi-arch setup. + // -1 means we will check that PTRACE_GETREGSET works. +-# if defined(VGA_arm64) ++# if defined(VGA_arm64) || defined(VGA_loongarch64) + #define USE_PTRACE_GETREGSET + # endif + #endif +@@ -529,6 +530,9 @@ static struct user_regs_struct user_save; + # else + static struct user_pt_regs user_save; + # endif ++# elif defined(VGA_loongarch64) ++/* loongarch64 is extra special, glibc only defined user_regs_struct. */ ++static struct user_regs_struct user_save; + # else + static struct user user_save; + # endif +@@ -805,6 +809,9 @@ Bool invoker_invoke_gdbserver (pid_t pid) + # else + struct user_pt_regs user_mod; + # endif ++# elif defined(VGA_loongarch64) ++/* loongarch64 is extra special, glibc only defined user_regs_struct. */ ++ struct user_regs_struct user_mod; + # else + struct user user_mod; + # endif +@@ -874,6 +881,8 @@ Bool invoker_invoke_gdbserver (pid_t pid) + sp = p[29]; + #elif defined(VGA_mips64) + sp = user_mod.regs[29]; ++#elif defined(VGA_loongarch64) ++ sp = user_mod.regs[3]; + #else + I_die_here : (sp) architecture missing in vgdb-invoker-ptrace.c + #endif +@@ -961,6 +970,8 @@ Bool invoker_invoke_gdbserver (pid_t pid) + + #elif defined(VGA_mips64) + assert(0); // cannot vgdb a 32 bits executable with a 64 bits exe ++#elif defined(VGA_loongarch64) ++ assert(0); // cannot vgdb a 32 bits executable with a 64 bits exe + #else + I_die_here : architecture missing in vgdb-invoker-ptrace.c + #endif +@@ -1068,6 +1079,12 @@ Bool invoker_invoke_gdbserver (pid_t pid) + user_mod.regs[31] = bad_return; + user_mod.regs[34] = shared64->invoke_gdbserver; + user_mod.regs[25] = shared64->invoke_gdbserver; ++#elif defined(VGA_loongarch64) ++ /* put check arg in register a0 */ ++ user_mod.regs[4] = check; ++ /* put NULL return address in ra */ ++ user_mod.regs[1] = bad_return; ++ user_mod.csr_era = shared64->invoke_gdbserver; + #else + I_die_here: architecture missing in vgdb-invoker-ptrace.c + #endif +diff --git a/drd/drd_bitmap.h b/drd/drd_bitmap.h +index 3b71d749a..1f11f23c4 100644 +--- a/drd/drd_bitmap.h ++++ b/drd/drd_bitmap.h +@@ -140,7 +140,7 @@ Addr make_address(const UWord a1, const UWord a0) + #define BITS_PER_BITS_PER_UWORD 5 + #elif defined(VGA_amd64) || defined(VGA_ppc64be) || defined(VGA_ppc64le) \ + || defined(VGA_s390x) || (defined(VGA_mips64) && !defined(VGABI_N32)) \ +- || defined(VGA_arm64) ++ || defined(VGA_arm64) || defined(VGA_loongarch64) + #define BITS_PER_BITS_PER_UWORD 6 + #else + #error Unknown platform. 
+diff --git a/drd/drd_load_store.c b/drd/drd_load_store.c +index fba1dac71..dda4ea385 100644 +--- a/drd/drd_load_store.c ++++ b/drd/drd_load_store.c +@@ -53,6 +53,8 @@ + #define STACK_POINTER_OFFSET OFFSET_mips32_r29 + #elif defined(VGA_mips64) + #define STACK_POINTER_OFFSET OFFSET_mips64_r29 ++#elif defined(VGA_loongarch64) ++#define STACK_POINTER_OFFSET OFFSET_loongarch64_R3 + #else + #error Unknown architecture. + #endif +@@ -634,6 +636,8 @@ IRSB* DRD_(instrument)(VgCallbackClosure* const closure, + break; /* not interesting to DRD */ + case Imbe_CancelReservation: + break; /* not interesting to DRD */ ++ case Imbe_InsnFence: ++ break; /* not interesting to DRD */ + default: + tl_assert(0); + } +diff --git a/drd/tests/pth_barrier_thr_cr.supp b/drd/tests/pth_barrier_thr_cr.supp +index 653b2d293..34482ccb9 100644 +--- a/drd/tests/pth_barrier_thr_cr.supp ++++ b/drd/tests/pth_barrier_thr_cr.supp +@@ -9,3 +9,14 @@ + fun:pthread_barrier_wait_intercept + fun:pthread_barrier_wait + } ++{ ++ number-of-concurrent-pthead_barrier_wait()-calls-exceeds-barrier-count ++ drd:BarrierErr ++ fun:pthread_barrier_wait@* ++} ++{ ++ number-of-concurrent-pthead_barrier_wait()-calls-exceeds-barrier-count ++ drd:BarrierErr ++ fun:pthread_barrier_wait_intercept ++ fun:pthread_barrier_wait@* ++} +diff --git a/gdbserver_tests/Makefile.am b/gdbserver_tests/Makefile.am +index fbcb6596d..30e17c0b9 100755 +--- a/gdbserver_tests/Makefile.am ++++ b/gdbserver_tests/Makefile.am +@@ -15,6 +15,7 @@ dist_noinst_SCRIPTS = \ + filter_gdb filter_make_empty \ + filter_memcheck_monitor filter_stderr filter_vgdb \ + filter_helgrind_monitor filter_helgrind_monitor_solaris \ ++ filter_helgrind_monitor_loongarch64 \ + filter_passsigalrm \ + send_signal + +diff --git a/gdbserver_tests/filter_helgrind_monitor b/gdbserver_tests/filter_helgrind_monitor +index 4fc2e9af6..21bf6be14 100755 +--- a/gdbserver_tests/filter_helgrind_monitor ++++ b/gdbserver_tests/filter_helgrind_monitor +@@ -14,6 +14,8 @@ if $dir/../tests/os_test solaris; then + $dir/filter_helgrind_monitor_solaris + elif $dir/../tests/os_test freebsd; then + gsed -e '/\(rtld_start.S\|kill.S\|_exit.S\|_select.S\): No such file or directory/d' ++elif $dir/../tests/arch_test loongarch64; then ++ $dir/filter_helgrind_monitor_loongarch64 + else + cat + fi | +diff --git a/gdbserver_tests/filter_helgrind_monitor_loongarch64 b/gdbserver_tests/filter_helgrind_monitor_loongarch64 +new file mode 100755 +index 000000000..cda73e4c2 +--- /dev/null ++++ b/gdbserver_tests/filter_helgrind_monitor_loongarch64 +@@ -0,0 +1,43 @@ ++#!/usr/bin/env perl ++# From gdbserver_tests/filter_helgrind_monitor_solaris ++ ++# ++# Filter out all helgrind information about locks except the one named "mx". ++# One lock record looks like: ++# Lock ga 0x........ { ++# Address 0x........ 
is 2648 bytes inside data symbol "_rtld_local" ++# kind mbRec ++# } ++ ++use strict; ++use warnings; ++ ++my $lock_start_line = undef; ++my $skip_to_closing_line = 0; ++while () { ++ my $line = $_; ++ chomp($line); ++ if ($line =~ /^Lock ga 0x[\.]+\s+{$/) { ++ $lock_start_line = $line; ++ $skip_to_closing_line = 1; ++ } elsif (($lock_start_line) && ++ ($line =~ /\s*Address 0x[\.]+ is \d+ bytes inside data symbol "(\S+)"/)) { ++ if ($1 eq "mx") { ++ print "$lock_start_line\n"; ++ print "$line\n"; ++ $skip_to_closing_line = 0; ++ } ++ } elsif ($line =~ /^}$/) { ++ if ($skip_to_closing_line == 0) { ++ print "$line\n"; ++ } ++ undef($lock_start_line); ++ $skip_to_closing_line = 0; ++ } else { ++ if ($skip_to_closing_line == 0) { ++ print "$line\n"; ++ } ++ } ++} ++ ++exit 0; +diff --git a/helgrind/hg_main.c b/helgrind/hg_main.c +index cebc2bd2a..d2f882936 100644 +--- a/helgrind/hg_main.c ++++ b/helgrind/hg_main.c +@@ -4870,6 +4870,7 @@ IRSB* hg_instrument ( VgCallbackClosure* closure, + switch (st->Ist.MBE.event) { + case Imbe_Fence: + case Imbe_CancelReservation: ++ case Imbe_InsnFence: + break; /* not interesting */ + default: + goto unhandled; +diff --git a/helgrind/tests/annotate_hbefore.c b/helgrind/tests/annotate_hbefore.c +index 259d3b64c..3200c6cd0 100644 +--- a/helgrind/tests/annotate_hbefore.c ++++ b/helgrind/tests/annotate_hbefore.c +@@ -314,6 +314,36 @@ UWord do_acasW ( UWord* addr, UWord expected, UWord nyu ) + return success; + } + ++#elif defined(VGA_loongarch64) ++ ++// loongarch64 ++/* return 1 if success, 0 if failure */ ++UWord do_acasW ( UWord* addr, UWord expected, UWord nyu ) ++{ ++ UWord success; ++ UWord block[3] = { (UWord)addr, nyu, expected }; ++ ++ __asm__ __volatile__( ++ " ld.d $t0, %1, 0 \n\t" ++ " ld.d $t2, %1, 16 \n\t" ++ " ld.d $t3, %1, 8 \n\t" ++ " ll.d $t1, $t0, 0 \n\t" ++ " bne $t1, $t2, 1f \n\t" ++ " sc.d $t3, $t0, 0 \n\t" ++ " move %0, $t3 \n\t" ++ " b 2f \n\t" ++ "1: \n\t" ++ " move %0, $zero \n\t" ++ "2: \n\t" ++ : /*out*/ "=r" (success) ++ : /*in*/ "r" (&block[0]) ++ : /*trash*/ "t0", "t1", "t2", "t3", "memory" ++ ); ++ ++ assert(success == 0 || success == 1); ++ return success; ++} ++ + #endif + + void atomic_incW ( UWord* w ) +diff --git a/helgrind/tests/tc07_hbl1.c b/helgrind/tests/tc07_hbl1.c +index 54297dee6..246d13c0b 100644 +--- a/helgrind/tests/tc07_hbl1.c ++++ b/helgrind/tests/tc07_hbl1.c +@@ -18,6 +18,7 @@ + #undef PLAT_arm64_linux + #undef PLAT_s390x_linux + #undef PLAT_mips32_linux ++#undef PLAT_loongarch64_linux + #undef PLAT_x86_solaris + #undef PLAT_amd64_solaris + +@@ -47,6 +48,8 @@ + # define PLAT_mips32_linux 1 + #elif defined(__linux__) && defined(__nanomips__) + # define PLAT_nanomips_linux 1 ++#elif defined(__linux__) && defined(__loongarch__) && (__loongarch_grlen == 64) ++# define PLAT_loongarch64_linux 1 + #elif defined(__sun__) && defined(__i386__) + # define PLAT_x86_solaris 1 + #elif defined(__sun__) && defined(__x86_64__) +@@ -131,6 +134,20 @@ + : /*out*/ : /*in*/ "r"(&(_lval)) \ + : /*trash*/ "$t0", "$t1", "memory" \ + ) ++#elif defined(PLAT_loongarch64_linux) ++# define INC(_lval,_lqual) \ ++ __asm__ __volatile__ ( \ ++ "1: \n" \ ++ " move $t0, %0 \n" \ ++ " ll.w $t1, $t0, 0 \n" \ ++ " addi.w $t1, $t1, 1 \n" \ ++ " sc.w $t1, $t0, 0 \n" \ ++ " li.w $t2, 1 \n" \ ++ " bne $t1, $t2, 1b \n" \ ++ : /*out*/ \ ++ : /*in*/ "r" (&(_lval)) \ ++ : /*trash*/ "$t0", "$t1", "$t2", "memory" \ ++ ) + #else + # error "Fix Me for this platform" + #endif +diff --git a/helgrind/tests/tc08_hbl2.c b/helgrind/tests/tc08_hbl2.c +index 
c3a2ec794..8683168a5 100644 +--- a/helgrind/tests/tc08_hbl2.c ++++ b/helgrind/tests/tc08_hbl2.c +@@ -35,6 +35,7 @@ + #undef PLAT_s390x_linux + #undef PLAT_mips32_linux + #undef PLAT_mips64_linux ++#undef PLAT_loongarch64_linux + #undef PLAT_x86_solaris + #undef PLAT_amd64_solaris + +@@ -68,6 +69,8 @@ + #endif + #elif defined(__linux__) && defined(__nanomips__) + # define PLAT_nanomips_linux 1 ++#elif defined(__linux__) && defined(__loongarch__) && (__loongarch_grlen == 64) ++# define PLAT_loongarch64_linux 1 + #elif defined(__sun__) && defined(__i386__) + # define PLAT_x86_solaris 1 + #elif defined(__sun__) && defined(__x86_64__) +@@ -151,6 +154,20 @@ + : /*out*/ : /*in*/ "r"(&(_lval)) \ + : /*trash*/ "$t0", "$t1", "memory" \ + ) ++#elif defined(PLAT_loongarch64_linux) ++# define INC(_lval,_lqual) \ ++ __asm__ __volatile__ ( \ ++ "1: \n" \ ++ " move $t0, %0 \n" \ ++ " ll.w $t1, $t0, 0 \n" \ ++ " addi.w $t1, $t1, 1 \n" \ ++ " sc.w $t1, $t0, 0 \n" \ ++ " li.w $t2, 1 \n" \ ++ " bne $t1, $t2, 1b \n" \ ++ : /*out*/ \ ++ : /*in*/ "r" (&(_lval)) \ ++ : /*trash*/ "$t0", "$t1", "$t2", "memory" \ ++ ) + #else + # error "Fix Me for this platform" + #endif +diff --git a/helgrind/tests/tc11_XCHG.c b/helgrind/tests/tc11_XCHG.c +index f6ff1c984..0d307ac0c 100644 +--- a/helgrind/tests/tc11_XCHG.c ++++ b/helgrind/tests/tc11_XCHG.c +@@ -20,6 +20,7 @@ + #undef PLAT_arm_linux + #undef PLAT_s390x_linux + #undef PLAT_mips32_linux ++#undef PLAT_loongarch64_linux + #undef PLAT_x86_solaris + #undef PLAT_amd64_solaris + +@@ -49,6 +50,8 @@ + # define PLAT_mips32_linux 1 + #elif defined(__linux__) && defined(__nanomips__) + # define PLAT_nanomips_linux 1 ++#elif defined(__linux__) && defined(__loongarch__) && (__loongarch_grlen == 64) ++# define PLAT_loongarch64_linux 1 + #elif defined(__sun__) && defined(__i386__) + # define PLAT_x86_solaris 1 + #elif defined(__sun__) && defined(__x86_64__) +@@ -146,6 +149,21 @@ + # define XCHG_M_R_with_redundant_LOCK(_addr,_lval) \ + XCHG_M_R(_addr,_lval) + ++#elif defined(PLAT_loongarch64_linux) ++# define XCHG_M_R(_addr,_lval) \ ++ __asm__ __volatile__( \ ++ "move $t0, %2 \n\t" \ ++ "ll.w $t1, %1 \n\t" \ ++ "sc.w $t0, %1 \n\t" \ ++ "move %0, $t1 \n\t" \ ++ : /*out*/ "=r"(_lval), "+ZC"(_addr) \ ++ : /*in*/ "r"(_lval) \ ++ : "$t0", "$t1", "memory" \ ++ ) ++ ++# define XCHG_M_R_with_redundant_LOCK(_addr,_lval) \ ++ XCHG_M_R(_addr,_lval) ++ + #else + # error "Unsupported architecture" + +diff --git a/include/Makefile.am b/include/Makefile.am +index 972d394b8..abfa2c915 100644 +--- a/include/Makefile.am ++++ b/include/Makefile.am +@@ -63,6 +63,7 @@ nobase_pkginclude_HEADERS = \ + vki/vki-posixtypes-mips32-linux.h \ + vki/vki-posixtypes-mips64-linux.h \ + vki/vki-posixtypes-nanomips-linux.h \ ++ vki/vki-posixtypes-loongarch64-linux.h \ + vki/vki-amd64-linux.h \ + vki/vki-arm64-linux.h \ + vki/vki-ppc32-linux.h \ +@@ -75,6 +76,7 @@ nobase_pkginclude_HEADERS = \ + vki/vki-mips32-linux.h \ + vki/vki-mips64-linux.h \ + vki/vki-nanomips-linux.h \ ++ vki/vki-loongarch64-linux.h \ + vki/vki-scnums-amd64-linux.h \ + vki/vki-scnums-arm64-linux.h \ + vki/vki-scnums-ppc32-linux.h \ +@@ -86,6 +88,7 @@ nobase_pkginclude_HEADERS = \ + vki/vki-scnums-mips32-linux.h \ + vki/vki-scnums-mips64-linux.h \ + vki/vki-scnums-nanomips-linux.h \ ++ vki/vki-scnums-loongarch64-linux.h \ + vki/vki-scnums-darwin.h \ + vki/vki-scnums-solaris.h \ + vki/vki-scnums-shared-linux.h \ +diff --git a/include/pub_tool_basics.h b/include/pub_tool_basics.h +index d22a42523..079196524 100644 +--- a/include/pub_tool_basics.h 
++++ b/include/pub_tool_basics.h +@@ -442,7 +442,8 @@ static inline Bool sr_EQ ( UInt sysno, SysRes sr1, SysRes sr2 ) { + + #if defined(VGA_x86) || defined(VGA_amd64) || defined (VGA_arm) \ + || ((defined(VGA_mips32) || defined(VGA_mips64) || defined(VGA_nanomips)) \ +- && defined (_MIPSEL)) || defined(VGA_arm64) || defined(VGA_ppc64le) ++ && defined (_MIPSEL)) || defined(VGA_arm64) || defined(VGA_ppc64le) \ ++ || defined (VGA_loongarch64) + # define VG_LITTLEENDIAN 1 + #elif defined(VGA_ppc32) || defined(VGA_ppc64be) || defined(VGA_s390x) \ + || ((defined(VGA_mips32) || defined(VGA_mips64) || defined(VGA_nanomips)) \ +@@ -490,7 +491,8 @@ static inline Bool sr_EQ ( UInt sysno, SysRes sr1, SysRes sr2 ) { + || defined(VGA_ppc64be) || defined(VGA_ppc64le) \ + || defined(VGA_arm) || defined(VGA_s390x) \ + || defined(VGA_mips32) || defined(VGA_mips64) \ +- || defined(VGA_arm64) || defined(VGA_nanomips) ++ || defined(VGA_arm64) || defined(VGA_nanomips) \ ++ || defined(VGA_loongarch64) + # define VG_REGPARM(n) /* */ + #else + # error Unknown arch +diff --git a/include/pub_tool_guest.h b/include/pub_tool_guest.h +index 08a72efac..87e8cc2bc 100644 +--- a/include/pub_tool_guest.h ++++ b/include/pub_tool_guest.h +@@ -62,6 +62,9 @@ + #elif defined(VGA_mips64) + # include "libvex_guest_mips64.h" + typedef VexGuestMIPS64State VexGuestArchState; ++#elif defined(VGA_loongarch64) ++# include "libvex_guest_loongarch64.h" ++ typedef VexGuestLOONGARCH64State VexGuestArchState; + #else + # error Unknown arch + #endif +diff --git a/include/pub_tool_libcsetjmp.h b/include/pub_tool_libcsetjmp.h +index 6b278d285..86304a4f4 100644 +--- a/include/pub_tool_libcsetjmp.h ++++ b/include/pub_tool_libcsetjmp.h +@@ -126,6 +126,14 @@ UWord VG_MINIMAL_SETJMP(VG_MINIMAL_JMP_BUF(_env)); + __attribute__((noreturn)) + void VG_MINIMAL_LONGJMP(VG_MINIMAL_JMP_BUF(_env)); + ++#elif defined(VGP_loongarch64_linux) ++ ++#define VG_MINIMAL_JMP_BUF(_name) ULong _name [13+8+1] ++__attribute__((returns_twice)) ++UWord VG_MINIMAL_SETJMP(VG_MINIMAL_JMP_BUF(_env)); ++__attribute__((noreturn)) ++void VG_MINIMAL_LONGJMP(VG_MINIMAL_JMP_BUF(_env)); ++ + #else + + /* The default implementation. */ +diff --git a/include/pub_tool_machine.h b/include/pub_tool_machine.h +index 9bdd4f514..12377f97a 100644 +--- a/include/pub_tool_machine.h ++++ b/include/pub_tool_machine.h +@@ -108,6 +108,12 @@ + # define VG_CLREQ_SZB 20 + # define VG_STACK_REDZONE_SZB 0 + ++#elif defined(VGP_loongarch64_linux) ++# define VG_MIN_INSTR_SZB 4 ++# define VG_MAX_INSTR_SZB 8 ++# define VG_CLREQ_SZB 20 ++# define VG_STACK_REDZONE_SZB 0 ++ + #else + # error Unknown platform + #endif +diff --git a/include/pub_tool_redir.h b/include/pub_tool_redir.h +index f88d3b571..d1bb8cbce 100644 +--- a/include/pub_tool_redir.h ++++ b/include/pub_tool_redir.h +@@ -321,6 +321,8 @@ + + #define VG_U_LD_LINUX_MIPSN8_S0_1 "ld-linux-mipsn8.so.1" + ++#define VG_U_LD_LINUX_LOONGARCH_LP64D_SO_1 "ld-linux-loongarch-lp64d.so.1" ++ + #endif + + /* --- Sonames for FreeBSD ELF linkers, plus unencoded versions. 
--- */ +diff --git a/include/pub_tool_vkiscnums_asm.h b/include/pub_tool_vkiscnums_asm.h +index 14b483c4d..b2222aadf 100644 +--- a/include/pub_tool_vkiscnums_asm.h ++++ b/include/pub_tool_vkiscnums_asm.h +@@ -74,6 +74,10 @@ + # include "vki/vki-scnums-shared-linux.h" + # include "vki/vki-scnums-mips64-linux.h" + ++#elif defined(VGP_loongarch64_linux) ++# include "vki/vki-scnums-shared-linux.h" ++# include "vki/vki-scnums-loongarch64-linux.h" ++ + #elif defined(VGP_x86_freebsd) || defined(VGP_amd64_freebsd) + # include "vki/vki-scnums-freebsd.h" + +diff --git a/include/valgrind.h.in b/include/valgrind.h.in +index aa0b43125..b330497f7 100644 +--- a/include/valgrind.h.in ++++ b/include/valgrind.h.in +@@ -125,6 +125,7 @@ + #undef PLAT_mips32_linux + #undef PLAT_mips64_linux + #undef PLAT_nanomips_linux ++#undef PLAT_loongarch64_linux + #undef PLAT_x86_solaris + #undef PLAT_amd64_solaris + +@@ -169,6 +170,8 @@ + # define PLAT_mips32_linux 1 + #elif defined(__linux__) && defined(__nanomips__) + # define PLAT_nanomips_linux 1 ++#elif defined(__linux__) && defined(__loongarch__) && (__loongarch_grlen == 64) ++# define PLAT_loongarch64_linux 1 + #elif defined(__sun) && defined(__i386__) + # define PLAT_x86_solaris 1 + #elif defined(__sun) && defined(__x86_64__) +@@ -1125,7 +1128,75 @@ typedef + ); \ + } while (0) + +-#endif ++#endif /* PLAT_nanomips_linux */ ++ ++/* --------------------- loongarch64-linux --------------------- */ ++#if defined(PLAT_loongarch64_linux) ++ ++typedef ++ struct { ++ unsigned long nraddr; /* where's the code? */ ++ } ++ OrigFn; ++ ++#define __SPECIAL_INSTRUCTION_PREAMBLE \ ++ "srli.d $zero, $zero, 3 \n\t" \ ++ "srli.d $zero, $zero, 13 \n\t" \ ++ "srli.d $zero, $zero, 29 \n\t" \ ++ "srli.d $zero, $zero, 19 \n\t" ++ ++#define VALGRIND_DO_CLIENT_REQUEST_EXPR( \ ++ _zzq_default, _zzq_request, \ ++ _zzq_arg1, _zzq_arg2, _zzq_arg3, _zzq_arg4, _zzq_arg5) \ ++ __extension__ \ ++ ({ \ ++ volatile unsigned long int _zzq_args[6]; \ ++ volatile unsigned long int _zzq_result; \ ++ _zzq_args[0] = (unsigned long int)(_zzq_request); \ ++ _zzq_args[1] = (unsigned long int)(_zzq_arg1); \ ++ _zzq_args[2] = (unsigned long int)(_zzq_arg2); \ ++ _zzq_args[3] = (unsigned long int)(_zzq_arg3); \ ++ _zzq_args[4] = (unsigned long int)(_zzq_arg4); \ ++ _zzq_args[5] = (unsigned long int)(_zzq_arg5); \ ++ __asm__ volatile("move $a7, %1 \n\t" /*default*/ \ ++ "move $t0, %2 \n\t" /*ptr*/ \ ++ __SPECIAL_INSTRUCTION_PREAMBLE \ ++ /* $a7 = client_request ( $t0 ) */ \ ++ "or $t1, $t1, $t1 \n\t" \ ++ "move %0, $a7 \n\t" /*result*/ \ ++ : "=r" (_zzq_result) \ ++ : "r" (_zzq_default), "r" (&_zzq_args[0]) \ ++ : "$a7", "$t0", "memory"); \ ++ _zzq_result; \ ++ }) ++ ++#define VALGRIND_GET_NR_CONTEXT(_zzq_rlval) \ ++ { \ ++ volatile OrigFn* _zzq_orig = &(_zzq_rlval); \ ++ volatile unsigned long int __addr; \ ++ __asm__ volatile(__SPECIAL_INSTRUCTION_PREAMBLE \ ++ /* $a7 = guest_NRADDR */ \ ++ "or $t2, $t2, $t2 \n\t" \ ++ "move %0, $a7 \n\t" /*result*/ \ ++ : "=r" (__addr) \ ++ : \ ++ : "$a7"); \ ++ _zzq_orig->nraddr = __addr; \ ++ } ++ ++#define VALGRIND_CALL_NOREDIR_T8 \ ++ __SPECIAL_INSTRUCTION_PREAMBLE \ ++ /* call-noredir $t8 */ \ ++ "or $t3, $t3, $t3 \n\t" ++ ++#define VALGRIND_VEX_INJECT_IR() \ ++ do { \ ++ __asm__ volatile(__SPECIAL_INSTRUCTION_PREAMBLE \ ++ "or $t4, $t4, $t4 \n\t" \ ++ ); \ ++ } while (0) ++ ++#endif /* PLAT_loongarch64_linux */ + /* Insert assembly code for other platforms here... 
*/ + + #endif /* NVALGRIND */ +@@ -6603,6 +6674,457 @@ typedef + + #endif /* PLAT_mips64_linux */ + ++/* --------------------- loongarch64-linux --------------------- */ ++ ++#if defined(PLAT_loongarch64_linux) ++ ++/* These regs are trashed by the hidden call. */ ++#define __CALLER_SAVED_REGS \ ++ "$ra", "$a0", "$a1", "$a2", "$a3", "$a4", "$a5", "$a6", "$a7", \ ++ "$t0", "$t1", "$t2", "$t3", "$t4", "$t5", "$t6", "$t7", "$t8", \ ++ "$f0", "$f1", "$f2", "$f3", "$f4", "$f5", "$f6", "$f7", \ ++ "$f8", "$f9", "$f10", "$f11", "$f12", "$f13", "$f14", "$f15", \ ++ "$f16", "$f17", "$f18", "$f19", "$f20", "$f21", "$f22", "$f23" ++ ++/* $s0 is callee-saved, so we can use it to save and restore SP around ++ the hidden call. */ ++#define VALGRIND_ALIGN_STACK \ ++ "move $s0, $sp \n\t" \ ++ "bstrins.d $sp, $zero, 3, 0 \n\t" ++#define VALGRIND_RESTORE_STACK \ ++ "move $sp, $s0 \n\t" ++ ++/* These CALL_FN_ macros assume that on loongarch64-linux, ++ sizeof(unsigned long) == 8. */ ++ ++#define CALL_FN_W_v(lval, orig) \ ++ do { \ ++ volatile OrigFn _orig = (orig); \ ++ volatile unsigned long _argvec[1]; \ ++ volatile unsigned long _res; \ ++ _argvec[0] = (unsigned long)_orig.nraddr; \ ++ __asm__ volatile( \ ++ VALGRIND_ALIGN_STACK \ ++ "ld.d $t8, %1, 0 \n\t" /* target->t8 */ \ ++ VALGRIND_CALL_NOREDIR_T8 \ ++ VALGRIND_RESTORE_STACK \ ++ "move %0, $a0 \n\t" \ ++ : /*out*/ "=r" (_res) \ ++ : /*in*/ "r" (&_argvec[0]) \ ++ : /*trash*/ "memory", __CALLER_SAVED_REGS, "$s0" \ ++ ); \ ++ lval = (__typeof__(lval)) _res; \ ++ } while (0) ++ ++#define CALL_FN_W_W(lval, orig, arg1) \ ++ do { \ ++ volatile OrigFn _orig = (orig); \ ++ volatile unsigned long _argvec[2]; \ ++ volatile unsigned long _res; \ ++ _argvec[0] = (unsigned long)_orig.nraddr; \ ++ _argvec[1] = (unsigned long)(arg1); \ ++ __asm__ volatile( \ ++ VALGRIND_ALIGN_STACK \ ++ "ld.d $a0, %1, 8 \n\t" /* arg1 */ \ ++ "ld.d $t8, %1, 0 \n\t" /* target->t8 */ \ ++ VALGRIND_CALL_NOREDIR_T8 \ ++ VALGRIND_RESTORE_STACK \ ++ "move %0, $a0 \n\t" \ ++ : /*out*/ "=r" (_res) \ ++ : /*in*/ "r" (&_argvec[0]) \ ++ : /*trash*/ "memory", __CALLER_SAVED_REGS, "$s0" \ ++ ); \ ++ lval = (__typeof__(lval)) _res; \ ++ } while (0) ++ ++#define CALL_FN_W_WW(lval, orig, arg1, arg2) \ ++ do { \ ++ volatile OrigFn _orig = (orig); \ ++ volatile unsigned long _argvec[3]; \ ++ volatile unsigned long _res; \ ++ _argvec[0] = (unsigned long)_orig.nraddr; \ ++ _argvec[1] = (unsigned long)(arg1); \ ++ _argvec[2] = (unsigned long)(arg2); \ ++ __asm__ volatile( \ ++ VALGRIND_ALIGN_STACK \ ++ "ld.d $a0, %1, 8 \n\t" /* arg1 */ \ ++ "ld.d $a1, %1, 16 \n\t" /* arg2 */ \ ++ "ld.d $t8, %1, 0 \n\t" /* target->t8 */ \ ++ VALGRIND_CALL_NOREDIR_T8 \ ++ VALGRIND_RESTORE_STACK \ ++ "move %0, $a0 \n\t" \ ++ : /*out*/ "=r" (_res) \ ++ : /*in*/ "r" (&_argvec[0]) \ ++ : /*trash*/ "memory", __CALLER_SAVED_REGS, "$s0" \ ++ ); \ ++ lval = (__typeof__(lval)) _res; \ ++ } while (0) ++ ++#define CALL_FN_W_WWW(lval, orig, arg1, arg2, arg3) \ ++ do { \ ++ volatile OrigFn _orig = (orig); \ ++ volatile unsigned long _argvec[4]; \ ++ volatile unsigned long _res; \ ++ _argvec[0] = (unsigned long)_orig.nraddr; \ ++ _argvec[1] = (unsigned long)(arg1); \ ++ _argvec[2] = (unsigned long)(arg2); \ ++ _argvec[3] = (unsigned long)(arg3); \ ++ __asm__ volatile( \ ++ VALGRIND_ALIGN_STACK \ ++ "ld.d $a0, %1, 8 \n\t" /* arg1 */ \ ++ "ld.d $a1, %1, 16 \n\t" /* arg2 */ \ ++ "ld.d $a2, %1, 24 \n\t" /* arg3 */ \ ++ "ld.d $t8, %1, 0 \n\t" /* target->t8 */ \ ++ VALGRIND_CALL_NOREDIR_T8 \ ++ VALGRIND_RESTORE_STACK \ ++ "move %0, $a0 \n\t" 
\ ++ : /*out*/ "=r" (_res) \ ++ : /*in*/ "r" (&_argvec[0]) \ ++ : /*trash*/ "memory", __CALLER_SAVED_REGS, "$s0" \ ++ ); \ ++ lval = (__typeof__(lval)) _res; \ ++ } while (0) ++ ++#define CALL_FN_W_WWWW(lval, orig, arg1, arg2, arg3, arg4) \ ++ do { \ ++ volatile OrigFn _orig = (orig); \ ++ volatile unsigned long _argvec[5]; \ ++ volatile unsigned long _res; \ ++ _argvec[0] = (unsigned long)_orig.nraddr; \ ++ _argvec[1] = (unsigned long)(arg1); \ ++ _argvec[2] = (unsigned long)(arg2); \ ++ _argvec[3] = (unsigned long)(arg3); \ ++ _argvec[4] = (unsigned long)(arg4); \ ++ __asm__ volatile( \ ++ VALGRIND_ALIGN_STACK \ ++ "ld.d $a0, %1, 8 \n\t" /* arg1 */ \ ++ "ld.d $a1, %1, 16 \n\t" /* arg2 */ \ ++ "ld.d $a2, %1, 24 \n\t" /* arg3 */ \ ++ "ld.d $a3, %1, 32 \n\t" /* arg4 */ \ ++ "ld.d $t8, %1, 0 \n\t" /* target->t8 */ \ ++ VALGRIND_CALL_NOREDIR_T8 \ ++ VALGRIND_RESTORE_STACK \ ++ "move %0, $a0 \n\t" \ ++ : /*out*/ "=r" (_res) \ ++ : /*in*/ "r" (&_argvec[0]) \ ++ : /*trash*/ "memory", __CALLER_SAVED_REGS, "$s0" \ ++ ); \ ++ lval = (__typeof__(lval)) _res; \ ++ } while (0) ++ ++#define CALL_FN_W_5W(lval, orig, arg1, arg2, arg3, arg4, arg5) \ ++ do { \ ++ volatile OrigFn _orig = (orig); \ ++ volatile unsigned long _argvec[6]; \ ++ volatile unsigned long _res; \ ++ _argvec[0] = (unsigned long)_orig.nraddr; \ ++ _argvec[1] = (unsigned long)(arg1); \ ++ _argvec[2] = (unsigned long)(arg2); \ ++ _argvec[3] = (unsigned long)(arg3); \ ++ _argvec[4] = (unsigned long)(arg4); \ ++ _argvec[5] = (unsigned long)(arg5); \ ++ __asm__ volatile( \ ++ VALGRIND_ALIGN_STACK \ ++ "ld.d $a0, %1, 8 \n\t" /* arg1 */ \ ++ "ld.d $a1, %1, 16 \n\t" /* arg2 */ \ ++ "ld.d $a2, %1, 24 \n\t" /* arg3 */ \ ++ "ld.d $a3, %1, 32 \n\t" /* arg4 */ \ ++ "ld.d $a4, %1, 40 \n\t" /* arg5 */ \ ++ "ld.d $t8, %1, 0 \n\t" /* target->t8 */ \ ++ VALGRIND_CALL_NOREDIR_T8 \ ++ VALGRIND_RESTORE_STACK \ ++ "move %0, $a0 \n\t" \ ++ : /*out*/ "=r" (_res) \ ++ : /*in*/ "r" (&_argvec[0]) \ ++ : /*trash*/ "memory", __CALLER_SAVED_REGS, "$s0" \ ++ ); \ ++ lval = (__typeof__(lval)) _res; \ ++ } while (0) ++ ++#define CALL_FN_W_6W(lval, orig, arg1, arg2, arg3, arg4, arg5, \ ++ arg6) \ ++ do { \ ++ volatile OrigFn _orig = (orig); \ ++ volatile unsigned long _argvec[7]; \ ++ volatile unsigned long _res; \ ++ _argvec[0] = (unsigned long)_orig.nraddr; \ ++ _argvec[1] = (unsigned long)(arg1); \ ++ _argvec[2] = (unsigned long)(arg2); \ ++ _argvec[3] = (unsigned long)(arg3); \ ++ _argvec[4] = (unsigned long)(arg4); \ ++ _argvec[5] = (unsigned long)(arg5); \ ++ _argvec[6] = (unsigned long)(arg6); \ ++ __asm__ volatile( \ ++ VALGRIND_ALIGN_STACK \ ++ "ld.d $a0, %1, 8 \n\t" /* arg1 */ \ ++ "ld.d $a1, %1, 16 \n\t" /* arg2 */ \ ++ "ld.d $a2, %1, 24 \n\t" /* arg3 */ \ ++ "ld.d $a3, %1, 32 \n\t" /* arg4 */ \ ++ "ld.d $a4, %1, 40 \n\t" /* arg5 */ \ ++ "ld.d $a5, %1, 48 \n\t" /* arg6 */ \ ++ "ld.d $t8, %1, 0 \n\t" /* target->t8 */ \ ++ VALGRIND_CALL_NOREDIR_T8 \ ++ VALGRIND_RESTORE_STACK \ ++ "move %0, $a0 \n\t" \ ++ : /*out*/ "=r" (_res) \ ++ : /*in*/ "r" (&_argvec[0]) \ ++ : /*trash*/ "memory", __CALLER_SAVED_REGS, "$s0" \ ++ ); \ ++ lval = (__typeof__(lval)) _res; \ ++ } while (0) ++ ++#define CALL_FN_W_7W(lval, orig, arg1, arg2, arg3, arg4, arg5, \ ++ arg6, arg7) \ ++ do { \ ++ volatile OrigFn _orig = (orig); \ ++ volatile unsigned long _argvec[8]; \ ++ volatile unsigned long _res; \ ++ _argvec[0] = (unsigned long)_orig.nraddr; \ ++ _argvec[1] = (unsigned long)(arg1); \ ++ _argvec[2] = (unsigned long)(arg2); \ ++ _argvec[3] = (unsigned long)(arg3); \ ++ _argvec[4] = 
(unsigned long)(arg4); \ ++ _argvec[5] = (unsigned long)(arg5); \ ++ _argvec[6] = (unsigned long)(arg6); \ ++ _argvec[7] = (unsigned long)(arg7); \ ++ __asm__ volatile( \ ++ VALGRIND_ALIGN_STACK \ ++ "ld.d $a0, %1, 8 \n\t" /* arg1 */ \ ++ "ld.d $a1, %1, 16 \n\t" /* arg2 */ \ ++ "ld.d $a2, %1, 24 \n\t" /* arg3 */ \ ++ "ld.d $a3, %1, 32 \n\t" /* arg4 */ \ ++ "ld.d $a4, %1, 40 \n\t" /* arg5 */ \ ++ "ld.d $a5, %1, 48 \n\t" /* arg6 */ \ ++ "ld.d $a6, %1, 56 \n\t" /* arg7 */ \ ++ "ld.d $t8, %1, 0 \n\t" /* target->t8 */ \ ++ VALGRIND_CALL_NOREDIR_T8 \ ++ VALGRIND_RESTORE_STACK \ ++ "move %0, $a0 \n\t" \ ++ : /*out*/ "=r" (_res) \ ++ : /*in*/ "r" (&_argvec[0]) \ ++ : /*trash*/ "memory", __CALLER_SAVED_REGS, "$s0" \ ++ ); \ ++ lval = (__typeof__(lval)) _res; \ ++ } while (0) ++ ++#define CALL_FN_W_8W(lval, orig, arg1, arg2, arg3, arg4, arg5, \ ++ arg6, arg7, arg8) \ ++ do { \ ++ volatile OrigFn _orig = (orig); \ ++ volatile unsigned long _argvec[9]; \ ++ volatile unsigned long _res; \ ++ _argvec[0] = (unsigned long)_orig.nraddr; \ ++ _argvec[1] = (unsigned long)(arg1); \ ++ _argvec[2] = (unsigned long)(arg2); \ ++ _argvec[3] = (unsigned long)(arg3); \ ++ _argvec[4] = (unsigned long)(arg4); \ ++ _argvec[5] = (unsigned long)(arg5); \ ++ _argvec[6] = (unsigned long)(arg6); \ ++ _argvec[7] = (unsigned long)(arg7); \ ++ _argvec[8] = (unsigned long)(arg8); \ ++ __asm__ volatile( \ ++ VALGRIND_ALIGN_STACK \ ++ "ld.d $a0, %1, 8 \n\t" /* arg1 */ \ ++ "ld.d $a1, %1, 16 \n\t" /* arg2 */ \ ++ "ld.d $a2, %1, 24 \n\t" /* arg3 */ \ ++ "ld.d $a3, %1, 32 \n\t" /* arg4 */ \ ++ "ld.d $a4, %1, 40 \n\t" /* arg5 */ \ ++ "ld.d $a5, %1, 48 \n\t" /* arg6 */ \ ++ "ld.d $a6, %1, 56 \n\t" /* arg7 */ \ ++ "ld.d $a7, %1, 64 \n\t" /* arg8 */ \ ++ "ld.d $t8, %1, 0 \n\t" /* target->t8 */ \ ++ VALGRIND_CALL_NOREDIR_T8 \ ++ VALGRIND_RESTORE_STACK \ ++ "move %0, $a0 \n\t" \ ++ : /*out*/ "=r" (_res) \ ++ : /*in*/ "r" (&_argvec[0]) \ ++ : /*trash*/ "memory", __CALLER_SAVED_REGS, "$s0" \ ++ ); \ ++ lval = (__typeof__(lval)) _res; \ ++ } while (0) ++ ++#define CALL_FN_W_9W(lval, orig, arg1, arg2, arg3, arg4, arg5, \ ++ arg6, arg7, arg8, arg9) \ ++ do { \ ++ volatile OrigFn _orig = (orig); \ ++ volatile unsigned long _argvec[10]; \ ++ volatile unsigned long _res; \ ++ _argvec[0] = (unsigned long)_orig.nraddr; \ ++ _argvec[1] = (unsigned long)(arg1); \ ++ _argvec[2] = (unsigned long)(arg2); \ ++ _argvec[3] = (unsigned long)(arg3); \ ++ _argvec[4] = (unsigned long)(arg4); \ ++ _argvec[5] = (unsigned long)(arg5); \ ++ _argvec[6] = (unsigned long)(arg6); \ ++ _argvec[7] = (unsigned long)(arg7); \ ++ _argvec[8] = (unsigned long)(arg8); \ ++ _argvec[9] = (unsigned long)(arg9); \ ++ __asm__ volatile( \ ++ VALGRIND_ALIGN_STACK \ ++ "addi.d $sp, $sp, -8 \n\t" \ ++ "ld.d $a0, %1, 72 \n\t" \ ++ "st.d $a0, $sp, 0 \n\t" /* arg9 */ \ ++ "ld.d $a0, %1, 8 \n\t" /* arg1 */ \ ++ "ld.d $a1, %1, 16 \n\t" /* arg2 */ \ ++ "ld.d $a2, %1, 24 \n\t" /* arg3 */ \ ++ "ld.d $a3, %1, 32 \n\t" /* arg4 */ \ ++ "ld.d $a4, %1, 40 \n\t" /* arg5 */ \ ++ "ld.d $a5, %1, 48 \n\t" /* arg6 */ \ ++ "ld.d $a6, %1, 56 \n\t" /* arg7 */ \ ++ "ld.d $a7, %1, 64 \n\t" /* arg8 */ \ ++ "ld.d $t8, %1, 0 \n\t" /* target->t8 */ \ ++ VALGRIND_CALL_NOREDIR_T8 \ ++ VALGRIND_RESTORE_STACK \ ++ "move %0, $a0 \n\t" \ ++ : /*out*/ "=r" (_res) \ ++ : /*in*/ "r" (&_argvec[0]) \ ++ : /*trash*/ "memory", __CALLER_SAVED_REGS, "$s0" \ ++ ); \ ++ lval = (__typeof__(lval)) _res; \ ++ } while (0) ++ ++#define CALL_FN_W_10W(lval, orig, arg1, arg2, arg3, arg4, arg5, \ ++ arg6, arg7, arg8, arg9, arg10) \ ++ do 
{ \ ++ volatile OrigFn _orig = (orig); \ ++ volatile unsigned long _argvec[11]; \ ++ volatile unsigned long _res; \ ++ _argvec[0] = (unsigned long)_orig.nraddr; \ ++ _argvec[1] = (unsigned long)(arg1); \ ++ _argvec[2] = (unsigned long)(arg2); \ ++ _argvec[3] = (unsigned long)(arg3); \ ++ _argvec[4] = (unsigned long)(arg4); \ ++ _argvec[5] = (unsigned long)(arg5); \ ++ _argvec[6] = (unsigned long)(arg6); \ ++ _argvec[7] = (unsigned long)(arg7); \ ++ _argvec[8] = (unsigned long)(arg8); \ ++ _argvec[9] = (unsigned long)(arg9); \ ++ _argvec[10] = (unsigned long)(arg10); \ ++ __asm__ volatile( \ ++ VALGRIND_ALIGN_STACK \ ++ "addi.d $sp, $sp, -16 \n\t" \ ++ "ld.d $a0, %1, 72 \n\t" \ ++ "st.d $a0, $sp, 0 \n\t" /* arg9 */ \ ++ "ld.d $a0, %1, 80 \n\t" \ ++ "st.d $a0, $sp, 8 \n\t" /* arg10 */ \ ++ "ld.d $a0, %1, 8 \n\t" /* arg1 */ \ ++ "ld.d $a1, %1, 16 \n\t" /* arg2 */ \ ++ "ld.d $a2, %1, 24 \n\t" /* arg3 */ \ ++ "ld.d $a3, %1, 32 \n\t" /* arg4 */ \ ++ "ld.d $a4, %1, 40 \n\t" /* arg5 */ \ ++ "ld.d $a5, %1, 48 \n\t" /* arg6 */ \ ++ "ld.d $a6, %1, 56 \n\t" /* arg7 */ \ ++ "ld.d $a7, %1, 64 \n\t" /* arg8 */ \ ++ "ld.d $t8, %1, 0 \n\t" /* target->t8 */ \ ++ VALGRIND_CALL_NOREDIR_T8 \ ++ VALGRIND_RESTORE_STACK \ ++ "move %0, $a0 \n\t" \ ++ : /*out*/ "=r" (_res) \ ++ : /*in*/ "r" (&_argvec[0]) \ ++ : /*trash*/ "memory", __CALLER_SAVED_REGS, "$s0" \ ++ ); \ ++ lval = (__typeof__(lval)) _res; \ ++ } while (0) ++ ++#define CALL_FN_W_11W(lval, orig, arg1, arg2, arg3, arg4, arg5, \ ++ arg6, arg7, arg8, arg9, arg10, \ ++ arg11) \ ++ do { \ ++ volatile OrigFn _orig = (orig); \ ++ volatile unsigned long _argvec[12]; \ ++ volatile unsigned long _res; \ ++ _argvec[0] = (unsigned long)_orig.nraddr; \ ++ _argvec[1] = (unsigned long)(arg1); \ ++ _argvec[2] = (unsigned long)(arg2); \ ++ _argvec[3] = (unsigned long)(arg3); \ ++ _argvec[4] = (unsigned long)(arg4); \ ++ _argvec[5] = (unsigned long)(arg5); \ ++ _argvec[6] = (unsigned long)(arg6); \ ++ _argvec[7] = (unsigned long)(arg7); \ ++ _argvec[8] = (unsigned long)(arg8); \ ++ _argvec[9] = (unsigned long)(arg9); \ ++ _argvec[10] = (unsigned long)(arg10); \ ++ _argvec[11] = (unsigned long)(arg11); \ ++ __asm__ volatile( \ ++ VALGRIND_ALIGN_STACK \ ++ "addi.d $sp, $sp, -24 \n\t" \ ++ "ld.d $a0, %1, 72 \n\t" \ ++ "st.d $a0, $sp, 0 \n\t" /* arg9 */ \ ++ "ld.d $a0, %1, 80 \n\t" \ ++ "st.d $a0, $sp, 8 \n\t" /* arg10 */ \ ++ "ld.d $a0, %1, 88 \n\t" \ ++ "st.d $a0, $sp, 16 \n\t" /* arg11 */ \ ++ "ld.d $a0, %1, 8 \n\t" /* arg1 */ \ ++ "ld.d $a1, %1, 16 \n\t" /* arg2 */ \ ++ "ld.d $a2, %1, 24 \n\t" /* arg3 */ \ ++ "ld.d $a3, %1, 32 \n\t" /* arg4 */ \ ++ "ld.d $a4, %1, 40 \n\t" /* arg5 */ \ ++ "ld.d $a5, %1, 48 \n\t" /* arg6 */ \ ++ "ld.d $a6, %1, 56 \n\t" /* arg7 */ \ ++ "ld.d $a7, %1, 64 \n\t" /* arg8 */ \ ++ "ld.d $t8, %1, 0 \n\t" /* target->t8 */ \ ++ VALGRIND_CALL_NOREDIR_T8 \ ++ VALGRIND_RESTORE_STACK \ ++ "move %0, $a0 \n\t" \ ++ : /*out*/ "=r" (_res) \ ++ : /*in*/ "r" (&_argvec[0]) \ ++ : /*trash*/ "memory", __CALLER_SAVED_REGS, "$s0" \ ++ ); \ ++ lval = (__typeof__(lval)) _res; \ ++ } while (0) ++ ++#define CALL_FN_W_12W(lval, orig, arg1, arg2, arg3, arg4, arg5, \ ++ arg6, arg7, arg8, arg9, arg10, \ ++ arg11, arg12) \ ++ do { \ ++ volatile OrigFn _orig = (orig); \ ++ volatile unsigned long _argvec[13]; \ ++ volatile unsigned long _res; \ ++ _argvec[0] = (unsigned long)_orig.nraddr; \ ++ _argvec[1] = (unsigned long)(arg1); \ ++ _argvec[2] = (unsigned long)(arg2); \ ++ _argvec[3] = (unsigned long)(arg3); \ ++ _argvec[4] = (unsigned long)(arg4); \ ++ _argvec[5] = 
(unsigned long)(arg5); \ ++ _argvec[6] = (unsigned long)(arg6); \ ++ _argvec[7] = (unsigned long)(arg7); \ ++ _argvec[8] = (unsigned long)(arg8); \ ++ _argvec[9] = (unsigned long)(arg9); \ ++ _argvec[10] = (unsigned long)(arg10); \ ++ _argvec[11] = (unsigned long)(arg11); \ ++ _argvec[12] = (unsigned long)(arg12); \ ++ __asm__ volatile( \ ++ VALGRIND_ALIGN_STACK \ ++ "addi.d $sp, $sp, -32 \n\t" \ ++ "ld.d $a0, %1, 72 \n\t" \ ++ "st.d $a0, $sp, 0 \n\t" /* arg9 */ \ ++ "ld.d $a0, %1, 80 \n\t" \ ++ "st.d $a0, $sp, 8 \n\t" /* arg10 */ \ ++ "ld.d $a0, %1, 88 \n\t" \ ++ "st.d $a0, $sp, 16 \n\t" /* arg11 */ \ ++ "ld.d $a0, %1, 96 \n\t" \ ++ "st.d $a0, $sp, 24 \n\t" /* arg12 */ \ ++ "ld.d $a0, %1, 8 \n\t" /* arg1 */ \ ++ "ld.d $a1, %1, 16 \n\t" /* arg2 */ \ ++ "ld.d $a2, %1, 24 \n\t" /* arg3 */ \ ++ "ld.d $a3, %1, 32 \n\t" /* arg4 */ \ ++ "ld.d $a4, %1, 40 \n\t" /* arg5 */ \ ++ "ld.d $a5, %1, 48 \n\t" /* arg6 */ \ ++ "ld.d $a6, %1, 56 \n\t" /* arg7 */ \ ++ "ld.d $a7, %1, 64 \n\t" /* arg8 */ \ ++ "ld.d $t8, %1, 0 \n\t" /* target->t8 */ \ ++ VALGRIND_CALL_NOREDIR_T8 \ ++ VALGRIND_RESTORE_STACK \ ++ "move %0, $a0 \n\t" \ ++ : /*out*/ "=r" (_res) \ ++ : /*in*/ "r" (&_argvec[0]) \ ++ : /*trash*/ "memory", __CALLER_SAVED_REGS, "$s0" \ ++ ); \ ++ lval = (__typeof__(lval)) _res; \ ++ } while (0) ++ ++#endif /* PLAT_loongarch64_linux */ ++ + /* ------------------------------------------------------------------ */ + /* ARCHITECTURE INDEPENDENT MACROS for CLIENT REQUESTS. */ + /* */ +@@ -7159,6 +7681,7 @@ VALGRIND_PRINTF_BACKTRACE(const char *format, ...) + #undef PLAT_mips32_linux + #undef PLAT_mips64_linux + #undef PLAT_nanomips_linux ++#undef PLAT_loongarch64_linux + #undef PLAT_x86_solaris + #undef PLAT_amd64_solaris + +diff --git a/include/vki/vki-linux.h b/include/vki/vki-linux.h +index be3d76690..0a60c0a09 100644 +--- a/include/vki/vki-linux.h ++++ b/include/vki/vki-linux.h +@@ -97,6 +97,8 @@ + # include "vki-posixtypes-mips64-linux.h" + #elif defined(VGA_nanomips) + # include "vki-posixtypes-nanomips-linux.h" ++#elif defined(VGA_loongarch64) ++# include "vki-posixtypes-loongarch64-linux.h" + #else + # error Unknown platform + #endif +@@ -225,6 +227,8 @@ typedef unsigned int vki_uint; + # include "vki-mips64-linux.h" + #elif defined(VGA_nanomips) + # include "vki-nanomips-linux.h" ++#elif defined(VGA_loongarch64) ++# include "vki-loongarch64-linux.h" + #else + # error Unknown platform + #endif +@@ -531,6 +535,7 @@ typedef struct vki_siginfo { + * Digital reserves positive values for kernel-generated signals. + */ + #define VKI_SI_USER 0 /* sent by kill, sigsend, raise */ ++#define VKI_SI_KERNEL 0x80 /* sent by the kernel from somewhere */ + #define VKI_SI_TKILL -6 /* sent by tkill system call */ + + /* +diff --git a/include/vki/vki-loongarch64-linux.h b/include/vki/vki-loongarch64-linux.h +new file mode 100644 +index 000000000..97d3f66dd +--- /dev/null ++++ b/include/vki/vki-loongarch64-linux.h +@@ -0,0 +1,811 @@ ++ ++/*--------------------------------------------------------------------*/ ++/*--- loongarch/Linux-specific kernel interface. ---*/ ++/*--- vki-loongarch64-linux.h ---*/ ++/*--------------------------------------------------------------------*/ ++ ++/* ++ This file is part of Valgrind, a dynamic binary instrumentation ++ framework. 
++ ++ Copyright (C) 2021-2022 Loongson Technology Corporation Limited ++ ++ This program is free software; you can redistribute it and/or ++ modify it under the terms of the GNU General Public License as ++ published by the Free Software Foundation; either version 2 of the ++ License, or (at your option) any later version. ++ ++ This program is distributed in the hope that it will be useful, but ++ WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ General Public License for more details. ++ ++ You should have received a copy of the GNU General Public License ++ along with this program; if not, see . ++*/ ++ ++#ifndef __VKI_LOONGARCH64_LINUX_H ++#define __VKI_LOONGARCH64_LINUX_H ++ ++// loongarch64 is little-endian. ++#define VKI_LITTLE_ENDIAN 1 ++ ++//---------------------------------------------------------------------- ++// From linux-5.15.2/include/uapi/asm-generic/int-ll64.h ++//---------------------------------------------------------------------- ++ ++typedef __signed__ char __vki_s8; ++typedef unsigned char __vki_u8; ++ ++typedef __signed__ short __vki_s16; ++typedef unsigned short __vki_u16; ++ ++typedef __signed__ int __vki_s32; ++typedef unsigned int __vki_u32; ++ ++typedef __signed__ long long __vki_s64; ++typedef unsigned long long __vki_u64; ++ ++//---------------------------------------------------------------------- ++// From linux-5.15.2/include/asm-generic/int-ll64.h ++//---------------------------------------------------------------------- ++ ++typedef __vki_s8 vki_s8; ++typedef __vki_u8 vki_u8; ++typedef __vki_s16 vki_s16; ++typedef __vki_u16 vki_u16; ++typedef __vki_s32 vki_s32; ++typedef __vki_u32 vki_u32; ++typedef __vki_s64 vki_s64; ++typedef __vki_u64 vki_u64; ++ ++//---------------------------------------------------------------------- ++// From linux-5.15.2/include/linux/types.h ++//---------------------------------------------------------------------- ++ ++typedef vki_u8 vki_u_int8_t; ++typedef vki_s8 vki_int8_t; ++typedef vki_u16 vki_u_int16_t; ++typedef vki_s16 vki_int16_t; ++typedef vki_u32 vki_u_int32_t; ++typedef vki_s32 vki_int32_t; ++ ++typedef vki_u8 vki_uint8_t; ++typedef vki_u16 vki_uint16_t; ++typedef vki_u32 vki_uint32_t; ++ ++typedef vki_u64 vki_uint64_t; ++typedef vki_u64 vki_u_int64_t; ++typedef vki_s64 vki_int64_t; ++ ++//---------------------------------------------------------------------- ++// From linux-5.19-rc1/arch/loongarch/include/asm/page.h ++//---------------------------------------------------------------------- ++ ++/* loongarch64 uses runtime pagesize detection */ ++extern UWord VKI_PAGE_SHIFT; ++extern UWord VKI_PAGE_SIZE; ++#define VKI_PAGE_MASK (~(PAGE_SIZE - 1)) ++#define VKI_MAX_PAGE_SHIFT 16 ++#define VKI_MAX_PAGE_SIZE (1UL << VKI_MAX_PAGE_SHIFT) ++ ++//---------------------------------------------------------------------- ++// From linux-5.19-rc1/arch/loongarch/include/asm/shmparam.h ++//---------------------------------------------------------------------- ++ ++#define VKI_SHMLBA 0x00010000 // SZ_64K ++ ++//---------------------------------------------------------------------- ++// From linux-5.19-rc1/arch/loongarch/include/uapi/asm/signal.h ++//---------------------------------------------------------------------- ++ ++#define VKI_MINSIGSTKSZ 4096 ++#define VKI_SIGSTKSZ 16384 ++ ++//---------------------------------------------------------------------- ++// From linux-5.19-rc1/include/uapi/asm-generic/signal-defs.h 
++//---------------------------------------------------------------------- ++ ++#define VKI_SA_NOCLDSTOP 0x00000001 ++#define VKI_SA_NOCLDWAIT 0x00000002 ++#define VKI_SA_SIGINFO 0x00000004 ++/* 0x00000008 used on alpha, mips, parisc */ ++/* 0x00000010 used on alpha, parisc */ ++/* 0x00000020 used on alpha, parisc, sparc */ ++/* 0x00000040 used on alpha, parisc */ ++/* 0x00000080 used on parisc */ ++/* 0x00000100 used on sparc */ ++/* 0x00000200 used on sparc */ ++#define VKI_SA_UNSUPPORTED 0x00000400 ++#define VKI_SA_EXPOSE_TAGBITS 0x00000800 ++/* 0x00010000 used on mips */ ++/* 0x00800000 used for internal SA_IMMUTABLE */ ++/* 0x01000000 used on x86 */ ++/* 0x02000000 used on x86 */ ++/* ++ * New architectures should not define the obsolete ++ * VKI_SA_RESTORER 0x04000000 ++ */ ++#define VKI_SA_ONSTACK 0x08000000 ++#define VKI_SA_RESTART 0x10000000 ++#define VKI_SA_NODEFER 0x40000000 ++#define VKI_SA_RESETHAND 0x80000000 ++ ++#define VKI_SA_NOMASK VKI_SA_NODEFER ++#define VKI_SA_ONESHOT VKI_SA_RESETHAND ++ ++#define VKI_SIG_BLOCK 0 /* for blocking signals */ ++#define VKI_SIG_UNBLOCK 1 /* for unblocking signals */ ++#define VKI_SIG_SETMASK 2 /* for setting the signal mask */ ++ ++typedef void __vki_signalfn_t(int); ++typedef __vki_signalfn_t __user *__vki_sighandler_t; ++ ++typedef void __vki_restorefn_t(void); ++typedef __vki_restorefn_t __user *__vki_igrestore_t; ++ ++#define VKI_SIG_DFL ((__vki_sighandler_t)0) /* default signal handling */ ++#define VKI_SIG_IGN ((__vki_sighandler_t)1) /* ignore signal */ ++#define VKI_SIG_ERR ((__vki_sighandler_t)-1) /* error return from signal */ ++ ++//---------------------------------------------------------------------- ++// From linux-5.19-rc1/include/uapi/asm-generic/signal.h ++//---------------------------------------------------------------------- ++ ++#define _VKI_NSIG 64 ++#define _VKI_NSIG_BPW 64 // __BITS_PER_LONG == 64 ++#define _VKI_NSIG_WORDS (_VKI_NSIG / _VKI_NSIG_BPW) ++ ++#define VKI_SIGHUP 1 ++#define VKI_SIGINT 2 ++#define VKI_SIGQUIT 3 ++#define VKI_SIGILL 4 ++#define VKI_SIGTRAP 5 ++#define VKI_SIGABRT 6 ++#define VKI_SIGIOT 6 ++#define VKI_SIGBUS 7 ++#define VKI_SIGFPE 8 ++#define VKI_SIGKILL 9 ++#define VKI_SIGUSR1 10 ++#define VKI_SIGSEGV 11 ++#define VKI_SIGUSR2 12 ++#define VKI_SIGPIPE 13 ++#define VKI_SIGALRM 14 ++#define VKI_SIGTERM 15 ++#define VKI_SIGSTKFLT 16 ++#define VKI_SIGCHLD 17 ++#define VKI_SIGCONT 18 ++#define VKI_SIGSTOP 19 ++#define VKI_SIGTSTP 20 ++#define VKI_SIGTTIN 21 ++#define VKI_SIGTTOU 22 ++#define VKI_SIGURG 23 ++#define VKI_SIGXCPU 24 ++#define VKI_SIGXFSZ 25 ++#define VKI_SIGVTALRM 26 ++#define VKI_SIGPROF 27 ++#define VKI_SIGWINCH 28 ++#define VKI_SIGIO 29 ++#define VKI_SIGPOLL VKI_SIGIO ++/* ++#define VKI_SIGLOST 29 ++*/ ++#define VKI_SIGPWR 30 ++#define VKI_SIGSYS 31 ++#define VKI_SIGUNUSED 31 ++ ++#define VKI_SIGRTMIN 32 ++#define VKI_SIGRTMAX _VKI_NSIG ++ ++typedef struct { ++ unsigned long sig[_VKI_NSIG_WORDS]; ++} vki_sigset_t; ++ ++typedef unsigned long vki_old_sigset_t; ++ ++//---------------------------------------------------------------------- ++// From linux-5.19-rc1/include/uapi/linux/signal.h ++//---------------------------------------------------------------------- ++ ++#define VKI_SS_ONSTACK 1 ++#define VKI_SS_DISABLE 2 ++ ++//---------------------------------------------------------------------- ++// From linux-5.19-rc1/include/linux/signal_types.h ++//---------------------------------------------------------------------- ++ ++struct vki_sigaction { ++ __vki_sighandler_t 
sa_handler; ++ unsigned long sa_flags; ++ vki_sigset_t sa_mask; /* mask last for extensibility */ ++}; ++ ++struct vki_sigaction_base { ++ // [[Nb: a 'k' prefix is added to "sa_handler" because ++ // bits/sigaction.h (which gets dragged in somehow via signal.h) ++ // #defines it as something else. Since that is done for glibc's ++ // purposes, which we don't care about here, we use our own name.]] ++ __vki_sighandler_t ksa_handler; ++ unsigned long sa_flags; ++ vki_sigset_t sa_mask; /* mask last for extensibility */ ++}; ++ ++/* On Linux we use the same type for passing sigactions to ++ and from the kernel. Hence: */ ++typedef struct vki_sigaction_base vki_sigaction_toK_t; ++typedef struct vki_sigaction_base vki_sigaction_fromK_t; ++ ++typedef struct vki_sigaltstack { ++ void __user *ss_sp; ++ int ss_flags; ++ __vki_kernel_size_t ss_size; ++} vki_stack_t; ++ ++//---------------------------------------------------------------------- ++// From linux-5.19-rc1/arch/loongarch/include/uapi/asm/sigcontext.h ++//---------------------------------------------------------------------- ++ ++struct vki_sigcontext { ++ __vki_u64 sc_pc; ++ __vki_u64 sc_regs[32]; ++ __vki_u32 sc_flags; ++ __vki_u64 sc_extcontext[0] __attribute__((__aligned__(16))); ++}; ++ ++//---------------------------------------------------------------------- ++// From linux-5.19-rc1/include/uapi/linux/mman.h ++//---------------------------------------------------------------------- ++ ++#define VKI_MAP_SHARED 0x01 /* Share changes */ ++#define VKI_MAP_PRIVATE 0x02 /* Changes are private */ ++#define VKI_MAP_SHARED_VALIDATE 0x03 /* share + validate extension flags */ ++ ++//---------------------------------------------------------------------- ++// From linux-5.19-rc1/include/uapi/asm-generic/mman-common.h ++//---------------------------------------------------------------------- ++ ++#define VKI_PROT_READ 0x1 /* page can be read */ ++#define VKI_PROT_WRITE 0x2 /* page can be written */ ++#define VKI_PROT_EXEC 0x4 /* page can be executed */ ++#define VKI_PROT_SEM 0x8 /* page may be used for atomic ops */ ++/* 0x10 reserved for arch-specific use */ ++/* 0x20 reserved for arch-specific use */ ++#define VKI_PROT_NONE 0x0 /* page can not be accessed */ ++#define VKI_PROT_GROWSDOWN 0x01000000 /* mprotect flag: extend change to start of growsdown vma */ ++#define VKI_PROT_GROWSUP 0x02000000 /* mprotect flag: extend change to end of growsup vma */ ++ ++/* 0x01 - 0x03 are defined in linux/mman.h */ ++#define VKI_MAP_TYPE 0x0f /* Mask for type of mapping */ ++#define VKI_MAP_FIXED 0x10 /* Interpret addr exactly */ ++#define VKI_MAP_ANONYMOUS 0x20 /* don't use a file */ ++ ++/* 0x0100 - 0x4000 flags are defined in asm-generic/mman.h */ ++#define VKI_MAP_POPULATE 0x008000 /* populate (prefault) pagetables */ ++#define VKI_MAP_NONBLOCK 0x010000 /* do not block on IO */ ++#define VKI_MAP_STACK 0x020000 /* give out an address that is best suited for process/thread stacks */ ++#define VKI_MAP_HUGETLB 0x040000 /* create a huge page mapping */ ++#define VKI_MAP_SYNC 0x080000 /* perform synchronous page faults for the mapping */ ++#define VKI_MAP_FIXED_NOREPLACE 0x100000 /* MAP_FIXED which doesn't unmap underlying mapping */ ++ ++#define VKI_MAP_UNINITIALIZED 0x4000000 /* For anonymous mmap, memory could be uninitialized */ ++ ++//---------------------------------------------------------------------- ++// From linux-5.19-rc1/include/uapi/asm-generic/fcntl.h ++//---------------------------------------------------------------------- ++ ++#define 
VKI_O_ACCMODE 00000003 ++#define VKI_O_RDONLY 00000000 ++#define VKI_O_WRONLY 00000001 ++#define VKI_O_RDWR 00000002 ++#define VKI_O_CREAT 00000100 /* not fcntl */ ++#define VKI_O_EXCL 00000200 /* not fcntl */ ++#define VKI_O_NOCTTY 00000400 /* not fcntl */ ++#define VKI_O_TRUNC 00001000 /* not fcntl */ ++#define VKI_O_APPEND 00002000 ++#define VKI_O_NONBLOCK 00004000 ++#define VKI_O_DSYNC 00010000 /* used to be O_SYNC, see below */ ++#define VKI_FASYNC 00020000 /* fcntl, for BSD compatibility */ ++#define VKI_O_DIRECT 00040000 /* direct disk access hint */ ++#define VKI_O_LARGEFILE 00100000 ++ ++#define VKI_F_DUPFD 0 /* dup */ ++#define VKI_F_GETFD 1 /* get close_on_exec */ ++#define VKI_F_SETFD 2 /* set/clear close_on_exec */ ++#define VKI_F_GETFL 3 /* get file->f_flags */ ++#define VKI_F_SETFL 4 /* set file->f_flags */ ++#define VKI_F_GETLK 5 ++#define VKI_F_SETLK 6 ++#define VKI_F_SETLKW 7 ++#define VKI_F_SETOWN 8 /* for sockets. */ ++#define VKI_F_GETOWN 9 /* for sockets. */ ++#define VKI_F_SETSIG 10 /* for sockets. */ ++#define VKI_F_GETSIG 11 /* for sockets. */ ++ ++#define VKI_F_SETOWN_EX 15 ++#define VKI_F_GETOWN_EX 16 ++ ++#define VKI_F_GETOWNER_UIDS 17 ++ ++#define VKI_F_OFD_GETLK 36 ++#define VKI_F_OFD_SETLK 37 ++#define VKI_F_OFD_SETLKW 38 ++ ++#define VKI_F_OWNER_TID 0 ++#define VKI_F_OWNER_PID 1 ++#define VKI_F_OWNER_PGRP 2 ++ ++struct vki_f_owner_ex { ++ int type; ++ __vki_kernel_pid_t pid; ++}; ++ ++#define VKI_FD_CLOEXEC 1 /* actually anything with low bit set goes */ ++ ++#define VKI_F_RDLCK 0 ++#define VKI_F_WRLCK 1 ++#define VKI_F_UNLCK 2 ++ ++#define VKI_F_EXLCK 4 /* or 3 */ ++#define VKI_F_SHLCK 8 /* or 4 */ ++ ++#define VKI_LOCK_SH 1 /* shared lock */ ++#define VKI_LOCK_EX 2 /* exclusive lock */ ++#define VKI_LOCK_NB 4 /* or'd with one of the above to prevent blocking */ ++#define VKI_LOCK_UN 8 /* remove lock */ ++ ++#define VKI_LOCK_MAND 32 /* This is a mandatory flock ... */ ++#define VKI_LOCK_READ 64 /* which allows concurrent read operations */ ++#define VKI_LOCK_WRITE 128 /* which allows concurrent write operations */ ++#define VKI_LOCK_RW 192 /* which allows concurrent read & write ops */ ++ ++#define VKI_F_LINUX_SPECIFIC_BASE 1024 ++ ++//---------------------------------------------------------------------- ++// From linux-5.19-rc1/include/uapi/linux/fcntl.h ++//---------------------------------------------------------------------- ++ ++#define VKI_AT_FDCWD -100 /* Special value used to indicate ++ openat should use the current ++ working directory. 
*/ ++ ++//---------------------------------------------------------------------- ++// From linux-5.19-rc1/include/uapi/asm-generic/resource.h ++//---------------------------------------------------------------------- ++ ++#define VKI_RLIMIT_DATA 2 /* max data size */ ++#define VKI_RLIMIT_STACK 3 /* max stack size */ ++#define VKI_RLIMIT_CORE 4 /* max core file size */ ++#define VKI_RLIMIT_NOFILE 7 /* max number of open files */ ++ ++//---------------------------------------------------------------------- ++// From linux-5.19-rc1/include/uapi/asm-generic/socket.h ++//---------------------------------------------------------------------- ++ ++#define VKI_SOL_SOCKET 1 ++#define VKI_SO_TYPE 3 ++ ++#define VKI_SO_ATTACH_FILTER 26 ++ ++//---------------------------------------------------------------------- ++// From linux-5.19-rc1/include/uapi/asm-generic/sockios.h ++//---------------------------------------------------------------------- ++ ++#define VKI_FIOSETOWN 0x8901 ++#define VKI_SIOCSPGRP 0x8902 ++#define VKI_FIOGETOWN 0x8903 ++#define VKI_SIOCGPGRP 0x8904 ++#define VKI_SIOCATMARK 0x8905 ++#define VKI_SIOCGSTAMP_OLD 0x8906 /* Get stamp (timeval) */ ++#define VKI_SIOCGSTAMPNS_OLD 0x8907 /* Get stamp (timespec) */ ++ ++//---------------------------------------------------------------------- ++// From linux-5.19-rc1/include/uapi/linux/sockios.h ++//---------------------------------------------------------------------- ++ ++#define VKI_SIOCGSTAMP VKI_SIOCGSTAMP_OLD ++#define VKI_SIOCGSTAMPNS VKI_SIOCGSTAMPNS_OLD ++ ++//---------------------------------------------------------------------- ++// From linux-5.19-rc1/include/uapi/asm-generic/stat.h ++//---------------------------------------------------------------------- ++ ++struct vki_stat { ++ unsigned long st_dev; /* Device. */ ++ unsigned long st_ino; /* File serial number. */ ++ unsigned int st_mode; /* File mode. */ ++ unsigned int st_nlink; /* Link count. */ ++ unsigned int st_uid; /* User ID of the file's owner. */ ++ unsigned int st_gid; /* Group ID of the file's group. */ ++ unsigned long st_rdev; /* Device number, if device. */ ++ unsigned long __pad1; ++ long st_size; /* Size of file, in bytes. */ ++ int st_blksize; /* Optimal block size for I/O. */ ++ int __pad2; ++ long st_blocks; /* Number 512-byte blocks allocated. */ ++ long st_atime; /* Time of last access. */ ++ unsigned long st_atime_nsec; ++ long st_mtime; /* Time of last modification. */ ++ unsigned long st_mtime_nsec; ++ long st_ctime; /* Time of last status change. */ ++ unsigned long st_ctime_nsec; ++ unsigned int __unused4; ++ unsigned int __unused5; ++}; ++ ++struct vki_stat64 { ++ unsigned long long st_dev; /* Device. */ ++ unsigned long long st_ino; /* File serial number. */ ++ unsigned int st_mode; /* File mode. */ ++ unsigned int st_nlink; /* Link count. */ ++ unsigned int st_uid; /* User ID of the file's owner. */ ++ unsigned int st_gid; /* Group ID of the file's group. */ ++ unsigned long long st_rdev; /* Device number, if device. */ ++ unsigned long long __pad1; ++ long long st_size; /* Size of file, in bytes. */ ++ int st_blksize; /* Optimal block size for I/O. */ ++ int __pad2; ++ long long st_blocks; /* Number 512-byte blocks allocated. */ ++ int st_atime; /* Time of last access. */ ++ unsigned int st_atime_nsec; ++ int st_mtime; /* Time of last modification. */ ++ unsigned int st_mtime_nsec; ++ int st_ctime; /* Time of last status change. 
*/ ++ unsigned int st_ctime_nsec; ++ unsigned int __unused4; ++ unsigned int __unused5; ++}; ++ ++//---------------------------------------------------------------------- ++// From linux-5.19-rc1/include/uapi/asm-generic/statfs.h ++//---------------------------------------------------------------------- ++ ++#define __vki_statfs_word __vki_kernel_long_t ++ ++struct vki_statfs { ++ __vki_statfs_word f_type; ++ __vki_statfs_word f_bsize; ++ __vki_statfs_word f_blocks; ++ __vki_statfs_word f_bfree; ++ __vki_statfs_word f_bavail; ++ __vki_statfs_word f_files; ++ __vki_statfs_word f_ffree; ++ __vki_kernel_fsid_t f_fsid; ++ __vki_statfs_word f_namelen; ++ __vki_statfs_word f_frsize; ++ __vki_statfs_word f_flags; ++ __vki_statfs_word f_spare[4]; ++}; ++ ++//---------------------------------------------------------------------- ++// From linux-5.19-rc1/include/uapi/asm-generic/termios.h ++//---------------------------------------------------------------------- ++ ++struct vki_winsize { ++ unsigned short ws_row; ++ unsigned short ws_col; ++ unsigned short ws_xpixel; ++ unsigned short ws_ypixel; ++}; ++ ++#define VKI_NCC 8 ++struct vki_termio { ++ unsigned short c_iflag; /* input mode flags */ ++ unsigned short c_oflag; /* output mode flags */ ++ unsigned short c_cflag; /* control mode flags */ ++ unsigned short c_lflag; /* local mode flags */ ++ unsigned char c_line; /* line discipline */ ++ unsigned char c_cc[VKI_NCC]; /* control characters */ ++}; ++ ++//---------------------------------------------------------------------- ++// From linux-5.19-rc1/include/uapi/asm-generic/termbits.h ++//---------------------------------------------------------------------- ++ ++typedef unsigned char vki_cc_t; ++typedef unsigned int vki_speed_t; ++typedef unsigned int vki_tcflag_t; ++ ++#define VKI_NCCS 19 ++struct vki_termios { ++ vki_tcflag_t c_iflag; /* input mode flags */ ++ vki_tcflag_t c_oflag; /* output mode flags */ ++ vki_tcflag_t c_cflag; /* control mode flags */ ++ vki_tcflag_t c_lflag; /* local mode flags */ ++ vki_cc_t c_line; /* line discipline */ ++ vki_cc_t c_cc[VKI_NCCS]; /* control characters */ ++}; ++ ++//---------------------------------------------------------------------- ++// From linux-5.19-rc1/include/uapi/asm-generic/ioctl.h ++//---------------------------------------------------------------------- ++ ++#define _VKI_IOC_NRBITS 8 ++#define _VKI_IOC_TYPEBITS 8 ++#define _VKI_IOC_SIZEBITS 14 ++#define _VKI_IOC_DIRBITS 2 ++ ++#define _VKI_IOC_NRMASK ((1 << _VKI_IOC_NRBITS)-1) ++#define _VKI_IOC_TYPEMASK ((1 << _VKI_IOC_TYPEBITS)-1) ++#define _VKI_IOC_SIZEMASK ((1 << _VKI_IOC_SIZEBITS)-1) ++#define _VKI_IOC_DIRMASK ((1 << _VKI_IOC_DIRBITS)-1) ++ ++#define _VKI_IOC_NRSHIFT 0 ++#define _VKI_IOC_TYPESHIFT (_VKI_IOC_NRSHIFT+_VKI_IOC_NRBITS) ++#define _VKI_IOC_SIZESHIFT (_VKI_IOC_TYPESHIFT+_VKI_IOC_TYPEBITS) ++#define _VKI_IOC_DIRSHIFT (_VKI_IOC_SIZESHIFT+_VKI_IOC_SIZEBITS) ++ ++#define _VKI_IOC_NONE 0U ++#define _VKI_IOC_WRITE 1U ++#define _VKI_IOC_READ 2U ++ ++#define _VKI_IOC(dir,type,nr,size) \ ++ (((dir) << _VKI_IOC_DIRSHIFT) | \ ++ ((type) << _VKI_IOC_TYPESHIFT) | \ ++ ((nr) << _VKI_IOC_NRSHIFT) | \ ++ ((size) << _VKI_IOC_SIZESHIFT)) ++ ++#define _VKI_IO(type,nr) _VKI_IOC(_VKI_IOC_NONE,(type),(nr),0) ++#define _VKI_IOR(type,nr,size) _VKI_IOC(_VKI_IOC_READ,(type),(nr),(_VKI_IOC_TYPECHECK(size))) ++#define _VKI_IOW(type,nr,size) _VKI_IOC(_VKI_IOC_WRITE,(type),(nr),(_VKI_IOC_TYPECHECK(size))) ++#define _VKI_IOWR(type,nr,size) 
_VKI_IOC(_VKI_IOC_READ|_VKI_IOC_WRITE,(type),(nr),(_VKI_IOC_TYPECHECK(size))) ++#define _VKI_IOR_BAD(type,nr,size) _VKI_IOC(_VKI_IOC_READ,(type),(nr),sizeof(size)) ++#define _VKI_IOW_BAD(type,nr,size) _VKI_IOC(_VKI_IOC_WRITE,(type),(nr),sizeof(size)) ++#define _VKI_IOWR_BAD(type,nr,size) _VKI_IOC(_VKI_IOC_READ|_VKI_IOC_WRITE,(type),(nr),sizeof(size)) ++ ++#define _VKI_IOC_DIR(nr) (((nr) >> _VKI_IOC_DIRSHIFT) & _VKI_IOC_DIRMASK) ++#define _VKI_IOC_TYPE(nr) (((nr) >> _VKI_IOC_TYPESHIFT) & _VKI_IOC_TYPEMASK) ++#define _VKI_IOC_NR(nr) (((nr) >> _VKI_IOC_NRSHIFT) & _VKI_IOC_NRMASK) ++#define _VKI_IOC_SIZE(nr) (((nr) >> _VKI_IOC_SIZESHIFT) & _VKI_IOC_SIZEMASK) ++ ++//---------------------------------------------------------------------- ++// From linux-5.19-rc1/include/uapi/asm-generic/ioctls.h ++//---------------------------------------------------------------------- ++ ++#define VKI_TCGETS 0x5401 ++#define VKI_TCSETS 0x5402 ++#define VKI_TCSETSW 0x5403 ++#define VKI_TCSETSF 0x5404 ++#define VKI_TCGETA 0x5405 ++#define VKI_TCSETA 0x5406 ++#define VKI_TCSETAW 0x5407 ++#define VKI_TCSETAF 0x5408 ++#define VKI_TCSBRK 0x5409 ++#define VKI_TCXONC 0x540A ++#define VKI_TCFLSH 0x540B ++#define VKI_TIOCEXCL 0x540C ++#define VKI_TIOCNXCL 0x540D ++#define VKI_TIOCSCTTY 0x540E ++#define VKI_TIOCGPGRP 0x540F ++#define VKI_TIOCSPGRP 0x5410 ++#define VKI_TIOCOUTQ 0x5411 ++#define VKI_TIOCSTI 0x5412 ++#define VKI_TIOCGWINSZ 0x5413 ++#define VKI_TIOCSWINSZ 0x5414 ++#define VKI_TIOCMGET 0x5415 ++#define VKI_TIOCMBIS 0x5416 ++#define VKI_TIOCMBIC 0x5417 ++#define VKI_TIOCMSET 0x5418 ++#define VKI_TIOCGSOFTCAR 0x5419 ++#define VKI_TIOCSSOFTCAR 0x541A ++#define VKI_FIONREAD 0x541B ++#define VKI_TIOCINQ VKI_FIONREAD ++#define VKI_TIOCLINUX 0x541C ++#define VKI_TIOCCONS 0x541D ++#define VKI_TIOCGSERIAL 0x541E ++#define VKI_TIOCSSERIAL 0x541F ++#define VKI_TIOCPKT 0x5420 ++#define VKI_FIONBIO 0x5421 ++#define VKI_TIOCNOTTY 0x5422 ++#define VKI_TIOCSETD 0x5423 ++#define VKI_TIOCGETD 0x5424 ++#define VKI_TCSBRKP 0x5425 /* Needed for POSIX tcsendbreak() */ ++#define VKI_TIOCSBRK 0x5427 /* BSD compatibility */ ++#define VKI_TIOCCBRK 0x5428 /* BSD compatibility */ ++#define VKI_TIOCGSID 0x5429 /* Return the session ID of FD */ ++#define VKI_TCGETS2 _VKI_IOR('T', 0x2A, struct termios2) ++#define VKI_TCSETS2 _VKI_IOW('T', 0x2B, struct termios2) ++#define VKI_TCSETSW2 _VKI_IOW('T', 0x2C, struct termios2) ++#define VKI_TCSETSF2 _VKI_IOW('T', 0x2D, struct termios2) ++#define VKI_TIOCGRS485 0x542E ++#define VKI_TIOCSRS485 0x542F ++#define VKI_TIOCGPTN _VKI_IOR('T', 0x30, unsigned int) /* Get Pty Number (of pty-mux device) */ ++#define VKI_TIOCSPTLCK _VKI_IOW('T', 0x31, int) /* Lock/unlock Pty */ ++#define VKI_TIOCGDEV _VKI_IOR('T', 0x32, unsigned int) /* Get primary device node of /dev/console */ ++#define VKI_TCGETX 0x5432 /* SYS5 TCGETX compatibility */ ++#define VKI_TCSETX 0x5433 ++#define VKI_TCSETXF 0x5434 ++#define VKI_TCSETXW 0x5435 ++#define VKI_TIOCSIG _VKI_IOW('T', 0x36, int) /* pty: generate signal */ ++#define VKI_TIOCVHANGUP 0x5437 ++#define VKI_TIOCGPKT _VKI_IOR('T', 0x38, int) /* Get packet mode state */ ++#define VKI_TIOCGPTLCK _VKI_IOR('T', 0x39, int) /* Get Pty lock state */ ++#define VKI_TIOCGEXCL _VKI_IOR('T', 0x40, int) /* Get exclusive mode state */ ++#define VKI_TIOCGPTPEER _VKI_IO('T', 0x41) /* Safely open the slave */ ++#define VKI_TIOCGISO7816 _VKI_IOR('T', 0x42, struct serial_iso7816) ++#define VKI_TIOCSISO7816 _VKI_IOWR('T', 0x43, struct serial_iso7816) ++ ++#define VKI_FIONCLEX 0x5450 ++#define 
VKI_FIOCLEX 0x5451 ++#define VKI_FIOASYNC 0x5452 ++#define VKI_TIOCSERCONFIG 0x5453 ++#define VKI_TIOCSERGWILD 0x5454 ++#define VKI_TIOCSERSWILD 0x5455 ++#define VKI_TIOCGLCKTRMIOS 0x5456 ++#define VKI_TIOCSLCKTRMIOS 0x5457 ++#define VKI_TIOCSERGSTRUCT 0x5458 /* For debugging only */ ++#define VKI_TIOCSERGETLSR 0x5459 /* Get line status register */ ++#define VKI_TIOCSERGETMULTI 0x545A /* Get multiport config */ ++#define VKI_TIOCSERSETMULTI 0x545B /* Set multiport config */ ++ ++#define VKI_TIOCMIWAIT 0x545C /* wait for a change on serial input line(s) */ ++#define VKI_TIOCGICOUNT 0x545D /* read serial port inline interrupt counts */ ++ ++#define VKI_FIOQSIZE 0x5460 ++ ++#define VKI_TIOCPKT_DATA 0 ++#define VKI_TIOCPKT_FLUSHREAD 1 ++#define VKI_TIOCPKT_FLUSHWRITE 2 ++#define VKI_TIOCPKT_STOP 4 ++#define VKI_TIOCPKT_START 8 ++#define VKI_TIOCPKT_NOSTOP 16 ++#define VKI_TIOCPKT_DOSTOP 32 ++#define VKI_TIOCPKT_IOCTL 64 ++ ++#define VKI_TIOCSER_TEMT 0x01 /* Transmitter physically empty */ ++ ++//---------------------------------------------------------------------- ++// From linux-5.19-rc1/include/uapi/asm-generic/poll.h ++//---------------------------------------------------------------------- ++ ++#define VKI_POLLIN 0x0001 ++ ++struct vki_pollfd { ++ int fd; ++ short events; ++ short revents; ++}; ++ ++//---------------------------------------------------------------------- ++// From linux-5.19-rc1/arch/loongarch/include/asm/elf.h ++//---------------------------------------------------------------------- ++ ++#define VKI_ELF_NGREG 45 ++#define VKI_ELF_NFPREG 34 ++ ++typedef unsigned long vki_elf_greg_t; ++typedef vki_elf_greg_t vki_elf_gregset_t[VKI_ELF_NGREG]; ++ ++typedef double vki_elf_fpreg_t; ++typedef vki_elf_fpreg_t vki_elf_fpregset_t[VKI_ELF_NFPREG]; ++ ++//---------------------------------------------------------------------- ++// From linux-5.19-rc1/arch/loongarch/include/uapi/asm/ucontext.h ++//---------------------------------------------------------------------- ++ ++struct vki_ucontext { ++ unsigned long uc_flags; ++ struct vki_ucontext *uc_link; ++ vki_stack_t uc_stack; ++ vki_sigset_t uc_sigmask; ++ __vki_u8 __unused[1024 / 8 - sizeof(vki_sigset_t)]; ++ struct vki_sigcontext uc_mcontext; ++}; ++ ++typedef char vki_modify_ldt_t; ++ ++ ++ ++//---------------------------------------------------------------------- ++// From linux-5.19-rc1/include/uapi/asm-generic/ipcbuf.h ++//---------------------------------------------------------------------- ++ ++struct vki_ipc64_perm { ++ __vki_kernel_key_t key; ++ __vki_kernel_uid32_t uid; ++ __vki_kernel_gid32_t gid; ++ __vki_kernel_uid32_t cuid; ++ __vki_kernel_gid32_t cgid; ++ __vki_kernel_mode_t mode; ++ unsigned char __pad1[4 - sizeof(__vki_kernel_mode_t)]; /* pad if mode_t is u16: */ ++ unsigned short seq; ++ unsigned short __pad2; ++ __vki_kernel_ulong_t __unused1; ++ __vki_kernel_ulong_t __unused2; ++}; ++ ++//---------------------------------------------------------------------- ++// From linux-5.19-rc1/include/uapi/asm-generic/sembuf.h ++//---------------------------------------------------------------------- ++struct vki_semid64_ds { ++ struct vki_ipc64_perm sem_perm; /* permissions .. see ipc.h */ ++ long sem_otime; /* last semop time */ ++ long sem_ctime; /* last change time */ ++ unsigned long sem_nsems; /* no. 
of semaphores in array */ ++ unsigned long __unused3; ++ unsigned long __unused4; ++}; ++ ++//---------------------------------------------------------------------- ++// From linux-5.19-rc1/include/uapi/asm-generic/msgbuf.h ++//---------------------------------------------------------------------- ++ ++struct vki_msqid64_ds { ++ struct vki_ipc64_perm msg_perm; ++ long msg_stime; /* last msgsnd time */ ++ long msg_rtime; /* last msgrcv time */ ++ long msg_ctime; /* last change time */ ++ unsigned long msg_cbytes; /* current number of bytes on queue */ ++ unsigned long msg_qnum; /* number of messages in queue */ ++ unsigned long msg_qbytes; /* max number of bytes on queue */ ++ __vki_kernel_pid_t msg_lspid; /* pid of last msgsnd */ ++ __vki_kernel_pid_t msg_lrpid; /* last receive pid */ ++ unsigned long __unused4; ++ unsigned long __unused5; ++}; ++ ++//---------------------------------------------------------------------- ++// From linux-5.19-rc1/include/uapi/asm-generic/shmbuf.h ++//---------------------------------------------------------------------- ++ ++struct vki_shmid64_ds { ++ struct vki_ipc64_perm shm_perm; /* operation perms */ ++ vki_size_t shm_segsz; /* size of segment (bytes) */ ++ long shm_atime; /* last attach time */ ++ long shm_dtime; /* last detach time */ ++ long shm_ctime; /* last change time */ ++ __vki_kernel_pid_t shm_cpid; /* pid of creator */ ++ __vki_kernel_pid_t shm_lpid; /* pid of last operator */ ++ unsigned long shm_nattch; /* no. of current attaches */ ++ unsigned long __unused4; ++ unsigned long __unused5; ++}; ++ ++struct vki_shminfo64 { ++ unsigned long shmmax; ++ unsigned long shmmin; ++ unsigned long shmmni; ++ unsigned long shmseg; ++ unsigned long shmall; ++ unsigned long __unused1; ++ unsigned long __unused2; ++ unsigned long __unused3; ++ unsigned long __unused4; ++}; ++ ++//---------------------------------------------------------------------- ++// From linux-5.19-rc1/arch/loongarch/include/uapi/asm/ptrace.h ++//---------------------------------------------------------------------- ++ ++struct vki_user_pt_regs { ++ /* Saved main processor registers. */ ++ unsigned long regs[32]; ++ ++ /* Original syscall arg0. */ ++ unsigned long orig_a0; ++ ++ /* Saved special registers. */ ++ unsigned long csr_era; ++ unsigned long csr_badv; ++ unsigned long reserved[10]; ++} __attribute__((aligned(8))); ++ ++#define vki_user_regs_struct vki_user_pt_regs ++ ++struct vki_user_fp_state { ++ vki_uint64_t fpr[32]; ++ vki_uint64_t fcc; ++ vki_uint32_t fcsr; ++}; ++ ++//---------------------------------------------------------------------- ++// From linux-5.19-rc1/include/uapi/asm-generic/errno.h ++//---------------------------------------------------------------------- ++ ++#define VKI_ENOSYS 38 /* Invalid system call number */ ++#define VKI_EOVERFLOW 75 /* Value too large for defined data type */ ++ ++#endif // __VKI_LOONGARCH64_LINUX_H ++ ++/*--------------------------------------------------------------------*/ ++/*--- end vki-loongarch64-linux.h ---*/ ++/*--------------------------------------------------------------------*/ +diff --git a/include/vki/vki-posixtypes-loongarch64-linux.h b/include/vki/vki-posixtypes-loongarch64-linux.h +new file mode 100644 +index 000000000..0282a2a39 +--- /dev/null ++++ b/include/vki/vki-posixtypes-loongarch64-linux.h +@@ -0,0 +1,76 @@ ++ ++/*--------------------------------------------------------------------*/ ++/*--- loongarch/Linux-specific kernel interface: posix types. 
---*/ ++/*--- vki-posixtypes-loongarch64-linux.h ---*/ ++/*--------------------------------------------------------------------*/ ++ ++/* ++ This file is part of Valgrind, a dynamic binary instrumentation ++ framework. ++ ++ Copyright (C) 2021-2022 Loongson Technology Corporation Limited ++ ++ This program is free software; you can redistribute it and/or ++ modify it under the terms of the GNU General Public License as ++ published by the Free Software Foundation; either version 2 of the ++ License, or (at your option) any later version. ++ ++ This program is distributed in the hope that it will be useful, but ++ WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ General Public License for more details. ++ ++ You should have received a copy of the GNU General Public License ++ along with this program; if not, see . ++ ++ The GNU General Public License is contained in the file COPYING. ++*/ ++ ++#ifndef __VKI_POSIXTYPES_LOONGARCH64_LINUX_H ++#define __VKI_POSIXTYPES_LOONGARCH64_LINUX_H ++ ++//---------------------------------------------------------------------- ++// From linux-5.19-rc1/include/uapi/asm-generic/posix_types.h ++//---------------------------------------------------------------------- ++ ++typedef long __vki_kernel_long_t; ++typedef unsigned long __vki_kernel_ulong_t; ++typedef __vki_kernel_ulong_t __vki_kernel_ino_t; ++typedef unsigned int __vki_kernel_mode_t; ++typedef int __vki_kernel_pid_t; ++typedef int __vki_kernel_ipc_pid_t; ++typedef unsigned int __vki_kernel_uid_t; ++typedef unsigned int __vki_kernel_gid_t; ++typedef __vki_kernel_long_t __vki_kernel_suseconds_t; ++typedef int __vki_kernel_daddr_t; ++typedef unsigned int __vki_kernel_uid32_t; ++typedef unsigned int __vki_kernel_gid32_t; ++typedef __vki_kernel_uid_t __vki_kernel_old_uid_t; ++typedef __vki_kernel_gid_t __vki_kernel_old_gid_t; ++typedef unsigned int __vki_kernel_old_dev_t; ++ ++typedef __vki_kernel_ulong_t __vki_kernel_size_t; ++typedef __vki_kernel_long_t __vki_kernel_ssize_t; ++typedef __vki_kernel_long_t __vki_kernel_ptrdiff_t; ++ ++typedef struct { ++ int val[2]; ++} __vki_kernel_fsid_t; ++ ++typedef __vki_kernel_long_t __vki_kernel_off_t; ++typedef long long __vki_kernel_loff_t; ++typedef __vki_kernel_long_t __vki_kernel_old_time_t; ++typedef __vki_kernel_long_t __vki_kernel_time_t; ++typedef long long __vki_kernel_time64_t; ++typedef __vki_kernel_long_t __vki_kernel_clock_t; ++typedef int __vki_kernel_timer_t; ++typedef int __vki_kernel_clockid_t; ++typedef char * __vki_kernel_caddr_t; ++typedef unsigned short __vki_kernel_uid16_t; ++typedef unsigned short __vki_kernel_gid16_t; ++ ++#endif // __VKI_POSIXTYPES_LOONGARCH64_LINUX_H ++ ++/*--------------------------------------------------------------------*/ ++/*--- end vki-posixtypes-loongarch64-linux.h ---*/ ++/*--------------------------------------------------------------------*/ +diff --git a/include/vki/vki-scnums-loongarch64-linux.h b/include/vki/vki-scnums-loongarch64-linux.h +new file mode 100644 +index 000000000..ed3ef7e43 +--- /dev/null ++++ b/include/vki/vki-scnums-loongarch64-linux.h +@@ -0,0 +1,333 @@ ++/*--------------------------------------------------------------------*/ ++/*--- System call numbers for loongarch-linux. ---*/ ++/*--- vki-scnums-loongarch64-linux.h ---*/ ++/*--------------------------------------------------------------------*/ ++ ++/* ++ This file is part of Valgrind, a dynamic binary instrumentation ++ framework. 
++ ++ Copyright (C) 2021-2022 Loongson Technology Corporation Limited ++ ++ This program is free software; you can redistribute it and/or ++ modify it under the terms of the GNU General Public License as ++ published by the Free Software Foundation; either version 2 of the ++ License, or (at your option) any later version. ++ ++ This program is distributed in the hope that it will be useful, but ++ WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ General Public License for more details. ++ ++ You should have received a copy of the GNU General Public License ++ along with this program; if not, see . ++ ++ The GNU General Public License is contained in the file COPYING. ++*/ ++ ++#ifndef __VKI_SCNUMS_LOONGARCH64_LINUX_H ++#define __VKI_SCNUMS_LOONGARCH64_LINUX_H ++ ++//---------------------------------------------------------------------- ++// From linux-5.19-rc1/include/uapi/asm-generic/unistd.h ++//---------------------------------------------------------------------- ++ ++#define __NR_io_setup 0 ++#define __NR_io_destroy 1 ++#define __NR_io_submit 2 ++#define __NR_io_cancel 3 ++#define __NR_io_getevents 4 ++#define __NR_setxattr 5 ++#define __NR_lsetxattr 6 ++#define __NR_fsetxattr 7 ++#define __NR_getxattr 8 ++#define __NR_lgetxattr 9 ++#define __NR_fgetxattr 10 ++#define __NR_listxattr 11 ++#define __NR_llistxattr 12 ++#define __NR_flistxattr 13 ++#define __NR_removexattr 14 ++#define __NR_lremovexattr 15 ++#define __NR_fremovexattr 16 ++#define __NR_getcwd 17 ++#define __NR_lookup_dcookie 18 ++#define __NR_eventfd2 19 ++#define __NR_epoll_create1 20 ++#define __NR_epoll_ctl 21 ++#define __NR_epoll_pwait 22 ++#define __NR_dup 23 ++#define __NR_dup3 24 ++#define __NR3264_fcntl 25 ++#define __NR_inotify_init1 26 ++#define __NR_inotify_add_watch 27 ++#define __NR_inotify_rm_watch 28 ++#define __NR_ioctl 29 ++#define __NR_ioprio_set 30 ++#define __NR_ioprio_get 31 ++#define __NR_flock 32 ++#define __NR_mknodat 33 ++#define __NR_mkdirat 34 ++#define __NR_unlinkat 35 ++#define __NR_symlinkat 36 ++#define __NR_linkat 37 ++// #define __NR_renameat 38 ++#define __NR_umount2 39 ++#define __NR_mount 40 ++#define __NR_pivot_root 41 ++#define __NR_nfsservctl 42 ++#define __NR3264_statfs 43 ++#define __NR3264_fstatfs 44 ++#define __NR3264_truncate 45 ++#define __NR3264_ftruncate 46 ++#define __NR_fallocate 47 ++#define __NR_faccessat 48 ++#define __NR_chdir 49 ++#define __NR_fchdir 50 ++#define __NR_chroot 51 ++#define __NR_fchmod 52 ++#define __NR_fchmodat 53 ++#define __NR_fchownat 54 ++#define __NR_fchown 55 ++#define __NR_openat 56 ++#define __NR_close 57 ++#define __NR_vhangup 58 ++#define __NR_pipe2 59 ++#define __NR_quotactl 60 ++#define __NR_getdents64 61 ++#define __NR3264_lseek 62 ++#define __NR_read 63 ++#define __NR_write 64 ++#define __NR_readv 65 ++#define __NR_writev 66 ++#define __NR_pread64 67 ++#define __NR_pwrite64 68 ++#define __NR_preadv 69 ++#define __NR_pwritev 70 ++#define __NR3264_sendfile 71 ++#define __NR_pselect6 72 ++#define __NR_ppoll 73 ++#define __NR_signalfd4 74 ++#define __NR_vmsplice 75 ++#define __NR_splice 76 ++#define __NR_tee 77 ++#define __NR_readlinkat 78 ++// #define __NR3264_fstatat 79 ++// #define __NR3264_fstat 80 ++#define __NR_sync 81 ++#define __NR_fsync 82 ++#define __NR_fdatasync 83 ++#define __NR_sync_file_range 84 ++#define __NR_timerfd_create 85 ++#define __NR_timerfd_settime 86 ++#define __NR_timerfd_gettime 87 ++#define __NR_utimensat 88 ++#define __NR_acct 89 
++#define __NR_capget 90 ++#define __NR_capset 91 ++#define __NR_personality 92 ++#define __NR_exit 93 ++#define __NR_exit_group 94 ++#define __NR_waitid 95 ++#define __NR_set_tid_address 96 ++#define __NR_unshare 97 ++#define __NR_futex 98 ++#define __NR_set_robust_list 99 ++#define __NR_get_robust_list 100 ++#define __NR_nanosleep 101 ++#define __NR_getitimer 102 ++#define __NR_setitimer 103 ++#define __NR_kexec_load 104 ++#define __NR_init_module 105 ++#define __NR_delete_module 106 ++#define __NR_timer_create 107 ++#define __NR_timer_gettime 108 ++#define __NR_timer_getoverrun 109 ++#define __NR_timer_settime 110 ++#define __NR_timer_delete 111 ++#define __NR_clock_settime 112 ++#define __NR_clock_gettime 113 ++#define __NR_clock_getres 114 ++#define __NR_clock_nanosleep 115 ++#define __NR_syslog 116 ++#define __NR_ptrace 117 ++#define __NR_sched_setparam 118 ++#define __NR_sched_setscheduler 119 ++#define __NR_sched_getscheduler 120 ++#define __NR_sched_getparam 121 ++#define __NR_sched_setaffinity 122 ++#define __NR_sched_getaffinity 123 ++#define __NR_sched_yield 124 ++#define __NR_sched_get_priority_max 125 ++#define __NR_sched_get_priority_min 126 ++#define __NR_sched_rr_get_interval 127 ++#define __NR_restart_syscall 128 ++#define __NR_kill 129 ++#define __NR_tkill 130 ++#define __NR_tgkill 131 ++#define __NR_sigaltstack 132 ++#define __NR_rt_sigsuspend 133 ++#define __NR_rt_sigaction 134 ++#define __NR_rt_sigprocmask 135 ++#define __NR_rt_sigpending 136 ++#define __NR_rt_sigtimedwait 137 ++#define __NR_rt_sigqueueinfo 138 ++#define __NR_rt_sigreturn 139 ++#define __NR_setpriority 140 ++#define __NR_getpriority 141 ++#define __NR_reboot 142 ++#define __NR_setregid 143 ++#define __NR_setgid 144 ++#define __NR_setreuid 145 ++#define __NR_setuid 146 ++#define __NR_setresuid 147 ++#define __NR_getresuid 148 ++#define __NR_setresgid 149 ++#define __NR_getresgid 150 ++#define __NR_setfsuid 151 ++#define __NR_setfsgid 152 ++#define __NR_times 153 ++#define __NR_setpgid 154 ++#define __NR_getpgid 155 ++#define __NR_getsid 156 ++#define __NR_setsid 157 ++#define __NR_getgroups 158 ++#define __NR_setgroups 159 ++#define __NR_uname 160 ++#define __NR_sethostname 161 ++#define __NR_setdomainname 162 ++// #define __NR_getrlimit 163 ++// #define __NR_setrlimit 164 ++#define __NR_getrusage 165 ++#define __NR_umask 166 ++#define __NR_prctl 167 ++#define __NR_getcpu 168 ++#define __NR_gettimeofday 169 ++#define __NR_settimeofday 170 ++#define __NR_adjtimex 171 ++#define __NR_getpid 172 ++#define __NR_getppid 173 ++#define __NR_getuid 174 ++#define __NR_geteuid 175 ++#define __NR_getgid 176 ++#define __NR_getegid 177 ++#define __NR_gettid 178 ++#define __NR_sysinfo 179 ++#define __NR_mq_open 180 ++#define __NR_mq_unlink 181 ++#define __NR_mq_timedsend 182 ++#define __NR_mq_timedreceive 183 ++#define __NR_mq_notify 184 ++#define __NR_mq_getsetattr 185 ++#define __NR_msgget 186 ++#define __NR_msgctl 187 ++#define __NR_msgrcv 188 ++#define __NR_msgsnd 189 ++#define __NR_semget 190 ++#define __NR_semctl 191 ++#define __NR_semtimedop 192 ++#define __NR_semop 193 ++#define __NR_shmget 194 ++#define __NR_shmctl 195 ++#define __NR_shmat 196 ++#define __NR_shmdt 197 ++#define __NR_socket 198 ++#define __NR_socketpair 199 ++#define __NR_bind 200 ++#define __NR_listen 201 ++#define __NR_accept 202 ++#define __NR_connect 203 ++#define __NR_getsockname 204 ++#define __NR_getpeername 205 ++#define __NR_sendto 206 ++#define __NR_recvfrom 207 ++#define __NR_setsockopt 208 ++#define __NR_getsockopt 209 ++#define 
__NR_shutdown 210 ++#define __NR_sendmsg 211 ++#define __NR_recvmsg 212 ++#define __NR_readahead 213 ++#define __NR_brk 214 ++#define __NR_munmap 215 ++#define __NR_mremap 216 ++#define __NR_add_key 217 ++#define __NR_request_key 218 ++#define __NR_keyctl 219 ++#define __NR_clone 220 ++#define __NR_execve 221 ++#define __NR3264_mmap 222 ++#define __NR3264_fadvise64 223 ++#define __NR_swapon 224 ++#define __NR_swapoff 225 ++#define __NR_mprotect 226 ++#define __NR_msync 227 ++#define __NR_mlock 228 ++#define __NR_munlock 229 ++#define __NR_mlockall 230 ++#define __NR_munlockall 231 ++#define __NR_mincore 232 ++#define __NR_madvise 233 ++#define __NR_remap_file_pages 234 ++#define __NR_mbind 235 ++#define __NR_get_mempolicy 236 ++#define __NR_set_mempolicy 237 ++#define __NR_migrate_pages 238 ++#define __NR_move_pages 239 ++#define __NR_rt_tgsigqueueinfo 240 ++#define __NR_perf_event_open 241 ++#define __NR_accept4 242 ++#define __NR_recvmmsg 243 ++ ++#define __NR_wait4 260 ++#define __NR_prlimit64 261 ++#define __NR_fanotify_init 262 ++#define __NR_fanotify_mark 263 ++#define __NR_name_to_handle_at 264 ++#define __NR_open_by_handle_at 265 ++#define __NR_clock_adjtime 266 ++#define __NR_syncfs 267 ++#define __NR_setns 268 ++#define __NR_sendmmsg 269 ++#define __NR_process_vm_readv 270 ++#define __NR_process_vm_writev 271 ++#define __NR_kcmp 272 ++#define __NR_finit_module 273 ++#define __NR_sched_setattr 274 ++#define __NR_sched_getattr 275 ++#define __NR_renameat2 276 ++#define __NR_seccomp 277 ++#define __NR_getrandom 278 ++#define __NR_memfd_create 279 ++#define __NR_bpf 280 ++#define __NR_execveat 281 ++#define __NR_userfaultfd 282 ++#define __NR_membarrier 283 ++#define __NR_mlock2 284 ++#define __NR_copy_file_range 285 ++#define __NR_preadv2 286 ++#define __NR_pwritev2 287 ++#define __NR_pkey_mprotect 288 ++#define __NR_pkey_alloc 289 ++#define __NR_pkey_free 290 ++#define __NR_statx 291 ++#define __NR_io_pgetevents 292 ++#define __NR_rseq 293 ++#define __NR_kexec_file_load 294 ++#define __NR_pidfd_getfd 438 ++#define __NR_epoll_pwait2 441 ++#define __NR_fchmodat2 452 ++ ++#define __NR_fcntl __NR3264_fcntl ++#define __NR_statfs __NR3264_statfs ++#define __NR_fstatfs __NR3264_fstatfs ++#define __NR_truncate __NR3264_truncate ++#define __NR_ftruncate __NR3264_ftruncate ++#define __NR_lseek __NR3264_lseek ++#define __NR_sendfile __NR3264_sendfile ++#define __NR_mmap __NR3264_mmap ++#define __NR_fadvise64 __NR3264_fadvise64 ++ ++#endif /* __VKI_SCNUMS_LOONGARCH64_LINUX_H */ ++ ++/*--------------------------------------------------------------------*/ ++/*--- end vki-scnums-loongarch64-linux.h ---*/ ++/*--------------------------------------------------------------------*/ +diff --git a/include/vki/vki-scnums-shared-linux.h b/include/vki/vki-scnums-shared-linux.h +index d90cdd312..068a2cd12 100644 +--- a/include/vki/vki-scnums-shared-linux.h ++++ b/include/vki/vki-scnums-shared-linux.h +@@ -43,9 +43,13 @@ + #define __NR_clone3 435 + #define __NR_close_range 436 + #define __NR_openat2 437 +- ++#define __NR_pidfd_getfd 438 + #define __NR_faccessat2 439 + ++#define __NR_epoll_pwait2 441 ++ + #define __NR_memfd_secret 447 + ++#define __NR_fchmodat2 452 ++ + #endif +diff --git a/massif/tests/Makefile.am b/massif/tests/Makefile.am +index 84c9b1273..2dec57d1e 100644 +--- a/massif/tests/Makefile.am ++++ b/massif/tests/Makefile.am +@@ -11,6 +11,8 @@ EXTRA_DIST = \ + big-alloc.post.exp big-alloc.post.exp-64bit big-alloc.post.exp-ppc64 \ + big-alloc.stderr.exp big-alloc.vgtest \ + 
big-alloc.post.exp-x86-freebsd \ ++ big-alloc.post.exp-loongarch64 \ ++ bug469146.post.exp bug469146.stderr.exp bug469146.vgtest \ + deep-A.post.exp deep-A.stderr.exp deep-A.vgtest \ + deep-B.post.exp deep-B.stderr.exp deep-B.vgtest \ + deep-C.post.exp deep-C.stderr.exp deep-C.vgtest \ +diff --git a/massif/tests/big-alloc.post.exp-loongarch64 b/massif/tests/big-alloc.post.exp-loongarch64 +new file mode 100644 +index 000000000..0dd5671af +--- /dev/null ++++ b/massif/tests/big-alloc.post.exp-loongarch64 +@@ -0,0 +1,54 @@ ++-------------------------------------------------------------------------------- ++Command: ./big-alloc ++Massif arguments: --stacks=no --time-unit=B --massif-out-file=massif.out --ignore-fn=__part_load_locale --ignore-fn=__time_load_locale --ignore-fn=dwarf2_unwind_dyld_add_image_hook --ignore-fn=get_or_create_key_element ++ms_print arguments: massif.out ++-------------------------------------------------------------------------------- ++ ++ ++ MB ++100.2^ : ++ | : ++ | @@@@@@@: ++ | @ : ++ | :::::::@ : ++ | : @ : ++ | :::::::: @ : ++ | : : @ : ++ | :::::::: : @ : ++ | : : : @ : ++ | :::::::: : : @ : ++ | : : : : @ : ++ | ::::::::: : : : @ : ++ | : : : : : @ : ++ | :::::::: : : : : @ : ++ | : : : : : : @ : ++ | :::::::: : : : : : @ : ++ | : : : : : : : @ : ++ | :::::::: : : : : : : @ : ++ | : : : : : : : : @ : ++ 0 +----------------------------------------------------------------------->MB ++ 0 100.2 ++ ++Number of snapshots: 11 ++ Detailed snapshots: [9] ++ ++-------------------------------------------------------------------------------- ++ n time(B) total(B) useful-heap(B) extra-heap(B) stacks(B) ++-------------------------------------------------------------------------------- ++ 0 0 0 0 0 0 ++ 1 10,502,088 10,502,088 10,485,760 16,328 0 ++ 2 21,004,176 21,004,176 20,971,520 32,656 0 ++ 3 31,506,264 31,506,264 31,457,280 48,984 0 ++ 4 42,008,352 42,008,352 41,943,040 65,312 0 ++ 5 52,510,440 52,510,440 52,428,800 81,640 0 ++ 6 63,012,528 63,012,528 62,914,560 97,968 0 ++ 7 73,514,616 73,514,616 73,400,320 114,296 0 ++ 8 84,016,704 84,016,704 83,886,080 130,624 0 ++ 9 94,518,792 94,518,792 94,371,840 146,952 0 ++99.84% (94,371,840B) (heap allocation functions) malloc/new/new[], --alloc-fns, etc. 
++->99.84% (94,371,840B) 0x........: main (big-alloc.c:12) ++ ++-------------------------------------------------------------------------------- ++ n time(B) total(B) useful-heap(B) extra-heap(B) stacks(B) ++-------------------------------------------------------------------------------- ++ 10 105,020,880 105,020,880 104,857,600 163,280 0 +diff --git a/memcheck/mc_machine.c b/memcheck/mc_machine.c +index 176c8e5cb..49f98948e 100644 +--- a/memcheck/mc_machine.c ++++ b/memcheck/mc_machine.c +@@ -1394,6 +1394,118 @@ static Int get_otrack_shadow_offset_wrk ( Int offset, Int szB ) + offset,szB); + tl_assert(0); + # undef GOF ++# undef SZB ++ ++ /* ----------------- loongarch64 ----------------- */ ++ ++# elif defined(VGA_loongarch64) ++ ++# define GOF(_fieldname) \ ++ (offsetof(VexGuestLOONGARCH64State,guest_##_fieldname)) ++# define SZB(_fieldname) \ ++ (sizeof(((VexGuestLOONGARCH64State*)0)->guest_##_fieldname)) ++ ++ Int o = offset; ++ Int sz = szB; ++ Bool is48 = sz == 8 || sz == 4; ++ Bool is1248 = sz == 8 || sz == 4 || sz == 2 || sz == 1; ++ ++ tl_assert(sz > 0); ++ tl_assert(host_is_little_endian()); ++ ++ if (o == GOF(R0) && is1248) return o; ++ if (o == GOF(R1) && is1248) return o; ++ if (o == GOF(R2) && is1248) return o; ++ if (o == GOF(R3) && is1248) return o; ++ if (o == GOF(R4) && is1248) return o; ++ if (o == GOF(R5) && is1248) return o; ++ if (o == GOF(R6) && is1248) return o; ++ if (o == GOF(R7) && is1248) return o; ++ if (o == GOF(R8) && is1248) return o; ++ if (o == GOF(R9) && is1248) return o; ++ if (o == GOF(R10) && is1248) return o; ++ if (o == GOF(R11) && is1248) return o; ++ if (o == GOF(R12) && is1248) return o; ++ if (o == GOF(R13) && is1248) return o; ++ if (o == GOF(R14) && is1248) return o; ++ if (o == GOF(R15) && is1248) return o; ++ if (o == GOF(R16) && is1248) return o; ++ if (o == GOF(R17) && is1248) return o; ++ if (o == GOF(R18) && is1248) return o; ++ if (o == GOF(R19) && is1248) return o; ++ if (o == GOF(R20) && is1248) return o; ++ if (o == GOF(R21) && is1248) return o; ++ if (o == GOF(R22) && is1248) return o; ++ if (o == GOF(R23) && is1248) return o; ++ if (o == GOF(R24) && is1248) return o; ++ if (o == GOF(R25) && is1248) return o; ++ if (o == GOF(R26) && is1248) return o; ++ if (o == GOF(R27) && is1248) return o; ++ if (o == GOF(R28) && is1248) return o; ++ if (o == GOF(R29) && is1248) return o; ++ if (o == GOF(R30) && is1248) return o; ++ if (o == GOF(R31) && is1248) return o; ++ ++ if (o == GOF(PC) && sz == 8) return -1; /* slot unused */ ++ ++ if (o >= GOF(X0) && o + sz <= GOF(X0) + SZB(X0)) return GOF(X0); ++ if (o >= GOF(X1) && o + sz <= GOF(X1) + SZB(X1)) return GOF(X1); ++ if (o >= GOF(X2) && o + sz <= GOF(X2) + SZB(X2)) return GOF(X2); ++ if (o >= GOF(X3) && o + sz <= GOF(X3) + SZB(X3)) return GOF(X3); ++ if (o >= GOF(X4) && o + sz <= GOF(X4) + SZB(X4)) return GOF(X4); ++ if (o >= GOF(X5) && o + sz <= GOF(X5) + SZB(X5)) return GOF(X5); ++ if (o >= GOF(X6) && o + sz <= GOF(X6) + SZB(X6)) return GOF(X6); ++ if (o >= GOF(X7) && o + sz <= GOF(X7) + SZB(X7)) return GOF(X7); ++ if (o >= GOF(X8) && o + sz <= GOF(X8) + SZB(X8)) return GOF(X8); ++ if (o >= GOF(X9) && o + sz <= GOF(X9) + SZB(X9)) return GOF(X9); ++ if (o >= GOF(X10) && o + sz <= GOF(X10) + SZB(X10)) return GOF(X10); ++ if (o >= GOF(X11) && o + sz <= GOF(X11) + SZB(X11)) return GOF(X11); ++ if (o >= GOF(X12) && o + sz <= GOF(X12) + SZB(X12)) return GOF(X12); ++ if (o >= GOF(X13) && o + sz <= GOF(X13) + SZB(X13)) return GOF(X13); ++ if (o >= GOF(X14) && o + sz <= GOF(X14) + SZB(X14)) return 
GOF(X14); ++ if (o >= GOF(X15) && o + sz <= GOF(X15) + SZB(X15)) return GOF(X15); ++ if (o >= GOF(X16) && o + sz <= GOF(X16) + SZB(X16)) return GOF(X16); ++ if (o >= GOF(X17) && o + sz <= GOF(X17) + SZB(X17)) return GOF(X17); ++ if (o >= GOF(X18) && o + sz <= GOF(X18) + SZB(X18)) return GOF(X18); ++ if (o >= GOF(X19) && o + sz <= GOF(X19) + SZB(X19)) return GOF(X19); ++ if (o >= GOF(X20) && o + sz <= GOF(X20) + SZB(X20)) return GOF(X20); ++ if (o >= GOF(X21) && o + sz <= GOF(X21) + SZB(X21)) return GOF(X21); ++ if (o >= GOF(X22) && o + sz <= GOF(X22) + SZB(X22)) return GOF(X22); ++ if (o >= GOF(X23) && o + sz <= GOF(X23) + SZB(X23)) return GOF(X23); ++ if (o >= GOF(X24) && o + sz <= GOF(X24) + SZB(X24)) return GOF(X24); ++ if (o >= GOF(X25) && o + sz <= GOF(X25) + SZB(X25)) return GOF(X25); ++ if (o >= GOF(X26) && o + sz <= GOF(X26) + SZB(X26)) return GOF(X26); ++ if (o >= GOF(X27) && o + sz <= GOF(X27) + SZB(X27)) return GOF(X27); ++ if (o >= GOF(X28) && o + sz <= GOF(X28) + SZB(X28)) return GOF(X28); ++ if (o >= GOF(X29) && o + sz <= GOF(X29) + SZB(X29)) return GOF(X29); ++ if (o >= GOF(X30) && o + sz <= GOF(X30) + SZB(X30)) return GOF(X30); ++ if (o >= GOF(X31) && o + sz <= GOF(X31) + SZB(X31)) return GOF(X31); ++ ++ if (o == GOF(FCC0) && sz == 1) return -1; /* slot unused */ ++ if (o == GOF(FCC1) && sz == 1) return -1; /* slot unused */ ++ if (o == GOF(FCC2) && sz == 1) return -1; /* slot unused */ ++ if (o == GOF(FCC3) && sz == 1) return -1; /* slot unused */ ++ if (o == GOF(FCC4) && sz == 1) return -1; /* slot unused */ ++ if (o == GOF(FCC5) && sz == 1) return -1; /* slot unused */ ++ if (o == GOF(FCC6) && sz == 1) return -1; /* slot unused */ ++ if (o == GOF(FCC7) && sz == 1) return -1; /* slot unused */ ++ if (o == GOF(FCSR) && sz == 4) return -1; /* slot unused */ ++ ++ if (o == GOF(EMNOTE) && sz == 4) return -1; /* slot unused */ ++ ++ if (o == GOF(CMSTART) && sz == 8) return -1; /* slot unused */ ++ if (o == GOF(CMLEN) && sz == 8) return -1; /* slot unused */ ++ ++ if (o == GOF(NRADDR) && sz == 8) return -1; /* slot unused */ ++ ++ if (o == GOF(LLSC_SIZE) && sz == 8) return -1; /* slot unused */ ++ if (o == GOF(LLSC_ADDR) && sz == 8) return -1; /* slot unused */ ++ if (o == GOF(LLSC_DATA) && is48) return -1; /* slot unused */ ++ ++ VG_(printf)("MC_(get_otrack_shadow_offset)(loongarch64)(off=%d,sz=%d)\n", ++ offset,szB); ++ tl_assert(0); ++# undef GOF + # undef SZB + + # else +@@ -1517,6 +1629,13 @@ IRType MC_(get_otrack_reg_array_equiv_int_type) ( IRRegArray* arr ) + VG_(printf)("\n"); + tl_assert(0); + ++ /* ----------------- loongarch64 ----------------- */ ++# elif defined(VGA_loongarch64) ++ VG_(printf)("get_reg_array_equiv_int_type(loongarch64): unhandled: "); ++ ppIRRegArray(arr); ++ VG_(printf)("\n"); ++ tl_assert(0); ++ + # else + # error "FIXME: not implemented for this architecture" + # endif +diff --git a/memcheck/mc_translate.c b/memcheck/mc_translate.c +index 72ccb3c8c..4e441f069 100644 +--- a/memcheck/mc_translate.c ++++ b/memcheck/mc_translate.c +@@ -3385,6 +3385,7 @@ IRAtom* expr2vbits_Triop ( MCEnv* mce, + case Iop_MulD64: + case Iop_MulF64r32: + case Iop_DivF64: ++ case Iop_ScaleBF64: + case Iop_DivD64: + case Iop_DivF64r32: + case Iop_ScaleF64: +@@ -3404,6 +3405,7 @@ IRAtom* expr2vbits_Triop ( MCEnv* mce, + case Iop_SubF32: + case Iop_MulF32: + case Iop_DivF32: ++ case Iop_ScaleBF32: + /* I32(rm) x F32 x F32 -> I32 */ + return mkLazy3(mce, Ity_I32, vatom1, vatom2, vatom3); + case Iop_AddF16: +@@ -4410,6 +4412,8 @@ IRAtom* expr2vbits_Binop ( MCEnv* mce, + case 
Iop_TanF64: + case Iop_2xm1F64: + case Iop_SqrtF64: ++ case Iop_RSqrtF64: ++ case Iop_LogBF64: + case Iop_RecpExpF64: + /* I32(rm) x I64/F64 -> I64/F64 */ + return mkLazy2(mce, Ity_I64, vatom1, vatom2); +@@ -4471,6 +4475,8 @@ IRAtom* expr2vbits_Binop ( MCEnv* mce, + + case Iop_RoundF32toInt: + case Iop_SqrtF32: ++ case Iop_RSqrtF32: ++ case Iop_LogBF32: + case Iop_RecpExpF32: + /* I32(rm) x I32/F32 -> I32/F32 */ + return mkLazy2(mce, Ity_I32, vatom1, vatom2); +@@ -4553,11 +4559,15 @@ IRAtom* expr2vbits_Binop ( MCEnv* mce, + + case Iop_MaxNumF32: + case Iop_MinNumF32: ++ case Iop_MaxNumAbsF32: ++ case Iop_MinNumAbsF32: + /* F32 x F32 -> F32 */ + return mkLazy2(mce, Ity_I32, vatom1, vatom2); + + case Iop_MaxNumF64: + case Iop_MinNumF64: ++ case Iop_MaxNumAbsF64: ++ case Iop_MinNumAbsF64: + /* F64 x F64 -> F64 */ + return mkLazy2(mce, Ity_I64, vatom1, vatom2); + +@@ -8648,6 +8658,9 @@ IRSB* MC_(instrument) ( VgCallbackClosure* closure, + mce.dlbo.dl_CmpEQ64_CmpNE64 = DLexpensive; + # elif defined(VGA_arm) + mce.dlbo.dl_CmpEQ32_CmpNE32 = DLexpensive; ++# elif defined(VGA_loongarch64) ++ mce.dlbo.dl_CmpEQ32_CmpNE32 = DLexpensive; ++ mce.dlbo.dl_CmpEQ64_CmpNE64 = DLexpensive; + # endif + + /* preInstrumentationAnalysis() will allocate &mce.tmpHowUsed and then +diff --git a/memcheck/tests/Makefile.am b/memcheck/tests/Makefile.am +index 9bbbe7bec..3f7931099 100644 +--- a/memcheck/tests/Makefile.am ++++ b/memcheck/tests/Makefile.am +@@ -50,6 +50,9 @@ endif + if VGCONF_PLATFORMS_INCLUDE_ARM64_LINUX + SUBDIRS += arm64-linux + endif ++if VGCONF_PLATFORMS_INCLUDE_LOONGARCH64_LINUX ++SUBDIRS += loongarch64-linux ++endif + if VGCONF_PLATFORMS_INCLUDE_X86_SOLARIS + SUBDIRS += x86-solaris + endif +@@ -65,7 +68,7 @@ endif + + DIST_SUBDIRS = x86 amd64 ppc32 ppc64 s390x linux \ + darwin solaris x86-linux amd64-linux arm64-linux \ +- x86-solaris amd64-solaris mips32 mips64 \ ++ loongarch64-linux x86-solaris amd64-solaris mips32 mips64 \ + freebsd amd64-freebsd x86-freebsd \ + common . 
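
As a reading aid for the next file: the atomic_incs.c hunks below exercise Memcheck against LoongArch's LL/SC primitives. Each helper load-links the current value with ll.d, adds the operand, then tries to commit it with sc.d and loops until the store-conditional succeeds. A minimal standalone sketch of that retry pattern follows (my illustration only, assuming GCC-style inline asm on a loongarch64 target; the functions in the patch itself route everything through a block[] array so the test can also inspect the stored result):

   #include <stdint.h>

   /* Illustrative sketch, not part of the patch: atomically add n to *p.
      sc.d writes back its source register only if the reservation taken
      by ll.d is still held, and leaves 1 in that register on success,
      0 on failure, so the loop retries until the store lands. */
   static void atomic_add_u64(volatile uint64_t *p, uint64_t n)
   {
      uint64_t old, tmp;
      do {
         __asm__ __volatile__(
            "ll.d  %0, %2, 0  \n\t"   /* old = *p, acquire reservation   */
            "add.d %1, %0, %3 \n\t"   /* tmp = old + n                   */
            "sc.d  %1, %2, 0  \n\t"   /* try *p = tmp; %1 = 1 on success */
            : "=&r" (old), "=&r" (tmp)
            : "r" (p), "r" (n)
            : "memory");
      } while (tmp != 1);
   }
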
+ +diff --git a/memcheck/tests/atomic_incs.c b/memcheck/tests/atomic_incs.c +index 1c738c530..8c0055082 100644 +--- a/memcheck/tests/atomic_incs.c ++++ b/memcheck/tests/atomic_incs.c +@@ -245,6 +245,29 @@ __attribute__((noinline)) void atomic_add_8bit ( char* p, int n ) + ); + } while (block[2] != 1); + #endif ++#elif defined(VGA_loongarch64) ++ unsigned long long int block[3] ++ = { (unsigned long long int)(Addr)p, (unsigned long long int)n, 0x0ULL }; ++ do { ++ __asm__ __volatile__( ++ "move $t0, %0 \n\t" ++ "ld.d $t1, $t0, 0 \n\t" // p ++ "ld.d $t2, $t0, 8 \n\t" // n ++ "andi $t2, $t2, 0xff \n\t" // n = n & 0xff ++ "li.d $s0, 0xff \n\t" ++ "nor $s0, $s0, $zero \n\t" // $s0 = 0xffffff00 ++ "ll.d $t3, $t1, 0 \n\t" // $t3 = old value ++ "and $s0, $s0, $t3 \n\t" // $s0 = $t3 & 0xffffff00 ++ "add.d $t3, $t3, $t2 \n\t" // $t3 = $t3 + n ++ "andi $t3, $t3, 0xff \n\t" // $t3 = $t3 & 0xff ++ "or $t3, $t3, $s0 \n\t" // $t3 = $t3 | $s0 ++ "sc.d $t3, $t1, 0 \n\t" ++ "st.d $t3, $t0, 16 \n\t" // save result ++ : /*out*/ ++ : /*in*/ "r" (&block[0]) ++ : /*trash*/ "t0", "t1", "t2", "t3", "s0", "memory" ++ ); ++ } while (block[2] != 1); + #else + # error "Unsupported arch" + #endif +@@ -461,6 +484,30 @@ __attribute__((noinline)) void atomic_add_16bit ( short* p, int n ) + ); + } while (block[2] != 1); + #endif ++#elif defined(VGA_loongarch64) ++ unsigned long long int block[3] ++ = { (unsigned long long int)(Addr)p, (unsigned long long int)n, 0x0ULL }; ++ do { ++ __asm__ __volatile__( ++ "move $t0, %0 \n\t" ++ "ld.d $t1, $t0, 0 \n\t" // p ++ "ld.d $t2, $t0, 8 \n\t" // n ++ "li.d $s0, 0xffff \n\t" ++ "and $t2, $t2, $s0 \n\t" // n = n & 0xffff ++ "nor $s0, $s0, $zero \n\t" // $s0= 0xffff0000 ++ "ll.d $t3, $t1, 0 \n\t" // $t3 = old value ++ "and $s0, $s0, $t3 \n\t" // $s0 = $t3 & 0xffff0000 ++ "add.d $t3, $t3, $t2 \n\t" // $t3 = $t3 + n ++ "li.d $t2, 0xffff \n\t" ++ "and $t3, $t3, $t2 \n\t" // $t3 = $t3 & 0xffff ++ "or $t3, $t3, $s0 \n\t" // $t3 = $t3 | $s0 ++ "sc.d $t3, $t1, 0 \n\t" ++ "st.d $t3, $t0, 16 \n\t" // save result ++ : /*out*/ ++ : /*in*/ "r" (&block[0]) ++ : /*trash*/ "t0", "t1", "t2", "t3", "s0", "memory" ++ ); ++ } while (block[2] != 1); + #else + # error "Unsupported arch" + #endif +@@ -616,6 +663,23 @@ __attribute__((noinline)) void atomic_add_32bit ( int* p, int n ) + : /*trash*/ "memory", "t0", "t1", "t2", "t3" + ); + } while (block[2] != 1); ++#elif defined(VGA_loongarch64) ++ unsigned long long int block[3] ++ = { (unsigned long long int)(Addr)p, (unsigned long long int)n, 0x0ULL }; ++ do { ++ __asm__ __volatile__( ++ "move $t0, %0 \n\t" ++ "ld.d $t1, $t0, 0 \n\t" // p ++ "ld.d $t2, $t0, 8 \n\t" // n ++ "ll.d $t3, $t1, 0 \n\t" ++ "add.d $t3, $t3, $t2 \n\t" ++ "sc.d $t3, $t1, 0 \n\t" ++ "st.d $t3, $t0, 16 \n\t" ++ : /*out*/ ++ : /*in*/ "r" (&block[0]) ++ : /*trash*/ "t0", "t1", "t2", "t3", "memory" ++ ); ++ } while (block[2] != 1); + #else + # error "Unsupported arch" + #endif +@@ -718,6 +782,23 @@ __attribute__((noinline)) void atomic_add_64bit ( long long int* p, int n ) + : /*trash*/ "memory", "t0", "t1", "t2", "t3" + ); + } while (block[2] != 1); ++#elif defined(VGA_loongarch64) ++ unsigned long long int block[3] ++ = { (unsigned long long int)(Addr)p, (unsigned long long int)n, 0x0ULL }; ++ do { ++ __asm__ __volatile__( ++ "move $t0, %0 \n\t" ++ "ld.d $t1, $t0, 0 \n\t" // p ++ "ld.d $t2, $t0, 8 \n\t" // n ++ "ll.d $t3, $t1, 0 \n\t" ++ "add.d $t3, $t3, $t2 \n\t" ++ "sc.d $t3, $t1, 0 \n\t" ++ "st.d $t3, $t0, 16 \n\t" ++ : /*out*/ ++ : /*in*/ "r" (&block[0]) ++ : /*trash*/ "t0", "t1", 
"t2", "t3", "memory" ++ ); ++ } while (block[2] != 1); + #else + # error "Unsupported arch" + #endif +@@ -731,7 +812,8 @@ __attribute__((noinline)) void atomic_add_128bit ( MyU128* p, + || defined(VGA_amd64) \ + || defined(VGA_ppc64be) || defined(VGA_ppc64le) \ + || defined(VGA_arm) \ +- || defined(VGA_s390x) ++ || defined(VGA_s390x) \ ++ || defined(VGA_loongarch64) + /* do nothing; is not supported */ + #elif defined(VGA_arm64) + unsigned long long int block[3] +diff --git a/memcheck/tests/leak-segv-jmp.c b/memcheck/tests/leak-segv-jmp.c +index 97bddaf23..4890f5bcf 100644 +--- a/memcheck/tests/leak-segv-jmp.c ++++ b/memcheck/tests/leak-segv-jmp.c +@@ -182,6 +182,19 @@ extern UWord do_syscall_WRK ( + return out; + } + ++#elif defined(VGP_loongarch64_linux) ++extern UWord do_syscall_WRK (UWord a1, UWord a2, UWord a3, /* $a0, $a1, $a2 */ ++ UWord a4, UWord a5, UWord a6, /* $a3, $a4, $a5 */ ++ UWord syscall_no); /* $a6 */ ++asm ( ++ ".text \n\t" ++ ".globl do_syscall_WRK \n\t" ++ "do_syscall_WRK: \n\t" ++ " move $a7, $a6 \n\t" /* a7 = syscall_no */ ++ " syscall 0 \n\t" ++ " jr $ra \n\t" ++); ++ + #elif defined(VGP_x86_solaris) + extern ULong + do_syscall_WRK(UWord a1, UWord a2, UWord a3, +@@ -338,7 +351,7 @@ static void non_simd_mprotect (long tid, void* addr, long len) + &err); + if (err) + mprotect_result = -1; +-#elif defined(VGP_arm64_linux) ++#elif defined(VGP_arm64_linux) || defined (VGP_loongarch64_linux) + mprotect_result = do_syscall_WRK((UWord) addr, len, PROT_NONE, + 0, 0, 0, + __NR_mprotect); +diff --git a/memcheck/tests/leak-segv-jmp.stderr.exp b/memcheck/tests/leak-segv-jmp.stderr.exp +index b30fd76ac..0eea1785c 100644 +--- a/memcheck/tests/leak-segv-jmp.stderr.exp ++++ b/memcheck/tests/leak-segv-jmp.stderr.exp +@@ -14,8 +14,8 @@ To see them, rerun with: --leak-check=full --show-leak-kinds=all + expecting a leak + 1,000 bytes in 1 blocks are definitely lost in loss record ... of ... + at 0x........: malloc (vg_replace_malloc.c:...) +- by 0x........: f (leak-segv-jmp.c:389) +- by 0x........: main (leak-segv-jmp.c:464) ++ by 0x........: f (leak-segv-jmp.c:402) ++ by 0x........: main (leak-segv-jmp.c:477) + + LEAK SUMMARY: + definitely lost: 1,000 bytes in 1 blocks +@@ -30,8 +30,8 @@ mprotect result 0 + expecting a leak again + 1,000 bytes in 1 blocks are definitely lost in loss record ... of ... + at 0x........: malloc (vg_replace_malloc.c:...) +- by 0x........: f (leak-segv-jmp.c:389) +- by 0x........: main (leak-segv-jmp.c:464) ++ by 0x........: f (leak-segv-jmp.c:402) ++ by 0x........: main (leak-segv-jmp.c:477) + + LEAK SUMMARY: + definitely lost: 1,000 bytes in 1 blocks +@@ -46,8 +46,8 @@ full mprotect result 0 + expecting a leak again after full mprotect + 1,000 bytes in 1 blocks are definitely lost in loss record ... of ... + at 0x........: malloc (vg_replace_malloc.c:...) +- by 0x........: f (leak-segv-jmp.c:389) +- by 0x........: main (leak-segv-jmp.c:464) ++ by 0x........: f (leak-segv-jmp.c:402) ++ by 0x........: main (leak-segv-jmp.c:477) + + LEAK SUMMARY: + definitely lost: 1,000 bytes in 1 blocks +@@ -62,13 +62,13 @@ mprotect result 0 + expecting heuristic not to crash after full mprotect + 1,000 bytes in 1 blocks are definitely lost in loss record ... of ... + at 0x........: malloc (vg_replace_malloc.c:...) +- by 0x........: f (leak-segv-jmp.c:389) +- by 0x........: main (leak-segv-jmp.c:464) ++ by 0x........: f (leak-segv-jmp.c:402) ++ by 0x........: main (leak-segv-jmp.c:477) + + 200,000 bytes in 1 blocks are possibly lost in loss record ... of ... 
+ at 0x........: calloc (vg_replace_malloc.c:...) +- by 0x........: f (leak-segv-jmp.c:436) +- by 0x........: main (leak-segv-jmp.c:464) ++ by 0x........: f (leak-segv-jmp.c:449) ++ by 0x........: main (leak-segv-jmp.c:477) + + LEAK SUMMARY: + definitely lost: 1,000 bytes in 1 blocks +diff --git a/memcheck/tests/leak.h b/memcheck/tests/leak.h +index 79e3cd6ac..df9d2e759 100644 +--- a/memcheck/tests/leak.h ++++ b/memcheck/tests/leak.h +@@ -148,6 +148,27 @@ + do { \ + __asm__ __volatile__ ("movl $0, %ecx\n\t"); \ + } while (0) ++#elif defined (VGA_loongarch64) ++#define CLEAR_CALLER_SAVED_REGS \ ++ do { \ ++ __asm__ __volatile__ ("move $a0, $zero \n\t" \ ++ "move $a1, $zero \n\t" \ ++ "move $a2, $zero \n\t" \ ++ "move $a3, $zero \n\t" \ ++ "move $a4, $zero \n\t" \ ++ "move $a5, $zero \n\t" \ ++ "move $a6, $zero \n\t" \ ++ "move $a7, $zero \n\t" \ ++ "move $t0, $zero \n\t" \ ++ "move $t1, $zero \n\t" \ ++ "move $t2, $zero \n\t" \ ++ "move $t3, $zero \n\t" \ ++ "move $t4, $zero \n\t" \ ++ "move $t5, $zero \n\t" \ ++ "move $t6, $zero \n\t" \ ++ "move $t7, $zero \n\t" \ ++ "move $t8, $zero \n\t"); \ ++ } while (0) + #else + #define CLEAR_CALLER_SAVED_REGS /*nothing*/ + #endif +diff --git a/memcheck/tests/loongarch64-linux/Makefile.am b/memcheck/tests/loongarch64-linux/Makefile.am +new file mode 100644 +index 000000000..5afcaa4ec +--- /dev/null ++++ b/memcheck/tests/loongarch64-linux/Makefile.am +@@ -0,0 +1,17 @@ ++ ++include $(top_srcdir)/Makefile.tool-tests.am ++ ++dist_noinst_SCRIPTS = \ ++ filter_stderr ++ ++noinst_HEADERS = scalar.h ++ ++EXTRA_DIST = \ ++ scalar.stderr.exp scalar.vgtest ++ ++check_PROGRAMS = \ ++ scalar ++ ++AM_CFLAGS += @FLAG_M64@ ++AM_CXXFLAGS += @FLAG_M64@ ++AM_CCASFLAGS += @FLAG_M64@ +diff --git a/memcheck/tests/loongarch64-linux/filter_stderr b/memcheck/tests/loongarch64-linux/filter_stderr +new file mode 100755 +index 000000000..a778e971f +--- /dev/null ++++ b/memcheck/tests/loongarch64-linux/filter_stderr +@@ -0,0 +1,3 @@ ++#! /bin/sh ++ ++../filter_stderr "$@" +diff --git a/memcheck/tests/loongarch64-linux/scalar.c b/memcheck/tests/loongarch64-linux/scalar.c +new file mode 100644 +index 000000000..e39069213 +--- /dev/null ++++ b/memcheck/tests/loongarch64-linux/scalar.c +@@ -0,0 +1,1296 @@ ++/* This is the loongarch64 variant of memcheck/tests/x86-linux/scalar.c. ++ Syscalls are in x86 number order to make exp comparison easier. */ ++#define _GNU_SOURCE ++#include "../../memcheck.h" ++#include "scalar.h" ++#include ++#include ++#include ++#include // MREMAP_FIXED ++ ++// Here we are trying to trigger every syscall error (scalar errors and ++// memory errors) for every syscall. We do this by passing a lot of bogus ++// arguments, mostly 0 and 1 (often it's 1 because NULL ptr args often aren't ++// checked for memory errors, or in order to have a non-zero length used ++// with some buffer). So most of the syscalls don't actually succeed and do ++// anything. ++// ++// Occasionally we have to be careful not to cause Valgrind to seg fault in ++// its pre-syscall wrappers; it does so because it can't know in general ++// when memory is unaddressable, and so tries to dereference it when doing ++// PRE_MEM_READ/PRE_MEM_WRITE calls. (Note that Memcheck will ++// always issue an error message immediately before these seg faults occur). ++// ++// The output has numbers like "3s 2m" for each syscall. "s" is short for ++// "scalar", ie. the argument itself is undefined. "m" is short for "memory", ++// ie. the argument points to memory which is unaddressable. 
++int main(void) ++{ ++ // uninitialised, but we know px[0] is 0x0 ++ long* px = malloc(sizeof(long)); ++ long x0 = px[0]; ++ long res; ++ ++ // All __NR_xxx numbers are taken from x86 ++ ++ // __NR_restart_syscall 0 // XXX: not yet handled, perhaps should be... ++ GO(__NR_restart_syscall, "n/a"); ++ //SY(__NR_restart_syscall); // (Not yet handled by Valgrind) FAIL; ++ ++ // __NR_exit 1 ++ GO(__NR_exit, "below"); ++ // (see below) ++ ++ // __NR_fork 2 ++ //GO(__NR_fork, "other"); ++ // (sse scalar_fork.c) ++ ++ // __NR_read 3 ++ // Nb: here we are also getting an error from the syscall arg itself. ++ GO(__NR_read, "1+3s 1m"); ++ SY(__NR_read+x0, x0, x0, x0+1); FAILx(EFAULT); ++ ++ // __NR_write 4 ++ GO(__NR_write, "3s 1m"); ++ SY(__NR_write, x0, x0, x0+1); FAIL; ++ ++ // __NR_open 5 ++ //GO(__NR_open, "(2-args) 2s 1m"); ++ //SY(__NR_open, x0, x0); FAIL; ++ ++ // Only 1s 0m errors -- the other 2s 1m have been checked in the previous ++ // open test, and if we test them they may be commoned up but they also ++ // may not. ++ //GO(__NR_open, "(3-args) 1s 0m"); ++ //SY(__NR_open, "scalar.c", O_CREAT|O_EXCL, x0); FAIL; ++ ++ // __NR_close 6 ++ GO(__NR_close, "1s 0m"); ++ SY(__NR_close, x0-1); FAIL; ++ ++ // __NR_waitpid 7 ++ //GO(__NR_waitpid, "3s 1m"); ++ //SY(__NR_waitpid, x0, x0+1, x0); FAIL; ++ ++ // __NR_creat 8 ++ //GO(__NR_creat, "2s 1m"); ++ //SY(__NR_creat, x0, x0); FAIL; ++ ++ // __NR_link 9 ++ //GO(__NR_link, "2s 2m"); ++ //SY(__NR_link, x0, x0); FAIL; ++ ++ // __NR_unlink 10 ++ //GO(__NR_unlink, "1s 1m"); ++ //SY(__NR_unlink, x0); FAIL; ++ ++ // __NR_execve 11 ++ GO(__NR_execve, "3s 1m"); ++ SY(__NR_execve, x0 + 1, x0 + 1, x0); FAIL; ++ ++ GO(__NR_execve, "3s 1m"); ++ SY(__NR_execve, x0 + 1, x0, x0 + 1); FAIL; ++ ++ char *argv_envp[] = {(char *) (x0 + 1), NULL}; ++ GO(__NR_execve, "4s 2m"); ++ SY(__NR_execve, x0 + 1, x0 + argv_envp, x0); FAIL; ++ char *argv_ok[] = {"frob", NULL}; ++ GO(__NR_execve, "4s 2m"); ++ SY(__NR_execve, x0 + 1, x0 + argv_ok, x0 + argv_envp); FAIL; ++ ++ // __NR_chdir 12 ++ GO(__NR_chdir, "1s 1m"); ++ SY(__NR_chdir, x0); FAIL; ++ ++ // __NR_time 13 ++ //GO(__NR_time, "1s 1m"); ++ //SY(__NR_time, x0+1); FAIL; ++ ++ // __NR_mknod 14 ++ //O(__NR_mknod, "3s 1m"); ++ //Y(__NR_mknod, x0, x0, x0); FAIL; ++ ++ // __NR_chmod 15 ++ //GO(__NR_chmod, "2s 1m"); ++ //SY(__NR_chmod, x0, x0); FAIL; ++ ++ // __NR_lchown 16 ++ //GO(__NR_lchown, "n/a"); ++ //SY(__NR_lchown); // (Not yet handled by Valgrind) FAIL; ++ ++ // __NR_break 17 ++ //GO(__NR_break, "ni"); ++ //SY(__NR_break); FAIL; ++ ++ // __NR_oldstat 18 ++ //GO(__NR_oldstat, "n/a"); ++ // (obsolete, not handled by Valgrind) ++ ++ // __NR_lseek 19 ++ GO(__NR_lseek, "3s 0m"); ++ SY(__NR_lseek, x0-1, x0, x0); FAILx(EBADF); ++ ++ // __NR_getpid 20 ++ GO(__NR_getpid, "0s 0m"); ++ SY(__NR_getpid); SUCC; ++ ++ // __NR_mount 21 ++ GO(__NR_mount, "5s 3m"); ++ SY(__NR_mount, x0, x0, x0, x0, x0); FAIL; ++ ++ // __NR_umount 22 ++ //GO(__NR_umount, "1s 1m"); ++ //SY(__NR_umount, x0); FAIL; ++ ++ // __NR_setuid 23 ++ GO(__NR_setuid, "1s 0m"); ++ SY(__NR_setuid, x0-1); FAIL; ++ ++ // __NR_getuid 24 ++ GO(__NR_getuid, "0s 0m"); ++ SY(__NR_getuid); SUCC; ++ ++ // __NR_stime 25 ++ //GO(__NR_stime, "n/a"); ++ //SY(__NR_stime); // (Not yet handled by Valgrind) FAIL; ++ ++ // __NR_ptrace 26 ++ // XXX: memory pointed to be arg3 goes unchecked... 
otherwise would be 2m ++ //GO(__NR_ptrace, "4s 1m"); ++ //SY(__NR_ptrace, x0+PTRACE_GETREGS, x0, x0, x0); FAIL; ++ ++ // __NR_alarm 27 ++ //GO(__NR_alarm, "1s 0m"); ++ //SY(__NR_alarm, x0); SUCC; ++ ++ // __NR_oldfstat 28 ++ //GO(__NR_oldfstat, "n/a"); ++ // (obsolete, not handled by Valgrind) ++ ++ // __NR_pause 29 ++ //GO(__NR_pause, "ignore"); ++ // (hard to test, and no args so not much to be gained -- don't bother) ++ ++ // __NR_utime 30 ++ //GO(__NR_utime, "2s 2m"); ++ //SY(__NR_utime, x0, x0+1); FAIL; ++ ++ // __NR_stty 31 ++ //GO(__NR_stty, "ni"); ++ //SY(__NR_stty); FAIL; ++ ++ // __NR_gtty 32 ++ //GO(__NR_gtty, "ni"); ++ //SY(__NR_gtty); FAIL; ++ ++ // __NR_access 33 ++ //GO(__NR_access, "2s 1m"); ++ //SY(__NR_access, x0, x0); FAIL; ++ ++ // __NR_nice 34 ++ //GO(__NR_nice, "1s 0m"); ++ //SY(__NR_nice, x0); SUCC; ++ ++ // __NR_ftime 35 ++ //GO(__NR_ftime, "ni"); ++ //SY(__NR_ftime); FAIL; ++ ++ // __NR_sync 36 ++ GO(__NR_sync, "0s 0m"); ++ SY(__NR_sync); SUCC; ++ ++ // __NR_kill 37 ++ GO(__NR_kill, "2s 0m"); ++ SY(__NR_kill, x0, x0); SUCC; ++ ++ // __NR_rename 38 ++ //GO(__NR_rename, "2s 2m"); ++ //SY(__NR_rename, x0, x0); FAIL; ++ ++ // __NR_mkdir 39 ++ //GO(__NR_mkdir, "2s 1m"); ++ //SY(__NR_mkdir, x0, x0); FAIL; ++ ++ // __NR_rmdir 40 ++ //GO(__NR_rmdir, "1s 1m"); ++ //SY(__NR_rmdir, x0); FAIL; ++ ++ // __NR_dup 41 ++ GO(__NR_dup, "1s 0m"); ++ SY(__NR_dup, x0-1); FAIL; ++ ++ // __NR_pipe 42 ++ //GO(__NR_pipe, "1s 1m"); ++ //SY(__NR_pipe, x0); FAIL; ++ ++ // __NR_times 43 ++ GO(__NR_times, "1s 1m"); ++ SY(__NR_times, x0+1); FAIL; ++ ++ // __NR_prof 44 ++ //GO(__NR_prof, "ni"); ++ //SY(__NR_prof); FAIL; ++ ++ // __NR_brk 45 ++ GO(__NR_brk, "1s 0m"); ++ SY(__NR_brk, x0); SUCC; ++ ++ // __NR_setgid 46 ++ GO(__NR_setgid, "1s 0m"); ++ SY(__NR_setgid, x0-1); FAIL; ++ ++ // __NR_getgid 47 ++ GO(__NR_getgid, "0s 0m"); ++ SY(__NR_getgid); SUCC; ++ ++ // __NR_signal 48 ++ //GO(__NR_signal, "n/a"); ++ //SY(__NR_signal); // (Not yet handled by Valgrind) FAIL; ++ ++ // __NR_geteuid 49 ++ GO(__NR_geteuid, "0s 0m"); ++ SY(__NR_geteuid); SUCC; ++ ++ // __NR_getegid 50 ++ GO(__NR_getegid, "0s 0m"); ++ SY(__NR_getegid); SUCC; ++ ++ // __NR_acct 51 ++ GO(__NR_acct, "1s 1m"); ++ SY(__NR_acct, x0-1); FAIL; ++ ++ // __NR_umount2 52 ++ GO(__NR_umount2, "2s 1m"); ++ SY(__NR_umount2, x0, x0); FAIL; ++ ++ // __NR_lock 53 ++ //GO(__NR_lock, "ni"); ++ //SY(__NR_lock); FAIL; ++ ++ // __NR_ioctl 54 ++#include ++ GO(__NR_ioctl, "3s 1m"); ++ SY(__NR_ioctl, x0, x0+TCSETS, x0); FAIL; ++ ++ // __NR_fcntl 55 ++ // As with sys_open(), the 'fd' error is suppressed for the later ones. ++ // For F_GETFD the 3rd arg is ignored ++ GO(__NR_fcntl, "(GETFD) 2s 0m"); ++ SY(__NR_fcntl, x0-1, x0+F_GETFD, x0); FAILx(EBADF); ++ ++ // For F_DUPFD the 3rd arg is 'arg'. We don't check the 1st two args ++ // because any errors may or may not be commoned up with the ones from ++ // the previous fcntl call. ++ GO(__NR_fcntl, "(DUPFD) 1s 0m"); ++ SY(__NR_fcntl, -1, F_DUPFD, x0); FAILx(EBADF); ++ ++ // For F_GETLK the 3rd arg is 'lock'. On x86, this fails w/EBADF. But ++ // on amd64 in 32-bit mode it fails w/EFAULT. We don't check the 1st two ++ // args for the reason given above. 
++ GO(__NR_fcntl, "(GETLK) 1s 5m"); ++ SY(__NR_fcntl, -1, F_GETLK, x0); FAIL; //FAILx(EBADF); ++ ++ // __NR_mpx 56 ++ //GO(__NR_mpx, "ni"); ++ //SY(__NR_mpx); FAIL; ++ ++ // __NR_setpgid 57 ++ GO(__NR_setpgid, "2s 0m"); ++ SY(__NR_setpgid, x0, x0-1); FAIL; ++ ++ // __NR_ulimit 58 ++ //GO(__NR_ulimit, "ni"); ++ //SY(__NR_ulimit); FAIL; ++ ++ // __NR_oldolduname 59 ++ //GO(__NR_oldolduname, "n/a"); ++ // (obsolete, not handled by Valgrind) ++ ++ // __NR_umask 60 ++ GO(__NR_umask, "1s 0m"); ++ SY(__NR_umask, x0+022); SUCC; ++ ++ // __NR_chroot 61 ++ GO(__NR_chroot, "1s 1m"); ++ SY(__NR_chroot, x0); FAIL; ++ ++ // __NR_ustat 62 ++ //GO(__NR_ustat, "n/a"); ++ // (deprecated, not handled by Valgrind) ++ ++ // __NR_dup2 63 ++ //GO(__NR_dup2, "2s 0m"); ++ //SY(__NR_dup2, x0-1, x0); FAIL; ++ ++ // __NR_getppid 64 ++ GO(__NR_getppid, "0s 0m"); ++ SY(__NR_getppid); SUCC; ++ ++ // __NR_getpgrp 65 ++ //GO(__NR_getpgrp, "0s 0m"); ++ //SY(__NR_getpgrp); SUCC; ++ ++ // __NR_setsid 66 ++ GO(__NR_setsid, "0s 0m"); ++ SY(__NR_setsid); SUCC_OR_FAIL; ++ ++ // __NR_sigaction 67 ++ //GO(__NR_sigaction, "3s 4m"); ++ //SY(__NR_sigaction, x0, x0+&px[1], x0+&px[1]); FAIL; ++ ++ // __NR_sgetmask 68 sys_sgetmask() ++ //GO(__NR_sgetmask, "n/a"); ++ //SY(__NR_sgetmask); // (Not yet handled by Valgrind) FAIL; ++ ++ // __NR_ssetmask 69 ++ //GO(__NR_ssetmask, "n/a"); ++ //SY(__NR_ssetmask); // (Not yet handled by Valgrind) FAIL; ++ ++ // __NR_setreuid 70 ++ GO(__NR_setreuid, "2s 0m"); ++ SY(__NR_setreuid, x0-1, x0-1); SUCC; ++ ++ // __NR_setregid 71 ++ GO(__NR_setregid, "2s 0m"); ++ SY(__NR_setregid, x0-1, x0-1); SUCC; ++ ++ // __NR_sigsuspend 72 ++ // XXX: how do you use this function? ++ //GO(__NR_sigsuspend, "ignore"); ++ // (I don't know how to test this...) ++ ++ // __NR_sigpending 73 ++ //GO(__NR_sigpending, "1s 1m"); ++ //SY(__NR_sigpending, x0); FAIL; ++ ++ // __NR_sethostname 74 ++ GO(__NR_sethostname, "n/a"); ++ //SY(__NR_sethostname); // (Not yet handled by Valgrind) FAIL; ++ ++ // __NR_setrlimit 75 ++ //GO(__NR_setrlimit, "2s 1m"); ++ //SY(__NR_setrlimit, x0, x0); FAIL; ++ ++ // __NR_getrlimit 76 ++ //GO(__NR_getrlimit, "2s 1m"); ++ //SY(__NR_getrlimit, x0, x0); FAIL; ++ ++ // __NR_getrusage 77 ++ GO(__NR_getrusage, "2s 1m"); ++ SY(__NR_getrusage, x0, x0); FAIL; ++ ++ // __NR_gettimeofday 78 ++ GO(__NR_gettimeofday, "2s 2m"); ++ SY(__NR_gettimeofday, x0+1, x0+1); FAIL; ++ ++ // __NR_settimeofday 79 ++ GO(__NR_settimeofday, "2s 2m"); ++ SY(__NR_settimeofday, x0+1, x0+1); FAIL; ++ ++ // __NR_getgroups 80 ++ GO(__NR_getgroups, "2s 1m"); ++ SY(__NR_getgroups, x0+1, x0+1); FAIL; ++ ++ // __NR_setgroups 81 ++ GO(__NR_setgroups, "2s 1m"); ++ SY(__NR_setgroups, x0+1, x0+1); FAIL; ++ ++ // __NR_select 82 ++ //{ ++ // long args[5] = { x0+8, x0+0xffffffee, x0+1, x0+1, x0+1 }; ++ // GO(__NR_select, "1s 5m"); ++ // SY(__NR_select, args+x0); FAIL; ++ //} ++ ++ // __NR_symlink 83 ++ //GO(__NR_symlink, "2s 2m"); ++ //SY(__NR_symlink, x0, x0); FAIL; ++ ++ // __NR_oldlstat 84 ++ //GO(__NR_oldlstat, "n/a"); ++ // (obsolete, not handled by Valgrind) ++ ++ // __NR_readlink 85 ++ //GO(__NR_readlink, "3s 2m"); ++ //SY(__NR_readlink, x0+1, x0+1, x0+1); FAIL; ++ ++ // __NR_uselib 86 ++ //GO(__NR_uselib, "n/a"); ++ //SY(__NR_uselib); // (Not yet handled by Valgrind) FAIL; ++ ++ // __NR_swapon 87 ++ GO(__NR_swapon, "n/a"); ++ //SY(__NR_swapon); // (Not yet handled by Valgrind) FAIL; ++ ++ // __NR_reboot 88 ++ GO(__NR_reboot, "n/a"); ++ //SY(__NR_reboot); // (Not yet handled by Valgrind) FAIL; ++ ++ // __NR_readdir 89 ++ 
//GO(__NR_readdir, "n/a"); ++ // (superseded, not handled by Valgrind) ++ ++ // __NR_mmap 90 ++ { ++ long args[6] = { x0, x0, x0, x0, x0-1, x0 }; ++ GO(__NR_mmap, "1s 1m"); ++ SY(__NR_mmap, args+x0); FAIL; ++ } ++ ++ // __NR_munmap 91 ++ GO(__NR_munmap, "2s 0m"); ++ SY(__NR_munmap, x0, x0); FAIL; ++ ++ // __NR_truncate 92 ++ GO(__NR_truncate, "2s 1m"); ++ SY(__NR_truncate, x0, x0); FAIL; ++ ++ // __NR_ftruncate 93 ++ GO(__NR_ftruncate, "2s 0m"); ++ SY(__NR_ftruncate, x0, x0); FAIL; ++ ++ // __NR_fchmod 94 ++ GO(__NR_fchmod, "2s 0m"); ++ SY(__NR_fchmod, x0-1, x0); FAIL; ++ ++ // __NR_fchown 95 ++ GO(__NR_fchown, "3s 0m"); ++ SY(__NR_fchown, x0-1, x0, x0); FAIL; ++ ++ // __NR_getpriority 96 ++ GO(__NR_getpriority, "2s 0m"); ++ SY(__NR_getpriority, x0-1, x0); FAIL; ++ ++ // __NR_setpriority 97 ++ GO(__NR_setpriority, "3s 0m"); ++ SY(__NR_setpriority, x0-1, x0, x0); FAIL; ++ ++ // __NR_profil 98 ++ //GO(__NR_profil, "ni"); ++ //SY(__NR_profil); FAIL; ++ ++ // __NR_statfs 99 ++ GO(__NR_statfs, "2s 2m"); ++ SY(__NR_statfs, x0, x0); FAIL; ++ ++ // __NR_fstatfs 100 ++ GO(__NR_fstatfs, "2s 1m"); ++ SY(__NR_fstatfs, x0, x0); FAIL; ++ ++ // __NR_ioperm 101 ++ //GO(__NR_ioperm, "3s 0m"); ++ //SY(__NR_ioperm, x0, x0, x0); FAIL; ++ ++ // __NR_socketcall 102 ++ //GO(__NR_socketcall, "XXX"); ++ // (XXX: need to do all sub-cases properly) ++ ++ // __NR_syslog 103 ++ GO(__NR_syslog, "3s 1m"); ++ SY(__NR_syslog, x0+2, x0, x0+1); FAIL; ++ ++ // __NR_setitimer 104 ++ GO(__NR_setitimer, "3s 2m"); ++ SY(__NR_setitimer, x0, x0+1, x0+1); FAIL; ++ ++ // __NR_getitimer 105 ++ GO(__NR_getitimer, "2s 1m"); ++ SY(__NR_getitimer, x0, x0, x0); FAIL; ++ ++ // __NR_stat 106 ++ //GO(__NR_stat, "2s 2m"); ++ //SY(__NR_stat, x0, x0); FAIL; ++ ++ // __NR_lstat 107 ++ //GO(__NR_lstat, "2s 2m"); ++ //SY(__NR_lstat, x0, x0); FAIL; ++ ++ // __NR_fstat 108 ++ //GO(__NR_fstat, "2s 1m"); ++ //SY(__NR_fstat, x0, x0); FAIL; ++ ++ // __NR_olduname 109 ++ //GO(__NR_olduname, "n/a"); ++ // (obsolete, not handled by Valgrind) ++ ++ // __NR_iopl 110 ++ //GO(__NR_iopl, "1s 0m"); ++ //SY(__NR_iopl, x0+100); FAIL; ++ ++ // __NR_vhangup 111 ++ GO(__NR_vhangup, "0s 0m"); ++ SY(__NR_vhangup); SUCC_OR_FAIL; // Will succeed for superuser ++ ++ // __NR_idle 112 ++ //GO(__NR_idle, "ni"); ++ //SY(__NR_idle); FAIL; ++ ++ // __NR_vm86old 113 ++ //GO(__NR_vm86old, "n/a"); ++ // (will probably never be handled by Valgrind) ++ ++ // __NR_wait4 114 ++ GO(__NR_wait4, "4s 2m"); ++ SY(__NR_wait4, x0, x0+1, x0, x0+1); FAIL; ++ ++ // __NR_swapoff 115 ++ GO(__NR_swapoff, "n/a"); ++ //SY(__NR_swapoff); // (Not yet handled by Valgrind) FAIL; ++ ++ // __NR_sysinfo 116 ++ GO(__NR_sysinfo, "1s 1m"); ++ SY(__NR_sysinfo, x0); FAIL; ++ ++ // __NR_ipc 117 ++ // XXX: This is simplistic -- need to do all the sub-cases properly. ++ // XXX: Also, should be 6 scalar errors, except glibc's syscall() doesn't ++ // use the 6th one! 
++ //GO(__NR_ipc, "5s 0m"); ++ //SY(__NR_ipc, x0+4, x0, x0, x0, x0, x0); FAIL; ++ ++ // __NR_fsync 118 ++ GO(__NR_fsync, "1s 0m"); ++ SY(__NR_fsync, x0-1); FAIL; ++ ++ // __NR_sigreturn 119 ++ //GO(__NR_sigreturn, "n/a"); ++ //SY(__NR_sigreturn); // (Not yet handled by Valgrind) FAIL; ++ ++ // __NR_clone 120 ++#ifndef CLONE_PARENT_SETTID ++#define CLONE_PARENT_SETTID 0x00100000 ++#endif ++ GO(__NR_clone, "5s 3m"); ++ SY(__NR_clone, x0|CLONE_PARENT_SETTID|CLONE_SETTLS|CLONE_CHILD_SETTID|SIGCHLD, x0, x0, x0, x0); FAIL; ++ if (0 == res) { ++ SY(__NR_exit, 0); FAIL; ++ } ++ ++ // __NR_setdomainname 121 ++ GO(__NR_setdomainname, "n/a"); ++ //SY(__NR_setdomainname); // (Not yet handled by Valgrind) FAIL; ++ ++ // __NR_uname 122 ++ GO(__NR_uname, "1s 1m"); ++ SY(__NR_uname, x0); FAIL; ++ ++ // __NR_modify_ldt 123 ++ //GO(__NR_modify_ldt, "3s 1m"); ++ //SY(__NR_modify_ldt, x0+1, x0, x0+1); FAILx(EINVAL); ++ ++ // __NR_adjtimex 124 ++ // XXX: need to do properly, but deref'ing NULL causing Valgrind to crash... ++ GO(__NR_adjtimex, "XXX"); ++ //SY(__NR_adjtimex, x0); FAIL; ++ ++ // __NR_mprotect 125 ++ GO(__NR_mprotect, "3s 0m"); ++ SY(__NR_mprotect, x0+1, x0, x0); FAILx(EINVAL); ++ ++ // __NR_sigprocmask 126 ++ //GO(__NR_sigprocmask, "3s 2m"); ++ //SY(__NR_sigprocmask, x0, x0+&px[1], x0+&px[1]); SUCC; ++ ++ // __NR_create_module 127 ++ //GO(__NR_create_module, "ni"); ++ //SY(__NR_create_module); FAIL; ++ ++ // __NR_init_module 128 ++ GO(__NR_init_module, "3s 2m"); ++ SY(__NR_init_module, x0, x0+1, x0); FAIL; ++ ++ // __NR_delete_module 129 ++ GO(__NR_delete_module, "n/a"); ++ //SY(__NR_delete_module); // (Not yet handled by Valgrind) FAIL; ++ ++ // __NR_get_kernel_syms 130 ++ //GO(__NR_get_kernel_syms, "ni"); ++ //SY(__NR_get_kernel_syms); FAIL; ++ ++ // __NR_quotactl 131 ++ GO(__NR_quotactl, "4s 1m"); ++ SY(__NR_quotactl, x0, x0, x0, x0); FAIL; ++ ++ // __NR_getpgid 132 ++ GO(__NR_getpgid, "1s 0m"); ++ SY(__NR_getpgid, x0-1); FAIL; ++ ++ // __NR_fchdir 133 ++ GO(__NR_fchdir, "1s 0m"); ++ SY(__NR_fchdir, x0-1); FAIL; ++ ++ // __NR_bdflush 134 ++ //GO(__NR_bdflush, "n/a"); ++ //SY(__NR_bdflush); // (Not yet handled by Valgrind) FAIL; ++ ++ // __NR_sysfs 135 ++ //GO(__NR_sysfs, "n/a"); ++ //SY(__NR_sysfs); // (Not yet handled by Valgrind) FAIL; ++ ++ // __NR_personality 136 ++ GO(__NR_personality, "1s 0m"); ++ SY(__NR_personality, x0+0xffffffff); SUCC; ++ ++ // __NR_afs_syscall 137 ++ //GO(__NR_afs_syscall, "ni"); ++ //SY(__NR_afs_syscall); FAIL; ++ ++ // __NR_setfsuid 138 ++ GO(__NR_setfsuid, "1s 0m"); ++ SY(__NR_setfsuid, x0); SUCC; // This syscall has a stupid return value ++ ++ // __NR_setfsgid 139 ++ GO(__NR_setfsgid, "1s 0m"); ++ SY(__NR_setfsgid, x0); SUCC; // This syscall has a stupid return value ++ ++ // __NR__llseek 140 ++ //GO(__NR__llseek, "5s 1m"); ++ //SY(__NR__llseek, x0, x0, x0, x0, x0); FAIL; ++ ++ // __NR_getdents 141 ++ //GO(__NR_getdents, "3s 1m"); ++ //SY(__NR_getdents, x0, x0, x0+1); FAIL; ++ ++ // __NR__newselect 142 ++ //GO(__NR__newselect, "5s 4m"); ++ //SY(__NR__newselect, x0+8, x0+0xffffffff, x0+1, x0+1, x0+1); FAIL; ++ ++ // __NR_flock 143 ++ GO(__NR_flock, "2s 0m"); ++ SY(__NR_flock, x0, x0); FAIL; ++ ++ // __NR_msync 144 ++ GO(__NR_msync, "3s 1m"); ++ SY(__NR_msync, x0, x0+1, x0); FAIL; ++ ++ // __NR_readv 145 ++ GO(__NR_readv, "3s 1m"); ++ SY(__NR_readv, x0, x0, x0+1); FAIL; ++ ++ // __NR_writev 146 ++ GO(__NR_writev, "3s 1m"); ++ SY(__NR_writev, x0, x0, x0+1); FAIL; ++ ++ // __NR_getsid 147 ++ GO(__NR_getsid, "1s 0m"); ++ SY(__NR_getsid, x0-1); FAIL; ++ ++ // 
__NR_fdatasync 148 ++ GO(__NR_fdatasync, "1s 0m"); ++ SY(__NR_fdatasync, x0-1); FAIL; ++ ++ // __NR__sysctl 149 ++ //GO(__NR__sysctl, "1s 1m"); ++ //SY(__NR__sysctl, x0); FAIL; ++ ++ // __NR_mlock 150 ++ GO(__NR_mlock, "2s 0m"); ++ SY(__NR_mlock, x0, x0+1); FAIL; ++ ++ // __NR_munlock 151 ++ GO(__NR_munlock, "2s 0m"); ++ SY(__NR_munlock, x0, x0+1); FAIL; ++ ++ // __NR_mlockall 152 ++ GO(__NR_mlockall, "1s 0m"); ++ SY(__NR_mlockall, x0-1); FAIL; ++ ++ // __NR_munlockall 153 ++ GO(__NR_munlockall, "0s 0m"); ++ SY(__NR_munlockall); SUCC_OR_FAILx(EPERM); ++ ++ // __NR_sched_setparam 154 ++ GO(__NR_sched_setparam, "2s 1m"); ++ SY(__NR_sched_setparam, x0, x0); FAIL; ++ ++ // __NR_sched_getparam 155 ++ GO(__NR_sched_getparam, "2s 1m"); ++ SY(__NR_sched_getparam, x0, x0); FAIL; ++ ++ // __NR_sched_setscheduler 156 ++ GO(__NR_sched_setscheduler, "3s 1m"); ++ SY(__NR_sched_setscheduler, x0-1, x0, x0+1); FAIL; ++ ++ // __NR_sched_getscheduler 157 ++ GO(__NR_sched_getscheduler, "1s 0m"); ++ SY(__NR_sched_getscheduler, x0-1); FAIL; ++ ++ // __NR_sched_yield 158 ++ GO(__NR_sched_yield, "0s 0m"); ++ SY(__NR_sched_yield); SUCC; ++ ++ // __NR_sched_get_priority_max 159 ++ GO(__NR_sched_get_priority_max, "1s 0m"); ++ SY(__NR_sched_get_priority_max, x0-1); FAIL; ++ ++ // __NR_sched_get_priority_min 160 ++ GO(__NR_sched_get_priority_min, "1s 0m"); ++ SY(__NR_sched_get_priority_min, x0-1); FAIL; ++ ++ // __NR_sched_rr_get_interval 161 ++ GO(__NR_sched_rr_get_interval, "n/a"); ++ //SY(__NR_sched_rr_get_interval); // (Not yet handled by Valgrind) FAIL; ++ ++ // __NR_nanosleep 162 ++ GO(__NR_nanosleep, "2s 2m"); ++ SY(__NR_nanosleep, x0, x0+1); FAIL; ++ ++ // __NR_mremap 163 ++ GO(__NR_mremap, "5s 0m"); ++ SY(__NR_mremap, x0+1, x0, x0, x0+MREMAP_FIXED, x0); FAILx(EINVAL); ++ ++ // __NR_setresuid 164 ++ GO(__NR_setresuid, "3s 0m"); ++ SY(__NR_setresuid, x0-1, x0-1, x0-1); SUCC; ++ ++ // __NR_getresuid 165 ++ GO(__NR_getresuid, "3s 3m"); ++ SY(__NR_getresuid, x0, x0, x0); FAIL; ++ ++ // __NR_vm86 166 ++ //GO(__NR_vm86, "n/a"); ++ // (will probably never be handled by Valgrind) ++ ++ // __NR_query_module 167 ++ //GO(__NR_query_module, "ni"); ++ //SY(__NR_query_module); FAIL; ++ ++ // __NR_poll 168 ++ //GO(__NR_poll, "3s 1m"); ++ //SY(__NR_poll, x0, x0+1, x0); FAIL; ++ ++ // __NR_nfsservctl 169 ++ GO(__NR_nfsservctl, "n/a"); ++ //SY(__NR_nfsservctl); // (Not yet handled by Valgrind) FAIL; ++ ++ // __NR_setresgid 170 ++ GO(__NR_setresgid, "3s 0m"); ++ SY(__NR_setresgid, x0-1, x0-1, x0-1); SUCC; ++ ++ // __NR_getresgid 171 ++ GO(__NR_getresgid, "3s 3m"); ++ SY(__NR_getresgid, x0, x0, x0); FAIL; ++ ++ // __NR_prctl 172 ++#include ++ GO(__NR_prctl, "5s 0m"); ++ SY(__NR_prctl, x0, x0, x0, x0, x0); FAIL; ++ ++ char buf16[16] = "123456789012345."; ++ buf16[15] = x0; // this will cause 'using unitialised value' ++ GO(__NR_prctl, "2s 0m"); ++ SY(__NR_prctl, x0 + PR_SET_NAME, buf16); SUCC; ++ ++ char buf17[17] = "1234567890123456."; ++ buf17[16] = x0; // this must not cause 'using unitialised value' ++ GO(__NR_prctl, "1s 0m"); ++ SY(__NR_prctl, x0 + PR_SET_NAME, buf17); SUCC; ++ ++ // __NR_rt_sigreturn 173 ++ GO(__NR_rt_sigreturn, "n/a"); ++ //SY(__NR_rt_sigreturn); // (Not yet handled by Valgrind) FAIL; ++ ++ // __NR_rt_sigaction 174 ++ GO(__NR_rt_sigaction, "4s 4m"); ++ SY(__NR_rt_sigaction, x0, x0+&px[2], x0+&px[2], x0); FAIL; ++ ++ // __NR_rt_sigprocmask 175 ++ GO(__NR_rt_sigprocmask, "4s 2m"); ++ SY(__NR_rt_sigprocmask, x0, x0+1, x0+1, x0); FAIL; ++ ++ // __NR_rt_sigpending 176 ++ GO(__NR_rt_sigpending, "2s 1m"); ++ 
SY(__NR_rt_sigpending, x0, x0+1); FAIL; ++ ++ // __NR_rt_sigtimedwait 177 ++ GO(__NR_rt_sigtimedwait, "4s 3m"); ++ SY(__NR_rt_sigtimedwait, x0+1, x0+1, x0+1, x0); FAIL; ++ ++ // __NR_rt_sigqueueinfo 178 ++ GO(__NR_rt_sigqueueinfo, "3s 1m"); ++ SY(__NR_rt_sigqueueinfo, x0, x0+1, x0); FAIL; ++ ++ // __NR_rt_sigsuspend 179 ++ GO(__NR_rt_sigsuspend, "2s 1m"); ++ SY(__NR_rt_sigsuspend, x0 + 1, x0 + sizeof(sigset_t)); FAILx(EFAULT); ++ ++ // __NR_pread64 180 ++ GO(__NR_pread64, "5s 1m"); ++ SY(__NR_pread64, x0, x0, x0+1, x0, x0); FAIL; ++ ++ // __NR_pwrite64 181 ++ GO(__NR_pwrite64, "5s 1m"); ++ SY(__NR_pwrite64, x0, x0, x0+1, x0, x0); FAIL; ++ ++ // __NR_chown 182 ++ //GO(__NR_chown, "3s 1m"); ++ //SY(__NR_chown, x0, x0, x0); FAIL; ++ ++ // __NR_getcwd 183 ++ GO(__NR_getcwd, "2s 1m"); ++ SY(__NR_getcwd, x0, x0+1); FAIL; ++ ++ // __NR_capget 184 ++ GO(__NR_capget, "2s 2m"); ++ SY(__NR_capget, x0, x0+1); FAIL; ++ ++ // __NR_capset 185 ++ GO(__NR_capset, "2s 2m"); ++ SY(__NR_capset, x0, x0); FAIL; ++ ++ // __NR_sigaltstack 186 ++ { ++ struct our_sigaltstack { ++ void *ss_sp; ++ int ss_flags; ++ size_t ss_size; ++ } ss; ++ ss.ss_sp = NULL; ++ ss.ss_flags = 0; ++ ss.ss_size = 0; ++ VALGRIND_MAKE_MEM_NOACCESS(& ss, sizeof(struct our_sigaltstack)); ++ GO(__NR_sigaltstack, "2s 2m"); ++ SY(__NR_sigaltstack, x0+&ss, x0+&ss); SUCC; ++ } ++ ++ // __NR_sendfile 187 ++ GO(__NR_sendfile, "4s 1m"); ++ SY(__NR_sendfile, x0, x0, x0+1, x0); FAIL; ++ ++ // __NR_getpmsg 188 ++ // Could do 5s 4m with more effort, but I can't be bothered for this ++ // crappy non-standard syscall. ++ //GO(__NR_getpmsg, "5s 0m"); ++ //SY(__NR_getpmsg, x0, x0, x0, x0); FAIL; ++ ++ // __NR_putpmsg 189 ++ // Could do 5s 2m with more effort, but I can't be bothered for this ++ // crappy non-standard syscall. 
++ //GO(__NR_putpmsg, "5s 0m"); ++ //SY(__NR_putpmsg, x0, x0, x0, x0, x0); FAIL; ++ ++ // __NR_vfork 190 ++ //GO(__NR_vfork, "other"); ++ // (sse scalar_vfork.c) ++ ++ // __NR_ugetrlimit 191 ++ //GO(__NR_ugetrlimit, "2s 1m"); ++ //SY(__NR_ugetrlimit, x0, x0); FAIL; ++ ++ // __NR_mmap2 192 ++ //GO(__NR_mmap2, "6s 0m"); ++ //SY(__NR_mmap2, x0, x0, x0, x0, x0-1, x0); FAIL; ++ ++ // __NR_truncate64 193 ++ //GO(__NR_truncate64, "3s 1m"); ++ //SY(__NR_truncate64, x0, x0, x0); FAIL; ++ ++ // __NR_ftruncate64 194 ++ //GO(__NR_ftruncate64, "3s 0m"); ++ //SY(__NR_ftruncate64, x0, x0, x0); FAIL; ++ ++ // __NR_stat64 195 ++ //GO(__NR_stat64, "2s 2m"); ++ //SY(__NR_stat64, x0, x0); FAIL; ++ ++ // __NR_lstat64 196 ++ //GO(__NR_lstat64, "2s 2m"); ++ //SY(__NR_lstat64, x0, x0); FAIL; ++ ++ // __NR_fstat64 197 ++ //GO(__NR_fstat64, "2s 1m"); ++ //SY(__NR_fstat64, x0, x0); FAIL; ++ ++ // __NR_lchown32 198 ++ //GO(__NR_lchown32, "3s 1m"); ++ //SY(__NR_lchown32, x0, x0, x0); FAIL; ++ ++ // __NR_getuid32 199 ++ //GO(__NR_getuid32, "0s 0m"); ++ //SY(__NR_getuid32); SUCC; ++ ++ // __NR_getgid32 200 ++ //GO(__NR_getgid32, "0s 0m"); ++ //SY(__NR_getgid32); SUCC; ++ ++ // __NR_geteuid32 201 ++ //GO(__NR_geteuid32, "0s 0m"); ++ //SY(__NR_geteuid32); SUCC; ++ ++ // __NR_getegid32 202 ++ //GO(__NR_getegid32, "0s 0m"); ++ //SY(__NR_getegid32); SUCC; ++ ++ // __NR_setreuid32 203 ++ //GO(__NR_setreuid32, "2s 0m"); ++ //SY(__NR_setreuid32, x0-1, x0-1); SUCC; ++ ++ // __NR_setregid32 204 ++ //GO(__NR_setregid32, "2s 0m"); ++ //SY(__NR_setregid32, x0-1, x0-1); SUCC; ++ ++ // __NR_getgroups32 205 ++ //GO(__NR_getgroups32, "2s 1m"); ++ //SY(__NR_getgroups32, x0+1, x0+1); FAIL; ++ ++ // __NR_setgroups32 206 ++ //GO(__NR_setgroups32, "2s 1m"); ++ //SY(__NR_setgroups32, x0+1, x0+1); FAIL; ++ ++ // __NR_fchown32 207 ++ //GO(__NR_fchown32, "3s 0m"); ++ //SY(__NR_fchown32, x0-1, x0, x0); FAIL; ++ ++ // __NR_setresuid32 208 ++ //GO(__NR_setresuid32, "3s 0m"); ++ //SY(__NR_setresuid32, x0-1, x0-1, x0-1); SUCC; ++ ++ // __NR_getresuid32 209 ++ //GO(__NR_getresuid32, "3s 3m"); ++ //SY(__NR_getresuid32, x0, x0, x0); FAIL; ++ ++ // __NR_setresgid32 210 ++ //GO(__NR_setresgid32, "3s 0m"); ++ //SY(__NR_setresgid32, x0-1, x0-1, x0-1); SUCC; ++ ++ // __NR_getresgid32 211 ++ //GO(__NR_getresgid32, "3s 3m"); ++ //SY(__NR_getresgid32, x0, x0, x0); FAIL; ++ ++ // __NR_chown32 212 ++ //GO(__NR_chown32, "3s 1m"); ++ //SY(__NR_chown32, x0, x0, x0); FAIL; ++ ++ // __NR_setuid32 213 ++ //GO(__NR_setuid32, "1s 0m"); ++ //SY(__NR_setuid32, x0-1); FAIL; ++ ++ // __NR_setgid32 214 ++ //GO(__NR_setgid32, "1s 0m"); ++ //SY(__NR_setgid32, x0-1); FAIL; ++ ++ // __NR_setfsuid32 215 ++ //GO(__NR_setfsuid32, "1s 0m"); ++ //SY(__NR_setfsuid32, x0); SUCC; // This syscall has a stupid return value ++ ++ // __NR_setfsgid32 216 ++ //GO(__NR_setfsgid32, "1s 0m"); ++ //SY(__NR_setfsgid32, x0); SUCC; // This syscall has a stupid return value ++ ++ // __NR_pivot_root 217 ++ GO(__NR_pivot_root, "n/a"); ++ //SY(__NR_pivot_root); // (Not yet handled by Valgrind) FAIL; ++ ++ // __NR_mincore 218 ++ GO(__NR_mincore, "3s 1m"); ++ SY(__NR_mincore, x0, x0+40960, x0); FAIL; ++ ++ // __NR_madvise 219 ++ GO(__NR_madvise, "3s 0m"); ++ SY(__NR_madvise, x0, x0+1, x0); FAILx(ENOMEM); ++ ++ // __NR_getdents64 220 ++ GO(__NR_getdents64, "3s 1m"); ++ SY(__NR_getdents64, x0, x0, x0+1); FAIL; ++ ++ // __NR_fcntl64 221 ++ // As with sys_open(), we don't trigger errors for the 1st two args for ++ // the later ones. ++ // For F_GETFD the 3rd arg is ignored. 
++ //GO(__NR_fcntl64, "(GETFD) 2s 0m"); ++ //SY(__NR_fcntl64, x0-1, x0+F_GETFD, x0); FAILx(EBADF); ++ ++ // For F_DUPFD the 3rd arg is 'arg' ++ //GO(__NR_fcntl64, "(DUPFD) 1s 0m"); ++ //SY(__NR_fcntl64, -1, F_DUPFD, x0); FAILx(EBADF); ++ ++ // For F_GETLK the 3rd arg is 'lock'. ++ // On x86, this fails w/EBADF. But on amd64 in 32-bit mode it fails ++ // w/EFAULT. ++ //GO(__NR_fcntl64, "(GETLK) 1s 0m"); ++ //SY(__NR_fcntl64, -1, +F_GETLK, x0); FAIL; //FAILx(EBADF); ++ ++ // 222 ++ GO(222, "ni"); ++ SY(222); FAIL; ++ ++ // 223 ++ GO(223, "ni"); ++ SY(223); FAIL; ++ ++ // __NR_gettid 224 ++ GO(__NR_gettid, "n/a"); ++ //SY(__NR_gettid); // (Not yet handled by Valgrind) FAIL; ++ ++ // __NR_readahead 225 ++ GO(__NR_readahead, "n/a"); ++ //SY(__NR_readahead); // (Not yet handled by Valgrind) FAIL; ++ ++ // __NR_setxattr 226 ++ GO(__NR_setxattr, "5s 3m"); ++ SY(__NR_setxattr, x0, x0, x0, x0+1, x0); FAIL; ++ ++ // __NR_lsetxattr 227 ++ GO(__NR_lsetxattr, "5s 3m"); ++ SY(__NR_lsetxattr, x0, x0, x0, x0+1, x0); FAIL; ++ ++ // __NR_fsetxattr 228 ++ GO(__NR_fsetxattr, "5s 2m"); ++ SY(__NR_fsetxattr, x0, x0, x0, x0+1, x0); FAIL; ++ ++ // __NR_getxattr 229 ++ GO(__NR_getxattr, "4s 3m"); ++ SY(__NR_getxattr, x0, x0, x0, x0+1); FAIL; ++ ++ // __NR_lgetxattr 230 ++ GO(__NR_lgetxattr, "4s 3m"); ++ SY(__NR_lgetxattr, x0, x0, x0, x0+1); FAIL; ++ ++ // __NR_fgetxattr 231 ++ GO(__NR_fgetxattr, "4s 2m"); ++ SY(__NR_fgetxattr, x0, x0, x0, x0+1); FAIL; ++ ++ // __NR_listxattr 232 ++ GO(__NR_listxattr, "3s 2m"); ++ SY(__NR_listxattr, x0, x0, x0+1); FAIL; ++ ++ // __NR_llistxattr 233 ++ GO(__NR_llistxattr, "3s 2m"); ++ SY(__NR_llistxattr, x0, x0, x0+1); FAIL; ++ ++ // __NR_flistxattr 234 ++ GO(__NR_flistxattr, "3s 1m"); ++ SY(__NR_flistxattr, x0-1, x0, x0+1); FAIL; /* kernel returns EBADF, but both seem correct */ ++ ++ // __NR_removexattr 235 ++ GO(__NR_removexattr, "2s 2m"); ++ SY(__NR_removexattr, x0, x0); FAIL; ++ ++ // __NR_lremovexattr 236 ++ GO(__NR_lremovexattr, "2s 2m"); ++ SY(__NR_lremovexattr, x0, x0); FAIL; ++ ++ // __NR_fremovexattr 237 ++ GO(__NR_fremovexattr, "2s 1m"); ++ SY(__NR_fremovexattr, x0, x0); FAIL; ++ ++ // __NR_tkill 238 ++ GO(__NR_tkill, "n/a"); ++ //SY(__NR_tkill); // (Not yet handled by Valgrind) FAIL; ++ ++ // __NR_sendfile64 239 ++ //GO(__NR_sendfile64, "4s 1m"); ++ //SY(__NR_sendfile64, x0, x0, x0+1, x0); FAIL; ++ ++ // __NR_futex 240 ++#ifndef FUTEX_WAIT ++#define FUTEX_WAIT 0 ++#endif ++ // XXX: again, glibc not doing 6th arg means we have only 5s errors ++ GO(__NR_futex, "4s 2m"); ++ SY(__NR_futex, x0+FUTEX_WAIT, x0, x0, x0+1); FAIL; ++ ++ // __NR_sched_setaffinity 241 ++ GO(__NR_sched_setaffinity, "3s 1m"); ++ SY(__NR_sched_setaffinity, x0, x0+1, x0); FAIL; ++ ++ // __NR_sched_getaffinity 242 ++ GO(__NR_sched_getaffinity, "3s 1m"); ++ SY(__NR_sched_getaffinity, x0, x0+1, x0); FAIL; ++ ++ // __NR_set_thread_area 243 ++ //GO(__NR_set_thread_area, "1s 1m"); ++ //SY(__NR_set_thread_area, x0); FAILx(EFAULT); ++ ++ // __NR_get_thread_area 244 ++ //GO(__NR_get_thread_area, "1s 1m"); ++ //SY(__NR_get_thread_area, x0); FAILx(EFAULT); ++ ++ // __NR_io_setup 245 ++ GO(__NR_io_setup, "2s 1m"); ++ SY(__NR_io_setup, x0, x0); FAIL; ++ ++ // __NR_io_destroy 246 ++ { ++ // jump through hoops to prevent the PRE(io_destroy) wrapper crashing. ++ struct fake_aio_ring { ++ unsigned id; /* kernel internal index number */ ++ unsigned nr; /* number of io_events */ ++ // There are more fields in the real aio_ring, but the 'nr' field is ++ // the only one used by the PRE() wrapper. 
++ } ring = { 0, 0 };
++ struct fake_aio_ring* ringptr = &ring;
++ GO(__NR_io_destroy, "1s 0m");
++ SY(__NR_io_destroy, x0+&ringptr); FAIL;
++ }
++
++ // __NR_io_getevents 247
++ GO(__NR_io_getevents, "5s 2m");
++ SY(__NR_io_getevents, x0, x0, x0+1, x0, x0+1); FAIL;
++
++ // __NR_io_submit 248
++ GO(__NR_io_submit, "3s 1m");
++ SY(__NR_io_submit, x0, x0+1, x0); FAIL;
++
++ // __NR_io_cancel 249
++ GO(__NR_io_cancel, "3s 2m");
++ SY(__NR_io_cancel, x0, x0, x0); FAIL;
++
++ // __NR_fadvise64 250
++ GO(__NR_fadvise64, "n/a");
++ //SY(__NR_fadvise64); // (Not yet handled by Valgrind) FAIL;
++
++ // 251
++ GO(251, "ni");
++ SY(251); FAIL;
++
++ // __NR_exit_group 252
++ GO(__NR_exit_group, "other");
++ // (see scalar_exit_group.c)
++
++ // __NR_lookup_dcookie 253
++ GO(__NR_lookup_dcookie, "4s 1m");
++ SY(__NR_lookup_dcookie, x0, x0, x0, x0+1); FAIL;
++
++ // __NR_epoll_create 254
++ //GO(__NR_epoll_create, "1s 0m");
++ //SY(__NR_epoll_create, x0); SUCC_OR_FAIL;
++
++ // __NR_epoll_ctl 255
++ GO(__NR_epoll_ctl, "4s 1m");
++ SY(__NR_epoll_ctl, x0, x0, x0, x0); FAIL;
++
++ // __NR_epoll_wait 256
++ //GO(__NR_epoll_wait, "4s 1m");
++ //SY(__NR_epoll_wait, x0, x0, x0+1, x0); FAIL;
++
++ // __NR_remap_file_pages 257
++ GO(__NR_remap_file_pages, "n/a");
++ //SY(__NR_remap_file_pages); // (Not yet handled by Valgrind) FAIL;
++
++ // __NR_set_tid_address 258
++ GO(__NR_set_tid_address, "1s 0m");
++ SY(__NR_set_tid_address, x0); SUCC_OR_FAILx(ENOSYS);
++
++ // __NR_timer_create 259
++ GO(__NR_timer_create, "3s 2m");
++ SY(__NR_timer_create, x0, x0+1, x0); FAIL;
++
++ // __NR_timer_settime (__NR_timer_create+1)
++ GO(__NR_timer_settime, "4s 2m");
++ SY(__NR_timer_settime, x0, x0, x0, x0+1); FAIL;
++
++ // __NR_timer_gettime (__NR_timer_create+2)
++ GO(__NR_timer_gettime, "2s 1m");
++ SY(__NR_timer_gettime, x0, x0); FAIL;
++
++ // __NR_timer_getoverrun (__NR_timer_create+3)
++ GO(__NR_timer_getoverrun, "1s 0m");
++ SY(__NR_timer_getoverrun, x0); FAIL;
++
++ // __NR_timer_delete (__NR_timer_create+4)
++ GO(__NR_timer_delete, "1s 0m");
++ SY(__NR_timer_delete, x0); FAIL;
++
++ // __NR_clock_settime (__NR_timer_create+5)
++ GO(__NR_clock_settime, "2s 1m");
++ SY(__NR_clock_settime, x0, x0); FAIL; FAIL;
++
++ // __NR_clock_gettime (__NR_timer_create+6)
++ GO(__NR_clock_gettime, "2s 1m");
++ SY(__NR_clock_gettime, x0, x0); FAIL;
++
++ // __NR_clock_getres (__NR_timer_create+7)
++ GO(__NR_clock_getres, "2s 1m");
++ SY(__NR_clock_getres, x0+1, x0+1); FAIL; FAIL;
++
++ // __NR_clock_nanosleep (__NR_timer_create+8)
++ GO(__NR_clock_nanosleep, "n/a");
++ //SY(__NR_clock_nanosleep); // (Not yet handled by Valgrind) FAIL;
++
++ // __NR_statfs64 268
++ //GO(__NR_statfs64, "3s 2m");
++ //SY(__NR_statfs64, x0, x0+1, x0); FAIL;
++
++ // __NR_fstatfs64 269
++ //GO(__NR_fstatfs64, "3s 1m");
++ //SY(__NR_fstatfs64, x0, x0+1, x0); FAIL;
++
++ // __NR_tgkill 270
++ GO(__NR_tgkill, "n/a");
++ //SY(__NR_tgkill); // (Not yet handled by Valgrind) FAIL;
++
++ // __NR_utimes 271
++ //GO(__NR_utimes, "2s 2m");
++ //SY(__NR_utimes, x0, x0+1); FAIL;
++
++ // __NR_fadvise64_64 272
++ //GO(__NR_fadvise64_64, "n/a");
++ //SY(__NR_fadvise64_64); // (Not yet handled by Valgrind) FAIL;
++
++ // __NR_vserver 273
++ //GO(__NR_vserver, "ni");
++ //SY(__NR_vserver); FAIL;
++
++ // __NR_mbind 274
++ GO(__NR_mbind, "n/a");
++ //SY(__NR_mbind); // (Not yet handled by Valgrind) FAIL;
++
++ // __NR_get_mempolicy 275
++ GO(__NR_get_mempolicy, "n/a");
++ //SY(__NR_get_mempolicy); // (Not yet handled by Valgrind) FAIL;
++
++ // __NR_set_mempolicy 276
++
GO(__NR_set_mempolicy, "n/a"); ++ //SY(__NR_set_mempolicy); // (Not yet handled by Valgrind) FAIL; ++ ++ // __NR_mq_open 277 ++ GO(__NR_mq_open, "4s 3m"); ++ SY(__NR_mq_open, x0, x0+O_CREAT, x0, x0+1); FAIL; ++ ++ // __NR_mq_unlink (__NR_mq_open+1) ++ GO(__NR_mq_unlink, "1s 1m"); ++ SY(__NR_mq_unlink, x0); FAIL; ++ ++ // __NR_mq_timedsend (__NR_mq_open+2) ++ GO(__NR_mq_timedsend, "5s 2m"); ++ SY(__NR_mq_timedsend, x0, x0, x0+1, x0, x0+1); FAIL; ++ ++ // __NR_mq_timedreceive (__NR_mq_open+3) ++ GO(__NR_mq_timedreceive, "5s 3m"); ++ SY(__NR_mq_timedreceive, x0, x0, x0+1, x0+1, x0+1); FAIL; ++ ++ // __NR_mq_notify (__NR_mq_open+4) ++ GO(__NR_mq_notify, "2s 1m"); ++ SY(__NR_mq_notify, x0, x0+1); FAIL; ++ ++ // __NR_mq_getsetattr (__NR_mq_open+5) ++ GO(__NR_mq_getsetattr, "3s 2m"); ++ SY(__NR_mq_getsetattr, x0, x0+1, x0+1); FAIL; ++ ++ // __NR_sys_kexec_load 283 ++ //GO(__NR_sys_kexec_load, "ni"); ++ //SY(__NR_sys_kexec_load); FAIL; ++ ++ // __NR_epoll_create1 329 ++ GO(__NR_epoll_create1, "1s 0m"); ++ SY(__NR_epoll_create1, x0); SUCC_OR_FAIL; ++ ++ // __NR_process_vm_readv 347 ++ GO(__NR_process_vm_readv, "6s 2m"); ++ SY(__NR_process_vm_readv, x0, x0, x0+1, x0, x0+1, x0); FAIL; ++ ++ // __NR_process_vm_writev 348 ++ GO(__NR_process_vm_writev, "6s 2m"); ++ SY(__NR_process_vm_writev, x0, x0, x0+1, x0, x0+1, x0); FAIL; ++ ++ // no such syscall... ++ GO(9999, "1e"); ++ SY(9999); FAIL; ++ ++ // __NR_exit 1 ++ GO(__NR_exit, "1s 0m"); ++ SY(__NR_exit, x0); FAIL; ++ ++ assert(0); ++} +diff --git a/memcheck/tests/loongarch64-linux/scalar.h b/memcheck/tests/loongarch64-linux/scalar.h +new file mode 100644 +index 000000000..4d86d2c0a +--- /dev/null ++++ b/memcheck/tests/loongarch64-linux/scalar.h +@@ -0,0 +1,62 @@ ++/* This is the loongarch64 variant of memcheck/tests/x86-linux/scalar.h */ ++#include "../../../include/vki/vki-scnums-loongarch64-linux.h" ++ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++ ++// Since we use vki_unistd.h, we can't include . So we have to ++// declare this ourselves. ++extern long int syscall (long int __sysno, ...) __THROW; ++ ++// Thorough syscall scalar arg checking. Also serves as thorough checking ++// for (very) basic syscall use. Generally not trying to do anything ++// meaningful with the syscalls. 
++ ++#define GO(__NR_xxx, s) \ ++ fprintf(stderr, "-----------------------------------------------------\n" \ ++ "%3d:%20s %s\n" \ ++ "-----------------------------------------------------\n", \ ++ __NR_xxx, #__NR_xxx, s); ++ ++#define SY res = syscall ++ ++#define FAIL assert(-1 == res); ++#define SUCC assert(-1 != res); ++#define SUCC_OR_FAIL /* no test */ ++ ++#define FAILx(E) \ ++ do { \ ++ int myerrno = errno; \ ++ if (-1 == res) { \ ++ if (E == myerrno) { \ ++ /* as expected */ \ ++ } else { \ ++ fprintf(stderr, "Expected error %s (%d), got %d\n", #E, E, myerrno); \ ++ exit(1); \ ++ } \ ++ } else { \ ++ fprintf(stderr, "Expected error %s (%d), got success\n", #E, E); \ ++ exit(1); \ ++ } \ ++ } while (0); ++ ++#define SUCC_OR_FAILx(E) \ ++ do { \ ++ int myerrno = errno; \ ++ if (-1 == res) { \ ++ if (E == myerrno) { \ ++ /* as expected */ \ ++ } else { \ ++ fprintf(stderr, "Expected error %s (%d), got %d\n", #E, E, myerrno); \ ++ exit(1); \ ++ } \ ++ } \ ++ } while (0); +diff --git a/memcheck/tests/loongarch64-linux/scalar.stderr.exp b/memcheck/tests/loongarch64-linux/scalar.stderr.exp +new file mode 100644 +index 000000000..a90b90af5 +--- /dev/null ++++ b/memcheck/tests/loongarch64-linux/scalar.stderr.exp +@@ -0,0 +1,2716 @@ ++----------------------------------------------------- ++128:__NR_restart_syscall n/a ++----------------------------------------------------- ++----------------------------------------------------- ++ 93: __NR_exit below ++----------------------------------------------------- ++----------------------------------------------------- ++ 63: __NR_read 1+3s 1m ++----------------------------------------------------- ++Syscall param (syscallno) contains uninitialised byte(s) ++ ... ++ by 0x........: main (scalar.c:51) ++ ++Syscall param read(fd) contains uninitialised byte(s) ++ ... ++ by 0x........: main (scalar.c:51) ++ ++Syscall param read(buf) contains uninitialised byte(s) ++ ... ++ by 0x........: main (scalar.c:51) ++ ++Syscall param read(count) contains uninitialised byte(s) ++ ... ++ by 0x........: main (scalar.c:51) ++ ++Syscall param read(buf) points to unaddressable byte(s) ++ ... ++ by 0x........: main (scalar.c:51) ++ Address 0x........ is not stack'd, malloc'd or (recently) free'd ++ ++----------------------------------------------------- ++ 64: __NR_write 3s 1m ++----------------------------------------------------- ++Syscall param write(fd) contains uninitialised byte(s) ++ ... ++ by 0x........: main (scalar.c:55) ++ ++Syscall param write(buf) contains uninitialised byte(s) ++ ... ++ by 0x........: main (scalar.c:55) ++ ++Syscall param write(count) contains uninitialised byte(s) ++ ... ++ by 0x........: main (scalar.c:55) ++ ++Syscall param write(buf) points to unaddressable byte(s) ++ ... ++ by 0x........: main (scalar.c:55) ++ Address 0x........ is not stack'd, malloc'd or (recently) free'd ++ ++----------------------------------------------------- ++ 57: __NR_close 1s 0m ++----------------------------------------------------- ++Syscall param close(fd) contains uninitialised byte(s) ++ ... ++ by 0x........: main (scalar.c:69) ++ ++----------------------------------------------------- ++221: __NR_execve 3s 1m ++----------------------------------------------------- ++Syscall param execve(filename) contains uninitialised byte(s) ++ ... ++ by 0x........: main (scalar.c:89) ++ ++Syscall param execve(argv) contains uninitialised byte(s) ++ ... ++ by 0x........: main (scalar.c:89) ++ ++Syscall param execve(envp) contains uninitialised byte(s) ++ ... 
++ by 0x........: main (scalar.c:89) ++ ++Syscall param execve(filename) points to unaddressable byte(s) ++ ... ++ by 0x........: main (scalar.c:89) ++ Address 0x........ is not stack'd, malloc'd or (recently) free'd ++ ++Syscall param execve(argv) points to unaddressable byte(s) ++ ... ++ by 0x........: main (scalar.c:89) ++ Address 0x........ is not stack'd, malloc'd or (recently) free'd ++ ++----------------------------------------------------- ++221: __NR_execve 3s 1m ++----------------------------------------------------- ++Syscall param execve(filename) contains uninitialised byte(s) ++ ... ++ by 0x........: main (scalar.c:92) ++ ++Syscall param execve(argv) contains uninitialised byte(s) ++ ... ++ by 0x........: main (scalar.c:92) ++ ++Syscall param execve(envp) contains uninitialised byte(s) ++ ... ++ by 0x........: main (scalar.c:92) ++ ++Syscall param execve(filename) points to unaddressable byte(s) ++ ... ++ by 0x........: main (scalar.c:92) ++ Address 0x........ is not stack'd, malloc'd or (recently) free'd ++ ++Syscall param execve(argv) points to unaddressable byte(s) ++ ... ++ by 0x........: main (scalar.c:92) ++ Address 0x........ is not stack'd, malloc'd or (recently) free'd ++ ++----------------------------------------------------- ++221: __NR_execve 4s 2m ++----------------------------------------------------- ++Syscall param execve(filename) contains uninitialised byte(s) ++ ... ++ by 0x........: main (scalar.c:96) ++ ++Syscall param execve(argv) contains uninitialised byte(s) ++ ... ++ by 0x........: main (scalar.c:96) ++ ++Syscall param execve(envp) contains uninitialised byte(s) ++ ... ++ by 0x........: main (scalar.c:96) ++ ++Syscall param execve(filename) points to unaddressable byte(s) ++ ... ++ by 0x........: main (scalar.c:96) ++ Address 0x........ is not stack'd, malloc'd or (recently) free'd ++ ++Syscall param execve(argv) points to uninitialised byte(s) ++ ... ++ by 0x........: main (scalar.c:96) ++ Address 0x........ is on thread 1's stack ++ in frame #1, created by main (scalar.c:28) ++ ++Syscall param execve(argv[0]) points to unaddressable byte(s) ++ ... ++ by 0x........: main (scalar.c:96) ++ Address 0x........ is not stack'd, malloc'd or (recently) free'd ++ ++----------------------------------------------------- ++221: __NR_execve 4s 2m ++----------------------------------------------------- ++Syscall param execve(filename) contains uninitialised byte(s) ++ ... ++ by 0x........: main (scalar.c:99) ++ ++Syscall param execve(argv) contains uninitialised byte(s) ++ ... ++ by 0x........: main (scalar.c:99) ++ ++Syscall param execve(envp) contains uninitialised byte(s) ++ ... ++ by 0x........: main (scalar.c:99) ++ ++Syscall param execve(filename) points to unaddressable byte(s) ++ ... ++ by 0x........: main (scalar.c:99) ++ Address 0x........ is not stack'd, malloc'd or (recently) free'd ++ ++Syscall param execve(envp) points to uninitialised byte(s) ++ ... ++ by 0x........: main (scalar.c:99) ++ Address 0x........ is on thread 1's stack ++ in frame #1, created by main (scalar.c:28) ++ ++Syscall param execve(envp[i]) points to unaddressable byte(s) ++ ... ++ by 0x........: main (scalar.c:99) ++ Address 0x........ is not stack'd, malloc'd or (recently) free'd ++ ++----------------------------------------------------- ++ 49: __NR_chdir 1s 1m ++----------------------------------------------------- ++Syscall param chdir(path) contains uninitialised byte(s) ++ ... 
++ by 0x........: main (scalar.c:103) ++ ++Syscall param chdir(path) points to unaddressable byte(s) ++ ... ++ by 0x........: main (scalar.c:103) ++ Address 0x........ is not stack'd, malloc'd or (recently) free'd ++ ++----------------------------------------------------- ++ 62: __NR_lseek 3s 0m ++----------------------------------------------------- ++Syscall param lseek(fd) contains uninitialised byte(s) ++ ... ++ by 0x........: main (scalar.c:131) ++ ++Syscall param lseek(offset) contains uninitialised byte(s) ++ ... ++ by 0x........: main (scalar.c:131) ++ ++Syscall param lseek(whence) contains uninitialised byte(s) ++ ... ++ by 0x........: main (scalar.c:131) ++ ++----------------------------------------------------- ++172: __NR_getpid 0s 0m ++----------------------------------------------------- ++----------------------------------------------------- ++ 40: __NR_mount 5s 3m ++----------------------------------------------------- ++Syscall param mount(source) contains uninitialised byte(s) ++ ... ++ by 0x........: main (scalar.c:139) ++ ++Syscall param mount(target) contains uninitialised byte(s) ++ ... ++ by 0x........: main (scalar.c:139) ++ ++Syscall param mount(type) contains uninitialised byte(s) ++ ... ++ by 0x........: main (scalar.c:139) ++ ++Syscall param mount(flags) contains uninitialised byte(s) ++ ... ++ by 0x........: main (scalar.c:139) ++ ++Syscall param mount(data) contains uninitialised byte(s) ++ ... ++ by 0x........: main (scalar.c:139) ++ ++Syscall param mount(target) points to unaddressable byte(s) ++ ... ++ by 0x........: main (scalar.c:139) ++ Address 0x........ is not stack'd, malloc'd or (recently) free'd ++ ++Syscall param mount(type) points to unaddressable byte(s) ++ ... ++ by 0x........: main (scalar.c:139) ++ Address 0x........ is not stack'd, malloc'd or (recently) free'd ++ ++----------------------------------------------------- ++146: __NR_setuid 1s 0m ++----------------------------------------------------- ++Syscall param setuid(uid) contains uninitialised byte(s) ++ ... ++ by 0x........: main (scalar.c:147) ++ ++----------------------------------------------------- ++174: __NR_getuid 0s 0m ++----------------------------------------------------- ++----------------------------------------------------- ++ 81: __NR_sync 0s 0m ++----------------------------------------------------- ++----------------------------------------------------- ++129: __NR_kill 2s 0m ++----------------------------------------------------- ++Syscall param kill(pid) contains uninitialised byte(s) ++ ... ++ by 0x........: main (scalar.c:204) ++ ++Syscall param kill(signal) contains uninitialised byte(s) ++ ... ++ by 0x........: main (scalar.c:204) ++ ++----------------------------------------------------- ++ 23: __NR_dup 1s 0m ++----------------------------------------------------- ++Syscall param dup(oldfd) contains uninitialised byte(s) ++ ... ++ by 0x........: main (scalar.c:220) ++ ++----------------------------------------------------- ++153: __NR_times 1s 1m ++----------------------------------------------------- ++Syscall param times(buf) contains uninitialised byte(s) ++ ... ++ by 0x........: main (scalar.c:228) ++ ++Syscall param times(buf) points to unaddressable byte(s) ++ ... ++ by 0x........: main (scalar.c:228) ++ Address 0x........ 
is not stack'd, malloc'd or (recently) free'd ++ ++----------------------------------------------------- ++214: __NR_brk 1s 0m ++----------------------------------------------------- ++Syscall param brk(end_data_segment) contains uninitialised byte(s) ++ ... ++ by 0x........: main (scalar.c:236) ++ ++----------------------------------------------------- ++144: __NR_setgid 1s 0m ++----------------------------------------------------- ++Syscall param setgid(gid) contains uninitialised byte(s) ++ ... ++ by 0x........: main (scalar.c:240) ++ ++----------------------------------------------------- ++176: __NR_getgid 0s 0m ++----------------------------------------------------- ++----------------------------------------------------- ++175: __NR_geteuid 0s 0m ++----------------------------------------------------- ++----------------------------------------------------- ++177: __NR_getegid 0s 0m ++----------------------------------------------------- ++----------------------------------------------------- ++ 89: __NR_acct 1s 1m ++----------------------------------------------------- ++Syscall param acct(filename) contains uninitialised byte(s) ++ ... ++ by 0x........: main (scalar.c:260) ++ ++Syscall param acct(filename) points to unaddressable byte(s) ++ ... ++ by 0x........: main (scalar.c:260) ++ Address 0x........ is not stack'd, malloc'd or (recently) free'd ++ ++----------------------------------------------------- ++ 39: __NR_umount2 2s 1m ++----------------------------------------------------- ++Syscall param umount2(path) contains uninitialised byte(s) ++ ... ++ by 0x........: main (scalar.c:264) ++ ++Syscall param umount2(flags) contains uninitialised byte(s) ++ ... ++ by 0x........: main (scalar.c:264) ++ ++Syscall param umount2(path) points to unaddressable byte(s) ++ ... ++ by 0x........: main (scalar.c:264) ++ Address 0x........ is not stack'd, malloc'd or (recently) free'd ++ ++----------------------------------------------------- ++ 29: __NR_ioctl 3s 1m ++----------------------------------------------------- ++Syscall param ioctl(fd) contains uninitialised byte(s) ++ ... ++ by 0x........: main (scalar.c:273) ++ ++Syscall param ioctl(request) contains uninitialised byte(s) ++ ... ++ by 0x........: main (scalar.c:273) ++ ++Syscall param ioctl(arg) contains uninitialised byte(s) ++ ... ++ by 0x........: main (scalar.c:273) ++ ++Syscall param ioctl(TCSET{S,SW,SF}) points to unaddressable byte(s) ++ ... ++ by 0x........: main (scalar.c:273) ++ Address 0x........ is not stack'd, malloc'd or (recently) free'd ++ ++----------------------------------------------------- ++ 25: __NR_fcntl (GETFD) 2s 0m ++----------------------------------------------------- ++Syscall param fcntl(fd) contains uninitialised byte(s) ++ ... ++ by 0x........: main (scalar.c:279) ++ ++Syscall param fcntl(cmd) contains uninitialised byte(s) ++ ... ++ by 0x........: main (scalar.c:279) ++ ++----------------------------------------------------- ++ 25: __NR_fcntl (DUPFD) 1s 0m ++----------------------------------------------------- ++Syscall param fcntl(arg) contains uninitialised byte(s) ++ ... ++ by 0x........: main (scalar.c:285) ++ ++----------------------------------------------------- ++ 25: __NR_fcntl (GETLK) 1s 5m ++----------------------------------------------------- ++Syscall param fcntl(lock) contains uninitialised byte(s) ++ ... ++ by 0x........: main (scalar.c:291) ++ ++Syscall param fcntl(lock->l_type) points to unaddressable byte(s) ++ ... ++ by 0x........: main (scalar.c:291) ++ Address 0x........ 
is not stack'd, malloc'd or (recently) free'd ++ ++Syscall param fcntl(lock->l_whence) points to unaddressable byte(s) ++ ... ++ by 0x........: main (scalar.c:291) ++ Address 0x........ is not stack'd, malloc'd or (recently) free'd ++ ++Syscall param fcntl(lock->l_start) points to unaddressable byte(s) ++ ... ++ by 0x........: main (scalar.c:291) ++ Address 0x........ is not stack'd, malloc'd or (recently) free'd ++ ++Syscall param fcntl(lock->l_len) points to unaddressable byte(s) ++ ... ++ by 0x........: main (scalar.c:291) ++ Address 0x........ is not stack'd, malloc'd or (recently) free'd ++ ++Syscall param fcntl(lock->l_pid) points to unaddressable byte(s) ++ ... ++ by 0x........: main (scalar.c:291) ++ Address 0x........ is not stack'd, malloc'd or (recently) free'd ++ ++----------------------------------------------------- ++154: __NR_setpgid 2s 0m ++----------------------------------------------------- ++Syscall param setpgid(pid) contains uninitialised byte(s) ++ ... ++ by 0x........: main (scalar.c:299) ++ ++Syscall param setpgid(pgid) contains uninitialised byte(s) ++ ... ++ by 0x........: main (scalar.c:299) ++ ++----------------------------------------------------- ++166: __NR_umask 1s 0m ++----------------------------------------------------- ++Syscall param umask(mask) contains uninitialised byte(s) ++ ... ++ by 0x........: main (scalar.c:311) ++ ++----------------------------------------------------- ++ 51: __NR_chroot 1s 1m ++----------------------------------------------------- ++Syscall param chroot(path) contains uninitialised byte(s) ++ ... ++ by 0x........: main (scalar.c:315) ++ ++Syscall param chroot(path) points to unaddressable byte(s) ++ ... ++ by 0x........: main (scalar.c:315) ++ Address 0x........ is not stack'd, malloc'd or (recently) free'd ++ ++----------------------------------------------------- ++173: __NR_getppid 0s 0m ++----------------------------------------------------- ++----------------------------------------------------- ++157: __NR_setsid 0s 0m ++----------------------------------------------------- ++----------------------------------------------------- ++145: __NR_setreuid 2s 0m ++----------------------------------------------------- ++Syscall param setreuid(ruid) contains uninitialised byte(s) ++ ... ++ by 0x........: main (scalar.c:351) ++ ++Syscall param setreuid(euid) contains uninitialised byte(s) ++ ... ++ by 0x........: main (scalar.c:351) ++ ++----------------------------------------------------- ++143: __NR_setregid 2s 0m ++----------------------------------------------------- ++Syscall param setregid(rgid) contains uninitialised byte(s) ++ ... ++ by 0x........: main (scalar.c:355) ++ ++Syscall param setregid(egid) contains uninitialised byte(s) ++ ... ++ by 0x........: main (scalar.c:355) ++ ++----------------------------------------------------- ++161: __NR_sethostname n/a ++----------------------------------------------------- ++----------------------------------------------------- ++165: __NR_getrusage 2s 1m ++----------------------------------------------------- ++Syscall param getrusage(who) contains uninitialised byte(s) ++ ... ++ by 0x........: main (scalar.c:380) ++ ++Syscall param getrusage(usage) contains uninitialised byte(s) ++ ... ++ by 0x........: main (scalar.c:380) ++ ++Syscall param getrusage(usage) points to unaddressable byte(s) ++ ... ++ by 0x........: main (scalar.c:380) ++ Address 0x........ 
is not stack'd, malloc'd or (recently) free'd ++ ++----------------------------------------------------- ++169: __NR_gettimeofday 2s 2m ++----------------------------------------------------- ++Syscall param gettimeofday(tv) contains uninitialised byte(s) ++ ... ++ by 0x........: main (scalar.c:384) ++ ++Syscall param gettimeofday(tz) contains uninitialised byte(s) ++ ... ++ by 0x........: main (scalar.c:384) ++ ++Syscall param gettimeofday(tv) points to unaddressable byte(s) ++ ... ++ by 0x........: main (scalar.c:384) ++ Address 0x........ is not stack'd, malloc'd or (recently) free'd ++ ++Syscall param gettimeofday(tz) points to unaddressable byte(s) ++ ... ++ by 0x........: main (scalar.c:384) ++ Address 0x........ is not stack'd, malloc'd or (recently) free'd ++ ++----------------------------------------------------- ++170: __NR_settimeofday 2s 2m ++----------------------------------------------------- ++Syscall param settimeofday(tv) contains uninitialised byte(s) ++ ... ++ by 0x........: main (scalar.c:388) ++ ++Syscall param settimeofday(tz) contains uninitialised byte(s) ++ ... ++ by 0x........: main (scalar.c:388) ++ ++Syscall param settimeofday(tv) points to unaddressable byte(s) ++ ... ++ by 0x........: main (scalar.c:388) ++ Address 0x........ is not stack'd, malloc'd or (recently) free'd ++ ++Syscall param settimeofday(tz) points to unaddressable byte(s) ++ ... ++ by 0x........: main (scalar.c:388) ++ Address 0x........ is not stack'd, malloc'd or (recently) free'd ++ ++----------------------------------------------------- ++158: __NR_getgroups 2s 1m ++----------------------------------------------------- ++Syscall param getgroups(size) contains uninitialised byte(s) ++ ... ++ by 0x........: main (scalar.c:392) ++ ++Syscall param getgroups(list) contains uninitialised byte(s) ++ ... ++ by 0x........: main (scalar.c:392) ++ ++Syscall param getgroups(list) points to unaddressable byte(s) ++ ... ++ by 0x........: main (scalar.c:392) ++ Address 0x........ is not stack'd, malloc'd or (recently) free'd ++ ++----------------------------------------------------- ++159: __NR_setgroups 2s 1m ++----------------------------------------------------- ++Syscall param setgroups(size) contains uninitialised byte(s) ++ ... ++ by 0x........: main (scalar.c:396) ++ ++Syscall param setgroups(list) contains uninitialised byte(s) ++ ... ++ by 0x........: main (scalar.c:396) ++ ++Syscall param setgroups(list) points to unaddressable byte(s) ++ ... ++ by 0x........: main (scalar.c:396) ++ Address 0x........ is not stack'd, malloc'd or (recently) free'd ++ ++----------------------------------------------------- ++224: __NR_swapon n/a ++----------------------------------------------------- ++----------------------------------------------------- ++142: __NR_reboot n/a ++----------------------------------------------------- ++----------------------------------------------------- ++222: __NR_mmap 1s 1m ++----------------------------------------------------- ++Syscall param mmap(addr) contains uninitialised byte(s) ++ ... ++ by 0x........: main (scalar.c:437) ++ ++----------------------------------------------------- ++215: __NR_munmap 2s 0m ++----------------------------------------------------- ++Syscall param munmap(start) contains uninitialised byte(s) ++ ... ++ by 0x........: main (scalar.c:442) ++ ++Syscall param munmap(length) contains uninitialised byte(s) ++ ... 
++ by 0x........: main (scalar.c:442) ++ ++----------------------------------------------------- ++ 45: __NR_truncate 2s 1m ++----------------------------------------------------- ++Syscall param truncate(path) contains uninitialised byte(s) ++ ... ++ by 0x........: main (scalar.c:446) ++ ++ ++More than 100 errors detected. Subsequent errors ++will still be recorded, but in less detail than before. ++Syscall param truncate(length) contains uninitialised byte(s) ++ ... ++ by 0x........: main (scalar.c:446) ++ ++Syscall param truncate(path) points to unaddressable byte(s) ++ ... ++ by 0x........: main (scalar.c:446) ++ Address 0x........ is not stack'd, malloc'd or (recently) free'd ++ ++----------------------------------------------------- ++ 46: __NR_ftruncate 2s 0m ++----------------------------------------------------- ++Syscall param ftruncate(fd) contains uninitialised byte(s) ++ ... ++ by 0x........: main (scalar.c:450) ++ ++Syscall param ftruncate(length) contains uninitialised byte(s) ++ ... ++ by 0x........: main (scalar.c:450) ++ ++----------------------------------------------------- ++ 52: __NR_fchmod 2s 0m ++----------------------------------------------------- ++Syscall param fchmod(fildes) contains uninitialised byte(s) ++ ... ++ by 0x........: main (scalar.c:454) ++ ++Syscall param fchmod(mode) contains uninitialised byte(s) ++ ... ++ by 0x........: main (scalar.c:454) ++ ++----------------------------------------------------- ++ 55: __NR_fchown 3s 0m ++----------------------------------------------------- ++Syscall param fchown(fd) contains uninitialised byte(s) ++ ... ++ by 0x........: main (scalar.c:458) ++ ++Syscall param fchown(owner) contains uninitialised byte(s) ++ ... ++ by 0x........: main (scalar.c:458) ++ ++Syscall param fchown(group) contains uninitialised byte(s) ++ ... ++ by 0x........: main (scalar.c:458) ++ ++----------------------------------------------------- ++141: __NR_getpriority 2s 0m ++----------------------------------------------------- ++Syscall param getpriority(which) contains uninitialised byte(s) ++ ... ++ by 0x........: main (scalar.c:462) ++ ++Syscall param getpriority(who) contains uninitialised byte(s) ++ ... ++ by 0x........: main (scalar.c:462) ++ ++----------------------------------------------------- ++140: __NR_setpriority 3s 0m ++----------------------------------------------------- ++Syscall param setpriority(which) contains uninitialised byte(s) ++ ... ++ by 0x........: main (scalar.c:466) ++ ++Syscall param setpriority(who) contains uninitialised byte(s) ++ ... ++ by 0x........: main (scalar.c:466) ++ ++Syscall param setpriority(prio) contains uninitialised byte(s) ++ ... ++ by 0x........: main (scalar.c:466) ++ ++----------------------------------------------------- ++ 43: __NR_statfs 2s 2m ++----------------------------------------------------- ++Syscall param statfs(path) contains uninitialised byte(s) ++ ... ++ by 0x........: main (scalar.c:474) ++ ++Syscall param statfs(buf) contains uninitialised byte(s) ++ ... ++ by 0x........: main (scalar.c:474) ++ ++Syscall param statfs(path) points to unaddressable byte(s) ++ ... ++ by 0x........: main (scalar.c:474) ++ Address 0x........ is not stack'd, malloc'd or (recently) free'd ++ ++Syscall param statfs(buf) points to unaddressable byte(s) ++ ... ++ by 0x........: main (scalar.c:474) ++ Address 0x........ 
is not stack'd, malloc'd or (recently) free'd ++ ++----------------------------------------------------- ++ 44: __NR_fstatfs 2s 1m ++----------------------------------------------------- ++Syscall param fstatfs(fd) contains uninitialised byte(s) ++ ... ++ by 0x........: main (scalar.c:478) ++ ++Syscall param fstatfs(buf) contains uninitialised byte(s) ++ ... ++ by 0x........: main (scalar.c:478) ++ ++Syscall param fstatfs(buf) points to unaddressable byte(s) ++ ... ++ by 0x........: main (scalar.c:478) ++ Address 0x........ is not stack'd, malloc'd or (recently) free'd ++ ++----------------------------------------------------- ++116: __NR_syslog 3s 1m ++----------------------------------------------------- ++Syscall param syslog(type) contains uninitialised byte(s) ++ ... ++ by 0x........: main (scalar.c:490) ++ ++Syscall param syslog(bufp) contains uninitialised byte(s) ++ ... ++ by 0x........: main (scalar.c:490) ++ ++Syscall param syslog(len) contains uninitialised byte(s) ++ ... ++ by 0x........: main (scalar.c:490) ++ ++Syscall param syslog(bufp) points to unaddressable byte(s) ++ ... ++ by 0x........: main (scalar.c:490) ++ Address 0x........ is not stack'd, malloc'd or (recently) free'd ++ ++----------------------------------------------------- ++103: __NR_setitimer 3s 2m ++----------------------------------------------------- ++Syscall param setitimer(which) contains uninitialised byte(s) ++ ... ++ by 0x........: main (scalar.c:494) ++ ++Syscall param setitimer(value) contains uninitialised byte(s) ++ ... ++ by 0x........: main (scalar.c:494) ++ ++Syscall param setitimer(ovalue) contains uninitialised byte(s) ++ ... ++ by 0x........: main (scalar.c:494) ++ ++Syscall param setitimer(&value->it_interval) points to unaddressable byte(s) ++ ... ++ by 0x........: main (scalar.c:494) ++ Address 0x........ is not stack'd, malloc'd or (recently) free'd ++ ++Syscall param setitimer(&value->it_value) points to unaddressable byte(s) ++ ... ++ by 0x........: main (scalar.c:494) ++ Address 0x........ is not stack'd, malloc'd or (recently) free'd ++ ++Syscall param setitimer(&ovalue->it_interval) points to unaddressable byte(s) ++ ... ++ by 0x........: main (scalar.c:494) ++ Address 0x........ is not stack'd, malloc'd or (recently) free'd ++ ++Syscall param setitimer(&ovalue->it_value) points to unaddressable byte(s) ++ ... ++ by 0x........: main (scalar.c:494) ++ Address 0x........ is not stack'd, malloc'd or (recently) free'd ++ ++----------------------------------------------------- ++102: __NR_getitimer 2s 1m ++----------------------------------------------------- ++Syscall param getitimer(which) contains uninitialised byte(s) ++ ... ++ by 0x........: main (scalar.c:498) ++ ++Syscall param getitimer(value) contains uninitialised byte(s) ++ ... ++ by 0x........: main (scalar.c:498) ++ ++Syscall param getitimer(&value->it_interval) points to unaddressable byte(s) ++ ... ++ by 0x........: main (scalar.c:498) ++ Address 0x........ is not stack'd, malloc'd or (recently) free'd ++ ++Syscall param getitimer(&value->it_value) points to unaddressable byte(s) ++ ... ++ by 0x........: main (scalar.c:498) ++ Address 0x........ 
is not stack'd, malloc'd or (recently) free'd ++ ++----------------------------------------------------- ++ 58: __NR_vhangup 0s 0m ++----------------------------------------------------- ++----------------------------------------------------- ++260: __NR_wait4 4s 2m ++----------------------------------------------------- ++Syscall param wait4(pid) contains uninitialised byte(s) ++ ... ++ by 0x........: main (scalar.c:534) ++ ++Syscall param wait4(status) contains uninitialised byte(s) ++ ... ++ by 0x........: main (scalar.c:534) ++ ++Syscall param wait4(options) contains uninitialised byte(s) ++ ... ++ by 0x........: main (scalar.c:534) ++ ++Syscall param wait4(rusage) contains uninitialised byte(s) ++ ... ++ by 0x........: main (scalar.c:534) ++ ++Syscall param wait4(status) points to unaddressable byte(s) ++ ... ++ by 0x........: main (scalar.c:534) ++ Address 0x........ is not stack'd, malloc'd or (recently) free'd ++ ++Syscall param wait4(rusage) points to unaddressable byte(s) ++ ... ++ by 0x........: main (scalar.c:534) ++ Address 0x........ is not stack'd, malloc'd or (recently) free'd ++ ++----------------------------------------------------- ++225: __NR_swapoff n/a ++----------------------------------------------------- ++----------------------------------------------------- ++179: __NR_sysinfo 1s 1m ++----------------------------------------------------- ++Syscall param sysinfo(info) contains uninitialised byte(s) ++ ... ++ by 0x........: main (scalar.c:542) ++ ++Syscall param sysinfo(info) points to unaddressable byte(s) ++ ... ++ by 0x........: main (scalar.c:542) ++ Address 0x........ is not stack'd, malloc'd or (recently) free'd ++ ++----------------------------------------------------- ++ 82: __NR_fsync 1s 0m ++----------------------------------------------------- ++Syscall param fsync(fd) contains uninitialised byte(s) ++ ... ++ by 0x........: main (scalar.c:553) ++ ++----------------------------------------------------- ++220: __NR_clone 5s 3m ++----------------------------------------------------- ++Syscall param clone(flags) contains uninitialised byte(s) ++ ... ++ by 0x........: main (scalar.c:564) ++ ++Syscall param clone(child_stack) contains uninitialised byte(s) ++ ... ++ by 0x........: main (scalar.c:564) ++ ++Syscall param clone(parent_tidptr) contains uninitialised byte(s) ++ ... ++ by 0x........: main (scalar.c:564) ++ ++Syscall param clone(parent_tidptr) points to unaddressable byte(s) ++ ... ++ by 0x........: main (scalar.c:564) ++ Address 0x........ is not stack'd, malloc'd or (recently) free'd ++ ++Syscall param clone(tlsinfo) contains uninitialised byte(s) ++ ... ++ by 0x........: main (scalar.c:564) ++ ++Syscall param clone(child_tidptr) contains uninitialised byte(s) ++ ... ++ by 0x........: main (scalar.c:564) ++ ++Syscall param clone(child_tidptr) points to unaddressable byte(s) ++ ... ++ by 0x........: main (scalar.c:564) ++ Address 0x........ is not stack'd, malloc'd or (recently) free'd ++ ++----------------------------------------------------- ++162: __NR_setdomainname n/a ++----------------------------------------------------- ++----------------------------------------------------- ++160: __NR_uname 1s 1m ++----------------------------------------------------- ++Syscall param uname(buf) contains uninitialised byte(s) ++ ... ++ by 0x........: main (scalar.c:575) ++ ++Syscall param uname(buf) points to unaddressable byte(s) ++ ... ++ by 0x........: main (scalar.c:575) ++ Address 0x........ 
is not stack'd, malloc'd or (recently) free'd ++ ++----------------------------------------------------- ++171: __NR_adjtimex XXX ++----------------------------------------------------- ++----------------------------------------------------- ++226: __NR_mprotect 3s 0m ++----------------------------------------------------- ++Syscall param mprotect(addr) contains uninitialised byte(s) ++ ... ++ by 0x........: main (scalar.c:588) ++ ++Syscall param mprotect(len) contains uninitialised byte(s) ++ ... ++ by 0x........: main (scalar.c:588) ++ ++Syscall param mprotect(prot) contains uninitialised byte(s) ++ ... ++ by 0x........: main (scalar.c:588) ++ ++----------------------------------------------------- ++105: __NR_init_module 3s 2m ++----------------------------------------------------- ++Syscall param init_module(umod) contains uninitialised byte(s) ++ ... ++ by 0x........: main (scalar.c:600) ++ ++Syscall param init_module(len) contains uninitialised byte(s) ++ ... ++ by 0x........: main (scalar.c:600) ++ ++Syscall param init_module(uargs) contains uninitialised byte(s) ++ ... ++ by 0x........: main (scalar.c:600) ++ ++Syscall param init_module(umod) points to unaddressable byte(s) ++ ... ++ by 0x........: main (scalar.c:600) ++ Address 0x........ is not stack'd, malloc'd or (recently) free'd ++ ++Syscall param init_module(uargs) points to unaddressable byte(s) ++ ... ++ by 0x........: main (scalar.c:600) ++ Address 0x........ is not stack'd, malloc'd or (recently) free'd ++ ++----------------------------------------------------- ++106: __NR_delete_module n/a ++----------------------------------------------------- ++----------------------------------------------------- ++ 60: __NR_quotactl 4s 1m ++----------------------------------------------------- ++Syscall param quotactl(cmd) contains uninitialised byte(s) ++ ... ++ by 0x........: main (scalar.c:612) ++ ++Syscall param quotactl(special) contains uninitialised byte(s) ++ ... ++ by 0x........: main (scalar.c:612) ++ ++Syscall param quotactl(id) contains uninitialised byte(s) ++ ... ++ by 0x........: main (scalar.c:612) ++ ++Syscall param quotactl(addr) contains uninitialised byte(s) ++ ... ++ by 0x........: main (scalar.c:612) ++ ++Syscall param quotactl(special) points to unaddressable byte(s) ++ ... ++ by 0x........: main (scalar.c:612) ++ Address 0x........ is not stack'd, malloc'd or (recently) free'd ++ ++----------------------------------------------------- ++155: __NR_getpgid 1s 0m ++----------------------------------------------------- ++Syscall param getpgid(pid) contains uninitialised byte(s) ++ ... ++ by 0x........: main (scalar.c:616) ++ ++----------------------------------------------------- ++ 50: __NR_fchdir 1s 0m ++----------------------------------------------------- ++Syscall param fchdir(fd) contains uninitialised byte(s) ++ ... ++ by 0x........: main (scalar.c:620) ++ ++----------------------------------------------------- ++ 92: __NR_personality 1s 0m ++----------------------------------------------------- ++Syscall param personality(persona) contains uninitialised byte(s) ++ ... ++ by 0x........: main (scalar.c:632) ++ ++----------------------------------------------------- ++151: __NR_setfsuid 1s 0m ++----------------------------------------------------- ++Syscall param setfsuid(uid) contains uninitialised byte(s) ++ ... 
++ by 0x........: main (scalar.c:640) ++ ++----------------------------------------------------- ++152: __NR_setfsgid 1s 0m ++----------------------------------------------------- ++Syscall param setfsgid(gid) contains uninitialised byte(s) ++ ... ++ by 0x........: main (scalar.c:644) ++ ++----------------------------------------------------- ++ 32: __NR_flock 2s 0m ++----------------------------------------------------- ++Syscall param flock(fd) contains uninitialised byte(s) ++ ... ++ by 0x........: main (scalar.c:660) ++ ++Syscall param flock(operation) contains uninitialised byte(s) ++ ... ++ by 0x........: main (scalar.c:660) ++ ++----------------------------------------------------- ++227: __NR_msync 3s 1m ++----------------------------------------------------- ++Syscall param msync(start) contains uninitialised byte(s) ++ ... ++ by 0x........: main (scalar.c:664) ++ ++Syscall param msync(length) contains uninitialised byte(s) ++ ... ++ by 0x........: main (scalar.c:664) ++ ++Syscall param msync(flags) contains uninitialised byte(s) ++ ... ++ by 0x........: main (scalar.c:664) ++ ++Syscall param msync(start) points to unaddressable byte(s) ++ ... ++ by 0x........: main (scalar.c:664) ++ Address 0x........ is not stack'd, malloc'd or (recently) free'd ++ ++----------------------------------------------------- ++ 65: __NR_readv 3s 1m ++----------------------------------------------------- ++Syscall param readv(fd) contains uninitialised byte(s) ++ ... ++ by 0x........: main (scalar.c:668) ++ ++Syscall param readv(vector) contains uninitialised byte(s) ++ ... ++ by 0x........: main (scalar.c:668) ++ ++Syscall param readv(count) contains uninitialised byte(s) ++ ... ++ by 0x........: main (scalar.c:668) ++ ++Syscall param readv(vector) points to unaddressable byte(s) ++ ... ++ by 0x........: main (scalar.c:668) ++ Address 0x........ is not stack'd, malloc'd or (recently) free'd ++ ++----------------------------------------------------- ++ 66: __NR_writev 3s 1m ++----------------------------------------------------- ++Syscall param writev(fd) contains uninitialised byte(s) ++ ... ++ by 0x........: main (scalar.c:672) ++ ++Syscall param writev(vector) contains uninitialised byte(s) ++ ... ++ by 0x........: main (scalar.c:672) ++ ++Syscall param writev(count) contains uninitialised byte(s) ++ ... ++ by 0x........: main (scalar.c:672) ++ ++Syscall param writev(vector) points to unaddressable byte(s) ++ ... ++ by 0x........: main (scalar.c:672) ++ Address 0x........ is not stack'd, malloc'd or (recently) free'd ++ ++----------------------------------------------------- ++156: __NR_getsid 1s 0m ++----------------------------------------------------- ++Syscall param getsid(pid) contains uninitialised byte(s) ++ ... ++ by 0x........: main (scalar.c:676) ++ ++----------------------------------------------------- ++ 83: __NR_fdatasync 1s 0m ++----------------------------------------------------- ++Syscall param fdatasync(fd) contains uninitialised byte(s) ++ ... ++ by 0x........: main (scalar.c:680) ++ ++----------------------------------------------------- ++228: __NR_mlock 2s 0m ++----------------------------------------------------- ++Syscall param mlock(addr) contains uninitialised byte(s) ++ ... ++ by 0x........: main (scalar.c:688) ++ ++Syscall param mlock(len) contains uninitialised byte(s) ++ ... 
++ by 0x........: main (scalar.c:688) ++ ++----------------------------------------------------- ++229: __NR_munlock 2s 0m ++----------------------------------------------------- ++Syscall param munlock(addr) contains uninitialised byte(s) ++ ... ++ by 0x........: main (scalar.c:692) ++ ++Syscall param munlock(len) contains uninitialised byte(s) ++ ... ++ by 0x........: main (scalar.c:692) ++ ++----------------------------------------------------- ++230: __NR_mlockall 1s 0m ++----------------------------------------------------- ++Syscall param mlockall(flags) contains uninitialised byte(s) ++ ... ++ by 0x........: main (scalar.c:696) ++ ++----------------------------------------------------- ++231: __NR_munlockall 0s 0m ++----------------------------------------------------- ++----------------------------------------------------- ++118: __NR_sched_setparam 2s 1m ++----------------------------------------------------- ++Syscall param sched_setparam(pid) contains uninitialised byte(s) ++ ... ++ by 0x........: main (scalar.c:704) ++ ++Syscall param sched_setparam(p) contains uninitialised byte(s) ++ ... ++ by 0x........: main (scalar.c:704) ++ ++Syscall param sched_setparam(p) points to unaddressable byte(s) ++ ... ++ by 0x........: main (scalar.c:704) ++ Address 0x........ is not stack'd, malloc'd or (recently) free'd ++ ++----------------------------------------------------- ++121: __NR_sched_getparam 2s 1m ++----------------------------------------------------- ++Syscall param sched_getparam(pid) contains uninitialised byte(s) ++ ... ++ by 0x........: main (scalar.c:708) ++ ++Syscall param sched_getparam(p) contains uninitialised byte(s) ++ ... ++ by 0x........: main (scalar.c:708) ++ ++Syscall param sched_getparam(p) points to unaddressable byte(s) ++ ... ++ by 0x........: main (scalar.c:708) ++ Address 0x........ is not stack'd, malloc'd or (recently) free'd ++ ++----------------------------------------------------- ++119:__NR_sched_setscheduler 3s 1m ++----------------------------------------------------- ++Syscall param sched_setscheduler(pid) contains uninitialised byte(s) ++ ... ++ by 0x........: main (scalar.c:712) ++ ++Syscall param sched_setscheduler(policy) contains uninitialised byte(s) ++ ... ++ by 0x........: main (scalar.c:712) ++ ++Syscall param sched_setscheduler(p) contains uninitialised byte(s) ++ ... ++ by 0x........: main (scalar.c:712) ++ ++Syscall param sched_setscheduler(p) points to unaddressable byte(s) ++ ... ++ by 0x........: main (scalar.c:712) ++ Address 0x........ is not stack'd, malloc'd or (recently) free'd ++ ++----------------------------------------------------- ++120:__NR_sched_getscheduler 1s 0m ++----------------------------------------------------- ++Syscall param sched_getscheduler(pid) contains uninitialised byte(s) ++ ... ++ by 0x........: main (scalar.c:716) ++ ++----------------------------------------------------- ++124: __NR_sched_yield 0s 0m ++----------------------------------------------------- ++----------------------------------------------------- ++125:__NR_sched_get_priority_max 1s 0m ++----------------------------------------------------- ++Syscall param sched_get_priority_max(policy) contains uninitialised byte(s) ++ ... ++ by 0x........: main (scalar.c:724) ++ ++----------------------------------------------------- ++126:__NR_sched_get_priority_min 1s 0m ++----------------------------------------------------- ++Syscall param sched_get_priority_min(policy) contains uninitialised byte(s) ++ ... 
++ by 0x........: main (scalar.c:728) ++ ++----------------------------------------------------- ++127:__NR_sched_rr_get_interval n/a ++----------------------------------------------------- ++----------------------------------------------------- ++101: __NR_nanosleep 2s 2m ++----------------------------------------------------- ++Syscall param nanosleep(req) contains uninitialised byte(s) ++ ... ++ by 0x........: main (scalar.c:736) ++ ++Syscall param nanosleep(rem) contains uninitialised byte(s) ++ ... ++ by 0x........: main (scalar.c:736) ++ ++Syscall param nanosleep(req) points to unaddressable byte(s) ++ ... ++ by 0x........: main (scalar.c:736) ++ Address 0x........ is not stack'd, malloc'd or (recently) free'd ++ ++Syscall param nanosleep(rem) points to unaddressable byte(s) ++ ... ++ by 0x........: main (scalar.c:736) ++ Address 0x........ is not stack'd, malloc'd or (recently) free'd ++ ++----------------------------------------------------- ++216: __NR_mremap 5s 0m ++----------------------------------------------------- ++Syscall param mremap(old_addr) contains uninitialised byte(s) ++ ... ++ by 0x........: main (scalar.c:740) ++ ++Syscall param mremap(old_size) contains uninitialised byte(s) ++ ... ++ by 0x........: main (scalar.c:740) ++ ++Syscall param mremap(new_size) contains uninitialised byte(s) ++ ... ++ by 0x........: main (scalar.c:740) ++ ++Syscall param mremap(flags) contains uninitialised byte(s) ++ ... ++ by 0x........: main (scalar.c:740) ++ ++Syscall param mremap(new_addr) contains uninitialised byte(s) ++ ... ++ by 0x........: main (scalar.c:740) ++ ++----------------------------------------------------- ++147: __NR_setresuid 3s 0m ++----------------------------------------------------- ++Syscall param setresuid(ruid) contains uninitialised byte(s) ++ ... ++ by 0x........: main (scalar.c:744) ++ ++Syscall param setresuid(euid) contains uninitialised byte(s) ++ ... ++ by 0x........: main (scalar.c:744) ++ ++Syscall param setresuid(suid) contains uninitialised byte(s) ++ ... ++ by 0x........: main (scalar.c:744) ++ ++----------------------------------------------------- ++148: __NR_getresuid 3s 3m ++----------------------------------------------------- ++Syscall param getresuid(ruid) contains uninitialised byte(s) ++ ... ++ by 0x........: main (scalar.c:748) ++ ++Syscall param getresuid(euid) contains uninitialised byte(s) ++ ... ++ by 0x........: main (scalar.c:748) ++ ++Syscall param getresuid(suid) contains uninitialised byte(s) ++ ... ++ by 0x........: main (scalar.c:748) ++ ++Syscall param getresuid(ruid) points to unaddressable byte(s) ++ ... ++ by 0x........: main (scalar.c:748) ++ Address 0x........ is not stack'd, malloc'd or (recently) free'd ++ ++Syscall param getresuid(euid) points to unaddressable byte(s) ++ ... ++ by 0x........: main (scalar.c:748) ++ Address 0x........ is not stack'd, malloc'd or (recently) free'd ++ ++Syscall param getresuid(suid) points to unaddressable byte(s) ++ ... ++ by 0x........: main (scalar.c:748) ++ Address 0x........ is not stack'd, malloc'd or (recently) free'd ++ ++----------------------------------------------------- ++ 42: __NR_nfsservctl n/a ++----------------------------------------------------- ++----------------------------------------------------- ++149: __NR_setresgid 3s 0m ++----------------------------------------------------- ++Syscall param setresgid(rgid) contains uninitialised byte(s) ++ ... ++ by 0x........: main (scalar.c:768) ++ ++Syscall param setresgid(egid) contains uninitialised byte(s) ++ ... 
++ by 0x........: main (scalar.c:768) ++ ++Syscall param setresgid(sgid) contains uninitialised byte(s) ++ ... ++ by 0x........: main (scalar.c:768) ++ ++----------------------------------------------------- ++150: __NR_getresgid 3s 3m ++----------------------------------------------------- ++Syscall param getresgid(rgid) contains uninitialised byte(s) ++ ... ++ by 0x........: main (scalar.c:772) ++ ++Syscall param getresgid(egid) contains uninitialised byte(s) ++ ... ++ by 0x........: main (scalar.c:772) ++ ++Syscall param getresgid(sgid) contains uninitialised byte(s) ++ ... ++ by 0x........: main (scalar.c:772) ++ ++Syscall param getresgid(rgid) points to unaddressable byte(s) ++ ... ++ by 0x........: main (scalar.c:772) ++ Address 0x........ is not stack'd, malloc'd or (recently) free'd ++ ++Syscall param getresgid(egid) points to unaddressable byte(s) ++ ... ++ by 0x........: main (scalar.c:772) ++ Address 0x........ is not stack'd, malloc'd or (recently) free'd ++ ++Syscall param getresgid(sgid) points to unaddressable byte(s) ++ ... ++ by 0x........: main (scalar.c:772) ++ Address 0x........ is not stack'd, malloc'd or (recently) free'd ++ ++----------------------------------------------------- ++167: __NR_prctl 5s 0m ++----------------------------------------------------- ++Syscall param prctl(option) contains uninitialised byte(s) ++ ... ++ by 0x........: main (scalar.c:777) ++ ++Syscall param prctl(arg2) contains uninitialised byte(s) ++ ... ++ by 0x........: main (scalar.c:777) ++ ++Syscall param prctl(arg3) contains uninitialised byte(s) ++ ... ++ by 0x........: main (scalar.c:777) ++ ++Syscall param prctl(arg4) contains uninitialised byte(s) ++ ... ++ by 0x........: main (scalar.c:777) ++ ++Syscall param prctl(arg5) contains uninitialised byte(s) ++ ... ++ by 0x........: main (scalar.c:777) ++ ++----------------------------------------------------- ++167: __NR_prctl 2s 0m ++----------------------------------------------------- ++Syscall param prctl(option) contains uninitialised byte(s) ++ ... ++ by 0x........: main (scalar.c:782) ++ ++Syscall param prctl(set-name) points to uninitialised byte(s) ++ ... ++ by 0x........: main (scalar.c:782) ++ Address 0x........ is on thread 1's stack ++ in frame #1, created by main (scalar.c:28) ++ ++----------------------------------------------------- ++167: __NR_prctl 1s 0m ++----------------------------------------------------- ++Syscall param prctl(option) contains uninitialised byte(s) ++ ... ++ by 0x........: main (scalar.c:787) ++ ++----------------------------------------------------- ++139: __NR_rt_sigreturn n/a ++----------------------------------------------------- ++----------------------------------------------------- ++134: __NR_rt_sigaction 4s 4m ++----------------------------------------------------- ++Syscall param rt_sigaction(signum) contains uninitialised byte(s) ++ ... ++ by 0x........: main (scalar.c:795) ++ ++Syscall param rt_sigaction(act) contains uninitialised byte(s) ++ ... ++ by 0x........: main (scalar.c:795) ++ ++Syscall param rt_sigaction(oldact) contains uninitialised byte(s) ++ ... ++ by 0x........: main (scalar.c:795) ++ ++Syscall param rt_sigaction(sigsetsize) contains uninitialised byte(s) ++ ... ++ by 0x........: main (scalar.c:795) ++ ++Syscall param rt_sigaction(act->sa_handler) points to unaddressable byte(s) ++ ... ++ by 0x........: main (scalar.c:795) ++ Address 0x........ is 8 bytes after a block of size 8 alloc'd ++ at 0x........: malloc (vg_replace_malloc.c:...) 
++ by 0x........: main (scalar.c:30) ++ ++Syscall param rt_sigaction(act->sa_mask) points to unaddressable byte(s) ++ ... ++ by 0x........: main (scalar.c:795) ++ Address 0x........ is 16 bytes after a block of size 16 in arena "client" ++ ++Syscall param rt_sigaction(act->sa_flags) points to unaddressable byte(s) ++ ... ++ by 0x........: main (scalar.c:795) ++ Address 0x........ is 16 bytes after a block of size 8 alloc'd ++ at 0x........: malloc (vg_replace_malloc.c:...) ++ by 0x........: main (scalar.c:30) ++ ++Syscall param rt_sigaction(oldact) points to unaddressable byte(s) ++ ... ++ by 0x........: main (scalar.c:795) ++ Address 0x........ is 8 bytes after a block of size 8 alloc'd ++ at 0x........: malloc (vg_replace_malloc.c:...) ++ by 0x........: main (scalar.c:30) ++ ++----------------------------------------------------- ++135: __NR_rt_sigprocmask 4s 2m ++----------------------------------------------------- ++Syscall param rt_sigprocmask(how) contains uninitialised byte(s) ++ ... ++ by 0x........: main (scalar.c:799) ++ ++Syscall param rt_sigprocmask(set) contains uninitialised byte(s) ++ ... ++ by 0x........: main (scalar.c:799) ++ ++Syscall param rt_sigprocmask(oldset) contains uninitialised byte(s) ++ ... ++ by 0x........: main (scalar.c:799) ++ ++Syscall param rt_sigprocmask(sigsetsize) contains uninitialised byte(s) ++ ... ++ by 0x........: main (scalar.c:799) ++ ++Syscall param rt_sigprocmask(set) points to unaddressable byte(s) ++ ... ++ by 0x........: main (scalar.c:799) ++ Address 0x........ is not stack'd, malloc'd or (recently) free'd ++ ++Syscall param rt_sigprocmask(oldset) points to unaddressable byte(s) ++ ... ++ by 0x........: main (scalar.c:799) ++ Address 0x........ is not stack'd, malloc'd or (recently) free'd ++ ++----------------------------------------------------- ++136: __NR_rt_sigpending 2s 1m ++----------------------------------------------------- ++Syscall param rt_sigpending(set) contains uninitialised byte(s) ++ ... ++ by 0x........: main (scalar.c:803) ++ ++Syscall param rt_sigpending(sigsetsize) contains uninitialised byte(s) ++ ... ++ by 0x........: main (scalar.c:803) ++ ++Syscall param rt_sigpending(set) points to unaddressable byte(s) ++ ... ++ by 0x........: main (scalar.c:803) ++ Address 0x........ is not stack'd, malloc'd or (recently) free'd ++ ++----------------------------------------------------- ++137:__NR_rt_sigtimedwait 4s 3m ++----------------------------------------------------- ++Syscall param rt_sigtimedwait(set) contains uninitialised byte(s) ++ ... ++ by 0x........: main (scalar.c:807) ++ ++Syscall param rt_sigtimedwait(info) contains uninitialised byte(s) ++ ... ++ by 0x........: main (scalar.c:807) ++ ++Syscall param rt_sigtimedwait(timeout) contains uninitialised byte(s) ++ ... ++ by 0x........: main (scalar.c:807) ++ ++Syscall param rt_sigtimedwait(sigsetsize) contains uninitialised byte(s) ++ ... ++ by 0x........: main (scalar.c:807) ++ ++Syscall param rt_sigtimedwait(set) points to unaddressable byte(s) ++ ... ++ by 0x........: main (scalar.c:807) ++ Address 0x........ is not stack'd, malloc'd or (recently) free'd ++ ++Syscall param rt_sigtimedwait(info) points to unaddressable byte(s) ++ ... ++ by 0x........: main (scalar.c:807) ++ Address 0x........ is not stack'd, malloc'd or (recently) free'd ++ ++Syscall param rt_sigtimedwait(timeout) points to unaddressable byte(s) ++ ... ++ by 0x........: main (scalar.c:807) ++ Address 0x........ 
is not stack'd, malloc'd or (recently) free'd ++ ++----------------------------------------------------- ++138:__NR_rt_sigqueueinfo 3s 1m ++----------------------------------------------------- ++Syscall param rt_sigqueueinfo(pid) contains uninitialised byte(s) ++ ... ++ by 0x........: main (scalar.c:811) ++ ++Syscall param rt_sigqueueinfo(sig) contains uninitialised byte(s) ++ ... ++ by 0x........: main (scalar.c:811) ++ ++Syscall param rt_sigqueueinfo(uinfo) contains uninitialised byte(s) ++ ... ++ by 0x........: main (scalar.c:811) ++ ++Syscall param rt_sigqueueinfo(uinfo) points to unaddressable byte(s) ++ ... ++ by 0x........: main (scalar.c:811) ++ Address 0x........ is not stack'd, malloc'd or (recently) free'd ++ ++----------------------------------------------------- ++133: __NR_rt_sigsuspend 2s 1m ++----------------------------------------------------- ++Syscall param rt_sigsuspend(mask) contains uninitialised byte(s) ++ ... ++ by 0x........: main (scalar.c:815) ++ ++Syscall param rt_sigsuspend(size) contains uninitialised byte(s) ++ ... ++ by 0x........: main (scalar.c:815) ++ ++Syscall param rt_sigsuspend(mask) points to unaddressable byte(s) ++ ... ++ by 0x........: main (scalar.c:815) ++ Address 0x........ is not stack'd, malloc'd or (recently) free'd ++ ++----------------------------------------------------- ++ 67: __NR_pread64 5s 1m ++----------------------------------------------------- ++Syscall param pread64(fd) contains uninitialised byte(s) ++ ... ++ by 0x........: main (scalar.c:819) ++ ++Syscall param pread64(buf) contains uninitialised byte(s) ++ ... ++ by 0x........: main (scalar.c:819) ++ ++Syscall param pread64(count) contains uninitialised byte(s) ++ ... ++ by 0x........: main (scalar.c:819) ++ ++Syscall param pread64(offset) contains uninitialised byte(s) ++ ... ++ by 0x........: main (scalar.c:819) ++ ++Syscall param pread64(buf) points to unaddressable byte(s) ++ ... ++ by 0x........: main (scalar.c:819) ++ Address 0x........ is not stack'd, malloc'd or (recently) free'd ++ ++----------------------------------------------------- ++ 68: __NR_pwrite64 5s 1m ++----------------------------------------------------- ++Syscall param pwrite64(fd) contains uninitialised byte(s) ++ ... ++ by 0x........: main (scalar.c:823) ++ ++Syscall param pwrite64(buf) contains uninitialised byte(s) ++ ... ++ by 0x........: main (scalar.c:823) ++ ++Syscall param pwrite64(count) contains uninitialised byte(s) ++ ... ++ by 0x........: main (scalar.c:823) ++ ++Syscall param pwrite64(offset) contains uninitialised byte(s) ++ ... ++ by 0x........: main (scalar.c:823) ++ ++Syscall param pwrite64(buf) points to unaddressable byte(s) ++ ... ++ by 0x........: main (scalar.c:823) ++ Address 0x........ is not stack'd, malloc'd or (recently) free'd ++ ++----------------------------------------------------- ++ 17: __NR_getcwd 2s 1m ++----------------------------------------------------- ++Syscall param getcwd(buf) contains uninitialised byte(s) ++ ... ++ by 0x........: main (scalar.c:831) ++ ++Syscall param getcwd(size) contains uninitialised byte(s) ++ ... ++ by 0x........: main (scalar.c:831) ++ ++Syscall param getcwd(buf) points to unaddressable byte(s) ++ ... ++ by 0x........: main (scalar.c:831) ++ Address 0x........ is not stack'd, malloc'd or (recently) free'd ++ ++----------------------------------------------------- ++ 90: __NR_capget 2s 2m ++----------------------------------------------------- ++Syscall param capget(header) contains uninitialised byte(s) ++ ... 
++ by 0x........: main (scalar.c:835) ++ ++Syscall param capget(data) contains uninitialised byte(s) ++ ... ++ by 0x........: main (scalar.c:835) ++ ++Syscall param capget(header) points to unaddressable byte(s) ++ ... ++ by 0x........: main (scalar.c:835) ++ Address 0x........ is not stack'd, malloc'd or (recently) free'd ++ ++Syscall param capget(data) points to unaddressable byte(s) ++ ... ++ by 0x........: main (scalar.c:835) ++ Address 0x........ is not stack'd, malloc'd or (recently) free'd ++ ++----------------------------------------------------- ++ 91: __NR_capset 2s 2m ++----------------------------------------------------- ++Syscall param capset(header) contains uninitialised byte(s) ++ ... ++ by 0x........: main (scalar.c:839) ++ ++Syscall param capset(data) contains uninitialised byte(s) ++ ... ++ by 0x........: main (scalar.c:839) ++ ++Syscall param capset(header) points to unaddressable byte(s) ++ ... ++ by 0x........: main (scalar.c:839) ++ Address 0x........ is not stack'd, malloc'd or (recently) free'd ++ ++Syscall param capset(data) points to unaddressable byte(s) ++ ... ++ by 0x........: main (scalar.c:839) ++ Address 0x........ is not stack'd, malloc'd or (recently) free'd ++ ++----------------------------------------------------- ++132: __NR_sigaltstack 2s 2m ++----------------------------------------------------- ++Syscall param sigaltstack(ss) contains uninitialised byte(s) ++ ... ++ by 0x........: main (scalar.c:853) ++ ++Syscall param sigaltstack(oss) contains uninitialised byte(s) ++ ... ++ by 0x........: main (scalar.c:853) ++ ++Syscall param sigaltstack(ss) points to unaddressable byte(s) ++ ... ++ by 0x........: main (scalar.c:853) ++ Address 0x........ is on thread 1's stack ++ in frame #1, created by main (scalar.c:28) ++ ++Syscall param sigaltstack(oss) points to unaddressable byte(s) ++ ... ++ by 0x........: main (scalar.c:853) ++ Address 0x........ is on thread 1's stack ++ in frame #1, created by main (scalar.c:28) ++ ++----------------------------------------------------- ++ 71: __NR_sendfile 4s 1m ++----------------------------------------------------- ++Syscall param sendfile(out_fd) contains uninitialised byte(s) ++ ... ++ by 0x........: main (scalar.c:858) ++ ++Syscall param sendfile(in_fd) contains uninitialised byte(s) ++ ... ++ by 0x........: main (scalar.c:858) ++ ++Syscall param sendfile(offset) contains uninitialised byte(s) ++ ... ++ by 0x........: main (scalar.c:858) ++ ++Syscall param sendfile(count) contains uninitialised byte(s) ++ ... ++ by 0x........: main (scalar.c:858) ++ ++Syscall param sendfile(offset) points to unaddressable byte(s) ++ ... ++ by 0x........: main (scalar.c:858) ++ Address 0x........ is not stack'd, malloc'd or (recently) free'd ++ ++----------------------------------------------------- ++ 41: __NR_pivot_root n/a ++----------------------------------------------------- ++----------------------------------------------------- ++232: __NR_mincore 3s 1m ++----------------------------------------------------- ++Syscall param mincore(start) contains uninitialised byte(s) ++ ... ++ by 0x........: main (scalar.c:986) ++ ++Syscall param mincore(length) contains uninitialised byte(s) ++ ... ++ by 0x........: main (scalar.c:986) ++ ++Syscall param mincore(vec) contains uninitialised byte(s) ++ ... ++ by 0x........: main (scalar.c:986) ++ ++Syscall param mincore(vec) points to unaddressable byte(s) ++ ... ++ by 0x........: main (scalar.c:986) ++ Address 0x........ 
is not stack'd, malloc'd or (recently) free'd ++ ++----------------------------------------------------- ++233: __NR_madvise 3s 0m ++----------------------------------------------------- ++Syscall param madvise(start) contains uninitialised byte(s) ++ ... ++ by 0x........: main (scalar.c:990) ++ ++Syscall param madvise(length) contains uninitialised byte(s) ++ ... ++ by 0x........: main (scalar.c:990) ++ ++Syscall param madvise(advice) contains uninitialised byte(s) ++ ... ++ by 0x........: main (scalar.c:990) ++ ++----------------------------------------------------- ++ 61: __NR_getdents64 3s 1m ++----------------------------------------------------- ++Syscall param getdents64(fd) contains uninitialised byte(s) ++ ... ++ by 0x........: main (scalar.c:994) ++ ++Syscall param getdents64(dirp) contains uninitialised byte(s) ++ ... ++ by 0x........: main (scalar.c:994) ++ ++Syscall param getdents64(count) contains uninitialised byte(s) ++ ... ++ by 0x........: main (scalar.c:994) ++ ++Syscall param getdents64(dirp) points to unaddressable byte(s) ++ ... ++ by 0x........: main (scalar.c:994) ++ Address 0x........ is not stack'd, malloc'd or (recently) free'd ++ ++----------------------------------------------------- ++222: 222 ni ++----------------------------------------------------- ++----------------------------------------------------- ++223: 223 ni ++----------------------------------------------------- ++----------------------------------------------------- ++178: __NR_gettid n/a ++----------------------------------------------------- ++----------------------------------------------------- ++213: __NR_readahead n/a ++----------------------------------------------------- ++----------------------------------------------------- ++ 5: __NR_setxattr 5s 3m ++----------------------------------------------------- ++Syscall param setxattr(path) contains uninitialised byte(s) ++ ... ++ by 0x........: main (scalar.c:1031) ++ ++Syscall param setxattr(name) contains uninitialised byte(s) ++ ... ++ by 0x........: main (scalar.c:1031) ++ ++Syscall param setxattr(value) contains uninitialised byte(s) ++ ... ++ by 0x........: main (scalar.c:1031) ++ ++Syscall param setxattr(size) contains uninitialised byte(s) ++ ... ++ by 0x........: main (scalar.c:1031) ++ ++Syscall param setxattr(flags) contains uninitialised byte(s) ++ ... ++ by 0x........: main (scalar.c:1031) ++ ++Syscall param setxattr(path) points to unaddressable byte(s) ++ ... ++ by 0x........: main (scalar.c:1031) ++ Address 0x........ is not stack'd, malloc'd or (recently) free'd ++ ++Syscall param setxattr(name) points to unaddressable byte(s) ++ ... ++ by 0x........: main (scalar.c:1031) ++ Address 0x........ is not stack'd, malloc'd or (recently) free'd ++ ++Syscall param setxattr(value) points to unaddressable byte(s) ++ ... ++ by 0x........: main (scalar.c:1031) ++ Address 0x........ is not stack'd, malloc'd or (recently) free'd ++ ++----------------------------------------------------- ++ 6: __NR_lsetxattr 5s 3m ++----------------------------------------------------- ++Syscall param lsetxattr(path) contains uninitialised byte(s) ++ ... ++ by 0x........: main (scalar.c:1035) ++ ++Syscall param lsetxattr(name) contains uninitialised byte(s) ++ ... ++ by 0x........: main (scalar.c:1035) ++ ++Syscall param lsetxattr(value) contains uninitialised byte(s) ++ ... ++ by 0x........: main (scalar.c:1035) ++ ++Syscall param lsetxattr(size) contains uninitialised byte(s) ++ ... 
++ by 0x........: main (scalar.c:1035) ++ ++Syscall param lsetxattr(flags) contains uninitialised byte(s) ++ ... ++ by 0x........: main (scalar.c:1035) ++ ++Syscall param lsetxattr(path) points to unaddressable byte(s) ++ ... ++ by 0x........: main (scalar.c:1035) ++ Address 0x........ is not stack'd, malloc'd or (recently) free'd ++ ++Syscall param lsetxattr(name) points to unaddressable byte(s) ++ ... ++ by 0x........: main (scalar.c:1035) ++ Address 0x........ is not stack'd, malloc'd or (recently) free'd ++ ++Syscall param lsetxattr(value) points to unaddressable byte(s) ++ ... ++ by 0x........: main (scalar.c:1035) ++ Address 0x........ is not stack'd, malloc'd or (recently) free'd ++ ++----------------------------------------------------- ++ 7: __NR_fsetxattr 5s 2m ++----------------------------------------------------- ++Syscall param fsetxattr(fd) contains uninitialised byte(s) ++ ... ++ by 0x........: main (scalar.c:1039) ++ ++Syscall param fsetxattr(name) contains uninitialised byte(s) ++ ... ++ by 0x........: main (scalar.c:1039) ++ ++Syscall param fsetxattr(value) contains uninitialised byte(s) ++ ... ++ by 0x........: main (scalar.c:1039) ++ ++Syscall param fsetxattr(size) contains uninitialised byte(s) ++ ... ++ by 0x........: main (scalar.c:1039) ++ ++Syscall param fsetxattr(flags) contains uninitialised byte(s) ++ ... ++ by 0x........: main (scalar.c:1039) ++ ++Syscall param fsetxattr(name) points to unaddressable byte(s) ++ ... ++ by 0x........: main (scalar.c:1039) ++ Address 0x........ is not stack'd, malloc'd or (recently) free'd ++ ++Syscall param fsetxattr(value) points to unaddressable byte(s) ++ ... ++ by 0x........: main (scalar.c:1039) ++ Address 0x........ is not stack'd, malloc'd or (recently) free'd ++ ++----------------------------------------------------- ++ 8: __NR_getxattr 4s 3m ++----------------------------------------------------- ++Syscall param getxattr(path) contains uninitialised byte(s) ++ ... ++ by 0x........: main (scalar.c:1043) ++ ++Syscall param getxattr(name) contains uninitialised byte(s) ++ ... ++ by 0x........: main (scalar.c:1043) ++ ++Syscall param getxattr(value) contains uninitialised byte(s) ++ ... ++ by 0x........: main (scalar.c:1043) ++ ++Syscall param getxattr(size) contains uninitialised byte(s) ++ ... ++ by 0x........: main (scalar.c:1043) ++ ++Syscall param getxattr(path) points to unaddressable byte(s) ++ ... ++ by 0x........: main (scalar.c:1043) ++ Address 0x........ is not stack'd, malloc'd or (recently) free'd ++ ++Syscall param getxattr(name) points to unaddressable byte(s) ++ ... ++ by 0x........: main (scalar.c:1043) ++ Address 0x........ is not stack'd, malloc'd or (recently) free'd ++ ++Syscall param getxattr(value) points to unaddressable byte(s) ++ ... ++ by 0x........: main (scalar.c:1043) ++ Address 0x........ is not stack'd, malloc'd or (recently) free'd ++ ++----------------------------------------------------- ++ 9: __NR_lgetxattr 4s 3m ++----------------------------------------------------- ++Syscall param lgetxattr(path) contains uninitialised byte(s) ++ ... ++ by 0x........: main (scalar.c:1047) ++ ++Syscall param lgetxattr(name) contains uninitialised byte(s) ++ ... ++ by 0x........: main (scalar.c:1047) ++ ++Syscall param lgetxattr(value) contains uninitialised byte(s) ++ ... ++ by 0x........: main (scalar.c:1047) ++ ++Syscall param lgetxattr(size) contains uninitialised byte(s) ++ ... ++ by 0x........: main (scalar.c:1047) ++ ++Syscall param lgetxattr(path) points to unaddressable byte(s) ++ ... 
++ by 0x........: main (scalar.c:1047) ++ Address 0x........ is not stack'd, malloc'd or (recently) free'd ++ ++Syscall param lgetxattr(name) points to unaddressable byte(s) ++ ... ++ by 0x........: main (scalar.c:1047) ++ Address 0x........ is not stack'd, malloc'd or (recently) free'd ++ ++Syscall param lgetxattr(value) points to unaddressable byte(s) ++ ... ++ by 0x........: main (scalar.c:1047) ++ Address 0x........ is not stack'd, malloc'd or (recently) free'd ++ ++----------------------------------------------------- ++ 10: __NR_fgetxattr 4s 2m ++----------------------------------------------------- ++Syscall param fgetxattr(fd) contains uninitialised byte(s) ++ ... ++ by 0x........: main (scalar.c:1051) ++ ++Syscall param fgetxattr(name) contains uninitialised byte(s) ++ ... ++ by 0x........: main (scalar.c:1051) ++ ++Syscall param fgetxattr(value) contains uninitialised byte(s) ++ ... ++ by 0x........: main (scalar.c:1051) ++ ++Syscall param fgetxattr(size) contains uninitialised byte(s) ++ ... ++ by 0x........: main (scalar.c:1051) ++ ++Syscall param fgetxattr(name) points to unaddressable byte(s) ++ ... ++ by 0x........: main (scalar.c:1051) ++ Address 0x........ is not stack'd, malloc'd or (recently) free'd ++ ++Syscall param fgetxattr(value) points to unaddressable byte(s) ++ ... ++ by 0x........: main (scalar.c:1051) ++ Address 0x........ is not stack'd, malloc'd or (recently) free'd ++ ++----------------------------------------------------- ++ 11: __NR_listxattr 3s 2m ++----------------------------------------------------- ++Syscall param listxattr(path) contains uninitialised byte(s) ++ ... ++ by 0x........: main (scalar.c:1055) ++ ++Syscall param listxattr(list) contains uninitialised byte(s) ++ ... ++ by 0x........: main (scalar.c:1055) ++ ++Syscall param listxattr(size) contains uninitialised byte(s) ++ ... ++ by 0x........: main (scalar.c:1055) ++ ++Syscall param listxattr(path) points to unaddressable byte(s) ++ ... ++ by 0x........: main (scalar.c:1055) ++ Address 0x........ is not stack'd, malloc'd or (recently) free'd ++ ++Syscall param listxattr(list) points to unaddressable byte(s) ++ ... ++ by 0x........: main (scalar.c:1055) ++ Address 0x........ is not stack'd, malloc'd or (recently) free'd ++ ++----------------------------------------------------- ++ 12: __NR_llistxattr 3s 2m ++----------------------------------------------------- ++Syscall param llistxattr(path) contains uninitialised byte(s) ++ ... ++ by 0x........: main (scalar.c:1059) ++ ++Syscall param llistxattr(list) contains uninitialised byte(s) ++ ... ++ by 0x........: main (scalar.c:1059) ++ ++Syscall param llistxattr(size) contains uninitialised byte(s) ++ ... ++ by 0x........: main (scalar.c:1059) ++ ++Syscall param llistxattr(path) points to unaddressable byte(s) ++ ... ++ by 0x........: main (scalar.c:1059) ++ Address 0x........ is not stack'd, malloc'd or (recently) free'd ++ ++Syscall param llistxattr(list) points to unaddressable byte(s) ++ ... ++ by 0x........: main (scalar.c:1059) ++ Address 0x........ is not stack'd, malloc'd or (recently) free'd ++ ++----------------------------------------------------- ++ 13: __NR_flistxattr 3s 1m ++----------------------------------------------------- ++Syscall param flistxattr(fd) contains uninitialised byte(s) ++ ... ++ by 0x........: main (scalar.c:1063) ++ ++Syscall param flistxattr(list) contains uninitialised byte(s) ++ ... ++ by 0x........: main (scalar.c:1063) ++ ++Syscall param flistxattr(size) contains uninitialised byte(s) ++ ... 
++ by 0x........: main (scalar.c:1063) ++ ++Syscall param flistxattr(list) points to unaddressable byte(s) ++ ... ++ by 0x........: main (scalar.c:1063) ++ Address 0x........ is not stack'd, malloc'd or (recently) free'd ++ ++----------------------------------------------------- ++ 14: __NR_removexattr 2s 2m ++----------------------------------------------------- ++Syscall param removexattr(path) contains uninitialised byte(s) ++ ... ++ by 0x........: main (scalar.c:1067) ++ ++Syscall param removexattr(name) contains uninitialised byte(s) ++ ... ++ by 0x........: main (scalar.c:1067) ++ ++Syscall param removexattr(path) points to unaddressable byte(s) ++ ... ++ by 0x........: main (scalar.c:1067) ++ Address 0x........ is not stack'd, malloc'd or (recently) free'd ++ ++Syscall param removexattr(name) points to unaddressable byte(s) ++ ... ++ by 0x........: main (scalar.c:1067) ++ Address 0x........ is not stack'd, malloc'd or (recently) free'd ++ ++----------------------------------------------------- ++ 15: __NR_lremovexattr 2s 2m ++----------------------------------------------------- ++Syscall param lremovexattr(path) contains uninitialised byte(s) ++ ... ++ by 0x........: main (scalar.c:1071) ++ ++Syscall param lremovexattr(name) contains uninitialised byte(s) ++ ... ++ by 0x........: main (scalar.c:1071) ++ ++Syscall param lremovexattr(path) points to unaddressable byte(s) ++ ... ++ by 0x........: main (scalar.c:1071) ++ Address 0x........ is not stack'd, malloc'd or (recently) free'd ++ ++Syscall param lremovexattr(name) points to unaddressable byte(s) ++ ... ++ by 0x........: main (scalar.c:1071) ++ Address 0x........ is not stack'd, malloc'd or (recently) free'd ++ ++----------------------------------------------------- ++ 16: __NR_fremovexattr 2s 1m ++----------------------------------------------------- ++Syscall param fremovexattr(fd) contains uninitialised byte(s) ++ ... ++ by 0x........: main (scalar.c:1075) ++ ++Syscall param fremovexattr(name) contains uninitialised byte(s) ++ ... ++ by 0x........: main (scalar.c:1075) ++ ++Syscall param fremovexattr(name) points to unaddressable byte(s) ++ ... ++ by 0x........: main (scalar.c:1075) ++ Address 0x........ is not stack'd, malloc'd or (recently) free'd ++ ++----------------------------------------------------- ++130: __NR_tkill n/a ++----------------------------------------------------- ++----------------------------------------------------- ++ 98: __NR_futex 4s 2m ++----------------------------------------------------- ++Syscall param futex(futex) contains uninitialised byte(s) ++ ... ++ by 0x........: main (scalar.c:1091) ++ ++Syscall param futex(op) contains uninitialised byte(s) ++ ... ++ by 0x........: main (scalar.c:1091) ++ ++Syscall param futex(val) contains uninitialised byte(s) ++ ... ++ by 0x........: main (scalar.c:1091) ++ ++Syscall param futex(utime) contains uninitialised byte(s) ++ ... ++ by 0x........: main (scalar.c:1091) ++ ++Syscall param futex(futex) points to unaddressable byte(s) ++ ... ++ by 0x........: main (scalar.c:1091) ++ Address 0x........ is not stack'd, malloc'd or (recently) free'd ++ ++Syscall param futex(timeout) points to unaddressable byte(s) ++ ... ++ by 0x........: main (scalar.c:1091) ++ Address 0x........ is not stack'd, malloc'd or (recently) free'd ++ ++----------------------------------------------------- ++122:__NR_sched_setaffinity 3s 1m ++----------------------------------------------------- ++Syscall param sched_setaffinity(pid) contains uninitialised byte(s) ++ ... 
++ by 0x........: main (scalar.c:1095) ++ ++Syscall param sched_setaffinity(len) contains uninitialised byte(s) ++ ... ++ by 0x........: main (scalar.c:1095) ++ ++Syscall param sched_setaffinity(mask) contains uninitialised byte(s) ++ ... ++ by 0x........: main (scalar.c:1095) ++ ++Syscall param sched_setaffinity(mask) points to unaddressable byte(s) ++ ... ++ by 0x........: main (scalar.c:1095) ++ Address 0x........ is not stack'd, malloc'd or (recently) free'd ++ ++----------------------------------------------------- ++123:__NR_sched_getaffinity 3s 1m ++----------------------------------------------------- ++Syscall param sched_getaffinity(pid) contains uninitialised byte(s) ++ ... ++ by 0x........: main (scalar.c:1099) ++ ++Syscall param sched_getaffinity(len) contains uninitialised byte(s) ++ ... ++ by 0x........: main (scalar.c:1099) ++ ++Syscall param sched_getaffinity(mask) contains uninitialised byte(s) ++ ... ++ by 0x........: main (scalar.c:1099) ++ ++Syscall param sched_getaffinity(mask) points to unaddressable byte(s) ++ ... ++ by 0x........: main (scalar.c:1099) ++ Address 0x........ is not stack'd, malloc'd or (recently) free'd ++ ++----------------------------------------------------- ++ 0: __NR_io_setup 2s 1m ++----------------------------------------------------- ++Syscall param io_setup(nr_events) contains uninitialised byte(s) ++ ... ++ by 0x........: main (scalar.c:1111) ++ ++Syscall param io_setup(ctxp) contains uninitialised byte(s) ++ ... ++ by 0x........: main (scalar.c:1111) ++ ++Syscall param io_setup(ctxp) points to unaddressable byte(s) ++ ... ++ by 0x........: main (scalar.c:1111) ++ Address 0x........ is not stack'd, malloc'd or (recently) free'd ++ ++----------------------------------------------------- ++ 1: __NR_io_destroy 1s 0m ++----------------------------------------------------- ++Syscall param io_destroy(ctx) contains uninitialised byte(s) ++ ... ++ by 0x........: main (scalar.c:1124) ++ ++----------------------------------------------------- ++ 4: __NR_io_getevents 5s 2m ++----------------------------------------------------- ++Syscall param io_getevents(ctx_id) contains uninitialised byte(s) ++ ... ++ by 0x........: main (scalar.c:1129) ++ ++Syscall param io_getevents(min_nr) contains uninitialised byte(s) ++ ... ++ by 0x........: main (scalar.c:1129) ++ ++Syscall param io_getevents(nr) contains uninitialised byte(s) ++ ... ++ by 0x........: main (scalar.c:1129) ++ ++Syscall param io_getevents(events) contains uninitialised byte(s) ++ ... ++ by 0x........: main (scalar.c:1129) ++ ++Syscall param io_getevents(timeout) contains uninitialised byte(s) ++ ... ++ by 0x........: main (scalar.c:1129) ++ ++Syscall param io_getevents(events) points to unaddressable byte(s) ++ ... ++ by 0x........: main (scalar.c:1129) ++ Address 0x........ is not stack'd, malloc'd or (recently) free'd ++ ++Syscall param io_getevents(timeout) points to unaddressable byte(s) ++ ... ++ by 0x........: main (scalar.c:1129) ++ Address 0x........ is not stack'd, malloc'd or (recently) free'd ++ ++----------------------------------------------------- ++ 2: __NR_io_submit 3s 1m ++----------------------------------------------------- ++Syscall param io_submit(ctx_id) contains uninitialised byte(s) ++ ... ++ by 0x........: main (scalar.c:1133) ++ ++Syscall param io_submit(nr) contains uninitialised byte(s) ++ ... ++ by 0x........: main (scalar.c:1133) ++ ++Syscall param io_submit(iocbpp) contains uninitialised byte(s) ++ ... 
++ by 0x........: main (scalar.c:1133) ++ ++Syscall param io_submit(iocbpp) points to unaddressable byte(s) ++ ... ++ by 0x........: main (scalar.c:1133) ++ Address 0x........ is not stack'd, malloc'd or (recently) free'd ++ ++----------------------------------------------------- ++ 3: __NR_io_cancel 3s 2m ++----------------------------------------------------- ++Syscall param io_cancel(ctx_id) contains uninitialised byte(s) ++ ... ++ by 0x........: main (scalar.c:1137) ++ ++Syscall param io_cancel(iocb) contains uninitialised byte(s) ++ ... ++ by 0x........: main (scalar.c:1137) ++ ++Syscall param io_cancel(result) contains uninitialised byte(s) ++ ... ++ by 0x........: main (scalar.c:1137) ++ ++Syscall param io_cancel(iocb) points to unaddressable byte(s) ++ ... ++ by 0x........: main (scalar.c:1137) ++ Address 0x........ is not stack'd, malloc'd or (recently) free'd ++ ++Syscall param io_cancel(result) points to unaddressable byte(s) ++ ... ++ by 0x........: main (scalar.c:1137) ++ Address 0x........ is not stack'd, malloc'd or (recently) free'd ++ ++----------------------------------------------------- ++223: __NR_fadvise64 n/a ++----------------------------------------------------- ++----------------------------------------------------- ++251: 251 ni ++----------------------------------------------------- ++WARNING: unhandled loongarch64-linux syscall: 251 ++You may be able to write your own handler. ++Read the file README_MISSING_SYSCALL_OR_IOCTL. ++Nevertheless we consider this a bug. Please report ++it at http://valgrind.org/support/bug_reports.html. ++----------------------------------------------------- ++ 94: __NR_exit_group other ++----------------------------------------------------- ++----------------------------------------------------- ++ 18: __NR_lookup_dcookie 4s 1m ++----------------------------------------------------- ++Syscall param lookup_dcookie(cookie) contains uninitialised byte(s) ++ ... ++ by 0x........: main (scalar.c:1153) ++ ++Syscall param lookup_dcookie(buf) contains uninitialised byte(s) ++ ... ++ by 0x........: main (scalar.c:1153) ++ ++Syscall param lookup_dcookie(len) contains uninitialised byte(s) ++ ... ++ by 0x........: main (scalar.c:1153) ++ ++----------------------------------------------------- ++ 21: __NR_epoll_ctl 4s 1m ++----------------------------------------------------- ++Syscall param epoll_ctl(epfd) contains uninitialised byte(s) ++ ... ++ by 0x........: main (scalar.c:1161) ++ ++Syscall param epoll_ctl(op) contains uninitialised byte(s) ++ ... ++ by 0x........: main (scalar.c:1161) ++ ++Syscall param epoll_ctl(fd) contains uninitialised byte(s) ++ ... ++ by 0x........: main (scalar.c:1161) ++ ++Syscall param epoll_ctl(event) contains uninitialised byte(s) ++ ... ++ by 0x........: main (scalar.c:1161) ++ ++Syscall param epoll_ctl(event) points to unaddressable byte(s) ++ ... ++ by 0x........: main (scalar.c:1161) ++ Address 0x........ is not stack'd, malloc'd or (recently) free'd ++ ++----------------------------------------------------- ++234:__NR_remap_file_pages n/a ++----------------------------------------------------- ++----------------------------------------------------- ++ 96:__NR_set_tid_address 1s 0m ++----------------------------------------------------- ++Syscall param set_tid_address(tidptr) contains uninitialised byte(s) ++ ... 
++ by 0x........: main (scalar.c:1173) ++ ++----------------------------------------------------- ++107: __NR_timer_create 3s 2m ++----------------------------------------------------- ++Syscall param timer_create(clockid) contains uninitialised byte(s) ++ ... ++ by 0x........: main (scalar.c:1177) ++ ++Syscall param timer_create(evp) contains uninitialised byte(s) ++ ... ++ by 0x........: main (scalar.c:1177) ++ ++Syscall param timer_create(timerid) contains uninitialised byte(s) ++ ... ++ by 0x........: main (scalar.c:1177) ++ ++Syscall param timer_create(evp.sigev_value) points to unaddressable byte(s) ++ ... ++ by 0x........: main (scalar.c:1177) ++ Address 0x........ is not stack'd, malloc'd or (recently) free'd ++ ++Syscall param timer_create(evp.sigev_signo) points to unaddressable byte(s) ++ ... ++ by 0x........: main (scalar.c:1177) ++ Address 0x........ is not stack'd, malloc'd or (recently) free'd ++ ++Syscall param timer_create(evp.sigev_notify) points to unaddressable byte(s) ++ ... ++ by 0x........: main (scalar.c:1177) ++ Address 0x........ is not stack'd, malloc'd or (recently) free'd ++ ++Syscall param timer_create(timerid) points to unaddressable byte(s) ++ ... ++ by 0x........: main (scalar.c:1177) ++ Address 0x........ is not stack'd, malloc'd or (recently) free'd ++ ++----------------------------------------------------- ++110: __NR_timer_settime 4s 2m ++----------------------------------------------------- ++Syscall param timer_settime(timerid) contains uninitialised byte(s) ++ ... ++ by 0x........: main (scalar.c:1181) ++ ++Syscall param timer_settime(flags) contains uninitialised byte(s) ++ ... ++ by 0x........: main (scalar.c:1181) ++ ++Syscall param timer_settime(value) contains uninitialised byte(s) ++ ... ++ by 0x........: main (scalar.c:1181) ++ ++Syscall param timer_settime(ovalue) contains uninitialised byte(s) ++ ... ++ by 0x........: main (scalar.c:1181) ++ ++Syscall param timer_settime(value) points to unaddressable byte(s) ++ ... ++ by 0x........: main (scalar.c:1181) ++ Address 0x........ is not stack'd, malloc'd or (recently) free'd ++ ++Syscall param timer_settime(ovalue) points to unaddressable byte(s) ++ ... ++ by 0x........: main (scalar.c:1181) ++ Address 0x........ is not stack'd, malloc'd or (recently) free'd ++ ++----------------------------------------------------- ++108: __NR_timer_gettime 2s 1m ++----------------------------------------------------- ++Syscall param timer_gettime(timerid) contains uninitialised byte(s) ++ ... ++ by 0x........: main (scalar.c:1185) ++ ++Syscall param timer_gettime(value) contains uninitialised byte(s) ++ ... ++ by 0x........: main (scalar.c:1185) ++ ++Syscall param timer_gettime(value) points to unaddressable byte(s) ++ ... ++ by 0x........: main (scalar.c:1185) ++ Address 0x........ is not stack'd, malloc'd or (recently) free'd ++ ++----------------------------------------------------- ++109:__NR_timer_getoverrun 1s 0m ++----------------------------------------------------- ++Syscall param timer_getoverrun(timerid) contains uninitialised byte(s) ++ ... ++ by 0x........: main (scalar.c:1189) ++ ++----------------------------------------------------- ++111: __NR_timer_delete 1s 0m ++----------------------------------------------------- ++Syscall param timer_delete(timerid) contains uninitialised byte(s) ++ ... 
++ by 0x........: main (scalar.c:1193) ++ ++----------------------------------------------------- ++112: __NR_clock_settime 2s 1m ++----------------------------------------------------- ++Syscall param clock_settime(clk_id) contains uninitialised byte(s) ++ ... ++ by 0x........: main (scalar.c:1197) ++ ++Syscall param clock_settime(tp) contains uninitialised byte(s) ++ ... ++ by 0x........: main (scalar.c:1197) ++ ++Syscall param clock_settime(tp) points to unaddressable byte(s) ++ ... ++ by 0x........: main (scalar.c:1197) ++ Address 0x........ is not stack'd, malloc'd or (recently) free'd ++ ++----------------------------------------------------- ++113: __NR_clock_gettime 2s 1m ++----------------------------------------------------- ++Syscall param clock_gettime(clk_id) contains uninitialised byte(s) ++ ... ++ by 0x........: main (scalar.c:1201) ++ ++Syscall param clock_gettime(tp) contains uninitialised byte(s) ++ ... ++ by 0x........: main (scalar.c:1201) ++ ++Syscall param clock_gettime(tp) points to unaddressable byte(s) ++ ... ++ by 0x........: main (scalar.c:1201) ++ Address 0x........ is not stack'd, malloc'd or (recently) free'd ++ ++----------------------------------------------------- ++114: __NR_clock_getres 2s 1m ++----------------------------------------------------- ++Syscall param clock_getres(clk_id) contains uninitialised byte(s) ++ ... ++ by 0x........: main (scalar.c:1205) ++ ++Syscall param clock_getres(res) contains uninitialised byte(s) ++ ... ++ by 0x........: main (scalar.c:1205) ++ ++Syscall param clock_getres(res) points to unaddressable byte(s) ++ ... ++ by 0x........: main (scalar.c:1205) ++ Address 0x........ is not stack'd, malloc'd or (recently) free'd ++ ++----------------------------------------------------- ++115:__NR_clock_nanosleep n/a ++----------------------------------------------------- ++----------------------------------------------------- ++131: __NR_tgkill n/a ++----------------------------------------------------- ++----------------------------------------------------- ++235: __NR_mbind n/a ++----------------------------------------------------- ++----------------------------------------------------- ++236: __NR_get_mempolicy n/a ++----------------------------------------------------- ++----------------------------------------------------- ++237: __NR_set_mempolicy n/a ++----------------------------------------------------- ++----------------------------------------------------- ++180: __NR_mq_open 4s 3m ++----------------------------------------------------- ++Syscall param mq_open(name) contains uninitialised byte(s) ++ ... ++ by 0x........: main (scalar.c:1249) ++ ++Syscall param mq_open(oflag) contains uninitialised byte(s) ++ ... ++ by 0x........: main (scalar.c:1249) ++ ++Syscall param mq_open(mode) contains uninitialised byte(s) ++ ... ++ by 0x........: main (scalar.c:1249) ++ ++Syscall param mq_open(attr) contains uninitialised byte(s) ++ ... ++ by 0x........: main (scalar.c:1249) ++ ++Syscall param mq_open(name) points to unaddressable byte(s) ++ ... ++ by 0x........: main (scalar.c:1249) ++ Address 0x........ is not stack'd, malloc'd or (recently) free'd ++ ++Syscall param mq_open(attr->mq_maxmsg) points to unaddressable byte(s) ++ ... ++ by 0x........: main (scalar.c:1249) ++ Address 0x........ is not stack'd, malloc'd or (recently) free'd ++ ++Syscall param mq_open(attr->mq_msgsize) points to unaddressable byte(s) ++ ... ++ by 0x........: main (scalar.c:1249) ++ Address 0x........ 
is not stack'd, malloc'd or (recently) free'd ++ ++----------------------------------------------------- ++181: __NR_mq_unlink 1s 1m ++----------------------------------------------------- ++Syscall param mq_unlink(name) contains uninitialised byte(s) ++ ... ++ by 0x........: main (scalar.c:1253) ++ ++Syscall param mq_unlink(name) points to unaddressable byte(s) ++ ... ++ by 0x........: main (scalar.c:1253) ++ Address 0x........ is not stack'd, malloc'd or (recently) free'd ++ ++----------------------------------------------------- ++182: __NR_mq_timedsend 5s 2m ++----------------------------------------------------- ++Syscall param mq_timedsend(mqdes) contains uninitialised byte(s) ++ ... ++ by 0x........: main (scalar.c:1257) ++ ++Syscall param mq_timedsend(msg_ptr) contains uninitialised byte(s) ++ ... ++ by 0x........: main (scalar.c:1257) ++ ++Syscall param mq_timedsend(msg_len) contains uninitialised byte(s) ++ ... ++ by 0x........: main (scalar.c:1257) ++ ++Syscall param mq_timedsend(msg_prio) contains uninitialised byte(s) ++ ... ++ by 0x........: main (scalar.c:1257) ++ ++Syscall param mq_timedsend(abs_timeout) contains uninitialised byte(s) ++ ... ++ by 0x........: main (scalar.c:1257) ++ ++Syscall param mq_timedsend(msg_ptr) points to unaddressable byte(s) ++ ... ++ by 0x........: main (scalar.c:1257) ++ Address 0x........ is not stack'd, malloc'd or (recently) free'd ++ ++Syscall param mq_timedsend(abs_timeout) points to unaddressable byte(s) ++ ... ++ by 0x........: main (scalar.c:1257) ++ Address 0x........ is not stack'd, malloc'd or (recently) free'd ++ ++----------------------------------------------------- ++183:__NR_mq_timedreceive 5s 3m ++----------------------------------------------------- ++Syscall param mq_timedreceive(mqdes) contains uninitialised byte(s) ++ ... ++ by 0x........: main (scalar.c:1261) ++ ++Syscall param mq_timedreceive(msg_ptr) contains uninitialised byte(s) ++ ... ++ by 0x........: main (scalar.c:1261) ++ ++Syscall param mq_timedreceive(msg_len) contains uninitialised byte(s) ++ ... ++ by 0x........: main (scalar.c:1261) ++ ++Syscall param mq_timedreceive(msg_prio) contains uninitialised byte(s) ++ ... ++ by 0x........: main (scalar.c:1261) ++ ++Syscall param mq_timedreceive(abs_timeout) contains uninitialised byte(s) ++ ... ++ by 0x........: main (scalar.c:1261) ++ ++Syscall param mq_timedreceive(msg_ptr) points to unaddressable byte(s) ++ ... ++ by 0x........: main (scalar.c:1261) ++ Address 0x........ is not stack'd, malloc'd or (recently) free'd ++ ++Syscall param mq_timedreceive(msg_prio) points to unaddressable byte(s) ++ ... ++ by 0x........: main (scalar.c:1261) ++ Address 0x........ is not stack'd, malloc'd or (recently) free'd ++ ++Syscall param mq_timedreceive(abs_timeout) points to unaddressable byte(s) ++ ... ++ by 0x........: main (scalar.c:1261) ++ Address 0x........ is not stack'd, malloc'd or (recently) free'd ++ ++----------------------------------------------------- ++184: __NR_mq_notify 2s 1m ++----------------------------------------------------- ++Syscall param mq_notify(mqdes) contains uninitialised byte(s) ++ ... ++ by 0x........: main (scalar.c:1265) ++ ++Syscall param mq_notify(notification) contains uninitialised byte(s) ++ ... ++ by 0x........: main (scalar.c:1265) ++ ++Syscall param mq_notify(notification) points to unaddressable byte(s) ++ ... ++ by 0x........: main (scalar.c:1265) ++ Address 0x........ 
is not stack'd, malloc'd or (recently) free'd ++ ++----------------------------------------------------- ++185: __NR_mq_getsetattr 3s 2m ++----------------------------------------------------- ++Syscall param mq_getsetattr(mqdes) contains uninitialised byte(s) ++ ... ++ by 0x........: main (scalar.c:1269) ++ ++Syscall param mq_getsetattr(mqstat) contains uninitialised byte(s) ++ ... ++ by 0x........: main (scalar.c:1269) ++ ++Syscall param mq_getsetattr(omqstat) contains uninitialised byte(s) ++ ... ++ by 0x........: main (scalar.c:1269) ++ ++Syscall param mq_getsetattr(mqstat->mq_flags) points to unaddressable byte(s) ++ ... ++ by 0x........: main (scalar.c:1269) ++ Address 0x........ is not stack'd, malloc'd or (recently) free'd ++ ++Syscall param mq_getsetattr(omqstat) points to unaddressable byte(s) ++ ... ++ by 0x........: main (scalar.c:1269) ++ Address 0x........ is not stack'd, malloc'd or (recently) free'd ++ ++----------------------------------------------------- ++ 20: __NR_epoll_create1 1s 0m ++----------------------------------------------------- ++Syscall param epoll_create1(flags) contains uninitialised byte(s) ++ ... ++ by 0x........: main (scalar.c:1277) ++ ++----------------------------------------------------- ++270:__NR_process_vm_readv 6s 2m ++----------------------------------------------------- ++Syscall param process_vm_readv(pid) contains uninitialised byte(s) ++ ... ++ by 0x........: main (scalar.c:1281) ++ ++Syscall param process_vm_readv(lvec) contains uninitialised byte(s) ++ ... ++ by 0x........: main (scalar.c:1281) ++ ++Syscall param process_vm_readv(liovcnt) contains uninitialised byte(s) ++ ... ++ by 0x........: main (scalar.c:1281) ++ ++Syscall param process_vm_readv(rvec) contains uninitialised byte(s) ++ ... ++ by 0x........: main (scalar.c:1281) ++ ++Syscall param process_vm_readv(riovcnt) contains uninitialised byte(s) ++ ... ++ by 0x........: main (scalar.c:1281) ++ ++Syscall param process_vm_readv(flags) contains uninitialised byte(s) ++ ... ++ by 0x........: main (scalar.c:1281) ++ ++Syscall param process_vm_readv(lvec) points to unaddressable byte(s) ++ ... ++ by 0x........: main (scalar.c:1281) ++ Address 0x........ is not stack'd, malloc'd or (recently) free'd ++ ++Syscall param process_vm_readv(rvec) points to unaddressable byte(s) ++ ... ++ by 0x........: main (scalar.c:1281) ++ Address 0x........ is not stack'd, malloc'd or (recently) free'd ++ ++----------------------------------------------------- ++271:__NR_process_vm_writev 6s 2m ++----------------------------------------------------- ++Syscall param process_vm_writev(pid) contains uninitialised byte(s) ++ ... ++ by 0x........: main (scalar.c:1285) ++ ++Syscall param process_vm_writev(lvec) contains uninitialised byte(s) ++ ... ++ by 0x........: main (scalar.c:1285) ++ ++Syscall param process_vm_writev(liovcnt) contains uninitialised byte(s) ++ ... ++ by 0x........: main (scalar.c:1285) ++ ++Syscall param process_vm_writev(rvec) contains uninitialised byte(s) ++ ... ++ by 0x........: main (scalar.c:1285) ++ ++Syscall param process_vm_writev(riovcnt) contains uninitialised byte(s) ++ ... ++ by 0x........: main (scalar.c:1285) ++ ++Syscall param process_vm_writev(flags) contains uninitialised byte(s) ++ ... ++ by 0x........: main (scalar.c:1285) ++ ++Syscall param process_vm_writev(lvec) points to unaddressable byte(s) ++ ... ++ by 0x........: main (scalar.c:1285) ++ Address 0x........ 
is not stack'd, malloc'd or (recently) free'd ++ ++Syscall param process_vm_writev(rvec) points to unaddressable byte(s) ++ ... ++ by 0x........: main (scalar.c:1285) ++ Address 0x........ is not stack'd, malloc'd or (recently) free'd ++ ++----------------------------------------------------- ++9999: 9999 1e ++----------------------------------------------------- ++WARNING: unhandled loongarch64-linux syscall: 9999 ++You may be able to write your own handler. ++Read the file README_MISSING_SYSCALL_OR_IOCTL. ++Nevertheless we consider this a bug. Please report ++it at http://valgrind.org/support/bug_reports.html. ++----------------------------------------------------- ++ 93: __NR_exit 1s 0m ++----------------------------------------------------- ++Syscall param exit(status) contains uninitialised byte(s) ++ ... ++ by 0x........: main (scalar.c:1293) ++ +diff --git a/memcheck/tests/loongarch64-linux/scalar.vgtest b/memcheck/tests/loongarch64-linux/scalar.vgtest +new file mode 100644 +index 000000000..53e87e8d7 +--- /dev/null ++++ b/memcheck/tests/loongarch64-linux/scalar.vgtest +@@ -0,0 +1,5 @@ ++prog: scalar ++# Do not run under root ++prereq: [ `id -u` -ne 0 ] ++vgopts: -q --error-limit=no ++args: < scalar.c +diff --git a/memcheck/tests/unit_libcbase.c b/memcheck/tests/unit_libcbase.c +index 0ce65be26..48036033e 100644 +--- a/memcheck/tests/unit_libcbase.c ++++ b/memcheck/tests/unit_libcbase.c +@@ -9,14 +9,14 @@ + #include "pub_tool_vki.h" + #include "m_libcbase.c" + +-/* On PPC, MIPS and ARM64 Linux VKI_PAGE_SIZE is a variable, not a macro. */ ++/* On PPC, MIPS, ARM64 and LOONGARCH64 Linux VKI_PAGE_SIZE is a variable, not a macro. */ + #if defined(VGP_ppc32_linux) || defined(VGP_ppc64be_linux) \ + || defined(VGP_ppc64le_linux) + unsigned long VKI_PAGE_SIZE = 1UL << 12; + #elif defined(VGP_arm64_linux) + unsigned long VKI_PAGE_SIZE = 1UL << 16; + #elif defined(VGP_mips32_linux) || defined(VGP_mips64_linux) \ +- || defined (VGP_nanomips_linux) ++ || defined (VGP_nanomips_linux) || defined(VGP_loongarch64_linux) + #include + unsigned long VKI_PAGE_SIZE; + #endif +diff --git a/memcheck/tests/vbit-test/irops.c b/memcheck/tests/vbit-test/irops.c +index a09470905..3ed4a2578 100644 +--- a/memcheck/tests/vbit-test/irops.c ++++ b/memcheck/tests/vbit-test/irops.c +@@ -34,287 +34,297 @@ + That is not necessary but helpful when supporting a new architecture. 
+ */ + static irop_t irops[] = { +- { DEFOP(Iop_Add8, UNDEF_LEFT), .s390x = 1, .amd64 = 1, .x86 = 1, .arm = 0, .ppc64 = 1, .ppc32 = 1, .mips32 = 1, .mips64 = 1 }, +- { DEFOP(Iop_Add16, UNDEF_LEFT), .s390x = 1, .amd64 = 1, .x86 = 1, .arm = 0, .ppc64 = 1, .ppc32 = 1, .mips32 = 1, .mips64 = 1 }, +- { DEFOP(Iop_Add32, UNDEF_INT_ADD), .s390x = 1, .amd64 = 1, .x86 = 1, .arm = 1, .ppc64 = 1, .ppc32 = 1, .mips32 = 1, .mips64 = 1 }, +- { DEFOP(Iop_Add64, UNDEF_INT_ADD), .s390x = 1, .amd64 = 1, .x86 = 1, .arm = 1, .ppc64 = 1, .ppc32 = 1, .mips32 = 0, .mips64 = 1 }, // mips asserts +- { DEFOP(Iop_Sub8, UNDEF_LEFT), .s390x = 1, .amd64 = 1, .x86 = 1, .arm = 0, .ppc64 = 1, .ppc32 = 1, .mips32 = 1, .mips64 = 1 }, +- { DEFOP(Iop_Sub16, UNDEF_LEFT), .s390x = 1, .amd64 = 1, .x86 = 1, .arm = 0, .ppc64 = 0, .ppc32 = 0, .mips32 = 1, .mips64 = 1 }, +- { DEFOP(Iop_Sub32, UNDEF_INT_SUB), .s390x = 1, .amd64 = 1, .x86 = 1, .arm = 1, .ppc64 = 1, .ppc32 = 1, .mips32 = 1, .mips64 = 1 }, +- { DEFOP(Iop_Sub64, UNDEF_INT_SUB), .s390x = 1, .amd64 = 1, .x86 = 1, .arm = 1, .ppc64 = 1, .ppc32 = 0, .mips32 = 0, .mips64 = 1 }, // ppc32, mips assert +- { DEFOP(Iop_Mul8, UNDEF_LEFT), .s390x = 0, .amd64 = 0, .x86 = 0, .arm = 0, .ppc64 = 0, .ppc32 = 0, .mips32 = 0, .mips64 = 0 }, +- { DEFOP(Iop_Mul16, UNDEF_LEFT), .s390x = 0, .amd64 = 1, .x86 = 1, .arm = 0, .ppc64 = 0, .ppc32 = 0, .mips32 = 0, .mips64 = 0 }, +- { DEFOP(Iop_Mul32, UNDEF_LEFT), .s390x = 0, .amd64 = 1, .x86 = 1, .arm = 1, .ppc64 = 1, .ppc32 = 1, .mips32 = 1, .mips64 = 1 }, +- { DEFOP(Iop_Mul64, UNDEF_LEFT), .s390x = 0, .amd64 = 1, .x86 = 0, .arm = 0, .ppc64 = 1, .ppc32 = 0, .mips32 = 0, .mips64 = 1 }, // ppc32, mips assert +- { DEFOP(Iop_Or1, UNDEF_OR), .s390x = 1, .amd64 = 1, .x86 = 1, .arm = 1, .ppc64 = 1, .ppc32 = 1, .mips32 = 1, .mips64 = 1 }, +- { DEFOP(Iop_Or8, UNDEF_OR), .s390x = 1, .amd64 = 1, .x86 = 1, .arm = 0, .ppc64 = 1, .ppc32 = 1, .mips32 = 1, .mips64 = 1 }, +- { DEFOP(Iop_Or16, UNDEF_OR), .s390x = 1, .amd64 = 1, .x86 = 1, .arm = 0, .ppc64 = 1, .ppc32 = 1, .mips32 = 1, .mips64 = 1 }, +- { DEFOP(Iop_Or32, UNDEF_OR), .s390x = 1, .amd64 = 1, .x86 = 1, .arm = 1, .ppc64 = 1, .ppc32 = 1, .mips32 = 1, .mips64 = 1 }, +- { DEFOP(Iop_Or64, UNDEF_OR), .s390x = 1, .amd64 = 1, .x86 = 1, .arm = 1, .ppc64 = 1, .ppc32 = 1, .mips32 = 0, .mips64 = 1 }, // mips asserts +- { DEFOP(Iop_And1, UNDEF_AND), .s390x = 1, .amd64 = 1, .x86 = 1, .arm = 1, .ppc64 = 1, .ppc32 = 1, .mips32 = 1, .mips64 = 1 }, +- { DEFOP(Iop_And8, UNDEF_AND), .s390x = 1, .amd64 = 1, .x86 = 1, .arm = 0, .ppc64 = 1, .ppc32 = 1, .mips32 = 1, .mips64 = 1 }, +- { DEFOP(Iop_And16, UNDEF_AND), .s390x = 1, .amd64 = 1, .x86 = 1, .arm = 0, .ppc64 = 1, .ppc32 = 1, .mips32 = 1, .mips64 = 1 }, +- { DEFOP(Iop_And32, UNDEF_AND), .s390x = 1, .amd64 = 1, .x86 = 1, .arm = 1, .ppc64 = 1, .ppc32 = 1, .mips32 = 1, .mips64 = 1 }, +- { DEFOP(Iop_And64, UNDEF_AND), .s390x = 1, .amd64 = 1, .x86 = 1, .arm = 1, .ppc64 = 1, .ppc32 = 1, .mips32 = 1, .mips64 = 1 }, +- { DEFOP(Iop_Xor8, UNDEF_SAME), .s390x = 1, .amd64 = 1, .x86 = 1, .arm = 0, .ppc64 = 1, .ppc32 = 1, .mips32 = 1, .mips64 = 1 }, +- { DEFOP(Iop_Xor16, UNDEF_SAME), .s390x = 1, .amd64 = 1, .x86 = 1, .arm = 0, .ppc64 = 1, .ppc32 = 1, .mips32 = 1, .mips64 = 1 }, +- { DEFOP(Iop_Xor32, UNDEF_SAME), .s390x = 1, .amd64 = 1, .x86 = 1, .arm = 1, .ppc64 = 1, .ppc32 = 1, .mips32 = 1, .mips64 = 1 }, +- { DEFOP(Iop_Xor64, UNDEF_SAME), .s390x = 1, .amd64 = 1, .x86 = 1, .arm = 1, .ppc64 = 1, .ppc32 = 1, .mips32 = 1, .mips64 = 1 }, +- { DEFOP(Iop_Shl8, UNDEF_SHL), .s390x = 1, .amd64 = 
1, .x86 = 1, .arm = 0, .ppc64 = 1, .ppc32 = 1, .mips32 = 0, .mips64 = 0 }, +- { DEFOP(Iop_Shl16, UNDEF_SHL), .s390x = 1, .amd64 = 1, .x86 = 1, .arm = 0, .ppc64 = 1, .ppc32 = 1, .mips32 = 0, .mips64 = 0 }, +- { DEFOP(Iop_Shl32, UNDEF_SHL), .s390x = 1, .amd64 = 1, .x86 = 1, .arm = 1, .ppc64 = 1, .ppc32 = 1, .mips32 = 1, .mips64 = 1 }, +- { DEFOP(Iop_Shl64, UNDEF_SHL), .s390x = 1, .amd64 = 1, .x86 = 1, .arm = 1, .ppc64 = 1, .ppc32 = 0, .mips32 = 0, .mips64 = 1 }, // ppc32 asserts +- { DEFOP(Iop_Shr8, UNDEF_SHR), .s390x = 1, .amd64 = 1, .x86 = 1, .arm = 0, .ppc64 = 0, .ppc32 = 0, .mips32 = 0, .mips64 = 0 }, // ppc32/64 assert +- { DEFOP(Iop_Shr16, UNDEF_SHR), .s390x = 1, .amd64 = 1, .x86 = 1, .arm = 0, .ppc64 = 0, .ppc32 = 0, .mips32 = 1, .mips64 = 1 }, // ppc32/64 assert +- { DEFOP(Iop_Shr32, UNDEF_SHR), .s390x = 1, .amd64 = 1, .x86 = 1, .arm = 1, .ppc64 = 1, .ppc32 = 1, .mips32 = 1, .mips64 = 1 }, +- { DEFOP(Iop_Shr64, UNDEF_SHR), .s390x = 1, .amd64 = 1, .x86 = 1, .arm = 1, .ppc64 = 1, .ppc32 = 0, .mips32 = 0, .mips64 = 1 }, // ppc32 asserts +- { DEFOP(Iop_Sar8, UNDEF_SAR), .s390x = 1, .amd64 = 1, .x86 = 1, .arm = 0, .ppc64 = 0, .ppc32 = 0, .mips32 = 0, .mips64 = 0 }, // ppc32/64 assert +- { DEFOP(Iop_Sar16, UNDEF_SAR), .s390x = 1, .amd64 = 1, .x86 = 1, .arm = 0, .ppc64 = 0, .ppc32 = 0, .mips32 = 1, .mips64 = 1 }, // ppc32/64 assert +- { DEFOP(Iop_Sar32, UNDEF_SAR), .s390x = 1, .amd64 = 1, .x86 = 1, .arm = 1, .ppc64 = 1, .ppc32 = 1, .mips32 = 1, .mips64 = 1 }, +- { DEFOP(Iop_Sar64, UNDEF_SAR), .s390x = 1, .amd64 = 1, .x86 = 0, .arm = 1, .ppc64 = 1, .ppc32 = 0, .mips32 = 0, .mips64 = 1 }, // ppc32 asserts +- { DEFOP(Iop_CmpEQ8, UNDEF_CMP_EQ_NE), .s390x = 1, .amd64 = 1, .x86 = 1, .arm = 0, .ppc64 = 0, .ppc32 = 0, .mips32 = 0, .mips64 = 0 }, +- { DEFOP(Iop_CmpEQ16, UNDEF_CMP_EQ_NE), .s390x = 1, .amd64 = 1, .x86 = 1, .arm = 0, .ppc64 = 0, .ppc32 = 0, .mips32 = 1, .mips64 = 1 }, +- { DEFOP(Iop_CmpEQ32, UNDEF_CMP_EQ_NE), .s390x = 1, .amd64 = 1, .x86 = 1, .arm = 1, .ppc64 = 1, .ppc32 = 1, .mips32 = 1, .mips64 = 1 }, +- { DEFOP(Iop_CmpEQ64, UNDEF_CMP_EQ_NE), .s390x = 1, .amd64 = 1, .x86 = 1, .arm = 0, .ppc64 = 1, .ppc32 = 0, .mips32 = 0, .mips64 = 1 }, // ppc32, mips assert +- { DEFOP(Iop_CmpNE8, UNDEF_CMP_EQ_NE), .s390x = 1, .amd64 = 1, .x86 = 1, .arm = 0, .ppc64 = 0, .ppc32 = 0, .mips32 = 0, .mips64 = 0 }, +- { DEFOP(Iop_CmpNE16, UNDEF_CMP_EQ_NE), .s390x = 1, .amd64 = 1, .x86 = 1, .arm = 0, .ppc64 = 0, .ppc32 = 0, .mips32 = 0, .mips64 = 0 }, +- { DEFOP(Iop_CmpNE32, UNDEF_CMP_EQ_NE), .s390x = 1, .amd64 = 1, .x86 = 1, .arm = 1, .ppc64 = 1, .ppc32 = 1, .mips32 = 1, .mips64 = 1 }, +- { DEFOP(Iop_CmpNE64, UNDEF_CMP_EQ_NE), .s390x = 1, .amd64 = 1, .x86 = 1, .arm = 0, .ppc64 = 1, .ppc32 = 0, .mips32 = 0, .mips64 = 1 }, // ppc32, mips assert +- { DEFOP(Iop_Not1, UNDEF_SAME), .s390x = 1, .amd64 = 1, .x86 = 1, .arm = 1, .ppc64 = 1, .ppc32 = 1, .mips32 = 1, .mips64 = 1 }, +- { DEFOP(Iop_Not8, UNDEF_SAME), .s390x = 1, .amd64 = 1, .x86 = 1, .arm = 0, .ppc64 = 1, .ppc32 = 1, .mips32 = 1, .mips64 = 1 }, +- { DEFOP(Iop_Not16, UNDEF_SAME), .s390x = 1, .amd64 = 1, .x86 = 1, .arm = 0, .ppc64 = 1, .ppc32 = 1, .mips32 = 1, .mips64 = 1 }, +- { DEFOP(Iop_Not32, UNDEF_SAME), .s390x = 1, .amd64 = 1, .x86 = 1, .arm = 1, .ppc64 = 1, .ppc32 = 1, .mips32 = 1, .mips64 = 1 }, +- { DEFOP(Iop_Not64, UNDEF_SAME), .s390x = 1, .amd64 = 1, .x86 = 1, .arm = 1, .ppc64 = 1, .ppc32 = 1, .mips32 = 0, .mips64 = 0 }, +- { DEFOP(Iop_CasCmpEQ8, UNDEF_NONE), .s390x = 1, .amd64 = 1, .x86 = 1, .arm = 0, .ppc64 = 0, .ppc32 = 0, .mips32 = 0, 
.mips64 = 0 }, +- { DEFOP(Iop_CasCmpEQ16, UNDEF_NONE), .s390x = 1, .amd64 = 1, .x86 = 1, .arm = 0, .ppc64 = 0, .ppc32 = 0, .mips32 = 0, .mips64 = 0 }, +- { DEFOP(Iop_CasCmpEQ32, UNDEF_NONE), .s390x = 1, .amd64 = 1, .x86 = 1, .arm = 0, .ppc64 = 0, .ppc32 = 0, .mips32 = 0, .mips64 = 0 }, +- { DEFOP(Iop_CasCmpEQ64, UNDEF_NONE), .s390x = 1, .amd64 = 1, .x86 = 0, .arm = 0, .ppc64 = 0, .ppc32 = 0, .mips32 = 0, .mips64 = 0 }, ++ { DEFOP(Iop_Add8, UNDEF_LEFT), .s390x = 1, .amd64 = 1, .x86 = 1, .arm = 0, .ppc64 = 1, .ppc32 = 1, .mips32 = 1, .mips64 = 1, .loongarch64 = 0 }, ++ { DEFOP(Iop_Add16, UNDEF_LEFT), .s390x = 1, .amd64 = 1, .x86 = 1, .arm = 0, .ppc64 = 1, .ppc32 = 1, .mips32 = 1, .mips64 = 1, .loongarch64 = 0 }, ++ { DEFOP(Iop_Add32, UNDEF_INT_ADD), .s390x = 1, .amd64 = 1, .x86 = 1, .arm = 1, .ppc64 = 1, .ppc32 = 1, .mips32 = 1, .mips64 = 1, .loongarch64 = 1 }, ++ { DEFOP(Iop_Add64, UNDEF_INT_ADD), .s390x = 1, .amd64 = 1, .x86 = 1, .arm = 1, .ppc64 = 1, .ppc32 = 1, .mips32 = 0, .mips64 = 1, .loongarch64 = 1 }, // mips asserts ++ { DEFOP(Iop_Sub8, UNDEF_LEFT), .s390x = 1, .amd64 = 1, .x86 = 1, .arm = 0, .ppc64 = 1, .ppc32 = 1, .mips32 = 1, .mips64 = 1, .loongarch64 = 0 }, ++ { DEFOP(Iop_Sub16, UNDEF_LEFT), .s390x = 1, .amd64 = 1, .x86 = 1, .arm = 0, .ppc64 = 0, .ppc32 = 0, .mips32 = 1, .mips64 = 1, .loongarch64 = 0 }, ++ { DEFOP(Iop_Sub32, UNDEF_INT_SUB), .s390x = 1, .amd64 = 1, .x86 = 1, .arm = 1, .ppc64 = 1, .ppc32 = 1, .mips32 = 1, .mips64 = 1, .loongarch64 = 1 }, ++ { DEFOP(Iop_Sub64, UNDEF_INT_SUB), .s390x = 1, .amd64 = 1, .x86 = 1, .arm = 1, .ppc64 = 1, .ppc32 = 0, .mips32 = 0, .mips64 = 1, .loongarch64 = 1 }, // ppc32, mips assert ++ { DEFOP(Iop_Mul8, UNDEF_LEFT), .s390x = 0, .amd64 = 0, .x86 = 0, .arm = 0, .ppc64 = 0, .ppc32 = 0, .mips32 = 0, .mips64 = 0, .loongarch64 = 0 }, ++ { DEFOP(Iop_Mul16, UNDEF_LEFT), .s390x = 0, .amd64 = 1, .x86 = 1, .arm = 0, .ppc64 = 0, .ppc32 = 0, .mips32 = 0, .mips64 = 0, .loongarch64 = 0 }, ++ { DEFOP(Iop_Mul32, UNDEF_LEFT), .s390x = 0, .amd64 = 1, .x86 = 1, .arm = 1, .ppc64 = 1, .ppc32 = 1, .mips32 = 1, .mips64 = 1, .loongarch64 = 0 }, ++ { DEFOP(Iop_Mul64, UNDEF_LEFT), .s390x = 0, .amd64 = 1, .x86 = 0, .arm = 0, .ppc64 = 1, .ppc32 = 0, .mips32 = 0, .mips64 = 1, .loongarch64 = 0 }, // ppc32, mips assert ++ { DEFOP(Iop_Or1, UNDEF_OR), .s390x = 1, .amd64 = 1, .x86 = 1, .arm = 1, .ppc64 = 1, .ppc32 = 1, .mips32 = 1, .mips64 = 1, .loongarch64 = 1 }, ++ { DEFOP(Iop_Or8, UNDEF_OR), .s390x = 1, .amd64 = 1, .x86 = 1, .arm = 0, .ppc64 = 1, .ppc32 = 1, .mips32 = 1, .mips64 = 1, .loongarch64 = 0 }, ++ { DEFOP(Iop_Or16, UNDEF_OR), .s390x = 1, .amd64 = 1, .x86 = 1, .arm = 0, .ppc64 = 1, .ppc32 = 1, .mips32 = 1, .mips64 = 1, .loongarch64 = 0 }, ++ { DEFOP(Iop_Or32, UNDEF_OR), .s390x = 1, .amd64 = 1, .x86 = 1, .arm = 1, .ppc64 = 1, .ppc32 = 1, .mips32 = 1, .mips64 = 1, .loongarch64 = 1 }, ++ { DEFOP(Iop_Or64, UNDEF_OR), .s390x = 1, .amd64 = 1, .x86 = 1, .arm = 1, .ppc64 = 1, .ppc32 = 1, .mips32 = 0, .mips64 = 1, .loongarch64 = 1 }, // mips asserts ++ { DEFOP(Iop_And1, UNDEF_AND), .s390x = 1, .amd64 = 1, .x86 = 1, .arm = 1, .ppc64 = 1, .ppc32 = 1, .mips32 = 1, .mips64 = 1, .loongarch64 = 1 }, ++ { DEFOP(Iop_And8, UNDEF_AND), .s390x = 1, .amd64 = 1, .x86 = 1, .arm = 0, .ppc64 = 1, .ppc32 = 1, .mips32 = 1, .mips64 = 1, .loongarch64 = 0 }, ++ { DEFOP(Iop_And16, UNDEF_AND), .s390x = 1, .amd64 = 1, .x86 = 1, .arm = 0, .ppc64 = 1, .ppc32 = 1, .mips32 = 1, .mips64 = 1, .loongarch64 = 0 }, ++ { DEFOP(Iop_And32, UNDEF_AND), .s390x = 1, .amd64 = 1, .x86 = 1, .arm = 1, .ppc64 = 
1, .ppc32 = 1, .mips32 = 1, .mips64 = 1, .loongarch64 = 1 }, ++ { DEFOP(Iop_And64, UNDEF_AND), .s390x = 1, .amd64 = 1, .x86 = 1, .arm = 1, .ppc64 = 1, .ppc32 = 1, .mips32 = 1, .mips64 = 1, .loongarch64 = 1 }, ++ { DEFOP(Iop_Xor8, UNDEF_SAME), .s390x = 1, .amd64 = 1, .x86 = 1, .arm = 0, .ppc64 = 1, .ppc32 = 1, .mips32 = 1, .mips64 = 1, .loongarch64 = 0 }, ++ { DEFOP(Iop_Xor16, UNDEF_SAME), .s390x = 1, .amd64 = 1, .x86 = 1, .arm = 0, .ppc64 = 1, .ppc32 = 1, .mips32 = 1, .mips64 = 1, .loongarch64 = 0 }, ++ { DEFOP(Iop_Xor32, UNDEF_SAME), .s390x = 1, .amd64 = 1, .x86 = 1, .arm = 1, .ppc64 = 1, .ppc32 = 1, .mips32 = 1, .mips64 = 1, .loongarch64 = 1 }, ++ { DEFOP(Iop_Xor64, UNDEF_SAME), .s390x = 1, .amd64 = 1, .x86 = 1, .arm = 1, .ppc64 = 1, .ppc32 = 1, .mips32 = 1, .mips64 = 1, .loongarch64 = 1 }, ++ { DEFOP(Iop_Shl8, UNDEF_SHL), .s390x = 1, .amd64 = 1, .x86 = 1, .arm = 0, .ppc64 = 1, .ppc32 = 1, .mips32 = 0, .mips64 = 0, .loongarch64 = 0 }, ++ { DEFOP(Iop_Shl16, UNDEF_SHL), .s390x = 1, .amd64 = 1, .x86 = 1, .arm = 0, .ppc64 = 1, .ppc32 = 1, .mips32 = 0, .mips64 = 0, .loongarch64 = 0 }, ++ { DEFOP(Iop_Shl32, UNDEF_SHL), .s390x = 1, .amd64 = 1, .x86 = 1, .arm = 1, .ppc64 = 1, .ppc32 = 1, .mips32 = 1, .mips64 = 1, .loongarch64 = 1 }, ++ { DEFOP(Iop_Shl64, UNDEF_SHL), .s390x = 1, .amd64 = 1, .x86 = 1, .arm = 1, .ppc64 = 1, .ppc32 = 0, .mips32 = 0, .mips64 = 1, .loongarch64 = 1 }, // ppc32 asserts ++ { DEFOP(Iop_Shr8, UNDEF_SHR), .s390x = 1, .amd64 = 1, .x86 = 1, .arm = 0, .ppc64 = 0, .ppc32 = 0, .mips32 = 0, .mips64 = 0, .loongarch64 = 0 }, // ppc32/64 assert ++ { DEFOP(Iop_Shr16, UNDEF_SHR), .s390x = 1, .amd64 = 1, .x86 = 1, .arm = 0, .ppc64 = 0, .ppc32 = 0, .mips32 = 1, .mips64 = 1, .loongarch64 = 0 }, // ppc32/64 assert ++ { DEFOP(Iop_Shr32, UNDEF_SHR), .s390x = 1, .amd64 = 1, .x86 = 1, .arm = 1, .ppc64 = 1, .ppc32 = 1, .mips32 = 1, .mips64 = 1, .loongarch64 = 1 }, ++ { DEFOP(Iop_Shr64, UNDEF_SHR), .s390x = 1, .amd64 = 1, .x86 = 1, .arm = 1, .ppc64 = 1, .ppc32 = 0, .mips32 = 0, .mips64 = 1, .loongarch64 = 1 }, // ppc32 asserts ++ { DEFOP(Iop_Sar8, UNDEF_SAR), .s390x = 1, .amd64 = 1, .x86 = 1, .arm = 0, .ppc64 = 0, .ppc32 = 0, .mips32 = 0, .mips64 = 0, .loongarch64 = 0 }, // ppc32/64 assert ++ { DEFOP(Iop_Sar16, UNDEF_SAR), .s390x = 1, .amd64 = 1, .x86 = 1, .arm = 0, .ppc64 = 0, .ppc32 = 0, .mips32 = 1, .mips64 = 1, .loongarch64 = 0 }, // ppc32/64 assert ++ { DEFOP(Iop_Sar32, UNDEF_SAR), .s390x = 1, .amd64 = 1, .x86 = 1, .arm = 1, .ppc64 = 1, .ppc32 = 1, .mips32 = 1, .mips64 = 1, .loongarch64 = 1 }, ++ { DEFOP(Iop_Sar64, UNDEF_SAR), .s390x = 1, .amd64 = 1, .x86 = 0, .arm = 1, .ppc64 = 1, .ppc32 = 0, .mips32 = 0, .mips64 = 1, .loongarch64 = 1 }, // ppc32 asserts ++ { DEFOP(Iop_CmpEQ8, UNDEF_CMP_EQ_NE), .s390x = 1, .amd64 = 1, .x86 = 1, .arm = 0, .ppc64 = 0, .ppc32 = 0, .mips32 = 0, .mips64 = 0, .loongarch64 = 0 }, ++ { DEFOP(Iop_CmpEQ16, UNDEF_CMP_EQ_NE), .s390x = 1, .amd64 = 1, .x86 = 1, .arm = 0, .ppc64 = 0, .ppc32 = 0, .mips32 = 1, .mips64 = 1, .loongarch64 = 0 }, ++ { DEFOP(Iop_CmpEQ32, UNDEF_CMP_EQ_NE), .s390x = 1, .amd64 = 1, .x86 = 1, .arm = 1, .ppc64 = 1, .ppc32 = 1, .mips32 = 1, .mips64 = 1, .loongarch64 = 1 }, ++ { DEFOP(Iop_CmpEQ64, UNDEF_CMP_EQ_NE), .s390x = 1, .amd64 = 1, .x86 = 1, .arm = 0, .ppc64 = 1, .ppc32 = 0, .mips32 = 0, .mips64 = 1, .loongarch64 = 1 }, // ppc32, mips assert ++ { DEFOP(Iop_CmpNE8, UNDEF_CMP_EQ_NE), .s390x = 1, .amd64 = 1, .x86 = 1, .arm = 0, .ppc64 = 0, .ppc32 = 0, .mips32 = 0, .mips64 = 0, .loongarch64 = 0 }, ++ { DEFOP(Iop_CmpNE16, UNDEF_CMP_EQ_NE), .s390x = 
1, .amd64 = 1, .x86 = 1, .arm = 0, .ppc64 = 0, .ppc32 = 0, .mips32 = 0, .mips64 = 0, .loongarch64 = 0 }, ++ { DEFOP(Iop_CmpNE32, UNDEF_CMP_EQ_NE), .s390x = 1, .amd64 = 1, .x86 = 1, .arm = 1, .ppc64 = 1, .ppc32 = 1, .mips32 = 1, .mips64 = 1, .loongarch64 = 1 }, ++ { DEFOP(Iop_CmpNE64, UNDEF_CMP_EQ_NE), .s390x = 1, .amd64 = 1, .x86 = 1, .arm = 0, .ppc64 = 1, .ppc32 = 0, .mips32 = 0, .mips64 = 1, .loongarch64 = 1 }, // ppc32, mips assert ++ { DEFOP(Iop_Not1, UNDEF_SAME), .s390x = 1, .amd64 = 1, .x86 = 1, .arm = 1, .ppc64 = 1, .ppc32 = 1, .mips32 = 1, .mips64 = 1, .loongarch64 = 1 }, ++ { DEFOP(Iop_Not8, UNDEF_SAME), .s390x = 1, .amd64 = 1, .x86 = 1, .arm = 0, .ppc64 = 1, .ppc32 = 1, .mips32 = 1, .mips64 = 1, .loongarch64 = 0 }, ++ { DEFOP(Iop_Not16, UNDEF_SAME), .s390x = 1, .amd64 = 1, .x86 = 1, .arm = 0, .ppc64 = 1, .ppc32 = 1, .mips32 = 1, .mips64 = 1, .loongarch64 = 0 }, ++ { DEFOP(Iop_Not32, UNDEF_SAME), .s390x = 1, .amd64 = 1, .x86 = 1, .arm = 1, .ppc64 = 1, .ppc32 = 1, .mips32 = 1, .mips64 = 1, .loongarch64 = 1 }, ++ { DEFOP(Iop_Not64, UNDEF_SAME), .s390x = 1, .amd64 = 1, .x86 = 1, .arm = 1, .ppc64 = 1, .ppc32 = 1, .mips32 = 0, .mips64 = 0, .loongarch64 = 1 }, ++ { DEFOP(Iop_CasCmpEQ8, UNDEF_NONE), .s390x = 1, .amd64 = 1, .x86 = 1, .arm = 0, .ppc64 = 0, .ppc32 = 0, .mips32 = 0, .mips64 = 0, .loongarch64 = 0 }, ++ { DEFOP(Iop_CasCmpEQ16, UNDEF_NONE), .s390x = 1, .amd64 = 1, .x86 = 1, .arm = 0, .ppc64 = 0, .ppc32 = 0, .mips32 = 0, .mips64 = 0, .loongarch64 = 0 }, ++ { DEFOP(Iop_CasCmpEQ32, UNDEF_NONE), .s390x = 1, .amd64 = 1, .x86 = 1, .arm = 0, .ppc64 = 0, .ppc32 = 0, .mips32 = 0, .mips64 = 0, .loongarch64 = 1 }, ++ { DEFOP(Iop_CasCmpEQ64, UNDEF_NONE), .s390x = 1, .amd64 = 1, .x86 = 0, .arm = 0, .ppc64 = 0, .ppc32 = 0, .mips32 = 0, .mips64 = 0, .loongarch64 = 1 }, + +- { DEFOP(Iop_CasCmpNE8, UNDEF_NONE), .s390x = 1, .amd64 = 1, .x86 = 1, .arm = 0, .ppc64 = 0, .ppc32 = 0, .mips32 = 0, .mips64 = 0 }, +- { DEFOP(Iop_CasCmpNE16, UNDEF_NONE), .s390x = 1, .amd64 = 1, .x86 = 1, .arm = 0, .ppc64 = 0, .ppc32 = 0, .mips32 = 0, .mips64 = 0 }, +- { DEFOP(Iop_CasCmpNE32, UNDEF_NONE), .s390x = 1, .amd64 = 1, .x86 = 1, .arm = 0, .ppc64 = 0, .ppc32 = 0, .mips32 = 0, .mips64 = 0 }, +- { DEFOP(Iop_CasCmpNE64, UNDEF_NONE), .s390x = 1, .amd64 = 1, .x86 = 0, .arm = 0, .ppc64 = 0, .ppc32 = 0, .mips32 = 0, .mips64 = 0 }, ++ { DEFOP(Iop_CasCmpNE8, UNDEF_NONE), .s390x = 1, .amd64 = 1, .x86 = 1, .arm = 0, .ppc64 = 0, .ppc32 = 0, .mips32 = 0, .mips64 = 0, .loongarch64 = 0 }, ++ { DEFOP(Iop_CasCmpNE16, UNDEF_NONE), .s390x = 1, .amd64 = 1, .x86 = 1, .arm = 0, .ppc64 = 0, .ppc32 = 0, .mips32 = 0, .mips64 = 0, .loongarch64 = 0 }, ++ { DEFOP(Iop_CasCmpNE32, UNDEF_NONE), .s390x = 1, .amd64 = 1, .x86 = 1, .arm = 0, .ppc64 = 0, .ppc32 = 0, .mips32 = 0, .mips64 = 0, .loongarch64 = 1 }, ++ { DEFOP(Iop_CasCmpNE64, UNDEF_NONE), .s390x = 1, .amd64 = 1, .x86 = 0, .arm = 0, .ppc64 = 0, .ppc32 = 0, .mips32 = 0, .mips64 = 0, .loongarch64 = 1 }, + { DEFOP(Iop_ExpCmpNE8, UNDEF_UNKNOWN), }, // exact (expensive) equality + { DEFOP(Iop_ExpCmpNE16, UNDEF_UNKNOWN), }, // exact (expensive) equality + { DEFOP(Iop_ExpCmpNE32, UNDEF_UNKNOWN), }, // exact (expensive) equality + { DEFOP(Iop_ExpCmpNE64, UNDEF_UNKNOWN), }, // exact (expensive) equality +- { DEFOP(Iop_MullS8, UNDEF_LEFT), .s390x = 1, .amd64 = 1, .x86 = 1, .arm = 0, .ppc64 = 0, .ppc32 = 0, .mips32 = 0, .mips64 = 0 }, +- { DEFOP(Iop_MullS16, UNDEF_LEFT), .s390x = 1, .amd64 = 1, .x86 = 1, .arm = 0, .ppc64 = 0, .ppc32 = 0, .mips32 = 0, .mips64 = 0 }, +- { DEFOP(Iop_MullS32, 
UNDEF_LEFT), .s390x = 1, .amd64 = 1, .x86 = 1, .arm = 1, .ppc64 = 1, .ppc32 = 1, .mips32 = 0, .mips64 = 1 }, // mips asserts ++ { DEFOP(Iop_MullS8, UNDEF_LEFT), .s390x = 1, .amd64 = 1, .x86 = 1, .arm = 0, .ppc64 = 0, .ppc32 = 0, .mips32 = 0, .mips64 = 0, .loongarch64 = 0 }, ++ { DEFOP(Iop_MullS16, UNDEF_LEFT), .s390x = 1, .amd64 = 1, .x86 = 1, .arm = 0, .ppc64 = 0, .ppc32 = 0, .mips32 = 0, .mips64 = 0, .loongarch64 = 0 }, ++ { DEFOP(Iop_MullS32, UNDEF_LEFT), .s390x = 1, .amd64 = 1, .x86 = 1, .arm = 1, .ppc64 = 1, .ppc32 = 1, .mips32 = 0, .mips64 = 1, .loongarch64 = 1 }, // mips asserts + // s390 has signed multiplication of 64-bit values but the result + // is 64-bit (not 128-bit). So we cannot test this op standalone. +- { DEFOP(Iop_MullS64, UNDEF_LEFT), .s390x = 0, .amd64 = 1, .x86 = 0, .arm = 0, .ppc64 = 1, .ppc32 = 0, .mips32 =0, .mips64 = 1 }, // ppc32, mips assert +- { DEFOP(Iop_MullU8, UNDEF_LEFT), .s390x = 1, .amd64 = 1, .x86 = 1, .arm = 0, .ppc64 = 0, .ppc32 = 0, .mips32 =0, .mips64 = 0 }, +- { DEFOP(Iop_MullU16, UNDEF_LEFT), .s390x = 1, .amd64 = 1, .x86 = 1, .arm = 0, .ppc64 = 0, .ppc32 = 0, .mips32 =0, .mips64 = 0 }, +- { DEFOP(Iop_MullU32, UNDEF_LEFT), .s390x = 1, .amd64 = 1, .x86 = 1, .arm = 1, .ppc64 = 1, .ppc32 = 1, .mips32 =0, .mips64 = 1 }, // mips asserts +- { DEFOP(Iop_MullU64, UNDEF_LEFT), .s390x = 1, .amd64 = 1, .x86 = 0, .arm = 0, .ppc64 = 1, .ppc32 = 0, .mips32 =0, .mips64 = 1 }, // ppc32, mips assert +- { DEFOP(Iop_Clz64, UNDEF_ALL), .s390x = 0, .amd64 = 1, .x86 = 0, .arm = 0, .ppc64 = 1, .ppc32 = 0, .mips32 =0, .mips64 = 1 }, // ppc32 asserts +- { DEFOP(Iop_Clz32, UNDEF_ALL), .s390x = 0, .amd64 = 0, .x86 = 1, .arm = 1, .ppc64 = 1, .ppc32 = 1, .mips32 =1, .mips64 = 1 }, +- { DEFOP(Iop_Ctz64, UNDEF_ALL), .s390x = 0, .amd64 = 1, .x86 = 0, .arm = 0, .ppc64 = 0, .ppc32 = 0, .mips32 =0, .mips64 = 0 }, +- { DEFOP(Iop_Ctz32, UNDEF_ALL), .s390x = 0, .amd64 = 0, .x86 = 1, .arm = 0, .ppc64 = 0, .ppc32 = 0, .mips32 =0, .mips64 = 0 }, +- { DEFOP(Iop_ClzNat64, UNDEF_ALL), .s390x = 1, .amd64 = 0, .x86 = 0, .arm = 0, .ppc64 = 1, .ppc32 = 0, .mips32 =0, .mips64 = 0 }, // ppc32 asserts +- { DEFOP(Iop_ClzNat32, UNDEF_ALL), .s390x = 0, .amd64 = 0, .x86 = 0, .arm = 0, .ppc64 = 1, .ppc32 = 1, .mips32 =0, .mips64 = 0 }, +- { DEFOP(Iop_CtzNat64, UNDEF_ALL), .s390x = 0, .amd64 = 0, .x86 = 0, .arm = 0, .ppc64 = 1, .ppc32 = 0, .mips32 =0, .mips64 = 0 }, +- { DEFOP(Iop_CtzNat32, UNDEF_ALL), .s390x = 0, .amd64 = 0, .x86 = 0, .arm = 0, .ppc64 = 0, .ppc32 = 1, .mips32 =0, .mips64 = 0 }, +- { DEFOP(Iop_PopCount64, UNDEF_ALL), .s390x = 0, .amd64 = 0, .x86 = 0, .arm = 0, .ppc64 = 1, .ppc32 = 0, .mips32 =0, .mips64 = 0 }, +- { DEFOP(Iop_PopCount32, UNDEF_ALL), .s390x = 0, .amd64 = 0, .x86 = 0, .arm = 0, .ppc64 = 1, .ppc32 = 1, .mips32 =0, .mips64 = 0 }, +- { DEFOP(Iop_CmpLT32S, UNDEF_ALL), .s390x = 1, .amd64 = 1, .x86 = 1, .arm = 1, .ppc64 = 1, .ppc32 = 1, .mips32 =1, .mips64 = 1 }, +- { DEFOP(Iop_CmpLT64S, UNDEF_ALL), .s390x = 1, .amd64 = 1, .x86 = 0, .arm = 0, .ppc64 = 0, .ppc32 = 0, .mips32 =0, .mips64 = 1 }, // ppc, mips assert +- { DEFOP(Iop_CmpLE32S, UNDEF_ALL), .s390x = 1, .amd64 = 1, .x86 = 1, .arm = 1, .ppc64 = 1, .ppc32 = 1, .mips32 =1, .mips64 = 1 }, +- { DEFOP(Iop_CmpLE64S, UNDEF_ALL), .s390x = 1, .amd64 = 1, .x86 = 0, .arm = 0, .ppc64 = 0, .ppc32 = 0, .mips32 =0, .mips64 = 1 }, // ppc, mips assert +- { DEFOP(Iop_CmpLT32U, UNDEF_ALL), .s390x = 1, .amd64 = 1, .x86 = 1, .arm = 1, .ppc64 = 1, .ppc32 = 1, .mips32 =1, .mips64 = 1 }, +- { DEFOP(Iop_CmpLT64U, UNDEF_ALL), .s390x = 1, .amd64 = 1, 
.x86 = 0, .arm = 0, .ppc64 = 1, .ppc32 = 0, .mips32 =0, .mips64 = 1}, // ppc32, mips assert +- { DEFOP(Iop_CmpLE32U, UNDEF_ALL), .s390x = 1, .amd64 = 1, .x86 = 1, .arm = 1, .ppc64 = 1, .ppc32 = 1, .mips32 =1, .mips64 = 1 }, +- { DEFOP(Iop_CmpLE64U, UNDEF_ALL), .s390x = 1, .amd64 = 1, .x86 = 0, .arm = 0, .ppc64 = 1, .ppc32 = 0, .mips32 =0, .mips64 = 0 }, // ppc32 asserts ++ { DEFOP(Iop_MullS64, UNDEF_LEFT), .s390x = 0, .amd64 = 1, .x86 = 0, .arm = 0, .ppc64 = 1, .ppc32 = 0, .mips32 =0, .mips64 = 1, .loongarch64 = 1 }, // ppc32, mips assert ++ { DEFOP(Iop_MullU8, UNDEF_LEFT), .s390x = 1, .amd64 = 1, .x86 = 1, .arm = 0, .ppc64 = 0, .ppc32 = 0, .mips32 =0, .mips64 = 0, .loongarch64 = 0 }, ++ { DEFOP(Iop_MullU16, UNDEF_LEFT), .s390x = 1, .amd64 = 1, .x86 = 1, .arm = 0, .ppc64 = 0, .ppc32 = 0, .mips32 =0, .mips64 = 0, .loongarch64 = 0 }, ++ { DEFOP(Iop_MullU32, UNDEF_LEFT), .s390x = 1, .amd64 = 1, .x86 = 1, .arm = 1, .ppc64 = 1, .ppc32 = 1, .mips32 =0, .mips64 = 1, .loongarch64 = 1 }, // mips asserts ++ { DEFOP(Iop_MullU64, UNDEF_LEFT), .s390x = 1, .amd64 = 1, .x86 = 0, .arm = 0, .ppc64 = 1, .ppc32 = 0, .mips32 =0, .mips64 = 1, .loongarch64 = 1 }, // ppc32, mips assert ++ { DEFOP(Iop_Clz64, UNDEF_ALL), .s390x = 0, .amd64 = 1, .x86 = 0, .arm = 0, .ppc64 = 1, .ppc32 = 0, .mips32 =0, .mips64 = 1, .loongarch64 = 1 }, // ppc32 asserts ++ { DEFOP(Iop_Clz32, UNDEF_ALL), .s390x = 0, .amd64 = 0, .x86 = 1, .arm = 1, .ppc64 = 1, .ppc32 = 1, .mips32 =1, .mips64 = 1, .loongarch64 = 1 }, ++ { DEFOP(Iop_Ctz64, UNDEF_ALL), .s390x = 0, .amd64 = 1, .x86 = 0, .arm = 0, .ppc64 = 0, .ppc32 = 0, .mips32 =0, .mips64 = 0, .loongarch64 = 1 }, ++ { DEFOP(Iop_Ctz32, UNDEF_ALL), .s390x = 0, .amd64 = 0, .x86 = 1, .arm = 0, .ppc64 = 0, .ppc32 = 0, .mips32 =0, .mips64 = 0, .loongarch64 = 1 }, ++ { DEFOP(Iop_ClzNat64, UNDEF_ALL), .s390x = 1, .amd64 = 0, .x86 = 0, .arm = 0, .ppc64 = 1, .ppc32 = 0, .mips32 =0, .mips64 = 0, .loongarch64 = 0 }, // ppc32 asserts ++ { DEFOP(Iop_ClzNat32, UNDEF_ALL), .s390x = 0, .amd64 = 0, .x86 = 0, .arm = 0, .ppc64 = 1, .ppc32 = 1, .mips32 =0, .mips64 = 0, .loongarch64 = 0 }, ++ { DEFOP(Iop_CtzNat64, UNDEF_ALL), .s390x = 0, .amd64 = 0, .x86 = 0, .arm = 0, .ppc64 = 1, .ppc32 = 0, .mips32 =0, .mips64 = 0, .loongarch64 = 0 }, ++ { DEFOP(Iop_CtzNat32, UNDEF_ALL), .s390x = 0, .amd64 = 0, .x86 = 0, .arm = 0, .ppc64 = 0, .ppc32 = 1, .mips32 =0, .mips64 = 0, .loongarch64 = 0 }, ++ { DEFOP(Iop_PopCount64, UNDEF_ALL), .s390x = 0, .amd64 = 0, .x86 = 0, .arm = 0, .ppc64 = 1, .ppc32 = 0, .mips32 =0, .mips64 = 0, .loongarch64 = 0 }, ++ { DEFOP(Iop_PopCount32, UNDEF_ALL), .s390x = 0, .amd64 = 0, .x86 = 0, .arm = 0, .ppc64 = 1, .ppc32 = 1, .mips32 =0, .mips64 = 0, .loongarch64 = 0 }, ++ { DEFOP(Iop_CmpLT32S, UNDEF_ALL), .s390x = 1, .amd64 = 1, .x86 = 1, .arm = 1, .ppc64 = 1, .ppc32 = 1, .mips32 =1, .mips64 = 1, .loongarch64 = 1 }, ++ { DEFOP(Iop_CmpLT64S, UNDEF_ALL), .s390x = 1, .amd64 = 1, .x86 = 0, .arm = 0, .ppc64 = 0, .ppc32 = 0, .mips32 =0, .mips64 = 1, .loongarch64 = 1 }, // ppc, mips assert ++ { DEFOP(Iop_CmpLE32S, UNDEF_ALL), .s390x = 1, .amd64 = 1, .x86 = 1, .arm = 1, .ppc64 = 1, .ppc32 = 1, .mips32 =1, .mips64 = 1, .loongarch64 = 1 }, ++ { DEFOP(Iop_CmpLE64S, UNDEF_ALL), .s390x = 1, .amd64 = 1, .x86 = 0, .arm = 0, .ppc64 = 0, .ppc32 = 0, .mips32 =0, .mips64 = 1, .loongarch64 = 1 }, // ppc, mips assert ++ { DEFOP(Iop_CmpLT32U, UNDEF_ALL), .s390x = 1, .amd64 = 1, .x86 = 1, .arm = 1, .ppc64 = 1, .ppc32 = 1, .mips32 =1, .mips64 = 1, .loongarch64 = 1 }, ++ { DEFOP(Iop_CmpLT64U, UNDEF_ALL), .s390x = 1, .amd64 
= 1, .x86 = 0, .arm = 0, .ppc64 = 1, .ppc32 = 0, .mips32 =0, .mips64 = 1, .loongarch64 = 1 }, // ppc32, mips assert ++ { DEFOP(Iop_CmpLE32U, UNDEF_ALL), .s390x = 1, .amd64 = 1, .x86 = 1, .arm = 1, .ppc64 = 1, .ppc32 = 1, .mips32 =1, .mips64 = 1, .loongarch64 = 1 }, ++ { DEFOP(Iop_CmpLE64U, UNDEF_ALL), .s390x = 1, .amd64 = 1, .x86 = 0, .arm = 0, .ppc64 = 1, .ppc32 = 0, .mips32 =0, .mips64 = 0, .loongarch64 = 1 }, // ppc32 asserts + { DEFOP(Iop_CmpNEZ8, UNDEF_ALL), }, // not supported by mc_translate + { DEFOP(Iop_CmpNEZ16, UNDEF_ALL), }, // not supported by mc_translate + { DEFOP(Iop_CmpNEZ32, UNDEF_ALL), }, // not supported by mc_translate + { DEFOP(Iop_CmpNEZ64, UNDEF_ALL), }, // not supported by mc_translate +- { DEFOP(Iop_CmpwNEZ32, UNDEF_ALL), .s390x = 1, .amd64 = 1, .x86 = 1, .arm = 1, .ppc64 = 1, .ppc32 = 1, .mips32 = 1, .mips64 = 1 }, +- { DEFOP(Iop_CmpwNEZ64, UNDEF_ALL), .s390x = 1, .amd64 = 1, .x86 = 1, .arm = 1, .ppc64 = 1, .ppc32 = 1, .mips32 = 0, .mips64 = 1 }, // mips asserts ++ { DEFOP(Iop_CmpwNEZ32, UNDEF_ALL), .s390x = 1, .amd64 = 1, .x86 = 1, .arm = 1, .ppc64 = 1, .ppc32 = 1, .mips32 = 1, .mips64 = 1, .loongarch64 = 1 }, ++ { DEFOP(Iop_CmpwNEZ64, UNDEF_ALL), .s390x = 1, .amd64 = 1, .x86 = 1, .arm = 1, .ppc64 = 1, .ppc32 = 1, .mips32 = 0, .mips64 = 1, .loongarch64 = 1 }, // mips asserts + { DEFOP(Iop_Left8, UNDEF_UNKNOWN), }, // not supported by mc_translate + { DEFOP(Iop_Left16, UNDEF_UNKNOWN), }, // not supported by mc_translate + { DEFOP(Iop_Left32, UNDEF_UNKNOWN), }, // not supported by mc_translate + { DEFOP(Iop_Left64, UNDEF_UNKNOWN), }, // not supported by mc_translate + { DEFOP(Iop_Max32U, UNDEF_UNKNOWN), }, // not supported by mc_translate +- { DEFOP(Iop_CmpORD32U, UNDEF_ORD), .s390x = 0, .amd64 = 0, .x86 = 0, .arm = 0, .ppc64 = 1, .ppc32 = 1, .mips32 = 0, .mips64 = 0 }, // support added in vbit-test +- { DEFOP(Iop_CmpORD64U, UNDEF_ORD), .s390x = 0, .amd64 = 0, .x86 = 0, .arm = 0, .ppc64 = 1, .ppc32 = 0, .mips32 = 0, .mips64 = 0 }, // support added in vbit-test +- { DEFOP(Iop_CmpORD32S, UNDEF_ORD), .s390x = 0, .amd64 = 0, .x86 = 0, .arm = 0, .ppc64 = 1, .ppc32 = 1, .mips32 = 0, .mips64 = 0 }, // support added in vbit-test +- { DEFOP(Iop_CmpORD64S, UNDEF_ORD), .s390x = 0, .amd64 = 0, .x86 = 0, .arm = 0, .ppc64 = 1, .ppc32 = 0, .mips32 = 0, .mips64 = 0 }, // support added in vbit-test +- { DEFOP(Iop_DivU32, UNDEF_ALL), .s390x = 0, .amd64 = 0, .x86 = 0, .arm = 0, .ppc64 = 1, .ppc32 = 1, .mips32 = 0, .mips64 = 0 }, +- { DEFOP(Iop_DivS32, UNDEF_ALL), .s390x = 0, .amd64 = 0, .x86 = 0, .arm = 0, .ppc64 = 1, .ppc32 = 1, .mips32 = 0, .mips64 = 0 }, +- { DEFOP(Iop_DivU64, UNDEF_ALL), .s390x = 0, .amd64 = 0, .x86 = 0, .arm = 0, .ppc64 = 1, .ppc32 = 0, .mips32 = 0, .mips64 = 0 }, // ppc32 asserts +- { DEFOP(Iop_DivS64, UNDEF_ALL), .s390x = 0, .amd64 = 0, .x86 = 0, .arm = 0, .ppc64 = 1, .ppc32 = 0, .mips32 = 0, .mips64 = 0 }, // ppc32 asserts +- { DEFOP(Iop_DivU64E, UNDEF_ALL), .s390x = 0, .amd64 = 0, .x86 = 0, .arm = 0, .ppc64 = 1, .ppc32 = 0, .mips32 = 0, .mips64 = 0 }, // ppc32 asserts +- { DEFOP(Iop_DivS64E, UNDEF_ALL), .s390x = 0, .amd64 = 0, .x86 = 0, .arm = 0, .ppc64 = 1, .ppc32 = 0, .mips32 = 0, .mips64 = 0 }, // ppc32 asserts +- { DEFOP(Iop_DivU32E, UNDEF_ALL), .s390x = 0, .amd64 = 0, .x86 = 0, .arm = 0, .ppc64 = 1, .ppc32 = 1, .mips32 = 0, .mips64 = 0 }, +- { DEFOP(Iop_DivS32E, UNDEF_ALL), .s390x = 0, .amd64 = 0, .x86 = 0, .arm = 0, .ppc64 = 1, .ppc32 = 1, .mips32 = 0, .mips64 = 0 }, ++ { DEFOP(Iop_CmpORD32U, UNDEF_ORD), .s390x = 0, .amd64 = 0, .x86 = 0, .arm = 0, 
.ppc64 = 1, .ppc32 = 1, .mips32 = 0, .mips64 = 0, .loongarch64 = 0 }, // support added in vbit-test ++ { DEFOP(Iop_CmpORD64U, UNDEF_ORD), .s390x = 0, .amd64 = 0, .x86 = 0, .arm = 0, .ppc64 = 1, .ppc32 = 0, .mips32 = 0, .mips64 = 0, .loongarch64 = 0 }, // support added in vbit-test ++ { DEFOP(Iop_CmpORD32S, UNDEF_ORD), .s390x = 0, .amd64 = 0, .x86 = 0, .arm = 0, .ppc64 = 1, .ppc32 = 1, .mips32 = 0, .mips64 = 0, .loongarch64 = 0 }, // support added in vbit-test ++ { DEFOP(Iop_CmpORD64S, UNDEF_ORD), .s390x = 0, .amd64 = 0, .x86 = 0, .arm = 0, .ppc64 = 1, .ppc32 = 0, .mips32 = 0, .mips64 = 0, .loongarch64 = 0 }, // support added in vbit-test ++ { DEFOP(Iop_DivU32, UNDEF_ALL), .s390x = 0, .amd64 = 0, .x86 = 0, .arm = 0, .ppc64 = 1, .ppc32 = 1, .mips32 = 0, .mips64 = 0, .loongarch64 = 1 }, ++ { DEFOP(Iop_DivS32, UNDEF_ALL), .s390x = 0, .amd64 = 0, .x86 = 0, .arm = 0, .ppc64 = 1, .ppc32 = 1, .mips32 = 0, .mips64 = 0, .loongarch64 = 1 }, ++ { DEFOP(Iop_DivU64, UNDEF_ALL), .s390x = 0, .amd64 = 0, .x86 = 0, .arm = 0, .ppc64 = 1, .ppc32 = 0, .mips32 = 0, .mips64 = 0, .loongarch64 = 1 }, // ppc32 asserts ++ { DEFOP(Iop_DivS64, UNDEF_ALL), .s390x = 0, .amd64 = 0, .x86 = 0, .arm = 0, .ppc64 = 1, .ppc32 = 0, .mips32 = 0, .mips64 = 0, .loongarch64 = 1 }, // ppc32 asserts ++ { DEFOP(Iop_DivU64E, UNDEF_ALL), .s390x = 0, .amd64 = 0, .x86 = 0, .arm = 0, .ppc64 = 1, .ppc32 = 0, .mips32 = 0, .mips64 = 0, .loongarch64 = 0 }, // ppc32 asserts ++ { DEFOP(Iop_DivS64E, UNDEF_ALL), .s390x = 0, .amd64 = 0, .x86 = 0, .arm = 0, .ppc64 = 1, .ppc32 = 0, .mips32 = 0, .mips64 = 0, .loongarch64 = 0 }, // ppc32 asserts ++ { DEFOP(Iop_DivU32E, UNDEF_ALL), .s390x = 0, .amd64 = 0, .x86 = 0, .arm = 0, .ppc64 = 1, .ppc32 = 1, .mips32 = 0, .mips64 = 0, .loongarch64 = 0 }, ++ { DEFOP(Iop_DivS32E, UNDEF_ALL), .s390x = 0, .amd64 = 0, .x86 = 0, .arm = 0, .ppc64 = 1, .ppc32 = 1, .mips32 = 0, .mips64 = 0, .loongarch64 = 0 }, + // On s390 the DivMod operations always appear in a certain context + // So they cannot be tested in isolation on that platform. 
+- { DEFOP(Iop_DivModU64to32, UNDEF_ALL), .s390x = 0, .amd64 = 1, .x86 = 1, .arm = 0, .ppc64 = 0, .ppc32 = 0, .mips32 = 0, .mips64 = 0 }, +- { DEFOP(Iop_DivModS64to32, UNDEF_ALL), .s390x = 0, .amd64 = 1, .x86 = 1, .arm = 0, .ppc64 = 0, .ppc32 = 0, .mips32 = 0, .mips64 = 0 }, +- { DEFOP(Iop_DivModU32to32, UNDEF_ALL), .s390x = 0, .amd64 = 0, .x86 = 0, .arm = 0, .ppc64 = 0, .ppc32 = 0, .mips32 = 1, .mips64 = 1 }, +- { DEFOP(Iop_DivModS32to32, UNDEF_ALL), .s390x = 0, .amd64 = 0, .x86 = 0, .arm = 0, .ppc64 = 0, .ppc32 = 0, .mips32 = 1, .mips64 = 1 }, +- { DEFOP(Iop_DivModU128to64, UNDEF_ALL), .s390x = 0, .amd64 = 1, .x86 = 0, .arm = 0, .ppc64 = 0, .ppc32 = 0, .mips32 = 0, .mips64 = 0 }, // mips asserts +- { DEFOP(Iop_DivModS128to64, UNDEF_ALL), .s390x = 0, .amd64 = 1, .x86 = 0, .arm = 0, .ppc64 = 0, .ppc32 = 0, .mips32 = 0, .mips64 = 0 }, // mips asserts +- { DEFOP(Iop_DivModS64to64, UNDEF_ALL), .s390x = 0, .amd64 = 0, .x86 = 0, .arm = 0, .ppc64 = 0, .ppc32 = 0, .mips32 = 0, .mips64 = 1 }, // mips asserts +- { DEFOP(Iop_DivModU64to64, UNDEF_ALL), .s390x = 0, .amd64 = 0, .x86 = 0, .arm = 0, .ppc64 = 0, .ppc32 = 0, .mips32 = 0, .mips64 = 1 }, // mips asserts +- { DEFOP(Iop_8Uto16, UNDEF_ZEXT), .s390x = 1, .amd64 = 1, .x86 = 1, .arm = 0, .ppc64 = 1, .ppc32 = 1, .mips32 = 1, .mips64 = 1 }, +- { DEFOP(Iop_8Uto32, UNDEF_ZEXT), .s390x = 1, .amd64 = 1, .x86 = 1, .arm = 1, .ppc64 = 1, .ppc32 = 1, .mips32 = 1, .mips64 = 1 }, +- { DEFOP(Iop_8Uto64, UNDEF_ZEXT), .s390x = 1, .amd64 = 1, .x86 = 0, .arm = 0, .ppc64 = 1, .ppc32 = 0, .mips32 = 0, .mips64 = 1 }, // ppc32 assert +- { DEFOP(Iop_16Uto32, UNDEF_ZEXT), .s390x = 1, .amd64 = 1, .x86 = 1, .arm = 1, .ppc64 = 1, .ppc32 = 1, .mips32 = 1, .mips64 = 1 }, +- { DEFOP(Iop_16Uto64, UNDEF_ZEXT), .s390x = 1, .amd64 = 1, .x86 = 1, .arm = 0, .ppc64 = 1, .ppc32 = 0, .mips32 = 0, .mips64 = 1 }, // ppc32 assert +- { DEFOP(Iop_32Uto64, UNDEF_ZEXT), .s390x = 1, .amd64 = 1, .x86 = 1, .arm = 0, .ppc64 = 1, .ppc32 = 1, .mips32 = 0, .mips64 = 1 }, // mips asserts +- { DEFOP(Iop_8Sto16, UNDEF_SEXT), .s390x = 1, .amd64 = 1, .x86 = 1, .arm = 0, .ppc64 = 1, .ppc32 = 1, .mips32 = 1, .mips64 = 1 }, +- { DEFOP(Iop_8Sto32, UNDEF_SEXT), .s390x = 1, .amd64 = 1, .x86 = 1, .arm = 1, .ppc64 = 1, .ppc32 = 1, .mips32 = 1, .mips64 = 1 }, +- { DEFOP(Iop_8Sto64, UNDEF_SEXT), .s390x = 1, .amd64 = 1, .x86 = 0, .arm = 0, .ppc64 = 1, .ppc32 = 0, .mips32 = 0, .mips64 = 1 }, // ppc32, mips assert +- { DEFOP(Iop_16Sto32, UNDEF_SEXT), .s390x = 1, .amd64 = 1, .x86 = 1, .arm = 1, .ppc64 = 1, .ppc32 = 1, .mips32 = 1, .mips64 = 1 }, +- { DEFOP(Iop_16Sto64, UNDEF_SEXT), .s390x = 1, .amd64 = 1, .x86 = 0, .arm = 0, .ppc64 = 1, .ppc32 = 0, .mips32 = 0, .mips64 = 1 }, // ppc32, mips assert +- { DEFOP(Iop_32Sto64, UNDEF_SEXT), .s390x = 1, .amd64 = 1, .x86 = 1, .arm = 0, .ppc64 = 1, .ppc32 = 1, .mips32 = 0, .mips64 = 1 }, // mips asserts +- { DEFOP(Iop_64to8, UNDEF_TRUNC), .s390x = 1, .amd64 = 1, .x86 = 0, .arm = 1, .ppc64 = 1, .ppc32 = 0, .mips32 = 0, .mips64 = 1 }, // ppc32, mips assert +- { DEFOP(Iop_32to8, UNDEF_TRUNC), .s390x = 1, .amd64 = 1, .x86 = 1, .arm = 1, .ppc64 = 1, .ppc32 = 1, .mips32 = 1, .mips64 = 1 }, +- { DEFOP(Iop_64to16, UNDEF_TRUNC), .s390x = 1, .amd64 = 1, .x86 = 0, .arm = 0, .ppc64 = 1, .ppc32 = 0, .mips32 = 0, .mips64 = 1 }, // ppc32, mips assert +- { DEFOP(Iop_16to8, UNDEF_TRUNC), .s390x = 1, .amd64 = 1, .x86 = 1, .arm = 0, .ppc64 = 1, .ppc32 = 1, .mips32 = 1, .mips64 = 1 }, +- { DEFOP(Iop_16HIto8, UNDEF_UPPER), .s390x = 1, .amd64 = 1, .x86 = 1, .arm = 0, .ppc64 = 1, .ppc32 = 1, .mips32 
= 1, .mips64 = 1 }, +- { DEFOP(Iop_8HLto16, UNDEF_CONCAT), .s390x = 1, .amd64 = 1, .x86 = 1, .arm = 0, .ppc64 = 0, .ppc32 = 0, .mips32 = 1, .mips64 = 1 }, // ppc isel +- { DEFOP(Iop_32to16, UNDEF_TRUNC), .s390x = 1, .amd64 = 1, .x86 = 1, .arm = 1, .ppc64 = 1, .ppc32 = 1, .mips32 = 1, .mips64 = 1 }, +- { DEFOP(Iop_32HIto16, UNDEF_UPPER), .s390x = 1, .amd64 = 1, .x86 = 1, .arm = 0, .ppc64 = 0, .ppc32 = 0, .mips32 = 1, .mips64 = 1 }, +- { DEFOP(Iop_16HLto32, UNDEF_CONCAT), .s390x = 1, .amd64 = 1, .x86 = 1, .arm = 0, .ppc64 = 0, .ppc32 = 0, .mips32 = 1, .mips64 = 1 }, // ppc isel +- { DEFOP(Iop_64to32, UNDEF_TRUNC), .s390x = 1, .amd64 = 1, .x86 = 1, .arm = 1, .ppc64 = 1, .ppc32 = 1, .mips32 = 0, .mips64 = 1 }, // mips asserts +- { DEFOP(Iop_64HIto32, UNDEF_UPPER), .s390x = 1, .amd64 = 1, .x86 = 1, .arm = 1, .ppc64 = 1, .ppc32 = 1, .mips32 = 0, .mips64 = 1 }, +- { DEFOP(Iop_32HLto64, UNDEF_CONCAT), .s390x = 1, .amd64 = 1, .x86 = 1, .arm = 1, .ppc64 = 1, .ppc32 = 1, .mips32 = 0, .mips64 = 1 }, // mips asserts +- { DEFOP(Iop_128to64, UNDEF_TRUNC), .s390x = 1, .amd64 = 1, .x86 = 0, .arm = 0, .ppc64 = 1, .ppc32 = 1, .mips32 = 0, .mips64 = 1 }, // mips asserts +- { DEFOP(Iop_128HIto64, UNDEF_UPPER), .s390x = 1, .amd64 = 1, .x86 = 0, .arm = 0, .ppc64 = 1, .ppc32 = 1, .mips32 = 0, .mips64 = 1 }, // mips asserts +- { DEFOP(Iop_64HLto128, UNDEF_CONCAT), .s390x = 1, .amd64 = 1, .x86 = 0, .arm = 0, .ppc64 = 1, .ppc32 = 1, .mips32 = 0, .mips64 = 1 }, // mips asserts +- { DEFOP(Iop_32to1, UNDEF_TRUNC), .s390x = 1, .amd64 = 1, .x86 = 1, .arm = 1, .ppc64 = 1, .ppc32 = 1, .mips32 = 1, .mips64 = 1 }, +- { DEFOP(Iop_64to1, UNDEF_TRUNC), .s390x = 1, .amd64 = 1, .x86 = 0, .arm = 0, .ppc64 = 1, .ppc32 = 0, .mips32 = 0, .mips64 = 1 }, // ppc32, mips assert +- { DEFOP(Iop_1Uto8, UNDEF_ZEXT), .s390x = 1, .amd64 = 1, .x86 = 1, .arm = 1, .ppc64 = 1, .ppc32 = 1, .mips32 = 1, .mips64 = 1 }, +- { DEFOP(Iop_1Uto32, UNDEF_ZEXT), .s390x = 1, .amd64 = 1, .x86 = 1, .arm = 1, .ppc64 = 1, .ppc32 = 1, .mips32 = 1, .mips64 = 1 }, +- { DEFOP(Iop_1Uto64, UNDEF_ZEXT), .s390x = 1, .amd64 = 1, .x86 = 0, .arm = 0, .ppc64 = 1, .ppc32 = 0, .mips32 = 0, .mips64 = 1 }, // ppc32 assert +- { DEFOP(Iop_1Sto8, UNDEF_ALL), .s390x = 1, .amd64 = 1, .x86 = 1, .arm = 0, .ppc64 = 1, .ppc32 = 1, .mips32 = 1, .mips64 = 1 }, ++ { DEFOP(Iop_DivModU64to32, UNDEF_ALL), .s390x = 0, .amd64 = 1, .x86 = 1, .arm = 0, .ppc64 = 0, .ppc32 = 0, .mips32 = 0, .mips64 = 0, .loongarch64 = 0 }, ++ { DEFOP(Iop_DivModS64to32, UNDEF_ALL), .s390x = 0, .amd64 = 1, .x86 = 1, .arm = 0, .ppc64 = 0, .ppc32 = 0, .mips32 = 0, .mips64 = 0, .loongarch64 = 0 }, ++ { DEFOP(Iop_DivModU32to32, UNDEF_ALL), .s390x = 0, .amd64 = 0, .x86 = 0, .arm = 0, .ppc64 = 0, .ppc32 = 0, .mips32 = 1, .mips64 = 1, .loongarch64 = 1 }, ++ { DEFOP(Iop_DivModS32to32, UNDEF_ALL), .s390x = 0, .amd64 = 0, .x86 = 0, .arm = 0, .ppc64 = 0, .ppc32 = 0, .mips32 = 1, .mips64 = 1, .loongarch64 = 1 }, ++ { DEFOP(Iop_DivModU128to64, UNDEF_ALL), .s390x = 0, .amd64 = 1, .x86 = 0, .arm = 0, .ppc64 = 0, .ppc32 = 0, .mips32 = 0, .mips64 = 0, .loongarch64 = 0 }, // mips asserts ++ { DEFOP(Iop_DivModS128to64, UNDEF_ALL), .s390x = 0, .amd64 = 1, .x86 = 0, .arm = 0, .ppc64 = 0, .ppc32 = 0, .mips32 = 0, .mips64 = 0, .loongarch64 = 0 }, // mips asserts ++ { DEFOP(Iop_DivModS64to64, UNDEF_ALL), .s390x = 0, .amd64 = 0, .x86 = 0, .arm = 0, .ppc64 = 0, .ppc32 = 0, .mips32 = 0, .mips64 = 1, .loongarch64 = 1 }, // mips asserts ++ { DEFOP(Iop_DivModU64to64, UNDEF_ALL), .s390x = 0, .amd64 = 0, .x86 = 0, .arm = 0, .ppc64 = 0, .ppc32 = 0, 
.mips32 = 0, .mips64 = 1, .loongarch64 = 1 }, // mips asserts ++ { DEFOP(Iop_8Uto16, UNDEF_ZEXT), .s390x = 1, .amd64 = 1, .x86 = 1, .arm = 0, .ppc64 = 1, .ppc32 = 1, .mips32 = 1, .mips64 = 1, .loongarch64 = 0 }, ++ { DEFOP(Iop_8Uto32, UNDEF_ZEXT), .s390x = 1, .amd64 = 1, .x86 = 1, .arm = 1, .ppc64 = 1, .ppc32 = 1, .mips32 = 1, .mips64 = 1, .loongarch64 = 1 }, ++ { DEFOP(Iop_8Uto64, UNDEF_ZEXT), .s390x = 1, .amd64 = 1, .x86 = 0, .arm = 0, .ppc64 = 1, .ppc32 = 0, .mips32 = 0, .mips64 = 1, .loongarch64 = 1 }, // ppc32 assert ++ { DEFOP(Iop_16Uto32, UNDEF_ZEXT), .s390x = 1, .amd64 = 1, .x86 = 1, .arm = 1, .ppc64 = 1, .ppc32 = 1, .mips32 = 1, .mips64 = 1, .loongarch64 = 0 }, ++ { DEFOP(Iop_16Uto64, UNDEF_ZEXT), .s390x = 1, .amd64 = 1, .x86 = 1, .arm = 0, .ppc64 = 1, .ppc32 = 0, .mips32 = 0, .mips64 = 1, .loongarch64 = 1 }, // ppc32 assert ++ { DEFOP(Iop_32Uto64, UNDEF_ZEXT), .s390x = 1, .amd64 = 1, .x86 = 1, .arm = 0, .ppc64 = 1, .ppc32 = 1, .mips32 = 0, .mips64 = 1, .loongarch64 = 1 }, // mips asserts ++ { DEFOP(Iop_8Sto16, UNDEF_SEXT), .s390x = 1, .amd64 = 1, .x86 = 1, .arm = 0, .ppc64 = 1, .ppc32 = 1, .mips32 = 1, .mips64 = 1, .loongarch64 = 0 }, ++ { DEFOP(Iop_8Sto32, UNDEF_SEXT), .s390x = 1, .amd64 = 1, .x86 = 1, .arm = 1, .ppc64 = 1, .ppc32 = 1, .mips32 = 1, .mips64 = 1, .loongarch64 = 0 }, ++ { DEFOP(Iop_8Sto64, UNDEF_SEXT), .s390x = 1, .amd64 = 1, .x86 = 0, .arm = 0, .ppc64 = 1, .ppc32 = 0, .mips32 = 0, .mips64 = 1, .loongarch64 = 1 }, // ppc32, mips assert ++ { DEFOP(Iop_16Sto32, UNDEF_SEXT), .s390x = 1, .amd64 = 1, .x86 = 1, .arm = 1, .ppc64 = 1, .ppc32 = 1, .mips32 = 1, .mips64 = 1, .loongarch64 = 0 }, ++ { DEFOP(Iop_16Sto64, UNDEF_SEXT), .s390x = 1, .amd64 = 1, .x86 = 0, .arm = 0, .ppc64 = 1, .ppc32 = 0, .mips32 = 0, .mips64 = 1, .loongarch64 = 1 }, // ppc32, mips assert ++ { DEFOP(Iop_32Sto64, UNDEF_SEXT), .s390x = 1, .amd64 = 1, .x86 = 1, .arm = 0, .ppc64 = 1, .ppc32 = 1, .mips32 = 0, .mips64 = 1, .loongarch64 = 1 }, // mips asserts ++ { DEFOP(Iop_64to8, UNDEF_TRUNC), .s390x = 1, .amd64 = 1, .x86 = 0, .arm = 1, .ppc64 = 1, .ppc32 = 0, .mips32 = 0, .mips64 = 1, .loongarch64 = 1 }, // ppc32, mips assert ++ { DEFOP(Iop_32to8, UNDEF_TRUNC), .s390x = 1, .amd64 = 1, .x86 = 1, .arm = 1, .ppc64 = 1, .ppc32 = 1, .mips32 = 1, .mips64 = 1, .loongarch64 = 1 }, ++ { DEFOP(Iop_64to16, UNDEF_TRUNC), .s390x = 1, .amd64 = 1, .x86 = 0, .arm = 0, .ppc64 = 1, .ppc32 = 0, .mips32 = 0, .mips64 = 1, .loongarch64 = 0 }, // ppc32, mips assert ++ { DEFOP(Iop_16to8, UNDEF_TRUNC), .s390x = 1, .amd64 = 1, .x86 = 1, .arm = 0, .ppc64 = 1, .ppc32 = 1, .mips32 = 1, .mips64 = 1, .loongarch64 = 0 }, ++ { DEFOP(Iop_16HIto8, UNDEF_UPPER), .s390x = 1, .amd64 = 1, .x86 = 1, .arm = 0, .ppc64 = 1, .ppc32 = 1, .mips32 = 1, .mips64 = 1, .loongarch64 = 0 }, ++ { DEFOP(Iop_8HLto16, UNDEF_CONCAT), .s390x = 1, .amd64 = 1, .x86 = 1, .arm = 0, .ppc64 = 0, .ppc32 = 0, .mips32 = 1, .mips64 = 1, .loongarch64 = 0 }, // ppc isel ++ { DEFOP(Iop_32to16, UNDEF_TRUNC), .s390x = 1, .amd64 = 1, .x86 = 1, .arm = 1, .ppc64 = 1, .ppc32 = 1, .mips32 = 1, .mips64 = 1, .loongarch64 = 0 }, ++ { DEFOP(Iop_32HIto16, UNDEF_UPPER), .s390x = 1, .amd64 = 1, .x86 = 1, .arm = 0, .ppc64 = 0, .ppc32 = 0, .mips32 = 1, .mips64 = 1, .loongarch64 = 0 }, ++ { DEFOP(Iop_16HLto32, UNDEF_CONCAT), .s390x = 1, .amd64 = 1, .x86 = 1, .arm = 0, .ppc64 = 0, .ppc32 = 0, .mips32 = 1, .mips64 = 1, .loongarch64 = 0 }, // ppc isel ++ { DEFOP(Iop_64to32, UNDEF_TRUNC), .s390x = 1, .amd64 = 1, .x86 = 1, .arm = 1, .ppc64 = 1, .ppc32 = 1, .mips32 = 0, .mips64 = 1, .loongarch64 = 
1 }, // mips asserts ++ { DEFOP(Iop_64HIto32, UNDEF_UPPER), .s390x = 1, .amd64 = 1, .x86 = 1, .arm = 1, .ppc64 = 1, .ppc32 = 1, .mips32 = 0, .mips64 = 1, .loongarch64 = 1 }, ++ { DEFOP(Iop_32HLto64, UNDEF_CONCAT), .s390x = 1, .amd64 = 1, .x86 = 1, .arm = 1, .ppc64 = 1, .ppc32 = 1, .mips32 = 0, .mips64 = 1, .loongarch64 = 1 }, // mips asserts ++ { DEFOP(Iop_128to64, UNDEF_TRUNC), .s390x = 1, .amd64 = 1, .x86 = 0, .arm = 0, .ppc64 = 1, .ppc32 = 1, .mips32 = 0, .mips64 = 1, .loongarch64 = 1 }, // mips asserts ++ { DEFOP(Iop_128HIto64, UNDEF_UPPER), .s390x = 1, .amd64 = 1, .x86 = 0, .arm = 0, .ppc64 = 1, .ppc32 = 1, .mips32 = 0, .mips64 = 1, .loongarch64 = 1 }, // mips asserts ++ { DEFOP(Iop_64HLto128, UNDEF_CONCAT), .s390x = 1, .amd64 = 1, .x86 = 0, .arm = 0, .ppc64 = 1, .ppc32 = 1, .mips32 = 0, .mips64 = 1, .loongarch64 = 1 }, // mips asserts ++ { DEFOP(Iop_32to1, UNDEF_TRUNC), .s390x = 1, .amd64 = 1, .x86 = 1, .arm = 1, .ppc64 = 1, .ppc32 = 1, .mips32 = 1, .mips64 = 1, .loongarch64 = 0 }, ++ { DEFOP(Iop_64to1, UNDEF_TRUNC), .s390x = 1, .amd64 = 1, .x86 = 0, .arm = 0, .ppc64 = 1, .ppc32 = 0, .mips32 = 0, .mips64 = 1, .loongarch64 = 0 }, // ppc32, mips assert ++ { DEFOP(Iop_1Uto8, UNDEF_ZEXT), .s390x = 1, .amd64 = 1, .x86 = 1, .arm = 1, .ppc64 = 1, .ppc32 = 1, .mips32 = 1, .mips64 = 1, .loongarch64 = 1 }, ++ { DEFOP(Iop_1Uto32, UNDEF_ZEXT), .s390x = 1, .amd64 = 1, .x86 = 1, .arm = 1, .ppc64 = 1, .ppc32 = 1, .mips32 = 1, .mips64 = 1, .loongarch64 = 0 }, ++ { DEFOP(Iop_1Uto64, UNDEF_ZEXT), .s390x = 1, .amd64 = 1, .x86 = 0, .arm = 0, .ppc64 = 1, .ppc32 = 0, .mips32 = 0, .mips64 = 1, .loongarch64 = 1 }, // ppc32 assert ++ { DEFOP(Iop_1Sto8, UNDEF_ALL), .s390x = 1, .amd64 = 1, .x86 = 1, .arm = 0, .ppc64 = 1, .ppc32 = 1, .mips32 = 1, .mips64 = 1, .loongarch64 = 0 }, + { DEFOP(Iop_1Sto16, UNDEF_ALL), }, // not handled by mc_translate +- { DEFOP(Iop_1Sto32, UNDEF_ALL), .s390x = 1, .amd64 = 1, .x86 = 1, .arm = 1, .ppc64 = 1, .ppc32 = 1, .mips32 = 1, .mips64 = 1 }, +- { DEFOP(Iop_1Sto64, UNDEF_ALL), .s390x = 1, .amd64 = 1, .x86 = 1, .arm = 1, .ppc64 = 1, .ppc32 = 1, .mips32 = 1, .mips64 = 1 }, +- { DEFOP(Iop_AddF64, UNDEF_ALL), .s390x = 1, .amd64 = 1, .x86 = 1, .arm = 1, .ppc64 = 1, .ppc32 = 1, .mips32 = 0, .mips64 = 1 }, // mips asserts +- { DEFOP(Iop_SubF64, UNDEF_ALL), .s390x = 1, .amd64 = 1, .x86 = 1, .arm = 1, .ppc64 = 1, .ppc32 = 1, .mips32 = 0, .mips64 = 1 }, // mips asserts +- { DEFOP(Iop_MulF64, UNDEF_ALL), .s390x = 1, .amd64 = 1, .x86 = 1, .arm = 1, .ppc64 = 1, .ppc32 = 1, .mips32 = 0, .mips64 = 1 }, // mips asserts +- { DEFOP(Iop_DivF64, UNDEF_ALL), .s390x = 1, .amd64 = 1, .x86 = 1, .arm = 1, .ppc64 = 1, .ppc32 = 1, .mips32 = 0, .mips64 = 1 }, // mips asserts +- { DEFOP(Iop_AddF32, UNDEF_ALL), .s390x = 1, .amd64 = 0, .x86 = 0, .arm = 1, .ppc64 = 0, .ppc32 = 0, .mips32 = 1, .mips64 = 1 }, +- { DEFOP(Iop_AddF16, UNDEF_ALL), .s390x = 0, .amd64 = 0, .x86 = 0, .arm = 0, .arm64 = 1, .ppc64 = 0, .ppc32 = 0, .mips32 = 0, .mips64 = 0 }, +- { DEFOP(Iop_SubF32, UNDEF_ALL), .s390x = 1, .amd64 = 0, .x86 = 0, .arm = 1, .ppc64 = 0, .ppc32 = 0, .mips32 = 1, .mips64 = 1 }, +- { DEFOP(Iop_SubF16, UNDEF_ALL), .s390x = 0, .amd64 = 0, .x86 = 0, .arm = 0, .arm64 = 1,.ppc64 = 0, .ppc32 = 0, .mips32 = 0, .mips64 = 0 }, +- { DEFOP(Iop_MulF32, UNDEF_ALL), .s390x = 1, .amd64 = 0, .x86 = 0, .arm = 1, .ppc64 = 0, .ppc32 = 0, .mips32 = 1, .mips64 = 1 }, +- { DEFOP(Iop_DivF32, UNDEF_ALL), .s390x = 1, .amd64 = 0, .x86 = 0, .arm = 1, .ppc64 = 0, .ppc32 = 0, .mips32 = 1, .mips64 = 1 }, +- { DEFOP(Iop_AddF64r32, UNDEF_ALL), 
.s390x = 0, .amd64 = 0, .x86 = 0, .arm = 0, .ppc64 = 1, .ppc32 = 1, .mips32 = 0, .mips64 = 0 }, +- { DEFOP(Iop_SubF64r32, UNDEF_ALL), .s390x = 0, .amd64 = 0, .x86 = 0, .arm = 0, .ppc64 = 1, .ppc32 = 1, .mips32 = 0, .mips64 = 0 }, +- { DEFOP(Iop_MulF64r32, UNDEF_ALL), .s390x = 0, .amd64 = 0, .x86 = 0, .arm = 0, .ppc64 = 1, .ppc32 = 1, .mips32 = 0, .mips64 = 0 }, +- { DEFOP(Iop_DivF64r32, UNDEF_ALL), .s390x = 0, .amd64 = 0, .x86 = 0, .arm = 0, .ppc64 = 1, .ppc32 = 1, .mips32 = 0, .mips64 = 0 }, +- { DEFOP(Iop_NegF64, UNDEF_ALL), .s390x = 1, .amd64 = 1, .x86 = 1, .arm = 1, .ppc64 = 1, .ppc32 = 1, .mips32 = 1, .mips64 = 1 }, +- { DEFOP(Iop_AbsF64, UNDEF_ALL), .s390x = 1, .amd64 = 1, .x86 = 1, .arm = 1, .ppc64 = 1, .ppc32 = 1, .mips32 = 1, .mips64 = 1 }, +- { DEFOP(Iop_NegF32, UNDEF_ALL), .s390x = 1, .amd64 = 0, .x86 = 0, .arm = 1, .ppc64 = 0, .ppc32 = 0, .mips32 = 1, .mips64 = 1 }, +- { DEFOP(Iop_NegF16, UNDEF_ALL), .s390x = 0, .amd64 = 0, .x86 = 0, .arm = 0, .arm64 = 1, .ppc64 = 0, .ppc32 = 0, .mips32 = 0, .mips64 = 0 }, +- { DEFOP(Iop_AbsF32, UNDEF_ALL), .s390x = 1, .amd64 = 0, .x86 = 0, .arm = 1, .ppc64 = 0, .ppc32 = 0, .mips32 = 1, .mips64 = 1 }, +- { DEFOP(Iop_AbsF16, UNDEF_ALL), .s390x = 0, .amd64 = 0, .x86 = 0, .arm = 0, .arm64 = 1, .ppc64 = 0, .ppc32 = 0, .mips32 = 0, .mips64 = 0 }, +- { DEFOP(Iop_SqrtF64, UNDEF_ALL), .s390x = 1, .amd64 = 1, .x86 = 1, .arm = 1, .ppc64 = 1, .ppc32 = 1, .mips32 = 0, .mips64 = 1 }, // mips asserts +- { DEFOP(Iop_SqrtF32, UNDEF_ALL), .s390x = 1, .amd64 = 0, .x86 = 0, .arm = 1, .ppc64 = 0, .ppc32 = 0, .mips32 = 1, .mips64 = 1 }, +- { DEFOP(Iop_SqrtF16, UNDEF_ALL), .s390x = 0, .amd64 = 0, .x86 = 0, .arm = 0, .arm64 = 1, .ppc64 = 0, .ppc32 = 0, .mips32 = 0, .mips64 = 0 }, +- { DEFOP(Iop_CmpF64, UNDEF_ALL), .s390x = 1, .amd64 = 1, .x86 = 1, .arm = 1, .ppc64 = 1, .ppc32 = 1, .mips32 = 1, .mips64 = 1 }, +- { DEFOP(Iop_CmpF32, UNDEF_ALL), .s390x = 1, .amd64 = 0, .x86 = 0, .arm = 0, .ppc64 = 0, .ppc32 = 0, .mips32 = 0, .mips64 = 0 }, // mips asserts +- { DEFOP(Iop_CmpF16, UNDEF_ALL), .s390x = 0, .amd64 = 0, .x86 = 0, .arm = 0, .arm64 = 1, .ppc64 = 0, .ppc32 = 0, .mips32 = 0, .mips64 = 0 }, +- { DEFOP(Iop_CmpF128, UNDEF_ALL), .s390x = 1, .amd64 = 0, .x86 = 0, .arm = 0, .ppc64 = 0, .ppc32 = 0, .mips32 = 0, .mips64 = 0 }, +- { DEFOP(Iop_F64toI16S, UNDEF_ALL), .s390x = 0, .amd64 = 0, .x86 = 1, .arm = 0, .ppc64 = 0, .ppc32 = 0, .mips32 = 0, .mips64 = 0 }, +- { DEFOP(Iop_F64toI32S, UNDEF_ALL), .s390x = 1, .amd64 = 1, .x86 = 1, .arm = 1, .ppc64 = 1, .ppc32 = 1, .mips32 = 1, .mips64 = 1 }, +- { DEFOP(Iop_F64toI64S, UNDEF_ALL), .s390x = 1, .amd64 = 1, .x86 = 1, .arm = 0, .ppc64 = 1, .ppc32 = 1, .mips32 = 0, .mips64 = 0 }, +- { DEFOP(Iop_F64toI64U, UNDEF_ALL), .s390x = 1, .amd64 = 0, .x86 = 0, .arm = 0, .ppc64 = 1, .ppc32 = 1, .mips32 = 0, .mips64 = 0 }, +- { DEFOP(Iop_F64toI32U, UNDEF_ALL), .s390x = 1, .amd64 = 0, .x86 = 0, .arm = 1, .ppc64 = 1, .ppc32 = 1, .mips32 = 0, .mips64 = 0 }, +- { DEFOP(Iop_I32StoF64, UNDEF_ALL), .s390x = 1, .amd64 = 1, .x86 = 1, .arm = 1, .ppc64 = 0, .ppc32 = 0, .mips32 = 1, .mips64 = 1 }, +- { DEFOP(Iop_I64StoF64, UNDEF_ALL), .s390x = 1, .amd64 = 1, .x86 = 1, .arm = 0, .ppc64 = 1, .ppc32 = 1, .mips32 = 0, .mips64 = 1 }, +- { DEFOP(Iop_I64UtoF64, UNDEF_ALL), .s390x = 1, .amd64 = 0, .x86 = 0, .arm = 0, .ppc64 = 1, .ppc32 = 1, .mips32 = 0, .mips64 = 0 }, // mips asserts +- { DEFOP(Iop_I64UtoF32, UNDEF_ALL), .s390x = 1, .amd64 = 0, .x86 = 0, .arm = 0, .ppc64 = 1, .ppc32 = 1, .mips32 = 0, .mips64 = 0 }, +- { DEFOP(Iop_I32UtoF32, UNDEF_ALL), .s390x = 
1, .amd64 = 0, .x86 = 0, .arm = 0, .ppc64 = 0, .ppc32 = 0, .mips32 = 0, .mips64 = 0 }, +- { DEFOP(Iop_I32UtoF64, UNDEF_ALL), .s390x = 1, .amd64 = 0, .x86 = 0, .arm = 1, .ppc64 = 0, .ppc32 = 0, .mips32 = 0, .mips64 = 0 }, +- { DEFOP(Iop_F32toI32S, UNDEF_ALL), .s390x = 1, .amd64 = 0, .x86 = 0, .arm = 0, .ppc64 = 0, .ppc32 = 0, .mips32 = 0, .mips64 = 0 }, +- { DEFOP(Iop_F32toI64S, UNDEF_ALL), .s390x = 1, .amd64 = 0, .x86 = 0, .arm = 0, .ppc64 = 0, .ppc32 = 0, .mips32 = 0, .mips64 = 1 }, +- { DEFOP(Iop_F32toI32U, UNDEF_ALL), .s390x = 1, .amd64 = 0, .x86 = 0, .arm = 0, .ppc64 = 0, .ppc32 = 0, .mips32 = 0, .mips64 = 0 }, +- { DEFOP(Iop_F32toI64U, UNDEF_ALL), .s390x = 1, .amd64 = 0, .x86 = 0, .arm = 0, .ppc64 = 0, .ppc32 = 0, .mips32 = 0, .mips64 = 0 }, +- { DEFOP(Iop_I32StoF32, UNDEF_ALL), .s390x = 1, .amd64 = 0, .x86 = 0, .arm = 0, .ppc64 = 0, .ppc32 = 0, .mips32 = 1, .mips64 = 1 }, +- { DEFOP(Iop_I64StoF32, UNDEF_ALL), .s390x = 1, .amd64 = 0, .x86 = 0, .arm = 0, .ppc64 = 0, .ppc32 = 0, .mips32 = 0, .mips64 = 1 }, +- { DEFOP(Iop_F32toF64, UNDEF_ALL), .s390x = 1, .amd64 = 1, .x86 = 1, .arm = 1, .ppc64 = 1, .ppc32 = 1, .mips32 = 1, .mips64 = 1 }, +- { DEFOP(Iop_F64toF32, UNDEF_ALL), .s390x = 1, .amd64 = 1, .x86 = 1, .arm = 1, .ppc64 = 0, .ppc32 = 0, .mips32 = 0, .mips64 = 1 }, // mips asserts +- { DEFOP(Iop_ReinterpF64asI64, UNDEF_SAME), .s390x = 1, .amd64 = 1, .x86 = 1, .arm = 1, .ppc64 = 1, .ppc32 = 1, .mips32 = 0, .mips64 = 1 }, // mips asserts +- { DEFOP(Iop_ReinterpI64asF64, UNDEF_SAME), .s390x = 1, .amd64 = 1, .x86 = 1, .arm = 1, .ppc64 = 1, .ppc32 = 1, .mips32 = 0, .mips64 = 1 }, // mips asserts +- { DEFOP(Iop_ReinterpF32asI32, UNDEF_SAME), .s390x = 1, .amd64 = 1, .x86 = 1, .arm = 1, .ppc64 = 1, .ppc32 = 1, .mips32 = 1, .mips64 = 1 }, ++ { DEFOP(Iop_1Sto32, UNDEF_ALL), .s390x = 1, .amd64 = 1, .x86 = 1, .arm = 1, .ppc64 = 1, .ppc32 = 1, .mips32 = 1, .mips64 = 1, .loongarch64 = 1 }, ++ { DEFOP(Iop_1Sto64, UNDEF_ALL), .s390x = 1, .amd64 = 1, .x86 = 1, .arm = 1, .ppc64 = 1, .ppc32 = 1, .mips32 = 1, .mips64 = 1, .loongarch64 = 1 }, ++ { DEFOP(Iop_AddF64, UNDEF_ALL), .s390x = 1, .amd64 = 1, .x86 = 1, .arm = 1, .ppc64 = 1, .ppc32 = 1, .mips32 = 0, .mips64 = 1, .loongarch64 = 1 }, // mips asserts ++ { DEFOP(Iop_SubF64, UNDEF_ALL), .s390x = 1, .amd64 = 1, .x86 = 1, .arm = 1, .ppc64 = 1, .ppc32 = 1, .mips32 = 0, .mips64 = 1, .loongarch64 = 1 }, // mips asserts ++ { DEFOP(Iop_MulF64, UNDEF_ALL), .s390x = 1, .amd64 = 1, .x86 = 1, .arm = 1, .ppc64 = 1, .ppc32 = 1, .mips32 = 0, .mips64 = 1, .loongarch64 = 1 }, // mips asserts ++ { DEFOP(Iop_DivF64, UNDEF_ALL), .s390x = 1, .amd64 = 1, .x86 = 1, .arm = 1, .ppc64 = 1, .ppc32 = 1, .mips32 = 0, .mips64 = 1, .loongarch64 = 1 }, // mips asserts ++ { DEFOP(Iop_ScaleBF64, UNDEF_ALL), .s390x = 0, .amd64 = 0, .x86 = 0, .arm = 0, .ppc64 = 0, .ppc32 = 0, .mips32 = 0, .mips64 = 0, .loongarch64 = 1 }, ++ { DEFOP(Iop_AddF32, UNDEF_ALL), .s390x = 1, .amd64 = 0, .x86 = 0, .arm = 1, .ppc64 = 0, .ppc32 = 0, .mips32 = 1, .mips64 = 1, .loongarch64 = 1 }, ++ { DEFOP(Iop_AddF16, UNDEF_ALL), .s390x = 0, .amd64 = 0, .x86 = 0, .arm = 0, .arm64 = 1, .ppc64 = 0, .ppc32 = 0, .mips32 = 0, .mips64 = 0, .loongarch64 = 0 }, ++ { DEFOP(Iop_SubF32, UNDEF_ALL), .s390x = 1, .amd64 = 0, .x86 = 0, .arm = 1, .ppc64 = 0, .ppc32 = 0, .mips32 = 1, .mips64 = 1, .loongarch64 = 1 }, ++ { DEFOP(Iop_SubF16, UNDEF_ALL), .s390x = 0, .amd64 = 0, .x86 = 0, .arm = 0, .arm64 = 1,.ppc64 = 0, .ppc32 = 0, .mips32 = 0, .mips64 = 0, .loongarch64 = 0 }, ++ { DEFOP(Iop_MulF32, UNDEF_ALL), .s390x = 1, .amd64 = 0, 
.x86 = 0, .arm = 1, .ppc64 = 0, .ppc32 = 0, .mips32 = 1, .mips64 = 1, .loongarch64 = 1 }, ++ { DEFOP(Iop_DivF32, UNDEF_ALL), .s390x = 1, .amd64 = 0, .x86 = 0, .arm = 1, .ppc64 = 0, .ppc32 = 0, .mips32 = 1, .mips64 = 1, .loongarch64 = 1 }, ++ { DEFOP(Iop_ScaleBF32, UNDEF_ALL), .s390x = 0, .amd64 = 0, .x86 = 0, .arm = 0, .ppc64 = 0, .ppc32 = 0, .mips32 = 0, .mips64 = 0, .loongarch64 = 1 }, ++ { DEFOP(Iop_AddF64r32, UNDEF_ALL), .s390x = 0, .amd64 = 0, .x86 = 0, .arm = 0, .ppc64 = 1, .ppc32 = 1, .mips32 = 0, .mips64 = 0, .loongarch64 = 0 }, ++ { DEFOP(Iop_SubF64r32, UNDEF_ALL), .s390x = 0, .amd64 = 0, .x86 = 0, .arm = 0, .ppc64 = 1, .ppc32 = 1, .mips32 = 0, .mips64 = 0, .loongarch64 = 0 }, ++ { DEFOP(Iop_MulF64r32, UNDEF_ALL), .s390x = 0, .amd64 = 0, .x86 = 0, .arm = 0, .ppc64 = 1, .ppc32 = 1, .mips32 = 0, .mips64 = 0, .loongarch64 = 0 }, ++ { DEFOP(Iop_DivF64r32, UNDEF_ALL), .s390x = 0, .amd64 = 0, .x86 = 0, .arm = 0, .ppc64 = 1, .ppc32 = 1, .mips32 = 0, .mips64 = 0, .loongarch64 = 0 }, ++ { DEFOP(Iop_NegF64, UNDEF_ALL), .s390x = 1, .amd64 = 1, .x86 = 1, .arm = 1, .ppc64 = 1, .ppc32 = 1, .mips32 = 1, .mips64 = 1, .loongarch64 = 1 }, ++ { DEFOP(Iop_AbsF64, UNDEF_ALL), .s390x = 1, .amd64 = 1, .x86 = 1, .arm = 1, .ppc64 = 1, .ppc32 = 1, .mips32 = 1, .mips64 = 1, .loongarch64 = 1 }, ++ { DEFOP(Iop_NegF32, UNDEF_ALL), .s390x = 1, .amd64 = 0, .x86 = 0, .arm = 1, .ppc64 = 0, .ppc32 = 0, .mips32 = 1, .mips64 = 1, .loongarch64 = 1 }, ++ { DEFOP(Iop_NegF16, UNDEF_ALL), .s390x = 0, .amd64 = 0, .x86 = 0, .arm = 0, .arm64 = 1, .ppc64 = 0, .ppc32 = 0, .mips32 = 0, .mips64 = 0, .loongarch64 = 0 }, ++ { DEFOP(Iop_AbsF32, UNDEF_ALL), .s390x = 1, .amd64 = 0, .x86 = 0, .arm = 1, .ppc64 = 0, .ppc32 = 0, .mips32 = 1, .mips64 = 1, .loongarch64 = 1 }, ++ { DEFOP(Iop_AbsF16, UNDEF_ALL), .s390x = 0, .amd64 = 0, .x86 = 0, .arm = 0, .arm64 = 1, .ppc64 = 0, .ppc32 = 0, .mips32 = 0, .mips64 = 0, .loongarch64 = 0 }, ++ { DEFOP(Iop_SqrtF64, UNDEF_ALL), .s390x = 1, .amd64 = 1, .x86 = 1, .arm = 1, .ppc64 = 1, .ppc32 = 1, .mips32 = 0, .mips64 = 1, .loongarch64 = 1 }, // mips asserts ++ { DEFOP(Iop_RSqrtF64, UNDEF_ALL), .s390x = 0, .amd64 = 0, .x86 = 0, .arm = 0, .ppc64 = 0, .ppc32 = 0, .mips32 = 0, .mips64 = 0, .loongarch64 = 1 }, ++ { DEFOP(Iop_LogBF64, UNDEF_ALL), .s390x = 0, .amd64 = 0, .x86 = 0, .arm = 0, .ppc64 = 0, .ppc32 = 0, .mips32 = 0, .mips64 = 0, .loongarch64 = 1 }, ++ { DEFOP(Iop_SqrtF32, UNDEF_ALL), .s390x = 1, .amd64 = 0, .x86 = 0, .arm = 1, .ppc64 = 0, .ppc32 = 0, .mips32 = 1, .mips64 = 1, .loongarch64 = 1 }, ++ { DEFOP(Iop_RSqrtF32, UNDEF_ALL), .s390x = 0, .amd64 = 0, .x86 = 0, .arm = 0, .ppc64 = 0, .ppc32 = 0, .mips32 = 0, .mips64 = 0, .loongarch64 = 1 }, ++ { DEFOP(Iop_LogBF32, UNDEF_ALL), .s390x = 0, .amd64 = 0, .x86 = 0, .arm = 0, .ppc64 = 0, .ppc32 = 0, .mips32 = 0, .mips64 = 0, .loongarch64 = 1 }, ++ { DEFOP(Iop_SqrtF16, UNDEF_ALL), .s390x = 0, .amd64 = 0, .x86 = 0, .arm = 0, .arm64 = 1, .ppc64 = 0, .ppc32 = 0, .mips32 = 0, .mips64 = 0, .loongarch64 = 0 }, ++ { DEFOP(Iop_CmpF64, UNDEF_ALL), .s390x = 1, .amd64 = 1, .x86 = 1, .arm = 1, .ppc64 = 1, .ppc32 = 1, .mips32 = 1, .mips64 = 1, .loongarch64 = 1 }, ++ { DEFOP(Iop_CmpF32, UNDEF_ALL), .s390x = 1, .amd64 = 0, .x86 = 0, .arm = 0, .ppc64 = 0, .ppc32 = 0, .mips32 = 0, .mips64 = 0, .loongarch64 = 1 }, // mips asserts ++ { DEFOP(Iop_CmpF16, UNDEF_ALL), .s390x = 0, .amd64 = 0, .x86 = 0, .arm = 0, .arm64 = 1, .ppc64 = 0, .ppc32 = 0, .mips32 = 0, .mips64 = 0, .loongarch64 = 0 }, ++ { DEFOP(Iop_CmpF128, UNDEF_ALL), .s390x = 1, .amd64 = 0, .x86 = 0, .arm = 0, 
.ppc64 = 0, .ppc32 = 0, .mips32 = 0, .mips64 = 0, .loongarch64 = 0 }, ++ { DEFOP(Iop_F64toI16S, UNDEF_ALL), .s390x = 0, .amd64 = 0, .x86 = 1, .arm = 0, .ppc64 = 0, .ppc32 = 0, .mips32 = 0, .mips64 = 0, .loongarch64 = 0 }, ++ { DEFOP(Iop_F64toI32S, UNDEF_ALL), .s390x = 1, .amd64 = 1, .x86 = 1, .arm = 1, .ppc64 = 1, .ppc32 = 1, .mips32 = 1, .mips64 = 1, .loongarch64 = 1 }, ++ { DEFOP(Iop_F64toI64S, UNDEF_ALL), .s390x = 1, .amd64 = 1, .x86 = 1, .arm = 0, .ppc64 = 1, .ppc32 = 1, .mips32 = 0, .mips64 = 0, .loongarch64 = 1 }, ++ { DEFOP(Iop_F64toI64U, UNDEF_ALL), .s390x = 1, .amd64 = 0, .x86 = 0, .arm = 0, .ppc64 = 1, .ppc32 = 1, .mips32 = 0, .mips64 = 0, .loongarch64 = 0 }, ++ { DEFOP(Iop_F64toI32U, UNDEF_ALL), .s390x = 1, .amd64 = 0, .x86 = 0, .arm = 1, .ppc64 = 1, .ppc32 = 1, .mips32 = 0, .mips64 = 0, .loongarch64 = 0 }, ++ { DEFOP(Iop_I32StoF64, UNDEF_ALL), .s390x = 1, .amd64 = 1, .x86 = 1, .arm = 1, .ppc64 = 0, .ppc32 = 0, .mips32 = 1, .mips64 = 1, .loongarch64 = 1 }, ++ { DEFOP(Iop_I64StoF64, UNDEF_ALL), .s390x = 1, .amd64 = 1, .x86 = 1, .arm = 0, .ppc64 = 1, .ppc32 = 1, .mips32 = 0, .mips64 = 1, .loongarch64 = 1 }, ++ { DEFOP(Iop_I64UtoF64, UNDEF_ALL), .s390x = 1, .amd64 = 0, .x86 = 0, .arm = 0, .ppc64 = 1, .ppc32 = 1, .mips32 = 0, .mips64 = 0, .loongarch64 = 0 }, // mips asserts ++ { DEFOP(Iop_I64UtoF32, UNDEF_ALL), .s390x = 1, .amd64 = 0, .x86 = 0, .arm = 0, .ppc64 = 1, .ppc32 = 1, .mips32 = 0, .mips64 = 0, .loongarch64 = 0 }, ++ { DEFOP(Iop_I32UtoF32, UNDEF_ALL), .s390x = 1, .amd64 = 0, .x86 = 0, .arm = 0, .ppc64 = 0, .ppc32 = 0, .mips32 = 0, .mips64 = 0, .loongarch64 = 0 }, ++ { DEFOP(Iop_I32UtoF64, UNDEF_ALL), .s390x = 1, .amd64 = 0, .x86 = 0, .arm = 1, .ppc64 = 0, .ppc32 = 0, .mips32 = 0, .mips64 = 0, .loongarch64 = 0 }, ++ { DEFOP(Iop_F32toI32S, UNDEF_ALL), .s390x = 1, .amd64 = 0, .x86 = 0, .arm = 0, .ppc64 = 0, .ppc32 = 0, .mips32 = 0, .mips64 = 0, .loongarch64 = 1 }, ++ { DEFOP(Iop_F32toI64S, UNDEF_ALL), .s390x = 1, .amd64 = 0, .x86 = 0, .arm = 0, .ppc64 = 0, .ppc32 = 0, .mips32 = 0, .mips64 = 1, .loongarch64 = 1 }, ++ { DEFOP(Iop_F32toI32U, UNDEF_ALL), .s390x = 1, .amd64 = 0, .x86 = 0, .arm = 0, .ppc64 = 0, .ppc32 = 0, .mips32 = 0, .mips64 = 0, .loongarch64 = 0 }, ++ { DEFOP(Iop_F32toI64U, UNDEF_ALL), .s390x = 1, .amd64 = 0, .x86 = 0, .arm = 0, .ppc64 = 0, .ppc32 = 0, .mips32 = 0, .mips64 = 0, .loongarch64 = 0 }, ++ { DEFOP(Iop_I32StoF32, UNDEF_ALL), .s390x = 1, .amd64 = 0, .x86 = 0, .arm = 0, .ppc64 = 0, .ppc32 = 0, .mips32 = 1, .mips64 = 1, .loongarch64 = 1 }, ++ { DEFOP(Iop_I64StoF32, UNDEF_ALL), .s390x = 1, .amd64 = 0, .x86 = 0, .arm = 0, .ppc64 = 0, .ppc32 = 0, .mips32 = 0, .mips64 = 1, .loongarch64 = 1 }, ++ { DEFOP(Iop_F32toF64, UNDEF_ALL), .s390x = 1, .amd64 = 1, .x86 = 1, .arm = 1, .ppc64 = 1, .ppc32 = 1, .mips32 = 1, .mips64 = 1, .loongarch64 = 1 }, ++ { DEFOP(Iop_F64toF32, UNDEF_ALL), .s390x = 1, .amd64 = 1, .x86 = 1, .arm = 1, .ppc64 = 0, .ppc32 = 0, .mips32 = 0, .mips64 = 1, .loongarch64 = 1 }, // mips asserts ++ { DEFOP(Iop_ReinterpF64asI64, UNDEF_SAME), .s390x = 1, .amd64 = 1, .x86 = 1, .arm = 1, .ppc64 = 1, .ppc32 = 1, .mips32 = 0, .mips64 = 1, .loongarch64 = 1 }, // mips asserts ++ { DEFOP(Iop_ReinterpI64asF64, UNDEF_SAME), .s390x = 1, .amd64 = 1, .x86 = 1, .arm = 1, .ppc64 = 1, .ppc32 = 1, .mips32 = 0, .mips64 = 1, .loongarch64 = 1 }, // mips asserts ++ { DEFOP(Iop_ReinterpF32asI32, UNDEF_SAME), .s390x = 1, .amd64 = 1, .x86 = 1, .arm = 1, .ppc64 = 1, .ppc32 = 1, .mips32 = 1, .mips64 = 1, .loongarch64 = 1 }, + // ppc requires this op to show up in a specific 
context. So it cannot be + // tested standalone on that platform. +- { DEFOP(Iop_ReinterpI32asF32, UNDEF_SAME), .s390x = 1, .amd64 = 1, .x86 = 1, .arm = 1, .ppc64 = 0, .ppc32 = 0, .mips32 = 1, .mips64 = 1 }, +- { DEFOP(Iop_F64HLtoF128, UNDEF_CONCAT), .s390x = 1, .amd64 = 0, .x86 = 0, .arm = 0, .ppc64 = 0, .ppc32 = 0, .mips32 = 0, .mips64 = 0 }, +- { DEFOP(Iop_F128HItoF64, UNDEF_UPPER), .s390x = 1, .amd64 = 0, .x86 = 0, .arm = 0, .ppc64 = 0, .ppc32 = 0, .mips32 = 0, .mips64 = 0 }, +- { DEFOP(Iop_F128LOtoF64, UNDEF_TRUNC), .s390x = 1, .amd64 = 0, .x86 = 0, .arm = 0, .ppc64 = 0, .ppc32 = 0, .mips32 = 0, .mips64 = 0 }, +- { DEFOP(Iop_AddF128, UNDEF_ALL), .s390x = 1, .amd64 = 0, .x86 = 0, .arm = 0, .ppc64 = 0, .ppc32 = 0, .mips32 = 0, .mips64 = 0 }, +- { DEFOP(Iop_SubF128, UNDEF_ALL), .s390x = 1, .amd64 = 0, .x86 = 0, .arm = 0, .ppc64 = 0, .ppc32 = 0, .mips32 = 0, .mips64 = 0 }, +- { DEFOP(Iop_MulF128, UNDEF_ALL), .s390x = 1, .amd64 = 0, .x86 = 0, .arm = 0, .ppc64 = 0, .ppc32 = 0, .mips32 = 0, .mips64 = 0 }, +- { DEFOP(Iop_DivF128, UNDEF_ALL), .s390x = 1, .amd64 = 0, .x86 = 0, .arm = 0, .ppc64 = 0, .ppc32 = 0, .mips32 = 0, .mips64 = 0 }, +- { DEFOP(Iop_MAddF128, UNDEF_ALL), .s390x = 0, .amd64 = 0, .x86 = 0, .arm = 0, .ppc64 = 0, .ppc32 = 0, .mips32 = 0, .mips64 = 0 }, +- { DEFOP(Iop_MSubF128, UNDEF_ALL), .s390x = 0, .amd64 = 0, .x86 = 0, .arm = 0, .ppc64 = 0, .ppc32 = 0, .mips32 = 0, .mips64 = 0 }, +- { DEFOP(Iop_NegMAddF128, UNDEF_ALL), .s390x = 0, .amd64 = 0, .x86 = 0, .arm = 0, .ppc64 = 0, .ppc32 = 0, .mips32 = 0, .mips64 = 0 }, +- { DEFOP(Iop_NegMSubF128, UNDEF_ALL), .s390x = 0, .amd64 = 0, .x86 = 0, .arm = 0, .ppc64 = 0, .ppc32 = 0, .mips32 = 0, .mips64 = 0 }, +- { DEFOP(Iop_NegF128, UNDEF_ALL), .s390x = 1, .amd64 = 0, .x86 = 0, .arm = 0, .ppc64 = 0, .ppc32 = 0, .mips32 = 0, .mips64 = 0 }, +- { DEFOP(Iop_AbsF128, UNDEF_ALL), .s390x = 1, .amd64 = 0, .x86 = 0, .arm = 0, .ppc64 = 0, .ppc32 = 0, .mips32 = 0, .mips64 = 0 }, +- { DEFOP(Iop_SqrtF128, UNDEF_ALL), .s390x = 1, .amd64 = 0, .x86 = 0, .arm = 0, .ppc64 = 0, .ppc32 = 0, .mips32 = 0, .mips64 = 0 }, +- { DEFOP(Iop_I32StoF128, UNDEF_ALL), .s390x = 1, .amd64 = 0, .x86 = 0, .arm = 0, .ppc64 = 0, .ppc32 = 0, .mips32 = 0, .mips64 = 0 }, +- { DEFOP(Iop_I64StoF128, UNDEF_ALL), .s390x = 1, .amd64 = 0, .x86 = 0, .arm = 0, .ppc64 = 0, .ppc32 = 0, .mips32 = 0, .mips64 = 0 }, +- { DEFOP(Iop_I32UtoF128, UNDEF_ALL), .s390x = 1, .amd64 = 0, .x86 = 0, .arm = 0, .ppc64 = 0, .ppc32 = 0, .mips32 = 0, .mips64 = 0 }, +- { DEFOP(Iop_I64UtoF128, UNDEF_ALL), .s390x = 1, .amd64 = 0, .x86 = 0, .arm = 0, .ppc64 = 0, .ppc32 = 0, .mips32 = 0, .mips64 = 0 }, +- { DEFOP(Iop_F32toF128, UNDEF_ALL), .s390x = 1, .amd64 = 0, .x86 = 0, .arm = 0, .ppc64 = 0, .ppc32 = 0, .mips32 = 0, .mips64 = 0 }, +- { DEFOP(Iop_F64toF128, UNDEF_ALL), .s390x = 1, .amd64 = 0, .x86 = 0, .arm = 0, .ppc64 = 0, .ppc32 = 0, .mips32 = 0, .mips64 = 0 }, +- { DEFOP(Iop_F128toI32S, UNDEF_ALL), .s390x = 1, .amd64 = 0, .x86 = 0, .arm = 0, .ppc64 = 0, .ppc32 = 0, .mips32 = 0, .mips64 = 0 }, +- { DEFOP(Iop_F128toI64S, UNDEF_ALL), .s390x = 1, .amd64 = 0, .x86 = 0, .arm = 0, .ppc64 = 0, .ppc32 = 0, .mips32 = 0, .mips64 = 0 }, +- { DEFOP(Iop_F128toI32U, UNDEF_ALL), .s390x = 1, .amd64 = 0, .x86 = 0, .arm = 0, .ppc64 = 0, .ppc32 = 0, .mips32 = 0, .mips64 = 0 }, +- { DEFOP(Iop_F128toI64U, UNDEF_ALL), .s390x = 1, .amd64 = 0, .x86 = 0, .arm = 0, .ppc64 = 0, .ppc32 = 0, .mips32 = 0, .mips64 = 0 }, +- { DEFOP(Iop_F128toI128S, UNDEF_ALL), .s390x = 0, .amd64 = 0, .x86 = 0, .arm = 0, .ppc64 = 0, .ppc32 = 0, .mips32 = 0, 
.mips64 = 0 }, +- { DEFOP(Iop_F128toF64, UNDEF_ALL), .s390x = 1, .amd64 = 0, .x86 = 0, .arm = 0, .ppc64 = 0, .ppc32 = 0, .mips32 = 0, .mips64 = 0 }, +- { DEFOP(Iop_F128toF32, UNDEF_ALL), .s390x = 1, .amd64 = 0, .x86 = 0, .arm = 0, .ppc64 = 0, .ppc32 = 0, .mips32 = 0, .mips64 = 0 }, +- { DEFOP(Iop_RndF128, UNDEF_ALL), .s390x = 0, .amd64 = 0, .x86 = 0, .arm = 0, .ppc64 = 0, .ppc32 = 0, .mips32 = 0, .mips64 = 0 }, +- { DEFOP(Iop_TruncF128toI32S,UNDEF_ALL), .s390x = 0, .amd64 = 0, .x86 = 0, .arm = 0, .ppc64 = 0, .ppc32 = 0, .mips32 = 0, .mips64 = 0 }, +- { DEFOP(Iop_TruncF128toI32U,UNDEF_ALL), .s390x = 0, .amd64 = 0, .x86 = 0, .arm = 0, .ppc64 = 0, .ppc32 = 0, .mips32 = 0, .mips64 = 0 }, +- { DEFOP(Iop_TruncF128toI64U,UNDEF_ALL), .s390x = 0, .amd64 = 0, .x86 = 0, .arm = 0, .ppc64 = 0, .ppc32 = 0, .mips32 = 0, .mips64 = 0 }, +- { DEFOP(Iop_TruncF128toI64S,UNDEF_ALL), .s390x = 0, .amd64 = 0, .x86 = 0, .arm = 0, .ppc64 = 0, .ppc32 = 0, .mips32 = 0, .mips64 = 0 }, +- { DEFOP(Iop_AtanF64, UNDEF_ALL), .s390x = 0, .amd64 = 1, .x86 = 1, .arm = 0, .ppc64 = 0, .ppc32 = 0, .mips32 = 0, .mips64 = 0 }, +- { DEFOP(Iop_Yl2xF64, UNDEF_ALL), .s390x = 0, .amd64 = 1, .x86 = 1, .arm = 0, .ppc64 = 0, .ppc32 = 0, .mips32 = 0, .mips64 = 0 }, +- { DEFOP(Iop_Yl2xp1F64, UNDEF_ALL), .s390x = 0, .amd64 = 1, .x86 = 1, .arm = 0, .ppc64 = 0, .ppc32 = 0, .mips32 = 0, .mips64 = 0 }, +- { DEFOP(Iop_PRemF64, UNDEF_ALL), .s390x = 0, .amd64 = 1, .x86 = 1, .arm = 0, .ppc64 = 0, .ppc32 = 0, .mips32 = 0, .mips64 = 0 }, +- { DEFOP(Iop_PRemC3210F64, UNDEF_ALL), .s390x = 0, .amd64 = 1, .x86 = 1, .arm = 0, .ppc64 = 0, .ppc32 = 0, .mips32 = 0, .mips64 = 0 }, +- { DEFOP(Iop_PRem1F64, UNDEF_ALL), .s390x = 0, .amd64 = 1, .x86 = 1, .arm = 0, .ppc64 = 0, .ppc32 = 0, .mips32 = 0, .mips64 = 0 }, +- { DEFOP(Iop_PRem1C3210F64, UNDEF_ALL), .s390x = 0, .amd64 = 1, .x86 = 1, .arm = 0, .ppc64 = 0, .ppc32 = 0, .mips32 = 0, .mips64 = 0 }, +- { DEFOP(Iop_ScaleF64, UNDEF_ALL), .s390x = 0, .amd64 = 1, .x86 = 1, .arm = 0, .ppc64 = 0, .ppc32 = 0, .mips32 = 0, .mips64 = 0 }, +- { DEFOP(Iop_SinF64, UNDEF_ALL), .s390x = 0, .amd64 = 1, .x86 = 1, .arm = 0, .ppc64 = 0, .ppc32 = 0, .mips32 = 0, .mips64 = 0 }, +- { DEFOP(Iop_CosF64, UNDEF_ALL), .s390x = 0, .amd64 = 1, .x86 = 1, .arm = 0, .ppc64 = 0, .ppc32 = 0, .mips32 = 0, .mips64 = 0 }, +- { DEFOP(Iop_TanF64, UNDEF_ALL), .s390x = 0, .amd64 = 1, .x86 = 1, .arm = 0, .ppc64 = 0, .ppc32 = 0, .mips32 = 0, .mips64 = 0 }, +- { DEFOP(Iop_2xm1F64, UNDEF_ALL), .s390x = 0, .amd64 = 1, .x86 = 1, .arm = 0, .ppc64 = 0, .ppc32 = 0, .mips32 = 0, .mips64 = 0 }, +- { DEFOP(Iop_RoundF128toInt, UNDEF_ALL), .s390x = 1, .amd64 = 0, .x86 = 0, .arm = 0, .ppc64 = 0, .ppc32 = 0, .mips32 = 0, .mips64 = 0 }, +- { DEFOP(Iop_RoundF64toInt, UNDEF_ALL), .s390x = 1, .amd64 = 1, .x86 = 1, .arm = 0, .ppc64 = 0, .ppc32 = 0, .mips32 = 0, .mips64 = 1 }, +- { DEFOP(Iop_RoundF32toInt, UNDEF_ALL), .s390x = 1, .amd64 = 1, .x86 = 1, .arm = 0, .ppc64 = 0, .ppc32 = 0, .mips32 = 1, .mips64 = 1 }, +- { DEFOP(Iop_MAddF32, UNDEF_ALL), .s390x = 1, .amd64 = 0, .x86 = 0, .arm = 0, .ppc64 = 0, .ppc32 = 0, .mips32 = 0, .mips64 = 1 }, +- { DEFOP(Iop_MSubF32, UNDEF_ALL), .s390x = 1, .amd64 = 0, .x86 = 0, .arm = 0, .ppc64 = 0, .ppc32 = 0, .mips32 = 0, .mips64 = 1 }, +- { DEFOP(Iop_MAddF64, UNDEF_ALL), .s390x = 1, .amd64 = 0, .x86 = 0, .arm = 0, .ppc64 = 1, .ppc32 = 1, .mips32 = 0, .mips64 = 1 }, +- { DEFOP(Iop_MSubF64, UNDEF_ALL), .s390x = 1, .amd64 = 0, .x86 = 0, .arm = 0, .ppc64 = 1, .ppc32 = 1, .mips32 = 0, .mips64 = 1 }, +- { DEFOP(Iop_MAddF64r32, UNDEF_ALL), 
.s390x = 0, .amd64 = 0, .x86 = 0, .arm = 0, .ppc64 = 1, .ppc32 = 1, .mips32 = 0, .mips64 = 0 }, +- { DEFOP(Iop_MSubF64r32, UNDEF_ALL), .s390x = 0, .amd64 = 0, .x86 = 0, .arm = 0, .ppc64 = 1, .ppc32 = 1, .mips32 = 0, .mips64 = 0 }, +- { DEFOP(Iop_RSqrtEst5GoodF64, UNDEF_ALL), .s390x = 0, .amd64 = 0, .x86 = 0, .arm = 0, .ppc64 = 1, .ppc32 = 1, .mips32 = 0, .mips64 = 0 }, +- { DEFOP(Iop_RoundF64toF64_NEAREST, UNDEF_ALL), .s390x = 0, .amd64 = 0, .x86 = 0, .arm = 0, .ppc64 = 1, .ppc32 = 1, .mips32 = 0, .mips64 = 1 }, +- { DEFOP(Iop_RoundF64toF64_NegINF, UNDEF_ALL), .s390x = 0, .amd64 = 0, .x86 = 0, .arm = 0, .ppc64 = 1, .ppc32 = 1, .mips32 = 0, .mips64 = 1 }, +- { DEFOP(Iop_RoundF64toF64_PosINF, UNDEF_ALL), .s390x = 0, .amd64 = 0, .x86 = 0, .arm = 0, .ppc64 = 1, .ppc32 = 1, .mips32 = 0, .mips64 = 1 }, +- { DEFOP(Iop_RoundF64toF64_ZERO, UNDEF_ALL), .s390x = 0, .amd64 = 0, .x86 = 0, .arm = 0, .ppc64 = 1, .ppc32 = 1, .mips32 = 0, .mips64 = 1 }, +- { DEFOP(Iop_TruncF64asF32, UNDEF_ALL), .s390x = 0, .amd64 = 0, .x86 = 0, .arm = 0, .ppc64 = 1, .ppc32 = 1, .mips32 = 0, .mips64 = 1 }, // mips asserts +- { DEFOP(Iop_RoundF64toF32, UNDEF_ALL), .s390x = 0, .amd64 = 0, .x86 = 0, .arm = 0, .ppc64 = 1, .ppc32 = 1, .mips32 = 0, .mips64 = 0 }, ++ { DEFOP(Iop_ReinterpI32asF32, UNDEF_SAME), .s390x = 1, .amd64 = 1, .x86 = 1, .arm = 1, .ppc64 = 0, .ppc32 = 0, .mips32 = 1, .mips64 = 1, .loongarch64 = 1 }, ++ { DEFOP(Iop_F64HLtoF128, UNDEF_CONCAT), .s390x = 1, .amd64 = 0, .x86 = 0, .arm = 0, .ppc64 = 0, .ppc32 = 0, .mips32 = 0, .mips64 = 0, .loongarch64 = 0 }, ++ { DEFOP(Iop_F128HItoF64, UNDEF_UPPER), .s390x = 1, .amd64 = 0, .x86 = 0, .arm = 0, .ppc64 = 0, .ppc32 = 0, .mips32 = 0, .mips64 = 0, .loongarch64 = 0 }, ++ { DEFOP(Iop_F128LOtoF64, UNDEF_TRUNC), .s390x = 1, .amd64 = 0, .x86 = 0, .arm = 0, .ppc64 = 0, .ppc32 = 0, .mips32 = 0, .mips64 = 0, .loongarch64 = 0 }, ++ { DEFOP(Iop_AddF128, UNDEF_ALL), .s390x = 1, .amd64 = 0, .x86 = 0, .arm = 0, .ppc64 = 0, .ppc32 = 0, .mips32 = 0, .mips64 = 0, .loongarch64 = 0 }, ++ { DEFOP(Iop_SubF128, UNDEF_ALL), .s390x = 1, .amd64 = 0, .x86 = 0, .arm = 0, .ppc64 = 0, .ppc32 = 0, .mips32 = 0, .mips64 = 0, .loongarch64 = 0 }, ++ { DEFOP(Iop_MulF128, UNDEF_ALL), .s390x = 1, .amd64 = 0, .x86 = 0, .arm = 0, .ppc64 = 0, .ppc32 = 0, .mips32 = 0, .mips64 = 0, .loongarch64 = 0 }, ++ { DEFOP(Iop_DivF128, UNDEF_ALL), .s390x = 1, .amd64 = 0, .x86 = 0, .arm = 0, .ppc64 = 0, .ppc32 = 0, .mips32 = 0, .mips64 = 0, .loongarch64 = 0 }, ++ { DEFOP(Iop_MAddF128, UNDEF_ALL), .s390x = 0, .amd64 = 0, .x86 = 0, .arm = 0, .ppc64 = 0, .ppc32 = 0, .mips32 = 0, .mips64 = 0, .loongarch64 = 0 }, ++ { DEFOP(Iop_MSubF128, UNDEF_ALL), .s390x = 0, .amd64 = 0, .x86 = 0, .arm = 0, .ppc64 = 0, .ppc32 = 0, .mips32 = 0, .mips64 = 0, .loongarch64 = 0 }, ++ { DEFOP(Iop_NegMAddF128, UNDEF_ALL), .s390x = 0, .amd64 = 0, .x86 = 0, .arm = 0, .ppc64 = 0, .ppc32 = 0, .mips32 = 0, .mips64 = 0, .loongarch64 = 0 }, ++ { DEFOP(Iop_NegMSubF128, UNDEF_ALL), .s390x = 0, .amd64 = 0, .x86 = 0, .arm = 0, .ppc64 = 0, .ppc32 = 0, .mips32 = 0, .mips64 = 0, .loongarch64 = 0 }, ++ { DEFOP(Iop_NegF128, UNDEF_ALL), .s390x = 1, .amd64 = 0, .x86 = 0, .arm = 0, .ppc64 = 0, .ppc32 = 0, .mips32 = 0, .mips64 = 0, .loongarch64 = 0 }, ++ { DEFOP(Iop_AbsF128, UNDEF_ALL), .s390x = 1, .amd64 = 0, .x86 = 0, .arm = 0, .ppc64 = 0, .ppc32 = 0, .mips32 = 0, .mips64 = 0, .loongarch64 = 0 }, ++ { DEFOP(Iop_SqrtF128, UNDEF_ALL), .s390x = 1, .amd64 = 0, .x86 = 0, .arm = 0, .ppc64 = 0, .ppc32 = 0, .mips32 = 0, .mips64 = 0, .loongarch64 = 0 }, ++ { 
DEFOP(Iop_I32StoF128, UNDEF_ALL), .s390x = 1, .amd64 = 0, .x86 = 0, .arm = 0, .ppc64 = 0, .ppc32 = 0, .mips32 = 0, .mips64 = 0, .loongarch64 = 0 }, ++ { DEFOP(Iop_I64StoF128, UNDEF_ALL), .s390x = 1, .amd64 = 0, .x86 = 0, .arm = 0, .ppc64 = 0, .ppc32 = 0, .mips32 = 0, .mips64 = 0, .loongarch64 = 0 }, ++ { DEFOP(Iop_I32UtoF128, UNDEF_ALL), .s390x = 1, .amd64 = 0, .x86 = 0, .arm = 0, .ppc64 = 0, .ppc32 = 0, .mips32 = 0, .mips64 = 0, .loongarch64 = 0 }, ++ { DEFOP(Iop_I64UtoF128, UNDEF_ALL), .s390x = 1, .amd64 = 0, .x86 = 0, .arm = 0, .ppc64 = 0, .ppc32 = 0, .mips32 = 0, .mips64 = 0, .loongarch64 = 0 }, ++ { DEFOP(Iop_F32toF128, UNDEF_ALL), .s390x = 1, .amd64 = 0, .x86 = 0, .arm = 0, .ppc64 = 0, .ppc32 = 0, .mips32 = 0, .mips64 = 0, .loongarch64 = 0 }, ++ { DEFOP(Iop_F64toF128, UNDEF_ALL), .s390x = 1, .amd64 = 0, .x86 = 0, .arm = 0, .ppc64 = 0, .ppc32 = 0, .mips32 = 0, .mips64 = 0, .loongarch64 = 0 }, ++ { DEFOP(Iop_F128toI32S, UNDEF_ALL), .s390x = 1, .amd64 = 0, .x86 = 0, .arm = 0, .ppc64 = 0, .ppc32 = 0, .mips32 = 0, .mips64 = 0, .loongarch64 = 0 }, ++ { DEFOP(Iop_F128toI64S, UNDEF_ALL), .s390x = 1, .amd64 = 0, .x86 = 0, .arm = 0, .ppc64 = 0, .ppc32 = 0, .mips32 = 0, .mips64 = 0, .loongarch64 = 0 }, ++ { DEFOP(Iop_F128toI32U, UNDEF_ALL), .s390x = 1, .amd64 = 0, .x86 = 0, .arm = 0, .ppc64 = 0, .ppc32 = 0, .mips32 = 0, .mips64 = 0, .loongarch64 = 0 }, ++ { DEFOP(Iop_F128toI64U, UNDEF_ALL), .s390x = 1, .amd64 = 0, .x86 = 0, .arm = 0, .ppc64 = 0, .ppc32 = 0, .mips32 = 0, .mips64 = 0, .loongarch64 = 0 }, ++ { DEFOP(Iop_F128toI128S, UNDEF_ALL), .s390x = 0, .amd64 = 0, .x86 = 0, .arm = 0, .ppc64 = 0, .ppc32 = 0, .mips32 = 0, .mips64 = 0, .loongarch64 = 0 }, ++ { DEFOP(Iop_F128toF64, UNDEF_ALL), .s390x = 1, .amd64 = 0, .x86 = 0, .arm = 0, .ppc64 = 0, .ppc32 = 0, .mips32 = 0, .mips64 = 0, .loongarch64 = 0 }, ++ { DEFOP(Iop_F128toF32, UNDEF_ALL), .s390x = 1, .amd64 = 0, .x86 = 0, .arm = 0, .ppc64 = 0, .ppc32 = 0, .mips32 = 0, .mips64 = 0, .loongarch64 = 0 }, ++ { DEFOP(Iop_RndF128, UNDEF_ALL), .s390x = 0, .amd64 = 0, .x86 = 0, .arm = 0, .ppc64 = 0, .ppc32 = 0, .mips32 = 0, .mips64 = 0, .loongarch64 = 0 }, ++ { DEFOP(Iop_TruncF128toI32S,UNDEF_ALL), .s390x = 0, .amd64 = 0, .x86 = 0, .arm = 0, .ppc64 = 0, .ppc32 = 0, .mips32 = 0, .mips64 = 0, .loongarch64 = 0 }, ++ { DEFOP(Iop_TruncF128toI32U,UNDEF_ALL), .s390x = 0, .amd64 = 0, .x86 = 0, .arm = 0, .ppc64 = 0, .ppc32 = 0, .mips32 = 0, .mips64 = 0, .loongarch64 = 0 }, ++ { DEFOP(Iop_TruncF128toI64U,UNDEF_ALL), .s390x = 0, .amd64 = 0, .x86 = 0, .arm = 0, .ppc64 = 0, .ppc32 = 0, .mips32 = 0, .mips64 = 0, .loongarch64 = 0 }, ++ { DEFOP(Iop_TruncF128toI64S,UNDEF_ALL), .s390x = 0, .amd64 = 0, .x86 = 0, .arm = 0, .ppc64 = 0, .ppc32 = 0, .mips32 = 0, .mips64 = 0, .loongarch64 = 0 }, ++ { DEFOP(Iop_AtanF64, UNDEF_ALL), .s390x = 0, .amd64 = 1, .x86 = 1, .arm = 0, .ppc64 = 0, .ppc32 = 0, .mips32 = 0, .mips64 = 0, .loongarch64 = 0 }, ++ { DEFOP(Iop_Yl2xF64, UNDEF_ALL), .s390x = 0, .amd64 = 1, .x86 = 1, .arm = 0, .ppc64 = 0, .ppc32 = 0, .mips32 = 0, .mips64 = 0, .loongarch64 = 0 }, ++ { DEFOP(Iop_Yl2xp1F64, UNDEF_ALL), .s390x = 0, .amd64 = 1, .x86 = 1, .arm = 0, .ppc64 = 0, .ppc32 = 0, .mips32 = 0, .mips64 = 0, .loongarch64 = 0 }, ++ { DEFOP(Iop_PRemF64, UNDEF_ALL), .s390x = 0, .amd64 = 1, .x86 = 1, .arm = 0, .ppc64 = 0, .ppc32 = 0, .mips32 = 0, .mips64 = 0, .loongarch64 = 0 }, ++ { DEFOP(Iop_PRemC3210F64, UNDEF_ALL), .s390x = 0, .amd64 = 1, .x86 = 1, .arm = 0, .ppc64 = 0, .ppc32 = 0, .mips32 = 0, .mips64 = 0, .loongarch64 = 0 }, ++ { DEFOP(Iop_PRem1F64, UNDEF_ALL), 
.s390x = 0, .amd64 = 1, .x86 = 1, .arm = 0, .ppc64 = 0, .ppc32 = 0, .mips32 = 0, .mips64 = 0, .loongarch64 = 0 }, ++ { DEFOP(Iop_PRem1C3210F64, UNDEF_ALL), .s390x = 0, .amd64 = 1, .x86 = 1, .arm = 0, .ppc64 = 0, .ppc32 = 0, .mips32 = 0, .mips64 = 0, .loongarch64 = 0 }, ++ { DEFOP(Iop_ScaleF64, UNDEF_ALL), .s390x = 0, .amd64 = 1, .x86 = 1, .arm = 0, .ppc64 = 0, .ppc32 = 0, .mips32 = 0, .mips64 = 0, .loongarch64 = 0 }, ++ { DEFOP(Iop_SinF64, UNDEF_ALL), .s390x = 0, .amd64 = 1, .x86 = 1, .arm = 0, .ppc64 = 0, .ppc32 = 0, .mips32 = 0, .mips64 = 0, .loongarch64 = 0 }, ++ { DEFOP(Iop_CosF64, UNDEF_ALL), .s390x = 0, .amd64 = 1, .x86 = 1, .arm = 0, .ppc64 = 0, .ppc32 = 0, .mips32 = 0, .mips64 = 0, .loongarch64 = 0 }, ++ { DEFOP(Iop_TanF64, UNDEF_ALL), .s390x = 0, .amd64 = 1, .x86 = 1, .arm = 0, .ppc64 = 0, .ppc32 = 0, .mips32 = 0, .mips64 = 0, .loongarch64 = 0 }, ++ { DEFOP(Iop_2xm1F64, UNDEF_ALL), .s390x = 0, .amd64 = 1, .x86 = 1, .arm = 0, .ppc64 = 0, .ppc32 = 0, .mips32 = 0, .mips64 = 0, .loongarch64 = 0 }, ++ { DEFOP(Iop_RoundF128toInt, UNDEF_ALL), .s390x = 1, .amd64 = 0, .x86 = 0, .arm = 0, .ppc64 = 0, .ppc32 = 0, .mips32 = 0, .mips64 = 0, .loongarch64 = 0 }, ++ { DEFOP(Iop_RoundF64toInt, UNDEF_ALL), .s390x = 1, .amd64 = 1, .x86 = 1, .arm = 0, .ppc64 = 0, .ppc32 = 0, .mips32 = 0, .mips64 = 1, .loongarch64 = 1 }, ++ { DEFOP(Iop_RoundF32toInt, UNDEF_ALL), .s390x = 1, .amd64 = 1, .x86 = 1, .arm = 0, .ppc64 = 0, .ppc32 = 0, .mips32 = 1, .mips64 = 1, .loongarch64 = 1 }, ++ { DEFOP(Iop_MAddF32, UNDEF_ALL), .s390x = 1, .amd64 = 0, .x86 = 0, .arm = 0, .ppc64 = 0, .ppc32 = 0, .mips32 = 0, .mips64 = 1, .loongarch64 = 1 }, ++ { DEFOP(Iop_MSubF32, UNDEF_ALL), .s390x = 1, .amd64 = 0, .x86 = 0, .arm = 0, .ppc64 = 0, .ppc32 = 0, .mips32 = 0, .mips64 = 1, .loongarch64 = 1 }, ++ { DEFOP(Iop_MAddF64, UNDEF_ALL), .s390x = 1, .amd64 = 0, .x86 = 0, .arm = 0, .ppc64 = 1, .ppc32 = 1, .mips32 = 0, .mips64 = 1, .loongarch64 = 1 }, ++ { DEFOP(Iop_MSubF64, UNDEF_ALL), .s390x = 1, .amd64 = 0, .x86 = 0, .arm = 0, .ppc64 = 1, .ppc32 = 1, .mips32 = 0, .mips64 = 1, .loongarch64 = 1 }, ++ { DEFOP(Iop_MAddF64r32, UNDEF_ALL), .s390x = 0, .amd64 = 0, .x86 = 0, .arm = 0, .ppc64 = 1, .ppc32 = 1, .mips32 = 0, .mips64 = 0, .loongarch64 = 0 }, ++ { DEFOP(Iop_MSubF64r32, UNDEF_ALL), .s390x = 0, .amd64 = 0, .x86 = 0, .arm = 0, .ppc64 = 1, .ppc32 = 1, .mips32 = 0, .mips64 = 0, .loongarch64 = 0 }, ++ { DEFOP(Iop_RSqrtEst5GoodF64, UNDEF_ALL), .s390x = 0, .amd64 = 0, .x86 = 0, .arm = 0, .ppc64 = 1, .ppc32 = 1, .mips32 = 0, .mips64 = 0, .loongarch64 = 0 }, ++ { DEFOP(Iop_RoundF64toF64_NEAREST, UNDEF_ALL), .s390x = 0, .amd64 = 0, .x86 = 0, .arm = 0, .ppc64 = 1, .ppc32 = 1, .mips32 = 0, .mips64 = 1, .loongarch64 = 0 }, ++ { DEFOP(Iop_RoundF64toF64_NegINF, UNDEF_ALL), .s390x = 0, .amd64 = 0, .x86 = 0, .arm = 0, .ppc64 = 1, .ppc32 = 1, .mips32 = 0, .mips64 = 1, .loongarch64 = 0 }, ++ { DEFOP(Iop_RoundF64toF64_PosINF, UNDEF_ALL), .s390x = 0, .amd64 = 0, .x86 = 0, .arm = 0, .ppc64 = 1, .ppc32 = 1, .mips32 = 0, .mips64 = 1, .loongarch64 = 0 }, ++ { DEFOP(Iop_RoundF64toF64_ZERO, UNDEF_ALL), .s390x = 0, .amd64 = 0, .x86 = 0, .arm = 0, .ppc64 = 1, .ppc32 = 1, .mips32 = 0, .mips64 = 1, .loongarch64 = 0 }, ++ { DEFOP(Iop_TruncF64asF32, UNDEF_ALL), .s390x = 0, .amd64 = 0, .x86 = 0, .arm = 0, .ppc64 = 1, .ppc32 = 1, .mips32 = 0, .mips64 = 1, .loongarch64 = 0 }, // mips asserts ++ { DEFOP(Iop_RoundF64toF32, UNDEF_ALL), .s390x = 0, .amd64 = 0, .x86 = 0, .arm = 0, .ppc64 = 1, .ppc32 = 1, .mips32 = 0, .mips64 = 0, .loongarch64 = 0 }, + { 
DEFOP(Iop_RecpExpF64, UNDEF_UNKNOWN), }, + { DEFOP(Iop_RecpExpF32, UNDEF_UNKNOWN), }, + + /* --------- Possibly required by IEEE 754-2008. --------- */ +- { DEFOP(Iop_MaxNumF64, UNDEF_ALL), .arm = 1 }, +- { DEFOP(Iop_MinNumF64, UNDEF_ALL), .arm = 1 }, +- { DEFOP(Iop_MaxNumF32, UNDEF_ALL), .arm = 1 }, +- { DEFOP(Iop_MinNumF32, UNDEF_ALL), .arm = 1 }, ++ { DEFOP(Iop_MaxNumF64, UNDEF_ALL), .arm = 1, .loongarch64 = 1 }, ++ { DEFOP(Iop_MinNumF64, UNDEF_ALL), .arm = 1, .loongarch64 = 1 }, ++ { DEFOP(Iop_MaxNumAbsF64, UNDEF_ALL), .arm = 0, .loongarch64 = 1 }, ++ { DEFOP(Iop_MinNumAbsF64, UNDEF_ALL), .arm = 0, .loongarch64 = 1 }, ++ { DEFOP(Iop_MaxNumF32, UNDEF_ALL), .arm = 1, .loongarch64 = 1 }, ++ { DEFOP(Iop_MinNumF32, UNDEF_ALL), .arm = 1, .loongarch64 = 1 }, ++ { DEFOP(Iop_MaxNumAbsF32, UNDEF_ALL), .arm = 0, .loongarch64 = 1 }, ++ { DEFOP(Iop_MinNumAbsF32, UNDEF_ALL), .arm = 0, .loongarch64 = 1 }, + + /* ------------------ 16-bit scalar FP ------------------ */ + { DEFOP(Iop_F16toF64, UNDEF_ALL), .arm64 = 1 }, +diff --git a/memcheck/tests/vbit-test/vtest.h b/memcheck/tests/vbit-test/vtest.h +index c724f4142..fe17f67da 100644 +--- a/memcheck/tests/vbit-test/vtest.h ++++ b/memcheck/tests/vbit-test/vtest.h +@@ -179,15 +179,16 @@ typedef struct { + unsigned immediate_type; + + // Indicate whether IROp can be tested on a particular architecture +- unsigned s390x : 1; +- unsigned amd64 : 1; +- unsigned ppc32 : 1; +- unsigned ppc64 : 1; +- unsigned arm : 1; +- unsigned arm64 : 1; +- unsigned x86 : 1; +- unsigned mips32 : 1; +- unsigned mips64 : 1; ++ unsigned s390x : 1; ++ unsigned amd64 : 1; ++ unsigned ppc32 : 1; ++ unsigned ppc64 : 1; ++ unsigned arm : 1; ++ unsigned arm64 : 1; ++ unsigned x86 : 1; ++ unsigned mips32 : 1; ++ unsigned mips64 : 1; ++ unsigned loongarch64: 1; + } irop_t; + + +diff --git a/nightly/conf/loongarch64.conf b/nightly/conf/loongarch64.conf +new file mode 100644 +index 000000000..8603671db +--- /dev/null ++++ b/nightly/conf/loongarch64.conf +@@ -0,0 +1,3 @@ ++export ABT_DETAILS=`uname -mrs` ++export ABT_JOBS=4 ++export ABT_PERF="--vg=../valgrind-new --vg=../valgrind-old" +diff --git a/nightly/conf/loongarch64.sendemail b/nightly/conf/loongarch64.sendemail +new file mode 100644 +index 000000000..dd806040c +--- /dev/null ++++ b/nightly/conf/loongarch64.sendemail +@@ -0,0 +1,7 @@ ++#!/bin/sh ++ ++subject=$1 ++body=$2 ++file=$3 ++ ++(cat "$body" "$file") | mail -s "$subject" valgrind-testresults@lists.sourceforge.net -f "Feiyang Chen " +diff --git a/none/tests/Makefile.am b/none/tests/Makefile.am +index c0dd7c21d..532f46035 100644 +--- a/none/tests/Makefile.am ++++ b/none/tests/Makefile.am +@@ -35,7 +35,9 @@ endif + if VGCONF_ARCHS_INCLUDE_NANOMIPS + SUBDIRS += nanomips + endif +- ++if VGCONF_ARCHS_INCLUDE_LOONGARCH64 ++SUBDIRS += loongarch64 ++endif + + # OS-specific tests + if VGCONF_OS_IS_LINUX +@@ -75,8 +77,9 @@ SUBDIRS += x86-freebsd + endif + + DIST_SUBDIRS = x86 amd64 ppc32 ppc64 arm arm64 s390x mips32 mips64 nanomips \ +- linux darwin solaris freebsd amd64-linux x86-linux amd64-darwin \ +- x86-darwin amd64-solaris x86-solaris x86-freebsd scripts . ++ loongarch64 linux darwin solaris freebsd amd64-linux x86-linux \ ++ amd64-darwin x86-darwin amd64-solaris x86-solaris x86-freebsd \ ++ scripts . 
+ + dist_noinst_SCRIPTS = \ + filter_cmdline0 \ +diff --git a/none/tests/allexec_prepare_prereq b/none/tests/allexec_prepare_prereq +index a541f4299..49c45c7cc 100755 +--- a/none/tests/allexec_prepare_prereq ++++ b/none/tests/allexec_prepare_prereq +@@ -28,11 +28,12 @@ pair() + } + + +-pair x86 amd64 +-pair ppc32 ppc64 +-pair s390x_unexisting_in_32bits s390x +-pair arm arm64 +-pair mips32 mips64 +-pair nanomips nanoMIPS_unexisting_in_64bits ++pair x86 amd64 ++pair ppc32 ppc64 ++pair s390x_unexisting_in_32bits s390x ++pair arm arm64 ++pair mips32 mips64 ++pair nanomips nanoMIPS_unexisting_in_64bits ++pair loongarch_unexisting_in_32bits loongarch64 + + exit 0 +diff --git a/none/tests/libvex_test.c b/none/tests/libvex_test.c +index 5b57a4c2e..3080ce667 100644 +--- a/none/tests/libvex_test.c ++++ b/none/tests/libvex_test.c +@@ -76,6 +76,8 @@ __attribute__((noinline)) static void get_guest_arch(VexArch *ga) + *ga = VexArchMIPS64; + #elif defined(VGA_nanomips) + *ga = VexArchNANOMIPS; ++#elif defined(VGA_loongarch64) ++ *ga = VexArchLOONGARCH64; + #else + missing arch; + #endif +@@ -113,6 +115,7 @@ static VexEndness arch_endness (VexArch va) { + else + return VexEndnessBE; + } ++ case VexArchLOONGARCH64: return VexEndnessLE; + default: failure_exit(); + } + } +@@ -139,6 +142,7 @@ static UInt arch_hwcaps (VexArch va) { + case VexArchMIPS64: return VEX_PRID_COMP_MIPS | VEX_MIPS_HOST_FR; + #endif + case VexArchNANOMIPS: return 0; ++ case VexArchLOONGARCH64: return VEX_HWCAPS_LOONGARCH_ISA_64BIT; + default: failure_exit(); + } + } +@@ -156,6 +160,7 @@ static Bool mode64 (VexArch va) { + case VexArchMIPS32: return False; + case VexArchMIPS64: return True; + case VexArchNANOMIPS: return False; ++ case VexArchLOONGARCH64: return True; + default: failure_exit(); + } + } +@@ -275,7 +280,7 @@ int main(int argc, char **argv) + // explicitly via command line arguments. 
+ if (multiarch) { + VexArch va; +- for (va = VexArchX86; va <= VexArchNANOMIPS; va++) { ++ for (va = VexArchX86; va <= VexArchLOONGARCH64; va++) { + vta.arch_host = va; + vta.archinfo_host.endness = arch_endness (vta.arch_host); + vta.archinfo_host.hwcaps = arch_hwcaps (vta.arch_host); +diff --git a/none/tests/loongarch64/Makefile.am b/none/tests/loongarch64/Makefile.am +new file mode 100644 +index 000000000..c8e5b5123 +--- /dev/null ++++ b/none/tests/loongarch64/Makefile.am +@@ -0,0 +1,39 @@ ++ ++include $(top_srcdir)/Makefile.tool-tests.am ++ ++dist_noinst_SCRIPTS = filter_stderr ++ ++EXTRA_DIST = \ ++ atomic.stdout.exp atomic.stderr.exp atomic.vgtest \ ++ branch.stdout.exp branch.stderr.exp branch.vgtest \ ++ cpucfg.stdout.exp cpucfg.stderr.exp cpucfg.vgtest \ ++ fault.stdout.exp fault.stderr.exp fault.vgtest \ ++ fault_fp.stdout.exp fault_fp.stderr.exp fault_fp.vgtest \ ++ float.stdout.exp float.stderr.exp float.vgtest \ ++ integer.stdout.exp integer.stderr.exp integer.vgtest \ ++ llsc.stdout.exp llsc.stderr.exp llsc.vgtest \ ++ memory.stdout.exp memory.stderr.exp memory.vgtest \ ++ move.stdout.exp move.stderr.exp move.vgtest \ ++ pc.stdout.exp pc.stderr.exp pc.vgtest \ ++ special.stdout.exp special.stderr.exp special.vgtest ++ ++check_PROGRAMS = \ ++ allexec \ ++ atomic \ ++ branch \ ++ cpucfg \ ++ fault \ ++ fault_fp \ ++ float \ ++ integer \ ++ llsc \ ++ memory \ ++ move \ ++ pc \ ++ special ++ ++AM_CFLAGS += @FLAG_M64@ ++AM_CXXFLAGS += @FLAG_M64@ ++AM_CCASFLAGS += @FLAG_M64@ ++ ++allexec_CFLAGS = $(AM_CFLAGS) @FLAG_W_NO_NONNULL@ +diff --git a/none/tests/loongarch64/allexec.c b/none/tests/loongarch64/allexec.c +new file mode 120000 +index 000000000..6d6a9cf28 +--- /dev/null ++++ b/none/tests/loongarch64/allexec.c +@@ -0,0 +1 @@ ++../allexec.c +\ No newline at end of file +diff --git a/none/tests/loongarch64/atomic.c b/none/tests/loongarch64/atomic.c +new file mode 100644 +index 000000000..916d5f787 +--- /dev/null ++++ b/none/tests/loongarch64/atomic.c +@@ -0,0 +1,75 @@ ++#include ++ ++#define TESTINST_AM(insn, res, val, addr) \ ++ { \ ++ __asm__ __volatile__( \ ++ "move $t1, %1 \n\t" \ ++ "move $t2, %2 \n\t" \ ++ insn " $t0, $t1, $t2 \n\t" \ ++ "move %0, $t0 \n\t" \ ++ : "=r" (res) \ ++ : "r" (val), "r" (addr) \ ++ : "$t0", "$t1", "$t2", "memory"); \ ++ } ++ ++#define TESTINST_AM_4(insn, v) \ ++ { \ ++ printf(#insn ".w ::\n"); \ ++ TESTINST_AM(#insn ".w", res_i, v, &val_i); \ ++ printf("old: %d new: %d\n", res_i, val_i); \ ++ \ ++ printf(#insn "_db.w ::\n"); \ ++ TESTINST_AM(#insn "_db.w", res_i, v, &val_i); \ ++ printf("old: %d new: %d\n", res_i, val_i); \ ++ \ ++ printf(#insn ".d ::\n"); \ ++ TESTINST_AM(#insn ".d", res_l, v, &val_l); \ ++ printf("old: %ld new: %ld\n", res_l, val_l); \ ++ \ ++ printf(#insn "_db.d ::\n"); \ ++ TESTINST_AM(#insn "_db.d", res_l, v, &val_l); \ ++ printf("old: %ld new: %ld\n", res_l, val_l); \ ++ } ++ ++#define TESTINST_AM_U_4(insn, v) \ ++ { \ ++ printf(#insn ".wu ::\n"); \ ++ TESTINST_AM(#insn ".wu", res_i, v, &val_i); \ ++ printf("old: %u new: %u\n", res_i, val_i); \ ++ \ ++ printf(#insn "_db.wu ::\n"); \ ++ TESTINST_AM(#insn "_db.wu", res_i, v, &val_i); \ ++ printf("old: %u new: %u\n", res_i, val_i); \ ++ \ ++ printf(#insn ".du ::\n"); \ ++ TESTINST_AM(#insn ".du", res_l, v, &val_l); \ ++ printf("old: %lu new: %lu\n", res_l, val_l); \ ++ \ ++ printf(#insn "_db.du ::\n"); \ ++ TESTINST_AM(#insn "_db.du", res_l, v, &val_l); \ ++ printf("old: %lu new: %lu\n", res_l, val_l); \ ++ } ++ ++void test(void) ++{ ++ int res_i; ++ long res_l; ++ int val_i = 
1; ++ long val_l = 1; ++ ++ TESTINST_AM_4(amswap, 2); ++ TESTINST_AM_4(amadd, 5); ++ TESTINST_AM_4(amand, 3); ++ TESTINST_AM_4(amor, 8); ++ TESTINST_AM_4(amxor, 4); ++ TESTINST_AM_4(ammax, 16); ++ TESTINST_AM_4(ammin, -1); ++ TESTINST_AM_U_4(ammax, 9); ++ TESTINST_AM_U_4(ammin, 6); ++} ++ ++int main(void) ++{ ++ test(); ++ return 0; ++} +diff --git a/none/tests/loongarch64/atomic.stderr.exp b/none/tests/loongarch64/atomic.stderr.exp +new file mode 100644 +index 000000000..e69de29bb +diff --git a/none/tests/loongarch64/atomic.stdout.exp b/none/tests/loongarch64/atomic.stdout.exp +new file mode 100644 +index 000000000..7eab9ebd0 +--- /dev/null ++++ b/none/tests/loongarch64/atomic.stdout.exp +@@ -0,0 +1,72 @@ ++amswap.w :: ++old: 1 new: 2 ++amswap_db.w :: ++old: 2 new: 2 ++amswap.d :: ++old: 1 new: 2 ++amswap_db.d :: ++old: 2 new: 2 ++amadd.w :: ++old: 2 new: 7 ++amadd_db.w :: ++old: 7 new: 12 ++amadd.d :: ++old: 2 new: 7 ++amadd_db.d :: ++old: 7 new: 12 ++amand.w :: ++old: 12 new: 0 ++amand_db.w :: ++old: 0 new: 0 ++amand.d :: ++old: 12 new: 0 ++amand_db.d :: ++old: 0 new: 0 ++amor.w :: ++old: 0 new: 8 ++amor_db.w :: ++old: 8 new: 8 ++amor.d :: ++old: 0 new: 8 ++amor_db.d :: ++old: 8 new: 8 ++amxor.w :: ++old: 8 new: 12 ++amxor_db.w :: ++old: 12 new: 8 ++amxor.d :: ++old: 8 new: 12 ++amxor_db.d :: ++old: 12 new: 8 ++ammax.w :: ++old: 8 new: 16 ++ammax_db.w :: ++old: 16 new: 16 ++ammax.d :: ++old: 8 new: 16 ++ammax_db.d :: ++old: 16 new: 16 ++ammin.w :: ++old: 16 new: -1 ++ammin_db.w :: ++old: -1 new: -1 ++ammin.d :: ++old: 16 new: -1 ++ammin_db.d :: ++old: -1 new: -1 ++ammax.wu :: ++old: 4294967295 new: 4294967295 ++ammax_db.wu :: ++old: 4294967295 new: 4294967295 ++ammax.du :: ++old: 18446744073709551615 new: 18446744073709551615 ++ammax_db.du :: ++old: 18446744073709551615 new: 18446744073709551615 ++ammin.wu :: ++old: 4294967295 new: 6 ++ammin_db.wu :: ++old: 6 new: 6 ++ammin.du :: ++old: 18446744073709551615 new: 6 ++ammin_db.du :: ++old: 6 new: 6 +diff --git a/none/tests/loongarch64/atomic.vgtest b/none/tests/loongarch64/atomic.vgtest +new file mode 100644 +index 000000000..8fe5ce5f3 +--- /dev/null ++++ b/none/tests/loongarch64/atomic.vgtest +@@ -0,0 +1,3 @@ ++prereq: ../../../tests/loongarch64_features lam ++prog: atomic ++vgopts: -q +diff --git a/none/tests/loongarch64/branch.c b/none/tests/loongarch64/branch.c +new file mode 100644 +index 000000000..e702d3fa1 +--- /dev/null ++++ b/none/tests/loongarch64/branch.c +@@ -0,0 +1,148 @@ ++#include ++ ++#define TESTINST_B_RR(insn, val1, val2) \ ++ { \ ++ int res; \ ++ unsigned long v1 = (unsigned long)val1; \ ++ unsigned long v2 = (unsigned long)val2; \ ++ __asm__ __volatile__( \ ++ insn " %1, %2, 1f \n\t" \ ++ " move %0, $zero \n\t" \ ++ " b 2f \n\t" \ ++ "1: \n\t" \ ++ " addi.w %0, $zero, 1 \n\t" \ ++ "2: \n\t" \ ++ : "=r" (res) \ ++ : "r" (v1), "r" (v2) \ ++ : "memory"); \ ++ printf("%s::\n", insn); \ ++ printf("input: %#lx %#lx\n", v1, v2); \ ++ printf("output: %d\n", res); \ ++ } ++ ++#define TESTINST_B_R(insn, val) \ ++ { \ ++ int res; \ ++ unsigned long v = (unsigned long)val; \ ++ __asm__ __volatile__( \ ++ insn " %1, 1f \n\t" \ ++ " move %0, $zero \n\t" \ ++ " b 2f \n\t" \ ++ "1: \n\t" \ ++ " addi.w %0, $zero, 1 \n\t" \ ++ "2: \n\t" \ ++ : "=r" (res) \ ++ : "r" (v) \ ++ : "memory"); \ ++ printf("%s::\n", insn); \ ++ printf("input: %#lx\n", v); \ ++ printf("output: %d\n", res); \ ++ } ++ ++#define TESTINST_B_C(insn, val) \ ++ { \ ++ int res; \ ++ unsigned long v = (unsigned long)val; \ ++ __asm__ __volatile__( \ ++ " movgr2cf $fcc0, 
%1 \n\t" \ ++ insn " $fcc0, 1f \n\t" \ ++ " move %0, $zero \n\t" \ ++ " b 2f \n\t" \ ++ "1: \n\t" \ ++ " addi.w %0, $zero, 1 \n\t" \ ++ "2: \n\t" \ ++ : "=r" (res) \ ++ : "r" (v) \ ++ : "$fcc0", "memory"); \ ++ printf("%s::\n", insn); \ ++ printf("input: %#lx\n", v); \ ++ printf("output: %d\n", res); \ ++ } ++ ++#define TESTINST_BL() \ ++ { \ ++ int res; \ ++ __asm__ __volatile__( \ ++ " move %0, $zero \n\t" \ ++ " bl 1f \n\t" \ ++ " addi.w %0, %0, 1 \n\t" \ ++ " b 2f \n\t" \ ++ "1: \n\t" \ ++ " addi.w %0, %0, 1 \n\t" \ ++ " jr $ra \n\t" \ ++ "2: \n\t" \ ++ : "=r" (res) \ ++ : \ ++ : "$ra", "memory"); \ ++ printf("bl::\n"); \ ++ printf("res: %d\n", res); \ ++ } ++ ++#define TESTINST_JIRL(insn) \ ++ { \ ++ unsigned long addr1, addr2; \ ++ __asm__ __volatile__( \ ++ " pcaddi $t0, 2 \n\t" \ ++ " jirl %0, $t0, 0 \n\t" \ ++ " pcaddi %1, 0 \n\t" \ ++ : "=r" (addr1), "=r" (addr2) \ ++ : \ ++ : "$t0", "memory"); \ ++ printf("jirl::\n"); \ ++ printf("res: %d\n", addr1 == addr2); \ ++ } ++ ++void test(void) ++{ ++ /* ---------------- beq rj, rd, offs16 ---------------- */ ++ TESTINST_B_RR("beq", 1, 2); ++ TESTINST_B_RR("beq", 1, 1); ++ ++ /* ---------------- bne rj, rd, offs16 ---------------- */ ++ TESTINST_B_RR("bne", 1, 2); ++ TESTINST_B_RR("bne", 1, 1); ++ ++ /* ---------------- blt rj, rd, offs16 ---------------- */ ++ TESTINST_B_RR("blt", 1, 2); ++ TESTINST_B_RR("blt", 1, 0); ++ ++ /* ---------------- bge rj, rd, offs16 ---------------- */ ++ TESTINST_B_RR("bge", 1, 2); ++ TESTINST_B_RR("bge", 0, 0); ++ ++ /* ---------------- bltu rj, rd, offs16 ---------------- */ ++ TESTINST_B_RR("bltu", -1, 2); ++ TESTINST_B_RR("bltu", 0, 1); ++ ++ /* ---------------- bgeu rj, rd, offs16 ---------------- */ ++ TESTINST_B_RR("bgeu", -1, 2); ++ TESTINST_B_RR("bgeu", 0, 1); ++ ++ /* ---------------- beqz rj, offs21 ---------------- */ ++ TESTINST_B_R("beqz", 0); ++ TESTINST_B_R("beqz", -1); ++ ++ /* ---------------- bnez rj, offs21 ---------------- */ ++ TESTINST_B_R("bnez", 0); ++ TESTINST_B_R("bnez", -1); ++ ++ /* ---------------- bceqz cj, offs21 ---------------- */ ++ TESTINST_B_C("bceqz", 0); ++ TESTINST_B_C("bceqz", 1); ++ ++ /* ---------------- bcnez cj, offs21 ---------------- */ ++ TESTINST_B_C("bcnez", 0); ++ TESTINST_B_C("bcnez", 1); ++ ++ /* ---------------- bl offs26 ---------------- */ ++ TESTINST_BL(); ++ ++ /* ---------------- jirl rd, rj, offs16 ---------------- */ ++ TESTINST_JIRL(); ++} ++ ++int main(void) ++{ ++ test(); ++ return 0; ++} +diff --git a/none/tests/loongarch64/branch.stderr.exp b/none/tests/loongarch64/branch.stderr.exp +new file mode 100644 +index 000000000..e69de29bb +diff --git a/none/tests/loongarch64/branch.stdout.exp b/none/tests/loongarch64/branch.stdout.exp +new file mode 100644 +index 000000000..0a32d6ad7 +--- /dev/null ++++ b/none/tests/loongarch64/branch.stdout.exp +@@ -0,0 +1,64 @@ ++beq:: ++input: 0x1 0x2 ++output: 0 ++beq:: ++input: 0x1 0x1 ++output: 1 ++bne:: ++input: 0x1 0x2 ++output: 1 ++bne:: ++input: 0x1 0x1 ++output: 0 ++blt:: ++input: 0x1 0x2 ++output: 1 ++blt:: ++input: 0x1 0 ++output: 0 ++bge:: ++input: 0x1 0x2 ++output: 0 ++bge:: ++input: 0 0 ++output: 1 ++bltu:: ++input: 0xffffffffffffffff 0x2 ++output: 0 ++bltu:: ++input: 0 0x1 ++output: 1 ++bgeu:: ++input: 0xffffffffffffffff 0x2 ++output: 1 ++bgeu:: ++input: 0 0x1 ++output: 0 ++beqz:: ++input: 0 ++output: 1 ++beqz:: ++input: 0xffffffffffffffff ++output: 0 ++bnez:: ++input: 0 ++output: 0 ++bnez:: ++input: 0xffffffffffffffff ++output: 1 ++bceqz:: ++input: 0 ++output: 1 ++bceqz:: ++input: 0x1 ++output: 0 
++bcnez:: ++input: 0 ++output: 0 ++bcnez:: ++input: 0x1 ++output: 1 ++bl:: ++res: 2 ++jirl:: ++res: 1 +diff --git a/none/tests/loongarch64/branch.vgtest b/none/tests/loongarch64/branch.vgtest +new file mode 100644 +index 000000000..535c05590 +--- /dev/null ++++ b/none/tests/loongarch64/branch.vgtest +@@ -0,0 +1,2 @@ ++prog: branch ++vgopts: -q +diff --git a/none/tests/loongarch64/cpucfg.c b/none/tests/loongarch64/cpucfg.c +new file mode 100644 +index 000000000..f5d0570eb +--- /dev/null ++++ b/none/tests/loongarch64/cpucfg.c +@@ -0,0 +1,24 @@ ++#include ++ ++void test(int reg) ++{ ++ int res; ++ __asm__ __volatile__( ++ "cpucfg %0, %1 \n\t" ++ : "=r" (res) ++ : "r" (reg) ++ : "memory"); ++ printf("cpucfg ::\n"); ++ printf("input: %x\n", (unsigned)reg); ++ printf("output: %x\n", (unsigned)res); ++} ++ ++int main(void) ++{ ++ int i; ++ ++ for (i = 0; i < 24; i++) ++ test(i); ++ ++ return 0; ++} +diff --git a/none/tests/loongarch64/cpucfg.stderr.exp b/none/tests/loongarch64/cpucfg.stderr.exp +new file mode 100644 +index 000000000..e69de29bb +diff --git a/none/tests/loongarch64/cpucfg.stdout.exp b/none/tests/loongarch64/cpucfg.stdout.exp +new file mode 100644 +index 000000000..49e0ba7b1 +--- /dev/null ++++ b/none/tests/loongarch64/cpucfg.stdout.exp +@@ -0,0 +1,72 @@ ++cpucfg :: ++input: 0 ++output: 14c010 ++cpucfg :: ++input: 1 ++output: 3f2f2fe ++cpucfg :: ++input: 2 ++output: 7ccfc7 ++cpucfg :: ++input: 3 ++output: fcff ++cpucfg :: ++input: 4 ++output: 5f5e100 ++cpucfg :: ++input: 5 ++output: 10001 ++cpucfg :: ++input: 6 ++output: 7f33 ++cpucfg :: ++input: 7 ++output: 0 ++cpucfg :: ++input: 8 ++output: 0 ++cpucfg :: ++input: 9 ++output: 0 ++cpucfg :: ++input: a ++output: 0 ++cpucfg :: ++input: b ++output: 0 ++cpucfg :: ++input: c ++output: 0 ++cpucfg :: ++input: d ++output: 0 ++cpucfg :: ++input: e ++output: 0 ++cpucfg :: ++input: f ++output: 0 ++cpucfg :: ++input: 10 ++output: 2c3d ++cpucfg :: ++input: 11 ++output: 6080003 ++cpucfg :: ++input: 12 ++output: 6080003 ++cpucfg :: ++input: 13 ++output: 608000f ++cpucfg :: ++input: 14 ++output: 60e000f ++cpucfg :: ++input: 15 ++output: 0 ++cpucfg :: ++input: 16 ++output: 0 ++cpucfg :: ++input: 17 ++output: 0 +diff --git a/none/tests/loongarch64/cpucfg.vgtest b/none/tests/loongarch64/cpucfg.vgtest +new file mode 100644 +index 000000000..fea964445 +--- /dev/null ++++ b/none/tests/loongarch64/cpucfg.vgtest +@@ -0,0 +1,3 @@ ++prereq: ../../../tests/loongarch64_features cpucfg ++prog: cpucfg ++vgopts: -q +diff --git a/none/tests/loongarch64/fault.c b/none/tests/loongarch64/fault.c +new file mode 100644 +index 000000000..294176857 +--- /dev/null ++++ b/none/tests/loongarch64/fault.c +@@ -0,0 +1,234 @@ ++#include ++#include ++#include ++#include ++#include ++ ++#define NUM 24 ++ ++unsigned long mem[NUM] = { ++ 0x121f1e1f0000e680, 0x0000000000010700, 0x000000030000e7dc, ++ 0xffffffff0000b0d0, 0x232f2e2f2ab05fd0, 0x242c2b2b0000b6a0, ++ 0x252a2e2b0000be80, 0x262d2d2a0000de10, 0x3f343f3e0000df20, ++ 0x3e353d3c2ab05fe0, 0x363a3c3b0000dfd0, 0x3b373b3a00010300, ++ 0x0000e680121f1e1f, 0x0001070000000000, 0x0000e7dc00000003, ++ 0x0000b0d0ffffffff, 0x2ab05fd0232f2e2f, 0x0000b6a0242c2b2b, ++ 0x0000be80252a2e2b, 0x0000de10262d2d2a, 0x0000df203f343f3e, ++ 0x2ab05fe03e353d3c, 0x0000dfd0363a3c3b, 0x000103003b373b3a ++}; ++ ++long val1 = 0; ++long val2 = 0xfdecba9087654321UL; ++char *p = (char *)mem; ++ ++#define TESTINST_LOAD_RRR(n, insn, addr1, addr2) \ ++ void test ## n (void) \ ++ { \ ++ printf("test %d\n", n); \ ++ printf("%s ::\n", insn); \ ++ __asm__ 
__volatile__( \ ++ insn " %0, %1, %2 \n\t" \ ++ : "=r" (val1) \ ++ : "r" (addr1), "r" (addr2) \ ++ : "memory"); \ ++ printf("output: %ld\n", val1); \ ++ } ++ ++#define TESTINST_STORE_RRR(n, insn, addr1, addr2) \ ++ void test ## n (void) \ ++ { \ ++ printf("test %d\n", n); \ ++ printf("%s ::\n", insn); \ ++ printf("input: %ld\n", val2); \ ++ __asm__ __volatile__( \ ++ insn " %0, %1, %2 \n\t" \ ++ : \ ++ : "r" (val2), "r" (addr1), "r" (addr2) \ ++ : "memory"); \ ++ } ++ ++#define TESTINST_RR(n, insn, v1, v2) \ ++ void test ## n (void) \ ++ { \ ++ printf("test %d\n", n); \ ++ printf("%s ::\n", insn); \ ++ printf("input: %ld %ld\n", (long)v1, (long)v2); \ ++ __asm__ __volatile__( \ ++ insn " %0, %1 \n\t" \ ++ : \ ++ : "r" (v1), "r" (v2) \ ++ : "memory"); \ ++ } ++ ++#define TESTINST_I(n, insn, imm) \ ++ void test ## n (void) \ ++ { \ ++ printf("test %d\n", n); \ ++ printf("%s ::\n", insn); \ ++ printf("input: %d\n", imm); \ ++ __asm__ __volatile__( \ ++ insn " " #imm " \n\t" \ ++ : \ ++ : \ ++ : "memory"); \ ++ } ++ ++static sigjmp_buf escape; ++ ++static void handler(int sig, siginfo_t *si, void *uc) ++{ ++ fprintf(stderr, "signal: %d\n", sig); ++ fprintf(stderr, "code: %d\n", si->si_code); ++ siglongjmp(escape, 1); ++} ++ ++static inline void show(void) ++{ ++ int i; ++ printf("memory block:\n"); ++ for (i = 0; i < NUM; i++) ++ printf("0x%lx:\t%#018lx\n", i * sizeof(unsigned long), mem[i]); ++} ++ ++TESTINST_LOAD_RRR(1, "ldgt.b", &p[0], &p[64]); ++TESTINST_LOAD_RRR(2, "ldgt.b", &p[1], &p[0] ); ++TESTINST_LOAD_RRR(3, "ldgt.h", &p[1], &p[0] ); ++TESTINST_LOAD_RRR(4, "ldgt.h", &p[2], &p[64]); ++TESTINST_LOAD_RRR(5, "ldgt.h", &p[4], &p[0] ); ++TESTINST_LOAD_RRR(6, "ldgt.w", &p[2], &p[0] ); ++TESTINST_LOAD_RRR(7, "ldgt.w", &p[8], &p[64]); ++TESTINST_LOAD_RRR(8, "ldgt.w", &p[12], &p[0] ); ++TESTINST_LOAD_RRR(9, "ldgt.d", &p[4], &p[0] ); ++TESTINST_LOAD_RRR(10, "ldgt.d", &p[16], &p[64]); ++TESTINST_LOAD_RRR(11, "ldgt.d", &p[32], &p[0] ); ++ ++TESTINST_LOAD_RRR(12, "ldle.b", &p[64], &p[0] ); ++TESTINST_LOAD_RRR(13, "ldle.b", &p[65], &p[96]); ++TESTINST_LOAD_RRR(14, "ldle.h", &p[65], &p[0] ); ++TESTINST_LOAD_RRR(15, "ldle.h", &p[66], &p[0] ); ++TESTINST_LOAD_RRR(16, "ldle.h", &p[68], &p[96]); ++TESTINST_LOAD_RRR(17, "ldle.w", &p[66], &p[0] ); ++TESTINST_LOAD_RRR(18, "ldle.w", &p[72], &p[0] ); ++TESTINST_LOAD_RRR(19, "ldle.w", &p[76], &p[96]); ++TESTINST_LOAD_RRR(20, "ldle.d", &p[68], &p[0] ); ++TESTINST_LOAD_RRR(21, "ldle.d", &p[80], &p[0] ); ++TESTINST_LOAD_RRR(22, "ldle.d", &p[88], &p[96]); ++ ++TESTINST_STORE_RRR(23, "ldgt.b", &p[0], &p[64]); ++TESTINST_STORE_RRR(24, "ldgt.b", &p[1], &p[0] ); ++TESTINST_STORE_RRR(25, "ldgt.h", &p[1], &p[0] ); ++TESTINST_STORE_RRR(26, "ldgt.h", &p[2], &p[64]); ++TESTINST_STORE_RRR(27, "ldgt.h", &p[4], &p[0] ); ++TESTINST_STORE_RRR(28, "ldgt.w", &p[2], &p[0] ); ++TESTINST_STORE_RRR(29, "ldgt.w", &p[8], &p[64]); ++TESTINST_STORE_RRR(30, "ldgt.w", &p[12], &p[0] ); ++TESTINST_STORE_RRR(31, "ldgt.d", &p[4], &p[0] ); ++TESTINST_STORE_RRR(32, "ldgt.d", &p[16], &p[64]); ++TESTINST_STORE_RRR(33, "ldgt.d", &p[32], &p[0] ); ++ ++TESTINST_STORE_RRR(34, "ldle.b", &p[64], &p[0] ); ++TESTINST_STORE_RRR(35, "ldle.b", &p[65], &p[96]); ++TESTINST_STORE_RRR(36, "ldle.h", &p[65], &p[0] ); ++TESTINST_STORE_RRR(37, "ldle.h", &p[66], &p[0] ); ++TESTINST_STORE_RRR(38, "ldle.h", &p[68], &p[96]); ++TESTINST_STORE_RRR(39, "ldle.w", &p[66], &p[0] ); ++TESTINST_STORE_RRR(40, "ldle.w", &p[72], &p[0] ); ++TESTINST_STORE_RRR(41, "ldle.w", &p[76], &p[96]); ++TESTINST_STORE_RRR(42, "ldle.d", 
&p[68], &p[0] ); ++TESTINST_STORE_RRR(43, "ldle.d", &p[80], &p[0] ); ++TESTINST_STORE_RRR(44, "ldle.d", &p[88], &p[96]); ++ ++TESTINST_RR(45, "asrtle.d", 123, 456); ++TESTINST_RR(46, "asrtle.d", 789, 0); ++TESTINST_RR(47, "asrtgt.d", 123, 456); ++TESTINST_RR(48, "asrtgt.d", 789, 0); ++ ++TESTINST_I(49, "break", 0); ++TESTINST_I(50, "break", 6); ++TESTINST_I(51, "break", 7); ++TESTINST_I(52, "break", 100); ++ ++struct test { ++ void (*func)(void); ++ bool show; ++} tests[] = { ++ { test1, false }, ++ { test2, false }, ++ { test3, false }, ++ { test4, false }, ++ { test5, false }, ++ { test6, false }, ++ { test7, false }, ++ { test8, false }, ++ { test9, false }, ++ { test10, false }, ++ { test11, true }, ++ { test12, false }, ++ { test13, false }, ++ { test14, false }, ++ { test15, false }, ++ { test16, false }, ++ { test17, false }, ++ { test18, false }, ++ { test19, false }, ++ { test20, false }, ++ { test21, false }, ++ { test22, true }, ++ { test23, false }, ++ { test24, false }, ++ { test25, false }, ++ { test26, false }, ++ { test27, false }, ++ { test28, false }, ++ { test29, false }, ++ { test30, false }, ++ { test31, false }, ++ { test32, false }, ++ { test33, true }, ++ { test34, false }, ++ { test35, false }, ++ { test36, false }, ++ { test37, false }, ++ { test38, false }, ++ { test39, false }, ++ { test40, false }, ++ { test41, false }, ++ { test42, false }, ++ { test43, false }, ++ { test44, true }, ++ { test45, false }, ++ { test46, false }, ++ { test47, false }, ++ { test48, false }, ++ { test49, false }, ++ { test50, false }, ++ { test51, false }, ++ { test52, false } ++}; ++ ++int main(void) ++{ ++ int i; ++ struct sigaction sa; ++ int sigs[] = { SIGSYS, SIGBUS, SIGFPE, SIGTRAP }; ++ ++ sa.sa_sigaction = handler; ++ sa.sa_flags = SA_SIGINFO; ++ sigfillset(&sa.sa_mask); ++ ++ for(i = 0; i < sizeof(sigs) / sizeof(sigs[0]); i++) ++ sigaction(sigs[i], &sa, NULL); ++ ++ show(); ++ for(i = 0; i < sizeof(tests) / sizeof(tests[0]); i++) { ++ if (sigsetjmp(escape, 1) == 0) { ++ fprintf(stderr, "test %d\n", i + 1); ++ tests[i].func(); ++ if (tests[i].show) ++ show(); ++ fprintf(stderr, "no fault\n"); ++ } ++ } ++ ++ return 0; ++} +diff --git a/none/tests/loongarch64/fault.stderr.exp b/none/tests/loongarch64/fault.stderr.exp +new file mode 100644 +index 000000000..3f566684c +--- /dev/null ++++ b/none/tests/loongarch64/fault.stderr.exp +@@ -0,0 +1,138 @@ ++test 1 ++signal: 31 ++code: 128 ++test 2 ++no fault ++test 3 ++signal: 7 ++code: 1 ++test 4 ++signal: 31 ++code: 128 ++test 5 ++no fault ++test 6 ++signal: 7 ++code: 1 ++test 7 ++signal: 31 ++code: 128 ++test 8 ++no fault ++test 9 ++signal: 7 ++code: 1 ++test 10 ++signal: 31 ++code: 128 ++test 11 ++no fault ++test 12 ++signal: 31 ++code: 128 ++test 13 ++no fault ++test 14 ++signal: 7 ++code: 1 ++test 15 ++signal: 31 ++code: 128 ++test 16 ++no fault ++test 17 ++signal: 7 ++code: 1 ++test 18 ++signal: 31 ++code: 128 ++test 19 ++no fault ++test 20 ++signal: 7 ++code: 1 ++test 21 ++signal: 31 ++code: 128 ++test 22 ++no fault ++test 23 ++signal: 31 ++code: 128 ++test 24 ++no fault ++test 25 ++signal: 7 ++code: 1 ++test 26 ++signal: 31 ++code: 128 ++test 27 ++no fault ++test 28 ++signal: 7 ++code: 1 ++test 29 ++signal: 31 ++code: 128 ++test 30 ++no fault ++test 31 ++signal: 7 ++code: 1 ++test 32 ++signal: 31 ++code: 128 ++test 33 ++no fault ++test 34 ++signal: 31 ++code: 128 ++test 35 ++no fault ++test 36 ++signal: 7 ++code: 1 ++test 37 ++signal: 31 ++code: 128 ++test 38 ++no fault ++test 39 ++signal: 7 ++code: 1 ++test 40 ++signal: 31 
++code: 128 ++test 41 ++no fault ++test 42 ++signal: 7 ++code: 1 ++test 43 ++signal: 31 ++code: 128 ++test 44 ++no fault ++test 45 ++no fault ++test 46 ++signal: 31 ++code: 128 ++test 47 ++signal: 31 ++code: 128 ++test 48 ++no fault ++test 49 ++signal: 5 ++code: 1 ++test 50 ++signal: 8 ++code: 2 ++test 51 ++signal: 8 ++code: 1 ++test 52 ++signal: 5 ++code: 1 +diff --git a/none/tests/loongarch64/fault.stdout.exp b/none/tests/loongarch64/fault.stdout.exp +new file mode 100644 +index 000000000..d2e342df8 +--- /dev/null ++++ b/none/tests/loongarch64/fault.stdout.exp +@@ -0,0 +1,267 @@ ++memory block: ++0x0: 0x121f1e1f0000e680 ++0x8: 0x0000000000010700 ++0x10: 0x000000030000e7dc ++0x18: 0xffffffff0000b0d0 ++0x20: 0x232f2e2f2ab05fd0 ++0x28: 0x242c2b2b0000b6a0 ++0x30: 0x252a2e2b0000be80 ++0x38: 0x262d2d2a0000de10 ++0x40: 0x3f343f3e0000df20 ++0x48: 0x3e353d3c2ab05fe0 ++0x50: 0x363a3c3b0000dfd0 ++0x58: 0x3b373b3a00010300 ++0x60: 0x0000e680121f1e1f ++0x68: 0x0001070000000000 ++0x70: 0x0000e7dc00000003 ++0x78: 0x0000b0d0ffffffff ++0x80: 0x2ab05fd0232f2e2f ++0x88: 0x0000b6a0242c2b2b ++0x90: 0x0000be80252a2e2b ++0x98: 0x0000de10262d2d2a ++0xa0: 0x0000df203f343f3e ++0xa8: 0x2ab05fe03e353d3c ++0xb0: 0x0000dfd0363a3c3b ++0xb8: 0x000103003b373b3a ++test 1 ++ldgt.b :: ++test 2 ++ldgt.b :: ++output: -26 ++test 3 ++ldgt.h :: ++test 4 ++ldgt.h :: ++test 5 ++ldgt.h :: ++output: 7711 ++test 6 ++ldgt.w :: ++test 7 ++ldgt.w :: ++test 8 ++ldgt.w :: ++output: 0 ++test 9 ++ldgt.d :: ++test 10 ++ldgt.d :: ++test 11 ++ldgt.d :: ++output: 2535295895347421136 ++memory block: ++0x0: 0x121f1e1f0000e680 ++0x8: 0x0000000000010700 ++0x10: 0x000000030000e7dc ++0x18: 0xffffffff0000b0d0 ++0x20: 0x232f2e2f2ab05fd0 ++0x28: 0x242c2b2b0000b6a0 ++0x30: 0x252a2e2b0000be80 ++0x38: 0x262d2d2a0000de10 ++0x40: 0x3f343f3e0000df20 ++0x48: 0x3e353d3c2ab05fe0 ++0x50: 0x363a3c3b0000dfd0 ++0x58: 0x3b373b3a00010300 ++0x60: 0x0000e680121f1e1f ++0x68: 0x0001070000000000 ++0x70: 0x0000e7dc00000003 ++0x78: 0x0000b0d0ffffffff ++0x80: 0x2ab05fd0232f2e2f ++0x88: 0x0000b6a0242c2b2b ++0x90: 0x0000be80252a2e2b ++0x98: 0x0000de10262d2d2a ++0xa0: 0x0000df203f343f3e ++0xa8: 0x2ab05fe03e353d3c ++0xb0: 0x0000dfd0363a3c3b ++0xb8: 0x000103003b373b3a ++test 12 ++ldle.b :: ++test 13 ++ldle.b :: ++output: -33 ++test 14 ++ldle.h :: ++test 15 ++ldle.h :: ++test 16 ++ldle.h :: ++output: 16190 ++test 17 ++ldle.w :: ++test 18 ++ldle.w :: ++test 19 ++ldle.w :: ++output: 1043676476 ++test 20 ++ldle.d :: ++test 21 ++ldle.d :: ++test 22 ++ldle.d :: ++output: 4266944292251042560 ++memory block: ++0x0: 0x121f1e1f0000e680 ++0x8: 0x0000000000010700 ++0x10: 0x000000030000e7dc ++0x18: 0xffffffff0000b0d0 ++0x20: 0x232f2e2f2ab05fd0 ++0x28: 0x242c2b2b0000b6a0 ++0x30: 0x252a2e2b0000be80 ++0x38: 0x262d2d2a0000de10 ++0x40: 0x3f343f3e0000df20 ++0x48: 0x3e353d3c2ab05fe0 ++0x50: 0x363a3c3b0000dfd0 ++0x58: 0x3b373b3a00010300 ++0x60: 0x0000e680121f1e1f ++0x68: 0x0001070000000000 ++0x70: 0x0000e7dc00000003 ++0x78: 0x0000b0d0ffffffff ++0x80: 0x2ab05fd0232f2e2f ++0x88: 0x0000b6a0242c2b2b ++0x90: 0x0000be80252a2e2b ++0x98: 0x0000de10262d2d2a ++0xa0: 0x0000df203f343f3e ++0xa8: 0x2ab05fe03e353d3c ++0xb0: 0x0000dfd0363a3c3b ++0xb8: 0x000103003b373b3a ++test 23 ++ldgt.b :: ++input: -149539557700451551 ++test 24 ++ldgt.b :: ++input: -149539557700451551 ++test 25 ++ldgt.h :: ++input: -149539557700451551 ++test 26 ++ldgt.h :: ++input: -149539557700451551 ++test 27 ++ldgt.h :: ++input: -149539557700451551 ++test 28 ++ldgt.w :: ++input: -149539557700451551 ++test 29 ++ldgt.w :: ++input: 
-149539557700451551 ++test 30 ++ldgt.w :: ++input: -149539557700451551 ++test 31 ++ldgt.d :: ++input: -149539557700451551 ++test 32 ++ldgt.d :: ++input: -149539557700451551 ++test 33 ++ldgt.d :: ++input: -149539557700451551 ++memory block: ++0x0: 0x121f1e1f0000e680 ++0x8: 0x0000000000010700 ++0x10: 0x000000030000e7dc ++0x18: 0xffffffff0000b0d0 ++0x20: 0x232f2e2f2ab05fd0 ++0x28: 0x242c2b2b0000b6a0 ++0x30: 0x252a2e2b0000be80 ++0x38: 0x262d2d2a0000de10 ++0x40: 0x3f343f3e0000df20 ++0x48: 0x3e353d3c2ab05fe0 ++0x50: 0x363a3c3b0000dfd0 ++0x58: 0x3b373b3a00010300 ++0x60: 0x0000e680121f1e1f ++0x68: 0x0001070000000000 ++0x70: 0x0000e7dc00000003 ++0x78: 0x0000b0d0ffffffff ++0x80: 0x2ab05fd0232f2e2f ++0x88: 0x0000b6a0242c2b2b ++0x90: 0x0000be80252a2e2b ++0x98: 0x0000de10262d2d2a ++0xa0: 0x0000df203f343f3e ++0xa8: 0x2ab05fe03e353d3c ++0xb0: 0x0000dfd0363a3c3b ++0xb8: 0x000103003b373b3a ++test 34 ++ldle.b :: ++input: -149539557700451551 ++test 35 ++ldle.b :: ++input: -149539557700451551 ++test 36 ++ldle.h :: ++input: -149539557700451551 ++test 37 ++ldle.h :: ++input: -149539557700451551 ++test 38 ++ldle.h :: ++input: -149539557700451551 ++test 39 ++ldle.w :: ++input: -149539557700451551 ++test 40 ++ldle.w :: ++input: -149539557700451551 ++test 41 ++ldle.w :: ++input: -149539557700451551 ++test 42 ++ldle.d :: ++input: -149539557700451551 ++test 43 ++ldle.d :: ++input: -149539557700451551 ++test 44 ++ldle.d :: ++input: -149539557700451551 ++memory block: ++0x0: 0x121f1e1f0000e680 ++0x8: 0x0000000000010700 ++0x10: 0x000000030000e7dc ++0x18: 0xffffffff0000b0d0 ++0x20: 0x232f2e2f2ab05fd0 ++0x28: 0x242c2b2b0000b6a0 ++0x30: 0x252a2e2b0000be80 ++0x38: 0x262d2d2a0000de10 ++0x40: 0x3f343f3e0000df20 ++0x48: 0x3e353d3c2ab05fe0 ++0x50: 0x363a3c3b0000dfd0 ++0x58: 0x3b373b3a00010300 ++0x60: 0x0000e680121f1e1f ++0x68: 0x0001070000000000 ++0x70: 0x0000e7dc00000003 ++0x78: 0x0000b0d0ffffffff ++0x80: 0x2ab05fd0232f2e2f ++0x88: 0x0000b6a0242c2b2b ++0x90: 0x0000be80252a2e2b ++0x98: 0x0000de10262d2d2a ++0xa0: 0x0000df203f343f3e ++0xa8: 0x2ab05fe03e353d3c ++0xb0: 0x0000dfd0363a3c3b ++0xb8: 0x000103003b373b3a ++test 45 ++asrtle.d :: ++input: 123 456 ++test 46 ++asrtle.d :: ++input: 789 0 ++test 47 ++asrtgt.d :: ++input: 123 456 ++test 48 ++asrtgt.d :: ++input: 789 0 ++test 49 ++break :: ++input: 0 ++test 50 ++break :: ++input: 6 ++test 51 ++break :: ++input: 7 ++test 52 ++break :: ++input: 100 +diff --git a/none/tests/loongarch64/fault.vgtest b/none/tests/loongarch64/fault.vgtest +new file mode 100644 +index 000000000..24bf21afe +--- /dev/null ++++ b/none/tests/loongarch64/fault.vgtest +@@ -0,0 +1,2 @@ ++prog: fault ++vgopts: -q +diff --git a/none/tests/loongarch64/fault_fp.c b/none/tests/loongarch64/fault_fp.c +new file mode 100644 +index 000000000..0d5862dcc +--- /dev/null ++++ b/none/tests/loongarch64/fault_fp.c +@@ -0,0 +1,163 @@ ++#include ++#include ++#include ++#include ++#include ++ ++#define NUM 24 ++ ++unsigned long mem[NUM] = { ++ 0x121f1e1f0000e680, 0x0000000000010700, 0x000000030000e7dc, ++ 0xffffffff0000b0d0, 0x232f2e2f2ab05fd0, 0x242c2b2b0000b6a0, ++ 0x252a2e2b0000be80, 0x262d2d2a0000de10, 0x3f343f3e0000df20, ++ 0x3e353d3c2ab05fe0, 0x363a3c3b0000dfd0, 0x3b373b3a00010300, ++ 0x0000e680121f1e1f, 0x0001070000000000, 0x0000e7dc00000003, ++ 0x0000b0d0ffffffff, 0x2ab05fd0232f2e2f, 0x0000b6a0242c2b2b, ++ 0x0000be80252a2e2b, 0x0000de10262d2d2a, 0x0000df203f343f3e, ++ 0x2ab05fe03e353d3c, 0x0000dfd0363a3c3b, 0x000103003b373b3a ++}; ++ ++long val1 = 0; ++long val2 = 0xfdecba9087654321UL; ++char *p = (char *)mem; ++ 
++#define TESTINST_LOAD_FRR_S(n, insn, addr1, addr2) \ ++ void test ## n (void) \ ++ { \ ++ printf("test %d\n", n); \ ++ printf("%s ::\n", insn); \ ++ __asm__ __volatile__( \ ++ insn " %0, %1, %2 \n\t" \ ++ : "=f" (val1) \ ++ : "r" (addr1), "r" (addr2) \ ++ : "memory"); \ ++ printf("output: %d\n", (int)val1); \ ++ } ++ ++#define TESTINST_LOAD_FRR_D(n, insn, addr1, addr2) \ ++ void test ## n (void) \ ++ { \ ++ printf("test %d\n", n); \ ++ printf("%s ::\n", insn); \ ++ __asm__ __volatile__( \ ++ insn " %0, %1, %2 \n\t" \ ++ : "=f" (val1) \ ++ : "r" (addr1), "r" (addr2) \ ++ : "memory"); \ ++ printf("output: %ld\n", val1); \ ++ } ++ ++#define TESTINST_STORE_FRR(n, insn, addr1, addr2) \ ++ void test ## n (void) \ ++ { \ ++ printf("test %d\n", n); \ ++ printf("%s ::\n", insn); \ ++ printf("input: %ld\n", val2); \ ++ __asm__ __volatile__( \ ++ insn " %0, %1, %2 \n\t" \ ++ : \ ++ : "f" (val2), "r" (addr1), "r" (addr2) \ ++ : "memory"); \ ++ } ++ ++static sigjmp_buf escape; ++ ++static void handler(int sig, siginfo_t *si, void *uc) ++{ ++ fprintf(stderr, "signal: %d\n", sig); ++ fprintf(stderr, "code: %d\n", si->si_code); ++ siglongjmp(escape, 1); ++} ++ ++static inline void show(void) ++{ ++ int i; ++ printf("memory block:\n"); ++ for (i = 0; i < NUM; i++) ++ printf("0x%lx:\t%#018lx\n", i * sizeof(unsigned long), mem[i]); ++} ++ ++TESTINST_LOAD_FRR_S(1, "fldgt.s", &p[2], &p[0] ); ++TESTINST_LOAD_FRR_S(2, "fldgt.s", &p[8], &p[64]); ++TESTINST_LOAD_FRR_S(3, "fldgt.s", &p[12], &p[0] ); ++TESTINST_LOAD_FRR_D(4, "fldgt.d", &p[4], &p[0] ); ++TESTINST_LOAD_FRR_D(5, "fldgt.d", &p[16], &p[64]); ++TESTINST_LOAD_FRR_D(6, "fldgt.d", &p[32], &p[0] ); ++ ++TESTINST_LOAD_FRR_S(7, "fldle.s", &p[66], &p[0] ); ++TESTINST_LOAD_FRR_S(8, "fldle.s", &p[72], &p[0] ); ++TESTINST_LOAD_FRR_S(9, "fldle.s", &p[76], &p[96]); ++TESTINST_LOAD_FRR_D(10, "fldle.d", &p[68], &p[0] ); ++TESTINST_LOAD_FRR_D(11, "fldle.d", &p[80], &p[0] ); ++TESTINST_LOAD_FRR_D(12, "fldle.d", &p[88], &p[96]); ++ ++TESTINST_STORE_FRR(13, "fstgt.s", &p[2], &p[0] ); ++TESTINST_STORE_FRR(14, "fstgt.s", &p[8], &p[64]); ++TESTINST_STORE_FRR(15, "fstgt.s", &p[12], &p[0] ); ++TESTINST_STORE_FRR(16, "fstgt.d", &p[4], &p[0] ); ++TESTINST_STORE_FRR(17, "fstgt.d", &p[16], &p[64]); ++TESTINST_STORE_FRR(18, "fstgt.d", &p[32], &p[0] ); ++ ++TESTINST_STORE_FRR(19, "fstle.s", &p[66], &p[0] ); ++TESTINST_STORE_FRR(20, "fstle.s", &p[72], &p[0] ); ++TESTINST_STORE_FRR(21, "fstle.s", &p[76], &p[96]); ++TESTINST_STORE_FRR(22, "fstle.d", &p[68], &p[0] ); ++TESTINST_STORE_FRR(23, "fstle.d", &p[80], &p[0] ); ++TESTINST_STORE_FRR(24, "fstle.d", &p[88], &p[96]); ++ ++struct test { ++ void (*func)(void); ++ bool show; ++} tests[] = { ++ { test1, false }, ++ { test2, false }, ++ { test3, false }, ++ { test4, false }, ++ { test5, false }, ++ { test6, true }, ++ { test7, false }, ++ { test8, false }, ++ { test9, false }, ++ { test10, false }, ++ { test11, false }, ++ { test12, true }, ++ { test13, false }, ++ { test14, false }, ++ { test15, false }, ++ { test16, false }, ++ { test17, false }, ++ { test18, true }, ++ { test19, false }, ++ { test20, false }, ++ { test21, false }, ++ { test22, false }, ++ { test23, false }, ++ { test24, true } ++}; ++ ++int main(void) ++{ ++ int i; ++ struct sigaction sa; ++ int sigs[] = { SIGSYS, SIGBUS }; ++ ++ sa.sa_sigaction = handler; ++ sa.sa_flags = SA_SIGINFO; ++ sigfillset(&sa.sa_mask); ++ ++ for(i = 0; i < sizeof(sigs) / sizeof(sigs[0]); i++) ++ sigaction(sigs[i], &sa, NULL); ++ ++ show(); ++ for(i = 0; i < sizeof(tests) / sizeof(tests[0]); 
i++) { ++ if (sigsetjmp(escape, 1) == 0) { ++ fprintf(stderr, "test %d\n", i + 1); ++ tests[i].func(); ++ if (tests[i].show) ++ show(); ++ fprintf(stderr, "no fault\n"); ++ } ++ } ++ ++ return 0; ++} +diff --git a/none/tests/loongarch64/fault_fp.stderr.exp b/none/tests/loongarch64/fault_fp.stderr.exp +new file mode 100644 +index 000000000..a983dead4 +--- /dev/null ++++ b/none/tests/loongarch64/fault_fp.stderr.exp +@@ -0,0 +1,64 @@ ++test 1 ++signal: 7 ++code: 1 ++test 2 ++signal: 31 ++code: 128 ++test 3 ++no fault ++test 4 ++signal: 7 ++code: 1 ++test 5 ++signal: 31 ++code: 128 ++test 6 ++no fault ++test 7 ++signal: 7 ++code: 1 ++test 8 ++signal: 31 ++code: 128 ++test 9 ++no fault ++test 10 ++signal: 7 ++code: 1 ++test 11 ++signal: 31 ++code: 128 ++test 12 ++no fault ++test 13 ++signal: 7 ++code: 1 ++test 14 ++signal: 31 ++code: 128 ++test 15 ++no fault ++test 16 ++signal: 7 ++code: 1 ++test 17 ++signal: 31 ++code: 128 ++test 18 ++no fault ++test 19 ++signal: 7 ++code: 1 ++test 20 ++signal: 31 ++code: 128 ++test 21 ++no fault ++test 22 ++signal: 7 ++code: 1 ++test 23 ++signal: 31 ++code: 128 ++test 24 ++no fault +diff --git a/none/tests/loongarch64/fault_fp.stdout.exp b/none/tests/loongarch64/fault_fp.stdout.exp +new file mode 100644 +index 000000000..254a12353 +--- /dev/null ++++ b/none/tests/loongarch64/fault_fp.stdout.exp +@@ -0,0 +1,189 @@ ++memory block: ++0x0: 0x121f1e1f0000e680 ++0x8: 0x0000000000010700 ++0x10: 0x000000030000e7dc ++0x18: 0xffffffff0000b0d0 ++0x20: 0x232f2e2f2ab05fd0 ++0x28: 0x242c2b2b0000b6a0 ++0x30: 0x252a2e2b0000be80 ++0x38: 0x262d2d2a0000de10 ++0x40: 0x3f343f3e0000df20 ++0x48: 0x3e353d3c2ab05fe0 ++0x50: 0x363a3c3b0000dfd0 ++0x58: 0x3b373b3a00010300 ++0x60: 0x0000e680121f1e1f ++0x68: 0x0001070000000000 ++0x70: 0x0000e7dc00000003 ++0x78: 0x0000b0d0ffffffff ++0x80: 0x2ab05fd0232f2e2f ++0x88: 0x0000b6a0242c2b2b ++0x90: 0x0000be80252a2e2b ++0x98: 0x0000de10262d2d2a ++0xa0: 0x0000df203f343f3e ++0xa8: 0x2ab05fe03e353d3c ++0xb0: 0x0000dfd0363a3c3b ++0xb8: 0x000103003b373b3a ++test 1 ++fldgt.s :: ++test 2 ++fldgt.s :: ++test 3 ++fldgt.s :: ++output: 0 ++test 4 ++fldgt.d :: ++test 5 ++fldgt.d :: ++test 6 ++fldgt.d :: ++output: 2535295895347421136 ++memory block: ++0x0: 0x121f1e1f0000e680 ++0x8: 0x0000000000010700 ++0x10: 0x000000030000e7dc ++0x18: 0xffffffff0000b0d0 ++0x20: 0x232f2e2f2ab05fd0 ++0x28: 0x242c2b2b0000b6a0 ++0x30: 0x252a2e2b0000be80 ++0x38: 0x262d2d2a0000de10 ++0x40: 0x3f343f3e0000df20 ++0x48: 0x3e353d3c2ab05fe0 ++0x50: 0x363a3c3b0000dfd0 ++0x58: 0x3b373b3a00010300 ++0x60: 0x0000e680121f1e1f ++0x68: 0x0001070000000000 ++0x70: 0x0000e7dc00000003 ++0x78: 0x0000b0d0ffffffff ++0x80: 0x2ab05fd0232f2e2f ++0x88: 0x0000b6a0242c2b2b ++0x90: 0x0000be80252a2e2b ++0x98: 0x0000de10262d2d2a ++0xa0: 0x0000df203f343f3e ++0xa8: 0x2ab05fe03e353d3c ++0xb0: 0x0000dfd0363a3c3b ++0xb8: 0x000103003b373b3a ++test 7 ++fldle.s :: ++test 8 ++fldle.s :: ++test 9 ++fldle.s :: ++output: 1043676476 ++test 10 ++fldle.d :: ++test 11 ++fldle.d :: ++test 12 ++fldle.d :: ++output: 4266944292251042560 ++memory block: ++0x0: 0x121f1e1f0000e680 ++0x8: 0x0000000000010700 ++0x10: 0x000000030000e7dc ++0x18: 0xffffffff0000b0d0 ++0x20: 0x232f2e2f2ab05fd0 ++0x28: 0x242c2b2b0000b6a0 ++0x30: 0x252a2e2b0000be80 ++0x38: 0x262d2d2a0000de10 ++0x40: 0x3f343f3e0000df20 ++0x48: 0x3e353d3c2ab05fe0 ++0x50: 0x363a3c3b0000dfd0 ++0x58: 0x3b373b3a00010300 ++0x60: 0x0000e680121f1e1f ++0x68: 0x0001070000000000 ++0x70: 0x0000e7dc00000003 ++0x78: 0x0000b0d0ffffffff ++0x80: 0x2ab05fd0232f2e2f ++0x88: 0x0000b6a0242c2b2b 
++0x90: 0x0000be80252a2e2b ++0x98: 0x0000de10262d2d2a ++0xa0: 0x0000df203f343f3e ++0xa8: 0x2ab05fe03e353d3c ++0xb0: 0x0000dfd0363a3c3b ++0xb8: 0x000103003b373b3a ++test 13 ++fstgt.s :: ++input: -149539557700451551 ++test 14 ++fstgt.s :: ++input: -149539557700451551 ++test 15 ++fstgt.s :: ++input: -149539557700451551 ++test 16 ++fstgt.d :: ++input: -149539557700451551 ++test 17 ++fstgt.d :: ++input: -149539557700451551 ++test 18 ++fstgt.d :: ++input: -149539557700451551 ++memory block: ++0x0: 0x121f1e1f0000e680 ++0x8: 0x8765432100010700 ++0x10: 0x000000030000e7dc ++0x18: 0xffffffff0000b0d0 ++0x20: 0xfdecba9087654321 ++0x28: 0x242c2b2b0000b6a0 ++0x30: 0x252a2e2b0000be80 ++0x38: 0x262d2d2a0000de10 ++0x40: 0x3f343f3e0000df20 ++0x48: 0x3e353d3c2ab05fe0 ++0x50: 0x363a3c3b0000dfd0 ++0x58: 0x3b373b3a00010300 ++0x60: 0x0000e680121f1e1f ++0x68: 0x0001070000000000 ++0x70: 0x0000e7dc00000003 ++0x78: 0x0000b0d0ffffffff ++0x80: 0x2ab05fd0232f2e2f ++0x88: 0x0000b6a0242c2b2b ++0x90: 0x0000be80252a2e2b ++0x98: 0x0000de10262d2d2a ++0xa0: 0x0000df203f343f3e ++0xa8: 0x2ab05fe03e353d3c ++0xb0: 0x0000dfd0363a3c3b ++0xb8: 0x000103003b373b3a ++test 19 ++fstle.s :: ++input: -149539557700451551 ++test 20 ++fstle.s :: ++input: -149539557700451551 ++test 21 ++fstle.s :: ++input: -149539557700451551 ++test 22 ++fstle.d :: ++input: -149539557700451551 ++test 23 ++fstle.d :: ++input: -149539557700451551 ++test 24 ++fstle.d :: ++input: -149539557700451551 ++memory block: ++0x0: 0x121f1e1f0000e680 ++0x8: 0x8765432100010700 ++0x10: 0x000000030000e7dc ++0x18: 0xffffffff0000b0d0 ++0x20: 0xfdecba9087654321 ++0x28: 0x242c2b2b0000b6a0 ++0x30: 0x252a2e2b0000be80 ++0x38: 0x262d2d2a0000de10 ++0x40: 0x3f343f3e0000df20 ++0x48: 0x876543212ab05fe0 ++0x50: 0x363a3c3b0000dfd0 ++0x58: 0xfdecba9087654321 ++0x60: 0x0000e680121f1e1f ++0x68: 0x0001070000000000 ++0x70: 0x0000e7dc00000003 ++0x78: 0x0000b0d0ffffffff ++0x80: 0x2ab05fd0232f2e2f ++0x88: 0x0000b6a0242c2b2b ++0x90: 0x0000be80252a2e2b ++0x98: 0x0000de10262d2d2a ++0xa0: 0x0000df203f343f3e ++0xa8: 0x2ab05fe03e353d3c ++0xb0: 0x0000dfd0363a3c3b ++0xb8: 0x000103003b373b3a +diff --git a/none/tests/loongarch64/fault_fp.vgtest b/none/tests/loongarch64/fault_fp.vgtest +new file mode 100644 +index 000000000..b750af8ea +--- /dev/null ++++ b/none/tests/loongarch64/fault_fp.vgtest +@@ -0,0 +1,3 @@ ++prereq: ../../../tests/loongarch64_features fpu ++prog: fault_fp ++vgopts: -q +diff --git a/none/tests/loongarch64/filter_stderr b/none/tests/loongarch64/filter_stderr +new file mode 100755 +index 000000000..0ae9313a9 +--- /dev/null ++++ b/none/tests/loongarch64/filter_stderr +@@ -0,0 +1,3 @@ ++#! 
/bin/sh ++ ++../filter_stderr +diff --git a/none/tests/loongarch64/float.c b/none/tests/loongarch64/float.c +new file mode 100644 +index 000000000..7aa1b6c67 +--- /dev/null ++++ b/none/tests/loongarch64/float.c +@@ -0,0 +1,804 @@ ++#include ++#include ++ ++#define NUM 24 ++ ++const float fj_s[NUM] = { ++ 0, 456.25, 3, -1, ++ 1384.5, -7.25, 1000000000, -5786.5, ++ 1752, 0.015625, 0.03125, -248562.75, ++ -45786.5, 456, 34.03125, 45786.75, ++ 1752065, 107, -45667.25, -7, ++ -347856.5, 356047.5, -1.0, 23.0625 ++}; ++ ++const double fj_d[NUM] = { ++ 0, 456.25, 3, -1, ++ 1384.5, -7.25, 1000000000, -5786.5, ++ 1752, 0.015625, 0.03125, -248562.75, ++ -45786.5, 456, 34.03125, 45786.75, ++ 1752065, 107, -45667.25, -7, ++ -347856.5, 356047.5, -1.0, 23.0625 ++}; ++ ++const float fk_s[NUM] = { ++ -4578.5, 456.25, 34.03125, 4578.75, ++ 175, 107, -456.25, -7.25, ++ -3478.5, 356.5, -1.0, 23.0625, ++ 0, 456.25, 3, -1, ++ 1384.5, -7, 100, -5786.5, ++ 1752, 0.015625, 0.03125, -248562.75 ++}; ++ ++const double fk_d[NUM] = { ++ -45786.5, 456.25, 34.03125, 45786.75, ++ 1752065, 107, -45667.25, -7.25, ++ -347856.5, 356047.5, -1.0, 23.0625, ++ 0, 456.25, 3, -1, ++ 1384.5, -7, 1000000000, -5786.5, ++ 1752, 0.015625, 0.03125, -248562.75 ++}; ++ ++const float fa_s[NUM] = { ++ -347856.5, 356047.5, -1.0, 23.0625, ++ 1752, 0.015625, 0.03125, -248562.75, ++ 1384.5, -7.25, 1000000000, -5786.5, ++ -347856.75, 356047.75, -1.0, 23.03125, ++ 0, 456.25, 3, -1, ++ -45786.5, 456, 34.03125, 45786.03125 ++}; ++ ++const double fa_d[NUM] = { ++ -347856.5, 356047.5, -1.0, 23.0625, ++ 1752, 0.015625, 0.03125, -248562.75, ++ 1384.5, -7.25, 1000000000, -5786.5, ++ -347856.75, 356047.75, -1.0, 23.03125, ++ 0, 456.25, 3, -1, ++ -45786.5, 456, 34.03125, 45786.03125 ++}; ++ ++const int fj_w[NUM] = { ++ 0, 456, 3, -1, ++ 0xffffffff, 356, 1000000000, -5786, ++ 1752, 24575, 10, -248562, ++ -45786, 456, 34, 45786, ++ 1752065, 107, -45667, -7, ++ -347856, 0x80000000, 0xfffffff, 23 ++}; ++ ++const long fj_l[NUM] = { ++ 18, 25, 3, -1, ++ 0xffffffff, 356, 1000000, -5786, ++ -1, 24575, 10, -125458, ++ -486, 456, 34, 45786, ++ 0, 1700000, -45667, -7, ++ -347856, 0x80000000, 0xfffffff, 23 ++}; ++ ++const int cf[NUM] = { ++ 0, 1, 0, 1, ++ 1, 0, 1, 0, ++ 0, 0, 1, 1, ++ 1, 1, 0, 0, ++ 0, 0, 0, 0, ++ 1, 1, 1, 1 ++}; ++ ++typedef enum { ++ TO_NEAREST = 0, ++ TO_ZERO, ++ TO_PLUS_INFINITY, ++ TO_MINUS_INFINITY ++} round_mode_t; ++ ++typedef enum { ++ FADD_S, FADD_D, FSUB_S, FSUB_D, ++ FMUL_S, FMUL_D, FDIV_S, FDIV_D, ++ FMADD_S, FMADD_D, FMSUB_S, FMSUB_D, ++ FNMADD_S, FNMADD_D, FNMSUB_S, FNMSUB_D, ++ FMAX_S, FMAX_D, FMIN_S, FMIN_D, ++ FMAXA_S, FMAXA_D, FMINA_S, FMINA_D, ++ FABS_S, FABS_D, FNEG_S, FNEG_D, ++ FSQRT_S, FSQRT_D, ++ FRECIP_S, FRECIP_D, ++ FRSQRT_S, FRSQRT_D, ++ FSCALEB_S, FSCALEB_D, ++ FLOGB_S, FLOGB_D, ++ FCVT_S_D, FCVT_D_S, ++ FTINTRM_W_S, FTINTRM_W_D, FTINTRM_L_S, FTINTRM_L_D, ++ FTINTRP_W_S, FTINTRP_W_D, FTINTRP_L_S, FTINTRP_L_D, ++ FTINTRZ_W_S, FTINTRZ_W_D, FTINTRZ_L_S, FTINTRZ_L_D, ++ FTINTRNE_W_S, FTINTRNE_W_D, FTINTRNE_L_S, FTINTRNE_L_D, ++ FTINT_W_S, FTINT_W_D, FTINT_L_S, FTINT_L_D, ++ FFINT_S_W, FFINT_S_L, FFINT_D_W, FFINT_D_L, ++ FRINT_S, FRINT_D, ++ FCMP_CAF_S, FCMP_CAF_D, FCMP_SAF_S, FCMP_SAF_D, ++ FCMP_CLT_S, FCMP_CLT_D, FCMP_SLT_S, FCMP_SLT_D, ++ FCMP_CEQ_S, FCMP_CEQ_D, FCMP_SEQ_S, FCMP_SEQ_D, ++ FCMP_CLE_S, FCMP_CLE_D, FCMP_SLE_S, FCMP_SLE_D, ++ FCMP_CUN_S, FCMP_CUN_D, FCMP_SUN_S, FCMP_SUN_D, ++ FCMP_CULT_S, FCMP_CULT_D, FCMP_SULT_S, FCMP_SULT_D, ++ FCMP_CUEQ_S, FCMP_CUEQ_D, FCMP_SUEQ_S, FCMP_SUEQ_D, ++ FCMP_CULE_S, FCMP_CULE_D, 
FCMP_SULE_S, FCMP_SULE_D, ++ FCMP_CNE_S, FCMP_CNE_D, FCMP_SNE_S, FCMP_SNE_D, ++ FCMP_COR_S, FCMP_COR_D, FCMP_SOR_S, FCMP_SOR_D, ++ FCMP_CUNE_S, FCMP_CUNE_D, FCMP_SUNE_S, FCMP_SUNE_D, ++ FSEL, FMOV_S, FMOV_D ++} op_t; ++ ++static inline void set_fcsr(round_mode_t mode) ++{ ++ __asm__ __volatile__("movgr2fcsr $r0, %0" : : "r" (mode << 8)); ++ ++ const char *round_mode_name[] = { "near", "zero", "+inf", "-inf" }; ++ printf("roundig mode: %s\n", round_mode_name[mode]); ++} ++ ++#define TESTINST_FF_S(insn, v1) \ ++ { \ ++ unsigned int fcsr; \ ++ float fd_s; \ ++ __asm__ __volatile__( \ ++ insn " %0, %2 \n\t" \ ++ "movfcsr2gr %1, $r0 \n\t" \ ++ : "=f" (fd_s), "=r" (fcsr) \ ++ : "f" (v1) \ ++ : "memory"); \ ++ printf("%s ::\n", insn); \ ++ printf("input: %.6f\n", v1); \ ++ printf("output: %.6f\n", fd_s); \ ++ printf("fcsr: %#x\n", fcsr); \ ++ } ++ ++#define TESTINST_FF_D(insn, v1) \ ++ { \ ++ unsigned int fcsr; \ ++ double fd_d; \ ++ __asm__ __volatile__( \ ++ insn " %0, %2 \n\t" \ ++ "movfcsr2gr %1, $r0 \n\t" \ ++ : "=f" (fd_d), "=r" (fcsr) \ ++ : "f" (v1) \ ++ : "memory"); \ ++ printf("%s ::\n", insn); \ ++ printf("input: %.15f\n", v1); \ ++ printf("output: %.15f\n", fd_d); \ ++ printf("fcsr: %#x\n", fcsr); \ ++ } ++ ++#define TESTINST_FFF_S(insn, v1, v2) \ ++ { \ ++ unsigned int fcsr; \ ++ float fd_s; \ ++ __asm__ __volatile__( \ ++ insn " %0, %2, %3 \n\t" \ ++ "movfcsr2gr %1, $r0 \n\t" \ ++ : "=f" (fd_s), "=r" (fcsr) \ ++ : "f" (v1), "f" (v2) \ ++ : "memory"); \ ++ printf("%s ::\n", insn); \ ++ printf("input: %.6f %.6f\n", v1, v2); \ ++ printf("output: %.6f\n", fd_s); \ ++ printf("fcsr: %#x\n", fcsr); \ ++ } ++ ++#define TESTINST_FFF_D(insn, v1, v2) \ ++ { \ ++ unsigned int fcsr; \ ++ double fd_s; \ ++ __asm__ __volatile__( \ ++ insn " %0, %2, %3 \n\t" \ ++ "movfcsr2gr %1, $r0 \n\t" \ ++ : "=f" (fd_s), "=r" (fcsr) \ ++ : "f" (v1), "f" (v2) \ ++ : "memory"); \ ++ printf("%s ::\n", insn); \ ++ printf("input: %.15f %.15f\n", v1, v2); \ ++ printf("output: %.15f\n", fd_s); \ ++ printf("fcsr: %#x\n", fcsr); \ ++ } ++ ++#define TESTINST_FFFF_S(insn, v1, v2, v3) \ ++ { \ ++ unsigned int fcsr; \ ++ float fd_s; \ ++ __asm__ __volatile__( \ ++ insn " %0, %2, %3, %4 \n\t" \ ++ "movfcsr2gr %1, $r0 \n\t" \ ++ : "=f" (fd_s), "=r" (fcsr) \ ++ : "f" (v1), "f" (v2), "f" (v3) \ ++ : "memory"); \ ++ printf("%s ::\n", insn); \ ++ printf("input: %.6f %.6f %.6f\n", v1, v2, v3); \ ++ printf("output: %.6f\n", fd_s); \ ++ printf("fcsr: %#x\n", fcsr); \ ++ } ++ ++#define TESTINST_FFFF_D(insn, v1, v2, v3) \ ++ { \ ++ unsigned int fcsr; \ ++ double fd_s; \ ++ __asm__ __volatile__( \ ++ insn " %0, %2, %3, %4 \n\t" \ ++ "movfcsr2gr %1, $r0 \n\t" \ ++ : "=f" (fd_s), "=r" (fcsr) \ ++ : "f" (v1), "f" (v2), "f" (v3) \ ++ : "memory"); \ ++ printf("%s ::\n", insn); \ ++ printf("input: %.15f %.15f %.15f\n", v1, v2, v3); \ ++ printf("output: %.15f\n", fd_s); \ ++ printf("fcsr: %#x\n", fcsr); \ ++ } ++ ++#define TESTINST_FF_S_D(insn, v1) \ ++ { \ ++ unsigned int fcsr; \ ++ float fd_s; \ ++ __asm__ __volatile__( \ ++ insn " %0, %2 \n\t" \ ++ "movfcsr2gr %1, $r0 \n\t" \ ++ : "=f" (fd_s), "=r" (fcsr) \ ++ : "f" (v1) \ ++ : "memory"); \ ++ printf("%s ::\n", insn); \ ++ printf("input: %.15f\n", v1); \ ++ printf("output: %.6f\n", fd_s); \ ++ printf("fcsr: %#x\n", fcsr); \ ++ } ++ ++#define TESTINST_FF_D_S(insn, v1) \ ++ { \ ++ unsigned int fcsr; \ ++ double fd_d; \ ++ __asm__ __volatile__( \ ++ insn " %0, %2 \n\t" \ ++ "movfcsr2gr %1, $r0 \n\t" \ ++ : "=f" (fd_d), "=r" (fcsr) \ ++ : "f" (v1) \ ++ : "memory"); \ ++ printf("%s ::\n", 
insn); \ ++ printf("input: %.6f\n", v1); \ ++ printf("output: %.15f\n", fd_d); \ ++ printf("fcsr: %#x\n", fcsr); \ ++ } ++ ++#define TESTINST_FF_W_S(insn, v1) \ ++ { \ ++ unsigned int fcsr; \ ++ int fd_w; \ ++ __asm__ __volatile__( \ ++ insn " %0, %2 \n\t" \ ++ "movfcsr2gr %1, $r0 \n\t" \ ++ : "=f" (fd_w), "=r" (fcsr) \ ++ : "f" (v1) \ ++ : "memory"); \ ++ printf("%s ::\n", insn); \ ++ printf("input: %.6f\n", v1); \ ++ printf("output: %d\n", fd_w); \ ++ printf("fcsr: %#x\n", fcsr); \ ++ } ++ ++#define TESTINST_FF_W_D(insn, v1) \ ++ { \ ++ unsigned int fcsr; \ ++ int fd_w; \ ++ __asm__ __volatile__( \ ++ insn " %0, %2 \n\t" \ ++ "movfcsr2gr %1, $r0 \n\t" \ ++ : "=f" (fd_w), "=r" (fcsr) \ ++ : "f" (v1) \ ++ : "memory"); \ ++ printf("%s ::\n", insn); \ ++ printf("input: %.15f\n", v1); \ ++ printf("output: %d\n", fd_w); \ ++ printf("fcsr: %#x\n", fcsr); \ ++ } ++#define TESTINST_FF_L_S(insn, v1) \ ++ { \ ++ unsigned int fcsr; \ ++ long fd_l; \ ++ __asm__ __volatile__( \ ++ insn " %0, %2 \n\t" \ ++ "movfcsr2gr %1, $r0 \n\t" \ ++ : "=f" (fd_l), "=r" (fcsr) \ ++ : "f" (v1) \ ++ : "memory"); \ ++ printf("%s ::\n", insn); \ ++ printf("input: %.6f\n", v1); \ ++ printf("output: %ld\n", fd_l); \ ++ printf("fcsr: %#x\n", fcsr); \ ++ } ++ ++#define TESTINST_FF_L_D(insn, v1) \ ++ { \ ++ unsigned int fcsr; \ ++ long fd_l; \ ++ __asm__ __volatile__( \ ++ insn " %0, %2 \n\t" \ ++ "movfcsr2gr %1, $r0 \n\t" \ ++ : "=f" (fd_l), "=r" (fcsr) \ ++ : "f" (v1) \ ++ : "memory"); \ ++ printf("%s ::\n", insn); \ ++ printf("input: %.15f\n", v1); \ ++ printf("output: %ld\n", fd_l); \ ++ printf("fcsr: %#x\n", fcsr); \ ++ } ++ ++#define TESTINST_FF_S_W(insn, v1) \ ++ { \ ++ unsigned int fcsr; \ ++ float fd_s; \ ++ __asm__ __volatile__( \ ++ insn " %0, %2 \n\t" \ ++ "movfcsr2gr %1, $r0 \n\t" \ ++ : "=f" (fd_s), "=r" (fcsr) \ ++ : "f" (v1) \ ++ : "memory"); \ ++ printf("%s ::\n", insn); \ ++ printf("input: %d\n", v1); \ ++ printf("output: %.6f\n", fd_s); \ ++ printf("fcsr: %#x\n", fcsr); \ ++ } ++ ++#define TESTINST_FF_S_L(insn, v1) \ ++ { \ ++ unsigned int fcsr; \ ++ float fd_s; \ ++ __asm__ __volatile__( \ ++ insn " %0, %2 \n\t" \ ++ "movfcsr2gr %1, $r0 \n\t" \ ++ : "=f" (fd_s), "=r" (fcsr) \ ++ : "f" (v1) \ ++ : "memory"); \ ++ printf("%s ::\n", insn); \ ++ printf("input: %ld\n", v1); \ ++ printf("output: %.6f\n", fd_s); \ ++ printf("fcsr: %#x\n", fcsr); \ ++ } ++ ++#define TESTINST_FF_D_W(insn, v1) \ ++ { \ ++ unsigned int fcsr; \ ++ double fd_d; \ ++ __asm__ __volatile__( \ ++ insn " %0, %2 \n\t" \ ++ "movfcsr2gr %1, $r0 \n\t" \ ++ : "=f" (fd_d), "=r" (fcsr) \ ++ : "f" (v1) \ ++ : "memory"); \ ++ printf("%s ::\n", insn); \ ++ printf("input: %d\n", v1); \ ++ printf("output: %.15f\n", fd_d); \ ++ printf("fcsr: %#x\n", fcsr); \ ++ } ++ ++#define TESTINST_FF_D_L(insn, v1) \ ++ { \ ++ unsigned int fcsr; \ ++ double fd_d; \ ++ __asm__ __volatile__( \ ++ insn " %0, %2 \n\t" \ ++ "movfcsr2gr %1, $r0 \n\t" \ ++ : "=f" (fd_d), "=r" (fcsr) \ ++ : "f" (v1) \ ++ : "memory"); \ ++ printf("%s ::\n", insn); \ ++ printf("input: %ld\n", v1); \ ++ printf("output: %.15f\n", fd_d); \ ++ printf("fcsr: %#x\n", fcsr); \ ++ } ++ ++#define TESTINST_FFC_S(insn, v1, v2) \ ++ { \ ++ unsigned int fcsr; \ ++ int fcc; \ ++ __asm__ __volatile__( \ ++ insn " $fcc0, %2, %3 \n\t" \ ++ "movcf2gr %0, $fcc0 \n\t" \ ++ "movfcsr2gr %1, $r0 \n\t" \ ++ : "=r" (fcc), "=r" (fcsr) \ ++ : "f" (v1), "f" (v2) \ ++ : "$fcc0", "memory"); \ ++ printf("%s ::\n", insn); \ ++ printf("input: %.6f %.6f\n", v1, v2); \ ++ printf("output: %d\n", fcc); \ ++ printf("fcsr: %#x\n", 
fcsr); \ ++ } ++ ++#define TESTINST_FFC_D(insn, v1, v2) \ ++ { \ ++ unsigned int fcsr; \ ++ int fcc; \ ++ __asm__ __volatile__( \ ++ insn " $fcc0, %2, %3 \n\t" \ ++ "movcf2gr %0, $fcc0 \n\t" \ ++ "movfcsr2gr %1, $r0 \n\t" \ ++ : "=r" (fcc), "=r" (fcsr) \ ++ : "f" (v1), "f" (v2) \ ++ : "$fcc0", "memory"); \ ++ printf("%s ::\n", insn); \ ++ printf("input: %.15f %.15f\n", v1, v2); \ ++ printf("output: %d\n", fcc); \ ++ printf("fcsr: %#x\n", fcsr); \ ++ } ++ ++#define TESTINST_FFFC(insn, v1, v2, v3) \ ++ { \ ++ unsigned int fcsr; \ ++ double fd_s; \ ++ __asm__ __volatile__( \ ++ "movgr2cf $fcc0, %4 \n\t" \ ++ insn " %0, %2, %3, $fcc0 \n\t" \ ++ "movfcsr2gr %1, $r0 \n\t" \ ++ : "=f" (fd_s), "=r" (fcsr) \ ++ : "f" (v1), "f" (v2), "r" (v3) \ ++ : "memory"); \ ++ printf("%s ::\n", insn); \ ++ printf("input: %.15f %.15f %d\n", v1, v2, v3); \ ++ printf("output: %.15f\n", fd_s); \ ++ printf("fcsr: %#x\n", fcsr); \ ++ } ++ ++void test(op_t op) ++{ ++ int i; ++ round_mode_t mode; ++ for (mode = TO_NEAREST; mode <= TO_MINUS_INFINITY; mode++) { ++ for (i = 0; i < NUM; i++) { ++ set_fcsr(mode); ++ switch (op) { ++ case FADD_S: ++ TESTINST_FFF_S("fadd.s", fj_s[i], fk_s[i]); ++ break; ++ case FADD_D: ++ TESTINST_FFF_D("fadd.d", fj_d[i], fk_d[i]); ++ break; ++ case FSUB_S: ++ TESTINST_FFF_S("fsub.s", fj_s[i], fk_s[i]); ++ break; ++ case FSUB_D: ++ TESTINST_FFF_D("fsub.d", fj_d[i], fk_d[i]); ++ break; ++ case FMUL_S: ++ TESTINST_FFF_S("fmul.s", fj_s[i], fk_s[i]); ++ break; ++ case FMUL_D: ++ TESTINST_FFF_D("fmul.d", fj_d[i], fk_d[i]); ++ break; ++ case FDIV_S: ++ TESTINST_FFF_S("fdiv.s", fj_s[i], fk_s[i]); ++ break; ++ case FDIV_D: ++ TESTINST_FFF_D("fdiv.d", fj_d[i], fk_d[i]); ++ break; ++ case FMADD_S: ++ TESTINST_FFFF_S("fmadd.s", fj_s[i], fk_s[i], fa_s[i]); ++ break; ++ case FMADD_D: ++ TESTINST_FFFF_D("fmadd.d", fj_d[i], fk_d[i], fa_d[i]); ++ break; ++ case FMSUB_S: ++ TESTINST_FFFF_S("fmsub.s", fj_s[i], fk_s[i], fa_s[i]); ++ break; ++ case FMSUB_D: ++ TESTINST_FFFF_D("fmsub.d", fj_d[i], fk_d[i], fa_d[i]); ++ break; ++ case FNMADD_S: ++ TESTINST_FFFF_S("fnmadd.s", fj_s[i], fk_s[i], fa_s[i]); ++ break; ++ case FNMADD_D: ++ TESTINST_FFFF_D("fnmadd.d", fj_d[i], fk_d[i], fa_d[i]); ++ break; ++ case FNMSUB_S: ++ TESTINST_FFFF_S("fnmsub.s", fj_s[i], fk_s[i], fa_s[i]); ++ break; ++ case FNMSUB_D: ++ TESTINST_FFFF_D("fnmsub.d", fj_d[i], fk_d[i], fa_d[i]); ++ break; ++ case FMAX_S: ++ TESTINST_FFF_S("fmax.s", fj_s[i], fk_s[i]); ++ break; ++ case FMAX_D: ++ TESTINST_FFF_D("fmax.d", fj_d[i], fk_d[i]); ++ break; ++ case FMIN_S: ++ TESTINST_FFF_S("fmin.s", fj_s[i], fk_s[i]); ++ break; ++ case FMIN_D: ++ TESTINST_FFF_D("fmin.d", fj_d[i], fk_d[i]); ++ break; ++ case FMAXA_S: ++ TESTINST_FFF_S("fmaxa.s", fj_s[i], fk_s[i]); ++ break; ++ case FMAXA_D: ++ TESTINST_FFF_D("fmaxa.d", fj_d[i], fk_d[i]); ++ break; ++ case FMINA_S: ++ TESTINST_FFF_S("fmina.s", fj_s[i], fk_s[i]); ++ break; ++ case FMINA_D: ++ TESTINST_FFF_D("fmina.d", fj_d[i], fk_d[i]); ++ break; ++ case FABS_S: ++ TESTINST_FF_S("fabs.s", fj_s[i]); ++ break; ++ case FABS_D: ++ TESTINST_FF_D("fabs.d", fj_d[i]); ++ break; ++ case FNEG_S: ++ TESTINST_FF_S("fneg.s", fj_s[i]); ++ break; ++ case FNEG_D: ++ TESTINST_FF_D("fneg.d", fj_d[i]); ++ break; ++ case FSQRT_S: ++ TESTINST_FF_S("fsqrt.s", fj_s[i]); ++ break; ++ case FSQRT_D: ++ TESTINST_FF_D("fsqrt.d", fj_d[i]); ++ break; ++ case FRECIP_S: ++ TESTINST_FF_S("frecip.s", fj_s[i]); ++ break; ++ case FRECIP_D: ++ TESTINST_FF_D("frecip.d", fj_d[i]); ++ break; ++ case FRSQRT_S: ++ TESTINST_FF_S("frsqrt.s", fj_s[i]); ++ 
break; ++ case FRSQRT_D: ++ TESTINST_FF_D("frsqrt.d", fj_d[i]); ++ break; ++ case FSCALEB_S: ++ TESTINST_FFF_S("fscaleb.s", fj_s[i], fk_s[i]); ++ break; ++ case FSCALEB_D: ++ TESTINST_FFF_D("fscaleb.d", fj_d[i], fk_d[i]); ++ break; ++ case FLOGB_S: ++ TESTINST_FF_S("flogb.s", fj_s[i]); ++ break; ++ case FLOGB_D: ++ TESTINST_FF_D("flogb.d", fj_d[i]); ++ break; ++ case FCVT_S_D: ++ TESTINST_FF_S_D("fcvt.s.d", fj_d[i]); ++ break; ++ case FCVT_D_S: ++ TESTINST_FF_D_S("fcvt.d.s", fj_s[i]); ++ break; ++ case FTINTRM_W_S: ++ TESTINST_FF_W_S("ftintrm.w.s", fj_s[i]); ++ break; ++ case FTINTRM_W_D: ++ TESTINST_FF_W_D("ftintrm.w.d", fj_d[i]); ++ break; ++ case FTINTRM_L_S: ++ TESTINST_FF_L_S("ftintrm.l.s", fj_s[i]); ++ break; ++ case FTINTRM_L_D: ++ TESTINST_FF_L_D("ftintrm.l.d", fj_d[i]); ++ break; ++ case FTINTRP_W_S: ++ TESTINST_FF_W_S("ftintrp.w.s", fj_s[i]); ++ break; ++ case FTINTRP_W_D: ++ TESTINST_FF_W_D("ftintrp.w.d", fj_d[i]); ++ break; ++ case FTINTRP_L_S: ++ TESTINST_FF_L_S("ftintrp.l.s", fj_s[i]); ++ break; ++ case FTINTRP_L_D: ++ TESTINST_FF_L_D("ftintrp.l.d", fj_d[i]); ++ break; ++ case FTINTRZ_W_S: ++ TESTINST_FF_W_S("ftintrz.w.s", fj_s[i]); ++ break; ++ case FTINTRZ_W_D: ++ TESTINST_FF_W_D("ftintrz.w.d", fj_d[i]); ++ break; ++ case FTINTRZ_L_S: ++ TESTINST_FF_L_S("ftintrz.l.s", fj_s[i]); ++ break; ++ case FTINTRZ_L_D: ++ TESTINST_FF_L_D("ftintrz.l.d", fj_d[i]); ++ break; ++ case FTINTRNE_W_S: ++ TESTINST_FF_W_S("ftintrne.w.s", fj_s[i]); ++ break; ++ case FTINTRNE_W_D: ++ TESTINST_FF_W_D("ftintrne.w.d", fj_d[i]); ++ break; ++ case FTINTRNE_L_S: ++ TESTINST_FF_L_S("ftintrne.l.s", fj_s[i]); ++ break; ++ case FTINTRNE_L_D: ++ TESTINST_FF_L_D("ftintrne.l.d", fj_d[i]); ++ break; ++ case FTINT_W_S: ++ TESTINST_FF_W_S("ftint.w.s", fj_s[i]); ++ break; ++ case FTINT_W_D: ++ TESTINST_FF_W_D("ftint.w.d", fj_d[i]); ++ break; ++ case FTINT_L_S: ++ TESTINST_FF_L_S("ftint.l.s", fj_s[i]); ++ break; ++ case FTINT_L_D: ++ TESTINST_FF_L_D("ftint.l.d", fj_d[i]); ++ break; ++ case FFINT_S_W: ++ TESTINST_FF_S_W("ffint.s.w", fj_w[i]); ++ break; ++ case FFINT_S_L: ++ TESTINST_FF_S_L("ffint.s.l", fj_l[i]); ++ break; ++ case FFINT_D_W: ++ TESTINST_FF_D_W("ffint.d.w", fj_w[i]); ++ break; ++ case FFINT_D_L: ++ TESTINST_FF_D_L("ffint.d.l", fj_l[i]); ++ break; ++ case FRINT_S: ++ TESTINST_FF_S("frint.s", fj_s[i]); ++ break; ++ case FRINT_D: ++ TESTINST_FF_D("frint.d", fj_d[i]); ++ break; ++ case FCMP_CAF_S: ++ TESTINST_FFC_S("fcmp.caf.s", fj_s[i], fk_s[i]); ++ break; ++ case FCMP_CAF_D: ++ TESTINST_FFC_D("fcmp.caf.d", fj_d[i], fk_d[i]); ++ break; ++ case FCMP_SAF_S: ++ TESTINST_FFC_S("fcmp.saf.s", fj_s[i], fk_s[i]); ++ break; ++ case FCMP_SAF_D: ++ TESTINST_FFC_D("fcmp.saf.d", fj_d[i], fk_d[i]); ++ break; ++ case FCMP_CLT_S: ++ TESTINST_FFC_S("fcmp.clt.s", fj_s[i], fk_s[i]); ++ break; ++ case FCMP_CLT_D: ++ TESTINST_FFC_D("fcmp.clt.d", fj_d[i], fk_d[i]); ++ break; ++ case FCMP_SLT_S: ++ TESTINST_FFC_S("fcmp.slt.s", fj_s[i], fk_s[i]); ++ break; ++ case FCMP_SLT_D: ++ TESTINST_FFC_D("fcmp.slt.d", fj_d[i], fk_d[i]); ++ break; ++ case FCMP_CEQ_S: ++ TESTINST_FFC_S("fcmp.ceq.s", fj_s[i], fk_s[i]); ++ break; ++ case FCMP_CEQ_D: ++ TESTINST_FFC_D("fcmp.ceq.d", fj_d[i], fk_d[i]); ++ break; ++ case FCMP_SEQ_S: ++ TESTINST_FFC_S("fcmp.seq.s", fj_s[i], fk_s[i]); ++ break; ++ case FCMP_SEQ_D: ++ TESTINST_FFC_D("fcmp.seq.d", fj_d[i], fk_d[i]); ++ break; ++ case FCMP_CLE_S: ++ TESTINST_FFC_S("fcmp.cle.s", fj_s[i], fk_s[i]); ++ break; ++ case FCMP_CLE_D: ++ TESTINST_FFC_D("fcmp.cle.d", fj_d[i], fk_d[i]); ++ break; ++ case 
FCMP_SLE_S: ++ TESTINST_FFC_S("fcmp.sle.s", fj_s[i], fk_s[i]); ++ break; ++ case FCMP_SLE_D: ++ TESTINST_FFC_D("fcmp.sle.d", fj_d[i], fk_d[i]); ++ break; ++ case FCMP_CUN_S: ++ TESTINST_FFC_S("fcmp.cun.s", fj_s[i], fk_s[i]); ++ break; ++ case FCMP_CUN_D: ++ TESTINST_FFC_D("fcmp.cun.d", fj_d[i], fk_d[i]); ++ break; ++ case FCMP_SUN_S: ++ TESTINST_FFC_S("fcmp.sun.s", fj_s[i], fk_s[i]); ++ break; ++ case FCMP_SUN_D: ++ TESTINST_FFC_D("fcmp.sun.d", fj_d[i], fk_d[i]); ++ break; ++ case FCMP_CULT_S: ++ TESTINST_FFC_S("fcmp.cult.s", fj_s[i], fk_s[i]); ++ break; ++ case FCMP_CULT_D: ++ TESTINST_FFC_D("fcmp.cult.d", fj_d[i], fk_d[i]); ++ break; ++ case FCMP_SULT_S: ++ TESTINST_FFC_S("fcmp.sult.s", fj_s[i], fk_s[i]); ++ break; ++ case FCMP_SULT_D: ++ TESTINST_FFC_D("fcmp.sult.d", fj_d[i], fk_d[i]); ++ break; ++ case FCMP_CUEQ_S: ++ TESTINST_FFC_S("fcmp.cueq.s", fj_s[i], fk_s[i]); ++ break; ++ case FCMP_CUEQ_D: ++ TESTINST_FFC_D("fcmp.cueq.d", fj_d[i], fk_d[i]); ++ break; ++ case FCMP_SUEQ_S: ++ TESTINST_FFC_S("fcmp.sueq.s", fj_s[i], fk_s[i]); ++ break; ++ case FCMP_SUEQ_D: ++ TESTINST_FFC_D("fcmp.sueq.d", fj_d[i], fk_d[i]); ++ break; ++ case FCMP_CULE_S: ++ TESTINST_FFC_S("fcmp.cule.s", fj_s[i], fk_s[i]); ++ break; ++ case FCMP_CULE_D: ++ TESTINST_FFC_D("fcmp.cule.d", fj_d[i], fk_d[i]); ++ break; ++ case FCMP_SULE_S: ++ TESTINST_FFC_S("fcmp.sule.s", fj_s[i], fk_s[i]); ++ break; ++ case FCMP_SULE_D: ++ TESTINST_FFC_D("fcmp.sule.d", fj_d[i], fk_d[i]); ++ break; ++ case FCMP_CNE_S: ++ TESTINST_FFC_S("fcmp.cne.s", fj_s[i], fk_s[i]); ++ break; ++ case FCMP_CNE_D: ++ TESTINST_FFC_D("fcmp.cne.d", fj_d[i], fk_d[i]); ++ break; ++ case FCMP_SNE_S: ++ TESTINST_FFC_S("fcmp.sne.s", fj_s[i], fk_s[i]); ++ break; ++ case FCMP_SNE_D: ++ TESTINST_FFC_D("fcmp.sne.d", fj_d[i], fk_d[i]); ++ break; ++ case FCMP_COR_S: ++ TESTINST_FFC_S("fcmp.cor.s", fj_s[i], fk_s[i]); ++ break; ++ case FCMP_COR_D: ++ TESTINST_FFC_D("fcmp.cor.d", fj_d[i], fk_d[i]); ++ break; ++ case FCMP_SOR_S: ++ TESTINST_FFC_S("fcmp.sor.s", fj_s[i], fk_s[i]); ++ break; ++ case FCMP_SOR_D: ++ TESTINST_FFC_D("fcmp.sor.d", fj_d[i], fk_d[i]); ++ break; ++ case FCMP_CUNE_S: ++ TESTINST_FFC_S("fcmp.cune.s", fj_s[i], fk_s[i]); ++ break; ++ case FCMP_CUNE_D: ++ TESTINST_FFC_D("fcmp.cune.d", fj_d[i], fk_d[i]); ++ break; ++ case FCMP_SUNE_S: ++ TESTINST_FFC_S("fcmp.sune.s", fj_s[i], fk_s[i]); ++ break; ++ case FCMP_SUNE_D: ++ TESTINST_FFC_D("fcmp.sune.d", fj_d[i], fk_d[i]); ++ break; ++ case FSEL: ++ TESTINST_FFFC("fsel", fj_d[i], fk_d[i], cf[i]); ++ break; ++ case FMOV_S: ++ TESTINST_FF_S("fmov.s", fj_s[i]); ++ break; ++ case FMOV_D: ++ TESTINST_FF_D("fmov.d", fj_d[i]); ++ break; ++ default: ++ assert(0); ++ break; ++ } ++ } ++ } ++} ++ ++int main(void) ++{ ++ op_t op; ++ for (op = FADD_S; op <= FMOV_D; op++) ++ test(op); ++ return 0; ++} +diff --git a/none/tests/loongarch64/float.stderr.exp b/none/tests/loongarch64/float.stderr.exp +new file mode 100644 +index 000000000..e69de29bb +diff --git a/none/tests/loongarch64/float.stdout.exp b/none/tests/loongarch64/float.stdout.exp +new file mode 100644 +index 000000000..f4914ff0c +--- /dev/null ++++ b/none/tests/loongarch64/float.stdout.exp +@@ -0,0 +1,54240 @@ ++roundig mode: near ++fadd.s :: ++input: 0.000000 -4578.500000 ++output: -4578.500000 ++fcsr: 0 ++roundig mode: near ++fadd.s :: ++input: 456.250000 456.250000 ++output: 912.500000 ++fcsr: 0 ++roundig mode: near ++fadd.s :: ++input: 3.000000 34.031250 ++output: 37.031250 ++fcsr: 0 ++roundig mode: near ++fadd.s :: ++input: -1.000000 4578.750000 ++output: 
4577.750000 ++fcsr: 0 ++roundig mode: near ++fadd.s :: ++input: 1384.500000 175.000000 ++output: 1559.500000 ++fcsr: 0 ++roundig mode: near ++fadd.s :: ++input: -7.250000 107.000000 ++output: 99.750000 ++fcsr: 0 ++roundig mode: near ++fadd.s :: ++input: 1000000000.000000 -456.250000 ++output: 999999552.000000 ++fcsr: 0x1010000 ++roundig mode: near ++fadd.s :: ++input: -5786.500000 -7.250000 ++output: -5793.750000 ++fcsr: 0 ++roundig mode: near ++fadd.s :: ++input: 1752.000000 -3478.500000 ++output: -1726.500000 ++fcsr: 0 ++roundig mode: near ++fadd.s :: ++input: 0.015625 356.500000 ++output: 356.515625 ++fcsr: 0 ++roundig mode: near ++fadd.s :: ++input: 0.031250 -1.000000 ++output: -0.968750 ++fcsr: 0 ++roundig mode: near ++fadd.s :: ++input: -248562.750000 23.062500 ++output: -248539.687500 ++fcsr: 0 ++roundig mode: near ++fadd.s :: ++input: -45786.500000 0.000000 ++output: -45786.500000 ++fcsr: 0 ++roundig mode: near ++fadd.s :: ++input: 456.000000 456.250000 ++output: 912.250000 ++fcsr: 0 ++roundig mode: near ++fadd.s :: ++input: 34.031250 3.000000 ++output: 37.031250 ++fcsr: 0 ++roundig mode: near ++fadd.s :: ++input: 45786.750000 -1.000000 ++output: 45785.750000 ++fcsr: 0 ++roundig mode: near ++fadd.s :: ++input: 1752065.000000 1384.500000 ++output: 1753449.500000 ++fcsr: 0 ++roundig mode: near ++fadd.s :: ++input: 107.000000 -7.000000 ++output: 100.000000 ++fcsr: 0 ++roundig mode: near ++fadd.s :: ++input: -45667.250000 100.000000 ++output: -45567.250000 ++fcsr: 0 ++roundig mode: near ++fadd.s :: ++input: -7.000000 -5786.500000 ++output: -5793.500000 ++fcsr: 0 ++roundig mode: near ++fadd.s :: ++input: -347856.500000 1752.000000 ++output: -346104.500000 ++fcsr: 0 ++roundig mode: near ++fadd.s :: ++input: 356047.500000 0.015625 ++output: 356047.500000 ++fcsr: 0x1010000 ++roundig mode: near ++fadd.s :: ++input: -1.000000 0.031250 ++output: -0.968750 ++fcsr: 0 ++roundig mode: near ++fadd.s :: ++input: 23.062500 -248562.750000 ++output: -248539.687500 ++fcsr: 0 ++roundig mode: zero ++fadd.s :: ++input: 0.000000 -4578.500000 ++output: -4578.500000 ++fcsr: 0x100 ++roundig mode: zero ++fadd.s :: ++input: 456.250000 456.250000 ++output: 912.500000 ++fcsr: 0x100 ++roundig mode: zero ++fadd.s :: ++input: 3.000000 34.031250 ++output: 37.031250 ++fcsr: 0x100 ++roundig mode: zero ++fadd.s :: ++input: -1.000000 4578.750000 ++output: 4577.750000 ++fcsr: 0x100 ++roundig mode: zero ++fadd.s :: ++input: 1384.500000 175.000000 ++output: 1559.500000 ++fcsr: 0x100 ++roundig mode: zero ++fadd.s :: ++input: -7.250000 107.000000 ++output: 99.750000 ++fcsr: 0x100 ++roundig mode: zero ++fadd.s :: ++input: 1000000000.000000 -456.250000 ++output: 999999488.000000 ++fcsr: 0x1010100 ++roundig mode: zero ++fadd.s :: ++input: -5786.500000 -7.250000 ++output: -5793.750000 ++fcsr: 0x100 ++roundig mode: zero ++fadd.s :: ++input: 1752.000000 -3478.500000 ++output: -1726.500000 ++fcsr: 0x100 ++roundig mode: zero ++fadd.s :: ++input: 0.015625 356.500000 ++output: 356.515625 ++fcsr: 0x100 ++roundig mode: zero ++fadd.s :: ++input: 0.031250 -1.000000 ++output: -0.968750 ++fcsr: 0x100 ++roundig mode: zero ++fadd.s :: ++input: -248562.750000 23.062500 ++output: -248539.687500 ++fcsr: 0x100 ++roundig mode: zero ++fadd.s :: ++input: -45786.500000 0.000000 ++output: -45786.500000 ++fcsr: 0x100 ++roundig mode: zero ++fadd.s :: ++input: 456.000000 456.250000 ++output: 912.250000 ++fcsr: 0x100 ++roundig mode: zero ++fadd.s :: ++input: 34.031250 3.000000 ++output: 37.031250 ++fcsr: 0x100 ++roundig mode: zero ++fadd.s :: ++input: 
45786.750000 -1.000000 ++output: 45785.750000 ++fcsr: 0x100 ++roundig mode: zero ++fadd.s :: ++input: 1752065.000000 1384.500000 ++output: 1753449.500000 ++fcsr: 0x100 ++roundig mode: zero ++fadd.s :: ++input: 107.000000 -7.000000 ++output: 100.000000 ++fcsr: 0x100 ++roundig mode: zero ++fadd.s :: ++input: -45667.250000 100.000000 ++output: -45567.250000 ++fcsr: 0x100 ++roundig mode: zero ++fadd.s :: ++input: -7.000000 -5786.500000 ++output: -5793.500000 ++fcsr: 0x100 ++roundig mode: zero ++fadd.s :: ++input: -347856.500000 1752.000000 ++output: -346104.500000 ++fcsr: 0x100 ++roundig mode: zero ++fadd.s :: ++input: 356047.500000 0.015625 ++output: 356047.500000 ++fcsr: 0x1010100 ++roundig mode: zero ++fadd.s :: ++input: -1.000000 0.031250 ++output: -0.968750 ++fcsr: 0x100 ++roundig mode: zero ++fadd.s :: ++input: 23.062500 -248562.750000 ++output: -248539.687500 ++fcsr: 0x100 ++roundig mode: +inf ++fadd.s :: ++input: 0.000000 -4578.500000 ++output: -4578.500000 ++fcsr: 0x200 ++roundig mode: +inf ++fadd.s :: ++input: 456.250000 456.250000 ++output: 912.500000 ++fcsr: 0x200 ++roundig mode: +inf ++fadd.s :: ++input: 3.000000 34.031250 ++output: 37.031250 ++fcsr: 0x200 ++roundig mode: +inf ++fadd.s :: ++input: -1.000000 4578.750000 ++output: 4577.750000 ++fcsr: 0x200 ++roundig mode: +inf ++fadd.s :: ++input: 1384.500000 175.000000 ++output: 1559.500000 ++fcsr: 0x200 ++roundig mode: +inf ++fadd.s :: ++input: -7.250000 107.000000 ++output: 99.750000 ++fcsr: 0x200 ++roundig mode: +inf ++fadd.s :: ++input: 1000000000.000000 -456.250000 ++output: 999999552.000000 ++fcsr: 0x1010200 ++roundig mode: +inf ++fadd.s :: ++input: -5786.500000 -7.250000 ++output: -5793.750000 ++fcsr: 0x200 ++roundig mode: +inf ++fadd.s :: ++input: 1752.000000 -3478.500000 ++output: -1726.500000 ++fcsr: 0x200 ++roundig mode: +inf ++fadd.s :: ++input: 0.015625 356.500000 ++output: 356.515625 ++fcsr: 0x200 ++roundig mode: +inf ++fadd.s :: ++input: 0.031250 -1.000000 ++output: -0.968750 ++fcsr: 0x200 ++roundig mode: +inf ++fadd.s :: ++input: -248562.750000 23.062500 ++output: -248539.687500 ++fcsr: 0x200 ++roundig mode: +inf ++fadd.s :: ++input: -45786.500000 0.000000 ++output: -45786.500000 ++fcsr: 0x200 ++roundig mode: +inf ++fadd.s :: ++input: 456.000000 456.250000 ++output: 912.250000 ++fcsr: 0x200 ++roundig mode: +inf ++fadd.s :: ++input: 34.031250 3.000000 ++output: 37.031250 ++fcsr: 0x200 ++roundig mode: +inf ++fadd.s :: ++input: 45786.750000 -1.000000 ++output: 45785.750000 ++fcsr: 0x200 ++roundig mode: +inf ++fadd.s :: ++input: 1752065.000000 1384.500000 ++output: 1753449.500000 ++fcsr: 0x200 ++roundig mode: +inf ++fadd.s :: ++input: 107.000000 -7.000000 ++output: 100.000000 ++fcsr: 0x200 ++roundig mode: +inf ++fadd.s :: ++input: -45667.250000 100.000000 ++output: -45567.250000 ++fcsr: 0x200 ++roundig mode: +inf ++fadd.s :: ++input: -7.000000 -5786.500000 ++output: -5793.500000 ++fcsr: 0x200 ++roundig mode: +inf ++fadd.s :: ++input: -347856.500000 1752.000000 ++output: -346104.500000 ++fcsr: 0x200 ++roundig mode: +inf ++fadd.s :: ++input: 356047.500000 0.015625 ++output: 356047.531250 ++fcsr: 0x1010200 ++roundig mode: +inf ++fadd.s :: ++input: -1.000000 0.031250 ++output: -0.968750 ++fcsr: 0x200 ++roundig mode: +inf ++fadd.s :: ++input: 23.062500 -248562.750000 ++output: -248539.687500 ++fcsr: 0x200 ++roundig mode: -inf ++fadd.s :: ++input: 0.000000 -4578.500000 ++output: -4578.500000 ++fcsr: 0x300 ++roundig mode: -inf ++fadd.s :: ++input: 456.250000 456.250000 ++output: 912.500000 ++fcsr: 0x300 ++roundig mode: -inf 
++fadd.s :: ++input: 3.000000 34.031250 ++output: 37.031250 ++fcsr: 0x300 ++roundig mode: -inf ++fadd.s :: ++input: -1.000000 4578.750000 ++output: 4577.750000 ++fcsr: 0x300 ++roundig mode: -inf ++fadd.s :: ++input: 1384.500000 175.000000 ++output: 1559.500000 ++fcsr: 0x300 ++roundig mode: -inf ++fadd.s :: ++input: -7.250000 107.000000 ++output: 99.750000 ++fcsr: 0x300 ++roundig mode: -inf ++fadd.s :: ++input: 1000000000.000000 -456.250000 ++output: 999999488.000000 ++fcsr: 0x1010300 ++roundig mode: -inf ++fadd.s :: ++input: -5786.500000 -7.250000 ++output: -5793.750000 ++fcsr: 0x300 ++roundig mode: -inf ++fadd.s :: ++input: 1752.000000 -3478.500000 ++output: -1726.500000 ++fcsr: 0x300 ++roundig mode: -inf ++fadd.s :: ++input: 0.015625 356.500000 ++output: 356.515625 ++fcsr: 0x300 ++roundig mode: -inf ++fadd.s :: ++input: 0.031250 -1.000000 ++output: -0.968750 ++fcsr: 0x300 ++roundig mode: -inf ++fadd.s :: ++input: -248562.750000 23.062500 ++output: -248539.687500 ++fcsr: 0x300 ++roundig mode: -inf ++fadd.s :: ++input: -45786.500000 0.000000 ++output: -45786.500000 ++fcsr: 0x300 ++roundig mode: -inf ++fadd.s :: ++input: 456.000000 456.250000 ++output: 912.250000 ++fcsr: 0x300 ++roundig mode: -inf ++fadd.s :: ++input: 34.031250 3.000000 ++output: 37.031250 ++fcsr: 0x300 ++roundig mode: -inf ++fadd.s :: ++input: 45786.750000 -1.000000 ++output: 45785.750000 ++fcsr: 0x300 ++roundig mode: -inf ++fadd.s :: ++input: 1752065.000000 1384.500000 ++output: 1753449.500000 ++fcsr: 0x300 ++roundig mode: -inf ++fadd.s :: ++input: 107.000000 -7.000000 ++output: 100.000000 ++fcsr: 0x300 ++roundig mode: -inf ++fadd.s :: ++input: -45667.250000 100.000000 ++output: -45567.250000 ++fcsr: 0x300 ++roundig mode: -inf ++fadd.s :: ++input: -7.000000 -5786.500000 ++output: -5793.500000 ++fcsr: 0x300 ++roundig mode: -inf ++fadd.s :: ++input: -347856.500000 1752.000000 ++output: -346104.500000 ++fcsr: 0x300 ++roundig mode: -inf ++fadd.s :: ++input: 356047.500000 0.015625 ++output: 356047.500000 ++fcsr: 0x1010300 ++roundig mode: -inf ++fadd.s :: ++input: -1.000000 0.031250 ++output: -0.968750 ++fcsr: 0x300 ++roundig mode: -inf ++fadd.s :: ++input: 23.062500 -248562.750000 ++output: -248539.687500 ++fcsr: 0x300 ++roundig mode: near ++fadd.d :: ++input: 0.000000000000000 -45786.500000000000000 ++output: -45786.500000000000000 ++fcsr: 0 ++roundig mode: near ++fadd.d :: ++input: 456.250000000000000 456.250000000000000 ++output: 912.500000000000000 ++fcsr: 0 ++roundig mode: near ++fadd.d :: ++input: 3.000000000000000 34.031250000000000 ++output: 37.031250000000000 ++fcsr: 0 ++roundig mode: near ++fadd.d :: ++input: -1.000000000000000 45786.750000000000000 ++output: 45785.750000000000000 ++fcsr: 0 ++roundig mode: near ++fadd.d :: ++input: 1384.500000000000000 1752065.000000000000000 ++output: 1753449.500000000000000 ++fcsr: 0 ++roundig mode: near ++fadd.d :: ++input: -7.250000000000000 107.000000000000000 ++output: 99.750000000000000 ++fcsr: 0 ++roundig mode: near ++fadd.d :: ++input: 1000000000.000000000000000 -45667.250000000000000 ++output: 999954332.750000000000000 ++fcsr: 0 ++roundig mode: near ++fadd.d :: ++input: -5786.500000000000000 -7.250000000000000 ++output: -5793.750000000000000 ++fcsr: 0 ++roundig mode: near ++fadd.d :: ++input: 1752.000000000000000 -347856.500000000000000 ++output: -346104.500000000000000 ++fcsr: 0 ++roundig mode: near ++fadd.d :: ++input: 0.015625000000000 356047.500000000000000 ++output: 356047.515625000000000 ++fcsr: 0 ++roundig mode: near ++fadd.d :: ++input: 0.031250000000000 
-1.000000000000000 ++output: -0.968750000000000 ++fcsr: 0 ++roundig mode: near ++fadd.d :: ++input: -248562.750000000000000 23.062500000000000 ++output: -248539.687500000000000 ++fcsr: 0 ++roundig mode: near ++fadd.d :: ++input: -45786.500000000000000 0.000000000000000 ++output: -45786.500000000000000 ++fcsr: 0 ++roundig mode: near ++fadd.d :: ++input: 456.000000000000000 456.250000000000000 ++output: 912.250000000000000 ++fcsr: 0 ++roundig mode: near ++fadd.d :: ++input: 34.031250000000000 3.000000000000000 ++output: 37.031250000000000 ++fcsr: 0 ++roundig mode: near ++fadd.d :: ++input: 45786.750000000000000 -1.000000000000000 ++output: 45785.750000000000000 ++fcsr: 0 ++roundig mode: near ++fadd.d :: ++input: 1752065.000000000000000 1384.500000000000000 ++output: 1753449.500000000000000 ++fcsr: 0 ++roundig mode: near ++fadd.d :: ++input: 107.000000000000000 -7.000000000000000 ++output: 100.000000000000000 ++fcsr: 0 ++roundig mode: near ++fadd.d :: ++input: -45667.250000000000000 1000000000.000000000000000 ++output: 999954332.750000000000000 ++fcsr: 0 ++roundig mode: near ++fadd.d :: ++input: -7.000000000000000 -5786.500000000000000 ++output: -5793.500000000000000 ++fcsr: 0 ++roundig mode: near ++fadd.d :: ++input: -347856.500000000000000 1752.000000000000000 ++output: -346104.500000000000000 ++fcsr: 0 ++roundig mode: near ++fadd.d :: ++input: 356047.500000000000000 0.015625000000000 ++output: 356047.515625000000000 ++fcsr: 0 ++roundig mode: near ++fadd.d :: ++input: -1.000000000000000 0.031250000000000 ++output: -0.968750000000000 ++fcsr: 0 ++roundig mode: near ++fadd.d :: ++input: 23.062500000000000 -248562.750000000000000 ++output: -248539.687500000000000 ++fcsr: 0 ++roundig mode: zero ++fadd.d :: ++input: 0.000000000000000 -45786.500000000000000 ++output: -45786.500000000000000 ++fcsr: 0x100 ++roundig mode: zero ++fadd.d :: ++input: 456.250000000000000 456.250000000000000 ++output: 912.500000000000000 ++fcsr: 0x100 ++roundig mode: zero ++fadd.d :: ++input: 3.000000000000000 34.031250000000000 ++output: 37.031250000000000 ++fcsr: 0x100 ++roundig mode: zero ++fadd.d :: ++input: -1.000000000000000 45786.750000000000000 ++output: 45785.750000000000000 ++fcsr: 0x100 ++roundig mode: zero ++fadd.d :: ++input: 1384.500000000000000 1752065.000000000000000 ++output: 1753449.500000000000000 ++fcsr: 0x100 ++roundig mode: zero ++fadd.d :: ++input: -7.250000000000000 107.000000000000000 ++output: 99.750000000000000 ++fcsr: 0x100 ++roundig mode: zero ++fadd.d :: ++input: 1000000000.000000000000000 -45667.250000000000000 ++output: 999954332.750000000000000 ++fcsr: 0x100 ++roundig mode: zero ++fadd.d :: ++input: -5786.500000000000000 -7.250000000000000 ++output: -5793.750000000000000 ++fcsr: 0x100 ++roundig mode: zero ++fadd.d :: ++input: 1752.000000000000000 -347856.500000000000000 ++output: -346104.500000000000000 ++fcsr: 0x100 ++roundig mode: zero ++fadd.d :: ++input: 0.015625000000000 356047.500000000000000 ++output: 356047.515625000000000 ++fcsr: 0x100 ++roundig mode: zero ++fadd.d :: ++input: 0.031250000000000 -1.000000000000000 ++output: -0.968750000000000 ++fcsr: 0x100 ++roundig mode: zero ++fadd.d :: ++input: -248562.750000000000000 23.062500000000000 ++output: -248539.687500000000000 ++fcsr: 0x100 ++roundig mode: zero ++fadd.d :: ++input: -45786.500000000000000 0.000000000000000 ++output: -45786.500000000000000 ++fcsr: 0x100 ++roundig mode: zero ++fadd.d :: ++input: 456.000000000000000 456.250000000000000 ++output: 912.250000000000000 ++fcsr: 0x100 ++roundig mode: zero ++fadd.d :: ++input: 
34.031250000000000 3.000000000000000 ++output: 37.031250000000000 ++fcsr: 0x100 ++roundig mode: zero ++fadd.d :: ++input: 45786.750000000000000 -1.000000000000000 ++output: 45785.750000000000000 ++fcsr: 0x100 ++roundig mode: zero ++fadd.d :: ++input: 1752065.000000000000000 1384.500000000000000 ++output: 1753449.500000000000000 ++fcsr: 0x100 ++roundig mode: zero ++fadd.d :: ++input: 107.000000000000000 -7.000000000000000 ++output: 100.000000000000000 ++fcsr: 0x100 ++roundig mode: zero ++fadd.d :: ++input: -45667.250000000000000 1000000000.000000000000000 ++output: 999954332.750000000000000 ++fcsr: 0x100 ++roundig mode: zero ++fadd.d :: ++input: -7.000000000000000 -5786.500000000000000 ++output: -5793.500000000000000 ++fcsr: 0x100 ++roundig mode: zero ++fadd.d :: ++input: -347856.500000000000000 1752.000000000000000 ++output: -346104.500000000000000 ++fcsr: 0x100 ++roundig mode: zero ++fadd.d :: ++input: 356047.500000000000000 0.015625000000000 ++output: 356047.515625000000000 ++fcsr: 0x100 ++roundig mode: zero ++fadd.d :: ++input: -1.000000000000000 0.031250000000000 ++output: -0.968750000000000 ++fcsr: 0x100 ++roundig mode: zero ++fadd.d :: ++input: 23.062500000000000 -248562.750000000000000 ++output: -248539.687500000000000 ++fcsr: 0x100 ++roundig mode: +inf ++fadd.d :: ++input: 0.000000000000000 -45786.500000000000000 ++output: -45786.500000000000000 ++fcsr: 0x200 ++roundig mode: +inf ++fadd.d :: ++input: 456.250000000000000 456.250000000000000 ++output: 912.500000000000000 ++fcsr: 0x200 ++roundig mode: +inf ++fadd.d :: ++input: 3.000000000000000 34.031250000000000 ++output: 37.031250000000000 ++fcsr: 0x200 ++roundig mode: +inf ++fadd.d :: ++input: -1.000000000000000 45786.750000000000000 ++output: 45785.750000000000000 ++fcsr: 0x200 ++roundig mode: +inf ++fadd.d :: ++input: 1384.500000000000000 1752065.000000000000000 ++output: 1753449.500000000000000 ++fcsr: 0x200 ++roundig mode: +inf ++fadd.d :: ++input: -7.250000000000000 107.000000000000000 ++output: 99.750000000000000 ++fcsr: 0x200 ++roundig mode: +inf ++fadd.d :: ++input: 1000000000.000000000000000 -45667.250000000000000 ++output: 999954332.750000000000000 ++fcsr: 0x200 ++roundig mode: +inf ++fadd.d :: ++input: -5786.500000000000000 -7.250000000000000 ++output: -5793.750000000000000 ++fcsr: 0x200 ++roundig mode: +inf ++fadd.d :: ++input: 1752.000000000000000 -347856.500000000000000 ++output: -346104.500000000000000 ++fcsr: 0x200 ++roundig mode: +inf ++fadd.d :: ++input: 0.015625000000000 356047.500000000000000 ++output: 356047.515625000000000 ++fcsr: 0x200 ++roundig mode: +inf ++fadd.d :: ++input: 0.031250000000000 -1.000000000000000 ++output: -0.968750000000000 ++fcsr: 0x200 ++roundig mode: +inf ++fadd.d :: ++input: -248562.750000000000000 23.062500000000000 ++output: -248539.687500000000000 ++fcsr: 0x200 ++roundig mode: +inf ++fadd.d :: ++input: -45786.500000000000000 0.000000000000000 ++output: -45786.500000000000000 ++fcsr: 0x200 ++roundig mode: +inf ++fadd.d :: ++input: 456.000000000000000 456.250000000000000 ++output: 912.250000000000000 ++fcsr: 0x200 ++roundig mode: +inf ++fadd.d :: ++input: 34.031250000000000 3.000000000000000 ++output: 37.031250000000000 ++fcsr: 0x200 ++roundig mode: +inf ++fadd.d :: ++input: 45786.750000000000000 -1.000000000000000 ++output: 45785.750000000000000 ++fcsr: 0x200 ++roundig mode: +inf ++fadd.d :: ++input: 1752065.000000000000000 1384.500000000000000 ++output: 1753449.500000000000000 ++fcsr: 0x200 ++roundig mode: +inf ++fadd.d :: ++input: 107.000000000000000 -7.000000000000000 ++output: 
100.000000000000000 ++fcsr: 0x200 ++roundig mode: +inf ++fadd.d :: ++input: -45667.250000000000000 1000000000.000000000000000 ++output: 999954332.750000000000000 ++fcsr: 0x200 ++roundig mode: +inf ++fadd.d :: ++input: -7.000000000000000 -5786.500000000000000 ++output: -5793.500000000000000 ++fcsr: 0x200 ++roundig mode: +inf ++fadd.d :: ++input: -347856.500000000000000 1752.000000000000000 ++output: -346104.500000000000000 ++fcsr: 0x200 ++roundig mode: +inf ++fadd.d :: ++input: 356047.500000000000000 0.015625000000000 ++output: 356047.515625000000000 ++fcsr: 0x200 ++roundig mode: +inf ++fadd.d :: ++input: -1.000000000000000 0.031250000000000 ++output: -0.968750000000000 ++fcsr: 0x200 ++roundig mode: +inf ++fadd.d :: ++input: 23.062500000000000 -248562.750000000000000 ++output: -248539.687500000000000 ++fcsr: 0x200 ++roundig mode: -inf ++fadd.d :: ++input: 0.000000000000000 -45786.500000000000000 ++output: -45786.500000000000000 ++fcsr: 0x300 ++roundig mode: -inf ++fadd.d :: ++input: 456.250000000000000 456.250000000000000 ++output: 912.500000000000000 ++fcsr: 0x300 ++roundig mode: -inf ++fadd.d :: ++input: 3.000000000000000 34.031250000000000 ++output: 37.031250000000000 ++fcsr: 0x300 ++roundig mode: -inf ++fadd.d :: ++input: -1.000000000000000 45786.750000000000000 ++output: 45785.750000000000000 ++fcsr: 0x300 ++roundig mode: -inf ++fadd.d :: ++input: 1384.500000000000000 1752065.000000000000000 ++output: 1753449.500000000000000 ++fcsr: 0x300 ++roundig mode: -inf ++fadd.d :: ++input: -7.250000000000000 107.000000000000000 ++output: 99.750000000000000 ++fcsr: 0x300 ++roundig mode: -inf ++fadd.d :: ++input: 1000000000.000000000000000 -45667.250000000000000 ++output: 999954332.750000000000000 ++fcsr: 0x300 ++roundig mode: -inf ++fadd.d :: ++input: -5786.500000000000000 -7.250000000000000 ++output: -5793.750000000000000 ++fcsr: 0x300 ++roundig mode: -inf ++fadd.d :: ++input: 1752.000000000000000 -347856.500000000000000 ++output: -346104.500000000000000 ++fcsr: 0x300 ++roundig mode: -inf ++fadd.d :: ++input: 0.015625000000000 356047.500000000000000 ++output: 356047.515625000000000 ++fcsr: 0x300 ++roundig mode: -inf ++fadd.d :: ++input: 0.031250000000000 -1.000000000000000 ++output: -0.968750000000000 ++fcsr: 0x300 ++roundig mode: -inf ++fadd.d :: ++input: -248562.750000000000000 23.062500000000000 ++output: -248539.687500000000000 ++fcsr: 0x300 ++roundig mode: -inf ++fadd.d :: ++input: -45786.500000000000000 0.000000000000000 ++output: -45786.500000000000000 ++fcsr: 0x300 ++roundig mode: -inf ++fadd.d :: ++input: 456.000000000000000 456.250000000000000 ++output: 912.250000000000000 ++fcsr: 0x300 ++roundig mode: -inf ++fadd.d :: ++input: 34.031250000000000 3.000000000000000 ++output: 37.031250000000000 ++fcsr: 0x300 ++roundig mode: -inf ++fadd.d :: ++input: 45786.750000000000000 -1.000000000000000 ++output: 45785.750000000000000 ++fcsr: 0x300 ++roundig mode: -inf ++fadd.d :: ++input: 1752065.000000000000000 1384.500000000000000 ++output: 1753449.500000000000000 ++fcsr: 0x300 ++roundig mode: -inf ++fadd.d :: ++input: 107.000000000000000 -7.000000000000000 ++output: 100.000000000000000 ++fcsr: 0x300 ++roundig mode: -inf ++fadd.d :: ++input: -45667.250000000000000 1000000000.000000000000000 ++output: 999954332.750000000000000 ++fcsr: 0x300 ++roundig mode: -inf ++fadd.d :: ++input: -7.000000000000000 -5786.500000000000000 ++output: -5793.500000000000000 ++fcsr: 0x300 ++roundig mode: -inf ++fadd.d :: ++input: -347856.500000000000000 1752.000000000000000 ++output: -346104.500000000000000 ++fcsr: 0x300 
++roundig mode: -inf ++fadd.d :: ++input: 356047.500000000000000 0.015625000000000 ++output: 356047.515625000000000 ++fcsr: 0x300 ++roundig mode: -inf ++fadd.d :: ++input: -1.000000000000000 0.031250000000000 ++output: -0.968750000000000 ++fcsr: 0x300 ++roundig mode: -inf ++fadd.d :: ++input: 23.062500000000000 -248562.750000000000000 ++output: -248539.687500000000000 ++fcsr: 0x300 ++roundig mode: near ++fsub.s :: ++input: 0.000000 -4578.500000 ++output: 4578.500000 ++fcsr: 0 ++roundig mode: near ++fsub.s :: ++input: 456.250000 456.250000 ++output: 0.000000 ++fcsr: 0 ++roundig mode: near ++fsub.s :: ++input: 3.000000 34.031250 ++output: -31.031250 ++fcsr: 0 ++roundig mode: near ++fsub.s :: ++input: -1.000000 4578.750000 ++output: -4579.750000 ++fcsr: 0 ++roundig mode: near ++fsub.s :: ++input: 1384.500000 175.000000 ++output: 1209.500000 ++fcsr: 0 ++roundig mode: near ++fsub.s :: ++input: -7.250000 107.000000 ++output: -114.250000 ++fcsr: 0 ++roundig mode: near ++fsub.s :: ++input: 1000000000.000000 -456.250000 ++output: 1000000448.000000 ++fcsr: 0x1010000 ++roundig mode: near ++fsub.s :: ++input: -5786.500000 -7.250000 ++output: -5779.250000 ++fcsr: 0 ++roundig mode: near ++fsub.s :: ++input: 1752.000000 -3478.500000 ++output: 5230.500000 ++fcsr: 0 ++roundig mode: near ++fsub.s :: ++input: 0.015625 356.500000 ++output: -356.484375 ++fcsr: 0 ++roundig mode: near ++fsub.s :: ++input: 0.031250 -1.000000 ++output: 1.031250 ++fcsr: 0 ++roundig mode: near ++fsub.s :: ++input: -248562.750000 23.062500 ++output: -248585.812500 ++fcsr: 0 ++roundig mode: near ++fsub.s :: ++input: -45786.500000 0.000000 ++output: -45786.500000 ++fcsr: 0 ++roundig mode: near ++fsub.s :: ++input: 456.000000 456.250000 ++output: -0.250000 ++fcsr: 0 ++roundig mode: near ++fsub.s :: ++input: 34.031250 3.000000 ++output: 31.031250 ++fcsr: 0 ++roundig mode: near ++fsub.s :: ++input: 45786.750000 -1.000000 ++output: 45787.750000 ++fcsr: 0 ++roundig mode: near ++fsub.s :: ++input: 1752065.000000 1384.500000 ++output: 1750680.500000 ++fcsr: 0 ++roundig mode: near ++fsub.s :: ++input: 107.000000 -7.000000 ++output: 114.000000 ++fcsr: 0 ++roundig mode: near ++fsub.s :: ++input: -45667.250000 100.000000 ++output: -45767.250000 ++fcsr: 0 ++roundig mode: near ++fsub.s :: ++input: -7.000000 -5786.500000 ++output: 5779.500000 ++fcsr: 0 ++roundig mode: near ++fsub.s :: ++input: -347856.500000 1752.000000 ++output: -349608.500000 ++fcsr: 0 ++roundig mode: near ++fsub.s :: ++input: 356047.500000 0.015625 ++output: 356047.500000 ++fcsr: 0x1010000 ++roundig mode: near ++fsub.s :: ++input: -1.000000 0.031250 ++output: -1.031250 ++fcsr: 0 ++roundig mode: near ++fsub.s :: ++input: 23.062500 -248562.750000 ++output: 248585.812500 ++fcsr: 0 ++roundig mode: zero ++fsub.s :: ++input: 0.000000 -4578.500000 ++output: 4578.500000 ++fcsr: 0x100 ++roundig mode: zero ++fsub.s :: ++input: 456.250000 456.250000 ++output: 0.000000 ++fcsr: 0x100 ++roundig mode: zero ++fsub.s :: ++input: 3.000000 34.031250 ++output: -31.031250 ++fcsr: 0x100 ++roundig mode: zero ++fsub.s :: ++input: -1.000000 4578.750000 ++output: -4579.750000 ++fcsr: 0x100 ++roundig mode: zero ++fsub.s :: ++input: 1384.500000 175.000000 ++output: 1209.500000 ++fcsr: 0x100 ++roundig mode: zero ++fsub.s :: ++input: -7.250000 107.000000 ++output: -114.250000 ++fcsr: 0x100 ++roundig mode: zero ++fsub.s :: ++input: 1000000000.000000 -456.250000 ++output: 1000000448.000000 ++fcsr: 0x1010100 ++roundig mode: zero ++fsub.s :: ++input: -5786.500000 -7.250000 ++output: -5779.250000 ++fcsr: 0x100 
++roundig mode: zero ++fsub.s :: ++input: 1752.000000 -3478.500000 ++output: 5230.500000 ++fcsr: 0x100 ++roundig mode: zero ++fsub.s :: ++input: 0.015625 356.500000 ++output: -356.484375 ++fcsr: 0x100 ++roundig mode: zero ++fsub.s :: ++input: 0.031250 -1.000000 ++output: 1.031250 ++fcsr: 0x100 ++roundig mode: zero ++fsub.s :: ++input: -248562.750000 23.062500 ++output: -248585.812500 ++fcsr: 0x100 ++roundig mode: zero ++fsub.s :: ++input: -45786.500000 0.000000 ++output: -45786.500000 ++fcsr: 0x100 ++roundig mode: zero ++fsub.s :: ++input: 456.000000 456.250000 ++output: -0.250000 ++fcsr: 0x100 ++roundig mode: zero ++fsub.s :: ++input: 34.031250 3.000000 ++output: 31.031250 ++fcsr: 0x100 ++roundig mode: zero ++fsub.s :: ++input: 45786.750000 -1.000000 ++output: 45787.750000 ++fcsr: 0x100 ++roundig mode: zero ++fsub.s :: ++input: 1752065.000000 1384.500000 ++output: 1750680.500000 ++fcsr: 0x100 ++roundig mode: zero ++fsub.s :: ++input: 107.000000 -7.000000 ++output: 114.000000 ++fcsr: 0x100 ++roundig mode: zero ++fsub.s :: ++input: -45667.250000 100.000000 ++output: -45767.250000 ++fcsr: 0x100 ++roundig mode: zero ++fsub.s :: ++input: -7.000000 -5786.500000 ++output: 5779.500000 ++fcsr: 0x100 ++roundig mode: zero ++fsub.s :: ++input: -347856.500000 1752.000000 ++output: -349608.500000 ++fcsr: 0x100 ++roundig mode: zero ++fsub.s :: ++input: 356047.500000 0.015625 ++output: 356047.468750 ++fcsr: 0x1010100 ++roundig mode: zero ++fsub.s :: ++input: -1.000000 0.031250 ++output: -1.031250 ++fcsr: 0x100 ++roundig mode: zero ++fsub.s :: ++input: 23.062500 -248562.750000 ++output: 248585.812500 ++fcsr: 0x100 ++roundig mode: +inf ++fsub.s :: ++input: 0.000000 -4578.500000 ++output: 4578.500000 ++fcsr: 0x200 ++roundig mode: +inf ++fsub.s :: ++input: 456.250000 456.250000 ++output: 0.000000 ++fcsr: 0x200 ++roundig mode: +inf ++fsub.s :: ++input: 3.000000 34.031250 ++output: -31.031250 ++fcsr: 0x200 ++roundig mode: +inf ++fsub.s :: ++input: -1.000000 4578.750000 ++output: -4579.750000 ++fcsr: 0x200 ++roundig mode: +inf ++fsub.s :: ++input: 1384.500000 175.000000 ++output: 1209.500000 ++fcsr: 0x200 ++roundig mode: +inf ++fsub.s :: ++input: -7.250000 107.000000 ++output: -114.250000 ++fcsr: 0x200 ++roundig mode: +inf ++fsub.s :: ++input: 1000000000.000000 -456.250000 ++output: 1000000512.000000 ++fcsr: 0x1010200 ++roundig mode: +inf ++fsub.s :: ++input: -5786.500000 -7.250000 ++output: -5779.250000 ++fcsr: 0x200 ++roundig mode: +inf ++fsub.s :: ++input: 1752.000000 -3478.500000 ++output: 5230.500000 ++fcsr: 0x200 ++roundig mode: +inf ++fsub.s :: ++input: 0.015625 356.500000 ++output: -356.484375 ++fcsr: 0x200 ++roundig mode: +inf ++fsub.s :: ++input: 0.031250 -1.000000 ++output: 1.031250 ++fcsr: 0x200 ++roundig mode: +inf ++fsub.s :: ++input: -248562.750000 23.062500 ++output: -248585.812500 ++fcsr: 0x200 ++roundig mode: +inf ++fsub.s :: ++input: -45786.500000 0.000000 ++output: -45786.500000 ++fcsr: 0x200 ++roundig mode: +inf ++fsub.s :: ++input: 456.000000 456.250000 ++output: -0.250000 ++fcsr: 0x200 ++roundig mode: +inf ++fsub.s :: ++input: 34.031250 3.000000 ++output: 31.031250 ++fcsr: 0x200 ++roundig mode: +inf ++fsub.s :: ++input: 45786.750000 -1.000000 ++output: 45787.750000 ++fcsr: 0x200 ++roundig mode: +inf ++fsub.s :: ++input: 1752065.000000 1384.500000 ++output: 1750680.500000 ++fcsr: 0x200 ++roundig mode: +inf ++fsub.s :: ++input: 107.000000 -7.000000 ++output: 114.000000 ++fcsr: 0x200 ++roundig mode: +inf ++fsub.s :: ++input: -45667.250000 100.000000 ++output: -45767.250000 ++fcsr: 0x200 
++roundig mode: +inf ++fsub.s :: ++input: -7.000000 -5786.500000 ++output: 5779.500000 ++fcsr: 0x200 ++roundig mode: +inf ++fsub.s :: ++input: -347856.500000 1752.000000 ++output: -349608.500000 ++fcsr: 0x200 ++roundig mode: +inf ++fsub.s :: ++input: 356047.500000 0.015625 ++output: 356047.500000 ++fcsr: 0x1010200 ++roundig mode: +inf ++fsub.s :: ++input: -1.000000 0.031250 ++output: -1.031250 ++fcsr: 0x200 ++roundig mode: +inf ++fsub.s :: ++input: 23.062500 -248562.750000 ++output: 248585.812500 ++fcsr: 0x200 ++roundig mode: -inf ++fsub.s :: ++input: 0.000000 -4578.500000 ++output: 4578.500000 ++fcsr: 0x300 ++roundig mode: -inf ++fsub.s :: ++input: 456.250000 456.250000 ++output: -0.000000 ++fcsr: 0x300 ++roundig mode: -inf ++fsub.s :: ++input: 3.000000 34.031250 ++output: -31.031250 ++fcsr: 0x300 ++roundig mode: -inf ++fsub.s :: ++input: -1.000000 4578.750000 ++output: -4579.750000 ++fcsr: 0x300 ++roundig mode: -inf ++fsub.s :: ++input: 1384.500000 175.000000 ++output: 1209.500000 ++fcsr: 0x300 ++roundig mode: -inf ++fsub.s :: ++input: -7.250000 107.000000 ++output: -114.250000 ++fcsr: 0x300 ++roundig mode: -inf ++fsub.s :: ++input: 1000000000.000000 -456.250000 ++output: 1000000448.000000 ++fcsr: 0x1010300 ++roundig mode: -inf ++fsub.s :: ++input: -5786.500000 -7.250000 ++output: -5779.250000 ++fcsr: 0x300 ++roundig mode: -inf ++fsub.s :: ++input: 1752.000000 -3478.500000 ++output: 5230.500000 ++fcsr: 0x300 ++roundig mode: -inf ++fsub.s :: ++input: 0.015625 356.500000 ++output: -356.484375 ++fcsr: 0x300 ++roundig mode: -inf ++fsub.s :: ++input: 0.031250 -1.000000 ++output: 1.031250 ++fcsr: 0x300 ++roundig mode: -inf ++fsub.s :: ++input: -248562.750000 23.062500 ++output: -248585.812500 ++fcsr: 0x300 ++roundig mode: -inf ++fsub.s :: ++input: -45786.500000 0.000000 ++output: -45786.500000 ++fcsr: 0x300 ++roundig mode: -inf ++fsub.s :: ++input: 456.000000 456.250000 ++output: -0.250000 ++fcsr: 0x300 ++roundig mode: -inf ++fsub.s :: ++input: 34.031250 3.000000 ++output: 31.031250 ++fcsr: 0x300 ++roundig mode: -inf ++fsub.s :: ++input: 45786.750000 -1.000000 ++output: 45787.750000 ++fcsr: 0x300 ++roundig mode: -inf ++fsub.s :: ++input: 1752065.000000 1384.500000 ++output: 1750680.500000 ++fcsr: 0x300 ++roundig mode: -inf ++fsub.s :: ++input: 107.000000 -7.000000 ++output: 114.000000 ++fcsr: 0x300 ++roundig mode: -inf ++fsub.s :: ++input: -45667.250000 100.000000 ++output: -45767.250000 ++fcsr: 0x300 ++roundig mode: -inf ++fsub.s :: ++input: -7.000000 -5786.500000 ++output: 5779.500000 ++fcsr: 0x300 ++roundig mode: -inf ++fsub.s :: ++input: -347856.500000 1752.000000 ++output: -349608.500000 ++fcsr: 0x300 ++roundig mode: -inf ++fsub.s :: ++input: 356047.500000 0.015625 ++output: 356047.468750 ++fcsr: 0x1010300 ++roundig mode: -inf ++fsub.s :: ++input: -1.000000 0.031250 ++output: -1.031250 ++fcsr: 0x300 ++roundig mode: -inf ++fsub.s :: ++input: 23.062500 -248562.750000 ++output: 248585.812500 ++fcsr: 0x300 ++roundig mode: near ++fsub.d :: ++input: 0.000000000000000 -45786.500000000000000 ++output: 45786.500000000000000 ++fcsr: 0 ++roundig mode: near ++fsub.d :: ++input: 456.250000000000000 456.250000000000000 ++output: 0.000000000000000 ++fcsr: 0 ++roundig mode: near ++fsub.d :: ++input: 3.000000000000000 34.031250000000000 ++output: -31.031250000000000 ++fcsr: 0 ++roundig mode: near ++fsub.d :: ++input: -1.000000000000000 45786.750000000000000 ++output: -45787.750000000000000 ++fcsr: 0 ++roundig mode: near ++fsub.d :: ++input: 1384.500000000000000 1752065.000000000000000 ++output: 
-1750680.500000000000000 ++fcsr: 0 ++roundig mode: near ++fsub.d :: ++input: -7.250000000000000 107.000000000000000 ++output: -114.250000000000000 ++fcsr: 0 ++roundig mode: near ++fsub.d :: ++input: 1000000000.000000000000000 -45667.250000000000000 ++output: 1000045667.250000000000000 ++fcsr: 0 ++roundig mode: near ++fsub.d :: ++input: -5786.500000000000000 -7.250000000000000 ++output: -5779.250000000000000 ++fcsr: 0 ++roundig mode: near ++fsub.d :: ++input: 1752.000000000000000 -347856.500000000000000 ++output: 349608.500000000000000 ++fcsr: 0 ++roundig mode: near ++fsub.d :: ++input: 0.015625000000000 356047.500000000000000 ++output: -356047.484375000000000 ++fcsr: 0 ++roundig mode: near ++fsub.d :: ++input: 0.031250000000000 -1.000000000000000 ++output: 1.031250000000000 ++fcsr: 0 ++roundig mode: near ++fsub.d :: ++input: -248562.750000000000000 23.062500000000000 ++output: -248585.812500000000000 ++fcsr: 0 ++roundig mode: near ++fsub.d :: ++input: -45786.500000000000000 0.000000000000000 ++output: -45786.500000000000000 ++fcsr: 0 ++roundig mode: near ++fsub.d :: ++input: 456.000000000000000 456.250000000000000 ++output: -0.250000000000000 ++fcsr: 0 ++roundig mode: near ++fsub.d :: ++input: 34.031250000000000 3.000000000000000 ++output: 31.031250000000000 ++fcsr: 0 ++roundig mode: near ++fsub.d :: ++input: 45786.750000000000000 -1.000000000000000 ++output: 45787.750000000000000 ++fcsr: 0 ++roundig mode: near ++fsub.d :: ++input: 1752065.000000000000000 1384.500000000000000 ++output: 1750680.500000000000000 ++fcsr: 0 ++roundig mode: near ++fsub.d :: ++input: 107.000000000000000 -7.000000000000000 ++output: 114.000000000000000 ++fcsr: 0 ++roundig mode: near ++fsub.d :: ++input: -45667.250000000000000 1000000000.000000000000000 ++output: -1000045667.250000000000000 ++fcsr: 0 ++roundig mode: near ++fsub.d :: ++input: -7.000000000000000 -5786.500000000000000 ++output: 5779.500000000000000 ++fcsr: 0 ++roundig mode: near ++fsub.d :: ++input: -347856.500000000000000 1752.000000000000000 ++output: -349608.500000000000000 ++fcsr: 0 ++roundig mode: near ++fsub.d :: ++input: 356047.500000000000000 0.015625000000000 ++output: 356047.484375000000000 ++fcsr: 0 ++roundig mode: near ++fsub.d :: ++input: -1.000000000000000 0.031250000000000 ++output: -1.031250000000000 ++fcsr: 0 ++roundig mode: near ++fsub.d :: ++input: 23.062500000000000 -248562.750000000000000 ++output: 248585.812500000000000 ++fcsr: 0 ++roundig mode: zero ++fsub.d :: ++input: 0.000000000000000 -45786.500000000000000 ++output: 45786.500000000000000 ++fcsr: 0x100 ++roundig mode: zero ++fsub.d :: ++input: 456.250000000000000 456.250000000000000 ++output: 0.000000000000000 ++fcsr: 0x100 ++roundig mode: zero ++fsub.d :: ++input: 3.000000000000000 34.031250000000000 ++output: -31.031250000000000 ++fcsr: 0x100 ++roundig mode: zero ++fsub.d :: ++input: -1.000000000000000 45786.750000000000000 ++output: -45787.750000000000000 ++fcsr: 0x100 ++roundig mode: zero ++fsub.d :: ++input: 1384.500000000000000 1752065.000000000000000 ++output: -1750680.500000000000000 ++fcsr: 0x100 ++roundig mode: zero ++fsub.d :: ++input: -7.250000000000000 107.000000000000000 ++output: -114.250000000000000 ++fcsr: 0x100 ++roundig mode: zero ++fsub.d :: ++input: 1000000000.000000000000000 -45667.250000000000000 ++output: 1000045667.250000000000000 ++fcsr: 0x100 ++roundig mode: zero ++fsub.d :: ++input: -5786.500000000000000 -7.250000000000000 ++output: -5779.250000000000000 ++fcsr: 0x100 ++roundig mode: zero ++fsub.d :: ++input: 1752.000000000000000 
-347856.500000000000000 ++output: 349608.500000000000000 ++fcsr: 0x100 ++roundig mode: zero ++fsub.d :: ++input: 0.015625000000000 356047.500000000000000 ++output: -356047.484375000000000 ++fcsr: 0x100 ++roundig mode: zero ++fsub.d :: ++input: 0.031250000000000 -1.000000000000000 ++output: 1.031250000000000 ++fcsr: 0x100 ++roundig mode: zero ++fsub.d :: ++input: -248562.750000000000000 23.062500000000000 ++output: -248585.812500000000000 ++fcsr: 0x100 ++roundig mode: zero ++fsub.d :: ++input: -45786.500000000000000 0.000000000000000 ++output: -45786.500000000000000 ++fcsr: 0x100 ++roundig mode: zero ++fsub.d :: ++input: 456.000000000000000 456.250000000000000 ++output: -0.250000000000000 ++fcsr: 0x100 ++roundig mode: zero ++fsub.d :: ++input: 34.031250000000000 3.000000000000000 ++output: 31.031250000000000 ++fcsr: 0x100 ++roundig mode: zero ++fsub.d :: ++input: 45786.750000000000000 -1.000000000000000 ++output: 45787.750000000000000 ++fcsr: 0x100 ++roundig mode: zero ++fsub.d :: ++input: 1752065.000000000000000 1384.500000000000000 ++output: 1750680.500000000000000 ++fcsr: 0x100 ++roundig mode: zero ++fsub.d :: ++input: 107.000000000000000 -7.000000000000000 ++output: 114.000000000000000 ++fcsr: 0x100 ++roundig mode: zero ++fsub.d :: ++input: -45667.250000000000000 1000000000.000000000000000 ++output: -1000045667.250000000000000 ++fcsr: 0x100 ++roundig mode: zero ++fsub.d :: ++input: -7.000000000000000 -5786.500000000000000 ++output: 5779.500000000000000 ++fcsr: 0x100 ++roundig mode: zero ++fsub.d :: ++input: -347856.500000000000000 1752.000000000000000 ++output: -349608.500000000000000 ++fcsr: 0x100 ++roundig mode: zero ++fsub.d :: ++input: 356047.500000000000000 0.015625000000000 ++output: 356047.484375000000000 ++fcsr: 0x100 ++roundig mode: zero ++fsub.d :: ++input: -1.000000000000000 0.031250000000000 ++output: -1.031250000000000 ++fcsr: 0x100 ++roundig mode: zero ++fsub.d :: ++input: 23.062500000000000 -248562.750000000000000 ++output: 248585.812500000000000 ++fcsr: 0x100 ++roundig mode: +inf ++fsub.d :: ++input: 0.000000000000000 -45786.500000000000000 ++output: 45786.500000000000000 ++fcsr: 0x200 ++roundig mode: +inf ++fsub.d :: ++input: 456.250000000000000 456.250000000000000 ++output: 0.000000000000000 ++fcsr: 0x200 ++roundig mode: +inf ++fsub.d :: ++input: 3.000000000000000 34.031250000000000 ++output: -31.031250000000000 ++fcsr: 0x200 ++roundig mode: +inf ++fsub.d :: ++input: -1.000000000000000 45786.750000000000000 ++output: -45787.750000000000000 ++fcsr: 0x200 ++roundig mode: +inf ++fsub.d :: ++input: 1384.500000000000000 1752065.000000000000000 ++output: -1750680.500000000000000 ++fcsr: 0x200 ++roundig mode: +inf ++fsub.d :: ++input: -7.250000000000000 107.000000000000000 ++output: -114.250000000000000 ++fcsr: 0x200 ++roundig mode: +inf ++fsub.d :: ++input: 1000000000.000000000000000 -45667.250000000000000 ++output: 1000045667.250000000000000 ++fcsr: 0x200 ++roundig mode: +inf ++fsub.d :: ++input: -5786.500000000000000 -7.250000000000000 ++output: -5779.250000000000000 ++fcsr: 0x200 ++roundig mode: +inf ++fsub.d :: ++input: 1752.000000000000000 -347856.500000000000000 ++output: 349608.500000000000000 ++fcsr: 0x200 ++roundig mode: +inf ++fsub.d :: ++input: 0.015625000000000 356047.500000000000000 ++output: -356047.484375000000000 ++fcsr: 0x200 ++roundig mode: +inf ++fsub.d :: ++input: 0.031250000000000 -1.000000000000000 ++output: 1.031250000000000 ++fcsr: 0x200 ++roundig mode: +inf ++fsub.d :: ++input: -248562.750000000000000 23.062500000000000 ++output: 
-248585.812500000000000 ++fcsr: 0x200 ++roundig mode: +inf ++fsub.d :: ++input: -45786.500000000000000 0.000000000000000 ++output: -45786.500000000000000 ++fcsr: 0x200 ++roundig mode: +inf ++fsub.d :: ++input: 456.000000000000000 456.250000000000000 ++output: -0.250000000000000 ++fcsr: 0x200 ++roundig mode: +inf ++fsub.d :: ++input: 34.031250000000000 3.000000000000000 ++output: 31.031250000000000 ++fcsr: 0x200 ++roundig mode: +inf ++fsub.d :: ++input: 45786.750000000000000 -1.000000000000000 ++output: 45787.750000000000000 ++fcsr: 0x200 ++roundig mode: +inf ++fsub.d :: ++input: 1752065.000000000000000 1384.500000000000000 ++output: 1750680.500000000000000 ++fcsr: 0x200 ++roundig mode: +inf ++fsub.d :: ++input: 107.000000000000000 -7.000000000000000 ++output: 114.000000000000000 ++fcsr: 0x200 ++roundig mode: +inf ++fsub.d :: ++input: -45667.250000000000000 1000000000.000000000000000 ++output: -1000045667.250000000000000 ++fcsr: 0x200 ++roundig mode: +inf ++fsub.d :: ++input: -7.000000000000000 -5786.500000000000000 ++output: 5779.500000000000000 ++fcsr: 0x200 ++roundig mode: +inf ++fsub.d :: ++input: -347856.500000000000000 1752.000000000000000 ++output: -349608.500000000000000 ++fcsr: 0x200 ++roundig mode: +inf ++fsub.d :: ++input: 356047.500000000000000 0.015625000000000 ++output: 356047.484375000000000 ++fcsr: 0x200 ++roundig mode: +inf ++fsub.d :: ++input: -1.000000000000000 0.031250000000000 ++output: -1.031250000000000 ++fcsr: 0x200 ++roundig mode: +inf ++fsub.d :: ++input: 23.062500000000000 -248562.750000000000000 ++output: 248585.812500000000000 ++fcsr: 0x200 ++roundig mode: -inf ++fsub.d :: ++input: 0.000000000000000 -45786.500000000000000 ++output: 45786.500000000000000 ++fcsr: 0x300 ++roundig mode: -inf ++fsub.d :: ++input: 456.250000000000000 456.250000000000000 ++output: -0.000000000000000 ++fcsr: 0x300 ++roundig mode: -inf ++fsub.d :: ++input: 3.000000000000000 34.031250000000000 ++output: -31.031250000000000 ++fcsr: 0x300 ++roundig mode: -inf ++fsub.d :: ++input: -1.000000000000000 45786.750000000000000 ++output: -45787.750000000000000 ++fcsr: 0x300 ++roundig mode: -inf ++fsub.d :: ++input: 1384.500000000000000 1752065.000000000000000 ++output: -1750680.500000000000000 ++fcsr: 0x300 ++roundig mode: -inf ++fsub.d :: ++input: -7.250000000000000 107.000000000000000 ++output: -114.250000000000000 ++fcsr: 0x300 ++roundig mode: -inf ++fsub.d :: ++input: 1000000000.000000000000000 -45667.250000000000000 ++output: 1000045667.250000000000000 ++fcsr: 0x300 ++roundig mode: -inf ++fsub.d :: ++input: -5786.500000000000000 -7.250000000000000 ++output: -5779.250000000000000 ++fcsr: 0x300 ++roundig mode: -inf ++fsub.d :: ++input: 1752.000000000000000 -347856.500000000000000 ++output: 349608.500000000000000 ++fcsr: 0x300 ++roundig mode: -inf ++fsub.d :: ++input: 0.015625000000000 356047.500000000000000 ++output: -356047.484375000000000 ++fcsr: 0x300 ++roundig mode: -inf ++fsub.d :: ++input: 0.031250000000000 -1.000000000000000 ++output: 1.031250000000000 ++fcsr: 0x300 ++roundig mode: -inf ++fsub.d :: ++input: -248562.750000000000000 23.062500000000000 ++output: -248585.812500000000000 ++fcsr: 0x300 ++roundig mode: -inf ++fsub.d :: ++input: -45786.500000000000000 0.000000000000000 ++output: -45786.500000000000000 ++fcsr: 0x300 ++roundig mode: -inf ++fsub.d :: ++input: 456.000000000000000 456.250000000000000 ++output: -0.250000000000000 ++fcsr: 0x300 ++roundig mode: -inf ++fsub.d :: ++input: 34.031250000000000 3.000000000000000 ++output: 31.031250000000000 ++fcsr: 0x300 ++roundig mode: -inf 
++fsub.d :: ++input: 45786.750000000000000 -1.000000000000000 ++output: 45787.750000000000000 ++fcsr: 0x300 ++roundig mode: -inf ++fsub.d :: ++input: 1752065.000000000000000 1384.500000000000000 ++output: 1750680.500000000000000 ++fcsr: 0x300 ++roundig mode: -inf ++fsub.d :: ++input: 107.000000000000000 -7.000000000000000 ++output: 114.000000000000000 ++fcsr: 0x300 ++roundig mode: -inf ++fsub.d :: ++input: -45667.250000000000000 1000000000.000000000000000 ++output: -1000045667.250000000000000 ++fcsr: 0x300 ++roundig mode: -inf ++fsub.d :: ++input: -7.000000000000000 -5786.500000000000000 ++output: 5779.500000000000000 ++fcsr: 0x300 ++roundig mode: -inf ++fsub.d :: ++input: -347856.500000000000000 1752.000000000000000 ++output: -349608.500000000000000 ++fcsr: 0x300 ++roundig mode: -inf ++fsub.d :: ++input: 356047.500000000000000 0.015625000000000 ++output: 356047.484375000000000 ++fcsr: 0x300 ++roundig mode: -inf ++fsub.d :: ++input: -1.000000000000000 0.031250000000000 ++output: -1.031250000000000 ++fcsr: 0x300 ++roundig mode: -inf ++fsub.d :: ++input: 23.062500000000000 -248562.750000000000000 ++output: 248585.812500000000000 ++fcsr: 0x300 ++roundig mode: near ++fmul.s :: ++input: 0.000000 -4578.500000 ++output: -0.000000 ++fcsr: 0 ++roundig mode: near ++fmul.s :: ++input: 456.250000 456.250000 ++output: 208164.062500 ++fcsr: 0 ++roundig mode: near ++fmul.s :: ++input: 3.000000 34.031250 ++output: 102.093750 ++fcsr: 0 ++roundig mode: near ++fmul.s :: ++input: -1.000000 4578.750000 ++output: -4578.750000 ++fcsr: 0 ++roundig mode: near ++fmul.s :: ++input: 1384.500000 175.000000 ++output: 242287.500000 ++fcsr: 0 ++roundig mode: near ++fmul.s :: ++input: -7.250000 107.000000 ++output: -775.750000 ++fcsr: 0 ++roundig mode: near ++fmul.s :: ++input: 1000000000.000000 -456.250000 ++output: -456249999360.000000 ++fcsr: 0x1010000 ++roundig mode: near ++fmul.s :: ++input: -5786.500000 -7.250000 ++output: 41952.125000 ++fcsr: 0 ++roundig mode: near ++fmul.s :: ++input: 1752.000000 -3478.500000 ++output: -6094332.000000 ++fcsr: 0 ++roundig mode: near ++fmul.s :: ++input: 0.015625 356.500000 ++output: 5.570312 ++fcsr: 0 ++roundig mode: near ++fmul.s :: ++input: 0.031250 -1.000000 ++output: -0.031250 ++fcsr: 0 ++roundig mode: near ++fmul.s :: ++input: -248562.750000 23.062500 ++output: -5732478.500000 ++fcsr: 0x1010000 ++roundig mode: near ++fmul.s :: ++input: -45786.500000 0.000000 ++output: -0.000000 ++fcsr: 0 ++roundig mode: near ++fmul.s :: ++input: 456.000000 456.250000 ++output: 208050.000000 ++fcsr: 0 ++roundig mode: near ++fmul.s :: ++input: 34.031250 3.000000 ++output: 102.093750 ++fcsr: 0 ++roundig mode: near ++fmul.s :: ++input: 45786.750000 -1.000000 ++output: -45786.750000 ++fcsr: 0 ++roundig mode: near ++fmul.s :: ++input: 1752065.000000 1384.500000 ++output: 2425733888.000000 ++fcsr: 0x1010000 ++roundig mode: near ++fmul.s :: ++input: 107.000000 -7.000000 ++output: -749.000000 ++fcsr: 0 ++roundig mode: near ++fmul.s :: ++input: -45667.250000 100.000000 ++output: -4566725.000000 ++fcsr: 0 ++roundig mode: near ++fmul.s :: ++input: -7.000000 -5786.500000 ++output: 40505.500000 ++fcsr: 0 ++roundig mode: near ++fmul.s :: ++input: -347856.500000 1752.000000 ++output: -609444608.000000 ++fcsr: 0x1010000 ++roundig mode: near ++fmul.s :: ++input: 356047.500000 0.015625 ++output: 5563.242188 ++fcsr: 0 ++roundig mode: near ++fmul.s :: ++input: -1.000000 0.031250 ++output: -0.031250 ++fcsr: 0 ++roundig mode: near ++fmul.s :: ++input: 23.062500 -248562.750000 ++output: -5732478.500000 ++fcsr: 
0x1010000 ++roundig mode: zero ++fmul.s :: ++input: 0.000000 -4578.500000 ++output: -0.000000 ++fcsr: 0x100 ++roundig mode: zero ++fmul.s :: ++input: 456.250000 456.250000 ++output: 208164.062500 ++fcsr: 0x100 ++roundig mode: zero ++fmul.s :: ++input: 3.000000 34.031250 ++output: 102.093750 ++fcsr: 0x100 ++roundig mode: zero ++fmul.s :: ++input: -1.000000 4578.750000 ++output: -4578.750000 ++fcsr: 0x100 ++roundig mode: zero ++fmul.s :: ++input: 1384.500000 175.000000 ++output: 242287.500000 ++fcsr: 0x100 ++roundig mode: zero ++fmul.s :: ++input: -7.250000 107.000000 ++output: -775.750000 ++fcsr: 0x100 ++roundig mode: zero ++fmul.s :: ++input: 1000000000.000000 -456.250000 ++output: -456249999360.000000 ++fcsr: 0x1010100 ++roundig mode: zero ++fmul.s :: ++input: -5786.500000 -7.250000 ++output: 41952.125000 ++fcsr: 0x100 ++roundig mode: zero ++fmul.s :: ++input: 1752.000000 -3478.500000 ++output: -6094332.000000 ++fcsr: 0x100 ++roundig mode: zero ++fmul.s :: ++input: 0.015625 356.500000 ++output: 5.570312 ++fcsr: 0x100 ++roundig mode: zero ++fmul.s :: ++input: 0.031250 -1.000000 ++output: -0.031250 ++fcsr: 0x100 ++roundig mode: zero ++fmul.s :: ++input: -248562.750000 23.062500 ++output: -5732478.000000 ++fcsr: 0x1010100 ++roundig mode: zero ++fmul.s :: ++input: -45786.500000 0.000000 ++output: -0.000000 ++fcsr: 0x100 ++roundig mode: zero ++fmul.s :: ++input: 456.000000 456.250000 ++output: 208050.000000 ++fcsr: 0x100 ++roundig mode: zero ++fmul.s :: ++input: 34.031250 3.000000 ++output: 102.093750 ++fcsr: 0x100 ++roundig mode: zero ++fmul.s :: ++input: 45786.750000 -1.000000 ++output: -45786.750000 ++fcsr: 0x100 ++roundig mode: zero ++fmul.s :: ++input: 1752065.000000 1384.500000 ++output: 2425733888.000000 ++fcsr: 0x1010100 ++roundig mode: zero ++fmul.s :: ++input: 107.000000 -7.000000 ++output: -749.000000 ++fcsr: 0x100 ++roundig mode: zero ++fmul.s :: ++input: -45667.250000 100.000000 ++output: -4566725.000000 ++fcsr: 0x100 ++roundig mode: zero ++fmul.s :: ++input: -7.000000 -5786.500000 ++output: 40505.500000 ++fcsr: 0x100 ++roundig mode: zero ++fmul.s :: ++input: -347856.500000 1752.000000 ++output: -609444544.000000 ++fcsr: 0x1010100 ++roundig mode: zero ++fmul.s :: ++input: 356047.500000 0.015625 ++output: 5563.242187 ++fcsr: 0x100 ++roundig mode: zero ++fmul.s :: ++input: -1.000000 0.031250 ++output: -0.031250 ++fcsr: 0x100 ++roundig mode: zero ++fmul.s :: ++input: 23.062500 -248562.750000 ++output: -5732478.000000 ++fcsr: 0x1010100 ++roundig mode: +inf ++fmul.s :: ++input: 0.000000 -4578.500000 ++output: -0.000000 ++fcsr: 0x200 ++roundig mode: +inf ++fmul.s :: ++input: 456.250000 456.250000 ++output: 208164.062500 ++fcsr: 0x200 ++roundig mode: +inf ++fmul.s :: ++input: 3.000000 34.031250 ++output: 102.093750 ++fcsr: 0x200 ++roundig mode: +inf ++fmul.s :: ++input: -1.000000 4578.750000 ++output: -4578.750000 ++fcsr: 0x200 ++roundig mode: +inf ++fmul.s :: ++input: 1384.500000 175.000000 ++output: 242287.500000 ++fcsr: 0x200 ++roundig mode: +inf ++fmul.s :: ++input: -7.250000 107.000000 ++output: -775.750000 ++fcsr: 0x200 ++roundig mode: +inf ++fmul.s :: ++input: 1000000000.000000 -456.250000 ++output: -456249999360.000000 ++fcsr: 0x1010200 ++roundig mode: +inf ++fmul.s :: ++input: -5786.500000 -7.250000 ++output: 41952.125000 ++fcsr: 0x200 ++roundig mode: +inf ++fmul.s :: ++input: 1752.000000 -3478.500000 ++output: -6094332.000000 ++fcsr: 0x200 ++roundig mode: +inf ++fmul.s :: ++input: 0.015625 356.500000 ++output: 5.570313 ++fcsr: 0x200 ++roundig mode: +inf ++fmul.s :: ++input: 
0.031250 -1.000000 ++output: -0.031250 ++fcsr: 0x200 ++roundig mode: +inf ++fmul.s :: ++input: -248562.750000 23.062500 ++output: -5732478.000000 ++fcsr: 0x1010200 ++roundig mode: +inf ++fmul.s :: ++input: -45786.500000 0.000000 ++output: -0.000000 ++fcsr: 0x200 ++roundig mode: +inf ++fmul.s :: ++input: 456.000000 456.250000 ++output: 208050.000000 ++fcsr: 0x200 ++roundig mode: +inf ++fmul.s :: ++input: 34.031250 3.000000 ++output: 102.093750 ++fcsr: 0x200 ++roundig mode: +inf ++fmul.s :: ++input: 45786.750000 -1.000000 ++output: -45786.750000 ++fcsr: 0x200 ++roundig mode: +inf ++fmul.s :: ++input: 1752065.000000 1384.500000 ++output: 2425734144.000000 ++fcsr: 0x1010200 ++roundig mode: +inf ++fmul.s :: ++input: 107.000000 -7.000000 ++output: -749.000000 ++fcsr: 0x200 ++roundig mode: +inf ++fmul.s :: ++input: -45667.250000 100.000000 ++output: -4566725.000000 ++fcsr: 0x200 ++roundig mode: +inf ++fmul.s :: ++input: -7.000000 -5786.500000 ++output: 40505.500000 ++fcsr: 0x200 ++roundig mode: +inf ++fmul.s :: ++input: -347856.500000 1752.000000 ++output: -609444544.000000 ++fcsr: 0x1010200 ++roundig mode: +inf ++fmul.s :: ++input: 356047.500000 0.015625 ++output: 5563.242188 ++fcsr: 0x200 ++roundig mode: +inf ++fmul.s :: ++input: -1.000000 0.031250 ++output: -0.031250 ++fcsr: 0x200 ++roundig mode: +inf ++fmul.s :: ++input: 23.062500 -248562.750000 ++output: -5732478.000000 ++fcsr: 0x1010200 ++roundig mode: -inf ++fmul.s :: ++input: 0.000000 -4578.500000 ++output: -0.000000 ++fcsr: 0x300 ++roundig mode: -inf ++fmul.s :: ++input: 456.250000 456.250000 ++output: 208164.062500 ++fcsr: 0x300 ++roundig mode: -inf ++fmul.s :: ++input: 3.000000 34.031250 ++output: 102.093750 ++fcsr: 0x300 ++roundig mode: -inf ++fmul.s :: ++input: -1.000000 4578.750000 ++output: -4578.750000 ++fcsr: 0x300 ++roundig mode: -inf ++fmul.s :: ++input: 1384.500000 175.000000 ++output: 242287.500000 ++fcsr: 0x300 ++roundig mode: -inf ++fmul.s :: ++input: -7.250000 107.000000 ++output: -775.750000 ++fcsr: 0x300 ++roundig mode: -inf ++fmul.s :: ++input: 1000000000.000000 -456.250000 ++output: -456250032128.000000 ++fcsr: 0x1010300 ++roundig mode: -inf ++fmul.s :: ++input: -5786.500000 -7.250000 ++output: 41952.125000 ++fcsr: 0x300 ++roundig mode: -inf ++fmul.s :: ++input: 1752.000000 -3478.500000 ++output: -6094332.000000 ++fcsr: 0x300 ++roundig mode: -inf ++fmul.s :: ++input: 0.015625 356.500000 ++output: 5.570312 ++fcsr: 0x300 ++roundig mode: -inf ++fmul.s :: ++input: 0.031250 -1.000000 ++output: -0.031250 ++fcsr: 0x300 ++roundig mode: -inf ++fmul.s :: ++input: -248562.750000 23.062500 ++output: -5732478.500000 ++fcsr: 0x1010300 ++roundig mode: -inf ++fmul.s :: ++input: -45786.500000 0.000000 ++output: -0.000000 ++fcsr: 0x300 ++roundig mode: -inf ++fmul.s :: ++input: 456.000000 456.250000 ++output: 208050.000000 ++fcsr: 0x300 ++roundig mode: -inf ++fmul.s :: ++input: 34.031250 3.000000 ++output: 102.093750 ++fcsr: 0x300 ++roundig mode: -inf ++fmul.s :: ++input: 45786.750000 -1.000000 ++output: -45786.750000 ++fcsr: 0x300 ++roundig mode: -inf ++fmul.s :: ++input: 1752065.000000 1384.500000 ++output: 2425733888.000000 ++fcsr: 0x1010300 ++roundig mode: -inf ++fmul.s :: ++input: 107.000000 -7.000000 ++output: -749.000000 ++fcsr: 0x300 ++roundig mode: -inf ++fmul.s :: ++input: -45667.250000 100.000000 ++output: -4566725.000000 ++fcsr: 0x300 ++roundig mode: -inf ++fmul.s :: ++input: -7.000000 -5786.500000 ++output: 40505.500000 ++fcsr: 0x300 ++roundig mode: -inf ++fmul.s :: ++input: -347856.500000 1752.000000 ++output: 
-609444608.000000 ++fcsr: 0x1010300 ++roundig mode: -inf ++fmul.s :: ++input: 356047.500000 0.015625 ++output: 5563.242187 ++fcsr: 0x300 ++roundig mode: -inf ++fmul.s :: ++input: -1.000000 0.031250 ++output: -0.031250 ++fcsr: 0x300 ++roundig mode: -inf ++fmul.s :: ++input: 23.062500 -248562.750000 ++output: -5732478.500000 ++fcsr: 0x1010300 ++roundig mode: near ++fmul.d :: ++input: 0.000000000000000 -45786.500000000000000 ++output: -0.000000000000000 ++fcsr: 0 ++roundig mode: near ++fmul.d :: ++input: 456.250000000000000 456.250000000000000 ++output: 208164.062500000000000 ++fcsr: 0 ++roundig mode: near ++fmul.d :: ++input: 3.000000000000000 34.031250000000000 ++output: 102.093750000000000 ++fcsr: 0 ++roundig mode: near ++fmul.d :: ++input: -1.000000000000000 45786.750000000000000 ++output: -45786.750000000000000 ++fcsr: 0 ++roundig mode: near ++fmul.d :: ++input: 1384.500000000000000 1752065.000000000000000 ++output: 2425733992.500000000000000 ++fcsr: 0 ++roundig mode: near ++fmul.d :: ++input: -7.250000000000000 107.000000000000000 ++output: -775.750000000000000 ++fcsr: 0 ++roundig mode: near ++fmul.d :: ++input: 1000000000.000000000000000 -45667.250000000000000 ++output: -45667250000000.000000000000000 ++fcsr: 0 ++roundig mode: near ++fmul.d :: ++input: -5786.500000000000000 -7.250000000000000 ++output: 41952.125000000000000 ++fcsr: 0 ++roundig mode: near ++fmul.d :: ++input: 1752.000000000000000 -347856.500000000000000 ++output: -609444588.000000000000000 ++fcsr: 0 ++roundig mode: near ++fmul.d :: ++input: 0.015625000000000 356047.500000000000000 ++output: 5563.242187500000000 ++fcsr: 0 ++roundig mode: near ++fmul.d :: ++input: 0.031250000000000 -1.000000000000000 ++output: -0.031250000000000 ++fcsr: 0 ++roundig mode: near ++fmul.d :: ++input: -248562.750000000000000 23.062500000000000 ++output: -5732478.421875000000000 ++fcsr: 0 ++roundig mode: near ++fmul.d :: ++input: -45786.500000000000000 0.000000000000000 ++output: -0.000000000000000 ++fcsr: 0 ++roundig mode: near ++fmul.d :: ++input: 456.000000000000000 456.250000000000000 ++output: 208050.000000000000000 ++fcsr: 0 ++roundig mode: near ++fmul.d :: ++input: 34.031250000000000 3.000000000000000 ++output: 102.093750000000000 ++fcsr: 0 ++roundig mode: near ++fmul.d :: ++input: 45786.750000000000000 -1.000000000000000 ++output: -45786.750000000000000 ++fcsr: 0 ++roundig mode: near ++fmul.d :: ++input: 1752065.000000000000000 1384.500000000000000 ++output: 2425733992.500000000000000 ++fcsr: 0 ++roundig mode: near ++fmul.d :: ++input: 107.000000000000000 -7.000000000000000 ++output: -749.000000000000000 ++fcsr: 0 ++roundig mode: near ++fmul.d :: ++input: -45667.250000000000000 1000000000.000000000000000 ++output: -45667250000000.000000000000000 ++fcsr: 0 ++roundig mode: near ++fmul.d :: ++input: -7.000000000000000 -5786.500000000000000 ++output: 40505.500000000000000 ++fcsr: 0 ++roundig mode: near ++fmul.d :: ++input: -347856.500000000000000 1752.000000000000000 ++output: -609444588.000000000000000 ++fcsr: 0 ++roundig mode: near ++fmul.d :: ++input: 356047.500000000000000 0.015625000000000 ++output: 5563.242187500000000 ++fcsr: 0 ++roundig mode: near ++fmul.d :: ++input: -1.000000000000000 0.031250000000000 ++output: -0.031250000000000 ++fcsr: 0 ++roundig mode: near ++fmul.d :: ++input: 23.062500000000000 -248562.750000000000000 ++output: -5732478.421875000000000 ++fcsr: 0 ++roundig mode: zero ++fmul.d :: ++input: 0.000000000000000 -45786.500000000000000 ++output: -0.000000000000000 ++fcsr: 0x100 ++roundig mode: zero ++fmul.d :: 
++input: 456.250000000000000 456.250000000000000 ++output: 208164.062500000000000 ++fcsr: 0x100 ++roundig mode: zero ++fmul.d :: ++input: 3.000000000000000 34.031250000000000 ++output: 102.093750000000000 ++fcsr: 0x100 ++roundig mode: zero ++fmul.d :: ++input: -1.000000000000000 45786.750000000000000 ++output: -45786.750000000000000 ++fcsr: 0x100 ++roundig mode: zero ++fmul.d :: ++input: 1384.500000000000000 1752065.000000000000000 ++output: 2425733992.500000000000000 ++fcsr: 0x100 ++roundig mode: zero ++fmul.d :: ++input: -7.250000000000000 107.000000000000000 ++output: -775.750000000000000 ++fcsr: 0x100 ++roundig mode: zero ++fmul.d :: ++input: 1000000000.000000000000000 -45667.250000000000000 ++output: -45667250000000.000000000000000 ++fcsr: 0x100 ++roundig mode: zero ++fmul.d :: ++input: -5786.500000000000000 -7.250000000000000 ++output: 41952.125000000000000 ++fcsr: 0x100 ++roundig mode: zero ++fmul.d :: ++input: 1752.000000000000000 -347856.500000000000000 ++output: -609444588.000000000000000 ++fcsr: 0x100 ++roundig mode: zero ++fmul.d :: ++input: 0.015625000000000 356047.500000000000000 ++output: 5563.242187500000000 ++fcsr: 0x100 ++roundig mode: zero ++fmul.d :: ++input: 0.031250000000000 -1.000000000000000 ++output: -0.031250000000000 ++fcsr: 0x100 ++roundig mode: zero ++fmul.d :: ++input: -248562.750000000000000 23.062500000000000 ++output: -5732478.421875000000000 ++fcsr: 0x100 ++roundig mode: zero ++fmul.d :: ++input: -45786.500000000000000 0.000000000000000 ++output: -0.000000000000000 ++fcsr: 0x100 ++roundig mode: zero ++fmul.d :: ++input: 456.000000000000000 456.250000000000000 ++output: 208050.000000000000000 ++fcsr: 0x100 ++roundig mode: zero ++fmul.d :: ++input: 34.031250000000000 3.000000000000000 ++output: 102.093750000000000 ++fcsr: 0x100 ++roundig mode: zero ++fmul.d :: ++input: 45786.750000000000000 -1.000000000000000 ++output: -45786.750000000000000 ++fcsr: 0x100 ++roundig mode: zero ++fmul.d :: ++input: 1752065.000000000000000 1384.500000000000000 ++output: 2425733992.500000000000000 ++fcsr: 0x100 ++roundig mode: zero ++fmul.d :: ++input: 107.000000000000000 -7.000000000000000 ++output: -749.000000000000000 ++fcsr: 0x100 ++roundig mode: zero ++fmul.d :: ++input: -45667.250000000000000 1000000000.000000000000000 ++output: -45667250000000.000000000000000 ++fcsr: 0x100 ++roundig mode: zero ++fmul.d :: ++input: -7.000000000000000 -5786.500000000000000 ++output: 40505.500000000000000 ++fcsr: 0x100 ++roundig mode: zero ++fmul.d :: ++input: -347856.500000000000000 1752.000000000000000 ++output: -609444588.000000000000000 ++fcsr: 0x100 ++roundig mode: zero ++fmul.d :: ++input: 356047.500000000000000 0.015625000000000 ++output: 5563.242187500000000 ++fcsr: 0x100 ++roundig mode: zero ++fmul.d :: ++input: -1.000000000000000 0.031250000000000 ++output: -0.031250000000000 ++fcsr: 0x100 ++roundig mode: zero ++fmul.d :: ++input: 23.062500000000000 -248562.750000000000000 ++output: -5732478.421875000000000 ++fcsr: 0x100 ++roundig mode: +inf ++fmul.d :: ++input: 0.000000000000000 -45786.500000000000000 ++output: -0.000000000000000 ++fcsr: 0x200 ++roundig mode: +inf ++fmul.d :: ++input: 456.250000000000000 456.250000000000000 ++output: 208164.062500000000000 ++fcsr: 0x200 ++roundig mode: +inf ++fmul.d :: ++input: 3.000000000000000 34.031250000000000 ++output: 102.093750000000000 ++fcsr: 0x200 ++roundig mode: +inf ++fmul.d :: ++input: -1.000000000000000 45786.750000000000000 ++output: -45786.750000000000000 ++fcsr: 0x200 ++roundig mode: +inf ++fmul.d :: ++input: 1384.500000000000000 
1752065.000000000000000 ++output: 2425733992.500000000000000 ++fcsr: 0x200 ++roundig mode: +inf ++fmul.d :: ++input: -7.250000000000000 107.000000000000000 ++output: -775.750000000000000 ++fcsr: 0x200 ++roundig mode: +inf ++fmul.d :: ++input: 1000000000.000000000000000 -45667.250000000000000 ++output: -45667250000000.000000000000000 ++fcsr: 0x200 ++roundig mode: +inf ++fmul.d :: ++input: -5786.500000000000000 -7.250000000000000 ++output: 41952.125000000000000 ++fcsr: 0x200 ++roundig mode: +inf ++fmul.d :: ++input: 1752.000000000000000 -347856.500000000000000 ++output: -609444588.000000000000000 ++fcsr: 0x200 ++roundig mode: +inf ++fmul.d :: ++input: 0.015625000000000 356047.500000000000000 ++output: 5563.242187500000000 ++fcsr: 0x200 ++roundig mode: +inf ++fmul.d :: ++input: 0.031250000000000 -1.000000000000000 ++output: -0.031250000000000 ++fcsr: 0x200 ++roundig mode: +inf ++fmul.d :: ++input: -248562.750000000000000 23.062500000000000 ++output: -5732478.421875000000000 ++fcsr: 0x200 ++roundig mode: +inf ++fmul.d :: ++input: -45786.500000000000000 0.000000000000000 ++output: -0.000000000000000 ++fcsr: 0x200 ++roundig mode: +inf ++fmul.d :: ++input: 456.000000000000000 456.250000000000000 ++output: 208050.000000000000000 ++fcsr: 0x200 ++roundig mode: +inf ++fmul.d :: ++input: 34.031250000000000 3.000000000000000 ++output: 102.093750000000000 ++fcsr: 0x200 ++roundig mode: +inf ++fmul.d :: ++input: 45786.750000000000000 -1.000000000000000 ++output: -45786.750000000000000 ++fcsr: 0x200 ++roundig mode: +inf ++fmul.d :: ++input: 1752065.000000000000000 1384.500000000000000 ++output: 2425733992.500000000000000 ++fcsr: 0x200 ++roundig mode: +inf ++fmul.d :: ++input: 107.000000000000000 -7.000000000000000 ++output: -749.000000000000000 ++fcsr: 0x200 ++roundig mode: +inf ++fmul.d :: ++input: -45667.250000000000000 1000000000.000000000000000 ++output: -45667250000000.000000000000000 ++fcsr: 0x200 ++roundig mode: +inf ++fmul.d :: ++input: -7.000000000000000 -5786.500000000000000 ++output: 40505.500000000000000 ++fcsr: 0x200 ++roundig mode: +inf ++fmul.d :: ++input: -347856.500000000000000 1752.000000000000000 ++output: -609444588.000000000000000 ++fcsr: 0x200 ++roundig mode: +inf ++fmul.d :: ++input: 356047.500000000000000 0.015625000000000 ++output: 5563.242187500000000 ++fcsr: 0x200 ++roundig mode: +inf ++fmul.d :: ++input: -1.000000000000000 0.031250000000000 ++output: -0.031250000000000 ++fcsr: 0x200 ++roundig mode: +inf ++fmul.d :: ++input: 23.062500000000000 -248562.750000000000000 ++output: -5732478.421875000000000 ++fcsr: 0x200 ++roundig mode: -inf ++fmul.d :: ++input: 0.000000000000000 -45786.500000000000000 ++output: -0.000000000000000 ++fcsr: 0x300 ++roundig mode: -inf ++fmul.d :: ++input: 456.250000000000000 456.250000000000000 ++output: 208164.062500000000000 ++fcsr: 0x300 ++roundig mode: -inf ++fmul.d :: ++input: 3.000000000000000 34.031250000000000 ++output: 102.093750000000000 ++fcsr: 0x300 ++roundig mode: -inf ++fmul.d :: ++input: -1.000000000000000 45786.750000000000000 ++output: -45786.750000000000000 ++fcsr: 0x300 ++roundig mode: -inf ++fmul.d :: ++input: 1384.500000000000000 1752065.000000000000000 ++output: 2425733992.500000000000000 ++fcsr: 0x300 ++roundig mode: -inf ++fmul.d :: ++input: -7.250000000000000 107.000000000000000 ++output: -775.750000000000000 ++fcsr: 0x300 ++roundig mode: -inf ++fmul.d :: ++input: 1000000000.000000000000000 -45667.250000000000000 ++output: -45667250000000.000000000000000 ++fcsr: 0x300 ++roundig mode: -inf ++fmul.d :: ++input: -5786.500000000000000 
-7.250000000000000 ++output: 41952.125000000000000 ++fcsr: 0x300 ++roundig mode: -inf ++fmul.d :: ++input: 1752.000000000000000 -347856.500000000000000 ++output: -609444588.000000000000000 ++fcsr: 0x300 ++roundig mode: -inf ++fmul.d :: ++input: 0.015625000000000 356047.500000000000000 ++output: 5563.242187500000000 ++fcsr: 0x300 ++roundig mode: -inf ++fmul.d :: ++input: 0.031250000000000 -1.000000000000000 ++output: -0.031250000000000 ++fcsr: 0x300 ++roundig mode: -inf ++fmul.d :: ++input: -248562.750000000000000 23.062500000000000 ++output: -5732478.421875000000000 ++fcsr: 0x300 ++roundig mode: -inf ++fmul.d :: ++input: -45786.500000000000000 0.000000000000000 ++output: -0.000000000000000 ++fcsr: 0x300 ++roundig mode: -inf ++fmul.d :: ++input: 456.000000000000000 456.250000000000000 ++output: 208050.000000000000000 ++fcsr: 0x300 ++roundig mode: -inf ++fmul.d :: ++input: 34.031250000000000 3.000000000000000 ++output: 102.093750000000000 ++fcsr: 0x300 ++roundig mode: -inf ++fmul.d :: ++input: 45786.750000000000000 -1.000000000000000 ++output: -45786.750000000000000 ++fcsr: 0x300 ++roundig mode: -inf ++fmul.d :: ++input: 1752065.000000000000000 1384.500000000000000 ++output: 2425733992.500000000000000 ++fcsr: 0x300 ++roundig mode: -inf ++fmul.d :: ++input: 107.000000000000000 -7.000000000000000 ++output: -749.000000000000000 ++fcsr: 0x300 ++roundig mode: -inf ++fmul.d :: ++input: -45667.250000000000000 1000000000.000000000000000 ++output: -45667250000000.000000000000000 ++fcsr: 0x300 ++roundig mode: -inf ++fmul.d :: ++input: -7.000000000000000 -5786.500000000000000 ++output: 40505.500000000000000 ++fcsr: 0x300 ++roundig mode: -inf ++fmul.d :: ++input: -347856.500000000000000 1752.000000000000000 ++output: -609444588.000000000000000 ++fcsr: 0x300 ++roundig mode: -inf ++fmul.d :: ++input: 356047.500000000000000 0.015625000000000 ++output: 5563.242187500000000 ++fcsr: 0x300 ++roundig mode: -inf ++fmul.d :: ++input: -1.000000000000000 0.031250000000000 ++output: -0.031250000000000 ++fcsr: 0x300 ++roundig mode: -inf ++fmul.d :: ++input: 23.062500000000000 -248562.750000000000000 ++output: -5732478.421875000000000 ++fcsr: 0x300 ++roundig mode: near ++fdiv.s :: ++input: 0.000000 -4578.500000 ++output: -0.000000 ++fcsr: 0 ++roundig mode: near ++fdiv.s :: ++input: 456.250000 456.250000 ++output: 1.000000 ++fcsr: 0 ++roundig mode: near ++fdiv.s :: ++input: 3.000000 34.031250 ++output: 0.088154 ++fcsr: 0x1010000 ++roundig mode: near ++fdiv.s :: ++input: -1.000000 4578.750000 ++output: -0.000218 ++fcsr: 0x1010000 ++roundig mode: near ++fdiv.s :: ++input: 1384.500000 175.000000 ++output: 7.911428 ++fcsr: 0x1010000 ++roundig mode: near ++fdiv.s :: ++input: -7.250000 107.000000 ++output: -0.067757 ++fcsr: 0x1010000 ++roundig mode: near ++fdiv.s :: ++input: 1000000000.000000 -456.250000 ++output: -2191780.750000 ++fcsr: 0x1010000 ++roundig mode: near ++fdiv.s :: ++input: -5786.500000 -7.250000 ++output: 798.137939 ++fcsr: 0x1010000 ++roundig mode: near ++fdiv.s :: ++input: 1752.000000 -3478.500000 ++output: -0.503665 ++fcsr: 0x1010000 ++roundig mode: near ++fdiv.s :: ++input: 0.015625 356.500000 ++output: 0.000044 ++fcsr: 0x1010000 ++roundig mode: near ++fdiv.s :: ++input: 0.031250 -1.000000 ++output: -0.031250 ++fcsr: 0 ++roundig mode: near ++fdiv.s :: ++input: -248562.750000 23.062500 ++output: -10777.789062 ++fcsr: 0x1010000 ++roundig mode: near ++fdiv.s :: ++input: -45786.500000 0.000000 ++output: -inf ++fcsr: 0x8080000 ++roundig mode: near ++fdiv.s :: ++input: 456.000000 456.250000 ++output: 0.999452 
++fcsr: 0x1010000 ++roundig mode: near ++fdiv.s :: ++input: 34.031250 3.000000 ++output: 11.343750 ++fcsr: 0 ++roundig mode: near ++fdiv.s :: ++input: 45786.750000 -1.000000 ++output: -45786.750000 ++fcsr: 0 ++roundig mode: near ++fdiv.s :: ++input: 1752065.000000 1384.500000 ++output: 1265.485718 ++fcsr: 0x1010000 ++roundig mode: near ++fdiv.s :: ++input: 107.000000 -7.000000 ++output: -15.285714 ++fcsr: 0x1010000 ++roundig mode: near ++fdiv.s :: ++input: -45667.250000 100.000000 ++output: -456.672485 ++fcsr: 0x1010000 ++roundig mode: near ++fdiv.s :: ++input: -7.000000 -5786.500000 ++output: 0.001210 ++fcsr: 0x1010000 ++roundig mode: near ++fdiv.s :: ++input: -347856.500000 1752.000000 ++output: -198.548233 ++fcsr: 0x1010000 ++roundig mode: near ++fdiv.s :: ++input: 356047.500000 0.015625 ++output: 22787040.000000 ++fcsr: 0 ++roundig mode: near ++fdiv.s :: ++input: -1.000000 0.031250 ++output: -32.000000 ++fcsr: 0 ++roundig mode: near ++fdiv.s :: ++input: 23.062500 -248562.750000 ++output: -0.000093 ++fcsr: 0x1010000 ++roundig mode: zero ++fdiv.s :: ++input: 0.000000 -4578.500000 ++output: -0.000000 ++fcsr: 0x100 ++roundig mode: zero ++fdiv.s :: ++input: 456.250000 456.250000 ++output: 1.000000 ++fcsr: 0x100 ++roundig mode: zero ++fdiv.s :: ++input: 3.000000 34.031250 ++output: 0.088154 ++fcsr: 0x1010100 ++roundig mode: zero ++fdiv.s :: ++input: -1.000000 4578.750000 ++output: -0.000218 ++fcsr: 0x1010100 ++roundig mode: zero ++fdiv.s :: ++input: 1384.500000 175.000000 ++output: 7.911428 ++fcsr: 0x1010100 ++roundig mode: zero ++fdiv.s :: ++input: -7.250000 107.000000 ++output: -0.067757 ++fcsr: 0x1010100 ++roundig mode: zero ++fdiv.s :: ++input: 1000000000.000000 -456.250000 ++output: -2191780.750000 ++fcsr: 0x1010100 ++roundig mode: zero ++fdiv.s :: ++input: -5786.500000 -7.250000 ++output: 798.137878 ++fcsr: 0x1010100 ++roundig mode: zero ++fdiv.s :: ++input: 1752.000000 -3478.500000 ++output: -0.503665 ++fcsr: 0x1010100 ++roundig mode: zero ++fdiv.s :: ++input: 0.015625 356.500000 ++output: 0.000043 ++fcsr: 0x1010100 ++roundig mode: zero ++fdiv.s :: ++input: 0.031250 -1.000000 ++output: -0.031250 ++fcsr: 0x100 ++roundig mode: zero ++fdiv.s :: ++input: -248562.750000 23.062500 ++output: -10777.788085 ++fcsr: 0x1010100 ++roundig mode: zero ++fdiv.s :: ++input: -45786.500000 0.000000 ++output: -inf ++fcsr: 0x8080100 ++roundig mode: zero ++fdiv.s :: ++input: 456.000000 456.250000 ++output: 0.999452 ++fcsr: 0x1010100 ++roundig mode: zero ++fdiv.s :: ++input: 34.031250 3.000000 ++output: 11.343750 ++fcsr: 0x100 ++roundig mode: zero ++fdiv.s :: ++input: 45786.750000 -1.000000 ++output: -45786.750000 ++fcsr: 0x100 ++roundig mode: zero ++fdiv.s :: ++input: 1752065.000000 1384.500000 ++output: 1265.485717 ++fcsr: 0x1010100 ++roundig mode: zero ++fdiv.s :: ++input: 107.000000 -7.000000 ++output: -15.285714 ++fcsr: 0x1010100 ++roundig mode: zero ++fdiv.s :: ++input: -45667.250000 100.000000 ++output: -456.672485 ++fcsr: 0x1010100 ++roundig mode: zero ++fdiv.s :: ++input: -7.000000 -5786.500000 ++output: 0.001209 ++fcsr: 0x1010100 ++roundig mode: zero ++fdiv.s :: ++input: -347856.500000 1752.000000 ++output: -198.548217 ++fcsr: 0x1010100 ++roundig mode: zero ++fdiv.s :: ++input: 356047.500000 0.015625 ++output: 22787040.000000 ++fcsr: 0x100 ++roundig mode: zero ++fdiv.s :: ++input: -1.000000 0.031250 ++output: -32.000000 ++fcsr: 0x100 ++roundig mode: zero ++fdiv.s :: ++input: 23.062500 -248562.750000 ++output: -0.000092 ++fcsr: 0x1010100 ++roundig mode: +inf ++fdiv.s :: ++input: 0.000000 
-4578.500000 ++output: -0.000000 ++fcsr: 0x200 ++roundig mode: +inf ++fdiv.s :: ++input: 456.250000 456.250000 ++output: 1.000000 ++fcsr: 0x200 ++roundig mode: +inf ++fdiv.s :: ++input: 3.000000 34.031250 ++output: 0.088155 ++fcsr: 0x1010200 ++roundig mode: +inf ++fdiv.s :: ++input: -1.000000 4578.750000 ++output: -0.000218 ++fcsr: 0x1010200 ++roundig mode: +inf ++fdiv.s :: ++input: 1384.500000 175.000000 ++output: 7.911429 ++fcsr: 0x1010200 ++roundig mode: +inf ++fdiv.s :: ++input: -7.250000 107.000000 ++output: -0.067757 ++fcsr: 0x1010200 ++roundig mode: +inf ++fdiv.s :: ++input: 1000000000.000000 -456.250000 ++output: -2191780.750000 ++fcsr: 0x1010200 ++roundig mode: +inf ++fdiv.s :: ++input: -5786.500000 -7.250000 ++output: 798.137940 ++fcsr: 0x1010200 ++roundig mode: +inf ++fdiv.s :: ++input: 1752.000000 -3478.500000 ++output: -0.503665 ++fcsr: 0x1010200 ++roundig mode: +inf ++fdiv.s :: ++input: 0.015625 356.500000 ++output: 0.000044 ++fcsr: 0x1010200 ++roundig mode: +inf ++fdiv.s :: ++input: 0.031250 -1.000000 ++output: -0.031250 ++fcsr: 0x200 ++roundig mode: +inf ++fdiv.s :: ++input: -248562.750000 23.062500 ++output: -10777.788085 ++fcsr: 0x1010200 ++roundig mode: +inf ++fdiv.s :: ++input: -45786.500000 0.000000 ++output: -inf ++fcsr: 0x8080200 ++roundig mode: +inf ++fdiv.s :: ++input: 456.000000 456.250000 ++output: 0.999453 ++fcsr: 0x1010200 ++roundig mode: +inf ++fdiv.s :: ++input: 34.031250 3.000000 ++output: 11.343750 ++fcsr: 0x200 ++roundig mode: +inf ++fdiv.s :: ++input: 45786.750000 -1.000000 ++output: -45786.750000 ++fcsr: 0x200 ++roundig mode: +inf ++fdiv.s :: ++input: 1752065.000000 1384.500000 ++output: 1265.485840 ++fcsr: 0x1010200 ++roundig mode: +inf ++fdiv.s :: ++input: 107.000000 -7.000000 ++output: -15.285714 ++fcsr: 0x1010200 ++roundig mode: +inf ++fdiv.s :: ++input: -45667.250000 100.000000 ++output: -456.672485 ++fcsr: 0x1010200 ++roundig mode: +inf ++fdiv.s :: ++input: -7.000000 -5786.500000 ++output: 0.001210 ++fcsr: 0x1010200 ++roundig mode: +inf ++fdiv.s :: ++input: -347856.500000 1752.000000 ++output: -198.548217 ++fcsr: 0x1010200 ++roundig mode: +inf ++fdiv.s :: ++input: 356047.500000 0.015625 ++output: 22787040.000000 ++fcsr: 0x200 ++roundig mode: +inf ++fdiv.s :: ++input: -1.000000 0.031250 ++output: -32.000000 ++fcsr: 0x200 ++roundig mode: +inf ++fdiv.s :: ++input: 23.062500 -248562.750000 ++output: -0.000092 ++fcsr: 0x1010200 ++roundig mode: -inf ++fdiv.s :: ++input: 0.000000 -4578.500000 ++output: -0.000000 ++fcsr: 0x300 ++roundig mode: -inf ++fdiv.s :: ++input: 456.250000 456.250000 ++output: 1.000000 ++fcsr: 0x300 ++roundig mode: -inf ++fdiv.s :: ++input: 3.000000 34.031250 ++output: 0.088154 ++fcsr: 0x1010300 ++roundig mode: -inf ++fdiv.s :: ++input: -1.000000 4578.750000 ++output: -0.000219 ++fcsr: 0x1010300 ++roundig mode: -inf ++fdiv.s :: ++input: 1384.500000 175.000000 ++output: 7.911428 ++fcsr: 0x1010300 ++roundig mode: -inf ++fdiv.s :: ++input: -7.250000 107.000000 ++output: -0.067758 ++fcsr: 0x1010300 ++roundig mode: -inf ++fdiv.s :: ++input: 1000000000.000000 -456.250000 ++output: -2191781.000000 ++fcsr: 0x1010300 ++roundig mode: -inf ++fdiv.s :: ++input: -5786.500000 -7.250000 ++output: 798.137878 ++fcsr: 0x1010300 ++roundig mode: -inf ++fdiv.s :: ++input: 1752.000000 -3478.500000 ++output: -0.503666 ++fcsr: 0x1010300 ++roundig mode: -inf ++fdiv.s :: ++input: 0.015625 356.500000 ++output: 0.000043 ++fcsr: 0x1010300 ++roundig mode: -inf ++fdiv.s :: ++input: 0.031250 -1.000000 ++output: -0.031250 ++fcsr: 0x300 ++roundig mode: -inf ++fdiv.s 
:: ++input: -248562.750000 23.062500 ++output: -10777.789063 ++fcsr: 0x1010300 ++roundig mode: -inf ++fdiv.s :: ++input: -45786.500000 0.000000 ++output: -inf ++fcsr: 0x8080300 ++roundig mode: -inf ++fdiv.s :: ++input: 456.000000 456.250000 ++output: 0.999452 ++fcsr: 0x1010300 ++roundig mode: -inf ++fdiv.s :: ++input: 34.031250 3.000000 ++output: 11.343750 ++fcsr: 0x300 ++roundig mode: -inf ++fdiv.s :: ++input: 45786.750000 -1.000000 ++output: -45786.750000 ++fcsr: 0x300 ++roundig mode: -inf ++fdiv.s :: ++input: 1752065.000000 1384.500000 ++output: 1265.485717 ++fcsr: 0x1010300 ++roundig mode: -inf ++fdiv.s :: ++input: 107.000000 -7.000000 ++output: -15.285716 ++fcsr: 0x1010300 ++roundig mode: -inf ++fdiv.s :: ++input: -45667.250000 100.000000 ++output: -456.672516 ++fcsr: 0x1010300 ++roundig mode: -inf ++fdiv.s :: ++input: -7.000000 -5786.500000 ++output: 0.001209 ++fcsr: 0x1010300 ++roundig mode: -inf ++fdiv.s :: ++input: -347856.500000 1752.000000 ++output: -198.548234 ++fcsr: 0x1010300 ++roundig mode: -inf ++fdiv.s :: ++input: 356047.500000 0.015625 ++output: 22787040.000000 ++fcsr: 0x300 ++roundig mode: -inf ++fdiv.s :: ++input: -1.000000 0.031250 ++output: -32.000000 ++fcsr: 0x300 ++roundig mode: -inf ++fdiv.s :: ++input: 23.062500 -248562.750000 ++output: -0.000093 ++fcsr: 0x1010300 ++roundig mode: near ++fdiv.d :: ++input: 0.000000000000000 -45786.500000000000000 ++output: -0.000000000000000 ++fcsr: 0 ++roundig mode: near ++fdiv.d :: ++input: 456.250000000000000 456.250000000000000 ++output: 1.000000000000000 ++fcsr: 0 ++roundig mode: near ++fdiv.d :: ++input: 3.000000000000000 34.031250000000000 ++output: 0.088154269972452 ++fcsr: 0x1010000 ++roundig mode: near ++fdiv.d :: ++input: -1.000000000000000 45786.750000000000000 ++output: -0.000021840379586 ++fcsr: 0x1010000 ++roundig mode: near ++fdiv.d :: ++input: 1384.500000000000000 1752065.000000000000000 ++output: 0.000790210408860 ++fcsr: 0x1010000 ++roundig mode: near ++fdiv.d :: ++input: -7.250000000000000 107.000000000000000 ++output: -0.067757009345794 ++fcsr: 0x1010000 ++roundig mode: near ++fdiv.d :: ++input: 1000000000.000000000000000 -45667.250000000000000 ++output: -21897.530505997186992 ++fcsr: 0x1010000 ++roundig mode: near ++fdiv.d :: ++input: -5786.500000000000000 -7.250000000000000 ++output: 798.137931034482790 ++fcsr: 0x1010000 ++roundig mode: near ++fdiv.d :: ++input: 1752.000000000000000 -347856.500000000000000 ++output: -0.005036559615819 ++fcsr: 0x1010000 ++roundig mode: near ++fdiv.d :: ++input: 0.015625000000000 356047.500000000000000 ++output: 0.000000043884594 ++fcsr: 0x1010000 ++roundig mode: near ++fdiv.d :: ++input: 0.031250000000000 -1.000000000000000 ++output: -0.031250000000000 ++fcsr: 0 ++roundig mode: near ++fdiv.d :: ++input: -248562.750000000000000 23.062500000000000 ++output: -10777.788617886179054 ++fcsr: 0x1010000 ++roundig mode: near ++fdiv.d :: ++input: -45786.500000000000000 0.000000000000000 ++output: -inf ++fcsr: 0x8080000 ++roundig mode: near ++fdiv.d :: ++input: 456.000000000000000 456.250000000000000 ++output: 0.999452054794521 ++fcsr: 0x1010000 ++roundig mode: near ++fdiv.d :: ++input: 34.031250000000000 3.000000000000000 ++output: 11.343750000000000 ++fcsr: 0 ++roundig mode: near ++fdiv.d :: ++input: 45786.750000000000000 -1.000000000000000 ++output: -45786.750000000000000 ++fcsr: 0 ++roundig mode: near ++fdiv.d :: ++input: 1752065.000000000000000 1384.500000000000000 ++output: 1265.485734922354595 ++fcsr: 0x1010000 ++roundig mode: near ++fdiv.d :: ++input: 107.000000000000000 
-7.000000000000000 ++output: -15.285714285714286 ++fcsr: 0x1010000 ++roundig mode: near ++fdiv.d :: ++input: -45667.250000000000000 1000000000.000000000000000 ++output: -0.000045667250000 ++fcsr: 0x1010000 ++roundig mode: near ++fdiv.d :: ++input: -7.000000000000000 -5786.500000000000000 ++output: 0.001209712261298 ++fcsr: 0x1010000 ++roundig mode: near ++fdiv.d :: ++input: -347856.500000000000000 1752.000000000000000 ++output: -198.548230593607315 ++fcsr: 0x1010000 ++roundig mode: near ++fdiv.d :: ++input: 356047.500000000000000 0.015625000000000 ++output: 22787040.000000000000000 ++fcsr: 0 ++roundig mode: near ++fdiv.d :: ++input: -1.000000000000000 0.031250000000000 ++output: -32.000000000000000 ++fcsr: 0 ++roundig mode: near ++fdiv.d :: ++input: 23.062500000000000 -248562.750000000000000 ++output: -0.000092783411835 ++fcsr: 0x1010000 ++roundig mode: zero ++fdiv.d :: ++input: 0.000000000000000 -45786.500000000000000 ++output: -0.000000000000000 ++fcsr: 0x100 ++roundig mode: zero ++fdiv.d :: ++input: 456.250000000000000 456.250000000000000 ++output: 1.000000000000000 ++fcsr: 0x100 ++roundig mode: zero ++fdiv.d :: ++input: 3.000000000000000 34.031250000000000 ++output: 0.088154269972451 ++fcsr: 0x1010100 ++roundig mode: zero ++fdiv.d :: ++input: -1.000000000000000 45786.750000000000000 ++output: -0.000021840379585 ++fcsr: 0x1010100 ++roundig mode: zero ++fdiv.d :: ++input: 1384.500000000000000 1752065.000000000000000 ++output: 0.000790210408860 ++fcsr: 0x1010100 ++roundig mode: zero ++fdiv.d :: ++input: -7.250000000000000 107.000000000000000 ++output: -0.067757009345794 ++fcsr: 0x1010100 ++roundig mode: zero ++fdiv.d :: ++input: 1000000000.000000000000000 -45667.250000000000000 ++output: -21897.530505997183354 ++fcsr: 0x1010100 ++roundig mode: zero ++fdiv.d :: ++input: -5786.500000000000000 -7.250000000000000 ++output: 798.137931034482676 ++fcsr: 0x1010100 ++roundig mode: zero ++fdiv.d :: ++input: 1752.000000000000000 -347856.500000000000000 ++output: -0.005036559615818 ++fcsr: 0x1010100 ++roundig mode: zero ++fdiv.d :: ++input: 0.015625000000000 356047.500000000000000 ++output: 0.000000043884594 ++fcsr: 0x1010100 ++roundig mode: zero ++fdiv.d :: ++input: 0.031250000000000 -1.000000000000000 ++output: -0.031250000000000 ++fcsr: 0x100 ++roundig mode: zero ++fdiv.d :: ++input: -248562.750000000000000 23.062500000000000 ++output: -10777.788617886177235 ++fcsr: 0x1010100 ++roundig mode: zero ++fdiv.d :: ++input: -45786.500000000000000 0.000000000000000 ++output: -inf ++fcsr: 0x8080100 ++roundig mode: zero ++fdiv.d :: ++input: 456.000000000000000 456.250000000000000 ++output: 0.999452054794520 ++fcsr: 0x1010100 ++roundig mode: zero ++fdiv.d :: ++input: 34.031250000000000 3.000000000000000 ++output: 11.343750000000000 ++fcsr: 0x100 ++roundig mode: zero ++fdiv.d :: ++input: 45786.750000000000000 -1.000000000000000 ++output: -45786.750000000000000 ++fcsr: 0x100 ++roundig mode: zero ++fdiv.d :: ++input: 1752065.000000000000000 1384.500000000000000 ++output: 1265.485734922354595 ++fcsr: 0x1010100 ++roundig mode: zero ++fdiv.d :: ++input: 107.000000000000000 -7.000000000000000 ++output: -15.285714285714284 ++fcsr: 0x1010100 ++roundig mode: zero ++fdiv.d :: ++input: -45667.250000000000000 1000000000.000000000000000 ++output: -0.000045667249999 ++fcsr: 0x1010100 ++roundig mode: zero ++fdiv.d :: ++input: -7.000000000000000 -5786.500000000000000 ++output: 0.001209712261297 ++fcsr: 0x1010100 ++roundig mode: zero ++fdiv.d :: ++input: -347856.500000000000000 1752.000000000000000 ++output: 
-198.548230593607286 ++fcsr: 0x1010100 ++roundig mode: zero ++fdiv.d :: ++input: 356047.500000000000000 0.015625000000000 ++output: 22787040.000000000000000 ++fcsr: 0x100 ++roundig mode: zero ++fdiv.d :: ++input: -1.000000000000000 0.031250000000000 ++output: -32.000000000000000 ++fcsr: 0x100 ++roundig mode: zero ++fdiv.d :: ++input: 23.062500000000000 -248562.750000000000000 ++output: -0.000092783411834 ++fcsr: 0x1010100 ++roundig mode: +inf ++fdiv.d :: ++input: 0.000000000000000 -45786.500000000000000 ++output: -0.000000000000000 ++fcsr: 0x200 ++roundig mode: +inf ++fdiv.d :: ++input: 456.250000000000000 456.250000000000000 ++output: 1.000000000000000 ++fcsr: 0x200 ++roundig mode: +inf ++fdiv.d :: ++input: 3.000000000000000 34.031250000000000 ++output: 0.088154269972452 ++fcsr: 0x1010200 ++roundig mode: +inf ++fdiv.d :: ++input: -1.000000000000000 45786.750000000000000 ++output: -0.000021840379585 ++fcsr: 0x1010200 ++roundig mode: +inf ++fdiv.d :: ++input: 1384.500000000000000 1752065.000000000000000 ++output: 0.000790210408861 ++fcsr: 0x1010200 ++roundig mode: +inf ++fdiv.d :: ++input: -7.250000000000000 107.000000000000000 ++output: -0.067757009345794 ++fcsr: 0x1010200 ++roundig mode: +inf ++fdiv.d :: ++input: 1000000000.000000000000000 -45667.250000000000000 ++output: -21897.530505997183354 ++fcsr: 0x1010200 ++roundig mode: +inf ++fdiv.d :: ++input: -5786.500000000000000 -7.250000000000000 ++output: 798.137931034482790 ++fcsr: 0x1010200 ++roundig mode: +inf ++fdiv.d :: ++input: 1752.000000000000000 -347856.500000000000000 ++output: -0.005036559615818 ++fcsr: 0x1010200 ++roundig mode: +inf ++fdiv.d :: ++input: 0.015625000000000 356047.500000000000000 ++output: 0.000000043884595 ++fcsr: 0x1010200 ++roundig mode: +inf ++fdiv.d :: ++input: 0.031250000000000 -1.000000000000000 ++output: -0.031250000000000 ++fcsr: 0x200 ++roundig mode: +inf ++fdiv.d :: ++input: -248562.750000000000000 23.062500000000000 ++output: -10777.788617886177235 ++fcsr: 0x1010200 ++roundig mode: +inf ++fdiv.d :: ++input: -45786.500000000000000 0.000000000000000 ++output: -inf ++fcsr: 0x8080200 ++roundig mode: +inf ++fdiv.d :: ++input: 456.000000000000000 456.250000000000000 ++output: 0.999452054794521 ++fcsr: 0x1010200 ++roundig mode: +inf ++fdiv.d :: ++input: 34.031250000000000 3.000000000000000 ++output: 11.343750000000000 ++fcsr: 0x200 ++roundig mode: +inf ++fdiv.d :: ++input: 45786.750000000000000 -1.000000000000000 ++output: -45786.750000000000000 ++fcsr: 0x200 ++roundig mode: +inf ++fdiv.d :: ++input: 1752065.000000000000000 1384.500000000000000 ++output: 1265.485734922354823 ++fcsr: 0x1010200 ++roundig mode: +inf ++fdiv.d :: ++input: 107.000000000000000 -7.000000000000000 ++output: -15.285714285714284 ++fcsr: 0x1010200 ++roundig mode: +inf ++fdiv.d :: ++input: -45667.250000000000000 1000000000.000000000000000 ++output: -0.000045667249999 ++fcsr: 0x1010200 ++roundig mode: +inf ++fdiv.d :: ++input: -7.000000000000000 -5786.500000000000000 ++output: 0.001209712261298 ++fcsr: 0x1010200 ++roundig mode: +inf ++fdiv.d :: ++input: -347856.500000000000000 1752.000000000000000 ++output: -198.548230593607286 ++fcsr: 0x1010200 ++roundig mode: +inf ++fdiv.d :: ++input: 356047.500000000000000 0.015625000000000 ++output: 22787040.000000000000000 ++fcsr: 0x200 ++roundig mode: +inf ++fdiv.d :: ++input: -1.000000000000000 0.031250000000000 ++output: -32.000000000000000 ++fcsr: 0x200 ++roundig mode: +inf ++fdiv.d :: ++input: 23.062500000000000 -248562.750000000000000 ++output: -0.000092783411834 ++fcsr: 0x1010200 ++roundig mode: 
-inf ++fdiv.d :: ++input: 0.000000000000000 -45786.500000000000000 ++output: -0.000000000000000 ++fcsr: 0x300 ++roundig mode: -inf ++fdiv.d :: ++input: 456.250000000000000 456.250000000000000 ++output: 1.000000000000000 ++fcsr: 0x300 ++roundig mode: -inf ++fdiv.d :: ++input: 3.000000000000000 34.031250000000000 ++output: 0.088154269972451 ++fcsr: 0x1010300 ++roundig mode: -inf ++fdiv.d :: ++input: -1.000000000000000 45786.750000000000000 ++output: -0.000021840379586 ++fcsr: 0x1010300 ++roundig mode: -inf ++fdiv.d :: ++input: 1384.500000000000000 1752065.000000000000000 ++output: 0.000790210408860 ++fcsr: 0x1010300 ++roundig mode: -inf ++fdiv.d :: ++input: -7.250000000000000 107.000000000000000 ++output: -0.067757009345795 ++fcsr: 0x1010300 ++roundig mode: -inf ++fdiv.d :: ++input: 1000000000.000000000000000 -45667.250000000000000 ++output: -21897.530505997186993 ++fcsr: 0x1010300 ++roundig mode: -inf ++fdiv.d :: ++input: -5786.500000000000000 -7.250000000000000 ++output: 798.137931034482676 ++fcsr: 0x1010300 ++roundig mode: -inf ++fdiv.d :: ++input: 1752.000000000000000 -347856.500000000000000 ++output: -0.005036559615819 ++fcsr: 0x1010300 ++roundig mode: -inf ++fdiv.d :: ++input: 0.015625000000000 356047.500000000000000 ++output: 0.000000043884594 ++fcsr: 0x1010300 ++roundig mode: -inf ++fdiv.d :: ++input: 0.031250000000000 -1.000000000000000 ++output: -0.031250000000000 ++fcsr: 0x300 ++roundig mode: -inf ++fdiv.d :: ++input: -248562.750000000000000 23.062500000000000 ++output: -10777.788617886179055 ++fcsr: 0x1010300 ++roundig mode: -inf ++fdiv.d :: ++input: -45786.500000000000000 0.000000000000000 ++output: -inf ++fcsr: 0x8080300 ++roundig mode: -inf ++fdiv.d :: ++input: 456.000000000000000 456.250000000000000 ++output: 0.999452054794520 ++fcsr: 0x1010300 ++roundig mode: -inf ++fdiv.d :: ++input: 34.031250000000000 3.000000000000000 ++output: 11.343750000000000 ++fcsr: 0x300 ++roundig mode: -inf ++fdiv.d :: ++input: 45786.750000000000000 -1.000000000000000 ++output: -45786.750000000000000 ++fcsr: 0x300 ++roundig mode: -inf ++fdiv.d :: ++input: 1752065.000000000000000 1384.500000000000000 ++output: 1265.485734922354595 ++fcsr: 0x1010300 ++roundig mode: -inf ++fdiv.d :: ++input: 107.000000000000000 -7.000000000000000 ++output: -15.285714285714287 ++fcsr: 0x1010300 ++roundig mode: -inf ++fdiv.d :: ++input: -45667.250000000000000 1000000000.000000000000000 ++output: -0.000045667250001 ++fcsr: 0x1010300 ++roundig mode: -inf ++fdiv.d :: ++input: -7.000000000000000 -5786.500000000000000 ++output: 0.001209712261297 ++fcsr: 0x1010300 ++roundig mode: -inf ++fdiv.d :: ++input: -347856.500000000000000 1752.000000000000000 ++output: -198.548230593607315 ++fcsr: 0x1010300 ++roundig mode: -inf ++fdiv.d :: ++input: 356047.500000000000000 0.015625000000000 ++output: 22787040.000000000000000 ++fcsr: 0x300 ++roundig mode: -inf ++fdiv.d :: ++input: -1.000000000000000 0.031250000000000 ++output: -32.000000000000000 ++fcsr: 0x300 ++roundig mode: -inf ++fdiv.d :: ++input: 23.062500000000000 -248562.750000000000000 ++output: -0.000092783411835 ++fcsr: 0x1010300 ++roundig mode: near ++fmadd.s :: ++input: 0.000000 -4578.500000 -347856.500000 ++output: -347856.500000 ++fcsr: 0 ++roundig mode: near ++fmadd.s :: ++input: 456.250000 456.250000 356047.500000 ++output: 564211.562500 ++fcsr: 0 ++roundig mode: near ++fmadd.s :: ++input: 3.000000 34.031250 -1.000000 ++output: 101.093750 ++fcsr: 0 ++roundig mode: near ++fmadd.s :: ++input: -1.000000 4578.750000 23.062500 ++output: -4555.687500 ++fcsr: 0 ++roundig mode: 
near ++fmadd.s :: ++input: 1384.500000 175.000000 1752.000000 ++output: 244039.500000 ++fcsr: 0 ++roundig mode: near ++fmadd.s :: ++input: -7.250000 107.000000 0.015625 ++output: -775.734375 ++fcsr: 0 ++roundig mode: near ++fmadd.s :: ++input: 1000000000.000000 -456.250000 0.031250 ++output: -456249999360.000000 ++fcsr: 0x1010000 ++roundig mode: near ++fmadd.s :: ++input: -5786.500000 -7.250000 -248562.750000 ++output: -206610.625000 ++fcsr: 0 ++roundig mode: near ++fmadd.s :: ++input: 1752.000000 -3478.500000 1384.500000 ++output: -6092947.500000 ++fcsr: 0 ++roundig mode: near ++fmadd.s :: ++input: 0.015625 356.500000 -7.250000 ++output: -1.679688 ++fcsr: 0 ++roundig mode: near ++fmadd.s :: ++input: 0.031250 -1.000000 1000000000.000000 ++output: 1000000000.000000 ++fcsr: 0x1010000 ++roundig mode: near ++fmadd.s :: ++input: -248562.750000 23.062500 -5786.500000 ++output: -5738265.000000 ++fcsr: 0x1010000 ++roundig mode: near ++fmadd.s :: ++input: -45786.500000 0.000000 -347856.750000 ++output: -347856.750000 ++fcsr: 0 ++roundig mode: near ++fmadd.s :: ++input: 456.000000 456.250000 356047.750000 ++output: 564097.750000 ++fcsr: 0 ++roundig mode: near ++fmadd.s :: ++input: 34.031250 3.000000 -1.000000 ++output: 101.093750 ++fcsr: 0 ++roundig mode: near ++fmadd.s :: ++input: 45786.750000 -1.000000 23.031250 ++output: -45763.718750 ++fcsr: 0 ++roundig mode: near ++fmadd.s :: ++input: 1752065.000000 1384.500000 0.000000 ++output: 2425733888.000000 ++fcsr: 0x1010000 ++roundig mode: near ++fmadd.s :: ++input: 107.000000 -7.000000 456.250000 ++output: -292.750000 ++fcsr: 0 ++roundig mode: near ++fmadd.s :: ++input: -45667.250000 100.000000 3.000000 ++output: -4566722.000000 ++fcsr: 0 ++roundig mode: near ++fmadd.s :: ++input: -7.000000 -5786.500000 -1.000000 ++output: 40504.500000 ++fcsr: 0 ++roundig mode: near ++fmadd.s :: ++input: -347856.500000 1752.000000 -45786.500000 ++output: -609490368.000000 ++fcsr: 0x1010000 ++roundig mode: near ++fmadd.s :: ++input: 356047.500000 0.015625 456.000000 ++output: 6019.242188 ++fcsr: 0 ++roundig mode: near ++fmadd.s :: ++input: -1.000000 0.031250 34.031250 ++output: 34.000000 ++fcsr: 0 ++roundig mode: near ++fmadd.s :: ++input: 23.062500 -248562.750000 45786.031250 ++output: -5686692.500000 ++fcsr: 0x1010000 ++roundig mode: zero ++fmadd.s :: ++input: 0.000000 -4578.500000 -347856.500000 ++output: -347856.500000 ++fcsr: 0x100 ++roundig mode: zero ++fmadd.s :: ++input: 456.250000 456.250000 356047.500000 ++output: 564211.562500 ++fcsr: 0x100 ++roundig mode: zero ++fmadd.s :: ++input: 3.000000 34.031250 -1.000000 ++output: 101.093750 ++fcsr: 0x100 ++roundig mode: zero ++fmadd.s :: ++input: -1.000000 4578.750000 23.062500 ++output: -4555.687500 ++fcsr: 0x100 ++roundig mode: zero ++fmadd.s :: ++input: 1384.500000 175.000000 1752.000000 ++output: 244039.500000 ++fcsr: 0x100 ++roundig mode: zero ++fmadd.s :: ++input: -7.250000 107.000000 0.015625 ++output: -775.734375 ++fcsr: 0x100 ++roundig mode: zero ++fmadd.s :: ++input: 1000000000.000000 -456.250000 0.031250 ++output: -456249999360.000000 ++fcsr: 0x1010100 ++roundig mode: zero ++fmadd.s :: ++input: -5786.500000 -7.250000 -248562.750000 ++output: -206610.625000 ++fcsr: 0x100 ++roundig mode: zero ++fmadd.s :: ++input: 1752.000000 -3478.500000 1384.500000 ++output: -6092947.500000 ++fcsr: 0x100 ++roundig mode: zero ++fmadd.s :: ++input: 0.015625 356.500000 -7.250000 ++output: -1.679687 ++fcsr: 0x100 ++roundig mode: zero ++fmadd.s :: ++input: 0.031250 -1.000000 1000000000.000000 ++output: 999999936.000000 ++fcsr: 
0x1010100 ++roundig mode: zero ++fmadd.s :: ++input: -248562.750000 23.062500 -5786.500000 ++output: -5738264.500000 ++fcsr: 0x1010100 ++roundig mode: zero ++fmadd.s :: ++input: -45786.500000 0.000000 -347856.750000 ++output: -347856.750000 ++fcsr: 0x100 ++roundig mode: zero ++fmadd.s :: ++input: 456.000000 456.250000 356047.750000 ++output: 564097.750000 ++fcsr: 0x100 ++roundig mode: zero ++fmadd.s :: ++input: 34.031250 3.000000 -1.000000 ++output: 101.093750 ++fcsr: 0x100 ++roundig mode: zero ++fmadd.s :: ++input: 45786.750000 -1.000000 23.031250 ++output: -45763.718750 ++fcsr: 0x100 ++roundig mode: zero ++fmadd.s :: ++input: 1752065.000000 1384.500000 0.000000 ++output: 2425733888.000000 ++fcsr: 0x1010100 ++roundig mode: zero ++fmadd.s :: ++input: 107.000000 -7.000000 456.250000 ++output: -292.750000 ++fcsr: 0x100 ++roundig mode: zero ++fmadd.s :: ++input: -45667.250000 100.000000 3.000000 ++output: -4566722.000000 ++fcsr: 0x100 ++roundig mode: zero ++fmadd.s :: ++input: -7.000000 -5786.500000 -1.000000 ++output: 40504.500000 ++fcsr: 0x100 ++roundig mode: zero ++fmadd.s :: ++input: -347856.500000 1752.000000 -45786.500000 ++output: -609490368.000000 ++fcsr: 0x1010100 ++roundig mode: zero ++fmadd.s :: ++input: 356047.500000 0.015625 456.000000 ++output: 6019.242187 ++fcsr: 0x100 ++roundig mode: zero ++fmadd.s :: ++input: -1.000000 0.031250 34.031250 ++output: 34.000000 ++fcsr: 0x100 ++roundig mode: zero ++fmadd.s :: ++input: 23.062500 -248562.750000 45786.031250 ++output: -5686692.000000 ++fcsr: 0x1010100 ++roundig mode: +inf ++fmadd.s :: ++input: 0.000000 -4578.500000 -347856.500000 ++output: -347856.500000 ++fcsr: 0x200 ++roundig mode: +inf ++fmadd.s :: ++input: 456.250000 456.250000 356047.500000 ++output: 564211.562500 ++fcsr: 0x200 ++roundig mode: +inf ++fmadd.s :: ++input: 3.000000 34.031250 -1.000000 ++output: 101.093750 ++fcsr: 0x200 ++roundig mode: +inf ++fmadd.s :: ++input: -1.000000 4578.750000 23.062500 ++output: -4555.687500 ++fcsr: 0x200 ++roundig mode: +inf ++fmadd.s :: ++input: 1384.500000 175.000000 1752.000000 ++output: 244039.500000 ++fcsr: 0x200 ++roundig mode: +inf ++fmadd.s :: ++input: -7.250000 107.000000 0.015625 ++output: -775.734375 ++fcsr: 0x200 ++roundig mode: +inf ++fmadd.s :: ++input: 1000000000.000000 -456.250000 0.031250 ++output: -456249999360.000000 ++fcsr: 0x1010200 ++roundig mode: +inf ++fmadd.s :: ++input: -5786.500000 -7.250000 -248562.750000 ++output: -206610.625000 ++fcsr: 0x200 ++roundig mode: +inf ++fmadd.s :: ++input: 1752.000000 -3478.500000 1384.500000 ++output: -6092947.500000 ++fcsr: 0x200 ++roundig mode: +inf ++fmadd.s :: ++input: 0.015625 356.500000 -7.250000 ++output: -1.679687 ++fcsr: 0x200 ++roundig mode: +inf ++fmadd.s :: ++input: 0.031250 -1.000000 1000000000.000000 ++output: 1000000000.000000 ++fcsr: 0x1010200 ++roundig mode: +inf ++fmadd.s :: ++input: -248562.750000 23.062500 -5786.500000 ++output: -5738264.500000 ++fcsr: 0x1010200 ++roundig mode: +inf ++fmadd.s :: ++input: -45786.500000 0.000000 -347856.750000 ++output: -347856.750000 ++fcsr: 0x200 ++roundig mode: +inf ++fmadd.s :: ++input: 456.000000 456.250000 356047.750000 ++output: 564097.750000 ++fcsr: 0x200 ++roundig mode: +inf ++fmadd.s :: ++input: 34.031250 3.000000 -1.000000 ++output: 101.093750 ++fcsr: 0x200 ++roundig mode: +inf ++fmadd.s :: ++input: 45786.750000 -1.000000 23.031250 ++output: -45763.718750 ++fcsr: 0x200 ++roundig mode: +inf ++fmadd.s :: ++input: 1752065.000000 1384.500000 0.000000 ++output: 2425734144.000000 ++fcsr: 0x1010200 ++roundig mode: +inf 
++fmadd.s :: ++input: 107.000000 -7.000000 456.250000 ++output: -292.750000 ++fcsr: 0x200 ++roundig mode: +inf ++fmadd.s :: ++input: -45667.250000 100.000000 3.000000 ++output: -4566722.000000 ++fcsr: 0x200 ++roundig mode: +inf ++fmadd.s :: ++input: -7.000000 -5786.500000 -1.000000 ++output: 40504.500000 ++fcsr: 0x200 ++roundig mode: +inf ++fmadd.s :: ++input: -347856.500000 1752.000000 -45786.500000 ++output: -609490368.000000 ++fcsr: 0x1010200 ++roundig mode: +inf ++fmadd.s :: ++input: 356047.500000 0.015625 456.000000 ++output: 6019.242188 ++fcsr: 0x200 ++roundig mode: +inf ++fmadd.s :: ++input: -1.000000 0.031250 34.031250 ++output: 34.000000 ++fcsr: 0x200 ++roundig mode: +inf ++fmadd.s :: ++input: 23.062500 -248562.750000 45786.031250 ++output: -5686692.000000 ++fcsr: 0x1010200 ++roundig mode: -inf ++fmadd.s :: ++input: 0.000000 -4578.500000 -347856.500000 ++output: -347856.500000 ++fcsr: 0x300 ++roundig mode: -inf ++fmadd.s :: ++input: 456.250000 456.250000 356047.500000 ++output: 564211.562500 ++fcsr: 0x300 ++roundig mode: -inf ++fmadd.s :: ++input: 3.000000 34.031250 -1.000000 ++output: 101.093750 ++fcsr: 0x300 ++roundig mode: -inf ++fmadd.s :: ++input: -1.000000 4578.750000 23.062500 ++output: -4555.687500 ++fcsr: 0x300 ++roundig mode: -inf ++fmadd.s :: ++input: 1384.500000 175.000000 1752.000000 ++output: 244039.500000 ++fcsr: 0x300 ++roundig mode: -inf ++fmadd.s :: ++input: -7.250000 107.000000 0.015625 ++output: -775.734375 ++fcsr: 0x300 ++roundig mode: -inf ++fmadd.s :: ++input: 1000000000.000000 -456.250000 0.031250 ++output: -456250032128.000000 ++fcsr: 0x1010300 ++roundig mode: -inf ++fmadd.s :: ++input: -5786.500000 -7.250000 -248562.750000 ++output: -206610.625000 ++fcsr: 0x300 ++roundig mode: -inf ++fmadd.s :: ++input: 1752.000000 -3478.500000 1384.500000 ++output: -6092947.500000 ++fcsr: 0x300 ++roundig mode: -inf ++fmadd.s :: ++input: 0.015625 356.500000 -7.250000 ++output: -1.679688 ++fcsr: 0x300 ++roundig mode: -inf ++fmadd.s :: ++input: 0.031250 -1.000000 1000000000.000000 ++output: 999999936.000000 ++fcsr: 0x1010300 ++roundig mode: -inf ++fmadd.s :: ++input: -248562.750000 23.062500 -5786.500000 ++output: -5738265.000000 ++fcsr: 0x1010300 ++roundig mode: -inf ++fmadd.s :: ++input: -45786.500000 0.000000 -347856.750000 ++output: -347856.750000 ++fcsr: 0x300 ++roundig mode: -inf ++fmadd.s :: ++input: 456.000000 456.250000 356047.750000 ++output: 564097.750000 ++fcsr: 0x300 ++roundig mode: -inf ++fmadd.s :: ++input: 34.031250 3.000000 -1.000000 ++output: 101.093750 ++fcsr: 0x300 ++roundig mode: -inf ++fmadd.s :: ++input: 45786.750000 -1.000000 23.031250 ++output: -45763.718750 ++fcsr: 0x300 ++roundig mode: -inf ++fmadd.s :: ++input: 1752065.000000 1384.500000 0.000000 ++output: 2425733888.000000 ++fcsr: 0x1010300 ++roundig mode: -inf ++fmadd.s :: ++input: 107.000000 -7.000000 456.250000 ++output: -292.750000 ++fcsr: 0x300 ++roundig mode: -inf ++fmadd.s :: ++input: -45667.250000 100.000000 3.000000 ++output: -4566722.000000 ++fcsr: 0x300 ++roundig mode: -inf ++fmadd.s :: ++input: -7.000000 -5786.500000 -1.000000 ++output: 40504.500000 ++fcsr: 0x300 ++roundig mode: -inf ++fmadd.s :: ++input: -347856.500000 1752.000000 -45786.500000 ++output: -609490432.000000 ++fcsr: 0x1010300 ++roundig mode: -inf ++fmadd.s :: ++input: 356047.500000 0.015625 456.000000 ++output: 6019.242187 ++fcsr: 0x300 ++roundig mode: -inf ++fmadd.s :: ++input: -1.000000 0.031250 34.031250 ++output: 34.000000 ++fcsr: 0x300 ++roundig mode: -inf ++fmadd.s :: ++input: 23.062500 -248562.750000 
45786.031250 ++output: -5686692.500000 ++fcsr: 0x1010300 ++roundig mode: near ++fmadd.d :: ++input: 0.000000000000000 -45786.500000000000000 -347856.500000000000000 ++output: -347856.500000000000000 ++fcsr: 0 ++roundig mode: near ++fmadd.d :: ++input: 456.250000000000000 456.250000000000000 356047.500000000000000 ++output: 564211.562500000000000 ++fcsr: 0 ++roundig mode: near ++fmadd.d :: ++input: 3.000000000000000 34.031250000000000 -1.000000000000000 ++output: 101.093750000000000 ++fcsr: 0 ++roundig mode: near ++fmadd.d :: ++input: -1.000000000000000 45786.750000000000000 23.062500000000000 ++output: -45763.687500000000000 ++fcsr: 0 ++roundig mode: near ++fmadd.d :: ++input: 1384.500000000000000 1752065.000000000000000 1752.000000000000000 ++output: 2425735744.500000000000000 ++fcsr: 0 ++roundig mode: near ++fmadd.d :: ++input: -7.250000000000000 107.000000000000000 0.015625000000000 ++output: -775.734375000000000 ++fcsr: 0 ++roundig mode: near ++fmadd.d :: ++input: 1000000000.000000000000000 -45667.250000000000000 0.031250000000000 ++output: -45667249999999.968750000000000 ++fcsr: 0 ++roundig mode: near ++fmadd.d :: ++input: -5786.500000000000000 -7.250000000000000 -248562.750000000000000 ++output: -206610.625000000000000 ++fcsr: 0 ++roundig mode: near ++fmadd.d :: ++input: 1752.000000000000000 -347856.500000000000000 1384.500000000000000 ++output: -609443203.500000000000000 ++fcsr: 0 ++roundig mode: near ++fmadd.d :: ++input: 0.015625000000000 356047.500000000000000 -7.250000000000000 ++output: 5555.992187500000000 ++fcsr: 0 ++roundig mode: near ++fmadd.d :: ++input: 0.031250000000000 -1.000000000000000 1000000000.000000000000000 ++output: 999999999.968750000000000 ++fcsr: 0 ++roundig mode: near ++fmadd.d :: ++input: -248562.750000000000000 23.062500000000000 -5786.500000000000000 ++output: -5738264.921875000000000 ++fcsr: 0 ++roundig mode: near ++fmadd.d :: ++input: -45786.500000000000000 0.000000000000000 -347856.750000000000000 ++output: -347856.750000000000000 ++fcsr: 0 ++roundig mode: near ++fmadd.d :: ++input: 456.000000000000000 456.250000000000000 356047.750000000000000 ++output: 564097.750000000000000 ++fcsr: 0 ++roundig mode: near ++fmadd.d :: ++input: 34.031250000000000 3.000000000000000 -1.000000000000000 ++output: 101.093750000000000 ++fcsr: 0 ++roundig mode: near ++fmadd.d :: ++input: 45786.750000000000000 -1.000000000000000 23.031250000000000 ++output: -45763.718750000000000 ++fcsr: 0 ++roundig mode: near ++fmadd.d :: ++input: 1752065.000000000000000 1384.500000000000000 0.000000000000000 ++output: 2425733992.500000000000000 ++fcsr: 0 ++roundig mode: near ++fmadd.d :: ++input: 107.000000000000000 -7.000000000000000 456.250000000000000 ++output: -292.750000000000000 ++fcsr: 0 ++roundig mode: near ++fmadd.d :: ++input: -45667.250000000000000 1000000000.000000000000000 3.000000000000000 ++output: -45667249999997.000000000000000 ++fcsr: 0 ++roundig mode: near ++fmadd.d :: ++input: -7.000000000000000 -5786.500000000000000 -1.000000000000000 ++output: 40504.500000000000000 ++fcsr: 0 ++roundig mode: near ++fmadd.d :: ++input: -347856.500000000000000 1752.000000000000000 -45786.500000000000000 ++output: -609490374.500000000000000 ++fcsr: 0 ++roundig mode: near ++fmadd.d :: ++input: 356047.500000000000000 0.015625000000000 456.000000000000000 ++output: 6019.242187500000000 ++fcsr: 0 ++roundig mode: near ++fmadd.d :: ++input: -1.000000000000000 0.031250000000000 34.031250000000000 ++output: 34.000000000000000 ++fcsr: 0 ++roundig mode: near ++fmadd.d :: ++input: 23.062500000000000 
-248562.750000000000000 45786.031250000000000 ++output: -5686692.390625000000000 ++fcsr: 0 ++roundig mode: zero ++fmadd.d :: ++input: 0.000000000000000 -45786.500000000000000 -347856.500000000000000 ++output: -347856.500000000000000 ++fcsr: 0x100 ++roundig mode: zero ++fmadd.d :: ++input: 456.250000000000000 456.250000000000000 356047.500000000000000 ++output: 564211.562500000000000 ++fcsr: 0x100 ++roundig mode: zero ++fmadd.d :: ++input: 3.000000000000000 34.031250000000000 -1.000000000000000 ++output: 101.093750000000000 ++fcsr: 0x100 ++roundig mode: zero ++fmadd.d :: ++input: -1.000000000000000 45786.750000000000000 23.062500000000000 ++output: -45763.687500000000000 ++fcsr: 0x100 ++roundig mode: zero ++fmadd.d :: ++input: 1384.500000000000000 1752065.000000000000000 1752.000000000000000 ++output: 2425735744.500000000000000 ++fcsr: 0x100 ++roundig mode: zero ++fmadd.d :: ++input: -7.250000000000000 107.000000000000000 0.015625000000000 ++output: -775.734375000000000 ++fcsr: 0x100 ++roundig mode: zero ++fmadd.d :: ++input: 1000000000.000000000000000 -45667.250000000000000 0.031250000000000 ++output: -45667249999999.968750000000000 ++fcsr: 0x100 ++roundig mode: zero ++fmadd.d :: ++input: -5786.500000000000000 -7.250000000000000 -248562.750000000000000 ++output: -206610.625000000000000 ++fcsr: 0x100 ++roundig mode: zero ++fmadd.d :: ++input: 1752.000000000000000 -347856.500000000000000 1384.500000000000000 ++output: -609443203.500000000000000 ++fcsr: 0x100 ++roundig mode: zero ++fmadd.d :: ++input: 0.015625000000000 356047.500000000000000 -7.250000000000000 ++output: 5555.992187500000000 ++fcsr: 0x100 ++roundig mode: zero ++fmadd.d :: ++input: 0.031250000000000 -1.000000000000000 1000000000.000000000000000 ++output: 999999999.968750000000000 ++fcsr: 0x100 ++roundig mode: zero ++fmadd.d :: ++input: -248562.750000000000000 23.062500000000000 -5786.500000000000000 ++output: -5738264.921875000000000 ++fcsr: 0x100 ++roundig mode: zero ++fmadd.d :: ++input: -45786.500000000000000 0.000000000000000 -347856.750000000000000 ++output: -347856.750000000000000 ++fcsr: 0x100 ++roundig mode: zero ++fmadd.d :: ++input: 456.000000000000000 456.250000000000000 356047.750000000000000 ++output: 564097.750000000000000 ++fcsr: 0x100 ++roundig mode: zero ++fmadd.d :: ++input: 34.031250000000000 3.000000000000000 -1.000000000000000 ++output: 101.093750000000000 ++fcsr: 0x100 ++roundig mode: zero ++fmadd.d :: ++input: 45786.750000000000000 -1.000000000000000 23.031250000000000 ++output: -45763.718750000000000 ++fcsr: 0x100 ++roundig mode: zero ++fmadd.d :: ++input: 1752065.000000000000000 1384.500000000000000 0.000000000000000 ++output: 2425733992.500000000000000 ++fcsr: 0x100 ++roundig mode: zero ++fmadd.d :: ++input: 107.000000000000000 -7.000000000000000 456.250000000000000 ++output: -292.750000000000000 ++fcsr: 0x100 ++roundig mode: zero ++fmadd.d :: ++input: -45667.250000000000000 1000000000.000000000000000 3.000000000000000 ++output: -45667249999997.000000000000000 ++fcsr: 0x100 ++roundig mode: zero ++fmadd.d :: ++input: -7.000000000000000 -5786.500000000000000 -1.000000000000000 ++output: 40504.500000000000000 ++fcsr: 0x100 ++roundig mode: zero ++fmadd.d :: ++input: -347856.500000000000000 1752.000000000000000 -45786.500000000000000 ++output: -609490374.500000000000000 ++fcsr: 0x100 ++roundig mode: zero ++fmadd.d :: ++input: 356047.500000000000000 0.015625000000000 456.000000000000000 ++output: 6019.242187500000000 ++fcsr: 0x100 ++roundig mode: zero ++fmadd.d :: ++input: -1.000000000000000 
0.031250000000000 34.031250000000000 ++output: 34.000000000000000 ++fcsr: 0x100 ++roundig mode: zero ++fmadd.d :: ++input: 23.062500000000000 -248562.750000000000000 45786.031250000000000 ++output: -5686692.390625000000000 ++fcsr: 0x100 ++roundig mode: +inf ++fmadd.d :: ++input: 0.000000000000000 -45786.500000000000000 -347856.500000000000000 ++output: -347856.500000000000000 ++fcsr: 0x200 ++roundig mode: +inf ++fmadd.d :: ++input: 456.250000000000000 456.250000000000000 356047.500000000000000 ++output: 564211.562500000000000 ++fcsr: 0x200 ++roundig mode: +inf ++fmadd.d :: ++input: 3.000000000000000 34.031250000000000 -1.000000000000000 ++output: 101.093750000000000 ++fcsr: 0x200 ++roundig mode: +inf ++fmadd.d :: ++input: -1.000000000000000 45786.750000000000000 23.062500000000000 ++output: -45763.687500000000000 ++fcsr: 0x200 ++roundig mode: +inf ++fmadd.d :: ++input: 1384.500000000000000 1752065.000000000000000 1752.000000000000000 ++output: 2425735744.500000000000000 ++fcsr: 0x200 ++roundig mode: +inf ++fmadd.d :: ++input: -7.250000000000000 107.000000000000000 0.015625000000000 ++output: -775.734375000000000 ++fcsr: 0x200 ++roundig mode: +inf ++fmadd.d :: ++input: 1000000000.000000000000000 -45667.250000000000000 0.031250000000000 ++output: -45667249999999.968750000000000 ++fcsr: 0x200 ++roundig mode: +inf ++fmadd.d :: ++input: -5786.500000000000000 -7.250000000000000 -248562.750000000000000 ++output: -206610.625000000000000 ++fcsr: 0x200 ++roundig mode: +inf ++fmadd.d :: ++input: 1752.000000000000000 -347856.500000000000000 1384.500000000000000 ++output: -609443203.500000000000000 ++fcsr: 0x200 ++roundig mode: +inf ++fmadd.d :: ++input: 0.015625000000000 356047.500000000000000 -7.250000000000000 ++output: 5555.992187500000000 ++fcsr: 0x200 ++roundig mode: +inf ++fmadd.d :: ++input: 0.031250000000000 -1.000000000000000 1000000000.000000000000000 ++output: 999999999.968750000000000 ++fcsr: 0x200 ++roundig mode: +inf ++fmadd.d :: ++input: -248562.750000000000000 23.062500000000000 -5786.500000000000000 ++output: -5738264.921875000000000 ++fcsr: 0x200 ++roundig mode: +inf ++fmadd.d :: ++input: -45786.500000000000000 0.000000000000000 -347856.750000000000000 ++output: -347856.750000000000000 ++fcsr: 0x200 ++roundig mode: +inf ++fmadd.d :: ++input: 456.000000000000000 456.250000000000000 356047.750000000000000 ++output: 564097.750000000000000 ++fcsr: 0x200 ++roundig mode: +inf ++fmadd.d :: ++input: 34.031250000000000 3.000000000000000 -1.000000000000000 ++output: 101.093750000000000 ++fcsr: 0x200 ++roundig mode: +inf ++fmadd.d :: ++input: 45786.750000000000000 -1.000000000000000 23.031250000000000 ++output: -45763.718750000000000 ++fcsr: 0x200 ++roundig mode: +inf ++fmadd.d :: ++input: 1752065.000000000000000 1384.500000000000000 0.000000000000000 ++output: 2425733992.500000000000000 ++fcsr: 0x200 ++roundig mode: +inf ++fmadd.d :: ++input: 107.000000000000000 -7.000000000000000 456.250000000000000 ++output: -292.750000000000000 ++fcsr: 0x200 ++roundig mode: +inf ++fmadd.d :: ++input: -45667.250000000000000 1000000000.000000000000000 3.000000000000000 ++output: -45667249999997.000000000000000 ++fcsr: 0x200 ++roundig mode: +inf ++fmadd.d :: ++input: -7.000000000000000 -5786.500000000000000 -1.000000000000000 ++output: 40504.500000000000000 ++fcsr: 0x200 ++roundig mode: +inf ++fmadd.d :: ++input: -347856.500000000000000 1752.000000000000000 -45786.500000000000000 ++output: -609490374.500000000000000 ++fcsr: 0x200 ++roundig mode: +inf ++fmadd.d :: ++input: 356047.500000000000000 
0.015625000000000 456.000000000000000 ++output: 6019.242187500000000 ++fcsr: 0x200 ++roundig mode: +inf ++fmadd.d :: ++input: -1.000000000000000 0.031250000000000 34.031250000000000 ++output: 34.000000000000000 ++fcsr: 0x200 ++roundig mode: +inf ++fmadd.d :: ++input: 23.062500000000000 -248562.750000000000000 45786.031250000000000 ++output: -5686692.390625000000000 ++fcsr: 0x200 ++roundig mode: -inf ++fmadd.d :: ++input: 0.000000000000000 -45786.500000000000000 -347856.500000000000000 ++output: -347856.500000000000000 ++fcsr: 0x300 ++roundig mode: -inf ++fmadd.d :: ++input: 456.250000000000000 456.250000000000000 356047.500000000000000 ++output: 564211.562500000000000 ++fcsr: 0x300 ++roundig mode: -inf ++fmadd.d :: ++input: 3.000000000000000 34.031250000000000 -1.000000000000000 ++output: 101.093750000000000 ++fcsr: 0x300 ++roundig mode: -inf ++fmadd.d :: ++input: -1.000000000000000 45786.750000000000000 23.062500000000000 ++output: -45763.687500000000000 ++fcsr: 0x300 ++roundig mode: -inf ++fmadd.d :: ++input: 1384.500000000000000 1752065.000000000000000 1752.000000000000000 ++output: 2425735744.500000000000000 ++fcsr: 0x300 ++roundig mode: -inf ++fmadd.d :: ++input: -7.250000000000000 107.000000000000000 0.015625000000000 ++output: -775.734375000000000 ++fcsr: 0x300 ++roundig mode: -inf ++fmadd.d :: ++input: 1000000000.000000000000000 -45667.250000000000000 0.031250000000000 ++output: -45667249999999.968750000000000 ++fcsr: 0x300 ++roundig mode: -inf ++fmadd.d :: ++input: -5786.500000000000000 -7.250000000000000 -248562.750000000000000 ++output: -206610.625000000000000 ++fcsr: 0x300 ++roundig mode: -inf ++fmadd.d :: ++input: 1752.000000000000000 -347856.500000000000000 1384.500000000000000 ++output: -609443203.500000000000000 ++fcsr: 0x300 ++roundig mode: -inf ++fmadd.d :: ++input: 0.015625000000000 356047.500000000000000 -7.250000000000000 ++output: 5555.992187500000000 ++fcsr: 0x300 ++roundig mode: -inf ++fmadd.d :: ++input: 0.031250000000000 -1.000000000000000 1000000000.000000000000000 ++output: 999999999.968750000000000 ++fcsr: 0x300 ++roundig mode: -inf ++fmadd.d :: ++input: -248562.750000000000000 23.062500000000000 -5786.500000000000000 ++output: -5738264.921875000000000 ++fcsr: 0x300 ++roundig mode: -inf ++fmadd.d :: ++input: -45786.500000000000000 0.000000000000000 -347856.750000000000000 ++output: -347856.750000000000000 ++fcsr: 0x300 ++roundig mode: -inf ++fmadd.d :: ++input: 456.000000000000000 456.250000000000000 356047.750000000000000 ++output: 564097.750000000000000 ++fcsr: 0x300 ++roundig mode: -inf ++fmadd.d :: ++input: 34.031250000000000 3.000000000000000 -1.000000000000000 ++output: 101.093750000000000 ++fcsr: 0x300 ++roundig mode: -inf ++fmadd.d :: ++input: 45786.750000000000000 -1.000000000000000 23.031250000000000 ++output: -45763.718750000000000 ++fcsr: 0x300 ++roundig mode: -inf ++fmadd.d :: ++input: 1752065.000000000000000 1384.500000000000000 0.000000000000000 ++output: 2425733992.500000000000000 ++fcsr: 0x300 ++roundig mode: -inf ++fmadd.d :: ++input: 107.000000000000000 -7.000000000000000 456.250000000000000 ++output: -292.750000000000000 ++fcsr: 0x300 ++roundig mode: -inf ++fmadd.d :: ++input: -45667.250000000000000 1000000000.000000000000000 3.000000000000000 ++output: -45667249999997.000000000000000 ++fcsr: 0x300 ++roundig mode: -inf ++fmadd.d :: ++input: -7.000000000000000 -5786.500000000000000 -1.000000000000000 ++output: 40504.500000000000000 ++fcsr: 0x300 ++roundig mode: -inf ++fmadd.d :: ++input: -347856.500000000000000 1752.000000000000000 
-45786.500000000000000 ++output: -609490374.500000000000000 ++fcsr: 0x300 ++roundig mode: -inf ++fmadd.d :: ++input: 356047.500000000000000 0.015625000000000 456.000000000000000 ++output: 6019.242187500000000 ++fcsr: 0x300 ++roundig mode: -inf ++fmadd.d :: ++input: -1.000000000000000 0.031250000000000 34.031250000000000 ++output: 34.000000000000000 ++fcsr: 0x300 ++roundig mode: -inf ++fmadd.d :: ++input: 23.062500000000000 -248562.750000000000000 45786.031250000000000 ++output: -5686692.390625000000000 ++fcsr: 0x300 ++roundig mode: near ++fmsub.s :: ++input: 0.000000 -4578.500000 -347856.500000 ++output: 347856.500000 ++fcsr: 0 ++roundig mode: near ++fmsub.s :: ++input: 456.250000 456.250000 356047.500000 ++output: -147883.437500 ++fcsr: 0 ++roundig mode: near ++fmsub.s :: ++input: 3.000000 34.031250 -1.000000 ++output: 103.093750 ++fcsr: 0 ++roundig mode: near ++fmsub.s :: ++input: -1.000000 4578.750000 23.062500 ++output: -4601.812500 ++fcsr: 0 ++roundig mode: near ++fmsub.s :: ++input: 1384.500000 175.000000 1752.000000 ++output: 240535.500000 ++fcsr: 0 ++roundig mode: near ++fmsub.s :: ++input: -7.250000 107.000000 0.015625 ++output: -775.765625 ++fcsr: 0 ++roundig mode: near ++fmsub.s :: ++input: 1000000000.000000 -456.250000 0.031250 ++output: -456249999360.000000 ++fcsr: 0x1010000 ++roundig mode: near ++fmsub.s :: ++input: -5786.500000 -7.250000 -248562.750000 ++output: 290514.875000 ++fcsr: 0 ++roundig mode: near ++fmsub.s :: ++input: 1752.000000 -3478.500000 1384.500000 ++output: -6095716.500000 ++fcsr: 0 ++roundig mode: near ++fmsub.s :: ++input: 0.015625 356.500000 -7.250000 ++output: 12.820312 ++fcsr: 0 ++roundig mode: near ++fmsub.s :: ++input: 0.031250 -1.000000 1000000000.000000 ++output: -1000000000.000000 ++fcsr: 0x1010000 ++roundig mode: near ++fmsub.s :: ++input: -248562.750000 23.062500 -5786.500000 ++output: -5726692.000000 ++fcsr: 0x1010000 ++roundig mode: near ++fmsub.s :: ++input: -45786.500000 0.000000 -347856.750000 ++output: 347856.750000 ++fcsr: 0 ++roundig mode: near ++fmsub.s :: ++input: 456.000000 456.250000 356047.750000 ++output: -147997.750000 ++fcsr: 0 ++roundig mode: near ++fmsub.s :: ++input: 34.031250 3.000000 -1.000000 ++output: 103.093750 ++fcsr: 0 ++roundig mode: near ++fmsub.s :: ++input: 45786.750000 -1.000000 23.031250 ++output: -45809.781250 ++fcsr: 0 ++roundig mode: near ++fmsub.s :: ++input: 1752065.000000 1384.500000 0.000000 ++output: 2425733888.000000 ++fcsr: 0x1010000 ++roundig mode: near ++fmsub.s :: ++input: 107.000000 -7.000000 456.250000 ++output: -1205.250000 ++fcsr: 0 ++roundig mode: near ++fmsub.s :: ++input: -45667.250000 100.000000 3.000000 ++output: -4566728.000000 ++fcsr: 0 ++roundig mode: near ++fmsub.s :: ++input: -7.000000 -5786.500000 -1.000000 ++output: 40506.500000 ++fcsr: 0 ++roundig mode: near ++fmsub.s :: ++input: -347856.500000 1752.000000 -45786.500000 ++output: -609398784.000000 ++fcsr: 0x1010000 ++roundig mode: near ++fmsub.s :: ++input: 356047.500000 0.015625 456.000000 ++output: 5107.242188 ++fcsr: 0 ++roundig mode: near ++fmsub.s :: ++input: -1.000000 0.031250 34.031250 ++output: -34.062500 ++fcsr: 0 ++roundig mode: near ++fmsub.s :: ++input: 23.062500 -248562.750000 45786.031250 ++output: -5778264.500000 ++fcsr: 0x1010000 ++roundig mode: zero ++fmsub.s :: ++input: 0.000000 -4578.500000 -347856.500000 ++output: 347856.500000 ++fcsr: 0x100 ++roundig mode: zero ++fmsub.s :: ++input: 456.250000 456.250000 356047.500000 ++output: -147883.437500 ++fcsr: 0x100 ++roundig mode: zero ++fmsub.s :: ++input: 3.000000 
34.031250 -1.000000 ++output: 103.093750 ++fcsr: 0x100 ++roundig mode: zero ++fmsub.s :: ++input: -1.000000 4578.750000 23.062500 ++output: -4601.812500 ++fcsr: 0x100 ++roundig mode: zero ++fmsub.s :: ++input: 1384.500000 175.000000 1752.000000 ++output: 240535.500000 ++fcsr: 0x100 ++roundig mode: zero ++fmsub.s :: ++input: -7.250000 107.000000 0.015625 ++output: -775.765625 ++fcsr: 0x100 ++roundig mode: zero ++fmsub.s :: ++input: 1000000000.000000 -456.250000 0.031250 ++output: -456249999360.000000 ++fcsr: 0x1010100 ++roundig mode: zero ++fmsub.s :: ++input: -5786.500000 -7.250000 -248562.750000 ++output: 290514.875000 ++fcsr: 0x100 ++roundig mode: zero ++fmsub.s :: ++input: 1752.000000 -3478.500000 1384.500000 ++output: -6095716.500000 ++fcsr: 0x100 ++roundig mode: zero ++fmsub.s :: ++input: 0.015625 356.500000 -7.250000 ++output: 12.820312 ++fcsr: 0x100 ++roundig mode: zero ++fmsub.s :: ++input: 0.031250 -1.000000 1000000000.000000 ++output: -1000000000.000000 ++fcsr: 0x1010100 ++roundig mode: zero ++fmsub.s :: ++input: -248562.750000 23.062500 -5786.500000 ++output: -5726691.500000 ++fcsr: 0x1010100 ++roundig mode: zero ++fmsub.s :: ++input: -45786.500000 0.000000 -347856.750000 ++output: 347856.750000 ++fcsr: 0x100 ++roundig mode: zero ++fmsub.s :: ++input: 456.000000 456.250000 356047.750000 ++output: -147997.750000 ++fcsr: 0x100 ++roundig mode: zero ++fmsub.s :: ++input: 34.031250 3.000000 -1.000000 ++output: 103.093750 ++fcsr: 0x100 ++roundig mode: zero ++fmsub.s :: ++input: 45786.750000 -1.000000 23.031250 ++output: -45809.781250 ++fcsr: 0x100 ++roundig mode: zero ++fmsub.s :: ++input: 1752065.000000 1384.500000 0.000000 ++output: 2425733888.000000 ++fcsr: 0x1010100 ++roundig mode: zero ++fmsub.s :: ++input: 107.000000 -7.000000 456.250000 ++output: -1205.250000 ++fcsr: 0x100 ++roundig mode: zero ++fmsub.s :: ++input: -45667.250000 100.000000 3.000000 ++output: -4566728.000000 ++fcsr: 0x100 ++roundig mode: zero ++fmsub.s :: ++input: -7.000000 -5786.500000 -1.000000 ++output: 40506.500000 ++fcsr: 0x100 ++roundig mode: zero ++fmsub.s :: ++input: -347856.500000 1752.000000 -45786.500000 ++output: -609398784.000000 ++fcsr: 0x1010100 ++roundig mode: zero ++fmsub.s :: ++input: 356047.500000 0.015625 456.000000 ++output: 5107.242187 ++fcsr: 0x100 ++roundig mode: zero ++fmsub.s :: ++input: -1.000000 0.031250 34.031250 ++output: -34.062500 ++fcsr: 0x100 ++roundig mode: zero ++fmsub.s :: ++input: 23.062500 -248562.750000 45786.031250 ++output: -5778264.000000 ++fcsr: 0x1010100 ++roundig mode: +inf ++fmsub.s :: ++input: 0.000000 -4578.500000 -347856.500000 ++output: 347856.500000 ++fcsr: 0x200 ++roundig mode: +inf ++fmsub.s :: ++input: 456.250000 456.250000 356047.500000 ++output: -147883.437500 ++fcsr: 0x200 ++roundig mode: +inf ++fmsub.s :: ++input: 3.000000 34.031250 -1.000000 ++output: 103.093750 ++fcsr: 0x200 ++roundig mode: +inf ++fmsub.s :: ++input: -1.000000 4578.750000 23.062500 ++output: -4601.812500 ++fcsr: 0x200 ++roundig mode: +inf ++fmsub.s :: ++input: 1384.500000 175.000000 1752.000000 ++output: 240535.500000 ++fcsr: 0x200 ++roundig mode: +inf ++fmsub.s :: ++input: -7.250000 107.000000 0.015625 ++output: -775.765625 ++fcsr: 0x200 ++roundig mode: +inf ++fmsub.s :: ++input: 1000000000.000000 -456.250000 0.031250 ++output: -456249999360.000000 ++fcsr: 0x1010200 ++roundig mode: +inf ++fmsub.s :: ++input: -5786.500000 -7.250000 -248562.750000 ++output: 290514.875000 ++fcsr: 0x200 ++roundig mode: +inf ++fmsub.s :: ++input: 1752.000000 -3478.500000 1384.500000 ++output: 
-6095716.500000 ++fcsr: 0x200 ++roundig mode: +inf ++fmsub.s :: ++input: 0.015625 356.500000 -7.250000 ++output: 12.820313 ++fcsr: 0x200 ++roundig mode: +inf ++fmsub.s :: ++input: 0.031250 -1.000000 1000000000.000000 ++output: -1000000000.000000 ++fcsr: 0x1010200 ++roundig mode: +inf ++fmsub.s :: ++input: -248562.750000 23.062500 -5786.500000 ++output: -5726691.500000 ++fcsr: 0x1010200 ++roundig mode: +inf ++fmsub.s :: ++input: -45786.500000 0.000000 -347856.750000 ++output: 347856.750000 ++fcsr: 0x200 ++roundig mode: +inf ++fmsub.s :: ++input: 456.000000 456.250000 356047.750000 ++output: -147997.750000 ++fcsr: 0x200 ++roundig mode: +inf ++fmsub.s :: ++input: 34.031250 3.000000 -1.000000 ++output: 103.093750 ++fcsr: 0x200 ++roundig mode: +inf ++fmsub.s :: ++input: 45786.750000 -1.000000 23.031250 ++output: -45809.781250 ++fcsr: 0x200 ++roundig mode: +inf ++fmsub.s :: ++input: 1752065.000000 1384.500000 0.000000 ++output: 2425734144.000000 ++fcsr: 0x1010200 ++roundig mode: +inf ++fmsub.s :: ++input: 107.000000 -7.000000 456.250000 ++output: -1205.250000 ++fcsr: 0x200 ++roundig mode: +inf ++fmsub.s :: ++input: -45667.250000 100.000000 3.000000 ++output: -4566728.000000 ++fcsr: 0x200 ++roundig mode: +inf ++fmsub.s :: ++input: -7.000000 -5786.500000 -1.000000 ++output: 40506.500000 ++fcsr: 0x200 ++roundig mode: +inf ++fmsub.s :: ++input: -347856.500000 1752.000000 -45786.500000 ++output: -609398784.000000 ++fcsr: 0x1010200 ++roundig mode: +inf ++fmsub.s :: ++input: 356047.500000 0.015625 456.000000 ++output: 5107.242188 ++fcsr: 0x200 ++roundig mode: +inf ++fmsub.s :: ++input: -1.000000 0.031250 34.031250 ++output: -34.062500 ++fcsr: 0x200 ++roundig mode: +inf ++fmsub.s :: ++input: 23.062500 -248562.750000 45786.031250 ++output: -5778264.000000 ++fcsr: 0x1010200 ++roundig mode: -inf ++fmsub.s :: ++input: 0.000000 -4578.500000 -347856.500000 ++output: 347856.500000 ++fcsr: 0x300 ++roundig mode: -inf ++fmsub.s :: ++input: 456.250000 456.250000 356047.500000 ++output: -147883.437500 ++fcsr: 0x300 ++roundig mode: -inf ++fmsub.s :: ++input: 3.000000 34.031250 -1.000000 ++output: 103.093750 ++fcsr: 0x300 ++roundig mode: -inf ++fmsub.s :: ++input: -1.000000 4578.750000 23.062500 ++output: -4601.812500 ++fcsr: 0x300 ++roundig mode: -inf ++fmsub.s :: ++input: 1384.500000 175.000000 1752.000000 ++output: 240535.500000 ++fcsr: 0x300 ++roundig mode: -inf ++fmsub.s :: ++input: -7.250000 107.000000 0.015625 ++output: -775.765625 ++fcsr: 0x300 ++roundig mode: -inf ++fmsub.s :: ++input: 1000000000.000000 -456.250000 0.031250 ++output: -456250032128.000000 ++fcsr: 0x1010300 ++roundig mode: -inf ++fmsub.s :: ++input: -5786.500000 -7.250000 -248562.750000 ++output: 290514.875000 ++fcsr: 0x300 ++roundig mode: -inf ++fmsub.s :: ++input: 1752.000000 -3478.500000 1384.500000 ++output: -6095716.500000 ++fcsr: 0x300 ++roundig mode: -inf ++fmsub.s :: ++input: 0.015625 356.500000 -7.250000 ++output: 12.820312 ++fcsr: 0x300 ++roundig mode: -inf ++fmsub.s :: ++input: 0.031250 -1.000000 1000000000.000000 ++output: -1000000064.000000 ++fcsr: 0x1010300 ++roundig mode: -inf ++fmsub.s :: ++input: -248562.750000 23.062500 -5786.500000 ++output: -5726692.000000 ++fcsr: 0x1010300 ++roundig mode: -inf ++fmsub.s :: ++input: -45786.500000 0.000000 -347856.750000 ++output: 347856.750000 ++fcsr: 0x300 ++roundig mode: -inf ++fmsub.s :: ++input: 456.000000 456.250000 356047.750000 ++output: -147997.750000 ++fcsr: 0x300 ++roundig mode: -inf ++fmsub.s :: ++input: 34.031250 3.000000 -1.000000 ++output: 103.093750 ++fcsr: 0x300 ++roundig 
mode: -inf ++fmsub.s :: ++input: 45786.750000 -1.000000 23.031250 ++output: -45809.781250 ++fcsr: 0x300 ++roundig mode: -inf ++fmsub.s :: ++input: 1752065.000000 1384.500000 0.000000 ++output: 2425733888.000000 ++fcsr: 0x1010300 ++roundig mode: -inf ++fmsub.s :: ++input: 107.000000 -7.000000 456.250000 ++output: -1205.250000 ++fcsr: 0x300 ++roundig mode: -inf ++fmsub.s :: ++input: -45667.250000 100.000000 3.000000 ++output: -4566728.000000 ++fcsr: 0x300 ++roundig mode: -inf ++fmsub.s :: ++input: -7.000000 -5786.500000 -1.000000 ++output: 40506.500000 ++fcsr: 0x300 ++roundig mode: -inf ++fmsub.s :: ++input: -347856.500000 1752.000000 -45786.500000 ++output: -609398848.000000 ++fcsr: 0x1010300 ++roundig mode: -inf ++fmsub.s :: ++input: 356047.500000 0.015625 456.000000 ++output: 5107.242187 ++fcsr: 0x300 ++roundig mode: -inf ++fmsub.s :: ++input: -1.000000 0.031250 34.031250 ++output: -34.062500 ++fcsr: 0x300 ++roundig mode: -inf ++fmsub.s :: ++input: 23.062500 -248562.750000 45786.031250 ++output: -5778264.500000 ++fcsr: 0x1010300 ++roundig mode: near ++fmsub.d :: ++input: 0.000000000000000 -45786.500000000000000 -347856.500000000000000 ++output: 347856.500000000000000 ++fcsr: 0 ++roundig mode: near ++fmsub.d :: ++input: 456.250000000000000 456.250000000000000 356047.500000000000000 ++output: -147883.437500000000000 ++fcsr: 0 ++roundig mode: near ++fmsub.d :: ++input: 3.000000000000000 34.031250000000000 -1.000000000000000 ++output: 103.093750000000000 ++fcsr: 0 ++roundig mode: near ++fmsub.d :: ++input: -1.000000000000000 45786.750000000000000 23.062500000000000 ++output: -45809.812500000000000 ++fcsr: 0 ++roundig mode: near ++fmsub.d :: ++input: 1384.500000000000000 1752065.000000000000000 1752.000000000000000 ++output: 2425732240.500000000000000 ++fcsr: 0 ++roundig mode: near ++fmsub.d :: ++input: -7.250000000000000 107.000000000000000 0.015625000000000 ++output: -775.765625000000000 ++fcsr: 0 ++roundig mode: near ++fmsub.d :: ++input: 1000000000.000000000000000 -45667.250000000000000 0.031250000000000 ++output: -45667250000000.031250000000000 ++fcsr: 0 ++roundig mode: near ++fmsub.d :: ++input: -5786.500000000000000 -7.250000000000000 -248562.750000000000000 ++output: 290514.875000000000000 ++fcsr: 0 ++roundig mode: near ++fmsub.d :: ++input: 1752.000000000000000 -347856.500000000000000 1384.500000000000000 ++output: -609445972.500000000000000 ++fcsr: 0 ++roundig mode: near ++fmsub.d :: ++input: 0.015625000000000 356047.500000000000000 -7.250000000000000 ++output: 5570.492187500000000 ++fcsr: 0 ++roundig mode: near ++fmsub.d :: ++input: 0.031250000000000 -1.000000000000000 1000000000.000000000000000 ++output: -1000000000.031250000000000 ++fcsr: 0 ++roundig mode: near ++fmsub.d :: ++input: -248562.750000000000000 23.062500000000000 -5786.500000000000000 ++output: -5726691.921875000000000 ++fcsr: 0 ++roundig mode: near ++fmsub.d :: ++input: -45786.500000000000000 0.000000000000000 -347856.750000000000000 ++output: 347856.750000000000000 ++fcsr: 0 ++roundig mode: near ++fmsub.d :: ++input: 456.000000000000000 456.250000000000000 356047.750000000000000 ++output: -147997.750000000000000 ++fcsr: 0 ++roundig mode: near ++fmsub.d :: ++input: 34.031250000000000 3.000000000000000 -1.000000000000000 ++output: 103.093750000000000 ++fcsr: 0 ++roundig mode: near ++fmsub.d :: ++input: 45786.750000000000000 -1.000000000000000 23.031250000000000 ++output: -45809.781250000000000 ++fcsr: 0 ++roundig mode: near ++fmsub.d :: ++input: 1752065.000000000000000 1384.500000000000000 0.000000000000000 ++output: 
2425733992.500000000000000 ++fcsr: 0 ++roundig mode: near ++fmsub.d :: ++input: 107.000000000000000 -7.000000000000000 456.250000000000000 ++output: -1205.250000000000000 ++fcsr: 0 ++roundig mode: near ++fmsub.d :: ++input: -45667.250000000000000 1000000000.000000000000000 3.000000000000000 ++output: -45667250000003.000000000000000 ++fcsr: 0 ++roundig mode: near ++fmsub.d :: ++input: -7.000000000000000 -5786.500000000000000 -1.000000000000000 ++output: 40506.500000000000000 ++fcsr: 0 ++roundig mode: near ++fmsub.d :: ++input: -347856.500000000000000 1752.000000000000000 -45786.500000000000000 ++output: -609398801.500000000000000 ++fcsr: 0 ++roundig mode: near ++fmsub.d :: ++input: 356047.500000000000000 0.015625000000000 456.000000000000000 ++output: 5107.242187500000000 ++fcsr: 0 ++roundig mode: near ++fmsub.d :: ++input: -1.000000000000000 0.031250000000000 34.031250000000000 ++output: -34.062500000000000 ++fcsr: 0 ++roundig mode: near ++fmsub.d :: ++input: 23.062500000000000 -248562.750000000000000 45786.031250000000000 ++output: -5778264.453125000000000 ++fcsr: 0 ++roundig mode: zero ++fmsub.d :: ++input: 0.000000000000000 -45786.500000000000000 -347856.500000000000000 ++output: 347856.500000000000000 ++fcsr: 0x100 ++roundig mode: zero ++fmsub.d :: ++input: 456.250000000000000 456.250000000000000 356047.500000000000000 ++output: -147883.437500000000000 ++fcsr: 0x100 ++roundig mode: zero ++fmsub.d :: ++input: 3.000000000000000 34.031250000000000 -1.000000000000000 ++output: 103.093750000000000 ++fcsr: 0x100 ++roundig mode: zero ++fmsub.d :: ++input: -1.000000000000000 45786.750000000000000 23.062500000000000 ++output: -45809.812500000000000 ++fcsr: 0x100 ++roundig mode: zero ++fmsub.d :: ++input: 1384.500000000000000 1752065.000000000000000 1752.000000000000000 ++output: 2425732240.500000000000000 ++fcsr: 0x100 ++roundig mode: zero ++fmsub.d :: ++input: -7.250000000000000 107.000000000000000 0.015625000000000 ++output: -775.765625000000000 ++fcsr: 0x100 ++roundig mode: zero ++fmsub.d :: ++input: 1000000000.000000000000000 -45667.250000000000000 0.031250000000000 ++output: -45667250000000.031250000000000 ++fcsr: 0x100 ++roundig mode: zero ++fmsub.d :: ++input: -5786.500000000000000 -7.250000000000000 -248562.750000000000000 ++output: 290514.875000000000000 ++fcsr: 0x100 ++roundig mode: zero ++fmsub.d :: ++input: 1752.000000000000000 -347856.500000000000000 1384.500000000000000 ++output: -609445972.500000000000000 ++fcsr: 0x100 ++roundig mode: zero ++fmsub.d :: ++input: 0.015625000000000 356047.500000000000000 -7.250000000000000 ++output: 5570.492187500000000 ++fcsr: 0x100 ++roundig mode: zero ++fmsub.d :: ++input: 0.031250000000000 -1.000000000000000 1000000000.000000000000000 ++output: -1000000000.031250000000000 ++fcsr: 0x100 ++roundig mode: zero ++fmsub.d :: ++input: -248562.750000000000000 23.062500000000000 -5786.500000000000000 ++output: -5726691.921875000000000 ++fcsr: 0x100 ++roundig mode: zero ++fmsub.d :: ++input: -45786.500000000000000 0.000000000000000 -347856.750000000000000 ++output: 347856.750000000000000 ++fcsr: 0x100 ++roundig mode: zero ++fmsub.d :: ++input: 456.000000000000000 456.250000000000000 356047.750000000000000 ++output: -147997.750000000000000 ++fcsr: 0x100 ++roundig mode: zero ++fmsub.d :: ++input: 34.031250000000000 3.000000000000000 -1.000000000000000 ++output: 103.093750000000000 ++fcsr: 0x100 ++roundig mode: zero ++fmsub.d :: ++input: 45786.750000000000000 -1.000000000000000 23.031250000000000 ++output: -45809.781250000000000 ++fcsr: 0x100 ++roundig mode: 
zero ++fmsub.d :: ++input: 1752065.000000000000000 1384.500000000000000 0.000000000000000 ++output: 2425733992.500000000000000 ++fcsr: 0x100 ++roundig mode: zero ++fmsub.d :: ++input: 107.000000000000000 -7.000000000000000 456.250000000000000 ++output: -1205.250000000000000 ++fcsr: 0x100 ++roundig mode: zero ++fmsub.d :: ++input: -45667.250000000000000 1000000000.000000000000000 3.000000000000000 ++output: -45667250000003.000000000000000 ++fcsr: 0x100 ++roundig mode: zero ++fmsub.d :: ++input: -7.000000000000000 -5786.500000000000000 -1.000000000000000 ++output: 40506.500000000000000 ++fcsr: 0x100 ++roundig mode: zero ++fmsub.d :: ++input: -347856.500000000000000 1752.000000000000000 -45786.500000000000000 ++output: -609398801.500000000000000 ++fcsr: 0x100 ++roundig mode: zero ++fmsub.d :: ++input: 356047.500000000000000 0.015625000000000 456.000000000000000 ++output: 5107.242187500000000 ++fcsr: 0x100 ++roundig mode: zero ++fmsub.d :: ++input: -1.000000000000000 0.031250000000000 34.031250000000000 ++output: -34.062500000000000 ++fcsr: 0x100 ++roundig mode: zero ++fmsub.d :: ++input: 23.062500000000000 -248562.750000000000000 45786.031250000000000 ++output: -5778264.453125000000000 ++fcsr: 0x100 ++roundig mode: +inf ++fmsub.d :: ++input: 0.000000000000000 -45786.500000000000000 -347856.500000000000000 ++output: 347856.500000000000000 ++fcsr: 0x200 ++roundig mode: +inf ++fmsub.d :: ++input: 456.250000000000000 456.250000000000000 356047.500000000000000 ++output: -147883.437500000000000 ++fcsr: 0x200 ++roundig mode: +inf ++fmsub.d :: ++input: 3.000000000000000 34.031250000000000 -1.000000000000000 ++output: 103.093750000000000 ++fcsr: 0x200 ++roundig mode: +inf ++fmsub.d :: ++input: -1.000000000000000 45786.750000000000000 23.062500000000000 ++output: -45809.812500000000000 ++fcsr: 0x200 ++roundig mode: +inf ++fmsub.d :: ++input: 1384.500000000000000 1752065.000000000000000 1752.000000000000000 ++output: 2425732240.500000000000000 ++fcsr: 0x200 ++roundig mode: +inf ++fmsub.d :: ++input: -7.250000000000000 107.000000000000000 0.015625000000000 ++output: -775.765625000000000 ++fcsr: 0x200 ++roundig mode: +inf ++fmsub.d :: ++input: 1000000000.000000000000000 -45667.250000000000000 0.031250000000000 ++output: -45667250000000.031250000000000 ++fcsr: 0x200 ++roundig mode: +inf ++fmsub.d :: ++input: -5786.500000000000000 -7.250000000000000 -248562.750000000000000 ++output: 290514.875000000000000 ++fcsr: 0x200 ++roundig mode: +inf ++fmsub.d :: ++input: 1752.000000000000000 -347856.500000000000000 1384.500000000000000 ++output: -609445972.500000000000000 ++fcsr: 0x200 ++roundig mode: +inf ++fmsub.d :: ++input: 0.015625000000000 356047.500000000000000 -7.250000000000000 ++output: 5570.492187500000000 ++fcsr: 0x200 ++roundig mode: +inf ++fmsub.d :: ++input: 0.031250000000000 -1.000000000000000 1000000000.000000000000000 ++output: -1000000000.031250000000000 ++fcsr: 0x200 ++roundig mode: +inf ++fmsub.d :: ++input: -248562.750000000000000 23.062500000000000 -5786.500000000000000 ++output: -5726691.921875000000000 ++fcsr: 0x200 ++roundig mode: +inf ++fmsub.d :: ++input: -45786.500000000000000 0.000000000000000 -347856.750000000000000 ++output: 347856.750000000000000 ++fcsr: 0x200 ++roundig mode: +inf ++fmsub.d :: ++input: 456.000000000000000 456.250000000000000 356047.750000000000000 ++output: -147997.750000000000000 ++fcsr: 0x200 ++roundig mode: +inf ++fmsub.d :: ++input: 34.031250000000000 3.000000000000000 -1.000000000000000 ++output: 103.093750000000000 ++fcsr: 0x200 ++roundig mode: +inf ++fmsub.d :: 
++input: 45786.750000000000000 -1.000000000000000 23.031250000000000 ++output: -45809.781250000000000 ++fcsr: 0x200 ++roundig mode: +inf ++fmsub.d :: ++input: 1752065.000000000000000 1384.500000000000000 0.000000000000000 ++output: 2425733992.500000000000000 ++fcsr: 0x200 ++roundig mode: +inf ++fmsub.d :: ++input: 107.000000000000000 -7.000000000000000 456.250000000000000 ++output: -1205.250000000000000 ++fcsr: 0x200 ++roundig mode: +inf ++fmsub.d :: ++input: -45667.250000000000000 1000000000.000000000000000 3.000000000000000 ++output: -45667250000003.000000000000000 ++fcsr: 0x200 ++roundig mode: +inf ++fmsub.d :: ++input: -7.000000000000000 -5786.500000000000000 -1.000000000000000 ++output: 40506.500000000000000 ++fcsr: 0x200 ++roundig mode: +inf ++fmsub.d :: ++input: -347856.500000000000000 1752.000000000000000 -45786.500000000000000 ++output: -609398801.500000000000000 ++fcsr: 0x200 ++roundig mode: +inf ++fmsub.d :: ++input: 356047.500000000000000 0.015625000000000 456.000000000000000 ++output: 5107.242187500000000 ++fcsr: 0x200 ++roundig mode: +inf ++fmsub.d :: ++input: -1.000000000000000 0.031250000000000 34.031250000000000 ++output: -34.062500000000000 ++fcsr: 0x200 ++roundig mode: +inf ++fmsub.d :: ++input: 23.062500000000000 -248562.750000000000000 45786.031250000000000 ++output: -5778264.453125000000000 ++fcsr: 0x200 ++roundig mode: -inf ++fmsub.d :: ++input: 0.000000000000000 -45786.500000000000000 -347856.500000000000000 ++output: 347856.500000000000000 ++fcsr: 0x300 ++roundig mode: -inf ++fmsub.d :: ++input: 456.250000000000000 456.250000000000000 356047.500000000000000 ++output: -147883.437500000000000 ++fcsr: 0x300 ++roundig mode: -inf ++fmsub.d :: ++input: 3.000000000000000 34.031250000000000 -1.000000000000000 ++output: 103.093750000000000 ++fcsr: 0x300 ++roundig mode: -inf ++fmsub.d :: ++input: -1.000000000000000 45786.750000000000000 23.062500000000000 ++output: -45809.812500000000000 ++fcsr: 0x300 ++roundig mode: -inf ++fmsub.d :: ++input: 1384.500000000000000 1752065.000000000000000 1752.000000000000000 ++output: 2425732240.500000000000000 ++fcsr: 0x300 ++roundig mode: -inf ++fmsub.d :: ++input: -7.250000000000000 107.000000000000000 0.015625000000000 ++output: -775.765625000000000 ++fcsr: 0x300 ++roundig mode: -inf ++fmsub.d :: ++input: 1000000000.000000000000000 -45667.250000000000000 0.031250000000000 ++output: -45667250000000.031250000000000 ++fcsr: 0x300 ++roundig mode: -inf ++fmsub.d :: ++input: -5786.500000000000000 -7.250000000000000 -248562.750000000000000 ++output: 290514.875000000000000 ++fcsr: 0x300 ++roundig mode: -inf ++fmsub.d :: ++input: 1752.000000000000000 -347856.500000000000000 1384.500000000000000 ++output: -609445972.500000000000000 ++fcsr: 0x300 ++roundig mode: -inf ++fmsub.d :: ++input: 0.015625000000000 356047.500000000000000 -7.250000000000000 ++output: 5570.492187500000000 ++fcsr: 0x300 ++roundig mode: -inf ++fmsub.d :: ++input: 0.031250000000000 -1.000000000000000 1000000000.000000000000000 ++output: -1000000000.031250000000000 ++fcsr: 0x300 ++roundig mode: -inf ++fmsub.d :: ++input: -248562.750000000000000 23.062500000000000 -5786.500000000000000 ++output: -5726691.921875000000000 ++fcsr: 0x300 ++roundig mode: -inf ++fmsub.d :: ++input: -45786.500000000000000 0.000000000000000 -347856.750000000000000 ++output: 347856.750000000000000 ++fcsr: 0x300 ++roundig mode: -inf ++fmsub.d :: ++input: 456.000000000000000 456.250000000000000 356047.750000000000000 ++output: -147997.750000000000000 ++fcsr: 0x300 ++roundig mode: -inf ++fmsub.d :: ++input: 
34.031250000000000 3.000000000000000 -1.000000000000000 ++output: 103.093750000000000 ++fcsr: 0x300 ++roundig mode: -inf ++fmsub.d :: ++input: 45786.750000000000000 -1.000000000000000 23.031250000000000 ++output: -45809.781250000000000 ++fcsr: 0x300 ++roundig mode: -inf ++fmsub.d :: ++input: 1752065.000000000000000 1384.500000000000000 0.000000000000000 ++output: 2425733992.500000000000000 ++fcsr: 0x300 ++roundig mode: -inf ++fmsub.d :: ++input: 107.000000000000000 -7.000000000000000 456.250000000000000 ++output: -1205.250000000000000 ++fcsr: 0x300 ++roundig mode: -inf ++fmsub.d :: ++input: -45667.250000000000000 1000000000.000000000000000 3.000000000000000 ++output: -45667250000003.000000000000000 ++fcsr: 0x300 ++roundig mode: -inf ++fmsub.d :: ++input: -7.000000000000000 -5786.500000000000000 -1.000000000000000 ++output: 40506.500000000000000 ++fcsr: 0x300 ++roundig mode: -inf ++fmsub.d :: ++input: -347856.500000000000000 1752.000000000000000 -45786.500000000000000 ++output: -609398801.500000000000000 ++fcsr: 0x300 ++roundig mode: -inf ++fmsub.d :: ++input: 356047.500000000000000 0.015625000000000 456.000000000000000 ++output: 5107.242187500000000 ++fcsr: 0x300 ++roundig mode: -inf ++fmsub.d :: ++input: -1.000000000000000 0.031250000000000 34.031250000000000 ++output: -34.062500000000000 ++fcsr: 0x300 ++roundig mode: -inf ++fmsub.d :: ++input: 23.062500000000000 -248562.750000000000000 45786.031250000000000 ++output: -5778264.453125000000000 ++fcsr: 0x300 ++roundig mode: near ++fnmadd.s :: ++input: 0.000000 -4578.500000 -347856.500000 ++output: 347856.500000 ++fcsr: 0 ++roundig mode: near ++fnmadd.s :: ++input: 456.250000 456.250000 356047.500000 ++output: -564211.562500 ++fcsr: 0 ++roundig mode: near ++fnmadd.s :: ++input: 3.000000 34.031250 -1.000000 ++output: -101.093750 ++fcsr: 0 ++roundig mode: near ++fnmadd.s :: ++input: -1.000000 4578.750000 23.062500 ++output: 4555.687500 ++fcsr: 0 ++roundig mode: near ++fnmadd.s :: ++input: 1384.500000 175.000000 1752.000000 ++output: -244039.500000 ++fcsr: 0 ++roundig mode: near ++fnmadd.s :: ++input: -7.250000 107.000000 0.015625 ++output: 775.734375 ++fcsr: 0 ++roundig mode: near ++fnmadd.s :: ++input: 1000000000.000000 -456.250000 0.031250 ++output: 456249999360.000000 ++fcsr: 0x1010000 ++roundig mode: near ++fnmadd.s :: ++input: -5786.500000 -7.250000 -248562.750000 ++output: 206610.625000 ++fcsr: 0 ++roundig mode: near ++fnmadd.s :: ++input: 1752.000000 -3478.500000 1384.500000 ++output: 6092947.500000 ++fcsr: 0 ++roundig mode: near ++fnmadd.s :: ++input: 0.015625 356.500000 -7.250000 ++output: 1.679688 ++fcsr: 0 ++roundig mode: near ++fnmadd.s :: ++input: 0.031250 -1.000000 1000000000.000000 ++output: -1000000000.000000 ++fcsr: 0x1010000 ++roundig mode: near ++fnmadd.s :: ++input: -248562.750000 23.062500 -5786.500000 ++output: 5738265.000000 ++fcsr: 0x1010000 ++roundig mode: near ++fnmadd.s :: ++input: -45786.500000 0.000000 -347856.750000 ++output: 347856.750000 ++fcsr: 0 ++roundig mode: near ++fnmadd.s :: ++input: 456.000000 456.250000 356047.750000 ++output: -564097.750000 ++fcsr: 0 ++roundig mode: near ++fnmadd.s :: ++input: 34.031250 3.000000 -1.000000 ++output: -101.093750 ++fcsr: 0 ++roundig mode: near ++fnmadd.s :: ++input: 45786.750000 -1.000000 23.031250 ++output: 45763.718750 ++fcsr: 0 ++roundig mode: near ++fnmadd.s :: ++input: 1752065.000000 1384.500000 0.000000 ++output: -2425733888.000000 ++fcsr: 0x1010000 ++roundig mode: near ++fnmadd.s :: ++input: 107.000000 -7.000000 456.250000 ++output: 292.750000 ++fcsr: 0 ++roundig 
mode: near ++fnmadd.s :: ++input: -45667.250000 100.000000 3.000000 ++output: 4566722.000000 ++fcsr: 0 ++roundig mode: near ++fnmadd.s :: ++input: -7.000000 -5786.500000 -1.000000 ++output: -40504.500000 ++fcsr: 0 ++roundig mode: near ++fnmadd.s :: ++input: -347856.500000 1752.000000 -45786.500000 ++output: 609490368.000000 ++fcsr: 0x1010000 ++roundig mode: near ++fnmadd.s :: ++input: 356047.500000 0.015625 456.000000 ++output: -6019.242188 ++fcsr: 0 ++roundig mode: near ++fnmadd.s :: ++input: -1.000000 0.031250 34.031250 ++output: -34.000000 ++fcsr: 0 ++roundig mode: near ++fnmadd.s :: ++input: 23.062500 -248562.750000 45786.031250 ++output: 5686692.500000 ++fcsr: 0x1010000 ++roundig mode: zero ++fnmadd.s :: ++input: 0.000000 -4578.500000 -347856.500000 ++output: 347856.500000 ++fcsr: 0x100 ++roundig mode: zero ++fnmadd.s :: ++input: 456.250000 456.250000 356047.500000 ++output: -564211.562500 ++fcsr: 0x100 ++roundig mode: zero ++fnmadd.s :: ++input: 3.000000 34.031250 -1.000000 ++output: -101.093750 ++fcsr: 0x100 ++roundig mode: zero ++fnmadd.s :: ++input: -1.000000 4578.750000 23.062500 ++output: 4555.687500 ++fcsr: 0x100 ++roundig mode: zero ++fnmadd.s :: ++input: 1384.500000 175.000000 1752.000000 ++output: -244039.500000 ++fcsr: 0x100 ++roundig mode: zero ++fnmadd.s :: ++input: -7.250000 107.000000 0.015625 ++output: 775.734375 ++fcsr: 0x100 ++roundig mode: zero ++fnmadd.s :: ++input: 1000000000.000000 -456.250000 0.031250 ++output: 456249999360.000000 ++fcsr: 0x1010100 ++roundig mode: zero ++fnmadd.s :: ++input: -5786.500000 -7.250000 -248562.750000 ++output: 206610.625000 ++fcsr: 0x100 ++roundig mode: zero ++fnmadd.s :: ++input: 1752.000000 -3478.500000 1384.500000 ++output: 6092947.500000 ++fcsr: 0x100 ++roundig mode: zero ++fnmadd.s :: ++input: 0.015625 356.500000 -7.250000 ++output: 1.679687 ++fcsr: 0x100 ++roundig mode: zero ++fnmadd.s :: ++input: 0.031250 -1.000000 1000000000.000000 ++output: -999999936.000000 ++fcsr: 0x1010100 ++roundig mode: zero ++fnmadd.s :: ++input: -248562.750000 23.062500 -5786.500000 ++output: 5738264.500000 ++fcsr: 0x1010100 ++roundig mode: zero ++fnmadd.s :: ++input: -45786.500000 0.000000 -347856.750000 ++output: 347856.750000 ++fcsr: 0x100 ++roundig mode: zero ++fnmadd.s :: ++input: 456.000000 456.250000 356047.750000 ++output: -564097.750000 ++fcsr: 0x100 ++roundig mode: zero ++fnmadd.s :: ++input: 34.031250 3.000000 -1.000000 ++output: -101.093750 ++fcsr: 0x100 ++roundig mode: zero ++fnmadd.s :: ++input: 45786.750000 -1.000000 23.031250 ++output: 45763.718750 ++fcsr: 0x100 ++roundig mode: zero ++fnmadd.s :: ++input: 1752065.000000 1384.500000 0.000000 ++output: -2425733888.000000 ++fcsr: 0x1010100 ++roundig mode: zero ++fnmadd.s :: ++input: 107.000000 -7.000000 456.250000 ++output: 292.750000 ++fcsr: 0x100 ++roundig mode: zero ++fnmadd.s :: ++input: -45667.250000 100.000000 3.000000 ++output: 4566722.000000 ++fcsr: 0x100 ++roundig mode: zero ++fnmadd.s :: ++input: -7.000000 -5786.500000 -1.000000 ++output: -40504.500000 ++fcsr: 0x100 ++roundig mode: zero ++fnmadd.s :: ++input: -347856.500000 1752.000000 -45786.500000 ++output: 609490368.000000 ++fcsr: 0x1010100 ++roundig mode: zero ++fnmadd.s :: ++input: 356047.500000 0.015625 456.000000 ++output: -6019.242187 ++fcsr: 0x100 ++roundig mode: zero ++fnmadd.s :: ++input: -1.000000 0.031250 34.031250 ++output: -34.000000 ++fcsr: 0x100 ++roundig mode: zero ++fnmadd.s :: ++input: 23.062500 -248562.750000 45786.031250 ++output: 5686692.000000 ++fcsr: 0x1010100 ++roundig mode: +inf ++fnmadd.s :: ++input: 
0.000000 -4578.500000 -347856.500000 ++output: 347856.500000 ++fcsr: 0x200 ++roundig mode: +inf ++fnmadd.s :: ++input: 456.250000 456.250000 356047.500000 ++output: -564211.562500 ++fcsr: 0x200 ++roundig mode: +inf ++fnmadd.s :: ++input: 3.000000 34.031250 -1.000000 ++output: -101.093750 ++fcsr: 0x200 ++roundig mode: +inf ++fnmadd.s :: ++input: -1.000000 4578.750000 23.062500 ++output: 4555.687500 ++fcsr: 0x200 ++roundig mode: +inf ++fnmadd.s :: ++input: 1384.500000 175.000000 1752.000000 ++output: -244039.500000 ++fcsr: 0x200 ++roundig mode: +inf ++fnmadd.s :: ++input: -7.250000 107.000000 0.015625 ++output: 775.734375 ++fcsr: 0x200 ++roundig mode: +inf ++fnmadd.s :: ++input: 1000000000.000000 -456.250000 0.031250 ++output: 456249999360.000000 ++fcsr: 0x1010200 ++roundig mode: +inf ++fnmadd.s :: ++input: -5786.500000 -7.250000 -248562.750000 ++output: 206610.625000 ++fcsr: 0x200 ++roundig mode: +inf ++fnmadd.s :: ++input: 1752.000000 -3478.500000 1384.500000 ++output: 6092947.500000 ++fcsr: 0x200 ++roundig mode: +inf ++fnmadd.s :: ++input: 0.015625 356.500000 -7.250000 ++output: 1.679688 ++fcsr: 0x200 ++roundig mode: +inf ++fnmadd.s :: ++input: 0.031250 -1.000000 1000000000.000000 ++output: -1000000000.000000 ++fcsr: 0x1010200 ++roundig mode: +inf ++fnmadd.s :: ++input: -248562.750000 23.062500 -5786.500000 ++output: 5738264.500000 ++fcsr: 0x1010200 ++roundig mode: +inf ++fnmadd.s :: ++input: -45786.500000 0.000000 -347856.750000 ++output: 347856.750000 ++fcsr: 0x200 ++roundig mode: +inf ++fnmadd.s :: ++input: 456.000000 456.250000 356047.750000 ++output: -564097.750000 ++fcsr: 0x200 ++roundig mode: +inf ++fnmadd.s :: ++input: 34.031250 3.000000 -1.000000 ++output: -101.093750 ++fcsr: 0x200 ++roundig mode: +inf ++fnmadd.s :: ++input: 45786.750000 -1.000000 23.031250 ++output: 45763.718750 ++fcsr: 0x200 ++roundig mode: +inf ++fnmadd.s :: ++input: 1752065.000000 1384.500000 0.000000 ++output: -2425734144.000000 ++fcsr: 0x1010200 ++roundig mode: +inf ++fnmadd.s :: ++input: 107.000000 -7.000000 456.250000 ++output: 292.750000 ++fcsr: 0x200 ++roundig mode: +inf ++fnmadd.s :: ++input: -45667.250000 100.000000 3.000000 ++output: 4566722.000000 ++fcsr: 0x200 ++roundig mode: +inf ++fnmadd.s :: ++input: -7.000000 -5786.500000 -1.000000 ++output: -40504.500000 ++fcsr: 0x200 ++roundig mode: +inf ++fnmadd.s :: ++input: -347856.500000 1752.000000 -45786.500000 ++output: 609490368.000000 ++fcsr: 0x1010200 ++roundig mode: +inf ++fnmadd.s :: ++input: 356047.500000 0.015625 456.000000 ++output: -6019.242187 ++fcsr: 0x200 ++roundig mode: +inf ++fnmadd.s :: ++input: -1.000000 0.031250 34.031250 ++output: -34.000000 ++fcsr: 0x200 ++roundig mode: +inf ++fnmadd.s :: ++input: 23.062500 -248562.750000 45786.031250 ++output: 5686692.000000 ++fcsr: 0x1010200 ++roundig mode: -inf ++fnmadd.s :: ++input: 0.000000 -4578.500000 -347856.500000 ++output: 347856.500000 ++fcsr: 0x300 ++roundig mode: -inf ++fnmadd.s :: ++input: 456.250000 456.250000 356047.500000 ++output: -564211.562500 ++fcsr: 0x300 ++roundig mode: -inf ++fnmadd.s :: ++input: 3.000000 34.031250 -1.000000 ++output: -101.093750 ++fcsr: 0x300 ++roundig mode: -inf ++fnmadd.s :: ++input: -1.000000 4578.750000 23.062500 ++output: 4555.687500 ++fcsr: 0x300 ++roundig mode: -inf ++fnmadd.s :: ++input: 1384.500000 175.000000 1752.000000 ++output: -244039.500000 ++fcsr: 0x300 ++roundig mode: -inf ++fnmadd.s :: ++input: -7.250000 107.000000 0.015625 ++output: 775.734375 ++fcsr: 0x300 ++roundig mode: -inf ++fnmadd.s :: ++input: 1000000000.000000 -456.250000 0.031250 
++output: 456250032128.000000 ++fcsr: 0x1010300 ++roundig mode: -inf ++fnmadd.s :: ++input: -5786.500000 -7.250000 -248562.750000 ++output: 206610.625000 ++fcsr: 0x300 ++roundig mode: -inf ++fnmadd.s :: ++input: 1752.000000 -3478.500000 1384.500000 ++output: 6092947.500000 ++fcsr: 0x300 ++roundig mode: -inf ++fnmadd.s :: ++input: 0.015625 356.500000 -7.250000 ++output: 1.679687 ++fcsr: 0x300 ++roundig mode: -inf ++fnmadd.s :: ++input: 0.031250 -1.000000 1000000000.000000 ++output: -999999936.000000 ++fcsr: 0x1010300 ++roundig mode: -inf ++fnmadd.s :: ++input: -248562.750000 23.062500 -5786.500000 ++output: 5738265.000000 ++fcsr: 0x1010300 ++roundig mode: -inf ++fnmadd.s :: ++input: -45786.500000 0.000000 -347856.750000 ++output: 347856.750000 ++fcsr: 0x300 ++roundig mode: -inf ++fnmadd.s :: ++input: 456.000000 456.250000 356047.750000 ++output: -564097.750000 ++fcsr: 0x300 ++roundig mode: -inf ++fnmadd.s :: ++input: 34.031250 3.000000 -1.000000 ++output: -101.093750 ++fcsr: 0x300 ++roundig mode: -inf ++fnmadd.s :: ++input: 45786.750000 -1.000000 23.031250 ++output: 45763.718750 ++fcsr: 0x300 ++roundig mode: -inf ++fnmadd.s :: ++input: 1752065.000000 1384.500000 0.000000 ++output: -2425733888.000000 ++fcsr: 0x1010300 ++roundig mode: -inf ++fnmadd.s :: ++input: 107.000000 -7.000000 456.250000 ++output: 292.750000 ++fcsr: 0x300 ++roundig mode: -inf ++fnmadd.s :: ++input: -45667.250000 100.000000 3.000000 ++output: 4566722.000000 ++fcsr: 0x300 ++roundig mode: -inf ++fnmadd.s :: ++input: -7.000000 -5786.500000 -1.000000 ++output: -40504.500000 ++fcsr: 0x300 ++roundig mode: -inf ++fnmadd.s :: ++input: -347856.500000 1752.000000 -45786.500000 ++output: 609490432.000000 ++fcsr: 0x1010300 ++roundig mode: -inf ++fnmadd.s :: ++input: 356047.500000 0.015625 456.000000 ++output: -6019.242188 ++fcsr: 0x300 ++roundig mode: -inf ++fnmadd.s :: ++input: -1.000000 0.031250 34.031250 ++output: -34.000000 ++fcsr: 0x300 ++roundig mode: -inf ++fnmadd.s :: ++input: 23.062500 -248562.750000 45786.031250 ++output: 5686692.500000 ++fcsr: 0x1010300 ++roundig mode: near ++fnmadd.d :: ++input: 0.000000000000000 -45786.500000000000000 -347856.500000000000000 ++output: 347856.500000000000000 ++fcsr: 0 ++roundig mode: near ++fnmadd.d :: ++input: 456.250000000000000 456.250000000000000 356047.500000000000000 ++output: -564211.562500000000000 ++fcsr: 0 ++roundig mode: near ++fnmadd.d :: ++input: 3.000000000000000 34.031250000000000 -1.000000000000000 ++output: -101.093750000000000 ++fcsr: 0 ++roundig mode: near ++fnmadd.d :: ++input: -1.000000000000000 45786.750000000000000 23.062500000000000 ++output: 45763.687500000000000 ++fcsr: 0 ++roundig mode: near ++fnmadd.d :: ++input: 1384.500000000000000 1752065.000000000000000 1752.000000000000000 ++output: -2425735744.500000000000000 ++fcsr: 0 ++roundig mode: near ++fnmadd.d :: ++input: -7.250000000000000 107.000000000000000 0.015625000000000 ++output: 775.734375000000000 ++fcsr: 0 ++roundig mode: near ++fnmadd.d :: ++input: 1000000000.000000000000000 -45667.250000000000000 0.031250000000000 ++output: 45667249999999.968750000000000 ++fcsr: 0 ++roundig mode: near ++fnmadd.d :: ++input: -5786.500000000000000 -7.250000000000000 -248562.750000000000000 ++output: 206610.625000000000000 ++fcsr: 0 ++roundig mode: near ++fnmadd.d :: ++input: 1752.000000000000000 -347856.500000000000000 1384.500000000000000 ++output: 609443203.500000000000000 ++fcsr: 0 ++roundig mode: near ++fnmadd.d :: ++input: 0.015625000000000 356047.500000000000000 -7.250000000000000 ++output: -5555.992187500000000 
++fcsr: 0 ++roundig mode: near ++fnmadd.d :: ++input: 0.031250000000000 -1.000000000000000 1000000000.000000000000000 ++output: -999999999.968750000000000 ++fcsr: 0 ++roundig mode: near ++fnmadd.d :: ++input: -248562.750000000000000 23.062500000000000 -5786.500000000000000 ++output: 5738264.921875000000000 ++fcsr: 0 ++roundig mode: near ++fnmadd.d :: ++input: -45786.500000000000000 0.000000000000000 -347856.750000000000000 ++output: 347856.750000000000000 ++fcsr: 0 ++roundig mode: near ++fnmadd.d :: ++input: 456.000000000000000 456.250000000000000 356047.750000000000000 ++output: -564097.750000000000000 ++fcsr: 0 ++roundig mode: near ++fnmadd.d :: ++input: 34.031250000000000 3.000000000000000 -1.000000000000000 ++output: -101.093750000000000 ++fcsr: 0 ++roundig mode: near ++fnmadd.d :: ++input: 45786.750000000000000 -1.000000000000000 23.031250000000000 ++output: 45763.718750000000000 ++fcsr: 0 ++roundig mode: near ++fnmadd.d :: ++input: 1752065.000000000000000 1384.500000000000000 0.000000000000000 ++output: -2425733992.500000000000000 ++fcsr: 0 ++roundig mode: near ++fnmadd.d :: ++input: 107.000000000000000 -7.000000000000000 456.250000000000000 ++output: 292.750000000000000 ++fcsr: 0 ++roundig mode: near ++fnmadd.d :: ++input: -45667.250000000000000 1000000000.000000000000000 3.000000000000000 ++output: 45667249999997.000000000000000 ++fcsr: 0 ++roundig mode: near ++fnmadd.d :: ++input: -7.000000000000000 -5786.500000000000000 -1.000000000000000 ++output: -40504.500000000000000 ++fcsr: 0 ++roundig mode: near ++fnmadd.d :: ++input: -347856.500000000000000 1752.000000000000000 -45786.500000000000000 ++output: 609490374.500000000000000 ++fcsr: 0 ++roundig mode: near ++fnmadd.d :: ++input: 356047.500000000000000 0.015625000000000 456.000000000000000 ++output: -6019.242187500000000 ++fcsr: 0 ++roundig mode: near ++fnmadd.d :: ++input: -1.000000000000000 0.031250000000000 34.031250000000000 ++output: -34.000000000000000 ++fcsr: 0 ++roundig mode: near ++fnmadd.d :: ++input: 23.062500000000000 -248562.750000000000000 45786.031250000000000 ++output: 5686692.390625000000000 ++fcsr: 0 ++roundig mode: zero ++fnmadd.d :: ++input: 0.000000000000000 -45786.500000000000000 -347856.500000000000000 ++output: 347856.500000000000000 ++fcsr: 0x100 ++roundig mode: zero ++fnmadd.d :: ++input: 456.250000000000000 456.250000000000000 356047.500000000000000 ++output: -564211.562500000000000 ++fcsr: 0x100 ++roundig mode: zero ++fnmadd.d :: ++input: 3.000000000000000 34.031250000000000 -1.000000000000000 ++output: -101.093750000000000 ++fcsr: 0x100 ++roundig mode: zero ++fnmadd.d :: ++input: -1.000000000000000 45786.750000000000000 23.062500000000000 ++output: 45763.687500000000000 ++fcsr: 0x100 ++roundig mode: zero ++fnmadd.d :: ++input: 1384.500000000000000 1752065.000000000000000 1752.000000000000000 ++output: -2425735744.500000000000000 ++fcsr: 0x100 ++roundig mode: zero ++fnmadd.d :: ++input: -7.250000000000000 107.000000000000000 0.015625000000000 ++output: 775.734375000000000 ++fcsr: 0x100 ++roundig mode: zero ++fnmadd.d :: ++input: 1000000000.000000000000000 -45667.250000000000000 0.031250000000000 ++output: 45667249999999.968750000000000 ++fcsr: 0x100 ++roundig mode: zero ++fnmadd.d :: ++input: -5786.500000000000000 -7.250000000000000 -248562.750000000000000 ++output: 206610.625000000000000 ++fcsr: 0x100 ++roundig mode: zero ++fnmadd.d :: ++input: 1752.000000000000000 -347856.500000000000000 1384.500000000000000 ++output: 609443203.500000000000000 ++fcsr: 0x100 ++roundig mode: zero ++fnmadd.d :: ++input: 
0.015625000000000 356047.500000000000000 -7.250000000000000 ++output: -5555.992187500000000 ++fcsr: 0x100 ++roundig mode: zero ++fnmadd.d :: ++input: 0.031250000000000 -1.000000000000000 1000000000.000000000000000 ++output: -999999999.968750000000000 ++fcsr: 0x100 ++roundig mode: zero ++fnmadd.d :: ++input: -248562.750000000000000 23.062500000000000 -5786.500000000000000 ++output: 5738264.921875000000000 ++fcsr: 0x100 ++roundig mode: zero ++fnmadd.d :: ++input: -45786.500000000000000 0.000000000000000 -347856.750000000000000 ++output: 347856.750000000000000 ++fcsr: 0x100 ++roundig mode: zero ++fnmadd.d :: ++input: 456.000000000000000 456.250000000000000 356047.750000000000000 ++output: -564097.750000000000000 ++fcsr: 0x100 ++roundig mode: zero ++fnmadd.d :: ++input: 34.031250000000000 3.000000000000000 -1.000000000000000 ++output: -101.093750000000000 ++fcsr: 0x100 ++roundig mode: zero ++fnmadd.d :: ++input: 45786.750000000000000 -1.000000000000000 23.031250000000000 ++output: 45763.718750000000000 ++fcsr: 0x100 ++roundig mode: zero ++fnmadd.d :: ++input: 1752065.000000000000000 1384.500000000000000 0.000000000000000 ++output: -2425733992.500000000000000 ++fcsr: 0x100 ++roundig mode: zero ++fnmadd.d :: ++input: 107.000000000000000 -7.000000000000000 456.250000000000000 ++output: 292.750000000000000 ++fcsr: 0x100 ++roundig mode: zero ++fnmadd.d :: ++input: -45667.250000000000000 1000000000.000000000000000 3.000000000000000 ++output: 45667249999997.000000000000000 ++fcsr: 0x100 ++roundig mode: zero ++fnmadd.d :: ++input: -7.000000000000000 -5786.500000000000000 -1.000000000000000 ++output: -40504.500000000000000 ++fcsr: 0x100 ++roundig mode: zero ++fnmadd.d :: ++input: -347856.500000000000000 1752.000000000000000 -45786.500000000000000 ++output: 609490374.500000000000000 ++fcsr: 0x100 ++roundig mode: zero ++fnmadd.d :: ++input: 356047.500000000000000 0.015625000000000 456.000000000000000 ++output: -6019.242187500000000 ++fcsr: 0x100 ++roundig mode: zero ++fnmadd.d :: ++input: -1.000000000000000 0.031250000000000 34.031250000000000 ++output: -34.000000000000000 ++fcsr: 0x100 ++roundig mode: zero ++fnmadd.d :: ++input: 23.062500000000000 -248562.750000000000000 45786.031250000000000 ++output: 5686692.390625000000000 ++fcsr: 0x100 ++roundig mode: +inf ++fnmadd.d :: ++input: 0.000000000000000 -45786.500000000000000 -347856.500000000000000 ++output: 347856.500000000000000 ++fcsr: 0x200 ++roundig mode: +inf ++fnmadd.d :: ++input: 456.250000000000000 456.250000000000000 356047.500000000000000 ++output: -564211.562500000000000 ++fcsr: 0x200 ++roundig mode: +inf ++fnmadd.d :: ++input: 3.000000000000000 34.031250000000000 -1.000000000000000 ++output: -101.093750000000000 ++fcsr: 0x200 ++roundig mode: +inf ++fnmadd.d :: ++input: -1.000000000000000 45786.750000000000000 23.062500000000000 ++output: 45763.687500000000000 ++fcsr: 0x200 ++roundig mode: +inf ++fnmadd.d :: ++input: 1384.500000000000000 1752065.000000000000000 1752.000000000000000 ++output: -2425735744.500000000000000 ++fcsr: 0x200 ++roundig mode: +inf ++fnmadd.d :: ++input: -7.250000000000000 107.000000000000000 0.015625000000000 ++output: 775.734375000000000 ++fcsr: 0x200 ++roundig mode: +inf ++fnmadd.d :: ++input: 1000000000.000000000000000 -45667.250000000000000 0.031250000000000 ++output: 45667249999999.968750000000000 ++fcsr: 0x200 ++roundig mode: +inf ++fnmadd.d :: ++input: -5786.500000000000000 -7.250000000000000 -248562.750000000000000 ++output: 206610.625000000000000 ++fcsr: 0x200 ++roundig mode: +inf ++fnmadd.d :: ++input: 
1752.000000000000000 -347856.500000000000000 1384.500000000000000 ++output: 609443203.500000000000000 ++fcsr: 0x200 ++roundig mode: +inf ++fnmadd.d :: ++input: 0.015625000000000 356047.500000000000000 -7.250000000000000 ++output: -5555.992187500000000 ++fcsr: 0x200 ++roundig mode: +inf ++fnmadd.d :: ++input: 0.031250000000000 -1.000000000000000 1000000000.000000000000000 ++output: -999999999.968750000000000 ++fcsr: 0x200 ++roundig mode: +inf ++fnmadd.d :: ++input: -248562.750000000000000 23.062500000000000 -5786.500000000000000 ++output: 5738264.921875000000000 ++fcsr: 0x200 ++roundig mode: +inf ++fnmadd.d :: ++input: -45786.500000000000000 0.000000000000000 -347856.750000000000000 ++output: 347856.750000000000000 ++fcsr: 0x200 ++roundig mode: +inf ++fnmadd.d :: ++input: 456.000000000000000 456.250000000000000 356047.750000000000000 ++output: -564097.750000000000000 ++fcsr: 0x200 ++roundig mode: +inf ++fnmadd.d :: ++input: 34.031250000000000 3.000000000000000 -1.000000000000000 ++output: -101.093750000000000 ++fcsr: 0x200 ++roundig mode: +inf ++fnmadd.d :: ++input: 45786.750000000000000 -1.000000000000000 23.031250000000000 ++output: 45763.718750000000000 ++fcsr: 0x200 ++roundig mode: +inf ++fnmadd.d :: ++input: 1752065.000000000000000 1384.500000000000000 0.000000000000000 ++output: -2425733992.500000000000000 ++fcsr: 0x200 ++roundig mode: +inf ++fnmadd.d :: ++input: 107.000000000000000 -7.000000000000000 456.250000000000000 ++output: 292.750000000000000 ++fcsr: 0x200 ++roundig mode: +inf ++fnmadd.d :: ++input: -45667.250000000000000 1000000000.000000000000000 3.000000000000000 ++output: 45667249999997.000000000000000 ++fcsr: 0x200 ++roundig mode: +inf ++fnmadd.d :: ++input: -7.000000000000000 -5786.500000000000000 -1.000000000000000 ++output: -40504.500000000000000 ++fcsr: 0x200 ++roundig mode: +inf ++fnmadd.d :: ++input: -347856.500000000000000 1752.000000000000000 -45786.500000000000000 ++output: 609490374.500000000000000 ++fcsr: 0x200 ++roundig mode: +inf ++fnmadd.d :: ++input: 356047.500000000000000 0.015625000000000 456.000000000000000 ++output: -6019.242187500000000 ++fcsr: 0x200 ++roundig mode: +inf ++fnmadd.d :: ++input: -1.000000000000000 0.031250000000000 34.031250000000000 ++output: -34.000000000000000 ++fcsr: 0x200 ++roundig mode: +inf ++fnmadd.d :: ++input: 23.062500000000000 -248562.750000000000000 45786.031250000000000 ++output: 5686692.390625000000000 ++fcsr: 0x200 ++roundig mode: -inf ++fnmadd.d :: ++input: 0.000000000000000 -45786.500000000000000 -347856.500000000000000 ++output: 347856.500000000000000 ++fcsr: 0x300 ++roundig mode: -inf ++fnmadd.d :: ++input: 456.250000000000000 456.250000000000000 356047.500000000000000 ++output: -564211.562500000000000 ++fcsr: 0x300 ++roundig mode: -inf ++fnmadd.d :: ++input: 3.000000000000000 34.031250000000000 -1.000000000000000 ++output: -101.093750000000000 ++fcsr: 0x300 ++roundig mode: -inf ++fnmadd.d :: ++input: -1.000000000000000 45786.750000000000000 23.062500000000000 ++output: 45763.687500000000000 ++fcsr: 0x300 ++roundig mode: -inf ++fnmadd.d :: ++input: 1384.500000000000000 1752065.000000000000000 1752.000000000000000 ++output: -2425735744.500000000000000 ++fcsr: 0x300 ++roundig mode: -inf ++fnmadd.d :: ++input: -7.250000000000000 107.000000000000000 0.015625000000000 ++output: 775.734375000000000 ++fcsr: 0x300 ++roundig mode: -inf ++fnmadd.d :: ++input: 1000000000.000000000000000 -45667.250000000000000 0.031250000000000 ++output: 45667249999999.968750000000000 ++fcsr: 0x300 ++roundig mode: -inf ++fnmadd.d :: ++input: 
-5786.500000000000000 -7.250000000000000 -248562.750000000000000 ++output: 206610.625000000000000 ++fcsr: 0x300 ++roundig mode: -inf ++fnmadd.d :: ++input: 1752.000000000000000 -347856.500000000000000 1384.500000000000000 ++output: 609443203.500000000000000 ++fcsr: 0x300 ++roundig mode: -inf ++fnmadd.d :: ++input: 0.015625000000000 356047.500000000000000 -7.250000000000000 ++output: -5555.992187500000000 ++fcsr: 0x300 ++roundig mode: -inf ++fnmadd.d :: ++input: 0.031250000000000 -1.000000000000000 1000000000.000000000000000 ++output: -999999999.968750000000000 ++fcsr: 0x300 ++roundig mode: -inf ++fnmadd.d :: ++input: -248562.750000000000000 23.062500000000000 -5786.500000000000000 ++output: 5738264.921875000000000 ++fcsr: 0x300 ++roundig mode: -inf ++fnmadd.d :: ++input: -45786.500000000000000 0.000000000000000 -347856.750000000000000 ++output: 347856.750000000000000 ++fcsr: 0x300 ++roundig mode: -inf ++fnmadd.d :: ++input: 456.000000000000000 456.250000000000000 356047.750000000000000 ++output: -564097.750000000000000 ++fcsr: 0x300 ++roundig mode: -inf ++fnmadd.d :: ++input: 34.031250000000000 3.000000000000000 -1.000000000000000 ++output: -101.093750000000000 ++fcsr: 0x300 ++roundig mode: -inf ++fnmadd.d :: ++input: 45786.750000000000000 -1.000000000000000 23.031250000000000 ++output: 45763.718750000000000 ++fcsr: 0x300 ++roundig mode: -inf ++fnmadd.d :: ++input: 1752065.000000000000000 1384.500000000000000 0.000000000000000 ++output: -2425733992.500000000000000 ++fcsr: 0x300 ++roundig mode: -inf ++fnmadd.d :: ++input: 107.000000000000000 -7.000000000000000 456.250000000000000 ++output: 292.750000000000000 ++fcsr: 0x300 ++roundig mode: -inf ++fnmadd.d :: ++input: -45667.250000000000000 1000000000.000000000000000 3.000000000000000 ++output: 45667249999997.000000000000000 ++fcsr: 0x300 ++roundig mode: -inf ++fnmadd.d :: ++input: -7.000000000000000 -5786.500000000000000 -1.000000000000000 ++output: -40504.500000000000000 ++fcsr: 0x300 ++roundig mode: -inf ++fnmadd.d :: ++input: -347856.500000000000000 1752.000000000000000 -45786.500000000000000 ++output: 609490374.500000000000000 ++fcsr: 0x300 ++roundig mode: -inf ++fnmadd.d :: ++input: 356047.500000000000000 0.015625000000000 456.000000000000000 ++output: -6019.242187500000000 ++fcsr: 0x300 ++roundig mode: -inf ++fnmadd.d :: ++input: -1.000000000000000 0.031250000000000 34.031250000000000 ++output: -34.000000000000000 ++fcsr: 0x300 ++roundig mode: -inf ++fnmadd.d :: ++input: 23.062500000000000 -248562.750000000000000 45786.031250000000000 ++output: 5686692.390625000000000 ++fcsr: 0x300 ++roundig mode: near ++fnmsub.s :: ++input: 0.000000 -4578.500000 -347856.500000 ++output: -347856.500000 ++fcsr: 0 ++roundig mode: near ++fnmsub.s :: ++input: 456.250000 456.250000 356047.500000 ++output: 147883.437500 ++fcsr: 0 ++roundig mode: near ++fnmsub.s :: ++input: 3.000000 34.031250 -1.000000 ++output: -103.093750 ++fcsr: 0 ++roundig mode: near ++fnmsub.s :: ++input: -1.000000 4578.750000 23.062500 ++output: 4601.812500 ++fcsr: 0 ++roundig mode: near ++fnmsub.s :: ++input: 1384.500000 175.000000 1752.000000 ++output: -240535.500000 ++fcsr: 0 ++roundig mode: near ++fnmsub.s :: ++input: -7.250000 107.000000 0.015625 ++output: 775.765625 ++fcsr: 0 ++roundig mode: near ++fnmsub.s :: ++input: 1000000000.000000 -456.250000 0.031250 ++output: 456249999360.000000 ++fcsr: 0x1010000 ++roundig mode: near ++fnmsub.s :: ++input: -5786.500000 -7.250000 -248562.750000 ++output: -290514.875000 ++fcsr: 0 ++roundig mode: near ++fnmsub.s :: ++input: 1752.000000 
-3478.500000 1384.500000 ++output: 6095716.500000 ++fcsr: 0 ++roundig mode: near ++fnmsub.s :: ++input: 0.015625 356.500000 -7.250000 ++output: -12.820312 ++fcsr: 0 ++roundig mode: near ++fnmsub.s :: ++input: 0.031250 -1.000000 1000000000.000000 ++output: 1000000000.000000 ++fcsr: 0x1010000 ++roundig mode: near ++fnmsub.s :: ++input: -248562.750000 23.062500 -5786.500000 ++output: 5726692.000000 ++fcsr: 0x1010000 ++roundig mode: near ++fnmsub.s :: ++input: -45786.500000 0.000000 -347856.750000 ++output: -347856.750000 ++fcsr: 0 ++roundig mode: near ++fnmsub.s :: ++input: 456.000000 456.250000 356047.750000 ++output: 147997.750000 ++fcsr: 0 ++roundig mode: near ++fnmsub.s :: ++input: 34.031250 3.000000 -1.000000 ++output: -103.093750 ++fcsr: 0 ++roundig mode: near ++fnmsub.s :: ++input: 45786.750000 -1.000000 23.031250 ++output: 45809.781250 ++fcsr: 0 ++roundig mode: near ++fnmsub.s :: ++input: 1752065.000000 1384.500000 0.000000 ++output: -2425733888.000000 ++fcsr: 0x1010000 ++roundig mode: near ++fnmsub.s :: ++input: 107.000000 -7.000000 456.250000 ++output: 1205.250000 ++fcsr: 0 ++roundig mode: near ++fnmsub.s :: ++input: -45667.250000 100.000000 3.000000 ++output: 4566728.000000 ++fcsr: 0 ++roundig mode: near ++fnmsub.s :: ++input: -7.000000 -5786.500000 -1.000000 ++output: -40506.500000 ++fcsr: 0 ++roundig mode: near ++fnmsub.s :: ++input: -347856.500000 1752.000000 -45786.500000 ++output: 609398784.000000 ++fcsr: 0x1010000 ++roundig mode: near ++fnmsub.s :: ++input: 356047.500000 0.015625 456.000000 ++output: -5107.242188 ++fcsr: 0 ++roundig mode: near ++fnmsub.s :: ++input: -1.000000 0.031250 34.031250 ++output: 34.062500 ++fcsr: 0 ++roundig mode: near ++fnmsub.s :: ++input: 23.062500 -248562.750000 45786.031250 ++output: 5778264.500000 ++fcsr: 0x1010000 ++roundig mode: zero ++fnmsub.s :: ++input: 0.000000 -4578.500000 -347856.500000 ++output: -347856.500000 ++fcsr: 0x100 ++roundig mode: zero ++fnmsub.s :: ++input: 456.250000 456.250000 356047.500000 ++output: 147883.437500 ++fcsr: 0x100 ++roundig mode: zero ++fnmsub.s :: ++input: 3.000000 34.031250 -1.000000 ++output: -103.093750 ++fcsr: 0x100 ++roundig mode: zero ++fnmsub.s :: ++input: -1.000000 4578.750000 23.062500 ++output: 4601.812500 ++fcsr: 0x100 ++roundig mode: zero ++fnmsub.s :: ++input: 1384.500000 175.000000 1752.000000 ++output: -240535.500000 ++fcsr: 0x100 ++roundig mode: zero ++fnmsub.s :: ++input: -7.250000 107.000000 0.015625 ++output: 775.765625 ++fcsr: 0x100 ++roundig mode: zero ++fnmsub.s :: ++input: 1000000000.000000 -456.250000 0.031250 ++output: 456249999360.000000 ++fcsr: 0x1010100 ++roundig mode: zero ++fnmsub.s :: ++input: -5786.500000 -7.250000 -248562.750000 ++output: -290514.875000 ++fcsr: 0x100 ++roundig mode: zero ++fnmsub.s :: ++input: 1752.000000 -3478.500000 1384.500000 ++output: 6095716.500000 ++fcsr: 0x100 ++roundig mode: zero ++fnmsub.s :: ++input: 0.015625 356.500000 -7.250000 ++output: -12.820312 ++fcsr: 0x100 ++roundig mode: zero ++fnmsub.s :: ++input: 0.031250 -1.000000 1000000000.000000 ++output: 1000000000.000000 ++fcsr: 0x1010100 ++roundig mode: zero ++fnmsub.s :: ++input: -248562.750000 23.062500 -5786.500000 ++output: 5726691.500000 ++fcsr: 0x1010100 ++roundig mode: zero ++fnmsub.s :: ++input: -45786.500000 0.000000 -347856.750000 ++output: -347856.750000 ++fcsr: 0x100 ++roundig mode: zero ++fnmsub.s :: ++input: 456.000000 456.250000 356047.750000 ++output: 147997.750000 ++fcsr: 0x100 ++roundig mode: zero ++fnmsub.s :: ++input: 34.031250 3.000000 -1.000000 ++output: -103.093750 ++fcsr: 
0x100 ++roundig mode: zero ++fnmsub.s :: ++input: 45786.750000 -1.000000 23.031250 ++output: 45809.781250 ++fcsr: 0x100 ++roundig mode: zero ++fnmsub.s :: ++input: 1752065.000000 1384.500000 0.000000 ++output: -2425733888.000000 ++fcsr: 0x1010100 ++roundig mode: zero ++fnmsub.s :: ++input: 107.000000 -7.000000 456.250000 ++output: 1205.250000 ++fcsr: 0x100 ++roundig mode: zero ++fnmsub.s :: ++input: -45667.250000 100.000000 3.000000 ++output: 4566728.000000 ++fcsr: 0x100 ++roundig mode: zero ++fnmsub.s :: ++input: -7.000000 -5786.500000 -1.000000 ++output: -40506.500000 ++fcsr: 0x100 ++roundig mode: zero ++fnmsub.s :: ++input: -347856.500000 1752.000000 -45786.500000 ++output: 609398784.000000 ++fcsr: 0x1010100 ++roundig mode: zero ++fnmsub.s :: ++input: 356047.500000 0.015625 456.000000 ++output: -5107.242187 ++fcsr: 0x100 ++roundig mode: zero ++fnmsub.s :: ++input: -1.000000 0.031250 34.031250 ++output: 34.062500 ++fcsr: 0x100 ++roundig mode: zero ++fnmsub.s :: ++input: 23.062500 -248562.750000 45786.031250 ++output: 5778264.000000 ++fcsr: 0x1010100 ++roundig mode: +inf ++fnmsub.s :: ++input: 0.000000 -4578.500000 -347856.500000 ++output: -347856.500000 ++fcsr: 0x200 ++roundig mode: +inf ++fnmsub.s :: ++input: 456.250000 456.250000 356047.500000 ++output: 147883.437500 ++fcsr: 0x200 ++roundig mode: +inf ++fnmsub.s :: ++input: 3.000000 34.031250 -1.000000 ++output: -103.093750 ++fcsr: 0x200 ++roundig mode: +inf ++fnmsub.s :: ++input: -1.000000 4578.750000 23.062500 ++output: 4601.812500 ++fcsr: 0x200 ++roundig mode: +inf ++fnmsub.s :: ++input: 1384.500000 175.000000 1752.000000 ++output: -240535.500000 ++fcsr: 0x200 ++roundig mode: +inf ++fnmsub.s :: ++input: -7.250000 107.000000 0.015625 ++output: 775.765625 ++fcsr: 0x200 ++roundig mode: +inf ++fnmsub.s :: ++input: 1000000000.000000 -456.250000 0.031250 ++output: 456249999360.000000 ++fcsr: 0x1010200 ++roundig mode: +inf ++fnmsub.s :: ++input: -5786.500000 -7.250000 -248562.750000 ++output: -290514.875000 ++fcsr: 0x200 ++roundig mode: +inf ++fnmsub.s :: ++input: 1752.000000 -3478.500000 1384.500000 ++output: 6095716.500000 ++fcsr: 0x200 ++roundig mode: +inf ++fnmsub.s :: ++input: 0.015625 356.500000 -7.250000 ++output: -12.820312 ++fcsr: 0x200 ++roundig mode: +inf ++fnmsub.s :: ++input: 0.031250 -1.000000 1000000000.000000 ++output: 1000000000.000000 ++fcsr: 0x1010200 ++roundig mode: +inf ++fnmsub.s :: ++input: -248562.750000 23.062500 -5786.500000 ++output: 5726691.500000 ++fcsr: 0x1010200 ++roundig mode: +inf ++fnmsub.s :: ++input: -45786.500000 0.000000 -347856.750000 ++output: -347856.750000 ++fcsr: 0x200 ++roundig mode: +inf ++fnmsub.s :: ++input: 456.000000 456.250000 356047.750000 ++output: 147997.750000 ++fcsr: 0x200 ++roundig mode: +inf ++fnmsub.s :: ++input: 34.031250 3.000000 -1.000000 ++output: -103.093750 ++fcsr: 0x200 ++roundig mode: +inf ++fnmsub.s :: ++input: 45786.750000 -1.000000 23.031250 ++output: 45809.781250 ++fcsr: 0x200 ++roundig mode: +inf ++fnmsub.s :: ++input: 1752065.000000 1384.500000 0.000000 ++output: -2425734144.000000 ++fcsr: 0x1010200 ++roundig mode: +inf ++fnmsub.s :: ++input: 107.000000 -7.000000 456.250000 ++output: 1205.250000 ++fcsr: 0x200 ++roundig mode: +inf ++fnmsub.s :: ++input: -45667.250000 100.000000 3.000000 ++output: 4566728.000000 ++fcsr: 0x200 ++roundig mode: +inf ++fnmsub.s :: ++input: -7.000000 -5786.500000 -1.000000 ++output: -40506.500000 ++fcsr: 0x200 ++roundig mode: +inf ++fnmsub.s :: ++input: -347856.500000 1752.000000 -45786.500000 ++output: 609398784.000000 ++fcsr: 0x1010200 
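
Not part of the patch, but as a reading aid: these expected-output records exercise the fused multiply-add family (and, further down, fmax) under all four rounding modes, and are consistent with fmsub computing a*b - c, fnmadd computing -(a*b + c) and fnmsub computing -(a*b - c), each with a single rounding. The fcsr values 0, 0x100, 0x200 and 0x300 track the FCSR0 rounding-mode field for near/zero/+inf/-inf, and the larger values such as 0x1010000 appear to carry the inexact flag/cause bits as well; the "roundig" spelling is the test program's own output format, reproduced verbatim. Below is a minimal host-side C sketch (an illustration only, assuming C99 fma()/fmaf() reproduce the same single-rounding results on an IEEE-754 host) that checks a few of the listed records under round-to-nearest:

  #include <fenv.h>
  #include <math.h>
  #include <stdio.h>

  static void check(const char *op, double got, double want)
  {
      printf("%-9s got %.9f  expected %.9f  %s\n",
             op, got, want, got == want ? "ok" : "MISMATCH");
  }

  int main(void)
  {
      fesetround(FE_TONEAREST);   /* the "roundig mode: near" records */

      /* fmsub.d a,b,c = a*b - c, single rounding: fma(a, b, -c) */
      check("fmsub.d", fma(1752065.0, 1384.5, -0.0), 2425733992.5);

      /* fnmadd.s a,b,c = -(a*b + c): -fmaf(a, b, c) */
      check("fnmadd.s", -fmaf(3.0f, 34.03125f, -1.0f), -101.09375);

      /* fnmsub.s a,b,c = -(a*b - c): -fmaf(a, b, -c) */
      check("fnmsub.s", -fmaf(456.25f, 456.25f, -356047.5f), 147883.4375);

      return 0;
  }

Built with something like "cc -std=c11 check.c -lm" (file name hypothetical), all three lines should report "ok", matching the corresponding near-mode records in this file.
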
++roundig mode: +inf ++fnmsub.s :: ++input: 356047.500000 0.015625 456.000000 ++output: -5107.242187 ++fcsr: 0x200 ++roundig mode: +inf ++fnmsub.s :: ++input: -1.000000 0.031250 34.031250 ++output: 34.062500 ++fcsr: 0x200 ++roundig mode: +inf ++fnmsub.s :: ++input: 23.062500 -248562.750000 45786.031250 ++output: 5778264.000000 ++fcsr: 0x1010200 ++roundig mode: -inf ++fnmsub.s :: ++input: 0.000000 -4578.500000 -347856.500000 ++output: -347856.500000 ++fcsr: 0x300 ++roundig mode: -inf ++fnmsub.s :: ++input: 456.250000 456.250000 356047.500000 ++output: 147883.437500 ++fcsr: 0x300 ++roundig mode: -inf ++fnmsub.s :: ++input: 3.000000 34.031250 -1.000000 ++output: -103.093750 ++fcsr: 0x300 ++roundig mode: -inf ++fnmsub.s :: ++input: -1.000000 4578.750000 23.062500 ++output: 4601.812500 ++fcsr: 0x300 ++roundig mode: -inf ++fnmsub.s :: ++input: 1384.500000 175.000000 1752.000000 ++output: -240535.500000 ++fcsr: 0x300 ++roundig mode: -inf ++fnmsub.s :: ++input: -7.250000 107.000000 0.015625 ++output: 775.765625 ++fcsr: 0x300 ++roundig mode: -inf ++fnmsub.s :: ++input: 1000000000.000000 -456.250000 0.031250 ++output: 456250032128.000000 ++fcsr: 0x1010300 ++roundig mode: -inf ++fnmsub.s :: ++input: -5786.500000 -7.250000 -248562.750000 ++output: -290514.875000 ++fcsr: 0x300 ++roundig mode: -inf ++fnmsub.s :: ++input: 1752.000000 -3478.500000 1384.500000 ++output: 6095716.500000 ++fcsr: 0x300 ++roundig mode: -inf ++fnmsub.s :: ++input: 0.015625 356.500000 -7.250000 ++output: -12.820313 ++fcsr: 0x300 ++roundig mode: -inf ++fnmsub.s :: ++input: 0.031250 -1.000000 1000000000.000000 ++output: 1000000064.000000 ++fcsr: 0x1010300 ++roundig mode: -inf ++fnmsub.s :: ++input: -248562.750000 23.062500 -5786.500000 ++output: 5726692.000000 ++fcsr: 0x1010300 ++roundig mode: -inf ++fnmsub.s :: ++input: -45786.500000 0.000000 -347856.750000 ++output: -347856.750000 ++fcsr: 0x300 ++roundig mode: -inf ++fnmsub.s :: ++input: 456.000000 456.250000 356047.750000 ++output: 147997.750000 ++fcsr: 0x300 ++roundig mode: -inf ++fnmsub.s :: ++input: 34.031250 3.000000 -1.000000 ++output: -103.093750 ++fcsr: 0x300 ++roundig mode: -inf ++fnmsub.s :: ++input: 45786.750000 -1.000000 23.031250 ++output: 45809.781250 ++fcsr: 0x300 ++roundig mode: -inf ++fnmsub.s :: ++input: 1752065.000000 1384.500000 0.000000 ++output: -2425733888.000000 ++fcsr: 0x1010300 ++roundig mode: -inf ++fnmsub.s :: ++input: 107.000000 -7.000000 456.250000 ++output: 1205.250000 ++fcsr: 0x300 ++roundig mode: -inf ++fnmsub.s :: ++input: -45667.250000 100.000000 3.000000 ++output: 4566728.000000 ++fcsr: 0x300 ++roundig mode: -inf ++fnmsub.s :: ++input: -7.000000 -5786.500000 -1.000000 ++output: -40506.500000 ++fcsr: 0x300 ++roundig mode: -inf ++fnmsub.s :: ++input: -347856.500000 1752.000000 -45786.500000 ++output: 609398848.000000 ++fcsr: 0x1010300 ++roundig mode: -inf ++fnmsub.s :: ++input: 356047.500000 0.015625 456.000000 ++output: -5107.242188 ++fcsr: 0x300 ++roundig mode: -inf ++fnmsub.s :: ++input: -1.000000 0.031250 34.031250 ++output: 34.062500 ++fcsr: 0x300 ++roundig mode: -inf ++fnmsub.s :: ++input: 23.062500 -248562.750000 45786.031250 ++output: 5778264.500000 ++fcsr: 0x1010300 ++roundig mode: near ++fnmsub.d :: ++input: 0.000000000000000 -45786.500000000000000 -347856.500000000000000 ++output: -347856.500000000000000 ++fcsr: 0 ++roundig mode: near ++fnmsub.d :: ++input: 456.250000000000000 456.250000000000000 356047.500000000000000 ++output: 147883.437500000000000 ++fcsr: 0 ++roundig mode: near ++fnmsub.d :: ++input: 3.000000000000000 
34.031250000000000 -1.000000000000000 ++output: -103.093750000000000 ++fcsr: 0 ++roundig mode: near ++fnmsub.d :: ++input: -1.000000000000000 45786.750000000000000 23.062500000000000 ++output: 45809.812500000000000 ++fcsr: 0 ++roundig mode: near ++fnmsub.d :: ++input: 1384.500000000000000 1752065.000000000000000 1752.000000000000000 ++output: -2425732240.500000000000000 ++fcsr: 0 ++roundig mode: near ++fnmsub.d :: ++input: -7.250000000000000 107.000000000000000 0.015625000000000 ++output: 775.765625000000000 ++fcsr: 0 ++roundig mode: near ++fnmsub.d :: ++input: 1000000000.000000000000000 -45667.250000000000000 0.031250000000000 ++output: 45667250000000.031250000000000 ++fcsr: 0 ++roundig mode: near ++fnmsub.d :: ++input: -5786.500000000000000 -7.250000000000000 -248562.750000000000000 ++output: -290514.875000000000000 ++fcsr: 0 ++roundig mode: near ++fnmsub.d :: ++input: 1752.000000000000000 -347856.500000000000000 1384.500000000000000 ++output: 609445972.500000000000000 ++fcsr: 0 ++roundig mode: near ++fnmsub.d :: ++input: 0.015625000000000 356047.500000000000000 -7.250000000000000 ++output: -5570.492187500000000 ++fcsr: 0 ++roundig mode: near ++fnmsub.d :: ++input: 0.031250000000000 -1.000000000000000 1000000000.000000000000000 ++output: 1000000000.031250000000000 ++fcsr: 0 ++roundig mode: near ++fnmsub.d :: ++input: -248562.750000000000000 23.062500000000000 -5786.500000000000000 ++output: 5726691.921875000000000 ++fcsr: 0 ++roundig mode: near ++fnmsub.d :: ++input: -45786.500000000000000 0.000000000000000 -347856.750000000000000 ++output: -347856.750000000000000 ++fcsr: 0 ++roundig mode: near ++fnmsub.d :: ++input: 456.000000000000000 456.250000000000000 356047.750000000000000 ++output: 147997.750000000000000 ++fcsr: 0 ++roundig mode: near ++fnmsub.d :: ++input: 34.031250000000000 3.000000000000000 -1.000000000000000 ++output: -103.093750000000000 ++fcsr: 0 ++roundig mode: near ++fnmsub.d :: ++input: 45786.750000000000000 -1.000000000000000 23.031250000000000 ++output: 45809.781250000000000 ++fcsr: 0 ++roundig mode: near ++fnmsub.d :: ++input: 1752065.000000000000000 1384.500000000000000 0.000000000000000 ++output: -2425733992.500000000000000 ++fcsr: 0 ++roundig mode: near ++fnmsub.d :: ++input: 107.000000000000000 -7.000000000000000 456.250000000000000 ++output: 1205.250000000000000 ++fcsr: 0 ++roundig mode: near ++fnmsub.d :: ++input: -45667.250000000000000 1000000000.000000000000000 3.000000000000000 ++output: 45667250000003.000000000000000 ++fcsr: 0 ++roundig mode: near ++fnmsub.d :: ++input: -7.000000000000000 -5786.500000000000000 -1.000000000000000 ++output: -40506.500000000000000 ++fcsr: 0 ++roundig mode: near ++fnmsub.d :: ++input: -347856.500000000000000 1752.000000000000000 -45786.500000000000000 ++output: 609398801.500000000000000 ++fcsr: 0 ++roundig mode: near ++fnmsub.d :: ++input: 356047.500000000000000 0.015625000000000 456.000000000000000 ++output: -5107.242187500000000 ++fcsr: 0 ++roundig mode: near ++fnmsub.d :: ++input: -1.000000000000000 0.031250000000000 34.031250000000000 ++output: 34.062500000000000 ++fcsr: 0 ++roundig mode: near ++fnmsub.d :: ++input: 23.062500000000000 -248562.750000000000000 45786.031250000000000 ++output: 5778264.453125000000000 ++fcsr: 0 ++roundig mode: zero ++fnmsub.d :: ++input: 0.000000000000000 -45786.500000000000000 -347856.500000000000000 ++output: -347856.500000000000000 ++fcsr: 0x100 ++roundig mode: zero ++fnmsub.d :: ++input: 456.250000000000000 456.250000000000000 356047.500000000000000 ++output: 147883.437500000000000 ++fcsr: 
0x100 ++roundig mode: zero ++fnmsub.d :: ++input: 3.000000000000000 34.031250000000000 -1.000000000000000 ++output: -103.093750000000000 ++fcsr: 0x100 ++roundig mode: zero ++fnmsub.d :: ++input: -1.000000000000000 45786.750000000000000 23.062500000000000 ++output: 45809.812500000000000 ++fcsr: 0x100 ++roundig mode: zero ++fnmsub.d :: ++input: 1384.500000000000000 1752065.000000000000000 1752.000000000000000 ++output: -2425732240.500000000000000 ++fcsr: 0x100 ++roundig mode: zero ++fnmsub.d :: ++input: -7.250000000000000 107.000000000000000 0.015625000000000 ++output: 775.765625000000000 ++fcsr: 0x100 ++roundig mode: zero ++fnmsub.d :: ++input: 1000000000.000000000000000 -45667.250000000000000 0.031250000000000 ++output: 45667250000000.031250000000000 ++fcsr: 0x100 ++roundig mode: zero ++fnmsub.d :: ++input: -5786.500000000000000 -7.250000000000000 -248562.750000000000000 ++output: -290514.875000000000000 ++fcsr: 0x100 ++roundig mode: zero ++fnmsub.d :: ++input: 1752.000000000000000 -347856.500000000000000 1384.500000000000000 ++output: 609445972.500000000000000 ++fcsr: 0x100 ++roundig mode: zero ++fnmsub.d :: ++input: 0.015625000000000 356047.500000000000000 -7.250000000000000 ++output: -5570.492187500000000 ++fcsr: 0x100 ++roundig mode: zero ++fnmsub.d :: ++input: 0.031250000000000 -1.000000000000000 1000000000.000000000000000 ++output: 1000000000.031250000000000 ++fcsr: 0x100 ++roundig mode: zero ++fnmsub.d :: ++input: -248562.750000000000000 23.062500000000000 -5786.500000000000000 ++output: 5726691.921875000000000 ++fcsr: 0x100 ++roundig mode: zero ++fnmsub.d :: ++input: -45786.500000000000000 0.000000000000000 -347856.750000000000000 ++output: -347856.750000000000000 ++fcsr: 0x100 ++roundig mode: zero ++fnmsub.d :: ++input: 456.000000000000000 456.250000000000000 356047.750000000000000 ++output: 147997.750000000000000 ++fcsr: 0x100 ++roundig mode: zero ++fnmsub.d :: ++input: 34.031250000000000 3.000000000000000 -1.000000000000000 ++output: -103.093750000000000 ++fcsr: 0x100 ++roundig mode: zero ++fnmsub.d :: ++input: 45786.750000000000000 -1.000000000000000 23.031250000000000 ++output: 45809.781250000000000 ++fcsr: 0x100 ++roundig mode: zero ++fnmsub.d :: ++input: 1752065.000000000000000 1384.500000000000000 0.000000000000000 ++output: -2425733992.500000000000000 ++fcsr: 0x100 ++roundig mode: zero ++fnmsub.d :: ++input: 107.000000000000000 -7.000000000000000 456.250000000000000 ++output: 1205.250000000000000 ++fcsr: 0x100 ++roundig mode: zero ++fnmsub.d :: ++input: -45667.250000000000000 1000000000.000000000000000 3.000000000000000 ++output: 45667250000003.000000000000000 ++fcsr: 0x100 ++roundig mode: zero ++fnmsub.d :: ++input: -7.000000000000000 -5786.500000000000000 -1.000000000000000 ++output: -40506.500000000000000 ++fcsr: 0x100 ++roundig mode: zero ++fnmsub.d :: ++input: -347856.500000000000000 1752.000000000000000 -45786.500000000000000 ++output: 609398801.500000000000000 ++fcsr: 0x100 ++roundig mode: zero ++fnmsub.d :: ++input: 356047.500000000000000 0.015625000000000 456.000000000000000 ++output: -5107.242187500000000 ++fcsr: 0x100 ++roundig mode: zero ++fnmsub.d :: ++input: -1.000000000000000 0.031250000000000 34.031250000000000 ++output: 34.062500000000000 ++fcsr: 0x100 ++roundig mode: zero ++fnmsub.d :: ++input: 23.062500000000000 -248562.750000000000000 45786.031250000000000 ++output: 5778264.453125000000000 ++fcsr: 0x100 ++roundig mode: +inf ++fnmsub.d :: ++input: 0.000000000000000 -45786.500000000000000 -347856.500000000000000 ++output: -347856.500000000000000 ++fcsr: 
0x200 ++roundig mode: +inf ++fnmsub.d :: ++input: 456.250000000000000 456.250000000000000 356047.500000000000000 ++output: 147883.437500000000000 ++fcsr: 0x200 ++roundig mode: +inf ++fnmsub.d :: ++input: 3.000000000000000 34.031250000000000 -1.000000000000000 ++output: -103.093750000000000 ++fcsr: 0x200 ++roundig mode: +inf ++fnmsub.d :: ++input: -1.000000000000000 45786.750000000000000 23.062500000000000 ++output: 45809.812500000000000 ++fcsr: 0x200 ++roundig mode: +inf ++fnmsub.d :: ++input: 1384.500000000000000 1752065.000000000000000 1752.000000000000000 ++output: -2425732240.500000000000000 ++fcsr: 0x200 ++roundig mode: +inf ++fnmsub.d :: ++input: -7.250000000000000 107.000000000000000 0.015625000000000 ++output: 775.765625000000000 ++fcsr: 0x200 ++roundig mode: +inf ++fnmsub.d :: ++input: 1000000000.000000000000000 -45667.250000000000000 0.031250000000000 ++output: 45667250000000.031250000000000 ++fcsr: 0x200 ++roundig mode: +inf ++fnmsub.d :: ++input: -5786.500000000000000 -7.250000000000000 -248562.750000000000000 ++output: -290514.875000000000000 ++fcsr: 0x200 ++roundig mode: +inf ++fnmsub.d :: ++input: 1752.000000000000000 -347856.500000000000000 1384.500000000000000 ++output: 609445972.500000000000000 ++fcsr: 0x200 ++roundig mode: +inf ++fnmsub.d :: ++input: 0.015625000000000 356047.500000000000000 -7.250000000000000 ++output: -5570.492187500000000 ++fcsr: 0x200 ++roundig mode: +inf ++fnmsub.d :: ++input: 0.031250000000000 -1.000000000000000 1000000000.000000000000000 ++output: 1000000000.031250000000000 ++fcsr: 0x200 ++roundig mode: +inf ++fnmsub.d :: ++input: -248562.750000000000000 23.062500000000000 -5786.500000000000000 ++output: 5726691.921875000000000 ++fcsr: 0x200 ++roundig mode: +inf ++fnmsub.d :: ++input: -45786.500000000000000 0.000000000000000 -347856.750000000000000 ++output: -347856.750000000000000 ++fcsr: 0x200 ++roundig mode: +inf ++fnmsub.d :: ++input: 456.000000000000000 456.250000000000000 356047.750000000000000 ++output: 147997.750000000000000 ++fcsr: 0x200 ++roundig mode: +inf ++fnmsub.d :: ++input: 34.031250000000000 3.000000000000000 -1.000000000000000 ++output: -103.093750000000000 ++fcsr: 0x200 ++roundig mode: +inf ++fnmsub.d :: ++input: 45786.750000000000000 -1.000000000000000 23.031250000000000 ++output: 45809.781250000000000 ++fcsr: 0x200 ++roundig mode: +inf ++fnmsub.d :: ++input: 1752065.000000000000000 1384.500000000000000 0.000000000000000 ++output: -2425733992.500000000000000 ++fcsr: 0x200 ++roundig mode: +inf ++fnmsub.d :: ++input: 107.000000000000000 -7.000000000000000 456.250000000000000 ++output: 1205.250000000000000 ++fcsr: 0x200 ++roundig mode: +inf ++fnmsub.d :: ++input: -45667.250000000000000 1000000000.000000000000000 3.000000000000000 ++output: 45667250000003.000000000000000 ++fcsr: 0x200 ++roundig mode: +inf ++fnmsub.d :: ++input: -7.000000000000000 -5786.500000000000000 -1.000000000000000 ++output: -40506.500000000000000 ++fcsr: 0x200 ++roundig mode: +inf ++fnmsub.d :: ++input: -347856.500000000000000 1752.000000000000000 -45786.500000000000000 ++output: 609398801.500000000000000 ++fcsr: 0x200 ++roundig mode: +inf ++fnmsub.d :: ++input: 356047.500000000000000 0.015625000000000 456.000000000000000 ++output: -5107.242187500000000 ++fcsr: 0x200 ++roundig mode: +inf ++fnmsub.d :: ++input: -1.000000000000000 0.031250000000000 34.031250000000000 ++output: 34.062500000000000 ++fcsr: 0x200 ++roundig mode: +inf ++fnmsub.d :: ++input: 23.062500000000000 -248562.750000000000000 45786.031250000000000 ++output: 5778264.453125000000000 ++fcsr: 
0x200 ++roundig mode: -inf ++fnmsub.d :: ++input: 0.000000000000000 -45786.500000000000000 -347856.500000000000000 ++output: -347856.500000000000000 ++fcsr: 0x300 ++roundig mode: -inf ++fnmsub.d :: ++input: 456.250000000000000 456.250000000000000 356047.500000000000000 ++output: 147883.437500000000000 ++fcsr: 0x300 ++roundig mode: -inf ++fnmsub.d :: ++input: 3.000000000000000 34.031250000000000 -1.000000000000000 ++output: -103.093750000000000 ++fcsr: 0x300 ++roundig mode: -inf ++fnmsub.d :: ++input: -1.000000000000000 45786.750000000000000 23.062500000000000 ++output: 45809.812500000000000 ++fcsr: 0x300 ++roundig mode: -inf ++fnmsub.d :: ++input: 1384.500000000000000 1752065.000000000000000 1752.000000000000000 ++output: -2425732240.500000000000000 ++fcsr: 0x300 ++roundig mode: -inf ++fnmsub.d :: ++input: -7.250000000000000 107.000000000000000 0.015625000000000 ++output: 775.765625000000000 ++fcsr: 0x300 ++roundig mode: -inf ++fnmsub.d :: ++input: 1000000000.000000000000000 -45667.250000000000000 0.031250000000000 ++output: 45667250000000.031250000000000 ++fcsr: 0x300 ++roundig mode: -inf ++fnmsub.d :: ++input: -5786.500000000000000 -7.250000000000000 -248562.750000000000000 ++output: -290514.875000000000000 ++fcsr: 0x300 ++roundig mode: -inf ++fnmsub.d :: ++input: 1752.000000000000000 -347856.500000000000000 1384.500000000000000 ++output: 609445972.500000000000000 ++fcsr: 0x300 ++roundig mode: -inf ++fnmsub.d :: ++input: 0.015625000000000 356047.500000000000000 -7.250000000000000 ++output: -5570.492187500000000 ++fcsr: 0x300 ++roundig mode: -inf ++fnmsub.d :: ++input: 0.031250000000000 -1.000000000000000 1000000000.000000000000000 ++output: 1000000000.031250000000000 ++fcsr: 0x300 ++roundig mode: -inf ++fnmsub.d :: ++input: -248562.750000000000000 23.062500000000000 -5786.500000000000000 ++output: 5726691.921875000000000 ++fcsr: 0x300 ++roundig mode: -inf ++fnmsub.d :: ++input: -45786.500000000000000 0.000000000000000 -347856.750000000000000 ++output: -347856.750000000000000 ++fcsr: 0x300 ++roundig mode: -inf ++fnmsub.d :: ++input: 456.000000000000000 456.250000000000000 356047.750000000000000 ++output: 147997.750000000000000 ++fcsr: 0x300 ++roundig mode: -inf ++fnmsub.d :: ++input: 34.031250000000000 3.000000000000000 -1.000000000000000 ++output: -103.093750000000000 ++fcsr: 0x300 ++roundig mode: -inf ++fnmsub.d :: ++input: 45786.750000000000000 -1.000000000000000 23.031250000000000 ++output: 45809.781250000000000 ++fcsr: 0x300 ++roundig mode: -inf ++fnmsub.d :: ++input: 1752065.000000000000000 1384.500000000000000 0.000000000000000 ++output: -2425733992.500000000000000 ++fcsr: 0x300 ++roundig mode: -inf ++fnmsub.d :: ++input: 107.000000000000000 -7.000000000000000 456.250000000000000 ++output: 1205.250000000000000 ++fcsr: 0x300 ++roundig mode: -inf ++fnmsub.d :: ++input: -45667.250000000000000 1000000000.000000000000000 3.000000000000000 ++output: 45667250000003.000000000000000 ++fcsr: 0x300 ++roundig mode: -inf ++fnmsub.d :: ++input: -7.000000000000000 -5786.500000000000000 -1.000000000000000 ++output: -40506.500000000000000 ++fcsr: 0x300 ++roundig mode: -inf ++fnmsub.d :: ++input: -347856.500000000000000 1752.000000000000000 -45786.500000000000000 ++output: 609398801.500000000000000 ++fcsr: 0x300 ++roundig mode: -inf ++fnmsub.d :: ++input: 356047.500000000000000 0.015625000000000 456.000000000000000 ++output: -5107.242187500000000 ++fcsr: 0x300 ++roundig mode: -inf ++fnmsub.d :: ++input: -1.000000000000000 0.031250000000000 34.031250000000000 ++output: 34.062500000000000 ++fcsr: 
0x300 ++roundig mode: -inf ++fnmsub.d :: ++input: 23.062500000000000 -248562.750000000000000 45786.031250000000000 ++output: 5778264.453125000000000 ++fcsr: 0x300 ++roundig mode: near ++fmax.s :: ++input: 0.000000 -4578.500000 ++output: 0.000000 ++fcsr: 0 ++roundig mode: near ++fmax.s :: ++input: 456.250000 456.250000 ++output: 456.250000 ++fcsr: 0 ++roundig mode: near ++fmax.s :: ++input: 3.000000 34.031250 ++output: 34.031250 ++fcsr: 0 ++roundig mode: near ++fmax.s :: ++input: -1.000000 4578.750000 ++output: 4578.750000 ++fcsr: 0 ++roundig mode: near ++fmax.s :: ++input: 1384.500000 175.000000 ++output: 1384.500000 ++fcsr: 0 ++roundig mode: near ++fmax.s :: ++input: -7.250000 107.000000 ++output: 107.000000 ++fcsr: 0 ++roundig mode: near ++fmax.s :: ++input: 1000000000.000000 -456.250000 ++output: 1000000000.000000 ++fcsr: 0 ++roundig mode: near ++fmax.s :: ++input: -5786.500000 -7.250000 ++output: -7.250000 ++fcsr: 0 ++roundig mode: near ++fmax.s :: ++input: 1752.000000 -3478.500000 ++output: 1752.000000 ++fcsr: 0 ++roundig mode: near ++fmax.s :: ++input: 0.015625 356.500000 ++output: 356.500000 ++fcsr: 0 ++roundig mode: near ++fmax.s :: ++input: 0.031250 -1.000000 ++output: 0.031250 ++fcsr: 0 ++roundig mode: near ++fmax.s :: ++input: -248562.750000 23.062500 ++output: 23.062500 ++fcsr: 0 ++roundig mode: near ++fmax.s :: ++input: -45786.500000 0.000000 ++output: 0.000000 ++fcsr: 0 ++roundig mode: near ++fmax.s :: ++input: 456.000000 456.250000 ++output: 456.250000 ++fcsr: 0 ++roundig mode: near ++fmax.s :: ++input: 34.031250 3.000000 ++output: 34.031250 ++fcsr: 0 ++roundig mode: near ++fmax.s :: ++input: 45786.750000 -1.000000 ++output: 45786.750000 ++fcsr: 0 ++roundig mode: near ++fmax.s :: ++input: 1752065.000000 1384.500000 ++output: 1752065.000000 ++fcsr: 0 ++roundig mode: near ++fmax.s :: ++input: 107.000000 -7.000000 ++output: 107.000000 ++fcsr: 0 ++roundig mode: near ++fmax.s :: ++input: -45667.250000 100.000000 ++output: 100.000000 ++fcsr: 0 ++roundig mode: near ++fmax.s :: ++input: -7.000000 -5786.500000 ++output: -7.000000 ++fcsr: 0 ++roundig mode: near ++fmax.s :: ++input: -347856.500000 1752.000000 ++output: 1752.000000 ++fcsr: 0 ++roundig mode: near ++fmax.s :: ++input: 356047.500000 0.015625 ++output: 356047.500000 ++fcsr: 0 ++roundig mode: near ++fmax.s :: ++input: -1.000000 0.031250 ++output: 0.031250 ++fcsr: 0 ++roundig mode: near ++fmax.s :: ++input: 23.062500 -248562.750000 ++output: 23.062500 ++fcsr: 0 ++roundig mode: zero ++fmax.s :: ++input: 0.000000 -4578.500000 ++output: 0.000000 ++fcsr: 0x100 ++roundig mode: zero ++fmax.s :: ++input: 456.250000 456.250000 ++output: 456.250000 ++fcsr: 0x100 ++roundig mode: zero ++fmax.s :: ++input: 3.000000 34.031250 ++output: 34.031250 ++fcsr: 0x100 ++roundig mode: zero ++fmax.s :: ++input: -1.000000 4578.750000 ++output: 4578.750000 ++fcsr: 0x100 ++roundig mode: zero ++fmax.s :: ++input: 1384.500000 175.000000 ++output: 1384.500000 ++fcsr: 0x100 ++roundig mode: zero ++fmax.s :: ++input: -7.250000 107.000000 ++output: 107.000000 ++fcsr: 0x100 ++roundig mode: zero ++fmax.s :: ++input: 1000000000.000000 -456.250000 ++output: 1000000000.000000 ++fcsr: 0x100 ++roundig mode: zero ++fmax.s :: ++input: -5786.500000 -7.250000 ++output: -7.250000 ++fcsr: 0x100 ++roundig mode: zero ++fmax.s :: ++input: 1752.000000 -3478.500000 ++output: 1752.000000 ++fcsr: 0x100 ++roundig mode: zero ++fmax.s :: ++input: 0.015625 356.500000 ++output: 356.500000 ++fcsr: 0x100 ++roundig mode: zero ++fmax.s :: ++input: 0.031250 -1.000000 ++output: 0.031250 
++fcsr: 0x100 ++roundig mode: zero ++fmax.s :: ++input: -248562.750000 23.062500 ++output: 23.062500 ++fcsr: 0x100 ++roundig mode: zero ++fmax.s :: ++input: -45786.500000 0.000000 ++output: 0.000000 ++fcsr: 0x100 ++roundig mode: zero ++fmax.s :: ++input: 456.000000 456.250000 ++output: 456.250000 ++fcsr: 0x100 ++roundig mode: zero ++fmax.s :: ++input: 34.031250 3.000000 ++output: 34.031250 ++fcsr: 0x100 ++roundig mode: zero ++fmax.s :: ++input: 45786.750000 -1.000000 ++output: 45786.750000 ++fcsr: 0x100 ++roundig mode: zero ++fmax.s :: ++input: 1752065.000000 1384.500000 ++output: 1752065.000000 ++fcsr: 0x100 ++roundig mode: zero ++fmax.s :: ++input: 107.000000 -7.000000 ++output: 107.000000 ++fcsr: 0x100 ++roundig mode: zero ++fmax.s :: ++input: -45667.250000 100.000000 ++output: 100.000000 ++fcsr: 0x100 ++roundig mode: zero ++fmax.s :: ++input: -7.000000 -5786.500000 ++output: -7.000000 ++fcsr: 0x100 ++roundig mode: zero ++fmax.s :: ++input: -347856.500000 1752.000000 ++output: 1752.000000 ++fcsr: 0x100 ++roundig mode: zero ++fmax.s :: ++input: 356047.500000 0.015625 ++output: 356047.500000 ++fcsr: 0x100 ++roundig mode: zero ++fmax.s :: ++input: -1.000000 0.031250 ++output: 0.031250 ++fcsr: 0x100 ++roundig mode: zero ++fmax.s :: ++input: 23.062500 -248562.750000 ++output: 23.062500 ++fcsr: 0x100 ++roundig mode: +inf ++fmax.s :: ++input: 0.000000 -4578.500000 ++output: 0.000000 ++fcsr: 0x200 ++roundig mode: +inf ++fmax.s :: ++input: 456.250000 456.250000 ++output: 456.250000 ++fcsr: 0x200 ++roundig mode: +inf ++fmax.s :: ++input: 3.000000 34.031250 ++output: 34.031250 ++fcsr: 0x200 ++roundig mode: +inf ++fmax.s :: ++input: -1.000000 4578.750000 ++output: 4578.750000 ++fcsr: 0x200 ++roundig mode: +inf ++fmax.s :: ++input: 1384.500000 175.000000 ++output: 1384.500000 ++fcsr: 0x200 ++roundig mode: +inf ++fmax.s :: ++input: -7.250000 107.000000 ++output: 107.000000 ++fcsr: 0x200 ++roundig mode: +inf ++fmax.s :: ++input: 1000000000.000000 -456.250000 ++output: 1000000000.000000 ++fcsr: 0x200 ++roundig mode: +inf ++fmax.s :: ++input: -5786.500000 -7.250000 ++output: -7.250000 ++fcsr: 0x200 ++roundig mode: +inf ++fmax.s :: ++input: 1752.000000 -3478.500000 ++output: 1752.000000 ++fcsr: 0x200 ++roundig mode: +inf ++fmax.s :: ++input: 0.015625 356.500000 ++output: 356.500000 ++fcsr: 0x200 ++roundig mode: +inf ++fmax.s :: ++input: 0.031250 -1.000000 ++output: 0.031250 ++fcsr: 0x200 ++roundig mode: +inf ++fmax.s :: ++input: -248562.750000 23.062500 ++output: 23.062500 ++fcsr: 0x200 ++roundig mode: +inf ++fmax.s :: ++input: -45786.500000 0.000000 ++output: 0.000000 ++fcsr: 0x200 ++roundig mode: +inf ++fmax.s :: ++input: 456.000000 456.250000 ++output: 456.250000 ++fcsr: 0x200 ++roundig mode: +inf ++fmax.s :: ++input: 34.031250 3.000000 ++output: 34.031250 ++fcsr: 0x200 ++roundig mode: +inf ++fmax.s :: ++input: 45786.750000 -1.000000 ++output: 45786.750000 ++fcsr: 0x200 ++roundig mode: +inf ++fmax.s :: ++input: 1752065.000000 1384.500000 ++output: 1752065.000000 ++fcsr: 0x200 ++roundig mode: +inf ++fmax.s :: ++input: 107.000000 -7.000000 ++output: 107.000000 ++fcsr: 0x200 ++roundig mode: +inf ++fmax.s :: ++input: -45667.250000 100.000000 ++output: 100.000000 ++fcsr: 0x200 ++roundig mode: +inf ++fmax.s :: ++input: -7.000000 -5786.500000 ++output: -7.000000 ++fcsr: 0x200 ++roundig mode: +inf ++fmax.s :: ++input: -347856.500000 1752.000000 ++output: 1752.000000 ++fcsr: 0x200 ++roundig mode: +inf ++fmax.s :: ++input: 356047.500000 0.015625 ++output: 356047.500000 ++fcsr: 0x200 ++roundig mode: +inf 
++fmax.s :: ++input: -1.000000 0.031250 ++output: 0.031250 ++fcsr: 0x200 ++roundig mode: +inf ++fmax.s :: ++input: 23.062500 -248562.750000 ++output: 23.062500 ++fcsr: 0x200 ++roundig mode: -inf ++fmax.s :: ++input: 0.000000 -4578.500000 ++output: 0.000000 ++fcsr: 0x300 ++roundig mode: -inf ++fmax.s :: ++input: 456.250000 456.250000 ++output: 456.250000 ++fcsr: 0x300 ++roundig mode: -inf ++fmax.s :: ++input: 3.000000 34.031250 ++output: 34.031250 ++fcsr: 0x300 ++roundig mode: -inf ++fmax.s :: ++input: -1.000000 4578.750000 ++output: 4578.750000 ++fcsr: 0x300 ++roundig mode: -inf ++fmax.s :: ++input: 1384.500000 175.000000 ++output: 1384.500000 ++fcsr: 0x300 ++roundig mode: -inf ++fmax.s :: ++input: -7.250000 107.000000 ++output: 107.000000 ++fcsr: 0x300 ++roundig mode: -inf ++fmax.s :: ++input: 1000000000.000000 -456.250000 ++output: 1000000000.000000 ++fcsr: 0x300 ++roundig mode: -inf ++fmax.s :: ++input: -5786.500000 -7.250000 ++output: -7.250000 ++fcsr: 0x300 ++roundig mode: -inf ++fmax.s :: ++input: 1752.000000 -3478.500000 ++output: 1752.000000 ++fcsr: 0x300 ++roundig mode: -inf ++fmax.s :: ++input: 0.015625 356.500000 ++output: 356.500000 ++fcsr: 0x300 ++roundig mode: -inf ++fmax.s :: ++input: 0.031250 -1.000000 ++output: 0.031250 ++fcsr: 0x300 ++roundig mode: -inf ++fmax.s :: ++input: -248562.750000 23.062500 ++output: 23.062500 ++fcsr: 0x300 ++roundig mode: -inf ++fmax.s :: ++input: -45786.500000 0.000000 ++output: 0.000000 ++fcsr: 0x300 ++roundig mode: -inf ++fmax.s :: ++input: 456.000000 456.250000 ++output: 456.250000 ++fcsr: 0x300 ++roundig mode: -inf ++fmax.s :: ++input: 34.031250 3.000000 ++output: 34.031250 ++fcsr: 0x300 ++roundig mode: -inf ++fmax.s :: ++input: 45786.750000 -1.000000 ++output: 45786.750000 ++fcsr: 0x300 ++roundig mode: -inf ++fmax.s :: ++input: 1752065.000000 1384.500000 ++output: 1752065.000000 ++fcsr: 0x300 ++roundig mode: -inf ++fmax.s :: ++input: 107.000000 -7.000000 ++output: 107.000000 ++fcsr: 0x300 ++roundig mode: -inf ++fmax.s :: ++input: -45667.250000 100.000000 ++output: 100.000000 ++fcsr: 0x300 ++roundig mode: -inf ++fmax.s :: ++input: -7.000000 -5786.500000 ++output: -7.000000 ++fcsr: 0x300 ++roundig mode: -inf ++fmax.s :: ++input: -347856.500000 1752.000000 ++output: 1752.000000 ++fcsr: 0x300 ++roundig mode: -inf ++fmax.s :: ++input: 356047.500000 0.015625 ++output: 356047.500000 ++fcsr: 0x300 ++roundig mode: -inf ++fmax.s :: ++input: -1.000000 0.031250 ++output: 0.031250 ++fcsr: 0x300 ++roundig mode: -inf ++fmax.s :: ++input: 23.062500 -248562.750000 ++output: 23.062500 ++fcsr: 0x300 ++roundig mode: near ++fmax.d :: ++input: 0.000000000000000 -45786.500000000000000 ++output: 0.000000000000000 ++fcsr: 0 ++roundig mode: near ++fmax.d :: ++input: 456.250000000000000 456.250000000000000 ++output: 456.250000000000000 ++fcsr: 0 ++roundig mode: near ++fmax.d :: ++input: 3.000000000000000 34.031250000000000 ++output: 34.031250000000000 ++fcsr: 0 ++roundig mode: near ++fmax.d :: ++input: -1.000000000000000 45786.750000000000000 ++output: 45786.750000000000000 ++fcsr: 0 ++roundig mode: near ++fmax.d :: ++input: 1384.500000000000000 1752065.000000000000000 ++output: 1752065.000000000000000 ++fcsr: 0 ++roundig mode: near ++fmax.d :: ++input: -7.250000000000000 107.000000000000000 ++output: 107.000000000000000 ++fcsr: 0 ++roundig mode: near ++fmax.d :: ++input: 1000000000.000000000000000 -45667.250000000000000 ++output: 1000000000.000000000000000 ++fcsr: 0 ++roundig mode: near ++fmax.d :: ++input: -5786.500000000000000 -7.250000000000000 ++output: 
-7.250000000000000 ++fcsr: 0 ++roundig mode: near ++fmax.d :: ++input: 1752.000000000000000 -347856.500000000000000 ++output: 1752.000000000000000 ++fcsr: 0 ++roundig mode: near ++fmax.d :: ++input: 0.015625000000000 356047.500000000000000 ++output: 356047.500000000000000 ++fcsr: 0 ++roundig mode: near ++fmax.d :: ++input: 0.031250000000000 -1.000000000000000 ++output: 0.031250000000000 ++fcsr: 0 ++roundig mode: near ++fmax.d :: ++input: -248562.750000000000000 23.062500000000000 ++output: 23.062500000000000 ++fcsr: 0 ++roundig mode: near ++fmax.d :: ++input: -45786.500000000000000 0.000000000000000 ++output: 0.000000000000000 ++fcsr: 0 ++roundig mode: near ++fmax.d :: ++input: 456.000000000000000 456.250000000000000 ++output: 456.250000000000000 ++fcsr: 0 ++roundig mode: near ++fmax.d :: ++input: 34.031250000000000 3.000000000000000 ++output: 34.031250000000000 ++fcsr: 0 ++roundig mode: near ++fmax.d :: ++input: 45786.750000000000000 -1.000000000000000 ++output: 45786.750000000000000 ++fcsr: 0 ++roundig mode: near ++fmax.d :: ++input: 1752065.000000000000000 1384.500000000000000 ++output: 1752065.000000000000000 ++fcsr: 0 ++roundig mode: near ++fmax.d :: ++input: 107.000000000000000 -7.000000000000000 ++output: 107.000000000000000 ++fcsr: 0 ++roundig mode: near ++fmax.d :: ++input: -45667.250000000000000 1000000000.000000000000000 ++output: 1000000000.000000000000000 ++fcsr: 0 ++roundig mode: near ++fmax.d :: ++input: -7.000000000000000 -5786.500000000000000 ++output: -7.000000000000000 ++fcsr: 0 ++roundig mode: near ++fmax.d :: ++input: -347856.500000000000000 1752.000000000000000 ++output: 1752.000000000000000 ++fcsr: 0 ++roundig mode: near ++fmax.d :: ++input: 356047.500000000000000 0.015625000000000 ++output: 356047.500000000000000 ++fcsr: 0 ++roundig mode: near ++fmax.d :: ++input: -1.000000000000000 0.031250000000000 ++output: 0.031250000000000 ++fcsr: 0 ++roundig mode: near ++fmax.d :: ++input: 23.062500000000000 -248562.750000000000000 ++output: 23.062500000000000 ++fcsr: 0 ++roundig mode: zero ++fmax.d :: ++input: 0.000000000000000 -45786.500000000000000 ++output: 0.000000000000000 ++fcsr: 0x100 ++roundig mode: zero ++fmax.d :: ++input: 456.250000000000000 456.250000000000000 ++output: 456.250000000000000 ++fcsr: 0x100 ++roundig mode: zero ++fmax.d :: ++input: 3.000000000000000 34.031250000000000 ++output: 34.031250000000000 ++fcsr: 0x100 ++roundig mode: zero ++fmax.d :: ++input: -1.000000000000000 45786.750000000000000 ++output: 45786.750000000000000 ++fcsr: 0x100 ++roundig mode: zero ++fmax.d :: ++input: 1384.500000000000000 1752065.000000000000000 ++output: 1752065.000000000000000 ++fcsr: 0x100 ++roundig mode: zero ++fmax.d :: ++input: -7.250000000000000 107.000000000000000 ++output: 107.000000000000000 ++fcsr: 0x100 ++roundig mode: zero ++fmax.d :: ++input: 1000000000.000000000000000 -45667.250000000000000 ++output: 1000000000.000000000000000 ++fcsr: 0x100 ++roundig mode: zero ++fmax.d :: ++input: -5786.500000000000000 -7.250000000000000 ++output: -7.250000000000000 ++fcsr: 0x100 ++roundig mode: zero ++fmax.d :: ++input: 1752.000000000000000 -347856.500000000000000 ++output: 1752.000000000000000 ++fcsr: 0x100 ++roundig mode: zero ++fmax.d :: ++input: 0.015625000000000 356047.500000000000000 ++output: 356047.500000000000000 ++fcsr: 0x100 ++roundig mode: zero ++fmax.d :: ++input: 0.031250000000000 -1.000000000000000 ++output: 0.031250000000000 ++fcsr: 0x100 ++roundig mode: zero ++fmax.d :: ++input: -248562.750000000000000 23.062500000000000 ++output: 23.062500000000000 ++fcsr: 
0x100 ++roundig mode: zero ++fmax.d :: ++input: -45786.500000000000000 0.000000000000000 ++output: 0.000000000000000 ++fcsr: 0x100 ++roundig mode: zero ++fmax.d :: ++input: 456.000000000000000 456.250000000000000 ++output: 456.250000000000000 ++fcsr: 0x100 ++roundig mode: zero ++fmax.d :: ++input: 34.031250000000000 3.000000000000000 ++output: 34.031250000000000 ++fcsr: 0x100 ++roundig mode: zero ++fmax.d :: ++input: 45786.750000000000000 -1.000000000000000 ++output: 45786.750000000000000 ++fcsr: 0x100 ++roundig mode: zero ++fmax.d :: ++input: 1752065.000000000000000 1384.500000000000000 ++output: 1752065.000000000000000 ++fcsr: 0x100 ++roundig mode: zero ++fmax.d :: ++input: 107.000000000000000 -7.000000000000000 ++output: 107.000000000000000 ++fcsr: 0x100 ++roundig mode: zero ++fmax.d :: ++input: -45667.250000000000000 1000000000.000000000000000 ++output: 1000000000.000000000000000 ++fcsr: 0x100 ++roundig mode: zero ++fmax.d :: ++input: -7.000000000000000 -5786.500000000000000 ++output: -7.000000000000000 ++fcsr: 0x100 ++roundig mode: zero ++fmax.d :: ++input: -347856.500000000000000 1752.000000000000000 ++output: 1752.000000000000000 ++fcsr: 0x100 ++roundig mode: zero ++fmax.d :: ++input: 356047.500000000000000 0.015625000000000 ++output: 356047.500000000000000 ++fcsr: 0x100 ++roundig mode: zero ++fmax.d :: ++input: -1.000000000000000 0.031250000000000 ++output: 0.031250000000000 ++fcsr: 0x100 ++roundig mode: zero ++fmax.d :: ++input: 23.062500000000000 -248562.750000000000000 ++output: 23.062500000000000 ++fcsr: 0x100 ++roundig mode: +inf ++fmax.d :: ++input: 0.000000000000000 -45786.500000000000000 ++output: 0.000000000000000 ++fcsr: 0x200 ++roundig mode: +inf ++fmax.d :: ++input: 456.250000000000000 456.250000000000000 ++output: 456.250000000000000 ++fcsr: 0x200 ++roundig mode: +inf ++fmax.d :: ++input: 3.000000000000000 34.031250000000000 ++output: 34.031250000000000 ++fcsr: 0x200 ++roundig mode: +inf ++fmax.d :: ++input: -1.000000000000000 45786.750000000000000 ++output: 45786.750000000000000 ++fcsr: 0x200 ++roundig mode: +inf ++fmax.d :: ++input: 1384.500000000000000 1752065.000000000000000 ++output: 1752065.000000000000000 ++fcsr: 0x200 ++roundig mode: +inf ++fmax.d :: ++input: -7.250000000000000 107.000000000000000 ++output: 107.000000000000000 ++fcsr: 0x200 ++roundig mode: +inf ++fmax.d :: ++input: 1000000000.000000000000000 -45667.250000000000000 ++output: 1000000000.000000000000000 ++fcsr: 0x200 ++roundig mode: +inf ++fmax.d :: ++input: -5786.500000000000000 -7.250000000000000 ++output: -7.250000000000000 ++fcsr: 0x200 ++roundig mode: +inf ++fmax.d :: ++input: 1752.000000000000000 -347856.500000000000000 ++output: 1752.000000000000000 ++fcsr: 0x200 ++roundig mode: +inf ++fmax.d :: ++input: 0.015625000000000 356047.500000000000000 ++output: 356047.500000000000000 ++fcsr: 0x200 ++roundig mode: +inf ++fmax.d :: ++input: 0.031250000000000 -1.000000000000000 ++output: 0.031250000000000 ++fcsr: 0x200 ++roundig mode: +inf ++fmax.d :: ++input: -248562.750000000000000 23.062500000000000 ++output: 23.062500000000000 ++fcsr: 0x200 ++roundig mode: +inf ++fmax.d :: ++input: -45786.500000000000000 0.000000000000000 ++output: 0.000000000000000 ++fcsr: 0x200 ++roundig mode: +inf ++fmax.d :: ++input: 456.000000000000000 456.250000000000000 ++output: 456.250000000000000 ++fcsr: 0x200 ++roundig mode: +inf ++fmax.d :: ++input: 34.031250000000000 3.000000000000000 ++output: 34.031250000000000 ++fcsr: 0x200 ++roundig mode: +inf ++fmax.d :: ++input: 45786.750000000000000 -1.000000000000000 
++output: 45786.750000000000000 ++fcsr: 0x200 ++roundig mode: +inf ++fmax.d :: ++input: 1752065.000000000000000 1384.500000000000000 ++output: 1752065.000000000000000 ++fcsr: 0x200 ++roundig mode: +inf ++fmax.d :: ++input: 107.000000000000000 -7.000000000000000 ++output: 107.000000000000000 ++fcsr: 0x200 ++roundig mode: +inf ++fmax.d :: ++input: -45667.250000000000000 1000000000.000000000000000 ++output: 1000000000.000000000000000 ++fcsr: 0x200 ++roundig mode: +inf ++fmax.d :: ++input: -7.000000000000000 -5786.500000000000000 ++output: -7.000000000000000 ++fcsr: 0x200 ++roundig mode: +inf ++fmax.d :: ++input: -347856.500000000000000 1752.000000000000000 ++output: 1752.000000000000000 ++fcsr: 0x200 ++roundig mode: +inf ++fmax.d :: ++input: 356047.500000000000000 0.015625000000000 ++output: 356047.500000000000000 ++fcsr: 0x200 ++roundig mode: +inf ++fmax.d :: ++input: -1.000000000000000 0.031250000000000 ++output: 0.031250000000000 ++fcsr: 0x200 ++roundig mode: +inf ++fmax.d :: ++input: 23.062500000000000 -248562.750000000000000 ++output: 23.062500000000000 ++fcsr: 0x200 ++roundig mode: -inf ++fmax.d :: ++input: 0.000000000000000 -45786.500000000000000 ++output: 0.000000000000000 ++fcsr: 0x300 ++roundig mode: -inf ++fmax.d :: ++input: 456.250000000000000 456.250000000000000 ++output: 456.250000000000000 ++fcsr: 0x300 ++roundig mode: -inf ++fmax.d :: ++input: 3.000000000000000 34.031250000000000 ++output: 34.031250000000000 ++fcsr: 0x300 ++roundig mode: -inf ++fmax.d :: ++input: -1.000000000000000 45786.750000000000000 ++output: 45786.750000000000000 ++fcsr: 0x300 ++roundig mode: -inf ++fmax.d :: ++input: 1384.500000000000000 1752065.000000000000000 ++output: 1752065.000000000000000 ++fcsr: 0x300 ++roundig mode: -inf ++fmax.d :: ++input: -7.250000000000000 107.000000000000000 ++output: 107.000000000000000 ++fcsr: 0x300 ++roundig mode: -inf ++fmax.d :: ++input: 1000000000.000000000000000 -45667.250000000000000 ++output: 1000000000.000000000000000 ++fcsr: 0x300 ++roundig mode: -inf ++fmax.d :: ++input: -5786.500000000000000 -7.250000000000000 ++output: -7.250000000000000 ++fcsr: 0x300 ++roundig mode: -inf ++fmax.d :: ++input: 1752.000000000000000 -347856.500000000000000 ++output: 1752.000000000000000 ++fcsr: 0x300 ++roundig mode: -inf ++fmax.d :: ++input: 0.015625000000000 356047.500000000000000 ++output: 356047.500000000000000 ++fcsr: 0x300 ++roundig mode: -inf ++fmax.d :: ++input: 0.031250000000000 -1.000000000000000 ++output: 0.031250000000000 ++fcsr: 0x300 ++roundig mode: -inf ++fmax.d :: ++input: -248562.750000000000000 23.062500000000000 ++output: 23.062500000000000 ++fcsr: 0x300 ++roundig mode: -inf ++fmax.d :: ++input: -45786.500000000000000 0.000000000000000 ++output: 0.000000000000000 ++fcsr: 0x300 ++roundig mode: -inf ++fmax.d :: ++input: 456.000000000000000 456.250000000000000 ++output: 456.250000000000000 ++fcsr: 0x300 ++roundig mode: -inf ++fmax.d :: ++input: 34.031250000000000 3.000000000000000 ++output: 34.031250000000000 ++fcsr: 0x300 ++roundig mode: -inf ++fmax.d :: ++input: 45786.750000000000000 -1.000000000000000 ++output: 45786.750000000000000 ++fcsr: 0x300 ++roundig mode: -inf ++fmax.d :: ++input: 1752065.000000000000000 1384.500000000000000 ++output: 1752065.000000000000000 ++fcsr: 0x300 ++roundig mode: -inf ++fmax.d :: ++input: 107.000000000000000 -7.000000000000000 ++output: 107.000000000000000 ++fcsr: 0x300 ++roundig mode: -inf ++fmax.d :: ++input: -45667.250000000000000 1000000000.000000000000000 ++output: 1000000000.000000000000000 ++fcsr: 0x300 ++roundig mode: -inf 
++fmax.d :: ++input: -7.000000000000000 -5786.500000000000000 ++output: -7.000000000000000 ++fcsr: 0x300 ++roundig mode: -inf ++fmax.d :: ++input: -347856.500000000000000 1752.000000000000000 ++output: 1752.000000000000000 ++fcsr: 0x300 ++roundig mode: -inf ++fmax.d :: ++input: 356047.500000000000000 0.015625000000000 ++output: 356047.500000000000000 ++fcsr: 0x300 ++roundig mode: -inf ++fmax.d :: ++input: -1.000000000000000 0.031250000000000 ++output: 0.031250000000000 ++fcsr: 0x300 ++roundig mode: -inf ++fmax.d :: ++input: 23.062500000000000 -248562.750000000000000 ++output: 23.062500000000000 ++fcsr: 0x300 ++roundig mode: near ++fmin.s :: ++input: 0.000000 -4578.500000 ++output: -4578.500000 ++fcsr: 0 ++roundig mode: near ++fmin.s :: ++input: 456.250000 456.250000 ++output: 456.250000 ++fcsr: 0 ++roundig mode: near ++fmin.s :: ++input: 3.000000 34.031250 ++output: 3.000000 ++fcsr: 0 ++roundig mode: near ++fmin.s :: ++input: -1.000000 4578.750000 ++output: -1.000000 ++fcsr: 0 ++roundig mode: near ++fmin.s :: ++input: 1384.500000 175.000000 ++output: 175.000000 ++fcsr: 0 ++roundig mode: near ++fmin.s :: ++input: -7.250000 107.000000 ++output: -7.250000 ++fcsr: 0 ++roundig mode: near ++fmin.s :: ++input: 1000000000.000000 -456.250000 ++output: -456.250000 ++fcsr: 0 ++roundig mode: near ++fmin.s :: ++input: -5786.500000 -7.250000 ++output: -5786.500000 ++fcsr: 0 ++roundig mode: near ++fmin.s :: ++input: 1752.000000 -3478.500000 ++output: -3478.500000 ++fcsr: 0 ++roundig mode: near ++fmin.s :: ++input: 0.015625 356.500000 ++output: 0.015625 ++fcsr: 0 ++roundig mode: near ++fmin.s :: ++input: 0.031250 -1.000000 ++output: -1.000000 ++fcsr: 0 ++roundig mode: near ++fmin.s :: ++input: -248562.750000 23.062500 ++output: -248562.750000 ++fcsr: 0 ++roundig mode: near ++fmin.s :: ++input: -45786.500000 0.000000 ++output: -45786.500000 ++fcsr: 0 ++roundig mode: near ++fmin.s :: ++input: 456.000000 456.250000 ++output: 456.000000 ++fcsr: 0 ++roundig mode: near ++fmin.s :: ++input: 34.031250 3.000000 ++output: 3.000000 ++fcsr: 0 ++roundig mode: near ++fmin.s :: ++input: 45786.750000 -1.000000 ++output: -1.000000 ++fcsr: 0 ++roundig mode: near ++fmin.s :: ++input: 1752065.000000 1384.500000 ++output: 1384.500000 ++fcsr: 0 ++roundig mode: near ++fmin.s :: ++input: 107.000000 -7.000000 ++output: -7.000000 ++fcsr: 0 ++roundig mode: near ++fmin.s :: ++input: -45667.250000 100.000000 ++output: -45667.250000 ++fcsr: 0 ++roundig mode: near ++fmin.s :: ++input: -7.000000 -5786.500000 ++output: -5786.500000 ++fcsr: 0 ++roundig mode: near ++fmin.s :: ++input: -347856.500000 1752.000000 ++output: -347856.500000 ++fcsr: 0 ++roundig mode: near ++fmin.s :: ++input: 356047.500000 0.015625 ++output: 0.015625 ++fcsr: 0 ++roundig mode: near ++fmin.s :: ++input: -1.000000 0.031250 ++output: -1.000000 ++fcsr: 0 ++roundig mode: near ++fmin.s :: ++input: 23.062500 -248562.750000 ++output: -248562.750000 ++fcsr: 0 ++roundig mode: zero ++fmin.s :: ++input: 0.000000 -4578.500000 ++output: -4578.500000 ++fcsr: 0x100 ++roundig mode: zero ++fmin.s :: ++input: 456.250000 456.250000 ++output: 456.250000 ++fcsr: 0x100 ++roundig mode: zero ++fmin.s :: ++input: 3.000000 34.031250 ++output: 3.000000 ++fcsr: 0x100 ++roundig mode: zero ++fmin.s :: ++input: -1.000000 4578.750000 ++output: -1.000000 ++fcsr: 0x100 ++roundig mode: zero ++fmin.s :: ++input: 1384.500000 175.000000 ++output: 175.000000 ++fcsr: 0x100 ++roundig mode: zero ++fmin.s :: ++input: -7.250000 107.000000 ++output: -7.250000 ++fcsr: 0x100 ++roundig mode: zero ++fmin.s :: 
++input: 1000000000.000000 -456.250000 ++output: -456.250000 ++fcsr: 0x100 ++roundig mode: zero ++fmin.s :: ++input: -5786.500000 -7.250000 ++output: -5786.500000 ++fcsr: 0x100 ++roundig mode: zero ++fmin.s :: ++input: 1752.000000 -3478.500000 ++output: -3478.500000 ++fcsr: 0x100 ++roundig mode: zero ++fmin.s :: ++input: 0.015625 356.500000 ++output: 0.015625 ++fcsr: 0x100 ++roundig mode: zero ++fmin.s :: ++input: 0.031250 -1.000000 ++output: -1.000000 ++fcsr: 0x100 ++roundig mode: zero ++fmin.s :: ++input: -248562.750000 23.062500 ++output: -248562.750000 ++fcsr: 0x100 ++roundig mode: zero ++fmin.s :: ++input: -45786.500000 0.000000 ++output: -45786.500000 ++fcsr: 0x100 ++roundig mode: zero ++fmin.s :: ++input: 456.000000 456.250000 ++output: 456.000000 ++fcsr: 0x100 ++roundig mode: zero ++fmin.s :: ++input: 34.031250 3.000000 ++output: 3.000000 ++fcsr: 0x100 ++roundig mode: zero ++fmin.s :: ++input: 45786.750000 -1.000000 ++output: -1.000000 ++fcsr: 0x100 ++roundig mode: zero ++fmin.s :: ++input: 1752065.000000 1384.500000 ++output: 1384.500000 ++fcsr: 0x100 ++roundig mode: zero ++fmin.s :: ++input: 107.000000 -7.000000 ++output: -7.000000 ++fcsr: 0x100 ++roundig mode: zero ++fmin.s :: ++input: -45667.250000 100.000000 ++output: -45667.250000 ++fcsr: 0x100 ++roundig mode: zero ++fmin.s :: ++input: -7.000000 -5786.500000 ++output: -5786.500000 ++fcsr: 0x100 ++roundig mode: zero ++fmin.s :: ++input: -347856.500000 1752.000000 ++output: -347856.500000 ++fcsr: 0x100 ++roundig mode: zero ++fmin.s :: ++input: 356047.500000 0.015625 ++output: 0.015625 ++fcsr: 0x100 ++roundig mode: zero ++fmin.s :: ++input: -1.000000 0.031250 ++output: -1.000000 ++fcsr: 0x100 ++roundig mode: zero ++fmin.s :: ++input: 23.062500 -248562.750000 ++output: -248562.750000 ++fcsr: 0x100 ++roundig mode: +inf ++fmin.s :: ++input: 0.000000 -4578.500000 ++output: -4578.500000 ++fcsr: 0x200 ++roundig mode: +inf ++fmin.s :: ++input: 456.250000 456.250000 ++output: 456.250000 ++fcsr: 0x200 ++roundig mode: +inf ++fmin.s :: ++input: 3.000000 34.031250 ++output: 3.000000 ++fcsr: 0x200 ++roundig mode: +inf ++fmin.s :: ++input: -1.000000 4578.750000 ++output: -1.000000 ++fcsr: 0x200 ++roundig mode: +inf ++fmin.s :: ++input: 1384.500000 175.000000 ++output: 175.000000 ++fcsr: 0x200 ++roundig mode: +inf ++fmin.s :: ++input: -7.250000 107.000000 ++output: -7.250000 ++fcsr: 0x200 ++roundig mode: +inf ++fmin.s :: ++input: 1000000000.000000 -456.250000 ++output: -456.250000 ++fcsr: 0x200 ++roundig mode: +inf ++fmin.s :: ++input: -5786.500000 -7.250000 ++output: -5786.500000 ++fcsr: 0x200 ++roundig mode: +inf ++fmin.s :: ++input: 1752.000000 -3478.500000 ++output: -3478.500000 ++fcsr: 0x200 ++roundig mode: +inf ++fmin.s :: ++input: 0.015625 356.500000 ++output: 0.015625 ++fcsr: 0x200 ++roundig mode: +inf ++fmin.s :: ++input: 0.031250 -1.000000 ++output: -1.000000 ++fcsr: 0x200 ++roundig mode: +inf ++fmin.s :: ++input: -248562.750000 23.062500 ++output: -248562.750000 ++fcsr: 0x200 ++roundig mode: +inf ++fmin.s :: ++input: -45786.500000 0.000000 ++output: -45786.500000 ++fcsr: 0x200 ++roundig mode: +inf ++fmin.s :: ++input: 456.000000 456.250000 ++output: 456.000000 ++fcsr: 0x200 ++roundig mode: +inf ++fmin.s :: ++input: 34.031250 3.000000 ++output: 3.000000 ++fcsr: 0x200 ++roundig mode: +inf ++fmin.s :: ++input: 45786.750000 -1.000000 ++output: -1.000000 ++fcsr: 0x200 ++roundig mode: +inf ++fmin.s :: ++input: 1752065.000000 1384.500000 ++output: 1384.500000 ++fcsr: 0x200 ++roundig mode: +inf ++fmin.s :: ++input: 107.000000 -7.000000 
++output: -7.000000 ++fcsr: 0x200 ++roundig mode: +inf ++fmin.s :: ++input: -45667.250000 100.000000 ++output: -45667.250000 ++fcsr: 0x200 ++roundig mode: +inf ++fmin.s :: ++input: -7.000000 -5786.500000 ++output: -5786.500000 ++fcsr: 0x200 ++roundig mode: +inf ++fmin.s :: ++input: -347856.500000 1752.000000 ++output: -347856.500000 ++fcsr: 0x200 ++roundig mode: +inf ++fmin.s :: ++input: 356047.500000 0.015625 ++output: 0.015625 ++fcsr: 0x200 ++roundig mode: +inf ++fmin.s :: ++input: -1.000000 0.031250 ++output: -1.000000 ++fcsr: 0x200 ++roundig mode: +inf ++fmin.s :: ++input: 23.062500 -248562.750000 ++output: -248562.750000 ++fcsr: 0x200 ++roundig mode: -inf ++fmin.s :: ++input: 0.000000 -4578.500000 ++output: -4578.500000 ++fcsr: 0x300 ++roundig mode: -inf ++fmin.s :: ++input: 456.250000 456.250000 ++output: 456.250000 ++fcsr: 0x300 ++roundig mode: -inf ++fmin.s :: ++input: 3.000000 34.031250 ++output: 3.000000 ++fcsr: 0x300 ++roundig mode: -inf ++fmin.s :: ++input: -1.000000 4578.750000 ++output: -1.000000 ++fcsr: 0x300 ++roundig mode: -inf ++fmin.s :: ++input: 1384.500000 175.000000 ++output: 175.000000 ++fcsr: 0x300 ++roundig mode: -inf ++fmin.s :: ++input: -7.250000 107.000000 ++output: -7.250000 ++fcsr: 0x300 ++roundig mode: -inf ++fmin.s :: ++input: 1000000000.000000 -456.250000 ++output: -456.250000 ++fcsr: 0x300 ++roundig mode: -inf ++fmin.s :: ++input: -5786.500000 -7.250000 ++output: -5786.500000 ++fcsr: 0x300 ++roundig mode: -inf ++fmin.s :: ++input: 1752.000000 -3478.500000 ++output: -3478.500000 ++fcsr: 0x300 ++roundig mode: -inf ++fmin.s :: ++input: 0.015625 356.500000 ++output: 0.015625 ++fcsr: 0x300 ++roundig mode: -inf ++fmin.s :: ++input: 0.031250 -1.000000 ++output: -1.000000 ++fcsr: 0x300 ++roundig mode: -inf ++fmin.s :: ++input: -248562.750000 23.062500 ++output: -248562.750000 ++fcsr: 0x300 ++roundig mode: -inf ++fmin.s :: ++input: -45786.500000 0.000000 ++output: -45786.500000 ++fcsr: 0x300 ++roundig mode: -inf ++fmin.s :: ++input: 456.000000 456.250000 ++output: 456.000000 ++fcsr: 0x300 ++roundig mode: -inf ++fmin.s :: ++input: 34.031250 3.000000 ++output: 3.000000 ++fcsr: 0x300 ++roundig mode: -inf ++fmin.s :: ++input: 45786.750000 -1.000000 ++output: -1.000000 ++fcsr: 0x300 ++roundig mode: -inf ++fmin.s :: ++input: 1752065.000000 1384.500000 ++output: 1384.500000 ++fcsr: 0x300 ++roundig mode: -inf ++fmin.s :: ++input: 107.000000 -7.000000 ++output: -7.000000 ++fcsr: 0x300 ++roundig mode: -inf ++fmin.s :: ++input: -45667.250000 100.000000 ++output: -45667.250000 ++fcsr: 0x300 ++roundig mode: -inf ++fmin.s :: ++input: -7.000000 -5786.500000 ++output: -5786.500000 ++fcsr: 0x300 ++roundig mode: -inf ++fmin.s :: ++input: -347856.500000 1752.000000 ++output: -347856.500000 ++fcsr: 0x300 ++roundig mode: -inf ++fmin.s :: ++input: 356047.500000 0.015625 ++output: 0.015625 ++fcsr: 0x300 ++roundig mode: -inf ++fmin.s :: ++input: -1.000000 0.031250 ++output: -1.000000 ++fcsr: 0x300 ++roundig mode: -inf ++fmin.s :: ++input: 23.062500 -248562.750000 ++output: -248562.750000 ++fcsr: 0x300 ++roundig mode: near ++fmin.d :: ++input: 0.000000000000000 -45786.500000000000000 ++output: -45786.500000000000000 ++fcsr: 0 ++roundig mode: near ++fmin.d :: ++input: 456.250000000000000 456.250000000000000 ++output: 456.250000000000000 ++fcsr: 0 ++roundig mode: near ++fmin.d :: ++input: 3.000000000000000 34.031250000000000 ++output: 3.000000000000000 ++fcsr: 0 ++roundig mode: near ++fmin.d :: ++input: -1.000000000000000 45786.750000000000000 ++output: -1.000000000000000 ++fcsr: 0 ++roundig 
mode: near ++fmin.d :: ++input: 1384.500000000000000 1752065.000000000000000 ++output: 1384.500000000000000 ++fcsr: 0 ++roundig mode: near ++fmin.d :: ++input: -7.250000000000000 107.000000000000000 ++output: -7.250000000000000 ++fcsr: 0 ++roundig mode: near ++fmin.d :: ++input: 1000000000.000000000000000 -45667.250000000000000 ++output: -45667.250000000000000 ++fcsr: 0 ++roundig mode: near ++fmin.d :: ++input: -5786.500000000000000 -7.250000000000000 ++output: -5786.500000000000000 ++fcsr: 0 ++roundig mode: near ++fmin.d :: ++input: 1752.000000000000000 -347856.500000000000000 ++output: -347856.500000000000000 ++fcsr: 0 ++roundig mode: near ++fmin.d :: ++input: 0.015625000000000 356047.500000000000000 ++output: 0.015625000000000 ++fcsr: 0 ++roundig mode: near ++fmin.d :: ++input: 0.031250000000000 -1.000000000000000 ++output: -1.000000000000000 ++fcsr: 0 ++roundig mode: near ++fmin.d :: ++input: -248562.750000000000000 23.062500000000000 ++output: -248562.750000000000000 ++fcsr: 0 ++roundig mode: near ++fmin.d :: ++input: -45786.500000000000000 0.000000000000000 ++output: -45786.500000000000000 ++fcsr: 0 ++roundig mode: near ++fmin.d :: ++input: 456.000000000000000 456.250000000000000 ++output: 456.000000000000000 ++fcsr: 0 ++roundig mode: near ++fmin.d :: ++input: 34.031250000000000 3.000000000000000 ++output: 3.000000000000000 ++fcsr: 0 ++roundig mode: near ++fmin.d :: ++input: 45786.750000000000000 -1.000000000000000 ++output: -1.000000000000000 ++fcsr: 0 ++roundig mode: near ++fmin.d :: ++input: 1752065.000000000000000 1384.500000000000000 ++output: 1384.500000000000000 ++fcsr: 0 ++roundig mode: near ++fmin.d :: ++input: 107.000000000000000 -7.000000000000000 ++output: -7.000000000000000 ++fcsr: 0 ++roundig mode: near ++fmin.d :: ++input: -45667.250000000000000 1000000000.000000000000000 ++output: -45667.250000000000000 ++fcsr: 0 ++roundig mode: near ++fmin.d :: ++input: -7.000000000000000 -5786.500000000000000 ++output: -5786.500000000000000 ++fcsr: 0 ++roundig mode: near ++fmin.d :: ++input: -347856.500000000000000 1752.000000000000000 ++output: -347856.500000000000000 ++fcsr: 0 ++roundig mode: near ++fmin.d :: ++input: 356047.500000000000000 0.015625000000000 ++output: 0.015625000000000 ++fcsr: 0 ++roundig mode: near ++fmin.d :: ++input: -1.000000000000000 0.031250000000000 ++output: -1.000000000000000 ++fcsr: 0 ++roundig mode: near ++fmin.d :: ++input: 23.062500000000000 -248562.750000000000000 ++output: -248562.750000000000000 ++fcsr: 0 ++roundig mode: zero ++fmin.d :: ++input: 0.000000000000000 -45786.500000000000000 ++output: -45786.500000000000000 ++fcsr: 0x100 ++roundig mode: zero ++fmin.d :: ++input: 456.250000000000000 456.250000000000000 ++output: 456.250000000000000 ++fcsr: 0x100 ++roundig mode: zero ++fmin.d :: ++input: 3.000000000000000 34.031250000000000 ++output: 3.000000000000000 ++fcsr: 0x100 ++roundig mode: zero ++fmin.d :: ++input: -1.000000000000000 45786.750000000000000 ++output: -1.000000000000000 ++fcsr: 0x100 ++roundig mode: zero ++fmin.d :: ++input: 1384.500000000000000 1752065.000000000000000 ++output: 1384.500000000000000 ++fcsr: 0x100 ++roundig mode: zero ++fmin.d :: ++input: -7.250000000000000 107.000000000000000 ++output: -7.250000000000000 ++fcsr: 0x100 ++roundig mode: zero ++fmin.d :: ++input: 1000000000.000000000000000 -45667.250000000000000 ++output: -45667.250000000000000 ++fcsr: 0x100 ++roundig mode: zero ++fmin.d :: ++input: -5786.500000000000000 -7.250000000000000 ++output: -5786.500000000000000 ++fcsr: 0x100 ++roundig mode: zero ++fmin.d :: 
++input: 1752.000000000000000 -347856.500000000000000 ++output: -347856.500000000000000 ++fcsr: 0x100 ++roundig mode: zero ++fmin.d :: ++input: 0.015625000000000 356047.500000000000000 ++output: 0.015625000000000 ++fcsr: 0x100 ++roundig mode: zero ++fmin.d :: ++input: 0.031250000000000 -1.000000000000000 ++output: -1.000000000000000 ++fcsr: 0x100 ++roundig mode: zero ++fmin.d :: ++input: -248562.750000000000000 23.062500000000000 ++output: -248562.750000000000000 ++fcsr: 0x100 ++roundig mode: zero ++fmin.d :: ++input: -45786.500000000000000 0.000000000000000 ++output: -45786.500000000000000 ++fcsr: 0x100 ++roundig mode: zero ++fmin.d :: ++input: 456.000000000000000 456.250000000000000 ++output: 456.000000000000000 ++fcsr: 0x100 ++roundig mode: zero ++fmin.d :: ++input: 34.031250000000000 3.000000000000000 ++output: 3.000000000000000 ++fcsr: 0x100 ++roundig mode: zero ++fmin.d :: ++input: 45786.750000000000000 -1.000000000000000 ++output: -1.000000000000000 ++fcsr: 0x100 ++roundig mode: zero ++fmin.d :: ++input: 1752065.000000000000000 1384.500000000000000 ++output: 1384.500000000000000 ++fcsr: 0x100 ++roundig mode: zero ++fmin.d :: ++input: 107.000000000000000 -7.000000000000000 ++output: -7.000000000000000 ++fcsr: 0x100 ++roundig mode: zero ++fmin.d :: ++input: -45667.250000000000000 1000000000.000000000000000 ++output: -45667.250000000000000 ++fcsr: 0x100 ++roundig mode: zero ++fmin.d :: ++input: -7.000000000000000 -5786.500000000000000 ++output: -5786.500000000000000 ++fcsr: 0x100 ++roundig mode: zero ++fmin.d :: ++input: -347856.500000000000000 1752.000000000000000 ++output: -347856.500000000000000 ++fcsr: 0x100 ++roundig mode: zero ++fmin.d :: ++input: 356047.500000000000000 0.015625000000000 ++output: 0.015625000000000 ++fcsr: 0x100 ++roundig mode: zero ++fmin.d :: ++input: -1.000000000000000 0.031250000000000 ++output: -1.000000000000000 ++fcsr: 0x100 ++roundig mode: zero ++fmin.d :: ++input: 23.062500000000000 -248562.750000000000000 ++output: -248562.750000000000000 ++fcsr: 0x100 ++roundig mode: +inf ++fmin.d :: ++input: 0.000000000000000 -45786.500000000000000 ++output: -45786.500000000000000 ++fcsr: 0x200 ++roundig mode: +inf ++fmin.d :: ++input: 456.250000000000000 456.250000000000000 ++output: 456.250000000000000 ++fcsr: 0x200 ++roundig mode: +inf ++fmin.d :: ++input: 3.000000000000000 34.031250000000000 ++output: 3.000000000000000 ++fcsr: 0x200 ++roundig mode: +inf ++fmin.d :: ++input: -1.000000000000000 45786.750000000000000 ++output: -1.000000000000000 ++fcsr: 0x200 ++roundig mode: +inf ++fmin.d :: ++input: 1384.500000000000000 1752065.000000000000000 ++output: 1384.500000000000000 ++fcsr: 0x200 ++roundig mode: +inf ++fmin.d :: ++input: -7.250000000000000 107.000000000000000 ++output: -7.250000000000000 ++fcsr: 0x200 ++roundig mode: +inf ++fmin.d :: ++input: 1000000000.000000000000000 -45667.250000000000000 ++output: -45667.250000000000000 ++fcsr: 0x200 ++roundig mode: +inf ++fmin.d :: ++input: -5786.500000000000000 -7.250000000000000 ++output: -5786.500000000000000 ++fcsr: 0x200 ++roundig mode: +inf ++fmin.d :: ++input: 1752.000000000000000 -347856.500000000000000 ++output: -347856.500000000000000 ++fcsr: 0x200 ++roundig mode: +inf ++fmin.d :: ++input: 0.015625000000000 356047.500000000000000 ++output: 0.015625000000000 ++fcsr: 0x200 ++roundig mode: +inf ++fmin.d :: ++input: 0.031250000000000 -1.000000000000000 ++output: -1.000000000000000 ++fcsr: 0x200 ++roundig mode: +inf ++fmin.d :: ++input: -248562.750000000000000 23.062500000000000 ++output: -248562.750000000000000 
++fcsr: 0x200 ++roundig mode: +inf ++fmin.d :: ++input: -45786.500000000000000 0.000000000000000 ++output: -45786.500000000000000 ++fcsr: 0x200 ++roundig mode: +inf ++fmin.d :: ++input: 456.000000000000000 456.250000000000000 ++output: 456.000000000000000 ++fcsr: 0x200 ++roundig mode: +inf ++fmin.d :: ++input: 34.031250000000000 3.000000000000000 ++output: 3.000000000000000 ++fcsr: 0x200 ++roundig mode: +inf ++fmin.d :: ++input: 45786.750000000000000 -1.000000000000000 ++output: -1.000000000000000 ++fcsr: 0x200 ++roundig mode: +inf ++fmin.d :: ++input: 1752065.000000000000000 1384.500000000000000 ++output: 1384.500000000000000 ++fcsr: 0x200 ++roundig mode: +inf ++fmin.d :: ++input: 107.000000000000000 -7.000000000000000 ++output: -7.000000000000000 ++fcsr: 0x200 ++roundig mode: +inf ++fmin.d :: ++input: -45667.250000000000000 1000000000.000000000000000 ++output: -45667.250000000000000 ++fcsr: 0x200 ++roundig mode: +inf ++fmin.d :: ++input: -7.000000000000000 -5786.500000000000000 ++output: -5786.500000000000000 ++fcsr: 0x200 ++roundig mode: +inf ++fmin.d :: ++input: -347856.500000000000000 1752.000000000000000 ++output: -347856.500000000000000 ++fcsr: 0x200 ++roundig mode: +inf ++fmin.d :: ++input: 356047.500000000000000 0.015625000000000 ++output: 0.015625000000000 ++fcsr: 0x200 ++roundig mode: +inf ++fmin.d :: ++input: -1.000000000000000 0.031250000000000 ++output: -1.000000000000000 ++fcsr: 0x200 ++roundig mode: +inf ++fmin.d :: ++input: 23.062500000000000 -248562.750000000000000 ++output: -248562.750000000000000 ++fcsr: 0x200 ++roundig mode: -inf ++fmin.d :: ++input: 0.000000000000000 -45786.500000000000000 ++output: -45786.500000000000000 ++fcsr: 0x300 ++roundig mode: -inf ++fmin.d :: ++input: 456.250000000000000 456.250000000000000 ++output: 456.250000000000000 ++fcsr: 0x300 ++roundig mode: -inf ++fmin.d :: ++input: 3.000000000000000 34.031250000000000 ++output: 3.000000000000000 ++fcsr: 0x300 ++roundig mode: -inf ++fmin.d :: ++input: -1.000000000000000 45786.750000000000000 ++output: -1.000000000000000 ++fcsr: 0x300 ++roundig mode: -inf ++fmin.d :: ++input: 1384.500000000000000 1752065.000000000000000 ++output: 1384.500000000000000 ++fcsr: 0x300 ++roundig mode: -inf ++fmin.d :: ++input: -7.250000000000000 107.000000000000000 ++output: -7.250000000000000 ++fcsr: 0x300 ++roundig mode: -inf ++fmin.d :: ++input: 1000000000.000000000000000 -45667.250000000000000 ++output: -45667.250000000000000 ++fcsr: 0x300 ++roundig mode: -inf ++fmin.d :: ++input: -5786.500000000000000 -7.250000000000000 ++output: -5786.500000000000000 ++fcsr: 0x300 ++roundig mode: -inf ++fmin.d :: ++input: 1752.000000000000000 -347856.500000000000000 ++output: -347856.500000000000000 ++fcsr: 0x300 ++roundig mode: -inf ++fmin.d :: ++input: 0.015625000000000 356047.500000000000000 ++output: 0.015625000000000 ++fcsr: 0x300 ++roundig mode: -inf ++fmin.d :: ++input: 0.031250000000000 -1.000000000000000 ++output: -1.000000000000000 ++fcsr: 0x300 ++roundig mode: -inf ++fmin.d :: ++input: -248562.750000000000000 23.062500000000000 ++output: -248562.750000000000000 ++fcsr: 0x300 ++roundig mode: -inf ++fmin.d :: ++input: -45786.500000000000000 0.000000000000000 ++output: -45786.500000000000000 ++fcsr: 0x300 ++roundig mode: -inf ++fmin.d :: ++input: 456.000000000000000 456.250000000000000 ++output: 456.000000000000000 ++fcsr: 0x300 ++roundig mode: -inf ++fmin.d :: ++input: 34.031250000000000 3.000000000000000 ++output: 3.000000000000000 ++fcsr: 0x300 ++roundig mode: -inf ++fmin.d :: ++input: 45786.750000000000000 
-1.000000000000000 ++output: -1.000000000000000 ++fcsr: 0x300 ++roundig mode: -inf ++fmin.d :: ++input: 1752065.000000000000000 1384.500000000000000 ++output: 1384.500000000000000 ++fcsr: 0x300 ++roundig mode: -inf ++fmin.d :: ++input: 107.000000000000000 -7.000000000000000 ++output: -7.000000000000000 ++fcsr: 0x300 ++roundig mode: -inf ++fmin.d :: ++input: -45667.250000000000000 1000000000.000000000000000 ++output: -45667.250000000000000 ++fcsr: 0x300 ++roundig mode: -inf ++fmin.d :: ++input: -7.000000000000000 -5786.500000000000000 ++output: -5786.500000000000000 ++fcsr: 0x300 ++roundig mode: -inf ++fmin.d :: ++input: -347856.500000000000000 1752.000000000000000 ++output: -347856.500000000000000 ++fcsr: 0x300 ++roundig mode: -inf ++fmin.d :: ++input: 356047.500000000000000 0.015625000000000 ++output: 0.015625000000000 ++fcsr: 0x300 ++roundig mode: -inf ++fmin.d :: ++input: -1.000000000000000 0.031250000000000 ++output: -1.000000000000000 ++fcsr: 0x300 ++roundig mode: -inf ++fmin.d :: ++input: 23.062500000000000 -248562.750000000000000 ++output: -248562.750000000000000 ++fcsr: 0x300 ++roundig mode: near ++fmaxa.s :: ++input: 0.000000 -4578.500000 ++output: -4578.500000 ++fcsr: 0 ++roundig mode: near ++fmaxa.s :: ++input: 456.250000 456.250000 ++output: 456.250000 ++fcsr: 0 ++roundig mode: near ++fmaxa.s :: ++input: 3.000000 34.031250 ++output: 34.031250 ++fcsr: 0 ++roundig mode: near ++fmaxa.s :: ++input: -1.000000 4578.750000 ++output: 4578.750000 ++fcsr: 0 ++roundig mode: near ++fmaxa.s :: ++input: 1384.500000 175.000000 ++output: 1384.500000 ++fcsr: 0 ++roundig mode: near ++fmaxa.s :: ++input: -7.250000 107.000000 ++output: 107.000000 ++fcsr: 0 ++roundig mode: near ++fmaxa.s :: ++input: 1000000000.000000 -456.250000 ++output: 1000000000.000000 ++fcsr: 0 ++roundig mode: near ++fmaxa.s :: ++input: -5786.500000 -7.250000 ++output: -5786.500000 ++fcsr: 0 ++roundig mode: near ++fmaxa.s :: ++input: 1752.000000 -3478.500000 ++output: -3478.500000 ++fcsr: 0 ++roundig mode: near ++fmaxa.s :: ++input: 0.015625 356.500000 ++output: 356.500000 ++fcsr: 0 ++roundig mode: near ++fmaxa.s :: ++input: 0.031250 -1.000000 ++output: -1.000000 ++fcsr: 0 ++roundig mode: near ++fmaxa.s :: ++input: -248562.750000 23.062500 ++output: -248562.750000 ++fcsr: 0 ++roundig mode: near ++fmaxa.s :: ++input: -45786.500000 0.000000 ++output: -45786.500000 ++fcsr: 0 ++roundig mode: near ++fmaxa.s :: ++input: 456.000000 456.250000 ++output: 456.250000 ++fcsr: 0 ++roundig mode: near ++fmaxa.s :: ++input: 34.031250 3.000000 ++output: 34.031250 ++fcsr: 0 ++roundig mode: near ++fmaxa.s :: ++input: 45786.750000 -1.000000 ++output: 45786.750000 ++fcsr: 0 ++roundig mode: near ++fmaxa.s :: ++input: 1752065.000000 1384.500000 ++output: 1752065.000000 ++fcsr: 0 ++roundig mode: near ++fmaxa.s :: ++input: 107.000000 -7.000000 ++output: 107.000000 ++fcsr: 0 ++roundig mode: near ++fmaxa.s :: ++input: -45667.250000 100.000000 ++output: -45667.250000 ++fcsr: 0 ++roundig mode: near ++fmaxa.s :: ++input: -7.000000 -5786.500000 ++output: -5786.500000 ++fcsr: 0 ++roundig mode: near ++fmaxa.s :: ++input: -347856.500000 1752.000000 ++output: -347856.500000 ++fcsr: 0 ++roundig mode: near ++fmaxa.s :: ++input: 356047.500000 0.015625 ++output: 356047.500000 ++fcsr: 0 ++roundig mode: near ++fmaxa.s :: ++input: -1.000000 0.031250 ++output: -1.000000 ++fcsr: 0 ++roundig mode: near ++fmaxa.s :: ++input: 23.062500 -248562.750000 ++output: -248562.750000 ++fcsr: 0 ++roundig mode: zero ++fmaxa.s :: ++input: 0.000000 -4578.500000 ++output: -4578.500000 
++fcsr: 0x100 ++roundig mode: zero ++fmaxa.s :: ++input: 456.250000 456.250000 ++output: 456.250000 ++fcsr: 0x100 ++roundig mode: zero ++fmaxa.s :: ++input: 3.000000 34.031250 ++output: 34.031250 ++fcsr: 0x100 ++roundig mode: zero ++fmaxa.s :: ++input: -1.000000 4578.750000 ++output: 4578.750000 ++fcsr: 0x100 ++roundig mode: zero ++fmaxa.s :: ++input: 1384.500000 175.000000 ++output: 1384.500000 ++fcsr: 0x100 ++roundig mode: zero ++fmaxa.s :: ++input: -7.250000 107.000000 ++output: 107.000000 ++fcsr: 0x100 ++roundig mode: zero ++fmaxa.s :: ++input: 1000000000.000000 -456.250000 ++output: 1000000000.000000 ++fcsr: 0x100 ++roundig mode: zero ++fmaxa.s :: ++input: -5786.500000 -7.250000 ++output: -5786.500000 ++fcsr: 0x100 ++roundig mode: zero ++fmaxa.s :: ++input: 1752.000000 -3478.500000 ++output: -3478.500000 ++fcsr: 0x100 ++roundig mode: zero ++fmaxa.s :: ++input: 0.015625 356.500000 ++output: 356.500000 ++fcsr: 0x100 ++roundig mode: zero ++fmaxa.s :: ++input: 0.031250 -1.000000 ++output: -1.000000 ++fcsr: 0x100 ++roundig mode: zero ++fmaxa.s :: ++input: -248562.750000 23.062500 ++output: -248562.750000 ++fcsr: 0x100 ++roundig mode: zero ++fmaxa.s :: ++input: -45786.500000 0.000000 ++output: -45786.500000 ++fcsr: 0x100 ++roundig mode: zero ++fmaxa.s :: ++input: 456.000000 456.250000 ++output: 456.250000 ++fcsr: 0x100 ++roundig mode: zero ++fmaxa.s :: ++input: 34.031250 3.000000 ++output: 34.031250 ++fcsr: 0x100 ++roundig mode: zero ++fmaxa.s :: ++input: 45786.750000 -1.000000 ++output: 45786.750000 ++fcsr: 0x100 ++roundig mode: zero ++fmaxa.s :: ++input: 1752065.000000 1384.500000 ++output: 1752065.000000 ++fcsr: 0x100 ++roundig mode: zero ++fmaxa.s :: ++input: 107.000000 -7.000000 ++output: 107.000000 ++fcsr: 0x100 ++roundig mode: zero ++fmaxa.s :: ++input: -45667.250000 100.000000 ++output: -45667.250000 ++fcsr: 0x100 ++roundig mode: zero ++fmaxa.s :: ++input: -7.000000 -5786.500000 ++output: -5786.500000 ++fcsr: 0x100 ++roundig mode: zero ++fmaxa.s :: ++input: -347856.500000 1752.000000 ++output: -347856.500000 ++fcsr: 0x100 ++roundig mode: zero ++fmaxa.s :: ++input: 356047.500000 0.015625 ++output: 356047.500000 ++fcsr: 0x100 ++roundig mode: zero ++fmaxa.s :: ++input: -1.000000 0.031250 ++output: -1.000000 ++fcsr: 0x100 ++roundig mode: zero ++fmaxa.s :: ++input: 23.062500 -248562.750000 ++output: -248562.750000 ++fcsr: 0x100 ++roundig mode: +inf ++fmaxa.s :: ++input: 0.000000 -4578.500000 ++output: -4578.500000 ++fcsr: 0x200 ++roundig mode: +inf ++fmaxa.s :: ++input: 456.250000 456.250000 ++output: 456.250000 ++fcsr: 0x200 ++roundig mode: +inf ++fmaxa.s :: ++input: 3.000000 34.031250 ++output: 34.031250 ++fcsr: 0x200 ++roundig mode: +inf ++fmaxa.s :: ++input: -1.000000 4578.750000 ++output: 4578.750000 ++fcsr: 0x200 ++roundig mode: +inf ++fmaxa.s :: ++input: 1384.500000 175.000000 ++output: 1384.500000 ++fcsr: 0x200 ++roundig mode: +inf ++fmaxa.s :: ++input: -7.250000 107.000000 ++output: 107.000000 ++fcsr: 0x200 ++roundig mode: +inf ++fmaxa.s :: ++input: 1000000000.000000 -456.250000 ++output: 1000000000.000000 ++fcsr: 0x200 ++roundig mode: +inf ++fmaxa.s :: ++input: -5786.500000 -7.250000 ++output: -5786.500000 ++fcsr: 0x200 ++roundig mode: +inf ++fmaxa.s :: ++input: 1752.000000 -3478.500000 ++output: -3478.500000 ++fcsr: 0x200 ++roundig mode: +inf ++fmaxa.s :: ++input: 0.015625 356.500000 ++output: 356.500000 ++fcsr: 0x200 ++roundig mode: +inf ++fmaxa.s :: ++input: 0.031250 -1.000000 ++output: -1.000000 ++fcsr: 0x200 ++roundig mode: +inf ++fmaxa.s :: ++input: -248562.750000 
23.062500 ++output: -248562.750000 ++fcsr: 0x200 ++roundig mode: +inf ++fmaxa.s :: ++input: -45786.500000 0.000000 ++output: -45786.500000 ++fcsr: 0x200 ++roundig mode: +inf ++fmaxa.s :: ++input: 456.000000 456.250000 ++output: 456.250000 ++fcsr: 0x200 ++roundig mode: +inf ++fmaxa.s :: ++input: 34.031250 3.000000 ++output: 34.031250 ++fcsr: 0x200 ++roundig mode: +inf ++fmaxa.s :: ++input: 45786.750000 -1.000000 ++output: 45786.750000 ++fcsr: 0x200 ++roundig mode: +inf ++fmaxa.s :: ++input: 1752065.000000 1384.500000 ++output: 1752065.000000 ++fcsr: 0x200 ++roundig mode: +inf ++fmaxa.s :: ++input: 107.000000 -7.000000 ++output: 107.000000 ++fcsr: 0x200 ++roundig mode: +inf ++fmaxa.s :: ++input: -45667.250000 100.000000 ++output: -45667.250000 ++fcsr: 0x200 ++roundig mode: +inf ++fmaxa.s :: ++input: -7.000000 -5786.500000 ++output: -5786.500000 ++fcsr: 0x200 ++roundig mode: +inf ++fmaxa.s :: ++input: -347856.500000 1752.000000 ++output: -347856.500000 ++fcsr: 0x200 ++roundig mode: +inf ++fmaxa.s :: ++input: 356047.500000 0.015625 ++output: 356047.500000 ++fcsr: 0x200 ++roundig mode: +inf ++fmaxa.s :: ++input: -1.000000 0.031250 ++output: -1.000000 ++fcsr: 0x200 ++roundig mode: +inf ++fmaxa.s :: ++input: 23.062500 -248562.750000 ++output: -248562.750000 ++fcsr: 0x200 ++roundig mode: -inf ++fmaxa.s :: ++input: 0.000000 -4578.500000 ++output: -4578.500000 ++fcsr: 0x300 ++roundig mode: -inf ++fmaxa.s :: ++input: 456.250000 456.250000 ++output: 456.250000 ++fcsr: 0x300 ++roundig mode: -inf ++fmaxa.s :: ++input: 3.000000 34.031250 ++output: 34.031250 ++fcsr: 0x300 ++roundig mode: -inf ++fmaxa.s :: ++input: -1.000000 4578.750000 ++output: 4578.750000 ++fcsr: 0x300 ++roundig mode: -inf ++fmaxa.s :: ++input: 1384.500000 175.000000 ++output: 1384.500000 ++fcsr: 0x300 ++roundig mode: -inf ++fmaxa.s :: ++input: -7.250000 107.000000 ++output: 107.000000 ++fcsr: 0x300 ++roundig mode: -inf ++fmaxa.s :: ++input: 1000000000.000000 -456.250000 ++output: 1000000000.000000 ++fcsr: 0x300 ++roundig mode: -inf ++fmaxa.s :: ++input: -5786.500000 -7.250000 ++output: -5786.500000 ++fcsr: 0x300 ++roundig mode: -inf ++fmaxa.s :: ++input: 1752.000000 -3478.500000 ++output: -3478.500000 ++fcsr: 0x300 ++roundig mode: -inf ++fmaxa.s :: ++input: 0.015625 356.500000 ++output: 356.500000 ++fcsr: 0x300 ++roundig mode: -inf ++fmaxa.s :: ++input: 0.031250 -1.000000 ++output: -1.000000 ++fcsr: 0x300 ++roundig mode: -inf ++fmaxa.s :: ++input: -248562.750000 23.062500 ++output: -248562.750000 ++fcsr: 0x300 ++roundig mode: -inf ++fmaxa.s :: ++input: -45786.500000 0.000000 ++output: -45786.500000 ++fcsr: 0x300 ++roundig mode: -inf ++fmaxa.s :: ++input: 456.000000 456.250000 ++output: 456.250000 ++fcsr: 0x300 ++roundig mode: -inf ++fmaxa.s :: ++input: 34.031250 3.000000 ++output: 34.031250 ++fcsr: 0x300 ++roundig mode: -inf ++fmaxa.s :: ++input: 45786.750000 -1.000000 ++output: 45786.750000 ++fcsr: 0x300 ++roundig mode: -inf ++fmaxa.s :: ++input: 1752065.000000 1384.500000 ++output: 1752065.000000 ++fcsr: 0x300 ++roundig mode: -inf ++fmaxa.s :: ++input: 107.000000 -7.000000 ++output: 107.000000 ++fcsr: 0x300 ++roundig mode: -inf ++fmaxa.s :: ++input: -45667.250000 100.000000 ++output: -45667.250000 ++fcsr: 0x300 ++roundig mode: -inf ++fmaxa.s :: ++input: -7.000000 -5786.500000 ++output: -5786.500000 ++fcsr: 0x300 ++roundig mode: -inf ++fmaxa.s :: ++input: -347856.500000 1752.000000 ++output: -347856.500000 ++fcsr: 0x300 ++roundig mode: -inf ++fmaxa.s :: ++input: 356047.500000 0.015625 ++output: 356047.500000 ++fcsr: 0x300 ++roundig 
mode: -inf ++fmaxa.s :: ++input: -1.000000 0.031250 ++output: -1.000000 ++fcsr: 0x300 ++roundig mode: -inf ++fmaxa.s :: ++input: 23.062500 -248562.750000 ++output: -248562.750000 ++fcsr: 0x300 ++roundig mode: near ++fmaxa.d :: ++input: 0.000000000000000 -45786.500000000000000 ++output: -45786.500000000000000 ++fcsr: 0 ++roundig mode: near ++fmaxa.d :: ++input: 456.250000000000000 456.250000000000000 ++output: 456.250000000000000 ++fcsr: 0 ++roundig mode: near ++fmaxa.d :: ++input: 3.000000000000000 34.031250000000000 ++output: 34.031250000000000 ++fcsr: 0 ++roundig mode: near ++fmaxa.d :: ++input: -1.000000000000000 45786.750000000000000 ++output: 45786.750000000000000 ++fcsr: 0 ++roundig mode: near ++fmaxa.d :: ++input: 1384.500000000000000 1752065.000000000000000 ++output: 1752065.000000000000000 ++fcsr: 0 ++roundig mode: near ++fmaxa.d :: ++input: -7.250000000000000 107.000000000000000 ++output: 107.000000000000000 ++fcsr: 0 ++roundig mode: near ++fmaxa.d :: ++input: 1000000000.000000000000000 -45667.250000000000000 ++output: 1000000000.000000000000000 ++fcsr: 0 ++roundig mode: near ++fmaxa.d :: ++input: -5786.500000000000000 -7.250000000000000 ++output: -5786.500000000000000 ++fcsr: 0 ++roundig mode: near ++fmaxa.d :: ++input: 1752.000000000000000 -347856.500000000000000 ++output: -347856.500000000000000 ++fcsr: 0 ++roundig mode: near ++fmaxa.d :: ++input: 0.015625000000000 356047.500000000000000 ++output: 356047.500000000000000 ++fcsr: 0 ++roundig mode: near ++fmaxa.d :: ++input: 0.031250000000000 -1.000000000000000 ++output: -1.000000000000000 ++fcsr: 0 ++roundig mode: near ++fmaxa.d :: ++input: -248562.750000000000000 23.062500000000000 ++output: -248562.750000000000000 ++fcsr: 0 ++roundig mode: near ++fmaxa.d :: ++input: -45786.500000000000000 0.000000000000000 ++output: -45786.500000000000000 ++fcsr: 0 ++roundig mode: near ++fmaxa.d :: ++input: 456.000000000000000 456.250000000000000 ++output: 456.250000000000000 ++fcsr: 0 ++roundig mode: near ++fmaxa.d :: ++input: 34.031250000000000 3.000000000000000 ++output: 34.031250000000000 ++fcsr: 0 ++roundig mode: near ++fmaxa.d :: ++input: 45786.750000000000000 -1.000000000000000 ++output: 45786.750000000000000 ++fcsr: 0 ++roundig mode: near ++fmaxa.d :: ++input: 1752065.000000000000000 1384.500000000000000 ++output: 1752065.000000000000000 ++fcsr: 0 ++roundig mode: near ++fmaxa.d :: ++input: 107.000000000000000 -7.000000000000000 ++output: 107.000000000000000 ++fcsr: 0 ++roundig mode: near ++fmaxa.d :: ++input: -45667.250000000000000 1000000000.000000000000000 ++output: 1000000000.000000000000000 ++fcsr: 0 ++roundig mode: near ++fmaxa.d :: ++input: -7.000000000000000 -5786.500000000000000 ++output: -5786.500000000000000 ++fcsr: 0 ++roundig mode: near ++fmaxa.d :: ++input: -347856.500000000000000 1752.000000000000000 ++output: -347856.500000000000000 ++fcsr: 0 ++roundig mode: near ++fmaxa.d :: ++input: 356047.500000000000000 0.015625000000000 ++output: 356047.500000000000000 ++fcsr: 0 ++roundig mode: near ++fmaxa.d :: ++input: -1.000000000000000 0.031250000000000 ++output: -1.000000000000000 ++fcsr: 0 ++roundig mode: near ++fmaxa.d :: ++input: 23.062500000000000 -248562.750000000000000 ++output: -248562.750000000000000 ++fcsr: 0 ++roundig mode: zero ++fmaxa.d :: ++input: 0.000000000000000 -45786.500000000000000 ++output: -45786.500000000000000 ++fcsr: 0x100 ++roundig mode: zero ++fmaxa.d :: ++input: 456.250000000000000 456.250000000000000 ++output: 456.250000000000000 ++fcsr: 0x100 ++roundig mode: zero ++fmaxa.d :: ++input: 
3.000000000000000 34.031250000000000 ++output: 34.031250000000000 ++fcsr: 0x100 ++roundig mode: zero ++fmaxa.d :: ++input: -1.000000000000000 45786.750000000000000 ++output: 45786.750000000000000 ++fcsr: 0x100 ++roundig mode: zero ++fmaxa.d :: ++input: 1384.500000000000000 1752065.000000000000000 ++output: 1752065.000000000000000 ++fcsr: 0x100 ++roundig mode: zero ++fmaxa.d :: ++input: -7.250000000000000 107.000000000000000 ++output: 107.000000000000000 ++fcsr: 0x100 ++roundig mode: zero ++fmaxa.d :: ++input: 1000000000.000000000000000 -45667.250000000000000 ++output: 1000000000.000000000000000 ++fcsr: 0x100 ++roundig mode: zero ++fmaxa.d :: ++input: -5786.500000000000000 -7.250000000000000 ++output: -5786.500000000000000 ++fcsr: 0x100 ++roundig mode: zero ++fmaxa.d :: ++input: 1752.000000000000000 -347856.500000000000000 ++output: -347856.500000000000000 ++fcsr: 0x100 ++roundig mode: zero ++fmaxa.d :: ++input: 0.015625000000000 356047.500000000000000 ++output: 356047.500000000000000 ++fcsr: 0x100 ++roundig mode: zero ++fmaxa.d :: ++input: 0.031250000000000 -1.000000000000000 ++output: -1.000000000000000 ++fcsr: 0x100 ++roundig mode: zero ++fmaxa.d :: ++input: -248562.750000000000000 23.062500000000000 ++output: -248562.750000000000000 ++fcsr: 0x100 ++roundig mode: zero ++fmaxa.d :: ++input: -45786.500000000000000 0.000000000000000 ++output: -45786.500000000000000 ++fcsr: 0x100 ++roundig mode: zero ++fmaxa.d :: ++input: 456.000000000000000 456.250000000000000 ++output: 456.250000000000000 ++fcsr: 0x100 ++roundig mode: zero ++fmaxa.d :: ++input: 34.031250000000000 3.000000000000000 ++output: 34.031250000000000 ++fcsr: 0x100 ++roundig mode: zero ++fmaxa.d :: ++input: 45786.750000000000000 -1.000000000000000 ++output: 45786.750000000000000 ++fcsr: 0x100 ++roundig mode: zero ++fmaxa.d :: ++input: 1752065.000000000000000 1384.500000000000000 ++output: 1752065.000000000000000 ++fcsr: 0x100 ++roundig mode: zero ++fmaxa.d :: ++input: 107.000000000000000 -7.000000000000000 ++output: 107.000000000000000 ++fcsr: 0x100 ++roundig mode: zero ++fmaxa.d :: ++input: -45667.250000000000000 1000000000.000000000000000 ++output: 1000000000.000000000000000 ++fcsr: 0x100 ++roundig mode: zero ++fmaxa.d :: ++input: -7.000000000000000 -5786.500000000000000 ++output: -5786.500000000000000 ++fcsr: 0x100 ++roundig mode: zero ++fmaxa.d :: ++input: -347856.500000000000000 1752.000000000000000 ++output: -347856.500000000000000 ++fcsr: 0x100 ++roundig mode: zero ++fmaxa.d :: ++input: 356047.500000000000000 0.015625000000000 ++output: 356047.500000000000000 ++fcsr: 0x100 ++roundig mode: zero ++fmaxa.d :: ++input: -1.000000000000000 0.031250000000000 ++output: -1.000000000000000 ++fcsr: 0x100 ++roundig mode: zero ++fmaxa.d :: ++input: 23.062500000000000 -248562.750000000000000 ++output: -248562.750000000000000 ++fcsr: 0x100 ++roundig mode: +inf ++fmaxa.d :: ++input: 0.000000000000000 -45786.500000000000000 ++output: -45786.500000000000000 ++fcsr: 0x200 ++roundig mode: +inf ++fmaxa.d :: ++input: 456.250000000000000 456.250000000000000 ++output: 456.250000000000000 ++fcsr: 0x200 ++roundig mode: +inf ++fmaxa.d :: ++input: 3.000000000000000 34.031250000000000 ++output: 34.031250000000000 ++fcsr: 0x200 ++roundig mode: +inf ++fmaxa.d :: ++input: -1.000000000000000 45786.750000000000000 ++output: 45786.750000000000000 ++fcsr: 0x200 ++roundig mode: +inf ++fmaxa.d :: ++input: 1384.500000000000000 1752065.000000000000000 ++output: 1752065.000000000000000 ++fcsr: 0x200 ++roundig mode: +inf ++fmaxa.d :: ++input: -7.250000000000000 
107.000000000000000 ++output: 107.000000000000000 ++fcsr: 0x200 ++roundig mode: +inf ++fmaxa.d :: ++input: 1000000000.000000000000000 -45667.250000000000000 ++output: 1000000000.000000000000000 ++fcsr: 0x200 ++roundig mode: +inf ++fmaxa.d :: ++input: -5786.500000000000000 -7.250000000000000 ++output: -5786.500000000000000 ++fcsr: 0x200 ++roundig mode: +inf ++fmaxa.d :: ++input: 1752.000000000000000 -347856.500000000000000 ++output: -347856.500000000000000 ++fcsr: 0x200 ++roundig mode: +inf ++fmaxa.d :: ++input: 0.015625000000000 356047.500000000000000 ++output: 356047.500000000000000 ++fcsr: 0x200 ++roundig mode: +inf ++fmaxa.d :: ++input: 0.031250000000000 -1.000000000000000 ++output: -1.000000000000000 ++fcsr: 0x200 ++roundig mode: +inf ++fmaxa.d :: ++input: -248562.750000000000000 23.062500000000000 ++output: -248562.750000000000000 ++fcsr: 0x200 ++roundig mode: +inf ++fmaxa.d :: ++input: -45786.500000000000000 0.000000000000000 ++output: -45786.500000000000000 ++fcsr: 0x200 ++roundig mode: +inf ++fmaxa.d :: ++input: 456.000000000000000 456.250000000000000 ++output: 456.250000000000000 ++fcsr: 0x200 ++roundig mode: +inf ++fmaxa.d :: ++input: 34.031250000000000 3.000000000000000 ++output: 34.031250000000000 ++fcsr: 0x200 ++roundig mode: +inf ++fmaxa.d :: ++input: 45786.750000000000000 -1.000000000000000 ++output: 45786.750000000000000 ++fcsr: 0x200 ++roundig mode: +inf ++fmaxa.d :: ++input: 1752065.000000000000000 1384.500000000000000 ++output: 1752065.000000000000000 ++fcsr: 0x200 ++roundig mode: +inf ++fmaxa.d :: ++input: 107.000000000000000 -7.000000000000000 ++output: 107.000000000000000 ++fcsr: 0x200 ++roundig mode: +inf ++fmaxa.d :: ++input: -45667.250000000000000 1000000000.000000000000000 ++output: 1000000000.000000000000000 ++fcsr: 0x200 ++roundig mode: +inf ++fmaxa.d :: ++input: -7.000000000000000 -5786.500000000000000 ++output: -5786.500000000000000 ++fcsr: 0x200 ++roundig mode: +inf ++fmaxa.d :: ++input: -347856.500000000000000 1752.000000000000000 ++output: -347856.500000000000000 ++fcsr: 0x200 ++roundig mode: +inf ++fmaxa.d :: ++input: 356047.500000000000000 0.015625000000000 ++output: 356047.500000000000000 ++fcsr: 0x200 ++roundig mode: +inf ++fmaxa.d :: ++input: -1.000000000000000 0.031250000000000 ++output: -1.000000000000000 ++fcsr: 0x200 ++roundig mode: +inf ++fmaxa.d :: ++input: 23.062500000000000 -248562.750000000000000 ++output: -248562.750000000000000 ++fcsr: 0x200 ++roundig mode: -inf ++fmaxa.d :: ++input: 0.000000000000000 -45786.500000000000000 ++output: -45786.500000000000000 ++fcsr: 0x300 ++roundig mode: -inf ++fmaxa.d :: ++input: 456.250000000000000 456.250000000000000 ++output: 456.250000000000000 ++fcsr: 0x300 ++roundig mode: -inf ++fmaxa.d :: ++input: 3.000000000000000 34.031250000000000 ++output: 34.031250000000000 ++fcsr: 0x300 ++roundig mode: -inf ++fmaxa.d :: ++input: -1.000000000000000 45786.750000000000000 ++output: 45786.750000000000000 ++fcsr: 0x300 ++roundig mode: -inf ++fmaxa.d :: ++input: 1384.500000000000000 1752065.000000000000000 ++output: 1752065.000000000000000 ++fcsr: 0x300 ++roundig mode: -inf ++fmaxa.d :: ++input: -7.250000000000000 107.000000000000000 ++output: 107.000000000000000 ++fcsr: 0x300 ++roundig mode: -inf ++fmaxa.d :: ++input: 1000000000.000000000000000 -45667.250000000000000 ++output: 1000000000.000000000000000 ++fcsr: 0x300 ++roundig mode: -inf ++fmaxa.d :: ++input: -5786.500000000000000 -7.250000000000000 ++output: -5786.500000000000000 ++fcsr: 0x300 ++roundig mode: -inf ++fmaxa.d :: ++input: 1752.000000000000000 
-347856.500000000000000 ++output: -347856.500000000000000 ++fcsr: 0x300 ++roundig mode: -inf ++fmaxa.d :: ++input: 0.015625000000000 356047.500000000000000 ++output: 356047.500000000000000 ++fcsr: 0x300 ++roundig mode: -inf ++fmaxa.d :: ++input: 0.031250000000000 -1.000000000000000 ++output: -1.000000000000000 ++fcsr: 0x300 ++roundig mode: -inf ++fmaxa.d :: ++input: -248562.750000000000000 23.062500000000000 ++output: -248562.750000000000000 ++fcsr: 0x300 ++roundig mode: -inf ++fmaxa.d :: ++input: -45786.500000000000000 0.000000000000000 ++output: -45786.500000000000000 ++fcsr: 0x300 ++roundig mode: -inf ++fmaxa.d :: ++input: 456.000000000000000 456.250000000000000 ++output: 456.250000000000000 ++fcsr: 0x300 ++roundig mode: -inf ++fmaxa.d :: ++input: 34.031250000000000 3.000000000000000 ++output: 34.031250000000000 ++fcsr: 0x300 ++roundig mode: -inf ++fmaxa.d :: ++input: 45786.750000000000000 -1.000000000000000 ++output: 45786.750000000000000 ++fcsr: 0x300 ++roundig mode: -inf ++fmaxa.d :: ++input: 1752065.000000000000000 1384.500000000000000 ++output: 1752065.000000000000000 ++fcsr: 0x300 ++roundig mode: -inf ++fmaxa.d :: ++input: 107.000000000000000 -7.000000000000000 ++output: 107.000000000000000 ++fcsr: 0x300 ++roundig mode: -inf ++fmaxa.d :: ++input: -45667.250000000000000 1000000000.000000000000000 ++output: 1000000000.000000000000000 ++fcsr: 0x300 ++roundig mode: -inf ++fmaxa.d :: ++input: -7.000000000000000 -5786.500000000000000 ++output: -5786.500000000000000 ++fcsr: 0x300 ++roundig mode: -inf ++fmaxa.d :: ++input: -347856.500000000000000 1752.000000000000000 ++output: -347856.500000000000000 ++fcsr: 0x300 ++roundig mode: -inf ++fmaxa.d :: ++input: 356047.500000000000000 0.015625000000000 ++output: 356047.500000000000000 ++fcsr: 0x300 ++roundig mode: -inf ++fmaxa.d :: ++input: -1.000000000000000 0.031250000000000 ++output: -1.000000000000000 ++fcsr: 0x300 ++roundig mode: -inf ++fmaxa.d :: ++input: 23.062500000000000 -248562.750000000000000 ++output: -248562.750000000000000 ++fcsr: 0x300 ++roundig mode: near ++fmina.s :: ++input: 0.000000 -4578.500000 ++output: 0.000000 ++fcsr: 0 ++roundig mode: near ++fmina.s :: ++input: 456.250000 456.250000 ++output: 456.250000 ++fcsr: 0 ++roundig mode: near ++fmina.s :: ++input: 3.000000 34.031250 ++output: 3.000000 ++fcsr: 0 ++roundig mode: near ++fmina.s :: ++input: -1.000000 4578.750000 ++output: -1.000000 ++fcsr: 0 ++roundig mode: near ++fmina.s :: ++input: 1384.500000 175.000000 ++output: 175.000000 ++fcsr: 0 ++roundig mode: near ++fmina.s :: ++input: -7.250000 107.000000 ++output: -7.250000 ++fcsr: 0 ++roundig mode: near ++fmina.s :: ++input: 1000000000.000000 -456.250000 ++output: -456.250000 ++fcsr: 0 ++roundig mode: near ++fmina.s :: ++input: -5786.500000 -7.250000 ++output: -7.250000 ++fcsr: 0 ++roundig mode: near ++fmina.s :: ++input: 1752.000000 -3478.500000 ++output: 1752.000000 ++fcsr: 0 ++roundig mode: near ++fmina.s :: ++input: 0.015625 356.500000 ++output: 0.015625 ++fcsr: 0 ++roundig mode: near ++fmina.s :: ++input: 0.031250 -1.000000 ++output: 0.031250 ++fcsr: 0 ++roundig mode: near ++fmina.s :: ++input: -248562.750000 23.062500 ++output: 23.062500 ++fcsr: 0 ++roundig mode: near ++fmina.s :: ++input: -45786.500000 0.000000 ++output: 0.000000 ++fcsr: 0 ++roundig mode: near ++fmina.s :: ++input: 456.000000 456.250000 ++output: 456.000000 ++fcsr: 0 ++roundig mode: near ++fmina.s :: ++input: 34.031250 3.000000 ++output: 3.000000 ++fcsr: 0 ++roundig mode: near ++fmina.s :: ++input: 45786.750000 -1.000000 ++output: -1.000000 
++fcsr: 0 ++roundig mode: near ++fmina.s :: ++input: 1752065.000000 1384.500000 ++output: 1384.500000 ++fcsr: 0 ++roundig mode: near ++fmina.s :: ++input: 107.000000 -7.000000 ++output: -7.000000 ++fcsr: 0 ++roundig mode: near ++fmina.s :: ++input: -45667.250000 100.000000 ++output: 100.000000 ++fcsr: 0 ++roundig mode: near ++fmina.s :: ++input: -7.000000 -5786.500000 ++output: -7.000000 ++fcsr: 0 ++roundig mode: near ++fmina.s :: ++input: -347856.500000 1752.000000 ++output: 1752.000000 ++fcsr: 0 ++roundig mode: near ++fmina.s :: ++input: 356047.500000 0.015625 ++output: 0.015625 ++fcsr: 0 ++roundig mode: near ++fmina.s :: ++input: -1.000000 0.031250 ++output: 0.031250 ++fcsr: 0 ++roundig mode: near ++fmina.s :: ++input: 23.062500 -248562.750000 ++output: 23.062500 ++fcsr: 0 ++roundig mode: zero ++fmina.s :: ++input: 0.000000 -4578.500000 ++output: 0.000000 ++fcsr: 0x100 ++roundig mode: zero ++fmina.s :: ++input: 456.250000 456.250000 ++output: 456.250000 ++fcsr: 0x100 ++roundig mode: zero ++fmina.s :: ++input: 3.000000 34.031250 ++output: 3.000000 ++fcsr: 0x100 ++roundig mode: zero ++fmina.s :: ++input: -1.000000 4578.750000 ++output: -1.000000 ++fcsr: 0x100 ++roundig mode: zero ++fmina.s :: ++input: 1384.500000 175.000000 ++output: 175.000000 ++fcsr: 0x100 ++roundig mode: zero ++fmina.s :: ++input: -7.250000 107.000000 ++output: -7.250000 ++fcsr: 0x100 ++roundig mode: zero ++fmina.s :: ++input: 1000000000.000000 -456.250000 ++output: -456.250000 ++fcsr: 0x100 ++roundig mode: zero ++fmina.s :: ++input: -5786.500000 -7.250000 ++output: -7.250000 ++fcsr: 0x100 ++roundig mode: zero ++fmina.s :: ++input: 1752.000000 -3478.500000 ++output: 1752.000000 ++fcsr: 0x100 ++roundig mode: zero ++fmina.s :: ++input: 0.015625 356.500000 ++output: 0.015625 ++fcsr: 0x100 ++roundig mode: zero ++fmina.s :: ++input: 0.031250 -1.000000 ++output: 0.031250 ++fcsr: 0x100 ++roundig mode: zero ++fmina.s :: ++input: -248562.750000 23.062500 ++output: 23.062500 ++fcsr: 0x100 ++roundig mode: zero ++fmina.s :: ++input: -45786.500000 0.000000 ++output: 0.000000 ++fcsr: 0x100 ++roundig mode: zero ++fmina.s :: ++input: 456.000000 456.250000 ++output: 456.000000 ++fcsr: 0x100 ++roundig mode: zero ++fmina.s :: ++input: 34.031250 3.000000 ++output: 3.000000 ++fcsr: 0x100 ++roundig mode: zero ++fmina.s :: ++input: 45786.750000 -1.000000 ++output: -1.000000 ++fcsr: 0x100 ++roundig mode: zero ++fmina.s :: ++input: 1752065.000000 1384.500000 ++output: 1384.500000 ++fcsr: 0x100 ++roundig mode: zero ++fmina.s :: ++input: 107.000000 -7.000000 ++output: -7.000000 ++fcsr: 0x100 ++roundig mode: zero ++fmina.s :: ++input: -45667.250000 100.000000 ++output: 100.000000 ++fcsr: 0x100 ++roundig mode: zero ++fmina.s :: ++input: -7.000000 -5786.500000 ++output: -7.000000 ++fcsr: 0x100 ++roundig mode: zero ++fmina.s :: ++input: -347856.500000 1752.000000 ++output: 1752.000000 ++fcsr: 0x100 ++roundig mode: zero ++fmina.s :: ++input: 356047.500000 0.015625 ++output: 0.015625 ++fcsr: 0x100 ++roundig mode: zero ++fmina.s :: ++input: -1.000000 0.031250 ++output: 0.031250 ++fcsr: 0x100 ++roundig mode: zero ++fmina.s :: ++input: 23.062500 -248562.750000 ++output: 23.062500 ++fcsr: 0x100 ++roundig mode: +inf ++fmina.s :: ++input: 0.000000 -4578.500000 ++output: 0.000000 ++fcsr: 0x200 ++roundig mode: +inf ++fmina.s :: ++input: 456.250000 456.250000 ++output: 456.250000 ++fcsr: 0x200 ++roundig mode: +inf ++fmina.s :: ++input: 3.000000 34.031250 ++output: 3.000000 ++fcsr: 0x200 ++roundig mode: +inf ++fmina.s :: ++input: -1.000000 4578.750000 ++output: 
-1.000000 ++fcsr: 0x200 ++roundig mode: +inf ++fmina.s :: ++input: 1384.500000 175.000000 ++output: 175.000000 ++fcsr: 0x200 ++roundig mode: +inf ++fmina.s :: ++input: -7.250000 107.000000 ++output: -7.250000 ++fcsr: 0x200 ++roundig mode: +inf ++fmina.s :: ++input: 1000000000.000000 -456.250000 ++output: -456.250000 ++fcsr: 0x200 ++roundig mode: +inf ++fmina.s :: ++input: -5786.500000 -7.250000 ++output: -7.250000 ++fcsr: 0x200 ++roundig mode: +inf ++fmina.s :: ++input: 1752.000000 -3478.500000 ++output: 1752.000000 ++fcsr: 0x200 ++roundig mode: +inf ++fmina.s :: ++input: 0.015625 356.500000 ++output: 0.015625 ++fcsr: 0x200 ++roundig mode: +inf ++fmina.s :: ++input: 0.031250 -1.000000 ++output: 0.031250 ++fcsr: 0x200 ++roundig mode: +inf ++fmina.s :: ++input: -248562.750000 23.062500 ++output: 23.062500 ++fcsr: 0x200 ++roundig mode: +inf ++fmina.s :: ++input: -45786.500000 0.000000 ++output: 0.000000 ++fcsr: 0x200 ++roundig mode: +inf ++fmina.s :: ++input: 456.000000 456.250000 ++output: 456.000000 ++fcsr: 0x200 ++roundig mode: +inf ++fmina.s :: ++input: 34.031250 3.000000 ++output: 3.000000 ++fcsr: 0x200 ++roundig mode: +inf ++fmina.s :: ++input: 45786.750000 -1.000000 ++output: -1.000000 ++fcsr: 0x200 ++roundig mode: +inf ++fmina.s :: ++input: 1752065.000000 1384.500000 ++output: 1384.500000 ++fcsr: 0x200 ++roundig mode: +inf ++fmina.s :: ++input: 107.000000 -7.000000 ++output: -7.000000 ++fcsr: 0x200 ++roundig mode: +inf ++fmina.s :: ++input: -45667.250000 100.000000 ++output: 100.000000 ++fcsr: 0x200 ++roundig mode: +inf ++fmina.s :: ++input: -7.000000 -5786.500000 ++output: -7.000000 ++fcsr: 0x200 ++roundig mode: +inf ++fmina.s :: ++input: -347856.500000 1752.000000 ++output: 1752.000000 ++fcsr: 0x200 ++roundig mode: +inf ++fmina.s :: ++input: 356047.500000 0.015625 ++output: 0.015625 ++fcsr: 0x200 ++roundig mode: +inf ++fmina.s :: ++input: -1.000000 0.031250 ++output: 0.031250 ++fcsr: 0x200 ++roundig mode: +inf ++fmina.s :: ++input: 23.062500 -248562.750000 ++output: 23.062500 ++fcsr: 0x200 ++roundig mode: -inf ++fmina.s :: ++input: 0.000000 -4578.500000 ++output: 0.000000 ++fcsr: 0x300 ++roundig mode: -inf ++fmina.s :: ++input: 456.250000 456.250000 ++output: 456.250000 ++fcsr: 0x300 ++roundig mode: -inf ++fmina.s :: ++input: 3.000000 34.031250 ++output: 3.000000 ++fcsr: 0x300 ++roundig mode: -inf ++fmina.s :: ++input: -1.000000 4578.750000 ++output: -1.000000 ++fcsr: 0x300 ++roundig mode: -inf ++fmina.s :: ++input: 1384.500000 175.000000 ++output: 175.000000 ++fcsr: 0x300 ++roundig mode: -inf ++fmina.s :: ++input: -7.250000 107.000000 ++output: -7.250000 ++fcsr: 0x300 ++roundig mode: -inf ++fmina.s :: ++input: 1000000000.000000 -456.250000 ++output: -456.250000 ++fcsr: 0x300 ++roundig mode: -inf ++fmina.s :: ++input: -5786.500000 -7.250000 ++output: -7.250000 ++fcsr: 0x300 ++roundig mode: -inf ++fmina.s :: ++input: 1752.000000 -3478.500000 ++output: 1752.000000 ++fcsr: 0x300 ++roundig mode: -inf ++fmina.s :: ++input: 0.015625 356.500000 ++output: 0.015625 ++fcsr: 0x300 ++roundig mode: -inf ++fmina.s :: ++input: 0.031250 -1.000000 ++output: 0.031250 ++fcsr: 0x300 ++roundig mode: -inf ++fmina.s :: ++input: -248562.750000 23.062500 ++output: 23.062500 ++fcsr: 0x300 ++roundig mode: -inf ++fmina.s :: ++input: -45786.500000 0.000000 ++output: 0.000000 ++fcsr: 0x300 ++roundig mode: -inf ++fmina.s :: ++input: 456.000000 456.250000 ++output: 456.000000 ++fcsr: 0x300 ++roundig mode: -inf ++fmina.s :: ++input: 34.031250 3.000000 ++output: 3.000000 ++fcsr: 0x300 ++roundig mode: -inf ++fmina.s 
:: ++input: 45786.750000 -1.000000 ++output: -1.000000 ++fcsr: 0x300 ++roundig mode: -inf ++fmina.s :: ++input: 1752065.000000 1384.500000 ++output: 1384.500000 ++fcsr: 0x300 ++roundig mode: -inf ++fmina.s :: ++input: 107.000000 -7.000000 ++output: -7.000000 ++fcsr: 0x300 ++roundig mode: -inf ++fmina.s :: ++input: -45667.250000 100.000000 ++output: 100.000000 ++fcsr: 0x300 ++roundig mode: -inf ++fmina.s :: ++input: -7.000000 -5786.500000 ++output: -7.000000 ++fcsr: 0x300 ++roundig mode: -inf ++fmina.s :: ++input: -347856.500000 1752.000000 ++output: 1752.000000 ++fcsr: 0x300 ++roundig mode: -inf ++fmina.s :: ++input: 356047.500000 0.015625 ++output: 0.015625 ++fcsr: 0x300 ++roundig mode: -inf ++fmina.s :: ++input: -1.000000 0.031250 ++output: 0.031250 ++fcsr: 0x300 ++roundig mode: -inf ++fmina.s :: ++input: 23.062500 -248562.750000 ++output: 23.062500 ++fcsr: 0x300 ++roundig mode: near ++fmina.d :: ++input: 0.000000000000000 -45786.500000000000000 ++output: 0.000000000000000 ++fcsr: 0 ++roundig mode: near ++fmina.d :: ++input: 456.250000000000000 456.250000000000000 ++output: 456.250000000000000 ++fcsr: 0 ++roundig mode: near ++fmina.d :: ++input: 3.000000000000000 34.031250000000000 ++output: 3.000000000000000 ++fcsr: 0 ++roundig mode: near ++fmina.d :: ++input: -1.000000000000000 45786.750000000000000 ++output: -1.000000000000000 ++fcsr: 0 ++roundig mode: near ++fmina.d :: ++input: 1384.500000000000000 1752065.000000000000000 ++output: 1384.500000000000000 ++fcsr: 0 ++roundig mode: near ++fmina.d :: ++input: -7.250000000000000 107.000000000000000 ++output: -7.250000000000000 ++fcsr: 0 ++roundig mode: near ++fmina.d :: ++input: 1000000000.000000000000000 -45667.250000000000000 ++output: -45667.250000000000000 ++fcsr: 0 ++roundig mode: near ++fmina.d :: ++input: -5786.500000000000000 -7.250000000000000 ++output: -7.250000000000000 ++fcsr: 0 ++roundig mode: near ++fmina.d :: ++input: 1752.000000000000000 -347856.500000000000000 ++output: 1752.000000000000000 ++fcsr: 0 ++roundig mode: near ++fmina.d :: ++input: 0.015625000000000 356047.500000000000000 ++output: 0.015625000000000 ++fcsr: 0 ++roundig mode: near ++fmina.d :: ++input: 0.031250000000000 -1.000000000000000 ++output: 0.031250000000000 ++fcsr: 0 ++roundig mode: near ++fmina.d :: ++input: -248562.750000000000000 23.062500000000000 ++output: 23.062500000000000 ++fcsr: 0 ++roundig mode: near ++fmina.d :: ++input: -45786.500000000000000 0.000000000000000 ++output: 0.000000000000000 ++fcsr: 0 ++roundig mode: near ++fmina.d :: ++input: 456.000000000000000 456.250000000000000 ++output: 456.000000000000000 ++fcsr: 0 ++roundig mode: near ++fmina.d :: ++input: 34.031250000000000 3.000000000000000 ++output: 3.000000000000000 ++fcsr: 0 ++roundig mode: near ++fmina.d :: ++input: 45786.750000000000000 -1.000000000000000 ++output: -1.000000000000000 ++fcsr: 0 ++roundig mode: near ++fmina.d :: ++input: 1752065.000000000000000 1384.500000000000000 ++output: 1384.500000000000000 ++fcsr: 0 ++roundig mode: near ++fmina.d :: ++input: 107.000000000000000 -7.000000000000000 ++output: -7.000000000000000 ++fcsr: 0 ++roundig mode: near ++fmina.d :: ++input: -45667.250000000000000 1000000000.000000000000000 ++output: -45667.250000000000000 ++fcsr: 0 ++roundig mode: near ++fmina.d :: ++input: -7.000000000000000 -5786.500000000000000 ++output: -7.000000000000000 ++fcsr: 0 ++roundig mode: near ++fmina.d :: ++input: -347856.500000000000000 1752.000000000000000 ++output: 1752.000000000000000 ++fcsr: 0 ++roundig mode: near ++fmina.d :: ++input: 
356047.500000000000000 0.015625000000000 ++output: 0.015625000000000 ++fcsr: 0 ++roundig mode: near ++fmina.d :: ++input: -1.000000000000000 0.031250000000000 ++output: 0.031250000000000 ++fcsr: 0 ++roundig mode: near ++fmina.d :: ++input: 23.062500000000000 -248562.750000000000000 ++output: 23.062500000000000 ++fcsr: 0 ++roundig mode: zero ++fmina.d :: ++input: 0.000000000000000 -45786.500000000000000 ++output: 0.000000000000000 ++fcsr: 0x100 ++roundig mode: zero ++fmina.d :: ++input: 456.250000000000000 456.250000000000000 ++output: 456.250000000000000 ++fcsr: 0x100 ++roundig mode: zero ++fmina.d :: ++input: 3.000000000000000 34.031250000000000 ++output: 3.000000000000000 ++fcsr: 0x100 ++roundig mode: zero ++fmina.d :: ++input: -1.000000000000000 45786.750000000000000 ++output: -1.000000000000000 ++fcsr: 0x100 ++roundig mode: zero ++fmina.d :: ++input: 1384.500000000000000 1752065.000000000000000 ++output: 1384.500000000000000 ++fcsr: 0x100 ++roundig mode: zero ++fmina.d :: ++input: -7.250000000000000 107.000000000000000 ++output: -7.250000000000000 ++fcsr: 0x100 ++roundig mode: zero ++fmina.d :: ++input: 1000000000.000000000000000 -45667.250000000000000 ++output: -45667.250000000000000 ++fcsr: 0x100 ++roundig mode: zero ++fmina.d :: ++input: -5786.500000000000000 -7.250000000000000 ++output: -7.250000000000000 ++fcsr: 0x100 ++roundig mode: zero ++fmina.d :: ++input: 1752.000000000000000 -347856.500000000000000 ++output: 1752.000000000000000 ++fcsr: 0x100 ++roundig mode: zero ++fmina.d :: ++input: 0.015625000000000 356047.500000000000000 ++output: 0.015625000000000 ++fcsr: 0x100 ++roundig mode: zero ++fmina.d :: ++input: 0.031250000000000 -1.000000000000000 ++output: 0.031250000000000 ++fcsr: 0x100 ++roundig mode: zero ++fmina.d :: ++input: -248562.750000000000000 23.062500000000000 ++output: 23.062500000000000 ++fcsr: 0x100 ++roundig mode: zero ++fmina.d :: ++input: -45786.500000000000000 0.000000000000000 ++output: 0.000000000000000 ++fcsr: 0x100 ++roundig mode: zero ++fmina.d :: ++input: 456.000000000000000 456.250000000000000 ++output: 456.000000000000000 ++fcsr: 0x100 ++roundig mode: zero ++fmina.d :: ++input: 34.031250000000000 3.000000000000000 ++output: 3.000000000000000 ++fcsr: 0x100 ++roundig mode: zero ++fmina.d :: ++input: 45786.750000000000000 -1.000000000000000 ++output: -1.000000000000000 ++fcsr: 0x100 ++roundig mode: zero ++fmina.d :: ++input: 1752065.000000000000000 1384.500000000000000 ++output: 1384.500000000000000 ++fcsr: 0x100 ++roundig mode: zero ++fmina.d :: ++input: 107.000000000000000 -7.000000000000000 ++output: -7.000000000000000 ++fcsr: 0x100 ++roundig mode: zero ++fmina.d :: ++input: -45667.250000000000000 1000000000.000000000000000 ++output: -45667.250000000000000 ++fcsr: 0x100 ++roundig mode: zero ++fmina.d :: ++input: -7.000000000000000 -5786.500000000000000 ++output: -7.000000000000000 ++fcsr: 0x100 ++roundig mode: zero ++fmina.d :: ++input: -347856.500000000000000 1752.000000000000000 ++output: 1752.000000000000000 ++fcsr: 0x100 ++roundig mode: zero ++fmina.d :: ++input: 356047.500000000000000 0.015625000000000 ++output: 0.015625000000000 ++fcsr: 0x100 ++roundig mode: zero ++fmina.d :: ++input: -1.000000000000000 0.031250000000000 ++output: 0.031250000000000 ++fcsr: 0x100 ++roundig mode: zero ++fmina.d :: ++input: 23.062500000000000 -248562.750000000000000 ++output: 23.062500000000000 ++fcsr: 0x100 ++roundig mode: +inf ++fmina.d :: ++input: 0.000000000000000 -45786.500000000000000 ++output: 0.000000000000000 ++fcsr: 0x200 ++roundig mode: +inf ++fmina.d 
:: ++input: 456.250000000000000 456.250000000000000 ++output: 456.250000000000000 ++fcsr: 0x200 ++roundig mode: +inf ++fmina.d :: ++input: 3.000000000000000 34.031250000000000 ++output: 3.000000000000000 ++fcsr: 0x200 ++roundig mode: +inf ++fmina.d :: ++input: -1.000000000000000 45786.750000000000000 ++output: -1.000000000000000 ++fcsr: 0x200 ++roundig mode: +inf ++fmina.d :: ++input: 1384.500000000000000 1752065.000000000000000 ++output: 1384.500000000000000 ++fcsr: 0x200 ++roundig mode: +inf ++fmina.d :: ++input: -7.250000000000000 107.000000000000000 ++output: -7.250000000000000 ++fcsr: 0x200 ++roundig mode: +inf ++fmina.d :: ++input: 1000000000.000000000000000 -45667.250000000000000 ++output: -45667.250000000000000 ++fcsr: 0x200 ++roundig mode: +inf ++fmina.d :: ++input: -5786.500000000000000 -7.250000000000000 ++output: -7.250000000000000 ++fcsr: 0x200 ++roundig mode: +inf ++fmina.d :: ++input: 1752.000000000000000 -347856.500000000000000 ++output: 1752.000000000000000 ++fcsr: 0x200 ++roundig mode: +inf ++fmina.d :: ++input: 0.015625000000000 356047.500000000000000 ++output: 0.015625000000000 ++fcsr: 0x200 ++roundig mode: +inf ++fmina.d :: ++input: 0.031250000000000 -1.000000000000000 ++output: 0.031250000000000 ++fcsr: 0x200 ++roundig mode: +inf ++fmina.d :: ++input: -248562.750000000000000 23.062500000000000 ++output: 23.062500000000000 ++fcsr: 0x200 ++roundig mode: +inf ++fmina.d :: ++input: -45786.500000000000000 0.000000000000000 ++output: 0.000000000000000 ++fcsr: 0x200 ++roundig mode: +inf ++fmina.d :: ++input: 456.000000000000000 456.250000000000000 ++output: 456.000000000000000 ++fcsr: 0x200 ++roundig mode: +inf ++fmina.d :: ++input: 34.031250000000000 3.000000000000000 ++output: 3.000000000000000 ++fcsr: 0x200 ++roundig mode: +inf ++fmina.d :: ++input: 45786.750000000000000 -1.000000000000000 ++output: -1.000000000000000 ++fcsr: 0x200 ++roundig mode: +inf ++fmina.d :: ++input: 1752065.000000000000000 1384.500000000000000 ++output: 1384.500000000000000 ++fcsr: 0x200 ++roundig mode: +inf ++fmina.d :: ++input: 107.000000000000000 -7.000000000000000 ++output: -7.000000000000000 ++fcsr: 0x200 ++roundig mode: +inf ++fmina.d :: ++input: -45667.250000000000000 1000000000.000000000000000 ++output: -45667.250000000000000 ++fcsr: 0x200 ++roundig mode: +inf ++fmina.d :: ++input: -7.000000000000000 -5786.500000000000000 ++output: -7.000000000000000 ++fcsr: 0x200 ++roundig mode: +inf ++fmina.d :: ++input: -347856.500000000000000 1752.000000000000000 ++output: 1752.000000000000000 ++fcsr: 0x200 ++roundig mode: +inf ++fmina.d :: ++input: 356047.500000000000000 0.015625000000000 ++output: 0.015625000000000 ++fcsr: 0x200 ++roundig mode: +inf ++fmina.d :: ++input: -1.000000000000000 0.031250000000000 ++output: 0.031250000000000 ++fcsr: 0x200 ++roundig mode: +inf ++fmina.d :: ++input: 23.062500000000000 -248562.750000000000000 ++output: 23.062500000000000 ++fcsr: 0x200 ++roundig mode: -inf ++fmina.d :: ++input: 0.000000000000000 -45786.500000000000000 ++output: 0.000000000000000 ++fcsr: 0x300 ++roundig mode: -inf ++fmina.d :: ++input: 456.250000000000000 456.250000000000000 ++output: 456.250000000000000 ++fcsr: 0x300 ++roundig mode: -inf ++fmina.d :: ++input: 3.000000000000000 34.031250000000000 ++output: 3.000000000000000 ++fcsr: 0x300 ++roundig mode: -inf ++fmina.d :: ++input: -1.000000000000000 45786.750000000000000 ++output: -1.000000000000000 ++fcsr: 0x300 ++roundig mode: -inf ++fmina.d :: ++input: 1384.500000000000000 1752065.000000000000000 ++output: 1384.500000000000000 ++fcsr: 0x300 
++roundig mode: -inf ++fmina.d :: ++input: -7.250000000000000 107.000000000000000 ++output: -7.250000000000000 ++fcsr: 0x300 ++roundig mode: -inf ++fmina.d :: ++input: 1000000000.000000000000000 -45667.250000000000000 ++output: -45667.250000000000000 ++fcsr: 0x300 ++roundig mode: -inf ++fmina.d :: ++input: -5786.500000000000000 -7.250000000000000 ++output: -7.250000000000000 ++fcsr: 0x300 ++roundig mode: -inf ++fmina.d :: ++input: 1752.000000000000000 -347856.500000000000000 ++output: 1752.000000000000000 ++fcsr: 0x300 ++roundig mode: -inf ++fmina.d :: ++input: 0.015625000000000 356047.500000000000000 ++output: 0.015625000000000 ++fcsr: 0x300 ++roundig mode: -inf ++fmina.d :: ++input: 0.031250000000000 -1.000000000000000 ++output: 0.031250000000000 ++fcsr: 0x300 ++roundig mode: -inf ++fmina.d :: ++input: -248562.750000000000000 23.062500000000000 ++output: 23.062500000000000 ++fcsr: 0x300 ++roundig mode: -inf ++fmina.d :: ++input: -45786.500000000000000 0.000000000000000 ++output: 0.000000000000000 ++fcsr: 0x300 ++roundig mode: -inf ++fmina.d :: ++input: 456.000000000000000 456.250000000000000 ++output: 456.000000000000000 ++fcsr: 0x300 ++roundig mode: -inf ++fmina.d :: ++input: 34.031250000000000 3.000000000000000 ++output: 3.000000000000000 ++fcsr: 0x300 ++roundig mode: -inf ++fmina.d :: ++input: 45786.750000000000000 -1.000000000000000 ++output: -1.000000000000000 ++fcsr: 0x300 ++roundig mode: -inf ++fmina.d :: ++input: 1752065.000000000000000 1384.500000000000000 ++output: 1384.500000000000000 ++fcsr: 0x300 ++roundig mode: -inf ++fmina.d :: ++input: 107.000000000000000 -7.000000000000000 ++output: -7.000000000000000 ++fcsr: 0x300 ++roundig mode: -inf ++fmina.d :: ++input: -45667.250000000000000 1000000000.000000000000000 ++output: -45667.250000000000000 ++fcsr: 0x300 ++roundig mode: -inf ++fmina.d :: ++input: -7.000000000000000 -5786.500000000000000 ++output: -7.000000000000000 ++fcsr: 0x300 ++roundig mode: -inf ++fmina.d :: ++input: -347856.500000000000000 1752.000000000000000 ++output: 1752.000000000000000 ++fcsr: 0x300 ++roundig mode: -inf ++fmina.d :: ++input: 356047.500000000000000 0.015625000000000 ++output: 0.015625000000000 ++fcsr: 0x300 ++roundig mode: -inf ++fmina.d :: ++input: -1.000000000000000 0.031250000000000 ++output: 0.031250000000000 ++fcsr: 0x300 ++roundig mode: -inf ++fmina.d :: ++input: 23.062500000000000 -248562.750000000000000 ++output: 23.062500000000000 ++fcsr: 0x300 ++roundig mode: near ++fabs.s :: ++input: 0.000000 ++output: 0.000000 ++fcsr: 0 ++roundig mode: near ++fabs.s :: ++input: 456.250000 ++output: 456.250000 ++fcsr: 0 ++roundig mode: near ++fabs.s :: ++input: 3.000000 ++output: 3.000000 ++fcsr: 0 ++roundig mode: near ++fabs.s :: ++input: -1.000000 ++output: 1.000000 ++fcsr: 0 ++roundig mode: near ++fabs.s :: ++input: 1384.500000 ++output: 1384.500000 ++fcsr: 0 ++roundig mode: near ++fabs.s :: ++input: -7.250000 ++output: 7.250000 ++fcsr: 0 ++roundig mode: near ++fabs.s :: ++input: 1000000000.000000 ++output: 1000000000.000000 ++fcsr: 0 ++roundig mode: near ++fabs.s :: ++input: -5786.500000 ++output: 5786.500000 ++fcsr: 0 ++roundig mode: near ++fabs.s :: ++input: 1752.000000 ++output: 1752.000000 ++fcsr: 0 ++roundig mode: near ++fabs.s :: ++input: 0.015625 ++output: 0.015625 ++fcsr: 0 ++roundig mode: near ++fabs.s :: ++input: 0.031250 ++output: 0.031250 ++fcsr: 0 ++roundig mode: near ++fabs.s :: ++input: -248562.750000 ++output: 248562.750000 ++fcsr: 0 ++roundig mode: near ++fabs.s :: ++input: -45786.500000 ++output: 45786.500000 ++fcsr: 0 ++roundig 
mode: near ++fabs.s :: ++input: 456.000000 ++output: 456.000000 ++fcsr: 0 ++roundig mode: near ++fabs.s :: ++input: 34.031250 ++output: 34.031250 ++fcsr: 0 ++roundig mode: near ++fabs.s :: ++input: 45786.750000 ++output: 45786.750000 ++fcsr: 0 ++roundig mode: near ++fabs.s :: ++input: 1752065.000000 ++output: 1752065.000000 ++fcsr: 0 ++roundig mode: near ++fabs.s :: ++input: 107.000000 ++output: 107.000000 ++fcsr: 0 ++roundig mode: near ++fabs.s :: ++input: -45667.250000 ++output: 45667.250000 ++fcsr: 0 ++roundig mode: near ++fabs.s :: ++input: -7.000000 ++output: 7.000000 ++fcsr: 0 ++roundig mode: near ++fabs.s :: ++input: -347856.500000 ++output: 347856.500000 ++fcsr: 0 ++roundig mode: near ++fabs.s :: ++input: 356047.500000 ++output: 356047.500000 ++fcsr: 0 ++roundig mode: near ++fabs.s :: ++input: -1.000000 ++output: 1.000000 ++fcsr: 0 ++roundig mode: near ++fabs.s :: ++input: 23.062500 ++output: 23.062500 ++fcsr: 0 ++roundig mode: zero ++fabs.s :: ++input: 0.000000 ++output: 0.000000 ++fcsr: 0x100 ++roundig mode: zero ++fabs.s :: ++input: 456.250000 ++output: 456.250000 ++fcsr: 0x100 ++roundig mode: zero ++fabs.s :: ++input: 3.000000 ++output: 3.000000 ++fcsr: 0x100 ++roundig mode: zero ++fabs.s :: ++input: -1.000000 ++output: 1.000000 ++fcsr: 0x100 ++roundig mode: zero ++fabs.s :: ++input: 1384.500000 ++output: 1384.500000 ++fcsr: 0x100 ++roundig mode: zero ++fabs.s :: ++input: -7.250000 ++output: 7.250000 ++fcsr: 0x100 ++roundig mode: zero ++fabs.s :: ++input: 1000000000.000000 ++output: 1000000000.000000 ++fcsr: 0x100 ++roundig mode: zero ++fabs.s :: ++input: -5786.500000 ++output: 5786.500000 ++fcsr: 0x100 ++roundig mode: zero ++fabs.s :: ++input: 1752.000000 ++output: 1752.000000 ++fcsr: 0x100 ++roundig mode: zero ++fabs.s :: ++input: 0.015625 ++output: 0.015625 ++fcsr: 0x100 ++roundig mode: zero ++fabs.s :: ++input: 0.031250 ++output: 0.031250 ++fcsr: 0x100 ++roundig mode: zero ++fabs.s :: ++input: -248562.750000 ++output: 248562.750000 ++fcsr: 0x100 ++roundig mode: zero ++fabs.s :: ++input: -45786.500000 ++output: 45786.500000 ++fcsr: 0x100 ++roundig mode: zero ++fabs.s :: ++input: 456.000000 ++output: 456.000000 ++fcsr: 0x100 ++roundig mode: zero ++fabs.s :: ++input: 34.031250 ++output: 34.031250 ++fcsr: 0x100 ++roundig mode: zero ++fabs.s :: ++input: 45786.750000 ++output: 45786.750000 ++fcsr: 0x100 ++roundig mode: zero ++fabs.s :: ++input: 1752065.000000 ++output: 1752065.000000 ++fcsr: 0x100 ++roundig mode: zero ++fabs.s :: ++input: 107.000000 ++output: 107.000000 ++fcsr: 0x100 ++roundig mode: zero ++fabs.s :: ++input: -45667.250000 ++output: 45667.250000 ++fcsr: 0x100 ++roundig mode: zero ++fabs.s :: ++input: -7.000000 ++output: 7.000000 ++fcsr: 0x100 ++roundig mode: zero ++fabs.s :: ++input: -347856.500000 ++output: 347856.500000 ++fcsr: 0x100 ++roundig mode: zero ++fabs.s :: ++input: 356047.500000 ++output: 356047.500000 ++fcsr: 0x100 ++roundig mode: zero ++fabs.s :: ++input: -1.000000 ++output: 1.000000 ++fcsr: 0x100 ++roundig mode: zero ++fabs.s :: ++input: 23.062500 ++output: 23.062500 ++fcsr: 0x100 ++roundig mode: +inf ++fabs.s :: ++input: 0.000000 ++output: 0.000000 ++fcsr: 0x200 ++roundig mode: +inf ++fabs.s :: ++input: 456.250000 ++output: 456.250000 ++fcsr: 0x200 ++roundig mode: +inf ++fabs.s :: ++input: 3.000000 ++output: 3.000000 ++fcsr: 0x200 ++roundig mode: +inf ++fabs.s :: ++input: -1.000000 ++output: 1.000000 ++fcsr: 0x200 ++roundig mode: +inf ++fabs.s :: ++input: 1384.500000 ++output: 1384.500000 ++fcsr: 0x200 ++roundig mode: +inf ++fabs.s :: ++input: 
-7.250000 ++output: 7.250000 ++fcsr: 0x200 ++roundig mode: +inf ++fabs.s :: ++input: 1000000000.000000 ++output: 1000000000.000000 ++fcsr: 0x200 ++roundig mode: +inf ++fabs.s :: ++input: -5786.500000 ++output: 5786.500000 ++fcsr: 0x200 ++roundig mode: +inf ++fabs.s :: ++input: 1752.000000 ++output: 1752.000000 ++fcsr: 0x200 ++roundig mode: +inf ++fabs.s :: ++input: 0.015625 ++output: 0.015625 ++fcsr: 0x200 ++roundig mode: +inf ++fabs.s :: ++input: 0.031250 ++output: 0.031250 ++fcsr: 0x200 ++roundig mode: +inf ++fabs.s :: ++input: -248562.750000 ++output: 248562.750000 ++fcsr: 0x200 ++roundig mode: +inf ++fabs.s :: ++input: -45786.500000 ++output: 45786.500000 ++fcsr: 0x200 ++roundig mode: +inf ++fabs.s :: ++input: 456.000000 ++output: 456.000000 ++fcsr: 0x200 ++roundig mode: +inf ++fabs.s :: ++input: 34.031250 ++output: 34.031250 ++fcsr: 0x200 ++roundig mode: +inf ++fabs.s :: ++input: 45786.750000 ++output: 45786.750000 ++fcsr: 0x200 ++roundig mode: +inf ++fabs.s :: ++input: 1752065.000000 ++output: 1752065.000000 ++fcsr: 0x200 ++roundig mode: +inf ++fabs.s :: ++input: 107.000000 ++output: 107.000000 ++fcsr: 0x200 ++roundig mode: +inf ++fabs.s :: ++input: -45667.250000 ++output: 45667.250000 ++fcsr: 0x200 ++roundig mode: +inf ++fabs.s :: ++input: -7.000000 ++output: 7.000000 ++fcsr: 0x200 ++roundig mode: +inf ++fabs.s :: ++input: -347856.500000 ++output: 347856.500000 ++fcsr: 0x200 ++roundig mode: +inf ++fabs.s :: ++input: 356047.500000 ++output: 356047.500000 ++fcsr: 0x200 ++roundig mode: +inf ++fabs.s :: ++input: -1.000000 ++output: 1.000000 ++fcsr: 0x200 ++roundig mode: +inf ++fabs.s :: ++input: 23.062500 ++output: 23.062500 ++fcsr: 0x200 ++roundig mode: -inf ++fabs.s :: ++input: 0.000000 ++output: 0.000000 ++fcsr: 0x300 ++roundig mode: -inf ++fabs.s :: ++input: 456.250000 ++output: 456.250000 ++fcsr: 0x300 ++roundig mode: -inf ++fabs.s :: ++input: 3.000000 ++output: 3.000000 ++fcsr: 0x300 ++roundig mode: -inf ++fabs.s :: ++input: -1.000000 ++output: 1.000000 ++fcsr: 0x300 ++roundig mode: -inf ++fabs.s :: ++input: 1384.500000 ++output: 1384.500000 ++fcsr: 0x300 ++roundig mode: -inf ++fabs.s :: ++input: -7.250000 ++output: 7.250000 ++fcsr: 0x300 ++roundig mode: -inf ++fabs.s :: ++input: 1000000000.000000 ++output: 1000000000.000000 ++fcsr: 0x300 ++roundig mode: -inf ++fabs.s :: ++input: -5786.500000 ++output: 5786.500000 ++fcsr: 0x300 ++roundig mode: -inf ++fabs.s :: ++input: 1752.000000 ++output: 1752.000000 ++fcsr: 0x300 ++roundig mode: -inf ++fabs.s :: ++input: 0.015625 ++output: 0.015625 ++fcsr: 0x300 ++roundig mode: -inf ++fabs.s :: ++input: 0.031250 ++output: 0.031250 ++fcsr: 0x300 ++roundig mode: -inf ++fabs.s :: ++input: -248562.750000 ++output: 248562.750000 ++fcsr: 0x300 ++roundig mode: -inf ++fabs.s :: ++input: -45786.500000 ++output: 45786.500000 ++fcsr: 0x300 ++roundig mode: -inf ++fabs.s :: ++input: 456.000000 ++output: 456.000000 ++fcsr: 0x300 ++roundig mode: -inf ++fabs.s :: ++input: 34.031250 ++output: 34.031250 ++fcsr: 0x300 ++roundig mode: -inf ++fabs.s :: ++input: 45786.750000 ++output: 45786.750000 ++fcsr: 0x300 ++roundig mode: -inf ++fabs.s :: ++input: 1752065.000000 ++output: 1752065.000000 ++fcsr: 0x300 ++roundig mode: -inf ++fabs.s :: ++input: 107.000000 ++output: 107.000000 ++fcsr: 0x300 ++roundig mode: -inf ++fabs.s :: ++input: -45667.250000 ++output: 45667.250000 ++fcsr: 0x300 ++roundig mode: -inf ++fabs.s :: ++input: -7.000000 ++output: 7.000000 ++fcsr: 0x300 ++roundig mode: -inf ++fabs.s :: ++input: -347856.500000 ++output: 347856.500000 ++fcsr: 0x300 
++roundig mode: -inf ++fabs.s :: ++input: 356047.500000 ++output: 356047.500000 ++fcsr: 0x300 ++roundig mode: -inf ++fabs.s :: ++input: -1.000000 ++output: 1.000000 ++fcsr: 0x300 ++roundig mode: -inf ++fabs.s :: ++input: 23.062500 ++output: 23.062500 ++fcsr: 0x300 ++roundig mode: near ++fabs.d :: ++input: 0.000000000000000 ++output: 0.000000000000000 ++fcsr: 0 ++roundig mode: near ++fabs.d :: ++input: 456.250000000000000 ++output: 456.250000000000000 ++fcsr: 0 ++roundig mode: near ++fabs.d :: ++input: 3.000000000000000 ++output: 3.000000000000000 ++fcsr: 0 ++roundig mode: near ++fabs.d :: ++input: -1.000000000000000 ++output: 1.000000000000000 ++fcsr: 0 ++roundig mode: near ++fabs.d :: ++input: 1384.500000000000000 ++output: 1384.500000000000000 ++fcsr: 0 ++roundig mode: near ++fabs.d :: ++input: -7.250000000000000 ++output: 7.250000000000000 ++fcsr: 0 ++roundig mode: near ++fabs.d :: ++input: 1000000000.000000000000000 ++output: 1000000000.000000000000000 ++fcsr: 0 ++roundig mode: near ++fabs.d :: ++input: -5786.500000000000000 ++output: 5786.500000000000000 ++fcsr: 0 ++roundig mode: near ++fabs.d :: ++input: 1752.000000000000000 ++output: 1752.000000000000000 ++fcsr: 0 ++roundig mode: near ++fabs.d :: ++input: 0.015625000000000 ++output: 0.015625000000000 ++fcsr: 0 ++roundig mode: near ++fabs.d :: ++input: 0.031250000000000 ++output: 0.031250000000000 ++fcsr: 0 ++roundig mode: near ++fabs.d :: ++input: -248562.750000000000000 ++output: 248562.750000000000000 ++fcsr: 0 ++roundig mode: near ++fabs.d :: ++input: -45786.500000000000000 ++output: 45786.500000000000000 ++fcsr: 0 ++roundig mode: near ++fabs.d :: ++input: 456.000000000000000 ++output: 456.000000000000000 ++fcsr: 0 ++roundig mode: near ++fabs.d :: ++input: 34.031250000000000 ++output: 34.031250000000000 ++fcsr: 0 ++roundig mode: near ++fabs.d :: ++input: 45786.750000000000000 ++output: 45786.750000000000000 ++fcsr: 0 ++roundig mode: near ++fabs.d :: ++input: 1752065.000000000000000 ++output: 1752065.000000000000000 ++fcsr: 0 ++roundig mode: near ++fabs.d :: ++input: 107.000000000000000 ++output: 107.000000000000000 ++fcsr: 0 ++roundig mode: near ++fabs.d :: ++input: -45667.250000000000000 ++output: 45667.250000000000000 ++fcsr: 0 ++roundig mode: near ++fabs.d :: ++input: -7.000000000000000 ++output: 7.000000000000000 ++fcsr: 0 ++roundig mode: near ++fabs.d :: ++input: -347856.500000000000000 ++output: 347856.500000000000000 ++fcsr: 0 ++roundig mode: near ++fabs.d :: ++input: 356047.500000000000000 ++output: 356047.500000000000000 ++fcsr: 0 ++roundig mode: near ++fabs.d :: ++input: -1.000000000000000 ++output: 1.000000000000000 ++fcsr: 0 ++roundig mode: near ++fabs.d :: ++input: 23.062500000000000 ++output: 23.062500000000000 ++fcsr: 0 ++roundig mode: zero ++fabs.d :: ++input: 0.000000000000000 ++output: 0.000000000000000 ++fcsr: 0x100 ++roundig mode: zero ++fabs.d :: ++input: 456.250000000000000 ++output: 456.250000000000000 ++fcsr: 0x100 ++roundig mode: zero ++fabs.d :: ++input: 3.000000000000000 ++output: 3.000000000000000 ++fcsr: 0x100 ++roundig mode: zero ++fabs.d :: ++input: -1.000000000000000 ++output: 1.000000000000000 ++fcsr: 0x100 ++roundig mode: zero ++fabs.d :: ++input: 1384.500000000000000 ++output: 1384.500000000000000 ++fcsr: 0x100 ++roundig mode: zero ++fabs.d :: ++input: -7.250000000000000 ++output: 7.250000000000000 ++fcsr: 0x100 ++roundig mode: zero ++fabs.d :: ++input: 1000000000.000000000000000 ++output: 1000000000.000000000000000 ++fcsr: 0x100 ++roundig mode: zero ++fabs.d :: ++input: -5786.500000000000000 
++output: 5786.500000000000000 ++fcsr: 0x100 ++roundig mode: zero ++fabs.d :: ++input: 1752.000000000000000 ++output: 1752.000000000000000 ++fcsr: 0x100 ++roundig mode: zero ++fabs.d :: ++input: 0.015625000000000 ++output: 0.015625000000000 ++fcsr: 0x100 ++roundig mode: zero ++fabs.d :: ++input: 0.031250000000000 ++output: 0.031250000000000 ++fcsr: 0x100 ++roundig mode: zero ++fabs.d :: ++input: -248562.750000000000000 ++output: 248562.750000000000000 ++fcsr: 0x100 ++roundig mode: zero ++fabs.d :: ++input: -45786.500000000000000 ++output: 45786.500000000000000 ++fcsr: 0x100 ++roundig mode: zero ++fabs.d :: ++input: 456.000000000000000 ++output: 456.000000000000000 ++fcsr: 0x100 ++roundig mode: zero ++fabs.d :: ++input: 34.031250000000000 ++output: 34.031250000000000 ++fcsr: 0x100 ++roundig mode: zero ++fabs.d :: ++input: 45786.750000000000000 ++output: 45786.750000000000000 ++fcsr: 0x100 ++roundig mode: zero ++fabs.d :: ++input: 1752065.000000000000000 ++output: 1752065.000000000000000 ++fcsr: 0x100 ++roundig mode: zero ++fabs.d :: ++input: 107.000000000000000 ++output: 107.000000000000000 ++fcsr: 0x100 ++roundig mode: zero ++fabs.d :: ++input: -45667.250000000000000 ++output: 45667.250000000000000 ++fcsr: 0x100 ++roundig mode: zero ++fabs.d :: ++input: -7.000000000000000 ++output: 7.000000000000000 ++fcsr: 0x100 ++roundig mode: zero ++fabs.d :: ++input: -347856.500000000000000 ++output: 347856.500000000000000 ++fcsr: 0x100 ++roundig mode: zero ++fabs.d :: ++input: 356047.500000000000000 ++output: 356047.500000000000000 ++fcsr: 0x100 ++roundig mode: zero ++fabs.d :: ++input: -1.000000000000000 ++output: 1.000000000000000 ++fcsr: 0x100 ++roundig mode: zero ++fabs.d :: ++input: 23.062500000000000 ++output: 23.062500000000000 ++fcsr: 0x100 ++roundig mode: +inf ++fabs.d :: ++input: 0.000000000000000 ++output: 0.000000000000000 ++fcsr: 0x200 ++roundig mode: +inf ++fabs.d :: ++input: 456.250000000000000 ++output: 456.250000000000000 ++fcsr: 0x200 ++roundig mode: +inf ++fabs.d :: ++input: 3.000000000000000 ++output: 3.000000000000000 ++fcsr: 0x200 ++roundig mode: +inf ++fabs.d :: ++input: -1.000000000000000 ++output: 1.000000000000000 ++fcsr: 0x200 ++roundig mode: +inf ++fabs.d :: ++input: 1384.500000000000000 ++output: 1384.500000000000000 ++fcsr: 0x200 ++roundig mode: +inf ++fabs.d :: ++input: -7.250000000000000 ++output: 7.250000000000000 ++fcsr: 0x200 ++roundig mode: +inf ++fabs.d :: ++input: 1000000000.000000000000000 ++output: 1000000000.000000000000000 ++fcsr: 0x200 ++roundig mode: +inf ++fabs.d :: ++input: -5786.500000000000000 ++output: 5786.500000000000000 ++fcsr: 0x200 ++roundig mode: +inf ++fabs.d :: ++input: 1752.000000000000000 ++output: 1752.000000000000000 ++fcsr: 0x200 ++roundig mode: +inf ++fabs.d :: ++input: 0.015625000000000 ++output: 0.015625000000000 ++fcsr: 0x200 ++roundig mode: +inf ++fabs.d :: ++input: 0.031250000000000 ++output: 0.031250000000000 ++fcsr: 0x200 ++roundig mode: +inf ++fabs.d :: ++input: -248562.750000000000000 ++output: 248562.750000000000000 ++fcsr: 0x200 ++roundig mode: +inf ++fabs.d :: ++input: -45786.500000000000000 ++output: 45786.500000000000000 ++fcsr: 0x200 ++roundig mode: +inf ++fabs.d :: ++input: 456.000000000000000 ++output: 456.000000000000000 ++fcsr: 0x200 ++roundig mode: +inf ++fabs.d :: ++input: 34.031250000000000 ++output: 34.031250000000000 ++fcsr: 0x200 ++roundig mode: +inf ++fabs.d :: ++input: 45786.750000000000000 ++output: 45786.750000000000000 ++fcsr: 0x200 ++roundig mode: +inf ++fabs.d :: ++input: 1752065.000000000000000 ++output: 
1752065.000000000000000 ++fcsr: 0x200 ++roundig mode: +inf ++fabs.d :: ++input: 107.000000000000000 ++output: 107.000000000000000 ++fcsr: 0x200 ++roundig mode: +inf ++fabs.d :: ++input: -45667.250000000000000 ++output: 45667.250000000000000 ++fcsr: 0x200 ++roundig mode: +inf ++fabs.d :: ++input: -7.000000000000000 ++output: 7.000000000000000 ++fcsr: 0x200 ++roundig mode: +inf ++fabs.d :: ++input: -347856.500000000000000 ++output: 347856.500000000000000 ++fcsr: 0x200 ++roundig mode: +inf ++fabs.d :: ++input: 356047.500000000000000 ++output: 356047.500000000000000 ++fcsr: 0x200 ++roundig mode: +inf ++fabs.d :: ++input: -1.000000000000000 ++output: 1.000000000000000 ++fcsr: 0x200 ++roundig mode: +inf ++fabs.d :: ++input: 23.062500000000000 ++output: 23.062500000000000 ++fcsr: 0x200 ++roundig mode: -inf ++fabs.d :: ++input: 0.000000000000000 ++output: 0.000000000000000 ++fcsr: 0x300 ++roundig mode: -inf ++fabs.d :: ++input: 456.250000000000000 ++output: 456.250000000000000 ++fcsr: 0x300 ++roundig mode: -inf ++fabs.d :: ++input: 3.000000000000000 ++output: 3.000000000000000 ++fcsr: 0x300 ++roundig mode: -inf ++fabs.d :: ++input: -1.000000000000000 ++output: 1.000000000000000 ++fcsr: 0x300 ++roundig mode: -inf ++fabs.d :: ++input: 1384.500000000000000 ++output: 1384.500000000000000 ++fcsr: 0x300 ++roundig mode: -inf ++fabs.d :: ++input: -7.250000000000000 ++output: 7.250000000000000 ++fcsr: 0x300 ++roundig mode: -inf ++fabs.d :: ++input: 1000000000.000000000000000 ++output: 1000000000.000000000000000 ++fcsr: 0x300 ++roundig mode: -inf ++fabs.d :: ++input: -5786.500000000000000 ++output: 5786.500000000000000 ++fcsr: 0x300 ++roundig mode: -inf ++fabs.d :: ++input: 1752.000000000000000 ++output: 1752.000000000000000 ++fcsr: 0x300 ++roundig mode: -inf ++fabs.d :: ++input: 0.015625000000000 ++output: 0.015625000000000 ++fcsr: 0x300 ++roundig mode: -inf ++fabs.d :: ++input: 0.031250000000000 ++output: 0.031250000000000 ++fcsr: 0x300 ++roundig mode: -inf ++fabs.d :: ++input: -248562.750000000000000 ++output: 248562.750000000000000 ++fcsr: 0x300 ++roundig mode: -inf ++fabs.d :: ++input: -45786.500000000000000 ++output: 45786.500000000000000 ++fcsr: 0x300 ++roundig mode: -inf ++fabs.d :: ++input: 456.000000000000000 ++output: 456.000000000000000 ++fcsr: 0x300 ++roundig mode: -inf ++fabs.d :: ++input: 34.031250000000000 ++output: 34.031250000000000 ++fcsr: 0x300 ++roundig mode: -inf ++fabs.d :: ++input: 45786.750000000000000 ++output: 45786.750000000000000 ++fcsr: 0x300 ++roundig mode: -inf ++fabs.d :: ++input: 1752065.000000000000000 ++output: 1752065.000000000000000 ++fcsr: 0x300 ++roundig mode: -inf ++fabs.d :: ++input: 107.000000000000000 ++output: 107.000000000000000 ++fcsr: 0x300 ++roundig mode: -inf ++fabs.d :: ++input: -45667.250000000000000 ++output: 45667.250000000000000 ++fcsr: 0x300 ++roundig mode: -inf ++fabs.d :: ++input: -7.000000000000000 ++output: 7.000000000000000 ++fcsr: 0x300 ++roundig mode: -inf ++fabs.d :: ++input: -347856.500000000000000 ++output: 347856.500000000000000 ++fcsr: 0x300 ++roundig mode: -inf ++fabs.d :: ++input: 356047.500000000000000 ++output: 356047.500000000000000 ++fcsr: 0x300 ++roundig mode: -inf ++fabs.d :: ++input: -1.000000000000000 ++output: 1.000000000000000 ++fcsr: 0x300 ++roundig mode: -inf ++fabs.d :: ++input: 23.062500000000000 ++output: 23.062500000000000 ++fcsr: 0x300 ++roundig mode: near ++fneg.s :: ++input: 0.000000 ++output: -0.000000 ++fcsr: 0 ++roundig mode: near ++fneg.s :: ++input: 456.250000 ++output: -456.250000 ++fcsr: 0 ++roundig mode: near 
++fneg.s :: ++input: 3.000000 ++output: -3.000000 ++fcsr: 0 ++roundig mode: near ++fneg.s :: ++input: -1.000000 ++output: 1.000000 ++fcsr: 0 ++roundig mode: near ++fneg.s :: ++input: 1384.500000 ++output: -1384.500000 ++fcsr: 0 ++roundig mode: near ++fneg.s :: ++input: -7.250000 ++output: 7.250000 ++fcsr: 0 ++roundig mode: near ++fneg.s :: ++input: 1000000000.000000 ++output: -1000000000.000000 ++fcsr: 0 ++roundig mode: near ++fneg.s :: ++input: -5786.500000 ++output: 5786.500000 ++fcsr: 0 ++roundig mode: near ++fneg.s :: ++input: 1752.000000 ++output: -1752.000000 ++fcsr: 0 ++roundig mode: near ++fneg.s :: ++input: 0.015625 ++output: -0.015625 ++fcsr: 0 ++roundig mode: near ++fneg.s :: ++input: 0.031250 ++output: -0.031250 ++fcsr: 0 ++roundig mode: near ++fneg.s :: ++input: -248562.750000 ++output: 248562.750000 ++fcsr: 0 ++roundig mode: near ++fneg.s :: ++input: -45786.500000 ++output: 45786.500000 ++fcsr: 0 ++roundig mode: near ++fneg.s :: ++input: 456.000000 ++output: -456.000000 ++fcsr: 0 ++roundig mode: near ++fneg.s :: ++input: 34.031250 ++output: -34.031250 ++fcsr: 0 ++roundig mode: near ++fneg.s :: ++input: 45786.750000 ++output: -45786.750000 ++fcsr: 0 ++roundig mode: near ++fneg.s :: ++input: 1752065.000000 ++output: -1752065.000000 ++fcsr: 0 ++roundig mode: near ++fneg.s :: ++input: 107.000000 ++output: -107.000000 ++fcsr: 0 ++roundig mode: near ++fneg.s :: ++input: -45667.250000 ++output: 45667.250000 ++fcsr: 0 ++roundig mode: near ++fneg.s :: ++input: -7.000000 ++output: 7.000000 ++fcsr: 0 ++roundig mode: near ++fneg.s :: ++input: -347856.500000 ++output: 347856.500000 ++fcsr: 0 ++roundig mode: near ++fneg.s :: ++input: 356047.500000 ++output: -356047.500000 ++fcsr: 0 ++roundig mode: near ++fneg.s :: ++input: -1.000000 ++output: 1.000000 ++fcsr: 0 ++roundig mode: near ++fneg.s :: ++input: 23.062500 ++output: -23.062500 ++fcsr: 0 ++roundig mode: zero ++fneg.s :: ++input: 0.000000 ++output: -0.000000 ++fcsr: 0x100 ++roundig mode: zero ++fneg.s :: ++input: 456.250000 ++output: -456.250000 ++fcsr: 0x100 ++roundig mode: zero ++fneg.s :: ++input: 3.000000 ++output: -3.000000 ++fcsr: 0x100 ++roundig mode: zero ++fneg.s :: ++input: -1.000000 ++output: 1.000000 ++fcsr: 0x100 ++roundig mode: zero ++fneg.s :: ++input: 1384.500000 ++output: -1384.500000 ++fcsr: 0x100 ++roundig mode: zero ++fneg.s :: ++input: -7.250000 ++output: 7.250000 ++fcsr: 0x100 ++roundig mode: zero ++fneg.s :: ++input: 1000000000.000000 ++output: -1000000000.000000 ++fcsr: 0x100 ++roundig mode: zero ++fneg.s :: ++input: -5786.500000 ++output: 5786.500000 ++fcsr: 0x100 ++roundig mode: zero ++fneg.s :: ++input: 1752.000000 ++output: -1752.000000 ++fcsr: 0x100 ++roundig mode: zero ++fneg.s :: ++input: 0.015625 ++output: -0.015625 ++fcsr: 0x100 ++roundig mode: zero ++fneg.s :: ++input: 0.031250 ++output: -0.031250 ++fcsr: 0x100 ++roundig mode: zero ++fneg.s :: ++input: -248562.750000 ++output: 248562.750000 ++fcsr: 0x100 ++roundig mode: zero ++fneg.s :: ++input: -45786.500000 ++output: 45786.500000 ++fcsr: 0x100 ++roundig mode: zero ++fneg.s :: ++input: 456.000000 ++output: -456.000000 ++fcsr: 0x100 ++roundig mode: zero ++fneg.s :: ++input: 34.031250 ++output: -34.031250 ++fcsr: 0x100 ++roundig mode: zero ++fneg.s :: ++input: 45786.750000 ++output: -45786.750000 ++fcsr: 0x100 ++roundig mode: zero ++fneg.s :: ++input: 1752065.000000 ++output: -1752065.000000 ++fcsr: 0x100 ++roundig mode: zero ++fneg.s :: ++input: 107.000000 ++output: -107.000000 ++fcsr: 0x100 ++roundig mode: zero ++fneg.s :: ++input: -45667.250000 
++output: 45667.250000 ++fcsr: 0x100 ++roundig mode: zero ++fneg.s :: ++input: -7.000000 ++output: 7.000000 ++fcsr: 0x100 ++roundig mode: zero ++fneg.s :: ++input: -347856.500000 ++output: 347856.500000 ++fcsr: 0x100 ++roundig mode: zero ++fneg.s :: ++input: 356047.500000 ++output: -356047.500000 ++fcsr: 0x100 ++roundig mode: zero ++fneg.s :: ++input: -1.000000 ++output: 1.000000 ++fcsr: 0x100 ++roundig mode: zero ++fneg.s :: ++input: 23.062500 ++output: -23.062500 ++fcsr: 0x100 ++roundig mode: +inf ++fneg.s :: ++input: 0.000000 ++output: -0.000000 ++fcsr: 0x200 ++roundig mode: +inf ++fneg.s :: ++input: 456.250000 ++output: -456.250000 ++fcsr: 0x200 ++roundig mode: +inf ++fneg.s :: ++input: 3.000000 ++output: -3.000000 ++fcsr: 0x200 ++roundig mode: +inf ++fneg.s :: ++input: -1.000000 ++output: 1.000000 ++fcsr: 0x200 ++roundig mode: +inf ++fneg.s :: ++input: 1384.500000 ++output: -1384.500000 ++fcsr: 0x200 ++roundig mode: +inf ++fneg.s :: ++input: -7.250000 ++output: 7.250000 ++fcsr: 0x200 ++roundig mode: +inf ++fneg.s :: ++input: 1000000000.000000 ++output: -1000000000.000000 ++fcsr: 0x200 ++roundig mode: +inf ++fneg.s :: ++input: -5786.500000 ++output: 5786.500000 ++fcsr: 0x200 ++roundig mode: +inf ++fneg.s :: ++input: 1752.000000 ++output: -1752.000000 ++fcsr: 0x200 ++roundig mode: +inf ++fneg.s :: ++input: 0.015625 ++output: -0.015625 ++fcsr: 0x200 ++roundig mode: +inf ++fneg.s :: ++input: 0.031250 ++output: -0.031250 ++fcsr: 0x200 ++roundig mode: +inf ++fneg.s :: ++input: -248562.750000 ++output: 248562.750000 ++fcsr: 0x200 ++roundig mode: +inf ++fneg.s :: ++input: -45786.500000 ++output: 45786.500000 ++fcsr: 0x200 ++roundig mode: +inf ++fneg.s :: ++input: 456.000000 ++output: -456.000000 ++fcsr: 0x200 ++roundig mode: +inf ++fneg.s :: ++input: 34.031250 ++output: -34.031250 ++fcsr: 0x200 ++roundig mode: +inf ++fneg.s :: ++input: 45786.750000 ++output: -45786.750000 ++fcsr: 0x200 ++roundig mode: +inf ++fneg.s :: ++input: 1752065.000000 ++output: -1752065.000000 ++fcsr: 0x200 ++roundig mode: +inf ++fneg.s :: ++input: 107.000000 ++output: -107.000000 ++fcsr: 0x200 ++roundig mode: +inf ++fneg.s :: ++input: -45667.250000 ++output: 45667.250000 ++fcsr: 0x200 ++roundig mode: +inf ++fneg.s :: ++input: -7.000000 ++output: 7.000000 ++fcsr: 0x200 ++roundig mode: +inf ++fneg.s :: ++input: -347856.500000 ++output: 347856.500000 ++fcsr: 0x200 ++roundig mode: +inf ++fneg.s :: ++input: 356047.500000 ++output: -356047.500000 ++fcsr: 0x200 ++roundig mode: +inf ++fneg.s :: ++input: -1.000000 ++output: 1.000000 ++fcsr: 0x200 ++roundig mode: +inf ++fneg.s :: ++input: 23.062500 ++output: -23.062500 ++fcsr: 0x200 ++roundig mode: -inf ++fneg.s :: ++input: 0.000000 ++output: -0.000000 ++fcsr: 0x300 ++roundig mode: -inf ++fneg.s :: ++input: 456.250000 ++output: -456.250000 ++fcsr: 0x300 ++roundig mode: -inf ++fneg.s :: ++input: 3.000000 ++output: -3.000000 ++fcsr: 0x300 ++roundig mode: -inf ++fneg.s :: ++input: -1.000000 ++output: 1.000000 ++fcsr: 0x300 ++roundig mode: -inf ++fneg.s :: ++input: 1384.500000 ++output: -1384.500000 ++fcsr: 0x300 ++roundig mode: -inf ++fneg.s :: ++input: -7.250000 ++output: 7.250000 ++fcsr: 0x300 ++roundig mode: -inf ++fneg.s :: ++input: 1000000000.000000 ++output: -1000000000.000000 ++fcsr: 0x300 ++roundig mode: -inf ++fneg.s :: ++input: -5786.500000 ++output: 5786.500000 ++fcsr: 0x300 ++roundig mode: -inf ++fneg.s :: ++input: 1752.000000 ++output: -1752.000000 ++fcsr: 0x300 ++roundig mode: -inf ++fneg.s :: ++input: 0.015625 ++output: -0.015625 ++fcsr: 0x300 ++roundig mode: -inf 
++fneg.s :: ++input: 0.031250 ++output: -0.031250 ++fcsr: 0x300 ++roundig mode: -inf ++fneg.s :: ++input: -248562.750000 ++output: 248562.750000 ++fcsr: 0x300 ++roundig mode: -inf ++fneg.s :: ++input: -45786.500000 ++output: 45786.500000 ++fcsr: 0x300 ++roundig mode: -inf ++fneg.s :: ++input: 456.000000 ++output: -456.000000 ++fcsr: 0x300 ++roundig mode: -inf ++fneg.s :: ++input: 34.031250 ++output: -34.031250 ++fcsr: 0x300 ++roundig mode: -inf ++fneg.s :: ++input: 45786.750000 ++output: -45786.750000 ++fcsr: 0x300 ++roundig mode: -inf ++fneg.s :: ++input: 1752065.000000 ++output: -1752065.000000 ++fcsr: 0x300 ++roundig mode: -inf ++fneg.s :: ++input: 107.000000 ++output: -107.000000 ++fcsr: 0x300 ++roundig mode: -inf ++fneg.s :: ++input: -45667.250000 ++output: 45667.250000 ++fcsr: 0x300 ++roundig mode: -inf ++fneg.s :: ++input: -7.000000 ++output: 7.000000 ++fcsr: 0x300 ++roundig mode: -inf ++fneg.s :: ++input: -347856.500000 ++output: 347856.500000 ++fcsr: 0x300 ++roundig mode: -inf ++fneg.s :: ++input: 356047.500000 ++output: -356047.500000 ++fcsr: 0x300 ++roundig mode: -inf ++fneg.s :: ++input: -1.000000 ++output: 1.000000 ++fcsr: 0x300 ++roundig mode: -inf ++fneg.s :: ++input: 23.062500 ++output: -23.062500 ++fcsr: 0x300 ++roundig mode: near ++fneg.d :: ++input: 0.000000000000000 ++output: -0.000000000000000 ++fcsr: 0 ++roundig mode: near ++fneg.d :: ++input: 456.250000000000000 ++output: -456.250000000000000 ++fcsr: 0 ++roundig mode: near ++fneg.d :: ++input: 3.000000000000000 ++output: -3.000000000000000 ++fcsr: 0 ++roundig mode: near ++fneg.d :: ++input: -1.000000000000000 ++output: 1.000000000000000 ++fcsr: 0 ++roundig mode: near ++fneg.d :: ++input: 1384.500000000000000 ++output: -1384.500000000000000 ++fcsr: 0 ++roundig mode: near ++fneg.d :: ++input: -7.250000000000000 ++output: 7.250000000000000 ++fcsr: 0 ++roundig mode: near ++fneg.d :: ++input: 1000000000.000000000000000 ++output: -1000000000.000000000000000 ++fcsr: 0 ++roundig mode: near ++fneg.d :: ++input: -5786.500000000000000 ++output: 5786.500000000000000 ++fcsr: 0 ++roundig mode: near ++fneg.d :: ++input: 1752.000000000000000 ++output: -1752.000000000000000 ++fcsr: 0 ++roundig mode: near ++fneg.d :: ++input: 0.015625000000000 ++output: -0.015625000000000 ++fcsr: 0 ++roundig mode: near ++fneg.d :: ++input: 0.031250000000000 ++output: -0.031250000000000 ++fcsr: 0 ++roundig mode: near ++fneg.d :: ++input: -248562.750000000000000 ++output: 248562.750000000000000 ++fcsr: 0 ++roundig mode: near ++fneg.d :: ++input: -45786.500000000000000 ++output: 45786.500000000000000 ++fcsr: 0 ++roundig mode: near ++fneg.d :: ++input: 456.000000000000000 ++output: -456.000000000000000 ++fcsr: 0 ++roundig mode: near ++fneg.d :: ++input: 34.031250000000000 ++output: -34.031250000000000 ++fcsr: 0 ++roundig mode: near ++fneg.d :: ++input: 45786.750000000000000 ++output: -45786.750000000000000 ++fcsr: 0 ++roundig mode: near ++fneg.d :: ++input: 1752065.000000000000000 ++output: -1752065.000000000000000 ++fcsr: 0 ++roundig mode: near ++fneg.d :: ++input: 107.000000000000000 ++output: -107.000000000000000 ++fcsr: 0 ++roundig mode: near ++fneg.d :: ++input: -45667.250000000000000 ++output: 45667.250000000000000 ++fcsr: 0 ++roundig mode: near ++fneg.d :: ++input: -7.000000000000000 ++output: 7.000000000000000 ++fcsr: 0 ++roundig mode: near ++fneg.d :: ++input: -347856.500000000000000 ++output: 347856.500000000000000 ++fcsr: 0 ++roundig mode: near ++fneg.d :: ++input: 356047.500000000000000 ++output: -356047.500000000000000 ++fcsr: 0 ++roundig 
mode: near ++fneg.d :: ++input: -1.000000000000000 ++output: 1.000000000000000 ++fcsr: 0 ++roundig mode: near ++fneg.d :: ++input: 23.062500000000000 ++output: -23.062500000000000 ++fcsr: 0 ++roundig mode: zero ++fneg.d :: ++input: 0.000000000000000 ++output: -0.000000000000000 ++fcsr: 0x100 ++roundig mode: zero ++fneg.d :: ++input: 456.250000000000000 ++output: -456.250000000000000 ++fcsr: 0x100 ++roundig mode: zero ++fneg.d :: ++input: 3.000000000000000 ++output: -3.000000000000000 ++fcsr: 0x100 ++roundig mode: zero ++fneg.d :: ++input: -1.000000000000000 ++output: 1.000000000000000 ++fcsr: 0x100 ++roundig mode: zero ++fneg.d :: ++input: 1384.500000000000000 ++output: -1384.500000000000000 ++fcsr: 0x100 ++roundig mode: zero ++fneg.d :: ++input: -7.250000000000000 ++output: 7.250000000000000 ++fcsr: 0x100 ++roundig mode: zero ++fneg.d :: ++input: 1000000000.000000000000000 ++output: -1000000000.000000000000000 ++fcsr: 0x100 ++roundig mode: zero ++fneg.d :: ++input: -5786.500000000000000 ++output: 5786.500000000000000 ++fcsr: 0x100 ++roundig mode: zero ++fneg.d :: ++input: 1752.000000000000000 ++output: -1752.000000000000000 ++fcsr: 0x100 ++roundig mode: zero ++fneg.d :: ++input: 0.015625000000000 ++output: -0.015625000000000 ++fcsr: 0x100 ++roundig mode: zero ++fneg.d :: ++input: 0.031250000000000 ++output: -0.031250000000000 ++fcsr: 0x100 ++roundig mode: zero ++fneg.d :: ++input: -248562.750000000000000 ++output: 248562.750000000000000 ++fcsr: 0x100 ++roundig mode: zero ++fneg.d :: ++input: -45786.500000000000000 ++output: 45786.500000000000000 ++fcsr: 0x100 ++roundig mode: zero ++fneg.d :: ++input: 456.000000000000000 ++output: -456.000000000000000 ++fcsr: 0x100 ++roundig mode: zero ++fneg.d :: ++input: 34.031250000000000 ++output: -34.031250000000000 ++fcsr: 0x100 ++roundig mode: zero ++fneg.d :: ++input: 45786.750000000000000 ++output: -45786.750000000000000 ++fcsr: 0x100 ++roundig mode: zero ++fneg.d :: ++input: 1752065.000000000000000 ++output: -1752065.000000000000000 ++fcsr: 0x100 ++roundig mode: zero ++fneg.d :: ++input: 107.000000000000000 ++output: -107.000000000000000 ++fcsr: 0x100 ++roundig mode: zero ++fneg.d :: ++input: -45667.250000000000000 ++output: 45667.250000000000000 ++fcsr: 0x100 ++roundig mode: zero ++fneg.d :: ++input: -7.000000000000000 ++output: 7.000000000000000 ++fcsr: 0x100 ++roundig mode: zero ++fneg.d :: ++input: -347856.500000000000000 ++output: 347856.500000000000000 ++fcsr: 0x100 ++roundig mode: zero ++fneg.d :: ++input: 356047.500000000000000 ++output: -356047.500000000000000 ++fcsr: 0x100 ++roundig mode: zero ++fneg.d :: ++input: -1.000000000000000 ++output: 1.000000000000000 ++fcsr: 0x100 ++roundig mode: zero ++fneg.d :: ++input: 23.062500000000000 ++output: -23.062500000000000 ++fcsr: 0x100 ++roundig mode: +inf ++fneg.d :: ++input: 0.000000000000000 ++output: -0.000000000000000 ++fcsr: 0x200 ++roundig mode: +inf ++fneg.d :: ++input: 456.250000000000000 ++output: -456.250000000000000 ++fcsr: 0x200 ++roundig mode: +inf ++fneg.d :: ++input: 3.000000000000000 ++output: -3.000000000000000 ++fcsr: 0x200 ++roundig mode: +inf ++fneg.d :: ++input: -1.000000000000000 ++output: 1.000000000000000 ++fcsr: 0x200 ++roundig mode: +inf ++fneg.d :: ++input: 1384.500000000000000 ++output: -1384.500000000000000 ++fcsr: 0x200 ++roundig mode: +inf ++fneg.d :: ++input: -7.250000000000000 ++output: 7.250000000000000 ++fcsr: 0x200 ++roundig mode: +inf ++fneg.d :: ++input: 1000000000.000000000000000 ++output: -1000000000.000000000000000 ++fcsr: 0x200 ++roundig mode: +inf 
++fneg.d :: ++input: -5786.500000000000000 ++output: 5786.500000000000000 ++fcsr: 0x200 ++roundig mode: +inf ++fneg.d :: ++input: 1752.000000000000000 ++output: -1752.000000000000000 ++fcsr: 0x200 ++roundig mode: +inf ++fneg.d :: ++input: 0.015625000000000 ++output: -0.015625000000000 ++fcsr: 0x200 ++roundig mode: +inf ++fneg.d :: ++input: 0.031250000000000 ++output: -0.031250000000000 ++fcsr: 0x200 ++roundig mode: +inf ++fneg.d :: ++input: -248562.750000000000000 ++output: 248562.750000000000000 ++fcsr: 0x200 ++roundig mode: +inf ++fneg.d :: ++input: -45786.500000000000000 ++output: 45786.500000000000000 ++fcsr: 0x200 ++roundig mode: +inf ++fneg.d :: ++input: 456.000000000000000 ++output: -456.000000000000000 ++fcsr: 0x200 ++roundig mode: +inf ++fneg.d :: ++input: 34.031250000000000 ++output: -34.031250000000000 ++fcsr: 0x200 ++roundig mode: +inf ++fneg.d :: ++input: 45786.750000000000000 ++output: -45786.750000000000000 ++fcsr: 0x200 ++roundig mode: +inf ++fneg.d :: ++input: 1752065.000000000000000 ++output: -1752065.000000000000000 ++fcsr: 0x200 ++roundig mode: +inf ++fneg.d :: ++input: 107.000000000000000 ++output: -107.000000000000000 ++fcsr: 0x200 ++roundig mode: +inf ++fneg.d :: ++input: -45667.250000000000000 ++output: 45667.250000000000000 ++fcsr: 0x200 ++roundig mode: +inf ++fneg.d :: ++input: -7.000000000000000 ++output: 7.000000000000000 ++fcsr: 0x200 ++roundig mode: +inf ++fneg.d :: ++input: -347856.500000000000000 ++output: 347856.500000000000000 ++fcsr: 0x200 ++roundig mode: +inf ++fneg.d :: ++input: 356047.500000000000000 ++output: -356047.500000000000000 ++fcsr: 0x200 ++roundig mode: +inf ++fneg.d :: ++input: -1.000000000000000 ++output: 1.000000000000000 ++fcsr: 0x200 ++roundig mode: +inf ++fneg.d :: ++input: 23.062500000000000 ++output: -23.062500000000000 ++fcsr: 0x200 ++roundig mode: -inf ++fneg.d :: ++input: 0.000000000000000 ++output: -0.000000000000000 ++fcsr: 0x300 ++roundig mode: -inf ++fneg.d :: ++input: 456.250000000000000 ++output: -456.250000000000000 ++fcsr: 0x300 ++roundig mode: -inf ++fneg.d :: ++input: 3.000000000000000 ++output: -3.000000000000000 ++fcsr: 0x300 ++roundig mode: -inf ++fneg.d :: ++input: -1.000000000000000 ++output: 1.000000000000000 ++fcsr: 0x300 ++roundig mode: -inf ++fneg.d :: ++input: 1384.500000000000000 ++output: -1384.500000000000000 ++fcsr: 0x300 ++roundig mode: -inf ++fneg.d :: ++input: -7.250000000000000 ++output: 7.250000000000000 ++fcsr: 0x300 ++roundig mode: -inf ++fneg.d :: ++input: 1000000000.000000000000000 ++output: -1000000000.000000000000000 ++fcsr: 0x300 ++roundig mode: -inf ++fneg.d :: ++input: -5786.500000000000000 ++output: 5786.500000000000000 ++fcsr: 0x300 ++roundig mode: -inf ++fneg.d :: ++input: 1752.000000000000000 ++output: -1752.000000000000000 ++fcsr: 0x300 ++roundig mode: -inf ++fneg.d :: ++input: 0.015625000000000 ++output: -0.015625000000000 ++fcsr: 0x300 ++roundig mode: -inf ++fneg.d :: ++input: 0.031250000000000 ++output: -0.031250000000000 ++fcsr: 0x300 ++roundig mode: -inf ++fneg.d :: ++input: -248562.750000000000000 ++output: 248562.750000000000000 ++fcsr: 0x300 ++roundig mode: -inf ++fneg.d :: ++input: -45786.500000000000000 ++output: 45786.500000000000000 ++fcsr: 0x300 ++roundig mode: -inf ++fneg.d :: ++input: 456.000000000000000 ++output: -456.000000000000000 ++fcsr: 0x300 ++roundig mode: -inf ++fneg.d :: ++input: 34.031250000000000 ++output: -34.031250000000000 ++fcsr: 0x300 ++roundig mode: -inf ++fneg.d :: ++input: 45786.750000000000000 ++output: -45786.750000000000000 ++fcsr: 0x300 ++roundig 
mode: -inf ++fneg.d :: ++input: 1752065.000000000000000 ++output: -1752065.000000000000000 ++fcsr: 0x300 ++roundig mode: -inf ++fneg.d :: ++input: 107.000000000000000 ++output: -107.000000000000000 ++fcsr: 0x300 ++roundig mode: -inf ++fneg.d :: ++input: -45667.250000000000000 ++output: 45667.250000000000000 ++fcsr: 0x300 ++roundig mode: -inf ++fneg.d :: ++input: -7.000000000000000 ++output: 7.000000000000000 ++fcsr: 0x300 ++roundig mode: -inf ++fneg.d :: ++input: -347856.500000000000000 ++output: 347856.500000000000000 ++fcsr: 0x300 ++roundig mode: -inf ++fneg.d :: ++input: 356047.500000000000000 ++output: -356047.500000000000000 ++fcsr: 0x300 ++roundig mode: -inf ++fneg.d :: ++input: -1.000000000000000 ++output: 1.000000000000000 ++fcsr: 0x300 ++roundig mode: -inf ++fneg.d :: ++input: 23.062500000000000 ++output: -23.062500000000000 ++fcsr: 0x300 ++roundig mode: near ++fsqrt.s :: ++input: 0.000000 ++output: 0.000000 ++fcsr: 0 ++roundig mode: near ++fsqrt.s :: ++input: 456.250000 ++output: 21.360010 ++fcsr: 0x1010000 ++roundig mode: near ++fsqrt.s :: ++input: 3.000000 ++output: 1.732051 ++fcsr: 0x1010000 ++roundig mode: near ++fsqrt.s :: ++input: -1.000000 ++output: nan ++fcsr: 0x10100000 ++roundig mode: near ++fsqrt.s :: ++input: 1384.500000 ++output: 37.208870 ++fcsr: 0x1010000 ++roundig mode: near ++fsqrt.s :: ++input: -7.250000 ++output: nan ++fcsr: 0x10100000 ++roundig mode: near ++fsqrt.s :: ++input: 1000000000.000000 ++output: 31622.777344 ++fcsr: 0x1010000 ++roundig mode: near ++fsqrt.s :: ++input: -5786.500000 ++output: nan ++fcsr: 0x10100000 ++roundig mode: near ++fsqrt.s :: ++input: 1752.000000 ++output: 41.856899 ++fcsr: 0x1010000 ++roundig mode: near ++fsqrt.s :: ++input: 0.015625 ++output: 0.125000 ++fcsr: 0 ++roundig mode: near ++fsqrt.s :: ++input: 0.031250 ++output: 0.176777 ++fcsr: 0x1010000 ++roundig mode: near ++fsqrt.s :: ++input: -248562.750000 ++output: nan ++fcsr: 0x10100000 ++roundig mode: near ++fsqrt.s :: ++input: -45786.500000 ++output: nan ++fcsr: 0x10100000 ++roundig mode: near ++fsqrt.s :: ++input: 456.000000 ++output: 21.354156 ++fcsr: 0x1010000 ++roundig mode: near ++fsqrt.s :: ++input: 34.031250 ++output: 5.833631 ++fcsr: 0x1010000 ++roundig mode: near ++fsqrt.s :: ++input: 45786.750000 ++output: 213.978394 ++fcsr: 0x1010000 ++roundig mode: near ++fsqrt.s :: ++input: 1752065.000000 ++output: 1323.655884 ++fcsr: 0x1010000 ++roundig mode: near ++fsqrt.s :: ++input: 107.000000 ++output: 10.344080 ++fcsr: 0x1010000 ++roundig mode: near ++fsqrt.s :: ++input: -45667.250000 ++output: nan ++fcsr: 0x10100000 ++roundig mode: near ++fsqrt.s :: ++input: -7.000000 ++output: nan ++fcsr: 0x10100000 ++roundig mode: near ++fsqrt.s :: ++input: -347856.500000 ++output: nan ++fcsr: 0x10100000 ++roundig mode: near ++fsqrt.s :: ++input: 356047.500000 ++output: 596.697144 ++fcsr: 0x1010000 ++roundig mode: near ++fsqrt.s :: ++input: -1.000000 ++output: nan ++fcsr: 0x10100000 ++roundig mode: near ++fsqrt.s :: ++input: 23.062500 ++output: 4.802343 ++fcsr: 0x1010000 ++roundig mode: zero ++fsqrt.s :: ++input: 0.000000 ++output: 0.000000 ++fcsr: 0x100 ++roundig mode: zero ++fsqrt.s :: ++input: 456.250000 ++output: 21.360008 ++fcsr: 0x1010100 ++roundig mode: zero ++fsqrt.s :: ++input: 3.000000 ++output: 1.732050 ++fcsr: 0x1010100 ++roundig mode: zero ++fsqrt.s :: ++input: -1.000000 ++output: nan ++fcsr: 0x10100100 ++roundig mode: zero ++fsqrt.s :: ++input: 1384.500000 ++output: 37.208866 ++fcsr: 0x1010100 ++roundig mode: zero ++fsqrt.s :: ++input: -7.250000 ++output: nan ++fcsr: 
0x10100100 ++roundig mode: zero ++fsqrt.s :: ++input: 1000000000.000000 ++output: 31622.775390 ++fcsr: 0x1010100 ++roundig mode: zero ++fsqrt.s :: ++input: -5786.500000 ++output: nan ++fcsr: 0x10100100 ++roundig mode: zero ++fsqrt.s :: ++input: 1752.000000 ++output: 41.856895 ++fcsr: 0x1010100 ++roundig mode: zero ++fsqrt.s :: ++input: 0.015625 ++output: 0.125000 ++fcsr: 0x100 ++roundig mode: zero ++fsqrt.s :: ++input: 0.031250 ++output: 0.176776 ++fcsr: 0x1010100 ++roundig mode: zero ++fsqrt.s :: ++input: -248562.750000 ++output: nan ++fcsr: 0x10100100 ++roundig mode: zero ++fsqrt.s :: ++input: -45786.500000 ++output: nan ++fcsr: 0x10100100 ++roundig mode: zero ++fsqrt.s :: ++input: 456.000000 ++output: 21.354156 ++fcsr: 0x1010100 ++roundig mode: zero ++fsqrt.s :: ++input: 34.031250 ++output: 5.833630 ++fcsr: 0x1010100 ++roundig mode: zero ++fsqrt.s :: ++input: 45786.750000 ++output: 213.978378 ++fcsr: 0x1010100 ++roundig mode: zero ++fsqrt.s :: ++input: 1752065.000000 ++output: 1323.655883 ++fcsr: 0x1010100 ++roundig mode: zero ++fsqrt.s :: ++input: 107.000000 ++output: 10.344079 ++fcsr: 0x1010100 ++roundig mode: zero ++fsqrt.s :: ++input: -45667.250000 ++output: nan ++fcsr: 0x10100100 ++roundig mode: zero ++fsqrt.s :: ++input: -7.000000 ++output: nan ++fcsr: 0x10100100 ++roundig mode: zero ++fsqrt.s :: ++input: -347856.500000 ++output: nan ++fcsr: 0x10100100 ++roundig mode: zero ++fsqrt.s :: ++input: 356047.500000 ++output: 596.697143 ++fcsr: 0x1010100 ++roundig mode: zero ++fsqrt.s :: ++input: -1.000000 ++output: nan ++fcsr: 0x10100100 ++roundig mode: zero ++fsqrt.s :: ++input: 23.062500 ++output: 4.802342 ++fcsr: 0x1010100 ++roundig mode: +inf ++fsqrt.s :: ++input: 0.000000 ++output: 0.000000 ++fcsr: 0x200 ++roundig mode: +inf ++fsqrt.s :: ++input: 456.250000 ++output: 21.360011 ++fcsr: 0x1010200 ++roundig mode: +inf ++fsqrt.s :: ++input: 3.000000 ++output: 1.732051 ++fcsr: 0x1010200 ++roundig mode: +inf ++fsqrt.s :: ++input: -1.000000 ++output: nan ++fcsr: 0x10100200 ++roundig mode: +inf ++fsqrt.s :: ++input: 1384.500000 ++output: 37.208870 ++fcsr: 0x1010200 ++roundig mode: +inf ++fsqrt.s :: ++input: -7.250000 ++output: nan ++fcsr: 0x10100200 ++roundig mode: +inf ++fsqrt.s :: ++input: 1000000000.000000 ++output: 31622.777344 ++fcsr: 0x1010200 ++roundig mode: +inf ++fsqrt.s :: ++input: -5786.500000 ++output: nan ++fcsr: 0x10100200 ++roundig mode: +inf ++fsqrt.s :: ++input: 1752.000000 ++output: 41.856900 ++fcsr: 0x1010200 ++roundig mode: +inf ++fsqrt.s :: ++input: 0.015625 ++output: 0.125000 ++fcsr: 0x200 ++roundig mode: +inf ++fsqrt.s :: ++input: 0.031250 ++output: 0.176777 ++fcsr: 0x1010200 ++roundig mode: +inf ++fsqrt.s :: ++input: -248562.750000 ++output: nan ++fcsr: 0x10100200 ++roundig mode: +inf ++fsqrt.s :: ++input: -45786.500000 ++output: nan ++fcsr: 0x10100200 ++roundig mode: +inf ++fsqrt.s :: ++input: 456.000000 ++output: 21.354159 ++fcsr: 0x1010200 ++roundig mode: +inf ++fsqrt.s :: ++input: 34.031250 ++output: 5.833632 ++fcsr: 0x1010200 ++roundig mode: +inf ++fsqrt.s :: ++input: 45786.750000 ++output: 213.978394 ++fcsr: 0x1010200 ++roundig mode: +inf ++fsqrt.s :: ++input: 1752065.000000 ++output: 1323.656006 ++fcsr: 0x1010200 ++roundig mode: +inf ++fsqrt.s :: ++input: 107.000000 ++output: 10.344081 ++fcsr: 0x1010200 ++roundig mode: +inf ++fsqrt.s :: ++input: -45667.250000 ++output: nan ++fcsr: 0x10100200 ++roundig mode: +inf ++fsqrt.s :: ++input: -7.000000 ++output: nan ++fcsr: 0x10100200 ++roundig mode: +inf ++fsqrt.s :: ++input: -347856.500000 ++output: nan ++fcsr: 
0x10100200 ++roundig mode: +inf ++fsqrt.s :: ++input: 356047.500000 ++output: 596.697205 ++fcsr: 0x1010200 ++roundig mode: +inf ++fsqrt.s :: ++input: -1.000000 ++output: nan ++fcsr: 0x10100200 ++roundig mode: +inf ++fsqrt.s :: ++input: 23.062500 ++output: 4.802344 ++fcsr: 0x1010200 ++roundig mode: -inf ++fsqrt.s :: ++input: 0.000000 ++output: 0.000000 ++fcsr: 0x300 ++roundig mode: -inf ++fsqrt.s :: ++input: 456.250000 ++output: 21.360008 ++fcsr: 0x1010300 ++roundig mode: -inf ++fsqrt.s :: ++input: 3.000000 ++output: 1.732050 ++fcsr: 0x1010300 ++roundig mode: -inf ++fsqrt.s :: ++input: -1.000000 ++output: nan ++fcsr: 0x10100300 ++roundig mode: -inf ++fsqrt.s :: ++input: 1384.500000 ++output: 37.208866 ++fcsr: 0x1010300 ++roundig mode: -inf ++fsqrt.s :: ++input: -7.250000 ++output: nan ++fcsr: 0x10100300 ++roundig mode: -inf ++fsqrt.s :: ++input: 1000000000.000000 ++output: 31622.775390 ++fcsr: 0x1010300 ++roundig mode: -inf ++fsqrt.s :: ++input: -5786.500000 ++output: nan ++fcsr: 0x10100300 ++roundig mode: -inf ++fsqrt.s :: ++input: 1752.000000 ++output: 41.856895 ++fcsr: 0x1010300 ++roundig mode: -inf ++fsqrt.s :: ++input: 0.015625 ++output: 0.125000 ++fcsr: 0x300 ++roundig mode: -inf ++fsqrt.s :: ++input: 0.031250 ++output: 0.176776 ++fcsr: 0x1010300 ++roundig mode: -inf ++fsqrt.s :: ++input: -248562.750000 ++output: nan ++fcsr: 0x10100300 ++roundig mode: -inf ++fsqrt.s :: ++input: -45786.500000 ++output: nan ++fcsr: 0x10100300 ++roundig mode: -inf ++fsqrt.s :: ++input: 456.000000 ++output: 21.354156 ++fcsr: 0x1010300 ++roundig mode: -inf ++fsqrt.s :: ++input: 34.031250 ++output: 5.833630 ++fcsr: 0x1010300 ++roundig mode: -inf ++fsqrt.s :: ++input: 45786.750000 ++output: 213.978378 ++fcsr: 0x1010300 ++roundig mode: -inf ++fsqrt.s :: ++input: 1752065.000000 ++output: 1323.655883 ++fcsr: 0x1010300 ++roundig mode: -inf ++fsqrt.s :: ++input: 107.000000 ++output: 10.344079 ++fcsr: 0x1010300 ++roundig mode: -inf ++fsqrt.s :: ++input: -45667.250000 ++output: nan ++fcsr: 0x10100300 ++roundig mode: -inf ++fsqrt.s :: ++input: -7.000000 ++output: nan ++fcsr: 0x10100300 ++roundig mode: -inf ++fsqrt.s :: ++input: -347856.500000 ++output: nan ++fcsr: 0x10100300 ++roundig mode: -inf ++fsqrt.s :: ++input: 356047.500000 ++output: 596.697143 ++fcsr: 0x1010300 ++roundig mode: -inf ++fsqrt.s :: ++input: -1.000000 ++output: nan ++fcsr: 0x10100300 ++roundig mode: -inf ++fsqrt.s :: ++input: 23.062500 ++output: 4.802342 ++fcsr: 0x1010300 ++roundig mode: near ++fsqrt.d :: ++input: 0.000000000000000 ++output: 0.000000000000000 ++fcsr: 0 ++roundig mode: near ++fsqrt.d :: ++input: 456.250000000000000 ++output: 21.360009363293827 ++fcsr: 0x1010000 ++roundig mode: near ++fsqrt.d :: ++input: 3.000000000000000 ++output: 1.732050807568877 ++fcsr: 0x1010000 ++roundig mode: near ++fsqrt.d :: ++input: -1.000000000000000 ++output: nan ++fcsr: 0x10100000 ++roundig mode: near ++fsqrt.d :: ++input: 1384.500000000000000 ++output: 37.208869910278111 ++fcsr: 0x1010000 ++roundig mode: near ++fsqrt.d :: ++input: -7.250000000000000 ++output: nan ++fcsr: 0x10100000 ++roundig mode: near ++fsqrt.d :: ++input: 1000000000.000000000000000 ++output: 31622.776601683792251 ++fcsr: 0x1010000 ++roundig mode: near ++fsqrt.d :: ++input: -5786.500000000000000 ++output: nan ++fcsr: 0x10100000 ++roundig mode: near ++fsqrt.d :: ++input: 1752.000000000000000 ++output: 41.856899072912697 ++fcsr: 0x1010000 ++roundig mode: near ++fsqrt.d :: ++input: 0.015625000000000 ++output: 0.125000000000000 ++fcsr: 0 ++roundig mode: near ++fsqrt.d :: ++input: 
0.031250000000000 ++output: 0.176776695296637 ++fcsr: 0x1010000 ++roundig mode: near ++fsqrt.d :: ++input: -248562.750000000000000 ++output: nan ++fcsr: 0x10100000 ++roundig mode: near ++fsqrt.d :: ++input: -45786.500000000000000 ++output: nan ++fcsr: 0x10100000 ++roundig mode: near ++fsqrt.d :: ++input: 456.000000000000000 ++output: 21.354156504062622 ++fcsr: 0x1010000 ++roundig mode: near ++fsqrt.d :: ++input: 34.031250000000000 ++output: 5.833630944789017 ++fcsr: 0x1010000 ++roundig mode: near ++fsqrt.d :: ++input: 45786.750000000000000 ++output: 213.978386759036965 ++fcsr: 0x1010000 ++roundig mode: near ++fsqrt.d :: ++input: 1752065.000000000000000 ++output: 1323.655922058296937 ++fcsr: 0x1010000 ++roundig mode: near ++fsqrt.d :: ++input: 107.000000000000000 ++output: 10.344080432788601 ++fcsr: 0x1010000 ++roundig mode: near ++fsqrt.d :: ++input: -45667.250000000000000 ++output: nan ++fcsr: 0x10100000 ++roundig mode: near ++fsqrt.d :: ++input: -7.000000000000000 ++output: nan ++fcsr: 0x10100000 ++roundig mode: near ++fsqrt.d :: ++input: -347856.500000000000000 ++output: nan ++fcsr: 0x10100000 ++roundig mode: near ++fsqrt.d :: ++input: 356047.500000000000000 ++output: 596.697159369809583 ++fcsr: 0x1010000 ++roundig mode: near ++fsqrt.d :: ++input: -1.000000000000000 ++output: nan ++fcsr: 0x10100000 ++roundig mode: near ++fsqrt.d :: ++input: 23.062500000000000 ++output: 4.802343178074636 ++fcsr: 0x1010000 ++roundig mode: zero ++fsqrt.d :: ++input: 0.000000000000000 ++output: 0.000000000000000 ++fcsr: 0x100 ++roundig mode: zero ++fsqrt.d :: ++input: 456.250000000000000 ++output: 21.360009363293826 ++fcsr: 0x1010100 ++roundig mode: zero ++fsqrt.d :: ++input: 3.000000000000000 ++output: 1.732050807568877 ++fcsr: 0x1010100 ++roundig mode: zero ++fsqrt.d :: ++input: -1.000000000000000 ++output: nan ++fcsr: 0x10100100 ++roundig mode: zero ++fsqrt.d :: ++input: 1384.500000000000000 ++output: 37.208869910278110 ++fcsr: 0x1010100 ++roundig mode: zero ++fsqrt.d :: ++input: -7.250000000000000 ++output: nan ++fcsr: 0x10100100 ++roundig mode: zero ++fsqrt.d :: ++input: 1000000000.000000000000000 ++output: 31622.776601683792250 ++fcsr: 0x1010100 ++roundig mode: zero ++fsqrt.d :: ++input: -5786.500000000000000 ++output: nan ++fcsr: 0x10100100 ++roundig mode: zero ++fsqrt.d :: ++input: 1752.000000000000000 ++output: 41.856899072912696 ++fcsr: 0x1010100 ++roundig mode: zero ++fsqrt.d :: ++input: 0.015625000000000 ++output: 0.125000000000000 ++fcsr: 0x100 ++roundig mode: zero ++fsqrt.d :: ++input: 0.031250000000000 ++output: 0.176776695296636 ++fcsr: 0x1010100 ++roundig mode: zero ++fsqrt.d :: ++input: -248562.750000000000000 ++output: nan ++fcsr: 0x10100100 ++roundig mode: zero ++fsqrt.d :: ++input: -45786.500000000000000 ++output: nan ++fcsr: 0x10100100 ++roundig mode: zero ++fsqrt.d :: ++input: 456.000000000000000 ++output: 21.354156504062618 ++fcsr: 0x1010100 ++roundig mode: zero ++fsqrt.d :: ++input: 34.031250000000000 ++output: 5.833630944789016 ++fcsr: 0x1010100 ++roundig mode: zero ++fsqrt.d :: ++input: 45786.750000000000000 ++output: 213.978386759036936 ++fcsr: 0x1010100 ++roundig mode: zero ++fsqrt.d :: ++input: 1752065.000000000000000 ++output: 1323.655922058296710 ++fcsr: 0x1010100 ++roundig mode: zero ++fsqrt.d :: ++input: 107.000000000000000 ++output: 10.344080432788599 ++fcsr: 0x1010100 ++roundig mode: zero ++fsqrt.d :: ++input: -45667.250000000000000 ++output: nan ++fcsr: 0x10100100 ++roundig mode: zero ++fsqrt.d :: ++input: -7.000000000000000 ++output: nan ++fcsr: 0x10100100 ++roundig 
mode: zero ++fsqrt.d :: ++input: -347856.500000000000000 ++output: nan ++fcsr: 0x10100100 ++roundig mode: zero ++fsqrt.d :: ++input: 356047.500000000000000 ++output: 596.697159369809583 ++fcsr: 0x1010100 ++roundig mode: zero ++fsqrt.d :: ++input: -1.000000000000000 ++output: nan ++fcsr: 0x10100100 ++roundig mode: zero ++fsqrt.d :: ++input: 23.062500000000000 ++output: 4.802343178074636 ++fcsr: 0x1010100 ++roundig mode: +inf ++fsqrt.d :: ++input: 0.000000000000000 ++output: 0.000000000000000 ++fcsr: 0x200 ++roundig mode: +inf ++fsqrt.d :: ++input: 456.250000000000000 ++output: 21.360009363293831 ++fcsr: 0x1010200 ++roundig mode: +inf ++fsqrt.d :: ++input: 3.000000000000000 ++output: 1.732050807568878 ++fcsr: 0x1010200 ++roundig mode: +inf ++fsqrt.d :: ++input: -1.000000000000000 ++output: nan ++fcsr: 0x10100200 ++roundig mode: +inf ++fsqrt.d :: ++input: 1384.500000000000000 ++output: 37.208869910278118 ++fcsr: 0x1010200 ++roundig mode: +inf ++fsqrt.d :: ++input: -7.250000000000000 ++output: nan ++fcsr: 0x10100200 ++roundig mode: +inf ++fsqrt.d :: ++input: 1000000000.000000000000000 ++output: 31622.776601683795889 ++fcsr: 0x1010200 ++roundig mode: +inf ++fsqrt.d :: ++input: -5786.500000000000000 ++output: nan ++fcsr: 0x10100200 ++roundig mode: +inf ++fsqrt.d :: ++input: 1752.000000000000000 ++output: 41.856899072912704 ++fcsr: 0x1010200 ++roundig mode: +inf ++fsqrt.d :: ++input: 0.015625000000000 ++output: 0.125000000000000 ++fcsr: 0x200 ++roundig mode: +inf ++fsqrt.d :: ++input: 0.031250000000000 ++output: 0.176776695296637 ++fcsr: 0x1010200 ++roundig mode: +inf ++fsqrt.d :: ++input: -248562.750000000000000 ++output: nan ++fcsr: 0x10100200 ++roundig mode: +inf ++fsqrt.d :: ++input: -45786.500000000000000 ++output: nan ++fcsr: 0x10100200 ++roundig mode: +inf ++fsqrt.d :: ++input: 456.000000000000000 ++output: 21.354156504062623 ++fcsr: 0x1010200 ++roundig mode: +inf ++fsqrt.d :: ++input: 34.031250000000000 ++output: 5.833630944789018 ++fcsr: 0x1010200 ++roundig mode: +inf ++fsqrt.d :: ++input: 45786.750000000000000 ++output: 213.978386759036966 ++fcsr: 0x1010200 ++roundig mode: +inf ++fsqrt.d :: ++input: 1752065.000000000000000 ++output: 1323.655922058296938 ++fcsr: 0x1010200 ++roundig mode: +inf ++fsqrt.d :: ++input: 107.000000000000000 ++output: 10.344080432788602 ++fcsr: 0x1010200 ++roundig mode: +inf ++fsqrt.d :: ++input: -45667.250000000000000 ++output: nan ++fcsr: 0x10100200 ++roundig mode: +inf ++fsqrt.d :: ++input: -7.000000000000000 ++output: nan ++fcsr: 0x10100200 ++roundig mode: +inf ++fsqrt.d :: ++input: -347856.500000000000000 ++output: nan ++fcsr: 0x10100200 ++roundig mode: +inf ++fsqrt.d :: ++input: 356047.500000000000000 ++output: 596.697159369809697 ++fcsr: 0x1010200 ++roundig mode: +inf ++fsqrt.d :: ++input: -1.000000000000000 ++output: nan ++fcsr: 0x10100200 ++roundig mode: +inf ++fsqrt.d :: ++input: 23.062500000000000 ++output: 4.802343178074638 ++fcsr: 0x1010200 ++roundig mode: -inf ++fsqrt.d :: ++input: 0.000000000000000 ++output: 0.000000000000000 ++fcsr: 0x300 ++roundig mode: -inf ++fsqrt.d :: ++input: 456.250000000000000 ++output: 21.360009363293826 ++fcsr: 0x1010300 ++roundig mode: -inf ++fsqrt.d :: ++input: 3.000000000000000 ++output: 1.732050807568877 ++fcsr: 0x1010300 ++roundig mode: -inf ++fsqrt.d :: ++input: -1.000000000000000 ++output: nan ++fcsr: 0x10100300 ++roundig mode: -inf ++fsqrt.d :: ++input: 1384.500000000000000 ++output: 37.208869910278110 ++fcsr: 0x1010300 ++roundig mode: -inf ++fsqrt.d :: ++input: -7.250000000000000 ++output: nan ++fcsr: 0x10100300 
++roundig mode: -inf ++fsqrt.d :: ++input: 1000000000.000000000000000 ++output: 31622.776601683792250 ++fcsr: 0x1010300 ++roundig mode: -inf ++fsqrt.d :: ++input: -5786.500000000000000 ++output: nan ++fcsr: 0x10100300 ++roundig mode: -inf ++fsqrt.d :: ++input: 1752.000000000000000 ++output: 41.856899072912696 ++fcsr: 0x1010300 ++roundig mode: -inf ++fsqrt.d :: ++input: 0.015625000000000 ++output: 0.125000000000000 ++fcsr: 0x300 ++roundig mode: -inf ++fsqrt.d :: ++input: 0.031250000000000 ++output: 0.176776695296636 ++fcsr: 0x1010300 ++roundig mode: -inf ++fsqrt.d :: ++input: -248562.750000000000000 ++output: nan ++fcsr: 0x10100300 ++roundig mode: -inf ++fsqrt.d :: ++input: -45786.500000000000000 ++output: nan ++fcsr: 0x10100300 ++roundig mode: -inf ++fsqrt.d :: ++input: 456.000000000000000 ++output: 21.354156504062618 ++fcsr: 0x1010300 ++roundig mode: -inf ++fsqrt.d :: ++input: 34.031250000000000 ++output: 5.833630944789016 ++fcsr: 0x1010300 ++roundig mode: -inf ++fsqrt.d :: ++input: 45786.750000000000000 ++output: 213.978386759036936 ++fcsr: 0x1010300 ++roundig mode: -inf ++fsqrt.d :: ++input: 1752065.000000000000000 ++output: 1323.655922058296710 ++fcsr: 0x1010300 ++roundig mode: -inf ++fsqrt.d :: ++input: 107.000000000000000 ++output: 10.344080432788599 ++fcsr: 0x1010300 ++roundig mode: -inf ++fsqrt.d :: ++input: -45667.250000000000000 ++output: nan ++fcsr: 0x10100300 ++roundig mode: -inf ++fsqrt.d :: ++input: -7.000000000000000 ++output: nan ++fcsr: 0x10100300 ++roundig mode: -inf ++fsqrt.d :: ++input: -347856.500000000000000 ++output: nan ++fcsr: 0x10100300 ++roundig mode: -inf ++fsqrt.d :: ++input: 356047.500000000000000 ++output: 596.697159369809583 ++fcsr: 0x1010300 ++roundig mode: -inf ++fsqrt.d :: ++input: -1.000000000000000 ++output: nan ++fcsr: 0x10100300 ++roundig mode: -inf ++fsqrt.d :: ++input: 23.062500000000000 ++output: 4.802343178074636 ++fcsr: 0x1010300 ++roundig mode: near ++frecip.s :: ++input: 0.000000 ++output: inf ++fcsr: 0x8080000 ++roundig mode: near ++frecip.s :: ++input: 456.250000 ++output: 0.002192 ++fcsr: 0x1010000 ++roundig mode: near ++frecip.s :: ++input: 3.000000 ++output: 0.333333 ++fcsr: 0x1010000 ++roundig mode: near ++frecip.s :: ++input: -1.000000 ++output: -1.000000 ++fcsr: 0 ++roundig mode: near ++frecip.s :: ++input: 1384.500000 ++output: 0.000722 ++fcsr: 0x1010000 ++roundig mode: near ++frecip.s :: ++input: -7.250000 ++output: -0.137931 ++fcsr: 0x1010000 ++roundig mode: near ++frecip.s :: ++input: 1000000000.000000 ++output: 0.000000 ++fcsr: 0x1010000 ++roundig mode: near ++frecip.s :: ++input: -5786.500000 ++output: -0.000173 ++fcsr: 0x1010000 ++roundig mode: near ++frecip.s :: ++input: 1752.000000 ++output: 0.000571 ++fcsr: 0x1010000 ++roundig mode: near ++frecip.s :: ++input: 0.015625 ++output: 64.000000 ++fcsr: 0 ++roundig mode: near ++frecip.s :: ++input: 0.031250 ++output: 32.000000 ++fcsr: 0 ++roundig mode: near ++frecip.s :: ++input: -248562.750000 ++output: -0.000004 ++fcsr: 0x1010000 ++roundig mode: near ++frecip.s :: ++input: -45786.500000 ++output: -0.000022 ++fcsr: 0x1010000 ++roundig mode: near ++frecip.s :: ++input: 456.000000 ++output: 0.002193 ++fcsr: 0x1010000 ++roundig mode: near ++frecip.s :: ++input: 34.031250 ++output: 0.029385 ++fcsr: 0x1010000 ++roundig mode: near ++frecip.s :: ++input: 45786.750000 ++output: 0.000022 ++fcsr: 0x1010000 ++roundig mode: near ++frecip.s :: ++input: 1752065.000000 ++output: 0.000001 ++fcsr: 0x1010000 ++roundig mode: near ++frecip.s :: ++input: 107.000000 ++output: 0.009346 ++fcsr: 0x1010000 
++roundig mode: near ++frecip.s :: ++input: -45667.250000 ++output: -0.000022 ++fcsr: 0x1010000 ++roundig mode: near ++frecip.s :: ++input: -7.000000 ++output: -0.142857 ++fcsr: 0x1010000 ++roundig mode: near ++frecip.s :: ++input: -347856.500000 ++output: -0.000003 ++fcsr: 0x1010000 ++roundig mode: near ++frecip.s :: ++input: 356047.500000 ++output: 0.000003 ++fcsr: 0x1010000 ++roundig mode: near ++frecip.s :: ++input: -1.000000 ++output: -1.000000 ++fcsr: 0 ++roundig mode: near ++frecip.s :: ++input: 23.062500 ++output: 0.043360 ++fcsr: 0x1010000 ++roundig mode: zero ++frecip.s :: ++input: 0.000000 ++output: inf ++fcsr: 0x8080100 ++roundig mode: zero ++frecip.s :: ++input: 456.250000 ++output: 0.002191 ++fcsr: 0x1010100 ++roundig mode: zero ++frecip.s :: ++input: 3.000000 ++output: 0.333333 ++fcsr: 0x1010100 ++roundig mode: zero ++frecip.s :: ++input: -1.000000 ++output: -1.000000 ++fcsr: 0x100 ++roundig mode: zero ++frecip.s :: ++input: 1384.500000 ++output: 0.000722 ++fcsr: 0x1010100 ++roundig mode: zero ++frecip.s :: ++input: -7.250000 ++output: -0.137931 ++fcsr: 0x1010100 ++roundig mode: zero ++frecip.s :: ++input: 1000000000.000000 ++output: 0.000000 ++fcsr: 0x1010100 ++roundig mode: zero ++frecip.s :: ++input: -5786.500000 ++output: -0.000172 ++fcsr: 0x1010100 ++roundig mode: zero ++frecip.s :: ++input: 1752.000000 ++output: 0.000570 ++fcsr: 0x1010100 ++roundig mode: zero ++frecip.s :: ++input: 0.015625 ++output: 64.000000 ++fcsr: 0x100 ++roundig mode: zero ++frecip.s :: ++input: 0.031250 ++output: 32.000000 ++fcsr: 0x100 ++roundig mode: zero ++frecip.s :: ++input: -248562.750000 ++output: -0.000004 ++fcsr: 0x1010100 ++roundig mode: zero ++frecip.s :: ++input: -45786.500000 ++output: -0.000021 ++fcsr: 0x1010100 ++roundig mode: zero ++frecip.s :: ++input: 456.000000 ++output: 0.002192 ++fcsr: 0x1010100 ++roundig mode: zero ++frecip.s :: ++input: 34.031250 ++output: 0.029384 ++fcsr: 0x1010100 ++roundig mode: zero ++frecip.s :: ++input: 45786.750000 ++output: 0.000021 ++fcsr: 0x1010100 ++roundig mode: zero ++frecip.s :: ++input: 1752065.000000 ++output: 0.000000 ++fcsr: 0x1010100 ++roundig mode: zero ++frecip.s :: ++input: 107.000000 ++output: 0.009345 ++fcsr: 0x1010100 ++roundig mode: zero ++frecip.s :: ++input: -45667.250000 ++output: -0.000021 ++fcsr: 0x1010100 ++roundig mode: zero ++frecip.s :: ++input: -7.000000 ++output: -0.142857 ++fcsr: 0x1010100 ++roundig mode: zero ++frecip.s :: ++input: -347856.500000 ++output: -0.000002 ++fcsr: 0x1010100 ++roundig mode: zero ++frecip.s :: ++input: 356047.500000 ++output: 0.000002 ++fcsr: 0x1010100 ++roundig mode: zero ++frecip.s :: ++input: -1.000000 ++output: -1.000000 ++fcsr: 0x100 ++roundig mode: zero ++frecip.s :: ++input: 23.062500 ++output: 0.043360 ++fcsr: 0x1010100 ++roundig mode: +inf ++frecip.s :: ++input: 0.000000 ++output: inf ++fcsr: 0x8080200 ++roundig mode: +inf ++frecip.s :: ++input: 456.250000 ++output: 0.002192 ++fcsr: 0x1010200 ++roundig mode: +inf ++frecip.s :: ++input: 3.000000 ++output: 0.333334 ++fcsr: 0x1010200 ++roundig mode: +inf ++frecip.s :: ++input: -1.000000 ++output: -1.000000 ++fcsr: 0x200 ++roundig mode: +inf ++frecip.s :: ++input: 1384.500000 ++output: 0.000723 ++fcsr: 0x1010200 ++roundig mode: +inf ++frecip.s :: ++input: -7.250000 ++output: -0.137931 ++fcsr: 0x1010200 ++roundig mode: +inf ++frecip.s :: ++input: 1000000000.000000 ++output: 0.000001 ++fcsr: 0x1010200 ++roundig mode: +inf ++frecip.s :: ++input: -5786.500000 ++output: -0.000172 ++fcsr: 0x1010200 ++roundig mode: +inf ++frecip.s :: ++input: 
1752.000000 ++output: 0.000571 ++fcsr: 0x1010200 ++roundig mode: +inf ++frecip.s :: ++input: 0.015625 ++output: 64.000000 ++fcsr: 0x200 ++roundig mode: +inf ++frecip.s :: ++input: 0.031250 ++output: 32.000000 ++fcsr: 0x200 ++roundig mode: +inf ++frecip.s :: ++input: -248562.750000 ++output: -0.000004 ++fcsr: 0x1010200 ++roundig mode: +inf ++frecip.s :: ++input: -45786.500000 ++output: -0.000021 ++fcsr: 0x1010200 ++roundig mode: +inf ++frecip.s :: ++input: 456.000000 ++output: 0.002193 ++fcsr: 0x1010200 ++roundig mode: +inf ++frecip.s :: ++input: 34.031250 ++output: 0.029385 ++fcsr: 0x1010200 ++roundig mode: +inf ++frecip.s :: ++input: 45786.750000 ++output: 0.000022 ++fcsr: 0x1010200 ++roundig mode: +inf ++frecip.s :: ++input: 1752065.000000 ++output: 0.000001 ++fcsr: 0x1010200 ++roundig mode: +inf ++frecip.s :: ++input: 107.000000 ++output: 0.009346 ++fcsr: 0x1010200 ++roundig mode: +inf ++frecip.s :: ++input: -45667.250000 ++output: -0.000021 ++fcsr: 0x1010200 ++roundig mode: +inf ++frecip.s :: ++input: -7.000000 ++output: -0.142857 ++fcsr: 0x1010200 ++roundig mode: +inf ++frecip.s :: ++input: -347856.500000 ++output: -0.000002 ++fcsr: 0x1010200 ++roundig mode: +inf ++frecip.s :: ++input: 356047.500000 ++output: 0.000003 ++fcsr: 0x1010200 ++roundig mode: +inf ++frecip.s :: ++input: -1.000000 ++output: -1.000000 ++fcsr: 0x200 ++roundig mode: +inf ++frecip.s :: ++input: 23.062500 ++output: 0.043361 ++fcsr: 0x1010200 ++roundig mode: -inf ++frecip.s :: ++input: 0.000000 ++output: inf ++fcsr: 0x8080300 ++roundig mode: -inf ++frecip.s :: ++input: 456.250000 ++output: 0.002191 ++fcsr: 0x1010300 ++roundig mode: -inf ++frecip.s :: ++input: 3.000000 ++output: 0.333333 ++fcsr: 0x1010300 ++roundig mode: -inf ++frecip.s :: ++input: -1.000000 ++output: -1.000000 ++fcsr: 0x300 ++roundig mode: -inf ++frecip.s :: ++input: 1384.500000 ++output: 0.000722 ++fcsr: 0x1010300 ++roundig mode: -inf ++frecip.s :: ++input: -7.250000 ++output: -0.137932 ++fcsr: 0x1010300 ++roundig mode: -inf ++frecip.s :: ++input: 1000000000.000000 ++output: 0.000000 ++fcsr: 0x1010300 ++roundig mode: -inf ++frecip.s :: ++input: -5786.500000 ++output: -0.000173 ++fcsr: 0x1010300 ++roundig mode: -inf ++frecip.s :: ++input: 1752.000000 ++output: 0.000570 ++fcsr: 0x1010300 ++roundig mode: -inf ++frecip.s :: ++input: 0.015625 ++output: 64.000000 ++fcsr: 0x300 ++roundig mode: -inf ++frecip.s :: ++input: 0.031250 ++output: 32.000000 ++fcsr: 0x300 ++roundig mode: -inf ++frecip.s :: ++input: -248562.750000 ++output: -0.000005 ++fcsr: 0x1010300 ++roundig mode: -inf ++frecip.s :: ++input: -45786.500000 ++output: -0.000022 ++fcsr: 0x1010300 ++roundig mode: -inf ++frecip.s :: ++input: 456.000000 ++output: 0.002192 ++fcsr: 0x1010300 ++roundig mode: -inf ++frecip.s :: ++input: 34.031250 ++output: 0.029384 ++fcsr: 0x1010300 ++roundig mode: -inf ++frecip.s :: ++input: 45786.750000 ++output: 0.000021 ++fcsr: 0x1010300 ++roundig mode: -inf ++frecip.s :: ++input: 1752065.000000 ++output: 0.000000 ++fcsr: 0x1010300 ++roundig mode: -inf ++frecip.s :: ++input: 107.000000 ++output: 0.009345 ++fcsr: 0x1010300 ++roundig mode: -inf ++frecip.s :: ++input: -45667.250000 ++output: -0.000022 ++fcsr: 0x1010300 ++roundig mode: -inf ++frecip.s :: ++input: -7.000000 ++output: -0.142858 ++fcsr: 0x1010300 ++roundig mode: -inf ++frecip.s :: ++input: -347856.500000 ++output: -0.000003 ++fcsr: 0x1010300 ++roundig mode: -inf ++frecip.s :: ++input: 356047.500000 ++output: 0.000002 ++fcsr: 0x1010300 ++roundig mode: -inf ++frecip.s :: ++input: -1.000000 ++output: -1.000000 
++fcsr: 0x300 ++roundig mode: -inf ++frecip.s :: ++input: 23.062500 ++output: 0.043360 ++fcsr: 0x1010300 ++roundig mode: near ++frecip.d :: ++input: 0.000000000000000 ++output: inf ++fcsr: 0x8080000 ++roundig mode: near ++frecip.d :: ++input: 456.250000000000000 ++output: 0.002191780821918 ++fcsr: 0x1010000 ++roundig mode: near ++frecip.d :: ++input: 3.000000000000000 ++output: 0.333333333333333 ++fcsr: 0x1010000 ++roundig mode: near ++frecip.d :: ++input: -1.000000000000000 ++output: -1.000000000000000 ++fcsr: 0 ++roundig mode: near ++frecip.d :: ++input: 1384.500000000000000 ++output: 0.000722282412423 ++fcsr: 0x1010000 ++roundig mode: near ++frecip.d :: ++input: -7.250000000000000 ++output: -0.137931034482759 ++fcsr: 0x1010000 ++roundig mode: near ++frecip.d :: ++input: 1000000000.000000000000000 ++output: 0.000000001000000 ++fcsr: 0x1010000 ++roundig mode: near ++frecip.d :: ++input: -5786.500000000000000 ++output: -0.000172816037328 ++fcsr: 0x1010000 ++roundig mode: near ++frecip.d :: ++input: 1752.000000000000000 ++output: 0.000570776255708 ++fcsr: 0x1010000 ++roundig mode: near ++frecip.d :: ++input: 0.015625000000000 ++output: 64.000000000000000 ++fcsr: 0 ++roundig mode: near ++frecip.d :: ++input: 0.031250000000000 ++output: 32.000000000000000 ++fcsr: 0 ++roundig mode: near ++frecip.d :: ++input: -248562.750000000000000 ++output: -0.000004023128968 ++fcsr: 0x1010000 ++roundig mode: near ++frecip.d :: ++input: -45786.500000000000000 ++output: -0.000021840498837 ++fcsr: 0x1010000 ++roundig mode: near ++frecip.d :: ++input: 456.000000000000000 ++output: 0.002192982456140 ++fcsr: 0x1010000 ++roundig mode: near ++frecip.d :: ++input: 34.031250000000000 ++output: 0.029384756657484 ++fcsr: 0x1010000 ++roundig mode: near ++frecip.d :: ++input: 45786.750000000000000 ++output: 0.000021840379586 ++fcsr: 0x1010000 ++roundig mode: near ++frecip.d :: ++input: 1752065.000000000000000 ++output: 0.000000570755080 ++fcsr: 0x1010000 ++roundig mode: near ++frecip.d :: ++input: 107.000000000000000 ++output: 0.009345794392523 ++fcsr: 0x1010000 ++roundig mode: near ++frecip.d :: ++input: -45667.250000000000000 ++output: -0.000021897530506 ++fcsr: 0x1010000 ++roundig mode: near ++frecip.d :: ++input: -7.000000000000000 ++output: -0.142857142857143 ++fcsr: 0x1010000 ++roundig mode: near ++frecip.d :: ++input: -347856.500000000000000 ++output: -0.000002874748639 ++fcsr: 0x1010000 ++roundig mode: near ++frecip.d :: ++input: 356047.500000000000000 ++output: 0.000002808614019 ++fcsr: 0x1010000 ++roundig mode: near ++frecip.d :: ++input: -1.000000000000000 ++output: -1.000000000000000 ++fcsr: 0 ++roundig mode: near ++frecip.d :: ++input: 23.062500000000000 ++output: 0.043360433604336 ++fcsr: 0x1010000 ++roundig mode: zero ++frecip.d :: ++input: 0.000000000000000 ++output: inf ++fcsr: 0x8080100 ++roundig mode: zero ++frecip.d :: ++input: 456.250000000000000 ++output: 0.002191780821917 ++fcsr: 0x1010100 ++roundig mode: zero ++frecip.d :: ++input: 3.000000000000000 ++output: 0.333333333333333 ++fcsr: 0x1010100 ++roundig mode: zero ++frecip.d :: ++input: -1.000000000000000 ++output: -1.000000000000000 ++fcsr: 0x100 ++roundig mode: zero ++frecip.d :: ++input: 1384.500000000000000 ++output: 0.000722282412423 ++fcsr: 0x1010100 ++roundig mode: zero ++frecip.d :: ++input: -7.250000000000000 ++output: -0.137931034482758 ++fcsr: 0x1010100 ++roundig mode: zero ++frecip.d :: ++input: 1000000000.000000000000000 ++output: 0.000000000999999 ++fcsr: 0x1010100 ++roundig mode: zero ++frecip.d :: ++input: -5786.500000000000000 
++output: -0.000172816037328 ++fcsr: 0x1010100 ++roundig mode: zero ++frecip.d :: ++input: 1752.000000000000000 ++output: 0.000570776255707 ++fcsr: 0x1010100 ++roundig mode: zero ++frecip.d :: ++input: 0.015625000000000 ++output: 64.000000000000000 ++fcsr: 0x100 ++roundig mode: zero ++frecip.d :: ++input: 0.031250000000000 ++output: 32.000000000000000 ++fcsr: 0x100 ++roundig mode: zero ++frecip.d :: ++input: -248562.750000000000000 ++output: -0.000004023128968 ++fcsr: 0x1010100 ++roundig mode: zero ++frecip.d :: ++input: -45786.500000000000000 ++output: -0.000021840498836 ++fcsr: 0x1010100 ++roundig mode: zero ++frecip.d :: ++input: 456.000000000000000 ++output: 0.002192982456140 ++fcsr: 0x1010100 ++roundig mode: zero ++frecip.d :: ++input: 34.031250000000000 ++output: 0.029384756657483 ++fcsr: 0x1010100 ++roundig mode: zero ++frecip.d :: ++input: 45786.750000000000000 ++output: 0.000021840379585 ++fcsr: 0x1010100 ++roundig mode: zero ++frecip.d :: ++input: 1752065.000000000000000 ++output: 0.000000570755080 ++fcsr: 0x1010100 ++roundig mode: zero ++frecip.d :: ++input: 107.000000000000000 ++output: 0.009345794392523 ++fcsr: 0x1010100 ++roundig mode: zero ++frecip.d :: ++input: -45667.250000000000000 ++output: -0.000021897530505 ++fcsr: 0x1010100 ++roundig mode: zero ++frecip.d :: ++input: -7.000000000000000 ++output: -0.142857142857142 ++fcsr: 0x1010100 ++roundig mode: zero ++frecip.d :: ++input: -347856.500000000000000 ++output: -0.000002874748639 ++fcsr: 0x1010100 ++roundig mode: zero ++frecip.d :: ++input: 356047.500000000000000 ++output: 0.000002808614019 ++fcsr: 0x1010100 ++roundig mode: zero ++frecip.d :: ++input: -1.000000000000000 ++output: -1.000000000000000 ++fcsr: 0x100 ++roundig mode: zero ++frecip.d :: ++input: 23.062500000000000 ++output: 0.043360433604336 ++fcsr: 0x1010100 ++roundig mode: +inf ++frecip.d :: ++input: 0.000000000000000 ++output: inf ++fcsr: 0x8080200 ++roundig mode: +inf ++frecip.d :: ++input: 456.250000000000000 ++output: 0.002191780821918 ++fcsr: 0x1010200 ++roundig mode: +inf ++frecip.d :: ++input: 3.000000000000000 ++output: 0.333333333333334 ++fcsr: 0x1010200 ++roundig mode: +inf ++frecip.d :: ++input: -1.000000000000000 ++output: -1.000000000000000 ++fcsr: 0x200 ++roundig mode: +inf ++frecip.d :: ++input: 1384.500000000000000 ++output: 0.000722282412424 ++fcsr: 0x1010200 ++roundig mode: +inf ++frecip.d :: ++input: -7.250000000000000 ++output: -0.137931034482758 ++fcsr: 0x1010200 ++roundig mode: +inf ++frecip.d :: ++input: 1000000000.000000000000000 ++output: 0.000000001000001 ++fcsr: 0x1010200 ++roundig mode: +inf ++frecip.d :: ++input: -5786.500000000000000 ++output: -0.000172816037328 ++fcsr: 0x1010200 ++roundig mode: +inf ++frecip.d :: ++input: 1752.000000000000000 ++output: 0.000570776255708 ++fcsr: 0x1010200 ++roundig mode: +inf ++frecip.d :: ++input: 0.015625000000000 ++output: 64.000000000000000 ++fcsr: 0x200 ++roundig mode: +inf ++frecip.d :: ++input: 0.031250000000000 ++output: 32.000000000000000 ++fcsr: 0x200 ++roundig mode: +inf ++frecip.d :: ++input: -248562.750000000000000 ++output: -0.000004023128968 ++fcsr: 0x1010200 ++roundig mode: +inf ++frecip.d :: ++input: -45786.500000000000000 ++output: -0.000021840498836 ++fcsr: 0x1010200 ++roundig mode: +inf ++frecip.d :: ++input: 456.000000000000000 ++output: 0.002192982456141 ++fcsr: 0x1010200 ++roundig mode: +inf ++frecip.d :: ++input: 34.031250000000000 ++output: 0.029384756657484 ++fcsr: 0x1010200 ++roundig mode: +inf ++frecip.d :: ++input: 45786.750000000000000 ++output: 0.000021840379586 
++fcsr: 0x1010200 ++roundig mode: +inf ++frecip.d :: ++input: 1752065.000000000000000 ++output: 0.000000570755081 ++fcsr: 0x1010200 ++roundig mode: +inf ++frecip.d :: ++input: 107.000000000000000 ++output: 0.009345794392524 ++fcsr: 0x1010200 ++roundig mode: +inf ++frecip.d :: ++input: -45667.250000000000000 ++output: -0.000021897530505 ++fcsr: 0x1010200 ++roundig mode: +inf ++frecip.d :: ++input: -7.000000000000000 ++output: -0.142857142857142 ++fcsr: 0x1010200 ++roundig mode: +inf ++frecip.d :: ++input: -347856.500000000000000 ++output: -0.000002874748639 ++fcsr: 0x1010200 ++roundig mode: +inf ++frecip.d :: ++input: 356047.500000000000000 ++output: 0.000002808614020 ++fcsr: 0x1010200 ++roundig mode: +inf ++frecip.d :: ++input: -1.000000000000000 ++output: -1.000000000000000 ++fcsr: 0x200 ++roundig mode: +inf ++frecip.d :: ++input: 23.062500000000000 ++output: 0.043360433604337 ++fcsr: 0x1010200 ++roundig mode: -inf ++frecip.d :: ++input: 0.000000000000000 ++output: inf ++fcsr: 0x8080300 ++roundig mode: -inf ++frecip.d :: ++input: 456.250000000000000 ++output: 0.002191780821917 ++fcsr: 0x1010300 ++roundig mode: -inf ++frecip.d :: ++input: 3.000000000000000 ++output: 0.333333333333333 ++fcsr: 0x1010300 ++roundig mode: -inf ++frecip.d :: ++input: -1.000000000000000 ++output: -1.000000000000000 ++fcsr: 0x300 ++roundig mode: -inf ++frecip.d :: ++input: 1384.500000000000000 ++output: 0.000722282412423 ++fcsr: 0x1010300 ++roundig mode: -inf ++frecip.d :: ++input: -7.250000000000000 ++output: -0.137931034482759 ++fcsr: 0x1010300 ++roundig mode: -inf ++frecip.d :: ++input: 1000000000.000000000000000 ++output: 0.000000000999999 ++fcsr: 0x1010300 ++roundig mode: -inf ++frecip.d :: ++input: -5786.500000000000000 ++output: -0.000172816037329 ++fcsr: 0x1010300 ++roundig mode: -inf ++frecip.d :: ++input: 1752.000000000000000 ++output: 0.000570776255707 ++fcsr: 0x1010300 ++roundig mode: -inf ++frecip.d :: ++input: 0.015625000000000 ++output: 64.000000000000000 ++fcsr: 0x300 ++roundig mode: -inf ++frecip.d :: ++input: 0.031250000000000 ++output: 32.000000000000000 ++fcsr: 0x300 ++roundig mode: -inf ++frecip.d :: ++input: -248562.750000000000000 ++output: -0.000004023128969 ++fcsr: 0x1010300 ++roundig mode: -inf ++frecip.d :: ++input: -45786.500000000000000 ++output: -0.000021840498837 ++fcsr: 0x1010300 ++roundig mode: -inf ++frecip.d :: ++input: 456.000000000000000 ++output: 0.002192982456140 ++fcsr: 0x1010300 ++roundig mode: -inf ++frecip.d :: ++input: 34.031250000000000 ++output: 0.029384756657483 ++fcsr: 0x1010300 ++roundig mode: -inf ++frecip.d :: ++input: 45786.750000000000000 ++output: 0.000021840379585 ++fcsr: 0x1010300 ++roundig mode: -inf ++frecip.d :: ++input: 1752065.000000000000000 ++output: 0.000000570755080 ++fcsr: 0x1010300 ++roundig mode: -inf ++frecip.d :: ++input: 107.000000000000000 ++output: 0.009345794392523 ++fcsr: 0x1010300 ++roundig mode: -inf ++frecip.d :: ++input: -45667.250000000000000 ++output: -0.000021897530506 ++fcsr: 0x1010300 ++roundig mode: -inf ++frecip.d :: ++input: -7.000000000000000 ++output: -0.142857142857143 ++fcsr: 0x1010300 ++roundig mode: -inf ++frecip.d :: ++input: -347856.500000000000000 ++output: -0.000002874748640 ++fcsr: 0x1010300 ++roundig mode: -inf ++frecip.d :: ++input: 356047.500000000000000 ++output: 0.000002808614019 ++fcsr: 0x1010300 ++roundig mode: -inf ++frecip.d :: ++input: -1.000000000000000 ++output: -1.000000000000000 ++fcsr: 0x300 ++roundig mode: -inf ++frecip.d :: ++input: 23.062500000000000 ++output: 0.043360433604336 ++fcsr: 0x1010300 
++roundig mode: near ++frsqrt.s :: ++input: 0.000000 ++output: inf ++fcsr: 0x8080000 ++roundig mode: near ++frsqrt.s :: ++input: 456.250000 ++output: 0.046816 ++fcsr: 0x1010000 ++roundig mode: near ++frsqrt.s :: ++input: 3.000000 ++output: 0.577350 ++fcsr: 0x1010000 ++roundig mode: near ++frsqrt.s :: ++input: -1.000000 ++output: nan ++fcsr: 0x10100000 ++roundig mode: near ++frsqrt.s :: ++input: 1384.500000 ++output: 0.026875 ++fcsr: 0x1010000 ++roundig mode: near ++frsqrt.s :: ++input: -7.250000 ++output: nan ++fcsr: 0x10100000 ++roundig mode: near ++frsqrt.s :: ++input: 1000000000.000000 ++output: 0.000032 ++fcsr: 0x1010000 ++roundig mode: near ++frsqrt.s :: ++input: -5786.500000 ++output: nan ++fcsr: 0x10100000 ++roundig mode: near ++frsqrt.s :: ++input: 1752.000000 ++output: 0.023891 ++fcsr: 0x1010000 ++roundig mode: near ++frsqrt.s :: ++input: 0.015625 ++output: 8.000000 ++fcsr: 0 ++roundig mode: near ++frsqrt.s :: ++input: 0.031250 ++output: 5.656854 ++fcsr: 0x1010000 ++roundig mode: near ++frsqrt.s :: ++input: -248562.750000 ++output: nan ++fcsr: 0x10100000 ++roundig mode: near ++frsqrt.s :: ++input: -45786.500000 ++output: nan ++fcsr: 0x10100000 ++roundig mode: near ++frsqrt.s :: ++input: 456.000000 ++output: 0.046829 ++fcsr: 0x1010000 ++roundig mode: near ++frsqrt.s :: ++input: 34.031250 ++output: 0.171420 ++fcsr: 0x1010000 ++roundig mode: near ++frsqrt.s :: ++input: 45786.750000 ++output: 0.004673 ++fcsr: 0x1010000 ++roundig mode: near ++frsqrt.s :: ++input: 1752065.000000 ++output: 0.000755 ++fcsr: 0x1010000 ++roundig mode: near ++frsqrt.s :: ++input: 107.000000 ++output: 0.096674 ++fcsr: 0x1010000 ++roundig mode: near ++frsqrt.s :: ++input: -45667.250000 ++output: nan ++fcsr: 0x10100000 ++roundig mode: near ++frsqrt.s :: ++input: -7.000000 ++output: nan ++fcsr: 0x10100000 ++roundig mode: near ++frsqrt.s :: ++input: -347856.500000 ++output: nan ++fcsr: 0x10100000 ++roundig mode: near ++frsqrt.s :: ++input: 356047.500000 ++output: 0.001676 ++fcsr: 0x1010000 ++roundig mode: near ++frsqrt.s :: ++input: -1.000000 ++output: nan ++fcsr: 0x10100000 ++roundig mode: near ++frsqrt.s :: ++input: 23.062500 ++output: 0.208232 ++fcsr: 0x1010000 ++roundig mode: zero ++frsqrt.s :: ++input: 0.000000 ++output: inf ++fcsr: 0x8080100 ++roundig mode: zero ++frsqrt.s :: ++input: 456.250000 ++output: 0.046816 ++fcsr: 0x1010100 ++roundig mode: zero ++frsqrt.s :: ++input: 3.000000 ++output: 0.577350 ++fcsr: 0x1010100 ++roundig mode: zero ++frsqrt.s :: ++input: -1.000000 ++output: nan ++fcsr: 0x10100100 ++roundig mode: zero ++frsqrt.s :: ++input: 1384.500000 ++output: 0.026875 ++fcsr: 0x1010100 ++roundig mode: zero ++frsqrt.s :: ++input: -7.250000 ++output: nan ++fcsr: 0x10100100 ++roundig mode: zero ++frsqrt.s :: ++input: 1000000000.000000 ++output: 0.000031 ++fcsr: 0x1010100 ++roundig mode: zero ++frsqrt.s :: ++input: -5786.500000 ++output: nan ++fcsr: 0x10100100 ++roundig mode: zero ++frsqrt.s :: ++input: 1752.000000 ++output: 0.023890 ++fcsr: 0x1010100 ++roundig mode: zero ++frsqrt.s :: ++input: 0.015625 ++output: 8.000000 ++fcsr: 0x100 ++roundig mode: zero ++frsqrt.s :: ++input: 0.031250 ++output: 5.656854 ++fcsr: 0x1010100 ++roundig mode: zero ++frsqrt.s :: ++input: -248562.750000 ++output: nan ++fcsr: 0x10100100 ++roundig mode: zero ++frsqrt.s :: ++input: -45786.500000 ++output: nan ++fcsr: 0x10100100 ++roundig mode: zero ++frsqrt.s :: ++input: 456.000000 ++output: 0.046829 ++fcsr: 0x1010100 ++roundig mode: zero ++frsqrt.s :: ++input: 34.031250 ++output: 0.171419 ++fcsr: 0x1010100 ++roundig mode: 
zero ++frsqrt.s :: ++input: 45786.750000 ++output: 0.004673 ++fcsr: 0x1010100 ++roundig mode: zero ++frsqrt.s :: ++input: 1752065.000000 ++output: 0.000755 ++fcsr: 0x1010100 ++roundig mode: zero ++frsqrt.s :: ++input: 107.000000 ++output: 0.096673 ++fcsr: 0x1010100 ++roundig mode: zero ++frsqrt.s :: ++input: -45667.250000 ++output: nan ++fcsr: 0x10100100 ++roundig mode: zero ++frsqrt.s :: ++input: -7.000000 ++output: nan ++fcsr: 0x10100100 ++roundig mode: zero ++frsqrt.s :: ++input: -347856.500000 ++output: nan ++fcsr: 0x10100100 ++roundig mode: zero ++frsqrt.s :: ++input: 356047.500000 ++output: 0.001675 ++fcsr: 0x1010100 ++roundig mode: zero ++frsqrt.s :: ++input: -1.000000 ++output: nan ++fcsr: 0x10100100 ++roundig mode: zero ++frsqrt.s :: ++input: 23.062500 ++output: 0.208231 ++fcsr: 0x1010100 ++roundig mode: +inf ++frsqrt.s :: ++input: 0.000000 ++output: inf ++fcsr: 0x8080200 ++roundig mode: +inf ++frsqrt.s :: ++input: 456.250000 ++output: 0.046817 ++fcsr: 0x1010200 ++roundig mode: +inf ++frsqrt.s :: ++input: 3.000000 ++output: 0.577351 ++fcsr: 0x1010200 ++roundig mode: +inf ++frsqrt.s :: ++input: -1.000000 ++output: nan ++fcsr: 0x10100200 ++roundig mode: +inf ++frsqrt.s :: ++input: 1384.500000 ++output: 0.026876 ++fcsr: 0x1010200 ++roundig mode: +inf ++frsqrt.s :: ++input: -7.250000 ++output: nan ++fcsr: 0x10100200 ++roundig mode: +inf ++frsqrt.s :: ++input: 1000000000.000000 ++output: 0.000032 ++fcsr: 0x1010200 ++roundig mode: +inf ++frsqrt.s :: ++input: -5786.500000 ++output: nan ++fcsr: 0x10100200 ++roundig mode: +inf ++frsqrt.s :: ++input: 1752.000000 ++output: 0.023891 ++fcsr: 0x1010200 ++roundig mode: +inf ++frsqrt.s :: ++input: 0.015625 ++output: 8.000000 ++fcsr: 0x200 ++roundig mode: +inf ++frsqrt.s :: ++input: 0.031250 ++output: 5.656855 ++fcsr: 0x1010200 ++roundig mode: +inf ++frsqrt.s :: ++input: -248562.750000 ++output: nan ++fcsr: 0x10100200 ++roundig mode: +inf ++frsqrt.s :: ++input: -45786.500000 ++output: nan ++fcsr: 0x10100200 ++roundig mode: +inf ++frsqrt.s :: ++input: 456.000000 ++output: 0.046830 ++fcsr: 0x1010200 ++roundig mode: +inf ++frsqrt.s :: ++input: 34.031250 ++output: 0.171420 ++fcsr: 0x1010200 ++roundig mode: +inf ++frsqrt.s :: ++input: 45786.750000 ++output: 0.004674 ++fcsr: 0x1010200 ++roundig mode: +inf ++frsqrt.s :: ++input: 1752065.000000 ++output: 0.000756 ++fcsr: 0x1010200 ++roundig mode: +inf ++frsqrt.s :: ++input: 107.000000 ++output: 0.096674 ++fcsr: 0x1010200 ++roundig mode: +inf ++frsqrt.s :: ++input: -45667.250000 ++output: nan ++fcsr: 0x10100200 ++roundig mode: +inf ++frsqrt.s :: ++input: -7.000000 ++output: nan ++fcsr: 0x10100200 ++roundig mode: +inf ++frsqrt.s :: ++input: -347856.500000 ++output: nan ++fcsr: 0x10100200 ++roundig mode: +inf ++frsqrt.s :: ++input: 356047.500000 ++output: 0.001676 ++fcsr: 0x1010200 ++roundig mode: +inf ++frsqrt.s :: ++input: -1.000000 ++output: nan ++fcsr: 0x10100200 ++roundig mode: +inf ++frsqrt.s :: ++input: 23.062500 ++output: 0.208232 ++fcsr: 0x1010200 ++roundig mode: -inf ++frsqrt.s :: ++input: 0.000000 ++output: inf ++fcsr: 0x8080300 ++roundig mode: -inf ++frsqrt.s :: ++input: 456.250000 ++output: 0.046816 ++fcsr: 0x1010300 ++roundig mode: -inf ++frsqrt.s :: ++input: 3.000000 ++output: 0.577350 ++fcsr: 0x1010300 ++roundig mode: -inf ++frsqrt.s :: ++input: -1.000000 ++output: nan ++fcsr: 0x10100300 ++roundig mode: -inf ++frsqrt.s :: ++input: 1384.500000 ++output: 0.026875 ++fcsr: 0x1010300 ++roundig mode: -inf ++frsqrt.s :: ++input: -7.250000 ++output: nan ++fcsr: 0x10100300 ++roundig mode: -inf 
++frsqrt.s :: ++input: 1000000000.000000 ++output: 0.000031 ++fcsr: 0x1010300 ++roundig mode: -inf ++frsqrt.s :: ++input: -5786.500000 ++output: nan ++fcsr: 0x10100300 ++roundig mode: -inf ++frsqrt.s :: ++input: 1752.000000 ++output: 0.023890 ++fcsr: 0x1010300 ++roundig mode: -inf ++frsqrt.s :: ++input: 0.015625 ++output: 8.000000 ++fcsr: 0x300 ++roundig mode: -inf ++frsqrt.s :: ++input: 0.031250 ++output: 5.656854 ++fcsr: 0x1010300 ++roundig mode: -inf ++frsqrt.s :: ++input: -248562.750000 ++output: nan ++fcsr: 0x10100300 ++roundig mode: -inf ++frsqrt.s :: ++input: -45786.500000 ++output: nan ++fcsr: 0x10100300 ++roundig mode: -inf ++frsqrt.s :: ++input: 456.000000 ++output: 0.046829 ++fcsr: 0x1010300 ++roundig mode: -inf ++frsqrt.s :: ++input: 34.031250 ++output: 0.171419 ++fcsr: 0x1010300 ++roundig mode: -inf ++frsqrt.s :: ++input: 45786.750000 ++output: 0.004673 ++fcsr: 0x1010300 ++roundig mode: -inf ++frsqrt.s :: ++input: 1752065.000000 ++output: 0.000755 ++fcsr: 0x1010300 ++roundig mode: -inf ++frsqrt.s :: ++input: 107.000000 ++output: 0.096673 ++fcsr: 0x1010300 ++roundig mode: -inf ++frsqrt.s :: ++input: -45667.250000 ++output: nan ++fcsr: 0x10100300 ++roundig mode: -inf ++frsqrt.s :: ++input: -7.000000 ++output: nan ++fcsr: 0x10100300 ++roundig mode: -inf ++frsqrt.s :: ++input: -347856.500000 ++output: nan ++fcsr: 0x10100300 ++roundig mode: -inf ++frsqrt.s :: ++input: 356047.500000 ++output: 0.001675 ++fcsr: 0x1010300 ++roundig mode: -inf ++frsqrt.s :: ++input: -1.000000 ++output: nan ++fcsr: 0x10100300 ++roundig mode: -inf ++frsqrt.s :: ++input: 23.062500 ++output: 0.208231 ++fcsr: 0x1010300 ++roundig mode: near ++frsqrt.d :: ++input: 0.000000000000000 ++output: inf ++fcsr: 0x8080000 ++roundig mode: near ++frsqrt.d :: ++input: 456.250000000000000 ++output: 0.046816458878452 ++fcsr: 0x1010000 ++roundig mode: near ++frsqrt.d :: ++input: 3.000000000000000 ++output: 0.577350269189626 ++fcsr: 0x1010000 ++roundig mode: near ++frsqrt.d :: ++input: -1.000000000000000 ++output: nan ++fcsr: 0x10100000 ++roundig mode: near ++frsqrt.d :: ++input: 1384.500000000000000 ++output: 0.026875312322339 ++fcsr: 0x1010000 ++roundig mode: near ++frsqrt.d :: ++input: -7.250000000000000 ++output: nan ++fcsr: 0x10100000 ++roundig mode: near ++frsqrt.d :: ++input: 1000000000.000000000000000 ++output: 0.000031622776602 ++fcsr: 0x1010000 ++roundig mode: near ++frsqrt.d :: ++input: -5786.500000000000000 ++output: nan ++fcsr: 0x10100000 ++roundig mode: near ++frsqrt.d :: ++input: 1752.000000000000000 ++output: 0.023890924128375 ++fcsr: 0x1010000 ++roundig mode: near ++frsqrt.d :: ++input: 0.015625000000000 ++output: 8.000000000000000 ++fcsr: 0 ++roundig mode: near ++frsqrt.d :: ++input: 0.031250000000000 ++output: 5.656854249492380 ++fcsr: 0x1010000 ++roundig mode: near ++frsqrt.d :: ++input: -248562.750000000000000 ++output: nan ++fcsr: 0x10100000 ++roundig mode: near ++frsqrt.d :: ++input: -45786.500000000000000 ++output: nan ++fcsr: 0x10100000 ++roundig mode: near ++frsqrt.d :: ++input: 456.000000000000000 ++output: 0.046829290579085 ++fcsr: 0x1010000 ++roundig mode: near ++frsqrt.d :: ++input: 34.031250000000000 ++output: 0.171419825742193 ++fcsr: 0x1010000 ++roundig mode: near ++frsqrt.d :: ++input: 45786.750000000000000 ++output: 0.004673369189974 ++fcsr: 0x1010000 ++roundig mode: near ++frsqrt.d :: ++input: 1752065.000000000000000 ++output: 0.000755483342261 ++fcsr: 0x1010000 ++roundig mode: near ++frsqrt.d :: ++input: 107.000000000000000 ++output: 0.096673648904566 ++fcsr: 0x1010000 ++roundig mode: near 
++frsqrt.d :: ++input: -45667.250000000000000 ++output: nan ++fcsr: 0x10100000 ++roundig mode: near ++frsqrt.d :: ++input: -7.000000000000000 ++output: nan ++fcsr: 0x10100000 ++roundig mode: near ++frsqrt.d :: ++input: -347856.500000000000000 ++output: nan ++fcsr: 0x10100000 ++roundig mode: near ++frsqrt.d :: ++input: 356047.500000000000000 ++output: 0.001675892007021 ++fcsr: 0x1010000 ++roundig mode: near ++frsqrt.d :: ++input: -1.000000000000000 ++output: nan ++fcsr: 0x10100000 ++roundig mode: near ++frsqrt.d :: ++input: 23.062500000000000 ++output: 0.208231682518141 ++fcsr: 0x1010000 ++roundig mode: zero ++frsqrt.d :: ++input: 0.000000000000000 ++output: inf ++fcsr: 0x8080100 ++roundig mode: zero ++frsqrt.d :: ++input: 456.250000000000000 ++output: 0.046816458878452 ++fcsr: 0x1010100 ++roundig mode: zero ++frsqrt.d :: ++input: 3.000000000000000 ++output: 0.577350269189625 ++fcsr: 0x1010100 ++roundig mode: zero ++frsqrt.d :: ++input: -1.000000000000000 ++output: nan ++fcsr: 0x10100100 ++roundig mode: zero ++frsqrt.d :: ++input: 1384.500000000000000 ++output: 0.026875312322338 ++fcsr: 0x1010100 ++roundig mode: zero ++frsqrt.d :: ++input: -7.250000000000000 ++output: nan ++fcsr: 0x10100100 ++roundig mode: zero ++frsqrt.d :: ++input: 1000000000.000000000000000 ++output: 0.000031622776601 ++fcsr: 0x1010100 ++roundig mode: zero ++frsqrt.d :: ++input: -5786.500000000000000 ++output: nan ++fcsr: 0x10100100 ++roundig mode: zero ++frsqrt.d :: ++input: 1752.000000000000000 ++output: 0.023890924128374 ++fcsr: 0x1010100 ++roundig mode: zero ++frsqrt.d :: ++input: 0.015625000000000 ++output: 8.000000000000000 ++fcsr: 0x100 ++roundig mode: zero ++frsqrt.d :: ++input: 0.031250000000000 ++output: 5.656854249492380 ++fcsr: 0x1010100 ++roundig mode: zero ++frsqrt.d :: ++input: -248562.750000000000000 ++output: nan ++fcsr: 0x10100100 ++roundig mode: zero ++frsqrt.d :: ++input: -45786.500000000000000 ++output: nan ++fcsr: 0x10100100 ++roundig mode: zero ++frsqrt.d :: ++input: 456.000000000000000 ++output: 0.046829290579084 ++fcsr: 0x1010100 ++roundig mode: zero ++frsqrt.d :: ++input: 34.031250000000000 ++output: 0.171419825742193 ++fcsr: 0x1010100 ++roundig mode: zero ++frsqrt.d :: ++input: 45786.750000000000000 ++output: 0.004673369189973 ++fcsr: 0x1010100 ++roundig mode: zero ++frsqrt.d :: ++input: 1752065.000000000000000 ++output: 0.000755483342260 ++fcsr: 0x1010100 ++roundig mode: zero ++frsqrt.d :: ++input: 107.000000000000000 ++output: 0.096673648904566 ++fcsr: 0x1010100 ++roundig mode: zero ++frsqrt.d :: ++input: -45667.250000000000000 ++output: nan ++fcsr: 0x10100100 ++roundig mode: zero ++frsqrt.d :: ++input: -7.000000000000000 ++output: nan ++fcsr: 0x10100100 ++roundig mode: zero ++frsqrt.d :: ++input: -347856.500000000000000 ++output: nan ++fcsr: 0x10100100 ++roundig mode: zero ++frsqrt.d :: ++input: 356047.500000000000000 ++output: 0.001675892007021 ++fcsr: 0x1010100 ++roundig mode: zero ++frsqrt.d :: ++input: -1.000000000000000 ++output: nan ++fcsr: 0x10100100 ++roundig mode: zero ++frsqrt.d :: ++input: 23.062500000000000 ++output: 0.208231682518141 ++fcsr: 0x1010100 ++roundig mode: +inf ++frsqrt.d :: ++input: 0.000000000000000 ++output: inf ++fcsr: 0x8080200 ++roundig mode: +inf ++frsqrt.d :: ++input: 456.250000000000000 ++output: 0.046816458878453 ++fcsr: 0x1010200 ++roundig mode: +inf ++frsqrt.d :: ++input: 3.000000000000000 ++output: 0.577350269189626 ++fcsr: 0x1010200 ++roundig mode: +inf ++frsqrt.d :: ++input: -1.000000000000000 ++output: nan ++fcsr: 0x10100200 ++roundig mode: +inf 
++frsqrt.d :: ++input: 1384.500000000000000 ++output: 0.026875312322339 ++fcsr: 0x1010200 ++roundig mode: +inf ++frsqrt.d :: ++input: -7.250000000000000 ++output: nan ++fcsr: 0x10100200 ++roundig mode: +inf ++frsqrt.d :: ++input: 1000000000.000000000000000 ++output: 0.000031622776602 ++fcsr: 0x1010200 ++roundig mode: +inf ++frsqrt.d :: ++input: -5786.500000000000000 ++output: nan ++fcsr: 0x10100200 ++roundig mode: +inf ++frsqrt.d :: ++input: 1752.000000000000000 ++output: 0.023890924128375 ++fcsr: 0x1010200 ++roundig mode: +inf ++frsqrt.d :: ++input: 0.015625000000000 ++output: 8.000000000000000 ++fcsr: 0x200 ++roundig mode: +inf ++frsqrt.d :: ++input: 0.031250000000000 ++output: 5.656854249492381 ++fcsr: 0x1010200 ++roundig mode: +inf ++frsqrt.d :: ++input: -248562.750000000000000 ++output: nan ++fcsr: 0x10100200 ++roundig mode: +inf ++frsqrt.d :: ++input: -45786.500000000000000 ++output: nan ++fcsr: 0x10100200 ++roundig mode: +inf ++frsqrt.d :: ++input: 456.000000000000000 ++output: 0.046829290579085 ++fcsr: 0x1010200 ++roundig mode: +inf ++frsqrt.d :: ++input: 34.031250000000000 ++output: 0.171419825742194 ++fcsr: 0x1010200 ++roundig mode: +inf ++frsqrt.d :: ++input: 45786.750000000000000 ++output: 0.004673369189974 ++fcsr: 0x1010200 ++roundig mode: +inf ++frsqrt.d :: ++input: 1752065.000000000000000 ++output: 0.000755483342261 ++fcsr: 0x1010200 ++roundig mode: +inf ++frsqrt.d :: ++input: 107.000000000000000 ++output: 0.096673648904567 ++fcsr: 0x1010200 ++roundig mode: +inf ++frsqrt.d :: ++input: -45667.250000000000000 ++output: nan ++fcsr: 0x10100200 ++roundig mode: +inf ++frsqrt.d :: ++input: -7.000000000000000 ++output: nan ++fcsr: 0x10100200 ++roundig mode: +inf ++frsqrt.d :: ++input: -347856.500000000000000 ++output: nan ++fcsr: 0x10100200 ++roundig mode: +inf ++frsqrt.d :: ++input: 356047.500000000000000 ++output: 0.001675892007022 ++fcsr: 0x1010200 ++roundig mode: +inf ++frsqrt.d :: ++input: -1.000000000000000 ++output: nan ++fcsr: 0x10100200 ++roundig mode: +inf ++frsqrt.d :: ++input: 23.062500000000000 ++output: 0.208231682518142 ++fcsr: 0x1010200 ++roundig mode: -inf ++frsqrt.d :: ++input: 0.000000000000000 ++output: inf ++fcsr: 0x8080300 ++roundig mode: -inf ++frsqrt.d :: ++input: 456.250000000000000 ++output: 0.046816458878452 ++fcsr: 0x1010300 ++roundig mode: -inf ++frsqrt.d :: ++input: 3.000000000000000 ++output: 0.577350269189625 ++fcsr: 0x1010300 ++roundig mode: -inf ++frsqrt.d :: ++input: -1.000000000000000 ++output: nan ++fcsr: 0x10100300 ++roundig mode: -inf ++frsqrt.d :: ++input: 1384.500000000000000 ++output: 0.026875312322338 ++fcsr: 0x1010300 ++roundig mode: -inf ++frsqrt.d :: ++input: -7.250000000000000 ++output: nan ++fcsr: 0x10100300 ++roundig mode: -inf ++frsqrt.d :: ++input: 1000000000.000000000000000 ++output: 0.000031622776601 ++fcsr: 0x1010300 ++roundig mode: -inf ++frsqrt.d :: ++input: -5786.500000000000000 ++output: nan ++fcsr: 0x10100300 ++roundig mode: -inf ++frsqrt.d :: ++input: 1752.000000000000000 ++output: 0.023890924128374 ++fcsr: 0x1010300 ++roundig mode: -inf ++frsqrt.d :: ++input: 0.015625000000000 ++output: 8.000000000000000 ++fcsr: 0x300 ++roundig mode: -inf ++frsqrt.d :: ++input: 0.031250000000000 ++output: 5.656854249492380 ++fcsr: 0x1010300 ++roundig mode: -inf ++frsqrt.d :: ++input: -248562.750000000000000 ++output: nan ++fcsr: 0x10100300 ++roundig mode: -inf ++frsqrt.d :: ++input: -45786.500000000000000 ++output: nan ++fcsr: 0x10100300 ++roundig mode: -inf ++frsqrt.d :: ++input: 456.000000000000000 ++output: 0.046829290579084 ++fcsr: 
0x1010300 ++roundig mode: -inf ++frsqrt.d :: ++input: 34.031250000000000 ++output: 0.171419825742193 ++fcsr: 0x1010300 ++roundig mode: -inf ++frsqrt.d :: ++input: 45786.750000000000000 ++output: 0.004673369189973 ++fcsr: 0x1010300 ++roundig mode: -inf ++frsqrt.d :: ++input: 1752065.000000000000000 ++output: 0.000755483342260 ++fcsr: 0x1010300 ++roundig mode: -inf ++frsqrt.d :: ++input: 107.000000000000000 ++output: 0.096673648904566 ++fcsr: 0x1010300 ++roundig mode: -inf ++frsqrt.d :: ++input: -45667.250000000000000 ++output: nan ++fcsr: 0x10100300 ++roundig mode: -inf ++frsqrt.d :: ++input: -7.000000000000000 ++output: nan ++fcsr: 0x10100300 ++roundig mode: -inf ++frsqrt.d :: ++input: -347856.500000000000000 ++output: nan ++fcsr: 0x10100300 ++roundig mode: -inf ++frsqrt.d :: ++input: 356047.500000000000000 ++output: 0.001675892007021 ++fcsr: 0x1010300 ++roundig mode: -inf ++frsqrt.d :: ++input: -1.000000000000000 ++output: nan ++fcsr: 0x10100300 ++roundig mode: -inf ++frsqrt.d :: ++input: 23.062500000000000 ++output: 0.208231682518141 ++fcsr: 0x1010300 ++roundig mode: near ++fscaleb.s :: ++input: 0.000000 -4578.500000 ++output: 0.000000 ++fcsr: 0 ++roundig mode: near ++fscaleb.s :: ++input: 456.250000 456.250000 ++output: inf ++fcsr: 0x5050000 ++roundig mode: near ++fscaleb.s :: ++input: 3.000000 34.031250 ++output: inf ++fcsr: 0x5050000 ++roundig mode: near ++fscaleb.s :: ++input: -1.000000 4578.750000 ++output: -inf ++fcsr: 0x5050000 ++roundig mode: near ++fscaleb.s :: ++input: 1384.500000 175.000000 ++output: inf ++fcsr: 0x5050000 ++roundig mode: near ++fscaleb.s :: ++input: -7.250000 107.000000 ++output: -inf ++fcsr: 0x5050000 ++roundig mode: near ++fscaleb.s :: ++input: 1000000000.000000 -456.250000 ++output: 0.000000 ++fcsr: 0x3030000 ++roundig mode: near ++fscaleb.s :: ++input: -5786.500000 -7.250000 ++output: -0.000000 ++fcsr: 0x3030000 ++roundig mode: near ++fscaleb.s :: ++input: 1752.000000 -3478.500000 ++output: 0.000000 ++fcsr: 0x3030000 ++roundig mode: near ++fscaleb.s :: ++input: 0.015625 356.500000 ++output: inf ++fcsr: 0x5050000 ++roundig mode: near ++fscaleb.s :: ++input: 0.031250 -1.000000 ++output: 0.000000 ++fcsr: 0x3030000 ++roundig mode: near ++fscaleb.s :: ++input: -248562.750000 23.062500 ++output: -inf ++fcsr: 0x5050000 ++roundig mode: near ++fscaleb.s :: ++input: -45786.500000 0.000000 ++output: -45786.500000 ++fcsr: 0 ++roundig mode: near ++fscaleb.s :: ++input: 456.000000 456.250000 ++output: inf ++fcsr: 0x5050000 ++roundig mode: near ++fscaleb.s :: ++input: 34.031250 3.000000 ++output: inf ++fcsr: 0x5050000 ++roundig mode: near ++fscaleb.s :: ++input: 45786.750000 -1.000000 ++output: 0.000000 ++fcsr: 0x3030000 ++roundig mode: near ++fscaleb.s :: ++input: 1752065.000000 1384.500000 ++output: inf ++fcsr: 0x5050000 ++roundig mode: near ++fscaleb.s :: ++input: 107.000000 -7.000000 ++output: 0.000000 ++fcsr: 0x3030000 ++roundig mode: near ++fscaleb.s :: ++input: -45667.250000 100.000000 ++output: -inf ++fcsr: 0x5050000 ++roundig mode: near ++fscaleb.s :: ++input: -7.000000 -5786.500000 ++output: -0.000000 ++fcsr: 0x3030000 ++roundig mode: near ++fscaleb.s :: ++input: -347856.500000 1752.000000 ++output: -inf ++fcsr: 0x5050000 ++roundig mode: near ++fscaleb.s :: ++input: 356047.500000 0.015625 ++output: inf ++fcsr: 0x5050000 ++roundig mode: near ++fscaleb.s :: ++input: -1.000000 0.031250 ++output: -inf ++fcsr: 0x5050000 ++roundig mode: near ++fscaleb.s :: ++input: 23.062500 -248562.750000 ++output: 0.000000 ++fcsr: 0x3030000 ++roundig mode: zero ++fscaleb.s :: 
++input: 0.000000 -4578.500000 ++output: 0.000000 ++fcsr: 0x100 ++roundig mode: zero ++fscaleb.s :: ++input: 456.250000 456.250000 ++output: 340282346638528859811704183484516925440.000000 ++fcsr: 0x5050100 ++roundig mode: zero ++fscaleb.s :: ++input: 3.000000 34.031250 ++output: 340282346638528859811704183484516925440.000000 ++fcsr: 0x5050100 ++roundig mode: zero ++fscaleb.s :: ++input: -1.000000 4578.750000 ++output: -340282346638528859811704183484516925440.000000 ++fcsr: 0x5050100 ++roundig mode: zero ++fscaleb.s :: ++input: 1384.500000 175.000000 ++output: 340282346638528859811704183484516925440.000000 ++fcsr: 0x5050100 ++roundig mode: zero ++fscaleb.s :: ++input: -7.250000 107.000000 ++output: -340282346638528859811704183484516925440.000000 ++fcsr: 0x5050100 ++roundig mode: zero ++fscaleb.s :: ++input: 1000000000.000000 -456.250000 ++output: 0.000000 ++fcsr: 0x3030100 ++roundig mode: zero ++fscaleb.s :: ++input: -5786.500000 -7.250000 ++output: -0.000000 ++fcsr: 0x3030100 ++roundig mode: zero ++fscaleb.s :: ++input: 1752.000000 -3478.500000 ++output: 0.000000 ++fcsr: 0x3030100 ++roundig mode: zero ++fscaleb.s :: ++input: 0.015625 356.500000 ++output: 340282346638528859811704183484516925440.000000 ++fcsr: 0x5050100 ++roundig mode: zero ++fscaleb.s :: ++input: 0.031250 -1.000000 ++output: 0.000000 ++fcsr: 0x3030100 ++roundig mode: zero ++fscaleb.s :: ++input: -248562.750000 23.062500 ++output: -340282346638528859811704183484516925440.000000 ++fcsr: 0x5050100 ++roundig mode: zero ++fscaleb.s :: ++input: -45786.500000 0.000000 ++output: -45786.500000 ++fcsr: 0x100 ++roundig mode: zero ++fscaleb.s :: ++input: 456.000000 456.250000 ++output: 340282346638528859811704183484516925440.000000 ++fcsr: 0x5050100 ++roundig mode: zero ++fscaleb.s :: ++input: 34.031250 3.000000 ++output: 340282346638528859811704183484516925440.000000 ++fcsr: 0x5050100 ++roundig mode: zero ++fscaleb.s :: ++input: 45786.750000 -1.000000 ++output: 0.000000 ++fcsr: 0x3030100 ++roundig mode: zero ++fscaleb.s :: ++input: 1752065.000000 1384.500000 ++output: 340282346638528859811704183484516925440.000000 ++fcsr: 0x5050100 ++roundig mode: zero ++fscaleb.s :: ++input: 107.000000 -7.000000 ++output: 0.000000 ++fcsr: 0x3030100 ++roundig mode: zero ++fscaleb.s :: ++input: -45667.250000 100.000000 ++output: -340282346638528859811704183484516925440.000000 ++fcsr: 0x5050100 ++roundig mode: zero ++fscaleb.s :: ++input: -7.000000 -5786.500000 ++output: -0.000000 ++fcsr: 0x3030100 ++roundig mode: zero ++fscaleb.s :: ++input: -347856.500000 1752.000000 ++output: -340282346638528859811704183484516925440.000000 ++fcsr: 0x5050100 ++roundig mode: zero ++fscaleb.s :: ++input: 356047.500000 0.015625 ++output: 340282346638528859811704183484516925440.000000 ++fcsr: 0x5050100 ++roundig mode: zero ++fscaleb.s :: ++input: -1.000000 0.031250 ++output: -340282346638528859811704183484516925440.000000 ++fcsr: 0x5050100 ++roundig mode: zero ++fscaleb.s :: ++input: 23.062500 -248562.750000 ++output: 0.000000 ++fcsr: 0x3030100 ++roundig mode: +inf ++fscaleb.s :: ++input: 0.000000 -4578.500000 ++output: 0.000000 ++fcsr: 0x200 ++roundig mode: +inf ++fscaleb.s :: ++input: 456.250000 456.250000 ++output: inf ++fcsr: 0x5050200 ++roundig mode: +inf ++fscaleb.s :: ++input: 3.000000 34.031250 ++output: inf ++fcsr: 0x5050200 ++roundig mode: +inf ++fscaleb.s :: ++input: -1.000000 4578.750000 ++output: -340282346638528859811704183484516925440.000000 ++fcsr: 0x5050200 ++roundig mode: +inf ++fscaleb.s :: ++input: 1384.500000 175.000000 ++output: inf ++fcsr: 0x5050200 
++roundig mode: +inf ++fscaleb.s :: ++input: -7.250000 107.000000 ++output: -340282346638528859811704183484516925440.000000 ++fcsr: 0x5050200 ++roundig mode: +inf ++fscaleb.s :: ++input: 1000000000.000000 -456.250000 ++output: 0.000001 ++fcsr: 0x3030200 ++roundig mode: +inf ++fscaleb.s :: ++input: -5786.500000 -7.250000 ++output: -0.000000 ++fcsr: 0x3030200 ++roundig mode: +inf ++fscaleb.s :: ++input: 1752.000000 -3478.500000 ++output: 0.000001 ++fcsr: 0x3030200 ++roundig mode: +inf ++fscaleb.s :: ++input: 0.015625 356.500000 ++output: inf ++fcsr: 0x5050200 ++roundig mode: +inf ++fscaleb.s :: ++input: 0.031250 -1.000000 ++output: 0.000001 ++fcsr: 0x3030200 ++roundig mode: +inf ++fscaleb.s :: ++input: -248562.750000 23.062500 ++output: -340282346638528859811704183484516925440.000000 ++fcsr: 0x5050200 ++roundig mode: +inf ++fscaleb.s :: ++input: -45786.500000 0.000000 ++output: -45786.500000 ++fcsr: 0x200 ++roundig mode: +inf ++fscaleb.s :: ++input: 456.000000 456.250000 ++output: inf ++fcsr: 0x5050200 ++roundig mode: +inf ++fscaleb.s :: ++input: 34.031250 3.000000 ++output: inf ++fcsr: 0x5050200 ++roundig mode: +inf ++fscaleb.s :: ++input: 45786.750000 -1.000000 ++output: 0.000001 ++fcsr: 0x3030200 ++roundig mode: +inf ++fscaleb.s :: ++input: 1752065.000000 1384.500000 ++output: inf ++fcsr: 0x5050200 ++roundig mode: +inf ++fscaleb.s :: ++input: 107.000000 -7.000000 ++output: 0.000001 ++fcsr: 0x3030200 ++roundig mode: +inf ++fscaleb.s :: ++input: -45667.250000 100.000000 ++output: -340282346638528859811704183484516925440.000000 ++fcsr: 0x5050200 ++roundig mode: +inf ++fscaleb.s :: ++input: -7.000000 -5786.500000 ++output: -0.000000 ++fcsr: 0x3030200 ++roundig mode: +inf ++fscaleb.s :: ++input: -347856.500000 1752.000000 ++output: -340282346638528859811704183484516925440.000000 ++fcsr: 0x5050200 ++roundig mode: +inf ++fscaleb.s :: ++input: 356047.500000 0.015625 ++output: inf ++fcsr: 0x5050200 ++roundig mode: +inf ++fscaleb.s :: ++input: -1.000000 0.031250 ++output: -340282346638528859811704183484516925440.000000 ++fcsr: 0x5050200 ++roundig mode: +inf ++fscaleb.s :: ++input: 23.062500 -248562.750000 ++output: 0.000001 ++fcsr: 0x3030200 ++roundig mode: -inf ++fscaleb.s :: ++input: 0.000000 -4578.500000 ++output: 0.000000 ++fcsr: 0x300 ++roundig mode: -inf ++fscaleb.s :: ++input: 456.250000 456.250000 ++output: 340282346638528859811704183484516925440.000000 ++fcsr: 0x5050300 ++roundig mode: -inf ++fscaleb.s :: ++input: 3.000000 34.031250 ++output: 340282346638528859811704183484516925440.000000 ++fcsr: 0x5050300 ++roundig mode: -inf ++fscaleb.s :: ++input: -1.000000 4578.750000 ++output: -inf ++fcsr: 0x5050300 ++roundig mode: -inf ++fscaleb.s :: ++input: 1384.500000 175.000000 ++output: 340282346638528859811704183484516925440.000000 ++fcsr: 0x5050300 ++roundig mode: -inf ++fscaleb.s :: ++input: -7.250000 107.000000 ++output: -inf ++fcsr: 0x5050300 ++roundig mode: -inf ++fscaleb.s :: ++input: 1000000000.000000 -456.250000 ++output: 0.000000 ++fcsr: 0x3030300 ++roundig mode: -inf ++fscaleb.s :: ++input: -5786.500000 -7.250000 ++output: -0.000001 ++fcsr: 0x3030300 ++roundig mode: -inf ++fscaleb.s :: ++input: 1752.000000 -3478.500000 ++output: 0.000000 ++fcsr: 0x3030300 ++roundig mode: -inf ++fscaleb.s :: ++input: 0.015625 356.500000 ++output: 340282346638528859811704183484516925440.000000 ++fcsr: 0x5050300 ++roundig mode: -inf ++fscaleb.s :: ++input: 0.031250 -1.000000 ++output: 0.000000 ++fcsr: 0x3030300 ++roundig mode: -inf ++fscaleb.s :: ++input: -248562.750000 23.062500 ++output: -inf ++fcsr: 
0x5050300 ++roundig mode: -inf ++fscaleb.s :: ++input: -45786.500000 0.000000 ++output: -45786.500000 ++fcsr: 0x300 ++roundig mode: -inf ++fscaleb.s :: ++input: 456.000000 456.250000 ++output: 340282346638528859811704183484516925440.000000 ++fcsr: 0x5050300 ++roundig mode: -inf ++fscaleb.s :: ++input: 34.031250 3.000000 ++output: 340282346638528859811704183484516925440.000000 ++fcsr: 0x5050300 ++roundig mode: -inf ++fscaleb.s :: ++input: 45786.750000 -1.000000 ++output: 0.000000 ++fcsr: 0x3030300 ++roundig mode: -inf ++fscaleb.s :: ++input: 1752065.000000 1384.500000 ++output: 340282346638528859811704183484516925440.000000 ++fcsr: 0x5050300 ++roundig mode: -inf ++fscaleb.s :: ++input: 107.000000 -7.000000 ++output: 0.000000 ++fcsr: 0x3030300 ++roundig mode: -inf ++fscaleb.s :: ++input: -45667.250000 100.000000 ++output: -inf ++fcsr: 0x5050300 ++roundig mode: -inf ++fscaleb.s :: ++input: -7.000000 -5786.500000 ++output: -0.000001 ++fcsr: 0x3030300 ++roundig mode: -inf ++fscaleb.s :: ++input: -347856.500000 1752.000000 ++output: -inf ++fcsr: 0x5050300 ++roundig mode: -inf ++fscaleb.s :: ++input: 356047.500000 0.015625 ++output: 340282346638528859811704183484516925440.000000 ++fcsr: 0x5050300 ++roundig mode: -inf ++fscaleb.s :: ++input: -1.000000 0.031250 ++output: -inf ++fcsr: 0x5050300 ++roundig mode: -inf ++fscaleb.s :: ++input: 23.062500 -248562.750000 ++output: 0.000000 ++fcsr: 0x3030300 ++roundig mode: near ++fscaleb.d :: ++input: 0.000000000000000 -45786.500000000000000 ++output: 0.000000000000000 ++fcsr: 0 ++roundig mode: near ++fscaleb.d :: ++input: 456.250000000000000 456.250000000000000 ++output: inf ++fcsr: 0x5050000 ++roundig mode: near ++fscaleb.d :: ++input: 3.000000000000000 34.031250000000000 ++output: inf ++fcsr: 0x5050000 ++roundig mode: near ++fscaleb.d :: ++input: -1.000000000000000 45786.750000000000000 ++output: -inf ++fcsr: 0x5050000 ++roundig mode: near ++fscaleb.d :: ++input: 1384.500000000000000 1752065.000000000000000 ++output: inf ++fcsr: 0x5050000 ++roundig mode: near ++fscaleb.d :: ++input: -7.250000000000000 107.000000000000000 ++output: -inf ++fcsr: 0x5050000 ++roundig mode: near ++fscaleb.d :: ++input: 1000000000.000000000000000 -45667.250000000000000 ++output: 0.000000000000000 ++fcsr: 0x3030000 ++roundig mode: near ++fscaleb.d :: ++input: -5786.500000000000000 -7.250000000000000 ++output: -0.000000000000000 ++fcsr: 0x3030000 ++roundig mode: near ++fscaleb.d :: ++input: 1752.000000000000000 -347856.500000000000000 ++output: 0.000000000000000 ++fcsr: 0x3030000 ++roundig mode: near ++fscaleb.d :: ++input: 0.015625000000000 356047.500000000000000 ++output: inf ++fcsr: 0x5050000 ++roundig mode: near ++fscaleb.d :: ++input: 0.031250000000000 -1.000000000000000 ++output: 0.000000000000000 ++fcsr: 0x3030000 ++roundig mode: near ++fscaleb.d :: ++input: -248562.750000000000000 23.062500000000000 ++output: -inf ++fcsr: 0x5050000 ++roundig mode: near ++fscaleb.d :: ++input: -45786.500000000000000 0.000000000000000 ++output: -45786.500000000000000 ++fcsr: 0 ++roundig mode: near ++fscaleb.d :: ++input: 456.000000000000000 456.250000000000000 ++output: inf ++fcsr: 0x5050000 ++roundig mode: near ++fscaleb.d :: ++input: 34.031250000000000 3.000000000000000 ++output: inf ++fcsr: 0x5050000 ++roundig mode: near ++fscaleb.d :: ++input: 45786.750000000000000 -1.000000000000000 ++output: 0.000000000000000 ++fcsr: 0x3030000 ++roundig mode: near ++fscaleb.d :: ++input: 1752065.000000000000000 1384.500000000000000 ++output: inf ++fcsr: 0x5050000 ++roundig mode: near ++fscaleb.d :: 
++input: 107.000000000000000 -7.000000000000000 ++output: 0.000000000000000 ++fcsr: 0x3030000 ++roundig mode: near ++fscaleb.d :: ++input: -45667.250000000000000 1000000000.000000000000000 ++output: -inf ++fcsr: 0x5050000 ++roundig mode: near ++fscaleb.d :: ++input: -7.000000000000000 -5786.500000000000000 ++output: -0.000000000000000 ++fcsr: 0x3030000 ++roundig mode: near ++fscaleb.d :: ++input: -347856.500000000000000 1752.000000000000000 ++output: -inf ++fcsr: 0x5050000 ++roundig mode: near ++fscaleb.d :: ++input: 356047.500000000000000 0.015625000000000 ++output: inf ++fcsr: 0x5050000 ++roundig mode: near ++fscaleb.d :: ++input: -1.000000000000000 0.031250000000000 ++output: -inf ++fcsr: 0x5050000 ++roundig mode: near ++fscaleb.d :: ++input: 23.062500000000000 -248562.750000000000000 ++output: 0.000000000000000 ++fcsr: 0x3030000 ++roundig mode: zero ++fscaleb.d :: ++input: 0.000000000000000 -45786.500000000000000 ++output: 0.000000000000000 ++fcsr: 0x100 ++roundig mode: zero ++fscaleb.d :: ++input: 456.250000000000000 456.250000000000000 ++output: 179769313486231570814527423731704356798070567525844996598917476803157260780028538760589558632766878171540458953514382464234321326889464182768467546703537516986049910576551282076245490090389328944075868508455133942304583236903222948165808559332123348274797826204144723168738177180919299881250404026184124858368.000000000000000 ++fcsr: 0x5050100 ++roundig mode: zero ++fscaleb.d :: ++input: 3.000000000000000 34.031250000000000 ++output: 179769313486231570814527423731704356798070567525844996598917476803157260780028538760589558632766878171540458953514382464234321326889464182768467546703537516986049910576551282076245490090389328944075868508455133942304583236903222948165808559332123348274797826204144723168738177180919299881250404026184124858368.000000000000000 ++fcsr: 0x5050100 ++roundig mode: zero ++fscaleb.d :: ++input: -1.000000000000000 45786.750000000000000 ++output: -179769313486231570814527423731704356798070567525844996598917476803157260780028538760589558632766878171540458953514382464234321326889464182768467546703537516986049910576551282076245490090389328944075868508455133942304583236903222948165808559332123348274797826204144723168738177180919299881250404026184124858368.000000000000000 ++fcsr: 0x5050100 ++roundig mode: zero ++fscaleb.d :: ++input: 1384.500000000000000 1752065.000000000000000 ++output: 179769313486231570814527423731704356798070567525844996598917476803157260780028538760589558632766878171540458953514382464234321326889464182768467546703537516986049910576551282076245490090389328944075868508455133942304583236903222948165808559332123348274797826204144723168738177180919299881250404026184124858368.000000000000000 ++fcsr: 0x5050100 ++roundig mode: zero ++fscaleb.d :: ++input: -7.250000000000000 107.000000000000000 ++output: -179769313486231570814527423731704356798070567525844996598917476803157260780028538760589558632766878171540458953514382464234321326889464182768467546703537516986049910576551282076245490090389328944075868508455133942304583236903222948165808559332123348274797826204144723168738177180919299881250404026184124858368.000000000000000 ++fcsr: 0x5050100 ++roundig mode: zero ++fscaleb.d :: ++input: 1000000000.000000000000000 -45667.250000000000000 ++output: 0.000000000000000 ++fcsr: 0x3030100 ++roundig mode: zero ++fscaleb.d :: ++input: -5786.500000000000000 -7.250000000000000 ++output: -0.000000000000000 ++fcsr: 0x3030100 ++roundig mode: zero ++fscaleb.d :: ++input: 1752.000000000000000 -347856.500000000000000 ++output: 
0.000000000000000 ++fcsr: 0x3030100 ++roundig mode: zero ++fscaleb.d :: ++input: 0.015625000000000 356047.500000000000000 ++output: 179769313486231570814527423731704356798070567525844996598917476803157260780028538760589558632766878171540458953514382464234321326889464182768467546703537516986049910576551282076245490090389328944075868508455133942304583236903222948165808559332123348274797826204144723168738177180919299881250404026184124858368.000000000000000 ++fcsr: 0x5050100 ++roundig mode: zero ++fscaleb.d :: ++input: 0.031250000000000 -1.000000000000000 ++output: 0.000000000000000 ++fcsr: 0x3030100 ++roundig mode: zero ++fscaleb.d :: ++input: -248562.750000000000000 23.062500000000000 ++output: -179769313486231570814527423731704356798070567525844996598917476803157260780028538760589558632766878171540458953514382464234321326889464182768467546703537516986049910576551282076245490090389328944075868508455133942304583236903222948165808559332123348274797826204144723168738177180919299881250404026184124858368.000000000000000 ++fcsr: 0x5050100 ++roundig mode: zero ++fscaleb.d :: ++input: -45786.500000000000000 0.000000000000000 ++output: -45786.500000000000000 ++fcsr: 0x100 ++roundig mode: zero ++fscaleb.d :: ++input: 456.000000000000000 456.250000000000000 ++output: 179769313486231570814527423731704356798070567525844996598917476803157260780028538760589558632766878171540458953514382464234321326889464182768467546703537516986049910576551282076245490090389328944075868508455133942304583236903222948165808559332123348274797826204144723168738177180919299881250404026184124858368.000000000000000 ++fcsr: 0x5050100 ++roundig mode: zero ++fscaleb.d :: ++input: 34.031250000000000 3.000000000000000 ++output: 179769313486231570814527423731704356798070567525844996598917476803157260780028538760589558632766878171540458953514382464234321326889464182768467546703537516986049910576551282076245490090389328944075868508455133942304583236903222948165808559332123348274797826204144723168738177180919299881250404026184124858368.000000000000000 ++fcsr: 0x5050100 ++roundig mode: zero ++fscaleb.d :: ++input: 45786.750000000000000 -1.000000000000000 ++output: 0.000000000000000 ++fcsr: 0x3030100 ++roundig mode: zero ++fscaleb.d :: ++input: 1752065.000000000000000 1384.500000000000000 ++output: 179769313486231570814527423731704356798070567525844996598917476803157260780028538760589558632766878171540458953514382464234321326889464182768467546703537516986049910576551282076245490090389328944075868508455133942304583236903222948165808559332123348274797826204144723168738177180919299881250404026184124858368.000000000000000 ++fcsr: 0x5050100 ++roundig mode: zero ++fscaleb.d :: ++input: 107.000000000000000 -7.000000000000000 ++output: 0.000000000000000 ++fcsr: 0x3030100 ++roundig mode: zero ++fscaleb.d :: ++input: -45667.250000000000000 1000000000.000000000000000 ++output: -179769313486231570814527423731704356798070567525844996598917476803157260780028538760589558632766878171540458953514382464234321326889464182768467546703537516986049910576551282076245490090389328944075868508455133942304583236903222948165808559332123348274797826204144723168738177180919299881250404026184124858368.000000000000000 ++fcsr: 0x5050100 ++roundig mode: zero ++fscaleb.d :: ++input: -7.000000000000000 -5786.500000000000000 ++output: -0.000000000000000 ++fcsr: 0x3030100 ++roundig mode: zero ++fscaleb.d :: ++input: -347856.500000000000000 1752.000000000000000 ++output: 
-179769313486231570814527423731704356798070567525844996598917476803157260780028538760589558632766878171540458953514382464234321326889464182768467546703537516986049910576551282076245490090389328944075868508455133942304583236903222948165808559332123348274797826204144723168738177180919299881250404026184124858368.000000000000000 ++fcsr: 0x5050100 ++roundig mode: zero ++fscaleb.d :: ++input: 356047.500000000000000 0.015625000000000 ++output: 179769313486231570814527423731704356798070567525844996598917476803157260780028538760589558632766878171540458953514382464234321326889464182768467546703537516986049910576551282076245490090389328944075868508455133942304583236903222948165808559332123348274797826204144723168738177180919299881250404026184124858368.000000000000000 ++fcsr: 0x5050100 ++roundig mode: zero ++fscaleb.d :: ++input: -1.000000000000000 0.031250000000000 ++output: -179769313486231570814527423731704356798070567525844996598917476803157260780028538760589558632766878171540458953514382464234321326889464182768467546703537516986049910576551282076245490090389328944075868508455133942304583236903222948165808559332123348274797826204144723168738177180919299881250404026184124858368.000000000000000 ++fcsr: 0x5050100 ++roundig mode: zero ++fscaleb.d :: ++input: 23.062500000000000 -248562.750000000000000 ++output: 0.000000000000000 ++fcsr: 0x3030100 ++roundig mode: +inf ++fscaleb.d :: ++input: 0.000000000000000 -45786.500000000000000 ++output: 0.000000000000000 ++fcsr: 0x200 ++roundig mode: +inf ++fscaleb.d :: ++input: 456.250000000000000 456.250000000000000 ++output: inf ++fcsr: 0x5050200 ++roundig mode: +inf ++fscaleb.d :: ++input: 3.000000000000000 34.031250000000000 ++output: inf ++fcsr: 0x5050200 ++roundig mode: +inf ++fscaleb.d :: ++input: -1.000000000000000 45786.750000000000000 ++output: -179769313486231570814527423731704356798070567525844996598917476803157260780028538760589558632766878171540458953514382464234321326889464182768467546703537516986049910576551282076245490090389328944075868508455133942304583236903222948165808559332123348274797826204144723168738177180919299881250404026184124858368.000000000000000 ++fcsr: 0x5050200 ++roundig mode: +inf ++fscaleb.d :: ++input: 1384.500000000000000 1752065.000000000000000 ++output: inf ++fcsr: 0x5050200 ++roundig mode: +inf ++fscaleb.d :: ++input: -7.250000000000000 107.000000000000000 ++output: -179769313486231570814527423731704356798070567525844996598917476803157260780028538760589558632766878171540458953514382464234321326889464182768467546703537516986049910576551282076245490090389328944075868508455133942304583236903222948165808559332123348274797826204144723168738177180919299881250404026184124858368.000000000000000 ++fcsr: 0x5050200 ++roundig mode: +inf ++fscaleb.d :: ++input: 1000000000.000000000000000 -45667.250000000000000 ++output: 0.000000000000001 ++fcsr: 0x3030200 ++roundig mode: +inf ++fscaleb.d :: ++input: -5786.500000000000000 -7.250000000000000 ++output: -0.000000000000000 ++fcsr: 0x3030200 ++roundig mode: +inf ++fscaleb.d :: ++input: 1752.000000000000000 -347856.500000000000000 ++output: 0.000000000000001 ++fcsr: 0x3030200 ++roundig mode: +inf ++fscaleb.d :: ++input: 0.015625000000000 356047.500000000000000 ++output: inf ++fcsr: 0x5050200 ++roundig mode: +inf ++fscaleb.d :: ++input: 0.031250000000000 -1.000000000000000 ++output: 0.000000000000001 ++fcsr: 0x3030200 ++roundig mode: +inf ++fscaleb.d :: ++input: -248562.750000000000000 23.062500000000000 ++output: 
-179769313486231570814527423731704356798070567525844996598917476803157260780028538760589558632766878171540458953514382464234321326889464182768467546703537516986049910576551282076245490090389328944075868508455133942304583236903222948165808559332123348274797826204144723168738177180919299881250404026184124858368.000000000000000 ++fcsr: 0x5050200 ++roundig mode: +inf ++fscaleb.d :: ++input: -45786.500000000000000 0.000000000000000 ++output: -45786.500000000000000 ++fcsr: 0x200 ++roundig mode: +inf ++fscaleb.d :: ++input: 456.000000000000000 456.250000000000000 ++output: inf ++fcsr: 0x5050200 ++roundig mode: +inf ++fscaleb.d :: ++input: 34.031250000000000 3.000000000000000 ++output: inf ++fcsr: 0x5050200 ++roundig mode: +inf ++fscaleb.d :: ++input: 45786.750000000000000 -1.000000000000000 ++output: 0.000000000000001 ++fcsr: 0x3030200 ++roundig mode: +inf ++fscaleb.d :: ++input: 1752065.000000000000000 1384.500000000000000 ++output: inf ++fcsr: 0x5050200 ++roundig mode: +inf ++fscaleb.d :: ++input: 107.000000000000000 -7.000000000000000 ++output: 0.000000000000001 ++fcsr: 0x3030200 ++roundig mode: +inf ++fscaleb.d :: ++input: -45667.250000000000000 1000000000.000000000000000 ++output: -179769313486231570814527423731704356798070567525844996598917476803157260780028538760589558632766878171540458953514382464234321326889464182768467546703537516986049910576551282076245490090389328944075868508455133942304583236903222948165808559332123348274797826204144723168738177180919299881250404026184124858368.000000000000000 ++fcsr: 0x5050200 ++roundig mode: +inf ++fscaleb.d :: ++input: -7.000000000000000 -5786.500000000000000 ++output: -0.000000000000000 ++fcsr: 0x3030200 ++roundig mode: +inf ++fscaleb.d :: ++input: -347856.500000000000000 1752.000000000000000 ++output: -179769313486231570814527423731704356798070567525844996598917476803157260780028538760589558632766878171540458953514382464234321326889464182768467546703537516986049910576551282076245490090389328944075868508455133942304583236903222948165808559332123348274797826204144723168738177180919299881250404026184124858368.000000000000000 ++fcsr: 0x5050200 ++roundig mode: +inf ++fscaleb.d :: ++input: 356047.500000000000000 0.015625000000000 ++output: inf ++fcsr: 0x5050200 ++roundig mode: +inf ++fscaleb.d :: ++input: -1.000000000000000 0.031250000000000 ++output: -179769313486231570814527423731704356798070567525844996598917476803157260780028538760589558632766878171540458953514382464234321326889464182768467546703537516986049910576551282076245490090389328944075868508455133942304583236903222948165808559332123348274797826204144723168738177180919299881250404026184124858368.000000000000000 ++fcsr: 0x5050200 ++roundig mode: +inf ++fscaleb.d :: ++input: 23.062500000000000 -248562.750000000000000 ++output: 0.000000000000001 ++fcsr: 0x3030200 ++roundig mode: -inf ++fscaleb.d :: ++input: 0.000000000000000 -45786.500000000000000 ++output: 0.000000000000000 ++fcsr: 0x300 ++roundig mode: -inf ++fscaleb.d :: ++input: 456.250000000000000 456.250000000000000 ++output: 179769313486231570814527423731704356798070567525844996598917476803157260780028538760589558632766878171540458953514382464234321326889464182768467546703537516986049910576551282076245490090389328944075868508455133942304583236903222948165808559332123348274797826204144723168738177180919299881250404026184124858368.000000000000000 ++fcsr: 0x5050300 ++roundig mode: -inf ++fscaleb.d :: ++input: 3.000000000000000 34.031250000000000 ++output: 
179769313486231570814527423731704356798070567525844996598917476803157260780028538760589558632766878171540458953514382464234321326889464182768467546703537516986049910576551282076245490090389328944075868508455133942304583236903222948165808559332123348274797826204144723168738177180919299881250404026184124858368.000000000000000 ++fcsr: 0x5050300 ++roundig mode: -inf ++fscaleb.d :: ++input: -1.000000000000000 45786.750000000000000 ++output: -inf ++fcsr: 0x5050300 ++roundig mode: -inf ++fscaleb.d :: ++input: 1384.500000000000000 1752065.000000000000000 ++output: 179769313486231570814527423731704356798070567525844996598917476803157260780028538760589558632766878171540458953514382464234321326889464182768467546703537516986049910576551282076245490090389328944075868508455133942304583236903222948165808559332123348274797826204144723168738177180919299881250404026184124858368.000000000000000 ++fcsr: 0x5050300 ++roundig mode: -inf ++fscaleb.d :: ++input: -7.250000000000000 107.000000000000000 ++output: -inf ++fcsr: 0x5050300 ++roundig mode: -inf ++fscaleb.d :: ++input: 1000000000.000000000000000 -45667.250000000000000 ++output: 0.000000000000000 ++fcsr: 0x3030300 ++roundig mode: -inf ++fscaleb.d :: ++input: -5786.500000000000000 -7.250000000000000 ++output: -0.000000000000001 ++fcsr: 0x3030300 ++roundig mode: -inf ++fscaleb.d :: ++input: 1752.000000000000000 -347856.500000000000000 ++output: 0.000000000000000 ++fcsr: 0x3030300 ++roundig mode: -inf ++fscaleb.d :: ++input: 0.015625000000000 356047.500000000000000 ++output: 179769313486231570814527423731704356798070567525844996598917476803157260780028538760589558632766878171540458953514382464234321326889464182768467546703537516986049910576551282076245490090389328944075868508455133942304583236903222948165808559332123348274797826204144723168738177180919299881250404026184124858368.000000000000000 ++fcsr: 0x5050300 ++roundig mode: -inf ++fscaleb.d :: ++input: 0.031250000000000 -1.000000000000000 ++output: 0.000000000000000 ++fcsr: 0x3030300 ++roundig mode: -inf ++fscaleb.d :: ++input: -248562.750000000000000 23.062500000000000 ++output: -inf ++fcsr: 0x5050300 ++roundig mode: -inf ++fscaleb.d :: ++input: -45786.500000000000000 0.000000000000000 ++output: -45786.500000000000000 ++fcsr: 0x300 ++roundig mode: -inf ++fscaleb.d :: ++input: 456.000000000000000 456.250000000000000 ++output: 179769313486231570814527423731704356798070567525844996598917476803157260780028538760589558632766878171540458953514382464234321326889464182768467546703537516986049910576551282076245490090389328944075868508455133942304583236903222948165808559332123348274797826204144723168738177180919299881250404026184124858368.000000000000000 ++fcsr: 0x5050300 ++roundig mode: -inf ++fscaleb.d :: ++input: 34.031250000000000 3.000000000000000 ++output: 179769313486231570814527423731704356798070567525844996598917476803157260780028538760589558632766878171540458953514382464234321326889464182768467546703537516986049910576551282076245490090389328944075868508455133942304583236903222948165808559332123348274797826204144723168738177180919299881250404026184124858368.000000000000000 ++fcsr: 0x5050300 ++roundig mode: -inf ++fscaleb.d :: ++input: 45786.750000000000000 -1.000000000000000 ++output: 0.000000000000000 ++fcsr: 0x3030300 ++roundig mode: -inf ++fscaleb.d :: ++input: 1752065.000000000000000 1384.500000000000000 ++output: 
179769313486231570814527423731704356798070567525844996598917476803157260780028538760589558632766878171540458953514382464234321326889464182768467546703537516986049910576551282076245490090389328944075868508455133942304583236903222948165808559332123348274797826204144723168738177180919299881250404026184124858368.000000000000000 ++fcsr: 0x5050300 ++roundig mode: -inf ++fscaleb.d :: ++input: 107.000000000000000 -7.000000000000000 ++output: 0.000000000000000 ++fcsr: 0x3030300 ++roundig mode: -inf ++fscaleb.d :: ++input: -45667.250000000000000 1000000000.000000000000000 ++output: -inf ++fcsr: 0x5050300 ++roundig mode: -inf ++fscaleb.d :: ++input: -7.000000000000000 -5786.500000000000000 ++output: -0.000000000000001 ++fcsr: 0x3030300 ++roundig mode: -inf ++fscaleb.d :: ++input: -347856.500000000000000 1752.000000000000000 ++output: -inf ++fcsr: 0x5050300 ++roundig mode: -inf ++fscaleb.d :: ++input: 356047.500000000000000 0.015625000000000 ++output: 179769313486231570814527423731704356798070567525844996598917476803157260780028538760589558632766878171540458953514382464234321326889464182768467546703537516986049910576551282076245490090389328944075868508455133942304583236903222948165808559332123348274797826204144723168738177180919299881250404026184124858368.000000000000000 ++fcsr: 0x5050300 ++roundig mode: -inf ++fscaleb.d :: ++input: -1.000000000000000 0.031250000000000 ++output: -inf ++fcsr: 0x5050300 ++roundig mode: -inf ++fscaleb.d :: ++input: 23.062500000000000 -248562.750000000000000 ++output: 0.000000000000000 ++fcsr: 0x3030300 ++roundig mode: near ++flogb.s :: ++input: 0.000000 ++output: -inf ++fcsr: 0x8080000 ++roundig mode: near ++flogb.s :: ++input: 456.250000 ++output: 8.000000 ++fcsr: 0 ++roundig mode: near ++flogb.s :: ++input: 3.000000 ++output: 1.000000 ++fcsr: 0 ++roundig mode: near ++flogb.s :: ++input: -1.000000 ++output: nan ++fcsr: 0x10100000 ++roundig mode: near ++flogb.s :: ++input: 1384.500000 ++output: 10.000000 ++fcsr: 0 ++roundig mode: near ++flogb.s :: ++input: -7.250000 ++output: nan ++fcsr: 0x10100000 ++roundig mode: near ++flogb.s :: ++input: 1000000000.000000 ++output: 29.000000 ++fcsr: 0 ++roundig mode: near ++flogb.s :: ++input: -5786.500000 ++output: nan ++fcsr: 0x10100000 ++roundig mode: near ++flogb.s :: ++input: 1752.000000 ++output: 10.000000 ++fcsr: 0 ++roundig mode: near ++flogb.s :: ++input: 0.015625 ++output: -6.000000 ++fcsr: 0 ++roundig mode: near ++flogb.s :: ++input: 0.031250 ++output: -5.000000 ++fcsr: 0 ++roundig mode: near ++flogb.s :: ++input: -248562.750000 ++output: nan ++fcsr: 0x10100000 ++roundig mode: near ++flogb.s :: ++input: -45786.500000 ++output: nan ++fcsr: 0x10100000 ++roundig mode: near ++flogb.s :: ++input: 456.000000 ++output: 8.000000 ++fcsr: 0 ++roundig mode: near ++flogb.s :: ++input: 34.031250 ++output: 5.000000 ++fcsr: 0 ++roundig mode: near ++flogb.s :: ++input: 45786.750000 ++output: 15.000000 ++fcsr: 0 ++roundig mode: near ++flogb.s :: ++input: 1752065.000000 ++output: 20.000000 ++fcsr: 0 ++roundig mode: near ++flogb.s :: ++input: 107.000000 ++output: 6.000000 ++fcsr: 0 ++roundig mode: near ++flogb.s :: ++input: -45667.250000 ++output: nan ++fcsr: 0x10100000 ++roundig mode: near ++flogb.s :: ++input: -7.000000 ++output: nan ++fcsr: 0x10100000 ++roundig mode: near ++flogb.s :: ++input: -347856.500000 ++output: nan ++fcsr: 0x10100000 ++roundig mode: near ++flogb.s :: ++input: 356047.500000 ++output: 18.000000 ++fcsr: 0 ++roundig mode: near ++flogb.s :: ++input: -1.000000 ++output: nan ++fcsr: 0x10100000 ++roundig mode: near 
++flogb.s :: ++input: 23.062500 ++output: 4.000000 ++fcsr: 0 ++roundig mode: zero ++flogb.s :: ++input: 0.000000 ++output: -inf ++fcsr: 0x8080100 ++roundig mode: zero ++flogb.s :: ++input: 456.250000 ++output: 8.000000 ++fcsr: 0x100 ++roundig mode: zero ++flogb.s :: ++input: 3.000000 ++output: 1.000000 ++fcsr: 0x100 ++roundig mode: zero ++flogb.s :: ++input: -1.000000 ++output: nan ++fcsr: 0x10100100 ++roundig mode: zero ++flogb.s :: ++input: 1384.500000 ++output: 10.000000 ++fcsr: 0x100 ++roundig mode: zero ++flogb.s :: ++input: -7.250000 ++output: nan ++fcsr: 0x10100100 ++roundig mode: zero ++flogb.s :: ++input: 1000000000.000000 ++output: 29.000000 ++fcsr: 0x100 ++roundig mode: zero ++flogb.s :: ++input: -5786.500000 ++output: nan ++fcsr: 0x10100100 ++roundig mode: zero ++flogb.s :: ++input: 1752.000000 ++output: 10.000000 ++fcsr: 0x100 ++roundig mode: zero ++flogb.s :: ++input: 0.015625 ++output: -6.000000 ++fcsr: 0x100 ++roundig mode: zero ++flogb.s :: ++input: 0.031250 ++output: -5.000000 ++fcsr: 0x100 ++roundig mode: zero ++flogb.s :: ++input: -248562.750000 ++output: nan ++fcsr: 0x10100100 ++roundig mode: zero ++flogb.s :: ++input: -45786.500000 ++output: nan ++fcsr: 0x10100100 ++roundig mode: zero ++flogb.s :: ++input: 456.000000 ++output: 8.000000 ++fcsr: 0x100 ++roundig mode: zero ++flogb.s :: ++input: 34.031250 ++output: 5.000000 ++fcsr: 0x100 ++roundig mode: zero ++flogb.s :: ++input: 45786.750000 ++output: 15.000000 ++fcsr: 0x100 ++roundig mode: zero ++flogb.s :: ++input: 1752065.000000 ++output: 20.000000 ++fcsr: 0x100 ++roundig mode: zero ++flogb.s :: ++input: 107.000000 ++output: 6.000000 ++fcsr: 0x100 ++roundig mode: zero ++flogb.s :: ++input: -45667.250000 ++output: nan ++fcsr: 0x10100100 ++roundig mode: zero ++flogb.s :: ++input: -7.000000 ++output: nan ++fcsr: 0x10100100 ++roundig mode: zero ++flogb.s :: ++input: -347856.500000 ++output: nan ++fcsr: 0x10100100 ++roundig mode: zero ++flogb.s :: ++input: 356047.500000 ++output: 18.000000 ++fcsr: 0x100 ++roundig mode: zero ++flogb.s :: ++input: -1.000000 ++output: nan ++fcsr: 0x10100100 ++roundig mode: zero ++flogb.s :: ++input: 23.062500 ++output: 4.000000 ++fcsr: 0x100 ++roundig mode: +inf ++flogb.s :: ++input: 0.000000 ++output: -inf ++fcsr: 0x8080200 ++roundig mode: +inf ++flogb.s :: ++input: 456.250000 ++output: 8.000000 ++fcsr: 0x200 ++roundig mode: +inf ++flogb.s :: ++input: 3.000000 ++output: 1.000000 ++fcsr: 0x200 ++roundig mode: +inf ++flogb.s :: ++input: -1.000000 ++output: nan ++fcsr: 0x10100200 ++roundig mode: +inf ++flogb.s :: ++input: 1384.500000 ++output: 10.000000 ++fcsr: 0x200 ++roundig mode: +inf ++flogb.s :: ++input: -7.250000 ++output: nan ++fcsr: 0x10100200 ++roundig mode: +inf ++flogb.s :: ++input: 1000000000.000000 ++output: 29.000000 ++fcsr: 0x200 ++roundig mode: +inf ++flogb.s :: ++input: -5786.500000 ++output: nan ++fcsr: 0x10100200 ++roundig mode: +inf ++flogb.s :: ++input: 1752.000000 ++output: 10.000000 ++fcsr: 0x200 ++roundig mode: +inf ++flogb.s :: ++input: 0.015625 ++output: -6.000000 ++fcsr: 0x200 ++roundig mode: +inf ++flogb.s :: ++input: 0.031250 ++output: -5.000000 ++fcsr: 0x200 ++roundig mode: +inf ++flogb.s :: ++input: -248562.750000 ++output: nan ++fcsr: 0x10100200 ++roundig mode: +inf ++flogb.s :: ++input: -45786.500000 ++output: nan ++fcsr: 0x10100200 ++roundig mode: +inf ++flogb.s :: ++input: 456.000000 ++output: 8.000000 ++fcsr: 0x200 ++roundig mode: +inf ++flogb.s :: ++input: 34.031250 ++output: 5.000000 ++fcsr: 0x200 ++roundig mode: +inf ++flogb.s :: ++input: 45786.750000 
++output: 15.000000 ++fcsr: 0x200 ++roundig mode: +inf ++flogb.s :: ++input: 1752065.000000 ++output: 20.000000 ++fcsr: 0x200 ++roundig mode: +inf ++flogb.s :: ++input: 107.000000 ++output: 6.000000 ++fcsr: 0x200 ++roundig mode: +inf ++flogb.s :: ++input: -45667.250000 ++output: nan ++fcsr: 0x10100200 ++roundig mode: +inf ++flogb.s :: ++input: -7.000000 ++output: nan ++fcsr: 0x10100200 ++roundig mode: +inf ++flogb.s :: ++input: -347856.500000 ++output: nan ++fcsr: 0x10100200 ++roundig mode: +inf ++flogb.s :: ++input: 356047.500000 ++output: 18.000000 ++fcsr: 0x200 ++roundig mode: +inf ++flogb.s :: ++input: -1.000000 ++output: nan ++fcsr: 0x10100200 ++roundig mode: +inf ++flogb.s :: ++input: 23.062500 ++output: 4.000000 ++fcsr: 0x200 ++roundig mode: -inf ++flogb.s :: ++input: 0.000000 ++output: -inf ++fcsr: 0x8080300 ++roundig mode: -inf ++flogb.s :: ++input: 456.250000 ++output: 8.000000 ++fcsr: 0x300 ++roundig mode: -inf ++flogb.s :: ++input: 3.000000 ++output: 1.000000 ++fcsr: 0x300 ++roundig mode: -inf ++flogb.s :: ++input: -1.000000 ++output: nan ++fcsr: 0x10100300 ++roundig mode: -inf ++flogb.s :: ++input: 1384.500000 ++output: 10.000000 ++fcsr: 0x300 ++roundig mode: -inf ++flogb.s :: ++input: -7.250000 ++output: nan ++fcsr: 0x10100300 ++roundig mode: -inf ++flogb.s :: ++input: 1000000000.000000 ++output: 29.000000 ++fcsr: 0x300 ++roundig mode: -inf ++flogb.s :: ++input: -5786.500000 ++output: nan ++fcsr: 0x10100300 ++roundig mode: -inf ++flogb.s :: ++input: 1752.000000 ++output: 10.000000 ++fcsr: 0x300 ++roundig mode: -inf ++flogb.s :: ++input: 0.015625 ++output: -6.000000 ++fcsr: 0x300 ++roundig mode: -inf ++flogb.s :: ++input: 0.031250 ++output: -5.000000 ++fcsr: 0x300 ++roundig mode: -inf ++flogb.s :: ++input: -248562.750000 ++output: nan ++fcsr: 0x10100300 ++roundig mode: -inf ++flogb.s :: ++input: -45786.500000 ++output: nan ++fcsr: 0x10100300 ++roundig mode: -inf ++flogb.s :: ++input: 456.000000 ++output: 8.000000 ++fcsr: 0x300 ++roundig mode: -inf ++flogb.s :: ++input: 34.031250 ++output: 5.000000 ++fcsr: 0x300 ++roundig mode: -inf ++flogb.s :: ++input: 45786.750000 ++output: 15.000000 ++fcsr: 0x300 ++roundig mode: -inf ++flogb.s :: ++input: 1752065.000000 ++output: 20.000000 ++fcsr: 0x300 ++roundig mode: -inf ++flogb.s :: ++input: 107.000000 ++output: 6.000000 ++fcsr: 0x300 ++roundig mode: -inf ++flogb.s :: ++input: -45667.250000 ++output: nan ++fcsr: 0x10100300 ++roundig mode: -inf ++flogb.s :: ++input: -7.000000 ++output: nan ++fcsr: 0x10100300 ++roundig mode: -inf ++flogb.s :: ++input: -347856.500000 ++output: nan ++fcsr: 0x10100300 ++roundig mode: -inf ++flogb.s :: ++input: 356047.500000 ++output: 18.000000 ++fcsr: 0x300 ++roundig mode: -inf ++flogb.s :: ++input: -1.000000 ++output: nan ++fcsr: 0x10100300 ++roundig mode: -inf ++flogb.s :: ++input: 23.062500 ++output: 4.000000 ++fcsr: 0x300 ++roundig mode: near ++flogb.d :: ++input: 0.000000000000000 ++output: -inf ++fcsr: 0x8080000 ++roundig mode: near ++flogb.d :: ++input: 456.250000000000000 ++output: 8.000000000000000 ++fcsr: 0 ++roundig mode: near ++flogb.d :: ++input: 3.000000000000000 ++output: 1.000000000000000 ++fcsr: 0 ++roundig mode: near ++flogb.d :: ++input: -1.000000000000000 ++output: nan ++fcsr: 0x10100000 ++roundig mode: near ++flogb.d :: ++input: 1384.500000000000000 ++output: 10.000000000000000 ++fcsr: 0 ++roundig mode: near ++flogb.d :: ++input: -7.250000000000000 ++output: nan ++fcsr: 0x10100000 ++roundig mode: near ++flogb.d :: ++input: 1000000000.000000000000000 ++output: 29.000000000000000 ++fcsr: 0 
++roundig mode: near ++flogb.d :: ++input: -5786.500000000000000 ++output: nan ++fcsr: 0x10100000 ++roundig mode: near ++flogb.d :: ++input: 1752.000000000000000 ++output: 10.000000000000000 ++fcsr: 0 ++roundig mode: near ++flogb.d :: ++input: 0.015625000000000 ++output: -6.000000000000000 ++fcsr: 0 ++roundig mode: near ++flogb.d :: ++input: 0.031250000000000 ++output: -5.000000000000000 ++fcsr: 0 ++roundig mode: near ++flogb.d :: ++input: -248562.750000000000000 ++output: nan ++fcsr: 0x10100000 ++roundig mode: near ++flogb.d :: ++input: -45786.500000000000000 ++output: nan ++fcsr: 0x10100000 ++roundig mode: near ++flogb.d :: ++input: 456.000000000000000 ++output: 8.000000000000000 ++fcsr: 0 ++roundig mode: near ++flogb.d :: ++input: 34.031250000000000 ++output: 5.000000000000000 ++fcsr: 0 ++roundig mode: near ++flogb.d :: ++input: 45786.750000000000000 ++output: 15.000000000000000 ++fcsr: 0 ++roundig mode: near ++flogb.d :: ++input: 1752065.000000000000000 ++output: 20.000000000000000 ++fcsr: 0 ++roundig mode: near ++flogb.d :: ++input: 107.000000000000000 ++output: 6.000000000000000 ++fcsr: 0 ++roundig mode: near ++flogb.d :: ++input: -45667.250000000000000 ++output: nan ++fcsr: 0x10100000 ++roundig mode: near ++flogb.d :: ++input: -7.000000000000000 ++output: nan ++fcsr: 0x10100000 ++roundig mode: near ++flogb.d :: ++input: -347856.500000000000000 ++output: nan ++fcsr: 0x10100000 ++roundig mode: near ++flogb.d :: ++input: 356047.500000000000000 ++output: 18.000000000000000 ++fcsr: 0 ++roundig mode: near ++flogb.d :: ++input: -1.000000000000000 ++output: nan ++fcsr: 0x10100000 ++roundig mode: near ++flogb.d :: ++input: 23.062500000000000 ++output: 4.000000000000000 ++fcsr: 0 ++roundig mode: zero ++flogb.d :: ++input: 0.000000000000000 ++output: -inf ++fcsr: 0x8080100 ++roundig mode: zero ++flogb.d :: ++input: 456.250000000000000 ++output: 8.000000000000000 ++fcsr: 0x100 ++roundig mode: zero ++flogb.d :: ++input: 3.000000000000000 ++output: 1.000000000000000 ++fcsr: 0x100 ++roundig mode: zero ++flogb.d :: ++input: -1.000000000000000 ++output: nan ++fcsr: 0x10100100 ++roundig mode: zero ++flogb.d :: ++input: 1384.500000000000000 ++output: 10.000000000000000 ++fcsr: 0x100 ++roundig mode: zero ++flogb.d :: ++input: -7.250000000000000 ++output: nan ++fcsr: 0x10100100 ++roundig mode: zero ++flogb.d :: ++input: 1000000000.000000000000000 ++output: 29.000000000000000 ++fcsr: 0x100 ++roundig mode: zero ++flogb.d :: ++input: -5786.500000000000000 ++output: nan ++fcsr: 0x10100100 ++roundig mode: zero ++flogb.d :: ++input: 1752.000000000000000 ++output: 10.000000000000000 ++fcsr: 0x100 ++roundig mode: zero ++flogb.d :: ++input: 0.015625000000000 ++output: -6.000000000000000 ++fcsr: 0x100 ++roundig mode: zero ++flogb.d :: ++input: 0.031250000000000 ++output: -5.000000000000000 ++fcsr: 0x100 ++roundig mode: zero ++flogb.d :: ++input: -248562.750000000000000 ++output: nan ++fcsr: 0x10100100 ++roundig mode: zero ++flogb.d :: ++input: -45786.500000000000000 ++output: nan ++fcsr: 0x10100100 ++roundig mode: zero ++flogb.d :: ++input: 456.000000000000000 ++output: 8.000000000000000 ++fcsr: 0x100 ++roundig mode: zero ++flogb.d :: ++input: 34.031250000000000 ++output: 5.000000000000000 ++fcsr: 0x100 ++roundig mode: zero ++flogb.d :: ++input: 45786.750000000000000 ++output: 15.000000000000000 ++fcsr: 0x100 ++roundig mode: zero ++flogb.d :: ++input: 1752065.000000000000000 ++output: 20.000000000000000 ++fcsr: 0x100 ++roundig mode: zero ++flogb.d :: ++input: 107.000000000000000 ++output: 6.000000000000000 
++fcsr: 0x100 ++roundig mode: zero ++flogb.d :: ++input: -45667.250000000000000 ++output: nan ++fcsr: 0x10100100 ++roundig mode: zero ++flogb.d :: ++input: -7.000000000000000 ++output: nan ++fcsr: 0x10100100 ++roundig mode: zero ++flogb.d :: ++input: -347856.500000000000000 ++output: nan ++fcsr: 0x10100100 ++roundig mode: zero ++flogb.d :: ++input: 356047.500000000000000 ++output: 18.000000000000000 ++fcsr: 0x100 ++roundig mode: zero ++flogb.d :: ++input: -1.000000000000000 ++output: nan ++fcsr: 0x10100100 ++roundig mode: zero ++flogb.d :: ++input: 23.062500000000000 ++output: 4.000000000000000 ++fcsr: 0x100 ++roundig mode: +inf ++flogb.d :: ++input: 0.000000000000000 ++output: -inf ++fcsr: 0x8080200 ++roundig mode: +inf ++flogb.d :: ++input: 456.250000000000000 ++output: 8.000000000000000 ++fcsr: 0x200 ++roundig mode: +inf ++flogb.d :: ++input: 3.000000000000000 ++output: 1.000000000000000 ++fcsr: 0x200 ++roundig mode: +inf ++flogb.d :: ++input: -1.000000000000000 ++output: nan ++fcsr: 0x10100200 ++roundig mode: +inf ++flogb.d :: ++input: 1384.500000000000000 ++output: 10.000000000000000 ++fcsr: 0x200 ++roundig mode: +inf ++flogb.d :: ++input: -7.250000000000000 ++output: nan ++fcsr: 0x10100200 ++roundig mode: +inf ++flogb.d :: ++input: 1000000000.000000000000000 ++output: 29.000000000000000 ++fcsr: 0x200 ++roundig mode: +inf ++flogb.d :: ++input: -5786.500000000000000 ++output: nan ++fcsr: 0x10100200 ++roundig mode: +inf ++flogb.d :: ++input: 1752.000000000000000 ++output: 10.000000000000000 ++fcsr: 0x200 ++roundig mode: +inf ++flogb.d :: ++input: 0.015625000000000 ++output: -6.000000000000000 ++fcsr: 0x200 ++roundig mode: +inf ++flogb.d :: ++input: 0.031250000000000 ++output: -5.000000000000000 ++fcsr: 0x200 ++roundig mode: +inf ++flogb.d :: ++input: -248562.750000000000000 ++output: nan ++fcsr: 0x10100200 ++roundig mode: +inf ++flogb.d :: ++input: -45786.500000000000000 ++output: nan ++fcsr: 0x10100200 ++roundig mode: +inf ++flogb.d :: ++input: 456.000000000000000 ++output: 8.000000000000000 ++fcsr: 0x200 ++roundig mode: +inf ++flogb.d :: ++input: 34.031250000000000 ++output: 5.000000000000000 ++fcsr: 0x200 ++roundig mode: +inf ++flogb.d :: ++input: 45786.750000000000000 ++output: 15.000000000000000 ++fcsr: 0x200 ++roundig mode: +inf ++flogb.d :: ++input: 1752065.000000000000000 ++output: 20.000000000000000 ++fcsr: 0x200 ++roundig mode: +inf ++flogb.d :: ++input: 107.000000000000000 ++output: 6.000000000000000 ++fcsr: 0x200 ++roundig mode: +inf ++flogb.d :: ++input: -45667.250000000000000 ++output: nan ++fcsr: 0x10100200 ++roundig mode: +inf ++flogb.d :: ++input: -7.000000000000000 ++output: nan ++fcsr: 0x10100200 ++roundig mode: +inf ++flogb.d :: ++input: -347856.500000000000000 ++output: nan ++fcsr: 0x10100200 ++roundig mode: +inf ++flogb.d :: ++input: 356047.500000000000000 ++output: 18.000000000000000 ++fcsr: 0x200 ++roundig mode: +inf ++flogb.d :: ++input: -1.000000000000000 ++output: nan ++fcsr: 0x10100200 ++roundig mode: +inf ++flogb.d :: ++input: 23.062500000000000 ++output: 4.000000000000000 ++fcsr: 0x200 ++roundig mode: -inf ++flogb.d :: ++input: 0.000000000000000 ++output: -inf ++fcsr: 0x8080300 ++roundig mode: -inf ++flogb.d :: ++input: 456.250000000000000 ++output: 8.000000000000000 ++fcsr: 0x300 ++roundig mode: -inf ++flogb.d :: ++input: 3.000000000000000 ++output: 1.000000000000000 ++fcsr: 0x300 ++roundig mode: -inf ++flogb.d :: ++input: -1.000000000000000 ++output: nan ++fcsr: 0x10100300 ++roundig mode: -inf ++flogb.d :: ++input: 1384.500000000000000 ++output: 
10.000000000000000 ++fcsr: 0x300 ++roundig mode: -inf ++flogb.d :: ++input: -7.250000000000000 ++output: nan ++fcsr: 0x10100300 ++roundig mode: -inf ++flogb.d :: ++input: 1000000000.000000000000000 ++output: 29.000000000000000 ++fcsr: 0x300 ++roundig mode: -inf ++flogb.d :: ++input: -5786.500000000000000 ++output: nan ++fcsr: 0x10100300 ++roundig mode: -inf ++flogb.d :: ++input: 1752.000000000000000 ++output: 10.000000000000000 ++fcsr: 0x300 ++roundig mode: -inf ++flogb.d :: ++input: 0.015625000000000 ++output: -6.000000000000000 ++fcsr: 0x300 ++roundig mode: -inf ++flogb.d :: ++input: 0.031250000000000 ++output: -5.000000000000000 ++fcsr: 0x300 ++roundig mode: -inf ++flogb.d :: ++input: -248562.750000000000000 ++output: nan ++fcsr: 0x10100300 ++roundig mode: -inf ++flogb.d :: ++input: -45786.500000000000000 ++output: nan ++fcsr: 0x10100300 ++roundig mode: -inf ++flogb.d :: ++input: 456.000000000000000 ++output: 8.000000000000000 ++fcsr: 0x300 ++roundig mode: -inf ++flogb.d :: ++input: 34.031250000000000 ++output: 5.000000000000000 ++fcsr: 0x300 ++roundig mode: -inf ++flogb.d :: ++input: 45786.750000000000000 ++output: 15.000000000000000 ++fcsr: 0x300 ++roundig mode: -inf ++flogb.d :: ++input: 1752065.000000000000000 ++output: 20.000000000000000 ++fcsr: 0x300 ++roundig mode: -inf ++flogb.d :: ++input: 107.000000000000000 ++output: 6.000000000000000 ++fcsr: 0x300 ++roundig mode: -inf ++flogb.d :: ++input: -45667.250000000000000 ++output: nan ++fcsr: 0x10100300 ++roundig mode: -inf ++flogb.d :: ++input: -7.000000000000000 ++output: nan ++fcsr: 0x10100300 ++roundig mode: -inf ++flogb.d :: ++input: -347856.500000000000000 ++output: nan ++fcsr: 0x10100300 ++roundig mode: -inf ++flogb.d :: ++input: 356047.500000000000000 ++output: 18.000000000000000 ++fcsr: 0x300 ++roundig mode: -inf ++flogb.d :: ++input: -1.000000000000000 ++output: nan ++fcsr: 0x10100300 ++roundig mode: -inf ++flogb.d :: ++input: 23.062500000000000 ++output: 4.000000000000000 ++fcsr: 0x300 ++roundig mode: near ++fcvt.s.d :: ++input: 0.000000000000000 ++output: 0.000000 ++fcsr: 0 ++roundig mode: near ++fcvt.s.d :: ++input: 456.250000000000000 ++output: 456.250000 ++fcsr: 0 ++roundig mode: near ++fcvt.s.d :: ++input: 3.000000000000000 ++output: 3.000000 ++fcsr: 0 ++roundig mode: near ++fcvt.s.d :: ++input: -1.000000000000000 ++output: -1.000000 ++fcsr: 0 ++roundig mode: near ++fcvt.s.d :: ++input: 1384.500000000000000 ++output: 1384.500000 ++fcsr: 0 ++roundig mode: near ++fcvt.s.d :: ++input: -7.250000000000000 ++output: -7.250000 ++fcsr: 0 ++roundig mode: near ++fcvt.s.d :: ++input: 1000000000.000000000000000 ++output: 1000000000.000000 ++fcsr: 0 ++roundig mode: near ++fcvt.s.d :: ++input: -5786.500000000000000 ++output: -5786.500000 ++fcsr: 0 ++roundig mode: near ++fcvt.s.d :: ++input: 1752.000000000000000 ++output: 1752.000000 ++fcsr: 0 ++roundig mode: near ++fcvt.s.d :: ++input: 0.015625000000000 ++output: 0.015625 ++fcsr: 0 ++roundig mode: near ++fcvt.s.d :: ++input: 0.031250000000000 ++output: 0.031250 ++fcsr: 0 ++roundig mode: near ++fcvt.s.d :: ++input: -248562.750000000000000 ++output: -248562.750000 ++fcsr: 0 ++roundig mode: near ++fcvt.s.d :: ++input: -45786.500000000000000 ++output: -45786.500000 ++fcsr: 0 ++roundig mode: near ++fcvt.s.d :: ++input: 456.000000000000000 ++output: 456.000000 ++fcsr: 0 ++roundig mode: near ++fcvt.s.d :: ++input: 34.031250000000000 ++output: 34.031250 ++fcsr: 0 ++roundig mode: near ++fcvt.s.d :: ++input: 45786.750000000000000 ++output: 45786.750000 ++fcsr: 0 ++roundig mode: near 
++fcvt.s.d :: ++input: 1752065.000000000000000 ++output: 1752065.000000 ++fcsr: 0 ++roundig mode: near ++fcvt.s.d :: ++input: 107.000000000000000 ++output: 107.000000 ++fcsr: 0 ++roundig mode: near ++fcvt.s.d :: ++input: -45667.250000000000000 ++output: -45667.250000 ++fcsr: 0 ++roundig mode: near ++fcvt.s.d :: ++input: -7.000000000000000 ++output: -7.000000 ++fcsr: 0 ++roundig mode: near ++fcvt.s.d :: ++input: -347856.500000000000000 ++output: -347856.500000 ++fcsr: 0 ++roundig mode: near ++fcvt.s.d :: ++input: 356047.500000000000000 ++output: 356047.500000 ++fcsr: 0 ++roundig mode: near ++fcvt.s.d :: ++input: -1.000000000000000 ++output: -1.000000 ++fcsr: 0 ++roundig mode: near ++fcvt.s.d :: ++input: 23.062500000000000 ++output: 23.062500 ++fcsr: 0 ++roundig mode: zero ++fcvt.s.d :: ++input: 0.000000000000000 ++output: 0.000000 ++fcsr: 0x100 ++roundig mode: zero ++fcvt.s.d :: ++input: 456.250000000000000 ++output: 456.250000 ++fcsr: 0x100 ++roundig mode: zero ++fcvt.s.d :: ++input: 3.000000000000000 ++output: 3.000000 ++fcsr: 0x100 ++roundig mode: zero ++fcvt.s.d :: ++input: -1.000000000000000 ++output: -1.000000 ++fcsr: 0x100 ++roundig mode: zero ++fcvt.s.d :: ++input: 1384.500000000000000 ++output: 1384.500000 ++fcsr: 0x100 ++roundig mode: zero ++fcvt.s.d :: ++input: -7.250000000000000 ++output: -7.250000 ++fcsr: 0x100 ++roundig mode: zero ++fcvt.s.d :: ++input: 1000000000.000000000000000 ++output: 1000000000.000000 ++fcsr: 0x100 ++roundig mode: zero ++fcvt.s.d :: ++input: -5786.500000000000000 ++output: -5786.500000 ++fcsr: 0x100 ++roundig mode: zero ++fcvt.s.d :: ++input: 1752.000000000000000 ++output: 1752.000000 ++fcsr: 0x100 ++roundig mode: zero ++fcvt.s.d :: ++input: 0.015625000000000 ++output: 0.015625 ++fcsr: 0x100 ++roundig mode: zero ++fcvt.s.d :: ++input: 0.031250000000000 ++output: 0.031250 ++fcsr: 0x100 ++roundig mode: zero ++fcvt.s.d :: ++input: -248562.750000000000000 ++output: -248562.750000 ++fcsr: 0x100 ++roundig mode: zero ++fcvt.s.d :: ++input: -45786.500000000000000 ++output: -45786.500000 ++fcsr: 0x100 ++roundig mode: zero ++fcvt.s.d :: ++input: 456.000000000000000 ++output: 456.000000 ++fcsr: 0x100 ++roundig mode: zero ++fcvt.s.d :: ++input: 34.031250000000000 ++output: 34.031250 ++fcsr: 0x100 ++roundig mode: zero ++fcvt.s.d :: ++input: 45786.750000000000000 ++output: 45786.750000 ++fcsr: 0x100 ++roundig mode: zero ++fcvt.s.d :: ++input: 1752065.000000000000000 ++output: 1752065.000000 ++fcsr: 0x100 ++roundig mode: zero ++fcvt.s.d :: ++input: 107.000000000000000 ++output: 107.000000 ++fcsr: 0x100 ++roundig mode: zero ++fcvt.s.d :: ++input: -45667.250000000000000 ++output: -45667.250000 ++fcsr: 0x100 ++roundig mode: zero ++fcvt.s.d :: ++input: -7.000000000000000 ++output: -7.000000 ++fcsr: 0x100 ++roundig mode: zero ++fcvt.s.d :: ++input: -347856.500000000000000 ++output: -347856.500000 ++fcsr: 0x100 ++roundig mode: zero ++fcvt.s.d :: ++input: 356047.500000000000000 ++output: 356047.500000 ++fcsr: 0x100 ++roundig mode: zero ++fcvt.s.d :: ++input: -1.000000000000000 ++output: -1.000000 ++fcsr: 0x100 ++roundig mode: zero ++fcvt.s.d :: ++input: 23.062500000000000 ++output: 23.062500 ++fcsr: 0x100 ++roundig mode: +inf ++fcvt.s.d :: ++input: 0.000000000000000 ++output: 0.000000 ++fcsr: 0x200 ++roundig mode: +inf ++fcvt.s.d :: ++input: 456.250000000000000 ++output: 456.250000 ++fcsr: 0x200 ++roundig mode: +inf ++fcvt.s.d :: ++input: 3.000000000000000 ++output: 3.000000 ++fcsr: 0x200 ++roundig mode: +inf ++fcvt.s.d :: ++input: -1.000000000000000 ++output: -1.000000 
++fcsr: 0x200 ++roundig mode: +inf ++fcvt.s.d :: ++input: 1384.500000000000000 ++output: 1384.500000 ++fcsr: 0x200 ++roundig mode: +inf ++fcvt.s.d :: ++input: -7.250000000000000 ++output: -7.250000 ++fcsr: 0x200 ++roundig mode: +inf ++fcvt.s.d :: ++input: 1000000000.000000000000000 ++output: 1000000000.000000 ++fcsr: 0x200 ++roundig mode: +inf ++fcvt.s.d :: ++input: -5786.500000000000000 ++output: -5786.500000 ++fcsr: 0x200 ++roundig mode: +inf ++fcvt.s.d :: ++input: 1752.000000000000000 ++output: 1752.000000 ++fcsr: 0x200 ++roundig mode: +inf ++fcvt.s.d :: ++input: 0.015625000000000 ++output: 0.015625 ++fcsr: 0x200 ++roundig mode: +inf ++fcvt.s.d :: ++input: 0.031250000000000 ++output: 0.031250 ++fcsr: 0x200 ++roundig mode: +inf ++fcvt.s.d :: ++input: -248562.750000000000000 ++output: -248562.750000 ++fcsr: 0x200 ++roundig mode: +inf ++fcvt.s.d :: ++input: -45786.500000000000000 ++output: -45786.500000 ++fcsr: 0x200 ++roundig mode: +inf ++fcvt.s.d :: ++input: 456.000000000000000 ++output: 456.000000 ++fcsr: 0x200 ++roundig mode: +inf ++fcvt.s.d :: ++input: 34.031250000000000 ++output: 34.031250 ++fcsr: 0x200 ++roundig mode: +inf ++fcvt.s.d :: ++input: 45786.750000000000000 ++output: 45786.750000 ++fcsr: 0x200 ++roundig mode: +inf ++fcvt.s.d :: ++input: 1752065.000000000000000 ++output: 1752065.000000 ++fcsr: 0x200 ++roundig mode: +inf ++fcvt.s.d :: ++input: 107.000000000000000 ++output: 107.000000 ++fcsr: 0x200 ++roundig mode: +inf ++fcvt.s.d :: ++input: -45667.250000000000000 ++output: -45667.250000 ++fcsr: 0x200 ++roundig mode: +inf ++fcvt.s.d :: ++input: -7.000000000000000 ++output: -7.000000 ++fcsr: 0x200 ++roundig mode: +inf ++fcvt.s.d :: ++input: -347856.500000000000000 ++output: -347856.500000 ++fcsr: 0x200 ++roundig mode: +inf ++fcvt.s.d :: ++input: 356047.500000000000000 ++output: 356047.500000 ++fcsr: 0x200 ++roundig mode: +inf ++fcvt.s.d :: ++input: -1.000000000000000 ++output: -1.000000 ++fcsr: 0x200 ++roundig mode: +inf ++fcvt.s.d :: ++input: 23.062500000000000 ++output: 23.062500 ++fcsr: 0x200 ++roundig mode: -inf ++fcvt.s.d :: ++input: 0.000000000000000 ++output: 0.000000 ++fcsr: 0x300 ++roundig mode: -inf ++fcvt.s.d :: ++input: 456.250000000000000 ++output: 456.250000 ++fcsr: 0x300 ++roundig mode: -inf ++fcvt.s.d :: ++input: 3.000000000000000 ++output: 3.000000 ++fcsr: 0x300 ++roundig mode: -inf ++fcvt.s.d :: ++input: -1.000000000000000 ++output: -1.000000 ++fcsr: 0x300 ++roundig mode: -inf ++fcvt.s.d :: ++input: 1384.500000000000000 ++output: 1384.500000 ++fcsr: 0x300 ++roundig mode: -inf ++fcvt.s.d :: ++input: -7.250000000000000 ++output: -7.250000 ++fcsr: 0x300 ++roundig mode: -inf ++fcvt.s.d :: ++input: 1000000000.000000000000000 ++output: 1000000000.000000 ++fcsr: 0x300 ++roundig mode: -inf ++fcvt.s.d :: ++input: -5786.500000000000000 ++output: -5786.500000 ++fcsr: 0x300 ++roundig mode: -inf ++fcvt.s.d :: ++input: 1752.000000000000000 ++output: 1752.000000 ++fcsr: 0x300 ++roundig mode: -inf ++fcvt.s.d :: ++input: 0.015625000000000 ++output: 0.015625 ++fcsr: 0x300 ++roundig mode: -inf ++fcvt.s.d :: ++input: 0.031250000000000 ++output: 0.031250 ++fcsr: 0x300 ++roundig mode: -inf ++fcvt.s.d :: ++input: -248562.750000000000000 ++output: -248562.750000 ++fcsr: 0x300 ++roundig mode: -inf ++fcvt.s.d :: ++input: -45786.500000000000000 ++output: -45786.500000 ++fcsr: 0x300 ++roundig mode: -inf ++fcvt.s.d :: ++input: 456.000000000000000 ++output: 456.000000 ++fcsr: 0x300 ++roundig mode: -inf ++fcvt.s.d :: ++input: 34.031250000000000 ++output: 34.031250 ++fcsr: 0x300 ++roundig 
mode: -inf ++fcvt.s.d :: ++input: 45786.750000000000000 ++output: 45786.750000 ++fcsr: 0x300 ++roundig mode: -inf ++fcvt.s.d :: ++input: 1752065.000000000000000 ++output: 1752065.000000 ++fcsr: 0x300 ++roundig mode: -inf ++fcvt.s.d :: ++input: 107.000000000000000 ++output: 107.000000 ++fcsr: 0x300 ++roundig mode: -inf ++fcvt.s.d :: ++input: -45667.250000000000000 ++output: -45667.250000 ++fcsr: 0x300 ++roundig mode: -inf ++fcvt.s.d :: ++input: -7.000000000000000 ++output: -7.000000 ++fcsr: 0x300 ++roundig mode: -inf ++fcvt.s.d :: ++input: -347856.500000000000000 ++output: -347856.500000 ++fcsr: 0x300 ++roundig mode: -inf ++fcvt.s.d :: ++input: 356047.500000000000000 ++output: 356047.500000 ++fcsr: 0x300 ++roundig mode: -inf ++fcvt.s.d :: ++input: -1.000000000000000 ++output: -1.000000 ++fcsr: 0x300 ++roundig mode: -inf ++fcvt.s.d :: ++input: 23.062500000000000 ++output: 23.062500 ++fcsr: 0x300 ++roundig mode: near ++fcvt.d.s :: ++input: 0.000000 ++output: 0.000000000000000 ++fcsr: 0 ++roundig mode: near ++fcvt.d.s :: ++input: 456.250000 ++output: 456.250000000000000 ++fcsr: 0 ++roundig mode: near ++fcvt.d.s :: ++input: 3.000000 ++output: 3.000000000000000 ++fcsr: 0 ++roundig mode: near ++fcvt.d.s :: ++input: -1.000000 ++output: -1.000000000000000 ++fcsr: 0 ++roundig mode: near ++fcvt.d.s :: ++input: 1384.500000 ++output: 1384.500000000000000 ++fcsr: 0 ++roundig mode: near ++fcvt.d.s :: ++input: -7.250000 ++output: -7.250000000000000 ++fcsr: 0 ++roundig mode: near ++fcvt.d.s :: ++input: 1000000000.000000 ++output: 1000000000.000000000000000 ++fcsr: 0 ++roundig mode: near ++fcvt.d.s :: ++input: -5786.500000 ++output: -5786.500000000000000 ++fcsr: 0 ++roundig mode: near ++fcvt.d.s :: ++input: 1752.000000 ++output: 1752.000000000000000 ++fcsr: 0 ++roundig mode: near ++fcvt.d.s :: ++input: 0.015625 ++output: 0.015625000000000 ++fcsr: 0 ++roundig mode: near ++fcvt.d.s :: ++input: 0.031250 ++output: 0.031250000000000 ++fcsr: 0 ++roundig mode: near ++fcvt.d.s :: ++input: -248562.750000 ++output: -248562.750000000000000 ++fcsr: 0 ++roundig mode: near ++fcvt.d.s :: ++input: -45786.500000 ++output: -45786.500000000000000 ++fcsr: 0 ++roundig mode: near ++fcvt.d.s :: ++input: 456.000000 ++output: 456.000000000000000 ++fcsr: 0 ++roundig mode: near ++fcvt.d.s :: ++input: 34.031250 ++output: 34.031250000000000 ++fcsr: 0 ++roundig mode: near ++fcvt.d.s :: ++input: 45786.750000 ++output: 45786.750000000000000 ++fcsr: 0 ++roundig mode: near ++fcvt.d.s :: ++input: 1752065.000000 ++output: 1752065.000000000000000 ++fcsr: 0 ++roundig mode: near ++fcvt.d.s :: ++input: 107.000000 ++output: 107.000000000000000 ++fcsr: 0 ++roundig mode: near ++fcvt.d.s :: ++input: -45667.250000 ++output: -45667.250000000000000 ++fcsr: 0 ++roundig mode: near ++fcvt.d.s :: ++input: -7.000000 ++output: -7.000000000000000 ++fcsr: 0 ++roundig mode: near ++fcvt.d.s :: ++input: -347856.500000 ++output: -347856.500000000000000 ++fcsr: 0 ++roundig mode: near ++fcvt.d.s :: ++input: 356047.500000 ++output: 356047.500000000000000 ++fcsr: 0 ++roundig mode: near ++fcvt.d.s :: ++input: -1.000000 ++output: -1.000000000000000 ++fcsr: 0 ++roundig mode: near ++fcvt.d.s :: ++input: 23.062500 ++output: 23.062500000000000 ++fcsr: 0 ++roundig mode: zero ++fcvt.d.s :: ++input: 0.000000 ++output: 0.000000000000000 ++fcsr: 0x100 ++roundig mode: zero ++fcvt.d.s :: ++input: 456.250000 ++output: 456.250000000000000 ++fcsr: 0x100 ++roundig mode: zero ++fcvt.d.s :: ++input: 3.000000 ++output: 3.000000000000000 ++fcsr: 0x100 ++roundig mode: zero ++fcvt.d.s :: 
++input: -1.000000 ++output: -1.000000000000000 ++fcsr: 0x100 ++roundig mode: zero ++fcvt.d.s :: ++input: 1384.500000 ++output: 1384.500000000000000 ++fcsr: 0x100 ++roundig mode: zero ++fcvt.d.s :: ++input: -7.250000 ++output: -7.250000000000000 ++fcsr: 0x100 ++roundig mode: zero ++fcvt.d.s :: ++input: 1000000000.000000 ++output: 1000000000.000000000000000 ++fcsr: 0x100 ++roundig mode: zero ++fcvt.d.s :: ++input: -5786.500000 ++output: -5786.500000000000000 ++fcsr: 0x100 ++roundig mode: zero ++fcvt.d.s :: ++input: 1752.000000 ++output: 1752.000000000000000 ++fcsr: 0x100 ++roundig mode: zero ++fcvt.d.s :: ++input: 0.015625 ++output: 0.015625000000000 ++fcsr: 0x100 ++roundig mode: zero ++fcvt.d.s :: ++input: 0.031250 ++output: 0.031250000000000 ++fcsr: 0x100 ++roundig mode: zero ++fcvt.d.s :: ++input: -248562.750000 ++output: -248562.750000000000000 ++fcsr: 0x100 ++roundig mode: zero ++fcvt.d.s :: ++input: -45786.500000 ++output: -45786.500000000000000 ++fcsr: 0x100 ++roundig mode: zero ++fcvt.d.s :: ++input: 456.000000 ++output: 456.000000000000000 ++fcsr: 0x100 ++roundig mode: zero ++fcvt.d.s :: ++input: 34.031250 ++output: 34.031250000000000 ++fcsr: 0x100 ++roundig mode: zero ++fcvt.d.s :: ++input: 45786.750000 ++output: 45786.750000000000000 ++fcsr: 0x100 ++roundig mode: zero ++fcvt.d.s :: ++input: 1752065.000000 ++output: 1752065.000000000000000 ++fcsr: 0x100 ++roundig mode: zero ++fcvt.d.s :: ++input: 107.000000 ++output: 107.000000000000000 ++fcsr: 0x100 ++roundig mode: zero ++fcvt.d.s :: ++input: -45667.250000 ++output: -45667.250000000000000 ++fcsr: 0x100 ++roundig mode: zero ++fcvt.d.s :: ++input: -7.000000 ++output: -7.000000000000000 ++fcsr: 0x100 ++roundig mode: zero ++fcvt.d.s :: ++input: -347856.500000 ++output: -347856.500000000000000 ++fcsr: 0x100 ++roundig mode: zero ++fcvt.d.s :: ++input: 356047.500000 ++output: 356047.500000000000000 ++fcsr: 0x100 ++roundig mode: zero ++fcvt.d.s :: ++input: -1.000000 ++output: -1.000000000000000 ++fcsr: 0x100 ++roundig mode: zero ++fcvt.d.s :: ++input: 23.062500 ++output: 23.062500000000000 ++fcsr: 0x100 ++roundig mode: +inf ++fcvt.d.s :: ++input: 0.000000 ++output: 0.000000000000000 ++fcsr: 0x200 ++roundig mode: +inf ++fcvt.d.s :: ++input: 456.250000 ++output: 456.250000000000000 ++fcsr: 0x200 ++roundig mode: +inf ++fcvt.d.s :: ++input: 3.000000 ++output: 3.000000000000000 ++fcsr: 0x200 ++roundig mode: +inf ++fcvt.d.s :: ++input: -1.000000 ++output: -1.000000000000000 ++fcsr: 0x200 ++roundig mode: +inf ++fcvt.d.s :: ++input: 1384.500000 ++output: 1384.500000000000000 ++fcsr: 0x200 ++roundig mode: +inf ++fcvt.d.s :: ++input: -7.250000 ++output: -7.250000000000000 ++fcsr: 0x200 ++roundig mode: +inf ++fcvt.d.s :: ++input: 1000000000.000000 ++output: 1000000000.000000000000000 ++fcsr: 0x200 ++roundig mode: +inf ++fcvt.d.s :: ++input: -5786.500000 ++output: -5786.500000000000000 ++fcsr: 0x200 ++roundig mode: +inf ++fcvt.d.s :: ++input: 1752.000000 ++output: 1752.000000000000000 ++fcsr: 0x200 ++roundig mode: +inf ++fcvt.d.s :: ++input: 0.015625 ++output: 0.015625000000000 ++fcsr: 0x200 ++roundig mode: +inf ++fcvt.d.s :: ++input: 0.031250 ++output: 0.031250000000000 ++fcsr: 0x200 ++roundig mode: +inf ++fcvt.d.s :: ++input: -248562.750000 ++output: -248562.750000000000000 ++fcsr: 0x200 ++roundig mode: +inf ++fcvt.d.s :: ++input: -45786.500000 ++output: -45786.500000000000000 ++fcsr: 0x200 ++roundig mode: +inf ++fcvt.d.s :: ++input: 456.000000 ++output: 456.000000000000000 ++fcsr: 0x200 ++roundig mode: +inf ++fcvt.d.s :: ++input: 34.031250 
++output: 34.031250000000000 ++fcsr: 0x200 ++roundig mode: +inf ++fcvt.d.s :: ++input: 45786.750000 ++output: 45786.750000000000000 ++fcsr: 0x200 ++roundig mode: +inf ++fcvt.d.s :: ++input: 1752065.000000 ++output: 1752065.000000000000000 ++fcsr: 0x200 ++roundig mode: +inf ++fcvt.d.s :: ++input: 107.000000 ++output: 107.000000000000000 ++fcsr: 0x200 ++roundig mode: +inf ++fcvt.d.s :: ++input: -45667.250000 ++output: -45667.250000000000000 ++fcsr: 0x200 ++roundig mode: +inf ++fcvt.d.s :: ++input: -7.000000 ++output: -7.000000000000000 ++fcsr: 0x200 ++roundig mode: +inf ++fcvt.d.s :: ++input: -347856.500000 ++output: -347856.500000000000000 ++fcsr: 0x200 ++roundig mode: +inf ++fcvt.d.s :: ++input: 356047.500000 ++output: 356047.500000000000000 ++fcsr: 0x200 ++roundig mode: +inf ++fcvt.d.s :: ++input: -1.000000 ++output: -1.000000000000000 ++fcsr: 0x200 ++roundig mode: +inf ++fcvt.d.s :: ++input: 23.062500 ++output: 23.062500000000000 ++fcsr: 0x200 ++roundig mode: -inf ++fcvt.d.s :: ++input: 0.000000 ++output: 0.000000000000000 ++fcsr: 0x300 ++roundig mode: -inf ++fcvt.d.s :: ++input: 456.250000 ++output: 456.250000000000000 ++fcsr: 0x300 ++roundig mode: -inf ++fcvt.d.s :: ++input: 3.000000 ++output: 3.000000000000000 ++fcsr: 0x300 ++roundig mode: -inf ++fcvt.d.s :: ++input: -1.000000 ++output: -1.000000000000000 ++fcsr: 0x300 ++roundig mode: -inf ++fcvt.d.s :: ++input: 1384.500000 ++output: 1384.500000000000000 ++fcsr: 0x300 ++roundig mode: -inf ++fcvt.d.s :: ++input: -7.250000 ++output: -7.250000000000000 ++fcsr: 0x300 ++roundig mode: -inf ++fcvt.d.s :: ++input: 1000000000.000000 ++output: 1000000000.000000000000000 ++fcsr: 0x300 ++roundig mode: -inf ++fcvt.d.s :: ++input: -5786.500000 ++output: -5786.500000000000000 ++fcsr: 0x300 ++roundig mode: -inf ++fcvt.d.s :: ++input: 1752.000000 ++output: 1752.000000000000000 ++fcsr: 0x300 ++roundig mode: -inf ++fcvt.d.s :: ++input: 0.015625 ++output: 0.015625000000000 ++fcsr: 0x300 ++roundig mode: -inf ++fcvt.d.s :: ++input: 0.031250 ++output: 0.031250000000000 ++fcsr: 0x300 ++roundig mode: -inf ++fcvt.d.s :: ++input: -248562.750000 ++output: -248562.750000000000000 ++fcsr: 0x300 ++roundig mode: -inf ++fcvt.d.s :: ++input: -45786.500000 ++output: -45786.500000000000000 ++fcsr: 0x300 ++roundig mode: -inf ++fcvt.d.s :: ++input: 456.000000 ++output: 456.000000000000000 ++fcsr: 0x300 ++roundig mode: -inf ++fcvt.d.s :: ++input: 34.031250 ++output: 34.031250000000000 ++fcsr: 0x300 ++roundig mode: -inf ++fcvt.d.s :: ++input: 45786.750000 ++output: 45786.750000000000000 ++fcsr: 0x300 ++roundig mode: -inf ++fcvt.d.s :: ++input: 1752065.000000 ++output: 1752065.000000000000000 ++fcsr: 0x300 ++roundig mode: -inf ++fcvt.d.s :: ++input: 107.000000 ++output: 107.000000000000000 ++fcsr: 0x300 ++roundig mode: -inf ++fcvt.d.s :: ++input: -45667.250000 ++output: -45667.250000000000000 ++fcsr: 0x300 ++roundig mode: -inf ++fcvt.d.s :: ++input: -7.000000 ++output: -7.000000000000000 ++fcsr: 0x300 ++roundig mode: -inf ++fcvt.d.s :: ++input: -347856.500000 ++output: -347856.500000000000000 ++fcsr: 0x300 ++roundig mode: -inf ++fcvt.d.s :: ++input: 356047.500000 ++output: 356047.500000000000000 ++fcsr: 0x300 ++roundig mode: -inf ++fcvt.d.s :: ++input: -1.000000 ++output: -1.000000000000000 ++fcsr: 0x300 ++roundig mode: -inf ++fcvt.d.s :: ++input: 23.062500 ++output: 23.062500000000000 ++fcsr: 0x300 ++roundig mode: near ++ftintrm.w.s :: ++input: 0.000000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++ftintrm.w.s :: ++input: 456.250000 ++output: 456 ++fcsr: 0x1010000 ++roundig 
mode: near ++ftintrm.w.s :: ++input: 3.000000 ++output: 3 ++fcsr: 0 ++roundig mode: near ++ftintrm.w.s :: ++input: -1.000000 ++output: -1 ++fcsr: 0 ++roundig mode: near ++ftintrm.w.s :: ++input: 1384.500000 ++output: 1384 ++fcsr: 0x1010000 ++roundig mode: near ++ftintrm.w.s :: ++input: -7.250000 ++output: -8 ++fcsr: 0x1010000 ++roundig mode: near ++ftintrm.w.s :: ++input: 1000000000.000000 ++output: 1000000000 ++fcsr: 0 ++roundig mode: near ++ftintrm.w.s :: ++input: -5786.500000 ++output: -5787 ++fcsr: 0x1010000 ++roundig mode: near ++ftintrm.w.s :: ++input: 1752.000000 ++output: 1752 ++fcsr: 0 ++roundig mode: near ++ftintrm.w.s :: ++input: 0.015625 ++output: 0 ++fcsr: 0x1010000 ++roundig mode: near ++ftintrm.w.s :: ++input: 0.031250 ++output: 0 ++fcsr: 0x1010000 ++roundig mode: near ++ftintrm.w.s :: ++input: -248562.750000 ++output: -248563 ++fcsr: 0x1010000 ++roundig mode: near ++ftintrm.w.s :: ++input: -45786.500000 ++output: -45787 ++fcsr: 0x1010000 ++roundig mode: near ++ftintrm.w.s :: ++input: 456.000000 ++output: 456 ++fcsr: 0 ++roundig mode: near ++ftintrm.w.s :: ++input: 34.031250 ++output: 34 ++fcsr: 0x1010000 ++roundig mode: near ++ftintrm.w.s :: ++input: 45786.750000 ++output: 45786 ++fcsr: 0x1010000 ++roundig mode: near ++ftintrm.w.s :: ++input: 1752065.000000 ++output: 1752065 ++fcsr: 0 ++roundig mode: near ++ftintrm.w.s :: ++input: 107.000000 ++output: 107 ++fcsr: 0 ++roundig mode: near ++ftintrm.w.s :: ++input: -45667.250000 ++output: -45668 ++fcsr: 0x1010000 ++roundig mode: near ++ftintrm.w.s :: ++input: -7.000000 ++output: -7 ++fcsr: 0 ++roundig mode: near ++ftintrm.w.s :: ++input: -347856.500000 ++output: -347857 ++fcsr: 0x1010000 ++roundig mode: near ++ftintrm.w.s :: ++input: 356047.500000 ++output: 356047 ++fcsr: 0x1010000 ++roundig mode: near ++ftintrm.w.s :: ++input: -1.000000 ++output: -1 ++fcsr: 0 ++roundig mode: near ++ftintrm.w.s :: ++input: 23.062500 ++output: 23 ++fcsr: 0x1010000 ++roundig mode: zero ++ftintrm.w.s :: ++input: 0.000000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++ftintrm.w.s :: ++input: 456.250000 ++output: 456 ++fcsr: 0x1010100 ++roundig mode: zero ++ftintrm.w.s :: ++input: 3.000000 ++output: 3 ++fcsr: 0x100 ++roundig mode: zero ++ftintrm.w.s :: ++input: -1.000000 ++output: -1 ++fcsr: 0x100 ++roundig mode: zero ++ftintrm.w.s :: ++input: 1384.500000 ++output: 1384 ++fcsr: 0x1010100 ++roundig mode: zero ++ftintrm.w.s :: ++input: -7.250000 ++output: -8 ++fcsr: 0x1010100 ++roundig mode: zero ++ftintrm.w.s :: ++input: 1000000000.000000 ++output: 1000000000 ++fcsr: 0x100 ++roundig mode: zero ++ftintrm.w.s :: ++input: -5786.500000 ++output: -5787 ++fcsr: 0x1010100 ++roundig mode: zero ++ftintrm.w.s :: ++input: 1752.000000 ++output: 1752 ++fcsr: 0x100 ++roundig mode: zero ++ftintrm.w.s :: ++input: 0.015625 ++output: 0 ++fcsr: 0x1010100 ++roundig mode: zero ++ftintrm.w.s :: ++input: 0.031250 ++output: 0 ++fcsr: 0x1010100 ++roundig mode: zero ++ftintrm.w.s :: ++input: -248562.750000 ++output: -248563 ++fcsr: 0x1010100 ++roundig mode: zero ++ftintrm.w.s :: ++input: -45786.500000 ++output: -45787 ++fcsr: 0x1010100 ++roundig mode: zero ++ftintrm.w.s :: ++input: 456.000000 ++output: 456 ++fcsr: 0x100 ++roundig mode: zero ++ftintrm.w.s :: ++input: 34.031250 ++output: 34 ++fcsr: 0x1010100 ++roundig mode: zero ++ftintrm.w.s :: ++input: 45786.750000 ++output: 45786 ++fcsr: 0x1010100 ++roundig mode: zero ++ftintrm.w.s :: ++input: 1752065.000000 ++output: 1752065 ++fcsr: 0x100 ++roundig mode: zero ++ftintrm.w.s :: ++input: 107.000000 ++output: 107 ++fcsr: 
0x100 ++roundig mode: zero ++ftintrm.w.s :: ++input: -45667.250000 ++output: -45668 ++fcsr: 0x1010100 ++roundig mode: zero ++ftintrm.w.s :: ++input: -7.000000 ++output: -7 ++fcsr: 0x100 ++roundig mode: zero ++ftintrm.w.s :: ++input: -347856.500000 ++output: -347857 ++fcsr: 0x1010100 ++roundig mode: zero ++ftintrm.w.s :: ++input: 356047.500000 ++output: 356047 ++fcsr: 0x1010100 ++roundig mode: zero ++ftintrm.w.s :: ++input: -1.000000 ++output: -1 ++fcsr: 0x100 ++roundig mode: zero ++ftintrm.w.s :: ++input: 23.062500 ++output: 23 ++fcsr: 0x1010100 ++roundig mode: +inf ++ftintrm.w.s :: ++input: 0.000000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++ftintrm.w.s :: ++input: 456.250000 ++output: 456 ++fcsr: 0x1010200 ++roundig mode: +inf ++ftintrm.w.s :: ++input: 3.000000 ++output: 3 ++fcsr: 0x200 ++roundig mode: +inf ++ftintrm.w.s :: ++input: -1.000000 ++output: -1 ++fcsr: 0x200 ++roundig mode: +inf ++ftintrm.w.s :: ++input: 1384.500000 ++output: 1384 ++fcsr: 0x1010200 ++roundig mode: +inf ++ftintrm.w.s :: ++input: -7.250000 ++output: -8 ++fcsr: 0x1010200 ++roundig mode: +inf ++ftintrm.w.s :: ++input: 1000000000.000000 ++output: 1000000000 ++fcsr: 0x200 ++roundig mode: +inf ++ftintrm.w.s :: ++input: -5786.500000 ++output: -5787 ++fcsr: 0x1010200 ++roundig mode: +inf ++ftintrm.w.s :: ++input: 1752.000000 ++output: 1752 ++fcsr: 0x200 ++roundig mode: +inf ++ftintrm.w.s :: ++input: 0.015625 ++output: 0 ++fcsr: 0x1010200 ++roundig mode: +inf ++ftintrm.w.s :: ++input: 0.031250 ++output: 0 ++fcsr: 0x1010200 ++roundig mode: +inf ++ftintrm.w.s :: ++input: -248562.750000 ++output: -248563 ++fcsr: 0x1010200 ++roundig mode: +inf ++ftintrm.w.s :: ++input: -45786.500000 ++output: -45787 ++fcsr: 0x1010200 ++roundig mode: +inf ++ftintrm.w.s :: ++input: 456.000000 ++output: 456 ++fcsr: 0x200 ++roundig mode: +inf ++ftintrm.w.s :: ++input: 34.031250 ++output: 34 ++fcsr: 0x1010200 ++roundig mode: +inf ++ftintrm.w.s :: ++input: 45786.750000 ++output: 45786 ++fcsr: 0x1010200 ++roundig mode: +inf ++ftintrm.w.s :: ++input: 1752065.000000 ++output: 1752065 ++fcsr: 0x200 ++roundig mode: +inf ++ftintrm.w.s :: ++input: 107.000000 ++output: 107 ++fcsr: 0x200 ++roundig mode: +inf ++ftintrm.w.s :: ++input: -45667.250000 ++output: -45668 ++fcsr: 0x1010200 ++roundig mode: +inf ++ftintrm.w.s :: ++input: -7.000000 ++output: -7 ++fcsr: 0x200 ++roundig mode: +inf ++ftintrm.w.s :: ++input: -347856.500000 ++output: -347857 ++fcsr: 0x1010200 ++roundig mode: +inf ++ftintrm.w.s :: ++input: 356047.500000 ++output: 356047 ++fcsr: 0x1010200 ++roundig mode: +inf ++ftintrm.w.s :: ++input: -1.000000 ++output: -1 ++fcsr: 0x200 ++roundig mode: +inf ++ftintrm.w.s :: ++input: 23.062500 ++output: 23 ++fcsr: 0x1010200 ++roundig mode: -inf ++ftintrm.w.s :: ++input: 0.000000 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++ftintrm.w.s :: ++input: 456.250000 ++output: 456 ++fcsr: 0x1010300 ++roundig mode: -inf ++ftintrm.w.s :: ++input: 3.000000 ++output: 3 ++fcsr: 0x300 ++roundig mode: -inf ++ftintrm.w.s :: ++input: -1.000000 ++output: -1 ++fcsr: 0x300 ++roundig mode: -inf ++ftintrm.w.s :: ++input: 1384.500000 ++output: 1384 ++fcsr: 0x1010300 ++roundig mode: -inf ++ftintrm.w.s :: ++input: -7.250000 ++output: -8 ++fcsr: 0x1010300 ++roundig mode: -inf ++ftintrm.w.s :: ++input: 1000000000.000000 ++output: 1000000000 ++fcsr: 0x300 ++roundig mode: -inf ++ftintrm.w.s :: ++input: -5786.500000 ++output: -5787 ++fcsr: 0x1010300 ++roundig mode: -inf ++ftintrm.w.s :: ++input: 1752.000000 ++output: 1752 ++fcsr: 0x300 ++roundig mode: -inf ++ftintrm.w.s :: 
++input: 0.015625 ++output: 0 ++fcsr: 0x1010300 ++roundig mode: -inf ++ftintrm.w.s :: ++input: 0.031250 ++output: 0 ++fcsr: 0x1010300 ++roundig mode: -inf ++ftintrm.w.s :: ++input: -248562.750000 ++output: -248563 ++fcsr: 0x1010300 ++roundig mode: -inf ++ftintrm.w.s :: ++input: -45786.500000 ++output: -45787 ++fcsr: 0x1010300 ++roundig mode: -inf ++ftintrm.w.s :: ++input: 456.000000 ++output: 456 ++fcsr: 0x300 ++roundig mode: -inf ++ftintrm.w.s :: ++input: 34.031250 ++output: 34 ++fcsr: 0x1010300 ++roundig mode: -inf ++ftintrm.w.s :: ++input: 45786.750000 ++output: 45786 ++fcsr: 0x1010300 ++roundig mode: -inf ++ftintrm.w.s :: ++input: 1752065.000000 ++output: 1752065 ++fcsr: 0x300 ++roundig mode: -inf ++ftintrm.w.s :: ++input: 107.000000 ++output: 107 ++fcsr: 0x300 ++roundig mode: -inf ++ftintrm.w.s :: ++input: -45667.250000 ++output: -45668 ++fcsr: 0x1010300 ++roundig mode: -inf ++ftintrm.w.s :: ++input: -7.000000 ++output: -7 ++fcsr: 0x300 ++roundig mode: -inf ++ftintrm.w.s :: ++input: -347856.500000 ++output: -347857 ++fcsr: 0x1010300 ++roundig mode: -inf ++ftintrm.w.s :: ++input: 356047.500000 ++output: 356047 ++fcsr: 0x1010300 ++roundig mode: -inf ++ftintrm.w.s :: ++input: -1.000000 ++output: -1 ++fcsr: 0x300 ++roundig mode: -inf ++ftintrm.w.s :: ++input: 23.062500 ++output: 23 ++fcsr: 0x1010300 ++roundig mode: near ++ftintrm.w.d :: ++input: 0.000000000000000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++ftintrm.w.d :: ++input: 456.250000000000000 ++output: 456 ++fcsr: 0x1010000 ++roundig mode: near ++ftintrm.w.d :: ++input: 3.000000000000000 ++output: 3 ++fcsr: 0 ++roundig mode: near ++ftintrm.w.d :: ++input: -1.000000000000000 ++output: -1 ++fcsr: 0 ++roundig mode: near ++ftintrm.w.d :: ++input: 1384.500000000000000 ++output: 1384 ++fcsr: 0x1010000 ++roundig mode: near ++ftintrm.w.d :: ++input: -7.250000000000000 ++output: -8 ++fcsr: 0x1010000 ++roundig mode: near ++ftintrm.w.d :: ++input: 1000000000.000000000000000 ++output: 1000000000 ++fcsr: 0 ++roundig mode: near ++ftintrm.w.d :: ++input: -5786.500000000000000 ++output: -5787 ++fcsr: 0x1010000 ++roundig mode: near ++ftintrm.w.d :: ++input: 1752.000000000000000 ++output: 1752 ++fcsr: 0 ++roundig mode: near ++ftintrm.w.d :: ++input: 0.015625000000000 ++output: 0 ++fcsr: 0x1010000 ++roundig mode: near ++ftintrm.w.d :: ++input: 0.031250000000000 ++output: 0 ++fcsr: 0x1010000 ++roundig mode: near ++ftintrm.w.d :: ++input: -248562.750000000000000 ++output: -248563 ++fcsr: 0x1010000 ++roundig mode: near ++ftintrm.w.d :: ++input: -45786.500000000000000 ++output: -45787 ++fcsr: 0x1010000 ++roundig mode: near ++ftintrm.w.d :: ++input: 456.000000000000000 ++output: 456 ++fcsr: 0 ++roundig mode: near ++ftintrm.w.d :: ++input: 34.031250000000000 ++output: 34 ++fcsr: 0x1010000 ++roundig mode: near ++ftintrm.w.d :: ++input: 45786.750000000000000 ++output: 45786 ++fcsr: 0x1010000 ++roundig mode: near ++ftintrm.w.d :: ++input: 1752065.000000000000000 ++output: 1752065 ++fcsr: 0 ++roundig mode: near ++ftintrm.w.d :: ++input: 107.000000000000000 ++output: 107 ++fcsr: 0 ++roundig mode: near ++ftintrm.w.d :: ++input: -45667.250000000000000 ++output: -45668 ++fcsr: 0x1010000 ++roundig mode: near ++ftintrm.w.d :: ++input: -7.000000000000000 ++output: -7 ++fcsr: 0 ++roundig mode: near ++ftintrm.w.d :: ++input: -347856.500000000000000 ++output: -347857 ++fcsr: 0x1010000 ++roundig mode: near ++ftintrm.w.d :: ++input: 356047.500000000000000 ++output: 356047 ++fcsr: 0x1010000 ++roundig mode: near ++ftintrm.w.d :: ++input: -1.000000000000000 ++output: -1 
++fcsr: 0 ++roundig mode: near ++ftintrm.w.d :: ++input: 23.062500000000000 ++output: 23 ++fcsr: 0x1010000 ++roundig mode: zero ++ftintrm.w.d :: ++input: 0.000000000000000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++ftintrm.w.d :: ++input: 456.250000000000000 ++output: 456 ++fcsr: 0x1010100 ++roundig mode: zero ++ftintrm.w.d :: ++input: 3.000000000000000 ++output: 3 ++fcsr: 0x100 ++roundig mode: zero ++ftintrm.w.d :: ++input: -1.000000000000000 ++output: -1 ++fcsr: 0x100 ++roundig mode: zero ++ftintrm.w.d :: ++input: 1384.500000000000000 ++output: 1384 ++fcsr: 0x1010100 ++roundig mode: zero ++ftintrm.w.d :: ++input: -7.250000000000000 ++output: -8 ++fcsr: 0x1010100 ++roundig mode: zero ++ftintrm.w.d :: ++input: 1000000000.000000000000000 ++output: 1000000000 ++fcsr: 0x100 ++roundig mode: zero ++ftintrm.w.d :: ++input: -5786.500000000000000 ++output: -5787 ++fcsr: 0x1010100 ++roundig mode: zero ++ftintrm.w.d :: ++input: 1752.000000000000000 ++output: 1752 ++fcsr: 0x100 ++roundig mode: zero ++ftintrm.w.d :: ++input: 0.015625000000000 ++output: 0 ++fcsr: 0x1010100 ++roundig mode: zero ++ftintrm.w.d :: ++input: 0.031250000000000 ++output: 0 ++fcsr: 0x1010100 ++roundig mode: zero ++ftintrm.w.d :: ++input: -248562.750000000000000 ++output: -248563 ++fcsr: 0x1010100 ++roundig mode: zero ++ftintrm.w.d :: ++input: -45786.500000000000000 ++output: -45787 ++fcsr: 0x1010100 ++roundig mode: zero ++ftintrm.w.d :: ++input: 456.000000000000000 ++output: 456 ++fcsr: 0x100 ++roundig mode: zero ++ftintrm.w.d :: ++input: 34.031250000000000 ++output: 34 ++fcsr: 0x1010100 ++roundig mode: zero ++ftintrm.w.d :: ++input: 45786.750000000000000 ++output: 45786 ++fcsr: 0x1010100 ++roundig mode: zero ++ftintrm.w.d :: ++input: 1752065.000000000000000 ++output: 1752065 ++fcsr: 0x100 ++roundig mode: zero ++ftintrm.w.d :: ++input: 107.000000000000000 ++output: 107 ++fcsr: 0x100 ++roundig mode: zero ++ftintrm.w.d :: ++input: -45667.250000000000000 ++output: -45668 ++fcsr: 0x1010100 ++roundig mode: zero ++ftintrm.w.d :: ++input: -7.000000000000000 ++output: -7 ++fcsr: 0x100 ++roundig mode: zero ++ftintrm.w.d :: ++input: -347856.500000000000000 ++output: -347857 ++fcsr: 0x1010100 ++roundig mode: zero ++ftintrm.w.d :: ++input: 356047.500000000000000 ++output: 356047 ++fcsr: 0x1010100 ++roundig mode: zero ++ftintrm.w.d :: ++input: -1.000000000000000 ++output: -1 ++fcsr: 0x100 ++roundig mode: zero ++ftintrm.w.d :: ++input: 23.062500000000000 ++output: 23 ++fcsr: 0x1010100 ++roundig mode: +inf ++ftintrm.w.d :: ++input: 0.000000000000000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++ftintrm.w.d :: ++input: 456.250000000000000 ++output: 456 ++fcsr: 0x1010200 ++roundig mode: +inf ++ftintrm.w.d :: ++input: 3.000000000000000 ++output: 3 ++fcsr: 0x200 ++roundig mode: +inf ++ftintrm.w.d :: ++input: -1.000000000000000 ++output: -1 ++fcsr: 0x200 ++roundig mode: +inf ++ftintrm.w.d :: ++input: 1384.500000000000000 ++output: 1384 ++fcsr: 0x1010200 ++roundig mode: +inf ++ftintrm.w.d :: ++input: -7.250000000000000 ++output: -8 ++fcsr: 0x1010200 ++roundig mode: +inf ++ftintrm.w.d :: ++input: 1000000000.000000000000000 ++output: 1000000000 ++fcsr: 0x200 ++roundig mode: +inf ++ftintrm.w.d :: ++input: -5786.500000000000000 ++output: -5787 ++fcsr: 0x1010200 ++roundig mode: +inf ++ftintrm.w.d :: ++input: 1752.000000000000000 ++output: 1752 ++fcsr: 0x200 ++roundig mode: +inf ++ftintrm.w.d :: ++input: 0.015625000000000 ++output: 0 ++fcsr: 0x1010200 ++roundig mode: +inf ++ftintrm.w.d :: ++input: 0.031250000000000 ++output: 0 ++fcsr: 0x1010200 
++roundig mode: +inf ++ftintrm.w.d :: ++input: -248562.750000000000000 ++output: -248563 ++fcsr: 0x1010200 ++roundig mode: +inf ++ftintrm.w.d :: ++input: -45786.500000000000000 ++output: -45787 ++fcsr: 0x1010200 ++roundig mode: +inf ++ftintrm.w.d :: ++input: 456.000000000000000 ++output: 456 ++fcsr: 0x200 ++roundig mode: +inf ++ftintrm.w.d :: ++input: 34.031250000000000 ++output: 34 ++fcsr: 0x1010200 ++roundig mode: +inf ++ftintrm.w.d :: ++input: 45786.750000000000000 ++output: 45786 ++fcsr: 0x1010200 ++roundig mode: +inf ++ftintrm.w.d :: ++input: 1752065.000000000000000 ++output: 1752065 ++fcsr: 0x200 ++roundig mode: +inf ++ftintrm.w.d :: ++input: 107.000000000000000 ++output: 107 ++fcsr: 0x200 ++roundig mode: +inf ++ftintrm.w.d :: ++input: -45667.250000000000000 ++output: -45668 ++fcsr: 0x1010200 ++roundig mode: +inf ++ftintrm.w.d :: ++input: -7.000000000000000 ++output: -7 ++fcsr: 0x200 ++roundig mode: +inf ++ftintrm.w.d :: ++input: -347856.500000000000000 ++output: -347857 ++fcsr: 0x1010200 ++roundig mode: +inf ++ftintrm.w.d :: ++input: 356047.500000000000000 ++output: 356047 ++fcsr: 0x1010200 ++roundig mode: +inf ++ftintrm.w.d :: ++input: -1.000000000000000 ++output: -1 ++fcsr: 0x200 ++roundig mode: +inf ++ftintrm.w.d :: ++input: 23.062500000000000 ++output: 23 ++fcsr: 0x1010200 ++roundig mode: -inf ++ftintrm.w.d :: ++input: 0.000000000000000 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++ftintrm.w.d :: ++input: 456.250000000000000 ++output: 456 ++fcsr: 0x1010300 ++roundig mode: -inf ++ftintrm.w.d :: ++input: 3.000000000000000 ++output: 3 ++fcsr: 0x300 ++roundig mode: -inf ++ftintrm.w.d :: ++input: -1.000000000000000 ++output: -1 ++fcsr: 0x300 ++roundig mode: -inf ++ftintrm.w.d :: ++input: 1384.500000000000000 ++output: 1384 ++fcsr: 0x1010300 ++roundig mode: -inf ++ftintrm.w.d :: ++input: -7.250000000000000 ++output: -8 ++fcsr: 0x1010300 ++roundig mode: -inf ++ftintrm.w.d :: ++input: 1000000000.000000000000000 ++output: 1000000000 ++fcsr: 0x300 ++roundig mode: -inf ++ftintrm.w.d :: ++input: -5786.500000000000000 ++output: -5787 ++fcsr: 0x1010300 ++roundig mode: -inf ++ftintrm.w.d :: ++input: 1752.000000000000000 ++output: 1752 ++fcsr: 0x300 ++roundig mode: -inf ++ftintrm.w.d :: ++input: 0.015625000000000 ++output: 0 ++fcsr: 0x1010300 ++roundig mode: -inf ++ftintrm.w.d :: ++input: 0.031250000000000 ++output: 0 ++fcsr: 0x1010300 ++roundig mode: -inf ++ftintrm.w.d :: ++input: -248562.750000000000000 ++output: -248563 ++fcsr: 0x1010300 ++roundig mode: -inf ++ftintrm.w.d :: ++input: -45786.500000000000000 ++output: -45787 ++fcsr: 0x1010300 ++roundig mode: -inf ++ftintrm.w.d :: ++input: 456.000000000000000 ++output: 456 ++fcsr: 0x300 ++roundig mode: -inf ++ftintrm.w.d :: ++input: 34.031250000000000 ++output: 34 ++fcsr: 0x1010300 ++roundig mode: -inf ++ftintrm.w.d :: ++input: 45786.750000000000000 ++output: 45786 ++fcsr: 0x1010300 ++roundig mode: -inf ++ftintrm.w.d :: ++input: 1752065.000000000000000 ++output: 1752065 ++fcsr: 0x300 ++roundig mode: -inf ++ftintrm.w.d :: ++input: 107.000000000000000 ++output: 107 ++fcsr: 0x300 ++roundig mode: -inf ++ftintrm.w.d :: ++input: -45667.250000000000000 ++output: -45668 ++fcsr: 0x1010300 ++roundig mode: -inf ++ftintrm.w.d :: ++input: -7.000000000000000 ++output: -7 ++fcsr: 0x300 ++roundig mode: -inf ++ftintrm.w.d :: ++input: -347856.500000000000000 ++output: -347857 ++fcsr: 0x1010300 ++roundig mode: -inf ++ftintrm.w.d :: ++input: 356047.500000000000000 ++output: 356047 ++fcsr: 0x1010300 ++roundig mode: -inf ++ftintrm.w.d :: ++input: 
-1.000000000000000 ++output: -1 ++fcsr: 0x300 ++roundig mode: -inf ++ftintrm.w.d :: ++input: 23.062500000000000 ++output: 23 ++fcsr: 0x1010300 ++roundig mode: near ++ftintrm.l.s :: ++input: 0.000000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++ftintrm.l.s :: ++input: 456.250000 ++output: 456 ++fcsr: 0x1010000 ++roundig mode: near ++ftintrm.l.s :: ++input: 3.000000 ++output: 3 ++fcsr: 0 ++roundig mode: near ++ftintrm.l.s :: ++input: -1.000000 ++output: -1 ++fcsr: 0 ++roundig mode: near ++ftintrm.l.s :: ++input: 1384.500000 ++output: 1384 ++fcsr: 0x1010000 ++roundig mode: near ++ftintrm.l.s :: ++input: -7.250000 ++output: -8 ++fcsr: 0x1010000 ++roundig mode: near ++ftintrm.l.s :: ++input: 1000000000.000000 ++output: 1000000000 ++fcsr: 0 ++roundig mode: near ++ftintrm.l.s :: ++input: -5786.500000 ++output: -5787 ++fcsr: 0x1010000 ++roundig mode: near ++ftintrm.l.s :: ++input: 1752.000000 ++output: 1752 ++fcsr: 0 ++roundig mode: near ++ftintrm.l.s :: ++input: 0.015625 ++output: 0 ++fcsr: 0x1010000 ++roundig mode: near ++ftintrm.l.s :: ++input: 0.031250 ++output: 0 ++fcsr: 0x1010000 ++roundig mode: near ++ftintrm.l.s :: ++input: -248562.750000 ++output: -248563 ++fcsr: 0x1010000 ++roundig mode: near ++ftintrm.l.s :: ++input: -45786.500000 ++output: -45787 ++fcsr: 0x1010000 ++roundig mode: near ++ftintrm.l.s :: ++input: 456.000000 ++output: 456 ++fcsr: 0 ++roundig mode: near ++ftintrm.l.s :: ++input: 34.031250 ++output: 34 ++fcsr: 0x1010000 ++roundig mode: near ++ftintrm.l.s :: ++input: 45786.750000 ++output: 45786 ++fcsr: 0x1010000 ++roundig mode: near ++ftintrm.l.s :: ++input: 1752065.000000 ++output: 1752065 ++fcsr: 0 ++roundig mode: near ++ftintrm.l.s :: ++input: 107.000000 ++output: 107 ++fcsr: 0 ++roundig mode: near ++ftintrm.l.s :: ++input: -45667.250000 ++output: -45668 ++fcsr: 0x1010000 ++roundig mode: near ++ftintrm.l.s :: ++input: -7.000000 ++output: -7 ++fcsr: 0 ++roundig mode: near ++ftintrm.l.s :: ++input: -347856.500000 ++output: -347857 ++fcsr: 0x1010000 ++roundig mode: near ++ftintrm.l.s :: ++input: 356047.500000 ++output: 356047 ++fcsr: 0x1010000 ++roundig mode: near ++ftintrm.l.s :: ++input: -1.000000 ++output: -1 ++fcsr: 0 ++roundig mode: near ++ftintrm.l.s :: ++input: 23.062500 ++output: 23 ++fcsr: 0x1010000 ++roundig mode: zero ++ftintrm.l.s :: ++input: 0.000000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++ftintrm.l.s :: ++input: 456.250000 ++output: 456 ++fcsr: 0x1010100 ++roundig mode: zero ++ftintrm.l.s :: ++input: 3.000000 ++output: 3 ++fcsr: 0x100 ++roundig mode: zero ++ftintrm.l.s :: ++input: -1.000000 ++output: -1 ++fcsr: 0x100 ++roundig mode: zero ++ftintrm.l.s :: ++input: 1384.500000 ++output: 1384 ++fcsr: 0x1010100 ++roundig mode: zero ++ftintrm.l.s :: ++input: -7.250000 ++output: -8 ++fcsr: 0x1010100 ++roundig mode: zero ++ftintrm.l.s :: ++input: 1000000000.000000 ++output: 1000000000 ++fcsr: 0x100 ++roundig mode: zero ++ftintrm.l.s :: ++input: -5786.500000 ++output: -5787 ++fcsr: 0x1010100 ++roundig mode: zero ++ftintrm.l.s :: ++input: 1752.000000 ++output: 1752 ++fcsr: 0x100 ++roundig mode: zero ++ftintrm.l.s :: ++input: 0.015625 ++output: 0 ++fcsr: 0x1010100 ++roundig mode: zero ++ftintrm.l.s :: ++input: 0.031250 ++output: 0 ++fcsr: 0x1010100 ++roundig mode: zero ++ftintrm.l.s :: ++input: -248562.750000 ++output: -248563 ++fcsr: 0x1010100 ++roundig mode: zero ++ftintrm.l.s :: ++input: -45786.500000 ++output: -45787 ++fcsr: 0x1010100 ++roundig mode: zero ++ftintrm.l.s :: ++input: 456.000000 ++output: 456 ++fcsr: 0x100 ++roundig mode: zero ++ftintrm.l.s :: 
++input: 34.031250 ++output: 34 ++fcsr: 0x1010100 ++roundig mode: zero ++ftintrm.l.s :: ++input: 45786.750000 ++output: 45786 ++fcsr: 0x1010100 ++roundig mode: zero ++ftintrm.l.s :: ++input: 1752065.000000 ++output: 1752065 ++fcsr: 0x100 ++roundig mode: zero ++ftintrm.l.s :: ++input: 107.000000 ++output: 107 ++fcsr: 0x100 ++roundig mode: zero ++ftintrm.l.s :: ++input: -45667.250000 ++output: -45668 ++fcsr: 0x1010100 ++roundig mode: zero ++ftintrm.l.s :: ++input: -7.000000 ++output: -7 ++fcsr: 0x100 ++roundig mode: zero ++ftintrm.l.s :: ++input: -347856.500000 ++output: -347857 ++fcsr: 0x1010100 ++roundig mode: zero ++ftintrm.l.s :: ++input: 356047.500000 ++output: 356047 ++fcsr: 0x1010100 ++roundig mode: zero ++ftintrm.l.s :: ++input: -1.000000 ++output: -1 ++fcsr: 0x100 ++roundig mode: zero ++ftintrm.l.s :: ++input: 23.062500 ++output: 23 ++fcsr: 0x1010100 ++roundig mode: +inf ++ftintrm.l.s :: ++input: 0.000000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++ftintrm.l.s :: ++input: 456.250000 ++output: 456 ++fcsr: 0x1010200 ++roundig mode: +inf ++ftintrm.l.s :: ++input: 3.000000 ++output: 3 ++fcsr: 0x200 ++roundig mode: +inf ++ftintrm.l.s :: ++input: -1.000000 ++output: -1 ++fcsr: 0x200 ++roundig mode: +inf ++ftintrm.l.s :: ++input: 1384.500000 ++output: 1384 ++fcsr: 0x1010200 ++roundig mode: +inf ++ftintrm.l.s :: ++input: -7.250000 ++output: -8 ++fcsr: 0x1010200 ++roundig mode: +inf ++ftintrm.l.s :: ++input: 1000000000.000000 ++output: 1000000000 ++fcsr: 0x200 ++roundig mode: +inf ++ftintrm.l.s :: ++input: -5786.500000 ++output: -5787 ++fcsr: 0x1010200 ++roundig mode: +inf ++ftintrm.l.s :: ++input: 1752.000000 ++output: 1752 ++fcsr: 0x200 ++roundig mode: +inf ++ftintrm.l.s :: ++input: 0.015625 ++output: 0 ++fcsr: 0x1010200 ++roundig mode: +inf ++ftintrm.l.s :: ++input: 0.031250 ++output: 0 ++fcsr: 0x1010200 ++roundig mode: +inf ++ftintrm.l.s :: ++input: -248562.750000 ++output: -248563 ++fcsr: 0x1010200 ++roundig mode: +inf ++ftintrm.l.s :: ++input: -45786.500000 ++output: -45787 ++fcsr: 0x1010200 ++roundig mode: +inf ++ftintrm.l.s :: ++input: 456.000000 ++output: 456 ++fcsr: 0x200 ++roundig mode: +inf ++ftintrm.l.s :: ++input: 34.031250 ++output: 34 ++fcsr: 0x1010200 ++roundig mode: +inf ++ftintrm.l.s :: ++input: 45786.750000 ++output: 45786 ++fcsr: 0x1010200 ++roundig mode: +inf ++ftintrm.l.s :: ++input: 1752065.000000 ++output: 1752065 ++fcsr: 0x200 ++roundig mode: +inf ++ftintrm.l.s :: ++input: 107.000000 ++output: 107 ++fcsr: 0x200 ++roundig mode: +inf ++ftintrm.l.s :: ++input: -45667.250000 ++output: -45668 ++fcsr: 0x1010200 ++roundig mode: +inf ++ftintrm.l.s :: ++input: -7.000000 ++output: -7 ++fcsr: 0x200 ++roundig mode: +inf ++ftintrm.l.s :: ++input: -347856.500000 ++output: -347857 ++fcsr: 0x1010200 ++roundig mode: +inf ++ftintrm.l.s :: ++input: 356047.500000 ++output: 356047 ++fcsr: 0x1010200 ++roundig mode: +inf ++ftintrm.l.s :: ++input: -1.000000 ++output: -1 ++fcsr: 0x200 ++roundig mode: +inf ++ftintrm.l.s :: ++input: 23.062500 ++output: 23 ++fcsr: 0x1010200 ++roundig mode: -inf ++ftintrm.l.s :: ++input: 0.000000 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++ftintrm.l.s :: ++input: 456.250000 ++output: 456 ++fcsr: 0x1010300 ++roundig mode: -inf ++ftintrm.l.s :: ++input: 3.000000 ++output: 3 ++fcsr: 0x300 ++roundig mode: -inf ++ftintrm.l.s :: ++input: -1.000000 ++output: -1 ++fcsr: 0x300 ++roundig mode: -inf ++ftintrm.l.s :: ++input: 1384.500000 ++output: 1384 ++fcsr: 0x1010300 ++roundig mode: -inf ++ftintrm.l.s :: ++input: -7.250000 ++output: -8 ++fcsr: 0x1010300 
++roundig mode: -inf ++ftintrm.l.s :: ++input: 1000000000.000000 ++output: 1000000000 ++fcsr: 0x300 ++roundig mode: -inf ++ftintrm.l.s :: ++input: -5786.500000 ++output: -5787 ++fcsr: 0x1010300 ++roundig mode: -inf ++ftintrm.l.s :: ++input: 1752.000000 ++output: 1752 ++fcsr: 0x300 ++roundig mode: -inf ++ftintrm.l.s :: ++input: 0.015625 ++output: 0 ++fcsr: 0x1010300 ++roundig mode: -inf ++ftintrm.l.s :: ++input: 0.031250 ++output: 0 ++fcsr: 0x1010300 ++roundig mode: -inf ++ftintrm.l.s :: ++input: -248562.750000 ++output: -248563 ++fcsr: 0x1010300 ++roundig mode: -inf ++ftintrm.l.s :: ++input: -45786.500000 ++output: -45787 ++fcsr: 0x1010300 ++roundig mode: -inf ++ftintrm.l.s :: ++input: 456.000000 ++output: 456 ++fcsr: 0x300 ++roundig mode: -inf ++ftintrm.l.s :: ++input: 34.031250 ++output: 34 ++fcsr: 0x1010300 ++roundig mode: -inf ++ftintrm.l.s :: ++input: 45786.750000 ++output: 45786 ++fcsr: 0x1010300 ++roundig mode: -inf ++ftintrm.l.s :: ++input: 1752065.000000 ++output: 1752065 ++fcsr: 0x300 ++roundig mode: -inf ++ftintrm.l.s :: ++input: 107.000000 ++output: 107 ++fcsr: 0x300 ++roundig mode: -inf ++ftintrm.l.s :: ++input: -45667.250000 ++output: -45668 ++fcsr: 0x1010300 ++roundig mode: -inf ++ftintrm.l.s :: ++input: -7.000000 ++output: -7 ++fcsr: 0x300 ++roundig mode: -inf ++ftintrm.l.s :: ++input: -347856.500000 ++output: -347857 ++fcsr: 0x1010300 ++roundig mode: -inf ++ftintrm.l.s :: ++input: 356047.500000 ++output: 356047 ++fcsr: 0x1010300 ++roundig mode: -inf ++ftintrm.l.s :: ++input: -1.000000 ++output: -1 ++fcsr: 0x300 ++roundig mode: -inf ++ftintrm.l.s :: ++input: 23.062500 ++output: 23 ++fcsr: 0x1010300 ++roundig mode: near ++ftintrm.l.d :: ++input: 0.000000000000000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++ftintrm.l.d :: ++input: 456.250000000000000 ++output: 456 ++fcsr: 0x1010000 ++roundig mode: near ++ftintrm.l.d :: ++input: 3.000000000000000 ++output: 3 ++fcsr: 0 ++roundig mode: near ++ftintrm.l.d :: ++input: -1.000000000000000 ++output: -1 ++fcsr: 0 ++roundig mode: near ++ftintrm.l.d :: ++input: 1384.500000000000000 ++output: 1384 ++fcsr: 0x1010000 ++roundig mode: near ++ftintrm.l.d :: ++input: -7.250000000000000 ++output: -8 ++fcsr: 0x1010000 ++roundig mode: near ++ftintrm.l.d :: ++input: 1000000000.000000000000000 ++output: 1000000000 ++fcsr: 0 ++roundig mode: near ++ftintrm.l.d :: ++input: -5786.500000000000000 ++output: -5787 ++fcsr: 0x1010000 ++roundig mode: near ++ftintrm.l.d :: ++input: 1752.000000000000000 ++output: 1752 ++fcsr: 0 ++roundig mode: near ++ftintrm.l.d :: ++input: 0.015625000000000 ++output: 0 ++fcsr: 0x1010000 ++roundig mode: near ++ftintrm.l.d :: ++input: 0.031250000000000 ++output: 0 ++fcsr: 0x1010000 ++roundig mode: near ++ftintrm.l.d :: ++input: -248562.750000000000000 ++output: -248563 ++fcsr: 0x1010000 ++roundig mode: near ++ftintrm.l.d :: ++input: -45786.500000000000000 ++output: -45787 ++fcsr: 0x1010000 ++roundig mode: near ++ftintrm.l.d :: ++input: 456.000000000000000 ++output: 456 ++fcsr: 0 ++roundig mode: near ++ftintrm.l.d :: ++input: 34.031250000000000 ++output: 34 ++fcsr: 0x1010000 ++roundig mode: near ++ftintrm.l.d :: ++input: 45786.750000000000000 ++output: 45786 ++fcsr: 0x1010000 ++roundig mode: near ++ftintrm.l.d :: ++input: 1752065.000000000000000 ++output: 1752065 ++fcsr: 0 ++roundig mode: near ++ftintrm.l.d :: ++input: 107.000000000000000 ++output: 107 ++fcsr: 0 ++roundig mode: near ++ftintrm.l.d :: ++input: -45667.250000000000000 ++output: -45668 ++fcsr: 0x1010000 ++roundig mode: near ++ftintrm.l.d :: ++input: 
-7.000000000000000 ++output: -7 ++fcsr: 0 ++roundig mode: near ++ftintrm.l.d :: ++input: -347856.500000000000000 ++output: -347857 ++fcsr: 0x1010000 ++roundig mode: near ++ftintrm.l.d :: ++input: 356047.500000000000000 ++output: 356047 ++fcsr: 0x1010000 ++roundig mode: near ++ftintrm.l.d :: ++input: -1.000000000000000 ++output: -1 ++fcsr: 0 ++roundig mode: near ++ftintrm.l.d :: ++input: 23.062500000000000 ++output: 23 ++fcsr: 0x1010000 ++roundig mode: zero ++ftintrm.l.d :: ++input: 0.000000000000000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++ftintrm.l.d :: ++input: 456.250000000000000 ++output: 456 ++fcsr: 0x1010100 ++roundig mode: zero ++ftintrm.l.d :: ++input: 3.000000000000000 ++output: 3 ++fcsr: 0x100 ++roundig mode: zero ++ftintrm.l.d :: ++input: -1.000000000000000 ++output: -1 ++fcsr: 0x100 ++roundig mode: zero ++ftintrm.l.d :: ++input: 1384.500000000000000 ++output: 1384 ++fcsr: 0x1010100 ++roundig mode: zero ++ftintrm.l.d :: ++input: -7.250000000000000 ++output: -8 ++fcsr: 0x1010100 ++roundig mode: zero ++ftintrm.l.d :: ++input: 1000000000.000000000000000 ++output: 1000000000 ++fcsr: 0x100 ++roundig mode: zero ++ftintrm.l.d :: ++input: -5786.500000000000000 ++output: -5787 ++fcsr: 0x1010100 ++roundig mode: zero ++ftintrm.l.d :: ++input: 1752.000000000000000 ++output: 1752 ++fcsr: 0x100 ++roundig mode: zero ++ftintrm.l.d :: ++input: 0.015625000000000 ++output: 0 ++fcsr: 0x1010100 ++roundig mode: zero ++ftintrm.l.d :: ++input: 0.031250000000000 ++output: 0 ++fcsr: 0x1010100 ++roundig mode: zero ++ftintrm.l.d :: ++input: -248562.750000000000000 ++output: -248563 ++fcsr: 0x1010100 ++roundig mode: zero ++ftintrm.l.d :: ++input: -45786.500000000000000 ++output: -45787 ++fcsr: 0x1010100 ++roundig mode: zero ++ftintrm.l.d :: ++input: 456.000000000000000 ++output: 456 ++fcsr: 0x100 ++roundig mode: zero ++ftintrm.l.d :: ++input: 34.031250000000000 ++output: 34 ++fcsr: 0x1010100 ++roundig mode: zero ++ftintrm.l.d :: ++input: 45786.750000000000000 ++output: 45786 ++fcsr: 0x1010100 ++roundig mode: zero ++ftintrm.l.d :: ++input: 1752065.000000000000000 ++output: 1752065 ++fcsr: 0x100 ++roundig mode: zero ++ftintrm.l.d :: ++input: 107.000000000000000 ++output: 107 ++fcsr: 0x100 ++roundig mode: zero ++ftintrm.l.d :: ++input: -45667.250000000000000 ++output: -45668 ++fcsr: 0x1010100 ++roundig mode: zero ++ftintrm.l.d :: ++input: -7.000000000000000 ++output: -7 ++fcsr: 0x100 ++roundig mode: zero ++ftintrm.l.d :: ++input: -347856.500000000000000 ++output: -347857 ++fcsr: 0x1010100 ++roundig mode: zero ++ftintrm.l.d :: ++input: 356047.500000000000000 ++output: 356047 ++fcsr: 0x1010100 ++roundig mode: zero ++ftintrm.l.d :: ++input: -1.000000000000000 ++output: -1 ++fcsr: 0x100 ++roundig mode: zero ++ftintrm.l.d :: ++input: 23.062500000000000 ++output: 23 ++fcsr: 0x1010100 ++roundig mode: +inf ++ftintrm.l.d :: ++input: 0.000000000000000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++ftintrm.l.d :: ++input: 456.250000000000000 ++output: 456 ++fcsr: 0x1010200 ++roundig mode: +inf ++ftintrm.l.d :: ++input: 3.000000000000000 ++output: 3 ++fcsr: 0x200 ++roundig mode: +inf ++ftintrm.l.d :: ++input: -1.000000000000000 ++output: -1 ++fcsr: 0x200 ++roundig mode: +inf ++ftintrm.l.d :: ++input: 1384.500000000000000 ++output: 1384 ++fcsr: 0x1010200 ++roundig mode: +inf ++ftintrm.l.d :: ++input: -7.250000000000000 ++output: -8 ++fcsr: 0x1010200 ++roundig mode: +inf ++ftintrm.l.d :: ++input: 1000000000.000000000000000 ++output: 1000000000 ++fcsr: 0x200 ++roundig mode: +inf ++ftintrm.l.d :: ++input: 
-5786.500000000000000 ++output: -5787 ++fcsr: 0x1010200 ++roundig mode: +inf ++ftintrm.l.d :: ++input: 1752.000000000000000 ++output: 1752 ++fcsr: 0x200 ++roundig mode: +inf ++ftintrm.l.d :: ++input: 0.015625000000000 ++output: 0 ++fcsr: 0x1010200 ++roundig mode: +inf ++ftintrm.l.d :: ++input: 0.031250000000000 ++output: 0 ++fcsr: 0x1010200 ++roundig mode: +inf ++ftintrm.l.d :: ++input: -248562.750000000000000 ++output: -248563 ++fcsr: 0x1010200 ++roundig mode: +inf ++ftintrm.l.d :: ++input: -45786.500000000000000 ++output: -45787 ++fcsr: 0x1010200 ++roundig mode: +inf ++ftintrm.l.d :: ++input: 456.000000000000000 ++output: 456 ++fcsr: 0x200 ++roundig mode: +inf ++ftintrm.l.d :: ++input: 34.031250000000000 ++output: 34 ++fcsr: 0x1010200 ++roundig mode: +inf ++ftintrm.l.d :: ++input: 45786.750000000000000 ++output: 45786 ++fcsr: 0x1010200 ++roundig mode: +inf ++ftintrm.l.d :: ++input: 1752065.000000000000000 ++output: 1752065 ++fcsr: 0x200 ++roundig mode: +inf ++ftintrm.l.d :: ++input: 107.000000000000000 ++output: 107 ++fcsr: 0x200 ++roundig mode: +inf ++ftintrm.l.d :: ++input: -45667.250000000000000 ++output: -45668 ++fcsr: 0x1010200 ++roundig mode: +inf ++ftintrm.l.d :: ++input: -7.000000000000000 ++output: -7 ++fcsr: 0x200 ++roundig mode: +inf ++ftintrm.l.d :: ++input: -347856.500000000000000 ++output: -347857 ++fcsr: 0x1010200 ++roundig mode: +inf ++ftintrm.l.d :: ++input: 356047.500000000000000 ++output: 356047 ++fcsr: 0x1010200 ++roundig mode: +inf ++ftintrm.l.d :: ++input: -1.000000000000000 ++output: -1 ++fcsr: 0x200 ++roundig mode: +inf ++ftintrm.l.d :: ++input: 23.062500000000000 ++output: 23 ++fcsr: 0x1010200 ++roundig mode: -inf ++ftintrm.l.d :: ++input: 0.000000000000000 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++ftintrm.l.d :: ++input: 456.250000000000000 ++output: 456 ++fcsr: 0x1010300 ++roundig mode: -inf ++ftintrm.l.d :: ++input: 3.000000000000000 ++output: 3 ++fcsr: 0x300 ++roundig mode: -inf ++ftintrm.l.d :: ++input: -1.000000000000000 ++output: -1 ++fcsr: 0x300 ++roundig mode: -inf ++ftintrm.l.d :: ++input: 1384.500000000000000 ++output: 1384 ++fcsr: 0x1010300 ++roundig mode: -inf ++ftintrm.l.d :: ++input: -7.250000000000000 ++output: -8 ++fcsr: 0x1010300 ++roundig mode: -inf ++ftintrm.l.d :: ++input: 1000000000.000000000000000 ++output: 1000000000 ++fcsr: 0x300 ++roundig mode: -inf ++ftintrm.l.d :: ++input: -5786.500000000000000 ++output: -5787 ++fcsr: 0x1010300 ++roundig mode: -inf ++ftintrm.l.d :: ++input: 1752.000000000000000 ++output: 1752 ++fcsr: 0x300 ++roundig mode: -inf ++ftintrm.l.d :: ++input: 0.015625000000000 ++output: 0 ++fcsr: 0x1010300 ++roundig mode: -inf ++ftintrm.l.d :: ++input: 0.031250000000000 ++output: 0 ++fcsr: 0x1010300 ++roundig mode: -inf ++ftintrm.l.d :: ++input: -248562.750000000000000 ++output: -248563 ++fcsr: 0x1010300 ++roundig mode: -inf ++ftintrm.l.d :: ++input: -45786.500000000000000 ++output: -45787 ++fcsr: 0x1010300 ++roundig mode: -inf ++ftintrm.l.d :: ++input: 456.000000000000000 ++output: 456 ++fcsr: 0x300 ++roundig mode: -inf ++ftintrm.l.d :: ++input: 34.031250000000000 ++output: 34 ++fcsr: 0x1010300 ++roundig mode: -inf ++ftintrm.l.d :: ++input: 45786.750000000000000 ++output: 45786 ++fcsr: 0x1010300 ++roundig mode: -inf ++ftintrm.l.d :: ++input: 1752065.000000000000000 ++output: 1752065 ++fcsr: 0x300 ++roundig mode: -inf ++ftintrm.l.d :: ++input: 107.000000000000000 ++output: 107 ++fcsr: 0x300 ++roundig mode: -inf ++ftintrm.l.d :: ++input: -45667.250000000000000 ++output: -45668 ++fcsr: 0x1010300 ++roundig mode: -inf 
++ftintrm.l.d :: ++input: -7.000000000000000 ++output: -7 ++fcsr: 0x300 ++roundig mode: -inf ++ftintrm.l.d :: ++input: -347856.500000000000000 ++output: -347857 ++fcsr: 0x1010300 ++roundig mode: -inf ++ftintrm.l.d :: ++input: 356047.500000000000000 ++output: 356047 ++fcsr: 0x1010300 ++roundig mode: -inf ++ftintrm.l.d :: ++input: -1.000000000000000 ++output: -1 ++fcsr: 0x300 ++roundig mode: -inf ++ftintrm.l.d :: ++input: 23.062500000000000 ++output: 23 ++fcsr: 0x1010300 ++roundig mode: near ++ftintrp.w.s :: ++input: 0.000000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++ftintrp.w.s :: ++input: 456.250000 ++output: 457 ++fcsr: 0x1010000 ++roundig mode: near ++ftintrp.w.s :: ++input: 3.000000 ++output: 3 ++fcsr: 0 ++roundig mode: near ++ftintrp.w.s :: ++input: -1.000000 ++output: -1 ++fcsr: 0 ++roundig mode: near ++ftintrp.w.s :: ++input: 1384.500000 ++output: 1385 ++fcsr: 0x1010000 ++roundig mode: near ++ftintrp.w.s :: ++input: -7.250000 ++output: -7 ++fcsr: 0x1010000 ++roundig mode: near ++ftintrp.w.s :: ++input: 1000000000.000000 ++output: 1000000000 ++fcsr: 0 ++roundig mode: near ++ftintrp.w.s :: ++input: -5786.500000 ++output: -5786 ++fcsr: 0x1010000 ++roundig mode: near ++ftintrp.w.s :: ++input: 1752.000000 ++output: 1752 ++fcsr: 0 ++roundig mode: near ++ftintrp.w.s :: ++input: 0.015625 ++output: 1 ++fcsr: 0x1010000 ++roundig mode: near ++ftintrp.w.s :: ++input: 0.031250 ++output: 1 ++fcsr: 0x1010000 ++roundig mode: near ++ftintrp.w.s :: ++input: -248562.750000 ++output: -248562 ++fcsr: 0x1010000 ++roundig mode: near ++ftintrp.w.s :: ++input: -45786.500000 ++output: -45786 ++fcsr: 0x1010000 ++roundig mode: near ++ftintrp.w.s :: ++input: 456.000000 ++output: 456 ++fcsr: 0 ++roundig mode: near ++ftintrp.w.s :: ++input: 34.031250 ++output: 35 ++fcsr: 0x1010000 ++roundig mode: near ++ftintrp.w.s :: ++input: 45786.750000 ++output: 45787 ++fcsr: 0x1010000 ++roundig mode: near ++ftintrp.w.s :: ++input: 1752065.000000 ++output: 1752065 ++fcsr: 0 ++roundig mode: near ++ftintrp.w.s :: ++input: 107.000000 ++output: 107 ++fcsr: 0 ++roundig mode: near ++ftintrp.w.s :: ++input: -45667.250000 ++output: -45667 ++fcsr: 0x1010000 ++roundig mode: near ++ftintrp.w.s :: ++input: -7.000000 ++output: -7 ++fcsr: 0 ++roundig mode: near ++ftintrp.w.s :: ++input: -347856.500000 ++output: -347856 ++fcsr: 0x1010000 ++roundig mode: near ++ftintrp.w.s :: ++input: 356047.500000 ++output: 356048 ++fcsr: 0x1010000 ++roundig mode: near ++ftintrp.w.s :: ++input: -1.000000 ++output: -1 ++fcsr: 0 ++roundig mode: near ++ftintrp.w.s :: ++input: 23.062500 ++output: 24 ++fcsr: 0x1010000 ++roundig mode: zero ++ftintrp.w.s :: ++input: 0.000000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++ftintrp.w.s :: ++input: 456.250000 ++output: 457 ++fcsr: 0x1010100 ++roundig mode: zero ++ftintrp.w.s :: ++input: 3.000000 ++output: 3 ++fcsr: 0x100 ++roundig mode: zero ++ftintrp.w.s :: ++input: -1.000000 ++output: -1 ++fcsr: 0x100 ++roundig mode: zero ++ftintrp.w.s :: ++input: 1384.500000 ++output: 1385 ++fcsr: 0x1010100 ++roundig mode: zero ++ftintrp.w.s :: ++input: -7.250000 ++output: -7 ++fcsr: 0x1010100 ++roundig mode: zero ++ftintrp.w.s :: ++input: 1000000000.000000 ++output: 1000000000 ++fcsr: 0x100 ++roundig mode: zero ++ftintrp.w.s :: ++input: -5786.500000 ++output: -5786 ++fcsr: 0x1010100 ++roundig mode: zero ++ftintrp.w.s :: ++input: 1752.000000 ++output: 1752 ++fcsr: 0x100 ++roundig mode: zero ++ftintrp.w.s :: ++input: 0.015625 ++output: 1 ++fcsr: 0x1010100 ++roundig mode: zero ++ftintrp.w.s :: ++input: 0.031250 ++output: 1 
++fcsr: 0x1010100 ++roundig mode: zero ++ftintrp.w.s :: ++input: -248562.750000 ++output: -248562 ++fcsr: 0x1010100 ++roundig mode: zero ++ftintrp.w.s :: ++input: -45786.500000 ++output: -45786 ++fcsr: 0x1010100 ++roundig mode: zero ++ftintrp.w.s :: ++input: 456.000000 ++output: 456 ++fcsr: 0x100 ++roundig mode: zero ++ftintrp.w.s :: ++input: 34.031250 ++output: 35 ++fcsr: 0x1010100 ++roundig mode: zero ++ftintrp.w.s :: ++input: 45786.750000 ++output: 45787 ++fcsr: 0x1010100 ++roundig mode: zero ++ftintrp.w.s :: ++input: 1752065.000000 ++output: 1752065 ++fcsr: 0x100 ++roundig mode: zero ++ftintrp.w.s :: ++input: 107.000000 ++output: 107 ++fcsr: 0x100 ++roundig mode: zero ++ftintrp.w.s :: ++input: -45667.250000 ++output: -45667 ++fcsr: 0x1010100 ++roundig mode: zero ++ftintrp.w.s :: ++input: -7.000000 ++output: -7 ++fcsr: 0x100 ++roundig mode: zero ++ftintrp.w.s :: ++input: -347856.500000 ++output: -347856 ++fcsr: 0x1010100 ++roundig mode: zero ++ftintrp.w.s :: ++input: 356047.500000 ++output: 356048 ++fcsr: 0x1010100 ++roundig mode: zero ++ftintrp.w.s :: ++input: -1.000000 ++output: -1 ++fcsr: 0x100 ++roundig mode: zero ++ftintrp.w.s :: ++input: 23.062500 ++output: 24 ++fcsr: 0x1010100 ++roundig mode: +inf ++ftintrp.w.s :: ++input: 0.000000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++ftintrp.w.s :: ++input: 456.250000 ++output: 457 ++fcsr: 0x1010200 ++roundig mode: +inf ++ftintrp.w.s :: ++input: 3.000000 ++output: 3 ++fcsr: 0x200 ++roundig mode: +inf ++ftintrp.w.s :: ++input: -1.000000 ++output: -1 ++fcsr: 0x200 ++roundig mode: +inf ++ftintrp.w.s :: ++input: 1384.500000 ++output: 1385 ++fcsr: 0x1010200 ++roundig mode: +inf ++ftintrp.w.s :: ++input: -7.250000 ++output: -7 ++fcsr: 0x1010200 ++roundig mode: +inf ++ftintrp.w.s :: ++input: 1000000000.000000 ++output: 1000000000 ++fcsr: 0x200 ++roundig mode: +inf ++ftintrp.w.s :: ++input: -5786.500000 ++output: -5786 ++fcsr: 0x1010200 ++roundig mode: +inf ++ftintrp.w.s :: ++input: 1752.000000 ++output: 1752 ++fcsr: 0x200 ++roundig mode: +inf ++ftintrp.w.s :: ++input: 0.015625 ++output: 1 ++fcsr: 0x1010200 ++roundig mode: +inf ++ftintrp.w.s :: ++input: 0.031250 ++output: 1 ++fcsr: 0x1010200 ++roundig mode: +inf ++ftintrp.w.s :: ++input: -248562.750000 ++output: -248562 ++fcsr: 0x1010200 ++roundig mode: +inf ++ftintrp.w.s :: ++input: -45786.500000 ++output: -45786 ++fcsr: 0x1010200 ++roundig mode: +inf ++ftintrp.w.s :: ++input: 456.000000 ++output: 456 ++fcsr: 0x200 ++roundig mode: +inf ++ftintrp.w.s :: ++input: 34.031250 ++output: 35 ++fcsr: 0x1010200 ++roundig mode: +inf ++ftintrp.w.s :: ++input: 45786.750000 ++output: 45787 ++fcsr: 0x1010200 ++roundig mode: +inf ++ftintrp.w.s :: ++input: 1752065.000000 ++output: 1752065 ++fcsr: 0x200 ++roundig mode: +inf ++ftintrp.w.s :: ++input: 107.000000 ++output: 107 ++fcsr: 0x200 ++roundig mode: +inf ++ftintrp.w.s :: ++input: -45667.250000 ++output: -45667 ++fcsr: 0x1010200 ++roundig mode: +inf ++ftintrp.w.s :: ++input: -7.000000 ++output: -7 ++fcsr: 0x200 ++roundig mode: +inf ++ftintrp.w.s :: ++input: -347856.500000 ++output: -347856 ++fcsr: 0x1010200 ++roundig mode: +inf ++ftintrp.w.s :: ++input: 356047.500000 ++output: 356048 ++fcsr: 0x1010200 ++roundig mode: +inf ++ftintrp.w.s :: ++input: -1.000000 ++output: -1 ++fcsr: 0x200 ++roundig mode: +inf ++ftintrp.w.s :: ++input: 23.062500 ++output: 24 ++fcsr: 0x1010200 ++roundig mode: -inf ++ftintrp.w.s :: ++input: 0.000000 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++ftintrp.w.s :: ++input: 456.250000 ++output: 457 ++fcsr: 0x1010300 ++roundig mode: 
-inf ++ftintrp.w.s :: ++input: 3.000000 ++output: 3 ++fcsr: 0x300 ++roundig mode: -inf ++ftintrp.w.s :: ++input: -1.000000 ++output: -1 ++fcsr: 0x300 ++roundig mode: -inf ++ftintrp.w.s :: ++input: 1384.500000 ++output: 1385 ++fcsr: 0x1010300 ++roundig mode: -inf ++ftintrp.w.s :: ++input: -7.250000 ++output: -7 ++fcsr: 0x1010300 ++roundig mode: -inf ++ftintrp.w.s :: ++input: 1000000000.000000 ++output: 1000000000 ++fcsr: 0x300 ++roundig mode: -inf ++ftintrp.w.s :: ++input: -5786.500000 ++output: -5786 ++fcsr: 0x1010300 ++roundig mode: -inf ++ftintrp.w.s :: ++input: 1752.000000 ++output: 1752 ++fcsr: 0x300 ++roundig mode: -inf ++ftintrp.w.s :: ++input: 0.015625 ++output: 1 ++fcsr: 0x1010300 ++roundig mode: -inf ++ftintrp.w.s :: ++input: 0.031250 ++output: 1 ++fcsr: 0x1010300 ++roundig mode: -inf ++ftintrp.w.s :: ++input: -248562.750000 ++output: -248562 ++fcsr: 0x1010300 ++roundig mode: -inf ++ftintrp.w.s :: ++input: -45786.500000 ++output: -45786 ++fcsr: 0x1010300 ++roundig mode: -inf ++ftintrp.w.s :: ++input: 456.000000 ++output: 456 ++fcsr: 0x300 ++roundig mode: -inf ++ftintrp.w.s :: ++input: 34.031250 ++output: 35 ++fcsr: 0x1010300 ++roundig mode: -inf ++ftintrp.w.s :: ++input: 45786.750000 ++output: 45787 ++fcsr: 0x1010300 ++roundig mode: -inf ++ftintrp.w.s :: ++input: 1752065.000000 ++output: 1752065 ++fcsr: 0x300 ++roundig mode: -inf ++ftintrp.w.s :: ++input: 107.000000 ++output: 107 ++fcsr: 0x300 ++roundig mode: -inf ++ftintrp.w.s :: ++input: -45667.250000 ++output: -45667 ++fcsr: 0x1010300 ++roundig mode: -inf ++ftintrp.w.s :: ++input: -7.000000 ++output: -7 ++fcsr: 0x300 ++roundig mode: -inf ++ftintrp.w.s :: ++input: -347856.500000 ++output: -347856 ++fcsr: 0x1010300 ++roundig mode: -inf ++ftintrp.w.s :: ++input: 356047.500000 ++output: 356048 ++fcsr: 0x1010300 ++roundig mode: -inf ++ftintrp.w.s :: ++input: -1.000000 ++output: -1 ++fcsr: 0x300 ++roundig mode: -inf ++ftintrp.w.s :: ++input: 23.062500 ++output: 24 ++fcsr: 0x1010300 ++roundig mode: near ++ftintrp.w.d :: ++input: 0.000000000000000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++ftintrp.w.d :: ++input: 456.250000000000000 ++output: 457 ++fcsr: 0x1010000 ++roundig mode: near ++ftintrp.w.d :: ++input: 3.000000000000000 ++output: 3 ++fcsr: 0 ++roundig mode: near ++ftintrp.w.d :: ++input: -1.000000000000000 ++output: -1 ++fcsr: 0 ++roundig mode: near ++ftintrp.w.d :: ++input: 1384.500000000000000 ++output: 1385 ++fcsr: 0x1010000 ++roundig mode: near ++ftintrp.w.d :: ++input: -7.250000000000000 ++output: -7 ++fcsr: 0x1010000 ++roundig mode: near ++ftintrp.w.d :: ++input: 1000000000.000000000000000 ++output: 1000000000 ++fcsr: 0 ++roundig mode: near ++ftintrp.w.d :: ++input: -5786.500000000000000 ++output: -5786 ++fcsr: 0x1010000 ++roundig mode: near ++ftintrp.w.d :: ++input: 1752.000000000000000 ++output: 1752 ++fcsr: 0 ++roundig mode: near ++ftintrp.w.d :: ++input: 0.015625000000000 ++output: 1 ++fcsr: 0x1010000 ++roundig mode: near ++ftintrp.w.d :: ++input: 0.031250000000000 ++output: 1 ++fcsr: 0x1010000 ++roundig mode: near ++ftintrp.w.d :: ++input: -248562.750000000000000 ++output: -248562 ++fcsr: 0x1010000 ++roundig mode: near ++ftintrp.w.d :: ++input: -45786.500000000000000 ++output: -45786 ++fcsr: 0x1010000 ++roundig mode: near ++ftintrp.w.d :: ++input: 456.000000000000000 ++output: 456 ++fcsr: 0 ++roundig mode: near ++ftintrp.w.d :: ++input: 34.031250000000000 ++output: 35 ++fcsr: 0x1010000 ++roundig mode: near ++ftintrp.w.d :: ++input: 45786.750000000000000 ++output: 45787 ++fcsr: 0x1010000 ++roundig mode: near 
++ftintrp.w.d :: ++input: 1752065.000000000000000 ++output: 1752065 ++fcsr: 0 ++roundig mode: near ++ftintrp.w.d :: ++input: 107.000000000000000 ++output: 107 ++fcsr: 0 ++roundig mode: near ++ftintrp.w.d :: ++input: -45667.250000000000000 ++output: -45667 ++fcsr: 0x1010000 ++roundig mode: near ++ftintrp.w.d :: ++input: -7.000000000000000 ++output: -7 ++fcsr: 0 ++roundig mode: near ++ftintrp.w.d :: ++input: -347856.500000000000000 ++output: -347856 ++fcsr: 0x1010000 ++roundig mode: near ++ftintrp.w.d :: ++input: 356047.500000000000000 ++output: 356048 ++fcsr: 0x1010000 ++roundig mode: near ++ftintrp.w.d :: ++input: -1.000000000000000 ++output: -1 ++fcsr: 0 ++roundig mode: near ++ftintrp.w.d :: ++input: 23.062500000000000 ++output: 24 ++fcsr: 0x1010000 ++roundig mode: zero ++ftintrp.w.d :: ++input: 0.000000000000000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++ftintrp.w.d :: ++input: 456.250000000000000 ++output: 457 ++fcsr: 0x1010100 ++roundig mode: zero ++ftintrp.w.d :: ++input: 3.000000000000000 ++output: 3 ++fcsr: 0x100 ++roundig mode: zero ++ftintrp.w.d :: ++input: -1.000000000000000 ++output: -1 ++fcsr: 0x100 ++roundig mode: zero ++ftintrp.w.d :: ++input: 1384.500000000000000 ++output: 1385 ++fcsr: 0x1010100 ++roundig mode: zero ++ftintrp.w.d :: ++input: -7.250000000000000 ++output: -7 ++fcsr: 0x1010100 ++roundig mode: zero ++ftintrp.w.d :: ++input: 1000000000.000000000000000 ++output: 1000000000 ++fcsr: 0x100 ++roundig mode: zero ++ftintrp.w.d :: ++input: -5786.500000000000000 ++output: -5786 ++fcsr: 0x1010100 ++roundig mode: zero ++ftintrp.w.d :: ++input: 1752.000000000000000 ++output: 1752 ++fcsr: 0x100 ++roundig mode: zero ++ftintrp.w.d :: ++input: 0.015625000000000 ++output: 1 ++fcsr: 0x1010100 ++roundig mode: zero ++ftintrp.w.d :: ++input: 0.031250000000000 ++output: 1 ++fcsr: 0x1010100 ++roundig mode: zero ++ftintrp.w.d :: ++input: -248562.750000000000000 ++output: -248562 ++fcsr: 0x1010100 ++roundig mode: zero ++ftintrp.w.d :: ++input: -45786.500000000000000 ++output: -45786 ++fcsr: 0x1010100 ++roundig mode: zero ++ftintrp.w.d :: ++input: 456.000000000000000 ++output: 456 ++fcsr: 0x100 ++roundig mode: zero ++ftintrp.w.d :: ++input: 34.031250000000000 ++output: 35 ++fcsr: 0x1010100 ++roundig mode: zero ++ftintrp.w.d :: ++input: 45786.750000000000000 ++output: 45787 ++fcsr: 0x1010100 ++roundig mode: zero ++ftintrp.w.d :: ++input: 1752065.000000000000000 ++output: 1752065 ++fcsr: 0x100 ++roundig mode: zero ++ftintrp.w.d :: ++input: 107.000000000000000 ++output: 107 ++fcsr: 0x100 ++roundig mode: zero ++ftintrp.w.d :: ++input: -45667.250000000000000 ++output: -45667 ++fcsr: 0x1010100 ++roundig mode: zero ++ftintrp.w.d :: ++input: -7.000000000000000 ++output: -7 ++fcsr: 0x100 ++roundig mode: zero ++ftintrp.w.d :: ++input: -347856.500000000000000 ++output: -347856 ++fcsr: 0x1010100 ++roundig mode: zero ++ftintrp.w.d :: ++input: 356047.500000000000000 ++output: 356048 ++fcsr: 0x1010100 ++roundig mode: zero ++ftintrp.w.d :: ++input: -1.000000000000000 ++output: -1 ++fcsr: 0x100 ++roundig mode: zero ++ftintrp.w.d :: ++input: 23.062500000000000 ++output: 24 ++fcsr: 0x1010100 ++roundig mode: +inf ++ftintrp.w.d :: ++input: 0.000000000000000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++ftintrp.w.d :: ++input: 456.250000000000000 ++output: 457 ++fcsr: 0x1010200 ++roundig mode: +inf ++ftintrp.w.d :: ++input: 3.000000000000000 ++output: 3 ++fcsr: 0x200 ++roundig mode: +inf ++ftintrp.w.d :: ++input: -1.000000000000000 ++output: -1 ++fcsr: 0x200 ++roundig mode: +inf ++ftintrp.w.d :: 
++input: 1384.500000000000000 ++output: 1385 ++fcsr: 0x1010200 ++roundig mode: +inf ++ftintrp.w.d :: ++input: -7.250000000000000 ++output: -7 ++fcsr: 0x1010200 ++roundig mode: +inf ++ftintrp.w.d :: ++input: 1000000000.000000000000000 ++output: 1000000000 ++fcsr: 0x200 ++roundig mode: +inf ++ftintrp.w.d :: ++input: -5786.500000000000000 ++output: -5786 ++fcsr: 0x1010200 ++roundig mode: +inf ++ftintrp.w.d :: ++input: 1752.000000000000000 ++output: 1752 ++fcsr: 0x200 ++roundig mode: +inf ++ftintrp.w.d :: ++input: 0.015625000000000 ++output: 1 ++fcsr: 0x1010200 ++roundig mode: +inf ++ftintrp.w.d :: ++input: 0.031250000000000 ++output: 1 ++fcsr: 0x1010200 ++roundig mode: +inf ++ftintrp.w.d :: ++input: -248562.750000000000000 ++output: -248562 ++fcsr: 0x1010200 ++roundig mode: +inf ++ftintrp.w.d :: ++input: -45786.500000000000000 ++output: -45786 ++fcsr: 0x1010200 ++roundig mode: +inf ++ftintrp.w.d :: ++input: 456.000000000000000 ++output: 456 ++fcsr: 0x200 ++roundig mode: +inf ++ftintrp.w.d :: ++input: 34.031250000000000 ++output: 35 ++fcsr: 0x1010200 ++roundig mode: +inf ++ftintrp.w.d :: ++input: 45786.750000000000000 ++output: 45787 ++fcsr: 0x1010200 ++roundig mode: +inf ++ftintrp.w.d :: ++input: 1752065.000000000000000 ++output: 1752065 ++fcsr: 0x200 ++roundig mode: +inf ++ftintrp.w.d :: ++input: 107.000000000000000 ++output: 107 ++fcsr: 0x200 ++roundig mode: +inf ++ftintrp.w.d :: ++input: -45667.250000000000000 ++output: -45667 ++fcsr: 0x1010200 ++roundig mode: +inf ++ftintrp.w.d :: ++input: -7.000000000000000 ++output: -7 ++fcsr: 0x200 ++roundig mode: +inf ++ftintrp.w.d :: ++input: -347856.500000000000000 ++output: -347856 ++fcsr: 0x1010200 ++roundig mode: +inf ++ftintrp.w.d :: ++input: 356047.500000000000000 ++output: 356048 ++fcsr: 0x1010200 ++roundig mode: +inf ++ftintrp.w.d :: ++input: -1.000000000000000 ++output: -1 ++fcsr: 0x200 ++roundig mode: +inf ++ftintrp.w.d :: ++input: 23.062500000000000 ++output: 24 ++fcsr: 0x1010200 ++roundig mode: -inf ++ftintrp.w.d :: ++input: 0.000000000000000 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++ftintrp.w.d :: ++input: 456.250000000000000 ++output: 457 ++fcsr: 0x1010300 ++roundig mode: -inf ++ftintrp.w.d :: ++input: 3.000000000000000 ++output: 3 ++fcsr: 0x300 ++roundig mode: -inf ++ftintrp.w.d :: ++input: -1.000000000000000 ++output: -1 ++fcsr: 0x300 ++roundig mode: -inf ++ftintrp.w.d :: ++input: 1384.500000000000000 ++output: 1385 ++fcsr: 0x1010300 ++roundig mode: -inf ++ftintrp.w.d :: ++input: -7.250000000000000 ++output: -7 ++fcsr: 0x1010300 ++roundig mode: -inf ++ftintrp.w.d :: ++input: 1000000000.000000000000000 ++output: 1000000000 ++fcsr: 0x300 ++roundig mode: -inf ++ftintrp.w.d :: ++input: -5786.500000000000000 ++output: -5786 ++fcsr: 0x1010300 ++roundig mode: -inf ++ftintrp.w.d :: ++input: 1752.000000000000000 ++output: 1752 ++fcsr: 0x300 ++roundig mode: -inf ++ftintrp.w.d :: ++input: 0.015625000000000 ++output: 1 ++fcsr: 0x1010300 ++roundig mode: -inf ++ftintrp.w.d :: ++input: 0.031250000000000 ++output: 1 ++fcsr: 0x1010300 ++roundig mode: -inf ++ftintrp.w.d :: ++input: -248562.750000000000000 ++output: -248562 ++fcsr: 0x1010300 ++roundig mode: -inf ++ftintrp.w.d :: ++input: -45786.500000000000000 ++output: -45786 ++fcsr: 0x1010300 ++roundig mode: -inf ++ftintrp.w.d :: ++input: 456.000000000000000 ++output: 456 ++fcsr: 0x300 ++roundig mode: -inf ++ftintrp.w.d :: ++input: 34.031250000000000 ++output: 35 ++fcsr: 0x1010300 ++roundig mode: -inf ++ftintrp.w.d :: ++input: 45786.750000000000000 ++output: 45787 ++fcsr: 0x1010300 ++roundig 
mode: -inf ++ftintrp.w.d :: ++input: 1752065.000000000000000 ++output: 1752065 ++fcsr: 0x300 ++roundig mode: -inf ++ftintrp.w.d :: ++input: 107.000000000000000 ++output: 107 ++fcsr: 0x300 ++roundig mode: -inf ++ftintrp.w.d :: ++input: -45667.250000000000000 ++output: -45667 ++fcsr: 0x1010300 ++roundig mode: -inf ++ftintrp.w.d :: ++input: -7.000000000000000 ++output: -7 ++fcsr: 0x300 ++roundig mode: -inf ++ftintrp.w.d :: ++input: -347856.500000000000000 ++output: -347856 ++fcsr: 0x1010300 ++roundig mode: -inf ++ftintrp.w.d :: ++input: 356047.500000000000000 ++output: 356048 ++fcsr: 0x1010300 ++roundig mode: -inf ++ftintrp.w.d :: ++input: -1.000000000000000 ++output: -1 ++fcsr: 0x300 ++roundig mode: -inf ++ftintrp.w.d :: ++input: 23.062500000000000 ++output: 24 ++fcsr: 0x1010300 ++roundig mode: near ++ftintrp.l.s :: ++input: 0.000000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++ftintrp.l.s :: ++input: 456.250000 ++output: 457 ++fcsr: 0x1010000 ++roundig mode: near ++ftintrp.l.s :: ++input: 3.000000 ++output: 3 ++fcsr: 0 ++roundig mode: near ++ftintrp.l.s :: ++input: -1.000000 ++output: -1 ++fcsr: 0 ++roundig mode: near ++ftintrp.l.s :: ++input: 1384.500000 ++output: 1385 ++fcsr: 0x1010000 ++roundig mode: near ++ftintrp.l.s :: ++input: -7.250000 ++output: -7 ++fcsr: 0x1010000 ++roundig mode: near ++ftintrp.l.s :: ++input: 1000000000.000000 ++output: 1000000000 ++fcsr: 0 ++roundig mode: near ++ftintrp.l.s :: ++input: -5786.500000 ++output: -5786 ++fcsr: 0x1010000 ++roundig mode: near ++ftintrp.l.s :: ++input: 1752.000000 ++output: 1752 ++fcsr: 0 ++roundig mode: near ++ftintrp.l.s :: ++input: 0.015625 ++output: 1 ++fcsr: 0x1010000 ++roundig mode: near ++ftintrp.l.s :: ++input: 0.031250 ++output: 1 ++fcsr: 0x1010000 ++roundig mode: near ++ftintrp.l.s :: ++input: -248562.750000 ++output: -248562 ++fcsr: 0x1010000 ++roundig mode: near ++ftintrp.l.s :: ++input: -45786.500000 ++output: -45786 ++fcsr: 0x1010000 ++roundig mode: near ++ftintrp.l.s :: ++input: 456.000000 ++output: 456 ++fcsr: 0 ++roundig mode: near ++ftintrp.l.s :: ++input: 34.031250 ++output: 35 ++fcsr: 0x1010000 ++roundig mode: near ++ftintrp.l.s :: ++input: 45786.750000 ++output: 45787 ++fcsr: 0x1010000 ++roundig mode: near ++ftintrp.l.s :: ++input: 1752065.000000 ++output: 1752065 ++fcsr: 0 ++roundig mode: near ++ftintrp.l.s :: ++input: 107.000000 ++output: 107 ++fcsr: 0 ++roundig mode: near ++ftintrp.l.s :: ++input: -45667.250000 ++output: -45667 ++fcsr: 0x1010000 ++roundig mode: near ++ftintrp.l.s :: ++input: -7.000000 ++output: -7 ++fcsr: 0 ++roundig mode: near ++ftintrp.l.s :: ++input: -347856.500000 ++output: -347856 ++fcsr: 0x1010000 ++roundig mode: near ++ftintrp.l.s :: ++input: 356047.500000 ++output: 356048 ++fcsr: 0x1010000 ++roundig mode: near ++ftintrp.l.s :: ++input: -1.000000 ++output: -1 ++fcsr: 0 ++roundig mode: near ++ftintrp.l.s :: ++input: 23.062500 ++output: 24 ++fcsr: 0x1010000 ++roundig mode: zero ++ftintrp.l.s :: ++input: 0.000000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++ftintrp.l.s :: ++input: 456.250000 ++output: 457 ++fcsr: 0x1010100 ++roundig mode: zero ++ftintrp.l.s :: ++input: 3.000000 ++output: 3 ++fcsr: 0x100 ++roundig mode: zero ++ftintrp.l.s :: ++input: -1.000000 ++output: -1 ++fcsr: 0x100 ++roundig mode: zero ++ftintrp.l.s :: ++input: 1384.500000 ++output: 1385 ++fcsr: 0x1010100 ++roundig mode: zero ++ftintrp.l.s :: ++input: -7.250000 ++output: -7 ++fcsr: 0x1010100 ++roundig mode: zero ++ftintrp.l.s :: ++input: 1000000000.000000 ++output: 1000000000 ++fcsr: 0x100 ++roundig mode: zero 
++ftintrp.l.s :: ++input: -5786.500000 ++output: -5786 ++fcsr: 0x1010100 ++roundig mode: zero ++ftintrp.l.s :: ++input: 1752.000000 ++output: 1752 ++fcsr: 0x100 ++roundig mode: zero ++ftintrp.l.s :: ++input: 0.015625 ++output: 1 ++fcsr: 0x1010100 ++roundig mode: zero ++ftintrp.l.s :: ++input: 0.031250 ++output: 1 ++fcsr: 0x1010100 ++roundig mode: zero ++ftintrp.l.s :: ++input: -248562.750000 ++output: -248562 ++fcsr: 0x1010100 ++roundig mode: zero ++ftintrp.l.s :: ++input: -45786.500000 ++output: -45786 ++fcsr: 0x1010100 ++roundig mode: zero ++ftintrp.l.s :: ++input: 456.000000 ++output: 456 ++fcsr: 0x100 ++roundig mode: zero ++ftintrp.l.s :: ++input: 34.031250 ++output: 35 ++fcsr: 0x1010100 ++roundig mode: zero ++ftintrp.l.s :: ++input: 45786.750000 ++output: 45787 ++fcsr: 0x1010100 ++roundig mode: zero ++ftintrp.l.s :: ++input: 1752065.000000 ++output: 1752065 ++fcsr: 0x100 ++roundig mode: zero ++ftintrp.l.s :: ++input: 107.000000 ++output: 107 ++fcsr: 0x100 ++roundig mode: zero ++ftintrp.l.s :: ++input: -45667.250000 ++output: -45667 ++fcsr: 0x1010100 ++roundig mode: zero ++ftintrp.l.s :: ++input: -7.000000 ++output: -7 ++fcsr: 0x100 ++roundig mode: zero ++ftintrp.l.s :: ++input: -347856.500000 ++output: -347856 ++fcsr: 0x1010100 ++roundig mode: zero ++ftintrp.l.s :: ++input: 356047.500000 ++output: 356048 ++fcsr: 0x1010100 ++roundig mode: zero ++ftintrp.l.s :: ++input: -1.000000 ++output: -1 ++fcsr: 0x100 ++roundig mode: zero ++ftintrp.l.s :: ++input: 23.062500 ++output: 24 ++fcsr: 0x1010100 ++roundig mode: +inf ++ftintrp.l.s :: ++input: 0.000000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++ftintrp.l.s :: ++input: 456.250000 ++output: 457 ++fcsr: 0x1010200 ++roundig mode: +inf ++ftintrp.l.s :: ++input: 3.000000 ++output: 3 ++fcsr: 0x200 ++roundig mode: +inf ++ftintrp.l.s :: ++input: -1.000000 ++output: -1 ++fcsr: 0x200 ++roundig mode: +inf ++ftintrp.l.s :: ++input: 1384.500000 ++output: 1385 ++fcsr: 0x1010200 ++roundig mode: +inf ++ftintrp.l.s :: ++input: -7.250000 ++output: -7 ++fcsr: 0x1010200 ++roundig mode: +inf ++ftintrp.l.s :: ++input: 1000000000.000000 ++output: 1000000000 ++fcsr: 0x200 ++roundig mode: +inf ++ftintrp.l.s :: ++input: -5786.500000 ++output: -5786 ++fcsr: 0x1010200 ++roundig mode: +inf ++ftintrp.l.s :: ++input: 1752.000000 ++output: 1752 ++fcsr: 0x200 ++roundig mode: +inf ++ftintrp.l.s :: ++input: 0.015625 ++output: 1 ++fcsr: 0x1010200 ++roundig mode: +inf ++ftintrp.l.s :: ++input: 0.031250 ++output: 1 ++fcsr: 0x1010200 ++roundig mode: +inf ++ftintrp.l.s :: ++input: -248562.750000 ++output: -248562 ++fcsr: 0x1010200 ++roundig mode: +inf ++ftintrp.l.s :: ++input: -45786.500000 ++output: -45786 ++fcsr: 0x1010200 ++roundig mode: +inf ++ftintrp.l.s :: ++input: 456.000000 ++output: 456 ++fcsr: 0x200 ++roundig mode: +inf ++ftintrp.l.s :: ++input: 34.031250 ++output: 35 ++fcsr: 0x1010200 ++roundig mode: +inf ++ftintrp.l.s :: ++input: 45786.750000 ++output: 45787 ++fcsr: 0x1010200 ++roundig mode: +inf ++ftintrp.l.s :: ++input: 1752065.000000 ++output: 1752065 ++fcsr: 0x200 ++roundig mode: +inf ++ftintrp.l.s :: ++input: 107.000000 ++output: 107 ++fcsr: 0x200 ++roundig mode: +inf ++ftintrp.l.s :: ++input: -45667.250000 ++output: -45667 ++fcsr: 0x1010200 ++roundig mode: +inf ++ftintrp.l.s :: ++input: -7.000000 ++output: -7 ++fcsr: 0x200 ++roundig mode: +inf ++ftintrp.l.s :: ++input: -347856.500000 ++output: -347856 ++fcsr: 0x1010200 ++roundig mode: +inf ++ftintrp.l.s :: ++input: 356047.500000 ++output: 356048 ++fcsr: 0x1010200 ++roundig mode: +inf ++ftintrp.l.s :: 
++input: -1.000000 ++output: -1 ++fcsr: 0x200 ++roundig mode: +inf ++ftintrp.l.s :: ++input: 23.062500 ++output: 24 ++fcsr: 0x1010200 ++roundig mode: -inf ++ftintrp.l.s :: ++input: 0.000000 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++ftintrp.l.s :: ++input: 456.250000 ++output: 457 ++fcsr: 0x1010300 ++roundig mode: -inf ++ftintrp.l.s :: ++input: 3.000000 ++output: 3 ++fcsr: 0x300 ++roundig mode: -inf ++ftintrp.l.s :: ++input: -1.000000 ++output: -1 ++fcsr: 0x300 ++roundig mode: -inf ++ftintrp.l.s :: ++input: 1384.500000 ++output: 1385 ++fcsr: 0x1010300 ++roundig mode: -inf ++ftintrp.l.s :: ++input: -7.250000 ++output: -7 ++fcsr: 0x1010300 ++roundig mode: -inf ++ftintrp.l.s :: ++input: 1000000000.000000 ++output: 1000000000 ++fcsr: 0x300 ++roundig mode: -inf ++ftintrp.l.s :: ++input: -5786.500000 ++output: -5786 ++fcsr: 0x1010300 ++roundig mode: -inf ++ftintrp.l.s :: ++input: 1752.000000 ++output: 1752 ++fcsr: 0x300 ++roundig mode: -inf ++ftintrp.l.s :: ++input: 0.015625 ++output: 1 ++fcsr: 0x1010300 ++roundig mode: -inf ++ftintrp.l.s :: ++input: 0.031250 ++output: 1 ++fcsr: 0x1010300 ++roundig mode: -inf ++ftintrp.l.s :: ++input: -248562.750000 ++output: -248562 ++fcsr: 0x1010300 ++roundig mode: -inf ++ftintrp.l.s :: ++input: -45786.500000 ++output: -45786 ++fcsr: 0x1010300 ++roundig mode: -inf ++ftintrp.l.s :: ++input: 456.000000 ++output: 456 ++fcsr: 0x300 ++roundig mode: -inf ++ftintrp.l.s :: ++input: 34.031250 ++output: 35 ++fcsr: 0x1010300 ++roundig mode: -inf ++ftintrp.l.s :: ++input: 45786.750000 ++output: 45787 ++fcsr: 0x1010300 ++roundig mode: -inf ++ftintrp.l.s :: ++input: 1752065.000000 ++output: 1752065 ++fcsr: 0x300 ++roundig mode: -inf ++ftintrp.l.s :: ++input: 107.000000 ++output: 107 ++fcsr: 0x300 ++roundig mode: -inf ++ftintrp.l.s :: ++input: -45667.250000 ++output: -45667 ++fcsr: 0x1010300 ++roundig mode: -inf ++ftintrp.l.s :: ++input: -7.000000 ++output: -7 ++fcsr: 0x300 ++roundig mode: -inf ++ftintrp.l.s :: ++input: -347856.500000 ++output: -347856 ++fcsr: 0x1010300 ++roundig mode: -inf ++ftintrp.l.s :: ++input: 356047.500000 ++output: 356048 ++fcsr: 0x1010300 ++roundig mode: -inf ++ftintrp.l.s :: ++input: -1.000000 ++output: -1 ++fcsr: 0x300 ++roundig mode: -inf ++ftintrp.l.s :: ++input: 23.062500 ++output: 24 ++fcsr: 0x1010300 ++roundig mode: near ++ftintrp.l.d :: ++input: 0.000000000000000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++ftintrp.l.d :: ++input: 456.250000000000000 ++output: 457 ++fcsr: 0x1010000 ++roundig mode: near ++ftintrp.l.d :: ++input: 3.000000000000000 ++output: 3 ++fcsr: 0 ++roundig mode: near ++ftintrp.l.d :: ++input: -1.000000000000000 ++output: -1 ++fcsr: 0 ++roundig mode: near ++ftintrp.l.d :: ++input: 1384.500000000000000 ++output: 1385 ++fcsr: 0x1010000 ++roundig mode: near ++ftintrp.l.d :: ++input: -7.250000000000000 ++output: -7 ++fcsr: 0x1010000 ++roundig mode: near ++ftintrp.l.d :: ++input: 1000000000.000000000000000 ++output: 1000000000 ++fcsr: 0 ++roundig mode: near ++ftintrp.l.d :: ++input: -5786.500000000000000 ++output: -5786 ++fcsr: 0x1010000 ++roundig mode: near ++ftintrp.l.d :: ++input: 1752.000000000000000 ++output: 1752 ++fcsr: 0 ++roundig mode: near ++ftintrp.l.d :: ++input: 0.015625000000000 ++output: 1 ++fcsr: 0x1010000 ++roundig mode: near ++ftintrp.l.d :: ++input: 0.031250000000000 ++output: 1 ++fcsr: 0x1010000 ++roundig mode: near ++ftintrp.l.d :: ++input: -248562.750000000000000 ++output: -248562 ++fcsr: 0x1010000 ++roundig mode: near ++ftintrp.l.d :: ++input: -45786.500000000000000 ++output: -45786 ++fcsr: 
0x1010000 ++roundig mode: near ++ftintrp.l.d :: ++input: 456.000000000000000 ++output: 456 ++fcsr: 0 ++roundig mode: near ++ftintrp.l.d :: ++input: 34.031250000000000 ++output: 35 ++fcsr: 0x1010000 ++roundig mode: near ++ftintrp.l.d :: ++input: 45786.750000000000000 ++output: 45787 ++fcsr: 0x1010000 ++roundig mode: near ++ftintrp.l.d :: ++input: 1752065.000000000000000 ++output: 1752065 ++fcsr: 0 ++roundig mode: near ++ftintrp.l.d :: ++input: 107.000000000000000 ++output: 107 ++fcsr: 0 ++roundig mode: near ++ftintrp.l.d :: ++input: -45667.250000000000000 ++output: -45667 ++fcsr: 0x1010000 ++roundig mode: near ++ftintrp.l.d :: ++input: -7.000000000000000 ++output: -7 ++fcsr: 0 ++roundig mode: near ++ftintrp.l.d :: ++input: -347856.500000000000000 ++output: -347856 ++fcsr: 0x1010000 ++roundig mode: near ++ftintrp.l.d :: ++input: 356047.500000000000000 ++output: 356048 ++fcsr: 0x1010000 ++roundig mode: near ++ftintrp.l.d :: ++input: -1.000000000000000 ++output: -1 ++fcsr: 0 ++roundig mode: near ++ftintrp.l.d :: ++input: 23.062500000000000 ++output: 24 ++fcsr: 0x1010000 ++roundig mode: zero ++ftintrp.l.d :: ++input: 0.000000000000000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++ftintrp.l.d :: ++input: 456.250000000000000 ++output: 457 ++fcsr: 0x1010100 ++roundig mode: zero ++ftintrp.l.d :: ++input: 3.000000000000000 ++output: 3 ++fcsr: 0x100 ++roundig mode: zero ++ftintrp.l.d :: ++input: -1.000000000000000 ++output: -1 ++fcsr: 0x100 ++roundig mode: zero ++ftintrp.l.d :: ++input: 1384.500000000000000 ++output: 1385 ++fcsr: 0x1010100 ++roundig mode: zero ++ftintrp.l.d :: ++input: -7.250000000000000 ++output: -7 ++fcsr: 0x1010100 ++roundig mode: zero ++ftintrp.l.d :: ++input: 1000000000.000000000000000 ++output: 1000000000 ++fcsr: 0x100 ++roundig mode: zero ++ftintrp.l.d :: ++input: -5786.500000000000000 ++output: -5786 ++fcsr: 0x1010100 ++roundig mode: zero ++ftintrp.l.d :: ++input: 1752.000000000000000 ++output: 1752 ++fcsr: 0x100 ++roundig mode: zero ++ftintrp.l.d :: ++input: 0.015625000000000 ++output: 1 ++fcsr: 0x1010100 ++roundig mode: zero ++ftintrp.l.d :: ++input: 0.031250000000000 ++output: 1 ++fcsr: 0x1010100 ++roundig mode: zero ++ftintrp.l.d :: ++input: -248562.750000000000000 ++output: -248562 ++fcsr: 0x1010100 ++roundig mode: zero ++ftintrp.l.d :: ++input: -45786.500000000000000 ++output: -45786 ++fcsr: 0x1010100 ++roundig mode: zero ++ftintrp.l.d :: ++input: 456.000000000000000 ++output: 456 ++fcsr: 0x100 ++roundig mode: zero ++ftintrp.l.d :: ++input: 34.031250000000000 ++output: 35 ++fcsr: 0x1010100 ++roundig mode: zero ++ftintrp.l.d :: ++input: 45786.750000000000000 ++output: 45787 ++fcsr: 0x1010100 ++roundig mode: zero ++ftintrp.l.d :: ++input: 1752065.000000000000000 ++output: 1752065 ++fcsr: 0x100 ++roundig mode: zero ++ftintrp.l.d :: ++input: 107.000000000000000 ++output: 107 ++fcsr: 0x100 ++roundig mode: zero ++ftintrp.l.d :: ++input: -45667.250000000000000 ++output: -45667 ++fcsr: 0x1010100 ++roundig mode: zero ++ftintrp.l.d :: ++input: -7.000000000000000 ++output: -7 ++fcsr: 0x100 ++roundig mode: zero ++ftintrp.l.d :: ++input: -347856.500000000000000 ++output: -347856 ++fcsr: 0x1010100 ++roundig mode: zero ++ftintrp.l.d :: ++input: 356047.500000000000000 ++output: 356048 ++fcsr: 0x1010100 ++roundig mode: zero ++ftintrp.l.d :: ++input: -1.000000000000000 ++output: -1 ++fcsr: 0x100 ++roundig mode: zero ++ftintrp.l.d :: ++input: 23.062500000000000 ++output: 24 ++fcsr: 0x1010100 ++roundig mode: +inf ++ftintrp.l.d :: ++input: 0.000000000000000 ++output: 0 ++fcsr: 0x200 
++roundig mode: +inf ++ftintrp.l.d :: ++input: 456.250000000000000 ++output: 457 ++fcsr: 0x1010200 ++roundig mode: +inf ++ftintrp.l.d :: ++input: 3.000000000000000 ++output: 3 ++fcsr: 0x200 ++roundig mode: +inf ++ftintrp.l.d :: ++input: -1.000000000000000 ++output: -1 ++fcsr: 0x200 ++roundig mode: +inf ++ftintrp.l.d :: ++input: 1384.500000000000000 ++output: 1385 ++fcsr: 0x1010200 ++roundig mode: +inf ++ftintrp.l.d :: ++input: -7.250000000000000 ++output: -7 ++fcsr: 0x1010200 ++roundig mode: +inf ++ftintrp.l.d :: ++input: 1000000000.000000000000000 ++output: 1000000000 ++fcsr: 0x200 ++roundig mode: +inf ++ftintrp.l.d :: ++input: -5786.500000000000000 ++output: -5786 ++fcsr: 0x1010200 ++roundig mode: +inf ++ftintrp.l.d :: ++input: 1752.000000000000000 ++output: 1752 ++fcsr: 0x200 ++roundig mode: +inf ++ftintrp.l.d :: ++input: 0.015625000000000 ++output: 1 ++fcsr: 0x1010200 ++roundig mode: +inf ++ftintrp.l.d :: ++input: 0.031250000000000 ++output: 1 ++fcsr: 0x1010200 ++roundig mode: +inf ++ftintrp.l.d :: ++input: -248562.750000000000000 ++output: -248562 ++fcsr: 0x1010200 ++roundig mode: +inf ++ftintrp.l.d :: ++input: -45786.500000000000000 ++output: -45786 ++fcsr: 0x1010200 ++roundig mode: +inf ++ftintrp.l.d :: ++input: 456.000000000000000 ++output: 456 ++fcsr: 0x200 ++roundig mode: +inf ++ftintrp.l.d :: ++input: 34.031250000000000 ++output: 35 ++fcsr: 0x1010200 ++roundig mode: +inf ++ftintrp.l.d :: ++input: 45786.750000000000000 ++output: 45787 ++fcsr: 0x1010200 ++roundig mode: +inf ++ftintrp.l.d :: ++input: 1752065.000000000000000 ++output: 1752065 ++fcsr: 0x200 ++roundig mode: +inf ++ftintrp.l.d :: ++input: 107.000000000000000 ++output: 107 ++fcsr: 0x200 ++roundig mode: +inf ++ftintrp.l.d :: ++input: -45667.250000000000000 ++output: -45667 ++fcsr: 0x1010200 ++roundig mode: +inf ++ftintrp.l.d :: ++input: -7.000000000000000 ++output: -7 ++fcsr: 0x200 ++roundig mode: +inf ++ftintrp.l.d :: ++input: -347856.500000000000000 ++output: -347856 ++fcsr: 0x1010200 ++roundig mode: +inf ++ftintrp.l.d :: ++input: 356047.500000000000000 ++output: 356048 ++fcsr: 0x1010200 ++roundig mode: +inf ++ftintrp.l.d :: ++input: -1.000000000000000 ++output: -1 ++fcsr: 0x200 ++roundig mode: +inf ++ftintrp.l.d :: ++input: 23.062500000000000 ++output: 24 ++fcsr: 0x1010200 ++roundig mode: -inf ++ftintrp.l.d :: ++input: 0.000000000000000 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++ftintrp.l.d :: ++input: 456.250000000000000 ++output: 457 ++fcsr: 0x1010300 ++roundig mode: -inf ++ftintrp.l.d :: ++input: 3.000000000000000 ++output: 3 ++fcsr: 0x300 ++roundig mode: -inf ++ftintrp.l.d :: ++input: -1.000000000000000 ++output: -1 ++fcsr: 0x300 ++roundig mode: -inf ++ftintrp.l.d :: ++input: 1384.500000000000000 ++output: 1385 ++fcsr: 0x1010300 ++roundig mode: -inf ++ftintrp.l.d :: ++input: -7.250000000000000 ++output: -7 ++fcsr: 0x1010300 ++roundig mode: -inf ++ftintrp.l.d :: ++input: 1000000000.000000000000000 ++output: 1000000000 ++fcsr: 0x300 ++roundig mode: -inf ++ftintrp.l.d :: ++input: -5786.500000000000000 ++output: -5786 ++fcsr: 0x1010300 ++roundig mode: -inf ++ftintrp.l.d :: ++input: 1752.000000000000000 ++output: 1752 ++fcsr: 0x300 ++roundig mode: -inf ++ftintrp.l.d :: ++input: 0.015625000000000 ++output: 1 ++fcsr: 0x1010300 ++roundig mode: -inf ++ftintrp.l.d :: ++input: 0.031250000000000 ++output: 1 ++fcsr: 0x1010300 ++roundig mode: -inf ++ftintrp.l.d :: ++input: -248562.750000000000000 ++output: -248562 ++fcsr: 0x1010300 ++roundig mode: -inf ++ftintrp.l.d :: ++input: -45786.500000000000000 ++output: -45786 
++fcsr: 0x1010300 ++roundig mode: -inf ++ftintrp.l.d :: ++input: 456.000000000000000 ++output: 456 ++fcsr: 0x300 ++roundig mode: -inf ++ftintrp.l.d :: ++input: 34.031250000000000 ++output: 35 ++fcsr: 0x1010300 ++roundig mode: -inf ++ftintrp.l.d :: ++input: 45786.750000000000000 ++output: 45787 ++fcsr: 0x1010300 ++roundig mode: -inf ++ftintrp.l.d :: ++input: 1752065.000000000000000 ++output: 1752065 ++fcsr: 0x300 ++roundig mode: -inf ++ftintrp.l.d :: ++input: 107.000000000000000 ++output: 107 ++fcsr: 0x300 ++roundig mode: -inf ++ftintrp.l.d :: ++input: -45667.250000000000000 ++output: -45667 ++fcsr: 0x1010300 ++roundig mode: -inf ++ftintrp.l.d :: ++input: -7.000000000000000 ++output: -7 ++fcsr: 0x300 ++roundig mode: -inf ++ftintrp.l.d :: ++input: -347856.500000000000000 ++output: -347856 ++fcsr: 0x1010300 ++roundig mode: -inf ++ftintrp.l.d :: ++input: 356047.500000000000000 ++output: 356048 ++fcsr: 0x1010300 ++roundig mode: -inf ++ftintrp.l.d :: ++input: -1.000000000000000 ++output: -1 ++fcsr: 0x300 ++roundig mode: -inf ++ftintrp.l.d :: ++input: 23.062500000000000 ++output: 24 ++fcsr: 0x1010300 ++roundig mode: near ++ftintrz.w.s :: ++input: 0.000000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++ftintrz.w.s :: ++input: 456.250000 ++output: 456 ++fcsr: 0x1010000 ++roundig mode: near ++ftintrz.w.s :: ++input: 3.000000 ++output: 3 ++fcsr: 0 ++roundig mode: near ++ftintrz.w.s :: ++input: -1.000000 ++output: -1 ++fcsr: 0 ++roundig mode: near ++ftintrz.w.s :: ++input: 1384.500000 ++output: 1384 ++fcsr: 0x1010000 ++roundig mode: near ++ftintrz.w.s :: ++input: -7.250000 ++output: -7 ++fcsr: 0x1010000 ++roundig mode: near ++ftintrz.w.s :: ++input: 1000000000.000000 ++output: 1000000000 ++fcsr: 0 ++roundig mode: near ++ftintrz.w.s :: ++input: -5786.500000 ++output: -5786 ++fcsr: 0x1010000 ++roundig mode: near ++ftintrz.w.s :: ++input: 1752.000000 ++output: 1752 ++fcsr: 0 ++roundig mode: near ++ftintrz.w.s :: ++input: 0.015625 ++output: 0 ++fcsr: 0x1010000 ++roundig mode: near ++ftintrz.w.s :: ++input: 0.031250 ++output: 0 ++fcsr: 0x1010000 ++roundig mode: near ++ftintrz.w.s :: ++input: -248562.750000 ++output: -248562 ++fcsr: 0x1010000 ++roundig mode: near ++ftintrz.w.s :: ++input: -45786.500000 ++output: -45786 ++fcsr: 0x1010000 ++roundig mode: near ++ftintrz.w.s :: ++input: 456.000000 ++output: 456 ++fcsr: 0 ++roundig mode: near ++ftintrz.w.s :: ++input: 34.031250 ++output: 34 ++fcsr: 0x1010000 ++roundig mode: near ++ftintrz.w.s :: ++input: 45786.750000 ++output: 45786 ++fcsr: 0x1010000 ++roundig mode: near ++ftintrz.w.s :: ++input: 1752065.000000 ++output: 1752065 ++fcsr: 0 ++roundig mode: near ++ftintrz.w.s :: ++input: 107.000000 ++output: 107 ++fcsr: 0 ++roundig mode: near ++ftintrz.w.s :: ++input: -45667.250000 ++output: -45667 ++fcsr: 0x1010000 ++roundig mode: near ++ftintrz.w.s :: ++input: -7.000000 ++output: -7 ++fcsr: 0 ++roundig mode: near ++ftintrz.w.s :: ++input: -347856.500000 ++output: -347856 ++fcsr: 0x1010000 ++roundig mode: near ++ftintrz.w.s :: ++input: 356047.500000 ++output: 356047 ++fcsr: 0x1010000 ++roundig mode: near ++ftintrz.w.s :: ++input: -1.000000 ++output: -1 ++fcsr: 0 ++roundig mode: near ++ftintrz.w.s :: ++input: 23.062500 ++output: 23 ++fcsr: 0x1010000 ++roundig mode: zero ++ftintrz.w.s :: ++input: 0.000000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++ftintrz.w.s :: ++input: 456.250000 ++output: 456 ++fcsr: 0x1010100 ++roundig mode: zero ++ftintrz.w.s :: ++input: 3.000000 ++output: 3 ++fcsr: 0x100 ++roundig mode: zero ++ftintrz.w.s :: ++input: -1.000000 ++output: -1 
++fcsr: 0x100 ++roundig mode: zero ++ftintrz.w.s :: ++input: 1384.500000 ++output: 1384 ++fcsr: 0x1010100 ++roundig mode: zero ++ftintrz.w.s :: ++input: -7.250000 ++output: -7 ++fcsr: 0x1010100 ++roundig mode: zero ++ftintrz.w.s :: ++input: 1000000000.000000 ++output: 1000000000 ++fcsr: 0x100 ++roundig mode: zero ++ftintrz.w.s :: ++input: -5786.500000 ++output: -5786 ++fcsr: 0x1010100 ++roundig mode: zero ++ftintrz.w.s :: ++input: 1752.000000 ++output: 1752 ++fcsr: 0x100 ++roundig mode: zero ++ftintrz.w.s :: ++input: 0.015625 ++output: 0 ++fcsr: 0x1010100 ++roundig mode: zero ++ftintrz.w.s :: ++input: 0.031250 ++output: 0 ++fcsr: 0x1010100 ++roundig mode: zero ++ftintrz.w.s :: ++input: -248562.750000 ++output: -248562 ++fcsr: 0x1010100 ++roundig mode: zero ++ftintrz.w.s :: ++input: -45786.500000 ++output: -45786 ++fcsr: 0x1010100 ++roundig mode: zero ++ftintrz.w.s :: ++input: 456.000000 ++output: 456 ++fcsr: 0x100 ++roundig mode: zero ++ftintrz.w.s :: ++input: 34.031250 ++output: 34 ++fcsr: 0x1010100 ++roundig mode: zero ++ftintrz.w.s :: ++input: 45786.750000 ++output: 45786 ++fcsr: 0x1010100 ++roundig mode: zero ++ftintrz.w.s :: ++input: 1752065.000000 ++output: 1752065 ++fcsr: 0x100 ++roundig mode: zero ++ftintrz.w.s :: ++input: 107.000000 ++output: 107 ++fcsr: 0x100 ++roundig mode: zero ++ftintrz.w.s :: ++input: -45667.250000 ++output: -45667 ++fcsr: 0x1010100 ++roundig mode: zero ++ftintrz.w.s :: ++input: -7.000000 ++output: -7 ++fcsr: 0x100 ++roundig mode: zero ++ftintrz.w.s :: ++input: -347856.500000 ++output: -347856 ++fcsr: 0x1010100 ++roundig mode: zero ++ftintrz.w.s :: ++input: 356047.500000 ++output: 356047 ++fcsr: 0x1010100 ++roundig mode: zero ++ftintrz.w.s :: ++input: -1.000000 ++output: -1 ++fcsr: 0x100 ++roundig mode: zero ++ftintrz.w.s :: ++input: 23.062500 ++output: 23 ++fcsr: 0x1010100 ++roundig mode: +inf ++ftintrz.w.s :: ++input: 0.000000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++ftintrz.w.s :: ++input: 456.250000 ++output: 456 ++fcsr: 0x1010200 ++roundig mode: +inf ++ftintrz.w.s :: ++input: 3.000000 ++output: 3 ++fcsr: 0x200 ++roundig mode: +inf ++ftintrz.w.s :: ++input: -1.000000 ++output: -1 ++fcsr: 0x200 ++roundig mode: +inf ++ftintrz.w.s :: ++input: 1384.500000 ++output: 1384 ++fcsr: 0x1010200 ++roundig mode: +inf ++ftintrz.w.s :: ++input: -7.250000 ++output: -7 ++fcsr: 0x1010200 ++roundig mode: +inf ++ftintrz.w.s :: ++input: 1000000000.000000 ++output: 1000000000 ++fcsr: 0x200 ++roundig mode: +inf ++ftintrz.w.s :: ++input: -5786.500000 ++output: -5786 ++fcsr: 0x1010200 ++roundig mode: +inf ++ftintrz.w.s :: ++input: 1752.000000 ++output: 1752 ++fcsr: 0x200 ++roundig mode: +inf ++ftintrz.w.s :: ++input: 0.015625 ++output: 0 ++fcsr: 0x1010200 ++roundig mode: +inf ++ftintrz.w.s :: ++input: 0.031250 ++output: 0 ++fcsr: 0x1010200 ++roundig mode: +inf ++ftintrz.w.s :: ++input: -248562.750000 ++output: -248562 ++fcsr: 0x1010200 ++roundig mode: +inf ++ftintrz.w.s :: ++input: -45786.500000 ++output: -45786 ++fcsr: 0x1010200 ++roundig mode: +inf ++ftintrz.w.s :: ++input: 456.000000 ++output: 456 ++fcsr: 0x200 ++roundig mode: +inf ++ftintrz.w.s :: ++input: 34.031250 ++output: 34 ++fcsr: 0x1010200 ++roundig mode: +inf ++ftintrz.w.s :: ++input: 45786.750000 ++output: 45786 ++fcsr: 0x1010200 ++roundig mode: +inf ++ftintrz.w.s :: ++input: 1752065.000000 ++output: 1752065 ++fcsr: 0x200 ++roundig mode: +inf ++ftintrz.w.s :: ++input: 107.000000 ++output: 107 ++fcsr: 0x200 ++roundig mode: +inf ++ftintrz.w.s :: ++input: -45667.250000 ++output: -45667 ++fcsr: 0x1010200 
++roundig mode: +inf ++ftintrz.w.s :: ++input: -7.000000 ++output: -7 ++fcsr: 0x200 ++roundig mode: +inf ++ftintrz.w.s :: ++input: -347856.500000 ++output: -347856 ++fcsr: 0x1010200 ++roundig mode: +inf ++ftintrz.w.s :: ++input: 356047.500000 ++output: 356047 ++fcsr: 0x1010200 ++roundig mode: +inf ++ftintrz.w.s :: ++input: -1.000000 ++output: -1 ++fcsr: 0x200 ++roundig mode: +inf ++ftintrz.w.s :: ++input: 23.062500 ++output: 23 ++fcsr: 0x1010200 ++roundig mode: -inf ++ftintrz.w.s :: ++input: 0.000000 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++ftintrz.w.s :: ++input: 456.250000 ++output: 456 ++fcsr: 0x1010300 ++roundig mode: -inf ++ftintrz.w.s :: ++input: 3.000000 ++output: 3 ++fcsr: 0x300 ++roundig mode: -inf ++ftintrz.w.s :: ++input: -1.000000 ++output: -1 ++fcsr: 0x300 ++roundig mode: -inf ++ftintrz.w.s :: ++input: 1384.500000 ++output: 1384 ++fcsr: 0x1010300 ++roundig mode: -inf ++ftintrz.w.s :: ++input: -7.250000 ++output: -7 ++fcsr: 0x1010300 ++roundig mode: -inf ++ftintrz.w.s :: ++input: 1000000000.000000 ++output: 1000000000 ++fcsr: 0x300 ++roundig mode: -inf ++ftintrz.w.s :: ++input: -5786.500000 ++output: -5786 ++fcsr: 0x1010300 ++roundig mode: -inf ++ftintrz.w.s :: ++input: 1752.000000 ++output: 1752 ++fcsr: 0x300 ++roundig mode: -inf ++ftintrz.w.s :: ++input: 0.015625 ++output: 0 ++fcsr: 0x1010300 ++roundig mode: -inf ++ftintrz.w.s :: ++input: 0.031250 ++output: 0 ++fcsr: 0x1010300 ++roundig mode: -inf ++ftintrz.w.s :: ++input: -248562.750000 ++output: -248562 ++fcsr: 0x1010300 ++roundig mode: -inf ++ftintrz.w.s :: ++input: -45786.500000 ++output: -45786 ++fcsr: 0x1010300 ++roundig mode: -inf ++ftintrz.w.s :: ++input: 456.000000 ++output: 456 ++fcsr: 0x300 ++roundig mode: -inf ++ftintrz.w.s :: ++input: 34.031250 ++output: 34 ++fcsr: 0x1010300 ++roundig mode: -inf ++ftintrz.w.s :: ++input: 45786.750000 ++output: 45786 ++fcsr: 0x1010300 ++roundig mode: -inf ++ftintrz.w.s :: ++input: 1752065.000000 ++output: 1752065 ++fcsr: 0x300 ++roundig mode: -inf ++ftintrz.w.s :: ++input: 107.000000 ++output: 107 ++fcsr: 0x300 ++roundig mode: -inf ++ftintrz.w.s :: ++input: -45667.250000 ++output: -45667 ++fcsr: 0x1010300 ++roundig mode: -inf ++ftintrz.w.s :: ++input: -7.000000 ++output: -7 ++fcsr: 0x300 ++roundig mode: -inf ++ftintrz.w.s :: ++input: -347856.500000 ++output: -347856 ++fcsr: 0x1010300 ++roundig mode: -inf ++ftintrz.w.s :: ++input: 356047.500000 ++output: 356047 ++fcsr: 0x1010300 ++roundig mode: -inf ++ftintrz.w.s :: ++input: -1.000000 ++output: -1 ++fcsr: 0x300 ++roundig mode: -inf ++ftintrz.w.s :: ++input: 23.062500 ++output: 23 ++fcsr: 0x1010300 ++roundig mode: near ++ftintrz.w.d :: ++input: 0.000000000000000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++ftintrz.w.d :: ++input: 456.250000000000000 ++output: 456 ++fcsr: 0x1010000 ++roundig mode: near ++ftintrz.w.d :: ++input: 3.000000000000000 ++output: 3 ++fcsr: 0 ++roundig mode: near ++ftintrz.w.d :: ++input: -1.000000000000000 ++output: -1 ++fcsr: 0 ++roundig mode: near ++ftintrz.w.d :: ++input: 1384.500000000000000 ++output: 1384 ++fcsr: 0x1010000 ++roundig mode: near ++ftintrz.w.d :: ++input: -7.250000000000000 ++output: -7 ++fcsr: 0x1010000 ++roundig mode: near ++ftintrz.w.d :: ++input: 1000000000.000000000000000 ++output: 1000000000 ++fcsr: 0 ++roundig mode: near ++ftintrz.w.d :: ++input: -5786.500000000000000 ++output: -5786 ++fcsr: 0x1010000 ++roundig mode: near ++ftintrz.w.d :: ++input: 1752.000000000000000 ++output: 1752 ++fcsr: 0 ++roundig mode: near ++ftintrz.w.d :: ++input: 0.015625000000000 ++output: 0 
++fcsr: 0x1010000 ++roundig mode: near ++ftintrz.w.d :: ++input: 0.031250000000000 ++output: 0 ++fcsr: 0x1010000 ++roundig mode: near ++ftintrz.w.d :: ++input: -248562.750000000000000 ++output: -248562 ++fcsr: 0x1010000 ++roundig mode: near ++ftintrz.w.d :: ++input: -45786.500000000000000 ++output: -45786 ++fcsr: 0x1010000 ++roundig mode: near ++ftintrz.w.d :: ++input: 456.000000000000000 ++output: 456 ++fcsr: 0 ++roundig mode: near ++ftintrz.w.d :: ++input: 34.031250000000000 ++output: 34 ++fcsr: 0x1010000 ++roundig mode: near ++ftintrz.w.d :: ++input: 45786.750000000000000 ++output: 45786 ++fcsr: 0x1010000 ++roundig mode: near ++ftintrz.w.d :: ++input: 1752065.000000000000000 ++output: 1752065 ++fcsr: 0 ++roundig mode: near ++ftintrz.w.d :: ++input: 107.000000000000000 ++output: 107 ++fcsr: 0 ++roundig mode: near ++ftintrz.w.d :: ++input: -45667.250000000000000 ++output: -45667 ++fcsr: 0x1010000 ++roundig mode: near ++ftintrz.w.d :: ++input: -7.000000000000000 ++output: -7 ++fcsr: 0 ++roundig mode: near ++ftintrz.w.d :: ++input: -347856.500000000000000 ++output: -347856 ++fcsr: 0x1010000 ++roundig mode: near ++ftintrz.w.d :: ++input: 356047.500000000000000 ++output: 356047 ++fcsr: 0x1010000 ++roundig mode: near ++ftintrz.w.d :: ++input: -1.000000000000000 ++output: -1 ++fcsr: 0 ++roundig mode: near ++ftintrz.w.d :: ++input: 23.062500000000000 ++output: 23 ++fcsr: 0x1010000 ++roundig mode: zero ++ftintrz.w.d :: ++input: 0.000000000000000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++ftintrz.w.d :: ++input: 456.250000000000000 ++output: 456 ++fcsr: 0x1010100 ++roundig mode: zero ++ftintrz.w.d :: ++input: 3.000000000000000 ++output: 3 ++fcsr: 0x100 ++roundig mode: zero ++ftintrz.w.d :: ++input: -1.000000000000000 ++output: -1 ++fcsr: 0x100 ++roundig mode: zero ++ftintrz.w.d :: ++input: 1384.500000000000000 ++output: 1384 ++fcsr: 0x1010100 ++roundig mode: zero ++ftintrz.w.d :: ++input: -7.250000000000000 ++output: -7 ++fcsr: 0x1010100 ++roundig mode: zero ++ftintrz.w.d :: ++input: 1000000000.000000000000000 ++output: 1000000000 ++fcsr: 0x100 ++roundig mode: zero ++ftintrz.w.d :: ++input: -5786.500000000000000 ++output: -5786 ++fcsr: 0x1010100 ++roundig mode: zero ++ftintrz.w.d :: ++input: 1752.000000000000000 ++output: 1752 ++fcsr: 0x100 ++roundig mode: zero ++ftintrz.w.d :: ++input: 0.015625000000000 ++output: 0 ++fcsr: 0x1010100 ++roundig mode: zero ++ftintrz.w.d :: ++input: 0.031250000000000 ++output: 0 ++fcsr: 0x1010100 ++roundig mode: zero ++ftintrz.w.d :: ++input: -248562.750000000000000 ++output: -248562 ++fcsr: 0x1010100 ++roundig mode: zero ++ftintrz.w.d :: ++input: -45786.500000000000000 ++output: -45786 ++fcsr: 0x1010100 ++roundig mode: zero ++ftintrz.w.d :: ++input: 456.000000000000000 ++output: 456 ++fcsr: 0x100 ++roundig mode: zero ++ftintrz.w.d :: ++input: 34.031250000000000 ++output: 34 ++fcsr: 0x1010100 ++roundig mode: zero ++ftintrz.w.d :: ++input: 45786.750000000000000 ++output: 45786 ++fcsr: 0x1010100 ++roundig mode: zero ++ftintrz.w.d :: ++input: 1752065.000000000000000 ++output: 1752065 ++fcsr: 0x100 ++roundig mode: zero ++ftintrz.w.d :: ++input: 107.000000000000000 ++output: 107 ++fcsr: 0x100 ++roundig mode: zero ++ftintrz.w.d :: ++input: -45667.250000000000000 ++output: -45667 ++fcsr: 0x1010100 ++roundig mode: zero ++ftintrz.w.d :: ++input: -7.000000000000000 ++output: -7 ++fcsr: 0x100 ++roundig mode: zero ++ftintrz.w.d :: ++input: -347856.500000000000000 ++output: -347856 ++fcsr: 0x1010100 ++roundig mode: zero ++ftintrz.w.d :: ++input: 356047.500000000000000 
++output: 356047 ++fcsr: 0x1010100 ++roundig mode: zero ++ftintrz.w.d :: ++input: -1.000000000000000 ++output: -1 ++fcsr: 0x100 ++roundig mode: zero ++ftintrz.w.d :: ++input: 23.062500000000000 ++output: 23 ++fcsr: 0x1010100 ++roundig mode: +inf ++ftintrz.w.d :: ++input: 0.000000000000000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++ftintrz.w.d :: ++input: 456.250000000000000 ++output: 456 ++fcsr: 0x1010200 ++roundig mode: +inf ++ftintrz.w.d :: ++input: 3.000000000000000 ++output: 3 ++fcsr: 0x200 ++roundig mode: +inf ++ftintrz.w.d :: ++input: -1.000000000000000 ++output: -1 ++fcsr: 0x200 ++roundig mode: +inf ++ftintrz.w.d :: ++input: 1384.500000000000000 ++output: 1384 ++fcsr: 0x1010200 ++roundig mode: +inf ++ftintrz.w.d :: ++input: -7.250000000000000 ++output: -7 ++fcsr: 0x1010200 ++roundig mode: +inf ++ftintrz.w.d :: ++input: 1000000000.000000000000000 ++output: 1000000000 ++fcsr: 0x200 ++roundig mode: +inf ++ftintrz.w.d :: ++input: -5786.500000000000000 ++output: -5786 ++fcsr: 0x1010200 ++roundig mode: +inf ++ftintrz.w.d :: ++input: 1752.000000000000000 ++output: 1752 ++fcsr: 0x200 ++roundig mode: +inf ++ftintrz.w.d :: ++input: 0.015625000000000 ++output: 0 ++fcsr: 0x1010200 ++roundig mode: +inf ++ftintrz.w.d :: ++input: 0.031250000000000 ++output: 0 ++fcsr: 0x1010200 ++roundig mode: +inf ++ftintrz.w.d :: ++input: -248562.750000000000000 ++output: -248562 ++fcsr: 0x1010200 ++roundig mode: +inf ++ftintrz.w.d :: ++input: -45786.500000000000000 ++output: -45786 ++fcsr: 0x1010200 ++roundig mode: +inf ++ftintrz.w.d :: ++input: 456.000000000000000 ++output: 456 ++fcsr: 0x200 ++roundig mode: +inf ++ftintrz.w.d :: ++input: 34.031250000000000 ++output: 34 ++fcsr: 0x1010200 ++roundig mode: +inf ++ftintrz.w.d :: ++input: 45786.750000000000000 ++output: 45786 ++fcsr: 0x1010200 ++roundig mode: +inf ++ftintrz.w.d :: ++input: 1752065.000000000000000 ++output: 1752065 ++fcsr: 0x200 ++roundig mode: +inf ++ftintrz.w.d :: ++input: 107.000000000000000 ++output: 107 ++fcsr: 0x200 ++roundig mode: +inf ++ftintrz.w.d :: ++input: -45667.250000000000000 ++output: -45667 ++fcsr: 0x1010200 ++roundig mode: +inf ++ftintrz.w.d :: ++input: -7.000000000000000 ++output: -7 ++fcsr: 0x200 ++roundig mode: +inf ++ftintrz.w.d :: ++input: -347856.500000000000000 ++output: -347856 ++fcsr: 0x1010200 ++roundig mode: +inf ++ftintrz.w.d :: ++input: 356047.500000000000000 ++output: 356047 ++fcsr: 0x1010200 ++roundig mode: +inf ++ftintrz.w.d :: ++input: -1.000000000000000 ++output: -1 ++fcsr: 0x200 ++roundig mode: +inf ++ftintrz.w.d :: ++input: 23.062500000000000 ++output: 23 ++fcsr: 0x1010200 ++roundig mode: -inf ++ftintrz.w.d :: ++input: 0.000000000000000 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++ftintrz.w.d :: ++input: 456.250000000000000 ++output: 456 ++fcsr: 0x1010300 ++roundig mode: -inf ++ftintrz.w.d :: ++input: 3.000000000000000 ++output: 3 ++fcsr: 0x300 ++roundig mode: -inf ++ftintrz.w.d :: ++input: -1.000000000000000 ++output: -1 ++fcsr: 0x300 ++roundig mode: -inf ++ftintrz.w.d :: ++input: 1384.500000000000000 ++output: 1384 ++fcsr: 0x1010300 ++roundig mode: -inf ++ftintrz.w.d :: ++input: -7.250000000000000 ++output: -7 ++fcsr: 0x1010300 ++roundig mode: -inf ++ftintrz.w.d :: ++input: 1000000000.000000000000000 ++output: 1000000000 ++fcsr: 0x300 ++roundig mode: -inf ++ftintrz.w.d :: ++input: -5786.500000000000000 ++output: -5786 ++fcsr: 0x1010300 ++roundig mode: -inf ++ftintrz.w.d :: ++input: 1752.000000000000000 ++output: 1752 ++fcsr: 0x300 ++roundig mode: -inf ++ftintrz.w.d :: ++input: 0.015625000000000 
++output: 0 ++fcsr: 0x1010300 ++roundig mode: -inf ++ftintrz.w.d :: ++input: 0.031250000000000 ++output: 0 ++fcsr: 0x1010300 ++roundig mode: -inf ++ftintrz.w.d :: ++input: -248562.750000000000000 ++output: -248562 ++fcsr: 0x1010300 ++roundig mode: -inf ++ftintrz.w.d :: ++input: -45786.500000000000000 ++output: -45786 ++fcsr: 0x1010300 ++roundig mode: -inf ++ftintrz.w.d :: ++input: 456.000000000000000 ++output: 456 ++fcsr: 0x300 ++roundig mode: -inf ++ftintrz.w.d :: ++input: 34.031250000000000 ++output: 34 ++fcsr: 0x1010300 ++roundig mode: -inf ++ftintrz.w.d :: ++input: 45786.750000000000000 ++output: 45786 ++fcsr: 0x1010300 ++roundig mode: -inf ++ftintrz.w.d :: ++input: 1752065.000000000000000 ++output: 1752065 ++fcsr: 0x300 ++roundig mode: -inf ++ftintrz.w.d :: ++input: 107.000000000000000 ++output: 107 ++fcsr: 0x300 ++roundig mode: -inf ++ftintrz.w.d :: ++input: -45667.250000000000000 ++output: -45667 ++fcsr: 0x1010300 ++roundig mode: -inf ++ftintrz.w.d :: ++input: -7.000000000000000 ++output: -7 ++fcsr: 0x300 ++roundig mode: -inf ++ftintrz.w.d :: ++input: -347856.500000000000000 ++output: -347856 ++fcsr: 0x1010300 ++roundig mode: -inf ++ftintrz.w.d :: ++input: 356047.500000000000000 ++output: 356047 ++fcsr: 0x1010300 ++roundig mode: -inf ++ftintrz.w.d :: ++input: -1.000000000000000 ++output: -1 ++fcsr: 0x300 ++roundig mode: -inf ++ftintrz.w.d :: ++input: 23.062500000000000 ++output: 23 ++fcsr: 0x1010300 ++roundig mode: near ++ftintrz.l.s :: ++input: 0.000000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++ftintrz.l.s :: ++input: 456.250000 ++output: 456 ++fcsr: 0x1010000 ++roundig mode: near ++ftintrz.l.s :: ++input: 3.000000 ++output: 3 ++fcsr: 0 ++roundig mode: near ++ftintrz.l.s :: ++input: -1.000000 ++output: -1 ++fcsr: 0 ++roundig mode: near ++ftintrz.l.s :: ++input: 1384.500000 ++output: 1384 ++fcsr: 0x1010000 ++roundig mode: near ++ftintrz.l.s :: ++input: -7.250000 ++output: -7 ++fcsr: 0x1010000 ++roundig mode: near ++ftintrz.l.s :: ++input: 1000000000.000000 ++output: 1000000000 ++fcsr: 0 ++roundig mode: near ++ftintrz.l.s :: ++input: -5786.500000 ++output: -5786 ++fcsr: 0x1010000 ++roundig mode: near ++ftintrz.l.s :: ++input: 1752.000000 ++output: 1752 ++fcsr: 0 ++roundig mode: near ++ftintrz.l.s :: ++input: 0.015625 ++output: 0 ++fcsr: 0x1010000 ++roundig mode: near ++ftintrz.l.s :: ++input: 0.031250 ++output: 0 ++fcsr: 0x1010000 ++roundig mode: near ++ftintrz.l.s :: ++input: -248562.750000 ++output: -248562 ++fcsr: 0x1010000 ++roundig mode: near ++ftintrz.l.s :: ++input: -45786.500000 ++output: -45786 ++fcsr: 0x1010000 ++roundig mode: near ++ftintrz.l.s :: ++input: 456.000000 ++output: 456 ++fcsr: 0 ++roundig mode: near ++ftintrz.l.s :: ++input: 34.031250 ++output: 34 ++fcsr: 0x1010000 ++roundig mode: near ++ftintrz.l.s :: ++input: 45786.750000 ++output: 45786 ++fcsr: 0x1010000 ++roundig mode: near ++ftintrz.l.s :: ++input: 1752065.000000 ++output: 1752065 ++fcsr: 0 ++roundig mode: near ++ftintrz.l.s :: ++input: 107.000000 ++output: 107 ++fcsr: 0 ++roundig mode: near ++ftintrz.l.s :: ++input: -45667.250000 ++output: -45667 ++fcsr: 0x1010000 ++roundig mode: near ++ftintrz.l.s :: ++input: -7.000000 ++output: -7 ++fcsr: 0 ++roundig mode: near ++ftintrz.l.s :: ++input: -347856.500000 ++output: -347856 ++fcsr: 0x1010000 ++roundig mode: near ++ftintrz.l.s :: ++input: 356047.500000 ++output: 356047 ++fcsr: 0x1010000 ++roundig mode: near ++ftintrz.l.s :: ++input: -1.000000 ++output: -1 ++fcsr: 0 ++roundig mode: near ++ftintrz.l.s :: ++input: 23.062500 ++output: 23 ++fcsr: 0x1010000 
++roundig mode: zero ++ftintrz.l.s :: ++input: 0.000000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++ftintrz.l.s :: ++input: 456.250000 ++output: 456 ++fcsr: 0x1010100 ++roundig mode: zero ++ftintrz.l.s :: ++input: 3.000000 ++output: 3 ++fcsr: 0x100 ++roundig mode: zero ++ftintrz.l.s :: ++input: -1.000000 ++output: -1 ++fcsr: 0x100 ++roundig mode: zero ++ftintrz.l.s :: ++input: 1384.500000 ++output: 1384 ++fcsr: 0x1010100 ++roundig mode: zero ++ftintrz.l.s :: ++input: -7.250000 ++output: -7 ++fcsr: 0x1010100 ++roundig mode: zero ++ftintrz.l.s :: ++input: 1000000000.000000 ++output: 1000000000 ++fcsr: 0x100 ++roundig mode: zero ++ftintrz.l.s :: ++input: -5786.500000 ++output: -5786 ++fcsr: 0x1010100 ++roundig mode: zero ++ftintrz.l.s :: ++input: 1752.000000 ++output: 1752 ++fcsr: 0x100 ++roundig mode: zero ++ftintrz.l.s :: ++input: 0.015625 ++output: 0 ++fcsr: 0x1010100 ++roundig mode: zero ++ftintrz.l.s :: ++input: 0.031250 ++output: 0 ++fcsr: 0x1010100 ++roundig mode: zero ++ftintrz.l.s :: ++input: -248562.750000 ++output: -248562 ++fcsr: 0x1010100 ++roundig mode: zero ++ftintrz.l.s :: ++input: -45786.500000 ++output: -45786 ++fcsr: 0x1010100 ++roundig mode: zero ++ftintrz.l.s :: ++input: 456.000000 ++output: 456 ++fcsr: 0x100 ++roundig mode: zero ++ftintrz.l.s :: ++input: 34.031250 ++output: 34 ++fcsr: 0x1010100 ++roundig mode: zero ++ftintrz.l.s :: ++input: 45786.750000 ++output: 45786 ++fcsr: 0x1010100 ++roundig mode: zero ++ftintrz.l.s :: ++input: 1752065.000000 ++output: 1752065 ++fcsr: 0x100 ++roundig mode: zero ++ftintrz.l.s :: ++input: 107.000000 ++output: 107 ++fcsr: 0x100 ++roundig mode: zero ++ftintrz.l.s :: ++input: -45667.250000 ++output: -45667 ++fcsr: 0x1010100 ++roundig mode: zero ++ftintrz.l.s :: ++input: -7.000000 ++output: -7 ++fcsr: 0x100 ++roundig mode: zero ++ftintrz.l.s :: ++input: -347856.500000 ++output: -347856 ++fcsr: 0x1010100 ++roundig mode: zero ++ftintrz.l.s :: ++input: 356047.500000 ++output: 356047 ++fcsr: 0x1010100 ++roundig mode: zero ++ftintrz.l.s :: ++input: -1.000000 ++output: -1 ++fcsr: 0x100 ++roundig mode: zero ++ftintrz.l.s :: ++input: 23.062500 ++output: 23 ++fcsr: 0x1010100 ++roundig mode: +inf ++ftintrz.l.s :: ++input: 0.000000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++ftintrz.l.s :: ++input: 456.250000 ++output: 456 ++fcsr: 0x1010200 ++roundig mode: +inf ++ftintrz.l.s :: ++input: 3.000000 ++output: 3 ++fcsr: 0x200 ++roundig mode: +inf ++ftintrz.l.s :: ++input: -1.000000 ++output: -1 ++fcsr: 0x200 ++roundig mode: +inf ++ftintrz.l.s :: ++input: 1384.500000 ++output: 1384 ++fcsr: 0x1010200 ++roundig mode: +inf ++ftintrz.l.s :: ++input: -7.250000 ++output: -7 ++fcsr: 0x1010200 ++roundig mode: +inf ++ftintrz.l.s :: ++input: 1000000000.000000 ++output: 1000000000 ++fcsr: 0x200 ++roundig mode: +inf ++ftintrz.l.s :: ++input: -5786.500000 ++output: -5786 ++fcsr: 0x1010200 ++roundig mode: +inf ++ftintrz.l.s :: ++input: 1752.000000 ++output: 1752 ++fcsr: 0x200 ++roundig mode: +inf ++ftintrz.l.s :: ++input: 0.015625 ++output: 0 ++fcsr: 0x1010200 ++roundig mode: +inf ++ftintrz.l.s :: ++input: 0.031250 ++output: 0 ++fcsr: 0x1010200 ++roundig mode: +inf ++ftintrz.l.s :: ++input: -248562.750000 ++output: -248562 ++fcsr: 0x1010200 ++roundig mode: +inf ++ftintrz.l.s :: ++input: -45786.500000 ++output: -45786 ++fcsr: 0x1010200 ++roundig mode: +inf ++ftintrz.l.s :: ++input: 456.000000 ++output: 456 ++fcsr: 0x200 ++roundig mode: +inf ++ftintrz.l.s :: ++input: 34.031250 ++output: 34 ++fcsr: 0x1010200 ++roundig mode: +inf ++ftintrz.l.s :: ++input: 
45786.750000 ++output: 45786 ++fcsr: 0x1010200 ++roundig mode: +inf ++ftintrz.l.s :: ++input: 1752065.000000 ++output: 1752065 ++fcsr: 0x200 ++roundig mode: +inf ++ftintrz.l.s :: ++input: 107.000000 ++output: 107 ++fcsr: 0x200 ++roundig mode: +inf ++ftintrz.l.s :: ++input: -45667.250000 ++output: -45667 ++fcsr: 0x1010200 ++roundig mode: +inf ++ftintrz.l.s :: ++input: -7.000000 ++output: -7 ++fcsr: 0x200 ++roundig mode: +inf ++ftintrz.l.s :: ++input: -347856.500000 ++output: -347856 ++fcsr: 0x1010200 ++roundig mode: +inf ++ftintrz.l.s :: ++input: 356047.500000 ++output: 356047 ++fcsr: 0x1010200 ++roundig mode: +inf ++ftintrz.l.s :: ++input: -1.000000 ++output: -1 ++fcsr: 0x200 ++roundig mode: +inf ++ftintrz.l.s :: ++input: 23.062500 ++output: 23 ++fcsr: 0x1010200 ++roundig mode: -inf ++ftintrz.l.s :: ++input: 0.000000 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++ftintrz.l.s :: ++input: 456.250000 ++output: 456 ++fcsr: 0x1010300 ++roundig mode: -inf ++ftintrz.l.s :: ++input: 3.000000 ++output: 3 ++fcsr: 0x300 ++roundig mode: -inf ++ftintrz.l.s :: ++input: -1.000000 ++output: -1 ++fcsr: 0x300 ++roundig mode: -inf ++ftintrz.l.s :: ++input: 1384.500000 ++output: 1384 ++fcsr: 0x1010300 ++roundig mode: -inf ++ftintrz.l.s :: ++input: -7.250000 ++output: -7 ++fcsr: 0x1010300 ++roundig mode: -inf ++ftintrz.l.s :: ++input: 1000000000.000000 ++output: 1000000000 ++fcsr: 0x300 ++roundig mode: -inf ++ftintrz.l.s :: ++input: -5786.500000 ++output: -5786 ++fcsr: 0x1010300 ++roundig mode: -inf ++ftintrz.l.s :: ++input: 1752.000000 ++output: 1752 ++fcsr: 0x300 ++roundig mode: -inf ++ftintrz.l.s :: ++input: 0.015625 ++output: 0 ++fcsr: 0x1010300 ++roundig mode: -inf ++ftintrz.l.s :: ++input: 0.031250 ++output: 0 ++fcsr: 0x1010300 ++roundig mode: -inf ++ftintrz.l.s :: ++input: -248562.750000 ++output: -248562 ++fcsr: 0x1010300 ++roundig mode: -inf ++ftintrz.l.s :: ++input: -45786.500000 ++output: -45786 ++fcsr: 0x1010300 ++roundig mode: -inf ++ftintrz.l.s :: ++input: 456.000000 ++output: 456 ++fcsr: 0x300 ++roundig mode: -inf ++ftintrz.l.s :: ++input: 34.031250 ++output: 34 ++fcsr: 0x1010300 ++roundig mode: -inf ++ftintrz.l.s :: ++input: 45786.750000 ++output: 45786 ++fcsr: 0x1010300 ++roundig mode: -inf ++ftintrz.l.s :: ++input: 1752065.000000 ++output: 1752065 ++fcsr: 0x300 ++roundig mode: -inf ++ftintrz.l.s :: ++input: 107.000000 ++output: 107 ++fcsr: 0x300 ++roundig mode: -inf ++ftintrz.l.s :: ++input: -45667.250000 ++output: -45667 ++fcsr: 0x1010300 ++roundig mode: -inf ++ftintrz.l.s :: ++input: -7.000000 ++output: -7 ++fcsr: 0x300 ++roundig mode: -inf ++ftintrz.l.s :: ++input: -347856.500000 ++output: -347856 ++fcsr: 0x1010300 ++roundig mode: -inf ++ftintrz.l.s :: ++input: 356047.500000 ++output: 356047 ++fcsr: 0x1010300 ++roundig mode: -inf ++ftintrz.l.s :: ++input: -1.000000 ++output: -1 ++fcsr: 0x300 ++roundig mode: -inf ++ftintrz.l.s :: ++input: 23.062500 ++output: 23 ++fcsr: 0x1010300 ++roundig mode: near ++ftintrz.l.d :: ++input: 0.000000000000000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++ftintrz.l.d :: ++input: 456.250000000000000 ++output: 456 ++fcsr: 0x1010000 ++roundig mode: near ++ftintrz.l.d :: ++input: 3.000000000000000 ++output: 3 ++fcsr: 0 ++roundig mode: near ++ftintrz.l.d :: ++input: -1.000000000000000 ++output: -1 ++fcsr: 0 ++roundig mode: near ++ftintrz.l.d :: ++input: 1384.500000000000000 ++output: 1384 ++fcsr: 0x1010000 ++roundig mode: near ++ftintrz.l.d :: ++input: -7.250000000000000 ++output: -7 ++fcsr: 0x1010000 ++roundig mode: near ++ftintrz.l.d :: ++input: 
1000000000.000000000000000 ++output: 1000000000 ++fcsr: 0 ++roundig mode: near ++ftintrz.l.d :: ++input: -5786.500000000000000 ++output: -5786 ++fcsr: 0x1010000 ++roundig mode: near ++ftintrz.l.d :: ++input: 1752.000000000000000 ++output: 1752 ++fcsr: 0 ++roundig mode: near ++ftintrz.l.d :: ++input: 0.015625000000000 ++output: 0 ++fcsr: 0x1010000 ++roundig mode: near ++ftintrz.l.d :: ++input: 0.031250000000000 ++output: 0 ++fcsr: 0x1010000 ++roundig mode: near ++ftintrz.l.d :: ++input: -248562.750000000000000 ++output: -248562 ++fcsr: 0x1010000 ++roundig mode: near ++ftintrz.l.d :: ++input: -45786.500000000000000 ++output: -45786 ++fcsr: 0x1010000 ++roundig mode: near ++ftintrz.l.d :: ++input: 456.000000000000000 ++output: 456 ++fcsr: 0 ++roundig mode: near ++ftintrz.l.d :: ++input: 34.031250000000000 ++output: 34 ++fcsr: 0x1010000 ++roundig mode: near ++ftintrz.l.d :: ++input: 45786.750000000000000 ++output: 45786 ++fcsr: 0x1010000 ++roundig mode: near ++ftintrz.l.d :: ++input: 1752065.000000000000000 ++output: 1752065 ++fcsr: 0 ++roundig mode: near ++ftintrz.l.d :: ++input: 107.000000000000000 ++output: 107 ++fcsr: 0 ++roundig mode: near ++ftintrz.l.d :: ++input: -45667.250000000000000 ++output: -45667 ++fcsr: 0x1010000 ++roundig mode: near ++ftintrz.l.d :: ++input: -7.000000000000000 ++output: -7 ++fcsr: 0 ++roundig mode: near ++ftintrz.l.d :: ++input: -347856.500000000000000 ++output: -347856 ++fcsr: 0x1010000 ++roundig mode: near ++ftintrz.l.d :: ++input: 356047.500000000000000 ++output: 356047 ++fcsr: 0x1010000 ++roundig mode: near ++ftintrz.l.d :: ++input: -1.000000000000000 ++output: -1 ++fcsr: 0 ++roundig mode: near ++ftintrz.l.d :: ++input: 23.062500000000000 ++output: 23 ++fcsr: 0x1010000 ++roundig mode: zero ++ftintrz.l.d :: ++input: 0.000000000000000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++ftintrz.l.d :: ++input: 456.250000000000000 ++output: 456 ++fcsr: 0x1010100 ++roundig mode: zero ++ftintrz.l.d :: ++input: 3.000000000000000 ++output: 3 ++fcsr: 0x100 ++roundig mode: zero ++ftintrz.l.d :: ++input: -1.000000000000000 ++output: -1 ++fcsr: 0x100 ++roundig mode: zero ++ftintrz.l.d :: ++input: 1384.500000000000000 ++output: 1384 ++fcsr: 0x1010100 ++roundig mode: zero ++ftintrz.l.d :: ++input: -7.250000000000000 ++output: -7 ++fcsr: 0x1010100 ++roundig mode: zero ++ftintrz.l.d :: ++input: 1000000000.000000000000000 ++output: 1000000000 ++fcsr: 0x100 ++roundig mode: zero ++ftintrz.l.d :: ++input: -5786.500000000000000 ++output: -5786 ++fcsr: 0x1010100 ++roundig mode: zero ++ftintrz.l.d :: ++input: 1752.000000000000000 ++output: 1752 ++fcsr: 0x100 ++roundig mode: zero ++ftintrz.l.d :: ++input: 0.015625000000000 ++output: 0 ++fcsr: 0x1010100 ++roundig mode: zero ++ftintrz.l.d :: ++input: 0.031250000000000 ++output: 0 ++fcsr: 0x1010100 ++roundig mode: zero ++ftintrz.l.d :: ++input: -248562.750000000000000 ++output: -248562 ++fcsr: 0x1010100 ++roundig mode: zero ++ftintrz.l.d :: ++input: -45786.500000000000000 ++output: -45786 ++fcsr: 0x1010100 ++roundig mode: zero ++ftintrz.l.d :: ++input: 456.000000000000000 ++output: 456 ++fcsr: 0x100 ++roundig mode: zero ++ftintrz.l.d :: ++input: 34.031250000000000 ++output: 34 ++fcsr: 0x1010100 ++roundig mode: zero ++ftintrz.l.d :: ++input: 45786.750000000000000 ++output: 45786 ++fcsr: 0x1010100 ++roundig mode: zero ++ftintrz.l.d :: ++input: 1752065.000000000000000 ++output: 1752065 ++fcsr: 0x100 ++roundig mode: zero ++ftintrz.l.d :: ++input: 107.000000000000000 ++output: 107 ++fcsr: 0x100 ++roundig mode: zero ++ftintrz.l.d :: ++input: 
-45667.250000000000000 ++output: -45667 ++fcsr: 0x1010100 ++roundig mode: zero ++ftintrz.l.d :: ++input: -7.000000000000000 ++output: -7 ++fcsr: 0x100 ++roundig mode: zero ++ftintrz.l.d :: ++input: -347856.500000000000000 ++output: -347856 ++fcsr: 0x1010100 ++roundig mode: zero ++ftintrz.l.d :: ++input: 356047.500000000000000 ++output: 356047 ++fcsr: 0x1010100 ++roundig mode: zero ++ftintrz.l.d :: ++input: -1.000000000000000 ++output: -1 ++fcsr: 0x100 ++roundig mode: zero ++ftintrz.l.d :: ++input: 23.062500000000000 ++output: 23 ++fcsr: 0x1010100 ++roundig mode: +inf ++ftintrz.l.d :: ++input: 0.000000000000000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++ftintrz.l.d :: ++input: 456.250000000000000 ++output: 456 ++fcsr: 0x1010200 ++roundig mode: +inf ++ftintrz.l.d :: ++input: 3.000000000000000 ++output: 3 ++fcsr: 0x200 ++roundig mode: +inf ++ftintrz.l.d :: ++input: -1.000000000000000 ++output: -1 ++fcsr: 0x200 ++roundig mode: +inf ++ftintrz.l.d :: ++input: 1384.500000000000000 ++output: 1384 ++fcsr: 0x1010200 ++roundig mode: +inf ++ftintrz.l.d :: ++input: -7.250000000000000 ++output: -7 ++fcsr: 0x1010200 ++roundig mode: +inf ++ftintrz.l.d :: ++input: 1000000000.000000000000000 ++output: 1000000000 ++fcsr: 0x200 ++roundig mode: +inf ++ftintrz.l.d :: ++input: -5786.500000000000000 ++output: -5786 ++fcsr: 0x1010200 ++roundig mode: +inf ++ftintrz.l.d :: ++input: 1752.000000000000000 ++output: 1752 ++fcsr: 0x200 ++roundig mode: +inf ++ftintrz.l.d :: ++input: 0.015625000000000 ++output: 0 ++fcsr: 0x1010200 ++roundig mode: +inf ++ftintrz.l.d :: ++input: 0.031250000000000 ++output: 0 ++fcsr: 0x1010200 ++roundig mode: +inf ++ftintrz.l.d :: ++input: -248562.750000000000000 ++output: -248562 ++fcsr: 0x1010200 ++roundig mode: +inf ++ftintrz.l.d :: ++input: -45786.500000000000000 ++output: -45786 ++fcsr: 0x1010200 ++roundig mode: +inf ++ftintrz.l.d :: ++input: 456.000000000000000 ++output: 456 ++fcsr: 0x200 ++roundig mode: +inf ++ftintrz.l.d :: ++input: 34.031250000000000 ++output: 34 ++fcsr: 0x1010200 ++roundig mode: +inf ++ftintrz.l.d :: ++input: 45786.750000000000000 ++output: 45786 ++fcsr: 0x1010200 ++roundig mode: +inf ++ftintrz.l.d :: ++input: 1752065.000000000000000 ++output: 1752065 ++fcsr: 0x200 ++roundig mode: +inf ++ftintrz.l.d :: ++input: 107.000000000000000 ++output: 107 ++fcsr: 0x200 ++roundig mode: +inf ++ftintrz.l.d :: ++input: -45667.250000000000000 ++output: -45667 ++fcsr: 0x1010200 ++roundig mode: +inf ++ftintrz.l.d :: ++input: -7.000000000000000 ++output: -7 ++fcsr: 0x200 ++roundig mode: +inf ++ftintrz.l.d :: ++input: -347856.500000000000000 ++output: -347856 ++fcsr: 0x1010200 ++roundig mode: +inf ++ftintrz.l.d :: ++input: 356047.500000000000000 ++output: 356047 ++fcsr: 0x1010200 ++roundig mode: +inf ++ftintrz.l.d :: ++input: -1.000000000000000 ++output: -1 ++fcsr: 0x200 ++roundig mode: +inf ++ftintrz.l.d :: ++input: 23.062500000000000 ++output: 23 ++fcsr: 0x1010200 ++roundig mode: -inf ++ftintrz.l.d :: ++input: 0.000000000000000 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++ftintrz.l.d :: ++input: 456.250000000000000 ++output: 456 ++fcsr: 0x1010300 ++roundig mode: -inf ++ftintrz.l.d :: ++input: 3.000000000000000 ++output: 3 ++fcsr: 0x300 ++roundig mode: -inf ++ftintrz.l.d :: ++input: -1.000000000000000 ++output: -1 ++fcsr: 0x300 ++roundig mode: -inf ++ftintrz.l.d :: ++input: 1384.500000000000000 ++output: 1384 ++fcsr: 0x1010300 ++roundig mode: -inf ++ftintrz.l.d :: ++input: -7.250000000000000 ++output: -7 ++fcsr: 0x1010300 ++roundig mode: -inf ++ftintrz.l.d :: ++input: 
1000000000.000000000000000 ++output: 1000000000 ++fcsr: 0x300 ++roundig mode: -inf ++ftintrz.l.d :: ++input: -5786.500000000000000 ++output: -5786 ++fcsr: 0x1010300 ++roundig mode: -inf ++ftintrz.l.d :: ++input: 1752.000000000000000 ++output: 1752 ++fcsr: 0x300 ++roundig mode: -inf ++ftintrz.l.d :: ++input: 0.015625000000000 ++output: 0 ++fcsr: 0x1010300 ++roundig mode: -inf ++ftintrz.l.d :: ++input: 0.031250000000000 ++output: 0 ++fcsr: 0x1010300 ++roundig mode: -inf ++ftintrz.l.d :: ++input: -248562.750000000000000 ++output: -248562 ++fcsr: 0x1010300 ++roundig mode: -inf ++ftintrz.l.d :: ++input: -45786.500000000000000 ++output: -45786 ++fcsr: 0x1010300 ++roundig mode: -inf ++ftintrz.l.d :: ++input: 456.000000000000000 ++output: 456 ++fcsr: 0x300 ++roundig mode: -inf ++ftintrz.l.d :: ++input: 34.031250000000000 ++output: 34 ++fcsr: 0x1010300 ++roundig mode: -inf ++ftintrz.l.d :: ++input: 45786.750000000000000 ++output: 45786 ++fcsr: 0x1010300 ++roundig mode: -inf ++ftintrz.l.d :: ++input: 1752065.000000000000000 ++output: 1752065 ++fcsr: 0x300 ++roundig mode: -inf ++ftintrz.l.d :: ++input: 107.000000000000000 ++output: 107 ++fcsr: 0x300 ++roundig mode: -inf ++ftintrz.l.d :: ++input: -45667.250000000000000 ++output: -45667 ++fcsr: 0x1010300 ++roundig mode: -inf ++ftintrz.l.d :: ++input: -7.000000000000000 ++output: -7 ++fcsr: 0x300 ++roundig mode: -inf ++ftintrz.l.d :: ++input: -347856.500000000000000 ++output: -347856 ++fcsr: 0x1010300 ++roundig mode: -inf ++ftintrz.l.d :: ++input: 356047.500000000000000 ++output: 356047 ++fcsr: 0x1010300 ++roundig mode: -inf ++ftintrz.l.d :: ++input: -1.000000000000000 ++output: -1 ++fcsr: 0x300 ++roundig mode: -inf ++ftintrz.l.d :: ++input: 23.062500000000000 ++output: 23 ++fcsr: 0x1010300 ++roundig mode: near ++ftintrne.w.s :: ++input: 0.000000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++ftintrne.w.s :: ++input: 456.250000 ++output: 456 ++fcsr: 0x1010000 ++roundig mode: near ++ftintrne.w.s :: ++input: 3.000000 ++output: 3 ++fcsr: 0 ++roundig mode: near ++ftintrne.w.s :: ++input: -1.000000 ++output: -1 ++fcsr: 0 ++roundig mode: near ++ftintrne.w.s :: ++input: 1384.500000 ++output: 1384 ++fcsr: 0x1010000 ++roundig mode: near ++ftintrne.w.s :: ++input: -7.250000 ++output: -7 ++fcsr: 0x1010000 ++roundig mode: near ++ftintrne.w.s :: ++input: 1000000000.000000 ++output: 1000000000 ++fcsr: 0 ++roundig mode: near ++ftintrne.w.s :: ++input: -5786.500000 ++output: -5786 ++fcsr: 0x1010000 ++roundig mode: near ++ftintrne.w.s :: ++input: 1752.000000 ++output: 1752 ++fcsr: 0 ++roundig mode: near ++ftintrne.w.s :: ++input: 0.015625 ++output: 0 ++fcsr: 0x1010000 ++roundig mode: near ++ftintrne.w.s :: ++input: 0.031250 ++output: 0 ++fcsr: 0x1010000 ++roundig mode: near ++ftintrne.w.s :: ++input: -248562.750000 ++output: -248563 ++fcsr: 0x1010000 ++roundig mode: near ++ftintrne.w.s :: ++input: -45786.500000 ++output: -45786 ++fcsr: 0x1010000 ++roundig mode: near ++ftintrne.w.s :: ++input: 456.000000 ++output: 456 ++fcsr: 0 ++roundig mode: near ++ftintrne.w.s :: ++input: 34.031250 ++output: 34 ++fcsr: 0x1010000 ++roundig mode: near ++ftintrne.w.s :: ++input: 45786.750000 ++output: 45787 ++fcsr: 0x1010000 ++roundig mode: near ++ftintrne.w.s :: ++input: 1752065.000000 ++output: 1752065 ++fcsr: 0 ++roundig mode: near ++ftintrne.w.s :: ++input: 107.000000 ++output: 107 ++fcsr: 0 ++roundig mode: near ++ftintrne.w.s :: ++input: -45667.250000 ++output: -45667 ++fcsr: 0x1010000 ++roundig mode: near ++ftintrne.w.s :: ++input: -7.000000 ++output: -7 ++fcsr: 0 ++roundig mode: 
near ++ftintrne.w.s :: ++input: -347856.500000 ++output: -347856 ++fcsr: 0x1010000 ++roundig mode: near ++ftintrne.w.s :: ++input: 356047.500000 ++output: 356048 ++fcsr: 0x1010000 ++roundig mode: near ++ftintrne.w.s :: ++input: -1.000000 ++output: -1 ++fcsr: 0 ++roundig mode: near ++ftintrne.w.s :: ++input: 23.062500 ++output: 23 ++fcsr: 0x1010000 ++roundig mode: zero ++ftintrne.w.s :: ++input: 0.000000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++ftintrne.w.s :: ++input: 456.250000 ++output: 456 ++fcsr: 0x1010100 ++roundig mode: zero ++ftintrne.w.s :: ++input: 3.000000 ++output: 3 ++fcsr: 0x100 ++roundig mode: zero ++ftintrne.w.s :: ++input: -1.000000 ++output: -1 ++fcsr: 0x100 ++roundig mode: zero ++ftintrne.w.s :: ++input: 1384.500000 ++output: 1384 ++fcsr: 0x1010100 ++roundig mode: zero ++ftintrne.w.s :: ++input: -7.250000 ++output: -7 ++fcsr: 0x1010100 ++roundig mode: zero ++ftintrne.w.s :: ++input: 1000000000.000000 ++output: 1000000000 ++fcsr: 0x100 ++roundig mode: zero ++ftintrne.w.s :: ++input: -5786.500000 ++output: -5786 ++fcsr: 0x1010100 ++roundig mode: zero ++ftintrne.w.s :: ++input: 1752.000000 ++output: 1752 ++fcsr: 0x100 ++roundig mode: zero ++ftintrne.w.s :: ++input: 0.015625 ++output: 0 ++fcsr: 0x1010100 ++roundig mode: zero ++ftintrne.w.s :: ++input: 0.031250 ++output: 0 ++fcsr: 0x1010100 ++roundig mode: zero ++ftintrne.w.s :: ++input: -248562.750000 ++output: -248563 ++fcsr: 0x1010100 ++roundig mode: zero ++ftintrne.w.s :: ++input: -45786.500000 ++output: -45786 ++fcsr: 0x1010100 ++roundig mode: zero ++ftintrne.w.s :: ++input: 456.000000 ++output: 456 ++fcsr: 0x100 ++roundig mode: zero ++ftintrne.w.s :: ++input: 34.031250 ++output: 34 ++fcsr: 0x1010100 ++roundig mode: zero ++ftintrne.w.s :: ++input: 45786.750000 ++output: 45787 ++fcsr: 0x1010100 ++roundig mode: zero ++ftintrne.w.s :: ++input: 1752065.000000 ++output: 1752065 ++fcsr: 0x100 ++roundig mode: zero ++ftintrne.w.s :: ++input: 107.000000 ++output: 107 ++fcsr: 0x100 ++roundig mode: zero ++ftintrne.w.s :: ++input: -45667.250000 ++output: -45667 ++fcsr: 0x1010100 ++roundig mode: zero ++ftintrne.w.s :: ++input: -7.000000 ++output: -7 ++fcsr: 0x100 ++roundig mode: zero ++ftintrne.w.s :: ++input: -347856.500000 ++output: -347856 ++fcsr: 0x1010100 ++roundig mode: zero ++ftintrne.w.s :: ++input: 356047.500000 ++output: 356048 ++fcsr: 0x1010100 ++roundig mode: zero ++ftintrne.w.s :: ++input: -1.000000 ++output: -1 ++fcsr: 0x100 ++roundig mode: zero ++ftintrne.w.s :: ++input: 23.062500 ++output: 23 ++fcsr: 0x1010100 ++roundig mode: +inf ++ftintrne.w.s :: ++input: 0.000000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++ftintrne.w.s :: ++input: 456.250000 ++output: 456 ++fcsr: 0x1010200 ++roundig mode: +inf ++ftintrne.w.s :: ++input: 3.000000 ++output: 3 ++fcsr: 0x200 ++roundig mode: +inf ++ftintrne.w.s :: ++input: -1.000000 ++output: -1 ++fcsr: 0x200 ++roundig mode: +inf ++ftintrne.w.s :: ++input: 1384.500000 ++output: 1384 ++fcsr: 0x1010200 ++roundig mode: +inf ++ftintrne.w.s :: ++input: -7.250000 ++output: -7 ++fcsr: 0x1010200 ++roundig mode: +inf ++ftintrne.w.s :: ++input: 1000000000.000000 ++output: 1000000000 ++fcsr: 0x200 ++roundig mode: +inf ++ftintrne.w.s :: ++input: -5786.500000 ++output: -5786 ++fcsr: 0x1010200 ++roundig mode: +inf ++ftintrne.w.s :: ++input: 1752.000000 ++output: 1752 ++fcsr: 0x200 ++roundig mode: +inf ++ftintrne.w.s :: ++input: 0.015625 ++output: 0 ++fcsr: 0x1010200 ++roundig mode: +inf ++ftintrne.w.s :: ++input: 0.031250 ++output: 0 ++fcsr: 0x1010200 ++roundig mode: +inf ++ftintrne.w.s 
:: ++input: -248562.750000 ++output: -248563 ++fcsr: 0x1010200 ++roundig mode: +inf ++ftintrne.w.s :: ++input: -45786.500000 ++output: -45786 ++fcsr: 0x1010200 ++roundig mode: +inf ++ftintrne.w.s :: ++input: 456.000000 ++output: 456 ++fcsr: 0x200 ++roundig mode: +inf ++ftintrne.w.s :: ++input: 34.031250 ++output: 34 ++fcsr: 0x1010200 ++roundig mode: +inf ++ftintrne.w.s :: ++input: 45786.750000 ++output: 45787 ++fcsr: 0x1010200 ++roundig mode: +inf ++ftintrne.w.s :: ++input: 1752065.000000 ++output: 1752065 ++fcsr: 0x200 ++roundig mode: +inf ++ftintrne.w.s :: ++input: 107.000000 ++output: 107 ++fcsr: 0x200 ++roundig mode: +inf ++ftintrne.w.s :: ++input: -45667.250000 ++output: -45667 ++fcsr: 0x1010200 ++roundig mode: +inf ++ftintrne.w.s :: ++input: -7.000000 ++output: -7 ++fcsr: 0x200 ++roundig mode: +inf ++ftintrne.w.s :: ++input: -347856.500000 ++output: -347856 ++fcsr: 0x1010200 ++roundig mode: +inf ++ftintrne.w.s :: ++input: 356047.500000 ++output: 356048 ++fcsr: 0x1010200 ++roundig mode: +inf ++ftintrne.w.s :: ++input: -1.000000 ++output: -1 ++fcsr: 0x200 ++roundig mode: +inf ++ftintrne.w.s :: ++input: 23.062500 ++output: 23 ++fcsr: 0x1010200 ++roundig mode: -inf ++ftintrne.w.s :: ++input: 0.000000 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++ftintrne.w.s :: ++input: 456.250000 ++output: 456 ++fcsr: 0x1010300 ++roundig mode: -inf ++ftintrne.w.s :: ++input: 3.000000 ++output: 3 ++fcsr: 0x300 ++roundig mode: -inf ++ftintrne.w.s :: ++input: -1.000000 ++output: -1 ++fcsr: 0x300 ++roundig mode: -inf ++ftintrne.w.s :: ++input: 1384.500000 ++output: 1384 ++fcsr: 0x1010300 ++roundig mode: -inf ++ftintrne.w.s :: ++input: -7.250000 ++output: -7 ++fcsr: 0x1010300 ++roundig mode: -inf ++ftintrne.w.s :: ++input: 1000000000.000000 ++output: 1000000000 ++fcsr: 0x300 ++roundig mode: -inf ++ftintrne.w.s :: ++input: -5786.500000 ++output: -5786 ++fcsr: 0x1010300 ++roundig mode: -inf ++ftintrne.w.s :: ++input: 1752.000000 ++output: 1752 ++fcsr: 0x300 ++roundig mode: -inf ++ftintrne.w.s :: ++input: 0.015625 ++output: 0 ++fcsr: 0x1010300 ++roundig mode: -inf ++ftintrne.w.s :: ++input: 0.031250 ++output: 0 ++fcsr: 0x1010300 ++roundig mode: -inf ++ftintrne.w.s :: ++input: -248562.750000 ++output: -248563 ++fcsr: 0x1010300 ++roundig mode: -inf ++ftintrne.w.s :: ++input: -45786.500000 ++output: -45786 ++fcsr: 0x1010300 ++roundig mode: -inf ++ftintrne.w.s :: ++input: 456.000000 ++output: 456 ++fcsr: 0x300 ++roundig mode: -inf ++ftintrne.w.s :: ++input: 34.031250 ++output: 34 ++fcsr: 0x1010300 ++roundig mode: -inf ++ftintrne.w.s :: ++input: 45786.750000 ++output: 45787 ++fcsr: 0x1010300 ++roundig mode: -inf ++ftintrne.w.s :: ++input: 1752065.000000 ++output: 1752065 ++fcsr: 0x300 ++roundig mode: -inf ++ftintrne.w.s :: ++input: 107.000000 ++output: 107 ++fcsr: 0x300 ++roundig mode: -inf ++ftintrne.w.s :: ++input: -45667.250000 ++output: -45667 ++fcsr: 0x1010300 ++roundig mode: -inf ++ftintrne.w.s :: ++input: -7.000000 ++output: -7 ++fcsr: 0x300 ++roundig mode: -inf ++ftintrne.w.s :: ++input: -347856.500000 ++output: -347856 ++fcsr: 0x1010300 ++roundig mode: -inf ++ftintrne.w.s :: ++input: 356047.500000 ++output: 356048 ++fcsr: 0x1010300 ++roundig mode: -inf ++ftintrne.w.s :: ++input: -1.000000 ++output: -1 ++fcsr: 0x300 ++roundig mode: -inf ++ftintrne.w.s :: ++input: 23.062500 ++output: 23 ++fcsr: 0x1010300 ++roundig mode: near ++ftintrne.w.d :: ++input: 0.000000000000000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++ftintrne.w.d :: ++input: 456.250000000000000 ++output: 456 ++fcsr: 0x1010000 ++roundig mode: 
near ++ftintrne.w.d :: ++input: 3.000000000000000 ++output: 3 ++fcsr: 0 ++roundig mode: near ++ftintrne.w.d :: ++input: -1.000000000000000 ++output: -1 ++fcsr: 0 ++roundig mode: near ++ftintrne.w.d :: ++input: 1384.500000000000000 ++output: 1384 ++fcsr: 0x1010000 ++roundig mode: near ++ftintrne.w.d :: ++input: -7.250000000000000 ++output: -7 ++fcsr: 0x1010000 ++roundig mode: near ++ftintrne.w.d :: ++input: 1000000000.000000000000000 ++output: 1000000000 ++fcsr: 0 ++roundig mode: near ++ftintrne.w.d :: ++input: -5786.500000000000000 ++output: -5786 ++fcsr: 0x1010000 ++roundig mode: near ++ftintrne.w.d :: ++input: 1752.000000000000000 ++output: 1752 ++fcsr: 0 ++roundig mode: near ++ftintrne.w.d :: ++input: 0.015625000000000 ++output: 0 ++fcsr: 0x1010000 ++roundig mode: near ++ftintrne.w.d :: ++input: 0.031250000000000 ++output: 0 ++fcsr: 0x1010000 ++roundig mode: near ++ftintrne.w.d :: ++input: -248562.750000000000000 ++output: -248563 ++fcsr: 0x1010000 ++roundig mode: near ++ftintrne.w.d :: ++input: -45786.500000000000000 ++output: -45786 ++fcsr: 0x1010000 ++roundig mode: near ++ftintrne.w.d :: ++input: 456.000000000000000 ++output: 456 ++fcsr: 0 ++roundig mode: near ++ftintrne.w.d :: ++input: 34.031250000000000 ++output: 34 ++fcsr: 0x1010000 ++roundig mode: near ++ftintrne.w.d :: ++input: 45786.750000000000000 ++output: 45787 ++fcsr: 0x1010000 ++roundig mode: near ++ftintrne.w.d :: ++input: 1752065.000000000000000 ++output: 1752065 ++fcsr: 0 ++roundig mode: near ++ftintrne.w.d :: ++input: 107.000000000000000 ++output: 107 ++fcsr: 0 ++roundig mode: near ++ftintrne.w.d :: ++input: -45667.250000000000000 ++output: -45667 ++fcsr: 0x1010000 ++roundig mode: near ++ftintrne.w.d :: ++input: -7.000000000000000 ++output: -7 ++fcsr: 0 ++roundig mode: near ++ftintrne.w.d :: ++input: -347856.500000000000000 ++output: -347856 ++fcsr: 0x1010000 ++roundig mode: near ++ftintrne.w.d :: ++input: 356047.500000000000000 ++output: 356048 ++fcsr: 0x1010000 ++roundig mode: near ++ftintrne.w.d :: ++input: -1.000000000000000 ++output: -1 ++fcsr: 0 ++roundig mode: near ++ftintrne.w.d :: ++input: 23.062500000000000 ++output: 23 ++fcsr: 0x1010000 ++roundig mode: zero ++ftintrne.w.d :: ++input: 0.000000000000000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++ftintrne.w.d :: ++input: 456.250000000000000 ++output: 456 ++fcsr: 0x1010100 ++roundig mode: zero ++ftintrne.w.d :: ++input: 3.000000000000000 ++output: 3 ++fcsr: 0x100 ++roundig mode: zero ++ftintrne.w.d :: ++input: -1.000000000000000 ++output: -1 ++fcsr: 0x100 ++roundig mode: zero ++ftintrne.w.d :: ++input: 1384.500000000000000 ++output: 1384 ++fcsr: 0x1010100 ++roundig mode: zero ++ftintrne.w.d :: ++input: -7.250000000000000 ++output: -7 ++fcsr: 0x1010100 ++roundig mode: zero ++ftintrne.w.d :: ++input: 1000000000.000000000000000 ++output: 1000000000 ++fcsr: 0x100 ++roundig mode: zero ++ftintrne.w.d :: ++input: -5786.500000000000000 ++output: -5786 ++fcsr: 0x1010100 ++roundig mode: zero ++ftintrne.w.d :: ++input: 1752.000000000000000 ++output: 1752 ++fcsr: 0x100 ++roundig mode: zero ++ftintrne.w.d :: ++input: 0.015625000000000 ++output: 0 ++fcsr: 0x1010100 ++roundig mode: zero ++ftintrne.w.d :: ++input: 0.031250000000000 ++output: 0 ++fcsr: 0x1010100 ++roundig mode: zero ++ftintrne.w.d :: ++input: -248562.750000000000000 ++output: -248563 ++fcsr: 0x1010100 ++roundig mode: zero ++ftintrne.w.d :: ++input: -45786.500000000000000 ++output: -45786 ++fcsr: 0x1010100 ++roundig mode: zero ++ftintrne.w.d :: ++input: 456.000000000000000 ++output: 456 ++fcsr: 0x100 
++roundig mode: zero ++ftintrne.w.d :: ++input: 34.031250000000000 ++output: 34 ++fcsr: 0x1010100 ++roundig mode: zero ++ftintrne.w.d :: ++input: 45786.750000000000000 ++output: 45787 ++fcsr: 0x1010100 ++roundig mode: zero ++ftintrne.w.d :: ++input: 1752065.000000000000000 ++output: 1752065 ++fcsr: 0x100 ++roundig mode: zero ++ftintrne.w.d :: ++input: 107.000000000000000 ++output: 107 ++fcsr: 0x100 ++roundig mode: zero ++ftintrne.w.d :: ++input: -45667.250000000000000 ++output: -45667 ++fcsr: 0x1010100 ++roundig mode: zero ++ftintrne.w.d :: ++input: -7.000000000000000 ++output: -7 ++fcsr: 0x100 ++roundig mode: zero ++ftintrne.w.d :: ++input: -347856.500000000000000 ++output: -347856 ++fcsr: 0x1010100 ++roundig mode: zero ++ftintrne.w.d :: ++input: 356047.500000000000000 ++output: 356048 ++fcsr: 0x1010100 ++roundig mode: zero ++ftintrne.w.d :: ++input: -1.000000000000000 ++output: -1 ++fcsr: 0x100 ++roundig mode: zero ++ftintrne.w.d :: ++input: 23.062500000000000 ++output: 23 ++fcsr: 0x1010100 ++roundig mode: +inf ++ftintrne.w.d :: ++input: 0.000000000000000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++ftintrne.w.d :: ++input: 456.250000000000000 ++output: 456 ++fcsr: 0x1010200 ++roundig mode: +inf ++ftintrne.w.d :: ++input: 3.000000000000000 ++output: 3 ++fcsr: 0x200 ++roundig mode: +inf ++ftintrne.w.d :: ++input: -1.000000000000000 ++output: -1 ++fcsr: 0x200 ++roundig mode: +inf ++ftintrne.w.d :: ++input: 1384.500000000000000 ++output: 1384 ++fcsr: 0x1010200 ++roundig mode: +inf ++ftintrne.w.d :: ++input: -7.250000000000000 ++output: -7 ++fcsr: 0x1010200 ++roundig mode: +inf ++ftintrne.w.d :: ++input: 1000000000.000000000000000 ++output: 1000000000 ++fcsr: 0x200 ++roundig mode: +inf ++ftintrne.w.d :: ++input: -5786.500000000000000 ++output: -5786 ++fcsr: 0x1010200 ++roundig mode: +inf ++ftintrne.w.d :: ++input: 1752.000000000000000 ++output: 1752 ++fcsr: 0x200 ++roundig mode: +inf ++ftintrne.w.d :: ++input: 0.015625000000000 ++output: 0 ++fcsr: 0x1010200 ++roundig mode: +inf ++ftintrne.w.d :: ++input: 0.031250000000000 ++output: 0 ++fcsr: 0x1010200 ++roundig mode: +inf ++ftintrne.w.d :: ++input: -248562.750000000000000 ++output: -248563 ++fcsr: 0x1010200 ++roundig mode: +inf ++ftintrne.w.d :: ++input: -45786.500000000000000 ++output: -45786 ++fcsr: 0x1010200 ++roundig mode: +inf ++ftintrne.w.d :: ++input: 456.000000000000000 ++output: 456 ++fcsr: 0x200 ++roundig mode: +inf ++ftintrne.w.d :: ++input: 34.031250000000000 ++output: 34 ++fcsr: 0x1010200 ++roundig mode: +inf ++ftintrne.w.d :: ++input: 45786.750000000000000 ++output: 45787 ++fcsr: 0x1010200 ++roundig mode: +inf ++ftintrne.w.d :: ++input: 1752065.000000000000000 ++output: 1752065 ++fcsr: 0x200 ++roundig mode: +inf ++ftintrne.w.d :: ++input: 107.000000000000000 ++output: 107 ++fcsr: 0x200 ++roundig mode: +inf ++ftintrne.w.d :: ++input: -45667.250000000000000 ++output: -45667 ++fcsr: 0x1010200 ++roundig mode: +inf ++ftintrne.w.d :: ++input: -7.000000000000000 ++output: -7 ++fcsr: 0x200 ++roundig mode: +inf ++ftintrne.w.d :: ++input: -347856.500000000000000 ++output: -347856 ++fcsr: 0x1010200 ++roundig mode: +inf ++ftintrne.w.d :: ++input: 356047.500000000000000 ++output: 356048 ++fcsr: 0x1010200 ++roundig mode: +inf ++ftintrne.w.d :: ++input: -1.000000000000000 ++output: -1 ++fcsr: 0x200 ++roundig mode: +inf ++ftintrne.w.d :: ++input: 23.062500000000000 ++output: 23 ++fcsr: 0x1010200 ++roundig mode: -inf ++ftintrne.w.d :: ++input: 0.000000000000000 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++ftintrne.w.d :: ++input: 
456.250000000000000 ++output: 456 ++fcsr: 0x1010300 ++roundig mode: -inf ++ftintrne.w.d :: ++input: 3.000000000000000 ++output: 3 ++fcsr: 0x300 ++roundig mode: -inf ++ftintrne.w.d :: ++input: -1.000000000000000 ++output: -1 ++fcsr: 0x300 ++roundig mode: -inf ++ftintrne.w.d :: ++input: 1384.500000000000000 ++output: 1384 ++fcsr: 0x1010300 ++roundig mode: -inf ++ftintrne.w.d :: ++input: -7.250000000000000 ++output: -7 ++fcsr: 0x1010300 ++roundig mode: -inf ++ftintrne.w.d :: ++input: 1000000000.000000000000000 ++output: 1000000000 ++fcsr: 0x300 ++roundig mode: -inf ++ftintrne.w.d :: ++input: -5786.500000000000000 ++output: -5786 ++fcsr: 0x1010300 ++roundig mode: -inf ++ftintrne.w.d :: ++input: 1752.000000000000000 ++output: 1752 ++fcsr: 0x300 ++roundig mode: -inf ++ftintrne.w.d :: ++input: 0.015625000000000 ++output: 0 ++fcsr: 0x1010300 ++roundig mode: -inf ++ftintrne.w.d :: ++input: 0.031250000000000 ++output: 0 ++fcsr: 0x1010300 ++roundig mode: -inf ++ftintrne.w.d :: ++input: -248562.750000000000000 ++output: -248563 ++fcsr: 0x1010300 ++roundig mode: -inf ++ftintrne.w.d :: ++input: -45786.500000000000000 ++output: -45786 ++fcsr: 0x1010300 ++roundig mode: -inf ++ftintrne.w.d :: ++input: 456.000000000000000 ++output: 456 ++fcsr: 0x300 ++roundig mode: -inf ++ftintrne.w.d :: ++input: 34.031250000000000 ++output: 34 ++fcsr: 0x1010300 ++roundig mode: -inf ++ftintrne.w.d :: ++input: 45786.750000000000000 ++output: 45787 ++fcsr: 0x1010300 ++roundig mode: -inf ++ftintrne.w.d :: ++input: 1752065.000000000000000 ++output: 1752065 ++fcsr: 0x300 ++roundig mode: -inf ++ftintrne.w.d :: ++input: 107.000000000000000 ++output: 107 ++fcsr: 0x300 ++roundig mode: -inf ++ftintrne.w.d :: ++input: -45667.250000000000000 ++output: -45667 ++fcsr: 0x1010300 ++roundig mode: -inf ++ftintrne.w.d :: ++input: -7.000000000000000 ++output: -7 ++fcsr: 0x300 ++roundig mode: -inf ++ftintrne.w.d :: ++input: -347856.500000000000000 ++output: -347856 ++fcsr: 0x1010300 ++roundig mode: -inf ++ftintrne.w.d :: ++input: 356047.500000000000000 ++output: 356048 ++fcsr: 0x1010300 ++roundig mode: -inf ++ftintrne.w.d :: ++input: -1.000000000000000 ++output: -1 ++fcsr: 0x300 ++roundig mode: -inf ++ftintrne.w.d :: ++input: 23.062500000000000 ++output: 23 ++fcsr: 0x1010300 ++roundig mode: near ++ftintrne.l.s :: ++input: 0.000000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++ftintrne.l.s :: ++input: 456.250000 ++output: 456 ++fcsr: 0x1010000 ++roundig mode: near ++ftintrne.l.s :: ++input: 3.000000 ++output: 3 ++fcsr: 0 ++roundig mode: near ++ftintrne.l.s :: ++input: -1.000000 ++output: -1 ++fcsr: 0 ++roundig mode: near ++ftintrne.l.s :: ++input: 1384.500000 ++output: 1384 ++fcsr: 0x1010000 ++roundig mode: near ++ftintrne.l.s :: ++input: -7.250000 ++output: -7 ++fcsr: 0x1010000 ++roundig mode: near ++ftintrne.l.s :: ++input: 1000000000.000000 ++output: 1000000000 ++fcsr: 0 ++roundig mode: near ++ftintrne.l.s :: ++input: -5786.500000 ++output: -5786 ++fcsr: 0x1010000 ++roundig mode: near ++ftintrne.l.s :: ++input: 1752.000000 ++output: 1752 ++fcsr: 0 ++roundig mode: near ++ftintrne.l.s :: ++input: 0.015625 ++output: 0 ++fcsr: 0x1010000 ++roundig mode: near ++ftintrne.l.s :: ++input: 0.031250 ++output: 0 ++fcsr: 0x1010000 ++roundig mode: near ++ftintrne.l.s :: ++input: -248562.750000 ++output: -248563 ++fcsr: 0x1010000 ++roundig mode: near ++ftintrne.l.s :: ++input: -45786.500000 ++output: -45786 ++fcsr: 0x1010000 ++roundig mode: near ++ftintrne.l.s :: ++input: 456.000000 ++output: 456 ++fcsr: 0 ++roundig mode: near ++ftintrne.l.s :: ++input: 
34.031250 ++output: 34 ++fcsr: 0x1010000 ++roundig mode: near ++ftintrne.l.s :: ++input: 45786.750000 ++output: 45787 ++fcsr: 0x1010000 ++roundig mode: near ++ftintrne.l.s :: ++input: 1752065.000000 ++output: 1752065 ++fcsr: 0 ++roundig mode: near ++ftintrne.l.s :: ++input: 107.000000 ++output: 107 ++fcsr: 0 ++roundig mode: near ++ftintrne.l.s :: ++input: -45667.250000 ++output: -45667 ++fcsr: 0x1010000 ++roundig mode: near ++ftintrne.l.s :: ++input: -7.000000 ++output: -7 ++fcsr: 0 ++roundig mode: near ++ftintrne.l.s :: ++input: -347856.500000 ++output: -347856 ++fcsr: 0x1010000 ++roundig mode: near ++ftintrne.l.s :: ++input: 356047.500000 ++output: 356048 ++fcsr: 0x1010000 ++roundig mode: near ++ftintrne.l.s :: ++input: -1.000000 ++output: -1 ++fcsr: 0 ++roundig mode: near ++ftintrne.l.s :: ++input: 23.062500 ++output: 23 ++fcsr: 0x1010000 ++roundig mode: zero ++ftintrne.l.s :: ++input: 0.000000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++ftintrne.l.s :: ++input: 456.250000 ++output: 456 ++fcsr: 0x1010100 ++roundig mode: zero ++ftintrne.l.s :: ++input: 3.000000 ++output: 3 ++fcsr: 0x100 ++roundig mode: zero ++ftintrne.l.s :: ++input: -1.000000 ++output: -1 ++fcsr: 0x100 ++roundig mode: zero ++ftintrne.l.s :: ++input: 1384.500000 ++output: 1384 ++fcsr: 0x1010100 ++roundig mode: zero ++ftintrne.l.s :: ++input: -7.250000 ++output: -7 ++fcsr: 0x1010100 ++roundig mode: zero ++ftintrne.l.s :: ++input: 1000000000.000000 ++output: 1000000000 ++fcsr: 0x100 ++roundig mode: zero ++ftintrne.l.s :: ++input: -5786.500000 ++output: -5786 ++fcsr: 0x1010100 ++roundig mode: zero ++ftintrne.l.s :: ++input: 1752.000000 ++output: 1752 ++fcsr: 0x100 ++roundig mode: zero ++ftintrne.l.s :: ++input: 0.015625 ++output: 0 ++fcsr: 0x1010100 ++roundig mode: zero ++ftintrne.l.s :: ++input: 0.031250 ++output: 0 ++fcsr: 0x1010100 ++roundig mode: zero ++ftintrne.l.s :: ++input: -248562.750000 ++output: -248563 ++fcsr: 0x1010100 ++roundig mode: zero ++ftintrne.l.s :: ++input: -45786.500000 ++output: -45786 ++fcsr: 0x1010100 ++roundig mode: zero ++ftintrne.l.s :: ++input: 456.000000 ++output: 456 ++fcsr: 0x100 ++roundig mode: zero ++ftintrne.l.s :: ++input: 34.031250 ++output: 34 ++fcsr: 0x1010100 ++roundig mode: zero ++ftintrne.l.s :: ++input: 45786.750000 ++output: 45787 ++fcsr: 0x1010100 ++roundig mode: zero ++ftintrne.l.s :: ++input: 1752065.000000 ++output: 1752065 ++fcsr: 0x100 ++roundig mode: zero ++ftintrne.l.s :: ++input: 107.000000 ++output: 107 ++fcsr: 0x100 ++roundig mode: zero ++ftintrne.l.s :: ++input: -45667.250000 ++output: -45667 ++fcsr: 0x1010100 ++roundig mode: zero ++ftintrne.l.s :: ++input: -7.000000 ++output: -7 ++fcsr: 0x100 ++roundig mode: zero ++ftintrne.l.s :: ++input: -347856.500000 ++output: -347856 ++fcsr: 0x1010100 ++roundig mode: zero ++ftintrne.l.s :: ++input: 356047.500000 ++output: 356048 ++fcsr: 0x1010100 ++roundig mode: zero ++ftintrne.l.s :: ++input: -1.000000 ++output: -1 ++fcsr: 0x100 ++roundig mode: zero ++ftintrne.l.s :: ++input: 23.062500 ++output: 23 ++fcsr: 0x1010100 ++roundig mode: +inf ++ftintrne.l.s :: ++input: 0.000000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++ftintrne.l.s :: ++input: 456.250000 ++output: 456 ++fcsr: 0x1010200 ++roundig mode: +inf ++ftintrne.l.s :: ++input: 3.000000 ++output: 3 ++fcsr: 0x200 ++roundig mode: +inf ++ftintrne.l.s :: ++input: -1.000000 ++output: -1 ++fcsr: 0x200 ++roundig mode: +inf ++ftintrne.l.s :: ++input: 1384.500000 ++output: 1384 ++fcsr: 0x1010200 ++roundig mode: +inf ++ftintrne.l.s :: ++input: -7.250000 ++output: -7 ++fcsr: 
0x1010200 ++roundig mode: +inf ++ftintrne.l.s :: ++input: 1000000000.000000 ++output: 1000000000 ++fcsr: 0x200 ++roundig mode: +inf ++ftintrne.l.s :: ++input: -5786.500000 ++output: -5786 ++fcsr: 0x1010200 ++roundig mode: +inf ++ftintrne.l.s :: ++input: 1752.000000 ++output: 1752 ++fcsr: 0x200 ++roundig mode: +inf ++ftintrne.l.s :: ++input: 0.015625 ++output: 0 ++fcsr: 0x1010200 ++roundig mode: +inf ++ftintrne.l.s :: ++input: 0.031250 ++output: 0 ++fcsr: 0x1010200 ++roundig mode: +inf ++ftintrne.l.s :: ++input: -248562.750000 ++output: -248563 ++fcsr: 0x1010200 ++roundig mode: +inf ++ftintrne.l.s :: ++input: -45786.500000 ++output: -45786 ++fcsr: 0x1010200 ++roundig mode: +inf ++ftintrne.l.s :: ++input: 456.000000 ++output: 456 ++fcsr: 0x200 ++roundig mode: +inf ++ftintrne.l.s :: ++input: 34.031250 ++output: 34 ++fcsr: 0x1010200 ++roundig mode: +inf ++ftintrne.l.s :: ++input: 45786.750000 ++output: 45787 ++fcsr: 0x1010200 ++roundig mode: +inf ++ftintrne.l.s :: ++input: 1752065.000000 ++output: 1752065 ++fcsr: 0x200 ++roundig mode: +inf ++ftintrne.l.s :: ++input: 107.000000 ++output: 107 ++fcsr: 0x200 ++roundig mode: +inf ++ftintrne.l.s :: ++input: -45667.250000 ++output: -45667 ++fcsr: 0x1010200 ++roundig mode: +inf ++ftintrne.l.s :: ++input: -7.000000 ++output: -7 ++fcsr: 0x200 ++roundig mode: +inf ++ftintrne.l.s :: ++input: -347856.500000 ++output: -347856 ++fcsr: 0x1010200 ++roundig mode: +inf ++ftintrne.l.s :: ++input: 356047.500000 ++output: 356048 ++fcsr: 0x1010200 ++roundig mode: +inf ++ftintrne.l.s :: ++input: -1.000000 ++output: -1 ++fcsr: 0x200 ++roundig mode: +inf ++ftintrne.l.s :: ++input: 23.062500 ++output: 23 ++fcsr: 0x1010200 ++roundig mode: -inf ++ftintrne.l.s :: ++input: 0.000000 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++ftintrne.l.s :: ++input: 456.250000 ++output: 456 ++fcsr: 0x1010300 ++roundig mode: -inf ++ftintrne.l.s :: ++input: 3.000000 ++output: 3 ++fcsr: 0x300 ++roundig mode: -inf ++ftintrne.l.s :: ++input: -1.000000 ++output: -1 ++fcsr: 0x300 ++roundig mode: -inf ++ftintrne.l.s :: ++input: 1384.500000 ++output: 1384 ++fcsr: 0x1010300 ++roundig mode: -inf ++ftintrne.l.s :: ++input: -7.250000 ++output: -7 ++fcsr: 0x1010300 ++roundig mode: -inf ++ftintrne.l.s :: ++input: 1000000000.000000 ++output: 1000000000 ++fcsr: 0x300 ++roundig mode: -inf ++ftintrne.l.s :: ++input: -5786.500000 ++output: -5786 ++fcsr: 0x1010300 ++roundig mode: -inf ++ftintrne.l.s :: ++input: 1752.000000 ++output: 1752 ++fcsr: 0x300 ++roundig mode: -inf ++ftintrne.l.s :: ++input: 0.015625 ++output: 0 ++fcsr: 0x1010300 ++roundig mode: -inf ++ftintrne.l.s :: ++input: 0.031250 ++output: 0 ++fcsr: 0x1010300 ++roundig mode: -inf ++ftintrne.l.s :: ++input: -248562.750000 ++output: -248563 ++fcsr: 0x1010300 ++roundig mode: -inf ++ftintrne.l.s :: ++input: -45786.500000 ++output: -45786 ++fcsr: 0x1010300 ++roundig mode: -inf ++ftintrne.l.s :: ++input: 456.000000 ++output: 456 ++fcsr: 0x300 ++roundig mode: -inf ++ftintrne.l.s :: ++input: 34.031250 ++output: 34 ++fcsr: 0x1010300 ++roundig mode: -inf ++ftintrne.l.s :: ++input: 45786.750000 ++output: 45787 ++fcsr: 0x1010300 ++roundig mode: -inf ++ftintrne.l.s :: ++input: 1752065.000000 ++output: 1752065 ++fcsr: 0x300 ++roundig mode: -inf ++ftintrne.l.s :: ++input: 107.000000 ++output: 107 ++fcsr: 0x300 ++roundig mode: -inf ++ftintrne.l.s :: ++input: -45667.250000 ++output: -45667 ++fcsr: 0x1010300 ++roundig mode: -inf ++ftintrne.l.s :: ++input: -7.000000 ++output: -7 ++fcsr: 0x300 ++roundig mode: -inf ++ftintrne.l.s :: ++input: -347856.500000 
++output: -347856 ++fcsr: 0x1010300 ++roundig mode: -inf ++ftintrne.l.s :: ++input: 356047.500000 ++output: 356048 ++fcsr: 0x1010300 ++roundig mode: -inf ++ftintrne.l.s :: ++input: -1.000000 ++output: -1 ++fcsr: 0x300 ++roundig mode: -inf ++ftintrne.l.s :: ++input: 23.062500 ++output: 23 ++fcsr: 0x1010300 ++roundig mode: near ++ftintrne.l.d :: ++input: 0.000000000000000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++ftintrne.l.d :: ++input: 456.250000000000000 ++output: 456 ++fcsr: 0x1010000 ++roundig mode: near ++ftintrne.l.d :: ++input: 3.000000000000000 ++output: 3 ++fcsr: 0 ++roundig mode: near ++ftintrne.l.d :: ++input: -1.000000000000000 ++output: -1 ++fcsr: 0 ++roundig mode: near ++ftintrne.l.d :: ++input: 1384.500000000000000 ++output: 1384 ++fcsr: 0x1010000 ++roundig mode: near ++ftintrne.l.d :: ++input: -7.250000000000000 ++output: -7 ++fcsr: 0x1010000 ++roundig mode: near ++ftintrne.l.d :: ++input: 1000000000.000000000000000 ++output: 1000000000 ++fcsr: 0 ++roundig mode: near ++ftintrne.l.d :: ++input: -5786.500000000000000 ++output: -5786 ++fcsr: 0x1010000 ++roundig mode: near ++ftintrne.l.d :: ++input: 1752.000000000000000 ++output: 1752 ++fcsr: 0 ++roundig mode: near ++ftintrne.l.d :: ++input: 0.015625000000000 ++output: 0 ++fcsr: 0x1010000 ++roundig mode: near ++ftintrne.l.d :: ++input: 0.031250000000000 ++output: 0 ++fcsr: 0x1010000 ++roundig mode: near ++ftintrne.l.d :: ++input: -248562.750000000000000 ++output: -248563 ++fcsr: 0x1010000 ++roundig mode: near ++ftintrne.l.d :: ++input: -45786.500000000000000 ++output: -45786 ++fcsr: 0x1010000 ++roundig mode: near ++ftintrne.l.d :: ++input: 456.000000000000000 ++output: 456 ++fcsr: 0 ++roundig mode: near ++ftintrne.l.d :: ++input: 34.031250000000000 ++output: 34 ++fcsr: 0x1010000 ++roundig mode: near ++ftintrne.l.d :: ++input: 45786.750000000000000 ++output: 45787 ++fcsr: 0x1010000 ++roundig mode: near ++ftintrne.l.d :: ++input: 1752065.000000000000000 ++output: 1752065 ++fcsr: 0 ++roundig mode: near ++ftintrne.l.d :: ++input: 107.000000000000000 ++output: 107 ++fcsr: 0 ++roundig mode: near ++ftintrne.l.d :: ++input: -45667.250000000000000 ++output: -45667 ++fcsr: 0x1010000 ++roundig mode: near ++ftintrne.l.d :: ++input: -7.000000000000000 ++output: -7 ++fcsr: 0 ++roundig mode: near ++ftintrne.l.d :: ++input: -347856.500000000000000 ++output: -347856 ++fcsr: 0x1010000 ++roundig mode: near ++ftintrne.l.d :: ++input: 356047.500000000000000 ++output: 356048 ++fcsr: 0x1010000 ++roundig mode: near ++ftintrne.l.d :: ++input: -1.000000000000000 ++output: -1 ++fcsr: 0 ++roundig mode: near ++ftintrne.l.d :: ++input: 23.062500000000000 ++output: 23 ++fcsr: 0x1010000 ++roundig mode: zero ++ftintrne.l.d :: ++input: 0.000000000000000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++ftintrne.l.d :: ++input: 456.250000000000000 ++output: 456 ++fcsr: 0x1010100 ++roundig mode: zero ++ftintrne.l.d :: ++input: 3.000000000000000 ++output: 3 ++fcsr: 0x100 ++roundig mode: zero ++ftintrne.l.d :: ++input: -1.000000000000000 ++output: -1 ++fcsr: 0x100 ++roundig mode: zero ++ftintrne.l.d :: ++input: 1384.500000000000000 ++output: 1384 ++fcsr: 0x1010100 ++roundig mode: zero ++ftintrne.l.d :: ++input: -7.250000000000000 ++output: -7 ++fcsr: 0x1010100 ++roundig mode: zero ++ftintrne.l.d :: ++input: 1000000000.000000000000000 ++output: 1000000000 ++fcsr: 0x100 ++roundig mode: zero ++ftintrne.l.d :: ++input: -5786.500000000000000 ++output: -5786 ++fcsr: 0x1010100 ++roundig mode: zero ++ftintrne.l.d :: ++input: 1752.000000000000000 ++output: 1752 ++fcsr: 
0x100 ++roundig mode: zero ++ftintrne.l.d :: ++input: 0.015625000000000 ++output: 0 ++fcsr: 0x1010100 ++roundig mode: zero ++ftintrne.l.d :: ++input: 0.031250000000000 ++output: 0 ++fcsr: 0x1010100 ++roundig mode: zero ++ftintrne.l.d :: ++input: -248562.750000000000000 ++output: -248563 ++fcsr: 0x1010100 ++roundig mode: zero ++ftintrne.l.d :: ++input: -45786.500000000000000 ++output: -45786 ++fcsr: 0x1010100 ++roundig mode: zero ++ftintrne.l.d :: ++input: 456.000000000000000 ++output: 456 ++fcsr: 0x100 ++roundig mode: zero ++ftintrne.l.d :: ++input: 34.031250000000000 ++output: 34 ++fcsr: 0x1010100 ++roundig mode: zero ++ftintrne.l.d :: ++input: 45786.750000000000000 ++output: 45787 ++fcsr: 0x1010100 ++roundig mode: zero ++ftintrne.l.d :: ++input: 1752065.000000000000000 ++output: 1752065 ++fcsr: 0x100 ++roundig mode: zero ++ftintrne.l.d :: ++input: 107.000000000000000 ++output: 107 ++fcsr: 0x100 ++roundig mode: zero ++ftintrne.l.d :: ++input: -45667.250000000000000 ++output: -45667 ++fcsr: 0x1010100 ++roundig mode: zero ++ftintrne.l.d :: ++input: -7.000000000000000 ++output: -7 ++fcsr: 0x100 ++roundig mode: zero ++ftintrne.l.d :: ++input: -347856.500000000000000 ++output: -347856 ++fcsr: 0x1010100 ++roundig mode: zero ++ftintrne.l.d :: ++input: 356047.500000000000000 ++output: 356048 ++fcsr: 0x1010100 ++roundig mode: zero ++ftintrne.l.d :: ++input: -1.000000000000000 ++output: -1 ++fcsr: 0x100 ++roundig mode: zero ++ftintrne.l.d :: ++input: 23.062500000000000 ++output: 23 ++fcsr: 0x1010100 ++roundig mode: +inf ++ftintrne.l.d :: ++input: 0.000000000000000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++ftintrne.l.d :: ++input: 456.250000000000000 ++output: 456 ++fcsr: 0x1010200 ++roundig mode: +inf ++ftintrne.l.d :: ++input: 3.000000000000000 ++output: 3 ++fcsr: 0x200 ++roundig mode: +inf ++ftintrne.l.d :: ++input: -1.000000000000000 ++output: -1 ++fcsr: 0x200 ++roundig mode: +inf ++ftintrne.l.d :: ++input: 1384.500000000000000 ++output: 1384 ++fcsr: 0x1010200 ++roundig mode: +inf ++ftintrne.l.d :: ++input: -7.250000000000000 ++output: -7 ++fcsr: 0x1010200 ++roundig mode: +inf ++ftintrne.l.d :: ++input: 1000000000.000000000000000 ++output: 1000000000 ++fcsr: 0x200 ++roundig mode: +inf ++ftintrne.l.d :: ++input: -5786.500000000000000 ++output: -5786 ++fcsr: 0x1010200 ++roundig mode: +inf ++ftintrne.l.d :: ++input: 1752.000000000000000 ++output: 1752 ++fcsr: 0x200 ++roundig mode: +inf ++ftintrne.l.d :: ++input: 0.015625000000000 ++output: 0 ++fcsr: 0x1010200 ++roundig mode: +inf ++ftintrne.l.d :: ++input: 0.031250000000000 ++output: 0 ++fcsr: 0x1010200 ++roundig mode: +inf ++ftintrne.l.d :: ++input: -248562.750000000000000 ++output: -248563 ++fcsr: 0x1010200 ++roundig mode: +inf ++ftintrne.l.d :: ++input: -45786.500000000000000 ++output: -45786 ++fcsr: 0x1010200 ++roundig mode: +inf ++ftintrne.l.d :: ++input: 456.000000000000000 ++output: 456 ++fcsr: 0x200 ++roundig mode: +inf ++ftintrne.l.d :: ++input: 34.031250000000000 ++output: 34 ++fcsr: 0x1010200 ++roundig mode: +inf ++ftintrne.l.d :: ++input: 45786.750000000000000 ++output: 45787 ++fcsr: 0x1010200 ++roundig mode: +inf ++ftintrne.l.d :: ++input: 1752065.000000000000000 ++output: 1752065 ++fcsr: 0x200 ++roundig mode: +inf ++ftintrne.l.d :: ++input: 107.000000000000000 ++output: 107 ++fcsr: 0x200 ++roundig mode: +inf ++ftintrne.l.d :: ++input: -45667.250000000000000 ++output: -45667 ++fcsr: 0x1010200 ++roundig mode: +inf ++ftintrne.l.d :: ++input: -7.000000000000000 ++output: -7 ++fcsr: 0x200 ++roundig mode: +inf ++ftintrne.l.d :: 
++input: -347856.500000000000000 ++output: -347856 ++fcsr: 0x1010200 ++roundig mode: +inf ++ftintrne.l.d :: ++input: 356047.500000000000000 ++output: 356048 ++fcsr: 0x1010200 ++roundig mode: +inf ++ftintrne.l.d :: ++input: -1.000000000000000 ++output: -1 ++fcsr: 0x200 ++roundig mode: +inf ++ftintrne.l.d :: ++input: 23.062500000000000 ++output: 23 ++fcsr: 0x1010200 ++roundig mode: -inf ++ftintrne.l.d :: ++input: 0.000000000000000 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++ftintrne.l.d :: ++input: 456.250000000000000 ++output: 456 ++fcsr: 0x1010300 ++roundig mode: -inf ++ftintrne.l.d :: ++input: 3.000000000000000 ++output: 3 ++fcsr: 0x300 ++roundig mode: -inf ++ftintrne.l.d :: ++input: -1.000000000000000 ++output: -1 ++fcsr: 0x300 ++roundig mode: -inf ++ftintrne.l.d :: ++input: 1384.500000000000000 ++output: 1384 ++fcsr: 0x1010300 ++roundig mode: -inf ++ftintrne.l.d :: ++input: -7.250000000000000 ++output: -7 ++fcsr: 0x1010300 ++roundig mode: -inf ++ftintrne.l.d :: ++input: 1000000000.000000000000000 ++output: 1000000000 ++fcsr: 0x300 ++roundig mode: -inf ++ftintrne.l.d :: ++input: -5786.500000000000000 ++output: -5786 ++fcsr: 0x1010300 ++roundig mode: -inf ++ftintrne.l.d :: ++input: 1752.000000000000000 ++output: 1752 ++fcsr: 0x300 ++roundig mode: -inf ++ftintrne.l.d :: ++input: 0.015625000000000 ++output: 0 ++fcsr: 0x1010300 ++roundig mode: -inf ++ftintrne.l.d :: ++input: 0.031250000000000 ++output: 0 ++fcsr: 0x1010300 ++roundig mode: -inf ++ftintrne.l.d :: ++input: -248562.750000000000000 ++output: -248563 ++fcsr: 0x1010300 ++roundig mode: -inf ++ftintrne.l.d :: ++input: -45786.500000000000000 ++output: -45786 ++fcsr: 0x1010300 ++roundig mode: -inf ++ftintrne.l.d :: ++input: 456.000000000000000 ++output: 456 ++fcsr: 0x300 ++roundig mode: -inf ++ftintrne.l.d :: ++input: 34.031250000000000 ++output: 34 ++fcsr: 0x1010300 ++roundig mode: -inf ++ftintrne.l.d :: ++input: 45786.750000000000000 ++output: 45787 ++fcsr: 0x1010300 ++roundig mode: -inf ++ftintrne.l.d :: ++input: 1752065.000000000000000 ++output: 1752065 ++fcsr: 0x300 ++roundig mode: -inf ++ftintrne.l.d :: ++input: 107.000000000000000 ++output: 107 ++fcsr: 0x300 ++roundig mode: -inf ++ftintrne.l.d :: ++input: -45667.250000000000000 ++output: -45667 ++fcsr: 0x1010300 ++roundig mode: -inf ++ftintrne.l.d :: ++input: -7.000000000000000 ++output: -7 ++fcsr: 0x300 ++roundig mode: -inf ++ftintrne.l.d :: ++input: -347856.500000000000000 ++output: -347856 ++fcsr: 0x1010300 ++roundig mode: -inf ++ftintrne.l.d :: ++input: 356047.500000000000000 ++output: 356048 ++fcsr: 0x1010300 ++roundig mode: -inf ++ftintrne.l.d :: ++input: -1.000000000000000 ++output: -1 ++fcsr: 0x300 ++roundig mode: -inf ++ftintrne.l.d :: ++input: 23.062500000000000 ++output: 23 ++fcsr: 0x1010300 ++roundig mode: near ++ftint.w.s :: ++input: 0.000000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++ftint.w.s :: ++input: 456.250000 ++output: 456 ++fcsr: 0x1010000 ++roundig mode: near ++ftint.w.s :: ++input: 3.000000 ++output: 3 ++fcsr: 0 ++roundig mode: near ++ftint.w.s :: ++input: -1.000000 ++output: -1 ++fcsr: 0 ++roundig mode: near ++ftint.w.s :: ++input: 1384.500000 ++output: 1384 ++fcsr: 0x1010000 ++roundig mode: near ++ftint.w.s :: ++input: -7.250000 ++output: -7 ++fcsr: 0x1010000 ++roundig mode: near ++ftint.w.s :: ++input: 1000000000.000000 ++output: 1000000000 ++fcsr: 0 ++roundig mode: near ++ftint.w.s :: ++input: -5786.500000 ++output: -5786 ++fcsr: 0x1010000 ++roundig mode: near ++ftint.w.s :: ++input: 1752.000000 ++output: 1752 ++fcsr: 0 ++roundig mode: near 
++ftint.w.s :: ++input: 0.015625 ++output: 0 ++fcsr: 0x1010000 ++roundig mode: near ++ftint.w.s :: ++input: 0.031250 ++output: 0 ++fcsr: 0x1010000 ++roundig mode: near ++ftint.w.s :: ++input: -248562.750000 ++output: -248563 ++fcsr: 0x1010000 ++roundig mode: near ++ftint.w.s :: ++input: -45786.500000 ++output: -45786 ++fcsr: 0x1010000 ++roundig mode: near ++ftint.w.s :: ++input: 456.000000 ++output: 456 ++fcsr: 0 ++roundig mode: near ++ftint.w.s :: ++input: 34.031250 ++output: 34 ++fcsr: 0x1010000 ++roundig mode: near ++ftint.w.s :: ++input: 45786.750000 ++output: 45787 ++fcsr: 0x1010000 ++roundig mode: near ++ftint.w.s :: ++input: 1752065.000000 ++output: 1752065 ++fcsr: 0 ++roundig mode: near ++ftint.w.s :: ++input: 107.000000 ++output: 107 ++fcsr: 0 ++roundig mode: near ++ftint.w.s :: ++input: -45667.250000 ++output: -45667 ++fcsr: 0x1010000 ++roundig mode: near ++ftint.w.s :: ++input: -7.000000 ++output: -7 ++fcsr: 0 ++roundig mode: near ++ftint.w.s :: ++input: -347856.500000 ++output: -347856 ++fcsr: 0x1010000 ++roundig mode: near ++ftint.w.s :: ++input: 356047.500000 ++output: 356048 ++fcsr: 0x1010000 ++roundig mode: near ++ftint.w.s :: ++input: -1.000000 ++output: -1 ++fcsr: 0 ++roundig mode: near ++ftint.w.s :: ++input: 23.062500 ++output: 23 ++fcsr: 0x1010000 ++roundig mode: zero ++ftint.w.s :: ++input: 0.000000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++ftint.w.s :: ++input: 456.250000 ++output: 456 ++fcsr: 0x1010100 ++roundig mode: zero ++ftint.w.s :: ++input: 3.000000 ++output: 3 ++fcsr: 0x100 ++roundig mode: zero ++ftint.w.s :: ++input: -1.000000 ++output: -1 ++fcsr: 0x100 ++roundig mode: zero ++ftint.w.s :: ++input: 1384.500000 ++output: 1384 ++fcsr: 0x1010100 ++roundig mode: zero ++ftint.w.s :: ++input: -7.250000 ++output: -7 ++fcsr: 0x1010100 ++roundig mode: zero ++ftint.w.s :: ++input: 1000000000.000000 ++output: 1000000000 ++fcsr: 0x100 ++roundig mode: zero ++ftint.w.s :: ++input: -5786.500000 ++output: -5786 ++fcsr: 0x1010100 ++roundig mode: zero ++ftint.w.s :: ++input: 1752.000000 ++output: 1752 ++fcsr: 0x100 ++roundig mode: zero ++ftint.w.s :: ++input: 0.015625 ++output: 0 ++fcsr: 0x1010100 ++roundig mode: zero ++ftint.w.s :: ++input: 0.031250 ++output: 0 ++fcsr: 0x1010100 ++roundig mode: zero ++ftint.w.s :: ++input: -248562.750000 ++output: -248562 ++fcsr: 0x1010100 ++roundig mode: zero ++ftint.w.s :: ++input: -45786.500000 ++output: -45786 ++fcsr: 0x1010100 ++roundig mode: zero ++ftint.w.s :: ++input: 456.000000 ++output: 456 ++fcsr: 0x100 ++roundig mode: zero ++ftint.w.s :: ++input: 34.031250 ++output: 34 ++fcsr: 0x1010100 ++roundig mode: zero ++ftint.w.s :: ++input: 45786.750000 ++output: 45786 ++fcsr: 0x1010100 ++roundig mode: zero ++ftint.w.s :: ++input: 1752065.000000 ++output: 1752065 ++fcsr: 0x100 ++roundig mode: zero ++ftint.w.s :: ++input: 107.000000 ++output: 107 ++fcsr: 0x100 ++roundig mode: zero ++ftint.w.s :: ++input: -45667.250000 ++output: -45667 ++fcsr: 0x1010100 ++roundig mode: zero ++ftint.w.s :: ++input: -7.000000 ++output: -7 ++fcsr: 0x100 ++roundig mode: zero ++ftint.w.s :: ++input: -347856.500000 ++output: -347856 ++fcsr: 0x1010100 ++roundig mode: zero ++ftint.w.s :: ++input: 356047.500000 ++output: 356047 ++fcsr: 0x1010100 ++roundig mode: zero ++ftint.w.s :: ++input: -1.000000 ++output: -1 ++fcsr: 0x100 ++roundig mode: zero ++ftint.w.s :: ++input: 23.062500 ++output: 23 ++fcsr: 0x1010100 ++roundig mode: +inf ++ftint.w.s :: ++input: 0.000000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++ftint.w.s :: ++input: 456.250000 ++output: 457 
++fcsr: 0x1010200 ++roundig mode: +inf ++ftint.w.s :: ++input: 3.000000 ++output: 3 ++fcsr: 0x200 ++roundig mode: +inf ++ftint.w.s :: ++input: -1.000000 ++output: -1 ++fcsr: 0x200 ++roundig mode: +inf ++ftint.w.s :: ++input: 1384.500000 ++output: 1385 ++fcsr: 0x1010200 ++roundig mode: +inf ++ftint.w.s :: ++input: -7.250000 ++output: -7 ++fcsr: 0x1010200 ++roundig mode: +inf ++ftint.w.s :: ++input: 1000000000.000000 ++output: 1000000000 ++fcsr: 0x200 ++roundig mode: +inf ++ftint.w.s :: ++input: -5786.500000 ++output: -5786 ++fcsr: 0x1010200 ++roundig mode: +inf ++ftint.w.s :: ++input: 1752.000000 ++output: 1752 ++fcsr: 0x200 ++roundig mode: +inf ++ftint.w.s :: ++input: 0.015625 ++output: 1 ++fcsr: 0x1010200 ++roundig mode: +inf ++ftint.w.s :: ++input: 0.031250 ++output: 1 ++fcsr: 0x1010200 ++roundig mode: +inf ++ftint.w.s :: ++input: -248562.750000 ++output: -248562 ++fcsr: 0x1010200 ++roundig mode: +inf ++ftint.w.s :: ++input: -45786.500000 ++output: -45786 ++fcsr: 0x1010200 ++roundig mode: +inf ++ftint.w.s :: ++input: 456.000000 ++output: 456 ++fcsr: 0x200 ++roundig mode: +inf ++ftint.w.s :: ++input: 34.031250 ++output: 35 ++fcsr: 0x1010200 ++roundig mode: +inf ++ftint.w.s :: ++input: 45786.750000 ++output: 45787 ++fcsr: 0x1010200 ++roundig mode: +inf ++ftint.w.s :: ++input: 1752065.000000 ++output: 1752065 ++fcsr: 0x200 ++roundig mode: +inf ++ftint.w.s :: ++input: 107.000000 ++output: 107 ++fcsr: 0x200 ++roundig mode: +inf ++ftint.w.s :: ++input: -45667.250000 ++output: -45667 ++fcsr: 0x1010200 ++roundig mode: +inf ++ftint.w.s :: ++input: -7.000000 ++output: -7 ++fcsr: 0x200 ++roundig mode: +inf ++ftint.w.s :: ++input: -347856.500000 ++output: -347856 ++fcsr: 0x1010200 ++roundig mode: +inf ++ftint.w.s :: ++input: 356047.500000 ++output: 356048 ++fcsr: 0x1010200 ++roundig mode: +inf ++ftint.w.s :: ++input: -1.000000 ++output: -1 ++fcsr: 0x200 ++roundig mode: +inf ++ftint.w.s :: ++input: 23.062500 ++output: 24 ++fcsr: 0x1010200 ++roundig mode: -inf ++ftint.w.s :: ++input: 0.000000 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++ftint.w.s :: ++input: 456.250000 ++output: 456 ++fcsr: 0x1010300 ++roundig mode: -inf ++ftint.w.s :: ++input: 3.000000 ++output: 3 ++fcsr: 0x300 ++roundig mode: -inf ++ftint.w.s :: ++input: -1.000000 ++output: -1 ++fcsr: 0x300 ++roundig mode: -inf ++ftint.w.s :: ++input: 1384.500000 ++output: 1384 ++fcsr: 0x1010300 ++roundig mode: -inf ++ftint.w.s :: ++input: -7.250000 ++output: -8 ++fcsr: 0x1010300 ++roundig mode: -inf ++ftint.w.s :: ++input: 1000000000.000000 ++output: 1000000000 ++fcsr: 0x300 ++roundig mode: -inf ++ftint.w.s :: ++input: -5786.500000 ++output: -5787 ++fcsr: 0x1010300 ++roundig mode: -inf ++ftint.w.s :: ++input: 1752.000000 ++output: 1752 ++fcsr: 0x300 ++roundig mode: -inf ++ftint.w.s :: ++input: 0.015625 ++output: 0 ++fcsr: 0x1010300 ++roundig mode: -inf ++ftint.w.s :: ++input: 0.031250 ++output: 0 ++fcsr: 0x1010300 ++roundig mode: -inf ++ftint.w.s :: ++input: -248562.750000 ++output: -248563 ++fcsr: 0x1010300 ++roundig mode: -inf ++ftint.w.s :: ++input: -45786.500000 ++output: -45787 ++fcsr: 0x1010300 ++roundig mode: -inf ++ftint.w.s :: ++input: 456.000000 ++output: 456 ++fcsr: 0x300 ++roundig mode: -inf ++ftint.w.s :: ++input: 34.031250 ++output: 34 ++fcsr: 0x1010300 ++roundig mode: -inf ++ftint.w.s :: ++input: 45786.750000 ++output: 45786 ++fcsr: 0x1010300 ++roundig mode: -inf ++ftint.w.s :: ++input: 1752065.000000 ++output: 1752065 ++fcsr: 0x300 ++roundig mode: -inf ++ftint.w.s :: ++input: 107.000000 ++output: 107 ++fcsr: 0x300 ++roundig 
mode: -inf ++ftint.w.s :: ++input: -45667.250000 ++output: -45668 ++fcsr: 0x1010300 ++roundig mode: -inf ++ftint.w.s :: ++input: -7.000000 ++output: -7 ++fcsr: 0x300 ++roundig mode: -inf ++ftint.w.s :: ++input: -347856.500000 ++output: -347857 ++fcsr: 0x1010300 ++roundig mode: -inf ++ftint.w.s :: ++input: 356047.500000 ++output: 356047 ++fcsr: 0x1010300 ++roundig mode: -inf ++ftint.w.s :: ++input: -1.000000 ++output: -1 ++fcsr: 0x300 ++roundig mode: -inf ++ftint.w.s :: ++input: 23.062500 ++output: 23 ++fcsr: 0x1010300 ++roundig mode: near ++ftint.w.d :: ++input: 0.000000000000000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++ftint.w.d :: ++input: 456.250000000000000 ++output: 456 ++fcsr: 0x1010000 ++roundig mode: near ++ftint.w.d :: ++input: 3.000000000000000 ++output: 3 ++fcsr: 0 ++roundig mode: near ++ftint.w.d :: ++input: -1.000000000000000 ++output: -1 ++fcsr: 0 ++roundig mode: near ++ftint.w.d :: ++input: 1384.500000000000000 ++output: 1384 ++fcsr: 0x1010000 ++roundig mode: near ++ftint.w.d :: ++input: -7.250000000000000 ++output: -7 ++fcsr: 0x1010000 ++roundig mode: near ++ftint.w.d :: ++input: 1000000000.000000000000000 ++output: 1000000000 ++fcsr: 0 ++roundig mode: near ++ftint.w.d :: ++input: -5786.500000000000000 ++output: -5786 ++fcsr: 0x1010000 ++roundig mode: near ++ftint.w.d :: ++input: 1752.000000000000000 ++output: 1752 ++fcsr: 0 ++roundig mode: near ++ftint.w.d :: ++input: 0.015625000000000 ++output: 0 ++fcsr: 0x1010000 ++roundig mode: near ++ftint.w.d :: ++input: 0.031250000000000 ++output: 0 ++fcsr: 0x1010000 ++roundig mode: near ++ftint.w.d :: ++input: -248562.750000000000000 ++output: -248563 ++fcsr: 0x1010000 ++roundig mode: near ++ftint.w.d :: ++input: -45786.500000000000000 ++output: -45786 ++fcsr: 0x1010000 ++roundig mode: near ++ftint.w.d :: ++input: 456.000000000000000 ++output: 456 ++fcsr: 0 ++roundig mode: near ++ftint.w.d :: ++input: 34.031250000000000 ++output: 34 ++fcsr: 0x1010000 ++roundig mode: near ++ftint.w.d :: ++input: 45786.750000000000000 ++output: 45787 ++fcsr: 0x1010000 ++roundig mode: near ++ftint.w.d :: ++input: 1752065.000000000000000 ++output: 1752065 ++fcsr: 0 ++roundig mode: near ++ftint.w.d :: ++input: 107.000000000000000 ++output: 107 ++fcsr: 0 ++roundig mode: near ++ftint.w.d :: ++input: -45667.250000000000000 ++output: -45667 ++fcsr: 0x1010000 ++roundig mode: near ++ftint.w.d :: ++input: -7.000000000000000 ++output: -7 ++fcsr: 0 ++roundig mode: near ++ftint.w.d :: ++input: -347856.500000000000000 ++output: -347856 ++fcsr: 0x1010000 ++roundig mode: near ++ftint.w.d :: ++input: 356047.500000000000000 ++output: 356048 ++fcsr: 0x1010000 ++roundig mode: near ++ftint.w.d :: ++input: -1.000000000000000 ++output: -1 ++fcsr: 0 ++roundig mode: near ++ftint.w.d :: ++input: 23.062500000000000 ++output: 23 ++fcsr: 0x1010000 ++roundig mode: zero ++ftint.w.d :: ++input: 0.000000000000000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++ftint.w.d :: ++input: 456.250000000000000 ++output: 456 ++fcsr: 0x1010100 ++roundig mode: zero ++ftint.w.d :: ++input: 3.000000000000000 ++output: 3 ++fcsr: 0x100 ++roundig mode: zero ++ftint.w.d :: ++input: -1.000000000000000 ++output: -1 ++fcsr: 0x100 ++roundig mode: zero ++ftint.w.d :: ++input: 1384.500000000000000 ++output: 1384 ++fcsr: 0x1010100 ++roundig mode: zero ++ftint.w.d :: ++input: -7.250000000000000 ++output: -7 ++fcsr: 0x1010100 ++roundig mode: zero ++ftint.w.d :: ++input: 1000000000.000000000000000 ++output: 1000000000 ++fcsr: 0x100 ++roundig mode: zero ++ftint.w.d :: ++input: -5786.500000000000000 
++output: -5786 ++fcsr: 0x1010100 ++roundig mode: zero ++ftint.w.d :: ++input: 1752.000000000000000 ++output: 1752 ++fcsr: 0x100 ++roundig mode: zero ++ftint.w.d :: ++input: 0.015625000000000 ++output: 0 ++fcsr: 0x1010100 ++roundig mode: zero ++ftint.w.d :: ++input: 0.031250000000000 ++output: 0 ++fcsr: 0x1010100 ++roundig mode: zero ++ftint.w.d :: ++input: -248562.750000000000000 ++output: -248562 ++fcsr: 0x1010100 ++roundig mode: zero ++ftint.w.d :: ++input: -45786.500000000000000 ++output: -45786 ++fcsr: 0x1010100 ++roundig mode: zero ++ftint.w.d :: ++input: 456.000000000000000 ++output: 456 ++fcsr: 0x100 ++roundig mode: zero ++ftint.w.d :: ++input: 34.031250000000000 ++output: 34 ++fcsr: 0x1010100 ++roundig mode: zero ++ftint.w.d :: ++input: 45786.750000000000000 ++output: 45786 ++fcsr: 0x1010100 ++roundig mode: zero ++ftint.w.d :: ++input: 1752065.000000000000000 ++output: 1752065 ++fcsr: 0x100 ++roundig mode: zero ++ftint.w.d :: ++input: 107.000000000000000 ++output: 107 ++fcsr: 0x100 ++roundig mode: zero ++ftint.w.d :: ++input: -45667.250000000000000 ++output: -45667 ++fcsr: 0x1010100 ++roundig mode: zero ++ftint.w.d :: ++input: -7.000000000000000 ++output: -7 ++fcsr: 0x100 ++roundig mode: zero ++ftint.w.d :: ++input: -347856.500000000000000 ++output: -347856 ++fcsr: 0x1010100 ++roundig mode: zero ++ftint.w.d :: ++input: 356047.500000000000000 ++output: 356047 ++fcsr: 0x1010100 ++roundig mode: zero ++ftint.w.d :: ++input: -1.000000000000000 ++output: -1 ++fcsr: 0x100 ++roundig mode: zero ++ftint.w.d :: ++input: 23.062500000000000 ++output: 23 ++fcsr: 0x1010100 ++roundig mode: +inf ++ftint.w.d :: ++input: 0.000000000000000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++ftint.w.d :: ++input: 456.250000000000000 ++output: 457 ++fcsr: 0x1010200 ++roundig mode: +inf ++ftint.w.d :: ++input: 3.000000000000000 ++output: 3 ++fcsr: 0x200 ++roundig mode: +inf ++ftint.w.d :: ++input: -1.000000000000000 ++output: -1 ++fcsr: 0x200 ++roundig mode: +inf ++ftint.w.d :: ++input: 1384.500000000000000 ++output: 1385 ++fcsr: 0x1010200 ++roundig mode: +inf ++ftint.w.d :: ++input: -7.250000000000000 ++output: -7 ++fcsr: 0x1010200 ++roundig mode: +inf ++ftint.w.d :: ++input: 1000000000.000000000000000 ++output: 1000000000 ++fcsr: 0x200 ++roundig mode: +inf ++ftint.w.d :: ++input: -5786.500000000000000 ++output: -5786 ++fcsr: 0x1010200 ++roundig mode: +inf ++ftint.w.d :: ++input: 1752.000000000000000 ++output: 1752 ++fcsr: 0x200 ++roundig mode: +inf ++ftint.w.d :: ++input: 0.015625000000000 ++output: 1 ++fcsr: 0x1010200 ++roundig mode: +inf ++ftint.w.d :: ++input: 0.031250000000000 ++output: 1 ++fcsr: 0x1010200 ++roundig mode: +inf ++ftint.w.d :: ++input: -248562.750000000000000 ++output: -248562 ++fcsr: 0x1010200 ++roundig mode: +inf ++ftint.w.d :: ++input: -45786.500000000000000 ++output: -45786 ++fcsr: 0x1010200 ++roundig mode: +inf ++ftint.w.d :: ++input: 456.000000000000000 ++output: 456 ++fcsr: 0x200 ++roundig mode: +inf ++ftint.w.d :: ++input: 34.031250000000000 ++output: 35 ++fcsr: 0x1010200 ++roundig mode: +inf ++ftint.w.d :: ++input: 45786.750000000000000 ++output: 45787 ++fcsr: 0x1010200 ++roundig mode: +inf ++ftint.w.d :: ++input: 1752065.000000000000000 ++output: 1752065 ++fcsr: 0x200 ++roundig mode: +inf ++ftint.w.d :: ++input: 107.000000000000000 ++output: 107 ++fcsr: 0x200 ++roundig mode: +inf ++ftint.w.d :: ++input: -45667.250000000000000 ++output: -45667 ++fcsr: 0x1010200 ++roundig mode: +inf ++ftint.w.d :: ++input: -7.000000000000000 ++output: -7 ++fcsr: 0x200 ++roundig mode: +inf 
++ftint.w.d :: ++input: -347856.500000000000000 ++output: -347856 ++fcsr: 0x1010200 ++roundig mode: +inf ++ftint.w.d :: ++input: 356047.500000000000000 ++output: 356048 ++fcsr: 0x1010200 ++roundig mode: +inf ++ftint.w.d :: ++input: -1.000000000000000 ++output: -1 ++fcsr: 0x200 ++roundig mode: +inf ++ftint.w.d :: ++input: 23.062500000000000 ++output: 24 ++fcsr: 0x1010200 ++roundig mode: -inf ++ftint.w.d :: ++input: 0.000000000000000 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++ftint.w.d :: ++input: 456.250000000000000 ++output: 456 ++fcsr: 0x1010300 ++roundig mode: -inf ++ftint.w.d :: ++input: 3.000000000000000 ++output: 3 ++fcsr: 0x300 ++roundig mode: -inf ++ftint.w.d :: ++input: -1.000000000000000 ++output: -1 ++fcsr: 0x300 ++roundig mode: -inf ++ftint.w.d :: ++input: 1384.500000000000000 ++output: 1384 ++fcsr: 0x1010300 ++roundig mode: -inf ++ftint.w.d :: ++input: -7.250000000000000 ++output: -8 ++fcsr: 0x1010300 ++roundig mode: -inf ++ftint.w.d :: ++input: 1000000000.000000000000000 ++output: 1000000000 ++fcsr: 0x300 ++roundig mode: -inf ++ftint.w.d :: ++input: -5786.500000000000000 ++output: -5787 ++fcsr: 0x1010300 ++roundig mode: -inf ++ftint.w.d :: ++input: 1752.000000000000000 ++output: 1752 ++fcsr: 0x300 ++roundig mode: -inf ++ftint.w.d :: ++input: 0.015625000000000 ++output: 0 ++fcsr: 0x1010300 ++roundig mode: -inf ++ftint.w.d :: ++input: 0.031250000000000 ++output: 0 ++fcsr: 0x1010300 ++roundig mode: -inf ++ftint.w.d :: ++input: -248562.750000000000000 ++output: -248563 ++fcsr: 0x1010300 ++roundig mode: -inf ++ftint.w.d :: ++input: -45786.500000000000000 ++output: -45787 ++fcsr: 0x1010300 ++roundig mode: -inf ++ftint.w.d :: ++input: 456.000000000000000 ++output: 456 ++fcsr: 0x300 ++roundig mode: -inf ++ftint.w.d :: ++input: 34.031250000000000 ++output: 34 ++fcsr: 0x1010300 ++roundig mode: -inf ++ftint.w.d :: ++input: 45786.750000000000000 ++output: 45786 ++fcsr: 0x1010300 ++roundig mode: -inf ++ftint.w.d :: ++input: 1752065.000000000000000 ++output: 1752065 ++fcsr: 0x300 ++roundig mode: -inf ++ftint.w.d :: ++input: 107.000000000000000 ++output: 107 ++fcsr: 0x300 ++roundig mode: -inf ++ftint.w.d :: ++input: -45667.250000000000000 ++output: -45668 ++fcsr: 0x1010300 ++roundig mode: -inf ++ftint.w.d :: ++input: -7.000000000000000 ++output: -7 ++fcsr: 0x300 ++roundig mode: -inf ++ftint.w.d :: ++input: -347856.500000000000000 ++output: -347857 ++fcsr: 0x1010300 ++roundig mode: -inf ++ftint.w.d :: ++input: 356047.500000000000000 ++output: 356047 ++fcsr: 0x1010300 ++roundig mode: -inf ++ftint.w.d :: ++input: -1.000000000000000 ++output: -1 ++fcsr: 0x300 ++roundig mode: -inf ++ftint.w.d :: ++input: 23.062500000000000 ++output: 23 ++fcsr: 0x1010300 ++roundig mode: near ++ftint.l.s :: ++input: 0.000000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++ftint.l.s :: ++input: 456.250000 ++output: 456 ++fcsr: 0x1010000 ++roundig mode: near ++ftint.l.s :: ++input: 3.000000 ++output: 3 ++fcsr: 0 ++roundig mode: near ++ftint.l.s :: ++input: -1.000000 ++output: -1 ++fcsr: 0 ++roundig mode: near ++ftint.l.s :: ++input: 1384.500000 ++output: 1384 ++fcsr: 0x1010000 ++roundig mode: near ++ftint.l.s :: ++input: -7.250000 ++output: -7 ++fcsr: 0x1010000 ++roundig mode: near ++ftint.l.s :: ++input: 1000000000.000000 ++output: 1000000000 ++fcsr: 0 ++roundig mode: near ++ftint.l.s :: ++input: -5786.500000 ++output: -5786 ++fcsr: 0x1010000 ++roundig mode: near ++ftint.l.s :: ++input: 1752.000000 ++output: 1752 ++fcsr: 0 ++roundig mode: near ++ftint.l.s :: ++input: 0.015625 ++output: 0 ++fcsr: 0x1010000 
++roundig mode: near ++ftint.l.s :: ++input: 0.031250 ++output: 0 ++fcsr: 0x1010000 ++roundig mode: near ++ftint.l.s :: ++input: -248562.750000 ++output: -248563 ++fcsr: 0x1010000 ++roundig mode: near ++ftint.l.s :: ++input: -45786.500000 ++output: -45786 ++fcsr: 0x1010000 ++roundig mode: near ++ftint.l.s :: ++input: 456.000000 ++output: 456 ++fcsr: 0 ++roundig mode: near ++ftint.l.s :: ++input: 34.031250 ++output: 34 ++fcsr: 0x1010000 ++roundig mode: near ++ftint.l.s :: ++input: 45786.750000 ++output: 45787 ++fcsr: 0x1010000 ++roundig mode: near ++ftint.l.s :: ++input: 1752065.000000 ++output: 1752065 ++fcsr: 0 ++roundig mode: near ++ftint.l.s :: ++input: 107.000000 ++output: 107 ++fcsr: 0 ++roundig mode: near ++ftint.l.s :: ++input: -45667.250000 ++output: -45667 ++fcsr: 0x1010000 ++roundig mode: near ++ftint.l.s :: ++input: -7.000000 ++output: -7 ++fcsr: 0 ++roundig mode: near ++ftint.l.s :: ++input: -347856.500000 ++output: -347856 ++fcsr: 0x1010000 ++roundig mode: near ++ftint.l.s :: ++input: 356047.500000 ++output: 356048 ++fcsr: 0x1010000 ++roundig mode: near ++ftint.l.s :: ++input: -1.000000 ++output: -1 ++fcsr: 0 ++roundig mode: near ++ftint.l.s :: ++input: 23.062500 ++output: 23 ++fcsr: 0x1010000 ++roundig mode: zero ++ftint.l.s :: ++input: 0.000000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++ftint.l.s :: ++input: 456.250000 ++output: 456 ++fcsr: 0x1010100 ++roundig mode: zero ++ftint.l.s :: ++input: 3.000000 ++output: 3 ++fcsr: 0x100 ++roundig mode: zero ++ftint.l.s :: ++input: -1.000000 ++output: -1 ++fcsr: 0x100 ++roundig mode: zero ++ftint.l.s :: ++input: 1384.500000 ++output: 1384 ++fcsr: 0x1010100 ++roundig mode: zero ++ftint.l.s :: ++input: -7.250000 ++output: -7 ++fcsr: 0x1010100 ++roundig mode: zero ++ftint.l.s :: ++input: 1000000000.000000 ++output: 1000000000 ++fcsr: 0x100 ++roundig mode: zero ++ftint.l.s :: ++input: -5786.500000 ++output: -5786 ++fcsr: 0x1010100 ++roundig mode: zero ++ftint.l.s :: ++input: 1752.000000 ++output: 1752 ++fcsr: 0x100 ++roundig mode: zero ++ftint.l.s :: ++input: 0.015625 ++output: 0 ++fcsr: 0x1010100 ++roundig mode: zero ++ftint.l.s :: ++input: 0.031250 ++output: 0 ++fcsr: 0x1010100 ++roundig mode: zero ++ftint.l.s :: ++input: -248562.750000 ++output: -248562 ++fcsr: 0x1010100 ++roundig mode: zero ++ftint.l.s :: ++input: -45786.500000 ++output: -45786 ++fcsr: 0x1010100 ++roundig mode: zero ++ftint.l.s :: ++input: 456.000000 ++output: 456 ++fcsr: 0x100 ++roundig mode: zero ++ftint.l.s :: ++input: 34.031250 ++output: 34 ++fcsr: 0x1010100 ++roundig mode: zero ++ftint.l.s :: ++input: 45786.750000 ++output: 45786 ++fcsr: 0x1010100 ++roundig mode: zero ++ftint.l.s :: ++input: 1752065.000000 ++output: 1752065 ++fcsr: 0x100 ++roundig mode: zero ++ftint.l.s :: ++input: 107.000000 ++output: 107 ++fcsr: 0x100 ++roundig mode: zero ++ftint.l.s :: ++input: -45667.250000 ++output: -45667 ++fcsr: 0x1010100 ++roundig mode: zero ++ftint.l.s :: ++input: -7.000000 ++output: -7 ++fcsr: 0x100 ++roundig mode: zero ++ftint.l.s :: ++input: -347856.500000 ++output: -347856 ++fcsr: 0x1010100 ++roundig mode: zero ++ftint.l.s :: ++input: 356047.500000 ++output: 356047 ++fcsr: 0x1010100 ++roundig mode: zero ++ftint.l.s :: ++input: -1.000000 ++output: -1 ++fcsr: 0x100 ++roundig mode: zero ++ftint.l.s :: ++input: 23.062500 ++output: 23 ++fcsr: 0x1010100 ++roundig mode: +inf ++ftint.l.s :: ++input: 0.000000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++ftint.l.s :: ++input: 456.250000 ++output: 457 ++fcsr: 0x1010200 ++roundig mode: +inf ++ftint.l.s :: ++input: 
3.000000 ++output: 3 ++fcsr: 0x200 ++roundig mode: +inf ++ftint.l.s :: ++input: -1.000000 ++output: -1 ++fcsr: 0x200 ++roundig mode: +inf ++ftint.l.s :: ++input: 1384.500000 ++output: 1385 ++fcsr: 0x1010200 ++roundig mode: +inf ++ftint.l.s :: ++input: -7.250000 ++output: -7 ++fcsr: 0x1010200 ++roundig mode: +inf ++ftint.l.s :: ++input: 1000000000.000000 ++output: 1000000000 ++fcsr: 0x200 ++roundig mode: +inf ++ftint.l.s :: ++input: -5786.500000 ++output: -5786 ++fcsr: 0x1010200 ++roundig mode: +inf ++ftint.l.s :: ++input: 1752.000000 ++output: 1752 ++fcsr: 0x200 ++roundig mode: +inf ++ftint.l.s :: ++input: 0.015625 ++output: 1 ++fcsr: 0x1010200 ++roundig mode: +inf ++ftint.l.s :: ++input: 0.031250 ++output: 1 ++fcsr: 0x1010200 ++roundig mode: +inf ++ftint.l.s :: ++input: -248562.750000 ++output: -248562 ++fcsr: 0x1010200 ++roundig mode: +inf ++ftint.l.s :: ++input: -45786.500000 ++output: -45786 ++fcsr: 0x1010200 ++roundig mode: +inf ++ftint.l.s :: ++input: 456.000000 ++output: 456 ++fcsr: 0x200 ++roundig mode: +inf ++ftint.l.s :: ++input: 34.031250 ++output: 35 ++fcsr: 0x1010200 ++roundig mode: +inf ++ftint.l.s :: ++input: 45786.750000 ++output: 45787 ++fcsr: 0x1010200 ++roundig mode: +inf ++ftint.l.s :: ++input: 1752065.000000 ++output: 1752065 ++fcsr: 0x200 ++roundig mode: +inf ++ftint.l.s :: ++input: 107.000000 ++output: 107 ++fcsr: 0x200 ++roundig mode: +inf ++ftint.l.s :: ++input: -45667.250000 ++output: -45667 ++fcsr: 0x1010200 ++roundig mode: +inf ++ftint.l.s :: ++input: -7.000000 ++output: -7 ++fcsr: 0x200 ++roundig mode: +inf ++ftint.l.s :: ++input: -347856.500000 ++output: -347856 ++fcsr: 0x1010200 ++roundig mode: +inf ++ftint.l.s :: ++input: 356047.500000 ++output: 356048 ++fcsr: 0x1010200 ++roundig mode: +inf ++ftint.l.s :: ++input: -1.000000 ++output: -1 ++fcsr: 0x200 ++roundig mode: +inf ++ftint.l.s :: ++input: 23.062500 ++output: 24 ++fcsr: 0x1010200 ++roundig mode: -inf ++ftint.l.s :: ++input: 0.000000 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++ftint.l.s :: ++input: 456.250000 ++output: 456 ++fcsr: 0x1010300 ++roundig mode: -inf ++ftint.l.s :: ++input: 3.000000 ++output: 3 ++fcsr: 0x300 ++roundig mode: -inf ++ftint.l.s :: ++input: -1.000000 ++output: -1 ++fcsr: 0x300 ++roundig mode: -inf ++ftint.l.s :: ++input: 1384.500000 ++output: 1384 ++fcsr: 0x1010300 ++roundig mode: -inf ++ftint.l.s :: ++input: -7.250000 ++output: -8 ++fcsr: 0x1010300 ++roundig mode: -inf ++ftint.l.s :: ++input: 1000000000.000000 ++output: 1000000000 ++fcsr: 0x300 ++roundig mode: -inf ++ftint.l.s :: ++input: -5786.500000 ++output: -5787 ++fcsr: 0x1010300 ++roundig mode: -inf ++ftint.l.s :: ++input: 1752.000000 ++output: 1752 ++fcsr: 0x300 ++roundig mode: -inf ++ftint.l.s :: ++input: 0.015625 ++output: 0 ++fcsr: 0x1010300 ++roundig mode: -inf ++ftint.l.s :: ++input: 0.031250 ++output: 0 ++fcsr: 0x1010300 ++roundig mode: -inf ++ftint.l.s :: ++input: -248562.750000 ++output: -248563 ++fcsr: 0x1010300 ++roundig mode: -inf ++ftint.l.s :: ++input: -45786.500000 ++output: -45787 ++fcsr: 0x1010300 ++roundig mode: -inf ++ftint.l.s :: ++input: 456.000000 ++output: 456 ++fcsr: 0x300 ++roundig mode: -inf ++ftint.l.s :: ++input: 34.031250 ++output: 34 ++fcsr: 0x1010300 ++roundig mode: -inf ++ftint.l.s :: ++input: 45786.750000 ++output: 45786 ++fcsr: 0x1010300 ++roundig mode: -inf ++ftint.l.s :: ++input: 1752065.000000 ++output: 1752065 ++fcsr: 0x300 ++roundig mode: -inf ++ftint.l.s :: ++input: 107.000000 ++output: 107 ++fcsr: 0x300 ++roundig mode: -inf ++ftint.l.s :: ++input: -45667.250000 ++output: -45668 
++fcsr: 0x1010300 ++roundig mode: -inf ++ftint.l.s :: ++input: -7.000000 ++output: -7 ++fcsr: 0x300 ++roundig mode: -inf ++ftint.l.s :: ++input: -347856.500000 ++output: -347857 ++fcsr: 0x1010300 ++roundig mode: -inf ++ftint.l.s :: ++input: 356047.500000 ++output: 356047 ++fcsr: 0x1010300 ++roundig mode: -inf ++ftint.l.s :: ++input: -1.000000 ++output: -1 ++fcsr: 0x300 ++roundig mode: -inf ++ftint.l.s :: ++input: 23.062500 ++output: 23 ++fcsr: 0x1010300 ++roundig mode: near ++ftint.l.d :: ++input: 0.000000000000000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++ftint.l.d :: ++input: 456.250000000000000 ++output: 456 ++fcsr: 0x1010000 ++roundig mode: near ++ftint.l.d :: ++input: 3.000000000000000 ++output: 3 ++fcsr: 0 ++roundig mode: near ++ftint.l.d :: ++input: -1.000000000000000 ++output: -1 ++fcsr: 0 ++roundig mode: near ++ftint.l.d :: ++input: 1384.500000000000000 ++output: 1384 ++fcsr: 0x1010000 ++roundig mode: near ++ftint.l.d :: ++input: -7.250000000000000 ++output: -7 ++fcsr: 0x1010000 ++roundig mode: near ++ftint.l.d :: ++input: 1000000000.000000000000000 ++output: 1000000000 ++fcsr: 0 ++roundig mode: near ++ftint.l.d :: ++input: -5786.500000000000000 ++output: -5786 ++fcsr: 0x1010000 ++roundig mode: near ++ftint.l.d :: ++input: 1752.000000000000000 ++output: 1752 ++fcsr: 0 ++roundig mode: near ++ftint.l.d :: ++input: 0.015625000000000 ++output: 0 ++fcsr: 0x1010000 ++roundig mode: near ++ftint.l.d :: ++input: 0.031250000000000 ++output: 0 ++fcsr: 0x1010000 ++roundig mode: near ++ftint.l.d :: ++input: -248562.750000000000000 ++output: -248563 ++fcsr: 0x1010000 ++roundig mode: near ++ftint.l.d :: ++input: -45786.500000000000000 ++output: -45786 ++fcsr: 0x1010000 ++roundig mode: near ++ftint.l.d :: ++input: 456.000000000000000 ++output: 456 ++fcsr: 0 ++roundig mode: near ++ftint.l.d :: ++input: 34.031250000000000 ++output: 34 ++fcsr: 0x1010000 ++roundig mode: near ++ftint.l.d :: ++input: 45786.750000000000000 ++output: 45787 ++fcsr: 0x1010000 ++roundig mode: near ++ftint.l.d :: ++input: 1752065.000000000000000 ++output: 1752065 ++fcsr: 0 ++roundig mode: near ++ftint.l.d :: ++input: 107.000000000000000 ++output: 107 ++fcsr: 0 ++roundig mode: near ++ftint.l.d :: ++input: -45667.250000000000000 ++output: -45667 ++fcsr: 0x1010000 ++roundig mode: near ++ftint.l.d :: ++input: -7.000000000000000 ++output: -7 ++fcsr: 0 ++roundig mode: near ++ftint.l.d :: ++input: -347856.500000000000000 ++output: -347856 ++fcsr: 0x1010000 ++roundig mode: near ++ftint.l.d :: ++input: 356047.500000000000000 ++output: 356048 ++fcsr: 0x1010000 ++roundig mode: near ++ftint.l.d :: ++input: -1.000000000000000 ++output: -1 ++fcsr: 0 ++roundig mode: near ++ftint.l.d :: ++input: 23.062500000000000 ++output: 23 ++fcsr: 0x1010000 ++roundig mode: zero ++ftint.l.d :: ++input: 0.000000000000000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++ftint.l.d :: ++input: 456.250000000000000 ++output: 456 ++fcsr: 0x1010100 ++roundig mode: zero ++ftint.l.d :: ++input: 3.000000000000000 ++output: 3 ++fcsr: 0x100 ++roundig mode: zero ++ftint.l.d :: ++input: -1.000000000000000 ++output: -1 ++fcsr: 0x100 ++roundig mode: zero ++ftint.l.d :: ++input: 1384.500000000000000 ++output: 1384 ++fcsr: 0x1010100 ++roundig mode: zero ++ftint.l.d :: ++input: -7.250000000000000 ++output: -7 ++fcsr: 0x1010100 ++roundig mode: zero ++ftint.l.d :: ++input: 1000000000.000000000000000 ++output: 1000000000 ++fcsr: 0x100 ++roundig mode: zero ++ftint.l.d :: ++input: -5786.500000000000000 ++output: -5786 ++fcsr: 0x1010100 ++roundig mode: zero ++ftint.l.d :: 
++input: 1752.000000000000000 ++output: 1752 ++fcsr: 0x100 ++roundig mode: zero ++ftint.l.d :: ++input: 0.015625000000000 ++output: 0 ++fcsr: 0x1010100 ++roundig mode: zero ++ftint.l.d :: ++input: 0.031250000000000 ++output: 0 ++fcsr: 0x1010100 ++roundig mode: zero ++ftint.l.d :: ++input: -248562.750000000000000 ++output: -248562 ++fcsr: 0x1010100 ++roundig mode: zero ++ftint.l.d :: ++input: -45786.500000000000000 ++output: -45786 ++fcsr: 0x1010100 ++roundig mode: zero ++ftint.l.d :: ++input: 456.000000000000000 ++output: 456 ++fcsr: 0x100 ++roundig mode: zero ++ftint.l.d :: ++input: 34.031250000000000 ++output: 34 ++fcsr: 0x1010100 ++roundig mode: zero ++ftint.l.d :: ++input: 45786.750000000000000 ++output: 45786 ++fcsr: 0x1010100 ++roundig mode: zero ++ftint.l.d :: ++input: 1752065.000000000000000 ++output: 1752065 ++fcsr: 0x100 ++roundig mode: zero ++ftint.l.d :: ++input: 107.000000000000000 ++output: 107 ++fcsr: 0x100 ++roundig mode: zero ++ftint.l.d :: ++input: -45667.250000000000000 ++output: -45667 ++fcsr: 0x1010100 ++roundig mode: zero ++ftint.l.d :: ++input: -7.000000000000000 ++output: -7 ++fcsr: 0x100 ++roundig mode: zero ++ftint.l.d :: ++input: -347856.500000000000000 ++output: -347856 ++fcsr: 0x1010100 ++roundig mode: zero ++ftint.l.d :: ++input: 356047.500000000000000 ++output: 356047 ++fcsr: 0x1010100 ++roundig mode: zero ++ftint.l.d :: ++input: -1.000000000000000 ++output: -1 ++fcsr: 0x100 ++roundig mode: zero ++ftint.l.d :: ++input: 23.062500000000000 ++output: 23 ++fcsr: 0x1010100 ++roundig mode: +inf ++ftint.l.d :: ++input: 0.000000000000000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++ftint.l.d :: ++input: 456.250000000000000 ++output: 457 ++fcsr: 0x1010200 ++roundig mode: +inf ++ftint.l.d :: ++input: 3.000000000000000 ++output: 3 ++fcsr: 0x200 ++roundig mode: +inf ++ftint.l.d :: ++input: -1.000000000000000 ++output: -1 ++fcsr: 0x200 ++roundig mode: +inf ++ftint.l.d :: ++input: 1384.500000000000000 ++output: 1385 ++fcsr: 0x1010200 ++roundig mode: +inf ++ftint.l.d :: ++input: -7.250000000000000 ++output: -7 ++fcsr: 0x1010200 ++roundig mode: +inf ++ftint.l.d :: ++input: 1000000000.000000000000000 ++output: 1000000000 ++fcsr: 0x200 ++roundig mode: +inf ++ftint.l.d :: ++input: -5786.500000000000000 ++output: -5786 ++fcsr: 0x1010200 ++roundig mode: +inf ++ftint.l.d :: ++input: 1752.000000000000000 ++output: 1752 ++fcsr: 0x200 ++roundig mode: +inf ++ftint.l.d :: ++input: 0.015625000000000 ++output: 1 ++fcsr: 0x1010200 ++roundig mode: +inf ++ftint.l.d :: ++input: 0.031250000000000 ++output: 1 ++fcsr: 0x1010200 ++roundig mode: +inf ++ftint.l.d :: ++input: -248562.750000000000000 ++output: -248562 ++fcsr: 0x1010200 ++roundig mode: +inf ++ftint.l.d :: ++input: -45786.500000000000000 ++output: -45786 ++fcsr: 0x1010200 ++roundig mode: +inf ++ftint.l.d :: ++input: 456.000000000000000 ++output: 456 ++fcsr: 0x200 ++roundig mode: +inf ++ftint.l.d :: ++input: 34.031250000000000 ++output: 35 ++fcsr: 0x1010200 ++roundig mode: +inf ++ftint.l.d :: ++input: 45786.750000000000000 ++output: 45787 ++fcsr: 0x1010200 ++roundig mode: +inf ++ftint.l.d :: ++input: 1752065.000000000000000 ++output: 1752065 ++fcsr: 0x200 ++roundig mode: +inf ++ftint.l.d :: ++input: 107.000000000000000 ++output: 107 ++fcsr: 0x200 ++roundig mode: +inf ++ftint.l.d :: ++input: -45667.250000000000000 ++output: -45667 ++fcsr: 0x1010200 ++roundig mode: +inf ++ftint.l.d :: ++input: -7.000000000000000 ++output: -7 ++fcsr: 0x200 ++roundig mode: +inf ++ftint.l.d :: ++input: -347856.500000000000000 ++output: -347856 ++fcsr: 
0x1010200 ++roundig mode: +inf ++ftint.l.d :: ++input: 356047.500000000000000 ++output: 356048 ++fcsr: 0x1010200 ++roundig mode: +inf ++ftint.l.d :: ++input: -1.000000000000000 ++output: -1 ++fcsr: 0x200 ++roundig mode: +inf ++ftint.l.d :: ++input: 23.062500000000000 ++output: 24 ++fcsr: 0x1010200 ++roundig mode: -inf ++ftint.l.d :: ++input: 0.000000000000000 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++ftint.l.d :: ++input: 456.250000000000000 ++output: 456 ++fcsr: 0x1010300 ++roundig mode: -inf ++ftint.l.d :: ++input: 3.000000000000000 ++output: 3 ++fcsr: 0x300 ++roundig mode: -inf ++ftint.l.d :: ++input: -1.000000000000000 ++output: -1 ++fcsr: 0x300 ++roundig mode: -inf ++ftint.l.d :: ++input: 1384.500000000000000 ++output: 1384 ++fcsr: 0x1010300 ++roundig mode: -inf ++ftint.l.d :: ++input: -7.250000000000000 ++output: -8 ++fcsr: 0x1010300 ++roundig mode: -inf ++ftint.l.d :: ++input: 1000000000.000000000000000 ++output: 1000000000 ++fcsr: 0x300 ++roundig mode: -inf ++ftint.l.d :: ++input: -5786.500000000000000 ++output: -5787 ++fcsr: 0x1010300 ++roundig mode: -inf ++ftint.l.d :: ++input: 1752.000000000000000 ++output: 1752 ++fcsr: 0x300 ++roundig mode: -inf ++ftint.l.d :: ++input: 0.015625000000000 ++output: 0 ++fcsr: 0x1010300 ++roundig mode: -inf ++ftint.l.d :: ++input: 0.031250000000000 ++output: 0 ++fcsr: 0x1010300 ++roundig mode: -inf ++ftint.l.d :: ++input: -248562.750000000000000 ++output: -248563 ++fcsr: 0x1010300 ++roundig mode: -inf ++ftint.l.d :: ++input: -45786.500000000000000 ++output: -45787 ++fcsr: 0x1010300 ++roundig mode: -inf ++ftint.l.d :: ++input: 456.000000000000000 ++output: 456 ++fcsr: 0x300 ++roundig mode: -inf ++ftint.l.d :: ++input: 34.031250000000000 ++output: 34 ++fcsr: 0x1010300 ++roundig mode: -inf ++ftint.l.d :: ++input: 45786.750000000000000 ++output: 45786 ++fcsr: 0x1010300 ++roundig mode: -inf ++ftint.l.d :: ++input: 1752065.000000000000000 ++output: 1752065 ++fcsr: 0x300 ++roundig mode: -inf ++ftint.l.d :: ++input: 107.000000000000000 ++output: 107 ++fcsr: 0x300 ++roundig mode: -inf ++ftint.l.d :: ++input: -45667.250000000000000 ++output: -45668 ++fcsr: 0x1010300 ++roundig mode: -inf ++ftint.l.d :: ++input: -7.000000000000000 ++output: -7 ++fcsr: 0x300 ++roundig mode: -inf ++ftint.l.d :: ++input: -347856.500000000000000 ++output: -347857 ++fcsr: 0x1010300 ++roundig mode: -inf ++ftint.l.d :: ++input: 356047.500000000000000 ++output: 356047 ++fcsr: 0x1010300 ++roundig mode: -inf ++ftint.l.d :: ++input: -1.000000000000000 ++output: -1 ++fcsr: 0x300 ++roundig mode: -inf ++ftint.l.d :: ++input: 23.062500000000000 ++output: 23 ++fcsr: 0x1010300 ++roundig mode: near ++ffint.s.w :: ++input: 0 ++output: 0.000000 ++fcsr: 0 ++roundig mode: near ++ffint.s.w :: ++input: 456 ++output: 456.000000 ++fcsr: 0 ++roundig mode: near ++ffint.s.w :: ++input: 3 ++output: 3.000000 ++fcsr: 0 ++roundig mode: near ++ffint.s.w :: ++input: -1 ++output: -1.000000 ++fcsr: 0 ++roundig mode: near ++ffint.s.w :: ++input: -1 ++output: -1.000000 ++fcsr: 0 ++roundig mode: near ++ffint.s.w :: ++input: 356 ++output: 356.000000 ++fcsr: 0 ++roundig mode: near ++ffint.s.w :: ++input: 1000000000 ++output: 1000000000.000000 ++fcsr: 0 ++roundig mode: near ++ffint.s.w :: ++input: -5786 ++output: -5786.000000 ++fcsr: 0 ++roundig mode: near ++ffint.s.w :: ++input: 1752 ++output: 1752.000000 ++fcsr: 0 ++roundig mode: near ++ffint.s.w :: ++input: 24575 ++output: 24575.000000 ++fcsr: 0 ++roundig mode: near ++ffint.s.w :: ++input: 10 ++output: 10.000000 ++fcsr: 0 ++roundig mode: near ++ffint.s.w :: 
++input: -248562 ++output: -248562.000000 ++fcsr: 0 ++roundig mode: near ++ffint.s.w :: ++input: -45786 ++output: -45786.000000 ++fcsr: 0 ++roundig mode: near ++ffint.s.w :: ++input: 456 ++output: 456.000000 ++fcsr: 0 ++roundig mode: near ++ffint.s.w :: ++input: 34 ++output: 34.000000 ++fcsr: 0 ++roundig mode: near ++ffint.s.w :: ++input: 45786 ++output: 45786.000000 ++fcsr: 0 ++roundig mode: near ++ffint.s.w :: ++input: 1752065 ++output: 1752065.000000 ++fcsr: 0 ++roundig mode: near ++ffint.s.w :: ++input: 107 ++output: 107.000000 ++fcsr: 0 ++roundig mode: near ++ffint.s.w :: ++input: -45667 ++output: -45667.000000 ++fcsr: 0 ++roundig mode: near ++ffint.s.w :: ++input: -7 ++output: -7.000000 ++fcsr: 0 ++roundig mode: near ++ffint.s.w :: ++input: -347856 ++output: -347856.000000 ++fcsr: 0 ++roundig mode: near ++ffint.s.w :: ++input: -2147483648 ++output: -2147483648.000000 ++fcsr: 0 ++roundig mode: near ++ffint.s.w :: ++input: 268435455 ++output: 268435456.000000 ++fcsr: 0x1010000 ++roundig mode: near ++ffint.s.w :: ++input: 23 ++output: 23.000000 ++fcsr: 0 ++roundig mode: zero ++ffint.s.w :: ++input: 0 ++output: 0.000000 ++fcsr: 0x100 ++roundig mode: zero ++ffint.s.w :: ++input: 456 ++output: 456.000000 ++fcsr: 0x100 ++roundig mode: zero ++ffint.s.w :: ++input: 3 ++output: 3.000000 ++fcsr: 0x100 ++roundig mode: zero ++ffint.s.w :: ++input: -1 ++output: -1.000000 ++fcsr: 0x100 ++roundig mode: zero ++ffint.s.w :: ++input: -1 ++output: -1.000000 ++fcsr: 0x100 ++roundig mode: zero ++ffint.s.w :: ++input: 356 ++output: 356.000000 ++fcsr: 0x100 ++roundig mode: zero ++ffint.s.w :: ++input: 1000000000 ++output: 1000000000.000000 ++fcsr: 0x100 ++roundig mode: zero ++ffint.s.w :: ++input: -5786 ++output: -5786.000000 ++fcsr: 0x100 ++roundig mode: zero ++ffint.s.w :: ++input: 1752 ++output: 1752.000000 ++fcsr: 0x100 ++roundig mode: zero ++ffint.s.w :: ++input: 24575 ++output: 24575.000000 ++fcsr: 0x100 ++roundig mode: zero ++ffint.s.w :: ++input: 10 ++output: 10.000000 ++fcsr: 0x100 ++roundig mode: zero ++ffint.s.w :: ++input: -248562 ++output: -248562.000000 ++fcsr: 0x100 ++roundig mode: zero ++ffint.s.w :: ++input: -45786 ++output: -45786.000000 ++fcsr: 0x100 ++roundig mode: zero ++ffint.s.w :: ++input: 456 ++output: 456.000000 ++fcsr: 0x100 ++roundig mode: zero ++ffint.s.w :: ++input: 34 ++output: 34.000000 ++fcsr: 0x100 ++roundig mode: zero ++ffint.s.w :: ++input: 45786 ++output: 45786.000000 ++fcsr: 0x100 ++roundig mode: zero ++ffint.s.w :: ++input: 1752065 ++output: 1752065.000000 ++fcsr: 0x100 ++roundig mode: zero ++ffint.s.w :: ++input: 107 ++output: 107.000000 ++fcsr: 0x100 ++roundig mode: zero ++ffint.s.w :: ++input: -45667 ++output: -45667.000000 ++fcsr: 0x100 ++roundig mode: zero ++ffint.s.w :: ++input: -7 ++output: -7.000000 ++fcsr: 0x100 ++roundig mode: zero ++ffint.s.w :: ++input: -347856 ++output: -347856.000000 ++fcsr: 0x100 ++roundig mode: zero ++ffint.s.w :: ++input: -2147483648 ++output: -2147483648.000000 ++fcsr: 0x100 ++roundig mode: zero ++ffint.s.w :: ++input: 268435455 ++output: 268435440.000000 ++fcsr: 0x1010100 ++roundig mode: zero ++ffint.s.w :: ++input: 23 ++output: 23.000000 ++fcsr: 0x100 ++roundig mode: +inf ++ffint.s.w :: ++input: 0 ++output: 0.000000 ++fcsr: 0x200 ++roundig mode: +inf ++ffint.s.w :: ++input: 456 ++output: 456.000000 ++fcsr: 0x200 ++roundig mode: +inf ++ffint.s.w :: ++input: 3 ++output: 3.000000 ++fcsr: 0x200 ++roundig mode: +inf ++ffint.s.w :: ++input: -1 ++output: -1.000000 ++fcsr: 0x200 ++roundig mode: +inf ++ffint.s.w :: ++input: -1 ++output: 
-1.000000 ++fcsr: 0x200 ++roundig mode: +inf ++ffint.s.w :: ++input: 356 ++output: 356.000000 ++fcsr: 0x200 ++roundig mode: +inf ++ffint.s.w :: ++input: 1000000000 ++output: 1000000000.000000 ++fcsr: 0x200 ++roundig mode: +inf ++ffint.s.w :: ++input: -5786 ++output: -5786.000000 ++fcsr: 0x200 ++roundig mode: +inf ++ffint.s.w :: ++input: 1752 ++output: 1752.000000 ++fcsr: 0x200 ++roundig mode: +inf ++ffint.s.w :: ++input: 24575 ++output: 24575.000000 ++fcsr: 0x200 ++roundig mode: +inf ++ffint.s.w :: ++input: 10 ++output: 10.000000 ++fcsr: 0x200 ++roundig mode: +inf ++ffint.s.w :: ++input: -248562 ++output: -248562.000000 ++fcsr: 0x200 ++roundig mode: +inf ++ffint.s.w :: ++input: -45786 ++output: -45786.000000 ++fcsr: 0x200 ++roundig mode: +inf ++ffint.s.w :: ++input: 456 ++output: 456.000000 ++fcsr: 0x200 ++roundig mode: +inf ++ffint.s.w :: ++input: 34 ++output: 34.000000 ++fcsr: 0x200 ++roundig mode: +inf ++ffint.s.w :: ++input: 45786 ++output: 45786.000000 ++fcsr: 0x200 ++roundig mode: +inf ++ffint.s.w :: ++input: 1752065 ++output: 1752065.000000 ++fcsr: 0x200 ++roundig mode: +inf ++ffint.s.w :: ++input: 107 ++output: 107.000000 ++fcsr: 0x200 ++roundig mode: +inf ++ffint.s.w :: ++input: -45667 ++output: -45667.000000 ++fcsr: 0x200 ++roundig mode: +inf ++ffint.s.w :: ++input: -7 ++output: -7.000000 ++fcsr: 0x200 ++roundig mode: +inf ++ffint.s.w :: ++input: -347856 ++output: -347856.000000 ++fcsr: 0x200 ++roundig mode: +inf ++ffint.s.w :: ++input: -2147483648 ++output: -2147483648.000000 ++fcsr: 0x200 ++roundig mode: +inf ++ffint.s.w :: ++input: 268435455 ++output: 268435456.000000 ++fcsr: 0x1010200 ++roundig mode: +inf ++ffint.s.w :: ++input: 23 ++output: 23.000000 ++fcsr: 0x200 ++roundig mode: -inf ++ffint.s.w :: ++input: 0 ++output: 0.000000 ++fcsr: 0x300 ++roundig mode: -inf ++ffint.s.w :: ++input: 456 ++output: 456.000000 ++fcsr: 0x300 ++roundig mode: -inf ++ffint.s.w :: ++input: 3 ++output: 3.000000 ++fcsr: 0x300 ++roundig mode: -inf ++ffint.s.w :: ++input: -1 ++output: -1.000000 ++fcsr: 0x300 ++roundig mode: -inf ++ffint.s.w :: ++input: -1 ++output: -1.000000 ++fcsr: 0x300 ++roundig mode: -inf ++ffint.s.w :: ++input: 356 ++output: 356.000000 ++fcsr: 0x300 ++roundig mode: -inf ++ffint.s.w :: ++input: 1000000000 ++output: 1000000000.000000 ++fcsr: 0x300 ++roundig mode: -inf ++ffint.s.w :: ++input: -5786 ++output: -5786.000000 ++fcsr: 0x300 ++roundig mode: -inf ++ffint.s.w :: ++input: 1752 ++output: 1752.000000 ++fcsr: 0x300 ++roundig mode: -inf ++ffint.s.w :: ++input: 24575 ++output: 24575.000000 ++fcsr: 0x300 ++roundig mode: -inf ++ffint.s.w :: ++input: 10 ++output: 10.000000 ++fcsr: 0x300 ++roundig mode: -inf ++ffint.s.w :: ++input: -248562 ++output: -248562.000000 ++fcsr: 0x300 ++roundig mode: -inf ++ffint.s.w :: ++input: -45786 ++output: -45786.000000 ++fcsr: 0x300 ++roundig mode: -inf ++ffint.s.w :: ++input: 456 ++output: 456.000000 ++fcsr: 0x300 ++roundig mode: -inf ++ffint.s.w :: ++input: 34 ++output: 34.000000 ++fcsr: 0x300 ++roundig mode: -inf ++ffint.s.w :: ++input: 45786 ++output: 45786.000000 ++fcsr: 0x300 ++roundig mode: -inf ++ffint.s.w :: ++input: 1752065 ++output: 1752065.000000 ++fcsr: 0x300 ++roundig mode: -inf ++ffint.s.w :: ++input: 107 ++output: 107.000000 ++fcsr: 0x300 ++roundig mode: -inf ++ffint.s.w :: ++input: -45667 ++output: -45667.000000 ++fcsr: 0x300 ++roundig mode: -inf ++ffint.s.w :: ++input: -7 ++output: -7.000000 ++fcsr: 0x300 ++roundig mode: -inf ++ffint.s.w :: ++input: -347856 ++output: -347856.000000 ++fcsr: 0x300 ++roundig mode: -inf ++ffint.s.w :: 
++input: -2147483648 ++output: -2147483648.000000 ++fcsr: 0x300 ++roundig mode: -inf ++ffint.s.w :: ++input: 268435455 ++output: 268435440.000000 ++fcsr: 0x1010300 ++roundig mode: -inf ++ffint.s.w :: ++input: 23 ++output: 23.000000 ++fcsr: 0x300 ++roundig mode: near ++ffint.s.l :: ++input: 18 ++output: 18.000000 ++fcsr: 0 ++roundig mode: near ++ffint.s.l :: ++input: 25 ++output: 25.000000 ++fcsr: 0 ++roundig mode: near ++ffint.s.l :: ++input: 3 ++output: 3.000000 ++fcsr: 0 ++roundig mode: near ++ffint.s.l :: ++input: -1 ++output: -1.000000 ++fcsr: 0 ++roundig mode: near ++ffint.s.l :: ++input: 4294967295 ++output: 4294967296.000000 ++fcsr: 0x1010000 ++roundig mode: near ++ffint.s.l :: ++input: 356 ++output: 356.000000 ++fcsr: 0 ++roundig mode: near ++ffint.s.l :: ++input: 1000000 ++output: 1000000.000000 ++fcsr: 0 ++roundig mode: near ++ffint.s.l :: ++input: -5786 ++output: -5786.000000 ++fcsr: 0 ++roundig mode: near ++ffint.s.l :: ++input: -1 ++output: -1.000000 ++fcsr: 0 ++roundig mode: near ++ffint.s.l :: ++input: 24575 ++output: 24575.000000 ++fcsr: 0 ++roundig mode: near ++ffint.s.l :: ++input: 10 ++output: 10.000000 ++fcsr: 0 ++roundig mode: near ++ffint.s.l :: ++input: -125458 ++output: -125458.000000 ++fcsr: 0 ++roundig mode: near ++ffint.s.l :: ++input: -486 ++output: -486.000000 ++fcsr: 0 ++roundig mode: near ++ffint.s.l :: ++input: 456 ++output: 456.000000 ++fcsr: 0 ++roundig mode: near ++ffint.s.l :: ++input: 34 ++output: 34.000000 ++fcsr: 0 ++roundig mode: near ++ffint.s.l :: ++input: 45786 ++output: 45786.000000 ++fcsr: 0 ++roundig mode: near ++ffint.s.l :: ++input: 0 ++output: 0.000000 ++fcsr: 0 ++roundig mode: near ++ffint.s.l :: ++input: 1700000 ++output: 1700000.000000 ++fcsr: 0 ++roundig mode: near ++ffint.s.l :: ++input: -45667 ++output: -45667.000000 ++fcsr: 0 ++roundig mode: near ++ffint.s.l :: ++input: -7 ++output: -7.000000 ++fcsr: 0 ++roundig mode: near ++ffint.s.l :: ++input: -347856 ++output: -347856.000000 ++fcsr: 0 ++roundig mode: near ++ffint.s.l :: ++input: 2147483648 ++output: 2147483648.000000 ++fcsr: 0 ++roundig mode: near ++ffint.s.l :: ++input: 268435455 ++output: 268435456.000000 ++fcsr: 0x1010000 ++roundig mode: near ++ffint.s.l :: ++input: 23 ++output: 23.000000 ++fcsr: 0 ++roundig mode: zero ++ffint.s.l :: ++input: 18 ++output: 18.000000 ++fcsr: 0x100 ++roundig mode: zero ++ffint.s.l :: ++input: 25 ++output: 25.000000 ++fcsr: 0x100 ++roundig mode: zero ++ffint.s.l :: ++input: 3 ++output: 3.000000 ++fcsr: 0x100 ++roundig mode: zero ++ffint.s.l :: ++input: -1 ++output: -1.000000 ++fcsr: 0x100 ++roundig mode: zero ++ffint.s.l :: ++input: 4294967295 ++output: 4294967040.000000 ++fcsr: 0x1010100 ++roundig mode: zero ++ffint.s.l :: ++input: 356 ++output: 356.000000 ++fcsr: 0x100 ++roundig mode: zero ++ffint.s.l :: ++input: 1000000 ++output: 1000000.000000 ++fcsr: 0x100 ++roundig mode: zero ++ffint.s.l :: ++input: -5786 ++output: -5786.000000 ++fcsr: 0x100 ++roundig mode: zero ++ffint.s.l :: ++input: -1 ++output: -1.000000 ++fcsr: 0x100 ++roundig mode: zero ++ffint.s.l :: ++input: 24575 ++output: 24575.000000 ++fcsr: 0x100 ++roundig mode: zero ++ffint.s.l :: ++input: 10 ++output: 10.000000 ++fcsr: 0x100 ++roundig mode: zero ++ffint.s.l :: ++input: -125458 ++output: -125458.000000 ++fcsr: 0x100 ++roundig mode: zero ++ffint.s.l :: ++input: -486 ++output: -486.000000 ++fcsr: 0x100 ++roundig mode: zero ++ffint.s.l :: ++input: 456 ++output: 456.000000 ++fcsr: 0x100 ++roundig mode: zero ++ffint.s.l :: ++input: 34 ++output: 34.000000 ++fcsr: 0x100 ++roundig mode: 
zero ++ffint.s.l :: ++input: 45786 ++output: 45786.000000 ++fcsr: 0x100 ++roundig mode: zero ++ffint.s.l :: ++input: 0 ++output: 0.000000 ++fcsr: 0x100 ++roundig mode: zero ++ffint.s.l :: ++input: 1700000 ++output: 1700000.000000 ++fcsr: 0x100 ++roundig mode: zero ++ffint.s.l :: ++input: -45667 ++output: -45667.000000 ++fcsr: 0x100 ++roundig mode: zero ++ffint.s.l :: ++input: -7 ++output: -7.000000 ++fcsr: 0x100 ++roundig mode: zero ++ffint.s.l :: ++input: -347856 ++output: -347856.000000 ++fcsr: 0x100 ++roundig mode: zero ++ffint.s.l :: ++input: 2147483648 ++output: 2147483648.000000 ++fcsr: 0x100 ++roundig mode: zero ++ffint.s.l :: ++input: 268435455 ++output: 268435440.000000 ++fcsr: 0x1010100 ++roundig mode: zero ++ffint.s.l :: ++input: 23 ++output: 23.000000 ++fcsr: 0x100 ++roundig mode: +inf ++ffint.s.l :: ++input: 18 ++output: 18.000000 ++fcsr: 0x200 ++roundig mode: +inf ++ffint.s.l :: ++input: 25 ++output: 25.000000 ++fcsr: 0x200 ++roundig mode: +inf ++ffint.s.l :: ++input: 3 ++output: 3.000000 ++fcsr: 0x200 ++roundig mode: +inf ++ffint.s.l :: ++input: -1 ++output: -1.000000 ++fcsr: 0x200 ++roundig mode: +inf ++ffint.s.l :: ++input: 4294967295 ++output: 4294967296.000000 ++fcsr: 0x1010200 ++roundig mode: +inf ++ffint.s.l :: ++input: 356 ++output: 356.000000 ++fcsr: 0x200 ++roundig mode: +inf ++ffint.s.l :: ++input: 1000000 ++output: 1000000.000000 ++fcsr: 0x200 ++roundig mode: +inf ++ffint.s.l :: ++input: -5786 ++output: -5786.000000 ++fcsr: 0x200 ++roundig mode: +inf ++ffint.s.l :: ++input: -1 ++output: -1.000000 ++fcsr: 0x200 ++roundig mode: +inf ++ffint.s.l :: ++input: 24575 ++output: 24575.000000 ++fcsr: 0x200 ++roundig mode: +inf ++ffint.s.l :: ++input: 10 ++output: 10.000000 ++fcsr: 0x200 ++roundig mode: +inf ++ffint.s.l :: ++input: -125458 ++output: -125458.000000 ++fcsr: 0x200 ++roundig mode: +inf ++ffint.s.l :: ++input: -486 ++output: -486.000000 ++fcsr: 0x200 ++roundig mode: +inf ++ffint.s.l :: ++input: 456 ++output: 456.000000 ++fcsr: 0x200 ++roundig mode: +inf ++ffint.s.l :: ++input: 34 ++output: 34.000000 ++fcsr: 0x200 ++roundig mode: +inf ++ffint.s.l :: ++input: 45786 ++output: 45786.000000 ++fcsr: 0x200 ++roundig mode: +inf ++ffint.s.l :: ++input: 0 ++output: 0.000000 ++fcsr: 0x200 ++roundig mode: +inf ++ffint.s.l :: ++input: 1700000 ++output: 1700000.000000 ++fcsr: 0x200 ++roundig mode: +inf ++ffint.s.l :: ++input: -45667 ++output: -45667.000000 ++fcsr: 0x200 ++roundig mode: +inf ++ffint.s.l :: ++input: -7 ++output: -7.000000 ++fcsr: 0x200 ++roundig mode: +inf ++ffint.s.l :: ++input: -347856 ++output: -347856.000000 ++fcsr: 0x200 ++roundig mode: +inf ++ffint.s.l :: ++input: 2147483648 ++output: 2147483648.000000 ++fcsr: 0x200 ++roundig mode: +inf ++ffint.s.l :: ++input: 268435455 ++output: 268435456.000000 ++fcsr: 0x1010200 ++roundig mode: +inf ++ffint.s.l :: ++input: 23 ++output: 23.000000 ++fcsr: 0x200 ++roundig mode: -inf ++ffint.s.l :: ++input: 18 ++output: 18.000000 ++fcsr: 0x300 ++roundig mode: -inf ++ffint.s.l :: ++input: 25 ++output: 25.000000 ++fcsr: 0x300 ++roundig mode: -inf ++ffint.s.l :: ++input: 3 ++output: 3.000000 ++fcsr: 0x300 ++roundig mode: -inf ++ffint.s.l :: ++input: -1 ++output: -1.000000 ++fcsr: 0x300 ++roundig mode: -inf ++ffint.s.l :: ++input: 4294967295 ++output: 4294967040.000000 ++fcsr: 0x1010300 ++roundig mode: -inf ++ffint.s.l :: ++input: 356 ++output: 356.000000 ++fcsr: 0x300 ++roundig mode: -inf ++ffint.s.l :: ++input: 1000000 ++output: 1000000.000000 ++fcsr: 0x300 ++roundig mode: -inf ++ffint.s.l :: ++input: -5786 ++output: 
-5786.000000 ++fcsr: 0x300 ++roundig mode: -inf ++ffint.s.l :: ++input: -1 ++output: -1.000000 ++fcsr: 0x300 ++roundig mode: -inf ++ffint.s.l :: ++input: 24575 ++output: 24575.000000 ++fcsr: 0x300 ++roundig mode: -inf ++ffint.s.l :: ++input: 10 ++output: 10.000000 ++fcsr: 0x300 ++roundig mode: -inf ++ffint.s.l :: ++input: -125458 ++output: -125458.000000 ++fcsr: 0x300 ++roundig mode: -inf ++ffint.s.l :: ++input: -486 ++output: -486.000000 ++fcsr: 0x300 ++roundig mode: -inf ++ffint.s.l :: ++input: 456 ++output: 456.000000 ++fcsr: 0x300 ++roundig mode: -inf ++ffint.s.l :: ++input: 34 ++output: 34.000000 ++fcsr: 0x300 ++roundig mode: -inf ++ffint.s.l :: ++input: 45786 ++output: 45786.000000 ++fcsr: 0x300 ++roundig mode: -inf ++ffint.s.l :: ++input: 0 ++output: 0.000000 ++fcsr: 0x300 ++roundig mode: -inf ++ffint.s.l :: ++input: 1700000 ++output: 1700000.000000 ++fcsr: 0x300 ++roundig mode: -inf ++ffint.s.l :: ++input: -45667 ++output: -45667.000000 ++fcsr: 0x300 ++roundig mode: -inf ++ffint.s.l :: ++input: -7 ++output: -7.000000 ++fcsr: 0x300 ++roundig mode: -inf ++ffint.s.l :: ++input: -347856 ++output: -347856.000000 ++fcsr: 0x300 ++roundig mode: -inf ++ffint.s.l :: ++input: 2147483648 ++output: 2147483648.000000 ++fcsr: 0x300 ++roundig mode: -inf ++ffint.s.l :: ++input: 268435455 ++output: 268435440.000000 ++fcsr: 0x1010300 ++roundig mode: -inf ++ffint.s.l :: ++input: 23 ++output: 23.000000 ++fcsr: 0x300 ++roundig mode: near ++ffint.d.w :: ++input: 0 ++output: 0.000000000000000 ++fcsr: 0 ++roundig mode: near ++ffint.d.w :: ++input: 456 ++output: 456.000000000000000 ++fcsr: 0 ++roundig mode: near ++ffint.d.w :: ++input: 3 ++output: 3.000000000000000 ++fcsr: 0 ++roundig mode: near ++ffint.d.w :: ++input: -1 ++output: -1.000000000000000 ++fcsr: 0 ++roundig mode: near ++ffint.d.w :: ++input: -1 ++output: -1.000000000000000 ++fcsr: 0 ++roundig mode: near ++ffint.d.w :: ++input: 356 ++output: 356.000000000000000 ++fcsr: 0 ++roundig mode: near ++ffint.d.w :: ++input: 1000000000 ++output: 1000000000.000000000000000 ++fcsr: 0 ++roundig mode: near ++ffint.d.w :: ++input: -5786 ++output: -5786.000000000000000 ++fcsr: 0 ++roundig mode: near ++ffint.d.w :: ++input: 1752 ++output: 1752.000000000000000 ++fcsr: 0 ++roundig mode: near ++ffint.d.w :: ++input: 24575 ++output: 24575.000000000000000 ++fcsr: 0 ++roundig mode: near ++ffint.d.w :: ++input: 10 ++output: 10.000000000000000 ++fcsr: 0 ++roundig mode: near ++ffint.d.w :: ++input: -248562 ++output: -248562.000000000000000 ++fcsr: 0 ++roundig mode: near ++ffint.d.w :: ++input: -45786 ++output: -45786.000000000000000 ++fcsr: 0 ++roundig mode: near ++ffint.d.w :: ++input: 456 ++output: 456.000000000000000 ++fcsr: 0 ++roundig mode: near ++ffint.d.w :: ++input: 34 ++output: 34.000000000000000 ++fcsr: 0 ++roundig mode: near ++ffint.d.w :: ++input: 45786 ++output: 45786.000000000000000 ++fcsr: 0 ++roundig mode: near ++ffint.d.w :: ++input: 1752065 ++output: 1752065.000000000000000 ++fcsr: 0 ++roundig mode: near ++ffint.d.w :: ++input: 107 ++output: 107.000000000000000 ++fcsr: 0 ++roundig mode: near ++ffint.d.w :: ++input: -45667 ++output: -45667.000000000000000 ++fcsr: 0 ++roundig mode: near ++ffint.d.w :: ++input: -7 ++output: -7.000000000000000 ++fcsr: 0 ++roundig mode: near ++ffint.d.w :: ++input: -347856 ++output: -347856.000000000000000 ++fcsr: 0 ++roundig mode: near ++ffint.d.w :: ++input: -2147483648 ++output: -2147483648.000000000000000 ++fcsr: 0 ++roundig mode: near ++ffint.d.w :: ++input: 268435455 ++output: 268435455.000000000000000 ++fcsr: 0 
++roundig mode: near ++ffint.d.w :: ++input: 23 ++output: 23.000000000000000 ++fcsr: 0 ++roundig mode: zero ++ffint.d.w :: ++input: 0 ++output: 0.000000000000000 ++fcsr: 0x100 ++roundig mode: zero ++ffint.d.w :: ++input: 456 ++output: 456.000000000000000 ++fcsr: 0x100 ++roundig mode: zero ++ffint.d.w :: ++input: 3 ++output: 3.000000000000000 ++fcsr: 0x100 ++roundig mode: zero ++ffint.d.w :: ++input: -1 ++output: -1.000000000000000 ++fcsr: 0x100 ++roundig mode: zero ++ffint.d.w :: ++input: -1 ++output: -1.000000000000000 ++fcsr: 0x100 ++roundig mode: zero ++ffint.d.w :: ++input: 356 ++output: 356.000000000000000 ++fcsr: 0x100 ++roundig mode: zero ++ffint.d.w :: ++input: 1000000000 ++output: 1000000000.000000000000000 ++fcsr: 0x100 ++roundig mode: zero ++ffint.d.w :: ++input: -5786 ++output: -5786.000000000000000 ++fcsr: 0x100 ++roundig mode: zero ++ffint.d.w :: ++input: 1752 ++output: 1752.000000000000000 ++fcsr: 0x100 ++roundig mode: zero ++ffint.d.w :: ++input: 24575 ++output: 24575.000000000000000 ++fcsr: 0x100 ++roundig mode: zero ++ffint.d.w :: ++input: 10 ++output: 10.000000000000000 ++fcsr: 0x100 ++roundig mode: zero ++ffint.d.w :: ++input: -248562 ++output: -248562.000000000000000 ++fcsr: 0x100 ++roundig mode: zero ++ffint.d.w :: ++input: -45786 ++output: -45786.000000000000000 ++fcsr: 0x100 ++roundig mode: zero ++ffint.d.w :: ++input: 456 ++output: 456.000000000000000 ++fcsr: 0x100 ++roundig mode: zero ++ffint.d.w :: ++input: 34 ++output: 34.000000000000000 ++fcsr: 0x100 ++roundig mode: zero ++ffint.d.w :: ++input: 45786 ++output: 45786.000000000000000 ++fcsr: 0x100 ++roundig mode: zero ++ffint.d.w :: ++input: 1752065 ++output: 1752065.000000000000000 ++fcsr: 0x100 ++roundig mode: zero ++ffint.d.w :: ++input: 107 ++output: 107.000000000000000 ++fcsr: 0x100 ++roundig mode: zero ++ffint.d.w :: ++input: -45667 ++output: -45667.000000000000000 ++fcsr: 0x100 ++roundig mode: zero ++ffint.d.w :: ++input: -7 ++output: -7.000000000000000 ++fcsr: 0x100 ++roundig mode: zero ++ffint.d.w :: ++input: -347856 ++output: -347856.000000000000000 ++fcsr: 0x100 ++roundig mode: zero ++ffint.d.w :: ++input: -2147483648 ++output: -2147483648.000000000000000 ++fcsr: 0x100 ++roundig mode: zero ++ffint.d.w :: ++input: 268435455 ++output: 268435455.000000000000000 ++fcsr: 0x100 ++roundig mode: zero ++ffint.d.w :: ++input: 23 ++output: 23.000000000000000 ++fcsr: 0x100 ++roundig mode: +inf ++ffint.d.w :: ++input: 0 ++output: 0.000000000000000 ++fcsr: 0x200 ++roundig mode: +inf ++ffint.d.w :: ++input: 456 ++output: 456.000000000000000 ++fcsr: 0x200 ++roundig mode: +inf ++ffint.d.w :: ++input: 3 ++output: 3.000000000000000 ++fcsr: 0x200 ++roundig mode: +inf ++ffint.d.w :: ++input: -1 ++output: -1.000000000000000 ++fcsr: 0x200 ++roundig mode: +inf ++ffint.d.w :: ++input: -1 ++output: -1.000000000000000 ++fcsr: 0x200 ++roundig mode: +inf ++ffint.d.w :: ++input: 356 ++output: 356.000000000000000 ++fcsr: 0x200 ++roundig mode: +inf ++ffint.d.w :: ++input: 1000000000 ++output: 1000000000.000000000000000 ++fcsr: 0x200 ++roundig mode: +inf ++ffint.d.w :: ++input: -5786 ++output: -5786.000000000000000 ++fcsr: 0x200 ++roundig mode: +inf ++ffint.d.w :: ++input: 1752 ++output: 1752.000000000000000 ++fcsr: 0x200 ++roundig mode: +inf ++ffint.d.w :: ++input: 24575 ++output: 24575.000000000000000 ++fcsr: 0x200 ++roundig mode: +inf ++ffint.d.w :: ++input: 10 ++output: 10.000000000000000 ++fcsr: 0x200 ++roundig mode: +inf ++ffint.d.w :: ++input: -248562 ++output: -248562.000000000000000 ++fcsr: 0x200 ++roundig mode: +inf 
++ffint.d.w :: ++input: -45786 ++output: -45786.000000000000000 ++fcsr: 0x200 ++roundig mode: +inf ++ffint.d.w :: ++input: 456 ++output: 456.000000000000000 ++fcsr: 0x200 ++roundig mode: +inf ++ffint.d.w :: ++input: 34 ++output: 34.000000000000000 ++fcsr: 0x200 ++roundig mode: +inf ++ffint.d.w :: ++input: 45786 ++output: 45786.000000000000000 ++fcsr: 0x200 ++roundig mode: +inf ++ffint.d.w :: ++input: 1752065 ++output: 1752065.000000000000000 ++fcsr: 0x200 ++roundig mode: +inf ++ffint.d.w :: ++input: 107 ++output: 107.000000000000000 ++fcsr: 0x200 ++roundig mode: +inf ++ffint.d.w :: ++input: -45667 ++output: -45667.000000000000000 ++fcsr: 0x200 ++roundig mode: +inf ++ffint.d.w :: ++input: -7 ++output: -7.000000000000000 ++fcsr: 0x200 ++roundig mode: +inf ++ffint.d.w :: ++input: -347856 ++output: -347856.000000000000000 ++fcsr: 0x200 ++roundig mode: +inf ++ffint.d.w :: ++input: -2147483648 ++output: -2147483648.000000000000000 ++fcsr: 0x200 ++roundig mode: +inf ++ffint.d.w :: ++input: 268435455 ++output: 268435455.000000000000000 ++fcsr: 0x200 ++roundig mode: +inf ++ffint.d.w :: ++input: 23 ++output: 23.000000000000000 ++fcsr: 0x200 ++roundig mode: -inf ++ffint.d.w :: ++input: 0 ++output: 0.000000000000000 ++fcsr: 0x300 ++roundig mode: -inf ++ffint.d.w :: ++input: 456 ++output: 456.000000000000000 ++fcsr: 0x300 ++roundig mode: -inf ++ffint.d.w :: ++input: 3 ++output: 3.000000000000000 ++fcsr: 0x300 ++roundig mode: -inf ++ffint.d.w :: ++input: -1 ++output: -1.000000000000000 ++fcsr: 0x300 ++roundig mode: -inf ++ffint.d.w :: ++input: -1 ++output: -1.000000000000000 ++fcsr: 0x300 ++roundig mode: -inf ++ffint.d.w :: ++input: 356 ++output: 356.000000000000000 ++fcsr: 0x300 ++roundig mode: -inf ++ffint.d.w :: ++input: 1000000000 ++output: 1000000000.000000000000000 ++fcsr: 0x300 ++roundig mode: -inf ++ffint.d.w :: ++input: -5786 ++output: -5786.000000000000000 ++fcsr: 0x300 ++roundig mode: -inf ++ffint.d.w :: ++input: 1752 ++output: 1752.000000000000000 ++fcsr: 0x300 ++roundig mode: -inf ++ffint.d.w :: ++input: 24575 ++output: 24575.000000000000000 ++fcsr: 0x300 ++roundig mode: -inf ++ffint.d.w :: ++input: 10 ++output: 10.000000000000000 ++fcsr: 0x300 ++roundig mode: -inf ++ffint.d.w :: ++input: -248562 ++output: -248562.000000000000000 ++fcsr: 0x300 ++roundig mode: -inf ++ffint.d.w :: ++input: -45786 ++output: -45786.000000000000000 ++fcsr: 0x300 ++roundig mode: -inf ++ffint.d.w :: ++input: 456 ++output: 456.000000000000000 ++fcsr: 0x300 ++roundig mode: -inf ++ffint.d.w :: ++input: 34 ++output: 34.000000000000000 ++fcsr: 0x300 ++roundig mode: -inf ++ffint.d.w :: ++input: 45786 ++output: 45786.000000000000000 ++fcsr: 0x300 ++roundig mode: -inf ++ffint.d.w :: ++input: 1752065 ++output: 1752065.000000000000000 ++fcsr: 0x300 ++roundig mode: -inf ++ffint.d.w :: ++input: 107 ++output: 107.000000000000000 ++fcsr: 0x300 ++roundig mode: -inf ++ffint.d.w :: ++input: -45667 ++output: -45667.000000000000000 ++fcsr: 0x300 ++roundig mode: -inf ++ffint.d.w :: ++input: -7 ++output: -7.000000000000000 ++fcsr: 0x300 ++roundig mode: -inf ++ffint.d.w :: ++input: -347856 ++output: -347856.000000000000000 ++fcsr: 0x300 ++roundig mode: -inf ++ffint.d.w :: ++input: -2147483648 ++output: -2147483648.000000000000000 ++fcsr: 0x300 ++roundig mode: -inf ++ffint.d.w :: ++input: 268435455 ++output: 268435455.000000000000000 ++fcsr: 0x300 ++roundig mode: -inf ++ffint.d.w :: ++input: 23 ++output: 23.000000000000000 ++fcsr: 0x300 ++roundig mode: near ++ffint.d.l :: ++input: 18 ++output: 18.000000000000000 ++fcsr: 0 ++roundig 
mode: near ++ffint.d.l :: ++input: 25 ++output: 25.000000000000000 ++fcsr: 0 ++roundig mode: near ++ffint.d.l :: ++input: 3 ++output: 3.000000000000000 ++fcsr: 0 ++roundig mode: near ++ffint.d.l :: ++input: -1 ++output: -1.000000000000000 ++fcsr: 0 ++roundig mode: near ++ffint.d.l :: ++input: 4294967295 ++output: 4294967295.000000000000000 ++fcsr: 0 ++roundig mode: near ++ffint.d.l :: ++input: 356 ++output: 356.000000000000000 ++fcsr: 0 ++roundig mode: near ++ffint.d.l :: ++input: 1000000 ++output: 1000000.000000000000000 ++fcsr: 0 ++roundig mode: near ++ffint.d.l :: ++input: -5786 ++output: -5786.000000000000000 ++fcsr: 0 ++roundig mode: near ++ffint.d.l :: ++input: -1 ++output: -1.000000000000000 ++fcsr: 0 ++roundig mode: near ++ffint.d.l :: ++input: 24575 ++output: 24575.000000000000000 ++fcsr: 0 ++roundig mode: near ++ffint.d.l :: ++input: 10 ++output: 10.000000000000000 ++fcsr: 0 ++roundig mode: near ++ffint.d.l :: ++input: -125458 ++output: -125458.000000000000000 ++fcsr: 0 ++roundig mode: near ++ffint.d.l :: ++input: -486 ++output: -486.000000000000000 ++fcsr: 0 ++roundig mode: near ++ffint.d.l :: ++input: 456 ++output: 456.000000000000000 ++fcsr: 0 ++roundig mode: near ++ffint.d.l :: ++input: 34 ++output: 34.000000000000000 ++fcsr: 0 ++roundig mode: near ++ffint.d.l :: ++input: 45786 ++output: 45786.000000000000000 ++fcsr: 0 ++roundig mode: near ++ffint.d.l :: ++input: 0 ++output: 0.000000000000000 ++fcsr: 0 ++roundig mode: near ++ffint.d.l :: ++input: 1700000 ++output: 1700000.000000000000000 ++fcsr: 0 ++roundig mode: near ++ffint.d.l :: ++input: -45667 ++output: -45667.000000000000000 ++fcsr: 0 ++roundig mode: near ++ffint.d.l :: ++input: -7 ++output: -7.000000000000000 ++fcsr: 0 ++roundig mode: near ++ffint.d.l :: ++input: -347856 ++output: -347856.000000000000000 ++fcsr: 0 ++roundig mode: near ++ffint.d.l :: ++input: 2147483648 ++output: 2147483648.000000000000000 ++fcsr: 0 ++roundig mode: near ++ffint.d.l :: ++input: 268435455 ++output: 268435455.000000000000000 ++fcsr: 0 ++roundig mode: near ++ffint.d.l :: ++input: 23 ++output: 23.000000000000000 ++fcsr: 0 ++roundig mode: zero ++ffint.d.l :: ++input: 18 ++output: 18.000000000000000 ++fcsr: 0x100 ++roundig mode: zero ++ffint.d.l :: ++input: 25 ++output: 25.000000000000000 ++fcsr: 0x100 ++roundig mode: zero ++ffint.d.l :: ++input: 3 ++output: 3.000000000000000 ++fcsr: 0x100 ++roundig mode: zero ++ffint.d.l :: ++input: -1 ++output: -1.000000000000000 ++fcsr: 0x100 ++roundig mode: zero ++ffint.d.l :: ++input: 4294967295 ++output: 4294967295.000000000000000 ++fcsr: 0x100 ++roundig mode: zero ++ffint.d.l :: ++input: 356 ++output: 356.000000000000000 ++fcsr: 0x100 ++roundig mode: zero ++ffint.d.l :: ++input: 1000000 ++output: 1000000.000000000000000 ++fcsr: 0x100 ++roundig mode: zero ++ffint.d.l :: ++input: -5786 ++output: -5786.000000000000000 ++fcsr: 0x100 ++roundig mode: zero ++ffint.d.l :: ++input: -1 ++output: -1.000000000000000 ++fcsr: 0x100 ++roundig mode: zero ++ffint.d.l :: ++input: 24575 ++output: 24575.000000000000000 ++fcsr: 0x100 ++roundig mode: zero ++ffint.d.l :: ++input: 10 ++output: 10.000000000000000 ++fcsr: 0x100 ++roundig mode: zero ++ffint.d.l :: ++input: -125458 ++output: -125458.000000000000000 ++fcsr: 0x100 ++roundig mode: zero ++ffint.d.l :: ++input: -486 ++output: -486.000000000000000 ++fcsr: 0x100 ++roundig mode: zero ++ffint.d.l :: ++input: 456 ++output: 456.000000000000000 ++fcsr: 0x100 ++roundig mode: zero ++ffint.d.l :: ++input: 34 ++output: 34.000000000000000 ++fcsr: 0x100 ++roundig mode: zero 
++ffint.d.l :: ++input: 45786 ++output: 45786.000000000000000 ++fcsr: 0x100 ++roundig mode: zero ++ffint.d.l :: ++input: 0 ++output: 0.000000000000000 ++fcsr: 0x100 ++roundig mode: zero ++ffint.d.l :: ++input: 1700000 ++output: 1700000.000000000000000 ++fcsr: 0x100 ++roundig mode: zero ++ffint.d.l :: ++input: -45667 ++output: -45667.000000000000000 ++fcsr: 0x100 ++roundig mode: zero ++ffint.d.l :: ++input: -7 ++output: -7.000000000000000 ++fcsr: 0x100 ++roundig mode: zero ++ffint.d.l :: ++input: -347856 ++output: -347856.000000000000000 ++fcsr: 0x100 ++roundig mode: zero ++ffint.d.l :: ++input: 2147483648 ++output: 2147483648.000000000000000 ++fcsr: 0x100 ++roundig mode: zero ++ffint.d.l :: ++input: 268435455 ++output: 268435455.000000000000000 ++fcsr: 0x100 ++roundig mode: zero ++ffint.d.l :: ++input: 23 ++output: 23.000000000000000 ++fcsr: 0x100 ++roundig mode: +inf ++ffint.d.l :: ++input: 18 ++output: 18.000000000000000 ++fcsr: 0x200 ++roundig mode: +inf ++ffint.d.l :: ++input: 25 ++output: 25.000000000000000 ++fcsr: 0x200 ++roundig mode: +inf ++ffint.d.l :: ++input: 3 ++output: 3.000000000000000 ++fcsr: 0x200 ++roundig mode: +inf ++ffint.d.l :: ++input: -1 ++output: -1.000000000000000 ++fcsr: 0x200 ++roundig mode: +inf ++ffint.d.l :: ++input: 4294967295 ++output: 4294967295.000000000000000 ++fcsr: 0x200 ++roundig mode: +inf ++ffint.d.l :: ++input: 356 ++output: 356.000000000000000 ++fcsr: 0x200 ++roundig mode: +inf ++ffint.d.l :: ++input: 1000000 ++output: 1000000.000000000000000 ++fcsr: 0x200 ++roundig mode: +inf ++ffint.d.l :: ++input: -5786 ++output: -5786.000000000000000 ++fcsr: 0x200 ++roundig mode: +inf ++ffint.d.l :: ++input: -1 ++output: -1.000000000000000 ++fcsr: 0x200 ++roundig mode: +inf ++ffint.d.l :: ++input: 24575 ++output: 24575.000000000000000 ++fcsr: 0x200 ++roundig mode: +inf ++ffint.d.l :: ++input: 10 ++output: 10.000000000000000 ++fcsr: 0x200 ++roundig mode: +inf ++ffint.d.l :: ++input: -125458 ++output: -125458.000000000000000 ++fcsr: 0x200 ++roundig mode: +inf ++ffint.d.l :: ++input: -486 ++output: -486.000000000000000 ++fcsr: 0x200 ++roundig mode: +inf ++ffint.d.l :: ++input: 456 ++output: 456.000000000000000 ++fcsr: 0x200 ++roundig mode: +inf ++ffint.d.l :: ++input: 34 ++output: 34.000000000000000 ++fcsr: 0x200 ++roundig mode: +inf ++ffint.d.l :: ++input: 45786 ++output: 45786.000000000000000 ++fcsr: 0x200 ++roundig mode: +inf ++ffint.d.l :: ++input: 0 ++output: 0.000000000000000 ++fcsr: 0x200 ++roundig mode: +inf ++ffint.d.l :: ++input: 1700000 ++output: 1700000.000000000000000 ++fcsr: 0x200 ++roundig mode: +inf ++ffint.d.l :: ++input: -45667 ++output: -45667.000000000000000 ++fcsr: 0x200 ++roundig mode: +inf ++ffint.d.l :: ++input: -7 ++output: -7.000000000000000 ++fcsr: 0x200 ++roundig mode: +inf ++ffint.d.l :: ++input: -347856 ++output: -347856.000000000000000 ++fcsr: 0x200 ++roundig mode: +inf ++ffint.d.l :: ++input: 2147483648 ++output: 2147483648.000000000000000 ++fcsr: 0x200 ++roundig mode: +inf ++ffint.d.l :: ++input: 268435455 ++output: 268435455.000000000000000 ++fcsr: 0x200 ++roundig mode: +inf ++ffint.d.l :: ++input: 23 ++output: 23.000000000000000 ++fcsr: 0x200 ++roundig mode: -inf ++ffint.d.l :: ++input: 18 ++output: 18.000000000000000 ++fcsr: 0x300 ++roundig mode: -inf ++ffint.d.l :: ++input: 25 ++output: 25.000000000000000 ++fcsr: 0x300 ++roundig mode: -inf ++ffint.d.l :: ++input: 3 ++output: 3.000000000000000 ++fcsr: 0x300 ++roundig mode: -inf ++ffint.d.l :: ++input: -1 ++output: -1.000000000000000 ++fcsr: 0x300 ++roundig mode: -inf ++ffint.d.l 
:: ++input: 4294967295 ++output: 4294967295.000000000000000 ++fcsr: 0x300 ++roundig mode: -inf ++ffint.d.l :: ++input: 356 ++output: 356.000000000000000 ++fcsr: 0x300 ++roundig mode: -inf ++ffint.d.l :: ++input: 1000000 ++output: 1000000.000000000000000 ++fcsr: 0x300 ++roundig mode: -inf ++ffint.d.l :: ++input: -5786 ++output: -5786.000000000000000 ++fcsr: 0x300 ++roundig mode: -inf ++ffint.d.l :: ++input: -1 ++output: -1.000000000000000 ++fcsr: 0x300 ++roundig mode: -inf ++ffint.d.l :: ++input: 24575 ++output: 24575.000000000000000 ++fcsr: 0x300 ++roundig mode: -inf ++ffint.d.l :: ++input: 10 ++output: 10.000000000000000 ++fcsr: 0x300 ++roundig mode: -inf ++ffint.d.l :: ++input: -125458 ++output: -125458.000000000000000 ++fcsr: 0x300 ++roundig mode: -inf ++ffint.d.l :: ++input: -486 ++output: -486.000000000000000 ++fcsr: 0x300 ++roundig mode: -inf ++ffint.d.l :: ++input: 456 ++output: 456.000000000000000 ++fcsr: 0x300 ++roundig mode: -inf ++ffint.d.l :: ++input: 34 ++output: 34.000000000000000 ++fcsr: 0x300 ++roundig mode: -inf ++ffint.d.l :: ++input: 45786 ++output: 45786.000000000000000 ++fcsr: 0x300 ++roundig mode: -inf ++ffint.d.l :: ++input: 0 ++output: 0.000000000000000 ++fcsr: 0x300 ++roundig mode: -inf ++ffint.d.l :: ++input: 1700000 ++output: 1700000.000000000000000 ++fcsr: 0x300 ++roundig mode: -inf ++ffint.d.l :: ++input: -45667 ++output: -45667.000000000000000 ++fcsr: 0x300 ++roundig mode: -inf ++ffint.d.l :: ++input: -7 ++output: -7.000000000000000 ++fcsr: 0x300 ++roundig mode: -inf ++ffint.d.l :: ++input: -347856 ++output: -347856.000000000000000 ++fcsr: 0x300 ++roundig mode: -inf ++ffint.d.l :: ++input: 2147483648 ++output: 2147483648.000000000000000 ++fcsr: 0x300 ++roundig mode: -inf ++ffint.d.l :: ++input: 268435455 ++output: 268435455.000000000000000 ++fcsr: 0x300 ++roundig mode: -inf ++ffint.d.l :: ++input: 23 ++output: 23.000000000000000 ++fcsr: 0x300 ++roundig mode: near ++frint.s :: ++input: 0.000000 ++output: 0.000000 ++fcsr: 0 ++roundig mode: near ++frint.s :: ++input: 456.250000 ++output: 456.000000 ++fcsr: 0x1010000 ++roundig mode: near ++frint.s :: ++input: 3.000000 ++output: 3.000000 ++fcsr: 0 ++roundig mode: near ++frint.s :: ++input: -1.000000 ++output: -1.000000 ++fcsr: 0 ++roundig mode: near ++frint.s :: ++input: 1384.500000 ++output: 1384.000000 ++fcsr: 0x1010000 ++roundig mode: near ++frint.s :: ++input: -7.250000 ++output: -7.000000 ++fcsr: 0x1010000 ++roundig mode: near ++frint.s :: ++input: 1000000000.000000 ++output: 1000000000.000000 ++fcsr: 0 ++roundig mode: near ++frint.s :: ++input: -5786.500000 ++output: -5786.000000 ++fcsr: 0x1010000 ++roundig mode: near ++frint.s :: ++input: 1752.000000 ++output: 1752.000000 ++fcsr: 0 ++roundig mode: near ++frint.s :: ++input: 0.015625 ++output: 0.000000 ++fcsr: 0x1010000 ++roundig mode: near ++frint.s :: ++input: 0.031250 ++output: 0.000000 ++fcsr: 0x1010000 ++roundig mode: near ++frint.s :: ++input: -248562.750000 ++output: -248563.000000 ++fcsr: 0x1010000 ++roundig mode: near ++frint.s :: ++input: -45786.500000 ++output: -45786.000000 ++fcsr: 0x1010000 ++roundig mode: near ++frint.s :: ++input: 456.000000 ++output: 456.000000 ++fcsr: 0 ++roundig mode: near ++frint.s :: ++input: 34.031250 ++output: 34.000000 ++fcsr: 0x1010000 ++roundig mode: near ++frint.s :: ++input: 45786.750000 ++output: 45787.000000 ++fcsr: 0x1010000 ++roundig mode: near ++frint.s :: ++input: 1752065.000000 ++output: 1752065.000000 ++fcsr: 0 ++roundig mode: near ++frint.s :: ++input: 107.000000 ++output: 107.000000 ++fcsr: 0 ++roundig 
mode: near ++frint.s :: ++input: -45667.250000 ++output: -45667.000000 ++fcsr: 0x1010000 ++roundig mode: near ++frint.s :: ++input: -7.000000 ++output: -7.000000 ++fcsr: 0 ++roundig mode: near ++frint.s :: ++input: -347856.500000 ++output: -347856.000000 ++fcsr: 0x1010000 ++roundig mode: near ++frint.s :: ++input: 356047.500000 ++output: 356048.000000 ++fcsr: 0x1010000 ++roundig mode: near ++frint.s :: ++input: -1.000000 ++output: -1.000000 ++fcsr: 0 ++roundig mode: near ++frint.s :: ++input: 23.062500 ++output: 23.000000 ++fcsr: 0x1010000 ++roundig mode: zero ++frint.s :: ++input: 0.000000 ++output: 0.000000 ++fcsr: 0x100 ++roundig mode: zero ++frint.s :: ++input: 456.250000 ++output: 456.000000 ++fcsr: 0x1010100 ++roundig mode: zero ++frint.s :: ++input: 3.000000 ++output: 3.000000 ++fcsr: 0x100 ++roundig mode: zero ++frint.s :: ++input: -1.000000 ++output: -1.000000 ++fcsr: 0x100 ++roundig mode: zero ++frint.s :: ++input: 1384.500000 ++output: 1384.000000 ++fcsr: 0x1010100 ++roundig mode: zero ++frint.s :: ++input: -7.250000 ++output: -7.000000 ++fcsr: 0x1010100 ++roundig mode: zero ++frint.s :: ++input: 1000000000.000000 ++output: 1000000000.000000 ++fcsr: 0x100 ++roundig mode: zero ++frint.s :: ++input: -5786.500000 ++output: -5786.000000 ++fcsr: 0x1010100 ++roundig mode: zero ++frint.s :: ++input: 1752.000000 ++output: 1752.000000 ++fcsr: 0x100 ++roundig mode: zero ++frint.s :: ++input: 0.015625 ++output: 0.000000 ++fcsr: 0x1010100 ++roundig mode: zero ++frint.s :: ++input: 0.031250 ++output: 0.000000 ++fcsr: 0x1010100 ++roundig mode: zero ++frint.s :: ++input: -248562.750000 ++output: -248562.000000 ++fcsr: 0x1010100 ++roundig mode: zero ++frint.s :: ++input: -45786.500000 ++output: -45786.000000 ++fcsr: 0x1010100 ++roundig mode: zero ++frint.s :: ++input: 456.000000 ++output: 456.000000 ++fcsr: 0x100 ++roundig mode: zero ++frint.s :: ++input: 34.031250 ++output: 34.000000 ++fcsr: 0x1010100 ++roundig mode: zero ++frint.s :: ++input: 45786.750000 ++output: 45786.000000 ++fcsr: 0x1010100 ++roundig mode: zero ++frint.s :: ++input: 1752065.000000 ++output: 1752065.000000 ++fcsr: 0x100 ++roundig mode: zero ++frint.s :: ++input: 107.000000 ++output: 107.000000 ++fcsr: 0x100 ++roundig mode: zero ++frint.s :: ++input: -45667.250000 ++output: -45667.000000 ++fcsr: 0x1010100 ++roundig mode: zero ++frint.s :: ++input: -7.000000 ++output: -7.000000 ++fcsr: 0x100 ++roundig mode: zero ++frint.s :: ++input: -347856.500000 ++output: -347856.000000 ++fcsr: 0x1010100 ++roundig mode: zero ++frint.s :: ++input: 356047.500000 ++output: 356047.000000 ++fcsr: 0x1010100 ++roundig mode: zero ++frint.s :: ++input: -1.000000 ++output: -1.000000 ++fcsr: 0x100 ++roundig mode: zero ++frint.s :: ++input: 23.062500 ++output: 23.000000 ++fcsr: 0x1010100 ++roundig mode: +inf ++frint.s :: ++input: 0.000000 ++output: 0.000000 ++fcsr: 0x200 ++roundig mode: +inf ++frint.s :: ++input: 456.250000 ++output: 457.000000 ++fcsr: 0x1010200 ++roundig mode: +inf ++frint.s :: ++input: 3.000000 ++output: 3.000000 ++fcsr: 0x200 ++roundig mode: +inf ++frint.s :: ++input: -1.000000 ++output: -1.000000 ++fcsr: 0x200 ++roundig mode: +inf ++frint.s :: ++input: 1384.500000 ++output: 1385.000000 ++fcsr: 0x1010200 ++roundig mode: +inf ++frint.s :: ++input: -7.250000 ++output: -7.000000 ++fcsr: 0x1010200 ++roundig mode: +inf ++frint.s :: ++input: 1000000000.000000 ++output: 1000000000.000000 ++fcsr: 0x200 ++roundig mode: +inf ++frint.s :: ++input: -5786.500000 ++output: -5786.000000 ++fcsr: 0x1010200 ++roundig mode: +inf ++frint.s :: 
++input: 1752.000000 ++output: 1752.000000 ++fcsr: 0x200 ++roundig mode: +inf ++frint.s :: ++input: 0.015625 ++output: 1.000000 ++fcsr: 0x1010200 ++roundig mode: +inf ++frint.s :: ++input: 0.031250 ++output: 1.000000 ++fcsr: 0x1010200 ++roundig mode: +inf ++frint.s :: ++input: -248562.750000 ++output: -248562.000000 ++fcsr: 0x1010200 ++roundig mode: +inf ++frint.s :: ++input: -45786.500000 ++output: -45786.000000 ++fcsr: 0x1010200 ++roundig mode: +inf ++frint.s :: ++input: 456.000000 ++output: 456.000000 ++fcsr: 0x200 ++roundig mode: +inf ++frint.s :: ++input: 34.031250 ++output: 35.000000 ++fcsr: 0x1010200 ++roundig mode: +inf ++frint.s :: ++input: 45786.750000 ++output: 45787.000000 ++fcsr: 0x1010200 ++roundig mode: +inf ++frint.s :: ++input: 1752065.000000 ++output: 1752065.000000 ++fcsr: 0x200 ++roundig mode: +inf ++frint.s :: ++input: 107.000000 ++output: 107.000000 ++fcsr: 0x200 ++roundig mode: +inf ++frint.s :: ++input: -45667.250000 ++output: -45667.000000 ++fcsr: 0x1010200 ++roundig mode: +inf ++frint.s :: ++input: -7.000000 ++output: -7.000000 ++fcsr: 0x200 ++roundig mode: +inf ++frint.s :: ++input: -347856.500000 ++output: -347856.000000 ++fcsr: 0x1010200 ++roundig mode: +inf ++frint.s :: ++input: 356047.500000 ++output: 356048.000000 ++fcsr: 0x1010200 ++roundig mode: +inf ++frint.s :: ++input: -1.000000 ++output: -1.000000 ++fcsr: 0x200 ++roundig mode: +inf ++frint.s :: ++input: 23.062500 ++output: 24.000000 ++fcsr: 0x1010200 ++roundig mode: -inf ++frint.s :: ++input: 0.000000 ++output: 0.000000 ++fcsr: 0x300 ++roundig mode: -inf ++frint.s :: ++input: 456.250000 ++output: 456.000000 ++fcsr: 0x1010300 ++roundig mode: -inf ++frint.s :: ++input: 3.000000 ++output: 3.000000 ++fcsr: 0x300 ++roundig mode: -inf ++frint.s :: ++input: -1.000000 ++output: -1.000000 ++fcsr: 0x300 ++roundig mode: -inf ++frint.s :: ++input: 1384.500000 ++output: 1384.000000 ++fcsr: 0x1010300 ++roundig mode: -inf ++frint.s :: ++input: -7.250000 ++output: -8.000000 ++fcsr: 0x1010300 ++roundig mode: -inf ++frint.s :: ++input: 1000000000.000000 ++output: 1000000000.000000 ++fcsr: 0x300 ++roundig mode: -inf ++frint.s :: ++input: -5786.500000 ++output: -5787.000000 ++fcsr: 0x1010300 ++roundig mode: -inf ++frint.s :: ++input: 1752.000000 ++output: 1752.000000 ++fcsr: 0x300 ++roundig mode: -inf ++frint.s :: ++input: 0.015625 ++output: 0.000000 ++fcsr: 0x1010300 ++roundig mode: -inf ++frint.s :: ++input: 0.031250 ++output: 0.000000 ++fcsr: 0x1010300 ++roundig mode: -inf ++frint.s :: ++input: -248562.750000 ++output: -248563.000000 ++fcsr: 0x1010300 ++roundig mode: -inf ++frint.s :: ++input: -45786.500000 ++output: -45787.000000 ++fcsr: 0x1010300 ++roundig mode: -inf ++frint.s :: ++input: 456.000000 ++output: 456.000000 ++fcsr: 0x300 ++roundig mode: -inf ++frint.s :: ++input: 34.031250 ++output: 34.000000 ++fcsr: 0x1010300 ++roundig mode: -inf ++frint.s :: ++input: 45786.750000 ++output: 45786.000000 ++fcsr: 0x1010300 ++roundig mode: -inf ++frint.s :: ++input: 1752065.000000 ++output: 1752065.000000 ++fcsr: 0x300 ++roundig mode: -inf ++frint.s :: ++input: 107.000000 ++output: 107.000000 ++fcsr: 0x300 ++roundig mode: -inf ++frint.s :: ++input: -45667.250000 ++output: -45668.000000 ++fcsr: 0x1010300 ++roundig mode: -inf ++frint.s :: ++input: -7.000000 ++output: -7.000000 ++fcsr: 0x300 ++roundig mode: -inf ++frint.s :: ++input: -347856.500000 ++output: -347857.000000 ++fcsr: 0x1010300 ++roundig mode: -inf ++frint.s :: ++input: 356047.500000 ++output: 356047.000000 ++fcsr: 0x1010300 ++roundig mode: -inf ++frint.s :: 
++input: -1.000000 ++output: -1.000000 ++fcsr: 0x300 ++roundig mode: -inf ++frint.s :: ++input: 23.062500 ++output: 23.000000 ++fcsr: 0x1010300 ++roundig mode: near ++frint.d :: ++input: 0.000000000000000 ++output: 0.000000000000000 ++fcsr: 0 ++roundig mode: near ++frint.d :: ++input: 456.250000000000000 ++output: 456.000000000000000 ++fcsr: 0x1010000 ++roundig mode: near ++frint.d :: ++input: 3.000000000000000 ++output: 3.000000000000000 ++fcsr: 0 ++roundig mode: near ++frint.d :: ++input: -1.000000000000000 ++output: -1.000000000000000 ++fcsr: 0 ++roundig mode: near ++frint.d :: ++input: 1384.500000000000000 ++output: 1384.000000000000000 ++fcsr: 0x1010000 ++roundig mode: near ++frint.d :: ++input: -7.250000000000000 ++output: -7.000000000000000 ++fcsr: 0x1010000 ++roundig mode: near ++frint.d :: ++input: 1000000000.000000000000000 ++output: 1000000000.000000000000000 ++fcsr: 0 ++roundig mode: near ++frint.d :: ++input: -5786.500000000000000 ++output: -5786.000000000000000 ++fcsr: 0x1010000 ++roundig mode: near ++frint.d :: ++input: 1752.000000000000000 ++output: 1752.000000000000000 ++fcsr: 0 ++roundig mode: near ++frint.d :: ++input: 0.015625000000000 ++output: 0.000000000000000 ++fcsr: 0x1010000 ++roundig mode: near ++frint.d :: ++input: 0.031250000000000 ++output: 0.000000000000000 ++fcsr: 0x1010000 ++roundig mode: near ++frint.d :: ++input: -248562.750000000000000 ++output: -248563.000000000000000 ++fcsr: 0x1010000 ++roundig mode: near ++frint.d :: ++input: -45786.500000000000000 ++output: -45786.000000000000000 ++fcsr: 0x1010000 ++roundig mode: near ++frint.d :: ++input: 456.000000000000000 ++output: 456.000000000000000 ++fcsr: 0 ++roundig mode: near ++frint.d :: ++input: 34.031250000000000 ++output: 34.000000000000000 ++fcsr: 0x1010000 ++roundig mode: near ++frint.d :: ++input: 45786.750000000000000 ++output: 45787.000000000000000 ++fcsr: 0x1010000 ++roundig mode: near ++frint.d :: ++input: 1752065.000000000000000 ++output: 1752065.000000000000000 ++fcsr: 0 ++roundig mode: near ++frint.d :: ++input: 107.000000000000000 ++output: 107.000000000000000 ++fcsr: 0 ++roundig mode: near ++frint.d :: ++input: -45667.250000000000000 ++output: -45667.000000000000000 ++fcsr: 0x1010000 ++roundig mode: near ++frint.d :: ++input: -7.000000000000000 ++output: -7.000000000000000 ++fcsr: 0 ++roundig mode: near ++frint.d :: ++input: -347856.500000000000000 ++output: -347856.000000000000000 ++fcsr: 0x1010000 ++roundig mode: near ++frint.d :: ++input: 356047.500000000000000 ++output: 356048.000000000000000 ++fcsr: 0x1010000 ++roundig mode: near ++frint.d :: ++input: -1.000000000000000 ++output: -1.000000000000000 ++fcsr: 0 ++roundig mode: near ++frint.d :: ++input: 23.062500000000000 ++output: 23.000000000000000 ++fcsr: 0x1010000 ++roundig mode: zero ++frint.d :: ++input: 0.000000000000000 ++output: 0.000000000000000 ++fcsr: 0x100 ++roundig mode: zero ++frint.d :: ++input: 456.250000000000000 ++output: 456.000000000000000 ++fcsr: 0x1010100 ++roundig mode: zero ++frint.d :: ++input: 3.000000000000000 ++output: 3.000000000000000 ++fcsr: 0x100 ++roundig mode: zero ++frint.d :: ++input: -1.000000000000000 ++output: -1.000000000000000 ++fcsr: 0x100 ++roundig mode: zero ++frint.d :: ++input: 1384.500000000000000 ++output: 1384.000000000000000 ++fcsr: 0x1010100 ++roundig mode: zero ++frint.d :: ++input: -7.250000000000000 ++output: -7.000000000000000 ++fcsr: 0x1010100 ++roundig mode: zero ++frint.d :: ++input: 1000000000.000000000000000 ++output: 1000000000.000000000000000 ++fcsr: 0x100 ++roundig mode: zero 
++frint.d :: ++input: -5786.500000000000000 ++output: -5786.000000000000000 ++fcsr: 0x1010100 ++roundig mode: zero ++frint.d :: ++input: 1752.000000000000000 ++output: 1752.000000000000000 ++fcsr: 0x100 ++roundig mode: zero ++frint.d :: ++input: 0.015625000000000 ++output: 0.000000000000000 ++fcsr: 0x1010100 ++roundig mode: zero ++frint.d :: ++input: 0.031250000000000 ++output: 0.000000000000000 ++fcsr: 0x1010100 ++roundig mode: zero ++frint.d :: ++input: -248562.750000000000000 ++output: -248562.000000000000000 ++fcsr: 0x1010100 ++roundig mode: zero ++frint.d :: ++input: -45786.500000000000000 ++output: -45786.000000000000000 ++fcsr: 0x1010100 ++roundig mode: zero ++frint.d :: ++input: 456.000000000000000 ++output: 456.000000000000000 ++fcsr: 0x100 ++roundig mode: zero ++frint.d :: ++input: 34.031250000000000 ++output: 34.000000000000000 ++fcsr: 0x1010100 ++roundig mode: zero ++frint.d :: ++input: 45786.750000000000000 ++output: 45786.000000000000000 ++fcsr: 0x1010100 ++roundig mode: zero ++frint.d :: ++input: 1752065.000000000000000 ++output: 1752065.000000000000000 ++fcsr: 0x100 ++roundig mode: zero ++frint.d :: ++input: 107.000000000000000 ++output: 107.000000000000000 ++fcsr: 0x100 ++roundig mode: zero ++frint.d :: ++input: -45667.250000000000000 ++output: -45667.000000000000000 ++fcsr: 0x1010100 ++roundig mode: zero ++frint.d :: ++input: -7.000000000000000 ++output: -7.000000000000000 ++fcsr: 0x100 ++roundig mode: zero ++frint.d :: ++input: -347856.500000000000000 ++output: -347856.000000000000000 ++fcsr: 0x1010100 ++roundig mode: zero ++frint.d :: ++input: 356047.500000000000000 ++output: 356047.000000000000000 ++fcsr: 0x1010100 ++roundig mode: zero ++frint.d :: ++input: -1.000000000000000 ++output: -1.000000000000000 ++fcsr: 0x100 ++roundig mode: zero ++frint.d :: ++input: 23.062500000000000 ++output: 23.000000000000000 ++fcsr: 0x1010100 ++roundig mode: +inf ++frint.d :: ++input: 0.000000000000000 ++output: 0.000000000000000 ++fcsr: 0x200 ++roundig mode: +inf ++frint.d :: ++input: 456.250000000000000 ++output: 457.000000000000000 ++fcsr: 0x1010200 ++roundig mode: +inf ++frint.d :: ++input: 3.000000000000000 ++output: 3.000000000000000 ++fcsr: 0x200 ++roundig mode: +inf ++frint.d :: ++input: -1.000000000000000 ++output: -1.000000000000000 ++fcsr: 0x200 ++roundig mode: +inf ++frint.d :: ++input: 1384.500000000000000 ++output: 1385.000000000000000 ++fcsr: 0x1010200 ++roundig mode: +inf ++frint.d :: ++input: -7.250000000000000 ++output: -7.000000000000000 ++fcsr: 0x1010200 ++roundig mode: +inf ++frint.d :: ++input: 1000000000.000000000000000 ++output: 1000000000.000000000000000 ++fcsr: 0x200 ++roundig mode: +inf ++frint.d :: ++input: -5786.500000000000000 ++output: -5786.000000000000000 ++fcsr: 0x1010200 ++roundig mode: +inf ++frint.d :: ++input: 1752.000000000000000 ++output: 1752.000000000000000 ++fcsr: 0x200 ++roundig mode: +inf ++frint.d :: ++input: 0.015625000000000 ++output: 1.000000000000000 ++fcsr: 0x1010200 ++roundig mode: +inf ++frint.d :: ++input: 0.031250000000000 ++output: 1.000000000000000 ++fcsr: 0x1010200 ++roundig mode: +inf ++frint.d :: ++input: -248562.750000000000000 ++output: -248562.000000000000000 ++fcsr: 0x1010200 ++roundig mode: +inf ++frint.d :: ++input: -45786.500000000000000 ++output: -45786.000000000000000 ++fcsr: 0x1010200 ++roundig mode: +inf ++frint.d :: ++input: 456.000000000000000 ++output: 456.000000000000000 ++fcsr: 0x200 ++roundig mode: +inf ++frint.d :: ++input: 34.031250000000000 ++output: 35.000000000000000 ++fcsr: 0x1010200 ++roundig mode: +inf 
++frint.d :: ++input: 45786.750000000000000 ++output: 45787.000000000000000 ++fcsr: 0x1010200 ++roundig mode: +inf ++frint.d :: ++input: 1752065.000000000000000 ++output: 1752065.000000000000000 ++fcsr: 0x200 ++roundig mode: +inf ++frint.d :: ++input: 107.000000000000000 ++output: 107.000000000000000 ++fcsr: 0x200 ++roundig mode: +inf ++frint.d :: ++input: -45667.250000000000000 ++output: -45667.000000000000000 ++fcsr: 0x1010200 ++roundig mode: +inf ++frint.d :: ++input: -7.000000000000000 ++output: -7.000000000000000 ++fcsr: 0x200 ++roundig mode: +inf ++frint.d :: ++input: -347856.500000000000000 ++output: -347856.000000000000000 ++fcsr: 0x1010200 ++roundig mode: +inf ++frint.d :: ++input: 356047.500000000000000 ++output: 356048.000000000000000 ++fcsr: 0x1010200 ++roundig mode: +inf ++frint.d :: ++input: -1.000000000000000 ++output: -1.000000000000000 ++fcsr: 0x200 ++roundig mode: +inf ++frint.d :: ++input: 23.062500000000000 ++output: 24.000000000000000 ++fcsr: 0x1010200 ++roundig mode: -inf ++frint.d :: ++input: 0.000000000000000 ++output: 0.000000000000000 ++fcsr: 0x300 ++roundig mode: -inf ++frint.d :: ++input: 456.250000000000000 ++output: 456.000000000000000 ++fcsr: 0x1010300 ++roundig mode: -inf ++frint.d :: ++input: 3.000000000000000 ++output: 3.000000000000000 ++fcsr: 0x300 ++roundig mode: -inf ++frint.d :: ++input: -1.000000000000000 ++output: -1.000000000000000 ++fcsr: 0x300 ++roundig mode: -inf ++frint.d :: ++input: 1384.500000000000000 ++output: 1384.000000000000000 ++fcsr: 0x1010300 ++roundig mode: -inf ++frint.d :: ++input: -7.250000000000000 ++output: -8.000000000000000 ++fcsr: 0x1010300 ++roundig mode: -inf ++frint.d :: ++input: 1000000000.000000000000000 ++output: 1000000000.000000000000000 ++fcsr: 0x300 ++roundig mode: -inf ++frint.d :: ++input: -5786.500000000000000 ++output: -5787.000000000000000 ++fcsr: 0x1010300 ++roundig mode: -inf ++frint.d :: ++input: 1752.000000000000000 ++output: 1752.000000000000000 ++fcsr: 0x300 ++roundig mode: -inf ++frint.d :: ++input: 0.015625000000000 ++output: 0.000000000000000 ++fcsr: 0x1010300 ++roundig mode: -inf ++frint.d :: ++input: 0.031250000000000 ++output: 0.000000000000000 ++fcsr: 0x1010300 ++roundig mode: -inf ++frint.d :: ++input: -248562.750000000000000 ++output: -248563.000000000000000 ++fcsr: 0x1010300 ++roundig mode: -inf ++frint.d :: ++input: -45786.500000000000000 ++output: -45787.000000000000000 ++fcsr: 0x1010300 ++roundig mode: -inf ++frint.d :: ++input: 456.000000000000000 ++output: 456.000000000000000 ++fcsr: 0x300 ++roundig mode: -inf ++frint.d :: ++input: 34.031250000000000 ++output: 34.000000000000000 ++fcsr: 0x1010300 ++roundig mode: -inf ++frint.d :: ++input: 45786.750000000000000 ++output: 45786.000000000000000 ++fcsr: 0x1010300 ++roundig mode: -inf ++frint.d :: ++input: 1752065.000000000000000 ++output: 1752065.000000000000000 ++fcsr: 0x300 ++roundig mode: -inf ++frint.d :: ++input: 107.000000000000000 ++output: 107.000000000000000 ++fcsr: 0x300 ++roundig mode: -inf ++frint.d :: ++input: -45667.250000000000000 ++output: -45668.000000000000000 ++fcsr: 0x1010300 ++roundig mode: -inf ++frint.d :: ++input: -7.000000000000000 ++output: -7.000000000000000 ++fcsr: 0x300 ++roundig mode: -inf ++frint.d :: ++input: -347856.500000000000000 ++output: -347857.000000000000000 ++fcsr: 0x1010300 ++roundig mode: -inf ++frint.d :: ++input: 356047.500000000000000 ++output: 356047.000000000000000 ++fcsr: 0x1010300 ++roundig mode: -inf ++frint.d :: ++input: -1.000000000000000 ++output: -1.000000000000000 ++fcsr: 0x300 ++roundig 
mode: -inf ++frint.d :: ++input: 23.062500000000000 ++output: 23.000000000000000 ++fcsr: 0x1010300 ++roundig mode: near ++fcmp.caf.s :: ++input: 0.000000 -4578.500000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.caf.s :: ++input: 456.250000 456.250000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.caf.s :: ++input: 3.000000 34.031250 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.caf.s :: ++input: -1.000000 4578.750000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.caf.s :: ++input: 1384.500000 175.000000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.caf.s :: ++input: -7.250000 107.000000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.caf.s :: ++input: 1000000000.000000 -456.250000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.caf.s :: ++input: -5786.500000 -7.250000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.caf.s :: ++input: 1752.000000 -3478.500000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.caf.s :: ++input: 0.015625 356.500000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.caf.s :: ++input: 0.031250 -1.000000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.caf.s :: ++input: -248562.750000 23.062500 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.caf.s :: ++input: -45786.500000 0.000000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.caf.s :: ++input: 456.000000 456.250000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.caf.s :: ++input: 34.031250 3.000000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.caf.s :: ++input: 45786.750000 -1.000000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.caf.s :: ++input: 1752065.000000 1384.500000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.caf.s :: ++input: 107.000000 -7.000000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.caf.s :: ++input: -45667.250000 100.000000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.caf.s :: ++input: -7.000000 -5786.500000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.caf.s :: ++input: -347856.500000 1752.000000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.caf.s :: ++input: 356047.500000 0.015625 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.caf.s :: ++input: -1.000000 0.031250 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.caf.s :: ++input: 23.062500 -248562.750000 ++output: 0 ++fcsr: 0 ++roundig mode: zero ++fcmp.caf.s :: ++input: 0.000000 -4578.500000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.caf.s :: ++input: 456.250000 456.250000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.caf.s :: ++input: 3.000000 34.031250 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.caf.s :: ++input: -1.000000 4578.750000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.caf.s :: ++input: 1384.500000 175.000000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.caf.s :: ++input: -7.250000 107.000000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.caf.s :: ++input: 1000000000.000000 -456.250000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.caf.s :: ++input: -5786.500000 -7.250000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.caf.s :: ++input: 1752.000000 -3478.500000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.caf.s :: ++input: 0.015625 356.500000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.caf.s :: ++input: 0.031250 -1.000000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.caf.s :: ++input: -248562.750000 23.062500 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.caf.s :: ++input: -45786.500000 0.000000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.caf.s :: ++input: 
456.000000 456.250000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.caf.s :: ++input: 34.031250 3.000000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.caf.s :: ++input: 45786.750000 -1.000000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.caf.s :: ++input: 1752065.000000 1384.500000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.caf.s :: ++input: 107.000000 -7.000000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.caf.s :: ++input: -45667.250000 100.000000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.caf.s :: ++input: -7.000000 -5786.500000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.caf.s :: ++input: -347856.500000 1752.000000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.caf.s :: ++input: 356047.500000 0.015625 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.caf.s :: ++input: -1.000000 0.031250 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.caf.s :: ++input: 23.062500 -248562.750000 ++output: 0 ++fcsr: 0x100 ++roundig mode: +inf ++fcmp.caf.s :: ++input: 0.000000 -4578.500000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.caf.s :: ++input: 456.250000 456.250000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.caf.s :: ++input: 3.000000 34.031250 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.caf.s :: ++input: -1.000000 4578.750000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.caf.s :: ++input: 1384.500000 175.000000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.caf.s :: ++input: -7.250000 107.000000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.caf.s :: ++input: 1000000000.000000 -456.250000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.caf.s :: ++input: -5786.500000 -7.250000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.caf.s :: ++input: 1752.000000 -3478.500000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.caf.s :: ++input: 0.015625 356.500000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.caf.s :: ++input: 0.031250 -1.000000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.caf.s :: ++input: -248562.750000 23.062500 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.caf.s :: ++input: -45786.500000 0.000000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.caf.s :: ++input: 456.000000 456.250000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.caf.s :: ++input: 34.031250 3.000000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.caf.s :: ++input: 45786.750000 -1.000000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.caf.s :: ++input: 1752065.000000 1384.500000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.caf.s :: ++input: 107.000000 -7.000000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.caf.s :: ++input: -45667.250000 100.000000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.caf.s :: ++input: -7.000000 -5786.500000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.caf.s :: ++input: -347856.500000 1752.000000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.caf.s :: ++input: 356047.500000 0.015625 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.caf.s :: ++input: -1.000000 0.031250 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.caf.s :: ++input: 23.062500 -248562.750000 ++output: 0 ++fcsr: 0x200 ++roundig mode: -inf ++fcmp.caf.s :: ++input: 0.000000 -4578.500000 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.caf.s :: ++input: 456.250000 456.250000 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.caf.s :: ++input: 3.000000 34.031250 ++output: 0 ++fcsr: 0x300 
++roundig mode: -inf ++fcmp.caf.s :: ++input: -1.000000 4578.750000 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.caf.s :: ++input: 1384.500000 175.000000 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.caf.s :: ++input: -7.250000 107.000000 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.caf.s :: ++input: 1000000000.000000 -456.250000 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.caf.s :: ++input: -5786.500000 -7.250000 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.caf.s :: ++input: 1752.000000 -3478.500000 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.caf.s :: ++input: 0.015625 356.500000 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.caf.s :: ++input: 0.031250 -1.000000 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.caf.s :: ++input: -248562.750000 23.062500 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.caf.s :: ++input: -45786.500000 0.000000 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.caf.s :: ++input: 456.000000 456.250000 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.caf.s :: ++input: 34.031250 3.000000 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.caf.s :: ++input: 45786.750000 -1.000000 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.caf.s :: ++input: 1752065.000000 1384.500000 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.caf.s :: ++input: 107.000000 -7.000000 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.caf.s :: ++input: -45667.250000 100.000000 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.caf.s :: ++input: -7.000000 -5786.500000 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.caf.s :: ++input: -347856.500000 1752.000000 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.caf.s :: ++input: 356047.500000 0.015625 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.caf.s :: ++input: -1.000000 0.031250 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.caf.s :: ++input: 23.062500 -248562.750000 ++output: 0 ++fcsr: 0x300 ++roundig mode: near ++fcmp.caf.d :: ++input: 0.000000000000000 -45786.500000000000000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.caf.d :: ++input: 456.250000000000000 456.250000000000000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.caf.d :: ++input: 3.000000000000000 34.031250000000000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.caf.d :: ++input: -1.000000000000000 45786.750000000000000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.caf.d :: ++input: 1384.500000000000000 1752065.000000000000000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.caf.d :: ++input: -7.250000000000000 107.000000000000000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.caf.d :: ++input: 1000000000.000000000000000 -45667.250000000000000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.caf.d :: ++input: -5786.500000000000000 -7.250000000000000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.caf.d :: ++input: 1752.000000000000000 -347856.500000000000000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.caf.d :: ++input: 0.015625000000000 356047.500000000000000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.caf.d :: ++input: 0.031250000000000 -1.000000000000000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.caf.d :: ++input: -248562.750000000000000 23.062500000000000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.caf.d :: ++input: -45786.500000000000000 0.000000000000000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.caf.d :: ++input: 456.000000000000000 456.250000000000000 ++output: 0 ++fcsr: 0 ++roundig mode: near 
++fcmp.caf.d :: ++input: 34.031250000000000 3.000000000000000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.caf.d :: ++input: 45786.750000000000000 -1.000000000000000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.caf.d :: ++input: 1752065.000000000000000 1384.500000000000000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.caf.d :: ++input: 107.000000000000000 -7.000000000000000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.caf.d :: ++input: -45667.250000000000000 1000000000.000000000000000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.caf.d :: ++input: -7.000000000000000 -5786.500000000000000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.caf.d :: ++input: -347856.500000000000000 1752.000000000000000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.caf.d :: ++input: 356047.500000000000000 0.015625000000000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.caf.d :: ++input: -1.000000000000000 0.031250000000000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.caf.d :: ++input: 23.062500000000000 -248562.750000000000000 ++output: 0 ++fcsr: 0 ++roundig mode: zero ++fcmp.caf.d :: ++input: 0.000000000000000 -45786.500000000000000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.caf.d :: ++input: 456.250000000000000 456.250000000000000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.caf.d :: ++input: 3.000000000000000 34.031250000000000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.caf.d :: ++input: -1.000000000000000 45786.750000000000000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.caf.d :: ++input: 1384.500000000000000 1752065.000000000000000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.caf.d :: ++input: -7.250000000000000 107.000000000000000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.caf.d :: ++input: 1000000000.000000000000000 -45667.250000000000000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.caf.d :: ++input: -5786.500000000000000 -7.250000000000000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.caf.d :: ++input: 1752.000000000000000 -347856.500000000000000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.caf.d :: ++input: 0.015625000000000 356047.500000000000000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.caf.d :: ++input: 0.031250000000000 -1.000000000000000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.caf.d :: ++input: -248562.750000000000000 23.062500000000000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.caf.d :: ++input: -45786.500000000000000 0.000000000000000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.caf.d :: ++input: 456.000000000000000 456.250000000000000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.caf.d :: ++input: 34.031250000000000 3.000000000000000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.caf.d :: ++input: 45786.750000000000000 -1.000000000000000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.caf.d :: ++input: 1752065.000000000000000 1384.500000000000000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.caf.d :: ++input: 107.000000000000000 -7.000000000000000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.caf.d :: ++input: -45667.250000000000000 1000000000.000000000000000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.caf.d :: ++input: -7.000000000000000 -5786.500000000000000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.caf.d :: ++input: -347856.500000000000000 1752.000000000000000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.caf.d :: ++input: 356047.500000000000000 
0.015625000000000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.caf.d :: ++input: -1.000000000000000 0.031250000000000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.caf.d :: ++input: 23.062500000000000 -248562.750000000000000 ++output: 0 ++fcsr: 0x100 ++roundig mode: +inf ++fcmp.caf.d :: ++input: 0.000000000000000 -45786.500000000000000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.caf.d :: ++input: 456.250000000000000 456.250000000000000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.caf.d :: ++input: 3.000000000000000 34.031250000000000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.caf.d :: ++input: -1.000000000000000 45786.750000000000000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.caf.d :: ++input: 1384.500000000000000 1752065.000000000000000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.caf.d :: ++input: -7.250000000000000 107.000000000000000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.caf.d :: ++input: 1000000000.000000000000000 -45667.250000000000000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.caf.d :: ++input: -5786.500000000000000 -7.250000000000000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.caf.d :: ++input: 1752.000000000000000 -347856.500000000000000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.caf.d :: ++input: 0.015625000000000 356047.500000000000000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.caf.d :: ++input: 0.031250000000000 -1.000000000000000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.caf.d :: ++input: -248562.750000000000000 23.062500000000000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.caf.d :: ++input: -45786.500000000000000 0.000000000000000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.caf.d :: ++input: 456.000000000000000 456.250000000000000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.caf.d :: ++input: 34.031250000000000 3.000000000000000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.caf.d :: ++input: 45786.750000000000000 -1.000000000000000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.caf.d :: ++input: 1752065.000000000000000 1384.500000000000000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.caf.d :: ++input: 107.000000000000000 -7.000000000000000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.caf.d :: ++input: -45667.250000000000000 1000000000.000000000000000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.caf.d :: ++input: -7.000000000000000 -5786.500000000000000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.caf.d :: ++input: -347856.500000000000000 1752.000000000000000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.caf.d :: ++input: 356047.500000000000000 0.015625000000000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.caf.d :: ++input: -1.000000000000000 0.031250000000000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.caf.d :: ++input: 23.062500000000000 -248562.750000000000000 ++output: 0 ++fcsr: 0x200 ++roundig mode: -inf ++fcmp.caf.d :: ++input: 0.000000000000000 -45786.500000000000000 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.caf.d :: ++input: 456.250000000000000 456.250000000000000 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.caf.d :: ++input: 3.000000000000000 34.031250000000000 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.caf.d :: ++input: -1.000000000000000 45786.750000000000000 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.caf.d :: ++input: 1384.500000000000000 1752065.000000000000000 ++output: 0 ++fcsr: 
0x300 ++roundig mode: -inf ++fcmp.caf.d :: ++input: -7.250000000000000 107.000000000000000 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.caf.d :: ++input: 1000000000.000000000000000 -45667.250000000000000 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.caf.d :: ++input: -5786.500000000000000 -7.250000000000000 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.caf.d :: ++input: 1752.000000000000000 -347856.500000000000000 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.caf.d :: ++input: 0.015625000000000 356047.500000000000000 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.caf.d :: ++input: 0.031250000000000 -1.000000000000000 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.caf.d :: ++input: -248562.750000000000000 23.062500000000000 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.caf.d :: ++input: -45786.500000000000000 0.000000000000000 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.caf.d :: ++input: 456.000000000000000 456.250000000000000 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.caf.d :: ++input: 34.031250000000000 3.000000000000000 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.caf.d :: ++input: 45786.750000000000000 -1.000000000000000 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.caf.d :: ++input: 1752065.000000000000000 1384.500000000000000 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.caf.d :: ++input: 107.000000000000000 -7.000000000000000 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.caf.d :: ++input: -45667.250000000000000 1000000000.000000000000000 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.caf.d :: ++input: -7.000000000000000 -5786.500000000000000 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.caf.d :: ++input: -347856.500000000000000 1752.000000000000000 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.caf.d :: ++input: 356047.500000000000000 0.015625000000000 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.caf.d :: ++input: -1.000000000000000 0.031250000000000 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.caf.d :: ++input: 23.062500000000000 -248562.750000000000000 ++output: 0 ++fcsr: 0x300 ++roundig mode: near ++fcmp.saf.s :: ++input: 0.000000 -4578.500000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.saf.s :: ++input: 456.250000 456.250000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.saf.s :: ++input: 3.000000 34.031250 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.saf.s :: ++input: -1.000000 4578.750000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.saf.s :: ++input: 1384.500000 175.000000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.saf.s :: ++input: -7.250000 107.000000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.saf.s :: ++input: 1000000000.000000 -456.250000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.saf.s :: ++input: -5786.500000 -7.250000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.saf.s :: ++input: 1752.000000 -3478.500000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.saf.s :: ++input: 0.015625 356.500000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.saf.s :: ++input: 0.031250 -1.000000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.saf.s :: ++input: -248562.750000 23.062500 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.saf.s :: ++input: -45786.500000 0.000000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.saf.s :: ++input: 456.000000 456.250000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.saf.s :: ++input: 34.031250 3.000000 ++output: 0 ++fcsr: 0 ++roundig mode: near 
++fcmp.saf.s :: ++input: 45786.750000 -1.000000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.saf.s :: ++input: 1752065.000000 1384.500000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.saf.s :: ++input: 107.000000 -7.000000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.saf.s :: ++input: -45667.250000 100.000000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.saf.s :: ++input: -7.000000 -5786.500000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.saf.s :: ++input: -347856.500000 1752.000000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.saf.s :: ++input: 356047.500000 0.015625 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.saf.s :: ++input: -1.000000 0.031250 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.saf.s :: ++input: 23.062500 -248562.750000 ++output: 0 ++fcsr: 0 ++roundig mode: zero ++fcmp.saf.s :: ++input: 0.000000 -4578.500000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.saf.s :: ++input: 456.250000 456.250000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.saf.s :: ++input: 3.000000 34.031250 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.saf.s :: ++input: -1.000000 4578.750000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.saf.s :: ++input: 1384.500000 175.000000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.saf.s :: ++input: -7.250000 107.000000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.saf.s :: ++input: 1000000000.000000 -456.250000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.saf.s :: ++input: -5786.500000 -7.250000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.saf.s :: ++input: 1752.000000 -3478.500000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.saf.s :: ++input: 0.015625 356.500000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.saf.s :: ++input: 0.031250 -1.000000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.saf.s :: ++input: -248562.750000 23.062500 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.saf.s :: ++input: -45786.500000 0.000000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.saf.s :: ++input: 456.000000 456.250000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.saf.s :: ++input: 34.031250 3.000000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.saf.s :: ++input: 45786.750000 -1.000000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.saf.s :: ++input: 1752065.000000 1384.500000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.saf.s :: ++input: 107.000000 -7.000000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.saf.s :: ++input: -45667.250000 100.000000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.saf.s :: ++input: -7.000000 -5786.500000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.saf.s :: ++input: -347856.500000 1752.000000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.saf.s :: ++input: 356047.500000 0.015625 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.saf.s :: ++input: -1.000000 0.031250 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.saf.s :: ++input: 23.062500 -248562.750000 ++output: 0 ++fcsr: 0x100 ++roundig mode: +inf ++fcmp.saf.s :: ++input: 0.000000 -4578.500000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.saf.s :: ++input: 456.250000 456.250000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.saf.s :: ++input: 3.000000 34.031250 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.saf.s :: ++input: -1.000000 4578.750000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.saf.s :: ++input: 1384.500000 175.000000 ++output: 0 ++fcsr: 0x200 
++roundig mode: +inf ++fcmp.saf.s :: ++input: -7.250000 107.000000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.saf.s :: ++input: 1000000000.000000 -456.250000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.saf.s :: ++input: -5786.500000 -7.250000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.saf.s :: ++input: 1752.000000 -3478.500000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.saf.s :: ++input: 0.015625 356.500000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.saf.s :: ++input: 0.031250 -1.000000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.saf.s :: ++input: -248562.750000 23.062500 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.saf.s :: ++input: -45786.500000 0.000000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.saf.s :: ++input: 456.000000 456.250000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.saf.s :: ++input: 34.031250 3.000000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.saf.s :: ++input: 45786.750000 -1.000000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.saf.s :: ++input: 1752065.000000 1384.500000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.saf.s :: ++input: 107.000000 -7.000000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.saf.s :: ++input: -45667.250000 100.000000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.saf.s :: ++input: -7.000000 -5786.500000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.saf.s :: ++input: -347856.500000 1752.000000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.saf.s :: ++input: 356047.500000 0.015625 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.saf.s :: ++input: -1.000000 0.031250 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.saf.s :: ++input: 23.062500 -248562.750000 ++output: 0 ++fcsr: 0x200 ++roundig mode: -inf ++fcmp.saf.s :: ++input: 0.000000 -4578.500000 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.saf.s :: ++input: 456.250000 456.250000 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.saf.s :: ++input: 3.000000 34.031250 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.saf.s :: ++input: -1.000000 4578.750000 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.saf.s :: ++input: 1384.500000 175.000000 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.saf.s :: ++input: -7.250000 107.000000 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.saf.s :: ++input: 1000000000.000000 -456.250000 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.saf.s :: ++input: -5786.500000 -7.250000 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.saf.s :: ++input: 1752.000000 -3478.500000 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.saf.s :: ++input: 0.015625 356.500000 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.saf.s :: ++input: 0.031250 -1.000000 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.saf.s :: ++input: -248562.750000 23.062500 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.saf.s :: ++input: -45786.500000 0.000000 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.saf.s :: ++input: 456.000000 456.250000 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.saf.s :: ++input: 34.031250 3.000000 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.saf.s :: ++input: 45786.750000 -1.000000 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.saf.s :: ++input: 1752065.000000 1384.500000 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.saf.s :: ++input: 107.000000 -7.000000 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.saf.s :: 
++input: -45667.250000 100.000000 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.saf.s :: ++input: -7.000000 -5786.500000 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.saf.s :: ++input: -347856.500000 1752.000000 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.saf.s :: ++input: 356047.500000 0.015625 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.saf.s :: ++input: -1.000000 0.031250 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.saf.s :: ++input: 23.062500 -248562.750000 ++output: 0 ++fcsr: 0x300 ++roundig mode: near ++fcmp.saf.d :: ++input: 0.000000000000000 -45786.500000000000000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.saf.d :: ++input: 456.250000000000000 456.250000000000000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.saf.d :: ++input: 3.000000000000000 34.031250000000000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.saf.d :: ++input: -1.000000000000000 45786.750000000000000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.saf.d :: ++input: 1384.500000000000000 1752065.000000000000000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.saf.d :: ++input: -7.250000000000000 107.000000000000000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.saf.d :: ++input: 1000000000.000000000000000 -45667.250000000000000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.saf.d :: ++input: -5786.500000000000000 -7.250000000000000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.saf.d :: ++input: 1752.000000000000000 -347856.500000000000000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.saf.d :: ++input: 0.015625000000000 356047.500000000000000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.saf.d :: ++input: 0.031250000000000 -1.000000000000000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.saf.d :: ++input: -248562.750000000000000 23.062500000000000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.saf.d :: ++input: -45786.500000000000000 0.000000000000000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.saf.d :: ++input: 456.000000000000000 456.250000000000000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.saf.d :: ++input: 34.031250000000000 3.000000000000000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.saf.d :: ++input: 45786.750000000000000 -1.000000000000000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.saf.d :: ++input: 1752065.000000000000000 1384.500000000000000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.saf.d :: ++input: 107.000000000000000 -7.000000000000000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.saf.d :: ++input: -45667.250000000000000 1000000000.000000000000000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.saf.d :: ++input: -7.000000000000000 -5786.500000000000000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.saf.d :: ++input: -347856.500000000000000 1752.000000000000000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.saf.d :: ++input: 356047.500000000000000 0.015625000000000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.saf.d :: ++input: -1.000000000000000 0.031250000000000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.saf.d :: ++input: 23.062500000000000 -248562.750000000000000 ++output: 0 ++fcsr: 0 ++roundig mode: zero ++fcmp.saf.d :: ++input: 0.000000000000000 -45786.500000000000000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.saf.d :: ++input: 456.250000000000000 456.250000000000000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.saf.d :: ++input: 3.000000000000000 34.031250000000000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.saf.d :: ++input: 
-1.000000000000000 45786.750000000000000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.saf.d :: ++input: 1384.500000000000000 1752065.000000000000000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.saf.d :: ++input: -7.250000000000000 107.000000000000000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.saf.d :: ++input: 1000000000.000000000000000 -45667.250000000000000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.saf.d :: ++input: -5786.500000000000000 -7.250000000000000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.saf.d :: ++input: 1752.000000000000000 -347856.500000000000000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.saf.d :: ++input: 0.015625000000000 356047.500000000000000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.saf.d :: ++input: 0.031250000000000 -1.000000000000000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.saf.d :: ++input: -248562.750000000000000 23.062500000000000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.saf.d :: ++input: -45786.500000000000000 0.000000000000000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.saf.d :: ++input: 456.000000000000000 456.250000000000000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.saf.d :: ++input: 34.031250000000000 3.000000000000000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.saf.d :: ++input: 45786.750000000000000 -1.000000000000000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.saf.d :: ++input: 1752065.000000000000000 1384.500000000000000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.saf.d :: ++input: 107.000000000000000 -7.000000000000000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.saf.d :: ++input: -45667.250000000000000 1000000000.000000000000000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.saf.d :: ++input: -7.000000000000000 -5786.500000000000000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.saf.d :: ++input: -347856.500000000000000 1752.000000000000000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.saf.d :: ++input: 356047.500000000000000 0.015625000000000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.saf.d :: ++input: -1.000000000000000 0.031250000000000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.saf.d :: ++input: 23.062500000000000 -248562.750000000000000 ++output: 0 ++fcsr: 0x100 ++roundig mode: +inf ++fcmp.saf.d :: ++input: 0.000000000000000 -45786.500000000000000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.saf.d :: ++input: 456.250000000000000 456.250000000000000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.saf.d :: ++input: 3.000000000000000 34.031250000000000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.saf.d :: ++input: -1.000000000000000 45786.750000000000000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.saf.d :: ++input: 1384.500000000000000 1752065.000000000000000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.saf.d :: ++input: -7.250000000000000 107.000000000000000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.saf.d :: ++input: 1000000000.000000000000000 -45667.250000000000000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.saf.d :: ++input: -5786.500000000000000 -7.250000000000000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.saf.d :: ++input: 1752.000000000000000 -347856.500000000000000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.saf.d :: ++input: 0.015625000000000 356047.500000000000000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.saf.d :: ++input: 0.031250000000000 
-1.000000000000000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.saf.d :: ++input: -248562.750000000000000 23.062500000000000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.saf.d :: ++input: -45786.500000000000000 0.000000000000000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.saf.d :: ++input: 456.000000000000000 456.250000000000000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.saf.d :: ++input: 34.031250000000000 3.000000000000000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.saf.d :: ++input: 45786.750000000000000 -1.000000000000000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.saf.d :: ++input: 1752065.000000000000000 1384.500000000000000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.saf.d :: ++input: 107.000000000000000 -7.000000000000000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.saf.d :: ++input: -45667.250000000000000 1000000000.000000000000000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.saf.d :: ++input: -7.000000000000000 -5786.500000000000000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.saf.d :: ++input: -347856.500000000000000 1752.000000000000000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.saf.d :: ++input: 356047.500000000000000 0.015625000000000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.saf.d :: ++input: -1.000000000000000 0.031250000000000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.saf.d :: ++input: 23.062500000000000 -248562.750000000000000 ++output: 0 ++fcsr: 0x200 ++roundig mode: -inf ++fcmp.saf.d :: ++input: 0.000000000000000 -45786.500000000000000 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.saf.d :: ++input: 456.250000000000000 456.250000000000000 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.saf.d :: ++input: 3.000000000000000 34.031250000000000 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.saf.d :: ++input: -1.000000000000000 45786.750000000000000 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.saf.d :: ++input: 1384.500000000000000 1752065.000000000000000 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.saf.d :: ++input: -7.250000000000000 107.000000000000000 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.saf.d :: ++input: 1000000000.000000000000000 -45667.250000000000000 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.saf.d :: ++input: -5786.500000000000000 -7.250000000000000 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.saf.d :: ++input: 1752.000000000000000 -347856.500000000000000 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.saf.d :: ++input: 0.015625000000000 356047.500000000000000 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.saf.d :: ++input: 0.031250000000000 -1.000000000000000 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.saf.d :: ++input: -248562.750000000000000 23.062500000000000 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.saf.d :: ++input: -45786.500000000000000 0.000000000000000 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.saf.d :: ++input: 456.000000000000000 456.250000000000000 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.saf.d :: ++input: 34.031250000000000 3.000000000000000 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.saf.d :: ++input: 45786.750000000000000 -1.000000000000000 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.saf.d :: ++input: 1752065.000000000000000 1384.500000000000000 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.saf.d :: ++input: 107.000000000000000 -7.000000000000000 ++output: 0 
++fcsr: 0x300 ++roundig mode: -inf ++fcmp.saf.d :: ++input: -45667.250000000000000 1000000000.000000000000000 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.saf.d :: ++input: -7.000000000000000 -5786.500000000000000 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.saf.d :: ++input: -347856.500000000000000 1752.000000000000000 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.saf.d :: ++input: 356047.500000000000000 0.015625000000000 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.saf.d :: ++input: -1.000000000000000 0.031250000000000 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.saf.d :: ++input: 23.062500000000000 -248562.750000000000000 ++output: 0 ++fcsr: 0x300 ++roundig mode: near ++fcmp.clt.s :: ++input: 0.000000 -4578.500000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.clt.s :: ++input: 456.250000 456.250000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.clt.s :: ++input: 3.000000 34.031250 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.clt.s :: ++input: -1.000000 4578.750000 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.clt.s :: ++input: 1384.500000 175.000000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.clt.s :: ++input: -7.250000 107.000000 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.clt.s :: ++input: 1000000000.000000 -456.250000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.clt.s :: ++input: -5786.500000 -7.250000 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.clt.s :: ++input: 1752.000000 -3478.500000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.clt.s :: ++input: 0.015625 356.500000 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.clt.s :: ++input: 0.031250 -1.000000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.clt.s :: ++input: -248562.750000 23.062500 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.clt.s :: ++input: -45786.500000 0.000000 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.clt.s :: ++input: 456.000000 456.250000 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.clt.s :: ++input: 34.031250 3.000000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.clt.s :: ++input: 45786.750000 -1.000000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.clt.s :: ++input: 1752065.000000 1384.500000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.clt.s :: ++input: 107.000000 -7.000000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.clt.s :: ++input: -45667.250000 100.000000 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.clt.s :: ++input: -7.000000 -5786.500000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.clt.s :: ++input: -347856.500000 1752.000000 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.clt.s :: ++input: 356047.500000 0.015625 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.clt.s :: ++input: -1.000000 0.031250 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.clt.s :: ++input: 23.062500 -248562.750000 ++output: 0 ++fcsr: 0 ++roundig mode: zero ++fcmp.clt.s :: ++input: 0.000000 -4578.500000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.clt.s :: ++input: 456.250000 456.250000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.clt.s :: ++input: 3.000000 34.031250 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.clt.s :: ++input: -1.000000 4578.750000 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.clt.s :: ++input: 1384.500000 175.000000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.clt.s :: ++input: -7.250000 107.000000 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.clt.s :: ++input: 1000000000.000000 -456.250000 ++output: 0 ++fcsr: 0x100 
++roundig mode: zero ++fcmp.clt.s :: ++input: -5786.500000 -7.250000 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.clt.s :: ++input: 1752.000000 -3478.500000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.clt.s :: ++input: 0.015625 356.500000 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.clt.s :: ++input: 0.031250 -1.000000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.clt.s :: ++input: -248562.750000 23.062500 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.clt.s :: ++input: -45786.500000 0.000000 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.clt.s :: ++input: 456.000000 456.250000 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.clt.s :: ++input: 34.031250 3.000000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.clt.s :: ++input: 45786.750000 -1.000000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.clt.s :: ++input: 1752065.000000 1384.500000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.clt.s :: ++input: 107.000000 -7.000000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.clt.s :: ++input: -45667.250000 100.000000 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.clt.s :: ++input: -7.000000 -5786.500000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.clt.s :: ++input: -347856.500000 1752.000000 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.clt.s :: ++input: 356047.500000 0.015625 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.clt.s :: ++input: -1.000000 0.031250 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.clt.s :: ++input: 23.062500 -248562.750000 ++output: 0 ++fcsr: 0x100 ++roundig mode: +inf ++fcmp.clt.s :: ++input: 0.000000 -4578.500000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.clt.s :: ++input: 456.250000 456.250000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.clt.s :: ++input: 3.000000 34.031250 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.clt.s :: ++input: -1.000000 4578.750000 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.clt.s :: ++input: 1384.500000 175.000000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.clt.s :: ++input: -7.250000 107.000000 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.clt.s :: ++input: 1000000000.000000 -456.250000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.clt.s :: ++input: -5786.500000 -7.250000 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.clt.s :: ++input: 1752.000000 -3478.500000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.clt.s :: ++input: 0.015625 356.500000 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.clt.s :: ++input: 0.031250 -1.000000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.clt.s :: ++input: -248562.750000 23.062500 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.clt.s :: ++input: -45786.500000 0.000000 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.clt.s :: ++input: 456.000000 456.250000 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.clt.s :: ++input: 34.031250 3.000000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.clt.s :: ++input: 45786.750000 -1.000000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.clt.s :: ++input: 1752065.000000 1384.500000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.clt.s :: ++input: 107.000000 -7.000000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.clt.s :: ++input: -45667.250000 100.000000 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.clt.s :: ++input: -7.000000 -5786.500000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.clt.s :: ++input: 
-347856.500000 1752.000000 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.clt.s :: ++input: 356047.500000 0.015625 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.clt.s :: ++input: -1.000000 0.031250 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.clt.s :: ++input: 23.062500 -248562.750000 ++output: 0 ++fcsr: 0x200 ++roundig mode: -inf ++fcmp.clt.s :: ++input: 0.000000 -4578.500000 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.clt.s :: ++input: 456.250000 456.250000 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.clt.s :: ++input: 3.000000 34.031250 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.clt.s :: ++input: -1.000000 4578.750000 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.clt.s :: ++input: 1384.500000 175.000000 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.clt.s :: ++input: -7.250000 107.000000 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.clt.s :: ++input: 1000000000.000000 -456.250000 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.clt.s :: ++input: -5786.500000 -7.250000 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.clt.s :: ++input: 1752.000000 -3478.500000 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.clt.s :: ++input: 0.015625 356.500000 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.clt.s :: ++input: 0.031250 -1.000000 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.clt.s :: ++input: -248562.750000 23.062500 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.clt.s :: ++input: -45786.500000 0.000000 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.clt.s :: ++input: 456.000000 456.250000 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.clt.s :: ++input: 34.031250 3.000000 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.clt.s :: ++input: 45786.750000 -1.000000 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.clt.s :: ++input: 1752065.000000 1384.500000 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.clt.s :: ++input: 107.000000 -7.000000 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.clt.s :: ++input: -45667.250000 100.000000 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.clt.s :: ++input: -7.000000 -5786.500000 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.clt.s :: ++input: -347856.500000 1752.000000 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.clt.s :: ++input: 356047.500000 0.015625 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.clt.s :: ++input: -1.000000 0.031250 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.clt.s :: ++input: 23.062500 -248562.750000 ++output: 0 ++fcsr: 0x300 ++roundig mode: near ++fcmp.clt.d :: ++input: 0.000000000000000 -45786.500000000000000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.clt.d :: ++input: 456.250000000000000 456.250000000000000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.clt.d :: ++input: 3.000000000000000 34.031250000000000 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.clt.d :: ++input: -1.000000000000000 45786.750000000000000 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.clt.d :: ++input: 1384.500000000000000 1752065.000000000000000 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.clt.d :: ++input: -7.250000000000000 107.000000000000000 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.clt.d :: ++input: 1000000000.000000000000000 -45667.250000000000000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.clt.d :: ++input: -5786.500000000000000 -7.250000000000000 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.clt.d :: ++input: 
1752.000000000000000 -347856.500000000000000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.clt.d :: ++input: 0.015625000000000 356047.500000000000000 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.clt.d :: ++input: 0.031250000000000 -1.000000000000000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.clt.d :: ++input: -248562.750000000000000 23.062500000000000 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.clt.d :: ++input: -45786.500000000000000 0.000000000000000 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.clt.d :: ++input: 456.000000000000000 456.250000000000000 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.clt.d :: ++input: 34.031250000000000 3.000000000000000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.clt.d :: ++input: 45786.750000000000000 -1.000000000000000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.clt.d :: ++input: 1752065.000000000000000 1384.500000000000000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.clt.d :: ++input: 107.000000000000000 -7.000000000000000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.clt.d :: ++input: -45667.250000000000000 1000000000.000000000000000 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.clt.d :: ++input: -7.000000000000000 -5786.500000000000000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.clt.d :: ++input: -347856.500000000000000 1752.000000000000000 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.clt.d :: ++input: 356047.500000000000000 0.015625000000000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.clt.d :: ++input: -1.000000000000000 0.031250000000000 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.clt.d :: ++input: 23.062500000000000 -248562.750000000000000 ++output: 0 ++fcsr: 0 ++roundig mode: zero ++fcmp.clt.d :: ++input: 0.000000000000000 -45786.500000000000000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.clt.d :: ++input: 456.250000000000000 456.250000000000000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.clt.d :: ++input: 3.000000000000000 34.031250000000000 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.clt.d :: ++input: -1.000000000000000 45786.750000000000000 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.clt.d :: ++input: 1384.500000000000000 1752065.000000000000000 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.clt.d :: ++input: -7.250000000000000 107.000000000000000 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.clt.d :: ++input: 1000000000.000000000000000 -45667.250000000000000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.clt.d :: ++input: -5786.500000000000000 -7.250000000000000 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.clt.d :: ++input: 1752.000000000000000 -347856.500000000000000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.clt.d :: ++input: 0.015625000000000 356047.500000000000000 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.clt.d :: ++input: 0.031250000000000 -1.000000000000000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.clt.d :: ++input: -248562.750000000000000 23.062500000000000 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.clt.d :: ++input: -45786.500000000000000 0.000000000000000 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.clt.d :: ++input: 456.000000000000000 456.250000000000000 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.clt.d :: ++input: 34.031250000000000 3.000000000000000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.clt.d :: ++input: 45786.750000000000000 -1.000000000000000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.clt.d 
:: ++input: 1752065.000000000000000 1384.500000000000000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.clt.d :: ++input: 107.000000000000000 -7.000000000000000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.clt.d :: ++input: -45667.250000000000000 1000000000.000000000000000 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.clt.d :: ++input: -7.000000000000000 -5786.500000000000000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.clt.d :: ++input: -347856.500000000000000 1752.000000000000000 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.clt.d :: ++input: 356047.500000000000000 0.015625000000000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.clt.d :: ++input: -1.000000000000000 0.031250000000000 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.clt.d :: ++input: 23.062500000000000 -248562.750000000000000 ++output: 0 ++fcsr: 0x100 ++roundig mode: +inf ++fcmp.clt.d :: ++input: 0.000000000000000 -45786.500000000000000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.clt.d :: ++input: 456.250000000000000 456.250000000000000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.clt.d :: ++input: 3.000000000000000 34.031250000000000 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.clt.d :: ++input: -1.000000000000000 45786.750000000000000 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.clt.d :: ++input: 1384.500000000000000 1752065.000000000000000 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.clt.d :: ++input: -7.250000000000000 107.000000000000000 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.clt.d :: ++input: 1000000000.000000000000000 -45667.250000000000000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.clt.d :: ++input: -5786.500000000000000 -7.250000000000000 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.clt.d :: ++input: 1752.000000000000000 -347856.500000000000000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.clt.d :: ++input: 0.015625000000000 356047.500000000000000 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.clt.d :: ++input: 0.031250000000000 -1.000000000000000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.clt.d :: ++input: -248562.750000000000000 23.062500000000000 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.clt.d :: ++input: -45786.500000000000000 0.000000000000000 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.clt.d :: ++input: 456.000000000000000 456.250000000000000 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.clt.d :: ++input: 34.031250000000000 3.000000000000000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.clt.d :: ++input: 45786.750000000000000 -1.000000000000000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.clt.d :: ++input: 1752065.000000000000000 1384.500000000000000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.clt.d :: ++input: 107.000000000000000 -7.000000000000000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.clt.d :: ++input: -45667.250000000000000 1000000000.000000000000000 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.clt.d :: ++input: -7.000000000000000 -5786.500000000000000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.clt.d :: ++input: -347856.500000000000000 1752.000000000000000 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.clt.d :: ++input: 356047.500000000000000 0.015625000000000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.clt.d :: ++input: -1.000000000000000 0.031250000000000 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.clt.d :: ++input: 
23.062500000000000 -248562.750000000000000 ++output: 0 ++fcsr: 0x200 ++roundig mode: -inf ++fcmp.clt.d :: ++input: 0.000000000000000 -45786.500000000000000 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.clt.d :: ++input: 456.250000000000000 456.250000000000000 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.clt.d :: ++input: 3.000000000000000 34.031250000000000 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.clt.d :: ++input: -1.000000000000000 45786.750000000000000 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.clt.d :: ++input: 1384.500000000000000 1752065.000000000000000 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.clt.d :: ++input: -7.250000000000000 107.000000000000000 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.clt.d :: ++input: 1000000000.000000000000000 -45667.250000000000000 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.clt.d :: ++input: -5786.500000000000000 -7.250000000000000 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.clt.d :: ++input: 1752.000000000000000 -347856.500000000000000 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.clt.d :: ++input: 0.015625000000000 356047.500000000000000 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.clt.d :: ++input: 0.031250000000000 -1.000000000000000 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.clt.d :: ++input: -248562.750000000000000 23.062500000000000 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.clt.d :: ++input: -45786.500000000000000 0.000000000000000 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.clt.d :: ++input: 456.000000000000000 456.250000000000000 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.clt.d :: ++input: 34.031250000000000 3.000000000000000 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.clt.d :: ++input: 45786.750000000000000 -1.000000000000000 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.clt.d :: ++input: 1752065.000000000000000 1384.500000000000000 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.clt.d :: ++input: 107.000000000000000 -7.000000000000000 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.clt.d :: ++input: -45667.250000000000000 1000000000.000000000000000 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.clt.d :: ++input: -7.000000000000000 -5786.500000000000000 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.clt.d :: ++input: -347856.500000000000000 1752.000000000000000 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.clt.d :: ++input: 356047.500000000000000 0.015625000000000 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.clt.d :: ++input: -1.000000000000000 0.031250000000000 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.clt.d :: ++input: 23.062500000000000 -248562.750000000000000 ++output: 0 ++fcsr: 0x300 ++roundig mode: near ++fcmp.slt.s :: ++input: 0.000000 -4578.500000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.slt.s :: ++input: 456.250000 456.250000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.slt.s :: ++input: 3.000000 34.031250 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.slt.s :: ++input: -1.000000 4578.750000 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.slt.s :: ++input: 1384.500000 175.000000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.slt.s :: ++input: -7.250000 107.000000 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.slt.s :: ++input: 1000000000.000000 -456.250000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.slt.s :: ++input: -5786.500000 -7.250000 ++output: 1 ++fcsr: 0 ++roundig mode: near 
++fcmp.slt.s :: ++input: 1752.000000 -3478.500000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.slt.s :: ++input: 0.015625 356.500000 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.slt.s :: ++input: 0.031250 -1.000000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.slt.s :: ++input: -248562.750000 23.062500 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.slt.s :: ++input: -45786.500000 0.000000 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.slt.s :: ++input: 456.000000 456.250000 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.slt.s :: ++input: 34.031250 3.000000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.slt.s :: ++input: 45786.750000 -1.000000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.slt.s :: ++input: 1752065.000000 1384.500000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.slt.s :: ++input: 107.000000 -7.000000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.slt.s :: ++input: -45667.250000 100.000000 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.slt.s :: ++input: -7.000000 -5786.500000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.slt.s :: ++input: -347856.500000 1752.000000 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.slt.s :: ++input: 356047.500000 0.015625 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.slt.s :: ++input: -1.000000 0.031250 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.slt.s :: ++input: 23.062500 -248562.750000 ++output: 0 ++fcsr: 0 ++roundig mode: zero ++fcmp.slt.s :: ++input: 0.000000 -4578.500000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.slt.s :: ++input: 456.250000 456.250000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.slt.s :: ++input: 3.000000 34.031250 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.slt.s :: ++input: -1.000000 4578.750000 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.slt.s :: ++input: 1384.500000 175.000000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.slt.s :: ++input: -7.250000 107.000000 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.slt.s :: ++input: 1000000000.000000 -456.250000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.slt.s :: ++input: -5786.500000 -7.250000 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.slt.s :: ++input: 1752.000000 -3478.500000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.slt.s :: ++input: 0.015625 356.500000 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.slt.s :: ++input: 0.031250 -1.000000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.slt.s :: ++input: -248562.750000 23.062500 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.slt.s :: ++input: -45786.500000 0.000000 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.slt.s :: ++input: 456.000000 456.250000 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.slt.s :: ++input: 34.031250 3.000000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.slt.s :: ++input: 45786.750000 -1.000000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.slt.s :: ++input: 1752065.000000 1384.500000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.slt.s :: ++input: 107.000000 -7.000000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.slt.s :: ++input: -45667.250000 100.000000 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.slt.s :: ++input: -7.000000 -5786.500000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.slt.s :: ++input: -347856.500000 1752.000000 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.slt.s :: ++input: 356047.500000 0.015625 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.slt.s 
:: ++input: -1.000000 0.031250 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.slt.s :: ++input: 23.062500 -248562.750000 ++output: 0 ++fcsr: 0x100 ++roundig mode: +inf ++fcmp.slt.s :: ++input: 0.000000 -4578.500000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.slt.s :: ++input: 456.250000 456.250000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.slt.s :: ++input: 3.000000 34.031250 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.slt.s :: ++input: -1.000000 4578.750000 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.slt.s :: ++input: 1384.500000 175.000000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.slt.s :: ++input: -7.250000 107.000000 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.slt.s :: ++input: 1000000000.000000 -456.250000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.slt.s :: ++input: -5786.500000 -7.250000 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.slt.s :: ++input: 1752.000000 -3478.500000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.slt.s :: ++input: 0.015625 356.500000 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.slt.s :: ++input: 0.031250 -1.000000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.slt.s :: ++input: -248562.750000 23.062500 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.slt.s :: ++input: -45786.500000 0.000000 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.slt.s :: ++input: 456.000000 456.250000 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.slt.s :: ++input: 34.031250 3.000000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.slt.s :: ++input: 45786.750000 -1.000000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.slt.s :: ++input: 1752065.000000 1384.500000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.slt.s :: ++input: 107.000000 -7.000000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.slt.s :: ++input: -45667.250000 100.000000 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.slt.s :: ++input: -7.000000 -5786.500000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.slt.s :: ++input: -347856.500000 1752.000000 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.slt.s :: ++input: 356047.500000 0.015625 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.slt.s :: ++input: -1.000000 0.031250 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.slt.s :: ++input: 23.062500 -248562.750000 ++output: 0 ++fcsr: 0x200 ++roundig mode: -inf ++fcmp.slt.s :: ++input: 0.000000 -4578.500000 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.slt.s :: ++input: 456.250000 456.250000 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.slt.s :: ++input: 3.000000 34.031250 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.slt.s :: ++input: -1.000000 4578.750000 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.slt.s :: ++input: 1384.500000 175.000000 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.slt.s :: ++input: -7.250000 107.000000 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.slt.s :: ++input: 1000000000.000000 -456.250000 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.slt.s :: ++input: -5786.500000 -7.250000 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.slt.s :: ++input: 1752.000000 -3478.500000 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.slt.s :: ++input: 0.015625 356.500000 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.slt.s :: ++input: 0.031250 -1.000000 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.slt.s :: ++input: -248562.750000 23.062500 ++output: 1 
++fcsr: 0x300 ++roundig mode: -inf ++fcmp.slt.s :: ++input: -45786.500000 0.000000 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.slt.s :: ++input: 456.000000 456.250000 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.slt.s :: ++input: 34.031250 3.000000 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.slt.s :: ++input: 45786.750000 -1.000000 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.slt.s :: ++input: 1752065.000000 1384.500000 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.slt.s :: ++input: 107.000000 -7.000000 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.slt.s :: ++input: -45667.250000 100.000000 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.slt.s :: ++input: -7.000000 -5786.500000 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.slt.s :: ++input: -347856.500000 1752.000000 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.slt.s :: ++input: 356047.500000 0.015625 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.slt.s :: ++input: -1.000000 0.031250 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.slt.s :: ++input: 23.062500 -248562.750000 ++output: 0 ++fcsr: 0x300 ++roundig mode: near ++fcmp.slt.d :: ++input: 0.000000000000000 -45786.500000000000000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.slt.d :: ++input: 456.250000000000000 456.250000000000000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.slt.d :: ++input: 3.000000000000000 34.031250000000000 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.slt.d :: ++input: -1.000000000000000 45786.750000000000000 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.slt.d :: ++input: 1384.500000000000000 1752065.000000000000000 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.slt.d :: ++input: -7.250000000000000 107.000000000000000 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.slt.d :: ++input: 1000000000.000000000000000 -45667.250000000000000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.slt.d :: ++input: -5786.500000000000000 -7.250000000000000 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.slt.d :: ++input: 1752.000000000000000 -347856.500000000000000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.slt.d :: ++input: 0.015625000000000 356047.500000000000000 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.slt.d :: ++input: 0.031250000000000 -1.000000000000000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.slt.d :: ++input: -248562.750000000000000 23.062500000000000 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.slt.d :: ++input: -45786.500000000000000 0.000000000000000 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.slt.d :: ++input: 456.000000000000000 456.250000000000000 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.slt.d :: ++input: 34.031250000000000 3.000000000000000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.slt.d :: ++input: 45786.750000000000000 -1.000000000000000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.slt.d :: ++input: 1752065.000000000000000 1384.500000000000000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.slt.d :: ++input: 107.000000000000000 -7.000000000000000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.slt.d :: ++input: -45667.250000000000000 1000000000.000000000000000 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.slt.d :: ++input: -7.000000000000000 -5786.500000000000000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.slt.d :: ++input: -347856.500000000000000 1752.000000000000000 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.slt.d :: ++input: 356047.500000000000000 0.015625000000000 
++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.slt.d :: ++input: -1.000000000000000 0.031250000000000 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.slt.d :: ++input: 23.062500000000000 -248562.750000000000000 ++output: 0 ++fcsr: 0 ++roundig mode: zero ++fcmp.slt.d :: ++input: 0.000000000000000 -45786.500000000000000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.slt.d :: ++input: 456.250000000000000 456.250000000000000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.slt.d :: ++input: 3.000000000000000 34.031250000000000 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.slt.d :: ++input: -1.000000000000000 45786.750000000000000 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.slt.d :: ++input: 1384.500000000000000 1752065.000000000000000 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.slt.d :: ++input: -7.250000000000000 107.000000000000000 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.slt.d :: ++input: 1000000000.000000000000000 -45667.250000000000000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.slt.d :: ++input: -5786.500000000000000 -7.250000000000000 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.slt.d :: ++input: 1752.000000000000000 -347856.500000000000000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.slt.d :: ++input: 0.015625000000000 356047.500000000000000 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.slt.d :: ++input: 0.031250000000000 -1.000000000000000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.slt.d :: ++input: -248562.750000000000000 23.062500000000000 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.slt.d :: ++input: -45786.500000000000000 0.000000000000000 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.slt.d :: ++input: 456.000000000000000 456.250000000000000 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.slt.d :: ++input: 34.031250000000000 3.000000000000000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.slt.d :: ++input: 45786.750000000000000 -1.000000000000000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.slt.d :: ++input: 1752065.000000000000000 1384.500000000000000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.slt.d :: ++input: 107.000000000000000 -7.000000000000000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.slt.d :: ++input: -45667.250000000000000 1000000000.000000000000000 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.slt.d :: ++input: -7.000000000000000 -5786.500000000000000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.slt.d :: ++input: -347856.500000000000000 1752.000000000000000 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.slt.d :: ++input: 356047.500000000000000 0.015625000000000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.slt.d :: ++input: -1.000000000000000 0.031250000000000 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.slt.d :: ++input: 23.062500000000000 -248562.750000000000000 ++output: 0 ++fcsr: 0x100 ++roundig mode: +inf ++fcmp.slt.d :: ++input: 0.000000000000000 -45786.500000000000000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.slt.d :: ++input: 456.250000000000000 456.250000000000000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.slt.d :: ++input: 3.000000000000000 34.031250000000000 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.slt.d :: ++input: -1.000000000000000 45786.750000000000000 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.slt.d :: ++input: 1384.500000000000000 1752065.000000000000000 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf 
++fcmp.slt.d :: ++input: -7.250000000000000 107.000000000000000 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.slt.d :: ++input: 1000000000.000000000000000 -45667.250000000000000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.slt.d :: ++input: -5786.500000000000000 -7.250000000000000 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.slt.d :: ++input: 1752.000000000000000 -347856.500000000000000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.slt.d :: ++input: 0.015625000000000 356047.500000000000000 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.slt.d :: ++input: 0.031250000000000 -1.000000000000000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.slt.d :: ++input: -248562.750000000000000 23.062500000000000 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.slt.d :: ++input: -45786.500000000000000 0.000000000000000 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.slt.d :: ++input: 456.000000000000000 456.250000000000000 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.slt.d :: ++input: 34.031250000000000 3.000000000000000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.slt.d :: ++input: 45786.750000000000000 -1.000000000000000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.slt.d :: ++input: 1752065.000000000000000 1384.500000000000000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.slt.d :: ++input: 107.000000000000000 -7.000000000000000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.slt.d :: ++input: -45667.250000000000000 1000000000.000000000000000 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.slt.d :: ++input: -7.000000000000000 -5786.500000000000000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.slt.d :: ++input: -347856.500000000000000 1752.000000000000000 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.slt.d :: ++input: 356047.500000000000000 0.015625000000000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.slt.d :: ++input: -1.000000000000000 0.031250000000000 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.slt.d :: ++input: 23.062500000000000 -248562.750000000000000 ++output: 0 ++fcsr: 0x200 ++roundig mode: -inf ++fcmp.slt.d :: ++input: 0.000000000000000 -45786.500000000000000 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.slt.d :: ++input: 456.250000000000000 456.250000000000000 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.slt.d :: ++input: 3.000000000000000 34.031250000000000 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.slt.d :: ++input: -1.000000000000000 45786.750000000000000 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.slt.d :: ++input: 1384.500000000000000 1752065.000000000000000 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.slt.d :: ++input: -7.250000000000000 107.000000000000000 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.slt.d :: ++input: 1000000000.000000000000000 -45667.250000000000000 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.slt.d :: ++input: -5786.500000000000000 -7.250000000000000 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.slt.d :: ++input: 1752.000000000000000 -347856.500000000000000 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.slt.d :: ++input: 0.015625000000000 356047.500000000000000 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.slt.d :: ++input: 0.031250000000000 -1.000000000000000 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.slt.d :: ++input: -248562.750000000000000 23.062500000000000 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.slt.d :: ++input: 
-45786.500000000000000 0.000000000000000 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.slt.d :: ++input: 456.000000000000000 456.250000000000000 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.slt.d :: ++input: 34.031250000000000 3.000000000000000 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.slt.d :: ++input: 45786.750000000000000 -1.000000000000000 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.slt.d :: ++input: 1752065.000000000000000 1384.500000000000000 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.slt.d :: ++input: 107.000000000000000 -7.000000000000000 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.slt.d :: ++input: -45667.250000000000000 1000000000.000000000000000 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.slt.d :: ++input: -7.000000000000000 -5786.500000000000000 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.slt.d :: ++input: -347856.500000000000000 1752.000000000000000 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.slt.d :: ++input: 356047.500000000000000 0.015625000000000 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.slt.d :: ++input: -1.000000000000000 0.031250000000000 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.slt.d :: ++input: 23.062500000000000 -248562.750000000000000 ++output: 0 ++fcsr: 0x300 ++roundig mode: near ++fcmp.ceq.s :: ++input: 0.000000 -4578.500000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.ceq.s :: ++input: 456.250000 456.250000 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.ceq.s :: ++input: 3.000000 34.031250 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.ceq.s :: ++input: -1.000000 4578.750000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.ceq.s :: ++input: 1384.500000 175.000000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.ceq.s :: ++input: -7.250000 107.000000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.ceq.s :: ++input: 1000000000.000000 -456.250000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.ceq.s :: ++input: -5786.500000 -7.250000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.ceq.s :: ++input: 1752.000000 -3478.500000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.ceq.s :: ++input: 0.015625 356.500000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.ceq.s :: ++input: 0.031250 -1.000000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.ceq.s :: ++input: -248562.750000 23.062500 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.ceq.s :: ++input: -45786.500000 0.000000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.ceq.s :: ++input: 456.000000 456.250000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.ceq.s :: ++input: 34.031250 3.000000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.ceq.s :: ++input: 45786.750000 -1.000000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.ceq.s :: ++input: 1752065.000000 1384.500000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.ceq.s :: ++input: 107.000000 -7.000000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.ceq.s :: ++input: -45667.250000 100.000000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.ceq.s :: ++input: -7.000000 -5786.500000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.ceq.s :: ++input: -347856.500000 1752.000000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.ceq.s :: ++input: 356047.500000 0.015625 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.ceq.s :: ++input: -1.000000 0.031250 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.ceq.s :: ++input: 23.062500 -248562.750000 ++output: 0 ++fcsr: 0 ++roundig mode: zero ++fcmp.ceq.s :: ++input: 0.000000 
-4578.500000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.ceq.s :: ++input: 456.250000 456.250000 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.ceq.s :: ++input: 3.000000 34.031250 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.ceq.s :: ++input: -1.000000 4578.750000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.ceq.s :: ++input: 1384.500000 175.000000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.ceq.s :: ++input: -7.250000 107.000000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.ceq.s :: ++input: 1000000000.000000 -456.250000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.ceq.s :: ++input: -5786.500000 -7.250000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.ceq.s :: ++input: 1752.000000 -3478.500000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.ceq.s :: ++input: 0.015625 356.500000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.ceq.s :: ++input: 0.031250 -1.000000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.ceq.s :: ++input: -248562.750000 23.062500 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.ceq.s :: ++input: -45786.500000 0.000000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.ceq.s :: ++input: 456.000000 456.250000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.ceq.s :: ++input: 34.031250 3.000000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.ceq.s :: ++input: 45786.750000 -1.000000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.ceq.s :: ++input: 1752065.000000 1384.500000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.ceq.s :: ++input: 107.000000 -7.000000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.ceq.s :: ++input: -45667.250000 100.000000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.ceq.s :: ++input: -7.000000 -5786.500000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.ceq.s :: ++input: -347856.500000 1752.000000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.ceq.s :: ++input: 356047.500000 0.015625 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.ceq.s :: ++input: -1.000000 0.031250 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.ceq.s :: ++input: 23.062500 -248562.750000 ++output: 0 ++fcsr: 0x100 ++roundig mode: +inf ++fcmp.ceq.s :: ++input: 0.000000 -4578.500000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.ceq.s :: ++input: 456.250000 456.250000 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.ceq.s :: ++input: 3.000000 34.031250 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.ceq.s :: ++input: -1.000000 4578.750000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.ceq.s :: ++input: 1384.500000 175.000000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.ceq.s :: ++input: -7.250000 107.000000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.ceq.s :: ++input: 1000000000.000000 -456.250000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.ceq.s :: ++input: -5786.500000 -7.250000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.ceq.s :: ++input: 1752.000000 -3478.500000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.ceq.s :: ++input: 0.015625 356.500000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.ceq.s :: ++input: 0.031250 -1.000000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.ceq.s :: ++input: -248562.750000 23.062500 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.ceq.s :: ++input: -45786.500000 0.000000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.ceq.s :: ++input: 456.000000 456.250000 ++output: 0 ++fcsr: 0x200 ++roundig 
mode: +inf ++fcmp.ceq.s :: ++input: 34.031250 3.000000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.ceq.s :: ++input: 45786.750000 -1.000000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.ceq.s :: ++input: 1752065.000000 1384.500000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.ceq.s :: ++input: 107.000000 -7.000000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.ceq.s :: ++input: -45667.250000 100.000000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.ceq.s :: ++input: -7.000000 -5786.500000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.ceq.s :: ++input: -347856.500000 1752.000000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.ceq.s :: ++input: 356047.500000 0.015625 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.ceq.s :: ++input: -1.000000 0.031250 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.ceq.s :: ++input: 23.062500 -248562.750000 ++output: 0 ++fcsr: 0x200 ++roundig mode: -inf ++fcmp.ceq.s :: ++input: 0.000000 -4578.500000 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.ceq.s :: ++input: 456.250000 456.250000 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.ceq.s :: ++input: 3.000000 34.031250 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.ceq.s :: ++input: -1.000000 4578.750000 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.ceq.s :: ++input: 1384.500000 175.000000 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.ceq.s :: ++input: -7.250000 107.000000 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.ceq.s :: ++input: 1000000000.000000 -456.250000 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.ceq.s :: ++input: -5786.500000 -7.250000 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.ceq.s :: ++input: 1752.000000 -3478.500000 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.ceq.s :: ++input: 0.015625 356.500000 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.ceq.s :: ++input: 0.031250 -1.000000 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.ceq.s :: ++input: -248562.750000 23.062500 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.ceq.s :: ++input: -45786.500000 0.000000 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.ceq.s :: ++input: 456.000000 456.250000 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.ceq.s :: ++input: 34.031250 3.000000 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.ceq.s :: ++input: 45786.750000 -1.000000 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.ceq.s :: ++input: 1752065.000000 1384.500000 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.ceq.s :: ++input: 107.000000 -7.000000 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.ceq.s :: ++input: -45667.250000 100.000000 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.ceq.s :: ++input: -7.000000 -5786.500000 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.ceq.s :: ++input: -347856.500000 1752.000000 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.ceq.s :: ++input: 356047.500000 0.015625 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.ceq.s :: ++input: -1.000000 0.031250 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.ceq.s :: ++input: 23.062500 -248562.750000 ++output: 0 ++fcsr: 0x300 ++roundig mode: near ++fcmp.ceq.d :: ++input: 0.000000000000000 -45786.500000000000000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.ceq.d :: ++input: 456.250000000000000 456.250000000000000 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.ceq.d :: ++input: 3.000000000000000 34.031250000000000 ++output: 0 ++fcsr: 0 ++roundig 
mode: near ++fcmp.ceq.d :: ++input: -1.000000000000000 45786.750000000000000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.ceq.d :: ++input: 1384.500000000000000 1752065.000000000000000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.ceq.d :: ++input: -7.250000000000000 107.000000000000000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.ceq.d :: ++input: 1000000000.000000000000000 -45667.250000000000000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.ceq.d :: ++input: -5786.500000000000000 -7.250000000000000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.ceq.d :: ++input: 1752.000000000000000 -347856.500000000000000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.ceq.d :: ++input: 0.015625000000000 356047.500000000000000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.ceq.d :: ++input: 0.031250000000000 -1.000000000000000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.ceq.d :: ++input: -248562.750000000000000 23.062500000000000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.ceq.d :: ++input: -45786.500000000000000 0.000000000000000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.ceq.d :: ++input: 456.000000000000000 456.250000000000000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.ceq.d :: ++input: 34.031250000000000 3.000000000000000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.ceq.d :: ++input: 45786.750000000000000 -1.000000000000000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.ceq.d :: ++input: 1752065.000000000000000 1384.500000000000000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.ceq.d :: ++input: 107.000000000000000 -7.000000000000000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.ceq.d :: ++input: -45667.250000000000000 1000000000.000000000000000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.ceq.d :: ++input: -7.000000000000000 -5786.500000000000000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.ceq.d :: ++input: -347856.500000000000000 1752.000000000000000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.ceq.d :: ++input: 356047.500000000000000 0.015625000000000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.ceq.d :: ++input: -1.000000000000000 0.031250000000000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.ceq.d :: ++input: 23.062500000000000 -248562.750000000000000 ++output: 0 ++fcsr: 0 ++roundig mode: zero ++fcmp.ceq.d :: ++input: 0.000000000000000 -45786.500000000000000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.ceq.d :: ++input: 456.250000000000000 456.250000000000000 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.ceq.d :: ++input: 3.000000000000000 34.031250000000000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.ceq.d :: ++input: -1.000000000000000 45786.750000000000000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.ceq.d :: ++input: 1384.500000000000000 1752065.000000000000000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.ceq.d :: ++input: -7.250000000000000 107.000000000000000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.ceq.d :: ++input: 1000000000.000000000000000 -45667.250000000000000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.ceq.d :: ++input: -5786.500000000000000 -7.250000000000000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.ceq.d :: ++input: 1752.000000000000000 -347856.500000000000000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.ceq.d :: ++input: 0.015625000000000 356047.500000000000000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.ceq.d :: ++input: 0.031250000000000 -1.000000000000000 ++output: 0 ++fcsr: 0x100 
++roundig mode: zero ++fcmp.ceq.d :: ++input: -248562.750000000000000 23.062500000000000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.ceq.d :: ++input: -45786.500000000000000 0.000000000000000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.ceq.d :: ++input: 456.000000000000000 456.250000000000000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.ceq.d :: ++input: 34.031250000000000 3.000000000000000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.ceq.d :: ++input: 45786.750000000000000 -1.000000000000000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.ceq.d :: ++input: 1752065.000000000000000 1384.500000000000000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.ceq.d :: ++input: 107.000000000000000 -7.000000000000000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.ceq.d :: ++input: -45667.250000000000000 1000000000.000000000000000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.ceq.d :: ++input: -7.000000000000000 -5786.500000000000000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.ceq.d :: ++input: -347856.500000000000000 1752.000000000000000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.ceq.d :: ++input: 356047.500000000000000 0.015625000000000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.ceq.d :: ++input: -1.000000000000000 0.031250000000000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.ceq.d :: ++input: 23.062500000000000 -248562.750000000000000 ++output: 0 ++fcsr: 0x100 ++roundig mode: +inf ++fcmp.ceq.d :: ++input: 0.000000000000000 -45786.500000000000000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.ceq.d :: ++input: 456.250000000000000 456.250000000000000 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.ceq.d :: ++input: 3.000000000000000 34.031250000000000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.ceq.d :: ++input: -1.000000000000000 45786.750000000000000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.ceq.d :: ++input: 1384.500000000000000 1752065.000000000000000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.ceq.d :: ++input: -7.250000000000000 107.000000000000000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.ceq.d :: ++input: 1000000000.000000000000000 -45667.250000000000000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.ceq.d :: ++input: -5786.500000000000000 -7.250000000000000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.ceq.d :: ++input: 1752.000000000000000 -347856.500000000000000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.ceq.d :: ++input: 0.015625000000000 356047.500000000000000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.ceq.d :: ++input: 0.031250000000000 -1.000000000000000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.ceq.d :: ++input: -248562.750000000000000 23.062500000000000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.ceq.d :: ++input: -45786.500000000000000 0.000000000000000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.ceq.d :: ++input: 456.000000000000000 456.250000000000000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.ceq.d :: ++input: 34.031250000000000 3.000000000000000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.ceq.d :: ++input: 45786.750000000000000 -1.000000000000000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.ceq.d :: ++input: 1752065.000000000000000 1384.500000000000000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.ceq.d :: ++input: 107.000000000000000 -7.000000000000000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.ceq.d :: 
++input: -45667.250000000000000 1000000000.000000000000000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.ceq.d :: ++input: -7.000000000000000 -5786.500000000000000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.ceq.d :: ++input: -347856.500000000000000 1752.000000000000000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.ceq.d :: ++input: 356047.500000000000000 0.015625000000000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.ceq.d :: ++input: -1.000000000000000 0.031250000000000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.ceq.d :: ++input: 23.062500000000000 -248562.750000000000000 ++output: 0 ++fcsr: 0x200 ++roundig mode: -inf ++fcmp.ceq.d :: ++input: 0.000000000000000 -45786.500000000000000 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.ceq.d :: ++input: 456.250000000000000 456.250000000000000 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.ceq.d :: ++input: 3.000000000000000 34.031250000000000 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.ceq.d :: ++input: -1.000000000000000 45786.750000000000000 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.ceq.d :: ++input: 1384.500000000000000 1752065.000000000000000 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.ceq.d :: ++input: -7.250000000000000 107.000000000000000 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.ceq.d :: ++input: 1000000000.000000000000000 -45667.250000000000000 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.ceq.d :: ++input: -5786.500000000000000 -7.250000000000000 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.ceq.d :: ++input: 1752.000000000000000 -347856.500000000000000 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.ceq.d :: ++input: 0.015625000000000 356047.500000000000000 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.ceq.d :: ++input: 0.031250000000000 -1.000000000000000 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.ceq.d :: ++input: -248562.750000000000000 23.062500000000000 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.ceq.d :: ++input: -45786.500000000000000 0.000000000000000 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.ceq.d :: ++input: 456.000000000000000 456.250000000000000 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.ceq.d :: ++input: 34.031250000000000 3.000000000000000 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.ceq.d :: ++input: 45786.750000000000000 -1.000000000000000 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.ceq.d :: ++input: 1752065.000000000000000 1384.500000000000000 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.ceq.d :: ++input: 107.000000000000000 -7.000000000000000 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.ceq.d :: ++input: -45667.250000000000000 1000000000.000000000000000 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.ceq.d :: ++input: -7.000000000000000 -5786.500000000000000 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.ceq.d :: ++input: -347856.500000000000000 1752.000000000000000 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.ceq.d :: ++input: 356047.500000000000000 0.015625000000000 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.ceq.d :: ++input: -1.000000000000000 0.031250000000000 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.ceq.d :: ++input: 23.062500000000000 -248562.750000000000000 ++output: 0 ++fcsr: 0x300 ++roundig mode: near ++fcmp.seq.s :: ++input: 0.000000 -4578.500000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.seq.s :: ++input: 456.250000 456.250000 ++output: 1 
++fcsr: 0 ++roundig mode: near ++fcmp.seq.s :: ++input: 3.000000 34.031250 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.seq.s :: ++input: -1.000000 4578.750000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.seq.s :: ++input: 1384.500000 175.000000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.seq.s :: ++input: -7.250000 107.000000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.seq.s :: ++input: 1000000000.000000 -456.250000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.seq.s :: ++input: -5786.500000 -7.250000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.seq.s :: ++input: 1752.000000 -3478.500000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.seq.s :: ++input: 0.015625 356.500000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.seq.s :: ++input: 0.031250 -1.000000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.seq.s :: ++input: -248562.750000 23.062500 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.seq.s :: ++input: -45786.500000 0.000000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.seq.s :: ++input: 456.000000 456.250000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.seq.s :: ++input: 34.031250 3.000000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.seq.s :: ++input: 45786.750000 -1.000000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.seq.s :: ++input: 1752065.000000 1384.500000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.seq.s :: ++input: 107.000000 -7.000000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.seq.s :: ++input: -45667.250000 100.000000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.seq.s :: ++input: -7.000000 -5786.500000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.seq.s :: ++input: -347856.500000 1752.000000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.seq.s :: ++input: 356047.500000 0.015625 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.seq.s :: ++input: -1.000000 0.031250 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.seq.s :: ++input: 23.062500 -248562.750000 ++output: 0 ++fcsr: 0 ++roundig mode: zero ++fcmp.seq.s :: ++input: 0.000000 -4578.500000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.seq.s :: ++input: 456.250000 456.250000 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.seq.s :: ++input: 3.000000 34.031250 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.seq.s :: ++input: -1.000000 4578.750000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.seq.s :: ++input: 1384.500000 175.000000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.seq.s :: ++input: -7.250000 107.000000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.seq.s :: ++input: 1000000000.000000 -456.250000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.seq.s :: ++input: -5786.500000 -7.250000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.seq.s :: ++input: 1752.000000 -3478.500000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.seq.s :: ++input: 0.015625 356.500000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.seq.s :: ++input: 0.031250 -1.000000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.seq.s :: ++input: -248562.750000 23.062500 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.seq.s :: ++input: -45786.500000 0.000000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.seq.s :: ++input: 456.000000 456.250000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.seq.s :: ++input: 34.031250 3.000000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.seq.s :: ++input: 45786.750000 -1.000000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.seq.s 
:: ++input: 1752065.000000 1384.500000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.seq.s :: ++input: 107.000000 -7.000000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.seq.s :: ++input: -45667.250000 100.000000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.seq.s :: ++input: -7.000000 -5786.500000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.seq.s :: ++input: -347856.500000 1752.000000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.seq.s :: ++input: 356047.500000 0.015625 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.seq.s :: ++input: -1.000000 0.031250 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.seq.s :: ++input: 23.062500 -248562.750000 ++output: 0 ++fcsr: 0x100 ++roundig mode: +inf ++fcmp.seq.s :: ++input: 0.000000 -4578.500000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.seq.s :: ++input: 456.250000 456.250000 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.seq.s :: ++input: 3.000000 34.031250 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.seq.s :: ++input: -1.000000 4578.750000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.seq.s :: ++input: 1384.500000 175.000000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.seq.s :: ++input: -7.250000 107.000000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.seq.s :: ++input: 1000000000.000000 -456.250000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.seq.s :: ++input: -5786.500000 -7.250000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.seq.s :: ++input: 1752.000000 -3478.500000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.seq.s :: ++input: 0.015625 356.500000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.seq.s :: ++input: 0.031250 -1.000000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.seq.s :: ++input: -248562.750000 23.062500 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.seq.s :: ++input: -45786.500000 0.000000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.seq.s :: ++input: 456.000000 456.250000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.seq.s :: ++input: 34.031250 3.000000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.seq.s :: ++input: 45786.750000 -1.000000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.seq.s :: ++input: 1752065.000000 1384.500000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.seq.s :: ++input: 107.000000 -7.000000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.seq.s :: ++input: -45667.250000 100.000000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.seq.s :: ++input: -7.000000 -5786.500000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.seq.s :: ++input: -347856.500000 1752.000000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.seq.s :: ++input: 356047.500000 0.015625 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.seq.s :: ++input: -1.000000 0.031250 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.seq.s :: ++input: 23.062500 -248562.750000 ++output: 0 ++fcsr: 0x200 ++roundig mode: -inf ++fcmp.seq.s :: ++input: 0.000000 -4578.500000 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.seq.s :: ++input: 456.250000 456.250000 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.seq.s :: ++input: 3.000000 34.031250 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.seq.s :: ++input: -1.000000 4578.750000 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.seq.s :: ++input: 1384.500000 175.000000 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.seq.s :: ++input: -7.250000 107.000000 ++output: 0 
++fcsr: 0x300 ++roundig mode: -inf ++fcmp.seq.s :: ++input: 1000000000.000000 -456.250000 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.seq.s :: ++input: -5786.500000 -7.250000 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.seq.s :: ++input: 1752.000000 -3478.500000 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.seq.s :: ++input: 0.015625 356.500000 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.seq.s :: ++input: 0.031250 -1.000000 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.seq.s :: ++input: -248562.750000 23.062500 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.seq.s :: ++input: -45786.500000 0.000000 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.seq.s :: ++input: 456.000000 456.250000 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.seq.s :: ++input: 34.031250 3.000000 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.seq.s :: ++input: 45786.750000 -1.000000 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.seq.s :: ++input: 1752065.000000 1384.500000 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.seq.s :: ++input: 107.000000 -7.000000 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.seq.s :: ++input: -45667.250000 100.000000 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.seq.s :: ++input: -7.000000 -5786.500000 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.seq.s :: ++input: -347856.500000 1752.000000 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.seq.s :: ++input: 356047.500000 0.015625 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.seq.s :: ++input: -1.000000 0.031250 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.seq.s :: ++input: 23.062500 -248562.750000 ++output: 0 ++fcsr: 0x300 ++roundig mode: near ++fcmp.seq.d :: ++input: 0.000000000000000 -45786.500000000000000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.seq.d :: ++input: 456.250000000000000 456.250000000000000 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.seq.d :: ++input: 3.000000000000000 34.031250000000000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.seq.d :: ++input: -1.000000000000000 45786.750000000000000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.seq.d :: ++input: 1384.500000000000000 1752065.000000000000000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.seq.d :: ++input: -7.250000000000000 107.000000000000000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.seq.d :: ++input: 1000000000.000000000000000 -45667.250000000000000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.seq.d :: ++input: -5786.500000000000000 -7.250000000000000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.seq.d :: ++input: 1752.000000000000000 -347856.500000000000000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.seq.d :: ++input: 0.015625000000000 356047.500000000000000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.seq.d :: ++input: 0.031250000000000 -1.000000000000000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.seq.d :: ++input: -248562.750000000000000 23.062500000000000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.seq.d :: ++input: -45786.500000000000000 0.000000000000000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.seq.d :: ++input: 456.000000000000000 456.250000000000000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.seq.d :: ++input: 34.031250000000000 3.000000000000000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.seq.d :: ++input: 45786.750000000000000 -1.000000000000000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.seq.d :: ++input: 1752065.000000000000000 
1384.500000000000000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.seq.d :: ++input: 107.000000000000000 -7.000000000000000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.seq.d :: ++input: -45667.250000000000000 1000000000.000000000000000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.seq.d :: ++input: -7.000000000000000 -5786.500000000000000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.seq.d :: ++input: -347856.500000000000000 1752.000000000000000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.seq.d :: ++input: 356047.500000000000000 0.015625000000000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.seq.d :: ++input: -1.000000000000000 0.031250000000000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.seq.d :: ++input: 23.062500000000000 -248562.750000000000000 ++output: 0 ++fcsr: 0 ++roundig mode: zero ++fcmp.seq.d :: ++input: 0.000000000000000 -45786.500000000000000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.seq.d :: ++input: 456.250000000000000 456.250000000000000 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.seq.d :: ++input: 3.000000000000000 34.031250000000000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.seq.d :: ++input: -1.000000000000000 45786.750000000000000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.seq.d :: ++input: 1384.500000000000000 1752065.000000000000000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.seq.d :: ++input: -7.250000000000000 107.000000000000000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.seq.d :: ++input: 1000000000.000000000000000 -45667.250000000000000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.seq.d :: ++input: -5786.500000000000000 -7.250000000000000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.seq.d :: ++input: 1752.000000000000000 -347856.500000000000000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.seq.d :: ++input: 0.015625000000000 356047.500000000000000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.seq.d :: ++input: 0.031250000000000 -1.000000000000000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.seq.d :: ++input: -248562.750000000000000 23.062500000000000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.seq.d :: ++input: -45786.500000000000000 0.000000000000000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.seq.d :: ++input: 456.000000000000000 456.250000000000000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.seq.d :: ++input: 34.031250000000000 3.000000000000000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.seq.d :: ++input: 45786.750000000000000 -1.000000000000000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.seq.d :: ++input: 1752065.000000000000000 1384.500000000000000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.seq.d :: ++input: 107.000000000000000 -7.000000000000000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.seq.d :: ++input: -45667.250000000000000 1000000000.000000000000000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.seq.d :: ++input: -7.000000000000000 -5786.500000000000000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.seq.d :: ++input: -347856.500000000000000 1752.000000000000000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.seq.d :: ++input: 356047.500000000000000 0.015625000000000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.seq.d :: ++input: -1.000000000000000 0.031250000000000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.seq.d :: ++input: 23.062500000000000 -248562.750000000000000 ++output: 0 ++fcsr: 0x100 ++roundig 
mode: +inf ++fcmp.seq.d :: ++input: 0.000000000000000 -45786.500000000000000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.seq.d :: ++input: 456.250000000000000 456.250000000000000 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.seq.d :: ++input: 3.000000000000000 34.031250000000000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.seq.d :: ++input: -1.000000000000000 45786.750000000000000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.seq.d :: ++input: 1384.500000000000000 1752065.000000000000000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.seq.d :: ++input: -7.250000000000000 107.000000000000000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.seq.d :: ++input: 1000000000.000000000000000 -45667.250000000000000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.seq.d :: ++input: -5786.500000000000000 -7.250000000000000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.seq.d :: ++input: 1752.000000000000000 -347856.500000000000000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.seq.d :: ++input: 0.015625000000000 356047.500000000000000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.seq.d :: ++input: 0.031250000000000 -1.000000000000000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.seq.d :: ++input: -248562.750000000000000 23.062500000000000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.seq.d :: ++input: -45786.500000000000000 0.000000000000000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.seq.d :: ++input: 456.000000000000000 456.250000000000000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.seq.d :: ++input: 34.031250000000000 3.000000000000000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.seq.d :: ++input: 45786.750000000000000 -1.000000000000000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.seq.d :: ++input: 1752065.000000000000000 1384.500000000000000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.seq.d :: ++input: 107.000000000000000 -7.000000000000000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.seq.d :: ++input: -45667.250000000000000 1000000000.000000000000000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.seq.d :: ++input: -7.000000000000000 -5786.500000000000000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.seq.d :: ++input: -347856.500000000000000 1752.000000000000000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.seq.d :: ++input: 356047.500000000000000 0.015625000000000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.seq.d :: ++input: -1.000000000000000 0.031250000000000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.seq.d :: ++input: 23.062500000000000 -248562.750000000000000 ++output: 0 ++fcsr: 0x200 ++roundig mode: -inf ++fcmp.seq.d :: ++input: 0.000000000000000 -45786.500000000000000 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.seq.d :: ++input: 456.250000000000000 456.250000000000000 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.seq.d :: ++input: 3.000000000000000 34.031250000000000 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.seq.d :: ++input: -1.000000000000000 45786.750000000000000 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.seq.d :: ++input: 1384.500000000000000 1752065.000000000000000 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.seq.d :: ++input: -7.250000000000000 107.000000000000000 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.seq.d :: ++input: 1000000000.000000000000000 -45667.250000000000000 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.seq.d :: 
++input: -5786.500000000000000 -7.250000000000000 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.seq.d :: ++input: 1752.000000000000000 -347856.500000000000000 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.seq.d :: ++input: 0.015625000000000 356047.500000000000000 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.seq.d :: ++input: 0.031250000000000 -1.000000000000000 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.seq.d :: ++input: -248562.750000000000000 23.062500000000000 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.seq.d :: ++input: -45786.500000000000000 0.000000000000000 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.seq.d :: ++input: 456.000000000000000 456.250000000000000 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.seq.d :: ++input: 34.031250000000000 3.000000000000000 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.seq.d :: ++input: 45786.750000000000000 -1.000000000000000 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.seq.d :: ++input: 1752065.000000000000000 1384.500000000000000 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.seq.d :: ++input: 107.000000000000000 -7.000000000000000 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.seq.d :: ++input: -45667.250000000000000 1000000000.000000000000000 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.seq.d :: ++input: -7.000000000000000 -5786.500000000000000 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.seq.d :: ++input: -347856.500000000000000 1752.000000000000000 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.seq.d :: ++input: 356047.500000000000000 0.015625000000000 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.seq.d :: ++input: -1.000000000000000 0.031250000000000 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.seq.d :: ++input: 23.062500000000000 -248562.750000000000000 ++output: 0 ++fcsr: 0x300 ++roundig mode: near ++fcmp.cle.s :: ++input: 0.000000 -4578.500000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.cle.s :: ++input: 456.250000 456.250000 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.cle.s :: ++input: 3.000000 34.031250 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.cle.s :: ++input: -1.000000 4578.750000 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.cle.s :: ++input: 1384.500000 175.000000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.cle.s :: ++input: -7.250000 107.000000 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.cle.s :: ++input: 1000000000.000000 -456.250000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.cle.s :: ++input: -5786.500000 -7.250000 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.cle.s :: ++input: 1752.000000 -3478.500000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.cle.s :: ++input: 0.015625 356.500000 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.cle.s :: ++input: 0.031250 -1.000000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.cle.s :: ++input: -248562.750000 23.062500 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.cle.s :: ++input: -45786.500000 0.000000 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.cle.s :: ++input: 456.000000 456.250000 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.cle.s :: ++input: 34.031250 3.000000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.cle.s :: ++input: 45786.750000 -1.000000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.cle.s :: ++input: 1752065.000000 1384.500000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.cle.s :: ++input: 107.000000 -7.000000 ++output: 0 ++fcsr: 0 ++roundig mode: near 
++fcmp.cle.s :: ++input: -45667.250000 100.000000 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.cle.s :: ++input: -7.000000 -5786.500000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.cle.s :: ++input: -347856.500000 1752.000000 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.cle.s :: ++input: 356047.500000 0.015625 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.cle.s :: ++input: -1.000000 0.031250 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.cle.s :: ++input: 23.062500 -248562.750000 ++output: 0 ++fcsr: 0 ++roundig mode: zero ++fcmp.cle.s :: ++input: 0.000000 -4578.500000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.cle.s :: ++input: 456.250000 456.250000 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.cle.s :: ++input: 3.000000 34.031250 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.cle.s :: ++input: -1.000000 4578.750000 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.cle.s :: ++input: 1384.500000 175.000000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.cle.s :: ++input: -7.250000 107.000000 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.cle.s :: ++input: 1000000000.000000 -456.250000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.cle.s :: ++input: -5786.500000 -7.250000 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.cle.s :: ++input: 1752.000000 -3478.500000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.cle.s :: ++input: 0.015625 356.500000 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.cle.s :: ++input: 0.031250 -1.000000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.cle.s :: ++input: -248562.750000 23.062500 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.cle.s :: ++input: -45786.500000 0.000000 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.cle.s :: ++input: 456.000000 456.250000 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.cle.s :: ++input: 34.031250 3.000000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.cle.s :: ++input: 45786.750000 -1.000000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.cle.s :: ++input: 1752065.000000 1384.500000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.cle.s :: ++input: 107.000000 -7.000000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.cle.s :: ++input: -45667.250000 100.000000 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.cle.s :: ++input: -7.000000 -5786.500000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.cle.s :: ++input: -347856.500000 1752.000000 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.cle.s :: ++input: 356047.500000 0.015625 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.cle.s :: ++input: -1.000000 0.031250 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.cle.s :: ++input: 23.062500 -248562.750000 ++output: 0 ++fcsr: 0x100 ++roundig mode: +inf ++fcmp.cle.s :: ++input: 0.000000 -4578.500000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.cle.s :: ++input: 456.250000 456.250000 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.cle.s :: ++input: 3.000000 34.031250 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.cle.s :: ++input: -1.000000 4578.750000 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.cle.s :: ++input: 1384.500000 175.000000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.cle.s :: ++input: -7.250000 107.000000 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.cle.s :: ++input: 1000000000.000000 -456.250000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.cle.s :: ++input: -5786.500000 -7.250000 ++output: 1 
++fcsr: 0x200 ++roundig mode: +inf ++fcmp.cle.s :: ++input: 1752.000000 -3478.500000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.cle.s :: ++input: 0.015625 356.500000 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.cle.s :: ++input: 0.031250 -1.000000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.cle.s :: ++input: -248562.750000 23.062500 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.cle.s :: ++input: -45786.500000 0.000000 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.cle.s :: ++input: 456.000000 456.250000 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.cle.s :: ++input: 34.031250 3.000000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.cle.s :: ++input: 45786.750000 -1.000000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.cle.s :: ++input: 1752065.000000 1384.500000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.cle.s :: ++input: 107.000000 -7.000000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.cle.s :: ++input: -45667.250000 100.000000 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.cle.s :: ++input: -7.000000 -5786.500000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.cle.s :: ++input: -347856.500000 1752.000000 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.cle.s :: ++input: 356047.500000 0.015625 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.cle.s :: ++input: -1.000000 0.031250 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.cle.s :: ++input: 23.062500 -248562.750000 ++output: 0 ++fcsr: 0x200 ++roundig mode: -inf ++fcmp.cle.s :: ++input: 0.000000 -4578.500000 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.cle.s :: ++input: 456.250000 456.250000 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.cle.s :: ++input: 3.000000 34.031250 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.cle.s :: ++input: -1.000000 4578.750000 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.cle.s :: ++input: 1384.500000 175.000000 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.cle.s :: ++input: -7.250000 107.000000 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.cle.s :: ++input: 1000000000.000000 -456.250000 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.cle.s :: ++input: -5786.500000 -7.250000 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.cle.s :: ++input: 1752.000000 -3478.500000 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.cle.s :: ++input: 0.015625 356.500000 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.cle.s :: ++input: 0.031250 -1.000000 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.cle.s :: ++input: -248562.750000 23.062500 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.cle.s :: ++input: -45786.500000 0.000000 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.cle.s :: ++input: 456.000000 456.250000 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.cle.s :: ++input: 34.031250 3.000000 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.cle.s :: ++input: 45786.750000 -1.000000 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.cle.s :: ++input: 1752065.000000 1384.500000 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.cle.s :: ++input: 107.000000 -7.000000 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.cle.s :: ++input: -45667.250000 100.000000 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.cle.s :: ++input: -7.000000 -5786.500000 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.cle.s :: ++input: -347856.500000 1752.000000 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf 
++fcmp.cle.s :: ++input: 356047.500000 0.015625 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.cle.s :: ++input: -1.000000 0.031250 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.cle.s :: ++input: 23.062500 -248562.750000 ++output: 0 ++fcsr: 0x300 ++roundig mode: near ++fcmp.cle.d :: ++input: 0.000000000000000 -45786.500000000000000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.cle.d :: ++input: 456.250000000000000 456.250000000000000 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.cle.d :: ++input: 3.000000000000000 34.031250000000000 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.cle.d :: ++input: -1.000000000000000 45786.750000000000000 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.cle.d :: ++input: 1384.500000000000000 1752065.000000000000000 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.cle.d :: ++input: -7.250000000000000 107.000000000000000 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.cle.d :: ++input: 1000000000.000000000000000 -45667.250000000000000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.cle.d :: ++input: -5786.500000000000000 -7.250000000000000 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.cle.d :: ++input: 1752.000000000000000 -347856.500000000000000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.cle.d :: ++input: 0.015625000000000 356047.500000000000000 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.cle.d :: ++input: 0.031250000000000 -1.000000000000000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.cle.d :: ++input: -248562.750000000000000 23.062500000000000 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.cle.d :: ++input: -45786.500000000000000 0.000000000000000 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.cle.d :: ++input: 456.000000000000000 456.250000000000000 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.cle.d :: ++input: 34.031250000000000 3.000000000000000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.cle.d :: ++input: 45786.750000000000000 -1.000000000000000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.cle.d :: ++input: 1752065.000000000000000 1384.500000000000000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.cle.d :: ++input: 107.000000000000000 -7.000000000000000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.cle.d :: ++input: -45667.250000000000000 1000000000.000000000000000 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.cle.d :: ++input: -7.000000000000000 -5786.500000000000000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.cle.d :: ++input: -347856.500000000000000 1752.000000000000000 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.cle.d :: ++input: 356047.500000000000000 0.015625000000000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.cle.d :: ++input: -1.000000000000000 0.031250000000000 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.cle.d :: ++input: 23.062500000000000 -248562.750000000000000 ++output: 0 ++fcsr: 0 ++roundig mode: zero ++fcmp.cle.d :: ++input: 0.000000000000000 -45786.500000000000000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.cle.d :: ++input: 456.250000000000000 456.250000000000000 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.cle.d :: ++input: 3.000000000000000 34.031250000000000 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.cle.d :: ++input: -1.000000000000000 45786.750000000000000 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.cle.d :: ++input: 1384.500000000000000 1752065.000000000000000 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.cle.d :: ++input: -7.250000000000000 107.000000000000000 ++output: 1 
++fcsr: 0x100 ++roundig mode: zero ++fcmp.cle.d :: ++input: 1000000000.000000000000000 -45667.250000000000000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.cle.d :: ++input: -5786.500000000000000 -7.250000000000000 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.cle.d :: ++input: 1752.000000000000000 -347856.500000000000000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.cle.d :: ++input: 0.015625000000000 356047.500000000000000 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.cle.d :: ++input: 0.031250000000000 -1.000000000000000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.cle.d :: ++input: -248562.750000000000000 23.062500000000000 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.cle.d :: ++input: -45786.500000000000000 0.000000000000000 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.cle.d :: ++input: 456.000000000000000 456.250000000000000 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.cle.d :: ++input: 34.031250000000000 3.000000000000000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.cle.d :: ++input: 45786.750000000000000 -1.000000000000000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.cle.d :: ++input: 1752065.000000000000000 1384.500000000000000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.cle.d :: ++input: 107.000000000000000 -7.000000000000000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.cle.d :: ++input: -45667.250000000000000 1000000000.000000000000000 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.cle.d :: ++input: -7.000000000000000 -5786.500000000000000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.cle.d :: ++input: -347856.500000000000000 1752.000000000000000 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.cle.d :: ++input: 356047.500000000000000 0.015625000000000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.cle.d :: ++input: -1.000000000000000 0.031250000000000 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.cle.d :: ++input: 23.062500000000000 -248562.750000000000000 ++output: 0 ++fcsr: 0x100 ++roundig mode: +inf ++fcmp.cle.d :: ++input: 0.000000000000000 -45786.500000000000000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.cle.d :: ++input: 456.250000000000000 456.250000000000000 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.cle.d :: ++input: 3.000000000000000 34.031250000000000 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.cle.d :: ++input: -1.000000000000000 45786.750000000000000 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.cle.d :: ++input: 1384.500000000000000 1752065.000000000000000 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.cle.d :: ++input: -7.250000000000000 107.000000000000000 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.cle.d :: ++input: 1000000000.000000000000000 -45667.250000000000000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.cle.d :: ++input: -5786.500000000000000 -7.250000000000000 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.cle.d :: ++input: 1752.000000000000000 -347856.500000000000000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.cle.d :: ++input: 0.015625000000000 356047.500000000000000 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.cle.d :: ++input: 0.031250000000000 -1.000000000000000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.cle.d :: ++input: -248562.750000000000000 23.062500000000000 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.cle.d :: ++input: -45786.500000000000000 0.000000000000000 ++output: 1 ++fcsr: 0x200 ++roundig 
mode: +inf ++fcmp.cle.d :: ++input: 456.000000000000000 456.250000000000000 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.cle.d :: ++input: 34.031250000000000 3.000000000000000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.cle.d :: ++input: 45786.750000000000000 -1.000000000000000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.cle.d :: ++input: 1752065.000000000000000 1384.500000000000000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.cle.d :: ++input: 107.000000000000000 -7.000000000000000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.cle.d :: ++input: -45667.250000000000000 1000000000.000000000000000 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.cle.d :: ++input: -7.000000000000000 -5786.500000000000000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.cle.d :: ++input: -347856.500000000000000 1752.000000000000000 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.cle.d :: ++input: 356047.500000000000000 0.015625000000000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.cle.d :: ++input: -1.000000000000000 0.031250000000000 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.cle.d :: ++input: 23.062500000000000 -248562.750000000000000 ++output: 0 ++fcsr: 0x200 ++roundig mode: -inf ++fcmp.cle.d :: ++input: 0.000000000000000 -45786.500000000000000 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.cle.d :: ++input: 456.250000000000000 456.250000000000000 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.cle.d :: ++input: 3.000000000000000 34.031250000000000 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.cle.d :: ++input: -1.000000000000000 45786.750000000000000 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.cle.d :: ++input: 1384.500000000000000 1752065.000000000000000 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.cle.d :: ++input: -7.250000000000000 107.000000000000000 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.cle.d :: ++input: 1000000000.000000000000000 -45667.250000000000000 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.cle.d :: ++input: -5786.500000000000000 -7.250000000000000 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.cle.d :: ++input: 1752.000000000000000 -347856.500000000000000 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.cle.d :: ++input: 0.015625000000000 356047.500000000000000 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.cle.d :: ++input: 0.031250000000000 -1.000000000000000 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.cle.d :: ++input: -248562.750000000000000 23.062500000000000 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.cle.d :: ++input: -45786.500000000000000 0.000000000000000 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.cle.d :: ++input: 456.000000000000000 456.250000000000000 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.cle.d :: ++input: 34.031250000000000 3.000000000000000 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.cle.d :: ++input: 45786.750000000000000 -1.000000000000000 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.cle.d :: ++input: 1752065.000000000000000 1384.500000000000000 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.cle.d :: ++input: 107.000000000000000 -7.000000000000000 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.cle.d :: ++input: -45667.250000000000000 1000000000.000000000000000 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.cle.d :: ++input: -7.000000000000000 -5786.500000000000000 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.cle.d :: 
++input: -347856.500000000000000 1752.000000000000000 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.cle.d :: ++input: 356047.500000000000000 0.015625000000000 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.cle.d :: ++input: -1.000000000000000 0.031250000000000 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.cle.d :: ++input: 23.062500000000000 -248562.750000000000000 ++output: 0 ++fcsr: 0x300 ++roundig mode: near ++fcmp.sle.s :: ++input: 0.000000 -4578.500000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.sle.s :: ++input: 456.250000 456.250000 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.sle.s :: ++input: 3.000000 34.031250 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.sle.s :: ++input: -1.000000 4578.750000 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.sle.s :: ++input: 1384.500000 175.000000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.sle.s :: ++input: -7.250000 107.000000 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.sle.s :: ++input: 1000000000.000000 -456.250000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.sle.s :: ++input: -5786.500000 -7.250000 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.sle.s :: ++input: 1752.000000 -3478.500000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.sle.s :: ++input: 0.015625 356.500000 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.sle.s :: ++input: 0.031250 -1.000000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.sle.s :: ++input: -248562.750000 23.062500 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.sle.s :: ++input: -45786.500000 0.000000 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.sle.s :: ++input: 456.000000 456.250000 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.sle.s :: ++input: 34.031250 3.000000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.sle.s :: ++input: 45786.750000 -1.000000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.sle.s :: ++input: 1752065.000000 1384.500000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.sle.s :: ++input: 107.000000 -7.000000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.sle.s :: ++input: -45667.250000 100.000000 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.sle.s :: ++input: -7.000000 -5786.500000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.sle.s :: ++input: -347856.500000 1752.000000 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.sle.s :: ++input: 356047.500000 0.015625 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.sle.s :: ++input: -1.000000 0.031250 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.sle.s :: ++input: 23.062500 -248562.750000 ++output: 0 ++fcsr: 0 ++roundig mode: zero ++fcmp.sle.s :: ++input: 0.000000 -4578.500000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.sle.s :: ++input: 456.250000 456.250000 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.sle.s :: ++input: 3.000000 34.031250 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.sle.s :: ++input: -1.000000 4578.750000 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.sle.s :: ++input: 1384.500000 175.000000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.sle.s :: ++input: -7.250000 107.000000 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.sle.s :: ++input: 1000000000.000000 -456.250000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.sle.s :: ++input: -5786.500000 -7.250000 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.sle.s :: ++input: 1752.000000 -3478.500000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.sle.s :: ++input: 0.015625 356.500000 ++output: 1 ++fcsr: 0x100 ++roundig 
mode: zero ++fcmp.sle.s :: ++input: 0.031250 -1.000000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.sle.s :: ++input: -248562.750000 23.062500 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.sle.s :: ++input: -45786.500000 0.000000 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.sle.s :: ++input: 456.000000 456.250000 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.sle.s :: ++input: 34.031250 3.000000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.sle.s :: ++input: 45786.750000 -1.000000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.sle.s :: ++input: 1752065.000000 1384.500000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.sle.s :: ++input: 107.000000 -7.000000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.sle.s :: ++input: -45667.250000 100.000000 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.sle.s :: ++input: -7.000000 -5786.500000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.sle.s :: ++input: -347856.500000 1752.000000 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.sle.s :: ++input: 356047.500000 0.015625 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.sle.s :: ++input: -1.000000 0.031250 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.sle.s :: ++input: 23.062500 -248562.750000 ++output: 0 ++fcsr: 0x100 ++roundig mode: +inf ++fcmp.sle.s :: ++input: 0.000000 -4578.500000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.sle.s :: ++input: 456.250000 456.250000 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.sle.s :: ++input: 3.000000 34.031250 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.sle.s :: ++input: -1.000000 4578.750000 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.sle.s :: ++input: 1384.500000 175.000000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.sle.s :: ++input: -7.250000 107.000000 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.sle.s :: ++input: 1000000000.000000 -456.250000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.sle.s :: ++input: -5786.500000 -7.250000 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.sle.s :: ++input: 1752.000000 -3478.500000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.sle.s :: ++input: 0.015625 356.500000 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.sle.s :: ++input: 0.031250 -1.000000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.sle.s :: ++input: -248562.750000 23.062500 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.sle.s :: ++input: -45786.500000 0.000000 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.sle.s :: ++input: 456.000000 456.250000 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.sle.s :: ++input: 34.031250 3.000000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.sle.s :: ++input: 45786.750000 -1.000000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.sle.s :: ++input: 1752065.000000 1384.500000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.sle.s :: ++input: 107.000000 -7.000000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.sle.s :: ++input: -45667.250000 100.000000 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.sle.s :: ++input: -7.000000 -5786.500000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.sle.s :: ++input: -347856.500000 1752.000000 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.sle.s :: ++input: 356047.500000 0.015625 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.sle.s :: ++input: -1.000000 0.031250 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.sle.s :: ++input: 23.062500 
-248562.750000 ++output: 0 ++fcsr: 0x200 ++roundig mode: -inf ++fcmp.sle.s :: ++input: 0.000000 -4578.500000 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.sle.s :: ++input: 456.250000 456.250000 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.sle.s :: ++input: 3.000000 34.031250 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.sle.s :: ++input: -1.000000 4578.750000 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.sle.s :: ++input: 1384.500000 175.000000 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.sle.s :: ++input: -7.250000 107.000000 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.sle.s :: ++input: 1000000000.000000 -456.250000 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.sle.s :: ++input: -5786.500000 -7.250000 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.sle.s :: ++input: 1752.000000 -3478.500000 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.sle.s :: ++input: 0.015625 356.500000 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.sle.s :: ++input: 0.031250 -1.000000 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.sle.s :: ++input: -248562.750000 23.062500 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.sle.s :: ++input: -45786.500000 0.000000 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.sle.s :: ++input: 456.000000 456.250000 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.sle.s :: ++input: 34.031250 3.000000 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.sle.s :: ++input: 45786.750000 -1.000000 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.sle.s :: ++input: 1752065.000000 1384.500000 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.sle.s :: ++input: 107.000000 -7.000000 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.sle.s :: ++input: -45667.250000 100.000000 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.sle.s :: ++input: -7.000000 -5786.500000 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.sle.s :: ++input: -347856.500000 1752.000000 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.sle.s :: ++input: 356047.500000 0.015625 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.sle.s :: ++input: -1.000000 0.031250 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.sle.s :: ++input: 23.062500 -248562.750000 ++output: 0 ++fcsr: 0x300 ++roundig mode: near ++fcmp.sle.d :: ++input: 0.000000000000000 -45786.500000000000000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.sle.d :: ++input: 456.250000000000000 456.250000000000000 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.sle.d :: ++input: 3.000000000000000 34.031250000000000 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.sle.d :: ++input: -1.000000000000000 45786.750000000000000 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.sle.d :: ++input: 1384.500000000000000 1752065.000000000000000 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.sle.d :: ++input: -7.250000000000000 107.000000000000000 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.sle.d :: ++input: 1000000000.000000000000000 -45667.250000000000000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.sle.d :: ++input: -5786.500000000000000 -7.250000000000000 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.sle.d :: ++input: 1752.000000000000000 -347856.500000000000000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.sle.d :: ++input: 0.015625000000000 356047.500000000000000 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.sle.d :: ++input: 0.031250000000000 -1.000000000000000 ++output: 0 ++fcsr: 0 ++roundig mode: near 
++fcmp.sle.d :: ++input: -248562.750000000000000 23.062500000000000 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.sle.d :: ++input: -45786.500000000000000 0.000000000000000 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.sle.d :: ++input: 456.000000000000000 456.250000000000000 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.sle.d :: ++input: 34.031250000000000 3.000000000000000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.sle.d :: ++input: 45786.750000000000000 -1.000000000000000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.sle.d :: ++input: 1752065.000000000000000 1384.500000000000000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.sle.d :: ++input: 107.000000000000000 -7.000000000000000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.sle.d :: ++input: -45667.250000000000000 1000000000.000000000000000 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.sle.d :: ++input: -7.000000000000000 -5786.500000000000000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.sle.d :: ++input: -347856.500000000000000 1752.000000000000000 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.sle.d :: ++input: 356047.500000000000000 0.015625000000000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.sle.d :: ++input: -1.000000000000000 0.031250000000000 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.sle.d :: ++input: 23.062500000000000 -248562.750000000000000 ++output: 0 ++fcsr: 0 ++roundig mode: zero ++fcmp.sle.d :: ++input: 0.000000000000000 -45786.500000000000000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.sle.d :: ++input: 456.250000000000000 456.250000000000000 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.sle.d :: ++input: 3.000000000000000 34.031250000000000 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.sle.d :: ++input: -1.000000000000000 45786.750000000000000 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.sle.d :: ++input: 1384.500000000000000 1752065.000000000000000 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.sle.d :: ++input: -7.250000000000000 107.000000000000000 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.sle.d :: ++input: 1000000000.000000000000000 -45667.250000000000000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.sle.d :: ++input: -5786.500000000000000 -7.250000000000000 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.sle.d :: ++input: 1752.000000000000000 -347856.500000000000000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.sle.d :: ++input: 0.015625000000000 356047.500000000000000 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.sle.d :: ++input: 0.031250000000000 -1.000000000000000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.sle.d :: ++input: -248562.750000000000000 23.062500000000000 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.sle.d :: ++input: -45786.500000000000000 0.000000000000000 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.sle.d :: ++input: 456.000000000000000 456.250000000000000 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.sle.d :: ++input: 34.031250000000000 3.000000000000000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.sle.d :: ++input: 45786.750000000000000 -1.000000000000000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.sle.d :: ++input: 1752065.000000000000000 1384.500000000000000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.sle.d :: ++input: 107.000000000000000 -7.000000000000000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.sle.d :: ++input: -45667.250000000000000 1000000000.000000000000000 ++output: 1 
++fcsr: 0x100 ++roundig mode: zero ++fcmp.sle.d :: ++input: -7.000000000000000 -5786.500000000000000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.sle.d :: ++input: -347856.500000000000000 1752.000000000000000 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.sle.d :: ++input: 356047.500000000000000 0.015625000000000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.sle.d :: ++input: -1.000000000000000 0.031250000000000 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.sle.d :: ++input: 23.062500000000000 -248562.750000000000000 ++output: 0 ++fcsr: 0x100 ++roundig mode: +inf ++fcmp.sle.d :: ++input: 0.000000000000000 -45786.500000000000000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.sle.d :: ++input: 456.250000000000000 456.250000000000000 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.sle.d :: ++input: 3.000000000000000 34.031250000000000 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.sle.d :: ++input: -1.000000000000000 45786.750000000000000 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.sle.d :: ++input: 1384.500000000000000 1752065.000000000000000 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.sle.d :: ++input: -7.250000000000000 107.000000000000000 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.sle.d :: ++input: 1000000000.000000000000000 -45667.250000000000000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.sle.d :: ++input: -5786.500000000000000 -7.250000000000000 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.sle.d :: ++input: 1752.000000000000000 -347856.500000000000000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.sle.d :: ++input: 0.015625000000000 356047.500000000000000 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.sle.d :: ++input: 0.031250000000000 -1.000000000000000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.sle.d :: ++input: -248562.750000000000000 23.062500000000000 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.sle.d :: ++input: -45786.500000000000000 0.000000000000000 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.sle.d :: ++input: 456.000000000000000 456.250000000000000 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.sle.d :: ++input: 34.031250000000000 3.000000000000000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.sle.d :: ++input: 45786.750000000000000 -1.000000000000000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.sle.d :: ++input: 1752065.000000000000000 1384.500000000000000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.sle.d :: ++input: 107.000000000000000 -7.000000000000000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.sle.d :: ++input: -45667.250000000000000 1000000000.000000000000000 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.sle.d :: ++input: -7.000000000000000 -5786.500000000000000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.sle.d :: ++input: -347856.500000000000000 1752.000000000000000 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.sle.d :: ++input: 356047.500000000000000 0.015625000000000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.sle.d :: ++input: -1.000000000000000 0.031250000000000 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.sle.d :: ++input: 23.062500000000000 -248562.750000000000000 ++output: 0 ++fcsr: 0x200 ++roundig mode: -inf ++fcmp.sle.d :: ++input: 0.000000000000000 -45786.500000000000000 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.sle.d :: ++input: 456.250000000000000 456.250000000000000 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf 
++fcmp.sle.d :: ++input: 3.000000000000000 34.031250000000000 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.sle.d :: ++input: -1.000000000000000 45786.750000000000000 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.sle.d :: ++input: 1384.500000000000000 1752065.000000000000000 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.sle.d :: ++input: -7.250000000000000 107.000000000000000 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.sle.d :: ++input: 1000000000.000000000000000 -45667.250000000000000 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.sle.d :: ++input: -5786.500000000000000 -7.250000000000000 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.sle.d :: ++input: 1752.000000000000000 -347856.500000000000000 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.sle.d :: ++input: 0.015625000000000 356047.500000000000000 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.sle.d :: ++input: 0.031250000000000 -1.000000000000000 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.sle.d :: ++input: -248562.750000000000000 23.062500000000000 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.sle.d :: ++input: -45786.500000000000000 0.000000000000000 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.sle.d :: ++input: 456.000000000000000 456.250000000000000 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.sle.d :: ++input: 34.031250000000000 3.000000000000000 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.sle.d :: ++input: 45786.750000000000000 -1.000000000000000 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.sle.d :: ++input: 1752065.000000000000000 1384.500000000000000 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.sle.d :: ++input: 107.000000000000000 -7.000000000000000 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.sle.d :: ++input: -45667.250000000000000 1000000000.000000000000000 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.sle.d :: ++input: -7.000000000000000 -5786.500000000000000 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.sle.d :: ++input: -347856.500000000000000 1752.000000000000000 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.sle.d :: ++input: 356047.500000000000000 0.015625000000000 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.sle.d :: ++input: -1.000000000000000 0.031250000000000 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.sle.d :: ++input: 23.062500000000000 -248562.750000000000000 ++output: 0 ++fcsr: 0x300 ++roundig mode: near ++fcmp.cun.s :: ++input: 0.000000 -4578.500000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.cun.s :: ++input: 456.250000 456.250000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.cun.s :: ++input: 3.000000 34.031250 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.cun.s :: ++input: -1.000000 4578.750000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.cun.s :: ++input: 1384.500000 175.000000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.cun.s :: ++input: -7.250000 107.000000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.cun.s :: ++input: 1000000000.000000 -456.250000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.cun.s :: ++input: -5786.500000 -7.250000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.cun.s :: ++input: 1752.000000 -3478.500000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.cun.s :: ++input: 0.015625 356.500000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.cun.s :: ++input: 0.031250 -1.000000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.cun.s :: ++input: -248562.750000 23.062500 
++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.cun.s :: ++input: -45786.500000 0.000000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.cun.s :: ++input: 456.000000 456.250000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.cun.s :: ++input: 34.031250 3.000000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.cun.s :: ++input: 45786.750000 -1.000000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.cun.s :: ++input: 1752065.000000 1384.500000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.cun.s :: ++input: 107.000000 -7.000000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.cun.s :: ++input: -45667.250000 100.000000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.cun.s :: ++input: -7.000000 -5786.500000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.cun.s :: ++input: -347856.500000 1752.000000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.cun.s :: ++input: 356047.500000 0.015625 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.cun.s :: ++input: -1.000000 0.031250 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.cun.s :: ++input: 23.062500 -248562.750000 ++output: 0 ++fcsr: 0 ++roundig mode: zero ++fcmp.cun.s :: ++input: 0.000000 -4578.500000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.cun.s :: ++input: 456.250000 456.250000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.cun.s :: ++input: 3.000000 34.031250 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.cun.s :: ++input: -1.000000 4578.750000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.cun.s :: ++input: 1384.500000 175.000000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.cun.s :: ++input: -7.250000 107.000000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.cun.s :: ++input: 1000000000.000000 -456.250000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.cun.s :: ++input: -5786.500000 -7.250000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.cun.s :: ++input: 1752.000000 -3478.500000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.cun.s :: ++input: 0.015625 356.500000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.cun.s :: ++input: 0.031250 -1.000000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.cun.s :: ++input: -248562.750000 23.062500 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.cun.s :: ++input: -45786.500000 0.000000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.cun.s :: ++input: 456.000000 456.250000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.cun.s :: ++input: 34.031250 3.000000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.cun.s :: ++input: 45786.750000 -1.000000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.cun.s :: ++input: 1752065.000000 1384.500000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.cun.s :: ++input: 107.000000 -7.000000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.cun.s :: ++input: -45667.250000 100.000000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.cun.s :: ++input: -7.000000 -5786.500000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.cun.s :: ++input: -347856.500000 1752.000000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.cun.s :: ++input: 356047.500000 0.015625 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.cun.s :: ++input: -1.000000 0.031250 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.cun.s :: ++input: 23.062500 -248562.750000 ++output: 0 ++fcsr: 0x100 ++roundig mode: +inf ++fcmp.cun.s :: ++input: 0.000000 -4578.500000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.cun.s :: ++input: 456.250000 456.250000 
++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.cun.s :: ++input: 3.000000 34.031250 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.cun.s :: ++input: -1.000000 4578.750000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.cun.s :: ++input: 1384.500000 175.000000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.cun.s :: ++input: -7.250000 107.000000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.cun.s :: ++input: 1000000000.000000 -456.250000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.cun.s :: ++input: -5786.500000 -7.250000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.cun.s :: ++input: 1752.000000 -3478.500000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.cun.s :: ++input: 0.015625 356.500000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.cun.s :: ++input: 0.031250 -1.000000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.cun.s :: ++input: -248562.750000 23.062500 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.cun.s :: ++input: -45786.500000 0.000000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.cun.s :: ++input: 456.000000 456.250000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.cun.s :: ++input: 34.031250 3.000000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.cun.s :: ++input: 45786.750000 -1.000000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.cun.s :: ++input: 1752065.000000 1384.500000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.cun.s :: ++input: 107.000000 -7.000000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.cun.s :: ++input: -45667.250000 100.000000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.cun.s :: ++input: -7.000000 -5786.500000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.cun.s :: ++input: -347856.500000 1752.000000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.cun.s :: ++input: 356047.500000 0.015625 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.cun.s :: ++input: -1.000000 0.031250 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.cun.s :: ++input: 23.062500 -248562.750000 ++output: 0 ++fcsr: 0x200 ++roundig mode: -inf ++fcmp.cun.s :: ++input: 0.000000 -4578.500000 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.cun.s :: ++input: 456.250000 456.250000 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.cun.s :: ++input: 3.000000 34.031250 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.cun.s :: ++input: -1.000000 4578.750000 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.cun.s :: ++input: 1384.500000 175.000000 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.cun.s :: ++input: -7.250000 107.000000 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.cun.s :: ++input: 1000000000.000000 -456.250000 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.cun.s :: ++input: -5786.500000 -7.250000 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.cun.s :: ++input: 1752.000000 -3478.500000 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.cun.s :: ++input: 0.015625 356.500000 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.cun.s :: ++input: 0.031250 -1.000000 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.cun.s :: ++input: -248562.750000 23.062500 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.cun.s :: ++input: -45786.500000 0.000000 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.cun.s :: ++input: 456.000000 456.250000 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.cun.s :: ++input: 34.031250 3.000000 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf 
++fcmp.cun.s :: ++input: 45786.750000 -1.000000 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.cun.s :: ++input: 1752065.000000 1384.500000 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.cun.s :: ++input: 107.000000 -7.000000 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.cun.s :: ++input: -45667.250000 100.000000 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.cun.s :: ++input: -7.000000 -5786.500000 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.cun.s :: ++input: -347856.500000 1752.000000 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.cun.s :: ++input: 356047.500000 0.015625 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.cun.s :: ++input: -1.000000 0.031250 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.cun.s :: ++input: 23.062500 -248562.750000 ++output: 0 ++fcsr: 0x300 ++roundig mode: near ++fcmp.cun.d :: ++input: 0.000000000000000 -45786.500000000000000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.cun.d :: ++input: 456.250000000000000 456.250000000000000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.cun.d :: ++input: 3.000000000000000 34.031250000000000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.cun.d :: ++input: -1.000000000000000 45786.750000000000000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.cun.d :: ++input: 1384.500000000000000 1752065.000000000000000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.cun.d :: ++input: -7.250000000000000 107.000000000000000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.cun.d :: ++input: 1000000000.000000000000000 -45667.250000000000000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.cun.d :: ++input: -5786.500000000000000 -7.250000000000000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.cun.d :: ++input: 1752.000000000000000 -347856.500000000000000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.cun.d :: ++input: 0.015625000000000 356047.500000000000000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.cun.d :: ++input: 0.031250000000000 -1.000000000000000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.cun.d :: ++input: -248562.750000000000000 23.062500000000000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.cun.d :: ++input: -45786.500000000000000 0.000000000000000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.cun.d :: ++input: 456.000000000000000 456.250000000000000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.cun.d :: ++input: 34.031250000000000 3.000000000000000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.cun.d :: ++input: 45786.750000000000000 -1.000000000000000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.cun.d :: ++input: 1752065.000000000000000 1384.500000000000000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.cun.d :: ++input: 107.000000000000000 -7.000000000000000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.cun.d :: ++input: -45667.250000000000000 1000000000.000000000000000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.cun.d :: ++input: -7.000000000000000 -5786.500000000000000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.cun.d :: ++input: -347856.500000000000000 1752.000000000000000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.cun.d :: ++input: 356047.500000000000000 0.015625000000000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.cun.d :: ++input: -1.000000000000000 0.031250000000000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.cun.d :: ++input: 23.062500000000000 -248562.750000000000000 ++output: 0 ++fcsr: 0 ++roundig mode: zero ++fcmp.cun.d :: ++input: 0.000000000000000 
-45786.500000000000000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.cun.d :: ++input: 456.250000000000000 456.250000000000000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.cun.d :: ++input: 3.000000000000000 34.031250000000000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.cun.d :: ++input: -1.000000000000000 45786.750000000000000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.cun.d :: ++input: 1384.500000000000000 1752065.000000000000000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.cun.d :: ++input: -7.250000000000000 107.000000000000000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.cun.d :: ++input: 1000000000.000000000000000 -45667.250000000000000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.cun.d :: ++input: -5786.500000000000000 -7.250000000000000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.cun.d :: ++input: 1752.000000000000000 -347856.500000000000000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.cun.d :: ++input: 0.015625000000000 356047.500000000000000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.cun.d :: ++input: 0.031250000000000 -1.000000000000000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.cun.d :: ++input: -248562.750000000000000 23.062500000000000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.cun.d :: ++input: -45786.500000000000000 0.000000000000000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.cun.d :: ++input: 456.000000000000000 456.250000000000000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.cun.d :: ++input: 34.031250000000000 3.000000000000000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.cun.d :: ++input: 45786.750000000000000 -1.000000000000000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.cun.d :: ++input: 1752065.000000000000000 1384.500000000000000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.cun.d :: ++input: 107.000000000000000 -7.000000000000000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.cun.d :: ++input: -45667.250000000000000 1000000000.000000000000000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.cun.d :: ++input: -7.000000000000000 -5786.500000000000000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.cun.d :: ++input: -347856.500000000000000 1752.000000000000000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.cun.d :: ++input: 356047.500000000000000 0.015625000000000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.cun.d :: ++input: -1.000000000000000 0.031250000000000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.cun.d :: ++input: 23.062500000000000 -248562.750000000000000 ++output: 0 ++fcsr: 0x100 ++roundig mode: +inf ++fcmp.cun.d :: ++input: 0.000000000000000 -45786.500000000000000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.cun.d :: ++input: 456.250000000000000 456.250000000000000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.cun.d :: ++input: 3.000000000000000 34.031250000000000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.cun.d :: ++input: -1.000000000000000 45786.750000000000000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.cun.d :: ++input: 1384.500000000000000 1752065.000000000000000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.cun.d :: ++input: -7.250000000000000 107.000000000000000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.cun.d :: ++input: 1000000000.000000000000000 -45667.250000000000000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.cun.d :: ++input: -5786.500000000000000 -7.250000000000000 
[Expected-output listing elided: machine-generated test records, one per case, each giving the
rounding mode (near / zero / +inf / -inf), the instruction under test (fcmp.cun, fcmp.sun,
fcmp.cult, fcmp.sult, fcmp.cueq, in .s and .d precision), the input operand pair, the resulting
condition bit (0 or 1), and the FCSR value (0, 0x100, 0x200, 0x300 for the four rounding modes).]
near ++fcmp.cueq.d :: ++input: 0.015625000000000 356047.500000000000000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.cueq.d :: ++input: 0.031250000000000 -1.000000000000000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.cueq.d :: ++input: -248562.750000000000000 23.062500000000000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.cueq.d :: ++input: -45786.500000000000000 0.000000000000000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.cueq.d :: ++input: 456.000000000000000 456.250000000000000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.cueq.d :: ++input: 34.031250000000000 3.000000000000000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.cueq.d :: ++input: 45786.750000000000000 -1.000000000000000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.cueq.d :: ++input: 1752065.000000000000000 1384.500000000000000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.cueq.d :: ++input: 107.000000000000000 -7.000000000000000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.cueq.d :: ++input: -45667.250000000000000 1000000000.000000000000000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.cueq.d :: ++input: -7.000000000000000 -5786.500000000000000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.cueq.d :: ++input: -347856.500000000000000 1752.000000000000000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.cueq.d :: ++input: 356047.500000000000000 0.015625000000000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.cueq.d :: ++input: -1.000000000000000 0.031250000000000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.cueq.d :: ++input: 23.062500000000000 -248562.750000000000000 ++output: 0 ++fcsr: 0 ++roundig mode: zero ++fcmp.cueq.d :: ++input: 0.000000000000000 -45786.500000000000000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.cueq.d :: ++input: 456.250000000000000 456.250000000000000 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.cueq.d :: ++input: 3.000000000000000 34.031250000000000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.cueq.d :: ++input: -1.000000000000000 45786.750000000000000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.cueq.d :: ++input: 1384.500000000000000 1752065.000000000000000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.cueq.d :: ++input: -7.250000000000000 107.000000000000000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.cueq.d :: ++input: 1000000000.000000000000000 -45667.250000000000000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.cueq.d :: ++input: -5786.500000000000000 -7.250000000000000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.cueq.d :: ++input: 1752.000000000000000 -347856.500000000000000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.cueq.d :: ++input: 0.015625000000000 356047.500000000000000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.cueq.d :: ++input: 0.031250000000000 -1.000000000000000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.cueq.d :: ++input: -248562.750000000000000 23.062500000000000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.cueq.d :: ++input: -45786.500000000000000 0.000000000000000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.cueq.d :: ++input: 456.000000000000000 456.250000000000000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.cueq.d :: ++input: 34.031250000000000 3.000000000000000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.cueq.d :: ++input: 45786.750000000000000 -1.000000000000000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.cueq.d :: ++input: 1752065.000000000000000 
1384.500000000000000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.cueq.d :: ++input: 107.000000000000000 -7.000000000000000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.cueq.d :: ++input: -45667.250000000000000 1000000000.000000000000000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.cueq.d :: ++input: -7.000000000000000 -5786.500000000000000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.cueq.d :: ++input: -347856.500000000000000 1752.000000000000000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.cueq.d :: ++input: 356047.500000000000000 0.015625000000000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.cueq.d :: ++input: -1.000000000000000 0.031250000000000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.cueq.d :: ++input: 23.062500000000000 -248562.750000000000000 ++output: 0 ++fcsr: 0x100 ++roundig mode: +inf ++fcmp.cueq.d :: ++input: 0.000000000000000 -45786.500000000000000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.cueq.d :: ++input: 456.250000000000000 456.250000000000000 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.cueq.d :: ++input: 3.000000000000000 34.031250000000000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.cueq.d :: ++input: -1.000000000000000 45786.750000000000000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.cueq.d :: ++input: 1384.500000000000000 1752065.000000000000000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.cueq.d :: ++input: -7.250000000000000 107.000000000000000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.cueq.d :: ++input: 1000000000.000000000000000 -45667.250000000000000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.cueq.d :: ++input: -5786.500000000000000 -7.250000000000000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.cueq.d :: ++input: 1752.000000000000000 -347856.500000000000000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.cueq.d :: ++input: 0.015625000000000 356047.500000000000000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.cueq.d :: ++input: 0.031250000000000 -1.000000000000000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.cueq.d :: ++input: -248562.750000000000000 23.062500000000000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.cueq.d :: ++input: -45786.500000000000000 0.000000000000000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.cueq.d :: ++input: 456.000000000000000 456.250000000000000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.cueq.d :: ++input: 34.031250000000000 3.000000000000000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.cueq.d :: ++input: 45786.750000000000000 -1.000000000000000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.cueq.d :: ++input: 1752065.000000000000000 1384.500000000000000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.cueq.d :: ++input: 107.000000000000000 -7.000000000000000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.cueq.d :: ++input: -45667.250000000000000 1000000000.000000000000000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.cueq.d :: ++input: -7.000000000000000 -5786.500000000000000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.cueq.d :: ++input: -347856.500000000000000 1752.000000000000000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.cueq.d :: ++input: 356047.500000000000000 0.015625000000000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.cueq.d :: ++input: -1.000000000000000 0.031250000000000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.cueq.d :: ++input: 
23.062500000000000 -248562.750000000000000 ++output: 0 ++fcsr: 0x200 ++roundig mode: -inf ++fcmp.cueq.d :: ++input: 0.000000000000000 -45786.500000000000000 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.cueq.d :: ++input: 456.250000000000000 456.250000000000000 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.cueq.d :: ++input: 3.000000000000000 34.031250000000000 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.cueq.d :: ++input: -1.000000000000000 45786.750000000000000 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.cueq.d :: ++input: 1384.500000000000000 1752065.000000000000000 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.cueq.d :: ++input: -7.250000000000000 107.000000000000000 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.cueq.d :: ++input: 1000000000.000000000000000 -45667.250000000000000 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.cueq.d :: ++input: -5786.500000000000000 -7.250000000000000 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.cueq.d :: ++input: 1752.000000000000000 -347856.500000000000000 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.cueq.d :: ++input: 0.015625000000000 356047.500000000000000 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.cueq.d :: ++input: 0.031250000000000 -1.000000000000000 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.cueq.d :: ++input: -248562.750000000000000 23.062500000000000 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.cueq.d :: ++input: -45786.500000000000000 0.000000000000000 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.cueq.d :: ++input: 456.000000000000000 456.250000000000000 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.cueq.d :: ++input: 34.031250000000000 3.000000000000000 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.cueq.d :: ++input: 45786.750000000000000 -1.000000000000000 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.cueq.d :: ++input: 1752065.000000000000000 1384.500000000000000 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.cueq.d :: ++input: 107.000000000000000 -7.000000000000000 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.cueq.d :: ++input: -45667.250000000000000 1000000000.000000000000000 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.cueq.d :: ++input: -7.000000000000000 -5786.500000000000000 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.cueq.d :: ++input: -347856.500000000000000 1752.000000000000000 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.cueq.d :: ++input: 356047.500000000000000 0.015625000000000 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.cueq.d :: ++input: -1.000000000000000 0.031250000000000 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.cueq.d :: ++input: 23.062500000000000 -248562.750000000000000 ++output: 0 ++fcsr: 0x300 ++roundig mode: near ++fcmp.sueq.s :: ++input: 0.000000 -4578.500000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.sueq.s :: ++input: 456.250000 456.250000 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.sueq.s :: ++input: 3.000000 34.031250 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.sueq.s :: ++input: -1.000000 4578.750000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.sueq.s :: ++input: 1384.500000 175.000000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.sueq.s :: ++input: -7.250000 107.000000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.sueq.s :: ++input: 1000000000.000000 -456.250000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.sueq.s :: ++input: -5786.500000 -7.250000 ++output: 0 ++fcsr: 0 
++roundig mode: near ++fcmp.sueq.s :: ++input: 1752.000000 -3478.500000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.sueq.s :: ++input: 0.015625 356.500000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.sueq.s :: ++input: 0.031250 -1.000000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.sueq.s :: ++input: -248562.750000 23.062500 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.sueq.s :: ++input: -45786.500000 0.000000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.sueq.s :: ++input: 456.000000 456.250000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.sueq.s :: ++input: 34.031250 3.000000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.sueq.s :: ++input: 45786.750000 -1.000000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.sueq.s :: ++input: 1752065.000000 1384.500000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.sueq.s :: ++input: 107.000000 -7.000000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.sueq.s :: ++input: -45667.250000 100.000000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.sueq.s :: ++input: -7.000000 -5786.500000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.sueq.s :: ++input: -347856.500000 1752.000000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.sueq.s :: ++input: 356047.500000 0.015625 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.sueq.s :: ++input: -1.000000 0.031250 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.sueq.s :: ++input: 23.062500 -248562.750000 ++output: 0 ++fcsr: 0 ++roundig mode: zero ++fcmp.sueq.s :: ++input: 0.000000 -4578.500000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.sueq.s :: ++input: 456.250000 456.250000 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.sueq.s :: ++input: 3.000000 34.031250 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.sueq.s :: ++input: -1.000000 4578.750000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.sueq.s :: ++input: 1384.500000 175.000000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.sueq.s :: ++input: -7.250000 107.000000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.sueq.s :: ++input: 1000000000.000000 -456.250000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.sueq.s :: ++input: -5786.500000 -7.250000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.sueq.s :: ++input: 1752.000000 -3478.500000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.sueq.s :: ++input: 0.015625 356.500000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.sueq.s :: ++input: 0.031250 -1.000000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.sueq.s :: ++input: -248562.750000 23.062500 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.sueq.s :: ++input: -45786.500000 0.000000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.sueq.s :: ++input: 456.000000 456.250000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.sueq.s :: ++input: 34.031250 3.000000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.sueq.s :: ++input: 45786.750000 -1.000000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.sueq.s :: ++input: 1752065.000000 1384.500000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.sueq.s :: ++input: 107.000000 -7.000000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.sueq.s :: ++input: -45667.250000 100.000000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.sueq.s :: ++input: -7.000000 -5786.500000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.sueq.s :: ++input: -347856.500000 1752.000000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.sueq.s :: ++input: 356047.500000 0.015625 
++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.sueq.s :: ++input: -1.000000 0.031250 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.sueq.s :: ++input: 23.062500 -248562.750000 ++output: 0 ++fcsr: 0x100 ++roundig mode: +inf ++fcmp.sueq.s :: ++input: 0.000000 -4578.500000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.sueq.s :: ++input: 456.250000 456.250000 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.sueq.s :: ++input: 3.000000 34.031250 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.sueq.s :: ++input: -1.000000 4578.750000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.sueq.s :: ++input: 1384.500000 175.000000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.sueq.s :: ++input: -7.250000 107.000000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.sueq.s :: ++input: 1000000000.000000 -456.250000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.sueq.s :: ++input: -5786.500000 -7.250000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.sueq.s :: ++input: 1752.000000 -3478.500000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.sueq.s :: ++input: 0.015625 356.500000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.sueq.s :: ++input: 0.031250 -1.000000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.sueq.s :: ++input: -248562.750000 23.062500 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.sueq.s :: ++input: -45786.500000 0.000000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.sueq.s :: ++input: 456.000000 456.250000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.sueq.s :: ++input: 34.031250 3.000000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.sueq.s :: ++input: 45786.750000 -1.000000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.sueq.s :: ++input: 1752065.000000 1384.500000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.sueq.s :: ++input: 107.000000 -7.000000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.sueq.s :: ++input: -45667.250000 100.000000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.sueq.s :: ++input: -7.000000 -5786.500000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.sueq.s :: ++input: -347856.500000 1752.000000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.sueq.s :: ++input: 356047.500000 0.015625 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.sueq.s :: ++input: -1.000000 0.031250 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.sueq.s :: ++input: 23.062500 -248562.750000 ++output: 0 ++fcsr: 0x200 ++roundig mode: -inf ++fcmp.sueq.s :: ++input: 0.000000 -4578.500000 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.sueq.s :: ++input: 456.250000 456.250000 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.sueq.s :: ++input: 3.000000 34.031250 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.sueq.s :: ++input: -1.000000 4578.750000 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.sueq.s :: ++input: 1384.500000 175.000000 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.sueq.s :: ++input: -7.250000 107.000000 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.sueq.s :: ++input: 1000000000.000000 -456.250000 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.sueq.s :: ++input: -5786.500000 -7.250000 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.sueq.s :: ++input: 1752.000000 -3478.500000 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.sueq.s :: ++input: 0.015625 356.500000 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.sueq.s :: ++input: 0.031250 -1.000000 ++output: 0 
++fcsr: 0x300 ++roundig mode: -inf ++fcmp.sueq.s :: ++input: -248562.750000 23.062500 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.sueq.s :: ++input: -45786.500000 0.000000 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.sueq.s :: ++input: 456.000000 456.250000 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.sueq.s :: ++input: 34.031250 3.000000 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.sueq.s :: ++input: 45786.750000 -1.000000 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.sueq.s :: ++input: 1752065.000000 1384.500000 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.sueq.s :: ++input: 107.000000 -7.000000 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.sueq.s :: ++input: -45667.250000 100.000000 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.sueq.s :: ++input: -7.000000 -5786.500000 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.sueq.s :: ++input: -347856.500000 1752.000000 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.sueq.s :: ++input: 356047.500000 0.015625 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.sueq.s :: ++input: -1.000000 0.031250 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.sueq.s :: ++input: 23.062500 -248562.750000 ++output: 0 ++fcsr: 0x300 ++roundig mode: near ++fcmp.sueq.d :: ++input: 0.000000000000000 -45786.500000000000000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.sueq.d :: ++input: 456.250000000000000 456.250000000000000 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.sueq.d :: ++input: 3.000000000000000 34.031250000000000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.sueq.d :: ++input: -1.000000000000000 45786.750000000000000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.sueq.d :: ++input: 1384.500000000000000 1752065.000000000000000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.sueq.d :: ++input: -7.250000000000000 107.000000000000000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.sueq.d :: ++input: 1000000000.000000000000000 -45667.250000000000000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.sueq.d :: ++input: -5786.500000000000000 -7.250000000000000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.sueq.d :: ++input: 1752.000000000000000 -347856.500000000000000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.sueq.d :: ++input: 0.015625000000000 356047.500000000000000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.sueq.d :: ++input: 0.031250000000000 -1.000000000000000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.sueq.d :: ++input: -248562.750000000000000 23.062500000000000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.sueq.d :: ++input: -45786.500000000000000 0.000000000000000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.sueq.d :: ++input: 456.000000000000000 456.250000000000000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.sueq.d :: ++input: 34.031250000000000 3.000000000000000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.sueq.d :: ++input: 45786.750000000000000 -1.000000000000000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.sueq.d :: ++input: 1752065.000000000000000 1384.500000000000000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.sueq.d :: ++input: 107.000000000000000 -7.000000000000000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.sueq.d :: ++input: -45667.250000000000000 1000000000.000000000000000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.sueq.d :: ++input: -7.000000000000000 -5786.500000000000000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.sueq.d :: ++input: -347856.500000000000000 
1752.000000000000000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.sueq.d :: ++input: 356047.500000000000000 0.015625000000000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.sueq.d :: ++input: -1.000000000000000 0.031250000000000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.sueq.d :: ++input: 23.062500000000000 -248562.750000000000000 ++output: 0 ++fcsr: 0 ++roundig mode: zero ++fcmp.sueq.d :: ++input: 0.000000000000000 -45786.500000000000000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.sueq.d :: ++input: 456.250000000000000 456.250000000000000 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.sueq.d :: ++input: 3.000000000000000 34.031250000000000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.sueq.d :: ++input: -1.000000000000000 45786.750000000000000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.sueq.d :: ++input: 1384.500000000000000 1752065.000000000000000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.sueq.d :: ++input: -7.250000000000000 107.000000000000000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.sueq.d :: ++input: 1000000000.000000000000000 -45667.250000000000000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.sueq.d :: ++input: -5786.500000000000000 -7.250000000000000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.sueq.d :: ++input: 1752.000000000000000 -347856.500000000000000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.sueq.d :: ++input: 0.015625000000000 356047.500000000000000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.sueq.d :: ++input: 0.031250000000000 -1.000000000000000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.sueq.d :: ++input: -248562.750000000000000 23.062500000000000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.sueq.d :: ++input: -45786.500000000000000 0.000000000000000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.sueq.d :: ++input: 456.000000000000000 456.250000000000000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.sueq.d :: ++input: 34.031250000000000 3.000000000000000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.sueq.d :: ++input: 45786.750000000000000 -1.000000000000000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.sueq.d :: ++input: 1752065.000000000000000 1384.500000000000000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.sueq.d :: ++input: 107.000000000000000 -7.000000000000000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.sueq.d :: ++input: -45667.250000000000000 1000000000.000000000000000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.sueq.d :: ++input: -7.000000000000000 -5786.500000000000000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.sueq.d :: ++input: -347856.500000000000000 1752.000000000000000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.sueq.d :: ++input: 356047.500000000000000 0.015625000000000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.sueq.d :: ++input: -1.000000000000000 0.031250000000000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.sueq.d :: ++input: 23.062500000000000 -248562.750000000000000 ++output: 0 ++fcsr: 0x100 ++roundig mode: +inf ++fcmp.sueq.d :: ++input: 0.000000000000000 -45786.500000000000000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.sueq.d :: ++input: 456.250000000000000 456.250000000000000 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.sueq.d :: ++input: 3.000000000000000 34.031250000000000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.sueq.d :: ++input: -1.000000000000000 45786.750000000000000 
++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.sueq.d :: ++input: 1384.500000000000000 1752065.000000000000000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.sueq.d :: ++input: -7.250000000000000 107.000000000000000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.sueq.d :: ++input: 1000000000.000000000000000 -45667.250000000000000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.sueq.d :: ++input: -5786.500000000000000 -7.250000000000000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.sueq.d :: ++input: 1752.000000000000000 -347856.500000000000000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.sueq.d :: ++input: 0.015625000000000 356047.500000000000000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.sueq.d :: ++input: 0.031250000000000 -1.000000000000000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.sueq.d :: ++input: -248562.750000000000000 23.062500000000000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.sueq.d :: ++input: -45786.500000000000000 0.000000000000000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.sueq.d :: ++input: 456.000000000000000 456.250000000000000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.sueq.d :: ++input: 34.031250000000000 3.000000000000000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.sueq.d :: ++input: 45786.750000000000000 -1.000000000000000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.sueq.d :: ++input: 1752065.000000000000000 1384.500000000000000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.sueq.d :: ++input: 107.000000000000000 -7.000000000000000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.sueq.d :: ++input: -45667.250000000000000 1000000000.000000000000000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.sueq.d :: ++input: -7.000000000000000 -5786.500000000000000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.sueq.d :: ++input: -347856.500000000000000 1752.000000000000000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.sueq.d :: ++input: 356047.500000000000000 0.015625000000000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.sueq.d :: ++input: -1.000000000000000 0.031250000000000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.sueq.d :: ++input: 23.062500000000000 -248562.750000000000000 ++output: 0 ++fcsr: 0x200 ++roundig mode: -inf ++fcmp.sueq.d :: ++input: 0.000000000000000 -45786.500000000000000 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.sueq.d :: ++input: 456.250000000000000 456.250000000000000 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.sueq.d :: ++input: 3.000000000000000 34.031250000000000 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.sueq.d :: ++input: -1.000000000000000 45786.750000000000000 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.sueq.d :: ++input: 1384.500000000000000 1752065.000000000000000 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.sueq.d :: ++input: -7.250000000000000 107.000000000000000 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.sueq.d :: ++input: 1000000000.000000000000000 -45667.250000000000000 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.sueq.d :: ++input: -5786.500000000000000 -7.250000000000000 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.sueq.d :: ++input: 1752.000000000000000 -347856.500000000000000 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.sueq.d :: ++input: 0.015625000000000 356047.500000000000000 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.sueq.d :: ++input: 0.031250000000000 
-1.000000000000000 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.sueq.d :: ++input: -248562.750000000000000 23.062500000000000 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.sueq.d :: ++input: -45786.500000000000000 0.000000000000000 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.sueq.d :: ++input: 456.000000000000000 456.250000000000000 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.sueq.d :: ++input: 34.031250000000000 3.000000000000000 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.sueq.d :: ++input: 45786.750000000000000 -1.000000000000000 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.sueq.d :: ++input: 1752065.000000000000000 1384.500000000000000 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.sueq.d :: ++input: 107.000000000000000 -7.000000000000000 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.sueq.d :: ++input: -45667.250000000000000 1000000000.000000000000000 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.sueq.d :: ++input: -7.000000000000000 -5786.500000000000000 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.sueq.d :: ++input: -347856.500000000000000 1752.000000000000000 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.sueq.d :: ++input: 356047.500000000000000 0.015625000000000 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.sueq.d :: ++input: -1.000000000000000 0.031250000000000 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.sueq.d :: ++input: 23.062500000000000 -248562.750000000000000 ++output: 0 ++fcsr: 0x300 ++roundig mode: near ++fcmp.cule.s :: ++input: 0.000000 -4578.500000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.cule.s :: ++input: 456.250000 456.250000 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.cule.s :: ++input: 3.000000 34.031250 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.cule.s :: ++input: -1.000000 4578.750000 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.cule.s :: ++input: 1384.500000 175.000000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.cule.s :: ++input: -7.250000 107.000000 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.cule.s :: ++input: 1000000000.000000 -456.250000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.cule.s :: ++input: -5786.500000 -7.250000 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.cule.s :: ++input: 1752.000000 -3478.500000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.cule.s :: ++input: 0.015625 356.500000 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.cule.s :: ++input: 0.031250 -1.000000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.cule.s :: ++input: -248562.750000 23.062500 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.cule.s :: ++input: -45786.500000 0.000000 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.cule.s :: ++input: 456.000000 456.250000 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.cule.s :: ++input: 34.031250 3.000000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.cule.s :: ++input: 45786.750000 -1.000000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.cule.s :: ++input: 1752065.000000 1384.500000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.cule.s :: ++input: 107.000000 -7.000000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.cule.s :: ++input: -45667.250000 100.000000 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.cule.s :: ++input: -7.000000 -5786.500000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.cule.s :: ++input: -347856.500000 1752.000000 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.cule.s :: ++input: 356047.500000 0.015625 ++output: 0 
++fcsr: 0 ++roundig mode: near ++fcmp.cule.s :: ++input: -1.000000 0.031250 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.cule.s :: ++input: 23.062500 -248562.750000 ++output: 0 ++fcsr: 0 ++roundig mode: zero ++fcmp.cule.s :: ++input: 0.000000 -4578.500000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.cule.s :: ++input: 456.250000 456.250000 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.cule.s :: ++input: 3.000000 34.031250 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.cule.s :: ++input: -1.000000 4578.750000 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.cule.s :: ++input: 1384.500000 175.000000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.cule.s :: ++input: -7.250000 107.000000 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.cule.s :: ++input: 1000000000.000000 -456.250000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.cule.s :: ++input: -5786.500000 -7.250000 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.cule.s :: ++input: 1752.000000 -3478.500000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.cule.s :: ++input: 0.015625 356.500000 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.cule.s :: ++input: 0.031250 -1.000000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.cule.s :: ++input: -248562.750000 23.062500 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.cule.s :: ++input: -45786.500000 0.000000 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.cule.s :: ++input: 456.000000 456.250000 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.cule.s :: ++input: 34.031250 3.000000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.cule.s :: ++input: 45786.750000 -1.000000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.cule.s :: ++input: 1752065.000000 1384.500000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.cule.s :: ++input: 107.000000 -7.000000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.cule.s :: ++input: -45667.250000 100.000000 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.cule.s :: ++input: -7.000000 -5786.500000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.cule.s :: ++input: -347856.500000 1752.000000 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.cule.s :: ++input: 356047.500000 0.015625 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.cule.s :: ++input: -1.000000 0.031250 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.cule.s :: ++input: 23.062500 -248562.750000 ++output: 0 ++fcsr: 0x100 ++roundig mode: +inf ++fcmp.cule.s :: ++input: 0.000000 -4578.500000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.cule.s :: ++input: 456.250000 456.250000 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.cule.s :: ++input: 3.000000 34.031250 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.cule.s :: ++input: -1.000000 4578.750000 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.cule.s :: ++input: 1384.500000 175.000000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.cule.s :: ++input: -7.250000 107.000000 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.cule.s :: ++input: 1000000000.000000 -456.250000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.cule.s :: ++input: -5786.500000 -7.250000 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.cule.s :: ++input: 1752.000000 -3478.500000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.cule.s :: ++input: 0.015625 356.500000 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.cule.s :: ++input: 0.031250 -1.000000 ++output: 0 ++fcsr: 0x200 ++roundig 
mode: +inf ++fcmp.cule.s :: ++input: -248562.750000 23.062500 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.cule.s :: ++input: -45786.500000 0.000000 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.cule.s :: ++input: 456.000000 456.250000 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.cule.s :: ++input: 34.031250 3.000000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.cule.s :: ++input: 45786.750000 -1.000000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.cule.s :: ++input: 1752065.000000 1384.500000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.cule.s :: ++input: 107.000000 -7.000000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.cule.s :: ++input: -45667.250000 100.000000 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.cule.s :: ++input: -7.000000 -5786.500000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.cule.s :: ++input: -347856.500000 1752.000000 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.cule.s :: ++input: 356047.500000 0.015625 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.cule.s :: ++input: -1.000000 0.031250 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.cule.s :: ++input: 23.062500 -248562.750000 ++output: 0 ++fcsr: 0x200 ++roundig mode: -inf ++fcmp.cule.s :: ++input: 0.000000 -4578.500000 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.cule.s :: ++input: 456.250000 456.250000 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.cule.s :: ++input: 3.000000 34.031250 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.cule.s :: ++input: -1.000000 4578.750000 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.cule.s :: ++input: 1384.500000 175.000000 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.cule.s :: ++input: -7.250000 107.000000 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.cule.s :: ++input: 1000000000.000000 -456.250000 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.cule.s :: ++input: -5786.500000 -7.250000 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.cule.s :: ++input: 1752.000000 -3478.500000 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.cule.s :: ++input: 0.015625 356.500000 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.cule.s :: ++input: 0.031250 -1.000000 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.cule.s :: ++input: -248562.750000 23.062500 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.cule.s :: ++input: -45786.500000 0.000000 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.cule.s :: ++input: 456.000000 456.250000 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.cule.s :: ++input: 34.031250 3.000000 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.cule.s :: ++input: 45786.750000 -1.000000 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.cule.s :: ++input: 1752065.000000 1384.500000 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.cule.s :: ++input: 107.000000 -7.000000 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.cule.s :: ++input: -45667.250000 100.000000 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.cule.s :: ++input: -7.000000 -5786.500000 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.cule.s :: ++input: -347856.500000 1752.000000 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.cule.s :: ++input: 356047.500000 0.015625 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.cule.s :: ++input: -1.000000 0.031250 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.cule.s :: ++input: 23.062500 -248562.750000 ++output: 0 ++fcsr: 0x300 ++roundig 
mode: near ++fcmp.cule.d :: ++input: 0.000000000000000 -45786.500000000000000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.cule.d :: ++input: 456.250000000000000 456.250000000000000 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.cule.d :: ++input: 3.000000000000000 34.031250000000000 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.cule.d :: ++input: -1.000000000000000 45786.750000000000000 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.cule.d :: ++input: 1384.500000000000000 1752065.000000000000000 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.cule.d :: ++input: -7.250000000000000 107.000000000000000 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.cule.d :: ++input: 1000000000.000000000000000 -45667.250000000000000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.cule.d :: ++input: -5786.500000000000000 -7.250000000000000 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.cule.d :: ++input: 1752.000000000000000 -347856.500000000000000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.cule.d :: ++input: 0.015625000000000 356047.500000000000000 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.cule.d :: ++input: 0.031250000000000 -1.000000000000000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.cule.d :: ++input: -248562.750000000000000 23.062500000000000 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.cule.d :: ++input: -45786.500000000000000 0.000000000000000 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.cule.d :: ++input: 456.000000000000000 456.250000000000000 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.cule.d :: ++input: 34.031250000000000 3.000000000000000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.cule.d :: ++input: 45786.750000000000000 -1.000000000000000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.cule.d :: ++input: 1752065.000000000000000 1384.500000000000000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.cule.d :: ++input: 107.000000000000000 -7.000000000000000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.cule.d :: ++input: -45667.250000000000000 1000000000.000000000000000 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.cule.d :: ++input: -7.000000000000000 -5786.500000000000000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.cule.d :: ++input: -347856.500000000000000 1752.000000000000000 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.cule.d :: ++input: 356047.500000000000000 0.015625000000000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.cule.d :: ++input: -1.000000000000000 0.031250000000000 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.cule.d :: ++input: 23.062500000000000 -248562.750000000000000 ++output: 0 ++fcsr: 0 ++roundig mode: zero ++fcmp.cule.d :: ++input: 0.000000000000000 -45786.500000000000000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.cule.d :: ++input: 456.250000000000000 456.250000000000000 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.cule.d :: ++input: 3.000000000000000 34.031250000000000 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.cule.d :: ++input: -1.000000000000000 45786.750000000000000 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.cule.d :: ++input: 1384.500000000000000 1752065.000000000000000 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.cule.d :: ++input: -7.250000000000000 107.000000000000000 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.cule.d :: ++input: 1000000000.000000000000000 -45667.250000000000000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.cule.d :: ++input: -5786.500000000000000 -7.250000000000000 ++output: 1 
++fcsr: 0x100 ++roundig mode: zero ++fcmp.cule.d :: ++input: 1752.000000000000000 -347856.500000000000000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.cule.d :: ++input: 0.015625000000000 356047.500000000000000 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.cule.d :: ++input: 0.031250000000000 -1.000000000000000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.cule.d :: ++input: -248562.750000000000000 23.062500000000000 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.cule.d :: ++input: -45786.500000000000000 0.000000000000000 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.cule.d :: ++input: 456.000000000000000 456.250000000000000 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.cule.d :: ++input: 34.031250000000000 3.000000000000000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.cule.d :: ++input: 45786.750000000000000 -1.000000000000000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.cule.d :: ++input: 1752065.000000000000000 1384.500000000000000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.cule.d :: ++input: 107.000000000000000 -7.000000000000000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.cule.d :: ++input: -45667.250000000000000 1000000000.000000000000000 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.cule.d :: ++input: -7.000000000000000 -5786.500000000000000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.cule.d :: ++input: -347856.500000000000000 1752.000000000000000 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.cule.d :: ++input: 356047.500000000000000 0.015625000000000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.cule.d :: ++input: -1.000000000000000 0.031250000000000 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.cule.d :: ++input: 23.062500000000000 -248562.750000000000000 ++output: 0 ++fcsr: 0x100 ++roundig mode: +inf ++fcmp.cule.d :: ++input: 0.000000000000000 -45786.500000000000000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.cule.d :: ++input: 456.250000000000000 456.250000000000000 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.cule.d :: ++input: 3.000000000000000 34.031250000000000 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.cule.d :: ++input: -1.000000000000000 45786.750000000000000 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.cule.d :: ++input: 1384.500000000000000 1752065.000000000000000 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.cule.d :: ++input: -7.250000000000000 107.000000000000000 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.cule.d :: ++input: 1000000000.000000000000000 -45667.250000000000000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.cule.d :: ++input: -5786.500000000000000 -7.250000000000000 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.cule.d :: ++input: 1752.000000000000000 -347856.500000000000000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.cule.d :: ++input: 0.015625000000000 356047.500000000000000 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.cule.d :: ++input: 0.031250000000000 -1.000000000000000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.cule.d :: ++input: -248562.750000000000000 23.062500000000000 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.cule.d :: ++input: -45786.500000000000000 0.000000000000000 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.cule.d :: ++input: 456.000000000000000 456.250000000000000 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.cule.d :: ++input: 34.031250000000000 3.000000000000000 ++output: 0 ++fcsr: 
0x200 ++roundig mode: +inf ++fcmp.cule.d :: ++input: 45786.750000000000000 -1.000000000000000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.cule.d :: ++input: 1752065.000000000000000 1384.500000000000000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.cule.d :: ++input: 107.000000000000000 -7.000000000000000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.cule.d :: ++input: -45667.250000000000000 1000000000.000000000000000 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.cule.d :: ++input: -7.000000000000000 -5786.500000000000000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.cule.d :: ++input: -347856.500000000000000 1752.000000000000000 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.cule.d :: ++input: 356047.500000000000000 0.015625000000000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.cule.d :: ++input: -1.000000000000000 0.031250000000000 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.cule.d :: ++input: 23.062500000000000 -248562.750000000000000 ++output: 0 ++fcsr: 0x200 ++roundig mode: -inf ++fcmp.cule.d :: ++input: 0.000000000000000 -45786.500000000000000 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.cule.d :: ++input: 456.250000000000000 456.250000000000000 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.cule.d :: ++input: 3.000000000000000 34.031250000000000 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.cule.d :: ++input: -1.000000000000000 45786.750000000000000 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.cule.d :: ++input: 1384.500000000000000 1752065.000000000000000 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.cule.d :: ++input: -7.250000000000000 107.000000000000000 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.cule.d :: ++input: 1000000000.000000000000000 -45667.250000000000000 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.cule.d :: ++input: -5786.500000000000000 -7.250000000000000 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.cule.d :: ++input: 1752.000000000000000 -347856.500000000000000 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.cule.d :: ++input: 0.015625000000000 356047.500000000000000 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.cule.d :: ++input: 0.031250000000000 -1.000000000000000 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.cule.d :: ++input: -248562.750000000000000 23.062500000000000 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.cule.d :: ++input: -45786.500000000000000 0.000000000000000 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.cule.d :: ++input: 456.000000000000000 456.250000000000000 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.cule.d :: ++input: 34.031250000000000 3.000000000000000 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.cule.d :: ++input: 45786.750000000000000 -1.000000000000000 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.cule.d :: ++input: 1752065.000000000000000 1384.500000000000000 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.cule.d :: ++input: 107.000000000000000 -7.000000000000000 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.cule.d :: ++input: -45667.250000000000000 1000000000.000000000000000 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.cule.d :: ++input: -7.000000000000000 -5786.500000000000000 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.cule.d :: ++input: -347856.500000000000000 1752.000000000000000 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.cule.d :: ++input: 356047.500000000000000 0.015625000000000 ++output: 
0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.cule.d :: ++input: -1.000000000000000 0.031250000000000 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.cule.d :: ++input: 23.062500000000000 -248562.750000000000000 ++output: 0 ++fcsr: 0x300 ++roundig mode: near ++fcmp.sule.s :: ++input: 0.000000 -4578.500000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.sule.s :: ++input: 456.250000 456.250000 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.sule.s :: ++input: 3.000000 34.031250 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.sule.s :: ++input: -1.000000 4578.750000 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.sule.s :: ++input: 1384.500000 175.000000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.sule.s :: ++input: -7.250000 107.000000 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.sule.s :: ++input: 1000000000.000000 -456.250000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.sule.s :: ++input: -5786.500000 -7.250000 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.sule.s :: ++input: 1752.000000 -3478.500000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.sule.s :: ++input: 0.015625 356.500000 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.sule.s :: ++input: 0.031250 -1.000000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.sule.s :: ++input: -248562.750000 23.062500 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.sule.s :: ++input: -45786.500000 0.000000 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.sule.s :: ++input: 456.000000 456.250000 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.sule.s :: ++input: 34.031250 3.000000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.sule.s :: ++input: 45786.750000 -1.000000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.sule.s :: ++input: 1752065.000000 1384.500000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.sule.s :: ++input: 107.000000 -7.000000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.sule.s :: ++input: -45667.250000 100.000000 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.sule.s :: ++input: -7.000000 -5786.500000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.sule.s :: ++input: -347856.500000 1752.000000 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.sule.s :: ++input: 356047.500000 0.015625 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.sule.s :: ++input: -1.000000 0.031250 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.sule.s :: ++input: 23.062500 -248562.750000 ++output: 0 ++fcsr: 0 ++roundig mode: zero ++fcmp.sule.s :: ++input: 0.000000 -4578.500000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.sule.s :: ++input: 456.250000 456.250000 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.sule.s :: ++input: 3.000000 34.031250 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.sule.s :: ++input: -1.000000 4578.750000 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.sule.s :: ++input: 1384.500000 175.000000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.sule.s :: ++input: -7.250000 107.000000 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.sule.s :: ++input: 1000000000.000000 -456.250000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.sule.s :: ++input: -5786.500000 -7.250000 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.sule.s :: ++input: 1752.000000 -3478.500000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.sule.s :: ++input: 0.015625 356.500000 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.sule.s :: ++input: 0.031250 -1.000000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.sule.s :: ++input: 
-248562.750000 23.062500 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.sule.s :: ++input: -45786.500000 0.000000 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.sule.s :: ++input: 456.000000 456.250000 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.sule.s :: ++input: 34.031250 3.000000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.sule.s :: ++input: 45786.750000 -1.000000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.sule.s :: ++input: 1752065.000000 1384.500000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.sule.s :: ++input: 107.000000 -7.000000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.sule.s :: ++input: -45667.250000 100.000000 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.sule.s :: ++input: -7.000000 -5786.500000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.sule.s :: ++input: -347856.500000 1752.000000 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.sule.s :: ++input: 356047.500000 0.015625 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.sule.s :: ++input: -1.000000 0.031250 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.sule.s :: ++input: 23.062500 -248562.750000 ++output: 0 ++fcsr: 0x100 ++roundig mode: +inf ++fcmp.sule.s :: ++input: 0.000000 -4578.500000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.sule.s :: ++input: 456.250000 456.250000 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.sule.s :: ++input: 3.000000 34.031250 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.sule.s :: ++input: -1.000000 4578.750000 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.sule.s :: ++input: 1384.500000 175.000000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.sule.s :: ++input: -7.250000 107.000000 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.sule.s :: ++input: 1000000000.000000 -456.250000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.sule.s :: ++input: -5786.500000 -7.250000 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.sule.s :: ++input: 1752.000000 -3478.500000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.sule.s :: ++input: 0.015625 356.500000 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.sule.s :: ++input: 0.031250 -1.000000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.sule.s :: ++input: -248562.750000 23.062500 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.sule.s :: ++input: -45786.500000 0.000000 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.sule.s :: ++input: 456.000000 456.250000 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.sule.s :: ++input: 34.031250 3.000000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.sule.s :: ++input: 45786.750000 -1.000000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.sule.s :: ++input: 1752065.000000 1384.500000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.sule.s :: ++input: 107.000000 -7.000000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.sule.s :: ++input: -45667.250000 100.000000 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.sule.s :: ++input: -7.000000 -5786.500000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.sule.s :: ++input: -347856.500000 1752.000000 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.sule.s :: ++input: 356047.500000 0.015625 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.sule.s :: ++input: -1.000000 0.031250 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.sule.s :: ++input: 23.062500 -248562.750000 ++output: 0 ++fcsr: 0x200 ++roundig mode: -inf ++fcmp.sule.s :: ++input: 
0.000000 -4578.500000 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.sule.s :: ++input: 456.250000 456.250000 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.sule.s :: ++input: 3.000000 34.031250 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.sule.s :: ++input: -1.000000 4578.750000 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.sule.s :: ++input: 1384.500000 175.000000 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.sule.s :: ++input: -7.250000 107.000000 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.sule.s :: ++input: 1000000000.000000 -456.250000 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.sule.s :: ++input: -5786.500000 -7.250000 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.sule.s :: ++input: 1752.000000 -3478.500000 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.sule.s :: ++input: 0.015625 356.500000 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.sule.s :: ++input: 0.031250 -1.000000 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.sule.s :: ++input: -248562.750000 23.062500 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.sule.s :: ++input: -45786.500000 0.000000 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.sule.s :: ++input: 456.000000 456.250000 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.sule.s :: ++input: 34.031250 3.000000 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.sule.s :: ++input: 45786.750000 -1.000000 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.sule.s :: ++input: 1752065.000000 1384.500000 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.sule.s :: ++input: 107.000000 -7.000000 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.sule.s :: ++input: -45667.250000 100.000000 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.sule.s :: ++input: -7.000000 -5786.500000 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.sule.s :: ++input: -347856.500000 1752.000000 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.sule.s :: ++input: 356047.500000 0.015625 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.sule.s :: ++input: -1.000000 0.031250 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.sule.s :: ++input: 23.062500 -248562.750000 ++output: 0 ++fcsr: 0x300 ++roundig mode: near ++fcmp.sule.d :: ++input: 0.000000000000000 -45786.500000000000000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.sule.d :: ++input: 456.250000000000000 456.250000000000000 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.sule.d :: ++input: 3.000000000000000 34.031250000000000 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.sule.d :: ++input: -1.000000000000000 45786.750000000000000 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.sule.d :: ++input: 1384.500000000000000 1752065.000000000000000 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.sule.d :: ++input: -7.250000000000000 107.000000000000000 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.sule.d :: ++input: 1000000000.000000000000000 -45667.250000000000000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.sule.d :: ++input: -5786.500000000000000 -7.250000000000000 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.sule.d :: ++input: 1752.000000000000000 -347856.500000000000000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.sule.d :: ++input: 0.015625000000000 356047.500000000000000 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.sule.d :: ++input: 0.031250000000000 -1.000000000000000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.sule.d :: ++input: -248562.750000000000000 
23.062500000000000 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.sule.d :: ++input: -45786.500000000000000 0.000000000000000 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.sule.d :: ++input: 456.000000000000000 456.250000000000000 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.sule.d :: ++input: 34.031250000000000 3.000000000000000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.sule.d :: ++input: 45786.750000000000000 -1.000000000000000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.sule.d :: ++input: 1752065.000000000000000 1384.500000000000000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.sule.d :: ++input: 107.000000000000000 -7.000000000000000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.sule.d :: ++input: -45667.250000000000000 1000000000.000000000000000 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.sule.d :: ++input: -7.000000000000000 -5786.500000000000000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.sule.d :: ++input: -347856.500000000000000 1752.000000000000000 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.sule.d :: ++input: 356047.500000000000000 0.015625000000000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.sule.d :: ++input: -1.000000000000000 0.031250000000000 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.sule.d :: ++input: 23.062500000000000 -248562.750000000000000 ++output: 0 ++fcsr: 0 ++roundig mode: zero ++fcmp.sule.d :: ++input: 0.000000000000000 -45786.500000000000000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.sule.d :: ++input: 456.250000000000000 456.250000000000000 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.sule.d :: ++input: 3.000000000000000 34.031250000000000 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.sule.d :: ++input: -1.000000000000000 45786.750000000000000 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.sule.d :: ++input: 1384.500000000000000 1752065.000000000000000 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.sule.d :: ++input: -7.250000000000000 107.000000000000000 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.sule.d :: ++input: 1000000000.000000000000000 -45667.250000000000000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.sule.d :: ++input: -5786.500000000000000 -7.250000000000000 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.sule.d :: ++input: 1752.000000000000000 -347856.500000000000000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.sule.d :: ++input: 0.015625000000000 356047.500000000000000 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.sule.d :: ++input: 0.031250000000000 -1.000000000000000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.sule.d :: ++input: -248562.750000000000000 23.062500000000000 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.sule.d :: ++input: -45786.500000000000000 0.000000000000000 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.sule.d :: ++input: 456.000000000000000 456.250000000000000 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.sule.d :: ++input: 34.031250000000000 3.000000000000000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.sule.d :: ++input: 45786.750000000000000 -1.000000000000000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.sule.d :: ++input: 1752065.000000000000000 1384.500000000000000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.sule.d :: ++input: 107.000000000000000 -7.000000000000000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.sule.d :: ++input: -45667.250000000000000 1000000000.000000000000000 ++output: 1 ++fcsr: 0x100 
++roundig mode: zero ++fcmp.sule.d :: ++input: -7.000000000000000 -5786.500000000000000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.sule.d :: ++input: -347856.500000000000000 1752.000000000000000 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.sule.d :: ++input: 356047.500000000000000 0.015625000000000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.sule.d :: ++input: -1.000000000000000 0.031250000000000 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.sule.d :: ++input: 23.062500000000000 -248562.750000000000000 ++output: 0 ++fcsr: 0x100 ++roundig mode: +inf ++fcmp.sule.d :: ++input: 0.000000000000000 -45786.500000000000000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.sule.d :: ++input: 456.250000000000000 456.250000000000000 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.sule.d :: ++input: 3.000000000000000 34.031250000000000 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.sule.d :: ++input: -1.000000000000000 45786.750000000000000 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.sule.d :: ++input: 1384.500000000000000 1752065.000000000000000 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.sule.d :: ++input: -7.250000000000000 107.000000000000000 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.sule.d :: ++input: 1000000000.000000000000000 -45667.250000000000000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.sule.d :: ++input: -5786.500000000000000 -7.250000000000000 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.sule.d :: ++input: 1752.000000000000000 -347856.500000000000000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.sule.d :: ++input: 0.015625000000000 356047.500000000000000 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.sule.d :: ++input: 0.031250000000000 -1.000000000000000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.sule.d :: ++input: -248562.750000000000000 23.062500000000000 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.sule.d :: ++input: -45786.500000000000000 0.000000000000000 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.sule.d :: ++input: 456.000000000000000 456.250000000000000 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.sule.d :: ++input: 34.031250000000000 3.000000000000000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.sule.d :: ++input: 45786.750000000000000 -1.000000000000000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.sule.d :: ++input: 1752065.000000000000000 1384.500000000000000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.sule.d :: ++input: 107.000000000000000 -7.000000000000000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.sule.d :: ++input: -45667.250000000000000 1000000000.000000000000000 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.sule.d :: ++input: -7.000000000000000 -5786.500000000000000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.sule.d :: ++input: -347856.500000000000000 1752.000000000000000 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.sule.d :: ++input: 356047.500000000000000 0.015625000000000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.sule.d :: ++input: -1.000000000000000 0.031250000000000 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.sule.d :: ++input: 23.062500000000000 -248562.750000000000000 ++output: 0 ++fcsr: 0x200 ++roundig mode: -inf ++fcmp.sule.d :: ++input: 0.000000000000000 -45786.500000000000000 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.sule.d :: ++input: 456.250000000000000 456.250000000000000 ++output: 1 ++fcsr: 0x300 
++roundig mode: -inf ++fcmp.sule.d :: ++input: 3.000000000000000 34.031250000000000 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.sule.d :: ++input: -1.000000000000000 45786.750000000000000 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.sule.d :: ++input: 1384.500000000000000 1752065.000000000000000 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.sule.d :: ++input: -7.250000000000000 107.000000000000000 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.sule.d :: ++input: 1000000000.000000000000000 -45667.250000000000000 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.sule.d :: ++input: -5786.500000000000000 -7.250000000000000 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.sule.d :: ++input: 1752.000000000000000 -347856.500000000000000 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.sule.d :: ++input: 0.015625000000000 356047.500000000000000 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.sule.d :: ++input: 0.031250000000000 -1.000000000000000 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.sule.d :: ++input: -248562.750000000000000 23.062500000000000 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.sule.d :: ++input: -45786.500000000000000 0.000000000000000 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.sule.d :: ++input: 456.000000000000000 456.250000000000000 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.sule.d :: ++input: 34.031250000000000 3.000000000000000 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.sule.d :: ++input: 45786.750000000000000 -1.000000000000000 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.sule.d :: ++input: 1752065.000000000000000 1384.500000000000000 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.sule.d :: ++input: 107.000000000000000 -7.000000000000000 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.sule.d :: ++input: -45667.250000000000000 1000000000.000000000000000 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.sule.d :: ++input: -7.000000000000000 -5786.500000000000000 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.sule.d :: ++input: -347856.500000000000000 1752.000000000000000 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.sule.d :: ++input: 356047.500000000000000 0.015625000000000 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.sule.d :: ++input: -1.000000000000000 0.031250000000000 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.sule.d :: ++input: 23.062500000000000 -248562.750000000000000 ++output: 0 ++fcsr: 0x300 ++roundig mode: near ++fcmp.cne.s :: ++input: 0.000000 -4578.500000 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.cne.s :: ++input: 456.250000 456.250000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.cne.s :: ++input: 3.000000 34.031250 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.cne.s :: ++input: -1.000000 4578.750000 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.cne.s :: ++input: 1384.500000 175.000000 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.cne.s :: ++input: -7.250000 107.000000 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.cne.s :: ++input: 1000000000.000000 -456.250000 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.cne.s :: ++input: -5786.500000 -7.250000 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.cne.s :: ++input: 1752.000000 -3478.500000 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.cne.s :: ++input: 0.015625 356.500000 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.cne.s :: ++input: 0.031250 -1.000000 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.cne.s 
:: ++input: -248562.750000 23.062500 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.cne.s :: ++input: -45786.500000 0.000000 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.cne.s :: ++input: 456.000000 456.250000 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.cne.s :: ++input: 34.031250 3.000000 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.cne.s :: ++input: 45786.750000 -1.000000 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.cne.s :: ++input: 1752065.000000 1384.500000 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.cne.s :: ++input: 107.000000 -7.000000 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.cne.s :: ++input: -45667.250000 100.000000 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.cne.s :: ++input: -7.000000 -5786.500000 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.cne.s :: ++input: -347856.500000 1752.000000 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.cne.s :: ++input: 356047.500000 0.015625 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.cne.s :: ++input: -1.000000 0.031250 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.cne.s :: ++input: 23.062500 -248562.750000 ++output: 1 ++fcsr: 0 ++roundig mode: zero ++fcmp.cne.s :: ++input: 0.000000 -4578.500000 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.cne.s :: ++input: 456.250000 456.250000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.cne.s :: ++input: 3.000000 34.031250 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.cne.s :: ++input: -1.000000 4578.750000 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.cne.s :: ++input: 1384.500000 175.000000 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.cne.s :: ++input: -7.250000 107.000000 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.cne.s :: ++input: 1000000000.000000 -456.250000 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.cne.s :: ++input: -5786.500000 -7.250000 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.cne.s :: ++input: 1752.000000 -3478.500000 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.cne.s :: ++input: 0.015625 356.500000 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.cne.s :: ++input: 0.031250 -1.000000 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.cne.s :: ++input: -248562.750000 23.062500 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.cne.s :: ++input: -45786.500000 0.000000 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.cne.s :: ++input: 456.000000 456.250000 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.cne.s :: ++input: 34.031250 3.000000 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.cne.s :: ++input: 45786.750000 -1.000000 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.cne.s :: ++input: 1752065.000000 1384.500000 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.cne.s :: ++input: 107.000000 -7.000000 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.cne.s :: ++input: -45667.250000 100.000000 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.cne.s :: ++input: -7.000000 -5786.500000 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.cne.s :: ++input: -347856.500000 1752.000000 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.cne.s :: ++input: 356047.500000 0.015625 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.cne.s :: ++input: -1.000000 0.031250 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.cne.s :: ++input: 23.062500 -248562.750000 ++output: 1 ++fcsr: 0x100 ++roundig mode: +inf ++fcmp.cne.s :: ++input: 0.000000 -4578.500000 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf 
++fcmp.cne.s :: ++input: 456.250000 456.250000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.cne.s :: ++input: 3.000000 34.031250 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.cne.s :: ++input: -1.000000 4578.750000 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.cne.s :: ++input: 1384.500000 175.000000 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.cne.s :: ++input: -7.250000 107.000000 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.cne.s :: ++input: 1000000000.000000 -456.250000 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.cne.s :: ++input: -5786.500000 -7.250000 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.cne.s :: ++input: 1752.000000 -3478.500000 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.cne.s :: ++input: 0.015625 356.500000 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.cne.s :: ++input: 0.031250 -1.000000 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.cne.s :: ++input: -248562.750000 23.062500 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.cne.s :: ++input: -45786.500000 0.000000 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.cne.s :: ++input: 456.000000 456.250000 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.cne.s :: ++input: 34.031250 3.000000 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.cne.s :: ++input: 45786.750000 -1.000000 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.cne.s :: ++input: 1752065.000000 1384.500000 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.cne.s :: ++input: 107.000000 -7.000000 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.cne.s :: ++input: -45667.250000 100.000000 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.cne.s :: ++input: -7.000000 -5786.500000 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.cne.s :: ++input: -347856.500000 1752.000000 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.cne.s :: ++input: 356047.500000 0.015625 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.cne.s :: ++input: -1.000000 0.031250 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.cne.s :: ++input: 23.062500 -248562.750000 ++output: 1 ++fcsr: 0x200 ++roundig mode: -inf ++fcmp.cne.s :: ++input: 0.000000 -4578.500000 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.cne.s :: ++input: 456.250000 456.250000 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.cne.s :: ++input: 3.000000 34.031250 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.cne.s :: ++input: -1.000000 4578.750000 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.cne.s :: ++input: 1384.500000 175.000000 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.cne.s :: ++input: -7.250000 107.000000 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.cne.s :: ++input: 1000000000.000000 -456.250000 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.cne.s :: ++input: -5786.500000 -7.250000 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.cne.s :: ++input: 1752.000000 -3478.500000 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.cne.s :: ++input: 0.015625 356.500000 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.cne.s :: ++input: 0.031250 -1.000000 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.cne.s :: ++input: -248562.750000 23.062500 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.cne.s :: ++input: -45786.500000 0.000000 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.cne.s :: ++input: 456.000000 456.250000 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.cne.s :: ++input: 34.031250 3.000000 
++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.cne.s :: ++input: 45786.750000 -1.000000 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.cne.s :: ++input: 1752065.000000 1384.500000 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.cne.s :: ++input: 107.000000 -7.000000 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.cne.s :: ++input: -45667.250000 100.000000 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.cne.s :: ++input: -7.000000 -5786.500000 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.cne.s :: ++input: -347856.500000 1752.000000 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.cne.s :: ++input: 356047.500000 0.015625 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.cne.s :: ++input: -1.000000 0.031250 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.cne.s :: ++input: 23.062500 -248562.750000 ++output: 1 ++fcsr: 0x300 ++roundig mode: near ++fcmp.cne.d :: ++input: 0.000000000000000 -45786.500000000000000 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.cne.d :: ++input: 456.250000000000000 456.250000000000000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.cne.d :: ++input: 3.000000000000000 34.031250000000000 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.cne.d :: ++input: -1.000000000000000 45786.750000000000000 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.cne.d :: ++input: 1384.500000000000000 1752065.000000000000000 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.cne.d :: ++input: -7.250000000000000 107.000000000000000 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.cne.d :: ++input: 1000000000.000000000000000 -45667.250000000000000 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.cne.d :: ++input: -5786.500000000000000 -7.250000000000000 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.cne.d :: ++input: 1752.000000000000000 -347856.500000000000000 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.cne.d :: ++input: 0.015625000000000 356047.500000000000000 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.cne.d :: ++input: 0.031250000000000 -1.000000000000000 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.cne.d :: ++input: -248562.750000000000000 23.062500000000000 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.cne.d :: ++input: -45786.500000000000000 0.000000000000000 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.cne.d :: ++input: 456.000000000000000 456.250000000000000 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.cne.d :: ++input: 34.031250000000000 3.000000000000000 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.cne.d :: ++input: 45786.750000000000000 -1.000000000000000 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.cne.d :: ++input: 1752065.000000000000000 1384.500000000000000 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.cne.d :: ++input: 107.000000000000000 -7.000000000000000 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.cne.d :: ++input: -45667.250000000000000 1000000000.000000000000000 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.cne.d :: ++input: -7.000000000000000 -5786.500000000000000 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.cne.d :: ++input: -347856.500000000000000 1752.000000000000000 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.cne.d :: ++input: 356047.500000000000000 0.015625000000000 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.cne.d :: ++input: -1.000000000000000 0.031250000000000 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.cne.d :: ++input: 23.062500000000000 -248562.750000000000000 ++output: 1 ++fcsr: 0 ++roundig mode: zero ++fcmp.cne.d 
:: ++input: 0.000000000000000 -45786.500000000000000 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.cne.d :: ++input: 456.250000000000000 456.250000000000000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.cne.d :: ++input: 3.000000000000000 34.031250000000000 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.cne.d :: ++input: -1.000000000000000 45786.750000000000000 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.cne.d :: ++input: 1384.500000000000000 1752065.000000000000000 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.cne.d :: ++input: -7.250000000000000 107.000000000000000 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.cne.d :: ++input: 1000000000.000000000000000 -45667.250000000000000 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.cne.d :: ++input: -5786.500000000000000 -7.250000000000000 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.cne.d :: ++input: 1752.000000000000000 -347856.500000000000000 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.cne.d :: ++input: 0.015625000000000 356047.500000000000000 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.cne.d :: ++input: 0.031250000000000 -1.000000000000000 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.cne.d :: ++input: -248562.750000000000000 23.062500000000000 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.cne.d :: ++input: -45786.500000000000000 0.000000000000000 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.cne.d :: ++input: 456.000000000000000 456.250000000000000 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.cne.d :: ++input: 34.031250000000000 3.000000000000000 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.cne.d :: ++input: 45786.750000000000000 -1.000000000000000 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.cne.d :: ++input: 1752065.000000000000000 1384.500000000000000 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.cne.d :: ++input: 107.000000000000000 -7.000000000000000 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.cne.d :: ++input: -45667.250000000000000 1000000000.000000000000000 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.cne.d :: ++input: -7.000000000000000 -5786.500000000000000 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.cne.d :: ++input: -347856.500000000000000 1752.000000000000000 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.cne.d :: ++input: 356047.500000000000000 0.015625000000000 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.cne.d :: ++input: -1.000000000000000 0.031250000000000 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.cne.d :: ++input: 23.062500000000000 -248562.750000000000000 ++output: 1 ++fcsr: 0x100 ++roundig mode: +inf ++fcmp.cne.d :: ++input: 0.000000000000000 -45786.500000000000000 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.cne.d :: ++input: 456.250000000000000 456.250000000000000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.cne.d :: ++input: 3.000000000000000 34.031250000000000 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.cne.d :: ++input: -1.000000000000000 45786.750000000000000 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.cne.d :: ++input: 1384.500000000000000 1752065.000000000000000 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.cne.d :: ++input: -7.250000000000000 107.000000000000000 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.cne.d :: ++input: 1000000000.000000000000000 -45667.250000000000000 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.cne.d :: ++input: 
-5786.500000000000000 -7.250000000000000 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.cne.d :: ++input: 1752.000000000000000 -347856.500000000000000 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.cne.d :: ++input: 0.015625000000000 356047.500000000000000 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.cne.d :: ++input: 0.031250000000000 -1.000000000000000 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.cne.d :: ++input: -248562.750000000000000 23.062500000000000 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.cne.d :: ++input: -45786.500000000000000 0.000000000000000 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.cne.d :: ++input: 456.000000000000000 456.250000000000000 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.cne.d :: ++input: 34.031250000000000 3.000000000000000 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.cne.d :: ++input: 45786.750000000000000 -1.000000000000000 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.cne.d :: ++input: 1752065.000000000000000 1384.500000000000000 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.cne.d :: ++input: 107.000000000000000 -7.000000000000000 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.cne.d :: ++input: -45667.250000000000000 1000000000.000000000000000 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.cne.d :: ++input: -7.000000000000000 -5786.500000000000000 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.cne.d :: ++input: -347856.500000000000000 1752.000000000000000 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.cne.d :: ++input: 356047.500000000000000 0.015625000000000 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.cne.d :: ++input: -1.000000000000000 0.031250000000000 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.cne.d :: ++input: 23.062500000000000 -248562.750000000000000 ++output: 1 ++fcsr: 0x200 ++roundig mode: -inf ++fcmp.cne.d :: ++input: 0.000000000000000 -45786.500000000000000 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.cne.d :: ++input: 456.250000000000000 456.250000000000000 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.cne.d :: ++input: 3.000000000000000 34.031250000000000 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.cne.d :: ++input: -1.000000000000000 45786.750000000000000 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.cne.d :: ++input: 1384.500000000000000 1752065.000000000000000 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.cne.d :: ++input: -7.250000000000000 107.000000000000000 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.cne.d :: ++input: 1000000000.000000000000000 -45667.250000000000000 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.cne.d :: ++input: -5786.500000000000000 -7.250000000000000 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.cne.d :: ++input: 1752.000000000000000 -347856.500000000000000 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.cne.d :: ++input: 0.015625000000000 356047.500000000000000 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.cne.d :: ++input: 0.031250000000000 -1.000000000000000 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.cne.d :: ++input: -248562.750000000000000 23.062500000000000 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.cne.d :: ++input: -45786.500000000000000 0.000000000000000 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.cne.d :: ++input: 456.000000000000000 456.250000000000000 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.cne.d :: ++input: 34.031250000000000 
3.000000000000000 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.cne.d :: ++input: 45786.750000000000000 -1.000000000000000 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.cne.d :: ++input: 1752065.000000000000000 1384.500000000000000 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.cne.d :: ++input: 107.000000000000000 -7.000000000000000 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.cne.d :: ++input: -45667.250000000000000 1000000000.000000000000000 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.cne.d :: ++input: -7.000000000000000 -5786.500000000000000 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.cne.d :: ++input: -347856.500000000000000 1752.000000000000000 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.cne.d :: ++input: 356047.500000000000000 0.015625000000000 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.cne.d :: ++input: -1.000000000000000 0.031250000000000 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.cne.d :: ++input: 23.062500000000000 -248562.750000000000000 ++output: 1 ++fcsr: 0x300 ++roundig mode: near ++fcmp.sne.s :: ++input: 0.000000 -4578.500000 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.sne.s :: ++input: 456.250000 456.250000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.sne.s :: ++input: 3.000000 34.031250 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.sne.s :: ++input: -1.000000 4578.750000 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.sne.s :: ++input: 1384.500000 175.000000 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.sne.s :: ++input: -7.250000 107.000000 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.sne.s :: ++input: 1000000000.000000 -456.250000 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.sne.s :: ++input: -5786.500000 -7.250000 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.sne.s :: ++input: 1752.000000 -3478.500000 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.sne.s :: ++input: 0.015625 356.500000 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.sne.s :: ++input: 0.031250 -1.000000 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.sne.s :: ++input: -248562.750000 23.062500 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.sne.s :: ++input: -45786.500000 0.000000 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.sne.s :: ++input: 456.000000 456.250000 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.sne.s :: ++input: 34.031250 3.000000 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.sne.s :: ++input: 45786.750000 -1.000000 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.sne.s :: ++input: 1752065.000000 1384.500000 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.sne.s :: ++input: 107.000000 -7.000000 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.sne.s :: ++input: -45667.250000 100.000000 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.sne.s :: ++input: -7.000000 -5786.500000 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.sne.s :: ++input: -347856.500000 1752.000000 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.sne.s :: ++input: 356047.500000 0.015625 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.sne.s :: ++input: -1.000000 0.031250 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.sne.s :: ++input: 23.062500 -248562.750000 ++output: 1 ++fcsr: 0 ++roundig mode: zero ++fcmp.sne.s :: ++input: 0.000000 -4578.500000 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.sne.s :: ++input: 456.250000 456.250000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.sne.s :: ++input: 3.000000 34.031250 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero 
++fcmp.sne.s :: ++input: -1.000000 4578.750000 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.sne.s :: ++input: 1384.500000 175.000000 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.sne.s :: ++input: -7.250000 107.000000 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.sne.s :: ++input: 1000000000.000000 -456.250000 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.sne.s :: ++input: -5786.500000 -7.250000 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.sne.s :: ++input: 1752.000000 -3478.500000 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.sne.s :: ++input: 0.015625 356.500000 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.sne.s :: ++input: 0.031250 -1.000000 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.sne.s :: ++input: -248562.750000 23.062500 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.sne.s :: ++input: -45786.500000 0.000000 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.sne.s :: ++input: 456.000000 456.250000 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.sne.s :: ++input: 34.031250 3.000000 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.sne.s :: ++input: 45786.750000 -1.000000 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.sne.s :: ++input: 1752065.000000 1384.500000 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.sne.s :: ++input: 107.000000 -7.000000 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.sne.s :: ++input: -45667.250000 100.000000 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.sne.s :: ++input: -7.000000 -5786.500000 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.sne.s :: ++input: -347856.500000 1752.000000 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.sne.s :: ++input: 356047.500000 0.015625 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.sne.s :: ++input: -1.000000 0.031250 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.sne.s :: ++input: 23.062500 -248562.750000 ++output: 1 ++fcsr: 0x100 ++roundig mode: +inf ++fcmp.sne.s :: ++input: 0.000000 -4578.500000 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.sne.s :: ++input: 456.250000 456.250000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.sne.s :: ++input: 3.000000 34.031250 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.sne.s :: ++input: -1.000000 4578.750000 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.sne.s :: ++input: 1384.500000 175.000000 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.sne.s :: ++input: -7.250000 107.000000 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.sne.s :: ++input: 1000000000.000000 -456.250000 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.sne.s :: ++input: -5786.500000 -7.250000 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.sne.s :: ++input: 1752.000000 -3478.500000 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.sne.s :: ++input: 0.015625 356.500000 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.sne.s :: ++input: 0.031250 -1.000000 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.sne.s :: ++input: -248562.750000 23.062500 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.sne.s :: ++input: -45786.500000 0.000000 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.sne.s :: ++input: 456.000000 456.250000 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.sne.s :: ++input: 34.031250 3.000000 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.sne.s :: ++input: 45786.750000 -1.000000 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.sne.s :: ++input: 1752065.000000 
1384.500000 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.sne.s :: ++input: 107.000000 -7.000000 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.sne.s :: ++input: -45667.250000 100.000000 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.sne.s :: ++input: -7.000000 -5786.500000 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.sne.s :: ++input: -347856.500000 1752.000000 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.sne.s :: ++input: 356047.500000 0.015625 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.sne.s :: ++input: -1.000000 0.031250 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.sne.s :: ++input: 23.062500 -248562.750000 ++output: 1 ++fcsr: 0x200 ++roundig mode: -inf ++fcmp.sne.s :: ++input: 0.000000 -4578.500000 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.sne.s :: ++input: 456.250000 456.250000 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.sne.s :: ++input: 3.000000 34.031250 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.sne.s :: ++input: -1.000000 4578.750000 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.sne.s :: ++input: 1384.500000 175.000000 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.sne.s :: ++input: -7.250000 107.000000 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.sne.s :: ++input: 1000000000.000000 -456.250000 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.sne.s :: ++input: -5786.500000 -7.250000 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.sne.s :: ++input: 1752.000000 -3478.500000 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.sne.s :: ++input: 0.015625 356.500000 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.sne.s :: ++input: 0.031250 -1.000000 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.sne.s :: ++input: -248562.750000 23.062500 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.sne.s :: ++input: -45786.500000 0.000000 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.sne.s :: ++input: 456.000000 456.250000 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.sne.s :: ++input: 34.031250 3.000000 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.sne.s :: ++input: 45786.750000 -1.000000 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.sne.s :: ++input: 1752065.000000 1384.500000 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.sne.s :: ++input: 107.000000 -7.000000 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.sne.s :: ++input: -45667.250000 100.000000 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.sne.s :: ++input: -7.000000 -5786.500000 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.sne.s :: ++input: -347856.500000 1752.000000 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.sne.s :: ++input: 356047.500000 0.015625 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.sne.s :: ++input: -1.000000 0.031250 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.sne.s :: ++input: 23.062500 -248562.750000 ++output: 1 ++fcsr: 0x300 ++roundig mode: near ++fcmp.sne.d :: ++input: 0.000000000000000 -45786.500000000000000 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.sne.d :: ++input: 456.250000000000000 456.250000000000000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.sne.d :: ++input: 3.000000000000000 34.031250000000000 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.sne.d :: ++input: -1.000000000000000 45786.750000000000000 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.sne.d :: ++input: 1384.500000000000000 1752065.000000000000000 ++output: 1 ++fcsr: 0 ++roundig mode: near 
++fcmp.sne.d :: ++input: -7.250000000000000 107.000000000000000 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.sne.d :: ++input: 1000000000.000000000000000 -45667.250000000000000 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.sne.d :: ++input: -5786.500000000000000 -7.250000000000000 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.sne.d :: ++input: 1752.000000000000000 -347856.500000000000000 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.sne.d :: ++input: 0.015625000000000 356047.500000000000000 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.sne.d :: ++input: 0.031250000000000 -1.000000000000000 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.sne.d :: ++input: -248562.750000000000000 23.062500000000000 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.sne.d :: ++input: -45786.500000000000000 0.000000000000000 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.sne.d :: ++input: 456.000000000000000 456.250000000000000 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.sne.d :: ++input: 34.031250000000000 3.000000000000000 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.sne.d :: ++input: 45786.750000000000000 -1.000000000000000 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.sne.d :: ++input: 1752065.000000000000000 1384.500000000000000 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.sne.d :: ++input: 107.000000000000000 -7.000000000000000 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.sne.d :: ++input: -45667.250000000000000 1000000000.000000000000000 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.sne.d :: ++input: -7.000000000000000 -5786.500000000000000 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.sne.d :: ++input: -347856.500000000000000 1752.000000000000000 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.sne.d :: ++input: 356047.500000000000000 0.015625000000000 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.sne.d :: ++input: -1.000000000000000 0.031250000000000 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.sne.d :: ++input: 23.062500000000000 -248562.750000000000000 ++output: 1 ++fcsr: 0 ++roundig mode: zero ++fcmp.sne.d :: ++input: 0.000000000000000 -45786.500000000000000 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.sne.d :: ++input: 456.250000000000000 456.250000000000000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.sne.d :: ++input: 3.000000000000000 34.031250000000000 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.sne.d :: ++input: -1.000000000000000 45786.750000000000000 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.sne.d :: ++input: 1384.500000000000000 1752065.000000000000000 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.sne.d :: ++input: -7.250000000000000 107.000000000000000 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.sne.d :: ++input: 1000000000.000000000000000 -45667.250000000000000 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.sne.d :: ++input: -5786.500000000000000 -7.250000000000000 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.sne.d :: ++input: 1752.000000000000000 -347856.500000000000000 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.sne.d :: ++input: 0.015625000000000 356047.500000000000000 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.sne.d :: ++input: 0.031250000000000 -1.000000000000000 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.sne.d :: ++input: -248562.750000000000000 23.062500000000000 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.sne.d :: ++input: -45786.500000000000000 0.000000000000000 ++output: 1 ++fcsr: 0x100 ++roundig 
mode: zero ++fcmp.sne.d :: ++input: 456.000000000000000 456.250000000000000 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.sne.d :: ++input: 34.031250000000000 3.000000000000000 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.sne.d :: ++input: 45786.750000000000000 -1.000000000000000 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.sne.d :: ++input: 1752065.000000000000000 1384.500000000000000 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.sne.d :: ++input: 107.000000000000000 -7.000000000000000 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.sne.d :: ++input: -45667.250000000000000 1000000000.000000000000000 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.sne.d :: ++input: -7.000000000000000 -5786.500000000000000 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.sne.d :: ++input: -347856.500000000000000 1752.000000000000000 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.sne.d :: ++input: 356047.500000000000000 0.015625000000000 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.sne.d :: ++input: -1.000000000000000 0.031250000000000 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.sne.d :: ++input: 23.062500000000000 -248562.750000000000000 ++output: 1 ++fcsr: 0x100 ++roundig mode: +inf ++fcmp.sne.d :: ++input: 0.000000000000000 -45786.500000000000000 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.sne.d :: ++input: 456.250000000000000 456.250000000000000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.sne.d :: ++input: 3.000000000000000 34.031250000000000 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.sne.d :: ++input: -1.000000000000000 45786.750000000000000 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.sne.d :: ++input: 1384.500000000000000 1752065.000000000000000 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.sne.d :: ++input: -7.250000000000000 107.000000000000000 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.sne.d :: ++input: 1000000000.000000000000000 -45667.250000000000000 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.sne.d :: ++input: -5786.500000000000000 -7.250000000000000 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.sne.d :: ++input: 1752.000000000000000 -347856.500000000000000 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.sne.d :: ++input: 0.015625000000000 356047.500000000000000 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.sne.d :: ++input: 0.031250000000000 -1.000000000000000 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.sne.d :: ++input: -248562.750000000000000 23.062500000000000 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.sne.d :: ++input: -45786.500000000000000 0.000000000000000 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.sne.d :: ++input: 456.000000000000000 456.250000000000000 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.sne.d :: ++input: 34.031250000000000 3.000000000000000 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.sne.d :: ++input: 45786.750000000000000 -1.000000000000000 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.sne.d :: ++input: 1752065.000000000000000 1384.500000000000000 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.sne.d :: ++input: 107.000000000000000 -7.000000000000000 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.sne.d :: ++input: -45667.250000000000000 1000000000.000000000000000 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.sne.d :: ++input: -7.000000000000000 -5786.500000000000000 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.sne.d :: 
++input: -347856.500000000000000 1752.000000000000000 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.sne.d :: ++input: 356047.500000000000000 0.015625000000000 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.sne.d :: ++input: -1.000000000000000 0.031250000000000 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.sne.d :: ++input: 23.062500000000000 -248562.750000000000000 ++output: 1 ++fcsr: 0x200 ++roundig mode: -inf ++fcmp.sne.d :: ++input: 0.000000000000000 -45786.500000000000000 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.sne.d :: ++input: 456.250000000000000 456.250000000000000 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.sne.d :: ++input: 3.000000000000000 34.031250000000000 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.sne.d :: ++input: -1.000000000000000 45786.750000000000000 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.sne.d :: ++input: 1384.500000000000000 1752065.000000000000000 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.sne.d :: ++input: -7.250000000000000 107.000000000000000 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.sne.d :: ++input: 1000000000.000000000000000 -45667.250000000000000 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.sne.d :: ++input: -5786.500000000000000 -7.250000000000000 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.sne.d :: ++input: 1752.000000000000000 -347856.500000000000000 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.sne.d :: ++input: 0.015625000000000 356047.500000000000000 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.sne.d :: ++input: 0.031250000000000 -1.000000000000000 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.sne.d :: ++input: -248562.750000000000000 23.062500000000000 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.sne.d :: ++input: -45786.500000000000000 0.000000000000000 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.sne.d :: ++input: 456.000000000000000 456.250000000000000 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.sne.d :: ++input: 34.031250000000000 3.000000000000000 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.sne.d :: ++input: 45786.750000000000000 -1.000000000000000 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.sne.d :: ++input: 1752065.000000000000000 1384.500000000000000 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.sne.d :: ++input: 107.000000000000000 -7.000000000000000 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.sne.d :: ++input: -45667.250000000000000 1000000000.000000000000000 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.sne.d :: ++input: -7.000000000000000 -5786.500000000000000 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.sne.d :: ++input: -347856.500000000000000 1752.000000000000000 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.sne.d :: ++input: 356047.500000000000000 0.015625000000000 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.sne.d :: ++input: -1.000000000000000 0.031250000000000 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.sne.d :: ++input: 23.062500000000000 -248562.750000000000000 ++output: 1 ++fcsr: 0x300 ++roundig mode: near ++fcmp.cor.s :: ++input: 0.000000 -4578.500000 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.cor.s :: ++input: 456.250000 456.250000 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.cor.s :: ++input: 3.000000 34.031250 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.cor.s :: ++input: -1.000000 4578.750000 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.cor.s :: ++input: 
1384.500000 175.000000 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.cor.s :: ++input: -7.250000 107.000000 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.cor.s :: ++input: 1000000000.000000 -456.250000 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.cor.s :: ++input: -5786.500000 -7.250000 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.cor.s :: ++input: 1752.000000 -3478.500000 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.cor.s :: ++input: 0.015625 356.500000 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.cor.s :: ++input: 0.031250 -1.000000 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.cor.s :: ++input: -248562.750000 23.062500 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.cor.s :: ++input: -45786.500000 0.000000 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.cor.s :: ++input: 456.000000 456.250000 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.cor.s :: ++input: 34.031250 3.000000 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.cor.s :: ++input: 45786.750000 -1.000000 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.cor.s :: ++input: 1752065.000000 1384.500000 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.cor.s :: ++input: 107.000000 -7.000000 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.cor.s :: ++input: -45667.250000 100.000000 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.cor.s :: ++input: -7.000000 -5786.500000 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.cor.s :: ++input: -347856.500000 1752.000000 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.cor.s :: ++input: 356047.500000 0.015625 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.cor.s :: ++input: -1.000000 0.031250 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.cor.s :: ++input: 23.062500 -248562.750000 ++output: 1 ++fcsr: 0 ++roundig mode: zero ++fcmp.cor.s :: ++input: 0.000000 -4578.500000 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.cor.s :: ++input: 456.250000 456.250000 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.cor.s :: ++input: 3.000000 34.031250 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.cor.s :: ++input: -1.000000 4578.750000 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.cor.s :: ++input: 1384.500000 175.000000 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.cor.s :: ++input: -7.250000 107.000000 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.cor.s :: ++input: 1000000000.000000 -456.250000 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.cor.s :: ++input: -5786.500000 -7.250000 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.cor.s :: ++input: 1752.000000 -3478.500000 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.cor.s :: ++input: 0.015625 356.500000 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.cor.s :: ++input: 0.031250 -1.000000 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.cor.s :: ++input: -248562.750000 23.062500 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.cor.s :: ++input: -45786.500000 0.000000 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.cor.s :: ++input: 456.000000 456.250000 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.cor.s :: ++input: 34.031250 3.000000 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.cor.s :: ++input: 45786.750000 -1.000000 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.cor.s :: ++input: 1752065.000000 1384.500000 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.cor.s :: ++input: 107.000000 -7.000000 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.cor.s :: ++input: -45667.250000 100.000000 
++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.cor.s :: ++input: -7.000000 -5786.500000 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.cor.s :: ++input: -347856.500000 1752.000000 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.cor.s :: ++input: 356047.500000 0.015625 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.cor.s :: ++input: -1.000000 0.031250 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.cor.s :: ++input: 23.062500 -248562.750000 ++output: 1 ++fcsr: 0x100 ++roundig mode: +inf ++fcmp.cor.s :: ++input: 0.000000 -4578.500000 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.cor.s :: ++input: 456.250000 456.250000 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.cor.s :: ++input: 3.000000 34.031250 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.cor.s :: ++input: -1.000000 4578.750000 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.cor.s :: ++input: 1384.500000 175.000000 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.cor.s :: ++input: -7.250000 107.000000 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.cor.s :: ++input: 1000000000.000000 -456.250000 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.cor.s :: ++input: -5786.500000 -7.250000 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.cor.s :: ++input: 1752.000000 -3478.500000 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.cor.s :: ++input: 0.015625 356.500000 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.cor.s :: ++input: 0.031250 -1.000000 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.cor.s :: ++input: -248562.750000 23.062500 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.cor.s :: ++input: -45786.500000 0.000000 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.cor.s :: ++input: 456.000000 456.250000 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.cor.s :: ++input: 34.031250 3.000000 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.cor.s :: ++input: 45786.750000 -1.000000 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.cor.s :: ++input: 1752065.000000 1384.500000 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.cor.s :: ++input: 107.000000 -7.000000 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.cor.s :: ++input: -45667.250000 100.000000 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.cor.s :: ++input: -7.000000 -5786.500000 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.cor.s :: ++input: -347856.500000 1752.000000 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.cor.s :: ++input: 356047.500000 0.015625 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.cor.s :: ++input: -1.000000 0.031250 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.cor.s :: ++input: 23.062500 -248562.750000 ++output: 1 ++fcsr: 0x200 ++roundig mode: -inf ++fcmp.cor.s :: ++input: 0.000000 -4578.500000 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.cor.s :: ++input: 456.250000 456.250000 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.cor.s :: ++input: 3.000000 34.031250 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.cor.s :: ++input: -1.000000 4578.750000 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.cor.s :: ++input: 1384.500000 175.000000 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.cor.s :: ++input: -7.250000 107.000000 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.cor.s :: ++input: 1000000000.000000 -456.250000 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.cor.s :: ++input: -5786.500000 -7.250000 ++output: 1 ++fcsr: 0x300 ++roundig mode: 
-inf ++fcmp.cor.s :: ++input: 1752.000000 -3478.500000 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.cor.s :: ++input: 0.015625 356.500000 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.cor.s :: ++input: 0.031250 -1.000000 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.cor.s :: ++input: -248562.750000 23.062500 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.cor.s :: ++input: -45786.500000 0.000000 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.cor.s :: ++input: 456.000000 456.250000 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.cor.s :: ++input: 34.031250 3.000000 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.cor.s :: ++input: 45786.750000 -1.000000 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.cor.s :: ++input: 1752065.000000 1384.500000 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.cor.s :: ++input: 107.000000 -7.000000 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.cor.s :: ++input: -45667.250000 100.000000 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.cor.s :: ++input: -7.000000 -5786.500000 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.cor.s :: ++input: -347856.500000 1752.000000 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.cor.s :: ++input: 356047.500000 0.015625 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.cor.s :: ++input: -1.000000 0.031250 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.cor.s :: ++input: 23.062500 -248562.750000 ++output: 1 ++fcsr: 0x300 ++roundig mode: near ++fcmp.cor.d :: ++input: 0.000000000000000 -45786.500000000000000 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.cor.d :: ++input: 456.250000000000000 456.250000000000000 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.cor.d :: ++input: 3.000000000000000 34.031250000000000 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.cor.d :: ++input: -1.000000000000000 45786.750000000000000 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.cor.d :: ++input: 1384.500000000000000 1752065.000000000000000 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.cor.d :: ++input: -7.250000000000000 107.000000000000000 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.cor.d :: ++input: 1000000000.000000000000000 -45667.250000000000000 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.cor.d :: ++input: -5786.500000000000000 -7.250000000000000 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.cor.d :: ++input: 1752.000000000000000 -347856.500000000000000 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.cor.d :: ++input: 0.015625000000000 356047.500000000000000 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.cor.d :: ++input: 0.031250000000000 -1.000000000000000 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.cor.d :: ++input: -248562.750000000000000 23.062500000000000 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.cor.d :: ++input: -45786.500000000000000 0.000000000000000 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.cor.d :: ++input: 456.000000000000000 456.250000000000000 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.cor.d :: ++input: 34.031250000000000 3.000000000000000 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.cor.d :: ++input: 45786.750000000000000 -1.000000000000000 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.cor.d :: ++input: 1752065.000000000000000 1384.500000000000000 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.cor.d :: ++input: 107.000000000000000 -7.000000000000000 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.cor.d :: ++input: -45667.250000000000000 
1000000000.000000000000000 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.cor.d :: ++input: -7.000000000000000 -5786.500000000000000 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.cor.d :: ++input: -347856.500000000000000 1752.000000000000000 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.cor.d :: ++input: 356047.500000000000000 0.015625000000000 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.cor.d :: ++input: -1.000000000000000 0.031250000000000 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.cor.d :: ++input: 23.062500000000000 -248562.750000000000000 ++output: 1 ++fcsr: 0 ++roundig mode: zero ++fcmp.cor.d :: ++input: 0.000000000000000 -45786.500000000000000 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.cor.d :: ++input: 456.250000000000000 456.250000000000000 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.cor.d :: ++input: 3.000000000000000 34.031250000000000 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.cor.d :: ++input: -1.000000000000000 45786.750000000000000 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.cor.d :: ++input: 1384.500000000000000 1752065.000000000000000 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.cor.d :: ++input: -7.250000000000000 107.000000000000000 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.cor.d :: ++input: 1000000000.000000000000000 -45667.250000000000000 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.cor.d :: ++input: -5786.500000000000000 -7.250000000000000 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.cor.d :: ++input: 1752.000000000000000 -347856.500000000000000 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.cor.d :: ++input: 0.015625000000000 356047.500000000000000 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.cor.d :: ++input: 0.031250000000000 -1.000000000000000 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.cor.d :: ++input: -248562.750000000000000 23.062500000000000 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.cor.d :: ++input: -45786.500000000000000 0.000000000000000 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.cor.d :: ++input: 456.000000000000000 456.250000000000000 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.cor.d :: ++input: 34.031250000000000 3.000000000000000 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.cor.d :: ++input: 45786.750000000000000 -1.000000000000000 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.cor.d :: ++input: 1752065.000000000000000 1384.500000000000000 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.cor.d :: ++input: 107.000000000000000 -7.000000000000000 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.cor.d :: ++input: -45667.250000000000000 1000000000.000000000000000 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.cor.d :: ++input: -7.000000000000000 -5786.500000000000000 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.cor.d :: ++input: -347856.500000000000000 1752.000000000000000 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.cor.d :: ++input: 356047.500000000000000 0.015625000000000 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.cor.d :: ++input: -1.000000000000000 0.031250000000000 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.cor.d :: ++input: 23.062500000000000 -248562.750000000000000 ++output: 1 ++fcsr: 0x100 ++roundig mode: +inf ++fcmp.cor.d :: ++input: 0.000000000000000 -45786.500000000000000 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.cor.d :: ++input: 456.250000000000000 456.250000000000000 ++output: 1 ++fcsr: 0x200 
++roundig mode: +inf ++fcmp.cor.d :: ++input: 3.000000000000000 34.031250000000000 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.cor.d :: ++input: -1.000000000000000 45786.750000000000000 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.cor.d :: ++input: 1384.500000000000000 1752065.000000000000000 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.cor.d :: ++input: -7.250000000000000 107.000000000000000 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.cor.d :: ++input: 1000000000.000000000000000 -45667.250000000000000 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.cor.d :: ++input: -5786.500000000000000 -7.250000000000000 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.cor.d :: ++input: 1752.000000000000000 -347856.500000000000000 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.cor.d :: ++input: 0.015625000000000 356047.500000000000000 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.cor.d :: ++input: 0.031250000000000 -1.000000000000000 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.cor.d :: ++input: -248562.750000000000000 23.062500000000000 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.cor.d :: ++input: -45786.500000000000000 0.000000000000000 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.cor.d :: ++input: 456.000000000000000 456.250000000000000 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.cor.d :: ++input: 34.031250000000000 3.000000000000000 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.cor.d :: ++input: 45786.750000000000000 -1.000000000000000 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.cor.d :: ++input: 1752065.000000000000000 1384.500000000000000 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.cor.d :: ++input: 107.000000000000000 -7.000000000000000 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.cor.d :: ++input: -45667.250000000000000 1000000000.000000000000000 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.cor.d :: ++input: -7.000000000000000 -5786.500000000000000 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.cor.d :: ++input: -347856.500000000000000 1752.000000000000000 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.cor.d :: ++input: 356047.500000000000000 0.015625000000000 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.cor.d :: ++input: -1.000000000000000 0.031250000000000 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.cor.d :: ++input: 23.062500000000000 -248562.750000000000000 ++output: 1 ++fcsr: 0x200 ++roundig mode: -inf ++fcmp.cor.d :: ++input: 0.000000000000000 -45786.500000000000000 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.cor.d :: ++input: 456.250000000000000 456.250000000000000 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.cor.d :: ++input: 3.000000000000000 34.031250000000000 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.cor.d :: ++input: -1.000000000000000 45786.750000000000000 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.cor.d :: ++input: 1384.500000000000000 1752065.000000000000000 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.cor.d :: ++input: -7.250000000000000 107.000000000000000 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.cor.d :: ++input: 1000000000.000000000000000 -45667.250000000000000 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.cor.d :: ++input: -5786.500000000000000 -7.250000000000000 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.cor.d :: ++input: 1752.000000000000000 -347856.500000000000000 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf 
++fcmp.cor.d :: ++input: 0.015625000000000 356047.500000000000000 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.cor.d :: ++input: 0.031250000000000 -1.000000000000000 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.cor.d :: ++input: -248562.750000000000000 23.062500000000000 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.cor.d :: ++input: -45786.500000000000000 0.000000000000000 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.cor.d :: ++input: 456.000000000000000 456.250000000000000 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.cor.d :: ++input: 34.031250000000000 3.000000000000000 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.cor.d :: ++input: 45786.750000000000000 -1.000000000000000 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.cor.d :: ++input: 1752065.000000000000000 1384.500000000000000 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.cor.d :: ++input: 107.000000000000000 -7.000000000000000 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.cor.d :: ++input: -45667.250000000000000 1000000000.000000000000000 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.cor.d :: ++input: -7.000000000000000 -5786.500000000000000 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.cor.d :: ++input: -347856.500000000000000 1752.000000000000000 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.cor.d :: ++input: 356047.500000000000000 0.015625000000000 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.cor.d :: ++input: -1.000000000000000 0.031250000000000 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.cor.d :: ++input: 23.062500000000000 -248562.750000000000000 ++output: 1 ++fcsr: 0x300 ++roundig mode: near ++fcmp.sor.s :: ++input: 0.000000 -4578.500000 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.sor.s :: ++input: 456.250000 456.250000 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.sor.s :: ++input: 3.000000 34.031250 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.sor.s :: ++input: -1.000000 4578.750000 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.sor.s :: ++input: 1384.500000 175.000000 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.sor.s :: ++input: -7.250000 107.000000 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.sor.s :: ++input: 1000000000.000000 -456.250000 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.sor.s :: ++input: -5786.500000 -7.250000 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.sor.s :: ++input: 1752.000000 -3478.500000 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.sor.s :: ++input: 0.015625 356.500000 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.sor.s :: ++input: 0.031250 -1.000000 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.sor.s :: ++input: -248562.750000 23.062500 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.sor.s :: ++input: -45786.500000 0.000000 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.sor.s :: ++input: 456.000000 456.250000 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.sor.s :: ++input: 34.031250 3.000000 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.sor.s :: ++input: 45786.750000 -1.000000 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.sor.s :: ++input: 1752065.000000 1384.500000 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.sor.s :: ++input: 107.000000 -7.000000 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.sor.s :: ++input: -45667.250000 100.000000 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.sor.s :: ++input: -7.000000 -5786.500000 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.sor.s :: ++input: -347856.500000 
1752.000000 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.sor.s :: ++input: 356047.500000 0.015625 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.sor.s :: ++input: -1.000000 0.031250 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.sor.s :: ++input: 23.062500 -248562.750000 ++output: 1 ++fcsr: 0 ++roundig mode: zero ++fcmp.sor.s :: ++input: 0.000000 -4578.500000 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.sor.s :: ++input: 456.250000 456.250000 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.sor.s :: ++input: 3.000000 34.031250 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.sor.s :: ++input: -1.000000 4578.750000 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.sor.s :: ++input: 1384.500000 175.000000 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.sor.s :: ++input: -7.250000 107.000000 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.sor.s :: ++input: 1000000000.000000 -456.250000 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.sor.s :: ++input: -5786.500000 -7.250000 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.sor.s :: ++input: 1752.000000 -3478.500000 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.sor.s :: ++input: 0.015625 356.500000 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.sor.s :: ++input: 0.031250 -1.000000 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.sor.s :: ++input: -248562.750000 23.062500 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.sor.s :: ++input: -45786.500000 0.000000 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.sor.s :: ++input: 456.000000 456.250000 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.sor.s :: ++input: 34.031250 3.000000 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.sor.s :: ++input: 45786.750000 -1.000000 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.sor.s :: ++input: 1752065.000000 1384.500000 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.sor.s :: ++input: 107.000000 -7.000000 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.sor.s :: ++input: -45667.250000 100.000000 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.sor.s :: ++input: -7.000000 -5786.500000 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.sor.s :: ++input: -347856.500000 1752.000000 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.sor.s :: ++input: 356047.500000 0.015625 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.sor.s :: ++input: -1.000000 0.031250 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.sor.s :: ++input: 23.062500 -248562.750000 ++output: 1 ++fcsr: 0x100 ++roundig mode: +inf ++fcmp.sor.s :: ++input: 0.000000 -4578.500000 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.sor.s :: ++input: 456.250000 456.250000 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.sor.s :: ++input: 3.000000 34.031250 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.sor.s :: ++input: -1.000000 4578.750000 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.sor.s :: ++input: 1384.500000 175.000000 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.sor.s :: ++input: -7.250000 107.000000 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.sor.s :: ++input: 1000000000.000000 -456.250000 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.sor.s :: ++input: -5786.500000 -7.250000 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.sor.s :: ++input: 1752.000000 -3478.500000 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.sor.s :: ++input: 0.015625 356.500000 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf 
++fcmp.sor.s :: ++input: 0.031250 -1.000000 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.sor.s :: ++input: -248562.750000 23.062500 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.sor.s :: ++input: -45786.500000 0.000000 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.sor.s :: ++input: 456.000000 456.250000 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.sor.s :: ++input: 34.031250 3.000000 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.sor.s :: ++input: 45786.750000 -1.000000 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.sor.s :: ++input: 1752065.000000 1384.500000 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.sor.s :: ++input: 107.000000 -7.000000 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.sor.s :: ++input: -45667.250000 100.000000 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.sor.s :: ++input: -7.000000 -5786.500000 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.sor.s :: ++input: -347856.500000 1752.000000 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.sor.s :: ++input: 356047.500000 0.015625 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.sor.s :: ++input: -1.000000 0.031250 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.sor.s :: ++input: 23.062500 -248562.750000 ++output: 1 ++fcsr: 0x200 ++roundig mode: -inf ++fcmp.sor.s :: ++input: 0.000000 -4578.500000 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.sor.s :: ++input: 456.250000 456.250000 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.sor.s :: ++input: 3.000000 34.031250 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.sor.s :: ++input: -1.000000 4578.750000 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.sor.s :: ++input: 1384.500000 175.000000 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.sor.s :: ++input: -7.250000 107.000000 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.sor.s :: ++input: 1000000000.000000 -456.250000 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.sor.s :: ++input: -5786.500000 -7.250000 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.sor.s :: ++input: 1752.000000 -3478.500000 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.sor.s :: ++input: 0.015625 356.500000 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.sor.s :: ++input: 0.031250 -1.000000 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.sor.s :: ++input: -248562.750000 23.062500 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.sor.s :: ++input: -45786.500000 0.000000 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.sor.s :: ++input: 456.000000 456.250000 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.sor.s :: ++input: 34.031250 3.000000 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.sor.s :: ++input: 45786.750000 -1.000000 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.sor.s :: ++input: 1752065.000000 1384.500000 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.sor.s :: ++input: 107.000000 -7.000000 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.sor.s :: ++input: -45667.250000 100.000000 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.sor.s :: ++input: -7.000000 -5786.500000 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.sor.s :: ++input: -347856.500000 1752.000000 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.sor.s :: ++input: 356047.500000 0.015625 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.sor.s :: ++input: -1.000000 0.031250 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.sor.s :: ++input: 23.062500 
-248562.750000 ++output: 1 ++fcsr: 0x300 ++roundig mode: near ++fcmp.sor.d :: ++input: 0.000000000000000 -45786.500000000000000 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.sor.d :: ++input: 456.250000000000000 456.250000000000000 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.sor.d :: ++input: 3.000000000000000 34.031250000000000 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.sor.d :: ++input: -1.000000000000000 45786.750000000000000 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.sor.d :: ++input: 1384.500000000000000 1752065.000000000000000 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.sor.d :: ++input: -7.250000000000000 107.000000000000000 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.sor.d :: ++input: 1000000000.000000000000000 -45667.250000000000000 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.sor.d :: ++input: -5786.500000000000000 -7.250000000000000 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.sor.d :: ++input: 1752.000000000000000 -347856.500000000000000 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.sor.d :: ++input: 0.015625000000000 356047.500000000000000 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.sor.d :: ++input: 0.031250000000000 -1.000000000000000 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.sor.d :: ++input: -248562.750000000000000 23.062500000000000 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.sor.d :: ++input: -45786.500000000000000 0.000000000000000 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.sor.d :: ++input: 456.000000000000000 456.250000000000000 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.sor.d :: ++input: 34.031250000000000 3.000000000000000 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.sor.d :: ++input: 45786.750000000000000 -1.000000000000000 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.sor.d :: ++input: 1752065.000000000000000 1384.500000000000000 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.sor.d :: ++input: 107.000000000000000 -7.000000000000000 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.sor.d :: ++input: -45667.250000000000000 1000000000.000000000000000 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.sor.d :: ++input: -7.000000000000000 -5786.500000000000000 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.sor.d :: ++input: -347856.500000000000000 1752.000000000000000 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.sor.d :: ++input: 356047.500000000000000 0.015625000000000 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.sor.d :: ++input: -1.000000000000000 0.031250000000000 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.sor.d :: ++input: 23.062500000000000 -248562.750000000000000 ++output: 1 ++fcsr: 0 ++roundig mode: zero ++fcmp.sor.d :: ++input: 0.000000000000000 -45786.500000000000000 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.sor.d :: ++input: 456.250000000000000 456.250000000000000 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.sor.d :: ++input: 3.000000000000000 34.031250000000000 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.sor.d :: ++input: -1.000000000000000 45786.750000000000000 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.sor.d :: ++input: 1384.500000000000000 1752065.000000000000000 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.sor.d :: ++input: -7.250000000000000 107.000000000000000 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.sor.d :: ++input: 1000000000.000000000000000 -45667.250000000000000 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.sor.d :: ++input: -5786.500000000000000 
-7.250000000000000 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.sor.d :: ++input: 1752.000000000000000 -347856.500000000000000 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.sor.d :: ++input: 0.015625000000000 356047.500000000000000 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.sor.d :: ++input: 0.031250000000000 -1.000000000000000 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.sor.d :: ++input: -248562.750000000000000 23.062500000000000 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.sor.d :: ++input: -45786.500000000000000 0.000000000000000 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.sor.d :: ++input: 456.000000000000000 456.250000000000000 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.sor.d :: ++input: 34.031250000000000 3.000000000000000 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.sor.d :: ++input: 45786.750000000000000 -1.000000000000000 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.sor.d :: ++input: 1752065.000000000000000 1384.500000000000000 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.sor.d :: ++input: 107.000000000000000 -7.000000000000000 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.sor.d :: ++input: -45667.250000000000000 1000000000.000000000000000 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.sor.d :: ++input: -7.000000000000000 -5786.500000000000000 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.sor.d :: ++input: -347856.500000000000000 1752.000000000000000 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.sor.d :: ++input: 356047.500000000000000 0.015625000000000 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.sor.d :: ++input: -1.000000000000000 0.031250000000000 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.sor.d :: ++input: 23.062500000000000 -248562.750000000000000 ++output: 1 ++fcsr: 0x100 ++roundig mode: +inf ++fcmp.sor.d :: ++input: 0.000000000000000 -45786.500000000000000 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.sor.d :: ++input: 456.250000000000000 456.250000000000000 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.sor.d :: ++input: 3.000000000000000 34.031250000000000 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.sor.d :: ++input: -1.000000000000000 45786.750000000000000 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.sor.d :: ++input: 1384.500000000000000 1752065.000000000000000 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.sor.d :: ++input: -7.250000000000000 107.000000000000000 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.sor.d :: ++input: 1000000000.000000000000000 -45667.250000000000000 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.sor.d :: ++input: -5786.500000000000000 -7.250000000000000 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.sor.d :: ++input: 1752.000000000000000 -347856.500000000000000 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.sor.d :: ++input: 0.015625000000000 356047.500000000000000 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.sor.d :: ++input: 0.031250000000000 -1.000000000000000 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.sor.d :: ++input: -248562.750000000000000 23.062500000000000 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.sor.d :: ++input: -45786.500000000000000 0.000000000000000 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.sor.d :: ++input: 456.000000000000000 456.250000000000000 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.sor.d :: ++input: 34.031250000000000 3.000000000000000 ++output: 1 ++fcsr: 
0x200 ++roundig mode: +inf ++fcmp.sor.d :: ++input: 45786.750000000000000 -1.000000000000000 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.sor.d :: ++input: 1752065.000000000000000 1384.500000000000000 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.sor.d :: ++input: 107.000000000000000 -7.000000000000000 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.sor.d :: ++input: -45667.250000000000000 1000000000.000000000000000 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.sor.d :: ++input: -7.000000000000000 -5786.500000000000000 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.sor.d :: ++input: -347856.500000000000000 1752.000000000000000 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.sor.d :: ++input: 356047.500000000000000 0.015625000000000 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.sor.d :: ++input: -1.000000000000000 0.031250000000000 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.sor.d :: ++input: 23.062500000000000 -248562.750000000000000 ++output: 1 ++fcsr: 0x200 ++roundig mode: -inf ++fcmp.sor.d :: ++input: 0.000000000000000 -45786.500000000000000 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.sor.d :: ++input: 456.250000000000000 456.250000000000000 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.sor.d :: ++input: 3.000000000000000 34.031250000000000 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.sor.d :: ++input: -1.000000000000000 45786.750000000000000 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.sor.d :: ++input: 1384.500000000000000 1752065.000000000000000 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.sor.d :: ++input: -7.250000000000000 107.000000000000000 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.sor.d :: ++input: 1000000000.000000000000000 -45667.250000000000000 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.sor.d :: ++input: -5786.500000000000000 -7.250000000000000 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.sor.d :: ++input: 1752.000000000000000 -347856.500000000000000 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.sor.d :: ++input: 0.015625000000000 356047.500000000000000 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.sor.d :: ++input: 0.031250000000000 -1.000000000000000 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.sor.d :: ++input: -248562.750000000000000 23.062500000000000 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.sor.d :: ++input: -45786.500000000000000 0.000000000000000 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.sor.d :: ++input: 456.000000000000000 456.250000000000000 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.sor.d :: ++input: 34.031250000000000 3.000000000000000 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.sor.d :: ++input: 45786.750000000000000 -1.000000000000000 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.sor.d :: ++input: 1752065.000000000000000 1384.500000000000000 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.sor.d :: ++input: 107.000000000000000 -7.000000000000000 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.sor.d :: ++input: -45667.250000000000000 1000000000.000000000000000 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.sor.d :: ++input: -7.000000000000000 -5786.500000000000000 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.sor.d :: ++input: -347856.500000000000000 1752.000000000000000 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.sor.d :: ++input: 356047.500000000000000 0.015625000000000 ++output: 1 ++fcsr: 0x300 ++roundig 
mode: -inf ++fcmp.sor.d :: ++input: -1.000000000000000 0.031250000000000 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.sor.d :: ++input: 23.062500000000000 -248562.750000000000000 ++output: 1 ++fcsr: 0x300 ++roundig mode: near ++fcmp.cune.s :: ++input: 0.000000 -4578.500000 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.cune.s :: ++input: 456.250000 456.250000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.cune.s :: ++input: 3.000000 34.031250 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.cune.s :: ++input: -1.000000 4578.750000 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.cune.s :: ++input: 1384.500000 175.000000 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.cune.s :: ++input: -7.250000 107.000000 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.cune.s :: ++input: 1000000000.000000 -456.250000 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.cune.s :: ++input: -5786.500000 -7.250000 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.cune.s :: ++input: 1752.000000 -3478.500000 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.cune.s :: ++input: 0.015625 356.500000 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.cune.s :: ++input: 0.031250 -1.000000 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.cune.s :: ++input: -248562.750000 23.062500 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.cune.s :: ++input: -45786.500000 0.000000 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.cune.s :: ++input: 456.000000 456.250000 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.cune.s :: ++input: 34.031250 3.000000 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.cune.s :: ++input: 45786.750000 -1.000000 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.cune.s :: ++input: 1752065.000000 1384.500000 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.cune.s :: ++input: 107.000000 -7.000000 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.cune.s :: ++input: -45667.250000 100.000000 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.cune.s :: ++input: -7.000000 -5786.500000 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.cune.s :: ++input: -347856.500000 1752.000000 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.cune.s :: ++input: 356047.500000 0.015625 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.cune.s :: ++input: -1.000000 0.031250 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.cune.s :: ++input: 23.062500 -248562.750000 ++output: 1 ++fcsr: 0 ++roundig mode: zero ++fcmp.cune.s :: ++input: 0.000000 -4578.500000 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.cune.s :: ++input: 456.250000 456.250000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.cune.s :: ++input: 3.000000 34.031250 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.cune.s :: ++input: -1.000000 4578.750000 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.cune.s :: ++input: 1384.500000 175.000000 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.cune.s :: ++input: -7.250000 107.000000 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.cune.s :: ++input: 1000000000.000000 -456.250000 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.cune.s :: ++input: -5786.500000 -7.250000 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.cune.s :: ++input: 1752.000000 -3478.500000 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.cune.s :: ++input: 0.015625 356.500000 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.cune.s :: ++input: 0.031250 -1.000000 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.cune.s :: ++input: -248562.750000 23.062500 ++output: 1 
++fcsr: 0x100 ++roundig mode: zero ++fcmp.cune.s :: ++input: -45786.500000 0.000000 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.cune.s :: ++input: 456.000000 456.250000 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.cune.s :: ++input: 34.031250 3.000000 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.cune.s :: ++input: 45786.750000 -1.000000 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.cune.s :: ++input: 1752065.000000 1384.500000 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.cune.s :: ++input: 107.000000 -7.000000 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.cune.s :: ++input: -45667.250000 100.000000 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.cune.s :: ++input: -7.000000 -5786.500000 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.cune.s :: ++input: -347856.500000 1752.000000 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.cune.s :: ++input: 356047.500000 0.015625 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.cune.s :: ++input: -1.000000 0.031250 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.cune.s :: ++input: 23.062500 -248562.750000 ++output: 1 ++fcsr: 0x100 ++roundig mode: +inf ++fcmp.cune.s :: ++input: 0.000000 -4578.500000 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.cune.s :: ++input: 456.250000 456.250000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.cune.s :: ++input: 3.000000 34.031250 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.cune.s :: ++input: -1.000000 4578.750000 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.cune.s :: ++input: 1384.500000 175.000000 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.cune.s :: ++input: -7.250000 107.000000 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.cune.s :: ++input: 1000000000.000000 -456.250000 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.cune.s :: ++input: -5786.500000 -7.250000 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.cune.s :: ++input: 1752.000000 -3478.500000 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.cune.s :: ++input: 0.015625 356.500000 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.cune.s :: ++input: 0.031250 -1.000000 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.cune.s :: ++input: -248562.750000 23.062500 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.cune.s :: ++input: -45786.500000 0.000000 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.cune.s :: ++input: 456.000000 456.250000 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.cune.s :: ++input: 34.031250 3.000000 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.cune.s :: ++input: 45786.750000 -1.000000 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.cune.s :: ++input: 1752065.000000 1384.500000 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.cune.s :: ++input: 107.000000 -7.000000 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.cune.s :: ++input: -45667.250000 100.000000 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.cune.s :: ++input: -7.000000 -5786.500000 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.cune.s :: ++input: -347856.500000 1752.000000 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.cune.s :: ++input: 356047.500000 0.015625 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.cune.s :: ++input: -1.000000 0.031250 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.cune.s :: ++input: 23.062500 -248562.750000 ++output: 1 ++fcsr: 0x200 ++roundig mode: -inf ++fcmp.cune.s :: ++input: 0.000000 -4578.500000 ++output: 1 
++fcsr: 0x300 ++roundig mode: -inf ++fcmp.cune.s :: ++input: 456.250000 456.250000 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.cune.s :: ++input: 3.000000 34.031250 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.cune.s :: ++input: -1.000000 4578.750000 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.cune.s :: ++input: 1384.500000 175.000000 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.cune.s :: ++input: -7.250000 107.000000 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.cune.s :: ++input: 1000000000.000000 -456.250000 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.cune.s :: ++input: -5786.500000 -7.250000 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.cune.s :: ++input: 1752.000000 -3478.500000 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.cune.s :: ++input: 0.015625 356.500000 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.cune.s :: ++input: 0.031250 -1.000000 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.cune.s :: ++input: -248562.750000 23.062500 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.cune.s :: ++input: -45786.500000 0.000000 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.cune.s :: ++input: 456.000000 456.250000 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.cune.s :: ++input: 34.031250 3.000000 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.cune.s :: ++input: 45786.750000 -1.000000 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.cune.s :: ++input: 1752065.000000 1384.500000 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.cune.s :: ++input: 107.000000 -7.000000 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.cune.s :: ++input: -45667.250000 100.000000 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.cune.s :: ++input: -7.000000 -5786.500000 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.cune.s :: ++input: -347856.500000 1752.000000 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.cune.s :: ++input: 356047.500000 0.015625 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.cune.s :: ++input: -1.000000 0.031250 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.cune.s :: ++input: 23.062500 -248562.750000 ++output: 1 ++fcsr: 0x300 ++roundig mode: near ++fcmp.cune.d :: ++input: 0.000000000000000 -45786.500000000000000 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.cune.d :: ++input: 456.250000000000000 456.250000000000000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.cune.d :: ++input: 3.000000000000000 34.031250000000000 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.cune.d :: ++input: -1.000000000000000 45786.750000000000000 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.cune.d :: ++input: 1384.500000000000000 1752065.000000000000000 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.cune.d :: ++input: -7.250000000000000 107.000000000000000 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.cune.d :: ++input: 1000000000.000000000000000 -45667.250000000000000 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.cune.d :: ++input: -5786.500000000000000 -7.250000000000000 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.cune.d :: ++input: 1752.000000000000000 -347856.500000000000000 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.cune.d :: ++input: 0.015625000000000 356047.500000000000000 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.cune.d :: ++input: 0.031250000000000 -1.000000000000000 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.cune.d :: ++input: -248562.750000000000000 23.062500000000000 ++output: 1 ++fcsr: 0 
++roundig mode: near ++fcmp.cune.d :: ++input: -45786.500000000000000 0.000000000000000 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.cune.d :: ++input: 456.000000000000000 456.250000000000000 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.cune.d :: ++input: 34.031250000000000 3.000000000000000 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.cune.d :: ++input: 45786.750000000000000 -1.000000000000000 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.cune.d :: ++input: 1752065.000000000000000 1384.500000000000000 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.cune.d :: ++input: 107.000000000000000 -7.000000000000000 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.cune.d :: ++input: -45667.250000000000000 1000000000.000000000000000 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.cune.d :: ++input: -7.000000000000000 -5786.500000000000000 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.cune.d :: ++input: -347856.500000000000000 1752.000000000000000 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.cune.d :: ++input: 356047.500000000000000 0.015625000000000 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.cune.d :: ++input: -1.000000000000000 0.031250000000000 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.cune.d :: ++input: 23.062500000000000 -248562.750000000000000 ++output: 1 ++fcsr: 0 ++roundig mode: zero ++fcmp.cune.d :: ++input: 0.000000000000000 -45786.500000000000000 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.cune.d :: ++input: 456.250000000000000 456.250000000000000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.cune.d :: ++input: 3.000000000000000 34.031250000000000 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.cune.d :: ++input: -1.000000000000000 45786.750000000000000 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.cune.d :: ++input: 1384.500000000000000 1752065.000000000000000 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.cune.d :: ++input: -7.250000000000000 107.000000000000000 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.cune.d :: ++input: 1000000000.000000000000000 -45667.250000000000000 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.cune.d :: ++input: -5786.500000000000000 -7.250000000000000 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.cune.d :: ++input: 1752.000000000000000 -347856.500000000000000 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.cune.d :: ++input: 0.015625000000000 356047.500000000000000 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.cune.d :: ++input: 0.031250000000000 -1.000000000000000 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.cune.d :: ++input: -248562.750000000000000 23.062500000000000 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.cune.d :: ++input: -45786.500000000000000 0.000000000000000 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.cune.d :: ++input: 456.000000000000000 456.250000000000000 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.cune.d :: ++input: 34.031250000000000 3.000000000000000 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.cune.d :: ++input: 45786.750000000000000 -1.000000000000000 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.cune.d :: ++input: 1752065.000000000000000 1384.500000000000000 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.cune.d :: ++input: 107.000000000000000 -7.000000000000000 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.cune.d :: ++input: -45667.250000000000000 1000000000.000000000000000 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.cune.d :: ++input: 
-7.000000000000000 -5786.500000000000000 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.cune.d :: ++input: -347856.500000000000000 1752.000000000000000 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.cune.d :: ++input: 356047.500000000000000 0.015625000000000 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.cune.d :: ++input: -1.000000000000000 0.031250000000000 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.cune.d :: ++input: 23.062500000000000 -248562.750000000000000 ++output: 1 ++fcsr: 0x100 ++roundig mode: +inf ++fcmp.cune.d :: ++input: 0.000000000000000 -45786.500000000000000 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.cune.d :: ++input: 456.250000000000000 456.250000000000000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.cune.d :: ++input: 3.000000000000000 34.031250000000000 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.cune.d :: ++input: -1.000000000000000 45786.750000000000000 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.cune.d :: ++input: 1384.500000000000000 1752065.000000000000000 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.cune.d :: ++input: -7.250000000000000 107.000000000000000 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.cune.d :: ++input: 1000000000.000000000000000 -45667.250000000000000 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.cune.d :: ++input: -5786.500000000000000 -7.250000000000000 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.cune.d :: ++input: 1752.000000000000000 -347856.500000000000000 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.cune.d :: ++input: 0.015625000000000 356047.500000000000000 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.cune.d :: ++input: 0.031250000000000 -1.000000000000000 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.cune.d :: ++input: -248562.750000000000000 23.062500000000000 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.cune.d :: ++input: -45786.500000000000000 0.000000000000000 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.cune.d :: ++input: 456.000000000000000 456.250000000000000 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.cune.d :: ++input: 34.031250000000000 3.000000000000000 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.cune.d :: ++input: 45786.750000000000000 -1.000000000000000 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.cune.d :: ++input: 1752065.000000000000000 1384.500000000000000 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.cune.d :: ++input: 107.000000000000000 -7.000000000000000 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.cune.d :: ++input: -45667.250000000000000 1000000000.000000000000000 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.cune.d :: ++input: -7.000000000000000 -5786.500000000000000 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.cune.d :: ++input: -347856.500000000000000 1752.000000000000000 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.cune.d :: ++input: 356047.500000000000000 0.015625000000000 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.cune.d :: ++input: -1.000000000000000 0.031250000000000 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.cune.d :: ++input: 23.062500000000000 -248562.750000000000000 ++output: 1 ++fcsr: 0x200 ++roundig mode: -inf ++fcmp.cune.d :: ++input: 0.000000000000000 -45786.500000000000000 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.cune.d :: ++input: 456.250000000000000 456.250000000000000 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.cune.d :: ++input: 
3.000000000000000 34.031250000000000 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.cune.d :: ++input: -1.000000000000000 45786.750000000000000 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.cune.d :: ++input: 1384.500000000000000 1752065.000000000000000 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.cune.d :: ++input: -7.250000000000000 107.000000000000000 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.cune.d :: ++input: 1000000000.000000000000000 -45667.250000000000000 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.cune.d :: ++input: -5786.500000000000000 -7.250000000000000 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.cune.d :: ++input: 1752.000000000000000 -347856.500000000000000 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.cune.d :: ++input: 0.015625000000000 356047.500000000000000 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.cune.d :: ++input: 0.031250000000000 -1.000000000000000 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.cune.d :: ++input: -248562.750000000000000 23.062500000000000 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.cune.d :: ++input: -45786.500000000000000 0.000000000000000 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.cune.d :: ++input: 456.000000000000000 456.250000000000000 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.cune.d :: ++input: 34.031250000000000 3.000000000000000 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.cune.d :: ++input: 45786.750000000000000 -1.000000000000000 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.cune.d :: ++input: 1752065.000000000000000 1384.500000000000000 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.cune.d :: ++input: 107.000000000000000 -7.000000000000000 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.cune.d :: ++input: -45667.250000000000000 1000000000.000000000000000 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.cune.d :: ++input: -7.000000000000000 -5786.500000000000000 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.cune.d :: ++input: -347856.500000000000000 1752.000000000000000 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.cune.d :: ++input: 356047.500000000000000 0.015625000000000 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.cune.d :: ++input: -1.000000000000000 0.031250000000000 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.cune.d :: ++input: 23.062500000000000 -248562.750000000000000 ++output: 1 ++fcsr: 0x300 ++roundig mode: near ++fcmp.sune.s :: ++input: 0.000000 -4578.500000 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.sune.s :: ++input: 456.250000 456.250000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.sune.s :: ++input: 3.000000 34.031250 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.sune.s :: ++input: -1.000000 4578.750000 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.sune.s :: ++input: 1384.500000 175.000000 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.sune.s :: ++input: -7.250000 107.000000 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.sune.s :: ++input: 1000000000.000000 -456.250000 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.sune.s :: ++input: -5786.500000 -7.250000 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.sune.s :: ++input: 1752.000000 -3478.500000 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.sune.s :: ++input: 0.015625 356.500000 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.sune.s :: ++input: 0.031250 -1.000000 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.sune.s :: ++input: -248562.750000 23.062500 
++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.sune.s :: ++input: -45786.500000 0.000000 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.sune.s :: ++input: 456.000000 456.250000 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.sune.s :: ++input: 34.031250 3.000000 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.sune.s :: ++input: 45786.750000 -1.000000 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.sune.s :: ++input: 1752065.000000 1384.500000 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.sune.s :: ++input: 107.000000 -7.000000 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.sune.s :: ++input: -45667.250000 100.000000 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.sune.s :: ++input: -7.000000 -5786.500000 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.sune.s :: ++input: -347856.500000 1752.000000 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.sune.s :: ++input: 356047.500000 0.015625 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.sune.s :: ++input: -1.000000 0.031250 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.sune.s :: ++input: 23.062500 -248562.750000 ++output: 1 ++fcsr: 0 ++roundig mode: zero ++fcmp.sune.s :: ++input: 0.000000 -4578.500000 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.sune.s :: ++input: 456.250000 456.250000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.sune.s :: ++input: 3.000000 34.031250 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.sune.s :: ++input: -1.000000 4578.750000 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.sune.s :: ++input: 1384.500000 175.000000 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.sune.s :: ++input: -7.250000 107.000000 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.sune.s :: ++input: 1000000000.000000 -456.250000 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.sune.s :: ++input: -5786.500000 -7.250000 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.sune.s :: ++input: 1752.000000 -3478.500000 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.sune.s :: ++input: 0.015625 356.500000 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.sune.s :: ++input: 0.031250 -1.000000 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.sune.s :: ++input: -248562.750000 23.062500 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.sune.s :: ++input: -45786.500000 0.000000 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.sune.s :: ++input: 456.000000 456.250000 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.sune.s :: ++input: 34.031250 3.000000 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.sune.s :: ++input: 45786.750000 -1.000000 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.sune.s :: ++input: 1752065.000000 1384.500000 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.sune.s :: ++input: 107.000000 -7.000000 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.sune.s :: ++input: -45667.250000 100.000000 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.sune.s :: ++input: -7.000000 -5786.500000 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.sune.s :: ++input: -347856.500000 1752.000000 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.sune.s :: ++input: 356047.500000 0.015625 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.sune.s :: ++input: -1.000000 0.031250 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.sune.s :: ++input: 23.062500 -248562.750000 ++output: 1 ++fcsr: 0x100 ++roundig mode: +inf ++fcmp.sune.s :: ++input: 0.000000 -4578.500000 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf 
++fcmp.sune.s :: ++input: 456.250000 456.250000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.sune.s :: ++input: 3.000000 34.031250 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.sune.s :: ++input: -1.000000 4578.750000 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.sune.s :: ++input: 1384.500000 175.000000 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.sune.s :: ++input: -7.250000 107.000000 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.sune.s :: ++input: 1000000000.000000 -456.250000 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.sune.s :: ++input: -5786.500000 -7.250000 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.sune.s :: ++input: 1752.000000 -3478.500000 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.sune.s :: ++input: 0.015625 356.500000 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.sune.s :: ++input: 0.031250 -1.000000 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.sune.s :: ++input: -248562.750000 23.062500 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.sune.s :: ++input: -45786.500000 0.000000 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.sune.s :: ++input: 456.000000 456.250000 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.sune.s :: ++input: 34.031250 3.000000 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.sune.s :: ++input: 45786.750000 -1.000000 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.sune.s :: ++input: 1752065.000000 1384.500000 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.sune.s :: ++input: 107.000000 -7.000000 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.sune.s :: ++input: -45667.250000 100.000000 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.sune.s :: ++input: -7.000000 -5786.500000 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.sune.s :: ++input: -347856.500000 1752.000000 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.sune.s :: ++input: 356047.500000 0.015625 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.sune.s :: ++input: -1.000000 0.031250 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.sune.s :: ++input: 23.062500 -248562.750000 ++output: 1 ++fcsr: 0x200 ++roundig mode: -inf ++fcmp.sune.s :: ++input: 0.000000 -4578.500000 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.sune.s :: ++input: 456.250000 456.250000 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.sune.s :: ++input: 3.000000 34.031250 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.sune.s :: ++input: -1.000000 4578.750000 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.sune.s :: ++input: 1384.500000 175.000000 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.sune.s :: ++input: -7.250000 107.000000 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.sune.s :: ++input: 1000000000.000000 -456.250000 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.sune.s :: ++input: -5786.500000 -7.250000 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.sune.s :: ++input: 1752.000000 -3478.500000 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.sune.s :: ++input: 0.015625 356.500000 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.sune.s :: ++input: 0.031250 -1.000000 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.sune.s :: ++input: -248562.750000 23.062500 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.sune.s :: ++input: -45786.500000 0.000000 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.sune.s :: ++input: 456.000000 456.250000 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf 
++fcmp.sune.s :: ++input: 34.031250 3.000000 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.sune.s :: ++input: 45786.750000 -1.000000 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.sune.s :: ++input: 1752065.000000 1384.500000 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.sune.s :: ++input: 107.000000 -7.000000 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.sune.s :: ++input: -45667.250000 100.000000 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.sune.s :: ++input: -7.000000 -5786.500000 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.sune.s :: ++input: -347856.500000 1752.000000 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.sune.s :: ++input: 356047.500000 0.015625 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.sune.s :: ++input: -1.000000 0.031250 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.sune.s :: ++input: 23.062500 -248562.750000 ++output: 1 ++fcsr: 0x300 ++roundig mode: near ++fcmp.sune.d :: ++input: 0.000000000000000 -45786.500000000000000 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.sune.d :: ++input: 456.250000000000000 456.250000000000000 ++output: 0 ++fcsr: 0 ++roundig mode: near ++fcmp.sune.d :: ++input: 3.000000000000000 34.031250000000000 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.sune.d :: ++input: -1.000000000000000 45786.750000000000000 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.sune.d :: ++input: 1384.500000000000000 1752065.000000000000000 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.sune.d :: ++input: -7.250000000000000 107.000000000000000 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.sune.d :: ++input: 1000000000.000000000000000 -45667.250000000000000 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.sune.d :: ++input: -5786.500000000000000 -7.250000000000000 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.sune.d :: ++input: 1752.000000000000000 -347856.500000000000000 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.sune.d :: ++input: 0.015625000000000 356047.500000000000000 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.sune.d :: ++input: 0.031250000000000 -1.000000000000000 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.sune.d :: ++input: -248562.750000000000000 23.062500000000000 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.sune.d :: ++input: -45786.500000000000000 0.000000000000000 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.sune.d :: ++input: 456.000000000000000 456.250000000000000 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.sune.d :: ++input: 34.031250000000000 3.000000000000000 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.sune.d :: ++input: 45786.750000000000000 -1.000000000000000 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.sune.d :: ++input: 1752065.000000000000000 1384.500000000000000 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.sune.d :: ++input: 107.000000000000000 -7.000000000000000 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.sune.d :: ++input: -45667.250000000000000 1000000000.000000000000000 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.sune.d :: ++input: -7.000000000000000 -5786.500000000000000 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.sune.d :: ++input: -347856.500000000000000 1752.000000000000000 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.sune.d :: ++input: 356047.500000000000000 0.015625000000000 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.sune.d :: ++input: -1.000000000000000 0.031250000000000 ++output: 1 ++fcsr: 0 ++roundig mode: near ++fcmp.sune.d :: ++input: 23.062500000000000 
-248562.750000000000000 ++output: 1 ++fcsr: 0 ++roundig mode: zero ++fcmp.sune.d :: ++input: 0.000000000000000 -45786.500000000000000 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.sune.d :: ++input: 456.250000000000000 456.250000000000000 ++output: 0 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.sune.d :: ++input: 3.000000000000000 34.031250000000000 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.sune.d :: ++input: -1.000000000000000 45786.750000000000000 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.sune.d :: ++input: 1384.500000000000000 1752065.000000000000000 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.sune.d :: ++input: -7.250000000000000 107.000000000000000 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.sune.d :: ++input: 1000000000.000000000000000 -45667.250000000000000 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.sune.d :: ++input: -5786.500000000000000 -7.250000000000000 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.sune.d :: ++input: 1752.000000000000000 -347856.500000000000000 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.sune.d :: ++input: 0.015625000000000 356047.500000000000000 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.sune.d :: ++input: 0.031250000000000 -1.000000000000000 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.sune.d :: ++input: -248562.750000000000000 23.062500000000000 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.sune.d :: ++input: -45786.500000000000000 0.000000000000000 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.sune.d :: ++input: 456.000000000000000 456.250000000000000 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.sune.d :: ++input: 34.031250000000000 3.000000000000000 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.sune.d :: ++input: 45786.750000000000000 -1.000000000000000 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.sune.d :: ++input: 1752065.000000000000000 1384.500000000000000 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.sune.d :: ++input: 107.000000000000000 -7.000000000000000 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.sune.d :: ++input: -45667.250000000000000 1000000000.000000000000000 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.sune.d :: ++input: -7.000000000000000 -5786.500000000000000 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.sune.d :: ++input: -347856.500000000000000 1752.000000000000000 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.sune.d :: ++input: 356047.500000000000000 0.015625000000000 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.sune.d :: ++input: -1.000000000000000 0.031250000000000 ++output: 1 ++fcsr: 0x100 ++roundig mode: zero ++fcmp.sune.d :: ++input: 23.062500000000000 -248562.750000000000000 ++output: 1 ++fcsr: 0x100 ++roundig mode: +inf ++fcmp.sune.d :: ++input: 0.000000000000000 -45786.500000000000000 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.sune.d :: ++input: 456.250000000000000 456.250000000000000 ++output: 0 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.sune.d :: ++input: 3.000000000000000 34.031250000000000 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.sune.d :: ++input: -1.000000000000000 45786.750000000000000 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.sune.d :: ++input: 1384.500000000000000 1752065.000000000000000 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.sune.d :: ++input: -7.250000000000000 107.000000000000000 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.sune.d :: ++input: 1000000000.000000000000000 
-45667.250000000000000 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.sune.d :: ++input: -5786.500000000000000 -7.250000000000000 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.sune.d :: ++input: 1752.000000000000000 -347856.500000000000000 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.sune.d :: ++input: 0.015625000000000 356047.500000000000000 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.sune.d :: ++input: 0.031250000000000 -1.000000000000000 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.sune.d :: ++input: -248562.750000000000000 23.062500000000000 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.sune.d :: ++input: -45786.500000000000000 0.000000000000000 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.sune.d :: ++input: 456.000000000000000 456.250000000000000 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.sune.d :: ++input: 34.031250000000000 3.000000000000000 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.sune.d :: ++input: 45786.750000000000000 -1.000000000000000 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.sune.d :: ++input: 1752065.000000000000000 1384.500000000000000 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.sune.d :: ++input: 107.000000000000000 -7.000000000000000 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.sune.d :: ++input: -45667.250000000000000 1000000000.000000000000000 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.sune.d :: ++input: -7.000000000000000 -5786.500000000000000 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.sune.d :: ++input: -347856.500000000000000 1752.000000000000000 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.sune.d :: ++input: 356047.500000000000000 0.015625000000000 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.sune.d :: ++input: -1.000000000000000 0.031250000000000 ++output: 1 ++fcsr: 0x200 ++roundig mode: +inf ++fcmp.sune.d :: ++input: 23.062500000000000 -248562.750000000000000 ++output: 1 ++fcsr: 0x200 ++roundig mode: -inf ++fcmp.sune.d :: ++input: 0.000000000000000 -45786.500000000000000 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.sune.d :: ++input: 456.250000000000000 456.250000000000000 ++output: 0 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.sune.d :: ++input: 3.000000000000000 34.031250000000000 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.sune.d :: ++input: -1.000000000000000 45786.750000000000000 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.sune.d :: ++input: 1384.500000000000000 1752065.000000000000000 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.sune.d :: ++input: -7.250000000000000 107.000000000000000 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.sune.d :: ++input: 1000000000.000000000000000 -45667.250000000000000 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.sune.d :: ++input: -5786.500000000000000 -7.250000000000000 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.sune.d :: ++input: 1752.000000000000000 -347856.500000000000000 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.sune.d :: ++input: 0.015625000000000 356047.500000000000000 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.sune.d :: ++input: 0.031250000000000 -1.000000000000000 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.sune.d :: ++input: -248562.750000000000000 23.062500000000000 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.sune.d :: ++input: -45786.500000000000000 0.000000000000000 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.sune.d :: ++input: 456.000000000000000 
456.250000000000000 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.sune.d :: ++input: 34.031250000000000 3.000000000000000 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.sune.d :: ++input: 45786.750000000000000 -1.000000000000000 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.sune.d :: ++input: 1752065.000000000000000 1384.500000000000000 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.sune.d :: ++input: 107.000000000000000 -7.000000000000000 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.sune.d :: ++input: -45667.250000000000000 1000000000.000000000000000 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.sune.d :: ++input: -7.000000000000000 -5786.500000000000000 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.sune.d :: ++input: -347856.500000000000000 1752.000000000000000 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.sune.d :: ++input: 356047.500000000000000 0.015625000000000 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.sune.d :: ++input: -1.000000000000000 0.031250000000000 ++output: 1 ++fcsr: 0x300 ++roundig mode: -inf ++fcmp.sune.d :: ++input: 23.062500000000000 -248562.750000000000000 ++output: 1 ++fcsr: 0x300 ++roundig mode: near ++fsel :: ++input: 0.000000000000000 -45786.500000000000000 0 ++output: 0.000000000000000 ++fcsr: 0 ++roundig mode: near ++fsel :: ++input: 456.250000000000000 456.250000000000000 1 ++output: 456.250000000000000 ++fcsr: 0 ++roundig mode: near ++fsel :: ++input: 3.000000000000000 34.031250000000000 0 ++output: 3.000000000000000 ++fcsr: 0 ++roundig mode: near ++fsel :: ++input: -1.000000000000000 45786.750000000000000 1 ++output: 45786.750000000000000 ++fcsr: 0 ++roundig mode: near ++fsel :: ++input: 1384.500000000000000 1752065.000000000000000 1 ++output: 1752065.000000000000000 ++fcsr: 0 ++roundig mode: near ++fsel :: ++input: -7.250000000000000 107.000000000000000 0 ++output: -7.250000000000000 ++fcsr: 0 ++roundig mode: near ++fsel :: ++input: 1000000000.000000000000000 -45667.250000000000000 1 ++output: -45667.250000000000000 ++fcsr: 0 ++roundig mode: near ++fsel :: ++input: -5786.500000000000000 -7.250000000000000 0 ++output: -5786.500000000000000 ++fcsr: 0 ++roundig mode: near ++fsel :: ++input: 1752.000000000000000 -347856.500000000000000 0 ++output: 1752.000000000000000 ++fcsr: 0 ++roundig mode: near ++fsel :: ++input: 0.015625000000000 356047.500000000000000 0 ++output: 0.015625000000000 ++fcsr: 0 ++roundig mode: near ++fsel :: ++input: 0.031250000000000 -1.000000000000000 1 ++output: -1.000000000000000 ++fcsr: 0 ++roundig mode: near ++fsel :: ++input: -248562.750000000000000 23.062500000000000 1 ++output: 23.062500000000000 ++fcsr: 0 ++roundig mode: near ++fsel :: ++input: -45786.500000000000000 0.000000000000000 1 ++output: 0.000000000000000 ++fcsr: 0 ++roundig mode: near ++fsel :: ++input: 456.000000000000000 456.250000000000000 1 ++output: 456.250000000000000 ++fcsr: 0 ++roundig mode: near ++fsel :: ++input: 34.031250000000000 3.000000000000000 0 ++output: 34.031250000000000 ++fcsr: 0 ++roundig mode: near ++fsel :: ++input: 45786.750000000000000 -1.000000000000000 0 ++output: 45786.750000000000000 ++fcsr: 0 ++roundig mode: near ++fsel :: ++input: 1752065.000000000000000 1384.500000000000000 0 ++output: 1752065.000000000000000 ++fcsr: 0 ++roundig mode: near ++fsel :: ++input: 107.000000000000000 -7.000000000000000 0 ++output: 107.000000000000000 ++fcsr: 0 ++roundig mode: near ++fsel :: ++input: -45667.250000000000000 1000000000.000000000000000 0 ++output: -45667.250000000000000 ++fcsr: 0 
++roundig mode: near ++fsel :: ++input: -7.000000000000000 -5786.500000000000000 0 ++output: -7.000000000000000 ++fcsr: 0 ++roundig mode: near ++fsel :: ++input: -347856.500000000000000 1752.000000000000000 1 ++output: 1752.000000000000000 ++fcsr: 0 ++roundig mode: near ++fsel :: ++input: 356047.500000000000000 0.015625000000000 1 ++output: 0.015625000000000 ++fcsr: 0 ++roundig mode: near ++fsel :: ++input: -1.000000000000000 0.031250000000000 1 ++output: 0.031250000000000 ++fcsr: 0 ++roundig mode: near ++fsel :: ++input: 23.062500000000000 -248562.750000000000000 1 ++output: -248562.750000000000000 ++fcsr: 0 ++roundig mode: zero ++fsel :: ++input: 0.000000000000000 -45786.500000000000000 0 ++output: 0.000000000000000 ++fcsr: 0x100 ++roundig mode: zero ++fsel :: ++input: 456.250000000000000 456.250000000000000 1 ++output: 456.250000000000000 ++fcsr: 0x100 ++roundig mode: zero ++fsel :: ++input: 3.000000000000000 34.031250000000000 0 ++output: 3.000000000000000 ++fcsr: 0x100 ++roundig mode: zero ++fsel :: ++input: -1.000000000000000 45786.750000000000000 1 ++output: 45786.750000000000000 ++fcsr: 0x100 ++roundig mode: zero ++fsel :: ++input: 1384.500000000000000 1752065.000000000000000 1 ++output: 1752065.000000000000000 ++fcsr: 0x100 ++roundig mode: zero ++fsel :: ++input: -7.250000000000000 107.000000000000000 0 ++output: -7.250000000000000 ++fcsr: 0x100 ++roundig mode: zero ++fsel :: ++input: 1000000000.000000000000000 -45667.250000000000000 1 ++output: -45667.250000000000000 ++fcsr: 0x100 ++roundig mode: zero ++fsel :: ++input: -5786.500000000000000 -7.250000000000000 0 ++output: -5786.500000000000000 ++fcsr: 0x100 ++roundig mode: zero ++fsel :: ++input: 1752.000000000000000 -347856.500000000000000 0 ++output: 1752.000000000000000 ++fcsr: 0x100 ++roundig mode: zero ++fsel :: ++input: 0.015625000000000 356047.500000000000000 0 ++output: 0.015625000000000 ++fcsr: 0x100 ++roundig mode: zero ++fsel :: ++input: 0.031250000000000 -1.000000000000000 1 ++output: -1.000000000000000 ++fcsr: 0x100 ++roundig mode: zero ++fsel :: ++input: -248562.750000000000000 23.062500000000000 1 ++output: 23.062500000000000 ++fcsr: 0x100 ++roundig mode: zero ++fsel :: ++input: -45786.500000000000000 0.000000000000000 1 ++output: 0.000000000000000 ++fcsr: 0x100 ++roundig mode: zero ++fsel :: ++input: 456.000000000000000 456.250000000000000 1 ++output: 456.250000000000000 ++fcsr: 0x100 ++roundig mode: zero ++fsel :: ++input: 34.031250000000000 3.000000000000000 0 ++output: 34.031250000000000 ++fcsr: 0x100 ++roundig mode: zero ++fsel :: ++input: 45786.750000000000000 -1.000000000000000 0 ++output: 45786.750000000000000 ++fcsr: 0x100 ++roundig mode: zero ++fsel :: ++input: 1752065.000000000000000 1384.500000000000000 0 ++output: 1752065.000000000000000 ++fcsr: 0x100 ++roundig mode: zero ++fsel :: ++input: 107.000000000000000 -7.000000000000000 0 ++output: 107.000000000000000 ++fcsr: 0x100 ++roundig mode: zero ++fsel :: ++input: -45667.250000000000000 1000000000.000000000000000 0 ++output: -45667.250000000000000 ++fcsr: 0x100 ++roundig mode: zero ++fsel :: ++input: -7.000000000000000 -5786.500000000000000 0 ++output: -7.000000000000000 ++fcsr: 0x100 ++roundig mode: zero ++fsel :: ++input: -347856.500000000000000 1752.000000000000000 1 ++output: 1752.000000000000000 ++fcsr: 0x100 ++roundig mode: zero ++fsel :: ++input: 356047.500000000000000 0.015625000000000 1 ++output: 0.015625000000000 ++fcsr: 0x100 ++roundig mode: zero ++fsel :: ++input: -1.000000000000000 0.031250000000000 1 ++output: 0.031250000000000 ++fcsr: 
0x100 ++roundig mode: zero ++fsel :: ++input: 23.062500000000000 -248562.750000000000000 1 ++output: -248562.750000000000000 ++fcsr: 0x100 ++roundig mode: +inf ++fsel :: ++input: 0.000000000000000 -45786.500000000000000 0 ++output: 0.000000000000000 ++fcsr: 0x200 ++roundig mode: +inf ++fsel :: ++input: 456.250000000000000 456.250000000000000 1 ++output: 456.250000000000000 ++fcsr: 0x200 ++roundig mode: +inf ++fsel :: ++input: 3.000000000000000 34.031250000000000 0 ++output: 3.000000000000000 ++fcsr: 0x200 ++roundig mode: +inf ++fsel :: ++input: -1.000000000000000 45786.750000000000000 1 ++output: 45786.750000000000000 ++fcsr: 0x200 ++roundig mode: +inf ++fsel :: ++input: 1384.500000000000000 1752065.000000000000000 1 ++output: 1752065.000000000000000 ++fcsr: 0x200 ++roundig mode: +inf ++fsel :: ++input: -7.250000000000000 107.000000000000000 0 ++output: -7.250000000000000 ++fcsr: 0x200 ++roundig mode: +inf ++fsel :: ++input: 1000000000.000000000000000 -45667.250000000000000 1 ++output: -45667.250000000000000 ++fcsr: 0x200 ++roundig mode: +inf ++fsel :: ++input: -5786.500000000000000 -7.250000000000000 0 ++output: -5786.500000000000000 ++fcsr: 0x200 ++roundig mode: +inf ++fsel :: ++input: 1752.000000000000000 -347856.500000000000000 0 ++output: 1752.000000000000000 ++fcsr: 0x200 ++roundig mode: +inf ++fsel :: ++input: 0.015625000000000 356047.500000000000000 0 ++output: 0.015625000000000 ++fcsr: 0x200 ++roundig mode: +inf ++fsel :: ++input: 0.031250000000000 -1.000000000000000 1 ++output: -1.000000000000000 ++fcsr: 0x200 ++roundig mode: +inf ++fsel :: ++input: -248562.750000000000000 23.062500000000000 1 ++output: 23.062500000000000 ++fcsr: 0x200 ++roundig mode: +inf ++fsel :: ++input: -45786.500000000000000 0.000000000000000 1 ++output: 0.000000000000000 ++fcsr: 0x200 ++roundig mode: +inf ++fsel :: ++input: 456.000000000000000 456.250000000000000 1 ++output: 456.250000000000000 ++fcsr: 0x200 ++roundig mode: +inf ++fsel :: ++input: 34.031250000000000 3.000000000000000 0 ++output: 34.031250000000000 ++fcsr: 0x200 ++roundig mode: +inf ++fsel :: ++input: 45786.750000000000000 -1.000000000000000 0 ++output: 45786.750000000000000 ++fcsr: 0x200 ++roundig mode: +inf ++fsel :: ++input: 1752065.000000000000000 1384.500000000000000 0 ++output: 1752065.000000000000000 ++fcsr: 0x200 ++roundig mode: +inf ++fsel :: ++input: 107.000000000000000 -7.000000000000000 0 ++output: 107.000000000000000 ++fcsr: 0x200 ++roundig mode: +inf ++fsel :: ++input: -45667.250000000000000 1000000000.000000000000000 0 ++output: -45667.250000000000000 ++fcsr: 0x200 ++roundig mode: +inf ++fsel :: ++input: -7.000000000000000 -5786.500000000000000 0 ++output: -7.000000000000000 ++fcsr: 0x200 ++roundig mode: +inf ++fsel :: ++input: -347856.500000000000000 1752.000000000000000 1 ++output: 1752.000000000000000 ++fcsr: 0x200 ++roundig mode: +inf ++fsel :: ++input: 356047.500000000000000 0.015625000000000 1 ++output: 0.015625000000000 ++fcsr: 0x200 ++roundig mode: +inf ++fsel :: ++input: -1.000000000000000 0.031250000000000 1 ++output: 0.031250000000000 ++fcsr: 0x200 ++roundig mode: +inf ++fsel :: ++input: 23.062500000000000 -248562.750000000000000 1 ++output: -248562.750000000000000 ++fcsr: 0x200 ++roundig mode: -inf ++fsel :: ++input: 0.000000000000000 -45786.500000000000000 0 ++output: 0.000000000000000 ++fcsr: 0x300 ++roundig mode: -inf ++fsel :: ++input: 456.250000000000000 456.250000000000000 1 ++output: 456.250000000000000 ++fcsr: 0x300 ++roundig mode: -inf ++fsel :: ++input: 3.000000000000000 34.031250000000000 0 ++output: 
3.000000000000000 ++fcsr: 0x300 ++roundig mode: -inf ++fsel :: ++input: -1.000000000000000 45786.750000000000000 1 ++output: 45786.750000000000000 ++fcsr: 0x300 ++roundig mode: -inf ++fsel :: ++input: 1384.500000000000000 1752065.000000000000000 1 ++output: 1752065.000000000000000 ++fcsr: 0x300 ++roundig mode: -inf ++fsel :: ++input: -7.250000000000000 107.000000000000000 0 ++output: -7.250000000000000 ++fcsr: 0x300 ++roundig mode: -inf ++fsel :: ++input: 1000000000.000000000000000 -45667.250000000000000 1 ++output: -45667.250000000000000 ++fcsr: 0x300 ++roundig mode: -inf ++fsel :: ++input: -5786.500000000000000 -7.250000000000000 0 ++output: -5786.500000000000000 ++fcsr: 0x300 ++roundig mode: -inf ++fsel :: ++input: 1752.000000000000000 -347856.500000000000000 0 ++output: 1752.000000000000000 ++fcsr: 0x300 ++roundig mode: -inf ++fsel :: ++input: 0.015625000000000 356047.500000000000000 0 ++output: 0.015625000000000 ++fcsr: 0x300 ++roundig mode: -inf ++fsel :: ++input: 0.031250000000000 -1.000000000000000 1 ++output: -1.000000000000000 ++fcsr: 0x300 ++roundig mode: -inf ++fsel :: ++input: -248562.750000000000000 23.062500000000000 1 ++output: 23.062500000000000 ++fcsr: 0x300 ++roundig mode: -inf ++fsel :: ++input: -45786.500000000000000 0.000000000000000 1 ++output: 0.000000000000000 ++fcsr: 0x300 ++roundig mode: -inf ++fsel :: ++input: 456.000000000000000 456.250000000000000 1 ++output: 456.250000000000000 ++fcsr: 0x300 ++roundig mode: -inf ++fsel :: ++input: 34.031250000000000 3.000000000000000 0 ++output: 34.031250000000000 ++fcsr: 0x300 ++roundig mode: -inf ++fsel :: ++input: 45786.750000000000000 -1.000000000000000 0 ++output: 45786.750000000000000 ++fcsr: 0x300 ++roundig mode: -inf ++fsel :: ++input: 1752065.000000000000000 1384.500000000000000 0 ++output: 1752065.000000000000000 ++fcsr: 0x300 ++roundig mode: -inf ++fsel :: ++input: 107.000000000000000 -7.000000000000000 0 ++output: 107.000000000000000 ++fcsr: 0x300 ++roundig mode: -inf ++fsel :: ++input: -45667.250000000000000 1000000000.000000000000000 0 ++output: -45667.250000000000000 ++fcsr: 0x300 ++roundig mode: -inf ++fsel :: ++input: -7.000000000000000 -5786.500000000000000 0 ++output: -7.000000000000000 ++fcsr: 0x300 ++roundig mode: -inf ++fsel :: ++input: -347856.500000000000000 1752.000000000000000 1 ++output: 1752.000000000000000 ++fcsr: 0x300 ++roundig mode: -inf ++fsel :: ++input: 356047.500000000000000 0.015625000000000 1 ++output: 0.015625000000000 ++fcsr: 0x300 ++roundig mode: -inf ++fsel :: ++input: -1.000000000000000 0.031250000000000 1 ++output: 0.031250000000000 ++fcsr: 0x300 ++roundig mode: -inf ++fsel :: ++input: 23.062500000000000 -248562.750000000000000 1 ++output: -248562.750000000000000 ++fcsr: 0x300 ++roundig mode: near ++fmov.s :: ++input: 0.000000 ++output: 0.000000 ++fcsr: 0 ++roundig mode: near ++fmov.s :: ++input: 456.250000 ++output: 456.250000 ++fcsr: 0 ++roundig mode: near ++fmov.s :: ++input: 3.000000 ++output: 3.000000 ++fcsr: 0 ++roundig mode: near ++fmov.s :: ++input: -1.000000 ++output: -1.000000 ++fcsr: 0 ++roundig mode: near ++fmov.s :: ++input: 1384.500000 ++output: 1384.500000 ++fcsr: 0 ++roundig mode: near ++fmov.s :: ++input: -7.250000 ++output: -7.250000 ++fcsr: 0 ++roundig mode: near ++fmov.s :: ++input: 1000000000.000000 ++output: 1000000000.000000 ++fcsr: 0 ++roundig mode: near ++fmov.s :: ++input: -5786.500000 ++output: -5786.500000 ++fcsr: 0 ++roundig mode: near ++fmov.s :: ++input: 1752.000000 ++output: 1752.000000 ++fcsr: 0 ++roundig mode: near ++fmov.s :: ++input: 0.015625 
++output: 0.015625 ++fcsr: 0 ++roundig mode: near ++fmov.s :: ++input: 0.031250 ++output: 0.031250 ++fcsr: 0 ++roundig mode: near ++fmov.s :: ++input: -248562.750000 ++output: -248562.750000 ++fcsr: 0 ++roundig mode: near ++fmov.s :: ++input: -45786.500000 ++output: -45786.500000 ++fcsr: 0 ++roundig mode: near ++fmov.s :: ++input: 456.000000 ++output: 456.000000 ++fcsr: 0 ++roundig mode: near ++fmov.s :: ++input: 34.031250 ++output: 34.031250 ++fcsr: 0 ++roundig mode: near ++fmov.s :: ++input: 45786.750000 ++output: 45786.750000 ++fcsr: 0 ++roundig mode: near ++fmov.s :: ++input: 1752065.000000 ++output: 1752065.000000 ++fcsr: 0 ++roundig mode: near ++fmov.s :: ++input: 107.000000 ++output: 107.000000 ++fcsr: 0 ++roundig mode: near ++fmov.s :: ++input: -45667.250000 ++output: -45667.250000 ++fcsr: 0 ++roundig mode: near ++fmov.s :: ++input: -7.000000 ++output: -7.000000 ++fcsr: 0 ++roundig mode: near ++fmov.s :: ++input: -347856.500000 ++output: -347856.500000 ++fcsr: 0 ++roundig mode: near ++fmov.s :: ++input: 356047.500000 ++output: 356047.500000 ++fcsr: 0 ++roundig mode: near ++fmov.s :: ++input: -1.000000 ++output: -1.000000 ++fcsr: 0 ++roundig mode: near ++fmov.s :: ++input: 23.062500 ++output: 23.062500 ++fcsr: 0 ++roundig mode: zero ++fmov.s :: ++input: 0.000000 ++output: 0.000000 ++fcsr: 0x100 ++roundig mode: zero ++fmov.s :: ++input: 456.250000 ++output: 456.250000 ++fcsr: 0x100 ++roundig mode: zero ++fmov.s :: ++input: 3.000000 ++output: 3.000000 ++fcsr: 0x100 ++roundig mode: zero ++fmov.s :: ++input: -1.000000 ++output: -1.000000 ++fcsr: 0x100 ++roundig mode: zero ++fmov.s :: ++input: 1384.500000 ++output: 1384.500000 ++fcsr: 0x100 ++roundig mode: zero ++fmov.s :: ++input: -7.250000 ++output: -7.250000 ++fcsr: 0x100 ++roundig mode: zero ++fmov.s :: ++input: 1000000000.000000 ++output: 1000000000.000000 ++fcsr: 0x100 ++roundig mode: zero ++fmov.s :: ++input: -5786.500000 ++output: -5786.500000 ++fcsr: 0x100 ++roundig mode: zero ++fmov.s :: ++input: 1752.000000 ++output: 1752.000000 ++fcsr: 0x100 ++roundig mode: zero ++fmov.s :: ++input: 0.015625 ++output: 0.015625 ++fcsr: 0x100 ++roundig mode: zero ++fmov.s :: ++input: 0.031250 ++output: 0.031250 ++fcsr: 0x100 ++roundig mode: zero ++fmov.s :: ++input: -248562.750000 ++output: -248562.750000 ++fcsr: 0x100 ++roundig mode: zero ++fmov.s :: ++input: -45786.500000 ++output: -45786.500000 ++fcsr: 0x100 ++roundig mode: zero ++fmov.s :: ++input: 456.000000 ++output: 456.000000 ++fcsr: 0x100 ++roundig mode: zero ++fmov.s :: ++input: 34.031250 ++output: 34.031250 ++fcsr: 0x100 ++roundig mode: zero ++fmov.s :: ++input: 45786.750000 ++output: 45786.750000 ++fcsr: 0x100 ++roundig mode: zero ++fmov.s :: ++input: 1752065.000000 ++output: 1752065.000000 ++fcsr: 0x100 ++roundig mode: zero ++fmov.s :: ++input: 107.000000 ++output: 107.000000 ++fcsr: 0x100 ++roundig mode: zero ++fmov.s :: ++input: -45667.250000 ++output: -45667.250000 ++fcsr: 0x100 ++roundig mode: zero ++fmov.s :: ++input: -7.000000 ++output: -7.000000 ++fcsr: 0x100 ++roundig mode: zero ++fmov.s :: ++input: -347856.500000 ++output: -347856.500000 ++fcsr: 0x100 ++roundig mode: zero ++fmov.s :: ++input: 356047.500000 ++output: 356047.500000 ++fcsr: 0x100 ++roundig mode: zero ++fmov.s :: ++input: -1.000000 ++output: -1.000000 ++fcsr: 0x100 ++roundig mode: zero ++fmov.s :: ++input: 23.062500 ++output: 23.062500 ++fcsr: 0x100 ++roundig mode: +inf ++fmov.s :: ++input: 0.000000 ++output: 0.000000 ++fcsr: 0x200 ++roundig mode: +inf ++fmov.s :: ++input: 456.250000 ++output: 456.250000 
++fcsr: 0x200 ++roundig mode: +inf ++fmov.s :: ++input: 3.000000 ++output: 3.000000 ++fcsr: 0x200 ++roundig mode: +inf ++fmov.s :: ++input: -1.000000 ++output: -1.000000 ++fcsr: 0x200 ++roundig mode: +inf ++fmov.s :: ++input: 1384.500000 ++output: 1384.500000 ++fcsr: 0x200 ++roundig mode: +inf ++fmov.s :: ++input: -7.250000 ++output: -7.250000 ++fcsr: 0x200 ++roundig mode: +inf ++fmov.s :: ++input: 1000000000.000000 ++output: 1000000000.000000 ++fcsr: 0x200 ++roundig mode: +inf ++fmov.s :: ++input: -5786.500000 ++output: -5786.500000 ++fcsr: 0x200 ++roundig mode: +inf ++fmov.s :: ++input: 1752.000000 ++output: 1752.000000 ++fcsr: 0x200 ++roundig mode: +inf ++fmov.s :: ++input: 0.015625 ++output: 0.015625 ++fcsr: 0x200 ++roundig mode: +inf ++fmov.s :: ++input: 0.031250 ++output: 0.031250 ++fcsr: 0x200 ++roundig mode: +inf ++fmov.s :: ++input: -248562.750000 ++output: -248562.750000 ++fcsr: 0x200 ++roundig mode: +inf ++fmov.s :: ++input: -45786.500000 ++output: -45786.500000 ++fcsr: 0x200 ++roundig mode: +inf ++fmov.s :: ++input: 456.000000 ++output: 456.000000 ++fcsr: 0x200 ++roundig mode: +inf ++fmov.s :: ++input: 34.031250 ++output: 34.031250 ++fcsr: 0x200 ++roundig mode: +inf ++fmov.s :: ++input: 45786.750000 ++output: 45786.750000 ++fcsr: 0x200 ++roundig mode: +inf ++fmov.s :: ++input: 1752065.000000 ++output: 1752065.000000 ++fcsr: 0x200 ++roundig mode: +inf ++fmov.s :: ++input: 107.000000 ++output: 107.000000 ++fcsr: 0x200 ++roundig mode: +inf ++fmov.s :: ++input: -45667.250000 ++output: -45667.250000 ++fcsr: 0x200 ++roundig mode: +inf ++fmov.s :: ++input: -7.000000 ++output: -7.000000 ++fcsr: 0x200 ++roundig mode: +inf ++fmov.s :: ++input: -347856.500000 ++output: -347856.500000 ++fcsr: 0x200 ++roundig mode: +inf ++fmov.s :: ++input: 356047.500000 ++output: 356047.500000 ++fcsr: 0x200 ++roundig mode: +inf ++fmov.s :: ++input: -1.000000 ++output: -1.000000 ++fcsr: 0x200 ++roundig mode: +inf ++fmov.s :: ++input: 23.062500 ++output: 23.062500 ++fcsr: 0x200 ++roundig mode: -inf ++fmov.s :: ++input: 0.000000 ++output: 0.000000 ++fcsr: 0x300 ++roundig mode: -inf ++fmov.s :: ++input: 456.250000 ++output: 456.250000 ++fcsr: 0x300 ++roundig mode: -inf ++fmov.s :: ++input: 3.000000 ++output: 3.000000 ++fcsr: 0x300 ++roundig mode: -inf ++fmov.s :: ++input: -1.000000 ++output: -1.000000 ++fcsr: 0x300 ++roundig mode: -inf ++fmov.s :: ++input: 1384.500000 ++output: 1384.500000 ++fcsr: 0x300 ++roundig mode: -inf ++fmov.s :: ++input: -7.250000 ++output: -7.250000 ++fcsr: 0x300 ++roundig mode: -inf ++fmov.s :: ++input: 1000000000.000000 ++output: 1000000000.000000 ++fcsr: 0x300 ++roundig mode: -inf ++fmov.s :: ++input: -5786.500000 ++output: -5786.500000 ++fcsr: 0x300 ++roundig mode: -inf ++fmov.s :: ++input: 1752.000000 ++output: 1752.000000 ++fcsr: 0x300 ++roundig mode: -inf ++fmov.s :: ++input: 0.015625 ++output: 0.015625 ++fcsr: 0x300 ++roundig mode: -inf ++fmov.s :: ++input: 0.031250 ++output: 0.031250 ++fcsr: 0x300 ++roundig mode: -inf ++fmov.s :: ++input: -248562.750000 ++output: -248562.750000 ++fcsr: 0x300 ++roundig mode: -inf ++fmov.s :: ++input: -45786.500000 ++output: -45786.500000 ++fcsr: 0x300 ++roundig mode: -inf ++fmov.s :: ++input: 456.000000 ++output: 456.000000 ++fcsr: 0x300 ++roundig mode: -inf ++fmov.s :: ++input: 34.031250 ++output: 34.031250 ++fcsr: 0x300 ++roundig mode: -inf ++fmov.s :: ++input: 45786.750000 ++output: 45786.750000 ++fcsr: 0x300 ++roundig mode: -inf ++fmov.s :: ++input: 1752065.000000 ++output: 1752065.000000 ++fcsr: 0x300 ++roundig mode: -inf ++fmov.s :: 
++input: 107.000000 ++output: 107.000000 ++fcsr: 0x300 ++roundig mode: -inf ++fmov.s :: ++input: -45667.250000 ++output: -45667.250000 ++fcsr: 0x300 ++roundig mode: -inf ++fmov.s :: ++input: -7.000000 ++output: -7.000000 ++fcsr: 0x300 ++roundig mode: -inf ++fmov.s :: ++input: -347856.500000 ++output: -347856.500000 ++fcsr: 0x300 ++roundig mode: -inf ++fmov.s :: ++input: 356047.500000 ++output: 356047.500000 ++fcsr: 0x300 ++roundig mode: -inf ++fmov.s :: ++input: -1.000000 ++output: -1.000000 ++fcsr: 0x300 ++roundig mode: -inf ++fmov.s :: ++input: 23.062500 ++output: 23.062500 ++fcsr: 0x300 ++roundig mode: near ++fmov.d :: ++input: 0.000000000000000 ++output: 0.000000000000000 ++fcsr: 0 ++roundig mode: near ++fmov.d :: ++input: 456.250000000000000 ++output: 456.250000000000000 ++fcsr: 0 ++roundig mode: near ++fmov.d :: ++input: 3.000000000000000 ++output: 3.000000000000000 ++fcsr: 0 ++roundig mode: near ++fmov.d :: ++input: -1.000000000000000 ++output: -1.000000000000000 ++fcsr: 0 ++roundig mode: near ++fmov.d :: ++input: 1384.500000000000000 ++output: 1384.500000000000000 ++fcsr: 0 ++roundig mode: near ++fmov.d :: ++input: -7.250000000000000 ++output: -7.250000000000000 ++fcsr: 0 ++roundig mode: near ++fmov.d :: ++input: 1000000000.000000000000000 ++output: 1000000000.000000000000000 ++fcsr: 0 ++roundig mode: near ++fmov.d :: ++input: -5786.500000000000000 ++output: -5786.500000000000000 ++fcsr: 0 ++roundig mode: near ++fmov.d :: ++input: 1752.000000000000000 ++output: 1752.000000000000000 ++fcsr: 0 ++roundig mode: near ++fmov.d :: ++input: 0.015625000000000 ++output: 0.015625000000000 ++fcsr: 0 ++roundig mode: near ++fmov.d :: ++input: 0.031250000000000 ++output: 0.031250000000000 ++fcsr: 0 ++roundig mode: near ++fmov.d :: ++input: -248562.750000000000000 ++output: -248562.750000000000000 ++fcsr: 0 ++roundig mode: near ++fmov.d :: ++input: -45786.500000000000000 ++output: -45786.500000000000000 ++fcsr: 0 ++roundig mode: near ++fmov.d :: ++input: 456.000000000000000 ++output: 456.000000000000000 ++fcsr: 0 ++roundig mode: near ++fmov.d :: ++input: 34.031250000000000 ++output: 34.031250000000000 ++fcsr: 0 ++roundig mode: near ++fmov.d :: ++input: 45786.750000000000000 ++output: 45786.750000000000000 ++fcsr: 0 ++roundig mode: near ++fmov.d :: ++input: 1752065.000000000000000 ++output: 1752065.000000000000000 ++fcsr: 0 ++roundig mode: near ++fmov.d :: ++input: 107.000000000000000 ++output: 107.000000000000000 ++fcsr: 0 ++roundig mode: near ++fmov.d :: ++input: -45667.250000000000000 ++output: -45667.250000000000000 ++fcsr: 0 ++roundig mode: near ++fmov.d :: ++input: -7.000000000000000 ++output: -7.000000000000000 ++fcsr: 0 ++roundig mode: near ++fmov.d :: ++input: -347856.500000000000000 ++output: -347856.500000000000000 ++fcsr: 0 ++roundig mode: near ++fmov.d :: ++input: 356047.500000000000000 ++output: 356047.500000000000000 ++fcsr: 0 ++roundig mode: near ++fmov.d :: ++input: -1.000000000000000 ++output: -1.000000000000000 ++fcsr: 0 ++roundig mode: near ++fmov.d :: ++input: 23.062500000000000 ++output: 23.062500000000000 ++fcsr: 0 ++roundig mode: zero ++fmov.d :: ++input: 0.000000000000000 ++output: 0.000000000000000 ++fcsr: 0x100 ++roundig mode: zero ++fmov.d :: ++input: 456.250000000000000 ++output: 456.250000000000000 ++fcsr: 0x100 ++roundig mode: zero ++fmov.d :: ++input: 3.000000000000000 ++output: 3.000000000000000 ++fcsr: 0x100 ++roundig mode: zero ++fmov.d :: ++input: -1.000000000000000 ++output: -1.000000000000000 ++fcsr: 0x100 ++roundig mode: zero ++fmov.d :: ++input: 
1384.500000000000000 ++output: 1384.500000000000000 ++fcsr: 0x100 ++roundig mode: zero ++fmov.d :: ++input: -7.250000000000000 ++output: -7.250000000000000 ++fcsr: 0x100 ++roundig mode: zero ++fmov.d :: ++input: 1000000000.000000000000000 ++output: 1000000000.000000000000000 ++fcsr: 0x100 ++roundig mode: zero ++fmov.d :: ++input: -5786.500000000000000 ++output: -5786.500000000000000 ++fcsr: 0x100 ++roundig mode: zero ++fmov.d :: ++input: 1752.000000000000000 ++output: 1752.000000000000000 ++fcsr: 0x100 ++roundig mode: zero ++fmov.d :: ++input: 0.015625000000000 ++output: 0.015625000000000 ++fcsr: 0x100 ++roundig mode: zero ++fmov.d :: ++input: 0.031250000000000 ++output: 0.031250000000000 ++fcsr: 0x100 ++roundig mode: zero ++fmov.d :: ++input: -248562.750000000000000 ++output: -248562.750000000000000 ++fcsr: 0x100 ++roundig mode: zero ++fmov.d :: ++input: -45786.500000000000000 ++output: -45786.500000000000000 ++fcsr: 0x100 ++roundig mode: zero ++fmov.d :: ++input: 456.000000000000000 ++output: 456.000000000000000 ++fcsr: 0x100 ++roundig mode: zero ++fmov.d :: ++input: 34.031250000000000 ++output: 34.031250000000000 ++fcsr: 0x100 ++roundig mode: zero ++fmov.d :: ++input: 45786.750000000000000 ++output: 45786.750000000000000 ++fcsr: 0x100 ++roundig mode: zero ++fmov.d :: ++input: 1752065.000000000000000 ++output: 1752065.000000000000000 ++fcsr: 0x100 ++roundig mode: zero ++fmov.d :: ++input: 107.000000000000000 ++output: 107.000000000000000 ++fcsr: 0x100 ++roundig mode: zero ++fmov.d :: ++input: -45667.250000000000000 ++output: -45667.250000000000000 ++fcsr: 0x100 ++roundig mode: zero ++fmov.d :: ++input: -7.000000000000000 ++output: -7.000000000000000 ++fcsr: 0x100 ++roundig mode: zero ++fmov.d :: ++input: -347856.500000000000000 ++output: -347856.500000000000000 ++fcsr: 0x100 ++roundig mode: zero ++fmov.d :: ++input: 356047.500000000000000 ++output: 356047.500000000000000 ++fcsr: 0x100 ++roundig mode: zero ++fmov.d :: ++input: -1.000000000000000 ++output: -1.000000000000000 ++fcsr: 0x100 ++roundig mode: zero ++fmov.d :: ++input: 23.062500000000000 ++output: 23.062500000000000 ++fcsr: 0x100 ++roundig mode: +inf ++fmov.d :: ++input: 0.000000000000000 ++output: 0.000000000000000 ++fcsr: 0x200 ++roundig mode: +inf ++fmov.d :: ++input: 456.250000000000000 ++output: 456.250000000000000 ++fcsr: 0x200 ++roundig mode: +inf ++fmov.d :: ++input: 3.000000000000000 ++output: 3.000000000000000 ++fcsr: 0x200 ++roundig mode: +inf ++fmov.d :: ++input: -1.000000000000000 ++output: -1.000000000000000 ++fcsr: 0x200 ++roundig mode: +inf ++fmov.d :: ++input: 1384.500000000000000 ++output: 1384.500000000000000 ++fcsr: 0x200 ++roundig mode: +inf ++fmov.d :: ++input: -7.250000000000000 ++output: -7.250000000000000 ++fcsr: 0x200 ++roundig mode: +inf ++fmov.d :: ++input: 1000000000.000000000000000 ++output: 1000000000.000000000000000 ++fcsr: 0x200 ++roundig mode: +inf ++fmov.d :: ++input: -5786.500000000000000 ++output: -5786.500000000000000 ++fcsr: 0x200 ++roundig mode: +inf ++fmov.d :: ++input: 1752.000000000000000 ++output: 1752.000000000000000 ++fcsr: 0x200 ++roundig mode: +inf ++fmov.d :: ++input: 0.015625000000000 ++output: 0.015625000000000 ++fcsr: 0x200 ++roundig mode: +inf ++fmov.d :: ++input: 0.031250000000000 ++output: 0.031250000000000 ++fcsr: 0x200 ++roundig mode: +inf ++fmov.d :: ++input: -248562.750000000000000 ++output: -248562.750000000000000 ++fcsr: 0x200 ++roundig mode: +inf ++fmov.d :: ++input: -45786.500000000000000 ++output: -45786.500000000000000 ++fcsr: 0x200 ++roundig mode: +inf ++fmov.d :: 
++input: 456.000000000000000 ++output: 456.000000000000000 ++fcsr: 0x200 ++roundig mode: +inf ++fmov.d :: ++input: 34.031250000000000 ++output: 34.031250000000000 ++fcsr: 0x200 ++roundig mode: +inf ++fmov.d :: ++input: 45786.750000000000000 ++output: 45786.750000000000000 ++fcsr: 0x200 ++roundig mode: +inf ++fmov.d :: ++input: 1752065.000000000000000 ++output: 1752065.000000000000000 ++fcsr: 0x200 ++roundig mode: +inf ++fmov.d :: ++input: 107.000000000000000 ++output: 107.000000000000000 ++fcsr: 0x200 ++roundig mode: +inf ++fmov.d :: ++input: -45667.250000000000000 ++output: -45667.250000000000000 ++fcsr: 0x200 ++roundig mode: +inf ++fmov.d :: ++input: -7.000000000000000 ++output: -7.000000000000000 ++fcsr: 0x200 ++roundig mode: +inf ++fmov.d :: ++input: -347856.500000000000000 ++output: -347856.500000000000000 ++fcsr: 0x200 ++roundig mode: +inf ++fmov.d :: ++input: 356047.500000000000000 ++output: 356047.500000000000000 ++fcsr: 0x200 ++roundig mode: +inf ++fmov.d :: ++input: -1.000000000000000 ++output: -1.000000000000000 ++fcsr: 0x200 ++roundig mode: +inf ++fmov.d :: ++input: 23.062500000000000 ++output: 23.062500000000000 ++fcsr: 0x200 ++roundig mode: -inf ++fmov.d :: ++input: 0.000000000000000 ++output: 0.000000000000000 ++fcsr: 0x300 ++roundig mode: -inf ++fmov.d :: ++input: 456.250000000000000 ++output: 456.250000000000000 ++fcsr: 0x300 ++roundig mode: -inf ++fmov.d :: ++input: 3.000000000000000 ++output: 3.000000000000000 ++fcsr: 0x300 ++roundig mode: -inf ++fmov.d :: ++input: -1.000000000000000 ++output: -1.000000000000000 ++fcsr: 0x300 ++roundig mode: -inf ++fmov.d :: ++input: 1384.500000000000000 ++output: 1384.500000000000000 ++fcsr: 0x300 ++roundig mode: -inf ++fmov.d :: ++input: -7.250000000000000 ++output: -7.250000000000000 ++fcsr: 0x300 ++roundig mode: -inf ++fmov.d :: ++input: 1000000000.000000000000000 ++output: 1000000000.000000000000000 ++fcsr: 0x300 ++roundig mode: -inf ++fmov.d :: ++input: -5786.500000000000000 ++output: -5786.500000000000000 ++fcsr: 0x300 ++roundig mode: -inf ++fmov.d :: ++input: 1752.000000000000000 ++output: 1752.000000000000000 ++fcsr: 0x300 ++roundig mode: -inf ++fmov.d :: ++input: 0.015625000000000 ++output: 0.015625000000000 ++fcsr: 0x300 ++roundig mode: -inf ++fmov.d :: ++input: 0.031250000000000 ++output: 0.031250000000000 ++fcsr: 0x300 ++roundig mode: -inf ++fmov.d :: ++input: -248562.750000000000000 ++output: -248562.750000000000000 ++fcsr: 0x300 ++roundig mode: -inf ++fmov.d :: ++input: -45786.500000000000000 ++output: -45786.500000000000000 ++fcsr: 0x300 ++roundig mode: -inf ++fmov.d :: ++input: 456.000000000000000 ++output: 456.000000000000000 ++fcsr: 0x300 ++roundig mode: -inf ++fmov.d :: ++input: 34.031250000000000 ++output: 34.031250000000000 ++fcsr: 0x300 ++roundig mode: -inf ++fmov.d :: ++input: 45786.750000000000000 ++output: 45786.750000000000000 ++fcsr: 0x300 ++roundig mode: -inf ++fmov.d :: ++input: 1752065.000000000000000 ++output: 1752065.000000000000000 ++fcsr: 0x300 ++roundig mode: -inf ++fmov.d :: ++input: 107.000000000000000 ++output: 107.000000000000000 ++fcsr: 0x300 ++roundig mode: -inf ++fmov.d :: ++input: -45667.250000000000000 ++output: -45667.250000000000000 ++fcsr: 0x300 ++roundig mode: -inf ++fmov.d :: ++input: -7.000000000000000 ++output: -7.000000000000000 ++fcsr: 0x300 ++roundig mode: -inf ++fmov.d :: ++input: -347856.500000000000000 ++output: -347856.500000000000000 ++fcsr: 0x300 ++roundig mode: -inf ++fmov.d :: ++input: 356047.500000000000000 ++output: 356047.500000000000000 ++fcsr: 0x300 ++roundig mode: -inf 
++fmov.d :: ++input: -1.000000000000000 ++output: -1.000000000000000 ++fcsr: 0x300 ++roundig mode: -inf ++fmov.d :: ++input: 23.062500000000000 ++output: 23.062500000000000 ++fcsr: 0x300 +diff --git a/none/tests/loongarch64/float.vgtest b/none/tests/loongarch64/float.vgtest +new file mode 100644 +index 000000000..e65d9699b +--- /dev/null ++++ b/none/tests/loongarch64/float.vgtest +@@ -0,0 +1,3 @@ ++prereq: ../../../tests/loongarch64_features fpu ++prog: float ++vgopts: -q +diff --git a/none/tests/loongarch64/integer.c b/none/tests/loongarch64/integer.c +new file mode 100644 +index 000000000..369d6285d +--- /dev/null ++++ b/none/tests/loongarch64/integer.c +@@ -0,0 +1,1311 @@ ++#include <assert.h> ++#include <stdio.h> ++ ++typedef enum { ++ SA2, SA2_1 /* for alsl */, SA3, ++ MSBW, LSBW, MSBD, LSBD, ++ UI5, UI6, UI12, ++ SI12, SI14, SI16, SI20 ++} imm_t; ++ ++static inline void showImm (unsigned int i, imm_t ty) ++{ ++ switch (ty) { ++ case SA2: ++ assert(i < (1 << 2)); ++ break; ++ case SA2_1: ++ assert(i < (1 << 3)); ++ break; ++ case SA3: ++ assert(i < (1 << 3)); ++ break; ++ case MSBW: ++ assert(i < (1 << 5)); ++ break; ++ case LSBW: ++ assert(i < (1 << 5)); ++ break; ++ case MSBD: ++ assert(i < (1 << 6)); ++ break; ++ case LSBD: ++ assert(i < (1 << 6)); ++ break; ++ case UI5: ++ assert(i < (1 << 5)); ++ break; ++ case UI6: ++ assert(i < (1 << 6)); ++ break; ++ case UI12: ++ assert(i < (1 << 12)); ++ break; ++ case SI12: ++ assert(i < (1 << 12) || (i >> 12) == 0xfffff); ++ break; ++ case SI14: ++ assert(i < (1 << 14) || (i >> 14) == 0x3ffff); ++ break; ++ case SI16: ++ assert(i < (1 << 16) || (i >> 16) == 0xffff); ++ break; ++ case SI20: ++ assert(i < (1 << 20) || (i >> 20) == 0xfff); ++ break; ++ default: ++ assert(0); ++ break; ++ } ++ printf("%d", i); ++} ++ ++#define TESTINST_RR(insn, rd, rj, v1, v2) \ ++ { \ ++ unsigned long res1, res2; \ ++ unsigned long val1 = (unsigned long)v1; \ ++ unsigned long val2 = (unsigned long)v2; \ ++ __asm__ __volatile__( \ ++ "move " rd ", %2 \n\t" \ ++ "move " rj ", %3 \n\t" \ ++ insn " " rd ", " rj " \n\t" \ ++ "move %0, " rd " \n\t" \ ++ "move %1, " rj " \n\t" \ ++ : "=r" (res1), "=r" (res2) \ ++ : "r" (val1), "r" (val2) \ ++ : rd, rj, "memory"); \ ++ printf("%s %s, %s ::\n", insn, rd, rj); \ ++ printf("before: %s=%#018lx, %s=%#018lx\n", \ ++ rd, val1, rj, val2); \ ++ printf("after: %s=%#018lx, %s=%#018lx\n", \ ++ rd, res1, rj, res2); \ ++ } ++ ++#define TESTINST_RI(insn, rd, type, v1, imm) \ ++ { \ ++ unsigned long res1; \ ++ unsigned long val1 = (unsigned long)v1; \ ++ __asm__ __volatile__( \ ++ "move " rd ", %1 \n\t" \ ++ insn " " rd ", " #imm "\n\t" \ ++ "move %0, " rd " \n\t" \ ++ : "=r" (res1) \ ++ : "r" (val1) \ ++ : rd, "memory"); \ ++ printf("%s %s, ", insn, rd); \ ++ showImm(imm, type); \ ++ printf(" ::\n"); \ ++ printf("before: %s=%#018lx\n", rd, val1); \ ++ printf("after: %s=%#018lx\n", rd, res1); \ ++ } ++ ++#define TESTINST_RRRI(insn, rd, rj, rk, type, v1, v2, v3, imm) \ ++ { \ ++ unsigned long res1, res2, res3; \ ++ unsigned long val1 = (unsigned long)v1; \ ++ unsigned long val2 = (unsigned long)v2; \ ++ unsigned long val3 = (unsigned long)v3; \ ++ __asm__ __volatile__( \ ++ "move " rd ", %3 \n\t" \ ++ "move " rj ", %4 \n\t" \ ++ "move " rk ", %5 \n\t" \ ++ insn " " rd ", " rj ", " rk ", " #imm "\n\t" \ ++ "move %0, " rd " \n\t" \ ++ "move %1, " rj " \n\t" \ ++ "move %2, " rk " \n\t" \ ++ : "=r" (res1), "=r" (res2), "=r" (res3) \ ++ : "r" (val1), "r" (val2), "r" (val3) \ ++ : rd, rj, rk, "memory"); \ ++ printf("%s %s, %s, %s, ", insn, rd, rj, rk); \ ++ 
showImm(imm, type); \ ++ printf(" ::\n"); \ ++ printf("before: %s=%#018lx, %s=%#018lx, %s=%#018lx\n", \ ++ rd, val1, rj, val2, rk, val3); \ ++ printf("after: %s=%#018lx, %s=%#018lx, %s=%#018lx\n", \ ++ rd, res1, rj, res2, rk, res3); \ ++ } ++ ++#define TESTINST_RRR(insn, rd, rj, rk, v1, v2, v3) \ ++ { \ ++ unsigned long res1, res2, res3; \ ++ unsigned long val1 = (unsigned long)v1; \ ++ unsigned long val2 = (unsigned long)v2; \ ++ unsigned long val3 = (unsigned long)v3; \ ++ __asm__ __volatile__( \ ++ "move " rd ", %3 \n\t" \ ++ "move " rj ", %4 \n\t" \ ++ "move " rk ", %5 \n\t" \ ++ insn " " rd ", " rj ", " rk "\n\t" \ ++ "move %0, " rd " \n\t" \ ++ "move %1, " rj " \n\t" \ ++ "move %2, " rk " \n\t" \ ++ : "=r" (res1), "=r" (res2), "=r" (res3) \ ++ : "r" (val1), "r" (val2), "r" (val3) \ ++ : rd, rj, rk, "memory"); \ ++ printf("%s %s, %s, %s ::\n", insn, rd, rj, rk); \ ++ printf("before: %s=%#018lx, %s=%#018lx, %s=%#018lx\n", \ ++ rd, val1, rj, val2, rk, val3); \ ++ printf("after: %s=%#018lx, %s=%#018lx, %s=%#018lx\n", \ ++ rd, res1, rj, res2, rk, res3); \ ++ } ++ ++#define TESTINST_RRI(insn, rd, rj, type, v1, v2, imm) \ ++ { \ ++ unsigned long res1, res2; \ ++ unsigned long val1 = (unsigned long)v1; \ ++ unsigned long val2 = (unsigned long)v2; \ ++ __asm__ __volatile__( \ ++ "move " rd ", %2 \n\t" \ ++ "move " rj ", %3 \n\t" \ ++ insn " " rd ", " rj ", " #imm "\n\t" \ ++ "move %0, " rd " \n\t" \ ++ "move %1, " rj " \n\t" \ ++ : "=r" (res1), "=r" (res2) \ ++ : "r" (val1), "r" (val2) \ ++ : rd, rj, "memory"); \ ++ printf("%s %s, %s, ", insn, rd, rj); \ ++ showImm(imm, type); \ ++ printf(" ::\n"); \ ++ printf("before: %s=%#018lx, %s=%#018lx\n", \ ++ rd, val1, rj, val2); \ ++ printf("after: %s=%#018lx, %s=%#018lx\n", \ ++ rd, res1, rj, res2); \ ++ } ++ ++#define TESTINST_RRII(insn, rd, rj, type1, type2, v1, v2, imm1, imm2) \ ++ { \ ++ unsigned long res1, res2; \ ++ unsigned long val1 = (unsigned long)v1; \ ++ unsigned long val2 = (unsigned long)v2; \ ++ __asm__ __volatile__( \ ++ "move " rd ", %2 \n\t" \ ++ "move " rj ", %3 \n\t" \ ++ insn " " rd ", " rj ", " #imm1 ", " #imm2 "\n\t" \ ++ "move %0, " rd " \n\t" \ ++ "move %1, " rj " \n\t" \ ++ : "=r" (res1), "=r" (res2) \ ++ : "r" (val1), "r" (val2) \ ++ : rd, rj, "memory"); \ ++ printf("%s %s, %s, ", insn, rd, rj); \ ++ showImm(imm1, type1); \ ++ printf(", "); \ ++ showImm(imm2, type2); \ ++ printf(" ::\n"); \ ++ printf("before: %s=%#018lx, %s=%#018lx\n", \ ++ rd, val1, rj, val2); \ ++ printf("after: %s=%#018lx, %s=%#018lx\n", \ ++ rd, res1, rj, res2); \ ++ } ++ ++void test(void) ++{ ++ /* ---------------- add.w rd, rj, rk ---------------- */ ++ TESTINST_RRR("add.w", "$r19", "$r20", "$r25", 0xf7f01ffbc9696094UL, 0xb664b1ce21c8c7fcUL, 0xd0a02b79ace85cfUL); ++ TESTINST_RRR("add.w", "$r29", "$r9", "$r12", 0x5418cd6f6b640953UL, 0x6465907ca2dac58cUL, 0xefea76d0d526df3aUL); ++ TESTINST_RRR("add.w", "$r23", "$r15", "$r28", 0x6ae34fbc6f2f7a9aUL, 0xbf21c48ab5c2edccUL, 0x24824ebd458ed20eUL); ++ TESTINST_RRR("add.w", "$r27", "$r14", "$r26", 0x9f33e38db05616ccUL, 0xf12ee0c276c52c78UL, 0xc3054d65ecec3fe6UL); ++ TESTINST_RRR("add.w", "$r14", "$r23", "$r27", 0x17eaa07c4607901fUL, 0xa5fa9d0c8472848eUL, 0xa34301227bb57f76UL); ++ TESTINST_RRR("add.w", "$r19", "$r19", "$r4", 0xd2e0644d9532b5eaUL, 0x2957c6f0638238bcUL, 0xf01566d0031ee917UL); ++ TESTINST_RRR("add.w", "$r19", "$r26", "$r13", 0x7b39b3f2ccbdaf79UL, 0xee877221beef9d45UL, 0x4a743034eefe075dUL); ++ TESTINST_RRR("add.w", "$r29", "$r18", "$r14", 0x95214c4de7e6d3baUL, 0x26502eb481799cd1UL, 
0x34d57b775083fb91UL); ++ TESTINST_RRR("add.w", "$r16", "$r26", "$r8", 0xb66b18865bbb3036UL, 0x8881ccbe1e31aa8dUL, 0xffe0d2dde8325edcUL); ++ TESTINST_RRR("add.w", "$r26", "$r5", "$r8", 0xc367af71c905540cUL, 0xcdcbe4860d983fe3UL, 0x6687aa19ee1fc503UL); ++ ++ /* ---------------- add.d rd, rj, rk ---------------- */ ++ TESTINST_RRR("add.d", "$r16", "$r18", "$r8", 0xbe5505b409ce995cUL, 0x561a85fd57e87226UL, 0x923f3293987edab0UL); ++ TESTINST_RRR("add.d", "$r12", "$r7", "$r29", 0xff2682151edc3476UL, 0x90beb037eacfe3dbUL, 0xa4017082880f1151UL); ++ TESTINST_RRR("add.d", "$r31", "$r31", "$r5", 0x81e38385e39d9f16UL, 0xedb2ffa50c0c8b5fUL, 0x8776f30d75fc97c2UL); ++ TESTINST_RRR("add.d", "$r31", "$r6", "$r26", 0x64ff385d97b60dc2UL, 0x80f903f206f08f60UL, 0x4f5b589532e85398UL); ++ TESTINST_RRR("add.d", "$r25", "$r10", "$r20", 0xdd8973d6b99634caUL, 0x34c0fe5a72dd43d9UL, 0x2494af03cf5878e7UL); ++ TESTINST_RRR("add.d", "$r5", "$r10", "$r4", 0x94b272b05ffe39c8UL, 0x152d15efbbc54c04UL, 0x25afc06cf151ab29UL); ++ TESTINST_RRR("add.d", "$r19", "$r30", "$r18", 0xa6e14d42459cadf6UL, 0x558620ff616141b1UL, 0x1978905697120747UL); ++ TESTINST_RRR("add.d", "$r7", "$r8", "$r20", 0x2ea6f88031a29aeUL, 0x6a08c12301e00d49UL, 0xdd533acf17f59142UL); ++ TESTINST_RRR("add.d", "$r24", "$r14", "$r26", 0xb88df6b8315eb7a6UL, 0x137d04f7f6fe285UL, 0x2ccb253ff7ea93d6UL); ++ TESTINST_RRR("add.d", "$r7", "$r19", "$r23", 0xad464722c0967f28UL, 0x30295c1fd85ae029UL, 0x2c69edb227e01d94UL); ++ ++ /* ---------------- sub.w rd, rj, rk ---------------- */ ++ TESTINST_RRR("sub.w", "$r16", "$r28", "$r17", 0x8b0ba4ef20207fddUL, 0x90493cb39ff734a2UL, 0x519842bab5cc1208UL); ++ TESTINST_RRR("sub.w", "$r6", "$r13", "$r15", 0x13af983aafc53691UL, 0x27bc6a037865e47fUL, 0xe20df003930575d5UL); ++ TESTINST_RRR("sub.w", "$r8", "$r19", "$r23", 0x4177aec74585d42dUL, 0xba89b6aa9b7728acUL, 0xe6a089b8eaf43feUL); ++ TESTINST_RRR("sub.w", "$r7", "$r10", "$r23", 0xca1b83a7ab88912UL, 0xd5e2759ea82c2c80UL, 0x76e9d6f88c2624ffUL); ++ TESTINST_RRR("sub.w", "$r19", "$r24", "$r24", 0x99d63505ea0474b3UL, 0x1b53c4c34957af8eUL, 0x6146da47b731d3edUL); ++ TESTINST_RRR("sub.w", "$r26", "$r31", "$r7", 0x8eca560d8234ff55UL, 0x5beb18985c3f451eUL, 0x9c9634dfaa7b9313UL); ++ TESTINST_RRR("sub.w", "$r29", "$r16", "$r6", 0x229544d2cb1d5a64UL, 0xd23751d515597128UL, 0xa09dd29330aa8d15UL); ++ TESTINST_RRR("sub.w", "$r12", "$r16", "$r4", 0x229f5aefe9fb7fb7UL, 0x740ed49b5e95faeUL, 0xbc6304a0df442807UL); ++ TESTINST_RRR("sub.w", "$r30", "$r29", "$r26", 0x94f3a67d188df281UL, 0x48e066cdad20ac2UL, 0x1e032e60568554a7UL); ++ TESTINST_RRR("sub.w", "$r18", "$r23", "$r25", 0xedb4f44fb338ba4fUL, 0xf06e698cd08c8e7bUL, 0xa22b91e88b77d4d8UL); ++ ++ /* ---------------- sub.d rd, rj, rk ---------------- */ ++ TESTINST_RRR("sub.d", "$r18", "$r10", "$r27", 0x68647aa06a23c8f9UL, 0xd001cb46cb78fc4fUL, 0x460cc8702b1761f9UL); ++ TESTINST_RRR("sub.d", "$r7", "$r24", "$r18", 0x8d18e952fb747f43UL, 0x1e7d1a019fb96490UL, 0xb466fb9891e8c151UL); ++ TESTINST_RRR("sub.d", "$r4", "$r16", "$r27", 0x5f6647277ca4c99dUL, 0xa1156b863ec98e1dUL, 0xc15612f3ce819d64UL); ++ TESTINST_RRR("sub.d", "$r4", "$r25", "$r9", 0xe67b33778df480b4UL, 0xc24b2711be7e4ef1UL, 0xd940ca25b956100fUL); ++ TESTINST_RRR("sub.d", "$r5", "$r12", "$r18", 0x258ae461ef798ce7UL, 0x3f4984ea3f5692deUL, 0x99fa673f30e69019UL); ++ TESTINST_RRR("sub.d", "$r13", "$r10", "$r9", 0xdafb48debea5211eUL, 0xeac1d3b25f6bf8dbUL, 0x297d671b1c96e48fUL); ++ TESTINST_RRR("sub.d", "$r7", "$r15", "$r23", 0xc6b03274ff37baf6UL, 0x5b37ffc2c84aec9UL, 0x74d62a52cbaaec15UL); ++ 
TESTINST_RRR("sub.d", "$r26", "$r18", "$r26", 0x35c71e0956ffcd43UL, 0xad703a4e8078070bUL, 0x634924e8a9fdbb9eUL); ++ TESTINST_RRR("sub.d", "$r16", "$r29", "$r5", 0x18bf961cba922928UL, 0x54ed9198405f8983UL, 0x977f5b65e5f86b4aUL); ++ TESTINST_RRR("sub.d", "$r31", "$r28", "$r14", 0xa38a1e8cb3c7ba00UL, 0xd220d1ef3cf8f3f7UL, 0xc972df2ace170d61UL); ++ ++ /* ---------------- slt rd, rj, rk ---------------- */ ++ TESTINST_RRR("slt", "$r12", "$r17", "$r18", 0xd7a0e65c279e1082UL, 0x819edf00a849ba44UL, 0x41a0b2fe37d44db2UL); ++ TESTINST_RRR("slt", "$r31", "$r13", "$r18", 0x2ef00a5cfd100f71UL, 0x4792cd9f9abf36d3UL, 0x2c117902110ef9a8UL); ++ TESTINST_RRR("slt", "$r4", "$r30", "$r29", 0x6d8be2fb73e2c006UL, 0xf76ce97d7658995eUL, 0x3856e09bfe39df6eUL); ++ TESTINST_RRR("slt", "$r4", "$r18", "$r10", 0xeddcb9dcf092c3f5UL, 0xe57b7c25d13dea8UL, 0x761d86b48cb5ce21UL); ++ TESTINST_RRR("slt", "$r16", "$r18", "$r16", 0xcddd92e2340cd593UL, 0xc9a30f4707743f80UL, 0x3ff7d36f17396d3aUL); ++ TESTINST_RRR("slt", "$r6", "$r14", "$r10", 0xa9e71c6376093499UL, 0x26bb3955b588461fUL, 0xfae7e7a950447826UL); ++ TESTINST_RRR("slt", "$r19", "$r4", "$r17", 0x35bb27f64ebd7d62UL, 0x4a7d3941ebf88bc1UL, 0xcda32e4b1c1d5c4UL); ++ TESTINST_RRR("slt", "$r19", "$r28", "$r15", 0x29419b8261e40b99UL, 0xe7e9b059033afa7dUL, 0x1ea916293b1cc3ddUL); ++ TESTINST_RRR("slt", "$r31", "$r16", "$r16", 0xe0fb75047bc62c9aUL, 0xa634f6174dcced7dUL, 0xcca5a9d25b670e70UL); ++ TESTINST_RRR("slt", "$r4", "$r4", "$r10", 0x724ee03fb3fcdec8UL, 0xae2587f097065e2cUL, 0x65c69548f83dd0dfUL); ++ ++ /* ---------------- sltu rd, rj, rk ---------------- */ ++ TESTINST_RRR("sltu", "$r14", "$r10", "$r13", 0x1956e5498db3fb6eUL, 0x2d909abfec4490bdUL, 0xa7d554ebe591d5ccUL); ++ TESTINST_RRR("sltu", "$r6", "$r5", "$r18", 0xc34214447a064eb8UL, 0xad4413e45f0a226aUL, 0x4b09aab500b04bffUL); ++ TESTINST_RRR("sltu", "$r31", "$r17", "$r17", 0x86e16a1618a639c4UL, 0x87917b281cef8df0UL, 0xd543115a56dee48UL); ++ TESTINST_RRR("sltu", "$r20", "$r6", "$r25", 0x164fff47b8b23752UL, 0x9ad830d46b1660f6UL, 0xc5d72c146f4aba72UL); ++ TESTINST_RRR("sltu", "$r6", "$r26", "$r7", 0x1428360430b7c9b5UL, 0xc2052dc6eea5a53cUL, 0xda1a8e35dd060adfUL); ++ TESTINST_RRR("sltu", "$r19", "$r15", "$r26", 0xdfc9984966167604UL, 0xa9ea12b5a37dd492UL, 0x7a24be9fcf349afcUL); ++ TESTINST_RRR("sltu", "$r29", "$r26", "$r29", 0x5a3822db2cc26fc5UL, 0x5985f02e77511d80UL, 0x370f15cc98f2a6c1UL); ++ TESTINST_RRR("sltu", "$r7", "$r28", "$r16", 0xe4594ee2cc8c6d7UL, 0x177ac0014f5dd20UL, 0xde1724c7590a4908UL); ++ TESTINST_RRR("sltu", "$r8", "$r12", "$r4", 0x1df979e50aa0ed18UL, 0x5b410cd0985fce18UL, 0x9d3c39d61e29025dUL); ++ TESTINST_RRR("sltu", "$r30", "$r23", "$r25", 0x1cba022788d49d13UL, 0xd2b40941478ee865UL, 0xa503a74e41535830UL); ++ ++ /* ---------------- slti rd, rj, si12 ---------------- */ ++ TESTINST_RRI("slti", "$r15", "$r27", SI12, 0xe24c4ca567d1d5f4UL, 0xfef05a88adf4b892UL, 1913); ++ TESTINST_RRI("slti", "$r8", "$r31", SI12, 0xfba7284a8ab83b2dUL, 0xff63b80173f1e368UL, -738); ++ TESTINST_RRI("slti", "$r31", "$r31", SI12, 0xb4599a9fa734365aUL, 0x4327139de75dde1eUL, -1544); ++ TESTINST_RRI("slti", "$r5", "$r4", SI12, 0xa5572272e0c04a20UL, 0x87657c1b1699936bUL, 1529); ++ TESTINST_RRI("slti", "$r10", "$r28", SI12, 0x1260731618214410UL, 0xd0de0dfbafb7960aUL, 557); ++ TESTINST_RRI("slti", "$r5", "$r12", SI12, 0x4c6317772a4b06b0UL, 0x7a1d4eeb507d649bUL, -222); ++ TESTINST_RRI("slti", "$r4", "$r31", SI12, 0x23b4d62a21994afbUL, 0x85304cc393f6506bUL, 717); ++ TESTINST_RRI("slti", "$r18", "$r26", SI12, 0x67b6f5dbf6a0c55dUL, 
0x451013f9a2337f9fUL, 730); ++ TESTINST_RRI("slti", "$r25", "$r8", SI12, 0xdb278cca57f1ad7bUL, 0x7371a60f5af6334bUL, 1193); ++ TESTINST_RRI("slti", "$r17", "$r24", SI12, 0xffa3ed31f9ea3a29UL, 0x1138e06e1a45c4f3UL, 329); ++ ++ /* ---------------- sltui rd, rj, si12 ---------------- */ ++ TESTINST_RRI("sltui", "$r13", "$r26", SI12, 0x62677116040aebffUL, 0xeedd6ccd0e5e2771UL, -462); ++ TESTINST_RRI("sltui", "$r24", "$r28", SI12, 0xef9500b68a87984aUL, 0xaf5922683f40599dUL, 1890); ++ TESTINST_RRI("sltui", "$r9", "$r6", SI12, 0x9996aa21d2b51922UL, 0xd5214fb275e738dcUL, -1538); ++ TESTINST_RRI("sltui", "$r19", "$r26", SI12, 0x3eb2777655f0f1c5UL, 0x98ed915860f0eb26UL, -215); ++ TESTINST_RRI("sltui", "$r8", "$r19", SI12, 0x5c44b5807c43724cUL, 0x63a068026b529b03UL, -780); ++ TESTINST_RRI("sltui", "$r19", "$r17", SI12, 0xf6926016cdbfacc1UL, 0xec04a9bcc8d1192aUL, -1041); ++ TESTINST_RRI("sltui", "$r26", "$r14", SI12, 0x542f05c795aa07c2UL, 0xb634bf537df4c4ceUL, 1653); ++ TESTINST_RRI("sltui", "$r8", "$r5", SI12, 0x371daf74e330ee8bUL, 0xedb0321c888ae22eUL, 441); ++ TESTINST_RRI("sltui", "$r25", "$r4", SI12, 0xba813c7acc8f5621UL, 0x8d5ce4750fe7603bUL, 678); ++ TESTINST_RRI("sltui", "$r17", "$r15", SI12, 0x199b641cefe0a0e2UL, 0x7ea0508a3fed3453UL, 2019); ++ ++ /* ---------------- nor rd, rj, rk ---------------- */ ++ TESTINST_RRR("nor", "$r14", "$r28", "$r9", 0xccf23cf02a48844dUL, 0x2608ea0069c4e9ddUL, 0x1c7a04255a2d13f8UL); ++ TESTINST_RRR("nor", "$r6", "$r30", "$r4", 0xbfcc3de6da2483beUL, 0xd24e9abca28d6cb5UL, 0xbb01b508523673c6UL); ++ TESTINST_RRR("nor", "$r6", "$r28", "$r13", 0x28dacd828d5736d7UL, 0xb365ff31474f736cUL, 0x593621c0f82b445cUL); ++ TESTINST_RRR("nor", "$r24", "$r16", "$r31", 0x5898010a4c6cf1bbUL, 0xecac6e093ba6146aUL, 0x50e6093f19b1194UL); ++ TESTINST_RRR("nor", "$r15", "$r7", "$r20", 0x2ddb1dea334fd92aUL, 0x401d7a663be0b31aUL, 0xb6c008973a85f779UL); ++ TESTINST_RRR("nor", "$r18", "$r31", "$r29", 0xc987982e1d91684UL, 0x181f20f581ed38f4UL, 0xefaa786e00a2e5b9UL); ++ TESTINST_RRR("nor", "$r19", "$r31", "$r13", 0x39e476d555cd20bcUL, 0xfb8fab5d35576d50UL, 0x71a92a8377c0f729UL); ++ TESTINST_RRR("nor", "$r25", "$r7", "$r5", 0x7f36d0c6d173e8c8UL, 0x181763a9f9350680UL, 0x5ec5099605d7d418UL); ++ TESTINST_RRR("nor", "$r30", "$r23", "$r23", 0x688e1d04976ac8dbUL, 0xd37b6d6a1c510287UL, 0x8670301ee2a715dfUL); ++ TESTINST_RRR("nor", "$r5", "$r23", "$r14", 0x71c4a211dd9262f4UL, 0xcb8a4aebc2c6c4f2UL, 0x84d79a5254447c9UL); ++ ++ /* ---------------- and rd, rj, rk ---------------- */ ++ TESTINST_RRR("and", "$r8", "$r14", "$r31", 0xbddf22c4109e20b5UL, 0xb2d25973efd1a8ffUL, 0x28b78b59dfe641e9UL); ++ TESTINST_RRR("and", "$r19", "$r23", "$r17", 0xb25e185c549f6661UL, 0xb6ccc215c2f17718UL, 0xf20669c51aee8ffeUL); ++ TESTINST_RRR("and", "$r30", "$r27", "$r23", 0xa7f4ad796393e12bUL, 0xefbcf405df3e7affUL, 0x548a0141e9fe1700UL); ++ TESTINST_RRR("and", "$r18", "$r31", "$r29", 0xa399c7f46c61d974UL, 0xe0fe8cca1cbab773UL, 0x49e680ddee7f666bUL); ++ TESTINST_RRR("and", "$r5", "$r26", "$r25", 0x1682ca17c11f90acUL, 0x4e9706cb2c885742UL, 0x250ff6304dd87d57UL); ++ TESTINST_RRR("and", "$r28", "$r14", "$r8", 0xcacf15e6ffad256fUL, 0x99527f4fa2aa8fb1UL, 0xcff546a883b63cfbUL); ++ TESTINST_RRR("and", "$r28", "$r9", "$r28", 0xc60423b9cf70d112UL, 0x2fb0db47f1d8f166UL, 0x1e9cec9d13e85210UL); ++ TESTINST_RRR("and", "$r18", "$r28", "$r5", 0x5059c37ee38d2f25UL, 0x74bf57d85d90af3aUL, 0x35479df0ebec9209UL); ++ TESTINST_RRR("and", "$r23", "$r25", "$r12", 0x18742ef4c73416beUL, 0x8b93e775860ef52bUL, 0xa909915f60a546d2UL); ++ TESTINST_RRR("and", 
"$r18", "$r17", "$r24", 0xadb2cc6aec909946UL, 0x3068f8b21d583e4cUL, 0xcf8aae1918f3a88eUL); ++ ++ /* ---------------- or rd, rj, rk ---------------- */ ++ TESTINST_RRR("or", "$r19", "$r28", "$r25", 0x46819825f87044c2UL, 0x65cb2cc7e5f5a720UL, 0x1fc0130146f13f76UL); ++ TESTINST_RRR("or", "$r8", "$r25", "$r4", 0x45083dd59c60e6feUL, 0x936ecfaeb4d51c95UL, 0xdc37c27c69024f6eUL); ++ TESTINST_RRR("or", "$r15", "$r16", "$r8", 0x516659e51cf19b26UL, 0x7589da0802d59510UL, 0x6b713c60390f3fbfUL); ++ TESTINST_RRR("or", "$r9", "$r15", "$r6", 0x1646568625c40022UL, 0xa68db9141a88850cUL, 0x756d912fbefef973UL); ++ TESTINST_RRR("or", "$r24", "$r9", "$r25", 0xda34c24d14fce443UL, 0x6ad9bf24481630b0UL, 0x2aefcdfa652395bUL); ++ TESTINST_RRR("or", "$r13", "$r9", "$r14", 0x900358ad1e848728UL, 0xa0e361b5b891a62eUL, 0xddfa0c1377ce01acUL); ++ TESTINST_RRR("or", "$r23", "$r16", "$r15", 0x27a55515d39aded9UL, 0xd0daf17f9cb0bf5aUL, 0xf44c4372982c4d74UL); ++ TESTINST_RRR("or", "$r20", "$r16", "$r16", 0x7045887bb8325d6fUL, 0xbac771cbb78dae04UL, 0x23f4928023125a5cUL); ++ TESTINST_RRR("or", "$r30", "$r5", "$r7", 0xcf609aa2057d1b98UL, 0x379641544fd1cd48UL, 0x5275ef34f265f01aUL); ++ TESTINST_RRR("or", "$r23", "$r4", "$r30", 0xc43fc1c750887406UL, 0x44a3229c33d1cd65UL, 0xceaa00084fc04912UL); ++ ++ /* ---------------- xor rd, rj, rk ---------------- */ ++ TESTINST_RRR("xor", "$r6", "$r19", "$r31", 0x18522418b59bf8aUL, 0x270a2ec823f26e39UL, 0x99ef76e6d4495ae3UL); ++ TESTINST_RRR("xor", "$r28", "$r20", "$r27", 0x57de83cac9dade15UL, 0xd39fdecdfd4ccb08UL, 0xc97b854adacdb4UL); ++ TESTINST_RRR("xor", "$r4", "$r29", "$r5", 0x9f7356fff2445f77UL, 0xc3c3a34d2c226b5aUL, 0x51abdd266816b94fUL); ++ TESTINST_RRR("xor", "$r14", "$r6", "$r28", 0xdd5ca0b5c6c45804UL, 0xa0ba047990ec0798UL, 0x89e6efd43651c28UL); ++ TESTINST_RRR("xor", "$r8", "$r19", "$r23", 0xc3e35cd44af166faUL, 0x6affcfe12104ccc7UL, 0x4adbb3601a07a1d9UL); ++ TESTINST_RRR("xor", "$r16", "$r5", "$r18", 0x685cdc5ca969c8e1UL, 0xd88d0e2a9900b8ebUL, 0xdd4dfbba723cde28UL); ++ TESTINST_RRR("xor", "$r20", "$r18", "$r24", 0x2362838018fa39beUL, 0xbbc8d438b24c037aUL, 0xe020a8456a45b667UL); ++ TESTINST_RRR("xor", "$r19", "$r23", "$r19", 0x637cae50fc0a1c95UL, 0x514b81a7227dd07eUL, 0x59a27a7f9c8481c3UL); ++ TESTINST_RRR("xor", "$r20", "$r16", "$r18", 0xb728dd7a443bcc8fUL, 0xe2de9bf67cdbdc0cUL, 0x26687435fbe4dbf6UL); ++ TESTINST_RRR("xor", "$r23", "$r14", "$r6", 0x744915919b52e27eUL, 0x16863c1d3e1cded7UL, 0x40ce8607349c380UL); ++ ++ /* ---------------- orn rd, rj, rk ---------------- */ ++ TESTINST_RRR("orn", "$r24", "$r9", "$r15", 0x39320ce9aa25fb73UL, 0xaaec06dc1b47cf43UL, 0x5fa36a558c884a69UL); ++ TESTINST_RRR("orn", "$r12", "$r4", "$r26", 0xa9c2abcbc14e3f3cUL, 0x7c87d633528d97b0UL, 0xe383c14e72ab8677UL); ++ TESTINST_RRR("orn", "$r20", "$r24", "$r28", 0xb117d8b0280738a2UL, 0x318fd949c3ba430fUL, 0xc9edab5116dc1582UL); ++ TESTINST_RRR("orn", "$r8", "$r25", "$r25", 0xb140441a36f8ededUL, 0xa26782a5e34d7addUL, 0x61bdd5b78d019958UL); ++ TESTINST_RRR("orn", "$r16", "$r18", "$r25", 0xcda0e2c1bce1eeecUL, 0xa4486eefd2c444d9UL, 0xbd007605c829cadcUL); ++ TESTINST_RRR("orn", "$r5", "$r28", "$r19", 0x8196fca50795a2aaUL, 0xec7f689a0d676560UL, 0xb4450418c4e1b333UL); ++ TESTINST_RRR("orn", "$r15", "$r14", "$r8", 0xaf1e2a9fe35ba4edUL, 0xd2207f86d89b890aUL, 0xfb31b9e37313a94dUL); ++ TESTINST_RRR("orn", "$r27", "$r14", "$r14", 0x1f24566bfa353160UL, 0xc4e17319c4766becUL, 0x29a3bbaaf6b49218UL); ++ TESTINST_RRR("orn", "$r17", "$r12", "$r31", 0xf5195a72c175fed7UL, 0x7aa8d4840359cbf6UL, 0xa1a42af83c82215bUL); ++ 
TESTINST_RRR("orn", "$r16", "$r20", "$r20", 0x76bb09b5b50705e2UL, 0x613fdcbd8c1eba2aUL, 0xfb1e04641f5da4ffUL); ++ ++ /* ---------------- andn rd, rj, rk ---------------- */ ++ TESTINST_RRR("andn", "$r19", "$r31", "$r17", 0xbcc81a9b2e349626UL, 0x5a38a8ef9c7e30e4UL, 0xcb490976d0652986UL); ++ TESTINST_RRR("andn", "$r10", "$r4", "$r10", 0x9acfa0cd6ea107fdUL, 0x1d9b572e8f6bedb7UL, 0x768fe778d2a543eaUL); ++ TESTINST_RRR("andn", "$r6", "$r12", "$r26", 0x949e36cff3b5decbUL, 0x56723f7285834fc9UL, 0xf6fa544d6cd57fa8UL); ++ TESTINST_RRR("andn", "$r16", "$r6", "$r4", 0x44a39d85132d6513UL, 0x3ca7f972b865b7ceUL, 0xf18819e4740308bcUL); ++ TESTINST_RRR("andn", "$r19", "$r26", "$r15", 0x856d1e3162c8fa2dUL, 0xc1ef79456be3885UL, 0x3c089064e60da1dUL); ++ TESTINST_RRR("andn", "$r17", "$r28", "$r9", 0x512a518c554f4b0aUL, 0x43454425b8b7755UL, 0xdc5dca386b49bdd7UL); ++ TESTINST_RRR("andn", "$r16", "$r16", "$r14", 0xa9c14796fec54f89UL, 0xe31928f90d2723a4UL, 0xcf2deaf4af11410aUL); ++ TESTINST_RRR("andn", "$r9", "$r4", "$r20", 0x51d79964a699ec8dUL, 0xe82135537ca93e7fUL, 0xcbadcb1dc4dd0ed0UL); ++ TESTINST_RRR("andn", "$r18", "$r25", "$r25", 0xeb546ce75bcba3f5UL, 0x953d86e2bd6b136dUL, 0x4914dbeee506d8adUL); ++ TESTINST_RRR("andn", "$r27", "$r15", "$r14", 0xc8b599a43b0b4683UL, 0x509638630676b88UL, 0x3d278ed22a112a89UL); ++ ++ /* ---------------- mul.w rd, rj, rk ---------------- */ ++ TESTINST_RRR("mul.w", "$r28", "$r12", "$r10", 0xf6fcce3e1c5b1598UL, 0xef2747013f911fe8UL, 0x14a216fd69537967UL); ++ TESTINST_RRR("mul.w", "$r13", "$r18", "$r24", 0x5e8a32c1e1e12aa4UL, 0x30e007bb8dd185faUL, 0x1a74dd893af9fb5aUL); ++ TESTINST_RRR("mul.w", "$r10", "$r20", "$r4", 0xf06f4af61b0e0c24UL, 0x1b3624a77f26275fUL, 0x653052ae3a1347dfUL); ++ TESTINST_RRR("mul.w", "$r23", "$r19", "$r10", 0xccb5485ae4605cddUL, 0x67c67c647eaf9e6cUL, 0xfb9b6c7b49ec10cfUL); ++ TESTINST_RRR("mul.w", "$r12", "$r30", "$r7", 0xc1f45aaf98ffcb39UL, 0x906f0c08c0bae02eUL, 0xdf6cf5c05b5f2d34UL); ++ TESTINST_RRR("mul.w", "$r27", "$r12", "$r12", 0x9545c6d9f812c0d9UL, 0xacd016cb69e028b3UL, 0x2b68e3a280d9c0b6UL); ++ TESTINST_RRR("mul.w", "$r28", "$r7", "$r19", 0x4cf68a9590da3da5UL, 0x70ed8b9b03a6325dUL, 0x1125383d12dad118UL); ++ TESTINST_RRR("mul.w", "$r20", "$r12", "$r20", 0x10683d31408fb4c5UL, 0x9ef4ea79672ce58dUL, 0x960a13776923d3e4UL); ++ TESTINST_RRR("mul.w", "$r26", "$r19", "$r28", 0xbf8a20b69fa4357bUL, 0xf3e9b53a654e3cbfUL, 0x20afdeb5a4b4e1c9UL); ++ TESTINST_RRR("mul.w", "$r13", "$r26", "$r25", 0x78f637d350c666bfUL, 0xff742d96dc73e9e9UL, 0x94a3289b55744707UL); ++ ++ /* ---------------- mulh.w rd, rj, rk ---------------- */ ++ TESTINST_RRR("mulh.w", "$r18", "$r25", "$r14", 0xa988161162710d96UL, 0x37443c6f5d0625eaUL, 0x94da379219de8576UL); ++ TESTINST_RRR("mulh.w", "$r13", "$r16", "$r18", 0x246298a54a25030aUL, 0x33643ceed35cff64UL, 0xc25702631b42c849UL); ++ TESTINST_RRR("mulh.w", "$r20", "$r5", "$r15", 0x3b606ea986dcf13eUL, 0x269dcd16567786d2UL, 0x96c0983df45d5c03UL); ++ TESTINST_RRR("mulh.w", "$r19", "$r19", "$r25", 0xab8fc1c922ba3e7aUL, 0xdec5bddca513d198UL, 0xf05e814d67d43f5aUL); ++ TESTINST_RRR("mulh.w", "$r15", "$r28", "$r16", 0x82fcfa24449231baUL, 0xf37548fee13133f3UL, 0x256188ef96bb3d23UL); ++ TESTINST_RRR("mulh.w", "$r24", "$r9", "$r27", 0x858ddeb68e948058UL, 0xffb64d62e202462UL, 0xe07a6dae07f46c11UL); ++ TESTINST_RRR("mulh.w", "$r23", "$r20", "$r14", 0x7713930e419350ffUL, 0xd5d72e6efb86e428UL, 0x49f87e78ddcc8400UL); ++ TESTINST_RRR("mulh.w", "$r28", "$r20", "$r25", 0x552a9b7f3fa0c48aUL, 0xd616afd20f193287UL, 0xbcd2ae680b131cd2UL); ++ TESTINST_RRR("mulh.w", 
"$r16", "$r19", "$r12", 0x94b154fc890497c3UL, 0xd8217f47e4257a7cUL, 0xb47bb0e4cff83cbfUL); ++ TESTINST_RRR("mulh.w", "$r23", "$r23", "$r6", 0xafb7fddb344318fUL, 0xaafee418c4267e18UL, 0x1763f686cd41d46eUL); ++ ++ /* ---------------- mulh.wu rd, rj, rk ---------------- */ ++ TESTINST_RRR("mulh.wu", "$r18", "$r17", "$r8", 0xa92fa2817b19786cUL, 0xaf23e3d2092f080cUL, 0x771c36ac19259f2aUL); ++ TESTINST_RRR("mulh.wu", "$r16", "$r13", "$r8", 0xf4a7b7abe5f3831aUL, 0xe8beff7f8f4330cdUL, 0x38cebbe3d1af354dUL); ++ TESTINST_RRR("mulh.wu", "$r8", "$r23", "$r29", 0x6ca8c7d8ec316750UL, 0xc3a59754c752c3a5UL, 0x4b77e251de7f45f1UL); ++ TESTINST_RRR("mulh.wu", "$r20", "$r25", "$r30", 0x6faa5d1372250132UL, 0x68734123142c820aUL, 0xf7b4bdf342e2017UL); ++ TESTINST_RRR("mulh.wu", "$r31", "$r18", "$r19", 0x8cfa67422c1c5d5UL, 0xb48ac9531206cef2UL, 0x9f9f5d925c5cf738UL); ++ TESTINST_RRR("mulh.wu", "$r25", "$r7", "$r27", 0x85aa17ff1b3699baUL, 0x9a7aeabb800edb53UL, 0x4eb1ec754c7cdb59UL); ++ TESTINST_RRR("mulh.wu", "$r19", "$r4", "$r28", 0x821038d7fb43149cUL, 0x44cd20261f5ae87eUL, 0xf9d8916e8eb4ecb1UL); ++ TESTINST_RRR("mulh.wu", "$r30", "$r23", "$r28", 0xef34433557594fb3UL, 0x2f9401c8064c8ca0UL, 0x5de6287c2a56e507UL); ++ TESTINST_RRR("mulh.wu", "$r13", "$r6", "$r17", 0xd6b38c427ad5f669UL, 0xbe04ea8987b20188UL, 0x52cee1d144e3c134UL); ++ TESTINST_RRR("mulh.wu", "$r26", "$r19", "$r17", 0x2ea15eee9429b8a0UL, 0x43598be92000d9f7UL, 0x6364cfeb707aba6cUL); ++ ++ /* ---------------- mul.d rd, rj, rk ---------------- */ ++ TESTINST_RRR("mul.d", "$r19", "$r4", "$r10", 0xf0235819cf1bab1fUL, 0xdc7a0086353cfddfUL, 0x6f18aec465b5af87UL); ++ TESTINST_RRR("mul.d", "$r19", "$r31", "$r20", 0x24d7526c5e4669e3UL, 0xaab7dd46e5af2493UL, 0xd5df6eea42205e25UL); ++ TESTINST_RRR("mul.d", "$r15", "$r20", "$r4", 0x3740ba48d64cc478UL, 0xcfeffb7c35a98382UL, 0xeab050fc9bdb3c52UL); ++ TESTINST_RRR("mul.d", "$r29", "$r7", "$r25", 0xe8858552c0e8eac8UL, 0xb65ed231c27efb70UL, 0xbb753de59e4ca3d1UL); ++ TESTINST_RRR("mul.d", "$r5", "$r30", "$r4", 0xc4f17df5c983317dUL, 0xb2af9e86d443d8ceUL, 0xf9e3c6d18372d0d3UL); ++ TESTINST_RRR("mul.d", "$r25", "$r17", "$r29", 0xa09d11d50056b350UL, 0x6609b14ca65f9affUL, 0x692def5a14a3278cUL); ++ TESTINST_RRR("mul.d", "$r13", "$r15", "$r26", 0xd528ed047af75775UL, 0x896658fe826a0817UL, 0xa456f53d5f2760b1UL); ++ TESTINST_RRR("mul.d", "$r23", "$r9", "$r7", 0x5d33f63ce8637a69UL, 0xad38922264c721ffUL, 0xe0514fea4ee52acaUL); ++ TESTINST_RRR("mul.d", "$r25", "$r23", "$r30", 0x5d74125f059662f3UL, 0xa708100731e88710UL, 0x739e4de71fec92e0UL); ++ TESTINST_RRR("mul.d", "$r26", "$r18", "$r30", 0x110a94ffa2e12f32UL, 0x1b770d6c423d4f8UL, 0x38bf04d66f91531aUL); ++ ++ /* ---------------- mulh.d rd, rj, rk ---------------- */ ++ TESTINST_RRR("mulh.d", "$r5", "$r15", "$r12", 0xd72f46d42ca4db6bUL, 0xe1771af0e69e49a6UL, 0xd796f52fbd01a4bbUL); ++ TESTINST_RRR("mulh.d", "$r28", "$r18", "$r14", 0x904e699bcbe32b08UL, 0x9b5b69b4d817779cUL, 0xa02ca97cc4e37f13UL); ++ TESTINST_RRR("mulh.d", "$r6", "$r12", "$r7", 0xc75e1065b8dbcd34UL, 0xec7d8ae6a65f2fd3UL, 0xb7e32b52f40bc8efUL); ++ TESTINST_RRR("mulh.d", "$r5", "$r25", "$r19", 0x7b2e04c0c2f95e4fUL, 0x9a5037ff200e982aUL, 0xf862c0c6425ff2bcUL); ++ TESTINST_RRR("mulh.d", "$r14", "$r8", "$r23", 0x5fd7ae31ad151daaUL, 0x444243172f499ec0UL, 0x9003c8aeabc39884UL); ++ TESTINST_RRR("mulh.d", "$r7", "$r23", "$r13", 0xbc21ca397041a2bUL, 0xe886455c8737b2caUL, 0xd5ccec2f631a1d60UL); ++ TESTINST_RRR("mulh.d", "$r26", "$r16", "$r13", 0xd3894783f187ee9cUL, 0xa7a6c4abeda9a22cUL, 0x4375f7e49ed91384UL); ++ 
TESTINST_RRR("mulh.d", "$r17", "$r31", "$r16", 0xa93bd0cf9137745eUL, 0x3a1b2b922b7645f1UL, 0x7e33f64c19972ae3UL); ++ TESTINST_RRR("mulh.d", "$r20", "$r19", "$r8", 0xda9224c9ab488939UL, 0xb7f5978bf509641dUL, 0xf6fcd615333c30c0UL); ++ TESTINST_RRR("mulh.d", "$r12", "$r17", "$r20", 0xcdbd51e35d5c1df3UL, 0x254bd8eaadc946feUL, 0x9de163435088598bUL); ++ ++ /* ---------------- mulh.du rd, rj, rk ---------------- */ ++ TESTINST_RRR("mulh.du", "$r25", "$r28", "$r29", 0xf7ef0dbf1bf7938aUL, 0xd267d11ae422f604UL, 0x89d6fd68226e13dUL); ++ TESTINST_RRR("mulh.du", "$r7", "$r28", "$r24", 0xe568cf4a6d6bc199UL, 0x6efedad6fbe95f2aUL, 0xdf55853ed22d024eUL); ++ TESTINST_RRR("mulh.du", "$r25", "$r8", "$r9", 0xbf7c0226b0c2072UL, 0x794fd44a65c65ebbUL, 0xa0391c3fa3cf1e5cUL); ++ TESTINST_RRR("mulh.du", "$r30", "$r16", "$r7", 0x3df3f3b3ff17f61aUL, 0xcadd1f7e7150ad7bUL, 0xbdc63d3f762cf02dUL); ++ TESTINST_RRR("mulh.du", "$r6", "$r10", "$r19", 0x6601e05fc5f801cbUL, 0xbc10a70104969251UL, 0x2f50a00036fb7821UL); ++ TESTINST_RRR("mulh.du", "$r17", "$r9", "$r5", 0xffabc0cbdc8aa7b0UL, 0x5288bc60da558afbUL, 0x2795644a58b2668fUL); ++ TESTINST_RRR("mulh.du", "$r26", "$r8", "$r15", 0x68b64c997f561b59UL, 0xe2ed2375e64b1bf3UL, 0xe1033e583092ad96UL); ++ TESTINST_RRR("mulh.du", "$r10", "$r13", "$r30", 0x6450ec488eb4753bUL, 0x4287b82860366cf8UL, 0x1c15ed3f051fe8cUL); ++ TESTINST_RRR("mulh.du", "$r24", "$r13", "$r15", 0x1169fa9dd6f8273dUL, 0x6fd2cdb39e5d1fa3UL, 0xff0526e206880684UL); ++ TESTINST_RRR("mulh.du", "$r8", "$r9", "$r10", 0xe9cb6416a1492fbfUL, 0xaf89960e18913df0UL, 0x76b4251409ff9830UL); ++ ++ /* ---------------- mulw.d.w rd, rj, rk ---------------- */ ++ TESTINST_RRR("mulw.d.w", "$r6", "$r31", "$r7", 0x50ce021eb3b3f3a4UL, 0xb859e7514e4c4d7cUL, 0x372cb1e2b3200f36UL); ++ TESTINST_RRR("mulw.d.w", "$r31", "$r7", "$r28", 0x925642fa7e2de9abUL, 0x61404b6550238cebUL, 0x75ed502242ed0430UL); ++ TESTINST_RRR("mulw.d.w", "$r19", "$r16", "$r10", 0xef82de697f7239fUL, 0xdf1c56dfe5c0e48dUL, 0xbc7e740fe1b1dc25UL); ++ TESTINST_RRR("mulw.d.w", "$r29", "$r12", "$r27", 0xc104a400fa0d1dbfUL, 0x2aa34e8a5fad6c6fUL, 0x7f8e4d23644b0d4dUL); ++ TESTINST_RRR("mulw.d.w", "$r25", "$r16", "$r25", 0x5b8ff9172c849fb9UL, 0x843f90380af6f2afUL, 0x12f7f8780cb8bfe0UL); ++ TESTINST_RRR("mulw.d.w", "$r13", "$r13", "$r7", 0x6bba79a88056d891UL, 0x6757a43d403285abUL, 0x2d2ea385888c2664UL); ++ TESTINST_RRR("mulw.d.w", "$r12", "$r8", "$r23", 0x5c96927dcf1fb14eUL, 0x2b3767b9e9029d4bUL, 0x252bbcc66b5d834bUL); ++ TESTINST_RRR("mulw.d.w", "$r6", "$r13", "$r10", 0x5fa5a8b36e8ec3e0UL, 0xcbca4b4d518b9466UL, 0xabdf2ec674f70c5bUL); ++ TESTINST_RRR("mulw.d.w", "$r16", "$r15", "$r23", 0x5b94eeb9c3c9fa01UL, 0x5c4ebef486f83b43UL, 0x73f3781c3a1e9216UL); ++ TESTINST_RRR("mulw.d.w", "$r6", "$r31", "$r7", 0xbc263312a123caedUL, 0xe9aa8545d3a99a97UL, 0x71b5dbacf4f7f2b8UL); ++ ++ /* ---------------- mulw.d.wu rd, rj, rk ---------------- */ ++ TESTINST_RRR("mulw.d.wu", "$r14", "$r17", "$r30", 0x94452e0d7eb407b7UL, 0x629b1902a484a77dUL, 0x474359ca7f7165edUL); ++ TESTINST_RRR("mulw.d.wu", "$r26", "$r7", "$r5", 0xae9771f0d59319b3UL, 0x1bcb563dea8f3a3fUL, 0x759334cc2d543103UL); ++ TESTINST_RRR("mulw.d.wu", "$r25", "$r28", "$r27", 0x27ca0bf2d6cd2699UL, 0x5a015da9b52ffc64UL, 0x482a4fa5b5625914UL); ++ TESTINST_RRR("mulw.d.wu", "$r8", "$r4", "$r16", 0x22f61239dad7bc92UL, 0xe8c9964b31b0e199UL, 0x99fdef421aa22322UL); ++ TESTINST_RRR("mulw.d.wu", "$r29", "$r17", "$r15", 0xcc5eec6e4f2b5fdbUL, 0x2d08ada074c2ac37UL, 0x8967ce1cd4c2362eUL); ++ TESTINST_RRR("mulw.d.wu", "$r27", "$r23", "$r16", 
0x2d057e2ead214d6cUL, 0x987e7a10a0f3ee5dUL, 0xd515e2a2f06be633UL); ++ TESTINST_RRR("mulw.d.wu", "$r15", "$r19", "$r12", 0xce24943d6fe20263UL, 0xd6bbdcb20d76de15UL, 0xcc277905bc41da62UL); ++ TESTINST_RRR("mulw.d.wu", "$r4", "$r4", "$r19", 0xe37942a26dc0e882UL, 0x6a30fb04c3b5431fUL, 0x4c937bed67cb6c73UL); ++ TESTINST_RRR("mulw.d.wu", "$r7", "$r12", "$r9", 0xbdebe7a7b19b7dc0UL, 0x3f6e790fb24d19f1UL, 0x7a19c4fdd0d29f3eUL); ++ TESTINST_RRR("mulw.d.wu", "$r31", "$r30", "$r28", 0x690687056e169108UL, 0xa8abab5bf1d42538UL, 0x636a31884ca1e99UL); ++ ++ /* ---------------- div.w rd, rj, rk ---------------- */ ++ TESTINST_RRR("div.w", "$r13", "$r28", "$r23", 0x16546290UL, 0x627aa138UL, 0x534168cUL); ++ TESTINST_RRR("div.w", "$r28", "$r19", "$r9", 0xffffffffbe03930dUL, 0x223d0ec7UL, 0xffffffff8404aa67UL); ++ TESTINST_RRR("div.w", "$r18", "$r19", "$r30", 0xffffffffac214649UL, 0xffffffff8019c3b7UL, 0xffffffff871cbf90UL); ++ TESTINST_RRR("div.w", "$r24", "$r25", "$r7", 0xffffffffa144ed80UL, 0x1c4370c7UL, 0x4695aa29UL); ++ TESTINST_RRR("div.w", "$r9", "$r27", "$r4", 0x3ae8b7c7UL, 0xfffffffff3a6ebb2UL, 0x181d816aUL); ++ TESTINST_RRR("div.w", "$r28", "$r15", "$r7", 0xffffffff956a7de4UL, 0xffffffff9aab217bUL, 0x3b061b78UL); ++ TESTINST_RRR("div.w", "$r25", "$r24", "$r12", 0x3c6167d4UL, 0x2673145eUL, 0x1d5e391UL); ++ TESTINST_RRR("div.w", "$r23", "$r15", "$r4", 0x3e0820eeUL, 0x42793c51UL, 0x286cdb51UL); ++ TESTINST_RRR("div.w", "$r28", "$r16", "$r30", 0xffffffffcf8fd242UL, 0x2a76141eUL, 0x2429a52UL); ++ TESTINST_RRR("div.w", "$r29", "$r8", "$r18", 0x74991388UL, 0xffffffffd594ef43UL, 0x6d3f9603UL); ++ ++ /* ---------------- mod.w rd, rj, rk ---------------- */ ++ TESTINST_RRR("mod.w", "$r8", "$r13", "$r14", 0x5cc9e6dbUL, 0xfffffffff7327c6dUL, 0x23eef833UL); ++ TESTINST_RRR("mod.w", "$r25", "$r24", "$r25", 0x539195e4UL, 0xffffffffd94f10c8UL, 0x2c5786d9UL); ++ TESTINST_RRR("mod.w", "$r10", "$r16", "$r23", 0xffffffff9b15f725UL, 0x448a831dUL, 0xffffffffd5d7d92bUL); ++ TESTINST_RRR("mod.w", "$r6", "$r5", "$r29", 0x1794d969UL, 0x2fba86b0UL, 0x40e6ab6bUL); ++ TESTINST_RRR("mod.w", "$r16", "$r14", "$r29", 0x6a503328UL, 0xffffffffdf0b2ad2UL, 0xffffffff90dc29c6UL); ++ TESTINST_RRR("mod.w", "$r30", "$r14", "$r18", 0xffffffffc7670acdUL, 0x53f3b34fUL, 0xffffffff84b62159UL); ++ TESTINST_RRR("mod.w", "$r31", "$r6", "$r18", 0xffffffff98334c95UL, 0xfffffffff241ffd8UL, 0xffffffffa73314aaUL); ++ TESTINST_RRR("mod.w", "$r12", "$r8", "$r4", 0xffffffffd9f19db4UL, 0xffffffffc89f9796UL, 0xffffffffaa8e2a3bUL); ++ TESTINST_RRR("mod.w", "$r23", "$r12", "$r4", 0xffffffff94e93220UL, 0xfffffffffea1587aUL, 0xffffffffb88b2b87UL); ++ TESTINST_RRR("mod.w", "$r13", "$r9", "$r18", 0xf718c0UL, 0xffffffffe264a3a5UL, 0x2f29ef3UL); ++ ++ /* ---------------- div.wu rd, rj, rk ---------------- */ ++ TESTINST_RRR("div.wu", "$r24", "$r5", "$r16", 0xddf57c5UL, 0x6b1a808cUL, 0x576fe70UL); ++ TESTINST_RRR("div.wu", "$r26", "$r7", "$r9", 0x665e82ffUL, 0x344d887fUL, 0x7fd6d6d8UL); ++ TESTINST_RRR("div.wu", "$r13", "$r18", "$r15", 0xffffffffe82e2cf8UL, 0x7c66b628UL, 0x305c899UL); ++ TESTINST_RRR("div.wu", "$r15", "$r14", "$r7", 0xb06b1fUL, 0x56016282UL, 0x95a8701UL); ++ TESTINST_RRR("div.wu", "$r19", "$r12", "$r31", 0xffffffffb3a487d1UL, 0xffffffffbe2fe16eUL, 0xffffffff8dc0ff7fUL); ++ TESTINST_RRR("div.wu", "$r6", "$r10", "$r20", 0x1bb491e9UL, 0x64e382eUL, 0x5977f9f1UL); ++ TESTINST_RRR("div.wu", "$r9", "$r29", "$r28", 0x498c3349UL, 0x14cbb257UL, 0xffffffff95165a4aUL); ++ TESTINST_RRR("div.wu", "$r10", "$r29", "$r15", 0xffffffffbb3f9c5dUL, 0x2755057dUL, 
0x14039cc4UL); ++ TESTINST_RRR("div.wu", "$r24", "$r31", "$r7", 0xffffffffe5a9a3cdUL, 0xffffffffa1f84b49UL, 0xffffffffe45bd3b9UL); ++ TESTINST_RRR("div.wu", "$r23", "$r18", "$r6", 0x54e07e9fUL, 0xffffffffaccbdd8cUL, 0xfffffffff3729b57UL); ++ ++ /* ---------------- mod.wu rd, rj, rk ---------------- */ ++ TESTINST_RRR("mod.wu", "$r5", "$r20", "$r18", 0xffffffffa1ce2e4eUL, 0xffffffffdbeb0e2dUL, 0x70157135UL); ++ TESTINST_RRR("mod.wu", "$r14", "$r30", "$r17", 0x10e75d07UL, 0x39c3080UL, 0x1658d87bUL); ++ TESTINST_RRR("mod.wu", "$r28", "$r7", "$r4", 0x6df194dbUL, 0x55fae7c9UL, 0xffffffff9a87c1efUL); ++ TESTINST_RRR("mod.wu", "$r6", "$r14", "$r10", 0xffffffff8feb78ccUL, 0xffffffffe5032316UL, 0x18ab441eUL); ++ TESTINST_RRR("mod.wu", "$r13", "$r15", "$r9", 0xffffffffbb28952cUL, 0x2d43f57dUL, 0x2dfbf584UL); ++ TESTINST_RRR("mod.wu", "$r7", "$r30", "$r5", 0x9bfb2cfUL, 0x6595d7b3UL, 0xfffffffffffd1025UL); ++ TESTINST_RRR("mod.wu", "$r10", "$r9", "$r16", 0x342671c6UL, 0xfffffffff1ff8be3UL, 0xfffffffffaea052bUL); ++ TESTINST_RRR("mod.wu", "$r16", "$r16", "$r23", 0xffffffffc0356055UL, 0x2ac1f414UL, 0x4a75c890UL); ++ TESTINST_RRR("mod.wu", "$r19", "$r8", "$r7", 0xfffffffff8ed6580UL, 0x5fef460eUL, 0x68eedef2UL); ++ TESTINST_RRR("mod.wu", "$r29", "$r25", "$r25", 0xffffffff9ea76eb0UL, 0xffffffff818904b9UL, 0xffffffffe92f4f30UL); ++ ++ /* ---------------- div.d rd, rj, rk ---------------- */ ++ TESTINST_RRR("div.d", "$r7", "$r17", "$r7", 0xc8f25fb958f2d668UL, 0x74a14cbaa00fdeaUL, 0xcf95f3de82ceb015UL); ++ TESTINST_RRR("div.d", "$r10", "$r19", "$r12", 0x9ead8a6f6ea63534UL, 0xaf80d344d48e6cd5UL, 0xe1f40f759cbfe0e7UL); ++ TESTINST_RRR("div.d", "$r23", "$r28", "$r28", 0x35481a5285093e04UL, 0xfd79e3c19b697fa8UL, 0x6ffab603b9e1b7fbUL); ++ TESTINST_RRR("div.d", "$r30", "$r25", "$r4", 0x3eacf1d695a34b95UL, 0xfbff957ab051d494UL, 0x670724b8930d53fUL); ++ TESTINST_RRR("div.d", "$r31", "$r29", "$r6", 0xce8d3df48871d655UL, 0xf351f7f35927e83dUL, 0x93a3085686f4101fUL); ++ TESTINST_RRR("div.d", "$r17", "$r23", "$r8", 0xfc913f8b14dda5a5UL, 0x1f938af81988deUL, 0x9d021a9f06b46953UL); ++ TESTINST_RRR("div.d", "$r7", "$r29", "$r15", 0x4593da2923f2ac5bUL, 0x11fc5a958b182a55UL, 0x2edafaf2857c6697UL); ++ TESTINST_RRR("div.d", "$r13", "$r31", "$r27", 0x97236145608dd8c3UL, 0x1f0ee96afd23910bUL, 0xe35e4d5efd2204d3UL); ++ TESTINST_RRR("div.d", "$r13", "$r26", "$r14", 0x2c057bd222f216dfUL, 0x1e006853720971c3UL, 0x81e35a993e6a15b5UL); ++ TESTINST_RRR("div.d", "$r5", "$r9", "$r4", 0x93c0d85c66f2c5abUL, 0x774fbe894b2ed067UL, 0x2c46387d55732742UL); ++ ++ /* ---------------- mod.d rd, rj, rk ---------------- */ ++ TESTINST_RRR("mod.d", "$r19", "$r26", "$r16", 0x63304d2181f4a4daUL, 0x9ed948849ddee475UL, 0x18a360d3ab980398UL); ++ TESTINST_RRR("mod.d", "$r27", "$r23", "$r13", 0xf7156e74db7a8d92UL, 0x324e7001287ce2a8UL, 0x3cc7524686bed31cUL); ++ TESTINST_RRR("mod.d", "$r8", "$r26", "$r19", 0x7bda37a222135803UL, 0x1daf8fd66ff987edUL, 0x334631279104fc3bUL); ++ TESTINST_RRR("mod.d", "$r25", "$r15", "$r7", 0xd1a0f45d5b463d53UL, 0x9c4cd7bef3bf0712UL, 0x420a5c702006f3ccUL); ++ TESTINST_RRR("mod.d", "$r25", "$r18", "$r7", 0x93487a905cb08a75UL, 0x8c79cafa8bebf0a8UL, 0x1478409d192c144bUL); ++ TESTINST_RRR("mod.d", "$r8", "$r27", "$r27", 0x8756a1690dd7896dUL, 0x35273279ea76319fUL, 0xc5292f2331abc6ddUL); ++ TESTINST_RRR("mod.d", "$r15", "$r10", "$r24", 0xf8c476adbc930802UL, 0x8b5832bcd0f6c87eUL, 0x6cba54a72da38702UL); ++ TESTINST_RRR("mod.d", "$r27", "$r7", "$r6", 0x2387015bddb2c076UL, 0x231e30de7a72ad90UL, 0x81f1285973e8dc11UL); ++ TESTINST_RRR("mod.d", 
"$r16", "$r9", "$r12", 0x3388d23c07feb1daUL, 0xe8c01f744b310474UL, 0xa29071d702959009UL); ++ TESTINST_RRR("mod.d", "$r13", "$r10", "$r20", 0xbd45a261f8de4fe4UL, 0x6fb0a8c9a2681a8eUL, 0x2f1b7055cf2409ecUL); ++ ++ /* ---------------- div.du rd, rj, rk ---------------- */ ++ TESTINST_RRR("div.du", "$r17", "$r10", "$r24", 0x4d363fd48a626fdaUL, 0x7ccdeeaa6c24885fUL, 0xfcc68e72f59750aeUL); ++ TESTINST_RRR("div.du", "$r20", "$r20", "$r10", 0x808fa5cb6a75fd6fUL, 0xf3f712970031005UL, 0x1709a8adab2fa578UL); ++ TESTINST_RRR("div.du", "$r15", "$r14", "$r19", 0xcd3107423486c8feUL, 0xf6bc56277282cd14UL, 0x961ac833f00f3e3UL); ++ TESTINST_RRR("div.du", "$r4", "$r29", "$r18", 0xa0bfc2fc5b35fa79UL, 0x2b28c09aa5f12845UL, 0xed44da2fdf5dce00UL); ++ TESTINST_RRR("div.du", "$r4", "$r6", "$r25", 0x1fc6e23fd0f09ed0UL, 0xeaa71d9fb42223caUL, 0x45689545e60381cUL); ++ TESTINST_RRR("div.du", "$r10", "$r8", "$r12", 0xa3710c512d4c006cUL, 0xc011778733c50a6eUL, 0xb44475ee048d8167UL); ++ TESTINST_RRR("div.du", "$r29", "$r4", "$r29", 0x46d27abff0da1972UL, 0x17a4e863a182dcd0UL, 0x59a7b82980ac6a6dUL); ++ TESTINST_RRR("div.du", "$r15", "$r8", "$r30", 0x68120919dbbd9b19UL, 0x4c296c89a6f7a6dfUL, 0x9d9166c1cd0eecfaUL); ++ TESTINST_RRR("div.du", "$r7", "$r18", "$r17", 0xd2389cb7af92be89UL, 0x9a1f65b2c59cfda3UL, 0xe316cf92f8f0574fUL); ++ TESTINST_RRR("div.du", "$r15", "$r25", "$r17", 0x49651d72d87da955UL, 0xd22c499c27908743UL, 0x8d824b01058ecb8UL); ++ ++ /* ---------------- mod.du rd, rj, rk ---------------- */ ++ TESTINST_RRR("mod.du", "$r26", "$r8", "$r23", 0xb0bd66f10c34fe23UL, 0x5eb9b775d83b4893UL, 0x8867d4b638f2622UL); ++ TESTINST_RRR("mod.du", "$r8", "$r10", "$r25", 0xe236349cd47eeb11UL, 0x119102fd7b236a81UL, 0x8fd72a09e4fb45fUL); ++ TESTINST_RRR("mod.du", "$r25", "$r4", "$r5", 0x1b669725a0c3a970UL, 0x175359099c87b83UL, 0xcad295c79f1d835aUL); ++ TESTINST_RRR("mod.du", "$r7", "$r28", "$r20", 0x7117e70798869df4UL, 0xe35b93aa0c37fe97UL, 0x741084dead7970d0UL); ++ TESTINST_RRR("mod.du", "$r30", "$r24", "$r9", 0xc4d432a8ce91f693UL, 0x77c03aceb2ea6b45UL, 0xb8cd7773fb72b7caUL); ++ TESTINST_RRR("mod.du", "$r23", "$r9", "$r28", 0x13f1f3e1891b6b73UL, 0x9811699becce53a9UL, 0xed15e264f0c39b88UL); ++ TESTINST_RRR("mod.du", "$r13", "$r12", "$r14", 0xb8b22bcb0cb970e8UL, 0x16cdecd7c0091cd2UL, 0x4fcab819ebadbdfdUL); ++ TESTINST_RRR("mod.du", "$r30", "$r17", "$r12", 0xbf96226d2de1240dUL, 0x9fe4b2c7557d6b9aUL, 0x3668e581a5de6efdUL); ++ TESTINST_RRR("mod.du", "$r14", "$r4", "$r6", 0x9bc8f8a69a7f55c2UL, 0x530a9c5a21769babUL, 0x2805bef72d33cbd5UL); ++ TESTINST_RRR("mod.du", "$r23", "$r28", "$r12", 0x82a854f86e642cbaUL, 0xdd0fd63485d6c3dUL, 0x56b21f15cb9d2bf2UL); ++ ++ /* ---------------- alsl.w rd, rj, rk, sa2 ---------------- */ ++ TESTINST_RRRI("alsl.w", "$r18", "$r10", "$r15", SA2_1, 0xafb40df16156827bUL, 0x9b0b86116a0d89cbUL, 0x80086c066ea6842bUL, 2); ++ TESTINST_RRRI("alsl.w", "$r24", "$r5", "$r4", SA2_1, 0xb8b63b8205a919dfUL, 0x7319260322fa2d6dUL, 0x1efce6644a51ebf9UL, 2); ++ TESTINST_RRRI("alsl.w", "$r24", "$r5", "$r27", SA2_1, 0xb4f0fd355869e078UL, 0x26abeea20b7d1ac1UL, 0x4108f7f27e321c8fUL, 2); ++ TESTINST_RRRI("alsl.w", "$r24", "$r29", "$r10", SA2_1, 0x4b948e9a0b82df22UL, 0x11893c9dd43d0112UL, 0x51a030165671a055UL, 1); ++ TESTINST_RRRI("alsl.w", "$r5", "$r10", "$r18", SA2_1, 0xfc253ac9e2b55590UL, 0x2682507563a85b07UL, 0xa467083f66457d1dUL, 1); ++ TESTINST_RRRI("alsl.w", "$r20", "$r13", "$r10", SA2_1, 0x76e8c346a721cdabUL, 0x548f2762bfb1bc01UL, 0xa6e0d27e62dcc594UL, 3); ++ TESTINST_RRRI("alsl.w", "$r16", "$r6", "$r24", SA2_1, 
0x39f77b88fc3b663UL, 0x281818bf4a36a7e5UL, 0x86cd2a06ef475a61UL, 3); ++ TESTINST_RRRI("alsl.w", "$r14", "$r18", "$r9", SA2_1, 0x8a58ea94346ff16UL, 0x4ff191f91397adeaUL, 0x4cda359b03c97a53UL, 4); ++ TESTINST_RRRI("alsl.w", "$r8", "$r6", "$r29", SA2_1, 0xae0bfa182556c725UL, 0xda179bc2f41d03d3UL, 0x1d23e4da08af7978UL, 1); ++ TESTINST_RRRI("alsl.w", "$r31", "$r26", "$r30", SA2_1, 0xd6af9fcd7ffd8e75UL, 0x3e88bb77d6665633UL, 0x23a0414c69b804c1UL, 1); ++ ++ /* ---------------- alsl.wu rd, rj, rk, sa2 ---------------- */ ++ TESTINST_RRRI("alsl.wu", "$r20", "$r24", "$r18", SA2_1, 0xc714872ff3c39370UL, 0xcaea31ddabb275f9UL, 0xedbfc2cedca8eb7aUL, 2); ++ TESTINST_RRRI("alsl.wu", "$r13", "$r26", "$r15", SA2_1, 0xe1a0ba1adcb75aa4UL, 0x8adbed432acf321aUL, 0xeae447eaa60bb142UL, 3); ++ TESTINST_RRRI("alsl.wu", "$r4", "$r17", "$r27", SA2_1, 0xb153f9ecea23068cUL, 0xd2066b089c9499a3UL, 0x36ed3c96ac4751aaUL, 3); ++ TESTINST_RRRI("alsl.wu", "$r20", "$r10", "$r4", SA2_1, 0x8fb2705357e98d66UL, 0xd353329585fc71ddUL, 0x739237ed6a677f00UL, 4); ++ TESTINST_RRRI("alsl.wu", "$r31", "$r12", "$r23", SA2_1, 0x6caac60acd9bc6f4UL, 0xc87131b9171530dfUL, 0x39c8e321a6e131c0UL, 2); ++ TESTINST_RRRI("alsl.wu", "$r13", "$r14", "$r19", SA2_1, 0xd2c7072036f54e45UL, 0x35ea1627556f8f98UL, 0x97054728433042d3UL, 2); ++ TESTINST_RRRI("alsl.wu", "$r7", "$r14", "$r5", SA2_1, 0x5a0f1fae80105d64UL, 0xd300b74879e33a53UL, 0x3a1e7389d0669d4cUL, 1); ++ TESTINST_RRRI("alsl.wu", "$r28", "$r4", "$r9", SA2_1, 0xcd7fd8389b4f4062UL, 0xad1830d644c205e7UL, 0xced1c031d73f9087UL, 1); ++ TESTINST_RRRI("alsl.wu", "$r13", "$r9", "$r29", SA2_1, 0x81601560f53b081UL, 0xd3ee3c45f08cd218UL, 0xa7d5a43a1df2aa1dUL, 4); ++ TESTINST_RRRI("alsl.wu", "$r30", "$r29", "$r31", SA2_1, 0xf383bd5bfae7e46dUL, 0x67862a0151c65567UL, 0x9cdcbf604f46c48aUL, 2); ++ ++ /* ---------------- alsl.d rd, rj, rk, sa2 ---------------- */ ++ TESTINST_RRRI("alsl.d", "$r18", "$r28", "$r16", SA2_1, 0x53e533e973dfa49cUL, 0x6665a9d32abaaf55UL, 0xf70490874fb75e6eUL, 4); ++ TESTINST_RRRI("alsl.d", "$r10", "$r30", "$r18", SA2_1, 0xfb14c3e6acd722c3UL, 0xcae19862ab088fccUL, 0x87c434d85259d923UL, 2); ++ TESTINST_RRRI("alsl.d", "$r17", "$r25", "$r26", SA2_1, 0x95e79a567c313ec7UL, 0x83a0e706c2c4c534UL, 0x2f49f1e9d5b91fc9UL, 1); ++ TESTINST_RRRI("alsl.d", "$r7", "$r24", "$r24", SA2_1, 0x35b966d0db9f681cUL, 0xc0bc97593f1054fcUL, 0x7e564928b0a53ac6UL, 2); ++ TESTINST_RRRI("alsl.d", "$r6", "$r30", "$r24", SA2_1, 0x38ad1fb21e071421UL, 0xb959c439b0436d6dUL, 0x647c742c9ce02fc5UL, 3); ++ TESTINST_RRRI("alsl.d", "$r18", "$r28", "$r10", SA2_1, 0x1bde2962dc5bb68bUL, 0x67c403d00c9389bdUL, 0x8fc18921f225d05aUL, 2); ++ TESTINST_RRRI("alsl.d", "$r8", "$r27", "$r15", SA2_1, 0x5b8de9d8b393fa06UL, 0x393ec1c28e89e9d8UL, 0x1a59f9d852c3f8baUL, 3); ++ TESTINST_RRRI("alsl.d", "$r27", "$r24", "$r6", SA2_1, 0x72195c1ca51cc4dbUL, 0x4ee5b51e1e161ab2UL, 0x8a10acb4b625fefUL, 4); ++ TESTINST_RRRI("alsl.d", "$r29", "$r4", "$r18", SA2_1, 0xf3ed9e39d83d3decUL, 0xa3816509b9a6c23dUL, 0x6949e8e534450dd5UL, 2); ++ TESTINST_RRRI("alsl.d", "$r16", "$r13", "$r8", SA2_1, 0x588f388f25a342dfUL, 0xde33a74109c7be30UL, 0x8b02cf06997a065aUL, 1); ++ ++ /* ---------------- lu12i.w rd, si20 ---------------- */ ++ TESTINST_RI("lu12i.w", "$r9", SI20, 0xdf45bd002ccf48e1UL, 94146); ++ TESTINST_RI("lu12i.w", "$r10", SI20, 0xa5138a37d09ada8aUL, 129014); ++ TESTINST_RI("lu12i.w", "$r18", SI20, 0xefe46a52b8b3e5eUL, -130138); ++ TESTINST_RI("lu12i.w", "$r7", SI20, 0x29084adf6d033a88UL, -467080); ++ TESTINST_RI("lu12i.w", "$r10", SI20, 0xe9072e7fec2a5d1cUL, 
360675); ++ TESTINST_RI("lu12i.w", "$r28", SI20, 0x2f7d41c7bd959cd5UL, 205272); ++ TESTINST_RI("lu12i.w", "$r16", SI20, 0xcb48200d89b48566UL, -266298); ++ TESTINST_RI("lu12i.w", "$r12", SI20, 0xd605223c244f4a50UL, -186346); ++ TESTINST_RI("lu12i.w", "$r15", SI20, 0x22c035c8c90016beUL, 247864); ++ TESTINST_RI("lu12i.w", "$r20", SI20, 0x6b2fd1aa0b603fecUL, -511005); ++ ++ /* ---------------- lu32i.d rd, si20 ---------------- */ ++ TESTINST_RI("lu32i.d", "$r8", SI20, 0xb331616751ed8877UL, -310956); ++ TESTINST_RI("lu32i.d", "$r17", SI20, 0xe49bab8d80e1dd7UL, 35590); ++ TESTINST_RI("lu32i.d", "$r4", SI20, 0x842cdc9ac0a0adf6UL, 500474); ++ TESTINST_RI("lu32i.d", "$r23", SI20, 0xc9ca69b8e5ab079eUL, -447277); ++ TESTINST_RI("lu32i.d", "$r12", SI20, 0x27d83e1c77dec50aUL, -503028); ++ TESTINST_RI("lu32i.d", "$r26", SI20, 0xc00dcc918a89f350UL, -355708); ++ TESTINST_RI("lu32i.d", "$r16", SI20, 0xd180188cdc073491UL, -231989); ++ TESTINST_RI("lu32i.d", "$r26", SI20, 0x4efae034432bbb3bUL, 250642); ++ TESTINST_RI("lu32i.d", "$r15", SI20, 0x7bf2141e673e336fUL, 237105); ++ TESTINST_RI("lu32i.d", "$r4", SI20, 0x187c50bfc5eb8f32UL, -312071); ++ ++ /* ---------------- lu52i.d rd, rj, si12 ---------------- */ ++ TESTINST_RRI("lu52i.d", "$r8", "$r25", SI12, 0x1da74dfcb33d471aUL, 0x453ae9f1200f4d41UL, 1920); ++ TESTINST_RRI("lu52i.d", "$r14", "$r25", SI12, 0x5e954055ebaec78fUL, 0xb7637f9119e12e31UL, -2008); ++ TESTINST_RRI("lu52i.d", "$r26", "$r24", SI12, 0xead69e40b96b23bfUL, 0x779862b03d1ab575UL, -1803); ++ TESTINST_RRI("lu52i.d", "$r5", "$r25", SI12, 0x452236306da7c667UL, 0x9f16a6e48cca3a7bUL, -1406); ++ TESTINST_RRI("lu52i.d", "$r26", "$r23", SI12, 0x5604b9744291e45aUL, 0x70eecb3116b1795cUL, -667); ++ TESTINST_RRI("lu52i.d", "$r14", "$r27", SI12, 0x6d9a8cfe459c1c48UL, 0x85452bdd40205e0dUL, -1221); ++ TESTINST_RRI("lu52i.d", "$r25", "$r8", SI12, 0x1a8d72e42f68a33dUL, 0x7089b6fe4c1f7a70UL, 423); ++ TESTINST_RRI("lu52i.d", "$r30", "$r10", SI12, 0x7c4fe646acac7ac0UL, 0xe7d222ba1fd5cae2UL, -177); ++ TESTINST_RRI("lu52i.d", "$r6", "$r13", SI12, 0xdb3d6a615a9e492fUL, 0xaa9303648ff489f2UL, -1438); ++ TESTINST_RRI("lu52i.d", "$r25", "$r4", SI12, 0x8b41b813d85b8ee8UL, 0xe4d31961e42e713cUL, -634); ++ ++ /* ---------------- addi.w rd, rj, si12 ---------------- */ ++ TESTINST_RRI("addi.w", "$r6", "$r27", SI12, 0x12845f036198fa6fUL, 0xda77c63c764655daUL, 1727); ++ TESTINST_RRI("addi.w", "$r9", "$r8", SI12, 0x21a7e3cfa2649a4fUL, 0xc64c73b3bd4c1dcbUL, -381); ++ TESTINST_RRI("addi.w", "$r16", "$r6", SI12, 0x6c47b02ef52a3502UL, 0x24ca1a646dac5cc3UL, -186); ++ TESTINST_RRI("addi.w", "$r20", "$r31", SI12, 0xb6144d8f9513c78eUL, 0xc4b808764e894e6cUL, 1503); ++ TESTINST_RRI("addi.w", "$r19", "$r17", SI12, 0xcf97c9215c961121UL, 0x9b714c4cb899399bUL, -1918); ++ TESTINST_RRI("addi.w", "$r14", "$r8", SI12, 0xe1abf22f6c3c82ecUL, 0x4110e9c1b5f59ef6UL, -1781); ++ TESTINST_RRI("addi.w", "$r29", "$r18", SI12, 0x4b64427195dda12dUL, 0xadf5af70b7b3f37bUL, 2047); ++ TESTINST_RRI("addi.w", "$r4", "$r30", SI12, 0xfc785d46f5bbdff4UL, 0x1e061e9d51362d9cUL, 244); ++ TESTINST_RRI("addi.w", "$r7", "$r23", SI12, 0xe037576d82c12e8dUL, 0xa77c8da72af708f1UL, -376); ++ TESTINST_RRI("addi.w", "$r23", "$r17", SI12, 0xa10df57c4103efUL, 0x26d2628746ad0a3eUL, 1924); ++ ++ /* ---------------- addi.d rd, rj, si12 ---------------- */ ++ TESTINST_RRI("addi.d", "$r14", "$r14", SI12, 0x61b497fb58a816d9UL, 0x29eb218dd65d9d6cUL, 152); ++ TESTINST_RRI("addi.d", "$r20", "$r13", SI12, 0xd80db8387a8cdd93UL, 0x5e23e4b01f2bbd6dUL, -640); ++ TESTINST_RRI("addi.d", "$r13", 
"$r25", SI12, 0x5dfea060c6e8f587UL, 0x95f49b783954f9f9UL, -743); ++ TESTINST_RRI("addi.d", "$r4", "$r30", SI12, 0xd72f370f6ce7bc4cUL, 0x148550b0f97ce601UL, 676); ++ TESTINST_RRI("addi.d", "$r26", "$r8", SI12, 0xa4120a67f8d6df1aUL, 0xa83f4bbcaf5bc52eUL, 1630); ++ TESTINST_RRI("addi.d", "$r20", "$r29", SI12, 0xa8f9c82780ac16d5UL, 0x7ab169a5751642bcUL, -1971); ++ TESTINST_RRI("addi.d", "$r8", "$r8", SI12, 0x6f22bdb480c14540UL, 0x94e1253c331b17f2UL, 1160); ++ TESTINST_RRI("addi.d", "$r15", "$r27", SI12, 0x312473547bcfe03UL, 0x7a786cbc8149d818UL, 844); ++ TESTINST_RRI("addi.d", "$r8", "$r26", SI12, 0xee2b1be852671bc3UL, 0x6a36d61dfee3a6fbUL, -1185); ++ TESTINST_RRI("addi.d", "$r17", "$r27", SI12, 0x70e068b54ed72e20UL, 0x922681ab8837027bUL, -2046); ++ ++ /* ---------------- addu16i.d rd, rj, si16 ---------------- */ ++ TESTINST_RRI("addu16i.d", "$r20", "$r29", SI16, 0x8232770e3472bdc3UL, 0x4d28c5567787c26eUL, -14564); ++ TESTINST_RRI("addu16i.d", "$r29", "$r4", SI16, 0x9076403ed2f0fdf4UL, 0x471cafb4183a389fUL, -3511); ++ TESTINST_RRI("addu16i.d", "$r26", "$r15", SI16, 0xdec118b1eb13234UL, 0x6ff5ce56111b301UL, 25897); ++ TESTINST_RRI("addu16i.d", "$r9", "$r5", SI16, 0x73209239d98fb81aUL, 0x1dc8f0ba4710eba3UL, -21829); ++ TESTINST_RRI("addu16i.d", "$r28", "$r25", SI16, 0xa39ba8429a9c13a6UL, 0x4fffb32851c13ff2UL, -23832); ++ TESTINST_RRI("addu16i.d", "$r23", "$r30", SI16, 0x8abd919f5ea43b1UL, 0x40078826f7336f0eUL, -32189); ++ TESTINST_RRI("addu16i.d", "$r28", "$r24", SI16, 0x695e543e25e7d3e4UL, 0x30279db606efa8ecUL, 16372); ++ TESTINST_RRI("addu16i.d", "$r4", "$r18", SI16, 0xa125cadb71209757UL, 0xff287b5e7fb2a2baUL, -28041); ++ TESTINST_RRI("addu16i.d", "$r5", "$r17", SI16, 0xd5d3e6da7c594ca9UL, 0x2bc9be0ef252584cUL, -11268); ++ TESTINST_RRI("addu16i.d", "$r29", "$r28", SI16, 0xee0391151007613UL, 0xae616c39d87c4b6eUL, -15645); ++ ++ /* ---------------- andi rd, rj, ui12 ---------------- */ ++ TESTINST_RRI("andi", "$r28", "$r18", UI12, 0xd62f833fbbd483b3UL, 0xa2f268cdcf18dd00UL, 1288); ++ TESTINST_RRI("andi", "$r12", "$r13", UI12, 0xc40efc9a74a3a13bUL, 0xfd609200795f877cUL, 153); ++ TESTINST_RRI("andi", "$r6", "$r18", UI12, 0x79ee7ee7a7865b79UL, 0x644bec92dca1ad7fUL, 3633); ++ TESTINST_RRI("andi", "$r5", "$r31", UI12, 0x2d64be0e5c2ec0f6UL, 0x87253b6589f182c7UL, 3299); ++ TESTINST_RRI("andi", "$r28", "$r5", UI12, 0xf2e4ed85d98a1860UL, 0x9f58e4edd98b60d1UL, 3189); ++ TESTINST_RRI("andi", "$r18", "$r29", UI12, 0x3c067920d48cf0d2UL, 0x2bf35e68c503ecfeUL, 4031); ++ TESTINST_RRI("andi", "$r20", "$r24", UI12, 0xe1d95be05fd57a64UL, 0xd33e771521b24bd3UL, 3252); ++ TESTINST_RRI("andi", "$r6", "$r23", UI12, 0x23341b2d86d02365UL, 0x16de10f2b4a45064UL, 1665); ++ TESTINST_RRI("andi", "$r27", "$r14", UI12, 0xd7db9d77aea4dcf5UL, 0x142272b737435eb7UL, 325); ++ TESTINST_RRI("andi", "$r23", "$r16", UI12, 0x57fee53581b09718UL, 0x2ace25d9e2ddbaaUL, 1056); ++ ++ /* ---------------- ori rd, rj, ui12 ---------------- */ ++ TESTINST_RRI("ori", "$r26", "$r13", UI12, 0x6d47cf7e5bb5c13eUL, 0x93aed4996805ba3bUL, 3251); ++ TESTINST_RRI("ori", "$r10", "$r25", UI12, 0x42f0332098f938afUL, 0xd7916fe8d569567bUL, 568); ++ TESTINST_RRI("ori", "$r12", "$r17", UI12, 0xc507d4150a742b76UL, 0x2b9a102a5b5b15f7UL, 1798); ++ TESTINST_RRI("ori", "$r15", "$r15", UI12, 0xa54ad5ecc0e72adbUL, 0x37c18ad4ec6e678cUL, 1781); ++ TESTINST_RRI("ori", "$r5", "$r4", UI12, 0x1f388b2a2b18004dUL, 0xb5fa23fbb02eeedbUL, 682); ++ TESTINST_RRI("ori", "$r27", "$r24", UI12, 0x73b086f8a8b4d7b5UL, 0xd23e30ab1e45470aUL, 1931); ++ TESTINST_RRI("ori", "$r28", "$r6", 
UI12, 0x972967beac695928UL, 0x2c701d0bc28816c5UL, 3593); ++ TESTINST_RRI("ori", "$r27", "$r4", UI12, 0x54fecbbf0a06e5a6UL, 0xf0b6d846464a3331UL, 3679); ++ TESTINST_RRI("ori", "$r9", "$r16", UI12, 0x71f3cd001c729062UL, 0xc5720758095e4592UL, 905); ++ TESTINST_RRI("ori", "$r26", "$r7", UI12, 0xd7ce86800c3c0f4bUL, 0xc4a58f787cdf5bb2UL, 3473); ++ ++ /* ---------------- xori rd, rj, ui12 ---------------- */ ++ TESTINST_RRI("xori", "$r27", "$r31", UI12, 0xe6d49c2dc629fbc7UL, 0x91832665d1a898e2UL, 2690); ++ TESTINST_RRI("xori", "$r15", "$r5", UI12, 0xada49c0d48beffc5UL, 0xe3cf426f1be4766UL, 697); ++ TESTINST_RRI("xori", "$r9", "$r20", UI12, 0x174a71d6d3757e3eUL, 0x25ed4678037622beUL, 2268); ++ TESTINST_RRI("xori", "$r31", "$r15", UI12, 0x1fac1694b40fbf2eUL, 0x4fe4fb2e0b660ca2UL, 3817); ++ TESTINST_RRI("xori", "$r17", "$r14", UI12, 0x2dc443400df4e153UL, 0x1db25e602ef8ece5UL, 3929); ++ TESTINST_RRI("xori", "$r4", "$r28", UI12, 0x5fb5ad5a84e97835UL, 0xc52da11293641639UL, 2735); ++ TESTINST_RRI("xori", "$r5", "$r13", UI12, 0x5c5fc4ba45da005fUL, 0xe46f853b7d602b84UL, 1153); ++ TESTINST_RRI("xori", "$r30", "$r26", UI12, 0x1419915b6f92678bUL, 0xa984612f1266da94UL, 3867); ++ TESTINST_RRI("xori", "$r13", "$r13", UI12, 0xc2b8fd036ba6314bUL, 0x4cf49604f644713cUL, 3426); ++ TESTINST_RRI("xori", "$r25", "$r23", UI12, 0xde46e3673c9a75dcUL, 0xfa1177a89f08c81eUL, 2669); ++ ++ /* ---------------- sll.w rd, rj, rk ---------------- */ ++ TESTINST_RRR("sll.w", "$r13", "$r8", "$r12", 0x26131fa72f4b76f1UL, 0xf34f7108538078d0UL, 0x10bbd12a8e087501UL); ++ TESTINST_RRR("sll.w", "$r29", "$r8", "$r15", 0xb6f529da4017d0d9UL, 0x49fbfb11ef643171UL, 0x9d0425e747d11bdeUL); ++ TESTINST_RRR("sll.w", "$r30", "$r31", "$r12", 0xcfc5236f5c070644UL, 0xba8301a1087b3a96UL, 0xff7589561824e1beUL); ++ TESTINST_RRR("sll.w", "$r28", "$r10", "$r7", 0x37fa51674df87149UL, 0x39212605c5d0cf7dUL, 0x18a8e323326ce5aaUL); ++ TESTINST_RRR("sll.w", "$r8", "$r9", "$r14", 0x707a9e0ece8abe40UL, 0x94b7b20a80c16c7bUL, 0x6887c46efb4cc181UL); ++ TESTINST_RRR("sll.w", "$r8", "$r4", "$r24", 0xd718a01b03a53964UL, 0x8ebd8bfeec304e2aUL, 0x6b4a83a6838b5d1UL); ++ TESTINST_RRR("sll.w", "$r23", "$r31", "$r27", 0xf50cab824a06d30eUL, 0xa8ee12cbd8dec935UL, 0x118002b3f0cecbabUL); ++ TESTINST_RRR("sll.w", "$r8", "$r25", "$r26", 0x8163368243faadeeUL, 0x3a04f47bf19a4cc8UL, 0x6a58cd3a57b4eeb4UL); ++ TESTINST_RRR("sll.w", "$r25", "$r13", "$r12", 0x3d6831e1afab1b1aUL, 0x9ee672580cb39777UL, 0x9084acd2bc7404caUL); ++ TESTINST_RRR("sll.w", "$r20", "$r5", "$r29", 0x90f7ee3ff75817a6UL, 0xe4ae07989d6148d7UL, 0x3e208bfcf046fffdUL); ++ ++ /* ---------------- srl.w rd, rj, rk ---------------- */ ++ TESTINST_RRR("srl.w", "$r20", "$r29", "$r30", 0xff3f6b79b5e2b56dUL, 0x1195aa09fa92d26bUL, 0xa93a8fd11ad5ae99UL); ++ TESTINST_RRR("srl.w", "$r8", "$r15", "$r4", 0x5d2fb7cd04ecd00cUL, 0x47bf914b6eca2852UL, 0x1bc63138cc45a75cUL); ++ TESTINST_RRR("srl.w", "$r20", "$r12", "$r18", 0x61fa22abda7c7b02UL, 0x9341cf09aa2e106eUL, 0x2dea831e9e121355UL); ++ TESTINST_RRR("srl.w", "$r30", "$r20", "$r26", 0x43e0249584da52dbUL, 0x482a209e436cda53UL, 0xb323a7f463f80660UL); ++ TESTINST_RRR("srl.w", "$r31", "$r16", "$r28", 0x4b10d05d93bf7288UL, 0x6d0330e88122d7c1UL, 0xc531cf8c92d53d03UL); ++ TESTINST_RRR("srl.w", "$r31", "$r15", "$r31", 0xd4654233c7648c3aUL, 0x12e6fc2a04cbf809UL, 0xcfe1c1b558a94808UL); ++ TESTINST_RRR("srl.w", "$r10", "$r30", "$r19", 0x602dee9c45a3b99bUL, 0x3ce0a6ac2acf19faUL, 0xdb5fab4bc2f82e7aUL); ++ TESTINST_RRR("srl.w", "$r17", "$r9", "$r23", 0x45106f11d4a57641UL, 0x5354795b675edacUL, 
0xc67578c28ed7b6c7UL); ++ TESTINST_RRR("srl.w", "$r25", "$r26", "$r29", 0x1dc3b8477fba650cUL, 0x814377a71768e75UL, 0x60276c0e316db833UL); ++ TESTINST_RRR("srl.w", "$r31", "$r7", "$r30", 0x360fc92a085c2e14UL, 0x1b44ec96def89449UL, 0x56d6c5d85a81ed1fUL); ++ ++ /* ---------------- sra.w rd, rj, rk ---------------- */ ++ TESTINST_RRR("sra.w", "$r10", "$r17", "$r19", 0x576f2bfc771641b8UL, 0xfb1fb20b98a54405UL, 0xb20e9dae5a212078UL); ++ TESTINST_RRR("sra.w", "$r12", "$r16", "$r31", 0xbfdbb9a90ccc08a0UL, 0xb5d3c7f3b1a800a6UL, 0x57c3ff79f3b4198bUL); ++ TESTINST_RRR("sra.w", "$r18", "$r16", "$r5", 0xadcb6c153538b6b1UL, 0x99e245813e90b5e9UL, 0x7adff58363d5ebd2UL); ++ TESTINST_RRR("sra.w", "$r17", "$r28", "$r25", 0x7faea6a29686caf9UL, 0x801d40ea40b19beeUL, 0xf5174f678600d3fUL); ++ TESTINST_RRR("sra.w", "$r8", "$r27", "$r13", 0x86e5534832150e05UL, 0x47bb53d1cdc3560fUL, 0x917e2b49633a0f44UL); ++ TESTINST_RRR("sra.w", "$r26", "$r18", "$r20", 0xbfb83a0d762c171aUL, 0xbf67ed78d934d37cUL, 0x9f377995293fcc6bUL); ++ TESTINST_RRR("sra.w", "$r5", "$r25", "$r19", 0x266703af59334b0fUL, 0x4ed92cdab9f641c9UL, 0x5da1d0b8846d1a3dUL); ++ TESTINST_RRR("sra.w", "$r19", "$r27", "$r24", 0x72557561b3b40007UL, 0xd5db278ea099b3b5UL, 0x50b4a888b898610fUL); ++ TESTINST_RRR("sra.w", "$r16", "$r10", "$r4", 0xb349f888f1809ba3UL, 0x23d60a1fc100d89eUL, 0xc2846cc882dbc8e2UL); ++ TESTINST_RRR("sra.w", "$r23", "$r10", "$r31", 0xd7bdeddd344bb5afUL, 0xa015a07c13ff2234UL, 0x7c0fe410ce063a85UL); ++ ++ /* ---------------- sll.d rd, rj, rk ---------------- */ ++ TESTINST_RRR("sll.d", "$r28", "$r17", "$r10", 0x167adf26efd66416UL, 0xb861ba6e0aadf304UL, 0xa19e21ba0f406c33UL); ++ TESTINST_RRR("sll.d", "$r18", "$r29", "$r13", 0x3e8ea4dc3a9d9b44UL, 0x28ccf5dfa9cdc3b2UL, 0x33ef837a5a476bdcUL); ++ TESTINST_RRR("sll.d", "$r23", "$r27", "$r29", 0x23e29c76deed70caUL, 0x9e2265d8422e78dUL, 0xe9cc62bfd8a7c913UL); ++ TESTINST_RRR("sll.d", "$r16", "$r17", "$r17", 0xf5e858c7445fceddUL, 0x6735e4cf2fcb78fbUL, 0x726dd10e13b62663UL); ++ TESTINST_RRR("sll.d", "$r17", "$r15", "$r29", 0xfc1dbfc0551f8813UL, 0xec45100b21a74025UL, 0x186d3b737cbfd39aUL); ++ TESTINST_RRR("sll.d", "$r19", "$r15", "$r9", 0xbb01afe39a1e17b6UL, 0x3e66dd1100acc44aUL, 0xa9c74257f6e39cdfUL); ++ TESTINST_RRR("sll.d", "$r23", "$r9", "$r31", 0x945b101751c38d12UL, 0x262d14baae546199UL, 0x7ccdd8a7840948dfUL); ++ TESTINST_RRR("sll.d", "$r5", "$r31", "$r28", 0xa88eaecc1405995bUL, 0xd96ed500aff4596bUL, 0x6994841a196c562eUL); ++ TESTINST_RRR("sll.d", "$r27", "$r10", "$r25", 0x1e9540fa8237a849UL, 0x9aad6101b2470a60UL, 0x90c95628696f752fUL); ++ TESTINST_RRR("sll.d", "$r4", "$r26", "$r18", 0xb4dc3cdeab2e8454UL, 0xd27a92db3b2906cUL, 0x2bc7647c40c0b375UL); ++ ++ /* ---------------- srl.d rd, rj, rk ---------------- */ ++ TESTINST_RRR("srl.d", "$r6", "$r27", "$r13", 0x66ebeca9a7fad574UL, 0xdc837ce646ea6b51UL, 0xa57259e1758c564bUL); ++ TESTINST_RRR("srl.d", "$r6", "$r20", "$r5", 0x91794316e6c5e65UL, 0xdc7c47d39d64a16UL, 0x35f029b9942e11c8UL); ++ TESTINST_RRR("srl.d", "$r15", "$r5", "$r4", 0xbc963842b3ebc906UL, 0x42ea773b0bd19807UL, 0xd05cd2c4b01ea630UL); ++ TESTINST_RRR("srl.d", "$r18", "$r25", "$r28", 0x30d908baaa31230eUL, 0x779272ae228746a5UL, 0xf7b665809a3f303bUL); ++ TESTINST_RRR("srl.d", "$r5", "$r28", "$r27", 0x1f1d414f1d0f1feUL, 0x647277d3759d74bfUL, 0xa5c5fce39b4a1810UL); ++ TESTINST_RRR("srl.d", "$r24", "$r9", "$r26", 0x5fa44419162fc2c8UL, 0x9d2a589e6f6b3440UL, 0x810a615115238d8dUL); ++ TESTINST_RRR("srl.d", "$r31", "$r23", "$r30", 0xfa1a7ad64758b758UL, 0xe3d69d99e87b4297UL, 0x87fd8dc0a78e86bbUL); 
++ TESTINST_RRR("srl.d", "$r26", "$r10", "$r24", 0x540888639a787231UL, 0x168791cefeb1660aUL, 0xd02b158115db9cdfUL); ++ TESTINST_RRR("srl.d", "$r23", "$r15", "$r12", 0xff3e950565409999UL, 0xe15a01fa0e34ea3bUL, 0x237aba34fe552f8eUL); ++ TESTINST_RRR("srl.d", "$r8", "$r16", "$r4", 0x825bafd36cc0d32eUL, 0x321677304d1b1406UL, 0xca68c6c83dfa5837UL); ++ ++ /* ---------------- sra.d rd, rj, rk ---------------- */ ++ TESTINST_RRR("sra.d", "$r23", "$r19", "$r16", 0x4cab63abd8f64774UL, 0x2c007c3ac68d7c80UL, 0xd8f4ac963a8b2c01UL); ++ TESTINST_RRR("sra.d", "$r18", "$r30", "$r25", 0x531de73fca30361aUL, 0x2857ba730cd281ffUL, 0xacab0fe400e4c113UL); ++ TESTINST_RRR("sra.d", "$r31", "$r13", "$r10", 0x3184416bc93a5e26UL, 0xad5864bc4022de96UL, 0xf7007bdbf1f728abUL); ++ TESTINST_RRR("sra.d", "$r6", "$r25", "$r23", 0x9184d2df291f3402UL, 0x7c0b117dcad80c03UL, 0x35b29b0dde1a94bdUL); ++ TESTINST_RRR("sra.d", "$r16", "$r6", "$r29", 0x2849e543d35dff5fUL, 0x9f13f36a632a3fUL, 0xf31f881e12072fe2UL); ++ TESTINST_RRR("sra.d", "$r7", "$r29", "$r10", 0x25c763f8366139ddUL, 0xfd77fd6e69e371c6UL, 0xcaa2ec6ad4f3b996UL); ++ TESTINST_RRR("sra.d", "$r24", "$r25", "$r26", 0x472602300b4f04c9UL, 0x54ceea832a5677e9UL, 0x5f63e9d9d6eb4af0UL); ++ TESTINST_RRR("sra.d", "$r23", "$r4", "$r27", 0xe8b449325a0ed51eUL, 0xd96928476f8441a5UL, 0x7e1ae8fd9c849dceUL); ++ TESTINST_RRR("sra.d", "$r15", "$r9", "$r12", 0x71601a1a2b155f51UL, 0xbcbb1d162563240UL, 0x5a906ad2f4abb4c7UL); ++ TESTINST_RRR("sra.d", "$r16", "$r29", "$r23", 0x1686886f27d397fbUL, 0x851328b2655e5689UL, 0x1634457590cd4033UL); ++ ++ /* ---------------- rotr.w rd, rj, rk ---------------- */ ++ TESTINST_RRR("rotr.w", "$r8", "$r5", "$r18", 0xc4394aae4c13908bUL, 0xa0c5728d1211b595UL, 0x3d562746b3943f3bUL); ++ TESTINST_RRR("rotr.w", "$r19", "$r18", "$r10", 0x284b501639de116bUL, 0x4248ad6cc0107902UL, 0xb41907b756bf8004UL); ++ TESTINST_RRR("rotr.w", "$r29", "$r8", "$r4", 0x2656b50c7d689f19UL, 0x7b5d21fdce9bcb73UL, 0x5b212fbe9e6b8522UL); ++ TESTINST_RRR("rotr.w", "$r25", "$r6", "$r30", 0x4c79ed7a1695fc25UL, 0x6bac1698a978f50fUL, 0xf1d58570dfb10203UL); ++ TESTINST_RRR("rotr.w", "$r14", "$r18", "$r6", 0xe894476b4ebbff23UL, 0x1398b65ae1e91c98UL, 0xebb6c3f5f689d2d8UL); ++ TESTINST_RRR("rotr.w", "$r19", "$r29", "$r26", 0x2595423cc93ecd7cUL, 0x6c462c2d29d8f908UL, 0x19142efd8e0b48b8UL); ++ TESTINST_RRR("rotr.w", "$r23", "$r10", "$r25", 0x68b4d913b267a3a2UL, 0x69afb673907e4506UL, 0xbd09ff2ed890862dUL); ++ TESTINST_RRR("rotr.w", "$r9", "$r14", "$r27", 0x17a45b8cbdebd6efUL, 0x33effef864846356UL, 0x3f52e437f2d5da62UL); ++ TESTINST_RRR("rotr.w", "$r5", "$r12", "$r23", 0x2d191b1a9707cf26UL, 0x86fa75433dac3d39UL, 0x21136a02424e5da4UL); ++ TESTINST_RRR("rotr.w", "$r29", "$r18", "$r27", 0x7d989f74f9944f8dUL, 0x50fe5829a153e6UL, 0x926776f9140b06fcUL); ++ ++ /* ---------------- rotr.d rd, rj, rk ---------------- */ ++ TESTINST_RRR("rotr.d", "$r29", "$r19", "$r13", 0x1e02c0c28ec3f9b1UL, 0xf2e79e6ff240b188UL, 0x60f500663eddf444UL); ++ TESTINST_RRR("rotr.d", "$r30", "$r4", "$r14", 0x97f6be8229e2e822UL, 0xf79aaeb2c03a2113UL, 0xbbdb2cb642605ed7UL); ++ TESTINST_RRR("rotr.d", "$r6", "$r19", "$r7", 0x1611806010ce99d8UL, 0xcb64270e0fc5b4c7UL, 0xeda6972c46af03cUL); ++ TESTINST_RRR("rotr.d", "$r4", "$r15", "$r30", 0xe63084e97bd0efb3UL, 0x6e1aa322e38e9b66UL, 0xa7df0f1d92106e2dUL); ++ TESTINST_RRR("rotr.d", "$r16", "$r27", "$r10", 0x1ff92fbb0f10ff9aUL, 0x15c2eb91c9ae124UL, 0x8b4c97ee7f9bc2faUL); ++ TESTINST_RRR("rotr.d", "$r28", "$r7", "$r25", 0xbd766a63bbead21cUL, 0xd97b509610db5e7UL, 0x3151203010315af5UL); ++ 
TESTINST_RRR("rotr.d", "$r9", "$r20", "$r23", 0x8a2bb5eacea50d68UL, 0x947ec1930151adb9UL, 0xc2f39e045d278b7bUL); ++ TESTINST_RRR("rotr.d", "$r25", "$r13", "$r23", 0xcaddb8ea7bd492c7UL, 0x416a1b790dbf45cbUL, 0x44c59965e1c6af25UL); ++ TESTINST_RRR("rotr.d", "$r14", "$r7", "$r31", 0x8ca18b58047c8b5aUL, 0x93a6cdc3585b5446UL, 0x70cd84ec07e33cefUL); ++ TESTINST_RRR("rotr.d", "$r14", "$r9", "$r4", 0x48bd5c133004f490UL, 0xad095be0915fe20bUL, 0xc1fff6ff603a47b3UL); ++ ++ /* ---------------- slli.w rd, rj, ui5 ---------------- */ ++ TESTINST_RRI("slli.w", "$r18", "$r8", UI5, 0xe7f8823a2989c395UL, 0xf0ccc85519ad1e0aUL, 10); ++ TESTINST_RRI("slli.w", "$r27", "$r17", UI5, 0x2e66b550a3bb071dUL, 0x20943aa3eaa4024eUL, 30); ++ TESTINST_RRI("slli.w", "$r27", "$r23", UI5, 0x70daa2bee8209243UL, 0x2e9160afd2e28a64UL, 31); ++ TESTINST_RRI("slli.w", "$r10", "$r13", UI5, 0x701c424632b5dc29UL, 0x591054db6afe1725UL, 12); ++ TESTINST_RRI("slli.w", "$r7", "$r15", UI5, 0xdd1d7fe3ae579499UL, 0x2e077f689088c0c7UL, 19); ++ TESTINST_RRI("slli.w", "$r6", "$r8", UI5, 0xff732113ddaab79bUL, 0x9cacf8e6d9e37f97UL, 12); ++ TESTINST_RRI("slli.w", "$r5", "$r19", UI5, 0xcef75ddd2adc5853UL, 0xcc24ed9167fd06eaUL, 22); ++ TESTINST_RRI("slli.w", "$r17", "$r8", UI5, 0x3c8788fed3e8a049UL, 0xccf9b2d2c2e80251UL, 7); ++ TESTINST_RRI("slli.w", "$r14", "$r29", UI5, 0xe1b0b077db4f08eUL, 0x76aea4b9ae43cdfbUL, 10); ++ TESTINST_RRI("slli.w", "$r23", "$r30", UI5, 0x13d8514aeb0dc12bUL, 0x9c8352804e7e8ccbUL, 26); ++ ++ /* ---------------- slli.d rd, rj, ui6 ---------------- */ ++ TESTINST_RRI("slli.d", "$r27", "$r28", UI6, 0x689a2c4141835926UL, 0x1b6ff38e611d1e4dUL, 5); ++ TESTINST_RRI("slli.d", "$r5", "$r20", UI6, 0xff3391c2323defa6UL, 0xe99a134a0c1a2574UL, 1); ++ TESTINST_RRI("slli.d", "$r27", "$r7", UI6, 0xc32d8fb319ba47e6UL, 0xc6530e0e601d3631UL, 61); ++ TESTINST_RRI("slli.d", "$r5", "$r26", UI6, 0x979553ff112cdf52UL, 0x931e420364fdcacaUL, 45); ++ TESTINST_RRI("slli.d", "$r27", "$r5", UI6, 0xa7f70b048a4087b0UL, 0xc1b829210c3cd5a9UL, 60); ++ TESTINST_RRI("slli.d", "$r23", "$r10", UI6, 0xcd547af78ac66ca7UL, 0xa2c0802de6c82645UL, 59); ++ TESTINST_RRI("slli.d", "$r13", "$r30", UI6, 0x410b8f25e1234eeUL, 0xdbaacfe884cda24dUL, 56); ++ TESTINST_RRI("slli.d", "$r16", "$r4", UI6, 0x44a2ff35045ec37cUL, 0xee2240010629a8eeUL, 20); ++ TESTINST_RRI("slli.d", "$r19", "$r20", UI6, 0x8617d88408d75cacUL, 0xba15483820d66ae7UL, 25); ++ TESTINST_RRI("slli.d", "$r24", "$r27", UI6, 0x669e0e9b99d5b604UL, 0xf5d1ffc374e53c7dUL, 13); ++ ++ /* ---------------- srli.w rd, rj, ui5 ---------------- */ ++ TESTINST_RRI("srli.w", "$r20", "$r16", UI5, 0x7f5310ac5eaa9924UL, 0xea8b69613d183eeUL, 10); ++ TESTINST_RRI("srli.w", "$r13", "$r15", UI5, 0x5f4d9313f9224389UL, 0xd544272206f4e814UL, 0); ++ TESTINST_RRI("srli.w", "$r17", "$r18", UI5, 0xd9b2c942f996cc8aUL, 0x704cd1d89de5c2b4UL, 7); ++ TESTINST_RRI("srli.w", "$r27", "$r28", UI5, 0xa3eef8efc97e0d4fUL, 0x8c449e6236daa7a2UL, 18); ++ TESTINST_RRI("srli.w", "$r9", "$r10", UI5, 0x6c044927152e5fc9UL, 0x592a1607944e0109UL, 29); ++ TESTINST_RRI("srli.w", "$r8", "$r24", UI5, 0xcaa01b37d49db675UL, 0x5e35848bbc958164UL, 31); ++ TESTINST_RRI("srli.w", "$r6", "$r16", UI5, 0xe2fbe1accb343769UL, 0x85f5e17c7d785222UL, 18); ++ TESTINST_RRI("srli.w", "$r18", "$r25", UI5, 0x4653c07e0627825fUL, 0x44fffa524ffd0417UL, 31); ++ TESTINST_RRI("srli.w", "$r5", "$r26", UI5, 0x817ebd7154c8ed46UL, 0xc7399a9899fc5958UL, 22); ++ TESTINST_RRI("srli.w", "$r27", "$r4", UI5, 0x3e4b17b34f2b08d0UL, 0x5bedb97aefd697f4UL, 27); ++ ++ /* ---------------- srli.d rd, rj, 
ui6 ---------------- */ ++ TESTINST_RRI("srli.d", "$r31", "$r9", UI6, 0x8fc21da189af52edUL, 0x235bf33e3e612a15UL, 51); ++ TESTINST_RRI("srli.d", "$r26", "$r7", UI6, 0xcd1eaac4df2531ddUL, 0xe87216fce9c75788UL, 36); ++ TESTINST_RRI("srli.d", "$r6", "$r31", UI6, 0xc0282beeb7dc6618UL, 0x8b58604d6be3e8e0UL, 29); ++ TESTINST_RRI("srli.d", "$r20", "$r6", UI6, 0x1546fdd9fc133e39UL, 0x74067840bb05a992UL, 18); ++ TESTINST_RRI("srli.d", "$r28", "$r20", UI6, 0xaa1f88b09e13e4c6UL, 0x6e153faa5221e893UL, 28); ++ TESTINST_RRI("srli.d", "$r26", "$r4", UI6, 0x2ba2151c80dbea7aUL, 0x21246f3c7063edf9UL, 55); ++ TESTINST_RRI("srli.d", "$r28", "$r29", UI6, 0xcd72eff1b5aa0877UL, 0x5d9488c1d61a1544UL, 34); ++ TESTINST_RRI("srli.d", "$r13", "$r7", UI6, 0x5953b78fbd8109a9UL, 0x862731652b653859UL, 62); ++ TESTINST_RRI("srli.d", "$r29", "$r18", UI6, 0xab821449d149a976UL, 0xcb73553146cc4bdcUL, 25); ++ TESTINST_RRI("srli.d", "$r28", "$r7", UI6, 0x31272fa88123357dUL, 0xe9359f7a9f92ec5UL, 2); ++ ++ /* ---------------- srai.w rd, rj, ui5 ---------------- */ ++ TESTINST_RRI("srai.w", "$r26", "$r23", UI5, 0xe73a55c2b7005c01UL, 0xfcd659254f4b3fe7UL, 2); ++ TESTINST_RRI("srai.w", "$r31", "$r10", UI5, 0x2e0c4330fae0890aUL, 0xa76ca364a204c82bUL, 0); ++ TESTINST_RRI("srai.w", "$r31", "$r8", UI5, 0x64790bb6e8674f68UL, 0xce5594f964c4a026UL, 0); ++ TESTINST_RRI("srai.w", "$r15", "$r31", UI5, 0xccfb53c708026acdUL, 0xce185873627515b5UL, 27); ++ TESTINST_RRI("srai.w", "$r16", "$r28", UI5, 0x994c4d22e90185a2UL, 0x49995d51019e1050UL, 1); ++ TESTINST_RRI("srai.w", "$r13", "$r16", UI5, 0x484408b57b3ab89UL, 0x437401347e23c399UL, 16); ++ TESTINST_RRI("srai.w", "$r4", "$r9", UI5, 0xd1d936105b7cca3UL, 0xd49c3c65e292b942UL, 7); ++ TESTINST_RRI("srai.w", "$r24", "$r15", UI5, 0xaa9377005232ec93UL, 0xde29d0172b40f03dUL, 10); ++ TESTINST_RRI("srai.w", "$r19", "$r14", UI5, 0xa49c65a4c2cde36dUL, 0x782e0d4b8a7a28d0UL, 24); ++ TESTINST_RRI("srai.w", "$r24", "$r27", UI5, 0x404f816ff696bbc8UL, 0x1b6900e15f252315UL, 24); ++ ++ /* ---------------- srai.d rd, rj, ui6 ---------------- */ ++ TESTINST_RRI("srai.d", "$r24", "$r4", UI6, 0x96250384fede78c7UL, 0x6c501d9ec5e9e731UL, 22); ++ TESTINST_RRI("srai.d", "$r30", "$r19", UI6, 0xcfc52d7caaf7bf47UL, 0x82499a30d50f8b83UL, 17); ++ TESTINST_RRI("srai.d", "$r12", "$r12", UI6, 0x628a1a46bbe30c16UL, 0xaba392c50d63ea53UL, 5); ++ TESTINST_RRI("srai.d", "$r24", "$r9", UI6, 0x21c1bb01f0253d8UL, 0xb35e31d92548a2feUL, 2); ++ TESTINST_RRI("srai.d", "$r28", "$r7", UI6, 0x2a5ac0a983332ec3UL, 0x2297ae499a473c6dUL, 62); ++ TESTINST_RRI("srai.d", "$r8", "$r17", UI6, 0xa27cf36651750e09UL, 0x1984e046b042d0cfUL, 31); ++ TESTINST_RRI("srai.d", "$r25", "$r16", UI6, 0x7df3822fb20b8dedUL, 0xb4e464563029fac8UL, 37); ++ TESTINST_RRI("srai.d", "$r14", "$r5", UI6, 0xe8c1939c13a2e6caUL, 0x6a22077c63497a9aUL, 57); ++ TESTINST_RRI("srai.d", "$r25", "$r15", UI6, 0xf2df68e25cccf72eUL, 0xe0af648201f919fcUL, 10); ++ TESTINST_RRI("srai.d", "$r6", "$r15", UI6, 0xa24591b35142aa9cUL, 0x12b20ac67de77b8dUL, 49); ++ ++ /* ---------------- rotri.w rd, rj, ui5 ---------------- */ ++ TESTINST_RRI("rotri.w", "$r18", "$r6", UI5, 0xf0c65b137926ba00UL, 0x95e0f5f057a212c5UL, 20); ++ TESTINST_RRI("rotri.w", "$r9", "$r16", UI5, 0xe36356471d2a7e18UL, 0xb8af3071021bd869UL, 27); ++ TESTINST_RRI("rotri.w", "$r5", "$r31", UI5, 0x5992fc9cfce2ebe9UL, 0x6c427c821603d01aUL, 1); ++ TESTINST_RRI("rotri.w", "$r27", "$r13", UI5, 0x239c57dca2ab060UL, 0xed54e28825b25471UL, 23); ++ TESTINST_RRI("rotri.w", "$r18", "$r18", UI5, 0xb84df2305a710936UL, 0x8aae5248c6d4973cUL, 7); ++ 
TESTINST_RRI("rotri.w", "$r4", "$r27", UI5, 0x730e1701570ac9fcUL, 0xd55b9d54232536e7UL, 29); ++ TESTINST_RRI("rotri.w", "$r19", "$r18", UI5, 0x36dbceffa501d8dcUL, 0x8415238fa1dd314fUL, 0); ++ TESTINST_RRI("rotri.w", "$r13", "$r24", UI5, 0xc1ac428ddf5193UL, 0x3b588028fcfbb0a8UL, 21); ++ TESTINST_RRI("rotri.w", "$r14", "$r25", UI5, 0x733414543ca8145eUL, 0xded24831de35be08UL, 29); ++ TESTINST_RRI("rotri.w", "$r27", "$r5", UI5, 0x60afaebb36d22ba0UL, 0xfd31a16f03582b5UL, 8); ++ ++ /* ---------------- rotri.d rd, rj, ui6 ---------------- */ ++ TESTINST_RRI("rotri.d", "$r20", "$r7", UI6, 0xe112a6d47c0444c1UL, 0xbd9bbb91bdc381c5UL, 53); ++ TESTINST_RRI("rotri.d", "$r27", "$r16", UI6, 0xf254a827c1ef7351UL, 0x3de084650f757cebUL, 62); ++ TESTINST_RRI("rotri.d", "$r30", "$r17", UI6, 0x31c36a8c83999eb2UL, 0x107098a9863e85d5UL, 10); ++ TESTINST_RRI("rotri.d", "$r29", "$r8", UI6, 0xf2e7a25c121af3c3UL, 0xb177c110c3dd3225UL, 46); ++ TESTINST_RRI("rotri.d", "$r4", "$r26", UI6, 0xdd94ff60f2e1abffUL, 0xb76d3e4a0af02e4dUL, 45); ++ TESTINST_RRI("rotri.d", "$r10", "$r9", UI6, 0x6064d48d901beca7UL, 0xea20b33360134ab2UL, 42); ++ TESTINST_RRI("rotri.d", "$r4", "$r26", UI6, 0x27f1e63c8f7f71cfUL, 0xf4c5c8a69f37a1bdUL, 27); ++ TESTINST_RRI("rotri.d", "$r9", "$r16", UI6, 0x7d4cb07a3ab72944UL, 0xd5ee210421c6080eUL, 20); ++ TESTINST_RRI("rotri.d", "$r24", "$r26", UI6, 0x1ce66a79f3e45e6fUL, 0x6e1767144ffa6e2dUL, 4); ++ TESTINST_RRI("rotri.d", "$r4", "$r18", UI6, 0x4173f8102b03399UL, 0xde7066568917d899UL, 46); ++ ++ /* ---------------- ext.w.h rd, rj ---------------- */ ++ TESTINST_RR("ext.w.h", "$r17", "$r14", 0x58af862c6fc4208dUL, 0x6235b0cfe4eed6edUL); ++ TESTINST_RR("ext.w.h", "$r31", "$r20", 0x425af3dcd83fa9fdUL, 0x6e59403101a538f1UL); ++ TESTINST_RR("ext.w.h", "$r18", "$r27", 0xcb140226bf788367UL, 0x58a5430ee4e1616eUL); ++ TESTINST_RR("ext.w.h", "$r15", "$r10", 0xd3debaf05f7d909fUL, 0x6f7083340247fb12UL); ++ TESTINST_RR("ext.w.h", "$r12", "$r15", 0x5dc6f7191af80bcfUL, 0xb1f1c8f4b11c03d9UL); ++ TESTINST_RR("ext.w.h", "$r7", "$r15", 0x5ffe304a5c9dc9d2UL, 0x102fb4fa33193103UL); ++ TESTINST_RR("ext.w.h", "$r16", "$r16", 0x533616e37505799fUL, 0xf988c7255086f4f5UL); ++ TESTINST_RR("ext.w.h", "$r13", "$r25", 0x805a406557ed3facUL, 0xdc6ce0f2993b219bUL); ++ TESTINST_RR("ext.w.h", "$r19", "$r20", 0xcc49c20125c4755dUL, 0xde7b765222a9703aUL); ++ TESTINST_RR("ext.w.h", "$r18", "$r7", 0xe0dd9155cbe168c6UL, 0xc1063421eae07663UL); ++ ++ /* ---------------- ext.w.b rd, rj ---------------- */ ++ TESTINST_RR("ext.w.b", "$r16", "$r23", 0x21666e814555aa02UL, 0x926b8d68b5c40592UL); ++ TESTINST_RR("ext.w.b", "$r8", "$r20", 0xf68ae0a0ac497dedUL, 0xbfb5d489716d0c5UL); ++ TESTINST_RR("ext.w.b", "$r24", "$r15", 0xbc84e54c82fd6e51UL, 0x7d814b11e5eb07f6UL); ++ TESTINST_RR("ext.w.b", "$r31", "$r17", 0x14e575a8dda1f0d3UL, 0x6a111e663a52244cUL); ++ TESTINST_RR("ext.w.b", "$r16", "$r8", 0x911acc218fcf640bUL, 0xac1405ad05b23e43UL); ++ TESTINST_RR("ext.w.b", "$r28", "$r8", 0x77fb13eaa8995607UL, 0x5c97a81f12da7d3UL); ++ TESTINST_RR("ext.w.b", "$r9", "$r23", 0xb88cfdb98683e15eUL, 0x74893b34973e16cbUL); ++ TESTINST_RR("ext.w.b", "$r31", "$r4", 0xc7168cb4f7d079e4UL, 0xf4fc215bc2c5273eUL); ++ TESTINST_RR("ext.w.b", "$r4", "$r18", 0xe2e5dca4727b373UL, 0xa1b97136f32e452bUL); ++ TESTINST_RR("ext.w.b", "$r8", "$r29", 0x625eb5236f483daaUL, 0x3ceca34ee347e7c8UL); ++ ++ /* ---------------- clo.w rd, rj ---------------- */ ++ TESTINST_RR("clo.w", "$r4", "$r13", 0xbcca747f77aca28UL, 0x8df71972c1a17096UL); ++ TESTINST_RR("clo.w", "$r27", "$r5", 0x98a9e6d99d8e84cbUL, 
0xdc59d3c8fc1540e4UL); ++ TESTINST_RR("clo.w", "$r9", "$r14", 0xe8e78b162c95ed66UL, 0xdfad6854bbf442e6UL); ++ TESTINST_RR("clo.w", "$r13", "$r26", 0xa3db2cf80f9112cdUL, 0x7676463dd6f13f80UL); ++ TESTINST_RR("clo.w", "$r7", "$r16", 0xb5213ab31b574031UL, 0x478c19ebdeaa74c0UL); ++ TESTINST_RR("clo.w", "$r13", "$r12", 0xd68d9661284fb9d7UL, 0x702bf24fddd8bfe0UL); ++ TESTINST_RR("clo.w", "$r18", "$r20", 0x510cd4002aff4c6cUL, 0x4fc898e8b83669eeUL); ++ TESTINST_RR("clo.w", "$r5", "$r9", 0x53c0de96f709208dUL, 0xe56d87b898438b5UL); ++ TESTINST_RR("clo.w", "$r20", "$r5", 0x96187854fcce4fd1UL, 0xf1248bea6ed8be30UL); ++ TESTINST_RR("clo.w", "$r20", "$r31", 0xb1abb4795d411683UL, 0x1025f914a9225e6UL); ++ ++ /* ---------------- clz.w rd, rj ---------------- */ ++ TESTINST_RR("clz.w", "$r19", "$r8", 0x374348642747a8dcUL, 0xd8ec1d547d95ada5UL); ++ TESTINST_RR("clz.w", "$r26", "$r4", 0x741ab4d14b9ee1f8UL, 0x99e2ef840817cfffUL); ++ TESTINST_RR("clz.w", "$r17", "$r4", 0x45c9ce7217f501b3UL, 0xa387a194cd03bcf1UL); ++ TESTINST_RR("clz.w", "$r13", "$r26", 0x69707656f354d758UL, 0xd4a8f8ab02b876b0UL); ++ TESTINST_RR("clz.w", "$r25", "$r13", 0x103ce6ee41e094c3UL, 0xd7a85bf4006e655aUL); ++ TESTINST_RR("clz.w", "$r5", "$r13", 0x3910578929e7cd4aUL, 0x93c87b02b7b1b603UL); ++ TESTINST_RR("clz.w", "$r18", "$r29", 0x10639f8979feefe5UL, 0x9d8b4b8f8493f844UL); ++ TESTINST_RR("clz.w", "$r25", "$r16", 0x7b35b3e995b3b44dUL, 0xad953d0ae0b3e870UL); ++ TESTINST_RR("clz.w", "$r6", "$r25", 0xda6cbd19f10ef86fUL, 0x1d6665db1162cfb4UL); ++ TESTINST_RR("clz.w", "$r5", "$r12", 0x8a6f4d6ec8d7c00dUL, 0x19b40cb8dd8d1679UL); ++ ++ /* ---------------- cto.w rd, rj ---------------- */ ++ TESTINST_RR("cto.w", "$r7", "$r15", 0x7285e9c364562d11UL, 0x963655c7f58de520UL); ++ TESTINST_RR("cto.w", "$r4", "$r15", 0x105dceebc6d7e641UL, 0xfc01c17baaca9c46UL); ++ TESTINST_RR("cto.w", "$r31", "$r28", 0xdeff9742b93f0591UL, 0x2cf98074b0151f33UL); ++ TESTINST_RR("cto.w", "$r13", "$r8", 0xeee665743cd218ffUL, 0xbdd700b2535aa3b7UL); ++ TESTINST_RR("cto.w", "$r23", "$r13", 0x1cc22cfd7c0c869cUL, 0x5b848b64decbee8fUL); ++ TESTINST_RR("cto.w", "$r12", "$r18", 0x5c32b3db803e5988UL, 0x2d5d1ebf93b79dd0UL); ++ TESTINST_RR("cto.w", "$r17", "$r9", 0xc11d806786501f0eUL, 0xd175fe2ca41bda38UL); ++ TESTINST_RR("cto.w", "$r24", "$r16", 0x504f9b43af62e2adUL, 0xfce545d98e2361daUL); ++ TESTINST_RR("cto.w", "$r24", "$r8", 0xc13ac5668538f5a4UL, 0x3096912e575d64dbUL); ++ TESTINST_RR("cto.w", "$r27", "$r17", 0xd27f68629dd8d4fbUL, 0x15ac43632e175a8bUL); ++ ++ /* ---------------- ctz.w rd, rj ---------------- */ ++ TESTINST_RR("ctz.w", "$r8", "$r12", 0xfc9bd3736a3c08bdUL, 0xaebba33c2e268daaUL); ++ TESTINST_RR("ctz.w", "$r5", "$r27", 0x5dc8af7bac7db01aUL, 0xabce2f0e113597aaUL); ++ TESTINST_RR("ctz.w", "$r18", "$r6", 0xe4ac5b59d8442dfeUL, 0x935d1b694e96bd04UL); ++ TESTINST_RR("ctz.w", "$r9", "$r15", 0x9b760f465efbb52eUL, 0x834c9974dba65d99UL); ++ TESTINST_RR("ctz.w", "$r13", "$r7", 0x95b5748f5f8bfb38UL, 0x75dd7a9890cdf2d9UL); ++ TESTINST_RR("ctz.w", "$r29", "$r17", 0xa25119fd892d1b20UL, 0x38c12e795dc52acfUL); ++ TESTINST_RR("ctz.w", "$r15", "$r12", 0x95c2ce0f0446807cUL, 0x623a5915ac8164b2UL); ++ TESTINST_RR("ctz.w", "$r6", "$r17", 0xd9034892a300dca8UL, 0x5911fea4e6ce1df3UL); ++ TESTINST_RR("ctz.w", "$r10", "$r25", 0xda1e0d0eb34884abUL, 0x8d70d49a10ba8968UL); ++ TESTINST_RR("ctz.w", "$r14", "$r13", 0x207d275c076e5247UL, 0xd243debc9b557922UL); ++ ++ /* ---------------- clo.d rd, rj ---------------- */ ++ TESTINST_RR("clo.d", "$r7", "$r16", 0x9432ccd773e86812UL, 0x9f921ea959c97c2bUL); ++ 
TESTINST_RR("clo.d", "$r7", "$r12", 0xaf19ef0b422b09bfUL, 0x8773ec5c72444fe2UL); ++ TESTINST_RR("clo.d", "$r5", "$r10", 0xa2912bc0ca36fa58UL, 0x2c93a7506a8979b7UL); ++ TESTINST_RR("clo.d", "$r7", "$r28", 0x69dd3f71121c7380UL, 0x1784b7c2c7558b4aUL); ++ TESTINST_RR("clo.d", "$r15", "$r9", 0x95b40b42f113ceccUL, 0xf0cdb7b9c17bb9e1UL); ++ TESTINST_RR("clo.d", "$r9", "$r27", 0x1961ee1499945d08UL, 0x23c7a2252c1cbc78UL); ++ TESTINST_RR("clo.d", "$r30", "$r19", 0xda0aa8b04f719a51UL, 0x8f93c7a1b3cc9f12UL); ++ TESTINST_RR("clo.d", "$r26", "$r20", 0xdd4f62bfe1237a28UL, 0xd61c7bfe05165d04UL); ++ TESTINST_RR("clo.d", "$r26", "$r6", 0x44a1378e22d6ec81UL, 0x1b21543ee9abd103UL); ++ TESTINST_RR("clo.d", "$r24", "$r16", 0x51efcf6ef8eb9917UL, 0x602cbdf020ee6da8UL); ++ ++ /* ---------------- clz.d rd, rj ---------------- */ ++ TESTINST_RR("clz.d", "$r27", "$r7", 0x91df318f7b476077UL, 0x6ca0b9cf9bb84c4aUL); ++ TESTINST_RR("clz.d", "$r19", "$r30", 0x435d7fb412d9c12cUL, 0xc926e58bdb46104eUL); ++ TESTINST_RR("clz.d", "$r12", "$r30", 0x906b06441b2ef62bUL, 0x4b9b91966077ef0UL); ++ TESTINST_RR("clz.d", "$r28", "$r6", 0x28bb3e3324f33e14UL, 0x7628cd8752be6223UL); ++ TESTINST_RR("clz.d", "$r14", "$r15", 0xb7a5ae04bf2e60c0UL, 0x41a328a79afda305UL); ++ TESTINST_RR("clz.d", "$r4", "$r23", 0x5fd8327a265b1a3bUL, 0x66b92d8b5b842d4aUL); ++ TESTINST_RR("clz.d", "$r18", "$r29", 0x73df6808e38c72adUL, 0x6b91b11261dd26b6UL); ++ TESTINST_RR("clz.d", "$r13", "$r8", 0xd8d2dbd71d1783adUL, 0xdc50b7586ccab6a1UL); ++ TESTINST_RR("clz.d", "$r17", "$r10", 0xee6f842bb7686b8dUL, 0xdf52e003cd95f02fUL); ++ TESTINST_RR("clz.d", "$r13", "$r8", 0x91e717aef96cc046UL, 0x5dd0743ed560ba78UL); ++ ++ /* ---------------- cto.d rd, rj ---------------- */ ++ TESTINST_RR("cto.d", "$r31", "$r5", 0xf361d5d1fb232769UL, 0x1530b67240d804cfUL); ++ TESTINST_RR("cto.d", "$r5", "$r26", 0xbedb393d17f69d40UL, 0xcef56269ef7aecdaUL); ++ TESTINST_RR("cto.d", "$r5", "$r31", 0xadd75db878cdbf84UL, 0x8e08acc65c97f0b2UL); ++ TESTINST_RR("cto.d", "$r31", "$r31", 0x6a8a89827e4929f9UL, 0x7df0f59d97924bb3UL); ++ TESTINST_RR("cto.d", "$r14", "$r30", 0xefb0874ef3600b6dUL, 0x97a4b45ab971a548UL); ++ TESTINST_RR("cto.d", "$r5", "$r17", 0x144271fb49c8d2d8UL, 0x787e6dbb4fec4d21UL); ++ TESTINST_RR("cto.d", "$r28", "$r20", 0xd6d0953d2a12c998UL, 0xafd578caad0dfa09UL); ++ TESTINST_RR("cto.d", "$r16", "$r18", 0xde650be54a7990cUL, 0x3ea8f45e10441829UL); ++ TESTINST_RR("cto.d", "$r15", "$r16", 0xbbd328743f49a86UL, 0x5cafc638b6b509beUL); ++ TESTINST_RR("cto.d", "$r6", "$r20", 0x598ee27859cf8d0eUL, 0x4bce530e537ad762UL); ++ ++ /* ---------------- ctz.d rd, rj ---------------- */ ++ TESTINST_RR("ctz.d", "$r14", "$r28", 0xf2e4d886a8fd3fe3UL, 0xdafbabdfefac692UL); ++ TESTINST_RR("ctz.d", "$r6", "$r27", 0xe005a6a20d44fbcaUL, 0xe000ac4f4cfb2ce2UL); ++ TESTINST_RR("ctz.d", "$r15", "$r26", 0x871c2ccd50ec0784UL, 0xa82b0d96dd72f11cUL); ++ TESTINST_RR("ctz.d", "$r17", "$r20", 0xebe7d9f4ec5055d5UL, 0x65575957936d1d6eUL); ++ TESTINST_RR("ctz.d", "$r19", "$r8", 0x394effa243e5f14cUL, 0xf6852349a7b00561UL); ++ TESTINST_RR("ctz.d", "$r5", "$r9", 0x3c67392fc408e9dbUL, 0xeff4bf8e886d7cc3UL); ++ TESTINST_RR("ctz.d", "$r31", "$r15", 0xbf5435775bd0435bUL, 0x19760246c8d1d680UL); ++ TESTINST_RR("ctz.d", "$r9", "$r5", 0xccde230362ce06aUL, 0x7590c6e73077c2bcUL); ++ TESTINST_RR("ctz.d", "$r28", "$r25", 0x2518777b06d608a0UL, 0xb87647dad481ba32UL); ++ TESTINST_RR("ctz.d", "$r23", "$r19", 0xbe232a9fe2090e75UL, 0x2dceda5cdc990d2eUL); ++ ++ /* ---------------- revb.2h rd, rj ---------------- */ ++ TESTINST_RR("revb.2h", 
"$r29", "$r30", 0x75397084990a0745UL, 0xd4c83f5966c1c17UL); ++ TESTINST_RR("revb.2h", "$r17", "$r23", 0xecfbee2a69bbe344UL, 0x5a42dc5dc5705f68UL); ++ TESTINST_RR("revb.2h", "$r6", "$r14", 0xbfeffdbd68845522UL, 0x3490af5b50fd56bfUL); ++ TESTINST_RR("revb.2h", "$r13", "$r6", 0x58e1821d319a1598UL, 0x4c6711d021a72be6UL); ++ TESTINST_RR("revb.2h", "$r18", "$r8", 0x6e14994d4e16ff86UL, 0x9fda01513ab5ceb8UL); ++ TESTINST_RR("revb.2h", "$r7", "$r30", 0x9979d3a3fcfc9323UL, 0x504c708535bc136fUL); ++ TESTINST_RR("revb.2h", "$r28", "$r19", 0x9daf4aa3a33eec5fUL, 0xaa376fc54f4be6f5UL); ++ TESTINST_RR("revb.2h", "$r30", "$r8", 0x2e0bba43ec83e59eUL, 0xaee8b8acd436f6daUL); ++ TESTINST_RR("revb.2h", "$r14", "$r7", 0x9634787c9be10863UL, 0xe9da521d42716c0aUL); ++ TESTINST_RR("revb.2h", "$r23", "$r14", 0x687b89225667081aUL, 0x9089e36a4f12f9c6UL); ++ ++ /* ---------------- revb.4h rd, rj ---------------- */ ++ TESTINST_RR("revb.4h", "$r4", "$r25", 0xc42859bd06b669d2UL, 0x782e4ae6ab812191UL); ++ TESTINST_RR("revb.4h", "$r18", "$r19", 0x45ca4499d789fe5bUL, 0x6e558c98b95d346dUL); ++ TESTINST_RR("revb.4h", "$r24", "$r10", 0x2d04871fd753c43fUL, 0xbeab033e2b5a979eUL); ++ TESTINST_RR("revb.4h", "$r24", "$r8", 0xbc4deb39fb2ffe2eUL, 0x5e3e50b8025e77f3UL); ++ TESTINST_RR("revb.4h", "$r7", "$r14", 0xf44a6ea6f42e0918UL, 0x9f617a848e4ad8f2UL); ++ TESTINST_RR("revb.4h", "$r13", "$r12", 0xda815ff8648e92b9UL, 0xa401e74c4dd88e12UL); ++ TESTINST_RR("revb.4h", "$r31", "$r19", 0x7964d861d2ecb8d5UL, 0xe402e87f73fb4c68UL); ++ TESTINST_RR("revb.4h", "$r29", "$r25", 0x6beff3fa6167cdccUL, 0x11e350b71aee0229UL); ++ TESTINST_RR("revb.4h", "$r4", "$r8", 0x357a56e8ae275376UL, 0xdf8ebc175f4be7e3UL); ++ TESTINST_RR("revb.4h", "$r15", "$r27", 0xeb11b29acfe397d6UL, 0x42d231083cd97aa0UL); ++ ++ /* ---------------- revb.2w rd, rj ---------------- */ ++ TESTINST_RR("revb.2w", "$r27", "$r31", 0x978f867dd7f0cb8UL, 0x19eec2d357cd6a06UL); ++ TESTINST_RR("revb.2w", "$r10", "$r10", 0x7897a40c4fda96d5UL, 0xcb849783a18de892UL); ++ TESTINST_RR("revb.2w", "$r23", "$r14", 0x18338c734be53a1UL, 0x6258664ec1bb96b8UL); ++ TESTINST_RR("revb.2w", "$r12", "$r19", 0x7417ec4fef3451ccUL, 0x216ad32ee149542bUL); ++ TESTINST_RR("revb.2w", "$r31", "$r30", 0x8132835b9905b650UL, 0x6fac007fbefdecf2UL); ++ TESTINST_RR("revb.2w", "$r25", "$r10", 0x7336ebe375c83bedUL, 0x643f76ac3010a6bbUL); ++ TESTINST_RR("revb.2w", "$r31", "$r29", 0x5d99f79f18e805b8UL, 0xe65e70ca4cf299faUL); ++ TESTINST_RR("revb.2w", "$r30", "$r19", 0xec10dd6d7249c5faUL, 0x3f6bb22d66caf299UL); ++ TESTINST_RR("revb.2w", "$r6", "$r30", 0x2c394783817c0870UL, 0xd823cff07efd78dbUL); ++ TESTINST_RR("revb.2w", "$r4", "$r15", 0xc5acf61f075cd4e4UL, 0xc154dd7479b90c6cUL); ++ ++ /* ---------------- revb.d rd, rj ---------------- */ ++ TESTINST_RR("revb.d", "$r6", "$r23", 0xe6e05a0dafda37ceUL, 0x2ac7d047f197f6fbUL); ++ TESTINST_RR("revb.d", "$r19", "$r4", 0xc07a757bea6011ffUL, 0xcef6cef3e0f941ffUL); ++ TESTINST_RR("revb.d", "$r6", "$r15", 0x711bb31e18fcb2f3UL, 0x522068042cf5be1aUL); ++ TESTINST_RR("revb.d", "$r9", "$r7", 0xf9654c655c67392eUL, 0xa1b065742110e3f4UL); ++ TESTINST_RR("revb.d", "$r29", "$r4", 0x70c0dcad23609060UL, 0x5d04b7b2ece6f6bbUL); ++ TESTINST_RR("revb.d", "$r15", "$r4", 0x809930516f3136ebUL, 0xda33327a8d42ef55UL); ++ TESTINST_RR("revb.d", "$r10", "$r4", 0x1a7ee04b354f6af5UL, 0xcda6c6943e46fed7UL); ++ TESTINST_RR("revb.d", "$r20", "$r4", 0x315f95452d748459UL, 0xa001e934745758e0UL); ++ TESTINST_RR("revb.d", "$r6", "$r8", 0xabbd06000374627aUL, 0x85441006689de89bUL); ++ TESTINST_RR("revb.d", "$r27", "$r24", 
0x2d404e69f54afa48UL, 0x46f47b822772f3cdUL); ++ ++ /* ---------------- revh.2w rd, rj ---------------- */ ++ TESTINST_RR("revh.2w", "$r6", "$r15", 0x5b764c7bfb1999ebUL, 0x86603fc3f96843edUL); ++ TESTINST_RR("revh.2w", "$r19", "$r10", 0xf39f8e6b43dd63ceUL, 0x141d294d06276941UL); ++ TESTINST_RR("revh.2w", "$r5", "$r20", 0x3ff54e5c35d83e69UL, 0xd677d6a21384278aUL); ++ TESTINST_RR("revh.2w", "$r4", "$r31", 0xce463b02a2f840ccUL, 0x6f87c9636f9cfca6UL); ++ TESTINST_RR("revh.2w", "$r19", "$r26", 0x34abc96ddde64e27UL, 0x723ec7ce92720502UL); ++ TESTINST_RR("revh.2w", "$r8", "$r18", 0x1454a1ee8739c235UL, 0xd890efa373a6dfb0UL); ++ TESTINST_RR("revh.2w", "$r12", "$r31", 0xf0c8b856751cae70UL, 0xb675dff2568e6ebfUL); ++ TESTINST_RR("revh.2w", "$r24", "$r9", 0xb36984e3a7a3eaeaUL, 0xa169cfa9f35f6a8aUL); ++ TESTINST_RR("revh.2w", "$r25", "$r27", 0x640b3e6b41180473UL, 0x9bc307f0a2ef368fUL); ++ TESTINST_RR("revh.2w", "$r7", "$r9", 0x897e1406a0eb2dc9UL, 0x1921bcf657fecdccUL); ++ ++ /* ---------------- revh.d rd, rj ---------------- */ ++ TESTINST_RR("revh.d", "$r14", "$r25", 0xec3573411ea025e5UL, 0x6976d4371b08f1abUL); ++ TESTINST_RR("revh.d", "$r24", "$r31", 0x9ef9e5cb1375d42aUL, 0x9ce130c8a579e11dUL); ++ TESTINST_RR("revh.d", "$r9", "$r28", 0x3c8cd0055a5e7031UL, 0xf05f9381753ded16UL); ++ TESTINST_RR("revh.d", "$r24", "$r26", 0x6a4e5797f19041f6UL, 0xd26a5ae65e21041cUL); ++ TESTINST_RR("revh.d", "$r14", "$r24", 0xe2cb9a83aee22d97UL, 0x6405d71e0bb63321UL); ++ TESTINST_RR("revh.d", "$r19", "$r23", 0x91cdf3bcd9afe76dUL, 0x171953826107396aUL); ++ TESTINST_RR("revh.d", "$r23", "$r14", 0x93ed49255d084e12UL, 0x374bd76990198b43UL); ++ TESTINST_RR("revh.d", "$r31", "$r12", 0x8e54a908f04882bUL, 0xf7e8756491b9d346UL); ++ TESTINST_RR("revh.d", "$r31", "$r20", 0xbb7cd34502fdf01fUL, 0x906b7289a6957d3fUL); ++ TESTINST_RR("revh.d", "$r27", "$r30", 0xacbca1aacdd9dd3fUL, 0x3072d9c69004d4b5UL); ++ ++ /* ---------------- bitrev.4b rd, rj ---------------- */ ++ TESTINST_RR("bitrev.4b", "$r23", "$r19", 0xb422f2854b491d92UL, 0x7649084cec69098aUL); ++ TESTINST_RR("bitrev.4b", "$r27", "$r16", 0xd14736328d74b448UL, 0x1abee3a271c71db9UL); ++ TESTINST_RR("bitrev.4b", "$r15", "$r23", 0xf17c0f0ccfbb2c38UL, 0x490107ff4155bd17UL); ++ TESTINST_RR("bitrev.4b", "$r5", "$r18", 0x8408d6a30523619dUL, 0x625d5aedf0add9fbUL); ++ TESTINST_RR("bitrev.4b", "$r8", "$r15", 0xc41a2fdb60ba75a6UL, 0xe2562eab3b333a00UL); ++ TESTINST_RR("bitrev.4b", "$r17", "$r18", 0x6a409394f364c02aUL, 0xea970d90edb343ccUL); ++ TESTINST_RR("bitrev.4b", "$r25", "$r29", 0xd8d1c9b8dcff266dUL, 0xacca47ac7597ca65UL); ++ TESTINST_RR("bitrev.4b", "$r26", "$r24", 0xe2a0d11df8c5055bUL, 0xc57559d03e3e216dUL); ++ TESTINST_RR("bitrev.4b", "$r8", "$r27", 0xb6a5815170d657f0UL, 0x9f60901eefa1347aUL); ++ TESTINST_RR("bitrev.4b", "$r20", "$r16", 0x432a2fbf2b073732UL, 0x604b8d7ecb5e86dcUL); ++ ++ /* ---------------- bitrev.8b rd, rj ---------------- */ ++ TESTINST_RR("bitrev.8b", "$r25", "$r7", 0x22b2e6007f742fd1UL, 0xe8c23886def1bbc9UL); ++ TESTINST_RR("bitrev.8b", "$r28", "$r30", 0xf985d7779c5ca157UL, 0x285cbdc0f47395d1UL); ++ TESTINST_RR("bitrev.8b", "$r29", "$r13", 0xd9b8364a793bc50cUL, 0xded35d7c7ba73d29UL); ++ TESTINST_RR("bitrev.8b", "$r12", "$r28", 0x18d7769bc1147dc5UL, 0xfb6cda8c7f12313aUL); ++ TESTINST_RR("bitrev.8b", "$r23", "$r6", 0xeff84dc134b3acbeUL, 0xee7c4e89e333eda8UL); ++ TESTINST_RR("bitrev.8b", "$r24", "$r20", 0xad65748f0bc46e9fUL, 0xd0d88137a6284eacUL); ++ TESTINST_RR("bitrev.8b", "$r10", "$r5", 0xe0e1c1e262352e89UL, 0x9c43ebc4f7c65dc1UL); ++ TESTINST_RR("bitrev.8b", 
"$r27", "$r13", 0x444a53aa65d317dcUL, 0x473eea7ea5691da7UL); ++ TESTINST_RR("bitrev.8b", "$r13", "$r9", 0xfc48d0fdf4c7a6e5UL, 0x5dcad407df3401a5UL); ++ TESTINST_RR("bitrev.8b", "$r12", "$r5", 0xebef32fcbd91e9aUL, 0xe1eeea527816355eUL); ++ ++ /* ---------------- bitrev.w rd, rj ---------------- */ ++ TESTINST_RR("bitrev.w", "$r18", "$r15", 0x2028b0c8691a767UL, 0x5822df2950c9c2d3UL); ++ TESTINST_RR("bitrev.w", "$r30", "$r27", 0x2a2d48209d9f377bUL, 0xde9d59b836df41fcUL); ++ TESTINST_RR("bitrev.w", "$r17", "$r4", 0xe6fb8b07c90464e6UL, 0x65976cb5c6c6a5b0UL); ++ TESTINST_RR("bitrev.w", "$r9", "$r31", 0x1b95159ec5c37644UL, 0x62c549b741c2adadUL); ++ TESTINST_RR("bitrev.w", "$r17", "$r14", 0x8b414dfa7156f0ceUL, 0x9642d0186f420e7cUL); ++ TESTINST_RR("bitrev.w", "$r15", "$r8", 0x2722ecb374b4d5e3UL, 0xeaf151a286bbc4cfUL); ++ TESTINST_RR("bitrev.w", "$r27", "$r19", 0x58ec913c63634a5UL, 0xe723c39df96a4fd2UL); ++ TESTINST_RR("bitrev.w", "$r7", "$r26", 0xa245e7dd80a324a2UL, 0xe7d6c2b2683291eUL); ++ TESTINST_RR("bitrev.w", "$r31", "$r6", 0x114292ed02ba1255UL, 0x13cd62afac5ac3d4UL); ++ TESTINST_RR("bitrev.w", "$r7", "$r25", 0xbd46d88fc8d2933bUL, 0x69ce9ccb487dadd1UL); ++ ++ /* ---------------- bitrev.d rd, rj ---------------- */ ++ TESTINST_RR("bitrev.d", "$r4", "$r29", 0xeaacaeb60b227eabUL, 0x799f36da44887e2cUL); ++ TESTINST_RR("bitrev.d", "$r29", "$r6", 0xcfbb055ab1ebf7faUL, 0x2924f63fec744b02UL); ++ TESTINST_RR("bitrev.d", "$r28", "$r31", 0xaac74a398d76900dUL, 0xf6c75e45e33b4cb7UL); ++ TESTINST_RR("bitrev.d", "$r24", "$r12", 0xfc8bc33fb4a8d023UL, 0xcccd98e9d53aa26aUL); ++ TESTINST_RR("bitrev.d", "$r8", "$r7", 0x7502cd68289f4c3aUL, 0x746ddfd3c3a512b1UL); ++ TESTINST_RR("bitrev.d", "$r6", "$r16", 0xe8b94bfe615774aeUL, 0x518770bbee53d619UL); ++ TESTINST_RR("bitrev.d", "$r24", "$r4", 0x6318c17dbae816c3UL, 0x9ab684e129b57f07UL); ++ TESTINST_RR("bitrev.d", "$r27", "$r23", 0x8a22909b005a86b8UL, 0x69337e8c3b1fc2bbUL); ++ TESTINST_RR("bitrev.d", "$r20", "$r9", 0x9f43885d40caf0UL, 0x193cbf609dbc33d4UL); ++ TESTINST_RR("bitrev.d", "$r30", "$r19", 0x30fa02e0fc390ac9UL, 0x21686c931c6260daUL); ++ ++ /* ---------------- bytepick.w rd, rj, rk, sa2 ---------------- */ ++ TESTINST_RRRI("bytepick.w", "$r26", "$r15", "$r19", SA2, 0x1b0b980dd3271273UL, 0x8737ca6c8106ceeeUL, 0x2807e0dcb47d6efUL, 1); ++ TESTINST_RRRI("bytepick.w", "$r15", "$r17", "$r7", SA2, 0x3d2e3fbcbd032001UL, 0x5eced8cf3da8b205UL, 0xb8155b41321e09c0UL, 0); ++ TESTINST_RRRI("bytepick.w", "$r12", "$r15", "$r17", SA2, 0x2670c80f12a87520UL, 0x29ab42125e3ea5c8UL, 0x32a39ac435460f2fUL, 3); ++ TESTINST_RRRI("bytepick.w", "$r4", "$r20", "$r18", SA2, 0x5a64271926277c04UL, 0xcbde225cc736e5d5UL, 0x18abacc874db47e9UL, 3); ++ TESTINST_RRRI("bytepick.w", "$r8", "$r5", "$r24", SA2, 0xdb41606ce3f9df94UL, 0xc3f6ce370d754a3fUL, 0x34ad5a423a5c42e3UL, 3); ++ TESTINST_RRRI("bytepick.w", "$r5", "$r30", "$r14", SA2, 0xedb3aad221050d0bUL, 0x46f5823389f2581aUL, 0xf766f1e75349809eUL, 2); ++ TESTINST_RRRI("bytepick.w", "$r4", "$r19", "$r18", SA2, 0xf92ed0231f25c991UL, 0xba59df0352ed6b3eUL, 0x58d6fbce4e4325e8UL, 0); ++ TESTINST_RRRI("bytepick.w", "$r18", "$r28", "$r24", SA2, 0x177dcaf8fcd30180UL, 0xbdc04b3b8f707462UL, 0x6102168606deb3edUL, 3); ++ TESTINST_RRRI("bytepick.w", "$r13", "$r27", "$r29", SA2, 0x383d82c5d717259bUL, 0x495e30e5e680d7fcUL, 0x1c17f315ebb3bec3UL, 2); ++ TESTINST_RRRI("bytepick.w", "$r5", "$r29", "$r4", SA2, 0x26a0fb212ab80a3aUL, 0x78b167aecd81f869UL, 0x6daab499f228fef4UL, 1); ++ ++ /* ---------------- bytepick.d rd, rj, rk, sa3 ---------------- */ ++ 
TESTINST_RRRI("bytepick.d", "$r28", "$r4", "$r28", SA3, 0x794fa22d52f7e834UL, 0x2f084db071d3bcceUL, 0xa0cf51d7020f10c1UL, 7); ++ TESTINST_RRRI("bytepick.d", "$r10", "$r18", "$r4", SA3, 0x9fd7a6b378604833UL, 0x37da15f8a7154cabUL, 0xaedd64328d27a0a8UL, 2); ++ TESTINST_RRRI("bytepick.d", "$r7", "$r6", "$r24", SA3, 0xdee49920d429d3c2UL, 0x15e3f61f2f82a2d1UL, 0xdeba03c7761e4678UL, 3); ++ TESTINST_RRRI("bytepick.d", "$r19", "$r16", "$r5", SA3, 0x53bda4d18e61fc44UL, 0xc79bd94439006673UL, 0xa8024ab452a2bd52UL, 4); ++ TESTINST_RRRI("bytepick.d", "$r26", "$r19", "$r25", SA3, 0xc8aae5136d925592UL, 0xea109dd2837d3acfUL, 0x30e93a75e695666aUL, 7); ++ TESTINST_RRRI("bytepick.d", "$r8", "$r14", "$r8", SA3, 0xa03db273c845b37fUL, 0xa7fd0053a136769fUL, 0x6ab932903229b035UL, 2); ++ TESTINST_RRRI("bytepick.d", "$r9", "$r14", "$r23", SA3, 0x2f160a0d147b300fUL, 0xdae9d5d15bb8f5b5UL, 0xc4fdfbb29d49dfe4UL, 2); ++ TESTINST_RRRI("bytepick.d", "$r20", "$r18", "$r15", SA3, 0x30cefdebc30b841aUL, 0xbfd016fb0312277cUL, 0x44269b95d496912fUL, 5); ++ TESTINST_RRRI("bytepick.d", "$r12", "$r17", "$r5", SA3, 0xde32bc5d3471eed2UL, 0xdb807610c6e762e4UL, 0xb2148e34e649d1b8UL, 2); ++ TESTINST_RRRI("bytepick.d", "$r5", "$r24", "$r28", SA3, 0x9ab1be6a0faa61a8UL, 0x97d4a12579967739UL, 0xaa592ef1fd606badUL, 3); ++ ++ /* ---------------- maskeqz rd, rj, rk ---------------- */ ++ TESTINST_RRR("maskeqz", "$r14", "$r28", "$r25", 0xc263b6b8f3404c8dUL, 0x90ef733c88c88866UL, 0xd256888d94e8d21aUL); ++ TESTINST_RRR("maskeqz", "$r13", "$r9", "$r15", 0x5bdd86b962c61db4UL, 0x8a78f7b88a728d92UL, 0x69e707acb2c26a83UL); ++ TESTINST_RRR("maskeqz", "$r7", "$r7", "$r13", 0xea86abdbdea660cbUL, 0xfb778deef0a5b893UL, 0xad10e23c971d1a9fUL); ++ TESTINST_RRR("maskeqz", "$r8", "$r7", "$r19", 0xf64df33b6146939fUL, 0xe7376d3da44f4dfdUL, 0x7987e122af2505abUL); ++ TESTINST_RRR("maskeqz", "$r10", "$r27", "$r29", 0x404a261c069b488bUL, 0x81886c523ec2658cUL, 0x3236dc83d0a27cc1UL); ++ TESTINST_RRR("maskeqz", "$r23", "$r16", "$r25", 0x8671050519b7bda0UL, 0x26fa2567b106d73aUL, 0xd884011e0d767feUL); ++ TESTINST_RRR("maskeqz", "$r5", "$r19", "$r18", 0xbd8d4cef53122132UL, 0x4976c047c57ec148UL, 0x602312f372049a5eUL); ++ TESTINST_RRR("maskeqz", "$r29", "$r24", "$r23", 0x7f390b695d8b12eUL, 0x70043e7666a24a34UL, 0xfee8f8f90ab3ac9bUL); ++ TESTINST_RRR("maskeqz", "$r25", "$r4", "$r18", 0x7eaffcb6dac1b5bUL, 0x4b12f8c6738216a2UL, 0x409acb80b7391511UL); ++ TESTINST_RRR("maskeqz", "$r30", "$r6", "$r24", 0x14d829636b628dc9UL, 0xdb88a366a2271c2cUL, 0xea0d5998835940aUL); ++ ++ /* ---------------- masknez rd, rj, rk ---------------- */ ++ TESTINST_RRR("masknez", "$r14", "$r24", "$r5", 0x46b15bbb9507bd79UL, 0xc92af628c880a454UL, 0x846a586db0af0965UL); ++ TESTINST_RRR("masknez", "$r30", "$r8", "$r8", 0x43cd20b5234db4e8UL, 0x7aeee6ab6b10561fUL, 0x45ab4fdb4ca8b325UL); ++ TESTINST_RRR("masknez", "$r24", "$r19", "$r15", 0xd3d50bbb34b528e2UL, 0xdd71746b0beedae3UL, 0xa34d82fc50174094UL); ++ TESTINST_RRR("masknez", "$r29", "$r26", "$r26", 0x576cb2da15b1462dUL, 0x6c669f0195b50b7aUL, 0xec1609ef36aa938fUL); ++ TESTINST_RRR("masknez", "$r4", "$r29", "$r10", 0xaa220f67a02617dbUL, 0xffcd18e3016e10fUL, 0x4cf9bdd8dca7f88fUL); ++ TESTINST_RRR("masknez", "$r23", "$r9", "$r29", 0x774e1c840428fbdeUL, 0x391268694388d2a7UL, 0xf06192a4e5780c53UL); ++ TESTINST_RRR("masknez", "$r7", "$r25", "$r28", 0x7b75099f16135faaUL, 0xf95af681c18bf31cUL, 0x2f6122581dfdef74UL); ++ TESTINST_RRR("masknez", "$r26", "$r10", "$r16", 0xe6006c9bd6bae204UL, 0x7e84e5db1181249dUL, 0x6ab2371059cdc875UL); ++ TESTINST_RRR("masknez", 
"$r26", "$r15", "$r28", 0xb4c9c784ef74245fUL, 0x20cc1c4c169ca02cUL, 0x606eeb8ce6278d16UL); ++ TESTINST_RRR("masknez", "$r19", "$r16", "$r16", 0x75a721553f7c7054UL, 0x7b63b7b7b3f5bd5fUL, 0xf8c7933e92e155eeUL); ++ ++ /* ---------------- bstrins.w rd, rj, msbw, lsbw ---------------- */ ++ TESTINST_RRII("bstrins.w", "$r27", "$r16", MSBW, LSBW, 0x431055863e78b187UL, 0xe18dda9620a50e9dUL, 31, 8); ++ TESTINST_RRII("bstrins.w", "$r26", "$r27", MSBW, LSBW, 0x19f800eab7e1ab51UL, 0x61e7d86005d21d29UL, 30, 27); ++ TESTINST_RRII("bstrins.w", "$r15", "$r4", MSBW, LSBW, 0xb141d462e777528dUL, 0xb7aebff9bcca1643UL, 17, 14); ++ TESTINST_RRII("bstrins.w", "$r30", "$r17", MSBW, LSBW, 0xfac48083375844feUL, 0x6d3283ba14cc27ebUL, 24, 6); ++ TESTINST_RRII("bstrins.w", "$r12", "$r12", MSBW, LSBW, 0x9b7629774f19f64aUL, 0x84ee8d65b2842686UL, 30, 25); ++ TESTINST_RRII("bstrins.w", "$r15", "$r10", MSBW, LSBW, 0x290172844863090fUL, 0x85ea298976069fcdUL, 26, 1); ++ TESTINST_RRII("bstrins.w", "$r10", "$r13", MSBW, LSBW, 0x66942ba1c15e85aaUL, 0xddb2dfa7474a4370UL, 23, 8); ++ TESTINST_RRII("bstrins.w", "$r5", "$r20", MSBW, LSBW, 0x3dcfecca80bf0d79UL, 0x5044b246f2d3f890UL, 18, 16); ++ TESTINST_RRII("bstrins.w", "$r23", "$r5", MSBW, LSBW, 0xa11723142f1472a7UL, 0xcbaaa9a23d119663UL, 25, 21); ++ TESTINST_RRII("bstrins.w", "$r20", "$r31", MSBW, LSBW, 0x6a1110240ba884b8UL, 0x45cadf0ffe08cc25UL, 13, 12); ++ ++ /* ---------------- bstrpick.w rd, rj, msbw, lsbw ---------------- */ ++ TESTINST_RRII("bstrpick.w", "$r5", "$r23", MSBW, LSBW, 0x6885eaa89f691954UL, 0x94f8458597294f2eUL, 23, 11); ++ TESTINST_RRII("bstrpick.w", "$r25", "$r8", MSBW, LSBW, 0x11be9b9923ebee96UL, 0x23deda120a49df15UL, 18, 11); ++ TESTINST_RRII("bstrpick.w", "$r6", "$r6", MSBW, LSBW, 0x3546d655181289bcUL, 0x7ee84a41c952b690UL, 10, 3); ++ TESTINST_RRII("bstrpick.w", "$r25", "$r5", MSBW, LSBW, 0xb2eec884ea77f548UL, 0x23992bc40919416fUL, 15, 9); ++ TESTINST_RRII("bstrpick.w", "$r26", "$r14", MSBW, LSBW, 0x8e591161730ac582UL, 0xf45f4435cc1cb138UL, 21, 8); ++ TESTINST_RRII("bstrpick.w", "$r9", "$r14", MSBW, LSBW, 0x1ac92d930e8361f9UL, 0xcc11dd56e96c6256UL, 7, 3); ++ TESTINST_RRII("bstrpick.w", "$r19", "$r9", MSBW, LSBW, 0xd15fd80fafe60a58UL, 0xb1426a8c680d628cUL, 8, 8); ++ TESTINST_RRII("bstrpick.w", "$r17", "$r13", MSBW, LSBW, 0xfa48c3cd091d2b5eUL, 0x3a2827a58a014a72UL, 30, 12); ++ TESTINST_RRII("bstrpick.w", "$r6", "$r31", MSBW, LSBW, 0xca10a858ebfa78a1UL, 0x202a38722f270884UL, 16, 7); ++ TESTINST_RRII("bstrpick.w", "$r20", "$r10", MSBW, LSBW, 0xc010deb269ae6ba2UL, 0x98f1d297734f9f4cUL, 31, 15); ++ ++ /* ---------------- bstrins.d rd, rj, msbd, lsbd ---------------- */ ++ TESTINST_RRII("bstrins.d", "$r29", "$r17", MSBD, LSBD, 0x7cf4a9ec79307e59UL, 0xb1b5afc00eef90a3UL, 60, 25); ++ TESTINST_RRII("bstrins.d", "$r10", "$r27", MSBD, LSBD, 0xc708602dee32579fUL, 0x199d90a711e94375UL, 31, 22); ++ TESTINST_RRII("bstrins.d", "$r4", "$r24", MSBD, LSBD, 0x4e5ce98e217a4b59UL, 0xaf25b5661daefdeaUL, 58, 58); ++ TESTINST_RRII("bstrins.d", "$r12", "$r30", MSBD, LSBD, 0x9505d862c56b1708UL, 0x7f3f0c983ce27863UL, 16, 6); ++ TESTINST_RRII("bstrins.d", "$r29", "$r5", MSBD, LSBD, 0x248f295ef3afe5aaUL, 0x9469277db61227b7UL, 43, 0); ++ TESTINST_RRII("bstrins.d", "$r31", "$r31", MSBD, LSBD, 0xbc5f0c47c3a63a94UL, 0x4aacc1c77ad0c09aUL, 49, 23); ++ TESTINST_RRII("bstrins.d", "$r6", "$r24", MSBD, LSBD, 0x79110235b8c34188UL, 0x75e3e311aef2bef9UL, 12, 2); ++ TESTINST_RRII("bstrins.d", "$r6", "$r16", MSBD, LSBD, 0xaa6e63ffd80b76c5UL, 0xb1ea7dcb3af0881dUL, 43, 13); ++ TESTINST_RRII("bstrins.d", 
"$r15", "$r25", MSBD, LSBD, 0x5b68a802f26a1804UL, 0xb4f651115b84591bUL, 53, 29); ++ TESTINST_RRII("bstrins.d", "$r9", "$r9", MSBD, LSBD, 0x3394218c965d5f1aUL, 0xf3d30b5d4d4089b4UL, 61, 40); ++ ++ /* ---------------- bstrpick.d rd, rj, msbd, lsbd ---------------- */ ++ TESTINST_RRII("bstrpick.d", "$r27", "$r27", MSBD, LSBD, 0x503c8fae2d6d7b58UL, 0x9fd9869ca812de0cUL, 63, 33); ++ TESTINST_RRII("bstrpick.d", "$r14", "$r5", MSBD, LSBD, 0x65f05eaa5e13856aUL, 0xd52c72fbeccc39f5UL, 52, 40); ++ TESTINST_RRII("bstrpick.d", "$r13", "$r20", MSBD, LSBD, 0x9cea777df4d2eae0UL, 0x6326727a36499800UL, 48, 14); ++ TESTINST_RRII("bstrpick.d", "$r10", "$r17", MSBD, LSBD, 0xf30a073a4a56604bUL, 0xc12d112f6a0c8f1UL, 43, 20); ++ TESTINST_RRII("bstrpick.d", "$r13", "$r25", MSBD, LSBD, 0xe559d975e0d9ac85UL, 0xcf41f30cc4a46713UL, 55, 37); ++ TESTINST_RRII("bstrpick.d", "$r29", "$r4", MSBD, LSBD, 0x41843db6c2a206cbUL, 0x343f795d45fcff8cUL, 34, 20); ++ TESTINST_RRII("bstrpick.d", "$r27", "$r28", MSBD, LSBD, 0xb359821297377feeUL, 0x4fc51c5773e64f69UL, 27, 10); ++ TESTINST_RRII("bstrpick.d", "$r24", "$r24", MSBD, LSBD, 0xed3cb5d1e8f0e55eUL, 0x9cdbb70a8b8d3945UL, 63, 20); ++ TESTINST_RRII("bstrpick.d", "$r7", "$r30", MSBD, LSBD, 0x11b7344343be1ccfUL, 0xa3422c671803480fUL, 34, 30); ++ TESTINST_RRII("bstrpick.d", "$r15", "$r4", MSBD, LSBD, 0x3670c6b869f28085UL, 0x2caa9d9c1351e402UL, 55, 4); ++ ++ /* ---------------- crc.w.b.w rd, rj, rk ---------------- */ ++ TESTINST_RRR("crc.w.b.w", "$r12", "$r13", "$r14", 0x123456789abcdefUL, 0x4b154113f7d32514UL, 0xcce230caafbf9cc9UL); ++ TESTINST_RRR("crc.w.b.w", "$r12", "$r13", "$r14", 0x123456789abcdefUL, 0x33d5d595721d4f13UL, 0xf4509311f443a7ceUL); ++ TESTINST_RRR("crc.w.b.w", "$r12", "$r13", "$r14", 0x123456789abcdefUL, 0x4a3c6de6954cbc17UL, 0x111b21e39fbd7254UL); ++ TESTINST_RRR("crc.w.b.w", "$r12", "$r13", "$r14", 0x123456789abcdefUL, 0xfbb5c64ed1b044c6UL, 0x33ca4c4fb3960326UL); ++ TESTINST_RRR("crc.w.b.w", "$r12", "$r13", "$r14", 0x123456789abcdefUL, 0x2b7c5939d7c0f528UL, 0xb73870a5a6630162UL); ++ TESTINST_RRR("crc.w.b.w", "$r12", "$r13", "$r14", 0x123456789abcdefUL, 0x02fe41918ac5cdbaUL, 0x48e0815289728f05UL); ++ TESTINST_RRR("crc.w.b.w", "$r12", "$r13", "$r14", 0x123456789abcdefUL, 0xb60a8f381f187baeUL, 0x008c208cc413ff72UL); ++ ++ /* ---------------- crc.w.h.w rd, rj, rk ---------------- */ ++ TESTINST_RRR("crc.w.h.w", "$r12", "$r13", "$r14", 0x123456789abcdefUL, 0x4b154113f7d32514UL, 0xcce230caafbf9cc9UL); ++ TESTINST_RRR("crc.w.h.w", "$r12", "$r13", "$r14", 0x123456789abcdefUL, 0x33d5d595721d4f13UL, 0xf4509311f443a7ceUL); ++ TESTINST_RRR("crc.w.h.w", "$r12", "$r13", "$r14", 0x123456789abcdefUL, 0x4a3c6de6954cbc17UL, 0x111b21e39fbd7254UL); ++ TESTINST_RRR("crc.w.h.w", "$r12", "$r13", "$r14", 0x123456789abcdefUL, 0xfbb5c64ed1b044c6UL, 0x33ca4c4fb3960326UL); ++ TESTINST_RRR("crc.w.h.w", "$r12", "$r13", "$r14", 0x123456789abcdefUL, 0x2b7c5939d7c0f528UL, 0xb73870a5a6630162UL); ++ TESTINST_RRR("crc.w.h.w", "$r12", "$r13", "$r14", 0x123456789abcdefUL, 0x02fe41918ac5cdbaUL, 0x48e0815289728f05UL); ++ TESTINST_RRR("crc.w.h.w", "$r12", "$r13", "$r14", 0x123456789abcdefUL, 0xb60a8f381f187baeUL, 0x008c208cc413ff72UL); ++ ++ /* ---------------- crc.w.w.w rd, rj, rk ---------------- */ ++ TESTINST_RRR("crc.w.w.w", "$r12", "$r13", "$r14", 0x123456789abcdefUL, 0x4b154113f7d32514UL, 0xcce230caafbf9cc9UL); ++ TESTINST_RRR("crc.w.w.w", "$r12", "$r13", "$r14", 0x123456789abcdefUL, 0x33d5d595721d4f13UL, 0xf4509311f443a7ceUL); ++ TESTINST_RRR("crc.w.w.w", "$r12", "$r13", "$r14", 
0x123456789abcdefUL, 0x4a3c6de6954cbc17UL, 0x111b21e39fbd7254UL); ++ TESTINST_RRR("crc.w.w.w", "$r12", "$r13", "$r14", 0x123456789abcdefUL, 0xfbb5c64ed1b044c6UL, 0x33ca4c4fb3960326UL); ++ TESTINST_RRR("crc.w.w.w", "$r12", "$r13", "$r14", 0x123456789abcdefUL, 0x2b7c5939d7c0f528UL, 0xb73870a5a6630162UL); ++ TESTINST_RRR("crc.w.w.w", "$r12", "$r13", "$r14", 0x123456789abcdefUL, 0x02fe41918ac5cdbaUL, 0x48e0815289728f05UL); ++ TESTINST_RRR("crc.w.w.w", "$r12", "$r13", "$r14", 0x123456789abcdefUL, 0xb60a8f381f187baeUL, 0x008c208cc413ff72UL); ++ ++ /* ---------------- crc.w.d.w rd, rj, rk ---------------- */ ++ TESTINST_RRR("crc.w.d.w", "$r12", "$r13", "$r14", 0x123456789abcdefUL, 0x4b154113f7d32514UL, 0xcce230caafbf9cc9UL); ++ TESTINST_RRR("crc.w.d.w", "$r12", "$r13", "$r14", 0x123456789abcdefUL, 0x33d5d595721d4f13UL, 0xf4509311f443a7ceUL); ++ TESTINST_RRR("crc.w.d.w", "$r12", "$r13", "$r14", 0x123456789abcdefUL, 0x4a3c6de6954cbc17UL, 0x111b21e39fbd7254UL); ++ TESTINST_RRR("crc.w.d.w", "$r12", "$r13", "$r14", 0x123456789abcdefUL, 0xfbb5c64ed1b044c6UL, 0x33ca4c4fb3960326UL); ++ TESTINST_RRR("crc.w.d.w", "$r12", "$r13", "$r14", 0x123456789abcdefUL, 0x2b7c5939d7c0f528UL, 0xb73870a5a6630162UL); ++ TESTINST_RRR("crc.w.d.w", "$r12", "$r13", "$r14", 0x123456789abcdefUL, 0x02fe41918ac5cdbaUL, 0x48e0815289728f05UL); ++ TESTINST_RRR("crc.w.d.w", "$r12", "$r13", "$r14", 0x123456789abcdefUL, 0xb60a8f381f187baeUL, 0x008c208cc413ff72UL); ++ ++ /* ---------------- crcc.w.b.w rd, rj, rk ---------------- */ ++ TESTINST_RRR("crcc.w.b.w", "$r12", "$r13", "$r14", 0x123456789abcdefUL, 0x4b154113f7d32514UL, 0xcce230caafbf9cc9UL); ++ TESTINST_RRR("crcc.w.b.w", "$r12", "$r13", "$r14", 0x123456789abcdefUL, 0x33d5d595721d4f13UL, 0xf4509311f443a7ceUL); ++ TESTINST_RRR("crcc.w.b.w", "$r12", "$r13", "$r14", 0x123456789abcdefUL, 0x4a3c6de6954cbc17UL, 0x111b21e39fbd7254UL); ++ TESTINST_RRR("crcc.w.b.w", "$r12", "$r13", "$r14", 0x123456789abcdefUL, 0xfbb5c64ed1b044c6UL, 0x33ca4c4fb3960326UL); ++ TESTINST_RRR("crcc.w.b.w", "$r12", "$r13", "$r14", 0x123456789abcdefUL, 0x2b7c5939d7c0f528UL, 0xb73870a5a6630162UL); ++ TESTINST_RRR("crcc.w.b.w", "$r12", "$r13", "$r14", 0x123456789abcdefUL, 0x02fe41918ac5cdbaUL, 0x48e0815289728f05UL); ++ TESTINST_RRR("crcc.w.b.w", "$r12", "$r13", "$r14", 0x123456789abcdefUL, 0xb60a8f381f187baeUL, 0x008c208cc413ff72UL); ++ ++ /* ---------------- crcc.w.h.w rd, rj, rk ---------------- */ ++ TESTINST_RRR("crcc.w.h.w", "$r12", "$r13", "$r14", 0x123456789abcdefUL, 0x4b154113f7d32514UL, 0xcce230caafbf9cc9UL); ++ TESTINST_RRR("crcc.w.h.w", "$r12", "$r13", "$r14", 0x123456789abcdefUL, 0x33d5d595721d4f13UL, 0xf4509311f443a7ceUL); ++ TESTINST_RRR("crcc.w.h.w", "$r12", "$r13", "$r14", 0x123456789abcdefUL, 0x4a3c6de6954cbc17UL, 0x111b21e39fbd7254UL); ++ TESTINST_RRR("crcc.w.h.w", "$r12", "$r13", "$r14", 0x123456789abcdefUL, 0xfbb5c64ed1b044c6UL, 0x33ca4c4fb3960326UL); ++ TESTINST_RRR("crcc.w.h.w", "$r12", "$r13", "$r14", 0x123456789abcdefUL, 0x2b7c5939d7c0f528UL, 0xb73870a5a6630162UL); ++ TESTINST_RRR("crcc.w.h.w", "$r12", "$r13", "$r14", 0x123456789abcdefUL, 0x02fe41918ac5cdbaUL, 0x48e0815289728f05UL); ++ TESTINST_RRR("crcc.w.h.w", "$r12", "$r13", "$r14", 0x123456789abcdefUL, 0xb60a8f381f187baeUL, 0x008c208cc413ff72UL); ++ ++ /* ---------------- crcc.w.w.w rd, rj, rk ---------------- */ ++ TESTINST_RRR("crcc.w.w.w", "$r12", "$r13", "$r14", 0x123456789abcdefUL, 0x4b154113f7d32514UL, 0xcce230caafbf9cc9UL); ++ TESTINST_RRR("crcc.w.w.w", "$r12", "$r13", "$r14", 0x123456789abcdefUL, 0x33d5d595721d4f13UL, 
0xf4509311f443a7ceUL); ++ TESTINST_RRR("crcc.w.w.w", "$r12", "$r13", "$r14", 0x123456789abcdefUL, 0x4a3c6de6954cbc17UL, 0x111b21e39fbd7254UL); ++ TESTINST_RRR("crcc.w.w.w", "$r12", "$r13", "$r14", 0x123456789abcdefUL, 0xfbb5c64ed1b044c6UL, 0x33ca4c4fb3960326UL); ++ TESTINST_RRR("crcc.w.w.w", "$r12", "$r13", "$r14", 0x123456789abcdefUL, 0x2b7c5939d7c0f528UL, 0xb73870a5a6630162UL); ++ TESTINST_RRR("crcc.w.w.w", "$r12", "$r13", "$r14", 0x123456789abcdefUL, 0x02fe41918ac5cdbaUL, 0x48e0815289728f05UL); ++ TESTINST_RRR("crcc.w.w.w", "$r12", "$r13", "$r14", 0x123456789abcdefUL, 0xb60a8f381f187baeUL, 0x008c208cc413ff72UL); ++ ++ /* ---------------- crcc.w.d.w rd, rj, rk ---------------- */ ++ TESTINST_RRR("crcc.w.d.w", "$r12", "$r13", "$r14", 0x123456789abcdefUL, 0x4b154113f7d32514UL, 0xcce230caafbf9cc9UL); ++ TESTINST_RRR("crcc.w.d.w", "$r12", "$r13", "$r14", 0x123456789abcdefUL, 0x33d5d595721d4f13UL, 0xf4509311f443a7ceUL); ++ TESTINST_RRR("crcc.w.d.w", "$r12", "$r13", "$r14", 0x123456789abcdefUL, 0x4a3c6de6954cbc17UL, 0x111b21e39fbd7254UL); ++ TESTINST_RRR("crcc.w.d.w", "$r12", "$r13", "$r14", 0x123456789abcdefUL, 0xfbb5c64ed1b044c6UL, 0x33ca4c4fb3960326UL); ++ TESTINST_RRR("crcc.w.d.w", "$r12", "$r13", "$r14", 0x123456789abcdefUL, 0x2b7c5939d7c0f528UL, 0xb73870a5a6630162UL); ++ TESTINST_RRR("crcc.w.d.w", "$r12", "$r13", "$r14", 0x123456789abcdefUL, 0x02fe41918ac5cdbaUL, 0x48e0815289728f05UL); ++ TESTINST_RRR("crcc.w.d.w", "$r12", "$r13", "$r14", 0x123456789abcdefUL, 0xb60a8f381f187baeUL, 0x008c208cc413ff72UL); ++} ++ ++int main(void) ++{ ++ test(); ++ return 0; ++} +diff --git a/none/tests/loongarch64/integer.stderr.exp b/none/tests/loongarch64/integer.stderr.exp +new file mode 100644 +index 000000000..e69de29bb +diff --git a/none/tests/loongarch64/integer.stdout.exp b/none/tests/loongarch64/integer.stdout.exp +new file mode 100644 +index 000000000..a4592b797 +--- /dev/null ++++ b/none/tests/loongarch64/integer.stdout.exp +@@ -0,0 +1,2748 @@ ++add.w $r19, $r20, $r25 :: ++before: $r19=0xf7f01ffbc9696094, $r20=0xb664b1ce21c8c7fc, $r25=0x0d0a02b79ace85cf ++after: $r19=0xffffffffbc974dcb, $r20=0xb664b1ce21c8c7fc, $r25=0x0d0a02b79ace85cf ++add.w $r29, $r9, $r12 :: ++before: $r29=0x5418cd6f6b640953, $r9=0x6465907ca2dac58c, $r12=0xefea76d0d526df3a ++after: $r29=0x000000007801a4c6, $r9=0x6465907ca2dac58c, $r12=0xefea76d0d526df3a ++add.w $r23, $r15, $r28 :: ++before: $r23=0x6ae34fbc6f2f7a9a, $r15=0xbf21c48ab5c2edcc, $r28=0x24824ebd458ed20e ++after: $r23=0xfffffffffb51bfda, $r15=0xbf21c48ab5c2edcc, $r28=0x24824ebd458ed20e ++add.w $r27, $r14, $r26 :: ++before: $r27=0x9f33e38db05616cc, $r14=0xf12ee0c276c52c78, $r26=0xc3054d65ecec3fe6 ++after: $r27=0x0000000063b16c5e, $r14=0xf12ee0c276c52c78, $r26=0xc3054d65ecec3fe6 ++add.w $r14, $r23, $r27 :: ++before: $r14=0x17eaa07c4607901f, $r23=0xa5fa9d0c8472848e, $r27=0xa34301227bb57f76 ++after: $r14=0x0000000000280404, $r23=0xa5fa9d0c8472848e, $r27=0xa34301227bb57f76 ++add.w $r19, $r19, $r4 :: ++before: $r19=0xd2e0644d9532b5ea, $r19=0x2957c6f0638238bc, $r4=0xf01566d0031ee917 ++after: $r19=0x0000000066a121d3, $r19=0x0000000066a121d3, $r4=0xf01566d0031ee917 ++add.w $r19, $r26, $r13 :: ++before: $r19=0x7b39b3f2ccbdaf79, $r26=0xee877221beef9d45, $r13=0x4a743034eefe075d ++after: $r19=0xffffffffadeda4a2, $r26=0xee877221beef9d45, $r13=0x4a743034eefe075d ++add.w $r29, $r18, $r14 :: ++before: $r29=0x95214c4de7e6d3ba, $r18=0x26502eb481799cd1, $r14=0x34d57b775083fb91 ++after: $r29=0xffffffffd1fd9862, $r18=0x26502eb481799cd1, $r14=0x34d57b775083fb91 ++add.w $r16, $r26, $r8 :: 
++before: $r16=0xb66b18865bbb3036, $r26=0x8881ccbe1e31aa8d, $r8=0xffe0d2dde8325edc ++after: $r16=0x0000000006640969, $r26=0x8881ccbe1e31aa8d, $r8=0xffe0d2dde8325edc ++add.w $r26, $r5, $r8 :: ++before: $r26=0xc367af71c905540c, $r5=0xcdcbe4860d983fe3, $r8=0x6687aa19ee1fc503 ++after: $r26=0xfffffffffbb804e6, $r5=0xcdcbe4860d983fe3, $r8=0x6687aa19ee1fc503 ++add.d $r16, $r18, $r8 :: ++before: $r16=0xbe5505b409ce995c, $r18=0x561a85fd57e87226, $r8=0x923f3293987edab0 ++after: $r16=0xe859b890f0674cd6, $r18=0x561a85fd57e87226, $r8=0x923f3293987edab0 ++add.d $r12, $r7, $r29 :: ++before: $r12=0xff2682151edc3476, $r7=0x90beb037eacfe3db, $r29=0xa4017082880f1151 ++after: $r12=0x34c020ba72def52c, $r7=0x90beb037eacfe3db, $r29=0xa4017082880f1151 ++add.d $r31, $r31, $r5 :: ++before: $r31=0x81e38385e39d9f16, $r31=0xedb2ffa50c0c8b5f, $r5=0x8776f30d75fc97c2 ++after: $r31=0x7529f2b282092321, $r31=0x7529f2b282092321, $r5=0x8776f30d75fc97c2 ++add.d $r31, $r6, $r26 :: ++before: $r31=0x64ff385d97b60dc2, $r6=0x80f903f206f08f60, $r26=0x4f5b589532e85398 ++after: $r31=0xd0545c8739d8e2f8, $r6=0x80f903f206f08f60, $r26=0x4f5b589532e85398 ++add.d $r25, $r10, $r20 :: ++before: $r25=0xdd8973d6b99634ca, $r10=0x34c0fe5a72dd43d9, $r20=0x2494af03cf5878e7 ++after: $r25=0x5955ad5e4235bcc0, $r10=0x34c0fe5a72dd43d9, $r20=0x2494af03cf5878e7 ++add.d $r5, $r10, $r4 :: ++before: $r5=0x94b272b05ffe39c8, $r10=0x152d15efbbc54c04, $r4=0x25afc06cf151ab29 ++after: $r5=0x3adcd65cad16f72d, $r10=0x152d15efbbc54c04, $r4=0x25afc06cf151ab29 ++add.d $r19, $r30, $r18 :: ++before: $r19=0xa6e14d42459cadf6, $r30=0x558620ff616141b1, $r18=0x1978905697120747 ++after: $r19=0x6efeb155f87348f8, $r30=0x558620ff616141b1, $r18=0x1978905697120747 ++add.d $r7, $r8, $r20 :: ++before: $r7=0x02ea6f88031a29ae, $r8=0x6a08c12301e00d49, $r20=0xdd533acf17f59142 ++after: $r7=0x475bfbf219d59e8b, $r8=0x6a08c12301e00d49, $r20=0xdd533acf17f59142 ++add.d $r24, $r14, $r26 :: ++before: $r24=0xb88df6b8315eb7a6, $r14=0x0137d04f7f6fe285, $r26=0x2ccb253ff7ea93d6 ++after: $r24=0x2e02f58f775a765b, $r14=0x0137d04f7f6fe285, $r26=0x2ccb253ff7ea93d6 ++add.d $r7, $r19, $r23 :: ++before: $r7=0xad464722c0967f28, $r19=0x30295c1fd85ae029, $r23=0x2c69edb227e01d94 ++after: $r7=0x5c9349d2003afdbd, $r19=0x30295c1fd85ae029, $r23=0x2c69edb227e01d94 ++sub.w $r16, $r28, $r17 :: ++before: $r16=0x8b0ba4ef20207fdd, $r28=0x90493cb39ff734a2, $r17=0x519842bab5cc1208 ++after: $r16=0xffffffffea2b229a, $r28=0x90493cb39ff734a2, $r17=0x519842bab5cc1208 ++sub.w $r6, $r13, $r15 :: ++before: $r6=0x13af983aafc53691, $r13=0x27bc6a037865e47f, $r15=0xe20df003930575d5 ++after: $r6=0xffffffffe5606eaa, $r13=0x27bc6a037865e47f, $r15=0xe20df003930575d5 ++sub.w $r8, $r19, $r23 :: ++before: $r8=0x4177aec74585d42d, $r19=0xba89b6aa9b7728ac, $r23=0x0e6a089b8eaf43fe ++after: $r8=0x000000000cc7e4ae, $r19=0xba89b6aa9b7728ac, $r23=0x0e6a089b8eaf43fe ++sub.w $r7, $r10, $r23 :: ++before: $r7=0x0ca1b83a7ab88912, $r10=0xd5e2759ea82c2c80, $r23=0x76e9d6f88c2624ff ++after: $r7=0x000000001c060781, $r10=0xd5e2759ea82c2c80, $r23=0x76e9d6f88c2624ff ++sub.w $r19, $r24, $r24 :: ++before: $r19=0x99d63505ea0474b3, $r24=0x1b53c4c34957af8e, $r24=0x6146da47b731d3ed ++after: $r19=000000000000000000, $r24=0x6146da47b731d3ed, $r24=0x6146da47b731d3ed ++sub.w $r26, $r31, $r7 :: ++before: $r26=0x8eca560d8234ff55, $r31=0x5beb18985c3f451e, $r7=0x9c9634dfaa7b9313 ++after: $r26=0xffffffffb1c3b20b, $r31=0x5beb18985c3f451e, $r7=0x9c9634dfaa7b9313 ++sub.w $r29, $r16, $r6 :: ++before: $r29=0x229544d2cb1d5a64, $r16=0xd23751d515597128, $r6=0xa09dd29330aa8d15 ++after: 
$r29=0xffffffffe4aee413, $r16=0xd23751d515597128, $r6=0xa09dd29330aa8d15 ++sub.w $r12, $r16, $r4 :: ++before: $r12=0x229f5aefe9fb7fb7, $r16=0x0740ed49b5e95fae, $r4=0xbc6304a0df442807 ++after: $r12=0xffffffffd6a537a7, $r16=0x0740ed49b5e95fae, $r4=0xbc6304a0df442807 ++sub.w $r30, $r29, $r26 :: ++before: $r30=0x94f3a67d188df281, $r29=0x048e066cdad20ac2, $r26=0x1e032e60568554a7 ++after: $r30=0xffffffff844cb61b, $r29=0x048e066cdad20ac2, $r26=0x1e032e60568554a7 ++sub.w $r18, $r23, $r25 :: ++before: $r18=0xedb4f44fb338ba4f, $r23=0xf06e698cd08c8e7b, $r25=0xa22b91e88b77d4d8 ++after: $r18=0x000000004514b9a3, $r23=0xf06e698cd08c8e7b, $r25=0xa22b91e88b77d4d8 ++sub.d $r18, $r10, $r27 :: ++before: $r18=0x68647aa06a23c8f9, $r10=0xd001cb46cb78fc4f, $r27=0x460cc8702b1761f9 ++after: $r18=0x89f502d6a0619a56, $r10=0xd001cb46cb78fc4f, $r27=0x460cc8702b1761f9 ++sub.d $r7, $r24, $r18 :: ++before: $r7=0x8d18e952fb747f43, $r24=0x1e7d1a019fb96490, $r18=0xb466fb9891e8c151 ++after: $r7=0x6a161e690dd0a33f, $r24=0x1e7d1a019fb96490, $r18=0xb466fb9891e8c151 ++sub.d $r4, $r16, $r27 :: ++before: $r4=0x5f6647277ca4c99d, $r16=0xa1156b863ec98e1d, $r27=0xc15612f3ce819d64 ++after: $r4=0xdfbf58927047f0b9, $r16=0xa1156b863ec98e1d, $r27=0xc15612f3ce819d64 ++sub.d $r4, $r25, $r9 :: ++before: $r4=0xe67b33778df480b4, $r25=0xc24b2711be7e4ef1, $r9=0xd940ca25b956100f ++after: $r4=0xe90a5cec05283ee2, $r25=0xc24b2711be7e4ef1, $r9=0xd940ca25b956100f ++sub.d $r5, $r12, $r18 :: ++before: $r5=0x258ae461ef798ce7, $r12=0x3f4984ea3f5692de, $r18=0x99fa673f30e69019 ++after: $r5=0xa54f1dab0e7002c5, $r12=0x3f4984ea3f5692de, $r18=0x99fa673f30e69019 ++sub.d $r13, $r10, $r9 :: ++before: $r13=0xdafb48debea5211e, $r10=0xeac1d3b25f6bf8db, $r9=0x297d671b1c96e48f ++after: $r13=0xc1446c9742d5144c, $r10=0xeac1d3b25f6bf8db, $r9=0x297d671b1c96e48f ++sub.d $r7, $r15, $r23 :: ++before: $r7=0xc6b03274ff37baf6, $r15=0x05b37ffc2c84aec9, $r23=0x74d62a52cbaaec15 ++after: $r7=0x90dd55a960d9c2b4, $r15=0x05b37ffc2c84aec9, $r23=0x74d62a52cbaaec15 ++sub.d $r26, $r18, $r26 :: ++before: $r26=0x35c71e0956ffcd43, $r18=0xad703a4e8078070b, $r26=0x634924e8a9fdbb9e ++after: $r26=0x4a271565d67a4b6d, $r18=0xad703a4e8078070b, $r26=0x4a271565d67a4b6d ++sub.d $r16, $r29, $r5 :: ++before: $r16=0x18bf961cba922928, $r29=0x54ed9198405f8983, $r5=0x977f5b65e5f86b4a ++after: $r16=0xbd6e36325a671e39, $r29=0x54ed9198405f8983, $r5=0x977f5b65e5f86b4a ++sub.d $r31, $r28, $r14 :: ++before: $r31=0xa38a1e8cb3c7ba00, $r28=0xd220d1ef3cf8f3f7, $r14=0xc972df2ace170d61 ++after: $r31=0x08adf2c46ee1e696, $r28=0xd220d1ef3cf8f3f7, $r14=0xc972df2ace170d61 ++slt $r12, $r17, $r18 :: ++before: $r12=0xd7a0e65c279e1082, $r17=0x819edf00a849ba44, $r18=0x41a0b2fe37d44db2 ++after: $r12=0x0000000000000001, $r17=0x819edf00a849ba44, $r18=0x41a0b2fe37d44db2 ++slt $r31, $r13, $r18 :: ++before: $r31=0x2ef00a5cfd100f71, $r13=0x4792cd9f9abf36d3, $r18=0x2c117902110ef9a8 ++after: $r31=000000000000000000, $r13=0x4792cd9f9abf36d3, $r18=0x2c117902110ef9a8 ++slt $r4, $r30, $r29 :: ++before: $r4=0x6d8be2fb73e2c006, $r30=0xf76ce97d7658995e, $r29=0x3856e09bfe39df6e ++after: $r4=0x0000000000000001, $r30=0xf76ce97d7658995e, $r29=0x3856e09bfe39df6e ++slt $r4, $r18, $r10 :: ++before: $r4=0xeddcb9dcf092c3f5, $r18=0x0e57b7c25d13dea8, $r10=0x761d86b48cb5ce21 ++after: $r4=0x0000000000000001, $r18=0x0e57b7c25d13dea8, $r10=0x761d86b48cb5ce21 ++slt $r16, $r18, $r16 :: ++before: $r16=0xcddd92e2340cd593, $r18=0xc9a30f4707743f80, $r16=0x3ff7d36f17396d3a ++after: $r16=0x0000000000000001, $r18=0xc9a30f4707743f80, $r16=0x0000000000000001 ++slt $r6, 
$r14, $r10 :: ++before: $r6=0xa9e71c6376093499, $r14=0x26bb3955b588461f, $r10=0xfae7e7a950447826 ++after: $r6=000000000000000000, $r14=0x26bb3955b588461f, $r10=0xfae7e7a950447826 ++slt $r19, $r4, $r17 :: ++before: $r19=0x35bb27f64ebd7d62, $r4=0x4a7d3941ebf88bc1, $r17=0x0cda32e4b1c1d5c4 ++after: $r19=000000000000000000, $r4=0x4a7d3941ebf88bc1, $r17=0x0cda32e4b1c1d5c4 ++slt $r19, $r28, $r15 :: ++before: $r19=0x29419b8261e40b99, $r28=0xe7e9b059033afa7d, $r15=0x1ea916293b1cc3dd ++after: $r19=0x0000000000000001, $r28=0xe7e9b059033afa7d, $r15=0x1ea916293b1cc3dd ++slt $r31, $r16, $r16 :: ++before: $r31=0xe0fb75047bc62c9a, $r16=0xa634f6174dcced7d, $r16=0xcca5a9d25b670e70 ++after: $r31=000000000000000000, $r16=0xcca5a9d25b670e70, $r16=0xcca5a9d25b670e70 ++slt $r4, $r4, $r10 :: ++before: $r4=0x724ee03fb3fcdec8, $r4=0xae2587f097065e2c, $r10=0x65c69548f83dd0df ++after: $r4=0x0000000000000001, $r4=0x0000000000000001, $r10=0x65c69548f83dd0df ++sltu $r14, $r10, $r13 :: ++before: $r14=0x1956e5498db3fb6e, $r10=0x2d909abfec4490bd, $r13=0xa7d554ebe591d5cc ++after: $r14=0x0000000000000001, $r10=0x2d909abfec4490bd, $r13=0xa7d554ebe591d5cc ++sltu $r6, $r5, $r18 :: ++before: $r6=0xc34214447a064eb8, $r5=0xad4413e45f0a226a, $r18=0x4b09aab500b04bff ++after: $r6=000000000000000000, $r5=0xad4413e45f0a226a, $r18=0x4b09aab500b04bff ++sltu $r31, $r17, $r17 :: ++before: $r31=0x86e16a1618a639c4, $r17=0x87917b281cef8df0, $r17=0x0d543115a56dee48 ++after: $r31=000000000000000000, $r17=0x0d543115a56dee48, $r17=0x0d543115a56dee48 ++sltu $r20, $r6, $r25 :: ++before: $r20=0x164fff47b8b23752, $r6=0x9ad830d46b1660f6, $r25=0xc5d72c146f4aba72 ++after: $r20=0x0000000000000001, $r6=0x9ad830d46b1660f6, $r25=0xc5d72c146f4aba72 ++sltu $r6, $r26, $r7 :: ++before: $r6=0x1428360430b7c9b5, $r26=0xc2052dc6eea5a53c, $r7=0xda1a8e35dd060adf ++after: $r6=0x0000000000000001, $r26=0xc2052dc6eea5a53c, $r7=0xda1a8e35dd060adf ++sltu $r19, $r15, $r26 :: ++before: $r19=0xdfc9984966167604, $r15=0xa9ea12b5a37dd492, $r26=0x7a24be9fcf349afc ++after: $r19=000000000000000000, $r15=0xa9ea12b5a37dd492, $r26=0x7a24be9fcf349afc ++sltu $r29, $r26, $r29 :: ++before: $r29=0x5a3822db2cc26fc5, $r26=0x5985f02e77511d80, $r29=0x370f15cc98f2a6c1 ++after: $r29=000000000000000000, $r26=0x5985f02e77511d80, $r29=000000000000000000 ++sltu $r7, $r28, $r16 :: ++before: $r7=0x0e4594ee2cc8c6d7, $r28=0x0177ac0014f5dd20, $r16=0xde1724c7590a4908 ++after: $r7=0x0000000000000001, $r28=0x0177ac0014f5dd20, $r16=0xde1724c7590a4908 ++sltu $r8, $r12, $r4 :: ++before: $r8=0x1df979e50aa0ed18, $r12=0x5b410cd0985fce18, $r4=0x9d3c39d61e29025d ++after: $r8=0x0000000000000001, $r12=0x5b410cd0985fce18, $r4=0x9d3c39d61e29025d ++sltu $r30, $r23, $r25 :: ++before: $r30=0x1cba022788d49d13, $r23=0xd2b40941478ee865, $r25=0xa503a74e41535830 ++after: $r30=000000000000000000, $r23=0xd2b40941478ee865, $r25=0xa503a74e41535830 ++slti $r15, $r27, 1913 :: ++before: $r15=0xe24c4ca567d1d5f4, $r27=0xfef05a88adf4b892 ++after: $r15=0x0000000000000001, $r27=0xfef05a88adf4b892 ++slti $r8, $r31, -738 :: ++before: $r8=0xfba7284a8ab83b2d, $r31=0xff63b80173f1e368 ++after: $r8=0x0000000000000001, $r31=0xff63b80173f1e368 ++slti $r31, $r31, -1544 :: ++before: $r31=0xb4599a9fa734365a, $r31=0x4327139de75dde1e ++after: $r31=000000000000000000, $r31=000000000000000000 ++slti $r5, $r4, 1529 :: ++before: $r5=0xa5572272e0c04a20, $r4=0x87657c1b1699936b ++after: $r5=0x0000000000000001, $r4=0x87657c1b1699936b ++slti $r10, $r28, 557 :: ++before: $r10=0x1260731618214410, $r28=0xd0de0dfbafb7960a ++after: $r10=0x0000000000000001, 
$r28=0xd0de0dfbafb7960a ++slti $r5, $r12, -222 :: ++before: $r5=0x4c6317772a4b06b0, $r12=0x7a1d4eeb507d649b ++after: $r5=000000000000000000, $r12=0x7a1d4eeb507d649b ++slti $r4, $r31, 717 :: ++before: $r4=0x23b4d62a21994afb, $r31=0x85304cc393f6506b ++after: $r4=0x0000000000000001, $r31=0x85304cc393f6506b ++slti $r18, $r26, 730 :: ++before: $r18=0x67b6f5dbf6a0c55d, $r26=0x451013f9a2337f9f ++after: $r18=000000000000000000, $r26=0x451013f9a2337f9f ++slti $r25, $r8, 1193 :: ++before: $r25=0xdb278cca57f1ad7b, $r8=0x7371a60f5af6334b ++after: $r25=000000000000000000, $r8=0x7371a60f5af6334b ++slti $r17, $r24, 329 :: ++before: $r17=0xffa3ed31f9ea3a29, $r24=0x1138e06e1a45c4f3 ++after: $r17=000000000000000000, $r24=0x1138e06e1a45c4f3 ++sltui $r13, $r26, -462 :: ++before: $r13=0x62677116040aebff, $r26=0xeedd6ccd0e5e2771 ++after: $r13=0x0000000000000001, $r26=0xeedd6ccd0e5e2771 ++sltui $r24, $r28, 1890 :: ++before: $r24=0xef9500b68a87984a, $r28=0xaf5922683f40599d ++after: $r24=000000000000000000, $r28=0xaf5922683f40599d ++sltui $r9, $r6, -1538 :: ++before: $r9=0x9996aa21d2b51922, $r6=0xd5214fb275e738dc ++after: $r9=0x0000000000000001, $r6=0xd5214fb275e738dc ++sltui $r19, $r26, -215 :: ++before: $r19=0x3eb2777655f0f1c5, $r26=0x98ed915860f0eb26 ++after: $r19=0x0000000000000001, $r26=0x98ed915860f0eb26 ++sltui $r8, $r19, -780 :: ++before: $r8=0x5c44b5807c43724c, $r19=0x63a068026b529b03 ++after: $r8=0x0000000000000001, $r19=0x63a068026b529b03 ++sltui $r19, $r17, -1041 :: ++before: $r19=0xf6926016cdbfacc1, $r17=0xec04a9bcc8d1192a ++after: $r19=0x0000000000000001, $r17=0xec04a9bcc8d1192a ++sltui $r26, $r14, 1653 :: ++before: $r26=0x542f05c795aa07c2, $r14=0xb634bf537df4c4ce ++after: $r26=000000000000000000, $r14=0xb634bf537df4c4ce ++sltui $r8, $r5, 441 :: ++before: $r8=0x371daf74e330ee8b, $r5=0xedb0321c888ae22e ++after: $r8=000000000000000000, $r5=0xedb0321c888ae22e ++sltui $r25, $r4, 678 :: ++before: $r25=0xba813c7acc8f5621, $r4=0x8d5ce4750fe7603b ++after: $r25=000000000000000000, $r4=0x8d5ce4750fe7603b ++sltui $r17, $r15, 2019 :: ++before: $r17=0x199b641cefe0a0e2, $r15=0x7ea0508a3fed3453 ++after: $r17=000000000000000000, $r15=0x7ea0508a3fed3453 ++nor $r14, $r28, $r9 :: ++before: $r14=0xccf23cf02a48844d, $r28=0x2608ea0069c4e9dd, $r9=0x1c7a04255a2d13f8 ++after: $r14=0xc18511da84120402, $r28=0x2608ea0069c4e9dd, $r9=0x1c7a04255a2d13f8 ++nor $r6, $r30, $r4 :: ++before: $r6=0xbfcc3de6da2483be, $r30=0xd24e9abca28d6cb5, $r4=0xbb01b508523673c6 ++after: $r6=0x04b040430d408008, $r30=0xd24e9abca28d6cb5, $r4=0xbb01b508523673c6 ++nor $r6, $r28, $r13 :: ++before: $r6=0x28dacd828d5736d7, $r28=0xb365ff31474f736c, $r13=0x593621c0f82b445c ++after: $r6=0x0488000e00908883, $r28=0xb365ff31474f736c, $r13=0x593621c0f82b445c ++nor $r24, $r16, $r31 :: ++before: $r24=0x5898010a4c6cf1bb, $r16=0xecac6e093ba6146a, $r31=0x050e6093f19b1194 ++after: $r24=0x125191640440ea01, $r16=0xecac6e093ba6146a, $r31=0x050e6093f19b1194 ++nor $r15, $r7, $r20 :: ++before: $r15=0x2ddb1dea334fd92a, $r7=0x401d7a663be0b31a, $r20=0xb6c008973a85f779 ++after: $r15=0x09228508c41a0884, $r7=0x401d7a663be0b31a, $r20=0xb6c008973a85f779 ++nor $r18, $r31, $r29 :: ++before: $r18=0x0c987982e1d91684, $r31=0x181f20f581ed38f4, $r29=0xefaa786e00a2e5b9 ++after: $r18=0x004087007e100202, $r31=0x181f20f581ed38f4, $r29=0xefaa786e00a2e5b9 ++nor $r19, $r31, $r13 :: ++before: $r19=0x39e476d555cd20bc, $r31=0xfb8fab5d35576d50, $r13=0x71a92a8377c0f729 ++after: $r19=0x0450542088280086, $r31=0xfb8fab5d35576d50, $r13=0x71a92a8377c0f729 ++nor $r25, $r7, $r5 :: ++before: 
$r25=0x7f36d0c6d173e8c8, $r7=0x181763a9f9350680, $r5=0x5ec5099605d7d418 ++after: $r25=0xa128944002082967, $r7=0x181763a9f9350680, $r5=0x5ec5099605d7d418 ++nor $r30, $r23, $r23 :: ++before: $r30=0x688e1d04976ac8db, $r23=0xd37b6d6a1c510287, $r23=0x8670301ee2a715df ++after: $r30=0x798fcfe11d58ea20, $r23=0x8670301ee2a715df, $r23=0x8670301ee2a715df ++nor $r5, $r23, $r14 :: ++before: $r5=0x71c4a211dd9262f4, $r23=0xcb8a4aebc2c6c4f2, $r14=0x084d79a5254447c9 ++after: $r5=0x3430841018393804, $r23=0xcb8a4aebc2c6c4f2, $r14=0x084d79a5254447c9 ++and $r8, $r14, $r31 :: ++before: $r8=0xbddf22c4109e20b5, $r14=0xb2d25973efd1a8ff, $r31=0x28b78b59dfe641e9 ++after: $r8=0x20920951cfc000e9, $r14=0xb2d25973efd1a8ff, $r31=0x28b78b59dfe641e9 ++and $r19, $r23, $r17 :: ++before: $r19=0xb25e185c549f6661, $r23=0xb6ccc215c2f17718, $r17=0xf20669c51aee8ffe ++after: $r19=0xb204400502e00718, $r23=0xb6ccc215c2f17718, $r17=0xf20669c51aee8ffe ++and $r30, $r27, $r23 :: ++before: $r30=0xa7f4ad796393e12b, $r27=0xefbcf405df3e7aff, $r23=0x548a0141e9fe1700 ++after: $r30=0x44880001c93e1200, $r27=0xefbcf405df3e7aff, $r23=0x548a0141e9fe1700 ++and $r18, $r31, $r29 :: ++before: $r18=0xa399c7f46c61d974, $r31=0xe0fe8cca1cbab773, $r29=0x49e680ddee7f666b ++after: $r18=0x40e680c80c3a2663, $r31=0xe0fe8cca1cbab773, $r29=0x49e680ddee7f666b ++and $r5, $r26, $r25 :: ++before: $r5=0x1682ca17c11f90ac, $r26=0x4e9706cb2c885742, $r25=0x250ff6304dd87d57 ++after: $r5=0x040706000c885542, $r26=0x4e9706cb2c885742, $r25=0x250ff6304dd87d57 ++and $r28, $r14, $r8 :: ++before: $r28=0xcacf15e6ffad256f, $r14=0x99527f4fa2aa8fb1, $r8=0xcff546a883b63cfb ++after: $r28=0x8950460882a20cb1, $r14=0x99527f4fa2aa8fb1, $r8=0xcff546a883b63cfb ++and $r28, $r9, $r28 :: ++before: $r28=0xc60423b9cf70d112, $r9=0x2fb0db47f1d8f166, $r28=0x1e9cec9d13e85210 ++after: $r28=0x0e90c80511c85000, $r9=0x2fb0db47f1d8f166, $r28=0x0e90c80511c85000 ++and $r18, $r28, $r5 :: ++before: $r18=0x5059c37ee38d2f25, $r28=0x74bf57d85d90af3a, $r5=0x35479df0ebec9209 ++after: $r18=0x340715d049808208, $r28=0x74bf57d85d90af3a, $r5=0x35479df0ebec9209 ++and $r23, $r25, $r12 :: ++before: $r23=0x18742ef4c73416be, $r25=0x8b93e775860ef52b, $r12=0xa909915f60a546d2 ++after: $r23=0x8901815500044402, $r25=0x8b93e775860ef52b, $r12=0xa909915f60a546d2 ++and $r18, $r17, $r24 :: ++before: $r18=0xadb2cc6aec909946, $r17=0x3068f8b21d583e4c, $r24=0xcf8aae1918f3a88e ++after: $r18=0x0008a8101850280c, $r17=0x3068f8b21d583e4c, $r24=0xcf8aae1918f3a88e ++or $r19, $r28, $r25 :: ++before: $r19=0x46819825f87044c2, $r28=0x65cb2cc7e5f5a720, $r25=0x1fc0130146f13f76 ++after: $r19=0x7fcb3fc7e7f5bf76, $r28=0x65cb2cc7e5f5a720, $r25=0x1fc0130146f13f76 ++or $r8, $r25, $r4 :: ++before: $r8=0x45083dd59c60e6fe, $r25=0x936ecfaeb4d51c95, $r4=0xdc37c27c69024f6e ++after: $r8=0xdf7fcffefdd75fff, $r25=0x936ecfaeb4d51c95, $r4=0xdc37c27c69024f6e ++or $r15, $r16, $r8 :: ++before: $r15=0x516659e51cf19b26, $r16=0x7589da0802d59510, $r8=0x6b713c60390f3fbf ++after: $r15=0x7ff9fe683bdfbfbf, $r16=0x7589da0802d59510, $r8=0x6b713c60390f3fbf ++or $r9, $r15, $r6 :: ++before: $r9=0x1646568625c40022, $r15=0xa68db9141a88850c, $r6=0x756d912fbefef973 ++after: $r9=0xf7edb93fbefefd7f, $r15=0xa68db9141a88850c, $r6=0x756d912fbefef973 ++or $r24, $r9, $r25 :: ++before: $r24=0xda34c24d14fce443, $r9=0x6ad9bf24481630b0, $r25=0x02aefcdfa652395b ++after: $r24=0x6affffffee5639fb, $r9=0x6ad9bf24481630b0, $r25=0x02aefcdfa652395b ++or $r13, $r9, $r14 :: ++before: $r13=0x900358ad1e848728, $r9=0xa0e361b5b891a62e, $r14=0xddfa0c1377ce01ac ++after: $r13=0xfdfb6db7ffdfa7ae, 
$r9=0xa0e361b5b891a62e, $r14=0xddfa0c1377ce01ac ++or $r23, $r16, $r15 :: ++before: $r23=0x27a55515d39aded9, $r16=0xd0daf17f9cb0bf5a, $r15=0xf44c4372982c4d74 ++after: $r23=0xf4def37f9cbcff7e, $r16=0xd0daf17f9cb0bf5a, $r15=0xf44c4372982c4d74 ++or $r20, $r16, $r16 :: ++before: $r20=0x7045887bb8325d6f, $r16=0xbac771cbb78dae04, $r16=0x23f4928023125a5c ++after: $r20=0x23f4928023125a5c, $r16=0x23f4928023125a5c, $r16=0x23f4928023125a5c ++or $r30, $r5, $r7 :: ++before: $r30=0xcf609aa2057d1b98, $r5=0x379641544fd1cd48, $r7=0x5275ef34f265f01a ++after: $r30=0x77f7ef74fff5fd5a, $r5=0x379641544fd1cd48, $r7=0x5275ef34f265f01a ++or $r23, $r4, $r30 :: ++before: $r23=0xc43fc1c750887406, $r4=0x44a3229c33d1cd65, $r30=0xceaa00084fc04912 ++after: $r23=0xceab229c7fd1cd77, $r4=0x44a3229c33d1cd65, $r30=0xceaa00084fc04912 ++xor $r6, $r19, $r31 :: ++before: $r6=0x018522418b59bf8a, $r19=0x270a2ec823f26e39, $r31=0x99ef76e6d4495ae3 ++after: $r6=0xbee5582ef7bb34da, $r19=0x270a2ec823f26e39, $r31=0x99ef76e6d4495ae3 ++xor $r28, $r20, $r27 :: ++before: $r28=0x57de83cac9dade15, $r20=0xd39fdecdfd4ccb08, $r27=0x00c97b854adacdb4 ++after: $r28=0xd356a548b79606bc, $r20=0xd39fdecdfd4ccb08, $r27=0x00c97b854adacdb4 ++xor $r4, $r29, $r5 :: ++before: $r4=0x9f7356fff2445f77, $r29=0xc3c3a34d2c226b5a, $r5=0x51abdd266816b94f ++after: $r4=0x92687e6b4434d215, $r29=0xc3c3a34d2c226b5a, $r5=0x51abdd266816b94f ++xor $r14, $r6, $r28 :: ++before: $r14=0xdd5ca0b5c6c45804, $r6=0xa0ba047990ec0798, $r28=0x089e6efd43651c28 ++after: $r14=0xa8246a84d3891bb0, $r6=0xa0ba047990ec0798, $r28=0x089e6efd43651c28 ++xor $r8, $r19, $r23 :: ++before: $r8=0xc3e35cd44af166fa, $r19=0x6affcfe12104ccc7, $r23=0x4adbb3601a07a1d9 ++after: $r8=0x20247c813b036d1e, $r19=0x6affcfe12104ccc7, $r23=0x4adbb3601a07a1d9 ++xor $r16, $r5, $r18 :: ++before: $r16=0x685cdc5ca969c8e1, $r5=0xd88d0e2a9900b8eb, $r18=0xdd4dfbba723cde28 ++after: $r16=0x05c0f590eb3c66c3, $r5=0xd88d0e2a9900b8eb, $r18=0xdd4dfbba723cde28 ++xor $r20, $r18, $r24 :: ++before: $r20=0x2362838018fa39be, $r18=0xbbc8d438b24c037a, $r24=0xe020a8456a45b667 ++after: $r20=0x5be87c7dd809b51d, $r18=0xbbc8d438b24c037a, $r24=0xe020a8456a45b667 ++xor $r19, $r23, $r19 :: ++before: $r19=0x637cae50fc0a1c95, $r23=0x514b81a7227dd07e, $r19=0x59a27a7f9c8481c3 ++after: $r19=0x08e9fbd8bef951bd, $r23=0x514b81a7227dd07e, $r19=0x08e9fbd8bef951bd ++xor $r20, $r16, $r18 :: ++before: $r20=0xb728dd7a443bcc8f, $r16=0xe2de9bf67cdbdc0c, $r18=0x26687435fbe4dbf6 ++after: $r20=0xc4b6efc3873f07fa, $r16=0xe2de9bf67cdbdc0c, $r18=0x26687435fbe4dbf6 ++xor $r23, $r14, $r6 :: ++before: $r23=0x744915919b52e27e, $r14=0x16863c1d3e1cded7, $r6=0x040ce8607349c380 ++after: $r23=0x128ad47d4d551d57, $r14=0x16863c1d3e1cded7, $r6=0x040ce8607349c380 ++orn $r24, $r9, $r15 :: ++before: $r24=0x39320ce9aa25fb73, $r9=0xaaec06dc1b47cf43, $r15=0x5fa36a558c884a69 ++after: $r24=0xaafc97fe7b77ffd7, $r9=0xaaec06dc1b47cf43, $r15=0x5fa36a558c884a69 ++orn $r12, $r4, $r26 :: ++before: $r12=0xa9c2abcbc14e3f3c, $r4=0x7c87d633528d97b0, $r26=0xe383c14e72ab8677 ++after: $r12=0x7cfffeb3dfddffb8, $r4=0x7c87d633528d97b0, $r26=0xe383c14e72ab8677 ++orn $r20, $r24, $r28 :: ++before: $r20=0xb117d8b0280738a2, $r24=0x318fd949c3ba430f, $r28=0xc9edab5116dc1582 ++after: $r20=0x379fddefebbbeb7f, $r24=0x318fd949c3ba430f, $r28=0xc9edab5116dc1582 ++orn $r8, $r25, $r25 :: ++before: $r8=0xb140441a36f8eded, $r25=0xa26782a5e34d7add, $r25=0x61bdd5b78d019958 ++after: $r8=0xffffffffffffffff, $r25=0x61bdd5b78d019958, $r25=0x61bdd5b78d019958 ++orn $r16, $r18, $r25 :: ++before: $r16=0xcda0e2c1bce1eeec, 
$r18=0xa4486eefd2c444d9, $r25=0xbd007605c829cadc ++after: $r16=0xe6ffeffff7d675fb, $r18=0xa4486eefd2c444d9, $r25=0xbd007605c829cadc ++orn $r5, $r28, $r19 :: ++before: $r5=0x8196fca50795a2aa, $r28=0xec7f689a0d676560, $r19=0xb4450418c4e1b333 ++after: $r5=0xeffffbff3f7f6dec, $r28=0xec7f689a0d676560, $r19=0xb4450418c4e1b333 ++orn $r15, $r14, $r8 :: ++before: $r15=0xaf1e2a9fe35ba4ed, $r14=0xd2207f86d89b890a, $r8=0xfb31b9e37313a94d ++after: $r15=0xd6ee7f9edcffdfba, $r14=0xd2207f86d89b890a, $r8=0xfb31b9e37313a94d ++orn $r27, $r14, $r14 :: ++before: $r27=0x1f24566bfa353160, $r14=0xc4e17319c4766bec, $r14=0x29a3bbaaf6b49218 ++after: $r27=0xffffffffffffffff, $r14=0x29a3bbaaf6b49218, $r14=0x29a3bbaaf6b49218 ++orn $r17, $r12, $r31 :: ++before: $r17=0xf5195a72c175fed7, $r12=0x7aa8d4840359cbf6, $r31=0xa1a42af83c82215b ++after: $r17=0x7efbd587c37ddff6, $r12=0x7aa8d4840359cbf6, $r31=0xa1a42af83c82215b ++orn $r16, $r20, $r20 :: ++before: $r16=0x76bb09b5b50705e2, $r20=0x613fdcbd8c1eba2a, $r20=0xfb1e04641f5da4ff ++after: $r16=0xffffffffffffffff, $r20=0xfb1e04641f5da4ff, $r20=0xfb1e04641f5da4ff ++andn $r19, $r31, $r17 :: ++before: $r19=0xbcc81a9b2e349626, $r31=0x5a38a8ef9c7e30e4, $r17=0xcb490976d0652986 ++after: $r19=0x1030a0890c1a1060, $r31=0x5a38a8ef9c7e30e4, $r17=0xcb490976d0652986 ++andn $r10, $r4, $r10 :: ++before: $r10=0x9acfa0cd6ea107fd, $r4=0x1d9b572e8f6bedb7, $r10=0x768fe778d2a543ea ++after: $r10=0x091010060d4aac15, $r4=0x1d9b572e8f6bedb7, $r10=0x091010060d4aac15 ++andn $r6, $r12, $r26 :: ++before: $r6=0x949e36cff3b5decb, $r12=0x56723f7285834fc9, $r26=0xf6fa544d6cd57fa8 ++after: $r6=0x00002b3281020041, $r12=0x56723f7285834fc9, $r26=0xf6fa544d6cd57fa8 ++andn $r16, $r6, $r4 :: ++before: $r16=0x44a39d85132d6513, $r6=0x3ca7f972b865b7ce, $r4=0xf18819e4740308bc ++after: $r16=0x0c27e0128864b742, $r6=0x3ca7f972b865b7ce, $r4=0xf18819e4740308bc ++andn $r19, $r26, $r15 :: ++before: $r19=0x856d1e3162c8fa2d, $r26=0x0c1ef79456be3885, $r15=0x03c089064e60da1d ++after: $r19=0x0c1e7690109e2080, $r26=0x0c1ef79456be3885, $r15=0x03c089064e60da1d ++andn $r17, $r28, $r9 :: ++before: $r17=0x512a518c554f4b0a, $r28=0x043454425b8b7755, $r9=0xdc5dca386b49bdd7 ++after: $r17=0x0020144210824200, $r28=0x043454425b8b7755, $r9=0xdc5dca386b49bdd7 ++andn $r16, $r16, $r14 :: ++before: $r16=0xa9c14796fec54f89, $r16=0xe31928f90d2723a4, $r14=0xcf2deaf4af11410a ++after: $r16=0x20100009002622a4, $r16=0x20100009002622a4, $r14=0xcf2deaf4af11410a ++andn $r9, $r4, $r20 :: ++before: $r9=0x51d79964a699ec8d, $r4=0xe82135537ca93e7f, $r20=0xcbadcb1dc4dd0ed0 ++after: $r9=0x200034423820302f, $r4=0xe82135537ca93e7f, $r20=0xcbadcb1dc4dd0ed0 ++andn $r18, $r25, $r25 :: ++before: $r18=0xeb546ce75bcba3f5, $r25=0x953d86e2bd6b136d, $r25=0x4914dbeee506d8ad ++after: $r18=000000000000000000, $r25=0x4914dbeee506d8ad, $r25=0x4914dbeee506d8ad ++andn $r27, $r15, $r14 :: ++before: $r27=0xc8b599a43b0b4683, $r15=0x0509638630676b88, $r14=0x3d278ed22a112a89 ++after: $r27=0x0008610410664100, $r15=0x0509638630676b88, $r14=0x3d278ed22a112a89 ++mul.w $r28, $r12, $r10 :: ++before: $r28=0xf6fcce3e1c5b1598, $r12=0xef2747013f911fe8, $r10=0x14a216fd69537967 ++after: $r28=0xffffffffabb07e58, $r12=0xef2747013f911fe8, $r10=0x14a216fd69537967 ++mul.w $r13, $r18, $r24 :: ++before: $r13=0x5e8a32c1e1e12aa4, $r18=0x30e007bb8dd185fa, $r24=0x1a74dd893af9fb5a ++after: $r13=0x000000003e2f37e4, $r18=0x30e007bb8dd185fa, $r24=0x1a74dd893af9fb5a ++mul.w $r10, $r20, $r4 :: ++before: $r10=0xf06f4af61b0e0c24, $r20=0x1b3624a77f26275f, $r4=0x653052ae3a1347df ++after: $r10=0xffffffffc934a4c1, 
$r20=0x1b3624a77f26275f, $r4=0x653052ae3a1347df ++mul.w $r23, $r19, $r10 :: ++before: $r23=0xccb5485ae4605cdd, $r19=0x67c67c647eaf9e6c, $r10=0xfb9b6c7b49ec10cf ++after: $r23=0x000000004177d954, $r19=0x67c67c647eaf9e6c, $r10=0xfb9b6c7b49ec10cf ++mul.w $r12, $r30, $r7 :: ++before: $r12=0xc1f45aaf98ffcb39, $r30=0x906f0c08c0bae02e, $r7=0xdf6cf5c05b5f2d34 ++after: $r12=0xffffffff8a6f9f58, $r30=0x906f0c08c0bae02e, $r7=0xdf6cf5c05b5f2d34 ++mul.w $r27, $r12, $r12 :: ++before: $r27=0x9545c6d9f812c0d9, $r12=0xacd016cb69e028b3, $r12=0x2b68e3a280d9c0b6 ++after: $r27=0x00000000459d8164, $r12=0x2b68e3a280d9c0b6, $r12=0x2b68e3a280d9c0b6 ++mul.w $r28, $r7, $r19 :: ++before: $r28=0x4cf68a9590da3da5, $r7=0x70ed8b9b03a6325d, $r19=0x1125383d12dad118 ++after: $r28=0x0000000073e4a5b8, $r7=0x70ed8b9b03a6325d, $r19=0x1125383d12dad118 ++mul.w $r20, $r12, $r20 :: ++before: $r20=0x10683d31408fb4c5, $r12=0x9ef4ea79672ce58d, $r20=0x960a13776923d3e4 ++after: $r20=0x000000001c76a894, $r12=0x9ef4ea79672ce58d, $r20=0x000000001c76a894 ++mul.w $r26, $r19, $r28 :: ++before: $r26=0xbf8a20b69fa4357b, $r19=0xf3e9b53a654e3cbf, $r28=0x20afdeb5a4b4e1c9 ++after: $r26=0x00000000601d90f7, $r19=0xf3e9b53a654e3cbf, $r28=0x20afdeb5a4b4e1c9 ++mul.w $r13, $r26, $r25 :: ++before: $r13=0x78f637d350c666bf, $r26=0xff742d96dc73e9e9, $r25=0x94a3289b55744707 ++after: $r13=0xffffffff879f045f, $r26=0xff742d96dc73e9e9, $r25=0x94a3289b55744707 ++mulh.w $r18, $r25, $r14 :: ++before: $r18=0xa988161162710d96, $r25=0x37443c6f5d0625ea, $r14=0x94da379219de8576 ++after: $r18=0x0000000009667587, $r25=0x37443c6f5d0625ea, $r14=0x94da379219de8576 ++mulh.w $r13, $r16, $r18 :: ++before: $r13=0x246298a54a25030a, $r16=0x33643ceed35cff64, $r18=0xc25702631b42c849 ++after: $r13=0xfffffffffb3f29fd, $r16=0x33643ceed35cff64, $r18=0xc25702631b42c849 ++mulh.w $r20, $r5, $r15 :: ++before: $r20=0x3b606ea986dcf13e, $r5=0x269dcd16567786d2, $r15=0x96c0983df45d5c03 ++after: $r20=0xfffffffffc11ee2e, $r5=0x269dcd16567786d2, $r15=0x96c0983df45d5c03 ++mulh.w $r19, $r19, $r25 :: ++before: $r19=0xab8fc1c922ba3e7a, $r19=0xdec5bddca513d198, $r25=0xf05e814d67d43f5a ++after: $r19=0xffffffffdb1f973d, $r19=0xffffffffdb1f973d, $r25=0xf05e814d67d43f5a ++mulh.w $r15, $r28, $r16 :: ++before: $r15=0x82fcfa24449231ba, $r28=0xf37548fee13133f3, $r16=0x256188ef96bb3d23 ++after: $r15=0x000000000cab1812, $r28=0xf37548fee13133f3, $r16=0x256188ef96bb3d23 ++mulh.w $r24, $r9, $r27 :: ++before: $r24=0x858ddeb68e948058, $r9=0x0ffb64d62e202462, $r27=0xe07a6dae07f46c11 ++after: $r24=0x00000000016eeb19, $r9=0x0ffb64d62e202462, $r27=0xe07a6dae07f46c11 ++mulh.w $r23, $r20, $r14 :: ++before: $r23=0x7713930e419350ff, $r20=0xd5d72e6efb86e428, $r14=0x49f87e78ddcc8400 ++after: $r23=0x000000000098fbfd, $r20=0xd5d72e6efb86e428, $r14=0x49f87e78ddcc8400 ++mulh.w $r28, $r20, $r25 :: ++before: $r28=0x552a9b7f3fa0c48a, $r20=0xd616afd20f193287, $r25=0xbcd2ae680b131cd2 ++after: $r28=0x0000000000a735bd, $r20=0xd616afd20f193287, $r25=0xbcd2ae680b131cd2 ++mulh.w $r16, $r19, $r12 :: ++before: $r16=0x94b154fc890497c3, $r19=0xd8217f47e4257a7c, $r12=0xb47bb0e4cff83cbf ++after: $r16=0x000000000539d140, $r19=0xd8217f47e4257a7c, $r12=0xb47bb0e4cff83cbf ++mulh.w $r23, $r23, $r6 :: ++before: $r23=0x0afb7fddb344318f, $r23=0xaafee418c4267e18, $r6=0x1763f686cd41d46e ++after: $r23=0x000000000bdcf0fd, $r23=0x000000000bdcf0fd, $r6=0x1763f686cd41d46e ++mulh.wu $r18, $r17, $r8 :: ++before: $r18=0xa92fa2817b19786c, $r17=0xaf23e3d2092f080c, $r8=0x771c36ac19259f2a ++after: $r18=0x0000000000e6f14b, $r17=0xaf23e3d2092f080c, $r8=0x771c36ac19259f2a 
++mulh.wu $r16, $r13, $r8 :: ++before: $r16=0xf4a7b7abe5f3831a, $r13=0xe8beff7f8f4330cd, $r8=0x38cebbe3d1af354d ++after: $r16=0x000000007557e799, $r13=0xe8beff7f8f4330cd, $r8=0x38cebbe3d1af354d ++mulh.wu $r8, $r23, $r29 :: ++before: $r8=0x6ca8c7d8ec316750, $r23=0xc3a59754c752c3a5, $r29=0x4b77e251de7f45f1 ++after: $r8=0xffffffffad3cde2d, $r23=0xc3a59754c752c3a5, $r29=0x4b77e251de7f45f1 ++mulh.wu $r20, $r25, $r30 :: ++before: $r20=0x6faa5d1372250132, $r25=0x68734123142c820a, $r30=0x0f7b4bdf342e2017 ++after: $r20=0x00000000041cacf0, $r25=0x68734123142c820a, $r30=0x0f7b4bdf342e2017 ++mulh.wu $r31, $r18, $r19 :: ++before: $r31=0x08cfa67422c1c5d5, $r18=0xb48ac9531206cef2, $r19=0x9f9f5d925c5cf738 ++after: $r31=0x000000000680fe39, $r18=0xb48ac9531206cef2, $r19=0x9f9f5d925c5cf738 ++mulh.wu $r25, $r7, $r27 :: ++before: $r25=0x85aa17ff1b3699ba, $r7=0x9a7aeabb800edb53, $r27=0x4eb1ec754c7cdb59 ++after: $r25=0x000000002642de08, $r7=0x9a7aeabb800edb53, $r27=0x4eb1ec754c7cdb59 ++mulh.wu $r19, $r4, $r28 :: ++before: $r19=0x821038d7fb43149c, $r4=0x44cd20261f5ae87e, $r28=0xf9d8916e8eb4ecb1 ++after: $r19=0x00000000117a95de, $r4=0x44cd20261f5ae87e, $r28=0xf9d8916e8eb4ecb1 ++mulh.wu $r30, $r23, $r28 :: ++before: $r30=0xef34433557594fb3, $r23=0x2f9401c8064c8ca0, $r28=0x5de6287c2a56e507 ++after: $r30=0x00000000010ab26c, $r23=0x2f9401c8064c8ca0, $r28=0x5de6287c2a56e507 ++mulh.wu $r13, $r6, $r17 :: ++before: $r13=0xd6b38c427ad5f669, $r6=0xbe04ea8987b20188, $r17=0x52cee1d144e3c134 ++after: $r13=0x00000000248401a8, $r6=0xbe04ea8987b20188, $r17=0x52cee1d144e3c134 ++mulh.wu $r26, $r19, $r17 :: ++before: $r26=0x2ea15eee9429b8a0, $r19=0x43598be92000d9f7, $r17=0x6364cfeb707aba6c ++after: $r26=0x000000000e0fb712, $r19=0x43598be92000d9f7, $r17=0x6364cfeb707aba6c ++mul.d $r19, $r4, $r10 :: ++before: $r19=0xf0235819cf1bab1f, $r4=0xdc7a0086353cfddf, $r10=0x6f18aec465b5af87 ++after: $r19=0xb1beaa2f3e605199, $r4=0xdc7a0086353cfddf, $r10=0x6f18aec465b5af87 ++mul.d $r19, $r31, $r20 :: ++before: $r19=0x24d7526c5e4669e3, $r31=0xaab7dd46e5af2493, $r20=0xd5df6eea42205e25 ++after: $r19=0x7ec27945fa1e433f, $r31=0xaab7dd46e5af2493, $r20=0xd5df6eea42205e25 ++mul.d $r15, $r20, $r4 :: ++before: $r15=0x3740ba48d64cc478, $r20=0xcfeffb7c35a98382, $r4=0xeab050fc9bdb3c52 ++after: $r15=0x3ae548f8215497a4, $r20=0xcfeffb7c35a98382, $r4=0xeab050fc9bdb3c52 ++mul.d $r29, $r7, $r25 :: ++before: $r29=0xe8858552c0e8eac8, $r7=0xb65ed231c27efb70, $r25=0xbb753de59e4ca3d1 ++after: $r29=0x57c0018869039670, $r7=0xb65ed231c27efb70, $r25=0xbb753de59e4ca3d1 ++mul.d $r5, $r30, $r4 :: ++before: $r5=0xc4f17df5c983317d, $r30=0xb2af9e86d443d8ce, $r4=0xf9e3c6d18372d0d3 ++after: $r5=0xeff3c7ee09cf11ca, $r30=0xb2af9e86d443d8ce, $r4=0xf9e3c6d18372d0d3 ++mul.d $r25, $r17, $r29 :: ++before: $r25=0xa09d11d50056b350, $r17=0x6609b14ca65f9aff, $r29=0x692def5a14a3278c ++after: $r25=0xbd4098e529429c74, $r17=0x6609b14ca65f9aff, $r29=0x692def5a14a3278c ++mul.d $r13, $r15, $r26 :: ++before: $r13=0xd528ed047af75775, $r15=0x896658fe826a0817, $r26=0xa456f53d5f2760b1 ++after: $r13=0x630a2082b2d937e7, $r15=0x896658fe826a0817, $r26=0xa456f53d5f2760b1 ++mul.d $r23, $r9, $r7 :: ++before: $r23=0x5d33f63ce8637a69, $r9=0xad38922264c721ff, $r7=0xe0514fea4ee52aca ++after: $r23=0x9d478a3f4bcfa936, $r9=0xad38922264c721ff, $r7=0xe0514fea4ee52aca ++mul.d $r25, $r23, $r30 :: ++before: $r25=0x5d74125f059662f3, $r23=0xa708100731e88710, $r30=0x739e4de71fec92e0 ++after: $r25=0xeb30fae9bb3d4e00, $r23=0xa708100731e88710, $r30=0x739e4de71fec92e0 ++mul.d $r26, $r18, $r30 :: ++before: $r26=0x110a94ffa2e12f32, 
$r18=0x01b770d6c423d4f8, $r30=0x38bf04d66f91531a ++after: $r26=0x5937a05ab2280930, $r18=0x01b770d6c423d4f8, $r30=0x38bf04d66f91531a ++mulh.d $r5, $r15, $r12 :: ++before: $r5=0xd72f46d42ca4db6b, $r15=0xe1771af0e69e49a6, $r12=0xd796f52fbd01a4bb ++after: $r5=0x04d1eb3a7f530298, $r15=0xe1771af0e69e49a6, $r12=0xd796f52fbd01a4bb ++mulh.d $r28, $r18, $r14 :: ++before: $r28=0x904e699bcbe32b08, $r18=0x9b5b69b4d817779c, $r14=0xa02ca97cc4e37f13 ++after: $r28=0x25ac2970a5b47a76, $r18=0x9b5b69b4d817779c, $r14=0xa02ca97cc4e37f13 ++mulh.d $r6, $r12, $r7 :: ++before: $r6=0xc75e1065b8dbcd34, $r12=0xec7d8ae6a65f2fd3, $r7=0xb7e32b52f40bc8ef ++after: $r6=0x057ee36929066010, $r12=0xec7d8ae6a65f2fd3, $r7=0xb7e32b52f40bc8ef ++mulh.d $r5, $r25, $r19 :: ++before: $r5=0x7b2e04c0c2f95e4f, $r25=0x9a5037ff200e982a, $r19=0xf862c0c6425ff2bc ++after: $r5=0x03064462e05709fc, $r25=0x9a5037ff200e982a, $r19=0xf862c0c6425ff2bc ++mulh.d $r14, $r8, $r23 :: ++before: $r14=0x5fd7ae31ad151daa, $r8=0x444243172f499ec0, $r23=0x9003c8aeabc39884 ++after: $r14=0xe22404eefbd57910, $r8=0x444243172f499ec0, $r23=0x9003c8aeabc39884 ++mulh.d $r7, $r23, $r13 :: ++before: $r7=0x0bc21ca397041a2b, $r23=0xe886455c8737b2ca, $r13=0xd5ccec2f631a1d60 ++after: $r7=0x03dea7b02a86f5c2, $r23=0xe886455c8737b2ca, $r13=0xd5ccec2f631a1d60 ++mulh.d $r26, $r16, $r13 :: ++before: $r26=0xd3894783f187ee9c, $r16=0xa7a6c4abeda9a22c, $r13=0x4375f7e49ed91384 ++after: $r26=0xe8b7ef23e33e1269, $r16=0xa7a6c4abeda9a22c, $r13=0x4375f7e49ed91384 ++mulh.d $r17, $r31, $r16 :: ++before: $r17=0xa93bd0cf9137745e, $r31=0x3a1b2b922b7645f1, $r16=0x7e33f64c19972ae3 ++after: $r17=0x1ca52ac301413b29, $r31=0x3a1b2b922b7645f1, $r16=0x7e33f64c19972ae3 ++mulh.d $r20, $r19, $r8 :: ++before: $r20=0xda9224c9ab488939, $r19=0xb7f5978bf509641d, $r8=0xf6fcd615333c30c0 ++after: $r20=0x028941970b9c41ae, $r19=0xb7f5978bf509641d, $r8=0xf6fcd615333c30c0 ++mulh.d $r12, $r17, $r20 :: ++before: $r12=0xcdbd51e35d5c1df3, $r17=0x254bd8eaadc946fe, $r20=0x9de163435088598b ++after: $r12=0xf1b4813d0885ff33, $r17=0x254bd8eaadc946fe, $r20=0x9de163435088598b ++mulh.du $r25, $r28, $r29 :: ++before: $r25=0xf7ef0dbf1bf7938a, $r28=0xd267d11ae422f604, $r29=0x089d6fd68226e13d ++after: $r25=0x0714a41f660b233e, $r28=0xd267d11ae422f604, $r29=0x089d6fd68226e13d ++mulh.du $r7, $r28, $r24 :: ++before: $r7=0xe568cf4a6d6bc199, $r28=0x6efedad6fbe95f2a, $r24=0xdf55853ed22d024e ++after: $r7=0x60d51505935d32a4, $r28=0x6efedad6fbe95f2a, $r24=0xdf55853ed22d024e ++mulh.du $r25, $r8, $r9 :: ++before: $r25=0x0bf7c0226b0c2072, $r8=0x794fd44a65c65ebb, $r9=0xa0391c3fa3cf1e5c ++after: $r25=0x4becf4d7a7a9ffeb, $r8=0x794fd44a65c65ebb, $r9=0xa0391c3fa3cf1e5c ++mulh.du $r30, $r16, $r7 :: ++before: $r30=0x3df3f3b3ff17f61a, $r16=0xcadd1f7e7150ad7b, $r7=0xbdc63d3f762cf02d ++after: $r30=0x966257cfb0059e70, $r16=0xcadd1f7e7150ad7b, $r7=0xbdc63d3f762cf02d ++mulh.du $r6, $r10, $r19 :: ++before: $r6=0x6601e05fc5f801cb, $r10=0xbc10a70104969251, $r19=0x2f50a00036fb7821 ++after: $r6=0x22c24967f0edf696, $r10=0xbc10a70104969251, $r19=0x2f50a00036fb7821 ++mulh.du $r17, $r9, $r5 :: ++before: $r17=0xffabc0cbdc8aa7b0, $r9=0x5288bc60da558afb, $r5=0x2795644a58b2668f ++after: $r17=0x0cc2fe9dc756ea23, $r9=0x5288bc60da558afb, $r5=0x2795644a58b2668f ++mulh.du $r26, $r8, $r15 :: ++before: $r26=0x68b64c997f561b59, $r8=0xe2ed2375e64b1bf3, $r15=0xe1033e583092ad96 ++after: $r26=0xc7754c35a4f2f082, $r8=0xe2ed2375e64b1bf3, $r15=0xe1033e583092ad96 ++mulh.du $r10, $r13, $r30 :: ++before: $r10=0x6450ec488eb4753b, $r13=0x4287b82860366cf8, $r30=0x01c15ed3f051fe8c ++after: 
$r10=0x0074c8aee8c0e7ce, $r13=0x4287b82860366cf8, $r30=0x01c15ed3f051fe8c ++mulh.du $r24, $r13, $r15 :: ++before: $r24=0x1169fa9dd6f8273d, $r13=0x6fd2cdb39e5d1fa3, $r15=0xff0526e206880684 ++after: $r24=0x6f653afff4bd7810, $r13=0x6fd2cdb39e5d1fa3, $r15=0xff0526e206880684 ++mulh.du $r8, $r9, $r10 :: ++before: $r8=0xe9cb6416a1492fbf, $r9=0xaf89960e18913df0, $r10=0x76b4251409ff9830 ++after: $r8=0x5164f154a1871400, $r9=0xaf89960e18913df0, $r10=0x76b4251409ff9830 ++mulw.d.w $r6, $r31, $r7 :: ++before: $r6=0x50ce021eb3b3f3a4, $r31=0xb859e7514e4c4d7c, $r7=0x372cb1e2b3200f36 ++after: $r6=0xe87cdae260229c28, $r31=0xb859e7514e4c4d7c, $r7=0x372cb1e2b3200f36 ++mulw.d.w $r31, $r7, $r28 :: ++before: $r31=0x925642fa7e2de9ab, $r7=0x61404b6550238ceb, $r28=0x75ed502242ed0430 ++after: $r31=0x14f35c8da06d1810, $r7=0x61404b6550238ceb, $r28=0x75ed502242ed0430 ++mulw.d.w $r19, $r16, $r10 :: ++before: $r19=0x0ef82de697f7239f, $r16=0xdf1c56dfe5c0e48d, $r10=0xbc7e740fe1b1dc25 ++after: $r19=0x031b681eebc73461, $r16=0xdf1c56dfe5c0e48d, $r10=0xbc7e740fe1b1dc25 ++mulw.d.w $r29, $r12, $r27 :: ++before: $r29=0xc104a400fa0d1dbf, $r12=0x2aa34e8a5fad6c6f, $r27=0x7f8e4d23644b0d4d ++after: $r29=0x257bcb22b6304063, $r12=0x2aa34e8a5fad6c6f, $r27=0x7f8e4d23644b0d4d ++mulw.d.w $r25, $r16, $r25 :: ++before: $r25=0x5b8ff9172c849fb9, $r16=0x843f90380af6f2af, $r25=0x12f7f8780cb8bfe0 ++after: $r25=0x008b7d1678ecea20, $r16=0x843f90380af6f2af, $r25=0x008b7d1678ecea20 ++mulw.d.w $r13, $r13, $r7 :: ++before: $r13=0x6bba79a88056d891, $r13=0x6757a43d403285ab, $r7=0x2d2ea385888c2664 ++after: $r13=0xe20b7699851798cc, $r13=0xe20b7699851798cc, $r7=0x2d2ea385888c2664 ++mulw.d.w $r12, $r8, $r23 :: ++before: $r12=0x5c96927dcf1fb14e, $r8=0x2b3767b9e9029d4b, $r23=0x252bbcc66b5d834b ++after: $r12=0xf65bb1e7178075f9, $r8=0x2b3767b9e9029d4b, $r23=0x252bbcc66b5d834b ++mulw.d.w $r6, $r13, $r10 :: ++before: $r6=0x5fa5a8b36e8ec3e0, $r13=0xcbca4b4d518b9466, $r10=0xabdf2ec674f70c5b ++after: $r6=0x2541f0d9edfc8842, $r13=0xcbca4b4d518b9466, $r10=0xabdf2ec674f70c5b ++mulw.d.w $r16, $r15, $r23 :: ++before: $r16=0x5b94eeb9c3c9fa01, $r15=0x5c4ebef486f83b43, $r23=0x73f3781c3a1e9216 ++after: $r16=0xe485c9734afb4dc2, $r15=0x5c4ebef486f83b43, $r23=0x73f3781c3a1e9216 ++mulw.d.w $r6, $r31, $r7 :: ++before: $r6=0xbc263312a123caed, $r31=0xe9aa8545d3a99a97, $r7=0x71b5dbacf4f7f2b8 ++after: $r6=0x01e91b5b89bada88, $r31=0xe9aa8545d3a99a97, $r7=0x71b5dbacf4f7f2b8 ++mulw.d.wu $r14, $r17, $r30 :: ++before: $r14=0x94452e0d7eb407b7, $r17=0x629b1902a484a77d, $r30=0x474359ca7f7165ed ++after: $r14=0x51e6af2596105fb9, $r17=0x629b1902a484a77d, $r30=0x474359ca7f7165ed ++mulw.d.wu $r26, $r7, $r5 :: ++before: $r26=0xae9771f0d59319b3, $r7=0x1bcb563dea8f3a3f, $r5=0x759334cc2d543103 ++after: $r26=0x29885124597fbdbd, $r7=0x1bcb563dea8f3a3f, $r5=0x759334cc2d543103 ++mulw.d.wu $r25, $r28, $r27 :: ++before: $r25=0x27ca0bf2d6cd2699, $r28=0x5a015da9b52ffc64, $r27=0x482a4fa5b5625914 ++after: $r25=0x806088dd28c67bd0, $r28=0x5a015da9b52ffc64, $r27=0x482a4fa5b5625914 ++mulw.d.wu $r8, $r4, $r16 :: ++before: $r8=0x22f61239dad7bc92, $r4=0xe8c9964b31b0e199, $r16=0x99fdef421aa22322 ++after: $r8=0x052b6faa1527e152, $r4=0xe8c9964b31b0e199, $r16=0x99fdef421aa22322 ++mulw.d.wu $r29, $r17, $r15 :: ++before: $r29=0xcc5eec6e4f2b5fdb, $r17=0x2d08ada074c2ac37, $r15=0x8967ce1cd4c2362e ++after: $r29=0x6109cada18fc8be2, $r17=0x2d08ada074c2ac37, $r15=0x8967ce1cd4c2362e ++mulw.d.wu $r27, $r23, $r16 :: ++before: $r27=0x2d057e2ead214d6c, $r23=0x987e7a10a0f3ee5d, $r16=0xd515e2a2f06be633 ++after: $r27=0x97288627099f0a87, 
$r23=0x987e7a10a0f3ee5d, $r16=0xd515e2a2f06be633 ++mulw.d.wu $r15, $r19, $r12 :: ++before: $r15=0xce24943d6fe20263, $r19=0xd6bbdcb20d76de15, $r12=0xcc277905bc41da62 ++after: $r15=0x09e6c1c22ff3e60a, $r19=0xd6bbdcb20d76de15, $r12=0xcc277905bc41da62 ++mulw.d.wu $r4, $r4, $r19 :: ++before: $r4=0xe37942a26dc0e882, $r4=0x6a30fb04c3b5431f, $r19=0x4c937bed67cb6c73 ++after: $r4=0x4f5971a615533aed, $r4=0x4f5971a615533aed, $r19=0x4c937bed67cb6c73 ++mulw.d.wu $r7, $r12, $r9 :: ++before: $r7=0xbdebe7a7b19b7dc0, $r12=0x3f6e790fb24d19f1, $r9=0x7a19c4fdd0d29f3e ++after: $r7=0x9171573c297af75e, $r12=0x3f6e790fb24d19f1, $r9=0x7a19c4fdd0d29f3e ++mulw.d.wu $r31, $r30, $r28 :: ++before: $r31=0x690687056e169108, $r30=0xa8abab5bf1d42538, $r28=0x0636a31884ca1e99 ++after: $r31=0x7d70517da256ce78, $r30=0xa8abab5bf1d42538, $r28=0x0636a31884ca1e99 ++div.w $r13, $r28, $r23 :: ++before: $r13=0x0000000016546290, $r28=0x00000000627aa138, $r23=0x000000000534168c ++after: $r13=0x0000000000000012, $r28=0x00000000627aa138, $r23=0x000000000534168c ++div.w $r28, $r19, $r9 :: ++before: $r28=0xffffffffbe03930d, $r19=0x00000000223d0ec7, $r9=0xffffffff8404aa67 ++after: $r28=000000000000000000, $r19=0x00000000223d0ec7, $r9=0xffffffff8404aa67 ++div.w $r18, $r19, $r30 :: ++before: $r18=0xffffffffac214649, $r19=0xffffffff8019c3b7, $r30=0xffffffff871cbf90 ++after: $r18=0x0000000000000001, $r19=0xffffffff8019c3b7, $r30=0xffffffff871cbf90 ++div.w $r24, $r25, $r7 :: ++before: $r24=0xffffffffa144ed80, $r25=0x000000001c4370c7, $r7=0x000000004695aa29 ++after: $r24=000000000000000000, $r25=0x000000001c4370c7, $r7=0x000000004695aa29 ++div.w $r9, $r27, $r4 :: ++before: $r9=0x000000003ae8b7c7, $r27=0xfffffffff3a6ebb2, $r4=0x00000000181d816a ++after: $r9=000000000000000000, $r27=0xfffffffff3a6ebb2, $r4=0x00000000181d816a ++div.w $r28, $r15, $r7 :: ++before: $r28=0xffffffff956a7de4, $r15=0xffffffff9aab217b, $r7=0x000000003b061b78 ++after: $r28=0xffffffffffffffff, $r15=0xffffffff9aab217b, $r7=0x000000003b061b78 ++div.w $r25, $r24, $r12 :: ++before: $r25=0x000000003c6167d4, $r24=0x000000002673145e, $r12=0x0000000001d5e391 ++after: $r25=0x0000000000000014, $r24=0x000000002673145e, $r12=0x0000000001d5e391 ++div.w $r23, $r15, $r4 :: ++before: $r23=0x000000003e0820ee, $r15=0x0000000042793c51, $r4=0x00000000286cdb51 ++after: $r23=0x0000000000000001, $r15=0x0000000042793c51, $r4=0x00000000286cdb51 ++div.w $r28, $r16, $r30 :: ++before: $r28=0xffffffffcf8fd242, $r16=0x000000002a76141e, $r30=0x0000000002429a52 ++after: $r28=0x0000000000000012, $r16=0x000000002a76141e, $r30=0x0000000002429a52 ++div.w $r29, $r8, $r18 :: ++before: $r29=0x0000000074991388, $r8=0xffffffffd594ef43, $r18=0x000000006d3f9603 ++after: $r29=000000000000000000, $r8=0xffffffffd594ef43, $r18=0x000000006d3f9603 ++mod.w $r8, $r13, $r14 :: ++before: $r8=0x000000005cc9e6db, $r13=0xfffffffff7327c6d, $r14=0x0000000023eef833 ++after: $r8=0xfffffffff7327c6d, $r13=0xfffffffff7327c6d, $r14=0x0000000023eef833 ++mod.w $r25, $r24, $r25 :: ++before: $r25=0x00000000539195e4, $r24=0xffffffffd94f10c8, $r25=0x000000002c5786d9 ++after: $r25=0xffffffffd94f10c8, $r24=0xffffffffd94f10c8, $r25=0xffffffffd94f10c8 ++mod.w $r10, $r16, $r23 :: ++before: $r10=0xffffffff9b15f725, $r16=0x00000000448a831d, $r23=0xffffffffd5d7d92b ++after: $r10=0x000000001a625c48, $r16=0x00000000448a831d, $r23=0xffffffffd5d7d92b ++mod.w $r6, $r5, $r29 :: ++before: $r6=0x000000001794d969, $r5=0x000000002fba86b0, $r29=0x0000000040e6ab6b ++after: $r6=0x000000002fba86b0, $r5=0x000000002fba86b0, $r29=0x0000000040e6ab6b ++mod.w $r16, $r14, 
$r29 :: ++before: $r16=0x000000006a503328, $r14=0xffffffffdf0b2ad2, $r29=0xffffffff90dc29c6 ++after: $r16=0xffffffffdf0b2ad2, $r14=0xffffffffdf0b2ad2, $r29=0xffffffff90dc29c6 ++mod.w $r30, $r14, $r18 :: ++before: $r30=0xffffffffc7670acd, $r14=0x0000000053f3b34f, $r18=0xffffffff84b62159 ++after: $r30=0x0000000053f3b34f, $r14=0x0000000053f3b34f, $r18=0xffffffff84b62159 ++mod.w $r31, $r6, $r18 :: ++before: $r31=0xffffffff98334c95, $r6=0xfffffffff241ffd8, $r18=0xffffffffa73314aa ++after: $r31=0xfffffffff241ffd8, $r6=0xfffffffff241ffd8, $r18=0xffffffffa73314aa ++mod.w $r12, $r8, $r4 :: ++before: $r12=0xffffffffd9f19db4, $r8=0xffffffffc89f9796, $r4=0xffffffffaa8e2a3b ++after: $r12=0xffffffffc89f9796, $r8=0xffffffffc89f9796, $r4=0xffffffffaa8e2a3b ++mod.w $r23, $r12, $r4 :: ++before: $r23=0xffffffff94e93220, $r12=0xfffffffffea1587a, $r4=0xffffffffb88b2b87 ++after: $r23=0xfffffffffea1587a, $r12=0xfffffffffea1587a, $r4=0xffffffffb88b2b87 ++mod.w $r13, $r9, $r18 :: ++before: $r13=0x0000000000f718c0, $r9=0xffffffffe264a3a5, $r18=0x0000000002f29ef3 ++after: $r13=0xffffffffffded923, $r9=0xffffffffe264a3a5, $r18=0x0000000002f29ef3 ++div.wu $r24, $r5, $r16 :: ++before: $r24=0x000000000ddf57c5, $r5=0x000000006b1a808c, $r16=0x000000000576fe70 ++after: $r24=0x0000000000000013, $r5=0x000000006b1a808c, $r16=0x000000000576fe70 ++div.wu $r26, $r7, $r9 :: ++before: $r26=0x00000000665e82ff, $r7=0x00000000344d887f, $r9=0x000000007fd6d6d8 ++after: $r26=000000000000000000, $r7=0x00000000344d887f, $r9=0x000000007fd6d6d8 ++div.wu $r13, $r18, $r15 :: ++before: $r13=0xffffffffe82e2cf8, $r18=0x000000007c66b628, $r15=0x000000000305c899 ++after: $r13=0x0000000000000029, $r18=0x000000007c66b628, $r15=0x000000000305c899 ++div.wu $r15, $r14, $r7 :: ++before: $r15=0x0000000000b06b1f, $r14=0x0000000056016282, $r7=0x00000000095a8701 ++after: $r15=0x0000000000000009, $r14=0x0000000056016282, $r7=0x00000000095a8701 ++div.wu $r19, $r12, $r31 :: ++before: $r19=0xffffffffb3a487d1, $r12=0xffffffffbe2fe16e, $r31=0xffffffff8dc0ff7f ++after: $r19=0x0000000000000001, $r12=0xffffffffbe2fe16e, $r31=0xffffffff8dc0ff7f ++div.wu $r6, $r10, $r20 :: ++before: $r6=0x000000001bb491e9, $r10=0x00000000064e382e, $r20=0x000000005977f9f1 ++after: $r6=000000000000000000, $r10=0x00000000064e382e, $r20=0x000000005977f9f1 ++div.wu $r9, $r29, $r28 :: ++before: $r9=0x00000000498c3349, $r29=0x0000000014cbb257, $r28=0xffffffff95165a4a ++after: $r9=000000000000000000, $r29=0x0000000014cbb257, $r28=0xffffffff95165a4a ++div.wu $r10, $r29, $r15 :: ++before: $r10=0xffffffffbb3f9c5d, $r29=0x000000002755057d, $r15=0x0000000014039cc4 ++after: $r10=0x0000000000000001, $r29=0x000000002755057d, $r15=0x0000000014039cc4 ++div.wu $r24, $r31, $r7 :: ++before: $r24=0xffffffffe5a9a3cd, $r31=0xffffffffa1f84b49, $r7=0xffffffffe45bd3b9 ++after: $r24=000000000000000000, $r31=0xffffffffa1f84b49, $r7=0xffffffffe45bd3b9 ++div.wu $r23, $r18, $r6 :: ++before: $r23=0x0000000054e07e9f, $r18=0xffffffffaccbdd8c, $r6=0xfffffffff3729b57 ++after: $r23=000000000000000000, $r18=0xffffffffaccbdd8c, $r6=0xfffffffff3729b57 ++mod.wu $r5, $r20, $r18 :: ++before: $r5=0xffffffffa1ce2e4e, $r20=0xffffffffdbeb0e2d, $r18=0x0000000070157135 ++after: $r5=0x000000006bd59cf8, $r20=0xffffffffdbeb0e2d, $r18=0x0000000070157135 ++mod.wu $r14, $r30, $r17 :: ++before: $r14=0x0000000010e75d07, $r30=0x00000000039c3080, $r17=0x000000001658d87b ++after: $r14=0x00000000039c3080, $r30=0x00000000039c3080, $r17=0x000000001658d87b ++mod.wu $r28, $r7, $r4 :: ++before: $r28=0x000000006df194db, $r7=0x0000000055fae7c9, 
$r4=0xffffffff9a87c1ef ++after: $r28=0x0000000055fae7c9, $r7=0x0000000055fae7c9, $r4=0xffffffff9a87c1ef ++mod.wu $r6, $r14, $r10 :: ++before: $r6=0xffffffff8feb78cc, $r14=0xffffffffe5032316, $r10=0x0000000018ab441e ++after: $r6=0x0000000006fdbe08, $r14=0xffffffffe5032316, $r10=0x0000000018ab441e ++mod.wu $r13, $r15, $r9 :: ++before: $r13=0xffffffffbb28952c, $r15=0x000000002d43f57d, $r9=0x000000002dfbf584 ++after: $r13=0x000000002d43f57d, $r15=0x000000002d43f57d, $r9=0x000000002dfbf584 ++mod.wu $r7, $r30, $r5 :: ++before: $r7=0x0000000009bfb2cf, $r30=0x000000006595d7b3, $r5=0xfffffffffffd1025 ++after: $r7=0x000000006595d7b3, $r30=0x000000006595d7b3, $r5=0xfffffffffffd1025 ++mod.wu $r10, $r9, $r16 :: ++before: $r10=0x00000000342671c6, $r9=0xfffffffff1ff8be3, $r16=0xfffffffffaea052b ++after: $r10=0xfffffffff1ff8be3, $r9=0xfffffffff1ff8be3, $r16=0xfffffffffaea052b ++mod.wu $r16, $r16, $r23 :: ++before: $r16=0xffffffffc0356055, $r16=0x000000002ac1f414, $r23=0x000000004a75c890 ++after: $r16=0x000000002ac1f414, $r16=0x000000002ac1f414, $r23=0x000000004a75c890 ++mod.wu $r19, $r8, $r7 :: ++before: $r19=0xfffffffff8ed6580, $r8=0x000000005fef460e, $r7=0x0000000068eedef2 ++after: $r19=0x000000005fef460e, $r8=0x000000005fef460e, $r7=0x0000000068eedef2 ++mod.wu $r29, $r25, $r25 :: ++before: $r29=0xffffffff9ea76eb0, $r25=0xffffffff818904b9, $r25=0xffffffffe92f4f30 ++after: $r29=000000000000000000, $r25=0xffffffffe92f4f30, $r25=0xffffffffe92f4f30 ++div.d $r7, $r17, $r7 :: ++before: $r7=0xc8f25fb958f2d668, $r17=0x074a14cbaa00fdea, $r7=0xcf95f3de82ceb015 ++after: $r7=000000000000000000, $r17=0x074a14cbaa00fdea, $r7=000000000000000000 ++div.d $r10, $r19, $r12 :: ++before: $r10=0x9ead8a6f6ea63534, $r19=0xaf80d344d48e6cd5, $r12=0xe1f40f759cbfe0e7 ++after: $r10=0x0000000000000002, $r19=0xaf80d344d48e6cd5, $r12=0xe1f40f759cbfe0e7 ++div.d $r23, $r28, $r28 :: ++before: $r23=0x35481a5285093e04, $r28=0xfd79e3c19b697fa8, $r28=0x6ffab603b9e1b7fb ++after: $r23=0x0000000000000001, $r28=0x6ffab603b9e1b7fb, $r28=0x6ffab603b9e1b7fb ++div.d $r30, $r25, $r4 :: ++before: $r30=0x3eacf1d695a34b95, $r25=0xfbff957ab051d494, $r4=0x0670724b8930d53f ++after: $r30=000000000000000000, $r25=0xfbff957ab051d494, $r4=0x0670724b8930d53f ++div.d $r31, $r29, $r6 :: ++before: $r31=0xce8d3df48871d655, $r29=0xf351f7f35927e83d, $r6=0x93a3085686f4101f ++after: $r31=000000000000000000, $r29=0xf351f7f35927e83d, $r6=0x93a3085686f4101f ++div.d $r17, $r23, $r8 :: ++before: $r17=0xfc913f8b14dda5a5, $r23=0x001f938af81988de, $r8=0x9d021a9f06b46953 ++after: $r17=000000000000000000, $r23=0x001f938af81988de, $r8=0x9d021a9f06b46953 ++div.d $r7, $r29, $r15 :: ++before: $r7=0x4593da2923f2ac5b, $r29=0x11fc5a958b182a55, $r15=0x2edafaf2857c6697 ++after: $r7=000000000000000000, $r29=0x11fc5a958b182a55, $r15=0x2edafaf2857c6697 ++div.d $r13, $r31, $r27 :: ++before: $r13=0x97236145608dd8c3, $r31=0x1f0ee96afd23910b, $r27=0xe35e4d5efd2204d3 ++after: $r13=0xffffffffffffffff, $r31=0x1f0ee96afd23910b, $r27=0xe35e4d5efd2204d3 ++div.d $r13, $r26, $r14 :: ++before: $r13=0x2c057bd222f216df, $r26=0x1e006853720971c3, $r14=0x81e35a993e6a15b5 ++after: $r13=000000000000000000, $r26=0x1e006853720971c3, $r14=0x81e35a993e6a15b5 ++div.d $r5, $r9, $r4 :: ++before: $r5=0x93c0d85c66f2c5ab, $r9=0x774fbe894b2ed067, $r4=0x2c46387d55732742 ++after: $r5=0x0000000000000002, $r9=0x774fbe894b2ed067, $r4=0x2c46387d55732742 ++mod.d $r19, $r26, $r16 :: ++before: $r19=0x63304d2181f4a4da, $r26=0x9ed948849ddee475, $r16=0x18a360d3ab980398 ++after: $r19=0xe8c36affa0a6ef3d, $r26=0x9ed948849ddee475, 
$r16=0x18a360d3ab980398 ++mod.d $r27, $r23, $r13 :: ++before: $r27=0xf7156e74db7a8d92, $r23=0x324e7001287ce2a8, $r13=0x3cc7524686bed31c ++after: $r27=0x324e7001287ce2a8, $r23=0x324e7001287ce2a8, $r13=0x3cc7524686bed31c ++mod.d $r8, $r26, $r19 :: ++before: $r8=0x7bda37a222135803, $r26=0x1daf8fd66ff987ed, $r19=0x334631279104fc3b ++after: $r8=0x1daf8fd66ff987ed, $r26=0x1daf8fd66ff987ed, $r19=0x334631279104fc3b ++mod.d $r25, $r15, $r7 :: ++before: $r25=0xd1a0f45d5b463d53, $r15=0x9c4cd7bef3bf0712, $r7=0x420a5c702006f3cc ++after: $r25=0xde57342f13c5fade, $r15=0x9c4cd7bef3bf0712, $r7=0x420a5c702006f3cc ++mod.d $r25, $r18, $r7 :: ++before: $r25=0x93487a905cb08a75, $r18=0x8c79cafa8bebf0a8, $r7=0x1478409d192c144b ++after: $r25=0xf2d30e0c09c8561f, $r18=0x8c79cafa8bebf0a8, $r7=0x1478409d192c144b ++mod.d $r8, $r27, $r27 :: ++before: $r8=0x8756a1690dd7896d, $r27=0x35273279ea76319f, $r27=0xc5292f2331abc6dd ++after: $r8=000000000000000000, $r27=0xc5292f2331abc6dd, $r27=0xc5292f2331abc6dd ++mod.d $r15, $r10, $r24 :: ++before: $r15=0xf8c476adbc930802, $r10=0x8b5832bcd0f6c87e, $r24=0x6cba54a72da38702 ++after: $r15=0xf8128763fe9a4f80, $r10=0x8b5832bcd0f6c87e, $r24=0x6cba54a72da38702 ++mod.d $r27, $r7, $r6 :: ++before: $r27=0x2387015bddb2c076, $r7=0x231e30de7a72ad90, $r6=0x81f1285973e8dc11 ++after: $r27=0x231e30de7a72ad90, $r7=0x231e30de7a72ad90, $r6=0x81f1285973e8dc11 ++mod.d $r16, $r9, $r12 :: ++before: $r16=0x3388d23c07feb1da, $r9=0xe8c01f744b310474, $r12=0xa29071d702959009 ++after: $r16=0xe8c01f744b310474, $r9=0xe8c01f744b310474, $r12=0xa29071d702959009 ++mod.d $r13, $r10, $r20 :: ++before: $r13=0xbd45a261f8de4fe4, $r10=0x6fb0a8c9a2681a8e, $r20=0x2f1b7055cf2409ec ++after: $r13=0x1179c81e042006b6, $r10=0x6fb0a8c9a2681a8e, $r20=0x2f1b7055cf2409ec ++div.du $r17, $r10, $r24 :: ++before: $r17=0x4d363fd48a626fda, $r10=0x7ccdeeaa6c24885f, $r24=0xfcc68e72f59750ae ++after: $r17=000000000000000000, $r10=0x7ccdeeaa6c24885f, $r24=0xfcc68e72f59750ae ++div.du $r20, $r20, $r10 :: ++before: $r20=0x808fa5cb6a75fd6f, $r20=0x0f3f712970031005, $r10=0x1709a8adab2fa578 ++after: $r20=000000000000000000, $r20=000000000000000000, $r10=0x1709a8adab2fa578 ++div.du $r15, $r14, $r19 :: ++before: $r15=0xcd3107423486c8fe, $r14=0xf6bc56277282cd14, $r19=0x0961ac833f00f3e3 ++after: $r15=0x000000000000001a, $r14=0xf6bc56277282cd14, $r19=0x0961ac833f00f3e3 ++div.du $r4, $r29, $r18 :: ++before: $r4=0xa0bfc2fc5b35fa79, $r29=0x2b28c09aa5f12845, $r18=0xed44da2fdf5dce00 ++after: $r4=000000000000000000, $r29=0x2b28c09aa5f12845, $r18=0xed44da2fdf5dce00 ++div.du $r4, $r6, $r25 :: ++before: $r4=0x1fc6e23fd0f09ed0, $r6=0xeaa71d9fb42223ca, $r25=0x045689545e60381c ++after: $r4=0x0000000000000036, $r6=0xeaa71d9fb42223ca, $r25=0x045689545e60381c ++div.du $r10, $r8, $r12 :: ++before: $r10=0xa3710c512d4c006c, $r8=0xc011778733c50a6e, $r12=0xb44475ee048d8167 ++after: $r10=0x0000000000000001, $r8=0xc011778733c50a6e, $r12=0xb44475ee048d8167 ++div.du $r29, $r4, $r29 :: ++before: $r29=0x46d27abff0da1972, $r4=0x17a4e863a182dcd0, $r29=0x59a7b82980ac6a6d ++after: $r29=000000000000000000, $r4=0x17a4e863a182dcd0, $r29=000000000000000000 ++div.du $r15, $r8, $r30 :: ++before: $r15=0x68120919dbbd9b19, $r8=0x4c296c89a6f7a6df, $r30=0x9d9166c1cd0eecfa ++after: $r15=000000000000000000, $r8=0x4c296c89a6f7a6df, $r30=0x9d9166c1cd0eecfa ++div.du $r7, $r18, $r17 :: ++before: $r7=0xd2389cb7af92be89, $r18=0x9a1f65b2c59cfda3, $r17=0xe316cf92f8f0574f ++after: $r7=000000000000000000, $r18=0x9a1f65b2c59cfda3, $r17=0xe316cf92f8f0574f ++div.du $r15, $r25, $r17 :: ++before: 
$r15=0x49651d72d87da955, $r25=0xd22c499c27908743, $r17=0x08d824b01058ecb8 ++after: $r15=0x0000000000000017, $r25=0xd22c499c27908743, $r17=0x08d824b01058ecb8 ++mod.du $r26, $r8, $r23 :: ++before: $r26=0xb0bd66f10c34fe23, $r8=0x5eb9b775d83b4893, $r23=0x08867d4b638f2622 ++after: $r26=0x00f255389114a51d, $r8=0x5eb9b775d83b4893, $r23=0x08867d4b638f2622 ++mod.du $r8, $r10, $r25 :: ++before: $r8=0xe236349cd47eeb11, $r10=0x119102fd7b236a81, $r25=0x08fd72a09e4fb45f ++after: $r8=0x0893905cdcd3b622, $r10=0x119102fd7b236a81, $r25=0x08fd72a09e4fb45f ++mod.du $r25, $r4, $r5 :: ++before: $r25=0x1b669725a0c3a970, $r4=0x0175359099c87b83, $r5=0xcad295c79f1d835a ++after: $r25=0x0175359099c87b83, $r4=0x0175359099c87b83, $r5=0xcad295c79f1d835a ++mod.du $r7, $r28, $r20 :: ++before: $r7=0x7117e70798869df4, $r28=0xe35b93aa0c37fe97, $r20=0x741084dead7970d0 ++after: $r7=0x6f4b0ecb5ebe8dc7, $r28=0xe35b93aa0c37fe97, $r20=0x741084dead7970d0 ++mod.du $r30, $r24, $r9 :: ++before: $r30=0xc4d432a8ce91f693, $r24=0x77c03aceb2ea6b45, $r9=0xb8cd7773fb72b7ca ++after: $r30=0x77c03aceb2ea6b45, $r24=0x77c03aceb2ea6b45, $r9=0xb8cd7773fb72b7ca ++mod.du $r23, $r9, $r28 :: ++before: $r23=0x13f1f3e1891b6b73, $r9=0x9811699becce53a9, $r28=0xed15e264f0c39b88 ++after: $r23=0x9811699becce53a9, $r9=0x9811699becce53a9, $r28=0xed15e264f0c39b88 ++mod.du $r13, $r12, $r14 :: ++before: $r13=0xb8b22bcb0cb970e8, $r12=0x16cdecd7c0091cd2, $r14=0x4fcab819ebadbdfd ++after: $r13=0x16cdecd7c0091cd2, $r12=0x16cdecd7c0091cd2, $r14=0x4fcab819ebadbdfd ++mod.du $r30, $r17, $r12 :: ++before: $r30=0xbf96226d2de1240d, $r17=0x9fe4b2c7557d6b9a, $r12=0x3668e581a5de6efd ++after: $r30=0x3312e7c409c08da0, $r17=0x9fe4b2c7557d6b9a, $r12=0x3668e581a5de6efd ++mod.du $r14, $r4, $r6 :: ++before: $r14=0x9bc8f8a69a7f55c2, $r4=0x530a9c5a21769bab, $r6=0x2805bef72d33cbd5 ++after: $r14=0x02ff1e6bc70f0401, $r4=0x530a9c5a21769bab, $r6=0x2805bef72d33cbd5 ++mod.du $r23, $r28, $r12 :: ++before: $r23=0x82a854f86e642cba, $r28=0x0dd0fd63485d6c3d, $r12=0x56b21f15cb9d2bf2 ++after: $r23=0x0dd0fd63485d6c3d, $r28=0x0dd0fd63485d6c3d, $r12=0x56b21f15cb9d2bf2 ++alsl.w $r18, $r10, $r15, 2 :: ++before: $r18=0xafb40df16156827b, $r10=0x9b0b86116a0d89cb, $r15=0x80086c066ea6842b ++after: $r18=0x0000000016dcab57, $r10=0x9b0b86116a0d89cb, $r15=0x80086c066ea6842b ++alsl.w $r24, $r5, $r4, 2 :: ++before: $r24=0xb8b63b8205a919df, $r5=0x7319260322fa2d6d, $r4=0x1efce6644a51ebf9 ++after: $r24=0xffffffffd63aa1ad, $r5=0x7319260322fa2d6d, $r4=0x1efce6644a51ebf9 ++alsl.w $r24, $r5, $r27, 2 :: ++before: $r24=0xb4f0fd355869e078, $r5=0x26abeea20b7d1ac1, $r27=0x4108f7f27e321c8f ++after: $r24=0xffffffffac268793, $r5=0x26abeea20b7d1ac1, $r27=0x4108f7f27e321c8f ++alsl.w $r24, $r29, $r10, 1 :: ++before: $r24=0x4b948e9a0b82df22, $r29=0x11893c9dd43d0112, $r10=0x51a030165671a055 ++after: $r24=0xfffffffffeeba279, $r29=0x11893c9dd43d0112, $r10=0x51a030165671a055 ++alsl.w $r5, $r10, $r18, 1 :: ++before: $r5=0xfc253ac9e2b55590, $r10=0x2682507563a85b07, $r18=0xa467083f66457d1d ++after: $r5=0x000000002d96332b, $r10=0x2682507563a85b07, $r18=0xa467083f66457d1d ++alsl.w $r20, $r13, $r10, 3 :: ++before: $r20=0x76e8c346a721cdab, $r13=0x548f2762bfb1bc01, $r10=0xa6e0d27e62dcc594 ++after: $r20=0x00000000606aa59c, $r13=0x548f2762bfb1bc01, $r10=0xa6e0d27e62dcc594 ++alsl.w $r16, $r6, $r24, 3 :: ++before: $r16=0x039f77b88fc3b663, $r6=0x281818bf4a36a7e5, $r24=0x86cd2a06ef475a61 ++after: $r16=0x0000000040fc9989, $r6=0x281818bf4a36a7e5, $r24=0x86cd2a06ef475a61 ++alsl.w $r14, $r18, $r9, 4 :: ++before: $r14=0x08a58ea94346ff16, 
$r18=0x4ff191f91397adea, $r9=0x4cda359b03c97a53 ++after: $r14=0x000000003d4458f3, $r18=0x4ff191f91397adea, $r9=0x4cda359b03c97a53 ++alsl.w $r8, $r6, $r29, 1 :: ++before: $r8=0xae0bfa182556c725, $r6=0xda179bc2f41d03d3, $r29=0x1d23e4da08af7978 ++after: $r8=0xfffffffff0e9811e, $r6=0xda179bc2f41d03d3, $r29=0x1d23e4da08af7978 ++alsl.w $r31, $r26, $r30, 1 :: ++before: $r31=0xd6af9fcd7ffd8e75, $r26=0x3e88bb77d6665633, $r30=0x23a0414c69b804c1 ++after: $r31=0x000000001684b127, $r26=0x3e88bb77d6665633, $r30=0x23a0414c69b804c1 ++alsl.wu $r20, $r24, $r18, 2 :: ++before: $r20=0xc714872ff3c39370, $r24=0xcaea31ddabb275f9, $r18=0xedbfc2cedca8eb7a ++after: $r20=0x000000008b72c35e, $r24=0xcaea31ddabb275f9, $r18=0xedbfc2cedca8eb7a ++alsl.wu $r13, $r26, $r15, 3 :: ++before: $r13=0xe1a0ba1adcb75aa4, $r26=0x8adbed432acf321a, $r15=0xeae447eaa60bb142 ++after: $r13=0x00000000fc854212, $r26=0x8adbed432acf321a, $r15=0xeae447eaa60bb142 ++alsl.wu $r4, $r17, $r27, 3 :: ++before: $r4=0xb153f9ecea23068c, $r17=0xd2066b089c9499a3, $r27=0x36ed3c96ac4751aa ++after: $r4=0x0000000090ec1ec2, $r17=0xd2066b089c9499a3, $r27=0x36ed3c96ac4751aa ++alsl.wu $r20, $r10, $r4, 4 :: ++before: $r20=0x8fb2705357e98d66, $r10=0xd353329585fc71dd, $r4=0x739237ed6a677f00 ++after: $r20=0x00000000ca2e9cd0, $r10=0xd353329585fc71dd, $r4=0x739237ed6a677f00 ++alsl.wu $r31, $r12, $r23, 2 :: ++before: $r31=0x6caac60acd9bc6f4, $r12=0xc87131b9171530df, $r23=0x39c8e321a6e131c0 ++after: $r31=0x000000000335f53c, $r12=0xc87131b9171530df, $r23=0x39c8e321a6e131c0 ++alsl.wu $r13, $r14, $r19, 2 :: ++before: $r13=0xd2c7072036f54e45, $r14=0x35ea1627556f8f98, $r19=0x97054728433042d3 ++after: $r13=0x0000000098ee8133, $r14=0x35ea1627556f8f98, $r19=0x97054728433042d3 ++alsl.wu $r7, $r14, $r5, 1 :: ++before: $r7=0x5a0f1fae80105d64, $r14=0xd300b74879e33a53, $r5=0x3a1e7389d0669d4c ++after: $r7=0x00000000c42d11f2, $r14=0xd300b74879e33a53, $r5=0x3a1e7389d0669d4c ++alsl.wu $r28, $r4, $r9, 1 :: ++before: $r28=0xcd7fd8389b4f4062, $r4=0xad1830d644c205e7, $r9=0xced1c031d73f9087 ++after: $r28=0x0000000060c39c55, $r4=0xad1830d644c205e7, $r9=0xced1c031d73f9087 ++alsl.wu $r13, $r9, $r29, 4 :: ++before: $r13=0x081601560f53b081, $r9=0xd3ee3c45f08cd218, $r29=0xa7d5a43a1df2aa1d ++after: $r13=0x0000000026bfcb9d, $r9=0xd3ee3c45f08cd218, $r29=0xa7d5a43a1df2aa1d ++alsl.wu $r30, $r29, $r31, 2 :: ++before: $r30=0xf383bd5bfae7e46d, $r29=0x67862a0151c65567, $r31=0x9cdcbf604f46c48a ++after: $r30=0x0000000096601a26, $r29=0x67862a0151c65567, $r31=0x9cdcbf604f46c48a ++alsl.d $r18, $r28, $r16, 4 :: ++before: $r18=0x53e533e973dfa49c, $r28=0x6665a9d32abaaf55, $r16=0xf70490874fb75e6e ++after: $r18=0x5d5f2db9fb6253be, $r28=0x6665a9d32abaaf55, $r16=0xf70490874fb75e6e ++alsl.d $r10, $r30, $r18, 2 :: ++before: $r10=0xfb14c3e6acd722c3, $r30=0xcae19862ab088fcc, $r18=0x87c434d85259d923 ++after: $r10=0xb34a9662fe7c1853, $r30=0xcae19862ab088fcc, $r18=0x87c434d85259d923 ++alsl.d $r17, $r25, $r26, 1 :: ++before: $r17=0x95e79a567c313ec7, $r25=0x83a0e706c2c4c534, $r26=0x2f49f1e9d5b91fc9 ++after: $r17=0x368bbff75b42aa31, $r25=0x83a0e706c2c4c534, $r26=0x2f49f1e9d5b91fc9 ++alsl.d $r7, $r24, $r24, 2 :: ++before: $r7=0x35b966d0db9f681c, $r24=0xc0bc97593f1054fc, $r24=0x7e564928b0a53ac6 ++after: $r7=0x77af6dcb733a25de, $r24=0x7e564928b0a53ac6, $r24=0x7e564928b0a53ac6 ++alsl.d $r6, $r30, $r24, 3 :: ++before: $r6=0x38ad1fb21e071421, $r30=0xb959c439b0436d6d, $r24=0x647c742c9ce02fc5 ++after: $r6=0x2f4a95fa1efb9b2d, $r30=0xb959c439b0436d6d, $r24=0x647c742c9ce02fc5 ++alsl.d $r18, $r28, $r10, 2 :: ++before: 
$r18=0x1bde2962dc5bb68b, $r28=0x67c403d00c9389bd, $r10=0x8fc18921f225d05a ++after: $r18=0x2ed198622473f74e, $r28=0x67c403d00c9389bd, $r10=0x8fc18921f225d05a ++alsl.d $r8, $r27, $r15, 3 :: ++before: $r8=0x5b8de9d8b393fa06, $r27=0x393ec1c28e89e9d8, $r15=0x1a59f9d852c3f8ba ++after: $r8=0xe45007ecc713477a, $r27=0x393ec1c28e89e9d8, $r15=0x1a59f9d852c3f8ba ++alsl.d $r27, $r24, $r6, 4 :: ++before: $r27=0x72195c1ca51cc4db, $r24=0x4ee5b51e1e161ab2, $r6=0x08a10acb4b625fef ++after: $r27=0xf6fc5cad2cc40b0f, $r24=0x4ee5b51e1e161ab2, $r6=0x08a10acb4b625fef ++alsl.d $r29, $r4, $r18, 2 :: ++before: $r29=0xf3ed9e39d83d3dec, $r4=0xa3816509b9a6c23d, $r18=0x6949e8e534450dd5 ++after: $r29=0xf74f7d0c1ae016c9, $r4=0xa3816509b9a6c23d, $r18=0x6949e8e534450dd5 ++alsl.d $r16, $r13, $r8, 1 :: ++before: $r16=0x588f388f25a342df, $r13=0xde33a74109c7be30, $r8=0x8b02cf06997a065a ++after: $r16=0x476a1d88ad0982ba, $r13=0xde33a74109c7be30, $r8=0x8b02cf06997a065a ++lu12i.w $r9, 94146 :: ++before: $r9=0xdf45bd002ccf48e1 ++after: $r9=0x0000000016fc2000 ++lu12i.w $r10, 129014 :: ++before: $r10=0xa5138a37d09ada8a ++after: $r10=0x000000001f7f6000 ++lu12i.w $r18, -130138 :: ++before: $r18=0x0efe46a52b8b3e5e ++after: $r18=0xffffffffe03a6000 ++lu12i.w $r7, -467080 :: ++before: $r7=0x29084adf6d033a88 ++after: $r7=0xffffffff8df78000 ++lu12i.w $r10, 360675 :: ++before: $r10=0xe9072e7fec2a5d1c ++after: $r10=0x00000000580e3000 ++lu12i.w $r28, 205272 :: ++before: $r28=0x2f7d41c7bd959cd5 ++after: $r28=0x00000000321d8000 ++lu12i.w $r16, -266298 :: ++before: $r16=0xcb48200d89b48566 ++after: $r16=0xffffffffbefc6000 ++lu12i.w $r12, -186346 :: ++before: $r12=0xd605223c244f4a50 ++after: $r12=0xffffffffd2816000 ++lu12i.w $r15, 247864 :: ++before: $r15=0x22c035c8c90016be ++after: $r15=0x000000003c838000 ++lu12i.w $r20, -511005 :: ++before: $r20=0x6b2fd1aa0b603fec ++after: $r20=0xffffffff833e3000 ++lu32i.d $r8, -310956 :: ++before: $r8=0xb331616751ed8877 ++after: $r8=0xfffb415451ed8877 ++lu32i.d $r17, 35590 :: ++before: $r17=0x0e49bab8d80e1dd7 ++after: $r17=0x00008b06d80e1dd7 ++lu32i.d $r4, 500474 :: ++before: $r4=0x842cdc9ac0a0adf6 ++after: $r4=0x0007a2fac0a0adf6 ++lu32i.d $r23, -447277 :: ++before: $r23=0xc9ca69b8e5ab079e ++after: $r23=0xfff92cd3e5ab079e ++lu32i.d $r12, -503028 :: ++before: $r12=0x27d83e1c77dec50a ++after: $r12=0xfff8530c77dec50a ++lu32i.d $r26, -355708 :: ++before: $r26=0xc00dcc918a89f350 ++after: $r26=0xfffa92848a89f350 ++lu32i.d $r16, -231989 :: ++before: $r16=0xd180188cdc073491 ++after: $r16=0xfffc75cbdc073491 ++lu32i.d $r26, 250642 :: ++before: $r26=0x4efae034432bbb3b ++after: $r26=0x0003d312432bbb3b ++lu32i.d $r15, 237105 :: ++before: $r15=0x7bf2141e673e336f ++after: $r15=0x00039e31673e336f ++lu32i.d $r4, -312071 :: ++before: $r4=0x187c50bfc5eb8f32 ++after: $r4=0xfffb3cf9c5eb8f32 ++lu52i.d $r8, $r25, 1920 :: ++before: $r8=0x1da74dfcb33d471a, $r25=0x453ae9f1200f4d41 ++after: $r8=0x780ae9f1200f4d41, $r25=0x453ae9f1200f4d41 ++lu52i.d $r14, $r25, -2008 :: ++before: $r14=0x5e954055ebaec78f, $r25=0xb7637f9119e12e31 ++after: $r14=0x82837f9119e12e31, $r25=0xb7637f9119e12e31 ++lu52i.d $r26, $r24, -1803 :: ++before: $r26=0xead69e40b96b23bf, $r24=0x779862b03d1ab575 ++after: $r26=0x8f5862b03d1ab575, $r24=0x779862b03d1ab575 ++lu52i.d $r5, $r25, -1406 :: ++before: $r5=0x452236306da7c667, $r25=0x9f16a6e48cca3a7b ++after: $r5=0xa826a6e48cca3a7b, $r25=0x9f16a6e48cca3a7b ++lu52i.d $r26, $r23, -667 :: ++before: $r26=0x5604b9744291e45a, $r23=0x70eecb3116b1795c ++after: $r26=0xd65ecb3116b1795c, $r23=0x70eecb3116b1795c ++lu52i.d $r14, $r27, -1221 
:: ++before: $r14=0x6d9a8cfe459c1c48, $r27=0x85452bdd40205e0d ++after: $r14=0xb3b52bdd40205e0d, $r27=0x85452bdd40205e0d ++lu52i.d $r25, $r8, 423 :: ++before: $r25=0x1a8d72e42f68a33d, $r8=0x7089b6fe4c1f7a70 ++after: $r25=0x1a79b6fe4c1f7a70, $r8=0x7089b6fe4c1f7a70 ++lu52i.d $r30, $r10, -177 :: ++before: $r30=0x7c4fe646acac7ac0, $r10=0xe7d222ba1fd5cae2 ++after: $r30=0xf4f222ba1fd5cae2, $r10=0xe7d222ba1fd5cae2 ++lu52i.d $r6, $r13, -1438 :: ++before: $r6=0xdb3d6a615a9e492f, $r13=0xaa9303648ff489f2 ++after: $r6=0xa62303648ff489f2, $r13=0xaa9303648ff489f2 ++lu52i.d $r25, $r4, -634 :: ++before: $r25=0x8b41b813d85b8ee8, $r4=0xe4d31961e42e713c ++after: $r25=0xd8631961e42e713c, $r4=0xe4d31961e42e713c ++addi.w $r6, $r27, 1727 :: ++before: $r6=0x12845f036198fa6f, $r27=0xda77c63c764655da ++after: $r6=0x0000000076465c99, $r27=0xda77c63c764655da ++addi.w $r9, $r8, -381 :: ++before: $r9=0x21a7e3cfa2649a4f, $r8=0xc64c73b3bd4c1dcb ++after: $r9=0xffffffffbd4c1c4e, $r8=0xc64c73b3bd4c1dcb ++addi.w $r16, $r6, -186 :: ++before: $r16=0x6c47b02ef52a3502, $r6=0x24ca1a646dac5cc3 ++after: $r16=0x000000006dac5c09, $r6=0x24ca1a646dac5cc3 ++addi.w $r20, $r31, 1503 :: ++before: $r20=0xb6144d8f9513c78e, $r31=0xc4b808764e894e6c ++after: $r20=0x000000004e89544b, $r31=0xc4b808764e894e6c ++addi.w $r19, $r17, -1918 :: ++before: $r19=0xcf97c9215c961121, $r17=0x9b714c4cb899399b ++after: $r19=0xffffffffb899321d, $r17=0x9b714c4cb899399b ++addi.w $r14, $r8, -1781 :: ++before: $r14=0xe1abf22f6c3c82ec, $r8=0x4110e9c1b5f59ef6 ++after: $r14=0xffffffffb5f59801, $r8=0x4110e9c1b5f59ef6 ++addi.w $r29, $r18, 2047 :: ++before: $r29=0x4b64427195dda12d, $r18=0xadf5af70b7b3f37b ++after: $r29=0xffffffffb7b3fb7a, $r18=0xadf5af70b7b3f37b ++addi.w $r4, $r30, 244 :: ++before: $r4=0xfc785d46f5bbdff4, $r30=0x1e061e9d51362d9c ++after: $r4=0x0000000051362e90, $r30=0x1e061e9d51362d9c ++addi.w $r7, $r23, -376 :: ++before: $r7=0xe037576d82c12e8d, $r23=0xa77c8da72af708f1 ++after: $r7=0x000000002af70779, $r23=0xa77c8da72af708f1 ++addi.w $r23, $r17, 1924 :: ++before: $r23=0x00a10df57c4103ef, $r17=0x26d2628746ad0a3e ++after: $r23=0x0000000046ad11c2, $r17=0x26d2628746ad0a3e ++addi.d $r14, $r14, 152 :: ++before: $r14=0x61b497fb58a816d9, $r14=0x29eb218dd65d9d6c ++after: $r14=0x29eb218dd65d9e04, $r14=0x29eb218dd65d9e04 ++addi.d $r20, $r13, -640 :: ++before: $r20=0xd80db8387a8cdd93, $r13=0x5e23e4b01f2bbd6d ++after: $r20=0x5e23e4b01f2bbaed, $r13=0x5e23e4b01f2bbd6d ++addi.d $r13, $r25, -743 :: ++before: $r13=0x5dfea060c6e8f587, $r25=0x95f49b783954f9f9 ++after: $r13=0x95f49b783954f712, $r25=0x95f49b783954f9f9 ++addi.d $r4, $r30, 676 :: ++before: $r4=0xd72f370f6ce7bc4c, $r30=0x148550b0f97ce601 ++after: $r4=0x148550b0f97ce8a5, $r30=0x148550b0f97ce601 ++addi.d $r26, $r8, 1630 :: ++before: $r26=0xa4120a67f8d6df1a, $r8=0xa83f4bbcaf5bc52e ++after: $r26=0xa83f4bbcaf5bcb8c, $r8=0xa83f4bbcaf5bc52e ++addi.d $r20, $r29, -1971 :: ++before: $r20=0xa8f9c82780ac16d5, $r29=0x7ab169a5751642bc ++after: $r20=0x7ab169a575163b09, $r29=0x7ab169a5751642bc ++addi.d $r8, $r8, 1160 :: ++before: $r8=0x6f22bdb480c14540, $r8=0x94e1253c331b17f2 ++after: $r8=0x94e1253c331b1c7a, $r8=0x94e1253c331b1c7a ++addi.d $r15, $r27, 844 :: ++before: $r15=0x0312473547bcfe03, $r27=0x7a786cbc8149d818 ++after: $r15=0x7a786cbc8149db64, $r27=0x7a786cbc8149d818 ++addi.d $r8, $r26, -1185 :: ++before: $r8=0xee2b1be852671bc3, $r26=0x6a36d61dfee3a6fb ++after: $r8=0x6a36d61dfee3a25a, $r26=0x6a36d61dfee3a6fb ++addi.d $r17, $r27, -2046 :: ++before: $r17=0x70e068b54ed72e20, $r27=0x922681ab8837027b ++after: 
$r17=0x922681ab8836fa7d, $r27=0x922681ab8837027b ++addu16i.d $r20, $r29, -14564 :: ++before: $r20=0x8232770e3472bdc3, $r29=0x4d28c5567787c26e ++after: $r20=0x4d28c5563ea3c26e, $r29=0x4d28c5567787c26e ++addu16i.d $r29, $r4, -3511 :: ++before: $r29=0x9076403ed2f0fdf4, $r4=0x471cafb4183a389f ++after: $r29=0x471cafb40a83389f, $r4=0x471cafb4183a389f ++addu16i.d $r26, $r15, 25897 :: ++before: $r26=0x0dec118b1eb13234, $r15=0x06ff5ce56111b301 ++after: $r26=0x06ff5ce5c63ab301, $r15=0x06ff5ce56111b301 ++addu16i.d $r9, $r5, -21829 :: ++before: $r9=0x73209239d98fb81a, $r5=0x1dc8f0ba4710eba3 ++after: $r9=0x1dc8f0b9f1cbeba3, $r5=0x1dc8f0ba4710eba3 ++addu16i.d $r28, $r25, -23832 :: ++before: $r28=0xa39ba8429a9c13a6, $r25=0x4fffb32851c13ff2 ++after: $r28=0x4fffb327f4a93ff2, $r25=0x4fffb32851c13ff2 ++addu16i.d $r23, $r30, -32189 :: ++before: $r23=0x08abd919f5ea43b1, $r30=0x40078826f7336f0e ++after: $r23=0x4007882679766f0e, $r30=0x40078826f7336f0e ++addu16i.d $r28, $r24, 16372 :: ++before: $r28=0x695e543e25e7d3e4, $r24=0x30279db606efa8ec ++after: $r28=0x30279db646e3a8ec, $r24=0x30279db606efa8ec ++addu16i.d $r4, $r18, -28041 :: ++before: $r4=0xa125cadb71209757, $r18=0xff287b5e7fb2a2ba ++after: $r4=0xff287b5e1229a2ba, $r18=0xff287b5e7fb2a2ba ++addu16i.d $r5, $r17, -11268 :: ++before: $r5=0xd5d3e6da7c594ca9, $r17=0x2bc9be0ef252584c ++after: $r5=0x2bc9be0ec64e584c, $r17=0x2bc9be0ef252584c ++addu16i.d $r29, $r28, -15645 :: ++before: $r29=0x0ee0391151007613, $r28=0xae616c39d87c4b6e ++after: $r29=0xae616c399b5f4b6e, $r28=0xae616c39d87c4b6e ++andi $r28, $r18, 1288 :: ++before: $r28=0xd62f833fbbd483b3, $r18=0xa2f268cdcf18dd00 ++after: $r28=0x0000000000000500, $r18=0xa2f268cdcf18dd00 ++andi $r12, $r13, 153 :: ++before: $r12=0xc40efc9a74a3a13b, $r13=0xfd609200795f877c ++after: $r12=0x0000000000000018, $r13=0xfd609200795f877c ++andi $r6, $r18, 3633 :: ++before: $r6=0x79ee7ee7a7865b79, $r18=0x644bec92dca1ad7f ++after: $r6=0x0000000000000c31, $r18=0x644bec92dca1ad7f ++andi $r5, $r31, 3299 :: ++before: $r5=0x2d64be0e5c2ec0f6, $r31=0x87253b6589f182c7 ++after: $r5=0x00000000000000c3, $r31=0x87253b6589f182c7 ++andi $r28, $r5, 3189 :: ++before: $r28=0xf2e4ed85d98a1860, $r5=0x9f58e4edd98b60d1 ++after: $r28=0x0000000000000051, $r5=0x9f58e4edd98b60d1 ++andi $r18, $r29, 4031 :: ++before: $r18=0x3c067920d48cf0d2, $r29=0x2bf35e68c503ecfe ++after: $r18=0x0000000000000cbe, $r29=0x2bf35e68c503ecfe ++andi $r20, $r24, 3252 :: ++before: $r20=0xe1d95be05fd57a64, $r24=0xd33e771521b24bd3 ++after: $r20=0x0000000000000890, $r24=0xd33e771521b24bd3 ++andi $r6, $r23, 1665 :: ++before: $r6=0x23341b2d86d02365, $r23=0x16de10f2b4a45064 ++after: $r6=000000000000000000, $r23=0x16de10f2b4a45064 ++andi $r27, $r14, 325 :: ++before: $r27=0xd7db9d77aea4dcf5, $r14=0x142272b737435eb7 ++after: $r27=0x0000000000000005, $r14=0x142272b737435eb7 ++andi $r23, $r16, 1056 :: ++before: $r23=0x57fee53581b09718, $r16=0x02ace25d9e2ddbaa ++after: $r23=0x0000000000000020, $r16=0x02ace25d9e2ddbaa ++ori $r26, $r13, 3251 :: ++before: $r26=0x6d47cf7e5bb5c13e, $r13=0x93aed4996805ba3b ++after: $r26=0x93aed4996805bebb, $r13=0x93aed4996805ba3b ++ori $r10, $r25, 568 :: ++before: $r10=0x42f0332098f938af, $r25=0xd7916fe8d569567b ++after: $r10=0xd7916fe8d569567b, $r25=0xd7916fe8d569567b ++ori $r12, $r17, 1798 :: ++before: $r12=0xc507d4150a742b76, $r17=0x2b9a102a5b5b15f7 ++after: $r12=0x2b9a102a5b5b17f7, $r17=0x2b9a102a5b5b15f7 ++ori $r15, $r15, 1781 :: ++before: $r15=0xa54ad5ecc0e72adb, $r15=0x37c18ad4ec6e678c ++after: $r15=0x37c18ad4ec6e67fd, $r15=0x37c18ad4ec6e67fd ++ori $r5, $r4, 
682 :: ++before: $r5=0x1f388b2a2b18004d, $r4=0xb5fa23fbb02eeedb ++after: $r5=0xb5fa23fbb02eeefb, $r4=0xb5fa23fbb02eeedb ++ori $r27, $r24, 1931 :: ++before: $r27=0x73b086f8a8b4d7b5, $r24=0xd23e30ab1e45470a ++after: $r27=0xd23e30ab1e45478b, $r24=0xd23e30ab1e45470a ++ori $r28, $r6, 3593 :: ++before: $r28=0x972967beac695928, $r6=0x2c701d0bc28816c5 ++after: $r28=0x2c701d0bc2881ecd, $r6=0x2c701d0bc28816c5 ++ori $r27, $r4, 3679 :: ++before: $r27=0x54fecbbf0a06e5a6, $r4=0xf0b6d846464a3331 ++after: $r27=0xf0b6d846464a3f7f, $r4=0xf0b6d846464a3331 ++ori $r9, $r16, 905 :: ++before: $r9=0x71f3cd001c729062, $r16=0xc5720758095e4592 ++after: $r9=0xc5720758095e479b, $r16=0xc5720758095e4592 ++ori $r26, $r7, 3473 :: ++before: $r26=0xd7ce86800c3c0f4b, $r7=0xc4a58f787cdf5bb2 ++after: $r26=0xc4a58f787cdf5fb3, $r7=0xc4a58f787cdf5bb2 ++xori $r27, $r31, 2690 :: ++before: $r27=0xe6d49c2dc629fbc7, $r31=0x91832665d1a898e2 ++after: $r27=0x91832665d1a89260, $r31=0x91832665d1a898e2 ++xori $r15, $r5, 697 :: ++before: $r15=0xada49c0d48beffc5, $r5=0x0e3cf426f1be4766 ++after: $r15=0x0e3cf426f1be45df, $r5=0x0e3cf426f1be4766 ++xori $r9, $r20, 2268 :: ++before: $r9=0x174a71d6d3757e3e, $r20=0x25ed4678037622be ++after: $r9=0x25ed467803762a62, $r20=0x25ed4678037622be ++xori $r31, $r15, 3817 :: ++before: $r31=0x1fac1694b40fbf2e, $r15=0x4fe4fb2e0b660ca2 ++after: $r31=0x4fe4fb2e0b66024b, $r15=0x4fe4fb2e0b660ca2 ++xori $r17, $r14, 3929 :: ++before: $r17=0x2dc443400df4e153, $r14=0x1db25e602ef8ece5 ++after: $r17=0x1db25e602ef8e3bc, $r14=0x1db25e602ef8ece5 ++xori $r4, $r28, 2735 :: ++before: $r4=0x5fb5ad5a84e97835, $r28=0xc52da11293641639 ++after: $r4=0xc52da11293641c96, $r28=0xc52da11293641639 ++xori $r5, $r13, 1153 :: ++before: $r5=0x5c5fc4ba45da005f, $r13=0xe46f853b7d602b84 ++after: $r5=0xe46f853b7d602f05, $r13=0xe46f853b7d602b84 ++xori $r30, $r26, 3867 :: ++before: $r30=0x1419915b6f92678b, $r26=0xa984612f1266da94 ++after: $r30=0xa984612f1266d58f, $r26=0xa984612f1266da94 ++xori $r13, $r13, 3426 :: ++before: $r13=0xc2b8fd036ba6314b, $r13=0x4cf49604f644713c ++after: $r13=0x4cf49604f6447c5e, $r13=0x4cf49604f6447c5e ++xori $r25, $r23, 2669 :: ++before: $r25=0xde46e3673c9a75dc, $r23=0xfa1177a89f08c81e ++after: $r25=0xfa1177a89f08c273, $r23=0xfa1177a89f08c81e ++sll.w $r13, $r8, $r12 :: ++before: $r13=0x26131fa72f4b76f1, $r8=0xf34f7108538078d0, $r12=0x10bbd12a8e087501 ++after: $r13=0xffffffffa700f1a0, $r8=0xf34f7108538078d0, $r12=0x10bbd12a8e087501 ++sll.w $r29, $r8, $r15 :: ++before: $r29=0xb6f529da4017d0d9, $r8=0x49fbfb11ef643171, $r15=0x9d0425e747d11bde ++after: $r29=0x0000000040000000, $r8=0x49fbfb11ef643171, $r15=0x9d0425e747d11bde ++sll.w $r30, $r31, $r12 :: ++before: $r30=0xcfc5236f5c070644, $r31=0xba8301a1087b3a96, $r12=0xff7589561824e1be ++after: $r30=0xffffffff80000000, $r31=0xba8301a1087b3a96, $r12=0xff7589561824e1be ++sll.w $r28, $r10, $r7 :: ++before: $r28=0x37fa51674df87149, $r10=0x39212605c5d0cf7d, $r7=0x18a8e323326ce5aa ++after: $r28=0x00000000433df400, $r10=0x39212605c5d0cf7d, $r7=0x18a8e323326ce5aa ++sll.w $r8, $r9, $r14 :: ++before: $r8=0x707a9e0ece8abe40, $r9=0x94b7b20a80c16c7b, $r14=0x6887c46efb4cc181 ++after: $r8=0x000000000182d8f6, $r9=0x94b7b20a80c16c7b, $r14=0x6887c46efb4cc181 ++sll.w $r8, $r4, $r24 :: ++before: $r8=0xd718a01b03a53964, $r4=0x8ebd8bfeec304e2a, $r24=0x06b4a83a6838b5d1 ++after: $r8=0xffffffff9c540000, $r4=0x8ebd8bfeec304e2a, $r24=0x06b4a83a6838b5d1 ++sll.w $r23, $r31, $r27 :: ++before: $r23=0xf50cab824a06d30e, $r31=0xa8ee12cbd8dec935, $r27=0x118002b3f0cecbab ++after: $r23=0xfffffffff649a800, 
$r31=0xa8ee12cbd8dec935, $r27=0x118002b3f0cecbab ++sll.w $r8, $r25, $r26 :: ++before: $r8=0x8163368243faadee, $r25=0x3a04f47bf19a4cc8, $r26=0x6a58cd3a57b4eeb4 ++after: $r8=0xffffffffcc800000, $r25=0x3a04f47bf19a4cc8, $r26=0x6a58cd3a57b4eeb4 ++sll.w $r25, $r13, $r12 :: ++before: $r25=0x3d6831e1afab1b1a, $r13=0x9ee672580cb39777, $r12=0x9084acd2bc7404ca ++after: $r25=0xffffffffce5ddc00, $r13=0x9ee672580cb39777, $r12=0x9084acd2bc7404ca ++sll.w $r20, $r5, $r29 :: ++before: $r20=0x90f7ee3ff75817a6, $r5=0xe4ae07989d6148d7, $r29=0x3e208bfcf046fffd ++after: $r20=0xffffffffe0000000, $r5=0xe4ae07989d6148d7, $r29=0x3e208bfcf046fffd ++srl.w $r20, $r29, $r30 :: ++before: $r20=0xff3f6b79b5e2b56d, $r29=0x1195aa09fa92d26b, $r30=0xa93a8fd11ad5ae99 ++after: $r20=0x000000000000007d, $r29=0x1195aa09fa92d26b, $r30=0xa93a8fd11ad5ae99 ++srl.w $r8, $r15, $r4 :: ++before: $r8=0x5d2fb7cd04ecd00c, $r15=0x47bf914b6eca2852, $r4=0x1bc63138cc45a75c ++after: $r8=0x0000000000000006, $r15=0x47bf914b6eca2852, $r4=0x1bc63138cc45a75c ++srl.w $r20, $r12, $r18 :: ++before: $r20=0x61fa22abda7c7b02, $r12=0x9341cf09aa2e106e, $r18=0x2dea831e9e121355 ++after: $r20=0x0000000000000551, $r12=0x9341cf09aa2e106e, $r18=0x2dea831e9e121355 ++srl.w $r30, $r20, $r26 :: ++before: $r30=0x43e0249584da52db, $r20=0x482a209e436cda53, $r26=0xb323a7f463f80660 ++after: $r30=0x00000000436cda53, $r20=0x482a209e436cda53, $r26=0xb323a7f463f80660 ++srl.w $r31, $r16, $r28 :: ++before: $r31=0x4b10d05d93bf7288, $r16=0x6d0330e88122d7c1, $r28=0xc531cf8c92d53d03 ++after: $r31=0x0000000010245af8, $r16=0x6d0330e88122d7c1, $r28=0xc531cf8c92d53d03 ++srl.w $r31, $r15, $r31 :: ++before: $r31=0xd4654233c7648c3a, $r15=0x12e6fc2a04cbf809, $r31=0xcfe1c1b558a94808 ++after: $r31=0x000000000004cbf8, $r15=0x12e6fc2a04cbf809, $r31=0x000000000004cbf8 ++srl.w $r10, $r30, $r19 :: ++before: $r10=0x602dee9c45a3b99b, $r30=0x3ce0a6ac2acf19fa, $r19=0xdb5fab4bc2f82e7a ++after: $r10=0x000000000000000a, $r30=0x3ce0a6ac2acf19fa, $r19=0xdb5fab4bc2f82e7a ++srl.w $r17, $r9, $r23 :: ++before: $r17=0x45106f11d4a57641, $r9=0x05354795b675edac, $r23=0xc67578c28ed7b6c7 ++after: $r17=0x00000000016cebdb, $r9=0x05354795b675edac, $r23=0xc67578c28ed7b6c7 ++srl.w $r25, $r26, $r29 :: ++before: $r25=0x1dc3b8477fba650c, $r26=0x0814377a71768e75, $r29=0x60276c0e316db833 ++after: $r25=0x0000000000000e2e, $r26=0x0814377a71768e75, $r29=0x60276c0e316db833 ++srl.w $r31, $r7, $r30 :: ++before: $r31=0x360fc92a085c2e14, $r7=0x1b44ec96def89449, $r30=0x56d6c5d85a81ed1f ++after: $r31=0x0000000000000001, $r7=0x1b44ec96def89449, $r30=0x56d6c5d85a81ed1f ++sra.w $r10, $r17, $r19 :: ++before: $r10=0x576f2bfc771641b8, $r17=0xfb1fb20b98a54405, $r19=0xb20e9dae5a212078 ++after: $r10=0xffffffffffffff98, $r17=0xfb1fb20b98a54405, $r19=0xb20e9dae5a212078 ++sra.w $r12, $r16, $r31 :: ++before: $r12=0xbfdbb9a90ccc08a0, $r16=0xb5d3c7f3b1a800a6, $r31=0x57c3ff79f3b4198b ++after: $r12=0xfffffffffff63500, $r16=0xb5d3c7f3b1a800a6, $r31=0x57c3ff79f3b4198b ++sra.w $r18, $r16, $r5 :: ++before: $r18=0xadcb6c153538b6b1, $r16=0x99e245813e90b5e9, $r5=0x7adff58363d5ebd2 ++after: $r18=0x0000000000000fa4, $r16=0x99e245813e90b5e9, $r5=0x7adff58363d5ebd2 ++sra.w $r17, $r28, $r25 :: ++before: $r17=0x7faea6a29686caf9, $r28=0x801d40ea40b19bee, $r25=0x0f5174f678600d3f ++after: $r17=000000000000000000, $r28=0x801d40ea40b19bee, $r25=0x0f5174f678600d3f ++sra.w $r8, $r27, $r13 :: ++before: $r8=0x86e5534832150e05, $r27=0x47bb53d1cdc3560f, $r13=0x917e2b49633a0f44 ++after: $r8=0xfffffffffcdc3560, $r27=0x47bb53d1cdc3560f, $r13=0x917e2b49633a0f44 ++sra.w $r26, $r18, 
$r20 :: ++before: $r26=0xbfb83a0d762c171a, $r18=0xbf67ed78d934d37c, $r20=0x9f377995293fcc6b ++after: $r26=0xfffffffffffb269a, $r18=0xbf67ed78d934d37c, $r20=0x9f377995293fcc6b ++sra.w $r5, $r25, $r19 :: ++before: $r5=0x266703af59334b0f, $r25=0x4ed92cdab9f641c9, $r19=0x5da1d0b8846d1a3d ++after: $r5=0xfffffffffffffffd, $r25=0x4ed92cdab9f641c9, $r19=0x5da1d0b8846d1a3d ++sra.w $r19, $r27, $r24 :: ++before: $r19=0x72557561b3b40007, $r27=0xd5db278ea099b3b5, $r24=0x50b4a888b898610f ++after: $r19=0xffffffffffff4133, $r27=0xd5db278ea099b3b5, $r24=0x50b4a888b898610f ++sra.w $r16, $r10, $r4 :: ++before: $r16=0xb349f888f1809ba3, $r10=0x23d60a1fc100d89e, $r4=0xc2846cc882dbc8e2 ++after: $r16=0xfffffffff0403627, $r10=0x23d60a1fc100d89e, $r4=0xc2846cc882dbc8e2 ++sra.w $r23, $r10, $r31 :: ++before: $r23=0xd7bdeddd344bb5af, $r10=0xa015a07c13ff2234, $r31=0x7c0fe410ce063a85 ++after: $r23=0x00000000009ff911, $r10=0xa015a07c13ff2234, $r31=0x7c0fe410ce063a85 ++sll.d $r28, $r17, $r10 :: ++before: $r28=0x167adf26efd66416, $r17=0xb861ba6e0aadf304, $r10=0xa19e21ba0f406c33 ++after: $r28=0x9820000000000000, $r17=0xb861ba6e0aadf304, $r10=0xa19e21ba0f406c33 ++sll.d $r18, $r29, $r13 :: ++before: $r18=0x3e8ea4dc3a9d9b44, $r29=0x28ccf5dfa9cdc3b2, $r13=0x33ef837a5a476bdc ++after: $r18=0xfa9cdc3b20000000, $r29=0x28ccf5dfa9cdc3b2, $r13=0x33ef837a5a476bdc ++sll.d $r23, $r27, $r29 :: ++before: $r23=0x23e29c76deed70ca, $r27=0x09e2265d8422e78d, $r29=0xe9cc62bfd8a7c913 ++after: $r23=0x32ec21173c680000, $r27=0x09e2265d8422e78d, $r29=0xe9cc62bfd8a7c913 ++sll.d $r16, $r17, $r17 :: ++before: $r16=0xf5e858c7445fcedd, $r17=0x6735e4cf2fcb78fb, $r17=0x726dd10e13b62663 ++after: $r16=0x9db1331800000000, $r17=0x726dd10e13b62663, $r17=0x726dd10e13b62663 ++sll.d $r17, $r15, $r29 :: ++before: $r17=0xfc1dbfc0551f8813, $r15=0xec45100b21a74025, $r29=0x186d3b737cbfd39a ++after: $r17=0x2c869d0094000000, $r15=0xec45100b21a74025, $r29=0x186d3b737cbfd39a ++sll.d $r19, $r15, $r9 :: ++before: $r19=0xbb01afe39a1e17b6, $r15=0x3e66dd1100acc44a, $r9=0xa9c74257f6e39cdf ++after: $r19=0x8056622500000000, $r15=0x3e66dd1100acc44a, $r9=0xa9c74257f6e39cdf ++sll.d $r23, $r9, $r31 :: ++before: $r23=0x945b101751c38d12, $r9=0x262d14baae546199, $r31=0x7ccdd8a7840948df ++after: $r23=0x572a30cc80000000, $r9=0x262d14baae546199, $r31=0x7ccdd8a7840948df ++sll.d $r5, $r31, $r28 :: ++before: $r5=0xa88eaecc1405995b, $r31=0xd96ed500aff4596b, $r28=0x6994841a196c562e ++after: $r5=0x165ac00000000000, $r31=0xd96ed500aff4596b, $r28=0x6994841a196c562e ++sll.d $r27, $r10, $r25 :: ++before: $r27=0x1e9540fa8237a849, $r10=0x9aad6101b2470a60, $r25=0x90c95628696f752f ++after: $r27=0x8530000000000000, $r10=0x9aad6101b2470a60, $r25=0x90c95628696f752f ++sll.d $r4, $r26, $r18 :: ++before: $r4=0xb4dc3cdeab2e8454, $r26=0x0d27a92db3b2906c, $r18=0x2bc7647c40c0b375 ++after: $r4=0x0d80000000000000, $r26=0x0d27a92db3b2906c, $r18=0x2bc7647c40c0b375 ++srl.d $r6, $r27, $r13 :: ++before: $r6=0x66ebeca9a7fad574, $r27=0xdc837ce646ea6b51, $r13=0xa57259e1758c564b ++after: $r6=0x001b906f9cc8dd4d, $r27=0xdc837ce646ea6b51, $r13=0xa57259e1758c564b ++srl.d $r6, $r20, $r5 :: ++before: $r6=0x091794316e6c5e65, $r20=0x0dc7c47d39d64a16, $r5=0x35f029b9942e11c8 ++after: $r6=0x000dc7c47d39d64a, $r20=0x0dc7c47d39d64a16, $r5=0x35f029b9942e11c8 ++srl.d $r15, $r5, $r4 :: ++before: $r15=0xbc963842b3ebc906, $r5=0x42ea773b0bd19807, $r4=0xd05cd2c4b01ea630 ++after: $r15=0x00000000000042ea, $r5=0x42ea773b0bd19807, $r4=0xd05cd2c4b01ea630 ++srl.d $r18, $r25, $r28 :: ++before: $r18=0x30d908baaa31230e, $r25=0x779272ae228746a5, 
$r28=0xf7b665809a3f303b ++after: $r18=0x000000000000000e, $r25=0x779272ae228746a5, $r28=0xf7b665809a3f303b ++srl.d $r5, $r28, $r27 :: ++before: $r5=0x01f1d414f1d0f1fe, $r28=0x647277d3759d74bf, $r27=0xa5c5fce39b4a1810 ++after: $r5=0x0000647277d3759d, $r28=0x647277d3759d74bf, $r27=0xa5c5fce39b4a1810 ++srl.d $r24, $r9, $r26 :: ++before: $r24=0x5fa44419162fc2c8, $r9=0x9d2a589e6f6b3440, $r26=0x810a615115238d8d ++after: $r24=0x0004e952c4f37b59, $r9=0x9d2a589e6f6b3440, $r26=0x810a615115238d8d ++srl.d $r31, $r23, $r30 :: ++before: $r31=0xfa1a7ad64758b758, $r23=0xe3d69d99e87b4297, $r30=0x87fd8dc0a78e86bb ++after: $r31=0x000000000000001c, $r23=0xe3d69d99e87b4297, $r30=0x87fd8dc0a78e86bb ++srl.d $r26, $r10, $r24 :: ++before: $r26=0x540888639a787231, $r10=0x168791cefeb1660a, $r24=0xd02b158115db9cdf ++after: $r26=0x000000002d0f239d, $r10=0x168791cefeb1660a, $r24=0xd02b158115db9cdf ++srl.d $r23, $r15, $r12 :: ++before: $r23=0xff3e950565409999, $r15=0xe15a01fa0e34ea3b, $r12=0x237aba34fe552f8e ++after: $r23=0x0003856807e838d3, $r15=0xe15a01fa0e34ea3b, $r12=0x237aba34fe552f8e ++srl.d $r8, $r16, $r4 :: ++before: $r8=0x825bafd36cc0d32e, $r16=0x321677304d1b1406, $r4=0xca68c6c83dfa5837 ++after: $r8=0x0000000000000064, $r16=0x321677304d1b1406, $r4=0xca68c6c83dfa5837 ++sra.d $r23, $r19, $r16 :: ++before: $r23=0x4cab63abd8f64774, $r19=0x2c007c3ac68d7c80, $r16=0xd8f4ac963a8b2c01 ++after: $r23=0x16003e1d6346be40, $r19=0x2c007c3ac68d7c80, $r16=0xd8f4ac963a8b2c01 ++sra.d $r18, $r30, $r25 :: ++before: $r18=0x531de73fca30361a, $r30=0x2857ba730cd281ff, $r25=0xacab0fe400e4c113 ++after: $r18=0x0000050af74e619a, $r30=0x2857ba730cd281ff, $r25=0xacab0fe400e4c113 ++sra.d $r31, $r13, $r10 :: ++before: $r31=0x3184416bc93a5e26, $r13=0xad5864bc4022de96, $r10=0xf7007bdbf1f728ab ++after: $r31=0xfffffffffff5ab0c, $r13=0xad5864bc4022de96, $r10=0xf7007bdbf1f728ab ++sra.d $r6, $r25, $r23 :: ++before: $r6=0x9184d2df291f3402, $r25=0x7c0b117dcad80c03, $r23=0x35b29b0dde1a94bd ++after: $r6=0x0000000000000003, $r25=0x7c0b117dcad80c03, $r23=0x35b29b0dde1a94bd ++sra.d $r16, $r6, $r29 :: ++before: $r16=0x2849e543d35dff5f, $r6=0x009f13f36a632a3f, $r29=0xf31f881e12072fe2 ++after: $r16=0x000000000027c4fc, $r6=0x009f13f36a632a3f, $r29=0xf31f881e12072fe2 ++sra.d $r7, $r29, $r10 :: ++before: $r7=0x25c763f8366139dd, $r29=0xfd77fd6e69e371c6, $r10=0xcaa2ec6ad4f3b996 ++after: $r7=0xfffffff5dff5b9a7, $r29=0xfd77fd6e69e371c6, $r10=0xcaa2ec6ad4f3b996 ++sra.d $r24, $r25, $r26 :: ++before: $r24=0x472602300b4f04c9, $r25=0x54ceea832a5677e9, $r26=0x5f63e9d9d6eb4af0 ++after: $r24=0x00000000000054ce, $r25=0x54ceea832a5677e9, $r26=0x5f63e9d9d6eb4af0 ++sra.d $r23, $r4, $r27 :: ++before: $r23=0xe8b449325a0ed51e, $r4=0xd96928476f8441a5, $r27=0x7e1ae8fd9c849dce ++after: $r23=0xffff65a4a11dbe11, $r4=0xd96928476f8441a5, $r27=0x7e1ae8fd9c849dce ++sra.d $r15, $r9, $r12 :: ++before: $r15=0x71601a1a2b155f51, $r9=0x0bcbb1d162563240, $r12=0x5a906ad2f4abb4c7 ++after: $r15=0x00179763a2c4ac64, $r9=0x0bcbb1d162563240, $r12=0x5a906ad2f4abb4c7 ++sra.d $r16, $r29, $r23 :: ++before: $r16=0x1686886f27d397fb, $r29=0x851328b2655e5689, $r23=0x1634457590cd4033 ++after: $r16=0xfffffffffffff0a2, $r29=0x851328b2655e5689, $r23=0x1634457590cd4033 ++rotr.w $r8, $r5, $r18 :: ++before: $r8=0xc4394aae4c13908b, $r5=0xa0c5728d1211b595, $r18=0x3d562746b3943f3b ++after: $r8=0x000000004236b2a2, $r5=0xa0c5728d1211b595, $r18=0x3d562746b3943f3b ++rotr.w $r19, $r18, $r10 :: ++before: $r19=0x284b501639de116b, $r18=0x4248ad6cc0107902, $r10=0xb41907b756bf8004 ++after: $r19=0x000000002c010790, 
$r18=0x4248ad6cc0107902, $r10=0xb41907b756bf8004 ++rotr.w $r29, $r8, $r4 :: ++before: $r29=0x2656b50c7d689f19, $r8=0x7b5d21fdce9bcb73, $r4=0x5b212fbe9e6b8522 ++after: $r29=0xfffffffff3a6f2dc, $r8=0x7b5d21fdce9bcb73, $r4=0x5b212fbe9e6b8522 ++rotr.w $r25, $r6, $r30 :: ++before: $r25=0x4c79ed7a1695fc25, $r6=0x6bac1698a978f50f, $r30=0xf1d58570dfb10203 ++after: $r25=0xfffffffff52f1ea1, $r6=0x6bac1698a978f50f, $r30=0xf1d58570dfb10203 ++rotr.w $r14, $r18, $r6 :: ++before: $r14=0xe894476b4ebbff23, $r18=0x1398b65ae1e91c98, $r6=0xebb6c3f5f689d2d8 ++after: $r14=0xffffffffe91c98e1, $r18=0x1398b65ae1e91c98, $r6=0xebb6c3f5f689d2d8 ++rotr.w $r19, $r29, $r26 :: ++before: $r19=0x2595423cc93ecd7c, $r29=0x6c462c2d29d8f908, $r26=0x19142efd8e0b48b8 ++after: $r19=0xffffffffd8f90829, $r29=0x6c462c2d29d8f908, $r26=0x19142efd8e0b48b8 ++rotr.w $r23, $r10, $r25 :: ++before: $r23=0x68b4d913b267a3a2, $r10=0x69afb673907e4506, $r25=0xbd09ff2ed890862d ++after: $r23=0x00000000283483f2, $r10=0x69afb673907e4506, $r25=0xbd09ff2ed890862d ++rotr.w $r9, $r14, $r27 :: ++before: $r9=0x17a45b8cbdebd6ef, $r14=0x33effef864846356, $r27=0x3f52e437f2d5da62 ++after: $r9=0xffffffff992118d5, $r14=0x33effef864846356, $r27=0x3f52e437f2d5da62 ++rotr.w $r5, $r12, $r23 :: ++before: $r5=0x2d191b1a9707cf26, $r12=0x86fa75433dac3d39, $r23=0x21136a02424e5da4 ++after: $r5=0xffffffff93dac3d3, $r12=0x86fa75433dac3d39, $r23=0x21136a02424e5da4 ++rotr.w $r29, $r18, $r27 :: ++before: $r29=0x7d989f74f9944f8d, $r18=0x0050fe5829a153e6, $r27=0x926776f9140b06fc ++after: $r29=0xffffffff9a153e62, $r18=0x0050fe5829a153e6, $r27=0x926776f9140b06fc ++rotr.d $r29, $r19, $r13 :: ++before: $r29=0x1e02c0c28ec3f9b1, $r19=0xf2e79e6ff240b188, $r13=0x60f500663eddf444 ++after: $r29=0x8f2e79e6ff240b18, $r19=0xf2e79e6ff240b188, $r13=0x60f500663eddf444 ++rotr.d $r30, $r4, $r14 :: ++before: $r30=0x97f6be8229e2e822, $r4=0xf79aaeb2c03a2113, $r14=0xbbdb2cb642605ed7 ++after: $r30=0x744227ef355d6580, $r4=0xf79aaeb2c03a2113, $r14=0xbbdb2cb642605ed7 ++rotr.d $r6, $r19, $r7 :: ++before: $r6=0x1611806010ce99d8, $r19=0xcb64270e0fc5b4c7, $r7=0x0eda6972c46af03c ++after: $r6=0xb64270e0fc5b4c7c, $r19=0xcb64270e0fc5b4c7, $r7=0x0eda6972c46af03c ++rotr.d $r4, $r15, $r30 :: ++before: $r4=0xe63084e97bd0efb3, $r15=0x6e1aa322e38e9b66, $r30=0xa7df0f1d92106e2d ++after: $r4=0x19171c74db3370d5, $r15=0x6e1aa322e38e9b66, $r30=0xa7df0f1d92106e2d ++rotr.d $r16, $r27, $r10 :: ++before: $r16=0x1ff92fbb0f10ff9a, $r27=0x015c2eb91c9ae124, $r10=0x8b4c97ee7f9bc2fa ++after: $r16=0x570bae4726b84900, $r27=0x015c2eb91c9ae124, $r10=0x8b4c97ee7f9bc2fa ++rotr.d $r28, $r7, $r25 :: ++before: $r28=0xbd766a63bbead21c, $r7=0x0d97b509610db5e7, $r25=0x3151203010315af5 ++after: $r28=0xbda84b086daf386c, $r7=0x0d97b509610db5e7, $r25=0x3151203010315af5 ++rotr.d $r9, $r20, $r23 :: ++before: $r9=0x8a2bb5eacea50d68, $r20=0x947ec1930151adb9, $r23=0xc2f39e045d278b7b ++after: $r9=0x8fd832602a35b732, $r20=0x947ec1930151adb9, $r23=0xc2f39e045d278b7b ++rotr.d $r25, $r13, $r23 :: ++before: $r25=0xcaddb8ea7bd492c7, $r13=0x416a1b790dbf45cb, $r23=0x44c59965e1c6af25 ++after: $r25=0xc86dfa2e5a0b50db, $r13=0x416a1b790dbf45cb, $r23=0x44c59965e1c6af25 ++rotr.d $r14, $r7, $r31 :: ++before: $r14=0x8ca18b58047c8b5a, $r7=0x93a6cdc3585b5446, $r31=0x70cd84ec07e33cef ++after: $r14=0x9b86b0b6a88d274d, $r7=0x93a6cdc3585b5446, $r31=0x70cd84ec07e33cef ++rotr.d $r14, $r9, $r4 :: ++before: $r14=0x48bd5c133004f490, $r9=0xad095be0915fe20b, $r4=0xc1fff6ff603a47b3 ++after: $r14=0x2b7c122bfc4175a1, $r9=0xad095be0915fe20b, $r4=0xc1fff6ff603a47b3 ++slli.w $r18, $r8, 10 
:: ++before: $r18=0xe7f8823a2989c395, $r8=0xf0ccc85519ad1e0a ++after: $r18=0xffffffffb4782800, $r8=0xf0ccc85519ad1e0a ++slli.w $r27, $r17, 30 :: ++before: $r27=0x2e66b550a3bb071d, $r17=0x20943aa3eaa4024e ++after: $r27=0xffffffff80000000, $r17=0x20943aa3eaa4024e ++slli.w $r27, $r23, 31 :: ++before: $r27=0x70daa2bee8209243, $r23=0x2e9160afd2e28a64 ++after: $r27=000000000000000000, $r23=0x2e9160afd2e28a64 ++slli.w $r10, $r13, 12 :: ++before: $r10=0x701c424632b5dc29, $r13=0x591054db6afe1725 ++after: $r10=0xffffffffe1725000, $r13=0x591054db6afe1725 ++slli.w $r7, $r15, 19 :: ++before: $r7=0xdd1d7fe3ae579499, $r15=0x2e077f689088c0c7 ++after: $r7=0x0000000006380000, $r15=0x2e077f689088c0c7 ++slli.w $r6, $r8, 12 :: ++before: $r6=0xff732113ddaab79b, $r8=0x9cacf8e6d9e37f97 ++after: $r6=0x0000000037f97000, $r8=0x9cacf8e6d9e37f97 ++slli.w $r5, $r19, 22 :: ++before: $r5=0xcef75ddd2adc5853, $r19=0xcc24ed9167fd06ea ++after: $r5=0xffffffffba800000, $r19=0xcc24ed9167fd06ea ++slli.w $r17, $r8, 7 :: ++before: $r17=0x3c8788fed3e8a049, $r8=0xccf9b2d2c2e80251 ++after: $r17=0x0000000074012880, $r8=0xccf9b2d2c2e80251 ++slli.w $r14, $r29, 10 :: ++before: $r14=0x0e1b0b077db4f08e, $r29=0x76aea4b9ae43cdfb ++after: $r14=0x000000000f37ec00, $r29=0x76aea4b9ae43cdfb ++slli.w $r23, $r30, 26 :: ++before: $r23=0x13d8514aeb0dc12b, $r30=0x9c8352804e7e8ccb ++after: $r23=0x000000002c000000, $r30=0x9c8352804e7e8ccb ++slli.d $r27, $r28, 5 :: ++before: $r27=0x689a2c4141835926, $r28=0x1b6ff38e611d1e4d ++after: $r27=0x6dfe71cc23a3c9a0, $r28=0x1b6ff38e611d1e4d ++slli.d $r5, $r20, 1 :: ++before: $r5=0xff3391c2323defa6, $r20=0xe99a134a0c1a2574 ++after: $r5=0xd334269418344ae8, $r20=0xe99a134a0c1a2574 ++slli.d $r27, $r7, 61 :: ++before: $r27=0xc32d8fb319ba47e6, $r7=0xc6530e0e601d3631 ++after: $r27=0x2000000000000000, $r7=0xc6530e0e601d3631 ++slli.d $r5, $r26, 45 :: ++before: $r5=0x979553ff112cdf52, $r26=0x931e420364fdcaca ++after: $r5=0xb959400000000000, $r26=0x931e420364fdcaca ++slli.d $r27, $r5, 60 :: ++before: $r27=0xa7f70b048a4087b0, $r5=0xc1b829210c3cd5a9 ++after: $r27=0x9000000000000000, $r5=0xc1b829210c3cd5a9 ++slli.d $r23, $r10, 59 :: ++before: $r23=0xcd547af78ac66ca7, $r10=0xa2c0802de6c82645 ++after: $r23=0x2800000000000000, $r10=0xa2c0802de6c82645 ++slli.d $r13, $r30, 56 :: ++before: $r13=0x0410b8f25e1234ee, $r30=0xdbaacfe884cda24d ++after: $r13=0x4d00000000000000, $r30=0xdbaacfe884cda24d ++slli.d $r16, $r4, 20 :: ++before: $r16=0x44a2ff35045ec37c, $r4=0xee2240010629a8ee ++after: $r16=0x0010629a8ee00000, $r4=0xee2240010629a8ee ++slli.d $r19, $r20, 25 :: ++before: $r19=0x8617d88408d75cac, $r20=0xba15483820d66ae7 ++after: $r19=0x7041acd5ce000000, $r20=0xba15483820d66ae7 ++slli.d $r24, $r27, 13 :: ++before: $r24=0x669e0e9b99d5b604, $r27=0xf5d1ffc374e53c7d ++after: $r24=0x3ff86e9ca78fa000, $r27=0xf5d1ffc374e53c7d ++srli.w $r20, $r16, 10 :: ++before: $r20=0x7f5310ac5eaa9924, $r16=0x0ea8b69613d183ee ++after: $r20=0x000000000004f460, $r16=0x0ea8b69613d183ee ++srli.w $r13, $r15, 0 :: ++before: $r13=0x5f4d9313f9224389, $r15=0xd544272206f4e814 ++after: $r13=0x0000000006f4e814, $r15=0xd544272206f4e814 ++srli.w $r17, $r18, 7 :: ++before: $r17=0xd9b2c942f996cc8a, $r18=0x704cd1d89de5c2b4 ++after: $r17=0x00000000013bcb85, $r18=0x704cd1d89de5c2b4 ++srli.w $r27, $r28, 18 :: ++before: $r27=0xa3eef8efc97e0d4f, $r28=0x8c449e6236daa7a2 ++after: $r27=0x0000000000000db6, $r28=0x8c449e6236daa7a2 ++srli.w $r9, $r10, 29 :: ++before: $r9=0x6c044927152e5fc9, $r10=0x592a1607944e0109 ++after: $r9=0x0000000000000004, $r10=0x592a1607944e0109 ++srli.w $r8, 
$r24, 31 :: ++before: $r8=0xcaa01b37d49db675, $r24=0x5e35848bbc958164 ++after: $r8=0x0000000000000001, $r24=0x5e35848bbc958164 ++srli.w $r6, $r16, 18 :: ++before: $r6=0xe2fbe1accb343769, $r16=0x85f5e17c7d785222 ++after: $r6=0x0000000000001f5e, $r16=0x85f5e17c7d785222 ++srli.w $r18, $r25, 31 :: ++before: $r18=0x4653c07e0627825f, $r25=0x44fffa524ffd0417 ++after: $r18=000000000000000000, $r25=0x44fffa524ffd0417 ++srli.w $r5, $r26, 22 :: ++before: $r5=0x817ebd7154c8ed46, $r26=0xc7399a9899fc5958 ++after: $r5=0x0000000000000267, $r26=0xc7399a9899fc5958 ++srli.w $r27, $r4, 27 :: ++before: $r27=0x3e4b17b34f2b08d0, $r4=0x5bedb97aefd697f4 ++after: $r27=0x000000000000001d, $r4=0x5bedb97aefd697f4 ++srli.d $r31, $r9, 51 :: ++before: $r31=0x8fc21da189af52ed, $r9=0x235bf33e3e612a15 ++after: $r31=0x000000000000046b, $r9=0x235bf33e3e612a15 ++srli.d $r26, $r7, 36 :: ++before: $r26=0xcd1eaac4df2531dd, $r7=0xe87216fce9c75788 ++after: $r26=0x000000000e87216f, $r7=0xe87216fce9c75788 ++srli.d $r6, $r31, 29 :: ++before: $r6=0xc0282beeb7dc6618, $r31=0x8b58604d6be3e8e0 ++after: $r6=0x000000045ac3026b, $r31=0x8b58604d6be3e8e0 ++srli.d $r20, $r6, 18 :: ++before: $r20=0x1546fdd9fc133e39, $r6=0x74067840bb05a992 ++after: $r20=0x00001d019e102ec1, $r6=0x74067840bb05a992 ++srli.d $r28, $r20, 28 :: ++before: $r28=0xaa1f88b09e13e4c6, $r20=0x6e153faa5221e893 ++after: $r28=0x00000006e153faa5, $r20=0x6e153faa5221e893 ++srli.d $r26, $r4, 55 :: ++before: $r26=0x2ba2151c80dbea7a, $r4=0x21246f3c7063edf9 ++after: $r26=0x0000000000000042, $r4=0x21246f3c7063edf9 ++srli.d $r28, $r29, 34 :: ++before: $r28=0xcd72eff1b5aa0877, $r29=0x5d9488c1d61a1544 ++after: $r28=0x0000000017652230, $r29=0x5d9488c1d61a1544 ++srli.d $r13, $r7, 62 :: ++before: $r13=0x5953b78fbd8109a9, $r7=0x862731652b653859 ++after: $r13=0x0000000000000002, $r7=0x862731652b653859 ++srli.d $r29, $r18, 25 :: ++before: $r29=0xab821449d149a976, $r18=0xcb73553146cc4bdc ++after: $r29=0x00000065b9aa98a3, $r18=0xcb73553146cc4bdc ++srli.d $r28, $r7, 2 :: ++before: $r28=0x31272fa88123357d, $r7=0x0e9359f7a9f92ec5 ++after: $r28=0x03a4d67dea7e4bb1, $r7=0x0e9359f7a9f92ec5 ++srai.w $r26, $r23, 2 :: ++before: $r26=0xe73a55c2b7005c01, $r23=0xfcd659254f4b3fe7 ++after: $r26=0x0000000013d2cff9, $r23=0xfcd659254f4b3fe7 ++srai.w $r31, $r10, 0 :: ++before: $r31=0x2e0c4330fae0890a, $r10=0xa76ca364a204c82b ++after: $r31=0xffffffffa204c82b, $r10=0xa76ca364a204c82b ++srai.w $r31, $r8, 0 :: ++before: $r31=0x64790bb6e8674f68, $r8=0xce5594f964c4a026 ++after: $r31=0x0000000064c4a026, $r8=0xce5594f964c4a026 ++srai.w $r15, $r31, 27 :: ++before: $r15=0xccfb53c708026acd, $r31=0xce185873627515b5 ++after: $r15=0x000000000000000c, $r31=0xce185873627515b5 ++srai.w $r16, $r28, 1 :: ++before: $r16=0x994c4d22e90185a2, $r28=0x49995d51019e1050 ++after: $r16=0x0000000000cf0828, $r28=0x49995d51019e1050 ++srai.w $r13, $r16, 16 :: ++before: $r13=0x0484408b57b3ab89, $r16=0x437401347e23c399 ++after: $r13=0x0000000000007e23, $r16=0x437401347e23c399 ++srai.w $r4, $r9, 7 :: ++before: $r4=0x0d1d936105b7cca3, $r9=0xd49c3c65e292b942 ++after: $r4=0xffffffffffc52572, $r9=0xd49c3c65e292b942 ++srai.w $r24, $r15, 10 :: ++before: $r24=0xaa9377005232ec93, $r15=0xde29d0172b40f03d ++after: $r24=0x00000000000ad03c, $r15=0xde29d0172b40f03d ++srai.w $r19, $r14, 24 :: ++before: $r19=0xa49c65a4c2cde36d, $r14=0x782e0d4b8a7a28d0 ++after: $r19=0xffffffffffffff8a, $r14=0x782e0d4b8a7a28d0 ++srai.w $r24, $r27, 24 :: ++before: $r24=0x404f816ff696bbc8, $r27=0x1b6900e15f252315 ++after: $r24=0x000000000000005f, $r27=0x1b6900e15f252315 ++srai.d $r24, 
$r4, 22 :: ++before: $r24=0x96250384fede78c7, $r4=0x6c501d9ec5e9e731 ++after: $r24=0x000001b140767b17, $r4=0x6c501d9ec5e9e731 ++srai.d $r30, $r19, 17 :: ++before: $r30=0xcfc52d7caaf7bf47, $r19=0x82499a30d50f8b83 ++after: $r30=0xffffc124cd186a87, $r19=0x82499a30d50f8b83 ++srai.d $r12, $r12, 5 :: ++before: $r12=0x628a1a46bbe30c16, $r12=0xaba392c50d63ea53 ++after: $r12=0xfd5d1c96286b1f52, $r12=0xfd5d1c96286b1f52 ++srai.d $r24, $r9, 2 :: ++before: $r24=0x021c1bb01f0253d8, $r9=0xb35e31d92548a2fe ++after: $r24=0xecd78c76495228bf, $r9=0xb35e31d92548a2fe ++srai.d $r28, $r7, 62 :: ++before: $r28=0x2a5ac0a983332ec3, $r7=0x2297ae499a473c6d ++after: $r28=000000000000000000, $r7=0x2297ae499a473c6d ++srai.d $r8, $r17, 31 :: ++before: $r8=0xa27cf36651750e09, $r17=0x1984e046b042d0cf ++after: $r8=0x000000003309c08d, $r17=0x1984e046b042d0cf ++srai.d $r25, $r16, 37 :: ++before: $r25=0x7df3822fb20b8ded, $r16=0xb4e464563029fac8 ++after: $r25=0xfffffffffda72322, $r16=0xb4e464563029fac8 ++srai.d $r14, $r5, 57 :: ++before: $r14=0xe8c1939c13a2e6ca, $r5=0x6a22077c63497a9a ++after: $r14=0x0000000000000035, $r5=0x6a22077c63497a9a ++srai.d $r25, $r15, 10 :: ++before: $r25=0xf2df68e25cccf72e, $r15=0xe0af648201f919fc ++after: $r25=0xfff82bd920807e46, $r15=0xe0af648201f919fc ++srai.d $r6, $r15, 49 :: ++before: $r6=0xa24591b35142aa9c, $r15=0x12b20ac67de77b8d ++after: $r6=0x0000000000000959, $r15=0x12b20ac67de77b8d ++rotri.w $r18, $r6, 20 :: ++before: $r18=0xf0c65b137926ba00, $r6=0x95e0f5f057a212c5 ++after: $r18=0x00000000212c557a, $r6=0x95e0f5f057a212c5 ++rotri.w $r9, $r16, 27 :: ++before: $r9=0xe36356471d2a7e18, $r16=0xb8af3071021bd869 ++after: $r9=0x00000000437b0d20, $r16=0xb8af3071021bd869 ++rotri.w $r5, $r31, 1 :: ++before: $r5=0x5992fc9cfce2ebe9, $r31=0x6c427c821603d01a ++after: $r5=0x000000000b01e80d, $r31=0x6c427c821603d01a ++rotri.w $r27, $r13, 23 :: ++before: $r27=0x0239c57dca2ab060, $r13=0xed54e28825b25471 ++after: $r27=0x0000000064a8e24b, $r13=0xed54e28825b25471 ++rotri.w $r18, $r18, 7 :: ++before: $r18=0xb84df2305a710936, $r18=0x8aae5248c6d4973c ++after: $r18=0x00000000798da92e, $r18=0x00000000798da92e ++rotri.w $r4, $r27, 29 :: ++before: $r4=0x730e1701570ac9fc, $r27=0xd55b9d54232536e7 ++after: $r4=0x000000001929b739, $r27=0xd55b9d54232536e7 ++rotri.w $r19, $r18, 0 :: ++before: $r19=0x36dbceffa501d8dc, $r18=0x8415238fa1dd314f ++after: $r19=0xffffffffa1dd314f, $r18=0x8415238fa1dd314f ++rotri.w $r13, $r24, 21 :: ++before: $r13=0x00c1ac428ddf5193, $r24=0x3b588028fcfbb0a8 ++after: $r13=0xffffffffdd8547e7, $r24=0x3b588028fcfbb0a8 ++rotri.w $r14, $r25, 29 :: ++before: $r14=0x733414543ca8145e, $r25=0xded24831de35be08 ++after: $r14=0xfffffffff1adf046, $r25=0xded24831de35be08 ++rotri.w $r27, $r5, 8 :: ++before: $r27=0x60afaebb36d22ba0, $r5=0x0fd31a16f03582b5 ++after: $r27=0xffffffffb5f03582, $r5=0x0fd31a16f03582b5 ++rotri.d $r20, $r7, 53 :: ++before: $r20=0xe112a6d47c0444c1, $r7=0xbd9bbb91bdc381c5 ++after: $r20=0xdddc8dee1c0e2dec, $r7=0xbd9bbb91bdc381c5 ++rotri.d $r27, $r16, 62 :: ++before: $r27=0xf254a827c1ef7351, $r16=0x3de084650f757ceb ++after: $r27=0xf78211943dd5f3ac, $r16=0x3de084650f757ceb ++rotri.d $r30, $r17, 10 :: ++before: $r30=0x31c36a8c83999eb2, $r17=0x107098a9863e85d5 ++after: $r30=0x75441c262a618fa1, $r17=0x107098a9863e85d5 ++rotri.d $r29, $r8, 46 :: ++before: $r29=0xf2e7a25c121af3c3, $r8=0xb177c110c3dd3225 ++after: $r29=0x04430f74c896c5df, $r8=0xb177c110c3dd3225 ++rotri.d $r4, $r26, 45 :: ++before: $r4=0xdd94ff60f2e1abff, $r26=0xb76d3e4a0af02e4d ++after: $r4=0xf2505781726dbb69, $r26=0xb76d3e4a0af02e4d 
++rotri.d $r10, $r9, 42 :: ++before: $r10=0x6064d48d901beca7, $r9=0xea20b33360134ab2 ++after: $r10=0xccd804d2acba882c, $r9=0xea20b33360134ab2 ++rotri.d $r4, $r26, 27 :: ++before: $r4=0x27f1e63c8f7f71cf, $r26=0xf4c5c8a69f37a1bd ++after: $r4=0xe6f437be98b914d3, $r26=0xf4c5c8a69f37a1bd ++rotri.d $r9, $r16, 20 :: ++before: $r9=0x7d4cb07a3ab72944, $r16=0xd5ee210421c6080e ++after: $r9=0x6080ed5ee210421c, $r16=0xd5ee210421c6080e ++rotri.d $r24, $r26, 4 :: ++before: $r24=0x1ce66a79f3e45e6f, $r26=0x6e1767144ffa6e2d ++after: $r24=0xd6e1767144ffa6e2, $r26=0x6e1767144ffa6e2d ++rotri.d $r4, $r18, 46 :: ++before: $r4=0x04173f8102b03399, $r18=0xde7066568917d899 ++after: $r4=0x995a245f626779c1, $r18=0xde7066568917d899 ++ext.w.h $r17, $r14 :: ++before: $r17=0x58af862c6fc4208d, $r14=0x6235b0cfe4eed6ed ++after: $r17=0xffffffffffffd6ed, $r14=0x6235b0cfe4eed6ed ++ext.w.h $r31, $r20 :: ++before: $r31=0x425af3dcd83fa9fd, $r20=0x6e59403101a538f1 ++after: $r31=0x00000000000038f1, $r20=0x6e59403101a538f1 ++ext.w.h $r18, $r27 :: ++before: $r18=0xcb140226bf788367, $r27=0x58a5430ee4e1616e ++after: $r18=0x000000000000616e, $r27=0x58a5430ee4e1616e ++ext.w.h $r15, $r10 :: ++before: $r15=0xd3debaf05f7d909f, $r10=0x6f7083340247fb12 ++after: $r15=0xfffffffffffffb12, $r10=0x6f7083340247fb12 ++ext.w.h $r12, $r15 :: ++before: $r12=0x5dc6f7191af80bcf, $r15=0xb1f1c8f4b11c03d9 ++after: $r12=0x00000000000003d9, $r15=0xb1f1c8f4b11c03d9 ++ext.w.h $r7, $r15 :: ++before: $r7=0x5ffe304a5c9dc9d2, $r15=0x102fb4fa33193103 ++after: $r7=0x0000000000003103, $r15=0x102fb4fa33193103 ++ext.w.h $r16, $r16 :: ++before: $r16=0x533616e37505799f, $r16=0xf988c7255086f4f5 ++after: $r16=0xfffffffffffff4f5, $r16=0xfffffffffffff4f5 ++ext.w.h $r13, $r25 :: ++before: $r13=0x805a406557ed3fac, $r25=0xdc6ce0f2993b219b ++after: $r13=0x000000000000219b, $r25=0xdc6ce0f2993b219b ++ext.w.h $r19, $r20 :: ++before: $r19=0xcc49c20125c4755d, $r20=0xde7b765222a9703a ++after: $r19=0x000000000000703a, $r20=0xde7b765222a9703a ++ext.w.h $r18, $r7 :: ++before: $r18=0xe0dd9155cbe168c6, $r7=0xc1063421eae07663 ++after: $r18=0x0000000000007663, $r7=0xc1063421eae07663 ++ext.w.b $r16, $r23 :: ++before: $r16=0x21666e814555aa02, $r23=0x926b8d68b5c40592 ++after: $r16=0xffffffffffffff92, $r23=0x926b8d68b5c40592 ++ext.w.b $r8, $r20 :: ++before: $r8=0xf68ae0a0ac497ded, $r20=0x0bfb5d489716d0c5 ++after: $r8=0xffffffffffffffc5, $r20=0x0bfb5d489716d0c5 ++ext.w.b $r24, $r15 :: ++before: $r24=0xbc84e54c82fd6e51, $r15=0x7d814b11e5eb07f6 ++after: $r24=0xfffffffffffffff6, $r15=0x7d814b11e5eb07f6 ++ext.w.b $r31, $r17 :: ++before: $r31=0x14e575a8dda1f0d3, $r17=0x6a111e663a52244c ++after: $r31=0x000000000000004c, $r17=0x6a111e663a52244c ++ext.w.b $r16, $r8 :: ++before: $r16=0x911acc218fcf640b, $r8=0xac1405ad05b23e43 ++after: $r16=0x0000000000000043, $r8=0xac1405ad05b23e43 ++ext.w.b $r28, $r8 :: ++before: $r28=0x77fb13eaa8995607, $r8=0x05c97a81f12da7d3 ++after: $r28=0xffffffffffffffd3, $r8=0x05c97a81f12da7d3 ++ext.w.b $r9, $r23 :: ++before: $r9=0xb88cfdb98683e15e, $r23=0x74893b34973e16cb ++after: $r9=0xffffffffffffffcb, $r23=0x74893b34973e16cb ++ext.w.b $r31, $r4 :: ++before: $r31=0xc7168cb4f7d079e4, $r4=0xf4fc215bc2c5273e ++after: $r31=0x000000000000003e, $r4=0xf4fc215bc2c5273e ++ext.w.b $r4, $r18 :: ++before: $r4=0x0e2e5dca4727b373, $r18=0xa1b97136f32e452b ++after: $r4=0x000000000000002b, $r18=0xa1b97136f32e452b ++ext.w.b $r8, $r29 :: ++before: $r8=0x625eb5236f483daa, $r29=0x3ceca34ee347e7c8 ++after: $r8=0xffffffffffffffc8, $r29=0x3ceca34ee347e7c8 ++clo.w $r4, $r13 :: ++before: 
$r4=0x0bcca747f77aca28, $r13=0x8df71972c1a17096 ++after: $r4=0x0000000000000002, $r13=0x8df71972c1a17096 ++clo.w $r27, $r5 :: ++before: $r27=0x98a9e6d99d8e84cb, $r5=0xdc59d3c8fc1540e4 ++after: $r27=0x0000000000000006, $r5=0xdc59d3c8fc1540e4 ++clo.w $r9, $r14 :: ++before: $r9=0xe8e78b162c95ed66, $r14=0xdfad6854bbf442e6 ++after: $r9=0x0000000000000001, $r14=0xdfad6854bbf442e6 ++clo.w $r13, $r26 :: ++before: $r13=0xa3db2cf80f9112cd, $r26=0x7676463dd6f13f80 ++after: $r13=0x0000000000000002, $r26=0x7676463dd6f13f80 ++clo.w $r7, $r16 :: ++before: $r7=0xb5213ab31b574031, $r16=0x478c19ebdeaa74c0 ++after: $r7=0x0000000000000002, $r16=0x478c19ebdeaa74c0 ++clo.w $r13, $r12 :: ++before: $r13=0xd68d9661284fb9d7, $r12=0x702bf24fddd8bfe0 ++after: $r13=0x0000000000000002, $r12=0x702bf24fddd8bfe0 ++clo.w $r18, $r20 :: ++before: $r18=0x510cd4002aff4c6c, $r20=0x4fc898e8b83669ee ++after: $r18=0x0000000000000001, $r20=0x4fc898e8b83669ee ++clo.w $r5, $r9 :: ++before: $r5=0x53c0de96f709208d, $r9=0x0e56d87b898438b5 ++after: $r5=0x0000000000000001, $r9=0x0e56d87b898438b5 ++clo.w $r20, $r5 :: ++before: $r20=0x96187854fcce4fd1, $r5=0xf1248bea6ed8be30 ++after: $r20=000000000000000000, $r5=0xf1248bea6ed8be30 ++clo.w $r20, $r31 :: ++before: $r20=0xb1abb4795d411683, $r31=0x01025f914a9225e6 ++after: $r20=000000000000000000, $r31=0x01025f914a9225e6 ++clz.w $r19, $r8 :: ++before: $r19=0x374348642747a8dc, $r8=0xd8ec1d547d95ada5 ++after: $r19=0x0000000000000001, $r8=0xd8ec1d547d95ada5 ++clz.w $r26, $r4 :: ++before: $r26=0x741ab4d14b9ee1f8, $r4=0x99e2ef840817cfff ++after: $r26=0x0000000000000004, $r4=0x99e2ef840817cfff ++clz.w $r17, $r4 :: ++before: $r17=0x45c9ce7217f501b3, $r4=0xa387a194cd03bcf1 ++after: $r17=000000000000000000, $r4=0xa387a194cd03bcf1 ++clz.w $r13, $r26 :: ++before: $r13=0x69707656f354d758, $r26=0xd4a8f8ab02b876b0 ++after: $r13=0x0000000000000006, $r26=0xd4a8f8ab02b876b0 ++clz.w $r25, $r13 :: ++before: $r25=0x103ce6ee41e094c3, $r13=0xd7a85bf4006e655a ++after: $r25=0x0000000000000009, $r13=0xd7a85bf4006e655a ++clz.w $r5, $r13 :: ++before: $r5=0x3910578929e7cd4a, $r13=0x93c87b02b7b1b603 ++after: $r5=000000000000000000, $r13=0x93c87b02b7b1b603 ++clz.w $r18, $r29 :: ++before: $r18=0x10639f8979feefe5, $r29=0x9d8b4b8f8493f844 ++after: $r18=000000000000000000, $r29=0x9d8b4b8f8493f844 ++clz.w $r25, $r16 :: ++before: $r25=0x7b35b3e995b3b44d, $r16=0xad953d0ae0b3e870 ++after: $r25=000000000000000000, $r16=0xad953d0ae0b3e870 ++clz.w $r6, $r25 :: ++before: $r6=0xda6cbd19f10ef86f, $r25=0x1d6665db1162cfb4 ++after: $r6=0x0000000000000003, $r25=0x1d6665db1162cfb4 ++clz.w $r5, $r12 :: ++before: $r5=0x8a6f4d6ec8d7c00d, $r12=0x19b40cb8dd8d1679 ++after: $r5=000000000000000000, $r12=0x19b40cb8dd8d1679 ++cto.w $r7, $r15 :: ++before: $r7=0x7285e9c364562d11, $r15=0x963655c7f58de520 ++after: $r7=000000000000000000, $r15=0x963655c7f58de520 ++cto.w $r4, $r15 :: ++before: $r4=0x105dceebc6d7e641, $r15=0xfc01c17baaca9c46 ++after: $r4=000000000000000000, $r15=0xfc01c17baaca9c46 ++cto.w $r31, $r28 :: ++before: $r31=0xdeff9742b93f0591, $r28=0x2cf98074b0151f33 ++after: $r31=0x0000000000000002, $r28=0x2cf98074b0151f33 ++cto.w $r13, $r8 :: ++before: $r13=0xeee665743cd218ff, $r8=0xbdd700b2535aa3b7 ++after: $r13=0x0000000000000003, $r8=0xbdd700b2535aa3b7 ++cto.w $r23, $r13 :: ++before: $r23=0x1cc22cfd7c0c869c, $r13=0x5b848b64decbee8f ++after: $r23=0x0000000000000004, $r13=0x5b848b64decbee8f ++cto.w $r12, $r18 :: ++before: $r12=0x5c32b3db803e5988, $r18=0x2d5d1ebf93b79dd0 ++after: $r12=000000000000000000, $r18=0x2d5d1ebf93b79dd0 ++cto.w $r17, $r9 
:: ++before: $r17=0xc11d806786501f0e, $r9=0xd175fe2ca41bda38 ++after: $r17=000000000000000000, $r9=0xd175fe2ca41bda38 ++cto.w $r24, $r16 :: ++before: $r24=0x504f9b43af62e2ad, $r16=0xfce545d98e2361da ++after: $r24=000000000000000000, $r16=0xfce545d98e2361da ++cto.w $r24, $r8 :: ++before: $r24=0xc13ac5668538f5a4, $r8=0x3096912e575d64db ++after: $r24=0x0000000000000002, $r8=0x3096912e575d64db ++cto.w $r27, $r17 :: ++before: $r27=0xd27f68629dd8d4fb, $r17=0x15ac43632e175a8b ++after: $r27=0x0000000000000002, $r17=0x15ac43632e175a8b ++ctz.w $r8, $r12 :: ++before: $r8=0xfc9bd3736a3c08bd, $r12=0xaebba33c2e268daa ++after: $r8=0x0000000000000001, $r12=0xaebba33c2e268daa ++ctz.w $r5, $r27 :: ++before: $r5=0x5dc8af7bac7db01a, $r27=0xabce2f0e113597aa ++after: $r5=0x0000000000000001, $r27=0xabce2f0e113597aa ++ctz.w $r18, $r6 :: ++before: $r18=0xe4ac5b59d8442dfe, $r6=0x935d1b694e96bd04 ++after: $r18=0x0000000000000002, $r6=0x935d1b694e96bd04 ++ctz.w $r9, $r15 :: ++before: $r9=0x9b760f465efbb52e, $r15=0x834c9974dba65d99 ++after: $r9=000000000000000000, $r15=0x834c9974dba65d99 ++ctz.w $r13, $r7 :: ++before: $r13=0x95b5748f5f8bfb38, $r7=0x75dd7a9890cdf2d9 ++after: $r13=000000000000000000, $r7=0x75dd7a9890cdf2d9 ++ctz.w $r29, $r17 :: ++before: $r29=0xa25119fd892d1b20, $r17=0x38c12e795dc52acf ++after: $r29=000000000000000000, $r17=0x38c12e795dc52acf ++ctz.w $r15, $r12 :: ++before: $r15=0x95c2ce0f0446807c, $r12=0x623a5915ac8164b2 ++after: $r15=0x0000000000000001, $r12=0x623a5915ac8164b2 ++ctz.w $r6, $r17 :: ++before: $r6=0xd9034892a300dca8, $r17=0x5911fea4e6ce1df3 ++after: $r6=000000000000000000, $r17=0x5911fea4e6ce1df3 ++ctz.w $r10, $r25 :: ++before: $r10=0xda1e0d0eb34884ab, $r25=0x8d70d49a10ba8968 ++after: $r10=0x0000000000000003, $r25=0x8d70d49a10ba8968 ++ctz.w $r14, $r13 :: ++before: $r14=0x207d275c076e5247, $r13=0xd243debc9b557922 ++after: $r14=0x0000000000000001, $r13=0xd243debc9b557922 ++clo.d $r7, $r16 :: ++before: $r7=0x9432ccd773e86812, $r16=0x9f921ea959c97c2b ++after: $r7=0x0000000000000001, $r16=0x9f921ea959c97c2b ++clo.d $r7, $r12 :: ++before: $r7=0xaf19ef0b422b09bf, $r12=0x8773ec5c72444fe2 ++after: $r7=0x0000000000000001, $r12=0x8773ec5c72444fe2 ++clo.d $r5, $r10 :: ++before: $r5=0xa2912bc0ca36fa58, $r10=0x2c93a7506a8979b7 ++after: $r5=000000000000000000, $r10=0x2c93a7506a8979b7 ++clo.d $r7, $r28 :: ++before: $r7=0x69dd3f71121c7380, $r28=0x1784b7c2c7558b4a ++after: $r7=000000000000000000, $r28=0x1784b7c2c7558b4a ++clo.d $r15, $r9 :: ++before: $r15=0x95b40b42f113cecc, $r9=0xf0cdb7b9c17bb9e1 ++after: $r15=0x0000000000000004, $r9=0xf0cdb7b9c17bb9e1 ++clo.d $r9, $r27 :: ++before: $r9=0x1961ee1499945d08, $r27=0x23c7a2252c1cbc78 ++after: $r9=000000000000000000, $r27=0x23c7a2252c1cbc78 ++clo.d $r30, $r19 :: ++before: $r30=0xda0aa8b04f719a51, $r19=0x8f93c7a1b3cc9f12 ++after: $r30=0x0000000000000001, $r19=0x8f93c7a1b3cc9f12 ++clo.d $r26, $r20 :: ++before: $r26=0xdd4f62bfe1237a28, $r20=0xd61c7bfe05165d04 ++after: $r26=0x0000000000000002, $r20=0xd61c7bfe05165d04 ++clo.d $r26, $r6 :: ++before: $r26=0x44a1378e22d6ec81, $r6=0x1b21543ee9abd103 ++after: $r26=000000000000000000, $r6=0x1b21543ee9abd103 ++clo.d $r24, $r16 :: ++before: $r24=0x51efcf6ef8eb9917, $r16=0x602cbdf020ee6da8 ++after: $r24=000000000000000000, $r16=0x602cbdf020ee6da8 ++clz.d $r27, $r7 :: ++before: $r27=0x91df318f7b476077, $r7=0x6ca0b9cf9bb84c4a ++after: $r27=0x0000000000000001, $r7=0x6ca0b9cf9bb84c4a ++clz.d $r19, $r30 :: ++before: $r19=0x435d7fb412d9c12c, $r30=0xc926e58bdb46104e ++after: $r19=000000000000000000, $r30=0xc926e58bdb46104e 
++clz.d $r12, $r30 :: ++before: $r12=0x906b06441b2ef62b, $r30=0x04b9b91966077ef0 ++after: $r12=0x0000000000000005, $r30=0x04b9b91966077ef0 ++clz.d $r28, $r6 :: ++before: $r28=0x28bb3e3324f33e14, $r6=0x7628cd8752be6223 ++after: $r28=0x0000000000000001, $r6=0x7628cd8752be6223 ++clz.d $r14, $r15 :: ++before: $r14=0xb7a5ae04bf2e60c0, $r15=0x41a328a79afda305 ++after: $r14=0x0000000000000001, $r15=0x41a328a79afda305 ++clz.d $r4, $r23 :: ++before: $r4=0x5fd8327a265b1a3b, $r23=0x66b92d8b5b842d4a ++after: $r4=0x0000000000000001, $r23=0x66b92d8b5b842d4a ++clz.d $r18, $r29 :: ++before: $r18=0x73df6808e38c72ad, $r29=0x6b91b11261dd26b6 ++after: $r18=0x0000000000000001, $r29=0x6b91b11261dd26b6 ++clz.d $r13, $r8 :: ++before: $r13=0xd8d2dbd71d1783ad, $r8=0xdc50b7586ccab6a1 ++after: $r13=000000000000000000, $r8=0xdc50b7586ccab6a1 ++clz.d $r17, $r10 :: ++before: $r17=0xee6f842bb7686b8d, $r10=0xdf52e003cd95f02f ++after: $r17=000000000000000000, $r10=0xdf52e003cd95f02f ++clz.d $r13, $r8 :: ++before: $r13=0x91e717aef96cc046, $r8=0x5dd0743ed560ba78 ++after: $r13=0x0000000000000001, $r8=0x5dd0743ed560ba78 ++cto.d $r31, $r5 :: ++before: $r31=0xf361d5d1fb232769, $r5=0x1530b67240d804cf ++after: $r31=0x0000000000000004, $r5=0x1530b67240d804cf ++cto.d $r5, $r26 :: ++before: $r5=0xbedb393d17f69d40, $r26=0xcef56269ef7aecda ++after: $r5=000000000000000000, $r26=0xcef56269ef7aecda ++cto.d $r5, $r31 :: ++before: $r5=0xadd75db878cdbf84, $r31=0x8e08acc65c97f0b2 ++after: $r5=000000000000000000, $r31=0x8e08acc65c97f0b2 ++cto.d $r31, $r31 :: ++before: $r31=0x6a8a89827e4929f9, $r31=0x7df0f59d97924bb3 ++after: $r31=0x0000000000000002, $r31=0x0000000000000002 ++cto.d $r14, $r30 :: ++before: $r14=0xefb0874ef3600b6d, $r30=0x97a4b45ab971a548 ++after: $r14=000000000000000000, $r30=0x97a4b45ab971a548 ++cto.d $r5, $r17 :: ++before: $r5=0x144271fb49c8d2d8, $r17=0x787e6dbb4fec4d21 ++after: $r5=0x0000000000000001, $r17=0x787e6dbb4fec4d21 ++cto.d $r28, $r20 :: ++before: $r28=0xd6d0953d2a12c998, $r20=0xafd578caad0dfa09 ++after: $r28=0x0000000000000001, $r20=0xafd578caad0dfa09 ++cto.d $r16, $r18 :: ++before: $r16=0x0de650be54a7990c, $r18=0x3ea8f45e10441829 ++after: $r16=0x0000000000000001, $r18=0x3ea8f45e10441829 ++cto.d $r15, $r16 :: ++before: $r15=0x0bbd328743f49a86, $r16=0x5cafc638b6b509be ++after: $r15=000000000000000000, $r16=0x5cafc638b6b509be ++cto.d $r6, $r20 :: ++before: $r6=0x598ee27859cf8d0e, $r20=0x4bce530e537ad762 ++after: $r6=000000000000000000, $r20=0x4bce530e537ad762 ++ctz.d $r14, $r28 :: ++before: $r14=0xf2e4d886a8fd3fe3, $r28=0x0dafbabdfefac692 ++after: $r14=0x0000000000000001, $r28=0x0dafbabdfefac692 ++ctz.d $r6, $r27 :: ++before: $r6=0xe005a6a20d44fbca, $r27=0xe000ac4f4cfb2ce2 ++after: $r6=0x0000000000000001, $r27=0xe000ac4f4cfb2ce2 ++ctz.d $r15, $r26 :: ++before: $r15=0x871c2ccd50ec0784, $r26=0xa82b0d96dd72f11c ++after: $r15=0x0000000000000002, $r26=0xa82b0d96dd72f11c ++ctz.d $r17, $r20 :: ++before: $r17=0xebe7d9f4ec5055d5, $r20=0x65575957936d1d6e ++after: $r17=0x0000000000000001, $r20=0x65575957936d1d6e ++ctz.d $r19, $r8 :: ++before: $r19=0x394effa243e5f14c, $r8=0xf6852349a7b00561 ++after: $r19=000000000000000000, $r8=0xf6852349a7b00561 ++ctz.d $r5, $r9 :: ++before: $r5=0x3c67392fc408e9db, $r9=0xeff4bf8e886d7cc3 ++after: $r5=000000000000000000, $r9=0xeff4bf8e886d7cc3 ++ctz.d $r31, $r15 :: ++before: $r31=0xbf5435775bd0435b, $r15=0x19760246c8d1d680 ++after: $r31=0x0000000000000007, $r15=0x19760246c8d1d680 ++ctz.d $r9, $r5 :: ++before: $r9=0x0ccde230362ce06a, $r5=0x7590c6e73077c2bc ++after: $r9=0x0000000000000002, 
$r5=0x7590c6e73077c2bc ++ctz.d $r28, $r25 :: ++before: $r28=0x2518777b06d608a0, $r25=0xb87647dad481ba32 ++after: $r28=0x0000000000000001, $r25=0xb87647dad481ba32 ++ctz.d $r23, $r19 :: ++before: $r23=0xbe232a9fe2090e75, $r19=0x2dceda5cdc990d2e ++after: $r23=0x0000000000000001, $r19=0x2dceda5cdc990d2e ++revb.2h $r29, $r30 :: ++before: $r29=0x75397084990a0745, $r30=0x0d4c83f5966c1c17 ++after: $r29=0x000000006c96171c, $r30=0x0d4c83f5966c1c17 ++revb.2h $r17, $r23 :: ++before: $r17=0xecfbee2a69bbe344, $r23=0x5a42dc5dc5705f68 ++after: $r17=0x0000000070c5685f, $r23=0x5a42dc5dc5705f68 ++revb.2h $r6, $r14 :: ++before: $r6=0xbfeffdbd68845522, $r14=0x3490af5b50fd56bf ++after: $r6=0xfffffffffd50bf56, $r14=0x3490af5b50fd56bf ++revb.2h $r13, $r6 :: ++before: $r13=0x58e1821d319a1598, $r6=0x4c6711d021a72be6 ++after: $r13=0xffffffffa721e62b, $r6=0x4c6711d021a72be6 ++revb.2h $r18, $r8 :: ++before: $r18=0x6e14994d4e16ff86, $r8=0x9fda01513ab5ceb8 ++after: $r18=0xffffffffb53ab8ce, $r8=0x9fda01513ab5ceb8 ++revb.2h $r7, $r30 :: ++before: $r7=0x9979d3a3fcfc9323, $r30=0x504c708535bc136f ++after: $r7=0xffffffffbc356f13, $r30=0x504c708535bc136f ++revb.2h $r28, $r19 :: ++before: $r28=0x9daf4aa3a33eec5f, $r19=0xaa376fc54f4be6f5 ++after: $r28=0x000000004b4ff5e6, $r19=0xaa376fc54f4be6f5 ++revb.2h $r30, $r8 :: ++before: $r30=0x2e0bba43ec83e59e, $r8=0xaee8b8acd436f6da ++after: $r30=0x0000000036d4daf6, $r8=0xaee8b8acd436f6da ++revb.2h $r14, $r7 :: ++before: $r14=0x9634787c9be10863, $r7=0xe9da521d42716c0a ++after: $r14=0x0000000071420a6c, $r7=0xe9da521d42716c0a ++revb.2h $r23, $r14 :: ++before: $r23=0x687b89225667081a, $r14=0x9089e36a4f12f9c6 ++after: $r23=0x00000000124fc6f9, $r14=0x9089e36a4f12f9c6 ++revb.4h $r4, $r25 :: ++before: $r4=0xc42859bd06b669d2, $r25=0x782e4ae6ab812191 ++after: $r4=0x2e78e64a81ab9121, $r25=0x782e4ae6ab812191 ++revb.4h $r18, $r19 :: ++before: $r18=0x45ca4499d789fe5b, $r19=0x6e558c98b95d346d ++after: $r18=0x556e988c5db96d34, $r19=0x6e558c98b95d346d ++revb.4h $r24, $r10 :: ++before: $r24=0x2d04871fd753c43f, $r10=0xbeab033e2b5a979e ++after: $r24=0xabbe3e035a2b9e97, $r10=0xbeab033e2b5a979e ++revb.4h $r24, $r8 :: ++before: $r24=0xbc4deb39fb2ffe2e, $r8=0x5e3e50b8025e77f3 ++after: $r24=0x3e5eb8505e02f377, $r8=0x5e3e50b8025e77f3 ++revb.4h $r7, $r14 :: ++before: $r7=0xf44a6ea6f42e0918, $r14=0x9f617a848e4ad8f2 ++after: $r7=0x619f847a4a8ef2d8, $r14=0x9f617a848e4ad8f2 ++revb.4h $r13, $r12 :: ++before: $r13=0xda815ff8648e92b9, $r12=0xa401e74c4dd88e12 ++after: $r13=0x01a44ce7d84d128e, $r12=0xa401e74c4dd88e12 ++revb.4h $r31, $r19 :: ++before: $r31=0x7964d861d2ecb8d5, $r19=0xe402e87f73fb4c68 ++after: $r31=0x02e47fe8fb73684c, $r19=0xe402e87f73fb4c68 ++revb.4h $r29, $r25 :: ++before: $r29=0x6beff3fa6167cdcc, $r25=0x11e350b71aee0229 ++after: $r29=0xe311b750ee1a2902, $r25=0x11e350b71aee0229 ++revb.4h $r4, $r8 :: ++before: $r4=0x357a56e8ae275376, $r8=0xdf8ebc175f4be7e3 ++after: $r4=0x8edf17bc4b5fe3e7, $r8=0xdf8ebc175f4be7e3 ++revb.4h $r15, $r27 :: ++before: $r15=0xeb11b29acfe397d6, $r27=0x42d231083cd97aa0 ++after: $r15=0xd2420831d93ca07a, $r27=0x42d231083cd97aa0 ++revb.2w $r27, $r31 :: ++before: $r27=0x0978f867dd7f0cb8, $r31=0x19eec2d357cd6a06 ++after: $r27=0xd3c2ee19066acd57, $r31=0x19eec2d357cd6a06 ++revb.2w $r10, $r10 :: ++before: $r10=0x7897a40c4fda96d5, $r10=0xcb849783a18de892 ++after: $r10=0x839784cb92e88da1, $r10=0x839784cb92e88da1 ++revb.2w $r23, $r14 :: ++before: $r23=0x018338c734be53a1, $r14=0x6258664ec1bb96b8 ++after: $r23=0x4e665862b896bbc1, $r14=0x6258664ec1bb96b8 ++revb.2w $r12, $r19 :: ++before: 
$r12=0x7417ec4fef3451cc, $r19=0x216ad32ee149542b ++after: $r12=0x2ed36a212b5449e1, $r19=0x216ad32ee149542b ++revb.2w $r31, $r30 :: ++before: $r31=0x8132835b9905b650, $r30=0x6fac007fbefdecf2 ++after: $r31=0x7f00ac6ff2ecfdbe, $r30=0x6fac007fbefdecf2 ++revb.2w $r25, $r10 :: ++before: $r25=0x7336ebe375c83bed, $r10=0x643f76ac3010a6bb ++after: $r25=0xac763f64bba61030, $r10=0x643f76ac3010a6bb ++revb.2w $r31, $r29 :: ++before: $r31=0x5d99f79f18e805b8, $r29=0xe65e70ca4cf299fa ++after: $r31=0xca705ee6fa99f24c, $r29=0xe65e70ca4cf299fa ++revb.2w $r30, $r19 :: ++before: $r30=0xec10dd6d7249c5fa, $r19=0x3f6bb22d66caf299 ++after: $r30=0x2db26b3f99f2ca66, $r19=0x3f6bb22d66caf299 ++revb.2w $r6, $r30 :: ++before: $r6=0x2c394783817c0870, $r30=0xd823cff07efd78db ++after: $r6=0xf0cf23d8db78fd7e, $r30=0xd823cff07efd78db ++revb.2w $r4, $r15 :: ++before: $r4=0xc5acf61f075cd4e4, $r15=0xc154dd7479b90c6c ++after: $r4=0x74dd54c16c0cb979, $r15=0xc154dd7479b90c6c ++revb.d $r6, $r23 :: ++before: $r6=0xe6e05a0dafda37ce, $r23=0x2ac7d047f197f6fb ++after: $r6=0xfbf697f147d0c72a, $r23=0x2ac7d047f197f6fb ++revb.d $r19, $r4 :: ++before: $r19=0xc07a757bea6011ff, $r4=0xcef6cef3e0f941ff ++after: $r19=0xff41f9e0f3cef6ce, $r4=0xcef6cef3e0f941ff ++revb.d $r6, $r15 :: ++before: $r6=0x711bb31e18fcb2f3, $r15=0x522068042cf5be1a ++after: $r6=0x1abef52c04682052, $r15=0x522068042cf5be1a ++revb.d $r9, $r7 :: ++before: $r9=0xf9654c655c67392e, $r7=0xa1b065742110e3f4 ++after: $r9=0xf4e310217465b0a1, $r7=0xa1b065742110e3f4 ++revb.d $r29, $r4 :: ++before: $r29=0x70c0dcad23609060, $r4=0x5d04b7b2ece6f6bb ++after: $r29=0xbbf6e6ecb2b7045d, $r4=0x5d04b7b2ece6f6bb ++revb.d $r15, $r4 :: ++before: $r15=0x809930516f3136eb, $r4=0xda33327a8d42ef55 ++after: $r15=0x55ef428d7a3233da, $r4=0xda33327a8d42ef55 ++revb.d $r10, $r4 :: ++before: $r10=0x1a7ee04b354f6af5, $r4=0xcda6c6943e46fed7 ++after: $r10=0xd7fe463e94c6a6cd, $r4=0xcda6c6943e46fed7 ++revb.d $r20, $r4 :: ++before: $r20=0x315f95452d748459, $r4=0xa001e934745758e0 ++after: $r20=0xe058577434e901a0, $r4=0xa001e934745758e0 ++revb.d $r6, $r8 :: ++before: $r6=0xabbd06000374627a, $r8=0x85441006689de89b ++after: $r6=0x9be89d6806104485, $r8=0x85441006689de89b ++revb.d $r27, $r24 :: ++before: $r27=0x2d404e69f54afa48, $r24=0x46f47b822772f3cd ++after: $r27=0xcdf37227827bf446, $r24=0x46f47b822772f3cd ++revh.2w $r6, $r15 :: ++before: $r6=0x5b764c7bfb1999eb, $r15=0x86603fc3f96843ed ++after: $r6=0x3fc3866043edf968, $r15=0x86603fc3f96843ed ++revh.2w $r19, $r10 :: ++before: $r19=0xf39f8e6b43dd63ce, $r10=0x141d294d06276941 ++after: $r19=0x294d141d69410627, $r10=0x141d294d06276941 ++revh.2w $r5, $r20 :: ++before: $r5=0x3ff54e5c35d83e69, $r20=0xd677d6a21384278a ++after: $r5=0xd6a2d677278a1384, $r20=0xd677d6a21384278a ++revh.2w $r4, $r31 :: ++before: $r4=0xce463b02a2f840cc, $r31=0x6f87c9636f9cfca6 ++after: $r4=0xc9636f87fca66f9c, $r31=0x6f87c9636f9cfca6 ++revh.2w $r19, $r26 :: ++before: $r19=0x34abc96ddde64e27, $r26=0x723ec7ce92720502 ++after: $r19=0xc7ce723e05029272, $r26=0x723ec7ce92720502 ++revh.2w $r8, $r18 :: ++before: $r8=0x1454a1ee8739c235, $r18=0xd890efa373a6dfb0 ++after: $r8=0xefa3d890dfb073a6, $r18=0xd890efa373a6dfb0 ++revh.2w $r12, $r31 :: ++before: $r12=0xf0c8b856751cae70, $r31=0xb675dff2568e6ebf ++after: $r12=0xdff2b6756ebf568e, $r31=0xb675dff2568e6ebf ++revh.2w $r24, $r9 :: ++before: $r24=0xb36984e3a7a3eaea, $r9=0xa169cfa9f35f6a8a ++after: $r24=0xcfa9a1696a8af35f, $r9=0xa169cfa9f35f6a8a ++revh.2w $r25, $r27 :: ++before: $r25=0x640b3e6b41180473, $r27=0x9bc307f0a2ef368f ++after: $r25=0x07f09bc3368fa2ef, 
$r27=0x9bc307f0a2ef368f ++revh.2w $r7, $r9 :: ++before: $r7=0x897e1406a0eb2dc9, $r9=0x1921bcf657fecdcc ++after: $r7=0xbcf61921cdcc57fe, $r9=0x1921bcf657fecdcc ++revh.d $r14, $r25 :: ++before: $r14=0xec3573411ea025e5, $r25=0x6976d4371b08f1ab ++after: $r14=0xf1ab1b08d4376976, $r25=0x6976d4371b08f1ab ++revh.d $r24, $r31 :: ++before: $r24=0x9ef9e5cb1375d42a, $r31=0x9ce130c8a579e11d ++after: $r24=0xe11da57930c89ce1, $r31=0x9ce130c8a579e11d ++revh.d $r9, $r28 :: ++before: $r9=0x3c8cd0055a5e7031, $r28=0xf05f9381753ded16 ++after: $r9=0xed16753d9381f05f, $r28=0xf05f9381753ded16 ++revh.d $r24, $r26 :: ++before: $r24=0x6a4e5797f19041f6, $r26=0xd26a5ae65e21041c ++after: $r24=0x041c5e215ae6d26a, $r26=0xd26a5ae65e21041c ++revh.d $r14, $r24 :: ++before: $r14=0xe2cb9a83aee22d97, $r24=0x6405d71e0bb63321 ++after: $r14=0x33210bb6d71e6405, $r24=0x6405d71e0bb63321 ++revh.d $r19, $r23 :: ++before: $r19=0x91cdf3bcd9afe76d, $r23=0x171953826107396a ++after: $r19=0x396a610753821719, $r23=0x171953826107396a ++revh.d $r23, $r14 :: ++before: $r23=0x93ed49255d084e12, $r14=0x374bd76990198b43 ++after: $r23=0x8b439019d769374b, $r14=0x374bd76990198b43 ++revh.d $r31, $r12 :: ++before: $r31=0x08e54a908f04882b, $r12=0xf7e8756491b9d346 ++after: $r31=0xd34691b97564f7e8, $r12=0xf7e8756491b9d346 ++revh.d $r31, $r20 :: ++before: $r31=0xbb7cd34502fdf01f, $r20=0x906b7289a6957d3f ++after: $r31=0x7d3fa6957289906b, $r20=0x906b7289a6957d3f ++revh.d $r27, $r30 :: ++before: $r27=0xacbca1aacdd9dd3f, $r30=0x3072d9c69004d4b5 ++after: $r27=0xd4b59004d9c63072, $r30=0x3072d9c69004d4b5 ++bitrev.4b $r23, $r19 :: ++before: $r23=0xb422f2854b491d92, $r19=0x7649084cec69098a ++after: $r23=0x0000000037969051, $r19=0x7649084cec69098a ++bitrev.4b $r27, $r16 :: ++before: $r27=0xd14736328d74b448, $r16=0x1abee3a271c71db9 ++after: $r27=0xffffffff8ee3b89d, $r16=0x1abee3a271c71db9 ++bitrev.4b $r15, $r23 :: ++before: $r15=0xf17c0f0ccfbb2c38, $r23=0x490107ff4155bd17 ++after: $r15=0xffffffff82aabde8, $r23=0x490107ff4155bd17 ++bitrev.4b $r5, $r18 :: ++before: $r5=0x8408d6a30523619d, $r18=0x625d5aedf0add9fb ++after: $r5=0x000000000fb59bdf, $r18=0x625d5aedf0add9fb ++bitrev.4b $r8, $r15 :: ++before: $r8=0xc41a2fdb60ba75a6, $r15=0xe2562eab3b333a00 ++after: $r8=0xffffffffdccc5c00, $r15=0xe2562eab3b333a00 ++bitrev.4b $r17, $r18 :: ++before: $r17=0x6a409394f364c02a, $r18=0xea970d90edb343cc ++after: $r17=0xffffffffb7cdc233, $r18=0xea970d90edb343cc ++bitrev.4b $r25, $r29 :: ++before: $r25=0xd8d1c9b8dcff266d, $r29=0xacca47ac7597ca65 ++after: $r25=0xffffffffaee953a6, $r29=0xacca47ac7597ca65 ++bitrev.4b $r26, $r24 :: ++before: $r26=0xe2a0d11df8c5055b, $r24=0xc57559d03e3e216d ++after: $r26=0x000000007c7c84b6, $r24=0xc57559d03e3e216d ++bitrev.4b $r8, $r27 :: ++before: $r8=0xb6a5815170d657f0, $r27=0x9f60901eefa1347a ++after: $r8=0xfffffffff7852c5e, $r27=0x9f60901eefa1347a ++bitrev.4b $r20, $r16 :: ++before: $r20=0x432a2fbf2b073732, $r16=0x604b8d7ecb5e86dc ++after: $r20=0xffffffffd37a613b, $r16=0x604b8d7ecb5e86dc ++bitrev.8b $r25, $r7 :: ++before: $r25=0x22b2e6007f742fd1, $r7=0xe8c23886def1bbc9 ++after: $r25=0x17431c617b8fdd93, $r7=0xe8c23886def1bbc9 ++bitrev.8b $r28, $r30 :: ++before: $r28=0xf985d7779c5ca157, $r30=0x285cbdc0f47395d1 ++after: $r28=0x143abd032fcea98b, $r30=0x285cbdc0f47395d1 ++bitrev.8b $r29, $r13 :: ++before: $r29=0xd9b8364a793bc50c, $r13=0xded35d7c7ba73d29 ++after: $r29=0x7bcbba3edee5bc94, $r13=0xded35d7c7ba73d29 ++bitrev.8b $r12, $r28 :: ++before: $r12=0x18d7769bc1147dc5, $r28=0xfb6cda8c7f12313a ++after: $r12=0xdf365b31fe488c5c, $r28=0xfb6cda8c7f12313a 
++bitrev.8b $r23, $r6 :: ++before: $r23=0xeff84dc134b3acbe, $r6=0xee7c4e89e333eda8 ++after: $r23=0x773e7291c7ccb715, $r6=0xee7c4e89e333eda8 ++bitrev.8b $r24, $r20 :: ++before: $r24=0xad65748f0bc46e9f, $r20=0xd0d88137a6284eac ++after: $r24=0x0b1b81ec65147235, $r20=0xd0d88137a6284eac ++bitrev.8b $r10, $r5 :: ++before: $r10=0xe0e1c1e262352e89, $r5=0x9c43ebc4f7c65dc1 ++after: $r10=0x39c2d723ef63ba83, $r5=0x9c43ebc4f7c65dc1 ++bitrev.8b $r27, $r13 :: ++before: $r27=0x444a53aa65d317dc, $r13=0x473eea7ea5691da7 ++after: $r27=0xe27c577ea596b8e5, $r13=0x473eea7ea5691da7 ++bitrev.8b $r13, $r9 :: ++before: $r13=0xfc48d0fdf4c7a6e5, $r9=0x5dcad407df3401a5 ++after: $r13=0xba532be0fb2c80a5, $r9=0x5dcad407df3401a5 ++bitrev.8b $r12, $r5 :: ++before: $r12=0x0ebef32fcbd91e9a, $r5=0xe1eeea527816355e ++after: $r12=0x8777574a1e68ac7a, $r5=0xe1eeea527816355e ++bitrev.w $r18, $r15 :: ++before: $r18=0x02028b0c8691a767, $r15=0x5822df2950c9c2d3 ++after: $r18=0xffffffffcb43930a, $r15=0x5822df2950c9c2d3 ++bitrev.w $r30, $r27 :: ++before: $r30=0x2a2d48209d9f377b, $r27=0xde9d59b836df41fc ++after: $r30=0x000000003f82fb6c, $r27=0xde9d59b836df41fc ++bitrev.w $r17, $r4 :: ++before: $r17=0xe6fb8b07c90464e6, $r4=0x65976cb5c6c6a5b0 ++after: $r17=0x000000000da56363, $r4=0x65976cb5c6c6a5b0 ++bitrev.w $r9, $r31 :: ++before: $r9=0x1b95159ec5c37644, $r31=0x62c549b741c2adad ++after: $r9=0xffffffffb5b54382, $r31=0x62c549b741c2adad ++bitrev.w $r17, $r14 :: ++before: $r17=0x8b414dfa7156f0ce, $r14=0x9642d0186f420e7c ++after: $r17=0x000000003e7042f6, $r14=0x9642d0186f420e7c ++bitrev.w $r15, $r8 :: ++before: $r15=0x2722ecb374b4d5e3, $r8=0xeaf151a286bbc4cf ++after: $r15=0xfffffffff323dd61, $r8=0xeaf151a286bbc4cf ++bitrev.w $r27, $r19 :: ++before: $r27=0x058ec913c63634a5, $r19=0xe723c39df96a4fd2 ++after: $r27=0x000000004bf2569f, $r19=0xe723c39df96a4fd2 ++bitrev.w $r7, $r26 :: ++before: $r7=0xa245e7dd80a324a2, $r26=0x0e7d6c2b2683291e ++after: $r7=0x000000007894c164, $r26=0x0e7d6c2b2683291e ++bitrev.w $r31, $r6 :: ++before: $r31=0x114292ed02ba1255, $r6=0x13cd62afac5ac3d4 ++after: $r31=0x000000002bc35a35, $r6=0x13cd62afac5ac3d4 ++bitrev.w $r7, $r25 :: ++before: $r7=0xbd46d88fc8d2933b, $r25=0x69ce9ccb487dadd1 ++after: $r7=0xffffffff8bb5be12, $r25=0x69ce9ccb487dadd1 ++bitrev.d $r4, $r29 :: ++before: $r4=0xeaacaeb60b227eab, $r29=0x799f36da44887e2c ++after: $r4=0x347e11225b6cf99e, $r29=0x799f36da44887e2c ++bitrev.d $r29, $r6 :: ++before: $r29=0xcfbb055ab1ebf7fa, $r6=0x2924f63fec744b02 ++after: $r29=0x40d22e37fc6f2494, $r6=0x2924f63fec744b02 ++bitrev.d $r28, $r31 :: ++before: $r28=0xaac74a398d76900d, $r31=0xf6c75e45e33b4cb7 ++after: $r28=0xed32dcc7a27ae36f, $r31=0xf6c75e45e33b4cb7 ++bitrev.d $r24, $r12 :: ++before: $r24=0xfc8bc33fb4a8d023, $r12=0xcccd98e9d53aa26a ++after: $r24=0x56455cab9719b333, $r12=0xcccd98e9d53aa26a ++bitrev.d $r8, $r7 :: ++before: $r8=0x7502cd68289f4c3a, $r7=0x746ddfd3c3a512b1 ++after: $r8=0x8d48a5c3cbfbb62e, $r7=0x746ddfd3c3a512b1 ++bitrev.d $r6, $r16 :: ++before: $r6=0xe8b94bfe615774ae, $r16=0x518770bbee53d619 ++after: $r6=0x986bca77dd0ee18a, $r16=0x518770bbee53d619 ++bitrev.d $r24, $r4 :: ++before: $r24=0x6318c17dbae816c3, $r4=0x9ab684e129b57f07 ++after: $r24=0xe0fead9487216d59, $r4=0x9ab684e129b57f07 ++bitrev.d $r27, $r23 :: ++before: $r27=0x8a22909b005a86b8, $r23=0x69337e8c3b1fc2bb ++after: $r27=0xdd43f8dc317ecc96, $r23=0x69337e8c3b1fc2bb ++bitrev.d $r20, $r9 :: ++before: $r20=0x009f43885d40caf0, $r9=0x193cbf609dbc33d4 ++after: $r20=0x2bcc3db906fd3c98, $r9=0x193cbf609dbc33d4 ++bitrev.d $r30, $r19 :: ++before: 
$r30=0x30fa02e0fc390ac9, $r19=0x21686c931c6260da ++after: $r30=0x5b064638c9361684, $r19=0x21686c931c6260da ++bytepick.w $r26, $r15, $r19, 1 :: ++before: $r26=0x1b0b980dd3271273, $r15=0x8737ca6c8106ceee, $r19=0x02807e0dcb47d6ef ++after: $r26=0x0000000047d6ef81, $r15=0x8737ca6c8106ceee, $r19=0x02807e0dcb47d6ef ++bytepick.w $r15, $r17, $r7, 0 :: ++before: $r15=0x3d2e3fbcbd032001, $r17=0x5eced8cf3da8b205, $r7=0xb8155b41321e09c0 ++after: $r15=0x00000000321e09c0, $r17=0x5eced8cf3da8b205, $r7=0xb8155b41321e09c0 ++bytepick.w $r12, $r15, $r17, 3 :: ++before: $r12=0x2670c80f12a87520, $r15=0x29ab42125e3ea5c8, $r17=0x32a39ac435460f2f ++after: $r12=0x000000002f5e3ea5, $r15=0x29ab42125e3ea5c8, $r17=0x32a39ac435460f2f ++bytepick.w $r4, $r20, $r18, 3 :: ++before: $r4=0x5a64271926277c04, $r20=0xcbde225cc736e5d5, $r18=0x18abacc874db47e9 ++after: $r4=0xffffffffe9c736e5, $r20=0xcbde225cc736e5d5, $r18=0x18abacc874db47e9 ++bytepick.w $r8, $r5, $r24, 3 :: ++before: $r8=0xdb41606ce3f9df94, $r5=0xc3f6ce370d754a3f, $r24=0x34ad5a423a5c42e3 ++after: $r8=0xffffffffe30d754a, $r5=0xc3f6ce370d754a3f, $r24=0x34ad5a423a5c42e3 ++bytepick.w $r5, $r30, $r14, 2 :: ++before: $r5=0xedb3aad221050d0b, $r30=0x46f5823389f2581a, $r14=0xf766f1e75349809e ++after: $r5=0xffffffff809e89f2, $r30=0x46f5823389f2581a, $r14=0xf766f1e75349809e ++bytepick.w $r4, $r19, $r18, 0 :: ++before: $r4=0xf92ed0231f25c991, $r19=0xba59df0352ed6b3e, $r18=0x58d6fbce4e4325e8 ++after: $r4=0x000000004e4325e8, $r19=0xba59df0352ed6b3e, $r18=0x58d6fbce4e4325e8 ++bytepick.w $r18, $r28, $r24, 3 :: ++before: $r18=0x177dcaf8fcd30180, $r28=0xbdc04b3b8f707462, $r24=0x6102168606deb3ed ++after: $r18=0xffffffffed8f7074, $r28=0xbdc04b3b8f707462, $r24=0x6102168606deb3ed ++bytepick.w $r13, $r27, $r29, 2 :: ++before: $r13=0x383d82c5d717259b, $r27=0x495e30e5e680d7fc, $r29=0x1c17f315ebb3bec3 ++after: $r13=0xffffffffbec3e680, $r27=0x495e30e5e680d7fc, $r29=0x1c17f315ebb3bec3 ++bytepick.w $r5, $r29, $r4, 1 :: ++before: $r5=0x26a0fb212ab80a3a, $r29=0x78b167aecd81f869, $r4=0x6daab499f228fef4 ++after: $r5=0x0000000028fef4cd, $r29=0x78b167aecd81f869, $r4=0x6daab499f228fef4 ++bytepick.d $r28, $r4, $r28, 7 :: ++before: $r28=0x794fa22d52f7e834, $r4=0x2f084db071d3bcce, $r28=0xa0cf51d7020f10c1 ++after: $r28=0xc12f084db071d3bc, $r4=0x2f084db071d3bcce, $r28=0xc12f084db071d3bc ++bytepick.d $r10, $r18, $r4, 2 :: ++before: $r10=0x9fd7a6b378604833, $r18=0x37da15f8a7154cab, $r4=0xaedd64328d27a0a8 ++after: $r10=0x64328d27a0a837da, $r18=0x37da15f8a7154cab, $r4=0xaedd64328d27a0a8 ++bytepick.d $r7, $r6, $r24, 3 :: ++before: $r7=0xdee49920d429d3c2, $r6=0x15e3f61f2f82a2d1, $r24=0xdeba03c7761e4678 ++after: $r7=0xc7761e467815e3f6, $r6=0x15e3f61f2f82a2d1, $r24=0xdeba03c7761e4678 ++bytepick.d $r19, $r16, $r5, 4 :: ++before: $r19=0x53bda4d18e61fc44, $r16=0xc79bd94439006673, $r5=0xa8024ab452a2bd52 ++after: $r19=0x52a2bd52c79bd944, $r16=0xc79bd94439006673, $r5=0xa8024ab452a2bd52 ++bytepick.d $r26, $r19, $r25, 7 :: ++before: $r26=0xc8aae5136d925592, $r19=0xea109dd2837d3acf, $r25=0x30e93a75e695666a ++after: $r26=0x6aea109dd2837d3a, $r19=0xea109dd2837d3acf, $r25=0x30e93a75e695666a ++bytepick.d $r8, $r14, $r8, 2 :: ++before: $r8=0xa03db273c845b37f, $r14=0xa7fd0053a136769f, $r8=0x6ab932903229b035 ++after: $r8=0x32903229b035a7fd, $r14=0xa7fd0053a136769f, $r8=0x32903229b035a7fd ++bytepick.d $r9, $r14, $r23, 2 :: ++before: $r9=0x2f160a0d147b300f, $r14=0xdae9d5d15bb8f5b5, $r23=0xc4fdfbb29d49dfe4 ++after: $r9=0xfbb29d49dfe4dae9, $r14=0xdae9d5d15bb8f5b5, $r23=0xc4fdfbb29d49dfe4 ++bytepick.d $r20, $r18, $r15, 5 :: 
++before: $r20=0x30cefdebc30b841a, $r18=0xbfd016fb0312277c, $r15=0x44269b95d496912f ++after: $r20=0x96912fbfd016fb03, $r18=0xbfd016fb0312277c, $r15=0x44269b95d496912f ++bytepick.d $r12, $r17, $r5, 2 :: ++before: $r12=0xde32bc5d3471eed2, $r17=0xdb807610c6e762e4, $r5=0xb2148e34e649d1b8 ++after: $r12=0x8e34e649d1b8db80, $r17=0xdb807610c6e762e4, $r5=0xb2148e34e649d1b8 ++bytepick.d $r5, $r24, $r28, 3 :: ++before: $r5=0x9ab1be6a0faa61a8, $r24=0x97d4a12579967739, $r28=0xaa592ef1fd606bad ++after: $r5=0xf1fd606bad97d4a1, $r24=0x97d4a12579967739, $r28=0xaa592ef1fd606bad ++maskeqz $r14, $r28, $r25 :: ++before: $r14=0xc263b6b8f3404c8d, $r28=0x90ef733c88c88866, $r25=0xd256888d94e8d21a ++after: $r14=0x90ef733c88c88866, $r28=0x90ef733c88c88866, $r25=0xd256888d94e8d21a ++maskeqz $r13, $r9, $r15 :: ++before: $r13=0x5bdd86b962c61db4, $r9=0x8a78f7b88a728d92, $r15=0x69e707acb2c26a83 ++after: $r13=0x8a78f7b88a728d92, $r9=0x8a78f7b88a728d92, $r15=0x69e707acb2c26a83 ++maskeqz $r7, $r7, $r13 :: ++before: $r7=0xea86abdbdea660cb, $r7=0xfb778deef0a5b893, $r13=0xad10e23c971d1a9f ++after: $r7=0xfb778deef0a5b893, $r7=0xfb778deef0a5b893, $r13=0xad10e23c971d1a9f ++maskeqz $r8, $r7, $r19 :: ++before: $r8=0xf64df33b6146939f, $r7=0xe7376d3da44f4dfd, $r19=0x7987e122af2505ab ++after: $r8=0xe7376d3da44f4dfd, $r7=0xe7376d3da44f4dfd, $r19=0x7987e122af2505ab ++maskeqz $r10, $r27, $r29 :: ++before: $r10=0x404a261c069b488b, $r27=0x81886c523ec2658c, $r29=0x3236dc83d0a27cc1 ++after: $r10=0x81886c523ec2658c, $r27=0x81886c523ec2658c, $r29=0x3236dc83d0a27cc1 ++maskeqz $r23, $r16, $r25 :: ++before: $r23=0x8671050519b7bda0, $r16=0x26fa2567b106d73a, $r25=0x0d884011e0d767fe ++after: $r23=0x26fa2567b106d73a, $r16=0x26fa2567b106d73a, $r25=0x0d884011e0d767fe ++maskeqz $r5, $r19, $r18 :: ++before: $r5=0xbd8d4cef53122132, $r19=0x4976c047c57ec148, $r18=0x602312f372049a5e ++after: $r5=0x4976c047c57ec148, $r19=0x4976c047c57ec148, $r18=0x602312f372049a5e ++maskeqz $r29, $r24, $r23 :: ++before: $r29=0x07f390b695d8b12e, $r24=0x70043e7666a24a34, $r23=0xfee8f8f90ab3ac9b ++after: $r29=0x70043e7666a24a34, $r24=0x70043e7666a24a34, $r23=0xfee8f8f90ab3ac9b ++maskeqz $r25, $r4, $r18 :: ++before: $r25=0x07eaffcb6dac1b5b, $r4=0x4b12f8c6738216a2, $r18=0x409acb80b7391511 ++after: $r25=0x4b12f8c6738216a2, $r4=0x4b12f8c6738216a2, $r18=0x409acb80b7391511 ++maskeqz $r30, $r6, $r24 :: ++before: $r30=0x14d829636b628dc9, $r6=0xdb88a366a2271c2c, $r24=0x0ea0d5998835940a ++after: $r30=0xdb88a366a2271c2c, $r6=0xdb88a366a2271c2c, $r24=0x0ea0d5998835940a ++masknez $r14, $r24, $r5 :: ++before: $r14=0x46b15bbb9507bd79, $r24=0xc92af628c880a454, $r5=0x846a586db0af0965 ++after: $r14=000000000000000000, $r24=0xc92af628c880a454, $r5=0x846a586db0af0965 ++masknez $r30, $r8, $r8 :: ++before: $r30=0x43cd20b5234db4e8, $r8=0x7aeee6ab6b10561f, $r8=0x45ab4fdb4ca8b325 ++after: $r30=000000000000000000, $r8=0x45ab4fdb4ca8b325, $r8=0x45ab4fdb4ca8b325 ++masknez $r24, $r19, $r15 :: ++before: $r24=0xd3d50bbb34b528e2, $r19=0xdd71746b0beedae3, $r15=0xa34d82fc50174094 ++after: $r24=000000000000000000, $r19=0xdd71746b0beedae3, $r15=0xa34d82fc50174094 ++masknez $r29, $r26, $r26 :: ++before: $r29=0x576cb2da15b1462d, $r26=0x6c669f0195b50b7a, $r26=0xec1609ef36aa938f ++after: $r29=000000000000000000, $r26=0xec1609ef36aa938f, $r26=0xec1609ef36aa938f ++masknez $r4, $r29, $r10 :: ++before: $r4=0xaa220f67a02617db, $r29=0x0ffcd18e3016e10f, $r10=0x4cf9bdd8dca7f88f ++after: $r4=000000000000000000, $r29=0x0ffcd18e3016e10f, $r10=0x4cf9bdd8dca7f88f ++masknez $r23, $r9, $r29 :: ++before: $r23=0x774e1c840428fbde, 
$r9=0x391268694388d2a7, $r29=0xf06192a4e5780c53 ++after: $r23=000000000000000000, $r9=0x391268694388d2a7, $r29=0xf06192a4e5780c53 ++masknez $r7, $r25, $r28 :: ++before: $r7=0x7b75099f16135faa, $r25=0xf95af681c18bf31c, $r28=0x2f6122581dfdef74 ++after: $r7=000000000000000000, $r25=0xf95af681c18bf31c, $r28=0x2f6122581dfdef74 ++masknez $r26, $r10, $r16 :: ++before: $r26=0xe6006c9bd6bae204, $r10=0x7e84e5db1181249d, $r16=0x6ab2371059cdc875 ++after: $r26=000000000000000000, $r10=0x7e84e5db1181249d, $r16=0x6ab2371059cdc875 ++masknez $r26, $r15, $r28 :: ++before: $r26=0xb4c9c784ef74245f, $r15=0x20cc1c4c169ca02c, $r28=0x606eeb8ce6278d16 ++after: $r26=000000000000000000, $r15=0x20cc1c4c169ca02c, $r28=0x606eeb8ce6278d16 ++masknez $r19, $r16, $r16 :: ++before: $r19=0x75a721553f7c7054, $r16=0x7b63b7b7b3f5bd5f, $r16=0xf8c7933e92e155ee ++after: $r19=000000000000000000, $r16=0xf8c7933e92e155ee, $r16=0xf8c7933e92e155ee ++bstrins.w $r27, $r16, 31, 8 :: ++before: $r27=0x431055863e78b187, $r16=0xe18dda9620a50e9d ++after: $r27=0xffffffffa50e9d87, $r16=0xe18dda9620a50e9d ++bstrins.w $r26, $r27, 30, 27 :: ++before: $r26=0x19f800eab7e1ab51, $r27=0x61e7d86005d21d29 ++after: $r26=0xffffffffcfe1ab51, $r27=0x61e7d86005d21d29 ++bstrins.w $r15, $r4, 17, 14 :: ++before: $r15=0xb141d462e777528d, $r4=0xb7aebff9bcca1643 ++after: $r15=0xffffffffe774d28d, $r4=0xb7aebff9bcca1643 ++bstrins.w $r30, $r17, 24, 6 :: ++before: $r30=0xfac48083375844fe, $r17=0x6d3283ba14cc27eb ++after: $r30=0x000000003709fafe, $r17=0x6d3283ba14cc27eb ++bstrins.w $r12, $r12, 30, 25 :: ++before: $r12=0x9b7629774f19f64a, $r12=0x84ee8d65b2842686 ++after: $r12=0xffffffff8c842686, $r12=0xffffffff8c842686 ++bstrins.w $r15, $r10, 26, 1 :: ++before: $r15=0x290172844863090f, $r10=0x85ea298976069fcd ++after: $r15=0x000000004c0d3f9b, $r10=0x85ea298976069fcd ++bstrins.w $r10, $r13, 23, 8 :: ++before: $r10=0x66942ba1c15e85aa, $r13=0xddb2dfa7474a4370 ++after: $r10=0xffffffffc14370aa, $r13=0xddb2dfa7474a4370 ++bstrins.w $r5, $r20, 18, 16 :: ++before: $r5=0x3dcfecca80bf0d79, $r20=0x5044b246f2d3f890 ++after: $r5=0xffffffff80b80d79, $r20=0x5044b246f2d3f890 ++bstrins.w $r23, $r5, 25, 21 :: ++before: $r23=0xa11723142f1472a7, $r5=0xcbaaa9a23d119663 ++after: $r23=0x000000002c7472a7, $r5=0xcbaaa9a23d119663 ++bstrins.w $r20, $r31, 13, 12 :: ++before: $r20=0x6a1110240ba884b8, $r31=0x45cadf0ffe08cc25 ++after: $r20=0x000000000ba894b8, $r31=0x45cadf0ffe08cc25 ++bstrpick.w $r5, $r23, 23, 11 :: ++before: $r5=0x6885eaa89f691954, $r23=0x94f8458597294f2e ++after: $r5=0x0000000000000529, $r23=0x94f8458597294f2e ++bstrpick.w $r25, $r8, 18, 11 :: ++before: $r25=0x11be9b9923ebee96, $r8=0x23deda120a49df15 ++after: $r25=0x000000000000003b, $r8=0x23deda120a49df15 ++bstrpick.w $r6, $r6, 10, 3 :: ++before: $r6=0x3546d655181289bc, $r6=0x7ee84a41c952b690 ++after: $r6=0x00000000000000d2, $r6=0x00000000000000d2 ++bstrpick.w $r25, $r5, 15, 9 :: ++before: $r25=0xb2eec884ea77f548, $r5=0x23992bc40919416f ++after: $r25=0x0000000000000020, $r5=0x23992bc40919416f ++bstrpick.w $r26, $r14, 21, 8 :: ++before: $r26=0x8e591161730ac582, $r14=0xf45f4435cc1cb138 ++after: $r26=0x0000000000001cb1, $r14=0xf45f4435cc1cb138 ++bstrpick.w $r9, $r14, 7, 3 :: ++before: $r9=0x1ac92d930e8361f9, $r14=0xcc11dd56e96c6256 ++after: $r9=0x000000000000000a, $r14=0xcc11dd56e96c6256 ++bstrpick.w $r19, $r9, 8, 8 :: ++before: $r19=0xd15fd80fafe60a58, $r9=0xb1426a8c680d628c ++after: $r19=000000000000000000, $r9=0xb1426a8c680d628c ++bstrpick.w $r17, $r13, 30, 12 :: ++before: $r17=0xfa48c3cd091d2b5e, $r13=0x3a2827a58a014a72 ++after: 
$r17=0x000000000000a014, $r13=0x3a2827a58a014a72 ++bstrpick.w $r6, $r31, 16, 7 :: ++before: $r6=0xca10a858ebfa78a1, $r31=0x202a38722f270884 ++after: $r6=0x0000000000000211, $r31=0x202a38722f270884 ++bstrpick.w $r20, $r10, 31, 15 :: ++before: $r20=0xc010deb269ae6ba2, $r10=0x98f1d297734f9f4c ++after: $r20=0x000000000000e69f, $r10=0x98f1d297734f9f4c ++bstrins.d $r29, $r17, 60, 25 :: ++before: $r29=0x7cf4a9ec79307e59, $r17=0xb1b5afc00eef90a3 ++after: $r29=0x601ddf2147307e59, $r17=0xb1b5afc00eef90a3 ++bstrins.d $r10, $r27, 31, 22 :: ++before: $r10=0xc708602dee32579f, $r27=0x199d90a711e94375 ++after: $r10=0xc708602ddd72579f, $r27=0x199d90a711e94375 ++bstrins.d $r4, $r24, 58, 58 :: ++before: $r4=0x4e5ce98e217a4b59, $r24=0xaf25b5661daefdea ++after: $r4=0x4a5ce98e217a4b59, $r24=0xaf25b5661daefdea ++bstrins.d $r12, $r30, 16, 6 :: ++before: $r12=0x9505d862c56b1708, $r30=0x7f3f0c983ce27863 ++after: $r12=0x9505d862c56a18c8, $r30=0x7f3f0c983ce27863 ++bstrins.d $r29, $r5, 43, 0 :: ++before: $r29=0x248f295ef3afe5aa, $r5=0x9469277db61227b7 ++after: $r29=0x248f277db61227b7, $r5=0x9469277db61227b7 ++bstrins.d $r31, $r31, 49, 23 :: ++before: $r31=0xbc5f0c47c3a63a94, $r31=0x4aacc1c77ad0c09a ++after: $r31=0x4aad68604d50c09a, $r31=0x4aad68604d50c09a ++bstrins.d $r6, $r24, 12, 2 :: ++before: $r6=0x79110235b8c34188, $r24=0x75e3e311aef2bef9 ++after: $r6=0x79110235b8c35be4, $r24=0x75e3e311aef2bef9 ++bstrins.d $r6, $r16, 43, 13 :: ++before: $r6=0xaa6e63ffd80b76c5, $r16=0xb1ea7dcb3af0881d ++after: $r6=0xaa6e675e1103b6c5, $r16=0xb1ea7dcb3af0881d ++bstrins.d $r15, $r25, 53, 29 :: ++before: $r15=0x5b68a802f26a1804, $r25=0xb4f651115b84591b ++after: $r15=0x5b708b23726a1804, $r25=0xb4f651115b84591b ++bstrins.d $r9, $r9, 61, 40 :: ++before: $r9=0x3394218c965d5f1a, $r9=0xf3d30b5d4d4089b4 ++after: $r9=0xc089b45d4d4089b4, $r9=0xc089b45d4d4089b4 ++bstrpick.d $r27, $r27, 63, 33 :: ++before: $r27=0x503c8fae2d6d7b58, $r27=0x9fd9869ca812de0c ++after: $r27=0x000000004fecc34e, $r27=0x000000004fecc34e ++bstrpick.d $r14, $r5, 52, 40 :: ++before: $r14=0x65f05eaa5e13856a, $r5=0xd52c72fbeccc39f5 ++after: $r14=0x0000000000000c72, $r5=0xd52c72fbeccc39f5 ++bstrpick.d $r13, $r20, 48, 14 :: ++before: $r13=0x9cea777df4d2eae0, $r20=0x6326727a36499800 ++after: $r13=0x00000001c9e8d926, $r20=0x6326727a36499800 ++bstrpick.d $r10, $r17, 43, 20 :: ++before: $r10=0xf30a073a4a56604b, $r17=0x0c12d112f6a0c8f1 ++after: $r10=0x0000000000112f6a, $r17=0x0c12d112f6a0c8f1 ++bstrpick.d $r13, $r25, 55, 37 :: ++before: $r13=0xe559d975e0d9ac85, $r25=0xcf41f30cc4a46713 ++after: $r13=0x0000000000020f98, $r25=0xcf41f30cc4a46713 ++bstrpick.d $r29, $r4, 34, 20 :: ++before: $r29=0x41843db6c2a206cb, $r4=0x343f795d45fcff8c ++after: $r29=0x000000000000545f, $r4=0x343f795d45fcff8c ++bstrpick.d $r27, $r28, 27, 10 :: ++before: $r27=0xb359821297377fee, $r28=0x4fc51c5773e64f69 ++after: $r27=0x000000000000f993, $r28=0x4fc51c5773e64f69 ++bstrpick.d $r24, $r24, 63, 20 :: ++before: $r24=0xed3cb5d1e8f0e55e, $r24=0x9cdbb70a8b8d3945 ++after: $r24=0x000009cdbb70a8b8, $r24=0x000009cdbb70a8b8 ++bstrpick.d $r7, $r30, 34, 30 :: ++before: $r7=0x11b7344343be1ccf, $r30=0xa3422c671803480f ++after: $r7=0x000000000000001c, $r30=0xa3422c671803480f ++bstrpick.d $r15, $r4, 55, 4 :: ++before: $r15=0x3670c6b869f28085, $r4=0x2caa9d9c1351e402 ++after: $r15=0x000aa9d9c1351e40, $r4=0x2caa9d9c1351e402 ++crc.w.b.w $r12, $r13, $r14 :: ++before: $r12=0x0123456789abcdef, $r13=0x4b154113f7d32514, $r14=0xcce230caafbf9cc9 ++after: $r12=0xfffffffff8cd11f5, $r13=0x4b154113f7d32514, $r14=0xcce230caafbf9cc9 
++crc.w.b.w $r12, $r13, $r14 :: ++before: $r12=0x0123456789abcdef, $r13=0x33d5d595721d4f13, $r14=0xf4509311f443a7ce ++after: $r12=0xfffffffff896edce, $r13=0x33d5d595721d4f13, $r14=0xf4509311f443a7ce ++crc.w.b.w $r12, $r13, $r14 :: ++before: $r12=0x0123456789abcdef, $r13=0x4a3c6de6954cbc17, $r14=0x111b21e39fbd7254 ++after: $r12=0xffffffffef4aad58, $r13=0x4a3c6de6954cbc17, $r14=0x111b21e39fbd7254 ++crc.w.b.w $r12, $r13, $r14 :: ++before: $r12=0x0123456789abcdef, $r13=0xfbb5c64ed1b044c6, $r14=0x33ca4c4fb3960326 ++after: $r12=0xffffffffa0b9747b, $r13=0xfbb5c64ed1b044c6, $r14=0x33ca4c4fb3960326 ++crc.w.b.w $r12, $r13, $r14 :: ++before: $r12=0x0123456789abcdef, $r13=0x2b7c5939d7c0f528, $r14=0xb73870a5a6630162 ++after: $r12=0xffffffff96afcb8f, $r13=0x2b7c5939d7c0f528, $r14=0xb73870a5a6630162 ++crc.w.b.w $r12, $r13, $r14 :: ++before: $r12=0x0123456789abcdef, $r13=0x02fe41918ac5cdba, $r14=0x48e0815289728f05 ++after: $r12=0x000000005b57dc92, $r13=0x02fe41918ac5cdba, $r14=0x48e0815289728f05 ++crc.w.b.w $r12, $r13, $r14 :: ++before: $r12=0x0123456789abcdef, $r13=0xb60a8f381f187bae, $r14=0x008c208cc413ff72 ++after: $r12=0xffffffff8fa18d00, $r13=0xb60a8f381f187bae, $r14=0x008c208cc413ff72 ++crc.w.h.w $r12, $r13, $r14 :: ++before: $r12=0x0123456789abcdef, $r13=0x4b154113f7d32514, $r14=0xcce230caafbf9cc9 ++after: $r12=0xffffffff862b1fc5, $r13=0x4b154113f7d32514, $r14=0xcce230caafbf9cc9 ++crc.w.h.w $r12, $r13, $r14 :: ++before: $r12=0x0123456789abcdef, $r13=0x33d5d595721d4f13, $r14=0xf4509311f443a7ce ++after: $r12=0xffffffff9a47255b, $r13=0x33d5d595721d4f13, $r14=0xf4509311f443a7ce ++crc.w.h.w $r12, $r13, $r14 :: ++before: $r12=0x0123456789abcdef, $r13=0x4a3c6de6954cbc17, $r14=0x111b21e39fbd7254 ++after: $r12=0xffffffffa7886ccc, $r13=0x4a3c6de6954cbc17, $r14=0x111b21e39fbd7254 ++crc.w.h.w $r12, $r13, $r14 :: ++before: $r12=0x0123456789abcdef, $r13=0xfbb5c64ed1b044c6, $r14=0x33ca4c4fb3960326 ++after: $r12=0xffffffffb6c69449, $r13=0xfbb5c64ed1b044c6, $r14=0x33ca4c4fb3960326 ++crc.w.h.w $r12, $r13, $r14 :: ++before: $r12=0x0123456789abcdef, $r13=0x2b7c5939d7c0f528, $r14=0xb73870a5a6630162 ++after: $r12=0xffffffffb04637e9, $r13=0x2b7c5939d7c0f528, $r14=0xb73870a5a6630162 ++crc.w.h.w $r12, $r13, $r14 :: ++before: $r12=0x0123456789abcdef, $r13=0x02fe41918ac5cdba, $r14=0x48e0815289728f05 ++after: $r12=0xfffffffffb8f1bb9, $r13=0x02fe41918ac5cdba, $r14=0x48e0815289728f05 ++crc.w.h.w $r12, $r13, $r14 :: ++before: $r12=0x0123456789abcdef, $r13=0xb60a8f381f187bae, $r14=0x008c208cc413ff72 ++after: $r12=0xffffffffc7580939, $r13=0xb60a8f381f187bae, $r14=0x008c208cc413ff72 ++crc.w.w.w $r12, $r13, $r14 :: ++before: $r12=0x0123456789abcdef, $r13=0x4b154113f7d32514, $r14=0xcce230caafbf9cc9 ++after: $r12=0xffffffffb24959b6, $r13=0x4b154113f7d32514, $r14=0xcce230caafbf9cc9 ++crc.w.w.w $r12, $r13, $r14 :: ++before: $r12=0x0123456789abcdef, $r13=0x33d5d595721d4f13, $r14=0xf4509311f443a7ce ++after: $r12=0x00000000532cb693, $r13=0x33d5d595721d4f13, $r14=0xf4509311f443a7ce ++crc.w.w.w $r12, $r13, $r14 :: ++before: $r12=0x0123456789abcdef, $r13=0x4a3c6de6954cbc17, $r14=0x111b21e39fbd7254 ++after: $r12=0xffffffffffe2757b, $r13=0x4a3c6de6954cbc17, $r14=0x111b21e39fbd7254 ++crc.w.w.w $r12, $r13, $r14 :: ++before: $r12=0x0123456789abcdef, $r13=0xfbb5c64ed1b044c6, $r14=0x33ca4c4fb3960326 ++after: $r12=0xffffffffc3c8592d, $r13=0xfbb5c64ed1b044c6, $r14=0x33ca4c4fb3960326 ++crc.w.w.w $r12, $r13, $r14 :: ++before: $r12=0x0123456789abcdef, $r13=0x2b7c5939d7c0f528, $r14=0xb73870a5a6630162 ++after: $r12=0xffffffffe44ccdd5, 
$r13=0x2b7c5939d7c0f528, $r14=0xb73870a5a6630162 ++crc.w.w.w $r12, $r13, $r14 :: ++before: $r12=0x0123456789abcdef, $r13=0x02fe41918ac5cdba, $r14=0x48e0815289728f05 ++after: $r12=0x0000000004826ea7, $r13=0x02fe41918ac5cdba, $r14=0x48e0815289728f05 ++crc.w.w.w $r12, $r13, $r14 :: ++before: $r12=0x0123456789abcdef, $r13=0xb60a8f381f187bae, $r14=0x008c208cc413ff72 ++after: $r12=0x00000000784b67ea, $r13=0xb60a8f381f187bae, $r14=0x008c208cc413ff72 ++crc.w.d.w $r12, $r13, $r14 :: ++before: $r12=0x0123456789abcdef, $r13=0x4b154113f7d32514, $r14=0xcce230caafbf9cc9 ++after: $r12=0xffffffffbaef431f, $r13=0x4b154113f7d32514, $r14=0xcce230caafbf9cc9 ++crc.w.d.w $r12, $r13, $r14 :: ++before: $r12=0x0123456789abcdef, $r13=0x33d5d595721d4f13, $r14=0xf4509311f443a7ce ++after: $r12=0xffffffffe7361109, $r13=0x33d5d595721d4f13, $r14=0xf4509311f443a7ce ++crc.w.d.w $r12, $r13, $r14 :: ++before: $r12=0x0123456789abcdef, $r13=0x4a3c6de6954cbc17, $r14=0x111b21e39fbd7254 ++after: $r12=0xfffffffff9af6423, $r13=0x4a3c6de6954cbc17, $r14=0x111b21e39fbd7254 ++crc.w.d.w $r12, $r13, $r14 :: ++before: $r12=0x0123456789abcdef, $r13=0xfbb5c64ed1b044c6, $r14=0x33ca4c4fb3960326 ++after: $r12=0x000000006d4f1805, $r13=0xfbb5c64ed1b044c6, $r14=0x33ca4c4fb3960326 ++crc.w.d.w $r12, $r13, $r14 :: ++before: $r12=0x0123456789abcdef, $r13=0x2b7c5939d7c0f528, $r14=0xb73870a5a6630162 ++after: $r12=0xffffffffb22e077e, $r13=0x2b7c5939d7c0f528, $r14=0xb73870a5a6630162 ++crc.w.d.w $r12, $r13, $r14 :: ++before: $r12=0x0123456789abcdef, $r13=0x02fe41918ac5cdba, $r14=0x48e0815289728f05 ++after: $r12=0xffffffff8cb8356f, $r13=0x02fe41918ac5cdba, $r14=0x48e0815289728f05 ++crc.w.d.w $r12, $r13, $r14 :: ++before: $r12=0x0123456789abcdef, $r13=0xb60a8f381f187bae, $r14=0x008c208cc413ff72 ++after: $r12=0xffffffffbe1261f9, $r13=0xb60a8f381f187bae, $r14=0x008c208cc413ff72 ++crcc.w.b.w $r12, $r13, $r14 :: ++before: $r12=0x0123456789abcdef, $r13=0x4b154113f7d32514, $r14=0xcce230caafbf9cc9 ++after: $r12=0x000000006c5412e4, $r13=0x4b154113f7d32514, $r14=0xcce230caafbf9cc9 ++crcc.w.b.w $r12, $r13, $r14 :: ++before: $r12=0x0123456789abcdef, $r13=0x33d5d595721d4f13, $r14=0xf4509311f443a7ce ++after: $r12=0x000000006c0feedf, $r13=0x33d5d595721d4f13, $r14=0xf4509311f443a7ce ++crcc.w.b.w $r12, $r13, $r14 :: ++before: $r12=0x0123456789abcdef, $r13=0x4a3c6de6954cbc17, $r14=0x111b21e39fbd7254 ++after: $r12=0x0000000052b4533a, $r13=0x4a3c6de6954cbc17, $r14=0x111b21e39fbd7254 ++crcc.w.b.w $r12, $r13, $r14 :: ++before: $r12=0x0123456789abcdef, $r13=0xfbb5c64ed1b044c6, $r14=0x33ca4c4fb3960326 ++after: $r12=0xffffffffe3833e19, $r13=0xfbb5c64ed1b044c6, $r14=0x33ca4c4fb3960326 ++crcc.w.b.w $r12, $r13, $r14 :: ++before: $r12=0x0123456789abcdef, $r13=0x2b7c5939d7c0f528, $r14=0xb73870a5a6630162 ++after: $r12=0x000000002a3f5685, $r13=0x2b7c5939d7c0f528, $r14=0xb73870a5a6630162 ++crcc.w.b.w $r12, $r13, $r14 :: ++before: $r12=0x0123456789abcdef, $r13=0x02fe41918ac5cdba, $r14=0x48e0815289728f05 ++after: $r12=0xffffffffec8f3c62, $r13=0x02fe41918ac5cdba, $r14=0x48e0815289728f05 ++crcc.w.b.w $r12, $r13, $r14 :: ++before: $r12=0x0123456789abcdef, $r13=0xb60a8f381f187bae, $r14=0x008c208cc413ff72 ++after: $r12=0xffffffff9e543d84, $r13=0xb60a8f381f187bae, $r14=0x008c208cc413ff72 ++crcc.w.h.w $r12, $r13, $r14 :: ++before: $r12=0x0123456789abcdef, $r13=0x4b154113f7d32514, $r14=0xcce230caafbf9cc9 ++after: $r12=0x00000000318af1d5, $r13=0x4b154113f7d32514, $r14=0xcce230caafbf9cc9 ++crcc.w.h.w $r12, $r13, $r14 :: ++before: $r12=0x0123456789abcdef, $r13=0x33d5d595721d4f13, 
$r14=0xf4509311f443a7ce ++after: $r12=0xffffffff92c4f3f9, $r13=0x33d5d595721d4f13, $r14=0xf4509311f443a7ce ++crcc.w.h.w $r12, $r13, $r14 :: ++before: $r12=0x0123456789abcdef, $r13=0x4a3c6de6954cbc17, $r14=0x111b21e39fbd7254 ++after: $r12=0xffffffffa40568c3, $r13=0x4a3c6de6954cbc17, $r14=0x111b21e39fbd7254 ++crcc.w.h.w $r12, $r13, $r14 :: ++before: $r12=0x0123456789abcdef, $r13=0xfbb5c64ed1b044c6, $r14=0x33ca4c4fb3960326 ++after: $r12=0xffffffffeeee153e, $r13=0xfbb5c64ed1b044c6, $r14=0x33ca4c4fb3960326 ++crcc.w.h.w $r12, $r13, $r14 :: ++before: $r12=0x0123456789abcdef, $r13=0x2b7c5939d7c0f528, $r14=0xb73870a5a6630162 ++after: $r12=0x0000000071b26b5b, $r13=0x2b7c5939d7c0f528, $r14=0xb73870a5a6630162 ++crcc.w.h.w $r12, $r13, $r14 :: ++before: $r12=0x0123456789abcdef, $r13=0x02fe41918ac5cdba, $r14=0x48e0815289728f05 ++after: $r12=0xfffffffffcb406be, $r13=0x02fe41918ac5cdba, $r14=0x48e0815289728f05 ++crcc.w.h.w $r12, $r13, $r14 :: ++before: $r12=0x0123456789abcdef, $r13=0xb60a8f381f187bae, $r14=0x008c208cc413ff72 ++after: $r12=0xffffffffade3076c, $r13=0xb60a8f381f187bae, $r14=0x008c208cc413ff72 ++crcc.w.w.w $r12, $r13, $r14 :: ++before: $r12=0x0123456789abcdef, $r13=0x4b154113f7d32514, $r14=0xcce230caafbf9cc9 ++after: $r12=0x000000004f6e8750, $r13=0x4b154113f7d32514, $r14=0xcce230caafbf9cc9 ++crcc.w.w.w $r12, $r13, $r14 :: ++before: $r12=0x0123456789abcdef, $r13=0x33d5d595721d4f13, $r14=0xf4509311f443a7ce ++after: $r12=0x000000004548949c, $r13=0x33d5d595721d4f13, $r14=0xf4509311f443a7ce ++crcc.w.w.w $r12, $r13, $r14 :: ++before: $r12=0x0123456789abcdef, $r13=0x4a3c6de6954cbc17, $r14=0x111b21e39fbd7254 ++after: $r12=0x0000000050fc77a7, $r13=0x4a3c6de6954cbc17, $r14=0x111b21e39fbd7254 ++crcc.w.w.w $r12, $r13, $r14 :: ++before: $r12=0x0123456789abcdef, $r13=0xfbb5c64ed1b044c6, $r14=0x33ca4c4fb3960326 ++after: $r12=0x000000000b0f3746, $r13=0xfbb5c64ed1b044c6, $r14=0x33ca4c4fb3960326 ++crcc.w.w.w $r12, $r13, $r14 :: ++before: $r12=0x0123456789abcdef, $r13=0x2b7c5939d7c0f528, $r14=0xb73870a5a6630162 ++after: $r12=0xffffffff92a3acf2, $r13=0x2b7c5939d7c0f528, $r14=0xb73870a5a6630162 ++crcc.w.w.w $r12, $r13, $r14 :: ++before: $r12=0x0123456789abcdef, $r13=0x02fe41918ac5cdba, $r14=0x48e0815289728f05 ++after: $r12=0xffffffffd91fb7ba, $r13=0x02fe41918ac5cdba, $r14=0x48e0815289728f05 ++crcc.w.w.w $r12, $r13, $r14 :: ++before: $r12=0x0123456789abcdef, $r13=0xb60a8f381f187bae, $r14=0x008c208cc413ff72 ++after: $r12=0x000000006b548718, $r13=0xb60a8f381f187bae, $r14=0x008c208cc413ff72 ++crcc.w.d.w $r12, $r13, $r14 :: ++before: $r12=0x0123456789abcdef, $r13=0x4b154113f7d32514, $r14=0xcce230caafbf9cc9 ++after: $r12=0xffffffff8547ffea, $r13=0x4b154113f7d32514, $r14=0xcce230caafbf9cc9 ++crcc.w.d.w $r12, $r13, $r14 :: ++before: $r12=0x0123456789abcdef, $r13=0x33d5d595721d4f13, $r14=0xf4509311f443a7ce ++after: $r12=0x000000001a265977, $r13=0x33d5d595721d4f13, $r14=0xf4509311f443a7ce ++crcc.w.d.w $r12, $r13, $r14 :: ++before: $r12=0x0123456789abcdef, $r13=0x4a3c6de6954cbc17, $r14=0x111b21e39fbd7254 ++after: $r12=0x000000000e1737b7, $r13=0x4a3c6de6954cbc17, $r14=0x111b21e39fbd7254 ++crcc.w.d.w $r12, $r13, $r14 :: ++before: $r12=0x0123456789abcdef, $r13=0xfbb5c64ed1b044c6, $r14=0x33ca4c4fb3960326 ++after: $r12=0x000000007c13f4c5, $r13=0xfbb5c64ed1b044c6, $r14=0x33ca4c4fb3960326 ++crcc.w.d.w $r12, $r13, $r14 :: ++before: $r12=0x0123456789abcdef, $r13=0x2b7c5939d7c0f528, $r14=0xb73870a5a6630162 ++after: $r12=0xffffffff9d9455e3, $r13=0x2b7c5939d7c0f528, $r14=0xb73870a5a6630162 ++crcc.w.d.w $r12, $r13, $r14 :: ++before: 
$r12=0x0123456789abcdef, $r13=0x02fe41918ac5cdba, $r14=0x48e0815289728f05 ++after: $r12=0x000000006df1745f, $r13=0x02fe41918ac5cdba, $r14=0x48e0815289728f05 ++crcc.w.d.w $r12, $r13, $r14 :: ++before: $r12=0x0123456789abcdef, $r13=0xb60a8f381f187bae, $r14=0x008c208cc413ff72 ++after: $r12=0xffffffffb7862239, $r13=0xb60a8f381f187bae, $r14=0x008c208cc413ff72 +diff --git a/none/tests/loongarch64/integer.vgtest b/none/tests/loongarch64/integer.vgtest +new file mode 100644 +index 000000000..daa059178 +--- /dev/null ++++ b/none/tests/loongarch64/integer.vgtest +@@ -0,0 +1,2 @@ ++prog: integer ++vgopts: -q +diff --git a/none/tests/loongarch64/llsc.c b/none/tests/loongarch64/llsc.c +new file mode 100644 +index 000000000..fcb7e3cb3 +--- /dev/null ++++ b/none/tests/loongarch64/llsc.c +@@ -0,0 +1,69 @@ ++#include ++ ++#define TESTINST_LLSC_W(insn, res, addr, offs) \ ++ { \ ++ __asm__ __volatile__( \ ++ "move $t1, %1 \n\t" \ ++ "ll.w $t0, $t1, %2 \n\t" \ ++ insn " \n\t" \ ++ "sc.w $t0, $t1, %2 \n\t" \ ++ "move %0, $t0 \n\t" \ ++ : "=r" (res) \ ++ : "r" (addr), "i" (offs) \ ++ : "$t0", "$t1", "memory"); \ ++ } ++ ++#define TESTINST_LLSC_D(insn, res, addr, offs) \ ++ { \ ++ __asm__ __volatile__( \ ++ "move $t1, %1 \n\t" \ ++ "ll.d $t0, $t1, %2 \n\t" \ ++ insn " \n\t" \ ++ "sc.d $t0, $t1, %2 \n\t" \ ++ "move %0, $t0 \n\t" \ ++ : "=r" (res) \ ++ : "r" (addr), "i" (offs) \ ++ : "$t0", "$t1", "memory"); \ ++ } ++ ++void test(void) ++{ ++ int res_i; ++ long res_l; ++ int val_i[2] = { 6, 10 }; ++ long val_l[2] = { 6, 10 }; ++ ++ /* ---------------- ll.w rd, rj, si14 ---------------- */ ++ /* ---------------- sc.w rd, rj, si14 ---------------- */ ++ printf("ll.w sc.w ::\n"); ++ ++ do { ++ TESTINST_LLSC_W("addi.w $t0, $t0, 1", res_i, val_i, 0); ++ } while (res_i != 1); ++ printf("res: %d val: %d\n", res_i, val_i[0]); ++ ++ do { ++ TESTINST_LLSC_W("sub.w $t0, $zero, $t0", res_i, val_i, 4); ++ } while (res_i != 1); ++ printf("res: %d val: %d\n", res_i, val_i[1]); ++ ++ /* ---------------- ll.d rd, rj, si14 ---------------- */ ++ /* ---------------- sc.d rd, rj, si14 ---------------- */ ++ printf("ll.d sc.d ::\n"); ++ ++ do { ++ TESTINST_LLSC_D("addi.d $t0, $t0, 1", res_l, val_l, 0); ++ } while (res_l != 1); ++ printf("res: %ld val: %ld\n", res_l, val_l[0]); ++ ++ do { ++ TESTINST_LLSC_D("sub.d $t0, $zero, $t0", res_l, val_l, 8); ++ } while (res_l != 1); ++ printf("res: %ld val: %ld\n", res_l, val_l[1]); ++} ++ ++int main(void) ++{ ++ test(); ++ return 0; ++} +diff --git a/none/tests/loongarch64/llsc.stderr.exp b/none/tests/loongarch64/llsc.stderr.exp +new file mode 100644 +index 000000000..e69de29bb +diff --git a/none/tests/loongarch64/llsc.stdout.exp b/none/tests/loongarch64/llsc.stdout.exp +new file mode 100644 +index 000000000..dd1925156 +--- /dev/null ++++ b/none/tests/loongarch64/llsc.stdout.exp +@@ -0,0 +1,6 @@ ++ll.w sc.w :: ++res: 1 val: 7 ++res: 1 val: -10 ++ll.d sc.d :: ++res: 1 val: 7 ++res: 1 val: -10 +diff --git a/none/tests/loongarch64/llsc.vgtest b/none/tests/loongarch64/llsc.vgtest +new file mode 100644 +index 000000000..685c27911 +--- /dev/null ++++ b/none/tests/loongarch64/llsc.vgtest +@@ -0,0 +1,2 @@ ++prog: llsc ++vgopts: -q +diff --git a/none/tests/loongarch64/memory.c b/none/tests/loongarch64/memory.c +new file mode 100644 +index 000000000..385efc02c +--- /dev/null ++++ b/none/tests/loongarch64/memory.c +@@ -0,0 +1,345 @@ ++#include ++ ++#define NUM 24 ++ ++unsigned long mem[NUM] = { ++ 0x121f1e1f0000e680, 0x0000000000010700, 0x000000030000e7dc, ++ 0xffffffff0000b0d0, 0x232f2e2f2ab05fd0, 
0x242c2b2b0000b6a0, ++ 0x252a2e2b0000be80, 0x262d2d2a0000de10, 0x3f343f3e0000df20, ++ 0x3e353d3c2ab05fe0, 0x363a3c3b0000dfd0, 0x3b373b3a00010300, ++ 0x0000e680121f1e1f, 0x0001070000000000, 0x0000e7dc00000003, ++ 0x0000b0d0ffffffff, 0x2ab05fd0232f2e2f, 0x0000b6a0242c2b2b, ++ 0x0000be80252a2e2b, 0x0000de10262d2d2a, 0x0000df203f343f3e, ++ 0x2ab05fe03e353d3c, 0x0000dfd0363a3c3b, 0x000103003b373b3a ++}; ++ ++#define TESTINST_LOAD_RRI(insn, val, addr, offs) \ ++ { \ ++ __asm__ __volatile__( \ ++ insn " %0, %1, " #offs " \n\t" \ ++ : "=r" (val) \ ++ : "r" (addr) \ ++ : "memory"); \ ++ } ++ ++#define TESTINST_LOAD_RRR(insn, val, addr, offs) \ ++ { \ ++ __asm__ __volatile__( \ ++ insn " %0, %1, %2 \n\t" \ ++ : "=r" (val) \ ++ : "r" (addr), "r" (offs) \ ++ : "memory"); \ ++ } ++ ++#define TESTINST_LOAD_FRI(insn, val, addr, offs) \ ++ { \ ++ __asm__ __volatile__( \ ++ insn " %0, %1, " #offs " \n\t" \ ++ : "=f" (val) \ ++ : "r" (addr) \ ++ : "memory"); \ ++ } ++ ++#define TESTINST_LOAD_FRR(insn, val, addr, offs) \ ++ { \ ++ __asm__ __volatile__( \ ++ insn " %0, %1, %2 \n\t" \ ++ : "=f" (val) \ ++ : "r" (addr), "r" (offs) \ ++ : "memory"); \ ++ } ++ ++#define TESTINST_STORE_RRI(insn, val, addr, offs) \ ++ { \ ++ __asm__ __volatile__( \ ++ insn " %0, %1, " #offs " \n\t" \ ++ : \ ++ : "r" (val), "r" (addr) \ ++ : "memory"); \ ++ } ++ ++#define TESTINST_STORE_RRR(insn, val, addr, offs) \ ++ { \ ++ __asm__ __volatile__( \ ++ insn " %0, %1, %2 \n\t" \ ++ : \ ++ : "r" (val), "r" (addr), "r" (offs) \ ++ : "memory"); \ ++ } ++ ++#define TESTINST_STORE_FRI(insn, val, addr, offs) \ ++ { \ ++ __asm__ __volatile__( \ ++ insn " %0, %1, " #offs " \n\t" \ ++ : \ ++ : "f" (val), "r" (addr) \ ++ : "memory"); \ ++ } ++ ++#define TESTINST_STORE_FRR(insn, val, addr, offs) \ ++ { \ ++ __asm__ __volatile__( \ ++ insn " %0, %1, %2 \n\t" \ ++ : \ ++ : "f" (val), "r" (addr), "r" (offs) \ ++ : "memory"); \ ++ } ++ ++static inline void show(void) ++{ ++ int i; ++ printf("memory block:\n"); ++ for (i = 0; i < NUM; i++) ++ printf("0x%lx:\t%#018lx\n", i * sizeof(unsigned long), mem[i]); ++} ++ ++void test(void) ++{ ++ char s8; ++ unsigned char u8; ++ short s16; ++ unsigned short u16; ++ int s32; ++ unsigned int u32; ++ long s64; ++ unsigned long u64; ++ ++ show(); ++ ++ /* ---------------- ld.b rd, rj, si12 ---------------- */ ++ printf("test ld.b: "); ++ TESTINST_LOAD_RRI("ld.b", s8, mem, 0); ++ printf("%d ", (int)s8); ++ TESTINST_LOAD_RRI("ld.b", s8, mem, 24); ++ printf("%d\n", (int)s8); ++ ++ /* ---------------- ld.bu rd, rj, si12 ---------------- */ ++ printf("test ld.bu: "); ++ TESTINST_LOAD_RRI("ld.b", u8, mem, 0); ++ printf("%u ", (unsigned)s8); ++ TESTINST_LOAD_RRI("ld.b", u8, mem, 24); ++ printf("%u\n", (unsigned)s8); ++ ++ /* ---------------- ld.h rd, rj, si12 ---------------- */ ++ printf("test ld.h: "); ++ TESTINST_LOAD_RRI("ld.h", s16, mem, 0); ++ printf("%hd ", s16); ++ TESTINST_LOAD_RRI("ld.h", s16, mem, 24); ++ printf("%hd\n", s16); ++ ++ /* ---------------- ld.hu rd, rj, si12 ---------------- */ ++ printf("test ld.hu: "); ++ TESTINST_LOAD_RRI("ld.hu", u16, mem, 0); ++ printf("%hu ", u16); ++ TESTINST_LOAD_RRI("ld.hu", u16, mem, 24); ++ printf("%hu\n", u16); ++ ++ /* ---------------- ld.w rd, rj, si12 ---------------- */ ++ printf("test ld.w: "); ++ TESTINST_LOAD_RRI("ld.w", s32, mem, 0); ++ printf("%d ", s32); ++ TESTINST_LOAD_RRI("ld.w", s32, mem, 24); ++ printf("%d\n", s32); ++ ++ /* ---------------- ld.wu rd, rj, si12 ---------------- */ ++ printf("test ld.wu: "); ++ TESTINST_LOAD_RRI("ld.wu", u32, mem, 0); ++ 
printf("%u ", u32); ++ TESTINST_LOAD_RRI("ld.wu", u32, mem, 24); ++ printf("%u\n", u32); ++ ++ /* ---------------- ld.d rd, rj, si12 ---------------- */ ++ printf("test ld.d: "); ++ TESTINST_LOAD_RRI("ld.d", s64, mem, 0); ++ printf("%ld ", s64); ++ TESTINST_LOAD_RRI("ld.d", s64, mem, 24); ++ printf("%ld ", s64); ++ TESTINST_LOAD_RRI("ld.d", u64, mem, 0); ++ printf("%lu ", u64); ++ TESTINST_LOAD_RRI("ld.d", u64, mem, 24); ++ printf("%lu\n", u64); ++ ++ /* ---------------- ldx.b rd, rj, rk ---------------- */ ++ printf("test ldx.b: "); ++ TESTINST_LOAD_RRR("ldx.b", s8, mem, 0); ++ printf("%d ", (int)s8); ++ TESTINST_LOAD_RRR("ldx.b", s8, mem, 24); ++ printf("%d\n", (int)s8); ++ ++ /* ---------------- ldx.bu rd, rj, rk ---------------- */ ++ printf("test ldx.bu: "); ++ TESTINST_LOAD_RRR("ldx.b", u8, mem, 0); ++ printf("%u ", (unsigned)s8); ++ TESTINST_LOAD_RRR("ldx.b", u8, mem, 24); ++ printf("%u\n", (unsigned)s8); ++ ++ /* ---------------- ldx.h rd, rj, rk ---------------- */ ++ printf("test ldx.h: "); ++ TESTINST_LOAD_RRR("ldx.h", s16, mem, 0); ++ printf("%hd ", s16); ++ TESTINST_LOAD_RRR("ldx.h", s16, mem, 24); ++ printf("%hd\n", s16); ++ ++ /* ---------------- ldx.hu rd, rj, rk ---------------- */ ++ printf("test ld.hu: "); ++ TESTINST_LOAD_RRR("ldx.hu", u16, mem, 0); ++ printf("%hu ", u16); ++ TESTINST_LOAD_RRR("ldx.hu", u16, mem, 24); ++ printf("%hu\n", u16); ++ ++ /* ---------------- ldx.w rd, rj, rk ---------------- */ ++ printf("test ldx.w: "); ++ TESTINST_LOAD_RRR("ldx.w", s32, mem, 0); ++ printf("%d ", s32); ++ TESTINST_LOAD_RRR("ldx.w", s32, mem, 24); ++ printf("%d\n", s32); ++ ++ /* ---------------- ldx.wu rd, rj, rk ---------------- */ ++ printf("test ldx.wu: "); ++ TESTINST_LOAD_RRR("ldx.wu", u32, mem, 0); ++ printf("%u ", u32); ++ TESTINST_LOAD_RRR("ldx.wu", u32, mem, 24); ++ printf("%u\n", u32); ++ ++ /* ---------------- ldx.d rd, rj, rk ---------------- */ ++ printf("test ldx.d: "); ++ TESTINST_LOAD_RRR("ldx.d", s64, mem, 0); ++ printf("%ld ", s64); ++ TESTINST_LOAD_RRR("ldx.d", s64, mem, 24); ++ printf("%ld ", s64); ++ TESTINST_LOAD_RRR("ldx.d", u64, mem, 0); ++ printf("%lu ", u64); ++ TESTINST_LOAD_RRR("ldx.d", u64, mem, 24); ++ printf("%lu\n", u64); ++ ++ /* ---------------- ldptr.w rd, rj, si14 ---------------- */ ++ printf("test ldptr.w: "); ++ TESTINST_LOAD_RRI("ldptr.w", s32, mem, 0); ++ printf("%d ", s32); ++ TESTINST_LOAD_RRI("ldptr.w", s32, mem, 24); ++ printf("%d\n", s32); ++ ++ /* ---------------- ldptr.d rd, rj, si14 ---------------- */ ++ printf("test ldptr.d: "); ++ TESTINST_LOAD_RRI("ldptr.d", s64, mem, 0); ++ printf("%ld ", s64); ++ TESTINST_LOAD_RRI("ldptr.d", s64, mem, 24); ++ printf("%ld\n", s64); ++ ++ /* ---------------- fld.s fd, rj, si12 ---------------- */ ++ printf("test fld.s: "); ++ TESTINST_LOAD_FRI("fld.s", u32, mem, 0); ++ printf("%#x ", u32); ++ TESTINST_LOAD_FRI("fld.s", u32, mem, 24); ++ printf("%#x\n", u32); ++ ++ /* ---------------- fld.d fd, rj, si12 ---------------- */ ++ printf("test fld.d: "); ++ TESTINST_LOAD_FRI("fld.d", u64, mem, 0); ++ printf("%#lx ", u64); ++ TESTINST_LOAD_FRI("fld.d", u64, mem, 24); ++ printf("%#lx\n", u64); ++ ++ /* ---------------- fldx.s fd, rj, rk ---------------- */ ++ printf("test fldx.s: "); ++ TESTINST_LOAD_FRR("fldx.s", u32, mem, 0); ++ printf("%#x ", u32); ++ TESTINST_LOAD_FRR("fldx.s", u32, mem, 24); ++ printf("%#x\n", u32); ++ ++ /* ---------------- fldx.d fd, rj, rk ---------------- */ ++ printf("test fldx.d: "); ++ TESTINST_LOAD_FRR("fldx.d", u64, mem, 0); ++ printf("%#lx ", u64); ++ 
TESTINST_LOAD_FRR("fldx.d", u64, mem, 24); ++ printf("%#lx\n", u64); ++ ++ show(); ++ ++ u8 = 0xfe; ++ s8 = (char)u8; ++ u16 = 0xfedc; ++ s16 = (short)u16; ++ u32 = 0xfedcba98; ++ s32 = (int)u32; ++ u64 = 0xfedcba9876543210; ++ s64 = (long)u64; ++ ++ /* ---------------- st.b rd, rj, si12 ---------------- */ ++ printf("test st.b\n"); ++ TESTINST_STORE_RRI("st.b", s8, mem, 0); ++ TESTINST_STORE_RRI("st.b", u8, mem, 1); ++ ++ /* ---------------- st.h rd, rj, si12 ---------------- */ ++ printf("test st.h\n"); ++ TESTINST_STORE_RRI("st.h", s16, mem, 2); ++ TESTINST_STORE_RRI("st.h", u16, mem, 4); ++ ++ /* ---------------- st.w rd, rj, si12 ---------------- */ ++ printf("test st.w\n"); ++ TESTINST_STORE_RRI("st.w", s32, mem, 8); ++ TESTINST_STORE_RRI("st.w", u32, mem, 12); ++ ++ /* ---------------- st.d rd, rj, si12 ---------------- */ ++ printf("test st.d\n"); ++ TESTINST_STORE_RRI("st.d", s64, mem, 16); ++ TESTINST_STORE_RRI("st.d", u64, mem, 24); ++ ++ /* ---------------- stx.b rd, rj, rk ---------------- */ ++ printf("test stx.b\n"); ++ TESTINST_STORE_RRR("stx.b", s8, mem, 32); ++ TESTINST_STORE_RRR("stx.b", u8, mem, 33); ++ ++ /* ---------------- stx.h rd, rj, rk ---------------- */ ++ printf("test stx.h\n"); ++ TESTINST_STORE_RRR("stx.h", s16, mem, 34); ++ TESTINST_STORE_RRR("stx.h", u16, mem, 36); ++ ++ /* ---------------- stx.w rd, rj, rk ---------------- */ ++ printf("test stx.w\n"); ++ TESTINST_STORE_RRR("stx.w", s32, mem, 40); ++ TESTINST_STORE_RRR("stx.w", u32, mem, 44); ++ ++ /* ---------------- stx.d rd, rj, rk ---------------- */ ++ printf("test stx.d\n"); ++ TESTINST_STORE_RRR("stx.d", s64, mem, 48); ++ TESTINST_STORE_RRR("stx.d", u64, mem, 56); ++ ++ /* ---------------- stptr.w rd, rj, si14 ---------------- */ ++ printf("test stptr.w\n"); ++ TESTINST_STORE_RRI("stptr.w", s64, mem, 64); ++ TESTINST_STORE_RRI("stptr.w", u64, mem, 68); ++ ++ /* ---------------- stptr.d rd, rj, si14 ---------------- */ ++ printf("test stptr.d\n"); ++ TESTINST_STORE_RRI("stptr.d", s64, mem, 72); ++ TESTINST_STORE_RRI("stptr.d", u64, mem, 80); ++ ++ /* ---------------- fst.s rd, rj, si12 ---------------- */ ++ printf("test fst.w\n"); ++ TESTINST_STORE_FRI("fst.s", u32, mem, 84); ++ TESTINST_STORE_FRI("fst.s", u32, mem, 88); ++ ++ /* ---------------- fst.d rd, rj, si12 ---------------- */ ++ printf("test fst.d\n"); ++ TESTINST_STORE_FRI("fst.d", u64, mem, 96); ++ TESTINST_STORE_FRI("fst.d", u64, mem, 104); ++ ++ /* ---------------- fstx.s rd, rj, rk ---------------- */ ++ printf("test fstx.w\n"); ++ TESTINST_STORE_FRR("fstx.s", u32, mem, 108); ++ TESTINST_STORE_FRR("fstx.s", u32, mem, 112); ++ ++ /* ---------------- fstx.d rd, rj, rk ---------------- */ ++ printf("test fstx.d\n"); ++ TESTINST_STORE_FRR("fstx.d", u64, mem, 120); ++ TESTINST_STORE_FRR("fstx.d", u64, mem, 128); ++ ++ show(); ++} ++ ++int main(void) ++{ ++ test(); ++ return 0; ++} +diff --git a/none/tests/loongarch64/memory.stderr.exp b/none/tests/loongarch64/memory.stderr.exp +new file mode 100644 +index 000000000..e69de29bb +diff --git a/none/tests/loongarch64/memory.stdout.exp b/none/tests/loongarch64/memory.stdout.exp +new file mode 100644 +index 000000000..928961df0 +--- /dev/null ++++ b/none/tests/loongarch64/memory.stdout.exp +@@ -0,0 +1,109 @@ ++memory block: ++0x0: 0x121f1e1f0000e680 ++0x8: 0x0000000000010700 ++0x10: 0x000000030000e7dc ++0x18: 0xffffffff0000b0d0 ++0x20: 0x232f2e2f2ab05fd0 ++0x28: 0x242c2b2b0000b6a0 ++0x30: 0x252a2e2b0000be80 ++0x38: 0x262d2d2a0000de10 ++0x40: 0x3f343f3e0000df20 ++0x48: 0x3e353d3c2ab05fe0 ++0x50: 
0x363a3c3b0000dfd0 ++0x58: 0x3b373b3a00010300 ++0x60: 0x0000e680121f1e1f ++0x68: 0x0001070000000000 ++0x70: 0x0000e7dc00000003 ++0x78: 0x0000b0d0ffffffff ++0x80: 0x2ab05fd0232f2e2f ++0x88: 0x0000b6a0242c2b2b ++0x90: 0x0000be80252a2e2b ++0x98: 0x0000de10262d2d2a ++0xa0: 0x0000df203f343f3e ++0xa8: 0x2ab05fe03e353d3c ++0xb0: 0x0000dfd0363a3c3b ++0xb8: 0x000103003b373b3a ++test ld.b: -128 -48 ++test ld.bu: 4294967248 4294967248 ++test ld.h: -6528 -20272 ++test ld.hu: 59008 45264 ++test ld.w: 59008 45264 ++test ld.wu: 59008 45264 ++test ld.d: 1305795535453611648 -4294922032 1305795535453611648 18446744069414629584 ++test ldx.b: -128 -48 ++test ldx.bu: 4294967248 4294967248 ++test ldx.h: -6528 -20272 ++test ld.hu: 59008 45264 ++test ldx.w: 59008 45264 ++test ldx.wu: 59008 45264 ++test ldx.d: 1305795535453611648 -4294922032 1305795535453611648 18446744069414629584 ++test ldptr.w: 59008 45264 ++test ldptr.d: 1305795535453611648 -4294922032 ++test fld.s: 0xe680 0xb0d0 ++test fld.d: 0x121f1e1f0000e680 0xffffffff0000b0d0 ++test fldx.s: 0xe680 0xb0d0 ++test fldx.d: 0x121f1e1f0000e680 0xffffffff0000b0d0 ++memory block: ++0x0: 0x121f1e1f0000e680 ++0x8: 0x0000000000010700 ++0x10: 0x000000030000e7dc ++0x18: 0xffffffff0000b0d0 ++0x20: 0x232f2e2f2ab05fd0 ++0x28: 0x242c2b2b0000b6a0 ++0x30: 0x252a2e2b0000be80 ++0x38: 0x262d2d2a0000de10 ++0x40: 0x3f343f3e0000df20 ++0x48: 0x3e353d3c2ab05fe0 ++0x50: 0x363a3c3b0000dfd0 ++0x58: 0x3b373b3a00010300 ++0x60: 0x0000e680121f1e1f ++0x68: 0x0001070000000000 ++0x70: 0x0000e7dc00000003 ++0x78: 0x0000b0d0ffffffff ++0x80: 0x2ab05fd0232f2e2f ++0x88: 0x0000b6a0242c2b2b ++0x90: 0x0000be80252a2e2b ++0x98: 0x0000de10262d2d2a ++0xa0: 0x0000df203f343f3e ++0xa8: 0x2ab05fe03e353d3c ++0xb0: 0x0000dfd0363a3c3b ++0xb8: 0x000103003b373b3a ++test st.b ++test st.h ++test st.w ++test st.d ++test stx.b ++test stx.h ++test stx.w ++test stx.d ++test stptr.w ++test stptr.d ++test fst.w ++test fst.d ++test fstx.w ++test fstx.d ++memory block: ++0x0: 0x121ffedcfedcfefe ++0x8: 0xfedcba98fedcba98 ++0x10: 0xfedcba9876543210 ++0x18: 0xfedcba9876543210 ++0x20: 0x232ffedcfedcfefe ++0x28: 0xfedcba98fedcba98 ++0x30: 0xfedcba9876543210 ++0x38: 0xfedcba9876543210 ++0x40: 0x7654321076543210 ++0x48: 0xfedcba9876543210 ++0x50: 0xfedcba9876543210 ++0x58: 0x3b373b3afedcba98 ++0x60: 0xfedcba9876543210 ++0x68: 0xfedcba9876543210 ++0x70: 0x0000e7dcfedcba98 ++0x78: 0xfedcba9876543210 ++0x80: 0xfedcba9876543210 ++0x88: 0x0000b6a0242c2b2b ++0x90: 0x0000be80252a2e2b ++0x98: 0x0000de10262d2d2a ++0xa0: 0x0000df203f343f3e ++0xa8: 0x2ab05fe03e353d3c ++0xb0: 0x0000dfd0363a3c3b ++0xb8: 0x000103003b373b3a +diff --git a/none/tests/loongarch64/memory.vgtest b/none/tests/loongarch64/memory.vgtest +new file mode 100644 +index 000000000..be6895e8f +--- /dev/null ++++ b/none/tests/loongarch64/memory.vgtest +@@ -0,0 +1,2 @@ ++prog: memory ++vgopts: -q +diff --git a/none/tests/loongarch64/move.c b/none/tests/loongarch64/move.c +new file mode 100644 +index 000000000..3b7f46dd9 +--- /dev/null ++++ b/none/tests/loongarch64/move.c +@@ -0,0 +1,112 @@ ++#include ++ ++#define TESTINST_MOV(v1, v2, v3, v4, v5, v6, val) \ ++ { \ ++ unsigned long res1 = (unsigned long)v1; \ ++ unsigned long res2 = (unsigned long)v2; \ ++ unsigned long res3 = (unsigned long)v3; \ ++ unsigned long res4 = (unsigned long)v4; \ ++ unsigned long res5 = (unsigned long)v5; \ ++ unsigned long res6 = (unsigned long)v6; \ ++ __asm__ __volatile__( \ ++ "movgr2fr.w %0, %6 \n\t" \ ++ "movgr2fr.d %1, %6 \n\t" \ ++ "movgr2frh.w %2, %6 \n\t" \ ++ "movfr2gr.s %3, %7 \n\t" \ ++ 
"movfrh2gr.s %4, %7 \n\t" \ ++ "movfr2gr.d %5, %7 \n\t" \ ++ : "+f" (res1), "+f" (res2), "+f" (res3), \ ++ "+r" (res4), "+r" (res5), "+r" (res6) \ ++ : "r" (val), "f" (val) \ ++ : "memory"); \ ++ printf("movgr2fr.w ::\n"); \ ++ printf("input: %#018lx %#018lx\n", v1, val); \ ++ printf("output: %#018lx\n", res1); \ ++ printf("movgr2fr.d ::\n"); \ ++ printf("input: %#018lx %#018lx\n", v2, val); \ ++ printf("output: %#018lx\n", res2); \ ++ printf("movgr2frh.w ::\n"); \ ++ printf("input: %#018lx %#018lx\n", v3, val); \ ++ printf("output: %#018lx\n", res3); \ ++ printf("movfr2gr.s ::\n"); \ ++ printf("input: %#018lx %#018lx\n", v4, val); \ ++ printf("output: %#018lx\n", res4); \ ++ printf("movfrh2gr.s ::\n"); \ ++ printf("input: %#018lx %#018lx\n", v5, val); \ ++ printf("output: %#018lx\n", res5); \ ++ printf("movfr2gr.d ::\n"); \ ++ printf("input: %#018lx %#018lx\n", v6, val); \ ++ printf("output: %#018lx\n", res6); \ ++ } ++ ++#define TESTINST_FSCR(fcsr, val) \ ++ { \ ++ unsigned long res; \ ++ __asm__ __volatile__( \ ++ "movgr2fcsr " fcsr ", %1 \n\t" \ ++ "movfcsr2gr %0, " fcsr " \n\t" \ ++ : "=r" (res) \ ++ : "r" (val) \ ++ : "memory"); \ ++ printf("movgr2fcsr movfcsr2gr ::\n"); \ ++ printf("input: %#018lx\n", val); \ ++ printf("output: %#018lx\n", res); \ ++ } ++ ++#define TESTINST_CF(fcc, v1, v2, val) \ ++ { \ ++ unsigned long res1 = (unsigned long)v1; \ ++ unsigned long res2 = (unsigned long)v2; \ ++ __asm__ __volatile__( \ ++ "movfr2cf " fcc ", %2 \n\t" \ ++ "movcf2fr %0, " fcc " \n\t" \ ++ "movgr2cf " fcc ", %3 \n\t" \ ++ "movcf2gr %1, " fcc " \n\t" \ ++ : "+f" (res1), "+r" (res2) \ ++ : "f" (val), "r" (val) \ ++ : "memory"); \ ++ printf("movfr2cf movcf2fr ::\n"); \ ++ printf("input: %#018lx %#018lx\n", v1, val); \ ++ printf("output: %lx\n", res1); \ ++ printf("movgr2cf movcf2gr ::\n"); \ ++ printf("input: %#018lx %#018lx\n", v2, val); \ ++ printf("output: %lx\n", res2); \ ++ } ++ ++void test(void) ++{ ++ TESTINST_MOV(0x1234123412341234UL, 0x5678567856785678UL, 0x9abc9abc9abc9abcUL, 0xdef0def0def0def0UL, 0x2468246824682468UL, 0x3579357935793579UL, 0x0123456789abcdefUL); ++ TESTINST_MOV(0x1234123412341234UL, 0x5678567856785678UL, 0x9abc9abc9abc9abcUL, 0xdef0def0def0def0UL, 0x2468246824682468UL, 0x3579357935793579UL, 0xfedcba9876543210UL); ++ ++ TESTINST_FSCR("$r0", 0x0123456789abcdefUL); ++ TESTINST_FSCR("$r0", 0xfedcba9876543210UL); ++ TESTINST_FSCR("$r1", 0x0123456789abcdefUL); ++ TESTINST_FSCR("$r1", 0xfedcba9876543210UL); ++ TESTINST_FSCR("$r2", 0x0123456789abcdefUL); ++ TESTINST_FSCR("$r2", 0xfedcba9876543210UL); ++ TESTINST_FSCR("$r3", 0x0123456789abcdefUL); ++ TESTINST_FSCR("$r3", 0xfedcba9876543210UL); ++ ++ TESTINST_CF("$fcc0", 0x1234123412341234UL, 0x5678567856785678UL, 0xffffffffffffffffUL); ++ TESTINST_CF("$fcc0", 0x1234123412341234UL, 0x5678567856785678UL, 0xdef0def0def0def0UL); ++ TESTINST_CF("$fcc1", 0x1234123412341234UL, 0x5678567856785678UL, 0xffffffffffffffffUL); ++ TESTINST_CF("$fcc1", 0x1234123412341234UL, 0x5678567856785678UL, 0xdef0def0def0def0UL); ++ TESTINST_CF("$fcc2", 0x1234123412341234UL, 0x5678567856785678UL, 0xffffffffffffffffUL); ++ TESTINST_CF("$fcc2", 0x1234123412341234UL, 0x5678567856785678UL, 0xdef0def0def0def0UL); ++ TESTINST_CF("$fcc3", 0x1234123412341234UL, 0x5678567856785678UL, 0xffffffffffffffffUL); ++ TESTINST_CF("$fcc3", 0x1234123412341234UL, 0x5678567856785678UL, 0xdef0def0def0def0UL); ++ TESTINST_CF("$fcc4", 0x1234123412341234UL, 0x5678567856785678UL, 0xffffffffffffffffUL); ++ TESTINST_CF("$fcc4", 0x1234123412341234UL, 0x5678567856785678UL, 
0xdef0def0def0def0UL); ++ TESTINST_CF("$fcc5", 0x1234123412341234UL, 0x5678567856785678UL, 0xffffffffffffffffUL); ++ TESTINST_CF("$fcc5", 0x1234123412341234UL, 0x5678567856785678UL, 0xdef0def0def0def0UL); ++ TESTINST_CF("$fcc6", 0x1234123412341234UL, 0x5678567856785678UL, 0xffffffffffffffffUL); ++ TESTINST_CF("$fcc6", 0x1234123412341234UL, 0x5678567856785678UL, 0xdef0def0def0def0UL); ++ TESTINST_CF("$fcc7", 0x1234123412341234UL, 0x5678567856785678UL, 0xffffffffffffffffUL); ++ TESTINST_CF("$fcc7", 0x1234123412341234UL, 0x5678567856785678UL, 0xdef0def0def0def0UL); ++} ++ ++int main(void) ++{ ++ test(); ++ return 0; ++} +diff --git a/none/tests/loongarch64/move.stderr.exp b/none/tests/loongarch64/move.stderr.exp +new file mode 100644 +index 000000000..e69de29bb +diff --git a/none/tests/loongarch64/move.stdout.exp b/none/tests/loongarch64/move.stdout.exp +new file mode 100644 +index 000000000..12baf3888 +--- /dev/null ++++ b/none/tests/loongarch64/move.stdout.exp +@@ -0,0 +1,156 @@ ++movgr2fr.w :: ++input: 0x1234123412341234 0x0123456789abcdef ++output: 0x0123456789abcdef ++movgr2fr.d :: ++input: 0x5678567856785678 0x0123456789abcdef ++output: 0x0123456789abcdef ++movgr2frh.w :: ++input: 0x9abc9abc9abc9abc 0x0123456789abcdef ++output: 0x89abcdef9abc9abc ++movfr2gr.s :: ++input: 0xdef0def0def0def0 0x0123456789abcdef ++output: 0xffffffff89abcdef ++movfrh2gr.s :: ++input: 0x2468246824682468 0x0123456789abcdef ++output: 0x0000000001234567 ++movfr2gr.d :: ++input: 0x3579357935793579 0x0123456789abcdef ++output: 0x0123456789abcdef ++movgr2fr.w :: ++input: 0x1234123412341234 0xfedcba9876543210 ++output: 0xfedcba9876543210 ++movgr2fr.d :: ++input: 0x5678567856785678 0xfedcba9876543210 ++output: 0xfedcba9876543210 ++movgr2frh.w :: ++input: 0x9abc9abc9abc9abc 0xfedcba9876543210 ++output: 0x765432109abc9abc ++movfr2gr.s :: ++input: 0xdef0def0def0def0 0xfedcba9876543210 ++output: 0x0000000076543210 ++movfrh2gr.s :: ++input: 0x2468246824682468 0xfedcba9876543210 ++output: 0xfffffffffedcba98 ++movfr2gr.d :: ++input: 0x3579357935793579 0xfedcba9876543210 ++output: 0xfedcba9876543210 ++movgr2fcsr movfcsr2gr :: ++input: 0x0123456789abcdef ++output: 0x00000000090b01cf ++movgr2fcsr movfcsr2gr :: ++input: 0xfedcba9876543210 ++output: 0x0000000016140210 ++movgr2fcsr movfcsr2gr :: ++input: 0x0123456789abcdef ++output: 0x000000000000008f ++movgr2fcsr movfcsr2gr :: ++input: 0xfedcba9876543210 ++output: 0x0000000000000010 ++movgr2fcsr movfcsr2gr :: ++input: 0x0123456789abcdef ++output: 0x00000000090b0000 ++movgr2fcsr movfcsr2gr :: ++input: 0xfedcba9876543210 ++output: 0x0000000016140000 ++movgr2fcsr movfcsr2gr :: ++input: 0x0123456789abcdef ++output: 0x0000000000000100 ++movgr2fcsr movfcsr2gr :: ++input: 0xfedcba9876543210 ++output: 0x0000000000000200 ++movfr2cf movcf2fr :: ++input: 0x1234123412341234 0xffffffffffffffff ++output: 1 ++movgr2cf movcf2gr :: ++input: 0x5678567856785678 0xffffffffffffffff ++output: 1 ++movfr2cf movcf2fr :: ++input: 0x1234123412341234 0xdef0def0def0def0 ++output: 0 ++movgr2cf movcf2gr :: ++input: 0x5678567856785678 0xdef0def0def0def0 ++output: 0 ++movfr2cf movcf2fr :: ++input: 0x1234123412341234 0xffffffffffffffff ++output: 1 ++movgr2cf movcf2gr :: ++input: 0x5678567856785678 0xffffffffffffffff ++output: 1 ++movfr2cf movcf2fr :: ++input: 0x1234123412341234 0xdef0def0def0def0 ++output: 0 ++movgr2cf movcf2gr :: ++input: 0x5678567856785678 0xdef0def0def0def0 ++output: 0 ++movfr2cf movcf2fr :: ++input: 0x1234123412341234 0xffffffffffffffff ++output: 1 ++movgr2cf movcf2gr :: ++input: 
0x5678567856785678 0xffffffffffffffff ++output: 1 ++movfr2cf movcf2fr :: ++input: 0x1234123412341234 0xdef0def0def0def0 ++output: 0 ++movgr2cf movcf2gr :: ++input: 0x5678567856785678 0xdef0def0def0def0 ++output: 0 ++movfr2cf movcf2fr :: ++input: 0x1234123412341234 0xffffffffffffffff ++output: 1 ++movgr2cf movcf2gr :: ++input: 0x5678567856785678 0xffffffffffffffff ++output: 1 ++movfr2cf movcf2fr :: ++input: 0x1234123412341234 0xdef0def0def0def0 ++output: 0 ++movgr2cf movcf2gr :: ++input: 0x5678567856785678 0xdef0def0def0def0 ++output: 0 ++movfr2cf movcf2fr :: ++input: 0x1234123412341234 0xffffffffffffffff ++output: 1 ++movgr2cf movcf2gr :: ++input: 0x5678567856785678 0xffffffffffffffff ++output: 1 ++movfr2cf movcf2fr :: ++input: 0x1234123412341234 0xdef0def0def0def0 ++output: 0 ++movgr2cf movcf2gr :: ++input: 0x5678567856785678 0xdef0def0def0def0 ++output: 0 ++movfr2cf movcf2fr :: ++input: 0x1234123412341234 0xffffffffffffffff ++output: 1 ++movgr2cf movcf2gr :: ++input: 0x5678567856785678 0xffffffffffffffff ++output: 1 ++movfr2cf movcf2fr :: ++input: 0x1234123412341234 0xdef0def0def0def0 ++output: 0 ++movgr2cf movcf2gr :: ++input: 0x5678567856785678 0xdef0def0def0def0 ++output: 0 ++movfr2cf movcf2fr :: ++input: 0x1234123412341234 0xffffffffffffffff ++output: 1 ++movgr2cf movcf2gr :: ++input: 0x5678567856785678 0xffffffffffffffff ++output: 1 ++movfr2cf movcf2fr :: ++input: 0x1234123412341234 0xdef0def0def0def0 ++output: 0 ++movgr2cf movcf2gr :: ++input: 0x5678567856785678 0xdef0def0def0def0 ++output: 0 ++movfr2cf movcf2fr :: ++input: 0x1234123412341234 0xffffffffffffffff ++output: 1 ++movgr2cf movcf2gr :: ++input: 0x5678567856785678 0xffffffffffffffff ++output: 1 ++movfr2cf movcf2fr :: ++input: 0x1234123412341234 0xdef0def0def0def0 ++output: 0 ++movgr2cf movcf2gr :: ++input: 0x5678567856785678 0xdef0def0def0def0 ++output: 0 +diff --git a/none/tests/loongarch64/move.vgtest b/none/tests/loongarch64/move.vgtest +new file mode 100644 +index 000000000..358d44b7a +--- /dev/null ++++ b/none/tests/loongarch64/move.vgtest +@@ -0,0 +1,3 @@ ++prereq: ../../../tests/loongarch64_features fpu ++prog: move ++vgopts: -q +diff --git a/none/tests/loongarch64/pc.c b/none/tests/loongarch64/pc.c +new file mode 100644 +index 000000000..a4938463a +--- /dev/null ++++ b/none/tests/loongarch64/pc.c +@@ -0,0 +1,66 @@ ++#include ++#include ++ ++#define TESTINST_RI(insn, imm, offs, clear) \ ++ { \ ++ unsigned long res, exp; \ ++ __asm__ __volatile__( \ ++ " la.local $t0, 1f \n\t" \ ++ " jirl %0, $t0, 0 \n\t" \ ++ "1: \n\t" \ ++ insn " %1," #imm " \n\t" \ ++ : "=r" (exp), "=r" (res) \ ++ : \ ++ : "$t0", "memory"); \ ++ printf("test %s\n", insn); \ ++ exp += (long)imm << 40 >> (40 - offs); \ ++ if (clear) \ ++ exp &= 0xfffffffffffff000UL; \ ++ if (res != exp) \ ++ printf("res: %#lx, exp: %#lx\n", res, exp); \ ++ } ++ ++void test(void) ++{ ++ /* ---------------- pcaddi rd, si20 ---------------- */ ++ TESTINST_RI("pcaddi", 0, 2, false); ++ TESTINST_RI("pcaddi", 1, 2, false); ++ TESTINST_RI("pcaddi", 100, 2, false); ++ TESTINST_RI("pcaddi", 12345, 2, false); ++ TESTINST_RI("pcaddi", -12345, 2, false); ++ TESTINST_RI("pcaddi", 524287, 2, false); ++ TESTINST_RI("pcaddi", -524288, 2, false); ++ ++ /* ---------------- pcaddu12i rd, si20 ---------------- */ ++ TESTINST_RI("pcaddu12i", 0, 12, false); ++ TESTINST_RI("pcaddu12i", 1, 12, false); ++ TESTINST_RI("pcaddu12i", 100, 12, false); ++ TESTINST_RI("pcaddu12i", 12345, 12, false); ++ TESTINST_RI("pcaddu12i", -12345, 12, false); ++ TESTINST_RI("pcaddu12i", 524287, 12, false); ++ 
TESTINST_RI("pcaddu12i", -524288, 12, false); ++ ++ /* ---------------- pcaddu18i rd, si20 ---------------- */ ++ TESTINST_RI("pcaddu18i", 0, 18, false); ++ TESTINST_RI("pcaddu18i", 1, 18, false); ++ TESTINST_RI("pcaddu18i", 100, 18, false); ++ TESTINST_RI("pcaddu18i", 12345, 18, false); ++ TESTINST_RI("pcaddu18i", -12345, 18, false); ++ TESTINST_RI("pcaddu18i", 524287, 18, false); ++ TESTINST_RI("pcaddu18i", -524288, 18, false); ++ ++ /* ---------------- pcalau12i rd, si20 ---------------- */ ++ TESTINST_RI("pcalau12i", 0, 12, true); ++ TESTINST_RI("pcalau12i", 1, 12, true); ++ TESTINST_RI("pcalau12i", 100, 12, true); ++ TESTINST_RI("pcalau12i", 12345, 12, true); ++ TESTINST_RI("pcalau12i", -12345, 12, true); ++ TESTINST_RI("pcalau12i", 524287, 12, true); ++ TESTINST_RI("pcalau12i", -524288, 12, true); ++} ++ ++int main(void) ++{ ++ test(); ++ return 0; ++} +diff --git a/none/tests/loongarch64/pc.stderr.exp b/none/tests/loongarch64/pc.stderr.exp +new file mode 100644 +index 000000000..e69de29bb +diff --git a/none/tests/loongarch64/pc.stdout.exp b/none/tests/loongarch64/pc.stdout.exp +new file mode 100644 +index 000000000..422ea404b +--- /dev/null ++++ b/none/tests/loongarch64/pc.stdout.exp +@@ -0,0 +1,28 @@ ++test pcaddi ++test pcaddi ++test pcaddi ++test pcaddi ++test pcaddi ++test pcaddi ++test pcaddi ++test pcaddu12i ++test pcaddu12i ++test pcaddu12i ++test pcaddu12i ++test pcaddu12i ++test pcaddu12i ++test pcaddu12i ++test pcaddu18i ++test pcaddu18i ++test pcaddu18i ++test pcaddu18i ++test pcaddu18i ++test pcaddu18i ++test pcaddu18i ++test pcalau12i ++test pcalau12i ++test pcalau12i ++test pcalau12i ++test pcalau12i ++test pcalau12i ++test pcalau12i +diff --git a/none/tests/loongarch64/pc.vgtest b/none/tests/loongarch64/pc.vgtest +new file mode 100644 +index 000000000..468226df2 +--- /dev/null ++++ b/none/tests/loongarch64/pc.vgtest +@@ -0,0 +1,2 @@ ++prog: pc ++vgopts: -q +diff --git a/none/tests/loongarch64/special.c b/none/tests/loongarch64/special.c +new file mode 100644 +index 000000000..e1e8c9430 +--- /dev/null ++++ b/none/tests/loongarch64/special.c +@@ -0,0 +1,112 @@ ++#include ++ ++#define TESTINST_HRI(insn, hint, addr, offs) \ ++ { \ ++ __asm__ __volatile__( \ ++ insn " " #hint ", %0, " #offs " \n\t" \ ++ : \ ++ : "r" (addr) \ ++ : "memory" \ ++ ); \ ++ printf("test %s\n", insn); \ ++ } ++ ++#define TESTINST_HRR(insn, hint, addr, offs) \ ++ { \ ++ __asm__ __volatile__( \ ++ insn " %0, %1, \n\t" \ ++ : \ ++ : "r" (addr), "r" (offs) \ ++ : "memory" \ ++ ); \ ++ printf("test %s\n", insn); \ ++ } ++ ++#define TESTINST_CODE(insn, code) \ ++ { \ ++ __asm__ __volatile__( \ ++ insn " " #code " \n\t" \ ++ : \ ++ : \ ++ : "memory" \ ++ ); \ ++ printf("test %s\n", insn); \ ++ } ++ ++#define TESTINST_RR(insn, id) \ ++ { \ ++ unsigned long res = 0; \ ++ __asm__ __volatile__( \ ++ insn " %0, %1 \n\t" \ ++ : "+r" (res) \ ++ : "r" (id) \ ++ : "memory" \ ++ ); \ ++ printf("test %s\n", insn); \ ++ printf("res: %ld\n", res); \ ++ } ++ ++unsigned long mem[8]; ++ ++void test(void) ++{ ++ /* ---------------- preld hint, rj, si12 ---------------- */ ++ TESTINST_HRI("preld", 0, mem, 0); ++ TESTINST_HRI("preld", 1, mem, 1); ++ TESTINST_HRI("preld", 2, mem, 2); ++ TESTINST_HRI("preld", 3, mem, 3); ++ TESTINST_HRI("preld", 4, mem, 4); ++ TESTINST_HRI("preld", 5, mem, 5); ++ TESTINST_HRI("preld", 6, mem, 6); ++ TESTINST_HRI("preld", 7, mem, 7); ++ TESTINST_HRI("preld", 8, mem, 8); ++ TESTINST_HRI("preld", 9, mem, 9); ++ ++ /* ---------------- preldx hint, rj, rk ---------------- */ ++ 
TESTINST_HRI("preld", 31, mem, 10); ++ TESTINST_HRI("preld", 30, mem, 12); ++ TESTINST_HRI("preld", 29, mem, 14); ++ TESTINST_HRI("preld", 28, mem, 16); ++ TESTINST_HRI("preld", 27, mem, 18); ++ TESTINST_HRI("preld", 26, mem, 20); ++ TESTINST_HRI("preld", 25, mem, 22); ++ TESTINST_HRI("preld", 24, mem, 24); ++ TESTINST_HRI("preld", 23, mem, 26); ++ TESTINST_HRI("preld", 22, mem, 28); ++ ++ /* ---------------- dbar code ---------------- */ ++ TESTINST_CODE("dbar", 0); ++ TESTINST_CODE("dbar", 2); ++ TESTINST_CODE("dbar", 4); ++ TESTINST_CODE("dbar", 6); ++ TESTINST_CODE("dbar", 8); ++ ++ /* ---------------- ibar code ---------------- */ ++ TESTINST_CODE("ibar", 9); ++ TESTINST_CODE("ibar", 7); ++ TESTINST_CODE("ibar", 5); ++ TESTINST_CODE("ibar", 3); ++ TESTINST_CODE("ibar", 1); ++ ++ /* ---------------- rdtimel.w rd, rj ---------------- */ ++ TESTINST_RR("rdtimel.w", 0); ++ TESTINST_RR("rdtimel.w", 1); ++ TESTINST_RR("rdtimel.w", 2); ++ ++ /* ---------------- rdtimeh.w rd, rj ---------------- */ ++ TESTINST_RR("rdtimeh.w", 0); ++ TESTINST_RR("rdtimeh.w", 1); ++ TESTINST_RR("rdtimeh.w", 2); ++ ++ ++ /* ---------------- rdtime.d rd, rj ---------------- */ ++ TESTINST_RR("rdtime.d", 0); ++ TESTINST_RR("rdtime.d", 1); ++ TESTINST_RR("rdtime.d", 2); ++} ++ ++int main(void) ++{ ++ test(); ++ return 0; ++} +diff --git a/none/tests/loongarch64/special.stderr.exp b/none/tests/loongarch64/special.stderr.exp +new file mode 100644 +index 000000000..e69de29bb +diff --git a/none/tests/loongarch64/special.stdout.exp b/none/tests/loongarch64/special.stdout.exp +new file mode 100644 +index 000000000..7bd523e10 +--- /dev/null ++++ b/none/tests/loongarch64/special.stdout.exp +@@ -0,0 +1,48 @@ ++test preld ++test preld ++test preld ++test preld ++test preld ++test preld ++test preld ++test preld ++test preld ++test preld ++test preld ++test preld ++test preld ++test preld ++test preld ++test preld ++test preld ++test preld ++test preld ++test preld ++test dbar ++test dbar ++test dbar ++test dbar ++test dbar ++test ibar ++test ibar ++test ibar ++test ibar ++test ibar ++test rdtimel.w ++res: 0 ++test rdtimel.w ++res: 0 ++test rdtimel.w ++res: 0 ++test rdtimeh.w ++res: 0 ++test rdtimeh.w ++res: 0 ++test rdtimeh.w ++res: 0 ++test rdtime.d ++res: 0 ++test rdtime.d ++res: 0 ++test rdtime.d ++res: 0 +diff --git a/none/tests/loongarch64/special.vgtest b/none/tests/loongarch64/special.vgtest +new file mode 100644 +index 000000000..b2f2ae952 +--- /dev/null ++++ b/none/tests/loongarch64/special.vgtest +@@ -0,0 +1,2 @@ ++prog: special ++vgopts: -q +diff --git a/tests/Makefile.am b/tests/Makefile.am +index 916e5085d..e21f68bf5 100644 +--- a/tests/Makefile.am ++++ b/tests/Makefile.am +@@ -52,7 +52,8 @@ check_PROGRAMS = \ + power_insn_available \ + is_ppc64_BE \ + min_power_isa \ +- arm64_features ++ arm64_features \ ++ loongarch64_features + + + AM_CFLAGS += $(AM_FLAG_M3264_PRI) +diff --git a/tests/arch_test.c b/tests/arch_test.c +index 37cc1bc76..97b6bc7c8 100644 +--- a/tests/arch_test.c ++++ b/tests/arch_test.c +@@ -34,6 +34,7 @@ char* all_archs[] = { + "mips32", + "mips64", + "nanomips", ++ "loongarch64", + NULL + }; + +@@ -79,6 +80,10 @@ static Bool go(char* arch) + + #elif defined(VGP_nanomips_linux) + if ( 0 == strcmp( arch, "nanomips" ) ) return True; ++ ++#elif defined(VGP_loongarch64_linux) ++ if ( 0 == strcmp( arch, "loongarch64" ) ) return True; ++ + #else + # error Unknown platform + #endif // VGP_* +diff --git a/tests/loongarch64_features.c b/tests/loongarch64_features.c +new file mode 100644 +index 
000000000..45ba2d1c0 +--- /dev/null ++++ b/tests/loongarch64_features.c +@@ -0,0 +1,81 @@ ++#include ++#include ++#include ++ ++// This file determines loongarch64 features a processor supports. ++// For now, we only support loongarch64-linux. ++// ++// We return: ++// - 0 if the machine has the asked-for feature. ++// - 1 if the machine doesn't have the asked-for feature. ++// - 2 if the asked-for feature isn't recognised (this will always be the case ++// for any feature if run on a non-loongarch64 machine). ++// - 3 if there was a usage error (it also prints an error message). ++#define FEATURE_PRESENT 0 ++#define FEATURE_NOT_PRESENT 1 ++#define UNRECOGNISED_FEATURE 2 ++#define USAGE_ERROR 3 ++ ++#if defined(VGA_loongarch64) ++ ++static int go(const char* feature_name) ++{ ++ int i, len, found; ++ FILE* fp; ++ char buf[256]; ++ const char* features[] = { ++ "cpucfg", "lam", "ual", "fpu", ++ "lsx", "lasx", "complex", "crypto", ++ "lvz", "lbt_x86", "lbt_arm", "lbt_mips" ++ }; ++ ++ found = 0; ++ len = sizeof(features) / sizeof(features[0]); ++ for (i = 0; i < len; i++) { ++ if (strcmp(feature_name, features[i]) == 0) { ++ found = 1; ++ break; ++ } ++ } ++ ++ if (!found) ++ return UNRECOGNISED_FEATURE; ++ ++ fp = fopen("/proc/cpuinfo", "r"); ++ if(fp == NULL) ++ return UNRECOGNISED_FEATURE; ++ ++ while (fgets(buf, sizeof(buf), fp) != NULL) { ++ if (strstr(buf, feature_name) != NULL) { ++ fclose(fp); ++ return FEATURE_PRESENT; ++ } ++ } ++ ++ fclose(fp); ++ return FEATURE_NOT_PRESENT; ++} ++ ++#else ++ ++static int go(const char* feature_name) ++{ ++ // Feature not recognised (non-loongarch64 machine!) ++ return UNRECOGNISED_FEATURE; ++} ++ ++#endif // defined(VGA_loongarch64) ++ ++ ++//--------------------------------------------------------------------------- ++// main ++//--------------------------------------------------------------------------- ++int main(int argc, char **argv) ++{ ++ if (argc != 2) { ++ fprintf(stderr, "usage: loongarch64_features \n"); ++ exit(USAGE_ERROR); ++ } ++ ++ return go(argv[1]); ++} +diff --git a/tests/platform_test b/tests/platform_test +index c23a4f645..a1eaf8f1c 100644 +--- a/tests/platform_test ++++ b/tests/platform_test +@@ -14,6 +14,7 @@ all_platforms= + all_platforms="$all_platforms x86-linux amd64-linux ppc32-linux ppc64-linux" + all_platforms="$all_platforms arm-linux arm64-linux" + all_platforms="$all_platforms s390x-linux mips32-linux mips64-linux" ++all_platforms="$all_platforms loongarch64-linux" + all_platforms="$all_platforms x86-darwin amd64-darwin" + all_platforms="$all_platforms x86-solaris amd64-solaris" + all_platforms="$all_platforms x86-freebsd amd64-freebsd" diff --git a/vaultwarden/PKGBUILD b/vaultwarden/PKGBUILD index 037bc72ce2..aa52fe163a 100644 --- a/vaultwarden/PKGBUILD +++ b/vaultwarden/PKGBUILD @@ -54,7 +54,7 @@ prepare() { /^# ROCKET_TLS/a ROCKET_LIMITS={json=10485760}" .env.template # download dependencies - cargo fetch --locked --target "$CARCH-unknown-linux-gnu" + cargo fetch --locked --target "`uname -m`-unknown-linux-gnu" } build() { diff --git a/virt-manager/600.patch b/virt-manager/600.patch new file mode 100644 index 0000000000..17a3e4b0cb --- /dev/null +++ b/virt-manager/600.patch @@ -0,0 +1,186 @@ +From 564b110d66843a36604bacd4107ef773ac0e8933 Mon Sep 17 00:00:00 2001 +From: li weigang +Date: Mon, 30 Oct 2023 14:47:47 +0800 +Subject: [PATCH] add loongarch support + +--- + virtManager/createvm.py | 5 ++++- + virtinst/devices/disk.py | 2 ++ + virtinst/devices/video.py | 2 ++ + virtinst/domain/cpu.py | 5 +++++ + 
virtinst/domain/os.py | 3 +++ + virtinst/domcapabilities.py | 6 +++++- + virtinst/guest.py | 16 ++++++++++++---- + 7 files changed, 33 insertions(+), 6 deletions(-) + +diff --git a/virtManager/createvm.py b/virtManager/createvm.py +index 95aff71b2..df6dbb789 100644 +--- a/virtManager/createvm.py ++++ b/virtManager/createvm.py +@@ -476,7 +476,8 @@ def _set_caps_state(self): + + installable_arch = bool(guest.os.is_x86() or + guest.os.is_ppc64() or +- guest.os.is_s390x()) ++ guest.os.is_s390x() or ++ guest.os.is_loongarch()) + + default_efi = ( + self.config.get_default_firmware_setting() == "uefi" and +@@ -857,6 +858,8 @@ def _populate_machine(self): + machines.sort() + + defmachine = None ++ if self._capsinfo.arch in ["loongarch64"]: ++ defmachine = "loongson7a" + prios = [] + recommended_machine = virtinst.Guest.get_recommended_machine( + self._capsinfo) +diff --git a/virtinst/devices/disk.py b/virtinst/devices/disk.py +index 9609ebacf..1e2c56f8a 100644 +--- a/virtinst/devices/disk.py ++++ b/virtinst/devices/disk.py +@@ -982,6 +982,8 @@ def _default_bus(self, guest): + if self.conn.is_bhyve(): + # IDE bus is not supported by bhyve + return "sata" ++ if self.is_cdrom() and guest.os.is_loongarch(): ++ return "scsi" + return "ide" + + def set_defaults(self, guest): +diff --git a/virtinst/devices/video.py b/virtinst/devices/video.py +index 70067a72c..d10fd7aa4 100644 +--- a/virtinst/devices/video.py ++++ b/virtinst/devices/video.py +@@ -27,6 +27,8 @@ class DeviceVideo(Device): + + @staticmethod + def default_model(guest): ++ if guest.os.is_loongarch(): ++ return "virtio" + if not guest.os.is_hvm(): + return None + if guest.os.is_pseries(): +diff --git a/virtinst/domain/cpu.py b/virtinst/domain/cpu.py +index c635932ed..2c66b9dfc 100644 +--- a/virtinst/domain/cpu.py ++++ b/virtinst/domain/cpu.py +@@ -462,5 +462,10 @@ def set_defaults(self, guest): + # -M virt defaults to a 32bit CPU, even if using aarch64 + self.set_model(guest, "cortex-a57") + ++ elif guest.os.is_loongarch() and guest.type == "kvm": ++ if guest.os.arch != self.conn.caps.host.cpu.arch: ++ return ++ self.set_special_mode(guest, guest.loongarch_cpu_default) ++ + elif guest.os.is_x86() and guest.type == "kvm": + self._set_cpu_x86_kvm_default(guest) +diff --git a/virtinst/domain/os.py b/virtinst/domain/os.py +index 4310e6238..ae2cd97b1 100644 +--- a/virtinst/domain/os.py ++++ b/virtinst/domain/os.py +@@ -78,6 +78,9 @@ def is_riscv(self): + def is_riscv_virt(self): + return self.is_riscv() and str(self.machine).startswith("virt") + ++ def is_loongarch(self): ++ return self.arch == "loongarch64" ++ + ################## + # XML properties # + ################## +diff --git a/virtinst/domcapabilities.py b/virtinst/domcapabilities.py +index db08bf65f..8694cbd3a 100644 +--- a/virtinst/domcapabilities.py ++++ b/virtinst/domcapabilities.py +@@ -291,6 +291,10 @@ def build_from_guest(guest): + r".*arm/QEMU_EFI.*", # fedora, gerd's firmware repo + r".*edk2-arm-code\.fd" # upstream qemu + ], ++ "loongarch64": [ ++ ".*loongarch_bios.bin", # loongarch ++ ".*loongarch_bios.bin", # gerd's firmware repo ++ ], + } + + def find_uefi_path_for_arch(self): +@@ -446,7 +450,7 @@ def supports_graphics_spice(self): + # support. 
Use our pre-existing logic + if not self.conn.is_qemu() and not self.conn.is_test(): + return False +- return self.conn.caps.host.cpu.arch in ["i686", "x86_64"] ++ return self.conn.caps.host.cpu.arch in ["i686", "x86_64", "loongarch64"] + + return self.devices.graphics.get_enum("type").has_value("spice") + +diff --git a/virtinst/guest.py b/virtinst/guest.py +index babe3de66..7bcccd817 100644 +--- a/virtinst/guest.py ++++ b/virtinst/guest.py +@@ -213,6 +213,7 @@ def __init__(self, *args, **kwargs): + self.skip_default_tpm = False + self.have_default_tpm = False + self.x86_cpu_default = self.cpu.SPECIAL_MODE_APP_DEFAULT ++ self.loongarch_cpu_default = self.cpu.SPECIAL_MODE_HOST_MODEL_ONLY + + # qemu 6.1, fairly new when we added this option, has an unfortunate + # bug with >= 15 root ports, so we choose 14 instead of our original 16 +@@ -353,7 +354,8 @@ def _supports_virtio(self, os_support): + if (self.os.is_arm_machvirt() or + self.os.is_riscv_virt() or + self.os.is_s390x() or +- self.os.is_pseries()): ++ self.os.is_pseries() or ++ self.os.is_loongarch()): + return True + + if not os_support: +@@ -542,7 +544,7 @@ def prefers_uefi(self): + # and doesn't break QEMU internal snapshots + prefer_efi = self.osinfo.requires_firmware_efi(self.os.arch) + else: +- prefer_efi = self.os.is_arm_machvirt() or self.conn.is_bhyve() ++ prefer_efi = self.os.is_arm_machvirt() or self.conn.is_bhyve() or self.os.is_loongarch() + + log.debug("Prefer EFI => %s", prefer_efi) + return prefer_efi +@@ -559,6 +561,8 @@ def set_uefi_path(self, path): + """ + self.os.loader_ro = True + self.os.loader_type = "pflash" ++ if (self.os.is_loongarch()): ++ self.os.loader_type = "rom" + self.os.loader = path + + # If the firmware name contains "secboot" it is probably build +@@ -908,7 +912,8 @@ def _add_default_input_device(self): + usb_tablet = True + if (self.os.is_arm_machvirt() or + self.os.is_riscv_virt() or +- self.os.is_pseries()): ++ self.os.is_pseries() or ++ self.os.is_loongarch()): + usb_tablet = True + usb_keyboard = True + +@@ -1022,7 +1027,8 @@ def _add_default_graphics(self): + if self.os.is_container() and not self.conn.is_vz(): + return + if (not self.os.is_x86() and +- not self.os.is_pseries()): ++ not self.os.is_pseries() and ++ not self.os.is_loongarch()): + return + self.add_device(DeviceGraphics(self.conn)) + +@@ -1164,6 +1170,8 @@ def _add_spice_sound(self): + self.add_device(dev) + + def _add_spice_usbredir(self): ++ if self.os.is_loongarch(): ++ return + if not self.lookup_domcaps().supports_redirdev_usb(): + return # pragma: no cover + if self.skip_default_usbredir: diff --git a/virt-manager/PKGBUILD b/virt-manager/PKGBUILD index dbe1d98d12..891f4997ec 100644 --- a/virt-manager/PKGBUILD +++ b/virt-manager/PKGBUILD @@ -7,15 +7,22 @@ pkgbase=virt-manager pkgname=(virt-install virt-manager) pkgver=4.1.0 -pkgrel=2 +pkgrel=3 arch=('any') url='https://virt-manager.org/' license=('GPL') makedepends=('python-docutils' 'python-setuptools') checkdepends=('python-pytest' 'libosinfo' 'libvirt-python' 'python-gobject' 'python-requests' 'cpio' 'cdrtools') optdepends=('x11-ssh-askpass: provide password for remote machines connected via ssh tunnel') -source=("https://releases.pagure.org/virt-manager/virt-manager-${pkgver}.tar.gz") -b2sums=('1b4203be81bd7b82251225d691a4d9068f268e610f049bcadb96de5b539e964ca0b001f22f06ddd8266b58b079f60046f6d11942a1e4eadbc43f0607c46bbddd') +source=("https://releases.pagure.org/virt-manager/virt-manager-${pkgver}.tar.gz" + 600.patch) 
+b2sums=('1b4203be81bd7b82251225d691a4d9068f268e610f049bcadb96de5b539e964ca0b001f22f06ddd8266b58b079f60046f6d11942a1e4eadbc43f0607c46bbddd' + 'b97145d2f550e15c7a3bb2c93b4b82b20e33c098b51cf38c76314c58da665ead76b891cb7ceb0bceaf18f4128209d0bade0148c0ebbdc9c876fff7ae89a6e129') + +prepare() { + cd ${pkgbase}-${pkgver} + patch -p1 -i $srcdir/600.patch +} build() { cd ${pkgbase}-${pkgver} diff --git a/virtiofsd/PKGBUILD b/virtiofsd/PKGBUILD index e947b36182..eaaa95f478 100644 --- a/virtiofsd/PKGBUILD +++ b/virtiofsd/PKGBUILD @@ -27,7 +27,7 @@ prepare() { # use /usr/lib instead of /usr/libexec: https://gitlab.com/virtio-fs/virtiofsd/-/issues/86 sed 's/libexec/lib/' -i 50-$pkgname.json - cargo fetch --locked --target "$CARCH-unknown-linux-gnu" + cargo fetch --locked } build() { diff --git a/virtualbox-host-modules-arch/PKGBUILD b/virtualbox-host-modules-arch/PKGBUILD index f37df363f4..825c62299b 100644 --- a/virtualbox-host-modules-arch/PKGBUILD +++ b/virtualbox-host-modules-arch/PKGBUILD @@ -26,7 +26,7 @@ package(){ _kernver="$( '5AA3BC334FD7E3369E7C77B291C559DBE4C9123B' # Adrián Pérez de Castro @@ -104,6 +110,8 @@ prepare() { # https://bugs.archlinux.org/task/79783 # https://github.com/WebKit/WebKit/pull/18614 patch -Np1 -i ../GTK-Disable-DMABuf-renderer-for-NVIDIA-proprietary-drivers.patch + patch -p1 -i $srcdir/webkit2-gtk-fix-build.patch + patch -p1 -i $srcdir/webkit2gtk-fix-cmake-build.patch } build() { @@ -127,7 +135,12 @@ build() { # :(.text+0x4a019): undefined reference to `ipint_extern_table_fill' # collect2: error: ld returned 1 exit status export CC=clang CXX=clang++ - LDFLAGS+=" -fuse-ld=lld" +# LDFLAGS+=" -fuse-ld=lld" +# clang didn't support -mlsx + CFLAGS=${CFLAGS/-mlsx /} + CXXFLAGS=${CXXFLAGS/-mlsx /} + CFLAGS=${CFLAGS/-fstack-clash-protection/} + CXXFLAGS=${CXXFLAGS/-fstack-clash-protection/} # Produce minimal debug info: 4.3 GB of debug data makes the # build too slow and is too much to package for debuginfod diff --git a/webkit2gtk-4.1/webkit2-gtk-fix-build.patch b/webkit2gtk-4.1/webkit2-gtk-fix-build.patch new file mode 100644 index 0000000000..3e03c66611 --- /dev/null +++ b/webkit2gtk-4.1/webkit2-gtk-fix-build.patch @@ -0,0 +1,10 @@ +--- webkitgtk-2.38.5/Source/ThirdParty/ANGLE/include/GLSLANG/ShaderVars.h 2022-08-31 15:59:51.894493300 +0800 ++++ webkitgtk-2.38.5/Source/ThirdParty/ANGLE/include/GLSLANG/ShaderVars.h 2023-03-09 13:32:01.655350948 +0800 +@@ -14,6 +14,7 @@ + #include + #include + #include ++#include + + // This type is defined here to simplify ANGLE's integration with glslang for SPIR-V. 
+ using ShCompileOptions = uint64_t; diff --git a/webkit2gtk-4.1/webkit2gtk-fix-cmake-build.patch b/webkit2gtk-4.1/webkit2gtk-fix-cmake-build.patch new file mode 100644 index 0000000000..b695514152 --- /dev/null +++ b/webkit2gtk-4.1/webkit2gtk-fix-cmake-build.patch @@ -0,0 +1,11 @@ +--- webkitgtk-2.42.3/Source/cmake/FindGStreamer.cmake 2023-12-27 00:33:15.109385432 +0800 ++++ webkitgtk-2.42.3/Source/cmake/FindGStreamer.cmake 2023-12-27 00:33:15.129385924 +0800 +@@ -65,7 +65,7 @@ + + string(REGEX MATCH "(.*)>=(.*)" _dummy "${_pkgconfig_name}") + if ("${CMAKE_MATCH_2}" STREQUAL "") +- pkg_check_modules(PC_${_component_prefix} "${_pkgconfig_name} >= ${GStreamer_FIND_VERSION}") ++ pkg_check_modules(PC_${_component_prefix} "${_pkgconfig_name}>=${GStreamer_FIND_VERSION}") + else () + pkg_check_modules(PC_${_component_prefix} ${_pkgconfig_name}) + endif () diff --git a/webkit2gtk/PKGBUILD b/webkit2gtk/PKGBUILD index 1df216e118..975bbe0b43 100644 --- a/webkit2gtk/PKGBUILD +++ b/webkit2gtk/PKGBUILD @@ -80,15 +80,21 @@ source=( $url/releases/webkitgtk-$pkgver.tar.xz{,.asc} GTK-MiniBrowser-should-hide-the-toolbar-when-using-full-screen.patch GTK-Disable-DMABuf-renderer-for-NVIDIA-proprietary-drivers.patch + webkit2-gtk-fix-build.patch + webkit2gtk-fix-cmake-build.patch ) sha256sums=('52288b30bda22373442cecb86f9c9a569ad8d4769a1f97b352290ed92a67ed86' 'SKIP' 'a921d6be1303e9f23474971f381886fd291ec5bb1a7ff1e85acede8cfb88bef2' - '655f3b2c96355ac83c4fa1fc6048e3256bbfdbfb9727e1e18c5af12613536206') + '655f3b2c96355ac83c4fa1fc6048e3256bbfdbfb9727e1e18c5af12613536206' + '08917be7a1af4bb371c9919117912f1acffc9bc8fe9434693e3b0184ac352bc0' + '2ce442337d9f9871170f7face452a965d724a418ecb752be4222cc867b9c0e5e') b2sums=('3a8cd0818e0d989ab778cda63dd873d7e185ec20fbfe609b9da70041fe38ac30351046516600cb8eb86089e43136487d81c922690468daa70ed2a436561c2401' 'SKIP' 'd440d82c769f1b35caf5464dc850cdf1c896224205c90c17d8b0a44aee62e4b1383e11306936aaca067fde8836770d346d5122d7b05c91a5c7c1741c89c65e2f' - 'daa782d4d40cc12a05c02b2494e879333f66f32820f1a1b89b7ab68f62fd53043b116ecb5ef476004095a7c7b924b12695b7e87e21dd547f66e72fa02a972f0d') + 'daa782d4d40cc12a05c02b2494e879333f66f32820f1a1b89b7ab68f62fd53043b116ecb5ef476004095a7c7b924b12695b7e87e21dd547f66e72fa02a972f0d' + '22602b8f9836d666ca8db8200e4c965c0560263786d713f041db9814f5c93233da76c5f29e8540079e6bf97097ab063aa88938de39a32f547d954de5b8669acf' + '6fa62e8d83678ea8938b1aef0425422354ce5597bad455adca89e7509677edcc7c1a263b0c38057e20ae404a7a2cc3dffe9f411ff9fc29baa5a59ea585d998b6') validpgpkeys=( 'D7FCF61CF9A2DEAB31D81BD3F3D322D0EC4582C3' # Carlos Garcia Campos '5AA3BC334FD7E3369E7C77B291C559DBE4C9123B' # Adrián Pérez de Castro @@ -104,6 +110,8 @@ prepare() { # https://bugs.archlinux.org/task/79783 # https://github.com/WebKit/WebKit/pull/18614 patch -Np1 -i ../GTK-Disable-DMABuf-renderer-for-NVIDIA-proprietary-drivers.patch + patch -p1 -i $srcdir/webkit2-gtk-fix-build.patch + patch -p1 -i $srcdir/webkit2gtk-fix-cmake-build.patch } build() { @@ -127,7 +135,14 @@ build() { # :(.text+0x4a019): undefined reference to `ipint_extern_table_fill' # collect2: error: ld returned 1 exit status export CC=clang CXX=clang++ - LDFLAGS+=" -fuse-ld=lld" +# LDFLAGS+=" -fuse-ld=lld" +# clang didn't support -mlsx + CFLAGS=${CFLAGS/-mlsx /} + CXXFLAGS=${CXXFLAGS/-mlsx /} + CFLAGS=${CFLAGS/-fstack-clash-protection/} + CXXFLAGS=${CXXFLAGS/-fstack-clash-protection/} + CFLAGS=${CFLAGS/-Wa,-mno-relax/} + CXXFLAGS=${CXXFLAGS/-Wa,-mno-relax/} # Produce minimal debug info: 4.3 GB of debug data makes the # build too slow 
and is too much to package for debuginfod diff --git a/webkit2gtk/webkit2-gtk-fix-build.patch b/webkit2gtk/webkit2-gtk-fix-build.patch new file mode 100644 index 0000000000..3e03c66611 --- /dev/null +++ b/webkit2gtk/webkit2-gtk-fix-build.patch @@ -0,0 +1,10 @@ +--- webkitgtk-2.38.5/Source/ThirdParty/ANGLE/include/GLSLANG/ShaderVars.h 2022-08-31 15:59:51.894493300 +0800 ++++ webkitgtk-2.38.5/Source/ThirdParty/ANGLE/include/GLSLANG/ShaderVars.h 2023-03-09 13:32:01.655350948 +0800 +@@ -14,6 +14,7 @@ + #include + #include + #include ++#include + + // This type is defined here to simplify ANGLE's integration with glslang for SPIR-V. + using ShCompileOptions = uint64_t; diff --git a/webkit2gtk/webkit2gtk-fix-cmake-build.patch b/webkit2gtk/webkit2gtk-fix-cmake-build.patch new file mode 100644 index 0000000000..b695514152 --- /dev/null +++ b/webkit2gtk/webkit2gtk-fix-cmake-build.patch @@ -0,0 +1,11 @@ +--- webkitgtk-2.42.3/Source/cmake/FindGStreamer.cmake 2023-12-27 00:33:15.109385432 +0800 ++++ webkitgtk-2.42.3/Source/cmake/FindGStreamer.cmake 2023-12-27 00:33:15.129385924 +0800 +@@ -65,7 +65,7 @@ + + string(REGEX MATCH "(.*)>=(.*)" _dummy "${_pkgconfig_name}") + if ("${CMAKE_MATCH_2}" STREQUAL "") +- pkg_check_modules(PC_${_component_prefix} "${_pkgconfig_name} >= ${GStreamer_FIND_VERSION}") ++ pkg_check_modules(PC_${_component_prefix} "${_pkgconfig_name}>=${GStreamer_FIND_VERSION}") + else () + pkg_check_modules(PC_${_component_prefix} ${_pkgconfig_name}) + endif () diff --git a/webkitgtk-6.0/PKGBUILD b/webkitgtk-6.0/PKGBUILD index 7364ac6d90..71791cd56a 100644 --- a/webkitgtk-6.0/PKGBUILD +++ b/webkitgtk-6.0/PKGBUILD @@ -80,15 +80,18 @@ source=( $url/releases/webkitgtk-$pkgver.tar.xz{,.asc} GTK-MiniBrowser-should-hide-the-toolbar-when-using-full-screen.patch GTK-Disable-DMABuf-renderer-for-NVIDIA-proprietary-drivers.patch + webkit2gtk-fix-cmake-build.patch ) sha256sums=('52288b30bda22373442cecb86f9c9a569ad8d4769a1f97b352290ed92a67ed86' 'SKIP' 'a921d6be1303e9f23474971f381886fd291ec5bb1a7ff1e85acede8cfb88bef2' - '655f3b2c96355ac83c4fa1fc6048e3256bbfdbfb9727e1e18c5af12613536206') + '655f3b2c96355ac83c4fa1fc6048e3256bbfdbfb9727e1e18c5af12613536206' + '2ce442337d9f9871170f7face452a965d724a418ecb752be4222cc867b9c0e5e') b2sums=('3a8cd0818e0d989ab778cda63dd873d7e185ec20fbfe609b9da70041fe38ac30351046516600cb8eb86089e43136487d81c922690468daa70ed2a436561c2401' 'SKIP' 'd440d82c769f1b35caf5464dc850cdf1c896224205c90c17d8b0a44aee62e4b1383e11306936aaca067fde8836770d346d5122d7b05c91a5c7c1741c89c65e2f' - 'daa782d4d40cc12a05c02b2494e879333f66f32820f1a1b89b7ab68f62fd53043b116ecb5ef476004095a7c7b924b12695b7e87e21dd547f66e72fa02a972f0d') + 'daa782d4d40cc12a05c02b2494e879333f66f32820f1a1b89b7ab68f62fd53043b116ecb5ef476004095a7c7b924b12695b7e87e21dd547f66e72fa02a972f0d' + '6fa62e8d83678ea8938b1aef0425422354ce5597bad455adca89e7509677edcc7c1a263b0c38057e20ae404a7a2cc3dffe9f411ff9fc29baa5a59ea585d998b6') validpgpkeys=( 'D7FCF61CF9A2DEAB31D81BD3F3D322D0EC4582C3' # Carlos Garcia Campos '5AA3BC334FD7E3369E7C77B291C559DBE4C9123B' # Adrián Pérez de Castro @@ -104,6 +107,7 @@ prepare() { # https://bugs.archlinux.org/task/79783 # https://github.com/WebKit/WebKit/pull/18614 patch -Np1 -i ../GTK-Disable-DMABuf-renderer-for-NVIDIA-proprietary-drivers.patch + patch -p1 -i $srcdir/webkit2gtk-fix-cmake-build.patch } build() { @@ -127,7 +131,12 @@ build() { # :(.text+0x4a019): undefined reference to `ipint_extern_table_fill' # collect2: error: ld returned 1 exit status export CC=clang CXX=clang++ - LDFLAGS+=" -fuse-ld=lld" +# 
LDFLAGS+=" -fuse-ld=lld" +# clang didn't support -mlsx + CFLAGS=${CFLAGS/-mlsx /} + CXXFLAGS=${CXXFLAGS/-mlsx /} + CFLAGS=${CFLAGS/-fstack-clash-protection/} + CXXFLAGS=${CXXFLAGS/-fstack-clash-protection/} # Produce minimal debug info: 4.3 GB of debug data makes the # build too slow and is too much to package for debuginfod diff --git a/webkitgtk-6.0/webkit2gtk-fix-cmake-build.patch b/webkitgtk-6.0/webkit2gtk-fix-cmake-build.patch new file mode 100644 index 0000000000..b695514152 --- /dev/null +++ b/webkitgtk-6.0/webkit2gtk-fix-cmake-build.patch @@ -0,0 +1,11 @@ +--- webkitgtk-2.42.3/Source/cmake/FindGStreamer.cmake 2023-12-27 00:33:15.109385432 +0800 ++++ webkitgtk-2.42.3/Source/cmake/FindGStreamer.cmake 2023-12-27 00:33:15.129385924 +0800 +@@ -65,7 +65,7 @@ + + string(REGEX MATCH "(.*)>=(.*)" _dummy "${_pkgconfig_name}") + if ("${CMAKE_MATCH_2}" STREQUAL "") +- pkg_check_modules(PC_${_component_prefix} "${_pkgconfig_name} >= ${GStreamer_FIND_VERSION}") ++ pkg_check_modules(PC_${_component_prefix} "${_pkgconfig_name}>=${GStreamer_FIND_VERSION}") + else () + pkg_check_modules(PC_${_component_prefix} ${_pkgconfig_name}) + endif () diff --git a/webrtc-audio-processing-1/PKGBUILD b/webrtc-audio-processing-1/PKGBUILD index a0e08bd2d6..0ce2b728f3 100644 --- a/webrtc-audio-processing-1/PKGBUILD +++ b/webrtc-audio-processing-1/PKGBUILD @@ -17,8 +17,10 @@ makedepends=( ) provides=(libwebrtc-audio-{coding,processing}-${pkgver%%.*}.so) _commit=8e258a1933d405073c9e6465628a69ac7d2a1f13 # tags/v1.3^0 -source=("git+https://gitlab.freedesktop.org/pulseaudio/webrtc-audio-processing.git#commit=$_commit") -b2sums=('SKIP') +source=("git+https://gitlab.freedesktop.org/pulseaudio/webrtc-audio-processing.git#commit=$_commit" + webrtc-audio-processing-la64.patch) +b2sums=('SKIP' + '82645a6da3e482209975fd04eec41bf4b94781e2fd0ed9df433c959738cba26c41e6d7833fadc5427a37feaa8a12a71ee5600b9531de6a6651f1b60ccd4983ed') pkgver() { cd webrtc-audio-processing @@ -27,6 +29,7 @@ pkgver() { prepare() { cd webrtc-audio-processing + patch -p1 -i $srcdir/webrtc-audio-processing-la64.patch } build() { diff --git a/webrtc-audio-processing-1/webrtc-audio-processing-la64.patch b/webrtc-audio-processing-1/webrtc-audio-processing-la64.patch new file mode 100644 index 0000000000..5bb38adf9a --- /dev/null +++ b/webrtc-audio-processing-1/webrtc-audio-processing-la64.patch @@ -0,0 +1,12 @@ +--- webrtc-audio-processing/webrtc/rtc_base/system/arch.h 2023-11-10 21:50:34.000000000 +0800 ++++ webrtc-audio-processing/webrtc/rtc_base/system/arch.h 2023-11-10 21:54:23.409972954 +0800 +@@ -57,6 +57,9 @@ + #elif defined(__EMSCRIPTEN__) + #define WEBRTC_ARCH_32_BITS + #define WEBRTC_ARCH_LITTLE_ENDIAN ++#elif defined(__loongarch_lp64) ++#define WEBRTC_ARCH_64_BITS ++#define WEBRTC_ARCH_LITTLE_ENDIAN + #else + #error Please add support for your architecture in rtc_base/system/arch.h + #endif diff --git a/webrtc-audio-processing/PKGBUILD b/webrtc-audio-processing/PKGBUILD index efae882ce0..d858285951 100644 --- a/webrtc-audio-processing/PKGBUILD +++ b/webrtc-audio-processing/PKGBUILD @@ -11,7 +11,8 @@ depends=(gcc-libs) makedepends=(git) provides=(libwebrtc_audio_processing.so) _commit=e882a5442ac22c93648e12837248d651d18b9247 # tags/v0.3.1^0 -source=("git+https://gitlab.freedesktop.org/pulseaudio/webrtc-audio-processing.git#commit=$_commit") +source=("git+https://gitlab.freedesktop.org/pulseaudio/webrtc-audio-processing.git#commit=$_commit" + webrtc-audio-processing-la64.patch) b2sums=('SKIP') pkgver() { @@ -22,6 +23,7 @@ pkgver() { prepare() { cd 
$pkgname NOCONFIGURE=1 ./autogen.sh + patch -p1 -i $srcdir/webrtc-audio-processing-la64.patch } build() { diff --git a/webrtc-audio-processing/webrtc-audio-processing-la64.patch b/webrtc-audio-processing/webrtc-audio-processing-la64.patch new file mode 100644 index 0000000000..caa03863a4 --- /dev/null +++ b/webrtc-audio-processing/webrtc-audio-processing-la64.patch @@ -0,0 +1,14 @@ +Index: webrtc-audio-processing/webrtc/typedefs.h +=================================================================== +--- webrtc-audio-processing.orig/webrtc/typedefs.h ++++ webrtc-audio-processing/webrtc/typedefs.h +@@ -41,6 +41,9 @@ + //#define WEBRTC_ARCH_ARMEL + #define WEBRTC_ARCH_32_BITS + #define WEBRTC_ARCH_LITTLE_ENDIAN ++#elif defined(__loongarch64) ++#define WEBRTC_ARCH_64_BITS ++#define WEBRTC_ARCH_LITTLE_ENDIAN + #elif defined(__MIPSEL__) + #define WEBRTC_ARCH_32_BITS + #define WEBRTC_ARCH_LITTLE_ENDIAN diff --git a/whipper/PKGBUILD b/whipper/PKGBUILD index 97e2549b2a..3cf6fcb119 100644 --- a/whipper/PKGBUILD +++ b/whipper/PKGBUILD @@ -60,7 +60,7 @@ build() { check() { cd ${pkgname}-${pkgver} local python_version=$(python -c 'import sys; print("".join(map(str, sys.version_info[:2])))') - PYTHONPATH="build/lib.linux-${CARCH}-cpython-${python_version}/" python -m unittest discover + PYTHONPATH="build/lib.linux-`uname -m`-cpython-${python_version}/" python -m unittest discover } package() { diff --git a/wiki-tui/PKGBUILD b/wiki-tui/PKGBUILD index ef54f82e23..09b328163b 100644 --- a/wiki-tui/PKGBUILD +++ b/wiki-tui/PKGBUILD @@ -15,7 +15,7 @@ sha512sums=('9240c17ab9410bf4182349701d06df73f56b11fceb24415c5249a982026fb236d87 prepare() { cd "$pkgname-$pkgver" - cargo fetch --locked --target "$CARCH-unknown-linux-gnu" + cargo fetch --locked --target "`uname -m`-unknown-linux-gnu" } build() { diff --git a/wldash/PKGBUILD b/wldash/PKGBUILD index 9dac7c9516..54857a8d8e 100644 --- a/wldash/PKGBUILD +++ b/wldash/PKGBUILD @@ -24,7 +24,9 @@ prepare() { build() { cd "${pkgname}-v${pkgver}" - cargo build --release --locked + export CARGO_REGISTRIES_MY_REGISTRY_INDEX="https://gitee.com/yetist/crates.io-index" + rm Cargo.lock + cargo build --release -v } package() { diff --git a/wolf-shaper/PKGBUILD b/wolf-shaper/PKGBUILD index 60377ea686..7431924589 100644 --- a/wolf-shaper/PKGBUILD +++ b/wolf-shaper/PKGBUILD @@ -120,6 +120,6 @@ package_wolf-shaper-vst3() { pkgdesc+=" - VST3 plugin" groups=(pro-audio vst3-plugins) - install -vDm 755 $pkgbase/bin/$pkgbase.vst3/Contents/$CARCH-linux/*.so -t "$pkgdir/usr/lib/vst3/$pkgbase.vst3/Contents/$CARCH-linux/" + install -vDm 755 $pkgbase/bin/$pkgbase.vst3/Contents/`uname -m`-linux/*.so -t "$pkgdir/usr/lib/vst3/$pkgbase.vst3/Contents/`uname -m`-linux/" } diff --git a/woodpecker/PKGBUILD b/woodpecker/PKGBUILD index 776397f181..bc9a8816b2 100644 --- a/woodpecker/PKGBUILD +++ b/woodpecker/PKGBUILD @@ -70,6 +70,10 @@ build() { export CGO_CFLAGS="${CFLAGS}" export CGO_CXXFLAGS="${CXXFLAGS}" export GOPATH="${srcdir}" + export GOPROXY=https://goproxy.cn + go mod edit -replace=golang.org/x/sys=github.com/golang/sys@v0.0.0-20220622161953-175b2fd9d664 + go mod edit -replace=golang.org/x/net=github.com/golang/net@v0.0.0-20220622184535-263ec571b305 + go mod tidy # build server/agent/cli go build -v \ diff --git a/wpewebkit/PKGBUILD b/wpewebkit/PKGBUILD index faf4944448..5bfd15a877 100644 --- a/wpewebkit/PKGBUILD +++ b/wpewebkit/PKGBUILD @@ -67,11 +67,14 @@ makedepends=( ) source=( $url/releases/wpewebkit-$pkgver.tar.xz{,.asc} + webkit2gtk-fix-cmake-build.patch ) 
sha256sums=('8836040a3687581970b47a232b713e7023c080d5613427f52db619c29fb253a4' - 'SKIP') + 'SKIP' + '2ce442337d9f9871170f7face452a965d724a418ecb752be4222cc867b9c0e5e') b2sums=('cfce325c574a738c5c7c8b14365d4d07496bc62b54d67d1d1e82b7f497f747ef886ea418ab199d6cbb8c6ac5df76db5b5a092d98abbb95874f11e621241dfeff' - 'SKIP') + 'SKIP' + '6fa62e8d83678ea8938b1aef0425422354ce5597bad455adca89e7509677edcc7c1a263b0c38057e20ae404a7a2cc3dffe9f411ff9fc29baa5a59ea585d998b6') validpgpkeys=( 'D7FCF61CF9A2DEAB31D81BD3F3D322D0EC4582C3' # Carlos Garcia Campos '5AA3BC334FD7E3369E7C77B291C559DBE4C9123B' # Adrián Pérez de Castro @@ -79,6 +82,7 @@ validpgpkeys=( prepare() { cd wpewebkit-$pkgver + patch -p1 -i $srcdir/webkit2gtk-fix-cmake-build.patch } build() { @@ -102,7 +106,13 @@ build() { # :(.text+0x4a019): undefined reference to `ipint_extern_table_fill' # collect2: error: ld returned 1 exit status export CC=clang CXX=clang++ - LDFLAGS+=" -fuse-ld=lld" +# LDFLAGS+=" -fuse-ld=lld" +# clang didn't support -mlsx + CFLAGS=${CFLAGS/-mlsx /} + CXXFLAGS=${CXXFLAGS/-mlsx /} + CFLAGS=${CFLAGS/-fstack-clash-protection/} + CXXFLAGS=${CXXFLAGS/-fstack-clash-protection/} + # Produce minimal debug info: 4.3 GB of debug data makes the # build too slow and is too much to package for debuginfod diff --git a/wpewebkit/webkit2gtk-fix-cmake-build.patch b/wpewebkit/webkit2gtk-fix-cmake-build.patch new file mode 100644 index 0000000000..b695514152 --- /dev/null +++ b/wpewebkit/webkit2gtk-fix-cmake-build.patch @@ -0,0 +1,11 @@ +--- webkitgtk-2.42.3/Source/cmake/FindGStreamer.cmake 2023-12-27 00:33:15.109385432 +0800 ++++ webkitgtk-2.42.3/Source/cmake/FindGStreamer.cmake 2023-12-27 00:33:15.129385924 +0800 +@@ -65,7 +65,7 @@ + + string(REGEX MATCH "(.*)>=(.*)" _dummy "${_pkgconfig_name}") + if ("${CMAKE_MATCH_2}" STREQUAL "") +- pkg_check_modules(PC_${_component_prefix} "${_pkgconfig_name} >= ${GStreamer_FIND_VERSION}") ++ pkg_check_modules(PC_${_component_prefix} "${_pkgconfig_name}>=${GStreamer_FIND_VERSION}") + else () + pkg_check_modules(PC_${_component_prefix} ${_pkgconfig_name}) + endif () diff --git a/x11vnc/PKGBUILD b/x11vnc/PKGBUILD index 42d4a6696a..d1d313efd3 100644 --- a/x11vnc/PKGBUILD +++ b/x11vnc/PKGBUILD @@ -26,8 +26,8 @@ source=("git+https://github.com/LibVNC/x11vnc.git#commit=${_commit}?signed" validpgpkeys=('25E71D2709955ECD4D041E03421BB3B45C6067F8') # Christian Beier sha256sums=('SKIP' 'd39a399d7db8e942e55639ed04a51b3c4f5d31d213d4639b1e26a44d92029403' - '2c71af4c586eabaa11744da65916f9223b928d1fba820f117243f6c8c585f16b' - 'e9c121a0b16013059ce903ed3e7560fabc5015e3b058a3acec85d7ae7102fcf0' + '9358217c23e586cb34d98564e5031dd58bf43e621d23435629d2105c901d0aa8' + '6047df38ca8a27760a6359a7f4029dd006f0423a4cd262cb0833da41defe1792' 'cfb19d44e09e960e2fdb958c9258bccf23c2677715314985f7e819f1dcedb6e4') prepare() { diff --git a/x264/PKGBUILD b/x264/PKGBUILD index aee89b150a..68e62cbdf6 100644 --- a/x264/PKGBUILD +++ b/x264/PKGBUILD @@ -35,10 +35,11 @@ _commit=31e19f92f00c7003fa115047ce50978bc98c3a0d source=(git+https://code.videolan.org/videolan/x264.git#commit=${_commit}) sha256sums=(SKIP) -pkgver() { - cd x264 - ./version.sh | grep X264_POINTVER | sed -r 's/^#define X264_POINTVER "([0-9]+\.[0-9]+)\.([0-9]+) (.*)"$/\1.r\2.\3/' -} +#pkgver() { +# cd x264 +# +# ./version.sh | grep X264_POINTVER | sed -r 's/^#define X264_POINTVER "([0-9]+\.[0-9]+)\.([0-9]+) (.*)"$/\1.r\2.\3/' +#} build() { cd x264 diff --git a/x86_64-linux-gnu-binutils/PKGBUILD b/x86_64-linux-gnu-binutils/PKGBUILD new file mode 100644 index 0000000000..7bc0085619 --- 
/dev/null +++ b/x86_64-linux-gnu-binutils/PKGBUILD @@ -0,0 +1,66 @@ +# Maintainer: Xiaotian Wu + +_target=x86_64-linux-gnu +pkgname=$_target-binutils +pkgver=2.41 +pkgrel=1 +pkgdesc='A set of programs to assemble and manipulate binary and object files for 32-bit and 64-bit x86' +arch=(loong64) +url='https://www.gnu.org/software/binutils/' +license=(GPL) +depends=(zlib libelf) +groups=(x86) +source=(https://ftp.gnu.org/gnu/binutils/binutils-$pkgver.tar.bz2{,.sig}) +sha1sums=('b180faf37b6e1c321d6ccbbf66194f17f7acf47c' + 'SKIP') +sha256sums=('a4c4bec052f7b8370024e60389e194377f3f48b56618418ea51067f67aaab30b' + 'SKIP') +validpgpkeys=('3A24BC1E8FB409FA9F14371813FCEF89DD9E3C4F') # Nick Clifton (Chief Binutils Maintainer) + +prepare() { + cd binutils-$pkgver + sed -i "/ac_cpp=/s/\$CPPFLAGS/\$CPPFLAGS -O2/" libiberty/configure +} + +build() { + cd binutils-$pkgver + +# unset CPPFLAGS + ./configure --target=$_target \ + --with-sysroot=/usr/$_target \ + --prefix=/usr \ + --enable-multilib \ + --with-gnu-as \ + --with-gnu-ld \ + --disable-nls \ + --enable-ld=default \ + --enable-gold \ + --enable-plugins \ + --enable-deterministic-archives \ + --with-system-zlib + make +} + +check() { + cd binutils-$pkgver + + # unset LDFLAGS as testsuite makes assumptions about which ones are active + # do not abort on errors - manually check log files + make -k LDFLAGS="" check || true +} + +package() { + cd binutils-$pkgver + + make DESTDIR="$pkgdir" install + + # Remove file conflicting with host binutils and manpages for MS Windows tools + rm "$pkgdir"/usr/share/man/man1/$_target-{dlltool,windres,windmc}* + rm "$pkgdir"/usr/lib/bfd-plugins/libdep.so + +# rm -r "$pkgdir"/usr/include +# rm -r "$pkgdir"/usr/lib/gprofng/ + + # Remove info documents that conflict with host version + rm -r "$pkgdir"/usr/share/info +} diff --git a/x86_64-linux-gnu-binutils/keys/pgp/3A24BC1E8FB409FA9F14371813FCEF89DD9E3C4F.asc b/x86_64-linux-gnu-binutils/keys/pgp/3A24BC1E8FB409FA9F14371813FCEF89DD9E3C4F.asc new file mode 100644 index 0000000000..a76485a9fc --- /dev/null +++ b/x86_64-linux-gnu-binutils/keys/pgp/3A24BC1E8FB409FA9F14371813FCEF89DD9E3C4F.asc @@ -0,0 +1,51 @@ +-----BEGIN PGP PUBLIC KEY BLOCK----- + +mQINBFm/2cUBEADkvRqMWfAryJ52T4J/640Av5cam9ojdFih9MjcX7QWFxIzJfTF +Yq2z+nb4omdfZosdCJL2zGcn6C0AxpHNvxR9HMDkEyFHKrjDh4xWU+pH4z9azQEq +Jh331X7UzbZldqQo16VkuVavgsTJaHcXm+nGIBTcUbl2oiTtHhmuaYxx6JTMcFjC +7vyO5mLBw78wt52HBYweJ0NjHBvvH/JxbAAULSPRUC61K0exlO49VFbFETQNG1hZ +TKEji95fPbre7PpXQ0ewQShUgttEE/J3UA4jYaF9lOcZgUzbA27xTV//KomP0D30 +yr4e4EJEJYYNKa3hofTEHDXeeNgM25tprhBUMdbVRZpf2Keuk2uDVwc+EiOVri48 +rb1NU+60sOXvoGO6Ks81+mhAGmrBrlgLhAp8K1HPHI4MG4gHnrMqX2rEGUGRPFjC +3qqVVlPm8H05PnosNqDLQ1Pf7C0pVgsCx6hKQB7Y1qBui7aoj9zeFaQgpYef+CEE +RIKEcWwrjaOJwK3pi9HFdxS0NNWYZj8HPzz/AsgTTQdsbulPlVq2SsctmOnL42CZ +OCTppGYwl53CG/EqVY+UQBzFzJBaY8TJRFFYVEy5/HH4H11rMoZwqIkk71EOGU3X +6mWlANRikR3M4GhVITRzuaV69Fed+OeXcCmP94ASLfuhBR2uynmcHpBKpwARAQAB +tDtOaWNrIENsaWZ0b24gKENoaWVmIEJpbnV0aWxzIE1haW50YWluZXIpIDxuaWNr +Y0ByZWRoYXQuY29tPokCOAQTAQIAIgUCWb/ZxQIbAwYLCQgHAwIGFQgCCQoLBBYC +AwECHgECF4AACgkQE/zvid2ePE9cOxAA3cX1bdDaTFttTqukdPXLCtD2aNwJos4v +B4LYPSgugLkYaHIQH9d1NQPhS0TlUeovnFNESLaVsoihv0YmBUCyL4jE52FRoTjE +6fUhYkFNqIWN2HYwkVrSap2UUJFquRVoVbPkbSup8P+D8eydBbdxsY6f+5E8Rtz5 +ibVnPZTib7CyqnFokJITWjzGdIP0Gn+JWVa6jtHTImWx1MtqiuVRDapUhrIoUIjf +98HQn9/N5ylEFYQTw7tzaJNWeGUoGYS8+8n/0sNbuYQUU/zwMVY9wpJcrXaas6yZ +XGpF/tua59t9LFCct+07YAUSWyaBXqBW3PKQz7QP+oE8yje91XrhOQam04eJhPIB +LO88g6/UrdKaY7evBB8bJ76Zpn1yqsYOXwAxifD0gDcRTQcB2s5MYXYmizn2GoUm 
+1MnCJeAfQCi/YMobR+c8xEEkRU83Tnnw3pmAbRU6OcPihEFuK/+SOMKIuV1QWmjk +bAr4g9XeXvaN+TRJ9Hl/k1k/sj+uOfyGIaFzM/fpaLmFk8vHeej4i2/C6cL4mnah +wYBDHAfHO65ZUIBAssdA6AeJ+PGsYeYhqs6zkpaA2b0wT4f9s7BPSqi0Veky8bUY +YY7WpjzDcHnj1gEeIU55EhOQ42dnEfv7WrIAXanOP8SjhgqAUkb3R88azZCpEMTH +iCE4bFxzOmi5Ag0EWb/ZxQEQALaJE/3u23rTvPLkitaTJFqKkwPVylzkwmKdvd2q +eEFk1qys2J3tACTMyYVnYTSXy5EJH2zJyhUfLnhLp8jJZF4oU5QehOaJPcMmzI/C +ZS1AmH+jnm6pukdZAowTzJyt4IKSapr+7mxcxX1YQ2XewMnFYpLkAA2dHaChLSU/ +EHJXe3+O4DgEURTFMa3SRN/J4GNMBacKXnMSSYylI5DcIOZ/v0IGa5MAXHrP1Hwm +1rBmloIcgmzexczBf+IcWgCLThyFPffv+2pfLK1XaS82OzBC7fS01pB/eDOkjQuK +y16sKZX6Rt57vud40uE5a0lpyItC2P7u7QWL4yT5pMF+oS8bm3YWgEntV380RyZp +qgJGZTZLNq2T4ZgfiaueEV4JzOnG2/QRGjOUrNQaYzKy5V127CTnRg4BYF/uLEmi +zLcI3O3U1+mEz6h48wkAojO1B6AZ8Lm+JuxOW5ouGcrkTEuIG56GcDwMWS/Pw/vN +sDyNmOCjy9eEKWJgmMmLaq59HpfTd8IOeaYyuAQHAsYt/zzKy0giMgjhCQtuc99E +4nQE9KZ44DKsnqRabK9s3zYE3PIkCFIEZcUiJXSXWWOIdJ43j+YyFHU5hqXfECM6 +rzKGBeBUGTzyWcOX6YwRM4LzQDVJwYG8cVfth+v4/ImcXR43D4WVxxBEAjKag02b ++1yfABEBAAGJAh8EGAECAAkFAlm/2cUCGwwACgkQE/zvid2ePE/dqQ/6ApUwgsZz +tps0MOdRddjPwz44pWXS5MG45irMQXELGQyxkrafc8lwHeABYstoK8dpopTcJGE3 +dZGL3JNz1YWxQ5AV4uyqBn5N8RubcA8NzR6DQP+OGPIwzMketvVC/cbbKDZqf0uT +Dy3jP65OFhSkTEIynYv1Mb4JJl3Sq+haUbfWLAV5nboSuHmiZE6Bz2+TjdoVkNwH +Bfpqxu6MlWka+P98SUcmY8iVhPy9QC1XFOGdFDFf1kYgHW27mFwds35NQhNARgft +AVz9FZXruW6tFIIfisjr3rVjD9R8VgL7l5vMr9ylOFpepnI6+wd2X1566HW7F1Zw +1DIrY2NHL7kL5635bHrJY4n7o/n7Elk/Ca/MAqzdIZxz6orfXeImsqZ6ODn4Y47P +ToS3Tr3bMNN9N6tmOPQZkJGHDBExbhAi/Jp8fpWxMmpVCUl6c85cOBCR4s8tZsvG +YOjR3CvqKrX4bb8GElrhOvAJa6DdmZXc7AyoVMaTvhpq3gJYKmC64oqt7zwIHwaC +xTbP6C6oUp9ENRV7nHnXN3BlvIgCo4QEs6HkDzkmgYlCEOKBiDyVMSkPDZdsspa+ +K4GlU2Swi/BDJMjtDxyo+K0M81LXXxOeRfEIfPtZ3ddxBKPva1uSsuz+pbN9d1JY +8Ko5T/h16susi2ReUyNJEJaSnjO5z13TQ1U= +=93P0 +-----END PGP PUBLIC KEY BLOCK----- diff --git a/x86_64-linux-gnu-gcc/PKGBUILD b/x86_64-linux-gnu-gcc/PKGBUILD new file mode 100644 index 0000000000..34db35ad14 --- /dev/null +++ b/x86_64-linux-gnu-gcc/PKGBUILD @@ -0,0 +1,96 @@ +# Maintainer: Xiaotian Wu + +_target=x86_64-linux-gnu +pkgname=$_target-gcc +pkgver=13.2.0 +pkgrel=1 +pkgdesc='The GNU Compiler Collection - cross compiler for x86_64 target' +arch=('loong64') +url='https://gcc.gnu.org/' +license=('GPL' 'LGPL' 'FDL') +groups=('x86') +depends=($_target-binutils $_target-glibc libmpc zlib libisl zstd) +makedepends=(gmp mpfr) +options=(!emptydirs !strip staticlibs !lto) +source=(https://ftp.gnu.org/gnu/gcc/gcc-$pkgver/gcc-$pkgver.tar.xz{,.sig}) + #https://gcc.gnu.org/pub/gcc/snapshots/$_snapshot/gcc-$_snapshot.tar.xz +sha256sums=('e275e76442a6067341a27f04c5c6b83d8613144004c0413528863dc6b5c743da' + 'SKIP') +validpgpkeys=(D3A93CAD751C2AF4F8C7AD516C35B99309B5FA62 # Jakub Jelinek + 33C235A34C46AA3FFB293709A328C3A2C3C45C06 # Jakub Jelinek + 13975A70E63C361C73AE69EF6EEB81F8981C74C7) # Richard Guenther + +if [ -n "$_snapshot" ]; then + _basedir=gcc-$_snapshot +else + _basedir=gcc-$pkgver +fi + +prepare() { + cd $_basedir + + echo $pkgver > gcc/BASE-VER + + # Do not run fixincludes + sed -i 's@\./fixinc\.sh@-c true@' gcc/Makefile.in + + rm -rf "$srcdir"/gcc-build + mkdir "$srcdir"/gcc-build +} + +build() { + cd gcc-build + + # using -pipe causes spurious test-suite failures + # http://gcc.gnu.org/bugzilla/show_bug.cgi?id=48565 + CFLAGS=${CFLAGS/-pipe/} + CXXFLAGS=${CXXFLAGS/-pipe/} + + # Credits @allanmcrae + # https://github.com/allanmcrae/toolchain/blob/f18604d70c5933c31b51a320978711e4e6791cf1/gcc/PKGBUILD + # TODO: properly deal with the build issues resulting from this + CFLAGS=${CFLAGS/-Werror=format-security/} 
+ CXXFLAGS=${CXXFLAGS/-Werror=format-security/} + + "$srcdir"/$_basedir/configure \ + --prefix=/usr \ + --program-prefix=$_target- \ + --with-local-prefix=/usr/$_target \ + --with-sysroot=/usr/$_target \ + --with-build-sysroot=/usr/$_target \ + --with-native-system-header-dir=/include \ + --libdir=/usr/lib --libexecdir=/usr/lib \ + --target=$_target --host=$CHOST --build=$CHOST \ + --disable-nls --enable-default-pie \ + --enable-languages=c,c++,fortran \ + --enable-shared --enable-threads=posix \ + --with-system-zlib --with-isl --enable-__cxa_atexit \ + --disable-libunwind-exceptions --enable-clocale=gnu \ + --disable-libstdcxx-pch --disable-libssp \ + --enable-gnu-unique-object --enable-linker-build-id \ + --enable-lto --enable-plugin --enable-install-libiberty \ + --with-linker-hash-style=gnu --enable-gnu-indirect-function \ + --disable-multilib --disable-werror \ + --enable-checking=release + + make +} + +package() { + cd gcc-build + + make DESTDIR="$pkgdir" install-gcc install-target-{libgcc,libstdc++-v3,libgomp,libgfortran,libquadmath,libatomic} + + # strip target binaries + find "$pkgdir"/usr/lib/gcc/$_target/ "$pkgdir"/usr/$_target/lib \ + -type f -and \( -name \*.a -or -name \*.o \) \ + -exec $_target-objcopy -R .comment -R .note -R .debug_info -R .debug_aranges \ + -R .debug_pubnames -R .debug_pubtypes -R .debug_abbrev -R .debug_line \ + -R .debug_str -R .debug_ranges -R .debug_loc '{}' \; + + # strip host binaries + find "$pkgdir"/usr/bin/ "$pkgdir"/usr/lib/gcc/$_target/ -type f -and \( -executable \) -exec strip '{}' \; + + # Remove files that conflict with host gcc package + rm -r "$pkgdir/usr/share/"{man/man7,info,"gcc-$pkgver"} +} diff --git a/x86_64-linux-gnu-gcc/keys/pgp/13975A70E63C361C73AE69EF6EEB81F8981C74C7.asc b/x86_64-linux-gnu-gcc/keys/pgp/13975A70E63C361C73AE69EF6EEB81F8981C74C7.asc new file mode 100644 index 0000000000..d6ed3d0d87 --- /dev/null +++ b/x86_64-linux-gnu-gcc/keys/pgp/13975A70E63C361C73AE69EF6EEB81F8981C74C7.asc @@ -0,0 +1,53 @@ +-----BEGIN PGP PUBLIC KEY BLOCK----- + +mQGiBDs4dV0RBACZII57dgbfnCC7RTrJ1yc0F1ofEZJJ/x4tAtSHMDNj2zTnLR25 +5AHmxN85namwJdn7ixXSZv1FMPCeTs6jDk98YuA9r5uuCNPqCNZsuQtREpN7h+wO +IeRrhvg9/F11mty/5NthXNh8P2ELnkWXSHu6DvTQyGppAtxueOL0CjRrpwCggVYu +vxui5mqNq9+lILbMi2Zm3UkD/0T/0HupthZFXbuzY/h/nyqzoPOxnSAAAx6N7SiE +2w9OQ1w3K8WOFoPH9P0cnIQ+KnMSGQV4C2WY/d8YtShnKkXRYZVvlK+aiwmvf1kU +yNyUqaA/GhW5FWN26zFQc3G5Y9TDjgBqjd6SequZztK5M5cknJGJn+otpdQtA1Dx +2KEABACSYjdRNT3OvQJ7OSz4x4C58JKz/P69WsNZxqYVo66P7PGxM7V2GykFPbG7 +agyEMWP1alvUK551IamVtXN+mD7h3uwi5Er0cFBBfV8bSLjmhSchVpyQpiMe2iAr +IFeWox7IUp3zoT35/CP4xMu5l8pza61U5+hK3G7ud5ZQzVvh8bQtUmljaGFyZCBH +dWVudGhlciA8cmljaGFyZC5ndWVudGhlckBnbWFpbC5jb20+iGUEExECACUCGwMC +HgECF4ACGQEFAlZi3pMGCwkIBwMCBhUIAgkKCwQWAgMBAAoJEG7rgfiYHHTHIBIA +n20wZDYF0KrfbJNzK4/VwAEAzN+wAJ9Dpbhtq4sRoH3cbadBsD2mXXthOrQrUmlj +aGFyZCBHdWVudGhlciAoV29yaykgPHJndWVudGhlckBzdXNlLmRlPohiBBMRAgAi +AhsDAh4BAheABQJWYt6YBgsJCAcDAgYVCAIJCgsEFgIDAQAKCRBu64H4mBx0x2iy +AJ4tmLvgNsphsrpKKfDDyV0tzR5FuACeNymltMsgfFyvoueBvji/h+HyObm0K1Jp +Y2hhcmQgR3VlbnRoZXIgPHJpY2hhcmQuZ3VlbnRoZXJAZ214Lm5ldD6IYgQTEQIA +IgIbAwIeAQIXgAUCVmLemAYLCQgHAwIGFQgCCQoLBBYCAwEACgkQbuuB+JgcdMde +DQCfZRUFDCB8sLK6B6wqRmwCsb3EK6MAnjSG6ZtgrdEjSQSmfAcIV/9W367MtCxS +aWNoYXJkIEd1ZW50aGVyIChHQ0MpIDxyZ3VlbnRoQGdjYy5nbnUub3JnPohiBBMR +AgAiAhsDAh4BAheABQJWYt6YBgsJCAcDAgYVCAIJCgsEFgIDAQAKCRBu64H4mBx0 +x9TwAJ4/9S1pd6cS2MHldWQpUdIuOBiUHACaAjNPvdqSN1SLEjH5GGlFZjo1c3+5 +AQsEQybx0QEIAPjHD/kts6GQbtsV+6+aZgfCK6MVZe14MOXFG60FmnHPzXymorzu 
+7DxSQOkKiKU49mUklTIQ1ErGIr8nCzjmVHmm7CH53dy8/OklPgpecBLGSP9WiqQ8 +TJxNUiDWQA0r2HWVAsi86N+E3e9ubN4VSK4yd6JMR1Mp2KgyS2LK2PjRo+o7mulO +FaAAoxmi8gWIVR6sv5dkh3g/6/DfKg32U8CWjFp5IXKmkyMPSH5eOZL4eBfx4Ia1 +cFcWfDJZdsYQ+EB+auzHqyr/DS5on4aS72WAppWkwH1Mu/fYnOEY37yF4GwYPb/M +5Loz2wTMxdjfflzCMdfQ56CuMlfc84MeT/MABimJAWcEGBECAAkCGwIFAkl7HCkB +KcBdIAQZAQIABgUCQybx0QAKCRA6sAmW/CamQbh0CADqF4FDBMsQh8+vkhFvXOTQ +vtXkQMuQedryaCGHcS/e5/J0xb5uLybMnnFyh9tIy8cj9sc79yeTTuXSQLJgU02X +h6EL4osGpe5JWWvFbY4SyNkyR3UcpXgzkH80crsHF5ixwCxy7PTEjQ48yT09hig4 +eiDHQ1fS4ox0F5aUQ4q2mk4bNtU6WHEP/8l6BKwSUC9/lfFQmMnk7SeIQqTwgC9T +agPZhCQz/tNZBo+t3ETlRcfPZ2djCGRJm5mbMG/pwEy1L9frdopzBYk56yEpuA97 +HjmIDvt34YbAOlPYPSCsvnhzZdoVrRv/qBcGxNhdYA7zwiGDlrjRf7Rg1KT8izvo +CRBu64H4mBx0x1WDAJUTqjXmf02pQphfYo7qalOuVr1tAJ0UUgg+PXpgrP0lp4LQ +8SbkxvBRhbkBCwRDJvH8AQgAtyi1+vdUzhYos5lmUznkTURFBGWMvPSOnB62I2Mq +0ZAazhyRjb6EuSTuGcusJXPWzRlsUFPdmyQjIqhPJ9ZkUSLOieIBMU4VqgWc7GZX +K7P4Luh8TxQrz5YOtnpj5Hev/yj63ACDLIbzShizBSteZ+TZL+aH7/8XK/36o4rC +Ep5OH69RiPcCUFEHQkxF7vaPdnqyH82/JtUOxSW1zYcr+7XHpHa+UNtI62Q4MV9x +9Wi6vBBvJ6zZSarpVi7ViIf5PVMGuWb5nA1YShEtQVKnQnn6pBqRUF4iLcyrunIL +PGp13htUUahaDr4qWUP5VKmELT9IitOTR0BX6e3E2h6NnwAGKYhJBBgRAgAJBQJD +JvH8AhsMAAoJEG7rgfiYHHTHMuEAnRcN4qTQ1V92e+2RzJm2IYbVJjPBAJwNY6s3 +lbrcC7Zc2E/k5fxwwenSTrkBDQQ7OHVeEAQAl3WryGIZfi9uPLNZlIvRFBErvUKL +zc7n+/c1GaUVMxXcF/Iauegblh41OoV2Kcz1sFx52MLDSDTV1DwDn4fNzwP7DYOe +9h4EBpMePG1DS7LQ0LoD682rvey6Cvww+eFmBBXdiEqCXvPuW4d3WMnOsQqL5BiS +QH+GiwIrLFN7yj8AAwUD/j9FOzif1GLdoSG8fsEi//axq0sXI+NtRUOrvmrkTCG3 +o5rZOJNwz+KBQpP45LdzosO4V/kPVuJ5U4EprEPRqejfSTW+oK+Bgm0mfImgw7Jv +adkNeXfJdwYidutyF1jjroVdqprSjAAaoZgSi2sw03CFx1WkdL+GCccwN6IVl5OI +iEYEGBECAAYFAjs4dV4ACgkQbuuB+JgcdMeqzACfeHjT2PFYdy88PHNVGw5se9Pq +GPYAnArpX32fDdu/xhuqjqHrNkwyO/Yo +=c9j1 +-----END PGP PUBLIC KEY BLOCK----- diff --git a/x86_64-linux-gnu-gcc/keys/pgp/33C235A34C46AA3FFB293709A328C3A2C3C45C06.asc b/x86_64-linux-gnu-gcc/keys/pgp/33C235A34C46AA3FFB293709A328C3A2C3C45C06.asc new file mode 100644 index 0000000000..c03bedc94b --- /dev/null +++ b/x86_64-linux-gnu-gcc/keys/pgp/33C235A34C46AA3FFB293709A328C3A2C3C45C06.asc @@ -0,0 +1,16 @@ +-----BEGIN PGP PUBLIC KEY BLOCK----- + +mQGiBECGYZsRBAC9VE8N8vHAG87MTC2wbtnmbSD8Wc2xYCaLofZAH+vXyio3Dnwx +jQLlj7IgwRWNAVq13uL0wn0WAsGop5Cs7nA/JD4MEBBNSdnvq1bMYitch2PTtAU+ +h6HaI9JXBDUh4AKZz2rllKgbigMHlgIugxnKTAMJIhS63lCTHWEDlnycJwCgqSX9 +hDs9eBC5coearGDhc0BDvTsD/A05YkZkQBgsYD6cjWFwNLJIcaHORKlLLZ9gRJO5 +LVcKaCEgYSWAM7dadJeqIFi9RkXdv+cWozxTgrGlY4T7/PakIBB7wWj2Zl72mW5a +NHT2vAemB8IFV1saiFXZM+qDhCHbV4yKSmNOQHY1VnSCUrgINiM0qlTz08yjUazK +fm2BBACDF3ZfUQNeHC9zwfsgCzKnqOm7FSlwOyI0f+j83B5PH2+KuzuyEqYoxGp+ +2d1zTxvbOeBBaX8T1M4n5d9ixiFMhgbTzuyit3nn6cp5j2L0IAS9pw0kaWpPMhpQ +zydNgnaBxHs1Y+cP4iM/4FWFCvfjUdR7xULdEzkgGxevu8pNEbQgSmFrdWIgSmVs +aW5layA8amFrdWJAcmVkaGF0LmNvbT6IZAQTEQIAJAIbAwYLCQgHAwIDFQIDAxYC +AQIeAQIXgAUCTI3tMgUJHtOOlwAKCRCjKMOiw8RcBjySAJ9ApMXF3+gWIr0zpMxv +Wb53/oxsHgCaAl6V5JS9GJUnrPiHKdR+sMFPkd4= +=MB2O +-----END PGP PUBLIC KEY BLOCK----- diff --git a/x86_64-linux-gnu-gcc/keys/pgp/D3A93CAD751C2AF4F8C7AD516C35B99309B5FA62.asc b/x86_64-linux-gnu-gcc/keys/pgp/D3A93CAD751C2AF4F8C7AD516C35B99309B5FA62.asc new file mode 100644 index 0000000000..d80a382548 --- /dev/null +++ b/x86_64-linux-gnu-gcc/keys/pgp/D3A93CAD751C2AF4F8C7AD516C35B99309B5FA62.asc @@ -0,0 +1,122 @@ +-----BEGIN PGP PUBLIC KEY BLOCK----- + +mQINBF7Ps4YBEAC5i0PA1CA3te8UeAxWm8zH5KRyoXyD+IuVHar9fPR13J/IkUgO +0f4kebDaGQGjyPoBuLHWtshQwSjDP9059eMbfne6fhe3UxqRjfknWxr83S0pSrDI +xgdIsxMQT6dxm1YYpp+pK6PRs/tHMtXHtSJc4HwkW187nx7c7lfKXmwVoqUuEjvW 
+irKyJRVNw68WZjYLmmIsRIIZcUMOE2lItPkejerHZobOuTkuXslgkWH3zeKCK8JD +em9npzxIkLgrl8Ub0HxWdkAc6o+gj3Ih0QthvC8P7gxNuTJyf8SVaZFla+ky/t7Z +kLmIhSBLzNSosscOtz9sdI4seXsOGgWeGRORp/+zF5ISnD3kFg3OtIudW8p4J7oA +OICWkPIuEOXPCz5VIUmaY2Eswh76YgW7u60JMv/v0Agpjy23hovvG6HArMO8Letr +Y5CWC+G9wp/xTo3TeyQ9mrYcKMjvrZzCos+SFaGF0lcExWpk610XQf+8/1FlhJ4U +SiQCy78o1pW8dOpLWvWe7y9YtRm3DTgYDCDpcMzYVZPrp2oPg5h4nW5sfhPJ01yu +gwTLDo/AILMQSkr1IVbfMkP7Mxtev51nRjxL7JCMB4bHx+uyNs56LCqdLctrF6Aa +HrS7yaP3ym3BsHrH3TqAaTGW7rs/hrZ6MaWbU4bBxL49z4GyXWRJqglePwARAQAB +tCBKYWt1YiBKZWxpbmVrIDxqYWt1YkByZWRoYXQuY29tPokCOAQTAQIAIgUCXs+z +hgIbAwYLCQgHAwIGFQgCCQoLBBYCAwECHgECF4AACgkQbDW5kwm1+mJqRxAAtvq9 +NevjleUPVMJhz3X5pprhCNM/8LGSbhE284tDYv0inT9huKaMeaw/hQvSSoniac87 +zb0S5JDWYHEZ6PhOs7ZASuSwtBp3SdGtjQTMJMSuAHvEjSvBR+0EkCu+85oJxwDS +wMMNEy1xubsyqhETD6DF/wudDr5r8IpB0A4vzFmaFv5wdbdnywd/sQzU9Fnl7GyX +/1Wgd94dIiznr7fdxJtsdphw4WzoJf5EYTdozs+biVhnz+NJuniTjg8IKoEOl7oY +Nrmqwfijw7FqgcbuXb9UAsxaVLFHZl3GVhXAmbQoz4io3PVw5BR+p+zeWvndeONu +jndE8QN3PyFb/WHyWRnCv7goUb5uLZU4aDqf34PP8fral3HBaaaXB53NybvJVjEU +nMzvjpVQj4J0yFzy+NlFwJb3oT03EDNkEK+SQrKHv4xz/atgXbfFYZ8oynCkypmt +aw/udPuLpy2dBY6wTAhSvdzkiD9swgbgX2idNLXfoeU1AaiKd+mez3X51Arf49pn +nL5wDCBkT2BUwX41ntIGgrNMcMNFbfNt99kUNaZ15oWI9Ia3DyWMxvgmDg4Ev8m8 +pGHH3Hq3eueyPySdJh+I8x+ipyIpFW+7AYS49L4LI7A7jFs8Nas22Qi6RoTFV3ep +0qkEL4Lgrh8ooxhhSLOmsfyJDiXAHiz5sVUZ4wXRymrKaAEQAAEBAAAAAAAAAAAA +AAAA/9j/4AAQSkZJRgABAQEBLAEsAAD/4gKwSUNDX1BST0ZJTEUAAQEAAAKgbGNt +cwQwAABtbnRyUkdCIFhZWiAH5AAFABwADQASAABhY3NwQVBQTAAAAAAAAAAAAAAA +AAAAAAAAAAAAAAAAAAAA9tYAAQAAAADTLWxjbXMAAAAAAAAAAAAAAAAAAAAAAAAA +AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA1kZXNjAAABIAAAAEBjcHJ0AAAB +YAAAADZ3dHB0AAABmAAAABRjaGFkAAABrAAAACxyWFlaAAAB2AAAABRiWFlaAAAB +7AAAABRnWFlaAAACAAAAABRyVFJDAAACFAAAACBnVFJDAAACFAAAACBiVFJDAAAC +FAAAACBjaHJtAAACNAAAACRkbW5kAAACWAAAACRkbWRkAAACfAAAACRtbHVjAAAA +AAAAAAEAAAAMZW5VUwAAACQAAAAcAEcASQBNAFAAIABiAHUAaQBsAHQALQBpAG4A +IABzAFIARwBCbWx1YwAAAAAAAAABAAAADGVuVVMAAAAaAAAAHABQAHUAYgBsAGkA +YwAgAEQAbwBtAGEAaQBuAABYWVogAAAAAAAA9tYAAQAAAADTLXNmMzIAAAAAAAEM +QgAABd7///MlAAAHkwAA/ZD///uh///9ogAAA9wAAMBuWFlaIAAAAAAAAG+gAAA4 +9QAAA5BYWVogAAAAAAAAJJ8AAA+EAAC2xFhZWiAAAAAAAABilwAAt4cAABjZcGFy +YQAAAAAAAwAAAAJmZgAA8qcAAA1ZAAAT0AAACltjaHJtAAAAAAADAAAAAKPXAABU +fAAATM0AAJmaAAAmZwAAD1xtbHVjAAAAAAAAAAEAAAAMZW5VUwAAAAgAAAAcAEcA +SQBNAFBtbHVjAAAAAAAAAAEAAAAMZW5VUwAAAAgAAAAcAHMAUgBHAEL/2wBDABsS +FBcUERsXFhceHBsgKEIrKCUlKFE6PTBCYFVlZF9VXVtqeJmBanGQc1tdhbWGkJ6j +q62rZ4C8ybqmx5moq6T/2wBDARweHigjKE4rK06kbl1upKSkpKSkpKSkpKSkpKSk +pKSkpKSkpKSkpKSkpKSkpKSkpKSkpKSkpKSkpKSkpKSkpKT/wgARCACVAGkDAREA +AhEBAxEB/8QAGQAAAgMBAAAAAAAAAAAAAAAAAAECAwUE/8QAFgEBAQEAAAAAAAAA +AAAAAAAAAAEC/9oADAMBAAIQAxAAAAHTAAEVFZIvGAAAABxmcUQAWVqHYAAAHMYg +RJUgIVapoAAAZEcy2SzWQitKrKrNs6wADGilb5qxWBCKbnnue6tYAAx4qmrlmrAg +VpRc9tmoACMSVKycty1pBEU2a9nUAEDIzuQrGspUkLJS02aR2XIAjJmyGsEasYEU +67O25AAzpaJqxYDJDKyKaVzfYABAyc6m0kkIktSdNzoWAAAjJlc1FWoyyuzYuZAA +ABnxytWSipIXPUmhQAAAGZLzklZWga1lwAAAVGbnTURUojZs2MAAiURwzQtgwIlN +mnc3gAHKRAJQYgCxFh0DA5BDJDGAiBAgSLzoKxDAAABCABlp/8QAJBAAAgEEAgEF +AQEAAAAAAAAAAAECAxESIAQQMRMhMDIzFEH/2gAIAQEAAQUC1lUhE/opirQl8VWs +oE6056RnKBRrqe9eeFPyW2oV8tuTK8xIsYoxQ4jXXHnnDTk/rEWrGjh/bTlfrHZj +OIvfStJzmul032zjxtS7l9WvZeB+L+xbq15UZPWYumWF3HzR+2nIjZrpjF0yJSja +Ok/eK2ZQhdbSWMixbvy0rLbkP3v231x43+Ct+lhF+6StT2nNQL5MsWLH+rxpdEqq +Q7yer8053WkvsYowRgYGJijFIZGppL7fEm0Rm2zEwRgjExMTExMEYI9NHpoUEmf/ +xAAcEQACAgIDAAAAAAAAAAAAAAAAEQFQMEACECD/2gAIAQMBAT8Bslrxurwu5zcs +02EVLGMYx0f/xAAXEQEBAQEAAAAAAAAAAAAAAAARAFBw/9oACAECAQE/AexkRGJ/ 
+/8QAIhAAAQMEAQUBAAAAAAAAAAAAAREgMAACITFREjJAYXGB/9oACAEBAAY/Am5u +rdYuiQZNbZg0h7n+4EvOXdPEOdiYt/Ibi1YfrCkSTFqwFxgU6ejkpJTdMH5omTml +L0u20xIZ8Ungf//EACMQAAIBAwUAAwEBAAAAAAAAAAABERAhMSBBUWFxMJGxoeH/ +2gAIAQEAAT8h0tIUmNP+R1CJ7E5+F7AFkcFwjJFH15CHD+tbGNOGshttLu6IIIpM +OUfSl86mysBJUI8HUdAgjo3nEac/ioQVGNCkA1vrSsehgIVGh1MU2zpkFRshLUdr +ckdsyW9xmZFc3aJciLUiBKCxFQSlDDVhFcveNNra7MKJcbJGFGZz+bT0FmAkZErG +I1RuDL2XfSiSfFLYle4xSiZQ59JlzrnvqZqJDdiGpMsi02WtEVvAgnIrUG7ExixZ +fBe/hyQww2RYgPWtFeOS7sggM2DUoluWItLU4buW9XDhEoEiCBok7CAsfrSwQnlH +Wex7PR7GSwEJYVaz2Jziq3UTJJJJGxsdMgFlkqNW5udjPQjyyPZHsj2R7PQ7mdjO +xkgTdP/aAAwDAQACAAMAAAAQAAkkgAAAECzAAAAh67cgAASff5kAAVogX8AAychk +wAEUiaXQAE4oCsGAEhH4sRAAfuVo2AAHjbiwAAhXVUAAAHjyPAAAFoFMAAAjSioA +AHZKSAAAWL7+KkAiyySWyAkAkkkAg//EAB0RAAMAAwEBAQEAAAAAAAAAAAABERAg +MCExUEH/2gAIAQMBAT8Q/BhOaEiIhMGuCFq+K6se0FhYeXs/gkOiTDVQhCeizW+C +ITEJhfdifkFh6MQ1eyZT6QuGy/zdMQoeDKN8EUTw2Ub4J4QTLygkQhCD3Sols+FF +6ai/hf/EABsRAQADAQEBAQAAAAAAAAAAAAEAETAgEFBA/9oACAECAQE/EPo3Lg4P +RjX6iMGo+EXy4Oxy4moduz5XgeGDs8nay9qJTgV8P//EACgQAQACAQMEAgICAwEA +AAAAAAEAESExUWEQIEFxgZGh4TCxwdHw8f/aAAgBAQABPxDsX9zgUmqfUA1vAoVu +GgE/uALETj+E7QdS6D3CdU7ELWcwG00loM8nh+IAc2vHp33HgJHzKZVhYgzEv5j9 +IlEESUjY7QBUE1VR+3cnS6qK8pHYIO0HggQKpBXRKVkMMfLGwd+e2hRV5KgwdAEo +qDpaksMovmb189og7D+UgwjN4dpXQNEyuHKQlmQA2di0LHFOYtgjUCNGLX1FdVQV +XB4oljN+JYwxUiA3yX/x+OzAaqPaA32CfKi4GKbGU8gfmWKwF3T6iU4Bsqpbd1j1 +VYZlgpBZfY5KgWxMzMUlxknlhGFbE08RYmC6IQKhodpKV430zIQqzGXr4jslohFc +twgixDZs4mB6Wxt2iZYvxxFgjaEwFQiaksxzZqRqjBvcf67ksTeK9oN+niGGtQdu +cTRhv5ihlmJUoTa0QxlAHfdpgUT22MolOqmDTEKqOiDK2Ttv/BUh4AxR/RKfDMNU +y7SKyfqYGqyvz3jGhdA1Y+qLvUMJcaQOfuUWCNrZBAoagHbTabI6ij8H3HSKr9TC +lhNMcKmCChezSoizCW9oAuYjon2Qfb6YvoiANPxn/NSvlfUpZt+ZoGcwDlBAMAP2 +gAUI6J1yfMLJwQk5ym8pA6KuJHxUrx4+pk9vJ66N02egcnRnLHLHLHLCnmF/0z/3 +Sf8AaQ0wm/T/2YkCOAQTAQIAIgUCXs+8lgIbAwYLCQgHAwIGFQgCCQoLBBYCAwEC +HgECF4AACgkQbDW5kwm1+mKtXQ/+MEPsOs4J0K0CBLgX25Qz0y1Hi4G+n7x1W5oZ +fft8yvxbmsMT7BjMGJh4jNzdQndAbY26H5U3s5OtlL1w2kMqt7b7/o9BPjL+ndIj +ujgkSrOh5xaUhPBbLaGmhiYSzjvLsXovRGl41D+wneFDJ3H5fonFd78gH5+2k5ak +7mgYgOt0GhTeEFNxBgv1OMMxZJ+aiIiApMFVtc4q2zLWxzRNyMeTUAKhKPTz5Dz5 +LCrnpx391e4gx9pTqY24mADVRRrDtP7YdgsMDP0nXRqTbyRsc/0wcUdl9/czVRvz +SADCZon3q7gb7ar3Jfr/5CFdN/2AQXw/IEqkF3pST900s+1WMZhOOQ/FDt/J6ETs +q6cL2487N7QASeGfF7cczVOKQU1tsBhpk2Wqxe0c8KnsBN8Klq62g1qs7sJ8V6vv +s64QLOgsekM1ep6w9n79Z5mxRG2KTYyg03S2jeEJklAGm1wFDgIDL7kuVYplRFTQ +LG7YaRI1IzRi6JCeYpF4tlvNfhGuvtzX8ADHk+yYbnksgXixLKBtY+9jqQEvtcSG +8xrrHcHj6kQs0IlRRKCnhpbegve7TkNBZUkOBl235c8/YNOISAW1ls5Lqxd2qk9j +n98L7r0dT2ylyYR1pUIl4g4gLL6wN8VM5O/RNypcPtMhmSjikoD3sviXN6RAQ0Vy +eL39geS5Ag0EXs+zhgEQALiuGiBqVzOlpSaCcwOREam/tYWCm/3o9AEoKhZ8VNOw +Hy9bQZ7Qnty04Z686NGq6kOc0rXJxO5dSeqiA4OfhmkpiheD3GgEKauVcCN1EDtn +0Yb0+/lJWBqHiXAa+haEUNsRkZdbLAaCBY6aG3S8Y9hSb1dqbed9QoDdC5DljqlT +JUzRbQL3fFBlsYejtWTNWUHP9OdeUTIzdPCf2iENMo+PYZG3LxycwQMza0TDzmuV +SLRbBYxLjYwzjVoiI7RYvV+aFs2HZSgtA0P2BmC+pjleeSfWeHiNAZxPH6Steb8T +MOL/AydLrMcGnjleREco1YUNL8ho2zUAv0h+S3bzGhb7gtkOxVzu6G1EHoflTCrw +dsAfeA8WCBZM+KLqLJcn85ehH7/N0ZBy/Y5Bu76JitYO/DJvqAR6jSiR2fJBQLiS +EC3SOvhWwJEYexNWI3eSpDYvxdjQ33tFz6QTUHneifs1aByl6MH0WxRDYspIs1c7 +X6afqIWrVUsXmjh/IT18hGOEdVDZ8ktK1U2adQP32OmZdQ5ij86Ydyypr9cwHK97 +6VdttRLNKs7KElFempkHtde7ue+XylrzeMYQj1Mmtw4baL8jxg4AHmk06U3VEaT7 +PQIJphZMwLMes3hk4e47b3C8aeCAAYA1sfma4rCU8hTbrAoSV5qhDCDbRZiAhF0B +ABEBAAGJAh8EGAECAAkFAl7Ps4YCGwwACgkQbDW5kwm1+mIrNw/+LNy8YHShVO+E +uQeia3E0cVu6/qCyM/UdjCfaSYH9NwQldHUbed70IY72MqYfk+sqToHsGaCAoSef +CSRLsELQ7sAMsfqvwNb8dibON6q9Ju5JESUwu4txmVlLlkXFlyxgk/fTeBTT89wT 
+7ZYXjSVa75WRDDwJo8BLJ1N726UxVDcUm6F4Gb+elaMi0jVGClKVG/ZHLgWRlfK4 +8dp1a2CPrqvRAeGvnqyQy4q6UtV1FcwYlxzl3G2ubB8YjlQg1FG7CoSfgHPZFZ+f +PtCvW3kUjeTOZlDQ8UNEsyvUtwL+ntKCpKwxRdAFLWbfVgl1HRtcK0eXQf+Nepn5 ++lRgNRfUlzFInYUkn0QEFJQQgAmdxuQVLutLzgchxR94MBr6c18jhu+95qoKIfNp +eSaOpxDZx9WqnMCsBmns+YKmdaFaj5MukzPXIQxw6o9Ez+AgPY9VRmIDvaK/0BJr +gW/deFGcF12tMHzaQ4+It7eidDezYn6CWqvRAX45wpW5ig7GUXm5Xp8MSq9HUHpv +xSESDao996QgfR6lFJ7YIy+cQcCS5ynHcJkpnLGdS0G8UzN4iYT+zAsnOybNrETF +xFPsTAddu9c+sOjRUHUF8r9kGQZ3eKxCp3rNfELUVTjyGv2yOpun+Z1xHAeLIq5M +Eq4PBxKJz/7TpPbxn6pwUaSrLdJBkXo= +=Vl0Y +-----END PGP PUBLIC KEY BLOCK----- diff --git a/x86_64-linux-gnu-gdb/PKGBUILD b/x86_64-linux-gnu-gdb/PKGBUILD new file mode 100644 index 0000000000..5ba374c75a --- /dev/null +++ b/x86_64-linux-gnu-gdb/PKGBUILD @@ -0,0 +1,49 @@ +# Maintainer: Xiaotian Wu + +_target=x86_64-linux-gnu +pkgname=$_target-gdb +pkgver=13.1 +pkgrel=1 +pkgdesc='The GNU Debugger for the 32bit and 64bit x86 target' +arch=(loong64 x86_64) +url='https://www.gnu.org/software/gdb/' +license=(GPL3) +depends=(xz ncurses expat python guile gdb-common mpfr libelf source-highlight) +makedepends=(boost) +options=(!emptydirs) +source=(https://ftp.gnu.org/gnu/gdb/gdb-$pkgver.tar.xz{,.sig}) +validpgpkeys=('F40ADB902B24264AA42E50BF92EDB04BFF325CF3') # Joel Brobecker +sha256sums=('115ad5c18d69a6be2ab15882d365dda2a2211c14f480b3502c6eba576e2e95a0' + 'SKIP') +validpgpkeys=('F40ADB902B24264AA42E50BF92EDB04BFF325CF3') # Joel Brobecker + +build() { + cd gdb-$pkgver + + mkdir -p build && cd build + ../configure \ + --target=$_target \ + --prefix=/usr \ + --enable-languages=c,c++ \ + --disable-multilib \ + --enable-interwork \ + --with-system-readline \ + --disable-nls \ + --enable-source-highlight \ + --with-python=/usr/bin/python \ + --with-system-gdbinit=/etc/gdb/gdbinit + + make +} + +package() { + cd gdb-$pkgver/build + + make -C gdb DESTDIR=$pkgdir install + + # Following files conflict with 'gdb'/'gdb-common' packages + rm -r "$pkgdir"/usr/include/gdb/ + rm -r "$pkgdir"/usr/share/gdb/ + rm -r "$pkgdir"/usr/share/info/ + rm -r "$pkgdir"/usr/share/man/man5/ +} diff --git a/x86_64-linux-gnu-glibc/PKGBUILD b/x86_64-linux-gnu-glibc/PKGBUILD new file mode 100644 index 0000000000..f88ab5f72f --- /dev/null +++ b/x86_64-linux-gnu-glibc/PKGBUILD @@ -0,0 +1,126 @@ +# Maintainer: Xiaotian Wu + +_target=x86_64-linux-gnu +pkgname=$_target-glibc +pkgver=2.38 +pkgrel=2 +pkgdesc='GNU C Library x86_64 target' +arch=(any) +url='https://www.gnu.org/software/libc/' +license=(GPL LGPL) +depends=($_target-gcc $_target-linux-api-headers) +groups=(x86) +makedepends=(python) +options=(!buildflags !strip staticlibs) +source=(https://ftp.gnu.org/gnu/libc/glibc-$pkgver.tar.xz{,.sig} + sdt.h sdt-config.h + reenable_DT_HASH.patch + ) +sha256sums=('fb82998998b2b29965467bc1b69d152e9c307d2cf301c9eafb4555b770ef3fd2' + 'SKIP' + '774061aff612a377714a509918a9e0e0aafce708b87d2d7e06b1bd1f6542fe70' + 'cdc234959c6fdb43f000d3bb7d1080b0103f4080f5e67bcfe8ae1aaf477812f0' + 'cf9fe494f7ec69752a63d1b0a9ad689aa620888ae9b902b6383a6fbc7c1726a7') +validpgpkeys=(7273542B39962DF7B299931416792B4EA25340F8 # "Carlos O'Donell " + BC7C7372637EC10C57D7AA6579C43DFBF1CF2187) # Siddhesh Poyarekar + +prepare() { + mkdir -p glibc-build lib32-glibc-build + cd glibc-$pkgver + patch -Np1 -i "${srcdir}"/reenable_DT_HASH.patch +} + +build() { + # remove hardening options for building libraries + export CFLAGS="-U_FORTIFY_SOURCE -O2" + export CPPFLAGS="-U_FORTIFY_SOURCE -O2" + unset LD_LIBRARY_PATH + + export BUILD_CC=gcc + + local 
_configure_flags=( + --prefix=/usr + --target=$_target + --host=$_target + --build=$CHOST + --includedir=/include + --with-headers=/usr/$_target/include + --with-bugurl=https://bugs.archlinux.org/ + --enable-fortify-source + --enable-cet + --disable-nscd + --enable-kernel=4.4 + --enable-add-ons + --enable-bind-now + --disable-profile + --enable-stackguard-randomization + --enable-lock-elision + --enable-multi-arch + --disable-werror + ) + + ( + cd glibc-build + export CC=${_target}-gcc + export CXX=${_target}-g++ + export AR=${_target}-ar + export RANLIB=${_target}-ranlib + + echo 'slibdir=/lib' >> configparms + echo 'rtlddir=/lib' >> configparms + echo 'sbindir=/bin' >> configparms + echo 'rootsbindir=/bin' >> configparms + echo 'build-programs=no' >> configparms + + # Credits @allanmcrae + # https://github.com/allanmcrae/toolchain/blob/f18604d70c5933c31b51a320978711e4e6791cf1/glibc/PKGBUILD + # remove fortify for building libraries + # CFLAGS=${CFLAGS/-Wp,-D_FORTIFY_SOURCE=2/} + + "${srcdir}"/glibc-$pkgver/configure \ + --libdir=/lib \ + --libexecdir=/lib \ + "${_configure_flags[@]}" + + make -O + ) + +# ( +# cd lib32-glibc-build +# export CC="${_target}-gcc -m32 -mstackrealign" +# export CXX="${_target}-g++ -m32 -mstackrealign" +# +# echo "slibdir=/lib32" >> configparms +# echo "rtlddir=/lib32" >> configparms +# echo "sbindir=/bin" >> configparms +# echo "rootsbindir=/bin" >> configparms +# echo 'build-programs=no' >> configparms +# +# "${srcdir}"/glibc-$pkgver/configure \ +# --host=i686-pc-linux-gnu \ +# --libdir=/lib32 \ +# --libexecdir=/lib32 \ +# "${_configure_flags[@]}" +# +# make -O +# ) +} + +package() { + cd glibc-build + + make install_root="$pkgdir"/usr/$_target install +# make -C glibc-build DESTDIR="${pkgdir}" install + install -Dm644 "${srcdir}"/sdt.h "${pkgdir}"/usr/$_target/include/sys/sdt.h + install -Dm644 "${srcdir}"/sdt-config.h "${pkgdir}"/usr/$_target/include/sys/sdt-config.h +# cd lib32-glibc-build + +# make install_root="$pkgdir"/usr/$_target install +# make DESTDIR="${pkgdir}" install + +# # Dynamic linker +# install -d "${pkgdir}"/usr/lib +# ln -s ../lib32/ld-linux.so.2 "${pkgdir}"/usr/lib/ + + rm -r "$pkgdir"/usr/$_target/{etc,usr/share,var} +} diff --git a/x86_64-linux-gnu-glibc/keys/pgp/7273542B39962DF7B299931416792B4EA25340F8.asc b/x86_64-linux-gnu-glibc/keys/pgp/7273542B39962DF7B299931416792B4EA25340F8.asc new file mode 100644 index 0000000000..f1de42479a --- /dev/null +++ b/x86_64-linux-gnu-glibc/keys/pgp/7273542B39962DF7B299931416792B4EA25340F8.asc @@ -0,0 +1,54 @@ +-----BEGIN PGP PUBLIC KEY BLOCK----- + +mQINBFef5BoBEACvJ15QMMZh4stKHbz0rs78XsOdxuug37dumTx6ngrDCwZ61k7n +HQ+uxLuoQvLSc6YJGBEfiNFbs1hvhRFNR7xJbzRYmin7kJZZ/06fH2cgTkQhN0mR +BP8KsKKT+7SvvBL785ZfAhArWf5m5Tl0CktZ8yoG8g9dM4SgdvdSdzZUaWBVHc6T +jdAb9YEQ1/jpyfHsQp+PWLuQZI8nZUm+I3IBDLkbbuJVQklKzpT1b8yxVSsHCyIP +FRqDDUjPL5G4WnUVy529OzfrciBvHdxGsYYDV8FX7fv6V/S3eL6qmZbObivIbLD2 +NbeDqw6vNpr+aehEwgwNbMVuVfH1PVHJV8Qkgxg4PqPgQC7GbIhxxYroGbLJCQ41 +j25M+oqCO/XW/FUu/9x0vY5w0RsZFhlmSP5lBDcaiy3SUgp3MSTePGuxpPlLVMeP +xKvabSS7EErLKlrAEmDgnUYYdPqGCefA+5N9Rn2JPfP7SoQEp2pHhEyM6Xg9x7TJ ++JNuDowQCgwussmeDt2ZUeMl3s1f6/XePfTd3l8c8Yn5Fc8reRa28dFANU6oXiZf +7/h3iQXPg81BsLMJK3aA/nyajRrNxL8dHIx7BjKX0/gxpOozlUHZHl73KhAvrBRa +qLrr2tIPLkKrf3d7wdz4llg4NAGIU4ERdTTne1QAwS6x2tNa9GO9tXGPawARAQAB +tClDYXJsb3MgTydEb25lbGwgPGNhcmxvc0BzeXN0ZW1oYWx0ZWQub3JnPokCQQQT +AQIAKwIbAwYLCQgHAwIGFQgCCQoLBBYCAwECHgECF4ACGQEFAls7bO0FCQdd78kA +CgkQFnkrTqJTQPijtg//d+nIhSrlAadHxlKZpsFyS3pWQgybSfZnPQVcP1BYfTIA 
+SpjqHi4idXxVw79AHOAagL769GMy7QUQo+jFrE41Brt7/9oBbD6Gy0HVtOWcdQtD +KEWFCxKGU7utP05cOLxBfbDPpPn9zSXcJHIKiSrx91gcxTL9xCDribjLFmn6Zcef +irkNNzlFwgEXurL1x8+e62gANIE/QuKUkFe/2w284sHZsdk6pdRdnH3m3WCLm/HR +beeJE/Qmkb9pFzLBr/dz/43owjfc+GIn4i3+FJZVwgM5qZW2NIBClt8rQ7nl4+cH +EMryH8SuIDrluBG0pUDMz9pyHzb1oaC+Jb+WmywBZcHxibj/qAsYGlD/vQXjAGvA +3tIpheKvU6oC0O0KqkaYeb4Hf/kmL19NKdEexuD8brcZKi69JZowTIbCPrwXhUcZ +wyqP8eXaHX/wPYGiwhvaLoVtjXZ8Qqb2ZRw1aeyWM3IWDuRAyPCCgrRg80LvNu0i +vBIKQTb2ZZAFH33oAYSz+rf6FW9PX8/AWJP3spo3W6a7rJ03SqqQOJKsTF6LxcEg +mvPun0uk/h/TWid42xkTgEx121VA/j5srkqZ5mhYvB39uUtDT4jakx2EziommvqD +iPx10rHRT/vh3MWdMkkCUITywOJVyG+iYtTM/IbppJJJPj0pISR17ydMEko7GwC0 +KkNhcmxvcyBPJ0RvbmVsbCAoV29yaykgPGNhcmxvc0ByZWRoYXQuY29tPokCPgQT +AQIAKAIbAwYLCQgHAwIGFQgCCQoLBBYCAwECHgECF4AFAls7bPMFCQdd78kACgkQ +FnkrTqJTQPjDng/+OWH1/ORl2HmBfvvWgNALtNKi6uaqbLqne7DKuWCMRKLytgHg +uXOdB3plVFIQy8TX42johGF7LbaSIICGDCZFh0hu8xFgy277kN26UmJPlI9gobBx +Ew8SI37lAD2vkS/gDmwQlyfx8kqODL+S/33lUqY3Ak31/ZcPXQOa/6axwb2Zfbg3 +2rfqA9yFQmKvTct1L09xz64TQIlPq1r07Vaf4u1GATW6iZ2QtvOGyKzkA2VHoXaM +1RIsLhw7LHM9W4Vto++e0ZWj8l83fGqwYXbE92T0aIlBwNxTOJUleARzL95g8Ouz +0P++wJ3LoH1zkffLeC0Ms3K9nhrHqCHu1ISJ/QZITYi3fqEOoNYiMfvGaL914kFm +sEpCC6O0FOR/A676eMfgk8M/jkSL40yZURDpOxPXSIdRHUFxJUKvPdMseB4u0fB6 +y+KbSntCMEeA9jKbpl6YZIrU85JFX9S94YV4PSV/qcjzWvJ3msKoK5+DFIDvhg7b +gu/4p1mTk14B20olGcHVpkdfxBNszb31utzo83JHHyK9CZj3Y9Had5ALuVG7ZGsE +MDMwIp8V3uf4DmlGDkuxkWmoQamsyP9dbBA1zkRbC+OL8KV9YNnyxvKZFRsZjamM +BMC/uTi7nvoqJ6XWFBExaBvHm+khz7O+aDk9+LsrGx6yjUgOdd+F/GWKm8i0LENh +cmxvcyBPJ0RvbmVsbCAoV29yaykgPGNvZG9uZWxsQHJlZGhhdC5jb20+iQI+BBMB +AgAoAhsDBgsJCAcDAgYVCAIJCgsEFgIDAQIeAQIXgAUCWzts8gUJB13vyQAKCRAW +eStOolNA+JX6D/oCtzDbVyoF2mJf5wQZCnU1y2nHmg4Jio7enULyjIfEcpu0SXRm +EbkTauZjFbQNw6di36dW67gsMke3wwiciKy+avR9E7cXaEtPllYOMAUYHLBHaViI +f2Tv54Dc1N5gM7ej5CLxLMYajMOh3427l3MCVAO9ecvJBp8geO5AVN/6q0lPTznb +rW9ZUeegxH6mUd2aQTRywQ9TmEiRSENmCHtwfIDoAT0CGTpI2V0gnv947KCXSS4+ +yITabbZot4hsB3eGPtJuxjVOvjD3KbL8Aon5QG+TpnORFnRCqhpNsGlQCpvoe12L +B/+QtvWSOwlrriKrVcG7BHfwjb7CjadHN4pgVjNHohgkavSGGCJp8JVTWLTkmEny +AjII3uXi6l4t7rGDpEZlGa6PRVVbAkat4T/BVpRcR7GDb5h2xaZ8b7JNqiC8BgB5 +J8i6duJrgr6Q6OmYmdL//Hs9SQiikrDjPrt2VlO2krma/aZaymUwGJjkny1B7ETZ +bmvmT1qfVN+zXxbzsfuS4dyiYMhNgw8dEhXj/pwBmraxgqcosoZM85t+Fg7TtFif +w3SDKCJF/v7BndYEdfWEGK6iZkItZ1feavZKfFl0eJ52tUMRnUrjpfBz+xiM/cBG +HdDy0qnJfc1NZoulQLkloKEsITVLm3htRzBui8JWBtW7cI0TQ9TdMOtUvA== +=JckU +-----END PGP PUBLIC KEY BLOCK----- diff --git a/x86_64-linux-gnu-glibc/keys/pgp/BC7C7372637EC10C57D7AA6579C43DFBF1CF2187.asc b/x86_64-linux-gnu-glibc/keys/pgp/BC7C7372637EC10C57D7AA6579C43DFBF1CF2187.asc new file mode 100644 index 0000000000..67d0c96568 --- /dev/null +++ b/x86_64-linux-gnu-glibc/keys/pgp/BC7C7372637EC10C57D7AA6579C43DFBF1CF2187.asc @@ -0,0 +1,68 @@ +-----BEGIN PGP PUBLIC KEY BLOCK----- + +mQENBFMAZNMBCACeatEKl6YY9iEVxzS64bPbvJsA1mLE2XFWmKXyYzm58dFqPMa0 +OQQTKCxjFCOrc+LD2KtmypttcahKnk5Lk1lNU/lV2hCuR7jJ37sL+/TFQuMMgsLP +ED6XU4/AYK3VUJvgGYLBnMVfpAGYLB5rnPvhFNx0r2KItO/CfiSEyD4g1Wu26SUA +XGOp5hbSyBRGhju+8YJlhHBBjn3vZfw7IpwAWDVjK0crqMhGDXoZwK+ADUFY5NER +AkT3Lb7d11F6+W4558WQZCYIWa3rZ62d986OE7+7xKmJUcLLWvlv5spgUmvotZ4D +MzaKba+waY5ygXdGIpm5seVIEUCTaBIe6QVLABEBAAG0KFNpZGRoZXNoIFBveWFy +ZWthciA8c2lkZGhlc2hAZ290cGx0Lm9yZz6JATgEEwECACIFAle0y0wCGwMGCwkI +BwMCBhUIAgkKCwQWAgMBAh4BAheAAAoJEHnEPfvxzyGHUggIAJtLMvzHjRQi/Dg6 +oSMUMPtH7oEmgnk482dC4MGNl/bCtUV7VVIAtEN5TGvHxk1EKBNqj49Q+mZjef82 +iluW1RbXRY6+72yZ380yUC41SY+2hVOurJ//h3nvE+YHfO7QjV97yhIegc1kdwAr +VtgNNApLxj5Nc2epT824uaSznVhwyAS2OIHFLmiMYuIW338uXVEug1XKBHwJ9Mpp 
+Vblk4zapc9fRGvRG72ivbTGXNgcza+Kkx1IdA0XB2dEQaAE1XR0FOshKtpeSDRER +wZ17+rRT8EjmkRsR7qm1uvPSNW7wMTtlj8ox/XuSyG0Coy1NRgqe5bi53ha1kBoK +lLaxvyW0KFNpZGRoZXNoIFBveWFyZWthciA8c2lkZGhlc2hAcmVkaGF0LmNvbT6J +AT4EMAECACgFAljc8cohHSBObyBsb25nZXIgZW1wbG95ZWQgd2l0aCBSZWQgSGF0 +AAoJEHnEPfvxzyGHhvoH/3KWe6JIWptc283au0UROXog3VdBFM3pE6SgMhOlFTM2 +r9fU24rvsTZgAMC7N7TxDil0JajMR6CYXoapDncuRs4u27D4uK/oUqHxL6CHuDKw +GwURM9OjqV1kJY1gPYn9IZL3XHejg/YwxodGKK4jRJrL0prR5HSiR6QyQVgJ886D +pOyHGqUwi5GGLZVAgwo9NBsr2GEmXMBmwGU44g+UuCSBiySvXwsBDDx2j34Q166t +eoz+CHsIf4J3UPv2nIR6L0EvboTw39m55aTlyJ3dPHh8OeKwTAZCFzzOv0WxINcC +fVWnL138fOkILt4u12Tv7D2K99PI/bYv1Xeal+zRtBO0KFNpZGRoZXNoIFBveWFy +ZWthciA8c3BveWFyZWtAcmVkaGF0LmNvbT6JAT4EMAECACgFAljc8awhHSBObyBs +b25nZXIgZW1wbG95ZWQgd2l0aCBSZWQgSGF0AAoJEHnEPfvxzyGHT5UH/0eeAKeR +jobfz+8n98UgYzPZnihlS1yd8wznaVThm0cgqUp1hu3NIHuDiirr/VCRwxqP+hmJ +ulwnQsJZwMllf2riFxbnnun4VBeocENxqE/m5EHLHjKkZklhYJSxbxWysXt7BYZb +7+2S3zvlP7TCl2Hb7JhdJgUiOdondBBWAygA+uxolabetIv6X3v8evr+H87PMeOw +lcaTxO2DXPAAsGDqxPJNSzVtiB5WEz6/2fRhsSGkisDSZTs5d/SL+lLS/FfRR2NT +SN20+2/eepzMJM5fyoV9vVkytI3XxhQsepeMya34DEP92ltJnhnG/tToUvHVttOd +IIVwZtDWIYwSg6a0KVNpZGRoZXNoIFBveWFyZWthciA8c2lkQHJlc2VydmVkLWJp +dC5jb20+iQE3BBMBCAAhBQJVwGR2AhsDBQsJCAcCBhUICQoLAgQWAgMBAh4BAheA +AAoJEHnEPfvxzyGHPs8H/3BzCbDhXI1txfjYCCUDJwDMtY2iqcHINZb7LPGJGDbx +vuAwEgJcbrpK6QlbAqBXAg4hwBOzM+CLZRPRQ0g4hBsNQv1m+1WIJdbUfS9ZL4O9 +XPWSHYdKY0U+83XPyaValGOPMvSb2glOy2RRRC+CECN9CaQNbfJo91ZfmMk3waNb +EpZ6Te04vZ9zdoRHz3D7qhr2U2Hazlvv6P9TGqKFRbEbMgMxBJM7L6WiiBhFIIyB +D4N0NaB0xnc1JB1fwpfrRfRT9CPWeqmeXvnt0bGJAlzpG7tc3d0evva2mMVTooyq +C8vXiCRsszcrG5NYOPYkgnt0ahqivVGXd/5F57tMtGq0PFNpZGRoZXNoIFBveWFy +ZWthciAoUGVyc29uYWwpIDxzaWRkaGVzaC5wb3lhcmVrYXJAZ21haWwuY29tPokB +OAQTAQIAIgIbAwYLCQgHAwIGFQgCCQoLBBYCAwECHgECF4AFAlTqsncACgkQecQ9 ++/HPIYd28Qf9GfOK+VgDxaTzyixbGHWVWpk1nie67BdPvaorTb1jOUVsI582jKO+ +WDIKmI2PTFk9RwsN47q4s/QyN8oCjgYCYmuj+cse8Zh5acEJx2ENJRmP0QLJU3eK +IQMxKJ7SG4UHyVBlqdchWryZq5KDGbAKf3WtMhgBzr825UnybImpZ7qPfJM8u78y +jHPQvBKkKaAzhVwC65Lt+ESQA9+EZqvYeEwlYpAq5gmKKaD/QialyI/8FEsshBqw +DdLzFMMDSjFxVukjiC5t3WJvFz52v+tzEXE0HZsV26p/LrPSA+cei/s75FBmw9qz +4AM0YDOrEaeEG+CSSPLGgQYFYMp1EX+LvrRFU2lkZGhlc2ggUG95YXJla2FyICho +dHRwczovL3NvdXJjZXdhcmUub3JnKSA8c2lkZGhlc2hAc291cmNld2FyZS5vcmc+ +iQE4BBMBAgAiBQJXtMr3AhsDBgsJCAcDAgYVCAIJCgsEFgIDAQIeAQIXgAAKCRB5 +xD378c8hh6doB/49aFm02tJCigXO7NrWRg7esjdQT66g8/LYYa2O3oHef6yACSmt +bdJWpbZ5aReRledYcsI1WVtLgUHOchrspeKVO6KWAA5cFMDjRhMP0ArhxyL4KDqz +U9AHpo3t9bYBGKNfH9Df3a0rVknkIB9NyENPAJpbJOdrND0TDWXhyOHLGsFC6WuE +MtQudfSKMOhKFqySN0xZQUKlb11n2MT479gqlw7o/UPDy1gDtssG+zG9DD72Xkq3 +lbzzthM/e7ldcB8dMRoVLl9e1XZLoOfL/RAV/z9UxHezRjjYDV69EPxzKsgj+Id3 +AzmY9XeSqUeDWX17z6ARV0mh4uGePEriEmPCuQENBFMAZNMBCAChC0iEpSfa897U +gTzZKcqsCD5+P/2QbhDSUHyFiFmDPa+9rAUR2YIopbDeyu4OhGu7y1FYL+fliJxO +D1hUGqlEmzLm047IZ2iACHklNK7JcEUartgfR5kvJmqwflGHUPjLD1RCJ6wfLq6B +X/CYe4ftjqrNjClDLGsqBckJFbcIdxyPwDE5Jiuorp9wIpDivifVi0MdkKn/ny42 +Of2SI26MG3fBitweIeJFD23lCOUzYOsXClcsVTzMvB2s32g8JVB34dOytFBVrWhb +sPb97ZWonjkyx9A9HAV8mEwKoLOwuunMrccPkK+v/rh2vDDERA0MExBMMInJN4dI +fryIQrFdABEBAAGJAR8EGAECAAkCGwwFAlTqs98ACgkQecQ9+/HPIYf1Mwf9ENd6 +C/2kvJJvbFzmvDNa6EOvTVKaqTBDhgVjuZ2ivMGdkCoeA9OQ3zWu8k+RYDyyPmuJ +HToFm1tn4hP8DGDjIr46Bb3jnZcz6bHsOp9quf3L6KbKa4ghiVqM05ML9Xb/YH0y +ge3QybfiAnWm6e2qIbzYucXmYDOBsQojta369CZ+zQEdy+baULFQ+Hg02vY4NKqv +xIhfri0B/Ng+m7MbUv163u2/7Eyit4xOrLYbouuMOxd1+TNasJPFwrKgjQNWdnPs +1pCxh+GXgf0a8WqbtB9P0wIQQbWw6OuuRmkW9zUisxuKyUo10hEHOK52v2O/7N3P +bgdMo3cl19PJpx81Tg== +=RgMn +-----END PGP PUBLIC KEY BLOCK----- diff --git 
a/x86_64-linux-gnu-glibc/reenable_DT_HASH.patch b/x86_64-linux-gnu-glibc/reenable_DT_HASH.patch new file mode 100644 index 0000000000..87a2329d82 --- /dev/null +++ b/x86_64-linux-gnu-glibc/reenable_DT_HASH.patch @@ -0,0 +1,28 @@ +From 31915e55f9c34f6137ab1c5ac002375a2d5d4589 Mon Sep 17 00:00:00 2001 +From: Frederik Schwan +Date: Fri, 4 Aug 2023 15:19:57 +0200 +Subject: [PATCH] force --hash-style=both to keep compatibility with old niche + software + +--- + Makeconfig | 4 ++++ + 1 file changed, 4 insertions(+) + +diff --git a/Makeconfig b/Makeconfig +index 77d7fd14df..2ae67c4beb 100644 +--- a/Makeconfig ++++ b/Makeconfig +@@ -378,6 +378,10 @@ relro-LDFLAGS = -Wl,-z,relro + LDFLAGS.so += $(relro-LDFLAGS) + LDFLAGS-rtld += $(relro-LDFLAGS) + ++hashstyle-LDFLAGS = -Wl,--hash-style=both ++LDFLAGS.so += $(hashstyle-LDFLAGS) ++LDFLAGS-rtld += $(hashstyle-LDFLAGS) ++ + # Linker options to enable and disable DT_RELR. + ifeq ($(have-dt-relr),yes) + dt-relr-ldflag = -Wl,-z,pack-relative-relocs +-- +2.41.0 + diff --git a/x86_64-linux-gnu-glibc/sdt-config.h b/x86_64-linux-gnu-glibc/sdt-config.h new file mode 100644 index 0000000000..733045a527 --- /dev/null +++ b/x86_64-linux-gnu-glibc/sdt-config.h @@ -0,0 +1,6 @@ +/* includes/sys/sdt-config.h. Generated from sdt-config.h.in by configure. + + This file just defines _SDT_ASM_SECTION_AUTOGROUP_SUPPORT to 0 or 1 to + indicate whether the assembler supports "?" in .pushsection directives. */ + +#define _SDT_ASM_SECTION_AUTOGROUP_SUPPORT 1 diff --git a/x86_64-linux-gnu-glibc/sdt.h b/x86_64-linux-gnu-glibc/sdt.h new file mode 100644 index 0000000000..c0c5a492cb --- /dev/null +++ b/x86_64-linux-gnu-glibc/sdt.h @@ -0,0 +1,430 @@ +/* - Systemtap static probe definition macros. + + This file is dedicated to the public domain, pursuant to CC0 + (https://creativecommons.org/publicdomain/zero/1.0/) +*/ + +#ifndef _SYS_SDT_H +#define _SYS_SDT_H 1 + +/* + This file defines a family of macros + + STAP_PROBEn(op1, ..., opn) + + that emit a nop into the instruction stream, and some data into an auxiliary + note section. The data in the note section describes the operands, in terms + of size and location. Each location is encoded as assembler operand string. + Consumer tools such as gdb or systemtap insert breakpoints on top of + the nop, and decode the location operand-strings, like an assembler, + to find the values being passed. + + The operand strings are selected by the compiler for each operand. + They are constrained by gcc inline-assembler codes. The default is: + + #define STAP_SDT_ARG_CONSTRAINT nor + + This is a good default if the operands tend to be integral and + moderate in number (smaller than number of registers). In other + cases, the compiler may report "'asm' requires impossible reload" or + similar. 
In this case, consider simplifying the macro call (fewer + and simpler operands), reduce optimization, or override the default + constraints string via: + + #define STAP_SDT_ARG_CONSTRAINT g + #include + + See also: + https://sourceware.org/systemtap/wiki/UserSpaceProbeImplementation + https://gcc.gnu.org/onlinedocs/gcc/Constraints.html + */ + + + +#ifdef __ASSEMBLER__ +# define _SDT_PROBE(provider, name, n, arglist) \ + _SDT_ASM_BODY(provider, name, _SDT_ASM_STRING_1, (_SDT_DEPAREN_##n arglist)) \ + _SDT_ASM_BASE +# define _SDT_ASM_1(x) x; +# define _SDT_ASM_2(a, b) a,b; +# define _SDT_ASM_3(a, b, c) a,b,c; +# define _SDT_ASM_5(a, b, c, d, e) a,b,c,d,e; +# define _SDT_ASM_STRING_1(x) .asciz #x; +# define _SDT_DEPAREN_0() /* empty */ +# define _SDT_DEPAREN_1(a) a +# define _SDT_DEPAREN_2(a,b) a b +# define _SDT_DEPAREN_3(a,b,c) a b c +# define _SDT_DEPAREN_4(a,b,c,d) a b c d +# define _SDT_DEPAREN_5(a,b,c,d,e) a b c d e +# define _SDT_DEPAREN_6(a,b,c,d,e,f) a b c d e f +# define _SDT_DEPAREN_7(a,b,c,d,e,f,g) a b c d e f g +# define _SDT_DEPAREN_8(a,b,c,d,e,f,g,h) a b c d e f g h +# define _SDT_DEPAREN_9(a,b,c,d,e,f,g,h,i) a b c d e f g h i +# define _SDT_DEPAREN_10(a,b,c,d,e,f,g,h,i,j) a b c d e f g h i j +# define _SDT_DEPAREN_11(a,b,c,d,e,f,g,h,i,j,k) a b c d e f g h i j k +# define _SDT_DEPAREN_12(a,b,c,d,e,f,g,h,i,j,k,l) a b c d e f g h i j k l +#else +# define _SDT_PROBE(provider, name, n, arglist) \ + do { \ + __asm__ __volatile__ (_SDT_ASM_BODY(provider, name, _SDT_ASM_ARGS, (n)) \ + :: _SDT_ASM_OPERANDS_##n arglist); \ + __asm__ __volatile__ (_SDT_ASM_BASE); \ + } while (0) +# define _SDT_S(x) #x +# define _SDT_ASM_1(x) _SDT_S(x) "\n" +# define _SDT_ASM_2(a, b) _SDT_S(a) "," _SDT_S(b) "\n" +# define _SDT_ASM_3(a, b, c) _SDT_S(a) "," _SDT_S(b) "," \ + _SDT_S(c) "\n" +# define _SDT_ASM_5(a, b, c, d, e) _SDT_S(a) "," _SDT_S(b) "," \ + _SDT_S(c) "," _SDT_S(d) "," \ + _SDT_S(e) "\n" +# define _SDT_ASM_ARGS(n) _SDT_ASM_STRING(_SDT_ASM_TEMPLATE_##n) +# define _SDT_ASM_STRING_1(x) _SDT_ASM_1(.asciz #x) + +# define _SDT_ARGFMT(no) %n[_SDT_S##no]@_SDT_ARGTMPL(_SDT_A##no) + +# ifndef STAP_SDT_ARG_CONSTRAINT +# if defined __powerpc__ +# define STAP_SDT_ARG_CONSTRAINT nZr +# else +# define STAP_SDT_ARG_CONSTRAINT nor +# endif +# endif + +# define _SDT_STRINGIFY(x) #x +# define _SDT_ARG_CONSTRAINT_STRING(x) _SDT_STRINGIFY(x) +# define _SDT_ARG(n, x) \ + [_SDT_S##n] "n" ((_SDT_ARGSIGNED (x) ? 1 : -1) * (int) _SDT_ARGSIZE (x)), \ + [_SDT_A##n] _SDT_ARG_CONSTRAINT_STRING (STAP_SDT_ARG_CONSTRAINT) (_SDT_ARGVAL (x)) +#endif +#define _SDT_ASM_STRING(x) _SDT_ASM_STRING_1(x) + +#define _SDT_ARGARRAY(x) (__builtin_classify_type (x) == 14 \ + || __builtin_classify_type (x) == 5) + +#ifdef __cplusplus +# define _SDT_ARGSIGNED(x) (!_SDT_ARGARRAY (x) \ + && __sdt_type<__typeof (x)>::__sdt_signed) +# define _SDT_ARGSIZE(x) (_SDT_ARGARRAY (x) \ + ? 
sizeof (void *) : sizeof (x)) +# define _SDT_ARGVAL(x) (x) + +# include + +template +struct __sdt_type +{ + static const bool __sdt_signed = false; +}; + +#define __SDT_ALWAYS_SIGNED(T) \ +template<> struct __sdt_type { static const bool __sdt_signed = true; }; +#define __SDT_COND_SIGNED(T,CT) \ +template<> struct __sdt_type { static const bool __sdt_signed = ((CT)(-1) < 1); }; +__SDT_ALWAYS_SIGNED(signed char) +__SDT_ALWAYS_SIGNED(short) +__SDT_ALWAYS_SIGNED(int) +__SDT_ALWAYS_SIGNED(long) +__SDT_ALWAYS_SIGNED(long long) +__SDT_ALWAYS_SIGNED(volatile signed char) +__SDT_ALWAYS_SIGNED(volatile short) +__SDT_ALWAYS_SIGNED(volatile int) +__SDT_ALWAYS_SIGNED(volatile long) +__SDT_ALWAYS_SIGNED(volatile long long) +__SDT_ALWAYS_SIGNED(const signed char) +__SDT_ALWAYS_SIGNED(const short) +__SDT_ALWAYS_SIGNED(const int) +__SDT_ALWAYS_SIGNED(const long) +__SDT_ALWAYS_SIGNED(const long long) +__SDT_ALWAYS_SIGNED(const volatile signed char) +__SDT_ALWAYS_SIGNED(const volatile short) +__SDT_ALWAYS_SIGNED(const volatile int) +__SDT_ALWAYS_SIGNED(const volatile long) +__SDT_ALWAYS_SIGNED(const volatile long long) +__SDT_COND_SIGNED(char, char) +__SDT_COND_SIGNED(wchar_t, wchar_t) +__SDT_COND_SIGNED(volatile char, char) +__SDT_COND_SIGNED(volatile wchar_t, wchar_t) +__SDT_COND_SIGNED(const char, char) +__SDT_COND_SIGNED(const wchar_t, wchar_t) +__SDT_COND_SIGNED(const volatile char, char) +__SDT_COND_SIGNED(const volatile wchar_t, wchar_t) +#if defined (__GNUC__) && (__GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 4)) +/* __SDT_COND_SIGNED(char16_t) */ +/* __SDT_COND_SIGNED(char32_t) */ +#endif + +template +struct __sdt_type<__sdt_E[]> : public __sdt_type<__sdt_E *> {}; + +template +struct __sdt_type<__sdt_E[__sdt_N]> : public __sdt_type<__sdt_E *> {}; + +#elif !defined(__ASSEMBLER__) +__extension__ extern unsigned long long __sdt_unsp; +# define _SDT_ARGINTTYPE(x) \ + __typeof (__builtin_choose_expr (((__builtin_classify_type (x) \ + + 3) & -4) == 4, (x), 0U)) +# define _SDT_ARGSIGNED(x) \ + (!__extension__ \ + (__builtin_constant_p ((((unsigned long long) \ + (_SDT_ARGINTTYPE (x)) __sdt_unsp) \ + & ((unsigned long long)1 << (sizeof (unsigned long long) \ + * __CHAR_BIT__ - 1))) == 0) \ + || (_SDT_ARGINTTYPE (x)) -1 > (_SDT_ARGINTTYPE (x)) 0)) +# define _SDT_ARGSIZE(x) \ + (_SDT_ARGARRAY (x) ? sizeof (void *) : sizeof (x)) +# define _SDT_ARGVAL(x) (x) +#endif + +#if defined __powerpc__ || defined __powerpc64__ +# define _SDT_ARGTMPL(id) %I[id]%[id] +#elif defined __i386__ +# define _SDT_ARGTMPL(id) %w[id] /* gcc.gnu.org/PR80115 */ +#else +# define _SDT_ARGTMPL(id) %[id] +#endif + +#ifdef __LP64__ +# define _SDT_ASM_ADDR .8byte +#else +# define _SDT_ASM_ADDR .4byte +#endif + +/* The ia64 and s390 nop instructions take an argument. */ +#if defined(__ia64__) || defined(__s390__) || defined(__s390x__) +#define _SDT_NOP nop 0 +#else +#define _SDT_NOP nop +#endif + +#define _SDT_NOTE_NAME "stapsdt" +#define _SDT_NOTE_TYPE 3 + +/* If the assembler supports the necessary feature, then we can play + nice with code in COMDAT sections, which comes up in C++ code. + Without that assembler support, some combinations of probe placements + in certain kinds of C++ code may produce link-time errors. */ +#include "sdt-config.h" +#if _SDT_ASM_SECTION_AUTOGROUP_SUPPORT +# define _SDT_ASM_AUTOGROUP "?" 
+#else +# define _SDT_ASM_AUTOGROUP "" +#endif + +#define _SDT_ASM_BODY(provider, name, pack_args, args) \ + _SDT_ASM_1(990: _SDT_NOP) \ + _SDT_ASM_3( .pushsection .note.stapsdt,_SDT_ASM_AUTOGROUP,"note") \ + _SDT_ASM_1( .balign 4) \ + _SDT_ASM_3( .4byte 992f-991f, 994f-993f, _SDT_NOTE_TYPE) \ + _SDT_ASM_1(991: .asciz _SDT_NOTE_NAME) \ + _SDT_ASM_1(992: .balign 4) \ + _SDT_ASM_1(993: _SDT_ASM_ADDR 990b) \ + _SDT_ASM_1( _SDT_ASM_ADDR _.stapsdt.base) \ + _SDT_SEMAPHORE(provider,name) \ + _SDT_ASM_STRING(provider) \ + _SDT_ASM_STRING(name) \ + pack_args args \ + _SDT_ASM_1(994: .balign 4) \ + _SDT_ASM_1( .popsection) + +#define _SDT_ASM_BASE \ + _SDT_ASM_1(.ifndef _.stapsdt.base) \ + _SDT_ASM_5( .pushsection .stapsdt.base,"aG","progbits", \ + .stapsdt.base,comdat) \ + _SDT_ASM_1( .weak _.stapsdt.base) \ + _SDT_ASM_1( .hidden _.stapsdt.base) \ + _SDT_ASM_1( _.stapsdt.base: .space 1) \ + _SDT_ASM_2( .size _.stapsdt.base, 1) \ + _SDT_ASM_1( .popsection) \ + _SDT_ASM_1(.endif) + +#if defined _SDT_HAS_SEMAPHORES +#define _SDT_SEMAPHORE(p,n) _SDT_ASM_1( _SDT_ASM_ADDR p##_##n##_semaphore) +#else +#define _SDT_SEMAPHORE(p,n) _SDT_ASM_1( _SDT_ASM_ADDR 0) +#endif + +#define _SDT_ASM_TEMPLATE_0 /* no arguments */ +#define _SDT_ASM_TEMPLATE_1 _SDT_ARGFMT(1) +#define _SDT_ASM_TEMPLATE_2 _SDT_ASM_TEMPLATE_1 _SDT_ARGFMT(2) +#define _SDT_ASM_TEMPLATE_3 _SDT_ASM_TEMPLATE_2 _SDT_ARGFMT(3) +#define _SDT_ASM_TEMPLATE_4 _SDT_ASM_TEMPLATE_3 _SDT_ARGFMT(4) +#define _SDT_ASM_TEMPLATE_5 _SDT_ASM_TEMPLATE_4 _SDT_ARGFMT(5) +#define _SDT_ASM_TEMPLATE_6 _SDT_ASM_TEMPLATE_5 _SDT_ARGFMT(6) +#define _SDT_ASM_TEMPLATE_7 _SDT_ASM_TEMPLATE_6 _SDT_ARGFMT(7) +#define _SDT_ASM_TEMPLATE_8 _SDT_ASM_TEMPLATE_7 _SDT_ARGFMT(8) +#define _SDT_ASM_TEMPLATE_9 _SDT_ASM_TEMPLATE_8 _SDT_ARGFMT(9) +#define _SDT_ASM_TEMPLATE_10 _SDT_ASM_TEMPLATE_9 _SDT_ARGFMT(10) +#define _SDT_ASM_TEMPLATE_11 _SDT_ASM_TEMPLATE_10 _SDT_ARGFMT(11) +#define _SDT_ASM_TEMPLATE_12 _SDT_ASM_TEMPLATE_11 _SDT_ARGFMT(12) +#define _SDT_ASM_OPERANDS_0() [__sdt_dummy] "g" (0) +#define _SDT_ASM_OPERANDS_1(arg1) _SDT_ARG(1, arg1) +#define _SDT_ASM_OPERANDS_2(arg1, arg2) \ + _SDT_ASM_OPERANDS_1(arg1), _SDT_ARG(2, arg2) +#define _SDT_ASM_OPERANDS_3(arg1, arg2, arg3) \ + _SDT_ASM_OPERANDS_2(arg1, arg2), _SDT_ARG(3, arg3) +#define _SDT_ASM_OPERANDS_4(arg1, arg2, arg3, arg4) \ + _SDT_ASM_OPERANDS_3(arg1, arg2, arg3), _SDT_ARG(4, arg4) +#define _SDT_ASM_OPERANDS_5(arg1, arg2, arg3, arg4, arg5) \ + _SDT_ASM_OPERANDS_4(arg1, arg2, arg3, arg4), _SDT_ARG(5, arg5) +#define _SDT_ASM_OPERANDS_6(arg1, arg2, arg3, arg4, arg5, arg6) \ + _SDT_ASM_OPERANDS_5(arg1, arg2, arg3, arg4, arg5), _SDT_ARG(6, arg6) +#define _SDT_ASM_OPERANDS_7(arg1, arg2, arg3, arg4, arg5, arg6, arg7) \ + _SDT_ASM_OPERANDS_6(arg1, arg2, arg3, arg4, arg5, arg6), _SDT_ARG(7, arg7) +#define _SDT_ASM_OPERANDS_8(arg1, arg2, arg3, arg4, arg5, arg6, arg7, arg8) \ + _SDT_ASM_OPERANDS_7(arg1, arg2, arg3, arg4, arg5, arg6, arg7), \ + _SDT_ARG(8, arg8) +#define _SDT_ASM_OPERANDS_9(arg1,arg2,arg3,arg4,arg5,arg6,arg7,arg8,arg9) \ + _SDT_ASM_OPERANDS_8(arg1, arg2, arg3, arg4, arg5, arg6, arg7, arg8), \ + _SDT_ARG(9, arg9) +#define _SDT_ASM_OPERANDS_10(arg1,arg2,arg3,arg4,arg5,arg6,arg7,arg8,arg9,arg10) \ + _SDT_ASM_OPERANDS_9(arg1, arg2, arg3, arg4, arg5, arg6, arg7, arg8, arg9), \ + _SDT_ARG(10, arg10) +#define _SDT_ASM_OPERANDS_11(arg1,arg2,arg3,arg4,arg5,arg6,arg7,arg8,arg9,arg10,arg11) \ + _SDT_ASM_OPERANDS_10(arg1, arg2, arg3, arg4, arg5, arg6, arg7, arg8, arg9, arg10), \ + _SDT_ARG(11, arg11) +#define 
_SDT_ASM_OPERANDS_12(arg1,arg2,arg3,arg4,arg5,arg6,arg7,arg8,arg9,arg10,arg11,arg12) \ + _SDT_ASM_OPERANDS_11(arg1, arg2, arg3, arg4, arg5, arg6, arg7, arg8, arg9, arg10, arg11), \ + _SDT_ARG(12, arg12) + +/* These macros can be used in C, C++, or assembly code. + In assembly code the arguments should use normal assembly operand syntax. */ + +#define STAP_PROBE(provider, name) \ + _SDT_PROBE(provider, name, 0, ()) +#define STAP_PROBE1(provider, name, arg1) \ + _SDT_PROBE(provider, name, 1, (arg1)) +#define STAP_PROBE2(provider, name, arg1, arg2) \ + _SDT_PROBE(provider, name, 2, (arg1, arg2)) +#define STAP_PROBE3(provider, name, arg1, arg2, arg3) \ + _SDT_PROBE(provider, name, 3, (arg1, arg2, arg3)) +#define STAP_PROBE4(provider, name, arg1, arg2, arg3, arg4) \ + _SDT_PROBE(provider, name, 4, (arg1, arg2, arg3, arg4)) +#define STAP_PROBE5(provider, name, arg1, arg2, arg3, arg4, arg5) \ + _SDT_PROBE(provider, name, 5, (arg1, arg2, arg3, arg4, arg5)) +#define STAP_PROBE6(provider, name, arg1, arg2, arg3, arg4, arg5, arg6) \ + _SDT_PROBE(provider, name, 6, (arg1, arg2, arg3, arg4, arg5, arg6)) +#define STAP_PROBE7(provider, name, arg1, arg2, arg3, arg4, arg5, arg6, arg7) \ + _SDT_PROBE(provider, name, 7, (arg1, arg2, arg3, arg4, arg5, arg6, arg7)) +#define STAP_PROBE8(provider,name,arg1,arg2,arg3,arg4,arg5,arg6,arg7,arg8) \ + _SDT_PROBE(provider, name, 8, (arg1,arg2,arg3,arg4,arg5,arg6,arg7,arg8)) +#define STAP_PROBE9(provider,name,arg1,arg2,arg3,arg4,arg5,arg6,arg7,arg8,arg9)\ + _SDT_PROBE(provider, name, 9, (arg1,arg2,arg3,arg4,arg5,arg6,arg7,arg8,arg9)) +#define STAP_PROBE10(provider,name,arg1,arg2,arg3,arg4,arg5,arg6,arg7,arg8,arg9,arg10) \ + _SDT_PROBE(provider, name, 10, \ + (arg1,arg2,arg3,arg4,arg5,arg6,arg7,arg8,arg9,arg10)) +#define STAP_PROBE11(provider,name,arg1,arg2,arg3,arg4,arg5,arg6,arg7,arg8,arg9,arg10,arg11) \ + _SDT_PROBE(provider, name, 11, \ + (arg1,arg2,arg3,arg4,arg5,arg6,arg7,arg8,arg9,arg10,arg11)) +#define STAP_PROBE12(provider,name,arg1,arg2,arg3,arg4,arg5,arg6,arg7,arg8,arg9,arg10,arg11,arg12) \ + _SDT_PROBE(provider, name, 12, \ + (arg1,arg2,arg3,arg4,arg5,arg6,arg7,arg8,arg9,arg10,arg11,arg12)) + +/* This STAP_PROBEV macro can be used in variadic scenarios, where the + number of probe arguments is not known until compile time. Since + variadic macro support may vary with compiler options, you must + pre-#define SDT_USE_VARIADIC to enable this type of probe. + + The trick to count __VA_ARGS__ was inspired by this post by + Laurent Deniau : + http://groups.google.com/group/comp.std.c/msg/346fc464319b1ee5 + + Note that our _SDT_NARG is called with an extra 0 arg that's not + counted, so we don't have to worry about the behavior of macros + called without any arguments. */ + +#ifdef SDT_USE_VARIADIC +#define _SDT_NARG(...) __SDT_NARG(__VA_ARGS__, 12,11,10,9,8,7,6,5,4,3,2,1,0) +#define __SDT_NARG(_0,_1,_2,_3,_4,_5,_6,_7,_8,_9,_10,_11,_12, N, ...) N +#define _SDT_PROBE_N(provider, name, N, ...) \ + _SDT_PROBE(provider, name, N, (__VA_ARGS__)) +#define STAP_PROBEV(provider, name, ...) \ + _SDT_PROBE_N(provider, name, _SDT_NARG(0, ##__VA_ARGS__), ##__VA_ARGS__) +#endif + +/* These macros are for use in asm statements. You must compile + with -std=gnu99 or -std=c99 to use the STAP_PROBE_ASM macro. + + The STAP_PROBE_ASM macro generates a quoted string to be used in the + template portion of the asm statement, concatenated with strings that + contain the actual assembly code around the probe site. 
+ + For example: + + asm ("before\n" + STAP_PROBE_ASM(provider, fooprobe, %eax 4(%esi)) + "after"); + + emits the assembly code for "before\nafter", with a probe in between. + The probe arguments are the %eax register, and the value of the memory + word located 4 bytes past the address in the %esi register. Note that + because this is a simple asm, not a GNU C extended asm statement, these + % characters do not need to be doubled to generate literal %reg names. + + In a GNU C extended asm statement, the probe arguments can be specified + using the macro STAP_PROBE_ASM_TEMPLATE(n) for n arguments. The paired + macro STAP_PROBE_ASM_OPERANDS gives the C values of these probe arguments, + and appears in the input operand list of the asm statement. For example: + + asm ("someinsn %0,%1\n" // %0 is output operand, %1 is input operand + STAP_PROBE_ASM(provider, fooprobe, STAP_PROBE_ASM_TEMPLATE(3)) + "otherinsn %[namedarg]" + : "r" (outvar) + : "g" (some_value), [namedarg] "i" (1234), + STAP_PROBE_ASM_OPERANDS(3, some_value, some_ptr->field, 1234)); + + This is just like writing: + + STAP_PROBE3(provider, fooprobe, some_value, some_ptr->field, 1234)); + + but the probe site is right between "someinsn" and "otherinsn". + + The probe arguments in STAP_PROBE_ASM can be given as assembly + operands instead, even inside a GNU C extended asm statement. + Note that these can use operand templates like %0 or %[name], + and likewise they must write %%reg for a literal operand of %reg. */ + +#if __STDC_VERSION__ >= 199901L +# define STAP_PROBE_ASM(provider, name, ...) \ + _SDT_ASM_BODY(provider, name, _SDT_ASM_STRING, (__VA_ARGS__)) \ + _SDT_ASM_BASE +# define STAP_PROBE_ASM_OPERANDS(n, ...) _SDT_ASM_OPERANDS_##n(__VA_ARGS__) +#else +# define STAP_PROBE_ASM(provider, name, args) \ + _SDT_ASM_BODY(provider, name, _SDT_ASM_STRING, (args)) \ + _SDT_ASM_BASE +#endif +#define STAP_PROBE_ASM_TEMPLATE(n) _SDT_ASM_TEMPLATE_##n + + +/* DTrace compatible macro names. 
*/ +#define DTRACE_PROBE(provider,probe) \ + STAP_PROBE(provider,probe) +#define DTRACE_PROBE1(provider,probe,parm1) \ + STAP_PROBE1(provider,probe,parm1) +#define DTRACE_PROBE2(provider,probe,parm1,parm2) \ + STAP_PROBE2(provider,probe,parm1,parm2) +#define DTRACE_PROBE3(provider,probe,parm1,parm2,parm3) \ + STAP_PROBE3(provider,probe,parm1,parm2,parm3) +#define DTRACE_PROBE4(provider,probe,parm1,parm2,parm3,parm4) \ + STAP_PROBE4(provider,probe,parm1,parm2,parm3,parm4) +#define DTRACE_PROBE5(provider,probe,parm1,parm2,parm3,parm4,parm5) \ + STAP_PROBE5(provider,probe,parm1,parm2,parm3,parm4,parm5) +#define DTRACE_PROBE6(provider,probe,parm1,parm2,parm3,parm4,parm5,parm6) \ + STAP_PROBE6(provider,probe,parm1,parm2,parm3,parm4,parm5,parm6) +#define DTRACE_PROBE7(provider,probe,parm1,parm2,parm3,parm4,parm5,parm6,parm7) \ + STAP_PROBE7(provider,probe,parm1,parm2,parm3,parm4,parm5,parm6,parm7) +#define DTRACE_PROBE8(provider,probe,parm1,parm2,parm3,parm4,parm5,parm6,parm7,parm8) \ + STAP_PROBE8(provider,probe,parm1,parm2,parm3,parm4,parm5,parm6,parm7,parm8) +#define DTRACE_PROBE9(provider,probe,parm1,parm2,parm3,parm4,parm5,parm6,parm7,parm8,parm9) \ + STAP_PROBE9(provider,probe,parm1,parm2,parm3,parm4,parm5,parm6,parm7,parm8,parm9) +#define DTRACE_PROBE10(provider,probe,parm1,parm2,parm3,parm4,parm5,parm6,parm7,parm8,parm9,parm10) \ + STAP_PROBE10(provider,probe,parm1,parm2,parm3,parm4,parm5,parm6,parm7,parm8,parm9,parm10) +#define DTRACE_PROBE11(provider,probe,parm1,parm2,parm3,parm4,parm5,parm6,parm7,parm8,parm9,parm10,parm11) \ + STAP_PROBE11(provider,probe,parm1,parm2,parm3,parm4,parm5,parm6,parm7,parm8,parm9,parm10,parm11) +#define DTRACE_PROBE12(provider,probe,parm1,parm2,parm3,parm4,parm5,parm6,parm7,parm8,parm9,parm10,parm11,parm12) \ + STAP_PROBE12(provider,probe,parm1,parm2,parm3,parm4,parm5,parm6,parm7,parm8,parm9,parm10,parm11,parm12) + + +#endif /* sys/sdt.h */ diff --git a/x86_64-linux-gnu-linux-api-headers/PKGBUILD b/x86_64-linux-gnu-linux-api-headers/PKGBUILD new file mode 100644 index 0000000000..af8b70d94e --- /dev/null +++ b/x86_64-linux-gnu-linux-api-headers/PKGBUILD @@ -0,0 +1,29 @@ +# Maintainer: Xiaotian Wu + +_target_arch=x86 +_target=x86_64-linux-gnu +pkgname=$_target-linux-api-headers +pkgver=6.0 +pkgrel=1 +pkgdesc="Kernel headers sanitized for use in userspace ($_target)" +arch=(any) +url='https://www.kernel.org' +license=(GPL2) +makedepends=(rsync) +source=(https://www.kernel.org/pub/linux/kernel/v6.x/linux-$pkgver.tar.{xz,sign}) +sha256sums=('5c2443a5538de52688efb55c27ab0539c1f5eb58c0cfd16a2b9fbb08fd81788e' + 'SKIP') +validpgpkeys=( + 'ABAF11C65A2970B130ABE3C479BE3E4300411886' # Linus Torvalds + '647F28654894E3BD457199BE38DBBDC86092693E' # Greg Kroah-Hartman +) + + +package() { + cd linux-$pkgver + + make INSTALL_HDR_PATH="$pkgdir/usr/$_target/" ARCH=$_target_arch V=0 headers_install + + # clean-up unnecessary files generated during install + find "$pkgdir" \( -name .install -or -name ..install.cmd \) -delete +} diff --git a/xdg-desktop-portal-wlr/PKGBUILD b/xdg-desktop-portal-wlr/PKGBUILD index 7834b50745..f5923ac7ad 100644 --- a/xdg-desktop-portal-wlr/PKGBUILD +++ b/xdg-desktop-portal-wlr/PKGBUILD @@ -11,7 +11,7 @@ arch=('loong64' 'x86_64') license=('MIT') provides=('xdg-desktop-portal-impl') depends=('xdg-desktop-portal' 'pipewire' 'pipewire-session-manager' 'libinih') -makedepends=('meson' 'wayland-protocols' 'wayland' 'scdoc') +makedepends=('meson' 'wayland-protocols' 'wayland' 'scdoc' 'mesa') optdepends=( 'slurp: to choose which output to screencast using 
slurp' 'wofi: to choose which output to screencast using wofi' diff --git a/xf86-video-loongson/PKGBUILD b/xf86-video-loongson/PKGBUILD new file mode 100644 index 0000000000..9b4078f5c2 --- /dev/null +++ b/xf86-video-loongson/PKGBUILD @@ -0,0 +1,46 @@ +# Maintainer: Jan de Groot +# Contributor: Alexander Baldeck + +pkgname=xf86-video-loongson +pkgver=0.2.0 +pkgrel=1 +epoch=1 +pkgdesc="X.org loongson video driver" +arch=('loong64' 'x86_64') +url="https://xorg.freedesktop.org/" +license=('custom') +depends=('systemd-libs' 'mesa') +makedepends=('xorg-server-devel' 'systemd' 'X-ABI-VIDEODRV_VERSION=25.2') +conflicts=('xorg-server<1.20.0' 'X-ABI-VIDEODRV_VERSION<25' 'X-ABI-VIDEODRV_VERSION>=26') +groups=('xorg-drivers') +source=(${url}/releases/individual/driver/${pkgname}-${pkgver}.tar.gz) +sha512sums=('bdc6601c45f2e228f374dee36085c9fdea52cd2c95dd67d21d49b3f61daf38eaf04775478215ac472447edd70def661093b8385de3741f97aa670c2fc4a3ad01') + +build() { + cd ${pkgname}-${pkgver} + + CFLAGS+=' -fcommon' # https://wiki.gentoo.org/wiki/Gcc_10_porting_notes/fno_common + + # Since pacman 5.0.2-2, hardened flags are now enabled in makepkg.conf + # With them, module fail to load with undefined symbol. + # See https://bugs.archlinux.org/task/55102 / https://bugs.archlinux.org/task/54845 + export CFLAGS=${CFLAGS/-fno-plt} + export CXXFLAGS=${CXXFLAGS/-fno-plt} + export LDFLAGS=${LDFLAGS/,-z,now} + + ./configure --prefix=/usr + make +} + +check() { + cd ${pkgname}-${pkgver} + make check +} + +package() { + cd ${pkgname}-${pkgver} + + make "DESTDIR=${pkgdir}" install + install -m755 -d "${pkgdir}/usr/share/licenses/${pkgname}" + install -m644 COPYING "${pkgdir}/usr/share/licenses/${pkgname}/" +} diff --git a/xorg-server/0001-modesetting-match-against-Multimedia-Video-Controlle.patch b/xorg-server/0001-modesetting-match-against-Multimedia-Video-Controlle.patch new file mode 100644 index 0000000000..8eaa54598a --- /dev/null +++ b/xorg-server/0001-modesetting-match-against-Multimedia-Video-Controlle.patch @@ -0,0 +1,32 @@ +From 4eda654b6099981294b35bd93c1e4e92e71f376a Mon Sep 17 00:00:00 2001 +From: "Liu, Chang" +Date: Wed, 8 Nov 2023 13:02:10 +0800 +Subject: [PATCH] modesetting: match against Multimedia Video Controllers as + well + +Some GPU devices such as those found in the Loongson 7A2000 bridge +chips and 2K2000 SOCs identify as Multimedia Video Controllers +(PCI class 0x4 subclass 0x0). These have standard KMS drivers in +the kernel and the modesetting driver works flawlessly, so match +against these types of devices as well. 
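+
+For reference, a minimal sketch of how such a class/mask entry selects
+devices (illustrative only; the helper below is not part of the driver and
+assumes the packed class/subclass/prog-if device_class layout used by
+libpciaccess):
+
+    /* A match entry with device_class = 0x00040000 and
+     * device_class_mask = 0x00ff0000 keeps only the base-class byte, so
+     * any PCI base class 0x04 (Multimedia) device is accepted, while
+     * base class 0x03 (Display Controller) devices continue to hit the
+     * existing entry above it. */
+    static int is_multimedia_controller(unsigned int device_class)
+    {
+        return (device_class & 0x00ff0000) == 0x00040000;
+    }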
+--- + hw/xfree86/drivers/modesetting/driver.c | 3 +++ + 1 file changed, 3 insertions(+) + +diff --git a/hw/xfree86/drivers/modesetting/driver.c b/hw/xfree86/drivers/modesetting/driver.c +index 9a69452bd..69a2b683f 100644 +--- a/hw/xfree86/drivers/modesetting/driver.c ++++ b/hw/xfree86/drivers/modesetting/driver.c +@@ -96,6 +96,9 @@ static const struct pci_id_match ms_device_match[] = { + { + PCI_MATCH_ANY, PCI_MATCH_ANY, PCI_MATCH_ANY, PCI_MATCH_ANY, + 0x00030000, 0x00ff0000, 0}, ++ { ++ PCI_MATCH_ANY, PCI_MATCH_ANY, PCI_MATCH_ANY, PCI_MATCH_ANY, ++ 0x00040000, 0x00ff0000, 0}, + + {0, 0, 0}, + }; +-- +2.42.0 + diff --git a/xorg-server/10-modeset.conf b/xorg-server/10-modeset.conf new file mode 100644 index 0000000000..d9bf1c0c32 --- /dev/null +++ b/xorg-server/10-modeset.conf @@ -0,0 +1,6 @@ +Section "Device" + Identifier "Generic Kernel Modesetting Device" + Driver "modesetting" + Option "kmsdev" "/dev/dri/card0" + Option "ShadowFB" "true" +EndSection diff --git a/xorg-server/PKGBUILD b/xorg-server/PKGBUILD index 8cc685bd18..c8bc25ee3d 100644 --- a/xorg-server/PKGBUILD +++ b/xorg-server/PKGBUILD @@ -20,6 +20,8 @@ makedepends=('xorgproto' 'pixman' 'libx11' 'mesa' 'mesa-libgl' 'xtrans' source=(https://xorg.freedesktop.org/releases/individual/xserver/${pkgbase}-${pkgver}.tar.xz{,.sig} xvfb-run # with updates from FC master xvfb-run.1 + 10-modeset.conf + 0001-modesetting-match-against-Multimedia-Video-Controlle.patch ) validpgpkeys=('3C2C43D9447D5938EF4551EBE23B7E70B467F0BF' # Peter Hutterer (Who-T) '67DC86F2623FC5FD4BB5225D14706DBE1E4B4540' # Olivier Fourdan @@ -31,6 +33,11 @@ sha512sums=('ad5edacbe8c7e2ebe6b4a690af94c7ea5ebc781d00b0e58ae2d273c78ceee2fa00b prepare() { cd ${pkgbase}-$pkgver + + # FS#73274 + patch -Np1 -i ../xephyr_Dont_check_for_SeatId_anymore.patch + # fix modesetting driver for loongson and gsgpu + patch -Np1 -i ../0001-modesetting-match-against-Multimedia-Video-Controlle.patch } build() { @@ -50,6 +57,7 @@ build() { -D xephyr=true \ -D glamor=true \ -D udev=true \ + -D udev_kms=true \ -D dtrace=false \ -D systemd_logind=true \ -D suid_wrapper=true \ @@ -111,6 +119,7 @@ package_xorg-server() { # distro specific files must be installed in /usr/share/X11/xorg.conf.d install -m755 -d "${pkgdir}/etc/X11/xorg.conf.d" + install -m644 -Dt "${pkgdir}/usr/share/X11/xorg.conf.d" 10-modeset.conf # license install -m644 -Dt "${pkgdir}/usr/share/licenses/${pkgname}" "${pkgbase}-${pkgver}"/COPYING diff --git a/xsd/0120-g++10.patch b/xsd/0120-g++10.patch new file mode 100644 index 0000000000..441b27897c --- /dev/null +++ b/xsd/0120-g++10.patch @@ -0,0 +1,19 @@ +Description: Fix FTBFS with gcc-10 +Author: Boris Kolpackov +Origin: upstream, https://git.codesynthesis.com/cgit/libxsd-frontend/libxsd-frontend/commit/?id=5029f8665190879285787a9dcdaf5f997cadd2e2 +Bug-Debian: https://bugs.debian.org/cgi-bin/bugreport.cgi?bug=957999 +Last-Update: 2020-09-10 +--- +This patch header follows DEP-3: http://dep.debian.net/deps/dep3/ +Index: trunk/libxsd-frontend/xsd-frontend/semantic-graph/elements.cxx +=================================================================== +--- trunk.orig/libxsd-frontend/xsd-frontend/semantic-graph/elements.cxx ++++ trunk/libxsd-frontend/xsd-frontend/semantic-graph/elements.cxx +@@ -3,6 +3,7 @@ + // license : GNU GPL v2 + exceptions; see accompanying LICENSE file + + #include ++#include + + #include + diff --git a/xsd/xsd-c++17.patch b/xsd/xsd-c++17.patch new file mode 100644 index 0000000000..6db0b25a20 --- /dev/null +++ b/xsd/xsd-c++17.patch @@ -0,0 +1,48 @@ +Index: 
xsd-4.0.0+dep/libcutl/cutl/shared-ptr/base.cxx +=================================================================== +--- xsd-4.0.0+dep.orig/libcutl/cutl/shared-ptr/base.cxx ++++ xsd-4.0.0+dep/libcutl/cutl/shared-ptr/base.cxx +@@ -25,7 +25,7 @@ namespace cutl + // + // + void* +-operator new (size_t n, cutl::share s) throw (std::bad_alloc) ++operator new (size_t n, cutl::share s) noexcept(false) + { + if (s == shared) + { +Index: xsd-4.0.0+dep/libcutl/cutl/shared-ptr/base.hxx +=================================================================== +--- xsd-4.0.0+dep.orig/libcutl/cutl/shared-ptr/base.hxx ++++ xsd-4.0.0+dep/libcutl/cutl/shared-ptr/base.hxx +@@ -31,7 +31,7 @@ extern LIBCUTL_EXPORT cutl::share shared + extern LIBCUTL_EXPORT cutl::share exclusive; + + LIBCUTL_EXPORT void* +-operator new (std::size_t, cutl::share) throw (std::bad_alloc); ++operator new (std::size_t, cutl::share) noexcept(false); + + LIBCUTL_EXPORT void + operator delete (void*, cutl::share) throw (); +@@ -61,7 +61,7 @@ namespace cutl + _ref_count () const; + + void* +- operator new (std::size_t, share) throw (std::bad_alloc); ++ operator new (std::size_t, share) noexcept(false); + + void + operator delete (void*, share) throw (); +Index: xsd-4.0.0+dep/libcutl/cutl/shared-ptr/base.ixx +=================================================================== +--- xsd-4.0.0+dep.orig/libcutl/cutl/shared-ptr/base.ixx ++++ xsd-4.0.0+dep/libcutl/cutl/shared-ptr/base.ixx +@@ -59,7 +59,7 @@ namespace cutl + } + + inline void* shared_base:: +- operator new (std::size_t n, share) throw (std::bad_alloc) ++ operator new (std::size_t n, share) noexcept(false) + { + return ::operator new (n); + } diff --git a/xsv/PKGBUILD b/xsv/PKGBUILD index 1fac1b7130..534dcf8217 100644 --- a/xsv/PKGBUILD +++ b/xsv/PKGBUILD @@ -23,7 +23,7 @@ pkgver() { prepare() { cd xsv - cargo fetch --locked --target "$CARCH-unknown-linux-gnu" + cargo fetch --locked --target "`uname -m`-unknown-linux-gnu" } build() { diff --git a/yaegi/PKGBUILD b/yaegi/PKGBUILD index f1894a36f8..62bf9aee55 100644 --- a/yaegi/PKGBUILD +++ b/yaegi/PKGBUILD @@ -18,7 +18,7 @@ build() { export CGO_CFLAGS="${CFLAGS}" export CGO_CXXFLAGS="${CXXFLAGS}" export CGO_LDFLAGS="${LDFLAGS}" - export GOFLAGS="-buildmode=pie -trimpath -mod=readonly -modcacherw -ldflags=-linkmode=external" + export GOFLAGS="-trimpath -mod=readonly -modcacherw -ldflags=-linkmode=external" go build -v ./cmd/yaegi } diff --git a/yazi/PKGBUILD b/yazi/PKGBUILD index 9ec862be5d..8938622e04 100644 --- a/yazi/PKGBUILD +++ b/yazi/PKGBUILD @@ -27,7 +27,7 @@ options=('!lto') prepare() { cd "$srcdir/$pkgname-$pkgver" - cargo fetch --locked --target "$CARCH-unknown-linux-gnu" + cargo fetch --locked --target "`uname -m`-unknown-linux-gnu" } build() { diff --git a/zbus_xmlgen/PKGBUILD b/zbus_xmlgen/PKGBUILD index ae3ce4b3de..d7105fe894 100644 --- a/zbus_xmlgen/PKGBUILD +++ b/zbus_xmlgen/PKGBUILD @@ -28,7 +28,7 @@ prepare() { cd $_project_name-$pkgname-$pkgver/$pkgname export RUSTUP_TOOLCHAIN=stable - cargo fetch --locked --target "$CARCH-unknown-linux-gnu" + cargo fetch --locked --target "`uname -m`-unknown-linux-gnu" } build() { diff --git a/zellij/PKGBUILD b/zellij/PKGBUILD index 98ff84fbed..8c31661200 100644 --- a/zellij/PKGBUILD +++ b/zellij/PKGBUILD @@ -16,7 +16,7 @@ options=('!lto') prepare() { cd "$pkgname-$pkgver" - cargo fetch --locked --target "$CARCH-unknown-linux-gnu" + cargo fetch --locked --target "`uname -m`-unknown-linux-gnu" } build() { diff --git a/zenith/PKGBUILD b/zenith/PKGBUILD index cfc8248459..5d3858342a 
100644 --- a/zenith/PKGBUILD +++ b/zenith/PKGBUILD @@ -16,7 +16,7 @@ sha256sums=('2cbcea2625cfa97c161b974ad412a47e330f7fd31bec0479e329ed3606cfc569') prepare() { cd "$pkgname-$pkgver" - cargo fetch --locked --target "$CARCH-unknown-linux-gnu" + cargo fetch --locked --target "`uname -m`-unknown-linux-gnu" } build() { diff --git a/zip/PKGBUILD b/zip/PKGBUILD index 201ddb348f..a727417948 100644 --- a/zip/PKGBUILD +++ b/zip/PKGBUILD @@ -25,6 +25,8 @@ options=('!makeflags') prepare() { cd "${srcdir}/${pkgname}${_pkgver}" + CFLAGS=${CFLAGS/-Wformat -Werror=format-security/} + CXXFLAGS=${CXXFLAGS/-Wformat -Werror=format-security/} sed -e "/^CFLAGS_NOOPT =/s/\$/ $CPPFLAGS $CFLAGS/" -i unix/Makefile sed -e "s/^LFLAGS1=''/LFLAGS1=$LDFLAGS/" -i unix/configure patch -p1 -i ../zip-3.0-currdir.patch diff --git a/zola/PKGBUILD b/zola/PKGBUILD index 9d86e9a119..646d503e4e 100644 --- a/zola/PKGBUILD +++ b/zola/PKGBUILD @@ -15,7 +15,7 @@ sha256sums=('c0e1711a68bc005c2e0ecc76a468f3459739c9e54af34850cb725d04391e19b5') prepare() { cd zola-$pkgver - cargo fetch --locked --target "$CARCH-unknown-linux-gnu" + cargo fetch --locked --target "`uname -m`-unknown-linux-gnu" } build() { diff --git a/zoxide/PKGBUILD b/zoxide/PKGBUILD index 9ef8dbe036..4cb5fb27ca 100644 --- a/zoxide/PKGBUILD +++ b/zoxide/PKGBUILD @@ -27,7 +27,7 @@ pkgver() { prepare() { cd "$pkgname" - cargo fetch --locked --target "$CARCH-unknown-linux-gnu" + cargo fetch --locked --target "`uname -m`-unknown-linux-gnu" } build() { diff --git a/zram-generator/PKGBUILD b/zram-generator/PKGBUILD index 2563aeb42d..72158f3ad8 100644 --- a/zram-generator/PKGBUILD +++ b/zram-generator/PKGBUILD @@ -15,6 +15,12 @@ sha256sums=('506d47acbabffa7013bb40a1f61c6edfa758a7bd55820d06ef49c7bc83dba762') build() { cd zram-generator-$pkgver + find -name Cargo.lock -exec rm -f {} \; + mkdir .cargo + cat > .cargo/config.toml < Date: Mon, 19 Feb 2024 12:00:04 +0800 Subject: [PATCH 02/23] lua-bit32 --- lua-bit32/PKGBUILD | 2 +- ocaml/{ocaml-5.0.0-la64.patch => ocaml-5.1.0-la64.patch} | 0 2 files changed, 1 insertion(+), 1 deletion(-) rename ocaml/{ocaml-5.0.0-la64.patch => ocaml-5.1.0-la64.patch} (100%) diff --git a/lua-bit32/PKGBUILD b/lua-bit32/PKGBUILD index a747de9216..c0d2d5e395 100644 --- a/lua-bit32/PKGBUILD +++ b/lua-bit32/PKGBUILD @@ -21,7 +21,7 @@ makedepends=(lua options=(debug) _archive="$_project-$_compatrel" _rockspec="$_rockname-${pkgver%_*}-$_rockrel.rockspec" -_rock="$_rockname-${pkgver%_*}-$_rockrel.linux-$CARCH.rock" +_rock="$_rockname-${pkgver%_*}-$_rockrel.linux-`uname -m`.rock" source=("$url/archive/v$_compatrel/$_archive.tar.gz" "${url/github/raw.githubusercontent}/1e31f3ddc517b4e521c73f7d6eaecd5e1787daa6/rockspecs/$_rockspec") sha256sums=('d1ed32f091856f6fffab06232da79c48b437afd4cd89e5c1fc85d7905b011430' diff --git a/ocaml/ocaml-5.0.0-la64.patch b/ocaml/ocaml-5.1.0-la64.patch similarity index 100% rename from ocaml/ocaml-5.0.0-la64.patch rename to ocaml/ocaml-5.1.0-la64.patch From c2a993d62618646c749a22fcd25b20264d3c3be8 Mon Sep 17 00:00:00 2001 From: Xiaotian Wu Date: Mon, 19 Feb 2024 14:49:41 +0800 Subject: [PATCH 03/23] boost --- boost/PKGBUILD | 7 ++--- boost/boost-1.79.0-la64.patch | 55 ----------------------------------- 2 files changed, 2 insertions(+), 60 deletions(-) delete mode 100644 boost/boost-1.79.0-la64.patch diff --git a/boost/PKGBUILD b/boost/PKGBUILD index 8cc5cad354..5aae060550 100644 --- a/boost/PKGBUILD +++ b/boost/PKGBUILD @@ -21,13 +21,11 @@ makedepends=('icu' 'python' 'python-numpy' 'bzip2' 'zlib' 'openmpi' 'zstd') 
source=(https://boostorg.jfrog.io/artifactory/main/release/$pkgver/source/$_srcname.tar.bz2 boost-1.81.0-phoenix-multiple-definitions.patch $pkgname-support-fn.contains-f-where-f-is-a-function.patch::https://github.com/boostorg/function/commit/7ca2310b15e3.patch - $pkgname-ublas-c++20-iterator.patch::https://github.com/boostorg/ublas/commit/a31e5cffa85f.patch - boost-1.79.0-la64.patch) + $pkgname-ublas-c++20-iterator.patch::https://github.com/boostorg/ublas/commit/a31e5cffa85f.patch) sha256sums=('6478edfe2f3305127cffe8caf73ea0176c53769f4bf1585be237eb30798c3b8e' '3ebf428ef6be090a7b56a233330375539ac429333b83708e28fe5db049cfecdb' '1b5998ee8fb389dd6df55a3684d29ffa37246bc007e8e6712bf2be6c7f745036' - 'aa38addb40d5f44b4a8472029b475e7e6aef1c460509eb7d8edf03491dc1b5ee' - '0fb9188bf211deff0d48dfb7cef614bbdebcd7dccea6e8c015da5d691eda5d94') + 'aa38addb40d5f44b4a8472029b475e7e6aef1c460509eb7d8edf03491dc1b5ee') prepare() { cd $_srcname @@ -42,7 +40,6 @@ prepare() { # https://github.com/boostorg/ublas/pull/97 patch -Np2 -i ../$pkgname-ublas-c++20-iterator.patch - patch -Np1 -i $srcdir/boost-1.79.0-la64.patch } build() { diff --git a/boost/boost-1.79.0-la64.patch b/boost/boost-1.79.0-la64.patch deleted file mode 100644 index 2b9602a900..0000000000 --- a/boost/boost-1.79.0-la64.patch +++ /dev/null @@ -1,55 +0,0 @@ -diff --git a/boostcpp.jam b/boostcpp.jam -index 082536e2a5..7565dae80d 100644 ---- a/boostcpp.jam -+++ b/boostcpp.jam -@@ -634,7 +634,7 @@ rule address-model ( ) - return @boostcpp.deduce-address-model ; - } - --local deducable-architectures = arm mips1 power riscv s390x sparc x86 combined ; -+local deducable-architectures = arm loongarch mips1 power riscv s390x sparc x86 combined ; - feature.feature deduced-architecture : $(deducable-architectures) : propagated optional composite hidden ; - for a in $(deducable-architectures) - { -@@ -645,9 +645,10 @@ rule deduce-architecture ( properties * ) - { - local result ; - local filtered = [ toolset-properties $(properties) ] ; -- local names = arm mips1 power riscv s390x sparc x86 combined ; -+ local names = arm loongarch mips1 power riscv s390x sparc x86 combined ; - local idx = [ configure.find-builds "default architecture" : $(filtered) - : /boost/architecture//arm -+ : /boost/architecture//loongarch - : /boost/architecture//mips1 - : /boost/architecture//power - : /boost/architecture//riscv -Submodule libs/config 08dced51e9..5c177b2269: -diff --git a/libs/config/checks/architecture/Jamfile.jam b/libs/config/checks/architecture/Jamfile.jam -index 2ba54f9a..e8838b41 100644 ---- a/libs/config/checks/architecture/Jamfile.jam -+++ b/libs/config/checks/architecture/Jamfile.jam -@@ -18,6 +18,7 @@ obj 64 : 64.cpp ; - - obj arm : arm.cpp ; - obj combined : combined.cpp ; -+obj loongarch : loongarch.cpp ; - obj mips : mips.cpp ; - alias mips1 : mips ; # Backwards compatibility - obj power : power.cpp ; -diff --git a/libs/config/checks/architecture/loongarch.cpp b/libs/config/checks/architecture/loongarch.cpp -new file mode 100644 -index 00000000..5be8cb09 ---- /dev/null -+++ b/libs/config/checks/architecture/loongarch.cpp -@@ -0,0 +1,11 @@ -+// loongarch.cpp -+// -+// Copyright (c) 2012 Steven Watanabe -+// -+// Distributed under the Boost Software License Version 1.0. 
(See -+// accompanying file LICENSE_1_0.txt or copy at -+// http://www.boost.org/LICENSE_1_0.txt) -+ -+#if !defined(__loongarch__) -+#error "Not LoongArch" -+#endif From b75f28c67a2a1e41b3c379e437588b21e5ee0fbc Mon Sep 17 00:00:00 2001 From: Xiaotian Wu Date: Mon, 19 Feb 2024 16:34:02 +0800 Subject: [PATCH 04/23] xorg-server --- xorg-server/PKGBUILD | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/xorg-server/PKGBUILD b/xorg-server/PKGBUILD index c8bc25ee3d..bc969a1560 100644 --- a/xorg-server/PKGBUILD +++ b/xorg-server/PKGBUILD @@ -29,13 +29,12 @@ validpgpkeys=('3C2C43D9447D5938EF4551EBE23B7E70B467F0BF' # Peter Hutterer (Who- sha512sums=('ad5edacbe8c7e2ebe6b4a690af94c7ea5ebc781d00b0e58ae2d273c78ceee2fa00b86d10479ad69da1b3233490619bae5a33db64c967c24bbfc5d5d39ddce1cb' 'SKIP' '672375cb5028ba9cda286e317d17bd8c9a9039483e7f79c21f223fd08ba07655729e9f59a082f4b8f5d8de45a77a9e9affce1002fb8c6657e26ef1a490654e49' - 'de5e2cb3c6825e6cf1f07ca0d52423e17f34d70ec7935e9dd24be5fb9883bf1e03b50ff584931bd3b41095c510ab2aa44d2573fd5feaebdcb59363b65607ff22') + 'de5e2cb3c6825e6cf1f07ca0d52423e17f34d70ec7935e9dd24be5fb9883bf1e03b50ff584931bd3b41095c510ab2aa44d2573fd5feaebdcb59363b65607ff22' + '1aa711f4948cd1557d77bd47a64ea92be55cf737b8204214c6c3ae2ecd00dc6928fd7a789d1aa5faaff5d6162f895a41efd332e8f1710d0a7c9b33326b057ec5' + '0e55cc994dd8f1309c6ea40cb1f5b2d763d850a466c9d30bd8619fd68ce369b19f098d28ab684db1bd2758a6402654cf93188651e863f0e1be4eed8cf9b40e14') prepare() { cd ${pkgbase}-$pkgver - - # FS#73274 - patch -Np1 -i ../xephyr_Dont_check_for_SeatId_anymore.patch # fix modesetting driver for loongson and gsgpu patch -Np1 -i ../0001-modesetting-match-against-Multimedia-Video-Controlle.patch } From e89488ef83290abafc3803b2dffd32d6503686c3 Mon Sep 17 00:00:00 2001 From: Xiaotian Wu Date: Tue, 20 Feb 2024 19:37:42 +0800 Subject: [PATCH 05/23] libjxl --- libjxl/PKGBUILD | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/libjxl/PKGBUILD b/libjxl/PKGBUILD index c491dde5c1..81f977eb81 100644 --- a/libjxl/PKGBUILD +++ b/libjxl/PKGBUILD @@ -82,7 +82,7 @@ package_libjxl() { DESTDIR="$pkgdir" cmake --install build install -D -m644 libjxl/{LICENSE,PATENTS} -t "${pkgdir}/usr/share/licenses/${pkgname}" -# mv "${pkgdir}/usr/share/java"/{org.jpeg.jpegxl,jpegxl}.jar + mv "${pkgdir}/usr/share/java"/{org.jpeg.jpegxl,jpegxl}.jar } package_libjxl-doc() { From 11f7d223c21e7932fcdba090d32c4b4fdf27ebe0 Mon Sep 17 00:00:00 2001 From: Xiaotian Wu Date: Tue, 20 Feb 2024 21:07:13 +0800 Subject: [PATCH 06/23] mediainfo --- mediainfo/PKGBUILD | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/mediainfo/PKGBUILD b/mediainfo/PKGBUILD index e9f6488e15..6fa948cb16 100644 --- a/mediainfo/PKGBUILD +++ b/mediainfo/PKGBUILD @@ -14,8 +14,8 @@ license=('BSD') depends=("libmediainfo=$pkgver") makedepends=('wxwidgets-gtk3') source=("$pkgname-$pkgver.tar.gz::https://github.com/MediaArea/MediaInfo/archive/v$pkgver.tar.gz") -sha512sums=('e5adb989bd9686d64c952794155993d7e7d9b0d81500e99cd9b7af61258bb71639ac1aa913dcd36ba51dca47e81ab8244de2bff37b603bd461cf13c42f59487a') -b2sums=('9f0a1d1c0f9540be16963fb8f8d363f071efa25fcb2deb2494481d0eca58db79a29a6c5f8cc22eb01df30626ce44da7a0d921de24f6553e809e2588009445fa1') +sha512sums=('33747cacd0657b67e7bc63596cb58cb4a8c6c2cab50c70b3075c1ffd12a9d165121a1dbcb2114a1e822273807341faa436181c3a0c3dfb1d98e45f120052d720') +b2sums=('33b19a211b500caa0cb1af4ba17dfea70ffa1b169f7ff7a903614b53a08c54f6f8216f8a2e2bfffe02eef8733c741bc10af928111d336675634ce95418a5405d') build() { cd 
"MediaInfo-$pkgver" From ae552bb24894b8af24540f3781777644e0b90d18 Mon Sep 17 00:00:00 2001 From: Xiaotian Wu Date: Thu, 22 Feb 2024 11:44:00 +0800 Subject: [PATCH 07/23] v2ray --- v2ray/PKGBUILD | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/v2ray/PKGBUILD b/v2ray/PKGBUILD index 87f2774636..6cbaf8d418 100644 --- a/v2ray/PKGBUILD +++ b/v2ray/PKGBUILD @@ -27,8 +27,7 @@ build() { export CGO_CFLAGS="${CFLAGS}" export CGO_CPPFLAGS="${CPPFLAGS}" export GOPROXY=https://goproxy.cn - go mod edit -replace=golang.org/x/sys=github.com/golang/sys@v0.0.0-20220622161953-175b2fd9d664 - go mod edit -replace=golang.org/x/net=github.com/golang/net@v0.0.0-20220622184535-263ec571b305 + go mod edit -replace=github.com/quic-go/quic-go=github.com/quic-go/quic-go@v0.41.0 go mod tidy go build -o v2ray ./main } From 0dd9a83b4d25c32ad2f23bafa1d7caac51970998 Mon Sep 17 00:00:00 2001 From: Xiaotian Wu Date: Thu, 22 Feb 2024 19:26:05 +0800 Subject: [PATCH 08/23] cargo-c --- cargo-c/PKGBUILD | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cargo-c/PKGBUILD b/cargo-c/PKGBUILD index ac384b4564..b27ca3b58f 100644 --- a/cargo-c/PKGBUILD +++ b/cargo-c/PKGBUILD @@ -20,7 +20,7 @@ sha256sums=('a52bb78cf6db00aa1caf06c679cfece27357c84367d8ac167d715e05e5f5a778' prepare() { ln -sf "../${pkgname}-${pkgver}.Cargo.lock" "${pkgname}-${pkgver}/Cargo.lock" - cargo fetch --locked --manifest-path="${pkgname}-${pkgver}/Cargo.toml" + cargo fetch --locked --target "$(rustc -vV | sed -n 's/host: //p')" --manifest-path="${pkgname}-${pkgver}/Cargo.toml" } build() { From 5ca49dbd14ca2e3d56c4ae887cbef3244eb07960 Mon Sep 17 00:00:00 2001 From: Xiaotian Wu Date: Thu, 7 Mar 2024 12:52:03 +0800 Subject: [PATCH 09/23] edk2 --- edk2/60-edk2-loongarch64.json | 5 ++-- edk2/PKGBUILD | 14 ++++------- edk2/relax_edk2_gcc14.diff | 44 ----------------------------------- 3 files changed, 8 insertions(+), 55 deletions(-) delete mode 100644 edk2/relax_edk2_gcc14.diff diff --git a/edk2/60-edk2-loongarch64.json b/edk2/60-edk2-loongarch64.json index 4806bee426..b998701d8c 100644 --- a/edk2/60-edk2-loongarch64.json +++ b/edk2/60-edk2-loongarch64.json @@ -5,6 +5,7 @@ ], "mapping": { "device": "flash", + "mode": "split", "executable": { "filename": "/usr/share/edk2/loongarch64/QEMU_CODE.fd", "format": "raw" @@ -18,12 +19,12 @@ { "architecture": "loongarch64", "machines": [ - "virt-*" + "virt" ] } ], "features": [ - "verbose-static" + "acpi" ], "tags": [ diff --git a/edk2/PKGBUILD b/edk2/PKGBUILD index e684ce9b15..ef009286e7 100644 --- a/edk2/PKGBUILD +++ b/edk2/PKGBUILD @@ -30,7 +30,7 @@ makedepends=( options=(!makeflags) source=( git+$url#tag=$_commit - edk2-platforms::git+https://github.com/tianocore/edk2-platforms.git + edk2-platforms::git+https://github.com/tianocore/edk2-platforms.git#tag=4e34823e9c954c67a7b5b81799549d493d1235fa $pkgbase-softfloat::git+https://github.com/ucb-bar/berkeley-softfloat-3.git https://www.openssl.org/source/openssl-$_openssl_ver.tar.gz{,.asc} https://github.com/google/brotli/archive/v$_brotli_ver/brotli-$_brotli_ver.tar.gz @@ -59,7 +59,6 @@ source=( 82-edk2-ovmf-ia32-on-x86_64-csm-4m.json $pkgbase-202202-brotli.patch edk2-use-env-toolchains.patch - relax_edk2_gcc14.diff ) sha512sums=('SKIP' 'SKIP' @@ -79,7 +78,7 @@ sha512sums=('SKIP' 'b17d3ff5c9230c394ca4ee8229842c801b0cab3d88b546f2094dd0b42f2bc535f5bda3f9faee4b5418482185887648f906daaf0b7307c4c19747f5f0ab504f9a' '126822ef6198e87fb38014a5ba21969c9a163b41df3cdef6825317971ecc8df4a63099113e687634b88648acc93f24917d729e1c44295d2df7012288740307d3' 
'bbf663d539a985504d5fbc95552a2a60ac860a6bce4a62ecc551292d838b41cba3b5203f580a76a05e9f862ef98e7a3e5da39505c1f39d8ef48c08778fac584a' - 'a6af6487c470e9af86022ee44ea53d2a0f513cacc413c9cc61eeec80ee2d1569daddf311ce8ccf4660d7899d7dd8119f1ef74953d91462c949c4d7ce8c129f16' + '66a0d97f0cc8b4c2184f235ace4fd372efa2cfa175178a05b40053c5e4ef887487e11813d3ff7193c4554c8841166c73dc4f930cb6ae04cd28125e51631d0a69' 'b5829aaf5ebae0073de26695eddbda61d117fbfb5e3c9f169fade31127ceb9bbc332af760bf6033d90a277d44c095fc30fe0d69defd81fb1aaf82cba0cf6fb90' '2e03935b57fabbbac4493ba6d54ac5b68abfd75775a56c95f5ba8c4627ba38260a3691a335e597c65096c50ce5038389efbb41ef5822a1ff49a8f312d8e37f75' '6e91029d451c9d43c1488ce0e252e6abc18fb1da48b6938d6ae3644fce58c97da6fff6addc60740b1b9ed5e6b86e9d7e94ee0dd55ea73833a82401b4c6f8c936' @@ -91,8 +90,7 @@ sha512sums=('SKIP' '692e5bdefb61ae7b8d6e2063f163e2b68136b2522d606806766186f10c5fae1f7583fd83cda52c235d0d8eb0651e5a711f505021a8d8d949d8dccfce7f0c82ac' 'c699ad500f24569643a4581f4bb5be0e4a90d160f0b3ae7728cf8e27b39665983b80439ca7b853b1bd9a174c8c123cbaf7ed3cd4a17d6460f4fec670c62a1183' 'd074c794796d17d77eed7c34201d93d7ef3f1322fe1ea4a2ddd7137fae884d49f94f465ee39cfd8346b026142668a41f5a8671e521409505dd6d002f71c0eebc' - '94d889b4bc1dacf6ab4543b5e6ac7f99a5ec71f0362577df9e49f6ed0af4275455d8fb2f1ce8c279b7e4ce8a24cafa2006446f9fe793ab60680dcd328abf0429' - '19c992e3c4d99a5335666e40b6619d3c701359db502722950217e7916ecb14901a5d14c400c8e871d91fe1477ab71849697bdad06014df3b13a59cbadfb0bf1d') + '94d889b4bc1dacf6ab4543b5e6ac7f99a5ec71f0362577df9e49f6ed0af4275455d8fb2f1ce8c279b7e4ce8a24cafa2006446f9fe793ab60680dcd328abf0429') b2sums=('SKIP' 'SKIP' 'SKIP' @@ -111,7 +109,7 @@ b2sums=('SKIP' '01dbc4cad102535504eace2d9da225a481b62785d37365f1dea2d1210990ca6177485aa0134a074c09d253b539f12ae810706a77a46779ddb7dd4f1b9b934011' 'f84ff505702e4b2a38b6fd23fbb732c25d3102a04bb6918b0cc3b3d7528a92626324199cea4ed91955aade98f308f1d1037255f26cc9ee21ace75fc6376e7df6' '04a7eb373d6ea1415d7cd6e8dea0d16b75cbb1fb88572a30b8ce9960dd0404adc7f25fce2ccfb103eb09405411dc4d4e0084236e4c814916d81e957dc6aedfd4' - 'e869e1c5751a1691598c3247dcfbcadc6652ecc27cc26dc66cd9e1f7336ea7c4b5e757892137a259b50441d86b83939f01943113787bedb4318a42657dcaac59' + 'e273fb4d50a98e8dcbc6e439579508d03f38e64fc69e5671e9a01a0e6591b0241cedfe07f9fe9eaedca07c96a118c1ba7bf5556e51888826874c7e0386446cc2' 'b4fcc2351b2d77b85cdce35180353aef06900af1554479853bf915d27a756d4bbed50a50e85b72e2e7f4868e6dec3b9c5b27f743d7c112e24e4e0c50cd103a33' '1783b83c6e39c99feb59043c3cff48b24bef55d43949cd9a3097dfbee73a6cf511c180d610a52de876ccea9833fec46d7a88ebce8114e54620b9988232fb9bcc' '1d76eda20067c1bb9928b0304244ab5770a9c4e1f401a74d51da31a47f3a5d6e1e64b5394768cdae6a5bc396b68b6a32eb1a407e1c6377461dd2d5f2f5a2538d' @@ -123,8 +121,7 @@ b2sums=('SKIP' '0ad956e3e662909abafd0b9a2b7ef12e35a8832183cb41e17dcafaa4f5db1e47ef20b3040268644daebb24f66c18b99de07f41e7d62089691c07de688a08f05a' 'a44b5ffc35d78925ac7362ec2cf75475d02e05ed0b9e8771c909d090187aaff7436e8d856d58b8a56827990006b813c63318b60a8a7780844c829a2b13a502cf' '644c071dc4fbbccaa64b0b1babcad60395ffce1a7a317a6f5380eff44cbb886be5f29156a8e967ab02b508a33954fcf5602606b43362cc3bb1936a8cfc3a3c07' - '8c2e7b07d669e97388567f6a2d449a3f33fe98823d7a0807882b4a72f5d22c8e9e3141f0009abf2b398adb95562f8e281e6d53041e0c2ac36f178a320d5be55a' - '02869de544482f0d4d8a796cb94fc76491c49f00ab361218fd2bb6b480a974c8ce3e8706ab70f3d977724491db47bccfc0b7947edae1e57f46212ebf127750d6') + 
'8c2e7b07d669e97388567f6a2d449a3f33fe98823d7a0807882b4a72f5d22c8e9e3141f0009abf2b398adb95562f8e281e6d53041e0c2ac36f178a320d5be55a') validpgpkeys=( 8657ABB260F056B1E5190839D9C4D26D0E604491 # Matt Caswell 7953AC1FBC3DC8B3B292393ED5E9E43F7DF9EE8C # Richard Levitte @@ -143,7 +140,6 @@ prepare() { # patch to be able to use brotli 1.0.9 patch -Np1 -d $pkgbase -i ../$pkgbase-202202-brotli.patch patch -Np1 -d $pkgbase -i ../edk2-use-env-toolchains.patch - patch -Np1 -d $pkgbase -i ../relax_edk2_gcc14.diff cd $pkgbase diff --git a/edk2/relax_edk2_gcc14.diff b/edk2/relax_edk2_gcc14.diff deleted file mode 100644 index 35901ff55b..0000000000 --- a/edk2/relax_edk2_gcc14.diff +++ /dev/null @@ -1,44 +0,0 @@ -diff --git a/BaseTools/Source/C/GenFw/Elf64Convert.c b/BaseTools/Source/C/GenFw/Elf64Convert.c -index d53ecb1767..8018d68db1 100644 ---- a/BaseTools/Source/C/GenFw/Elf64Convert.c -+++ b/BaseTools/Source/C/GenFw/Elf64Convert.c -@@ -1778,7 +1778,11 @@ WriteSections64 ( - case R_LARCH_TLS_LD64_HI20: - case R_LARCH_TLS_GD_PC_HI20: - case R_LARCH_TLS_GD64_HI20: -+ case R_LARCH_32_PCREL: - case R_LARCH_RELAX: -+ case R_LARCH_DELETE: -+ case R_LARCH_ALIGN: -+ case R_LARCH_PCREL20_S2: - // - // These types are not used or do not require fixup. - // -@@ -2185,7 +2189,11 @@ WriteRelocations64 ( - case R_LARCH_TLS_LD64_HI20: - case R_LARCH_TLS_GD_PC_HI20: - case R_LARCH_TLS_GD64_HI20: -+ case R_LARCH_32_PCREL: - case R_LARCH_RELAX: -+ case R_LARCH_DELETE: -+ case R_LARCH_ALIGN: -+ case R_LARCH_PCREL20_S2: - // - // These types are not used or do not require fixup in PE format files. - // -diff --git a/BaseTools/Source/C/GenFw/elf_common.h b/BaseTools/Source/C/GenFw/elf_common.h -index ccd32804b0..d3a5303953 100644 ---- a/BaseTools/Source/C/GenFw/elf_common.h -+++ b/BaseTools/Source/C/GenFw/elf_common.h -@@ -1144,5 +1144,10 @@ typedef struct { - #define R_LARCH_TLS_LD64_HI20 96 - #define R_LARCH_TLS_GD_PC_HI20 97 - #define R_LARCH_TLS_GD64_HI20 98 --#define R_LARCH_RELAX 99 -+#define R_LARCH_32_PCREL 99 -+#define R_LARCH_RELAX 100 -+#define R_LARCH_DELETE 101 -+#define R_LARCH_ALIGN 102 -+#define R_LARCH_PCREL20_S2 103 -+ - #endif /* !_SYS_ELF_COMMON_H_ */ From 71220844a45c02ad2aebc7ed172c1430a4ed1ffb Mon Sep 17 00:00:00 2001 From: Xiaotian Wu Date: Wed, 13 Mar 2024 21:26:51 +0800 Subject: [PATCH 10/23] linux-tools --- linux-tools/PKGBUILD | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/linux-tools/PKGBUILD b/linux-tools/PKGBUILD index 47421ceb24..b305c2aabd 100644 --- a/linux-tools/PKGBUILD +++ b/linux-tools/PKGBUILD @@ -4,7 +4,7 @@ pkgbase=linux-tools pkgname=( 'bootconfig' 'bpf' - 'cgroup_event_listener' +# 'cgroup_event_listener' 'cpupower' 'hyperv' 'linux-tools-meta' @@ -14,7 +14,7 @@ pkgname=( 'usbip' # 'x86_energy_perf_policy' ) -pkgver=6.7 +pkgver=6.8 pkgrel=1 license=('GPL2') arch=('loong64' 'x86_64') @@ -123,10 +123,10 @@ build() { make popd - echo ':: cgroup_event_listener' - pushd linux/tools/cgroup - make - popd +# echo ':: cgroup_event_listener' +# pushd linux/tools/cgroup +# make +# popd # echo ':: turbostat' # pushd linux/tools/power/x86/turbostat @@ -159,7 +159,7 @@ package_linux-tools-meta() { depends=( 'bootconfig' 'bpf' - 'cgroup_event_listener' +# 'cgroup_event_listener' 'cpupower' 'hyperv' 'perf' From 5e858b5ee0156df9fd9d86eaf1631833d331af2b Mon Sep 17 00:00:00 2001 From: Xiaotian Wu Date: Wed, 13 Mar 2024 22:42:48 +0800 Subject: [PATCH 11/23] firefox --- .../0001-Add-support-for-LoongArch64.patch | 36 ++-------- ...2-Enable-VA-API-support-for-AMD-GPUs.patch 
| 6 +- ...=> 0003-Enable-WebRTC-for-LoongArch.patch} | 70 +++++++++---------- ...rchitectural-limit-on-VA-API-support.patch | 40 ----------- ...0004-Fix-libyuv-build-with-LSX-LASX.patch} | 14 ++-- firefox/PKGBUILD | 32 ++++----- 6 files changed, 65 insertions(+), 133 deletions(-) rename firefox/{0004-Enable-WebRTC-for-LoongArch.patch => 0003-Enable-WebRTC-for-LoongArch.patch} (77%) delete mode 100644 firefox/0003-Remove-architectural-limit-on-VA-API-support.patch rename firefox/{0005-Fix-libyuv-build-with-LSX-LASX.patch => 0004-Fix-libyuv-build-with-LSX-LASX.patch} (98%) diff --git a/firefox/0001-Add-support-for-LoongArch64.patch b/firefox/0001-Add-support-for-LoongArch64.patch index b8a33207c9..01bd919907 100644 --- a/firefox/0001-Add-support-for-LoongArch64.patch +++ b/firefox/0001-Add-support-for-LoongArch64.patch @@ -1,7 +1,7 @@ -From 0c4dfaca7c7a38244034a6d872c0c7aeec0d4819 Mon Sep 17 00:00:00 2001 +From 6725bfacfd2142d0209c9d9af3c99f4c9d118aeb Mon Sep 17 00:00:00 2001 From: Jiangjin Wang Date: Sun, 22 Oct 2023 22:13:17 -0700 -Subject: [PATCH 1/5] Add support for LoongArch64 +Subject: [PATCH 1/4] Add support for LoongArch64 Adapted from LoongArchLinux. Rebased to Firefox 118.0.2. @@ -9,14 +9,12 @@ Co-Authored-By: loongson Co-Authored-By: WANG Xuerui --- third_party/libwebrtc/build/build_config.h | 4 ++++ - third_party/rust/nix/.cargo-checksum.json | 2 +- - third_party/rust/nix/src/sys/ioctl/linux.rs | 1 + toolkit/components/telemetry/pingsender/pingsender.cpp | 1 + toolkit/moz.configure | 2 +- - 5 files changed, 8 insertions(+), 2 deletions(-) + 3 files changed, 6 insertions(+), 1 deletion(-) diff --git a/third_party/libwebrtc/build/build_config.h b/third_party/libwebrtc/build/build_config.h -index c39ae9da50f99..28191de02654b 100644 +index c39ae9d..28191de 100644 --- a/third_party/libwebrtc/build/build_config.h +++ b/third_party/libwebrtc/build/build_config.h @@ -210,6 +210,10 @@ @@ -30,28 +28,8 @@ index c39ae9da50f99..28191de02654b 100644 #else #error Please add support for your architecture in build/build_config.h #endif -diff --git a/third_party/rust/nix/.cargo-checksum.json b/third_party/rust/nix/.cargo-checksum.json -index f4c932b88926b..b7b9c9f3c9a89 100644 ---- a/third_party/rust/nix/.cargo-checksum.json -+++ b/third_party/rust/nix/.cargo-checksum.json -@@ -1 +1 @@ 
--{"files":{"CHANGELOG.md":"8ee4e556e53d1b39400a48675d3ecff0bf27e419accab7ca3be76ab934289548","Cargo.toml":"2e6eff9170182f107188b8bc9802efd044ef47178afc7f138950ecff1c1ceb96","LICENSE":"66e3ee1fa7f909ad3c612d556f2a0cdabcd809ad6e66f3b0605015ac64841b70","README.md":"1ed9a0e26ae6e575b3262ae734dd02889455593b761ee62403ea5a64104f3c9c","src/dir.rs":"0280a2dc480bd913f24ed84fbe26569fa2e8eefa660e5ad7c21e05fc34c14d16","src/env.rs":"028bc5e20139ebba418a655a2978a53335dc7680bf1de43d2c8333dd72cfa5c4","src/errno.rs":"e55d075858e349d9afea9ce0480f7fb7ba4dccccf0694fd7b3280b918836203c","src/fcntl.rs":"ea8f43d8fec0b6c3b7d903333e4c1ce85611684a4afd561c55cfe4b61a979e94","src/features.rs":"5b4a0831e5f4b79a6f0e42ed052fd66c875da18959750be51e41fb59ac19feed","src/ifaddrs.rs":"377865eb48040d28c392a1aec0221320108e3392ea285d23405ae2cfa5c54b20","src/kmod.rs":"c818ced08d55ae36fdf82fa914ba856b688e37234d574d3faa37128211d512fb","src/lib.rs":"a62fac2ba7111157c5b64251f67f8a189f04bd587d5c80703454a596ea7ae5d9","src/macros.rs":"e23d7d8be22ef0bf9febaaf2739585453103607c0139bd3995a324e4a16d011e","src/mount/bsd.rs":"4cf35606a63d7ca41caac3b38f01e2b70c63e71978c0529f19fc79182629dbe0","src/mount/linux.rs":"6e5d61788dedf1ca4416c6c6a3a9c6c747f9352c26d863f4a1d4142e288584d6","src/mount/mod.rs":"ba9f60eb831224ab73bdd87e00e15d13b9ce9efb70b18bf8f3fe60406d522b3e","src/mqueue.rs":"ed0a189036b2437b5f7f7f1312fa545540b06ca72171b451d8bce42cc3627534","src/net/if_.rs":"b32a8a1f952de60d95e549779a5c673fd72aa665e86bfdfc8ec6badf3016b9b1","src/net/mod.rs":"577f70170e53d4a6de1abb70bf8f1031ec3e65c0e63ef5fcf05c907125e7ac17","src/poll.rs":"2fc1d144fb40db51811c6357b520ab7993529702d8f0d8060c903118ff4f7259","src/pty.rs":"27b4f76c23acf02542674017067fee74cdcac907338458700a1aa4d6f6a62e27","src/sched.rs":"403aa5ebed81910263d42a94717612b737550bf053227b7d90f1c8949188d919","src/sys/aio.rs":"ae091de8540c97da374a39e7d154c1b3ce50f41e6fc20a45c6b06eb838e74366","src/sys/epoll.rs":"28e22debf474d1b047e8044a00b354c25dab2fa125960f9f2f14cc34289fd5c9","src/sys/event.rs":"dbd8e84bccb813839295b0a336485783ef19548d2317931f0ceb5ee62f839a40","src/sys/eventfd.rs":"c8db8f5874726fdad289ad5e2603a7d71a1ae5a899dcde3a35d3edff8b498b7e","src/sys/inotify.rs":"5b4da774313afa9c28c3f92f9d07dce9bf4c8d044fd6a16f19480e79a19e808b","src/sys/ioctl/bsd.rs":"bbd02e30b0a78c1cb22777d9b00cfcbba9c68505cffc06118ac68474cf6fea39","src/sys/ioctl/linux.rs":"028181834d119b834bf399f2b8a6176cc57e75144693f28f32059d087d8c8018","src/sys/ioctl/mod.rs":"89b20579476b2e0254e0ecb1b41830cccd7027a22cbdb816a9d4ec3924842ac1","src/sys/memfd.rs":"f58d7fbe67c4b994832d72f5fbd59c136c8f1ae88ea8b0bc1c099db2d847ee6c","src/sys/mman.rs":"17df1bc34ba92bdd6bad1e11e4ef139998117f6c468c8f560421858f3cc899a5","src/sys/mod.rs":"baabf649f758ad4acce849ec1795dd4e4f9c6539e677bad5fa777300a4871dcb","src/sys/personality.rs":"aa89760c023bfec3fca5d8636f9eac9d337f5547933793ce6df7a0de97ae6ee1","src/sys/pthread.rs":"258cdf7ff0b61a4afa6d228109e4cb4fb88d859bb8dfe6c959d95130fb010906","src/sys/ptrace/bsd.rs":"4c590d8f023ff52f396f8b6f2150c08e5c9486d3088d9c173db33a70d616b800","src/sys/ptrace/linux.rs":"c82db3fb18aa97755f9ccb440a957cd46d664968a94045830c5d74d2d53bc19f","src/sys/ptrace/mod.rs":"e9e5d970097f5eafffba900959d4fdbf233bff9ed7f599fc9896bb44d86a57a4","src/sys/quota.rs":"02e698a25f0986fb43aa88689f3d3d8b9edc6ae48496ad02f7214fccaa493e00","src/sys/reboot.rs":"eacdf57694a6629fb05787e16450446102a62818274495f2ad4e445807d09221","src/sys/resource.rs":"d498d0c00fd30e35e1269a8902cb812014d813f63ec95364f8f59f1912ba5657","src/sys/select.rs":"65c39b129d3cc85b8ca026ff26dcf80c56398
24f43715881c3c1bbb6bf0c8a60","src/sys/sendfile.rs":"7a62099f9771fecff49b9c11210341e3c1a4acf22f8dfb96d395e29421648676","src/sys/signal.rs":"c3e13a2edea54d190a4b051f62efc97953c00b5051a9fda0e39e3bc732a31939","src/sys/signalfd.rs":"583524434fd37143be3db37fa6f6cbd339f7946416f05b58a95e246947e5cc9d","src/sys/socket/addr.rs":"84df895052f59ec84774b189ffb285d2a37a9703af6c8310ae5040cca1a2583e","src/sys/socket/mod.rs":"6deb55438cad3606385303f036b0efd842dfd759fba93611911f5a4f2613c9dc","src/sys/socket/sockopt.rs":"ed1f920364bfe88bbe6eaeeefb27a63bfcdd7d67604aca2f03e22f2b502df55a","src/sys/stat.rs":"337dea8d55d6177dc85b3235b40b8a3e81af7f4a6e2806a0b2f730bec5424350","src/sys/statfs.rs":"17103659a85279bac046c69cb3b22bf2c11c2492cffb0edfa4c3b233d161a2f2","src/sys/statvfs.rs":"f81e3900ef90d62e7eceaf1b6ff8dcfd965466714c033eb4717687f692171f48","src/sys/sysinfo.rs":"b4519b1ca091c9dbe94d2a6fd6304944bf3df5626973d2c6884022559706f0d9","src/sys/termios.rs":"7923f9846a8122096b6b1cd240d3618b876ce500a751ac434954d172e2e85745","src/sys/time.rs":"9026033b60a5ccc95b70424aef043c8c748722e2ea8c7c86366ecd4585b651a0","src/sys/timer.rs":"8c10f0e7cfac857ad00460be30bc68b957909cc9296e70718d3b5d4a0babafde","src/sys/timerfd.rs":"ef7c48aefdcfac13316eeddbef5da04cf12e9f574b8d9f43402c02b6b8db86b3","src/sys/uio.rs":"e1d59ccbee9d46c65d3aa8c36aa3a3222539beea0d20163a8b707d08fca14e09","src/sys/utsname.rs":"0cdda0cc111caaa0e4ebe2d4588bdc825d878e5bcb7a9136073b15f87a20e11f","src/sys/wait.rs":"cc70d2d9b880ff6c48577a479c209af6127067bc013a90ee22538e4dfad7d2b4","src/time.rs":"d4e0872361a57810837f5bd790cbca3a2b9db1ac4694a3c52d1564ad3532d3be","src/ucontext.rs":"b8f2e04757a9c2bc38c3b1e259d3a013da8a730fe9bfbe5487637395681b43d3","src/unistd.rs":"e19be456124731c5b93aef92ed72a7c4c9092e28db0649814ba3fcc1f0d620fa","test/common/mod.rs":"1d7e28e3635754664cd056f3a1079232ff5c118df619e1d0551a9972eb0b3cd6","test/sys/mod.rs":"87b2891d83067ff21f72b8ff7fde3019dc45b6877282ac278b6da151de45c7a7","test/sys/test_aio.rs":"4dac9f716f852f1f438f78d6e64bf041e6fd316bf15dcb27afffaf0894bdefa6","test/sys/test_aio_drop.rs":"614070155fa16a979b7341d001639c5ce24a1d6f632c3abce45a5a6d49c4039b","test/sys/test_epoll.rs":"ffe95e36c79e37426ef8e8ca3b137b7f35ea0333ce666a20a4b7878db17680e9","test/sys/test_inotify.rs":"a141b9a995892547b51ceeb6761a70a6b86d37e8f38d13ea2c497b81b4b0f49f","test/sys/test_ioctl.rs":"00ccc5afb665e533a0a4b6d6a6be438bcaea19fce335390feef4e91d17b3036c","test/sys/test_mman.rs":"2b4161964c9204b74659028b0f89a88f4e3bcc9886137a3039737cd91d2698cb","test/sys/test_pthread.rs":"ace36a2f5587f1874854281b4fd84e4e4d892a1e3c5cc38ced57975739522ad6","test/sys/test_ptrace.rs":"0385eebc8b1b8c72f655b745769decd9143ad83018198375982da0896310456b","test/sys/test_select.rs":"54cea1c34ad28d5770a613c1c3cbc3b1064b22037ec2b9d3fcd422d3be9e60a7","test/sys/test_signal.rs":"acc9941227bd3e2afad323613c2b8c83902ed0486d3745fd72704f395924f1e4","test/sys/test_signalfd.rs":"0e1060143e2612c490bc3d0168d0bbb042ef55e3f1d91d2578b9e42e4310a14d","test/sys/test_socket.rs":"d2df1001f9a0b2dac0b88051a67c3868bb216e72e4da4eecd11c4448b9fa4b40","test/sys/test_sockopt.rs":"4465f22f718442f3f7b502e052dad02b93cebfa3b71fa55ff4f25fb02534acab","test/sys/test_stat.rs":"6630a28217fd708bb84cd4f7e7101836b74f2420f9888923fdab664ccc331c1d","test/sys/test_sysinfo.rs":"ffd49bc96375914a2c4a4a59730cae8072f85771e2c4a80d3403df38d967e272","test/sys/test_termios.rs":"e5bcef10c84bd7583d600d5601835bcb3cfc88781cb283ab0185bbef5faf4327","test/sys/test_timerfd.rs":"cfed3abf58118611d08f6985251a7739cff67108e11214222a1d2394a3a026ce","test/sys/test_uio.rs":"326
56bd0a5699e4d019aa928edf104637937179782914a82d50d37226e84c421","test/sys/test_wait.rs":"6fd59fffeeb09ff620c359baefd062ba777598982b6cb001ccc07b6bc7605493","test/test.rs":"11f40b0718ddd1a150cb9e703d56d0b2a9462306505a2245ddf273a2011f48b5","test/test_clearenv.rs":"45ca548035b3c20ec87314715feaba2be973709a635d85b8cde46fd1d9f1ecd4","test/test_dir.rs":"ae3c11c58cb06da6557aa2a839c6653c54cd7724283fffe9df5a5d3feabdd89a","test/test_fcntl.rs":"71dcb87f7b04d78fc62937ba46cb7f0f1f2dbb330b63a996ea2e8ec9056b98a9","test/test_kmod/hello_mod/Makefile":"0219f7bce0603f97d997fb377ca071966c90333ecc665e78a54dfeb97a9c811b","test/test_kmod/hello_mod/hello.c":"bcac6b19c5bd807e1f3878c15e426acc85785a8ade9840c3bb4d068635c9188c","test/test_kmod/mod.rs":"b4ae25841c2f06f32de9f1acd8230eeccd7095721302ebe78ad454e4e4f9c783","test/test_mount.rs":"6dd242b6e23c9c39e1a75612bbea62573898818ab374c3c032c2cdb97033554d","test/test_mq.rs":"136071f24131aac0e65d5f29ac18e3806641dfae1164813f5570c0e3a6f70553","test/test_net.rs":"f2912327ebb2a3d37e6cff02a5ac3106cf889cc5c74404db4ef0034059ba26f1","test/test_nix_path.rs":"01ba4719c80b6fe911b091a7c05124b64eeece964e09c058ef8f9805daca546b","test/test_nmount.rs":"d6c112547bb80968170b5497cda4b6cbf69dabec6f51d494bd52298995ceff18","test/test_poll.rs":"3e0b8f0397ba080785c61a3bfc3d637bc87f324bc4e52b5f1bf3ca0d32dbc9fe","test/test_pty.rs":"b26238a0783746cb31880e11eebc1913149be999ce75fbc2d6677bdd1e2731b2","test/test_ptymaster_drop.rs":"ae63c815f5028ddc67d194e86559483018ab1816316bdb917f40cee9364fd8a5","test/test_resource.rs":"40aef790ab745cec31a4b333d2ca406b462aa9bdf4a6d3756371e498b8d51e9a","test/test_sched.rs":"c4579bd376fab8816e63b07fa9ace31dc08e63ebb7c855a2c450698090d1d1e8","test/test_sendfile.rs":"bb41b4f3621b518e397d3a5b5ad3c5dcef3fe506afe516eab7572fbab92b77e3","test/test_stat.rs":"c407ca47a5258750076d041afad2f6add4c3563be36628bde1c5b314f5d0765d","test/test_time.rs":"f7a21b1e279e60e84909d5dadda97ded66d3326b131fe317badf9af0a1b50335","test/test_timer.rs":"3ae20d364f075d2811f3ff94eda9886682cc21d8807656007d2464fe36d1e361","test/test_unistd.rs":"20a00be4fbe26302ea5fe50ce25b99265dc763db138663d6aa1d7ac729a1d292"},"package":"bfdda3d196821d6af13126e40375cdf7da646a96114af134d5f417a9a1dc8e1a"} -\ No newline at end of file 
-+{"files":{"CHANGELOG.md":"8ee4e556e53d1b39400a48675d3ecff0bf27e419accab7ca3be76ab934289548","Cargo.toml":"2e6eff9170182f107188b8bc9802efd044ef47178afc7f138950ecff1c1ceb96","LICENSE":"66e3ee1fa7f909ad3c612d556f2a0cdabcd809ad6e66f3b0605015ac64841b70","README.md":"1ed9a0e26ae6e575b3262ae734dd02889455593b761ee62403ea5a64104f3c9c","src/dir.rs":"0280a2dc480bd913f24ed84fbe26569fa2e8eefa660e5ad7c21e05fc34c14d16","src/env.rs":"028bc5e20139ebba418a655a2978a53335dc7680bf1de43d2c8333dd72cfa5c4","src/errno.rs":"e55d075858e349d9afea9ce0480f7fb7ba4dccccf0694fd7b3280b918836203c","src/fcntl.rs":"ea8f43d8fec0b6c3b7d903333e4c1ce85611684a4afd561c55cfe4b61a979e94","src/features.rs":"5b4a0831e5f4b79a6f0e42ed052fd66c875da18959750be51e41fb59ac19feed","src/ifaddrs.rs":"377865eb48040d28c392a1aec0221320108e3392ea285d23405ae2cfa5c54b20","src/kmod.rs":"c818ced08d55ae36fdf82fa914ba856b688e37234d574d3faa37128211d512fb","src/lib.rs":"a62fac2ba7111157c5b64251f67f8a189f04bd587d5c80703454a596ea7ae5d9","src/macros.rs":"e23d7d8be22ef0bf9febaaf2739585453103607c0139bd3995a324e4a16d011e","src/mount/bsd.rs":"4cf35606a63d7ca41caac3b38f01e2b70c63e71978c0529f19fc79182629dbe0","src/mount/linux.rs":"6e5d61788dedf1ca4416c6c6a3a9c6c747f9352c26d863f4a1d4142e288584d6","src/mount/mod.rs":"ba9f60eb831224ab73bdd87e00e15d13b9ce9efb70b18bf8f3fe60406d522b3e","src/mqueue.rs":"ed0a189036b2437b5f7f7f1312fa545540b06ca72171b451d8bce42cc3627534","src/net/if_.rs":"b32a8a1f952de60d95e549779a5c673fd72aa665e86bfdfc8ec6badf3016b9b1","src/net/mod.rs":"577f70170e53d4a6de1abb70bf8f1031ec3e65c0e63ef5fcf05c907125e7ac17","src/poll.rs":"2fc1d144fb40db51811c6357b520ab7993529702d8f0d8060c903118ff4f7259","src/pty.rs":"27b4f76c23acf02542674017067fee74cdcac907338458700a1aa4d6f6a62e27","src/sched.rs":"403aa5ebed81910263d42a94717612b737550bf053227b7d90f1c8949188d919","src/sys/aio.rs":"ae091de8540c97da374a39e7d154c1b3ce50f41e6fc20a45c6b06eb838e74366","src/sys/epoll.rs":"28e22debf474d1b047e8044a00b354c25dab2fa125960f9f2f14cc34289fd5c9","src/sys/event.rs":"dbd8e84bccb813839295b0a336485783ef19548d2317931f0ceb5ee62f839a40","src/sys/eventfd.rs":"c8db8f5874726fdad289ad5e2603a7d71a1ae5a899dcde3a35d3edff8b498b7e","src/sys/inotify.rs":"5b4da774313afa9c28c3f92f9d07dce9bf4c8d044fd6a16f19480e79a19e808b","src/sys/ioctl/bsd.rs":"bbd02e30b0a78c1cb22777d9b00cfcbba9c68505cffc06118ac68474cf6fea39","src/sys/ioctl/linux.rs":"54bad026ee637b73b95dad8135b6db61cae855670fd9323e7bf21acaff0827f4","src/sys/ioctl/mod.rs":"89b20579476b2e0254e0ecb1b41830cccd7027a22cbdb816a9d4ec3924842ac1","src/sys/memfd.rs":"f58d7fbe67c4b994832d72f5fbd59c136c8f1ae88ea8b0bc1c099db2d847ee6c","src/sys/mman.rs":"17df1bc34ba92bdd6bad1e11e4ef139998117f6c468c8f560421858f3cc899a5","src/sys/mod.rs":"baabf649f758ad4acce849ec1795dd4e4f9c6539e677bad5fa777300a4871dcb","src/sys/personality.rs":"aa89760c023bfec3fca5d8636f9eac9d337f5547933793ce6df7a0de97ae6ee1","src/sys/pthread.rs":"258cdf7ff0b61a4afa6d228109e4cb4fb88d859bb8dfe6c959d95130fb010906","src/sys/ptrace/bsd.rs":"4c590d8f023ff52f396f8b6f2150c08e5c9486d3088d9c173db33a70d616b800","src/sys/ptrace/linux.rs":"c82db3fb18aa97755f9ccb440a957cd46d664968a94045830c5d74d2d53bc19f","src/sys/ptrace/mod.rs":"e9e5d970097f5eafffba900959d4fdbf233bff9ed7f599fc9896bb44d86a57a4","src/sys/quota.rs":"02e698a25f0986fb43aa88689f3d3d8b9edc6ae48496ad02f7214fccaa493e00","src/sys/reboot.rs":"eacdf57694a6629fb05787e16450446102a62818274495f2ad4e445807d09221","src/sys/resource.rs":"d498d0c00fd30e35e1269a8902cb812014d813f63ec95364f8f59f1912ba5657","src/sys/select.rs":"65c39b129d3cc85b8ca026ff26dcf80c56398
24f43715881c3c1bbb6bf0c8a60","src/sys/sendfile.rs":"7a62099f9771fecff49b9c11210341e3c1a4acf22f8dfb96d395e29421648676","src/sys/signal.rs":"c3e13a2edea54d190a4b051f62efc97953c00b5051a9fda0e39e3bc732a31939","src/sys/signalfd.rs":"583524434fd37143be3db37fa6f6cbd339f7946416f05b58a95e246947e5cc9d","src/sys/socket/addr.rs":"84df895052f59ec84774b189ffb285d2a37a9703af6c8310ae5040cca1a2583e","src/sys/socket/mod.rs":"6deb55438cad3606385303f036b0efd842dfd759fba93611911f5a4f2613c9dc","src/sys/socket/sockopt.rs":"ed1f920364bfe88bbe6eaeeefb27a63bfcdd7d67604aca2f03e22f2b502df55a","src/sys/stat.rs":"337dea8d55d6177dc85b3235b40b8a3e81af7f4a6e2806a0b2f730bec5424350","src/sys/statfs.rs":"17103659a85279bac046c69cb3b22bf2c11c2492cffb0edfa4c3b233d161a2f2","src/sys/statvfs.rs":"f81e3900ef90d62e7eceaf1b6ff8dcfd965466714c033eb4717687f692171f48","src/sys/sysinfo.rs":"b4519b1ca091c9dbe94d2a6fd6304944bf3df5626973d2c6884022559706f0d9","src/sys/termios.rs":"7923f9846a8122096b6b1cd240d3618b876ce500a751ac434954d172e2e85745","src/sys/time.rs":"9026033b60a5ccc95b70424aef043c8c748722e2ea8c7c86366ecd4585b651a0","src/sys/timer.rs":"8c10f0e7cfac857ad00460be30bc68b957909cc9296e70718d3b5d4a0babafde","src/sys/timerfd.rs":"ef7c48aefdcfac13316eeddbef5da04cf12e9f574b8d9f43402c02b6b8db86b3","src/sys/uio.rs":"e1d59ccbee9d46c65d3aa8c36aa3a3222539beea0d20163a8b707d08fca14e09","src/sys/utsname.rs":"0cdda0cc111caaa0e4ebe2d4588bdc825d878e5bcb7a9136073b15f87a20e11f","src/sys/wait.rs":"cc70d2d9b880ff6c48577a479c209af6127067bc013a90ee22538e4dfad7d2b4","src/time.rs":"d4e0872361a57810837f5bd790cbca3a2b9db1ac4694a3c52d1564ad3532d3be","src/ucontext.rs":"b8f2e04757a9c2bc38c3b1e259d3a013da8a730fe9bfbe5487637395681b43d3","src/unistd.rs":"e19be456124731c5b93aef92ed72a7c4c9092e28db0649814ba3fcc1f0d620fa","test/common/mod.rs":"1d7e28e3635754664cd056f3a1079232ff5c118df619e1d0551a9972eb0b3cd6","test/sys/mod.rs":"87b2891d83067ff21f72b8ff7fde3019dc45b6877282ac278b6da151de45c7a7","test/sys/test_aio.rs":"4dac9f716f852f1f438f78d6e64bf041e6fd316bf15dcb27afffaf0894bdefa6","test/sys/test_aio_drop.rs":"614070155fa16a979b7341d001639c5ce24a1d6f632c3abce45a5a6d49c4039b","test/sys/test_epoll.rs":"ffe95e36c79e37426ef8e8ca3b137b7f35ea0333ce666a20a4b7878db17680e9","test/sys/test_inotify.rs":"a141b9a995892547b51ceeb6761a70a6b86d37e8f38d13ea2c497b81b4b0f49f","test/sys/test_ioctl.rs":"00ccc5afb665e533a0a4b6d6a6be438bcaea19fce335390feef4e91d17b3036c","test/sys/test_mman.rs":"2b4161964c9204b74659028b0f89a88f4e3bcc9886137a3039737cd91d2698cb","test/sys/test_pthread.rs":"ace36a2f5587f1874854281b4fd84e4e4d892a1e3c5cc38ced57975739522ad6","test/sys/test_ptrace.rs":"0385eebc8b1b8c72f655b745769decd9143ad83018198375982da0896310456b","test/sys/test_select.rs":"54cea1c34ad28d5770a613c1c3cbc3b1064b22037ec2b9d3fcd422d3be9e60a7","test/sys/test_signal.rs":"acc9941227bd3e2afad323613c2b8c83902ed0486d3745fd72704f395924f1e4","test/sys/test_signalfd.rs":"0e1060143e2612c490bc3d0168d0bbb042ef55e3f1d91d2578b9e42e4310a14d","test/sys/test_socket.rs":"d2df1001f9a0b2dac0b88051a67c3868bb216e72e4da4eecd11c4448b9fa4b40","test/sys/test_sockopt.rs":"4465f22f718442f3f7b502e052dad02b93cebfa3b71fa55ff4f25fb02534acab","test/sys/test_stat.rs":"6630a28217fd708bb84cd4f7e7101836b74f2420f9888923fdab664ccc331c1d","test/sys/test_sysinfo.rs":"ffd49bc96375914a2c4a4a59730cae8072f85771e2c4a80d3403df38d967e272","test/sys/test_termios.rs":"e5bcef10c84bd7583d600d5601835bcb3cfc88781cb283ab0185bbef5faf4327","test/sys/test_timerfd.rs":"cfed3abf58118611d08f6985251a7739cff67108e11214222a1d2394a3a026ce","test/sys/test_uio.rs":"326
56bd0a5699e4d019aa928edf104637937179782914a82d50d37226e84c421","test/sys/test_wait.rs":"6fd59fffeeb09ff620c359baefd062ba777598982b6cb001ccc07b6bc7605493","test/test.rs":"11f40b0718ddd1a150cb9e703d56d0b2a9462306505a2245ddf273a2011f48b5","test/test_clearenv.rs":"45ca548035b3c20ec87314715feaba2be973709a635d85b8cde46fd1d9f1ecd4","test/test_dir.rs":"ae3c11c58cb06da6557aa2a839c6653c54cd7724283fffe9df5a5d3feabdd89a","test/test_fcntl.rs":"71dcb87f7b04d78fc62937ba46cb7f0f1f2dbb330b63a996ea2e8ec9056b98a9","test/test_kmod/hello_mod/Makefile":"0219f7bce0603f97d997fb377ca071966c90333ecc665e78a54dfeb97a9c811b","test/test_kmod/hello_mod/hello.c":"bcac6b19c5bd807e1f3878c15e426acc85785a8ade9840c3bb4d068635c9188c","test/test_kmod/mod.rs":"b4ae25841c2f06f32de9f1acd8230eeccd7095721302ebe78ad454e4e4f9c783","test/test_mount.rs":"6dd242b6e23c9c39e1a75612bbea62573898818ab374c3c032c2cdb97033554d","test/test_mq.rs":"136071f24131aac0e65d5f29ac18e3806641dfae1164813f5570c0e3a6f70553","test/test_net.rs":"f2912327ebb2a3d37e6cff02a5ac3106cf889cc5c74404db4ef0034059ba26f1","test/test_nix_path.rs":"01ba4719c80b6fe911b091a7c05124b64eeece964e09c058ef8f9805daca546b","test/test_nmount.rs":"d6c112547bb80968170b5497cda4b6cbf69dabec6f51d494bd52298995ceff18","test/test_poll.rs":"3e0b8f0397ba080785c61a3bfc3d637bc87f324bc4e52b5f1bf3ca0d32dbc9fe","test/test_pty.rs":"b26238a0783746cb31880e11eebc1913149be999ce75fbc2d6677bdd1e2731b2","test/test_ptymaster_drop.rs":"ae63c815f5028ddc67d194e86559483018ab1816316bdb917f40cee9364fd8a5","test/test_resource.rs":"40aef790ab745cec31a4b333d2ca406b462aa9bdf4a6d3756371e498b8d51e9a","test/test_sched.rs":"c4579bd376fab8816e63b07fa9ace31dc08e63ebb7c855a2c450698090d1d1e8","test/test_sendfile.rs":"bb41b4f3621b518e397d3a5b5ad3c5dcef3fe506afe516eab7572fbab92b77e3","test/test_stat.rs":"c407ca47a5258750076d041afad2f6add4c3563be36628bde1c5b314f5d0765d","test/test_time.rs":"f7a21b1e279e60e84909d5dadda97ded66d3326b131fe317badf9af0a1b50335","test/test_timer.rs":"3ae20d364f075d2811f3ff94eda9886682cc21d8807656007d2464fe36d1e361","test/test_unistd.rs":"20a00be4fbe26302ea5fe50ce25b99265dc763db138663d6aa1d7ac729a1d292"},"package":"bfdda3d196821d6af13126e40375cdf7da646a96114af134d5f417a9a1dc8e1a"} -diff --git a/third_party/rust/nix/src/sys/ioctl/linux.rs b/third_party/rust/nix/src/sys/ioctl/linux.rs -index 0c0a2090538f8..214d9e8c60281 100644 ---- a/third_party/rust/nix/src/sys/ioctl/linux.rs -+++ b/third_party/rust/nix/src/sys/ioctl/linux.rs -@@ -41,6 +41,7 @@ mod consts { - target_arch = "s390x", - target_arch = "x86_64", - target_arch = "aarch64", -+ target_arch = "loongarch64", - target_arch = "riscv32", - target_arch = "riscv64" - ))] diff --git a/toolkit/components/telemetry/pingsender/pingsender.cpp b/toolkit/components/telemetry/pingsender/pingsender.cpp -index 30f2907c720e1..e6645227a2949 100644 +index 30f2907..e664522 100644 --- a/toolkit/components/telemetry/pingsender/pingsender.cpp +++ b/toolkit/components/telemetry/pingsender/pingsender.cpp @@ -10,6 +10,7 @@ @@ -63,10 +41,10 @@ index 30f2907c720e1..e6645227a2949 100644 #include diff --git a/toolkit/moz.configure b/toolkit/moz.configure -index 8b462ecde463f..a4aa84cc7c45e 100644 +index f93a5d6..a8920f8 100644 --- a/toolkit/moz.configure +++ b/toolkit/moz.configure -@@ -2432,7 +2432,7 @@ with only_when(compile_environment | artifact_builds): +@@ -2426,7 +2426,7 @@ with only_when(compile_environment | artifact_builds): use_nasm = False elif target.cpu == "x86_64": flags = ["-D__x86_64__", "-DPIC", "-DELF", "-Pconfig_unix64.asm"] diff --git 
a/firefox/0002-Enable-VA-API-support-for-AMD-GPUs.patch b/firefox/0002-Enable-VA-API-support-for-AMD-GPUs.patch index 143927ffe1..2ad77f89a6 100644 --- a/firefox/0002-Enable-VA-API-support-for-AMD-GPUs.patch +++ b/firefox/0002-Enable-VA-API-support-for-AMD-GPUs.patch @@ -1,14 +1,14 @@ -From 9f3a0a22ba3c6ad1f14e90cfc2774b87215d7135 Mon Sep 17 00:00:00 2001 +From 13c493de8626823c9b0a2eada572778b1d58b629 Mon Sep 17 00:00:00 2001 From: Jiangjin Wang Date: Tue, 14 Nov 2023 18:14:20 -0800 -Subject: [PATCH 2/5] Enable VA-API support for AMD GPUs +Subject: [PATCH 2/4] Enable VA-API support for AMD GPUs --- widget/gtk/GfxInfo.cpp | 8 -------- 1 file changed, 8 deletions(-) diff --git a/widget/gtk/GfxInfo.cpp b/widget/gtk/GfxInfo.cpp -index b34e85baa28e5..8c95ce0d4274b 100644 +index b34e85b..8c95ce0 100644 --- a/widget/gtk/GfxInfo.cpp +++ b/widget/gtk/GfxInfo.cpp @@ -1112,14 +1112,6 @@ const nsTArray& GfxInfo::GetGfxDriverInfo() { diff --git a/firefox/0004-Enable-WebRTC-for-LoongArch.patch b/firefox/0003-Enable-WebRTC-for-LoongArch.patch similarity index 77% rename from firefox/0004-Enable-WebRTC-for-LoongArch.patch rename to firefox/0003-Enable-WebRTC-for-LoongArch.patch index 3cd2fcf4f4..64f2394c06 100644 --- a/firefox/0004-Enable-WebRTC-for-LoongArch.patch +++ b/firefox/0003-Enable-WebRTC-for-LoongArch.patch @@ -1,7 +1,7 @@ -From 476458e2e0cafaa5fe5fbc6a99750dd920e7ba67 Mon Sep 17 00:00:00 2001 -From: Jiangjin Wang -Date: Tue, 21 Nov 2023 17:17:16 -0800 -Subject: [PATCH 4/5] Enable WebRTC for LoongArch +From bb10f5774025e90234c2d2ad95351d3b307fce2b Mon Sep 17 00:00:00 2001 +From: Xiaotian Wu +Date: Mon, 19 Feb 2024 15:52:44 +0800 +Subject: [PATCH 3/4] Enable WebRTC for LoongArch --- .../common_audio/common_audio_c_gn/moz.build | 8 ++++++ @@ -14,14 +14,14 @@ Subject: [PATCH 4/5] Enable WebRTC for LoongArch 7 files changed, 60 insertions(+) diff --git a/third_party/libwebrtc/common_audio/common_audio_c_gn/moz.build b/third_party/libwebrtc/common_audio/common_audio_c_gn/moz.build -index 60ee6cfc164be..1e69b2881ca90 100644 +index 2a9bfac..24f2199 100644 --- a/third_party/libwebrtc/common_audio/common_audio_c_gn/moz.build +++ b/third_party/libwebrtc/common_audio/common_audio_c_gn/moz.build -@@ -255,6 +255,14 @@ if CONFIG["CPU_ARCH"] == "riscv64": +@@ -258,6 +258,14 @@ if CONFIG["TARGET_CPU"] == "riscv64": "/third_party/libwebrtc/common_audio/signal_processing/filter_ar_fast_q12.c" ] -+if CONFIG["CPU_ARCH"] == "loongarch64": ++if CONFIG["TARGET_CPU"] == "loongarch64": + + UNIFIED_SOURCES += [ + "/third_party/libwebrtc/common_audio/signal_processing/complex_bit_reverse.c", @@ -29,52 +29,52 @@ index 60ee6cfc164be..1e69b2881ca90 100644 + "/third_party/libwebrtc/common_audio/signal_processing/filter_ar_fast_q12.c" + ] + - if CONFIG["CPU_ARCH"] == "x86": + if CONFIG["TARGET_CPU"] == "x86": DEFINES["WEBRTC_ENABLE_AVX2"] = True diff --git a/third_party/libwebrtc/common_audio/third_party/spl_sqrt_floor/spl_sqrt_floor_gn/moz.build b/third_party/libwebrtc/common_audio/third_party/spl_sqrt_floor/spl_sqrt_floor_gn/moz.build -index d2d0287623b54..36ad6222b3dea 100644 +index 316c199..08f2b64 100644 --- a/third_party/libwebrtc/common_audio/third_party/spl_sqrt_floor/spl_sqrt_floor_gn/moz.build +++ b/third_party/libwebrtc/common_audio/third_party/spl_sqrt_floor/spl_sqrt_floor_gn/moz.build -@@ -174,6 +174,12 @@ if CONFIG["CPU_ARCH"] == "riscv64": +@@ -177,6 +177,12 @@ if CONFIG["TARGET_CPU"] == "riscv64": "/third_party/libwebrtc/common_audio/third_party/spl_sqrt_floor/spl_sqrt_floor.c" ] -+if CONFIG["CPU_ARCH"] == 
"loongarch64": ++if CONFIG["TARGET_CPU"] == "loongarch64": + + UNIFIED_SOURCES += [ + "/third_party/libwebrtc/common_audio/third_party/spl_sqrt_floor/spl_sqrt_floor.c" + ] + - if CONFIG["CPU_ARCH"] == "x86": + if CONFIG["TARGET_CPU"] == "x86": DEFINES["WEBRTC_ENABLE_AVX2"] = True diff --git a/third_party/libwebrtc/modules/audio_processing/aecm/aecm_core_gn/moz.build b/third_party/libwebrtc/modules/audio_processing/aecm/aecm_core_gn/moz.build -index 9874037197896..147e12653cbe2 100644 +index a645dce..57ea718 100644 --- a/third_party/libwebrtc/modules/audio_processing/aecm/aecm_core_gn/moz.build +++ b/third_party/libwebrtc/modules/audio_processing/aecm/aecm_core_gn/moz.build -@@ -206,6 +206,12 @@ if CONFIG["CPU_ARCH"] == "riscv64": +@@ -209,6 +209,12 @@ if CONFIG["TARGET_CPU"] == "riscv64": "/third_party/libwebrtc/modules/audio_processing/aecm/aecm_core_c.cc" ] -+if CONFIG["CPU_ARCH"] == "loongarch64": ++if CONFIG["TARGET_CPU"] == "loongarch64": + + SOURCES += [ + "/third_party/libwebrtc/modules/audio_processing/aecm/aecm_core_c.cc" + ] + - if CONFIG["CPU_ARCH"] == "x86": + if CONFIG["TARGET_CPU"] == "x86": DEFINES["WEBRTC_ENABLE_AVX2"] = True diff --git a/third_party/libwebrtc/modules/desktop_capture/desktop_capture_gn/moz.build b/third_party/libwebrtc/modules/desktop_capture/desktop_capture_gn/moz.build -index b0a5d1522da86..0efac49ac5dc3 100644 +index 22a76e9..7489dda 100644 --- a/third_party/libwebrtc/modules/desktop_capture/desktop_capture_gn/moz.build +++ b/third_party/libwebrtc/modules/desktop_capture/desktop_capture_gn/moz.build -@@ -390,6 +390,34 @@ if CONFIG["CPU_ARCH"] == "riscv64": +@@ -393,6 +393,34 @@ if CONFIG["TARGET_CPU"] == "riscv64": "/third_party/libwebrtc/modules/desktop_capture/linux/x11/x_window_property.cc" ] -+if CONFIG["CPU_ARCH"] == "loongarch64": ++if CONFIG["TARGET_CPU"] == "loongarch64": + + DEFINES["USE_X11"] = "1" + DEFINES["WEBRTC_USE_X11"] = True @@ -102,51 +102,51 @@ index b0a5d1522da86..0efac49ac5dc3 100644 + "/third_party/libwebrtc/modules/desktop_capture/linux/x11/x_window_property.cc" + ] + - if CONFIG["CPU_ARCH"] == "x86": + if CONFIG["TARGET_CPU"] == "x86": DEFINES["WEBRTC_ENABLE_AVX2"] = True diff --git a/third_party/libwebrtc/modules/desktop_capture/primitives_gn/moz.build b/third_party/libwebrtc/modules/desktop_capture/primitives_gn/moz.build -index 8edb2c2344870..e6cf9f56540f7 100644 +index de23257..be48aa6 100644 --- a/third_party/libwebrtc/modules/desktop_capture/primitives_gn/moz.build +++ b/third_party/libwebrtc/modules/desktop_capture/primitives_gn/moz.build -@@ -148,6 +148,10 @@ if CONFIG["CPU_ARCH"] == "riscv64": +@@ -150,6 +150,10 @@ if CONFIG["TARGET_CPU"] == "riscv64": DEFINES["USE_X11"] = "1" -+if CONFIG["CPU_ARCH"] == "loongarch64": ++if CONFIG["TARGET_CPU"] == "loongarch64": + + DEFINES["USE_X11"] = "1" + - if CONFIG["CPU_ARCH"] == "x86": + if CONFIG["TARGET_CPU"] == "x86": DEFINES["WEBRTC_ENABLE_AVX2"] = True diff --git a/third_party/libwebrtc/moz.build b/third_party/libwebrtc/moz.build -index f528cb1108180..88fd9792acdf1 100644 +index 7baea55..73cda0d 100644 --- a/third_party/libwebrtc/moz.build +++ b/third_party/libwebrtc/moz.build -@@ -692,3 +692,10 @@ if CONFIG["CPU_ARCH"] == "riscv64" and CONFIG["MOZ_X11"] == "1" and CONFIG["OS_T +@@ -694,3 +694,10 @@ if CONFIG["MOZ_X11"] == "1" and CONFIG["OS_TARGET"] == "Linux" and CONFIG["TARGE "/third_party/libwebrtc/modules/desktop_capture/desktop_capture_gn", "/third_party/libwebrtc/modules/desktop_capture/primitives_gn" ] + -+if CONFIG["CPU_ARCH"] == "loongarch64" and CONFIG["MOZ_X11"] == 
"1" and CONFIG["OS_TARGET"] == "Linux": ++if CONFIG["MOZ_X11"] == "1" and CONFIG["OS_TARGET"] == "Linux" and CONFIG["TARGET_CPU"] == "loongarch64": + + DIRS += [ + "/third_party/libwebrtc/modules/desktop_capture/desktop_capture_gn", + "/third_party/libwebrtc/modules/desktop_capture/primitives_gn" + ] diff --git a/toolkit/moz.configure b/toolkit/moz.configure -index 67fc08237bba4..f7252539c7eaa 100644 +index a8920f8..0ff8c6f 100644 --- a/toolkit/moz.configure +++ b/toolkit/moz.configure -@@ -1328,6 +1328,7 @@ def webrtc_default(target): - "ppc", - "ppc64", - "riscv64", +@@ -1322,6 +1322,7 @@ def webrtc_default(target): + "aarch64", + "x86", + "ia64", + "loongarch64", - ) - - return os_match and cpu_match and target.endianness == "little" + "mips32", + "mips64", + "ppc", -- 2.43.0 diff --git a/firefox/0003-Remove-architectural-limit-on-VA-API-support.patch b/firefox/0003-Remove-architectural-limit-on-VA-API-support.patch deleted file mode 100644 index aa45fa3e87..0000000000 --- a/firefox/0003-Remove-architectural-limit-on-VA-API-support.patch +++ /dev/null @@ -1,40 +0,0 @@ -From b25c3742c98c87de9621eac8b672f9381e15c088 Mon Sep 17 00:00:00 2001 -From: Jiangjin Wang -Date: Tue, 14 Nov 2023 18:16:46 -0800 -Subject: [PATCH 3/5] Remove architectural limit on VA-API support - ---- - toolkit/moz.configure | 9 ++------- - 1 file changed, 2 insertions(+), 7 deletions(-) - -diff --git a/toolkit/moz.configure b/toolkit/moz.configure -index a4aa84cc7c45e..67fc08237bba4 100644 ---- a/toolkit/moz.configure -+++ b/toolkit/moz.configure -@@ -537,11 +537,8 @@ set_define("MOZ_WAYLAND", depends_if(wayland_headers)(lambda _: True)) - - # Hardware-accelerated video decode with VAAPI and V4L2 on Linux - # ============================================================== --@depends(target, toolkit_gtk) --def vaapi(target, toolkit_gtk): -- # VAAPI is mostly used on x86(-64) but is sometimes used on ARM/ARM64 SOCs. -- if target.cpu in ("arm", "aarch64", "x86", "x86_64") and toolkit_gtk: -- return True -+set_config("MOZ_ENABLE_VAAPI", True, when=toolkit_gtk) -+set_define("MOZ_ENABLE_VAAPI", True, when=toolkit_gtk) - - - @depends(target, toolkit_gtk) -@@ -552,9 +549,7 @@ def v4l2(target, toolkit_gtk): - return True - - --set_config("MOZ_ENABLE_VAAPI", True, when=vaapi) - set_config("MOZ_ENABLE_V4L2", True, when=v4l2) --set_define("MOZ_ENABLE_VAAPI", True, when=vaapi) - set_define("MOZ_ENABLE_V4L2", True, when=v4l2) - - --- -2.43.0 - diff --git a/firefox/0005-Fix-libyuv-build-with-LSX-LASX.patch b/firefox/0004-Fix-libyuv-build-with-LSX-LASX.patch similarity index 98% rename from firefox/0005-Fix-libyuv-build-with-LSX-LASX.patch rename to firefox/0004-Fix-libyuv-build-with-LSX-LASX.patch index f69d1ab983..b1a37bd933 100644 --- a/firefox/0005-Fix-libyuv-build-with-LSX-LASX.patch +++ b/firefox/0004-Fix-libyuv-build-with-LSX-LASX.patch @@ -1,7 +1,7 @@ -From 7a3c2cbce2b6cf951c94850596dac20b5c3a98dc Mon Sep 17 00:00:00 2001 +From 4afef7f509cae7267407c6b5b839aba955cd66d1 Mon Sep 17 00:00:00 2001 From: WANG Xuerui Date: Sun, 31 Dec 2023 13:16:33 +0800 -Subject: [PATCH 5/5] Fix libyuv build with LSX & LASX +Subject: [PATCH 4/4] Fix libyuv build with LSX & LASX This is not of upstream quality, and will not be upstreamed as-is. This is only meant as a quick-and-dirty build fix for LoongArch early @@ -15,7 +15,7 @@ adopters. 
5 files changed, 92 insertions(+), 28 deletions(-) diff --git a/media/libyuv/libyuv/BUILD.gn b/media/libyuv/libyuv/BUILD.gn -index a72ff06558000..7d70848be9f1a 100644 +index a72ff06..7d70848 100644 --- a/media/libyuv/libyuv/BUILD.gn +++ b/media/libyuv/libyuv/BUILD.gn @@ -69,6 +69,14 @@ group("libyuv") { @@ -77,7 +77,7 @@ index a72ff06558000..7d70848be9f1a 100644 config("libyuv_unittest_warnings_config") { if (!is_win) { diff --git a/media/libyuv/libyuv/libyuv.gni b/media/libyuv/libyuv/libyuv.gni -index 852f08ca9d61f..ecad693508811 100644 +index 852f08c..ecad693 100644 --- a/media/libyuv/libyuv/libyuv.gni +++ b/media/libyuv/libyuv/libyuv.gni @@ -20,4 +20,6 @@ declare_args() { @@ -88,7 +88,7 @@ index 852f08ca9d61f..ecad693508811 100644 + libyuv_use_lasx = current_cpu == "loong64" || current_cpu == "loongarch64" } diff --git a/media/libyuv/libyuv/libyuv.gypi b/media/libyuv/libyuv/libyuv.gypi -index 48936aa7b0239..9c19abf9c34c9 100644 +index 48936aa..9c19abf 100644 --- a/media/libyuv/libyuv/libyuv.gypi +++ b/media/libyuv/libyuv/libyuv.gypi @@ -18,6 +18,7 @@ @@ -125,7 +125,7 @@ index 48936aa7b0239..9c19abf9c34c9 100644 'source/scale_neon.cc', 'source/scale_neon64.cc', diff --git a/media/libyuv/libyuv/source/row_lasx.cc b/media/libyuv/libyuv/source/row_lasx.cc -index 29ac9254d9924..8c325483b116a 100644 +index 29ac925..8c32548 100644 --- a/media/libyuv/libyuv/source/row_lasx.cc +++ b/media/libyuv/libyuv/source/row_lasx.cc @@ -543,8 +543,8 @@ void I422ToARGB4444Row_LASX(const uint8_t* src_y, @@ -264,7 +264,7 @@ index 29ac9254d9924..8c325483b116a 100644 } // extern "C" } // namespace libyuv diff --git a/media/libyuv/libyuv/source/row_lsx.cc b/media/libyuv/libyuv/source/row_lsx.cc -index 9c1e16f22e02d..91221ff03ca29 100644 +index 9c1e16f..91221ff 100644 --- a/media/libyuv/libyuv/source/row_lsx.cc +++ b/media/libyuv/libyuv/source/row_lsx.cc @@ -407,7 +407,7 @@ void ARGB1555ToUVRow_LSX(const uint8_t* src_argb1555, diff --git a/firefox/PKGBUILD b/firefox/PKGBUILD index cb2ecff444..26f4516441 100644 --- a/firefox/PKGBUILD +++ b/firefox/PKGBUILD @@ -65,9 +65,8 @@ source=( identity-icons-brand.svg 0001-Add-support-for-LoongArch64.patch 0002-Enable-VA-API-support-for-AMD-GPUs.patch - 0003-Remove-architectural-limit-on-VA-API-support.patch - 0004-Enable-WebRTC-for-LoongArch.patch - 0005-Fix-libyuv-build-with-LSX-LASX.patch + 0003-Enable-WebRTC-for-LoongArch.patch + 0004-Fix-libyuv-build-with-LSX-LASX.patch ) validpgpkeys=( # Mozilla Software Releases @@ -78,20 +77,18 @@ sha256sums=('b84815a90e147965e4c0b50599c85b1022ab0fce42105e5ef45c630dcca5dec3' 'SKIP' '1f241fdc619f92a914c75aece7c7c717401d7467c9a306458e106b05f34e5044' 'a9b8b4a0a1f4a7b4af77d5fc70c2686d624038909263c795ecc81e0aec7711e9' - '94ccc1e5efe217a8491bea4f3d80c962ccda2ebb3203f67e4d995190d29b0544' - 'ab6eb723cb2b70831cf4e66d6e315e0842f77467812c67d5de2365fc5117c320' - '779cafabc2c738dc26a1f945695802f038af916d6b86ede9493b1cceca7e7428' - '322d0fb02661018d819f5db218b94f8f680b0e5bc6a3648db35de465431590cc' - 'f7b56a3bd993b0a0c05f305c40fabcc4af62c68d43097c0731db9525ab6156bd') + 'baf2381cce9d0a99eade7e6d8b2a9566cfaa292e6b11cf1a7527f048875b81d4' + '7d37addd28ab5d84a15d236aba916bd4ba9c81442eb92f430e023928aecf4c22' + '9b8141c6731216ea21e76c6efa1de3381bf8ec62b6fdd030d5c8e1e4829e62b5' + 'e782d4b2ea14a5b662dab21e449923a425fdace096375244ac4750e0466d9fd8') b2sums=('7252cd58fef9f5fcb504c8c9f885567109c05e6ec92157459cc384edc6935adb206e3be0b805aeaa37dbd72656c3243db1291b745dd0f705f37a61319a4dc820' 'SKIP' 
'd07557840097dd48a60c51cc5111950781e1c6ce255557693bd11306c7a9258b2a82548329762148f117b2295145f9e66e0483a18e2fe09c5afcffed2e4b8628' '63a8dd9d8910f9efb353bed452d8b4b2a2da435857ccee083fc0c557f8c4c1339ca593b463db320f70387a1b63f1a79e709e9d12c69520993e26d85a3d742e34' - 'cab0bf0922520866aa9fddf9142512a0ff30437ab779dc4e266b278aea363d4077db5edad11ac30190ee69cc321ec6a7ab3eea8003982faeac991389417af7e9' - '19dc9f0e2aa13be99f7226dbf1e80eee67bbce3ac3ed8256894158565d62324589bc075df402bc43f5d597cc2de60bed6d68b58e20e9caa1f34776f680fe45b5' - 'bb2658edb90dc022df36d89206789ef30222ff1b26376b61b2340d421738fe240063bb7113c9deb828f00e8e297bda8b87e2da9d26796aa7fd4d48bef1aa0719' - '65e9739926174b3eecb8e01cae9805861f6a7c2cadf9faef7cda92acbef3a569a31b2e8c0f4f8ab726416a60e601e7a2a4eb1fc6cb74dfd2cb4ab1572c1fdd97' - 'e2fc795f224f34d14fab2655235a7e31b5fdbe84937ded697f6b1ccd1751bfb3a05ae6b46846201e201487d02a987322f3fc36cdd3208d4904c844caf3f2d628') + '8c19b8be27f5b333321e73bca80b1d1f3d87140f931d311f3cadc1dc18a4d5cf98f02448a8e8640d40186c3545dba2f61b7d47d7ceed23d02c9bd147d60c9009' + '3dc11f75ab18dfb5a02e6de36adb04e5584b324a7de1d2890afc7eaf8bc754f447846009a24ed0d532289f524a71dadc6bf0130b8cb893c91d3d581885818346' + '070ae992fe6f2d5a1881feb2a974e501048cc7ac2c4be472994d97381791693be53ab8ba65546008cacfc8c3197325dc46a0db8e42f9b08caa9e1f7d99bbe688' + 'cecd147e5825dcbc4694a53f77942fc808893cee0afb9cb826803d3f79887eee8d4f98178788b2319434dd0554540a43e7302b6a1cc1f35fef3d650635cdd79f') # Google API keys (see http://www.chromium.org/developers/how-tos/api-keys) # Note: These are for Arch Linux use ONLY. For your own distribution, please @@ -109,12 +106,10 @@ prepare() { mkdir mozbuild cd firefox-$pkgver -# patch -Np1 -i ../firefox-118-loong.patch patch -Np1 -i ../0001-Add-support-for-LoongArch64.patch patch -Np1 -i ../0002-Enable-VA-API-support-for-AMD-GPUs.patch - patch -Np1 -i ../0003-Remove-architectural-limit-on-VA-API-support.patch - patch -Np1 -i ../0004-Enable-WebRTC-for-LoongArch.patch - patch -Np1 -i ../0005-Fix-libyuv-build-with-LSX-LASX.patch + patch -Np1 -i ../0003-Enable-WebRTC-for-LoongArch.patch + patch -Np1 -i ../0004-Fix-libyuv-build-with-LSX-LASX.patch echo -n "$_google_api_key" >google-api-key echo -n "$_mozilla_api_key" >mozilla-api-key @@ -130,7 +125,7 @@ ac_add_options --enable-optimize ac_add_options --enable-rust-simd #ac_add_options --enable-linker=lld ac_add_options --disable-install-strip -ac_add_options --disable-elf-hack +#ac_add_options --disable-elf-hack ac_add_options --disable-bootstrap ac_add_options --without-wasm-sandboxed-libraries @@ -162,7 +157,6 @@ END } build() { - set -x cd firefox-$pkgver export MACH_BUILD_PYTHON_NATIVE_PACKAGE_SOURCE=pip From dbbefe9b391152ae30392e97e47e025150295a69 Mon Sep 17 00:00:00 2001 From: Xiaotian Wu Date: Thu, 14 Mar 2024 20:10:41 +0800 Subject: [PATCH 12/23] cocogitto --- cocogitto/PKGBUILD | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cocogitto/PKGBUILD b/cocogitto/PKGBUILD index c12465f811..1286652052 100644 --- a/cocogitto/PKGBUILD +++ b/cocogitto/PKGBUILD @@ -20,7 +20,7 @@ sha256sums=('2a0e332b7028ffcfeb113c734b4bf506c34362730e371b03a3e4a71142099330') prepare() { cd "$_archive" -# cargo fetch --locked --target "`uname -m`-unknown-linux-gnu" + cargo fetch --locked --target "$(rustc -vV | sed -n 's/host: //p')" mkdir {completions,man} } From 6e4e9c329c873ccde474af9c8c7d09c28a9ff581 Mon Sep 17 00:00:00 2001 From: Xiaotian Wu Date: Thu, 14 Mar 2024 20:45:19 +0800 Subject: [PATCH 13/23] deepin-grand-search --- deepin-grand-search/62.patch | 126 
----------------------------------- deepin-grand-search/PKGBUILD | 4 +- 2 files changed, 1 insertion(+), 129 deletions(-) delete mode 100644 deepin-grand-search/62.patch diff --git a/deepin-grand-search/62.patch b/deepin-grand-search/62.patch deleted file mode 100644 index 7c83b5a7be..0000000000 --- a/deepin-grand-search/62.patch +++ /dev/null @@ -1,126 +0,0 @@ -From 66dd5c6c79922ee5366d9bf09e8a9879f7231306 Mon Sep 17 00:00:00 2001 -From: xzl -Date: Thu, 7 Sep 2023 16:05:54 +0800 -Subject: [PATCH] fix: fix build error in v23 - -Log: ---- - src/grand-search-daemon/main.cpp | 1 + - .../exhibition/matchresult/listview/grandsearchlistdelegate.cpp | 1 + - .../gui/exhibition/matchresult/listview/grandsearchlistview.cpp | 1 + - src/grand-search/gui/exhibition/preview/generalpreviewplugin.cpp | 1 + - src/grand-search/gui/searchconfig/bestmatchwidget.cpp | 1 + - .../gui/searchconfig/blacklistview/blacklistview.cpp | 1 + - src/grand-search/gui/searchconfig/switchwidget/switchwidget.cpp | 1 + - src/libgrand-search-daemon/main.cpp | 1 + - src/preview-plugin/audio-preview/audioview.cpp | 1 + - 9 files changed, 9 insertions(+) - -diff --git a/src/grand-search-daemon/main.cpp b/src/grand-search-daemon/main.cpp -index a2aada02..87bab961 100644 ---- a/src/grand-search-daemon/main.cpp -+++ b/src/grand-search-daemon/main.cpp -@@ -10,6 +10,7 @@ - #include - - #include -+#include - - #include - #include -diff --git a/src/grand-search/gui/exhibition/matchresult/listview/grandsearchlistdelegate.cpp b/src/grand-search/gui/exhibition/matchresult/listview/grandsearchlistdelegate.cpp -index 26cb863b..b71278c9 100755 ---- a/src/grand-search/gui/exhibition/matchresult/listview/grandsearchlistdelegate.cpp -+++ b/src/grand-search/gui/exhibition/matchresult/listview/grandsearchlistdelegate.cpp -@@ -28,6 +28,7 @@ - #define TailMaxWidth 150 // 拖尾信息最大显示宽度 - - DWIDGET_USE_NAMESPACE -+DGUI_USE_NAMESPACE - using namespace GrandSearch; - - GrandSearchListDelegate::GrandSearchListDelegate(QAbstractItemView *parent) -diff --git a/src/grand-search/gui/exhibition/matchresult/listview/grandsearchlistview.cpp b/src/grand-search/gui/exhibition/matchresult/listview/grandsearchlistview.cpp -index f0133d41..d665d6b5 100755 ---- a/src/grand-search/gui/exhibition/matchresult/listview/grandsearchlistview.cpp -+++ b/src/grand-search/gui/exhibition/matchresult/listview/grandsearchlistview.cpp -@@ -20,6 +20,7 @@ - - using namespace GrandSearch; - DCORE_USE_NAMESPACE -+DGUI_USE_NAMESPACE - DWIDGET_USE_NAMESPACE - - #define ICON_SIZE 24 -diff --git a/src/grand-search/gui/exhibition/preview/generalpreviewplugin.cpp b/src/grand-search/gui/exhibition/preview/generalpreviewplugin.cpp -index 2d6acdc8..16e3a19e 100644 ---- a/src/grand-search/gui/exhibition/preview/generalpreviewplugin.cpp -+++ b/src/grand-search/gui/exhibition/preview/generalpreviewplugin.cpp -@@ -25,6 +25,7 @@ - using namespace GrandSearch; - - DWIDGET_USE_NAMESPACE -+DGUI_USE_NAMESPACE - - NameLabel::NameLabel(const QString &text, QWidget *parent, Qt::WindowFlags f): - QLabel(text, parent, f) -diff --git a/src/grand-search/gui/searchconfig/bestmatchwidget.cpp b/src/grand-search/gui/searchconfig/bestmatchwidget.cpp -index 85e9d7cb..37606ebb 100644 ---- a/src/grand-search/gui/searchconfig/bestmatchwidget.cpp -+++ b/src/grand-search/gui/searchconfig/bestmatchwidget.cpp -@@ -14,6 +14,7 @@ - #include - - DWIDGET_USE_NAMESPACE -+DGUI_USE_NAMESPACE - using namespace GrandSearch; - - BestMatchWidget::BestMatchWidget(QWidget *parent) -diff --git 
a/src/grand-search/gui/searchconfig/blacklistview/blacklistview.cpp b/src/grand-search/gui/searchconfig/blacklistview/blacklistview.cpp -index 1473e4c8..eb634305 100644 ---- a/src/grand-search/gui/searchconfig/blacklistview/blacklistview.cpp -+++ b/src/grand-search/gui/searchconfig/blacklistview/blacklistview.cpp -@@ -26,6 +26,7 @@ DCORE_USE_NAMESPACE - #define InitCount 7 // 初始显示数量 - - DWIDGET_USE_NAMESPACE -+DGUI_USE_NAMESPACE - using namespace GrandSearch; - - BlackListView::BlackListView(QWidget *parent) -diff --git a/src/grand-search/gui/searchconfig/switchwidget/switchwidget.cpp b/src/grand-search/gui/searchconfig/switchwidget/switchwidget.cpp -index 1449f095..ba0efbfa 100644 ---- a/src/grand-search/gui/searchconfig/switchwidget/switchwidget.cpp -+++ b/src/grand-search/gui/searchconfig/switchwidget/switchwidget.cpp -@@ -12,6 +12,7 @@ - #define ICONLABELSIZE 36 - - DWIDGET_USE_NAMESPACE -+DGUI_USE_NAMESPACE - using namespace GrandSearch; - - SwitchWidget::SwitchWidget(const QString &title, QWidget *parent) -diff --git a/src/libgrand-search-daemon/main.cpp b/src/libgrand-search-daemon/main.cpp -index 1dba137b..53aab763 100644 ---- a/src/libgrand-search-daemon/main.cpp -+++ b/src/libgrand-search-daemon/main.cpp -@@ -13,6 +13,7 @@ - #include - #include - #include -+#include - - GRANDSEARCH_USE_NAMESPACE - DCORE_USE_NAMESPACE -diff --git a/src/preview-plugin/audio-preview/audioview.cpp b/src/preview-plugin/audio-preview/audioview.cpp -index d50fde46..2e7d8cd7 100644 ---- a/src/preview-plugin/audio-preview/audioview.cpp -+++ b/src/preview-plugin/audio-preview/audioview.cpp -@@ -19,6 +19,7 @@ - #define MARGIN_SIZE 15 - - DWIDGET_USE_NAMESPACE -+DGUI_USE_NAMESPACE - GRANDSEARCH_USE_NAMESPACE - using namespace GrandSearch::audio_preview; - diff --git a/deepin-grand-search/PKGBUILD b/deepin-grand-search/PKGBUILD index 86f4e48c7d..469780e349 100644 --- a/deepin-grand-search/PKGBUILD +++ b/deepin-grand-search/PKGBUILD @@ -13,8 +13,7 @@ depends=(deepin-anything deepin-application-manager taglib ffmpeg icu deepin-pdf makedepends=(cmake deepin-dock ninja qt5-tools) groups=(deepin-extra) source=("$pkgname-$pkgver.tar.gz::https://github.com/linuxdeepin/dde-grand-search/archive/$pkgver.tar.gz" - taglib-2.patch - 62.patch) + taglib-2.patch) sha512sums=('7d2bd203b9c0dfef57a0667690252a9b3b3f3b5b2e30f44f6706de0d98885908f21f982fd19257812b92a0564e4e7888f8a6789bee2aa5ac2c573a2cadf0b838' '8364cd5aa0350a7d109be7ce10035c6c4e3fd6686205bc880017b1fc93a10cff6e78a8f66daeb25427c416a6dc075482136146c9d8278aee6de71653673d59a0') @@ -23,7 +22,6 @@ prepare() { # https://github.com/linuxdeepin/dde-grand-search/pull/65 sed -i 's/-fPIE -pie//g' src/*/CMakeLists.txt - patch -p1 -i $srcdir/62.patch patch -p1 -i ../taglib-2.patch } From 25721e6546581b764c82da617aaddd0ff305d418 Mon Sep 17 00:00:00 2001 From: Xiaotian Wu Date: Thu, 14 Mar 2024 21:04:18 +0800 Subject: [PATCH 14/23] pixman --- pixman/83.patch | 6 +++--- pixman/PKGBUILD | 3 ++- 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/pixman/83.patch b/pixman/83.patch index 34c100bbe7..0cd9bb9b37 100644 --- a/pixman/83.patch +++ b/pixman/83.patch @@ -2288,7 +2288,7 @@ index 0000000..d6d0169 + */ + +#ifdef HAVE_CONFIG_H -+#include ++#include +#endif + +#include "pixman-private.h" @@ -7174,7 +7174,7 @@ index 0000000..a77211c + */ + +#ifdef HAVE_CONFIG_H -+#include ++#include +#endif + +#include "pixman-private.h" @@ -7281,7 +7281,7 @@ index 0000000..a4c261a + */ + +#ifdef HAVE_CONFIG_H -+#include ++#include +#endif + +#include "pixman-private.h" diff --git 
a/pixman/PKGBUILD b/pixman/PKGBUILD index 5358758bb8..43007d07ec 100644 --- a/pixman/PKGBUILD +++ b/pixman/PKGBUILD @@ -15,8 +15,9 @@ provides=('libpixman-1.so') source=(https://xorg.freedesktop.org/releases/individual/lib/${pkgname}-${pkgver}.tar.xz 83.patch) sha512sums=('1a1d21b86b3c6784c4c2606b7069723b4dbb747eac9fce95bca86516787840379ffd49abc42d11e7143e85c32c85496f33c2562c7a910ca6f963599affdc3224' - '0b7970cd955e31211fa55882974bbb321b0e7421da5c06a2cff196ea3e0efcefce50b4ba5f19a23417aaea2145529c762800dbb5887dfdcfa240efe400d44f7f') + 'f0ddc083f1cafaebdaf3a3b2d3b9c85b55f3354c7c115b4eff31eb46ddbf71cd6bedea207e10a4692ba73e9857e2e8b49e197de1a037589bb842ee666e829a83') #validpgpkeys=('') # Maarten Lankhorst +options=(!lto) prepare() { cd "$pkgname-$pkgver" From 177d138c25a8fb1d70ec5a49eac8804b2c330b64 Mon Sep 17 00:00:00 2001 From: Xiaotian Wu Date: Mon, 1 Apr 2024 14:07:55 +0800 Subject: [PATCH 15/23] update --- 0ad/PKGBUILD | 4 +- arch-wiki-docs/PKGBUILD | 2 +- avahi/PKGBUILD | 2 +- blender/PKGBUILD | 3 +- clang/PKGBUILD | 4 +- dbeaver-plugin-apache-poi/PKGBUILD | 4 +- discord/PKGBUILD | 4 +- electron25/PKGBUILD | 9 +- firefox-developer-edition/PKGBUILD | 19 +++- gcc12/PKGBUILD | 5 +- glslang/PKGBUILD | 2 +- keybase/PKGBUILD | 7 +- libretro-mame/PKGBUILD | 2 +- libvirt/PKGBUILD | 10 +- libxslt/PKGBUILD | 2 +- linux-hardened/PKGBUILD | 4 +- llvm/PKGBUILD | 2 +- lua-yaml/PKGBUILD | 2 +- nsxiv/PKGBUILD | 2 +- ocaml/PKGBUILD | 8 +- ocaml/ocaml-5.1.0-la64.patch | 169 ++++++++++++++++++----------- 21 files changed, 154 insertions(+), 112 deletions(-) diff --git a/0ad/PKGBUILD b/0ad/PKGBUILD index 453c704d8a..b10879627c 100644 --- a/0ad/PKGBUILD +++ b/0ad/PKGBUILD @@ -19,7 +19,7 @@ source=("https://releases.wildfiregames.com/$pkgname-$_pkgver-unix-build.tar.xz" fix_python_3.11_ftbfs.patch https://github.com/0ad/0ad/commit/839edc3a.patch https://github.com/0ad/0ad/commit/093e1eb2.patch - https://github.com/0ad/0ad/commit/d2426312.patch) + https://github.com/0ad/0ad/commit/d2426312.patch 0ad-fix-build.patch 0ad-la64.patch) sha512sums=('aaf647d5d8454c244015d2a198beeaaebc571a5bc96446f3acff8dbd05f9843029c500bf4162651a5e1fcdb42bd5fb5b4f5c512c78372479fbd8565dd093f272' @@ -27,7 +27,7 @@ sha512sums=('aaf647d5d8454c244015d2a198beeaaebc571a5bc96446f3acff8dbd05f9843029c '748a75420541947e2a215b3a8789a0e137179e4981d0977e1c4b20cd7b86af2d96b9976e04d60ace8d5ee465d542cadc42ee9bceedaaa97d2b320f533e3e3892' '1dfc8a0c6ac29040f72d9bbf6b631a74cbdec444b9078a015345139228666354d9b5059f85b640ce3afc0f590bcbe8afd5e158509a0c95751e1cd69fece46876' 'a7fd1454385f56b7c8cb0fc6ac001761d4419df4aeec570ba846c7df4eb327d25b9ff1a7946cb334315109fa90ca2c1820583619f4e1ec5d53805afa08e10093' - '5f32d47f01d845e07b2f919c9b04ac5e50dc9977fa97f981eba4a53677a29d797d0d76bc385ac047dd7c7d24af7d95cd8256d433bd43ce1a6606763c0ea736cb') + '5f32d47f01d845e07b2f919c9b04ac5e50dc9977fa97f981eba4a53677a29d797d0d76bc385ac047dd7c7d24af7d95cd8256d433bd43ce1a6606763c0ea736cb' '956effc37bbad8ca44a5e82a8750ca82c1c2347152dd684ebc2921953d4fa81ef9291b5bb5de05559b2b4ef79c336b837216892f0bcf806e50aac8c4ea42edde' 'c01e52a4241736eda82f6002c3627d9c4b5b505109969fc608d95dd71db8681df8f3de6a372bca8fe977bee14f5180f4c27681e40d26b0a06ddc556122886d04') diff --git a/arch-wiki-docs/PKGBUILD b/arch-wiki-docs/PKGBUILD index 412708374a..2be93f6f7d 100644 --- a/arch-wiki-docs/PKGBUILD +++ b/arch-wiki-docs/PKGBUILD @@ -2,7 +2,7 @@ # Maintainer: Sergej Pupykin pkgname=arch-wiki-docs -pkgver=20240103 +pkgver=20240207 pkgrel=1 pkgdesc='Pages from Arch Wiki optimized for offline browsing' 
arch=('any') diff --git a/avahi/PKGBUILD b/avahi/PKGBUILD index b2cb0dbe02..4e67c866b8 100644 --- a/avahi/PKGBUILD +++ b/avahi/PKGBUILD @@ -3,7 +3,7 @@ # Contributor: Douglas Soares de Andrade pkgname=avahi -pkgver=0.8+r194+g3f79789 +pkgver=0.8+r194+g3f79789c pkgrel=1 epoch=1 pkgdesc="Service Discovery for Linux using mDNS/DNS-SD (compatible with Bonjour)" diff --git a/blender/PKGBUILD b/blender/PKGBUILD index 41c4cff075..2b55992fce 100644 --- a/blender/PKGBUILD +++ b/blender/PKGBUILD @@ -18,7 +18,7 @@ url="https://www.blender.org" depends=('libpng' 'libtiff' 'openexr' 'python' 'desktop-file-utils' 'python-requests' 'potrace' 'shared-mime-info' 'hicolor-icon-theme' 'xdg-utils' 'glew' 'openjpeg2' 'python-numpy' 'freetype2' 'openal' 'ffmpeg' 'fftw' 'boost-libs' 'opencollada' 'alembic' 'openxr' - 'openimageio' 'libsndfile' 'jack' 'opencolorio' + 'openimageio' 'libsndfile' 'jack' 'opencolorio' 'materialx' 'jemalloc' 'libspnav' 'ptex' 'opensubdiv' 'openvdb' 'sdl2' 'libharu' 'draco' 'level-zero-loader' 'libxkbcommon' 'libepoxy' 'openshadinglanguage' 'intel-oneapi-compiler-shared-runtime-libs' 'intel-oneapi-compiler-dpcpp-cpp-runtime-libs') @@ -116,7 +116,6 @@ build() { -DWITH_PYTHON_INSTALL=OFF \ -DOCLOC_INSTALL_DIR=/usr \ -DUSD_ROOT_DIR=/usr \ - -DWITH_MATERIALX=OFF \ -DWITH_CYCLES=OFF \ -DSYCL_OFFLINE_COMPILER_PARALLEL_JOBS=8 cmake --build build diff --git a/clang/PKGBUILD b/clang/PKGBUILD index 2604666164..f7242aef54 100644 --- a/clang/PKGBUILD +++ b/clang/PKGBUILD @@ -3,7 +3,7 @@ pkgname=clang pkgver=16.0.6 -pkgrel=2 +pkgrel=3 pkgdesc="C language family frontend for LLVM" arch=('loong64' 'x86_64') url="https://clang.llvm.org/" @@ -34,7 +34,7 @@ sha256sums=('1186b6e6eefeadd09912ed73b3729e85b59f043724bb2818a95a2ec024571840' 'SKIP' '15f5b9aeeba938530af977d5f9205612737a091a7f0f6c8075df8723b7713f70' 'SKIP' - 'c102e8a6a2adb0e8729865ffb8799b22bb8a9bdf0f421991880fa4393378370a' + '0d4dc477f5a28f9f16639dc094b6d9bc14228d5de771547394799d2d5f8cd1df' '45da5783f4e89e4507a351ed0ffbbe6ec240e21ff7070797a89c5ccf434ac612') validpgpkeys=('474E22316ABF4785A88C6E8EA2C794A986419D8A' # Tom Stellard 'D574BD5D1D0E98895E3BF90044F2485E45D59042') # Tobias Hieta diff --git a/dbeaver-plugin-apache-poi/PKGBUILD b/dbeaver-plugin-apache-poi/PKGBUILD index ae386c042b..fa66ccc803 100644 --- a/dbeaver-plugin-apache-poi/PKGBUILD +++ b/dbeaver-plugin-apache-poi/PKGBUILD @@ -1,7 +1,7 @@ # Maintainer: Muflone http://www.muflone.com/contacts/english/ pkgname=dbeaver-plugin-apache-poi -pkgver=5.2.4 +pkgver=5.2.5 pkgrel=1 pkgdesc='DBeaver library for Microsoft Office documents' arch=('any') @@ -11,7 +11,7 @@ makedepends=('unzip') depends=('dbeaver>=4.2.5') source=("https://dbeaver.io/update/ce/latest/plugins/org.jkiss.bundle.apache.poi_${pkgver}.jar" "${pkgname}.info") -sha256sums=('59061d440466b1869528adc1ee7c934421e9cae8008a8caa3801d34967ec3735' +sha256sums=('1f979af0c82db69228a116bce039a42f1a0984e4251bfd4e2f4c6ea63699f1de' '5a280fb2b234f76842c3e3a41d08cfecb0f159cf5ca322b3f0001755e7093150') noextract=("org.jkiss.bundle.apache.poi_${pkgver}.jar") diff --git a/discord/PKGBUILD b/discord/PKGBUILD index 3147b4cbdc..bd9e92c851 100644 --- a/discord/PKGBUILD +++ b/discord/PKGBUILD @@ -19,8 +19,8 @@ source=("https://dl.discordapp.net/apps/linux/$pkgver/$pkgname-$pkgver.tar.gz" "LICENSE-$pkgver.html::https://discordapp.com/terms" "OSS-LICENSES-$pkgver.html::https://discordapp.com/licenses") sha512sums=('ec11acfe3f96762cd7c230eb38e098818d9ebf32ff530f061ed3dbb5d08c3303bb4d20206510e3621f21ba89c08474367c13e7fd3c46f80b7229db37fb81db6c' - 
'a47ee83d4878f936a42faaa97d87f8a23261be1d5a5fc1c072be6b8a1fd505ccb7a3b39a8f7675677a241421f80b4cc57957d32eaf1fee96f91e0fe976cce167' - '4587af901dc20a843f05ba54411c7d9a39d6976e4c3689fb94c504d6ed8af9dd46aa54c2b87a9dcdee1dd4bcbf79d68be66556ce9d329959f849cde2a8522e4a') + '6fd75707e5ddf52454b0172309a820def89a0d400336bbe40c9176eaebcf6d65d2d083636c76a2595a14193508254520007bc8d31e14f4be929334a5f3bf91bb' + 'c1de9d8abf32a750c27c44232b1f1f8f8b569c1a16fd081587efc5a8bef8a124b278e333f9cdd62341f4e2fdc60fb7a0e7d7d13213315feee6058f4a225258ba') prepare() { cd $_pkgname diff --git a/electron25/PKGBUILD b/electron25/PKGBUILD index 4565753c8d..95cff093fb 100644 --- a/electron25/PKGBUILD +++ b/electron25/PKGBUILD @@ -63,7 +63,7 @@ source=("git+https://github.com/electron/electron.git#tag=v$pkgver" use-system-libraries-in-node.patch libxml2-2.12.patch icu-74.patch - electron-la64.patch +#electron-la64.patch ) # shellcheck disable=SC2034 sha256sums=('SKIP' @@ -80,10 +80,7 @@ sha256sums=('SKIP' 'ff588a8a4fd2f79eb8a4f11cf1aa151298ffb895be566c57cc355d47f161f53f' 'bfae9e773edfd0ddbc617777fdd4c0609cba2b048be7afe40f97768e4eb6117e' '547e092f6a20ebd15e486b31111145bc94b8709ec230da89c591963001378845') - '621ed210d75d0e846192c1571bb30db988721224a41572c27769c0288d361c11' - '1b782b0f6d4f645e4e0daa8a4852d63f0c972aa0473319216ff04613a0592a69' - 'ba4dd0a25a4fc3267ed19ccb39f28b28176ca3f97f53a4e9f5e9215280040ea0' - '671fd958b429414a66c209c8b91b6876a77bf4ed38244044ba14703de3f02a66') +# '671fd958b429414a66c209c8b91b6876a77bf4ed38244044ba14703de3f02a66') # Possible replacements are listed in build/linux/unbundle/replace_gn_files.py # Keys are the names in the above script; values are the dependencies in Arch @@ -152,7 +149,7 @@ EOF pushd src/electron patch -Np1 -i ../../std-vector-non-const.patch - patch -Np1 -i ../../electron-la64.patch +# patch -Np1 -i ../../electron-la64.patch popd echo "Running hooks..." 
diff --git a/firefox-developer-edition/PKGBUILD b/firefox-developer-edition/PKGBUILD index 8873766e38..585d38e252 100644 --- a/firefox-developer-edition/PKGBUILD +++ b/firefox-developer-edition/PKGBUILD @@ -26,7 +26,7 @@ makedepends=( imake inetutils jack - lld +# lld llvm mesa nasm @@ -75,7 +75,12 @@ sha256sums=('535c880e5f894a75c83bb9399120302fb213df6723dbbf233e28d2904246409a' 'SKIP' '6522f3eeefbd3550e3bb7dafb4c5c0cef7be0eec11ef3da79e0562b96edc04e3' 'a9b8b4a0a1f4a7b4af77d5fc70c2686d624038909263c795ecc81e0aec7711e9' - '294e6ec79b0a64b5cb2372dbe7a5f9191dd1f68d7aece244e208581a98db8fb3') + '294e6ec79b0a64b5cb2372dbe7a5f9191dd1f68d7aece244e208581a98db8fb3' + '94ccc1e5efe217a8491bea4f3d80c962ccda2ebb3203f67e4d995190d29b0544' + 'ab6eb723cb2b70831cf4e66d6e315e0842f77467812c67d5de2365fc5117c320' + '779cafabc2c738dc26a1f945695802f038af916d6b86ede9493b1cceca7e7428' + '322d0fb02661018d819f5db218b94f8f680b0e5bc6a3648db35de465431590cc' + 'f7b56a3bd993b0a0c05f305c40fabcc4af62c68d43097c0731db9525ab6156bd') b2sums=('4eeb4ea242b9187abafb8e580f2038747bc2962230fa598a4de0f25f999ab378d92fc61fcea39165f0800cc0d89a2bc0fbccca9d92f28281eca979b576821393' 'SKIP' 'd2d14042a03ffcc5ed9212fca9cc167e8bfb2ba3f0d61a89441e033484cb914424d0f2544e0f1bc58992fee9cae03a73679352ee0fac9777fa5633ddc8d76e7d' @@ -105,10 +110,14 @@ prepare() { # Change install dir from 'firefox' to 'firefox-developer-edition' patch -Np1 -i ../firefox-install-dir.patch - patch -Np1 -i ../0001-Add-support-for-LoongArch64.patch + echo "aaaaa" +# patch -Np1 -i ../0001-Add-support-for-LoongArch64.patch + echo "bbbb" patch -Np1 -i ../0002-Enable-VA-API-support-for-AMD-GPUs.patch - patch -Np1 -i ../0003-Remove-architectural-limit-on-VA-API-support.patch - patch -Np1 -i ../0004-Enable-WebRTC-for-LoongArch.patch +# patch -Np1 -i ../0003-Remove-architectural-limit-on-VA-API-support.patch + echo "cccc" +# patch -Np1 -i ../0004-Enable-WebRTC-for-LoongArch.patch + echo "dddd" patch -Np1 -i ../0005-Fix-libyuv-build-with-LSX-LASX.patch echo -n "$_google_api_key" >google-api-key diff --git a/gcc12/PKGBUILD b/gcc12/PKGBUILD index ae16e7ecd7..bcd9d39be0 100644 --- a/gcc12/PKGBUILD +++ b/gcc12/PKGBUILD @@ -39,6 +39,7 @@ options=(!emptydirs !lto) _libdir=usr/lib/gcc/$CHOST/${pkgver%%+*} source=(git+https://sourceware.org/git/gcc.git#commit=${_commit} c89 c99 + gcc-12-loong64.patch ) validpgpkeys=(F3691687D867B81B51CE07D9BBE43771487328A9 # bpiotrowski@archlinux.org 86CFFCA918CF3AF47147588051E8B148A9999C34 # evangelos@foutrelis.com @@ -46,7 +47,8 @@ validpgpkeys=(F3691687D867B81B51CE07D9BBE43771487328A9 # bpiotrowski@archlinux. D3A93CAD751C2AF4F8C7AD516C35B99309B5FA62) # Jakub Jelinek sha256sums=('SKIP' 'de48736f6e4153f03d0a5d38ceb6c6fdb7f054e8f47ddd6af0a3dbf14f27b931' - '2513c6d9984dd0a2058557bf00f06d8d5181734e41dcfe07be7ed86f2959622a') + '2513c6d9984dd0a2058557bf00f06d8d5181734e41dcfe07be7ed86f2959622a' + '4018da43b2cb96145557af4409a009c63c95d4986fd16bee021ea76142615b23') prepare() { [[ ! 
-d gcc ]] && ln -s gcc-${pkgver/+/-} gcc @@ -58,6 +60,7 @@ prepare() { # Arch Linux installs x86_64 libraries /lib sed -i '/m64=/s/lib64/lib/' gcc/config/i386/t-linux64 + patch -p1 -i $srcdir/gcc-12-loong64.patch mkdir -p "$srcdir/gcc-build" } diff --git a/glslang/PKGBUILD b/glslang/PKGBUILD index cbe377fb12..df136c8618 100644 --- a/glslang/PKGBUILD +++ b/glslang/PKGBUILD @@ -23,7 +23,7 @@ options=('staticlibs') source=(${pkgname}-${pkgver}.tar.gz::https://github.com/KhronosGroup/glslang/archive/${pkgver}.tar.gz ${pkgname}-3420.patch::https://github.com/KhronosGroup/glslang/pull/3420.patch) sha256sums=('1c4d0a5a38c8aaf89a2d7e6093be734320599f5a6775b2726beeb05b0c054e66' - '8930d3829bae4e0cd911bf63728d4d49d736d261af1e2cef912b769e6fa8373f') + '785a72531afe9dc17c88eb24a86c7137e15560f1dc4389aa811c5b0f4979029f') prepare() { cd ${pkgname}-${pkgver} diff --git a/keybase/PKGBUILD b/keybase/PKGBUILD index d8ab4a14f3..27240d999d 100644 --- a/keybase/PKGBUILD +++ b/keybase/PKGBUILD @@ -27,7 +27,7 @@ sha256sums=('22e5ae4d1f951ea9f3ffc3cb74de9b9f41b828b2c8a4e5cb6401de6fbccf497b' 'SKIP' '7459a6846ff24c2bf7e6ab1ce31880829cf2692f23ffb3bf77e455f4de7ca34e' '74fd7a777275bdf2128f121e27f722f692302a50d89c6c1d3ec82df1deaffee3' - '0059b988777ecf30a07bb982a164546ff83333cc9d869b0756ab3c034065b8a7') + '5a46d9433efb4244509d26fdf04340fb628de1d19a4dff6944510f9bba69d378') validpgpkeys=('222B85B0F90BE2D24CFEB93F47484E50656D16C7') # Keybase.io Code Signing (v1) prepare() { @@ -73,6 +73,11 @@ build() { export GOFLAGS="-buildmode=pie -trimpath -ldflags=-linkmode=external -mod=readonly -modcacherw" export GOPATH="$srcdir/.gopath" + go mod edit -replace=golang.org/x/sys=github.com/golang/sys@v0.0.0-20220622161953-175b2fd9d664 + go mod edit -replace=golang.org/x/net=github.com/golang/net@v0.0.0-20220622184535-263ec571b305 + go mod edit -replace=github.com/qrtz/nativemessaging=github.com/yetist/nativemessaging@v0.0.0-20240219130319-0aba78239ecd + go mod edit -replace=go.etcd.io/bbolt=go.etcd.io/bbolt@v1.3.7-0.20221114114133-eedea6cb26ef + go mod tidy go build -a -tags production -o ./bin/keybase github.com/keybase/client/go/keybase go build -a -tags production -o ./bin/kbnm github.com/keybase/client/go/kbnm go build -a -tags production -o ./bin/kbfsfuse github.com/keybase/client/go/kbfs/kbfsfuse diff --git a/libretro-mame/PKGBUILD b/libretro-mame/PKGBUILD index b76a54b16c..061e9c5006 100644 --- a/libretro-mame/PKGBUILD +++ b/libretro-mame/PKGBUILD @@ -2,7 +2,7 @@ # Contributor: Oliver Jaksch pkgname=libretro-mame -pkgver=89040 +pkgver=90047 pkgrel=1 arch=(loong64 x86_64) pkgdesc='MAME Arcade core' diff --git a/libvirt/PKGBUILD b/libvirt/PKGBUILD index 27232c604f..06bdf76b66 100644 --- a/libvirt/PKGBUILD +++ b/libvirt/PKGBUILD @@ -86,6 +86,7 @@ source=( libvirt-loongarch.patch ) sha256sums=('8ba2e72ec8bdd2418554a1474c42c35704c30174b7611eaf9a16544b71bcf00a' + 'SKIP' 'f0562941282b157e2ebba9d203c33f4f9c0f3f93562129448f7de6e5df0575fc') validpgpkeys=('453B65310595562855471199CA68BE8010084C9C') # Jiří Denemark @@ -187,12 +188,3 @@ package_libvirt-storage-iscsi-direct() { install -Dv -t "$pkgdir/usr/lib/libvirt/storage-backend" "$pkgdir/../libvirt_storage_backend_iscsi-direct.so" } - -package_libvirt-storage-rbd() { - pkgdesc="Libvirt RBD storage backend" - depends=("libvirt=$pkgver") - optdepends=() - backup=() - - install -Dv -t "$pkgdir/usr/lib/libvirt/storage-backend" "$pkgdir/../libvirt_storage_backend_rbd.so" -} diff --git a/libxslt/PKGBUILD b/libxslt/PKGBUILD index ef0aee7c8f..7489ccaef8 100644 --- a/libxslt/PKGBUILD +++ 
b/libxslt/PKGBUILD @@ -53,7 +53,7 @@ build() { cd libxslt ./configure "${configure_options[@]}" sed -i -e 's/ -shared / -Wl,-O1,--as-needed\0/g' libtool - make + make V=1 } check() { diff --git a/linux-hardened/PKGBUILD b/linux-hardened/PKGBUILD index 1c16ec51c9..2b0e16a3c2 100644 --- a/linux-hardened/PKGBUILD +++ b/linux-hardened/PKGBUILD @@ -44,10 +44,10 @@ validpgpkeys=( ) # https://www.kernel.org/pub/linux/kernel/v6.x/sha256sums.asc sha256sums=('d0e6ce60f0ccd162aabe130c00509590de790e33642a12ed4249aa08ac14f674' - '46a1e0e43247d09c5ae29cfa7a79e272767a49b90c5761c2e4a5656a4ced6cf2' + '79aa07a1108582118c5e4721b9b5440053791d7a98ceb9538d42a511e39097eb' 'c9b26d463e27257d6ad13e59d489db5bd9b103e506dc80d7917bf48471480c85') b2sums=('15b6b33c6fdac5329d56424afc09a722053f045ca1a1553d583d80296a20a3e545d6ac1fd7950e575816df3e2d1c1033aeba779aa374ee25406f05d284d56815' - '914edb986d34ddaa20738ec6d4f0d68b2500ee4662be3f58c1f62ecfa87f3ab88205acf91ec7d03d2f925880d538d0b1716183add857d2bff533e5a0d0596ba5' + 'ca79cea706454ee6aab3bfbc01d3067cef4fdfd49413c8bca52480596ec394d932a4e252b4bbcc3a605bd3a56b4b73493c47a7cdd3e984a5b42767fce0f1c025' 'd285dfd1304c9bc42f9fdaa18f8a393fc599be5d6144abe43959c8f63de7b8973821c50135ce81764428659ddd0eb634d7cce6da697cab9752311ad05dee2df6') export KBUILD_BUILD_HOST=archlinux diff --git a/llvm/PKGBUILD b/llvm/PKGBUILD index d4be161983..c56cbbdaad 100644 --- a/llvm/PKGBUILD +++ b/llvm/PKGBUILD @@ -3,7 +3,7 @@ pkgname=('llvm' 'llvm-libs') pkgver=16.0.6 -pkgrel=1 +pkgrel=2 arch=('loong64' 'x86_64') url="https://llvm.org/" license=('custom:Apache 2.0 with LLVM Exception') diff --git a/lua-yaml/PKGBUILD b/lua-yaml/PKGBUILD index fec793aaf0..9cc34452e1 100644 --- a/lua-yaml/PKGBUILD +++ b/lua-yaml/PKGBUILD @@ -20,7 +20,7 @@ checkdepends=("${_luadeps[@]/#/lua-}" lua-lut) options=(debug) _archive="$_rockname-REL-$pkgver" -_rock="${_archive/-REL}-$_rockrel.linux-$CARCH.rock" +_rock="${_archive/-REL}-$_rockrel.linux-`uname -m`.rock" _rockspec="${_archive/-REL}-$_rockrel.rockspec" source=("https://github.com/lubyk/$_rockname/archive/REL-$pkgver/$_archive.tar.gz") sha256sums=('b4391d182677ab644403bf1ac028c7421c2605db124f9792193013c582a273ec') diff --git a/nsxiv/PKGBUILD b/nsxiv/PKGBUILD index dbe10b71b0..514d6ddf97 100644 --- a/nsxiv/PKGBUILD +++ b/nsxiv/PKGBUILD @@ -15,7 +15,7 @@ depends=('imlib2' 'libx11' # core dependencies 'hicolor-icon-theme') # make icon source=("$pkgname-$pkgver.tar.gz"::"https://codeberg.org/nsxiv/nsxiv/archive/v$pkgver.tar.gz") -sha256sums=('49ef1eb775ef6c34f55dada7a3f446c9c5c6773c9e208509ffef27a656338a90') +sha256sums=('09d1d72b3cbcf17a04e26beb5e81acc9495aaba1f8f1be907bdcd8e4e3007db3') prepare() { cd "$pkgname" diff --git a/ocaml/PKGBUILD b/ocaml/PKGBUILD index a60a990534..91fbe0b6c0 100644 --- a/ocaml/PKGBUILD +++ b/ocaml/PKGBUILD @@ -11,15 +11,15 @@ url="https://caml.inria.fr/" makedepends=('ncurses>=5.6-7' autoconf) optdepends=('ncurses: advanced ncurses features' 'tk: advanced tk features') source=(https://caml.inria.fr/distrib/ocaml-${pkgver%.*}/${pkgname}-${pkgver}.tar.xz -ocaml-5.0.0-la64.patch) +ocaml-5.1.0-la64.patch) sha512sums=('23579b76592e225f2ddec58d78084dfd11befede18b61be71d3896fd72a90cc0fe4fb1f64a7dcbc83239ed69ec4254e13ab86fd810671851044c2a353da3adc5' - 'a95f2e02b318183d76b858b0a1d66ad5c23977d72f6d964b95a8851edf4170ed3971602e031842ef04615d2f6b36198f62aa4ff7e57c188af052d45f22192f65') + 'abb86947fa2c9f1180cb3255c969db67436ef46e04001f39384e37f8560ca257fd5878bb2cb350ee2b490a08fa234922bb14e2bd38962e38e12a71f0b97f5ffd') options=('!makeflags' '!emptydirs' 
'staticlibs') prepare() { cd "${srcdir}/${pkgname}-${pkgver}" - patch -p1 -i $srcdir/ocaml-5.0.0-la64.patch + patch -p1 -i $srcdir/ocaml-5.1.0-la64.patch autoconf } @@ -27,7 +27,7 @@ build() { cd "${srcdir}/${pkgname}-${pkgver}" CFLAGS+=' -ffat-lto-objects' CXXFLAGS+=' -ffat-lto-objects' - ./configure --prefix /usr --mandir /usr/share/man --enable-frame-pointers + ./configure --prefix /usr --mandir /usr/share/man #--enable-frame-pointers make --debug=v world.opt } diff --git a/ocaml/ocaml-5.1.0-la64.patch b/ocaml/ocaml-5.1.0-la64.patch index 6f7678ccf2..180bd1dfdb 100644 --- a/ocaml/ocaml-5.1.0-la64.patch +++ b/ocaml/ocaml-5.1.0-la64.patch @@ -1,23 +1,56 @@ +diff --git a/.gitignore b/.gitignore +index 9e7022db80..d7416f9a4b 100644 +--- a/.gitignore ++++ b/.gitignore +@@ -71,6 +71,9 @@ META + /asmcomp/reload.ml + /asmcomp/scheduling.ml + /asmcomp/CSE.ml ++/asmcomp/loongarch64/CSE.ml ++/asmcomp/loongarch64/reload.ml ++/asmcomp/loongarch64/scheduling.ml + + /boot/ocamlrun + /boot/ocamlruns +@@ -331,3 +334,4 @@ META + /yacc/ocamlyacc + /yacc/version.h + /yacc/.gdb_history ++ diff --git a/Makefile b/Makefile -index bb2c245ea..db03683fb 100644 +index bc12f75dfb..9ec39ef6a7 100644 --- a/Makefile +++ b/Makefile -@@ -528,6 +528,14 @@ partialclean:: +@@ -31,7 +31,7 @@ include stdlib/StdlibModules + + CAMLC = $(BOOT_OCAMLC) $(BOOT_STDLIBFLAGS) -use-prims runtime/primitives + CAMLOPT=$(OCAMLRUN) ./ocamlopt$(EXE) $(STDLIBFLAGS) -I otherlibs/dynlink +-ARCHES=amd64 arm64 power s390x riscv ++ARCHES=amd64 arm64 loongarch64 power s390x riscv + VPATH = utils parsing typing bytecomp file_formats lambda middle_end \ + middle_end/closure middle_end/flambda middle_end/flambda/base_types \ + asmcomp driver toplevel tools +@@ -557,8 +557,18 @@ partialclean:: beforedepend:: lambda/runtimedef.ml +-# Choose the right machine-dependent files ++# If any of these loongarch files need to be modified, please copy the ++# corresponding file from asmcomp/riscv64 to asmcomp/loongarch64, delete the ++# corresponding rule below, update the clean target accordingly, and remove ++# the file from .gitignore. +asmcomp/loongarch64/CSE.ml: asmcomp/riscv/CSE.ml + cp $< $@ +asmcomp/loongarch64/reload.ml: asmcomp/riscv/reload.ml + cp $< $@ +asmcomp/loongarch64/scheduling.ml: asmcomp/riscv/scheduling.ml + cp $< $@ -+ -+ - # Choose the right machine-dependent files - asmcomp/arch.ml: asmcomp/$(ARCH)/arch.ml -@@ -1031,6 +1039,7 @@ clean:: ++# Choose the right machine-dependent files + asmcomp/arch.mli: asmcomp/$(ARCH)/arch.mli + cd asmcomp; $(LN) $(ARCH)/arch.mli . 
+ +@@ -1061,6 +1071,7 @@ clean:: rm -f runtime/domain_state*.inc rm -rf $(DEPDIR) rm -f stdlib/libcamlrun.a stdlib/libcamlrun.lib @@ -27,14 +60,16 @@ index bb2c245ea..db03683fb 100644 runtimeopt: stdlib/libasmrun.$(A) diff --git a/asmcomp/loongarch64/NOTES.md b/asmcomp/loongarch64/NOTES.md new file mode 100644 -index 000000000..f9b63dd62 +index 0000000000..aacca61de0 --- /dev/null +++ b/asmcomp/loongarch64/NOTES.md -@@ -0,0 +1,11 @@ +@@ -0,0 +1,13 @@ +# Supported platforms + +LoongArch in 64-bit mode + ++Debian architecture name: `loongarch64` ++ +# Reference documents + +* Instruction set specification: @@ -44,7 +79,7 @@ index 000000000..f9b63dd62 + - https://loongson.github.io/LoongArch-Documentation/LoongArch-ELF-ABI-EN.html diff --git a/asmcomp/loongarch64/arch.ml b/asmcomp/loongarch64/arch.ml new file mode 100644 -index 000000000..fee052684 +index 0000000000..44bb39d2ea --- /dev/null +++ b/asmcomp/loongarch64/arch.ml @@ -0,0 +1,96 @@ @@ -63,7 +98,7 @@ index 000000000..fee052684 +(* *) +(**************************************************************************) + -+(* Specific operations for the Loongarch processor *) ++(* Specific operations for the LoongArch processor *) + +open Format + @@ -146,7 +181,7 @@ index 000000000..fee052684 +let operation_can_raise _ = false diff --git a/asmcomp/loongarch64/arch.mli b/asmcomp/loongarch64/arch.mli new file mode 100644 -index 000000000..57174fabe +index 0000000000..57174fabea --- /dev/null +++ b/asmcomp/loongarch64/arch.mli @@ -0,0 +1,76 @@ @@ -228,10 +263,10 @@ index 000000000..57174fabe +val operation_can_raise : specific_operation -> bool diff --git a/asmcomp/loongarch64/emit.mlp b/asmcomp/loongarch64/emit.mlp new file mode 100644 -index 000000000..b80b4f172 +index 0000000000..92a2ab2695 --- /dev/null +++ b/asmcomp/loongarch64/emit.mlp -@@ -0,0 +1,772 @@ +@@ -0,0 +1,775 @@ +(**************************************************************************) +(* *) +(* OCaml *) @@ -285,9 +320,6 @@ index 000000000..b80b4f172 + +(* Output a symbol *) + -+let emit_symbol s = -+ emit_symbol '$' s -+ +let emit_jump op s = + if !Clflags.dlcode || !Clflags.pic_code + then `{emit_string op} %plt({emit_symbol s})` @@ -518,6 +550,8 @@ index 000000000..b80b4f172 + ` move {emit_reg dst}, {emit_reg src}\n` + | {loc = Reg _; typ = Float}, {loc = Reg _; typ = Float} -> + ` fmov.d {emit_reg dst}, {emit_reg src}\n` ++ | {loc = Reg _; typ = Float}, {loc = Reg _; typ = (Val | Int | Addr)} -> ++ ` movfr2gr.d {emit_reg dst}, {emit_reg src}\n` + | {loc = Reg _; typ = (Val | Int | Addr)}, {loc = Stack s} -> + let (base, ofs) = slot_offset env s (register_class dst) in + emit_store src ofs base @@ -530,7 +564,6 @@ index 000000000..b80b4f172 + | {loc = Stack s; typ = Float}, {loc = Reg _} -> + let (base, ofs) = slot_offset env s (register_class src) in + emit_float_load dst ofs base -+ | {loc = Reg _; typ = Float}, {loc = Reg _; typ = (Val | Int | Addr)} + | {loc = Stack _}, {loc = Stack _} + | {loc = Unknown}, _ | _, {loc = Unknown} -> + Misc.fatal_error "Emit: Imove" @@ -645,7 +678,8 @@ index 000000000..b80b4f172 + let offset = Domainstate.(idx_of_field Domain_young_limit) * 8 in + emit_addimm reg_alloc_ptr reg_alloc_ptr n; + ` ld.d {emit_reg reg_tmp}, {emit_reg reg_domain_state_ptr}, {emit_int offset}\n`; -+ ` bltu {emit_reg reg_alloc_ptr}, {emit_reg reg_tmp}, {emit_label lbl_call_gc}\n`; ++ ` sltu {emit_reg reg_tmp}, {emit_reg reg_alloc_ptr}, {emit_reg reg_tmp}\n`; ++ ` bnez {emit_reg reg_tmp}, {emit_label lbl_call_gc}\n`; + `{emit_label lbl_after_alloc}:\n`; + ` 
addi.d {emit_reg i.res.(0)}, {emit_reg reg_alloc_ptr}, 8\n`; + env.call_gc_sites <- @@ -711,6 +745,9 @@ index 000000000..b80b4f172 + ` sltu {emit_reg i.res.(0)}, {emit_reg i.arg.(1)}, {emit_reg i.arg.(0)}\n`; + ` xori {emit_reg i.res.(0)}, {emit_reg i.res.(0)}, 1\n`; + end ++ | Lop(Icompf cmp) -> ++ let negated = emit_float_test cmp ~res:i.res.(0) ~arg:i.arg in ++ if negated then ` xori {emit_reg i.res.(0)}, {emit_reg i.res.(0)}, 1\n`; + | Lop(Iintop (Icheckbound)) -> + let lbl = bound_error_label env i.dbg in + ` bleu {emit_reg i.arg.(0)}, {emit_reg i.arg.(1)}, {emit_label lbl}\n` @@ -875,24 +912,25 @@ index 000000000..b80b4f172 + preproc_stack_check + ~fun_body:fundecl.fun_body ~frame_size:(frame_size env) ~trap_size:16 + in -+ let handle_overflow = ref None in -+ if contains_nontail_calls || max_frame_size >= stack_threshold_size then begin -+ let overflow = new_label () and ret = new_label () in -+ let threshold_offset = Domainstate.stack_ctx_words * 8 + stack_threshold_size in -+ let f = max_frame_size + threshold_offset in -+ let offset = Domainstate.(idx_of_field Domain_current_stack) * 8 in -+ ` ld.d {emit_reg reg_tmp}, {emit_reg reg_domain_state_ptr}, {emit_int offset}\n`; -+ emit_addimm reg_tmp reg_tmp f; -+ ` bltu $sp, {emit_reg reg_tmp}, {emit_label overflow}\n`; -+ `{emit_label ret}:\n`; -+ handle_overflow := Some (overflow, ret) -+ end; ++ let handle_overflow = ++ if contains_nontail_calls || max_frame_size >= stack_threshold_size then begin ++ let overflow = new_label () and ret = new_label () in ++ let threshold_offset = Domainstate.stack_ctx_words * 8 + stack_threshold_size in ++ let f = max_frame_size + threshold_offset in ++ let offset = Domainstate.(idx_of_field Domain_current_stack) * 8 in ++ ` ld.d {emit_reg reg_tmp}, {emit_reg reg_domain_state_ptr}, {emit_int offset}\n`; ++ emit_addimm reg_tmp reg_tmp f; ++ ` bltu $sp, {emit_reg reg_tmp}, {emit_label overflow}\n`; ++ `{emit_label ret}:\n`; ++ Some (overflow, ret) ++ end else None ++ in + + emit_all env fundecl.fun_body; + List.iter emit_call_gc env.call_gc_sites; + List.iter emit_call_bound_error env.bound_error_sites; + -+ begin match !handle_overflow with ++ begin match handle_overflow with + | None -> () + | Some (overflow, ret) -> + `{emit_label overflow}:\n`; @@ -1006,10 +1044,10 @@ index 000000000..b80b4f172 + } diff --git a/asmcomp/loongarch64/proc.ml b/asmcomp/loongarch64/proc.ml new file mode 100644 -index 000000000..62666c748 +index 0000000000..0380761184 --- /dev/null +++ b/asmcomp/loongarch64/proc.ml -@@ -0,0 +1,319 @@ +@@ -0,0 +1,318 @@ +# 2 "asmcomp/loongarch64/proc.ml" +(**************************************************************************) +(* *) @@ -1051,8 +1089,8 @@ index 000000000..62666c748 + s0 18 general purpose (preserved by C) + t0, t1 19-20 temporaries (used by call veneers) + s1 21 trap pointer (preserved by C) -+ s7 22 allocation pointer (preserved by C) -+ s8 23 domain pointer (preserved by C) ++ s7 22 allocation pointer (preserved by C) ++ s8 23 domain pointer (preserved by C) + + Floating-point register map + --------------------------- @@ -1222,6 +1260,9 @@ index 000000000..62666c748 + if !float <= last_float then begin + loc.(i) <- [| phys_reg !float |]; + incr float ++ end else if !int <= last_int then begin ++ loc.(i) <- [| phys_reg !int |]; ++ incr int + end else begin + loc.(i) <- [| stack_slot (make_stack !ofs) Float |]; + ofs := !ofs + size_float @@ -1241,10 +1282,6 @@ index 000000000..62666c748 + +let loc_exn_bucket = phys_reg 0 + -+(* Volatile registers: none *) -+ -+let 
regs_are_volatile _ = false -+ +(* Registers destroyed by operations *) + +let destroyed_at_c_noalloc_call = @@ -1331,7 +1368,7 @@ index 000000000..62666c748 +let init () = () diff --git a/asmcomp/loongarch64/selection.ml b/asmcomp/loongarch64/selection.ml new file mode 100644 -index 000000000..be29364c1 +index 0000000000..be29364c16 --- /dev/null +++ b/asmcomp/loongarch64/selection.ml @@ -0,0 +1,70 @@ @@ -1406,13 +1443,13 @@ index 000000000..be29364c1 +let fundecl ~future_funcnames f = + (new selector)#emit_fundecl ~future_funcnames f diff --git a/configure b/configure -index 19764d19a..6415b4cc1 100755 +index 4c3b5fda20..f748759c9a 100755 Binary files a/configure and b/configure differ diff --git a/configure.ac b/configure.ac -index a7974b042..069a931d7 100644 +index aba3569f7c..335f1fbe00 100644 --- a/configure.ac +++ b/configure.ac -@@ -1079,7 +1079,8 @@ AS_IF([test x"$supports_shared_libraries" = 'xtrue'], +@@ -1163,7 +1163,8 @@ AS_IF([test x"$supports_shared_libraries" = 'xtrue'], [aarch64-*-freebsd*], [natdynlink=true], [aarch64-*-openbsd*], [natdynlink=true], [aarch64-*-netbsd*], [natdynlink=true], @@ -1422,18 +1459,18 @@ index a7974b042..069a931d7 100644 AS_CASE([$enable_native_toplevel,$natdynlink], [yes,false], -@@ -1199,7 +1200,9 @@ AS_CASE([$host], +@@ -1285,7 +1286,9 @@ AS_CASE([$host], [x86_64-*-cygwin*], - [arch=amd64; system=cygwin], + [has_native_backend=yes; arch=amd64; system=cygwin], [riscv64-*-linux*], -- [arch=riscv; model=riscv64; system=linux] -+ [arch=riscv; model=riscv64; system=linux], +- [has_native_backend=yes; arch=riscv; model=riscv64; system=linux] ++ [has_native_backend=yes; arch=riscv; model=riscv64; system=linux], + [loongarch64-*-linux*], + [has_native_backend=yes; arch=loongarch64; system=linux] ) - AS_CASE([$ccomptype], -@@ -1302,7 +1305,7 @@ default_aspp="$CC -c" + native_cflags='' +@@ -1394,7 +1397,7 @@ default_aspp="$CC -c" AS_CASE([$as_target,$ocaml_cv_cc_vendor], [*-*-linux*,gcc-*], [AS_CASE([$as_cpu], @@ -1442,7 +1479,7 @@ index a7974b042..069a931d7 100644 [default_as="${toolpref}as"])], [i686-pc-windows,*], [default_as="ml -nologo -coff -Cp -c -Fo" -@@ -1940,7 +1943,7 @@ AS_IF([$native_compiler], +@@ -2073,7 +2076,7 @@ AS_IF([$native_compiler], AS_IF([test x"$enable_frame_pointers" = "xyes"], [AS_CASE(["$host,$cc_basename"], @@ -1452,11 +1489,11 @@ index a7974b042..069a931d7 100644 frame_pointers=true AC_DEFINE([WITH_FRAME_POINTERS]) diff --git a/runtime/caml/stack.h b/runtime/caml/stack.h -index 0c2e0b2fe..ebdc1d55a 100644 +index d595abd0da..59d72a0aca 100644 --- a/runtime/caml/stack.h +++ b/runtime/caml/stack.h -@@ -70,6 +70,17 @@ - #define Saved_return_address(sp) *((intnat *)((sp) - 8)) +@@ -75,6 +75,17 @@ + #define Pop_frame_pointer(sp) sp += sizeof(value) #endif +#ifdef TARGET_loongarch64 @@ -1475,7 +1512,7 @@ index 0c2e0b2fe..ebdc1d55a 100644 extern intnat caml_globals_inited; diff --git a/runtime/loongarch64.S b/runtime/loongarch64.S new file mode 100644 -index 000000000..d2289f821 +index 0000000000..e51eadb940 --- /dev/null +++ b/runtime/loongarch64.S @@ -0,0 +1,827 @@ @@ -2294,21 +2331,21 @@ index 000000000..d2289f821 + +/* GC roots for callback */ + -+ -+ .section .data ++OBJECT(caml_system.frametable) ++ .quad 2 /* two descriptors */ ++ .quad L(caml_retaddr) /* return address into callback */ ++ .short -1 /* negative frame size => use callback link */ ++ .short 0 /* no roots */ + .align 3 -+ .globl caml_system__frametable -+ .type caml_system__frametable, @object -+caml_system__frametable: -+ .quad 1 /* one descriptor */ -+ .quad 
.Lcaml_retaddr /* return address into callback */ -+ .short -1 /* negative frame size => use callback link */ -+ .short 0 /* no roots */ ++ .quad L(frame_runstack) /* return address into fiber handler */ ++ .short -1 /* negative frame size => use callback link */ ++ .short 0 /* no roots */ + .align 3 -+ .size caml_system__frametable, .-caml_system__frametable ++END_OBJECT(caml_system.frametable) ++.end diff --git a/testsuite/tools/asmgen_loongarch64.S b/testsuite/tools/asmgen_loongarch64.S new file mode 100644 -index 000000000..97fbeae04 +index 0000000000..97fbeae046 --- /dev/null +++ b/testsuite/tools/asmgen_loongarch64.S @@ -0,0 +1,75 @@ From a8fbe115f1ee44849b715215f977911a5eeaf8dd Mon Sep 17 00:00:00 2001 From: Xiaotian Wu Date: Wed, 3 Apr 2024 18:01:48 +0800 Subject: [PATCH 16/23] trojan --- trojan/PKGBUILD | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/trojan/PKGBUILD b/trojan/PKGBUILD index 7a73ad018a..6fcef08e1e 100644 --- a/trojan/PKGBUILD +++ b/trojan/PKGBUILD @@ -3,7 +3,7 @@ pkgname=trojan pkgver=1.16.0 -pkgrel=10 +pkgrel=11 pkgdesc="An unidentifiable mechanism that helps you bypass GFW" arch=('loong64' 'x86_64') url="https://github.com/trojan-gfw/trojan" From ad5e346aa753ca9f34ed4bbc401b54be8743b876 Mon Sep 17 00:00:00 2001 From: Xiaotian Wu Date: Wed, 24 Apr 2024 10:56:47 +0800 Subject: [PATCH 17/23] update --- apr-util/PKGBUILD | 2 +- atril/PKGBUILD | 2 +- enchant/PKGBUILD | 2 +- eog/PKGBUILD | 2 +- evince/PKGBUILD | 2 +- fcitx5-chinese-addons/PKGBUILD | 2 +- fcitx5-configtool/PKGBUILD | 2 +- firefox/PKGBUILD | 2 +- frei0r-plugins/PKGBUILD | 2 +- gdb/PKGBUILD | 2 +- geary/PKGBUILD | 2 +- gedit/PKGBUILD | 2 +- gnome-software/PKGBUILD | 2 +- graphicsmagick/PKGBUILD | 2 +- graphviz/PKGBUILD | 2 +- imagemagick/PKGBUILD | 2 +- imath/PKGBUILD | 2 +- imlib2/PKGBUILD | 2 +- libcmis/PKGBUILD | 2 +- libheif/PKGBUILD | 2 +- libime/PKGBUILD | 2 +- libixion/PKGBUILD | 2 +- liborcus/PKGBUILD | 2 +- libpeas-2/PKGBUILD | 2 +- libpeas/PKGBUILD | 2 +- libreoffice-fresh/PKGBUILD | 5 ++++- lm_sensors/PKGBUILD | 2 +- mate-applets/PKGBUILD | 4 ++-- mutter/PKGBUILD | 2 +- notepadqq/PKGBUILD | 2 +- obs-studio/PKGBUILD | 2 +- qt5-base/PKGBUILD | 2 +- qt6-base/PKGBUILD | 2 +- remmina/PKGBUILD | 2 +- samba/PKGBUILD | 2 +- sdl2_image/PKGBUILD | 2 +- source-highlight/PKGBUILD | 2 +- subversion/PKGBUILD | 2 +- thunderbird/PKGBUILD | 2 +- tracker3-miners/PKGBUILD | 2 +- vlc/PKGBUILD | 2 +- weston/PKGBUILD | 2 +- workrave/PKGBUILD | 2 +- 43 files changed, 47 insertions(+), 44 deletions(-) diff --git a/apr-util/PKGBUILD b/apr-util/PKGBUILD index 4f01001b24..ee6e36aa12 100644 --- a/apr-util/PKGBUILD +++ b/apr-util/PKGBUILD @@ -3,7 +3,7 @@ pkgname=apr-util pkgver=1.6.3 -pkgrel=2 +pkgrel=3 pkgdesc="The Apache Portable Runtime" arch=('loong64' 'x86_64') url="https://apr.apache.org/" diff --git a/atril/PKGBUILD b/atril/PKGBUILD index 0443df48a2..1ed46496ef 100644 --- a/atril/PKGBUILD +++ b/atril/PKGBUILD @@ -4,7 +4,7 @@ pkgname=atril pkgver=1.26.1 -pkgrel=1 +pkgrel=2 pkgdesc="MATE document viewer" url="https://mate-desktop.org" arch=('loong64' 'x86_64') diff --git a/enchant/PKGBUILD b/enchant/PKGBUILD index 691f2e6da1..20b6fb1b97 100644 --- a/enchant/PKGBUILD +++ b/enchant/PKGBUILD @@ -4,7 +4,7 @@ pkgname=enchant pkgver=2.6.5 -pkgrel=1 +pkgrel=2 pkgdesc="A wrapper library for generic spell checking" arch=('loong64' 'x86_64') url="https://abiword.github.io/enchant/" diff --git a/eog/PKGBUILD b/eog/PKGBUILD index 85990855d9..8a391bec03 100644 --- a/eog/PKGBUILD +++ b/eog/PKGBUILD @@ -8,7 +8,7 @@ 
pkgname=( eog-docs ) pkgver=45.2 -pkgrel=1 +pkgrel=2 pkgdesc="Eye of Gnome: An image viewing and cataloging program" url="https://wiki.gnome.org/Apps/EyeOfGnome" arch=(loong64 x86_64) diff --git a/evince/PKGBUILD b/evince/PKGBUILD index 2638f181b7..b26f161ad5 100644 --- a/evince/PKGBUILD +++ b/evince/PKGBUILD @@ -8,7 +8,7 @@ pkgname=( evince-lib-docs ) pkgver=45.0 -pkgrel=1 +pkgrel=2 pkgdesc="Document viewer (PDF, PostScript, XPS, djvu, dvi, tiff, cbr, cbz, cb7, cbt)" url="https://wiki.gnome.org/Apps/Evince" arch=(loong64 x86_64) diff --git a/fcitx5-chinese-addons/PKGBUILD b/fcitx5-chinese-addons/PKGBUILD index 01eb6db0d7..82a5bf946c 100644 --- a/fcitx5-chinese-addons/PKGBUILD +++ b/fcitx5-chinese-addons/PKGBUILD @@ -3,7 +3,7 @@ pkgname=fcitx5-chinese-addons pkgver=5.1.3 -pkgrel=1 +pkgrel=2 pkgdesc="Addons related to Chinese, including IME previous bundled inside fcitx4" arch=('loong64' 'x86_64') url="https://github.com/fcitx/fcitx5-chinese-addons" diff --git a/fcitx5-configtool/PKGBUILD b/fcitx5-configtool/PKGBUILD index a45cce2416..dcd20e82c1 100644 --- a/fcitx5-configtool/PKGBUILD +++ b/fcitx5-configtool/PKGBUILD @@ -3,7 +3,7 @@ pkgname=fcitx5-configtool pkgver=5.1.3 -pkgrel=1 +pkgrel=2 pkgdesc="Configuration Tool for Fcitx5" arch=('loong64' 'x86_64') url="https://github.com/fcitx/fcitx5-configtool" diff --git a/firefox/PKGBUILD b/firefox/PKGBUILD index 26f4516441..02224113f2 100644 --- a/firefox/PKGBUILD +++ b/firefox/PKGBUILD @@ -4,7 +4,7 @@ pkgname=firefox pkgver=122.0 -pkgrel=1 +pkgrel=2 pkgdesc="Standalone web browser from mozilla.org" url="https://www.mozilla.org/firefox/" arch=(loong64 x86_64) diff --git a/frei0r-plugins/PKGBUILD b/frei0r-plugins/PKGBUILD index d2911cab2c..d498d1f134 100644 --- a/frei0r-plugins/PKGBUILD +++ b/frei0r-plugins/PKGBUILD @@ -3,7 +3,7 @@ pkgname=frei0r-plugins pkgver=2.3.2 -pkgrel=2 +pkgrel=3 pkgdesc='Collection of video effect plugins' arch=('loong64' 'x86_64') url='https://frei0r.dyne.org/' diff --git a/gdb/PKGBUILD b/gdb/PKGBUILD index b8b36af61d..efa638e64f 100644 --- a/gdb/PKGBUILD +++ b/gdb/PKGBUILD @@ -8,7 +8,7 @@ pkgbase=gdb # of gdb (for arm/avr/...) 
pkgname=(gdb gdb-common) pkgver=14.1 -pkgrel=1 +pkgrel=2 pkgdesc='The GNU Debugger' arch=(loong64 x86_64) url='https://www.gnu.org/software/gdb/' diff --git a/geary/PKGBUILD b/geary/PKGBUILD index cffd5c1427..d439433ea2 100644 --- a/geary/PKGBUILD +++ b/geary/PKGBUILD @@ -5,7 +5,7 @@ pkgname=geary pkgver=44.1 -pkgrel=2 +pkgrel=3 epoch=1 pkgdesc='A lightweight email client for the GNOME desktop' arch=(loong64 x86_64) diff --git a/gedit/PKGBUILD b/gedit/PKGBUILD index 105eb1d054..1810e1d02f 100644 --- a/gedit/PKGBUILD +++ b/gedit/PKGBUILD @@ -4,7 +4,7 @@ pkgname=gedit pkgver=46.1 -pkgrel=1 +pkgrel=2 pkgdesc="GNOME Text Editor" url="https://wiki.gnome.org/Apps/Gedit" arch=(loong64 x86_64) diff --git a/gnome-software/PKGBUILD b/gnome-software/PKGBUILD index 8a0e45bdff..12c4687868 100644 --- a/gnome-software/PKGBUILD +++ b/gnome-software/PKGBUILD @@ -6,7 +6,7 @@ pkgname=gnome-software pkgver=45.3 -pkgrel=1 +pkgrel=2 pkgdesc="GNOME Software Tools" url="https://wiki.gnome.org/Apps/Software/" arch=(loong64 x86_64) diff --git a/graphicsmagick/PKGBUILD b/graphicsmagick/PKGBUILD index 63596ab251..14cada5322 100644 --- a/graphicsmagick/PKGBUILD +++ b/graphicsmagick/PKGBUILD @@ -6,7 +6,7 @@ pkgname=graphicsmagick pkgver=1.3.42 -pkgrel=2 +pkgrel=3 pkgdesc='Image processing system' url='http://www.graphicsmagick.org/' arch=(loong64 x86_64) diff --git a/graphviz/PKGBUILD b/graphviz/PKGBUILD index eba587dbe4..cb984b3716 100644 --- a/graphviz/PKGBUILD +++ b/graphviz/PKGBUILD @@ -5,7 +5,7 @@ pkgname=graphviz pkgver=9.0.0 -pkgrel=1 +pkgrel=2 pkgdesc='Graph visualization software' url='https://www.graphviz.org/' license=('EPL') diff --git a/imagemagick/PKGBUILD b/imagemagick/PKGBUILD index b03e93833a..f3ede7cde8 100644 --- a/imagemagick/PKGBUILD +++ b/imagemagick/PKGBUILD @@ -3,7 +3,7 @@ pkgname=imagemagick pkgver=7.1.1.27 -pkgrel=1 +pkgrel=2 _relname=ImageMagick-${pkgver%%.*} _tarname=ImageMagick-${pkgver%.*}-${pkgver##*.} pkgdesc='An image viewing/manipulation program' diff --git a/imath/PKGBUILD b/imath/PKGBUILD index bda655e067..6c558e605e 100644 --- a/imath/PKGBUILD +++ b/imath/PKGBUILD @@ -2,7 +2,7 @@ pkgname=imath pkgver=3.1.10 -pkgrel=1 +pkgrel=2 pkgdesc='A C++ and python library of 2D and 3D vector, matrix, and math operations for computer graphics' url='https://www.openexr.com/' arch=(loong64 x86_64) diff --git a/imlib2/PKGBUILD b/imlib2/PKGBUILD index 2f52a13b07..adf7e56e7e 100644 --- a/imlib2/PKGBUILD +++ b/imlib2/PKGBUILD @@ -4,7 +4,7 @@ pkgname=imlib2 pkgver=1.12.1 -pkgrel=2 +pkgrel=3 pkgdesc='Library that does image file loading and saving as well as rendering, manipulation, arbitrary polygon support' url='https://sourceforge.net/projects/enlightenment/' arch=('loong64' 'x86_64') diff --git a/libcmis/PKGBUILD b/libcmis/PKGBUILD index 6ad4955547..37586ee8a9 100644 --- a/libcmis/PKGBUILD +++ b/libcmis/PKGBUILD @@ -2,7 +2,7 @@ pkgname=libcmis pkgver=0.6.2 -pkgrel=1 +pkgrel=2 pkgdesc="a C/C++ client library for the CMIS protocol" arch=('loong64' 'x86_64') url="https://github.com/tdf/libcmis" diff --git a/libheif/PKGBUILD b/libheif/PKGBUILD index e334ed160d..1a2050c90f 100644 --- a/libheif/PKGBUILD +++ b/libheif/PKGBUILD @@ -3,7 +3,7 @@ pkgname=libheif pkgver=1.17.6 -pkgrel=3 +pkgrel=4 pkgdesc='An HEIF and AVIF file format decoder and encoder' arch=(loong64 x86_64) url='https://github.com/strukturag/libheif' diff --git a/libime/PKGBUILD b/libime/PKGBUILD index 055609d636..3679e35913 100644 --- a/libime/PKGBUILD +++ b/libime/PKGBUILD @@ -3,7 +3,7 @@ pkgname=libime pkgver=1.1.5 -pkgrel=1 +pkgrel=2 
pkgdesc="A library to support generic input method implementation" arch=('loong64' 'x86_64') url="https://github.com/fcitx/libime" diff --git a/libixion/PKGBUILD b/libixion/PKGBUILD index 41e8a9cd74..848f399053 100644 --- a/libixion/PKGBUILD +++ b/libixion/PKGBUILD @@ -3,7 +3,7 @@ pkgname=libixion pkgver=0.19.0 -pkgrel=1 +pkgrel=2 pkgdesc="A general purpose formula parser & interpreter" arch=('loong64' 'x86_64') url="https://gitlab.com/ixion/ixion/blob/master/README.md" diff --git a/liborcus/PKGBUILD b/liborcus/PKGBUILD index ee32a3cc72..57ecc1604c 100644 --- a/liborcus/PKGBUILD +++ b/liborcus/PKGBUILD @@ -3,7 +3,7 @@ pkgname=liborcus pkgver=0.19.2 -pkgrel=1 +pkgrel=2 pkgdesc="File import filter library for spreadsheet documents." arch=('loong64' 'x86_64') url="https://gitlab.com/orcus/orcus/blob/master/README.md" diff --git a/libpeas-2/PKGBUILD b/libpeas-2/PKGBUILD index a4a05b771d..b9a11293b4 100644 --- a/libpeas-2/PKGBUILD +++ b/libpeas-2/PKGBUILD @@ -7,7 +7,7 @@ pkgname=( libpeas-2-docs ) pkgver=2.0.1 -pkgrel=1 +pkgrel=2 pkgdesc="GObject Plugin System" url="https://wiki.gnome.org/Projects/Libpeas" arch=(loong64 x86_64) diff --git a/libpeas/PKGBUILD b/libpeas/PKGBUILD index 90017bfba8..84bbddd9f4 100644 --- a/libpeas/PKGBUILD +++ b/libpeas/PKGBUILD @@ -8,7 +8,7 @@ pkgname=( libpeas-docs ) pkgver=1.36.0 -pkgrel=4 +pkgrel=5 pkgdesc="GObject Plugin System" url="https://wiki.gnome.org/Projects/Libpeas" arch=(loong64 x86_64) diff --git a/libreoffice-fresh/PKGBUILD b/libreoffice-fresh/PKGBUILD index ea641d0b5f..a4ed0ec1e5 100644 --- a/libreoffice-fresh/PKGBUILD +++ b/libreoffice-fresh/PKGBUILD @@ -12,7 +12,7 @@ pkgbase=libreoffice-fresh pkgname=('libreoffice-fresh-sdk' 'libreoffice-fresh') _LOver=7.6.4.1 pkgver=7.6.4 -pkgrel=2 +pkgrel=3 arch=('loong64' 'x86_64') license=('LGPL3') url="https://www.libreoffice.org/" @@ -58,6 +58,7 @@ source=(${_mirror}/libreoffice{,-help,-translations}-${_LOver}.tar.xz{,.asc} ${_additional_source_url2}/odfvalidator-1.2.0-incubating-SNAPSHOT-jar-with-dependencies-971c54fd38a968f5860014b44301872706f9e540.jar # for test suite ${_additional_source_url2}/f543e6e2d7275557a839a164941c0a86e5f2c3f2a0042bfc434c88c6dde9e140-opens___.ttf ${_additional_source_url2}/185d60944ea767075d27247c3162b3bc-unowinreg.dll + https://gitweb.gentoo.org/repo/gentoo.git/plain/app-office/libreoffice/files/libreoffice-7.6.5.2-gcc14.patch make-pyuno-work-with-system-wide-module-install.diff 623ea5c.diff fix-build-against-system-libxml-2.12.diff @@ -110,6 +111,7 @@ sha256sums=('13fea7b8f24c776313b9e08628aa590390bea45064be73bc70ee7b1b70aa6a1e' '984f2a479df79e27e7b01a5815ac53ae64e07746b882262d8a64566494515504' 'f543e6e2d7275557a839a164941c0a86e5f2c3f2a0042bfc434c88c6dde9e140' 'eafde646a7dbe46d20c291685b0beac2382174d78d66ee990e229a1bf6e6cec6' + '17cc24b1d4d47562ea28bd89b302d40c1e112494a1a6b52c64e3a5292c18453f' 'c463654a73ecfbc242ff109726fb4faecdbfb3d91affafe919b24bea65afb563' '440c9af5f3d1213d8ed7177282380f25cbc981cabc8b590dcb777aaae84178e5' '793a52abff29b3db51a1db9686b561911b9b3de70bd6dd02bbc1d78fcd960648' @@ -145,6 +147,7 @@ prepare() { # fix build with icu 74 patch -Np1 -i "${srcdir}"/libreoffice-7.5.8.2-icu-74-compatibility.patch + patch -Np1 -i ${srcdir}/libreoffice-7.6.5.2-gcc14.patch #use the CFLAGS but remove the LibO overridden ones for i in $CFLAGS; do diff --git a/lm_sensors/PKGBUILD b/lm_sensors/PKGBUILD index 28f4e15ee8..1f1203f520 100644 --- a/lm_sensors/PKGBUILD +++ b/lm_sensors/PKGBUILD @@ -5,7 +5,7 @@ pkgname=lm_sensors pkgver=3.6.0.r41.g31d1f125 
_commit=31d1f125d8076f1c8c8f3224b31d240e6e6a1763 #_pkgver=${pkgver//./-} -pkgrel=5 +pkgrel=6 epoch=1 pkgdesc="Collection of user space tools for general SMBus access and hardware monitoring" arch=('loong64' 'x86_64') diff --git a/mate-applets/PKGBUILD b/mate-applets/PKGBUILD index b7f9c4386a..0d21b6e187 100644 --- a/mate-applets/PKGBUILD +++ b/mate-applets/PKGBUILD @@ -4,13 +4,13 @@ pkgname=mate-applets pkgver=1.26.1 -pkgrel=3 +pkgrel=4 pkgdesc="Applets for MATE panel" arch=('loong64' 'x86_64') url="https://mate-desktop.org" license=('GPL') depends=('gtksourceview3' 'libgtop' 'libnotify' 'mate-panel' 'polkit' 'upower' 'wireless_tools') -makedepends=('intltool' 'itstool' 'gucharmap' 'yelp-tools') +makedepends=('intltool' 'itstool' 'gucharmap' 'yelp-tools' 'libnl') optdepends=('fortune-mod: for displaying fortune cookies in the Wanda the Fish applet' 'gucharmap: character picker applet') groups=('mate-extra') conflicts=('mate-applets-gtk3' 'mate-netspeed' 'mate-netspeed-gtk3') diff --git a/mutter/PKGBUILD b/mutter/PKGBUILD index 6c534c147d..f0f46c8734 100644 --- a/mutter/PKGBUILD +++ b/mutter/PKGBUILD @@ -8,7 +8,7 @@ pkgname=( mutter-docs ) pkgver=45.3 -pkgrel=1 +pkgrel=2 pkgdesc="Window manager and compositor for GNOME" url="https://gitlab.gnome.org/GNOME/mutter" arch=(loong64 x86_64) diff --git a/notepadqq/PKGBUILD b/notepadqq/PKGBUILD index 575827b024..81552cf671 100644 --- a/notepadqq/PKGBUILD +++ b/notepadqq/PKGBUILD @@ -3,7 +3,7 @@ pkgname=notepadqq pkgver=2.0.0beta -pkgrel=1 +pkgrel=2 pkgdesc='Notepad++-like text editor for Linux' arch=('loong64' 'x86_64') url='https://notepadqq.com/' diff --git a/obs-studio/PKGBUILD b/obs-studio/PKGBUILD index 8f1d16ed9a..4846ce2aec 100644 --- a/obs-studio/PKGBUILD +++ b/obs-studio/PKGBUILD @@ -3,7 +3,7 @@ pkgname=obs-studio pkgver=30.0.2 -pkgrel=3 +pkgrel=4 pkgdesc="Free, open source software for live streaming and recording" arch=('loong64' 'x86_64') url="https://obsproject.com" diff --git a/qt5-base/PKGBUILD b/qt5-base/PKGBUILD index ac235d4b46..327c50ead9 100644 --- a/qt5-base/PKGBUILD +++ b/qt5-base/PKGBUILD @@ -5,7 +5,7 @@ pkgbase=qt5-base pkgname=(qt5-base qt5-xcb-private-headers) _basever=5.15.12 pkgver=5.15.12+kde+r149 -pkgrel=1 +pkgrel=2 _commit=2eb258f8548ed2218d5b2041c15b7359e99f475f arch=('loong64' 'x86_64') url='https://www.qt.io' diff --git a/qt6-base/PKGBUILD b/qt6-base/PKGBUILD index 27e7b3f346..52c4f85229 100644 --- a/qt6-base/PKGBUILD +++ b/qt6-base/PKGBUILD @@ -5,7 +5,7 @@ pkgname=qt6-base _qtver=6.6.1 pkgver=${_qtver/-/} -pkgrel=3 +pkgrel=4 arch=(loong64 x86_64) url='https://www.qt.io' license=(GPL3 LGPL3 FDL custom) diff --git a/remmina/PKGBUILD b/remmina/PKGBUILD index c5852b4d9c..279f91870c 100644 --- a/remmina/PKGBUILD +++ b/remmina/PKGBUILD @@ -4,7 +4,7 @@ pkgname=remmina epoch=1 pkgver=1.4.33 -pkgrel=3 +pkgrel=4 pkgdesc="remote desktop client written in GTK+" url="https://www.remmina.org/" arch=('loong64' 'x86_64') diff --git a/samba/PKGBUILD b/samba/PKGBUILD index 1eaf4e6938..4250a2013b 100644 --- a/samba/PKGBUILD +++ b/samba/PKGBUILD @@ -10,7 +10,7 @@ pkgbase=samba pkgname=('libwbclient' 'smbclient' 'samba') pkgver=4.19.4 -pkgrel=1 +pkgrel=2 arch=(loong64 x86_64) url="https://www.samba.org" license=('GPL-3.0-or-later') diff --git a/sdl2_image/PKGBUILD b/sdl2_image/PKGBUILD index 48920ecbc4..8194d845d1 100644 --- a/sdl2_image/PKGBUILD +++ b/sdl2_image/PKGBUILD @@ -1,7 +1,7 @@ # Maintainer: Sven-Hendrik Haase pkgname=sdl2_image pkgver=2.8.2 -pkgrel=2 +pkgrel=3 pkgdesc="A simple library to load images of various formats as 
SDL surfaces (Version 2)" arch=('loong64' 'x86_64') url="https://github.com/libsdl-org/SDL_image" diff --git a/source-highlight/PKGBUILD b/source-highlight/PKGBUILD index 85d564105f..5266ddf214 100644 --- a/source-highlight/PKGBUILD +++ b/source-highlight/PKGBUILD @@ -3,7 +3,7 @@ pkgname=source-highlight pkgver=3.1.9 -pkgrel=11 +pkgrel=12 pkgdesc="Convert source code to syntax highlighted document" arch=('loong64' 'x86_64') url="https://www.gnu.org/software/src-highlite/" diff --git a/subversion/PKGBUILD b/subversion/PKGBUILD index 5521e792b3..13d31ce45d 100644 --- a/subversion/PKGBUILD +++ b/subversion/PKGBUILD @@ -6,7 +6,7 @@ pkgname=subversion pkgver=1.14.2 -pkgrel=13 +pkgrel=14 pkgdesc="A Modern Concurrent Version Control System" arch=('loong64' 'x86_64') url="https://subversion.apache.org/" diff --git a/thunderbird/PKGBUILD b/thunderbird/PKGBUILD index 8e6ad4a43a..858622f412 100644 --- a/thunderbird/PKGBUILD +++ b/thunderbird/PKGBUILD @@ -8,7 +8,7 @@ pkgbase=thunderbird pkgname=(thunderbird) pkgver=115.7.0 -pkgrel=2 +pkgrel=3 pkgdesc='Standalone mail and news reader from mozilla.org' url='https://www.thunderbird.net/' arch=(loong64 x86_64) diff --git a/tracker3-miners/PKGBUILD b/tracker3-miners/PKGBUILD index d76ee3f594..9a0a7aca19 100644 --- a/tracker3-miners/PKGBUILD +++ b/tracker3-miners/PKGBUILD @@ -2,7 +2,7 @@ pkgname=tracker3-miners pkgver=3.6.2 -pkgrel=2 +pkgrel=3 pkgdesc="Filesystem indexer and metadata extractor" url="https://tracker.gnome.org/" arch=(loong64 x86_64) diff --git a/vlc/PKGBUILD b/vlc/PKGBUILD index e0a6b7e000..048ea0dd83 100644 --- a/vlc/PKGBUILD +++ b/vlc/PKGBUILD @@ -8,7 +8,7 @@ _vlcver=3.0.20 # optional fixup version including hyphen _vlcfixupver= pkgver=${_vlcver}${_vlcfixupver//-/.r} -pkgrel=7 +pkgrel=8 pkgdesc='Multi-platform MPEG, VCD/DVD, and DivX player' url='https://www.videolan.org/vlc/' arch=('loong64' 'x86_64') diff --git a/weston/PKGBUILD b/weston/PKGBUILD index 942a683086..9b3b899533 100644 --- a/weston/PKGBUILD +++ b/weston/PKGBUILD @@ -3,7 +3,7 @@ pkgname=weston pkgver=13.0.0 -pkgrel=1 +pkgrel=2 pkgdesc='Reference implementation of a Wayland compositor' arch=('loong64' 'x86_64') url='https://wayland.freedesktop.org/' diff --git a/workrave/PKGBUILD b/workrave/PKGBUILD index 7f555b9aef..a5c5ff1e43 100644 --- a/workrave/PKGBUILD +++ b/workrave/PKGBUILD @@ -4,7 +4,7 @@ pkgname=workrave pkgver=1.10.52 -pkgrel=3 +pkgrel=4 pkgdesc="Assist in the recovery and prevention of Repetitive Strain Injury (RSI)" arch=('loong64' 'x86_64') url="https://workrave.org/" From 3385cca4df3da68db3ed8d9fb444f913d5df8ba1 Mon Sep 17 00:00:00 2001 From: Xiaotian Wu Date: Wed, 24 Apr 2024 16:44:29 +0800 Subject: [PATCH 18/23] gnu-efi --- gnu-efi/PKGBUILD | 16 +++--- gnu-efi/gnu-efi-3.0.17-la64.patch | 89 ------------------------------- 2 files changed, 6 insertions(+), 99 deletions(-) delete mode 100644 gnu-efi/gnu-efi-3.0.17-la64.patch diff --git a/gnu-efi/PKGBUILD b/gnu-efi/PKGBUILD index c894b4c940..f10eb49d91 100644 --- a/gnu-efi/PKGBUILD +++ b/gnu-efi/PKGBUILD @@ -1,27 +1,23 @@ # Maintainer: David Runge pkgname=gnu-efi -pkgver=3.0.17 -pkgrel=3 +pkgver=3.0.18 +pkgrel=1 pkgdesc="Develop EFI applications using the GNU toolchain and the EFI development environment" arch=(loong64 x86_64) url="https://sourceforge.net/projects/gnu-efi/" -license=(BSD) +license=(BSD-2-Clause) conflicts=(gnu-efi-libs) provides=(gnu-efi-libs) replaces=(gnu-efi-libs) -source=(https://download.sourceforge.net/$pkgname/$pkgname-$pkgver.tar.bz2 - "gnu-efi-3.0.17-la64.patch") 
+source=(https://download.sourceforge.net/$pkgname/$pkgname-$pkgver.tar.bz2) options=(!lto !strip) -sha512sums=('0893ca234272584f889b1ae1c75341a9ceee60acfd32765daa5d704191ba00450536a287b949304c6d055d1bf125cc29e24fc41df8e5230e0da4f9d944876512' - 'cbeb446d4e3f3b7169b798c8014aedc30e5bc3d576856ebd69af7d9ee5277f99e16709687b1140c2eac3a2edddce49aa4a5d4d91a0e1ce408c3b7fe134a57ca7') -b2sums=('27f8171b411a6a8a138d44d91c7e4e4291aa399562825d51a398913572119482ffeb303d7508ae13eacd2cd10b8f5098405ab16eb56243587efe93235f661285' - '429d8a968edc6deb5e73a4faabb523dfb490a173d7c48a9270e1ccd5758d0262e9eb39bb47056375f07bd3f8ff7a4d83a3977565538d5e039652e9672220e9b1') +sha512sums=('39f9fa14b880441a94a04400ff8850efdd9474929e5501dfd05af06e7747b4d0f7cb742ac811c7026cf52d00508efb73018be4d61d63a1211de0cd931cbc473d') +b2sums=('e080fa4c57a281452a6473304871304d1b5c30d42ee728b4c0c084258ed2f6f2099c068ec5841cee81ecf664dd658dee3b94d68324ebaa498cb49cec4f7f7df9') prepare() { # -Werror, not even once sed -e 's/-Werror//g' -i $pkgname-$pkgver/Make.defaults - patch -d $pkgname-$pkgver -Np1 -i "../gnu-efi-3.0.17-la64.patch" } build() { diff --git a/gnu-efi/gnu-efi-3.0.17-la64.patch b/gnu-efi/gnu-efi-3.0.17-la64.patch deleted file mode 100644 index 9d979c8434..0000000000 --- a/gnu-efi/gnu-efi-3.0.17-la64.patch +++ /dev/null @@ -1,89 +0,0 @@ -diff --git a/gnuefi/elf_loongarch64_efi.lds b/gnuefi/elf_loongarch64_efi.lds -index e7b4d6b..7a212cd 100644 ---- a/gnuefi/elf_loongarch64_efi.lds -+++ b/gnuefi/elf_loongarch64_efi.lds -@@ -15,6 +15,7 @@ SECTIONS - } - _etext = .; - _text_size = . - _text; -+ . = DATA_SEGMENT_ALIGN (CONSTANT (MAXPAGESIZE), CONSTANT (COMMONPAGESIZE)); - .dynamic : { *(.dynamic) } - .data : ALIGN(4096) - { -@@ -33,16 +34,27 @@ SECTIONS - *(.sbss) - *(.scommon) - *(.dynbss) -- *(.bss) -+ *(.bss*) - *(COMMON) - . = ALIGN(16); - _bss_end = .; - } - -- .rela.dyn : { *(.rela.dyn) } -+ . = ALIGN(4096); -+ .rela : -+ { -+ *(.rela.text*) -+ *(.rela.data*) -+ *(.rela.got) -+ *(.rela.dyn) -+ *(.rela.stab) -+ *(.rela.init_array) -+ *(.rela.fini_array) -+ *(.rela.ctors) -+ *(.rela.dtors) -+ } -+ . = ALIGN(4096); - .rela.plt : { *(.rela.plt) } -- .rela.got : { *(.rela.got) } -- .rela.data : { *(.rela.data) *(.rela.data*) } - . = ALIGN(512); - _edata = .; - _data_size = . - _data; -@@ -52,7 +64,9 @@ SECTIONS - . = ALIGN(4096); - .dynstr : { *(.dynstr) } - . = ALIGN(4096); -- .note.gnu.build-id : { *(.note.gnu.build-id) } -+ .note.gnu.build-id : -+ { *(.note.gnu.build-id) } -+ . 
= DATA_SEGMENT_END (.); - /DISCARD/ : - { - *(.rel.reloc) -diff --git a/inc/loongarch64/efibind.h b/inc/loongarch64/efibind.h -index aaf3fb7..8ed83a5 100644 ---- a/inc/loongarch64/efibind.h -+++ b/inc/loongarch64/efibind.h -@@ -42,9 +42,10 @@ typedef int64_t intptr_t; - // Basic EFI types of various widths - // - --#ifndef __WCHAR_TYPE__ --# define __WCHAR_TYPE__ short --#endif -+#include -+ -+typedef wchar_t CHAR16; -+#define WCHAR CHAR16 - - typedef uint64_t UINT64; - typedef int64_t INT64; -@@ -54,12 +55,13 @@ typedef int32_t INT32; - - typedef uint16_t UINT16; - typedef int16_t INT16; -+ - typedef uint8_t UINT8; -+typedef char CHAR8; - typedef int8_t INT8; --typedef __WCHAR_TYPE__ WCHAR; - - #undef VOID --#define VOID void -+typedef void VOID; - - typedef int64_t INTN; - typedef uint64_t UINTN; From 732ab87ab207ce1a18ba04963fbdf1ae7c5f91f5 Mon Sep 17 00:00:00 2001 From: Xiaotian Wu Date: Thu, 25 Apr 2024 16:44:54 +0800 Subject: [PATCH 19/23] linux-hardened --- linux-hardened/PKGBUILD | 19 ++-- linux-hardened/config.la64 | 173 +++++++++++++++++++------------------ 2 files changed, 98 insertions(+), 94 deletions(-) diff --git a/linux-hardened/PKGBUILD b/linux-hardened/PKGBUILD index 2b0e16a3c2..ac5fd26595 100644 --- a/linux-hardened/PKGBUILD +++ b/linux-hardened/PKGBUILD @@ -4,8 +4,8 @@ # Contributor: Thomas Baechler pkgbase=linux-hardened -_ver=6.7.0 -_rdate=20231226 +_ver=6.8.6 +_rdate=20240424 pkgver=${_ver}.hardened1 pkgrel=1 pkgdesc='Security-Hardened Linux' @@ -27,6 +27,7 @@ makedepends=( graphviz imagemagick python-sphinx + python-yaml texlive-latexextra ) options=('!strip') @@ -43,12 +44,12 @@ validpgpkeys=( E240B57E2C4630BA768E2F26FC1B547C8D8172C8 # Levente Polyak ) # https://www.kernel.org/pub/linux/kernel/v6.x/sha256sums.asc -sha256sums=('d0e6ce60f0ccd162aabe130c00509590de790e33642a12ed4249aa08ac14f674' +sha256sums=('16533ee5666746c21d76e965cda3a6264f03fc58ada09546f928003890541e18' '79aa07a1108582118c5e4721b9b5440053791d7a98ceb9538d42a511e39097eb' - 'c9b26d463e27257d6ad13e59d489db5bd9b103e506dc80d7917bf48471480c85') -b2sums=('15b6b33c6fdac5329d56424afc09a722053f045ca1a1553d583d80296a20a3e545d6ac1fd7950e575816df3e2d1c1033aeba779aa374ee25406f05d284d56815' + 'a46f20931d23513ef039c934eadb60d08a67bbda480db2e42bbe18c921373a19') +b2sums=('40820d8b5a0c9023313f0a8a54f7bceed5ed7267965b6f0699d5aab267d699900adb66587ee09c1a0dd53d039cd11f4314c42afa32ab31767b82470040e6b116' 'ca79cea706454ee6aab3bfbc01d3067cef4fdfd49413c8bca52480596ec394d932a4e252b4bbcc3a605bd3a56b4b73493c47a7cdd3e984a5b42767fce0f1c025' - 'd285dfd1304c9bc42f9fdaa18f8a393fc599be5d6144abe43959c8f63de7b8973821c50135ce81764428659ddd0eb634d7cce6da697cab9752311ad05dee2df6') + '5b0588b23784f492861deeaa4a3803c1d1cfc342049551f3a4992e95662cba7827f46cc313bfbdabc7f6d6a1da0be2672e55690ef89ad605d6a1ef2ff1339d30') export KBUILD_BUILD_HOST=archlinux export KBUILD_BUILD_USER=$pkgbase @@ -82,12 +83,8 @@ prepare() { build() { cd $_srcname - - make htmldocs & - local pid_docs=$! - make all - wait "${pid_docs}" + make htmldocs } _package() { diff --git a/linux-hardened/config.la64 b/linux-hardened/config.la64 index 3b5aeb6747..ac94cd2566 100644 --- a/linux-hardened/config.la64 +++ b/linux-hardened/config.la64 @@ -1,15 +1,15 @@ # # Automatically generated file; DO NOT EDIT. 
-# Linux/loongarch 6.7.0-rc7 Kernel Configuration +# Linux/loongarch 6.8.6 Kernel Configuration # -CONFIG_CC_VERSION_TEXT="gcc (GCC) 13.2.1 20230906" +CONFIG_CC_VERSION_TEXT="gcc (GCC) 14.0.1 20240421 (experimental)" CONFIG_CC_IS_GCC=y -CONFIG_GCC_VERSION=130201 +CONFIG_GCC_VERSION=140001 CONFIG_CLANG_VERSION=0 CONFIG_AS_IS_GNU=y -CONFIG_AS_VERSION=24100 +CONFIG_AS_VERSION=24200 CONFIG_LD_IS_BFD=y -CONFIG_LD_VERSION=24100 +CONFIG_LD_VERSION=24200 CONFIG_LLD_VERSION=0 CONFIG_CC_CAN_LINK=y CONFIG_CC_CAN_LINK_STATIC=y @@ -162,8 +162,10 @@ CONFIG_GENERIC_SCHED_CLOCK=y CONFIG_ARCH_SUPPORTS_NUMA_BALANCING=y CONFIG_CC_HAS_INT128=y CONFIG_CC_IMPLICIT_FALLTHROUGH="-Wimplicit-fallthrough=5" -CONFIG_GCC11_NO_ARRAY_BOUNDS=y +CONFIG_GCC10_NO_ARRAY_BOUNDS=y CONFIG_CC_NO_ARRAY_BOUNDS=y +CONFIG_GCC_NO_STRINGOP_OVERFLOW=y +CONFIG_CC_NO_STRINGOP_OVERFLOW=y CONFIG_NUMA_BALANCING=y CONFIG_NUMA_BALANCING_DEFAULT_ENABLED=y CONFIG_CGROUPS=y @@ -243,17 +245,17 @@ CONFIG_AIO=y CONFIG_IO_URING=y CONFIG_ADVISE_SYSCALLS=y CONFIG_MEMBARRIER=y +CONFIG_KCMP=y +CONFIG_RSEQ=y +# CONFIG_DEBUG_RSEQ is not set +CONFIG_CACHESTAT_SYSCALL=y +CONFIG_PC104=y CONFIG_KALLSYMS=y # CONFIG_KALLSYMS_SELFTEST is not set CONFIG_KALLSYMS_ALL=y CONFIG_KALLSYMS_BASE_RELATIVE=y -CONFIG_KCMP=y -CONFIG_RSEQ=y -CONFIG_CACHESTAT_SYSCALL=y -# CONFIG_DEBUG_RSEQ is not set CONFIG_HAVE_PERF_EVENTS=y CONFIG_PERF_USE_VMALLOC=y -CONFIG_PC104=y # # Kernel Performance Events And Counters @@ -339,15 +341,15 @@ CONFIG_CPU_HAS_PREFETCH=y CONFIG_ARCH_SUPPORTS_KEXEC=y CONFIG_ARCH_SUPPORTS_CRASH_DUMP=y CONFIG_ARCH_SELECTS_CRASH_DUMP=y +CONFIG_ARCH_HAS_GENERIC_CRASHKERNEL_RESERVATION=y CONFIG_RELOCATABLE=y CONFIG_RANDOMIZE_BASE=y CONFIG_RANDOMIZE_BASE_MAX_OFFSET=0x01000000 -CONFIG_SECCOMP=y +CONFIG_HAVE_LIVEPATCH=y # end of Kernel type and options CONFIG_ARCH_SELECT_MEMORY_MODEL=y CONFIG_ARCH_SPARSEMEM_ENABLE=y -CONFIG_ARCH_ENABLE_THP_MIGRATION=y CONFIG_ARCH_MEMORY_PROBE=y CONFIG_MMU=y CONFIG_ARCH_MMAP_RND_BITS_MIN=12 @@ -380,6 +382,7 @@ CONFIG_ARCH_SUPPORTS_ACPI=y CONFIG_ACPI=y CONFIG_ACPI_GENERIC_GSI=y CONFIG_ACPI_SYSTEM_POWER_STATES_SUPPORT=y +CONFIG_ACPI_THERMAL_LIB=y # CONFIG_ACPI_DEBUGGER is not set CONFIG_ACPI_SPCR_TABLE=y CONFIG_ACPI_SLEEP=y @@ -417,14 +420,15 @@ CONFIG_ACPI_PPTT=y # end of Power management options CONFIG_HAVE_KVM=y +CONFIG_KVM_COMMON=y CONFIG_HAVE_KVM_DIRTY_RING=y CONFIG_HAVE_KVM_DIRTY_RING_ACQ_REL=y -CONFIG_HAVE_KVM_EVENTFD=y CONFIG_KVM_MMIO=y CONFIG_KVM_GENERIC_DIRTYLOG_READ_PROTECT=y CONFIG_HAVE_KVM_VCPU_ASYNC_IOCTL=y CONFIG_KVM_XFER_TO_GUEST_WORK=y CONFIG_KVM_GENERIC_HARDWARE_ENABLING=y +CONFIG_KVM_GENERIC_MMU_NOTIFIER=y CONFIG_VIRTUALIZATION=y CONFIG_KVM=m @@ -455,6 +459,7 @@ CONFIG_ARCH_WANTS_NO_INSTR=y CONFIG_HAVE_ASM_MODVERSIONS=y CONFIG_HAVE_REGS_AND_STACK_ACCESS_API=y CONFIG_HAVE_RSEQ=y +CONFIG_HAVE_RUST=y CONFIG_HAVE_FUNCTION_ARG_ACCESS_API=y CONFIG_HAVE_HW_BREAKPOINT=y CONFIG_HAVE_PERF_REGS=y @@ -466,6 +471,7 @@ CONFIG_MMU_LAZY_TLB_REFCOUNT=y CONFIG_ARCH_HAS_NMI_SAFE_THIS_CPU_OPS=y CONFIG_HAVE_ARCH_SECCOMP=y CONFIG_HAVE_ARCH_SECCOMP_FILTER=y +CONFIG_SECCOMP=y CONFIG_SECCOMP_FILTER=y # CONFIG_SECCOMP_CACHE_DEBUG is not set CONFIG_HAVE_STACKPROTECTOR=y @@ -490,6 +496,8 @@ CONFIG_ARCH_MMAP_RND_BITS=12 CONFIG_PAGE_SIZE_LESS_THAN_64KB=y CONFIG_PAGE_SIZE_LESS_THAN_256KB=y CONFIG_ARCH_WANT_DEFAULT_TOPDOWN_MMAP_LAYOUT=y +CONFIG_HAVE_OBJTOOL=y +CONFIG_HAVE_STACK_VALIDATION=y # CONFIG_COMPAT_32BIT_TIME is not set CONFIG_ARCH_USE_MEMREMAP_PROT=y # CONFIG_LOCK_EVENT_COUNTS is not set @@ -540,6 +548,7 @@ CONFIG_BLK_ICQ=y 
CONFIG_BLK_DEV_BSGLIB=y CONFIG_BLK_DEV_INTEGRITY=y CONFIG_BLK_DEV_INTEGRITY_T10=y +CONFIG_BLK_DEV_WRITE_MOUNTED=y CONFIG_BLK_DEV_ZONED=y CONFIG_BLK_DEV_THROTTLING=y CONFIG_BLK_DEV_THROTTLING_LOW=y @@ -631,6 +640,7 @@ CONFIG_SWAP=y CONFIG_ZSWAP=y CONFIG_ZSWAP_DEFAULT_ON=y # CONFIG_ZSWAP_EXCLUSIVE_LOADS_DEFAULT_ON is not set +# CONFIG_ZSWAP_SHRINKER_DEFAULT_ON is not set # CONFIG_ZSWAP_COMPRESSOR_DEFAULT_DEFLATE is not set # CONFIG_ZSWAP_COMPRESSOR_DEFAULT_LZO is not set # CONFIG_ZSWAP_COMPRESSOR_DEFAULT_842 is not set @@ -649,9 +659,8 @@ CONFIG_ZSMALLOC=m CONFIG_ZSMALLOC_CHAIN_SIZE=8 # -# SLAB allocator options +# Slab allocator options # -# CONFIG_SLAB_DEPRECATED is not set CONFIG_SLUB=y # CONFIG_SLUB_TINY is not set CONFIG_SLAB_MERGE_DEFAULT=y @@ -660,7 +669,7 @@ CONFIG_SLAB_MERGE_DEFAULT=y # CONFIG_SLUB_STATS is not set CONFIG_SLUB_CPU_PARTIAL=y # CONFIG_RANDOM_KMALLOC_CACHES is not set -# end of SLAB allocator options +# end of Slab allocator options # CONFIG_SHUFFLE_PAGE_ALLOCATOR is not set # CONFIG_COMPAT_BRK is not set @@ -687,6 +696,7 @@ CONFIG_COMPACTION=y CONFIG_COMPACT_UNEVICTABLE_DEFAULT=1 CONFIG_PAGE_REPORTING=y CONFIG_MIGRATION=y +CONFIG_ARCH_ENABLE_THP_MIGRATION=y CONFIG_CONTIG_ALLOC=y CONFIG_PCP_BATCH_SCALE_MAX=5 CONFIG_PHYS_ADDR_T_64BIT=y @@ -696,6 +706,7 @@ CONFIG_DEFAULT_MMAP_MIN_ADDR=4096 CONFIG_TRANSPARENT_HUGEPAGE=y CONFIG_TRANSPARENT_HUGEPAGE_ALWAYS=y # CONFIG_TRANSPARENT_HUGEPAGE_MADVISE is not set +# CONFIG_TRANSPARENT_HUGEPAGE_NEVER is not set # CONFIG_READ_ONLY_THP_FOR_FS is not set CONFIG_NEED_PER_CPU_EMBED_FIRST_CHUNK=y CONFIG_NEED_PER_CPU_PAGE_FIRST_CHUNK=y @@ -708,6 +719,7 @@ CONFIG_CMA_SYSFS=y CONFIG_CMA_AREAS=19 # CONFIG_DEFERRED_STRUCT_PAGE_INIT is not set # CONFIG_IDLE_PAGE_TRACKING is not set +CONFIG_ARCH_HAS_CURRENT_STACK_POINTER=y CONFIG_ZONE_DMA32=y CONFIG_HMM_MIRROR=y CONFIG_VM_EVENT_COUNTERS=y @@ -1221,8 +1233,6 @@ CONFIG_BRIDGE_EBT_REDIRECT=m CONFIG_BRIDGE_EBT_SNAT=m CONFIG_BRIDGE_EBT_LOG=m CONFIG_BRIDGE_EBT_NFLOG=m -CONFIG_BPFILTER=y -CONFIG_BPFILTER_UMH=m # CONFIG_IP_DCCP is not set CONFIG_IP_SCTP=m # CONFIG_SCTP_DBG_OBJCNT is not set @@ -1501,7 +1511,6 @@ CONFIG_BT_BNEP_MC_FILTER=y CONFIG_BT_BNEP_PROTO_FILTER=y CONFIG_BT_CMTP=m CONFIG_BT_HIDP=m -CONFIG_BT_HS=y CONFIG_BT_LE=y CONFIG_BT_LE_L2CAP_ECRED=y CONFIG_BT_6LOWPAN=m @@ -1715,7 +1724,6 @@ CONFIG_PCI_LOONGSON=y # Cadence-based PCIe controllers # # CONFIG_PCIE_CADENCE_PLAT_HOST is not set -# CONFIG_PCI_J721E_HOST is not set # end of Cadence-based PCIe controllers # @@ -1814,6 +1822,7 @@ CONFIG_DEV_COREDUMP=y # CONFIG_DEBUG_TEST_DRIVER_REMOVE is not set CONFIG_HMEM_REPORTING=y # CONFIG_TEST_ASYNC_DRIVER_PROBE is not set +CONFIG_GENERIC_CPU_DEVICES=y CONFIG_GENERIC_CPU_AUTOPROBE=y CONFIG_SOC_BUS=y CONFIG_REGMAP=y @@ -2071,6 +2080,7 @@ CONFIG_ZRAM_DEF_COMP_ZSTD=y # CONFIG_ZRAM_DEF_COMP_842 is not set CONFIG_ZRAM_DEF_COMP="zstd" # CONFIG_ZRAM_WRITEBACK is not set +# CONFIG_ZRAM_TRACK_ENTRY_ACTIME is not set # CONFIG_ZRAM_MEMORY_TRACKING is not set # CONFIG_ZRAM_MULTI_COMP is not set CONFIG_BLK_DEV_LOOP=m @@ -2146,6 +2156,7 @@ CONFIG_TIFM_7XX1=m # CONFIG_HISI_HIKEY_USB is not set # CONFIG_OPEN_DICE is not set # CONFIG_VCPU_STALL_DETECTOR is not set +# CONFIG_NSM is not set # CONFIG_C2PORT is not set # @@ -2425,13 +2436,10 @@ CONFIG_PATA_PCMCIA=m CONFIG_MD=y CONFIG_BLK_DEV_MD=m CONFIG_MD_BITMAP_FILE=y -CONFIG_MD_LINEAR=m CONFIG_MD_RAID0=m CONFIG_MD_RAID1=m CONFIG_MD_RAID10=m CONFIG_MD_RAID456=m -CONFIG_MD_MULTIPATH=m -# CONFIG_MD_FAULTY is not set # CONFIG_MD_CLUSTER is not set CONFIG_BCACHE=m # 
CONFIG_BCACHE_DEBUG is not set @@ -2688,6 +2696,8 @@ CONFIG_NET_VENDOR_MICROCHIP=y # CONFIG_LAN743X is not set # CONFIG_LAN966X_SWITCH is not set # CONFIG_VCAP is not set +CONFIG_NET_VENDOR_MOTORCOMM=y +CONFIG_YT6801=m CONFIG_NET_VENDOR_MICROSEMI=y # CONFIG_MSCC_OCELOT_SWITCH is not set CONFIG_NET_VENDOR_MICROSOFT=y @@ -2719,6 +2729,7 @@ CONFIG_8139TOO_PIO=y # CONFIG_8139TOO_8129 is not set # CONFIG_8139_OLD_RX_RESET is not set CONFIG_R8169=m +CONFIG_R8169_LEDS=y # CONFIG_NET_VENDOR_RENESAS is not set # CONFIG_NET_VENDOR_ROCKER is not set # CONFIG_NET_VENDOR_SAMSUNG is not set @@ -2815,6 +2826,7 @@ CONFIG_SMSC_PHY=m # CONFIG_DP83867_PHY is not set # CONFIG_DP83869_PHY is not set # CONFIG_DP83TD510_PHY is not set +# CONFIG_DP83TG720_PHY is not set # CONFIG_VITESSE_PHY is not set # CONFIG_XILINX_GMII2RGMII is not set # CONFIG_MICREL_KS8995MA is not set @@ -3028,9 +3040,6 @@ CONFIG_ATH11K_DEBUGFS=y CONFIG_ATH11K_SPECTRAL=y # CONFIG_ATH12K is not set CONFIG_WLAN_VENDOR_ATMEL=y -CONFIG_ATMEL=m -CONFIG_PCI_ATMEL=m -CONFIG_PCMCIA_ATMEL=m CONFIG_AT76C50X_USB=m CONFIG_WLAN_VENDOR_BROADCOM=y CONFIG_B43=m @@ -3072,9 +3081,6 @@ CONFIG_BRCMFMAC_USB=y CONFIG_BRCMFMAC_PCIE=y CONFIG_BRCM_TRACING=y CONFIG_BRCMDBG=y -CONFIG_WLAN_VENDOR_CISCO=y -# CONFIG_AIRO is not set -CONFIG_AIRO_CS=m CONFIG_WLAN_VENDOR_INTEL=y CONFIG_IPW2100=m CONFIG_IPW2100_MONITOR=y @@ -3112,22 +3118,6 @@ CONFIG_IWLWIFI_OPMODE_MODULAR=y # end of Debugging Options CONFIG_WLAN_VENDOR_INTERSIL=y -CONFIG_HOSTAP=m -CONFIG_HOSTAP_FIRMWARE=y -CONFIG_HOSTAP_FIRMWARE_NVRAM=y -CONFIG_HOSTAP_PLX=m -CONFIG_HOSTAP_PCI=m -CONFIG_HOSTAP_CS=m -CONFIG_HERMES=m -CONFIG_HERMES_PRISM=y -CONFIG_HERMES_CACHE_FW_ON_INIT=y -CONFIG_PLX_HERMES=m -CONFIG_TMD_HERMES=m -CONFIG_NORTEL_HERMES=m -CONFIG_PCI_HERMES=m -CONFIG_PCMCIA_HERMES=m -CONFIG_PCMCIA_SPECTRUM=m -CONFIG_ORINOCO_USB=m CONFIG_P54_COMMON=m CONFIG_P54_USB=m CONFIG_P54_PCI=m @@ -3137,7 +3127,6 @@ CONFIG_P54_LEDS=y CONFIG_WLAN_VENDOR_MARVELL=y CONFIG_LIBERTAS=m CONFIG_LIBERTAS_USB=m -CONFIG_LIBERTAS_CS=m CONFIG_LIBERTAS_SDIO=m CONFIG_LIBERTAS_SPI=m # CONFIG_LIBERTAS_DEBUG is not set @@ -3178,9 +3167,10 @@ CONFIG_MT7921_COMMON=m CONFIG_MT7921E=m CONFIG_MT7921S=m CONFIG_MT7921U=m -# CONFIG_MT7996E is not set -# CONFIG_MT7925E is not set -# CONFIG_MT7925U is not set +CONFIG_MT7996E=m +CONFIG_MT7925_COMMON=m +CONFIG_MT7925E=m +CONFIG_MT7925U=m CONFIG_WLAN_VENDOR_MICROCHIP=y CONFIG_WILC1000=m CONFIG_WILC1000_SDIO=m @@ -3244,32 +3234,36 @@ CONFIG_RTL8XXXU_UNTESTED=y CONFIG_RTW88=m CONFIG_RTW88_CORE=m CONFIG_RTW88_PCI=m +CONFIG_RTW88_SDIO=m +CONFIG_RTW88_USB=m CONFIG_RTW88_8822B=m CONFIG_RTW88_8822C=m CONFIG_RTW88_8723D=m CONFIG_RTW88_8821C=m CONFIG_RTW88_8822BE=m -# CONFIG_RTW88_8822BS is not set -# CONFIG_RTW88_8822BU is not set +CONFIG_RTW88_8822BS=m +CONFIG_RTW88_8822BU=m CONFIG_RTW88_8822CE=m -# CONFIG_RTW88_8822CS is not set -# CONFIG_RTW88_8822CU is not set +CONFIG_RTW88_8822CS=m +CONFIG_RTW88_8822CU=m CONFIG_RTW88_8723DE=m -# CONFIG_RTW88_8723DS is not set -# CONFIG_RTW88_8723DU is not set +CONFIG_RTW88_8723DS=m +CONFIG_RTW88_8723DU=m CONFIG_RTW88_8821CE=m -# CONFIG_RTW88_8821CS is not set -# CONFIG_RTW88_8821CU is not set +CONFIG_RTW88_8821CS=m +CONFIG_RTW88_8821CU=m # CONFIG_RTW88_DEBUG is not set # CONFIG_RTW88_DEBUGFS is not set CONFIG_RTW89=m CONFIG_RTW89_CORE=m CONFIG_RTW89_PCI=m +CONFIG_RTW89_8851B=m CONFIG_RTW89_8852A=m +CONFIG_RTW89_8852B=m CONFIG_RTW89_8852C=m -# CONFIG_RTW89_8851BE is not set +CONFIG_RTW89_8851BE=m CONFIG_RTW89_8852AE=m -# CONFIG_RTW89_8852BE is not set 
+CONFIG_RTW89_8852BE=m CONFIG_RTW89_8852CE=m CONFIG_RTW89_DEBUG=y CONFIG_RTW89_DEBUGMSG=y @@ -3296,15 +3290,11 @@ CONFIG_WLCORE=m # CONFIG_WLCORE_SPI is not set CONFIG_WLCORE_SDIO=m CONFIG_WLAN_VENDOR_ZYDAS=y -# CONFIG_USB_ZD1201 is not set CONFIG_ZD1211RW=m # CONFIG_ZD1211RW_DEBUG is not set CONFIG_WLAN_VENDOR_QUANTENNA=y CONFIG_QTNFMAC=m CONFIG_QTNFMAC_PCIE=m -CONFIG_PCMCIA_RAYCS=m -CONFIG_PCMCIA_WL3501=m -CONFIG_USB_NET_RNDIS_WLAN=m CONFIG_MAC80211_HWSIM=m CONFIG_VIRT_WIFI=m # CONFIG_WAN is not set @@ -3464,6 +3454,7 @@ CONFIG_INPUT_JOYSTICK=y # CONFIG_JOYSTICK_QWIIC is not set # CONFIG_JOYSTICK_FSIA6B is not set # CONFIG_JOYSTICK_SENSEHAT is not set +# CONFIG_JOYSTICK_SEESAW is not set # CONFIG_INPUT_TABLET is not set # CONFIG_INPUT_TOUCHSCREEN is not set CONFIG_INPUT_MISC=y @@ -3776,7 +3767,6 @@ CONFIG_SPI_LOONGSON_PLATFORM=m # CONFIG_SPI_MXIC is not set # CONFIG_SPI_XCOMM is not set # CONFIG_SPI_XILINX is not set -# CONFIG_SPI_ZYNQMP_GQSPI is not set # CONFIG_SPI_AMD is not set # @@ -4022,6 +4012,7 @@ CONFIG_HWMON_VID=m # CONFIG_SENSORS_F71805F is not set # CONFIG_SENSORS_F71882FG is not set # CONFIG_SENSORS_F75375S is not set +# CONFIG_SENSORS_GIGABYTE_WATERFORCE is not set # CONFIG_SENSORS_GL518SM is not set # CONFIG_SENSORS_GL520SM is not set # CONFIG_SENSORS_G760A is not set @@ -4156,6 +4147,7 @@ CONFIG_SENSORS_W83627HF=m CONFIG_THERMAL=y # CONFIG_THERMAL_NETLINK is not set # CONFIG_THERMAL_STATISTICS is not set +# CONFIG_THERMAL_DEBUGFS is not set CONFIG_THERMAL_EMERGENCY_POWEROFF_DELAY_MS=0 CONFIG_THERMAL_HWMON=y CONFIG_THERMAL_OF=y @@ -4275,7 +4267,6 @@ CONFIG_MFD_CORE=m # CONFIG_MFD_SKY81452 is not set # CONFIG_MFD_STMPE is not set CONFIG_MFD_SYSCON=y -# CONFIG_MFD_TI_AM335X_TSCADC is not set # CONFIG_MFD_LP3943 is not set # CONFIG_MFD_LP8788 is not set # CONFIG_MFD_TI_LMU is not set @@ -4328,6 +4319,7 @@ CONFIG_REGULATOR=y # CONFIG_REGULATOR_FIXED_VOLTAGE is not set # CONFIG_REGULATOR_VIRTUAL_CONSUMER is not set # CONFIG_REGULATOR_USERSPACE_CONSUMER is not set +# CONFIG_REGULATOR_NETLINK_EVENTS is not set # CONFIG_REGULATOR_88PG86X is not set # CONFIG_REGULATOR_ACT8865 is not set # CONFIG_REGULATOR_AD5398 is not set @@ -4775,7 +4767,10 @@ CONFIG_VIDEOBUF2_DMA_SG=m CONFIG_MEDIA_ATTACH=y CONFIG_VIDEO_IR_I2C=m CONFIG_VIDEO_CAMERA_SENSOR=y +# CONFIG_VIDEO_ALVIUM_CSI2 is not set # CONFIG_VIDEO_AR0521 is not set +# CONFIG_VIDEO_GC0308 is not set +# CONFIG_VIDEO_GC2145 is not set # CONFIG_VIDEO_HI556 is not set # CONFIG_VIDEO_HI846 is not set # CONFIG_VIDEO_HI847 is not set @@ -4821,6 +4816,7 @@ CONFIG_VIDEO_CAMERA_SENSOR=y # CONFIG_VIDEO_OV5675 is not set # CONFIG_VIDEO_OV5693 is not set # CONFIG_VIDEO_OV5695 is not set +# CONFIG_VIDEO_OV64A40 is not set # CONFIG_VIDEO_OV6650 is not set # CONFIG_VIDEO_OV7251 is not set # CONFIG_VIDEO_OV7640 is not set @@ -4844,6 +4840,12 @@ CONFIG_VIDEO_CAMERA_SENSOR=y # CONFIG_VIDEO_CCS is not set # CONFIG_VIDEO_ET8EK8 is not set +# +# Camera ISPs +# +# CONFIG_VIDEO_THP7312 is not set +# end of Camera ISPs + # # Lens drivers # @@ -4912,6 +4914,7 @@ CONFIG_VIDEO_CAMERA_SENSOR=y # CONFIG_VIDEO_TVP5150 is not set # CONFIG_VIDEO_TVP7002 is not set # CONFIG_VIDEO_TW2804 is not set +# CONFIG_VIDEO_TW9900 is not set # CONFIG_VIDEO_TW9903 is not set # CONFIG_VIDEO_TW9906 is not set # CONFIG_VIDEO_TW9910 is not set @@ -5258,9 +5261,8 @@ CONFIG_DRM_AMD_DC_SI=y # end of Display Engine Configuration # CONFIG_DRM_NOUVEAU is not set -CONFIG_DRM_GSGPU=m -CONFIG_DRM_GSGPU_USERPTR=y -CONFIG_DRM_GSGPU_GART_DEBUGFS=y +# CONFIG_DRM_XE is not set +# CONFIG_DRM_GSGPU 
is not set CONFIG_DRM_VGEM=m CONFIG_DRM_VKMS=m CONFIG_DRM_UDL=m @@ -5353,7 +5355,7 @@ CONFIG_DRM_PANEL_BRIDGE=y # CONFIG_DRM_CDNS_MHDP8546 is not set # end of Display Interface Bridges -# CONFIG_DRM_LOONGSON is not set +CONFIG_DRM_LOONGSON=m # CONFIG_DRM_ETNAVIV is not set # CONFIG_DRM_LOGICVC is not set # CONFIG_DRM_ARCPGU is not set @@ -5373,7 +5375,6 @@ CONFIG_DRM_SIMPLEDRM=m # CONFIG_TINYDRM_ST7735R is not set # CONFIG_DRM_GUD is not set # CONFIG_DRM_SSD130X is not set -# CONFIG_DRM_LEGACY is not set CONFIG_DRM_PANEL_ORIENTATION_QUIRKS=y # @@ -5437,7 +5438,7 @@ CONFIG_FB_SYS_FILLRECT=y CONFIG_FB_SYS_COPYAREA=y CONFIG_FB_SYS_IMAGEBLIT=y # CONFIG_FB_FOREIGN_ENDIAN is not set -CONFIG_FB_SYS_FOPS=y +CONFIG_FB_SYSMEM_FOPS=y CONFIG_FB_DEFERRED_IO=y CONFIG_FB_IOMEM_FOPS=y CONFIG_FB_IOMEM_HELPERS=y @@ -5474,6 +5475,7 @@ CONFIG_BACKLIGHT_CLASS_DEVICE=y # CONFIG_BACKLIGHT_LM3630A is not set # CONFIG_BACKLIGHT_LM3639 is not set # CONFIG_BACKLIGHT_LP855X is not set +# CONFIG_BACKLIGHT_MP3309C is not set # CONFIG_BACKLIGHT_GPIO is not set # CONFIG_BACKLIGHT_LV5207LP is not set # CONFIG_BACKLIGHT_BD6107 is not set @@ -6123,6 +6125,7 @@ CONFIG_HID_ZYDACRON=m CONFIG_HID_SENSOR_HUB=m CONFIG_HID_SENSOR_CUSTOM_SENSOR=m CONFIG_HID_ALPS=m +# CONFIG_HID_MCP2200 is not set CONFIG_HID_MCP2221=m # end of Special HID drivers @@ -6559,6 +6562,7 @@ CONFIG_TYPEC_STUSB160X=m CONFIG_TYPEC_MUX_PI3USB30532=m # CONFIG_TYPEC_MUX_NB7VPQ904M is not set # CONFIG_TYPEC_MUX_PTN36502 is not set +# CONFIG_TYPEC_MUX_WCD939X_USBSS is not set # end of USB Type-C Multiplexer/DeMultiplexer Switch support # @@ -6608,8 +6612,6 @@ CONFIG_MMC_HSQ=m CONFIG_MMC_TOSHIBA_PCI=m CONFIG_MMC_MTK=m CONFIG_MMC_SDHCI_XENON=m -# CONFIG_MMC_SDHCI_OMAP is not set -# CONFIG_MMC_SDHCI_AM654 is not set # CONFIG_SCSI_UFSHCD is not set CONFIG_MEMSTICK=m # CONFIG_MEMSTICK_DEBUG is not set @@ -6806,6 +6808,7 @@ CONFIG_RTC_DRV_DS1374=m CONFIG_RTC_DRV_DS1672=m # CONFIG_RTC_DRV_HYM8563 is not set CONFIG_RTC_DRV_MAX6900=m +# CONFIG_RTC_DRV_MAX31335 is not set # CONFIG_RTC_DRV_NCT3018Y is not set CONFIG_RTC_DRV_RS5C372=m CONFIG_RTC_DRV_ISL1208=m @@ -6908,6 +6911,7 @@ CONFIG_DMA_OF=y # CONFIG_DW_AXI_DMAC is not set # CONFIG_FSL_EDMA is not set # CONFIG_INTEL_IDMA64 is not set +# CONFIG_LS2X_APB_DMA is not set # CONFIG_PLX_DMA is not set # CONFIG_XILINX_DMA is not set # CONFIG_XILINX_XDMA is not set @@ -6955,6 +6959,7 @@ CONFIG_VFIO_GROUP=y CONFIG_VFIO_CONTAINER=y # CONFIG_VFIO_NOIOMMU is not set CONFIG_VFIO_VIRQFD=y +# CONFIG_VFIO_DEBUGFS is not set # # VFIO support for PCI devices @@ -7278,7 +7283,7 @@ CONFIG_PWM_SYSFS=y # CONFIG_PWM_FSL_FTM is not set # CONFIG_PWM_PCA9685 is not set # CONFIG_PWM_XILINX is not set -CONFIG_PWM_LS=m +# CONFIG_PWM_LS is not set # # IRQ chip support @@ -7335,6 +7340,7 @@ CONFIG_GENERIC_PHY=y # # Performance monitor support # +# CONFIG_DWC_PCIE_PMU is not set # end of Performance monitor support CONFIG_RAS=y @@ -7352,6 +7358,7 @@ CONFIG_USB4=m # CONFIG_DAX is not set CONFIG_NVMEM=y CONFIG_NVMEM_SYSFS=y +CONFIG_NVMEM_LAYOUTS=y # # Layout Types @@ -7387,6 +7394,7 @@ CONFIG_PM_OPP=y # CONFIG_VALIDATE_FS_PARSER=y CONFIG_FS_IOMAP=y +CONFIG_FS_STACK=y CONFIG_BUFFER_HEAD=y CONFIG_LEGACY_DIRECT_IO=y CONFIG_EXT2_FS=m @@ -7496,7 +7504,7 @@ CONFIG_OVERLAY_FS_METACOPY=y # CONFIG_NETFS_SUPPORT=m CONFIG_NETFS_STATS=y -CONFIG_FSCACHE=m +CONFIG_FSCACHE=y CONFIG_FSCACHE_STATS=y # CONFIG_FSCACHE_DEBUG is not set CONFIG_CACHEFILES=m @@ -7551,9 +7559,9 @@ CONFIG_TMPFS_INODE64=y # CONFIG_TMPFS_QUOTA is not set CONFIG_ARCH_SUPPORTS_HUGETLBFS=y 
CONFIG_HUGETLBFS=y +# CONFIG_HUGETLB_PAGE_OPTIMIZE_VMEMMAP_DEFAULT_ON is not set CONFIG_HUGETLB_PAGE=y CONFIG_HUGETLB_PAGE_OPTIMIZE_VMEMMAP=y -# CONFIG_HUGETLB_PAGE_OPTIMIZE_VMEMMAP_DEFAULT_ON is not set CONFIG_CONFIGFS_FS=y CONFIG_EFIVAR_FS=y # end of Pseudo filesystems @@ -7678,6 +7686,7 @@ CONFIG_NFSD_SCSILAYOUT=y # CONFIG_NFSD_FLEXFILELAYOUT is not set CONFIG_NFSD_V4_2_INTER_SSC=y CONFIG_NFSD_V4_SECURITY_LABEL=y +# CONFIG_NFSD_LEGACY_CLIENT_TRACKING is not set CONFIG_GRACE_PERIOD=m CONFIG_LOCKD=m CONFIG_LOCKD_V4=y @@ -7954,14 +7963,12 @@ CONFIG_CRYPTO_ADIANTUM=m # CONFIG_CRYPTO_ARC4 is not set CONFIG_CRYPTO_CHACHA20=m CONFIG_CRYPTO_CBC=m -CONFIG_CRYPTO_CFB=m CONFIG_CRYPTO_CTR=y CONFIG_CRYPTO_CTS=m CONFIG_CRYPTO_ECB=y CONFIG_CRYPTO_HCTR2=m CONFIG_CRYPTO_KEYWRAP=m CONFIG_CRYPTO_LRW=m -CONFIG_CRYPTO_OFB=m CONFIG_CRYPTO_PCBC=m CONFIG_CRYPTO_XCTR=m CONFIG_CRYPTO_XTS=m @@ -8072,6 +8079,7 @@ CONFIG_CRYPTO_HW=y # CONFIG_CRYPTO_DEV_QAT_C3XXX is not set # CONFIG_CRYPTO_DEV_QAT_C62X is not set # CONFIG_CRYPTO_DEV_QAT_4XXX is not set +# CONFIG_CRYPTO_DEV_QAT_420XX is not set # CONFIG_CRYPTO_DEV_QAT_DH895xCCVF is not set # CONFIG_CRYPTO_DEV_QAT_C3XXXVF is not set # CONFIG_CRYPTO_DEV_QAT_C62XVF is not set @@ -8246,7 +8254,6 @@ CONFIG_FONT_SUPPORT=y CONFIG_FONTS=y # CONFIG_FONT_8x8 is not set # CONFIG_FONT_8x16 is not set -CONFIG_FONT_UTF8x16=y # CONFIG_FONT_6x11 is not set # CONFIG_FONT_7x14 is not set # CONFIG_FONT_PEARL_8x8 is not set @@ -8262,6 +8269,7 @@ CONFIG_SG_POOL=y CONFIG_MEMREGION=y CONFIG_ARCH_STACKWALK=y CONFIG_STACKDEPOT=y +CONFIG_STACKDEPOT_MAX_FRAMES=64 CONFIG_SBITMAP=y # CONFIG_LWQ_TEST is not set # end of Library routines @@ -8300,7 +8308,7 @@ CONFIG_DEBUG_MISC=y # # Compile-time checks and compiler options # -CONFIG_AS_HAS_NON_CONST_LEB128=y +CONFIG_AS_HAS_NON_CONST_ULEB128=y CONFIG_DEBUG_INFO_NONE=y # CONFIG_DEBUG_INFO_DWARF_TOOLCHAIN_DEFAULT is not set # CONFIG_DEBUG_INFO_DWARF4 is not set @@ -8432,8 +8440,6 @@ CONFIG_STACKTRACE=y # CONFIG_DEBUG_MAPLE_TREE is not set # end of Debug kernel data structures -# CONFIG_DEBUG_CREDENTIALS is not set - # # RCU Debugging # @@ -8476,6 +8482,7 @@ CONFIG_HAVE_SAMPLE_FTRACE_DIRECT_MULTI=y # # CONFIG_UNWINDER_GUESS is not set CONFIG_UNWINDER_PROLOGUE=y +# CONFIG_UNWINDER_ORC is not set # end of loongarch Debugging # From af90f8ce244082c0a59663f7f7600dee47886665 Mon Sep 17 00:00:00 2001 From: Xiaotian Wu Date: Mon, 29 Apr 2024 17:08:41 +0800 Subject: [PATCH 20/23] ffmpeg --- ffmpeg/PKGBUILD | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/ffmpeg/PKGBUILD b/ffmpeg/PKGBUILD index 924225d2c9..a64e7d343b 100644 --- a/ffmpeg/PKGBUILD +++ b/ffmpeg/PKGBUILD @@ -6,7 +6,7 @@ pkgname=ffmpeg pkgver=6.1.1 -pkgrel=3 +pkgrel=4 epoch=2 pkgdesc='Complete solution to record, convert and stream audio and video' arch=(loong64 x86_64) @@ -156,7 +156,7 @@ build() { --disable-stripping \ --enable-amf \ --enable-avisynth \ - --disable-cuda-llvm \ + --enable-cuda-llvm \ --enable-lto \ --enable-fontconfig \ --enable-frei0r \ @@ -216,7 +216,6 @@ build() { --enable-shared \ --enable-version3 \ --disable-doc \ - --disable-lsx \ --enable-vulkan make make tools/qt-faststart From 41fb4f176b4d3ccdf104a529047796176b9c6444 Mon Sep 17 00:00:00 2001 From: Xiaotian Wu Date: Mon, 6 May 2024 10:07:01 +0800 Subject: [PATCH 21/23] openblas --- openblas/PKGBUILD | 11 +++++--- openblas/fix-loong.patch | 47 ------------------------------- openblas/openblas-loong64.patch | 50 +++++++++++++++++++++++++++++++++ 3 files changed, 57 insertions(+), 51 deletions(-) delete 
mode 100644 openblas/fix-loong.patch create mode 100644 openblas/openblas-loong64.patch diff --git a/openblas/PKGBUILD b/openblas/PKGBUILD index ca66c556e8..14342bb92f 100644 --- a/openblas/PKGBUILD +++ b/openblas/PKGBUILD @@ -5,7 +5,7 @@ pkgbase=openblas pkgname=(openblas openblas64 blas-openblas blas64-openblas) _pkgname=OpenBLAS pkgver=0.3.26 -pkgrel=2 +pkgrel=3 _blasver=3.12.0 pkgdesc="An optimized BLAS library based on GotoBLAS2 1.13 BSD" arch=('loong64' 'x86_64') @@ -14,13 +14,16 @@ license=('BSD') depends=('gcc-libs') makedepends=('cmake' 'perl' 'gcc-fortran') source=(${_pkgname}-v${pkgver}.tar.gz::https://github.com/xianyi/OpenBLAS/archive/v${pkgver}.tar.gz -fix-loong.patch) + https://github.com/OpenMathLib/OpenBLAS/commit/992b71fea2e3916c294dd6a90062b4c31740cd85.patch + openblas-loong64.patch) sha512sums=('01d3a536fbfa62f276fd6b1ad0e218fb3d91f41545fc83ddc74979fa26372d8389f0baa20334badfe0adacd77bd944c50a47ac920577373fcc1d495553084373' - '195dc3c3daa56c55912831161bd9e73532c1a06b38c894a6eceb8d49befddda1b94e71dcd36e1d3403e2e5f70ded83febdee493059b16adc85ea52fb32e58f81') + '24a9d10752218c34bc1271a91b171ee6ec9e5b73ba0fa559f22d4a4c698b6bdb502a38a436ffeb8110b325d2e690010e855580f7607c9d30263d819be018d8c0' + 'e62892a2b290938cb8cad9040a2f9d5787325aa1e8e88eddc71f7e4dd6969b760d9658c0a04c05458272cdb00c339154f98daecb1987ca9b80a2d1528dd59546') prepare() { cd "$_pkgname-$pkgver" - patch -p1 -i "$srcdir/fix-loong.patch" + patch -p1 -i "$srcdir/992b71fea2e3916c294dd6a90062b4c31740cd85.patch" + patch -p1 -i "$srcdir/openblas-loong64.patch" } build() { diff --git a/openblas/fix-loong.patch b/openblas/fix-loong.patch deleted file mode 100644 index fc0489b826..0000000000 --- a/openblas/fix-loong.patch +++ /dev/null @@ -1,47 +0,0 @@ -diff --git a/.github/workflows/loongarch64.yml b/.github/workflows/loongarch64.yml -index 4a9bf98b6..b310d6938 100644 ---- a/.github/workflows/loongarch64.yml -+++ b/.github/workflows/loongarch64.yml -@@ -40,8 +40,8 @@ jobs: - - - name: Download and install loongarch64-toolchain - run: | -- wget https://github.com/loongson/build-tools/releases/download/2022.09.06/loongarch64-clfs-7.3-cross-tools-gcc-glibc.tar.xz -- tar -xf loongarch64-clfs-7.3-cross-tools-gcc-glibc.tar.xz -C /opt -+ wget https://github.com/loongson/build-tools/releases/download/2023.08.08/CLFS-loongarch64-8.1-x86_64-cross-tools-gcc-glibc.tar.xz -+ tar -xf CLFS-loongarch64-8.1-x86_64-cross-tools-gcc-glibc.tar.xz -C /opt - - - name: Set env - run: | -diff --git a/cmake/cc.cmake b/cmake/cc.cmake -index 00952e810..242b03b5f 100644 ---- a/cmake/cc.cmake -+++ b/cmake/cc.cmake -@@ -36,9 +36,9 @@ if (${CMAKE_C_COMPILER_ID} STREQUAL "GNU" OR ${CMAKE_C_COMPILER_ID} STREQUAL "LS - - if (LOONGARCH64) - if (BINARY64) -- set(CCOMMON_OPT "${CCOMMON_OPT} -mabi=lp64") -+ set(CCOMMON_OPT "${CCOMMON_OPT} -mabi=lp64d") - else () -- set(CCOMMON_OPT "${CCOMMON_OPT} -mabi=lp32") -+ set(CCOMMON_OPT "${CCOMMON_OPT} -mabi=ilp32d") - endif () - set(BINARY_DEFINED 1) - endif () -diff --git a/cmake/fc.cmake b/cmake/fc.cmake -index c496f6368..b356dfda3 100644 ---- a/cmake/fc.cmake -+++ b/cmake/fc.cmake -@@ -61,9 +61,9 @@ if (${F_COMPILER} STREQUAL "GFORTRAN" OR ${F_COMPILER} STREQUAL "F95" OR CMAKE_F - endif () - if (LOONGARCH64) - if (BINARY64) -- set(FCOMMON_OPT "${FCOMMON_OPT} -mabi=lp64") -+ set(FCOMMON_OPT "${FCOMMON_OPT} -mabi=lp64d") - else () -- set(FCOMMON_OPT "${FCOMMON_OPT} -mabi=lp32") -+ set(FCOMMON_OPT "${FCOMMON_OPT} -mabi=ilp32d") - endif () - endif () - if (RISCV64) diff --git a/openblas/openblas-loong64.patch 
b/openblas/openblas-loong64.patch new file mode 100644 index 0000000000..83104eeb12 --- /dev/null +++ b/openblas/openblas-loong64.patch @@ -0,0 +1,50 @@ +Index: OpenBLAS-0.3.26/Makefile.system +=================================================================== +--- OpenBLAS-0.3.26.orig/Makefile.system ++++ OpenBLAS-0.3.26/Makefile.system +@@ -948,10 +948,7 @@ BINARY_DEFINED = 1 + endif + + ifeq ($(ARCH), loongarch64) +-LA64_ABI=$(shell $(CC) -mabi=lp64d -c $(TOPDIR)/cpuid_loongarch64.c -o /dev/null > /dev/null 2> /dev/null && echo lp64d) +-ifneq ($(LA64_ABI), lp64d) +-LA64_ABI=lp64 +-endif ++LA64_ABI=lp64d + CCOMMON_OPT += -march=loongarch64 -mabi=$(LA64_ABI) + FCOMMON_OPT += -march=loongarch64 -mabi=$(LA64_ABI) + endif +Index: OpenBLAS-0.3.26/cmake/cc.cmake +=================================================================== +--- OpenBLAS-0.3.26.orig/cmake/cc.cmake ++++ OpenBLAS-0.3.26/cmake/cc.cmake +@@ -36,12 +36,7 @@ if (${CMAKE_C_COMPILER_ID} STREQUAL "GNU + + if (LOONGARCH64) + if (BINARY64) +- CHECK_CXX_COMPILER_FLAG("-mabi=lp64d" COMPILER_SUPPORT_LP64D_ABI) +- if(COMPILER_SUPPORT_LP64D_ABI) + set(CCOMMON_OPT "${CCOMMON_OPT} -mabi=lp64d") +- else() +- set(CCOMMON_OPT "${CCOMMON_OPT} -mabi=lp64") +- endif () + else () + CHECK_CXX_COMPILER_FLAG("-mabi=ilp32d" COMPILER_SUPPORT_ILP32D_ABI) + if(COMPILER_SUPPORT_ILP32D_ABI) +Index: OpenBLAS-0.3.26/cmake/fc.cmake +=================================================================== +--- OpenBLAS-0.3.26.orig/cmake/fc.cmake ++++ OpenBLAS-0.3.26/cmake/fc.cmake +@@ -61,12 +61,7 @@ if (${F_COMPILER} STREQUAL "GFORTRAN" OR + endif () + if (LOONGARCH64) + if (BINARY64) +- CHECK_CXX_COMPILER_FLAG("-mabi=lp64d" COMPILER_SUPPORT_LP64D_ABI) +- if(COMPILER_SUPPORT_LP64D_ABI) + set(FCOMMON_OPT "${FCOMMON_OPT} -mabi=lp64d") +- else() +- set(FCOMMON_OPT "${FCOMMON_OPT} -mabi=lp64") +- endif () + else () + CHECK_CXX_COMPILER_FLAG("-mabi=ilp32d" COMPILER_SUPPORT_ILP32D_ABI) + if(COMPILER_SUPPORT_ILP32D_ABI) From f1861174727dad903531a2bcf35f929ed1ba6385 Mon Sep 17 00:00:00 2001 From: Yingjie Wang Date: Tue, 20 Aug 2024 12:49:19 -0400 Subject: [PATCH 22/23] update: fastfetch 2.21.3 --- fastfetch/PKGBUILD | 15 ++++++--------- 1 file changed, 6 insertions(+), 9 deletions(-) diff --git a/fastfetch/PKGBUILD b/fastfetch/PKGBUILD index 3729258934..7c2e209d9e 100644 --- a/fastfetch/PKGBUILD +++ b/fastfetch/PKGBUILD @@ -2,7 +2,7 @@ # Contributor: Mark Wagie pkgname=fastfetch -pkgver=2.7.1 +pkgver=2.21.3 pkgrel=1 pkgdesc="Like Neofetch, but much faster because written in C" arch=('loong64' 'x86_64') @@ -17,14 +17,12 @@ makedepends=( 'ddcutil' 'directx-headers' 'imagemagick' - 'libnm' 'libpulse' 'libxcb' 'libxrandr' 'mesa' 'ocl-icd' 'opencl-headers' - 'pciutils' 'vulkan-headers' 'vulkan-icd-loader' 'wayland' @@ -39,30 +37,28 @@ optdepends=( 'directx-headers: GPU detection in WSL' 'glib2: Output for values that are only stored in GSettings' 'imagemagick: Image output using sixel or kitty graphics protocol' - 'libnm: Wifi detection' + 'libelf: st term font detection and fast path of systemd version detection' 'libpulse: Sound detection' 'mesa: Needed by the OpenGL module for gl context creation.' 
   'libxrandr: Multi monitor support'
   'ocl-icd: OpenCL module'
-  'pciutils: GPU output'
+  'hwdata: GPU output'
   'vulkan-icd-loader: Vulkan module & fallback for GPU output'
   'xfconf: Needed for XFWM theme and XFCE Terminal font'
   'zlib: Faster image output when using kitty graphics protocol'
   'libdrm: Displays detection'
 )
 source=("${pkgname}-${pkgver}.tar.gz::${url}/archive/refs/tags/${pkgver}.tar.gz")
-sha256sums=('64778068628426a1d4394f756cec70a62dd9f7fabc267dd7bdcbfc6302f6476e')
+sha256sums=('cec1f126ade7a5ef971901b1cdbe79f5864523d7a0a92732991619485d13e2e7')
 
 build() {
   cmake -B build -S "${pkgname}-${pkgver}" \
-    -DCMAKE_BUILD_TYPE='None' \
-    -DCMAKE_INSTALL_PREFIX='/usr' \
     -DCMAKE_BUILD_TYPE='RelWithDebInfo' \
+    -DCMAKE_INSTALL_PREFIX='/usr' \
     -DBUILD_TESTS='ON' \
     -DENABLE_SQLITE3='OFF' \
     -DENABLE_RPM='OFF' \
     -DENABLE_IMAGEMAGICK6='OFF' \
-    -DENABLE_DDCUTIL='ON' \
     -Wno-dev
   cmake --build build
 }
@@ -74,3 +70,4 @@ check() {
 package() {
   DESTDIR="${pkgdir}" cmake --install build
 }
+

From cc919c066d84a035e88efc6ad2034aeae2c39a0b Mon Sep 17 00:00:00 2001
From: Yingjie Wang
Date: Thu, 22 Aug 2024 22:04:48 -0400
Subject: [PATCH 23/23] backport: LoongArch CPU support from commit 96905012a4

Backported the logic from upstream dev branch commit 96905012a413f4ded49c33a182105f1d6a98b8cf ("Fix #1204 CPU (Linux): support loongarch") to fastfetch 2.31.3.

This patch enables fastfetch to correctly parse LoongArch CPU information on Loong Arch Linux. The patch should be removed once a new upstream release that includes the fix is available.
---
 fastfetch/PKGBUILD                            |  15 ++-
 ...tch-2.31.3-loongarch-support-9690501.patch | 110 ++++++++++++++++++
 2 files changed, 123 insertions(+), 2 deletions(-)
 create mode 100644 fastfetch/fastfetch-2.31.3-loongarch-support-9690501.patch

diff --git a/fastfetch/PKGBUILD b/fastfetch/PKGBUILD
index 7c2e209d9e..6cc2269cbc 100644
--- a/fastfetch/PKGBUILD
+++ b/fastfetch/PKGBUILD
@@ -48,8 +48,19 @@ optdepends=(
   'zlib: Faster image output when using kitty graphics protocol'
   'libdrm: Displays detection'
 )
-source=("${pkgname}-${pkgver}.tar.gz::${url}/archive/refs/tags/${pkgver}.tar.gz")
-sha256sums=('cec1f126ade7a5ef971901b1cdbe79f5864523d7a0a92732991619485d13e2e7')
+source=(
+  "${pkgname}-${pkgver}.tar.gz::${url}/archive/refs/tags/${pkgver}.tar.gz"
+  "fastfetch-2.31.3-loongarch-support-9690501.patch"
+)
+sha256sums=(
+  'cec1f126ade7a5ef971901b1cdbe79f5864523d7a0a92732991619485d13e2e7'
+  'f97904d5627386dba17dffba4e73044f14e0373a60031b8e45fbefe6f8913f51'
+)
+
+prepare() {
+  cd "$pkgname-${pkgver}"
+  patch -Np1 -i ../fastfetch-2.31.3-loongarch-support-9690501.patch
+}
 
 build() {
   cmake -B build -S "${pkgname}-${pkgver}" \
diff --git a/fastfetch/fastfetch-2.31.3-loongarch-support-9690501.patch b/fastfetch/fastfetch-2.31.3-loongarch-support-9690501.patch
new file mode 100644
index 0000000000..87848e9b94
--- /dev/null
+++ b/fastfetch/fastfetch-2.31.3-loongarch-support-9690501.patch
@@ -0,0 +1,110 @@
+From 3047fdc15b2e3d5441f0e3e186b300b8cb5856b5 Mon Sep 17 00:00:00 2001
+From: Yingjie Wang
+Date: Thu, 22 Aug 2024 21:47:14 -0400
+Subject: [PATCH] fastfetch: backport LoongArch CPU support from commit
+ 96905012a4
+
+Backported the logic from upstream dev branch commit 96905012a413f4ded49c33a182105f1d6a98b8cf ("Fix #1204 CPU (Linux): support loongarch") to fastfetch 2.31.3.
+
+This patch enables fastfetch to correctly parse LoongArch CPU information on Loong Arch Linux. The patch should be removed once a new upstream release that includes the fix is available.
+---
+ src/detection/cpu/cpu_linux.c   | 36 +++++++++++++++++++--------------
+ src/detection/version/version.c |  2 ++
+ 2 files changed, 23 insertions(+), 15 deletions(-)
+
+diff --git a/src/detection/cpu/cpu_linux.c b/src/detection/cpu/cpu_linux.c
+index 5b948150..fe30070c 100644
+--- a/src/detection/cpu/cpu_linux.c
++++ b/src/detection/cpu/cpu_linux.c
+@@ -137,7 +137,7 @@ static void detectArmName(FILE* cpuinfo, FFCPUResult* cpu, uint32_t implId)
+ }
+ #endif
+ 
+-static const char* parseCpuInfo(FILE* cpuinfo, FFCPUResult* cpu, FFstrbuf* physicalCoresBuffer, FFstrbuf* cpuMHz, FFstrbuf* cpuIsa, FFstrbuf* cpuUarch, FFstrbuf* cpuImplementer)
++static const char* parseCpuInfo(FF_MAYBE_UNUSED FILE* cpuinfo, FF_MAYBE_UNUSED FFCPUResult* cpu, FF_MAYBE_UNUSED FFstrbuf* physicalCoresBuffer, FF_MAYBE_UNUSED FFstrbuf* cpuMHz, FF_MAYBE_UNUSED FFstrbuf* cpuIsa, FF_MAYBE_UNUSED FFstrbuf* cpuUarch, FF_MAYBE_UNUSED FFstrbuf* cpuImplementer)
+ {
+     FF_AUTO_FREE char* line = NULL;
+     size_t len = 0;
+@@ -145,17 +145,24 @@ static const char* parseCpuInfo(FILE* cpuinfo, FFCPUResult* cpu, FFstrbuf* physi
+     while(getline(&line, &len, cpuinfo) != -1)
+     {
+         //Stop after the first CPU
+-        if(*line == '\0' || *line == '\n')
++        if((*line == '\0' || *line == '\n')
++            #if __arm__ || __loongarch__
++            && cpu->name.length > 0
++            #endif
++        )
+             break;
+ 
+         (void)(
+-            ffParsePropLine(line, "model name :", &cpu->name) ||
+-            ffParsePropLine(line, "vendor_id :", &cpu->vendor) ||
+-            ffParsePropLine(line, "cpu cores :", physicalCoresBuffer) ||
+-            ffParsePropLine(line, "cpu MHz :", cpuMHz) ||
+-            ffParsePropLine(line, "isa :", cpuIsa) ||
+-            ffParsePropLine(line, "uarch :", cpuUarch) ||
+-
++            // arm64 doesn't have "model name"; arm32 does have "model name" but its value is not useful.
++            // "Hardware" should always be used in this case
++            #if !(__arm__ || __aarch64__)
++            (cpu->name.length == 0 && ffParsePropLine(line, "model name :", &cpu->name)) ||
++            (cpu->vendor.length == 0 && ffParsePropLine(line, "vendor_id :", &cpu->vendor)) ||
++            (physicalCoresBuffer->length == 0 && ffParsePropLine(line, "cpu cores :", physicalCoresBuffer)) ||
++            (cpuMHz->length == 0 && ffParsePropLine(line, "cpu MHz :", cpuMHz)) ||
++            #endif
++            (cpuIsa->length == 0 && ffParsePropLine(line, "isa :", cpuIsa)) ||
++            (cpuUarch->length == 0 && ffParsePropLine(line, "uarch :", cpuUarch)) ||
+             #if __arm__ || __aarch64__
+             (cpu->vendor.length == 0 && ffParsePropLine(line, "CPU implementer :", cpuImplementer)) ||
+             #endif
+@@ -163,10 +170,10 @@ static const char* parseCpuInfo(FILE* cpuinfo, FFCPUResult* cpu, FFstrbuf* physi
+             (cpu->name.length == 0 && ffParsePropLine(line, "Hardware :", &cpu->name)) || //For Android devices
+             #endif
+             #if __powerpc__ || __powerpc
+-            (cpu->name.length == 0 && ffParsePropLine(line, "cpu :", &cpu->name)) || //For POWER
++            (cpu->name.length == 0 && ffParsePropLine(line, "cpu :", &cpu->name)) || //For POWER
+             #endif
+             #if __mips__
+-            (cpu->name.length == 0 && ffParsePropLine(line, "cpu model :", &cpu->name)) || //For MIPS
++            (cpu->name.length == 0 && ffParsePropLine(line, "cpu model :", &cpu->name)) || //For MIPS
+             #endif
+             false
+         );
+@@ -275,6 +282,9 @@ static double detectCPUTemp(void)
+ 
+ static void parseIsa(FFstrbuf* cpuIsa)
+ {
++    // Always use the last part of the ISA string. Ref: #590 #1204
++    ffStrbufSubstrAfterLastC(cpuIsa, ' ');
++
+     if(ffStrbufStartsWithS(cpuIsa, "rv"))
+     {
+         // RISC-V ISA string example: "rv64imafdch_zicsr_zifencei".
+@@ -290,10 +300,6 @@ static void parseIsa(FFstrbuf* cpuIsa)
+         }
+         // The final ISA output of the above example is "rv64gch".
+     }
+-    if(ffStrbufStartsWithS(cpuIsa, "mips"))
+-    {
+-        ffStrbufSubstrAfterLastC(cpuIsa, ' ');
+-    }
+ }
+ 
+ void detectAsahi(FFCPUResult* cpu)
+diff --git a/src/detection/version/version.c b/src/detection/version/version.c
+index 5c1fb0ae..bd62386d 100644
+--- a/src/detection/version/version.c
++++ b/src/detection/version/version.c
+@@ -16,6 +16,8 @@
+     #define FF_ARCHITECTURE "riscv"
+ #elif defined(__s390x__)
+     #define FF_ARCHITECTURE "s390x"
++#elif defined(__loongarch__)
++    #define FF_ARCHITECTURE "loongarch"
+ #else
+     #define FF_ARCHITECTURE "unknown"
+ #endif
+-- 
+2.43.0